diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..d47b75c --- /dev/null +++ b/.dockerignore @@ -0,0 +1,36 @@ +.git +.gitignore +__pycache__/ +*.pyc +*.pyo +*.pyd +.Python +env/ +venv/ +.venv/ +build/ +dist/ +*.egg-info/ +.cache +.mypy_cache +.pytest_cache +.DS_Store +.vscode +.idea +node_modules/ +terminals/ + +# OpenSpiel dependencies (will be cloned fresh in Docker) +scenarios/bargaining/open_spiel/open_spiel/abseil-cpp/ +scenarios/bargaining/open_spiel/pybind11/ +scenarios/bargaining/open_spiel/open_spiel/pybind11_abseil/ +scenarios/bargaining/open_spiel/open_spiel/games/bridge/double_dummy_solver/ +scenarios/bargaining/open_spiel/*.so +scenarios/bargaining/open_spiel/build/ + +# Large files not needed for build +bargaining_runs/ +meta_game_analysis/ +*.pkl +*.npy + diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000..285bcd1 --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,67 @@ +name: Build and Publish Docker Image + +on: + push: + branches: [main] + tags: ['v*'] + pull_request: + branches: [main] + workflow_dispatch: # Allow manual triggering + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +jobs: + build-and-push: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + lfs: true + + - name: Debug - List OpenSpiel files + run: | + echo "=== Checking open_spiel directory ===" + ls -la scenarios/bargaining/open_spiel/ || echo "Directory not found" + echo "=== Checking for setup.py ===" + cat scenarios/bargaining/open_spiel/setup.py | head -5 || echo "setup.py not found" + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Container Registry + if: github.event_name != 'pull_request' + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=ref,event=branch + type=ref,event=pr + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=raw,value=latest,enable={{is_default_branch}} + + - name: Build and push Docker image + uses: docker/build-push-action@v5 + with: + context: . 
+ push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/.gitignore b/.gitignore index 9b5fdcd..00791be 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,12 @@ +OPEN_AI_API_KEY.txt .env .DS_Store .python-version .venv/ __pycache__/ -*.pyc \ No newline at end of file +*.pyc + +# Runtime data directories +bargaining_llm_traces/ +bargaining_runs/ +meta_game_analysis/ \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..96634e8 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "python-envs.defaultEnvManager": "ms-python.python:pyenv", + "python-envs.defaultPackageManager": "ms-python.python:pip", + "python-envs.pythonProjects": [] +} \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..dd43d97 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,52 @@ +FROM python:3.11-slim + +# Install system dependencies for OpenSpiel build and MILP solvers +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + build-essential \ + g++ \ + clang \ + cmake \ + git \ + curl \ + python3-dev \ + libffi-dev \ + libglpk-dev \ + glpk-utils && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +# Install Python deps first +COPY pyproject.toml* uv.lock* requirements.txt* ./ +RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \ + pip install --no-cache-dir -r requirements.txt + +# Copy source +COPY . . + +# Download OpenSpiel dependencies to correct locations and build +WORKDIR /app/scenarios/bargaining/open_spiel +RUN echo "=== Cloning dependencies ===" && \ + git clone --depth 1 https://github.com/abseil/abseil-cpp.git open_spiel/abseil-cpp && \ + git clone --depth 1 https://github.com/pybind/pybind11.git pybind11 && \ + git clone --depth 1 https://github.com/pybind/pybind11_abseil.git open_spiel/pybind11_abseil && \ + git clone -b develop --depth 1 https://github.com/jblespiau/dds.git open_spiel/games/bridge/double_dummy_solver && \ + echo "=== Building OpenSpiel ===" && \ + export CXX=g++ && \ + python setup.py build_ext --inplace 2>&1 && \ + python setup.py install + +WORKDIR /app + +# Ensure local src is importable +ENV PYTHONPATH=/app/src:/app/scenarios/bargaining/open_spiel + +# Cloud Run sets PORT; default to 8080 locally +ENV PORT=8080 + +EXPOSE 8080 + +# Use ENTRYPOINT so compose commands append to it +ENTRYPOINT ["python", "-m", "scenarios.bargaining.bargaining_green", "serve"] +CMD ["--host", "0.0.0.0", "--port", "8080"] diff --git a/Procfile b/Procfile new file mode 100644 index 0000000..9f773bc --- /dev/null +++ b/Procfile @@ -0,0 +1 @@ +web: python -m scenarios.bargaining.controller diff --git a/README.md b/README.md index bee2a7a..0fb3014 100644 --- a/README.md +++ b/README.md @@ -1,239 +1,425 @@ -## Quickstart -1. Clone (or fork) the repo: -``` -git clone git@github.com:agentbeats/tutorial -cd agentbeats-tutorial -``` -2. Install dependencies -``` +# Meta-Game Bargaining Evaluator + +**AgentBeats Competition Submission: Green Agent for Multi-Agent Negotiation Assessment** + +This repository contains a **green agent** that implements the **Empirical Meta-Game Analysis** framework from Smithline, Mascioli, Chakraborty & Wellman (2025) for evaluating negotiation agents. The agent computes **Maximum Entropy Nash Equilibrium (MENE)** to rigorously assess purple agent strategies within their strategic ecosystem. 
+ +## Requirements + +- **Python 3.11** (required - the OpenSpiel binary is compiled for Python 3.11) +- [uv](https://docs.astral.sh/uv/) package manager (recommended) + +## Quick Start + +### Option A: Run Locally + +```bash +# Clone and setup +git clone https://github.com/gsmithline/tutorial-agent-beats-comp.git +cd tutorial-agent-beats-comp + +# Install dependencies (uses Python 3.11 via .python-version) uv sync -``` -3. Set environment variables -``` + +# Set environment variables cp sample.env .env +# Add your API key to .env + +# Run a local assessment +PYTHONPATH=scenarios/bargaining/open_spiel:$PYTHONPATH \ + uv run python -m scenarios.bargaining.bargaining_green once \ + --config '{"challenger_url": "https://your-purple-agent.com", "games": 10}' +``` + +> **Note**: The `PYTHONPATH` must include `scenarios/bargaining/open_spiel` to load the pre-compiled OpenSpiel module. + +### Option B: Deploy to Cloud Run + +```bash +# Deploy using the pre-built Docker image +gcloud run deploy bargaining-green-agent \ + --image ghcr.io/gsmithline/tutorial-agent-beats-comp:latest \ + --region=us-central1 \ + --allow-unauthenticated \ + --memory=4Gi + +# Or build from source +gcloud run deploy bargaining-green-agent \ + --source . \ + --region=us-central1 \ + --allow-unauthenticated ``` -Add your Google API key to the .env file -4. Run the [debate example](#example) +### Option C: Register on AgentBeats Platform + +1. Deploy your green agent (Option B above) +2. Navigate to [agentbeats.dev](https://agentbeats.dev) +3. Register your agent with the Cloud Run URL +4. Run assessments against purple agents via the platform + +--- + +## The Meta-Game Framework + +This green agent implements the **Empirical Meta-Game Analysis** methodology introduced by Li & Wellman (2024) and applied to LLM bargaining evaluation in Smithline et al. (2025). + +### Why Meta-Game Analysis? + +Traditional benchmarks evaluate agents in isolation against fixed opponents. But in strategic environments, an agent's performance inherently depends on the behavior of other agents. Meta-game analysis addresses this by: + +1. **Constructing an empirical game** over the space of agent strategies +2. **Computing Nash equilibria** to identify stable population mixtures +3. **Evaluating agents at equilibrium** to measure how well-adapted they are to strategic competition + +### Framework Overview + ``` -uv run agentbeats-run scenarios/debate/scenario.toml +┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐ +│ Purple Agent │ │ Green Agent │ │ Baseline Pool │ +│ (Challenger) │───▶│ (Evaluator) │◀───│ soft, tough, │ +└─────────────────┘ │ │ │ aspire, walk, │ + │ 1. Build Roster │ │ nfsp, rnad │ + │ 2. Simulate N² │ └─────────────────┘ + │ Matchups │ + │ 3. MENE Solve │ + │ 4. Compute │ + │ Metrics │ + └────────┬─────────┘ + │ + ▼ + ┌─────────────────┐ + │ Evaluation │ + │ Results │ + │ - MENE Regret │ + │ - Welfare % │ + │ - Fairness % │ + └─────────────────┘ ``` -This command will: -- Start the agent servers using the commands specified in scenario.toml -- Construct an `assessment_request` message containing the participant's role-endpoint mapping and the assessment config -- Send the `assessment_request` to the green agent and print streamed responses -**Note:** Use `--show-logs` to see agent outputs during the assessment, and `--serve-only` to start agents without running the assessment. 
+### Evaluation Process -To run this example manually, start the agent servers in separate terminals, and then in another terminal run the A2A client on the scenario.toml file to initiate the assessment. +**Step 1: Agent Roster Construction** +- Your purple agent joins a pool of baseline strategies +- Heuristic agents: `soft` (accepts any offer), `tough` (minimal offers), `aspire` (concession schedule), `walk` (takes BATNA) +- RL-derived policies: `nfsp` (Neural Fictitious Self-Play), `rnad` (Regularized Nash Dynamics) -After running, you should see an output similar to this. +**Step 2: Pairwise Simulation** +- For each ordered pair (i, j), simulate N games with agent i as row player and j as column player +- Uses OpenSpiel's negotiation game with: + - T=3 item types with quantities (7, 4, 1) + - Private valuations drawn uniformly from [1, 100] + - Private BATNAs (outside options) + - Discount factor γ ∈ {0.9, 0.98} per round + - Maximum R ∈ {3, 5} rounds -![Sample output](assets/sample_output.png) +**Game Configurations (from paper)** + +| Config | Discount (γ) | Rounds (R) | Description | +|--------|--------------|------------|-------------| +| BG4 | 0.9 | 3 | High time pressure, short horizon | +| BG5 | 0.98 | 3 | Low time pressure, short horizon | +| BG6 | 0.98 | 5 | Low time pressure, long horizon | + +Pre-trained NFSP and RNAD checkpoints are provided for all three configurations. + +**Step 3: Payoff Matrix & MENE** +- Construct symmetric payoff matrix where M[i][j] = agent i's average payoff when playing against agent j +- Solve for **Maximum Entropy Nash Equilibrium** using MILP (CVXPY) +- Bootstrap resampling (default 100 iterations) for statistical robustness + +**Step 4: Metrics Computation** +- Compute regret and welfare metrics weighted by the MENE mixture + +--- + +## Evaluation Metrics + +### MENE Regret (Primary Metric) + +The regret of a pure strategy π at Nash equilibrium σ* measures the deviation incentive: -## Project Structure ``` -src/ -└─ agentbeats/ - ├─ green_executor.py # base A2A green agent executor - ├─ models.py # pydantic models for green agent IO - ├─ client.py # A2A messaging helpers - ├─ client_cli.py # CLI client to start assessment - └─ run_scenario.py # run agents and start assessment +Regret(π) = max(0, u(π, σ*) - u(σ*)) +``` + +Where: +- u(π, σ*) = expected payoff for pure strategy π against the equilibrium mixture +- u(σ*) = expected payoff at equilibrium (playing the mixture) + +**Interpretation**: Lower regret means the agent is better adapted to the equilibrium. An agent with zero regret has no incentive to deviate—it is either in the equilibrium support or weakly dominated. Positive regret indicates the strategy outperforms the equilibrium mixture (which should be near-zero for a correctly computed MENE). 
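+
+For concreteness, the sketch below shows how this regret can be computed from a payoff matrix `M` (row-player average payoffs, indexed in roster order) and an equilibrium mixture `sigma` returned by the MENE solver. The function name and toy numbers are illustrative only, not the repository's internal API:
+
+```python
+import numpy as np
+
+def mene_regret(M: np.ndarray, sigma: np.ndarray) -> np.ndarray:
+    """Regret(pi) = max(0, u(pi, sigma*) - u(sigma*)) for each pure strategy."""
+    u_pure = M @ sigma           # payoff of each pure strategy against the mixture
+    u_mix = sigma @ M @ sigma    # payoff of playing the equilibrium mixture itself
+    return np.maximum(0.0, u_pure - u_mix)
+
+# Toy 3-strategy example with an assumed equilibrium mixture
+M = np.array([[50.0, 60.0, 40.0],
+              [45.0, 55.0, 65.0],
+              [70.0, 35.0, 50.0]])
+sigma = np.array([0.4, 0.3, 0.3])
+print(mene_regret(M, sigma))     # per-strategy regret vector
+```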
+ +### Welfare Metrics -scenarios/ -└─ debate/ # implementation of the debate example - ├─ debate_judge.py # green agent impl using the official A2A SDK - ├─ adk_debate_judge.py # alternative green agent impl using Google ADK - ├─ debate_judge_common.py # models and utils shared by above impls - ├─ debater.py # debater agent (Google ADK) - └─ scenario.toml # config for the debate example +| Metric | Formula | Description | +|--------|---------|-------------| +| **UW (Utilitarian Welfare)** | u₁ + u₂ | Total value created by both players | +| **NW (Nash Welfare)** | √(u₁ × u₂) | Geometric mean - balances efficiency and equity | +| **NW+ (Nash Welfare Advantage)** | √(max(0, u₁-b₁) × max(0, u₂-b₂)) | Surplus over BATNAs | +| **EF1 (Envy-Free up to 1 item)** | Boolean per game | Fairness: envy eliminable by removing one item | + +All welfare metrics are normalized against calibration constants for cross-comparison. +--- + +## Assessment Configuration + +### Assessment Request Format + +Per the A2A protocol, send an assessment request to the green agent: + +```json +{ + "participants": { + "challenger": "https://your-purple-agent.example.com" + }, + "config": { + "games": 50, + "max_rounds": 5, + "discount": 0.98, + "bootstrap": 100, + "challenger_circle": 5 + } +} ``` -# Agentbeats Tutorial -Welcome to the Agentbeats Tutorial! 🤖🎵 +### Configuration Options -Agentbeats is an open platform for **standardized and reproducible agent evaluations** and research. +| Parameter | Default | Description | +|-----------|---------|-------------| +| `games` | 50 | Number of games per agent pair | +| `max_rounds` | 5 | Maximum negotiation rounds (R) | +| `discount` | 0.98 | Per-round discount factor (γ) | +| `bootstrap` | 100 | Bootstrap iterations for MENE | +| `challenger_circle` | 0 | Prompt sophistication level (0-6) | +| `challenger_label` | "challenger" | Label for your agent in results | +| `remote_agents` | {} | Additional remote agents `{"label": "url"}` | -This tutorial is designed to help you get started, whether you are: -- 🔬 **Researcher** → running controlled experiments and publishing reproducible results -- 🛠️ **Builder** → developing new agents and testing them against benchmarks -- 📊 **Evaluator** → designing benchmarks, scenarios, or games to measure agent performance -- ✨ **Enthusiast** → exploring agent behavior, running experiments, and learning by tinkering +### Prompt Circles (LLM Agents) -By the end, you’ll understand: -- The core concepts behind Agentbeats - green agents, purple agents, and A2A assessments -- How to run existing evaluations on the platform via the web UI -- How to build and test your own agents locally -- Share your agents and evaluation results with the community +The green agent provides structured prompts to LLM-based purple agents via "circles" - a hierarchical prompting framework: -This guide will help you quickly get started with Agentbeats and contribute to a growing ecosystem of open agent benchmarks. +| Circle | Content | +|--------|---------| +| 0 | Bare rules: items, valuations, BATNA, actions | +| 1 | + Objective specification (maximize outcome) | +| 2 | + Worked numeric example of offer evaluation | +| 3 | + Step-by-step routine: assess, compare, decide | +| 4 | + Five common negotiation mistakes to avoid | +| 5 | + Quick numeric checks against those mistakes | +| 6 | + Strategic inference from opponent's offers | +Set `challenger_circle` to inject these prompts into observations sent to your agent. 
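+
+As a concrete reference for the welfare metrics above, the sketch below computes UW, NW, NW+ and the EF1 check for a single game outcome. The variable names (`u1`, `b1`, allocations as per-item-type quantity lists) are illustrative and do not mirror the module's internal API:
+
+```python
+import math
+
+def welfare_metrics(u1: float, u2: float, b1: float, b2: float):
+    """Per-game welfare values before normalization against calibration constants."""
+    uw = u1 + u2                                        # utilitarian welfare
+    nw = math.sqrt(u1 * u2)                             # Nash welfare
+    nwa = math.sqrt(max(0, u1 - b1) * max(0, u2 - b2))  # Nash welfare advantage over BATNAs
+    return uw, nw, nwa
+
+def is_ef1(alloc1, alloc2, v1, v2) -> bool:
+    """EF1: any envy can be removed by dropping one unit of some item from the envied bundle."""
+    def ok(own, other, values):
+        own_val = sum(v * q for v, q in zip(values, own))
+        other_val = sum(v * q for v, q in zip(values, other))
+        if own_val >= other_val:
+            return True
+        return any(q > 0 and own_val >= other_val - values[i]
+                   for i, q in enumerate(other))
+    return ok(alloc1, alloc2, v1) and ok(alloc2, alloc1, v2)
+
+print(welfare_metrics(120, 90, 60, 50))
+print(is_ef1([5, 2, 1], [2, 2, 0], [10, 20, 30], [25, 5, 40]))
+```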
-## Core Concepts -**Green agents** orchestrate and manage evaluations of one or more purple agents by providing an evaluation harness. -A green agent may implement a single-player benchmark or a multi-player game where agents compete or collaborate. It sets the rules of the game, hosts the match and decides results. +--- -**Purple agents** are the participants being evaluated. They possess certain skills (e.g. computer use) that green agents evaluate. In security-themed games, agents are often referred to as red and blue (attackers and defenders). +## Building a Purple Agent -An **assessment** is a single evaluation session hosted by a green agent and involving one or more purple agents. Purple agents demonstrate their skills, and the green agent evaluates and reports results. +Your purple agent must: -All agents communicate via the **A2A protocol**, ensuring compatibility with the open standard for agent interoperability. Learn more about A2A [here](https://a2a-protocol.org/latest/). +1. **Implement A2A protocol** - Expose an A2A server endpoint +2. **Handle negotiation messages** - Receive observations with valuations, BATNAs, and offers +3. **Return valid actions** - Propose offers or accept/walk -## Run an Assessment -Follow these steps to run assessments using agents that are already available on the platform. +### Expected Message Format -1. Navigate to agentbeats.org -2. Create an account (or log in) -3. Select the green and purple agents to participate in an assessment -4. Start the assessment -5. Observe results +The green agent sends observations like: -## Agent Development -In this section, you will learn how to: -- Develop purple agents (participants) and green agents (evaluators) -- Use common patterns and best practices for building agents -- Run assessments locally during development -- Evaluate your agents on the Agentbeats platform +```json +{ + "role": "row", + "round": 2, + "valuations": [45, 72, 33], + "batna": 85, + "quantities": [7, 4, 1], + "last_offer": [3, 2, 0], + "history": [...] +} +``` -### General Principles -You are welcome to develop agents using **any programming language, framework, or SDK** of your choice, as long as you expose your agent as an **A2A server**. This ensures compatibility with other agents and benchmarks on the platform. For example, you can implement your agent from scratch using the official [A2A SDK](https://a2a-protocol.org/latest/sdk/), or use a downstream SDK such as [Google ADK](https://google.github.io/adk-docs/). +Your agent responds with an action: -At the beginning of an assessment, the green agent receives an `assessment_request` signal. This signal includes the addresses of the participating agents and the assessment configuration. The green agent then creates a new A2A task and uses the A2A protocol to interact with participants and orchestrate the assessment. During the orchestration, the green agent produces A2A task updates (logs) so that the assessment can be tracked. After the orchestration, the green agent evaluates purple agent performance and produces an A2A artifact with the assessment results. +```json +{"action": "COUNTEROFFER", "offer": [4, 2, 1]} +``` +Or: -#### Assessment Patterns -Below are some common patterns to help guide your assessment design. +```json +{"action": "ACCEPT"} +``` -- **Artifact submission**: The purple agent produces artifacts (e.g. a trace, code, or research report) and sends them to the green agent for assessment. 
-- **Traced environment**: The green agent provides a traced environment (e.g. via MCP, SSH, or a hosted website) and observes the purple agent's actions for scoring. -- **Message-based assessment**: The green agent evaluates purple agents based on simple message exchanges (e.g. question answering, dialogue, or reasoning tasks). -- **Multi-agent games**: The green agent orchestrates interactions between multiple purple agents, such as security games, negotiation games, social deduction games, etc. +Or: +```json +{"action": "WALK"} +``` + +### Common Mistakes to Avoid -#### Reproducibility -To ensure reproducibility, your agents (including their tools and environments) must join each assessment with a fresh state. +From our analysis, these are the five key mistakes that LLM negotiators make: -### Example -To make things concrete, we will use a debate scenario as our toy example: -- Green agent (`DebateJudge`) orchestrates a debate between two agents by using an A2A client to alternate turns between participants. Each participant's response is forwarded to the caller as a task update. After the orchestration, it applies an LLM-as-Judge technique to evaluate which debater performed better and finally produces an artifact with the results. -- Two purple agents (`Debater`) participate by presenting arguments for their side of the topic. +1. **M1**: Making an offer worse than your previous offer +2. **M2**: Making an offer worse for you than your BATNA +3. **M3**: Offering no items or all items (extreme divisions) +4. **M4**: Accepting an offer worse than your BATNA +5. **M5**: Walking away from an offer better than your BATNA -To run this example, we start all three servers and then use an A2A client to send an `assessment_request` to the green agent and observe its outputs. -The full example code is given in the template repository. Follow the quickstart guide to setup the project and run the example. +--- +## Local Development -### Evaluate Your Agent on the Platform -To run assessments on your agent on the platform, you'll need a public address for your agent service. We recommend using [Cloudflare Tunnel](https://developers.cloudflare.com/cloudflare-one/connections/connect-networks/) for quick onboarding without bandwidth limits, but you are welcome to use nginx or ngrok if you prefer. +### Running the Green Agent Server -1. Install Cloudflare Tunnel ```bash -brew install cloudflared # macOS +# Start the A2A server +PYTHONPATH=scenarios/bargaining/open_spiel:$PYTHONPATH \ + uv run python -m scenarios.bargaining.bargaining_green serve \ + --host 0.0.0.0 \ + --port 8080 + +# In another terminal, send an assessment request +curl -X POST http://localhost:8080/a2a \ + -H "Content-Type: application/json" \ + -d '{"type": "assessment_request", "participants": {...}, "config": {...}}' ``` -2. Start the Cloudflare tunnel pointing to your local server + +### Running a Single Assessment + ```bash -cloudflared tunnel --url http://127.0.0.1:9019 +PYTHONPATH=scenarios/bargaining/open_spiel:$PYTHONPATH \ + uv run python -m scenarios.bargaining.bargaining_green once \ + --config '{"challenger_url": "https://...", "games": 10}' ``` -The tunnel will output a public URL (e.g., `https://abc-123.trycloudflare.com`). Copy this URL. -3. Start your A2A server with the `--card-url` flag using the URL from step 2 +### Docker Build + ```bash -python scenarios/debate/debater.py --host 127.0.0.1 --port 9019 --card-url https://abc-123.trycloudflare.com +# Build locally +docker build -t bargaining-green-agent . 
+ +# Run locally +docker run -p 8080:8080 bargaining-green-agent +``` + +--- + +## Project Structure + +``` +scenarios/bargaining/ +├── bargaining_green.py # Main green agent implementation +├── bargaining_env/ +│ ├── agents/ # Baseline negotiation agents +│ │ ├── soft.py # Always-accept agent +│ │ ├── tough.py # Minimal-offer agent +│ │ ├── aspiration.py # Concession-schedule agent +│ │ ├── walk.py # BATNA-preferring agent +│ │ ├── nfsp.py # Neural Fictitious Self-Play +│ │ └── rnad.py # Regularized Nash Dynamics +│ ├── pyspiel_integration.py # Game parameter builder +│ ├── pyspiel_runner.py # OpenSpiel game interface +│ ├── mene_solver.py # MENE computation via MILP +│ └── run_entire_matrix.py # Matrix simulation orchestrator +├── rl_agent_checkpoints/ # Pre-trained RL policies +│ ├── nfsp/ # NFSP checkpoints (bg4, bg5, bg6) +│ └── rnad/ # RNAD checkpoints (bg4, bg5, bg6) +└── open_spiel/ # Custom OpenSpiel with negotiation game ``` -The agent card will now contain the correct public URL when communicating with -other agents. -4. Register your agent on agentbeats.org with this public URL. -5. Run an assessment as described [earlier](#run-an-assessment) +--- -Note: Restarting the tunnel generates a new URL, so you'll need to restart your -agent with the new `--card-url` and update the URL in the web UI. You may -consider using a [Named Tunnel](https://developers.cloudflare.com/learning-paths/clientless-access/connect-private-applications/create-tunnel/) -for a persistent URL. +## Technical Details +### OpenSpiel Integration -## Best Practices 💡 +This repository includes a custom OpenSpiel build with the negotiation/bargaining game. -Developing robust and efficient agents requires more than just writing code. Here are some best practices to follow when building for the AgentBeats platform, covering security, performance, and reproducibility. +**Important**: The pre-compiled `pyspiel.so` in `scenarios/bargaining/open_spiel/` is built for **Python 3.11**. The project is configured to use Python 3.11 via `.python-version`. -### API Keys and Cost Management +The Docker build compiles OpenSpiel from source with: -AgentBeats uses a Bring-Your-Own-Key (BYOK) model. This gives you maximum flexibility to use any LLM provider, but also means you are responsible for securing your keys and managing costs. +- Abseil C++ library +- pybind11 Python bindings +- Double Dummy Solver (for bridge, included in full build) -- **Security**: You provide your API keys directly to the agents running on your own infrastructure. Never expose your keys in client-side code or commit them to public repositories. Use environment variables (like in the tutorial's `.env` file) to manage them securely. +**Loading the Game Correctly** + +Always use `build_negotiation_params()` from `pyspiel_integration.py` to ensure correct game loading: + +```python +from scenarios.bargaining.bargaining_env.pyspiel_integration import ( + build_negotiation_params, + try_load_pyspiel_game +) + +params = build_negotiation_params( + discount=0.98, + max_rounds=3, + num_items=3, + item_quantities=(7, 4, 1), + min_value=1, + max_value=100, + max_quantity=10, +) +game = try_load_pyspiel_game(params) +``` -- **Cost Control**: If you publish a public agent, it could become popular unexpectedly. To prevent surprise bills, it's crucial to set spending limits and alerts on your API keys or cloud account. For example, if you're only using an API for a single agent on AgentBeats, a limit of $10 with an alert at $5 might be a safe starting point. 
+> **Note**: The `item_quantities` parameter must use comma-separated values internally (e.g., `"7,4,1"`). The helper function handles this automatically. -#### Getting Started with Low Costs -If you are just getting started and want to minimize costs, many services offer generous free tiers. -- **Google Gemini**: Often has a substantial free tier for API access. -- **OpenRouter**: Provides free credits upon signup and can route requests to many different models, including free ones. -- **Local LLMs**: If you run agents on your own hardware, you can use a local LLM provider like [Ollama](https://ollama.com/) to avoid API costs entirely. +### MENE Solver -#### Provider-Specific Guides -- **OpenAI**: - - Finding your key: [Where do I find my OpenAI API key?](https://help.openai.com/en/articles/4936850-where-do-i-find-my-openai-api-key) - - Setting limits: [Usage limits](https://platform.openai.com/settings/organization/limits) +The Maximum Entropy Nash Equilibrium is computed using: -- **Anthropic (Claude)**: - - Getting started: [API Guide](https://docs.anthropic.com/claude/reference/getting-started-with-the-api) - - Setting limits: [Spending limits](https://console.anthropic.com/settings/limits) +- CVXPY for convex optimization +- ECOS_BB or GLPK_MI as MILP solvers +- Bootstrap resampling for robustness (following Wiedenbeck et al., 2014) -- **Google Gemini**: - - Finding your key: [Get an API key](https://ai.google.dev/gemini-api/docs/api-key) - - Setting limits requires using Google Cloud's billing and budget features. Be sure to set up [billing alerts](https://cloud.google.com/billing/docs/how-to/budgets). +### RL Agent Checkpoints -- **OpenRouter**: - - Request a key from your profile page under "Keys". - - You can set a spending limit directly in the key creation flow. This limit aggregates spend across all models accessed via that key. +Pre-trained checkpoints are available for both NFSP and RNAD agents: +| Agent | BG4 | BG5 | BG6 | +|-------|-----|-----|-----| +| NFSP | `nfsp_bg4.pt` | `nfsp_ng5.pt` | `nfsp_bg6.pt` | +| RNAD | `rnad_bg4.pkl` | `rnad_bg5.pkl` | `rnad_bg6.pkl` | -### Efficient & Reliable Assessments +The checkpoints are automatically selected based on the game configuration (discount and max_rounds). -#### Communication -Agents in an assessment often run on different machines across the world. They communicate over the internet, which introduces latency. +--- -- **Minimize Chattiness**: Design interactions to be meaningful and infrequent. Avoid back-and-forth for trivial information. -- **Set Timeouts**: A single unresponsive agent can stall an entire assessment. Your A2A SDK may handle timeouts, but it's good practice to be aware of them and configure them appropriately. -- **Compute Close to Data**: If an agent needs to process a large dataset or file, it should download that resource and process it locally, rather than streaming it piece by piece through another agent. +## References -#### Division of Responsibilities -The green and purple agents have distinct roles. Adhering to this separation is key for efficient and scalable assessments, especially over a network. +1. **Smithline, G., Mascioli, C., Chakraborty, M., & Wellman, M. P.** (2025). "Measuring Competition and Cooperation in LLM Bargaining: An Empirical Meta-Game Analysis." University of Michigan. -- **Green agent**: A lightweight verifier or orchestrator. Its main job is to set up the scenario, provide context to purple agents, and evaluate the final result. It should not perform heavy computation. 
-- **Purple agent**: The workhorse. It performs the core task, which may involve complex computation, running tools, or long-running processes. +2. **Li, Z., & Wellman, M. P.** (2024). "A Meta-Game Evaluation Framework for Deep Multiagent Reinforcement Learning." IJCAI. -Here's an example for a security benchmark: -1. The **green agent** defines a task (e.g., "find a vulnerability in this codebase") and sends the repository URL to the purple agent. -2. The **purple agent** clones the code, runs its static analysis tools, fuzzers, and other agentic processes. This could take a long time and consume significant resources. -3. Once it finds a vulnerability, the **purple agent** sends back a concise report: the steps to reproduce the bug and a proposed patch. -4. The **green agent** receives this small payload, runs the reproduction steps, and verifies the result. This final verification step is quick and lightweight. +3. **Wellman, M. P., Tuyls, K., & Greenwald, A.** (2025). "Empirical Game-Theoretic Analysis: A Survey." JAIR. -This structure keeps communication overhead low and makes the assessment efficient. +4. **Lewis, M., et al.** (2017). "Deal or No Deal? End-to-End Learning for Negotiation Dialogues." EMNLP. -### Taking Advantage of Platform Features -AgentBeats is more than just a runner; it's an observability platform. You can make your agent's "thought process" visible to the community and to evaluators. +5. **Lanctot, M., et al.** (2019). "OpenSpiel: A Framework for Reinforcement Learning in Games." arXiv:1908.09453. -- **Emit Traces**: As your agent works through a problem, use A2A `task update` messages to report its progress, current strategy, or intermediate findings. These updates appear in real-time in the web UI and in the console during local development. -- **Generate Artifacts**: When your agent produces a meaningful output (like a piece of code, a report, or a log file), save it as an A2A `artifact`. Artifacts are stored with the assessment results and can be examined by anyone viewing the battle. +--- -Rich traces and artifacts are invaluable for debugging, understanding agent behavior, and enabling more sophisticated, automated "meta-evaluations" of agent strategies. +## License -### Assessment Isolation and Reproducibility -For benchmarks to be fair and meaningful, every assessment run must be independent and reproducible. +Apache 2.0 -- **Start Fresh**: Each agent should start every assessment from a clean, stateless initial state. Avoid carrying over memory, files, or context from previous battles. -- **Isolate Contexts**: The A2A protocol provides a `task_id` for each assessment. Use this ID to namespace any local resources your agent might create, such as temporary files or database entries. This prevents collisions between concurrent assessments. -- **Reset State**: If your agent maintains a long-running state, ensure you have a mechanism to reset it completely between assessments. +--- -Following these principles ensures that your agent's performance is measured based on its capability for the task at hand, not on leftover state from a previous run. 
+## Related Repositories +| Repository | Description | +|------------|-------------| +| [meta-game-leaderboard](https://github.com/gsmithline/meta-game-leaderboard) | Leaderboard for submitting and comparing agents | +| [llm-negotiator-purple](https://github.com/gsmithline/llm-negotiator-purple) | Example Claude-powered purple agent | -## Next Steps -Now that you’ve completed the tutorial, you’re ready to take the next step with Agentbeats. +## AgentBeats Competition -- 📊 **Develop new assessments** → Build a green agent along with baseline purple agents. Share your GitHub repo with us and we'll help with hosting and onboarding to the platform. -- 🏆 **Evaluate your agents** → Create and test agents against existing benchmarks to climb the leaderboards. -- 🌐 **Join the community** → Connect with researchers, builders, and enthusiasts to exchange ideas, share results, and collaborate on new evaluations. +This is a submission for the **AgentBeats x AgentX Competition 2025**. -The more agents and assessments are shared, the richer and more useful the platform becomes. We’re excited to see what you create! +- **Agent Type**: Green (Evaluator) +- **Domain**: Multi-agent negotiation / bargaining +- **Methodology**: Empirical Meta-Game Analysis with MENE +- **Docker Image**: `ghcr.io/gsmithline/tutorial-agent-beats-comp:latest` +- **Python Version**: 3.11 (required) +- **Authors**: Based on research from the University of Michigan Strategic Reasoning Group diff --git a/SUBMISSION.md b/SUBMISSION.md new file mode 100644 index 0000000..f6b2363 --- /dev/null +++ b/SUBMISSION.md @@ -0,0 +1,269 @@ +# AgentBeats Competition Submission + +## Meta-Game Bargaining Evaluator + +**Submission Type**: Green Agent (Evaluator) +**Domain**: Multi-Agent Negotiation +**Docker Image**: `ghcr.io/gsmithline/tutorial-agent-beats-comp:latest` + +--- + +## Abstract + +We present a **green agent** for the AgentBeats platform that implements an Empirical Game-Theoretic Analysis (EGTA) framework for evaluating negotiation agents. Rather than testing agents against fixed opponents, our evaluator computes the **Maximum Entropy Nash Equilibrium (MENE)** of the meta-game to measure how well-adapted each agent is to strategic competition. + +The framework is based on computational game theory methodology developed by Zun Li and Michael Wellman, enabling rigorous, reproducible evaluation of agent strategies. + +--- + +## Competition Requirements Compliance + +### Green Agent Requirements + +| Requirement | Implementation | +|-------------|----------------| +| A2A Protocol | Full compliance via `a2a-sdk` | +| Assessment Handling | `assessment_request` → simulation → artifact | +| Docker Container | Multi-stage build, GHCR published | +| Reproducibility | Stateless, fresh state per assessment | +| Result Artifacts | JSON with per-agent metrics | + +### Registration + +1. **Docker Image**: Published to GitHub Container Registry + ``` + ghcr.io/gsmithline/tutorial-agent-beats-comp:latest + ``` + +2. **Deployment**: Google Cloud Run compatible + ```bash + gcloud run deploy bargaining-green-agent \ + --image ghcr.io/gsmithline/tutorial-agent-beats-comp:latest \ + --region=us-central1 \ + --allow-unauthenticated \ + --memory=4Gi + ``` + +3. **Platform Registration**: Register the Cloud Run URL at agentbeats.dev + +--- + +## How the Evaluation Works + +### Assessment Flow + +``` +1. Green agent receives assessment_request with: + - participants.challenger = purple agent URL + - config = evaluation parameters + +2. 
Green agent builds agent roster: + - Challenger (remote purple agent) + - Baseline agents: soft, tough, aspiration, walk + - RL agents: nfsp, rnad (when available) + +3. Pairwise simulation: + - For each (agent_i, agent_j) pair + - Run N games in OpenSpiel negotiation environment + - Record payoffs and outcomes + +4. Meta-game analysis: + - Construct payoff matrix where M[i][j] = agent i's average payoff against agent j + - Solve for MENE via MILP + - Compute regret and welfare metrics + +5. Return artifact with results +``` + +### The Negotiation Environment + +Based on OpenSpiel's negotiation game: + +- **Items**: 3 types with quantities [7, 4, 1] +- **Valuations**: Private, drawn uniformly from [1, 100] +- **BATNAs**: Private outside options +- **Discount**: Configurable (0.9 or 0.98 per round) +- **Rounds**: Configurable (3 or 5 maximum) + +Each game is a multi-round alternating-offer protocol where agents propose item divisions until one accepts or the deadline hits. + +### Game Configurations + +| Config | Discount (γ) | Rounds (R) | Use Case | +|--------|--------------|------------|----------| +| BG4 | 0.9 | 3 | High time pressure | +| BG5 | 0.98 | 3 | Low time pressure, short | +| BG6 | 0.98 | 5 | Low time pressure, long | + +Pre-trained NFSP and RNAD checkpoints are included for all configurations. + +### Baseline Agent Pool + +| Agent | Strategy | +|-------|----------| +| `soft` | Always accepts any offer | +| `tough` | Proposes minimal offers, rarely concedes | +| `aspiration` | Gradually lowers aspirations over rounds | +| `walk` | Takes BATNA if offers don't improve quickly | +| `nfsp` | Neural Fictitious Self-Play (learned) | +| `rnad` | Regularized Nash Dynamics (learned) | + +--- + +## Evaluation Metrics + +### Primary Metric: MENE Regret + +The regret measures the deviation incentive for each pure strategy: + +``` +Regret(π) = max(0, u(π, σ*) - u(σ*)) +``` + +Where: +- u(π, σ*) = expected payoff for pure strategy π against the MENE mixture +- u(σ*) = expected payoff at equilibrium + +**Interpretation**: Lower regret = better adapted to strategic competition. Zero regret means the strategy is in the equilibrium support or weakly dominated. + +### Welfare Metrics + +| Metric | Formula | Interpretation | +|--------|---------|----------------| +| **UW** | p₁ + p₂ | Total value created | +| **NW** | √(p₁ × p₂) | Balanced value (Pareto-fair) | +| **NWA** | √(max(0,p₁-b₁) × max(0,p₂-b₂)) | Surplus over BATNAs | +| **EF1** | Boolean | Envy-free up to one item | + +All metrics are normalized as percentages of calibration constants. + +### Result Format + +Results include bootstrap means with standard errors (following the paper's format: mean±SE): + +```json +{ + "summary": { + "num_agents": 5, + "mene_regret_mean": 2.34, + "uw_percent_mean": 87.2, + "nw_percent_mean": 82.1, + "nwa_percent_mean": 45.3, + "ef1_percent_mean": 91.5 + }, + "per_agent": [ + { + "agent_name": "challenger", + "mene_regret": 1.2, + "mene_regret_se": 0.15, + "uw_percent": 89.1, + "uw_percent_se": 0.08, + "nw_percent": 85.3, + "nw_percent_se": 0.12, + "nwa_percent": 52.1, + "nwa_percent_se": 0.18, + "ef1_percent": 94.2, + "ef1_percent_se": 0.25 + } + ], + "mene_distribution": { + "challenger": 0.25, + "soft": 0.15, + "tough": 0.20, + "aspiration": 0.25, + "walk": 0.15 + } +} +``` + +**Note**: Standard errors are computed via bootstrap resampling (default 100 iterations), following the methodology from [Wiedenbeck et al., 2014]. 
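+
+A minimal sketch of this resampling step is shown below. It assumes per-game payoff samples for every ordered agent pair and a `solve_mene` callable standing in for the MILP-based solver; both names are illustrative, not the module's actual interface:
+
+```python
+import numpy as np
+
+rng = np.random.default_rng(0)
+
+def bootstrap_regret(samples, solve_mene, iters=100):
+    """samples[i][j]: 1-D array of per-game payoffs for row agent i vs. column agent j."""
+    n = len(samples)
+    draws = []
+    for _ in range(iters):
+        # Resample games within each cell, then rebuild the mean payoff matrix
+        M = np.array([[rng.choice(samples[i][j], size=len(samples[i][j]), replace=True).mean()
+                       for j in range(n)] for i in range(n)])
+        sigma = solve_mene(M)                     # placeholder for the MILP-based MENE solver
+        u_pure, u_mix = M @ sigma, sigma @ M @ sigma
+        draws.append(np.maximum(0.0, u_pure - u_mix))
+    draws = np.stack(draws)                       # shape: (iters, num_agents)
+    return draws.mean(axis=0), draws.std(axis=0, ddof=1)  # per-agent bootstrap mean and SE
+```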
+ +--- + +## Configuration Options + +### Assessment Request + +```json +{ + "participants": { + "challenger": "https://your-purple-agent.example.com" + }, + "config": { + "games": 50, + "max_rounds": 5, + "discount": 0.98, + "bootstrap": 100, + "challenger_circle": 5, + "challenger_label": "my_agent_v1", + "remote_agents": { + "agent_b": "https://another-agent.example.com" + } + } +} +``` + +### Parameter Reference + +| Parameter | Default | Range | Description | +|-----------|---------|-------|-------------| +| `games` | 50 | 10-200 | Games per pair | +| `max_rounds` | 5 | 3-10 | Negotiation rounds | +| `discount` | 0.98 | 0.9-1.0 | Time pressure | +| `bootstrap` | 100 | 50-500 | MENE bootstrap iterations | +| `challenger_circle` | 0 | 0-6 | LLM prompt level | + +--- + +## LLM Agent Support + +For LLM-based purple agents, the green agent provides structured prompts via "circles": + +| Circle | Content | +|--------|---------| +| 0 | Basic game rules | +| 1 | + Valuations and actions | +| 2 | + BATNA emphasis | +| 3 | + Step-by-step reasoning | +| 4 | + Common mistake list | +| 5 | + Error prevention examples | +| 6 | + Advanced strategic guidance | + +Set `challenger_circle` to inject these prompts into observations sent to your agent. + +--- + +## Dependencies + +- **Python 3.11**: Required (OpenSpiel binary is compiled for Python 3.11) +- **OpenSpiel**: Custom build with negotiation game (bundled) +- **CVXPY + ECOS**: MENE computation via MILP +- **A2A SDK**: Agent communication protocol +- **PyTorch**: For NFSP agent checkpoints +- **JAX/Haiku**: For RNAD agent checkpoints + +### RL Checkpoints + +Pre-trained checkpoints are included in `scenarios/bargaining/rl_agent_checkpoints/`: + +| Agent | Format | Configs | +|-------|--------|---------| +| NFSP | PyTorch (`.pt`) | BG4, BG5, BG6 | +| RNAD | Pickle (`.pkl`) | BG4, BG5, BG6 | + +--- + +## References + +1. Li, Z., & Wellman, M. P. (2023). "Empirical Game-Theoretic Analysis of Adaptive Bargaining Strategies" +2. Wellman, M. P. (2016). "Putting the agent in agent-based modeling." Autonomous Agents and Multi-Agent Systems. +3. Lewis, M., et al. (2017). "Deal or No Deal? End-to-End Learning for Negotiation Dialogues." EMNLP. +4. Lanctot, M., et al. (2019). "OpenSpiel: A Framework for Reinforcement Learning in Games." + +--- + +## Authors + +Submission for the AgentBeats x AgentX Competition 2025. 
+ +**Repository**: https://github.com/gsmithline/tutorial-agent-beats-comp diff --git a/pyproject.toml b/pyproject.toml index 7668f2d..fd1edda 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ name = "agentbeats-tutorial" version = "0.1.0" description = "Agentbeats Tutorial" readme = "README.md" -requires-python = ">=3.11" +requires-python = ">=3.11,<3.12" dependencies = [ "a2a-sdk>=0.3.5", "google-adk>=1.14.1", @@ -15,6 +15,18 @@ dependencies = [ "pydantic>=2.11.9", "python-dotenv>=1.1.1", "uvicorn>=0.35.0", + "numpy>=1.26.0", + "cvxpy>=1.4.0", + "ecos>=2.0.0", + # RL dependencies for NFSP/RNAD agents + "torch>=2.0.0", + "jax>=0.4.20", + "jaxlib>=0.4.20", + "dm-haiku>=0.0.12", + "optax>=0.1.7", + "chex>=0.1.8", + "rlax>=0.1.6", + "ml-collections>=0.1.1", ] [project.scripts] @@ -22,7 +34,9 @@ agentbeats-run = "agentbeats.run_scenario:main" [tool.uv] package = true -dev-dependencies = [ + +[dependency-groups] +dev = [ "mypy>=1.18.1", ] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..6dee423 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,36 @@ +# Requirements for AgentBeats Green Agent +# Python 3.11 is required (the OpenSpiel binary is compiled for Python 3.11) + +# Core AgentBeats and A2A dependencies +a2a-sdk>=0.3.5 +google-adk>=1.14.1 +google-genai>=1.36.0 +pydantic>=2.11.9 +python-dotenv>=1.1.1 +uvicorn>=0.35.0 + +# Bargaining environment dependencies +numpy>=1.26.0 +cvxpy>=1.4.0 +ecos>=2.0.0 # MILP solver for MENE computation (includes ECOS_BB) +cvxopt>=1.3.0 # Provides GLPK_MI solver interface for CVXPY + +# RL dependencies (NFSP/RNAD wrappers) +# PyTorch CPU wheel via PyTorch index for NFSP +--extra-index-url https://download.pytorch.org/whl/cpu +torch==2.3.1 + +# JAX/Haiku for RNAD agent (uses pre-trained RNaDSolver checkpoints) +jax>=0.4.20 +jaxlib>=0.4.20 +dm-haiku>=0.0.12 +optax>=0.1.7 +chex>=0.1.8 +rlax>=0.1.6 + +# OpenSpiel transitive dependency (required for full RNAD policy) +ml-collections>=0.1.1 + +# Note: The agentbeats package is included as a local package in src/agentbeats/ +# Python buildpacks will automatically include it when deploying from the repo root. + diff --git a/rnad.py b/rnad.py new file mode 100644 index 0000000..513ddfe --- /dev/null +++ b/rnad.py @@ -0,0 +1,122 @@ +""" +Compatibility shim for RNAD checkpoints. + +The provided RNAD pickles reference the module name `rnad` and expect classes +like `RNaDSolver`, `RNaDConfig`, `StateRepresentation`, `NerdConfig`, etc. + +This module attempts to import the full RNAD implementation from the checkpoints +directory. If that fails (e.g., missing JAX/Haiku dependencies), it falls back +to stub classes that allow unpickling but use a uniform random policy. 
+""" + +from __future__ import annotations + +import logging +from typing import Any, Dict + +logger = logging.getLogger(__name__) + +# Try to import the full RNAD module from the checkpoints directory +_FULL_RNAD_AVAILABLE = False +try: + from scenarios.bargaining.rl_agent_checkpoints.rnad.rnad import ( + EntropySchedule, + FineTuning, + AdamConfig, + NerdConfig, + StateRepresentation, + RNaDConfig, + EnvStep, + ActorStep, + TimeStep, + RNaDSolver, + ) + _FULL_RNAD_AVAILABLE = True + logger.debug("Full RNAD module loaded from checkpoints directory") +except ImportError as e: + logger.warning(f"Full RNAD module not available ({e}); using fallback stubs") + + +# Fallback stub classes if full module is not available +if not _FULL_RNAD_AVAILABLE: + + class EntropySchedule: + """Stub for EntropySchedule.""" + def __init__(self, *args: Any, **kwargs: Any) -> None: + self.__dict__.update(kwargs) + + class FineTuning: + """Stub for FineTuning.""" + def __init__(self, *args: Any, **kwargs: Any) -> None: + self.__dict__.update(kwargs) + + class AdamConfig: + """Stub for AdamConfig.""" + def __init__(self, **kwargs: Any) -> None: + self.__dict__.update(kwargs) + + class NerdConfig: + """Stub for NerdConfig.""" + beta: float = 2.0 + clip: float = 10_000 + + def __init__(self, **kwargs: Any) -> None: + self.__dict__.update(kwargs) + + class StateRepresentation: + """Stub for StateRepresentation enum.""" + INFO_SET = "info_set" + OBSERVATION = "observation" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + if args: + self.value = args[0] + self.__dict__.update(kwargs) + + class RNaDConfig: + """Stub for RNaDConfig.""" + def __init__(self, **kwargs: Any) -> None: + self.__dict__.update(kwargs) + + class EnvStep: + """Stub for EnvStep.""" + def __init__(self, *args: Any, **kwargs: Any) -> None: + self.__dict__.update(kwargs) + + class ActorStep: + """Stub for ActorStep.""" + def __init__(self, *args: Any, **kwargs: Any) -> None: + self.__dict__.update(kwargs) + + class TimeStep: + """Stub for TimeStep.""" + def __init__(self, *args: Any, **kwargs: Any) -> None: + self.__dict__.update(kwargs) + + class RNaDSolver: + """Stub RNaDSolver that uses uniform random policy as fallback.""" + def __init__(self, *args: Any, **kwargs: Any) -> None: + self._initialized = True + self._fallback_warned = False + self.__dict__.update(kwargs) + + def __setstate__(self, state: Dict[str, Any]) -> None: + self.__dict__.update(state) + self._fallback_warned = False + + def action_probabilities(self, state) -> Dict[int, float]: + """Fallback policy: uniform over legal actions.""" + try: + legal = list(state.legal_actions()) + except Exception: + legal = [] + if not legal: + return {} + if not self._fallback_warned: + logger.warning( + "RNAD fallback policy in use (uniform over legal actions); " + "original rnad module not available." 
+ ) + self._fallback_warned = True + p = 1.0 / len(legal) + return {int(a): p for a in legal} diff --git a/scenario.toml b/scenario.toml new file mode 100644 index 0000000..80a2436 --- /dev/null +++ b/scenario.toml @@ -0,0 +1,24 @@ +# AgentBeats Leaderboard Configuration for Meta-Game Negotiation Assessor +# This file configures assessments for the bargaining green agent + +[green_agent] +# Use Docker image for cloud runs, or local endpoint for testing +image = "ghcr.io/gsmithline/tutorial-agent-beats-comp:latest" +# agentbeats_id = "" # Fill in your agent ID from the AgentBeats registration page + +# Baseline evaluation (no external purple agent required) +# The green agent evaluates its built-in baseline agents: +# soft, tough, aspiration, walk, nfsp, rnad + +[config] +games = 50 +max_rounds = 5 +discount = 0.98 +bootstrap = 100 +full_matrix = true + +# Optional: Add external purple agents to evaluate +# [[participants]] +# agentbeats_id = "" +# name = "challenger" +# env = { OPENAI_API_KEY = "${OPENAI_API_KEY}" } diff --git a/scenarios/__init__.py b/scenarios/__init__.py new file mode 100644 index 0000000..279e7db --- /dev/null +++ b/scenarios/__init__.py @@ -0,0 +1 @@ +# Scenarios package diff --git a/scenarios/bargaining/__init__.py b/scenarios/bargaining/__init__.py new file mode 100644 index 0000000..6850502 --- /dev/null +++ b/scenarios/bargaining/__init__.py @@ -0,0 +1 @@ +# Bargaining scenario package diff --git a/scenarios/bargaining/bargaining_env/__init__.py b/scenarios/bargaining/bargaining_env/__init__.py new file mode 100644 index 0000000..5b51bd3 --- /dev/null +++ b/scenarios/bargaining/bargaining_env/__init__.py @@ -0,0 +1,7 @@ +"""Lightweight bargaining pipeline for the BGS (small) game. + +This module provides a minimal simulation and analysis pipeline used by +the BargainingGreenAgent. It does NOT depend on external libraries. 
+""" + + diff --git a/scenarios/bargaining/bargaining_env/agents/__init__.py b/scenarios/bargaining/bargaining_env/agents/__init__.py new file mode 100644 index 0000000..c69cafc --- /dev/null +++ b/scenarios/bargaining/bargaining_env/agents/__init__.py @@ -0,0 +1,22 @@ +from .base import BaseNegotiator +from .soft import SoftNegotiator +from .tough import ToughNegotiator +from .aspiration import AspirationNegotiator +from .llm_agent import LLMAgent, LLMSpec +from .nfsp import NFSPAgentWrapper +from .rnad import RNaDAgentWrapper +from .remote import RemoteNegotiator + +__all__ = [ + "BaseNegotiator", + "SoftNegotiator", + "ToughNegotiator", + "AspirationNegotiator", + "LLMAgent", + "LLMSpec", + "NFSPAgentWrapper", + "RNaDAgentWrapper", + "RemoteNegotiator", +] + + diff --git a/scenarios/bargaining/bargaining_env/agents/aspiration.py b/scenarios/bargaining/bargaining_env/agents/aspiration.py new file mode 100644 index 0000000..3702cc7 --- /dev/null +++ b/scenarios/bargaining/bargaining_env/agents/aspiration.py @@ -0,0 +1,47 @@ +from __future__ import annotations +from typing import List, Tuple + +from .base import BaseNegotiator + + +class AspirationNegotiator(BaseNegotiator): + """ + Lightweight aspiration-based negotiator: + - As proposer: keeps enough items to reach ~85% of total self value, gives the rest + - As responder: accepts if offer meets BATNA or is within 5% of a plausible counter + """ + + def __init__(self, keep_fraction: float = 0.85, accept_slack: float = 0.05): + self.keep_fraction = float(max(0.0, min(1.0, keep_fraction))) + self.accept_slack = float(max(0.0, accept_slack)) + + def propose(self, quantities: Tuple[int, int, int], role: str, v_self: List[int], v_opp: List[int]) -> Tuple[List[int], List[int]]: + total_value = v_self[0] * quantities[0] + v_self[1] * quantities[1] + v_self[2] * quantities[2] + target_value = self.keep_fraction * total_value + + # Greedy keep by value density + idxs = sorted(range(3), key=lambda i: (-v_self[i], i)) + keep = [0, 0, 0] + acc = 0.0 + for i in idxs: + if quantities[i] <= 0 or v_self[i] <= 0: + continue + if acc >= target_value: + break + # keep as many as needed up to available + need = int(max(0, (target_value - acc) // max(1, v_self[i]))) + need = min(need, quantities[i]) + if need == 0 and acc < target_value: + need = min(1, quantities[i]) + keep[i] = need + acc += need * v_self[i] + + a_self = keep + a_opp = [quantities[i] - a_self[i] for i in range(3)] + return a_self, a_opp + + def accepts(self, offer_value: int, batna_value: int, counter_value: int) -> bool: + threshold = max(batna_value, int(counter_value * (1.0 - self.accept_slack))) + return offer_value >= threshold + + diff --git a/scenarios/bargaining/bargaining_env/agents/base.py b/scenarios/bargaining/bargaining_env/agents/base.py new file mode 100644 index 0000000..b5264ca --- /dev/null +++ b/scenarios/bargaining/bargaining_env/agents/base.py @@ -0,0 +1,26 @@ +from __future__ import annotations +from typing import List, Tuple + + +class BaseNegotiator: + """ + Base interface for simplified bargaining negotiators used by the lightweight simulator. + """ + + def propose(self, quantities: Tuple[int, int, int], role: str, v_self: List[int], v_opp: List[int]) -> Tuple[List[int], List[int]]: + """ + Return a proposed allocation (a_self, a_opp) such that element-wise sums equal quantities. + role is 'row' or 'col' indicating proposer. 
+ """ + raise NotImplementedError + + def accepts(self, offer_value: int, batna_value: int, counter_value: int) -> bool: + """ + Decide whether to accept an offer given: + - offer_value: realized value from current offer for self + - batna_value: BATNA value for self + - counter_value: realized value the agent expects from its own counterproposal this round + """ + raise NotImplementedError + + diff --git a/scenarios/bargaining/bargaining_env/agents/llm_agent.py b/scenarios/bargaining/bargaining_env/agents/llm_agent.py new file mode 100644 index 0000000..64ac958 --- /dev/null +++ b/scenarios/bargaining/bargaining_env/agents/llm_agent.py @@ -0,0 +1,372 @@ +import argparse +import contextlib +import importlib.util +import json +import os +from typing import Any, Callable, Optional +from dataclasses import dataclass +from urllib.parse import urljoin + +from dotenv import load_dotenv +load_dotenv() + +from google import genai +from a2a.server.agent_execution import AgentExecutor, RequestContext +from a2a.server.events import EventQueue +from a2a.server.tasks import TaskUpdater, InMemoryTaskStore +from a2a.types import ( + TaskState, + Part, + TextPart, + Task, + UnsupportedOperationError, + AgentCard, + AgentCapabilities, + AgentSkill, +) +from a2a.utils import ( + new_agent_text_message, + new_task, +) +from a2a.utils.errors import ServerError +from a2a.server.apps import A2AStarletteApplication +from a2a.server.request_handlers import DefaultRequestHandler +import uvicorn + +from scenarios.bargaining.bargaining_env.reasoning_trace import ReasoningTracer + +def minimal_agent_card(name: str, url: str) -> AgentCard: + skill = AgentSkill( + id="bargaining_llm", + name="Bargaining negotiation", + description="Negotiate divisions of items via text", + tags=["bargaining", "negotiation"], + examples=[], + ) + return AgentCard( + name=name, + version="0.1.0", + description="LLM Agent for bargaining decisions", + url=url, + preferred_transport="JSONRPC", + protocol_version="0.3.0", + default_input_modes=["text"], + default_output_modes=["text"], + capabilities=AgentCapabilities(streaming=True), + skills=[skill], + ) + + +def load_custom_decider(module_path: Optional[str]) -> Optional[Callable[[str, Optional[list[str]]], str]]: + if not module_path: + return None + spec = importlib.util.spec_from_file_location("custom_agent", module_path) + if spec is None or spec.loader is None: + return None + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) # type: ignore + fn = getattr(mod, "decide", None) + if callable(fn): + return fn # type: ignore + return None + + +class LLMAgent(AgentExecutor): + def __init__( + self, + model: str, + system_prompt: Optional[str], + custom_decider: Optional[Callable[[str, Optional[list[str]]], str]] = None, + trace_dir: Optional[str] = None, + agent_name: str = "BargainingLLM", + # Provider config + provider: str = os.environ.get("LLM_AGENT_PROVIDER", "gemini"), + api_key: Optional[str] = None, + base_url: Optional[str] = None, + headers: Optional[dict[str, str]] = None, + temperature: float = 0.0, + top_p: Optional[float] = None, + timeout: int = 60, + ): + self._provider = (provider or "gemini").lower() + self._model = model + self._system_prompt = system_prompt + self._custom_decider = custom_decider + self._tracer = ReasoningTracer(base_dir=trace_dir) + self._agent_name = agent_name + self._api_key = api_key or os.environ.get("LLM_API_KEY") or os.environ.get("OPENAI_API_KEY") or os.environ.get("ANTHROPIC_API_KEY") or 
os.environ.get("GOOGLE_API_KEY") + self._base_url = base_url + self._headers = headers or {} + self._temperature = float(temperature) if temperature is not None else None + self._top_p = float(top_p) if top_p is not None else None + self._timeout = int(timeout) + # Lazy clients (created on first use) + self._gemini_client = None + self._openai_client = None + self._anthropic_client = None + + async def execute(self, context: RequestContext, event_queue: EventQueue) -> None: + msg = context.message + if not msg: + raise ServerError("Missing message") + task = new_task(msg) + await event_queue.enqueue_event(task) + updater = TaskUpdater(event_queue, task.id, task.context_id) + + await updater.update_status(TaskState.working, new_agent_text_message("LLM agent received request.", context_id=context.context_id)) + request_text = context.get_user_input() + + # Expected input JSON: + # { "prompt": "...", "options": ["...", "..."] } # options optional + try: + body = json.loads(request_text) + except Exception: + body = {"prompt": request_text} + + prompt = str(body.get("prompt", "")) + options = body.get("options", None) + if options is not None and not isinstance(options, list): + options = None + meta = body.get("meta", {}) if isinstance(body.get("meta", {}), dict) else {} + pair = meta.get("pair") + game_idx = meta.get("game") + round_idx = meta.get("round") + role = meta.get("role") + + raw_text = "" + if self._custom_decider is not None: + try: + choice = self._custom_decider(prompt, options) # returns string (either freeform or chosen option) + except Exception as e: + choice = f"[custom_decider_error] {e}" + else: + if options: + # Ask the model to pick one option index; capture raw model text + choice, raw_text = self._choose_from_options(prompt, options) + else: + choice, raw_text = self._freeform(prompt) + + # Log reasoning trace (prompt, options, raw model output, final decision) + try: + self._tracer.log( + agent=self._agent_name, + pair=pair, + game=game_idx, + round_index=round_idx, + role=role, + prompt=prompt, + options=options, + raw_response=raw_text if raw_text else str(choice), + decision=str(choice), + extra_meta={k: v for k, v in meta.items() if k not in {"pair", "game", "round", "role"}}, + ) + except Exception: + # Tracing should not interfere with decisions + pass + + await updater.update_status(TaskState.completed, new_agent_text_message("Decision complete.", context_id=context.context_id)) + await updater.add_artifact(parts=[Part(root=TextPart(text=str(choice)))], name="decision") + + async def cancel(self, request: RequestContext, event_queue: EventQueue) -> Task | None: + raise ServerError(error=UnsupportedOperationError()) + + def _freeform(self, prompt: str) -> tuple[str, str]: + provider = self._provider + sys_inst = self._system_prompt or "" + # Gemini + if provider == "gemini": + if self._gemini_client is None: + self._gemini_client = genai.Client() + config_kwargs = { + "system_instruction": sys_inst, + "response_mime_type": "text/plain", + } + if self._temperature is not None: + config_kwargs["temperature"] = self._temperature + resp = self._gemini_client.models.generate_content( + model=self._model, + config=genai.types.GenerateContentConfig(**config_kwargs), + contents=prompt, + ) + text = resp.text or "" + return text, text + # OpenAI API + if provider == "openai": + try: + from openai import OpenAI # type: ignore + except Exception as e: + raise RuntimeError(f"openai package not available: {e}") + if self._openai_client is None: + self._openai_client = 
OpenAI(api_key=self._api_key) + is_o3 = self._model.lower().startswith("o3-") + create_kwargs = { + "model": self._model, + "messages": ( + ([{"role": "system", "content": sys_inst}] if sys_inst else []) + + [{"role": "user", "content": prompt}] + ), + "timeout": self._timeout, + } + if (not is_o3) and self._temperature is not None: + create_kwargs["temperature"] = self._temperature + if (not is_o3) and self._top_p is not None: + create_kwargs["top_p"] = self._top_p + resp = self._openai_client.chat.completions.create(**create_kwargs) + text = (resp.choices[0].message.content or "").strip() + return text, text + # Anthropic + if provider == "anthropic": + try: + import anthropic # type: ignore + except Exception as e: + raise RuntimeError(f"anthropic package not available: {e}") + if self._anthropic_client is None: + self._anthropic_client = anthropic.Anthropic(api_key=self._api_key) + msg = self._anthropic_client.messages.create( + model=self._model, + max_tokens=2048, + temperature=self._temperature if self._temperature is not None else 0.0, + system=sys_inst if sys_inst else None, + messages=[{"role": "user", "content": prompt}], + ) + try: + text = "".join([b.text for b in msg.content if getattr(b, "type", "") == "text"]) + except Exception: + text = "" + return text, text + # OpenAI-compatible HTTP endpoint (incl. Azure OpenAI, vLLM, Ollama w/ compat) + if provider in ("openai_compat", "http", "http_compat"): + try: + import requests # type: ignore + except Exception as e: + raise RuntimeError(f"requests package not available for HTTP provider: {e}") + if not self._base_url: + raise RuntimeError("base_url is required for openai_compat/http provider") + url = self._base_url + if "chat/completions" not in url: + url = urljoin(self._base_url.rstrip("/") + "/", "v1/chat/completions") + headers = dict(self._headers) + if self._api_key and "authorization" not in {k.lower(): v for k, v in headers.items()}: + headers["Authorization"] = f"Bearer {self._api_key}" + payload = { + "model": self._model, + "messages": ( + ([{"role": "system", "content": sys_inst}] if sys_inst else []) + + [{"role": "user", "content": prompt}] + ), + } + if self._temperature is not None: + payload["temperature"] = self._temperature + r = requests.post(url, headers=headers, json=payload, timeout=self._timeout) + r.raise_for_status() + data = r.json() + try: + text = (data["choices"][0]["message"]["content"] or "").strip() + except Exception: + text = json.dumps(data) + return text, text + raise RuntimeError(f"Unsupported provider: {provider}") + + def _choose_from_options(self, prompt: str, options: list[str]) -> tuple[str, str]: + sys_inst = (self._system_prompt or "") + "\nSelect the best option index and respond ONLY with the integer index." + opt_text = "\n".join(f"{i}: {opt}" for i, opt in enumerate(options)) + content = f"{prompt}\nOptions:\n{opt_text}\nAnswer index only." 
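+        # Illustrative exchange, not an exhaustive spec: with options
+        # ["[5, 2, 1]", "[3, 1, 0]"] the rendered block reads
+        #   0: [5, 2, 1]
+        #   1: [3, 1, 0]
+        # and a well-behaved model replies with a bare index such as "1",
+        # which is mapped back to options[1] below; an unparsable reply
+        # falls back to options[0].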
+ # Reuse _freeform generation with adjusted system content + orig_prompt = self._system_prompt + try: + self._system_prompt = sys_inst + text, raw = self._freeform(content) + text = (text or "").strip() + try: + idx = int(text.split()[0]) + if 0 <= idx < len(options): + return options[idx], raw + except Exception: + pass + return (options[0] if options else ""), raw + finally: + self._system_prompt = orig_prompt + + +@dataclass +class LLMSpec: + model: str + prompt: str + + +def main(): + parser = argparse.ArgumentParser(description="Run an LLM Agent for bargaining.") + parser.add_argument("--host", type=str, default="127.0.0.1") + parser.add_argument("--port", type=int, default=9039) + parser.add_argument("--card-url", type=str) + parser.add_argument("--cloudflare-quick-tunnel", action="store_true") + parser.add_argument("--model", type=str, default=os.environ.get("LLM_AGENT_MODEL", "gemini-2.5-flash")) + parser.add_argument("--system-prompt-file", type=str) + parser.add_argument("--prompt", type=str, help="Inline system prompt text for the LLM agent") + parser.add_argument("--custom-decider", type=str, help="Path to a Python file exposing decide(prompt, options)->str") + parser.add_argument("--trace-dir", type=str, help="Directory to write JSONL reasoning logs") + parser.add_argument("--agent-name", type=str, default="BargainingLLM", help="Agent name used in trace logs") + # Provider settings + parser.add_argument("--provider", type=str, default=os.environ.get("LLM_AGENT_PROVIDER", "gemini"), help="gemini | openai | anthropic | openai_compat | http") + parser.add_argument("--api-key", type=str, default=os.environ.get("LLM_API_KEY")) + parser.add_argument("--base-url", type=str, help="Base URL for provider (for openai_compat/http)") + parser.add_argument("--headers-json", type=str, help="Path to JSON file with extra HTTP headers") + parser.add_argument("--temperature", type=float, default=None) + parser.add_argument("--top-p", type=float, default=None) + parser.add_argument("--timeout", type=int, default=int(os.environ.get("LLM_TIMEOUT", "60"))) + args = parser.parse_args() + + system_prompt = None + if args.system_prompt_file and os.path.exists(args.system_prompt_file): + system_prompt = open(args.system_prompt_file, "r").read() + if system_prompt is None and args.prompt: + system_prompt = args.prompt + if system_prompt is None: + raise RuntimeError("LLM Agent requires a prompt. 
Provide --system-prompt-file or --prompt.") + custom_decider = load_custom_decider(args.custom_decider) + + if args.cloudflare_quick_tunnel: + from agentbeats.cloudflare import quick_tunnel + agent_url_cm = quick_tunnel(f"http://{args.host}:{args.port}") + else: + agent_url_cm = contextlib.nullcontext(args.card_url or f"http://{args.host}:{args.port}/") + + headers: dict[str, str] | None = None + if args.headers_json and os.path.exists(args.headers_json): + try: + with open(args.headers_json, "r") as hf: + headers = json.load(hf) + except Exception: + headers = None + + async def _serve(): + async with agent_url_cm as agent_url: + executor = LLMAgent( + model=args.model, + system_prompt=system_prompt, + custom_decider=custom_decider, + trace_dir=args.trace_dir, + agent_name=args.agent_name, + provider=args.provider, + api_key=args.api_key, + base_url=args.base_url, + headers=headers, + temperature=args.temperature, + top_p=args.top_p, + timeout=args.timeout, + ) + card = minimal_agent_card("BargainingLLM", agent_url) + request_handler = DefaultRequestHandler(agent_executor=executor, task_store=InMemoryTaskStore()) + server = A2AStarletteApplication(agent_card=card, http_handler=request_handler) + uvicorn_config = uvicorn.Config(server.build(), host=args.host, port=args.port) + uvicorn_server = uvicorn.Server(uvicorn_config) + await uvicorn_server.serve() + + import asyncio + asyncio.run(_serve()) + + +if __name__ == "__main__": + main() + + diff --git a/scenarios/bargaining/bargaining_env/agents/nfsp.py b/scenarios/bargaining/bargaining_env/agents/nfsp.py new file mode 100644 index 0000000..ce3113f --- /dev/null +++ b/scenarios/bargaining/bargaining_env/agents/nfsp.py @@ -0,0 +1,204 @@ +from __future__ import annotations + +import os +import traceback +from typing import List, Optional + +import numpy as np + + +try: + import torch + import torch.nn as nn + import torch.nn.functional as F +except Exception as _e: + torch = None # type: ignore + nn = None # type: ignore + F = None # type: ignore + + +# Optional: OpenSpiel is only needed when actually running this agent +try: + import pyspiel # type: ignore +except Exception: + pyspiel = None # type: ignore + + +if torch is not None and nn is not None: + class PolicyModel(nn.Module): # type: ignore + def __init__(self, n_actions: int, hidden_size: int = 256): + super().__init__() + self.lin1 = nn.LazyLinear(hidden_size) + self.lin2 = nn.Linear(hidden_size, hidden_size) + # Match checkpoint naming "policy_head.*" + self.policy_head = nn.Linear(hidden_size, n_actions) + + def forward(self, x, action_mask): # type: ignore + x = self.lin1(x.float()) + x = F.relu(x) + out = self.lin2(x) + x + out = F.relu(out) + out = self.policy_head(out) + out = torch.masked_fill(out, ~action_mask.bool(), -1e9) + return out +else: + class PolicyModel: # type: ignore + def __init__(self, *args, **kwargs): + raise ImportError("PyTorch is required for NFSP (torch not available).") + + +class NFSPAgentWrapper: + """ + Average-policy NFSP agent for the OpenSpiel negotiation game. + This wrapper is intended for inference-only usage with provided checkpoints. 
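+
+    Illustrative usage sketch (assumes pyspiel and torch are importable and
+    that `params` matches the checkpoint's training configuration; the
+    checkpoint filename is a placeholder):
+
+        game = pyspiel.load_game("negotiation", params)
+        agent = NFSPAgentWrapper(game, player_id=0, checkpoint_path="nfsp_p0.pt")
+        # at one of player 0's decision nodes:
+        action = agent.step(state)   # sampled from the average policy
+        state.apply_action(action)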
+ """ + + def __init__( + self, + game, # pyspiel.Game + player_id: int, + *, + checkpoint_path: Optional[str] = None, + discount: float = 0.98, + max_rounds: int = 3, + debug: bool = False, + ) -> None: + if torch is None or nn is None: + raise RuntimeError("PyTorch not available; NFSP requires torch to run.") + if pyspiel is None: + raise RuntimeError("OpenSpiel (pyspiel) not available; NFSP agent requires it.") + + self.player_id = int(player_id) + self.discount = float(discount) + self.max_rounds = int(max_rounds) + self.debug = bool(debug) + + self.last_logits = None + self.last_probs = None + + self._num_actions = int(game.num_distinct_actions()) + self._state_size = int(game.observation_tensor_size()) + self._num_items = int(game.get_parameters().get("num_items", 3)) + self._rng = np.random.RandomState(123) + + # Book-keeping decoded from observations (best-effort) + self.items: Optional[List[int]] = None + self.valuation_vector: Optional[List[int]] = None + self.walk_away_value: Optional[float] = None + self.current_counter_offer_give_format: Optional[List[int]] = None + + # Device + model + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + if self.debug: + print(f"[NFSP P{self.player_id}] device: {self.device}") + + self._policy_net = PolicyModel(self._num_actions, hidden_size=256).to(self.device) + # Materialize LazyLinear BEFORE loading weights + with torch.no_grad(): + dummy_x = torch.zeros(1, self._state_size, device=self.device) + dummy_mask = torch.ones(1, self._num_actions, dtype=torch.bool, device=self.device) + _ = self._policy_net(dummy_x, dummy_mask) + + # Load checkpoint if provided + if checkpoint_path: + if os.path.exists(checkpoint_path): + try: + checkpoint = torch.load(checkpoint_path, map_location=self.device, weights_only=True) + self._policy_net.load_state_dict(checkpoint) + self._policy_net.eval() + if self.debug: + print(f"[NFSP P{self.player_id}] Loaded checkpoint {checkpoint_path}") + except Exception as e: + print(f"[NFSP ERROR] Failed to load checkpoint {checkpoint_path}: {e}") + self._policy_net = None # disable, will fallback to random + else: + raise FileNotFoundError(f"NFSP checkpoint not found: {checkpoint_path}") + else: + # Allow running without checkpoint (random policy over legal actions) + if self.debug: + print(f"[NFSP P{self.player_id}] No checkpoint provided; using random legal policy.") + self._policy_net = None + + def _decode_action_to_proposal(self, action_id: int) -> Optional[np.ndarray]: + # Decode base-(max_quantity+1) to get the kept quantities per item + base = 10 + 1 # default OpenSpiel max_quantity is typically 10 for BGS configs we use + max_id = base ** self._num_items - 1 + if action_id < 0 or action_id > max_id: + return None + proposal = np.zeros(self._num_items, dtype=int) + tmp = action_id + for i in range(self._num_items - 1, -1, -1): + div = base ** i + proposal[self._num_items - 1 - i] = tmp // div + tmp %= div + return proposal + + def _process_observation(self, observation: List[float]) -> None: + # Heuristic decode matching typical OpenSpiel negotiation observation layout + num_items = self._num_items + is_response_state = observation[3] == 0.0 if len(observation) > 3 else False + round_number = int(observation[6]) + 1 if len(observation) > 6 else 1 + discount_factor = observation[7] if len(observation) > 7 else self.discount + item_quantities = observation[9 : 9 + num_items] + self.items = list(map(int, item_quantities)) + pv_start = 9 + num_items + pv_end = pv_start + num_items + 
player_values = observation[pv_start:pv_end] + self.valuation_vector = list(map(int, player_values)) + walk_away_value = observation[9 + 2 * num_items] if len(observation) > (9 + 2 * num_items) else 0.0 + self.walk_away_value = float(walk_away_value) + total_utility = float(np.dot(player_values, item_quantities)) + if self.debug: + print(f"[NFSP P{self.player_id}] round {round_number} items {item_quantities} values {player_values} W {walk_away_value} g={discount_factor} TU={total_utility}") + + def step(self, state, *, is_evaluation: bool = True) -> int: # pyspiel.State + cur = state.current_player() + self.current_counter_offer_give_format = None + self._process_observation(state.observation_tensor(self.player_id)) + if cur != self.player_id: + return 0 + + obs_raw = np.asarray(state.observation_tensor(cur), dtype=np.float32) + obs = obs_raw[: self._state_size] + + try: + legal = list(state.legal_actions(cur)) + except Exception: + legal = [] + + # Constrain to known action space + legal = [a for a in legal if 0 <= int(a) < self._num_actions] + if not legal: + return 0 + + # Random if no policy loaded + if self._policy_net is None or torch is None: + return int(self._rng.choice(legal)) + + inp = torch.from_numpy(obs).unsqueeze(0).to(self.device) + mask = torch.zeros(self._num_actions, dtype=torch.bool, device=self.device) + for a in legal: + mask[int(a)] = True + with torch.no_grad(): + logits = self._policy_net(inp, mask.unsqueeze(0)).squeeze(0) + probs = torch.softmax(logits, dim=-1) + probs = probs.masked_fill(~mask, 0.0) + mass = probs.sum() + if mass.item() > 0: + probs = probs / mass + p_cpu = probs.detach().cpu().numpy() + final_action = int(self._rng.choice(p_cpu.size, p=p_cpu)) + else: + legal_idx_cpu = np.flatnonzero(mask.detach().cpu().numpy()) + final_action = int(self._rng.choice(legal_idx_cpu)) + + # Attempt to decode current counter-offer (for external logging) + try: + proposal_keep = self._decode_action_to_proposal(final_action) + if proposal_keep is not None and self.items is not None: + give = (np.array(self.items) - proposal_keep).tolist() + self.current_counter_offer_give_format = give + except Exception: + traceback.print_exc() + + return final_action diff --git a/scenarios/bargaining/bargaining_env/agents/remote.py b/scenarios/bargaining/bargaining_env/agents/remote.py new file mode 100644 index 0000000..6b443c2 --- /dev/null +++ b/scenarios/bargaining/bargaining_env/agents/remote.py @@ -0,0 +1,412 @@ +from __future__ import annotations + +import asyncio +import json +import logging +import re +from typing import Any, Dict, List, Sequence, Tuple + +from agentbeats.tool_provider import ToolProvider +from scenarios.prompts.make_prompt import make_prompt +from scenarios.utils.offer import Offer + +from .base import BaseNegotiator + +logger = logging.getLogger(__name__) + + +class RemoteNegotiatorError(RuntimeError): + def __init__(self, message: str): + super().__init__(message) + + +class RemoteNegotiator(BaseNegotiator): + """ + Proxy negotiator that forwards decisions to a remote purple agent via ToolProvider. 
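+
+    The remote agent is expected to answer with JSON only; representative
+    replies (values are illustrative) look like:
+
+        {"allocation_self": [5, 2, 1], "allocation_other": [2, 2, 0], "reason": "..."}
+        {"choice_id": 17, "reason": "..."}        # index into the allocation catalog
+        {"accept": true, "reason": "..."}         # for ACCEPT_OR_REJECT prompts
+
+    Action-style replies such as {"action": "COUNTEROFFER", "offer": [...]} are
+    also understood, and anything that still cannot be parsed after the retry
+    budget is treated as a WALK so the meta-game does not stall.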
+ """ + + def __init__( + self, + *, + label: str, + endpoint: str, + tool_provider: ToolProvider | None = None, + max_retries: int = 2, + prompt_circle: int | None = None, + ): + self._label = label + self._endpoint = endpoint + self._tool_provider = tool_provider or ToolProvider() + self._max_retries = max_retries + self._conversation_started = False + self._context: Dict[str, Any] = {} + self._pending_offer: Dict[str, Any] | None = None + self._prompt_circle = prompt_circle + self._history: Dict[int, List[Any]] = {0: [], 1: []} + self._current_offer: Offer | None = None + + def set_context( + self, + *, + pair_key: str, + game_index: int, + role: str, + valuations_self: Sequence[int], + valuations_opp: Sequence[int], + batna_self: int, + batna_opp: int, + discount: float, + max_rounds: int, + quantities: Sequence[int], + value_cap: int | None = None, + ) -> None: + self._history = {0: [], 1: []} + player_idx = 0 if role == "row" else 1 + other_idx = 1 - player_idx + q_list = list(map(int, quantities)) + v_player1 = list(valuations_self) if role == "row" else list(valuations_opp) + v_player2 = list(valuations_opp) if role == "row" else list(valuations_self) + total_p1 = int(sum(v_player1[i] * q_list[i] for i in range(len(q_list)))) + total_p2 = int(sum(v_player2[i] * q_list[i] for i in range(len(q_list)))) + example_offer = [max(0, q // 2) for q in q_list] + self._context = { + "pair": pair_key, + "game_index": game_index, + "role": role, + "valuations_self": list(valuations_self), + "valuations_opp": list(valuations_opp), + "batna_self": int(batna_self), + "batna_opp": int(batna_opp), + "discount": float(discount), + "max_rounds": int(max_rounds), + "quantities": list(quantities), + "round_index": 1, + "player_index": player_idx, + "my_player_num": player_idx + 1, + "other_player_num": other_idx + 1, + "p1_outside_offer": [1, max(1, total_p1)], + "p2_outside_offer": [1, max(1, total_p2)], + "value_cap": int(value_cap) if value_cap is not None else None, + "example_offer": example_offer, + "batna_player1": int(batna_self if role == "row" else batna_opp), + "batna_player2": int(batna_opp if role == "row" else batna_self), + } + self._pending_offer = {} + self._conversation_started = False + self._current_offer = None + + def set_round(self, round_index: int) -> None: + if self._context: + self._context["round_index"] = int(round_index) + + def set_offer_context(self, **offer_data: Any) -> None: + if not self._context: + return + if self._pending_offer is None: + self._pending_offer = {} + proposer_role = offer_data.get("proposer") + proposer_idx = None + if isinstance(proposer_role, str): + proposer_idx = 0 if proposer_role.lower() == "row" else 1 + for key, value in offer_data.items(): + if value is None: + continue + if isinstance(value, (list, tuple)): + self._pending_offer[key] = [int(v) for v in value] + else: + self._pending_offer[key] = value + if "offer_allocation_self" in self._pending_offer: + allocation = self._pending_offer["offer_allocation_self"] + try: + allocation_list = [int(v) for v in allocation] + except Exception: + allocation_list = allocation + player_number = (proposer_idx + 1) if proposer_idx is not None else self._context.get("other_player_num", 0) + offer_obj = Offer(player=player_number, offer=allocation_list) + self._current_offer = offer_obj + self._history.setdefault(player_number - 1, []).append(offer_obj) + + def propose( + self, + quantities: Tuple[int, int, int], + role: str, + v_self: List[int], + v_opp: List[int], + ) -> Tuple[List[int], List[int]]: + 
observation = self._build_observation(action="propose", quantities=quantities) + options = self._enumerate_allocations(quantities) + prompt = self._format_prompt( + action="PROPOSE", + observation=observation, + instruction=( + "Return ONLY JSON. Preferred: " + '{"allocation_self":[...],"allocation_other":[...],"reason":"..."} ' + "or use the catalog: {\"choice_id\": , \"reason\": \"...\"}.\n" + "No extra text. Arrays must sum to quantities. If you omit allocation_other, it will be inferred as the complement." + ), + options=options, + ) + response = self._send(prompt) + # Support action-based responses from circle prompts: COUNTEROFFER with "offer" field + if "action" in response and isinstance(response.get("action"), str): + act = response.get("action", "").strip().upper() + if act in ("COUNTEROFFER", "COUNTER_OFFER", "OFFER"): + offer = response.get("offer") or response.get("allocation_self") + if offer is not None: + response = dict(response) + response.setdefault("allocation_self", offer) + # ACCEPT/WALK here fall back to normal parsing; invalid allocation will be handled below. + allocation_self, allocation_other = self._extract_allocation(response, quantities, options) + return allocation_self, allocation_other + + def accepts(self, offer_value: int, batna_value: int, counter_value: int) -> bool: + observation = self._build_observation( + action="ACCEPT_OR_REJECT", + extra={ + "offer_value": int(offer_value), + "batna_value": int(batna_value), + "counter_value": int(counter_value), + }, + ) + prompt = self._format_prompt( + action="ACCEPT_OR_REJECT", + observation=observation, + instruction=( + "Return ONLY JSON: " + '{"accept": true|false, "reason": "...", "plan_allocation": [..optional..]}.\n' + 'Actions are ACCEPT (accept=true) or COUNTER_OFFER/WALK (accept=false). No extra text.' 
+ ), + ) + response = self._send(prompt) + # Support action-based responses (ACCEPT/COUNTEROFFER/WALK) from circle prompts + if "accept" in response: + decision = response.get("accept") + elif "decision" in response: + decision = response.get("decision") + elif "action" in response: + act = str(response.get("action", "")).strip().lower() + if act == "accept": + decision = True + elif act in ("counteroffer", "counter_offer", "offer", "walk"): + decision = False + else: + decision = None + else: + decision = None + if decision is None: + raise RemoteNegotiatorError(f"{self._label} response missing 'accept' field: {response}") + return self._coerce_bool(decision) + + def _build_observation( + self, + *, + action: str, + quantities: Tuple[int, int, int] | None = None, + extra: Dict[str, Any] | None = None, + ) -> Dict[str, Any]: + if not self._context: + raise RemoteNegotiatorError("Remote negotiator context not initialized.") + data = dict(self._context) + data["action"] = action + if quantities is not None: + data["quantities"] = list(quantities) + if self._pending_offer: + data["pending_offer"] = self._pending_offer + if extra: + data.update(extra) + # Do not leak opponent private info to the remote agent + for k in ["valuations_opp", "batna_opp", "p2_outside_offer"]: + data.pop(k, None) + return data + + def _format_prompt( + self, + *, + action: str, + observation: Dict[str, Any], + instruction: str, + options: List[Dict[str, Any]] | None = None, + ) -> str: + circle_prompt = self._build_circle_prompt() + # Do not leak opponent-private info in the rendered prompt + obs_public = dict(observation) + for k in ("valuations_opp", "batna_opp", "p2_outside_offer"): + obs_public.pop(k, None) + message = ( + f"You are participating in the AgentBeats bargaining meta-game as '{self._label}'.\n" + f"Action: {action}.\n" + f"{instruction}\n" + "Always answer with valid JSON only.\n" + "Observation:\n" + f"```json\n{json.dumps(obs_public, indent=2)}\n```" + ) + if options: + message += ( + "\nAllocation catalog (use `choice_id` to reference an entry):\n" + f"```json\n{json.dumps(options, indent=2)}\n```" + ) + if circle_prompt: + message = f"{circle_prompt}\n\n----\n{message}" + return message + + def _build_circle_prompt(self) -> str | None: + # Build a circle prompt without leaking opponent-private info. 
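+        # The circle prompt is a pre-rendered negotiation briefing produced by
+        # make_prompt() from this agent's own quantities, valuations and BATNA;
+        # _format_prompt() prepends it to the JSON task message. The opponent's
+        # BATNA is passed as a neutral placeholder (W2=0) to avoid leaking the
+        # other side's private BATNA into the rendered text.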
+ if self._prompt_circle is None or not self._context: + return None + try: + quantities = list(self._context.get("quantities", [])) + values = list(self._context.get("valuations_self", [])) + value_cap = int(self._context.get("value_cap") or (max(quantities or [1]) + 1)) + prompt_text = make_prompt( + T=len(quantities), + quantities=quantities, + V=value_cap, + values=values, + W1=int(self._context.get("batna_self", 0)), #doesnt matter + # Do not expose opponent BATNA; provide a neutral placeholder + W2=0, #doesnt matter + w=int(self._context.get("batna_self", 0)), + R=int(self._context.get("max_rounds", 2)), + g=float(self._context.get("discount", 0.98)), + r=int(self._context.get("round_index", 1)), + history=self._history, + current_offer=self._current_offer, + player_num=int(self._context.get("player_index", 0)), + p1_outside_offer=self._context.get("p1_outside_offer"), + # Do not expose opponent outside offer; use a neutral placeholder + p2_outside_offer=self._context.get("p2_outside_offer"), + circle=int(self._prompt_circle), + example_offer_less_than_outside_offer_self=self._context.get("example_offer"), + ) + return prompt_text.strip() + except Exception as exc: # noqa: BLE001 + logger.warning("Remote negotiator %s could not build circle prompt: %s", self._label, exc) + return None + + def _enumerate_allocations(self, quantities: Sequence[int]) -> List[Dict[str, Any]]: + counts = list(quantities) + options: List[Dict[str, Any]] = [] + idx = 0 + for q0 in range(counts[0] + 1): + for q1 in range(counts[1] + 1): + for q2 in range(counts[2] + 1): + allocation_self = [q0, q1, q2] + allocation_other = [counts[0] - q0, counts[1] - q1, counts[2] - q2] + options.append( + { + "id": idx, + "allocation_self": allocation_self, + "allocation_other": allocation_other, + } + ) + idx += 1 + return options + + def _send(self, prompt: str) -> Dict[str, Any]: + last_exc: Exception | None = None + for attempt in range(self._max_retries): + try: + response_text = asyncio.run( + self._tool_provider.talk_to_agent( + prompt, + self._endpoint, + new_conversation=not self._conversation_started, + ) + ) + self._conversation_started = True + return self._parse_json(response_text) + except Exception as exc: # noqa: BLE001 + last_exc = exc + self._conversation_started = False + logger.warning("Remote negotiator %s attempt %d failed: %s", self._label, attempt + 1, exc) + # Fallback: treat as WALK to avoid stalling meta-game when remote emits non-JSON + logger.warning("Remote negotiator %s exhausted retries; defaulting to WALK.", self._label) + return {"action": "WALK", "reason": f"defaulted after non-JSON: {last_exc}"} + + def _parse_json(self, payload: str) -> Dict[str, Any]: + candidates = [] + blocks = re.findall(r"```(?:json)?\s*(.*?)```", payload, flags=re.IGNORECASE | re.DOTALL) + if blocks: + candidates.extend(blocks) + candidates.append(payload) + for candidate in candidates: + text = candidate.strip() + if not text: + continue + try: + data = json.loads(text) + if isinstance(data, dict): + return data + except json.JSONDecodeError: + continue + # Heuristic fallbacks for plain-text replies + pl = payload.strip().lower() + if pl in {"accept", "accepted", "yes"}: + return {"accept": True, "reason": "parsed from plain text"} + if any(k in pl for k in ["walk", "reject", "decline", "decision complete"]): + return {"accept": False, "action": "WALK", "reason": "parsed from plain text"} + # Heuristic: bare list counteroffers like "[5,2,1]" -> allocation_self + if payload.strip().startswith("[") and 
payload.strip().endswith("]"): + try: + arr = json.loads(payload) + if isinstance(arr, list): + return {"allocation_self": arr, "reason": "parsed from bare list"} + except Exception: + pass + raise RemoteNegotiatorError(f"{self._label} returned non-JSON response: {payload[:200]}") + + def _extract_allocation( + self, + response: Dict[str, Any], + quantities: Sequence[int], + options: List[Dict[str, Any]], + ) -> Tuple[List[int], List[int]]: + opt_lookup = {int(opt["id"]): opt for opt in options} + option = None + for key in ("choice_id", "choice_idx", "choice"): + if key in response: + try: + option = opt_lookup[int(response[key])] + break + except (ValueError, KeyError, TypeError): + continue + + if option: + allocation_self = option["allocation_self"] + allocation_other = option["allocation_other"] + else: + allocation_self = response.get("allocation_self") or response.get("allocation") or response.get("a_self") + if allocation_self is None: + raise RemoteNegotiatorError(f"{self._label} must provide allocation_self or choice_id.") + allocation_other = ( + response.get("allocation_other") + or response.get("other_allocation") + or response.get("a_opp") + or [quantities[i] - int(allocation_self[i]) for i in range(len(quantities))] + ) + + alloc_self = self._coerce_allocation(allocation_self, quantities, "allocation_self") + alloc_other = self._coerce_allocation(allocation_other, quantities, "allocation_other") + for i, total in enumerate(quantities): + if alloc_self[i] + alloc_other[i] != int(total): + raise RemoteNegotiatorError(f"{self._label} produced invalid allocation that does not sum to quantities.") + return alloc_self, alloc_other + + def _coerce_allocation(self, raw: Sequence[Any], quantities: Sequence[int], label: str) -> List[int]: + values = [int(float(v)) for v in raw] + if len(values) != len(quantities): + raise RemoteNegotiatorError(f"{self._label} {label} must match item dimension {len(quantities)}.") + for idx, (val, total) in enumerate(zip(values, quantities, strict=True)): + if val < 0 or val > int(total): + raise RemoteNegotiatorError(f"{self._label} {label}[{idx}] out of range: {val} vs {total}.") + return values + + def _coerce_bool(self, raw: Any) -> bool: + if isinstance(raw, bool): + return raw + if isinstance(raw, (int, float)): + return raw != 0 + if isinstance(raw, str): + return raw.strip().lower() in {"1", "true", "accept", "accepted", "yes", "y"} + raise RemoteNegotiatorError(f"{self._label} returned non-boolean decision: {raw}") + diff --git a/scenarios/bargaining/bargaining_env/agents/rnad.py b/scenarios/bargaining/bargaining_env/agents/rnad.py new file mode 100644 index 0000000..a364fc6 --- /dev/null +++ b/scenarios/bargaining/bargaining_env/agents/rnad.py @@ -0,0 +1,192 @@ +from __future__ import annotations + +import importlib +import os +import pickle +import sys +import traceback +from typing import Dict, List, Optional + +import numpy as np + +try: + import pyspiel # type: ignore +except Exception: + pyspiel = None # type: ignore + +# Attempt to import the original RNAD module; if missing, provide a fallback +# that still allows checkpoints to unpickle. 
Preferred path is the provided +# checkpoints repo: scenarios/bargaining/rl_agent_checkpoints/rnad/rnad.py +RNaDSolver = None # type: ignore[var-annotated] +if RNaDSolver is None: + try: + _rnad_mod = importlib.import_module("scenarios.bargaining.rl_agent_checkpoints.rnad.rnad") + sys.modules.setdefault("rnad", _rnad_mod) + RNaDSolver = _rnad_mod.RNaDSolver # type: ignore[attr-defined] + except Exception: + pass +if RNaDSolver is None: + try: + _rnad_mod = importlib.import_module("agents.rnad_working.rnad") + sys.modules.setdefault("rnad", _rnad_mod) + RNaDSolver = _rnad_mod.RNaDSolver # type: ignore[attr-defined] + except Exception: + pass +if RNaDSolver is None: + class RNaDSolver: # type: ignore + def __init__(self, *args, **kwargs): + self._fallback_warned = False + + def __setstate__(self, state): + self.__dict__.update(state) + + def action_probabilities(self, state) -> Dict[int, float]: + try: + legal = list(state.legal_actions()) + except Exception: + legal = [] + if not legal: + return {} + if not self._fallback_warned: + print("[RNAD fallback] rnad module missing; using uniform policy.") + self._fallback_warned = True + p = 1.0 / len(legal) + return {int(a): p for a in legal} + + +class RNaDAgentWrapper: + """RNAD average‑policy agent using a pickled solver with action_probabilities(state).""" + + def __init__( + self, + game, # pyspiel.Game + player_id: int, + *, + checkpoint_path: str, + debug: bool = True, + ) -> None: + if pyspiel is None: + raise RuntimeError("OpenSpiel (pyspiel) not available; RNAD agent requires it.") + + self.player_id = int(player_id) + self.debug = bool(debug) + + self._num_actions = int(game.num_distinct_actions()) + self._state_size = int(game.observation_tensor_size()) + self._num_items = int(game.get_parameters().get("num_items", 3)) + self.max_quantity_for_encoding = 10 + + # Public fields (parity with NFSP wrapper) + self.action: str = "RNAD_ACTION" + self.items: Optional[List[int]] = None + self.valuation_vector: Optional[List[int]] = None + self.walk_away_value: Optional[float] = None + self.current_counter_offer_give_format: Optional[List[int]] = None + self.prompt: Optional[str] = None + self.last_probs: Optional[np.ndarray] = None + self.player_num = self.player_id + self._rng = np.random.RandomState(123) + self.last_dist = None + self.last_action = None + self.last_prob = None + + if not os.path.exists(checkpoint_path): + raise FileNotFoundError(f"RNAD checkpoint not found: {checkpoint_path}") + if self.debug: + print(f"[RNAD P{self.player_id}] loading: {checkpoint_path}") + with open(checkpoint_path, "rb") as f: + self._solver: RNaDSolver = pickle.load(f) # type: ignore[assignment] + if self.debug: + cfg = getattr(self._solver, "config", None) + if cfg is not None: + game_name = getattr(cfg, "game_name", None) + if game_name is not None: + print(f"[RNAD P{self.player_id}] loaded config game_name: {game_name}") + + def _decode_action_to_proposal(self, action_id: int) -> Optional[np.ndarray]: + base = self.max_quantity_for_encoding + 1 + max_id = base ** self._num_items - 1 + if action_id < 0 or action_id > max_id: + return None + proposal = np.zeros(self._num_items, dtype=int) + tmp = action_id + for i in range(self._num_items - 1, -1, -1): + div = base ** i + proposal[self._num_items - 1 - i] = tmp // div + tmp %= div + return proposal + + def _process_observation(self, observation: List[float]) -> None: + # Minimal decode similar to NFSP wrapper + num_items = self._num_items + item_quantities = observation[9 : 9 + num_items] + self.items = 
list(map(int, item_quantities)) + pv_start = 9 + num_items + pv_end = pv_start + num_items + player_values = observation[pv_start:pv_end] + self.valuation_vector = list(map(int, player_values)) + walk_away_value = observation[9 + 2 * num_items] if len(observation) > (9 + 2 * num_items) else 0.0 + self.walk_away_value = float(walk_away_value) + + def step(self, state, *, is_evaluation: bool = True) -> int: # pyspiel.State + cur = state.current_player() + self.current_counter_offer_give_format = None + self._process_observation(state.observation_tensor(self.player_id)) + if cur != self.player_id: + self.action = "ERROR_WRONG_PLAYER" + return 0 + + try: + legal = list(state.legal_actions(cur)) + except Exception: + legal = [] + legal = [a for a in legal if 0 <= int(a) < self._num_actions] + if not legal: + self.action = "ERROR_NO_LEGAL_ACTIONS" + return 0 + + try: + probs_dict: Dict[int, float] = self._solver.action_probabilities(state) # type: ignore[attr-defined] + except Exception: + traceback.print_exc() + return int(self._rng.choice(legal)) + + probs = np.zeros(self._num_actions, dtype=np.float32) + for a, p in probs_dict.items(): + if 0 <= int(a) < self._num_actions: + probs[int(a)] = float(p) + + q = np.zeros_like(probs, dtype=float) + q[legal] = probs[legal] + mass = q.sum() + if mass > 0: + q /= mass + final_action = int(self._rng.choice(q.size, p=q)) + else: + final_action = int(self._rng.choice(legal)) + + self.last_dist = q.copy() + self.last_action = final_action + self.last_prob = float(q[final_action]) if q[final_action] > 0 else 0.0 + + try: + action_str = state.action_to_string(cur, final_action).lower() + if "agreement" in action_str: + self.action = "ACCEPT" + elif "walk" in action_str: + self.action = "WALK" + else: + self.action = "COUNTEROFFER" + proposal_keep = self._decode_action_to_proposal(final_action) + if proposal_keep is not None and self.items is not None: + give = (np.array(self.items) - proposal_keep).tolist() + self.current_counter_offer_give_format = give + except Exception: + traceback.print_exc() + self.action = "UNKNOWN" + + if self.debug: + print(f"[RNAD P{self.player_id}] chosen={final_action} prob={self.last_prob:.3f}") + return final_action + + diff --git a/scenarios/bargaining/bargaining_env/agents/soft.py b/scenarios/bargaining/bargaining_env/agents/soft.py new file mode 100644 index 0000000..59f7de0 --- /dev/null +++ b/scenarios/bargaining/bargaining_env/agents/soft.py @@ -0,0 +1,19 @@ +from __future__ import annotations +from typing import List, Tuple +import random + +from .base import BaseNegotiator + + +class SoftNegotiator(BaseNegotiator): + def propose(self, quantities: Tuple[int, int, int], role: str, v_self: List[int], v_opp: List[int]) -> Tuple[List[int], List[int]]: + # Starting agent proposes a random split across items + a_self = [random.randint(0, q) for q in quantities] + a_opp = [q - a_self[i] for i, q in enumerate(quantities)] + return a_self, a_opp + + def accepts(self, offer_value: int, batna_value: int, counter_value: int) -> bool: + # Always accept any offer on the table + return True + + diff --git a/scenarios/bargaining/bargaining_env/agents/tough.py b/scenarios/bargaining/bargaining_env/agents/tough.py new file mode 100644 index 0000000..22df449 --- /dev/null +++ b/scenarios/bargaining/bargaining_env/agents/tough.py @@ -0,0 +1,30 @@ +from __future__ import annotations +from typing import List, Tuple + +from .base import BaseNegotiator + + +class ToughNegotiator(BaseNegotiator): + def propose(self, quantities: Tuple[int, int, 
int], role: str, v_self: List[int], v_opp: List[int]) -> Tuple[List[int], List[int]]: + # Offer exactly one unit of the least-valued item (by v_self), keep the rest + idx = 0 + min_val = None + for i, q in enumerate(quantities): + if q <= 0: + continue + val = v_self[i] if i < len(v_self) else 0 + if min_val is None or val < min_val: + min_val = val + idx = i + a_opp = [0, 0, 0] + if sum(quantities) > 0: + # give one of least-valued available item + a_opp[idx] = 1 + a_self = [quantities[i] - a_opp[i] for i in range(len(quantities))] + return a_self, a_opp + + def accepts(self, offer_value: int, batna_value: int, counter_value: int) -> bool: + # Never accept; always counter with the least-valued item offer + return False + + diff --git a/scenarios/bargaining/bargaining_env/game_data.py b/scenarios/bargaining/bargaining_env/game_data.py new file mode 100644 index 0000000..2434d96 --- /dev/null +++ b/scenarios/bargaining/bargaining_env/game_data.py @@ -0,0 +1,106 @@ +import json +import pickle + + +class GameData: + def __init__(self, circle, date, agent1, agent2): + """ + circle: An integer or string indicating which circle/stage of the experiment + date: String or datetime indicating the date/time of the game + agent1: Identifier for agent 1 + agent2: Identifier for agent 2 + """ + self.circle = circle + self.date = date + self.agent1 = agent1 + self.agent2 = agent2 + + # Store a list of rounds with prompts/responses/actions + # Each entry can be a dict with keys "prompt", "response", "action" + self.round_data = [] + + # Store the final outcome separately + self.outcome = None + + def add_round_data(self, prompt, response, action): + """ + Store data from a single round. + + prompt: The prompt text shown to the agent + response: The raw text (or structured data) from the agent's response + action: A string or structured data describing the action (ACCEPT, WALK, COUNTEROFFER, etc.) + metrics: A dictionary of metrics for the round + """ + self.round_data.append({ + "prompt": prompt, + "response": response, + "action": action, + }) + + def set_outcome(self, outcome): + """ + Set or update the final outcome of the game. + """ + self.outcome = outcome + + @classmethod + def from_dict(cls, data): + """ + Create a GameData instance from a dictionary, typically loaded from JSON. + """ + game_data = cls( + circle=data["circle"], + date=data["date"], + agent1=data["agent1"], + agent2=data["agent2"] + ) + game_data.round_data = data.get("round_data", []) + game_data.outcome = data.get("outcome") + return game_data + + def save_to_json(self, filename): + """ + Save the GameData as JSON to a specified file. + """ + with open(filename, "w") as f: + json.dump(self.to_dict(), f) + + def to_dict(self): + """ + Convert the GameData instance into a dictionary with all data + in JSON-serializable formats. + """ + data = { + "circle": self.circle, + "date": self.date, + "agent1": self.agent1, + "agent2": self.agent2, + "round_data": self.round_data # Assuming round_data is a list of dicts + } + return data + + @classmethod + def load_from_json(cls, filename): + """ + Load the GameData from a JSON file. + """ + with open(filename, "r") as f: + data = json.load(f) + return cls.from_dict(data) + + def save_pickle(self, filename): + """ + Pickle the GameData object to a specified file. + """ + with open(filename, "wb") as f: + pickle.dump(self, f) + + @classmethod + def load_pickle(cls, filename): + """ + Load a pickled GameData object from a file. 
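+
+        Example (illustrative path):
+
+            game = GameData.load_pickle("bargaining_runs/game_0.pkl")
+            print(game.circle, game.agent1, game.agent2, game.outcome)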
+ """ + with open(filename, "rb") as f: + return pickle.load(f) + + diff --git a/scenarios/bargaining/bargaining_env/main.py b/scenarios/bargaining/bargaining_env/main.py new file mode 100644 index 0000000..f7d75bf --- /dev/null +++ b/scenarios/bargaining/bargaining_env/main.py @@ -0,0 +1,438 @@ +import json +from pathlib import Path +from typing import Any, Dict, List, Tuple +import random + + +def _ensure_dir(p: str | Path) -> Path: + path = Path(p) + path.mkdir(parents=True, exist_ok=True) + return path + + +def _load_meta(input_dir: Path) -> Dict[str, Any]: + return json.loads((input_dir / "meta.json").read_text()) + + +def _load_payoffs(input_dir: Path) -> Dict[str, Any]: + return json.loads((input_dir / "payoffs.json").read_text()) + + +def _build_matrix(agents: List[str], payoffs: Dict[str, Any]) -> List[List[float]]: + n = len(agents) + M = [[0.0 for _ in range(n)] for _ in range(n)] + for i, ai in enumerate(agents): + for j, aj in enumerate(agents): + key = f"{ai}__vs__{aj}" + cell = payoffs.get(key, None) + if cell is None: + continue + M[i][j] = float(cell.get("row_mean_payoff", 0.0)) + return M + + +def run_analysis( + *, + input_dir: str, + output_dir: str, + discount_factor: float = 0.98, + num_bootstrap: int = 100, + norm_constants: Dict[str, float] | None = None, + random_seed: int | None = 42, +) -> Dict[str, Any]: + """Empirical meta-game analysis with bootstrapping: + - Build symmetric payoff matrix from per-match traces via bootstrap resampling + - Compute MENE using MILP + - Compute NE regrets + - Evaluate normalized UW, NW, NWA and EF1 frequency against MENE for each agent + Repeat for num_bootstrap samples and write results. + """ + in_dir = Path(input_dir) + out_dir = _ensure_dir(output_dir) + + meta = _load_meta(in_dir) + agents: List[str] = list(meta.get("agents", [])) + if len(agents) == 0: + raise ValueError("No agents found for meta-game analysis.") + num_agents = len(agents) + + # Normalization constants: user-provided + norm = { + "UW": float(norm_constants.get("UW")) if norm_constants and "UW" in norm_constants else 1.0, + "NW": float(norm_constants.get("NW")) if norm_constants and "NW" in norm_constants else 1.0, + "NWA": float(norm_constants.get("NWA")) if norm_constants and "NWA" in norm_constants else 1.0, + } + + # Load all ordered traces once + traces_dir = in_dir / "traces" + if not traces_dir.exists(): + raise ValueError(f"Trace directory not found: {traces_dir}") + + def load_records(pair_key: str) -> List[Dict[str, Any]]: + records: List[Dict[str, Any]] = [] + p = traces_dir / f"{pair_key}.jsonl" + if not p.exists(): + return records + with p.open("r") as f: + for line in f: + line = line.strip() + if not line: + continue + try: + records.append(json.loads(line)) + except Exception: + continue + return records + + ordered_data: Dict[Tuple[int, int], List[Dict[str, Any]]] = {} + for i, ai in enumerate(agents): + for j, aj in enumerate(agents): + key = f"{ai}__vs__{aj}" + ordered_data[(i, j)] = load_records(key) + + rng = random.Random(random_seed) + + def bootstrap_sample(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + if not items: + return [] + return [items[rng.randrange(0, len(items))] for _ in range(len(items))] + + def ef1_from_record(rec: Dict[str, Any]) -> int: + ef1 = rec.get("ef1", None) + if isinstance(ef1, bool): + return 1 if ef1 else 0 + return 0 + + def ordered_stats(records: List[Dict[str, Any]]) -> Dict[str, float]: + if not records: + return { + "row_mean": 0.0, + "col_mean": 0.0, + "uw_norm": 0.0, + "nw_norm": 0.0, + 
"nwa_norm": 0.0, + "ef1_freq": 0.0, + } + sum_row = 0.0 + sum_col = 0.0 + sum_uw = 0.0 + sum_nw = 0.0 + sum_nwa = 0.0 + acc_count = 0 + ef1_count = 0 + for rec in records: + p1 = float(rec.get("payoff1", 0.0)) + p2 = float(rec.get("payoff2", 0.0)) + sum_row += p1 + sum_col += p2 + # welfare (discounted) + uw = p1 + p2 + nw = (p1 * p2) ** 0.5 if p1 >= 0.0 and p2 >= 0.0 else 0.0 + # NWA with discount applied to BATNAs based on round (if available) + w1 = float(rec.get("b1", 0.0)) + w2 = float(rec.get("b2", 0.0)) + round_idx = rec.get("round", None) + disc = 1.0 + if isinstance(round_idx, int) and round_idx >= 1: + disc = discount_factor ** (round_idx - 1) + s1 = max(0.0, p1 - w1 * disc) + s2 = max(0.0, p2 - w2 * disc) + nwa = (s1 * s2) ** 0.5 + sum_uw += uw / max(1e-12, norm["UW"]) + sum_nw += nw / max(1e-12, norm["NW"]) + sum_nwa += nwa / max(1e-12, norm["NWA"]) + if rec.get("accepted", False): + acc_count += 1 + ef1_count += ef1_from_record(rec) + + n = float(len(records)) + ef1_freq = (ef1_count / max(1, acc_count)) if acc_count > 0 else 0.0 + return { + "row_mean": sum_row / n, + "col_mean": sum_col / n, + "uw_norm": sum_uw / n, + "nw_norm": sum_nw / n, + "nwa_norm": sum_nwa / n, + "ef1_freq": ef1_freq, + } + + # Require MENE solver; no fallback + try: + from scenarios.bargaining.bargaining_env.mene_solver import milp_max_sym_ent_2p, compute_regret # type: ignore + import numpy as np # type: ignore + except Exception as e: + raise RuntimeError(f"MENE solver dependencies unavailable: {e}") + + boot_results: List[Dict[str, Any]] = [] + + for b in range(max(1, int(num_bootstrap))): + # Build symmetric payoff matrix via bootstrap + M = [[0.0 for _ in range(num_agents)] for _ in range(num_agents)] + # Also precompute per-ordered normalized welfare metrics for agent-level aggregation + W_uw = [[0.0 for _ in range(num_agents)] for _ in range(num_agents)] + W_nw = [[0.0 for _ in range(num_agents)] for _ in range(num_agents)] + W_nwa = [[0.0 for _ in range(num_agents)] for _ in range(num_agents)] + W_ef1 = [[0.0 for _ in range(num_agents)] for _ in range(num_agents)] + sample_cache: Dict[Tuple[int, int], List[Dict[str, Any]]] = {} + stats_cache: Dict[Tuple[int, int], Dict[str, float]] = {} + + for i in range(num_agents): + for j in range(num_agents): + recs_ij = ordered_data[(i, j)] + sample_ij = bootstrap_sample(recs_ij) + stats_ij = ordered_stats(sample_ij) + sample_cache[(i, j)] = sample_ij + stats_cache[(i, j)] = stats_ij + if i == j: + M[i][j] = stats_ij["row_mean"] + W_uw[i][j] = stats_ij["uw_norm"] + W_nw[i][j] = stats_ij["nw_norm"] + W_nwa[i][j] = stats_ij["nwa_norm"] + W_ef1[i][j] = stats_ij["ef1_freq"] + + # Fill symmetric entries for i != j using both role datasets + for i in range(num_agents): + for j in range(num_agents): + if i == j: + continue + row_stats = stats_cache.get((i, j)) + col_stats = stats_cache.get((j, i)) + row_mean = row_stats["row_mean"] if row_stats else 0.0 + col_from_rev = col_stats["col_mean"] if col_stats else 0.0 + M[i][j] = 0.5 * (row_mean + col_from_rev) + + # Solve MENE + x_np = milp_max_sym_ent_2p(np.array(M), discrete_factors=100) + reg_vec, nash_val, u_vals = compute_regret(x_np, np.array(M)) + regrets = [float(r) for r in reg_vec.tolist()] + mixture = [float(p) for p in x_np.tolist()] + + # Aggregate welfare metrics vs MENE mixture for each agent + agent_metrics: Dict[str, Dict[str, float]] = {} + for i, ai in enumerate(agents): + uw_i = 0.0 + nw_i = 0.0 + nwa_i = 0.0 + ef1_i = 0.0 + for j in range(num_agents): + if i == j: + # self-play: use i__vs__i 
only + uw_ij = W_uw[i][i] + nw_ij = W_nw[i][i] + nwa_ij = W_nwa[i][i] + ef1_ij = W_ef1[i][i] + else: + #, should all be same + uw_ij = 0.5 * (W_uw[i][j] + W_uw[j][i]) + nw_ij = 0.5 * (W_nw[i][j] + W_nw[j][i]) + nwa_ij = 0.5 * (W_nwa[i][j] + W_nwa[j][i]) + ef1_ij = 0.5 * (W_ef1[i][j] + W_ef1[j][i]) + uw_i += mixture[j] * uw_ij + nw_i += mixture[j] * nw_ij + nwa_i += mixture[j] * nwa_ij + ef1_i += mixture[j] * ef1_ij + agent_metrics[ai] = { + "UW_norm": uw_i, + "NW_norm": nw_i, + "NWA_norm": nwa_i, + "EF1_freq": ef1_i, + } + + boot_results.append({ + "mixture": mixture, + "regrets": regrets, + "agent_metrics": agent_metrics, + }) + + # Summaries (means and standard errors over bootstraps) + def average_list(lst: List[List[float]]) -> List[float]: + if not lst: + return [] + k = len(lst[0]) + sums = [0.0] * k + for v in lst: + for t in range(k): + sums[t] += v[t] + return [x / len(lst) for x in sums] + + def bootstrap_se_list(lst: List[List[float]]) -> List[float]: + """Compute bootstrap standard error = std of bootstrap distribution for each position.""" + if not lst or len(lst) < 2: + return [0.0] * (len(lst[0]) if lst else 0) + k = len(lst[0]) + n = len(lst) + means = average_list(lst) + variances = [0.0] * k + for v in lst: + for t in range(k): + variances[t] += (v[t] - means[t]) ** 2 + # Bootstrap SE = std of the bootstrap distribution (sample std) + return [((variances[t] / (n - 1)) ** 0.5) if n > 1 else 0.0 for t in range(k)] + + mixtures = [br["mixture"] for br in boot_results] + regs = [br["regrets"] for br in boot_results] + avg_mixture = average_list(mixtures) + avg_regrets = average_list(regs) + se_regrets = bootstrap_se_list(regs) + + # Average agent metrics with standard errors + avg_agent_metrics: Dict[str, Dict[str, float]] = {} + se_agent_metrics: Dict[str, Dict[str, float]] = {} + for ai in agents: + acc = {"UW_norm": 0.0, "NW_norm": 0.0, "NWA_norm": 0.0, "EF1_freq": 0.0} + vals: Dict[str, List[float]] = {"UW_norm": [], "NW_norm": [], "NWA_norm": [], "EF1_freq": []} + for br in boot_results: + m = br["agent_metrics"][ai] + for k in acc: + acc[k] += float(m[k]) + vals[k].append(float(m[k])) + for k in acc: + acc[k] /= len(boot_results) if boot_results else 1.0 + avg_agent_metrics[ai] = acc + # Compute bootstrap standard errors (std of bootstrap distribution) + se = {} + n = len(boot_results) + for k in vals: + if n > 1: + mean_val = acc[k] + variance = sum((v - mean_val) ** 2 for v in vals[k]) / (n - 1) + se[k] = variance ** 0.5 # Bootstrap SE = std of bootstrap distribution + else: + se[k] = 0.0 + se_agent_metrics[ai] = se + + result = { + "agents": agents, + "bootstrap": { + "num_bootstrap": num_bootstrap, + "results": boot_results, + "averages": { + "mixture": avg_mixture, + "regrets": avg_regrets, + "agent_metrics": avg_agent_metrics, + }, + "standard_errors": { + "regrets": se_regrets, + "agent_metrics": se_agent_metrics, + }, + }, + "params": { + "discount_factor": discount_factor, + "normalization": norm, + }, + } + (Path(output_dir) / "results.json").write_text(json.dumps(result, indent=2)) + return result + + +def _count_profiles(traces_dir: Path) -> int: + """Count available strategy profiles as number of trace files.""" + if not traces_dir.exists(): + return 0 + return len(list(traces_dir.glob("*.jsonl"))) + + +def run_metagame_analysis(config: Dict[str, Any] | None = None) -> Dict[str, Any]: + """ + Run the full metagame analysis and return metrics in a standardized schema. 
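+
+    Example config (only "input_dir" is required; omitted keys fall back to the
+    defaults defined below, and the input_dir value here is illustrative):
+
+        {
+            "input_dir": "bargaining_runs/metagame_input",
+            "output_dir": "meta_game_analysis/results_bargaining",
+            "discount_factor": 0.98,
+            "num_bootstrap": 100,
+            "norm_constants": {"UW": 805.9, "NW": 378.7, "NWA": 81.7},
+            "random_seed": 42,
+        }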
+ + Returns: + { + "summary": {...}, + "per_agent": [...], + "error": {...optional...} + } + """ + cfg = config or {} + defaults = { + "discount_factor": 0.98, + "num_bootstrap": 100, + "norm_constants": {"UW": 805.9, "NW": 378.7, "NWA": 81.7}, + "random_seed": 42, + } + try: + input_dir = cfg.get("input_dir") + output_dir = cfg.get("output_dir", "meta_game_analysis/results_bargaining") + if not input_dir: + raise ValueError("input_dir is required for metagame analysis") + + analysis_kwargs = { + "input_dir": input_dir, + "output_dir": output_dir, + "discount_factor": cfg.get("discount_factor", defaults["discount_factor"]), + "num_bootstrap": cfg.get("num_bootstrap", defaults["num_bootstrap"]), + "norm_constants": cfg.get("norm_constants", defaults["norm_constants"]), + "random_seed": cfg.get("random_seed", defaults["random_seed"]), + } + res = run_analysis(**analysis_kwargs) + + agents: List[str] = list(res.get("agents", [])) + avg_regrets: List[float] = res.get("bootstrap", {}).get("averages", {}).get("regrets", []) or [] + avg_agent_metrics: Dict[str, Dict[str, float]] = res.get("bootstrap", {}).get("averages", {}).get("agent_metrics", {}) or {} + se_regrets: List[float] = res.get("bootstrap", {}).get("standard_errors", {}).get("regrets", []) or [] + se_agent_metrics: Dict[str, Dict[str, float]] = res.get("bootstrap", {}).get("standard_errors", {}).get("agent_metrics", {}) or {} + + per_agent: List[Dict[str, Any]] = [] + for idx, agent in enumerate(agents): + am = avg_agent_metrics.get(agent, {}) + se_am = se_agent_metrics.get(agent, {}) + per_agent.append( + { + "agent_name": agent, + "mene_regret": float(avg_regrets[idx]) if idx < len(avg_regrets) else None, + "mene_regret_se": float(se_regrets[idx]) if idx < len(se_regrets) else None, + "nw_percent": float(am.get("NW_norm", 0.0)) * 100.0, + "nw_percent_se": float(se_am.get("NW_norm", 0.0)) * 100.0, + "nwa_percent": float(am.get("NWA_norm", 0.0)) * 100.0, + "nwa_percent_se": float(se_am.get("NWA_norm", 0.0)) * 100.0, + "uw_percent": float(am.get("UW_norm", 0.0)) * 100.0, + "uw_percent_se": float(se_am.get("UW_norm", 0.0)) * 100.0, + "ef1_percent": float(am.get("EF1_freq", 0.0)) * 100.0, + "ef1_percent_se": float(se_am.get("EF1_freq", 0.0)) * 100.0, + } + ) + + summary = {} + if per_agent: + summary = { + "num_agents": len(per_agent), + "num_profiles": _count_profiles(Path(input_dir) / "traces"), + "mene_regret_mean": float(sum(pa["mene_regret"] for pa in per_agent if pa["mene_regret"] is not None) / max(1, sum(1 for pa in per_agent if pa["mene_regret"] is not None))), + "nw_percent_mean": float(sum(pa["nw_percent"] for pa in per_agent) / len(per_agent)), + "nwa_percent_mean": float(sum(pa["nwa_percent"] for pa in per_agent) / len(per_agent)), + "uw_percent_mean": float(sum(pa["uw_percent"] for pa in per_agent) / len(per_agent)), + "ef1_percent_mean": float(sum(pa["ef1_percent"] for pa in per_agent) / len(per_agent)), + } + else: + summary = { + "num_agents": 0, + "num_profiles": 0, + "mene_regret_mean": None, + "nw_percent_mean": None, + "nwa_percent_mean": None, + "uw_percent_mean": None, + "ef1_percent_mean": None, + } + + return { + "summary": summary, + "per_agent": per_agent, + } + except Exception as e: + return { + "summary": { + "num_agents": 0, + "num_profiles": 0, + "mene_regret_mean": None, + "nw_percent_mean": None, + "nwa_percent_mean": None, + "uw_percent_mean": None, + "ef1_percent_mean": None, + }, + "per_agent": [], + "error": { + "type": type(e).__name__, + "message": str(e), + }, + } + diff --git 
a/scenarios/bargaining/bargaining_env/mene_solver.py b/scenarios/bargaining/bargaining_env/mene_solver.py new file mode 100644 index 0000000..b4104b6 --- /dev/null +++ b/scenarios/bargaining/bargaining_env/mene_solver.py @@ -0,0 +1,126 @@ +import warnings +from typing import Tuple + +import numpy as np + +# Tolerance for regret feasibility +# Note: 1e-6 can be too tight for numerical stability with MILP solvers +EPSILON: float = 1e-4 + + +def _simplex_projection(x: np.ndarray) -> np.ndarray: + """ + Project onto probability simplex. + """ + x = np.asarray(x, dtype=float).reshape(-1) + if (x >= 0).all() and abs(np.sum(x) - 1) < 1e-10: + return x + + n = len(x) + u = np.sort(x)[::-1] + cssv = np.cumsum(u) - 1 + rho = np.nonzero(u * np.arange(1, n + 1) > cssv)[0][-1] + theta = cssv[rho] / (rho + 1) + return np.maximum(x - theta, 0.0) + + +def compute_regret(mix: np.ndarray, game_matrix: np.ndarray) -> Tuple[np.ndarray, float, np.ndarray]: + """ + Regret for symmetric 2p game under mixture `mix` (row payoffs in game_matrix). + - regrets[i] = max(0, (M[i]·mix) - (mix^T M mix)) + Returns (regrets, nash_value, expected_utils_per_pure). + """ + M = np.asarray(game_matrix, dtype=float) + x = np.asarray(mix, dtype=float).reshape(-1) + u_vals = M @ x + nash_value = float(x @ u_vals) # x^T M x + regrets = np.maximum(0.0, u_vals - nash_value) + return regrets, nash_value, u_vals + + +def milp_max_sym_ent_2p(game_matrix, discrete_factors: int = 100) -> np.ndarray: + """ + Compute maximum-entropy symmetric Nash equilibrium for a 2-player symmetric game. + Uses CVXPY MIP with ECOS_BB, fallback to GLPK_MI. Based on the provided formulation. + """ + # Lazy import so the module can be imported without cvxpy installed + try: + import cvxpy as cp # type: ignore + except Exception as e: + raise RuntimeError(f"cvxpy is required for MILP MENE solver: {e}") + + game_matrix_np = np.array(game_matrix, dtype=np.float64) + if game_matrix_np.ndim != 2 or game_matrix_np.shape[0] != game_matrix_np.shape[1]: + raise ValueError("game_matrix must be a square 2D array") + + # Fill NaNs column-wise with column mean, fallback to 0 if all NaN + if np.isnan(game_matrix_np).any(): + for j in range(game_matrix_np.shape[1]): + col = game_matrix_np[:, j] + if np.isnan(col).any(): + col_mean = np.nanmean(col) + if np.isnan(col_mean): + col_mean = 0.0 + col_filled = np.where(np.isnan(col), col_mean, col) + game_matrix_np[:, j] = col_filled + + M = game_matrix_np.shape[0] + U = float(np.max(game_matrix_np) - np.min(game_matrix_np)) + if U <= 0: + # Degenerate: all entries equal; any mixture works + return _simplex_projection(np.ones(M) / M) + + x = cp.Variable(M) + u = cp.Variable(1) + z = cp.Variable(M) + b = cp.Variable(M, boolean=True) + + obj = cp.Minimize(cp.sum(z)) + + a_mat = np.ones((1, M)) + u_m = game_matrix_np @ x + + constraints = [ + u_m <= u + EPSILON, + a_mat @ x == 1, + x >= 0, + u - u_m <= U * b, + x <= 1 - b, + ] + + for k in range(discrete_factors): + if k == 0: + constraints.append(np.log(1 / discrete_factors) * x <= z) + else: + # linear approximation of x*log(x) at k/discrete_factors + slope = ((k + 1) * np.log((k + 1) / discrete_factors) - k * np.log(max(k, 1) / discrete_factors)) + intercept = (k / discrete_factors) * np.log(max(k, 1) / discrete_factors) + constraints.append(intercept + slope * (x - k / discrete_factors) <= z) + + prob = cp.Problem(obj, constraints) + + try: + prob.solve(solver=cp.ECOS_BB) + if not (prob.status and prob.status.startswith("optimal")): + raise ValueError(f"ECOS_BB status: 
{prob.status}") + except Exception as e: + warnings.warn(f"Failed to solve with ECOS_BB: {e}") + try: + prob.solve(solver=cp.GLPK_MI) + if not (prob.status and prob.status.startswith("optimal")): + raise ValueError(f"GLPK_MI status: {prob.status}") + except Exception as e2: + raise RuntimeError(f"Both ECOS_BB and GLPK_MI solvers failed. ECOS_BB error: {e}, GLPK_MI error: {e2}") + + ne_strategy = _simplex_projection(np.array(x.value).reshape(-1)) + regret, _, _ = compute_regret(ne_strategy, game_matrix_np) + max_regret = float(np.max(regret)) if regret.size else 0.0 + if max_regret <= EPSILON: + return ne_strategy + # If regret is high but we have a valid solution, warn and return it anyway + # This handles numerical precision issues without failing the evaluation + warnings.warn(f"Nash equilibrium regret {max_regret:.6f} exceeds tolerance {EPSILON}, " + f"but returning best solution found") + return ne_strategy + + diff --git a/scenarios/bargaining/bargaining_env/pyspiel_integration.py b/scenarios/bargaining/bargaining_env/pyspiel_integration.py new file mode 100644 index 0000000..b7c2723 --- /dev/null +++ b/scenarios/bargaining/bargaining_env/pyspiel_integration.py @@ -0,0 +1,38 @@ +from __future__ import annotations +from typing import Dict, Tuple, Any + + +def build_negotiation_params( + *, + discount: float, + max_rounds: int, + num_items: int = 3, + item_quantities: Tuple[int, int, int] = (7, 4, 1), + min_value: int = 1, + max_value: int = 100, + max_quantity: int = 10, +) -> Dict[str, Any]: + return { + "enable_proposals": True, + "enable_utterances": False, + "num_items": num_items, + "discount": discount, + "min_value": min_value, + "max_value": max_value, + "max_rounds": max_rounds, + "max_quantity": max_quantity, + "item_quantities": f"{item_quantities[0]},{item_quantities[1]},{item_quantities[2]}", + } + + +def try_load_pyspiel_game(params: Dict[str, Any]): + try: + import pyspiel # type: ignore + except Exception: + return None + try: + return pyspiel.load_game("negotiation", params) + except Exception: + return None + + diff --git a/scenarios/bargaining/bargaining_env/pyspiel_runner.py b/scenarios/bargaining/bargaining_env/pyspiel_runner.py new file mode 100644 index 0000000..efd1f5d --- /dev/null +++ b/scenarios/bargaining/bargaining_env/pyspiel_runner.py @@ -0,0 +1,606 @@ +import json +import random +import re +from dataclasses import dataclass, asdict +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +from .pyspiel_integration import try_load_pyspiel_game, build_negotiation_params +from .agents.nfsp import NFSPAgentWrapper +from .agents.rnad import RNaDAgentWrapper +from .agents.remote import RemoteNegotiator + +# Simple, dependency-light OpenSpiel runner for reference data dumps. +# Only engages when pyspiel is available and explicitly requested. 
+ + +AGREE_TOKEN = "Agreement" +WALK_TOKEN = "walk away" +PROPOSAL_RE = re.compile(r"Proposal:\s*\[([^\]]+)\]") + + +def _parse_keep_vector(action_str: str) -> Optional[List[int]]: + m = PROPOSAL_RE.search(action_str) + if not m: + return None + try: + inner = m.group(1) + nums = [int(x.strip()) for x in inner.split(",")] + return nums + except Exception: + return None + + +def _list_actions(state) -> List[Tuple[int, str]]: + acts = [] + try: + la = state.legal_actions() + for a in la: + try: + s = state.action_to_string(state.current_player(), a) + except Exception: + s = str(a) + acts.append((a, s)) + except Exception: + pass + return acts + + +def _find_accept_action(actions: List[Tuple[int, str]]) -> Optional[int]: + for a, s in actions: + if AGREE_TOKEN in s: + return a + return None + + +def _find_walk_action(actions: List[Tuple[int, str]]) -> Optional[int]: + for a, s in actions: + if WALK_TOKEN in s.lower(): + return a + return None + + +def _non_terminal_actions(actions: List[Tuple[int, str]]) -> List[int]: + res = [] + for a, s in actions: + if AGREE_TOKEN in s: + continue + if WALK_TOKEN in s.lower(): + continue + res.append(a) + return res + + +@dataclass +class TurnRecord: + round_index: int + player: int + action: int + action_str: str + + +@dataclass +class GameRecord: + pair: str + game_index: int + returns: List[float] + turns: List[TurnRecord] + + +def _soft_step(state) -> int: + actions = _list_actions(state) + # If there's an offer on the table, Accept should be legal: always accept + a_acc = _find_accept_action(actions) + if a_acc is not None: + return a_acc + # Otherwise propose randomly among non-terminal actions + choices = _non_terminal_actions(actions) + if choices: + return random.choice(choices) + # Fallback to any legal action + return actions[0][0] if actions else 0 + + +def _tough_step(state, quantities: Tuple[int, int, int]) -> int: + actions = _list_actions(state) + # Never walk or accept; choose a proposal that gives exactly 1 of an item if possible + candidates: List[Tuple[int, str, List[int]]] = [] + for a, s in actions: + if AGREE_TOKEN in s or WALK_TOKEN in s.lower(): + continue + keep = _parse_keep_vector(s) + if keep is None or len(keep) != len(quantities): + continue + give = [quantities[i] - keep[i] for i in range(len(quantities))] + if sum(give) == 1 and all(g >= 0 for g in give): + candidates.append((a, s, give)) + if candidates: + # Prefer giving the lowest-index item + candidates.sort(key=lambda t: next(i for i, g in enumerate(t[2]) if g == 1)) + return candidates[0][0] + # Otherwise any non-terminal proposal + choices = _non_terminal_actions(actions) + if choices: + return random.choice(choices) + return actions[0][0] if actions else 0 + + +def run_pyspiel_pair( + *, + pair_key: str, + agent_row: str, + agent_col: str, + discount: float, + max_rounds: int, + num_items: int, + quantities: Tuple[int, int, int], + games: int, + out_dir: Path, +) -> Dict[str, Any]: + params = build_negotiation_params( + discount=discount, + max_rounds=max_rounds, + num_items=num_items, + item_quantities=quantities, + min_value=1, + max_value=100, + max_quantity=10, + ) + game = try_load_pyspiel_game(params) + if game is None: + return {"status": "pyspiel_not_available"} + + pair_dir = out_dir / "pyspiel_traces" + pair_dir.mkdir(parents=True, exist_ok=True) + records: List[GameRecord] = [] + + for gi in range(games): + state = game.new_initial_state() + turn_log: List[TurnRecord] = [] + round_idx = 1 + # Drive chance nodes and turns + while not state.is_terminal(): 
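+ # Added commentary: chance nodes draw the hidden instance parameters for the
+ # episode. outcomes[0] is applied instead of sampling from the chance
+ # distribution, so these reference dumps are deterministic for a given build;
+ # swap in a weighted random draw here if sampled instances are preferred.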
+ if state.is_chance_node(): + outcomes = state.chance_outcomes() + if not outcomes: + break + action, _ = outcomes[0] + state.apply_action(action) + continue + cur = state.current_player() + # Choose step fn + if cur == 0: + # row agent + if "soft" in agent_row.lower(): + a = _soft_step(state) + elif "tough" in agent_row.lower(): + a = _tough_step(state, quantities) + else: + a = _soft_step(state) # default simple + else: + if "soft" in agent_col.lower(): + a = _soft_step(state) + elif "tough" in agent_col.lower(): + a = _tough_step(state, quantities) + else: + a = _soft_step(state) + try: + a_str = state.action_to_string(cur, a) + except Exception: + a_str = str(a) + turn_log.append(TurnRecord(round_index=round_idx, player=cur, action=a, action_str=a_str)) + state.apply_action(a) + if cur == 1: + round_idx += 1 + if round_idx > max_rounds: + break + try: + rets = state.returns() + rets = [float(x) for x in rets] + except Exception: + rets = [] + records.append(GameRecord(pair=pair_key, game_index=gi, returns=rets, turns=turn_log)) + + # Save JSON + dump = [asdict(gr) for gr in records] + out_file = pair_dir / f"{pair_key}.json" + out_file.write_text(json.dumps(dump, indent=2)) + return {"status": "ok", "file": str(out_file), "games": games} + + +# -------------------- NFSP-enabled runner producing JSONL traces -------------------- + +def _decode_basic_from_obs(obs: List[float], num_items: int) -> Tuple[List[int], List[int], float]: + # Returns (items, values, batna) + items = list(map(int, obs[9: 9 + num_items])) + pv_start = 9 + num_items + pv_end = pv_start + num_items + vals = list(map(int, obs[pv_start:pv_end])) + w = float(obs[9 + 2 * num_items]) if len(obs) > (9 + 2 * num_items) else 0.0 + return items, vals, w + + +def _value(v: List[int], a: List[int]) -> int: + return v[0] * a[0] + v[1] * a[1] + v[2] * a[2] + + +def _is_ef1(v: List[int], a_self: List[int], a_other: List[int]) -> bool: + self_val = _value(v, a_self) + other_val = _value(v, a_other) + if other_val <= self_val: + return True + max_item = 0 + for k in range(3): + if a_other[k] > 0: + max_item = max(max_item, v[k]) + return (other_val - self_val) <= max_item + + +def _aspiration_step(state, quantities: Tuple[int, int, int], keep_fraction: float = 0.85) -> int: + # Choose a non-terminal proposal that keeps ~keep_fraction of total value (greedy) + actions = _list_actions(state) + choices = [] + for a, s in actions: + if AGREE_TOKEN in s or WALK_TOKEN in s.lower(): + continue + keep = _parse_keep_vector(s) + if keep is None or len(keep) != len(quantities): + continue + choices.append(a) + if choices: + # Simple heuristic fallback: random among non-terminals + return random.choice(choices) + return actions[0][0] if actions else 0 + + +def run_pyspiel_pair_nfsp_with_traces( + *, + pair_key: str, + agent_row: str, + agent_col: str, + discount: float, + max_rounds: int, + num_items: int, + quantities: Tuple[int, int, int], + games: int, + out_dir: Path, + nfsp_checkpoint_path: Optional[str], + rnad_checkpoint_path: Optional[str], + remote_agents: Optional[Dict[str, str]] = None, + remote_agent_circles: Optional[Dict[str, int]] = None, +) -> Dict[str, Any]: + """ + Run OpenSpiel negotiation games where at least one agent is NFSP or RNAD. + Writes JSONL traces compatible with the lightweight simulator format and + returns aggregate payoffs for payoffs.json. 
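+
+ Each JSONL record follows the same schema as the lightweight simulator:
+ keys "pair", "game", "accepted", "round", "q", "v1", "v2", "b1", "b2",
+ "a1", "a2", "payoff1", "payoff2", "ef1", where a1/a2 are the realized
+ allocations ([0, 0, 0] on a walk) and payoffs are discounted by
+ discount ** (round - 1). Despite the function name, pairings of purely
+ heuristic or remote agents (no NFSP/RNAD involved) are handled as well.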
+ """ + params = build_negotiation_params( + discount=discount, + max_rounds=max_rounds, + num_items=num_items, + item_quantities=quantities, + min_value=1, + max_value=100, + max_quantity=10, + ) + game = try_load_pyspiel_game(params) + if game is None: + raise RuntimeError("OpenSpiel 'negotiation' game is unavailable; cannot run NFSP matches.") + + traces_dir = out_dir / "traces" + traces_dir.mkdir(parents=True, exist_ok=True) + trace_file = traces_dir / f"{pair_key}.jsonl" + + remote_map: Dict[str, str] = {str(k): str(v) for k, v in (remote_agents or {}).items()} + remote_circles: Dict[str, int] = {} + for k, v in (remote_agent_circles or {}).items(): + try: + remote_circles[str(k)] = int(v) + except Exception: + continue + + def _allocations_from_keep(keep: List[int], proposer: str) -> Tuple[List[int], List[int]]: + # keep is items proposer keeps; other gets remainder + other = [quantities[i] - keep[i] for i in range(len(quantities))] + if proposer == "row": + return keep, other + return other, keep + + def _find_action_for_keep(actions: List[Tuple[int, str]], keep_vec: List[int]) -> Optional[int]: + for a, s in actions: + parsed = _parse_keep_vector(s) + if parsed is None: + continue + try: + if all(int(parsed[i]) == int(keep_vec[i]) for i in range(len(keep_vec))): + return a + except Exception: + continue + return None + + # Aggregates + n_accept = 0 + n_ef1 = 0 + sum_row = 0.0 + sum_col = 0.0 + + # Instantiate NFSP/RNAD wrappers ONCE before the game loop (not per-game) + # This avoids repeated JAX/Haiku initialization which is very slow on some CPUs + is_row_nfsp = "nfsp" in agent_row.lower() + is_col_nfsp = "nfsp" in agent_col.lower() + is_row_rnad = "rnad" in agent_row.lower() + is_col_rnad = "rnad" in agent_col.lower() + is_row_remote = agent_row in remote_map + is_col_remote = agent_col in remote_map + + nfsp_row = NFSPAgentWrapper(game, 0, checkpoint_path=nfsp_checkpoint_path, discount=discount, max_rounds=max_rounds) if is_row_nfsp else None + nfsp_col = NFSPAgentWrapper(game, 1, checkpoint_path=nfsp_checkpoint_path, discount=discount, max_rounds=max_rounds) if is_col_nfsp else None + rnad_row = RNaDAgentWrapper(game, 0, checkpoint_path=rnad_checkpoint_path) if is_row_rnad else None + rnad_col = RNaDAgentWrapper(game, 1, checkpoint_path=rnad_checkpoint_path) if is_col_rnad else None + remote_row = RemoteNegotiator(label=agent_row, endpoint=remote_map[agent_row], prompt_circle=remote_circles.get(agent_row)) if is_row_remote else None + remote_col = RemoteNegotiator(label=agent_col, endpoint=remote_map[agent_col], prompt_circle=remote_circles.get(agent_col)) if is_col_remote else None + + for gi in range(games): + state = game.new_initial_state() + round_idx = 1 + + # Capture valuations/BATNAs from observations (best-effort) once at first decision node + v1 = v2 = [0, 0, 0] + b1 = b2 = 0.0 + captured_info = False + + # Track last proposed keep vectors to reconstruct accepted allocation + last_keep_from_row: Optional[List[int]] = None + last_keep_from_col: Optional[List[int]] = None + accepted = False + accepted_round = 1 + + while not state.is_terminal(): + if state.is_chance_node(): + outcomes = state.chance_outcomes() + if not outcomes: + break + action, _ = outcomes[0] + state.apply_action(action) + continue + + # First time at a decision node, capture obs-derived params + if not captured_info: + try: + obs0 = state.observation_tensor(0) + obs1 = state.observation_tensor(1) + _, v1, b1 = _decode_basic_from_obs(obs0, num_items) + _, v2, b2 = _decode_basic_from_obs(obs1, 
num_items) + except Exception: + pass + captured_info = True + # Initialize remote contexts once valuations are known + if remote_row is not None: + remote_row.set_context( + pair_key=pair_key, + game_index=gi, + role="row", + valuations_self=v1, + valuations_opp=v2, + batna_self=b1, + batna_opp=b2, + discount=discount, + max_rounds=max_rounds, + quantities=quantities, + value_cap=100, + ) + if remote_col is not None: + remote_col.set_context( + pair_key=pair_key, + game_index=gi, + role="col", + valuations_self=v2, + valuations_opp=v1, + batna_self=b2, + batna_opp=b1, + discount=discount, + max_rounds=max_rounds, + quantities=quantities, + value_cap=100, + ) + + cur = state.current_player() + if cur == 0: + if is_row_remote and remote_row is not None: + actions = _list_actions(state) + remote_row.set_round(round_idx) + walk_action = _find_walk_action(actions) + choose_walk = False + if last_keep_from_col: + alloc_self, alloc_other = _allocations_from_keep(last_keep_from_col, "col") + remote_row.set_offer_context( + proposer="col", + offer_allocation_self=alloc_self, + offer_allocation_other=alloc_other, + round_index=round_idx, + ) + a_acc = _find_accept_action(actions) + chosen_accept = False + if a_acc is not None and last_keep_from_col is not None: + alloc_self, _ = _allocations_from_keep(last_keep_from_col, "col") + offer_value = _value(v1, alloc_self) + batna_value = b1 + counter_value = batna_value + try: + if remote_row.accepts(offer_value, batna_value, counter_value): + a = a_acc + chosen_accept = True + except Exception: + chosen_accept = False + if not chosen_accept: + try: + alloc_self, alloc_other = remote_row.propose(quantities, "row", v1, v2) + keep_vec = [int(x) for x in alloc_self] + a_keep = _find_action_for_keep(actions, keep_vec) + if a_keep is None: + choices = _non_terminal_actions(actions) + a = choices[0] if choices else (actions[0][0] if actions else 0) + else: + a = a_keep + except Exception: + choose_walk = True + if walk_action is not None: + a = walk_action + else: + a = actions[0][0] if actions else 0 + elif is_row_nfsp and nfsp_row is not None: + a = nfsp_row.step(state) + elif is_row_rnad and rnad_row is not None: + a = rnad_row.step(state) + else: + if "tough" in agent_row.lower(): + a = _tough_step(state, quantities) + elif "aspire" in agent_row.lower() or "aspiration" in agent_row.lower(): + a = _aspiration_step(state, quantities) + else: + a = _soft_step(state) + else: + if is_col_remote and remote_col is not None: + actions = _list_actions(state) + remote_col.set_round(round_idx) + walk_action = _find_walk_action(actions) + choose_walk = False + if last_keep_from_row: + alloc_self, alloc_other = _allocations_from_keep(last_keep_from_row, "row") + remote_col.set_offer_context( + proposer="row", + offer_allocation_self=alloc_self, + offer_allocation_other=alloc_other, + round_index=round_idx, + ) + a_acc = _find_accept_action(actions) + chosen_accept = False + if a_acc is not None and last_keep_from_row is not None: + alloc_self, _ = _allocations_from_keep(last_keep_from_row, "row") + offer_value = _value(v2, alloc_self) + batna_value = b2 + counter_value = batna_value + try: + if remote_col.accepts(offer_value, batna_value, counter_value): + a = a_acc + chosen_accept = True + except Exception: + chosen_accept = False + if not chosen_accept: + try: + alloc_self, alloc_other = remote_col.propose(quantities, "col", v2, v1) + keep_vec = [int(x) for x in alloc_self] + a_keep = _find_action_for_keep(actions, keep_vec) + if a_keep is None: + choices = 
_non_terminal_actions(actions) + a = choices[0] if choices else (actions[0][0] if actions else 0) + else: + a = a_keep + except Exception: + choose_walk = True + if walk_action is not None: + a = walk_action + else: + a = actions[0][0] if actions else 0 + elif is_col_nfsp and nfsp_col is not None: + a = nfsp_col.step(state) + elif is_col_rnad and rnad_col is not None: + a = rnad_col.step(state) + else: + if "tough" in agent_col.lower(): + a = _tough_step(state, quantities) + elif "aspire" in agent_col.lower() or "aspiration" in agent_col.lower(): + a = _aspiration_step(state, quantities) + else: + a = _soft_step(state) + + # Decode keep vector for proposals + try: + a_str = state.action_to_string(cur, a) + except Exception: + a_str = str(a) + keep_vec = _parse_keep_vector(a_str) + if keep_vec is not None and len(keep_vec) == len(quantities): + if cur == 0: + last_keep_from_row = keep_vec + else: + last_keep_from_col = keep_vec + + # Check if accepting + if AGREE_TOKEN in a_str: + accepted = True + accepted_round = round_idx + + state.apply_action(a) + if cur == 1: + round_idx += 1 + if round_idx > max_rounds: + break + + # Compute payoffs and record JSONL + if accepted: + # Accepted allocation: the acceptor gets what the proposer offered to them. + # In OpenSpiel negotiation, "keep" vector = what the proposer keeps. + # Round 1 (odd): Row proposes, Column accepts → Row keeps last_keep_from_row + # Round 2 (even): Column proposes, Row accepts → Column keeps last_keep_from_col + if accepted_round % 2 == 0: + # Even round: Column proposed, Row accepted + # Column keeps last_keep_from_col, Row gets the complement + col_keeps = last_keep_from_col or [quantities[0] // 2, quantities[1] // 2, quantities[2] // 2] + a1 = [quantities[i] - col_keeps[i] for i in range(len(quantities))] # Row gets complement + a2 = col_keeps # Column keeps what they proposed + else: + # Odd round: Row proposed, Column accepted + # Row keeps last_keep_from_row, Column gets the complement + row_keeps = last_keep_from_row or [quantities[0] // 2, quantities[1] // 2, quantities[2] // 2] + a1 = row_keeps # Row keeps what they proposed + a2 = [quantities[i] - row_keeps[i] for i in range(len(quantities))] # Column gets complement + disc = discount ** (accepted_round - 1) + p1 = float(_value(v1, a1)) * disc + p2 = float(_value(v2, a2)) * disc + ef1_ok = _is_ef1(v1, a1, a2) and _is_ef1(v2, a2, a1) + else: + # Walk: both get BATNAs discounted at the last round reached (cap at max_rounds) + end_round = min(round_idx, max_rounds) + disc = discount ** (end_round - 1) + p1 = float(b1) * disc + p2 = float(b2) * disc + ef1_ok = None + + # Write record + rec = { + "pair": pair_key, + "game": gi, + "accepted": bool(accepted), + "round": int(accepted_round if accepted else min(round_idx, max_rounds)), + "q": list(quantities), + "v1": list(map(int, v1)), + "v2": list(map(int, v2)), + "b1": float(b1), + "b2": float(b2), + "a1": a1 if accepted else [0, 0, 0], + "a2": a2 if accepted else [0, 0, 0], + "payoff1": p1, + "payoff2": p2, + "ef1": ef1_ok, + } + with (trace_file).open("a") as f: + f.write(json.dumps(rec) + "\n") + # Aggregates + if accepted: + n_accept += 1 + if isinstance(ef1_ok, bool) and ef1_ok: + n_ef1 += 1 + sum_row += p1 + sum_col += p2 + + return { + "pair": pair_key, + "trace_file": str(trace_file), + "accept_rate": n_accept / max(1, games), + "ef1_rate": n_ef1 / max(1, n_accept) if n_accept else 0.0, + "row_mean_payoff": sum_row / max(1, games), + "col_mean_payoff": sum_col / max(1, games), + } + + diff --git 
a/scenarios/bargaining/bargaining_env/reasoning_trace.py b/scenarios/bargaining/bargaining_env/reasoning_trace.py new file mode 100644 index 0000000..5e3df93 --- /dev/null +++ b/scenarios/bargaining/bargaining_env/reasoning_trace.py @@ -0,0 +1,60 @@ +import json +import os +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, Optional + + +class ReasoningTracer: + def __init__(self, base_dir: Optional[str] = None, file_prefix: str = "llm_reasoning"): + self._base = Path(base_dir or "bargaining_llm_traces") + self._base.mkdir(parents=True, exist_ok=True) + self._file_prefix = file_prefix + + def _default_file(self) -> Path: + ts = datetime.utcnow().strftime("%Y%m%d_%H%M%S") + return self._base / f"{self._file_prefix}_{ts}.jsonl" + + def _resolve_path(self, agent: Optional[str], pair: Optional[str]) -> Path: + # Organize logs by agent (if provided), then by pair (if provided) + if agent: + dir_path = self._base / agent + dir_path.mkdir(parents=True, exist_ok=True) + if pair: + return dir_path / f"{pair}.jsonl" + return dir_path / "session.jsonl" + return self._default_file() + + def log( + self, + *, + agent: Optional[str], + pair: Optional[str], + game: Optional[int], + round_index: Optional[int], + role: Optional[str], + prompt: str, + options: Optional[list[str]], + raw_response: str, + decision: str, + extra_meta: Optional[Dict[str, Any]] = None, + ) -> None: + record: Dict[str, Any] = { + "timestamp": datetime.utcnow().isoformat() + "Z", + "agent": agent, + "pair": pair, + "game": game, + "round": round_index, + "role": role, + "prompt": prompt, + "options": options, + "raw_model_response": raw_response, + "decision": decision, + } + if extra_meta: + record["meta"] = extra_meta + out_path = self._resolve_path(agent, pair) + with out_path.open("a") as f: + f.write(json.dumps(record) + "\n") + + diff --git a/scenarios/bargaining/bargaining_env/run_entire_matrix.py b/scenarios/bargaining/bargaining_env/run_entire_matrix.py new file mode 100644 index 0000000..16135ec --- /dev/null +++ b/scenarios/bargaining/bargaining_env/run_entire_matrix.py @@ -0,0 +1,795 @@ +import json +import logging +import math +import os +import random +import time +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Dict, List, Tuple +import concurrent.futures +from pathlib import Path +from urllib.parse import urlsplit, urlunsplit + +from .agents.soft import SoftNegotiator +from .agents.tough import ToughNegotiator +from .agents.aspiration import AspirationNegotiator +from .agents.base import BaseNegotiator +from .agents.remote import RemoteNegotiator, RemoteNegotiatorError +from .pyspiel_integration import build_negotiation_params, try_load_pyspiel_game +from .pyspiel_runner import run_pyspiel_pair, run_pyspiel_pair_nfsp_with_traces + +# BGS parameters (small game): fixed items +Q_BGS: Tuple[int, int, int] = (7, 4, 1) # quantities per item type +V_MAX_DEFAULT: int = 100 +WALK_BASELINE_GAMES: int = 300_000 +WALK_BASELINE_SEED: int = 42 + +logger = logging.getLogger(__name__) + +# Cache for synthetic walk traces so we only generate once per run +_WALK_BASELINE_CACHE: Dict[Tuple[Path, float, Tuple[int, int, int], int, int], Dict[str, Any]] = {} + + +@dataclass +class GameParams: + q: Tuple[int, int, int] + v_max: int + gamma: float + max_rounds: int + + +def _now_tag() -> str: + return time.strftime("%Y%m%d_%H%M%S", time.localtime()) + + +def _ensure_dir(p: str | Path) -> Path: + path = Path(p) + path.mkdir(parents=True, exist_ok=True) + return path 
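+
+
+# Worked example (illustrative only, not used at runtime): with the BGS pool
+# q = (7, 4, 1) and per-item values v1 = [10, 20, 50], the whole pool is worth
+# _dot(v1, q) = 10*7 + 20*4 + 50*1 = 200 to the row player, so their BATNA is
+# drawn uniformly from 1..200. If an allocation a1 = [4, 2, 0] is accepted in
+# round 2 with gamma = 0.98, the realized payoff is
+# _value(v1, a1) * gamma**(2 - 1) = (40 + 40 + 0) * 0.98 = 78.4.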
+ + +def _sanitize_endpoint(url: str) -> str: + try: + parsed = urlsplit(str(url)) + except Exception: + return str(url) + hostname = parsed.hostname or "" + port = f":{parsed.port}" if parsed.port else "" + netloc = f"{hostname}{port}" if hostname or port else parsed.netloc + sanitized = (parsed.scheme or "", netloc, parsed.path, "", "") + return urlunsplit(sanitized) + + +def _sample_instance(v_max: int) -> Tuple[List[int], List[int], int, int]: + # Sample valuations v1, v2 ~ U({1..v_max}) per item type; BATNAs ~ U(1..v_i·q) + v1 = [random.randint(1, v_max) for _ in range(3)] + v2 = [random.randint(1, v_max) for _ in range(3)] + return v1, v2, 0, 0 # batnas computed after dot with q + + +def _dot(v: List[int], q: Tuple[int, int, int]) -> int: + return v[0] * q[0] + v[1] * q[1] + v[2] * q[2] + + +def _policy_kind(agent_name: str) -> str: + n = agent_name.lower() + # "challenger" is a remote agent that always walks + if "walk" in n or "challenger" in n: + return "walk" + if "tough" in n or "boulware" in n or "-c-6" in n or "c-6" in n: + return "tough" + if "aspire" in n or "aspiration" in n or "conceder" in n: + return "aspiration" + if "soft" in n or "conceder" in n or "-c-0" in n or "c-0" in n or "-c-1" in n or "c-1" in n or "-c-2" in n or "c-2" in n: + return "soft" + return "balanced" + +def _agent_impl(kind: str) -> BaseNegotiator | None: + if kind == "soft": + return SoftNegotiator() + if kind == "tough": + return ToughNegotiator() + if kind == "aspiration": + return AspirationNegotiator() + return None + + +def _propose_allocation(policy: str, q: Tuple[int, int, int]) -> Tuple[List[int], List[int]]: + # Returns (a1, a2) allocations + # "challenger" is a remote agent that always walks - treat it like walk on fallback + if policy in ("walk", "challenger"): + return [0, 0, 0], [0, 0, 0] + if policy == "soft": + # even-ish split + a1 = [q[0] // 2, q[1] // 2, q[2] // 2] + a2 = [q[0] - a1[0], q[1] - a1[1], q[2] - a1[2]] + return a1, a2 + if policy == "tough": + # keep most items; leave at least 1 if available + a1 = [max(q[0] - 1, 0), max(q[1] - 1, 0), max(q[2] - 1, 0)] + a2 = [q[0] - a1[0], q[1] - a1[1], q[2] - a1[2]] + return a1, a2 + # balanced + take = [math.ceil(q[0] * 0.6), math.ceil(q[1] * 0.6), math.ceil(q[2] * 0.6)] + take = [min(take[0], q[0]), min(take[1], q[1]), min(take[2], q[2])] + a1 = take + a2 = [q[0] - a1[0], q[1] - a1[1], q[2] - a1[2]] + return a1, a2 + + +def _value(v: List[int], a: List[int]) -> int: + return v[0] * a[0] + v[1] * a[1] + v[2] * a[2] + + +def _accepts(policy: str, offer_value: int, batna: int, counter_value: int) -> bool: + # Simple acceptance thresholds by policy + # "challenger" is a remote agent that always walks - treat it like walk on fallback + if policy in ("walk", "challenger"): + return False + if policy == "soft": + return offer_value >= batna + if policy == "tough": + return offer_value >= max(batna, int(counter_value * 1.1)) + # balanced + return offer_value >= max(batna, int(counter_value * 0.95)) + + +def _is_ef1(v: List[int], a_self: List[int], a_other: List[int]) -> bool: + # EF1 condition: v·a_other - v·a_self <= max_{k: a_other[k] > 0} v[k] + self_val = _value(v, a_self) + other_val = _value(v, a_other) + if other_val <= self_val: + return True + max_item = 0 + for k in range(3): + if a_other[k] > 0: + max_item = max(max_item, v[k]) + return (other_val - self_val) <= max_item + + +def _ensure_walk_baseline( + *, + base_dir: Path, + discount: float, + quantities: Tuple[int, int, int], + v_max: int = V_MAX_DEFAULT, + games: int = 
WALK_BASELINE_GAMES, + rng_seed: int = WALK_BASELINE_SEED, +) -> Dict[str, Any]: + """ + Deterministically synthesize walk-vs-anyone traces/payoffs once and reuse. + Walk agent always walks in round 1; both players receive their BATNAs (no discount). + """ + key = (Path(base_dir).resolve(), float(discount), quantities, int(v_max), int(games)) + if key in _WALK_BASELINE_CACHE: + return _WALK_BASELINE_CACHE[key] + + rand = random.Random(rng_seed) + traces_dir = _ensure_dir(base_dir / "traces") + trace_file = traces_dir / "walk_baseline.jsonl" + # Always regenerate to ensure correct game count/params + if trace_file.exists(): + trace_file.unlink() + + sum_row = 0.0 + sum_col = 0.0 + accept_rate = 0.0 + + with trace_file.open("w") as f: + for gi in range(games): + v1 = [rand.randint(1, v_max) for _ in range(3)] + v2 = [rand.randint(1, v_max) for _ in range(3)] + b1 = rand.randint(1, _dot(v1, quantities)) + b2 = rand.randint(1, _dot(v2, quantities)) + rec = { + "pair": "walk_baseline", + "game": gi, + "accepted": False, + "round": 1, + "q": list(quantities), + "v1": v1, + "v2": v2, + "b1": float(b1), + "b2": float(b2), + "a1": [0, 0, 0], + "a2": [0, 0, 0], + "payoff1": float(b1), # round 1 -> no discount + "payoff2": float(b2), + "ef1": None, + } + f.write(json.dumps(rec) + "\n") + sum_row += float(b1) + sum_col += float(b2) + + result = { + "pair": "walk_baseline", + "trace_file": str(trace_file), + "accept_rate": accept_rate, + "ef1_rate": 0.0, + "row_mean_payoff": sum_row / max(1, games), + "col_mean_payoff": sum_col / max(1, games), + } + _WALK_BASELINE_CACHE[key] = result + return result + + +def _simulate_pair( + agent_row: str, + agent_col: str, + params: GameParams, + games: int, + base_dir: Path, + pair_key: str, + rng_seed: int | None = None, + remote_agents: Dict[str, str] | None = None, + remote_agent_circles: Dict[str, int] | None = None, + debug: bool = False, +) -> Dict[str, Any]: + random_gen = random.Random(rng_seed) + out_path = _ensure_dir(base_dir / "traces") + trace_file = out_path / f"{pair_key}.jsonl" + + row_policy = _policy_kind(agent_row) + col_policy = _policy_kind(agent_col) + remote_agents = remote_agents or {} + remote_agent_circles = remote_agent_circles or {} + row_is_remote = agent_row in remote_agents + col_is_remote = agent_col in remote_agents + + row_impl: BaseNegotiator | None + col_impl: BaseNegotiator | None + if row_is_remote: + row_impl = RemoteNegotiator( + label=agent_row, + endpoint=remote_agents[agent_row], + prompt_circle=remote_agent_circles.get(agent_row), + ) + else: + row_impl = _agent_impl(row_policy) + if col_is_remote: + col_impl = RemoteNegotiator( + label=agent_col, + endpoint=remote_agents[agent_col], + prompt_circle=remote_agent_circles.get(agent_col), + ) + else: + col_impl = _agent_impl(col_policy) + + n_accept = 0 + n_ef1 = 0 + sum_row = 0.0 + sum_col = 0.0 + + with open(trace_file, "w") as f: + for g in range(games): + if row_is_remote and row_impl is None: + row_impl = RemoteNegotiator( + label=agent_row, + endpoint=remote_agents[agent_row], + prompt_circle=remote_agent_circles.get(agent_row), + ) + if col_is_remote and col_impl is None: + col_impl = RemoteNegotiator( + label=agent_col, + endpoint=remote_agents[agent_col], + prompt_circle=remote_agent_circles.get(agent_col), + ) + + v1 = [random_gen.randint(1, params.v_max) for _ in range(3)] + v2 = [random_gen.randint(1, params.v_max) for _ in range(3)] + b1 = random_gen.randint(1, _dot(v1, params.q)) + b2 = random_gen.randint(1, _dot(v2, params.q)) + + if row_impl is not None and 
hasattr(row_impl, "set_context"): + row_impl.set_context( + pair_key=pair_key, + game_index=g, + role="row", + valuations_self=v1, + valuations_opp=v2, + batna_self=b1, + batna_opp=b2, + discount=params.gamma, + max_rounds=params.max_rounds, + quantities=list(params.q), + value_cap=params.v_max, + ) + if col_impl is not None and hasattr(col_impl, "set_context"): + col_impl.set_context( + pair_key=pair_key, + game_index=g, + role="col", + valuations_self=v2, + valuations_opp=v1, + batna_self=b2, + batna_opp=b1, + discount=params.gamma, + max_rounds=params.max_rounds, + quantities=list(params.q), + value_cap=params.v_max, + ) + + # Round 1: row proposes + if row_impl is not None and hasattr(row_impl, "set_round"): + row_impl.set_round(1) + if row_impl is not None: + try: + a1_prop, a2_prop = row_impl.propose(params.q, role="row", v_self=v1, v_opp=v2) + except RemoteNegotiatorError as err: + logger.warning("Remote agent %s failed to propose as row: %s", agent_row, err) + if debug: + print(f"[DEBUG] remote row {agent_row} propose fallback: {err}") + row_impl = None + a1_prop, a2_prop = _propose_allocation(row_policy, params.q) + else: + a1_prop, a2_prop = _propose_allocation(row_policy, params.q) + v2_offer = _value(v2, a2_prop) + v1_offer = _value(v1, a1_prop) + + if col_impl is not None and hasattr(col_impl, "set_offer_context"): + col_impl.set_offer_context( + proposer="row", + offer_allocation_self=a2_prop, + offer_allocation_opp=a1_prop, + offer_value=v2_offer, + round_index=1, + ) + + #counterfactual if column proposes + if col_impl is not None and hasattr(col_impl, "set_round"): + col_impl.set_round(2) + if col_impl is not None: + try: + a2_counter, a1_counter = col_impl.propose(params.q, role="col", v_self=v2, v_opp=v1) + except RemoteNegotiatorError as err: + logger.warning("Remote agent %s failed to propose as column: %s", agent_col, err) + if debug: + print(f"[DEBUG] remote col {agent_col} propose fallback: {err}") + col_impl = None + a2_counter, a1_counter = _propose_allocation(col_policy, params.q) + else: + a2_counter, a1_counter = _propose_allocation(col_policy, params.q) + v2_counter_val = _value(v2, a2_counter) + v1_counter_val = _value(v1, a1_counter) + if col_impl is not None and hasattr(col_impl, "set_offer_context"): + col_impl.set_offer_context( + counter_allocation_self=a2_counter, + counter_allocation_opp=a1_counter, + counter_value=v2_counter_val, + ) + + accepted = False + accepted_round = 1 + a1_final, a2_final = a1_prop, a2_prop + if col_impl is not None: + try: + col_accepts = col_impl.accepts(v2_offer, b2, v2_counter_val) + except RemoteNegotiatorError as err: + logger.warning("Remote agent %s failed to decide acceptance: %s", agent_col, err) + if debug: + print(f"[DEBUG] remote col {agent_col} accept fallback: {err}") + col_impl = None + col_accepts = _accepts(col_policy, v2_offer, b2, v2_counter_val) + else: + col_accepts = _accepts(col_policy, v2_offer, b2, v2_counter_val) + if col_accepts: + accepted = True + accepted_round = 1 + else: + # Round 2: column proposes + a2_prop2, a1_prop2 = a2_counter, a1_counter + v1_offer2 = _value(v1, a1_prop2) + v2_offer2 = _value(v2, a2_prop2) + a1_final, a2_final = a1_prop2, a2_prop2 + if row_impl is not None and hasattr(row_impl, "set_offer_context"): + row_impl.set_offer_context( + proposer="col", + offer_allocation_self=a1_prop2, + offer_allocation_opp=a2_prop2, + offer_value=v1_offer2, + counter_allocation_self=a1_prop, + counter_allocation_opp=a2_prop, + counter_value=v1_offer, + round_index=2, + ) + if row_impl is not 
None and hasattr(row_impl, "set_round"): + row_impl.set_round(2) + if row_impl is not None: + try: + row_accepts = row_impl.accepts(v1_offer2, b1, v1_offer) + except RemoteNegotiatorError as err: + logger.warning("Remote agent %s failed to decide acceptance: %s", agent_row, err) + if debug: + print(f"[DEBUG] remote row {agent_row} accept fallback: {err}") + row_impl = None + row_accepts = _accepts(row_policy, v1_offer2, b1, v1_offer) + else: + row_accepts = _accepts(row_policy, v1_offer2, b1, v1_offer) + if row_accepts: + accepted = True + accepted_round = 2 + + if not accepted: + # walk: both get BATNAs, discounted by gamma^(r-1) + end_round = 2 if params.max_rounds >= 2 else max(1, params.max_rounds) + disc = params.gamma ** (end_round - 1) + payoff1 = b1 * disc + payoff2 = b2 * disc + record = { + "pair": pair_key, + "game": g, + "accepted": False, + "terminal": "walk", + "round": end_round, + "q": params.q, + "v1": v1, + "v2": v2, + "b1": b1, + "b2": b2, + "a1": [0, 0, 0], + "a2": [0, 0, 0], + "payoff1": payoff1, + "payoff2": payoff2, + "ef1": None, + } + f.write(json.dumps(record) + "\n") + sum_row += payoff1 + sum_col += payoff2 + continue + + # accepted allocation + r_idx = accepted_round - 1 + disc = params.gamma ** r_idx + v1_realized = _value(v1, a1_final) + v2_realized = _value(v2, a2_final) + payoff1 = v1_realized * disc + payoff2 = v2_realized * disc + ef1_ok = _is_ef1(v1, a1_final, a2_final) and _is_ef1(v2, a2_final, a1_final) + + record = { + "pair": pair_key, + "game": g, + "accepted": True, + "terminal": "accept", + "round": accepted_round, + "q": params.q, + "v1": v1, + "v2": v2, + "b1": b1, + "b2": b2, + "a1": a1_final, + "a2": a2_final, + "payoff1": payoff1, + "payoff2": payoff2, + "ef1": ef1_ok, + } + f.write(json.dumps(record) + "\n") + n_accept += 1 + n_ef1 += 1 if ef1_ok else 0 + sum_row += payoff1 + sum_col += payoff2 + + return { + "pair": pair_key, + "trace_file": str(trace_file), + "accept_rate": n_accept / max(1, games), + "ef1_rate": n_ef1 / max(1, n_accept) if n_accept else 0.0, + "row_mean_payoff": sum_row / max(1, games), + "col_mean_payoff": sum_col / max(1, games), + } + + +def run_matrix_pipeline( + *, + model_circles: List[str] | None, + model_shortnames: Dict[str, str] | None, + full_matrix: bool = True, + matrix_id: int = 1, + model: str | None = None, + circle: int | str | None = None, + date: str | None = None, + max_rounds: int = 3, + games: int = 50, + total_games: int | None = None, + parallel: int | bool = False, # number of worker threads; False/0 => sequential + discount: float = 0.98, + skip_existing: bool = False, # not used + force_new_dirs: bool = False, # not used + dry_run: bool = False, + use_openspiel: bool = True, # must remain True + num_items: int = 3, + debug: bool = False, + pyspiel_dump_games: int = 0, + nfsp_checkpoint_path: str | None = None, + rnad_checkpoint_path: str | None = None, + challenger_label: str | None = None, + challenger_url: str | None = None, + remote_agents: Dict[str, str] | None = None, + challenger_circle: int | None = None, + remote_agent_circles: Dict[str, int] | None = None, +) -> Dict[str, Any]: + """Simulate bargaining for a roster of 'agents' on OpenSpiel and save traces and payoffs.""" + if not use_openspiel: + raise ValueError("OpenSpiel negotiation is required for all matchups.") + assert num_items == 3, "This lightweight pipeline only supports BGS (3 items)." 
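+ # Added note: the bg4/bg5/bg6 shorthand used for RL checkpoints below encodes
+ # the game spec, as implemented in _checkpoint_key():
+ #   bg4 -> max_rounds=3, discount=0.90
+ #   bg5 -> max_rounds=3, discount=0.98
+ #   bg6 -> max_rounds=5, discount=0.98
+ # Other (max_rounds, discount) combinations run without the NFSP/RNAD baselines.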
+ tag = date or _now_tag() + base_dir = _ensure_dir(Path("bargaining_runs") / f"BGS_matrix_{matrix_id}_{tag}") + + # Default RL checkpoints based on game spec + def _checkpoint_key(max_rounds: int, discount: float) -> str | None: + if max_rounds == 3 and abs(discount - 0.9) < 1e-6: + return "bg4" + if max_rounds == 3 and abs(discount - 0.98) < 1e-6: + return "bg5" + if max_rounds == 5 and abs(discount - 0.98) < 1e-6: + return "bg6" + return None + + ckpt_key = _checkpoint_key(max_rounds, discount) + rl_root = Path(__file__).resolve().parent.parent / "rl_agent_checkpoints" + nfsp_map = { + "bg4": rl_root / "nfsp" / "nfsp_bg4.pt", + "bg5": rl_root / "nfsp" / "nfsp_ng5.pt", # assuming ng5 corresponds to BG5 + "bg6": rl_root / "nfsp" / "nfsp_bg6.pt", + } + rnad_map = { + "bg4": rl_root / "rnad" / "rnad_bg4.pkl", + "bg5": rl_root / "rnad" / "rnad_bg5.pkl", + "bg6": rl_root / "rnad" / "rnad_bg6.pkl", + } + if nfsp_checkpoint_path is None and ckpt_key and ckpt_key in nfsp_map and nfsp_map[ckpt_key].exists(): + nfsp_checkpoint_path = str(nfsp_map[ckpt_key]) + if rnad_checkpoint_path is None and ckpt_key and ckpt_key in rnad_map and rnad_map[ckpt_key].exists(): + rnad_checkpoint_path = str(rnad_map[ckpt_key]) + + remote_agent_map: Dict[str, str] = {str(k): str(v) for k, v in (remote_agents or {}).items()} + if challenger_label and challenger_url: + remote_agent_map.setdefault(str(challenger_label), str(challenger_url)) + + remote_circle_map_raw: Dict[str, int] = {} + for k, v in (remote_agent_circles or {}).items(): + try: + remote_circle_map_raw[str(k)] = int(v) + except Exception: + continue + if challenger_label and challenger_circle is not None: + try: + remote_circle_map_raw.setdefault(str(challenger_label), int(challenger_circle)) + except Exception: + pass + + if full_matrix: + if not model_circles: + agents = ["soft", "tough", "aspiration", "walk"] + else: + agents = [str(a) for a in model_circles] + # Only include RL baselines when we're in a supported bg4/bg5/bg6 config + if ckpt_key in {"bg4", "bg5", "bg6"}: + available_rl: List[str] = [] + if nfsp_checkpoint_path and Path(nfsp_checkpoint_path).exists(): + available_rl.append("nfsp") + if rnad_checkpoint_path and Path(rnad_checkpoint_path).exists(): + available_rl.append("rnad") + for rl_agent in available_rl: + if rl_agent not in agents: + agents.append(rl_agent) + else: + if model is None or circle is None: + raise ValueError("When full_matrix is false, both 'model' and 'circle' must be provided.") + agents = [f"{model}-c-{circle}"] + + if remote_agent_map: + for label in remote_agent_map.keys(): + if label not in agents: + agents.append(label) + + # Always include a 'soft' baseline for comparison + if not any(str(a).lower() == "soft" for a in agents): + agents.append("soft") + # Always include a 'tough' baseline for comparison + if not any(str(a).lower() == "tough" for a in agents): + agents.append("tough") + # Always include an 'aspiration' baseline for comparison + if not any("aspire" in str(a).lower() or "aspiration" in str(a).lower() for a in agents): + agents.append("aspiration") + + # Optional shortnames mapping + short_map = {a: (model_shortnames[a] if model_shortnames and a in model_shortnames else a) for a in agents} + agent_ids = [short_map[a] for a in agents] + + remote_agent_urls: Dict[str, str] = {} + remote_agent_meta: Dict[str, Dict[str, str]] = {} + remote_circle_map: Dict[str, int] = {} + if remote_agent_map: + for orig_label, url in remote_agent_map.items(): + resolved = short_map.get(orig_label, orig_label) + 
remote_agent_urls[resolved] = url + remote_agent_meta[resolved] = { + "original_label": orig_label, + "endpoint_hint": _sanitize_endpoint(url), + } + if remote_circle_map_raw: + for orig_label, circle_val in remote_circle_map_raw.items(): + resolved = short_map.get(orig_label, orig_label) + remote_circle_map[resolved] = int(circle_val) + + meta = { + "agents": agent_ids, + "original_agents": agents, + "params": { + "q": Q_BGS, + "v_max": V_MAX_DEFAULT, + "gamma": discount, + "max_rounds": max_rounds, + **( + {"games_per_pair": games} + if not total_games + else {"total_games": int(total_games)} + ), + }, + } + # Attach OpenSpiel negotiation config; must load successfully + neg_params = build_negotiation_params( + discount=discount, + max_rounds=max_rounds, + num_items=num_items, + item_quantities=Q_BGS, + min_value=1, + max_value=V_MAX_DEFAULT, + max_quantity=10, + ) + game = try_load_pyspiel_game(neg_params) + if game is None: + raise RuntimeError("Failed to load OpenSpiel negotiation game; required for all matchups.") + pyspiel_loaded = True + meta["pyspiel"] = { + "enabled": bool(use_openspiel), + "loaded": bool(pyspiel_loaded), + "negotiation_params": neg_params, + } + if remote_agent_meta: + meta["remote_agents"] = remote_agent_meta + (base_dir / "meta.json").write_text(json.dumps(meta, indent=2)) + + if dry_run: + return {"base_dir": str(base_dir), "experiments": []} + + params = GameParams(q=Q_BGS, v_max=V_MAX_DEFAULT, gamma=discount, max_rounds=max_rounds) + results: Dict[str, Any] = {} + experiments: List[str] = [] + + # Determine role-balanced allocation + # Build work items (pair_key, agent_row, agent_col, num_games) + work: List[Tuple[str, str, str, int]] = [] + + if total_games and total_games > 0: + # Use unordered pairs (i <= j), split games evenly, and balance roles per pair + unordered_pairs: List[Tuple[int, int]] = [] + n_agents = len(agent_ids) + for i in range(n_agents): + for j in range(i, n_agents): + unordered_pairs.append((i, j)) + num_pairs = len(unordered_pairs) if unordered_pairs else 1 + base = total_games // num_pairs + remainder = total_games % num_pairs + + for k, (i, j) in enumerate(unordered_pairs): + ai, aj = agent_ids[i], agent_ids[j] + per_pair_total = base + (1 if k < remainder else 0) + g_row = per_pair_total // 2 + g_col = per_pair_total - g_row + if g_row > 0: + work.append((f"{ai}__vs__{aj}", ai, aj, g_row)) + if g_col > 0: + work.append((f"{aj}__vs__{ai}", aj, ai, g_col)) + else: + # Per-pair behavior: for each unordered matchup, run `games` total, split evenly across roles + unordered_pairs: List[Tuple[int, int]] = [] + n_agents = len(agent_ids) + for i in range(n_agents): + for j in range(i, n_agents): + unordered_pairs.append((i, j)) + + for (i, j) in unordered_pairs: + ai, aj = agent_ids[i], agent_ids[j] + if i == j: + # Self-play: run all as ai vs ai once + work.append((f"{ai}__vs__{aj}", ai, aj, games)) + continue + + g_row = games // 2 + g_col = games - g_row + if g_row > 0: + work.append((f"{ai}__vs__{aj}", ai, aj, g_row)) + if g_col > 0: + work.append((f"{aj}__vs__{ai}", aj, ai, g_col)) + + # Decide concurrency + if parallel: + max_workers = parallel if isinstance(parallel, int) and parallel > 0 else None + else: + max_workers = 1 + + # Pre-generate walk baseline once if any matchup involves a walk policy + need_walk_baseline = any( + _policy_kind(row) == "walk" or _policy_kind(col) == "walk" + for (_, row, col, _) in work + ) + if need_walk_baseline: + _ = _ensure_walk_baseline( + base_dir=base_dir, + discount=discount, + 
quantities=Q_BGS, + v_max=V_MAX_DEFAULT, + games=WALK_BASELINE_GAMES, + rng_seed=WALK_BASELINE_SEED, + ) + + def _run_pair(item: Tuple[str, str, str, int]) -> Tuple[str, Dict[str, Any]]: + pair_key, row_agent, col_agent, g = item + row_policy = _policy_kind(row_agent) + col_policy = _policy_kind(col_agent) + # Remote agents always play real games, even if they have "walk" policy + row_is_remote = row_agent in remote_agent_urls + col_is_remote = col_agent in remote_agent_urls + use_walk_baseline = (row_policy == "walk" or col_policy == "walk") and not row_is_remote and not col_is_remote + if use_walk_baseline: + base = _ensure_walk_baseline( + base_dir=base_dir, + discount=discount, + quantities=Q_BGS, + v_max=V_MAX_DEFAULT, + games=WALK_BASELINE_GAMES, + rng_seed=WALK_BASELINE_SEED, + ) + # Create symlink from pair-specific path to walk baseline so analysis can find it + traces_dir = base_dir / "traces" + pair_trace_file = traces_dir / f"{pair_key}.jsonl" + walk_baseline_file = Path(base["trace_file"]) + if not pair_trace_file.exists() and walk_baseline_file.exists(): + try: + pair_trace_file.symlink_to(walk_baseline_file.name) + except (OSError, FileExistsError): + pass # Symlink may already exist or fail on some systems + sim = { + **base, + "pair": pair_key, + # Use the same trace file for all walk pairings; counts are total games + "trace_file": str(pair_trace_file), + "accept_rate": base["accept_rate"], + "ef1_rate": base["ef1_rate"], + "row_mean_payoff": base["row_mean_payoff"], + "col_mean_payoff": base["col_mean_payoff"], + } + else: + sim = run_pyspiel_pair_nfsp_with_traces( + pair_key=pair_key, + agent_row=row_agent, + agent_col=col_agent, + discount=discount, + max_rounds=max_rounds, + num_items=num_items, + quantities=Q_BGS, + games=g, + out_dir=base_dir, + nfsp_checkpoint_path=nfsp_checkpoint_path, + rnad_checkpoint_path=rnad_checkpoint_path, + remote_agents=remote_agent_urls, + remote_agent_circles=remote_circle_map, + ) + return pair_key, sim + + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: + futures = [executor.submit(_run_pair, item) for item in work] + for fut in concurrent.futures.as_completed(futures): + pair_key, sim = fut.result() + results[pair_key] = sim + experiments.append(pair_key) + if debug: + print(f"Simulated {pair_key}: {sim['row_mean_payoff']:.1f} / {sim['col_mean_payoff']:.1f}") + + (base_dir / "payoffs.json").write_text(json.dumps(results, indent=2)) + return {"base_dir": str(base_dir), "experiments": experiments} + + diff --git a/scenarios/bargaining/bargaining_green.py b/scenarios/bargaining/bargaining_green.py new file mode 100644 index 0000000..da0e104 --- /dev/null +++ b/scenarios/bargaining/bargaining_green.py @@ -0,0 +1,343 @@ +import argparse +import asyncio +import json +import logging +import os +import sys +from typing import Any, Dict, Optional + +# Ensure project root is on path for local imports +project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +if project_root not in sys.path: + sys.path.insert(0, project_root) + +import contextlib +from a2a.types import AgentCard, AgentCapabilities, AgentSkill + +# Optional server dependencies; provide fallbacks for CLI 'once' mode +HAVE_A2A = True +try: + from agentbeats.green_executor import GreenAgent, GreenExecutor # type: ignore + from agentbeats.models import EvalRequest, EvalResult # type: ignore + from a2a.types import TaskState, Part, TextPart # type: ignore + from a2a.server.tasks import TaskUpdater # type: ignore + from a2a.utils 
import new_agent_text_message # type: ignore + from a2a.server.apps import A2AStarletteApplication # type: ignore + from a2a.server.request_handlers import DefaultRequestHandler # type: ignore + from a2a.server.tasks import InMemoryTaskStore # type: ignore + import uvicorn # type: ignore +except Exception: + HAVE_A2A = False + + class GreenAgent: # type: ignore + pass + + class EvalRequest: # type: ignore + def __init__(self, participants: Dict[str, Any], config: Optional[Dict[str, Any]] = None): + self.participants = participants + self.config = config or {} + + class EvalResult: # type: ignore + def __init__(self, winner: str, detail: Dict[str, Any]): + self.winner = winner + self.detail = detail + + def model_dump(self) -> Dict[str, Any]: + return {"winner": self.winner, "detail": self.detail} + + class TaskState: # type: ignore + working = "working" + completed = "completed" + + def new_agent_text_message(text: str, context_id: Optional[str] = None) -> Any: # type: ignore + class _Msg: + pass + m = _Msg() + m.text = text + return m + +from .bargaining_env.run_entire_matrix import run_matrix_pipeline +from .bargaining_env.main import run_analysis, run_metagame_analysis + +logger = logging.getLogger("bargaining_green") +logging.basicConfig(level=logging.INFO) + + +class BargainingGreenAgent(GreenAgent): + """ + Green orchestrator for the bargaining meta-game framework. + It simulates OpenSpiel negotiation games for a roster of agents, + then runs the meta-game analysis to compute regrets/welfare metrics. + """ + + def __init__(self): + # Require at least one challenger participant to compare against baselines. + self._required_roles = ["challenger"] + self._required_config_keys = [] + + def validate_request(self, request: EvalRequest) -> tuple[bool, str]: + participants = request.participants or {} + missing_roles = set(self._required_roles) - set(participants.keys()) + if missing_roles: + return False, f"Missing roles: {missing_roles}" + cfg = request.config or {} + full_matrix = cfg.get("full_matrix", True) + model = cfg.get("model") + circle = cfg.get("circle") + if not full_matrix and (model is None or circle is None): + return False, "When full_matrix is false, provide both 'model' and 'circle' in config." + if cfg.get("use_openspiel") is False: + return False, "OpenSpiel must be enabled for all bargaining runs." + return True, "ok" + + async def run_eval(self, req: EvalRequest, updater: Any) -> None: + cfg = req.config or {} + participants = req.participants or {} + await updater.update_status( + TaskState.working, new_agent_text_message("Starting bargaining simulations...") + ) + + # Simulation parameters + sim_kwargs: Dict[str, Any] = { + "full_matrix": cfg.get("full_matrix", True), + "matrix_id": cfg.get("matrix_id", 1), + "model": cfg.get("model"), + "circle": cfg.get("circle"), + "date": cfg.get("date"), + "max_rounds": cfg.get("max_rounds", 5), + "games": cfg.get("games", 50), + "total_games": cfg.get("total_games"), + "parallel": cfg.get("parallel", True), + "discount": cfg.get("discount", 0.98), + "skip_existing": cfg.get("skip_existing", False), + "force_new_dirs": cfg.get("force_new_dirs", False), + "dry_run": cfg.get("dry_run", False), + # Always use OpenSpiel for bargaining. 
+ "use_openspiel": True, + "num_items": cfg.get("num_items", 3), + "debug": cfg.get("debug", False), + } + + challenger_label = cfg.get("challenger_label", "challenger") + challenger_url = participants.get("challenger") + remote_agents_cfg: Dict[str, str] = {} + cfg_remote_agents = cfg.get("remote_agents") + if isinstance(cfg_remote_agents, dict): + remote_agents_cfg.update({str(k): str(v) for k, v in cfg_remote_agents.items()}) + if challenger_url: + remote_agents_cfg.setdefault(str(challenger_label), str(challenger_url)) + sim_kwargs["challenger_url"] = str(challenger_url) + sim_kwargs["challenger_label"] = str(challenger_label) + if remote_agents_cfg: + sim_kwargs["remote_agents"] = remote_agents_cfg + # Optional prompt circle selection for remote entrants + challenger_circle = cfg.get("challenger_circle") + circle_map: Dict[str, int] = {} + cfg_remote_circles = cfg.get("remote_agent_circles") + if isinstance(cfg_remote_circles, dict): + for label, circle_val in cfg_remote_circles.items(): + try: + circle_map[str(label)] = int(circle_val) + except Exception: + continue + if challenger_circle is not None: + try: + circle_map.setdefault(str(challenger_label), int(challenger_circle)) + except Exception: + pass + if circle_map: + sim_kwargs["remote_agent_circles"] = circle_map + + model_circles = cfg.get("model_circles") + model_shortnames = cfg.get("model_shortnames") + + def _run_matrix(): + return run_matrix_pipeline( + model_circles=model_circles, + model_shortnames=model_shortnames, + **sim_kwargs, + nfsp_checkpoint_path=cfg.get("nfsp_checkpoint_path"), + rnad_checkpoint_path=cfg.get("rnad_checkpoint_path"), + ) + + sim_result = await asyncio.to_thread(_run_matrix) + base_dir = sim_result.get("base_dir") + await updater.update_status( + TaskState.working, + new_agent_text_message(f"Simulation complete. Data in {base_dir}. 
Starting meta-game analysis..."), + ) + + output_dir = cfg.get("output_dir", "meta_game_analysis/results_bargaining") + analysis_kwargs = { + "input_dir": base_dir, + "output_dir": output_dir, + "discount_factor": cfg.get("discount", 0.98), + "num_bootstrap": cfg.get("bootstrap", 100), + "norm_constants": cfg.get("norm_constants", {"UW": 805.9, "NW": 378.7, "NWA": 81.7}), + "random_seed": cfg.get("random_seed", 42), + } + + await asyncio.to_thread(run_analysis, **analysis_kwargs) + + # Run metagame analysis aggregation to produce standardized metrics + metrics_cfg = { + "input_dir": base_dir, + "output_dir": output_dir, + "discount_factor": cfg.get("discount", 0.98), + "num_bootstrap": cfg.get("bootstrap", 100), + "norm_constants": cfg.get("norm_constants", {"UW": 805.9, "NW": 378.7, "NWA": 81.7}), + "random_seed": cfg.get("random_seed", 42), + } + metrics = await asyncio.to_thread(run_metagame_analysis, metrics_cfg) + metrics_json = json.dumps(metrics, indent=2, sort_keys=True) + + # Send artifact with metrics + if HAVE_A2A: + await updater.add_artifact( + parts=[Part(root=TextPart(text=metrics_json))], + name="evaluation_results", + ) + + # Optional summary message + summary = metrics.get("summary", {}) if isinstance(metrics, dict) else {} + msg_lines = [ + "Metagame evaluation complete.", + f"Agents: {summary.get('num_agents', 0)}", + f"MENE regret (mean): {summary.get('mene_regret_mean')}", + f"NW% (mean): {summary.get('nw_percent_mean')}", + f"NWA% (mean): {summary.get('nwa_percent_mean')}", + f"UW% (mean): {summary.get('uw_percent_mean')}", + f"EF1% (mean): {summary.get('ef1_percent_mean')}", + "Full per-agent metrics are in the JSON artifact 'evaluation_results'.", + ] + await updater.update_status( + TaskState.completed, + new_agent_text_message("\n".join(str(x) for x in msg_lines), context_id=updater.context_id), + ) + + +def _run_once_from_cli(config_path: Optional[str]) -> None: + cfg_raw: Dict[str, Any] = {} + if config_path: + with open(config_path, "r") as f: + cfg_raw = json.load(f) + cfg: Dict[str, Any] = dict(cfg_raw) + participants = cfg.pop("participants", {}) + inline_challenger = cfg.pop("challenger_url", None) + if inline_challenger and "challenger" not in participants: + participants["challenger"] = inline_challenger + + dummy_req = EvalRequest(participants=participants, config=cfg) + agent = BargainingGreenAgent() + ok, msg = agent.validate_request(dummy_req) + if not ok: + raise ValueError(msg) + + class DummyUpdater: + """Minimal stand-in for TaskUpdater when running via CLI.""" + + def __init__(self): + self.context_id = None + + async def update_status(self, status, message): + logger.info(f"{status}: {getattr(message, 'text', message)}") + + async def add_artifact(self, parts, name): + logger.info(f"Artifact ({name}): {parts}") + + async def complete(self): + pass + + updater = DummyUpdater() + asyncio.run(agent.run_eval(dummy_req, updater)) # type: ignore[arg-type] + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Run the A2A bargaining green agent or a one-off CLI run.") + sub = parser.add_subparsers(dest="mode", required=False) + # server mode + p_srv = sub.add_parser("serve", help="Start the bargaining green A2A server") + p_srv.add_argument("--host", type=str, default="127.0.0.1", help="Host to bind the server") + p_srv.add_argument("--port", type=int, default=9029, help="Port to bind the server") + p_srv.add_argument("--card-url", type=str, help="External URL to provide in the agent card") + 
p_srv.add_argument("--cloudflare-quick-tunnel", action="store_true", help="Use a Cloudflare quick tunnel. Requires cloudflared. This will override --card-url") + # one-off mode + p_one = sub.add_parser("once", help="Run a single simulation + analysis from a JSON config") + p_one.add_argument("--config", type=str, help="Path to JSON config matching EvalRequest.config") + args = parser.parse_args() + + if args.mode == "once": + _run_once_from_cli(getattr(args, "config", None)) + else: + if not HAVE_A2A: + raise ImportError("Server mode requires 'agentbeats' and 'a2a' packages. Use 'once' mode or install dependencies.") + # default to server mode if no subcommand + if getattr(args, "mode", None) is None: + args.mode = "serve" + args.host = "127.0.0.1" + args.port = 9029 + args.card_url = None + args.cloudflare_quick_tunnel = False + + # Always use a bargaining-specific minimal card + def _card(name: str, url: str) -> AgentCard: + skill = AgentSkill( + id="bargaining_assessor", + name="Bargaining Meta-Game Assessment", + description=( + "Evaluates negotiation agents using Empirical Game-Theoretic Analysis (EGTA). " + "Simulates pairwise bargaining games in OpenSpiel, computes Maximum Entropy Nash " + "Equilibrium (MENE), and returns regret and welfare metrics. Supports BG4/BG5/BG6 " + "game configurations with pre-trained NFSP and RNAD baseline agents." + ), + tags=["bargaining", "negotiation", "game-theory", "EGTA", "MENE", "assessment"], + examples=[ + "Evaluate my negotiation agent against baseline strategies", + "Run meta-game analysis with custom discount factor", + "Assess agent performance using MENE regret metrics", + ], + ) + return AgentCard( + name=name, + version="1.0.0", + description=( + "Meta-Game Bargaining Evaluator: A green agent that implements Empirical " + "Game-Theoretic Analysis for multi-agent negotiation assessment. Computes " + "MENE-based regret and welfare metrics (UW, NW, NW+, EF1) to evaluate how " + "well-adapted agents are to strategic competition. Based on research from " + "the University of Michigan Strategic Reasoning Group." + ), + url=url, + preferred_transport="JSONRPC", + protocol_version="0.3.0", + default_input_modes=["text"], + default_output_modes=["text"], + capabilities=AgentCapabilities(streaming=True), + skills=[skill], + ) + + if args.cloudflare_quick_tunnel: + from agentbeats.cloudflare import quick_tunnel + agent_url_cm = quick_tunnel(f"http://{args.host}:{args.port}") + else: + base_url = f"http://{args.host}:{args.port}/" + agent_url_cm = contextlib.nullcontext(args.card_url or base_url) + + async def _serve() -> None: + async with agent_url_cm as agent_url: + agent = BargainingGreenAgent() + executor = GreenExecutor(agent) + agent_card = _card("BargainingGreen", agent_url) + request_handler = DefaultRequestHandler( + agent_executor=executor, + task_store=InMemoryTaskStore(), + ) + server = A2AStarletteApplication( + agent_card=agent_card, + http_handler=request_handler, + ) + uvicorn_config = uvicorn.Config(server.build(), host=args.host, port=args.port) + uvicorn_server = uvicorn.Server(uvicorn_config) + await uvicorn_server.serve() + + asyncio.run(_serve()) diff --git a/scenarios/bargaining/controller.py b/scenarios/bargaining/controller.py new file mode 100644 index 0000000..24c0d4d --- /dev/null +++ b/scenarios/bargaining/controller.py @@ -0,0 +1,82 @@ +""" +Minimal controller entrypoint for AgentBeats Cloud Run deployment. +This wraps the existing BargainingGreenAgent server implementation. 
+""" +import os +import asyncio +import logging +from typing import Dict, Any + +# Ensure project root is on path for local imports +import sys +project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")) +if project_root not in sys.path: + sys.path.insert(0, project_root) + +from scenarios.bargaining.bargaining_green import BargainingGreenAgent +from agentbeats.green_executor import GreenExecutor +from a2a.server.apps import A2AStarletteApplication +from a2a.server.request_handlers import DefaultRequestHandler +from a2a.server.tasks import InMemoryTaskStore +import uvicorn + +logger = logging.getLogger("bargaining_controller") +logging.basicConfig(level=logging.INFO) + + +def _create_agent_card(name: str, url: str) -> Dict[str, Any]: + """Create a minimal agent card for the bargaining green agent.""" + return { + "name": name, + "version": "0.1.0", + "description": "Bargaining Green Agent - Meta-game analysis controller", + "endpoints": [{"type": "http", "url": url}], + } + + +async def _serve(host: str = "0.0.0.0", port: int = 8080, card_url: str = None) -> None: + """Start the A2A server for the bargaining green agent.""" + # Use PORT environment variable if set (Cloud Run convention) + port = int(os.environ.get("PORT", port)) + + # Determine card URL - use provided URL or construct from host/port + if not card_url: + # In Cloud Run, we need the public URL, but we'll use a placeholder + # The actual URL will be set by Cloud Run's environment + card_url = f"http://{host}:{port}/" + + agent = BargainingGreenAgent() + executor = GreenExecutor(agent) + agent_card = _create_agent_card("BargainingGreen", card_url) + + request_handler = DefaultRequestHandler( + agent_executor=executor, + task_store=InMemoryTaskStore(), + ) + + server = A2AStarletteApplication( + agent_card=agent_card, + http_handler=request_handler, + ) + + uvicorn_config = uvicorn.Config(server.build(), host=host, port=port) + uvicorn_server = uvicorn.Server(uvicorn_config) + logger.info(f"Starting bargaining green agent server on {host}:{port}") + await uvicorn_server.serve() + + +def main(): + """Main entrypoint for the controller.""" + # Cloud Run sets PORT environment variable + port = int(os.environ.get("PORT", 8080)) + host = os.environ.get("HOST", "0.0.0.0") + + # Card URL can be set via environment variable for Cloud Run + card_url = os.environ.get("CARD_URL") + + asyncio.run(_serve(host=host, port=port, card_url=card_url)) + + +if __name__ == "__main__": + main() + diff --git a/scenarios/bargaining/green_agent.toml b/scenarios/bargaining/green_agent.toml new file mode 100644 index 0000000..816a532 --- /dev/null +++ b/scenarios/bargaining/green_agent.toml @@ -0,0 +1,49 @@ +# AgentBeats Controller Configuration for Bargaining Green Agent +# This config file is used by `agentbeats run_ctrl` command + +[controller] +name = "BargainingGreen" +description = """ +Meta-Game Bargaining Evaluator: Implements Empirical Game-Theoretic Analysis (EGTA) +for multi-agent negotiation assessment. Computes Maximum Entropy Nash Equilibrium (MENE) +to evaluate agent strategies against a pool of baseline negotiators (soft, tough, +aspiration, walk, NFSP, RNAD). Returns regret and welfare metrics (UW, NW, NW+, EF1). +""" +version = "1.0.0" + +# Entrypoint module and function +entrypoint = "scenarios.bargaining.controller:main" + +# The controller requires a `challenger` participant in each assessment request. +# Supply the purple agent URL under that role when running evaluations. 
+# +# Optional config parameters: +# - challenger_label: Custom label for the challenger agent (default: "challenger") +# - challenger_circle: Prompt sophistication level 0-6 for LLM agents +# - games: Number of games per agent pair (default: 50) +# - max_rounds: Maximum negotiation rounds (default: 5) +# - discount: Per-round discount factor (default: 0.98) +# - bootstrap: MENE bootstrap iterations (default: 100) +# - remote_agents: Additional remote agents {"label": "url"} +# - remote_agent_circles: Prompt circles for remote agents {"label": circle} + +[server] +# Cloud Run will set PORT environment variable (defaults to 8080) +# Host should bind to 0.0.0.0 to accept connections from Cloud Run's load balancer +host = "0.0.0.0" +port = 8080 # Will be overridden by $PORT env var in Cloud Run + +# Optional: Set CARD_URL environment variable in Cloud Run to override the agent card URL +# This should be set to the public HTTPS URL provided by Cloud Run + +[game_configs] +# Supported game configurations (automatically selected based on discount/max_rounds) +# BG4: discount=0.9, max_rounds=3 (high time pressure) +# BG5: discount=0.98, max_rounds=3 (low time pressure, short) +# BG6: discount=0.98, max_rounds=5 (low time pressure, long) + +[baseline_agents] +# Pre-trained RL checkpoints included for all configurations +nfsp = ["nfsp_bg4.pt", "nfsp_ng5.pt", "nfsp_bg6.pt"] +rnad = ["rnad_bg4.pkl", "rnad_bg5.pkl", "rnad_bg6.pkl"] + diff --git a/scenarios/bargaining/open_spiel/.github/workflows/actions.yml b/scenarios/bargaining/open_spiel/.github/workflows/actions.yml new file mode 100644 index 0000000..2257b36 --- /dev/null +++ b/scenarios/bargaining/open_spiel/.github/workflows/actions.yml @@ -0,0 +1,84 @@ +name: build_and_test + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + build: + strategy: + matrix: + include: + - os: ubuntu-24.04 + OS_PYTHON_VERSION: "3.12" + DEFAULT_OPTIONAL_DEPENDENCY: "OFF" + BUILD_SHARED_LIB: "OFF" + OPEN_SPIEL_ABSL_VERSION: "20250127.1" + OPEN_SPIEL_BUILD_WITH_ORTOOLS: "OFF" + OPEN_SPIEL_BUILD_WITH_ORTOOLS_DOWNLOAD_URL: "" + - os: macos-14 + OS_PYTHON_VERSION: "3.12" + DEFAULT_OPTIONAL_DEPENDENCY: "OFF" + BUILD_SHARED_LIB: "OFF" + OPEN_SPIEL_ABSL_VERSION: "20250127.1" + OPEN_SPIEL_BUILD_WITH_ORTOOLS: "OFF" + OPEN_SPIEL_BUILD_WITH_ORTOOLS_DOWNLOAD_URL: "" + - os: ubuntu-22.04 + OS_PYTHON_VERSION: "3.11" + DEFAULT_OPTIONAL_DEPENDENCY: "ON" + BUILD_SHARED_LIB: "OFF" + OPEN_SPIEL_ABSL_VERSION: "20230125.0" + OPEN_SPIEL_BUILD_WITH_ORTOOLS: "OFF" + OPEN_SPIEL_BUILD_WITH_ORTOOLS_DOWNLOAD_URL: "" + # Standard (most current) platforms and versions. 
+ - os: ubuntu-22.04 + OS_PYTHON_VERSION: "3.10" + DEFAULT_OPTIONAL_DEPENDENCY: "OFF" + BUILD_SHARED_LIB: "OFF" + OPEN_SPIEL_ABSL_VERSION: "20230125.0" + OPEN_SPIEL_BUILD_WITH_ORTOOLS: "OFF" + OPEN_SPIEL_BUILD_WITH_ORTOOLS_DOWNLOAD_URL: "" + - os: macos-13 + OS_PYTHON_VERSION: "3.11" + TRAVIS_USE_NOX: 0 + DEFAULT_OPTIONAL_DEPENDENCY: "OFF" + BUILD_SHARED_LIB: "OFF" + OPEN_SPIEL_ABSL_VERSION: "20250127.1" + OPEN_SPIEL_BUILD_WITH_ORTOOLS: "OFF" + OPEN_SPIEL_BUILD_WITH_ORTOOLS_DOWNLOAD_URL: "" + + runs-on: ${{ matrix.os }} + env: + OPEN_SPIEL_ENABLE_JAX: ON + OPEN_SPIEL_ENABLE_PYTORCH: ON + OPEN_SPIEL_ENABLE_PYTHON_MISC: ON + OPEN_SPIEL_ABSL_VERSION: ${{ matrix.OPEN_SPIEL_ABSL_VERSION }} + OS_PYTHON_VERSION: ${{ matrix.OS_PYTHON_VERSION }} + DEFAULT_OPTIONAL_DEPENDENCY: ${{ matrix.DEFAULT_OPTIONAL_DEPENDENCY }} + OPEN_SPIEL_BUILD_WITH_JULIA: ${{ matrix.OPEN_SPIEL_BUILD_WITH_JULIA }} + BUILD_SHARED_LIB: ${{ matrix.BUILD_SHARED_LIB }} + OPEN_SPIEL_BUILD_WITH_ORTOOLS: ${{ matrix.OPEN_SPIEL_BUILD_WITH_ORTOOLS }} + OPEN_SPIEL_BUILD_WITH_ORTOOLS_DOWNLOAD_URL: ${{ matrix.OPEN_SPIEL_BUILD_WITH_ORTOOLS_DOWNLOAD_URL }} + + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v2 + with: + version: 1.8 + - name: Ad-hoc fix + if: ${{ matrix.DEFAULT_OPTIONAL_DEPENDENCY == 'ON' }} + run: | + # workaround for https://github.com/deepmind/open_spiel/issues/606 + sudo cp /usr/lib/x86_64-linux-gnu/libstdc++.so.6 $(julia --startup-file=no -e 'using Libdl;print(abspath(joinpath(Libdl.dlpath("libjulia"), "..", "julia")))') + - name: Install + run: | + pwd + ./open_spiel/scripts/ci_python_prechecks.sh + chmod +x install.sh + ./install.sh + - name: Build and test + run: | + python3 --version + ./open_spiel/scripts/ci_script.sh diff --git a/scenarios/bargaining/open_spiel/.github/workflows/wheels.yml b/scenarios/bargaining/open_spiel/.github/workflows/wheels.yml new file mode 100644 index 0000000..1e12307 --- /dev/null +++ b/scenarios/bargaining/open_spiel/.github/workflows/wheels.yml @@ -0,0 +1,141 @@ +# Builds and tests the OpenSpiel wheels using cibuildwheel. +# +# Each wheel is built via the manylinux2014 pypa Docker image on Linux and +# standard MacOS X on 10.15. Each binary wheel is built only for x86_64. Basic +# API tests are run within the Docker environment that built the wheel. Full +# tests (tests that use extra dependencies such as PyTorch, JAX, Tensorflow) +# are tested in the Github Actions CI environment (Ubuntu 20.04 and Mac OS +# 10.15). +name: wheels + +on: + # Test the wheels for each PR to ensure the PR doesn't break them. + pull_request: + branches: [ master ] + # Workflow dispatch is a way to manually trigger workflows. This will be + # used to build and test the wheels manually for releases. 
+ workflow_dispatch: + inputs: + name: + description: 'Workflow dispatch (triggered manually)' + required: false + default: 'No name specified' + +jobs: + build_wheels: + name: Build wheels on ${{ matrix.os }} ${{ matrix.NAME }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + include: + - os: ubuntu-22.04 + NAME: "Linux" + OS_TYPE: "Linux" + CI_PYBIN: python3 + OS_PYTHON_VERSION: 3.10 + OPEN_SPIEL_ABSL_VERSION: "20250127.1" + CIBW_ENVIRONMENT: "CXX=$(which g++) OPEN_SPIEL_BUILDING_WHEEL='ON' OPEN_SPIEL_BUILD_WITH_ACPC='ON' OPEN_SPIEL_BUILD_WITH_HANABI='ON' OPEN_SPIEL_BUILD_WITH_ROSHAMBO='ON'" + CIBW_BUILD: cp39-manylinux_x86_64 cp310-manylinux_x86_64 cp311-manylinux_x86_64 cp312-manylinux_x86_64 cp313-manylinux_x86_64 + # These must use the old abseil + - os: macOS-13 + NAME: "MacOS13_Python_lte_3.11" + OS_TYPE: "Darwin" + CI_PYBIN: python3.9 + OS_PYTHON_VERSION: 3.9 + OPEN_SPIEL_ABSL_VERSION: "20230125.0" + CIBW_ENVIRONMENT: "OPEN_SPIEL_BUILDING_WHEEL='ON' OPEN_SPIEL_BUILD_WITH_ACPC='ON' OPEN_SPIEL_BUILD_WITH_HANABI='ON' OPEN_SPIEL_BUILD_WITH_ROSHAMBO='ON'" + CIBW_BUILD: cp39-macosx_x86_64 cp310-macosx_x86_64 cp311-macosx_x86_64 + # These use the new abseil + - os: macOS-13 + NAME: "MacOS13_Python_gte_3.12" + OS_TYPE: "Darwin" + CI_PYBIN: python3.12 + OPEN_SPIEL_ABSL_VERSION: "20250127.1" + OS_PYTHON_VERSION: 3.12 + CIBW_ENVIRONMENT: "OPEN_SPIEL_BUILDING_WHEEL='ON' OPEN_SPIEL_BUILD_WITH_ACPC='ON' OPEN_SPIEL_BUILD_WITH_HANABI='ON' OPEN_SPIEL_BUILD_WITH_ROSHAMBO='ON'" + CIBW_BUILD: cp312-macosx_x86_64 cp313-macosx_x86_64 + # Setting to the new M1 runners to build the _arm64 wheels + # https://github.blog/2023-10-02-introducing-the-new-apple-silicon-powered-m1-macos-larger-runner-for-github-actions/ + # Disabling now that the OpenSpiel 1.4 wheels are on PyPI because these xlarge machines are + # quite costly... we don't want to run these on every PR. + # TODO(author5): Set this to macos-13 once these runners are no longer in beta + #- os: macos-13-xlarge + # OS_TYPE: "Darwin" + # CI_PYBIN: python3.11 + # OS_PYTHON_VERSION: 3.11 + # CIBW_ENVIRONMENT: "OPEN_SPIEL_BUILDING_WHEEL='ON' OPEN_SPIEL_BUILD_WITH_ACPC='ON' OPEN_SPIEL_BUILD_WITH_HANABI='ON' OPEN_SPIEL_BUILD_WITH_ROSHAMBO='ON'" + # CIBW_BUILD: cp39-macosx_arm64 cp310-macosx_arm64 cp311-macosx_arm64 cp312-macosx_arm64 + env: + OPEN_SPIEL_BUILDING_WHEEL: ON + OPEN_SPIEL_BUILD_WITH_ACPC: ON + OPEN_SPIEL_BUILD_WITH_HANABI: ON + OPEN_SPIEL_BUILD_WITH_ROSHAMBO: ON + OPEN_SPIEL_ABSL_VERSION: ${{ matrix.OPEN_SPIEL_ABSL_VERSION }} + OS_TYPE: ${{ matrix.OS_TYPE }} + OS_PYTHON_VERSION: ${{ matrix.OS_PYTHON_VERSION }} + CI_PYBIN: ${{ matrix.CI_PYBIN }} + CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014 + CIBW_BUILD: ${{ matrix.CIBW_BUILD }} + CIBW_SKIP: pp* + CIBW_BEFORE_TEST: python -m pip install --upgrade pip + CIBW_TEST_COMMAND: /bin/bash {project}/open_spiel/scripts/test_wheel.sh basic {project} + CIBW_ENVIRONMENT: ${{ matrix.CIBW_ENVIRONMENT }} + + steps: + - uses: actions/checkout@v4 + + - name: Install + run: | + pwd + uname -a + [[ "${OS_TYPE}" = "Darwin" ]] && brew install python@${OS_PYTHON_VERSION} + [[ "${OS_TYPE}" = "Darwin" ]] && brew link --force python@${OS_PYTHON_VERSION} + which g++ + g++ --version + chmod +x install.sh + # This is needed to grab OpenSpiel dependencies. + [[ "${OS_TYPE}" = "Darwin" ]] && ./install.sh `which python${OS_PYTHON_VERSION}` + [[ "${OS_TYPE}" = "Linux" ]] && ./install.sh `which python3` + # These are necessary to install what is necessary for the build and for the full tests below. 
+ ${CI_PYBIN} -m venv ./venv + source ./venv/bin/activate + python -m pip install --upgrade pip + python -m pip --version + [[ "${OS_TYPE}" = "Darwin" ]] && python -m pip install pipx + python -m pip install --upgrade setuptools + python -m pip install --upgrade -r requirements.txt -q + source ./open_spiel/scripts/python_extra_deps.sh python + python -m pip install --no-cache-dir --upgrade $OPEN_SPIEL_PYTHON_JAX_DEPS + python -m pip install --no-cache-dir --upgrade $OPEN_SPIEL_PYTHON_PYTORCH_DEPS + python -m pip install --no-cache-dir --upgrade $OPEN_SPIEL_PYTHON_MISC_DEPS + python -m pip install twine + python -m pip install cibuildwheel==2.22.0 + - name: Build sdist + run: | + source ./venv/bin/activate + pipx run build --sdist + twine check dist/*.tar.gz + + # Build all the wheels and run the basic tests (within the docker images) + # Basic tests are run via the CIBW_TEST_COMMAND environment variable. + - name: Build bdist_wheel and run tests + run: | + [[ "${OS_TYPE}" = "Darwin" ]] && xcodebuild -version + source ./venv/bin/activate + python -m cibuildwheel --output-dir wheelhouse + ls -l wheelhouse + + # Install the built wheel and run the full tests on this host. The full + # tests include all the ones that use the machine learning libraries, + # such as Tensorflow, PyTorch, and JAX. + - name: Install bdist_wheel and full tests + run: | + source ./venv/bin/activate + ./open_spiel/scripts/test_wheel.sh full `pwd` python + + - uses: actions/upload-artifact@v4 + with: + name: artifact-${{ matrix.os }}-${{ matrix.NAME }} + path: | + dist/*.tar.gz + ./wheelhouse/*.whl diff --git a/scenarios/bargaining/open_spiel/.gitignore b/scenarios/bargaining/open_spiel/.gitignore new file mode 100644 index 0000000..98adf1d --- /dev/null +++ b/scenarios/bargaining/open_spiel/.gitignore @@ -0,0 +1,64 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + + # C extensions +*.so + + # Jupyter Notebook +.ipynb_checkpoints + + # virtualenv +.venv +venv/ +ENV/ + + # OSX specific +.DS_Store + + # Build products +build/ +build*/ +cmake-build-*/ +dist/ +pyspiel.egg-info/ +open_spiel.egg-info/ + + +# Swift build directory +.build + + # External git modules +open_spiel/abseil-cpp/ +open_spiel/eigen/libeigen/ +open_spiel/libnop/libnop/ +open_spiel/games/bridge/double_dummy_solver/ +open_spiel/games/universal_poker/double_dummy_solver/ +open_spiel/games/hanabi/hanabi-learning-environment/ +/open_spiel/pybind11_abseil/ +pybind11/ + +# Install artifacts +download_cache/ +get-pip.py +open_spiel/scripts/shflags +open_spiel/scripts/jill.sh + +# julia wrapper +Manifest.toml + + +# IDE +.idea/ +.vscode/ +*~ + + +open_spiel/cmake-build-debug/ + +# Swift generated build file +Package.resolved +# Visual Studio generated files +open_spiel/.vs +/.env diff --git a/scenarios/bargaining/open_spiel/CONTRIBUTING.md b/scenarios/bargaining/open_spiel/CONTRIBUTING.md new file mode 100644 index 0000000..939e534 --- /dev/null +++ b/scenarios/bargaining/open_spiel/CONTRIBUTING.md @@ -0,0 +1,28 @@ +# How to Contribute + +We'd love to accept your patches and contributions to this project. There are +just a few small guidelines you need to follow. + +## Contributor License Agreement + +Contributions to this project must be accompanied by a Contributor License +Agreement. You (or your employer) retain the copyright to your contribution; +this simply gives us permission to use and redistribute your contributions as +part of the project. Head over to to see +your current agreements on file or to sign a new one. 
+ +You generally only need to submit a CLA once, so if you've already submitted one +(even if it was for a different project), you probably don't need to do it +again. + +## Code reviews + +All submissions, including submissions by project members, require review. We +use GitHub pull requests for this purpose. Consult +[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more +information on using pull requests. + +## Community Guidelines + +This project follows [Google's Open Source Community +Guidelines](https://opensource.google.com/conduct/). diff --git a/scenarios/bargaining/open_spiel/Dockerfile.base b/scenarios/bargaining/open_spiel/Dockerfile.base new file mode 100644 index 0000000..1b27eb7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/Dockerfile.base @@ -0,0 +1,48 @@ +FROM ubuntu:20.04 as base +RUN apt update +RUN apt-get -y install \ + clang \ + curl \ + git \ + python3 \ + python3-dev \ + python3-pip \ + python3-setuptools \ + python3-wheel \ + sudo +RUN mkdir repo +WORKDIR /repo + +RUN sudo pip3 install --upgrade pip +RUN sudo pip3 install matplotlib + +# install +COPY . . +RUN DEBIAN_FRONTEND="noninteractive" apt-get -y install tzdata +RUN ./install.sh +RUN pip3 install --upgrade setuptools testresources +# Line below is a workaround for the issue https://github.com/google-deepmind/open_spiel/issues/1293 +RUN pip install importlib_metadata --force-reinstall +RUN pip3 install --upgrade -r requirements.txt +RUN pip3 install --upgrade cmake + +# build and test +RUN mkdir -p build +WORKDIR /repo/build +RUN cmake -DPython3_EXECUTABLE=`which python3` -DCMAKE_CXX_COMPILER=`which clang++` ../open_spiel +RUN make -j12 +ENV PYTHONPATH=${PYTHONPATH}:/repo +ENV PYTHONPATH=${PYTHONPATH}:/repo/build/python +RUN ctest -j12 +WORKDIR /repo/open_spiel + +# minimal image for development in Python +FROM python:3.6-slim-buster as python-slim +RUN mkdir repo +WORKDIR /repo +COPY --from=base /repo . +RUN pip3 install --upgrade -r requirements.txt +RUN pip3 install matplotlib +ENV PYTHONPATH=${PYTHONPATH}:/repo +ENV PYTHONPATH=${PYTHONPATH}:/repo/build/python +WORKDIR /repo/open_spiel diff --git a/scenarios/bargaining/open_spiel/Dockerfile.jupyter b/scenarios/bargaining/open_spiel/Dockerfile.jupyter new file mode 100644 index 0000000..d6ea3b5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/Dockerfile.jupyter @@ -0,0 +1,42 @@ +FROM ubuntu:20.04 as base +RUN apt update +RUN apt-get -y install \ + clang \ + curl \ + git \ + python3 \ + python3-dev \ + python3-pip \ + python3-setuptools \ + python3-wheel \ + sudo +RUN mkdir repo +WORKDIR /repo + +RUN sudo pip3 install --upgrade pip +RUN sudo pip3 install matplotlib + +# install +COPY . . 
+RUN DEBIAN_FRONTEND="noninteractive" apt-get -y install tzdata +RUN ./install.sh +RUN pip3 install --upgrade setuptools testresources +RUN pip3 install --upgrade -r requirements.txt +RUN pip3 install --upgrade cmake + +# build and test +RUN mkdir -p build +WORKDIR /repo/build +RUN cmake -DPython_TARGET_VERSION=${PYVERSION} -DCMAKE_CXX_COMPILER=`which clang++` ../open_spiel +RUN make -j12 +ENV PYTHONPATH=${PYTHONPATH}:/repo +ENV PYTHONPATH=${PYTHONPATH}:/repo/build/python +# ctest can be disabled for faster builds when tests are not required +RUN ctest -j12 +WORKDIR /repo/open_spiel + +# Jupyterlab Environment +FROM base as jupyterlab +RUN pip install jupyter -U && pip install jupyterlab +EXPOSE 8888 +ENTRYPOINT ["jupyter", "lab","--ip=0.0.0.0","--allow-root"] diff --git a/scenarios/bargaining/open_spiel/LICENSE b/scenarios/bargaining/open_spiel/LICENSE new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/scenarios/bargaining/open_spiel/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/scenarios/bargaining/open_spiel/MANIFEST.in b/scenarios/bargaining/open_spiel/MANIFEST.in new file mode 100644 index 0000000..69eb399 --- /dev/null +++ b/scenarios/bargaining/open_spiel/MANIFEST.in @@ -0,0 +1,16 @@ +# python package requirements +include requirements.txt + +# pybind files +recursive-include pybind11/include/pybind11 *.h +recursive-include pybind11/pybind11 *.py +include pybind11/CMakeLists.txt + +# open_spiel files, including source files for abseil and dds +recursive-include open_spiel CMakeLists.txt *.cc *.cpp *.h *.hpp *.py evalHandTables + +# abseil CMake files +recursive-include open_spiel/abseil-cpp/CMake ** +recursive-include open_spiel/abseil-cpp/ *.cmake *.inc + + diff --git a/scenarios/bargaining/open_spiel/README.md b/scenarios/bargaining/open_spiel/README.md new file mode 100644 index 0000000..0d267fe --- /dev/null +++ b/scenarios/bargaining/open_spiel/README.md @@ -0,0 +1,90 @@ + + + + + + + +# OpenSpiel: A Framework for Reinforcement Learning in Games + +[![Documentation Status](https://readthedocs.org/projects/openspiel/badge/?version=latest)](https://openspiel.readthedocs.io/en/latest/?badge=latest) +![build_and_test](https://github.com/deepmind/open_spiel/workflows/build_and_test/badge.svg) + +OpenSpiel is a collection of environments and algorithms for research in general +reinforcement learning and search/planning in games. OpenSpiel supports n-player +(single- and multi- agent) zero-sum, cooperative and general-sum, one-shot and +sequential, strictly turn-taking and simultaneous-move, perfect and imperfect +information games, as well as traditional multiagent environments such as +(partially- and fully- observable) grid worlds and social dilemmas. OpenSpiel +also includes tools to analyze learning dynamics and other common evaluation +metrics. Games are represented as procedural extensive-form games, with some +natural extensions. The core API and games are implemented in C++ and exposed to +Python. Algorithms and tools are written both in C++ and Python. + +To try OpenSpiel in Google Colaboratory, please refer to `open_spiel/colabs` subdirectory or start [here](https://colab.research.google.com/github/deepmind/open_spiel/blob/master/open_spiel/colabs/install_open_spiel.ipynb). + +

+*(image: OpenSpiel visual asset)*

+ +# Index + +Please choose among the following options: + +* [Installing OpenSpiel](docs/install.md) +* [Introduction to OpenSpiel](docs/intro.md) +* [API Overview and First Example](docs/concepts.md) +* [API Reference](docs/api_reference.md) +* [Overview of Implemented Games](docs/games.md) +* [Overview of Implemented Algorithms](docs/algorithms.md) +* [Developer Guide](docs/developer_guide.md) +* [Using OpenSpiel as a C++ Library](docs/library.md) +* [Guidelines and Contributing](docs/contributing.md) +* [Authors](docs/authors.md) + +For a longer introduction to the core concepts, formalisms, and terminology, +including an overview of the algorithms and some results, please see +[OpenSpiel: A Framework for Reinforcement Learning in Games](https://arxiv.org/abs/1908.09453). + +For an overview of OpenSpiel and example uses of the core API, please check out +our tutorials: + +* [Motivation, Core API, Brief Intro to Replictor Dynamics and Imperfect + Information Games](https://www.youtube.com/watch?v=8NCPqtPwlFQ) by Marc + Lanctot. + [(slides)](http://mlanctot.info/files/OpenSpiel_Tutorial_KU_Leuven_2022.pdf) + [(colab)](https://colab.research.google.com/github/deepmind/open_spiel/blob/master/open_spiel/colabs/OpenSpielTutorial.ipynb) +* [Motivation, Core API, Implementing CFR and REINFORCE on Kuhn poker, Leduc + poker, and Goofspiel](https://www.youtube.com/watch?v=o6JNHoGUXCo) by Edward + Lockhart. + [(slides)](http://mlanctot.info/files/open_spiel_tutorial-mar2021-comarl.pdf) + [(colab)](https://colab.research.google.com/github/deepmind/open_spiel/blob/master/open_spiel/colabs/CFR_and_REINFORCE.ipynb) + +If you use OpenSpiel in your research, please cite the paper using the following +BibTeX: + +```bibtex +@article{LanctotEtAl2019OpenSpiel, + title = {{OpenSpiel}: A Framework for Reinforcement Learning in Games}, + author = {Marc Lanctot and Edward Lockhart and Jean-Baptiste Lespiau and + Vinicius Zambaldi and Satyaki Upadhyay and Julien P\'{e}rolat and + Sriram Srinivasan and Finbarr Timbers and Karl Tuyls and + Shayegan Omidshafiei and Daniel Hennes and Dustin Morrill and + Paul Muller and Timo Ewalds and Ryan Faulkner and J\'{a}nos Kram\'{a}r + and Bart De Vylder and Brennan Saeta and James Bradbury and David Ding + and Sebastian Borgeaud and Matthew Lai and Julian Schrittwieser and + Thomas Anthony and Edward Hughes and Ivo Danihelka and Jonah Ryan-Davis}, + year = {2019}, + eprint = {1908.09453}, + archivePrefix = {arXiv}, + primaryClass = {cs.LG}, + journal = {CoRR}, + volume = {abs/1908.09453}, + url = {http://arxiv.org/abs/1908.09453}, +} +``` + +## Versioning + +We use [Semantic Versioning](https://semver.org/). + diff --git a/scenarios/bargaining/open_spiel/docs/Makefile b/scenarios/bargaining/open_spiel/docs/Makefile new file mode 100644 index 0000000..0626bb2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/Makefile @@ -0,0 +1,21 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +SPHINXPROJ = open_spiel +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + ./fix_table_links.sh diff --git a/scenarios/bargaining/open_spiel/docs/_static/OpenSpielB.png b/scenarios/bargaining/open_spiel/docs/_static/OpenSpielB.png new file mode 100644 index 0000000..4ae8c32 Binary files /dev/null and b/scenarios/bargaining/open_spiel/docs/_static/OpenSpielB.png differ diff --git a/scenarios/bargaining/open_spiel/docs/_static/alpha_sweep_plots.png b/scenarios/bargaining/open_spiel/docs/_static/alpha_sweep_plots.png new file mode 100644 index 0000000..3a318b6 Binary files /dev/null and b/scenarios/bargaining/open_spiel/docs/_static/alpha_sweep_plots.png differ diff --git a/scenarios/bargaining/open_spiel/docs/_static/example_multi_population_game_rankings.png b/scenarios/bargaining/open_spiel/docs/_static/example_multi_population_game_rankings.png new file mode 100644 index 0000000..7be05ad Binary files /dev/null and b/scenarios/bargaining/open_spiel/docs/_static/example_multi_population_game_rankings.png differ diff --git a/scenarios/bargaining/open_spiel/docs/_static/green_circ10.png b/scenarios/bargaining/open_spiel/docs/_static/green_circ10.png new file mode 100644 index 0000000..f85ad14 Binary files /dev/null and b/scenarios/bargaining/open_spiel/docs/_static/green_circ10.png differ diff --git a/scenarios/bargaining/open_spiel/docs/_static/markov_chain_visualization.png b/scenarios/bargaining/open_spiel/docs/_static/markov_chain_visualization.png new file mode 100644 index 0000000..5bd3378 Binary files /dev/null and b/scenarios/bargaining/open_spiel/docs/_static/markov_chain_visualization.png differ diff --git a/scenarios/bargaining/open_spiel/docs/_static/passing.svg b/scenarios/bargaining/open_spiel/docs/_static/passing.svg new file mode 100644 index 0000000..ba08b9e --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/_static/passing.svg @@ -0,0 +1 @@ +buildbuildpassingpassing diff --git a/scenarios/bargaining/open_spiel/docs/_static/public_tree_kuhn.png b/scenarios/bargaining/open_spiel/docs/_static/public_tree_kuhn.png new file mode 100644 index 0000000..749556d Binary files /dev/null and b/scenarios/bargaining/open_spiel/docs/_static/public_tree_kuhn.png differ diff --git a/scenarios/bargaining/open_spiel/docs/algorithms.md b/scenarios/bargaining/open_spiel/docs/algorithms.md new file mode 100644 index 0000000..0122a25 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/algorithms.md @@ -0,0 +1,71 @@ +# Available algorithms + +![](_static/green_circ10.png "green circle"): thoroughly-tested. In many cases, +we verified against known values and/or reproduced results from papers. + +~: implemented but lightly tested. + +X: known problems; please see Github issues. + +Algorithms | Category | Reference | Status +--------------------------------------------------------------------- | ------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------ +Information Set Monte Carlo Tree Search (IS-MCTS) | Search | [Cowley et al. 
'12](https://ieeexplore.ieee.org/abstract/document/6203567) | ~ +Max^n | Search | [Luckhart & Irani '86](https://www.semanticscholar.org/paper/An-Algorithmic-Solution-of-N-Person-Games-Luckhart-Irani/6ab06950332412d25b0915d7796d60040228decd) | ~ +Minimax (and Alpha-Beta) Search | Search | [Wikipedia1](https://en.wikipedia.org/wiki/Minimax#Minimax_algorithm_with_alternate_moves), [Wikipedia2](https://en.wikipedia.org/wiki/Alpha%E2%80%93beta_pruning), Knuth and Moore '75 | ![](_static/green_circ10.png "green circle") +Monte Carlo Tree Search | Search | [Wikipedia](https://en.wikipedia.org/wiki/Monte_Carlo_tree_search), [UCT paper](http://ggp.stanford.edu/readings/uct.pdf), [Coulom '06](https://hal.inria.fr/inria-00116992/document), [Cowling et al. survey](http://www.incompleteideas.net/609%20dropbox/other%20readings%20and%20resources/MCTS-survey.pdf) | ![](_static/green_circ10.png "green circle") +Perfect Information Monte Carlo (PIMC) | Search | [Long et al. '10](https://ojs.aaai.org/index.php/AAAI/article/view/7562) | ~ +Lemke-Howson (via nashpy) | Opt. | [Wikipedia](https://en.wikipedia.org/wiki/Lemke%E2%80%93Howson_algorithm), [Shoham & Leyton-Brown '09](http://masfoundations.org/) | ![](_static/green_circ10.png "green circle") +ADIDAS | Opt. | [Gemp et al '22](https://arxiv.org/abs/2106.01285) | ~ +Least Core via Linear Programming | Opt. | [Yan & Procaccia '21](https://ojs.aaai.org/index.php/AAAI/article/view/16721) | ~ +Least Core via Saddle-Point (Lagrangian) Programming | Opt. | Gemp et al '24 | ~ +Sequence-form linear programming | Opt. | [Koller, Megiddo, and von Stengel '94](http://theory.stanford.edu/~megiddo/pdf/stoc94.pdf),
[Shoham & Leyton-Brown '09](http://masfoundations.org/) | ![](_static/green_circ10.png "green circle") +Shapley Values (incl. approximations via Monte Carlo sampling) | Opt. | [Mitchell et al. '22](https://www.jmlr.org/papers/v23/21-0439.html) | ~ +Stackelberg equilibrium solver | Opt. | [Conitzer & Sandholm '06](https://users.cs.duke.edu/~conitzer/commitEC06.pdf) | ~ +MIP-Nash | Opt. | [Sandholm et al. '05](https://dl.acm.org/doi/10.5555/1619410.1619413) | ~ +Magnetic Mirror Descent (MMD) with dilated entropy | Opt. | [Sokota et al. '22](https://arxiv.org/abs/2206.05825) | ~ +Counterfactual Regret Minimization (CFR) | Tabular | [Zinkevich et al '08](https://poker.cs.ualberta.ca/publications/NIPS07-cfr.pdf), [Neller & Lanctot '13](http://modelai.gettysburg.edu/2013/cfr/cfr.pdf) | ![](_static/green_circ10.png "green circle") +CFR against a best responder (CFR-BR) | Tabular | [Johanson et al '12](https://poker.cs.ualberta.ca/publications/AAAI12-cfrbr.pdf) | ![](_static/green_circ10.png "green circle") +Exploitability / Best response | Tabular | [Shoham & Leyton-Brown '09](http://masfoundations.org/) | ![](_static/green_circ10.png "green circle") +External sampling Monte Carlo CFR | Tabular | [Lanctot et al. '09](http://mlanctot.info/files/papers/nips09mccfr.pdf), [Lanctot '13](http://mlanctot.info/files/papers/PhD_Thesis_MarcLanctot.pdf) | ![](_static/green_circ10.png "green circle") +Fixed Strategy Iteration CFR (FSICFR) | Tabular | [Neller & Hnath '11](https://cupola.gettysburg.edu/csfac/2/) | ~ +Extensive-form Regret Minimization | Tabular | [Morrill et. al. '22](https://arxiv.org/abs/2102.06973) | ~ +Mean-field Ficticious Play for MFG | Tabular | [Perrin et. al. '20](https://arxiv.org/abs/2007.03458) | ~ +Online Mirror Descent for MFG | Tabular | [Perolat et. al. '21](https://arxiv.org/abs/2103.00623) | ~ +Munchausen Online Mirror Descent for MFG | Tabular | [Lauriere et. al. '22](https://arxiv.org/pdf/2203.11973) | ~ +Fixed Point for MFG | Tabular | [Huang et. al. '06](https://zbmath.org/?q=an:1136.91349) | ~ +Boltzmann Policy Iteration for MFG | Tabular | [Lauriere et. al. '22](https://arxiv.org/pdf/2203.11973) | ~ +Outcome sampling Monte Carlo CFR | Tabular | [Lanctot et al. '09](http://mlanctot.info/files/papers/nips09mccfr.pdf), [Lanctot '13](http://mlanctot.info/files/papers/PhD_Thesis_MarcLanctot.pdf) | ![](_static/green_circ10.png "green circle") +Policy Iteration | Tabular | [Sutton & Barto '18](http://incompleteideas.net/book/the-book-2nd.html) | ![](_static/green_circ10.png "green circle") +Q-learning | Tabular | [Sutton & Barto '18](http://incompleteideas.net/book/the-book-2nd.html) | ![](_static/green_circ10.png "green circle") +Regret Matching | Tabular | [Hart & Mas-Colell '00](https://onlinelibrary.wiley.com/doi/abs/10.1111/1468-0262.00153) | ![](_static/green_circ10.png "green circle") +Restricted Nash Response (RNR) | Tabular | [Johanson et al '08](http://johanson.ca/publications/poker/2007-nips-rnash/2007-nips-rnash.html) | ~ +SARSA | Tabular | [Sutton & Barto '18](http://incompleteideas.net/book/the-book-2nd.html) | ![](_static/green_circ10.png "green circle") +Value Iteration | Tabular | [Sutton & Barto '18](http://incompleteideas.net/book/the-book-2nd.html) | ![](_static/green_circ10.png "green circle") +Advantage Actor-Critic (A2C) | RL | [Mnih et al. '16](https://arxiv.org/abs/1602.01783) | ![](_static/green_circ10.png "green circle") +Deep Q-networks (DQN) | RL | [Mnih et al. 
'15](https://www.nature.com/articles/nature14236) | ![](_static/green_circ10.png "green circle") +Ephemeral Value Adjustments (EVA) | RL | [Hansen et al. '18](https://arxiv.org/abs/1810.08163) | ~ +Proximal Policy Optimization (PPO) | RL | [Schulman et al. '18](https://arxiv.org/abs/1707.06347) | ~ +Mean Field Proximal Policy Optimization (MF-PPO) | RL | [Algumaei et al. '23](https://link.springer.com/chapter/10.1007/978-3-031-33377-4_28) | ~ +AlphaZero (C++/LibTorch) | MARL | [Silver et al. '18](https://science.sciencemag.org/content/362/6419/1140) | ![](_static/green_circ10.png "green circle") +AlphaZero (Python/TF) | MARL | [Silver et al. '18](https://science.sciencemag.org/content/362/6419/1140) | ![](_static/green_circ10.png "green circle") +Correlated Q-Learning | MARL | [Greenwald & Hall '03](https://www.aaai.org/Papers/ICML/2003/ICML03-034.pdf) | ~ +Asymmetric Q-Learning | MARL | [Kononen '04](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.101.9458&rep=rep1&type=pdf) | ~ +Deep CFR | MARL | [Brown et al. '18](https://arxiv.org/abs/1811.00164) | ![](_static/green_circ10.png "green circle") +DiCE: The Infinitely Differentiable Monte-Carlo Estimator (LOLA-DiCE) | MARL | [Foerster, Farquhar, Al-Shedivat et al. '18](http://proceedings.mlr.press/v80/foerster18a/foerster18a.pdf) | ~ +Exploitability Descent (ED) | MARL | [Lockhart et al. '19](https://arxiv.org/abs/1903.05614) | ![](_static/green_circ10.png "green circle") +(Extensive-form) Fictitious Play (XFP) | MARL | [Heinrich, Lanctot, & Silver '15](http://proceedings.mlr.press/v37/heinrich15.pdf) | ![](_static/green_circ10.png "green circle") +Learning with Opponent-Learning Awareness (LOLA) | MARL | [Foerster, Chen, Al-Shedivat, et al. '18](https://arxiv.org/pdf/1709.04326.pdf) | ~ +Nash Q-Learning | MARL | [Hu & Wellman '03](https://www.jmlr.org/papers/volume4/hu03a/hu03a.pdf) | ~ +Neural Fictitious Self-Play (NFSP) | MARL | [Heinrich & Silver '16](https://arxiv.org/abs/1603.01121) | ![](_static/green_circ10.png "green circle") +Neural Replicator Dynamics (NeuRD) | MARL | [Omidshafiei, Hennes, Morrill, et al. '19](https://arxiv.org/abs/1906.00190) | X +Regret Policy Gradients (RPG, RMPG) | MARL | [Srinivasan, Lanctot, et al. '18](https://arxiv.org/abs/1810.09026) | ![](_static/green_circ10.png "green circle") +Policy-Space Response Oracles (PSRO) | MARL | [Lanctot et al. '17](https://arxiv.org/abs/1711.00832) | ![](_static/green_circ10.png "green circle") +GQ-based ("all-actions") Policy Gradient (QPG) | MARL | [Srinivasan, Lanctot, et al. '18](https://arxiv.org/abs/1810.09026) | ![](_static/green_circ10.png "green circle") +Regression CFR (RCFR) | MARL | [Waugh et al. '15](https://arxiv.org/abs/1411.7974), [Morrill '16](https://poker.cs.ualberta.ca/publications/Morrill_Dustin_R_201603_MSc.pdf) | ![](_static/green_circ10.png "green circle") +Rectified Nash Response (PSRO_rn) | MARL | [Balduzzi et al. '19](https://arxiv.org/abs/1901.08106) | ~ +Mean-Field PSRO (MFPSRO) | MARL | [Muller et al. '21](https://arxiv.org/abs/2111.08350.08106) | ~ +Win-or-Learn-Fast Policy-Hill Climbing (WoLF-PHC) | MARL | [Bowling & Veloso '02](https://www.sciencedirect.com/science/article/pii/S0004370202001212) | ~ +α-Rank | Eval. / Viz. | [Omidhsafiei et al. '19](https://www.nature.com/articles/s41598-019-45619-9), [arXiv](https://arxiv.org/abs/1903.01373) | ![](_static/green_circ10.png "green circle") +Nash Averaging | Eval. / Viz. | [Balduzzi et al. '18](https://arxiv.org/abs/1806.02643) | ~ +Replicator / Evolutionary Dynamics | Eval. / Viz. 
| [Hofbaeur & Sigmund '98](https://www.cambridge.org/core/books/evolutionary-games-and-population-dynamics/A8D94EBE6A16837E7CB3CED24E1948F8), [Sandholm '10](https://mitpress.mit.edu/books/population-games-and-evolutionary-dynamics) | ![](_static/green_circ10.png "green circle") +Voting-as-Evaluation (VasE) | Eval. / Viz. | [Lanctot et al. '23](https://arxiv.org/abs/2312.03121) | ![](_static/green_circ10.png "green circle") diff --git a/scenarios/bargaining/open_spiel/docs/alpha_rank.md b/scenarios/bargaining/open_spiel/docs/alpha_rank.md new file mode 100644 index 0000000..51e4426 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/alpha_rank.md @@ -0,0 +1,151 @@ +# α-Rank + +OpenSpiel now supports using Alpha-Rank +([“α-Rank: Multi-Agent Evaluation by Evolution”, 2019](https://www.nature.com/articles/s41598-019-45619-9)) +for both single-population (symmetric) and multi-population games. Specifically, +games can be specified via payoff tables (or tensors for the >2 players case) as +well as Heuristic Payoff Tables (HPTs). + +The following presents several typical use cases for Alpha-Rank. For an example +complete python script, refer to +[open_spiel/python/egt/examples/alpharank_example.py](https://github.com/deepmind/open_spiel/blob/master/open_spiel/python/egt/examples/alpharank_example.py). + +## Importing the Alpha-Rank module + +```python +from open_spiel.python.egt import alpharank +from open_spiel.python.egt import alpharank_visualizer +``` + +## Running Alpha-Rank on various games + +### Example: symmetric 2-player game rankings + +In this example, we run Alpha-Rank on a symmetric 2-player game +(Rock-Paper-Scissors), computing and outputting the rankings in a tabular +format. We demonstrate also the conversion of standard payoff tables to +Heuristic Payoff Tables (HPTs), as both are supported by the ranking code. + +```python +# Load the game +game = pyspiel.load_matrix_game("matrix_rps") +payoff_tables = utils.game_payoffs_array(game) + +# Convert to heuristic payoff tables +payoff_tables= [heuristic_payoff_table.from_matrix_game(payoff_tables[0]), + heuristic_payoff_table.from_matrix_game(payoff_tables[1].T)] + +# Check if the game is symmetric (i.e., players have identical strategy sets +# and payoff tables) and return only a single-player’s payoff table if so. +# This ensures Alpha-Rank automatically computes rankings based on the +# single-population dynamics. +_, payoff_tables = utils.is_symmetric_matrix_game(payoff_tables) + +# Compute Alpha-Rank +(rhos, rho_m, pi, num_profiles, num_strats_per_population) = alpharank.compute( + payoff_tables, alpha=1e2) + +# Report results +alpharank.print_results(payoff_tables, payoffs_are_hpt_format, pi=pi) +``` + +**Output** + +```bash +Agent Rank Score +----- ---- ----- +0 1 0.33 +1 1 0.33 +2 1 0.33 +``` + +### Example: multi-population game rankings + +The next example demonstrates computing Alpha-Rank on an asymmetric 3-player +meta-game, constructed by computing payoffs for Kuhn poker agents trained via +extensive-form fictitious play (XFP). Here we use a helper function, +`compute_and_report_alpharank`, which internally conducts the pre-processing and +visualization shown in the previous example. 
+ +```python +# Load the game +payoff_tables = alpharank_example.get_kuhn_poker_data(num_players=3) + +# Helper function for computing & reporting Alpha-Rank outputs +alpharank.compute_and_report_alpharank(payoff_tables, alpha=1e2) +``` + +**Output** + +```bash +Agent Rank Score +----- ---- ----- +(2,3,3) 1 0.22 +(3,3,3) 2 0.14 +(3,2,3) 3 0.12 +(2,2,3) 4 0.09 +(3,1,3) 5 0.08 +(2,1,3) 6 0.05 +(1,2,3) 7 0.04 +(2,3,1) 8 0.02 +... ... ... +``` + +![](_static/example_multi_population_game_rankings.png "Multi population ranking") + +## Visualizing and reporting results + +This section provides details on various methods used for reporting the final +Alpha-Rank results. + +### Basic Ranking Outputs + +The final rankings computed can be printed in a tabular manner using the +following interface: + +```python +alpharank.print_results(payoff_tables, payoffs_are_hpt_format, pi=pi) +``` + +**Output** + +```txt +Agent Rank Score +----- ---- ----- +0 1 0.33 +1 1 0.33 +2 1 0.33 +``` + +### Markov Chain Visualization + +One may visualize the Alpha-Rank Markov transition matrix as follows: + +```python +m_network_plotter = alpharank_visualizer.NetworkPlot(payoff_tables, rhos, + rho_m, pi,strat_labels, + num_top_profiles=8) +m_network_plotter.compute_and_draw_network() +``` + +**Output** + +![](_static/markov_chain_visualization.png) + +### Alpha-sweep plots + +One may choose to conduct a sweep over the ranking-intensity parameter, alpha +(as opposed to choosing a fixed alpha). This is, in general, useful for general +games where bounds on payoffs may be unknown, and where the ranking computed by +Alpha-Rank should use a sufficiently high value of alpha (to ensure +correspondence to the underlying Markov-Conley chain solution concept). In such +cases, the following interface can be used to both visualize the sweep and +obtain the final rankings computed: + +```python +alpharank.sweep_pi_vs_alpha(payoff_tables, visualize=True) +``` + +**Output** + +![](_static/alpha_sweep_plots.png) diff --git a/scenarios/bargaining/open_spiel/docs/alpha_zero.md b/scenarios/bargaining/open_spiel/docs/alpha_zero.md new file mode 100644 index 0000000..79aad87 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/alpha_zero.md @@ -0,0 +1,186 @@ +# AlphaZero + +OpenSpiel includes two implementations of AlphaZero, one based on Tensorflow (in +Python). The other based on C++ LibTorch. This document covers mostly the +TF-based implementation and common components. For the Libtorch-based +implementation, +[see here](https://github.com/deepmind/open_spiel/tree/master/open_spiel/algorithms/alpha_zero_torch). + +**Note**: as of OpenSpiel 1.6, the Tensorflow version (based on TF1) is no +longer maintained. + +**Disclaimer**: this is not the code that was used for the Go challenge matches +or the AlphaZero paper results. It is a re-implementation for illustrative +purposes, and although it can handle games like Connect Four, it is not designed +to scale to superhuman performance in Go or Chess. + +## Background + +AlphaZero is an algorithm for training an agent to play perfect information +games from pure self-play. It uses Monte Carlo Tree Search (MCTS) with the prior +and value given by a neural network to generate training data for that neural +network. + +Links to relevant articles/papers: + +- [AlphaGo Zero: Starting from scratch](https://deepmind.com/blog/article/alphago-zero-starting-scratch) + has an open access link to the AlphaGo Zero nature paper that describes the + model in detail. 
+- [AlphaZero: Shedding new light on chess, shogi, and Go](https://deepmind.com/blog/article/alphazero-shedding-new-light-grand-games-chess-shogi-and-go) + has an open access link to the AlphaZero science paper that describes the + training regime and generalizes to more games. + +## Overview: + +The Python and C++ implementations are conceptually fairly similar, and have +roughly the same components: [actors](#actors) that generate data through +self-play using [MCTS](#mcts) with an [evaluator](#mcts-evaluator) that uses a +[neural network](#model), a [learner](#learner) that updates the network based +on those games, and [evaluators](#evaluators) playing vs standard MCTS to gauge +progress. Both [write checkpoints](#output) that can be [played](#playing-vs-checkpoints) +independently of the training setup, and logs that can be [analyzed](#analysis) +programmatically. + +The Python implementation uses one process per actor/evaluator, doesn't support +batching for inference and does all inference and training on the cpu. The C++ +implementation, by contrast, uses threads, a shared cache, supports batched +inference, and can do both inference and training on GPUs. As such the C++ +implementation can take advantage of additional hardware and can train +significantly faster. + +### Model + +The model defined in +[open_spiel/python/algorithms/alpha_zero/model.py](https://github.com/deepmind/open_spiel/blob/master/open_spiel/python/algorithms/alpha_zero/model.py) is used by +both the python and C++ implementations. + +The model defines three architectures in decreasing complexity: + +- resnet: same as the AlphaGo/AlphaZero paper when set with width 256 and + depth 20. +- conv2d: same as the resnet except uses a conv+batchnorm+relu instead of the + residual blocks. +- mlp: same as conv2d except uses dense layers instead of conv, and drops + batch norm. + +The model is parameterized by the size of the observations and number of actions +for the game you specify, so can play any 2-player game. The conv2d and resnet +models are restricted to games with a 2d representation (ie a 3d observation +tensor). + +The models are all parameterized with a width and depth: + +- The depth is the number of blocks in the torso, where the definition of a + block varies by model. For a resnet it's a resblock which is two conv2ds, + batch norms and relus, and an addition. For conv2d it's a conv2d, a batch + norm and a relu. For mlp it's a dense plus relu. +- The width is the number of filters for any conv2d and the number of hidden + units for any dense layer. + +The networks all give two outputs: a value and a policy, which are used by the +MCTS evaluator. + +### MCTS + +Monte Carlo Tree Search (MCTS) is a general search algorithm used to play many +games, but first found success playing Go back in ~2005. It builds a tree +directed by random rollouts, and does usually uses UCT to direct the +exploration/exploitation tradeoff. For our use case we replace random rollouts +with a value network. Instead of a uniform prior we use a policy network. +Instead of UCT we use PUCT. + +We have implementations of MCTS in +[C++](https://github.com/deepmind/open_spiel/blob/master/open_spiel/algorithms/mcts.h) and +[python](https://github.com/deepmind/open_spiel/blob/master/open_spiel/python/algorithms/mcts.py). + +### MCTS Evaluator + +Both MCTS implementations above have a configurable evaluator that returns the +value and prior policy of a given node. 
For standard MCTS the value is given by +random rollouts, and the prior policy is uniform. For AlphaZero the value and +prior are given by a neural network evaluation. The AlphaZero evaluator takes a +model, so can be used during training or with a trained checkpoint for play with +[open_spiel/python/examples/mcts.py](https://github.com/deepmind/open_spiel/blob/master/open_spiel/python/examples/mcts.py). + +### Actors + +The main script launches a set of actor processes (Python) or threads (C++). The +actors create two MCTS instances with a shared evaluator and model, and play +self-play games, passing the trajectories to the learner via a queue. The more +actors the faster it can generate training data, assuming you have sufficient +compute to actually run them. Too many actors for your hardware will mean longer +for individual games to finish and therefore your data could be more out of date +with respect to the up to date checkpoint/weights. + +### Learner + +The learner pulls trajectories from the actors and stores them in a fixed size +FIFO replay buffer. Once the replay buffer has enough new data, it does an +update step sampling from the replay buffer. It then saves a checkpoint and +updates all the actor's models. It also updates a `learner.jsonl` file with some +stats. + +### Evaluators + +The main script also launches a set of evaluator processes/threads. They +continually play games against a standard MCTS+Solver to give an idea of how +training is progressing. The MCTS opponents can be scaled in strength based on +the number of simulations they are given per move, so more levels means stronger +but slower opponents. + +### Output + +When running the algorithm a directory must be specified and all output goes +there. + +Due to the parallel nature of the algorithm writing logs to stdout/stderr isn't +very useful, so each actor/learner/evaluator writes its own log file to the +configured directory. + +Checkpoints are written after every update step, mostly overwriting the latest +one at `checkpoint--1` but every `checkpoint_freq` is saved at +`checkpoint-`. + +The config file is written to `config.json`, to make the experiment more +repeatable. + +The learner also writes machine readable logs in the +[jsonlines](http://jsonlines.org/) format to `learner.jsonl`, which can be read +with the analysis library. + +## Usage: + +### Python + +The code lives at [open_spiel/python/algorithms/alpha_zero/](https://github.com/deepmind/open_spiel/blob/master/open_spiel/python/algorithms/alpha_zero/). + +The simplest example trains a tic_tac_toe agent for a set number of training +steps: + +```bash +python3 open_spiel/python/examples/tic_tac_toe_alpha_zero.py +``` + +Alternatively you can train on an arbitrary game with many more options: + +```bash +python3 open_spiel/python/examples/alpha_zero.py --game connect_four --nn_model mlp --actors 10 +``` + +### Analysis + +There's an analysis library at +[open_spiel/python/algorithms/alpha_zero/analysis.py](https://github.com/deepmind/open_spiel/blob/master/open_spiel/python/algorithms/alpha_zero/analysis.py) which +reads the `config.json` and `learner.jsonl` from an experiment (either python or +C++), and graphs losses, value accuracy, evaluation results, actor speed, game +lengths, etc. It should be reasonable to turn this into a colab. + +### Playing vs checkpoints + +The checkpoints are compatible between python and C++, and can be loaded by the +model. 
You can try playing against one directly with +[open_spiel/python/examples/mcts.py](https://github.com/deepmind/open_spiel/blob/master/open_spiel/python/examples/mcts.py): + +```bash +python3 open_spiel/python/examples/mcts.py --game=tic_tac_toe --player1=human --player2=az --az_path +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference.md b/scenarios/bargaining/open_spiel/docs/api_reference.md new file mode 100644 index 0000000..cc508d8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference.md @@ -0,0 +1,66 @@ +## OpenSpiel Core API Reference + +OpenSpiel consists of several core functions and classes. This page acts as a +helpful reminder of how to use the main functionality of OpenSpiel. + +Most of the functions are described and illustrated via Python syntax and +examples, and there are pointers to the corresponding C++ functions. + +Disclaimer: This is meant as a guide to facilitate OpenSpiel development +in Python. However, +[spiel.h](https://github.com/deepmind/open_spiel/blob/master/open_spiel/spiel.h) +remains the single source of truth for documentation on the core API. + +### Core Functions + +Method | Python | C++ | Description +-------------------------------------------------------------------- | ------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------- | ----------- +`deserialize_game_and_state(serialized_data: string)` | [Python](api_reference/game_deserialize_game_and_state.md) | [C++](https://github.com/deepmind/open_spiel/blob/89ba2264a66d9db299108fbd2de4a27b71973f54/open_spiel/spiel.h#L1127) | Returns a tuple of (game, state) reconstructed from the serialized object data. +`load_game(game_string: str)` | [Python](api_reference/load_game.md) | [C++](https://github.com/deepmind/open_spiel/blob/c6fafb92021a8a3aa5f9746cdb79e74917ed26a5/open_spiel/spiel.h#L1080) | Returns a game object for the specified game string. +`load_game(game_string: str, parameters: Dict[str, Any])` | [Python](api_reference/load_game.md) | [C++](https://github.com/deepmind/open_spiel/blob/c6fafb92021a8a3aa5f9746cdb79e74917ed26a5/open_spiel/spiel.h#L1083) | Returns a game object for the specified game string and parameter values. +`registered_names()` | [Python](api_reference/registered_names.md) | [C++](https://github.com/deepmind/open_spiel/blob/c6fafb92021a8a3aa5f9746cdb79e74917ed26a5/open_spiel/spiel.h#L1051) | Returns a list of all short names of games in the library. +`serialize_game_and_state(game: pyspiel.Game, state: pyspiel.State)` | [Python](api_reference/game_serialize_game_and_state.md) | [C++](https://github.com/deepmind/open_spiel/blob/89ba2264a66d9db299108fbd2de4a27b71973f54/open_spiel/spiel.h#L1104) | Returns a string representation of the state and game that created it. + +### State methods + +Method | Python | C++ | Description +-------------------------------------------- | ----------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------- | ----------- +`action_to_string(player: int, action: int)` | [Python](api_reference/state_action_to_string.md) | [C++](https://github.com/deepmind/open_spiel/blob/c6fafb92021a8a3aa5f9746cdb79e74917ed26a5/open_spiel/spiel.h#L289) | Returns a string representation of the specified player's action. 
+`apply_action(action: int)` | [Python](api_reference/state_apply_action.md) | [C++](https://github.com/deepmind/open_spiel/blob/c6fafb92021a8a3aa5f9746cdb79e74917ed26a5/open_spiel/spiel.h#L230) | Applies the specified action to the state. +`apply_actions(actions: List[int])` | [Python](api_reference/state_apply_actions.md) | [C++](https://github.com/deepmind/open_spiel/blob/c6fafb92021a8a3aa5f9746cdb79e74917ed26a5/open_spiel/spiel.h#L581) | Applies the specified joint action (action for each player) to the state. +`chance_outcomes()` | [Python](api_reference/state_chance_outcomes.md) | [C++](https://github.com/deepmind/open_spiel/blob/c6fafb92021a8a3aa5f9746cdb79e74917ed26a5/open_spiel/spiel.h#L604) | Returns the a list of (action, prob) tuples representing the chance outcome distribution. +`current_player()` | [Python](api_reference/state_current_player.md) | [C++](https://github.com/deepmind/open_spiel/blob/c6fafb92021a8a3aa5f9746cdb79e74917ed26a5/open_spiel/spiel.h#L225) | Returns the player ID of the acting player. +`history()` | [Python](api_reference/state_history.md) | [C++](https://github.com/deepmind/open_spiel/blob/89ba2264a66d9db299108fbd2de4a27b71973f54/open_spiel/spiel.h#L406) | Returns the sequence of actions taken by all players since the start of the game. +`information_state_string()` | [Python](api_reference/state_information_state_string.md) | [C++](https://github.com/deepmind/open_spiel/blob/89ba2264a66d9db299108fbd2de4a27b71973f54/open_spiel/spiel.h#L433) | Returns a string representing the information state for the current player. +`information_state_string(player: int)` | [Python](api_reference/state_information_state_string.md) | [C++](https://github.com/deepmind/open_spiel/blob/89ba2264a66d9db299108fbd2de4a27b71973f54/open_spiel/spiel.h#L433) | Returns a string representing the information state for the specified player. +`information_state_tensor()` | [Python](api_reference/state_information_state_tensor.md) | [C++](https://github.com/deepmind/open_spiel/blob/89ba2264a66d9db299108fbd2de4a27b71973f54/open_spiel/spiel.h#L488) | Returns a list of floats representing the information state for the current player. +`information_state_tensor(player: int)` | [Python](api_reference/state_information_state_tensor.md) | [C++](https://github.com/deepmind/open_spiel/blob/89ba2264a66d9db299108fbd2de4a27b71973f54/open_spiel/spiel.h#L488) | Returns a list of floats representing the information state for the specified player. +`is_chance_node()` | [Python](api_reference/state_is_chance_node.md) | [C++](https://github.com/deepmind/open_spiel/blob/c6fafb92021a8a3aa5f9746cdb79e74917ed26a5/open_spiel/spiel.h#L368) | Returns True if the state represents a chance node, False otherwise. +`is_simultaneous_node()` | [Python](api_reference/state_is_simultaneous_node.md) | [C++](https://github.com/deepmind/open_spiel/blob/c6fafb92021a8a3aa5f9746cdb79e74917ed26a5/open_spiel/spiel.h#L385) | Returns True if the state represents a simultaneous player node, False otherwise. +`is_terminal()` | [Python](api_reference/state_is_terminal.md) | [C++](https://github.com/deepmind/open_spiel/blob/c6fafb92021a8a3aa5f9746cdb79e74917ed26a5/open_spiel/spiel.h#L322) | Returns True if the state is terminal (game has finished), False otherwise. +`legal_actions()` | [Python](api_reference/state_legal_actions.md) | [C++](https://github.com/deepmind/open_spiel/blob/c6fafb92021a8a3aa5f9746cdb79e74917ed26a5/open_spiel/spiel.h#L263) | Returns the list of legal actions for the current player. 
+`legal_actions(player: int)` | [Python](api_reference/state_legal_actions.md) | [C++](https://github.com/deepmind/open_spiel/blob/c6fafb92021a8a3aa5f9746cdb79e74917ed26a5/open_spiel/spiel.h#L245) | Returns the list of legal actions for the specified player. +`observation_string()` | [Python](api_reference/state_observation_string.md) | [C++](https://github.com/deepmind/open_spiel/blob/89ba2264a66d9db299108fbd2de4a27b71973f54/open_spiel/spiel.h#L516) | Returns a string representing the observation for the current player. +`observation_string(player: int)` | [Python](api_reference/state_observation_string.md) | [C++](https://github.com/deepmind/open_spiel/blob/89ba2264a66d9db299108fbd2de4a27b71973f54/open_spiel/spiel.h#L516) | Returns a string representing the observation for the specified player. +`observation_tensor()` | [Python](api_reference/state_observation_tensor.md) | [C++](https://github.com/deepmind/open_spiel/blob/89ba2264a66d9db299108fbd2de4a27b71973f54/open_spiel/spiel.h#L547) | Returns a list of floats representing the observation for the current player. +`observation_tensor(player: int)` | [Python](api_reference/state_observation_tensor.md) | [C++](https://github.com/deepmind/open_spiel/blob/89ba2264a66d9db299108fbd2de4a27b71973f54/open_spiel/spiel.h#L547) | Returns a list of floats representing the observation for the specified player. +`returns()` | [Python](api_reference/state_returns.md) | [C++](https://github.com/deepmind/open_spiel/blob/c6fafb92021a8a3aa5f9746cdb79e74917ed26a5/open_spiel/spiel.h#L346) | Returns the list of returns (cumulated reward from the start of the game): one value per player. +`rewards()` | [Python](api_reference/state_rewards.md) | [C++](https://github.com/deepmind/open_spiel/blob/c6fafb92021a8a3aa5f9746cdb79e74917ed26a5/open_spiel/spiel.h#L325) | Returns the list of intermediate rewards (rewards obtained since the last time the player acted): one value per player. +`serialize()` | [Python](api_reference/state_serialize.md) | [C++](https://github.com/deepmind/open_spiel/blob/89ba2264a66d9db299108fbd2de4a27b71973f54/open_spiel/spiel.h#L636) | Returns a string representation of the state which can be used to reconstruct the state from the game. + +### Game methods + +Method | Python | C++ | Description +-------------------------------------------- | --------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------- | ----------- +`action_to_string(player: int, action: int)` | [Python](api_reference/game_action_to_string.md) | [C++](https://github.com/deepmind/open_spiel/blob/c6fafb92021a8a3aa5f9746cdb79e74917ed26a5/open_spiel/spiel.h#L946) | Returns a (state-independent) string representation of the specified player's action. +`deserialize_state(serialized_data: str)` | [Python](api_reference/game_deserialize_state.md) | [C++](https://github.com/deepmind/open_spiel/blob/89ba2264a66d9db299108fbd2de4a27b71973f54/open_spiel/spiel.h#L863) | Reconstructs the state from the serialized state string. +`information_state_tensor_shape()` | [Python](api_reference/game_information_state_tensor_shape_size.md) | [C++](https://github.com/deepmind/open_spiel/blob/89ba2264a66d9db299108fbd2de4a27b71973f54/open_spiel/spiel.h#L815) | Shape that the information state tensor should be perceived as. 
+`information_state_tensor_size()` | [Python](api_reference/game_information_state_tensor_shape_size.md) | [C++](https://github.com/deepmind/open_spiel/blob/89ba2264a66d9db299108fbd2de4a27b71973f54/open_spiel/spiel.h#L827) | Size of the list (number of values) returned by the state's information state tensor function. +`max_chance_outcomes()` | [Python](api_reference/game_max_chance_outcomes.md) | [C++](https://github.com/deepmind/open_spiel/blob/c6fafb92021a8a3aa5f9746cdb79e74917ed26a5/open_spiel/spiel.h#L778) | The maximum number of distinct chance outcomes for chance nodes in the game. +`max_game_length()` | [Python](api_reference/game_max_game_length.md) | [C++](https://github.com/deepmind/open_spiel/blob/89ba2264a66d9db299108fbd2de4a27b71973f54/open_spiel/spiel.h#L873) | The maximum length of any one game (in terms of number of decision nodes visited in the game tree). +`max_utility()` | [Python](api_reference/game_max_min_utility.md) | [C++](https://github.com/deepmind/open_spiel/blob/89ba2264a66d9db299108fbd2de4a27b71973f54/open_spiel/spiel.h#L795) | The maximum achievable utility (return) in over any playing (episode) of the game. +`min_utility()` | [Python](api_reference/game_max_min_utility.md) | [C++](https://github.com/deepmind/open_spiel/blob/c6fafb92021a8a3aa5f9746cdb79e74917ed26a5/open_spiel/spiel.h#L795) | The minimum achievable utility (return) in over any playing (episode) of the game. +`new_initial_state()` | [Python](api_reference/game_new_initial_state.md) | [C++](https://github.com/deepmind/open_spiel/blob/c6fafb92021a8a3aa5f9746cdb79e74917ed26a5/open_spiel/spiel.h#L764) | Returns a new initial state of the game (note: which might be a chance node). +`num_distinct_actions()` | [Python](api_reference/game_num_distinct_actions.md) | [C++](https://github.com/deepmind/open_spiel/blob/c6fafb92021a8a3aa5f9746cdb79e74917ed26a5/open_spiel/spiel.h#L752) | Returns the number of (state-independent) distinct actions in the game. +`observation_tensor_shape()` | [Python](api_reference/game_observation_tensor_shape_size.md) | [C++](https://github.com/deepmind/open_spiel/blob/89ba2264a66d9db299108fbd2de4a27b71973f54/open_spiel/spiel.h#L835) | Shape that the observation tensor should be perceived as. +`observation_tensor_size()` | [Python](api_reference/game_observation_tensor_shape_size.md) | [C++](https://github.com/deepmind/open_spiel/blob/89ba2264a66d9db299108fbd2de4a27b71973f54/open_spiel/spiel.h#L847) | Size of the list (number of values) returned by the state's observation tensor function. diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/game_action_to_string.md b/scenarios/bargaining/open_spiel/docs/api_reference/game_action_to_string.md new file mode 100644 index 0000000..edd0d51 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/game_action_to_string.md @@ -0,0 +1,24 @@ +# OpenSpiel game methods: action_to_string + +[Back to Core API reference](../api_reference.md) \ +
+ +`action_to_string(player: int, action: int)` + +Returns a string representation of the specified player's action, independent of +state. + +## Examples: + +```python +import pyspiel + +game = pyspiel.load_game("matrix_pd") +print(game.action_to_string(0, 0)) +# Output: Cooperate + +# Print first player's second action (1). +game = pyspiel.load_game("tic_tac_toe") +print(game.action_to_string(0, 1)) +# Output: x(0, 1) +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/game_deserialize_game_and_state.md b/scenarios/bargaining/open_spiel/docs/api_reference/game_deserialize_game_and_state.md new file mode 100644 index 0000000..d7b2be1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/game_deserialize_game_and_state.md @@ -0,0 +1,49 @@ +# OpenSpiel core functions: deserialize_game_and_state + +[Back to Core API reference](../api_reference.md) \ +
+ +`deserialize_game_and_state(serialized_data: str)` + +Returns a (game, state) tuple that is reconstructed from the serialized string +data. + +Note: pickle can also be used to serialize / deserialize data, and the pickle +uses the same serialization methods. + +## Examples: + +```python +import pyspiel + +game = pyspiel.load_game("tic_tac_toe") +state = game.new_initial_state() +state.apply_action(4) +state.apply_action(2) +state.apply_action(1) +state.apply_action(5) + +serialized_data = pyspiel.serialize_game_and_state(game, state) +print(serialized_data) + +game_copy, state_copy = pyspiel.deserialize_game_and_state(serialized_data) +print(state_copy) + +# Output: +# # Automatically generated by OpenSpiel SerializeGameAndState +# [Meta] +# Version: 1 +# +# [Game] +# tic_tac_toe() +# [State] +# 4 +# 2 +# 1 +# 5 +# +# +# .xo +# .xo +# ... +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/game_deserialize_state.md b/scenarios/bargaining/open_spiel/docs/api_reference/game_deserialize_state.md new file mode 100644 index 0000000..43b1cd9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/game_deserialize_state.md @@ -0,0 +1,34 @@ +# OpenSpiel game methods: deserialize_state + +[Back to Core API reference](../api_reference.md) \ +
+ +`deserialize_state(serialized_data: str)` + +Reconstruct a state object from the state's serialized data (from +`state.serialize()`). The game used to reconstruct must be the same as the game +that created the original state. + +To serialize a state along with the game, use `pyspiel.serialize_game_and_state` +instead. + +## Examples: + +```python +import pyspiel + +game = pyspiel.load_game("tic_tac_toe") +state = game.new_initial_state() +state.apply_action(4) +state.apply_action(2) +state.apply_action(1) +state.apply_action(5) + +state_copy = game.deserialize_state(state.serialize()) +print(state_copy) + +# Output: +# .xo +# .xo +# ... +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/game_information_state_tensor_shape_size.md b/scenarios/bargaining/open_spiel/docs/api_reference/game_information_state_tensor_shape_size.md new file mode 100644 index 0000000..9b225a5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/game_information_state_tensor_shape_size.md @@ -0,0 +1,27 @@ +# OpenSpiel game methods: information_state_tensor_shape and information_state_tensor_size + +[Back to Core API reference](../api_reference.md) \ +
+ +1. `information_state_tensor_shape()` +2. `information_state_tensor_size()` + +(1) Returns the information state tensor's shape: a list of integers +representing the size of each dimension. + +(2) Returns the total number of values used to represent the information state +tensor. + +## Examples: + +```python +import pyspiel + +game = pyspiel.load_game("kuhn_poker") +print(game.information_state_tensor_shape()) +print(game.information_state_tensor_size()) + +# Output: +# [11] +# 11 +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/game_max_chance_outcomes.md b/scenarios/bargaining/open_spiel/docs/api_reference/game_max_chance_outcomes.md new file mode 100644 index 0000000..0bd87da --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/game_max_chance_outcomes.md @@ -0,0 +1,27 @@ +# OpenSpiel game methods: max_chance_outcomes + +[Back to Core API reference](../api_reference.md) \ +
+ +`max_chance_outcomes` + +Returns the maximum number of distinct chance outcomes at chance nodes in the +game. + +## Examples: + +```python +import pyspiel + +game = pyspiel.load_game("chess") +print(game.max_chance_outcomes()) +# Outputs: 0 (no chance nodes in Chess) + +game = pyspiel.load_game("markov_soccer") +print(game.max_chance_outcomes()) +# Outputs: 4 (ball starting location, and who gets initiative) + +game = pyspiel.load_game("leduc_poker") +print(game.max_chance_outcomes()) +# Outputs: 6 (three cards in two suits) +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/game_max_game_length.md b/scenarios/bargaining/open_spiel/docs/api_reference/game_max_game_length.md new file mode 100644 index 0000000..005b2ec --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/game_max_game_length.md @@ -0,0 +1,32 @@ +# OpenSpiel game methods: max_game_length + +[Back to Core API reference](../api_reference.md) \ +
+ +`max_game_length()` + +The maximum length of any one game (in terms of number of decision nodes +visited in the game tree). + +For a simultaneous action game, this is the maximum number of joint decisions. +In a turn-based game, this is the maximum number of individual decisions summed +over all players. Outcomes of chance nodes are not included in this length. + +## Examples: + +```python +import pyspiel + +game = pyspiel.load_game("tic_tac_toe") +print(game.max_game_length()) # Output: 9 + +# Normal-form games always have a game length of 1. +game = pyspiel.load_game("blotto") +print(game.max_game_length()) # Output: 1 + +# The maximum is arbitrarily defined (and/or customizable) in some games. +game = pyspiel.load_game("coop_box_pushing") +print(game.max_game_length()) # Output: 100 +game = pyspiel.load_game("coop_box_pushing(horizon=250)") +print(game.max_game_length()) # Output: 250 +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/game_max_min_utility.md b/scenarios/bargaining/open_spiel/docs/api_reference/game_max_min_utility.md new file mode 100644 index 0000000..11ae905 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/game_max_min_utility.md @@ -0,0 +1,32 @@ +# OpenSpiel game methods: max_utility and min_utility + +[Back to Core API reference](../api_reference.md) \ +
+ +`max_utility()` \ +`min_utility()` + +Returns the maximum and minimum achievable utility (return in any given episode) +in the game. + +## Examples: + +```python +import pyspiel + +# Win/loss game +game = pyspiel.load_game("tic_tac_toe") +print(game.min_utility()) # Output: -1 +print(game.max_utility()) # Output: 1 + +# Win/loss/draw game (draw counts as 0). +game = pyspiel.load_game("chess") +print(game.min_utility()) # Output: -1 +print(game.max_utility()) # Output: 1 + +# Money game. +game = pyspiel.load_game("leduc_poker") +print(game.min_utility()) # Output: -13 +print(game.max_utility()) # Output: 13 +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/game_new_initial_state.md b/scenarios/bargaining/open_spiel/docs/api_reference/game_new_initial_state.md new file mode 100644 index 0000000..586a7b1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/game_new_initial_state.md @@ -0,0 +1,33 @@ +# OpenSpiel game methods: new_initial_state + +[Back to Core API reference](../api_reference.md) \ +
+ +`new_initial_state()` + +Returns a new state object representing the first state of the game. Note, in +particular, this might be a chance node (where the current player is chance) in +games with chance events. + +## Examples: + +```python +import pyspiel + +game = pyspiel.load_game("hex") +state = game.new_initial_state() +print(state) + +# Output: +# . . . . . . . . . . . +# . . . . . . . . . . . +# . . . . . . . . . . . +# . . . . . . . . . . . +# . . . . . . . . . . . +# . . . . . . . . . . . +# . . . . . . . . . . . +# . . . . . . . . . . . +# . . . . . . . . . . . +# . . . . . . . . . . . +# . . . . . . . . . . . +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/game_num_distinct_actions.md b/scenarios/bargaining/open_spiel/docs/api_reference/game_num_distinct_actions.md new file mode 100644 index 0000000..1c48e14 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/game_num_distinct_actions.md @@ -0,0 +1,29 @@ +# OpenSpiel game methods: num_distinct_actions + +[Back to Core API reference](../api_reference.md) \ +
+ +`num_distinct_actions()` + +Returns the number of state-independent actions in the game. Valid actions in a +game will always be between 0 and `num_distinct_actions() - 1`. This number can +be thought of as the fixed width of a policy head or Q-network. Legal actions +are always a subset of { 0, 1, ... , `num_distinct_actions() - 1` }. + +## Examples: + +```python +import pyspiel + +game = pyspiel.load_game("tic_tac_toe") +print(game.num_distinct_actions()) # Output: 9 + +game = pyspiel.load_game("go") +print (game.num_distinct_actions()) # Output: 362 + +game = pyspiel.load_game("chess") +print (game.num_distinct_actions()) # Output: 4672 + +game = pyspiel.load_game("leduc_poker") +print (game.num_distinct_actions()) # Output: 3 +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/game_observation_tensor_shape_size.md b/scenarios/bargaining/open_spiel/docs/api_reference/game_observation_tensor_shape_size.md new file mode 100644 index 0000000..c622a3d --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/game_observation_tensor_shape_size.md @@ -0,0 +1,26 @@ +# OpenSpiel game methods: observation_tensor_shape and observation_tensor_size + +[Back to Core API reference](../api_reference.md) \ +
+ +1. `observation_tensor_shape()` +2. `observation_tensor_size()` + +(1) Returns the observation tensor's shape: a list of integers representing the +size of each dimension. + +(2) Returns the total number of values used to represent the observation tensor. + +## Examples: + +```python +import pyspiel + +game = pyspiel.load_game("tic_tac_toe") +print(game.observation_tensor_shape()) +print(game.observation_tensor_size()) + +# Output: +# [3, 3, 3] +# 27 +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/game_serialize_game_and_state.md b/scenarios/bargaining/open_spiel/docs/api_reference/game_serialize_game_and_state.md new file mode 100644 index 0000000..60c590d --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/game_serialize_game_and_state.md @@ -0,0 +1,48 @@ +# OpenSpiel core functions: serialize_game_and_state + +[Back to Core API reference](../api_reference.md) \ +
+ +`serialize_game_and_state(game: pyspiel.Game, state: pyspiel.State)` + +Returns a string representation of the state and the game that created it. + +Note: pickle can also be used to serialize / deserialize data, and the pickle +uses the same serialization methods. + +## Examples: + +```python +import pyspiel + +game = pyspiel.load_game("tic_tac_toe") +state = game.new_initial_state() +state.apply_action(4) +state.apply_action(2) +state.apply_action(1) +state.apply_action(5) + +serialized_data = pyspiel.serialize_game_and_state(game, state) +print(serialized_data) + +game_copy, state_copy = pyspiel.deserialize_game_and_state(serialized_data) +print(state_copy) + +# Output: +# # Automatically generated by OpenSpiel SerializeGameAndState +# [Meta] +# Version: 1 +# +# [Game] +# tic_tac_toe() +# [State] +# 4 +# 2 +# 1 +# 5 +# +# +# .xo +# .xo +# ... +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/load_game.md b/scenarios/bargaining/open_spiel/docs/api_reference/load_game.md new file mode 100644 index 0000000..bd5c394 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/load_game.md @@ -0,0 +1,35 @@ +# OpenSpiel functions: load_game + +[Back to Core API reference](../api_reference.md) \ +
+ +1. `load_game(game_string: str)` +2. `load_game(game_string: str, parameters: Dict[str, Any])` + +Returns a newly-loaded game. The game string can be the short name of any game +on its own, or the short name followed by a comma-separated list of `key=value` +pairs within parentheses. + +## Examples: + +```python +import pyspiel + +# Loads the game with no/default parameters. +game1 = pyspiel.load_game("tic_tac_toe") + +# Loads the game with no/default parameters (8x8 Breakthrough) +game2 = pyspiel.load_game("breakthrough") + +# Load a three-player Kuhn poker game. +game3 = pyspiel.load_game("kuhn_poker(players=3)") + +# Load the imperfect information variant of Goofspiel with five cards, and the +# unspecified parameters get their default values (two different ways): +game4 = pyspiel.load_game("goofspiel(imp_info=True,num_cards=5,points_order=descending)") +game5 = pyspiel.load_game("goofspiel", { + "imp_info": True, + "num_cards": 5, + "points_order": "descending" +}) +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/registered_names.md b/scenarios/bargaining/open_spiel/docs/api_reference/registered_names.md new file mode 100644 index 0000000..caa0fca --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/registered_names.md @@ -0,0 +1,19 @@ +# OpenSpiel functions: registered_names + +[Back to Core API reference](../api_reference.md) \ +
+ +`registered_names()` + +Returns a list of the short names of all games in the library. These are names that +can be used when loading games in `load_game`. + +## Examples: + +```python +import pyspiel + +# Print the names of all OpenSpiel games. +for short_name in pyspiel.registered_names(): + print(short_name) +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/state_action_to_string.md b/scenarios/bargaining/open_spiel/docs/api_reference/state_action_to_string.md new file mode 100644 index 0000000..af1e818 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/state_action_to_string.md @@ -0,0 +1,20 @@ +# OpenSpiel state methods: action_to_string + +[Back to Core API reference](../api_reference.md) \ +
+ +`action_to_string(player: int, action: int)` + +Returns a string representation of the specified player's action. + +## Examples: + +```python +import pyspiel + +game = pyspiel.load_game("breakthrough") +state = game.new_initial_state() +player = state.current_player() +for action in state.legal_actions(): + print(state.action_to_string(player, action)) +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/state_apply_action.md b/scenarios/bargaining/open_spiel/docs/api_reference/state_apply_action.md new file mode 100644 index 0000000..3deb789 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/state_apply_action.md @@ -0,0 +1,43 @@ +# OpenSpiel state methods: apply_action and apply_actions + +[Back to Core API reference](../api_reference.md) \ +
+ +1. `apply_action(action: int)` +2. `apply_actions(actions: List[int])` + +Apply the specified action in a turn-based game (1), or joint action (one action +per player) in a simultaneous-move game (2). + +(1) must also be called to apply chance outcomes at chance nodes. (1) can also +be called on a simultaneous player state by passing in a flat integer (which was +obtained by `legal_actions()` on a simultaneous node). + +In a simultaneous-move game, when a player has no legal actions, 0 must be +passed in for their action choice. + +For performance reasons, the legality of actions is generally not checked and +applying an illegal action (or outcome at chance nodes) can fail in unspecified +ways. + +## Examples: + +```python +import pyspiel + +game = pyspiel.load_game("tic_tac_toe") +state = game.new_initial_state() +state.apply_action(4) # Player 0 takes the middle +state.apply_action(1) # Player 1 takes the top + +game = pyspiel.load_game("leduc_poker") +state = game.new_initial_state() +state.apply_action(0) # First player gets the lowest card +state.apply_action(1) # Second player gets the next lowest card +state.apply_action(1) # First player checks + +game = pyspiel.load_game("matrix_pd") # Prisoner's dilemma +state = game.new_initial_state() +state.apply_actions([1, 1]) # Defect, Defect +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/state_chance_outcomes.md b/scenarios/bargaining/open_spiel/docs/api_reference/state_chance_outcomes.md new file mode 100644 index 0000000..19f940d --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/state_chance_outcomes.md @@ -0,0 +1,36 @@ +# OpenSpiel state methods: chance_outcomes + +[Back to Core API reference](../api_reference.md) \ +
+ +`chance_outcomes()` + +Returns a list of (action, probability) tuples representing the probability +distribution over chance outcomes. + +## Examples: + +```python +import pyspiel +import numpy as np + +game = pyspiel.load_game("leduc_poker") +state = game.new_initial_state() + +# First player's private card. +print(state.chance_outcomes()) +# Output: +# [(0, 0.16666666666666666), (1, 0.16666666666666666), (2, 0.16666666666666666), (3, 0.16666666666666666), (4, 0.16666666666666666), (5, 0.16666666666666666)] +state.apply_action(0) + +# Second player's private card. +outcomes = state.chance_outcomes() +print(outcomes) +# Output: +# [(1, 0.2), (2, 0.2), (3, 0.2), (4, 0.2), (5, 0.2)] + +# Sampling an outcome and applying it. +action_list, prob_list = zip(*outcomes) +action = np.random.choice(action_list, p=prob_list) +state.apply_action(action) +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/state_current_player.md b/scenarios/bargaining/open_spiel/docs/api_reference/state_current_player.md new file mode 100644 index 0000000..9cfc616 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/state_current_player.md @@ -0,0 +1,30 @@ +# OpenSpiel state methods: current_player + +[Back to Core API reference](../api_reference.md) \ +
+ +`current_player()` + +Returns the player ID of the acting player. Player IDs for actual players start +at 0 and end at `game.num_players() - 1`. There are some special player IDs that +represent the chance player, simultaneous-move nodes, and terminal states. + +## Examples: + +```python +import pyspiel + +game = pyspiel.load_game("tic_tac_toe") +state = game.new_initial_state() +print(state.current_player()) # Output: 0 + +game = pyspiel.load_game("leduc_poker") +state = game.new_initial_state() +print(state.current_player()) # Output: -1 (pyspiel.PlayerId.CHANCE) + +game = pyspiel.load_game("matrix_rps") +state = game.new_initial_state() +print(state.current_player()) # Output: -2 (pyspiel.PlayerId.SIMULTANEOUS) +state.apply_actions([0, 0]) # I like to Rock! Oh yeah? Well.. so do I! +print(state.current_player()) # Output: -4 (pyspiel.PlayerId.TERMINAL) +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/state_history.md b/scenarios/bargaining/open_spiel/docs/api_reference/state_history.md new file mode 100644 index 0000000..2c5dfd2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/state_history.md @@ -0,0 +1,34 @@ +# OpenSpiel state methods: history + +[Back to Core API reference](../api_reference.md) \ +
+ +`history()` + +Returns a list of actions taken by all players (including chance) from the +beginning of the game. + +In simultaneous-move games, joint actions are written out sequentially in player +ID order. + +## Examples: + +```python +import pyspiel + +game = pyspiel.load_game("kuhn_poker") +state = game.new_initial_state() +state.apply_action(0) # First player gets the Jack +state.apply_action(1) # Second player gets the Queen +state.apply_action(0) # First player passes (check) +state.apply_action(1) # Second player bets (raise) + +print(state.history()) +# Output: [0, 1, 0, 1] + +game = pyspiel.load_game("matrix_pd") +state = game.new_initial_state() +state.apply_actions([0, 1]) # Cooperate, Defect +print(state.history()) +# Output: [0, 1] +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/state_information_state_string.md b/scenarios/bargaining/open_spiel/docs/api_reference/state_information_state_string.md new file mode 100644 index 0000000..d390e70 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/state_information_state_string.md @@ -0,0 +1,31 @@ +# OpenSpiel state methods: information_state_string + +[Back to Core API reference](../api_reference.md) \ +
+ +1. `information_state_string()` +2. `information_state_string(player: int)` + +Returns a string representation of the information state, for (1) the current +player, or (2) the specified player. + +## Examples: + +```python +import pyspiel + +game = pyspiel.load_game("kuhn_poker") +state = game.new_initial_state() +state.apply_action(0) # Deal first player the Jack, +state.apply_action(1) # and second player the Queen +state.apply_action(0) # First player passes (check) +state.apply_action(1) # Second player bets (raise) + +# Player 0's turn. +print(state.information_state_string()) +print(state.information_state_string(1)) + +# Output: +# 0pb +# 1pb +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/state_information_state_tensor.md b/scenarios/bargaining/open_spiel/docs/api_reference/state_information_state_tensor.md new file mode 100644 index 0000000..573e0f0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/state_information_state_tensor.md @@ -0,0 +1,32 @@ +# OpenSpiel state methods: information_state_tensor + +[Back to Core API reference](../api_reference.md) \ +
+ +1. `information_state_tensor()` +2. `information_state_tensor(player: int)` + +Returns information state tensor (a list of values) for (1) the current player, +or (2) the specified player. + +## Examples: + +```python +import pyspiel + +game = pyspiel.load_game("kuhn_poker") +state = game.new_initial_state() +state.apply_action(0) # Deal first player the Jack, +state.apply_action(1) # and second player the Queen +state.apply_action(0) # First player passes (check) +state.apply_action(1) # Second player bets (raise) + +# Player 0's turn. +print(state.information_state_tensor()) +print(state.information_state_tensor(1)) + +# Tensors differ in the observing player and the card obtained. +# Output: +# [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0] +# [0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0] +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/state_is_chance_node.md b/scenarios/bargaining/open_spiel/docs/api_reference/state_is_chance_node.md new file mode 100644 index 0000000..bad362f --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/state_is_chance_node.md @@ -0,0 +1,26 @@ +# OpenSpiel state methods: is_chance_node + +[Back to Core API reference](../api_reference.md) \ +
+ +`is_chance_node()` + +Returns True if the state represents a chance node, False otherwise. + +## Examples: + +```python +import pyspiel + +game = pyspiel.load_game("tic_tac_toe") +state = game.new_initial_state() +print(state.is_chance_node()) # Output: False + +game = pyspiel.load_game("leduc_poker") +state = game.new_initial_state() +print(state.is_chance_node()) # Output: True + +game = pyspiel.load_game("matrix_sh") +state = game.new_initial_state() +print(state.is_chance_node()) # Output: False +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/state_is_simultaneous_node.md b/scenarios/bargaining/open_spiel/docs/api_reference/state_is_simultaneous_node.md new file mode 100644 index 0000000..00764e3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/state_is_simultaneous_node.md @@ -0,0 +1,32 @@ +# OpenSpiel state methods: is_simultaneous_node + +[Back to Core API reference](../api_reference.md) \ +
+ +`is_simultaneous_node()` + +Returns True if the state represents a simultaneous player node (where all +players act simultaneously), False otherwise. + +## Examples: + +```python +import pyspiel + +game = pyspiel.load_game("tic_tac_toe") +state = game.new_initial_state() +print(state.is_simultaneous_node()) # Output: False + +game = pyspiel.load_game("matrix_mp") +state = game.new_initial_state() +print(state.is_simultaneous_node()) # Output: True + +# Simultaneous-move game that starts at a chance node. +game = pyspiel.load_game("markov_soccer") +state = game.new_initial_state() +print(state.is_simultaneous_node()) # Output: False +print(state.legal_actions()) +state.apply_action(state.legal_actions()[0]) # Apply first legal chance outcome. +print(state.is_simultaneous_node()) # Output: True +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/state_is_terminal.md b/scenarios/bargaining/open_spiel/docs/api_reference/state_is_terminal.md new file mode 100644 index 0000000..76c444b --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/state_is_terminal.md @@ -0,0 +1,24 @@ +# OpenSpiel state methods: is_terminal + +[Back to Core API reference](../api_reference.md) \ +
+ +`is_terminal()` + +Returns True if the state is terminal (the game has ended), False otherwise. + +## Examples: + +```python +import pyspiel + +game = pyspiel.load_game("tic_tac_toe") +state = game.new_initial_state() +print(state.is_terminal()) # Output: False + +game = pyspiel.load_game("matrix_rps") +state = game.new_initial_state() +print(state.is_terminal()) # Output: False +state.apply_actions([1, 1]) +print(state.is_terminal()) # Output: True +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/state_legal_actions.md b/scenarios/bargaining/open_spiel/docs/api_reference/state_legal_actions.md new file mode 100644 index 0000000..ea9b62b --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/state_legal_actions.md @@ -0,0 +1,36 @@ +# OpenSpiel state methods: legal_actions + +[Back to Core API reference](../api_reference.md) \ +
+ +1. `legal_actions()` +2. `legal_actions(player: int)` + +Returns the list of legal actions (integers between 0 and +`game.num_distinct_actions() - 1`) for (1) the current player, or (2) the +specified player. + +When called on a chance node, returns the legal chance outcomes without their +corresponding probabilities. + +When called on a simultaneous node, returns the set of legal joint actions +represented as flat integers, which can then be passed to `apply_action`. + +## Examples: + +```python +import pyspiel + +game = pyspiel.load_game("tic_tac_toe") +state = game.new_initial_state() +print(state.legal_actions()) +# Output: [0, 1, 2, 3, 4, 5, 6, 7, 8] + +game = pyspiel.load_game("matrix_pd") +state = game.new_initial_state() +print(state.legal_actions(0)) # row player +print(state.legal_actions(1)) # column player +# Output: +# [0, 1] +# [0, 1] +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/state_observation_string.md b/scenarios/bargaining/open_spiel/docs/api_reference/state_observation_string.md new file mode 100644 index 0000000..831af52 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/state_observation_string.md @@ -0,0 +1,46 @@ +# OpenSpiel state methods: observation_string + +[Back to Core API reference](../api_reference.md) \ +
+ +1. `observation_string()` +2. `observation_string(player: int)` + +Returns a string representation of the observation, for (1) the current player, +or (2) the specified player. + +## Examples: + +```python +import pyspiel + +game = pyspiel.load_game("breakthrough") +state = game.new_initial_state() +print(state.action_to_string(0, 148)) # Output: e7f6 +state.apply_action(148) + +print(state.observation_string()) +# Output: +# 8bbbbbbbb +# 7bbbb.bbb +# 6.....b.. +# 5........ +# 4........ +# 3........ +# 2wwwwwwww +# 1wwwwwwww +# abcdefgh + +# Perfect information game, same observation for both players. +print(state.observation_string(0)) +# Output: +# 8bbbbbbbb +# 7bbbb.bbb +# 6.....b.. +# 5........ +# 4........ +# 3........ +# 2wwwwwwww +# 1wwwwwwww +# abcdefgh +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/state_observation_tensor.md b/scenarios/bargaining/open_spiel/docs/api_reference/state_observation_tensor.md new file mode 100644 index 0000000..af471c4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/state_observation_tensor.md @@ -0,0 +1,45 @@ +# OpenSpiel state methods: observation_tensor + +[Back to Core API reference](../api_reference.md) \ +
+ +1. `observation_tensor()` +2. `observation_tensor(player: int)` + +Returns observation tensor (a list of values) for (1) the current player, or (2) +the specified player. + +## Examples: + +```python +import pyspiel +import numpy as np + +game = pyspiel.load_game("tic_tac_toe") +state = game.new_initial_state() +state.apply_action(4) # Middle +state.apply_action(2) # Top-right + +# Player 0's turn. +shape = game.observation_tensor_shape() +print(state.observation_tensor()) +print(state.observation_tensor(0)) + +# First dimension interpreted as selecting from 2D planes of { empty, O, X }. +print(np.reshape(np.asarray(state.observation_tensor()), shape)) + +# Output: +# [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0] +# [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0] +# [[[1. 1. 0.] +# [1. 0. 1.] +# [1. 1. 1.]] +# +# [[0. 0. 1.] +# [0. 0. 0.] +# [0. 0. 0.]] +# +# [[0. 0. 0.] +# [0. 1. 0.] +# [0. 0. 0.]]] +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/state_returns.md b/scenarios/bargaining/open_spiel/docs/api_reference/state_returns.md new file mode 100644 index 0000000..fc1515e --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/state_returns.md @@ -0,0 +1,33 @@ +# OpenSpiel state methods: returns + +[Back to Core API reference](../api_reference.md) \ +
+ +`returns()` + +Returns the list of returns (cumulated reward from the start of the game): one +value per player. + +## Examples: + +```python +import pyspiel + +game = pyspiel.load_game("tic_tac_toe") +state = game.new_initial_state() + +# Play out a win for 'x'. +state.apply_action(4) +state.apply_action(1) +state.apply_action(2) +state.apply_action(5) +state.apply_action(6) +print(state) +print(state.returns()) + +# Output: +# .ox +# .xo +# x.. +# [1.0, -1.0] +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/state_rewards.md b/scenarios/bargaining/open_spiel/docs/api_reference/state_rewards.md new file mode 100644 index 0000000..3d44d10 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/state_rewards.md @@ -0,0 +1,30 @@ +# OpenSpiel state methods: rewards + +[Back to Core API reference](../api_reference.md) \ +
+ +`rewards()` + +Returns the list of intermediate rewards (rewards obtained since the last time +the player acted): one value per player. Note that for many games in OpenSpiel, +this function will return zeroes unless the state is terminal. + +## Examples: + +```python +import pyspiel + +game = pyspiel.load_game("matrix_pd") +state = game.new_initial_state() + +# Defect, Defect +state.apply_actions([1, 1]) + +# Rewards and returns equal in this case +print(state.rewards()) +print(state.returns()) + +# Output: +# [1.0, 1.0] +# [1.0, 1.0] +``` diff --git a/scenarios/bargaining/open_spiel/docs/api_reference/state_serialize.md b/scenarios/bargaining/open_spiel/docs/api_reference/state_serialize.md new file mode 100644 index 0000000..15ef597 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/api_reference/state_serialize.md @@ -0,0 +1,30 @@ +# OpenSpiel state methods: serialize + +[Back to Core API reference](../api_reference.md) \ +
+ +`serialize()` + +Returns a string representation of the state be used to reconstruct the state. +By default, it is a string list of each action taken in the history. + +## Examples: + +```python +import pyspiel + +game = pyspiel.load_game("tic_tac_toe") +state = game.new_initial_state() +state.apply_action(4) +state.apply_action(2) +state.apply_action(1) +state.apply_action(5) + +state_copy = game.deserialize_state(state.serialize()) +print(state_copy) + +# Output: +# .xo +# .xo +# ... +``` diff --git a/scenarios/bargaining/open_spiel/docs/authors.md b/scenarios/bargaining/open_spiel/docs/authors.md new file mode 100644 index 0000000..02457a8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/authors.md @@ -0,0 +1,47 @@ +# Authors + +Names are ordered lexicographically. Typo or similar contributors are omitted. + +## OpenSpiel contributors + +- Bart De Vylder +- Edward Hughes +- Edward Lockhart +- Daniel Hennes +- David Ding +- Dustin Morrill +- Elnaz Davoodi +- Finbarr Timbers +- Ivo Danihelka +- Jean-Baptiste Lespiau +- Janos Kramar +- Jonah Ryan-Davis +- Julian Schrittwieser +- Julien Perolat +- Karl Tuyls +- Manuel Kroiss +- Marc Lanctot +- Matthew Lai +- Michal Sustr +- Raphael Marinier +- Paul Muller +- Ryan Faulkner +- Satyaki Upadhyay +- Sebastian Borgeaud +- Sertan Girgin +- Shayegan Omidshafiei +- Srinivasan Sriram +- Thomas Anthony +- Thomas Köppe +- Timo Ewalds +- Vinicius Zambaldi + +## OpenSpiel with Swift for Tensorflow (now removed) + +- James Bradbury +- Brennan Saeta +- Dan Zheng + +## External contributors + +See https://github.com/deepmind/open_spiel/graphs/contributors. diff --git a/scenarios/bargaining/open_spiel/docs/concepts.md b/scenarios/bargaining/open_spiel/docs/concepts.md new file mode 100644 index 0000000..d6ba376 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/concepts.md @@ -0,0 +1,125 @@ +## First examples + +One can run an example of a game running (in the `build/` folder): + +```bash +./examples/example --game=tic_tac_toe +``` + +Similar examples using the Python API (run from one above `build`): + +```bash +# Similar to the C++ example: +python3 open_spiel/python/examples/example.py --game_string=breakthrough + +# Play a game against a random or MCTS bot: +python3 open_spiel/python/examples/mcts.py --game=tic_tac_toe --player1=human --player2=random +python3 open_spiel/python/examples/mcts.py --game=tic_tac_toe --player1=human --player2=mcts +``` + +## Concepts + +The following documentation describes the high-level concepts. Refer to the code +comments for specific API descriptions. + +Note that, in English, the word "game" is used for both the description of the +rules (e.g. the game of chess) and for a specific instance of a playthrough +(e.g. "we played a game of chess yesterday"). We will be using "playthrough" or +"trajectory" to refer to the second concept. + +The methods names are in `CamelCase` in C++ and `snake_case` in Python without +any other difference (e.g. `state.ApplyAction` in C++ will be +`state.apply_action` in Python). + +### The tree representation + +There are mainly 2 concepts to know about (defined in +[open_spiel/spiel.h](https://github.com/deepmind/open_spiel/blob/master/open_spiel/spiel.h)): + +* A `Game` object contains the high level description for a game (e.g. whether + it is simultaneous or sequential, the number of players, the maximum and + minimum scores). +* A `State`, which describes a specific point (e.g. 
a specific board position + in chess, a specific set of player cards, public cards and past bets in + Poker) within a trajectory. + +All possible trajectories in a game are represented as a tree. In this tree, a +node is a `State` and is associated with a specific history of moves for all +players. Transitions are actions taken by players (in case of a simultaneous +node, the transition is composed of the actions for all players). + +Note that in most games, we deal with chance (i.e. any source of randomness) +using an explicit player (the "chance" player, which has id +`kChancePlayerId`). For example, in Poker, the root state would just be the +players without any cards, and the first transitions will be chance nodes to +deal the cards to the players (in practice, one card is dealt per transition). + +See `spiel.h` for the full API description. For example, +`game.NewInitialState()` will return the root `State`. Then, +`state.LegalActions()` can be used to get the possible legal actions and +`state.ApplyAction(action)` can be used to update `state` in place to play the +given `action` (use `state.Child(action)` to create a new state and apply the +action to it). + +## Loading a game + +The games are all implemented in C++ in [open_spiel/games](https://github.com/deepmind/open_spiel/blob/master/open_spiel/games). +Available game names can be listed using `RegisteredNames()`. + +A game can be created from its name and its arguments (which usually have +defaults). There are 2 ways to create a game: + +* Using the game name and a structured `GameParameters` object (which, in + Python, is a dictionary from argument name to compatible types: int, bool, + str, or a further dict), e.g. `{"players": 3}`, with `LoadGame`. +* Using a string representation such as `kuhn_poker(players=3)`, giving + `LoadGame("kuhn_poker(players=3)")`. See `open_spiel/game_parameters.cc` for + the exact syntax. + +#### Creating sequential games from simultaneous games + +It is possible to apply generic game transformations (see +[open_spiel/game_transforms/](https://github.com/deepmind/open_spiel/blob/master/open_spiel/game_transforms/)) such as loading an `n`-player +simultaneous game into an equivalent turn-based game where simultaneous moves +are encoded as `n` turns. + +One can use `LoadGameAsTurnBased(game)`, or use the string representation, such +as +`turn_based_simultaneous_game(game=goofspiel(imp_info=True,num_cards=4,points_order=descending))`. + +## Playing a trajectory + +Here, for example, is the Python code to play one trajectory: + +```python +import pyspiel +import numpy as np + +game = pyspiel.load_game("kuhn_poker") +state = game.new_initial_state() +while not state.is_terminal(): + legal_actions = state.legal_actions() + if state.is_chance_node(): + # Sample a chance event outcome. + outcomes_with_probs = state.chance_outcomes() + action_list, prob_list = zip(*outcomes_with_probs) + action = np.random.choice(action_list, p=prob_list) + state.apply_action(action) + else: + # The algorithm can pick an action based on an observation (fully observable + # games) or an information state (information available for that player). + # We arbitrarily select the first available action as an example. + action = legal_actions[0] + state.apply_action(action) +``` + +See [open_spiel/python/examples/example.py](https://github.com/deepmind/open_spiel/blob/master/open_spiel/python/examples/example.py) for a more +thorough example that covers more use of the core API.
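+A minimal sketch of the same loop packaged as a reusable helper is shown below.
+The helper name `play_random_trajectory` is purely illustrative (it is not part
+of the pyspiel API), and the sketch assumes a turn-based game with chance nodes,
+such as `kuhn_poker`; a simultaneous-move game would need `apply_actions`
+instead:
+
+```python
+import numpy as np
+import pyspiel
+
+
+def play_random_trajectory(game_name):
+  """Plays one uniformly random playthrough and returns the per-player returns."""
+  game = pyspiel.load_game(game_name)
+  state = game.new_initial_state()
+  while not state.is_terminal():
+    if state.is_chance_node():
+      # Chance nodes expose an explicit distribution; sample from it.
+      action_list, prob_list = zip(*state.chance_outcomes())
+      action = np.random.choice(action_list, p=prob_list)
+    else:
+      # Decision nodes: pick uniformly among the current player's legal actions.
+      action = np.random.choice(state.legal_actions())
+    state.apply_action(action)
+  return state.returns()
+
+
+print(play_random_trajectory("kuhn_poker"))  # e.g. [1.0, -1.0]
+```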
+ +See [open_spiel/python/examples/playthrough.py](https://github.com/deepmind/open_spiel/blob/master/open_spiel/python/examples/playthrough.py) (and +[open_spiel/python/algorithms/generate_playthrough.py](https://github.com/deepmind/open_spiel/blob/master/open_spiel/python/algorithms/generate_playthrough.py)) for an +richer example generating a playthrough and printing all available information. + +In C++, see [open_spiel/examples/example.cc](https://github.com/deepmind/open_spiel/blob/master/open_spiel/examples/example.cc) which generates +random trajectories. diff --git a/scenarios/bargaining/open_spiel/docs/conf.py b/scenarios/bargaining/open_spiel/docs/conf.py new file mode 100644 index 0000000..0181aa3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/conf.py @@ -0,0 +1,212 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Configuration for Sphinx.""" + +# -*- coding: utf-8 -*- +# +# Configuration file for the Sphinx documentation builder. +# +# This file does only contain a selection of the most common options. For a +# full list see the documentation: +# http://www.sphinx-doc.org/en/master/config + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + +# -- Project information ----------------------------------------------------- + +project = 'OpenSpiel' +copyright = '2019, DeepMind Technologies Ltd' # pylint: disable=redefined-builtin +author = 'DeepMind Technologies Ltd' + +# The short X.Y version +version = '' +# The full version, including alpha/beta/rc tags +release = '' + +# -- General configuration --------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.intersphinx', + 'sphinx.ext.todo', + 'sphinx.ext.imgmath', + 'sphinx.ext.ifconfig', + 'sphinx.ext.viewcode', + 'sphinx.ext.githubpages', + 'recommonmark', + 'sphinx_markdown_tables', +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = { + '.rst': 'restructuredtext', + '.txt': 'markdown', + '.md': 'markdown', +} + +# The master toctree document. +master_doc = 'index' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. 
+# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path . +exclude_patterns = [ + '_build', 'Thumbs.db', '.DS_Store', '*README.md', + 'requirements.readthedocs.txt' +] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. The default is `alabaster`. +# +html_theme = 'sphinx_rtd_theme' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# See https://sphinx-rtd-theme.readthedocs.io/en/stable/configuring.html +html_theme_options = { + 'collapse_navigation': False, +} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Custom sidebar templates, must be a dictionary that maps document names +# to template names. +# +# The default sidebars (for documents that don't match any pattern) are +# defined by theme itself. Builtin themes are using these templates by +# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', +# 'searchbox.html']``. +# +# html_sidebars = {} + +# -- Options for HTMLHelp output --------------------------------------------- + +# Output file base name for HTML help builder. +htmlhelp_basename = 'open_spieldoc' + +# -- Options for LaTeX output ------------------------------------------------ + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'open_spiel.tex', 'open\\_spiel Documentation', + 'The open\\_spiel authors', 'manual'), +] + +# -- Options for manual page output ------------------------------------------ + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [(master_doc, 'open_spiel', 'open_spiel Documentation', [author], 1) + ] + +# -- Options for Texinfo output ---------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'open_spiel', 'open_spiel Documentation', author, 'open_spiel', + 'One line description of project.', 'Miscellaneous'), +] + +# -- Options for Epub output ------------------------------------------------- + +# Bibliographic Dublin Core info. 
+epub_title = project +epub_author = author +epub_publisher = author +epub_copyright = copyright + +# The unique identifier of the text. This can be a ISBN number +# or the project homepage. +# +# epub_identifier = '' + +# A unique identification for the text. +# +# epub_uid = '' + +# A list of files that should not be packed into the epub file. +epub_exclude_files = ['search.html'] + +# -- Extension configuration ------------------------------------------------- + +# -- Options for intersphinx extension --------------------------------------- + +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = {'https://docs.python.org/': None} + +# -- Options for todo extension ---------------------------------------------- + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = True diff --git a/scenarios/bargaining/open_spiel/docs/contributing.md b/scenarios/bargaining/open_spiel/docs/contributing.md new file mode 100644 index 0000000..1c865b9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/contributing.md @@ -0,0 +1,132 @@ +# Guidelines + +Above all, OpenSpiel is designed to be easy to install and use, easy to +understand, easy to extend (“hackable”), and general/broad. OpenSpiel is built +around two major important design criteria: + +- **Keep it simple.** Simple choices are preferred to more complex ones. The + code should be readable, usable, extendable by non-experts in the + programming language(s), and especially to researchers from potentially + different fields. OpenSpiel provides reference implementations that are used + to learn from and prototype with, rather than fully-optimized / + high-performance code that would require additional assumptions (narrowing + the scope / breadth) or advanced (or lower-level) language features. + +- **Keep it light.** Dependencies can be problematic for long-term + compatibility, maintenance, and ease-of- use. Unless there is strong + justification, we tend to avoid introducing dependencies to keep things easy + to install and more portable. + +# Support expectations + +We, the OpenSpiel authors, definitely engage in supporting the community. As it +can be time-consuming, we try to find a good balance between ensuring we are +responsive and being able to continue to do our day-to-day work and research. + +Generally speaking, if you are willing to get a specific feature implemented, +the most effective way is to implement it and send a Pull Request. For large +changes, or ones involving design decisions, open a bug to check the idea is ok +first. + +The higher the quality, the easier it will be to be accepted. For instance, +following the +[C++ Google style guide](https://google.github.io/styleguide/cppguide.html) and +[Python Google style guide](http://google.github.io/styleguide/pyguide.html) +will help with the integration. + +As examples, MacOS support, Window support, example improvements, various +bug-fixes or new games has been straightforward to be included and we are very +thankful to everyone who helped. + +## Bugs + +We aim to answer bugs at a reasonable pace, several times a week. However, for +bugs involving large changes (e.g. adding new games, adding public state +supports) we cannot commit to implementing it and encourage everyone to +contribute directly. + +## Pull requests + +You can expect us to answer/comment back and you will know from the comment if +it will be merged as is or if it will need additional work. 
+ +For pull requests, they are merged as batches to be more efficient, at least +every two weeks (for bug fixes, it will likely be faster to be integrated). So +you may need to wait a little after it has been approved to actually see it +merged. + +# OpenSpiel visual Graph + +To help you understand better the framework as a whole you can go to +[openspielgraph](https://openspielgraph.netlify.app) and use an interactive +graph that shows the OpenSpiel repository in a wide and easy to undestand way. + +By providing intuitive visual representations, it simplifies the debugging +process, aids in the optimization of algorithms, and fosters a more efficient +workflow. + +For a practical example, see one of the reasons OpenSpielGraph was thought of +and also how to use OpenSpiel and WebAssembly... + +# Roadmap and Call for Contributions + +Contributions to this project must be accompanied by a Contributor License +Agreement (CLA). See +[CONTRIBUTING.md](https://github.com/deepmind/open_spiel/blob/master/CONTRIBUTING.md) +for the details. + +Here, we outline our current highest priorities: this is where we need the most +help. There are also suggestion for larger features and research projects. Of course, +all contributions are welcome. + +Before making a contribution to OpenSpiel, please read the guidelines. We also +kindly request that you contact us before writing any large piece of code, in +case (a) we are already working on it and/or (b) it's something we have already +considered and may have some design advice on its implementation. Please also +note that some games may have copyrights which might require legal approval. +Otherwise, happy hacking! + +- **Long-term and Ongoing Maintenance**. This is the most important way to help. + Having OpenSpiel bug-free and working smoothly is the highest priority. Things + can stop working for a variety of reasons due to version changes and backward + incompatibility, but also due to discovering new problems that require some time + to fix. To see these items, look for issues with the "help wanted" tag on the + [Issues page](https://github.com/google-deepmind/open_spiel/issues). + +- **New Features and Algorithms**. There are regular requests for new features + and algorithms that we just don't have time to provide. Look for issues with the + "contribution welcome" tag on the + [Issues page](https://github.com/google-deepmind/open_spiel/issues). + +- **Windows support**. Native Windows support was added in early 2022, but + remains experimental and only via building from source. It would be nice to + have Github Actions CI support on Windows to ensure that Windows support is + actively maintained, and eventually support installing OpenSpiel via pip on + Windows as well. The tool that builds the binary wheels (cibuildwheel) + already supports Windows as a target platform. + +- **Visualizations of games**. There exists an interactive viewer for + OpenSpiel games called [SpielViz](https://github.com/michalsustr/spielviz). + Contributions to this project, and more visualization tools with OpenSpiel, + are very welcome as they could help immensely with debugging and testing + the AI beyond the console. + +- **Structured Action Spaces**. Currently, actions are integers between 0 and + some value. There is no easy way to interpret what each action means in a + game-specific way. Nor is there any way to easily represent a composite + action in terms of its parts. 
A structured action space could represent + actions as a sequence of values (like information states and observations-- + and can also include shapes) which can be learned instead of mappings to + flat numbers. Then, each game could have a mapping from the structured + action to the action taken. + +- **APIs for other languages** (Go, Rust, Julia). We currently have these + supported but little beyond the core API and random simulation tests. Several + are very basic (or experimental). It would be nice to properly support these + by having a few simple algorithms run via the bindings on OpenSpiel games. + +- **New Games**. New games are always welcome. If you do not have one in mind, + check out the + [Call for New Games](https://github.com/google-deepmind/open_spiel/issues/843) + issue. + diff --git a/scenarios/bargaining/open_spiel/docs/developer_guide.md b/scenarios/bargaining/open_spiel/docs/developer_guide.md new file mode 100644 index 0000000..b7796bc --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/developer_guide.md @@ -0,0 +1,193 @@ +## The code structure + +Generally speaking, the directories directly under `open_spiel` are C++ (except +for `integration_tests` and `python`). A similar structure is available in +`open_spiel/python`, containing the Python equivalent code. + +Some top level directories are special: + +* `open_spiel/integration_tests`: Generic (python) tests for all the games. +* `open_spiel/tests`: The C++ common test utilities. +* `open_spiel/scripts`: The scripts useful for development (building, running + tests, etc). + +For example, we have for C++: + +* `open_spiel/`: Contains the game abstract C++ API. +* `open_spiel/games`: Contains the games C++ implementations. +* `open_spiel/algorithms`: The C++ algorithms implemented in OpenSpiel. +* `open_spiel/examples`: The C++ examples. +* `open_spiel/tests`: The C++ common test utilities. + +For Python you have: + +* `open_spiel/python/examples`: The Python examples. +* `open_spiel/python/algorithms/`: The Python algorithms. + +## C++ and Python implementations. + +Some objects (e.g. `Policy`, `CFRSolver`, `BestResponse`) are available both in +C++ and Python. The goal is to be able to use C++ objects in place of Python +objects for most of the cases. In particular, for the objects that are well +supported, expect to have in the test for the Python object, a test checking +that both the C++ and the Python implementation behave the same. + +## Adding a game + +We describe here only the simplest and fastest way to add a new game. It is +ideal to first be aware of the general API (see `open_spiel/spiel.h`). These +guidelines primarily assume C++ games; the process is analogous for Python +games and any special considerations are noted in the steps. + +1. Choose a game to copy from in `open_spiel/games/` (or + `open_spiel/python/games/`). Suggested + games: Tic-Tac-Toe and Breakthrough for perfect information without chance + events, Backgammon or Pig for perfect information games with chance events, + Goofspiel and Oshi-Zumo for simultaneous move games, and Leduc poker and + Liar’s dice for imperfect information games. For the rest of these steps, we + assume Tic-Tac-Toe. +2. Copy the header and source: `tic_tac_toe.h`, `tic_tac_toe.cc`, and + `tic_tac_toe_test.cc` to `new_game.h`, `new_game.cc`, and `new_game_test.cc` + (or `tic_tac_toe.py` and `tic_tac_toe_test.py`). +3. Configure CMake: + * If you are working with C++: add the new game’s source files to + `open_spiel/games/CMakeLists.txt`. 
+ * If you are working with C++: add the new game’s test target to + `open_spiel/games/CMakeLists.txt`. + * If you are working with Python: add the test to + `open_spiel/python/CMakeLists.txt` and import it in + `open_spiel/python/games/__init__.py` +4. Update boilerplate C++/Python code: + * In `new_game.h`, rename the header guard at the the top and bottom of + the file. + * In the new files, rename the inner-most namespace from `tic_tac_toe` to + `new_game`. + * In the new files, rename `TicTacToeGame` and `TicTacToeState` to + `NewGameGame` and `NewGameState`. + * At the top of `new_game.cc`, change the short name to `new_game` and + include the new game’s header. +5. Update Python integration tests: + * Add the short name to the list of expected games in + `open_spiel/python/tests/pyspiel_test.py`. +6. You should now have a duplicate game of Tic-Tac-Toe under a different name. + It should build and the test should run, and can be verified by rebuilding + and running the example `build/examples/example --game=new_game`. Note: + Python games cannot be run using this example; use + `open_spiel/python/examples/example.py` instead. +7. Now, change the implementations of the functions in `NewGameGame` and + `NewGameState` to reflect your new game’s logic. Most API functions should + be clear from the game you copied from. If not, each API function that is + overridden will be fully documented in superclasses in `open_spiel/spiel.h`. +8. To test the game as it is being built, you can play test the functionality + interactively using `ConsolePlayTest` in + `open_spiel/tests/console_play_test.h`. At the very least, the test should + include some random simulation tests (see other game's tests for an + example). Note: Python games cannot be tested using `ConsolePlayTest`, + however both C++ and Python games can also be tested on the console using + `open_spiel/python/examples/mcts_example` with human players. +9. Run your code through a linter so it conforms to Google's + [style guides](https://google.github.io/styleguide/). For C++ use + [cpplint](https://pypi.org/project/cpplint/). For Python, use + [pylint](https://pypi.org/project/pylint/) with the + [pylintrc from the Google style guide](https://google.github.io/styleguide/pyguide.html). + There is also [YAPF](https://github.com/google/yapf/) for Python as well. +10. Once done, rebuild and rerun the tests to ensure everything passes + (including your new game’s test!). +11. Add a playthrough file to catch regressions: + * Run `./open_spiel/scripts/generate_new_playthrough.sh new_game` to + generate a random game, to be used by integration tests to prevent any + regression. `open_spiel/integration_tests/playthrough_test.py` will + automatically load the playthroughs and compare them to newly generated + playthroughs. + * If you have made a change that affects playthroughs, run + `./scripts/regenerate_playthroughs.sh` to update them. + +## Conditional dependencies + +The goal is to make it possible to optionally include external dependencies and +build against them. The setup was designed to met the following needs: + +- **Single source of truth**: We want a single action to be sufficient to + manage the conditional install and build. Thus, we use bash environment + variables, that are read both by the install script (`install.sh`) to know + whether we should clone the dependency, and by CMake to know whether we + should include the files in the target. Tests can also access the bash + environment variable. 
+- **Light and safe defaults**: By default, we exclude the dependencies to + diminish install time and compilation time. If the bash variable is unset, + we download the dependency and we do not build against it. +- **Respect the user-defined values**: The `global_variables.sh` script, which + is included in all the scripts that needs to access the constant values, do + not override the constants but set them if and only if they are undefined. + This respects the user-defined values, e.g. on their `.bashrc` or on the + command line. + +When you add a new conditional dependency, you need to touch: + +- the root CMakeLists.txt to add the option, with an OFF default +- add the option to `scripts/global_variables.sh` +- change `install.sh` to make sure the dependency is installed +- use constructs like `if (${OPEN_SPIEL_BUILD_WITH_HANABI})` in CMake to + optionally add the targets to build. + +## Debugging tools + +For complex games it may be tricky to get all the details right. Reading through +the playthrough (or visually inspecting random games via the example) is the +first step in verifying the game mechanics. You can visualize small game trees +using [open_spiel/python/examples/treeviz_example.py](https://github.com/deepmind/open_spiel/blob/master/open_spiel/python/examples/treeviz_example.py) or for +large games there is an interactive viewer for OpenSpiel games called +[SpielViz](https://github.com/michalsustr/spielviz). + +## Adding Game-Specific Functionality + +OpenSpiel focuses on maintaining a general API to an underlying suite of games, +but sometimes it is convenient to work on specific games. In this section, we +describe how to get (or set) game-specific information from/to the generic state +objects, and how to expose these functions to python. + +Suppose, for example, we want to look at (or set) the private cards in a game of +Leduc poker. We will use an example based on this +[this commit](https://github.com/deepmind/open_spiel/commit/4cd1e5889e447d285eb3f16901ccab5c14e62187). + +1. First, locate the game you want to access. The game implementations are in + the `games/` subdirectory and have two main files: e.g. `leduc_poker.h` + (header) and `leduc_poker.cc` (implementation). +2. For simple accessor methods that just return the information and feel free + have the full implementation to the game's header file (e.g. + `LeducState::GetPrivateCards`). You can also declare the function in the + header and provide the implementation in source file (e.g. + `LeducPoker::SetPrivateCards`). +3. That's it for the core game logic. To expose these methods to Python, add + them to the Python module (via pybind11). Some games already have + game-specific functionality, so if a files named `games_leduc_poker.h` and + `games_leduc_poker.cc` exist within `python/pybind11`, add to them (skip to + Step 5). +4. If the games-specific files do not exist for your game of interest, then: + * Add the files. Copy one of the other ones, adapt the names, and remove + most of the bindings code. + * Add the new files to the `PYTHON_BINDINGS` list in + `python/CMakeFiles.txt`. + * Modify `pyspiel.cc`: include the header at the top, and call the init + function at the bottom. +5. Add the custom methods to the game-specific python bindings + (`games_leduc_poker.cc`, i.e. `LeducPoker::GetPrivateCards` and + `LeducPoker::SetPrivateCards`). For simple types, this should be relatively + straight-forward; you can see how by looking at the other game-specific + functions. 
For complex types, you may have to bind additional code (see e.g. + `games_backgammon.cc`). If it is unclear, do not hesitate to ask, but also + please check the + [pybind11 documentation](https://pybind11.readthedocs.io/en/stable/). +6. Add a simple test to `python/games_sim_test.py` to check that it worked. For + inspiration, see e.g. `test_leduc_get_and_set_private_cards`. + +## Language APIs + +There are four other language APIs that expose functionality from the C++ core. + +- [Python](https://github.com/deepmind/open_spiel/tree/master/open_spiel/python). +- [Julia](https://github.com/deepmind/open_spiel/tree/master/open_spiel/julia) +- [Go](https://github.com/deepmind/open_spiel/tree/master/open_spiel/go) + (unmaintained) +- [Rust](https://github.com/deepmind/open_spiel/tree/master/open_spiel/rust) + (unmaintained) diff --git a/scenarios/bargaining/open_spiel/docs/fix_table_links.sh b/scenarios/bargaining/open_spiel/docs/fix_table_links.sh new file mode 100755 index 0000000..ba9b332 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/fix_table_links.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# +# Copyright 2022 DeepMind Technologies Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Links to sub documents within tables are not properly converted. +# E.g. a reference to a separate markdown table is not converted to the +# corresponding .html in Read the Docs. +# +# This is an open issue with sphinx-markdown-tables, see +# https://github.com/ryanfox/sphinx-markdown-tables/issues/18 + +if [[ "$READTHEDOCS" = "True" ]]; then + # Fix the links pre-build. In this case, edit the markdown file rather than + # the resulting HTML + FILE="docs/api_reference.md" + if [[ "$1" != "" ]]; then + FILE="$1" + fi + sed -E 's/\[Python\]\((.*).md\)/\[Python\]\(\1.html\)/g' -i ${FILE} +else + # Fix the links post-build: rewrite the HTML after it's been generated. Was + # not able to get this to work on Read the Docs. + FILE="_build/html/api_reference.html" + if [[ "$1" != "" ]]; then + FILE="$1" + fi + sed -E 's/a href="(.*)\.md"/a href="\1\.html"/g' -i ${FILE} +fi + diff --git a/scenarios/bargaining/open_spiel/docs/games.md b/scenarios/bargaining/open_spiel/docs/games.md new file mode 100644 index 0000000..848a655 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/games.md @@ -0,0 +1,95 @@ +# Available games + +Statuses: + +- 🟢: thoroughly-tested. In many cases, we verified against known values and/or reproduced results from papers. +- 🔶: implemented but lightly tested. +- ❌: known issues (see notes below and code for details). + +Status | Game | Players | Deterministic | Perfect info | Description +---------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------- | ------- | -------------- | ------------ | ----------- +🔶 | [2048](https://en.wikipedia.org/wiki/2048_\(video_game\)) | 1 | ❌ | ✅ | A single player game where player aims to create a 2048 tile by merging other tiles. 
+🔶 | [Amazons](https://en.wikipedia.org/wiki/Game_of_the_Amazons) | 2 | ✅ | ✅ | Move pieces on a board trying to block opponents from moving. +🔶 | [Atari](https://en.wikipedia.org/wiki/Atari) | 1 | ❌ (most games) | ✅ | Agent plays classic games from [Gym's Atari Environments](https://www.gymlibrary.dev/environments/atari/), such as Breakout. +🟢 | [Backgammon](https://en.wikipedia.org/wiki/Backgammon) | 2 | ❌ | ✅ | Players move their pieces through the board based on the rolls of dice. +🔶 | Bargaining | 2 | ❌ | ❌ | Agents negotiate for items in a pool with different (hidden) valuations. References: [DeVault et al. '15](https://www.aaai.org/ocs/index.php/SSS/SSS15/paper/viewFile/10335/10100). [Lewis et al. '17](https://arxiv.org/abs/1706.05125). +🔶 | [Battleship](https://en.wikipedia.org/wiki/Battleship_\(game\)) | 2 | ✅ | ❌ | Players place ships and shoot at each other in turns. References: [Farina et al. '19, Correlation in Extensive-Form Games: Saddle-Point Formulation and Benchmarks](https://papers.nips.cc/paper/9122-correlation-in-extensive-form-games-saddle-point-formulation-and-benchmarks.pdf). +🔶 | [Blackjack](https://en.wikipedia.org/wiki/Blackjack) | 1 | ❌ | ❌ | Simplified version of blackjack, with only HIT/STAND moves. +🔶 | [Block Dominoes](https://en.wikipedia.org/wiki/Dominoes) | 2 | ❌ | ❌ | Most simple version of dominoes. Consists of 28 tiles, featuring all combinations of spot counts (also called pips or dots) between zero and six. +🟢 | [Breakthrough](https://en.wikipedia.org/wiki/Breakthrough_\(board_game\)) | 2 | ✅ | ✅ | Simplified chess using only pawns. +🟢 | [Bridge](https://en.wikipedia.org/wiki/Contract_bridge) | 4 | ❌ | ❌ | A card game where players compete in pairs. +🟢 | [(Uncontested) Bridge bidding](https://en.wikipedia.org/wiki/Contract_bridge) | 2 | ❌ | ❌ | Players score points by forming specific sets with the cards in their hands. +🔶 | Catch | 1 | ❌ | ✅ | Agent must move horizontally to 'catch' a descending ball. Designed to test basic learning. References: [Mnih et al. 2014, Recurrent Models of Visual Attention](https://papers.nips.cc/paper/5542-recurrent-models-of-visual-attention.pdf). [Osband et al '19, Behaviour Suite for Reinforcement Learning, Appendix A](https://arxiv.org/abs/1908.03568). +🔶 | [Checkers](https://en.wikipedia.org/wiki/Checkers) | 2 | ✅ | ✅ | Players move pieces around the board with the goal of eliminating the opposing pieces. +🔶 | Cliff Walking | 1 | ✅ | ✅ | Agent must find goal without falling off a cliff. Designed to demonstrate exploration-with-danger. [Sutton et al. '18, page 132](http://www.incompleteideas.net/book/bookdraft2018mar21.pdf). +🔶 | [Clobber](https://en.wikipedia.org/wiki/Clobber) | 2 | ✅ | ✅ | Simplified checkers, where tokens can capture neighbouring tokens. Designed to be amenable to combinatorial analysis. +🔶 | Coin Game | 2 | ❌ | ❌ | Agents must collect their and their collaborator's tokens while avoiding a third kind of token. Designed to test divining of collaborator's intentions. References: [Raileanu et al. '18, Modeling Others using Oneself in Multi-Agent Reinforcement Learning](https://arxiv.org/abs/1802.09640). +🔶 | Colored Trails | 3 | ❌ | ❌ | Agents negotiations for chips that they they play on a colored grid to move closer to the goal. References: [Ya'akov et al. '10](https://dash.harvard.edu/handle/1/4726287). [Fecici & Pfeffer '08](https://dl.acm.org/doi/10.5555/1402383.1402431). [de Jong et al. '11](https://www.ifaamas.org/Proceedings/aamas2011/papers/C4_R57.pdf). 
+🟢 | [Connect Four](https://en.wikipedia.org/wiki/Connect_Four) | 2 | ✅ | ✅ | Players drop tokens into columns to try and form a pattern. +🔶 | Cooperative Box-Pushing | 2 | ✅ | ✅ | Agents must collaborate to push a box into the goal. Designed to test collaboration. References: [Seuken & Zilberstein '12, Improved Memory-Bounded Dynamic Programming for Decentralized POMDPs](https://arxiv.org/abs/1206.5295). +🟢 | [Chess](https://en.wikipedia.org/wiki/Chess) | 2 | ✅ | ✅ | Players move pieces around the board with the goal of eliminating the opposing pieces. +🔶 | [Crazy Eights](https://en.wikipedia.org/wiki/Crazy_Eights) | 2 | ❌ | ❌ | A precursor of UNO (see [here](https://www.unorules.org/crazy-eights/)). +🔶 | [Cribbage](https://en.wikipedia.org/wiki/Cribbage) | 2-4 | ❌ | ❌ | A card game that involves grouping cards in combinations to gain points. +🔶 | Dark Hex | 2 | ✅ | ❌ | Hex, except the opponent's tokens are hidden (imperfect-information version). +🔶 | Deep Sea | 1 | ✅ | ✅ | Agent must explore to find reward (first version) or penalty (second version). Designed to test exploration. References: [Osband et al. '17, Deep Exploration via Randomized Value Functions](https://arxiv.org/abs/1703.07608). +🟢 | [Dots and Boxes](https://en.wikipedia.org/wiki/Dots_and_boxes) | 2 | ✅ | ✅ | Players put lines between dots to form boxes to get points. +🔶 | [Dou Dizhu](https://en.wikipedia.org/wiki/Dou_dizhu) | 3 | ❌ | ❌ | A three-player games where one player (dizhu) plays against a team of two (peasants). +🔶 | [Euchre](https://en.wikipedia.org/wiki/Euchre) | 4 | ❌ | ❌ | Trick-taking card game where players compete in pairs. +🔶 | [EinStein würfelt nicht!](https://en.wikipedia.org/wiki/EinStein_w%C3%BCrfelt_nicht!) | 2 | ❌ | ✅ | Players control 6 numbered cubes, selected randomly by the roll of a die. The player that gets on the opponent's board corner, or captures all the opponent's cubes wins. +🟢 | [First-price Sealed-Bid Auction](https://en.wikipedia.org/wiki/First-price_sealed-bid_auction) | 2-10 | ❌ | ❌ | Agents submit bids simultaneously; highest bid wins, and that's the price paid. +🟢 | [Gin Rummy](https://en.wikipedia.org/wiki/Gin_rummy) | 2 | ❌ | ❌ | Players score points by forming specific sets with the cards in their hands. +🟢 | [Go](https://en.wikipedia.org/wiki/Go_\(game\)) | 2 | ✅ | ✅ | Players place tokens on the board with the goal of encircling territory. +🟢 | [Goofspiel](https://en.wikipedia.org/wiki/Goofspiel) | 2-10 | ❌ | ❌ | Players bid with their cards to win other cards. +🟢 | [Hanabi](https://en.wikipedia.org/wiki/Hanabi_\(card_game\)) | 2-5 | ❌ | ❌ | Players can see only other player's pieces, and everyone must cooperate to win. References: [Bard et al. '19, The Hanabi Challenge: A New Frontier for AI Research](https://arxiv.org/abs/1902.00506). Implemented via [Hanabi Learning Environment](https://github.com/deepmind/hanabi-learning-environment). +🟢 | [Havannah](https://en.wikipedia.org/wiki/Havannah_\(board_game\)) | 2 | ✅ | ✅ | Players add tokens to a hex grid to try and form a winning structure. +🟢 | [Hearts](https://en.wikipedia.org/wiki/Hearts_\(card_game\)) | 3-6 | ❌ | ❌ | A card game where players try to avoid playing the highest card in each round. +🔶 | [Hex](https://en.wikipedia.org/wiki/Hex_\(board_game\)) | 2 | ✅ | ✅ | Players add tokens to a hex grid to try and link opposite sides of the board. References: [Hex, the full story by Ryan Hayward and Bjarne Toft](https://webdocs.cs.ualberta.ca/~hayward/hexbook/hex.html). 
+🟢 | [Hive](https://www.gen42.com/product/hive/) | 2 | ✅ | ✅ | Players add bug tiles on the board to try to surround the other player's queen. +🔶 | [Kriegspiel](https://en.wikipedia.org/wiki/Kriegspiel_\(chess\)) | 2 | ✅ | ❌ | Chess with opponent's pieces unknown. Illegal moves have no effect - it remains the same player's turn until they make a legal move. References: [Monte Carlo tree search in Kriegspiel](https://www.ics.uci.edu/~dechter/courses/ics-295/fall-2019/papers/2010-mtc-aij.pdf). [Game-Tree Search with Combinatorially Large Belief States, Parker 2005](https://www.cs.umd.edu/~nau/papers/parker2005game-tree.pdf). +🟢 | [Kuhn poker](https://en.wikipedia.org/wiki/Kuhn_poker) | 2 | ❌ | ❌ | Simplified poker amenable to game-theoretic analysis. +🔶 | Laser Tag | 2 | ❌ | ❌ | Agents see a local part of the grid, and attempt to tag each other with beams. References: [Leibo et al. '17](https://arxiv.org/abs/1702.03037). [Lanctot et al. '17](https://arxiv.org/abs/1711.00832). +🟢 | Leduc poker | 2 | ❌ | ❌ | Simplified poker amenable to game-theoretic analysis. References: [Southey et al. '05, Bayes’ bluff: Opponent modelling in poker](https://arxiv.org/abs/1207.1411). +🔶 | [Lewis Signaling](https://en.wikipedia.org/wiki/Lewis_signaling_game) | 2 | ❌ | ❌ | Receiver must choose an action dependent on the sender's hidden state. Designed to demonstrate the use of conventions. +🟢 | [Liar's Dice](https://en.wikipedia.org/wiki/Liar%27s_dice) | 2 | ❌ | ❌ | Players bid and bluff on the state of all the dice together, given only the state of their dice. +🔶 | [Liar's Poker](https://en.wikipedia.org/wiki/Liar%27s_poker) | 2+ | ❌ | ❌ | Players bid and bluff on the state of all hands, given only the state of their hand. +🔶 | [Mensch ärgere Dich nicht](https://en.wikipedia.org/wiki/Mensch_%C3%A4rgere_Dich_nicht) | 2-4 | ❌ | ✅ | Players roll dice to move their pegs toward their home row while throwing other players' pegs to the out area. +🔶 | [Mancala](https://en.wikipedia.org/wiki/Kalah) | 2 | ✅ | ✅ | Players take turns sowing beans on the board and try to capture more beans than the opponent. +🔶 | Markov Soccer | 2 | ❌ | ❌ | Agents must take the ball to their goal, and can 'tackle' the opponent by predicting their next move. References: [Littman '94, Markov games as a framework for multi-agent reinforcement learning](https://www2.cs.duke.edu/courses/spring07/cps296.3/littman94markov.pdf). [He et al. '16, Opponent Modeling in Deep Reinforcement Learning](https://arxiv.org/abs/1609.05559). +🟢 | [Matching Pennies](https://en.wikipedia.org/wiki/Matching_pennies) (3-player) | 3 | ✅ | ❌ | Players must predict and match/oppose another player. Designed to have an unstable Nash equilibrium. References: [Jordan '93](https://www.sciencedirect.com/science/article/abs/pii/S0899825683710225). +🟢 | Mean Field Game: crowd modelling | n/a | n/a | n/a | References: [Scaling up Mean Field Games with Online Mirror Descent](https://arxiv.org/abs/2103.00623), [Scalable Deep Reinforcement Learning Algorithms for Mean Field Games](https://arxiv.org/abs/2203.11973), [Learning in Mean Field Games: A Survey](https://arxiv.org/abs/2205.12944). +🟢 | Mean Field Game: crowd modelling 2d | n/a | n/a | n/a | References: [Scaling up Mean Field Games with Online Mirror Descent](https://arxiv.org/abs/2103.00623), [Scalable Deep Reinforcement Learning Algorithms for Mean Field Games](https://arxiv.org/abs/2203.11973), [Learning in Mean Field Games: A Survey](https://arxiv.org/abs/2205.12944). 
+🟢 | Mean Field Game: linear-quadratic | n/a | ❌ | ✅ | Players are uniformly distributed and are then incentivized to gather at the same point (The lower the distanbce wrt. the distribution mean position, the higher the reward). A mean-reverting term pushes the players towards the distribution, a gaussian noise term perturbs them. The players' actions alter their states linearly (alpha * a * dt) and the cost thereof is quadratic (K * a^2 * dt), hence the name. There exists an exact, closed form solution for the fully continuous version of this game. References: [Perrin & al. 2019](https://arxiv.org/abs/2007.03458). +🟢 | Mean Field Game: predator prey | n/a | n/a | n/a | References: [Scaling up Mean Field Games with Online Mirror Descent](https://arxiv.org/abs/2103.00623), [Scalable Deep Reinforcement Learning Algorithms for Mean Field Games](https://arxiv.org/abs/2203.11973), [Learning in Mean Field Games: A Survey](https://arxiv.org/abs/2205.12944). +🟢 | Mean Field Game: routing | n/a | ❌ | ✅ | Representative player chooses at each node where they go. They has an origin, a destination and a departure time and chooses their route to minimize their travel time. Time spent on each link is a function of the distribution of players on the link when the player reaches the link. References: [Cabannes et. al. '21, Solving N-player dynamic routing games with congestion: a mean field approach](https://arxiv.org/pdf/2110.11943.pdf). +🔶 | [m,n,k-game](https://en.wikipedia.org/wiki/M,n,k-game) | 2 | ✅ | ✅ | Players place tokens to try and form a k-in-a-row pattern in an m-by-n board. +🔶 | [Morpion Solitaire (4D)](https://en.wikipedia.org/wiki/Join_five) | 1 | ✅ | ✅ | A single player game where player aims to maximize lines drawn on a grid, under certain limitations. +🟢 | Negotiation | 2 | ❌ | ❌ | Agents with different utilities must negotiate an allocation of resources. References: [Lewis et al. '17](https://arxiv.org/abs/1706.05125). [Cao et al. '18](https://arxiv.org/abs/1804.03980). +🔶 | [Nim](https://en.wikipedia.org/wiki/Nim) | 2 | ✅ | ✅ | Two agents take objects from distinct piles trying to either avoid taking the last one or take it. Any positive number of objects can be taken on each turn given they all come from the same pile. +🔶 | [Nine men's morris](https://en.wikipedia.org/wiki/Nine_men%27s_morris) | 2 | ✅ | ✅ | Two players put and move stones on the board to try to form mills (three adjacent stones in a line) to capture the other player's stones. +🔶 | [Oh Hell](https://en.wikipedia.org/wiki/Oh_hell) | 3-7 | ❌ | ❌ | A card game where players try to win exactly a declared number of tricks. +🟢 | Oshi-Zumo | 2 | ✅ | ❌ | Players must repeatedly bid to push a token off the other side of the board. References: [Buro, 2004. Solving the oshi-zumo game](https://link.springer.com/chapter/10.1007/978-0-387-35706-5_23). [Bosansky et al. '16, Algorithms for Computing Strategies in Two-Player Simultaneous Move Games](http://mlanctot.info/files/papers/aij-2psimmove.pdf). +🟢 | [Oware](https://en.wikipedia.org/wiki/Oware) | 2 | ✅ | ✅ | Players redistribute tokens from their half of the board to capture tokens in the opponent's part of the board. +🔶 | Pathfinding | 1-10 | ❌ | ✅ | Agents must move to their destination. References: [Austerweil et al. '15](http://miaoliu.scripts.mit.edu/SSS-16/wp-content/uploads/2016/01/paper.pdf). [Greenwald & Hall '03](https://www.aaai.org/Papers/ICML/2003/ICML03-034.pdf). [Littman '01](https://jmvidal.cse.sc.edu/library/littman01a.pdf). 
+🟢 | [Pentago](https://en.wikipedia.org/wiki/Pentago) | 2 | ✅ | ✅ | Players place tokens on the board, then rotate part of the board to a new orientation. +🔶 | Phantom Go | 2 | ✅ | ❌ | Go, except the opponent's stones are hidden. The analogue of Kriegspiel for Go. References: [Cazenave '05, A Phantom Go Program](https://link.springer.com/chapter/10.1007/11922155_9). +🔶 | Phantom Tic-Tac-Toe | 2 | ✅ | ❌ | Tic-tac-toe, except the opponent's tokens are hidden. Designed as a simple, imperfect-information game. References: [Auger '11, Multiple Tree for Partially Observable Monte-Carlo Tree Search](https://hal.archives-ouvertes.fr/hal-00563480v2/document). [Lisy '14, Alternative Selection Functions for Information Set Monte Carlo Tree Search](https://core.ac.uk/download/pdf/81646968.pdf). [Lanctot '13](http://mlanctot.info/files/papers/PhD_Thesis_MarcLanctot.pdf). +🟢 | [Pig](https://en.wikipedia.org/wiki/Pig_\(dice_game\)) | 2-10 | ❌ | ✅ | Each player rolls a dice until they get a 1 or they 'hold'; the rolled total is added to their score. +🟢 | [Prisoner's Dilemma](https://en.wikipedia.org/wiki/Prisoner%27s_dilemma) | 2 | ✅ | ✅ | Players decide on whether to cooperate or defect given a situation with different payoffs. +🟢 | [Poker (Hold 'em)](https://en.wikipedia.org/wiki/Texas_hold_%27em) | 2-10 | ❌ | ❌ | Players bet on whether their hand of cards plus some communal cards will form a special set. Implemented via [ACPC](http://www.computerpokercompetition.org/). +❌ ([#1158](https://github.com/google-deepmind/open_spiel/issues/1158)) | [Quoridor](https://en.wikipedia.org/wiki/Quoridor) | 2-4 | ✅ | ✅ | Each turn, players can either move their agent or add a small wall to the board. +❌ ([#811](https://github.com/google-deepmind/open_spiel/issues/811)) | Reconnaissance Blind Chess | 2 | ✅ | ❌ | Chess with opponent's pieces unknown, with sensing moves. Chess variant, invented by John Hopkins University Applied Physics Lab. Used in NeurIPS competition and Hidden Information Game Competition. References: [Markowitz et al. '18, On the Complexity of Reconnaissance Blind Chess](https://arxiv.org/abs/1811.03119). [Newman et al. '16, Reconnaissance blind multi-chess: an experimentation platform for ISR sensor fusion and resource management](https://www.spiedigitallibrary.org/conference-proceedings-of-spie/9842/984209/Reconnaissance-blind-multi-chess--an-experimentation-platform-for-ISR/10.1117/12.2228127.short?SSO=1). +🟢 | Routing game | 1+ | ✅ | ✅ | Players choose at each node where they go. They have an origin, a destination and a departure time and choose their route to minimize their travel time. Time spent on each link is a function of the number of players on the link when the player reaches the link. References: [Cabannes et. al. '21, Solving N-player dynamic routing games with congestion: a mean field approach](https://arxiv.org/pdf/2110.11943.pdf). +🔶 | Sheriff | 2 | ✅ | ❌ | Bargaining game. Good for correlated equilibria. Based on the board game [Sheriff of Nottingham](https://boardgamegeek.com/boardgame/157969/sheriff-of-nottingham). References: [Farina et al. '19, Correlation in Extensive-Form Games: Saddle-Point Formulation and Benchmarks](https://papers.nips.cc/paper/9122-correlation-in-extensive-form-games-saddle-point-formulation-and-benchmarks.pdf). +🔶 | [Slovenian Tarok](https://en.wikipedia.org/wiki/K%C3%B6nigrufen#Slovenia) | 3-4 | ❌ | ❌ | Trick-based card game with bidding. References: [Luštrek et al. 
2003, A program for playing Tarok](https://pdfs.semanticscholar.org/a920/70fe11f75f58c27ed907c4688747259cae15.pdf). +🔶 | [Skat](https://en.wikipedia.org/wiki/Skat_\(card_game\)) (simplified bidding) | 3 | ❌ | ❌ | Each turn, players bid to compete against the other two players. +🔶 | [Solitaire (K+)](https://en.wikipedia.org/wiki/Klondike_\(solitaire\)) | 1 | ❌ | ❌ | A single-player card game. References: [Bjarnason et al. '07, Searching solitaire in real time](http://web.engr.oregonstate.edu/~afern/papers/solitaire.pdf). +🔶 | [Spades](https://en.wikipedia.org/wiki/Spades_\(card_game\)) | 4 | ❌ | ❌ | A four-player card game. +🔶 | [Team Dominoes](https://en.wikipedia.org/wiki/Dominoes#Latin_American_Version) | 4 | ❌ | ❌ | Team version of dominoes. Consists of 28 tiles, featuring all combinations of spot counts (also called pips or dots) between zero and six. +🟢 | [Tic-Tac-Toe](https://en.wikipedia.org/wiki/Tic-tac-toe) | 2 | ✅ | ✅ | Players place tokens to try and form a pattern. +🟢 | Tiny [Bridge](https://en.wikipedia.org/wiki/Contract_bridge) | 2,4 | ❌ | ❌ | Simplified Bridge with fewer cards and tricks. +🟢 | Tiny [Hanabi](https://en.wikipedia.org/wiki/Hanabi_\(card_game\)) | 2-10 | ❌ | ❌ | Simplified Hanabi with just two turns. References: [Foerster et al 2018, Bayesian Action Decoder for Deep Multi-Agent Reinforcement Learning](https://arxiv.org/abs/1811.01458). +🟢 | Trade Comm | 2 | ❌ | ❌ | Players with different utilities and items communicate and then trade. +🔶 | [TwixT](https://en.wikipedia.org/wiki/TwixT) | 2 | ✅ | ✅ | Players place pegs and links on a 24x24 square to connect a line between opposite sides. +🔶 | [Ultimate Tic-Tac-Toe](https://en.wikipedia.org/wiki/Ultimate_tic-tac-toe) | 2 | ✅ | ✅ | Players try and form a pattern in local boards and a meta-board. +🔶 | Weighted Voting Games | 1+ | ✅ | ✅ | Classic coalitional game. Players each have a weight w_i, and there is a quota q. Denote p the binary vector representing a coalition over n players. The utility is 1 if p · w ≥ q, 0 otherwise. References: [Chalkiadakis, Elkind, & Wooldridge '12](https://link.springer.com/book/10.1007/978-3-031-01558-8). +🟢 | [Y](https://en.wikipedia.org/wiki/Y_\(game\)) | 2 | ✅ | ✅ | Players place tokens to try and connect sides of a triangular board. diff --git a/scenarios/bargaining/open_spiel/docs/index.rst b/scenarios/bargaining/open_spiel/docs/index.rst new file mode 100644 index 0000000..b77a667 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/index.rst @@ -0,0 +1,49 @@ +Welcome to OpenSpiel's documentation! +-------------------------------------- + + +.. toctree:: :caption: Getting started + :maxdepth: 2 + + intro + install + + +.. toctree:: :caption: Core OpenSpiel + :maxdepth: 2 + + concepts + api_reference + algorithms + games + +.. toctree:: :caption: Evaluation + :maxdepth: 2 + + Alpha-Rank + +.. toctree:: :caption: Julia OpenSpiel + :maxdepth: 2 + + OpenSpiel on Julia + +.. toctree:: :caption: AlphaZero + :maxdepth: 2 + + alpha_zero + +.. toctree:: :caption: Developer guide + :maxdepth: 2 + + developer_guide + contributing + +.. toctree:: :caption: Using OpenSpiel as a C++ Library + :maxdepth: 2 + + library + +.. 
toctree:: :caption: Extra information + :maxdepth: 2 + + authors diff --git a/scenarios/bargaining/open_spiel/docs/install.md b/scenarios/bargaining/open_spiel/docs/install.md new file mode 100644 index 0000000..7927c12 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/install.md @@ -0,0 +1,331 @@ +# Installation + +## Python-only installation via pip + +If you plan to only use the Python API, then the easiest way to install +OpenSpiel is to use pip. On MacOS or Linux, simply run: + +``` +python3 -m pip install open_spiel +``` + +The binary distribution is new as of OpenSpiel 1.0.0, and is only supported on +x86_64 architectures. If you encounter any problems, you can still install +OpenSpiel via pip from source (see below), but please open an issue to let us +know about the problem. + +### Python-only installation via pip (from source). + +If the binary distribution is not an option, you can also build OpenSpiel via +pip from source. CMake, Clang and Python 3 development files are required to +build the Python extension. Note that we recommend Clang but g++ >= 9.2 should +also work. + +E.g. on Ubuntu or Debian: + +```bash +# Check to see if you have the necessary tools for building OpenSpiel: +cmake --version # Must be >= 3.17 +clang++ --version # Must be >= 7.0.0 +python3-config --help + +# If not, run this line to install them. +# On older Linux distros, the package might be called clang-9 or clang-10 +sudo apt-get install cmake clang python3-dev + +# On older Linux distros, the versions may be too old. +# E.g. on Ubuntu 18.04, there are a few extra steps: +# sudo apt-get install clang-10 +# pip3 install cmake # You might need to relogin to get the new CMake version +# export CXX=clang++-10 + +# Recommended: Install pip dependencies and run under virtualenv. +sudo apt-get install virtualenv python3-virtualenv +virtualenv -p python3 venv +source venv/bin/activate + +# Finally, install OpenSpiel and its dependencies: +python3 -m pip install --upgrade setuptools pip +python3 -m pip install --no-binary=:open_spiel: open_spiel + +# To exit the virtual env +deactivate + +## **IMPORTANT NOTE**. If the build fails, please first make sure you have the +## required versions of the tools above and that you followed the recommended +## option. Then, open an issue: https://github.com/deepmind/open_spiel/issues +``` + +Note that the build could take several minutes. + +On MacOS, you can install the dependencies via `brew install cmake python3`. For +clang, you need to install or upgrade XCode and install the command-line +developer tools. + +## Installation from Source + +The instructions here are for Linux and MacOS. For installation on Windows, see +[these separate installation instructions](windows.md). On Linux, we recommend +Ubuntu 22.04, Debian 10, or later versions. On MacOS, we recommend XCode 11 or +newer. For the Python API: our tests run using Python versions 3.7 - 3.10. If +you encounter any problems on other setups, please let us know by opening an +issue. + +Currently there are three installation methods: + +1. building from the source code and editing `PYTHONPATH`. +2. using `pip install`. +3. installing via [Docker](https://www.docker.com). + +## Summary + +In a nutshell: + +```bash +./install.sh # Needed to run once and when major changes are released. +./open_spiel/scripts/build_and_run_tests.sh # Run this every-time you need to rebuild. +``` + +1. (Optional) Configure + [Conditional Dependencies](#configuring-conditional-dependencies). +2. Install system packages (e.g. 
cmake) and download some dependencies. Only + needs to be run once or if you enable some new conditional dependencies. + + ```bash + ./install.sh + ``` + +3. Install your [Python dependencies](#installing-python-dependencies), e.g. in + Python 3 using + [`virtualenv`](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/): + + ```bash + virtualenv -p python3 venv + source venv/bin/activate + ``` + + Use `deactivate` to quit the virtual environment. + + `pip` should be installed once and upgraded: + + ```bash + curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py + # Install pip deps as your user. Do not use the system's pip. + python3 get-pip.py + pip3 install --upgrade pip + pip3 install --upgrade setuptools testresources + ``` + + Additionally, if you intend to use one of the optional Python dependencies + (see [open_spiel/scripts/install.sh](https://github.com/deepmind/open_spiel/blob/master/open_spiel/scripts/install.sh)), you must manually + install and/or upgrade them, e.g.: `bash pip install --upgrade torch==x.xx.x + jax==x.x.x` where `x.xx.x` should be the desired version numbers (which can + be found at the link above). + +4. This sections differs depending on the installation procedure: + + **Building and testing from source** + + ```bash + python3 -m pip install -r requirements.txt + ./open_spiel/scripts/build_and_run_tests.sh + ``` + + **Building and testing using PIP** + + ```bash + python3 -m pip install . + ``` + + Optionally, use `pip install -e` to install in + [editable mode](https://pip.pypa.io/en/stable/reference/pip_install/#editable-installs), + which will allow you to skip this `pip install` step if you edit any Python + source files. If you edit any C++ files, you will have to rerun the install + command. + +5. Only when building from source: + + ```bash + # For the python modules in open_spiel. + export PYTHONPATH=$PYTHONPATH:/ + # For the Python bindings of Pyspiel + export PYTHONPATH=$PYTHONPATH://build/python + ``` + + add it to `./venv/bin/activate` or your `~/.bashrc` to be able to import + OpenSpiel from anywhere. + +To make sure OpenSpiel works on the default configurations, we do use the +`python3` command and not `python` (which still defaults to Python 2 on modern +Linux versions). + +## Installing via Docker + +Please note that we don't regularly test the Docker installation. As such, it +may not work at any given time. If you encounter a problem, please +[open an issue](https://github.com/deepmind/open_spiel/issues). + +Option 1 (Basic, 3.13GB): + +```bash +docker build --target base -t openspiel -f Dockerfile.base . +``` + +Option 2 (Slim, 2.26GB): + +```bash +docker build --target python-slim -t openspiel -f Dockerfile.base . +``` + +If you are only interested in developing in Python, use the second image. You +can navigate through the runtime of the container (after the build step) with: + +```bash +docker run -it --entrypoint /bin/bash openspiel +``` + +Finally you can run examples using: + +```bash +docker run openspiel python3 python/examples/matrix_game_example.py +docker run openspiel python3 python/examples/example.py +``` + + +Option 3 (Jupyter Notebook): + +Installs OpenSpiel with an additional Jupyter Notebook environment. + +```bash +docker build -t openspiel-notebook -f Dockerfile.jupyter --rm . 
+docker run -it --rm -p 8888:8888 openspiel-notebook +``` + +_More info_: https://jupyter-docker-stacks.readthedocs.io/en/latest/ + +## Running the first examples + +In the `build` directory, running `examples/example` will prints out a list of +registered games and the usage. Now, let’s play game of Tic-Tac-Toe with uniform +random players: + +```bash +examples/example --game=tic_tac_toe +``` + +Once the proper Python paths are set, from the main directory (one above +`build`), try these out: + +```bash +# Similar to the C++ example: +python3 open_spiel/python/examples/example.py --game_string=breakthrough + +# Play a game against a random or MCTS bot: +python3 open_spiel/python/examples/mcts.py --game=tic_tac_toe --player1=human --player2=random +python3 open_spiel/python/examples/mcts.py --game=tic_tac_toe --player1=human --player2=mcts +``` + +## Detailed steps + +### Configuring conditional dependencies + +Conditional dependencies are configured using environment variables, e.g. + +```bash +export OPEN_SPIEL_BUILD_WITH_HANABI=ON +``` + +`install.sh` may need to be rerun after enabling new conditional dependencies. + +See [open_spiel/scripts/global_variables.sh](https://github.com/deepmind/open_spiel/blob/master/open_spiel/scripts/global_variables.sh) for the full list +of conditional dependencies. + +See also the [Developer Guide](developer_guide.md#conditional-dependencies). + +### Installing system-wide dependencies + +See [open_spiel/scripts/install.sh](https://github.com/deepmind/open_spiel/blob/master/open_spiel/scripts/install.sh) for the required packages and cloned +repositories. + +### Installing Python dependencies + +Using a `virtualenv` to install python dependencies is highly recommended. For +more information see: +[https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/) + +##### Required dependencies + +Install required dependencies (Python 3): + +```bash +# Ubuntu 22.04 and newer: +python3 -m venv ./venv +source venv/bin/activate +python3 -m pip install -r requirements.txt +# Older than Ubuntu 22.04: +virtualenv -p python3 venv +source venv/bin/activate +python3 -m pip install -r requirements.txt +``` + +Alternatively, although not recommended, you can install the Python dependencies +system-wide with: + +```bash +python3 -m pip install --upgrade -r requirements.txt +``` + +##### Optional dependencies + +Additionally, if you intend to use one of the optional Python dependencies (see [open_spiel/scripts/install.sh](https://github.com/deepmind/open_spiel/blob/master/open_spiel/scripts/install.sh)), you must manually install and/or upgrade them. The installation scripts will not install or upgrade these dependencies. e.g.: + +```bash +python3 -m pip install --upgrade torch==x.xx.x jax==x.x.x +``` + +where `x.xx.x` should be the desired version numbers (which can be found at the +link above). + +### Building and running tests + +Make sure that the virtual environment is still activated. + +By default, Clang C++ compiler is used (and potentially installed by +[open_spiel/scripts/install.sh](https://github.com/deepmind/open_spiel/blob/master/open_spiel/scripts/install.sh)). + +Build and run tests (Python 3): + +```bash +mkdir build +cd build +CXX=clang++ cmake -DPython3_EXECUTABLE=$(which python3) -DCMAKE_CXX_COMPILER=${CXX} ../open_spiel +make -j$(nproc) +ctest -j$(nproc) +``` + +The CMake variable `Python3_EXECUTABLE` is used to specify the Python +interpreter. 
If the variable is not set, CMake's FindPython3 module will prefer +the latest version installed. Note, Python >= 3.7 is required. + +One can run an example of a game running (in the `build/` folder): + +```bash +./examples/example --game=tic_tac_toe +``` + +### Setting Your PYTHONPATH environment variable + +To be able to import the Python code (both the C++ binding `pyspiel` and the +rest) from any location, you will need to add to your PYTHONPATH the root +directory and the `open_spiel` directory. + +When using a virtualenv, the following should be added to +`/bin/activate`. For a system-wide install, add it in your `.bashrc` +or `.profile`. + +```bash +# For the python modules in open_spiel. +export PYTHONPATH=$PYTHONPATH:/ +# For the Python bindings of Pyspiel +export PYTHONPATH=$PYTHONPATH://build/python +``` diff --git a/scenarios/bargaining/open_spiel/docs/intro.md b/scenarios/bargaining/open_spiel/docs/intro.md new file mode 100644 index 0000000..6cd4d18 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/intro.md @@ -0,0 +1,48 @@ +## What is OpenSpiel? + +OpenSpiel is a collection of environments and algorithms for research in general +reinforcement learning and search/planning in games. OpenSpiel also includes +tools to analyze learning dynamics and other common evaluation metrics. Games +are represented as procedural extensive-form games, with some natural +extensions. + +**Open Spiel supports** + +* Single and multi-player games +* Fully observable (via observations) and imperfect information games (via + information states and observations) +* Stochasticity (via explicit chance nodes mostly, even though implicit + stochasticity is partially supported) +* n-player normal-form "one-shot" games and (2-player) matrix games +* Sequential and simultaneous move games +* Zero-sum, general-sum, and cooperative (identical payoff) games + +**Multi-language support** + +* C++17 +* Python 3 + +The games and utility functions (e.g. exploitability computation) are written in +C++. These are also available using +[pybind11](https://pybind11.readthedocs.io/en/stable/) Python bindings. + +The methods names are in `CamelCase` in C++ and `snake_case` in Python (e.g. +`state.ApplyAction` in C++ will be `state.apply_action` in Python). See the +pybind11 definition in [open_spiel/python/pybind11/pyspiel.cc](https://github.com/deepmind/open_spiel/blob/master/open_spiel/python/pybind11/pyspiel.cc) +for the full mapping between names. + +For algorithms, many are written in both languages, even if some are only +available from Python. + +**Platforms** + +OpenSpiel has been tested on Linux (Ubuntu and Debian), MacOS. There is limited +support for on [Windows 10](windows.md). + +**Visualization of games** + +There is a basic visualizer based on graphviz, see +[open_spiel/python/examples/treeviz_example.py](https://github.com/deepmind/open_spiel/blob/master/open_spiel/python/examples/treeviz_example.py). + +There is an interactive viewer for OpenSpiel games called +[SpielViz](https://github.com/michalsustr/spielviz). diff --git a/scenarios/bargaining/open_spiel/docs/julia.md b/scenarios/bargaining/open_spiel/docs/julia.md new file mode 100644 index 0000000..890dfa6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/julia.md @@ -0,0 +1,112 @@ +# Julia OpenSpiel + +We also provide a Julia wrapper for the OpenSpiel project. Most APIs are aligned +with those in Python (some are extended to accept `AbstractArray` and/or keyword +arguments for convenience). See `spiel.h` for the full API description. 
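+
+For example, loading a game and creating an initial state mirror the Python
+calls almost one-to-one (a minimal sketch; the full runnable example appears
+in the Example section below):
+
+```julia
+using OpenSpiel
+game = load_game("tic_tac_toe")   # Python: pyspiel.load_game("tic_tac_toe")
+state = new_initial_state(game)   # Python: game.new_initial_state()
+println(legal_actions(state))     # Python: state.legal_actions()
+```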
+ +## Install + +For general usage, you can install this package in the Julia REPL with +`] add OpenSpiel`. Note that this method only supports the Linux platform and +ACPC is not included. For developers, you need to follow the instructions bellow +to install this package: + +1. Install Julia and dependencies. Edit + `open_spiel/scripts/global_variables.sh` and set + `OPEN_SPIELOPEN_SPIEL_BUILD_WITH_JULIA=ON` (you may also turn on other + options as you wish). Then run `./install.sh`. If you already have Julia + installed on your system, make sure that it is visible in your terminal and + its version is v1.3 or later. Otherwise, Julia v1.3.1 will be automatically + installed in your home dir and a soft link will be created at + `/usr/local/bin/julia`. + +1. Build and run tests + + ```bash + ./open_spiel/scripts/build_and_run_tests.sh + ``` + +1. Install `] dev ./open_spiel/julia` (run in Julia REPL). + +## Known Problems + +1. There's a problem when building this package on Mac with XCode v11.4 or + above (see discussions + [here](https://github.com/deepmind/open_spiel/pull/187#issuecomment-616540881)). + To fix it, you need to install the latest `libcxxwrap` by following the + instructions + [here](https://github.com/JuliaInterop/libcxxwrap-julia#building-libcxxwrap-julia) + after running `./install.sh`. Then make sure that the result of `julia + --project=./open_spiel/julia -e 'using CxxWrap; + print(CxxWrap.prefix_path())'` points to the newly built `libcxxwrap`. After + that, build and install this package as stated above. + +## Example + +Here we demonstrate how to use the Julia API to play one game: + +```julia +using OpenSpiel + +# Here we need the StatsBase package for weighted sampling +using Pkg +Pkg.add("StatsBase") +using StatsBase + +function run_once(name) + game = load_game(name) + state = new_initial_state(game) + println("Initial state of game[$(name)] is:\n$(state)") + + while !is_terminal(state) + if is_chance_node(state) + outcomes_with_probs = chance_outcomes(state) + println("Chance node, got $(length(outcomes_with_probs)) outcomes") + actions, probs = zip(outcomes_with_probs...) + action = actions[sample(weights(collect(probs)))] + println("Sampled outcome: $(action_to_string(state, action))") + apply_action(state, action) + elseif is_simultaneous_node(state) + chosen_actions = [rand(legal_actions(state, pid-1)) for pid in 1:num_players(game)] # in Julia, indices start at 1 + println("Chosen actions: $([action_to_string(state, pid-1, action) for (pid, action) in enumerate(chosen_actions)])") + apply_action(state, chosen_actions) + else + action = rand(legal_actions(state)) + println("Player $(current_player(state)) randomly sampled action: $(action_to_string(state, action))") + apply_action(state, action) + end + println(state) + end + rts = returns(state) + for pid in 1:num_players(game) + println("Utility for player $(pid-1) is $(rts[pid])") + end +end + +run_once("tic_tac_toe") +run_once("kuhn_poker") +run_once("goofspiel(imp_info=True,num_cards=4,points_order=descending)") +``` + +## Q&A + +1. What is `StdVector`? + + `StdVector` is introduced in + [CxxWrap.jl](https://github.com/JuliaInterop/CxxWrap.jl) recently. It is a + wrapper of `std::vector` in the C++ side. Since that it is a subtype of + `AbstractVector`, most functions should just work out of the box. + +1. `0-based` or `1-based`? + + As this package is a low-level wrapper of OpenSpiel C++, most APIs are + zero-based: for instance, the `Player` id starts from zero. 
But note that + some bridge types, like `StdVector`, implicitly convert between indexing + conventions, so APIs that use `StdVector` are one-based. + +1. I can't find the `xxx` function/type in the Julia wrapper/The program exits + unexpectedly. + + Although most of the functions and types should be exported, there is still + a chance that some APIs are not well tested. So if you encounter any error, + please do not hesitate to create an issue. diff --git a/scenarios/bargaining/open_spiel/docs/library.md b/scenarios/bargaining/open_spiel/docs/library.md new file mode 100644 index 0000000..367ce6f --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/library.md @@ -0,0 +1,75 @@ +# Using OpenSpiel as a C++ Library + +OpenSpiel has been designed as a framework: a suite of games, algorithms, and +tools for research in reinforcement learning and search in games. However, there +are situations where one may only want or need a single game/algorithm or small +subset from this collection, or a research experiment does not require modifying +or otherwise interacting very closely with OpenSpiel other than strictly +calling/using it. + +In cases like this, it might be nice to use OpenSpiel as a library rather than a +framework. This has the benefit of not forcing the use of certain tools like +CMake or having to continually recompile OpenSpiel when doing your research. + +Luckily, this is easy to achieve with OpenSpiel: you simply need to build it as +a shared library once, and then load it dynamically at runtime. This page walks +through how to do this assuming a bash shell on Linux, but is very similar on +MacOS or for other shells. + +## Install Dependencies + +The dependencies of OpenSpiel need to be installed before it can be used as a +library. On MacOS and Debian/Ubuntu Linux, this is often simply just running +`./install.sh`. Please see the [installation from source instructions](https://github.com/deepmind/open_spiel/blob/master/docs/install.md#installation-from-source) for more details. + +## Compiling OpenSpiel as a Shared Library + +To build OpenSpiel as a shared library, simply run: + +``` +mkdir build +cd build +BUILD_SHARED_LIB=ON CXX=clang++ cmake -DPython3_EXECUTABLE=$(which python3) -DCMAKE_CXX_COMPILER=${CXX} ../open_spiel +make -j$(nproc) open_spiel +``` + +This produces a dynamically-linked library `libopen_spiel.so` (or +`lib_openspiel.dylib` on MacOS) in `build/` that can be linked against and +loaded dynamically at run-time. + +Suppose OpenSpiel was installed in `$HOME/open_spiel`. The following line adds +the necessary environment variable to let the shell know where to find +`libopen_spiel.so` at run-time: + +``` +export LD_LIBRARY_PATH="${HOME}/open_spiel/build" +``` + +You might want to add this line to your `$HOME/.bash_profile` to avoid having to +do it every time you load the library. Of course, if you are already using +`LD_LIBRARY_PATH` for something else, then you need to add +`${HOME}/open_spiel/build` to it (space-separated paths). + +## Compiling and Running the Example + +``` +cd ../open_spiel/examples +clang++ -I${HOME}/open_spiel -I${HOME}/open_spiel/open_spiel/abseil-cpp \ + -std=c++17 -o shared_library_example shared_library_example.cc \ + -L${HOME}/open_spiel/build -lopen_spiel +``` + +The first two flags are the include directory paths and the third is the link +directory path. The `-lopen_spiel` instructs the linker to link against the +OpenSpiel shared library. + +That's it! 
Now you can run the example using: + +``` +./shared_library_example breakthrough +``` + +You should also be able to register new games externally without the +implementation being within OpenSpiel nor built into the shared library, though +we are always interested in growing the library and recommend you contact us +about contributing any new games to the suite. diff --git a/scenarios/bargaining/open_spiel/docs/requirements.readthedocs.txt b/scenarios/bargaining/open_spiel/docs/requirements.readthedocs.txt new file mode 100644 index 0000000..47b362c --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/requirements.readthedocs.txt @@ -0,0 +1,6 @@ +# These are the dependencies to generate the documentation. +markdown==3.4 +recommonmark==0.7.1 +sphinx_markdown_tables==0.0.17 +sphinx==5.1 +sphinx-rtd-theme==1.3.0 diff --git a/scenarios/bargaining/open_spiel/docs/windows.md b/scenarios/bargaining/open_spiel/docs/windows.md new file mode 100644 index 0000000..99a6439 --- /dev/null +++ b/scenarios/bargaining/open_spiel/docs/windows.md @@ -0,0 +1,201 @@ +# OpenSpiel Installation on Windows + +OpenSpiel has limited support on Windows and is not being regularly tested, +which means support could break at any time. This may change in the future, but +for now please be aware that Windows support is experimental. Please report any +bugs or problems you encounter. + +OpenSpiel has limited support on Windows and is not being regularly tested, +which means support could break at any time. This may change in the future +(contributions are welcome), with Github Actions supporting +[windows workers](https://docs.github.com/en/actions/using-github-hosted-runners/customizing-github-hosted-runners#installing-software-on-windows-runners!), +but for now please be aware that Windows support is experimental. Please report +any bugs or problems you encounter. + +## Option 1: Windows Installation using Visual Studio Community Edition + +This option will describe how to install and use OpenSpiel on Windows 10 via +[Visual Studio Community Edition](https://visualstudio.microsoft.com/vs/community/). +This process has been written for Windows 10 and tested on Windows 10 Home +Version 20H2, build 19042.1415 (installed on Nov 26th, 2021). + +When installing Visual Studio, enable the C++ and Python development, and also +the C++ CMake tools for Windows. C++/CLI support and C++ Clang tools may also be +useful (but not necessary). + +You will need to have the following dependencies installed: + +* [CMake](https://cmake.org/download/) +* [git](https://gitforwindows.org/) +* [Python](https://www.python.org/downloads/windows/). Note: get the latest + 3.9 release as OpenSpiel has not been tested on 3.10 yet. Also, tick the box + during installation to ensure Python executable is in your path. +* Recommended: Windows Terminal / Powershell. + +The rest of the instructions will assume that OpenSpiel is cloned in +`C:\Users\MyUser\open_spiel`. + +Open a Windows Terminal (Windows Powershell), clone OpenSpiel and its +dependencies (commands adapted from open_spiel/scripts/install.sh) + +``` +cd C:\Users\MyUser +git clone https://github.com/deepmind/open_spiel.git +cd open_spiel +git clone --single-branch --depth 1 https://github.com/pybind/pybind11.git pybind11 +git clone -b 20211102.0 --single-branch --depth 1 https://github.com/abseil/abseil-cpp.git open_spiel\abseil-cpp +git clone -b 'master' https://github.com/pybind/pybind11_abseil.git open_spiel\pybind11_abseil +cd open_spiel\pybind11_abseil +git checkout '73992b5' +cd ..\.. 
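+# Clone the double dummy solver used by the bridge game: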
+git clone -b develop --single-branch --depth 1 https://github.com/jblespiau/dds.git open_spiel\games\bridge\double_dummy_solver +``` + +Open Visual Studio and continue without code. Then, click on File | Open -> +CMake, and choose `C:\Users\MyUser\open_spiel\open_spiel\CMakeLists.txt`. CMake +will then run; once you see `CMake generation finished`, choose Build -> Build +All. The files will be available in +`C:\Users\MyUser\open_spiel\open_spiel\out\build\x64-Debug`, when the build +completes with "Build All succeeded." Extra compilation options may be necessary +if errors occur. \ +MSVC options to deal with required C++ standard, file encoding (for chess +characters) and large object files include `/std:c++17`, `/utf-8`, `/bigobj`. To +use them together with default MSVC arguments, you can use the follwing CMake +command line arguments: `-DCMAKE_CXX_FLAGS="/std:c++17 /utf-8 /bigobj /DWIN32 +/D_WINDOWS /GR /EHsc"` + +To be able to import the Python code (both the C++ binding `pyspiel` and the +rest) from any location, you will need to add to your PYTHONPATH the root +directory and the `open_spiel` directory. Open +[Windows environment variables and add to the PYTHONPATH](https://stackoverflow.com/questions/3701646/how-to-add-to-the-pythonpath-in-windows-so-it-finds-my-modules-packages). +Add the directories `C:\Users\MyUser\open_spiel\open_spiel\out\build\x64-Debug` +and `C:\Users\MyUser\open_spiel\open_spiel\out\build\x64-Debug\python` to +PYTHONPATH. If your PYTHONPATH does not exist, then create a new environment +variable for it. To check that python is working, you can run the example in +`open_spiel\python\examples`. + +OpenSpiel has various Python dependencies which may require installing. At a +minimum, you will need the ones in +[requirements.txt](https://github.com/deepmind/open_spiel/blob/master/requirements.txt). + +``` +pip install absl-py +pip install attrs +pip install numpy +``` + +For a complete list, depending on what you will use, see +[python_extra_deps.sh](https://github.com/deepmind/open_spiel/blob/master/open_spiel/scripts/python_extra_deps.sh). + +## Option 2: Windows Installation using Windows Subsystem for Linux (WSL) + +This section describes the installation steps to get OpenSpiel running in a +Windows 10 environment using Windows Subsystem for Linux (WSL). Note that WSL +does not include GPU support, so will run on CPU only. + +## Process + +This process has been written for Windows 10, and tested on Windows 10 build +1903 (March 2019). + +1. Install the Windows Subsystem for Linux: + + Run the following command in Windows Powershell: + + ```powershell + Enable-WindowsOptionalFeature -Online -FeatureName Microsoft-Windows-Subsystem-Linux + ``` + +2. Install Ubuntu Linux from the Windows Store. Currently this is version + 18.04:: + + Open up the Windows Store. Search for Ubuntu. Open up Ubuntu and press "Get" + to install this. + +3. First time run of Ubuntu: + + Click on the Start Button and choose the Ubuntu icon. Wait until the distro + installs. Provide a username and password for the default user account. Note + that this account is a member of the Linux administrators (sudo) group so + choose a secure username and password combination. + +4. Update / Upgrade packages (optional step) + + ```bash + sudo apt-get update + sudo apt-get upgrade + ``` + +5. 
Run through the first part of the OpenSpiel installation + + ```bash + git clone https://github.com/deepmind/open_spiel.git + cd open_spiel + ./install.sh # you will be prompted for the password created at stage 3. Press Y to continue and install. During installation press Yes to restart services during package upgrades + pip install -U pip # Upgrade pip (required for TF >= 1.15) + pip3 install --upgrade -r requirements.txt # Install Python dependencies + ``` + +6. Now need to upgrade make version as the version of make which comes with + Ubuntu 18.04 is not high enough to build OpenSpiel. (Note, this step won't + be necessary if the version of Ubuntu in the Windows store gets upgraded to + 19.04) + + ```bash + cd .. + wget http://www.cmake.org/files/v3.12/cmake-3.12.4.tar.gz + tar -xvzf cmake-3.12.4.tar.gz + cd cmake-3.12.4/ + ./configure + make + sudo make install + sudo update-alternatives --install /usr/bin/cmake cmake /usr/local/bin/cmake 1 --force + cd ../open_spiel + ``` + +7. Finally, continue with the installation and run tests. + + ```bash + mkdir build + cd build + CXX=clang++ cmake -DPython3_EXECUTABLE=$(which python3) -DCMAKE_CXX_COMPILER=clang++ ../open_spiel + make -j12 # The 12 here is the number of parallel processes used to build + ctest -j12 # Run the tests to verify that the installation succeeded + ``` + + The CMake variable `Python3_EXECUTABLE` is used to specify the Python + interpreter. If the variable is not set, CMake's FindPython3 module will + prefer the latest version installed. Note, Python >= 3.6.0 is required. + + One can run an example of a game running (in the `build/` folder): + + ```bash + ./examples/example --game=tic_tac_toe + ``` + +8. Setting Your PYTHONPATH environment variable + + To be able to import the Python code (both the C++ binding `pyspiel` and the + rest) from any location, you will need to add to your PYTHONPATH the root + directory and the `open_spiel` directory. + + When using a virtualenv, the following should be added to + `/bin/activate`. For a system-wide install, add it in your + `.bashrc` or `.profile`. + + ```bash + # For the python modules in open_spiel. + export PYTHONPATH=$PYTHONPATH:/ + # For the Python bindings of Pyspiel + export PYTHONPATH=$PYTHONPATH://build/python + ``` + +9. Running the first example + + In the `build` directory, running `examples/example` will print out a list + of registered games and the usage. Now, let’s play game of Tic-Tac-Toe with + uniform random players: + + ```bash + examples/example --game=tic_tac_toe + ``` diff --git a/scenarios/bargaining/open_spiel/install.sh b/scenarios/bargaining/open_spiel/install.sh new file mode 100755 index 0000000..6f6d6e7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/install.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +# Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Just a stub here in the root to reference the real install script. 
+source $(dirname "$0")/open_spiel/scripts/install.sh diff --git a/scenarios/bargaining/open_spiel/open_spiel/CMakeLists.txt b/scenarios/bargaining/open_spiel/open_spiel/CMakeLists.txt new file mode 100644 index 0000000..2227094 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/CMakeLists.txt @@ -0,0 +1,325 @@ +# Version >= 3.12 required for new FindPython module +# https://cmake.org/cmake/help/v3.12/release/3.12.html +# Version >= 3.17 required for CMAKE_CUDA_STANDARD +# https://gitlab.kitware.com/cmake/cmake/-/issues/19123 +cmake_minimum_required (VERSION 3.17) +project (open_spiel) + +# Define some nice terminal colors. +if(NOT WIN32) + string(ASCII 27 Esc) + set(ColourReset "${Esc}[m") + set(ColourBold "${Esc}[1m") + set(Red "${Esc}[31m") + set(Green "${Esc}[32m") + set(Yellow "${Esc}[33m") + set(Blue "${Esc}[34m") + set(Magenta "${Esc}[35m") + set(Cyan "${Esc}[36m") + set(White "${Esc}[37m") + set(BoldRed "${Esc}[1;31m") + set(BoldGreen "${Esc}[1;32m") + set(BoldYellow "${Esc}[1;33m") + set(BoldBlue "${Esc}[1;34m") + set(BoldMagenta "${Esc}[1;35m") + set(BoldCyan "${Esc}[1;36m") + set(BoldWhite "${Esc}[1;37m") +endif() + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CUDA_STANDARD 14) +set(CMAKE_CUDA_STANDARD_REQUIRED TRUE) +set(CMAKE_CXX_STANDARD_REQUIRED TRUE) + +# Set default build type. +set (BUILD_TYPE $ENV{BUILD_TYPE}) +if(NOT BUILD_TYPE) + set(BUILD_TYPE Testing + CACHE STRING "Choose the type of build: Debug Release Testing." + FORCE) +endif() +message("${BoldYellow}Current build type is: ${BUILD_TYPE}${ColourReset}") + +if(${BUILD_TYPE} STREQUAL "Debug") + # Basic build for debugging (default). + # -Og enables optimizations that do not interfere with debugging. + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -Og") +endif() + +if(${BUILD_TYPE} STREQUAL "Testing") + # A build used for running tests: keep all runtime checks (assert, + # SPIEL_CHECK_*, SPIEL_DCHECK_*), but turn on some speed optimizations, + # otherwise tests run for too long. + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2") +endif() + +if(${BUILD_TYPE} STREQUAL "Release") + # Optimized release build: turn off debug runtime checks (assert, + # SPIEL_DCHECK_*) and turn on highest speed optimizations. + # The difference in perfomance can be up to 10x higher. + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNDEBUG -O3") +endif() + +if(APPLE) + # On MacOS: + # -undefined dynamic_lookup is necessary for pybind11 linking + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-everything -w -undefined dynamic_lookup") + + # On MacOS, we need this so that CMake will use the right Python if the user + # has a virtual environment active + set (CMAKE_FIND_FRAMEWORK LAST) +elseif(WIN32) + # Setup for MSVC 2022. + if(MSVC) + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /std:c++17 /utf-8 /bigobj /DWIN32 /D_WINDOWS /GR /EHsc") + endif() +else() + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-everything") +endif() + +# Position-independent code is needed for Python extension modules. +set (CMAKE_POSITION_INDEPENDENT_CODE ON) + + +## Optional dependencies +# One can optionally build and link against specific external dependencies. +# We expect these arguments to be always defined, when building using any script +# in `open_spiel/scripts/`, thus, we emit a warning when it's not, with a +# conservative default. +# See the documentation in install.md. + +# Use this macro to define optional dependencies. +# You can then use your chosen DEP_NAME as a variable to check if that +# dependency is enabled -- see code below. 
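+# Note: the environment variable of the same name (e.g. exported before running
+# the build scripts) takes precedence over the default passed in here.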
+macro(openspiel_optional_dependency DEP_NAME DEP_DEFAULT DEP_DESCRIPTION) + set (${DEP_NAME} ${DEP_DEFAULT} CACHE BOOL ${DEP_DESCRIPTION}) + if(NOT DEFINED ENV{${DEP_NAME}}) + message("${BoldRed}${DEP_NAME} not set. Defaults to ${DEP_DEFAULT}${ColourReset}") + set (ENV{${DEP_NAME}} ${DEP_DEFAULT}) + endif() + set (${DEP_NAME} $ENV{${DEP_NAME}}) + message("${BoldYellow}${DEP_NAME}: ${${DEP_NAME}} ${ColourReset}") + # If the dependency is on, pass in compiler flags to enable conditional code, + # e.g. #if OPEN_SPIEL_BUILD_WITH_... + if (${DEP_NAME}) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D${DEP_NAME}") + endif() +endmacro() + +# List of all optional dependencies: +openspiel_optional_dependency(OPEN_SPIEL_BUILD_WITH_ACPC OFF + "Build against the Universal Poker library.") +openspiel_optional_dependency(OPEN_SPIEL_BUILD_WITH_HANABI OFF + "Build against the Hanabi game.") +openspiel_optional_dependency(OPEN_SPIEL_BUILD_WITH_JULIA OFF + "Build binary for Julia.") +openspiel_optional_dependency(OPEN_SPIEL_BUILD_WITH_LIBNOP OFF + "Build with support for libnop.") +openspiel_optional_dependency(OPEN_SPIEL_BUILD_WITH_LIBTORCH OFF + "Build with support for libtorch.") +openspiel_optional_dependency(OPEN_SPIEL_BUILD_WITH_PYTHON ON + "Build binary for Python.") +openspiel_optional_dependency(OPEN_SPIEL_BUILD_WITH_XINXIN OFF + "Build against xinxin Hearts program.") +openspiel_optional_dependency(OPEN_SPIEL_BUILD_WITH_ROSHAMBO OFF + "Build against RoShamBo bots.") +openspiel_optional_dependency(OPEN_SPIEL_BUILD_WITH_GAMUT OFF + "Build with GAMUT generator integration.") +openspiel_optional_dependency(OPEN_SPIEL_BUILD_WITH_ORTOOLS OFF + "Build with C++ optimization library OR-Tools.") + +if (WIN32) + openspiel_optional_dependency(OPEN_SPIEL_ENABLE_JAX OFF + "Enable JAX.") + openspiel_optional_dependency(OPEN_SPIEL_ENABLE_PYTORCH OFF + "Enable PyTorch.") +else() + openspiel_optional_dependency(OPEN_SPIEL_ENABLE_JAX AUTO + "Enable JAX.") + openspiel_optional_dependency(OPEN_SPIEL_ENABLE_PYTORCH AUTO + "Enable PyTorch.") +endif() + +openspiel_optional_dependency(OPEN_SPIEL_ENABLE_PYTHON_MISC OFF + "Enable miscellaneous Python dependencies.") + +openspiel_optional_dependency(OPEN_SPIEL_BUILDING_WHEEL OFF + "Building a Python wheel?") + +# Needed to disable Abseil tests. +set (BUILD_TESTING OFF) + +# For now, let's enable all the tests. +enable_testing() + +set (OPEN_SPIEL_CORE_FILES + action_view.h + action_view.cc + canonical_game_strings.cc + canonical_game_strings.h + game_parameters.cc + game_parameters.h + matrix_game.cc + matrix_game.h + normal_form_game.h + observer.cc + observer.h + policy.cc + policy.h + simultaneous_move_game.cc + simultaneous_move_game.h + spiel.cc + spiel.h + spiel_bots.cc + spiel_bots.h + spiel_globals.h + spiel_utils.cc + spiel_utils.h + tensor_game.cc + tensor_game.h + utils/usage_logging.h + utils/usage_logging.cc +) + +# We add the subdirectory here so open_spiel_core can #include absl. +set(ABSL_PROPAGATE_CXX_STD ON) +add_subdirectory (abseil-cpp) +include_directories (abseil-cpp) + +# Just the core without any of the games +add_library(open_spiel_core OBJECT ${OPEN_SPIEL_CORE_FILES}) +target_include_directories ( + open_spiel_core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} abseil-cpp) +link_libraries(open_spiel_core + absl::algorithm + absl::flags + absl::flags_parse + absl::flat_hash_map + absl::optional + absl::random_random + absl::str_format + absl::strings + absl::time +) + +# Just the minimal base library: no games. 
+set (OPEN_SPIEL_CORE_OBJECTS $) + +set (OPEN_SPIEL_OBJECTS + $ + $ + $ + $ + $ + $ + $ + $ + $ +) +if (OPEN_SPIEL_BUILD_WITH_HANABI) + set(OPEN_SPIEL_OBJECTS ${OPEN_SPIEL_OBJECTS} + $) +endif() +if (OPEN_SPIEL_BUILD_WITH_ACPC) + set(OPEN_SPIEL_OBJECTS ${OPEN_SPIEL_OBJECTS} + $ + $) +endif() +if (OPEN_SPIEL_BUILD_WITH_XINXIN) + set(OPEN_SPIEL_OBJECTS ${OPEN_SPIEL_OBJECTS} $) +endif() +if (OPEN_SPIEL_BUILD_WITH_ROSHAMBO) + set(OPEN_SPIEL_OBJECTS ${OPEN_SPIEL_OBJECTS} $) +endif() +if (OPEN_SPIEL_BUILD_WITH_LIBNOP) + include_directories(libnop/libnop/include) + add_subdirectory(libnop) +endif() +if (OPEN_SPIEL_BUILD_WITH_LIBTORCH) + list(APPEND CMAKE_PREFIX_PATH "${CMAKE_CURRENT_SOURCE_DIR}/libtorch/libtorch") + find_package(Torch REQUIRED) + add_subdirectory(libtorch) + include_directories(${TORCH_INCLUDE_DIRS}) + # Use following to link your_target_executable with torch libraries: + # target_link_libraries(your_target_executable ${TORCH_LIBRARIES}) +endif() +if (OPEN_SPIEL_BUILD_WITH_GAMUT) + set(OPEN_SPIEL_OBJECTS ${OPEN_SPIEL_OBJECTS} $) +endif() +if (OPEN_SPIEL_BUILD_WITH_ORTOOLS) + # Compile with OR-Tools headers and link against binary distribution, + # downloaded from https://developers.google.com/optimization/install/cpp/linux + # and assumed to be in $HOME/or-tools. + # The flags were taken from the compilation of linear_programming.cc after + # running make test_cc. + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_BOP -DUSE_GLOP -DUSE_CBC -DUSE_CLP -DUSE_SCIP -pthread") + set(ORTOOLS_HOME "${CMAKE_CURRENT_SOURCE_DIR}/ortools") + set(ORTOOLS_INC_DIRS ${ORTOOLS_HOME} ${ORTOOLS_HOME}/include) + set(ORTOOLS_LIB_DIRS ${ORTOOLS_HOME}/lib ${ORTOOLS_HOME}/lib64) + set(ORTOOLS_LIBS z rt pthread ortools) + set_target_properties(open_spiel_core PROPERTIES POSITION_INDEPENDENT_CODE ON) + include_directories(${ORTOOLS_INC_DIRS}) + link_directories(${ORTOOLS_LIB_DIRS}) + # Use following to link your_target_executable with OrTools libraries: + # target_link_libraries(your_target_executable ${ORTOOLS_LIBS}) +endif() + +# We have the parent of this directory in the include path, so that we can +# include for example "open_spiel/spiel.h" (assuming this directory is named +# open_spiel). +include_directories(..) + +add_subdirectory (algorithms) +add_subdirectory (bots) +add_subdirectory (evaluation) +add_subdirectory (examples) +add_subdirectory (games) +add_subdirectory (game_transforms) + +if (OPEN_SPIEL_BUILD_WITH_PYTHON) + add_subdirectory (python) +endif() + +add_subdirectory (utils) + +if (OPEN_SPIEL_BUILD_WITH_JULIA) + add_subdirectory (julia) +endif() + +# Build a shared library, i.e. libopen_spiel.so. We generally only enable this +# for binary releases. +# Note that there are known problems when trying to use absl::flags within a +# shared library, hence is intentionally left out. To use ABSL flags, link with +# absl::flags and absl::flags_parse separately. +set (BUILD_SHARED_LIB OFF CACHE BOOL "Build a shared library?") +if(NOT DEFINED ENV{BUILD_SHARED_LIB}) + set (ENV{BUILD_SHARED_LIB} OFF) +endif() +set (BUILD_SHARED_LIB $ENV{BUILD_SHARED_LIB}) +if (BUILD_SHARED_LIB) + if (OPEN_SPIEL_BUILD_WITH_ORTOOLS) + add_library(open_spiel SHARED ${OPEN_SPIEL_OBJECTS} + # Optionally include files that use external dependencies, for example + # linear program specification for finding Nash equilibria. 
+ $ + ) + else() + add_library(open_spiel SHARED ${OPEN_SPIEL_OBJECTS}) + endif() + target_include_directories(open_spiel PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR} abseil-cpp) + target_link_libraries(open_spiel PUBLIC + absl::algorithm + absl::flat_hash_map + absl::optional + absl::random_random + absl::str_format + absl::strings + absl::time + # Optionally link external dependencies, for example OrTools for solving + # linear programs. + ${ORTOOLS_LIBS} + ) +endif() + +add_subdirectory (tests) diff --git a/scenarios/bargaining/open_spiel/open_spiel/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/__init__.py new file mode 100644 index 0000000..8614d7a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# The existence of this file allows us to have PYTHONPATH pointing to +# the parent of this directory and then use: +# from open_spiel.python import rl_environment diff --git a/scenarios/bargaining/open_spiel/open_spiel/action_view.cc b/scenarios/bargaining/open_spiel/open_spiel/action_view.cc new file mode 100644 index 0000000..ed64531 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/action_view.cc @@ -0,0 +1,142 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
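+
+// Implementation of the iterators declared in action_view.h: FlatJointActions
+// enumerates the cartesian product of every player's legal actions at a
+// simultaneous-move node, and FixedActions enumerates the joint actions that
+// are consistent with one player choosing a fixed action.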
+ +#include "open_spiel/action_view.h" + +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { + +std::vector> CollectActions(const State& state) { + std::vector> legal_actions; + if (state.IsSimultaneousNode()) { + legal_actions = std::vector>(state.NumPlayers()); + for (int i = 0; i < state.NumPlayers(); ++i) { + legal_actions[i] = state.LegalActions(i); + } + } else { + legal_actions = std::vector>{state.LegalActions()}; + } + return legal_actions; +} + +ActionView::ActionView(const Player current_player, + const std::vector> legal_actions) + : current_player(current_player), legal_actions(std::move(legal_actions)) {} + +ActionView::ActionView(const State& state) + : ActionView(state.CurrentPlayer(), CollectActions(state)) {} + +// FlatJointActions + +FlatJointActions ActionView::flat_joint_actions() const { + int num_flat_actions = 1; + for (const std::vector& actions : legal_actions) { + if (!actions.empty()) num_flat_actions *= actions.size(); + } + return FlatJointActions{num_flat_actions}; +} + +FlatJointActionsIterator FlatJointActions::begin() const { + return FlatJointActionsIterator{0}; +} +FlatJointActionsIterator FlatJointActions::end() const { + return FlatJointActionsIterator{num_flat_joint_actions}; +} +FlatJointActionsIterator& FlatJointActionsIterator::operator++() { + current_action_++; + return *this; +} +bool FlatJointActionsIterator::operator==( + FlatJointActionsIterator other) const { + return current_action_ == other.current_action_; +} +bool FlatJointActionsIterator::operator!=( + FlatJointActionsIterator other) const { + return !(*this == other); +} +Action FlatJointActionsIterator::operator*() const { return current_action_; } +FlatJointActionsIterator::FlatJointActionsIterator(int current_action) + : current_action_(current_action) {} + +// FixedActions + +FixedActions ActionView::fixed_action(Player player, int action_index) const { + SPIEL_CHECK_EQ(current_player, kSimultaneousPlayerId); + int prod_after = 1; + for (int pl = player + 1; pl < legal_actions.size(); pl++) { + const std::vector& actions = legal_actions[pl]; + if (!actions.empty()) prod_after *= actions.size(); + } + int prod_before = 1; + for (int pl = 0; pl < player; pl++) { + const std::vector& actions = legal_actions[pl]; + if (!actions.empty()) prod_before *= actions.size(); + } + int num_actions = legal_actions[player].size(); + return FixedActions{action_index, num_actions, prod_before, prod_after}; +} + +FixedActionsIterator FixedActions::begin() const { + return FixedActionsIterator(fixed_action, num_actions, prod_before, + prod_after, + /*i=*/0, /*j=*/0); +} +FixedActionsIterator FixedActions::end() const { + return FixedActionsIterator(fixed_action, num_actions, prod_before, + prod_after, + /*i=*/prod_after, /*j=*/0); +} + +// This essentially imitates a generator that uses a nested for loop: +// +// for i in range(prod_after): +// for j in range(prod_before): +// yield prod_before * (fixed_action + i * num_actions) + j +FixedActionsIterator& FixedActionsIterator::operator++() { + if (j_ + 1 < prod_before_) { + ++j_; + return *this; + } else { + j_ = 0; + ++i_; + SPIEL_CHECK_LE(i_, prod_after_); + return *this; + } +} +Action FixedActionsIterator::operator*() const { + return prod_before_ * (fixed_action_ + i_ * num_actions_) + j_; +} +bool FixedActionsIterator::operator==(const FixedActionsIterator& rhs) const { + return j_ == rhs.j_ && i_ == rhs.i_ && fixed_action_ == rhs.fixed_action_ && + prod_before_ == rhs.prod_before_ && 
num_actions_ == rhs.num_actions_ && + prod_after_ == rhs.prod_after_; +} +bool FixedActionsIterator::operator!=(const FixedActionsIterator& rhs) const { + return !(rhs == *this); +} +FixedActionsIterator::FixedActionsIterator(int fixed_action, int num_actions, + int prod_before, int prod_after, + int i, int j) + : fixed_action_(fixed_action), + num_actions_(num_actions), + prod_before_(prod_before), + prod_after_(prod_after), + i_(i), + j_(j) {} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/action_view.h b/scenarios/bargaining/open_spiel/open_spiel/action_view.h new file mode 100644 index 0000000..4e8c89b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/action_view.h @@ -0,0 +1,101 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ACTION_VIEW_ +#define OPEN_SPIEL_ACTION_VIEW_ + +#include + +#include "open_spiel/spiel.h" + +// ActionView provides a number of iterators that are useful for dealing +// with simultaneous move nodes. + +namespace open_spiel { + +class FixedActionsIterator { + const int fixed_action_; + const int num_actions_; + const int prod_before_; + const int prod_after_; + int i_; // Outer loop + int j_; // Inner loop + public: + FixedActionsIterator(int fixed_action, int num_actions, int prod_before, + int prod_after, int i, int j); + FixedActionsIterator& operator++(); + Action operator*() const; + bool operator==(const FixedActionsIterator& rhs) const; + bool operator!=(const FixedActionsIterator& rhs) const; +}; + +struct FixedActions { + const int fixed_action; + const int num_actions; + const int prod_before; + const int prod_after; + FixedActionsIterator begin() const; + FixedActionsIterator end() const; +}; + +class FlatJointActionsIterator { + int current_action_; + + public: + FlatJointActionsIterator(int current_action); + FlatJointActionsIterator& operator++(); + bool operator==(FlatJointActionsIterator other) const; + bool operator!=(FlatJointActionsIterator other) const; + Action operator*() const; +}; + +struct FlatJointActions { + const int num_flat_joint_actions; + FlatJointActionsIterator begin() const; + FlatJointActionsIterator end() const; +}; + +// Provides a number of iterators that are useful for dealing +// with simultaneous move nodes. +struct ActionView { + const Player current_player; + const std::vector> legal_actions; + // Collects legal actions at the specified state. + explicit ActionView(const State& state); + // Construct a custom action view. + ActionView(const Player current_player, + const std::vector> legal_actions); + + int num_players() const { return legal_actions.size(); } + int num_actions(Player pl) const { return legal_actions.at(pl).size(); } + + // Provides an iterator over all flattened joint actions. + // + // It computes the number of possible joint actions = \prod #actions(i) + // over all the players with any legal actions available. 
+ // The possible joint actions are just numbered 0, 1, 2, .... and can be + // decomposed into the individual actions of the players. + // + // As this is an iterator, it does not allocate memory for the whole cartesian + // product of the actions. + FlatJointActions flat_joint_actions() const; + + // Provides an iterator over flattened actions, while we fix one action + // for the specified player. + FixedActions fixed_action(Player player, int action_index) const; +}; + +} // namespace open_spiel + +#endif // OPEN_SPIEL_ACTION_VIEW_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/CMakeLists.txt b/scenarios/bargaining/open_spiel/open_spiel/algorithms/CMakeLists.txt new file mode 100644 index 0000000..ff810b9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/CMakeLists.txt @@ -0,0 +1,192 @@ +add_library (algorithms OBJECT + best_response.cc + best_response.h + cfr.cc + cfr.h + cfr_br.cc + cfr_br.h + corr_dist.cc + corr_dist.h + corr_dist/afcce.cc + corr_dist/afcce.h + corr_dist/afce.cc + corr_dist/afce.h + corr_dist/efcce.cc + corr_dist/efcce.h + corr_dist/efce.cc + corr_dist/efce.h + corr_dist/cce.cc + corr_dist/cce.h + corr_dist/ce.cc + corr_dist/ce.h + corr_dev_builder.cc + corr_dev_builder.h + deterministic_policy.cc + deterministic_policy.h + evaluate_bots.cc + evaluate_bots.h + expected_returns.cc + expected_returns.h + external_sampling_mccfr.cc + external_sampling_mccfr.h + fsicfr.cc + fsicfr.h + get_all_histories.cc + get_all_histories.h + get_all_infostates.cc + get_all_infostates.h + get_all_states.cc + get_all_states.h + get_legal_actions_map.cc + get_legal_actions_map.h + history_tree.cc + history_tree.h + infostate_tree.h + infostate_tree.cc + is_mcts.cc + is_mcts.h + matrix_game_utils.cc + matrix_game_utils.h + nfg_writer.cc + nfg_writer.h + mcts.cc + mcts.h + minimax.cc + minimax.h + observation_history.h + observation_history.cc + oos.h + oos.cc + outcome_sampling_mccfr.cc + outcome_sampling_mccfr.h + policy_iteration.cc + policy_iteration.h + state_distribution.cc + state_distribution.h + tabular_best_response_mdp.cc + tabular_best_response_mdp.h + tabular_exploitability.cc + tabular_exploitability.h + tabular_q_learning.cc + tabular_q_learning.h + tabular_sarsa.cc + tabular_sarsa.h + tensor_game_utils.cc + tensor_game_utils.h + trajectories.cc + trajectories.h + value_iteration.cc + value_iteration.h +) +target_include_directories (algorithms PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + +if (${OPEN_SPIEL_BUILD_WITH_ORTOOLS}) + add_subdirectory (ortools) +endif() + +add_executable(best_response_test best_response_test.cc + $ ${OPEN_SPIEL_OBJECTS}) +add_test(best_response_test best_response_test) + +add_executable(cfr_test cfr_test.cc + $ ${OPEN_SPIEL_OBJECTS}) +add_test(cfr_test cfr_test) + +add_executable(cfr_br_test cfr_br_test.cc + $ ${OPEN_SPIEL_OBJECTS}) +add_test(cfr_br_test cfr_br_test) + +add_executable(corr_dist_test corr_dist_test.cc + $ ${OPEN_SPIEL_OBJECTS}) +add_test(corr_dist_test corr_dist_test) + +add_executable(corr_dev_builder_test corr_dev_builder_test.cc + $ ${OPEN_SPIEL_OBJECTS}) +add_test(corr_dev_builder_test corr_dev_builder_test) + +add_executable(deterministic_policy_test deterministic_policy_test.cc + $ ${OPEN_SPIEL_OBJECTS}) +add_test(deterministic_policy_test deterministic_policy_test) + +add_executable(evaluate_bots_test evaluate_bots_test.cc + $ ${OPEN_SPIEL_OBJECTS}) +add_test(evaluate_bots_test evaluate_bots_test) + +add_executable(external_sampling_mccfr_test external_sampling_mccfr_test.cc + $ 
${OPEN_SPIEL_OBJECTS}) +add_test(external_sampling_mccfr_test external_sampling_mccfr_test) + +add_executable(get_all_histories_test get_all_histories_test.cc + $ ${OPEN_SPIEL_OBJECTS}) +add_test(get_all_histories_test get_all_histories_test) + +add_executable(get_all_states_test get_all_states_test.cc + $ ${OPEN_SPIEL_OBJECTS}) +add_test(get_all_states_test get_all_states_test) + +add_executable(get_legal_actions_map_test get_legal_actions_map_test.cc + $ ${OPEN_SPIEL_OBJECTS}) +add_test(get_legal_actions_map_test get_legal_actions_map_test) + +add_executable(history_tree_test history_tree_test.cc + $ ${OPEN_SPIEL_OBJECTS}) +add_test(history_tree_test history_tree_test) + +add_executable(infostate_tree_test infostate_tree_test.cc + $ ${OPEN_SPIEL_OBJECTS}) +add_test(infostate_tree_test infostate_tree_test) + +add_executable(is_mcts_test is_mcts_test.cc + $ ${OPEN_SPIEL_OBJECTS}) +add_test(is_mcts_test is_mcts_test) + +add_executable(matrix_game_utils_test matrix_game_utils_test.cc + $ ${OPEN_SPIEL_OBJECTS}) +add_test(matrix_game_utils_test matrix_game_utils_test) + +add_executable(minimax_test minimax_test.cc + $ ${OPEN_SPIEL_OBJECTS}) +add_test(minimax_test minimax_test) + +add_executable(observation_history_test observation_history_test.cc + $ ${OPEN_SPIEL_OBJECTS}) +add_test(observation_history_test observation_history_test) + +add_executable(oos_test oos_test.cc + $ ${OPEN_SPIEL_OBJECTS}) +add_test(oos_test oos_test) + +add_executable(outcome_sampling_mccfr_test outcome_sampling_mccfr_test.cc + $ ${OPEN_SPIEL_OBJECTS}) +add_test(outcome_sampling_mccfr_test outcome_sampling_mccfr_test) + +add_executable(state_distribution_test state_distribution_test.cc + $ ${OPEN_SPIEL_OBJECTS}) +add_test(state_distribution_test state_distribution_test) + +add_executable(tabular_best_response_mdp_test tabular_best_response_mdp_test.cc + $ ${OPEN_SPIEL_OBJECTS}) +add_test(tabular_best_response_mdp_test tabular_best_response_mdp_test) + +add_executable(tabular_exploitability_test tabular_exploitability_test.cc + $ ${OPEN_SPIEL_OBJECTS}) +add_test(tabular_exploitability_test tabular_exploitability_test) + +add_executable(tabular_sarsa_test tabular_sarsa_test.cc + $ ${OPEN_SPIEL_OBJECTS}) +add_test(tabular_sarsa_test tabular_sarsa_test) + +add_executable(tabular_q_learning_test tabular_q_learning_test.cc + $ ${OPEN_SPIEL_OBJECTS}) +add_test(tabular_q_learning_test tabular_q_learning_test) + +add_executable(tensor_game_utils_test tensor_game_utils_test.cc + $ ${OPEN_SPIEL_OBJECTS}) +add_test(tensor_game_utils_test tensor_game_utils_test) + +add_executable(trajectories_test trajectories_test.cc + $ ${OPEN_SPIEL_OBJECTS}) +add_test(trajectories_test trajectories_test) + +add_subdirectory (alpha_zero_torch) +add_subdirectory (dqn_torch) diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/CMakeLists.txt b/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/CMakeLists.txt new file mode 100644 index 0000000..6e7fac7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/CMakeLists.txt @@ -0,0 +1,32 @@ +# To enable C++ Torch AlphaZero, you will need to set OPEN_SPIEL_BUILD_WITH_LIBTORCH. 
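+# OPEN_SPIEL_BUILD_WITH_LIBNOP must also be ON; the check below aborts the
+# configure step if libtorch is enabled without libnop.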
+if (OPEN_SPIEL_BUILD_WITH_LIBTORCH) + if(NOT OPEN_SPIEL_BUILD_WITH_LIBNOP) + message(FATAL_ERROR + "alpha_zero_torch requires libnop (OPEN_SPIEL_BUILD_WITH_LIBNOP)") + endif() + + add_library (alpha_zero_torch OBJECT + alpha_zero.h + alpha_zero.cc + device_manager.h + model.h + model.cc + vpevaluator.h + vpevaluator.cc + vpnet.h + vpnet.cc + ) + target_include_directories (alpha_zero_torch PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + + add_executable(torch_model_test model_test.cc ${OPEN_SPIEL_OBJECTS} + $ $) + add_test(torch_model_test torch_model_test) + + add_executable(torch_vpnet_test vpnet_test.cc ${OPEN_SPIEL_OBJECTS} + $ $) + add_test(torch_vpnet_test torch_vpnet_test) + + target_link_libraries (alpha_zero_torch ${TORCH_LIBRARIES}) + target_link_libraries (torch_model_test ${TORCH_LIBRARIES}) + target_link_libraries (torch_vpnet_test ${TORCH_LIBRARIES}) +endif () diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/README.md b/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/README.md new file mode 100644 index 0000000..b3debe4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/README.md @@ -0,0 +1,82 @@ +# C++ LibTorch-based AlphaZero + +This is a C++ implementation of the AlphaZero algorithm based on LibTorch, +similar to the C++ TF-based AlphaZero. + +To build and use this implementation, you must set the optional global variables +`OPEN_SPIEL_BUILD_WITH_LIBTORCH` and `OPEN_SPIEL_BUILD_WITH_LIBNOP` to `ON` when +installing dependencies and building OpenSpiel. + +**Note**: Note: there are currently known problems with the C++ PyTorch: +inteferences with pybind11 versions. Until it is properly fixed, please see +[the workaround described here](https://github.com/deepmind/open_spiel/issues/966#issuecomment-1322982393). + +Then, to get started, see `examples/alpha_zero_torch_example.cc`. + +Important note: this implementation was a user contribution (see +[this PR](https://github.com/deepmind/open_spiel/pull/319)), and is not +regularly tested nor maintained by the core team. This means that, at any time, +it may not build or work as originally intended due to a change that will not +have been caught by our tests. Hence, if bugs occur, please open an issue to let +us know so we can fix them. + +This code was structured in a similar way to the TF-based C++ AlphaZero, using +several of the same components. If you have any questions, feel free to ask the +original author Christian Jans directly by following up on the PR linked above. +The PR also includes some results of experiments run using this implementation +that may be useful. + +## Setting up LibTorch AlphaZero + +1. In [global_variables.sh](../../scripts/global_variables.sh), find the + `OPEN_SPIEL_BUILD_WITH_LIBNOP` variable and set its value to `"ON"`. +2. In [global_variables.sh](../../scripts/global_variables.sh), find the + `OPEN_SPIEL_BUILD_WITH_LIBTORCH` variable and set its value to `"ON"`. +3. In [global_variables.sh](../../scripts/global_variables.sh), find the + `OPEN_SPIEL_BUILD_WITH_LIBTORCH_DOWNLOAD_URL` variable and set its value to + the LibTorch version URL compatible with your OS and hardware (see the + comments in global_variables.sh for the URLs): +4. Download libnop and the specified version of LibTorch by running: + ```bash + $ ./install.sh + ``` +5. Build OpenSpiel to compile LibTorch-dependent and libnop-dependent code + (such as LibTorch AlphaZero). 
+ ```bash + $ ./open_spiel/scripts/build_and_run_tests.sh + ``` + + +**Note:** If you are building from CentOS and/or encounter missing symbol errors +(e.g. undefined reference to `memcpy@GLIBC_2.14`, `lgamma@GLIBC_2.23`, etc.), +see solution steps described in +[this issue](https://github.com/deepmind/open_spiel/issues/619#issuecomment-854126238). + +## Starting LibTorch AlphaZero Training + +Starting training from scratch can be done by running +`alpha_zero_torch_example`: +```sh +$ ./build/examples/alpha_zero_torch_example --game=tic_tac_toe --path=/home/me/az_example/ +``` +Run with the `--help` flag to see a complete list of flags and a brief +description of each. + +## Resuming LibTorch AlphaZero Training + +Training can be resumed from the most recent checkpoint by providing the path to +the `config.json` (which is created during the initial training run) as a +positional argument: +```sh +$ ./build/examples/alpha_zero_torch_example /home/me/az_example/config.json +``` + +## Playing a Trained LibTorch AlphaZero + +A trained LibTorch AlphaZero can be played by running +`alpha_zero_torch_game_example`: +```sh +$ ./build/examples/alpha_zero_torch_game_example --game=tic_tac_toe --player1=az --player2=mcts --az_path=/home/me/az_example/ --az_checkpoint=-1 +``` +Run with the `--help` flag to see a complete list of flags and a brief +description of each. diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/alpha_zero.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/alpha_zero.cc new file mode 100644 index 0000000..978b576 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/alpha_zero.cc @@ -0,0 +1,642 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
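+
+// Overview of the pieces defined in this file: actor threads self-play games
+// with MCTS guided by the current network and push the resulting trajectories
+// onto a shared queue; evaluator threads play the network against vanilla MCTS
+// at increasing simulation budgets to track progress; and the learner thread
+// drains the queue into a replay buffer from which the network is trained.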
+ +#include "open_spiel/algorithms/alpha_zero_torch/alpha_zero.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/random/uniform_real_distribution.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/abseil-cpp/absl/synchronization/mutex.h" +#include "open_spiel/abseil-cpp/absl/time/clock.h" +#include "open_spiel/abseil-cpp/absl/time/time.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/algorithms/alpha_zero_torch/device_manager.h" +#include "open_spiel/algorithms/alpha_zero_torch/vpevaluator.h" +#include "open_spiel/algorithms/alpha_zero_torch/vpnet.h" +#include "open_spiel/algorithms/mcts.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/circular_buffer.h" +#include "open_spiel/utils/data_logger.h" +#include "open_spiel/utils/file.h" +#include "open_spiel/utils/json.h" +#include "open_spiel/utils/logger.h" +#include "open_spiel/utils/lru_cache.h" +#include "open_spiel/utils/serializable_circular_buffer.h" +#include "open_spiel/utils/stats.h" +#include "open_spiel/utils/thread.h" +#include "open_spiel/utils/threaded_queue.h" + +namespace open_spiel { +namespace algorithms { +namespace torch_az { + +struct StartInfo { + absl::Time start_time; + int start_step; + int model_checkpoint_step; + int64_t total_trajectories; +}; + +StartInfo StartInfoFromLearnerJson(const std::string& path) { + StartInfo start_info; + file::File learner_file(path + "/learner.jsonl", "r"); + std::vector learner_lines = + absl::StrSplit(learner_file.ReadContents(), '\n'); + std::string last_learner_line; + + // Get the last non-empty line in learner.jsonl. 
+ for (int i = learner_lines.size() - 1; i >= 0; i--) { + if (!learner_lines[i].empty()) { + last_learner_line = learner_lines[i]; + break; + } + } + + json::Object last_learner_json = json::FromString( + last_learner_line).value().GetObject(); + + start_info.start_time = absl::Now() - absl::Seconds( + last_learner_json["time_rel"].GetDouble()); + start_info.start_step = last_learner_json["step"].GetInt() + 1; + start_info.model_checkpoint_step = VPNetModel::kMostRecentCheckpointStep; + start_info.total_trajectories = + last_learner_json["total_trajectories"].GetInt(); + + return start_info; +} + +struct Trajectory { + struct State { + std::vector observation; + open_spiel::Player current_player; + std::vector legal_actions; + open_spiel::Action action; + open_spiel::ActionsAndProbs policy; + double value; + }; + + std::vector states; + std::vector returns; +}; + +Trajectory PlayGame(Logger* logger, int game_num, const open_spiel::Game& game, + std::vector>* bots, + std::mt19937* rng, double temperature, int temperature_drop, + double cutoff_value, bool verbose = false) { + std::unique_ptr state = game.NewInitialState(); + std::vector history; + Trajectory trajectory; + + while (true) { + if (state->IsChanceNode()) { + open_spiel::ActionsAndProbs outcomes = state->ChanceOutcomes(); + open_spiel::Action action = + open_spiel::SampleAction(outcomes, *rng).first; + history.push_back(state->ActionToString(state->CurrentPlayer(), action)); + state->ApplyAction(action); + } else { + open_spiel::Player player = state->CurrentPlayer(); + std::unique_ptr root = (*bots)[player]->MCTSearch(*state); + open_spiel::ActionsAndProbs policy; + policy.reserve(root->children.size()); + for (const SearchNode& c : root->children) { + policy.emplace_back(c.action, + std::pow(c.explore_count, 1.0 / temperature)); + } + NormalizePolicy(&policy); + open_spiel::Action action; + if (history.size() >= temperature_drop) { + action = root->BestChild().action; + } else { + action = open_spiel::SampleAction(policy, *rng).first; + } + + double root_value = root->total_reward / root->explore_count; + trajectory.states.push_back(Trajectory::State{ + state->ObservationTensor(), player, state->LegalActions(), action, + std::move(policy), root_value}); + std::string action_str = state->ActionToString(player, action); + history.push_back(action_str); + state->ApplyAction(action); + if (verbose) { + logger->Print("Player: %d, action: %s", player, action_str); + } + if (state->IsTerminal()) { + trajectory.returns = state->Returns(); + break; + } else if (std::abs(root_value) > cutoff_value) { + trajectory.returns.resize(2); + trajectory.returns[player] = root_value; + trajectory.returns[1 - player] = -root_value; + break; + } + } + } + + logger->Print("Game %d: Returns: %s; Actions: %s", game_num, + absl::StrJoin(trajectory.returns, " "), + absl::StrJoin(history, " ")); + return trajectory; +} + +std::unique_ptr InitAZBot(const AlphaZeroConfig& config, + const open_spiel::Game& game, + std::shared_ptr evaluator, + bool evaluation) { + return std::make_unique( + game, std::move(evaluator), config.uct_c, config.max_simulations, + /*max_memory_mb=*/10, + /*solve=*/false, + /*seed=*/0, + /*verbose=*/false, ChildSelectionPolicy::PUCT, + evaluation ? 0 : config.policy_alpha, + evaluation ? 0 : config.policy_epsilon, + /*dont_return_chance_node*/ true); +} + +// An actor thread runner that generates games and returns trajectories. 
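+// Each actor builds two MCTS bots backed by the shared network evaluator,
+// self-plays games until a stop is requested, and pushes every finished
+// trajectory onto the learner's queue (dropping the trajectory if the queue
+// stays full for ten seconds).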
+void actor(const open_spiel::Game& game, const AlphaZeroConfig& config, int num, + ThreadedQueue* trajectory_queue, + std::shared_ptr vp_eval, StopToken* stop) { + std::unique_ptr logger; + if (num < 20) { // Limit the number of open files. + logger.reset(new FileLogger(config.path, absl::StrCat("actor-", num))); + } else { + logger.reset(new NoopLogger()); + } + std::mt19937 rng(absl::ToUnixNanos(absl::Now())); + absl::uniform_real_distribution dist(0.0, 1.0); + std::vector> bots; + bots.reserve(2); + for (int player = 0; player < 2; player++) { + bots.push_back(InitAZBot(config, game, vp_eval, false)); + } + for (int game_num = 1; !stop->StopRequested(); ++game_num) { + double cutoff = + (dist(rng) < config.cutoff_probability ? config.cutoff_value + : game.MaxUtility() + 1); + if (!trajectory_queue->Push( + PlayGame(logger.get(), game_num, game, &bots, &rng, + config.temperature, config.temperature_drop, cutoff), + absl::Seconds(10))) { + logger->Print("Failed to push a trajectory after 10 seconds."); + } + } + logger->Print("Got a quit."); +} + +class EvalResults { + public: + explicit EvalResults(int count, int evaluation_window) { + results_.reserve(count); + for (int i = 0; i < count; ++i) { + results_.emplace_back(evaluation_window); + } + } + + // How many evals per difficulty. + int EvalCount() { + absl::MutexLock lock(&m_); + return eval_num_ / results_.size(); + } + + // Which eval to do next: difficulty, player0. + std::pair Next() { + absl::MutexLock lock(&m_); + int next = eval_num_ % (results_.size() * 2); + eval_num_ += 1; + return {next / 2, next % 2}; + } + + void Add(int i, double value) { + absl::MutexLock lock(&m_); + results_[i].Add(value); + } + + std::vector AvgResults() { + absl::MutexLock lock(&m_); + std::vector out; + out.reserve(results_.size()); + for (const auto& result : results_) { + out.push_back(result.Empty() ? 0 + : (absl::c_accumulate(result.Data(), 0.0) / + result.Size())); + } + return out; + } + + private: + std::vector> results_; + int eval_num_ = 0; + absl::Mutex m_; +}; + +// A thread that plays vs standard MCTS. +void evaluator(const open_spiel::Game& game, const AlphaZeroConfig& config, + int num, EvalResults* results, + std::shared_ptr vp_eval, StopToken* stop) { + FileLogger logger(config.path, absl::StrCat("evaluator-", num)); + std::mt19937 rng; + auto rand_evaluator = std::make_shared(1, num); + + for (int game_num = 1; !stop->StopRequested(); ++game_num) { + auto [difficulty, first] = results->Next(); + int az_player = first ? 
0 : 1; + int rand_max_simulations = + config.max_simulations * std::pow(10, difficulty / 2.0); + std::vector> bots; + bots.reserve(2); + bots.push_back(InitAZBot(config, game, vp_eval, true)); + bots.push_back(std::make_unique( + game, rand_evaluator, config.uct_c, rand_max_simulations, + /*max_memory_mb=*/1000, + /*solve=*/true, + /*seed=*/num * 1000 + game_num, + /*verbose=*/false, ChildSelectionPolicy::UCT, + /*dirichlet_alpha=*/0, + /*dirichlet_epsilon=*/0, + /*dont_return_chance_node=*/true)); + if (az_player == 1) { + std::swap(bots[0], bots[1]); + } + + logger.Print("Running MCTS with %d simulations", rand_max_simulations); + Trajectory trajectory = PlayGame( + &logger, game_num, game, &bots, &rng, /*temperature=*/1, + /*temperature_drop=*/0, /*cutoff_value=*/game.MaxUtility() + 1); + + results->Add(difficulty, trajectory.returns[az_player]); + logger.Print("Game %d: AZ: %5.2f, MCTS: %5.2f, MCTS-sims: %d, length: %d", + game_num, trajectory.returns[az_player], + trajectory.returns[1 - az_player], rand_max_simulations, + trajectory.states.size()); + } + logger.Print("Got a quit."); +} + +void learner(const open_spiel::Game& game, const AlphaZeroConfig& config, + DeviceManager* device_manager, + std::shared_ptr eval, + ThreadedQueue* trajectory_queue, + EvalResults* eval_results, StopToken* stop, + const StartInfo& start_info) { + FileLogger logger(config.path, "learner", "a"); + DataLoggerJsonLines data_logger( + config.path, "learner", true, "a", start_info.start_time); + std::mt19937 rng; + + int device_id = 0; // Do not change, the first device is the learner. + logger.Print("Running the learner on device %d: %s", device_id, + device_manager->Get(0, device_id)->Device()); + + SerializableCircularBuffer replay_buffer( + config.replay_buffer_size); + if (start_info.start_step > 1) { + replay_buffer.LoadBuffer(config.path + "/replay_buffer.data"); + } + int learn_rate = config.replay_buffer_size / config.replay_buffer_reuse; + int64_t total_trajectories = start_info.total_trajectories; + + const int stage_count = 7; + std::vector value_accuracies(stage_count); + std::vector value_predictions(stage_count); + open_spiel::BasicStats game_lengths; + open_spiel::HistogramNumbered game_lengths_hist(game.MaxGameLength() + 1); + + open_spiel::HistogramNamed outcomes({"Player1", "Player2", "Draw"}); + // Actor threads have likely been contributing for a while, so put `last` in + // the past to avoid a giant spike on the first step. + absl::Time last = absl::Now() - absl::Seconds(60); + for (int step = start_info.start_step; + !stop->StopRequested() && + (config.max_steps == 0 || step <= config.max_steps); + ++step) { + outcomes.Reset(); + game_lengths.Reset(); + game_lengths_hist.Reset(); + for (auto& value_accuracy : value_accuracies) { + value_accuracy.Reset(); + } + for (auto& value_prediction : value_predictions) { + value_prediction.Reset(); + } + + // Collect trajectories + int queue_size = trajectory_queue->Size(); + int num_states = 0; + int num_trajectories = 0; + while (!stop->StopRequested() && num_states < learn_rate) { + absl::optional trajectory = trajectory_queue->Pop(); + if (trajectory) { + num_trajectories += 1; + total_trajectories += 1; + game_lengths.Add(trajectory->states.size()); + game_lengths_hist.Add(trajectory->states.size()); + + double p1_outcome = trajectory->returns[0]; + outcomes.Add(p1_outcome > 0 ? 0 : (p1_outcome < 0 ? 
1 : 2)); + + for (const Trajectory::State& state : trajectory->states) { + replay_buffer.Add(VPNetModel::TrainInputs{state.legal_actions, + state.observation, + state.policy, p1_outcome}); + num_states += 1; + } + + for (int stage = 0; stage < stage_count; ++stage) { + // Scale for the length of the game + int index = (trajectory->states.size() - 1) * + static_cast(stage) / (stage_count - 1); + const Trajectory::State& s = trajectory->states[index]; + value_accuracies[stage].Add( + (s.value >= 0) == (trajectory->returns[s.current_player] >= 0)); + value_predictions[stage].Add(abs(s.value)); + } + } + } + absl::Time now = absl::Now(); + double seconds = absl::ToDoubleSeconds(now - last); + + logger.Print("Step: %d", step); + logger.Print( + "Collected %5d states from %3d games, %.1f states/s; " + "%.1f states/(s*actor), game length: %.1f", + num_states, num_trajectories, num_states / seconds, + num_states / (config.actors * seconds), + static_cast(num_states) / num_trajectories); + logger.Print("Queue size: %d. Buffer size: %d. States seen: %d", queue_size, + replay_buffer.Size(), replay_buffer.TotalAdded()); + + if (stop->StopRequested()) { + break; + } + + last = now; + + replay_buffer.SaveBuffer(config.path + "/replay_buffer.data"); + + VPNetModel::LossInfo losses; + { // Extra scope to return the device for use for inference asap. + DeviceManager::DeviceLoan learn_model = + device_manager->Get(config.train_batch_size, device_id); + + // Let the device manager know that the first device is now + // off-limits for inference and should only be used for learning + // (if config.explicit_learning == true). + device_manager->SetLearning(config.explicit_learning); + + // Learn from them. + for (int i = 0; i < replay_buffer.Size() / config.train_batch_size; i++) { + losses += learn_model->Learn( + replay_buffer.Sample(&rng, config.train_batch_size)); + } + + // The device manager can now once again use the first device for + // inference (if it could not before). + device_manager->SetLearning(false); + } + + // Always save a checkpoint, either for keeping or for loading the weights + // to the other sessions. It only allows numbers, so use -1 as "latest". 
+ std::string checkpoint_path = device_manager->Get(0, device_id) + ->SaveCheckpoint(VPNetModel::kMostRecentCheckpointStep); + if (step % config.checkpoint_freq == 0) { + device_manager->Get(0, device_id)->SaveCheckpoint(step); + } + if (device_manager->Count() > 0) { + for (int i = 0; i < device_manager->Count(); ++i) { + if (i != device_id) { + device_manager->Get(0, i)->LoadCheckpoint(checkpoint_path); + } + } + } + logger.Print("Checkpoint saved: %s", checkpoint_path); + + DataLogger::Record record = { + {"step", step}, + {"total_states", replay_buffer.TotalAdded()}, + {"states_per_s", num_states / seconds}, + {"states_per_s_actor", num_states / (config.actors * seconds)}, + {"total_trajectories", total_trajectories}, + {"trajectories_per_s", num_trajectories / seconds}, + {"queue_size", queue_size}, + {"game_length", game_lengths.ToJson()}, + {"game_length_hist", game_lengths_hist.ToJson()}, + {"outcomes", outcomes.ToJson()}, + {"value_accuracy", + json::TransformToArray(value_accuracies, + [](auto v) { return v.ToJson(); })}, + {"value_prediction", + json::TransformToArray(value_predictions, + [](auto v) { return v.ToJson(); })}, + {"eval", json::Object({ + {"count", eval_results->EvalCount()}, + {"results", json::CastToArray(eval_results->AvgResults())}, + })}, + {"batch_size", eval->BatchSizeStats().ToJson()}, + {"batch_size_hist", eval->BatchSizeHistogram().ToJson()}, + {"loss", json::Object({ + {"policy", losses.Policy()}, + {"value", losses.Value()}, + {"l2reg", losses.L2()}, + {"sum", losses.Total()}, + })}, + }; + eval->ResetBatchSizeStats(); + logger.Print("Losses: policy: %.4f, value: %.4f, l2: %.4f, sum: %.4f", + losses.Policy(), losses.Value(), losses.L2(), losses.Total()); + + LRUCacheInfo cache_info = eval->CacheInfo(); + if (cache_info.size > 0) { + logger.Print(absl::StrFormat( + "Cache size: %d/%d: %.1f%%, hits: %d, misses: %d, hit rate: %.3f%%", + cache_info.size, cache_info.max_size, 100.0 * cache_info.Usage(), + cache_info.hits, cache_info.misses, 100.0 * cache_info.HitRate())); + eval->ClearCache(); + } + record.emplace("cache", + json::Object({ + {"size", cache_info.size}, + {"max_size", cache_info.max_size}, + {"usage", cache_info.Usage()}, + {"requests", cache_info.Total()}, + {"requests_per_s", cache_info.Total() / seconds}, + {"hits", cache_info.hits}, + {"misses", cache_info.misses}, + {"misses_per_s", cache_info.misses / seconds}, + {"hit_rate", cache_info.HitRate()}, + })); + + data_logger.Write(record); + logger.Print(""); + } +} + +bool AlphaZero(AlphaZeroConfig config, StopToken* stop, bool resuming) { + std::shared_ptr game = + open_spiel::LoadGame(config.game); + + open_spiel::GameType game_type = game->GetType(); + if (game->NumPlayers() != 2) + open_spiel::SpielFatalError("AlphaZero can only handle 2-player games."); + if (game_type.reward_model != open_spiel::GameType::RewardModel::kTerminal) + open_spiel::SpielFatalError("Game must have terminal rewards."); + if (game_type.dynamics != open_spiel::GameType::Dynamics::kSequential) + open_spiel::SpielFatalError("Game must have sequential turns."); + + file::Mkdirs(config.path); + if (!file::IsDirectory(config.path)) { + std::cerr << config.path << " is not a directory." 
<< std::endl; + return false; + } + + std::cout << "Logging directory: " << config.path << std::endl; + + if (config.graph_def.empty()) { + config.graph_def = "vpnet.pb"; + std::string model_path = absl::StrCat(config.path, "/", config.graph_def); + if (file::Exists(model_path)) { + std::cout << "Overwriting existing model: " << model_path << std::endl; + } else { + std::cout << "Creating model: " << model_path << std::endl; + } + SPIEL_CHECK_TRUE(CreateGraphDef( + *game, config.learning_rate, config.weight_decay, config.path, + config.graph_def, config.nn_model, config.nn_width, config.nn_depth)); + } else { + std::string model_path = absl::StrCat(config.path, "/", config.graph_def); + if (file::Exists(model_path)) { + std::cout << "Using existing model: " << model_path << std::endl; + } else { + std::cout << "Model not found: " << model_path << std::endl; + } + } + + std::cout << "Playing game: " << config.game << std::endl; + + config.inference_batch_size = std::max( + 1, + std::min(config.inference_batch_size, config.actors + config.evaluators)); + + config.inference_threads = + std::max(1, std::min(config.inference_threads, + (1 + config.actors + config.evaluators) / 2)); + + { + file::File fd(config.path + "/config.json", "w"); + fd.Write(json::ToString(config.ToJson(), true) + "\n"); + } + + StartInfo start_info = {/*start_time=*/absl::Now(), + /*start_step=*/1, + /*model_checkpoint_step=*/0, + /*total_trajectories=*/0}; + if (resuming) { + start_info = StartInfoFromLearnerJson(config.path); + } + + DeviceManager device_manager; + for (const absl::string_view& device : absl::StrSplit(config.devices, ',')) { + device_manager.AddDevice( + VPNetModel(*game, config.path, config.graph_def, std::string(device))); + } + + if (device_manager.Count() == 0) { + std::cerr << "No devices specified?" << std::endl; + return false; + } + + // The explicit_learning option should only be used when multiple + // devices are available (so that inference can continue while + // also undergoing learning). + if (device_manager.Count() <= 1 && config.explicit_learning) { + std::cerr << "Explicit learning can only be used with multiple devices." + << std::endl; + return false; + } + + std::cerr << "Loading model from step " << start_info.model_checkpoint_step + << std::endl; + { // Make sure they're all in sync. + if (!resuming) { + device_manager.Get(0)->SaveCheckpoint(start_info.model_checkpoint_step); + } + for (int i = 0; i < device_manager.Count(); ++i) { + device_manager.Get(0, i)->LoadCheckpoint( + start_info.model_checkpoint_step); + } + } + + auto eval = std::make_shared( + &device_manager, config.inference_batch_size, config.inference_threads, + config.inference_cache, (config.actors + config.evaluators) / 16); + + ThreadedQueue trajectory_queue(config.replay_buffer_size / + config.replay_buffer_reuse); + + EvalResults eval_results(config.eval_levels, config.evaluation_window); + + std::vector actors; + actors.reserve(config.actors); + for (int i = 0; i < config.actors; ++i) { + actors.emplace_back( + [&, i]() { actor(*game, config, i, &trajectory_queue, eval, stop); }); + } + std::vector evaluators; + evaluators.reserve(config.evaluators); + for (int i = 0; i < config.evaluators; ++i) { + evaluators.emplace_back( + [&, i]() { evaluator(*game, config, i, &eval_results, eval, stop); }); + } + learner(*game, config, &device_manager, eval, &trajectory_queue, + &eval_results, stop, start_info); + + if (!stop->StopRequested()) { + stop->Stop(); + } + + // Empty the queue so that the actors can exit. 
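The shutdown sequence that follows is deliberate: the learner returns, the stop token is set, the trajectory queue is closed and drained so that actors blocked inside `Push` can wake up, and only then are the worker threads joined. A rough Python analogue of that ordering using only the standard library (an illustration of the pattern, not a binding of the C++ classes):

```python
import queue
import threading

stop = threading.Event()
trajectories = queue.Queue(maxsize=64)

def actor():
    while not stop.is_set():
        try:
            trajectories.put("trajectory", timeout=1.0)   # like Push(..., 10s)
        except queue.Full:
            pass  # learner is behind; retry unless we are stopping

threads = [threading.Thread(target=actor) for _ in range(4)]
for t in threads:
    t.start()

# ... the learner would consume from `trajectories` here ...

stop.set()                          # 1. request stop
while not trajectories.empty():     # 2. drain so no producer stays blocked
    trajectories.get_nowait()
for t in threads:                   # 3. join actors (and evaluators)
    t.join()
```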
+ trajectory_queue.BlockNewValues(); + trajectory_queue.Clear(); + + std::cout << "Joining all the threads." << std::endl; + for (auto& t : actors) { + t.join(); + } + for (auto& t : evaluators) { + t.join(); + } + std::cout << "Exiting cleanly." << std::endl; + return true; +} + +} // namespace torch_az +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/alpha_zero.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/alpha_zero.h new file mode 100644 index 0000000..3566f0a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/alpha_zero.h @@ -0,0 +1,140 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_ALPHA_ZERO_TORCH_ALPHA_ZERO_H_ +#define OPEN_SPIEL_ALGORITHMS_ALPHA_ZERO_TORCH_ALPHA_ZERO_H_ + +#include +#include +#include + +#include "open_spiel/utils/file.h" +#include "open_spiel/utils/json.h" +#include "open_spiel/utils/thread.h" + +namespace open_spiel { +namespace algorithms { +namespace torch_az { + +struct AlphaZeroConfig { + std::string game; + std::string path; + std::string graph_def; + std::string nn_model; + int nn_width; + int nn_depth; + std::string devices; + + bool explicit_learning; + double learning_rate; + double weight_decay; + int train_batch_size; + int inference_batch_size; + int inference_threads; + int inference_cache; + int replay_buffer_size; + int replay_buffer_reuse; + int checkpoint_freq; + int evaluation_window; + + double uct_c; + int max_simulations; + double policy_alpha; + double policy_epsilon; + double temperature; + double temperature_drop; + double cutoff_probability; + double cutoff_value; + + int actors; + int evaluators; + int eval_levels; + int max_steps; + + json::Object ToJson() const { + return json::Object({ + {"game", game}, + {"path", path}, + {"graph_def", graph_def}, + {"nn_model", nn_model}, + {"nn_width", nn_width}, + {"nn_depth", nn_depth}, + {"devices", devices}, + {"explicit_learning", explicit_learning}, + {"learning_rate", learning_rate}, + {"weight_decay", weight_decay}, + {"train_batch_size", train_batch_size}, + {"inference_batch_size", inference_batch_size}, + {"inference_threads", inference_threads}, + {"inference_cache", inference_cache}, + {"replay_buffer_size", replay_buffer_size}, + {"replay_buffer_reuse", replay_buffer_reuse}, + {"checkpoint_freq", checkpoint_freq}, + {"evaluation_window", evaluation_window}, + {"uct_c", uct_c}, + {"max_simulations", max_simulations}, + {"policy_alpha", policy_alpha}, + {"policy_epsilon", policy_epsilon}, + {"temperature", temperature}, + {"temperature_drop", temperature_drop}, + {"cutoff_probability", cutoff_probability}, + {"cutoff_value", cutoff_value}, + {"actors", actors}, + {"evaluators", evaluators}, + {"eval_levels", eval_levels}, + {"max_steps", max_steps}, + }); + } + + void FromJson(const json::Object& config_json) { + game = 
config_json.at("game").GetString(); + path = config_json.at("path").GetString(); + graph_def = config_json.at("graph_def").GetString(); + nn_model = config_json.at("nn_model").GetString(); + nn_width = config_json.at("nn_width").GetInt(); + nn_depth = config_json.at("nn_depth").GetInt(); + devices = config_json.at("devices").GetString(); + explicit_learning = config_json.at("explicit_learning").GetBool(); + learning_rate = config_json.at("learning_rate").GetDouble(); + weight_decay = config_json.at("weight_decay").GetDouble(); + train_batch_size = config_json.at("train_batch_size").GetInt(); + inference_batch_size = config_json.at("inference_batch_size").GetInt(); + inference_threads = config_json.at("inference_threads").GetInt(); + inference_cache = config_json.at("inference_cache").GetInt(); + replay_buffer_size = config_json.at("replay_buffer_size").GetInt(); + replay_buffer_reuse = config_json.at("replay_buffer_reuse").GetInt(); + checkpoint_freq = config_json.at("checkpoint_freq").GetInt(); + evaluation_window = config_json.at("evaluation_window").GetInt(); + uct_c = config_json.at("uct_c").GetDouble(); + max_simulations = config_json.at("max_simulations").GetInt(); + policy_alpha = config_json.at("policy_alpha").GetDouble(); + policy_epsilon = config_json.at("policy_epsilon").GetDouble(); + temperature = config_json.at("temperature").GetDouble(); + temperature_drop = config_json.at("temperature_drop").GetDouble(); + cutoff_probability = config_json.at("cutoff_probability").GetDouble(); + cutoff_value = config_json.at("cutoff_value").GetDouble(); + actors = config_json.at("actors").GetInt(); + evaluators = config_json.at("evaluators").GetInt(); + eval_levels = config_json.at("eval_levels").GetInt(); + max_steps = config_json.at("max_steps").GetInt(); + } +}; + +bool AlphaZero(AlphaZeroConfig config, StopToken* stop, bool resuming); + +} // namespace torch_az +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_ALPHA_ZERO_TORCH_ALPHA_ZERO_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/device_manager.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/device_manager.h new file mode 100644 index 0000000..d4c1a5d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/device_manager.h @@ -0,0 +1,116 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_ALPHA_ZERO_TORCH_DEVICE_MANAGER_H_ +#define OPEN_SPIEL_ALGORITHMS_ALPHA_ZERO_TORCH_DEVICE_MANAGER_H_ + +#include + +#include "open_spiel/abseil-cpp/absl/synchronization/mutex.h" +#include "open_spiel/algorithms/alpha_zero_torch/vpnet.h" + +namespace open_spiel { +namespace algorithms { +namespace torch_az { + +// Keeps track of a bunch of VPNet models, intended to be one per device, and +// gives them out based on usage. 
When you request a device you specify how much +// work you're going to give it, which is assumed done once the loan is +// returned. +class DeviceManager { + public: + DeviceManager() { + learning_ = false; + multiple_devices_ = false; + } + + void AddDevice(VPNetModel model) { // Not thread safe. + devices.emplace_back(Device{std::move(model)}); + multiple_devices_ = devices.size() > 1; + } + + // Acts as a pointer to the model, but lets the manager know when you're done. + class DeviceLoan { + public: + // DeviceLoan is not public constructible and is move only. + DeviceLoan(DeviceLoan&& other) = default; + DeviceLoan& operator=(DeviceLoan&& other) = default; + DeviceLoan(const DeviceLoan&) = delete; + DeviceLoan& operator=(const DeviceLoan&) = delete; + + ~DeviceLoan() { manager_->Return(device_id_, requests_); } + VPNetModel* operator->() { return model_; } + + private: + DeviceLoan(DeviceManager* manager, VPNetModel* model, int device_id, + int requests) + : manager_(manager), + model_(model), + device_id_(device_id), + requests_(requests) {} + DeviceManager* manager_; + VPNetModel* model_; + int device_id_; + int requests_; + friend DeviceManager; + }; + + // Gives the device with the fewest outstanding requests. + DeviceLoan Get(int requests, int device_id = -1) { + absl::MutexLock lock(&m_); + if (device_id < 0) { + // The starting device changes depending on if we are allowed to + // use the first device or not. + device_id = 0 + (learning_ && multiple_devices_); + for (int i = 1 + (learning_ && multiple_devices_); i < devices.size(); + ++i) { + if (devices[i].requests < devices[device_id].requests) { + device_id = i; + } + } + } + devices[device_id].requests += requests; + return DeviceLoan(this, &devices[device_id].model, device_id, requests); + } + + // A member to ensure that when device:0 is learning and there are + // multiple devices available, that device:0 does not take on any + // inference requests from the actors and evaluators. These inference + // requests should be dealt with by the other available devices. + void SetLearning(bool value) { learning_ = value; } + + int Count() const { return devices.size(); } + + private: + void Return(int device_id, int requests) { + absl::MutexLock lock(&m_); + devices[device_id].requests -= requests; + } + + struct Device { + VPNetModel model; + int requests = 0; + }; + + bool learning_; + bool multiple_devices_; + std::vector devices; + absl::Mutex m_; +}; + +} // namespace torch_az +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_ALPHA_ZERO_TORCH_DEVICE_MANAGER_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/model.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/model.cc new file mode 100644 index 0000000..39b0ed9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/model.cc @@ -0,0 +1,405 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/alpha_zero_torch/model.h" + +#include + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/match.h" + +namespace open_spiel { +namespace algorithms { +namespace torch_az { + +std::istream& operator>>(std::istream& stream, ModelConfig& config) { + int channels; + int height; + int width; + + stream >> channels >> height >> width >> config.number_of_actions >> + config.nn_depth >> config.nn_width >> config.learning_rate >> + config.weight_decay >> config.nn_model; + + config.observation_tensor_shape = {channels, height, width}; + + return stream; +} + +std::ostream& operator<<(std::ostream& stream, const ModelConfig& config) { + int shape_dim = config.observation_tensor_shape.size(); + int height = shape_dim > 1 ? config.observation_tensor_shape[1] : 1; + int width = shape_dim > 2 ? config.observation_tensor_shape[2] : 1; + + stream << config.observation_tensor_shape[0] << " " << height << " " << width + << " " << config.number_of_actions << " " << config.nn_depth << " " + << config.nn_width << " " << config.learning_rate << " " + << config.weight_decay << " " << config.nn_model; + return stream; +} + +ResInputBlockImpl::ResInputBlockImpl(const ResInputBlockConfig& config) + : conv_(torch::nn::Conv2dOptions( + /*input_channels=*/config.input_channels, + /*output_channels=*/config.filters, + /*kernel_size=*/config.kernel_size) + .stride(1) + .padding(config.padding) + .dilation(1) + .groups(1) + .bias(true) + .padding_mode(torch::kZeros)), + batch_norm_(torch::nn::BatchNorm2dOptions( + /*num_features=*/config.filters) + .eps(0.001) // Make it the same as TF. + .momentum(0.01) // Torch momentum = 1 - TF momentum. + .affine(true) + .track_running_stats(true)) { + channels_ = config.input_channels; + height_ = config.input_height; + width_ = config.input_width; + + register_module("input_conv", conv_); + register_module("input_batch_norm", batch_norm_); +} + +torch::Tensor ResInputBlockImpl::forward(torch::Tensor x) { + torch::Tensor output = x.view({-1, channels_, height_, width_}); + output = torch::relu(batch_norm_(conv_(output))); + + return output; +} + +ResTorsoBlockImpl::ResTorsoBlockImpl(const ResTorsoBlockConfig& config, + int layer) + : conv1_(torch::nn::Conv2dOptions( + /*input_channels=*/config.input_channels, + /*output_channels=*/config.filters, + /*kernel_size=*/config.kernel_size) + .stride(1) + .padding(config.padding) + .dilation(1) + .groups(1) + .bias(true) + .padding_mode(torch::kZeros)), + conv2_(torch::nn::Conv2dOptions( + /*input_channels=*/config.filters, + /*output_channels=*/config.filters, + /*kernel_size=*/config.kernel_size) + .stride(1) + .padding(config.padding) + .dilation(1) + .groups(1) + .bias(true) + .padding_mode(torch::kZeros)), + batch_norm1_(torch::nn::BatchNorm2dOptions( + /*num_features=*/config.filters) + .eps(0.001) // Make it the same as TF. + .momentum(0.01) // Torch momentum = 1 - TF momentum. + .affine(true) + .track_running_stats(true)), + batch_norm2_(torch::nn::BatchNorm2dOptions( + /*num_features=*/config.filters) + .eps(0.001) // Make it the same as TF. + .momentum(0.01) // Torch momentum = 1 - TF momentum. 
+ .affine(true) + .track_running_stats(true)) { + register_module("res_" + std::to_string(layer) + "_conv_1", conv1_); + register_module("res_" + std::to_string(layer) + "_conv_2", conv2_); + register_module("res_" + std::to_string(layer) + "_batch_norm_1", + batch_norm1_); + register_module("res_" + std::to_string(layer) + "_batch_norm_2", + batch_norm2_); +} + +torch::Tensor ResTorsoBlockImpl::forward(torch::Tensor x) { + torch::Tensor residual = x; + + torch::Tensor output = torch::relu(batch_norm1_(conv1_(x))); + output = batch_norm2_(conv2_(output)); + output += residual; + output = torch::relu(output); + + return output; +} + +ResOutputBlockImpl::ResOutputBlockImpl(const ResOutputBlockConfig& config) + : value_conv_(torch::nn::Conv2dOptions( + /*input_channels=*/config.input_channels, + /*output_channels=*/config.value_filters, + /*kernel_size=*/config.kernel_size) + .stride(1) + .padding(config.padding) + .dilation(1) + .groups(1) + .bias(true) + .padding_mode(torch::kZeros)), + value_batch_norm_( + torch::nn::BatchNorm2dOptions( + /*num_features=*/config.value_filters) + .eps(0.001) // Make it the same as TF. + .momentum(0.01) // Torch momentum = 1 - TF momentum. + .affine(true) + .track_running_stats(true)), + value_linear1_(torch::nn::LinearOptions( + /*in_features=*/config.value_linear_in_features, + /*out_features=*/config.value_linear_out_features) + .bias(true)), + value_linear2_(torch::nn::LinearOptions( + /*in_features=*/config.value_linear_out_features, + /*out_features=*/1) + .bias(true)), + value_observation_size_(config.value_observation_size), + policy_conv_(torch::nn::Conv2dOptions( + /*input_channels=*/config.input_channels, + /*output_channels=*/config.policy_filters, + /*kernel_size=*/config.kernel_size) + .stride(1) + .padding(config.padding) + .dilation(1) + .groups(1) + .bias(true) + .padding_mode(torch::kZeros)), + policy_batch_norm_( + torch::nn::BatchNorm2dOptions( + /*num_features=*/config.policy_filters) + .eps(0.001) // Make it the same as TF. + .momentum(0.01) // Torch momentum = 1 - TF momentum. 
+ .affine(true) + .track_running_stats(true)), + policy_linear_(torch::nn::LinearOptions( + /*in_features=*/config.policy_linear_in_features, + /*out_features=*/config.policy_linear_out_features) + .bias(true)), + policy_observation_size_(config.policy_observation_size) { + register_module("value_conv", value_conv_); + register_module("value_batch_norm", value_batch_norm_); + register_module("value_linear_1", value_linear1_); + register_module("value_linear_2", value_linear2_); + register_module("policy_conv", policy_conv_); + register_module("policy_batch_norm", policy_batch_norm_); + register_module("policy_linear", policy_linear_); +} + +std::vector ResOutputBlockImpl::forward(torch::Tensor x, + torch::Tensor mask) { + torch::Tensor value_output = torch::relu(value_batch_norm_(value_conv_(x))); + value_output = value_output.view({-1, value_observation_size_}); + value_output = torch::relu(value_linear1_(value_output)); + value_output = torch::tanh(value_linear2_(value_output)); + + torch::Tensor policy_logits = + torch::relu(policy_batch_norm_(policy_conv_(x))); + policy_logits = policy_logits.view({-1, policy_observation_size_}); + policy_logits = policy_linear_(policy_logits); + policy_logits = torch::where(mask, policy_logits, + -(1 << 16) * torch::ones_like(policy_logits)); + + return {value_output, policy_logits}; +} + +MLPBlockImpl::MLPBlockImpl(const int in_features, const int out_features) + : linear_(torch::nn::LinearOptions( + /*in_features=*/in_features, + /*out_features=*/out_features) + .bias(true)) { + register_module("linear", linear_); +} + +torch::Tensor MLPBlockImpl::forward(torch::Tensor x) { + return torch::relu(linear_(x)); +} + +MLPOutputBlockImpl::MLPOutputBlockImpl(const int nn_width, + const int policy_linear_out_features) + : value_linear1_(torch::nn::LinearOptions( + /*in_features=*/nn_width, + /*out_features=*/nn_width) + .bias(true)), + value_linear2_(torch::nn::LinearOptions( + /*in_features=*/nn_width, + /*out_features=*/1) + .bias(true)), + policy_linear1_(torch::nn::LinearOptions( + /*input_channels=*/nn_width, + /*output_channels=*/nn_width) + .bias(true)), + policy_linear2_(torch::nn::LinearOptions( + /*in_features=*/nn_width, + /*out_features=*/policy_linear_out_features) + .bias(true)) { + register_module("value_linear_1", value_linear1_); + register_module("value_linear_2", value_linear2_); + register_module("policy_linear_1", policy_linear1_); + register_module("policy_linear_2", policy_linear2_); +} + +std::vector MLPOutputBlockImpl::forward(torch::Tensor x, + torch::Tensor mask) { + torch::Tensor value_output = torch::relu(value_linear1_(x)); + value_output = torch::tanh(value_linear2_(value_output)); + + torch::Tensor policy_logits = torch::relu(policy_linear1_(x)); + policy_logits = policy_linear2_(policy_logits); + policy_logits = torch::where(mask, policy_logits, + -(1 << 16) * torch::ones_like(policy_logits)); + + return {value_output, policy_logits}; +} + +ModelImpl::ModelImpl(const ModelConfig& config, const std::string& device) + : device_(device), + num_torso_blocks_(config.nn_depth), + weight_decay_(config.weight_decay) { + // Save config.nn_model to class + nn_model_ = config.nn_model; + + int input_size = 1; + for (const auto& num : config.observation_tensor_shape) { + if (num > 0) { + input_size *= num; + } + } + // Decide if resnet or MLP + if (config.nn_model == "resnet") { + int obs_dims = config.observation_tensor_shape.size(); + int channels = config.observation_tensor_shape[0]; + int height = obs_dims > 1 ? 
config.observation_tensor_shape[1] : 1; + int width = obs_dims > 2 ? config.observation_tensor_shape[2] : 1; + + ResInputBlockConfig input_config = {/*input_channels=*/channels, + /*input_height=*/height, + /*input_width=*/width, + /*filters=*/config.nn_width, + /*kernel_size=*/3, + /*padding=*/1}; + + ResTorsoBlockConfig residual_config = {/*input_channels=*/config.nn_width, + /*filters=*/config.nn_width, + /*kernel_size=*/3, + /*padding=*/1}; + + ResOutputBlockConfig output_config = { + /*input_channels=*/config.nn_width, + /*value_filters=*/1, + /*policy_filters=*/2, + /*kernel_size=*/1, + /*padding=*/0, + /*value_linear_in_features=*/1 * width * height, + /*value_linear_out_features=*/config.nn_width, + /*policy_linear_in_features=*/2 * width * height, + /*policy_linear_out_features=*/config.number_of_actions, + /*value_observation_size=*/1 * width * height, + /*policy_observation_size=*/2 * width * height}; + + layers_->push_back(ResInputBlock(input_config)); + for (int i = 0; i < num_torso_blocks_; i++) { + layers_->push_back(ResTorsoBlock(residual_config, i)); + } + layers_->push_back(ResOutputBlock(output_config)); + + register_module("layers", layers_); + + } else if (config.nn_model == "mlp") { + layers_->push_back(MLPBlock(input_size, config.nn_width)); + for (int i = 0; i < num_torso_blocks_; i++) { + layers_->push_back(MLPBlock(config.nn_width, config.nn_width)); + } + layers_->push_back( + MLPOutputBlock(config.nn_width, config.number_of_actions)); + + register_module("layers", layers_); + } else { + throw std::runtime_error("Unknown nn_model: " + config.nn_model); + } +} + +std::vector ModelImpl::forward(torch::Tensor x, + torch::Tensor mask) { + std::vector output = this->forward_(x, mask); + return {output[0], torch::softmax(output[1], 1)}; +} + +std::vector ModelImpl::losses(torch::Tensor inputs, + torch::Tensor masks, + torch::Tensor policy_targets, + torch::Tensor value_targets) { + std::vector output = this->forward_(inputs, masks); + + torch::Tensor value_predictions = output[0]; + torch::Tensor policy_predictions = output[1]; + + // Policy loss (cross-entropy). + torch::Tensor policy_loss = torch::sum( + -policy_targets * torch::log_softmax(policy_predictions, 1), -1); + policy_loss = torch::mean(policy_loss); + + // Value loss (mean-squared error). + torch::nn::MSELoss mse_loss; + torch::Tensor value_loss = mse_loss(value_predictions, value_targets); + + // L2 regularization loss (weights only). + torch::Tensor l2_regularization_loss = torch::full( + {1, 1}, 0, torch::TensorOptions().dtype(torch::kFloat32).device(device_)); + for (auto& named_parameter : this->named_parameters()) { + // named_parameter is essentially a key-value pair: + // {key, value} == {std::string name, torch::Tensor parameter} + std::string parameter_name = named_parameter.key(); + + // Do not include bias' in the loss. + if (absl::StrContains(parameter_name, "bias")) { + continue; + } + + // Copy TensorFlow's l2_loss function. 
+ // https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss + l2_regularization_loss += + weight_decay_ * torch::sum(torch::square(named_parameter.value())) / 2; + } + + return {policy_loss, value_loss, l2_regularization_loss}; +} + +std::vector ModelImpl::forward_(torch::Tensor x, + torch::Tensor mask) { + std::vector output; + if (this->nn_model_ == "resnet") { + for (int i = 0; i < num_torso_blocks_ + 2; i++) { + if (i == 0) { + x = layers_[i]->as()->forward(x); + } else if (i >= num_torso_blocks_ + 1) { + output = layers_[i]->as()->forward(x, mask); + } else { + x = layers_[i]->as()->forward(x); + } + } + } else if (this->nn_model_ == "mlp") { + for (int i = 0; i < num_torso_blocks_ + 1; i++) { + x = layers_[i]->as()->forward(x); + } + output = layers_[num_torso_blocks_ + 1]->as() + ->forward(x, mask); + } else { + throw std::runtime_error("Unknown nn_model: " + this->nn_model_); + } + return output; +} + +} // namespace torch_az +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/model.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/model.h new file mode 100644 index 0000000..6ddb0b5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/model.h @@ -0,0 +1,206 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_ALPHA_ZERO_TORCH_MODEL_H_ +#define OPEN_SPIEL_ALGORITHMS_ALPHA_ZERO_TORCH_MODEL_H_ + +#include + +#include +#include +#include + +namespace open_spiel { +namespace algorithms { +namespace torch_az { + +struct ResInputBlockConfig { + int input_channels; + int input_height; + int input_width; + int filters; + int kernel_size; + int padding; +}; + +struct ResTorsoBlockConfig { + int input_channels; + int filters; + int kernel_size; + int padding; + int layer; +}; + +struct ResOutputBlockConfig { + int input_channels; + int value_filters; + int policy_filters; + int kernel_size; + int padding; + int value_linear_in_features; + int value_linear_out_features; + int policy_linear_in_features; + int policy_linear_out_features; + int value_observation_size; + int policy_observation_size; +}; + +// Information for the model. This should be enough for any type of model +// (residual, convultional, or MLP). It needs to be saved/loaded to/from +// a file so the input and output stream operators are overload. +struct ModelConfig { + std::vector observation_tensor_shape; + int number_of_actions; + int nn_depth; + int nn_width; + double learning_rate; + double weight_decay; + std::string nn_model = "resnet"; +}; +std::istream& operator>>(std::istream& stream, ModelConfig& config); +std::ostream& operator<<(std::ostream& stream, const ModelConfig& config); + +// A block of the residual model's network that handles the input. 
It consists +// of one convolutional layer (CONV) and one batch normalization (BN) layer, and +// the output is passed through a rectified linear unit function (RELU). +// +// Illustration: +// [Input Tensor] --> CONV --> BN --> RELU +// +// There is only one input block per model. +class ResInputBlockImpl : public torch::nn::Module { + public: + ResInputBlockImpl(const ResInputBlockConfig& config); + torch::Tensor forward(torch::Tensor x); + + private: + int channels_; + int height_; + int width_; + torch::nn::Conv2d conv_; + torch::nn::BatchNorm2d batch_norm_; +}; +TORCH_MODULE(ResInputBlock); + +// A block of the residual model's network that makes up the 'torso'. It +// consists of two convolutional layers (CONV) and two batchnormalization layers +// (BN). The activation function is rectified linear unit (RELU). The input to +// the layer is added to the output before the final activation function. +// +// Illustration: +// [Input Tensor] --> CONV --> BN --> RELU --> CONV --> BN --> + --> RELU +// \___________________________________________________/ +// +// Unlike the input and output blocks, one can specify how many of these torso +// blocks they want in their model. +class ResTorsoBlockImpl : public torch::nn::Module { + public: + ResTorsoBlockImpl(const ResTorsoBlockConfig& config, int layer); + torch::Tensor forward(torch::Tensor x); + + private: + torch::nn::Conv2d conv1_; + torch::nn::Conv2d conv2_; + torch::nn::BatchNorm2d batch_norm1_; + torch::nn::BatchNorm2d batch_norm2_; +}; +TORCH_MODULE(ResTorsoBlock); + +// A block of the residual model's network that creates the output. It consists +// of a value and policy head. The value head takes the input through one +// convoluational layer (CONV), one batch normalization layers (BN), and two +// linear layers (LIN). The output activation function is tanh (TANH), the +// rectified linear activation function (RELU) is within. The policy head +// consists of one convolutional layer, batch normalization layer, and linear +// layer. There is no softmax activation function in this layer. The softmax +// on the output is applied in the forward function of the residual model. +// This design was chosen because the loss function of the residual model +// requires the policy logits, not the policy distribution. By providing the +// policy logits as output, the residual model can either apply the softmax +// activation function, or calculate the loss using Torch's log softmax +// function. +// +// Illustration: +// --> CONV --> BN --> RELU --> LIN --> RELU --> LIN --> TANH +// [Input Tensor] -- +// --> CONV --> BN --> RELU --> LIN (no SOFTMAX here) +// +// There is only one output block per model. +class ResOutputBlockImpl : public torch::nn::Module { + public: + ResOutputBlockImpl(const ResOutputBlockConfig& config); + std::vector forward(torch::Tensor x, torch::Tensor mask); + + private: + torch::nn::Conv2d value_conv_; + torch::nn::BatchNorm2d value_batch_norm_; + torch::nn::Linear value_linear1_; + torch::nn::Linear value_linear2_; + int value_observation_size_; + torch::nn::Conv2d policy_conv_; + torch::nn::BatchNorm2d policy_batch_norm_; + torch::nn::Linear policy_linear_; + int policy_observation_size_; +}; +TORCH_MODULE(ResOutputBlock); + +// A dense block with ReLU activation. 
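One behaviour shared by the policy heads in this model (implemented with `torch::where` in both `ResOutputBlockImpl::forward` and `MLPOutputBlockImpl::forward` in model.cc above): illegal actions are never removed from the policy output; their logits are overwritten with the large negative constant `-(1 << 16)` before the softmax, so their probability collapses to numerical zero. The model test further below checks exactly this, expecting positive probability for legal actions and exactly zero for masked ones. A plain-Python illustration of the masking step, with illustrative names:

```python
import math

def masked_softmax(logits, legal_mask, mask_value=-(1 << 16)):
    """Mask illegal actions the way the output blocks do before softmax."""
    masked = [x if ok else mask_value for x, ok in zip(logits, legal_mask)]
    m = max(masked)                           # subtract max for stability
    exps = [math.exp(x - m) for x in masked]
    total = sum(exps)
    return [e / total for e in exps]

print(masked_softmax([1.2, 0.3, -0.5, 2.0], [True, False, True, True]))
# The masked entry (index 1) comes out as exactly 0.0 after underflow.
```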
+class MLPBlockImpl : public torch::nn::Module { + public: + MLPBlockImpl(const int in_features, const int out_features); + torch::Tensor forward(torch::Tensor x); + + private: + torch::nn::Linear linear_; +}; +TORCH_MODULE(MLPBlock); + +class MLPOutputBlockImpl : public torch::nn::Module { + public: + MLPOutputBlockImpl(const int nn_width, const int policy_linear_out_features); + std::vector forward(torch::Tensor x, torch::Tensor mask); + + private: + torch::nn::Linear value_linear1_; + torch::nn::Linear value_linear2_; + torch::nn::Linear policy_linear1_; + torch::nn::Linear policy_linear2_; +}; +TORCH_MODULE(MLPOutputBlock); + +// The model class that interacts with the VPNet. The ResInputBlock, +// ResTorsoBlock, and ResOutputBlock are not to be used by the VPNet directly. +class ModelImpl : public torch::nn::Module { + public: + ModelImpl(const ModelConfig& config, const std::string& device); + std::vector forward(torch::Tensor x, torch::Tensor mask); + std::vector losses(torch::Tensor inputs, torch::Tensor masks, + torch::Tensor policy_targets, + torch::Tensor value_targets); + + private: + std::vector forward_(torch::Tensor x, torch::Tensor mask); + torch::nn::ModuleList layers_; + torch::Device device_; + int num_torso_blocks_; + double weight_decay_; + std::string nn_model_; +}; +TORCH_MODULE(Model); + +} // namespace torch_az +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_ALPHA_ZERO_TORCH_MODEL_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/model_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/model_test.cc new file mode 100644 index 0000000..aa939fa --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/model_test.cc @@ -0,0 +1,125 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/alpha_zero_torch/model.h" + +#include + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace torch_az { +namespace { + +void TestModelCreation() { + std::cout << "\n~-~-~-~- TestModelCreation -~-~-~-~" << std::endl; + + std::shared_ptr game = LoadGame("clobber"); + + ModelConfig net_config = { + /*observation_tensor_shape=*/game->ObservationTensorShape(), + /*number_of_actions=*/game->NumDistinctActions(), + /*nn_depth=*/8, + /*nn_width=*/128, + /*learning_rate=*/0.001, + /*weight_decay=*/0.001}; + Model net(net_config, "cpu:0"); + + std::cout << "Good! 
The network looks like:\n" << net << std::endl; +} + +void TestModelInference() { + std::cout << "\n~-~-~-~- TestModelInference -~-~-~-~" << std::endl; + + const int channels = 3; + const int rows = 8; + const int columns = 8; + std::string game_string = + absl::StrCat("clobber(rows=", std::to_string(rows), + ",columns=", std::to_string(columns), ")"); + + std::shared_ptr game = LoadGame(game_string); + std::unique_ptr state = game->NewInitialState(); + + ModelConfig net_config = { + /*observation_tensor_shape=*/game->ObservationTensorShape(), + /*number_of_actions=*/game->NumDistinctActions(), + /*nn_depth=*/rows + 1, + /*nn_width=*/128, + /*learning_rate=*/0.001, + /*weight_decay=*/0.001}; + Model net(net_config, "cpu:0"); + + std::vector observation_vector = state->ObservationTensor(); + torch::Tensor observation_tensor = torch::from_blob( + observation_vector.data(), {1, channels * rows * columns}); + torch::Tensor mask = torch::full({1, game->NumDistinctActions()}, false, + torch::TensorOptions().dtype(torch::kByte)); + + for (Action action : state->LegalActions()) { + mask[0][action] = true; + } + + std::cout << "Input:\n" + << observation_tensor.view({channels, rows, columns}) << std::endl; + std::cout << "Mask:\n" << mask << std::endl; + + std::vector output = net(observation_tensor, mask); + + std::cout << "Output:\n" << output << std::endl; + + // Check value and policy. + SPIEL_CHECK_EQ((int)output.size(), 2); + SPIEL_CHECK_EQ(output[0].numel(), 1); + SPIEL_CHECK_EQ(output[1].numel(), game->NumDistinctActions()); + + // Check mask's influence on the policy. + for (int i = 0; i < game->NumDistinctActions(); i++) { + if (mask[0][i].item()) { + SPIEL_CHECK_GT(output[1][0][i].item(), 0.0); + } else { + SPIEL_CHECK_EQ(output[1][0][i].item(), 0.0); + } + } + + std::cout << "Value:\n" << output[0] << std::endl; + std::cout << "Policy:\n" << output[1] << std::endl; +} + +void TestCUDAAVailability() { + if (torch::cuda::is_available()) { + std::cout << "CUDA is available!" << std::endl; + } else { + std::cout << "CUDA is not available." << std::endl; + } +} + +} // namespace +} // namespace torch_az +} // namespace algorithms +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::algorithms::torch_az::TestModelCreation(); + open_spiel::algorithms::torch_az::TestModelInference(); + open_spiel::algorithms::torch_az::TestCUDAAVailability(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/vpevaluator.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/vpevaluator.cc new file mode 100644 index 0000000..e1e4c72 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/vpevaluator.cc @@ -0,0 +1,179 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
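The evaluator implemented below is the bridge between MCTS and the network: `Evaluate` runs one inference, takes the player-0 value and returns `{v, -v}` (the code explicitly notes the zero-sum assumption), while `Prior` returns the chance outcomes at chance nodes and the network policy otherwise. A compact Python sketch of that contract; the class, the fake state and the `infer_fn` callback are all illustrative stand-ins, not OpenSpiel APIs:

```python
class ZeroSumNetEvaluator:
    """Mirrors the Evaluate/Prior split used by VPNetEvaluator."""

    def __init__(self, infer_fn):
        # infer_fn(observation, legal_actions) -> (value_for_player_0, {action: prob})
        self._infer = infer_fn

    def evaluate(self, state):
        value, _ = self._infer(state.observation_tensor(), state.legal_actions())
        return [value, -value]              # zero-sum: player 1 gets -value

    def prior(self, state):
        if state.is_chance_node():
            return state.chance_outcomes()
        _, policy = self._infer(state.observation_tensor(), state.legal_actions())
        return policy

class _FakeState:
    def observation_tensor(self): return [0.0, 1.0]
    def legal_actions(self): return [0, 2]
    def is_chance_node(self): return False
    def chance_outcomes(self): return []

ev = ZeroSumNetEvaluator(lambda obs, legal: (0.25, {a: 1 / len(legal) for a in legal}))
print(ev.evaluate(_FakeState()))   # [0.25, -0.25]
print(ev.prior(_FakeState()))      # {0: 0.5, 2: 0.5}
```

Repeated inferences are memoised in a sharded LRU cache keyed by a hash of the legal actions and observation, which the learner reports on and clears each step.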
+ +#include "open_spiel/algorithms/alpha_zero_torch/vpevaluator.h" + +#include +#include + +#include "open_spiel/abseil-cpp/absl/hash/hash.h" +#include "open_spiel/abseil-cpp/absl/time/time.h" +#include "open_spiel/utils/stats.h" + +namespace open_spiel { +namespace algorithms { +namespace torch_az { + +VPNetEvaluator::VPNetEvaluator(DeviceManager* device_manager, int batch_size, + int threads, int cache_size, int cache_shards) + : device_manager_(*device_manager), + batch_size_(batch_size), + queue_(batch_size * threads * 4), + batch_size_hist_(batch_size + 1) { + cache_shards = std::max(1, cache_shards); + cache_.reserve(cache_shards); + for (int i = 0; i < cache_shards; ++i) { + cache_.push_back( + std::make_unique>( + cache_size / cache_shards)); + } + if (batch_size_ <= 1) { + threads = 0; + } + inference_threads_.reserve(threads); + for (int i = 0; i < threads; ++i) { + inference_threads_.emplace_back([this]() { this->Runner(); }); + } +} + +VPNetEvaluator::~VPNetEvaluator() { + stop_.Stop(); + queue_.BlockNewValues(); + queue_.Clear(); + for (auto& t : inference_threads_) { + t.join(); + } +} + +void VPNetEvaluator::ClearCache() { + for (auto& c : cache_) { + c->Clear(); + } +} + +LRUCacheInfo VPNetEvaluator::CacheInfo() { + LRUCacheInfo info; + for (auto& c : cache_) { + info += c->Info(); + } + return info; +} + +std::vector VPNetEvaluator::Evaluate(const State& state) { + // TODO(author5): currently assumes zero-sum. + double p0value = Inference(state).value; + return {p0value, -p0value}; +} + +open_spiel::ActionsAndProbs VPNetEvaluator::Prior(const State& state) { + if (state.IsChanceNode()) { + return state.ChanceOutcomes(); + } else { + return Inference(state).policy; + } +} + +VPNetModel::InferenceOutputs VPNetEvaluator::Inference(const State& state) { + VPNetModel::InferenceInputs inputs = {state.LegalActions(), + state.ObservationTensor()}; + + uint64_t key; + int cache_shard; + if (!cache_.empty()) { + key = absl::Hash{}(inputs); + cache_shard = key % cache_.size(); + absl::optional opt_outputs = + cache_[cache_shard]->Get(key); + if (opt_outputs) { + return *opt_outputs; + } + } + VPNetModel::InferenceOutputs outputs; + if (batch_size_ <= 1) { + outputs = device_manager_.Get(1)->Inference(std::vector{inputs})[0]; + } else { + std::promise prom; + std::future fut = prom.get_future(); + queue_.Push(QueueItem{inputs, &prom}); + outputs = fut.get(); + } + if (!cache_.empty()) { + cache_[cache_shard]->Set(key, outputs); + } + return outputs; +} + +void VPNetEvaluator::Runner() { + std::vector inputs; + std::vector*> promises; + inputs.reserve(batch_size_); + promises.reserve(batch_size_); + while (!stop_.StopRequested()) { + { + // Only one thread at a time should be listening to the queue to maximize + // batch size and minimize latency. + absl::MutexLock lock(&inference_queue_m_); + absl::Time deadline = absl::InfiniteFuture(); + for (int i = 0; i < batch_size_; ++i) { + absl::optional item = queue_.Pop(deadline); + if (!item) { // Hit the deadline. + break; + } + if (inputs.empty()) { + deadline = absl::Now() + absl::Milliseconds(1); + } + inputs.push_back(item->inputs); + promises.push_back(item->prom); + } + } + + if (inputs.empty()) { // Almost certainly StopRequested. 
+ continue; + } + + { + absl::MutexLock lock(&stats_m_); + batch_size_stats_.Add(inputs.size()); + batch_size_hist_.Add(inputs.size()); + } + + std::vector outputs = + device_manager_.Get(inputs.size())->Inference(inputs); + for (int i = 0; i < promises.size(); ++i) { + promises[i]->set_value(outputs[i]); + } + inputs.clear(); + promises.clear(); + } +} + +void VPNetEvaluator::ResetBatchSizeStats() { + absl::MutexLock lock(&stats_m_); + batch_size_stats_.Reset(); + batch_size_hist_.Reset(); +} + +open_spiel::BasicStats VPNetEvaluator::BatchSizeStats() { + absl::MutexLock lock(&stats_m_); + return batch_size_stats_; +} + +open_spiel::HistogramNumbered VPNetEvaluator::BatchSizeHistogram() { + absl::MutexLock lock(&stats_m_); + return batch_size_hist_; +} + +} // namespace torch_az +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/vpevaluator.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/vpevaluator.h new file mode 100644 index 0000000..b344ce7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/vpevaluator.h @@ -0,0 +1,83 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_ALPHA_ZERO_TORCH_VPEVALUATOR_H_ +#define OPEN_SPIEL_ALGORITHMS_ALPHA_ZERO_TORCH_VPEVALUATOR_H_ + +#include // NOLINT +#include + +#include "open_spiel/abseil-cpp/absl/hash/hash.h" +#include "open_spiel/algorithms/alpha_zero_torch/device_manager.h" +#include "open_spiel/algorithms/alpha_zero_torch/vpnet.h" +#include "open_spiel/algorithms/mcts.h" +#include "open_spiel/spiel.h" +#include "open_spiel/utils/lru_cache.h" +#include "open_spiel/utils/stats.h" +#include "open_spiel/utils/thread.h" +#include "open_spiel/utils/threaded_queue.h" + +namespace open_spiel { +namespace algorithms { +namespace torch_az { + +class VPNetEvaluator : public Evaluator { + public: + explicit VPNetEvaluator(DeviceManager* device_manager, int batch_size, + int threads, int cache_size, int cache_shards = 1); + ~VPNetEvaluator() override; + + // Return a value of this state for each player. + std::vector Evaluate(const State& state) override; + + // Return a policy: the probability of the current player playing each action. + ActionsAndProbs Prior(const State& state) override; + + void ClearCache(); + LRUCacheInfo CacheInfo(); + + void ResetBatchSizeStats(); + open_spiel::BasicStats BatchSizeStats(); + open_spiel::HistogramNumbered BatchSizeHistogram(); + + private: + VPNetModel::InferenceOutputs Inference(const State& state); + + void Runner(); + + DeviceManager& device_manager_; + std::vector>> + cache_; + const int batch_size_; + + struct QueueItem { + VPNetModel::InferenceInputs inputs; + std::promise* prom; + }; + + ThreadedQueue queue_; + StopToken stop_; + std::vector inference_threads_; + absl::Mutex inference_queue_m_; // Only one thread at a time should pop. 
+ + absl::Mutex stats_m_; + open_spiel::BasicStats batch_size_stats_; + open_spiel::HistogramNumbered batch_size_hist_; +}; + +} // namespace torch_az +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_ALPHA_ZERO_TORCH_VPEVALUATOR_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/vpnet.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/vpnet.cc new file mode 100644 index 0000000..5527e11 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/vpnet.cc @@ -0,0 +1,259 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/alpha_zero_torch/vpnet.h" + +#include + +#include // For ifstream/ofstream. +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/algorithms/alpha_zero_torch/model.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace torch_az { + +// Saves a struct that holds initialization data for the model to a file. +// +// The TensorFlow version creates a TensorFlow graph definition when +// CreateGraphDef is called. To avoid having to change this, allow calls to +// CreateGraphDef, however now it simply saves a struct to a file which can +// then be loaded and used to initialize a model. +bool SaveModelConfig(const std::string& path, const std::string& filename, + const ModelConfig& net_config) { + std::ofstream file; + file.open(absl::StrCat(path, "/", filename)); + + if (!file) { + return false; + } else { + file << net_config; + } + file.close(); + + return true; +} + +// Loads a struct that holds initialization data for the model from a file. +// +// The TensorFlow version creates a TensorFlow graph definition when +// CreateGraphDef is called. To avoid having to change this, allow calls to +// CreateGraphDef, however now it simply saves a struct to a file which can +// then be loaded and used to initialize a model. +ModelConfig LoadModelConfig(const std::string& path, + const std::string& filename) { + std::ifstream file; + file.open(absl::StrCat(path, "/", filename)); + ModelConfig net_config; + + file >> net_config; + file.close(); + + return net_config; +} + +// Modifies a given device string to one that can be accepted by the +// Torch library. +// +// The Torch library accepts 'cpu', 'cpu:0', 'cuda:0', 'cuda:1', +// 'cuda:2', 'cuda:3'..., but complains when there's a slash in front +// of the device name. +// +// Currently, this function only disregards a slash if it exists at the +// beginning of the device string, more functionality can be added if +// needed. 
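+//
+// For example (illustrative): "/cpu:0" is returned as "cpu:0", while a
+// string such as "cuda:1" that has no leading slash is returned unchanged.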
+std::string TorchDeviceName(const std::string& device) { + if (device[0] == '/') { + return device.substr(1); + } + return device; +} + +bool CreateGraphDef(const Game& game, double learning_rate, double weight_decay, + const std::string& path, const std::string& filename, + std::string nn_model, int nn_width, int nn_depth, + bool verbose) { + ModelConfig net_config = { + /*observation_tensor_shape=*/game.ObservationTensorShape(), + /*number_of_actions=*/game.NumDistinctActions(), + /*nn_depth=*/nn_depth, + /*nn_width=*/nn_width, + /*learning_rate=*/learning_rate, + /*weight_decay=*/weight_decay, + /*nn_model=*/nn_model}; + + return SaveModelConfig(path, filename, net_config); +} + +VPNetModel::VPNetModel(const Game& game, const std::string& path, + const std::string& file_name, const std::string& device) + : device_(device), + path_(path), + flat_input_size_(game.ObservationTensorSize()), + num_actions_(game.NumDistinctActions()), + model_config_(LoadModelConfig(path, file_name)), + model_(model_config_, TorchDeviceName(device)), + model_optimizer_( + model_->parameters(), + torch::optim::AdamOptions( // NOLINT(misc-include-cleaner) + model_config_.learning_rate)), + torch_device_(TorchDeviceName(device)) { + // Some assumptions that we can remove eventually. The value net returns + // a single value in terms of player 0 and the game is assumed to be zero-sum, + // so player 1 can just be -value. + SPIEL_CHECK_EQ(game.NumPlayers(), 2); + SPIEL_CHECK_EQ(game.GetType().utility, GameType::Utility::kZeroSum); + + // Put this model on the specified device. + model_->to(torch_device_); +} + +std::string VPNetModel::SaveCheckpoint(int step) { + std::string full_path = absl::StrCat(path_, "/checkpoint-", step); + + torch::save(model_, absl::StrCat(full_path, ".pt")); + torch::save(model_optimizer_, absl::StrCat(full_path, "-optimizer.pt")); + + return full_path; +} + +void VPNetModel::LoadCheckpoint(int step) { + // Load checkpoint from the path given at its initialization. + LoadCheckpoint(absl::StrCat(path_, "/checkpoint-", step)); +} + +void VPNetModel::LoadCheckpoint(const std::string& path) { + torch::load(model_, absl::StrCat(path, ".pt"), torch_device_); + torch::load(model_optimizer_, absl::StrCat(path, "-optimizer.pt"), + torch_device_); +} + +std::vector VPNetModel::Inference( + const std::vector& inputs) { + int inference_batch_size = inputs.size(); + + // Torch tensors by default use a dense, row-aligned memory layout. + // - Their default data type is a 32-bit float + // - Use the byte data type for boolean + + torch::Tensor torch_inf_inputs = + torch::empty({inference_batch_size, flat_input_size_}, torch_device_); + torch::Tensor torch_inf_legal_mask = torch::full( + {inference_batch_size, num_actions_}, false, + torch::TensorOptions().dtype(torch::kByte).device(torch_device_)); + + for (int batch = 0; batch < inference_batch_size; ++batch) { + // Copy legal mask(s) to a Torch tensor. + for (Action action : inputs[batch].legal_actions) { + torch_inf_legal_mask[batch][action] = true; + } + + // Copy the observation(s) to a Torch tensor. + for (int i = 0; i < inputs[batch].observations.size(); ++i) { + torch_inf_inputs[batch][i] = inputs[batch].observations[i]; + } + } + + // Run the inference. + model_->eval(); + std::vector torch_outputs = + model_(torch_inf_inputs, torch_inf_legal_mask); + + torch::Tensor value_batch = torch_outputs[0]; + torch::Tensor policy_batch = torch_outputs[1]; + + // Copy the Torch tensor output to the appropriate structure. 
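+  // Each batch entry becomes one InferenceOutputs record: the scalar value
+  // (from player 0's perspective) plus an (action, probability) pair for
+  // every legal action of that input.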
+ std::vector output; + output.reserve(inference_batch_size); + for (int batch = 0; batch < inference_batch_size; ++batch) { + double value = value_batch[batch].item(); + + ActionsAndProbs state_policy; + state_policy.reserve(inputs[batch].legal_actions.size()); + for (Action action : inputs[batch].legal_actions) { + state_policy.push_back( + {action, policy_batch[batch][action].item()}); + } + + output.push_back({value, state_policy}); + } + + return output; +} + +VPNetModel::LossInfo VPNetModel::Learn(const std::vector& inputs) { + int training_batch_size = inputs.size(); + + // Torch tensors by default use a dense, row-aligned memory layout. + // - Their default data type is a 32-bit float + // - Use the byte data type for boolean + + torch::Tensor torch_train_inputs = + torch::empty({training_batch_size, flat_input_size_}, torch_device_); + torch::Tensor torch_train_legal_mask = torch::full( + {training_batch_size, num_actions_}, false, + torch::TensorOptions().dtype(torch::kByte).device(torch_device_)); + torch::Tensor torch_policy_targets = + torch::zeros({training_batch_size, num_actions_}, torch_device_); + torch::Tensor torch_value_targets = + torch::empty({training_batch_size, 1}, torch_device_); + + for (int batch = 0; batch < training_batch_size; ++batch) { + // Copy the legal mask(s) to a Torch tensor. + for (Action action : inputs[batch].legal_actions) { + torch_train_legal_mask[batch][action] = true; + } + + // Copy the observation(s) to a Torch tensor. + for (int i = 0; i < inputs[batch].observations.size(); ++i) { + torch_train_inputs[batch][i] = inputs[batch].observations[i]; + } + + // Copy the policy target(s) to a Torch tensor. + for (const auto& [action, probability] : inputs[batch].policy) { + torch_policy_targets[batch][action] = probability; + } + + // Copy the value target(s) to a Torch tensor. + torch_value_targets[batch][0] = inputs[batch].value; + } + + // Run a training step and get the losses. + model_->train(); + model_->zero_grad(); + + std::vector torch_outputs = + model_->losses(torch_train_inputs, torch_train_legal_mask, + torch_policy_targets, torch_value_targets); + + torch::Tensor total_loss = + torch_outputs[0] + torch_outputs[1] + torch_outputs[2]; + + total_loss.backward(); + + model_optimizer_.step(); + + return LossInfo(torch_outputs[0].item(), + torch_outputs[1].item(), + torch_outputs[2].item()); +} + +} // namespace torch_az +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/vpnet.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/vpnet.h new file mode 100644 index 0000000..008646a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/vpnet.h @@ -0,0 +1,159 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef OPEN_SPIEL_ALGORITHMS_ALPHA_ZERO_TORCH_VPNET_H_ +#define OPEN_SPIEL_ALGORITHMS_ALPHA_ZERO_TORCH_VPNET_H_ + +#include + +#include +#include +#include + +#include "open_spiel/algorithms/alpha_zero_torch/model.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace torch_az { + +// To avoid having to change function calls and the flow of the AlphaZero setup, +// this function is still called, but rather than create a TensorFlow graph +// definition, it simply creates a struct that the Libtorch model can use to +// initialize from. This struct is then saved and then loaded again when needed. +bool CreateGraphDef(const Game& game, double learning_rate, double weight_decay, + const std::string& path, const std::string& filename, + std::string nn_model, int nn_width, int nn_depth, + bool verbose = false); + +class VPNetModel { + public: + // A class to handle the network's loss. + class LossInfo { + public: + LossInfo() {} + LossInfo(double policy, double value, double l2) + : policy_(policy), value_(value), l2_(l2), batches_(1) {} + + // Merge another LossInfo into this one. + LossInfo& operator+=(const LossInfo& other) { + policy_ += other.policy_; + value_ += other.value_; + l2_ += other.l2_; + batches_ += other.batches_; + return *this; + } + + // Return the average losses over all merged into this one. + double Policy() const { return policy_ / batches_; } + double Value() const { return value_ / batches_; } + double L2() const { return l2_ / batches_; } + double Total() const { return Policy() + Value() + L2(); } + + private: + double policy_ = 0; + double value_ = 0; + double l2_ = 0; + int batches_ = 0; + }; + + // A struct to handle the inputs for inference. + struct InferenceInputs { + std::vector legal_actions; + std::vector observations; + + bool operator==(const InferenceInputs& other) const { + return legal_actions == other.legal_actions && + observations == other.observations; + } + + template + friend H AbslHashValue(H h, const InferenceInputs& in) { + return H::combine(std::move(h), in.legal_actions, in.observations); + } + }; + + // A struct to hold the outputs of the inference (value and policy). + struct InferenceOutputs { + double value; + ActionsAndProbs policy; + }; + + // A struct to hold the inputs for training. + struct TrainInputs { + std::vector legal_actions; + std::vector observations; + ActionsAndProbs policy; + double value; + + NOP_STRUCTURE(TrainInputs, legal_actions, observations, policy, value); + }; + + enum CheckpointStep { + kMostRecentCheckpointStep = -1, + kInvalidCheckpointStep = -2 + }; + + VPNetModel(const Game &game, const std::string &path, + const std::string &file_name, + const std::string &device = "/cpu:0"); + + // Move only, not copyable. + VPNetModel(VPNetModel&& other) = default; + VPNetModel& operator=(VPNetModel&& other) = default; + VPNetModel(const VPNetModel&) = delete; + VPNetModel& operator=(const VPNetModel&) = delete; + + // Inference: Get both at the same time. + std::vector Inference( + const std::vector& inputs); + + // Training: do one (batch) step of neural net training + LossInfo Learn(const std::vector& inputs); + + std::string SaveCheckpoint(int step); + void LoadCheckpoint(int step); + void LoadCheckpoint(const std::string& path); + + std::string Device() const { return device_; } + + private: + std::string device_; + std::string path_; + + // Store the full model metagraph file + // for writing python compatible checkpoints. 
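+  // Note: the Libtorch implementation in vpnet.cc does not currently read
+  // or write this member.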
+ std::string model_meta_graph_contents_; + + int flat_input_size_; + int num_actions_; + + // NOTE: + // The member model_ takes an already initialized model_config_, + // and model_optimizer_ takes an already initialized model_ + // parameters and model_config_ learning rate. Therefore, keep the + // members' (model_config_, model_, model_optimizer_) declaration in + // the order shown below so the member initialization list works. + ModelConfig model_config_; + Model model_; + torch::optim::Adam model_optimizer_; + torch::Device torch_device_; +}; + +} // namespace torch_az +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_ALPHA_ZERO_TORCH_VPNET_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/vpnet_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/vpnet_test.cc new file mode 100644 index 0000000..5bca8db --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/alpha_zero_torch/vpnet_test.cc @@ -0,0 +1,216 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/alpha_zero_torch/vpnet.h" + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/file.h" +#include "open_spiel/utils/init.h" + +namespace open_spiel { +namespace algorithms { +namespace torch_az { +namespace { + +double SolveState(const State& state, + absl::flat_hash_map& cache, + std::vector& train_inputs) { + std::string state_str = state.ToString(); + if (cache.find(state_str) != cache.end()) { + return train_inputs[cache[state_str]].value; + } + if (state.IsTerminal()) { + return state.PlayerReturn(0); + } + + bool max_player = state.CurrentPlayer() == 0; + std::vector obs = state.ObservationTensor(); + std::vector legal_actions = state.LegalActions(); + + Action best_action = kInvalidAction; + double best_value = -2; + for (Action action : legal_actions) { + double value = SolveState(*state.Child(action), cache, train_inputs); + if (best_action == kInvalidAction || + (max_player ? value > best_value : value < best_value)) { + best_action = action; + best_value = value; + } + } + ActionsAndProbs policy({{best_action, 1}}); + + cache[state_str] = train_inputs.size(); + train_inputs.push_back( + VPNetModel::TrainInputs{legal_actions, obs, policy, best_value}); + return best_value; +} + +std::vector SolveGame() { + std::shared_ptr game = + open_spiel::LoadGame("tic_tac_toe"); + std::unique_ptr state = game->NewInitialState(); + + // Store them directly into a vector so they are returned in order so + // given a static initialization the model trains identically. 
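+  // The cache maps each state's string representation to its index in
+  // train_inputs, so every distinct tic-tac-toe state is solved only once.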
+ absl::flat_hash_map cache; + std::vector train_inputs; + train_inputs.reserve(4520); + SolveState(*state, cache, train_inputs); + return train_inputs; +} + +VPNetModel BuildModel(const Game& game, const std::string& nn_model, + bool create_graph) { + std::string tmp_dir = open_spiel::file::GetTmpDir(); + std::string filename = + absl::StrCat("open_spiel_vpnet_test_", nn_model, ".pb"); + + if (create_graph) { + SPIEL_CHECK_TRUE(CreateGraphDef(game, + /*learning_rate=*/0.01, + /*weight_decay=*/0.0001, tmp_dir, filename, + nn_model, /*nn_width=*/64, /*nn_depth=*/2, + /*verbose=*/true)); + } + + std::string model_path = absl::StrCat(tmp_dir, "/", filename); + SPIEL_CHECK_TRUE(file::Exists(model_path)); + + VPNetModel model(game, tmp_dir, filename, "/cpu:0"); + + return model; +} + +void TestModelCreation(const std::string& nn_model) { + std::cout << "TestModelCreation: " << nn_model << std::endl; + std::shared_ptr game = LoadGame("tic_tac_toe"); + VPNetModel model = BuildModel(*game, nn_model, true); + + std::unique_ptr state = game->NewInitialState(); + std::vector legal_actions = state->LegalActions(); + std::vector obs = state->ObservationTensor(); + VPNetModel::InferenceInputs inputs = {legal_actions, obs}; + + // Check that inference runs at all. + model.Inference(std::vector{inputs}); + + std::vector train_inputs; + train_inputs.emplace_back(VPNetModel::TrainInputs{ + legal_actions, obs, ActionsAndProbs({{legal_actions[0], 1}}), 0}); + + // Check that learning runs at all. + model.Learn(train_inputs); +} + +// Can learn a single trajectory +void TestModelLearnsSimple(const std::string& nn_model) { + std::cout << "TestModelLearnsSimple: " << nn_model << std::endl; + std::shared_ptr game = LoadGame("tic_tac_toe"); + VPNetModel model = BuildModel(*game, nn_model, false); + + std::vector train_inputs; + std::unique_ptr state = game->NewInitialState(); + + while (!state->IsTerminal()) { + std::vector obs = state->ObservationTensor(); + std::vector legal_actions = state->LegalActions(); + Action action = legal_actions[0]; + ActionsAndProbs policy({{action, 1}}); + + train_inputs.emplace_back( + VPNetModel::TrainInputs{legal_actions, obs, policy, 1}); + + VPNetModel::InferenceInputs inputs = {legal_actions, obs}; + std::vector out = + model.Inference(std::vector{inputs}); + SPIEL_CHECK_EQ(out.size(), 1); + SPIEL_CHECK_EQ(out[0].policy.size(), legal_actions.size()); + + state->ApplyAction(action); + } + + std::cout << "states: " << train_inputs.size() << std::endl; + std::vector losses; + for (int i = 0; i < 1000; i++) { + VPNetModel::LossInfo loss = model.Learn(train_inputs); + std::cout << absl::StrFormat( + "%d: Losses(total: %.3f, policy: %.3f, value: %.3f, l2: %.3f)\n", i, + loss.Total(), loss.Policy(), loss.Value(), loss.L2()); + losses.push_back(loss); + if (loss.Policy() < 0.05 && loss.Value() < 0.05) { + break; + } + } + SPIEL_CHECK_GT(losses.front().Total(), losses.back().Total()); + SPIEL_CHECK_GT(losses.front().Policy(), losses.back().Policy()); + SPIEL_CHECK_GT(losses.front().Value(), losses.back().Value()); + SPIEL_CHECK_LT(losses.back().Value(), 0.05); + SPIEL_CHECK_LT(losses.back().Policy(), 0.05); +} + +// Can learn the optimal policy. 
+void TestModelLearnsOptimal( + const std::string& nn_model, + const std::vector& train_inputs) { + std::cout << "TestModelLearnsOptimal: " << nn_model << std::endl; + std::shared_ptr game = LoadGame("tic_tac_toe"); + VPNetModel model = BuildModel(*game, nn_model, false); + + std::cout << "states: " << train_inputs.size() << std::endl; + std::vector losses; + for (int i = 0; i < 1000; i++) { + VPNetModel::LossInfo loss = model.Learn(train_inputs); + std::cout << absl::StrFormat( + "%d: Losses(total: %.3f, policy: %.3f, value: %.3f, l2: %.3f)\n", i, + loss.Total(), loss.Policy(), loss.Value(), loss.L2()); + losses.push_back(loss); + if (loss.Policy() < 0.1 && loss.Value() < 0.1) { + break; + } + } + SPIEL_CHECK_GT(losses.front().Total(), losses.back().Total()); + SPIEL_CHECK_GT(losses.front().Policy(), losses.back().Policy()); + SPIEL_CHECK_GT(losses.front().Value(), losses.back().Value()); + SPIEL_CHECK_LT(losses.back().Value(), 0.1); + SPIEL_CHECK_LT(losses.back().Policy(), 0.1); +} + +} // namespace +} // namespace torch_az +} // namespace algorithms +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::Init("", &argc, &argv, true); + open_spiel::algorithms::torch_az::TestModelCreation("resnet"); + + // Tests below here reuse the graphs created above. Graph creation is slow + // due to calling a separate python process. + + open_spiel::algorithms::torch_az::TestModelLearnsSimple("resnet"); + + auto train_inputs = open_spiel::algorithms::torch_az::SolveGame(); + open_spiel::algorithms::torch_az::TestModelLearnsOptimal("resnet", + train_inputs); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/best_response.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/best_response.cc new file mode 100644 index 0000000..36f0c81 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/best_response.cc @@ -0,0 +1,292 @@ + +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/algorithms/best_response.h" + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/btree_set.h" +#include "open_spiel/algorithms/expected_returns.h" +#include "open_spiel/algorithms/history_tree.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { + +TabularBestResponse::TabularBestResponse(const Game& game, + Player best_responder, + const Policy* policy, + const float prob_cut_threshold, + const float action_value_tolerance) + : best_responder_(best_responder), + tabular_policy_container_(), + policy_(policy), + tree_(HistoryTree(game.NewInitialState(), best_responder_)), + num_players_(game.NumPlayers()), + prob_cut_threshold_(prob_cut_threshold), + action_value_tolerance_(action_value_tolerance), + infosets_(GetAllInfoSets(game.NewInitialState(), best_responder, policy, + &tree_)), + root_(game.NewInitialState()), + dummy_policy_(new TabularPolicy(GetUniformPolicy(game))) { + if (game.GetType().dynamics != GameType::Dynamics::kSequential) { + SpielFatalError("The game must be turn-based."); + } +} + +TabularBestResponse::TabularBestResponse( + const Game& game, Player best_responder, + const std::unordered_map& policy_table, + const float prob_cut_threshold, const float action_value_tolerance) + : best_responder_(best_responder), + tabular_policy_container_(policy_table), + policy_(&tabular_policy_container_), + tree_(HistoryTree(game.NewInitialState(), best_responder_)), + num_players_(game.NumPlayers()), + prob_cut_threshold_(prob_cut_threshold), + action_value_tolerance_(action_value_tolerance), + infosets_(GetAllInfoSets(game.NewInitialState(), best_responder, policy_, + &tree_)), + root_(game.NewInitialState()), + dummy_policy_(new TabularPolicy(GetUniformPolicy(game))) { + if (game.GetType().dynamics != GameType::Dynamics::kSequential) { + SpielFatalError("The game must be turn-based."); + } +} + +double TabularBestResponse::HandleTerminalCase(const HistoryNode& node) const { + return node.GetValue(); +} + +double TabularBestResponse::HandleDecisionCase(HistoryNode* node) { + if (node == nullptr) SpielFatalError("HandleDecisionCase: node is null."); + if (node->GetState()->CurrentPlayer() == best_responder_) { + // If we're playing as the best responder, we look at every child node, + if (action_value_tolerance_ < 0) { + // Pick the one with the highest expected utility to play. + BestResponseAction(node->GetInfoState()); + } else { + // Or spread support over all best_actions. + BestResponseActions(node->GetInfoState(), action_value_tolerance_); + } + + auto action_prob = best_response_policy_[node->GetInfoState()]; + double value = 0.0; + for (const auto& [action, prob] : action_prob) { + HistoryNode* child = node->GetChild(action).second; + if (child == nullptr) + SpielFatalError("HandleDecisionCase: node is null."); + double child_value = Value(child->GetHistory()); + value += child_value * prob; + } + return value; + } + // If the other player is playing, then we can recursively compute the + // expected utility of that node by looking at their policy. + // We take child probabilities from the policy as that is what we are + // calculating a best response to. 
+ ActionsAndProbs state_policy = policy_->GetStatePolicy(*node->GetState()); + if (state_policy.empty()) + SpielFatalError(absl::StrCat("InfoState ", node->GetInfoState(), + " not found in policy.")); + if (state_policy.size() > node->NumChildren()) { + int num_zeros = 0; + for (const auto& a_and_p : state_policy) { + if (Near(a_and_p.second, 0.)) ++num_zeros; + } + // We check here that the policy is valid, i.e. that it doesn't contain + // too many (invalid) actions. This can only happen when the policy is + // built incorrectly. If this is failing, you are building the policy + // wrong. + if (state_policy.size() > node->NumChildren() + num_zeros) { + std::vector action_probs_str_vector; + action_probs_str_vector.reserve(state_policy.size()); + for (const auto& action_prob : state_policy) { + // TODO(b/127423396): Use absl::StrFormat. + action_probs_str_vector.push_back(absl::StrCat( + "(", action_prob.first, ", ", action_prob.second, ")")); + } + std::string action_probs_str = + absl::StrJoin(action_probs_str_vector, " "); + SpielFatalError(absl::StrCat( + "Policies don't match in size, in state ", + node->GetState()->HistoryString(), ".\nThe tree has '", + node->NumChildren(), "' valid children, but ", state_policy.size(), + " valid (action, prob) are available: [", action_probs_str, "]")); + } + } + double value = 0; + for (const auto& action : node->GetState()->LegalActions()) { + const double prob = GetProb(state_policy, action); + if (prob <= prob_cut_threshold_) continue; + // We discard the probability here that's returned by GetChild as we + // immediately load the probability for the given child from the policy. + HistoryNode* child = node->GetChild(action).second; + if (child == nullptr) SpielFatalError("HandleDecisionCase: node is null."); + // Finally, we update value by the policy weighted value of the child. + SPIEL_CHECK_PROB_TOLERANCE(prob, ProbabilityDefaultTolerance()); + value += prob * Value(child->GetHistory()); + } + return value; +} +double TabularBestResponse::HandleChanceCase(HistoryNode* node) { + double value = 0; + double prob_sum = 0; + for (const auto& action : node->GetChildActions()) { + std::pair prob_and_child = node->GetChild(action); + double prob = prob_and_child.first; + prob_sum += prob; + if (prob <= prob_cut_threshold_) continue; + HistoryNode* child = prob_and_child.second; + if (child == nullptr) SpielFatalError("Child is null."); + // Verify that the probability is valid. This should always be true. + SPIEL_CHECK_PROB_TOLERANCE(prob, ProbabilityDefaultTolerance()); + value += prob * Value(child->GetHistory()); + } + // Verify that the sum of the probabilities is 1, within tolerance. 
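+  // prob_sum accumulates every chance outcome, including those skipped via
+  // prob_cut_threshold_, so the check below holds even when the best
+  // response is only partially computed.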
+ SPIEL_CHECK_FLOAT_EQ(prob_sum, 1.0); + return value; +} +double TabularBestResponse::Value(const std::string& history) { + auto it = value_cache_.find(history); + if (it != value_cache_.end()) return it->second; + HistoryNode* node = tree_.GetByHistory(history); + if (node == nullptr) SpielFatalError("node returned is null."); + double cache_value = 0; + switch (node->GetType()) { + case StateType::kTerminal: { + cache_value = HandleTerminalCase(*node); + break; + } + case StateType::kDecision: { + cache_value = HandleDecisionCase(node); + break; + } + case StateType::kChance: { + cache_value = HandleChanceCase(node); + break; + } + case StateType::kMeanField: { + SpielFatalError("kMeanField not supported."); + } + } + value_cache_[history] = cache_value; + return value_cache_[history]; +} +Action TabularBestResponse::BestResponseAction(const std::string& infostate) { + auto it = best_response_policy_.find(infostate); + if (it != best_response_policy_.end()) return it->second.begin()->first; + std::vector> infoset = infosets_[infostate]; + Action best_action = -1; + double best_value = std::numeric_limits::lowest(); + // The legal actions are the same for all children, so we arbitrarily pick + // the first one to get the legal actions from. + for (const auto& action : infoset[0].first->GetChildActions()) { + double value = 0; + // Prob here is the counterfactual reach-weighted probability. + for (const auto& state_and_prob : infoset) { + if (state_and_prob.second <= prob_cut_threshold_) continue; + HistoryNode* state_node = state_and_prob.first; + HistoryNode* child_node = state_node->GetChild(action).second; + SPIEL_CHECK_TRUE(child_node != nullptr); + value += state_and_prob.second * Value(child_node->GetHistory()); + } + if (value > best_value) { + best_value = value; + best_action = action; + } + } + if (best_action == -1) SpielFatalError("No action was chosen."); + + ActionsAndProbs actions_and_probs; + for (const auto& action : infoset[0].first->GetChildActions()) { + double prob = 0.0; + if (action == best_action) prob = 1.0; + actions_and_probs.push_back(std::make_pair(action, prob)); + } + best_response_policy_[infostate] = actions_and_probs; + best_response_actions_[infostate] = best_action; + return best_action; +} +std::vector TabularBestResponse::BestResponseActions( + const std::string& infostate, double tolerance) { + absl::btree_set best_actions; + std::vector> action_values; + std::vector> infoset = + infosets_.at(infostate); + double best_value = std::numeric_limits::lowest(); + // The legal actions are the same for all children, so we arbitrarily pick + // the first one to get the legal actions from. + for (const Action& action : infoset[0].first->GetChildActions()) { + double value = 0; + // Prob here is the counterfactual reach-weighted probability. 
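+    // Accumulate the reach-weighted value of this action over every history
+    // in the information set.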
+ for (const auto& [state_node, prob] : infoset) { + if (prob <= prob_cut_threshold_) continue; + HistoryNode* child_node = state_node->GetChild(action).second; + SPIEL_CHECK_TRUE(child_node != nullptr); + value += prob * Value(child_node->GetHistory()); + } + action_values.push_back({action, value}); + if (value > best_value) { + best_value = value; + } + } + for (const auto& [action, value] : action_values) { + if (value >= best_value - tolerance) { + best_actions.insert(action); + } + } + if (best_actions.empty()) SpielFatalError("No action was chosen."); + ActionsAndProbs actions_and_probs; + for (const auto& action : infoset[0].first->GetChildActions()) { + double prob = 0.0; + if (best_actions.count(action)) { + prob = 1.0 / best_actions.size(); + } + actions_and_probs.push_back(std::make_pair(action, prob)); + } + best_response_policy_[infostate] = actions_and_probs; + return std::vector(best_actions.begin(), best_actions.end()); +} +std::vector> +TabularBestResponse::BestResponseActionValues(const std::string& infostate) { + std::vector> action_values; + std::vector> infoset = + infosets_.at(infostate); + action_values.reserve(infoset[0].first->GetChildActions().size()); + for (Action action : infoset[0].first->GetChildActions()) { + double value = 0; + double normalizer = 0; + // Prob here is the counterfactual reach-weighted probability. + for (const auto& [state_node, prob] : infoset) { + if (prob <= prob_cut_threshold_) continue; + HistoryNode* child_node = state_node->GetChild(action).second; + SPIEL_CHECK_TRUE(child_node != nullptr); + value += prob * Value(child_node->GetHistory()); + normalizer += prob; + } + SPIEL_CHECK_GT(normalizer, 0); + action_values.push_back({action, value / normalizer}); + } + return action_values; +} +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/best_response.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/best_response.h new file mode 100644 index 0000000..3b69f0c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/best_response.h @@ -0,0 +1,215 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_BEST_RESPONSE_H_ +#define OPEN_SPIEL_ALGORITHMS_BEST_RESPONSE_H_ + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/algorithms/history_tree.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { + +// Calculates the best response to every state in the game against the given +// policy, where the best responder plays as player_id. +// This only works for two player, zero- or constant-sum sequential games, and +// raises a SpielFatalError if an incompatible game is passed to it. +// +// This implementation requires that InformationStateString for the game has +// perfect recall. 
Otherwise, the algorithm will still run, but the value +// returned will be wrong. +// +// A partially computed best-response can be computed when using a +// prob_cut_threshold >= 0. +// +// The max-entropy best-response policy is computed if a non-negative +// `action_value_tolerance` is used. +// Support is equally split between actions whose values are within +// `action_value_tolerance` of the max-value action. +// +// NOTE: if `action_value_tolerance` is negative, the first action with max +// value is selected and a biased determinisitc BR is computed. This may +// implicitly simplify coordination games by introducing a convention in games +// that require coordination. + +class TabularBestResponse { + public: + TabularBestResponse(const Game& game, Player best_responder, + const Policy* policy, + const float prob_cut_threshold = -1.0, + const float action_value_tolerance = -1.0); + TabularBestResponse( + const Game& game, Player best_responder, + const std::unordered_map& policy_table, + const float prob_cut_threshold = -1.0, + const float action_value_tolerance = -1.0); + + TabularBestResponse(TabularBestResponse&&) = default; + + // Returns the action that maximizes utility for the agent at the given + // infostate. The infostate must correspond to a decision node for + // best_responder. + Action BestResponseAction(const std::string& infostate); + Action BestResponseAction(const State& state) { + SPIEL_CHECK_EQ(state.CurrentPlayer(), best_responder_); + return BestResponseAction(state.InformationStateString(best_responder_)); + } + + // Returns all the actions that maximize utility for the agent at the given + // infostate. The infostate must correspond to a decision node for + // best_responder. + std::vector BestResponseActions(const std::string& infostate, + double tolerance); + std::vector BestResponseActions(const State& state, + double tolerance) { + SPIEL_CHECK_EQ(state.CurrentPlayer(), best_responder_); + return BestResponseActions(state.InformationStateString(best_responder_), + tolerance); + } + + // Returns the values of all actions at this info state. The infostate must + // correspond to a decision node for best_responder. + std::vector> BestResponseActionValues( + const std::string& infostate); + std::vector> BestResponseActionValues( + const State& state) { + SPIEL_CHECK_EQ(state.CurrentPlayer(), best_responder_); + return BestResponseActionValues( + state.InformationStateString(best_responder_)); + } + + // Returns a map of infostates to best responses, for all information states + // that have been calculated so far. If no best responses have been + // calculated, then we calculate them for every state in the game. + // When two actions have the same value, we + // return the action with the lowest number (as an int). + std::unordered_map GetBestResponseActions() { + if (action_value_tolerance_ >= 0.0) + SpielFatalError( + "TabularBestResponse is returning the max-entropy best-response but " + "deterministic best-response is requested."); + // If the best_response_policy_ cache is empty, we fill it by + // calculating all best responses, starting at the root. + if (best_response_actions_.empty()) Value(*root_); + return best_response_actions_; + } + + // Returns the computed best response as a policy object. + TabularPolicy GetBestResponsePolicy() { + // If the best_response_policy_ cache is empty, we fill it by calculating + // all best responses, starting at the root. 
+ if (best_response_policy_.empty()) Value(*root_); + return TabularPolicy(best_response_policy_); + } + + // Returns the expected utility for best_responder when playing the game + // beginning at history. + double Value(const std::string& history); + double Value(const State& state) { return Value(state.HistoryString()); } + + // Changes the policy that we are calculating a best response to. This is + // useful as a large amount of the data structures can be reused, causing + // the calculation to be quicker than if we had to re-initialize the class. + void SetPolicy(const Policy* policy) { + policy_ = policy; + value_cache_.clear(); + best_response_actions_.clear(); + best_response_policy_.clear(); + // TODO(author1): Replace this with something that traverses the tree + // and rebuilds the probabilities. + infosets_ = + GetAllInfoSets(root_->Clone(), best_responder_, policy_, &tree_); + } + + // Set the policy given a policy table. This stores the table internally. + void SetPolicy( + const std::unordered_map& policy_table) { + tabular_policy_container_ = TabularPolicy(policy_table); + SetPolicy(&tabular_policy_container_); + } + + private: + // For chance nodes, we recursively calculate the value of each child node, + // and weight them by the probability of reaching each child. + double HandleChanceCase(HistoryNode* node); + + // Calculates the value of the HistoryNode when we have to make a decision. + // Does this by calculating the value of each possible child node and then + // setting the value of the current node equal to the maximum (as we can just + // choose the best child). + double HandleDecisionCase(HistoryNode* node); + + // Calculates the value of the HistoryNode when the node is a terminal node. + // Conveniently, the game tells us the value of every terminal node, so we + // have nothing to do. + double HandleTerminalCase(const HistoryNode& node) const; + + Player best_responder_; + + // Used to store a specific policy if not passed in from the caller. + TabularPolicy tabular_policy_container_; + + // The actual policy that we are computing a best response to. + const Policy* policy_; + + HistoryTree tree_; + int num_players_; + + // The probability tolerance for truncating value estimation. + float prob_cut_threshold_; + + // The tolerance in terms of action values deciding if a maxent BR is + // requested. + float action_value_tolerance_; + + // Maps infoset strings (from the State::InformationState method) to + // the HistoryNodes that represent all histories with + // the same information state, along with the counter-factual probability of + // doing so. If the information state is a chance node, the probability comes + // from the State::ChanceOutcomes method. If the information state is a + // decision node for best_responder, the probability is one, following the + // definition of counter-factual probability. Finally, if the information + // state is a decision node for a player other than best_responder, the + // probabilities come from their policy (i.e. policy_). + absl::flat_hash_map>> + infosets_; + + // Caches all best responses calculated so far (for each infostate). + std::unordered_map best_response_policy_; + + // Caches all best responses calculated so far (for each infostate) in case of + // biased deterministic best-response. + std::unordered_map best_response_actions_; + + // Caches all values calculated so far (for each history). 
+ std::unordered_map value_cache_; + std::unique_ptr root_; + + // Keep a cache of an empty policy to avoid recomputing it. + std::unique_ptr dummy_policy_; +}; + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_BEST_RESPONSE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/best_response_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/best_response_test.cc new file mode 100644 index 0000000..7ac0c1c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/best_response_test.cc @@ -0,0 +1,594 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/best_response.h" + +#include +#include +#include +#include +#include + +#include "open_spiel/algorithms/minimax.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/games/efg_game/efg_game.h" +#include "open_spiel/games/efg_game/efg_game_data.h" +#include "open_spiel/games/goofspiel/goofspiel.h" +#include "open_spiel/games/kuhn_poker/kuhn_poker.h" +#include "open_spiel/games/leduc_poker/leduc_poker.h" +#include "open_spiel/games/liars_dice/liars_dice.h" +#include "open_spiel/games/tic_tac_toe/tic_tac_toe.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace { + +using InfostatesAndActions = std::vector>; + + +// Correct values come from the existing Python implementation in +// open_spiel/python/algorithms/exploitability.py. 
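+// Each entry pairs a Kuhn poker history string with the best-response value
+// for player 0 when the opponent plays the uniform random policy.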
+std::vector> +GetKuhnUniformBestResponseValuesPid0() { + std::vector> history_and_probs = { + {"2", 1.5}, + {"2, 1, 1, 1", 2.0}, + {"2, 1, 1, 0", 1.0}, + {"2, 1, 0, 1, 0", -1.0}, + {"2, 1, 0, 1", 2.0}, + {"2, 1, 0, 0", 1.0}, + {"2, 0, 1, 1", 2.0}, + {"2, 1, 0", 1.5}, + {"2, 0, 0, 0", 1.0}, + {"2, 0, 0, 1, 1", 2.0}, + {"2, 0, 0", 1.5}, + {"2, 1, 1", 1.5}, + {"2, 0, 1, 0", 1.0}, + {"2, 1, 0, 1, 1", 2.0}, + {"2, 0", 1.5}, + {"2, 1", 1.5}, + {"2, 0, 0, 1", 2.0}, + {"2, 0, 1", 1.5}, + {"2, 0, 0, 1, 0", -1.0}, + {"1, 0", 1.5}, + {"0", -0.5}, + {"1, 2", -0.5}, + {"0, 2, 0", -1.0}, + {"", 0.5}, + {"0, 1", -0.5}, + {"0, 2", -0.5}, + {"1, 2, 0, 0", -1.0}, + {"0, 1, 0", -1.0}, + {"1", 0.5}, + {"0, 2, 1", -0.5}, + {"1, 2, 0, 1", -2.0}, + {"0, 1, 1", -0.5}, + {"1, 2, 0, 1, 1", -2.0}, + {"0, 2, 1, 1", -2.0}, + {"1, 2, 1", -0.5}, + {"0, 1, 0, 1", -1.0}, + {"1, 0, 1, 0", 1.0}, + {"0, 2, 0, 0", -1.0}, + {"1, 2, 0", -1.5}, + {"0, 2, 1, 0", 1.0}, + {"0, 1, 0, 0", -1.0}, + {"1, 0, 1, 1", 2.0}, + {"1, 2, 1, 0", 1.0}, + {"1, 0, 0, 1, 0", -1.0}, + {"0, 1, 1, 0", 1.0}, + {"1, 0, 0", 1.5}, + {"1, 2, 0, 1, 0", -1.0}, + {"0, 1, 0, 1, 0", -1.0}, + {"1, 0, 0, 1, 1", 2.0}, + {"1, 2, 1, 1", -2.0}, + {"0, 1, 1, 1", -2.0}, + {"1, 0, 1", 1.5}, + {"0, 1, 0, 1, 1", -2.0}, + {"0, 2, 0, 1", -1.0}, + {"1, 0, 0, 0", 1.0}, + {"0, 2, 0, 1, 1", -2.0}, + {"1, 0, 0, 1", 2.0}, + {"0, 2, 0, 1, 0", -1.0}}; + return history_and_probs; +} + +std::vector> +GetKuhnUniformBestResponseValuesPid1() { + std::vector> history_and_probs = { + {"", 0.416666666667}, + {"0", 1.75}, + {"0, 1", 1.75}, + {"0, 1, 1", 2.0}, + {"0, 1, 1, 1", 2.0}, + {"0, 1, 1, 0", -1.0}, + {"0, 1, 0", 1.5}, + {"0, 1, 0, 1", 1.5}, + {"0, 1, 0, 1, 1", 2.0}, + {"0, 1, 0, 1, 0", 1.0}, + {"0, 1, 0, 0", 1.0}, + {"0, 2", 1.75}, + {"0, 2, 1", 2.0}, + {"0, 2, 1, 1", 2.0}, + {"0, 2, 1, 0", -1.0}, + {"0, 2, 0", 1.5}, + {"0, 2, 0, 1", 1.5}, + {"0, 2, 0, 1, 1", 2.0}, + {"0, 2, 0, 1, 0", 1.0}, + {"0, 2, 0, 0", 1.0}, + {"1", 0.5}, + {"1, 0", -0.75}, + {"1, 0, 1", -1.0}, + {"1, 0, 1, 1", -2.0}, + {"1, 0, 1, 0", -1.0}, + {"1, 0, 0", -0.5}, + {"1, 0, 0, 1", -0.5}, + {"1, 0, 0, 1, 1", -2.0}, + {"1, 0, 0, 1, 0", 1.0}, + {"1, 0, 0, 0", -1.0}, + {"1, 2", 1.75}, + {"1, 2, 1", 2.0}, + {"1, 2, 1, 1", 2.0}, + {"1, 2, 1, 0", -1.0}, + {"1, 2, 0", 1.5}, + {"1, 2, 0, 1", 1.5}, + {"1, 2, 0, 1, 1", 2.0}, + {"1, 2, 0, 1, 0", 1.0}, + {"1, 2, 0, 0", 1.0}, + {"2", -1.0}, + {"2, 0", -0.75}, + {"2, 0, 1", -1.0}, + {"2, 0, 1, 1", -2.0}, + {"2, 0, 1, 0", -1.0}, + {"2, 0, 0", -0.5}, + {"2, 0, 0, 1", -0.5}, + {"2, 0, 0, 1, 1", -2.0}, + {"2, 0, 0, 1, 0", 1.0}, + {"2, 0, 0, 0", -1.0}, + {"2, 1", -1.25}, + {"2, 1, 1", -2.0}, + {"2, 1, 1, 1", -2.0}, + {"2, 1, 1, 0", -1.0}, + {"2, 1, 0", -0.5}, + {"2, 1, 0, 1", -0.5}, + {"2, 1, 0, 1, 1", -2.0}, + {"2, 1, 0, 1, 0", 1.0}, + {"2, 1, 0, 0", -1.0}}; + return history_and_probs; +} + +std::vector> +GetKuhnOptimalBestResponseValuesPid0() { + std::vector> history_and_probs = { + {"", -0.05555555555555558}, + {"1, 2, 0, 1", -1.0}, + {"1, 2, 1", -2.0}, + {"0, 2, 0, 0", -1.0}, + {"0, 1, 1, 0", 1.0}, + {"2, 1, 1, 0", 1.0}, + {"2, 0, 0, 1", 2.0}, + {"1, 2, 0, 0", -1.0}, + {"2, 0, 1", 1.0}, + {"0, 1, 1, 1", -2.0}, + {"2, 0, 0, 0", 1.0}, + {"2, 0, 0", 1.3333333333333333}, + {"1, 0", 0.3333333333333333}, + {"1, 0, 1, 1", 2.0}, + {"1, 0, 0, 1, 0", -1.0}, + {"1, 2, 1, 0", 1.0}, + {"2, 0, 1, 0", 1.0}, + {"0, 1", -1.0}, + {"0, 2", -1.0}, + {"1, 0, 0, 1, 1", 2.0}, + {"1, 0, 1, 0", 1.0}, + {"2, 0, 1, 1", 2.0}, + {"1, 2, 1, 1", -2.0}, + {"2, 1", 1.0}, + {"2, 1, 1, 1", 2.0}, + {"2, 0, 0, 1, 
0", -1.0}, + {"1, 2, 0", -1.0}, + {"0, 2, 1, 1", -2.0}, + {"1, 0, 0, 0", 1.0}, + {"0, 2, 1", -2.0}, + {"2, 1, 0, 1", 2.0}, + {"1, 2, 0, 1, 1", -2.0}, + {"1, 2", -1.0}, + {"0, 1, 0, 1", -1.0}, + {"0, 2, 0", -1.0}, + {"0, 2, 1, 0", 1.0}, + {"1, 0, 0, 1", -1.0}, + {"1, 2, 0, 1, 0", -1.0}, + {"2, 1, 0, 0", 1.0}, + {"0, 1, 0, 0", -1.0}, + {"2, 1, 0, 1, 1", 2.0}, + {"2, 0", 1.3333333333333333}, + {"1, 0, 1", 1.0}, + {"0, 2, 0, 1, 0", -1.0}, + {"2, 0, 0, 1, 1", 2.0}, + {"0, 1, 0, 1, 0", -1.0}, + {"0, 1, 1", 0.0}, + {"2, 1, 1", 1.3333333333333333}, + {"2, 1, 0, 1, 0", -1.0}, + {"2", 1.1666666666666665}, + {"1", -0.33333333333333337}, + {"0", -1.0}, + {"0, 1, 0", -1.0}, + {"1, 0, 0", 0.3333333333333333}, + {"0, 2, 0, 1, 1", -2.0}, + {"0, 1, 0, 1, 1", -2.0}, + {"2, 1, 0", 1.0}, + {"0, 2, 0, 1", -1.0}}; + return history_and_probs; +} + +std::vector> +GetKuhnOptimalBestResponseValuesPid1() { + std::vector> history_and_probs = { + {"", 0.0555555555556}, + {"0", 0.9}, + {"0, 1", 0.6}, + {"0, 1, 1", -1.0}, + {"0, 1, 1, 1", 2.0}, + {"0, 1, 1, 0", -1.0}, + {"0, 1, 0", 1.0}, + {"0, 1, 0, 1", 1.0}, + {"0, 1, 0, 1, 1", 2.0}, + {"0, 1, 0, 1, 0", 1.0}, + {"0, 1, 0, 0", 1.0}, + {"0, 2", 1.2}, + {"0, 2, 1", 2.0}, + {"0, 2, 1, 1", 2.0}, + {"0, 2, 1, 0", -1.0}, + {"0, 2, 0", 1.0}, + {"0, 2, 0, 1", 1.0}, + {"0, 2, 0, 1, 1", 2.0}, + {"0, 2, 0, 1, 0", 1.0}, + {"0, 2, 0, 0", 1.0}, + {"1", 0.266666666667}, + {"1, 0", -1.0}, + {"1, 0, 1", -1.0}, + {"1, 0, 1, 1", -2.0}, + {"1, 0, 1, 0", -1.0}, + {"1, 0, 0", -1.0}, + {"1, 0, 0, 1", -0.6}, + {"1, 0, 0, 1, 1", -2.0}, + {"1, 0, 0, 1, 0", 1.0}, + {"1, 0, 0, 0", -1.0}, + {"1, 2", 1.53333333333}, + {"1, 2, 1", 2.0}, + {"1, 2, 1, 1", 2.0}, + {"1, 2, 1, 0", -1.0}, + {"1, 2, 0", 1.53333333333}, + {"1, 2, 0, 1", 1.53333333333}, + {"1, 2, 0, 1, 1", 2.0}, + {"1, 2, 0, 1, 0", 1.0}, + {"1, 2, 0, 0", 1.0}, + {"2", -1.0}, + {"2, 0", -1.0}, + {"2, 0, 1", -1.0}, + {"2, 0, 1, 1", -2.0}, + {"2, 0, 1, 0", -1.0}, + {"2, 0, 0", -1.0}, + {"2, 0, 0, 1", -2.0}, + {"2, 0, 0, 1, 1", -2.0}, + {"2, 0, 0, 1, 0", 1.0}, + {"2, 0, 0, 0", -1.0}, + {"2, 1", -1.0}, + {"2, 1, 1", -1.0}, + {"2, 1, 1, 1", -2.0}, + {"2, 1, 1, 0", -1.0}, + {"2, 1, 0", -1.0}, + {"2, 1, 0, 1", -2.0}, + {"2, 1, 0, 1, 1", -2.0}, + {"2, 1, 0, 1, 0", 1.0}, + {"2, 1, 0, 0", -1.0}}; + return history_and_probs; +} + +// The "GetKuhnEdIterNPolicy" functions return the policy that is dumped out by +// the exploitability_descent_test when running exploitability descent for N +// iterations. They are included here as a regression test, +// as the C++ best response code has been unable to replicate the existing +// results due to erroneously included state. This is fixed as of cl/238531924. 
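+// After one iteration of exploitability descent the policy is still uniform:
+// every Kuhn poker infostate assigns probability 0.5 to each action.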
+TabularPolicy GetKuhnEdIter1Policy() { + return TabularPolicy({{"0", {{0, 0.5}, {1, 0.5}}}, + {"0b", {{0, 0.5}, {1, 0.5}}}, + {"0p", {{0, 0.5}, {1, 0.5}}}, + {"0pb", {{0, 0.5}, {1, 0.5}}}, + {"1", {{0, 0.5}, {1, 0.5}}}, + {"1b", {{0, 0.5}, {1, 0.5}}}, + {"1p", {{0, 0.5}, {1, 0.5}}}, + {"1pb", {{0, 0.5}, {1, 0.5}}}, + {"2", {{0, 0.5}, {1, 0.5}}}, + {"2b", {{0, 0.5}, {1, 0.5}}}, + {"2p", {{0, 0.5}, {1, 0.5}}}, + {"2pb", {{0, 0.5}, {1, 0.5}}}}); +} + +TabularPolicy GetKuhnEdIter4Policy() { + return TabularPolicy({{"0", {{0, 0.567034158868}, {1, 0.432965841132}}}, + {"0b", {{0, 0.602000197743}, {1, 0.397999802257}}}, + {"0p", {{0, 0.520821285373}, {1, 0.479178714627}}}, + {"0pb", {{0, 0.621126761233}, {1, 0.378873238767}}}, + {"1", {{0, 0.505160629764}, {1, 0.494839370236}}}, + {"1b", {{0, 0.360357968472}, {1, 0.639642031528}}}, + {"1p", {{0, 0.520821285373}, {1, 0.479178714627}}}, + {"1pb", {{0, 0.378873238767}, {1, 0.621126761233}}}, + {"2", {{0, 0.419580194883}, {1, 0.580419805117}}}, + {"2b", {{0, 0.202838286881}, {1, 0.797161713119}}}, + {"2p", {{0, 0.5}, {1, 0.5}}}, + {"2pb", {{0, 0.202838286881}, {1, 0.797161713119}}}}); +} + +void CheckBestResponsesAgaintGoldenResponses( + const InfostatesAndActions& golden_actions, + std::unordered_map& best_responses) { + SPIEL_CHECK_EQ(best_responses.size(), golden_actions.size()); + for (const auto& infostate_and_best_response : golden_actions) { + const std::string& infostate = infostate_and_best_response.first; + Action action = infostate_and_best_response.second; + auto it = best_responses.find(infostate); + if (it == best_responses.end()) + SpielFatalError(absl::StrCat("Infostate ", infostate, + " not found in best_responses.")); + if (it->second != action) { + SpielFatalError(absl::StrCat( + "Wrong best response at infostate ", infostate, "; expected ", action, + " but received ", best_responses[infostate])); + } + } +} + +void CheckBestResponseAgainstGoldenPolicy( + const Game& game, Player best_responder, const TabularPolicy& policy, + const InfostatesAndActions& golden_actions) { + TabularBestResponse best_response(game, best_responder, &policy); + best_response.Value(*game.NewInitialState()); + std::unordered_map best_responses = + best_response.GetBestResponseActions(); + CheckBestResponsesAgaintGoldenResponses(golden_actions, best_responses); +} + +InfostatesAndActions GetKuhnUniformBestResponsePid0() { + return InfostatesAndActions( + {{"0", 1}, {"0pb", 0}, {"1", 1}, {"1pb", 1}, {"2", 0}, {"2pb", 1}}); +} + +InfostatesAndActions GetKuhnUniformBestResponsePid1() { + return InfostatesAndActions( + {{"0b", 0}, {"0p", 1}, {"1b", 1}, {"1p", 1}, {"2b", 1}, {"2p", 1}}); +} + +// The best response values are taken from the existing Python implementation in +// open_spiel/algorithms/exploitability.py. +void KuhnPokerUniformBestResponsePid0() { + std::shared_ptr game = LoadGame("kuhn_poker"); + TabularPolicy policy = GetUniformPolicy(*game); + CheckBestResponseAgainstGoldenPolicy(*game, /*best_responder=*/Player{0}, + policy, + GetKuhnUniformBestResponsePid0()); +} + +// The best response values are taken from the existing Python implementation in +// open_spiel/algorithms/exploitability.py. +void KuhnPokerUniformBestResponsePid1() { + std::shared_ptr game = LoadGame("kuhn_poker"); + TabularPolicy policy = GetUniformPolicy(*game); + CheckBestResponseAgainstGoldenPolicy(*game, /*best_responder=*/Player{1}, + policy, + GetKuhnUniformBestResponsePid1()); +} + +// The following are regression tests. 
They should produce the same result, but +// didn't previously due to a caching bug. +InfostatesAndActions GetExploitabilityDescentBestResponses() { + return InfostatesAndActions( + {{"0b", 0}, {"0p", 0}, {"1b", 1}, {"1p", 1}, {"2b", 1}, {"2p", 1}}); +} + +void KuhnPokerExploitabilityDescentIteration4BestResponsePid0() { + std::shared_ptr game = LoadGame("kuhn_poker"); + CheckBestResponseAgainstGoldenPolicy(*game, /*best_responder=*/Player{1}, + GetKuhnEdIter4Policy(), + GetExploitabilityDescentBestResponses()); +} + +void KuhnPokerUniformBestResponseAfterSwitchingPolicies() { + std::shared_ptr game = LoadGame("kuhn_poker"); + TabularPolicy policy = GetKuhnEdIter4Policy(); + TabularBestResponse response(*game, Player{1}, &policy); + + // Check that it's good + InfostatesAndActions ed_golden_actions = + GetExploitabilityDescentBestResponses(); + std::unordered_map best_responses = + response.GetBestResponseActions(); + CheckBestResponsesAgaintGoldenResponses(ed_golden_actions, best_responses); + + // Swap policies, and check again. + policy = GetUniformPolicy(*game); + response.SetPolicy(&policy); + + // Check that this equals + InfostatesAndActions actual_best_responses = GetKuhnUniformBestResponsePid1(); + best_responses = response.GetBestResponseActions(); + CheckBestResponsesAgaintGoldenResponses(actual_best_responses, + best_responses); +} + +// The best response values are taken from the existing Python implementation in +// open_spiel/algorithms/exploitability.py. +void KuhnPokerOptimalBestResponsePid0() { + std::shared_ptr game = LoadGame("kuhn_poker"); + TabularPolicy policy = kuhn_poker::GetOptimalPolicy(/*alpha=*/0.2); + InfostatesAndActions actual_best_responses = { + {"0", 0}, {"0pb", 0}, {"1", 0}, {"1pb", 0}, {"2", 0}, {"2pb", 1}}; + CheckBestResponseAgainstGoldenPolicy(*game, /*best_responder=*/Player{0}, + policy, actual_best_responses); +} + +// The best response values are taken from the existing Python implementation in +// open_spiel/algorithms/exploitability.py. +void KuhnPokerOptimalBestResponsePid1() { + std::shared_ptr game = LoadGame("kuhn_poker"); + TabularPolicy policy = kuhn_poker::GetOptimalPolicy(/*alpha=*/0.2); + InfostatesAndActions actual_best_responses = { + {"0b", 0}, {"0p", 0}, {"1p", 0}, {"1b", 0}, {"2p", 1}, {"2b", 1}}; + CheckBestResponseAgainstGoldenPolicy(*game, /*best_responder=*/Player{1}, + policy, actual_best_responses); +} + +void KuhnPokerFirstActionBestResponsePid0() { + std::shared_ptr game = LoadGame("kuhn_poker"); + TabularPolicy policy = GetFirstActionPolicy(*game); + InfostatesAndActions actual_best_responses = { + {"0pb", 0}, {"1", 1}, {"2", 0}, {"0", 1}, {"1pb", 0}, {"2pb", 0}}; + CheckBestResponseAgainstGoldenPolicy(*game, /*best_responder=*/Player{0}, + policy, actual_best_responses); +} + +void KuhnPokerFirstActionBestResponsePid1() { + std::shared_ptr game = LoadGame("kuhn_poker"); + TabularPolicy policy = GetFirstActionPolicy(*game); + InfostatesAndActions actual_best_responses = { + {"1p", 1}, {"2p", 0}, {"0p", 1}, {"1b", 0}, {"2b", 0}, {"0b", 0}}; + CheckBestResponseAgainstGoldenPolicy(*game, /*best_responder=*/Player{1}, + policy, actual_best_responses); +} + +void KuhnPokerExploitabilityDescentMinimalSimulationPid0() { + std::shared_ptr game = LoadGame("kuhn_poker"); + auto best_responder = Player{1}; + + // We create a best responder with one policy... 
+ TabularPolicy kuhn_ed_iter1_policy = GetKuhnEdIter1Policy(); + TabularBestResponse best_response(*game, best_responder, + &kuhn_ed_iter1_policy); + + // Calculate all the best responses... + best_response.Value(*game->NewInitialState()); + + // And then set a new policy. This *shouldn't* change the result- it should + // produce the same result as in the test above which does this calculation + // with best_response initialized with the GetKuhnEdIter4Policy, but due to + // improperly resetting the caches, that was not the case previously. + TabularPolicy kuhn_ed_iter4_policy = GetKuhnEdIter4Policy(); + best_response.SetPolicy(&kuhn_ed_iter4_policy); + best_response.Value(*game->NewInitialState()); + auto best_responses = best_response.GetBestResponseActions(); + auto actual_best_responses = GetExploitabilityDescentBestResponses(); + SPIEL_CHECK_EQ(best_responses.size(), actual_best_responses.size()); + for (const auto& infostate_and_action : actual_best_responses) { + const std::string& infostate = infostate_and_action.first; + Action action = infostate_and_action.second; + auto it = best_responses.find(infostate); + if (it == best_responses.end()) + SpielFatalError(absl::StrCat("Infostate ", infostate, + " not found in best_responses.")); + if (it->second != action) { + SpielFatalError(absl::StrCat( + "Wrong best response at infostate ", infostate, "; expected ", action, + " but received ", best_responses[infostate])); + } + } +} + +void CheckBestResponseValuesAgainstGoldenValues( + const Game& game, Player best_responder, const TabularPolicy& policy, + const std::vector>& golden_values) { + TabularBestResponse best_response(game, best_responder, &policy); + for (const auto& history_and_value : golden_values) { + const std::string& history = history_and_value.first; + if (!Near(best_response.Value(history), history_and_value.second)) { + SpielFatalError(absl::StrCat("Value calculated for history '", history, + "' is equal to ", + best_response.Value(history), " but ", + history_and_value.second, " was expected.")); + } + } +} + +void KuhnPokerUniformValueTestPid0() { + std::shared_ptr game = LoadGame("kuhn_poker"); + TabularPolicy policy = GetUniformPolicy(*game); + std::vector> histories_and_values = + GetKuhnUniformBestResponseValuesPid0(); + CheckBestResponseValuesAgainstGoldenValues( + *game, /*best_responder=*/Player{0}, policy, histories_and_values); +} + +void KuhnPokerUniformValueTestPid1() { + std::shared_ptr game = LoadGame("kuhn_poker"); + TabularPolicy policy = GetUniformPolicy(*game); + std::vector> histories_and_values = + GetKuhnUniformBestResponseValuesPid1(); + CheckBestResponseValuesAgainstGoldenValues( + *game, /*best_responder=*/Player{1}, policy, histories_and_values); +} + +void KuhnPokerEFGUniformValueTestPid1() { + std::shared_ptr game = efg_game::LoadEFGGame( + efg_game::GetKuhnPokerEFGData()); + TabularPolicy policy = GetUniformPolicy(*game); + std::vector> histories_and_values = + GetKuhnUniformBestResponseValuesPid1(); + TabularBestResponse best_response(*game, 1, &policy); + const double value = best_response.Value(*game->NewInitialState()); + SPIEL_CHECK_TRUE(Near(value, histories_and_values[0].second)); +} + +void KuhnPokerOptimalValueTestPid0() { + std::shared_ptr game = LoadGame("kuhn_poker"); + TabularPolicy policy = kuhn_poker::GetOptimalPolicy(/*alpha=*/0.2); + std::vector> histories_and_values = + GetKuhnOptimalBestResponseValuesPid0(); + CheckBestResponseValuesAgainstGoldenValues( + *game, /*best_responder=*/Player{0}, policy, 
histories_and_values); +} + +void KuhnPokerOptimalValueTestPid1() { + std::shared_ptr game = LoadGame("kuhn_poker"); + TabularPolicy policy = kuhn_poker::GetOptimalPolicy(/*alpha=*/0.2); + std::vector> histories_and_values = + GetKuhnOptimalBestResponseValuesPid1(); + CheckBestResponseValuesAgainstGoldenValues( + *game, /*best_responder=*/Player{1}, policy, histories_and_values); +} + +} // namespace +} // namespace algorithms +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::algorithms::KuhnPokerUniformBestResponsePid0(); + open_spiel::algorithms::KuhnPokerUniformBestResponsePid1(); + open_spiel::algorithms::KuhnPokerOptimalBestResponsePid0(); + open_spiel::algorithms::KuhnPokerOptimalBestResponsePid1(); + open_spiel::algorithms:: + KuhnPokerExploitabilityDescentIteration4BestResponsePid0(); + open_spiel::algorithms::KuhnPokerFirstActionBestResponsePid0(); + open_spiel::algorithms::KuhnPokerFirstActionBestResponsePid1(); + open_spiel::algorithms::KuhnPokerExploitabilityDescentMinimalSimulationPid0(); + open_spiel::algorithms::KuhnPokerUniformValueTestPid0(); + open_spiel::algorithms::KuhnPokerUniformValueTestPid1(); + open_spiel::algorithms::KuhnPokerEFGUniformValueTestPid1(); + open_spiel::algorithms::KuhnPokerOptimalValueTestPid0(); + open_spiel::algorithms::KuhnPokerOptimalValueTestPid1(); + + // Verifies that the code automatically generates the best response actions + // after swapping policies. + open_spiel::algorithms::KuhnPokerUniformBestResponseAfterSwitchingPolicies(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/cfr.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/cfr.cc new file mode 100644 index 0000000..9131ae0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/cfr.cc @@ -0,0 +1,781 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/cfr.h" + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/strings/charconv.h" +#include "open_spiel/abseil-cpp/absl/strings/numbers.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/serialization.h" + +namespace open_spiel { +namespace algorithms { +namespace { +inline constexpr double kRandomInitialRegretsMagnitude = 0.001; +} // namespace + +constexpr const int kSerializationVersion = 1; + +// All CFR solvers support serialization. CFRSolver, CFRPlusSolver and +// CFRBRSolver all rely on CFRSolverBase::Serialize() to provide the +// functionality (each subtype only implements the SerializeThisType() method +// which returns the name of the serialized subclass). +// ExternalSamplingMCCFRSolver and OutcomeSamplingMCCFRSolver implement +// their own versions of Serialize(). 
+// +// During the serialization we store multiple different sections which are +// described below: +// - [SolverType] is the name of the serialized class. +// - [SolverSpecificState] is a section to which different solvers write state +// that is specific to their own type. Note that serialization and +// deserialization of this section is left entirely to each of the +// subtypes. +// - [SolverValuesTable] is the section to which CFRInfoStateValuesTable is +// written. +// +// During deserialization all solvers rely on the +// PartiallyDeserializeCFRSolver() method which deserializes common properties +// but leaves deserialization of [SolverSpecificState] section to the caller. +// +// Note that there are some specifics that need to be taken into account when +// reading the de/serialization code: +// - Each solver's Serialize() method has two parameters which are not exposed +// in Python, i.e. int double_precision and std::string delimiter. +// - We try to avoid copying/moving of CFRInfoStateValuesTable where possible +// due to its potentially large size (mainly due to memory concerns). For +// that reason the PartiallyDeserializeCFRSolver() method also returns a +// string_view of the table and leaves deserialization to the callers +// which then in turn call the efficient +// DeserializeCFRInfoStateValuesTable(). + +CFRAveragePolicy::CFRAveragePolicy(const CFRInfoStateValuesTable& info_states, + std::shared_ptr default_policy) + : info_states_(info_states), default_policy_(default_policy) {} + +ActionsAndProbs CFRAveragePolicy::GetStatePolicy( + const State& state, Player player) const { + auto entry = info_states_.find(state.InformationStateString(player)); + if (entry == info_states_.end()) { + if (default_policy_) { + return default_policy_->GetStatePolicy(state, player); + } else { + // This should never get called. + SpielFatalError("No policy found, and no default policy."); + } + } + ActionsAndProbs actions_and_probs; + GetStatePolicyFromInformationStateValues(entry->second, &actions_and_probs); + return actions_and_probs; +} + +ActionsAndProbs CFRAveragePolicy::GetStatePolicy( + const std::string& info_state) const { + auto entry = info_states_.find(info_state); + if (entry == info_states_.end()) { + if (default_policy_) { + return default_policy_->GetStatePolicy(info_state); + } else { + // This should never get called. + SpielFatalError("No policy found, and no default policy."); + } + } + ActionsAndProbs actions_and_probs; + GetStatePolicyFromInformationStateValues(entry->second, &actions_and_probs); + return actions_and_probs; +} + +void CFRAveragePolicy::GetStatePolicyFromInformationStateValues( + const CFRInfoStateValues& is_vals, + ActionsAndProbs* actions_and_probs) const { + double sum_prob = 0.0; + for (int aidx = 0; aidx < is_vals.num_actions(); ++aidx) { + sum_prob += is_vals.cumulative_policy[aidx]; + } + + if (sum_prob == 0.0) { + // Return a uniform policy at this node + double prob = 1. 
/ is_vals.num_actions(); + for (Action action : is_vals.legal_actions) { + actions_and_probs->push_back({action, prob}); + } + return; + } + + for (int aidx = 0; aidx < is_vals.num_actions(); ++aidx) { + actions_and_probs->push_back({is_vals.legal_actions[aidx], + is_vals.cumulative_policy[aidx] / sum_prob}); + } +} + +TabularPolicy CFRAveragePolicy::AsTabular() const { + TabularPolicy policy; + for (const auto& infoset_and_entry : info_states_) { + ActionsAndProbs state_policy; + GetStatePolicyFromInformationStateValues(infoset_and_entry.second, + &state_policy); + policy.SetStatePolicy(infoset_and_entry.first, state_policy); + } + return policy; +} + +CFRCurrentPolicy::CFRCurrentPolicy(const CFRInfoStateValuesTable& info_states, + std::shared_ptr default_policy) + : info_states_(info_states), default_policy_(default_policy) {} + +ActionsAndProbs CFRCurrentPolicy::GetStatePolicy( + const State& state, Player player) const { + auto entry = info_states_.find(state.InformationStateString(player)); + if (entry == info_states_.end()) { + if (default_policy_) { + return default_policy_->GetStatePolicy(state, player); + } else { + SpielFatalError("No policy found, and no default policy."); + } + } + ActionsAndProbs actions_and_probs; + return GetStatePolicyFromInformationStateValues(entry->second, + actions_and_probs); +} + +ActionsAndProbs CFRCurrentPolicy::GetStatePolicy( + const std::string& info_state) const { + auto entry = info_states_.find(info_state); + if (entry == info_states_.end()) { + if (default_policy_) { + return default_policy_->GetStatePolicy(info_state); + } else { + SpielFatalError("No policy found, and no default policy."); + } + } + ActionsAndProbs actions_and_probs; + GetStatePolicyFromInformationStateValues(entry->second, actions_and_probs); + return actions_and_probs; +} + +ActionsAndProbs CFRCurrentPolicy::GetStatePolicyFromInformationStateValues( + const CFRInfoStateValues& is_vals, + ActionsAndProbs& actions_and_probs) const { + for (int aidx = 0; aidx < is_vals.num_actions(); ++aidx) { + actions_and_probs.push_back( + {is_vals.legal_actions[aidx], is_vals.current_policy[aidx]}); + } + return actions_and_probs; +} + +TabularPolicy CFRCurrentPolicy::AsTabular() const { + TabularPolicy policy; + for (const auto& infoset_and_entry : info_states_) { + policy.SetStatePolicy(infoset_and_entry.first, + infoset_and_entry.second.GetCurrentPolicy()); + } + return policy; +} + +CFRSolverBase::CFRSolverBase(const Game& game, bool alternating_updates, + bool linear_averaging, bool regret_matching_plus, + bool random_initial_regrets, int seed) + : game_(game.shared_from_this()), + root_state_(game.NewInitialState()), + root_reach_probs_(game_->NumPlayers() + 1, 1.0), + regret_matching_plus_(regret_matching_plus), + alternating_updates_(alternating_updates), + linear_averaging_(linear_averaging), + random_initial_regrets_(random_initial_regrets), + chance_player_(game.NumPlayers()), + rng_(seed) { + if (game_->GetType().dynamics != GameType::Dynamics::kSequential) { + SpielFatalError( + "CFR requires sequential games. 
If you're trying to run it " + "on a simultaneous (or normal-form) game, please first transform it " + "using turn_based_simultaneous_game."); + } + InitializeInfostateNodes(*root_state_); +} + +CFRSolverBase::CFRSolverBase(std::shared_ptr game, + bool alternating_updates, bool linear_averaging, + bool regret_matching_plus, int iteration, + bool random_initial_regrets, int seed) + : game_(game), + iteration_(iteration), + root_state_(game->NewInitialState()), + root_reach_probs_(game_->NumPlayers() + 1, 1.0), + regret_matching_plus_(regret_matching_plus), + alternating_updates_(alternating_updates), + linear_averaging_(linear_averaging), + random_initial_regrets_(random_initial_regrets), + chance_player_(game->NumPlayers()), + rng_(seed) { + if (game_->GetType().dynamics != GameType::Dynamics::kSequential) { + SpielFatalError( + "CFR requires sequential games. If you're trying to run it " + "on a simultaneous (or normal-form) game, please first transform it " + "using turn_based_simultaneous_game."); + } +} + +void CFRSolverBase::InitializeInfostateNodes(const State& state) { + if (state.IsTerminal()) { + return; + } + if (state.IsChanceNode()) { + for (const auto& action_prob : state.ChanceOutcomes()) { + InitializeInfostateNodes(*state.Child(action_prob.first)); + } + return; + } + + int current_player = state.CurrentPlayer(); + std::string info_state = state.InformationStateString(current_player); + std::vector legal_actions = state.LegalActions(); + + if (random_initial_regrets_) { + CFRInfoStateValues is_vals(legal_actions, &rng_, + kRandomInitialRegretsMagnitude); + info_states_[info_state] = is_vals; + } else { + CFRInfoStateValues is_vals(legal_actions); + info_states_[info_state] = is_vals; + } + + for (const Action& action : legal_actions) { + InitializeInfostateNodes(*state.Child(action)); + } +} + +void CFRSolverBase::EvaluateAndUpdatePolicy() { + ++iteration_; + if (alternating_updates_) { + for (int player = 0; player < game_->NumPlayers(); player++) { + ComputeCounterFactualRegret(*root_state_, player, root_reach_probs_, + nullptr); + if (regret_matching_plus_) { + ApplyRegretMatchingPlusReset(); + } + ApplyRegretMatching(); + } + } else { + ComputeCounterFactualRegret(*root_state_, absl::nullopt, root_reach_probs_, + nullptr); + if (regret_matching_plus_) { + ApplyRegretMatchingPlusReset(); + } + ApplyRegretMatching(); + } +} + +std::string CFRSolverBase::Serialize(int double_precision, + std::string delimiter) const { + SPIEL_CHECK_GE(double_precision, -1); + std::string str = ""; + // Meta section + absl::StrAppend(&str, + "# Automatically generated by OpenSpiel " + "CFRSolverBase::Serialize\n"); + absl::StrAppend(&str, kSerializeMetaSectionHeader, "\n"); + absl::StrAppend(&str, "Version: ", kSerializationVersion, "\n"); + absl::StrAppend(&str, "\n"); + // Game section + absl::StrAppend(&str, kSerializeGameSectionHeader, "\n"); + absl::StrAppend(&str, game_->Serialize(), "\n"); + // Internal solver state section + absl::StrAppend(&str, kSerializeSolverTypeSectionHeader, "\n"); + absl::StrAppend(&str, SerializeThisType(), "\n"); + absl::StrAppend(&str, kSerializeSolverSpecificStateSectionHeader, "\n"); + absl::StrAppend(&str, iteration_, "\n"); + absl::StrAppend(&str, kSerializeSolverValuesTableSectionHeader, "\n"); + SerializeCFRInfoStateValuesTable(info_states_, &str, double_precision, + delimiter); + return str; +} + +static double CounterFactualReachProb( + const std::vector& reach_probabilities, const int player) { + double cfr_reach_prob = 1.0; + for (int i = 0; i < 
reach_probabilities.size(); i++) { + if (i != player) { + cfr_reach_prob *= reach_probabilities[i]; + } + } + return cfr_reach_prob; +} + +// Compute counterfactual regrets. Alternates recursively with +// ComputeCounterFactualRegretForActionProbs. +// +// Args: +// - state: The state to start the recursion. +// - alternating_player: Optionally only update this player. +// - reach_probabilities: The reach probabilities of this state for each +// player, ending with the chance player. +// +// Returns: +// The value of the state for each player (excluding the chance player). +std::vector CFRSolverBase::ComputeCounterFactualRegret( + const State& state, const absl::optional& alternating_player, + const std::vector& reach_probabilities, + const std::vector* policy_overrides) { + if (state.IsTerminal()) { + return state.Returns(); + } + if (state.IsChanceNode()) { + ActionsAndProbs actions_and_probs = state.ChanceOutcomes(); + std::vector dist(actions_and_probs.size(), 0); + std::vector outcomes(actions_and_probs.size(), 0); + for (int oidx = 0; oidx < actions_and_probs.size(); ++oidx) { + outcomes[oidx] = actions_and_probs[oidx].first; + dist[oidx] = actions_and_probs[oidx].second; + } + return ComputeCounterFactualRegretForActionProbs( + state, alternating_player, reach_probabilities, chance_player_, dist, + outcomes, nullptr, policy_overrides); + } + if (AllPlayersHaveZeroReachProb(reach_probabilities)) { + // The value returned is not used: if the reach probability for all players + // is 0, then the last taken action has probability 0, so the + // returned value is not impacting the parent node value. + return std::vector(game_->NumPlayers(), 0.0); + } + + int current_player = state.CurrentPlayer(); + std::string info_state = state.InformationStateString(); + std::vector legal_actions = state.LegalActions(current_player); + + // Load current policy. + std::vector info_state_policy; + if (policy_overrides && policy_overrides->at(current_player)) { + GetInfoStatePolicyFromPolicy(&info_state_policy, legal_actions, + policy_overrides->at(current_player), + info_state); + } else { + info_state_policy = GetPolicy(info_state, legal_actions); + } + + std::vector child_utilities; + child_utilities.reserve(legal_actions.size()); + const std::vector state_value = + ComputeCounterFactualRegretForActionProbs( + state, alternating_player, reach_probabilities, current_player, + info_state_policy, legal_actions, &child_utilities, policy_overrides); + + // Perform regret and average strategy updates. + if (!alternating_player || *alternating_player == current_player) { + CFRInfoStateValues is_vals = info_states_[info_state]; + SPIEL_CHECK_FALSE(is_vals.empty()); + + const double self_reach_prob = reach_probabilities[current_player]; + const double cfr_reach_prob = + CounterFactualReachProb(reach_probabilities, current_player); + + for (int aidx = 0; aidx < legal_actions.size(); ++aidx) { + // Update regrets. + double cfr_regret = cfr_reach_prob * + (child_utilities[aidx] - state_value[current_player]); + + is_vals.cumulative_regrets[aidx] += cfr_regret; + + // Update average policy. 
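+      // With linear averaging, iteration t's contribution below is weighted
+      // by t itself, so for example iteration 10 adds ten times as much mass
+      // to the cumulative policy as iteration 1; without it, every iteration
+      // contributes equally.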
+ if (linear_averaging_) { + is_vals.cumulative_policy[aidx] += + iteration_ * self_reach_prob * info_state_policy[aidx]; + } else { + is_vals.cumulative_policy[aidx] += + self_reach_prob * info_state_policy[aidx]; + } + } + + info_states_[info_state] = is_vals; + } + + return state_value; +} + +void CFRSolverBase::GetInfoStatePolicyFromPolicy( + std::vector* info_state_policy, + const std::vector& legal_actions, const Policy* policy, + const std::string& info_state) const { + ActionsAndProbs actions_and_probs = policy->GetStatePolicy(info_state); + info_state_policy->reserve(legal_actions.size()); + + // The policy may have extra ones not at this infostate + for (Action action : legal_actions) { + const auto& iter = + std::find_if(actions_and_probs.begin(), actions_and_probs.end(), + [action](const std::pair& ap) { + return ap.first == action; + }); + info_state_policy->push_back(iter->second); + } + + SPIEL_CHECK_EQ(info_state_policy->size(), legal_actions.size()); +} + +// Compute counterfactual regrets given certain action probabilities. +// Alternates recursively with ComputeCounterFactualRegret. +// +// Args: +// - state: The state to start the recursion. +// - alternating_player: Optionally only update this player. +// - reach_probabilities: The reach probabilities of this state. +// - current_player: Either a player or chance_player_. +// - action_probs: The action probabilities to use for this state. +// - child_values_out: optional output parameter which is filled with the child +// utilities for each action, for current_player. +// Returns: +// The value of the state for each player (excluding the chance player). +std::vector CFRSolverBase::ComputeCounterFactualRegretForActionProbs( + const State& state, const absl::optional& alternating_player, + const std::vector& reach_probabilities, const int current_player, + const std::vector& info_state_policy, + const std::vector& legal_actions, + std::vector* child_values_out, + const std::vector* policy_overrides) { + std::vector state_value(game_->NumPlayers()); + + for (int aidx = 0; aidx < legal_actions.size(); ++aidx) { + const Action action = legal_actions[aidx]; + const double prob = info_state_policy[aidx]; + const std::unique_ptr new_state = state.Child(action); + std::vector new_reach_probabilities(reach_probabilities); + new_reach_probabilities[current_player] *= prob; + std::vector child_value = + ComputeCounterFactualRegret(*new_state, alternating_player, + new_reach_probabilities, policy_overrides); + for (int i = 0; i < state_value.size(); ++i) { + state_value[i] += prob * child_value[i]; + } + if (child_values_out != nullptr) { + child_values_out->push_back(child_value[current_player]); + } + } + return state_value; +} + +bool CFRSolverBase::AllPlayersHaveZeroReachProb( + const std::vector& reach_probabilities) const { + for (int i = 0; i < game_->NumPlayers(); i++) { + if (reach_probabilities[i] != 0.0) { + return false; + } + } + return true; +} + +std::vector CFRSolverBase::GetPolicy( + const std::string& info_state, const std::vector& legal_actions) { + auto entry = info_states_.find(info_state); + if (entry == info_states_.end()) { + info_states_[info_state] = CFRInfoStateValues(legal_actions); + entry = info_states_.find(info_state); + } + + SPIEL_CHECK_FALSE(entry == info_states_.end()); + SPIEL_CHECK_FALSE(entry->second.empty()); + SPIEL_CHECK_FALSE(entry->second.current_policy.empty()); + return entry->second.current_policy; +} + +std::string CFRInfoStateValues::ToString() const { + std::string str = ""; + 
absl::StrAppend(&str, "Legal actions: ", absl::StrJoin(legal_actions, ", "), + "\n"); + absl::StrAppend(&str, "Current policy: ", absl::StrJoin(current_policy, ", "), + "\n"); + absl::StrAppend(&str, "Cumulative regrets: ", + absl::StrJoin(cumulative_regrets, ", "), "\n"); + absl::StrAppend(&str, + "Cumulative policy: ", absl::StrJoin(cumulative_policy, ", "), + "\n"); + return str; +} + +std::string CFRInfoStateValues::Serialize(int double_precision) const { + std::string str = ""; + std::string cumulative_regrets_str, cumulative_policy_str, current_policy_str; + if (double_precision == -1) { + cumulative_regrets_str = + absl::StrJoin(cumulative_regrets, ",", HexDoubleFormatter()); + cumulative_policy_str = + absl::StrJoin(cumulative_policy, ",", HexDoubleFormatter()); + current_policy_str = + absl::StrJoin(current_policy, ",", HexDoubleFormatter()); + } else { + cumulative_regrets_str = absl::StrJoin( + cumulative_regrets, ",", SimpleDoubleFormatter(double_precision)); + cumulative_policy_str = absl::StrJoin( + cumulative_policy, ",", SimpleDoubleFormatter(double_precision)); + current_policy_str = absl::StrJoin(current_policy, ",", + SimpleDoubleFormatter(double_precision)); + } + absl::StrAppend(&str, absl::StrJoin(legal_actions, ","), ";"); + absl::StrAppend(&str, cumulative_regrets_str, ";"); + absl::StrAppend(&str, cumulative_policy_str, ";"); + absl::StrAppend(&str, current_policy_str); + return str; +} + +CFRInfoStateValues DeserializeCFRInfoStateValues(absl::string_view serialized) { + CFRInfoStateValues res = CFRInfoStateValues(); + if (serialized.empty()) return res; + + std::vector> str_values; + str_values.reserve(4); + for (absl::string_view sv : absl::StrSplit(serialized, ';')) { + str_values.push_back(absl::StrSplit(sv, ',')); + } + + int num_elements = str_values.at(0).size(); + res.legal_actions.reserve(num_elements); + res.cumulative_regrets.reserve(num_elements); + res.cumulative_policy.reserve(num_elements); + res.current_policy.reserve(num_elements); + + // Insert the actual values + int la_value; + double cumu_regret_value, cumu_policy_value, curr_policy_value; + for (int i = 0; i < num_elements; i++) { + SPIEL_CHECK_TRUE(absl::SimpleAtoi(str_values.at(0).at(i), &la_value)); + absl::from_chars( + str_values.at(1).at(i).data(), + str_values.at(1).at(i).data() + str_values.at(1).at(i).size(), + cumu_regret_value); + absl::from_chars( + str_values.at(2).at(i).data(), + str_values.at(2).at(i).data() + str_values.at(2).at(i).size(), + cumu_policy_value); + absl::from_chars( + str_values.at(3).at(i).data(), + str_values.at(3).at(i).data() + str_values.at(3).at(i).size(), + curr_policy_value); + + res.legal_actions.push_back(la_value); + res.cumulative_regrets.push_back(cumu_regret_value); + res.cumulative_policy.push_back(cumu_policy_value); + res.current_policy.push_back(curr_policy_value); + } + return res; +} + +ActionsAndProbs CFRInfoStateValues::GetCurrentPolicy() const { + ActionsAndProbs actions_and_probs; + actions_and_probs.reserve(legal_actions.size()); + for (int i = 0; i < legal_actions.size(); ++i) { + actions_and_probs.push_back({legal_actions[i], current_policy[i]}); + } + return actions_and_probs; +} + +void CFRInfoStateValues::ApplyRegretMatchingAllPositive(double delta) { + SPIEL_CHECK_GT(delta, 0); + double sum = 0; + for (int aidx = 0; aidx < num_actions(); ++aidx) { + sum += std::max(cumulative_regrets[aidx], delta); + } + for (int aidx = 0; aidx < num_actions(); ++aidx) { + current_policy[aidx] = std::max(cumulative_regrets[aidx], delta) / sum; + } 
+} + +void CFRInfoStateValues::ApplyRegretMatching() { + double sum_positive_regrets = 0.0; + + for (int aidx = 0; aidx < num_actions(); ++aidx) { + if (cumulative_regrets[aidx] > 0) { + sum_positive_regrets += cumulative_regrets[aidx]; + } + } + + for (int aidx = 0; aidx < num_actions(); ++aidx) { + if (sum_positive_regrets > 0) { + current_policy[aidx] = + cumulative_regrets[aidx] > 0 + ? cumulative_regrets[aidx] / sum_positive_regrets + : 0; + } else { + current_policy[aidx] = 1.0 / legal_actions.size(); + } + } +} + +int CFRInfoStateValues::SampleActionIndex(double epsilon, double z) { + double sum = 0; + for (int aidx = 0; aidx < current_policy.size(); ++aidx) { + double prob = epsilon * 1.0 / current_policy.size() + + (1.0 - epsilon) * current_policy[aidx]; + if (z >= sum && z < sum + prob) { + return aidx; + } + sum += prob; + } + SpielFatalError(absl::StrCat("SampleActionIndex: sum of probs is ", sum)); +} + +int CFRInfoStateValues::GetActionIndex(Action a) { + auto it = std::find(legal_actions.begin(), legal_actions.end(), a); + if (it != legal_actions.end()) { + return std::distance(legal_actions.begin(), it); + } + SpielFatalError( + absl::StrCat("GetActionIndex: the action was not found: ", a)); +} + +void SerializeCFRInfoStateValuesTable( + const CFRInfoStateValuesTable& info_states, std::string* result, + int double_precision, std::string delimiter) { + if (delimiter == "," || delimiter == ";") { + // The two delimiters are used for de/serialization of CFRInfoStateValues + SpielFatalError( + "Please select a different delimiter," + "invalid values are \",\" and \";\"."); + } + if (info_states.empty()) return; + + for (auto const& [info_state, values] : info_states) { + if (info_state.find(delimiter) != std::string::npos) { + SpielFatalError(absl::StrCat( + "Info state contains delimiter \"", delimiter, + "\", please fix the info state or select a different delimiter.")); + } + absl::StrAppend(result, info_state, delimiter, + values.Serialize(double_precision), delimiter); + } + // Remove the trailing delimiter + result->erase(result->length() - delimiter.length()); +} + +void DeserializeCFRInfoStateValuesTable(absl::string_view serialized, + CFRInfoStateValuesTable* result, + std::string delimiter) { + if (serialized.empty()) return; + + std::vector splits = absl::StrSplit(serialized, delimiter); + for (int i = 0; i < splits.size(); i += 2) { + result->insert({std::string(splits.at(i)), + DeserializeCFRInfoStateValues(splits.at(i + 1))}); + } +} + +// Resets negative cumulative regrets to 0. +// +// Regret Matching+ corresponds to the following cumulative regrets update: +// cumulative_regrets = max(cumulative_regrets + regrets, 0) +// +// This must be done at the level of the information set, and thus cannot be +// done during the tree traversal (which is done on histories). It is thus +// performed as an additional step. 
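+//
+// As a concrete illustration (the numbers are hypothetical): if an
+// information set's cumulative regrets are {-0.4, 0.0, 1.2} after the
+// traversal, this reset leaves {0.0, 0.0, 1.2}, and the next call to
+// ApplyRegretMatching() then normalizes over the positive entries only.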
+void CFRSolverBase::ApplyRegretMatchingPlusReset() { + for (auto& entry : info_states_) { + for (int aidx = 0; aidx < entry.second.num_actions(); ++aidx) { + if (entry.second.cumulative_regrets[aidx] < 0) { + entry.second.cumulative_regrets[aidx] = 0; + } + } + } +} + +void CFRSolverBase::ApplyRegretMatching() { + for (auto& entry : info_states_) { + entry.second.ApplyRegretMatching(); + } +} + +std::unique_ptr DeserializeCFRSolver(const std::string& serialized, + std::string delimiter) { + auto partial = PartiallyDeserializeCFRSolver(serialized); + SPIEL_CHECK_EQ(partial.solver_type, "CFRSolver"); + auto solver = std::make_unique( + partial.game, std::stoi(partial.solver_specific_state)); + DeserializeCFRInfoStateValuesTable(partial.serialized_cfr_values_table, + &solver->InfoStateValuesTable(), + delimiter); + return solver; +} + +std::unique_ptr DeserializeCFRPlusSolver( + const std::string& serialized, std::string delimiter) { + auto partial = PartiallyDeserializeCFRSolver(serialized); + SPIEL_CHECK_EQ(partial.solver_type, "CFRPlusSolver"); + auto solver = std::make_unique( + partial.game, std::stoi(partial.solver_specific_state)); + DeserializeCFRInfoStateValuesTable(partial.serialized_cfr_values_table, + &solver->InfoStateValuesTable(), + delimiter); + return solver; +} + +PartiallyDeserializedCFRSolver PartiallyDeserializeCFRSolver( + const std::string& serialized) { + // We don't copy the CFR values table section due to potential large size. + enum Section { + kInvalid = -1, + kMeta = 0, + kGame = 1, + kSolverType = 2, + kSolverSpecificState = 3 + }; + std::array section_strings = {"", "", "", ""}; + Section current_section = kInvalid; + + std::vector lines = absl::StrSplit(serialized, '\n'); + for (int i = 0; i < lines.size(); i++) { + if (lines[i].length() == 0 || lines[i].at(0) == '#') { + // Skip comments and blank lines + } else if (lines[i] == kSerializeMetaSectionHeader) { + SPIEL_CHECK_EQ(current_section, kInvalid); + current_section = kMeta; + } else if (lines[i] == kSerializeGameSectionHeader) { + SPIEL_CHECK_EQ(current_section, kMeta); + current_section = kGame; + } else if (lines[i] == kSerializeSolverTypeSectionHeader) { + SPIEL_CHECK_EQ(current_section, kGame); + current_section = kSolverType; + } else if (lines[i] == kSerializeSolverSpecificStateSectionHeader) { + SPIEL_CHECK_EQ(current_section, kSolverType); + current_section = kSolverSpecificState; + } else if (lines[i] == kSerializeSolverValuesTableSectionHeader) { + SPIEL_CHECK_EQ(current_section, kSolverSpecificState); + break; + } else { + SPIEL_CHECK_NE(current_section, kInvalid); + if (current_section == kSolverSpecificState) { + absl::StrAppend(§ion_strings[current_section], lines[i], "\n"); + } else { + absl::StrAppend(§ion_strings[current_section], lines[i]); + } + } + } + + // We currently just ignore the meta section. + // In order to avod copying the CFR values table data we rather split it again + // and obtain a single string_view that can be deserialized later using the + // DeserializeCFRInfoStateValuesTable method. 
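+  // (Callers such as DeserializeCFRSolver() and DeserializeCFRPlusSolver()
+  // above forward that string_view to DeserializeCFRInfoStateValuesTable().)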
+ std::pair other_and_values_table_data = + absl::StrSplit( + serialized, + absl::StrCat(kSerializeSolverValuesTableSectionHeader, "\n")); + return PartiallyDeserializedCFRSolver(DeserializeGame(section_strings[kGame]), + section_strings[kSolverType], + section_strings[kSolverSpecificState], + other_and_values_table_data.second); +} + +} // namespace algorithms +} // namespace open_spiel + diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/cfr.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/cfr.h new file mode 100644 index 0000000..b22e89f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/cfr.h @@ -0,0 +1,383 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_CFR_H_ +#define OPEN_SPIEL_ALGORITHMS_CFR_H_ + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace algorithms { + +constexpr const char* kSerializeMetaSectionHeader = "[Meta]"; +constexpr const char* kSerializeGameSectionHeader = "[Game]"; +constexpr const char* kSerializeSolverTypeSectionHeader = "[SolverType]"; +constexpr const char* kSerializeSolverSpecificStateSectionHeader = + "[SolverSpecificState]"; +constexpr const char* kSerializeSolverValuesTableSectionHeader = + "[SolverValuesTable]"; + +// A basic structure to store the relevant quantities. +struct CFRInfoStateValues { + CFRInfoStateValues() {} + CFRInfoStateValues(std::vector la, double init_value) + : legal_actions(la), + cumulative_regrets(la.size(), init_value), + cumulative_policy(la.size(), init_value), + current_policy(la.size(), 1.0 / la.size()) {} + CFRInfoStateValues(std::vector la) : CFRInfoStateValues(la, 0) {} + + // For randomized initial regrets. + CFRInfoStateValues(std::vector la, + std::mt19937* rng, + double magnitude_scale) : CFRInfoStateValues(la, 0) { + for (int i = 0; i < cumulative_policy.size(); ++i) { + cumulative_regrets[i] = magnitude_scale * + absl::Uniform(*rng, 0.0, 1.0); + } + ApplyRegretMatching(); + } + + // Fills current_policy according to the standard application of the + // regret-matching algorithm in the CFR papers. + void ApplyRegretMatching(); + + // Apply regret matching but over max(R^{T,+}(s,a), delta) rather than just + // R^{T,+}(s,a). This is mostly unused but sometimes useful for debugging + // convergence. + void ApplyRegretMatchingAllPositive(double delta); + + bool empty() const { return legal_actions.empty(); } + int num_actions() const { return legal_actions.size(); } + + // A string representation of the information state values. + std::string ToString() const; + + // A less verbose string representation used for serialization purposes. 
The + // double_precision parameter indicates the number of decimal places in + // floating point numbers formatting, value -1 formats doubles with lossless, + // non-portable bitwise representation hex strings. + std::string Serialize(int double_precision) const; + + // Samples from current policy using randomly generated z, adding epsilon + // exploration (mixing in uniform). + int SampleActionIndex(double epsilon, double z); + + // Extracts the current policy. Note: assumes it is filled. + ActionsAndProbs GetCurrentPolicy() const; + + // Return index of the action within the vector of legal_actions, + // or exit with an error. + int GetActionIndex(Action a); + + std::vector legal_actions; + std::vector cumulative_regrets; + std::vector cumulative_policy; + std::vector current_policy; +}; + +CFRInfoStateValues DeserializeCFRInfoStateValues(absl::string_view serialized); + +// A type for tables holding CFR values. +using CFRInfoStateValuesTable = + std::unordered_map; + +// The result parameter is passed by pointer in order to avoid copying/moving +// the string once the table is fully serialized (CFRInfoStateValuesTable +// instances could be very large). See comments above +// CFRInfoStateValues::Serialize(double_precision) for notes about the +// double_precision parameter. +void SerializeCFRInfoStateValuesTable( + const CFRInfoStateValuesTable& info_states, std::string* result, + int double_precision, std::string delimiter = "<~>"); + +// Similarly as above, the result parameter is passed by pointer in order to +// avoid copying/moving the table once fully deserialized. +void DeserializeCFRInfoStateValuesTable(absl::string_view serialized, + CFRInfoStateValuesTable* result, + std::string delimiter = "<~>"); + +// A policy that extracts the average policy from the CFR table values, which +// can be passed to tabular exploitability. +class CFRAveragePolicy : public Policy { + public: + // Returns the average policy from the CFR values. + // If a state/info state is not found, return the default policy for the + // state/info state (or an empty policy if default_policy is nullptr). + // If an info state has zero cumulative regret for all actions, + // return a uniform policy. + CFRAveragePolicy(const CFRInfoStateValuesTable& info_states, + std::shared_ptr default_policy); + ActionsAndProbs GetStatePolicy(const State& state) const override { + return GetStatePolicy(state, state.CurrentPlayer()); + }; + ActionsAndProbs GetStatePolicy(const State& state, + Player player) const override; + ActionsAndProbs GetStatePolicy(const std::string& info_state) const override; + TabularPolicy AsTabular() const; + + private: + const CFRInfoStateValuesTable& info_states_; + UniformPolicy uniform_policy_; + std::shared_ptr default_policy_; + void GetStatePolicyFromInformationStateValues( + const CFRInfoStateValues& is_vals, + ActionsAndProbs* actions_and_probs) const; +}; + +// A policy that extracts the current policy from the CFR table values. +class CFRCurrentPolicy : public Policy { + public: + // Returns the current policy from the CFR values. If a default policy is + // passed in, then it means that it is used if the lookup fails (use nullptr + // to not use a default policy). 
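+  //
+  // Sketch of direct use (assuming `solver` is one of the CFR solvers
+  // declared below):
+  //   CFRCurrentPolicy policy(solver.InfoStateValuesTable(), nullptr);
+  // This mirrors what CFRSolverBase::CurrentPolicy() constructs internally.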
+ CFRCurrentPolicy(const CFRInfoStateValuesTable& info_states, + std::shared_ptr default_policy); + ActionsAndProbs GetStatePolicy(const State& state) const override { + return GetStatePolicy(state, state.CurrentPlayer()); + }; + ActionsAndProbs GetStatePolicy(const State& state, + Player player) const override; + ActionsAndProbs GetStatePolicy(const std::string& info_state) const override; + TabularPolicy AsTabular() const; + + private: + const CFRInfoStateValuesTable& info_states_; + std::shared_ptr default_policy_; + ActionsAndProbs GetStatePolicyFromInformationStateValues( + const CFRInfoStateValues& is_vals, + ActionsAndProbs& actions_and_probs) const; +}; + +// Base class supporting different flavours of the Counterfactual Regret +// Minimization (CFR) algorithm. +// +// see https://webdocs.cs.ualberta.ca/~bowling/papers/07nips-regretpoker.pdf +// and http://modelai.gettysburg.edu/2013/cfr/cfr.pdf +// +// The implementation is similar to the Python version: +// open_spiel/python/algorithms/cfr.py +// +// The algorithm computes an approximate Nash policy for 2 player zero-sum +// games. +// +// CFR can be view as a policy iteration algorithm. Importantly, the policies +// themselves do not converge to a Nash policy, but their average does. +// +class CFRSolverBase { + public: + CFRSolverBase(const Game& game, bool alternating_updates, + bool linear_averaging, bool regret_matching_plus, + bool random_initial_regrets = false, int seed = 0); + // The constructor below is used for deserialization purposes. + CFRSolverBase(std::shared_ptr game, bool alternating_updates, + bool linear_averaging, bool regret_matching_plus, int iteration, + bool random_initial_regrets = false, int seed = 0); + virtual ~CFRSolverBase() = default; + + // Performs one step of the CFR algorithm. + virtual void EvaluateAndUpdatePolicy(); + + // Computes the average policy, containing the policy for all players. + // The returned policy instance should only be used during the lifetime of + // the CFRSolver object. + std::shared_ptr AveragePolicy() const { + return std::make_shared(info_states_, nullptr); + } + // Note: This can be quite large. + TabularPolicy TabularAveragePolicy() const { + CFRAveragePolicy policy(info_states_, nullptr); + return policy.AsTabular(); + } + + // Computes the current policy, containing the policy for all players. + // The returned policy instance should only be used during the lifetime of + // the CFRSolver object. + std::shared_ptr CurrentPolicy() const { + return std::make_shared(info_states_, nullptr); + } + + TabularPolicy TabularCurrentPolicy() const { + CFRCurrentPolicy policy(info_states_, nullptr); + return policy.AsTabular(); + } + + CFRInfoStateValuesTable& InfoStateValuesTable() { return info_states_; } + + // See comments above CFRInfoStateValues::Serialize(double_precision) for + // notes about the double_precision parameter. + std::string Serialize(int double_precision = -1, + std::string delimiter = "<~>") const; + + protected: + std::shared_ptr game_; + + // Iteration to support linear_policy. + int iteration_ = 0; + CFRInfoStateValuesTable info_states_; + const std::unique_ptr root_state_; + const std::vector root_reach_probs_; + + // Compute the counterfactual regret and update the average policy for the + // specified player. + // The optional `policy_overrides` can be used to specify for each player a + // policy to use instead of the current policy. `policy_overrides=nullptr` + // will disable this feature. 
Otherwise it should be a [num_players] vector, + // and if `policy_overrides[p] != nullptr` it will be used instead of the + // current policy. This feature exists to support CFR-BR. + std::vector ComputeCounterFactualRegret( + const State& state, const absl::optional& alternating_player, + const std::vector& reach_probabilities, + const std::vector* policy_overrides); + + // Update the current policy for all information states. + void ApplyRegretMatching(); + + // This method should return the type of itself so that it can be checked + // in different deserialization methods; one method for each subtype. + // For an example take a look at the CFRSolver::SerializeThisType() and + // DeserializeCFRSolver() methods. + virtual std::string SerializeThisType() const { + SpielFatalError("Serialization of the base class is not supported."); + } + + private: + std::vector ComputeCounterFactualRegretForActionProbs( + const State& state, const absl::optional& alternating_player, + const std::vector& reach_probabilities, const int current_player, + const std::vector& info_state_policy, + const std::vector& legal_actions, + std::vector* child_values_out, + const std::vector* policy_overrides); + + void InitializeInfostateNodes(const State& state); + + // Fills `info_state_policy` to be a [num_actions] vector of the probabilities + // found in `policy` at the given `info_state`. + void GetInfoStatePolicyFromPolicy(std::vector* info_state_policy, + const std::vector& legal_actions, + const Policy* policy, + const std::string& info_state) const; + + // Get the policy at this information state. The probabilities are ordered in + // the same order as legal_actions. + std::vector GetPolicy(const std::string& info_state, + const std::vector& legal_actions); + + void ApplyRegretMatchingPlusReset(); + + std::vector RegretMatching(const std::string& info_state, + const std::vector& legal_actions); + + bool AllPlayersHaveZeroReachProb( + const std::vector& reach_probabilities) const; + + const bool regret_matching_plus_; + const bool alternating_updates_; + const bool linear_averaging_; + const bool random_initial_regrets_; + + const int chance_player_; + + // CFR generally does not use this random number generator. However, this is + // used for random initial regrets (and could be useful for some helper + // methods for debugging). + std::mt19937 rng_; +}; + +// Standard CFR implementation. +// +// See https://poker.cs.ualberta.ca/publications/NIPS07-cfr.pdf +class CFRSolver : public CFRSolverBase { + public: + explicit CFRSolver(const Game& game) + : CFRSolverBase(game, + /*alternating_updates=*/true, + /*linear_averaging=*/false, + /*regret_matching_plus=*/false) {} + // The constructor below is used for deserialization purposes. + CFRSolver(std::shared_ptr game, int iteration) + : CFRSolverBase(game, + /*alternating_updates=*/true, + /*linear_averaging=*/false, + /*regret_matching_plus=*/false, iteration) {} + + protected: + std::string SerializeThisType() const { return "CFRSolver"; } +}; + +std::unique_ptr DeserializeCFRSolver(const std::string& serialized, + std::string delimiter = "<~>"); + +// CFR+ implementation. +// +// See https://poker.cs.ualberta.ca/publications/2015-ijcai-cfrplus.pdf +// +// CFR+ is CFR with the following modifications: +// - use Regret Matching+ instead of Regret Matching. +// - use alternating updates instead of simultaneous updates. +// - use linear averaging. 
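+//
+// Minimal usage sketch (illustrative only; assumes the registered game
+// "kuhn_poker"):
+//
+//   std::shared_ptr<const Game> game = LoadGame("kuhn_poker");
+//   CFRPlusSolver solver(*game);
+//   for (int i = 0; i < 1000; ++i) solver.EvaluateAndUpdatePolicy();
+//   std::shared_ptr<Policy> average_policy = solver.AveragePolicy();
+//
+// For two-player zero-sum games the average policy approaches an approximate
+// Nash equilibrium as the number of iterations grows.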
+class CFRPlusSolver : public CFRSolverBase { + public: + CFRPlusSolver(const Game& game) + : CFRSolverBase(game, + /*alternating_updates=*/true, + /*linear_averaging=*/true, + /*regret_matching_plus=*/true) {} + // The constructor below is used for deserialization purposes. + CFRPlusSolver(std::shared_ptr game, int iteration) + : CFRSolverBase(game, + /*alternating_updates=*/true, + /*linear_averaging=*/false, + /*regret_matching_plus=*/false, iteration) {} + + protected: + std::string SerializeThisType() const { return "CFRPlusSolver"; } +}; + +std::unique_ptr DeserializeCFRPlusSolver( + const std::string& serialized, std::string delimiter = "<~>"); + +struct PartiallyDeserializedCFRSolver { + PartiallyDeserializedCFRSolver(std::shared_ptr game, + std::string solver_type, + std::string solver_specific_state, + absl::string_view serialized_cfr_values_table) + : game(game), + solver_type(solver_type), + solver_specific_state(solver_specific_state), + serialized_cfr_values_table(serialized_cfr_values_table) {} + std::shared_ptr game; + std::string solver_type; + std::string solver_specific_state; + absl::string_view serialized_cfr_values_table; +}; + +PartiallyDeserializedCFRSolver PartiallyDeserializeCFRSolver( + const std::string& serialized); + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_CFR_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/cfr_br.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/cfr_br.cc new file mode 100644 index 0000000..c622cab --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/cfr_br.cc @@ -0,0 +1,97 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/cfr_br.h" + +#include "open_spiel/algorithms/cfr.h" +#include "open_spiel/policy.h" + +namespace open_spiel { +namespace algorithms { + +CFRBRSolver::CFRBRSolver(const Game& game) + : CFRSolverBase(game, + /*alternating_updates=*/false, + /*linear_averaging=*/false, + /*regret_matching_plus=*/false), + policy_overrides_(game.NumPlayers(), nullptr), + uniform_policy_(UniformPolicy()) { + InitializeBestResponseComputers(); +} + +CFRBRSolver::CFRBRSolver(std::shared_ptr game, int iteration) + : CFRSolverBase(game, + /*alternating_updates=*/false, + /*linear_averaging=*/false, + /*regret_matching_plus=*/false, iteration), + policy_overrides_(game->NumPlayers(), nullptr), + uniform_policy_(UniformPolicy()) { + InitializeBestResponseComputers(); +} + +void CFRBRSolver::InitializeBestResponseComputers() { + for (int p = 0; p < game_->NumPlayers(); ++p) { + best_response_computers_.push_back(std::unique_ptr( + new TabularBestResponse(*game_, p, &uniform_policy_))); + } +} + +void CFRBRSolver::EvaluateAndUpdatePolicy() { + ++iteration_; + + std::vector br_policies(game_->NumPlayers()); + std::shared_ptr current_policy = CurrentPolicy(); + + // Set all the player's policies first. 
+ for (int p = 0; p < game_->NumPlayers(); ++p) { + // Need to have an exception here because the CFR policy objects are + // wrappers around information that is contained in a table, and those do + // not exist until there's been a tree traversal to compute regrets below. + if (iteration_ > 1) { + best_response_computers_[p]->SetPolicy(current_policy.get()); + } + } + + // Now, for each player compute a best response + for (int p = 0; p < game_->NumPlayers(); ++p) { + br_policies[p] = best_response_computers_[p]->GetBestResponsePolicy(); + } + + for (int p = 0; p < game_->NumPlayers(); ++p) { + // Override every player except p. + for (int opp = 0; opp < game_->NumPlayers(); ++opp) { + policy_overrides_[opp] = (opp == p ? nullptr : &br_policies[opp]); + } + + // Then collect regret and update p's average strategy. + ComputeCounterFactualRegret(*root_state_, p, root_reach_probs_, + &policy_overrides_); + } + ApplyRegretMatching(); +} + +std::unique_ptr DeserializeCFRBRSolver( + const std::string& serialized, std::string delimiter) { + auto partial = PartiallyDeserializeCFRSolver(serialized); + SPIEL_CHECK_EQ(partial.solver_type, "CFRBRSolver"); + auto solver = std::make_unique( + partial.game, std::stoi(partial.solver_specific_state)); + DeserializeCFRInfoStateValuesTable(partial.serialized_cfr_values_table, + &solver->InfoStateValuesTable(), + delimiter); + return solver; +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/cfr_br.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/cfr_br.h new file mode 100644 index 0000000..5ad97d4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/cfr_br.h @@ -0,0 +1,60 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_CFR_BR_H_ +#define OPEN_SPIEL_ALGORITHMS_CFR_BR_H_ + +#include +#include + +#include "open_spiel/algorithms/best_response.h" +#include "open_spiel/algorithms/cfr.h" +#include "open_spiel/algorithms/tabular_exploitability.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" + +// An implementation of CFR-BR (Johanson et al., "Finding Optimal Abstract +// Strategies in Extensive-Form Games", 2012). In CFR-BR, at each iteration, +// each player minimizes regret against their worst-case opponent (a best +// response to its current policy). +namespace open_spiel { +namespace algorithms { + +class CFRBRSolver : public CFRSolverBase { + public: + explicit CFRBRSolver(const Game& game); + // The constructor below is used for deserialization purposes. + CFRBRSolver(std::shared_ptr game, int iteration); + + void EvaluateAndUpdatePolicy() override; + + protected: + std::string SerializeThisType() const { return "CFRBRSolver"; } + + private: + void InitializeBestResponseComputers(); + // Policies that are used instead of the current policy for some of the + // opponent players. 
+ std::vector policy_overrides_; + UniformPolicy uniform_policy_; + std::vector> best_response_computers_; +}; + +std::unique_ptr DeserializeCFRBRSolver( + const std::string& serialized, std::string delimiter = "<~>"); + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_CFR_BR_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/cfr_br_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/cfr_br_test.cc new file mode 100644 index 0000000..e663621 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/cfr_br_test.cc @@ -0,0 +1,100 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/cfr_br.h" + +#include "open_spiel/algorithms/expected_returns.h" +#include "open_spiel/algorithms/tabular_exploitability.h" +#include "open_spiel/games/kuhn_poker/kuhn_poker.h" +#include "open_spiel/games/leduc_poker/leduc_poker.h" + +namespace open_spiel { +namespace algorithms { +namespace { + +// Checks that the expected value of the policy is close to the Nash values. +// Assumes two-player zero-sum games. +void CheckNashValues(const Game& game, const Policy& policy, + double first_player_nash_value, double tolerance) { + const std::vector game_value = + ExpectedReturns(*game.NewInitialState(), policy, -1); + SPIEL_CHECK_EQ(2, game_value.size()); + SPIEL_CHECK_FLOAT_NEAR(game_value[0], first_player_nash_value, tolerance); + SPIEL_CHECK_FLOAT_NEAR(game_value[1], -first_player_nash_value, tolerance); +} + +void CFRBRTest_KuhnPoker() { + std::shared_ptr game = LoadGame("kuhn_poker"); + CFRBRSolver solver(*game); + for (int i = 0; i < 300; i++) { + solver.EvaluateAndUpdatePolicy(); + } + const std::shared_ptr average_policy = solver.AveragePolicy(); + // 1/18 is the Nash value. 
See https://en.wikipedia.org/wiki/Kuhn_poker + CheckNashValues(*game, *average_policy, -1.0 / 18, 0.001); + SPIEL_CHECK_LE(Exploitability(*game, *average_policy), 0.05); +} + +void CFRBRTest_LeducPoker() { + std::shared_ptr game = LoadGame("leduc_poker"); + CFRBRSolver solver(*game); + int num_iters = 100; + for (int i = 0; i < num_iters; i++) { + solver.EvaluateAndUpdatePolicy(); + } + const std::shared_ptr average_policy = solver.AveragePolicy(); + double nash_conv = NashConv(*game, *average_policy); + std::cout << "Iters " << num_iters << ", nash_conv = " << nash_conv + << std::endl; +} + +void CFRBRTest_CFRBRSolverSerialization() { + auto game = LoadGame("kuhn_poker"); + CFRBRSolver solver = CFRBRSolver(*game); + double exploitability0 = Exploitability(*game, *solver.AveragePolicy()); + + for (int i = 0; i < 50; i++) { + solver.EvaluateAndUpdatePolicy(); + } + double exploitability1 = Exploitability(*game, *solver.AveragePolicy()); + SPIEL_CHECK_GT(exploitability0, exploitability1); + + std::string serialized = solver.Serialize(); + std::unique_ptr deserialized_solver = + DeserializeCFRBRSolver(serialized); + SPIEL_CHECK_EQ(solver.InfoStateValuesTable().size(), + deserialized_solver->InfoStateValuesTable().size()); + double exploitability2 = + Exploitability(*game, *deserialized_solver->AveragePolicy()); + SPIEL_CHECK_FLOAT_NEAR(exploitability1, exploitability2, 1e-4); + + for (int i = 0; i < 50; i++) { + deserialized_solver->EvaluateAndUpdatePolicy(); + } + double exploitability3 = + Exploitability(*game, *deserialized_solver->AveragePolicy()); + SPIEL_CHECK_GT(exploitability2, exploitability3); +} + +} // namespace +} // namespace algorithms +} // namespace open_spiel + +namespace algorithms = open_spiel::algorithms; + +int main(int argc, char** argv) { + algorithms::CFRBRTest_KuhnPoker(); + algorithms::CFRBRTest_LeducPoker(); + algorithms::CFRBRTest_CFRBRSolverSerialization(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/cfr_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/cfr_test.cc new file mode 100644 index 0000000..0f8b542 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/cfr_test.cc @@ -0,0 +1,324 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/algorithms/cfr.h" + +#include +#include + +#include "open_spiel/algorithms/expected_returns.h" +#include "open_spiel/algorithms/history_tree.h" +#include "open_spiel/algorithms/tabular_exploitability.h" +#include "open_spiel/game_transforms/turn_based_simultaneous_game.h" +#include "open_spiel/games/kuhn_poker/kuhn_poker.h" +#include "open_spiel/games/leduc_poker/leduc_poker.h" +#include "open_spiel/games/liars_dice/liars_dice.h" +#include "open_spiel/games/matching_pennies_3p/matching_pennies_3p.h" +#include "open_spiel/games/tic_tac_toe/tic_tac_toe.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace { + +void CheckNashKuhnPoker(const Game& game, const Policy& policy) { + const std::vector game_value = + ExpectedReturns(*game.NewInitialState(), policy, -1); + + // 1/18 is the Nash value. See https://en.wikipedia.org/wiki/Kuhn_poker + constexpr float nash_value = 1.0 / 18.0; + constexpr float eps = 1e-3; + + SPIEL_CHECK_EQ(2, game_value.size()); + SPIEL_CHECK_FLOAT_NEAR((float)game_value[0], -nash_value, eps); + SPIEL_CHECK_FLOAT_NEAR((float)game_value[1], nash_value, eps); +} + +void CheckExploitabilityKuhnPoker(const Game& game, const Policy& policy) { + SPIEL_CHECK_LE(Exploitability(game, policy), 0.05); +} + +void CFRTest_KuhnPoker() { + std::shared_ptr game = LoadGame("kuhn_poker"); + CFRSolver solver(*game); + for (int i = 0; i < 300; i++) { + solver.EvaluateAndUpdatePolicy(); + } + const std::shared_ptr average_policy = solver.AveragePolicy(); + CheckNashKuhnPoker(*game, *average_policy); + CheckExploitabilityKuhnPoker(*game, *average_policy); +} + +void CFRTest_IIGoof4() { + // Random points order. + std::shared_ptr game = LoadGameAsTurnBased( + "goofspiel", {{"imp_info", GameParameter(true)}, + {"points_order", GameParameter(std::string("random"))}, + {"num_cards", GameParameter(4)}}); + + CFRSolver solver(*game); + for (int i = 0; i < 100; i++) { + solver.EvaluateAndUpdatePolicy(); + } + // Values checked with Marc's thesis implementation. + const std::shared_ptr average_policy = solver.AveragePolicy(); + SPIEL_CHECK_LE(Exploitability(*game, *average_policy), 0.1); + + // Fixed points order. + game = LoadGameAsTurnBased( + "goofspiel", {{"imp_info", GameParameter(true)}, + {"points_order", GameParameter(std::string("descending"))}, + {"num_cards", GameParameter(4)}}); + + CFRSolver solver2(*game); + for (int i = 0; i < 1000; i++) { + solver2.EvaluateAndUpdatePolicy(); + } + // Values checkes with Marc's thesis implementation. 
+ const std::shared_ptr average_policy2 = solver2.AveragePolicy(); + SPIEL_CHECK_LE(Exploitability(*game, *average_policy2), 0.01); +} + +void CFRPlusTest_KuhnPoker() { + std::shared_ptr game = LoadGame("kuhn_poker"); + CFRPlusSolver solver(*game); + for (int i = 0; i < 200; i++) { + solver.EvaluateAndUpdatePolicy(); + } + const std::shared_ptr average_policy = solver.AveragePolicy(); + CheckNashKuhnPoker(*game, *average_policy); + CheckExploitabilityKuhnPoker(*game, *average_policy); +} + +void CFRTest_KuhnPokerRunsWithThreePlayers(bool linear_averaging, + bool regret_matching_plus, + bool alternating_updates) { + int num_players = 3; + std::shared_ptr game = + LoadGame("kuhn_poker", {{"players", GameParameter(num_players)}}); + CFRSolverBase solver(*game, alternating_updates, linear_averaging, + regret_matching_plus); + for (int i = 0; i < 10; i++) { + solver.EvaluateAndUpdatePolicy(); + } + std::shared_ptr average_policy = solver.AveragePolicy(); + // Value upper-bounds inspired by Fig 2 of (Srinivasan et al., Actor-Critic + // Policy Optimization in Partially Observable Multiagent Environments, 2018) + // https://arxiv.org/abs/1810.09026 + SPIEL_CHECK_LE(NashConv(*game, *average_policy), 1); +} + +// Tests the convergence of CFR in a specific game. Only computes nashconv +// if the upper bound is positive. +void CFRTest_GeneralMultiplePlayerTest(const std::string& game_name, + int num_players, int num_iterations, + double nashconv_upper_bound) { + std::shared_ptr game = + LoadGame(game_name, {{"players", GameParameter(num_players)}}); + CFRSolverBase solver(*game, + /*alternating_updates=*/true, + /*linear_averaging=*/false, + /*regret_matching_plus=*/false); + for (int i = 0; i < num_iterations; i++) { + solver.EvaluateAndUpdatePolicy(); + } + + if (nashconv_upper_bound > 0) { + std::shared_ptr average_policy = solver.AveragePolicy(); + SPIEL_CHECK_LE(NashConv(*game, *average_policy), nashconv_upper_bound); + } +} + +void CFRTest_OneShotGameTest(int iterations, std::string one_shot_game, + double nashconv_upper_bound) { + // Note: this is a 3-player general sum game with a unique uniform mixed + // strategy. However, CFR is not guaranteed to converge, and indeed fails to, + // just like was shown for fictitious play. + std::cout << one_shot_game << " convergence test." << std::endl; + std::shared_ptr game = LoadGameAsTurnBased(one_shot_game); + CFRSolverBase solver(*game, + /*alternating_updates=*/true, + /*linear_averaging=*/false, + /*regret_matching_plus=*/false); + double nash_conv = 0; + for (int i = 0; i < iterations; i++) { + solver.EvaluateAndUpdatePolicy(); + if (i % 10 == 0) { + std::shared_ptr average_policy = solver.AveragePolicy(); + nash_conv = NashConv(*game, *average_policy); + std::cout << "iter " << i << ", nashconv = " << nash_conv << std::endl; + + if (game->GetType().utility == GameType::Utility::kConstantSum || + game->GetType().utility == GameType::Utility::kZeroSum) { + double expl = Exploitability(*game, *average_policy); + SPIEL_CHECK_FLOAT_NEAR(expl, nash_conv / game->NumPlayers(), 1e-10); + } + } + } + SPIEL_CHECK_LE(nash_conv, nashconv_upper_bound); +} + +// Tests the convergence of CFR in a specific game. Only computes nashconv +// if the upper bound is positive. 
+void CFRTest_TicTacToe(int num_iterations, double nashconv_upper_bound) { + std::shared_ptr game = LoadGame("tic_tac_toe"); + CFRSolverBase solver(*game, + /*alternating_updates=*/true, + /*linear_averaging=*/false, + /*regret_matching_plus=*/false); + for (int i = 0; i < num_iterations; i++) { + solver.EvaluateAndUpdatePolicy(); + } + + if (nashconv_upper_bound > 0) { + std::shared_ptr average_policy = solver.AveragePolicy(); + SPIEL_CHECK_LE(NashConv(*game, *average_policy), nashconv_upper_bound); + } +} + +void CFRTest_InfoStateValuesTableSerialization() { + // Check empty + CFRInfoStateValuesTable info_state_values_table = {}; + std::string serialized0 = ""; + SerializeCFRInfoStateValuesTable(info_state_values_table, &serialized0, -1); + CFRInfoStateValuesTable deserialized0; + DeserializeCFRInfoStateValuesTable(serialized0, &deserialized0); + SPIEL_CHECK_TRUE(deserialized0.empty()); + + // Check non-empty + info_state_values_table = { + {"", CFRInfoStateValues({0}, 1.0)}, + {"0:0,0;0", CFRInfoStateValues({0, 1, 2}, 0.1)}, + {"<->\n<->", CFRInfoStateValues({0, 1, 2}, 0.1)}, + {"1:1,1;1", CFRInfoStateValues({0, 1, 2, 3}, 0.2)}}; + std::string serialized1 = ""; + SerializeCFRInfoStateValuesTable(info_state_values_table, &serialized1, -1); + CFRInfoStateValuesTable deserialized1; + DeserializeCFRInfoStateValuesTable(serialized1, &deserialized1); + + SPIEL_CHECK_EQ(info_state_values_table.size(), + info_state_values_table.size()); + for (const auto& [info_state, values] : info_state_values_table) { + for (int i = 0; i < values.legal_actions.size(); i++) { + SPIEL_CHECK_EQ(values.legal_actions.at(i), + deserialized1.at(info_state).legal_actions.at(i)); + SPIEL_CHECK_FLOAT_NEAR( + values.cumulative_regrets.at(i), + deserialized1.at(info_state).cumulative_regrets.at(i), 1e-15); + SPIEL_CHECK_FLOAT_NEAR( + values.cumulative_policy.at(i), + deserialized1.at(info_state).cumulative_policy.at(i), 1e-15); + SPIEL_CHECK_FLOAT_NEAR(values.current_policy.at(i), + deserialized1.at(info_state).current_policy.at(i), + 1e-15); + } + } +} + +void CFRTest_CFRSolverSerialization() { + auto game = LoadGame("kuhn_poker"); + CFRSolver solver = CFRSolver(*game); + double exploitability0 = Exploitability(*game, *solver.AveragePolicy()); + + for (int i = 0; i < 50; i++) { + solver.EvaluateAndUpdatePolicy(); + } + double exploitability1 = Exploitability(*game, *solver.AveragePolicy()); + SPIEL_CHECK_GT(exploitability0, exploitability1); + + std::string serialized = solver.Serialize(); + std::unique_ptr deserialized_solver = + DeserializeCFRSolver(serialized); + SPIEL_CHECK_EQ(solver.InfoStateValuesTable().size(), + deserialized_solver->InfoStateValuesTable().size()); + double exploitability2 = + Exploitability(*game, *deserialized_solver->AveragePolicy()); + SPIEL_CHECK_FLOAT_NEAR(exploitability1, exploitability2, 1e-15); + + for (int i = 0; i < 50; i++) { + deserialized_solver->EvaluateAndUpdatePolicy(); + } + double exploitability3 = + Exploitability(*game, *deserialized_solver->AveragePolicy()); + SPIEL_CHECK_GT(exploitability2, exploitability3); +} + +} // namespace +} // namespace algorithms +} // namespace open_spiel + +namespace algorithms = open_spiel::algorithms; + +int main(int argc, char** argv) { + algorithms::CFRTest_KuhnPoker(); + algorithms::CFRTest_IIGoof4(); + algorithms::CFRPlusTest_KuhnPoker(); + algorithms::CFRTest_KuhnPokerRunsWithThreePlayers( + /*linear_averaging=*/false, + /*regret_matching_plus=*/false, + /*alternating_updates=*/false); + algorithms::CFRTest_KuhnPokerRunsWithThreePlayers( 
+ /*linear_averaging=*/true, + /*regret_matching_plus=*/false, + /*alternating_updates=*/false); + algorithms::CFRTest_KuhnPokerRunsWithThreePlayers( + /*linear_averaging=*/true, + /*regret_matching_plus=*/true, + /*alternating_updates=*/false); + algorithms::CFRTest_KuhnPokerRunsWithThreePlayers( + /*linear_averaging=*/true, + /*regret_matching_plus=*/true, + /*alternating_updates=*/true); + + // Value upper bounds for Kuhn taken from Figure 2 of (Lanctot, Further + // Developments of Extensive-Form Replicator Dynamics using the Sequence-Form + // Representation, 2014). + algorithms::CFRTest_GeneralMultiplePlayerTest( + /*game_name=*/"kuhn_poker", /*num_players=*/3, /*num_iterations=*/10, + /*nashconv_upper_bound=*/1.0); + algorithms::CFRTest_GeneralMultiplePlayerTest( + /*game_name=*/"kuhn_poker", /*num_players=*/4, /*num_iterations=*/10, + /*nashconv_upper_bound=*/1.0); + + // Value upper-bounds for Leduc taken from Figure 17 of (Lanctot et al., + // A Unified Game-Theoretic Approach to Multiagent Reinforcement Learning, + // 2017). See https://arxiv.org/abs/1711.00832. For Liar's Dice, number's were + // obtained using Marc's thesis code. + algorithms::CFRTest_GeneralMultiplePlayerTest( + /*game_name=*/"leduc_poker", /*num_players=*/2, /*num_iterations=*/10, + /*nashconv_upper_bound=*/2.0); + + // Disabled as they make the test too long for a unit test. + // algorithms::CFRTest_GeneralMultiplePlayerTest( + // /*game_name=*/"liars_dice", /*num_players=*/2, /*num_iterations=*/10, + // /*nashconv_upper_bound=*/1.0); + // algorithms::CFRTest_GeneralMultiplePlayerTest( + // /*game_name=*/"leduc_poker", /*num_players=*/3, /*num_iterations=*/2, + // /*nashconv_upper_bound=*/10.0); + + // Test a few one-shot games. + algorithms::CFRTest_OneShotGameTest(1000, "matrix_rps", 1e-6); + algorithms::CFRTest_OneShotGameTest(1000, "matrix_shapleys_game", 1.0); + algorithms::CFRTest_OneShotGameTest(1000, "matching_pennies_3p", 3.0); + + // Try out a perfect information game that has an InformationState that obeys + // perfect recall. Test is disabled because it adds 20 seconds. Let's revisit + // when we add a version that can handle safe imperfect recall information + // states. + // algorithms::CFRTest_TicTacToe(10, 2.0); + + algorithms::CFRTest_InfoStateValuesTableSerialization(); + algorithms::CFRTest_CFRSolverSerialization(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dev_builder.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dev_builder.cc new file mode 100644 index 0000000..8294673 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dev_builder.cc @@ -0,0 +1,127 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
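+// Editorial note (not part of upstream OpenSpiel): a minimal usage sketch for
+// CorrDevBuilder, adapted from corr_dev_builder_test.cc later in this patch.
+// It converts a mixed tabular policy into a correlation device by sampling:
+//
+//   std::shared_ptr<const Game> game = open_spiel::LoadGame("kuhn_poker");
+//   open_spiel::TabularPolicy uniform = open_spiel::GetUniformPolicy(*game);
+//   open_spiel::algorithms::CorrDevBuilder builder;
+//   builder.AddSampledJointPolicy(uniform, /*num_samples=*/1000);
+//   open_spiel::algorithms::CorrelationDevice mu =
+//       builder.GetCorrelationDevice();
+//
+// The resulting mu is a normalized distribution over deterministic joint
+// policies and can be passed directly to the *Dist functions in corr_dist.h.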
+ +#include "open_spiel/algorithms/corr_dev_builder.h" + +#include "open_spiel/policy.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { + +CorrDevBuilder::CorrDevBuilder(int seed) : rng_(seed), total_weight_(0.0) {} + +void CorrDevBuilder::AddDeterminsticJointPolicy(const TabularPolicy& policy, + double weight) { + std::string key = policy.ToStringSorted(); + auto iter = policy_weights_.find(key); + if (iter == policy_weights_.end()) { + policy_weights_[key] = weight; + policy_map_[key] = policy; + } else { + iter->second += weight; + } + total_weight_ += weight; +} + +void CorrDevBuilder::AddSampledJointPolicy(const TabularPolicy& policy, + int num_samples, double weight) { + for (int sample = 0; sample < num_samples; ++sample) { + TabularPolicy sampled_policy; + for (const auto& iter : policy.PolicyTable()) { + Action sampled_action = SampleAction(iter.second, rng_).first; + sampled_policy.SetStatePolicy( + iter.first, ToDeterministicPolicy(iter.second, sampled_action)); + } + AddDeterminsticJointPolicy(sampled_policy, 1.0 / num_samples * weight); + } +} + +void CorrDevBuilder::AddMixedJointPolicy(const TabularPolicy& policy, + double weight) { + std::vector action_indices(policy.PolicyTable().size(), 0); + bool done = false; + double total_prob = 0.0; + + while (!done) { + // Construct the joint policy and add it. + TabularPolicy deterministic_policy; + double prob = 1.0; + int info_state_idx = 0; + for (const auto& iter : policy.PolicyTable()) { + Action action = iter.second[action_indices[info_state_idx]].first; + prob *= GetProb(iter.second, action); + if (prob == 0.0) { + break; + } + deterministic_policy.SetStatePolicy( + iter.first, ToDeterministicPolicy(iter.second, action)); + info_state_idx++; + } + + SPIEL_CHECK_PROB(prob); + if (prob > 0.0) { + AddDeterminsticJointPolicy(deterministic_policy, prob * weight); + total_prob += prob; + } + + // Now, try to move to the next joint policy. 
+ info_state_idx = 0; + done = true; + for (const auto& iter : policy.PolicyTable()) { + if (++action_indices[info_state_idx] < iter.second.size()) { + done = false; + break; + } else { + action_indices[info_state_idx] = 0; + } + info_state_idx++; + } + } + + SPIEL_CHECK_TRUE(Near(total_prob, 1.0, 1e-10)); +} + +CorrelationDevice CorrDevBuilder::GetCorrelationDevice() const { + SPIEL_CHECK_GT(total_weight_, 0); + CorrelationDevice corr_dev; + double sum_weight = 0; + for (const auto& key_and_policy : policy_map_) { + double weight = policy_weights_.at(key_and_policy.first); + sum_weight += weight; + corr_dev.push_back({weight / total_weight_, key_and_policy.second}); + } + SPIEL_CHECK_TRUE(Near(sum_weight, total_weight_)); + return corr_dev; +} + +CorrelationDevice SampledDeterminizeCorrDev(const CorrelationDevice& corr_dev, + int num_samples_per_policy) { + CorrDevBuilder cdb; + for (const std::pair& item : corr_dev) { + cdb.AddSampledJointPolicy(item.second, num_samples_per_policy, item.first); + } + return cdb.GetCorrelationDevice(); +} + +CorrelationDevice DeterminizeCorrDev(const CorrelationDevice& corr_dev) { + CorrDevBuilder cdb; + for (const std::pair& item : corr_dev) { + cdb.AddMixedJointPolicy(item.second, item.first); + } + return cdb.GetCorrelationDevice(); +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dev_builder.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dev_builder.h new file mode 100644 index 0000000..1513b9e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dev_builder.h @@ -0,0 +1,91 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_CORR_DEV_AGGREGATOR_H_ +#define OPEN_SPIEL_ALGORITHMS_CORR_DEV_AGGREGATOR_H_ + +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/algorithms/corr_dist.h" +#include "open_spiel/policy.h" + +namespace open_spiel { +namespace algorithms { + +// A helper class for maintaining and building correlation devices +// (distributions over joint deterministic policies). +// +// This helper exists to serve algorithms interact with the CorrDist functions +// (see corr_dist.h), which require distributions over joint deterministic +// policies. Algorithms like CFR produce stochastic policies, so they have +// to either be converted or sampled before they can be evaluated by the +// CorrDist functions. +// +// This helper class maintains weights over joint determinstic tabular policies, +// updating each as new policies are added. A correlation device is obtained by +// normalizing the weights over all the deterministic policies being tracked. +class CorrDevBuilder { + public: + CorrDevBuilder(int seed = 0); + + // Add a joint policy with the specified weight. 
+ void AddDeterminsticJointPolicy(const TabularPolicy& policy, + double weight = 1.0); + + // Take a number of sampled joint policies and add each one with a weight + // of 1.0 / num_samples. The mixed policy should be complete: a policy should + // be defined for every information state, otherwise the distribution will not + // be properly built (joint policies will be incomplete). + void AddSampledJointPolicy(const TabularPolicy& policy, int num_samples, + double weight = 1.0); + + // This function adds a mixed joint policy to the correlation device. It does + // so by computing the probability of each deterministic joint policy by + // enumerating all possible actions that the policy is mixing over and + // computing the weight of each joint policy as a product of these + // probabilities. The mixed policy should be complete: a policy should be + // defined for every information state, otherwise the distribution will not + // be properly built (joint policies will be incomplete). + // Important note: this is computationally expensive and should only be used + // for small games. For larger games, used the sampled version above. + void AddMixedJointPolicy(const TabularPolicy& policy, double weight = 1.0); + + // Return the correlation device represented by this builder. + CorrelationDevice GetCorrelationDevice() const; + + private: + std::mt19937 rng_; + double total_weight_; + + // Each of these uses keys that have a canonical stringified policy as the + // key (e.g. complete policies with sorted keys). + absl::flat_hash_map policy_weights_; + absl::flat_hash_map policy_map_; +}; + +// Helper functions to extract a distribution over deterministic strategies +// given a distribution over pure strategies by invoking the CorrDevBuilder +// functions above. The first one is the sample-based version that drawns +// a number of samples per policy (CorrDevBuilder::AddSampledJointPolicy). +// The second one does the exact costly version +// (CorrDevBuilder::AddMixedJointPolicy). +CorrelationDevice SampledDeterminizeCorrDev(const CorrelationDevice& corr_dev, + int num_samples_per_policy); +CorrelationDevice DeterminizeCorrDev(const CorrelationDevice& corr_dev); + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_CORR_DEV_AGGREGATOR_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dev_builder_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dev_builder_test.cc new file mode 100644 index 0000000..8dd4077 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dev_builder_test.cc @@ -0,0 +1,173 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
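+// Editorial note (not part of upstream OpenSpiel): the tests in this file
+// chain three pieces added by this patch: a CFR solver, CorrDevBuilder, and
+// the corr_dist metrics. A condensed sketch of that pipeline (mirroring
+// CFRShapleysCorrDistTest below) is:
+//
+//   open_spiel::algorithms::CFRSolverBase solver(*game, /*alternating=*/true,
+//                                                 false, false);
+//   open_spiel::algorithms::CorrDevBuilder builder;
+//   for (int i = 0; i < 100; ++i) {
+//     solver.EvaluateAndUpdatePolicy();
+//     builder.AddMixedJointPolicy(
+//         static_cast<open_spiel::algorithms::CFRCurrentPolicy*>(
+//             solver.CurrentPolicy().get())->AsTabular());
+//   }
+//   open_spiel::algorithms::CorrDistConfig config;
+//   double gap = open_spiel::algorithms::EFCEDist(
+//       *game, config, builder.GetCorrelationDevice());
+//
+// The gap reports how far the empirical distribution of CFR iterates is from
+// an extensive-form correlated equilibrium.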
+ +#include "open_spiel/algorithms/corr_dev_builder.h" + +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/algorithms/cfr.h" +#include "open_spiel/algorithms/corr_dist.h" +#include "open_spiel/algorithms/deterministic_policy.h" +#include "open_spiel/algorithms/expected_returns.h" +#include "open_spiel/game_transforms/turn_based_simultaneous_game.h" +#include "open_spiel/games/efg_game/efg_game.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace { +inline constexpr int kSeed = 23894982; + +TabularPolicy MergeIndependentPolicies(const TabularPolicy& policy1, + const TabularPolicy& policy2) { + TabularPolicy merged_policy; + for (const auto& infostate_and_state_policy : policy1.PolicyTable()) { + merged_policy.SetStatePolicy(infostate_and_state_policy.first, + infostate_and_state_policy.second); + } + for (const auto& infostate_and_state_policy : policy2.PolicyTable()) { + merged_policy.SetStatePolicy(infostate_and_state_policy.first, + infostate_and_state_policy.second); + } + return merged_policy; +} + +void BasicCorrDevBuilderTest() { + // Build a uniform correlation device for Kuhn poker. + std::shared_ptr game = LoadGame("kuhn_poker"); + CorrDevBuilder full_cd_builder; + + DeterministicTabularPolicy p1_policy(*game, 0); + DeterministicTabularPolicy p2_policy(*game, 1); + do { + do { + full_cd_builder.AddDeterminsticJointPolicy(MergeIndependentPolicies( + p1_policy.GetTabularPolicy(), p2_policy.GetTabularPolicy())); + } while (p2_policy.NextPolicy()); + p2_policy.ResetDefaultPolicy(); + } while (p1_policy.NextPolicy()); + + CorrelationDevice mu = full_cd_builder.GetCorrelationDevice(); + SPIEL_CHECK_EQ(mu.size(), 64 * 64); + for (const auto& prob_and_policy : mu) { + SPIEL_CHECK_FLOAT_NEAR(prob_and_policy.first, 1.0 / (64 * 64), 1e-10); + } + + std::vector uniform_returns = + ExpectedReturns(*game->NewInitialState(), GetUniformPolicy(*game), -1); + std::vector corr_dev_uniform_returns = ExpectedValues(*game, mu); + for (Player p = 0; p < game->NumPlayers(); ++p) { + SPIEL_CHECK_FLOAT_NEAR(uniform_returns[p], corr_dev_uniform_returns[p], + 1e-10); + } +} + +void BasicSamplingCorrDevBuilderTest() { + std::shared_ptr game = LoadGame("kuhn_poker"); + CorrDevBuilder cd_builder; + TabularPolicy uniform_policy = GetUniformPolicy(*game); + for (int i = 0; i < 10; ++i) { + cd_builder.AddSampledJointPolicy(uniform_policy, 1000); + } + CorrelationDevice mu = cd_builder.GetCorrelationDevice(); + SPIEL_CHECK_LE(mu.size(), 64 * 64); +} + +void CFRShapleysCorrDistTest() { + std::shared_ptr game = + LoadGame("turn_based_simultaneous_game(game=matrix_shapleys_game())"); + CorrDevBuilder cd_builder; + CFRSolverBase solver(*game, + /*alternating_updates=*/true, + /*linear_averaging=*/false, + /*regret_matching_plus=*/false, + /*random_initial_regrets*/ true, + /*seed*/ kSeed); + CorrDistConfig config; + for (int i = 0; i < 100; i++) { + solver.EvaluateAndUpdatePolicy(); + TabularPolicy current_policy = + static_cast(solver.CurrentPolicy().get()) + ->AsTabular(); + cd_builder.AddMixedJointPolicy(current_policy); + if (i % 10 == 0) { + CorrelationDevice mu = cd_builder.GetCorrelationDevice(); + double afcce_dist = AFCCEDist(*game, config, mu); + double afce_dist = AFCEDist(*game, config, mu); + double efcce_dist = EFCCEDist(*game, 
config, mu); + double efce_dist = EFCEDist(*game, config, mu); + std::vector values = ExpectedValues(*game, mu); + std::cout + << absl::StrFormat( + "CFRTest %d %2.10lf %2.10lf %2.10lf %2.10lf %2.3lf %2.3lf", i, + afcce_dist, afce_dist, efcce_dist, efce_dist, values[0], + values[1]) + << std::endl; + } + } + + CorrelationDevice mu = cd_builder.GetCorrelationDevice(); + std::cout << ToString(mu) << std::endl; +} + +void CFRGoofspielCorrDistTest() { + std::shared_ptr game = LoadGame( + "turn_based_simultaneous_game(game=goofspiel(num_cards=3,points_order=" + "descending,returns_type=total_points))"); + CorrDevBuilder cd_builder; + CFRSolverBase solver(*game, + /*alternating_updates=*/true, + /*linear_averaging=*/false, + /*regret_matching_plus=*/false, + /*random_initial_regrets*/ true, + /*seed*/ kSeed); + CorrDistConfig config; + for (int i = 0; i < 10; i++) { + solver.EvaluateAndUpdatePolicy(); + TabularPolicy current_policy = + static_cast(solver.CurrentPolicy().get()) + ->AsTabular(); + cd_builder.AddSampledJointPolicy(current_policy, 100); + } + CorrelationDevice mu = cd_builder.GetCorrelationDevice(); + double afcce_dist = AFCCEDist(*game, config, mu); + double afce_dist = AFCEDist(*game, config, mu); + double efcce_dist = EFCCEDist(*game, config, mu); + double efce_dist = EFCEDist(*game, config, mu); + std::vector values = ExpectedValues(*game, mu); + std::cout << absl::StrFormat( + "CFRTest %2.10lf %2.10lf %2.10lf, %2.10lf %2.3lf %2.3lf", + afcce_dist, afce_dist, efcce_dist, efce_dist, values[0], + values[1]) + << std::endl; +} + +} // namespace +} // namespace algorithms +} // namespace open_spiel + +namespace algorithms = open_spiel::algorithms; + +int main(int argc, char** argv) { + algorithms::BasicCorrDevBuilderTest(); + algorithms::BasicSamplingCorrDevBuilderTest(); + algorithms::CFRShapleysCorrDistTest(); + algorithms::CFRGoofspielCorrDistTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist.cc new file mode 100644 index 0000000..0aaf1bb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist.cc @@ -0,0 +1,401 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
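+// Editorial note (not part of upstream OpenSpiel): given a correlation device
+// mu (see corr_dev_builder.h), the entry points defined below are called as,
+// for example:
+//
+//   open_spiel::algorithms::CorrDistConfig config;
+//   double efce_gap  = open_spiel::algorithms::EFCEDist(*game, config, mu);
+//   double efcce_gap = open_spiel::algorithms::EFCCEDist(*game, config, mu);
+//   open_spiel::algorithms::CorrDistInfo info =
+//       open_spiel::algorithms::CCEDist(*game, mu);
+//   double cce_gap = info.dist_value;
+//
+// A value of (approximately) zero means no player can gain by deviating from
+// the recommendations, i.e. mu is (approximately) an equilibrium of the
+// corresponding type.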
+ +#include "open_spiel/algorithms/corr_dist.h" + +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/abseil-cpp/absl/strings/numbers.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/algorithms/best_response.h" +#include "open_spiel/algorithms/corr_dist/afcce.h" +#include "open_spiel/algorithms/corr_dist/afce.h" +#include "open_spiel/algorithms/corr_dist/cce.h" +#include "open_spiel/algorithms/corr_dist/ce.h" +#include "open_spiel/algorithms/corr_dist/efcce.h" +#include "open_spiel/algorithms/corr_dist/efce.h" +#include "open_spiel/algorithms/expected_returns.h" +#include "open_spiel/algorithms/tabular_exploitability.h" +#include "open_spiel/game_transforms/turn_based_simultaneous_game.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace algorithms { +namespace { +// A few helper functions local to this file. +void CheckCorrelationDeviceProbDist(const CorrelationDevice& mu) { + double prob_sum = 0.0; + for (const std::pair& item : mu) { + SPIEL_CHECK_PROB(item.first); + prob_sum += item.first; + } + SPIEL_CHECK_FLOAT_EQ(prob_sum, 1.0); +} + +ActionsAndProbs CreateDeterministicPolicy(Action chosen_action, + int num_actions) { + ActionsAndProbs actions_and_probs; + actions_and_probs.reserve(num_actions); + int num_ones = 0; + int num_zeros = 0; + for (Action action = 0; action < num_actions; ++action) { + if (action == chosen_action) { + num_ones++; + actions_and_probs.push_back({action, 1.0}); + } else { + num_zeros++; + actions_and_probs.push_back({action, 0.0}); + } + } + SPIEL_CHECK_EQ(num_ones, 1); + SPIEL_CHECK_EQ(num_ones + num_zeros, num_actions); + return actions_and_probs; +} + +CorrelationDevice ConvertCorrelationDevice( + const Game& turn_based_nfg, const NormalFormCorrelationDevice& mu) { + // First get all the infostate strings. + std::unique_ptr state = turn_based_nfg.NewInitialState(); + std::vector infostate_strings; + infostate_strings.reserve(turn_based_nfg.NumPlayers()); + for (Player p = 0; p < turn_based_nfg.NumPlayers(); ++p) { + infostate_strings.push_back(state->InformationStateString()); + state->ApplyAction(0); + } + SPIEL_CHECK_TRUE(state->IsTerminal()); + + int num_actions = turn_based_nfg.NumDistinctActions(); + CorrelationDevice new_mu; + new_mu.reserve(mu.size()); + + // Next, convert to tabular policies. + for (const NormalFormJointPolicyWithProb& jpp : mu) { + TabularPolicy policy; + SPIEL_CHECK_EQ(jpp.actions.size(), turn_based_nfg.NumPlayers()); + for (Player p = 0; p < turn_based_nfg.NumPlayers(); p++) { + policy.SetStatePolicy( + infostate_strings[p], + CreateDeterministicPolicy(jpp.actions[p], num_actions)); + } + new_mu.push_back({jpp.probability, policy}); + } + + return new_mu; +} +} // namespace + +// Helper function to return a correlation device that is a uniform distribution +// over the vector of tabular policies. +CorrelationDevice UniformCorrelationDevice( + std::vector& policies) { + CorrelationDevice mu; + mu.reserve(policies.size()); + for (const TabularPolicy& policy : policies) { + mu.push_back({1.0 / policies.size(), policy}); + } + return mu; +} + +// Return a string representation of the correlation device. 
+std::string ToString(const CorrelationDevice& corr_dev) { + std::string corr_dev_str; + for (const auto& prob_and_policy : corr_dev) { + absl::StrAppend(&corr_dev_str, "Prob: ", prob_and_policy.first, "\n"); + absl::StrAppend(&corr_dev_str, prob_and_policy.second.ToStringSorted(), + "\n"); + } + return corr_dev_str; +} + +std::vector ExpectedValues(const Game& game, + const CorrelationDevice& mu) { + CheckCorrelationDeviceProbDist(mu); + std::vector values(game.NumPlayers(), 0); + for (const std::pair& item : mu) { + std::vector item_values = + ExpectedReturns(*game.NewInitialState(), item.second, -1, false); + for (Player p = 0; p < game.NumPlayers(); ++p) { + values[p] += item.first * item_values[p]; + } + } + return values; +} + +std::vector ExpectedValues(const Game& game, + const NormalFormCorrelationDevice& mu) { + if (game.GetType().information == GameType::Information::kOneShot) { + std::shared_ptr actual_game = ConvertToTurnBased(game); + CorrelationDevice converted_mu = ConvertCorrelationDevice(*actual_game, mu); + return ExpectedValues(*actual_game, converted_mu); + } else { + SPIEL_CHECK_EQ(game.GetType().dynamics, GameType::Dynamics::kSequential); + CorrelationDevice converted_mu = ConvertCorrelationDevice(game, mu); + return ExpectedValues(game, converted_mu); + } +} + +double EFCEDist(const Game& game, CorrDistConfig config, + const CorrelationDevice& mu) { + // Check that the config matches what is supported. + SPIEL_CHECK_TRUE(config.deterministic); + + // Check for proper probability distribution. + CheckCorrelationDeviceProbDist(mu); + + auto efce_game = + std::make_shared(game.shared_from_this(), config, mu); + + // Note that the policies are already inside the game via the correlation + // device, mu. So this is a simple wrapper policy that simply follows the + // recommendations. + EFCETabularPolicy policy(config); + return NashConv(*efce_game, policy, true); +} + +double EFCCEDist(const Game& game, CorrDistConfig config, + const CorrelationDevice& mu) { + // Check that the config matches what is supported. + SPIEL_CHECK_TRUE(config.deterministic); + + // Check for proper probability distribution. + CheckCorrelationDeviceProbDist(mu); + + auto efcce_game = + std::make_shared(game.shared_from_this(), config, mu); + + // Note that the policies are already inside the game via the correlation + // device, mu. So this is a simple wrapper policy that simply follows the + // recommendations. + EFCCETabularPolicy policy(efcce_game->FollowAction(), + efcce_game->DefectAction()); + return NashConv(*efcce_game, policy, true); +} + +double AFCEDist(const Game& game, CorrDistConfig config, + const CorrelationDevice& mu) { + // Check that the config matches what is supported. + SPIEL_CHECK_TRUE(config.deterministic); + + // Check for proper probability distribution. + CheckCorrelationDeviceProbDist(mu); + + auto afce_game = + std::make_shared(game.shared_from_this(), config, mu); + + // Note that the policies are already inside the game via the correlation + // device, mu. So this is a simple wrapper policy that simply follows the + // recommendations. + AFCETabularPolicy policy(config); + return NashConv(*afce_game, policy, true); +} + +double AFCCEDist(const Game& game, CorrDistConfig config, + const CorrelationDevice& mu) { + // Check that the config matches what is supported. + SPIEL_CHECK_TRUE(config.deterministic); + + // Check for proper probability distribution. 
+ CheckCorrelationDeviceProbDist(mu); + + auto afcce_game = + std::make_shared(game.shared_from_this(), config, mu); + + // Note that the policies are already inside the game via the correlation + // device, mu. So this is a simple wrapper policy that simply follows the + // recommendations. + AFCCETabularPolicy policy(afcce_game->FollowAction(), + afcce_game->DefectAction()); + return NashConv(*afcce_game, policy, true); +} + +double CEDist(const Game& game, const NormalFormCorrelationDevice& mu) { + if (game.GetType().information == GameType::Information::kOneShot) { + std::shared_ptr actual_game = ConvertToTurnBased(game); + CorrelationDevice converted_mu = ConvertCorrelationDevice(*actual_game, mu); + CorrDistConfig config; + return EFCEDist(*actual_game, config, converted_mu); + } else { + SPIEL_CHECK_EQ(game.GetType().dynamics, GameType::Dynamics::kSequential); + CorrelationDevice converted_mu = ConvertCorrelationDevice(game, mu); + CorrDistConfig config; + return EFCEDist(game, config, converted_mu); + } +} + +double CCEDist(const Game& game, const NormalFormCorrelationDevice& mu) { + if (game.GetType().information == GameType::Information::kOneShot) { + std::shared_ptr actual_game = ConvertToTurnBased(game); + CorrelationDevice converted_mu = ConvertCorrelationDevice(*actual_game, mu); + CorrDistConfig config; + return EFCCEDist(*actual_game, config, converted_mu); + } else { + SPIEL_CHECK_EQ(game.GetType().dynamics, GameType::Dynamics::kSequential); + CorrelationDevice converted_mu = ConvertCorrelationDevice(game, mu); + CorrDistConfig config; + return EFCCEDist(game, config, converted_mu); + } +} + +CorrDistInfo CCEDist(const Game& game, const CorrelationDevice& mu, int player, + const float prob_cut_threshold, + const float action_value_tolerance) { + // Check for proper probability distribution. + CheckCorrelationDeviceProbDist(mu); + CorrDistConfig config; + auto cce_game = + std::make_shared(game.shared_from_this(), config, mu); + + CorrDistInfo dist_info{ + 0.0, + std::vector(1, std::numeric_limits::quiet_NaN()), + std::vector(1, 0), + std::vector(1, 0), + std::vector(1), + {}}; + + CCETabularPolicy policy; + std::unique_ptr root = cce_game->NewInitialState(); + TabularBestResponse best_response(*cce_game, player, &policy, + prob_cut_threshold, action_value_tolerance); + // Do not populate on policy values to save unnecessary computation. + // dist_info.on_policy_values[0] = ExpectedReturns( + // *root, policy, -1, false)[player]; + dist_info.best_response_values[0] = best_response.Value(*root); + dist_info.best_response_policies[0] = best_response.GetBestResponsePolicy(); + dist_info.deviation_incentives[0] = std::max( + 0.0, dist_info.best_response_values[0] - dist_info.on_policy_values[0]); + dist_info.dist_value += dist_info.deviation_incentives[0]; + + return dist_info; +} + +CorrDistInfo CCEDist(const Game& game, const CorrelationDevice& mu, + const float prob_cut_threshold, + const float action_value_tolerance) { + // Check for proper probability distribution. + CheckCorrelationDeviceProbDist(mu); + CorrDistConfig config; + auto cce_game = + std::make_shared(game.shared_from_this(), config, mu); + + CorrDistInfo dist_info{0.0, + std::vector(game.NumPlayers(), 0), + std::vector(game.NumPlayers(), 0), + std::vector(game.NumPlayers(), 0), + std::vector(game.NumPlayers()), + {}}; + + // Note: cannot simply call NashConv here as in the other examples. 
Because + // this auxiliary game does not have the "follow" action, it is possible that + // a best response against the correlated distribution is *negative* (i.e. + // the best deterministic policy is not as good as simply following the + // correlated recommendations), but the NashConv function has a check that the + // incentive is >= zero, so it would fail. + + CCETabularPolicy policy; + + std::unique_ptr root = cce_game->NewInitialState(); + for (auto p = Player{0}; p < cce_game->NumPlayers(); ++p) { + TabularBestResponse best_response(*cce_game, p, &policy, prob_cut_threshold, + action_value_tolerance); + dist_info.best_response_values[p] = best_response.Value(*root); + dist_info.best_response_policies[p] = best_response.GetBestResponsePolicy(); + } + dist_info.on_policy_values = ExpectedReturns(*root, policy, -1, false); + SPIEL_CHECK_EQ(dist_info.best_response_values.size(), + dist_info.on_policy_values.size()); + for (auto p = Player{0}; p < cce_game->NumPlayers(); ++p) { + // For reasons indicated in comment at the top of this funciton, we have + // max(0, ...) here. + dist_info.deviation_incentives[p] = std::max( + 0.0, dist_info.best_response_values[p] - dist_info.on_policy_values[p]); + dist_info.dist_value += dist_info.deviation_incentives[p]; + } + return dist_info; +} + +CorrDistInfo CEDist(const Game& game, const CorrelationDevice& mu, + const float action_value_tolerance) { + // Check for proper probability distribution. + CheckCorrelationDeviceProbDist(mu); + CorrDistConfig config; + auto ce_game = std::make_shared(game.shared_from_this(), config, mu); + + CorrDistInfo dist_info{ + 0.0, + std::vector(game.NumPlayers(), 0), + std::vector(game.NumPlayers(), 0), + std::vector(game.NumPlayers(), 0), + {}, + std::vector>(game.NumPlayers())}; + + CETabularPolicy policy(config); + + // For similar reasons as in CCEDist, we must manually do NashConv. + + std::unique_ptr root = ce_game->NewInitialState(); + for (auto p = Player{0}; p < ce_game->NumPlayers(); ++p) { + TabularBestResponse best_response(*ce_game, p, &policy, -1.0, + action_value_tolerance); + dist_info.best_response_values[p] = best_response.Value(*root); + + // This policy has all of the conditional ones built in. We have to extract + // one per signal by mapping back the info states. + TabularPolicy big_br_policy = best_response.GetBestResponsePolicy(); + + absl::flat_hash_map extracted_policies; + + for (const auto& infostate_and_probs : big_br_policy.PolicyTable()) { + std::string full_info_state = infostate_and_probs.first; + const size_t idx = full_info_state.find(config.recommendation_delimiter); + SPIEL_CHECK_NE(idx, std::string::npos); + std::vector parts = + absl::StrSplit(full_info_state, config.recommendation_delimiter); + SPIEL_CHECK_EQ(parts.size(), 2); + int signal = -1; + SPIEL_CHECK_TRUE(absl::SimpleAtoi(parts[1], &signal)); + SPIEL_CHECK_GE(signal, 0); + extracted_policies[signal].SetStatePolicy(parts[0], + infostate_and_probs.second); + } + + for (const auto& signal_and_policy : extracted_policies) { + dist_info.conditional_best_response_policies[p].push_back( + signal_and_policy.second); + } + } + + dist_info.on_policy_values = ExpectedReturns(*root, policy, -1, false); + SPIEL_CHECK_EQ(dist_info.best_response_values.size(), + dist_info.on_policy_values.size()); + for (auto p = Player{0}; p < ce_game->NumPlayers(); ++p) { + // For reasons indicated in comment at the top of this funciton, we have + // max(0, ...) here. 
+ dist_info.deviation_incentives[p] = std::max( + 0.0, dist_info.best_response_values[p] - dist_info.on_policy_values[p]); + dist_info.dist_value += dist_info.deviation_incentives[p]; + } + + return dist_info; +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist.h new file mode 100644 index 0000000..0325c4b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist.h @@ -0,0 +1,185 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_CORR_DIST_H_ +#define OPEN_SPIEL_ALGORITHMS_CORR_DIST_H_ + +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace algorithms { + +// This file provides a set of functions for computing the distance of a +// distribution of joint policies to a correlated equilibrium. It is the +// analogue of NashConv for correlated equilibria, i.e. sum of the incentives to +// deviate to a best response over all players, as an empirical metric that +// summarizes how far the distribution is from an equilibrium. +// +// The functions compute these metrics for extensive-form correlated equilibria +// (EFCE) and extensive-form coarse-correlated equilibria (EFCCE). The +// algorithms work by constructing an auxiliary game (similar to the one +// described in Def 2.2 of von Stengel and Forges 2008) where chance initially +// samples a joint policy, then lets the players decide to follow or not follow +// the recommendations. +// +// The definition we use matches the common interpretation of EFCE's within AI +// papers which are based on causal deviations described in (Gordon, Greenwald, +// and Marks '08), (Dudik & Gordon '09), and (Farina & Sandholm '19). +// Specifically: if players follow recommendations, they continue to receive +// recommendations. If a player deviates, that player stops receiving +// recommendations from then on. The incentive for a player to deviate toward a +// best response can be computed by the existing best response algorithm in this +// new game. +// +// In both cases of EFCE and EFCCE, the algorithms compute the normal-form +// equivalents, and two wrapper functions are provided specifically for the +// normal-form setting (CEDist and CCEDist). +// +// **WARNING**: the implementations of the metrics for the extensive-form +// versions of the correlated equilibria have only been lightly tested (on a +// few simple examples). We plan to add more thorough tests as we implement +// more benchmark general-sum games and more worked-out examples. +// +// For formal definitions and algorithms, please refer to: +// - von Stengel and Forges, 2008. Extensive-Form Correlated Equilibrium: +// Definition and Computational Complexity, Mathematics of Operations +// Research, vol 33, no. 4. 
+// - Farina, Bianchi, and Sandholm, 2019. Coarse Correlation in Extensive-Form +// Games. https://arxiv.org/abs/1908.09893 +// - Dudik & Gordon, https://arxiv.org/abs/1205.2649 +// - Gordon, Greenwald, and Marks. No-Regret Learning in Convex Games. +// https://www.cs.cmu.edu/~ggordon/gordon-greenwald-marks-icml-phi-regret.pdf + +// A CorrelationDevice represents a distribution over joint policies (name is +// from von Stengel & Forges 2008). Note, however, that unlike von Stengel & +// Forges 2008, the joint policies can be mixed. In this case, an equivalent +// joint distribution over deterministic joint policies could be reconstructed +// (if the game is small enough) or the metrics below can be approximated via +// Monte Carlo sampling of deterministic joint policies from the mixtures. +using CorrelationDevice = std::vector>; + +// Helper function to return a correlation device that is a uniform distribution +// over the vector of tabular policies. +CorrelationDevice UniformCorrelationDevice( + std::vector& policies); + +// Return a string representation of the correlation device. +std::string ToString(const CorrelationDevice& corr_dev); + +// A helper class for the normal-form functions. +struct NormalFormJointPolicyWithProb { + // Probability of this joint policy. + double probability; + + // The action taken by each player. + std::vector actions; +}; + +using NormalFormCorrelationDevice = std::vector; + +// A configuration object for the metrics. +struct CorrDistConfig { + // Are the underlying policies deterministic (pure)? Currently this is the + // only supported mode. To obtain the CorrDist metrics for distributions over + // mixed policies, see the helper functions in corr_dev_builder, with examples + // in corr_dev_builder_test.cc. + bool deterministic = true; + + // A tag used to delimit recommendation sequences from the normal part of the + // information state string. + std::string recommendation_delimiter = " R-*-=-*-R "; +}; + +// Return the expected values (one per player) of a correlation device. +std::vector ExpectedValues(const Game& game, + const CorrelationDevice& mu); +std::vector ExpectedValues(const Game& game, + const NormalFormCorrelationDevice& mu); + +// Compute the sum of individual incentives to deviate (from the joint +// distribution) to a best response, over all players. The auxiliary game +// constructed is with accordance to the EFCE concept, which means players see +// their recommendations once they reach the information states (unless they've +// chosen not to follow at some point). +double EFCEDist(const Game& game, CorrDistConfig config, + const CorrelationDevice& mu); + +// Compute the sum of individual incentives to deviate (from the joint +// distribution) to a best response, over all players. The auxiliary game +// constructed is with accordance to the EFCCE concept, which means players see +// their recommendations at their information states only after they've decided +// whether or not to follow them. +double EFCCEDist(const Game& game, CorrDistConfig config, + const CorrelationDevice& mu); + +// Agent-form variants: these are similar to EFCCE + EFCE distances above, +// except that there is at most one deviation allowed, at a single information +// set, but any information set. Other than this restriction, each one is +// analogous to EFCCE or EFCE. 
+// **Note: these have not yet been extensively tested.** +double AFCEDist(const Game& game, CorrDistConfig config, + const CorrelationDevice& mu); +double AFCCEDist(const Game& game, CorrDistConfig config, + const CorrelationDevice& mu); + +// Analog to the functions above but for normal-form games. The game can be a +// normal-form game *or* a TurnBasedSimultaneousGame wrapping a normal-form +// game. +double CEDist(const Game& game, const NormalFormCorrelationDevice& mu); +double CCEDist(const Game& game, const NormalFormCorrelationDevice& mu); + +struct CorrDistInfo { + double dist_value; + + // One per player. + std::vector on_policy_values; + std::vector best_response_values; + std::vector deviation_incentives; + std::vector best_response_policies; + + // Several per player. Only used in the CE dist case. + std::vector> conditional_best_response_policies; +}; + +// Distance to coarse-correlated in an extensive-form game. Builds a simpler +// auxiliary game similar to the *FCCE where there is one chance node that +// determines which policies the opponents follow (never revealed). Note that +// the policies in this correlation device *can* be mixed. If values is +// non-null, then it is filled with the deviation incentive of each player. +CorrDistInfo CCEDist(const Game& game, const CorrelationDevice& mu, + const float prob_cut_threshold = -1.0, + const float action_value_tolerance = -1.0); +CorrDistInfo CCEDist(const Game& game, const CorrelationDevice& mu, int player, + const float prob_cut_threshold = -1.0, + const float action_value_tolerance = -1.0); + +// Distance to a correlated equilibrium in an extensive-form game. Builds a +// simpler auxiliary game similar to the *FCE ones where there is a chance node +// that determines the joint recommendation strategies. The correlation device +// must be a distribution over deterministic policies; if you have distribution +// over mixed policies, then first convert the correlation device using the +// helper functions DeterminizeCorrDev or SampledDeterminizeCorrDev in +// corr_dev_builder.h. If values is non-null, then it is filled with the +// deviation incentive of each player. +CorrDistInfo CEDist(const Game& game, const CorrelationDevice& mu, + const float action_value_tolerance = -1.0); + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_CORR_DIST_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/afcce.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/afcce.cc new file mode 100644 index 0000000..6dccdd3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/afcce.cc @@ -0,0 +1,217 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
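+// Editorial note (not part of upstream OpenSpiel): AFCCEDist (corr_dist.cc)
+// wraps the original game in the AFCCEGame defined in afcce.h before running
+// a best response. A sketch of how that wrapper is instantiated, mirroring
+// the call in corr_dist.cc:
+//
+//   auto afcce_game = std::make_shared<open_spiel::algorithms::AFCCEGame>(
+//       game.shared_from_this(), config, mu);
+//   // Two synthetic actions are appended after the original action ids:
+//   open_spiel::Action follow = afcce_game->FollowAction();
+//   open_spiel::Action defect = afcce_game->DefectAction();
+//
+// A player who plays `follow` executes the current recommendation; a player
+// who plays `defect` gets exactly one chance to pick any legal action of the
+// underlying game and must follow recommendations thereafter.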
+ +#include "open_spiel/algorithms/corr_dist/afcce.h" + +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace algorithms { + +AFCCEState::AFCCEState(std::shared_ptr game, + std::unique_ptr state, CorrDistConfig config, + const CorrelationDevice& mu, Action follow_action, + Action defect_action) + : WrappedState(game, std::move(state)), + config_(config), + mu_(mu), + follow_action_(follow_action), + defect_action_(defect_action), + rec_index_(-1), + defected_(game->NumPlayers(), 0), + defection_infoset_(game->NumPlayers(), absl::nullopt), + recommendation_seq_(game->NumPlayers(), std::vector({})) {} + +Player AFCCEState::CurrentPlayer() const { + // Only override this in the first chance actions. + if (rec_index_ < 0) { + return kChancePlayerId; + } else { + return state_->CurrentPlayer(); + } +} + +ActionsAndProbs AFCCEState::ChanceOutcomes() const { + if (rec_index_ < 0) { + ActionsAndProbs outcomes; + for (int i = 0; i < mu_.size(); ++i) { + outcomes.push_back({i, mu_[i].first}); + } + return outcomes; + } else { + return state_->ChanceOutcomes(); + } +} + +std::vector AFCCEState::LegalActions() const { + SPIEL_CHECK_FALSE(IsSimultaneousNode()); + + if (IsTerminal()) { + return {}; + } else if (IsChanceNode()) { + return LegalChanceOutcomes(); + } + + if (!HasDefected(CurrentPlayer())) { + // If the player has not defected then they have exactly two choices: + // follow or defect. + return {follow_action_, defect_action_}; + } else if (HasDefected(CurrentPlayer()) && + !defection_infoset_[CurrentPlayer()].has_value()) { + // Player just defected; now they must choose an action. + return state_->LegalActions(); + } else { + SPIEL_CHECK_TRUE(HasDefected(CurrentPlayer())); + SPIEL_CHECK_TRUE(defection_infoset_[CurrentPlayer()].has_value()); + + // This player already previously defected, so cannot do so any more. + return {follow_action_}; + } +} + +std::string AFCCEState::InformationStateString(Player player) const { + // should look like + // + // + std::string rec_str = absl::StrJoin(recommendation_seq_[player], ","); + std::string infoset_str = state_->InformationStateString(player); + SPIEL_CHECK_EQ(infoset_str.find(config_.recommendation_delimiter), + std::string::npos); + // Note: no need to attach the defection location here because it can be + // inferred from the -1 action in the recommendation sequence (due to perfect + // recall), but we add it anyway if it's been determined yet. + // Also note that there are two infosets for a defection: + // - The first one where a player chooses the defect action. Here, defected? + // is false and there is no defection infoset set yet + // - Directly after defection by the same player. Here defected? is true but + // the infoset is not yet set. + // After the defection, defected? is set to to true and the defection infoset + // is included in the infoset string. + return absl::StrCat(infoset_str, config_.recommendation_delimiter, + HasDefected(player) ? "true " : "false ", rec_str, + defection_infoset_[player].has_value() + ? 
defection_infoset_[player].value() + : ""); +} + +std::string AFCCEState::ToString() const { + std::string state_str = absl::StrFormat( + "%s\nCur player: %i\nRec index %i\nDefected %s", state_->ToString(), + CurrentPlayer(), rec_index_, absl::StrJoin(defected_, " ")); + for (Player p = 0; p < state_->NumPlayers(); ++p) { + absl::StrAppend(&state_str, "\nPlayer ", p, " defection infoset: ", + !defection_infoset_[p].has_value() + ? "nullopt" + : defection_infoset_[p].value(), + "\n"); + } + for (Player p = 0; p < state_->NumPlayers(); ++p) { + absl::StrAppend(&state_str, "\nPlayer ", p, " recommendation seq: ", + absl::StrJoin(recommendation_seq_[p], ","), "\n"); + } + return state_str; +} + +bool AFCCEState::HasDefected(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, game_->NumPlayers()); + return defected_[player] == 1; +} + +Action AFCCEState::CurRecommendation() const { + ActionsAndProbs actions_and_probs = + mu_[rec_index_].second.GetStatePolicy(state_->InformationStateString()); + Action rec_action = GetAction(actions_and_probs); + SPIEL_CHECK_TRUE(rec_action != kInvalidAction); + return rec_action; +} + +void AFCCEState::DoApplyAction(Action action_id) { + if (rec_index_ < 0) { + // Pick the joint policy which will provide recommendations. + rec_index_ = action_id; + SPIEL_CHECK_LT(rec_index_, mu_.size()); + } else if (state_->IsChanceNode()) { + // Regular chance node + state_->ApplyAction(action_id); + } else { + Player cur_player = CurrentPlayer(); + SPIEL_CHECK_GE(cur_player, 0); + SPIEL_CHECK_LT(cur_player, game_->NumPlayers()); + + if (!HasDefected(cur_player)) { + // Can only submit these two actions. + SPIEL_CHECK_TRUE(action_id == follow_action_ || + action_id == defect_action_); + + // Check for defection at this point. + Action recommendation = CurRecommendation(); + + if (action_id == follow_action_) { + // Follow recommendation. + std::vector legal_actions = state_->LegalActions(); + SPIEL_CHECK_TRUE(absl::c_find(legal_actions, recommendation) != + legal_actions.end()); + state_->ApplyAction(recommendation); + recommendation_seq_[cur_player].push_back(recommendation); + } else { + // Defect. + defected_[cur_player] = 1; + } + } else if (HasDefected(cur_player) && + !defection_infoset_[cur_player].has_value()) { + // Player just defected: regular game from here on. + state_->ApplyAction(action_id); + defection_infoset_[cur_player] = + state_->InformationStateString(cur_player); + + // Player is defecting, so fill this slot with invalid action. Defecting + // players should never discover this recommendation. + recommendation_seq_[cur_player].push_back(kInvalidAction); + } else { + SPIEL_CHECK_TRUE(HasDefected(cur_player)); + SPIEL_CHECK_TRUE(defection_infoset_[cur_player].has_value()); + + // Already previously defected. Should only be follow. + Action recommendation = CurRecommendation(); + SPIEL_CHECK_EQ(action_id, follow_action_); + std::vector legal_actions = state_->LegalActions(); + SPIEL_CHECK_TRUE(absl::c_find(legal_actions, recommendation) != + legal_actions.end()); + state_->ApplyAction(recommendation); + recommendation_seq_[cur_player].push_back(recommendation); + } + } +} + +ActionsAndProbs AFCCETabularPolicy::GetStatePolicy(const State& state) const { + // The best response code has to have a policy defined everywhere when it + // builds its initial tree. 
For the fixed policies, the players will not + // defect, so we define a uniform policy in the regions where players have + // defected (which will not affect the best responding player, since the + // opponents will never reach these regions). + const auto* AFCCE_state = dynamic_cast(&state); + SPIEL_CHECK_TRUE(AFCCE_state != nullptr); + if (AFCCE_state->HasDefected(state.CurrentPlayer())) { + return UniformStatePolicy(state); + } + + // Simply returns a fixed policy with prob 1 on the follow action + return {{follow_action_, 1.0}, {defect_action_, 0.0}}; +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/afcce.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/afcce.h new file mode 100644 index 0000000..01f413d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/afcce.h @@ -0,0 +1,137 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_CORR_DIST_AFCCE_H_ +#define OPEN_SPIEL_ALGORITHMS_CORR_DIST_AFCCE_H_ + +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/algorithms/corr_dist.h" +#include "open_spiel/game_transforms/game_wrapper.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace algorithms { + +// The AFCCE auxiliary game is very similar to the EFCCE auxiliary game. The +// only difference is that the number of deviations is limited to 1. Once the +// player has deviated, they can no longer deviate and must follow for the +// rest of the game. +class AFCCEState : public WrappedState { + public: + AFCCEState(std::shared_ptr game, std::unique_ptr state, + CorrDistConfig config, const CorrelationDevice& mu, + Action follow_action, Action defect_action); + + std::unique_ptr Clone() const override { + return std::make_unique(*this); + } + + // Need to override this because otherwise WrappedState forwards the + // implementation to the underlying state, which calls the wrong + // ChanceOutcomes + std::vector LegalChanceOutcomes() const override { + return State::LegalChanceOutcomes(); + } + + Player CurrentPlayer() const override; + ActionsAndProbs ChanceOutcomes() const override; + std::vector LegalActions() const override; + std::string InformationStateString(Player player) const override; + std::string ToString() const override; + + bool HasDefected(Player player) const; + + protected: + Action CurRecommendation() const; + void DoApplyAction(Action action_id) override; + + private: + const CorrDistConfig config_; + const CorrelationDevice& mu_; + + Action follow_action_; + Action defect_action_; + + // Which joint policy was chosen? + int rec_index_; + + // Has the player defected? + std::vector defected_; + + // Where did the player defect? This is the information set of the original + // game. Indexed by player. 
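The key detail in `AFCCEState::InformationStateString` above is how the auxiliary game tags the base information set with the correlation-device context: the base infoset string, a delimiter, a defected? flag, the recommendation sequence, and (once it is known) the defection infoset. The sketch below reproduces that string composition with plain `std::string` operations only; the delimiter value, the `kInvalidAction` sentinel, and the sample infoset strings are illustrative stand-ins, not the OpenSpiel constants.

```cpp
// Minimal sketch of an AFCCE-style auxiliary information-state key.
// Plain std types only; delimiter/sentinel/infoset values are illustrative.
#include <iostream>
#include <optional>
#include <sstream>
#include <string>
#include <vector>

using Action = int;
constexpr Action kInvalidAction = -1;         // placeholder for a hidden recommendation
const std::string kRecDelimiter = " #rec# ";  // stand-in for config_.recommendation_delimiter

std::string JoinActions(const std::vector<Action>& seq) {
  std::ostringstream oss;
  for (size_t i = 0; i < seq.size(); ++i) {
    if (i > 0) oss << ",";
    oss << seq[i];
  }
  return oss.str();
}

// base_infoset:      the original game's information state string.
// defected:          has this player already taken the one allowed deviation?
// rec_seq:           recommendations received so far (kInvalidAction where hidden).
// defection_infoset: set once the deviating player has chosen their own action.
std::string AuxInfoStateKey(const std::string& base_infoset, bool defected,
                            const std::vector<Action>& rec_seq,
                            const std::optional<std::string>& defection_infoset) {
  return base_infoset + kRecDelimiter + (defected ? "true " : "false ") +
         JoinActions(rec_seq) +
         (defection_infoset.has_value() ? defection_infoset.value() : "");
}

int main() {
  // Before any deviation: the player has followed recommendations 2 and 0.
  std::cout << AuxInfoStateKey("P0 card:K pot:2", false, {2, 0}, std::nullopt) << "\n";
  // After the single allowed deviation: the hidden recommendation is masked and
  // the infoset at which the deviation happened is appended.
  std::cout << AuxInfoStateKey("P0 card:K pot:4", true, {2, 0, kInvalidAction},
                               std::string("P0 card:K pot:4 bet"))
            << "\n";
  return 0;
}
```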
+ std::vector> defection_infoset_; + + // The sequence of recommendations, indexed by player + std::vector> recommendation_seq_; +}; + +class AFCCEGame : public WrappedGame { + public: + AFCCEGame(std::shared_ptr game, CorrDistConfig config, + const CorrelationDevice& mu) + : WrappedGame(game, game->GetType(), game->GetParameters()), + config_(config), + mu_(mu), + orig_num_distinct_actions_(game->NumDistinctActions()) {} + + std::unique_ptr NewInitialState() const override { + return std::make_unique(shared_from_this(), + game_->NewInitialState(), config_, mu_, + FollowAction(), DefectAction()); + } + + int NumDistinctActions() const override { + // 2 extra actions: cooperate/follow or defect + return orig_num_distinct_actions_ + 2; + } + + int FollowAction() const { return orig_num_distinct_actions_; } + int DefectAction() const { return orig_num_distinct_actions_ + 1; } + + private: + const CorrDistConfig config_; + const CorrelationDevice& mu_; + + // Number of distinct actions in the original game. + int orig_num_distinct_actions_; +}; + +class AFCCETabularPolicy : public TabularPolicy { + public: + AFCCETabularPolicy(Action follow_action, Action defect_action) + : follow_action_(follow_action), defect_action_(defect_action) {} + + ActionsAndProbs GetStatePolicy(const std::string& info_state) const override { + SpielFatalError("GetStatePolicy(const std::string&) should not be called."); + return TabularPolicy::GetStatePolicy(info_state); + } + ActionsAndProbs GetStatePolicy(const State& state, Player pl) const override { + SPIEL_CHECK_EQ(state.CurrentPlayer(), pl); + return GetStatePolicy(state); + } + ActionsAndProbs GetStatePolicy(const State& state) const override; + + private: + const Action follow_action_; + const Action defect_action_; +}; + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_CORR_DIST_AFCCE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/afce.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/afce.cc new file mode 100644 index 0000000..2ef0c84 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/afce.cc @@ -0,0 +1,218 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/corr_dist/afce.h" + +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" + +namespace open_spiel { +namespace algorithms { + +AFCEState::AFCEState(std::shared_ptr game, + std::unique_ptr state, CorrDistConfig config, + const CorrelationDevice& mu) + : WrappedState(game, std::move(state)), + config_(config), + mu_(mu), + rec_index_(-1), + defected_(game->NumPlayers(), 0), + defection_infoset_(game->NumPlayers(), absl::nullopt), + recommendation_seq_(game->NumPlayers(), std::vector({})) {} + +Player AFCEState::CurrentPlayer() const { + // Only override this in the first chance actions. 
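`AFCCEGame` just above (and `EFCCEGame` further down) extends the original action space by exactly two ids: follow is `orig_num_distinct_actions_` and defect is `orig_num_distinct_actions_ + 1`, so `NumDistinctActions()` grows by 2. A small standalone illustration of that encoding, with hypothetical names, is below.

```cpp
// Sketch of the follow/defect action-id encoding used by the wrapped games.
// Names are illustrative; only the arithmetic mirrors the classes above.
#include <cassert>
#include <iostream>

struct FollowDefectCoding {
  int orig_num_distinct_actions;
  int NumDistinctActions() const { return orig_num_distinct_actions + 2; }
  int FollowAction() const { return orig_num_distinct_actions; }
  int DefectAction() const { return orig_num_distinct_actions + 1; }
  bool IsOriginalAction(int a) const { return a >= 0 && a < orig_num_distinct_actions; }
};

int main() {
  // A base game with 3 distinct actions (e.g. fold/call/raise) gets 2 extra ids.
  FollowDefectCoding coding{3};
  assert(coding.NumDistinctActions() == 5);
  assert(coding.FollowAction() == 3);
  assert(coding.DefectAction() == 4);
  assert(coding.IsOriginalAction(2) && !coding.IsOriginalAction(coding.FollowAction()));
  std::cout << "follow=" << coding.FollowAction()
            << " defect=" << coding.DefectAction() << "\n";
  return 0;
}
```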
+ if (rec_index_ < 0) { + return kChancePlayerId; + } else { + return state_->CurrentPlayer(); + } +} + +ActionsAndProbs AFCEState::ChanceOutcomes() const { + if (rec_index_ < 0) { + ActionsAndProbs outcomes; + for (int i = 0; i < mu_.size(); ++i) { + outcomes.push_back({i, mu_[i].first}); + } + return outcomes; + } else { + return state_->ChanceOutcomes(); + } +} + +std::vector AFCEState::LegalActions() const { + SPIEL_CHECK_FALSE(IsSimultaneousNode()); + + if (IsTerminal()) { + return {}; + } else if (IsChanceNode()) { + return LegalChanceOutcomes(); + } + + if (!HasDefected(CurrentPlayer())) { + // If the player has not defected, they are unrestricted. + return state_->LegalActions(); + } else { + Action recommended_action = CurRecommendation(); + + // Check that it's a legal recommendation. + std::vector legal_actions = state_->LegalActions(); + SPIEL_CHECK_TRUE(std::find(legal_actions.begin(), legal_actions.end(), + recommended_action) != legal_actions.end()); + + // It is the only allowed action once this player has deviated. + return {recommended_action}; + } +} + +std::string AFCEState::InformationStateString(Player player) const { + // should look like + // + // + SPIEL_CHECK_FALSE(IsChanceNode()); + std::string rec_str = absl::StrJoin(recommendation_seq_[player], ","); + + // Always add the recommendation. + absl::StrAppend(&rec_str, ",", CurRecommendation()); + + std::string infoset_str = state_->InformationStateString(player); + SPIEL_CHECK_EQ(infoset_str.find(config_.recommendation_delimiter), + std::string::npos); + + // Note that there are two infosets for a defection: + // - The first one where a player chooses the defect action. Here, defected? + // is false and there is no defection infoset set yet + // - Directly after defection by the same player. Here defected? is true but + // the infoset is not yet set. + // After the defection, defected? is set to to true and the defection infoset + // is included in the infoset string. + return absl::StrCat(infoset_str, config_.recommendation_delimiter, + HasDefected(player) ? "true " : "false ", rec_str, + defection_infoset_[player].has_value() + ? defection_infoset_[player].value() + : ""); +} + +std::string AFCEState::ToString() const { + std::string state_str = absl::StrFormat( + "%s\nCur player: %i\nRec index %i\nDefected %s", state_->ToString(), + CurrentPlayer(), rec_index_, absl::StrJoin(defected_, " ")); + for (Player p = 0; p < state_->NumPlayers(); ++p) { + absl::StrAppend(&state_str, "\nPlayer ", p, " defection infoset: ", + !defection_infoset_[p].has_value() + ? "nullopt" + : defection_infoset_[p].value(), + "\n"); + } + for (Player p = 0; p < state_->NumPlayers(); ++p) { + absl::StrAppend(&state_str, "\nPlayer ", p, " recommendation seq: ", + absl::StrJoin(recommendation_seq_[p], ","), "\n"); + } + return state_str; +} + +bool AFCEState::HasDefected(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, game_->NumPlayers()); + return defected_[player] == 1; +} + +Action AFCEState::CurRecommendation() const { + SPIEL_CHECK_GE(rec_index_, 0); + SPIEL_CHECK_LT(rec_index_, mu_.size()); + ActionsAndProbs actions_and_probs = + mu_[rec_index_].second.GetStatePolicy(state_->InformationStateString()); + Action rec_action = GetAction(actions_and_probs); + SPIEL_CHECK_TRUE(rec_action != kInvalidAction); + return rec_action; +} + +void AFCEState::DoApplyAction(Action action_id) { + if (rec_index_ < 0) { + // Pick the joint policy which will provide recommendations. 
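In the AFCE auxiliary game (see `AFCEState::LegalActions` above) the player keeps the full action set until they deviate once; afterwards the legal set collapses to the single recommended action for the rest of the game. The sketch below mirrors that bookkeeping with plain containers; the recommendation argument is a stand-in for a lookup into the correlation device.

```cpp
// Sketch of AFCE-style single-deviation bookkeeping, with plain std types.
#include <iostream>
#include <vector>

using Action = int;

struct AfcePlayerState {
  bool defected = false;

  // Before a deviation the player is unrestricted; afterwards only the
  // recommended action is legal for the rest of the game.
  std::vector<Action> LegalActions(const std::vector<Action>& base_legal,
                                   Action recommendation) const {
    if (!defected) return base_legal;
    return {recommendation};
  }

  // Mark the (single) deviation the first time the chosen action differs
  // from the recommendation.
  void OnAction(Action chosen, Action recommendation) {
    if (!defected && chosen != recommendation) defected = true;
  }
};

int main() {
  AfcePlayerState p;
  std::vector<Action> base = {0, 1, 2};
  std::cout << "choices before deviation: " << p.LegalActions(base, 1).size() << "\n";  // 3
  p.OnAction(/*chosen=*/2, /*recommendation=*/1);  // the one allowed deviation
  std::cout << "choices after deviation:  " << p.LegalActions(base, 0).size() << "\n";  // 1
  return 0;
}
```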
+ rec_index_ = action_id; + SPIEL_CHECK_GE(rec_index_, 0); + SPIEL_CHECK_LT(rec_index_, mu_.size()); + } else if (state_->IsChanceNode()) { + // Regular chance node + state_->ApplyAction(action_id); + } else { + // Check for defection at this point. + const Action recommendation = CurRecommendation(); + + Player cur_player = CurrentPlayer(); + SPIEL_CHECK_GE(cur_player, 0); + SPIEL_CHECK_LT(cur_player, game_->NumPlayers()); + + if (HasDefected(cur_player)) { + // The recommendation should be the only legal action. + SPIEL_CHECK_EQ(action_id, recommendation); + } + + // In this auxiliary game the agent always gets a recommendation. + recommendation_seq_[cur_player].push_back(recommendation); + + if (action_id != recommendation) { + // Cannot defect more than once. + SPIEL_CHECK_TRUE(!HasDefected(cur_player)); + defected_[cur_player] = 1; + defection_infoset_[cur_player] = + state_->InformationStateString(cur_player); + } + + state_->ApplyAction(action_id); + } +} + +ActionsAndProbs AFCETabularPolicy::GetStatePolicy(const State& state) const { + // The best response code has to have a policy defined everywhere when it + // builds its initial tree. For the fixed policies, the players will not + // defect, so we define a uniform policy in the regions where players have + // defected (which will not affect the best responding player, since the + // opponents will never reach these regions). + const auto* AFCE_state = dynamic_cast(&state); + SPIEL_CHECK_TRUE(AFCE_state != nullptr); + if (AFCE_state->HasDefected(state.CurrentPlayer())) { + return UniformStatePolicy(state); + } + + // Otherwise: simply returns a fixed policy with prob 1 on the recommended + // action (extrapolated from the information state string) and 0 on the + // others. + std::string info_state = state.InformationStateString(); + const size_t idx = info_state.find(config_.recommendation_delimiter); + SPIEL_CHECK_NE(idx, std::string::npos); + + // String looks like ... + std::vector suffix_parts = absl::StrSplit( + info_state.substr(idx + config_.recommendation_delimiter.length()), ' '); + SPIEL_CHECK_TRUE(suffix_parts[0] == "true" || suffix_parts[0] == "false"); + std::vector rec_seq = absl::StrSplit(suffix_parts[1], ','); + SPIEL_CHECK_GE(rec_seq.size(), 1); + Action rec_action; + ActionsAndProbs state_policy; + std::vector legal_actions = state.LegalActions(); + state_policy.reserve(legal_actions.size()); + SPIEL_CHECK_TRUE(absl::SimpleAtoi(rec_seq.back(), &rec_action)); + for (Action action : legal_actions) { + state_policy.push_back({action, action == rec_action ? 1.0 : 0.0}); + } + return state_policy; +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/afce.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/afce.h new file mode 100644 index 0000000..474513c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/afce.h @@ -0,0 +1,112 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_CORR_DIST_AFCE_H_ +#define OPEN_SPIEL_ALGORITHMS_CORR_DIST_AFCE_H_ + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/algorithms/corr_dist.h" +#include "open_spiel/game_transforms/game_wrapper.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace algorithms { + +class AFCEState : public WrappedState { + public: + AFCEState(std::shared_ptr game, std::unique_ptr state, + CorrDistConfig config, const CorrelationDevice& mu); + + std::unique_ptr Clone() const override { + return std::make_unique(*this); + } + + // Need to override this because otherwise WrappedState forwards the + // implementation to the underlying state, which calls the wrong + // ChanceOutcomes + std::vector LegalChanceOutcomes() const override { + return State::LegalChanceOutcomes(); + } + + Player CurrentPlayer() const override; + ActionsAndProbs ChanceOutcomes() const override; + std::vector LegalActions() const override; + std::string InformationStateString(Player player) const override; + std::string ToString() const; + + bool HasDefected(Player player) const; + + protected: + Action CurRecommendation() const; + void DoApplyAction(Action action_id) override; + + private: + CorrDistConfig config_; + const CorrelationDevice& mu_; + + // Which joint policy was chosen? + int rec_index_; + + // Has the player defected? + std::vector defected_; + + // Where did the player defect? This is the information set of the original + // game. Indexed by player. + std::vector> defection_infoset_; + + // The sequence of recommendations, indexed by player + std::vector> recommendation_seq_; +}; + +class AFCEGame : public WrappedGame { + public: + AFCEGame(std::shared_ptr game, CorrDistConfig config, + const CorrelationDevice& mu) + : WrappedGame(game, game->GetType(), game->GetParameters()), + config_(config), + mu_(mu) {} + + std::unique_ptr NewInitialState() const override { + return std::make_unique(shared_from_this(), + game_->NewInitialState(), config_, mu_); + } + + protected: + const CorrDistConfig config_; + const CorrelationDevice& mu_; +}; + +class AFCETabularPolicy : public TabularPolicy { + public: + AFCETabularPolicy(const CorrDistConfig& config) : config_(config) {} + + ActionsAndProbs GetStatePolicy(const std::string& info_state) const override { + SpielFatalError("GetStatePolicy(const std::string&) should not be called."); + return TabularPolicy::GetStatePolicy(info_state); + } + ActionsAndProbs GetStatePolicy(const State& state, Player pl) const override { + SPIEL_CHECK_EQ(state.CurrentPlayer(), pl); + return GetStatePolicy(state); + } + ActionsAndProbs GetStatePolicy(const State& state) const override; + + private: + const CorrDistConfig config_; +}; + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_CORR_DIST_AFCE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/cce.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/cce.cc new file mode 100644 index 0000000..e851854 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/cce.cc @@ -0,0 +1,103 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/corr_dist/cce.h" + +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/algorithms/corr_dist/efcce.h" +#include "open_spiel/spiel_globals.h" + +namespace open_spiel { +namespace algorithms { + +CCEState::CCEState(std::shared_ptr game, + std::unique_ptr state, CorrDistConfig config, + const CorrelationDevice& mu) + : WrappedState(game, std::move(state)), + config_(config), + mu_(mu), + rec_index_(-1) {} + +Player CCEState::CurrentPlayer() const { + // Only override this in the first chance actions. + if (rec_index_ < 0) { + return kChancePlayerId; + } else { + return state_->CurrentPlayer(); + } +} + +ActionsAndProbs CCEState::ChanceOutcomes() const { + if (rec_index_ < 0) { + ActionsAndProbs outcomes; + for (int i = 0; i < mu_.size(); ++i) { + outcomes.push_back({i, mu_[i].first}); + } + return outcomes; + } else { + return state_->ChanceOutcomes(); + } +} + +std::vector CCEState::LegalActions() const { + SPIEL_CHECK_FALSE(IsSimultaneousNode()); + + if (IsTerminal()) { + return {}; + } else if (IsChanceNode()) { + return LegalChanceOutcomes(); + } else { + return state_->LegalActions(); + } +} + +std::string CCEState::InformationStateString(Player player) const { + return state_->InformationStateString(player); +} + +std::string CCEState::ToString() const { + std::string state_str = + absl::StrFormat("%s\nCur player: %i\nRec index %i", state_->ToString(), + CurrentPlayer(), rec_index_); + return state_str; +} + +void CCEState::DoApplyAction(Action action_id) { + if (rec_index_ < 0) { + // Pick the joint policy which will provide recommendations. + rec_index_ = action_id; + SPIEL_CHECK_LT(rec_index_, mu_.size()); + } else if (state_->IsChanceNode()) { + // Regular chance node + state_->ApplyAction(action_id); + } else { + // Regular decision node + state_->ApplyAction(action_id); + } +} + +ActionsAndProbs CCEState::CurrentRecommendedStatePolicy() const { + SPIEL_CHECK_GE(rec_index_, 0); + return mu_[rec_index_].second.GetStatePolicy( + InformationStateString(CurrentPlayer())); +} + +ActionsAndProbs CCETabularPolicy::GetStatePolicy(const State& state) const { + const auto* cce_state = dynamic_cast(&state); + SPIEL_CHECK_TRUE(cce_state != nullptr); + return cce_state->CurrentRecommendedStatePolicy(); +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/cce.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/cce.h new file mode 100644 index 0000000..57bcdc4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/cce.h @@ -0,0 +1,106 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_CORR_DIST_CCE_H_ +#define OPEN_SPIEL_ALGORITHMS_CORR_DIST_CCE_H_ + +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/algorithms/corr_dist.h" +#include "open_spiel/game_transforms/game_wrapper.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace algorithms { + +class CCEState : public WrappedState { + public: + CCEState(std::shared_ptr game, std::unique_ptr state, + CorrDistConfig config, const CorrelationDevice& mu); + + std::unique_ptr Clone() const override { + return std::make_unique(*this); + } + + // Need to override this because otherwise WrappedState forwards the + // implementation to the underlying state, which calls the wrong + // ChanceOutcomes + std::vector LegalChanceOutcomes() const override { + return State::LegalChanceOutcomes(); + } + + Player CurrentPlayer() const override; + ActionsAndProbs ChanceOutcomes() const override; + std::vector LegalActions() const override; + std::string InformationStateString(Player player) const override; + std::string ToString() const override; + + ActionsAndProbs CurrentRecommendedStatePolicy() const; + + protected: + void DoApplyAction(Action action_id) override; + + private: + const CorrDistConfig config_; + const CorrelationDevice& mu_; + + // Which joint policy was chosen? + int rec_index_; +}; + +class CCEGame : public WrappedGame { + public: + CCEGame(std::shared_ptr game, CorrDistConfig config, + const CorrelationDevice& mu) + : WrappedGame(game, game->GetType(), game->GetParameters()), + config_(config), + mu_(mu), + orig_num_distinct_actions_(game->NumDistinctActions()) {} + + std::unique_ptr NewInitialState() const override { + return std::make_unique(shared_from_this(), + game_->NewInitialState(), config_, mu_); + } + + int NumDistinctActions() const override { return orig_num_distinct_actions_; } + + private: + const CorrDistConfig config_; + const CorrelationDevice& mu_; + + // Number of distinct actions in the original game. 
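In every one of these wrapped states the first chance node simply draws an index into the correlation device, with `mu_[i].first` as the outcome probability, and that index then fixes which joint policy supplies recommendations for the rest of the episode. A standalone sketch of that draw, representing the device by its probabilities only (the weights below are made-up), is shown here.

```cpp
// Sketch: the initial chance node draws which joint policy the device recommends.
// The device is reduced to its probabilities; the weights below are made-up.
#include <iostream>
#include <random>
#include <vector>

int main() {
  // mu_[i].first for a hypothetical 3-policy correlation device.
  std::vector<double> policy_probs = {0.5, 0.25, 0.25};

  std::mt19937 rng(/*seed=*/42);
  std::discrete_distribution<int> pick(policy_probs.begin(), policy_probs.end());

  // rec_index < 0 marks "not drawn yet"; the draw fixes which joint policy
  // supplies recommendations for the remainder of the episode.
  int rec_index = -1;
  rec_index = pick(rng);
  std::cout << "recommendation index: " << rec_index << "\n";
  return 0;
}
```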
+ int orig_num_distinct_actions_; +}; + +class CCETabularPolicy : public TabularPolicy { + public: + CCETabularPolicy() {} + + ActionsAndProbs GetStatePolicy(const std::string& info_state) const override { + SpielFatalError("GetStatePolicy(const std::string&) should not be called."); + return TabularPolicy::GetStatePolicy(info_state); + } + ActionsAndProbs GetStatePolicy(const State& state, Player pl) const override { + SPIEL_CHECK_EQ(state.CurrentPlayer(), pl); + return GetStatePolicy(state); + } + ActionsAndProbs GetStatePolicy(const State& state) const override; +}; + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_CORR_DIST_CCE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/ce.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/ce.cc new file mode 100644 index 0000000..ba94a47 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/ce.cc @@ -0,0 +1,178 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/corr_dist/ce.h" + +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/algorithms/corr_dist/efcce.h" +#include "open_spiel/algorithms/get_all_infostates.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel_globals.h" + +namespace open_spiel { +namespace algorithms { + +CEState::CEState(std::shared_ptr game, std::unique_ptr state, + CorrDistConfig config, const CorrelationDevice& mu) + : WrappedState(game, std::move(state)), + config_(config), + mu_(mu), + rec_index_(-1) {} + +Player CEState::CurrentPlayer() const { + // Only override this in the first chance actions. 
+ if (rec_index_ < 0) { + return kChancePlayerId; + } else { + return state_->CurrentPlayer(); + } +} + +ActionsAndProbs CEState::ChanceOutcomes() const { + if (rec_index_ < 0) { + ActionsAndProbs outcomes; + for (int i = 0; i < mu_.size(); ++i) { + outcomes.push_back({i, mu_[i].first}); + } + return outcomes; + } else { + return state_->ChanceOutcomes(); + } +} + +std::vector CEState::LegalActions() const { + SPIEL_CHECK_FALSE(IsSimultaneousNode()); + + if (IsTerminal()) { + return {}; + } else if (IsChanceNode()) { + return LegalChanceOutcomes(); + } else { + return state_->LegalActions(); + } +} + +std::string CEState::InformationStateString(Player player) const { + // should look like + SPIEL_CHECK_FALSE(IsChanceNode()); + std::string infoset_str = state_->InformationStateString(player); + SPIEL_CHECK_EQ(infoset_str.find(config_.recommendation_delimiter), + std::string::npos); + const auto* parent_game = down_cast(game_.get()); + SPIEL_CHECK_GE(rec_index_, 0); + int signal_id = parent_game->GetSignalId(rec_index_, player); + return absl::StrCat(infoset_str, config_.recommendation_delimiter, signal_id); +} + +std::string CEState::ToString() const { + std::string state_str = + absl::StrFormat("%s\nCur player: %i\nRec index %i", state_->ToString(), + CurrentPlayer(), rec_index_); + return state_str; +} + +void CEState::DoApplyAction(Action action_id) { + if (rec_index_ < 0) { + // Pick the joint policy which will provide recommendations. + rec_index_ = action_id; + SPIEL_CHECK_LT(rec_index_, mu_.size()); + } else if (state_->IsChanceNode()) { + // Regular chance node + state_->ApplyAction(action_id); + } else { + // Regular decision node + state_->ApplyAction(action_id); + } +} + +ActionsAndProbs CEState::RecommendedStatePolicy( + const std::string& info_state) const { + SPIEL_CHECK_GE(rec_index_, 0); + return mu_[rec_index_].second.GetStatePolicy(info_state); +} + +CEGame::CEGame(std::shared_ptr game, CorrDistConfig config, + const CorrelationDevice& mu) + : WrappedGame(game, game->GetType(), game->GetParameters()), + config_(config), + mu_(mu), + orig_num_distinct_actions_(game->NumDistinctActions()), + signal_ids_(game->NumPlayers(), 0) { + // First, build the map that will identify which information states belong + // to which player. + { + std::vector> all_infostates = + GetAllInformationStates(*game); + SPIEL_CHECK_EQ(all_infostates.size(), game->NumPlayers()); + for (Player p = 0; p < all_infostates.size(); ++p) { + for (const std::string& info_state : all_infostates[p]) { + const auto iter = info_state_to_player_.find(info_state); + if (iter != info_state_to_player_.end()) { + SpielFatalError("Duplicate information set found!"); + } + info_state_to_player_[info_state] = p; + } + } + } + + // Now, go through each joint policy in the correlation device, splitting them + // among players. + for (int rec_index = 0; rec_index < mu_.size(); ++rec_index) { + const TabularPolicy& joint_policy = mu_[rec_index].second; + + // Split the policies into individual player policies. + std::vector player_policies(game->NumPlayers()); + for (const auto& [info_state, action_probs] : joint_policy.PolicyTable()) { + const auto player_iter = info_state_to_player_.find(info_state); + SPIEL_CHECK_TRUE(player_iter != info_state_to_player_.end()); + Player player = player_iter->second; + player_policies[player].SetStatePolicy(info_state, action_probs); + } + + // Lookup / assign signals to each individual policy. 
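The `CEGame` constructor above deduplicates each player's slice of every joint policy in the device and hands out one signal id per distinct slice, so the same individual policy always maps to the same signal. The sketch below reproduces that bookkeeping with ordinary maps, using serialized policy strings as stand-ins for `TabularPolicy::ToStringSorted()`; all names and policy strings are hypothetical.

```cpp
// Sketch of signal-id assignment: one id per (distinct player policy, player).
// Serialized policy strings stand in for TabularPolicy::ToStringSorted().
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

int main() {
  const int num_players = 2;
  // Hypothetical per-player policy strings for 3 joint policies in the device.
  std::vector<std::vector<std::string>> player_policy_str = {
      {"p0:A", "p1:x"},   // joint policy 0
      {"p0:B", "p1:x"},   // joint policy 1
      {"p0:A", "p1:y"},   // joint policy 2
  };

  std::map<std::pair<std::string, int>, int> policy_player_to_signal_id;
  std::map<std::pair<int, int>, int> recidx_player_to_signal_id;
  std::vector<int> next_signal_id(num_players, 0);

  for (int rec_index = 0; rec_index < (int)player_policy_str.size(); ++rec_index) {
    for (int p = 0; p < num_players; ++p) {
      std::pair<std::string, int> key = {player_policy_str[rec_index][p], p};
      auto it = policy_player_to_signal_id.find(key);
      // New distinct policy for this player -> next fresh signal id.
      int signal_id = (it == policy_player_to_signal_id.end())
                          ? (policy_player_to_signal_id[key] = next_signal_id[p]++)
                          : it->second;
      recidx_player_to_signal_id[{rec_index, p}] = signal_id;
    }
  }

  // Player 0 ends up with two signals (for A and B); player 1 with two (x and y).
  for (const auto& [key, sid] : recidx_player_to_signal_id) {
    std::cout << "rec_index=" << key.first << " player=" << key.second
              << " -> signal " << sid << "\n";
  }
  return 0;
}
```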
+ for (Player p = 0; p < player_policies.size(); ++p) { + std::string sorted_policy_string = player_policies[p].ToStringSorted(); + std::pair key = {sorted_policy_string, p}; + const auto iter = policy_player_to_signal_id_.find(key); + int signal_id = -1; + + if (iter == policy_player_to_signal_id_.end()) { + // Signal for this policy does not exist yet, use the next one. + signal_id = signal_ids_[p]++; + policy_player_to_signal_id_[key] = signal_id; + } else { + signal_id = iter->second; + } + + recidx_player_to_signal_id_[{rec_index, p}] = signal_id; + } + } +} + +ActionsAndProbs CETabularPolicy::GetStatePolicy(const State& state) const { + // Here we must scrape off the signal id so that the BR code does a proper + // lookup on the orginal info state string. + const auto* ce_state = dynamic_cast(&state); + SPIEL_CHECK_TRUE(ce_state != nullptr); + + std::string info_state = state.InformationStateString(); + const size_t idx = info_state.find(config_.recommendation_delimiter); + SPIEL_CHECK_NE(idx, std::string::npos); + + std::string orig_info_state = info_state.substr(0, idx); + return ce_state->RecommendedStatePolicy(orig_info_state); +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/ce.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/ce.h new file mode 100644 index 0000000..47c4f14 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/ce.h @@ -0,0 +1,134 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
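`CETabularPolicy::GetStatePolicy` above has to strip the appended signal id back off before it can look up the recommended policy for the original information state. A plain-string version of that round trip (append on one side, scrape on the other) is sketched here; the delimiter and the sample infoset string are illustrative values, not the configured ones.

```cpp
// Sketch: append a signal id to an infoset key, then scrape it back off
// so the original key can be used for a policy lookup.
#include <cassert>
#include <iostream>
#include <string>

const std::string kRecDelimiter = " #sig# ";  // stand-in for the config delimiter

std::string WithSignal(const std::string& orig_infoset, int signal_id) {
  return orig_infoset + kRecDelimiter + std::to_string(signal_id);
}

std::string StripSignal(const std::string& tagged_infoset) {
  const size_t idx = tagged_infoset.find(kRecDelimiter);
  assert(idx != std::string::npos);
  return tagged_infoset.substr(0, idx);
}

int main() {
  std::string tagged = WithSignal("P1 history:cc", 3);
  std::cout << tagged << "\n";               // "P1 history:cc #sig# 3"
  std::cout << StripSignal(tagged) << "\n";  // "P1 history:cc"
  return 0;
}
```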
+ +#ifndef OPEN_SPIEL_ALGORITHMS_CORR_DIST_CE_H_ +#define OPEN_SPIEL_ALGORITHMS_CORR_DIST_CE_H_ + +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/algorithms/corr_dist.h" +#include "open_spiel/game_transforms/game_wrapper.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace algorithms { + +class CEState : public WrappedState { + public: + CEState(std::shared_ptr game, std::unique_ptr state, + CorrDistConfig config, const CorrelationDevice& mu); + + std::unique_ptr Clone() const override { + return std::make_unique(*this); + } + + // Need to override this because otherwise WrappedState forwards the + // implementation to the underlying state, which calls the wrong + // ChanceOutcomes + std::vector LegalChanceOutcomes() const override { + return State::LegalChanceOutcomes(); + } + + Player CurrentPlayer() const override; + ActionsAndProbs ChanceOutcomes() const override; + std::vector LegalActions() const override; + std::string InformationStateString(Player player) const override; + std::string ToString() const override; + + ActionsAndProbs RecommendedStatePolicy(const std::string& info_state) const; + + protected: + void DoApplyAction(Action action_id) override; + + private: + const CorrDistConfig config_; + const CorrelationDevice& mu_; + + // Which joint policy was chosen? + int rec_index_; +}; + +class CEGame : public WrappedGame { + public: + CEGame(std::shared_ptr game, CorrDistConfig config, + const CorrelationDevice& mu); + + std::unique_ptr NewInitialState() const override { + return std::make_unique(shared_from_this(), + game_->NewInitialState(), config_, mu_); + } + + // Returns a signal id, which corresponds to a specific policy used by the + // specified player in the joint policy at the specified recommendation index + // in the correlation device. This method makes use of a table that maintains + // these mappings initialized at construction time. + int GetSignalId(int rec_index, Player player) const { + const auto iter = recidx_player_to_signal_id_.find({rec_index, player}); + SPIEL_CHECK_TRUE(iter != recidx_player_to_signal_id_.end()); + return iter->second; + } + + int NumDistinctActions() const override { return orig_num_distinct_actions_; } + + private: + const CorrDistConfig config_; + const CorrelationDevice& mu_; + + // Number of distinct actions in the original game. + int orig_num_distinct_actions_; + + // To compute a correlated equilibria, we need to map individual player + // strategies to signal ids that are handed out by the recommender at the + // start of the game. These signal ids get tacked onto the information state + // strings so that the best response is computed conditionally on the signal. + // + // Keeps track of the number of signal id's per player. + std::vector signal_ids_; + + // Information state identifiers in this game. + absl::flat_hash_map info_state_to_player_; + + // A (sorted Tabular policy string, player id) -> signal id map. + absl::flat_hash_map, int> + policy_player_to_signal_id_; + + // A (recommendation index, player id) -> signal id map. 
+ absl::flat_hash_map, int> recidx_player_to_signal_id_; +}; + +class CETabularPolicy : public TabularPolicy { + public: + CETabularPolicy(CorrDistConfig config) : config_(config) {} + + ActionsAndProbs GetStatePolicy(const std::string& info_state) const override { + SpielFatalError("GetStatePolicy(const std::string&) should not be called."); + return TabularPolicy::GetStatePolicy(info_state); + } + ActionsAndProbs GetStatePolicy(const State& state, Player pl) const override { + SPIEL_CHECK_EQ(state.CurrentPlayer(), pl); + return GetStatePolicy(state); + } + ActionsAndProbs GetStatePolicy(const State& state) const override; + + private: + const CorrDistConfig config_; +}; + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_CORR_DIST_CE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/efcce.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/efcce.cc new file mode 100644 index 0000000..9b380ac --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/efcce.cc @@ -0,0 +1,170 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/corr_dist/efcce.h" + +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/spiel_globals.h" + +namespace open_spiel { +namespace algorithms { + +EFCCEState::EFCCEState(std::shared_ptr game, + std::unique_ptr state, CorrDistConfig config, + const CorrelationDevice& mu, Action follow_action, + Action defect_action) + : WrappedState(game, std::move(state)), + config_(config), + mu_(mu), + follow_action_(follow_action), + defect_action_(defect_action), + rec_index_(-1), + defected_(game->NumPlayers(), 0), + recommendation_seq_(game->NumPlayers(), std::vector({})) {} + +Player EFCCEState::CurrentPlayer() const { + // Only override this in the first chance actions. + if (rec_index_ < 0) { + return kChancePlayerId; + } else { + return state_->CurrentPlayer(); + } +} + +ActionsAndProbs EFCCEState::ChanceOutcomes() const { + if (rec_index_ < 0) { + ActionsAndProbs outcomes; + for (int i = 0; i < mu_.size(); ++i) { + outcomes.push_back({i, mu_[i].first}); + } + return outcomes; + } else { + return state_->ChanceOutcomes(); + } +} + +std::vector EFCCEState::LegalActions() const { + SPIEL_CHECK_FALSE(IsSimultaneousNode()); + + if (IsTerminal()) { + return {}; + } else if (IsChanceNode()) { + return LegalChanceOutcomes(); + } + + if (!HasDefected(CurrentPlayer())) { + // If the player has not defected then they have exactly two choices: + // follow or defect. + return {follow_action_, defect_action_}; + } else { + // Player has defected.. they are on their own. 
+ return state_->LegalActions(); + } +} + +std::string EFCCEState::InformationStateString(Player player) const { + // should look like + // + std::string rec_str = absl::StrJoin(recommendation_seq_[player], ","); + std::string infoset_str = state_->InformationStateString(player); + SPIEL_CHECK_EQ(infoset_str.find(config_.recommendation_delimiter), + std::string::npos); + return absl::StrCat(infoset_str, config_.recommendation_delimiter, + HasDefected(player) ? "true " : "false ", rec_str); +} + +std::string EFCCEState::ToString() const { + std::string state_str = absl::StrFormat( + "%s\nCur player: %i\nRec index %i\nDefected %s", state_->ToString(), + CurrentPlayer(), rec_index_, absl::StrJoin(defected_, " ")); + for (Player p = 0; p < state_->NumPlayers(); ++p) { + absl::StrAppend(&state_str, "\nPlayer ", p, " recommendation seq: ", + absl::StrJoin(recommendation_seq_[p], ",")); + } + return state_str; +} + +bool EFCCEState::HasDefected(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, game_->NumPlayers()); + return defected_[player] == 1; +} + +Action EFCCEState::CurRecommendation() const { + ActionsAndProbs actions_and_probs = + mu_[rec_index_].second.GetStatePolicy(state_->InformationStateString()); + Action rec_action = GetAction(actions_and_probs); + SPIEL_CHECK_TRUE(rec_action != kInvalidAction); + return rec_action; +} + +void EFCCEState::DoApplyAction(Action action_id) { + if (rec_index_ < 0) { + // Pick the joint policy which will provide recommendations. + rec_index_ = action_id; + SPIEL_CHECK_LT(rec_index_, mu_.size()); + } else if (state_->IsChanceNode()) { + // Regular chance node + state_->ApplyAction(action_id); + } else { + Player cur_player = CurrentPlayer(); + SPIEL_CHECK_GE(cur_player, 0); + SPIEL_CHECK_LT(cur_player, game_->NumPlayers()); + + if (!HasDefected(cur_player)) { + // Can only submit these two actions. + SPIEL_CHECK_TRUE(action_id == follow_action_ || + action_id == defect_action_); + + // Check for defection at this point. This is because the + // recommendations + Action recommendation = CurRecommendation(); + + if (action_id == follow_action_) { + // Follow recommendation. + std::vector legal_actions = state_->LegalActions(); + SPIEL_CHECK_TRUE(absl::c_find(legal_actions, recommendation) != + legal_actions.end()); + state_->ApplyAction(recommendation); + recommendation_seq_[cur_player].push_back(recommendation); + } else { + // Defect. + defected_[cur_player] = 1; + } + + } else { + // Regular game from here on. + state_->ApplyAction(action_id); + } + } +} + +ActionsAndProbs EFCCETabularPolicy::GetStatePolicy(const State& state) const { + // The best response code has to have a policy defined everywhere when it + // builds its initial tree. For the fixed policies, the players will not + // defect, so we define a uniform policy in the regions where players have + // defected (which will not affect the best responding player, since the + // opponents will never reach these regions). 
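That comment (repeated for each of the fixed "follow" policies in this directory) is implemented directly below: uniform over legal actions once a player has defected, otherwise all probability on the follow action. A standalone sketch of the rule, with stand-in action ids, follows.

```cpp
// Sketch of the fixed "always follow" policy used while evaluating deviations:
// uniform in already-defected regions, otherwise all mass on the follow action.
#include <iostream>
#include <utility>
#include <vector>

using Action = int;

std::vector<std::pair<Action, double>> FixedFollowPolicy(
    bool has_defected, const std::vector<Action>& legal_actions,
    Action follow_action, Action defect_action) {
  std::vector<std::pair<Action, double>> policy;
  if (has_defected) {
    // Unreachable for the non-deviating players, but the best-response tree
    // still needs a policy defined here; uniform is the convention.
    for (Action a : legal_actions) policy.push_back({a, 1.0 / legal_actions.size()});
  } else {
    policy = {{follow_action, 1.0}, {defect_action, 0.0}};
  }
  return policy;
}

int main() {
  for (const auto& [a, p] : FixedFollowPolicy(false, {0, 1, 2}, /*follow=*/3, /*defect=*/4))
    std::cout << "action " << a << " -> " << p << "\n";  // 3 -> 1, 4 -> 0
  return 0;
}
```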
+ const auto* efcce_state = dynamic_cast(&state); + SPIEL_CHECK_TRUE(efcce_state != nullptr); + if (efcce_state->HasDefected(state.CurrentPlayer())) { + return UniformStatePolicy(state); + } + + // Simply returns a fixed policy with prob 1 on the follow action + return {{follow_action_, 1.0}, {defect_action_, 0.0}}; +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/efcce.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/efcce.h new file mode 100644 index 0000000..241f0ef --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/efcce.h @@ -0,0 +1,137 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_CORR_DIST_EFCCE_H_ +#define OPEN_SPIEL_ALGORITHMS_CORR_DIST_EFCCE_H_ + +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/algorithms/corr_dist.h" +#include "open_spiel/game_transforms/game_wrapper.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace algorithms { + +// This is an EFCCE extended game similar to the one described in von Stengel +// and Forges 2008, Definition 2.2. The incentive to deviate to a best response +// is computed by running NashConv on this auxiliary game. + +// The main difference in the EFCCE auxiliary game (from the EFCE game) is that +// players must decide whether to accept or reject the recommendation *before* +// seeing it. This changes the action space of the game, adding two new actions +// that must be taken at each state before the actual decision is made. +class EFCCEState : public WrappedState { + public: + EFCCEState(std::shared_ptr game, std::unique_ptr state, + CorrDistConfig config, const CorrelationDevice& mu, + Action follow_action, Action defect_action); + + std::unique_ptr Clone() const override { + return std::make_unique(*this); + } + + // Need to override this because otherwise WrappedState forwards the + // implementation to the underlying state, which calls the wrong + // ChanceOutcomes + std::vector LegalChanceOutcomes() const override { + return State::LegalChanceOutcomes(); + } + + Player CurrentPlayer() const override; + ActionsAndProbs ChanceOutcomes() const override; + std::vector LegalActions() const override; + std::string InformationStateString(Player player) const override; + std::string ToString() const override; + + bool HasDefected(Player player) const; + + protected: + Action CurRecommendation() const; + void DoApplyAction(Action action_id) override; + + private: + const CorrDistConfig config_; + const CorrelationDevice& mu_; + + Action follow_action_; + Action defect_action_; + + // Which joint policy was chosen? + int rec_index_; + + // Has the player defected? 
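The header comment above is the crux of EFCCE: the player commits to follow or defect before the recommendation is revealed, so a follower only learns the recommendation by having it applied on their behalf, and a defector never sees it. The sketch below walks through that ordering of information with stand-in types; it is not the wrapper's actual control flow, just the sequencing it encodes.

```cpp
// Sketch of the EFCCE ordering of information: the follow/defect commitment
// comes before the recommendation is revealed. Stand-in types only.
#include <iostream>
#include <optional>

using Action = int;

struct EfcceStep {
  bool chose_follow;
  std::optional<Action> revealed_recommendation;  // only set if they followed
};

EfcceStep Commit(bool follow, Action hidden_recommendation) {
  EfcceStep step{follow, std::nullopt};
  if (follow) {
    // The wrapper applies the recommendation for the player and appends it to
    // their recommendation sequence, so they learn it only by following.
    step.revealed_recommendation = hidden_recommendation;
  }
  // If they defect, the recommendation stays hidden; they move freely next.
  return step;
}

int main() {
  EfcceStep followed = Commit(/*follow=*/true, /*hidden_recommendation=*/2);
  EfcceStep defected = Commit(/*follow=*/false, /*hidden_recommendation=*/2);
  std::cout << "followed -> saw rec? " << followed.revealed_recommendation.has_value() << "\n";  // 1
  std::cout << "defected -> saw rec? " << defected.revealed_recommendation.has_value() << "\n";  // 0
  return 0;
}
```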
+ std::vector defected_; + + // The sequence of recommendations, indexed by player + std::vector> recommendation_seq_; +}; + +class EFCCEGame : public WrappedGame { + public: + EFCCEGame(std::shared_ptr game, CorrDistConfig config, + const CorrelationDevice& mu) + : WrappedGame(game, game->GetType(), game->GetParameters()), + config_(config), + mu_(mu), + orig_num_distinct_actions_(game->NumDistinctActions()) {} + + std::unique_ptr NewInitialState() const override { + return std::make_unique(shared_from_this(), + game_->NewInitialState(), config_, mu_, + FollowAction(), DefectAction()); + } + + int NumDistinctActions() const override { + // 2 extra actions: cooperate/follow or defect + return orig_num_distinct_actions_ + 2; + } + + int FollowAction() const { return orig_num_distinct_actions_; } + int DefectAction() const { return orig_num_distinct_actions_ + 1; } + + private: + const CorrDistConfig config_; + const CorrelationDevice& mu_; + + // Number of distinct actions in the original game. + int orig_num_distinct_actions_; +}; + +class EFCCETabularPolicy : public TabularPolicy { + public: + EFCCETabularPolicy(Action follow_action, Action defect_action) + : follow_action_(follow_action), defect_action_(defect_action) {} + + ActionsAndProbs GetStatePolicy(const std::string& info_state) const override { + SpielFatalError("GetStatePolicy(const std::string&) should not be called."); + return TabularPolicy::GetStatePolicy(info_state); + } + ActionsAndProbs GetStatePolicy(const State& state, Player pl) const override { + SPIEL_CHECK_EQ(state.CurrentPlayer(), pl); + return GetStatePolicy(state); + } + ActionsAndProbs GetStatePolicy(const State& state) const override; + + private: + const Action follow_action_; + const Action defect_action_; +}; + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_CORR_DIST_EFCCE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/efce.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/efce.cc new file mode 100644 index 0000000..d2ecb4e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/efce.cc @@ -0,0 +1,172 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/corr_dist/efce.h" + +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" + +namespace open_spiel { +namespace algorithms { + +EFCEState::EFCEState(std::shared_ptr game, + std::unique_ptr state, CorrDistConfig config, + const CorrelationDevice& mu) + : WrappedState(game, std::move(state)), + config_(config), + mu_(mu), + rec_index_(-1), + defected_(game->NumPlayers(), 0), + recommendation_seq_(game->NumPlayers(), std::vector({})) {} + +Player EFCEState::CurrentPlayer() const { + // Only override this in the first chance actions. 
+ if (rec_index_ < 0) { + return kChancePlayerId; + } else { + return state_->CurrentPlayer(); + } +} + +ActionsAndProbs EFCEState::ChanceOutcomes() const { + if (rec_index_ < 0) { + ActionsAndProbs outcomes; + for (int i = 0; i < mu_.size(); ++i) { + outcomes.push_back({i, mu_[i].first}); + } + return outcomes; + } else { + return state_->ChanceOutcomes(); + } +} + +std::vector EFCEState::LegalActions() const { + SPIEL_CHECK_FALSE(IsSimultaneousNode()); + + if (IsTerminal()) { + return {}; + } else if (IsChanceNode()) { + return LegalChanceOutcomes(); + } + + return state_->LegalActions(); +} + +std::string EFCEState::InformationStateString(Player player) const { + // should look like + SPIEL_CHECK_FALSE(IsChanceNode()); + std::string rec_str = absl::StrJoin(recommendation_seq_[player], ","); + if (!HasDefected(player)) { + absl::StrAppend(&rec_str, ",", CurRecommendation()); + } + std::string infoset_str = state_->InformationStateString(player); + SPIEL_CHECK_EQ(infoset_str.find(config_.recommendation_delimiter), + std::string::npos); + return absl::StrCat(infoset_str, config_.recommendation_delimiter, rec_str); +} + +std::string EFCEState::ToString() const { + std::string state_str = absl::StrFormat( + "%s\nCur player: %i\nRec index %i\nDefected %s", state_->ToString(), + CurrentPlayer(), rec_index_, absl::StrJoin(defected_, " ")); + for (Player p = 0; p < state_->NumPlayers(); ++p) { + absl::StrAppend(&state_str, "\nPlayer ", p, " recommendation seq: ", + absl::StrJoin(recommendation_seq_[p], ",")); + } + return state_str; +} + +bool EFCEState::HasDefected(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, game_->NumPlayers()); + return defected_[player] == 1; +} + +Action EFCEState::CurRecommendation() const { + SPIEL_CHECK_GE(rec_index_, 0); + SPIEL_CHECK_LT(rec_index_, mu_.size()); + ActionsAndProbs actions_and_probs = + mu_[rec_index_].second.GetStatePolicy(state_->InformationStateString()); + Action rec_action = GetAction(actions_and_probs); + SPIEL_CHECK_TRUE(rec_action != kInvalidAction); + return rec_action; +} + +void EFCEState::DoApplyAction(Action action_id) { + if (rec_index_ < 0) { + // Pick the joint policy which will provide recommendations. + rec_index_ = action_id; + SPIEL_CHECK_GE(rec_index_, 0); + SPIEL_CHECK_LT(rec_index_, mu_.size()); + } else if (state_->IsChanceNode()) { + // Regular chance node + state_->ApplyAction(action_id); + } else { + // Check for defection at this point. + const Action recommendation = CurRecommendation(); + + Player cur_player = CurrentPlayer(); + SPIEL_CHECK_GE(cur_player, 0); + SPIEL_CHECK_LT(cur_player, game_->NumPlayers()); + + // If they have defected, don't add to the sequence + if (!HasDefected(cur_player)) { + recommendation_seq_[cur_player].push_back(recommendation); + + // If they chose an action other than the recommendation, they have now + // defected. + if (action_id != recommendation) { + defected_[cur_player] = 1; + } + } + + state_->ApplyAction(action_id); + } +} + +ActionsAndProbs EFCETabularPolicy::GetStatePolicy(const State& state) const { + // The best response code has to have a policy defined everywhere when it + // builds its initial tree. For the fixed policies, the players will not + // defect, so we define a uniform policy in the regions where players have + // defected (which will not affect the best responding player, since the + // opponents will never reach these regions). 
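The fixed policy defined directly below this comment recovers the recommended action by parsing the recommendation sequence back out of the tagged information state string and putting probability 1 on its last entry. A standalone version of that parse is sketched here using an illustrative delimiter and plain std parsing instead of the absl helpers.

```cpp
// Sketch: recover the last (current) recommendation from an EFCE-style
// tagged infoset key and put probability 1 on it. Plain std parsing only.
#include <cassert>
#include <iostream>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

using Action = int;
const std::string kRecDelimiter = " #rec# ";  // stand-in for the config delimiter

Action LastRecommendation(const std::string& tagged_infoset) {
  const size_t idx = tagged_infoset.find(kRecDelimiter);
  assert(idx != std::string::npos);
  std::string rec_part = tagged_infoset.substr(idx + kRecDelimiter.size());
  std::stringstream ss(rec_part);
  std::string token, last;
  while (std::getline(ss, token, ',')) last = token;  // keep the final entry
  return std::stoi(last);
}

std::vector<std::pair<Action, double>> FollowRecommendationPolicy(
    const std::vector<Action>& legal_actions, Action rec) {
  std::vector<std::pair<Action, double>> policy;
  for (Action a : legal_actions) policy.push_back({a, a == rec ? 1.0 : 0.0});
  return policy;
}

int main() {
  // Recommendations so far were 2 and 0; the current one is 1.
  std::string tagged = std::string("P0 history:cb") + kRecDelimiter + "2,0,1";
  Action rec = LastRecommendation(tagged);
  for (const auto& [a, p] : FollowRecommendationPolicy({0, 1, 2}, rec))
    std::cout << "action " << a << " -> " << p << "\n";  // mass on action 1
  return 0;
}
```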
+ const auto* efce_state = dynamic_cast(&state); + SPIEL_CHECK_TRUE(efce_state != nullptr); + if (efce_state->HasDefected(state.CurrentPlayer())) { + return UniformStatePolicy(state); + } + + // Otherwise, simply returns a fixed policy with prob 1 on the recommended + // action (extrapolated from the information state string) and 0 on the + // others. + std::string info_state = state.InformationStateString(); + const size_t idx = info_state.find(config_.recommendation_delimiter); + SPIEL_CHECK_NE(idx, std::string::npos); + std::vector rec_seq = absl::StrSplit( + info_state.substr(idx + config_.recommendation_delimiter.length()), ','); + SPIEL_CHECK_GE(rec_seq.size(), 1); + Action rec_action; + ActionsAndProbs state_policy; + std::vector legal_actions = state.LegalActions(); + state_policy.reserve(legal_actions.size()); + SPIEL_CHECK_TRUE(absl::SimpleAtoi(rec_seq.back(), &rec_action)); + for (Action action : legal_actions) { + state_policy.push_back({action, action == rec_action ? 1.0 : 0.0}); + } + return state_policy; +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/efce.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/efce.h new file mode 100644 index 0000000..03823e2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist/efce.h @@ -0,0 +1,134 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_CORR_DIST_EFCE_H_ +#define OPEN_SPIEL_ALGORITHMS_CORR_DIST_EFCE_H_ + +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/algorithms/corr_dist.h" +#include "open_spiel/game_transforms/game_wrapper.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" + +// The implementations of the metrics assemble the extended game described in +// von Stengel and Forges 2008, Definition 2.2. Then, the incentive to deviate +// to a best response is computed by running NashConv on this auxiliary game. +// +// The EFCE extended game modifies the original game in the following ways: +// - An internal variable is kept to determine if the player has deviated. +// If so, recommendations are no longer given. +// - If config.deterministic and config.convert are both false, the game +// starts with a "Monte Carlo" chance node that corresponds to different +// samplings of deterministic joint policies. The number of samples (and +// corresponding accuracy) is determined by config.num_samples. If either +// config.deterministic is true or config.convert is true, this chance node +// will not exist. +// - A "joint policy" chance node that corresponds to choosing a joint policy +// from the correlation device. +// - Information state keys are modified to include the recommendations +// received at the current information state and ones received up to this +// information state, i.e. the sequence of recommendations. 
New +// recommendations stop getting appended once the player chooses an action +// that does not match the recommendation. +// +// In addition, a specific tabular policy is made so as to map the policies +// in this new game back to the original game. + +namespace open_spiel { +namespace algorithms { + +class EFCEState : public WrappedState { + public: + EFCEState(std::shared_ptr game, std::unique_ptr state, + CorrDistConfig config, const CorrelationDevice& mu); + + std::unique_ptr Clone() const override { + return std::make_unique(*this); + } + + // Need to override this because otherwise WrappedState forwards the + // implementation to the underlying state, which calls the wrong + // ChanceOutcomes + std::vector LegalChanceOutcomes() const override { + return State::LegalChanceOutcomes(); + } + + Player CurrentPlayer() const override; + ActionsAndProbs ChanceOutcomes() const override; + std::vector LegalActions() const override; + std::string InformationStateString(Player player) const override; + std::string ToString() const; + + bool HasDefected(Player player) const; + + protected: + Action CurRecommendation() const; + void DoApplyAction(Action action_id) override; + + private: + CorrDistConfig config_; + const CorrelationDevice& mu_; + + // Which joint policy was chosen? + int rec_index_; + + // Has the player defected? + std::vector defected_; + + // The sequence of recommendations, indexed by player + std::vector> recommendation_seq_; +}; + +class EFCEGame : public WrappedGame { + public: + EFCEGame(std::shared_ptr game, CorrDistConfig config, + const CorrelationDevice& mu) + : WrappedGame(game, game->GetType(), game->GetParameters()), + config_(config), + mu_(mu) {} + + std::unique_ptr NewInitialState() const override { + return std::make_unique(shared_from_this(), + game_->NewInitialState(), config_, mu_); + } + + protected: + const CorrDistConfig config_; + const CorrelationDevice& mu_; +}; + +class EFCETabularPolicy : public TabularPolicy { + public: + EFCETabularPolicy(const CorrDistConfig& config) : config_(config) {} + + ActionsAndProbs GetStatePolicy(const std::string& info_state) const override { + SpielFatalError("GetStatePolicy(const std::string&) should not be called."); + return TabularPolicy::GetStatePolicy(info_state); + } + ActionsAndProbs GetStatePolicy(const State& state, Player pl) const override { + SPIEL_CHECK_EQ(state.CurrentPlayer(), pl); + return GetStatePolicy(state); + } + ActionsAndProbs GetStatePolicy(const State& state) const override; + + private: + const CorrDistConfig config_; +}; + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_CORR_DIST_EFCE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist_test.cc new file mode 100644 index 0000000..a8d6335 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/corr_dist_test.cc @@ -0,0 +1,397 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/corr_dist.h" + +#include +#include + +#include "open_spiel/algorithms/cfr.h" +#include "open_spiel/algorithms/corr_dev_builder.h" +#include "open_spiel/game_transforms/turn_based_simultaneous_game.h" +#include "open_spiel/games/efg_game/efg_game.h" +#include "open_spiel/games/efg_game/efg_game_data.h" +#include "open_spiel/matrix_game.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/init.h" + +namespace open_spiel { +namespace algorithms { +namespace { + +inline constexpr double kFloatTolerance = 1e-12; + +inline constexpr const char* kGreenwaldSarfatiEg1File = + "third_party/open_spiel/games/efg/greenwald_sarfati_example1.efg"; +inline constexpr const char* kGreenwaldSarfatiEg2File = + "third_party/open_spiel/games/efg/greenwald_sarfati_example2.efg"; + +void TestGibson13MatrixGameExample() { + // Tests that the example from Sec 2.2 of Gibson 2013, Regret Minimization in + // Non-Zero-Sum Games with Applications to Building Champion Multiplayer + // Computer Poker Agents (https://arxiv.org/abs/1305.0034). + // + // a b + // A 1,0 0,0 + // B 0,0 2,0 + // C -1,0 1,0 + // + std::shared_ptr gibson_game = + matrix_game::CreateMatrixGame({{1, 0}, {0, 2}, {-1, 1}}, + {{0, 0}, {0, 0}, {0, 0}}); + + NormalFormCorrelationDevice mu = { + {0.5, {0, 0}}, // (A, a) = 0.5 + {0.25, {1, 1}}, // (B, b) = 0.25 + {0.25, {2, 1}} // (C, b) = 0.25 + }; + + // mu is a CCE. + SPIEL_CHECK_TRUE(Near(CCEDist(*gibson_game, mu), 0.0)); + + // mu is not a CE, because first player gains 1 by deviating to B after + // receiving the third recommendation, which happens with prob 0.25 + SPIEL_CHECK_TRUE(Near(CEDist(*gibson_game, mu), 0.25)); + + // Repeat these tests with a turn-based simultaneous game. + SPIEL_CHECK_TRUE(Near(CCEDist(*ConvertToTurnBased(*gibson_game), mu), 0.0)); + SPIEL_CHECK_TRUE(Near(CEDist(*ConvertToTurnBased(*gibson_game), mu), 0.25)); +} + +void TestShapleysGame() { + // Shapley's game is a general-sum version of Rock, Paper, Scissors. + // See Fig 7.6 of rown '09, http://www.masfoundations.org/mas.pdf: + // + // R P S + // R 0,0 0,1 1,0 + // P 1,0 0,0 0,1 + // S 0,1 1,0 0,0 + std::shared_ptr shapleys_game = LoadGame("matrix_shapleys_game"); + + // There is a unique Nash eq at (1/3, 1/3, 1/3). So by Sec 3.4.5 of Shoham + // and Leyton-Brown there is a CE with 1/9 on all the entries. + NormalFormCorrelationDevice mu = { + {1.0 / 9.0, {0, 0}}, {1.0 / 9.0, {0, 1}}, {1.0 / 9.0, {0, 2}}, + {1.0 / 9.0, {1, 0}}, {1.0 / 9.0, {1, 1}}, {1.0 / 9.0, {1, 2}}, + {1.0 / 9.0, {2, 0}}, {1.0 / 9.0, {2, 1}}, {1.0 / 9.0, {2, 2}}}; + + SPIEL_CHECK_TRUE(Near(CEDist(*shapleys_game, mu), 0.0)); + std::vector expected_values_full_support = + ExpectedValues(*shapleys_game, mu); + SPIEL_CHECK_TRUE(Near(expected_values_full_support[0], 1.0 / 3.0)); + SPIEL_CHECK_TRUE(Near(expected_values_full_support[1], 1.0 / 3.0)); + + // There is another CE with 1/6 on the off-diagonals. 
+ mu = {{1.0 / 6.0, {0, 1}}, {1.0 / 6.0, {0, 2}}, {1.0 / 6.0, {1, 0}}, + {1.0 / 6.0, {1, 2}}, {1.0 / 6.0, {2, 0}}, {1.0 / 6.0, {2, 1}}}; + + SPIEL_CHECK_TRUE(Near(CEDist(*shapleys_game, mu), 0.0)); + std::vector expected_values_off_diagonals = + ExpectedValues(*shapleys_game, mu); + SPIEL_CHECK_TRUE(Near(expected_values_off_diagonals[0], 0.5)); + SPIEL_CHECK_TRUE(Near(expected_values_off_diagonals[1], 0.5)); +} + +void TestBoS() { + // Correlated equilibrium example from Sec 3.4.5 of Shoham & Leyton-Brown '09 + // https://masfoundations.org/mas.pdf + // + // LW WL + // LW 2,1 0,0 + // WL 0,0 1,2 + std::shared_ptr bos_game = + matrix_game::CreateMatrixGame({{2, 0}, {0, 1}}, {{1, 0}, {0, 2}}); + + NormalFormCorrelationDevice mu = {{0.5, {0, 0}}, {0.5, {1, 1}}}; + SPIEL_CHECK_TRUE(Near(CEDist(*bos_game, mu), 0.0)); +} + +void TestChicken() { + // Example from: https://en.wikipedia.org/wiki/Correlated_equilibrium + std::shared_ptr chicken_game = + matrix_game::CreateMatrixGame({{0, 7}, {2, 6}}, {{0, 2}, {7, 6}}); + + NormalFormCorrelationDevice mu = { + {0.5, {1, 1}}, {0.25, {1, 0}}, {0.25, {0, 1}}}; + SPIEL_CHECK_TRUE(Near(CEDist(*chicken_game, mu), 0.0)); +} + +void TestSignalingExampleVonStengelForges2008() { + // Runs a test based on the signaling game example in Section 2.3 of von + // Stengel & Forges 2008, Extensive-Form Correlated Equilibrium: + // Definition and Computational Complexity. + + // First, check the CE of the normal-form version in Figure 2. + std::shared_ptr signaling_game_nfg = + matrix_game::CreateMatrixGame( + {{5, 5, 0, 0}, {5, 2, 3, 0}, {5, 3, 2, 0}, {5, 0, 5, 0}}, + {{5, 5, 6, 6}, {5, 8, 3, 6}, {5, 3, 8, 6}, {5, 6, 5, 6}}); + + // Mix equally a'' = b'' = c'' == d'' = 1/4. + NormalFormCorrelationDevice mu_nfg = { + {0.25, {0, 3}}, {0.25, {1, 3}}, {0.25, {2, 3}}, {0.25, {3, 3}}}; + SPIEL_CHECK_TRUE(Near(CEDist(*signaling_game_nfg, mu_nfg), 0.0)); + std::vector expected_values = + ExpectedValues(*signaling_game_nfg, mu_nfg); + SPIEL_CHECK_TRUE(Near(expected_values[0], 0.0)); + SPIEL_CHECK_TRUE(Near(expected_values[1], 6.0)); + + // Now do the extensive-form version. From von Stengel & Forges '08: + std::shared_ptr efg_game = + efg_game::LoadEFGGame(efg_game::GetSignalingEFGData()); + const efg_game::EFGGame* signaling_game = + dynamic_cast(efg_game.get()); + SPIEL_CHECK_TRUE(signaling_game != nullptr); + + // "However, there is an EFCE with better payoff to both players compared to + // the outcome with payoff pair (0, 6): A signal X_G or Y_G is chosen with + // equal probability for type G, and player 2 is told to accept when receiving + // the chosen signal and to refuse when receiving the other signal (so X_G and + // lX rY are perfectly correlated, as well as Y_G and r_X l_Y ). 
+ TabularPolicy XG_XB_policy = efg_game::EFGGameTabularPolicy(efg_game, + {{{0, "G"}, {{"X_G", 1.0}, {"Y_G", 0.0}}}, + {{0, "B"}, {{"X_B", 1.0}, {"Y_B", 0.0}}}, + {{1, "X"}, {{"l_X", 1.0}, {"r_X", 0.0}}}, + {{1, "Y"}, {{"l_Y", 0.0}, {"r_Y", 1.0}}}}); + + TabularPolicy YG_XB_policy = efg_game::EFGGameTabularPolicy(efg_game, + {{{0, "G"}, {{"X_G", 0.0}, {"Y_G", 1.0}}}, + {{0, "B"}, {{"X_B", 1.0}, {"Y_B", 0.0}}}, + {{1, "X"}, {{"l_X", 0.0}, {"r_X", 1.0}}}, + {{1, "Y"}, {{"l_Y", 1.0}, {"r_Y", 0.0}}}}); + + TabularPolicy XG_YB_policy = efg_game::EFGGameTabularPolicy(efg_game, + {{{0, "G"}, {{"X_G", 1.0}, {"Y_G", 0.0}}}, + {{0, "B"}, {{"X_B", 0.0}, {"Y_B", 1.0}}}, + {{1, "X"}, {{"l_X", 1.0}, {"r_X", 0.0}}}, + {{1, "Y"}, {{"l_Y", 0.0}, {"r_Y", 1.0}}}}); + + TabularPolicy YG_YB_policy = efg_game::EFGGameTabularPolicy(efg_game, + {{{0, "G"}, {{"X_G", 0.0}, {"Y_G", 1.0}}}, + {{0, "B"}, {{"X_B", 0.0}, {"Y_B", 1.0}}}, + {{1, "X"}, {{"l_X", 0.0}, {"r_X", 1.0}}}, + {{1, "Y"}, {{"l_Y", 1.0}, {"r_Y", 0.0}}}}); + + // Finally test to see if it's an EFCE. + CorrelationDevice mu = { + {0.25, XG_XB_policy}, + {0.25, YG_XB_policy}, + {0.25, XG_YB_policy}, + {0.25, YG_YB_policy}, + }; + expected_values = ExpectedValues(*efg_game, mu); + SPIEL_CHECK_TRUE(Near(expected_values[0], 3.5)); + SPIEL_CHECK_TRUE(Near(expected_values[1], 6.5)); + + CorrDistConfig config; + SPIEL_CHECK_TRUE(Near(EFCEDist(*efg_game, config, mu), 0.0)); + + // EFCEs are contained withing EFCCE (see Section 5 of + // https://arxiv.org/abs/1908.09893), so mu is also an EFCCE in this game. + SPIEL_CHECK_TRUE(Near(EFCCEDist(*efg_game, config, mu), 0.0)); +} + +void Test1PInOutGame() { + // Example game described in Section 2.4 of von Stengel & Forges, + // Extensive Form Correlated Equilibrium: Definition and Computational + // Complexity. CDAM Research Report LSE-CDAM-2006-04. + // http://www.cdam.lse.ac.uk/Reports/Files/cdam-2006-04.pdf + // + // This is a simple example that illustrates the difference between AFCE and + // EFCE. + const char* kInOutGameData = R"###( + EFG 2 R "InOutGame" { "P1" } "" + + p "ROOT" 1 1 "Root Infoset" { "In" "Out" } 0 + p "In" 1 2 "In Infoset" { "In" "Out" } 0 + t "In In" 1 "Outcome In In" { 1.0 } + t "In Out" 2 "Outcome In Out" { 0.0 } + p "Out" 1 3 "Out Infoset" { "In" "Out" } 0 + t "Out In" 3 "Outcome Out In" { 0.0 } + t "Out Out" 4 "Outcome Out Out" { 0.0 } + )###"; + std::shared_ptr efg_game = efg_game::LoadEFGGame(kInOutGameData); + + TabularPolicy single_policy = efg_game::EFGGameTabularPolicy( + efg_game, {{{0, "Root Infoset"}, {{"In", 0.0}, {"Out", 1.0}}}, + {{0, "In Infoset"}, {{"In", 0.0}, {"Out", 1.0}}}, + {{0, "Out Infoset"}, {{"In", 0.0}, {"Out", 1.0}}}}); + + CorrelationDevice mu = {{1.0, single_policy}}; + + std::vector expected_values = ExpectedValues(*efg_game, mu); + SPIEL_CHECK_TRUE(Near(expected_values[0], 0.0)); + + CorrDistConfig config; + SPIEL_CHECK_TRUE(Near(AFCEDist(*efg_game, config, mu), 0.0)); + + // Player has incentive to switch to In at the first decision and, once having + // deviated switch to In again, achieving a value of 1. This is 1 more than + // the correlation device's expected value of 0. 
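+  // (Illustrative note, not in the upstream file: under the agent-form notion
+  // checked above, a deviation at the root does not carry over to the
+  // "In Infoset", where the recommendation to play Out is still followed, so
+  // the deviating payoff stays at 0 and AFCEDist is 0; only the
+  // extensive-form deviation, which keeps deviating downstream, realizes the
+  // gain of 1.)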
+ SPIEL_CHECK_FLOAT_NEAR(EFCEDist(*efg_game, config, mu), 1.0, kFloatTolerance); +} + +void TestGreenwaldSarfatiExample1() { + absl::optional file = FindFile(kGreenwaldSarfatiEg1File, 2); + if (file.has_value()) { + std::shared_ptr efg_game = + LoadGame(absl::StrCat("efg_game(filename=", file.value(), ")")); + const efg_game::EFGGame* example_game = + dynamic_cast(efg_game.get()); + SPIEL_CHECK_TRUE(example_game != nullptr); + + TabularPolicy LAl1_policy = efg_game::EFGGameTabularPolicy(efg_game, + {{{0, "Root infoset"}, {{"L", 1.0}, {"R", 0.0}}}, + {{1, "P2 infoset"}, {{"A", 1.0}, {"B", 0.0}}}, + {{0, "Left P1 infoset"}, {{"l1", 1.0}, {"r1", 0.0}}}, + {{0, "Right P1 infoset"}, {{"l2", 1.0}, {"r2", 0.0}}}}); + + TabularPolicy LBl1_policy = efg_game::EFGGameTabularPolicy(efg_game, + {{{0, "Root infoset"}, {{"L", 1.0}, {"R", 0.0}}}, + {{1, "P2 infoset"}, {{"A", 0.0}, {"B", 1.0}}}, + {{0, "Left P1 infoset"}, {{"l1", 1.0}, {"r1", 0.0}}}, + {{0, "Right P1 infoset"}, {{"l2", 0.0}, {"r2", 1.0}}}}); + + CorrelationDevice mu = {{0.5, LAl1_policy}, {0.5, LBl1_policy}}; + CorrDistConfig config; + + // This *is* an AFCE and AFCCE. + SPIEL_CHECK_FLOAT_NEAR(AFCEDist(*efg_game, config, mu), 0.0, + kFloatTolerance); + SPIEL_CHECK_FLOAT_NEAR(AFCCEDist(*efg_game, config, mu), 0.0, + kFloatTolerance); + + // However, *not* an EFCE nor EFCCE. + SPIEL_CHECK_GT(EFCEDist(*efg_game, config, mu), 0.0); + SPIEL_CHECK_GT(EFCCEDist(*efg_game, config, mu), 0.0); + } +} + +void TestGreenwaldSarfatiExample2() { + absl::optional file = FindFile(kGreenwaldSarfatiEg2File, 2); + if (file.has_value()) { + std::shared_ptr efg_game = + LoadGame(absl::StrCat("efg_game(filename=", file.value(), ")")); + const efg_game::EFGGame* example_game = + dynamic_cast(efg_game.get()); + SPIEL_CHECK_TRUE(example_game != nullptr); + + TabularPolicy LAl1_policy = efg_game::EFGGameTabularPolicy(efg_game, + {{{0, "Root infoset"}, {{"L", 1.0}, {"R", 0.0}}}, + {{1, "P2 infoset"}, {{"A", 1.0}, {"B", 0.0}}}, + {{0, "Left P1 infoset"}, {{"l1", 1.0}, {"r1", 0.0}}}, + {{0, "Right P1 infoset"}, {{"l2", 1.0}, {"r2", 0.0}}}}); + + TabularPolicy LBl1_policy = efg_game::EFGGameTabularPolicy(efg_game, + {{{0, "Root infoset"}, {{"L", 1.0}, {"R", 0.0}}}, + {{1, "P2 infoset"}, {{"A", 0.0}, {"B", 1.0}}}, + {{0, "Left P1 infoset"}, {{"l1", 1.0}, {"r1", 0.0}}}, + {{0, "Right P1 infoset"}, {{"l2", 0.0}, {"r2", 1.0}}}}); + + TabularPolicy LBr1_policy = efg_game::EFGGameTabularPolicy(efg_game, + {{{0, "Root infoset"}, {{"L", 1.0}, {"R", 0.0}}}, + {{1, "P2 infoset"}, {{"A", 0.0}, {"B", 1.0}}}, + {{0, "Left P1 infoset"}, {{"l1", 0.0}, {"r1", 1.0}}}, + {{0, "Right P1 infoset"}, {{"l2", 0.0}, {"r2", 1.0}}}}); + + CorrelationDevice mu = {{0.5, LAl1_policy}, + {0.25, LBl1_policy}, + {0.25, LBr1_policy}}; + + CorrDistConfig config; + SPIEL_CHECK_FLOAT_EQ(EFCEDist(*efg_game, config, mu), 0.0); + } + + // Matrix game version: + // + // A B + // L,l1,l2 2,2 2,2 + // L,l1,r2 2,2 2,2 + // L,r1,l2 0,2 2,2 + // L,r1,r2 0,2 2,2 + // R,l1,l2 0,0 0,0 + // R,l1,r2 0,0 3,0 + // R,r1,l2 0,0 0,0 + // R,r1,r2 0,0 3,0 + std::shared_ptr eg2_matrix_game = + matrix_game::CreateMatrixGame( + {{2, 2}, {2, 2}, {0, 2}, {0, 2}, {0, 0}, {0, 3}, {0, 0}, {0, 3}}, + {{2, 2}, {2, 2}, {2, 2}, {2, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 0}}); + + // To show it's not a CE: match the mu in the EFCE test above + NormalFormCorrelationDevice mu_nfg = { + {0.5, {0, 0}}, // L,l1,l2 + A + {0.25, {1, 1}}, // L,l1,r2 + B + {0.25, {3, 1}} // L,r1,r2 + B + }; + + SPIEL_CHECK_GT(CEDist(*eg2_matrix_game, mu_nfg), 0.0); +} + 
+void TestCCECEDistCFRGoofSpiel() { + std::shared_ptr game = LoadGame( + "turn_based_simultaneous_game(game=goofspiel(num_cards=3,points_order=" + "descending,returns_type=total_points))"); + for (int num_iterations : {1, 10, 100}) { + std::vector policies; + policies.reserve(num_iterations); + CFRSolverBase solver(*game, + /*alternating_updates=*/true, + /*linear_averaging=*/false, + /*regret_matching_plus=*/false, + /*random_initial_regrets*/ false); + for (int i = 0; i < num_iterations; i++) { + solver.EvaluateAndUpdatePolicy(); + TabularPolicy current_policy = + static_cast(solver.CurrentPolicy().get()) + ->AsTabular(); + policies.push_back(current_policy); + } + + CorrelationDevice mu = UniformCorrelationDevice(policies); + CorrDistInfo cce_dist_info = CCEDist(*game, mu); + std::cout << "num_iterations: " << num_iterations + << ", cce_dist: " << cce_dist_info.dist_value << std::endl; + + // Disabled in test because it's really slow. + // double ce_dist = CEDist(*game, DeterminizeCorrDev(mu)); + // std::cout << "num_iterations: " << num_iterations + // << ", approximate ce_dist: " << ce_dist << std::endl; + CorrDistInfo ce_dist_info = + CEDist(*game, SampledDeterminizeCorrDev(mu, 100)); + std::cout << "num_iterations: " << num_iterations + << ", approximate ce_dist: " << ce_dist_info.dist_value + << std::endl; + } +} +} // namespace +} // namespace algorithms +} // namespace open_spiel + +namespace algorithms = open_spiel::algorithms; + +int main(int argc, char** argv) { + open_spiel::Init("", &argc, &argv, true); + algorithms::TestGibson13MatrixGameExample(); + algorithms::TestShapleysGame(); + algorithms::TestBoS(); + algorithms::TestChicken(); + algorithms::TestSignalingExampleVonStengelForges2008(); + algorithms::Test1PInOutGame(); + algorithms::TestGreenwaldSarfatiExample1(); + algorithms::TestGreenwaldSarfatiExample2(); + algorithms::TestCCECEDistCFRGoofSpiel(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/deterministic_policy.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/deterministic_policy.cc new file mode 100644 index 0000000..68be490 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/deterministic_policy.cc @@ -0,0 +1,136 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/deterministic_policy.h" + +#include + +#include "open_spiel/algorithms/get_legal_actions_map.h" +#include "open_spiel/policy.h" + +namespace open_spiel { +namespace algorithms { + +int64_t NumDeterministicPolicies(const Game& game, Player player) { + int64_t num_policies = 1; + std::unordered_map> legal_actions_map = + GetLegalActionsMap(game, -1, player); + for (const auto& infostate_str_actions : legal_actions_map) { + int64_t num_actions = infostate_str_actions.second.size(); + SPIEL_CHECK_GT(num_actions, 0); + + // Check for integer overflow. 
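+    // (For positive num_policies and num_actions, the product overflows
+    // int64_t exactly when num_policies > INT64_MAX / num_actions under
+    // integer division, so the check below detects overflow without
+    // performing the multiplication.)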
+ if (num_policies > INT64_MAX / num_actions) { + return -1; + } + + num_policies *= num_actions; + } + return num_policies; +} + +DeterministicTabularPolicy::DeterministicTabularPolicy( + const Game& game, Player player, + const std::unordered_map policy) + : table_(), player_(player) { + CreateTable(game, player); + for (const auto& info_state_action : policy) { + auto iter = table_.find(info_state_action.first); + SPIEL_CHECK_TRUE(iter != table_.end()); + iter->second.SetAction(info_state_action.second); + } +} + +DeterministicTabularPolicy::DeterministicTabularPolicy(const Game& game, + Player player) + : table_(), player_(player) { + CreateTable(game, player); +} + +ActionsAndProbs DeterministicTabularPolicy::GetStatePolicy( + const std::string& info_state) const { + auto iter = table_.find(info_state); + SPIEL_CHECK_TRUE(iter != table_.end()); + ActionsAndProbs state_policy; + Action policy_action = iter->second.GetAction(); + for (const auto& action : iter->second.legal_actions_) { + state_policy.push_back({action, action == policy_action ? 1.0 : 0.0}); + } + return state_policy; +} + +Action DeterministicTabularPolicy::GetAction( + const std::string& info_state) const { + auto iter = table_.find(info_state); + SPIEL_CHECK_TRUE(iter != table_.end()); + return iter->second.GetAction(); +} + +TabularPolicy DeterministicTabularPolicy::GetTabularPolicy() const { + TabularPolicy tabular_policy; + for (const auto& infostate_and_legals : table_) { + ActionsAndProbs state_policy; + Action policy_action = infostate_and_legals.second.GetAction(); + for (const auto& action : infostate_and_legals.second.legal_actions_) { + state_policy.push_back({action, action == policy_action ? 1.0 : 0.0}); + } + tabular_policy.SetStatePolicy(infostate_and_legals.first, state_policy); + } + return tabular_policy; +} + +bool DeterministicTabularPolicy::NextPolicy() { + // Treat the current indices as digits in a mixed base. Starting at the + // beginning of the table, add 1. If can't, continue trying. If we reach the + // end without being able to add 1, then this is the end of the order. + // Otherwise, increment the digit we land on by 1, and reset all the ones + // we skipped over earlier in the order. 
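+  //
+  // A small worked example (illustrative, not from the upstream file): with
+  // two information states in table order, the first having 2 legal actions
+  // and the second having 3, successive calls visit the index tuples
+  //   (0,0) -> (1,0) -> (0,1) -> (1,1) -> (0,2) -> (1,2)
+  // and the next call returns false, having enumerated all 2 * 3 = 6
+  // deterministic policies.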
+ for (auto iter = table_.begin(); iter != table_.end(); ++iter) { + if (iter->second.TryIncIndex()) { + for (auto iter2 = table_.begin(); iter2 != iter; ++iter2) { + iter2->second.index = 0; + } + return true; + } + } + return false; +} + +void DeterministicTabularPolicy::ResetDefaultPolicy() { + for (auto& info_state_entry : table_) { + info_state_entry.second.index = 0; + } +} + +void DeterministicTabularPolicy::CreateTable(const Game& game, Player player) { + std::unordered_map> legal_actions_map = + GetLegalActionsMap(game, -1, player); + for (const auto& info_state_actions : legal_actions_map) { + table_[info_state_actions.first] = + LegalsWithIndex(info_state_actions.second); + } +} + +std::string DeterministicTabularPolicy::ToString( + const std::string& delimiter) const { + std::string str = ""; + for (const auto& info_state_entry : table_) { + absl::StrAppend(&str, info_state_entry.first, " ", delimiter, " ", + "action = ", info_state_entry.second.GetAction(), "\n"); + } + return str; +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/deterministic_policy.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/deterministic_policy.h new file mode 100644 index 0000000..c391c31 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/deterministic_policy.h @@ -0,0 +1,120 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_DETERMINISTIC_POLICY_H_ +#define OPEN_SPIEL_ALGORITHMS_DETERMINISTIC_POLICY_H_ + +#include + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { + +// Returns the number of deterministic policies for this player in this game, +// if the number is less than 2^64-1. Otherwise, returns -1. +int64_t NumDeterministicPolicies(const Game& game, Player player); + +// An simple container object used to store the legal actions (and chosen +// action) for each information state. +struct LegalsWithIndex { + LegalsWithIndex() {} + LegalsWithIndex(const std::vector& legal_actions) + : legal_actions_(legal_actions), index(0) {} + + void SetAction(Action action) { + auto iter = std::find(legal_actions_.begin(), legal_actions_.end(), action); + SPIEL_CHECK_TRUE(iter != legal_actions_.end()); + index = std::distance(legal_actions_.begin(), iter); + } + + Action GetAction() const { return legal_actions_[index]; } + + // Try to increment the index of the action. Used by the enumerator over + // deterministic policies (DeterministicPolicy::NextPolicy) below. 
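+  // For example, with 3 legal actions the index advances 0 -> 1 -> 2 on
+  // successive calls, and a further call returns false without changing it.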
+ bool TryIncIndex() { + if (index + 1 < legal_actions_.size()) { + index += 1; + return true; + } else { + return false; + } + } + + std::vector legal_actions_; + int index; +}; + +class DeterministicTabularPolicy : public Policy { + public: + // Creates a deterministic policy and sets it to the specified policy. + DeterministicTabularPolicy( + const Game& game, Player player, + const std::unordered_map policy); + + // Creates a default deterministic policy, with all actions set to their first + // legal action (index 0 in the legal actions list). + DeterministicTabularPolicy(const Game& game, Player player); + + ActionsAndProbs GetStatePolicy(const std::string& info_state) const override; + Action GetAction(const std::string& info_state) const; + + // Returns the current deterministic policy as a TabularPolicy. + TabularPolicy GetTabularPolicy() const; + + // Deterministic policies are ordered. First, we define some order to the + // information states (which is the order defined by the legal_actions_map + // for the game). Then the total order over policies is defined in a + // "counting order according to their associated tuple ( + // legal_action_index[state] for state in ordered_states). The first + // deterministic policy in the order is the one whose action is set is the + // first legal action (legal action index = 0). The value of the index can be + // interpreted as a digit in a mixed base integer, where the value of the + // integer would represent the position of the deterministic policy in the + // total order. + // + // This function sets this policy to the next deterministic policy in this + // counting order. The function returns true if this changed the policy (i.e + // there exists a next policy in the order), otherwise returns false. + bool NextPolicy(); + + // Resets the policy to the first one in the total order defined above: all + // actions set to their first legal action (index = 0 in the legal actions + // list). + void ResetDefaultPolicy(); + + // Returns a string representation of the policy, using the specified + // delimiter to separate information state and action. + std::string ToString(const std::string& delimiter) const; + + private: + void CreateTable(const Game& game, Player player); + + std::map table_; + Player player_; +}; + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_DETERMINISTIC_POLICY_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/deterministic_policy_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/deterministic_policy_test.cc new file mode 100644 index 0000000..ab41cee --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/deterministic_policy_test.cc @@ -0,0 +1,62 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/algorithms/deterministic_policy.h" + +#include "open_spiel/games/kuhn_poker/kuhn_poker.h" +#include "open_spiel/games/leduc_poker/leduc_poker.h" + +namespace open_spiel { +namespace algorithms { +namespace { + +void KuhnDeterministicPolicyTest() { + std::shared_ptr game = LoadGame("kuhn_poker"); + + int p0_policies = 1; + int p1_policies = 1; + + DeterministicTabularPolicy p0_policy(*game, Player{0}); + while (p0_policy.NextPolicy()) { + p0_policies += 1; + } + SPIEL_CHECK_EQ(p0_policies, 64); // 2^6 + + DeterministicTabularPolicy p1_policy(*game, Player{1}); + while (p1_policy.NextPolicy()) { + p1_policies += 1; + } + SPIEL_CHECK_EQ(p1_policies, 64); // 2^6 +} + +void NumDeterministicPoliciesTest() { + // In Kuhn, each player has 6 information states with 2 actions each. + std::shared_ptr game = LoadGame("kuhn_poker"); + SPIEL_CHECK_EQ(NumDeterministicPolicies(*game, 0), 64); + SPIEL_CHECK_EQ(NumDeterministicPolicies(*game, 1), 64); + + // Leduc poker has larger than 2^64 - 1, so -1 will be returned. + game = LoadGame("leduc_poker"); + SPIEL_CHECK_EQ(NumDeterministicPolicies(*game, 0), -1); + SPIEL_CHECK_EQ(NumDeterministicPolicies(*game, 1), -1); +} + +} // namespace +} // namespace algorithms +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::algorithms::KuhnDeterministicPolicyTest(); + open_spiel::algorithms::NumDeterministicPoliciesTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/dqn_torch/CMakeLists.txt b/scenarios/bargaining/open_spiel/open_spiel/algorithms/dqn_torch/CMakeLists.txt new file mode 100644 index 0000000..78716ad --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/dqn_torch/CMakeLists.txt @@ -0,0 +1,17 @@ +# To enable C++ Torch DQN, you will need to set OPEN_SPIEL_BUILD_WITH_LIBTORCH. +if (OPEN_SPIEL_BUILD_WITH_LIBTORCH) + add_library (dqn_torch OBJECT + dqn.h + dqn.cc + simple_nets.h + simple_nets.cc + ) + target_include_directories (dqn_torch PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + + add_executable(dqn_torch_test dqn_torch_test.cc ${OPEN_SPIEL_OBJECTS} + $ $) + add_test(dqn_torch_test dqn_torch_test) + + target_link_libraries(dqn_torch ${TORCH_LIBRARIES}) + target_link_libraries(dqn_torch_test ${TORCH_LIBRARIES}) +endif() diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/dqn_torch/dqn.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/dqn_torch/dqn.cc new file mode 100644 index 0000000..a1a2ffd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/dqn_torch/dqn.cc @@ -0,0 +1,338 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/algorithms/dqn_torch/dqn.h" + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/abseil-cpp/absl/random/random.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace torch_dqn { + +constexpr const float kIllegalActionLogitsPenalty = + std::numeric_limits::lowest(); + +Action RandomAgent::Step(const State& state, bool is_evaluation) { + if (state.IsTerminal()) { + return kInvalidAction; + } + std::vector legal_actions = state.LegalActions(player_); + int aidx = absl::Uniform(rng_, 0, legal_actions.size()); + return legal_actions[aidx]; +} + +DQN::DQN(const DQNSettings& settings) + : seed_(settings.seed), + use_observation_(settings.use_observation), + player_id_(settings.player_id), + num_actions_(settings.num_actions), + hidden_layers_sizes_(settings.hidden_layers_sizes), + update_target_network_every_(settings.update_target_network_every), + learn_every_(settings.learn_every), + min_buffer_size_to_learn_(settings.min_buffer_size_to_learn), + discount_factor_(settings.discount_factor), + epsilon_start_(settings.epsilon_start), + epsilon_end_(settings.epsilon_end), + epsilon_decay_duration_(settings.epsilon_decay_duration), + replay_buffer_(settings.replay_buffer_capacity), + batch_size_(settings.batch_size), + step_counter_(0), + exists_prev_(false), + prev_state_(nullptr), + prev_action_(0), + input_size_(settings.state_representation_size), + loss_str_(settings.loss_str), + q_network_(input_size_, hidden_layers_sizes_, num_actions_), + target_q_network_(input_size_, hidden_layers_sizes_, num_actions_), + optimizer_(q_network_->parameters(), + torch::optim::SGDOptions(settings.learning_rate)), + rng_(settings.seed) {} + +std::vector DQN::GetInfoState(const State& state, + Player player_id, + bool use_observation) { + if (use_observation) { + return state.ObservationTensor(player_id); + } else { + return state.InformationStateTensor(player_id); + } +} + +Action DQN::Step(const State& state, bool is_evaluation) { + // Chance nodes should be handled externally to the agent. 
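+  // (Callers are expected to sample the chance outcome themselves and apply
+  // it to the state before stepping the agents, as RunEpisodes below does via
+  // SampleAction on state->ChanceOutcomes().)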
+ SPIEL_CHECK_TRUE(!state.IsChanceNode()); + + Action action; + if (!state.IsTerminal() && + (state.CurrentPlayer() == player_id_ || state.IsSimultaneousNode())) { + std::vector info_state = GetInfoState(state, + player_id_, + use_observation_); + std::vector legal_actions = state.LegalActions(player_id_); + double epsilon = GetEpsilon(is_evaluation); + action = EpsilonGreedy(info_state, legal_actions, epsilon); + } else { + action = 0; + } + + if (!is_evaluation) { + step_counter_++; + + if (step_counter_ % learn_every_ == 0) { + Learn(); + } + if (step_counter_ % update_target_network_every_ == 0) { + std::stringstream stream; + torch::save(q_network_, stream); + torch::load(target_q_network_, stream); + } + if (exists_prev_) { + AddTransition(*prev_state_, prev_action_, state); + } + } + + if (state.IsTerminal()) { + exists_prev_ = false; + prev_action_ = 0; + prev_state_ = nullptr; + return kInvalidAction; + } else { + exists_prev_ = true; + prev_state_ = state.Clone(); + prev_action_ = action; + } + + return action; +} + +void DQN::AddTransition(const State& prev_state, + Action prev_action, + const State& state) { + Transition transition = { + /*info_state=*/GetInfoState(prev_state, player_id_, use_observation_), + /*action=*/prev_action_, + /*reward=*/state.PlayerReward(player_id_), + /*next_info_state=*/GetInfoState(state, player_id_, use_observation_), + /*is_final_step=*/state.IsTerminal(), + /*legal_actions_mask=*/state.LegalActionsMask()}; + replay_buffer_.Add(transition); +} + +Action DQN::EpsilonGreedy(std::vector info_state, + std::vector legal_actions, + double epsilon) { + Action action; + if (legal_actions.empty()) { + // In some simultaneous games, some players can have no legal actions. + return 0; + } else if (legal_actions.size() == 1) { + return legal_actions[0]; + } + + if (absl::Uniform(rng_, 0.0, 1.0) < epsilon) { + ActionsAndProbs actions_probs; + std::vector probs(legal_actions.size(), 1.0 / legal_actions.size()); + for (int i = 0; i < legal_actions.size(); i++) { + actions_probs.push_back({legal_actions[i], probs[i]}); + } + action = SampleAction(actions_probs, rng_).first; + } else { + torch::Tensor info_state_tensor = + torch::from_blob(info_state.data(), + {static_cast(info_state.size())}, + torch::dtype(torch::kFloat32)) + .view({1, -1}); + q_network_->eval(); + torch::Tensor q_values = q_network_->forward(info_state_tensor).detach(); + torch::Tensor illegal_actions_mask = + torch::full({num_actions_}, true, torch::dtype(torch::kBool)); + for (const auto& action : legal_actions) { + illegal_actions_mask[action] = false; + } + torch::Tensor legal_q_values = torch::masked_fill( + q_values, illegal_actions_mask, kIllegalActionLogitsPenalty); + action = legal_q_values.argmax(1).item().toInt(); + } + return action; +} + +double DQN::GetEpsilon(bool is_evaluation, int power) { + if (is_evaluation) { + return 0.0; + } + + double decay_steps = std::min( + static_cast(step_counter_), epsilon_decay_duration_); + double decayed_epsilon = ( + epsilon_end_ + (epsilon_start_ - epsilon_end_) * + std::pow((1 - decay_steps / epsilon_decay_duration_), power)); + return decayed_epsilon; +} + +void DQN::Learn() { + if (replay_buffer_.Size() < batch_size_ + || replay_buffer_.Size() < min_buffer_size_to_learn_) return; + std::vector transition = replay_buffer_.Sample(&rng_, + batch_size_); + std::vector info_states; + std::vector next_info_states; + std::vector legal_actions_mask; + std::vector actions; + std::vector rewards; + std::vector are_final_steps; + for (auto t : 
transition) { + info_states.push_back( + torch::from_blob(t.info_state.data(), + {1, static_cast(t.info_state.size())}, + torch::TensorOptions().dtype(torch::kFloat32)) + .clone()); + next_info_states.push_back( + torch::from_blob(t.next_info_state.data(), + {1, static_cast(t.next_info_state.size())}, + torch::TensorOptions().dtype(torch::kFloat32)) + .clone()); + legal_actions_mask.push_back( + torch::from_blob(t.legal_actions_mask.data(), + {1, static_cast(t.legal_actions_mask.size())}, + torch::TensorOptions().dtype(torch::kBool)) + .clone()); + actions.push_back(t.action); + rewards.push_back(t.reward); + are_final_steps.push_back(t.is_final_step); + } + torch::Tensor info_states_tensor = torch::stack(info_states, 0); + torch::Tensor next_info_states_tensor = torch::stack(next_info_states, 0); + q_network_->train(); + torch::Tensor q_values = q_network_->forward(info_states_tensor); + target_q_network_->eval(); + torch::Tensor target_q_values = target_q_network_->forward( + next_info_states_tensor).detach(); + + torch::Tensor illegal_action_masks_tensor = + torch::stack(legal_actions_mask, 0).bitwise_not(); + torch::Tensor legal_q_values = + torch::masked_fill(target_q_values, illegal_action_masks_tensor, + kIllegalActionLogitsPenalty); + + torch::Tensor max_next_q = std::get<0>(legal_q_values.max(2)); + + torch::Tensor are_final_steps_tensor = torch::from_blob( + are_final_steps.data(), + {batch_size_}, + torch::TensorOptions().dtype(torch::kInt32)).to(torch::kFloat32); + torch::Tensor rewards_tensor = torch::from_blob( + rewards.data(), + {batch_size_}, + torch::TensorOptions().dtype(torch::kFloat32)); + torch::Tensor target = rewards_tensor + ( + 1.0 - are_final_steps_tensor) * max_next_q.squeeze(1) * discount_factor_; + torch::Tensor actions_tensor = torch::from_blob( + actions.data(), + {batch_size_}, + torch::TensorOptions().dtype(torch::kInt64)); + torch::Tensor predictions = q_values.index( + {torch::arange(q_values.size(0)), + torch::indexing::Slice(), + actions_tensor}); + + optimizer_.zero_grad(); + torch::Tensor value_loss; + if (loss_str_ == "mse") { + torch::nn::MSELoss mse_loss; + value_loss = mse_loss(predictions.squeeze(1), target); + } else if (loss_str_ == "huber") { + torch::nn::SmoothL1Loss l1_loss; + value_loss = l1_loss(predictions.squeeze(1), target); + } else { + SpielFatalError("Not implemented, choose from 'mse', 'huber'."); + } + value_loss.backward(); + optimizer_.step(); +} + +void DQN::Load(const std::string& data_path, + const std::string& optimizer_data_path) { + torch::load(q_network_, data_path); + torch::load(target_q_network_, data_path); + if (!optimizer_data_path.empty()) { + torch::load(optimizer_, optimizer_data_path); + } +} + +void DQN::Save(const std::string& data_path, + const std::string& optimizer_data_path) { + torch::save(q_network_, data_path); + if (!optimizer_data_path.empty()) { + torch::save(optimizer_, optimizer_data_path); + } +} + +std::vector RunEpisodes(std::mt19937* rng, const Game& game, + const std::vector& agents, + int num_episodes, bool is_evaluation) { + SPIEL_CHECK_GE(num_episodes, 1); + SPIEL_CHECK_EQ(agents.size(), game.NumPlayers()); + std::vector total_returns(game.NumPlayers(), 0.0); + for (int i = 0; i < num_episodes; i++) { + std::unique_ptr state = game.NewInitialState(); + while (!state->IsTerminal()) { + Player player = state->CurrentPlayer(); + open_spiel::Action action; + if (state->IsChanceNode()) { + action = open_spiel::SampleAction(state->ChanceOutcomes(), + absl::Uniform(*rng, 0.0, 1.0)) + .first; + 
state->ApplyAction(action); + } else if (state->IsSimultaneousNode()) { + std::vector joint_action(game.NumPlayers()); + for (Player p = 0; p < game.NumPlayers(); ++p) { + joint_action[p] = agents[p]->Step(*state, is_evaluation); + } + state->ApplyActions(joint_action); + } else { + action = agents[player]->Step(*state, is_evaluation); + state->ApplyAction(action); + } + } + std::vector episode_returns = state->Returns(); + for (Player p = 0; p < game.NumPlayers(); ++p) { + agents[p]->Step(*state, is_evaluation); + total_returns[p] += episode_returns[p]; + } + } + + for (Player p = 0; p < game.NumPlayers(); ++p) { + total_returns[p] /= num_episodes; + } + + return total_returns; +} + +} // namespace torch_dqn +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/dqn_torch/dqn.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/dqn_torch/dqn.h new file mode 100644 index 0000000..676a46b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/dqn_torch/dqn.h @@ -0,0 +1,152 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_DQN_TORCH_DQN_H_ +#define OPEN_SPIEL_ALGORITHMS_DQN_TORCH_DQN_H_ + +#include + +#include +#include +#include +#include + +#include "open_spiel/algorithms/dqn_torch/simple_nets.h" +#include "open_spiel/abseil-cpp/absl/random/random.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/circular_buffer.h" + +// Note: This implementation has only been lightly tested on a few small games. + +namespace open_spiel { +namespace algorithms { +namespace torch_dqn { + +struct Transition { + std::vector info_state; + Action action; + double reward; + std::vector next_info_state; + bool is_final_step; + std::vector legal_actions_mask; +}; + +struct DQNSettings { + int seed; + bool use_observation; + Player player_id; + int state_representation_size; + int num_actions; + std::vector hidden_layers_sizes = {128}; + int replay_buffer_capacity = 10000; + int batch_size = 128; + double learning_rate = 0.01; + int update_target_network_every = 1000; + int learn_every = 10; + double discount_factor = 0.99; + int min_buffer_size_to_learn = 1000; + double epsilon_start = 1.0; + double epsilon_end = 0.1; + int epsilon_decay_duration = 1000000; + std::string loss_str = "mse"; +}; + +// TODO(author5): make this into a proper general RL env/agent API as we have +// in the Python API. Then include tabular q-learning and Sarsa as well. + +// A general agent class with a Step function. +class Agent { + public: + virtual ~Agent() = default; + virtual Action Step(const State& state, bool is_evaluation = false) = 0; +}; + +// Run a number of episodes with the given agents and return the average return +// for each agent over the episodes. Set is_evaluation to true when using this +// for evaluation. 
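+//
+// An illustrative usage sketch (not from the upstream header), e.g. pitting
+// two RandomAgents against each other in tic-tac-toe:
+//
+//   std::mt19937 rng(1234);
+//   std::shared_ptr<const Game> game = LoadGame("tic_tac_toe");
+//   RandomAgent agent0(/*player=*/0, /*seed=*/11);
+//   RandomAgent agent1(/*player=*/1, /*seed=*/22);
+//   std::vector<double> avg_returns = RunEpisodes(
+//       &rng, *game, {&agent0, &agent1}, /*num_episodes=*/100,
+//       /*is_evaluation=*/true);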
+std::vector RunEpisodes(std::mt19937* rng, const Game& game, + const std::vector& agents, + int num_episodes, bool is_evaluation); + +class RandomAgent : public Agent { + public: + RandomAgent(Player player, int seed) : player_(player), rng_(seed) {} + virtual ~RandomAgent() = default; + Action Step(const State& state, bool is_evaluation = false) override; + + private: + Player player_; + std::mt19937 rng_; +}; + +// DQN Agent implementation in LibTorch. +class DQN : public Agent { + public: + DQN(const DQNSettings& settings); + virtual ~DQN() = default; + Action Step(const State& state, bool is_evaluation = false) override; + + double GetEpsilon(bool is_evaluation, int power = 1.0); + int seed() const { return seed_; } + + // Load checkpoint/trained model and optimizer + void Load(const std::string& data_path, + const std::string& optimizer_data_path = ""); + // Save checkpoint/trained model and optimizer + void Save(const std::string& data_path, + const std::string& optimizer_data_path = ""); + + private: + std::vector GetInfoState(const State& state, Player player_id, + bool use_observation); + void AddTransition(const State& prev_state, Action prev_action, + const State& state); + Action EpsilonGreedy(std::vector info_state, + std::vector legal_actions, + double epsilon); + void Learn(); + + int seed_; + bool use_observation_; + int player_id_; + int num_actions_; + std::vector hidden_layers_sizes_; + int update_target_network_every_; + int learn_every_; + int min_buffer_size_to_learn_; + double discount_factor_; + double epsilon_start_; + double epsilon_end_; + double epsilon_decay_duration_; + CircularBuffer replay_buffer_; + int batch_size_; + int step_counter_; + bool exists_prev_; + std::unique_ptr prev_state_; + Action prev_action_; + int input_size_; + std::string loss_str_; + MLP q_network_; + MLP target_q_network_; + torch::optim::SGD optimizer_; + std::mt19937 rng_; +}; + +} // namespace torch_dqn +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_DQN_TORCH_DQN_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/dqn_torch/dqn_torch_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/dqn_torch/dqn_torch_test.cc new file mode 100644 index 0000000..7f31195 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/dqn_torch/dqn_torch_test.cc @@ -0,0 +1,164 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/algorithms/dqn_torch/dqn.h" + +#include + +#include +#include +#include + +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/games/efg_game/efg_game.h" +#include "open_spiel/games/efg_game/efg_game_data.h" + + +namespace open_spiel { +namespace algorithms { +namespace torch_dqn { +namespace { + +constexpr int kSeed = 93879211; + +void TestSimpleGame() { + std::shared_ptr game = efg_game::LoadEFGGame( + efg_game::GetSimpleForkEFGData()); + SPIEL_CHECK_TRUE(game != nullptr); + DQNSettings settings = { + /*seed*/ kSeed, + /*use_observation*/ game->GetType().provides_observation_tensor, + /*player_id*/ 0, + /*state_representation_size*/ game->InformationStateTensorSize(), + /*num_actions*/ game->NumDistinctActions(), + /*hidden_layers_sizes*/ {16}, + /*replay_buffer_capacity*/ 100, + /*batch_size*/ 5, + /*learning_rate*/ 0.01, + /*update_target_network_every*/ 20, + /*learn_every*/ 5, + /*discount_factor*/ 1.0, + /*min_buffer_size_to_learn*/ 5, + /*epsilon_start*/ 0.02, + /*epsilon_end*/ 0.01}; + DQN dqn(settings); + int total_reward = 0; + std::unique_ptr state; + for (int i = 0; i < 150; i++) { + state = game->NewInitialState(); + while (!state->IsTerminal()) { + open_spiel::Action action = dqn.Step(*state); + state->ApplyAction(action); + total_reward += state->PlayerReward(0); + } + dqn.Step(*state); + } + + SPIEL_CHECK_GE(total_reward, 120); +} + +void TestTicTacToe() { + std::shared_ptr game = open_spiel::LoadGame("tic_tac_toe"); + SPIEL_CHECK_TRUE(game != nullptr); + std::vector> agents; + std::vector hidden_layers = {16}; + for (int i = 0; i < 2; i++) { + DQNSettings settings = { + /*seed*/ kSeed + i, + /*use_observation*/ game->GetType().provides_observation_tensor, + /*player_id*/ i, + /*state_representation_size*/ game->ObservationTensorSize(), + /*num_actions*/ game->NumDistinctActions(), + /*hidden_layers_sizes*/ hidden_layers, + /*replay_buffer_capacity*/ 10, + /*batch_size*/ 5, + /*learning_rate*/ 0.01, + /*update_target_network_every*/ 20, + /*learn_every*/ 5, + /*discount_factor*/ 1.0, + /*min_buffer_size_to_learn*/ 5}; + agents.push_back(std::make_unique(settings)); + } + std::unique_ptr state = game->NewInitialState(); + while (!state->IsTerminal()) { + Player current_player = state->CurrentPlayer(); + open_spiel::Action action = agents[current_player]->Step(*state); + state->ApplyAction(action); + } + for (int i = 0; i < 2; i++) { + agents[i]->Step(*state); + } +} + +void TestHanabi() { + std::shared_ptr game = open_spiel::LoadGame("tiny_hanabi"); + SPIEL_CHECK_TRUE(game != nullptr); + std::vector> agents; + std::vector hidden_layers = {16}; + std::mt19937 rng_; + for (int i = 0; i < 2; i++) { + DQNSettings settings = { + /*seed*/ kSeed + i, + /*use_observation*/ game->GetType().provides_observation_tensor, + /*player_id*/ i, + /*state_representation_size*/ game->InformationStateTensorSize(), + /*num_actions*/ game->NumDistinctActions(), + /*hidden_layers_sizes*/ hidden_layers, + /*replay_buffer_capacity*/ 10, + /*batch_size*/ 5, + /*learning_rate*/ 0.01, + /*update_target_network_every*/ 20, + /*learn_every*/ 5, + /*discount_factor*/ 1.0, + /*min_buffer_size_to_learn*/ 5}; + agents.push_back(std::make_unique(settings)); + } + std::unique_ptr state = game->NewInitialState(); + while (!state->IsTerminal()) { + Player current_player = state->CurrentPlayer(); + open_spiel::Action action; + if (state->IsChanceNode()) { + action = open_spiel::SampleAction(state->ChanceOutcomes(), + absl::Uniform(rng_, 0.0, 
1.0)).first; + } else { + // Simultaneous move game, step both! + for (int i = 0; i < 2; i++) { + if (i == current_player) { + action = agents[i]->Step(*state); + } else { + agents[i]->Step(*state); + } + } + } + state->ApplyAction(action); + } + for (int i = 0; i < 2; i++) { + agents[i]->Step(*state); + } +} +} // namespace +} // namespace torch_dqn +} // namespace algorithms +} // namespace open_spiel + +namespace torch_dqn = open_spiel::algorithms::torch_dqn; + +int main(int args, char** argv) { + torch::manual_seed(torch_dqn::kSeed); + torch_dqn::TestSimpleGame(); + torch_dqn::TestTicTacToe(); + torch_dqn::TestHanabi(); + return 0; +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/dqn_torch/simple_nets.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/dqn_torch/simple_nets.cc new file mode 100644 index 0000000..531fe2b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/dqn_torch/simple_nets.cc @@ -0,0 +1,91 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/dqn_torch/simple_nets.h" + +#include + +#include +#include +#include +#include + +namespace open_spiel { +namespace algorithms { +namespace torch_dqn { + +constexpr double kSqrt2 = 1.4142135623730950488; + +SimpleLinearImpl::SimpleLinearImpl(int input_size, int output_size, + bool activate_relu) + : activate_relu_(activate_relu), + simple_linear_(torch::nn::LinearOptions(/*in_features*/ input_size, + /*out_features*/ output_size)) { + double stddev = 1.0 / std::sqrt(input_size); + double lower = -2.0 * stddev; + double upper = 2.0 * stddev; + for (auto& named_parameter : simple_linear_->named_parameters()) { + if (named_parameter.key().find("weight") != std::string::npos) { + torch::Tensor uniform_param = torch::nn::init::uniform_( + named_parameter.value()).to(torch::kFloat64); + double clip_lower = 0.5 * (1.0 + std::erf(lower / kSqrt2)); + double clip_upper = 0.5 * (1.0 + std::erf(upper / kSqrt2)); + torch::Tensor new_param = kSqrt2 * torch::erfinv( + 2.0 * ((clip_upper - clip_lower) * uniform_param + clip_lower) - 1.0); + named_parameter.value().data() = new_param; + } + if (named_parameter.key().find("bias") != std::string::npos) { + named_parameter.value().data() = torch::zeros({output_size}); + } + } + register_module("simple_linear_", simple_linear_); +} + +torch::Tensor SimpleLinearImpl::forward(torch::Tensor x) { + if (activate_relu_) { + return torch::relu(simple_linear_->forward(x)); + } else { + return simple_linear_->forward(x); + } +} + +MLPImpl::MLPImpl(int input_size, const std::vector& hidden_layers_sizes, + int output_size, bool activate_final) + : input_size_(input_size), + hidden_layers_sizes_(hidden_layers_sizes), + output_size_(output_size), + activate_final_(activate_final) { + int layer_size = input_size_; + for (auto h_size : hidden_layers_sizes_) { + layers_->push_back(SimpleLinear(/*input_size*/layer_size, + /*output_size*/h_size)); + layer_size = h_size; + } + 
layers_->push_back(SimpleLinear(/*input_size*/ layer_size, + /*output_size*/ output_size_, + /*activate_final*/ activate_final_)); + register_module("layers_", layers_); +} + +torch::Tensor MLPImpl::forward(torch::Tensor x) { + for (int i=0; i < hidden_layers_sizes_.size() + 1; i++) { + x = layers_[i]->as()->forward(x); + } + return x; +} + + +} // namespace torch_dqn +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/dqn_torch/simple_nets.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/dqn_torch/simple_nets.h new file mode 100644 index 0000000..0361918 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/dqn_torch/simple_nets.h @@ -0,0 +1,63 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_DQN_TORCH_SIMPLE_NETS_H_ +#define OPEN_SPIEL_ALGORITHMS_DQN_TORCH_SIMPLE_NETS_H_ + +#include + +#include +#include +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace torch_dqn { + +// Always includes biases and only supports ReLU activations. +class SimpleLinearImpl : public torch::nn::Module { + public : + SimpleLinearImpl(int input_size, int output_size, bool activate_relu = true); + torch::Tensor forward(torch::Tensor x); + + private: + bool activate_relu_; + torch::nn::Linear simple_linear_; +}; +TORCH_MODULE(SimpleLinear); + +// A simple dense network built from linear layers above. +class MLPImpl : public torch::nn::Module { + public: + MLPImpl(int input_size, const std::vector& hidden_layers_sizes, + int output_size, bool activate_final = false); + torch::Tensor forward(torch::Tensor x); + + private: + int input_size_; + std::vector hidden_layers_sizes_; + int output_size_; + bool activate_final_; + torch::nn::ModuleList layers_; +}; +TORCH_MODULE(MLP); + +} // namespace torch_dqn +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_DQN_TORCH_SIMPLE_NETS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/evaluate_bots.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/evaluate_bots.cc new file mode 100644 index 0000000..f8ffba5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/evaluate_bots.cc @@ -0,0 +1,83 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/algorithms/evaluate_bots.h" + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/time/time.h" +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_bots.h" + +namespace open_spiel { + +std::vector EvaluateBots(State* state, const std::vector& bots, + int seed) { + const int num_players = bots.size(); + std::mt19937 rng(seed); + std::vector joint_actions(bots.size()); + if (state->History().empty()) { + for (auto bot : bots) bot->Restart(); + } else { + for (auto bot : bots) bot->RestartAt(*state); + } + while (!state->IsTerminal()) { + if (state->IsChanceNode()) { + Action action = SampleAction(state->ChanceOutcomes(), rng).first; + for (auto bot : bots) bot->InformAction(*state, kChancePlayerId, action); + state->ApplyAction(action); + } else if (state->IsSimultaneousNode()) { + for (Player p = 0; p < num_players; ++p) { + if (state->LegalActions(p).empty()) { + joint_actions[p] = kInvalidAction; + } else { + joint_actions[p] = bots[p]->Step(*state); + } + } + state->ApplyActions(joint_actions); + } else { + Player current_player = state->CurrentPlayer(); + Action action = bots[current_player]->Step(*state); + for (Player p = 0; p < num_players; ++p) { + if (p != current_player) { + bots[p]->InformAction(*state, current_player, action); + } + } + state->ApplyAction(action); + } + } + + // Return terminal utility. + return state->Returns(); +} + +std::vector EvaluateBots(const Game& game, + const std::vector& bots, int seed) { + std::unique_ptr state = game.NewInitialState(); + return EvaluateBots(state.get(), bots, seed); +} + +std::vector EvaluateBots(const Game& game, + const std::vector& bots) { + absl::Duration time_gap = absl::Now() - absl::UnixEpoch(); + std::mt19937 rng(absl::ToInt64Nanoseconds(time_gap)); + const int seed = absl::Uniform(rng, std::numeric_limits::min(), + std::numeric_limits::max()); + return EvaluateBots(game, bots, seed); +} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/evaluate_bots.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/evaluate_bots.h new file mode 100644 index 0000000..7037cf0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/evaluate_bots.h @@ -0,0 +1,42 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_EVALUATE_BOTS_H_ +#define OPEN_SPIEL_ALGORITHMS_EVALUATE_BOTS_H_ + +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_bots.h" + +namespace open_spiel { + +// Play a game once, to compare bot performance. +// Must supply one bot for each player in the game. +// `state.Restart()` will be called if the state is the root node, otherwise +// `RestartAt(state)` will be called. +std::vector EvaluateBots(State* state, const std::vector& bots, + int seed); + +// As above, but creates a new state at the root of the game. 
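+// An illustrative call (sketch, not from the upstream header):
+//
+//   std::shared_ptr<const Game> game = LoadGame("kuhn_poker");
+//   std::unique_ptr<Bot> bot0 = MakeUniformRandomBot(/*player_id=*/0, /*seed=*/1234);
+//   std::unique_ptr<Bot> bot1 = MakeUniformRandomBot(/*player_id=*/1, /*seed=*/4321);
+//   std::vector<double> returns =
+//       EvaluateBots(*game, {bot0.get(), bot1.get()}, /*seed=*/42);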
+std::vector EvaluateBots(const Game& game, + const std::vector& bots, int seed); + +// As above, but calculates a random seed. +std::vector EvaluateBots(const Game& game, + const std::vector& bots); + +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_EVALUATE_BOTS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/evaluate_bots_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/evaluate_bots_test.cc new file mode 100644 index 0000000..8eaf123 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/evaluate_bots_test.cc @@ -0,0 +1,79 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/evaluate_bots.h" + +#include + +#include "open_spiel/policy.h" +#include "open_spiel/spiel_bots.h" + +namespace open_spiel { +namespace { + +void BotTest_RandomVsRandom() { + auto game = LoadGame("kuhn_poker"); + auto bot0 = MakeUniformRandomBot(0, /*seed=*/1234); + auto bot1 = MakeStatefulRandomBot(*game, 1, /*seed=*/4321); + constexpr int num_players = 2; + std::vector average_results(num_players); + constexpr int num_iters = 100000; + for (int iteration = 0; iteration < num_iters; ++iteration) { + auto this_results = + EvaluateBots(game->NewInitialState().get(), {bot0.get(), bot1.get()}, + /*seed=*/iteration); + for (auto p = Player{0}; p < num_players; ++p) { + average_results[p] += this_results[p]; + } + } + for (auto p = Player{0}; p < num_players; ++p) { + average_results[p] /= num_iters; + } + + SPIEL_CHECK_FLOAT_NEAR(average_results[0], 0.125, 0.01); + SPIEL_CHECK_FLOAT_NEAR(average_results[1], -0.125, 0.01); +} + +void BotTest_RandomVsRandomPolicy() { + auto game = LoadGame("kuhn_poker"); + auto bot0 = MakeUniformRandomBot(0, /*seed=*/1234); + std::unique_ptr uniform_policy = + std::make_unique(GetUniformPolicy(*game)); + auto bot1 = + MakePolicyBot(*game, Player{1}, /*seed=*/4321, std::move(uniform_policy)); + constexpr int num_players = 2; + std::vector average_results(num_players); + constexpr int num_iters = 100000; + for (int iteration = 0; iteration < num_iters; ++iteration) { + auto this_results = + EvaluateBots(game->NewInitialState().get(), {bot0.get(), bot1.get()}, + /*seed=*/iteration); + for (auto p = Player{0}; p < num_players; ++p) { + average_results[p] += this_results[p]; + } + } + for (auto p = Player{0}; p < num_players; ++p) { + average_results[p] /= num_iters; + } + SPIEL_CHECK_FLOAT_NEAR(average_results[0], 0.125, 0.01); + SPIEL_CHECK_FLOAT_NEAR(average_results[1], -0.125, 0.01); +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::BotTest_RandomVsRandom(); + open_spiel::BotTest_RandomVsRandomPolicy(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/expected_returns.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/expected_returns.cc new file mode 100644 index 0000000..092bd7c --- /dev/null +++ 
b/scenarios/bargaining/open_spiel/open_spiel/algorithms/expected_returns.cc @@ -0,0 +1,364 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/expected_returns.h" + +#include +#include +#include + +#include "open_spiel/simultaneous_move_game.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace { + +// Implements the recursive traversal using a general way to access the +// player's policies via a function that takes as arguments the player id and +// information state. +// We have a special case for the case where we can get a policy just from the +// InfostateString as that gives us a 2x speedup. +std::vector ExpectedReturnsImpl( + const State& state, + const std::function& + policy_func, + int depth_limit, + float prob_cut_threshold) { + if (state.IsTerminal() || depth_limit == 0) { + return state.Rewards(); + } + + int num_players = state.NumPlayers(); + std::vector values(num_players, 0.0); + if (state.IsChanceNode()) { + ActionsAndProbs action_and_probs = state.ChanceOutcomes(); + for (const auto& action_and_prob : action_and_probs) { + if (action_and_prob.second <= prob_cut_threshold) continue; + std::unique_ptr child = state.Child(action_and_prob.first); + std::vector child_values = + ExpectedReturnsImpl( + *child, policy_func, depth_limit - 1, prob_cut_threshold); + for (auto p = Player{0}; p < num_players; ++p) { + values[p] += action_and_prob.second * child_values[p]; + } + } + } else if (state.IsSimultaneousNode()) { + // Walk over all the joint actions, and weight by the product of + // probabilities to choose them. + values = state.Rewards(); + auto smstate = dynamic_cast(&state); + SPIEL_CHECK_TRUE(smstate != nullptr); + std::vector state_policies(num_players); + for (auto p = Player{0}; p < num_players; ++p) { + state_policies[p] = policy_func(p, state.InformationStateString(p)); + if (state_policies[p].empty()) { + SpielFatalError("Error in ExpectedReturnsImpl; infostate not found."); + } + } + for (const Action flat_action : smstate->LegalActions()) { + std::vector actions = + smstate->FlatJointActionToActions(flat_action); + double joint_action_prob = 1.0; + for (auto p = Player{0}; p < num_players; ++p) { + double player_action_prob = GetProb(state_policies[p], actions[p]); + SPIEL_CHECK_GE(player_action_prob, 0.0); + SPIEL_CHECK_LE(player_action_prob, 1.0); + joint_action_prob *= player_action_prob; + if (joint_action_prob <= prob_cut_threshold) { + break; + } + } + + if (joint_action_prob > prob_cut_threshold) { + std::unique_ptr child = state.Clone(); + child->ApplyActions(actions); + std::vector child_values = + ExpectedReturnsImpl( + *child, policy_func, depth_limit - 1, prob_cut_threshold); + for (auto p = Player{0}; p < num_players; ++p) { + values[p] += joint_action_prob * child_values[p]; + } + } + } + } else { + // Turn-based decision node. 
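+// A decision node's value is its immediate reward plus the policy-weighted
+// values of its children; actions whose probability is at or below
+// prob_cut_threshold are skipped, so a positive threshold trades exactness
+// for speed.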
+ Player player = state.CurrentPlayer(); + ActionsAndProbs state_policy = + policy_func(player, state.InformationStateString()); + if (state_policy.empty()) { + SpielFatalError("Error in ExpectedReturnsImpl; infostate not found."); + } + values = state.Rewards(); + float total_prob = 0.0; + for (const Action action : state.LegalActions()) { + std::unique_ptr child = state.Child(action); + // GetProb can return -1 for legal actions not in the policy. We treat + // these as having zero probability, but check that at least some actions + // have positive probability. + double action_prob = GetProb(state_policy, action); + SPIEL_CHECK_LE(action_prob, 1.0); + if (action_prob > prob_cut_threshold) { + SPIEL_CHECK_GE(action_prob, 0.0); + total_prob += action_prob; + std::vector child_values = + ExpectedReturnsImpl( + *child, policy_func, depth_limit - 1, prob_cut_threshold); + for (auto p = Player{0}; p < num_players; ++p) { + values[p] += action_prob * child_values[p]; + } + } + } + // Check that there is a least some positive mass on at least one action. + // Consider using: SPIEL_CHECK_FLOAT_EQ(total_prob, 1.0); + SPIEL_CHECK_GT(total_prob, 0.0); + } + SPIEL_CHECK_EQ(values.size(), state.NumPlayers()); + return values; +} + +// Same as above, but the policy_func now takes a State as input in, rather +// than a string. +std::vector ExpectedReturnsImpl( + const State& state, + const std::function& policy_func, + int depth_limit, + float prob_cut_threshold) { + if (state.IsTerminal() || depth_limit == 0) { + return state.Rewards(); + } + + int num_players = state.NumPlayers(); + std::vector values(num_players, 0.0); + if (state.IsChanceNode()) { + ActionsAndProbs action_and_probs = state.ChanceOutcomes(); + for (const auto& action_and_prob : action_and_probs) { + if (action_and_prob.second <= prob_cut_threshold) continue; + std::unique_ptr child = state.Child(action_and_prob.first); + std::vector child_values = + ExpectedReturnsImpl( + *child, policy_func, depth_limit - 1, prob_cut_threshold); + for (auto p = Player{0}; p < num_players; ++p) { + values[p] += action_and_prob.second * child_values[p]; + } + } + } else if (state.IsSimultaneousNode()) { + // Walk over all the joint actions, and weight by the product of + // probabilities to choose them. + values = state.Rewards(); + auto smstate = dynamic_cast(&state); + SPIEL_CHECK_TRUE(smstate != nullptr); + std::vector state_policies(num_players); + for (auto p = Player{0}; p < num_players; ++p) { + state_policies[p] = policy_func(p, state); + if (state_policies[p].empty()) { + SpielFatalError("Error in ExpectedReturnsImpl; infostate not found."); + } + } + for (const Action flat_action : smstate->LegalActions()) { + std::vector actions = + smstate->FlatJointActionToActions(flat_action); + double joint_action_prob = 1.0; + for (auto p = Player{0}; p < num_players; ++p) { + double player_action_prob = GetProb(state_policies[p], actions[p]); + SPIEL_CHECK_GE(player_action_prob, 0.0); + SPIEL_CHECK_LE(player_action_prob, 1.0); + joint_action_prob *= player_action_prob; + if (joint_action_prob <= prob_cut_threshold) { + break; + } + } + + if (joint_action_prob > prob_cut_threshold) { + std::unique_ptr child = state.Clone(); + child->ApplyActions(actions); + std::vector child_values = + ExpectedReturnsImpl( + *child, policy_func, depth_limit - 1, prob_cut_threshold); + for (auto p = Player{0}; p < num_players; ++p) { + values[p] += joint_action_prob * child_values[p]; + } + } + } + } else { + // Turn-based decision node. 
+ Player player = state.CurrentPlayer(); + ActionsAndProbs state_policy = policy_func(player, state); + if (state_policy.empty()) { + SpielFatalError("Error in ExpectedReturnsImpl; infostate not found."); + } + values = state.Rewards(); + for (const Action action : state.LegalActions()) { + std::unique_ptr child = state.Child(action); + double action_prob = GetProb(state_policy, action); + SPIEL_CHECK_GE(action_prob, 0.0); + SPIEL_CHECK_LE(action_prob, 1.0); + if (action_prob > prob_cut_threshold) { + std::vector child_values = + ExpectedReturnsImpl( + *child, policy_func, depth_limit - 1, prob_cut_threshold); + for (auto p = Player{0}; p < num_players; ++p) { + values[p] += action_prob * child_values[p]; + } + } + } + } + SPIEL_CHECK_EQ(values.size(), state.NumPlayers()); + return values; +} + +std::vector ExpectedReturnsOfDeterministicPoliciesFromSeedsImpl( + const State& state, + const std::vector& policy_seeds, + const std::vector& policies) { + if (state.IsTerminal()) { + return state.Rewards(); + } + const int num_players = state.NumPlayers(); + std::vector values(num_players, 0.0); + if (state.IsSimultaneousNode()) { + SpielFatalError("Simultaneous not implemented."); + } else if (state.IsChanceNode()) { + ActionsAndProbs actions_and_probs = state.ChanceOutcomes(); + for (const auto& action_and_prob : actions_and_probs) { + if (action_and_prob.second <= 0.0) continue; + std::unique_ptr child = state.Child(action_and_prob.first); + const std::vector child_values = ( + ExpectedReturnsOfDeterministicPoliciesFromSeedsImpl( + *child, policy_seeds, policies)); + for (auto p = Player{0}; p < num_players; ++p) { + values[p] += action_and_prob.second * child_values[p]; + } + } + } else { + // Get information state string. + std::string info_state_string = state.InformationStateString(); + const int player = state.CurrentPlayer(); + + // Search for policy in policies. + ActionsAndProbs actions_and_probs = {}; + for (const auto& policy : policies) { + actions_and_probs = policy->GetStatePolicy(state); + if (!actions_and_probs.empty()) { + break; + } + } + if (!actions_and_probs.empty()) { + for (const auto& action_and_prob : actions_and_probs) { + if (action_and_prob.second <= 0.0) continue; + std::unique_ptr child = state.Child(action_and_prob.first); + const std::vector child_values = ( + ExpectedReturnsOfDeterministicPoliciesFromSeedsImpl( + *child, policy_seeds, policies)); + for (auto p = Player{0}; p < num_players; ++p) { + values[p] += action_and_prob.second * child_values[p]; + } + } + return values; + } + + // Determine the state seed from the policy seed. 
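+// The seed mixes a hash of the information state string with the player's
+// global policy seed, the move number and the player id, so a single integer
+// per player fixes a deterministic action at every information state without
+// ever materializing the policy.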
+ auto state_seed = std::hash{}(info_state_string); + state_seed += policy_seeds[player]; + state_seed += state.MoveNumber() * num_players; + state_seed += player; + std::mt19937 gen(state_seed); + + const auto legal_actions = state.LegalActions(); + std::uniform_int_distribution dist(0, legal_actions.size() - 1); + const int sampled_action_index = dist(gen); + const Action action = legal_actions[sampled_action_index]; + + SPIEL_CHECK_GE(action, 0); + std::unique_ptr child = state.Child(action); + std::vector child_values = ( + ExpectedReturnsOfDeterministicPoliciesFromSeedsImpl( + *child, policy_seeds, policies)); + for (auto p = Player{0}; p < num_players; ++p) { + values[p] += child_values[p]; + } + } + SPIEL_CHECK_EQ(values.size(), state.NumPlayers()); + return values; +} +} // namespace + +std::vector ExpectedReturns(const State& state, + const std::vector& policies, + int depth_limit, + bool use_infostate_get_policy, + float prob_cut_threshold) { + if (use_infostate_get_policy) { + return ExpectedReturnsImpl( + state, + [&policies](Player player, const std::string& info_state) { + return policies[player]->GetStatePolicy(info_state); + }, + depth_limit, + prob_cut_threshold); + } else { + return ExpectedReturnsImpl( + state, + [&policies](Player player, const State& state) { + return policies[player]->GetStatePolicy(state, player); + }, + depth_limit, + prob_cut_threshold); + } +} + +std::vector ExpectedReturns(const State& state, + const Policy& joint_policy, int depth_limit, + bool use_infostate_get_policy, + float prob_cut_threshold) { + if (use_infostate_get_policy) { + return ExpectedReturnsImpl( + state, + [&joint_policy](Player player, const std::string& info_state) { + return joint_policy.GetStatePolicy(info_state); + }, + depth_limit, + prob_cut_threshold); + } else { + return ExpectedReturnsImpl( + state, + [&joint_policy](Player player, const State& state) { + return joint_policy.GetStatePolicy(state, player); + }, + depth_limit, + prob_cut_threshold); + } +} + + +std::vector ExpectedReturnsOfDeterministicPoliciesFromSeeds( + const State& state, const std::vector& policy_seeds) { + const std::vector& policies = {}; + SPIEL_CHECK_EQ(policy_seeds.size(), state.NumPlayers()); + return ExpectedReturnsOfDeterministicPoliciesFromSeedsImpl( + state, policy_seeds, policies); +} + +std::vector ExpectedReturnsOfDeterministicPoliciesFromSeeds( + const State& state, const std::vector& policy_seeds, + const std::vector& policies) { + SPIEL_CHECK_EQ(policy_seeds.size(), state.NumPlayers()); + return ExpectedReturnsOfDeterministicPoliciesFromSeedsImpl( + state, policy_seeds, policies); +} + + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/expected_returns.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/expected_returns.h new file mode 100644 index 0000000..7828413 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/expected_returns.h @@ -0,0 +1,80 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_EXPECTED_RETURNS_H_ +#define OPEN_SPIEL_ALGORITHMS_EXPECTED_RETURNS_H_ + +#include + +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace algorithms { + +// Computes the (undiscounted) expected returns from a depth-limited search +// starting at the state and following each player's policy. Using a negative +// depth will do a full tree traversal (from the specified state). Using a +// prob_cut_threshold > 0 will cut the tree search if the reach probability +// goes below this value resulting in an approximate return. +// +// Policies need not be complete; any missing legal actions will be assumed to +// have zero probability. +// +// The second overloaded function acts the same way, except assumes that all of +// the players' policies are encapsulated in one joint policy. +// +// The `use_infostate_get_policy` flag indicates whether to call +// Policy::GetStatePolicy(const std::string&) rather than +// Policy::GetStatePolicy(const State&) instead for retrieving the policy at +// each information state; we use a default of true for performance reasons. +std::vector ExpectedReturns(const State& state, + const std::vector& policies, + int depth_limit, + bool use_infostate_get_policy = true, + float prob_cut_threshold = 0.0); +std::vector ExpectedReturns(const State& state, + const Policy& joint_policy, int depth_limit, + bool use_infostate_get_policy = true, + float prob_cut_threshold = 0.0); + +// Computes the (undiscounted) expected returns from random deterministic +// policies which are specified using a seed. There should be a policy_seed per +// player. Optionally any number of policies can be provided which override +// the random deterministic policies. +// +// A deterministic policy is one that places all probability mass on a single +// action at each information state. We randomly generate a deterministic +// policy from a seed as follows: +// * Specify a policy seed for each player. +// * For each information state visited: +// - Calculate an integer hash of the information state string. +// - Add the move number. +// - Add the global seed of the corresponding player. +// - This results in a new seed per information state. +// - Using this seed, sample an action from a uniform integer distribution. +// +// This means that an entire policy can be represented cheaply with a single +// integer and allows computing expected returns of games whose tabular policies +// may not fit in memory. +std::vector ExpectedReturnsOfDeterministicPoliciesFromSeeds( + const State& state, const std::vector & policy_seeds); +std::vector ExpectedReturnsOfDeterministicPoliciesFromSeeds( + const State& state, const std::vector & policy_seeds, + const std::vector& policies); + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_EXPECTED_RETURNS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/external_sampling_mccfr.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/external_sampling_mccfr.cc new file mode 100644 index 0000000..bf11ce5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/external_sampling_mccfr.cc @@ -0,0 +1,291 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/external_sampling_mccfr.h" + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/algorithms/cfr.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { + +// Serialization of the MCCFR solver is in agreement with de/serialization of +// regular CFR solvers, i.e. take a look at the PartiallyDeserializedCFRSolver() +// method for more info. +constexpr const int kSerializationVersion = 1; +constexpr const char* kSerializeSolverRNGSectionHeader = "[SolverRNG]"; +constexpr const char* kSerializeSolverAverageTypeSectionHeader = + "[SolverAverageType]"; +constexpr const char* kSerializeSolverDefaultPolicySectionHeader = + "[SolverDefaultPolicy]"; + +ExternalSamplingMCCFRSolver::ExternalSamplingMCCFRSolver(const Game& game, + int seed, + AverageType avg_type) + : ExternalSamplingMCCFRSolver(game, std::make_shared(), seed, + avg_type) {} + +ExternalSamplingMCCFRSolver::ExternalSamplingMCCFRSolver( + const Game& game, std::shared_ptr default_policy, int seed, + AverageType avg_type) + : ExternalSamplingMCCFRSolver(game.shared_from_this(), default_policy, + std::make_unique(seed), + avg_type) {} + +ExternalSamplingMCCFRSolver::ExternalSamplingMCCFRSolver( + std::shared_ptr game, std::shared_ptr default_policy, + std::unique_ptr rng, AverageType avg_type) + : game_(game), + rng_(std::move(rng)), + avg_type_(avg_type), + dist_(0.0, 1.0), + default_policy_(default_policy) { + if (game_->GetType().dynamics != GameType::Dynamics::kSequential) { + SpielFatalError( + "MCCFR requires sequential games. 
If you're trying to run it " + "on a simultaneous (or normal-form) game, please first transform it " + "using turn_based_simultaneous_game."); + } +} + +void ExternalSamplingMCCFRSolver::RunIteration() { RunIteration(rng_.get()); } + +void ExternalSamplingMCCFRSolver::RunIteration(std::mt19937* rng) { + for (auto p = Player{0}; p < game_->NumPlayers(); ++p) { + UpdateRegrets(*game_->NewInitialState(), p, rng); + } + + if (avg_type_ == AverageType::kFull) { + std::vector reach_probs(game_->NumPlayers(), 1.0); + FullUpdateAverage(*game_->NewInitialState(), reach_probs); + } +} + +std::string ExternalSamplingMCCFRSolver::Serialize( + int double_precision, std::string delimiter) const { + SPIEL_CHECK_GE(double_precision, -1); + std::string str = ""; + // Meta section + absl::StrAppend(&str, + "# Automatically generated by OpenSpiel " + "ExternalSamplingMCCFRSolver::Serialize\n"); + absl::StrAppend(&str, kSerializeMetaSectionHeader, "\n"); + absl::StrAppend(&str, "Version: ", kSerializationVersion, "\n"); + absl::StrAppend(&str, "\n"); + // Game section + absl::StrAppend(&str, kSerializeGameSectionHeader, "\n"); + absl::StrAppend(&str, game_->Serialize(), "\n"); + // Internal solver state section + absl::StrAppend(&str, kSerializeSolverTypeSectionHeader, "\n"); + absl::StrAppend(&str, "ExternalSamplingMCCFRSolver", "\n"); + absl::StrAppend(&str, kSerializeSolverSpecificStateSectionHeader, "\n"); + // RNG section + absl::StrAppend(&str, kSerializeSolverRNGSectionHeader, "\n"); + std::ostringstream rng_stream; + rng_stream << *rng_; + absl::StrAppend(&str, rng_stream.str(), "\n"); + // Average type section + absl::StrAppend(&str, kSerializeSolverAverageTypeSectionHeader, "\n"); + if (avg_type_ == AverageType::kFull) + absl::StrAppend(&str, "FullAverageType", "\n"); + else if (avg_type_ == AverageType::kSimple) + absl::StrAppend(&str, "SimpleAverageType", "\n"); + // Default policy section + absl::StrAppend(&str, kSerializeSolverDefaultPolicySectionHeader, "\n"); + absl::StrAppend(&str, default_policy_->Serialize(double_precision, delimiter), + "\n"); + // Info state values table section + absl::StrAppend(&str, kSerializeSolverValuesTableSectionHeader, "\n"); + SerializeCFRInfoStateValuesTable(info_states_, &str, double_precision, + delimiter); + return str; +} + +double ExternalSamplingMCCFRSolver::UpdateRegrets(const State& state, + Player player, + std::mt19937* rng) { + if (state.IsTerminal()) { + return state.PlayerReturn(player); + } else if (state.IsChanceNode()) { + Action action = SampleAction(state.ChanceOutcomes(), dist_(*rng)).first; + return UpdateRegrets(*state.Child(action), player, rng); + } else if (state.IsSimultaneousNode()) { + SpielFatalError( + "Simultaneous moves not supported. Use " + "TurnBasedSimultaneousGame to convert the game first."); + } + + Player cur_player = state.CurrentPlayer(); + std::string is_key = state.InformationStateString(cur_player); + std::vector legal_actions = state.LegalActions(); + + // The insert here only inserts the default value if the key is not found, + // otherwise returns the entry in the map. + auto iter_and_result = info_states_.insert( + {is_key, CFRInfoStateValues(legal_actions, kInitialTableValues)}); + + CFRInfoStateValues info_state_copy = iter_and_result.first->second; + info_state_copy.ApplyRegretMatching(); + + double value = 0; + std::vector child_values(legal_actions.size(), 0); + + if (cur_player != player) { + // Sample at opponent nodes. 
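+// External sampling: the opponent's action (like the chance outcomes above)
+// is sampled once from the current regret-matched strategy, while all of the
+// updating player's actions are enumerated in the branch below.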
+ int aidx = info_state_copy.SampleActionIndex(0.0, dist_(*rng)); + value = UpdateRegrets(*state.Child(legal_actions[aidx]), player, rng); + } else { + // Walk over all actions at my nodes + for (int aidx = 0; aidx < legal_actions.size(); ++aidx) { + child_values[aidx] = + UpdateRegrets(*state.Child(legal_actions[aidx]), player, rng); + value += info_state_copy.current_policy[aidx] * child_values[aidx]; + } + } + + // Now the regret and avg strategy updates. + CFRInfoStateValues& info_state = info_states_[is_key]; + + if (cur_player == player) { + // Update regrets + for (int aidx = 0; aidx < legal_actions.size(); ++aidx) { + info_state.cumulative_regrets[aidx] += (child_values[aidx] - value); + } + } + + // Simple average does averaging on the opponent node. To do this in a game + // with more than two players, we only update the player + 1 mod num_players, + // which reduces to the standard rule in 2 players. + if (avg_type_ == AverageType::kSimple && + cur_player == ((player + 1) % game_->NumPlayers())) { + for (int aidx = 0; aidx < legal_actions.size(); ++aidx) { + info_state.cumulative_policy[aidx] += + info_state_copy.current_policy[aidx]; + } + } + + return value; +} + +void ExternalSamplingMCCFRSolver::FullUpdateAverage( + const State& state, const std::vector& reach_probs) { + if (state.IsTerminal()) { + return; + } else if (state.IsChanceNode()) { + for (Action action : state.LegalActions()) { + FullUpdateAverage(*state.Child(action), reach_probs); + } + return; + } else if (state.IsSimultaneousNode()) { + SpielFatalError( + "Simultaneous moves not supported. Use " + "TurnBasedSimultaneousGame to convert the game first."); + } + + // If all the probs are zero, no need to keep going. + double sum = std::accumulate(reach_probs.begin(), reach_probs.end(), 0.0); + if (sum == 0.0) return; + + Player cur_player = state.CurrentPlayer(); + std::string is_key = state.InformationStateString(cur_player); + std::vector legal_actions = state.LegalActions(); + + // The insert here only inserts the default value if the key is not found, + // otherwise returns the entry in the map. + auto iter_and_result = info_states_.insert( + {is_key, CFRInfoStateValues(legal_actions, kInitialTableValues)}); + + CFRInfoStateValues info_state_copy = iter_and_result.first->second; + info_state_copy.ApplyRegretMatching(); + + for (int aidx = 0; aidx < legal_actions.size(); ++aidx) { + std::vector new_reach_probs = reach_probs; + new_reach_probs[cur_player] *= info_state_copy.current_policy[aidx]; + FullUpdateAverage(*state.Child(legal_actions[aidx]), new_reach_probs); + } + + // Now update the cumulative policy. 
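+// Full averaging weights the current strategy by the acting player's own
+// reach probability, i.e. the stochastically-weighted averaging discussed in
+// the header.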
+ CFRInfoStateValues& info_state = info_states_[is_key]; + for (int aidx = 0; aidx < legal_actions.size(); ++aidx) { + info_state.cumulative_policy[aidx] += + (reach_probs[cur_player] * info_state_copy.current_policy[aidx]); + } +} + +std::unique_ptr +DeserializeExternalSamplingMCCFRSolver(const std::string& serialized, + std::string delimiter) { + auto partial = PartiallyDeserializeCFRSolver(serialized); + SPIEL_CHECK_EQ(partial.solver_type, "ExternalSamplingMCCFRSolver"); + + enum Section { + kInvalid = -1, + kRNG = 0, + kAverageType = 1, + kDefaultPolicy = 2 + }; + std::array section_strings = {"", "", ""}; + Section current_section = kInvalid; + + std::vector lines = + absl::StrSplit(partial.solver_specific_state, '\n'); + for (int i = 0; i < lines.size(); i++) { + if (lines[i] == kSerializeSolverRNGSectionHeader) { + SPIEL_CHECK_EQ(current_section, kInvalid); + current_section = kRNG; + } else if (lines[i] == kSerializeSolverAverageTypeSectionHeader) { + SPIEL_CHECK_EQ(current_section, kRNG); + current_section = kAverageType; + } else if (lines[i] == kSerializeSolverDefaultPolicySectionHeader) { + SPIEL_CHECK_EQ(current_section, kAverageType); + current_section = kDefaultPolicy; + } else { + SPIEL_CHECK_NE(current_section, kInvalid); + absl::StrAppend(§ion_strings[current_section], lines[i], "\n"); + } + } + // Remove trailing newline characters + for (int i = 0; i < section_strings.size(); i++) { + section_strings[i].erase(section_strings[i].length() - 1); + } + + // Do the actual deserialization for all internal state values + std::unique_ptr rng = std::make_unique(); + std::istringstream rng_stream(section_strings[kRNG]); + rng_stream >> *rng; + + AverageType average_type; + if (section_strings[kAverageType] == "FullAverageType") + average_type = AverageType::kFull; + else if (section_strings[kAverageType] == "SimpleAverageType") + average_type = AverageType::kSimple; + + auto solver = std::make_unique( + partial.game, DeserializePolicy(section_strings[kDefaultPolicy]), + std::move(rng), average_type); + DeserializeCFRInfoStateValuesTable(partial.serialized_cfr_values_table, + &solver->InfoStateValuesTable(), + delimiter); + return solver; +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/external_sampling_mccfr.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/external_sampling_mccfr.h new file mode 100644 index 0000000..cb0e3b8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/external_sampling_mccfr.h @@ -0,0 +1,122 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_EXTERNAL_SAMPLING_MCCFR_H_ +#define OPEN_SPIEL_ALGORITHMS_EXTERNAL_SAMPLING_MCCFR_H_ + +#include +#include +#include + +#include "open_spiel/algorithms/cfr.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" + +// An implementation of external sampling Monte Carlo Counterfactual Regret +// Minimization (CFR). 
See Lanctot 2009 [0] and Chapter 4 of Lanctot 2013 [1] +// for details. +// [0]: http://mlanctot.info/files/papers/nips09mccfr.pdf +// [1]: http://mlanctot.info/files/papers/PhD_Thesis_MarcLanctot.pdf + +namespace open_spiel { +namespace algorithms { + +// How to average the strategy. The 'simple' type does the averaging for +// player i + 1 mod num_players on player i's regret update pass; in two players +// this corresponds to the standard implementation (updating the average +// policy at opponent nodes). In n>2 players, this can be a problem for several +// reasons: first, it does not compute the estimate as described by the +// (unbiased) stochastically-weighted averaging in chapter 4 of Lanctot 2013 +// commonly used in MCCFR because the denominator (important sampling +// correction) should include all the other sampled players as well so the +// sample reach no longer cancels with reach of the player updating their +// average policy. Second, if one player assigns zero probability to an action +// (leading to a subtree), the average policy of a different player in that +// subtree is no longer updated. Hence, the full averaging does not update the +// average policy in the regret passes but does a separate pass to update the +// average policy. Nevertheless, we set the simple type as the default because +// it is faster, seems to work better empirically, and it matches what was done +// in Pluribus (Brown and Sandholm. Superhuman AI for multiplayer poker. +// Science, 11, 2019). +enum class AverageType { + kSimple, + kFull, +}; + +class ExternalSamplingMCCFRSolver { + public: + static inline constexpr double kInitialTableValues = 0.000001; + + // Creates a solver with a specific seed, average type and an explicit + // default uniform policy for states that have not been visited. + ExternalSamplingMCCFRSolver(const Game& game, int seed = 0, + AverageType avg_type = AverageType::kSimple); + + // Creates a solver with a specific seed and average type, and also allows + // for a custom default policy for nodes that have not been visited. + ExternalSamplingMCCFRSolver(const Game& game, + std::shared_ptr default_policy, + int seed = 0, + AverageType avg_type = AverageType::kSimple); + + // The constructor below is meant mainly for deserialization purposes and + // should not be used directly. + ExternalSamplingMCCFRSolver(std::shared_ptr game, + std::shared_ptr default_policy, + std::unique_ptr rng, + AverageType avg_type); + + // Performs one iteration of external sampling MCCFR, updating the regrets + // and average strategy for all players. This method uses the internal random + // number generator. + void RunIteration(); + + // Same as above, but uses the specified random number generator instead. + void RunIteration(std::mt19937* rng); + + CFRInfoStateValuesTable& InfoStateValuesTable() { return info_states_; } + + // Computes the average policy, containing the policy for all players. + // The returned policy instance should only be used during the lifetime of + // the CFRSolver object. + std::shared_ptr AveragePolicy() const { + return std::make_shared(info_states_, default_policy_); + } + + // See comments above CFRInfoStateValues::Serialize(double_precision) for + // notes about the double_precision parameter. 
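+// The serialized text embeds the RNG state, the average type, the default
+// policy and the full info-state value table, so a restored solver can keep
+// iterating where it left off. A minimal round-trip sketch (names are
+// illustrative only):
+//   std::shared_ptr<const Game> game = LoadGame("kuhn_poker");
+//   ExternalSamplingMCCFRSolver solver(*game);
+//   solver.RunIteration();
+//   std::string blob = solver.Serialize();
+//   auto restored = DeserializeExternalSamplingMCCFRSolver(blob);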
+ std::string Serialize(int double_precision = -1, + std::string delimiter = "<~>") const; + + private: + double UpdateRegrets(const State& state, Player player, std::mt19937* rng); + void FullUpdateAverage(const State& state, + const std::vector& reach_probs); + + std::shared_ptr game_; + std::unique_ptr rng_; + AverageType avg_type_; + CFRInfoStateValuesTable info_states_; + std::uniform_real_distribution dist_; + std::shared_ptr default_policy_; +}; + +std::unique_ptr +DeserializeExternalSamplingMCCFRSolver(const std::string& serialized, + std::string delimiter = "<~>"); + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_EXTERNAL_SAMPLING_MCCFR_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/external_sampling_mccfr_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/external_sampling_mccfr_test.cc new file mode 100644 index 0000000..f60f66c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/external_sampling_mccfr_test.cc @@ -0,0 +1,110 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/external_sampling_mccfr.h" + +#include +#include +#include + +#include "open_spiel/algorithms/tabular_exploitability.h" +#include "open_spiel/games/kuhn_poker/kuhn_poker.h" +#include "open_spiel/games/leduc_poker/leduc_poker.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace { + +constexpr int kSeed = 230398247; + +void MCCFR_2PGameTest(const std::string& game_name, std::mt19937* rng, + int iterations, double nashconv_upperbound) { + std::shared_ptr game = LoadGame(game_name); + ExternalSamplingMCCFRSolver solver(*game); + for (int i = 0; i < iterations; i++) { + solver.RunIteration(rng); + } + const std::shared_ptr average_policy = solver.AveragePolicy(); + double nash_conv = NashConv(*game, *average_policy, true); + std::cout << "Game: " << game_name << ", iters = " << iterations + << ", NashConv: " << nash_conv << std::endl; + SPIEL_CHECK_LE(nash_conv, nashconv_upperbound); +} + +void MCCFR_KuhnPoker3PTest(std::mt19937* rng) { + std::shared_ptr game = LoadGame("kuhn_poker(players=3)"); + ExternalSamplingMCCFRSolver solver(*game); + for (int i = 0; i < 100; i++) { + solver.RunIteration(rng); + } + const std::shared_ptr average_policy = solver.AveragePolicy(); + std::cout << "Kuhn 3P (standard averaging) NashConv = " + << NashConv(*game, *average_policy, true) << std::endl; + + ExternalSamplingMCCFRSolver full_solver(*game, 39693847, AverageType::kFull); + for (int i = 0; i < 100; i++) { + full_solver.RunIteration(rng); + } + auto full_average_policy = full_solver.AveragePolicy(); + std::cout << "Kuhn 3P (full averaging) NashConv = " + << NashConv(*game, *full_average_policy) << std::endl; +} + +void MCCFR_SerializationTest() { + auto game = LoadGame("kuhn_poker"); + ExternalSamplingMCCFRSolver solver = ExternalSamplingMCCFRSolver(*game); 
+ double exploitability0 = Exploitability(*game, *solver.AveragePolicy()); + + for (int i = 0; i < 200; i++) { + solver.RunIteration(); + } + double exploitability1 = Exploitability(*game, *solver.AveragePolicy()); + SPIEL_CHECK_GT(exploitability0, exploitability1); + + std::string serialized = solver.Serialize(); + std::unique_ptr deserialized_solver = + DeserializeExternalSamplingMCCFRSolver(serialized); + SPIEL_CHECK_EQ(solver.InfoStateValuesTable().size(), + deserialized_solver->InfoStateValuesTable().size()); + double exploitability2 = + Exploitability(*game, *deserialized_solver->AveragePolicy()); + SPIEL_CHECK_FLOAT_NEAR(exploitability1, exploitability2, 1e-15); + + for (int i = 0; i < 200; i++) { + deserialized_solver->RunIteration(); + } + double exploitability3 = + Exploitability(*game, *deserialized_solver->AveragePolicy()); + SPIEL_CHECK_GT(exploitability2, exploitability3); +} + +} // namespace +} // namespace algorithms +} // namespace open_spiel + +namespace algorithms = open_spiel::algorithms; + +int main(int argc, char** argv) { + // Values double-checked with the original implementation used in (Lanctot, + // "Monte Carlo Sampling and Regret Minimization For Equilibrium Computation + // and Decision-Making in Large Extensive Form Games", 2013). + std::mt19937 rng(algorithms::kSeed); + algorithms::MCCFR_2PGameTest("kuhn_poker", &rng, 1000, 0.05); + algorithms::MCCFR_2PGameTest("leduc_poker", &rng, 1000, 2.5); + algorithms::MCCFR_2PGameTest("liars_dice", &rng, 100, 1.6); + algorithms::MCCFR_KuhnPoker3PTest(&rng); + algorithms::MCCFR_SerializationTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/fsicfr.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/fsicfr.cc new file mode 100644 index 0000000..58a7a28 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/fsicfr.cc @@ -0,0 +1,253 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/fsicfr.h" + +#include +#include + +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace algorithms { + +constexpr const int kNumPlayers = 2; + +FSICFRNode::FSICFRNode() : psum(kNumPlayers, 0) {} + +void FSICFRNode::AddChild(Action action, int chance_id, FSICFRNode* child) { + children[{action, chance_id}] = child->id; + if (std::find(child->parent_ids.begin(), child->parent_ids.end(), id) == + child->parent_ids.end()) { + child->parent_ids.push_back(id); + } +} + +std::string FSICFRNode::ToString() { + return absl::StrCat(id, " P", player, " T?", terminal, ": ", string_key); +} + +void FSICFRNode::ApplyRegretMatching() { + double pos_rsum = 0.0; + for (int a = 0; a < legal_actions.size(); ++a) { + pos_rsum += std::max(0.0, regrets[a]); + } + for (int a = 0; a < legal_actions.size(); ++a) { + strategy[a] = pos_rsum > 0 ? 
std::max(0.0, regrets[a]) / pos_rsum + : 1.0 / legal_actions.size(); + SPIEL_CHECK_PROB(strategy[a]); + } +} + +FSICFRNode* FSICFRGraph::GetOrCreateDecisionNode( + const std::vector& legal_actions, + const std::string& info_state_string, Player player, int max_predecessors, + int chance_id) { + auto iter = string_key_to_node_id_map_.find(info_state_string); + if (iter != string_key_to_node_id_map_.end()) { + return &nodes_[iter->second]; + } else { + FSICFRNode node; + node.terminal = false; + node.string_key = info_state_string; + node.player = player; + node.chance_id = chance_id; + node.max_predecessors = max_predecessors; + node.legal_actions = legal_actions; + node.psum = {0.0, 0.0}; + node.strategy = + std::vector(legal_actions.size(), 1.0 / legal_actions.size()); + node.regrets = + std::vector(legal_actions.size(), 1.0 / legal_actions.size()); + node.ssum = std::vector(legal_actions.size(), 0.0); + node.id = nodes_.size(); + string_key_to_node_id_map_[info_state_string] = node.id; + nodes_.push_back(node); + return &nodes_[node.id]; + } +} + +FSICFRNode* FSICFRGraph::GetOrCreateTerminalNode( + const std::string& terminal_string_key, double p0_utility, + int max_predecessors) { + auto iter = string_key_to_node_id_map_.find(terminal_string_key); + if (iter != string_key_to_node_id_map_.end()) { + return &nodes_[iter->second]; + } else { + FSICFRNode node; + node.terminal = true; + node.string_key = terminal_string_key; + node.p0_utility = p0_utility; + node.max_predecessors = max_predecessors; + node.id = nodes_.size(); + string_key_to_node_id_map_[terminal_string_key] = node.id; + nodes_.push_back(node); + return &nodes_[node.id]; + } +} + +void FSICFRGraph::TopSort() { + int max_value = -1; + int cur_value = 0; + bool done = false; + int num_nodes = 0; + + while (!done) { + num_nodes = 0; + for (int i = 0; i < nodes_.size(); ++i) { + max_value = std::max(max_value, nodes_[i].max_predecessors); + if (nodes_[i].max_predecessors == cur_value) { + // std::cout << nodes_[i].max_predecessors << " " + // << nodes_[i].string_key << std::endl; + ordered_ids_.push_back(i); + num_nodes++; + } + } + + cur_value++; + if (cur_value > max_value) { + done = true; + } + } + + SPIEL_CHECK_EQ(nodes_.size(), ordered_ids_.size()); +} + +FSICFRSolver::FSICFRSolver(const Game& game, int seed, + const std::vector& chance_outcome_ranges, + const FSICFRGraph* graph) + : game_(game), + rng_(seed), + total_iterations_(0), + chance_outcome_ranges_(chance_outcome_ranges), + sampled_chance_outcomes_(game.NumPlayers()), + graph_(graph) {} + +void FSICFRSolver::RunIteration() { + // Predetermine chance outcomes (one per player). 
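+// One chance outcome per player stays fixed for the whole iteration; the
+// forward pass then pushes visit counts and reach probabilities through the
+// topologically sorted node graph, and the backward pass propagates values
+// and regret updates in reverse order.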
+ for (int i = 0; i < sampled_chance_outcomes_.size(); ++i) { + sampled_chance_outcomes_[i] = + absl::Uniform(rng_, 0, chance_outcome_ranges_[i]); + SPIEL_CHECK_GE(sampled_chance_outcomes_[i], 0); + SPIEL_CHECK_LT(sampled_chance_outcomes_[i], chance_outcome_ranges_[i]); + } + ForwardPass(); + BackwardPass(); + total_iterations_++; +} + +void FSICFRSolver::RunIterations(int n) { + for (int i = 0; i < n; ++i) { + RunIteration(); + } +} + +void FSICFRSolver::ForwardPass() { + bool done_first = false; + for (int idx = 0; idx < graph_->size(); ++idx) { + int node_id = graph_->ordered_node_id(idx); + FSICFRNode* node = graph_->GetNode(node_id); + if (!node->terminal && + node->chance_id == sampled_chance_outcomes_[node->player]) { + if (!done_first) { + node->visits = 1; + node->psum = {1.0, 1.0}; + done_first = true; + } + node->ApplyRegretMatching(); + double my_reach = node->psum[node->player]; + int opp_chance_id = sampled_chance_outcomes_[1 - node->player]; + for (int a = 0; a < node->legal_actions.size(); ++a) { + node->ssum[a] += my_reach * node->strategy[a]; + Action action = node->legal_actions[a]; + auto iter = node->children.find({action, opp_chance_id}); + SPIEL_CHECK_TRUE(iter != node->children.end()); + int child_id = iter->second; + FSICFRNode* child = graph_->GetNode(child_id); + if (!child->terminal) { + child->visits += node->visits; + SPIEL_CHECK_GT(child->visits, 0); + for (int p : {0, 1}) { + child->psum[p] += + node->psum[p] * (node->player == p ? node->strategy[a] : 1.0); + SPIEL_CHECK_GE(child->psum[p], 0); + } + } + } + } + } +} + +void FSICFRSolver::BackwardPass() { + for (int idx = graph_->size() - 1; idx >= 0; --idx) { + int node_id = graph_->ordered_node_id(idx); + FSICFRNode* node = graph_->GetNode(node_id); + if (!node->terminal && + node->chance_id == sampled_chance_outcomes_[node->player]) { + node->v = 0; + int opp_chance_id = sampled_chance_outcomes_[1 - node->player]; + std::vector values(node->legal_actions.size(), 0); + double opp_reach = node->psum[1 - node->player]; + for (int a = 0; a < node->legal_actions.size(); ++a) { + Action action = node->legal_actions[a]; + auto iter = node->children.find({action, opp_chance_id}); + SPIEL_CHECK_TRUE(iter != node->children.end()); + int child_id = iter->second; + FSICFRNode* child = graph_->GetNode(child_id); + if (child->terminal) { + SPIEL_CHECK_TRUE(child->p0_utility == -1 || child->p0_utility == 1); + values[a] = + node->player == 0 ? child->p0_utility : -child->p0_utility; + } else { + values[a] = node->player == child->player ? child->v : -child->v; + } + node->v += node->strategy[a] * values[a]; + } + for (int a = 0; a < node->legal_actions.size(); ++a) { + node->regrets[a] = (node->T * node->regrets[a] + + node->visits * opp_reach * (values[a] - node->v)) / + (node->T + node->visits); + } + node->T += node->visits; + node->visits = 0; + node->psum[0] = 0; + node->psum[1] = 0; + } + } +} + +TabularPolicy FSICFRSolver::GetAveragePolicy() const { + TabularPolicy policy; + for (int idx = 0; idx < graph_->size(); ++idx) { + FSICFRNode* node = graph_->GetNode(idx); + if (!node->terminal) { + ActionsAndProbs state_policy; + double denom = std::accumulate(node->ssum.begin(), node->ssum.end(), 0.0); + SPIEL_CHECK_GE(denom, 0.0); + for (int a = 0; a < node->legal_actions.size(); ++a) { + Action action = node->legal_actions[a]; + double prob = denom > 0 ? 
node->ssum[a] / denom + : 1.0 / node->legal_actions.size(); + SPIEL_CHECK_PROB(prob); + state_policy.push_back({action, prob}); + } + policy.SetStatePolicy(node->string_key, state_policy); + } + } + return policy; +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/fsicfr.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/fsicfr.h new file mode 100644 index 0000000..72096cc --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/fsicfr.h @@ -0,0 +1,146 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_FSICFR_H_ +#define OPEN_SPIEL_ALGORITHMS_FSICFR_H_ + +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" + +namespace open_spiel { +namespace algorithms { + +// An basic implementation of Neller and Hnath 2011, "Approximating Optimal Dudo +// Play with Fixed-Strategy Iteration Counterfactual Regret Minimization" +// https://cupola.gettysburg.edu/csfac/2/. +// +// This implementation currently assumes the following: +// - All chance events occur at the start of the game (before any decisions) +// - There exists a perfect ranking between a player's chance event outcomes +// and their outcome, encoded as a chance_id integer (one per player) +// +// This implementation was built for and only tested on Liar's dice. For a usage +// example, see examples/fsicfr_liars_dice.cc. + +struct FSICFRNode { + // Maximum number of predecessor nodes (used for top sort order). + int max_predecessors = 0; + + int id = -1; + + // Chance id corresponding to the player to play at this node. + int chance_id = -1; + + bool terminal = false; + double p0_utility = 0; + + std::string string_key = ""; + Player player = kInvalidPlayer; + int T = 0; + int visits = 0; + double v = 0; + + // This is an (Action, other player chance id) -> node id map. 
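+// The same decision node can therefore lead to different successors
+// depending on which chance outcome the other player was dealt.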
+ absl::flat_hash_map, int> children; + + std::vector parent_ids; + std::vector legal_actions; + + std::vector ssum; + std::vector psum; + std::vector strategy; + std::vector regrets; + + FSICFRNode(); + void AddChild(Action action, int chance_id, FSICFRNode* child); + std::string ToString(); + void ApplyRegretMatching(); +}; + +class FSICFRGraph { + public: + FSICFRGraph() {} + FSICFRNode* GetOrCreateDecisionNode(const std::vector& legal_actions, + const std::string& info_state_string, + Player player, int max_predecessors, + int chance_id); + FSICFRNode* GetOrCreateTerminalNode(const std::string& terminal_string_key, + double p0_utility, int max_predecessors); + FSICFRNode* GetNode(int id) const { + if (id < 0 || id >= nodes_.size()) { + return nullptr; + } else { + FSICFRGraph* this_graph = const_cast(this); + return &this_graph->nodes_[id]; + } + } + + int size() const { return nodes_.size(); } + + // Topologically sort the graph (in order of non-decreasing max_predecessors). + void TopSort(); + + int ordered_node_id(int idx) const { return ordered_ids_[idx]; } + + private: + // Infostate/terminal string key to node id map + absl::flat_hash_map string_key_to_node_id_map_; + + // Nodes. Ids correspond to indices. + std::vector nodes_; + + // Topologically sorted nodes ids. A more space-efficient implementation could + // remove this vector and simply build the node list in a such a way that + // nodes_ is already topologically-ordered. + std::vector ordered_ids_; +}; + +class FSICFRSolver { + public: + FSICFRSolver(const Game& game, int seed, + const std::vector& chance_outcome_ranges, + const FSICFRGraph* graph); + void RunIteration(); + void RunIterations(int n); + + TabularPolicy GetAveragePolicy() const; + + private: + void ForwardPass(); + void BackwardPass(); + + const Game& game_; + std::mt19937 rng_; + + int total_iterations_; + + // The maximum value of unique chance outcomes for each player. + std::vector chance_outcome_ranges_; + + // These are the predetermined chance outcomes for the iteration. + std::vector sampled_chance_outcomes_; + + // The FSICFR graph. + const FSICFRGraph* graph_; +}; + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_FSICFR_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_all_histories.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_all_histories.cc new file mode 100644 index 0000000..12647a0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_all_histories.cc @@ -0,0 +1,76 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/get_all_histories.h" + +namespace open_spiel { +namespace algorithms { +namespace { + +// Walk a subgame and return all histories contained in the subgames. This does +// a recursive tree walk, therefore all valid sequences must have finite number +// of actions. +// Requires State::Clone() to be implemented. 
+// Use with extreme caution! +// Currently not implemented for simultaneous games. +void GetSubgameHistories(State* state, + std::vector>* all_histories, + int depth_limit, int depth, bool include_terminals, + bool include_chance_states) { + if (state->IsTerminal()) { + if (include_terminals) { + // Include, then terminate recursion. + all_histories->push_back(state->Clone()); + } + return; + } + + if (depth_limit >= 0 && depth > depth_limit) { + return; + } + + if (!state->IsChanceNode() || include_chance_states) { + all_histories->push_back(state->Clone()); + } + + for (auto action : state->LegalActions()) { + auto next_state = state->Clone(); + next_state->ApplyAction(action); + GetSubgameHistories(next_state.get(), all_histories, depth_limit, depth + 1, + include_terminals, include_chance_states); + } +} + +} // namespace + +std::vector> GetAllHistories( + const Game& game, int depth_limit, bool include_terminals, + bool include_chance_states) { + // Get the root state. + std::unique_ptr state = game.NewInitialState(); + std::vector> all_histories; + + // Then, do a recursive tree walk to fill up the vector. + GetSubgameHistories(state.get(), &all_histories, depth_limit, 0, + include_terminals, include_chance_states); + + if (all_histories.empty()) { + SpielFatalError("GetSubgameHistories returned 0 histories!"); + } + + return all_histories; +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_all_histories.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_all_histories.h new file mode 100644 index 0000000..00b72b3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_all_histories.h @@ -0,0 +1,46 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_GET_ALL_HISTORIES_H_ +#define OPEN_SPIEL_ALGORITHMS_GET_ALL_HISTORIES_H_ + +#include +#include +#include + +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace algorithms { + +// Returns a vector of states corresponding to unique histories in the game. +// +// For small games only! +// +// Use this implementation with caution as it does a recursive tree +// walk of the game and could easily fill up memory for larger games or games +// with long horizons. +// +// Currently only works for sequential games. +// +// Note: negative depth limit means no limit, 0 means only root, etc.. +// The default arguments will return all decision nodes in the game. 
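+// For example (a minimal sketch):
+//   std::shared_ptr<const Game> game = LoadGame("tic_tac_toe");
+//   auto histories = GetAllHistories(*game, /*depth_limit=*/-1,
+//                                    /*include_terminals=*/true,
+//                                    /*include_chance_states=*/true);
+//   // histories.size() == 549946 (see get_all_histories_test.cc below).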
+std::vector<std::unique_ptr<State>> GetAllHistories(
+    const Game& game, int depth_limit = -1, bool include_terminals = false,
+    bool include_chance_states = false);
+
+}  // namespace algorithms
+}  // namespace open_spiel
+
+#endif  // OPEN_SPIEL_ALGORITHMS_GET_ALL_HISTORIES_H_
diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_all_histories_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_all_histories_test.cc
new file mode 100644
index 0000000..410ec2b
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_all_histories_test.cc
@@ -0,0 +1,37 @@
+// Copyright 2021 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "open_spiel/algorithms/get_all_histories.h"
+
+#include "open_spiel/games/tic_tac_toe/tic_tac_toe.h"
+#include "open_spiel/spiel_utils.h"
+
+namespace algorithms = open_spiel::algorithms;
+
+// Orwant, et al. Mastering Algorithms with Perl. O'Reilly, 1999, p. 183.
+inline constexpr int kTTTNumTotalHistories = 549946;
+inline constexpr int kTTTNumPartialHistories = 294778;
+
+int main(int argc, char **argv) {
+  std::shared_ptr<const open_spiel::Game> game =
+      open_spiel::LoadGame("tic_tac_toe");
+  auto histories = algorithms::GetAllHistories(*game, -1,
+                                               /*include_terminals=*/true,
+                                               /*include_chance_states=*/true);
+  SPIEL_CHECK_EQ(histories.size(), kTTTNumTotalHistories);
+  histories = algorithms::GetAllHistories(*game, -1,
+                                          /*include_terminals=*/false,
+                                          /*include_chance_states=*/true);
+  SPIEL_CHECK_EQ(histories.size(), kTTTNumPartialHistories);
+}
diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_all_infostates.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_all_infostates.cc
new file mode 100644
index 0000000..328122a
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_all_infostates.cc
@@ -0,0 +1,73 @@
+// Copyright 2021 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "open_spiel/algorithms/get_all_infostates.h"
+
+#include <algorithm>
+
+namespace open_spiel {
+namespace algorithms {
+namespace {
+
+// Get all the information states. Note that the result might contain
+// duplicates.
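+// Duplicates are removed later in GetAllInformationStates by sorting each
+// player's vector and erasing adjacent equal entries.
+//
+// A small illustrative use of the public entry point (added comment for this
+// vendored copy; the game name is only an example):
+//
+//   std::vector<std::vector<std::string>> infostates =
+//       GetAllInformationStates(*LoadGame("kuhn_poker"), /*depth_limit=*/-1);
+//   // infostates[p] holds the distinct infostate strings for player p.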
+void GetSubgameInformationStates( + State* state, std::vector>* all_info_states, + int depth_limit, int depth) { + if (state->IsTerminal()) { + return; + } + + if (depth_limit >= 0 && depth > depth_limit) { + return; + } + + for (auto action : state->LegalActions()) { + auto next_state = state->Clone(); + next_state->ApplyAction(action); + + if (!state->IsChanceNode()) { + int player = state->CurrentPlayer(); + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, state->NumPlayers()); + (*all_info_states)[player].push_back(state->InformationStateString()); + } + + GetSubgameInformationStates(next_state.get(), all_info_states, depth_limit, + depth + 1); + } +} + +} // namespace + +std::vector> GetAllInformationStates(const Game& game, + int depth_limit) { + // Get the root state. + std::unique_ptr state = game.NewInitialState(); + std::vector> all_infostates(game.NumPlayers()); + + // Then, do a recursive tree walk to fill up the vector. + GetSubgameInformationStates(state.get(), &all_infostates, depth_limit, 0); + + // Remove duplicates by sorting the info states and calling std::unique. + for (Player p = 0; p < all_infostates.size(); ++p) { + absl::c_sort(all_infostates[p]); + auto last = std::unique(all_infostates[p].begin(), all_infostates[p].end()); + all_infostates[p].erase(last, all_infostates[p].end()); + } + + return all_infostates; +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_all_infostates.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_all_infostates.h new file mode 100644 index 0000000..e442299 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_all_infostates.h @@ -0,0 +1,35 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_GET_ALL_INFOSTATES_H_ +#define OPEN_SPIEL_ALGORITHMS_GET_ALL_INFOSTATES_H_ + +#include +#include +#include + +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace algorithms { + +// Get all the information states in the game. Currently works for sequential +// games. Use -1 for the depth_limit to get everything. +std::vector> GetAllInformationStates( + const Game& game, int depth_limit = -1); + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_GET_ALL_INFOSTATES_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_all_states.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_all_states.cc new file mode 100644 index 0000000..9acdde9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_all_states.cc @@ -0,0 +1,92 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/get_all_states.h" + +namespace open_spiel { +namespace algorithms { +namespace { + +// Walk a subgame and return all states contained in the subgames. This does +// a recursive tree walk, therefore all valid sequences must have finite number +// of actions. The state collection is key-indexed by the state's string +// representation so that duplicates are not added. +// Requires State::Clone() to be implemented. +// Use with extreme caution! +// Currently not implemented for simultaneous games. +void GetSubgameStates(State* state, + std::map>* all_states, + int depth_limit, int depth, bool include_terminals, + bool include_chance_states, + bool stop_at_duplicates) { + if (state->IsTerminal()) { + if (include_terminals) { + // Include if not already present and then terminate recursion. + std::string key = state->ToString(); + if (all_states->find(key) == all_states->end()) { + (*all_states)[key] = state->Clone(); + } + } + return; + } + + if (depth_limit >= 0 && depth > depth_limit) { + return; + } + + if (!state->IsChanceNode() || include_chance_states) { + // Decision node; add only if not already present + std::string key = state->ToString(); + if (all_states->find(key) == all_states->end()) { + (*all_states)[key] = state->Clone(); + } else { + // Duplicate node. + if (stop_at_duplicates) { + // Terminate, do not explore the same node twice + return; + } + } + } + + for (auto action : state->LegalActions()) { + auto next_state = state->Clone(); + next_state->ApplyAction(action); + GetSubgameStates(next_state.get(), all_states, depth_limit, depth + 1, + include_terminals, include_chance_states, + stop_at_duplicates); + } +} + +} // namespace + +std::map> GetAllStates( + const Game& game, int depth_limit, bool include_terminals, + bool include_chance_states, bool stop_at_duplicates) { + // Get the root state. + std::unique_ptr state = game.NewInitialState(); + std::map> all_states; + + // Then, do a recursive tree walk to fill up the map. + GetSubgameStates(state.get(), &all_states, depth_limit, 0, include_terminals, + include_chance_states, stop_at_duplicates); + + if (all_states.empty()) { + SpielFatalError("GetSubgameStates returned 0 states!"); + } + + return all_states; +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_all_states.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_all_states.h new file mode 100644 index 0000000..3de83ee --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_all_states.h @@ -0,0 +1,50 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_GET_ALL_STATES_H_ +#define OPEN_SPIEL_ALGORITHMS_GET_ALL_STATES_H_ + +#include + +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace algorithms { + +// Get all states in the game, indexed by their string representation. +// For small games only! +// +// Useful for methods that solve the games explicitly, i.e. value iteration. +// +// Use this implementation with caution as it does a recursive tree +// walk of the game and could easily fill up memory for larger games or games +// with long horizons. +// +// If stop_at_duplicates is set, then the recursion does not continue if +// a node with the same string representation is reached via a different path +// (in some games this should not be used because the history matters and the +// information may not be stored in the string representation). +// +// Currently only works for sequential games. +// +// Note: negative depth limit means no limit, 0 means only root, etc.. + +std::map> GetAllStates( + const Game& game, int depth_limit, bool include_terminals, + bool include_chance_states, bool stop_at_duplicates = false); + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_GET_ALL_STATES_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_all_states_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_all_states_test.cc new file mode 100644 index 0000000..116aa41 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_all_states_test.cc @@ -0,0 +1,29 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/get_all_states.h" + +#include "open_spiel/games/tic_tac_toe/tic_tac_toe.h" +#include "open_spiel/spiel_utils.h" + +namespace algorithms = open_spiel::algorithms; +namespace ttt = open_spiel::tic_tac_toe; + +int main(int argc, char **argv) { + std::shared_ptr game = + open_spiel::LoadGame("tic_tac_toe"); + auto states = algorithms::GetAllStates(*game, -1, /*include_terminals=*/true, + /*include_chance_states=*/true); + SPIEL_CHECK_EQ(states.size(), ttt::kNumberStates); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_legal_actions_map.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_legal_actions_map.cc new file mode 100644 index 0000000..7228f52 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_legal_actions_map.cc @@ -0,0 +1,78 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/get_legal_actions_map.h" + +namespace open_spiel { +namespace algorithms { +namespace { + +// Do the tree traversal to fill the map. This function does a depth-first +// search of all the subtrees to fill the map for all the information states. +void FillMap(const State& state, + std::unordered_map>* map, + int depth_limit, int depth, Player player) { + if (state.IsTerminal()) { + return; + } + + if (depth_limit >= 0 && depth > depth_limit) { + return; + } + + if (state.IsChanceNode()) { + // Do nothing at chance nodes (no information states). + } else if (state.IsSimultaneousNode()) { + // Many players can play at this node. + for (auto p = Player{0}; p < state.NumPlayers(); ++p) { + if (player == kInvalidPlayer || p == player) { + std::string info_state = state.InformationStateString(p); + if (map->find(info_state) == map->end()) { + // Only add it if we don't already have it. + std::vector legal_actions = state.LegalActions(p); + (*map)[info_state] = legal_actions; + } + } + } + } else { + // Regular decision node. + if (player == kInvalidPlayer || state.CurrentPlayer() == player) { + std::string info_state = state.InformationStateString(); + if (map->find(info_state) == map->end()) { + // Only add it if we don't already have it. + std::vector legal_actions = state.LegalActions(); + (*map)[info_state] = legal_actions; + } + } + } + + // Recursively fill the map for each subtree below. + for (auto action : state.LegalActions()) { + std::unique_ptr next_state = state.Child(action); + FillMap(*next_state, map, depth_limit, depth + 1, player); + } +} + +} // namespace + +std::unordered_map> GetLegalActionsMap( + const Game& game, int depth_limit, Player player) { + std::unordered_map> map; + std::unique_ptr initial_state = game.NewInitialState(); + FillMap(*initial_state, &map, depth_limit, 0, player); + return map; +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_legal_actions_map.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_legal_actions_map.h new file mode 100644 index 0000000..77420d7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_legal_actions_map.h @@ -0,0 +1,38 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef OPEN_SPIEL_ALGORITHMS_GET_LEGAL_ACTIONS_MAP_H_ +#define OPEN_SPIEL_ALGORITHMS_GET_LEGAL_ACTIONS_MAP_H_ + +#include +#include +#include + +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace algorithms { + +// Gets a map of information state (string) to vector of legal actions, by doing +// (depth-limited) tree traversal through the game, for a specific player. To +// do a tree traversal over the entire game, use a negative depth limit. To +// bundle all the legal actions for all players in the same map, use +// kInvalidPlayer. +std::unordered_map> GetLegalActionsMap( + const Game& game, int depth_limit, Player player); + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_GET_LEGAL_ACTIONS_MAP_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_legal_actions_map_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_legal_actions_map_test.cc new file mode 100644 index 0000000..9be876f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/get_legal_actions_map_test.cc @@ -0,0 +1,78 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/get_legal_actions_map.h" + +#include + +#include "open_spiel/games/goofspiel/goofspiel.h" +#include "open_spiel/games/kuhn_poker/kuhn_poker.h" +#include "open_spiel/games/leduc_poker/leduc_poker.h" +#include "open_spiel/spiel_utils.h" + +namespace algorithms = open_spiel::algorithms; +namespace kuhn_poker = open_spiel::kuhn_poker; +namespace leduc_poker = open_spiel::leduc_poker; + +using LegalActionsMap = + std::unordered_map>; + +namespace { +void KuhnTest() { + std::shared_ptr game = + open_spiel::LoadGame("kuhn_poker"); + + LegalActionsMap map_p0 = + algorithms::GetLegalActionsMap(*game, + /*depth_limit=*/-1, open_spiel::Player{0}); + SPIEL_CHECK_EQ(map_p0.size(), kuhn_poker::kNumInfoStatesP0); + + LegalActionsMap map_p1 = + algorithms::GetLegalActionsMap(*game, + /*depth_limit=*/-1, open_spiel::Player{1}); + SPIEL_CHECK_EQ(map_p1.size(), kuhn_poker::kNumInfoStatesP1); + + LegalActionsMap map_both = algorithms::GetLegalActionsMap( + *game, /*depth_limit=*/-1, open_spiel::kInvalidPlayer); + SPIEL_CHECK_EQ(map_both.size(), + kuhn_poker::kNumInfoStatesP0 + kuhn_poker::kNumInfoStatesP1); + // They should all have two legal actions: pass and bet. 
+ for (const auto& legal_actions : map_both) { + SPIEL_CHECK_EQ(legal_actions.second.size(), 2); + } +} + +void LeducTest() { + std::shared_ptr game = + open_spiel::LoadGame("leduc_poker"); + LegalActionsMap map_both = algorithms::GetLegalActionsMap( + *game, /*depth_limit=*/-1, open_spiel::kInvalidPlayer); + SPIEL_CHECK_EQ(map_both.size(), leduc_poker::kNumInfoStates); +} + +void GoofspielTest() { + std::shared_ptr game = open_spiel::LoadGame( + "goofspiel", {{"num_cards", open_spiel::GameParameter(3)}}); + LegalActionsMap map_both = algorithms::GetLegalActionsMap( + *game, /*depth_limit=*/-1, open_spiel::kInvalidPlayer); + SPIEL_CHECK_GT(map_both.size(), 0); +} + +} // namespace + +int main(int argc, char** argv) { + KuhnTest(); + LeducTest(); + GoofspielTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/history_tree.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/history_tree.cc new file mode 100644 index 0000000..8be3562 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/history_tree.cc @@ -0,0 +1,241 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/history_tree.h" + +#include +#include +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace { + +std::unique_ptr RecursivelyBuildGameTree( + std::unique_ptr state, Player player_id, + absl::flat_hash_map* state_to_node) { + std::unique_ptr node( + new HistoryNode(player_id, std::move(state))); + if (state_to_node == nullptr) SpielFatalError("state_to_node is null."); + (*state_to_node)[node->GetHistory()] = node.get(); + State* state_ptr = node->GetState(); + switch (node->GetType()) { + case StateType::kMeanField: { + SpielFatalError("kMeanField not supported."); + } + case StateType::kChance: { + double probability_sum = 0; + for (const auto& [outcome, prob] : state_ptr->ChanceOutcomes()) { + std::unique_ptr child = state_ptr->Child(outcome); + if (child == nullptr) { + SpielFatalError("Can't add child; child is null."); + } + probability_sum += prob; + std::unique_ptr child_node = RecursivelyBuildGameTree( + std::move(child), player_id, state_to_node); + node->AddChild(outcome, {prob, std::move(child_node)}); + } + SPIEL_CHECK_FLOAT_EQ(probability_sum, 1.0); + break; + } + case StateType::kDecision: { + for (const auto& legal_action : state_ptr->LegalActions()) { + std::unique_ptr child = state_ptr->Child(legal_action); + + // Note: The probabilities here are meaningless if state.CurrentPlayer() + // != player_id, as we'll be getting the probabilities from the policy + // during the call to Value. For state.CurrentPlayer() == player_id, + // the probabilities are equal to 1. for every action as these are + // *counter-factual* probabilities, which ignore the probability of + // the player that we are playing as. 
+ node->AddChild(legal_action, + {1., RecursivelyBuildGameTree( + std::move(child), player_id, state_to_node)}); + } + break; + } + case StateType::kTerminal: { + // As we assign terminal utilities to node.value in the constructor of + // HistoryNode, we don't have anything to do here. + break; + } + } + return node; +} + +} // namespace + +HistoryNode::HistoryNode(Player player_id, std::unique_ptr game_state) + : state_(std::move(game_state)), + history_(state_->HistoryString()), + type_(state_->GetType()) { + // Unless it's the opposing player's turn, we always view the game from the + // view of player player_id. + if (type_ == StateType::kDecision && state_->CurrentPlayer() != player_id) { + infostate_ = state_->InformationStateString(); + } else if (type_ == StateType::kChance) { + infostate_ = kChanceNodeInfostateString; + } else if (type_ == StateType::kTerminal) { + infostate_ = kTerminalNodeInfostateString; + } else { + infostate_ = state_->InformationStateString(player_id); + } + // Compute & store the legal actions so we can check that all actions we're + // adding are legal. + for (Action action : state_->LegalActions()) legal_actions_.insert(action); + if (type_ == StateType::kTerminal) value_ = state_->PlayerReturn(player_id); +} + +void HistoryNode::AddChild( + Action outcome, std::pair> child) { + if (!legal_actions_.count(outcome)) SpielFatalError("Child is not legal."); + if (child.second == nullptr) { + SpielFatalError("Error inserting child; child is null."); + } + SPIEL_CHECK_PROB_TOLERANCE(child.first, ProbabilityDefaultTolerance()); + child_info_[outcome] = std::move(child); + if (child_info_.size() > legal_actions_.size()) { + SpielFatalError("More children than legal actions."); + } +} + +std::pair HistoryNode::GetChild(Action outcome) { + auto it = child_info_.find(outcome); + if (it == child_info_.end()) { + SpielFatalError("Error getting child; action not found."); + } + // it->second.first is the probability associated with outcome, so as it is a + // probability, it must be in [0, 1]. + SPIEL_CHECK_PROB_TOLERANCE(it->second.first, ProbabilityDefaultTolerance()); + std::pair child = + std::make_pair(it->second.first, it->second.second.get()); + if (child.second == nullptr) { + SpielFatalError("Error getting child; child is null."); + } + return child; +} + +std::vector HistoryNode::GetChildActions() const { + std::vector actions; + actions.reserve(child_info_.size()); + for (const auto& [action, _] : child_info_) actions.push_back(action); + return actions; +} + +HistoryNode* HistoryTree::GetByHistory(const std::string& history) { + auto it = state_to_node_.find(history); + if (it == state_to_node_.end()) { + SpielFatalError(absl::StrCat("Node is null for history: '", history, "'")); + } + return it->second; +} + +std::vector HistoryTree::GetHistories() { + std::vector histories; + histories.reserve(state_to_node_.size()); + for (const auto& [history, _] : state_to_node_) histories.push_back(history); + return histories; +} + +// Builds game tree consisting of all decision nodes for player_id. 
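+// An illustrative call pattern (it mirrors the usage in history_tree_test.cc;
+// the game name is only an example, not the sole supported game):
+//
+//   std::shared_ptr<const Game> game = LoadGame("kuhn_poker");
+//   HistoryTree tree(game->NewInitialState(), /*player_id=*/Player{0});
+//   HistoryNode* root = tree.Root();
+//   // tree.NumHistories() gives the number of stored histories.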
+HistoryTree::HistoryTree(std::unique_ptr state, Player player_id) { + root_ = + RecursivelyBuildGameTree(std::move(state), player_id, &state_to_node_); +} + +ActionsAndProbs GetSuccessorsWithProbs(const State& state, + Player best_responder, + const Policy* policy) { + if (state.CurrentPlayer() == best_responder) { + ActionsAndProbs state_policy; + for (const auto& legal_action : state.LegalActions()) { + // Counterfactual reach probabilities exclude the player's + // actions, hence return probability 1.0 for every action. + state_policy.push_back({legal_action, 1.}); + } + return state_policy; + } else if (state.IsChanceNode()) { + return state.ChanceOutcomes(); + } else { + // Finally, we look at the policy we are finding a best response to, and + // get our probabilities from there. + auto state_policy = policy->GetStatePolicy(state); + if (state_policy.empty()) { + SpielFatalError(state.InformationStateString() + " not found in policy."); + } + return state_policy; + } +} + +// TODO(author1): If this is a bottleneck, it should be possible +// to pass the probabilities-so-far into the call, and get everything right +// the first time, without recursion. The recursion is simpler, however. +std::vector, double>> DecisionNodes( + const State& parent_state, Player best_responder, const Policy* policy) { + // If the state is terminal, then there are no more decisions to be made, + // so we're done. + if (parent_state.IsTerminal()) return {}; + + std::vector, double>> states_and_probs; + // We only consider states where the best_responder is making a decision. + if (parent_state.CurrentPlayer() == best_responder) { + states_and_probs.push_back({parent_state.Clone(), 1.}); + } + ActionsAndProbs actions_and_probs = + GetSuccessorsWithProbs(parent_state, best_responder, policy); + for (open_spiel::Action action : parent_state.LegalActions()) { + std::unique_ptr child = parent_state.Child(action); + + // We recurse here to get the correct probabilities for all children. + // This could probably be done in a cleaner, more performant way, but as + // this is only done once, at the start of the exploitability calculation, + // this is fine for now. + std::vector, double>> children = + DecisionNodes(*child, best_responder, policy); + const double policy_prob = GetProb(actions_and_probs, action); + SPIEL_CHECK_PROB_TOLERANCE(policy_prob, ProbabilityDefaultTolerance()); + for (auto& [state, prob] : children) { + states_and_probs.push_back( + {std::move(state), + // We weight the child probabilities by the probability of taking + // the action that would lead to them. + policy_prob * prob}); + } + } + return states_and_probs; +} + +absl::flat_hash_map>> +GetAllInfoSets(std::unique_ptr state, Player best_responder, + const Policy* policy, HistoryTree* tree) { + absl::flat_hash_map>> + infosets; + // We only need decision nodes, as there's no decision to be made at chance + // nodes (we randomly sample from the different outcomes there). + std::vector, double>> states_and_probs = + DecisionNodes(*state, best_responder, policy); + infosets.reserve(states_and_probs.size()); + for (const auto& [state, prob] : states_and_probs) { + // We look at each decision from the perspective of the best_responder. 
+ std::string infostate = state->InformationStateString(best_responder); + infosets[infostate].push_back({tree->GetByHistory(*state), prob}); + } + return infosets; +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/history_tree.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/history_tree.h new file mode 100644 index 0000000..c1ba0ad --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/history_tree.h @@ -0,0 +1,136 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_HISTORY_TREE_H_ +#define OPEN_SPIEL_ALGORITHMS_HISTORY_TREE_H_ + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/btree_map.h" +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/abseil-cpp/absl/container/flat_hash_set.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { + +// TODO(author1): See if it's possible to remove any fields here. +// Stores all information relevant to exploitability calculation for each +// history in the game. +class HistoryNode { + public: + // Use specific infostate strings for chance and terminal nodes so that we + // don't rely on the game implementations defining them at those states. + static constexpr const char* kChanceNodeInfostateString = "Chance Node"; + static constexpr const char* kTerminalNodeInfostateString = "Terminal node"; + + HistoryNode(Player player_id, std::unique_ptr game_state); + + State* GetState() { return state_.get(); } + + const std::string& GetInfoState() { return infostate_; } + + const std::string& GetHistory() { return history_; } + + const StateType& GetType() { return type_; } + + double GetValue() const { return value_; } + + Action NumChildren() const { return child_info_.size(); } + + void AddChild(Action outcome, + std::pair> child); + + std::vector GetChildActions() const; + + std::pair GetChild(Action outcome); + + private: + std::unique_ptr state_; + std::string infostate_; + std::string history_; + StateType type_; + double value_; + + // Map from legal actions to transition probabilities. Uses a map as we need + // to preserve the order of the actions. + absl::flat_hash_set legal_actions_; + absl::btree_map>> + child_info_; +}; + +// History here refers to the fact that we're using histories- i.e. +// representations of all players private information in addition to the public +// information- as the underlying abstraction. Other trees are possible, such as +// PublicTrees, which use public information as the base abstraction, and +// InformationStateTrees, which use all of the information available to one +// player as the base abstraction. +class HistoryTree { + public: + // Builds a tree of histories. 
player_id is needed here as we view all chance
+  // and terminal nodes from the viewpoint of player_id. Decision nodes are
+  // viewed from the perspective of the player making the decision.
+  HistoryTree(std::unique_ptr<State> state, Player player_id);
+
+  HistoryNode* Root() { return root_.get(); }
+
+  HistoryNode* GetByHistory(const std::string& history);
+  HistoryNode* GetByHistory(const State& state) {
+    return GetByHistory(state.HistoryString());
+  }
+
+  // For test use only.
+  std::vector<std::string> GetHistories();
+
+  Action NumHistories() { return state_to_node_.size(); }
+
+ private:
+  std::unique_ptr<HistoryNode> root_;
+
+  // Maps histories to HistoryNodes.
+  absl::flat_hash_map<std::string, HistoryNode*> state_to_node_;
+};
+
+// Returns a map of infostate strings to a vector of history nodes with
+// corresponding counter-factual probabilities, where counter-factual
+// probabilities are calculated using the passed policy for the opponent's
+// actions, a probability of 1 for all of the best_responder's actions, and the
+// natural chance probability for all chance actions. We return all infosets
+// (i.e. all sets of history nodes grouped by infostate) for the sub-game rooted
+// at state, from the perspective of the player with id best_responder.
+absl::flat_hash_map<std::string, std::vector<std::pair<HistoryNode*, double>>>
+GetAllInfoSets(std::unique_ptr<State> state, Player best_responder,
+               const Policy* policy, HistoryTree* tree);
+
+// For a given state, returns all successor states with accompanying
+// counter-factual probabilities.
+ActionsAndProbs GetSuccessorsWithProbs(const State& state,
+                                       Player best_responder,
+                                       const Policy* policy);
+
+// Returns all decision nodes, with accompanying counter-factual probabilities,
+// for the sub-game rooted at parent_state.
+std::vector<std::pair<std::unique_ptr<State>, double>> DecisionNodes(
+    const State& parent_state, Player best_responder, const Policy* policy);
+
+}  // namespace algorithms
+}  // namespace open_spiel
+
+#endif  // OPEN_SPIEL_ALGORITHMS_HISTORY_TREE_H_
diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/history_tree_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/history_tree_test.cc
new file mode 100644
index 0000000..2183ec7
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/history_tree_test.cc
@@ -0,0 +1,468 @@
+// Copyright 2021 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "open_spiel/algorithms/history_tree.h" + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/node_hash_set.h" +#include "open_spiel/algorithms/minimax.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/games/goofspiel/goofspiel.h" +#include "open_spiel/games/kuhn_poker/kuhn_poker.h" +#include "open_spiel/games/leduc_poker/leduc_poker.h" +#include "open_spiel/games/liars_dice/liars_dice.h" +#include "open_spiel/games/tic_tac_toe/tic_tac_toe.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace { + +void TestGameTree() { + std::vector game_names = {"leduc_poker", "kuhn_poker", + "liars_dice"}; + absl::flat_hash_map num_histories = { + // Not sure if these are correct. Chosen to make test pass. They seem to + // have the right order of magnitude. + {"kuhn_poker", 58}, + {"leduc_poker", 9457}, + {"liars_dice", 294883}}; + + for (const auto& game_name : game_names) { + std::shared_ptr game = LoadGame(game_name); + for (Player player_id : {Player{0}, Player{1}}) { + HistoryTree tree(game->NewInitialState(), player_id); + if (tree.NumHistories() != num_histories[game_name]) { + // TODO(b/126764761): Replace calls to SpielFatalError with more + // appropriate test macros once they support logging. + SpielFatalError(absl::StrCat( + "In the game ", game_name, + ", tree has wrong number of nodes: ", tree.NumHistories(), "but ", + num_histories[game_name], "nodes were expected.")); + } + + // Check that the root is not null. + if (tree.Root() == nullptr) { + SpielFatalError("Root of HistoryTree is null for game: " + game_name); + } + for (const std::string& history : tree.GetHistories()) { + HistoryNode* node = tree.GetByHistory(history); + if (node == nullptr) { + SpielFatalError(absl::StrCat("node is null for history: ", history, + " in game: ", game_name)); + } + if (node->GetState() == nullptr) { + SpielFatalError(absl::StrCat("state is null for history: ", history, + " in game: ", game_name)); + } + if (node->GetState()->HistoryString() != node->GetHistory()) { + SpielFatalError( + "history generated by state does not match history" + " stored in HistoryNode."); + } + if (history != node->GetHistory()) { + SpielFatalError( + "history key does not match history stored in " + "HistoryNode."); + } + if (node->GetType() != StateType::kTerminal) { + std::vector legal_actions = node->GetState()->LegalActions(); + std::vector child_actions = node->GetChildActions(); + if (legal_actions.size() != child_actions.size()) { + SpielFatalError(absl::StrCat( + "For state ", history, ", child actions has a different size (", + child_actions.size(), ") than legal actions (", + legal_actions.size(), ").")); + } + for (int i = 0; i < legal_actions.size(); ++i) { + if (legal_actions[i] != child_actions[i]) { + SpielFatalError(absl::StrCat( + "legal_actions[i] != child_actions[i]: ", legal_actions[i], + " != ", child_actions[i])); + } + } + } + + if (node->GetType() != StateType::kTerminal && + node->NumChildren() != node->GetState()->LegalActions().size()) { + SpielFatalError(absl::StrCat( + "number of child nodes does not match number of legal" + " actions in history: ", + history, " in game: ", game_name)); + } + if (node->GetType() == StateType::kDecision && + node->GetState()->CurrentPlayer() != player_id) { + if (node->GetInfoState() != + node->GetState()->InformationStateString()) { + SpielFatalError(absl::StrCat( + "infostate generated by state does not match ", + 
"infostate stored in HistoryNode for history: ", history, + "in game: ", game_name)); + } + } else if (node->GetType() == StateType::kChance) { + if (node->GetInfoState() != HistoryNode::kChanceNodeInfostateString) { + SpielFatalError(absl::StrCat( + "Chance node's infostate string not properly set for history: ", + history, " in game: ", game_name)); + } + } else if (node->GetType() == StateType::kTerminal) { + if (node->GetInfoState() != + HistoryNode::kTerminalNodeInfostateString) { + SpielFatalError(absl::StrCat( + "Chance node's infostate string not properly set for history: ", + history, " in game: ", game_name)); + } + } else { + if (node->GetInfoState() != + node->GetState()->InformationStateString(player_id)) { + SpielFatalError(absl::StrCat( + "infostate generated by state does not match ", + "infostate stored in HistoryNode for history: ", history, + "in game: ", game_name)); + } + } + } + } + } +} + +void TestInfoSetsHaveRightNumberOfGameStates() { + std::shared_ptr game = LoadGame("kuhn_poker"); + std::unique_ptr state = game->NewInitialState(); + TabularPolicy policy = GetUniformPolicy(*game); + auto best_responder = Player{0}; + HistoryTree tree(game->NewInitialState(), best_responder); + auto infosets = + GetAllInfoSets(game->NewInitialState(), best_responder, &policy, &tree); + for (const auto& kv : infosets) { + const std::string& infostate = kv.first; + const std::vector>& histories = kv.second; + int num_histories = histories.size(); + // The infostate represented by the empty string corresponds to the root + // infoset, which only has one history associated with it. + if (infostate.empty()) { + if (num_histories != 1) { + SpielFatalError( + absl::StrCat("Wrong number of histories in infoset at root;" + " expected 1, but found ", + num_histories)); + } + } else { + if (num_histories != 2) { + SpielFatalError( + absl::StrCat("Wrong number of histories in infoset at infostate ", + infostate, " expected 2, but found ", num_histories)); + } + } + } +} + +void TestGetAllInfoSetsMatchesInfoStates() { + std::shared_ptr game = LoadGame("kuhn_poker"); + std::unique_ptr state = game->NewInitialState(); + TabularPolicy policy = GetUniformPolicy(*game); + for (const auto& best_responder : {Player{0}, Player{1}}) { + HistoryTree tree(game->NewInitialState(), best_responder); + auto infosets = + GetAllInfoSets(game->NewInitialState(), best_responder, &policy, &tree); + for (const auto& kv : infosets) { + const std::string& infostate = kv.first; + for (const auto& state_and_prob : kv.second) { + HistoryNode* node = state_and_prob.first; + if (node == nullptr) SpielFatalError("Node is null."); + std::string node_infostate = node->GetInfoState(); + if (infostate != node_infostate) { + SpielFatalError( + absl::StrCat("infostate key (", infostate, ") does not match ", + "infostate stored in node (", node_infostate, ").")); + } + State* node_state = node->GetState(); + std::string state_infostate = + node_state->InformationStateString(best_responder); + if (node_infostate != state_infostate) { + SpielFatalError( + absl::StrCat("infostate stored in node (", node_infostate, ") ", + "does not match infostate calculated from state ", + "stored in node (", state_infostate, ").")); + } + if (node->GetType() == StateType::kDecision) { + if (node_state->CurrentPlayer() != best_responder) { + SpielFatalError( + absl::StrCat("CurrentPlayer for state stored in node (", + node_state->CurrentPlayer(), ") does not match ", + "best_responder (", best_responder, ").")); + } + } else if (node->GetType() == 
StateType::kDecision) { + if (node_state->CurrentPlayer() == best_responder) { + SpielFatalError(absl::StrCat( + "CurrentPlayer for state stored in node (", + node_state->CurrentPlayer(), ") matches best_responder (", + best_responder, ") but has type kDecision.")); + } + } + std::vector child_actions_vector = node->GetChildActions(); + absl::flat_hash_set child_actions(child_actions_vector.begin(), + child_actions_vector.end()); + std::vector legal_actions_vector = node_state->LegalActions(); + absl::node_hash_set legal_actions(legal_actions_vector.begin(), + legal_actions_vector.end()); + for (const auto& child_action : child_actions) { + if (legal_actions.count(child_action) == 0) { + SpielFatalError("Child action found that's not a legal action."); + } + } + for (const auto& legal_action : node_state->LegalActions()) { + if (child_actions.count(legal_action) == 0) { + SpielFatalError("Legal action found that's not a child action."); + } + std::unique_ptr child = node_state->Child(legal_action); + HistoryNode child_node = HistoryNode(Player{0}, std::move(child)); + if (node->GetType() != StateType::kChance) { + Player child_player = child_node.GetState()->CurrentPlayer(); + if (node_state->CurrentPlayer() == child_player) { + SpielFatalError(absl::StrCat( + "Child and parent have the same current player (", + child_player, ").")); + } + if (infostate == child_node.GetInfoState()) { + SpielFatalError( + absl::StrCat("Child and parent have the same infostate (", + infostate, ").")); + } + } + } + } + } + } +} + +void TestHistoryTreeIsSubsetOfGetAllInfoSets() { + std::shared_ptr game = LoadGame("kuhn_poker"); + std::unique_ptr state = game->NewInitialState(); + TabularPolicy policy = GetUniformPolicy(*game); + for (const auto& best_responder : {Player{0}, Player{1}}) { + HistoryTree tree(game->NewInitialState(), best_responder); + auto infosets = + GetAllInfoSets(game->NewInitialState(), best_responder, &policy, &tree); + for (const auto& history : tree.GetHistories()) { + HistoryNode* node = tree.GetByHistory(history); + if (node->GetState()->CurrentPlayer() == best_responder && + node->GetType() != StateType::kTerminal && + infosets.count(node->GetInfoState()) == 0) { + SpielFatalError(absl::StrCat("Infoset ", node->GetInfoState(), + " missing from GetAllInfoSets.")); + } + } + } +} + +// This is a common test that we want to make. We want to validate the +// counter-factual probabilities produced by this implementation against the +// golden values produced by existing implementations. +// best_responder is the player from who's view the infostate strings are +// calculated from, and represents the player for whom we are calculating a +// best response as. It can be any value in the range [0, game.NumPlayers()). +void CheckCounterFactualProbs( + const Game& game, const TabularPolicy& policy, + const absl::flat_hash_map& histories_and_probs, + Player best_responder) { + HistoryTree tree(game.NewInitialState(), best_responder); + + // Infosets maps infostate strings to a list of all histories that map to that + // same infostate, along with corresponding counter-factual reach + // probabilities. The counter-factual reach probability of a history is + // defined recursively: + // - At the root, the reach probability is 1. + // - At a chance node, you multiply the parent's reach probability by the + // probability of having that chance outcome. + // - At a decision node, if the current player is the one making the decision, + // you multiply the reach probability by 1. 
+ // - If another player is making a decision, you multiply the parent's reach + // probability by the probability that player makes that decision (taken + // here from their policy). + // Infostate strings here are assumed to be those that are returned from + // open_spiel::State::InformationState(best_responder), which are + // equivalent to those returned by HistoryNode::GetInfoState. + absl::flat_hash_map>> + infosets = GetAllInfoSets(game.NewInitialState(), best_responder, &policy, + &tree); + + // We check this for every infoset in the game. + for (const auto& infoset : infosets) { + for (const auto& state_and_prob : infoset.second) { + HistoryNode* node = state_and_prob.first; + // We only check for nodes where the best responder is playing. This is + // because the counter-factual probability calculations assign a + // probability of 1. to all of the best responder's actions, so by + // checking the nodes where the best responder plays, we remove spurious + // failures (as the probability would be wrong at a different decision + // node iff the probability is wrong at a decision node where the best + // responder is playing). + if (node->GetState()->CurrentPlayer() != best_responder) continue; + double prob = state_and_prob.second; + auto it = histories_and_probs.find(node->GetHistory()); + if (it == histories_and_probs.end()) + SpielFatalError(absl::StrCat("Missing history: ", node->GetHistory())); + SPIEL_CHECK_FLOAT_EQ(prob, it->second); + } + } +} + +// Verifies that GetAllInfoSets returns the correct counter-factual +// probabilities when calculating a best-response as player 0 against the +// uniform policy. +void TestGetAllInfoSetsHasRightCounterFactualProbsUniformPolicyPid0() { + // These values come from running the existing implementation against the + // uniform policy. The existing implementation in + // open_spiel/python/algorithms/exploitability.py has been tested extensively + // against multiple reference implementations that have all been verified to + // produce the golden values referenced in the published, scientific + // literature. Do not change these values without an extremely good reason. + // These values are known to be correct. + absl::flat_hash_map histories_and_probs = { + {"0, 1", 0.166666667}, {"0, 1, 0, 1", 0.083333333}, + {"0, 2", 0.166666667}, {"0, 2, 0, 1", 0.083333333}, + {"1, 0", 0.166666667}, {"1, 0, 0, 1", 0.083333333}, + {"1, 2", 0.166666667}, {"1, 2, 0, 1", 0.083333333}, + {"2, 0", 0.166666667}, {"2, 0, 0, 1", 0.083333333}, + {"2, 1", 0.166666667}, {"2, 1, 0, 1", 0.083333333}}; + std::shared_ptr game = LoadGame("kuhn_poker"); + TabularPolicy policy = GetUniformPolicy(*game); + CheckCounterFactualProbs(*game, policy, histories_and_probs, + /*best_responder=*/Player{0}); +} + +// Verifies that GetAllInfoSets returns the correct counter-factual +// probabilities when calculating a best-response as player 1 against the +// uniform policy. +void TestGetAllInfoSetsHasRightCounterFactualProbsUniformPolicyPid1() { + // These values come from running the existing implementation against the + // uniform policy. 
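+  // As a sanity check on one entry (reasoning added for this vendored copy):
+  // against the uniform policy, the history "0, 1, 0" reaches player 1 with
+  // counter-factual probability 1/6 (chance deal) * 1/2 (player 0's uniform
+  // pass) = 1/12 = 0.083333333, which matches the golden value below.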
+ absl::flat_hash_map histories_and_probs = { + {"0, 1, 0", 0.083333333}, {"0, 1, 1", 0.083333333}, + {"0, 2, 0", 0.083333333}, {"0, 2, 1", 0.083333333}, + {"1, 0, 0", 0.083333333}, {"1, 0, 1", 0.083333333}, + {"1, 2, 0", 0.083333333}, {"1, 2, 1", 0.083333333}, + {"2, 0, 0", 0.083333333}, {"2, 0, 1", 0.083333333}, + {"2, 1, 0", 0.083333333}, {"2, 1, 1", 0.083333333}}; + std::shared_ptr game = LoadGame("kuhn_poker"); + TabularPolicy policy = GetUniformPolicy(*game); + CheckCounterFactualProbs(*game, policy, histories_and_probs, + /*best_responder=*/Player{1}); +} + +// Verifies that GetAllInfoSets returns the correct counter-factual +// probabilities when calculating a best-response as player 0 against the +// AlwaysFold policy. +void TestGetAllInfoSetsHasRightCounterFactualProbsAlwaysFoldPid0() { + // These values come from running the existing implementation against the + // AlwaysFold policy. + absl::flat_hash_map histories_and_probs = { + {"0, 1", 0.166666667}, {"0, 1, 0, 1", 0.000000000}, + {"0, 2", 0.166666667}, {"0, 2, 0, 1", 0.000000000}, + {"1, 0", 0.166666667}, {"1, 0, 0, 1", 0.000000000}, + {"1, 2", 0.166666667}, {"1, 2, 0, 1", 0.000000000}, + {"2, 0", 0.166666667}, {"2, 0, 0, 1", 0.000000000}, + {"2, 1", 0.166666667}, {"2, 1, 0, 1", 0.000000000}}; + std::shared_ptr game = LoadGame("kuhn_poker"); + TabularPolicy policy = GetFirstActionPolicy(*game); + CheckCounterFactualProbs(*game, policy, histories_and_probs, + /*best_responder=*/Player{0}); +} + +// Verifies that GetAllInfoSets returns the correct counter-factual +// probabilities when calculating a best-response as player 1 against the +// AlwaysFold policy. +void TestGetAllInfoSetsHasRightCounterFactualProbsAlwaysFoldPid1() { + // These values come from running the existing implementation against the + // AlwaysFold policy. + absl::flat_hash_map histories_and_probs = { + {"0, 1, 0", 0.166666667}, {"0, 1, 1", 0.000000000}, + {"0, 2, 0", 0.166666667}, {"0, 2, 1", 0.000000000}, + {"1, 0, 0", 0.166666667}, {"1, 0, 1", 0.000000000}, + {"1, 2, 0", 0.166666667}, {"1, 2, 1", 0.000000000}, + {"2, 0, 0", 0.166666667}, {"2, 0, 1", 0.000000000}, + {"2, 1, 0", 0.166666667}, {"2, 1, 1", 0.000000000}}; + std::shared_ptr game = LoadGame("kuhn_poker"); + TabularPolicy policy = GetFirstActionPolicy(*game); + CheckCounterFactualProbs(*game, policy, histories_and_probs, + /*best_responder=*/Player{1}); +} + + +// Verifies that GetAllInfoSets returns the correct counter-factual +// probabilities when calculating a best-response as player 0 against the +// optimal policy for Kuhn policy. +void TestGetAllInfoSetsHasRightCounterFactualProbsOptimalPid0() { + // These values come from running the existing implementation against the + // Optimal policy for Kuhn with alpha = 0.2. + absl::flat_hash_map histories_and_probs = { + {"0, 1", 0.166666667}, {"0, 1, 0, 1", 0.000000000}, + {"0, 2", 0.166666667}, {"0, 2, 0, 1", 0.166666667}, + {"1, 0", 0.166666667}, {"1, 0, 0, 1", 0.055555556}, + {"1, 2", 0.166666667}, {"1, 2, 0, 1", 0.166666667}, + {"2, 0", 0.166666667}, {"2, 0, 0, 1", 0.055555556}, + {"2, 1", 0.166666667}, {"2, 1, 0, 1", 0.000000000}}; + std::shared_ptr game = LoadGame("kuhn_poker"); + TabularPolicy policy = kuhn_poker::GetOptimalPolicy(/*alpha=*/0.2); + CheckCounterFactualProbs(*game, policy, histories_and_probs, + /*best_responder=*/Player{0}); +} + +// Verifies that GetAllInfoSets returns the correct counter-factual +// probabilities when calculating a best-response as player 1 against the +// optimal policy for Kuhn policy. 
+void TestGetAllInfoSetsHasRightCounterFactualProbsOptimalPid1() { + // These values come from running the existing implementation against the + // Optimal policy for Kuhn with alpha = 0.2. + absl::flat_hash_map histories_and_probs = { + {"0, 1, 0", 0.133333333}, {"0, 1, 1", 0.033333333}, + {"0, 2, 0", 0.133333333}, {"0, 2, 1", 0.033333333}, + {"1, 0, 0", 0.166666667}, {"1, 0, 1", 0.000000000}, + {"1, 2, 0", 0.166666667}, {"1, 2, 1", 0.000000000}, + {"2, 0, 0", 0.066666667}, {"2, 0, 1", 0.100000000}, + {"2, 1, 0", 0.066666667}, {"2, 1, 1", 0.100000000}}; + std::shared_ptr game = LoadGame("kuhn_poker"); + TabularPolicy policy = kuhn_poker::GetOptimalPolicy(/*alpha=*/0.2); + CheckCounterFactualProbs(*game, policy, histories_and_probs, + /*best_responder=*/Player{1}); +} + +} // namespace +} // namespace algorithms +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::algorithms::TestGameTree(); + open_spiel::algorithms::TestInfoSetsHaveRightNumberOfGameStates(); + open_spiel::algorithms::TestGetAllInfoSetsMatchesInfoStates(); + open_spiel::algorithms::TestHistoryTreeIsSubsetOfGetAllInfoSets(); + open_spiel::algorithms:: + TestGetAllInfoSetsHasRightCounterFactualProbsUniformPolicyPid0(); + open_spiel::algorithms:: + TestGetAllInfoSetsHasRightCounterFactualProbsUniformPolicyPid1(); + open_spiel::algorithms:: + TestGetAllInfoSetsHasRightCounterFactualProbsAlwaysFoldPid0(); + open_spiel::algorithms:: + TestGetAllInfoSetsHasRightCounterFactualProbsAlwaysFoldPid1(); + open_spiel::algorithms:: + TestGetAllInfoSetsHasRightCounterFactualProbsOptimalPid0(); + open_spiel::algorithms:: + TestGetAllInfoSetsHasRightCounterFactualProbsOptimalPid1(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/infostate_tree.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/infostate_tree.cc new file mode 100644 index 0000000..61584e2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/infostate_tree.cc @@ -0,0 +1,727 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/algorithms/infostate_tree.h" + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/action_view.h" + +namespace open_spiel { +namespace algorithms { + +using internal::kUndefinedNodeId; + +InfostateNode::InfostateNode(const InfostateTree& tree, InfostateNode* parent, + int incoming_index, InfostateNodeType type, + const std::string& infostate_string, + double terminal_utility, + double terminal_ch_reach_prob, size_t depth, + std::vector legal_actions, + std::vector terminal_history) + : tree_(tree), + parent_(parent), + incoming_index_(incoming_index), + type_(type), + infostate_string_(infostate_string), + terminal_utility_(terminal_utility), + terminal_chn_reach_prob_(terminal_ch_reach_prob), + depth_(depth), + legal_actions_(std::move(legal_actions)), + terminal_history_(std::move(terminal_history)) { + // Implications for kTerminalNode + SPIEL_DCHECK_TRUE(type_ != kTerminalInfostateNode || parent_); + // Implications for kDecisionNode + SPIEL_DCHECK_TRUE(type_ != kDecisionInfostateNode || parent_); + // Implications for kObservationNode + SPIEL_DCHECK_TRUE(!(type_ == kObservationInfostateNode && parent_ && + parent_->type() == kDecisionInfostateNode) || + (incoming_index_ >= 0 && + incoming_index_ < parent_->legal_actions().size())); +} + +InfostateNode* InfostateNode::AddChild(std::unique_ptr child) { + SPIEL_CHECK_EQ(child->parent_, this); + children_.push_back(std::move(child)); + return children_.back().get(); +} + +InfostateNode* InfostateNode::GetChild( + const std::string& infostate_string) const { + for (const std::unique_ptr& child : children_) { + if (child->infostate_string() == infostate_string) return child.get(); + } + return nullptr; +} + +std::ostream& InfostateNode::operator<<(std::ostream& os) const { + if (!parent_) return os << 'x'; + return os << parent_ << ',' << incoming_index_; +} + +std::string InfostateNode::MakeCertificate() const { + if (type_ == kTerminalInfostateNode) return "{}"; + + std::vector certificates; + for (InfostateNode* child : child_iterator()) { + certificates.push_back(child->MakeCertificate()); + } + std::sort(certificates.begin(), certificates.end()); + + std::string open, close; + if (type_ == kDecisionInfostateNode) { + open = "["; + close = "]"; + } else if (type_ == kObservationInfostateNode) { + open = "("; + close = ")"; + } + + return absl::StrCat( + open, absl::StrJoin(certificates.begin(), certificates.end(), ""), close); +} + +void InfostateNode::RebalanceSubtree(int target_depth, int current_depth) { + SPIEL_DCHECK_LE(current_depth, target_depth); + depth_ = current_depth; + + if (is_leaf_node() && target_depth != current_depth) { + // Prepare the chain of dummy observations. 
+ depth_ = target_depth; + std::unique_ptr node = Release(); + InfostateNode* node_parent = node->parent(); + int position_in_leaf_parent = node->incoming_index(); + std::unique_ptr chain_head = + std::unique_ptr(new InfostateNode( + /*tree=*/tree_, /*parent=*/nullptr, + /*incoming_index=*/position_in_leaf_parent, + kObservationInfostateNode, + /*infostate_string=*/kFillerInfostate, + /*terminal_utility=*/NAN, /*terminal_ch_reach_prob=*/NAN, + current_depth, /*legal_actions=*/{}, /*terminal_history=*/{})); + InfostateNode* chain_tail = chain_head.get(); + for (int i = 1; i < target_depth - current_depth; ++i) { + chain_tail = + chain_tail->AddChild(std::unique_ptr(new InfostateNode( + /*tree=*/tree_, /*parent=*/chain_tail, + /*incoming_index=*/0, kObservationInfostateNode, + /*infostate_string=*/kFillerInfostate, + /*terminal_utility=*/NAN, /*terminal_ch_reach_prob=*/NAN, + current_depth + i, /*legal_actions=*/{}, + /*terminal_history=*/{}))); + } + chain_tail->children_.push_back(nullptr); + + // First put the node to the chain. If we did it in reverse order, + // i.e chain to parent and then node to the chain, the node would + // become freed. + auto* node_ptr = node.get(); + node_ptr->SwapParent(std::move(node), /*target=*/chain_tail, 0); + auto* chain_head_ptr = chain_head.get(); + chain_head_ptr->SwapParent(std::move(chain_head), /*target=*/node_parent, + position_in_leaf_parent); + } + + for (std::unique_ptr& child : children_) { + child->RebalanceSubtree(target_depth, current_depth + 1); + } +} + +std::unique_ptr InfostateNode::Release() { + SPIEL_DCHECK_TRUE(parent_); + SPIEL_DCHECK_TRUE(parent_->children_.at(incoming_index_).get() == this); + return std::move(parent_->children_.at(incoming_index_)); +} + +void InfostateNode::SwapParent(std::unique_ptr self, + InfostateNode* target, int at_index) { + // This node is still who it thinks it is :) + SPIEL_DCHECK_TRUE(self.get() == this); + target->children_.at(at_index) = std::move(self); + this->parent_ = target; + this->incoming_index_ = at_index; +} + +InfostateTree::InfostateTree(const std::vector& start_states, + const std::vector& chance_reach_probs, + std::shared_ptr infostate_observer, + Player acting_player, int max_move_ahead_limit) + : acting_player_(acting_player), + infostate_observer_(std::move(infostate_observer)), + root_(MakeRootNode()) { + SPIEL_CHECK_FALSE(start_states.empty()); + SPIEL_CHECK_EQ(start_states.size(), chance_reach_probs.size()); + SPIEL_CHECK_GE(acting_player_, 0); + SPIEL_CHECK_LT(acting_player_, start_states[0]->GetGame()->NumPlayers()); + SPIEL_CHECK_TRUE(infostate_observer_->HasString()); + + int start_max_move_number = 0; + for (const State* start_state : start_states) { + start_max_move_number = + std::max(start_max_move_number, start_state->MoveNumber()); + } + + for (int i = 0; i < start_states.size(); ++i) { + RecursivelyBuildTree(root_.get(), /*depth=*/1, *start_states[i], + start_max_move_number + max_move_ahead_limit, + chance_reach_probs[i]); + } + + // Operations to make after building the tree. 
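+  //  1. Rebalance the tree so that every leaf ends up at depth tree_height().
+  //  2. Collect the nodes at each depth into nodes_at_depths_.
+  //  3. Label the nodes with SequenceIds and DecisionIds.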
+ RebalanceTree(); + nodes_at_depths_.resize(tree_height() + 1); + CollectNodesAtDepth(mutable_root(), 0); + LabelNodesWithIds(); +} + +void InfostateTree::RebalanceTree() { + root_->RebalanceSubtree(tree_height(), 0); +} + +void InfostateTree::CollectNodesAtDepth(InfostateNode* node, size_t depth) { + nodes_at_depths_[depth].push_back(node); + for (InfostateNode* child : node->child_iterator()) + CollectNodesAtDepth(child, depth + 1); +} + +std::ostream& InfostateTree::operator<<(std::ostream& os) const { + return os << "Infostate tree for player " << acting_player_ << ".\n" + << "Tree height: " << tree_height_ << '\n' + << "Root branching: " << root_branching_factor() << '\n' + << "Number of decision infostate nodes: " << num_decisions() << '\n' + << "Number of sequences: " << num_sequences() << '\n' + << "Number of leaves: " << num_leaves() << '\n' + << "Tree certificate: " << '\n' + << root().MakeCertificate() << '\n'; +} + +std::unique_ptr InfostateTree::MakeNode( + InfostateNode* parent, InfostateNodeType type, + const std::string& infostate_string, double terminal_utility, + double terminal_ch_reach_prob, size_t depth, + const State* originating_state) { + auto legal_actions = + originating_state && originating_state->IsPlayerActing(acting_player_) + ? originating_state->LegalActions(acting_player_) + : std::vector(); + auto terminal_history = originating_state && originating_state->IsTerminal() + ? originating_state->History() + : std::vector(); + // Instantiate node using new to make sure that we can call + // the private constructor. + auto node = std::unique_ptr(new InfostateNode( + *this, parent, parent->num_children(), type, infostate_string, + terminal_utility, terminal_ch_reach_prob, depth, std::move(legal_actions), + std::move(terminal_history))); + return node; +} + +std::unique_ptr InfostateTree::MakeRootNode() const { + return std::unique_ptr(new InfostateNode( + /*tree=*/*this, /*parent=*/nullptr, /*incoming_index=*/0, + /*type=*/kObservationInfostateNode, + /*infostate_string=*/kDummyRootNodeInfostate, + /*terminal_utility=*/NAN, /*chance_reach_prob=*/NAN, + /*depth=*/0, /*legal_actions=*/{}, /*terminal_history=*/{})); +} + +void InfostateTree::UpdateLeafNode(InfostateNode* node, const State& state, + size_t leaf_depth, + double chance_reach_probs) { + tree_height_ = std::max(tree_height_, leaf_depth); + node->corresponding_states_.push_back(state.Clone()); + node->corresponding_ch_reaches_.push_back(chance_reach_probs); +} + +void InfostateTree::RecursivelyBuildTree(InfostateNode* parent, size_t depth, + const State& state, int move_limit, + double chance_reach_prob) { + if (state.IsTerminal()) + return BuildTerminalNode(parent, depth, state, chance_reach_prob); + else if (state.IsPlayerActing(acting_player_)) + return BuildDecisionNode(parent, depth, state, move_limit, + chance_reach_prob); + else + return BuildObservationNode(parent, depth, state, move_limit, + chance_reach_prob); +} + +void InfostateTree::BuildTerminalNode(InfostateNode* parent, size_t depth, + const State& state, + double chance_reach_prob) { + const double terminal_utility = state.Returns()[acting_player_]; + InfostateNode* terminal_node = parent->AddChild( + MakeNode(parent, kTerminalInfostateNode, + infostate_observer_->StringFrom(state, acting_player_), + terminal_utility, chance_reach_prob, depth, &state)); + UpdateLeafNode(terminal_node, state, depth, chance_reach_prob); +} + +void InfostateTree::BuildDecisionNode(InfostateNode* parent, size_t depth, + const State& state, int move_limit, + 
double chance_reach_prob) { + SPIEL_DCHECK_EQ(parent->type(), kObservationInfostateNode); + std::string info_state = + infostate_observer_->StringFrom(state, acting_player_); + InfostateNode* decision_node = parent->GetChild(info_state); + const bool is_leaf_node = state.MoveNumber() >= move_limit; + + if (decision_node) { + // The decision node has been already constructed along with children + // for each action: these are observation nodes. + // Fetches the observation child and goes deeper recursively. + SPIEL_DCHECK_EQ(decision_node->type(), kDecisionInfostateNode); + + if (is_leaf_node) { // Do not build deeper. + return UpdateLeafNode(decision_node, state, depth, chance_reach_prob); + } + + if (state.IsSimultaneousNode()) { + const ActionView action_view(state); + for (int i = 0; i < action_view.legal_actions[acting_player_].size(); + ++i) { + InfostateNode* observation_node = decision_node->child_at(i); + SPIEL_DCHECK_EQ(observation_node->type(), kObservationInfostateNode); + + for (Action flat_actions : + action_view.fixed_action(acting_player_, i)) { + std::unique_ptr child = state.Child(flat_actions); + RecursivelyBuildTree(observation_node, depth + 2, *child, move_limit, + chance_reach_prob); + } + } + } else { + std::vector legal_actions = state.LegalActions(acting_player_); + for (int i = 0; i < legal_actions.size(); ++i) { + InfostateNode* observation_node = decision_node->child_at(i); + SPIEL_DCHECK_EQ(observation_node->type(), kObservationInfostateNode); + std::unique_ptr child = state.Child(legal_actions.at(i)); + RecursivelyBuildTree(observation_node, depth + 2, *child, move_limit, + chance_reach_prob); + } + } + } else { // The decision node was not found yet. + decision_node = parent->AddChild(MakeNode( + parent, kDecisionInfostateNode, info_state, + /*terminal_utility=*/NAN, /*chance_reach_prob=*/NAN, depth, &state)); + + if (is_leaf_node) { // Do not build deeper. + return UpdateLeafNode(decision_node, state, depth, chance_reach_prob); + } + + // Build observation nodes right away after the decision node. + // This is because the player might be acting multiple times in a row: + // each time it might get some observations that branch the infostate + // tree. + + if (state.IsSimultaneousNode()) { + ActionView action_view(state); + for (int i = 0; i < action_view.legal_actions[acting_player_].size(); + ++i) { + // We build a dummy observation node. + // We can't ask for a proper infostate string or an originating state, + // because such a thing is not properly defined after only a partial + // application of actions for the sim move state + // (We need to supply all the actions). + InfostateNode* observation_node = decision_node->AddChild( + MakeNode(decision_node, kObservationInfostateNode, + /*infostate_string=*/kFillerInfostate, + /*terminal_utility=*/NAN, /*chance_reach_prob=*/NAN, depth, + /*originating_state=*/nullptr)); + + for (Action flat_actions : + action_view.fixed_action(acting_player_, i)) { + // Only now we can advance the state, when we have all actions. + std::unique_ptr child = state.Child(flat_actions); + RecursivelyBuildTree(observation_node, depth + 2, *child, move_limit, + chance_reach_prob); + } + } + } else { // Not a sim move node. 
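+      // Unlike the simultaneous-move branch above, the state can be advanced
+      // one action at a time here, so each observation child gets a proper
+      // infostate string taken from the resulting child state.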
+ for (Action a : state.LegalActions()) { + std::unique_ptr child = state.Child(a); + InfostateNode* observation_node = decision_node->AddChild( + MakeNode(decision_node, kObservationInfostateNode, + infostate_observer_->StringFrom(*child, acting_player_), + /*terminal_utility=*/NAN, /*chance_reach_prob=*/NAN, depth, + child.get())); + RecursivelyBuildTree(observation_node, depth + 2, *child, move_limit, + chance_reach_prob); + } + } + } +} + +void InfostateTree::BuildObservationNode(InfostateNode* parent, size_t depth, + const State& state, int move_limit, + double chance_reach_prob) { + SPIEL_DCHECK_TRUE(state.IsChanceNode() || + !state.IsPlayerActing(acting_player_)); + const bool is_leaf_node = state.MoveNumber() >= move_limit; + const std::string info_state = + infostate_observer_->StringFrom(state, acting_player_); + + InfostateNode* observation_node = parent->GetChild(info_state); + if (!observation_node) { + observation_node = parent->AddChild(MakeNode( + parent, kObservationInfostateNode, info_state, + /*terminal_utility=*/NAN, /*chance_reach_prob=*/NAN, depth, &state)); + } + SPIEL_DCHECK_EQ(observation_node->type(), kObservationInfostateNode); + + if (is_leaf_node) { // Do not build deeper. + return UpdateLeafNode(observation_node, state, depth, chance_reach_prob); + } + + if (state.IsChanceNode()) { + for (std::pair action_prob : state.ChanceOutcomes()) { + std::unique_ptr child = state.Child(action_prob.first); + RecursivelyBuildTree(observation_node, depth + 1, *child, move_limit, + chance_reach_prob * action_prob.second); + } + } else { + for (Action a : state.LegalActions()) { + std::unique_ptr child = state.Child(a); + RecursivelyBuildTree(observation_node, depth + 1, *child, move_limit, + chance_reach_prob); + } + } +} +int InfostateTree::root_branching_factor() const { + return root_->num_children(); +} + +std::shared_ptr MakeInfostateTree(const Game& game, + Player acting_player, + int max_move_limit) { + // Uses new instead of make_shared, because shared_ptr is not a friend and + // can't call private constructors. + return std::shared_ptr(new InfostateTree( + {game.NewInitialState().get()}, /*chance_reach_probs=*/{1.}, + game.MakeObserver(kInfoStateObsType, {}), acting_player, max_move_limit)); +} + +std::shared_ptr MakeInfostateTree( + const std::vector& start_nodes, int max_move_ahead_limit) { + std::vector const_nodes(start_nodes.begin(), + start_nodes.end()); + return MakeInfostateTree(const_nodes, max_move_ahead_limit); +} + +std::shared_ptr MakeInfostateTree( + const std::vector& start_nodes, + int max_move_ahead_limit) { + SPIEL_CHECK_FALSE(start_nodes.empty()); + const InfostateNode* some_node = start_nodes[0]; + const InfostateTree& originating_tree = some_node->tree(); + SPIEL_DCHECK_TRUE([&]() { + for (const InfostateNode* node : start_nodes) { + if (!node) return false; + if (!node->is_leaf_node()) return false; + if (node->depth() != some_node->depth()) return false; + if (&node->tree() != &originating_tree) return false; + } + return true; + }()); + + // We reserve a larger number of states, as infostate nodes typically contain + // a large number of States. (8 is an arbitrary choice though). 
+ std::vector start_states; + start_states.reserve(start_nodes.size() * 8); + std::vector chance_reach_probs; + chance_reach_probs.reserve(start_nodes.size() * 8); + + for (const InfostateNode* node : start_nodes) { + for (int i = 0; i < node->corresponding_states_size(); ++i) { + start_states.push_back(node->corresponding_states()[i].get()); + chance_reach_probs.push_back(node->corresponding_chance_reach_probs()[i]); + } + } + + // Uses new instead of make_shared, because shared_ptr is not a friend and + // can't call private constructors. + return std::shared_ptr(new InfostateTree( + start_states, chance_reach_probs, originating_tree.infostate_observer_, + originating_tree.acting_player_, max_move_ahead_limit)); +} + +std::shared_ptr MakeInfostateTree( + const std::vector& start_states, + const std::vector& chance_reach_probs, + std::shared_ptr infostate_observer, Player acting_player, + int max_move_ahead_limit) { + return std::shared_ptr( + new InfostateTree(start_states, chance_reach_probs, infostate_observer, + acting_player, max_move_ahead_limit)); +} +SequenceId InfostateTree::empty_sequence() const { + return root().sequence_id(); +} +absl::optional InfostateTree::DecisionIdForSequence( + const SequenceId& sequence_id) const { + SPIEL_DCHECK_TRUE(sequence_id.BelongsToTree(this)); + InfostateNode* node = sequences_.at(sequence_id.id()); + SPIEL_DCHECK_TRUE(node); + if (node->is_root_node()) { + return {}; + } else { + return node->parent_->decision_id(); + } +} +absl::optional InfostateTree::DecisionForSequence( + const SequenceId& sequence_id) { + SPIEL_DCHECK_TRUE(sequence_id.BelongsToTree(this)); + InfostateNode* node = sequences_.at(sequence_id.id()); + SPIEL_DCHECK_TRUE(node); + if (node->is_root_node()) { + return {}; + } else { + return node->parent_; + } +} +bool InfostateTree::IsLeafSequence(const SequenceId& sequence_id) const { + SPIEL_DCHECK_TRUE(sequence_id.BelongsToTree(this)); + InfostateNode* node = sequences_.at(sequence_id.id()); + SPIEL_DCHECK_TRUE(node); + return node->start_sequence_id() == node->end_sequence_id(); +} +std::vector InfostateTree::DecisionIdsWithParentSeq( + const SequenceId& sequence_id) const { + std::vector out; + const InfostateNode* observation_node = sequences_.at(sequence_id.id()); + std::stack open_set; + for (const InfostateNode* child : observation_node->child_iterator()) { + open_set.push(child); + } + while (!open_set.empty()) { + const InfostateNode* node = open_set.top(); + open_set.pop(); + if (node->type() == kDecisionInfostateNode && + node->sequence_id() == sequence_id) { + out.push_back(node->decision_id()); + } else { + for (const InfostateNode* child : node->child_iterator()) { + open_set.push(child); + } + } + } + return out; +} + +void InfostateTree::LabelNodesWithIds() { + // Idea of labeling: label the leaf sequences first, and continue up the tree. + size_t sequence_index = 0; + size_t decision_index = 0; + + // Do not label leaf nodes with sequences. 
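+  // The loop below walks the depths bottom-up (deepest decision infostates
+  // first), so the smallest SequenceIds belong to the deepest sequences and
+  // the empty sequence at the root receives the largest id. BestResponse()
+  // relies on this ordering when it sweeps the gradient from id 0 upwards.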
+ const int start_depth = nodes_at_depths_.size() - 2; + + for (int depth = start_depth; depth >= 0; --depth) { + for (InfostateNode* node : nodes_at_depths_[depth]) { + if (node->type() != kDecisionInfostateNode) continue; + decision_infostates_.push_back(node); + node->decision_id_ = DecisionId(decision_index++, this); + + for (InfostateNode* child : node->child_iterator()) { + sequences_.push_back(child); + child->sequence_id_ = SequenceId(sequence_index++, this); + } + // We could use sequence_index to set start and end sequences for + // the decision infostate right away here, however we'd like to make + // sure to label correctly all nodes in the tree. + } + } + // Finally label the last sequence (an empty sequence) in the root node. + sequences_.push_back(mutable_root()); + mutable_root()->sequence_id_ = SequenceId(sequence_index, this); + + CollectStartEndSequenceIds(mutable_root(), mutable_root()->sequence_id()); +} + +// Make a recursive call to assign the parent's sequences appropriately. +// Collect pairs of (start, end) sequence ids from children and propagate +// them up the tree. In case that deep nodes (close to the leaves) do not +// have any child decision nodes, set the (start, end) to the parent sequence. +// In this way the range iterator will be empty (start==end) and well defined. +std::pair InfostateTree::CollectStartEndSequenceIds( + InfostateNode* node, const SequenceId parent_sequence) { + size_t min_index = kUndefinedNodeId; // This is a large number. + size_t max_index = 0; + const SequenceId propagate_sequence_id = + node->sequence_id_.is_undefined() + ? parent_sequence + : node->sequence_id(); // This becomes the parent for next nodes. + + for (InfostateNode* child : node->child_iterator()) { + auto [min_child, max_child] = + CollectStartEndSequenceIds(child, propagate_sequence_id); + min_index = std::min(min_child, min_index); + max_index = std::max(max_child, max_index); + } + + if (min_index != kUndefinedNodeId) { + SPIEL_CHECK_LE(min_index, max_index); + node->start_sequence_id_ = SequenceId(min_index, this); + node->end_sequence_id_ = SequenceId(max_index + 1, this); + } else { + node->start_sequence_id_ = propagate_sequence_id; + node->end_sequence_id_ = propagate_sequence_id; + } + + if (node->sequence_id_.is_undefined()) { + // Propagate children limits. + node->sequence_id_ = parent_sequence; + return {min_index, max_index}; + } else { + // We have hit a defined sequence id, propagate it up. + return {node->sequence_id_.id(), node->sequence_id_.id()}; + } +} + +std::pair InfostateTree::BestResponse( + TreeplexVector&& gradient) const { + SPIEL_CHECK_EQ(this, gradient.tree()); + SPIEL_CHECK_EQ(num_sequences(), gradient.size()); + SfStrategy response(this); + + // 1. Compute counterfactual best response + // (i.e. in all infostates, even unreachable ones) + SequenceId current(0, this); + const double init_value = -std::numeric_limits::infinity(); + while (current.id() <= empty_sequence().id()) { + double max_value = init_value; + SequenceId max_id = current; + const InfostateNode* node = observation_infostate(current); + for (current = node->start_sequence_id(); + current != node->end_sequence_id(); current.next()) { + if (gradient[current] > max_value) { + max_value = gradient[current]; + max_id = current; + } + } + if (init_value != max_value) { + gradient[node->sequence_id()] += max_value; + response[max_id] = 1.; + } + current.next(); + } + SPIEL_CHECK_EQ(current.id(), empty_sequence().id() + 1); + + // 2. Prune away unreachable subtrees. 
+ // + // This can be done with a more costly recursion. + // Instead we make a more cache-friendly double pass through the response + // vector: we increment the visited path by 1, resulting in a value of 2. + // Then we zero-out all values but 2. + current = empty_sequence(); + response[current] = 2.; + while (!IsLeafSequence(current)) { + for (SequenceId seq : observation_infostate(current)->AllSequenceIds()) { + if (response[seq] == 1.) { + current = seq; + response[seq] += 1.; + break; + } + } + } + for (SequenceId seq : response.range()) { + response[seq] = response[seq] == 2. ? 1. : 0.; + } + SPIEL_DCHECK_TRUE(IsValidSfStrategy(response)); + return {gradient[empty_sequence()], response}; +} + +double InfostateTree::BestResponseValue(LeafVector&& gradient) const { + // Loop over all heights. + for (int d = tree_height_ - 1; d >= 0; d--) { + int left_offset = 0; + // Loop over all parents of current nodes. + for (int parent_idx = 0; parent_idx < nodes_at_depths_[d].size(); + parent_idx++) { + const InfostateNode* node = nodes_at_depths_[d][parent_idx]; + const int num_children = node->num_children(); + const Range children_range = + gradient.range(left_offset, left_offset + num_children); + const LeafId parent_id(parent_idx, this); + + if (node->type() == kDecisionInfostateNode) { + double max_value = std::numeric_limits::min(); + for (LeafId id : children_range) { + max_value = std::fmax(max_value, gradient[id]); + } + gradient[parent_id] = max_value; + } else { + SPIEL_DCHECK_EQ(node->type(), kObservationInfostateNode); + double sum_value = 0.; + for (LeafId id : children_range) { + sum_value += gradient[id]; + } + gradient[parent_id] = sum_value; + } + left_offset += num_children; + } + // Check that we passed over all of the children. + SPIEL_DCHECK_EQ(left_offset, nodes_at_depths_[d + 1].size()); + } + const LeafId root_id(0, this); + return gradient[root_id]; +} + +DecisionId InfostateTree::DecisionIdFromInfostateString( + const std::string& infostate_string) const { + for (InfostateNode* node : decision_infostates_) { + if (node->infostate_string() == infostate_string) + return node->decision_id(); + } + return kUndefinedDecisionId; +} + +bool CheckSum(const SfStrategy& strategy, SequenceId id, double expected_sum) { + if (fabs(strategy[id] - expected_sum) > 1e-13) { + return false; + } + + const InfostateTree* tree = strategy.tree(); + if (tree->IsLeafSequence(id)) { + return true; + } + + double actual_sum = 0.; + const InfostateNode* node = tree->observation_infostate(id); + for (SequenceId sub_seq : node->AllSequenceIds()) { + actual_sum += strategy[sub_seq]; + } + if (fabs(actual_sum - expected_sum) > 1e-13) { + return false; + } + + for (SequenceId sub_seq : node->AllSequenceIds()) { + if (!CheckSum(strategy, sub_seq, strategy[sub_seq])) { + return false; + } + } + return true; +} + +bool IsValidSfStrategy(const SfStrategy& strategy) { + return CheckSum(strategy, strategy.tree()->empty_sequence(), 1.); +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/infostate_tree.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/infostate_tree.h new file mode 100644 index 0000000..2f02dd1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/infostate_tree.h @@ -0,0 +1,723 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_INFOSTATE_TREE_H_ +#define OPEN_SPIEL_ALGORITHMS_INFOSTATE_TREE_H_ + +#include +#include +#include + +#include "open_spiel/spiel.h" + +// This file contains data structures used in imperfect information games. +// Specifically, we implement an infostate tree, a representation of a game +// from the perspective of an acting player. +// +// The information-state tree [1] contains information states, which describe +// where the player is a) acting, b) getting observations, or c) receiving +// terminal utilities (when the game ends). See `InfostateNodeType` for more +// details. +// +// The tree can be constructed with a depth limit, so we make a distinction +// between leaf nodes and non-leaf nodes. All terminal nodes are leaf nodes. +// +// The identification of infostates is based on strings from an information +// state observer, i.e. one that is constructed using `kInfoStateObsType`. +// +// As algorithms typically need to store information associated to specific +// nodes of the tree, we provide following indexing mechanisms (see the classes +// below for more details): +// +// - `DecisionId` refers to a decision infostate where the player acts. +// - `SequenceId` refers to an observation infostate that follows the decision +// infostate after some action. +// - `LeafId` refers to an infostate node which is a leaf. +// +// All of these ids are very cheap (they are just typed `size_t`s). +// They can be used to get a pointer to the corresponding infostate node. +// +// To enable some algorithmic optimizations we construct the trees "balanced". +// We call a _balanced_ tree one which has all leaf nodes at the same depth. +// To make the tree balanced, we may need to pad "dummy" observation nodes as +// prefixes for the (previously too shallow) leafs. This is not too expensive, +// as most games are balanced by default due to game rules. +// +// [1]: Rethinking Formal Models of Partially Observable Multiagent Decision +// Making https://arxiv.org/abs/1906.11110 + +namespace open_spiel { +namespace algorithms { + +// To categorize infostate nodes we use nomenclature from [2]: +// +// - In _decision nodes_, the acting player selects actions. +// - In _observation nodes_ the acting player receives observations. +// They can correspond to State that is a chance node, or opponent's node. +// Importantly, they can correspond also to the acting player's node, +// as the player may have discovered something as a result of its action +// in the previous decision node. (This is especially important for the tree +// construction in simultaneous-move games). +// - Additionally, we introduce _terminal nodes_, which correspond to a single +// terminal history. +// +// The terminal nodes store player's utility as well as cumulative chance reach +// probability. 
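+// For example, in Kuhn poker from player 0's perspective, receiving the
+// private card is an observation node, choosing between Pass and Bet is a
+// decision node, and each fold or showdown outcome is a terminal node.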
+// +// [2]: Faster Game Solving via Predictive Blackwell Approachability: +// Connecting Regret Matching and Mirror Descent +// https://arxiv.org/abs/2007.14358 +enum InfostateNodeType { + kDecisionInfostateNode, + kObservationInfostateNode, + kTerminalInfostateNode +}; + +// Representing the game via infostates leads actually to a graph structure +// of a forest (a collection of trees), as the player may be acting for the +// first time in distinct situations. We trivially make it into a proper tree +// by introducing a "dummy" root node, which we set as an observation node. +// It can be interpreted as "the player observes the start of the game". +// This node also corresponds to the empty sequence. +// Following is the infostate string for this node. +constexpr const char* kDummyRootNodeInfostate = "(root)"; + +// Sometimes we need to create infostate nodes that do not have a corresponding +// game State, and therefore we cannot retrieve their string representations. +// This happens in simultaneous move games or if we rebalance game trees. +constexpr const char* kFillerInfostate = "(fill)"; + +// Forward declaration. +class InfostateTree; + +namespace internal { + +// An implementation detail - Not to be used directly. +// +// We use various indexing schemes (SequenceId, DecisionId, LeafId) to access +// specific nodes in the tree. Not all nodes can have an Id defined, for example +// a DecisionId is not defined for decision nodes. In this case they will +// default to the following value. +constexpr size_t kUndefinedNodeId = -1; // This is a large number. + +// An implementation detail - Not to be used directly. +// +// Create an indexing of specific infostate nodes. +// +// In release-mode the implementation is as cheap as the underlying size_t +// identifier. Therefore it is preferable to pass the Ids by copy and not +// by pointers / references. +// Most importantly in debug-mode we add checks to make sure that we are using +// the ids on appropriate trees and we do not try to index any opponents' trees. +// +// We use CRTP as it allows us to reuse the implementation for derived classes. +template +class NodeId { + size_t identifier_ = kUndefinedNodeId; +#ifndef NDEBUG // Allow additional automatic debug-time checks. + const InfostateTree* tree_ = nullptr; + + public: + NodeId(size_t id_value, const InfostateTree* tree_ptr) + : identifier_(id_value), tree_(tree_ptr) {} + NodeId& operator=(Self&& rhs) { + SPIEL_CHECK_TRUE( + tree_ == rhs.tree_ || + // The NodeId may be uninitialized, so allow to copy the rhs tree. + tree_ == nullptr && rhs.tree_ != nullptr); + identifier_ = rhs.id(); + tree_ == rhs.tree_; + return *this; + } + bool operator==(const Self& rhs) const { + SPIEL_CHECK_EQ(tree_, rhs.tree_); + return id() == rhs.id(); + } + bool operator!=(const Self& rhs) const { + SPIEL_CHECK_EQ(tree_, rhs.tree_); + return id() != rhs.id(); + } + bool BelongsToTree(const InfostateTree* other) const { + return tree_ == other; + } +#else + + public: + // Do not save the tree pointer, but expose the same interface + // so it's easy to use. + NodeId(size_t id_value, const InfostateTree*) : identifier_(id_value) {} + Self& operator=(Self&& rhs) { + identifier_ = rhs.id(); + return this; + } + bool operator==(const Self& rhs) const { return id() == rhs.id(); } + bool operator!=(const Self& rhs) const { return id() != rhs.id(); } + // BelongsToTree is not implemented on purpose: + // It must not be called in release mode -- used only by DCHECK statements. 
+#endif + constexpr NodeId() {} + size_t id() const { + SPIEL_CHECK_NE(identifier_, kUndefinedNodeId); + return identifier_; + } + bool is_undefined() const { return identifier_ == kUndefinedNodeId; } + void next() { + SPIEL_CHECK_NE(identifier_, kUndefinedNodeId); + ++identifier_; + } +}; + +} // namespace internal + +// `SequenceId` refers to an observation infostate that follows the decision +// infostate after following some action. It indexes the decision space of +// an agent, and its strategy can formulated in terms of values associated with +// the agent's sequences. See `TreeplexVector` for more details. +// The smallest sequence ids correspond to the deepest nodes and the highest +// value corresponds to the empty sequence. +class SequenceId final : public internal::NodeId { + using NodeId::NodeId; +}; +// When the tree is still under construction and a node doesn't +// have a final sequence id assigned yet, we use this value. +constexpr SequenceId kUndefinedSequenceId = SequenceId(); + +// `DecisionId` refers to an infostate node where the player acts, +// i.e. an infostate node with the type `kDecisionInfostateNode`. +class DecisionId final : public internal::NodeId { + using NodeId::NodeId; +}; +// When a node isn't a decision infostate, we use this value instead. +constexpr DecisionId kUndefinedDecisionId = DecisionId(); + +// `LeafId` refers to an infostate node which is a leaf. Note that this can be +// an arbitrary infostate node type. A kTerminalInfostateNode is always +// a leaf node. +// Note that leaf decision nodes do not have assigned any `DecisionId`, and +// similarly leaf observation nodes do not have assigned any `SequenceId`. +class LeafId final : public internal::NodeId { + using internal::NodeId::NodeId; +}; +// When a node isn't a leaf, we use this value instead. +constexpr LeafId kUndefinedLeafId = LeafId(); + +// Each of the Ids can be used to index an appropriate vector. +// See below for an implementation. +template +class TreeplexVector; +template +class LeafVector; +template +class DecisionVector; +using SfStrategy = TreeplexVector; + +// A convenience iterator over a contiguous range of node ids. +template +class RangeIterator { + size_t id_; + const InfostateTree* tree_; + + public: + RangeIterator(size_t id, const InfostateTree* tree) : id_(id), tree_(tree) {} + RangeIterator& operator++() { + ++id_; + return *this; + } + bool operator!=(const RangeIterator& other) const { + return id_ != other.id_ || tree_ != other.tree_; + } + Id operator*() { return Id(id_, tree_); } +}; +template +class Range { + const size_t start_; + const size_t end_; + const InfostateTree* tree_; + + public: + Range(size_t start, size_t end, const InfostateTree* tree) + : start_(start), end_(end), tree_(tree) { + SPIEL_CHECK_LE(start_, end_); + } + RangeIterator begin() const { return RangeIterator(start_, tree_); } + RangeIterator end() const { return RangeIterator(end_, tree_); } +}; + +// Forward declaration. +class InfostateNode; + +// Creates an infostate tree for a player based on the initial state +// of the game, up to some move limit. +std::shared_ptr MakeInfostateTree(const Game& game, + Player acting_player, + int max_move_limit = 1000); + +// Creates an infostate tree for a player based on some start states, +// up to some move limit from the deepest start state. 
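+// There must be one chance reach probability per start state: the cumulative
+// chance probability of reaching that state. These values seed the chance
+// reaches accumulated down the tree and exposed on leaf nodes via
+// corresponding_chance_reach_probs().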
+std::shared_ptr MakeInfostateTree( + const std::vector& start_states, + const std::vector& chance_reach_probs, + std::shared_ptr infostate_observer, Player acting_player, + int max_move_ahead_limit = 1000); + +// Creates an infostate tree based on some leaf infostate nodes coming from +// another infostate tree, up to some move limit. +// This is useful for easily constructing (depth-limited) tree continuations. +std::shared_ptr MakeInfostateTree( + const std::vector& start_nodes, + int max_move_ahead_limit = 1000); + +// C++17 does not allow implicit conversion of non-const pointers to const +// pointers within a vector - explanation: https://stackoverflow.com/a/2102415 +// This just adds const to the pointers and calls the other MakeInfostateTree. +std::shared_ptr MakeInfostateTree( + const std::vector& start_nodes, + int max_move_ahead_limit = 1000); + +class InfostateTree final { + // Note that only MakeInfostateTree is allowed to call the constructor + // to ensure the trees are always allocated on heap. We do this so that all + // the collected pointers are valid throughout the tree's lifetime even if + // they are moved around. + private: + InfostateTree(const std::vector& start_states, + const std::vector& chance_reach_probs, + std::shared_ptr infostate_observer, + Player acting_player, int max_move_ahead_limit); + // Friend factories. + friend std::shared_ptr MakeInfostateTree(const Game&, Player, + int); + friend std::shared_ptr MakeInfostateTree( + const std::vector&, const std::vector&, + std::shared_ptr, Player, int); + friend std::shared_ptr MakeInfostateTree( + const std::vector&, int); + + public: + // -- Root accessors --------------------------------------------------------- + const InfostateNode& root() const { return *root_; } + InfostateNode* mutable_root() { return root_.get(); } + int root_branching_factor() const; + + // -- Tree information ------------------------------------------------------- + Player acting_player() const { return acting_player_; } + // Zero-based height. + // (the height of a tree that contains only root node is zero.) + size_t tree_height() const { return tree_height_; } + + // -- General statistics ----------------------------------------------------- + size_t num_decisions() const { return decision_infostates_.size(); } + size_t num_sequences() const { return sequences_.size(); } + size_t num_leaves() const { return nodes_at_depths_.back().size(); } + // A function overload used for TreeVector templates. + size_t num_ids(DecisionId) const { return num_decisions(); } + size_t num_ids(SequenceId) const { return num_sequences(); } + size_t num_ids(LeafId) const { return num_leaves(); } + + // -- Sequence operations ---------------------------------------------------- + SequenceId empty_sequence() const; + InfostateNode* observation_infostate(const SequenceId& sequence_id) { + SPIEL_DCHECK_TRUE(sequence_id.BelongsToTree(this)); + return sequences_.at(sequence_id.id()); + } + const InfostateNode* observation_infostate( + const SequenceId& sequence_id) const { + SPIEL_DCHECK_TRUE(sequence_id.BelongsToTree(this)); + return sequences_.at(sequence_id.id()); + } + Range AllSequenceIds() const { + return Range(0, sequences_.size(), this); + } + // Returns all DecisionIds which can be found in a subtree of given sequence. + std::vector DecisionIdsWithParentSeq(const SequenceId&) const; + // Returns `None` if the sequence is the empty sequence. 
+ absl::optional DecisionIdForSequence(const SequenceId&) const; + // Returns `None` if the sequence is the empty sequence. + absl::optional DecisionForSequence(const SequenceId&); + // Returns whether the sequence ends with the last action the player can make. + bool IsLeafSequence(const SequenceId&) const; + + // -- Decision operations ---------------------------------------------------- + InfostateNode* decision_infostate(const DecisionId& decision_id) { + SPIEL_DCHECK_TRUE(decision_id.BelongsToTree(this)); + return decision_infostates_.at(decision_id.id()); + } + const InfostateNode* decision_infostate(const DecisionId& decision_id) const { + SPIEL_DCHECK_TRUE(decision_id.BelongsToTree(this)); + return decision_infostates_.at(decision_id.id()); + } + const std::vector& AllDecisionInfostates() const { + return decision_infostates_; + } + Range AllDecisionIds() const { + return Range(0, decision_infostates_.size(), this); + } + DecisionId DecisionIdFromInfostateString( + const std::string& infostate_string) const; + + // -- Leaf operations -------------------------------------------------------- + const std::vector& leaf_nodes() const { + return nodes_at_depths_.back(); + } + InfostateNode* leaf_node(const LeafId& leaf_id) const { + SPIEL_DCHECK_TRUE(leaf_id.BelongsToTree(this)); + return leaf_nodes().at(leaf_id.id()); + } + const std::vector>& nodes_at_depths() const { + return nodes_at_depths_; + } + const std::vector& nodes_at_depth(size_t depth) const { + return nodes_at_depths_.at(depth); + } + + // -- Tree operations -------------------------------------------------------- + // Compute best response and value based on gradient from opponents. + // This consumes the gradient vector, as it is used to compute the value. + std::pair BestResponse( + TreeplexVector&& gradient) const; + // Compute best response value based on gradient from opponents over leaves. + // This consumes the gradient vector, as it is used to compute the value. + double BestResponseValue(LeafVector&& gradient) const; + + // -- For debugging ---------------------------------------------------------- + std::ostream& operator<<(std::ostream& os) const; + + private: + const Player acting_player_; + const std::shared_ptr infostate_observer_; + const std::unique_ptr root_; + /*const*/ size_t tree_height_ = 0; + + // Tree structure collections that index the respective NodeIds. + std::vector decision_infostates_; + std::vector sequences_; + // The last vector corresponds to the leaf nodes. + std::vector> nodes_at_depths_; + + // Utility functions whenever we create a new node for the tree. + std::unique_ptr MakeNode(InfostateNode* parent, + InfostateNodeType type, + const std::string& infostate_string, + double terminal_utility, + double terminal_ch_reach_prob, + size_t depth, + const State* originating_state); + std::unique_ptr MakeRootNode() const; + + // Makes sure that all tree leaves are at the same height. + // It inserts a linked list of dummy observation nodes with appropriate length + // to balance all the leaves. + void RebalanceTree(); + + void UpdateLeafNode(InfostateNode* node, const State& state, + size_t leaf_depth, double chance_reach_probs); + + // Build the tree. 
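+  // RecursivelyBuildTree dispatches on the current State: terminal states
+  // become terminal nodes, states where acting_player_ acts become decision
+  // nodes (with one observation child per legal action), and chance or
+  // opponent states become observation nodes.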
+ void RecursivelyBuildTree(InfostateNode* parent, size_t depth, + const State& state, int move_limit, + double chance_reach_prob); + void BuildTerminalNode(InfostateNode* parent, size_t depth, + const State& state, double chance_reach_prob); + void BuildDecisionNode(InfostateNode* parent, size_t depth, + const State& state, int move_limit, + double chance_reach_prob); + void BuildObservationNode(InfostateNode* parent, size_t depth, + const State& state, int move_limit, + double chance_reach_prob); + + void CollectNodesAtDepth(InfostateNode* node, size_t depth); + void LabelNodesWithIds(); + std::pair CollectStartEndSequenceIds( + InfostateNode* node, const SequenceId parent_sequence); +}; + +// Iterate over a vector of unique pointers, but expose only the raw pointers. +template +class VecWithUniquePtrsIterator { + int pos_; + const std::vector>& vec_; + + public: + explicit VecWithUniquePtrsIterator(const std::vector>& vec, + int pos = 0) + : pos_(pos), vec_(vec) {} + VecWithUniquePtrsIterator& operator++() { + pos_++; + return *this; + } + bool operator==(VecWithUniquePtrsIterator other) const { + return pos_ == other.pos_; + } + bool operator!=(VecWithUniquePtrsIterator other) const { + return !(*this == other); + } + T* operator*() { return vec_[pos_].get(); } + VecWithUniquePtrsIterator begin() const { return *this; } + VecWithUniquePtrsIterator end() const { + return VecWithUniquePtrsIterator(vec_, vec_.size()); + } +}; + +class InfostateNode final { + // Note that all of the following members are const or they should be const. + // However we can't make all of them const during the node construction + // because they might be computed only after the whole tree is built. + private: + // Reference to the tree this node belongs to. This reference has a valid + // lifetime, as it is allocated once on the heap and never moved. + const InfostateTree& tree_; + // Pointer to the parent node. Null for the root node. + // This is not const so that we can change it when we rebalance the tree. + /*const*/ InfostateNode* parent_; + // Position of this node in the parent's children, i.e. it holds that + // parent_->children_.at(incoming_index_).get() == this. + // + // For decision nodes this corresponds also to the + // State::LegalActions(player_).at(incoming_index_) + // + // This is not const so that we can change it when we rebalance the tree. + /*const*/ int incoming_index_; + // Type of the node. + const InfostateNodeType type_; + // Identifier of the infostate. + const std::string infostate_string_; + // Decision identifier of this node. + // This is not const as the ids are assigned after the tree is built. + /*const*/ DecisionId decision_id_ = kUndefinedDecisionId; + // Sequence identifier of this node. + // The first is the parent sequence of the infostate, while the last + // two sequence IDs represent the sequence id of the first and last action + 1 + // at the infostate node. Because sequences assigned to an infostate + // are contiguous, we don't need to store all intermediate sequence IDs. + // We can thus use a Range iterable to make looping frictionless. + // This is not const as the ids can be assigned only after the tree is built. + /*const*/ SequenceId sequence_id_ = kUndefinedSequenceId; + /*const*/ SequenceId start_sequence_id_ = kUndefinedSequenceId; + /*const*/ SequenceId end_sequence_id_ = kUndefinedSequenceId; + // Sequence identifier of this node. + // This is not const as the ids are assigned after the tree is rebalanced. 
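+  // (This member is the LeafId: it indexes leaf_nodes(), i.e. the nodes at
+  // the deepest depth of the balanced tree.)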
+ /*const*/ LeafId leaf_id_ = kUndefinedLeafId; + // Utility of terminal state corresponding to the terminal infostate node. + // If the node is not terminal, the value is NaN. + const double terminal_utility_; + // Cumulative product of chance probabilities leading up to a terminal node. + // If the node is not terminal, the value is NaN. + const double terminal_chn_reach_prob_; + // Depth of the node, i.e. number of edges on the path from the root. + // Note that depth does not necessarily correspond to the MoveNumber() + // of corresponding states. + // This is not const because tree rebalancing can change this value. + /*const*/ size_t depth_; + // Children infostate nodes. Notice the node owns its children. + // This is not const so that we can add children. + /*const*/ std::vector> children_; + // Store States that correspond to a leaf node. + // This is not const so that we can add corresponding states. + /*const*/ std::vector> corresponding_states_; + // Store chance reach probs for States that correspond to a leaf node. + // This is not const so that we can add corresponding reaches. + /*const*/ std::vector corresponding_ch_reaches_; + // Stored only for decision nodes. + const std::vector legal_actions_; + // Stored only for terminal nodes. + const std::vector terminal_history_; + + // Only InfostateTree is allowed to construct nodes. + InfostateNode(const InfostateTree& tree, InfostateNode* parent, + int incoming_index, InfostateNodeType type, + const std::string& infostate_string, double terminal_utility, + double terminal_ch_reach_prob, size_t depth, + std::vector legal_actions, + std::vector terminal_history); + friend class InfostateTree; + + public: + // -- Node accessors. -------------------------------------------------------- + const InfostateTree& tree() const { return tree_; } + InfostateNode* parent() const { return parent_; } + int incoming_index() const { return incoming_index_; } + const InfostateNodeType& type() const { return type_; } + size_t depth() const { return depth_; } + bool is_root_node() const { return !parent_; } + bool has_infostate_string() const { + return infostate_string_ != kFillerInfostate && + infostate_string_ != kDummyRootNodeInfostate; + } + const std::string& infostate_string() const { + // Avoid working with empty infostate strings. + SPIEL_DCHECK_TRUE(has_infostate_string()); + return infostate_string_; + } + + // -- Children accessors. ---------------------------------------------------- + InfostateNode* child_at(int i) const { return children_.at(i).get(); } + int num_children() const { return children_.size(); } + VecWithUniquePtrsIterator child_iterator() const { + return VecWithUniquePtrsIterator(children_); + } + + // -- Sequence operations. --------------------------------------------------- + const SequenceId sequence_id() const { + SPIEL_CHECK_FALSE(sequence_id_.is_undefined()); + return sequence_id_; + } + const SequenceId start_sequence_id() const { + SPIEL_CHECK_FALSE(start_sequence_id_.is_undefined()); + return start_sequence_id_; + } + const SequenceId end_sequence_id() const { + SPIEL_CHECK_FALSE(end_sequence_id_.is_undefined()); + return end_sequence_id_; + } + Range AllSequenceIds() const { + return Range(start_sequence_id_.id(), end_sequence_id_.id(), + &tree_); + } + + // -- Decision operations. 
--------------------------------------------------- + const DecisionId decision_id() const { + SPIEL_CHECK_EQ(type_, kDecisionInfostateNode); + SPIEL_CHECK_FALSE(decision_id_.is_undefined()); + return decision_id_; + } + const std::vector& legal_actions() const { + SPIEL_CHECK_EQ(type_, kDecisionInfostateNode); + return legal_actions_; + } + + // -- Leaf operations. ------------------------------------------------------- + bool is_leaf_node() const { return children_.empty(); } + double terminal_utility() const { + SPIEL_CHECK_EQ(type_, kTerminalInfostateNode); + return terminal_utility_; + } + double terminal_chance_reach_prob() const { + SPIEL_CHECK_EQ(type_, kTerminalInfostateNode); + return terminal_chn_reach_prob_; + } + size_t corresponding_states_size() const { + return corresponding_states_.size(); + } + const std::vector>& corresponding_states() const { + SPIEL_CHECK_TRUE(is_leaf_node()); + return corresponding_states_; + } + const std::vector& corresponding_chance_reach_probs() const { + SPIEL_CHECK_TRUE(is_leaf_node()); + return corresponding_ch_reaches_; + } + const std::vector& TerminalHistory() const { + SPIEL_DCHECK_EQ(type_, kTerminalInfostateNode); + return terminal_history_; + } + + // -- For debugging. --------------------------------------------------------- + std::ostream& operator<<(std::ostream& os) const; + // Make subtree certificate (string representation) for easy comparison + // of (isomorphic) trees. + std::string MakeCertificate() const; + + private: + // Make sure that the subtree ends at the requested target depth by inserting + // dummy observation nodes with one outcome. + void RebalanceSubtree(int target_depth, int current_depth); + + // Get the unique_ptr for this node. The usage is intended only for tree + // balance manipulation. + std::unique_ptr Release(); + + // Change the parent of this node by inserting it at at index + // of the new parent. The node at the existing position will be freed. + // We pass the unique ptr of itself, because calling Release might be + // undefined: the node we want to swap a parent for can be root of a subtree. + void SwapParent(std::unique_ptr self, InfostateNode* target, + int at_index); + + InfostateNode* AddChild(std::unique_ptr child); + InfostateNode* GetChild(const std::string& infostate_string) const; +}; + +namespace internal { + +// An implementation detail - Not to be used directly. +// +// Create a common TreeVector container that can be indexed +// with the respective NodeIds. This is later specialized for the individual +// indexing of the trees. 
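+//
+// For example, an SfStrategy (a TreeplexVector of doubles) stores one value
+// per SequenceId of a single player's tree: the probability mass flowing
+// through that sequence, with the empty sequence always carrying mass 1
+// (see IsValidSfStrategy below).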
+template +class TreeVector { + const InfostateTree* tree_; + std::vector vec_; + + public: + explicit TreeVector(const InfostateTree* tree) + : tree_(tree), vec_(tree_->num_ids(Id(kUndefinedNodeId, tree))) {} + TreeVector(const InfostateTree* tree, std::vector vec) + : tree_(tree), vec_(std::move(vec)) { + SPIEL_CHECK_EQ(tree_->num_ids(Id(kUndefinedNodeId, tree)), vec_.size()); + } + T& operator[](const Id& id) { + SPIEL_DCHECK_TRUE(id.BelongsToTree(tree_)); + SPIEL_DCHECK_LE(0, id.id()); + SPIEL_DCHECK_LT(id.id(), vec_.size()); + return vec_[id.id()]; + } + const T& operator[](const Id& id) const { + SPIEL_DCHECK_TRUE(id.BelongsToTree(tree_)); + SPIEL_DCHECK_LE(0, id.id()); + SPIEL_DCHECK_LT(id.id(), vec_.size()); + return vec_[id.id()]; + } + std::ostream& operator<<(std::ostream& os) const { + return os << vec_ << " (for player " << tree_->acting_player() << ')'; + } + size_t size() const { return vec_.size(); } + Range range() { return Range(0, vec_.size(), tree_); } + Range range(size_t from, size_t to) { return Range(from, to, tree_); } + const InfostateTree* tree() const { return tree_; } +}; + +} // namespace internal + +// Arrays that can be easily indexed by SequenceIds. +// The space of all such arrays forms a treeplex [3]. +// +// [3]: Smoothing Techniques for Computing Nash Equilibria of Sequential Games +// http://www.cs.cmu.edu/~sandholm/proxtreeplex.MathOfOR.pdf +template +class TreeplexVector final : public internal::TreeVector { + using internal::TreeVector::TreeVector; +}; + +// Arrays that can be easily indexed by LeafIds. +template +class LeafVector final : public internal::TreeVector { + using internal::TreeVector::TreeVector; +}; + +// Arrays that can be easily indexed by DecisionIds. +template +class DecisionVector final : public internal::TreeVector { + using internal::TreeVector::TreeVector; +}; + +// Returns whether the supplied vector is a valid sequence-form strategy: +// The probability flow has to sum up to 1 and each sequence's incoming +// probability must be equal to outgoing probabilities. +bool IsValidSfStrategy(const SfStrategy& strategy); + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_INFOSTATE_TREE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/infostate_tree_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/infostate_tree_test.cc new file mode 100644 index 0000000..5b2a51e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/infostate_tree_test.cc @@ -0,0 +1,521 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
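+// The tests below build infostate trees for small games (matrix matching
+// pennies, Kuhn poker, imperfect-information goofspiel), including
+// depth-limited trees and trees started from mid-game histories, and compare
+// their structure against expected MakeCertificate() strings.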
+ +#include "open_spiel/algorithms/infostate_tree.h" + +#include +#include +#include + +#include "open_spiel/games/goofspiel/goofspiel.h" +#include "open_spiel/games/kuhn_poker/kuhn_poker.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace { + +constexpr const char* kImperfectInfoGoofSpiel2( + "goofspiel(" + "num_cards=2," + "imp_info=True," + "points_order=ascending" + ")"); + +constexpr const char* kImperfectInfoGoofSpiel3( + "goofspiel(" + "num_cards=3," + "imp_info=True," + "points_order=ascending" + ")"); + +bool IsNodeBalanced(const InfostateNode& node, int height, + int current_depth = 0) { + if (node.is_leaf_node()) return height == current_depth; + + for (const InfostateNode* child : node.child_iterator()) { + if (!IsNodeBalanced(*child, height, current_depth + 1)) { + return false; + } + } + + return true; +} + +bool RecomputeBalance(const InfostateTree& tree) { + return IsNodeBalanced(tree.root(), tree.tree_height()); +} + +std::shared_ptr MakeTree(const std::string& game_name, + Player player, + int max_move_limit = 1000) { + std::shared_ptr tree = + MakeInfostateTree(*LoadGame(game_name), player, max_move_limit); + SPIEL_CHECK_TRUE(RecomputeBalance(*tree)); + return tree; +} + +std::shared_ptr MakeTree( + const std::string& game_name, Player player, + const std::vector>& start_histories, + const std::vector& start_reaches, int max_move_limit = 1000) { + const std::shared_ptr game = LoadGame(game_name); + std::vector> start_states; + std::vector start_state_ptrs; + for (const std::vector& history : start_histories) { + std::unique_ptr rollout = game->NewInitialState(); + for (const Action& a : history) rollout->ApplyAction(a); + start_states.push_back(std::move(rollout)); + start_state_ptrs.push_back(start_states.back().get()); + } + + std::shared_ptr infostate_observer = + game->MakeObserver(kInfoStateObsType, {}); + + std::shared_ptr tree = + MakeInfostateTree(start_state_ptrs, start_reaches, infostate_observer, + player, max_move_limit); + SPIEL_CHECK_TRUE(RecomputeBalance(*tree)); + return tree; +} + +void TestRootCertificates() { + { + std::string expected_certificate = + "([" + "({}{})" // Play Heads: HH, HT + "({}{})" // Play Tails: TH, TT + "])"; + for (int i = 0; i < 2; ++i) { + std::shared_ptr tree = MakeTree("matrix_mp", /*player=*/i); + SPIEL_CHECK_EQ(tree->root().MakeCertificate(), expected_certificate); + } + } + { + std::string expected_certificate = + "([" + "({}{})" // Play 1: draw 1,1 lose 1,2 + "({}{})" // Play 2: win 2,1 draw 2,2 + "])"; + for (int i = 0; i < 2; ++i) { + std::shared_ptr tree = + MakeTree(kImperfectInfoGoofSpiel2, /*player=*/i); + SPIEL_CHECK_EQ(tree->root().MakeCertificate(), expected_certificate); + } + } + { // Full Kuhn test. + std::shared_ptr tree = MakeTree("kuhn_poker", /*player=*/0); + std::string expected_certificate = + // Notice all terminals are at the same depth (same indentation). + "((" // Root node, 1st is getting a card + "(" // 2nd is getting card + "[" // 1st acts + "((" // 1st bet, and 2nd acts + "(({}))" + "(({}))" + "(({}))" + "(({}))" + "))" + "((" // 1st checks, and 2nd acts + // 2nd checked + "(({}))" + "(({}))" + // 2nd betted + "[({}" + "{})" + "({}" + "{})]" + "))" + "]" + ")" + // Just 2 more copies. 
+ "([(((({}))(({}))(({}))(({}))))(((({}))(({}))[({}{})({}{})]))])" + "([(((({}))(({}))(({}))(({}))))(((({}))(({}))[({}{})({}{})]))])" + "))"; + SPIEL_CHECK_EQ(tree->root().MakeCertificate(), expected_certificate); + } + { + std::string expected_certificate = + "(((" // Root node, distribute cards. + "(" // 1st acts + // 1st betted + "[(({})({}))(({})({}))]" + // 1st checked + "[(({})({}))(({}{}{}{}))]" + ")" + // Just 2 more copies. + "([(({})({}))(({})({}))][(({})({}))(({}{}{}{}))])" + "([(({})({}))(({})({}))][(({})({}))(({}{}{}{}))])" + ")))"; + std::shared_ptr tree = MakeTree("kuhn_poker", /*player=*/1); + SPIEL_CHECK_EQ(tree->root().MakeCertificate(), expected_certificate); + } + { + std::string expected_certificate = + "([" + "(" // Play 2 + "[({}{})({}{})]" + "[({}{})({}{})]" + "[({}{})({}{})]" + ")" + "(" // Play 1 + "[({}{})({}{})]" + "[({}{}{}{})({}{}{}{})]" + ")" + "(" // Play 3 + "[({}{})({}{})]" + "[({}{}{}{})({}{}{}{})]" + ")" + "])"; + for (int i = 0; i < 2; ++i) { + std::shared_ptr tree = + MakeTree(kImperfectInfoGoofSpiel3, /*player=*/i); + SPIEL_CHECK_EQ(tree->root().MakeCertificate(), expected_certificate); + } + } +} + +void TestCertificatesFromStartHistories() { + { + std::shared_ptr tree = MakeTree( + "kuhn_poker", /*player=*/0, /*start_histories=*/{{0, 1, 0}}, {1 / 6.}); + std::string expected_certificate = + "((" + "(({}))" // 2nd player passes + "[({})({})]" // 2nd player bets + "))"; + SPIEL_CHECK_EQ(tree->root().MakeCertificate(), expected_certificate); + } + { + std::string expected_certificate = + "(" + "([(((({}))(({}))(({}))(({}))))(((({}))(({}))[({}{})({}{})]))])" + "([(((({}))(({}))(({}))(({}))))(((({}))(({}))[({}{})({}{})]))])" + ")"; + std::shared_ptr tree = + MakeTree("kuhn_poker", /*player=*/0, + /*start_histories=*/{{0}, {2}}, {1 / 3., 1 / 3.}); + SPIEL_CHECK_EQ(tree->root().MakeCertificate(), expected_certificate); + } + { + std::string expected_certificate = + "(" + "([(({}))(({}))][(({}))(({}{}))])" + "([(({}))(({}))][(({}))(({}{}))])" + ")"; + std::shared_ptr tree = + MakeTree("kuhn_poker", /*player=*/1, + /*start_histories=*/{{1, 0}, {1, 2}}, {1 / 6., 1 / 6.}); + SPIEL_CHECK_EQ(tree->root().MakeCertificate(), expected_certificate); + } + { + std::string expected_certificate = + "(" + "([(((({}))(({}))(({}))(({}))))(((({}))(({}))[({}{})({}{})]))])" + "[((((({})))))((((({})))))]" + ")"; + std::shared_ptr tree = + MakeTree("kuhn_poker", /*player=*/0, + /*start_histories=*/{{0}, {2, 1, 0, 1}}, + /*start_reaches=*/{1 / 3., 1 / 6.}); + SPIEL_CHECK_EQ(tree->root().MakeCertificate(), expected_certificate); + } + { + std::string expected_certificate = + "(" + "(((({})))((({}))))" + "([(({}))(({}))][(({}))(({}{}))])" + ")"; + std::shared_ptr tree = MakeTree( + "kuhn_poker", /*player=*/1, /*start_histories=*/{{1, 0}, {1, 2, 0, 1}}, + /*start_reaches=*/{1 / 6., 1 / 6.}); + SPIEL_CHECK_EQ(tree->root().MakeCertificate(), expected_certificate); + } + { + std::string expected_certificate = + "(" + "[({}{})({}{})]" + ")"; + std::shared_ptr tree = + MakeTree("kuhn_poker", /*player=*/0, + /*start_histories=*/{{0, 1, 0, 1}, {0, 2, 0, 1}}, + /*start_reaches=*/{1 / 6., 1 / 6.}); + SPIEL_CHECK_EQ(tree->root().MakeCertificate(), expected_certificate); + } + { + std::string expected_certificate = + "(" + "({}{})" + "({}{})" + ")"; + std::shared_ptr tree = + MakeTree("kuhn_poker", /*player=*/1, + /*start_histories=*/{{0, 1, 0, 1}, {0, 2, 0, 1}}, + /*start_reaches=*/{1 / 6., 1 / 6.}); + SPIEL_CHECK_EQ(tree->root().MakeCertificate(), expected_certificate); + } + { + 
std::shared_ptr tree = MakeTree( + kImperfectInfoGoofSpiel3, /*player=*/0, + /*start_histories=*/{{0 /* = 0 0 */}, {1 /* = 1 0 */, 3 /* = 2 2 */}}, + /*start_reaches=*/{1., 1.}); + std::string expected_certificate = + "(" + "(({}))" + "[({}{})({}{})]" + ")"; + SPIEL_CHECK_EQ(tree->root().MakeCertificate(), expected_certificate); + } +} + +void CheckTreeLeaves(const InfostateTree& tree, int move_limit) { + for (InfostateNode* leaf_node : tree.leaf_nodes()) { + SPIEL_CHECK_TRUE(leaf_node->is_leaf_node()); + SPIEL_CHECK_TRUE(leaf_node->has_infostate_string()); + SPIEL_CHECK_FALSE(leaf_node->corresponding_states().empty()); + + // Check MoveNumber() for all corresponding states. + // + // The conditions are following: + // - either all states are terminal, and have the same MoveNumber() that + // is less or equal to move_limit, + // - or not all states are terminal and the MoveNumber() == move_limit. + + const int num_states = leaf_node->corresponding_states().size(); + int terminal_cnt = 0; + int max_move_number = std::numeric_limits::min(); + int min_move_number = std::numeric_limits::max(); + for (const std::unique_ptr& state : + leaf_node->corresponding_states()) { + if (state->IsTerminal()) terminal_cnt++; + max_move_number = std::max(max_move_number, state->MoveNumber()); + min_move_number = std::min(min_move_number, state->MoveNumber()); + } + SPIEL_CHECK_TRUE(terminal_cnt == 0 || terminal_cnt == num_states); + SPIEL_CHECK_TRUE(max_move_number == min_move_number); + if (terminal_cnt == 0) { + SPIEL_CHECK_EQ(max_move_number, move_limit); + } else { + SPIEL_CHECK_LE(max_move_number, move_limit); + } + } +} + +void CheckContinuation(const InfostateTree& tree) { + const std::vector& leaves = + tree.nodes_at_depth(tree.tree_height()); + std::shared_ptr continuation = MakeInfostateTree(leaves); + + SPIEL_CHECK_EQ(continuation->root_branching_factor(), leaves.size()); + for (int i = 0; i < leaves.size(); ++i) { + const InfostateNode* leaf_node = leaves[i]; + const InfostateNode* root_node = continuation->root().child_at(i); + SPIEL_CHECK_TRUE(leaf_node->is_leaf_node()); + if (leaf_node->type() != kTerminalInfostateNode) { + SPIEL_CHECK_EQ(leaf_node->type(), root_node->type()); + SPIEL_CHECK_EQ(leaf_node->has_infostate_string(), + root_node->has_infostate_string()); + if (leaf_node->has_infostate_string()) { + SPIEL_CHECK_EQ(leaf_node->infostate_string(), + root_node->infostate_string()); + } + } else { + // If the leaf node is terminal, the continuation might put this node + // deeper than in the root due to tree balancing with other leaf + // non-terminal nodes. Therefore we check whether (the possibly occurring) + // chain of dummy observations leads to this terminal node. 
+ InfostateNode* terminal_continuation = continuation->root().child_at(i); + while (terminal_continuation->type() == kObservationInfostateNode) { + SPIEL_CHECK_FALSE(terminal_continuation->is_leaf_node()); + SPIEL_CHECK_EQ(terminal_continuation->num_children(), 1); + terminal_continuation = terminal_continuation->child_at(0); + } + SPIEL_CHECK_EQ(terminal_continuation->type(), kTerminalInfostateNode); + SPIEL_CHECK_EQ(leaf_node->has_infostate_string(), + terminal_continuation->has_infostate_string()); + if (leaf_node->has_infostate_string()) { + SPIEL_CHECK_EQ(leaf_node->infostate_string(), + terminal_continuation->infostate_string()); + } + SPIEL_CHECK_EQ(leaf_node->terminal_utility(), + terminal_continuation->terminal_utility()); + SPIEL_CHECK_EQ(leaf_node->terminal_chance_reach_prob(), + terminal_continuation->terminal_chance_reach_prob()); + SPIEL_CHECK_EQ(leaf_node->TerminalHistory(), + terminal_continuation->TerminalHistory()); + } + } +} + +void BuildAllDepths(const std::string& game_name) { + std::shared_ptr game = LoadGame(game_name); + const int max_moves = game->MaxMoveNumber(); + for (int move_limit = 0; move_limit < max_moves; ++move_limit) { + for (int pl = 0; pl < game->NumPlayers(); ++pl) { + std::shared_ptr tree = MakeTree(game_name, pl, move_limit); + CheckTreeLeaves(*tree, move_limit); + CheckContinuation(*tree); + } + } +} + +void TestDepthLimitedTrees() { + { + std::string expected_certificate = + "(" // + "(" // 1st is getting a card + "(" // 2nd is getting card + "[" // 1st acts - Node J + // Depth cutoff. + "]" + ")" + // Repeat the same for the two other cards. + "([])" // Node Q + "([])" // Node K + ")" + ")"; // + std::shared_ptr tree = MakeTree("kuhn_poker", 0, 2); + SPIEL_CHECK_EQ(tree->root().MakeCertificate(), expected_certificate); + + for (InfostateNode* acting : tree->leaf_nodes()) { + SPIEL_CHECK_TRUE(acting->is_leaf_node()); + SPIEL_CHECK_EQ(acting->type(), kDecisionInfostateNode); + SPIEL_CHECK_EQ(acting->corresponding_states().size(), 2); + SPIEL_CHECK_TRUE(acting->has_infostate_string()); + } + } + + BuildAllDepths("kuhn_poker"); + BuildAllDepths("kuhn_poker(players=3)"); + BuildAllDepths("leduc_poker"); + BuildAllDepths("goofspiel(players=2,num_cards=3,imp_info=True)"); + BuildAllDepths("goofspiel(players=3,num_cards=3,imp_info=True)"); +} + +void TestDepthLimitedSubgames() { + { + std::array expected_certificates = { + "(()()())", "(([][])([][])([][]))", + "(" + "([(())({}{})][({}{})({}{})])" + "([(())({}{})][({}{})({}{})])" + "([(())({}{})][({}{})({}{})])" + ")", + "(" + "([(({})({}))(({})({}))][(({})({}))(({}{}{}{}))])" + "([(({})({}))(({})({}))][(({})({}))(({}{}{}{}))])" + "([(({})({}))(({})({}))][(({})({}))(({}{}{}{}))])" + ")"}; + std::array expected_leaf_counts = {3, 6, 21, 30}; + + for (int move_limit = 0; move_limit < 4; ++move_limit) { + std::shared_ptr tree = + MakeTree("kuhn_poker", /*player=*/1, + {{0, 1}, {0, 2}, {1, 0}, {1, 2}, {2, 0}, {2, 1}}, + {1 / 6., 1 / 6., 1 / 6., 1 / 6., 1 / 6., 1 / 6.}, + /*max_move_limit=*/move_limit); + SPIEL_CHECK_EQ(tree->root().MakeCertificate(), + expected_certificates[move_limit]); + SPIEL_CHECK_EQ(tree->num_leaves(), expected_leaf_counts[move_limit]); + + for (InfostateNode* leaf : tree->leaf_nodes()) { + SPIEL_CHECK_EQ(leaf->depth(), tree->tree_height()); + } + } + } +} + +void TestSequenceIdLabeling() { + for (int pl = 0; pl < 2; ++pl) { + std::shared_ptr tree = MakeTree("kuhn_poker", /*player=*/pl); + + for (int depth = 0; depth <= tree->tree_height(); ++depth) { + for (InfostateNode* node : 
tree->nodes_at_depth(depth)) { + SPIEL_CHECK_LE(node->start_sequence_id().id(), + node->sequence_id().id()); + SPIEL_CHECK_LE(node->end_sequence_id().id(), node->sequence_id().id()); + } + } + + // Check labeling was done from the deepest nodes. + size_t depth = -1; // Some large number. + for (SequenceId id : tree->AllSequenceIds()) { + InfostateNode* node = tree->observation_infostate(id); + SPIEL_CHECK_LE(node->depth(), depth); + depth = node->depth(); + // Longer sequences (extensions) must have the corresponding + // infostate nodes placed deeper. + for (SequenceId extension : node->AllSequenceIds()) { + InfostateNode* child = tree->observation_infostate(extension); + SPIEL_CHECK_LT(node->depth(), child->depth()); + } + } + } +} + +void TestBestResponse() { + std::shared_ptr tree0 = MakeTree("matrix_mp", /*player=*/0); + std::shared_ptr tree1 = MakeTree("matrix_mp", /*player=*/1); + for (double alpha = 0; alpha < 1.; alpha += 0.1) { + const double br_value = std::fmax(2 * alpha - 1, -2 * alpha + 1); + { + LeafVector grad(tree0.get(), + { + 1. * alpha, // Head, Head + -1. * (1. - alpha), // Head, Tail + -1. * alpha, // Tail, Head + 1. * (1. - alpha), // Tail, Tail + }); + SPIEL_CHECK_FLOAT_EQ(tree0->BestResponseValue(std::move(grad)), br_value); + } + { + LeafVector grad(tree1.get(), + { + -1. * alpha, // Head, Head + 1. * (1. - alpha), // Tail, Head + 1. * alpha, // Head, Tail + -1. * (1. - alpha), // Tail, Tail + }); + SPIEL_CHECK_FLOAT_EQ(tree1->BestResponseValue(std::move(grad)), br_value); + } + { + TreeplexVector grad(tree0.get(), + {-1. + 2. * alpha, 1. - 2. * alpha, 0.}); + std::pair actual_response = + tree0->BestResponse(std::move(grad)); + SPIEL_CHECK_FLOAT_EQ(actual_response.first, br_value); + } + { + TreeplexVector grad(tree1.get(), + {1. - 2. * alpha, -1. + 2. * alpha, 0.}); + std::pair actual_response = + tree1->BestResponse(std::move(grad)); + SPIEL_CHECK_FLOAT_EQ(actual_response.first, br_value); + } + } +} + +} // namespace +} // namespace algorithms +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::algorithms::TestRootCertificates(); + open_spiel::algorithms::TestCertificatesFromStartHistories(); + open_spiel::algorithms::TestDepthLimitedTrees(); + open_spiel::algorithms::TestDepthLimitedSubgames(); + open_spiel::algorithms::TestSequenceIdLabeling(); + open_spiel::algorithms::TestBestResponse(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/is_mcts.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/is_mcts.cc new file mode 100644 index 0000000..a6b0225 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/is_mcts.cc @@ -0,0 +1,382 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
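Editor's note on `TestBestResponse` above: the value it checks can be derived directly. In matching pennies with payoffs of +/-1, if the opponent plays Heads with probability alpha, the responder's two pure strategies earn 2*alpha - 1 (match) and 1 - 2*alpha (mismatch), so the best-response value is their maximum — exactly the `br_value` the test compares against. A tiny self-contained check of that identity (illustrative only, not part of any file in this diff):

```
#include <algorithm>
#include <cassert>
#include <cmath>

int main() {
  for (double alpha = 0.0; alpha < 1.0; alpha += 0.1) {
    // Expected payoff of the responder's two pure strategies against an
    // opponent that plays Heads with probability alpha (payoffs are +/-1).
    double play_heads = alpha * 1.0 + (1.0 - alpha) * -1.0;  // = 2*alpha - 1
    double play_tails = alpha * -1.0 + (1.0 - alpha) * 1.0;  // = 1 - 2*alpha
    double br_value = std::max(play_heads, play_tails);
    assert(std::fabs(br_value - std::fmax(2 * alpha - 1, -2 * alpha + 1)) <
           1e-9);
  }
  return 0;
}
```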
+ +#include "open_spiel/algorithms/is_mcts.h" + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/random/discrete_distribution.h" +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/abseil-cpp/absl/time/time.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { + +constexpr double kTieTolerance = 0.00001; +constexpr int kUnexpandedVisitCount = -1; + +ISMCTSBot::ISMCTSBot(int seed, std::shared_ptr evaluator, + double uct_c, int max_simulations, int max_world_samples, + ISMCTSFinalPolicyType final_policy_type, + bool use_observation_string, + bool allow_inconsistent_action_sets) + : rng_(seed), + evaluator_(evaluator), + uct_c_(uct_c), + max_simulations_(max_simulations), + max_world_samples_(max_world_samples), + final_policy_type_(final_policy_type), + use_observation_string_(use_observation_string), + allow_inconsistent_action_sets_(allow_inconsistent_action_sets) {} + +double ISMCTSBot::RandomNumber() { return absl::Uniform(rng_, 0.0, 1.0); } + +void ISMCTSBot::Reset() { + nodes_.clear(); + node_pool_.clear(); + root_samples_.clear(); +} + +ISMCTSStateKey ISMCTSBot::GetStateKey(const State& state) const { + if (use_observation_string_) { + return {state.CurrentPlayer(), state.ObservationString()}; + } else { + return {state.CurrentPlayer(), state.InformationStateString()}; + } +} + +ActionsAndProbs ISMCTSBot::RunSearch(const State& state) { + Reset(); + SPIEL_CHECK_EQ(state.GetGame()->GetType().dynamics, + GameType::Dynamics::kSequential); + SPIEL_CHECK_EQ(state.GetGame()->GetType().information, + GameType::Information::kImperfectInformation); + + // Optimization in case of single legal action, and support for games which + // do not support ResampleFromInfostate in certain specific (single action) + // states. + std::vector legal_actions = state.LegalActions(); + if (legal_actions.size() == 1) return {{legal_actions[0], 1.0}}; + + root_node_ = CreateNewNode(state); + SPIEL_CHECK_TRUE(root_node_ != nullptr); + + auto root_infostate_key = GetStateKey(state); + + for (int sim = 0; sim < max_simulations_; ++sim) { + std::unique_ptr sampled_root_state = SampleRootState(state); + SPIEL_CHECK_TRUE(root_infostate_key == GetStateKey(*sampled_root_state)); + SPIEL_CHECK_TRUE(sampled_root_state != nullptr); + RunSimulation(sampled_root_state.get()); + } + + if (allow_inconsistent_action_sets_) { + // Filter illegals for this state. 
+ std::vector legal_actions = state.LegalActions(); + ISMCTSNode temp_node = FilterIllegals(root_node_, legal_actions); + SPIEL_CHECK_GT(temp_node.total_visits, 0); + return GetFinalPolicy(state, &temp_node); + } else { + return GetFinalPolicy(state, root_node_); + } +} + +Action ISMCTSBot::Step(const State& state) { + ActionsAndProbs policy = RunSearch(state); + return SampleAction(policy, RandomNumber()).first; +} + +ActionsAndProbs ISMCTSBot::GetPolicy(const State& state) { + return RunSearch(state); +} + +std::pair ISMCTSBot::StepWithPolicy( + const State& state) { + ActionsAndProbs policy = GetPolicy(state); + Action sampled_action = SampleAction(policy, RandomNumber()).first; + return {policy, sampled_action}; +} + +ActionsAndProbs ISMCTSBot::GetFinalPolicy(const State& state, + ISMCTSNode* node) const { + ActionsAndProbs policy; + SPIEL_CHECK_FALSE(node == nullptr); + + switch (final_policy_type_) { + case ISMCTSFinalPolicyType::kNormalizedVisitCount: { + SPIEL_CHECK_GT(node->total_visits, 0); + policy.reserve(node->child_info.size()); + double total_visits = static_cast(node->total_visits); + for (const auto& action_and_child : node->child_info) { + policy.push_back({action_and_child.first, + action_and_child.second.visits / total_visits}); + } + } break; + + case ISMCTSFinalPolicyType::kMaxVisitCount: { + SPIEL_CHECK_GT(node->total_visits, 0); + policy.reserve(node->child_info.size()); + Action max_action = kInvalidAction; + int max_visits = -std::numeric_limits::infinity(); + for (const auto& action_and_child : node->child_info) { + if (action_and_child.second.visits > max_visits) { + max_visits = action_and_child.second.visits; + max_action = action_and_child.first; + } + } + SPIEL_CHECK_NE(max_action, kInvalidAction); + for (const auto& action_and_child : node->child_info) { + policy.push_back({action_and_child.first, + action_and_child.first == max_action ? 1.0 : 0.0}); + } + } break; + + case ISMCTSFinalPolicyType::kMaxValue: { + SPIEL_CHECK_GT(node->total_visits, 0); + policy.reserve(node->child_info.size()); + Action max_action = kInvalidAction; + double max_value = -std::numeric_limits::infinity(); + for (const auto& action_and_child : node->child_info) { + double value = action_and_child.second.value(); + if (value > max_value) { + max_value = value; + max_action = action_and_child.first; + } + } + SPIEL_CHECK_NE(max_action, kInvalidAction); + for (const auto& action_and_child : node->child_info) { + policy.push_back({action_and_child.first, + action_and_child.first == max_action ? 1.0 : 0.0}); + } + } + } + + // In case the search didn't cover all the legal moves, at zero probability + // for all the remaining actions. + int policy_size = policy.size(); + std::vector legal_actions = state.LegalActions(); + if (policy_size < legal_actions.size()) { + for (Action action : legal_actions) { + if (node->child_info.find(action) == node->child_info.end()) { + // Legal action not found in the node's actions: assign probability 0. 
+ policy.push_back({action, 0.0}); + } + } + } + return policy; +} + +std::unique_ptr ISMCTSBot::SampleRootState(const State& state) { + if (max_world_samples_ == kUnlimitedNumWorldSamples) { + return ResampleFromInfostate(state); + } else if (root_samples_.size() < max_world_samples_) { + root_samples_.push_back(ResampleFromInfostate(state)); + return root_samples_.back()->Clone(); + } else if (root_samples_.size() == max_world_samples_) { + int idx = absl::Uniform(rng_, 0u, root_samples_.size()); + return root_samples_[idx]->Clone(); + } else { + SpielFatalError("Case not handled (badly set max_world_samples..?)"); + } +} + +std::unique_ptr ISMCTSBot::ResampleFromInfostate(const State& state) { + if (resampler_cb_) { + return resampler_cb_(state, state.CurrentPlayer(), + [this]() { return RandomNumber(); }); + } else { + // Try domain-specific implementation + // (could be not implemented in some games). + return state.ResampleFromInfostate(state.CurrentPlayer(), + [this]() { return RandomNumber(); }); + } +} + +ISMCTSNode* ISMCTSBot::CreateNewNode(const State& state) { + auto infostate_key = GetStateKey(state); + node_pool_.push_back(std::unique_ptr(new ISMCTSNode)); + ISMCTSNode* node = node_pool_.back().get(); + nodes_[infostate_key] = node; + node->total_visits = kUnexpandedVisitCount; + return node; +} + +ISMCTSNode* ISMCTSBot::LookupNode(const State& state) { + auto iter = nodes_.find(GetStateKey(state)); + if (iter == nodes_.end()) { + return nullptr; + } else { + return iter->second; + } +} + +ISMCTSNode* ISMCTSBot::LookupOrCreateNode(const State& state) { + ISMCTSNode* node = LookupNode(state); + if (node != nullptr) { + return node; + } else { + return CreateNewNode(state); + } +} + +ISMCTSNode ISMCTSBot::FilterIllegals( + ISMCTSNode* node, const std::vector& legal_actions) const { + ISMCTSNode new_node = *node; + std::vector to_delete; + for (const auto& action_and_child : node->child_info) { + if (std::find(legal_actions.begin(), legal_actions.end(), + action_and_child.first) == legal_actions.end()) { + // Illegal action: mark for deletion. + new_node.total_visits -= action_and_child.second.visits; + new_node.child_info.erase(action_and_child.first); + } + } + + return new_node; +} + +void ISMCTSBot::ExpandIfNecessary(ISMCTSNode* node, Action action) const { + if (node->child_info.find(action) == node->child_info.end()) { + node->child_info[action] = ChildInfo{0, 0.0}; + } +} + +Action ISMCTSBot::SelectActionTreePolicy( + ISMCTSNode* node, const std::vector& legal_actions) { + // Check to see if we are allowing inconsistent action sets. + if (allow_inconsistent_action_sets_) { + // If so, it could mean that the node has actions with child info that are + // not legal in this state, so we have to remove them. + ISMCTSNode temp_node = FilterIllegals(node, legal_actions); + if (temp_node.total_visits == 0) { + // If we've filtered everything, return a random action. 
+ Action action = + legal_actions[absl::Uniform(rng_, 0u, legal_actions.size())]; + ExpandIfNecessary(node, action); + return action; + } else { + return SelectActionUCB(&temp_node); + } + } else { + return SelectActionUCB(node); + } +} + +Action ISMCTSBot::SelectActionUCB(ISMCTSNode* node) { + std::vector> actions_and_values; + double max_value = -std::numeric_limits::infinity(); + + for (const auto& action_and_child : node->child_info) { + // Every child should have at least one visit because the child is only + // created when the action took it in a simulation, which then increases + // its visit count immediately. + SPIEL_CHECK_GT(action_and_child.second.visits, 0); + + Action action = action_and_child.first; + double uct_val = action_and_child.second.value() + + uct_c_ * std::sqrt(std::log(node->total_visits) / + action_and_child.second.visits); + + actions_and_values.push_back({action, uct_val}); + max_value = std::max(max_value, uct_val); + } + + std::vector candidates; + for (const auto& action_and_value : actions_and_values) { + if (action_and_value.second > max_value - kTieTolerance) { + candidates.push_back(action_and_value.first); + } + } + + SPIEL_CHECK_GE(candidates.size(), 1); + + if (candidates.size() == 1) { + return candidates[0]; + } else { + return candidates[absl::Uniform(rng_, 0u, candidates.size())]; + } +} + +Action ISMCTSBot::CheckExpand(ISMCTSNode* node, + const std::vector& legal_actions) { + // Fast check in the common/default case. + if (!allow_inconsistent_action_sets_ && + node->child_info.size() == legal_actions.size()) { + return kInvalidAction; + } + + // Shuffle the legal actions to remove the bias from the move order. + std::vector legal_actions_copy = legal_actions; + std::shuffle(legal_actions_copy.begin(), legal_actions_copy.end(), rng_); + for (Action action : legal_actions_copy) { + if (node->child_info.find(action) == node->child_info.end()) { + return action; + } + } + return kInvalidAction; +} + +std::vector ISMCTSBot::RunSimulation(State* state) { + if (state->IsTerminal()) { + return state->Returns(); + } else if (state->IsChanceNode()) { + Action chance_action = + SampleAction(state->ChanceOutcomes(), RandomNumber()).first; + state->ApplyAction(chance_action); + return RunSimulation(state); + } + + std::vector legal_actions = state->LegalActions(); + Player cur_player = state->CurrentPlayer(); + ISMCTSNode* node = LookupOrCreateNode(*state); + SPIEL_CHECK_TRUE(node != nullptr); + + if (node->total_visits == kUnexpandedVisitCount) { + // Newly created node, so we've just stepped out of the tree. + node->total_visits = 0; // Expand the node. + return evaluator_->Evaluate(*state); + } else { + // Apply tree policy. + Action chosen_action = CheckExpand(node, legal_actions); + if (chosen_action != kInvalidAction) { + // Expand. + ExpandIfNecessary(node, chosen_action); + } else { + // No expansion, so use the tree policy to select. + chosen_action = SelectActionTreePolicy(node, legal_actions); + } + + SPIEL_CHECK_NE(chosen_action, kInvalidAction); + + // Need to updates the visits before the recursive call. In games with + // imperfect recall, a node could be expanded with zero visit counts, and + // you might encounter the same (node, action) pair in the same simulation + // and the denominator for the UCT formula would be 0. 
+ node->total_visits++; + node->child_info[chosen_action].visits++; + + state->ApplyAction(chosen_action); + std::vector returns = RunSimulation(state); + node->child_info[chosen_action].return_sum += returns[cur_player]; + return returns; + } +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/is_mcts.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/is_mcts.h new file mode 100644 index 0000000..cb0b9f3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/is_mcts.h @@ -0,0 +1,158 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_IS_MCTS_H_ +#define OPEN_SPIEL_ALGORITHMS_IS_MCTS_H_ + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/algorithms/mcts.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_bots.h" + +// A basic implementation of Information Set Monte Carlo Tree Search (IS-MCTS) +// by Cowling et al. https://ieeexplore.ieee.org/abstract/document/6203567. + +namespace open_spiel { +namespace algorithms { + +// Use this constant to use an unlimited number of world samples. +inline constexpr int kUnlimitedNumWorldSamples = -1; + +// The key identifying a node contains the InformationStateString or +// ObservationString, as well as the player id, because in some games the +// observation string can be the same for different players. +using ISMCTSStateKey = std::pair; + +enum class ISMCTSFinalPolicyType { + kNormalizedVisitCount, + kMaxVisitCount, + kMaxValue, +}; + +struct ChildInfo { + int visits; + double return_sum; + double value() const { return return_sum / visits; } +}; + +struct ISMCTSNode { + absl::flat_hash_map child_info; + int total_visits; +}; + +using InfostateResampler = std::function( + const State& state, Player pl, std::function rng)>; + +class ISMCTSBot : public Bot { + public: + // Construct an IS-MCTS bot. The parameter max_world_samples controls how many + // states are sampled (with replacement!) at the root of the search; use + // kUnlimitedWorldStates to have no restriction, and a number larger than + // zero to restrict the number). If use_observation_string is true, then + // will use ObservationString as a key instead of InformationStateString. + // If allow_inconsistent_action_sets is true, then the algorithm handles the + // case of differing legal action sets across states with the same state key + // (information state string or observation string) which can happen when + // using observations or with game that have imperfect recall. + // + // Important note: this bot requires that State::ResampleFromInfostate is + // implemented. 
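Editor's note, placed before the constructor declaration that follows: a minimal usage sketch of the bot described by the comment above. It mirrors `is_mcts_test.cc` later in this diff; the choice of `kuhn_poker`, the parameter values, the fixed sampling seed `z=0.5`, and the helper name `ISMCTSUsageSketch` are illustrative assumptions, not recommendations.

```
#include <memory>

#include "open_spiel/algorithms/is_mcts.h"
#include "open_spiel/algorithms/mcts.h"
#include "open_spiel/spiel.h"

void ISMCTSUsageSketch() {
  // Kuhn poker implements State::ResampleFromInfostate, as required above.
  std::shared_ptr<const open_spiel::Game> game =
      open_spiel::LoadGame("kuhn_poker");
  auto evaluator =
      std::make_shared<open_spiel::algorithms::RandomRolloutEvaluator>(
          /*n_rollouts=*/1, /*seed=*/1234);
  open_spiel::algorithms::ISMCTSBot bot(
      /*seed=*/1234, evaluator, /*uct_c=*/5.0, /*max_simulations=*/1000,
      open_spiel::algorithms::kUnlimitedNumWorldSamples,
      open_spiel::algorithms::ISMCTSFinalPolicyType::kNormalizedVisitCount,
      /*use_observation_string=*/false,
      /*allow_inconsistent_action_sets=*/false);
  std::unique_ptr<open_spiel::State> state = game->NewInitialState();
  while (!state->IsTerminal()) {
    if (state->IsChanceNode()) {
      // Chance nodes are resolved outside the bot, as in is_mcts_test.cc.
      state->ApplyAction(
          open_spiel::SampleAction(state->ChanceOutcomes(), /*z=*/0.5).first);
    } else {
      state->ApplyAction(bot.Step(*state));
    }
  }
}
```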
+ ISMCTSBot(int seed, std::shared_ptr evaluator, double uct_c, + int max_simulations, int max_world_samples, + ISMCTSFinalPolicyType final_policy_type, + bool use_observation_string, bool allow_inconsistent_action_sets); + + // An IS-MCTS with sensible defaults. + ISMCTSBot(int seed, std::shared_ptr evaluator, double uct_c, + int max_simulations) + : ISMCTSBot(seed, evaluator, uct_c, max_simulations, + kUnlimitedNumWorldSamples, + ISMCTSFinalPolicyType::kNormalizedVisitCount, false, false) {} + + Action Step(const State& state) override; + + bool ProvidesPolicy() override { return true; } + ActionsAndProbs GetPolicy(const State& state) override; + std::pair StepWithPolicy( + const State& state) override; + + ActionsAndProbs RunSearch(const State& state); + + // Bot maintains no history, so these are empty. + void Restart() override {} + void RestartAt(const State& state) override {} + // Set a custom resampling function. + void SetResampler(InfostateResampler cb) { resampler_cb_ = cb; } + + private: + void Reset(); + double RandomNumber(); + + ISMCTSStateKey GetStateKey(const State& state) const; + std::unique_ptr SampleRootState(const State& state); + // Dispatch to either domain-specific implementation, + // or a specially supplied one via SetResampler() + std::unique_ptr ResampleFromInfostate(const State& state); + ISMCTSNode* CreateNewNode(const State& state); + ISMCTSNode* LookupNode(const State& state); + ISMCTSNode* LookupOrCreateNode(const State& state); + Action SelectActionTreePolicy(ISMCTSNode* node, + const std::vector& legal_actions); + Action SelectActionUCB(ISMCTSNode* node); + ActionsAndProbs GetFinalPolicy(const State& state, ISMCTSNode* node) const; + void ExpandIfNecessary(ISMCTSNode* node, Action action) const; + + // Check if an expansion is possible (i.e. node does not contain all the + // actions). If so, returns an action not yet in the children. Otherwise, + // returns kInvalidAction. + Action CheckExpand(ISMCTSNode* node, + const std::vector& legal_actions); + + // Returns a copy of the node with any actions not in specified legal actions + // removed. + ISMCTSNode FilterIllegals(ISMCTSNode* node, + const std::vector& legal_actions) const; + + // Run a simulation, returning the player returns. + std::vector RunSimulation(State* state); + + std::mt19937 rng_; + std::shared_ptr evaluator_; + absl::flat_hash_map nodes_; + std::vector> node_pool_; + + // If the number of sampled world state is restricted, this list is used to + // store the sampled states. + std::vector> root_samples_; + + const double uct_c_; + const int max_simulations_; + const int max_world_samples_; + const ISMCTSFinalPolicyType final_policy_type_; + const bool use_observation_string_; + const bool allow_inconsistent_action_sets_; + ISMCTSNode* root_node_; + InfostateResampler resampler_cb_; +}; + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_IS_MCTS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/is_mcts_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/is_mcts_test.cc new file mode 100644 index 0000000..bbf68c4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/is_mcts_test.cc @@ -0,0 +1,108 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/is_mcts.h" + +#include + +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/algorithms/mcts.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_bots.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace { + +constexpr const int kSeed = 93879211; + +void PlayGame(const Game& game, algorithms::ISMCTSBot* bot, std::mt19937* rng) { + std::unique_ptr state = game.NewInitialState(); + while (!state->IsTerminal()) { + std::cout << "State:" << std::endl; + std::cout << state->ToString() << std::endl; + + Action chosen_action = kInvalidAction; + if (state->IsChanceNode()) { + chosen_action = + SampleAction(state->ChanceOutcomes(), absl::Uniform(*rng, 0.0, 1.0)) + .first; + } else { + chosen_action = bot->Step(*state); + } + + std::cout << "Chosen action: " << state->ActionToString(chosen_action) + << std::endl; + state->ApplyAction(chosen_action); + } + + std::cout << "Terminal state:" << std::endl; + std::cout << state->ToString() << std::endl; + std::cout << "Returns: " << absl::StrJoin(state->Returns(), " ") << std::endl; +} + +void ISMCTSTest_PlayGame(const std::string& game_name) { + std::shared_ptr game = LoadGame(game_name); + auto evaluator = + std::make_shared(1, kSeed); + + for (algorithms::ISMCTSFinalPolicyType type : + {algorithms::ISMCTSFinalPolicyType::kNormalizedVisitCount, + algorithms::ISMCTSFinalPolicyType::kMaxVisitCount, + algorithms::ISMCTSFinalPolicyType::kMaxValue}) { + auto bot1 = std::make_unique( + kSeed, evaluator, 5.0, 1000, algorithms::kUnlimitedNumWorldSamples, + type, false, false); + + std::mt19937 rng(kSeed); + + std::cout << "Testing " << game_name << ", bot 1" << std::endl; + PlayGame(*game, bot1.get(), &rng); + + auto bot2 = std::make_unique( + kSeed, evaluator, 5.0, 1000, 10, type, false, false); + std::cout << "Testing " << game_name << ", bot 2" << std::endl; + PlayGame(*game, bot2.get(), &rng); + } +} + +void ISMCTS_BasicPlayGameTest_Kuhn() { + ISMCTSTest_PlayGame("kuhn_poker"); + ISMCTSTest_PlayGame("kuhn_poker(players=3)"); +} + +void ISMCTS_BasicPlayGameTest_Leduc() { + ISMCTSTest_PlayGame("leduc_poker"); + ISMCTSTest_PlayGame("leduc_poker(players=3)"); +} + +void ISMCTS_LeducObservationTest() { + std::mt19937 rng(kSeed); + std::shared_ptr game = LoadGame("leduc_poker"); + auto evaluator = + std::make_shared(1, kSeed); + auto bot = std::make_unique( + kSeed, evaluator, 10.0, 1000, algorithms::kUnlimitedNumWorldSamples, + algorithms::ISMCTSFinalPolicyType::kNormalizedVisitCount, true, true); + PlayGame(*game, bot.get(), &rng); +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::ISMCTS_BasicPlayGameTest_Kuhn(); + open_spiel::ISMCTS_BasicPlayGameTest_Leduc(); + open_spiel::ISMCTS_LeducObservationTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/matrix_game_utils.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/matrix_game_utils.cc new file mode 100644 index 0000000..2527cdd --- /dev/null +++ 
b/scenarios/bargaining/open_spiel/open_spiel/algorithms/matrix_game_utils.cc @@ -0,0 +1,134 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/matrix_game_utils.h" + +#include + +#include "open_spiel/algorithms/deterministic_policy.h" +#include "open_spiel/algorithms/expected_returns.h" +#include "open_spiel/simultaneous_move_game.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace algorithms { + +using open_spiel::matrix_game::MatrixGame; + +std::shared_ptr LoadMatrixGame(const std::string& name) { + std::shared_ptr game = LoadGame(name); + // Make sure it is indeed a matrix game. + const MatrixGame* matrix_game = dynamic_cast(game.get()); + if (matrix_game == nullptr) { + // If it is not already a matrix game, check if it is a 2-player NFG. + // If so, convert it. + const NormalFormGame* nfg = dynamic_cast(game.get()); + if (nfg != nullptr && nfg->NumPlayers() == 2) { + return AsMatrixGame(nfg); + } else { + SpielFatalError(absl::StrCat("Cannot load ", name, " as a matrix game.")); + } + } + return std::static_pointer_cast(game); +} + +std::shared_ptr AsMatrixGame(const Game* game) { + const NormalFormGame* nfg = dynamic_cast(game); + SPIEL_CHECK_TRUE(nfg != nullptr); + return AsMatrixGame(nfg); +} + +std::shared_ptr AsMatrixGame(const NormalFormGame* game) { + SPIEL_CHECK_EQ(game->NumPlayers(), 2); + std::unique_ptr initial_state = game->NewInitialState(); + std::vector> legal_actions = { + initial_state->LegalActions(0), initial_state->LegalActions(1)}; + + std::vector row_names; + std::vector col_names; + std::vector row_utils; + std::vector col_utils; + int num_rows = legal_actions[0].size(); + int num_cols = legal_actions[1].size(); + + GameType type = game->GetType(); + type.min_num_players = 2; + type.max_num_players = 2; + + for (int r = 0; r < num_rows; ++r) { + Action row_action = legal_actions[0][r]; + row_names.push_back(initial_state->ActionToString(0, row_action)); + + for (int c = 0; c < num_cols; ++c) { + Action col_action = legal_actions[1][c]; + if (col_names.size() < num_cols) { + col_names.push_back(initial_state->ActionToString(1, col_action)); + } + + std::unique_ptr clone = initial_state->Clone(); + clone->ApplyActions({row_action, col_action}); + SPIEL_CHECK_TRUE(clone->IsTerminal()); + std::vector returns = clone->Returns(); + SPIEL_CHECK_EQ(returns.size(), 2); + + row_utils.push_back(returns[0]); + col_utils.push_back(returns[1]); + } + } + + return std::shared_ptr( + new MatrixGame(type, {}, row_names, col_names, row_utils, col_utils)); +} + +std::shared_ptr ExtensiveToMatrixGame(const Game& game) { + SPIEL_CHECK_EQ(game.NumPlayers(), 2); + + std::vector row_names; + std::vector col_names; + std::vector> row_player_utils; + std::vector> col_player_utils; + + GameType type = game.GetType(); + + std::vector policies = { + DeterministicTabularPolicy(game, 0), DeterministicTabularPolicy(game, 1)}; + + bool first_row = true; + do { + 
policies[1].ResetDefaultPolicy(); + row_names.push_back(policies[0].ToString(" --- ")); + std::vector row_utils; + std::vector col_utils; + do { + if (first_row) { + col_names.push_back(policies[1].ToString(" --- ")); + } + std::unique_ptr state = game.NewInitialState(); + std::vector returns = + ExpectedReturns(*state, {&policies[0], &policies[1]}, -1); + row_utils.push_back(returns[0]); + col_utils.push_back(returns[1]); + } while (policies[1].NextPolicy()); + first_row = false; + row_player_utils.push_back(row_utils); + col_player_utils.push_back(col_utils); + } while (policies[0].NextPolicy()); + + return matrix_game::CreateMatrixGame(type.short_name, type.long_name, + row_names, col_names, row_player_utils, + col_player_utils); +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/matrix_game_utils.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/matrix_game_utils.h new file mode 100644 index 0000000..5c6fa07 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/matrix_game_utils.h @@ -0,0 +1,61 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_MATRIX_GAMES_UTILS_H_ +#define OPEN_SPIEL_ALGORITHMS_MATRIX_GAMES_UTILS_H_ + +#include +#include + +#include "open_spiel/matrix_game.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace algorithms { + +// Similar to open_spiel::LoadGame but returns specifically a matrix game type +// so that the subclass's specific methods are accessible. +std::shared_ptr LoadMatrixGame( + const std::string& name); + +// Clones a two-player normal-form game and returns it as a MatrixGame. These +// functions exist because some implementations are more general than +// two-player, but there are tools designed specifically to work with matrix +// games, and hence require conversion. +std::shared_ptr AsMatrixGame( + const NormalFormGame* game); +std::shared_ptr AsMatrixGame(const Game* game); + +// Creates a two-player extensive-form game (EFG)'s equivalent matrix game. +// +// Note that this matrix game will have a row (respectively column) for each +// deterministic policy in the extensive-form game. As such, it will be +// exponentially larger than the extensive-form game. In particular, if S_i is +// number of information states for player i, and A(s_i) for s_i in S_i is the +// set of legal actions at s_i, then the number of deterministic policies is +// the product \Prod_{s_i in S_i) |A(s_i)|, and can include many redundant +// policies that differ, e.g., only in unreachable states. See Chapter 5 of +// (Shoham and Leyton-Brown, Multiagent Systems Algorithmic, Game-Theoretic, and +// Logical Foundations, 2009, http://masfoundations.org/) for more detail, +// including examples of the transformations. +// +// Hence, this method should only be used for small games! For example, Kuhn +// poker has 64 deterministic policies, resulting in a 64-by-64 matrix. 
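Editor's note, placed before the declaration that follows: a short sketch making the size warning above concrete. It mirrors `matrix_game_utils_test.cc` later in this diff; Kuhn poker has 64 deterministic policies per player, so the induced normal form is a 64-by-64 matrix game. The helper name `ExtensiveToMatrixSketch` is illustrative.

```
#include <memory>

#include "open_spiel/algorithms/matrix_game_utils.h"
#include "open_spiel/spiel.h"
#include "open_spiel/spiel_utils.h"

void ExtensiveToMatrixSketch() {
  std::shared_ptr<const open_spiel::Game> kuhn =
      open_spiel::LoadGame("kuhn_poker");
  auto kuhn_matrix = open_spiel::algorithms::ExtensiveToMatrixGame(*kuhn);
  // One row/column per deterministic policy of the corresponding player.
  SPIEL_CHECK_EQ(kuhn_matrix->NumRows(), 64);
  SPIEL_CHECK_EQ(kuhn_matrix->NumCols(), 64);
}
```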
+std::shared_ptr ExtensiveToMatrixGame( + const Game& game); + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_MATRIX_GAME_UTILS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/matrix_game_utils_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/matrix_game_utils_test.cc new file mode 100644 index 0000000..cfed723 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/matrix_game_utils_test.cc @@ -0,0 +1,56 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/matrix_game_utils.h" + +#include "open_spiel/games/kuhn_poker/kuhn_poker.h" + +namespace open_spiel { +namespace algorithms { +namespace { + +void ConvertToMatrixGameTest() { + std::shared_ptr blotto = LoadGame("blotto"); + std::shared_ptr matrix_blotto = + AsMatrixGame(blotto.get()); + SPIEL_CHECK_GT(matrix_blotto->NumRows(), 0); + SPIEL_CHECK_GT(matrix_blotto->NumCols(), 0); + SPIEL_CHECK_EQ(matrix_blotto->NumRows(), 66); + SPIEL_CHECK_EQ(matrix_blotto->NumCols(), 66); + std::cout << "Blotto 0,13 = " << matrix_blotto->RowActionName(0) << " vs " + << matrix_blotto->ColActionName(13) + << " -> utils: " << matrix_blotto->PlayerUtility(Player{0}, 0, 13) + << "," << matrix_blotto->PlayerUtility(Player{1}, 0, 13) + << std::endl; +} + +void ExtensiveToMatrixGameTest() { + // This just does a conversion and checks the sizes. The real test of this + // method is currently in python/tests/matrix_utils_test, which solves the + // converted game to ensure it has the same value. + std::shared_ptr kuhn_game = LoadGame("kuhn_poker"); + std::shared_ptr kuhn_matrix_game = + ExtensiveToMatrixGame(*kuhn_game); + SPIEL_CHECK_EQ(kuhn_matrix_game->NumRows(), 64); + SPIEL_CHECK_EQ(kuhn_matrix_game->NumCols(), 64); +} + +} // namespace +} // namespace algorithms +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::algorithms::ConvertToMatrixGameTest(); + open_spiel::algorithms::ExtensiveToMatrixGameTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/maxn.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/maxn.cc new file mode 100644 index 0000000..f31ec91 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/maxn.cc @@ -0,0 +1,121 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
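Editor's note on the `MaxNSearch` implementation that follows in maxn.cc: a minimal usage sketch. The choice of `tic_tac_toe`, the use of `MaxGameLength()` as the depth limit, and the helper name `MaxNSketch` are illustrative assumptions; with a small perfect-information game the full tree can be walked, so no `value_function` is needed.

```
#include "open_spiel/algorithms/maxn.h"
#include "open_spiel/spiel.h"

void MaxNSketch() {
  std::shared_ptr<const open_spiel::Game> game =
      open_spiel::LoadGame("tic_tac_toe");
  // state == nullptr means "search from the initial state".
  auto [values, best_action] = open_spiel::algorithms::MaxNSearch(
      *game, /*state=*/nullptr, /*value_function=*/nullptr,
      /*depth_limit=*/game->MaxGameLength());
  // values[p] is player p's max^n value at the root; best_action is the move
  // the player to act would choose.
}
```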
+ +#include "open_spiel/algorithms/maxn.h" + +#include +#include +#include +#include +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace { + +std::vector _maxn( + const State* state, int depth, + std::function value_function, + Action* best_action) { + const int num_players = state->NumPlayers(); + + if (state->IsTerminal()) { + return state->Returns(); + } + + if (depth == 0 && !value_function) { + SpielFatalError( + "We assume we can walk the full depth of the tree. " + "Try increasing depth or provide a value_function."); + } + + if (depth == 0) { + std::vector values(num_players); + for (Player p = 0; p < num_players; ++p) { + values[p] = value_function(*state, p); + } + return values; + } + + Player player = state->CurrentPlayer(); + if (state->IsChanceNode()) { + std::vector values(num_players, 0.0); + for (const auto& actionprob : state->ChanceOutcomes()) { + std::unique_ptr child_state = state->Child(actionprob.first); + std::vector child_values = + _maxn(child_state.get(), depth, value_function, + /*best_action=*/nullptr); + for (Player p = 0; p < num_players; ++p) { + values[p] += actionprob.second * child_values[p]; + } + } + return values; + } else { + double value = -std::numeric_limits::infinity(); + std::vector values(num_players, 0); + + for (Action action : state->LegalActions()) { + std::unique_ptr child_state = state->Child(action); + std::vector child_values = + _maxn(child_state.get(), + /*depth=*/depth - 1, value_function, + /*best_action=*/nullptr); + + if (child_values[player] > value) { + value = child_values[player]; + values = child_values; + if (best_action != nullptr) { + *best_action = action; + } + } + } + return values; + } +} +} // namespace + +std::pair, Action> MaxNSearch( + const Game& game, const State* state, + std::function value_function, + int depth_limit) { + GameType game_info = game.GetType(); + SPIEL_CHECK_TRUE( + game_info.chance_mode == GameType::ChanceMode::kDeterministic || + game_info.chance_mode == GameType::ChanceMode::kExplicitStochastic); + // Do not check perfect information. Used by PIMC. + SPIEL_CHECK_EQ(game_info.dynamics, GameType::Dynamics::kSequential); + SPIEL_CHECK_EQ(game_info.reward_model, GameType::RewardModel::kTerminal); + + std::unique_ptr search_root; + if (state == nullptr) { + search_root = game.NewInitialState(); + } else { + search_root = state->Clone(); + } + + SPIEL_CHECK_FALSE(search_root->IsChanceNode()); + + Action best_action = kInvalidAction; + std::vector values = _maxn(search_root.get(), /*depth=*/depth_limit, + value_function, &best_action); + + return {values, best_action}; +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/maxn.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/maxn.h new file mode 100644 index 0000000..37e7ce1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/maxn.h @@ -0,0 +1,35 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_MAXN_H_ +#define OPEN_SPIEL_ALGORITHMS_MAXN_H_ + +#include +#include +#include + +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace algorithms { + +std::pair, Action> MaxNSearch( + const Game& game, const State* state, + std::function value_function, + int depth_limit); + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_MAXN_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/mcts.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/mcts.cc new file mode 100644 index 0000000..e575e97 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/mcts.cc @@ -0,0 +1,479 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/mcts.h" + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/time/clock.h" +#include "open_spiel/abseil-cpp/absl/time/time.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { + +int MIN_GC_LIMIT = 5; + +int MemoryUsedMb(int nodes) { + return nodes * sizeof(SearchNode) / (1 << 20); +} + +std::vector RandomRolloutEvaluator::Evaluate(const State& state) { + std::vector result; + for (int i = 0; i < n_rollouts_; ++i) { + std::unique_ptr working_state = state.Clone(); + while (!working_state->IsTerminal()) { + if (working_state->IsChanceNode()) { + ActionsAndProbs outcomes = working_state->ChanceOutcomes(); + working_state->ApplyAction(SampleAction(outcomes, rng_).first); + } else { + std::vector actions = working_state->LegalActions(); + working_state->ApplyAction( + actions[absl::Uniform(rng_, 0u, actions.size())]); + } + } + + std::vector returns = working_state->Returns(); + if (result.empty()) { + result.swap(returns); + } else { + SPIEL_CHECK_EQ(returns.size(), result.size()); + for (int i = 0; i < result.size(); ++i) { + result[i] += returns[i]; + } + } + } + for (int i = 0; i < result.size(); ++i) { + result[i] /= n_rollouts_; + } + return result; +} + +ActionsAndProbs RandomRolloutEvaluator::Prior(const State& state) { + // Returns equal probability for all actions. 
+ if (state.IsChanceNode()) { + return state.ChanceOutcomes(); + } else { + std::vector legal_actions = state.LegalActions(); + ActionsAndProbs prior; + prior.reserve(legal_actions.size()); + for (const Action& action : legal_actions) { + prior.emplace_back(action, 1.0 / legal_actions.size()); + } + return prior; + } +} + +// UCT value of given child +double SearchNode::UCTValue(int parent_explore_count, double uct_c) const { + if (!outcome.empty()) { + return outcome[player]; + } + + if (explore_count == 0) return std::numeric_limits::infinity(); + + // The "greedy-value" of choosing a given child is always with respect to + // the current player for this node. + return total_reward / explore_count + + uct_c * std::sqrt(std::log(parent_explore_count) / explore_count); +} + +double SearchNode::PUCTValue(int parent_explore_count, double uct_c) const { + // Returns the PUCT value of this node. + if (!outcome.empty()) { + return outcome[player]; + } + + return ((explore_count != 0 ? total_reward / explore_count : 0) + + uct_c * prior * std::sqrt(parent_explore_count) / + (explore_count + 1)); +} + +bool SearchNode::CompareFinal(const SearchNode& b) const { + double out = (player >= 0 && player < outcome.size() ? outcome[player] : 0); + double out_b = + (b.player >= 0 && b.player < b.outcome.size() ? b.outcome[b.player] : 0); + if (out != out_b) { + return out < out_b; + } + if (explore_count != b.explore_count) { + return explore_count < b.explore_count; + } + return total_reward < b.total_reward; +} + +const SearchNode& SearchNode::BestChild() const { + // Returns the best action from this node, either proven or most visited. + // + // This ordering leads to choosing: + // - Highest proven score > 0 over anything else, including a promising but + // unproven action. + // - A proven draw only if it has higher exploration than others that are + // uncertain, or the others are losses. + // - Uncertain action with most exploration over loss of any difficulty + // - Hardest loss if everything is a loss + // - Highest expected reward if explore counts are equal (unlikely). + // - Longest win, if multiple are proven (unlikely due to early stopping). + return *std::max_element(children.begin(), children.end(), + [](const SearchNode& a, const SearchNode& b) { + return a.CompareFinal(b); + }); +} + +std::string SearchNode::ChildrenStr(const State& state) const { + std::string out; + if (!children.empty()) { + std::vector refs; // Sort a list of refs, not a copy. + refs.reserve(children.size()); + for (const SearchNode& child : children) { + refs.push_back(&child); + } + std::sort(refs.begin(), refs.end(), + [](const SearchNode* a, const SearchNode* b) { + return b->CompareFinal(*a); + }); + for (const SearchNode* child : refs) { + absl::StrAppend(&out, child->ToString(state), "\n"); + } + } + return out; +} + +std::string SearchNode::ToString(const State& state) const { + return absl::StrFormat( + "%6s: player: %d, prior: %5.3f, value: %6.3f, sims: %5d, outcome: %s, " + "%3d children", + (action != kInvalidAction ? state.ActionToString(player, action) + : "none"), + player, prior, (explore_count ? total_reward / explore_count : 0.), + explore_count, + (outcome.empty() + ? "none" + : absl::StrFormat("%4.1f", + outcome[player == kChancePlayerId ? 
0 : player])), + children.size()); +} + +Action SearchNode::SampleFromPrior(const State& state, + Evaluator* evaluator, + std::mt19937* rng) const { + std::unique_ptr working_state = state.Clone(); + ActionsAndProbs prior = evaluator->Prior(*working_state); + Action chosen_action = SampleAction(prior, *rng).first; + return chosen_action; +} + +std::vector dirichlet_noise(int count, double alpha, + std::mt19937* rng) { + std::vector noise; + noise.reserve(count); + + std::gamma_distribution gamma(alpha, 1.0); + for (int i = 0; i < count; ++i) { + noise.emplace_back(gamma(*rng)); + } + + double sum = absl::c_accumulate(noise, 0.0); + for (double& v : noise) { + v /= sum; + } + return noise; +} + +MCTSBot::MCTSBot(const Game& game, std::shared_ptr evaluator, + double uct_c, int max_simulations, int64_t max_memory_mb, + bool solve, int seed, bool verbose, + ChildSelectionPolicy child_selection_policy, + double dirichlet_alpha, double dirichlet_epsilon, + bool dont_return_chance_node) + : uct_c_{uct_c}, + max_simulations_{max_simulations}, + max_nodes_((max_memory_mb << 20) / sizeof(SearchNode) + 1), + nodes_(0), + gc_limit_(MIN_GC_LIMIT), + verbose_(verbose), + solve_(solve), + max_utility_(game.MaxUtility()), + dirichlet_alpha_(dirichlet_alpha), + dirichlet_epsilon_(dirichlet_epsilon), + dont_return_chance_node_(dont_return_chance_node), + rng_(seed), + child_selection_policy_(child_selection_policy), + evaluator_(evaluator) { + GameType game_type = game.GetType(); + if (game_type.reward_model != GameType::RewardModel::kTerminal) + SpielFatalError("Game must have terminal rewards."); + if (game_type.dynamics != GameType::Dynamics::kSequential) + SpielFatalError("Game must have sequential turns."); +} + +Action MCTSBot::Step(const State& state) { + absl::Time start = absl::Now(); + std::unique_ptr root = MCTSearch(state); + + if (max_simulations_ <= 1) { + // sample from prior + return root->SampleFromPrior(state, evaluator_.get(), &rng_); + } else { + // return best action + const SearchNode& best = root->BestChild(); + + if (verbose_) { + double seconds = absl::ToDoubleSeconds(absl::Now() - start); + std::cerr << absl::StrFormat( + ("Finished %d sims in %.3f secs, %.1f sims/s, " + "tree size: %d nodes / %d mb."), + root->explore_count, seconds, + (root->explore_count / seconds), nodes_, + MemoryUsedMb(nodes_)) + << std::endl; + std::cerr << "Root:" << std::endl; + std::cerr << root->ToString(state) << std::endl; + std::cerr << "Children:" << std::endl; + std::cerr << root->ChildrenStr(state) << std::endl; + if (!best.children.empty()) { + std::unique_ptr chosen_state = state.Clone(); + chosen_state->ApplyAction(best.action); + std::cerr << "Children of chosen:" << std::endl; + std::cerr << best.ChildrenStr(*chosen_state) << std::endl; + } + } + return best.action; + } +} + +std::pair MCTSBot::StepWithPolicy(const State& state) { + Action action = Step(state); + return {{{action, 1.}}, action}; +} + +std::unique_ptr MCTSBot::ApplyTreePolicy( + SearchNode* root, const State& state, + std::vector* visit_path) { + visit_path->push_back(root); + std::unique_ptr working_state = state.Clone(); + SearchNode* current_node = root; + while ((!working_state->IsTerminal() && current_node->explore_count > 0) || + (working_state->IsChanceNode() && dont_return_chance_node_)) { + if (current_node->children.empty()) { + // For a new node, initialize its state, then choose a child as normal. 
+ ActionsAndProbs legal_actions = evaluator_->Prior(*working_state); + if (current_node == root && dirichlet_alpha_ > 0) { + std::vector noise = + dirichlet_noise(legal_actions.size(), dirichlet_alpha_, &rng_); + for (int i = 0; i < legal_actions.size(); i++) { + legal_actions[i].second = + (1 - dirichlet_epsilon_) * legal_actions[i].second + + dirichlet_epsilon_ * noise[i]; + } + } + // Reduce bias from move generation order. + std::shuffle(legal_actions.begin(), legal_actions.end(), rng_); + Player player = working_state->CurrentPlayer(); + current_node->children.reserve(legal_actions.size()); + for (auto [action, prior] : legal_actions) { + current_node->children.emplace_back(action, player, prior); + } + nodes_ += current_node->children.capacity(); + } + + Action selected_action; + if (current_node->children.empty()) { + // no children, sample from prior + selected_action = current_node->SampleFromPrior(state, evaluator_.get(), + &rng_); + } else { + // look at children + SearchNode* chosen_child = nullptr; + if (working_state->IsChanceNode()) { + // For chance nodes, rollout according to chance node's probability + // distribution + Action chosen_action = + SampleAction(working_state->ChanceOutcomes(), rng_).first; + + for (SearchNode& child : current_node->children) { + if (child.action == chosen_action) { + chosen_child = &child; + break; + } + } + } else { + // Otherwise choose node with largest UCT value. + double max_value = -std::numeric_limits::infinity(); + for (SearchNode& child : current_node->children) { + double val; + switch (child_selection_policy_) { + case ChildSelectionPolicy::UCT: + val = child.UCTValue(current_node->explore_count, uct_c_); + break; + case ChildSelectionPolicy::PUCT: + val = child.PUCTValue(current_node->explore_count, uct_c_); + break; + } + if (val > max_value) { + max_value = val; + chosen_child = &child; + } + } + } + selected_action = chosen_child->action; + current_node = chosen_child; + } + + working_state->ApplyAction(selected_action); + visit_path->push_back(current_node); + } + + return working_state; +} + +std::unique_ptr MCTSBot::MCTSearch(const State& state) { + nodes_ = 1; + gc_limit_ = MIN_GC_LIMIT; + auto root = std::make_unique(kInvalidAction, + state.CurrentPlayer(), 1); + std::vector visit_path; + std::vector returns; + visit_path.reserve(64); + for (int i = 0; i < max_simulations_; ++i) { + visit_path.clear(); + returns.clear(); + + std::unique_ptr working_state = + ApplyTreePolicy(root.get(), state, &visit_path); + + bool solved; + if (working_state->IsTerminal()) { + returns = working_state->Returns(); + visit_path[visit_path.size() - 1]->outcome = returns; + solved = solve_; + } else { + returns = evaluator_->Evaluate(*working_state); + solved = false; + } + + // Propagate values back. + while (!visit_path.empty()) { + int decision_node_idx = visit_path.size() - 1; + SearchNode* node = visit_path[decision_node_idx]; + + // If it's a chance node, find the parent player id. + while (visit_path[decision_node_idx]->player == kChancePlayerId) { + decision_node_idx--; + } + + node->total_reward += returns[visit_path[decision_node_idx]->player]; + node->explore_count += 1; + visit_path.pop_back(); + + // Back up solved results as well. + if (solved && !node->children.empty()) { + Player player = node->children[0].player; + if (player == kChancePlayerId) { + // Only back up chance nodes if all have the same outcome. 
+ // An alternative would be to back up the weighted average of + // outcomes if all children are solved, but that is less clear. + const std::vector& outcome = node->children[0].outcome; + if (!outcome.empty() && + std::all_of(node->children.begin() + 1, node->children.end(), + [&outcome](const SearchNode& c) { + return c.outcome == outcome; + })) { + node->outcome = outcome; + } else { + solved = false; + } + } else { + // If any have max utility (won?), or all children are solved, + // choose the one best for the player choosing. + const SearchNode* best = nullptr; + bool all_solved = true; + for (const SearchNode& child : node->children) { + if (child.outcome.empty()) { + all_solved = false; + } else if (best == nullptr || + child.outcome[player] > best->outcome[player]) { + best = &child; + } + } + if (best != nullptr && + (all_solved || best->outcome[player] == max_utility_)) { + node->outcome = best->outcome; + } else { + solved = false; + } + } + } + } + + if (!root->outcome.empty() || // Full game tree is solved. + root->children.size() == 1) { + break; + } + if (max_nodes_ > 1 && nodes_ >= max_nodes_) { + // Note that actual memory used as counted by ps/top might exceed the + // counted value here, possibly by a significant margin (1.5x even!). Part + // of that is not counting the outcome array, but most of that is due to + // memory fragmentation and is out of our control without writing our own + // memory manager. + if (verbose_) { + std::cerr << absl::StrFormat( + ("Approx %d mb in %d nodes after %d sims, garbage collecting with " + "limit %d ... "), + MemoryUsedMb(nodes_), nodes_, i, gc_limit_); + } + GarbageCollect(root.get()); + + // Slowly increase or decrease to target releasing half the memory. + gc_limit_ *= (nodes_ > max_nodes_ / 2 ? 1.25 : 0.9); + gc_limit_ = std::max(MIN_GC_LIMIT, gc_limit_); + if (verbose_) { + std::cerr << absl::StrFormat( + "%d mb in %d nodes remaining\n", + MemoryUsedMb(nodes_), nodes_); + } + } + } + + return root; +} + +void MCTSBot::GarbageCollect(SearchNode* node) { + if (node->children.empty()) { + return; + } + bool clear_children = node->explore_count < gc_limit_; + for (SearchNode& child : node->children) { + GarbageCollect(&child); + } + if (clear_children) { + nodes_ -= node->children.capacity(); + node->children.clear(); + node->children.shrink_to_fit(); // release the memory + } +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/mcts.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/mcts.h new file mode 100644 index 0000000..a05a78d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/mcts.h @@ -0,0 +1,228 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
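// --- Illustrative usage sketch (annotation, not part of the upstream
// OpenSpiel patch): construct an MCTSBot around a RandomRolloutEvaluator and
// let the same bot self-play tic-tac-toe, mirroring MCTSTest_CanPlayBothSides
// in mcts_test.cc further below. Arguments follow the MCTSBot constructor
// declared later in this header (mcts.h).

#include <memory>

#include "open_spiel/algorithms/mcts.h"
#include "open_spiel/spiel.h"

void SelfPlayTicTacToeSketch() {
  auto game = open_spiel::LoadGame("tic_tac_toe");
  auto evaluator =
      std::make_shared<open_spiel::algorithms::RandomRolloutEvaluator>(
          /*n_rollouts=*/20, /*seed=*/42);
  open_spiel::algorithms::MCTSBot bot(
      *game, evaluator, /*uct_c=*/2.0, /*max_simulations=*/100,
      /*max_memory_mb=*/5, /*solve=*/true, /*seed=*/42, /*verbose=*/false);
  std::unique_ptr<open_spiel::State> state = game->NewInitialState();
  while (!state->IsTerminal()) {
    state->ApplyAction(bot.Step(*state));  // The same bot plays both sides.
  }
}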
+ +#ifndef OPEN_SPIEL_ALGORITHMS_MCTS_H_ +#define OPEN_SPIEL_ALGORITHMS_MCTS_H_ + +#include + +#include +#include +#include +#include +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_bots.h" + +// A vanilla Monte Carlo Tree Search algorithm. +// +// This algorithm searches the game tree from the given state. +// At the leaf, the evaluator is called if the game state is not terminal. +// A total of max_simulations states are explored. +// +// At every node, the algorithm chooses the action with the highest PUCT value +// defined as: `Q/N + c * prior * sqrt(parent_N) / N`, where Q is the total +// reward after the action, and N is the number of times the action was +// explored in this position. The input parameter c controls the balance +// between exploration and exploitation; higher values of c encourage +// exploration of under-explored nodes. Unseen actions are always explored +// first. +// +// At the end of the search, the chosen action is the action that has been +// explored most often. This is the action that is returned. +// +// This implementation supports sequential n-player games, with or without +// chance nodes. All players maximize their own reward and ignore the other +// players' rewards. This corresponds to max^n for n-player games. It is the +// norm for zero-sum games, but doesn't have any special handling for +// non-zero-sum games. It doesn't have any special handling for imperfect +// information games. +// +// The implementation also supports backing up solved states, i.e. MCTS-Solver. +// The implementation is general in that it is based on a max^n backup (each +// player greedily chooses their maximum among proven children values, or there +// exists one child whose proven value is Game::MaxUtility()), so it will work +// for multiplayer, general-sum, and arbitrary payoff games (not just win/loss/ +// draw games). Also chance nodes are considered proven only if all children +// have the same value. +// +// Some references: +// - Sturtevant, An Analysis of UCT in Multi-Player Games, 2008, +// https://web.cs.du.edu/~sturtevant/papers/multi-player_UCT.pdf +// - Nijssen, Monte-Carlo Tree Search for Multi-Player Games, 2013, +// https://project.dke.maastrichtuniversity.nl/games/files/phd/Nijssen_thesis.pdf +// - Silver, AlphaGo Zero: Starting from scratch, 2017 +// https://deepmind.com/blog/article/alphago-zero-starting-scratch +// - Winands, Bjornsson, and Saito, Monte-Carlo Tree Search Solver, 2008. +// https://dke.maastrichtuniversity.nl/m.winands/documents/uctloa.pdf + +namespace open_spiel { +namespace algorithms { + +enum class ChildSelectionPolicy { + UCT, + PUCT, +}; + +// Abstract class representing an evaluation function for a game. +// The evaluation function takes in an intermediate state in the game and +// returns an evaluation of that state, which should correlate with chances of +// winning the game for player 0. +class Evaluator { + public: + virtual ~Evaluator() = default; + + // Return a value of this state for each player. + virtual std::vector Evaluate(const State& state) = 0; + + // Return a policy: the probability of the current player playing each action. + virtual ActionsAndProbs Prior(const State& state) = 0; +}; + +// A simple evaluator that returns the average outcome of playing random actions +// from the given state until the end of the game. +// n_rollouts is the number of random outcomes to be considered. 
+class RandomRolloutEvaluator : public Evaluator { + public: + explicit RandomRolloutEvaluator(int n_rollouts, int seed) + : n_rollouts_(n_rollouts), rng_(seed) {} + + // Runs random games, returning the average returns. + std::vector Evaluate(const State& state) override; + + // Returns equal probability for each action. + ActionsAndProbs Prior(const State& state) override; + + private: + int n_rollouts_; + std::mt19937 rng_; +}; + +// A node in the search tree for MCTS +struct SearchNode { + Action action = 0; // The action taken to get to this node. + double prior = 0; // The prior probability of playing this action. + Player player = 0; // Which player gets to make this action. + int explore_count = 0; // Number of times this node was explored. + double total_reward = 0; // Total reward passing through this node. + std::vector outcome; // The reward if each players plays perfectly. + std::vector children; // The successors to this state. + + SearchNode() {} + + SearchNode(Action action_, Player player_, double prior_) + : action(action_), prior(prior_), player(player_) {} + + // The value as returned by the UCT formula. + double UCTValue(int parent_explore_count, double uct_c) const; + + // The value as returned by the PUCT formula. + double PUCTValue(int parent_explore_count, double uct_c) const; + + // The sort order for the BestChild. + bool CompareFinal(const SearchNode& b) const; + const SearchNode& BestChild() const; + + // Return a string representation of this node, or all its children. + // The state is needed to convert the action to a string. + std::string ToString(const State& state) const; + std::string ChildrenStr(const State& state) const; + + Action SampleFromPrior(const State& state, + Evaluator* evaluator, + std::mt19937* rng) const; +}; + +// A SpielBot that uses the MCTS algorithm as its policy. +class MCTSBot : public Bot { + public: + // The evaluator is passed as a shared pointer to make it explicit that + // the same evaluator instance can be passed to multiple bots and to + // make the MCTSBot Python interface work regardless of the scope of the + // Python evaluator object. + // + // TODO(author5): The second parameter needs to be a const reference at the + // moment, even though it gets assigned to a member of type + // std::shared_ptr. This is because using a + // std::shared_ptr in the constructor leads to the Julia API test + // failing. We don't know why right now, but intend to fix this. + MCTSBot( + const Game& game, std::shared_ptr evaluator, double uct_c, + int max_simulations, + int64_t max_memory_mb, // Max memory use in megabytes. + bool solve, // Whether to back up solved states. + int seed, bool verbose, + ChildSelectionPolicy child_selection_policy = ChildSelectionPolicy::UCT, + double dirichlet_alpha = 0, double dirichlet_epsilon = 0, + bool dont_return_chance_node = false); + ~MCTSBot() = default; + + void Restart() override {} + void RestartAt(const State& state) override {} + // Run MCTS for one step, choosing the action, and printing some information. + Action Step(const State& state) override; + + // Implements StepWithPolicy. This is equivalent to calling Step, but wraps + // the action as an ActionsAndProbs with 100% probability assigned to the + // lone action. + std::pair StepWithPolicy( + const State& state) override; + + // Run MCTS on a given state, and return the resulting search tree. + std::unique_ptr MCTSearch(const State& state); + + private: + // Applies the UCT policy to play the game until reaching a leaf node. 
+ // + // A leaf node is defined as a node that is terminal or has not been evaluated + // yet. If it reaches a node that has been evaluated before but hasn't been + // expanded, then expand it's children and continue. + // + // Args: + // root: The root node in the search tree. + // state: The state of the game at the root node. + // visit_path: A vector of nodes to be filled in descending from the root + // node to a leaf node. + // + // Returns: The state of the game at the leaf node. + std::unique_ptr ApplyTreePolicy(SearchNode* root, const State& state, + std::vector* visit_path); + + void GarbageCollect(SearchNode* node); + + double uct_c_; + int max_simulations_; + int max_nodes_; // Max nodes allowed in the tree + int nodes_; // Nodes used in the tree. + int gc_limit_; + bool verbose_; + bool solve_; + double max_utility_; + double dirichlet_alpha_; + double dirichlet_epsilon_; + bool dont_return_chance_node_; + std::mt19937 rng_; + const ChildSelectionPolicy child_selection_policy_; + std::shared_ptr evaluator_; +}; + +// Returns a vector of noise sampled from a dirichlet distribution. See: +// https://en.wikipedia.org/wiki/Dirichlet_process +std::vector dirichlet_noise(int count, double alpha, std::mt19937* rng); + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_MCTS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/mcts_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/mcts_test.cc new file mode 100644 index 0000000..31b864e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/mcts_test.cc @@ -0,0 +1,183 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
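// --- Worked restatement (annotation, not part of the upstream OpenSpiel
// patch) of the child-selection score documented in mcts.h above:
// Q/N + c * prior * sqrt(parent_N) / N.  IllustrativePuctValue is a
// hypothetical free function used only for illustration; the bot itself uses
// SearchNode::PUCTValue / SearchNode::UCTValue.

#include <cmath>
#include <limits>

double IllustrativePuctValue(double total_reward, int explore_count,
                             int parent_explore_count, double prior,
                             double uct_c) {
  if (explore_count == 0) {
    // Mirrors "Unseen actions are always explored first" from the header doc.
    return std::numeric_limits<double>::infinity();
  }
  return total_reward / explore_count +  // Q / N
         uct_c * prior *
             std::sqrt(static_cast<double>(parent_explore_count)) /
             explore_count;              // c * prior * sqrt(parent_N) / N
}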
+ +#include "open_spiel/algorithms/mcts.h" + +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/algorithms/evaluate_bots.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_bots.h" +#include "open_spiel/spiel_utils.h" + +using open_spiel::algorithms::Evaluator; +using open_spiel::algorithms::RandomRolloutEvaluator; + +namespace open_spiel { +namespace { + +constexpr double UCT_C = 2; + +std::unique_ptr InitBot(const open_spiel::Game& game, + int max_simulations, + std::shared_ptr evaluator) { + return std::make_unique( + game, std::move(evaluator), UCT_C, max_simulations, + /*max_memory_mb=*/5, /*solve=*/true, /*seed=*/42, /*verbose=*/false); +} + +void MCTSTest_CanPlayTicTacToe() { + auto game = LoadGame("tic_tac_toe"); + int max_simulations = 100; + auto evaluator = std::make_shared(20, 42); + auto bot0 = InitBot(*game, max_simulations, evaluator); + auto bot1 = InitBot(*game, max_simulations, evaluator); + auto results = + EvaluateBots(game->NewInitialState().get(), {bot0.get(), bot1.get()}, 42); + SPIEL_CHECK_EQ(results[0] + results[1], 0); +} + +void MCTSTest_CanPlayTicTacToe_LowSimulations() { + auto game = LoadGame("tic_tac_toe"); + // Setting max_simulations to 0 or 1 is equivalent to sampling from the prior. + for (const int max_simulations : {0, 1}) { + auto evaluator = std::make_shared(20, 42); + auto bot0 = InitBot(*game, max_simulations, evaluator); + auto bot1 = InitBot(*game, max_simulations, evaluator); + auto results = EvaluateBots(game->NewInitialState().get(), + {bot0.get(), bot1.get()}, 42); + SPIEL_CHECK_EQ(results[0] + results[1], 0); + } +} + +void MCTSTest_CanPlayBothSides() { + auto game = LoadGame("tic_tac_toe"); + int max_simulations = 100; + auto evaluator = std::make_shared(20, 42); + auto bot = InitBot(*game, max_simulations, evaluator); + auto results = + EvaluateBots(game->NewInitialState().get(), {bot.get(), bot.get()}, 42); + SPIEL_CHECK_EQ(results[0] + results[1], 0); +} + +void MCTSTest_CanPlaySinglePlayer() { + auto game = LoadGame("catch"); + int max_simulations = 100; + auto evaluator = std::make_shared(20, 42); + auto bot = InitBot(*game, max_simulations, evaluator); + auto results = EvaluateBots(game->NewInitialState().get(), {bot.get()}, 42); + SPIEL_CHECK_GT(results[0], 0); +} + +void MCTSTest_CanPlayThreePlayerStochasticGames() { + auto game = LoadGame("pig(players=3,winscore=20,horizon=30)"); + int max_simulations = 1000; + auto evaluator = std::make_shared(20, 42); + auto bot0 = InitBot(*game, max_simulations, evaluator); + auto bot1 = InitBot(*game, max_simulations, evaluator); + auto bot2 = InitBot(*game, max_simulations, evaluator); + auto results = EvaluateBots(game->NewInitialState().get(), + {bot0.get(), bot1.get(), bot2.get()}, 42); + SPIEL_CHECK_FLOAT_EQ(results[0] + results[1] + results[2], 0); +} + +open_spiel::Action GetAction(const open_spiel::State& state, + const absl::string_view action_str) { + for (open_spiel::Action action : state.LegalActions()) { + if (action_str == state.ActionToString(state.CurrentPlayer(), action)) + return action; + } + open_spiel::SpielFatalError(absl::StrCat("Illegal action: ", action_str)); +} + +std::pair, std::unique_ptr> +SearchTicTacToeState(const absl::string_view initial_actions) { + auto game = LoadGame("tic_tac_toe"); + std::unique_ptr state = game->NewInitialState(); + for (const auto& action_str : absl::StrSplit(initial_actions, ' ')) { + 
state->ApplyAction(GetAction(*state, action_str)); + } + auto evaluator = std::make_shared(20, 42); + algorithms::MCTSBot bot(*game, evaluator, UCT_C, + /*max_simulations=*/ 10000, + /*max_memory_mb=*/ 10, + /*solve=*/ true, + /*seed=*/ 42, + /*verbose=*/ false); + return {bot.MCTSearch(*state), std::move(state)}; +} + +void MCTSTest_SolveDraw() { + auto [root, state] = SearchTicTacToeState("x(1,1) o(0,0) x(2,2)"); + SPIEL_CHECK_EQ(state->ToString(), "o..\n.x.\n..x"); + SPIEL_CHECK_EQ(root->outcome[root->player], 0); + for (const algorithms::SearchNode& c : root->children) + SPIEL_CHECK_LE(c.outcome[c.player], 0); // No winning moves. + const algorithms::SearchNode& best = root->BestChild(); + SPIEL_CHECK_EQ(best.outcome[best.player], 0); + std::string action_str = state->ActionToString(best.player, best.action); + if (action_str != "o(2,0)" && action_str != "o(0,2)") // All others lose. + SPIEL_CHECK_EQ(action_str, "o(2,0)"); // "o(0,2)" is also valid. +} + +void MCTSTest_SolveLoss() { + auto [root, state] = + SearchTicTacToeState("x(1,1) o(0,0) x(2,2) o(0,1) x(0,2)"); + SPIEL_CHECK_EQ(state->ToString(), "oox\n.x.\n..x"); + SPIEL_CHECK_EQ(root->outcome[root->player], -1); + for (const algorithms::SearchNode& c : root->children) + SPIEL_CHECK_EQ(c.outcome[c.player], -1); // All losses. +} + +void MCTSTest_SolveWin() { + auto [root, state] = SearchTicTacToeState("x(0,1) o(2,2)"); + SPIEL_CHECK_EQ(state->ToString(), ".x.\n...\n..o"); + SPIEL_CHECK_EQ(root->outcome[root->player], 1); + const algorithms::SearchNode& best = root->BestChild(); + SPIEL_CHECK_EQ(best.outcome[best.player], 1); + SPIEL_CHECK_EQ(state->ActionToString(best.player, best.action), "x(0,2)"); +} + +void MCTSTest_GarbageCollect() { + auto game = LoadGame("tic_tac_toe"); + std::unique_ptr state = game->NewInitialState(); + auto evaluator = std::make_shared(1, 42); + algorithms::MCTSBot bot(*game, evaluator, UCT_C, + /*max_simulations=*/ 1000000, + /*max_memory_mb=*/ 1, + /*solve=*/ true, + /*seed=*/ 42, + /*verbose=*/ true); // Verify the log output. + std::unique_ptr root = bot.MCTSearch(*state); + SPIEL_CHECK_TRUE(root->outcome.size() == 2 || + root->explore_count == 1000000); +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::MCTSTest_CanPlayTicTacToe(); + open_spiel::MCTSTest_CanPlayTicTacToe_LowSimulations(); + open_spiel::MCTSTest_CanPlayBothSides(); + open_spiel::MCTSTest_CanPlaySinglePlayer(); + open_spiel::MCTSTest_CanPlayThreePlayerStochasticGames(); + open_spiel::MCTSTest_SolveDraw(); + open_spiel::MCTSTest_SolveLoss(); + open_spiel::MCTSTest_SolveWin(); + open_spiel::MCTSTest_GarbageCollect(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/minimax.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/minimax.cc new file mode 100644 index 0000000..6efb52a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/minimax.cc @@ -0,0 +1,294 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/minimax.h" + +#include // std::max +#include +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace { + +// An alpha-beta algorithm. +// +// Implements a min-max algorithm with alpha-beta pruning. +// See for example https://en.wikipedia.org/wiki/Alpha-beta_pruning +// +// Arguments: +// state: The current state node of the game. +// depth: The maximum depth for the min/max search. +// alpha: best value that the MAX player can guarantee (if the value is <= +// alpha, the MAX player will avoid it). +// beta: the best value that the MIN currently can guarantee (if the value is +// >= than beta, the MIN player will avoid it). +// value_function: An optional function mapping a Spiel `State` to a +// numerical value, to be used as the value for a node when we reach +// `depth_limit` and the node is not terminal. +// maximizing_player_id: The id of the MAX player. The other player is assumed +// to be MIN. +// use_undo: use the State::Undo for faster run-time. +// +// Returns: +// The optimal value of the sub-game starting in state (given alpha/beta). +double _alpha_beta(State* state, int depth, double alpha, double beta, + std::function value_function, + Player maximizing_player, Action* best_action, + bool use_undo) { + if (state->IsTerminal()) { + return state->PlayerReturn(maximizing_player); + } + + if (depth == 0 && !value_function) { + SpielFatalError( + "We assume we can walk the full depth of the tree. " + "Try increasing depth or provide a value_function."); + } + + if (depth == 0) { + return value_function(*state); + } + + Player player = state->CurrentPlayer(); + if (player == maximizing_player) { + double value = -std::numeric_limits::infinity(); + + for (Action action : state->LegalActions()) { + double child_value = 0; + if (use_undo) { + state->ApplyAction(action); + child_value = + _alpha_beta(state, /*depth=*/depth - 1, /*alpha=*/alpha, + /*beta=*/beta, value_function, maximizing_player, + /*best_action=*/nullptr, use_undo); + state->UndoAction(player, action); + } else { + std::unique_ptr child_state = state->Child(action); + child_value = + _alpha_beta(child_state.get(), /*depth=*/depth - 1, /*alpha=*/alpha, + /*beta=*/beta, value_function, maximizing_player, + /*best_action=*/nullptr, use_undo); + } + + if (child_value > value) { + value = child_value; + if (best_action != nullptr) { + *best_action = action; + } + } + + alpha = std::max(alpha, value); + if (alpha >= beta) { + break; // beta cut-off + } + } + + return value; + } else { + double value = std::numeric_limits::infinity(); + + for (Action action : state->LegalActions()) { + double child_value = 0; + if (use_undo) { + state->ApplyAction(action); + child_value = + _alpha_beta(state, /*depth=*/depth - 1, /*alpha=*/alpha, + /*beta=*/beta, value_function, maximizing_player, + /*best_action=*/nullptr, use_undo); + state->UndoAction(player, action); + } else { + std::unique_ptr child_state = state->Child(action); + child_value = + _alpha_beta(child_state.get(), /*depth=*/depth - 1, /*alpha=*/alpha, + /*beta=*/beta, value_function, maximizing_player, + /*best_action=*/nullptr, use_undo); + } + + if (child_value < value) { + value = child_value; + if (best_action != nullptr) { + *best_action = action; + } + } + + beta = std::min(beta, value); + if (alpha >= beta) { + break; // alpha cut-off + } + } 
+ + return value; + } +} + +// Expectiminimax algorithm. +// +// Runs expectiminimax until the specified depth. +// See https://en.wikipedia.org/wiki/Expectiminimax for details. +// +// Arguments: +// state: The state to start the search from. +// depth: The depth of the search (not counting chance nodes). +// value_function: A value function, taking in a state and returning a value, +// in terms of the maximizing_player_id. +// maximizing_player_id: The player running the search (current player at root +// of the search tree). +// +// Returns: +// The optimal value of the sub-game starting in state. +double _expectiminimax(const State* state, int depth, + std::function value_function, + Player maximizing_player, Action* best_action) { + if (state->IsTerminal()) { + return state->PlayerReturn(maximizing_player); + } + + if (depth == 0 && !value_function) { + SpielFatalError( + "We assume we can walk the full depth of the tree. " + "Try increasing depth or provide a value_function."); + } + + if (depth == 0) { + return value_function(*state); + } + + Player player = state->CurrentPlayer(); + if (state->IsChanceNode()) { + double value = 0; + for (const auto& actionprob : state->ChanceOutcomes()) { + std::unique_ptr child_state = state->Child(actionprob.first); + double child_value = + _expectiminimax(child_state.get(), depth, value_function, + maximizing_player, /*best_action=*/nullptr); + value += actionprob.second * child_value; + } + return value; + } else if (player == maximizing_player) { + double value = -std::numeric_limits::infinity(); + + for (Action action : state->LegalActions()) { + std::unique_ptr child_state = state->Child(action); + double child_value = _expectiminimax(child_state.get(), + /*depth=*/depth - 1, value_function, + maximizing_player, + /*best_action=*/nullptr); + + if (child_value > value) { + value = child_value; + if (best_action != nullptr) { + *best_action = action; + } + } + } + return value; + } else { + double value = std::numeric_limits::infinity(); + + for (Action action : state->LegalActions()) { + std::unique_ptr child_state = state->Child(action); + double child_value = _expectiminimax(child_state.get(), + /*depth=*/depth - 1, value_function, + maximizing_player, + /*best_action=*/nullptr); + + if (child_value < value) { + value = child_value; + if (best_action != nullptr) { + *best_action = action; + } + } + } + return value; + } +} +} // namespace + +std::pair AlphaBetaSearch( + const Game& game, const State* state, + std::function value_function, int depth_limit, + Player maximizing_player, bool use_undo) { + SPIEL_CHECK_LE(game.NumPlayers(), 2); + + // Check to ensure the correct setup intended for this algorithm. + // Note: do no check perfect vs. imperfect information to support use of + // minimax as a subroutine of PIMC. 
+ GameType game_info = game.GetType(); + SPIEL_CHECK_EQ(game_info.chance_mode, GameType::ChanceMode::kDeterministic); + SPIEL_CHECK_EQ(game_info.dynamics, GameType::Dynamics::kSequential); + SPIEL_CHECK_EQ(game_info.utility, GameType::Utility::kZeroSum); + SPIEL_CHECK_EQ(game_info.reward_model, GameType::RewardModel::kTerminal); + + std::unique_ptr search_root; + if (state == nullptr) { + search_root = game.NewInitialState(); + } else { + search_root = state->Clone(); + } + + if (maximizing_player == kInvalidPlayer) { + maximizing_player = search_root->CurrentPlayer(); + } + + double infinity = std::numeric_limits::infinity(); + Action best_action = kInvalidAction; + double value = _alpha_beta( + search_root.get(), /*depth=*/depth_limit, /*alpha=*/-infinity, + /*beta=*/infinity, value_function, maximizing_player, &best_action, + use_undo); + + return {value, best_action}; +} + +std::pair ExpectiminimaxSearch( + const Game& game, const State* state, + std::function value_function, int depth_limit, + Player maximizing_player) { + SPIEL_CHECK_LE(game.NumPlayers(), 2); + + GameType game_info = game.GetType(); + SPIEL_CHECK_EQ(game_info.chance_mode, + GameType::ChanceMode::kExplicitStochastic); + SPIEL_CHECK_EQ(game_info.information, + GameType::Information::kPerfectInformation); + SPIEL_CHECK_EQ(game_info.dynamics, GameType::Dynamics::kSequential); + SPIEL_CHECK_EQ(game_info.utility, GameType::Utility::kZeroSum); + SPIEL_CHECK_EQ(game_info.reward_model, GameType::RewardModel::kTerminal); + + std::unique_ptr search_root; + if (state == nullptr) { + search_root = game.NewInitialState(); + } else { + search_root = state->Clone(); + } + + if (maximizing_player == kInvalidPlayer) { + SPIEL_CHECK_FALSE(search_root->IsChanceNode()); + maximizing_player = search_root->CurrentPlayer(); + } + + Action best_action = kInvalidAction; + double value = + _expectiminimax(search_root.get(), /*depth=*/depth_limit, value_function, + maximizing_player, &best_action); + + return {value, best_action}; +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/minimax.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/minimax.h new file mode 100644 index 0000000..ec506d1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/minimax.h @@ -0,0 +1,77 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_MINMAX_H_ +#define OPEN_SPIEL_ALGORITHMS_MINMAX_H_ + +#include +#include + +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace algorithms { + +// Solves deterministic, 2-players, perfect-information 0-sum game. +// +// Arguments: +// game: The game to analyze, as returned by `LoadGame`. +// state: The state to start from. If nullptr, starts from initial state. 
+// value_function: An optional function mapping a Spiel `State` to a +// numerical value to the maximizing player, to be used as the value for a +// node when we reach `depth_limit` and the node is not terminal. Use +// `nullptr` for no value function. +// depth_limit: The maximum depth to search over. When this depth is +// reached, an exception will be raised. +// maximizing_player_id: The id of the MAX player. The other player is assumed +// to be MIN. Passing in kInvalidPlayer will set this to the search root's +// current player. + +// Returns: +// A pair of the value of the game for the maximizing player when both +// players play optimally, along with the action that achieves this value. + +std::pair AlphaBetaSearch( + const Game& game, const State* state, + std::function value_function, int depth_limit, + Player maximizing_player, bool use_undo = true); + +// Solves stochastic, 2-players, perfect-information 0-sum game. +// +// Arguments: +// game: The game to analyze, as returned by `LoadGame`. +// state: The state to start from. If nullptr, starts from initial state. +// value_function: An optional function mapping a Spiel `State` to a +// numerical value to the maximizing player, to be used as the value for a +// node when we reach `depth_limit` and the node is not terminal. Use +// `nullptr` or {} for no value function. +// depth_limit: The maximum depth to search over (not counting chance nodes). +// When this depth is reached, an exception will be raised. +// maximizing_player_id: The id of the MAX player. The other player is assumed +// to be MIN. Passing in kInvalidPlayer will set this to the search root's +// current player (which must not be a chance node). + +// Returns: +// A pair of the value of the game for the maximizing player when both +// players play optimally, along with the action that achieves this value. + +std::pair ExpectiminimaxSearch( + const Game& game, const State* state, + std::function value_function, int depth_limit, + Player maximizing_player); + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_MINMAX_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/minimax_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/minimax_test.cc new file mode 100644 index 0000000..0110d6a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/minimax_test.cc @@ -0,0 +1,93 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/minimax.h" + +#include + +#include "open_spiel/games/pig/pig.h" +#include "open_spiel/games/tic_tac_toe/tic_tac_toe.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace { + +// See also the examples/minimax_example.cc for example usage. 
+ +void AlphaBetaSearchTest_TicTacToe() { + std::shared_ptr game = LoadGame("tic_tac_toe"); + std::pair value_and_action = + AlphaBetaSearch(*game, nullptr, {}, -1, kInvalidPlayer); + SPIEL_CHECK_EQ(0.0, value_and_action.first); +} + +void AlphaBetaSearchTest_TicTacToe_Win() { + std::shared_ptr game = LoadGame("tic_tac_toe"); + std::unique_ptr state = game->NewInitialState(); + state->ApplyAction(4); + state->ApplyAction(1); + + // Construct: + // .o. + // .x. + // ... + std::pair value_and_action = + AlphaBetaSearch(*game, state.get(), {}, -1, kInvalidPlayer); + SPIEL_CHECK_EQ(1.0, value_and_action.first); +} + +void AlphaBetaSearchTest_TicTacToe_Loss() { + std::shared_ptr game = LoadGame("tic_tac_toe"); + std::unique_ptr state = game->NewInitialState(); + + // Construct: + // ... + // xox + // ..o + state->ApplyAction(5); + state->ApplyAction(4); + state->ApplyAction(3); + state->ApplyAction(8); + + std::pair value_and_action = + AlphaBetaSearch(*game, state.get(), {}, -1, kInvalidPlayer); + SPIEL_CHECK_EQ(-1.0, value_and_action.first); +} + +int FirstPlayerAdvantage(const State& state) { + const auto& pstate = down_cast(state); + return pstate.score(0) - pstate.score(1); +} + +void ExpectiminimaxSearchTest_Pig() { + std::shared_ptr game = + LoadGame("pig", {{"diceoutcomes", GameParameter(3)}}); + std::pair value_and_action = ExpectiminimaxSearch( + *game, nullptr, FirstPlayerAdvantage, 2, kInvalidPlayer); + SPIEL_CHECK_EQ(1.0 / 3 * 2 + 1.0 / 3 * 3, value_and_action.first); + SPIEL_CHECK_EQ(/*kRoll=*/0, value_and_action.second); +} + +} // namespace +} // namespace algorithms +} // namespace open_spiel + +int main(int argc, char **argv) { + open_spiel::algorithms::AlphaBetaSearchTest_TicTacToe(); + open_spiel::algorithms::AlphaBetaSearchTest_TicTacToe_Win(); + open_spiel::algorithms::AlphaBetaSearchTest_TicTacToe_Loss(); + open_spiel::algorithms::ExpectiminimaxSearchTest_Pig(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/nfg_writer.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/nfg_writer.cc new file mode 100644 index 0000000..8c71a9e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/nfg_writer.cc @@ -0,0 +1,72 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
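// --- Illustrative usage sketch (annotation, not part of the upstream
// OpenSpiel patch): depth-limited alpha-beta on tic-tac-toe with a trivial
// heuristic that scores every non-terminal cutoff node as 0. The call matches
// AlphaBetaSearch as declared in minimax.h above; passing depth_limit = -1
// with no value function, as minimax_test.cc does, searches the full tree.

#include <iostream>
#include <utility>

#include "open_spiel/algorithms/minimax.h"
#include "open_spiel/spiel.h"

void DepthLimitedAlphaBetaSketch() {
  auto game = open_spiel::LoadGame("tic_tac_toe");
  auto heuristic = [](const open_spiel::State&) { return 0.0; };
  std::pair<double, open_spiel::Action> value_and_action =
      open_spiel::algorithms::AlphaBetaSearch(
          *game, /*state=*/nullptr, heuristic, /*depth_limit=*/4,
          /*maximizing_player=*/open_spiel::kInvalidPlayer);
  std::cout << "value=" << value_and_action.first
            << " best_action=" << value_and_action.second << std::endl;
}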
+ +#include "open_spiel/algorithms/nfg_writer.h" + +#include + +#include "open_spiel/abseil-cpp/absl/strings/ascii.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/normal_form_game.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +const std::string GameToNFGString(const Game& game) { + // NFG 1 R "Selten (IJGT, 75), Figure 2, normal form" + // { "Player 1" "Player 2" } { 3 2 } + // 1 1 0 2 0 2 1 1 0 3 2 0 + const auto* nfg = dynamic_cast(&game); + if (nfg == nullptr) { + SpielFatalError("Must be a normal-form game"); + } + + int num_players = nfg->NumPlayers(); + std::vector> legal_actions(num_players); + std::unique_ptr initial_state = nfg->NewInitialState(); + for (Player player = 0; player < num_players; ++player) { + legal_actions[player] = initial_state->LegalActions(player); + } + + // Line 1. + std::string nfg_text = + absl::StrCat("NFG 1 R \"OpenSpiel export of ", nfg->ToString(), "\"\n"); + + // Line 2. + absl::StrAppend(&nfg_text, "{"); + for (Player p = 0; p < num_players; ++p) { + absl::StrAppend(&nfg_text, " \"Player ", p, "\""); + } + absl::StrAppend(&nfg_text, " } {"); + for (Player p = 0; p < num_players; ++p) { + absl::StrAppend(&nfg_text, " ", legal_actions[p].size()); + } + absl::StrAppend(&nfg_text, " }\n\n"); + + // Now the payoffs. + for (auto flat_joint_action : initial_state->LegalActions()) { + std::vector returns = + initial_state->Child(flat_joint_action)->Returns(); + for (Player p = 0; p < returns.size(); ++p) { + absl::StrAppendFormat(&nfg_text, "%.15g ", returns[p]); + } + absl::StripAsciiWhitespace(&nfg_text); + absl::StrAppend(&nfg_text, "\n"); + } + + return nfg_text; +} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/nfg_writer.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/nfg_writer.h new file mode 100644 index 0000000..0eae617 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/nfg_writer.h @@ -0,0 +1,32 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_NFG_WRITER_H_ +#define OPEN_SPIEL_ALGORITHMS_NFG_WRITER_H_ + +#include + +#include "open_spiel/spiel.h" + +namespace open_spiel { + +// Functions to export normal-form games to Gambit's .nfg format. +// http://www.gambit-project.org/gambit13/formats.html#the-strategic-game-nfg-file-format-payoff-version + +// Get the string representation of this normal-form game. 
+const std::string GameToNFGString(const Game& game); + +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_NFG_WRITER_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/nfg_writer_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/nfg_writer_test.cc new file mode 100644 index 0000000..00dbe58 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/nfg_writer_test.cc @@ -0,0 +1,85 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/nfg_writer.h" + +#include "open_spiel/normal_form_game.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace { + +void BasicNFSWriterTestRPS() { + constexpr const char* kRPSNFG = + R"###(NFG 1 R "OpenSpiel export of matrix_rps()" +{ "Player 0" "Player 1" } { 3 3 } + +0 0 +1 -1 +-1 1 +-1 1 +0 0 +1 -1 +1 -1 +-1 1 +0 0 +)###"; + + std::shared_ptr rps = LoadGame("matrix_rps"); + std::string rps_nfg_text = GameToNFGString(*rps); + SPIEL_CHECK_EQ(rps_nfg_text, kRPSNFG); +} + +void BasicNFSWriterTestPD() { + constexpr const char* kPDNFG = R"###(NFG 1 R "OpenSpiel export of matrix_pd()" +{ "Player 0" "Player 1" } { 2 2 } + +5 5 +10 0 +0 10 +1 1 +)###"; + + std::shared_ptr pd = LoadGame("matrix_pd"); + std::string pd_nfg_text = GameToNFGString(*pd); + SPIEL_CHECK_EQ(pd_nfg_text, kPDNFG); +} + +void BasicNFSWriterTestMP3P() { + constexpr const char* kMP3PNFG = + R"###(NFG 1 R "OpenSpiel export of matching_pennies_3p()" +{ "Player 0" "Player 1" "Player 2" } { 2 2 2 } + +1 1 -1 +-1 1 1 +-1 -1 -1 +1 -1 1 +1 -1 1 +-1 -1 -1 +-1 1 1 +1 1 -1 +)###"; + + std::shared_ptr mp3p = LoadGame("matching_pennies_3p"); + std::string mp3p_nfg_text = GameToNFGString(*mp3p); + SPIEL_CHECK_EQ(mp3p_nfg_text, kMP3PNFG); +} +} // namespace +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::BasicNFSWriterTestRPS(); + open_spiel::BasicNFSWriterTestPD(); + open_spiel::BasicNFSWriterTestMP3P(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/observation_history.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/observation_history.cc new file mode 100644 index 0000000..a1297cd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/observation_history.cc @@ -0,0 +1,328 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
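// --- Illustrative usage sketch (annotation, not part of the upstream
// OpenSpiel patch): export a built-in matrix game with GameToNFGString from
// nfg_writer.h above and write the Gambit .nfg payoff text to disk. The
// output file name is arbitrary.

#include <fstream>

#include "open_spiel/algorithms/nfg_writer.h"
#include "open_spiel/spiel.h"

void WriteRpsNfgSketch() {
  auto game = open_spiel::LoadGame("matrix_rps");
  std::ofstream out("matrix_rps.nfg");
  out << open_spiel::GameToNFGString(*game);  // Same text checked in the test.
}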
+ +#include "open_spiel/algorithms/observation_history.h" + +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/observer.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { + +// ----------------------------------------------------------------------------- +// ActionObservationHistory +// ----------------------------------------------------------------------------- + +// TODO(author13) Switch to the new Observation API +ActionObservationHistory::ActionObservationHistory(Player player, + const State& target) + : player_(player) { + SPIEL_CHECK_GE(player_, 0); + SPIEL_CHECK_LT(player_, target.NumPlayers()); + SPIEL_CHECK_TRUE(target.GetGame()->GetType().provides_observation_string); + + const std::vector& history = target.FullHistory(); + history_.reserve(history.size()); + + std::unique_ptr state = target.GetGame()->NewInitialState(); + history_.push_back({absl::nullopt, state->ObservationString(player)}); + for (int i = 0; i < history.size(); i++) { + const auto& [history_player, action] = history[i]; + const bool is_acting = state->CurrentPlayer() == player; + state->ApplyAction(action); + history_.push_back({ + is_acting ? action : static_cast>(absl::nullopt), + state->ObservationString(player) + }); + } +} + +ActionObservationHistory::ActionObservationHistory(const State& target) + : ActionObservationHistory(target.CurrentPlayer(), target) {} + +ActionObservationHistory::ActionObservationHistory( + Player player, + std::vector, std::string>> history) + : player_(player), history_(std::move(history)) { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_FALSE(history_.empty()); // There is always an obs for root node. + SPIEL_CHECK_EQ(history_[0].first, absl::nullopt); // No action available. 
+} + +int ActionObservationHistory::MoveNumber() const { + SPIEL_CHECK_FALSE(history_.empty()); + SPIEL_CHECK_EQ(history_.at(0).first, absl::nullopt); + return history_.size() - 1; +} + +const std::string& ActionObservationHistory::ObservationAt(int time) const { + return history_.at(time).second; +} + +absl::optional ActionObservationHistory::ActionAt(int time) const { + return history_.at(time).first; +} + +bool ActionObservationHistory::CorrespondsTo(Player pl, + const State& state) const { + if (MoveNumber() != state.MoveNumber()) return false; + bool equal = CheckStateCorrespondenceInSimulation(pl, state, MoveNumber()); + SPIEL_CHECK_TRUE(!equal || IsPrefixOf(pl, state)); + SPIEL_CHECK_TRUE(!equal || IsExtensionOf(pl, state)); + return equal; +} + +bool ActionObservationHistory::CorrespondsTo( + const ActionObservationHistory& other) const { + bool equal = player_ == other.player_ && history_ == other.history_; + SPIEL_CHECK_TRUE(!equal || IsPrefixOf(other)); + SPIEL_CHECK_TRUE(!equal || IsExtensionOf(other)); + return equal; +} + +bool ActionObservationHistory::IsPrefixOf( + const ActionObservationHistory& other) const { + if (player_ != other.player_) return false; + + if (CorrespondsToInitialState()) return true; + if (other.CorrespondsToInitialState()) return false; + + const auto& a = history_; + const auto& b = other.history_; + if (a.size() > b.size()) return false; + if (a.size() == b.size()) return a == b; + return std::equal(a.begin(), a.end(), b.begin()); +} + +bool ActionObservationHistory::IsPrefixOf(Player pl, const State& state) const { + const std::shared_ptr game = state.GetGame(); + SPIEL_CHECK_TRUE(game->GetType().provides_observation_string); + + if (CorrespondsToInitialState()) return true; + // Cannot be prefix if state is earlier. + if (MoveNumber() > state.MoveNumber()) return false; + + return CheckStateCorrespondenceInSimulation(pl, state, MoveNumber()); +} + +bool ActionObservationHistory::IsExtensionOf( + const ActionObservationHistory& other) const { + return other.IsPrefixOf(*this); +} + +bool ActionObservationHistory::IsExtensionOf(Player pl, + const State& state) const { + const std::shared_ptr game = state.GetGame(); + SPIEL_CHECK_TRUE(game->GetType().provides_observation_string); + + if (state.IsInitialState()) return true; + // Cannot be extension if state is later. + if (state.MoveNumber() > MoveNumber()) return false; + + // Check the latest observation is identical -- most observations + // will differ only in the last items. + if (state.ObservationString(pl) != ObservationAt(state.MoveNumber())) + return false; + + return CheckStateCorrespondenceInSimulation(pl, state, state.MoveNumber()); +} + +void ActionObservationHistory::Extend(const absl::optional action, + const std::string& observation_string) { + history_.push_back({action, observation_string}); +} + +void ActionObservationHistory::RemoveLast() { + SPIEL_CHECK_GT(history_.size(), 0); + history_.pop_back(); +} + +bool ActionObservationHistory::CheckStateCorrespondenceInSimulation( + Player pl, const State& state, int until_time) const { + const std::vector& state_history = state.FullHistory(); + std::unique_ptr simulation = state.GetGame()->NewInitialState(); + + int i = 0; // The index for state_history access. + int j = 1; // The index for history_ access. 
+ while (simulation->MoveNumber() < until_time) { + SPIEL_CHECK_LT(i, state_history.size()); + SPIEL_CHECK_LT(j, history_.size()); + SPIEL_CHECK_FALSE(simulation->IsTerminal()); + + if (simulation->CurrentPlayer() == pl) { + if (history_[j].first != state_history[i].action) return false; + } else { + if (history_[j].first != absl::nullopt) return false; + } + + simulation->ApplyAction(state_history[i].action); + i++; + + if (history_[j].second != simulation->ObservationString(pl)) return false; + j++; + } + return true; +} + +std::string ActionObservationHistory::ToString() const { + std::string s; + for (int i = 0; i < history_.size(); i++) { + const auto& action_observation = history_[i]; + if (i > 0) absl::StrAppend(&s, ", "); + absl::StrAppend(&s, "(action=", + (action_observation.first == absl::nullopt + ? "None" + : std::to_string(*action_observation.first)), + ", observation=\"", action_observation.second, "\")"); + } + return s; +} + +// ----------------------------------------------------------------------------- +// PublicObservationHistory +// ----------------------------------------------------------------------------- + +PublicObservationHistory::PublicObservationHistory(const State& target) + : observer_(target.GetGame()->MakeObserver( + IIGObservationType{/*public_info*/true, + /*perfect_recall*/false, + /*private_info*/PrivateInfoType::kNone}, + {})) { + history_.reserve(target.FullHistory().size()); + + std::unique_ptr state = target.GetGame()->NewInitialState(); + // Use FullHistory even though we don't need the player -- prevent + // doing a copy. + for (const auto& [_, action] : target.FullHistory()) { + history_.push_back(observer_->StringFrom(*state, kDefaultPlayerId)); + state->ApplyAction(action); + } + history_.push_back(observer_->StringFrom(*state, kDefaultPlayerId)); +} + +PublicObservationHistory::PublicObservationHistory( + std::vector history) + : history_(std::move(history)) { + SPIEL_CHECK_FALSE(history_.empty()); +} + +int PublicObservationHistory::MoveNumber() const { + SPIEL_CHECK_FALSE(history_.empty()); + return history_.size() - 1; +} + +const std::string& PublicObservationHistory::ObservationAt(int time) const { + return history_.at(time); +} + +bool PublicObservationHistory::CorrespondsTo( + const PublicObservationHistory& other) const { + return history_ == other.history_; +} + +bool PublicObservationHistory::CorrespondsTo(const State& state) const { + if (MoveNumber() != state.MoveNumber()) return false; + bool equal = CheckStateCorrespondenceInSimulation(state, MoveNumber()); + SPIEL_CHECK_TRUE(!equal || IsPrefixOf(state)); + SPIEL_CHECK_TRUE(!equal || IsExtensionOf(state)); + return equal; +} + +bool PublicObservationHistory::IsPrefixOf( + const PublicObservationHistory& other) const { + if (CorrespondsToInitialState()) return true; + if (other.CorrespondsToInitialState()) return false; + + const auto& a = history_; + const auto& b = other.history_; + if (a.size() > b.size()) return false; + if (a.size() == b.size()) return a == b; + return std::equal(a.begin(), a.end(), b.begin()); +} + +bool PublicObservationHistory::IsPrefixOf(const State& state) const { + if (CorrespondsToInitialState()) return true; + // Cannot be prefix if state is earlier. 
+ if (state.MoveNumber() < MoveNumber()) return false; + + return CheckStateCorrespondenceInSimulation(state, MoveNumber()); +} + +bool PublicObservationHistory::IsExtensionOf( + const PublicObservationHistory& other) const { + return other.IsPrefixOf(*this); +} + +bool PublicObservationHistory::IsExtensionOf(const State& state) const { + if (state.MoveNumber() > MoveNumber()) return false; + + // Check the latest observation is identical -- most observations + // will differ only in the last items. + if (observer_->StringFrom(state, kDefaultPlayerId) != + ObservationAt(state.MoveNumber())) + return false; + + return CheckStateCorrespondenceInSimulation(state, state.MoveNumber()); +} + +std::string PublicObservationHistory::ToString() const { + return absl::StrJoin(history_, ", "); +} + +void PublicObservationHistory::push_back(const std::string& observation) { + SPIEL_CHECK_FALSE(observation.empty()); + history_.push_back(observation); +} + +bool PublicObservationHistory::CheckStateCorrespondenceInSimulation( + const State& state, int until_time) const { + const std::vector& state_history = state.FullHistory(); + std::unique_ptr simulation = state.GetGame()->NewInitialState(); + + int i = 0; // The index for state_history access. + int j = 1; // The index for history_ access. + while (simulation->MoveNumber() < until_time) { + SPIEL_CHECK_LT(i, state_history.size()); + SPIEL_CHECK_LT(j, history_.size()); + SPIEL_CHECK_FALSE(simulation->IsTerminal()); + + simulation->ApplyAction(state_history[i].action); + i++; + + if (history_.at(j) != observer_->StringFrom(*simulation, kDefaultPlayerId)) + return false; + j++; + } + return true; +} + +// ----------------------------------------------------------------------------- +// Streaming. +// ----------------------------------------------------------------------------- + +std::ostream& operator<<(std::ostream& os, + const ActionObservationHistory& aoh) { + return os << aoh.ToString(); +} + +std::ostream& operator<<(std::ostream& os, + const PublicObservationHistory& poh) { + return os << poh.ToString(); +} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/observation_history.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/observation_history.h new file mode 100644 index 0000000..2d1e253 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/observation_history.h @@ -0,0 +1,238 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_FOG_OBSERVATION_HISTORY_H_ +#define OPEN_SPIEL_FOG_OBSERVATION_HISTORY_H_ + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { + +// Action-Observation histories partition the game tree in the same way +// as information states, but they contain more structured information. 
+// Some algorithms use this structured information for targeted traversal +// of the imperfect information tree. +// +// Note that in the FOG paper, Action-Observation history $s$ for player $i$ +// at world history $h$ is defined precisely as +// +// s_i(h) := (O_i^0, a_i^0, O_i^1, a_i^1, ... O_i^{t-1}, a_i^{t-1}, O_i^t) +// +// and this can be interpreted for simultaneous-move games as +// +// Initial Observation + List of Pair((Action, Observation)) +// +// However in OpenSpiel the player is not always acting, as in sequential games, +// but we'd like to support those as well. So we make a compromise to just have +// AOH as a vector of *optional* action and observation. +class ActionObservationHistory { + private: + // Player to which this Action-Observation history belongs. + const Player player_; + + // Actual Action-Observation history. + std::vector, std::string>> history_; + + public: + // Constructs an Action-Observation history for a given player at the target + // state. This method can be called only if the game provides an + // implementation of ObservationString(). + // + // Note that this constructor makes a traversal of the state's history + // to collect player's observations and this can be expensive. + ActionObservationHistory(Player player, const State& target); + + // Constructs an Action-Observation history for the current player + // at the target state. + ActionObservationHistory(const State& target); + + // Constructs an Action-Observation history "manually" from history vector. + ActionObservationHistory( + Player player, + std::vector, std::string>> history); + + ActionObservationHistory(const ActionObservationHistory&) = default; + ~ActionObservationHistory() = default; + + const std::vector, std::string>>& History() + const { return history_; } + Player GetPlayer() const { return player_; } + + // Gets the current move number (time on the clock) - this allows to relate + // the "depth"of Action-Observation history to the "depth" of State, + // as it should correspond to the State::MoveNumber(). + int MoveNumber() const; + + // Returns the player's observation (i.e. public+private observation) + // at the given time. Root node has time 0. + const std::string& ObservationAt(int time) const; + + // Returns the action at the given time. + // If player was not acting at requested time, returns a nullopt. + absl::optional ActionAt(int time) const; + + // Does the Action-Observation history correspond to the initial state? + bool CorrespondsToInitialState() const { return MoveNumber() == 0; } + + // Does the Action-Observation history correspond to the other + // Action-Observation history? This is just like an equality operator. + bool CorrespondsTo(const ActionObservationHistory& other) const; + + // Does the Action-Observation history correspond to the requested state? + // + // In other words, if we constructed Action-Observation history for the state, + // would that correspond to this Action-Observation history? + // + // As in the following: + // + // CorrespondsTo(pl, state) == CorrespondsTo( + // ActionObservationHistory(pl, state)) + // + // This method is provided so that you do not need to construct + // Action-Observation History explicitly and is more efficient. + // There can be state1 != state2 but AOH(state1) == AOH(state2). + bool CorrespondsTo(Player pl, const State& state) const; + + // Is the current Action-Observation history prefix (or equal) of the other? 
+ bool IsPrefixOf(const ActionObservationHistory& other) const; + + // Is the current Action-Observation history prefix (or equal) of the + // Action-Observation history that we could construct from the State? + bool IsPrefixOf(Player pl, const State& state) const; + + // Is the current Action-Observation history extension (or equal) + // of the other one? + bool IsExtensionOf(const ActionObservationHistory& other) const; + + // Is the current Action-Observation history extension (or equal) + // of the Action-Observation history that we could construct from the State? + bool IsExtensionOf(Player pl, const State& state) const; + + void Extend(const absl::optional action, + const std::string& observation_string); + + void RemoveLast(); + + std::string ToString() const; + + bool operator==(const ActionObservationHistory& other) const { + return CorrespondsTo(other); + } + + private: + bool CheckStateCorrespondenceInSimulation(Player pl, const State& state, + int until_time) const; +}; + +// Public-observation histories partition the game tree according to available +// public information into a corresponding public tree. Public observation +// history identifies the current public state (a node in the public tree), +// and is useful for integration with public state API -- you can construct +// a PublicState by using the public observation history. +// +// Some algorithms use this structured information for targeted traversal +// of the (im)perfect information tree. +class PublicObservationHistory { + private: + std::vector history_; + + public: + // Construct a history of public observations. + // This method can be called only if the game provides factored observations + // strings, mainly State::PublicObservationString() -- private observations + // are not used. + // + // Note that this constructor makes a traversal of the state's history + // to collect public observations and this can be expensive. + PublicObservationHistory(const State& target); + + // Constructs Public-Observation history "manually". + PublicObservationHistory(std::vector history); + + PublicObservationHistory(const PublicObservationHistory&) = default; + ~PublicObservationHistory() = default; + + const std::vector& History() const { return history_; } + + // Gets the current move number (time on the clock) - this allows to relate + // the "depth"of Public-Observation history to the "depth" of State, + // as it should correspond to the State::MoveNumber(). + int MoveNumber() const; + + // Returns the public observation at the given time. Root node has time 0. + const std::string& ObservationAt(int time) const; + + // Does the Public-Observation history correspond to the initial state? + bool CorrespondsToInitialState() const { return MoveNumber() == 0; } + + // Does the Public-Observation history correspond to the other + // Public-Observation history? This is just like an equality operator. + bool CorrespondsTo(const PublicObservationHistory& other) const; + + // Does the Public-Observation history correspond to the requested state? + // + // In other words, if we constructed Public-Observation history for the state, + // would that correspond to this Public-Observation history? + // As in the following: + // + // CorrespondsTo(state) == CorrespondsTo(PublicObservationHistory(state)) + // + // This method is provided so that you do not need to construct + // Public-Observation history explicitly and is more efficient. + // This is like an equality operator. 
+ bool CorrespondsTo(const State& state) const; + + // Is the current Public-Observation history prefix (or equal) of the other? + bool IsPrefixOf(const PublicObservationHistory& other) const; + + // Is the current Public-Observation history prefix (or equal) of the + // Public-Observation history that we could construct from the State? + bool IsPrefixOf(const State& state) const; + + // Is the current Public-Observation history extension (or equal) + // of the other one? + bool IsExtensionOf(const PublicObservationHistory& other) const; + + // Is the current Public-Observation history extension (or equal) + // of the Public-Observation history that we could construct from the State? + bool IsExtensionOf(const State& state) const; + + std::string ToString() const; + + bool operator==(const PublicObservationHistory& other) const { + return CorrespondsTo(other); + } + + private: + std::shared_ptr observer_; + + void push_back(const std::string& observation); + bool CheckStateCorrespondenceInSimulation(const State& state, + int until_time) const; +}; + +std::ostream& operator<<(std::ostream& os, const ActionObservationHistory& aoh); +std::ostream& operator<<(std::ostream& os, const PublicObservationHistory& poh); + +} // namespace open_spiel + +#endif // OPEN_SPIEL_FOG_OBSERVATION_HISTORY_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/observation_history_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/observation_history_test.cc new file mode 100644 index 0000000..202d600 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/observation_history_test.cc @@ -0,0 +1,99 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/observation_history.h" + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace { + +void CheckKuhnPokerObservationHistory() { + using AOH = ActionObservationHistory; + using POH = PublicObservationHistory; + // Use NONE constant to make it similar to the Python test. 
+ constexpr absl::optional NONE = absl::nullopt; + + std::shared_ptr game = LoadGame("kuhn_poker"); + + std::unique_ptr s = game->NewInitialState(); + SPIEL_CHECK_TRUE(s->IsChanceNode()); + SPIEL_CHECK_EQ(POH(*s), POH({"start game"})); + SPIEL_CHECK_EQ(AOH(0, *s), AOH(0, {{NONE, ""}})); + SPIEL_CHECK_EQ(AOH(1, *s), AOH(1, {{NONE, ""}})); + + s->ApplyAction(2); + SPIEL_CHECK_TRUE(s->IsChanceNode()); + SPIEL_CHECK_EQ(POH(*s), POH({"start game", "Deal to player 0"})); + SPIEL_CHECK_EQ(AOH(0, *s), AOH(0, {{NONE, ""}, {NONE, "211"}})); + SPIEL_CHECK_EQ(AOH(1, *s), AOH(1, {{NONE, ""}, {NONE, ""}})); + + s->ApplyAction(1); + SPIEL_CHECK_TRUE(s->IsPlayerNode()); + SPIEL_CHECK_EQ(POH(*s), + POH({"start game", "Deal to player 0", "Deal to player 1"})); + SPIEL_CHECK_EQ(AOH(0, *s), + AOH(0, {{NONE, ""}, {NONE, "211"}, {NONE, "211"}})); + SPIEL_CHECK_EQ(AOH(1, *s), AOH(1, {{NONE, ""}, {NONE, ""}, {NONE, "111"}})); + + s->ApplyAction(0); + SPIEL_CHECK_TRUE(s->IsPlayerNode()); + SPIEL_CHECK_EQ(POH(*s), POH({"start game", "Deal to player 0", + "Deal to player 1", "Pass"})); + SPIEL_CHECK_EQ( + AOH(0, *s), + AOH(0, {{NONE, ""}, {NONE, "211"}, {NONE, "211"}, {0, "211"}})); + SPIEL_CHECK_EQ( + AOH(1, *s), + AOH(1, {{NONE, ""}, {NONE, ""}, {NONE, "111"}, {NONE, "111"}})); + + s->ApplyAction(1); + SPIEL_CHECK_TRUE(s->IsPlayerNode()); + SPIEL_CHECK_EQ(POH(*s), POH({"start game", "Deal to player 0", + "Deal to player 1", "Pass", "Bet"})); + SPIEL_CHECK_EQ(AOH(0, *s), AOH(0, {{NONE, ""}, + {NONE, "211"}, + {NONE, "211"}, + {0, "211"}, + {NONE, "212"}})); + SPIEL_CHECK_EQ( + AOH(1, *s), + AOH(1, + {{NONE, ""}, {NONE, ""}, {NONE, "111"}, {NONE, "111"}, {1, "112"}})); + + s->ApplyAction(1); + SPIEL_CHECK_TRUE(s->IsTerminal()); + SPIEL_CHECK_EQ(POH(*s), POH({"start game", "Deal to player 0", + "Deal to player 1", "Pass", "Bet", "Bet"})); + SPIEL_CHECK_EQ(AOH(0, *s), AOH(0, {{NONE, ""}, + {NONE, "211"}, + {NONE, "211"}, + {0, "211"}, + {NONE, "212"}, + {1, "222"}})); + SPIEL_CHECK_EQ(AOH(1, *s), AOH(1, {{NONE, ""}, + {NONE, ""}, + {NONE, "111"}, + {NONE, "111"}, + {1, "112"}, + {NONE, "122"}})); +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::CheckKuhnPokerObservationHistory(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/oos.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/oos.cc new file mode 100644 index 0000000..3f226fd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/oos.cc @@ -0,0 +1,575 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
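The test above only exercises history equality; the prefix/extension relations declared in observation_history.h can be checked in the same style. A minimal editorial sketch (not part of the diff), reusing the kuhn_poker setup from the test; the function name is illustrative only:

```cpp
// Sketch: IsPrefixOf / IsExtensionOf on Kuhn poker, using only the API
// declared in observation_history.h.
#include <memory>

#include "open_spiel/algorithms/observation_history.h"
#include "open_spiel/spiel.h"
#include "open_spiel/spiel_utils.h"

void PrefixExtensionSketch() {
  using open_spiel::ActionObservationHistory;
  std::shared_ptr<const open_spiel::Game> game =
      open_spiel::LoadGame("kuhn_poker");
  std::unique_ptr<open_spiel::State> s = game->NewInitialState();
  ActionObservationHistory root_aoh(/*player=*/0, *s);

  s->ApplyAction(2);  // Deal to player 0.
  s->ApplyAction(1);  // Deal to player 1.
  ActionObservationHistory later_aoh(/*player=*/0, *s);

  // The root history is a prefix of the longer one, and the longer one is
  // an extension of the root; both relations also accept a State directly.
  SPIEL_CHECK_TRUE(root_aoh.IsPrefixOf(later_aoh));
  SPIEL_CHECK_TRUE(later_aoh.IsExtensionOf(root_aoh));
  SPIEL_CHECK_TRUE(root_aoh.IsPrefixOf(/*pl=*/0, *s));
}
```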
+ +#include "open_spiel/algorithms/oos.h" + +#include +#include +#include + +#include "open_spiel/policy.h" + +namespace open_spiel { +namespace algorithms { + +// ----------------------------------------------------------------------------- +// Probability distributions +// ----------------------------------------------------------------------------- + +bool IsValidProbDistribution(const ActionsAndProbs& probs) { + double sum_probs = 0; + for (const auto& [action, prob] : probs) { + if (prob < 0) return false; + if (prob > 1) return false; + sum_probs += prob; + } + return abs(sum_probs - 1.0) < 1e-10; +} + +bool IsPositiveProbDistribution(const ActionsAndProbs& probs) { + double sum_probs = 0; + for (const auto& [action, prob] : probs) { + if (prob <= 0) return false; + if (prob > 1) return false; + sum_probs += prob; + } + return abs(sum_probs - 1.0) < 1e-10; +} + +// ----------------------------------------------------------------------------- +// Exploration policy +// ----------------------------------------------------------------------------- + +ActionsAndProbs ExplorativeSamplingPolicy::GetStatePolicy( + const State& state) const { + if (state.IsChanceNode()) { + return state.ChanceOutcomes(); + } else if (state.IsPlayerNode()) { + return GetStatePolicy(state.InformationStateString()); + } + SpielFatalError("Could not get policy for this state."); +} + +ActionsAndProbs ExplorativeSamplingPolicy::GetStatePolicy( + const std::string& info_state) const { + auto policy = CFRCurrentPolicy::GetStatePolicy(info_state); + const double unif = 1. / policy.size(); + for (auto& [_, prob] : policy) { + prob = exploration_ * unif + (1 - exploration_) * prob; + } + return policy; +} + +// ----------------------------------------------------------------------------- +// Targeted policy : public methods +// ----------------------------------------------------------------------------- + +void TargetedPolicy::NoTargeting() { + targeting_ = kDoNotUseTargeting; + target_public_state_ = kNoPublicObsTargetSpecified; + target_info_state_ = kNoActionObsTargetSpecified; +} + +void TargetedPolicy::UpdateTarget(const ActionObservationHistory* info_state) { + NoTargeting(); // Reset. + targeting_ = kInfoStateTargeting; + target_info_state_ = info_state; +} + +void TargetedPolicy::UpdateTarget( + const PublicObservationHistory* public_state) { + NoTargeting(); // Reset. + targeting_ = kPublicStateTargeting; + target_public_state_ = public_state; +} + +// Negative zeros denote the banned actions. Useful for debugging, +// as it is immediately obvious which actions have been banned. +// It is not currently used for any indication of state +// (but in principle could be). +constexpr double kBannedAction = -0.; + +ActionsAndProbs TargetedPolicy::GetStatePolicy(const State& h) const { + // Check if current state is part of the currently built tree. + ActionsAndProbs policy; + if (h.IsChanceNode()) { + policy = h.ChanceOutcomes(); + } else if (h.IsPlayerNode()) { + policy = CFRCurrentPolicy::GetStatePolicy(h); + } else { + SpielFatalError("Could not get policy for this state."); + } + + double biased_sum = 0.0; + for (auto& [action, prob] : policy) { + if (IsAllowedAction(h, action)) { + biased_sum += prob; + } else { + prob = kBannedAction; + } + } + + // Normalize the biased policy if some actions have been banned. + double bias_exploration = bias_exploration_; // Default exploration. 
+ if (biased_sum > 0) { + for (auto& [_, prob] : policy) { + prob /= biased_sum; + } + } else { + // Do only uniform exploration when all actions are banned. + // This means the targeted policy has become "lost" in the game due + // to its imperfect information structure. Just because an action is locally + // allowed, it does not mean that we will always reach the target by + // following the (locally) allowed actions. + bias_exploration = 1.; + if (stats_) ++(stats_->missed_targets); + } + + // Mix in exploration. + const double unif = 1. / policy.size(); + for (auto& [_, prob] : policy) { + prob = bias_exploration * unif + (1 - bias_exploration) * prob; + } + return policy; +} + +bool TargetedPolicy::IsAllowedAction(const State& h, + const Action& action) const { + if (targeting_ == kDoNotUseTargeting) return true; + + const std::unique_ptr ha = h.Child(action); + + if (targeting_ == Targeting::kInfoStateTargeting) { + SPIEL_CHECK_NE(target_info_state_, kNoActionObsTargetSpecified); + return target_info_state_->IsExtensionOf(target_info_state_->GetPlayer(), + *ha); + } + + if (targeting_ == Targeting::kPublicStateTargeting) { + SPIEL_CHECK_NE(target_public_state_, kNoPublicObsTargetSpecified); + return target_public_state_->IsExtensionOf(*ha); + } + + SpielFatalError("Unknown targeting."); +} + +bool TargetedPolicy::IsTargetHit(const State& h) { + SPIEL_CHECK_TRUE(targeting_ != kInfoStateTargeting || + target_info_state_ != kNoActionObsTargetSpecified); + SPIEL_CHECK_TRUE(targeting_ != kPublicStateTargeting || + target_public_state_ != kNoPublicObsTargetSpecified); + const bool hit_info_state = + targeting_ == kInfoStateTargeting && + target_info_state_->CorrespondsTo(target_info_state_->GetPlayer(), h); + const bool hit_public_state = targeting_ == kPublicStateTargeting && + target_public_state_->CorrespondsTo(h); + return hit_info_state || hit_public_state; +} + +// ----------------------------------------------------------------------------- +// OOS stats +// ----------------------------------------------------------------------------- + +void OnlineStats::Reset() { + root_visits = 0; + state_visits = 0; + terminal_visits = 0; + rollouts = 0; + target_visits = 0; + target_biased_visits = 0; + biased_iterations = 0; + missed_targets = 0; +} + +std::string OnlineStats::ToString() const { + return absl::StrCat( + "Root visits: ", root_visits, "\n", + "State visits: ", state_visits, "\n", + "Terminal visits: ", terminal_visits, "\n", + "Rollouts (terminals): ", rollouts, "\n", + "Target visits: ", target_visits, "\n", + "Target biased visits: ", target_biased_visits, "\n", + "Biased iterations: ", biased_iterations, "\n", + "Missed targets: ", missed_targets, "\n"); +} + +void OnlineStats::CheckConsistency() const { + SPIEL_CHECK_EQ(root_visits, terminal_visits + rollouts); + SPIEL_CHECK_LE(root_visits, state_visits); + SPIEL_CHECK_LE(target_biased_visits, target_visits); + SPIEL_CHECK_GE(root_visits, 0); + SPIEL_CHECK_GE(state_visits, 0); + SPIEL_CHECK_GE(terminal_visits, 0); + SPIEL_CHECK_GE(rollouts, 0); + SPIEL_CHECK_GE(target_visits, 0); + SPIEL_CHECK_GE(target_biased_visits, 0); + SPIEL_CHECK_GE(biased_iterations, 0); + SPIEL_CHECK_GE(missed_targets, 0); +} + +std::ostream& operator<<(std::ostream& os, const OnlineStats& stats) { + return os << stats.ToString(); +} + +// ----------------------------------------------------------------------------- +// OOS algorithm : public methods. 
+// ----------------------------------------------------------------------------- + +OOSAlgorithm::OOSAlgorithm(std::shared_ptr game, + std::unique_ptr values, + std::unique_ptr random, + std::unique_ptr sample_policy, + std::unique_ptr bias_policy, + std::shared_ptr default_policy, + double target_biasing) + : game_(game), + values_(std::move(values)), + random_(std::move(random)), + sample_policy_(std::move(sample_policy)), + bias_policy_(std::move(bias_policy)), + default_policy_(std::move(default_policy)), + target_biasing_(target_biasing) { + SPIEL_CHECK_PROB(target_biasing_); + SPIEL_CHECK_EQ(game->GetType().dynamics, GameType::Dynamics::kSequential); + SPIEL_CHECK_TRUE(game->GetType().provides_observation_string); + SPIEL_CHECK_TRUE(game->GetType().provides_information_state_string); + stats_.Reset(); + if (bias_policy_) bias_policy_->TrackStatistics(&stats_); +} + +OOSAlgorithm::OOSAlgorithm(std::shared_ptr game) + : OOSAlgorithm(std::move(game), std::make_unique(), + std::make_unique(/*seed=*/0), + /*sample_policy=*/nullptr, + /*bias_policy=*/nullptr, + /*default_policy=*/std::make_shared(), + kDefaultBiasing) { + // Make sure the policies receive references to the values table. + sample_policy_ = std::make_unique(*values_); + bias_policy_ = std::make_unique(game_, *values_); + bias_policy_->TrackStatistics(&stats_); +} + +void OOSAlgorithm::RunUnbiasedIterations(int iterations) { + bias_policy_->NoTargeting(); + + for (int t = 0; t < iterations; ++t) { + for (Player exploringPl = 0; exploringPl < 2; ++exploringPl) { + std::unique_ptr h = game_->NewInitialState(); + is_biased_iteration_ = false; + is_below_target_ = true; + + RootIteration(h.get(), Player(exploringPl)); + } + } +} + +void OOSAlgorithm::RunTargetedIterations( + const ActionObservationHistory& target_info_state, int iterations) { + if (target_info_state.CorrespondsToInitialState()) + return RunUnbiasedIterations(iterations); + + bias_policy_->UpdateTarget(&target_info_state); + RunTargetedIterations(iterations); +} + +void OOSAlgorithm::RunTargetedIterations( + const PublicObservationHistory& target_public_state, int iterations) { + if (target_public_state.CorrespondsToInitialState()) + return RunUnbiasedIterations(iterations); + + bias_policy_->UpdateTarget(&target_public_state); + RunTargetedIterations(iterations); +} + +// ----------------------------------------------------------------------------- +// OOS algorithm : internal methods. +// ----------------------------------------------------------------------------- + +void OOSAlgorithm::RunTargetedIterations(int iterations) { + if (target_biasing_ == 0.) { + return RunUnbiasedIterations(iterations); + } + + for (int t = 0; t < iterations; ++t) { + for (Player exploringPl = 0; exploringPl < 2; ++exploringPl) { + std::unique_ptr h = game_->NewInitialState(); + is_biased_iteration_ = random_->RandomUniform() <= target_biasing_; + // We always have a target, which cannot be a root node + // (this was handled by publicly facing methods) + is_below_target_ = false; + + if (is_biased_iteration_) stats_.biased_iterations++; + RootIteration(h.get(), Player(exploringPl)); + } + } +} + +void OOSAlgorithm::RootIteration(State* h, Player exploringPl) { + ++stats_.root_visits; + + // Make sure we don't use mutable vars where we shouldn't. + // We have bunch of not-nan tests all over the code to catch any bugs. 
+ u_z_ = std::numeric_limits::quiet_NaN(); + s_z_all_ = std::numeric_limits::quiet_NaN(); + + Iteration(h, + /*rm_h_pl=*/1.0, /*rm_h_opp=*/1.0, + /*bs_h_all=*/1.0, /*us_h_all=*/1.0, + /*us_h_cn=*/1.0, exploringPl); +} + +double OOSAlgorithm::Iteration(State* h, double rm_h_pl, double rm_h_opp, + double bs_h_all, double us_h_all, double us_h_cn, + Player exploringPl) { + // Have we hit the target? And update some statistics. + ++stats_.state_visits; + + if (bias_policy_->IsTargetHit(*h)) { + is_below_target_ = true; + + ++stats_.target_visits; + if (is_biased_iteration_) ++stats_.target_biased_visits; + } + + // Dispatch to appropriate methods. + if (h->IsTerminal()) { + ++stats_.terminal_visits; + return IterationTerminalNode(h, bs_h_all, us_h_all, exploringPl); + } + + if (h->IsChanceNode()) { + return IterationChanceNode(h, rm_h_pl, rm_h_opp, bs_h_all, us_h_all, + us_h_cn, exploringPl); + } + + if (h->IsPlayerNode()) { + return IterationPlayerNode(h, rm_h_pl, rm_h_opp, bs_h_all, us_h_all, + us_h_cn, exploringPl); + } + + SpielFatalError("Unrecognized state type."); +} + +double OOSAlgorithm::IterationTerminalNode(State* h, double bs_h_all, + double us_h_all, + Player exploringPl) { + SPIEL_DCHECK_TRUE(h->IsTerminal()); + s_z_all_ = Bias(bs_h_all, us_h_all); + u_z_ = h->PlayerReturn(exploringPl); + return u_z_; +} + +double OOSAlgorithm::IterationChanceNode(State* h, double rm_h_pl, + double rm_h_opp, double bs_h_all, + double us_h_all, double us_h_cn, + Player exploringPl) { + SPIEL_DCHECK_TRUE(h->IsChanceNode()); + + const TakeAction take = SelectAction(h, IsBiasingApplicable(bs_h_all)); + const double s_ha_all = Bias(take.bs, take.us); + SPIEL_DCHECK_GT(s_ha_all, 0); + + h->ApplyAction(take.action); + const double u_ha = + Iteration(h, rm_h_pl, rm_h_opp, bs_h_all * take.bs, us_h_all * take.us, + us_h_cn * take.us, exploringPl); + + // Compute estimate of the expected utility. + double u_h = u_ha * take.us / s_ha_all; + SPIEL_DCHECK_FALSE(std::isnan(u_h)); + SPIEL_DCHECK_FALSE(std::isinf(u_h)); + return u_h; +} + +double OOSAlgorithm::IterationPlayerNode(State* h, double rm_h_pl, + double rm_h_opp, double bs_h_all, + double us_h_all, double us_h_cn, + Player exploringPl) { + SPIEL_DCHECK_TRUE(h->IsPlayerNode()); + + bool exploring_move_in_node = h->CurrentPlayer() == exploringPl; + const std::string info_state = h->InformationStateString(); + + const double s_h_all = Bias(bs_h_all, us_h_all); + SPIEL_DCHECK_GT(s_h_all, 0); + const auto it = values_->find(info_state); + bool is_leaf_state = it == values_->end(); + + // Note: we cannot use h / aoh after this code executes, + // as it will be set to leaf values. + const PlayerNodeOutcome outcome = + is_leaf_state + ? IncrementallyBuildTree(h, info_state, s_h_all, exploringPl) + : SampleExistingTree(h, info_state, &it->second, rm_h_pl, rm_h_opp, + bs_h_all, us_h_all, us_h_cn, exploringPl); + + SPIEL_DCHECK_TRUE(h->IsTerminal()); + SPIEL_DCHECK_FALSE(std::isnan(u_z_)); + SPIEL_DCHECK_FALSE(std::isnan(outcome.u_h)); + SPIEL_DCHECK_FALSE(std::isinf(outcome.u_h)); + + // Note: the only probability that's missing here is rm_h_pl + // for it to be full reach probability weighted by full sampling probability. 
+ double importance_sampling_ratio = rm_h_opp * us_h_cn / s_h_all; + + if (exploring_move_in_node) { + UpdateInfoStateCumulativeRegrets(&outcome.data, outcome.action, + outcome.u_ha, outcome.u_h, + importance_sampling_ratio); + } else { + UpdateInfoStateCumulativePolicy(&outcome.data, importance_sampling_ratio); + } + + return outcome.u_h; +} + +PlayerNodeOutcome OOSAlgorithm::SampleExistingTree( + State* h, const std::string& info_state, CFRInfoStateValues* values, + double rm_h_pl, double rm_h_opp, double bs_h_all, double us_h_all, + double us_h_cn, Player exploringPl) { + SPIEL_DCHECK_TRUE(h->IsPlayerNode()); + SPIEL_DCHECK_FALSE(std::isnan(rm_h_pl)); + SPIEL_DCHECK_FALSE(std::isnan(rm_h_opp)); + SPIEL_DCHECK_FALSE(std::isnan(bs_h_all)); + SPIEL_DCHECK_FALSE(std::isnan(us_h_all)); + SPIEL_DCHECK_FALSE(std::isnan(us_h_cn)); + + const bool exploring_move_in_node = h->CurrentPlayer() == exploringPl; + const TakeAction take = SelectAction(h, IsBiasingApplicable(bs_h_all)); + + const int action_index = values->GetActionIndex(take.action); + const double rm_ha_both = values->current_policy[action_index]; + const double s_ha_all = Bias(take.bs, take.us); + SPIEL_DCHECK_GT(s_ha_all, 0); + + h->ApplyAction(take.action); + + const double u_ha = + Iteration(h, (exploring_move_in_node) ? rm_h_pl * rm_ha_both : rm_h_pl, + (exploring_move_in_node) ? rm_h_opp : rm_h_opp * rm_ha_both, + bs_h_all * take.bs, us_h_all * take.us, us_h_cn, exploringPl); + + double u_h = u_ha * rm_ha_both / s_ha_all; + SPIEL_DCHECK_FALSE(std::isnan(rm_ha_both)); + SPIEL_DCHECK_FALSE(std::isnan(u_h)); + return PlayerNodeOutcome{take.action, rm_ha_both, u_h, u_ha / s_ha_all, + *values}; +} + +PlayerNodeOutcome OOSAlgorithm::IncrementallyBuildTree( + State* h, const std::string& info_state, double s_h_all, + Player exploringPl) { + SPIEL_DCHECK_FALSE(std::isnan(s_h_all)); + ++stats_.rollouts; + + // The current history is a leaf within the currently built look-ahead tree. + // By adding info state values, we make sure that next sampling from here + // will be into the existing tree. + const std::vector actions = h->LegalActions(); + const auto [it, state_inserted] = + values_->emplace(info_state, CFRInfoStateValues(actions)); + // If it was already in the values, we shouldn't be building the tree. + SPIEL_DCHECK_TRUE(state_inserted); + + const double rm_ha_both = 1.0 / actions.size(); + double reach_prob = 1.0; + Action first_action = kInvalidAction; + SPIEL_DCHECK_TRUE(h->IsPlayerNode()); + while (!h->IsTerminal()) { + ActionsAndProbs policy; + if (h->IsChanceNode()) { + policy = h->ChanceOutcomes(); + } else if (h->IsPlayerNode()) { + policy = UniformStatePolicy(*h); + } else { + SpielFatalError("Invalid state"); + } + + const auto [action, prob] = SampleAction(policy, random_->RandomUniform()); + + if (first_action == kInvalidAction) { + first_action = action; + } + reach_prob *= prob; + h->ApplyAction(action); + } + SPIEL_DCHECK_NE(first_action, kInvalidAction); + + u_z_ = h->PlayerReturn(exploringPl); + s_z_all_ = s_h_all * reach_prob; + + // The expected values for u(h) must be unbiased so MCCFR can work correctly. + // Normally we use importance sampling, but since the strategy and sampling + // policy are the same, they cancel each other out. Leaving just leaf value + // for the current estimate. 
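// [Editorial note, not in the original source: at this point the rollout
// from the newly expanded node was sampled from exactly the policy the
// default (uniform) policy prescribes, so the importance ratio
// (strategy reach / sampling reach) over the rollout is
// reach_prob / reach_prob = 1, and u_z_ can be used directly as the
// unbiased estimate.]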
+ const double u_h = u_z_; + const double u_ha = u_z_; + + return PlayerNodeOutcome{first_action, rm_ha_both, u_h, u_ha, it->second}; +} + +bool OOSAlgorithm::IsBiasingApplicable(double bs_h_all) { + return is_biased_iteration_ && !is_below_target_ && bs_h_all > 0.0; +} + +TakeAction OOSAlgorithm::SelectAction(State* h, bool do_biased_sample) { + const ActionsAndProbs& sample_probs = sample_policy_->GetStatePolicy(*h); + const ActionsAndProbs& biased_probs = bias_policy_->GetStatePolicy(*h); + + // Check what comes out of policies are proper distributions. + SPIEL_DCHECK_TRUE(IsValidProbDistribution(biased_probs)); + // All leaves must be reachable under sample policy! + SPIEL_DCHECK_TRUE(IsPositiveProbDistribution(sample_probs)); + + // When we do biased sampling, we completely ignore + // the sample policy for choosing any actions. + const ActionsAndProbs& followProbs = + do_biased_sample ? biased_probs : sample_probs; + + auto [action, prob] = SampleAction(followProbs, random_->RandomUniform()); + return TakeAction{action, GetProb(sample_probs, action), + GetProb(biased_probs, action)}; +} + +void OOSAlgorithm::UpdateInfoStateCumulativePolicy( + CFRInfoStateValues* values, double importance_sampling_ratio) { + // We use stochastically weighted averaging. + for (int i = 0; i < values->cumulative_policy.size(); i++) { + SPIEL_DCHECK_GE(values->cumulative_policy[i], 0); + values->cumulative_policy[i] += + importance_sampling_ratio * values->current_policy[i]; + } +} + +void OOSAlgorithm::UpdateInfoStateCumulativeRegrets( + CFRInfoStateValues* values, Action a, double u_ha, double u_h, + double importance_sampling_ratio) { + SPIEL_DCHECK_FALSE(std::isnan(u_ha)); + SPIEL_DCHECK_FALSE(std::isnan(u_h)); + SPIEL_DCHECK_FALSE(std::isnan(importance_sampling_ratio)); + auto& regs = values->cumulative_regrets; + const int action_index = values->GetActionIndex(a); + for (int i = 0; i < regs.size(); i++) { + if (i == action_index) { + regs[i] += (u_ha - u_h) * importance_sampling_ratio; + } else { + regs[i] += (-u_h) * importance_sampling_ratio; + } + } + values->ApplyRegretMatching(); +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/oos.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/oos.h new file mode 100644 index 0000000..6fa7cb3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/oos.h @@ -0,0 +1,372 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#ifndef OPEN_SPIEL_ALGORITHMS_OOS_H_ +#define OPEN_SPIEL_ALGORITHMS_OOS_H_ + +#include +#include +#include +#include + +#include "open_spiel/algorithms/cfr.h" +#include "open_spiel/algorithms/observation_history.h" +#include "open_spiel/spiel.h" +#include "open_spiel/utils/random.h" + +namespace open_spiel { +namespace algorithms { + + +// Online Outcome Sampling (OOS) algorithm +// +// Online algorithm for solving 2-player imperfect-information 0-sum games. 
+// +// Based on the original implementation of paper +// +// [1] Online monte carlo counterfactual regret minimization for search +// in imperfect information games, Lisý, Lanctot and Bowling +// +// The original implementation can be found at +// https://github.com/aicenter/gtlibrary-java/tree/master/src/cz/agents/gtlibrary/algorithms/mcts/oos +// +// # Description of the algorithm: +// +// OOS is a modification of Outcome Sampling Monte Carlo CFR for online setting. +// The player receives its current play position (infostate) and it has some +// time (or iteration) budget to improve his strategy. On a high level, +// OOS changes MCCFR sampling scheme to bias it towards the current info state. +// +// Additionally, it incrementally builds the search tree by doing rollouts +// whenever leafs of the lookahead search tree are hit, and it expands this +// search tree. +// +// If the algorithm is unable to retrieve strategy for the current infostate, +// i.e. it "gets lost" in the game, it continues to play uniformly randomly. +// This can happen because targeting sampling of infostate / public state +// by using Action-Observation or Public observation histories respectively +// is not always successful, and therefore no strategy may be computed at the +// requested target. +// +// When this algorithm is instantiated with target_biasing = 0, it becomes +// Outcome Sampling MCCFR with incremental tree building. If you also prebuild +// the tree you get the MCCFR algorithm. +// +// The implementation supports both information state and public state +// targeting. +// +// It is possible to provide custom sampling schemes that are implemented +// on the level of infostate strategies. +// +// There is a small difference to the original implementation: +// It used a "target compensation", a weighting factor according +// to equation (3) in [1]. This compensation is not implemented. According +// to conversation with the original author it did not influence the results +// significantly, and it makes the implementation unnecessarily cluttered. +// +// Internally, the algorithm uses a large number of various variables, so there +// are some important conventions in variable naming, in the format of: A_B_C +// +// # A corresponds to strategy: +// +// rm regret matching (or also called current) strategy +// avg average strategy +// unif uniform strategy +// bs biased sampling strategy +// us unbiased sampling strategy +// s sampling strategy (combination of biased and unbiased strategy) +// +// # B corresponds to a specific history or trajectory +// +// h current history +// z terminal history (of the game) +// zh from current history to the terminal, i.e. z|h +// zha from current history and playing action a with 100% prob +// to the terminal, i.e. z|h.a +// ha play action a at the current history, i.e. resulting +// to child history h.a +// +// # C corresponds to player +// +// pl current player +// opp opponent player (without chance) +// cn chance player +// both current and opponent player (without chance) +// all all the players (including chance) +// +// # Examples: +// +// s_z_all: is the probability of sampling terminal history +// rm_h_pl: reach probability of the searching player to the current history +// using RM strategy + +enum Targeting { + kDoNotUseTargeting, + + // Target samples to the current information state. + // More precisely, target the current Action-Observation history (AOH), + // which coincides with the notion of information states on the player + // states. 
+ kInfoStateTargeting, + + // Target samples to the current public state. + // More precisely, target the current Public-Observation history (POH). + kPublicStateTargeting, +}; + +constexpr double kDefaultBiasing = 0.6; +constexpr double kDefaultExploration = 0.5; +using ProbDistribution = std::vector; + +// A type for holding a table of CFR values indexed by InformationStateString. +using OOSInfoStateValuesTable = CFRInfoStateValuesTable; + +// Maintain runtime statistics. +struct OnlineStats { + int root_visits; + int state_visits; + int terminal_visits; + int rollouts; + int target_visits; + int target_biased_visits; + int biased_iterations; + int missed_targets; + + void Reset(); + std::string ToString() const; + // There is a number of invariants that should hold for OOS statistics. + // Useful for testing / debugging purposes. + void CheckConsistency() const; +}; + +std::ostream& operator<<(std::ostream& os, const OnlineStats& stats); + +// Epsilon-on-policy exploration sampling. +// +// The sampling distribution is an epsilon convex combination +// of the current policy (from Regret Matching) and uniform strategy. +class ExplorativeSamplingPolicy : public CFRCurrentPolicy { + public: + const double exploration_; // AKA epsilon + + ExplorativeSamplingPolicy(const OOSInfoStateValuesTable& table, + double exploration = kDefaultExploration) + : CFRCurrentPolicy(table, std::make_shared()), + exploration_(exploration) { + // We need the exploration to be positive to guarantee all leaves are + // reachable. + SPIEL_CHECK_GT(exploration_, 0); + SPIEL_CHECK_LE(exploration_, 1); + } + + ActionsAndProbs GetStatePolicy(const State& state) const override; + ActionsAndProbs GetStatePolicy(const std::string& info_state) const override; +}; + +// No biasing target is specified -- do not target any special infostate. +constexpr ActionObservationHistory* kNoActionObsTargetSpecified = nullptr; +constexpr PublicObservationHistory* kNoPublicObsTargetSpecified = nullptr; + +// Biased sampling policy. +// +// The policy will return a convex combination of bias_exploration between +// regret matching strategy and uniform strategy on actions to do lead +// to the target. If an action does not lead to the target, the value of +// kBannedAction (negative zero) is used. +// +// The targeting is done on top of cached entries from the main algorithm, +// i.e. the OOSValues table is shared between the algorithm and this sampling +// policy. +class TargetedPolicy : public CFRCurrentPolicy { + protected: + const std::shared_ptr game_; + const double bias_exploration_; // AKA epsilon + + Targeting targeting_ = kDoNotUseTargeting; + // Current target for which we should be doing the biasing. + const ActionObservationHistory* target_info_state_ = + kNoActionObsTargetSpecified; + const PublicObservationHistory* target_public_state_ = + kNoPublicObsTargetSpecified; + + // Externally keep track of how many targets have been missed. 
+ OnlineStats* stats_; + + public: + TargetedPolicy(std::shared_ptr game, + const OOSInfoStateValuesTable& table, + double bias_exploration = kDefaultExploration) + : CFRCurrentPolicy(table, std::make_shared()), + game_(game), + bias_exploration_(bias_exploration) {} + + void NoTargeting(); + void UpdateTarget(const ActionObservationHistory* info_state); + void UpdateTarget(const PublicObservationHistory* public_state); + bool IsTargetHit(const State& h); + + ActionsAndProbs GetStatePolicy(const State& h) const; + void TrackStatistics(OnlineStats* stats) { stats_ = stats; } + + private: + bool IsAllowedAction(const State& h, const Action& action) const; +}; + +struct PlayerNodeOutcome { + // Action to take. + Action action; + // Probability of taking this action (according to RM). + double rm_ha_all; + // Estimate of expected utility for current history + double u_h; + // Estimate of expected utility for the child of current history + // if we followed action 'a' with probability 1. + double u_ha; + // Reference to the info state values at current history h. + // This can be a new entry in the table, when we are incrementally + // building the game tree. + CFRInfoStateValues& data; +}; + +struct TakeAction { + // Action to take. + Action action; + // Probability of unbiased sampling to take this action. + // Equivalent to us_ha_all. + double us; + // Probability of biased sampling to take this action. + // Equivalent to bs_ha_all. + double bs; +}; + +class OOSAlgorithm { + public: + OOSAlgorithm(const std::shared_ptr game, + std::unique_ptr values, + std::unique_ptr random, + std::unique_ptr sample_policy, + std::unique_ptr bias_policy, + std::shared_ptr default_policy, double target_biasing); + + // Use default settings. + explicit OOSAlgorithm(std::shared_ptr game); + + // Run iterations from the root, without targeting any particular state. + void RunUnbiasedIterations(int iterations); + + // Run iterations that should be targeted to requested information state. + void RunTargetedIterations(const ActionObservationHistory& target_info_state, + int iterations); + + // Run iterations that should be targeted to requested public state. + void RunTargetedIterations( + const PublicObservationHistory& target_public_state, int iterations); + + // Returns an object capable of computing the average policy + // for all players. The returned policy instance should only be used during + // the lifetime of the OOSAlgorithm object. + std::unique_ptr AveragePolicy() const { + return std::make_unique(*values_, default_policy_); + } + + // Returns an object capable of computing the current policy + // for all players. The returned policy instance should only be used during + // the lifetime of the OOSAlgorithm object. + std::unique_ptr CurrentPolicy() const { + return std::make_unique(*values_, default_policy_); + } + + const OnlineStats& GetStats() { return stats_; } + + protected: + void RunTargetedIterations(int iterations); + void RootIteration(State* h, Player exploringPl); + + // Run iteration from particular history. + // This is a dispatcher to appropriate function based on state type. + // Returns expected utility of current state for the exploring player. 
+ double Iteration(State* h, double rm_h_pl, double rm_h_opp, double bs_h_all, + double us_h_all, double us_h_cn, Player exploringPl); + + double IterationTerminalNode(State* h, double bs_h_all, double us_h_all, + Player exploringPl); + + double IterationChanceNode(State* h, double rm_h_pl, double rm_h_opp, + double bs_h_all, double us_h_all, double us_h_cn, + Player exploringPl); + + double IterationPlayerNode(State* h, double rm_h_pl, double rm_h_opp, + double bs_h_all, double us_h_all, double us_h_cn, + Player exploringPl); + + // Simulate an outcome starting from specified history. + PlayerNodeOutcome IncrementallyBuildTree(State* h, + const std::string& info_state, + double s_h_all, Player exploringPl); + + PlayerNodeOutcome SampleExistingTree(State* h, const std::string& info_state, + CFRInfoStateValues* values, + double rm_h_pl, double rm_h_opp, + double bs_h_all, double us_h_all, + double us_h_cn, Player exploringPl); + + TakeAction SelectAction(State* h, bool do_biased_sample); + + bool IsBiasingApplicable(double bs_h_all); + + void UpdateInfoStateCumulativeRegrets(CFRInfoStateValues* values, Action a, + double u_ha, double u_h, + double importance_sampling_ratio); + + void UpdateInfoStateCumulativePolicy(CFRInfoStateValues* values, + double importance_sampling_ratio); + + inline double Bias(double biased, double non_biased) const { + return target_biasing_ * biased + (1 - target_biasing_) * non_biased; + } + + const std::shared_ptr game_; + std::unique_ptr values_; + std::unique_ptr random_; + std::unique_ptr sample_policy_; + std::unique_ptr bias_policy_; + std::shared_ptr default_policy_; + + // Probability of doing a biased sample. Also called \delta in OOS paper. + const double target_biasing_; + + // Should current iteration make a biased sample? + // (with probability of target_biasing) + bool is_biased_iteration_ = false; + + // Are we deeper in the tree, "below" the target? + // If yes, we do not need to bias samples anymore, + // because any sampling strategy is fine. + bool is_below_target_ = false; + + // Probability of sampling a terminal history. + double s_z_all_ = -1; + // Current leaf value. + double u_z_ = 0.0; + + // Maintain some stats for debugging purposes. When needed, you can call + // Reset() to start counting from the start again. + OnlineStats stats_; +}; + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_OOS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/oos_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/oos_test.cc new file mode 100644 index 0000000..dcff562 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/oos_test.cc @@ -0,0 +1,191 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
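Before the test file proper, a brief editorial usage sketch of the OOSAlgorithm API declared in oos.h above (not part of the diff; the game name and iteration counts are arbitrary placeholders, and the final query assumes the returned average policy supports the usual Policy::GetStatePolicy overload):

```cpp
// Sketch: driving OOSAlgorithm online via its public API from oos.h.
#include <memory>

#include "open_spiel/algorithms/observation_history.h"
#include "open_spiel/algorithms/oos.h"
#include "open_spiel/spiel.h"

void OnlineOosSketch() {
  using namespace open_spiel;
  std::shared_ptr<const Game> game = LoadGame("kuhn_poker");
  algorithms::OOSAlgorithm oos(game);

  // Warm-up: unbiased outcome-sampling iterations from the root.
  oos.RunUnbiasedIterations(1000);

  // Online step: play has reached some state `s`; bias further samples
  // towards the acting player's current information state.
  std::unique_ptr<State> s = game->NewInitialState();
  while (s->IsChanceNode()) s->ApplyAction(s->LegalActions()[0]);
  ActionObservationHistory aoh(*s);  // Current player's AOH at `s`.
  oos.RunTargetedIterations(aoh, 1000);

  // Act from the average strategy accumulated so far.
  auto avg_policy = oos.AveragePolicy();
  ActionsAndProbs probs = avg_policy->GetStatePolicy(*s);
  (void)probs;
}
```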
+ +#include "open_spiel/algorithms/oos.h" + +#include +#include +#include + +#include "open_spiel/algorithms/tabular_exploitability.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// TODO(author13): merge with functional.h +// A helper to create a zipped vector from two vectors. +// The resulting vector has the size of xs, possibly omitting any longer ys. +template +std::vector> Zip(const std::vector& xs, + const std::vector& ys) { + SPIEL_CHECK_LE(xs.size(), ys.size()); + std::vector> zipped; + zipped.reserve(xs.size()); + for (int i = 0; i < xs.size(); ++i) { + zipped.emplace_back(std::make_pair(xs[i], ys[i])); + } + return zipped; +} + +constexpr auto ZipActionsProbs = Zip; + +namespace open_spiel { +namespace algorithms { +namespace { + +constexpr double kFloatTolerance = 1e-10; + +void EpsExploreSamplingPolicyTest() { + std::shared_ptr game = LoadGame("kuhn_poker"); + + std::unique_ptr card_to_player0 = game->NewInitialState(); + SPIEL_CHECK_EQ(card_to_player0->CurrentPlayer(), kChancePlayerId); + std::unique_ptr card_to_player1 = card_to_player0->Child(0); + SPIEL_CHECK_EQ(card_to_player1->CurrentPlayer(), kChancePlayerId); + std::unique_ptr player0_plays = card_to_player1->Child(0); + SPIEL_CHECK_EQ(player0_plays->CurrentPlayer(), 0); + std::unique_ptr player1_plays = player0_plays->Child(0); + SPIEL_CHECK_EQ(player1_plays->CurrentPlayer(), 1); + + auto chn_3cards_dist = ZipActionsProbs(card_to_player0->LegalActions(), + {1 / 3., 1 / 3., 1 / 3.}); + auto chn_2cards_dist = + ZipActionsProbs(card_to_player1->LegalActions(), {1 / 2., 1 / 2.}); + auto player0_dist = + ZipActionsProbs(player0_plays->LegalActions(), {1 / 2., 1 / 2.}); + auto player1_dist = + ZipActionsProbs(player1_plays->LegalActions(), {1 / 2., 1 / 2.}); + + std::vector current_policy = {0.2, 0.8}; + auto expected_mix = ZipActionsProbs(player0_plays->LegalActions(), + { + 0.4 * 0.5 + 0.6 * current_policy[0], + 0.4 * 0.5 + 0.6 * current_policy[1], + }); + + OOSInfoStateValuesTable table; + auto pl0_info_state = player0_plays->InformationStateString(); + auto pl1_info_state = player1_plays->InformationStateString(); + table[pl0_info_state] = CFRInfoStateValues(player0_plays->LegalActions()); + table[pl1_info_state] = CFRInfoStateValues(player1_plays->LegalActions()); + table[pl0_info_state].current_policy = current_policy; + table[pl1_info_state].current_policy = current_policy; + + auto p = ExplorativeSamplingPolicy(table, 0.4); + SPIEL_CHECK_TRUE(StatePoliciesEqual(p.GetStatePolicy(*card_to_player0), + chn_3cards_dist, kFloatTolerance)); + SPIEL_CHECK_TRUE(StatePoliciesEqual(p.GetStatePolicy(*card_to_player1), + chn_2cards_dist, kFloatTolerance)); + SPIEL_CHECK_TRUE(StatePoliciesEqual(p.GetStatePolicy(*player0_plays), + expected_mix, kFloatTolerance)); + SPIEL_CHECK_TRUE(StatePoliciesEqual(p.GetStatePolicy(*player1_plays), + expected_mix, kFloatTolerance)); +} + +std::vector> CollectStatesInGame( + std::shared_ptr game) { + std::vector> state_collection; + + std::function walk = [&](State* s) { + for (auto action : s->LegalActions()) { + auto child = s->Child(action); + walk(child.get()); + state_collection.push_back(std::move(child)); + } + }; + + auto root_state = game->NewInitialState(); + walk(root_state.get()); + state_collection.push_back(std::move(root_state)); + return state_collection; +} + +void UnbiasedIterationsConverge(std::shared_ptr game, + int iterations, double expl_bound) { + auto alg = OOSAlgorithm(game); + auto policy = alg.AveragePolicy(); + 
+ std::cout << "Running " << iterations << " unbiased iters.\n"; + alg.RunUnbiasedIterations(iterations); + double actual_expl = Exploitability(*game, *policy); + + std::cout << alg.GetStats(); + std::cout << "Exploitability: " << actual_expl << "\n"; + std::cout << "----" << std::endl; + SPIEL_CHECK_LT(actual_expl, expl_bound); + alg.GetStats().CheckConsistency(); +} + +void BiasedIterationsConverge(std::shared_ptr game, int iterations, + double expl_bound, int max_test_states = 100) { + // Check that we can target any state in the game. + std::vector> states = CollectStatesInGame(game); + for (int i = 0; i < std::fmin(states.size(), max_test_states); i++) { + // Action-Observation history targeting: + for (int player = 0; player < game->NumPlayers(); player++) { + auto alg = OOSAlgorithm(game); + auto policy = alg.AveragePolicy(); + + ActionObservationHistory aoh(player, *states[i]); + std::cout << "Targeting " << aoh << " with " << iterations << " iters.\n"; + alg.RunTargetedIterations(aoh, iterations); + double actual_expl = Exploitability(*game, *policy); + + std::cout << alg.GetStats(); + std::cout << "Exploitability: " << actual_expl << "\n"; + std::cout << "----" << std::endl; + SPIEL_CHECK_LT(actual_expl, expl_bound); + alg.GetStats().CheckConsistency(); + } + + // Public-Observation history targeting: + { + auto alg = OOSAlgorithm(game); + auto policy = alg.AveragePolicy(); + + PublicObservationHistory poh(*states[i]); + std::cout << "Targeting " << poh << " with " << iterations << " iters.\n"; + alg.RunTargetedIterations(poh, iterations); + double actual_expl = Exploitability(*game, *policy); + + std::cout << alg.GetStats(); + std::cout << "Exploitability: " << actual_expl << "\n"; + std::cout << "----" << std::endl; + SPIEL_CHECK_LT(actual_expl, expl_bound); + alg.GetStats().CheckConsistency(); + } + } +} + +void UnbiasedIterationsConvergeInGames() { + UnbiasedIterationsConverge(LoadGame("coordinated_mp"), 10000, 0.05); + UnbiasedIterationsConverge(LoadGame("kuhn_poker"), 10000, 0.05); +} + +void BiasedIterationsConvergeInGames() { + // Run only for a small number of iterations, as this runs for *every* state + // in the game. + BiasedIterationsConverge(LoadGame("coordinated_mp"), 1000, 0.25); + BiasedIterationsConverge(LoadGame("kuhn_poker"), 1000, 0.25); +} + +} // namespace +} // namespace algorithms +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::algorithms::EpsExploreSamplingPolicyTest(); + open_spiel::algorithms::UnbiasedIterationsConvergeInGames(); + open_spiel::algorithms::BiasedIterationsConvergeInGames(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/ortools/CMakeLists.txt b/scenarios/bargaining/open_spiel/open_spiel/algorithms/ortools/CMakeLists.txt new file mode 100644 index 0000000..a1472bd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/ortools/CMakeLists.txt @@ -0,0 +1,32 @@ +# This is an optional dependency. Currently it is only tested with the binary +# release of OR-Tools v8 on Linux x86_64 (Debian 10). +# +# You need to set OPEN_SPIEL_BUILD_WITH_ORTOOLS to ON to include C++ Linear Programming. +# See the main ../../CMakeLists for more instructions for setup, and +# https://github.com/deepmind/open_spiel/issues/398 for details and discussion +# of C++ optimization in OpenSpiel. +# +# Note: OR-Tools is no longer tested on Github Actions CI, so it may no longer +# work with the latest version of Abseil. 
Note that for this code to work with +# a binary release of OR-Tools, the version of Abseil downloaded and compiled +# with OpenSpiel (see install.sh) must match the version of the one used in the +# OR-Tools release, otherwise there will be missing or duplicate symbols. + +add_library(open_spiel_ortools OBJECT + lp_solver.cc + lp_solver.h + sequence_form_lp.cc + sequence_form_lp.h +) +target_link_libraries(open_spiel_ortools ${ORTOOLS_LIBS}) + +add_executable(lp_solver_test lp_solver_test.cc ${OPEN_SPIEL_OBJECTS} + $ $) +target_link_libraries(lp_solver_test ${ORTOOLS_LIBS}) +add_test(lp_solver_test lp_solver_test) + +add_executable(sequence_form_lp_test sequence_form_lp_test.cc + ${OPEN_SPIEL_OBJECTS} + $ $) +target_link_libraries(sequence_form_lp_test ${ORTOOLS_LIBS}) +add_test(sequence_form_lp_test sequence_form_lp_test) diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/ortools/lp_solver.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/ortools/lp_solver.cc new file mode 100644 index 0000000..bb461ee --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/ortools/lp_solver.cc @@ -0,0 +1,240 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/ortools/lp_solver.h" + +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/algorithms/corr_dist.h" +#include "open_spiel/spiel.h" +#include "ortools/linear_solver/linear_solver.h" + +namespace open_spiel { +namespace algorithms { +namespace ortools { + +namespace opres = operations_research; + +std::pair SetupVariablesAndObjective( + opres::MPSolver* solver, std::vector* variables, + int num_strategy_variables, double min_utility, double max_utility) { + // Value and strategy probability variables + opres::MPVariable* v = solver->MakeNumVar(min_utility, max_utility, "v"); + variables->reserve(num_strategy_variables); + for (int i = 0; i < num_strategy_variables; ++i) { + variables->push_back(solver->MakeNumVar(0.0, 1.0, absl::StrCat("var ", i))); + } + + // Strategy probs sum to one + opres::MPConstraint* const sum_to_one = + solver->MakeRowConstraint(1.0, 1.0, "sum_to_one"); + for (int i = 0; i < num_strategy_variables; ++i) { + sum_to_one->SetCoefficient((*variables)[i], 1.0); + } + + opres::MPObjective* objective = solver->MutableObjective(); + objective->SetCoefficient(v, 1.0); + objective->SetMaximization(); + + return {v, objective}; +} + +ZeroSumGameSolution SolveZeroSumMatrixGame( + const matrix_game::MatrixGame& matrix_game) { + SPIEL_CHECK_EQ(matrix_game.GetType().information, + GameType::Information::kOneShot); + SPIEL_CHECK_EQ(matrix_game.GetType().utility, GameType::Utility::kZeroSum); + int num_rows = matrix_game.NumRows(); + int num_cols = matrix_game.NumCols(); + double min_utility = matrix_game.MinUtility(); + double max_utility = matrix_game.MaxUtility(); + + // Solving a game for player i (e.g. 
row player) requires finding a mixed + // policy over player i's pure strategies (actions) such that a value of the + // mixed strategy against every opponent pure strategy is maximized. + // + // For more detail, please refer to Sec 4.1 of Shoham & Leyton-Brown, 2009: + // Multiagent Systems: Algorithmic, Game-Theoretic, and Logical Foundations + // http://www.masfoundations.org/mas.pdf + // + // For the row player the LP looks like: + // max V + // st. sigma_a1 \dot col_0 >= V + // sigma_a2 \dot col_1 >= V + // . + // . + // sigma_am \cot col_n >= V + // for all i, sigma_ai >= 0 + // sigma \dot 1 = 1 + + ZeroSumGameSolution solution{ + {0, 0}, + {std::vector(num_rows, 0), std::vector(num_cols, 0)}}; + + // First, the row player (player 0). + opres::MPSolver p0_solver("solver", opres::MPSolver::GLOP_LINEAR_PROGRAMMING); + std::vector p0_vars; + auto [p0_v, p0_objective] = SetupVariablesAndObjective( + &p0_solver, &p0_vars, num_rows, min_utility, max_utility); + + // Utility constriants + for (int c = 0; c < num_cols; ++c) { + opres::MPConstraint* const constraint = p0_solver.MakeRowConstraint(); + constraint->SetLB(0.0); + constraint->SetCoefficient(p0_v, -1.0); + for (int r = 0; r < num_rows; ++r) { + constraint->SetCoefficient(p0_vars[r], + matrix_game.PlayerUtility(0, r, c)); + } + } + + p0_solver.Solve(); + solution.values[0] = p0_objective->Value(); + for (int r = 0; r < num_rows; ++r) { + solution.strategies[0][r] = p0_vars[r]->solution_value(); + } + + // Now, the column player. + opres::MPSolver p1_solver("solver", opres::MPSolver::GLOP_LINEAR_PROGRAMMING); + std::vector p1_vars; + auto [p1_v, p1_objective] = SetupVariablesAndObjective( + &p1_solver, &p1_vars, num_cols, min_utility, max_utility); + + // Utility constriants + for (int r = 0; r < num_rows; ++r) { + opres::MPConstraint* const constraint = p1_solver.MakeRowConstraint(); + constraint->SetLB(0.0); + constraint->SetCoefficient(p1_v, -1.0); + for (int c = 0; c < num_cols; ++c) { + constraint->SetCoefficient(p1_vars[c], + matrix_game.PlayerUtility(1, r, c)); + } + } + + p1_solver.Solve(); + solution.values[1] = p1_objective->Value(); + for (int c = 0; c < num_cols; ++c) { + solution.strategies[1][c] = p1_vars[c]->solution_value(); + } + + return solution; +} + +NormalFormCorrelationDevice ComputeCorrelatedEquilibrium( + const NormalFormGame& normal_form_game, CorrEqObjType obj_type, + double social_welfare_lower_bound) { + // Implements an LP solver as explained in Section 4.6 of Shoham and + // Leyton-Brown '09: http://masfoundations.org/ + + // The NormalFormState inherits from SimultaneousGame, which conveniently + // provides a flattened joint action space, which is useful for setting up + // the LP. + std::unique_ptr initial_state = normal_form_game.NewInitialState(); + NFGState* nfg_state = static_cast(initial_state.get()); + std::vector flat_joint_actions = nfg_state->LegalActions(); + + opres::MPSolver solver("solver", opres::MPSolver::GLOP_LINEAR_PROGRAMMING); + std::vector variables; + variables.reserve(flat_joint_actions.size()); + + // Probability and distribution constraints. + opres::MPConstraint* const sum_to_one = + solver.MakeRowConstraint(1.0, 1.0, "sum_to_one"); + for (int i = 0; i < flat_joint_actions.size(); ++i) { + variables.push_back(solver.MakeNumVar(0.0, 1.0, absl::StrCat("var ", i))); + sum_to_one->SetCoefficient(variables[i], 1.0); + } + + // Utility constraints. 
+ for (Player p = 0; p < normal_form_game.NumPlayers(); ++p) { + // This player's legal actions a_i + for (Action a_i : nfg_state->LegalActions(p)) { + // This player's alternative legal actions a_i' + for (Action a_ip : nfg_state->LegalActions(p)) { + // Consider only alternatives a_i' != a_i + if (a_ip == a_i) { + continue; + } + + // Now add the constraint: + // \sum_{a \in A | a_i \in a} [u_i(a) - u_i(a_i', a_{-i})] p(a) >= 0. + opres::MPConstraint* const constraint = solver.MakeRowConstraint(); + constraint->SetLB(0.0); + + for (int ja_idx = 0; ja_idx < flat_joint_actions.size(); ++ja_idx) { + std::vector joint_action = + nfg_state->FlatJointActionToActions(flat_joint_actions[ja_idx]); + // Skip this joint action if a_i is not taken for this player. + if (joint_action[p] != a_i) { + continue; + } + + std::vector alternative_joint_action = joint_action; + alternative_joint_action[p] = a_ip; + + double coeff = + normal_form_game.GetUtility(p, joint_action) - + normal_form_game.GetUtility(p, alternative_joint_action); + constraint->SetCoefficient(variables[ja_idx], coeff); + } + } + } + } + + opres::MPObjective* objective = solver.MutableObjective(); + objective->SetMaximization(); + + // Objective depends on the type. + if (obj_type == CorrEqObjType::kSocialWelfareAtLeast) { + // Add constraint expected SW >= k. + opres::MPConstraint* constraint = solver.MakeRowConstraint(); + constraint->SetLB(social_welfare_lower_bound); + for (int i = 0; i < variables.size(); ++i) { + std::vector joint_action = + nfg_state->FlatJointActionToActions(flat_joint_actions[i]); + std::vector utilities = + normal_form_game.GetUtilities(joint_action); + constraint->SetCoefficient( + variables[i], + std::accumulate(utilities.begin(), utilities.end(), 0.0)); + } + } else if (obj_type == CorrEqObjType::kSocialWelfareMax) { + // Set the objective to the max social welfare + for (int i = 0; i < variables.size(); ++i) { + std::vector joint_action = + nfg_state->FlatJointActionToActions(flat_joint_actions[i]); + std::vector utilities = + normal_form_game.GetUtilities(joint_action); + objective->SetCoefficient( + variables[i], + std::accumulate(utilities.begin(), utilities.end(), 0.0)); + } + } + + solver.Solve(); + + NormalFormCorrelationDevice mu; + mu.reserve(variables.size()); + for (int i = 0; i < variables.size(); ++i) { + mu.push_back({variables[i]->solution_value(), // probability, actions + nfg_state->FlatJointActionToActions(flat_joint_actions[i])}); + } + + return mu; +} + +} // namespace ortools +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/ortools/lp_solver.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/ortools/lp_solver.h new file mode 100644 index 0000000..a969a29 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/ortools/lp_solver.h @@ -0,0 +1,49 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
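As a worked instance of the zero-sum LP assembled by SetupVariablesAndObjective and SolveZeroSumMatrixGame above (editorial example only; matching pennies does not appear anywhere in this diff): for the row player with payoffs

    U_0 = [  1  -1 ]
          [ -1   1 ]

the program is

    max  v
    s.t. sigma_H - sigma_T >= v   (opponent column H)
         sigma_T - sigma_H >= v   (opponent column T)
         sigma_H + sigma_T  = 1,  sigma_H >= 0, sigma_T >= 0,

whose optimum is sigma = (1/2, 1/2) with game value v = 0 -- the same structure the code builds for matrix_rps in the test below.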
+ +#ifndef OPEN_SPIEL_ALGORITHMS_ORTOOLS_LP_SOLVER_H_ +#define OPEN_SPIEL_ALGORITHMS_ORTOOLS_LP_SOLVER_H_ + +#include + +#include "open_spiel/algorithms/corr_dist.h" +#include "open_spiel/matrix_game.h" + +namespace open_spiel { +namespace algorithms { +namespace ortools { + +struct ZeroSumGameSolution { + std::vector values; + std::vector> strategies; +}; + +enum class CorrEqObjType { + kAny, + kSocialWelfareMax, + kSocialWelfareAtLeast, +}; + +ZeroSumGameSolution SolveZeroSumMatrixGame( + const matrix_game::MatrixGame& matrix_game); + +NormalFormCorrelationDevice ComputeCorrelatedEquilibrium( + const NormalFormGame& normal_form_game, CorrEqObjType obj_type, + double social_welfare_lower_bound); + +} // namespace ortools +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_ORTOOLS_LP_SOLVER_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/ortools/lp_solver_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/ortools/lp_solver_test.cc new file mode 100644 index 0000000..d36475d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/ortools/lp_solver_test.cc @@ -0,0 +1,106 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/ortools/lp_solver.h" + +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/algorithms/corr_dist.h" +#include "open_spiel/algorithms/matrix_game_utils.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace ortools { +namespace { + +constexpr double kErrorTolerance = 1e-10; + +void TestSolveMatrixGame() { + std::shared_ptr rps = + LoadMatrixGame("matrix_rps"); + ZeroSumGameSolution solution = SolveZeroSumMatrixGame(*rps); + SPIEL_CHECK_FLOAT_NEAR(solution.values[0], 0, 1e-10); + SPIEL_CHECK_FLOAT_NEAR(solution.values[1], 0, 1e-10); + for (Player p : {0, 1}) { + for (Action a : {0, 1, 2}) { + SPIEL_CHECK_FLOAT_NEAR(solution.strategies[p][a], 1.0 / 3.0, 1e-10); + } + } +} + +void TestCorrelatedEquilibrium() { + // Wikipedia example: + // https://en.wikipedia.org/wiki/Correlated_equilibrium#An_example + std::shared_ptr chicken_dare = + matrix_game::CreateMatrixGame({{0, 7}, {2, 6}}, {{0, 2}, {7, 6}}); + NormalFormCorrelationDevice mu = + ComputeCorrelatedEquilibrium(*chicken_dare, CorrEqObjType::kAny, 0.0); + for (const auto &item : mu) { + std::cout << item.probability << " " << absl::StrJoin(item.actions, " ") + << std::endl; + } + std::cout << std::endl; + + // There is a CE with 1/3 (C,C), 1/3 (D,C), and 1/3 (C,D). 
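+  // Its expected social welfare is (12 + 9 + 9) / 3 = 10, so the lower bound
+  // of 10 requested below is attainable.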
+ mu = ComputeCorrelatedEquilibrium(*chicken_dare, + CorrEqObjType::kSocialWelfareAtLeast, 10.0); + for (const auto &item : mu) { + std::cout << item.probability << " " << absl::StrJoin(item.actions, " ") + << std::endl; + } + std::cout << std::endl; + + std::vector expected_values = ExpectedValues(*chicken_dare, mu); + double social_welfare = + std::accumulate(expected_values.begin(), expected_values.end(), 0.0); + std::cout << social_welfare << std::endl; + SPIEL_CHECK_GE(social_welfare, 10.0 - kErrorTolerance); + + // There is a better one that gets 10.5: 1/4 (C,D), 1/4 (D,C), 1/2 (C, C) + mu = ComputeCorrelatedEquilibrium(*chicken_dare, + CorrEqObjType::kSocialWelfareMax, 0); + for (const auto &item : mu) { + std::cout << item.probability << " " << absl::StrJoin(item.actions, " ") + << std::endl; + } + std::cout << std::endl; + + expected_values = ExpectedValues(*chicken_dare, mu); + social_welfare = + std::accumulate(expected_values.begin(), expected_values.end(), 0.0); + std::cout << social_welfare << std::endl; + SPIEL_CHECK_FLOAT_NEAR(social_welfare, 10.5, kErrorTolerance); + for (const auto &item : mu) { + if (item.actions[0] + item.actions[1] == 1) { + SPIEL_CHECK_FLOAT_NEAR(item.probability, 1.0 / 4.0, kErrorTolerance); + } else if (item.actions[0] + item.actions[1] == 2) { + SPIEL_CHECK_FLOAT_NEAR(item.probability, 1.0 / 2.0, kErrorTolerance); + } + } +} + +} // namespace +} // namespace ortools +} // namespace algorithms +} // namespace open_spiel + +namespace algorithms = open_spiel::algorithms; + +int main(int argc, char **argv) { + algorithms::ortools::TestSolveMatrixGame(); + algorithms::ortools::TestCorrelatedEquilibrium(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/ortools/sequence_form_lp.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/ortools/sequence_form_lp.cc new file mode 100644 index 0000000..0b6ce55 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/ortools/sequence_form_lp.cc @@ -0,0 +1,294 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
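+//
+// Implementation of the sequence-form LP declared in sequence_form_lp.h; see
+// that header for an overview of the construction.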
+ +#include "open_spiel/algorithms/ortools/sequence_form_lp.h" + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/btree_map.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "ortools/linear_solver/linear_solver.h" + +namespace open_spiel { +namespace algorithms { +namespace ortools { + +namespace opres = operations_research; + +SequenceFormLpSpecification::SequenceFormLpSpecification( + const Game& game, const std::string& solver_id) + : SequenceFormLpSpecification( + { + MakeInfostateTree(game, 0), + MakeInfostateTree(game, 1), + }, + solver_id) {} + +SequenceFormLpSpecification::SequenceFormLpSpecification( + std::vector> trees, + const std::string& solver_id) + : trees_(std::move(trees)), + terminal_bijection_(ConnectTerminals(*trees_[0], *trees_[1])), + solver_(MPSolver::CreateSolver(solver_id)), + node_spec_() { + SPIEL_CHECK_TRUE(solver_); + SPIEL_CHECK_EQ(trees_.size(), 2); +} + +void SequenceFormLpSpecification::SpecifyReachProbsConstraints( + InfostateNode* player_node) { + node_spec_[player_node].var_reach_prob = solver_->MakeNumVar( + /*lb=*/0.0, /*ub=*/1., ""); + + if (player_node->type() == kTerminalInfostateNode) return; // Nothing to do. + if (player_node->type() == kObservationInfostateNode) { + for (InfostateNode* player_child : player_node->child_iterator()) { + SpecifyReachProbsConstraints(player_child); + + // Equality constraint: parent = child + opres::MPConstraint* ct = node_spec_[player_child].ct_parent_reach_prob = + solver_->MakeRowConstraint(/*lb=*/0, /*ub=*/0, ""); + ct->SetCoefficient(node_spec_[player_node].var_reach_prob, -1); + ct->SetCoefficient(node_spec_[player_child].var_reach_prob, 1); + } + return; + } + if (player_node->type() == kDecisionInfostateNode) { + // Equality constraint: parent = sum of children + opres::MPConstraint* ct = node_spec_[player_node].ct_child_reach_prob = + solver_->MakeRowConstraint(/*lb=*/0, /*ub=*/0, ""); + ct->SetCoefficient(node_spec_[player_node].var_reach_prob, -1); + for (InfostateNode* player_child : player_node->child_iterator()) { + SpecifyReachProbsConstraints(player_child); + ct->SetCoefficient(node_spec_[player_child].var_reach_prob, 1); + } + return; + } + + SpielFatalError("Exhausted pattern match!"); +} + +void SequenceFormLpSpecification::SpecifyCfValuesConstraints( + InfostateNode* opponent_node) { + node_spec_[opponent_node].var_cf_value = solver_->MakeNumVar( + /*lb=*/-opres::MPSolver::infinity(), + /*ub=*/opres::MPSolver::infinity(), ""); + + if (opponent_node->type() == kDecisionInfostateNode) { + for (InfostateNode* opponent_child : opponent_node->child_iterator()) { + SpecifyCfValuesConstraints(opponent_child); + opres::MPConstraint* ct = node_spec_[opponent_child].ct_parent_cf_value = + solver_->MakeRowConstraint(); + ct->SetUB(0.); + ct->SetCoefficient(node_spec_[opponent_node].var_cf_value, -1); + ct->SetCoefficient(node_spec_[opponent_child].var_cf_value, 1); + } + return; + } + + opres::MPConstraint* ct = node_spec_[opponent_node].ct_child_cf_value = + solver_->MakeRowConstraint(); + ct->SetUB(0.); + ct->SetCoefficient(node_spec_[opponent_node].var_cf_value, -1); + + if (opponent_node->type() == kTerminalInfostateNode) { + const std::map& terminal_map = + terminal_bijection_.association(opponent_node->tree().acting_player()); + const InfostateNode* player_node = terminal_map.at(opponent_node); + const double value = opponent_node->terminal_utility() * + opponent_node->terminal_chance_reach_prob(); + // Terminal value constraint comes from 
the opponent. + ct->SetCoefficient(node_spec_[player_node].var_reach_prob, value); + return; + } + if (opponent_node->type() == kObservationInfostateNode) { + // Value constraint: sum of children = parent + ct->SetLB(0.); + for (InfostateNode* opponent_child : opponent_node->child_iterator()) { + SpecifyCfValuesConstraints(opponent_child); + ct->SetCoefficient(node_spec_[opponent_child].var_cf_value, 1); + } + return; + } + + SpielFatalError("Exhausted pattern match!"); +} + +void SequenceFormLpSpecification::SpecifyRootConstraints( + const InfostateNode* player_root_node) { + SPIEL_CHECK_TRUE(player_root_node->is_root_node()); + NodeSpecification& root_data = node_spec_.at(player_root_node); + root_data.var_reach_prob->SetLB(1.); + root_data.var_reach_prob->SetUB(1.); +} + +void SequenceFormLpSpecification::SpecifyObjective( + const InfostateNode* opponent_root_node) { + opres::MPObjective* const objective = solver_->MutableObjective(); + objective->SetCoefficient(node_spec_[opponent_root_node].var_cf_value, 1); + objective->SetMinimization(); +} + +void SequenceFormLpSpecification::ClearSpecification() { + solver_->Clear(); + for (auto& [node, spec] : node_spec_) { + spec.var_cf_value = nullptr; + spec.var_reach_prob = nullptr; + spec.ct_child_cf_value = nullptr; + spec.ct_parent_cf_value = nullptr; + spec.ct_child_reach_prob = nullptr; + spec.ct_parent_reach_prob = nullptr; + } +} + +void SequenceFormLpSpecification::SpecifyLinearProgram(Player pl) { + SPIEL_CHECK_TRUE(pl == 0 || pl == 1); + ClearSpecification(); + SpecifyReachProbsConstraints( + /*player_node=*/trees_[pl]->mutable_root()); + SpecifyRootConstraints( + /*player_root_node=*/trees_[pl]->mutable_root()); + SpecifyCfValuesConstraints( + /*opponent_node=*/trees_[1 - pl]->mutable_root()); + SpecifyObjective( + /*opponent_root_node=*/trees_[1 - pl]->mutable_root()); +} + +double SequenceFormLpSpecification::Solve() { + opres::MPSolver::ResultStatus status = solver_->Solve(); + // // Export the model if the result was not optimal. + // // You can then use external debugging tools (like cplex studio). + // if (status != opres::MPSolver::ResultStatus::OPTIMAL) { + // std::string out; + // // Pick the format. + // solver_->ExportModelAsMpsFormat(false, false, &out); + // solver_->ExportModelAsLpFormat(false, &out); + // std::cout << out << "\n"; + // } + SPIEL_CHECK_EQ(status, opres::MPSolver::ResultStatus::OPTIMAL); + return -solver_->Objective().Value(); +} + +TabularPolicy SequenceFormLpSpecification::OptimalPolicy(Player for_player) { + SPIEL_CHECK_TRUE(for_player == 0 || for_player == 1); + const InfostateTree* tree = trees_[for_player].get(); + TabularPolicy policy; + for (DecisionId id : tree->AllDecisionIds()) { + const InfostateNode* node = tree->decision_infostate(id); + absl::Span actions = node->legal_actions(); + SPIEL_CHECK_EQ(actions.size(), node->num_children()); + ActionsAndProbs state_policy; + state_policy.reserve(node->num_children()); + double rp_sum = 0.; + for (int i = 0; i < actions.size(); ++i) { + rp_sum += node_spec_[node->child_at(i)].var_reach_prob->solution_value(); + } + for (int i = 0; i < actions.size(); ++i) { + double prob; + if (rp_sum) { + prob = node_spec_[node->child_at(i)].var_reach_prob->solution_value() / + rp_sum; + } else { + // If the infostate is unreachable, the strategy is not defined. + // However some code in the library may require having the strategy, + // so we just put an uniform strategy here. + prob = 1. 
/ actions.size(); + } + state_policy.push_back({actions[i], prob}); + } + policy.SetStatePolicy(node->infostate_string(), state_policy); + } + return policy; +} + +SfStrategy SequenceFormLpSpecification::OptimalSfStrategy(Player for_player) { + SPIEL_CHECK_TRUE(for_player == 0 || for_player == 1); + const InfostateTree* tree = trees_[for_player].get(); + SfStrategy strategy(tree); + for (SequenceId id : tree->AllSequenceIds()) { + const InfostateNode* node = tree->observation_infostate(id); + strategy[id] = node_spec_[node].var_reach_prob->solution_value(); + } + return strategy; +} + +BijectiveContainer ConnectTerminals( + const InfostateTree& tree_a, const InfostateTree& tree_b) { + BijectiveContainer out; + + using History = absl::Span; + absl::btree_map history_map; + for (InfostateNode* node_b : tree_b.leaf_nodes()) { + history_map[node_b->TerminalHistory()] = node_b; + } + + for (InfostateNode* node_a : tree_a.leaf_nodes()) { + const InfostateNode* node_b = history_map[node_a->TerminalHistory()]; + out.put({node_a, node_b}); + } + return out; +} + +void SequenceFormLpSpecification::PrintProblemSpecification() { + const std::vector& variables = solver_->variables(); + const std::vector& constraints = solver_->constraints(); + const opres::MPObjective& objective = solver_->Objective(); + + std::cout << "Objective:" << std::endl; + if (objective.maximization()) { + std::cout << "max "; + } else { + std::cout << "min "; + } + bool first_obj = true; + for (int i = 0; i < variables.size(); ++i) { + const double coef = objective.GetCoefficient(variables[i]); + if (coef) { + if (!first_obj) std::cout << "+ "; + std::cout << coef << "*x" << i << " "; + first_obj = false; + } + } + std::cout << std::endl; + + std::cout << "Constraints:" << std::endl; + for (auto& ct : constraints) { + std::cout << ct->lb() << " <= "; + bool first_ct = true; + for (int i = 0; i < variables.size(); ++i) { + const double coef = ct->GetCoefficient(variables[i]); + if (coef) { + if (!first_ct) std::cout << "+ "; + std::cout << coef << "*x" << i << " "; + first_ct = false; + } + } + std::cout << "<= " << ct->ub() << " (" << ct->name() << ")" << std::endl; + } + + std::cout << "Variables:" << std::endl; + for (int i = 0; i < variables.size(); i++) { + const auto& var = variables[i]; + std::cout << var->lb() << " <= " + << "x" << i << " <= " << var->ub() << " (" << var->name() << ")" + << std::endl; + } +} + +} // namespace ortools +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/ortools/sequence_form_lp.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/ortools/sequence_form_lp.h new file mode 100644 index 0000000..dd2e54b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/ortools/sequence_form_lp.h @@ -0,0 +1,144 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef OPEN_SPIEL_ALGORITHMS_ORTOOLS_SEQUENCE_FORM_LP_H_ +#define OPEN_SPIEL_ALGORITHMS_ORTOOLS_SEQUENCE_FORM_LP_H_ + +#include +#include +#include +#include +#include + +#include "open_spiel/algorithms/infostate_tree.h" +#include "open_spiel/policy.h" +#include "ortools/linear_solver/linear_solver.h" + +// An implementation of a sequence-form linear program for computing Nash +// equilibria in sequential games, based on [1]. The implementation constructs +// infostate trees for both players, connects them through the terminals and +// recursively specifies constraints on reach probability of the player and +// counterfactual values of the opponent. +// +// [1]: Efficient Computation of Equilibria for Extensive Two-Person Games +// http://www.maths.lse.ac.uk/Personal/stengel/TEXTE/geb1996b.pdf + +namespace open_spiel { +namespace algorithms { +namespace ortools { + +using MPSolver = operations_research::MPSolver; + +// See also MPSolver::OptimizationProblemType +inline constexpr const char* kDefaultLinProgSolver = "GLOP"; + +template +struct BijectiveContainer { + std::map x2y; + std::map y2x; + + void put(std::pair xy) { + const T& x = xy.first; + const T& y = xy.second; + SPIEL_CHECK_TRUE(x2y.find(x) == x2y.end()); + SPIEL_CHECK_TRUE(y2x.find(y) == y2x.end()); + x2y[x] = y; + y2x[y] = x; + } + // Direction is equivalent to player id. + const std::map& association(int direction) const { + SPIEL_CHECK_TRUE(direction == 0 || direction == 1); + if (direction == 0) { + return x2y; + } else { + return y2x; + } + } +}; + +BijectiveContainer ConnectTerminals( + const InfostateTree& tree_a, const InfostateTree& tree_b); + +// Variables / constraints per each node, needed for solving the LP. +struct NodeSpecification { + operations_research::MPVariable* var_cf_value; + operations_research::MPVariable* var_reach_prob; + operations_research::MPConstraint* ct_child_cf_value; + operations_research::MPConstraint* ct_parent_cf_value; + operations_research::MPConstraint* ct_child_reach_prob; + operations_research::MPConstraint* ct_parent_reach_prob; +}; + +class SequenceFormLpSpecification { + public: + SequenceFormLpSpecification( + const Game& game, const std::string& solver_id = kDefaultLinProgSolver); + SequenceFormLpSpecification( + std::vector> trees, + const std::string& solver_id = kDefaultLinProgSolver); + + // Specify the linear program for given player. + void SpecifyLinearProgram(Player pl); + + // Solve the linear program. + // Returns the root value for the player whose strategy was computed. + double Solve(); + + // Reset the solver and erase all pointers. + // This is called automatically when you call SpecifyLinearProgram. + void ClearSpecification(); + + // Transform the computed sequence form policy into a behavioral policy. + // This function can be called only after call for Solve(). + TabularPolicy OptimalPolicy(Player for_player); + + // Transform the computed realization plan into a behavioral policy. + // This function can be called only after call for Solve(). + SfStrategy OptimalSfStrategy(Player for_player); + + // For debugging. 
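+  // Prints the objective, all constraints, and all variable bounds of the
+  // currently specified LP in a human-readable form.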
+ void PrintProblemSpecification(); + + const std::vector>& trees() const { + return trees_; + } + std::array roots() const { + return {trees_[0]->mutable_root(), trees_[1]->mutable_root()}; + } + std::unordered_map& node_spec() { + return node_spec_; + } + operations_research::MPSolver* solver() { return solver_.get(); } + + const BijectiveContainer& terminal_bijection() const { + return terminal_bijection_; + } + + protected: + const std::vector> trees_; + const BijectiveContainer terminal_bijection_; + std::unique_ptr solver_; + std::unordered_map node_spec_; + + void SpecifyReachProbsConstraints(InfostateNode* player_node); + void SpecifyCfValuesConstraints(InfostateNode* opponent_node); + void SpecifyRootConstraints(const InfostateNode* player_root_node); + void SpecifyObjective(const InfostateNode* opponent_root_node); +}; + +} // namespace ortools +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_ORTOOLS_SEQUENCE_FORM_LP_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/ortools/sequence_form_lp_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/ortools/sequence_form_lp_test.cc new file mode 100644 index 0000000..3c0cb7c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/ortools/sequence_form_lp_test.cc @@ -0,0 +1,72 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/ortools/sequence_form_lp.h" + +#include "open_spiel/algorithms/tabular_exploitability.h" +#include "open_spiel/game_transforms/turn_based_simultaneous_game.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace ortools { +namespace { + +constexpr double kErrorTolerance = 1e-14; + +void TestGameValueAndExploitability(const std::string& game_name, + double expected_game_value) { + std::shared_ptr game = LoadGame(game_name); + SequenceFormLpSpecification specification(*game); + specification.SpecifyLinearProgram(0); + double actual_game_value = specification.Solve(); + SPIEL_CHECK_FLOAT_NEAR(actual_game_value, expected_game_value, + kErrorTolerance); + + // Compute policy for the opponent. + TabularPolicy policy0 = specification.OptimalPolicy(0); + specification.SpecifyLinearProgram(1); + double opponent_game_value = specification.Solve(); + SPIEL_CHECK_FLOAT_NEAR(actual_game_value + opponent_game_value, 0., + kErrorTolerance); + TabularPolicy policy1 = specification.OptimalPolicy(1); + + // Test exploitability -- this is implemented only for turn-based games. + if (game->GetType().dynamics == GameType::Dynamics::kSimultaneous) return; + + // Merge the two tables. 
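+  // The merged profile holds both players' LP strategies; in a zero-sum game
+  // its exploitability should be (numerically) zero.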
+ std::unordered_map profile_table = + policy0.PolicyTable(); + profile_table.insert(policy1.PolicyTable().begin(), + policy1.PolicyTable().end()); + TabularPolicy optimal_profile(profile_table); + SPIEL_CHECK_FLOAT_NEAR(Exploitability(*game, optimal_profile), 0., + kErrorTolerance); +} + +} // namespace +} // namespace ortools +} // namespace algorithms +} // namespace open_spiel + +namespace algorithms = open_spiel::algorithms; + +int main(int argc, char** argv) { + algorithms::ortools::TestGameValueAndExploitability("matrix_mp", 0.); + algorithms::ortools::TestGameValueAndExploitability("kuhn_poker", -1 / 18.); + algorithms::ortools::TestGameValueAndExploitability("leduc_poker", + -0.085606424078); + algorithms::ortools::TestGameValueAndExploitability( + "goofspiel(players=2,num_cards=3,imp_info=True)", 0.); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/ortools/simple_lp_solver_example.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/ortools/simple_lp_solver_example.cc new file mode 100644 index 0000000..2cc5cbd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/ortools/simple_lp_solver_example.cc @@ -0,0 +1,66 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/spiel.h" +#include "ortools/linear_solver/linear_solver.h" + +namespace open_spiel { +namespace algorithms { +namespace ortools { +namespace { + +namespace opres = operations_research; + +// Example use of OR-Tools adapted from here: +// https://developers.google.com/optimization/introduction/cpp +void TestSimpleLpProgram() { + // Create the linear solver with the GLOP backend. + opres::MPSolver solver("simple_lp_program", + opres::MPSolver::GLOP_LINEAR_PROGRAMMING); + + // Create the variables x and y. + opres::MPVariable* const x = solver.MakeNumVar(0.0, 1, "x"); + opres::MPVariable* const y = solver.MakeNumVar(0.0, 2, "y"); + + std::cout << "Number of variables = " << solver.NumVariables() << std::endl; + + // Create a linear constraint, 0 <= x + y <= 2. + opres::MPConstraint* const ct = solver.MakeRowConstraint(0.0, 2.0, "ct"); + ct->SetCoefficient(x, 1); + ct->SetCoefficient(y, 1); + + std::cout << "Number of constraints = " << solver.NumConstraints() + << std::endl; + + // Create the objective function, 3 * x + y. 
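+  // With 0 <= x <= 1, 0 <= y <= 2 and x + y <= 2, the maximum is attained at
+  // x = 1, y = 1, giving an objective value of 4.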
+ opres::MPObjective* const objective = solver.MutableObjective(); + objective->SetCoefficient(x, 3); + objective->SetCoefficient(y, 1); + objective->SetMaximization(); + + solver.Solve(); + + std::cout << "Solution:" << std::endl; + std::cout << "Objective value = " << objective->Value() << std::endl; + std::cout << "x = " << x->solution_value() << std::endl; + std::cout << "y = " << y->solution_value() << std::endl; +} +} // namespace +} // namespace ortools +} // namespace algorithms +} // namespace open_spiel + +namespace algorithms = open_spiel::algorithms; + +int main(int argc, char** argv) { algorithms::ortools::TestSimpleLpProgram(); } diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/outcome_sampling_mccfr.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/outcome_sampling_mccfr.cc new file mode 100644 index 0000000..1dd93a3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/outcome_sampling_mccfr.cc @@ -0,0 +1,294 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/outcome_sampling_mccfr.h" + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/random/discrete_distribution.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/algorithms/cfr.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { + +// Serialization of the MCCFR solver is in agreement with de/serialization of +// regular CFR solvers, i.e. take a look at the PartiallyDeserializedCFRSolver() +// method for more info. +constexpr const int kSerializationVersion = 1; +constexpr const char* kSerializeSolverRNGSectionHeader = "[SolverRNG]"; +constexpr const char* kSerializeSolverEpsilonSectionHeader = "[SolverEpsilon]"; +constexpr const char* kSerializeSolverDefaultPolicySectionHeader = + "[SolverDefaultPolicy]"; + +OutcomeSamplingMCCFRSolver::OutcomeSamplingMCCFRSolver(const Game& game, + double epsilon, int seed) + : OutcomeSamplingMCCFRSolver(game, std::make_shared(), + epsilon, seed) {} + +OutcomeSamplingMCCFRSolver::OutcomeSamplingMCCFRSolver( + const Game& game, std::shared_ptr default_policy, double epsilon, + int seed) + : OutcomeSamplingMCCFRSolver( + game.shared_from_this(), default_policy, epsilon, + std::mt19937(seed >= 0 ? seed : std::mt19937::default_seed)) {} + +OutcomeSamplingMCCFRSolver::OutcomeSamplingMCCFRSolver( + std::shared_ptr game, std::shared_ptr default_policy, + double epsilon, std::mt19937 rng) + : game_(game), + epsilon_(epsilon), + rng_(rng), + dist_(0.0, 1.0), + default_policy_(default_policy) { + if (game_->GetType().dynamics != GameType::Dynamics::kSequential) { + SpielFatalError( + "MCCFR requires sequential games. 
If you're trying to run it " + "on a simultaneous (or normal-form) game, please first transform it " + "using turn_based_simultaneous_game."); + } +} + +void OutcomeSamplingMCCFRSolver::RunIteration(std::mt19937* rng) { + // for (Player update_player = Player{0}; update_player < game_->NumPlayers(); + // ++update_player) { + for (Player p = Player{0}; p < game_->NumPlayers(); ++p) { + std::unique_ptr state = game_->NewInitialState(); + SampleEpisode(state.get(), p, rng, 1.0, 1.0, 1.0); + } +} + +std::string OutcomeSamplingMCCFRSolver::Serialize(int double_precision, + std::string delimiter) const { + SPIEL_CHECK_GE(double_precision, -1); + std::string str = ""; + // Meta section + absl::StrAppend(&str, + "# Automatically generated by OpenSpiel " + "OutcomeSamplingMCCFRSolver::Serialize\n"); + absl::StrAppend(&str, kSerializeMetaSectionHeader, "\n"); + absl::StrAppend(&str, "Version: ", kSerializationVersion, "\n"); + absl::StrAppend(&str, "\n"); + // Game section + absl::StrAppend(&str, kSerializeGameSectionHeader, "\n"); + absl::StrAppend(&str, game_->Serialize(), "\n"); + // Internal solver state section + absl::StrAppend(&str, kSerializeSolverTypeSectionHeader, "\n"); + absl::StrAppend(&str, "OutcomeSamplingMCCFRSolver", "\n"); + absl::StrAppend(&str, kSerializeSolverSpecificStateSectionHeader, "\n"); + // RNG section + absl::StrAppend(&str, kSerializeSolverRNGSectionHeader, "\n"); + std::ostringstream rng_stream; + rng_stream << rng_; + absl::StrAppend(&str, rng_stream.str(), "\n"); + // Epsilon section + absl::StrAppend(&str, kSerializeSolverEpsilonSectionHeader, "\n"); + absl::StrAppend(&str, epsilon_, "\n"); + // Default policy section + absl::StrAppend(&str, kSerializeSolverDefaultPolicySectionHeader, "\n"); + absl::StrAppend(&str, default_policy_->Serialize(double_precision, delimiter), + "\n"); + // Info state values table section + absl::StrAppend(&str, kSerializeSolverValuesTableSectionHeader, "\n"); + SerializeCFRInfoStateValuesTable(info_states_, &str, double_precision, + delimiter); + return str; +} + +std::vector OutcomeSamplingMCCFRSolver::SamplePolicy( + const CFRInfoStateValues& info_state) const { + std::vector policy = info_state.current_policy; + for (int i = 0; i < policy.size(); ++i) { + policy[i] = epsilon_ * 1.0 / policy.size() + (1 - epsilon_) * policy[i]; + } + return policy; +} + +double OutcomeSamplingMCCFRSolver::Baseline( + const State& state, const CFRInfoStateValues& info_state, int aidx) const { + // Default to vanilla outcome sampling. + return 0; +} + +// Applies Eq. 9 of Schmid et al. 
'19 +double OutcomeSamplingMCCFRSolver::BaselineCorrectedChildValue( + const State& state, const CFRInfoStateValues& info_state, int sampled_aidx, + int aidx, double child_value, double sample_prob) const { + double baseline = Baseline(state, info_state, aidx); + if (aidx == sampled_aidx) { + return baseline + (child_value - baseline) / sample_prob; + } else { + return baseline; + } +} + +double OutcomeSamplingMCCFRSolver::SampleEpisode( + State* state, Player update_player, std::mt19937* rng, double my_reach, + double opp_reach, double sample_reach) { + if (state->IsTerminal()) { + return state->PlayerReturn(update_player); + } else if (state->IsChanceNode()) { + std::pair outcome_and_prob = + SampleAction(state->ChanceOutcomes(), dist_(*rng)); + SPIEL_CHECK_PROB(outcome_and_prob.second); + SPIEL_CHECK_GT(outcome_and_prob.second, 0); + state->ApplyAction(outcome_and_prob.first); + return SampleEpisode(state, update_player, rng, my_reach, + outcome_and_prob.second * opp_reach, + outcome_and_prob.second * sample_reach); + } else if (state->IsSimultaneousNode()) { + SpielFatalError( + "Simultaneous moves not supported. Use " + "TurnBasedSimultaneousGame to convert the game first."); + } + + SPIEL_CHECK_PROB(sample_reach); + + int player = state->CurrentPlayer(); + std::string is_key = state->InformationStateString(player); + std::vector legal_actions = state->LegalActions(); + + // The insert here only inserts the default value if the key is not found, + // otherwise returns the entry in the map. + auto iter_and_result = info_states_.insert( + {is_key, CFRInfoStateValues(legal_actions, kInitialTableValues)}); + + CFRInfoStateValues info_state_copy = iter_and_result.first->second; + info_state_copy.ApplyRegretMatching(); + + const std::vector& sample_policy = + (player == update_player ? SamplePolicy(info_state_copy) + : info_state_copy.current_policy); + + absl::discrete_distribution action_dist(sample_policy.begin(), + sample_policy.end()); + int sampled_aidx = action_dist(*rng); + SPIEL_CHECK_PROB(sample_policy[sampled_aidx]); + SPIEL_CHECK_GT(sample_policy[sampled_aidx], 0); + + state->ApplyAction(legal_actions[sampled_aidx]); + double child_value = SampleEpisode( + state, update_player, rng, + player == update_player + ? my_reach * info_state_copy.current_policy[sampled_aidx] + : my_reach, + player == update_player + ? opp_reach + : opp_reach * info_state_copy.current_policy[sampled_aidx], + sample_reach * sample_policy[sampled_aidx]); + + // Compute each of the child estimated values. + std::vector child_values(legal_actions.size(), 0); + for (int aidx = 0; aidx < legal_actions.size(); ++aidx) { + child_values[aidx] = + BaselineCorrectedChildValue(*state, info_state_copy, sampled_aidx, aidx, + child_value, sample_policy[aidx]); + } + + // Compute the value of this history for this policy. + double value_estimate = 0; + for (int aidx = 0; aidx < legal_actions.size(); ++aidx) { + value_estimate += + info_state_copy.current_policy[aidx] * child_values[aidx]; + } + + if (player == update_player) { + // Now the regret and avg strategy updates. + CFRInfoStateValues& info_state = info_states_[is_key]; + info_state.ApplyRegretMatching(); + + // Estimate for the counterfactual value of the policy. + double cf_value = value_estimate * opp_reach / sample_reach; + + // Update regrets. + // + // Note: different from Chapter 4 of Lanctot '13 thesis, the utilities + // coming back from the recursion are already multiplied by the players' + // tail reaches and divided by the sample tail reach. 
So when adding regrets + // to the table, we need only multiply by the opponent reach and divide by + // the sample reach to this point. + for (int aidx = 0; aidx < legal_actions.size(); ++aidx) { + // Estimate for the counterfactual value of the policy replaced by always + // choosing sampled_aidx at this information state. + double cf_action_value = child_values[aidx] * opp_reach / sample_reach; + info_state.cumulative_regrets[aidx] += (cf_action_value - cf_value); + } + + // Update the average policy. + for (int aidx = 0; aidx < legal_actions.size(); ++aidx) { + double increment = + my_reach * info_state.current_policy[aidx] / sample_reach; + SPIEL_CHECK_FALSE(std::isnan(increment) || std::isinf(increment)); + info_state.cumulative_policy[aidx] += increment; + } + } + + return value_estimate; +} + +std::unique_ptr +DeserializeOutcomeSamplingMCCFRSolver(const std::string& serialized, + std::string delimiter) { + auto partial = PartiallyDeserializeCFRSolver(serialized); + SPIEL_CHECK_EQ(partial.solver_type, "OutcomeSamplingMCCFRSolver"); + + enum Section { kInvalid = -1, kRNG = 0, kEpsilon = 1, kDefaultPolicy = 2 }; + std::array section_strings = {"", "", ""}; + Section current_section = kInvalid; + + std::vector lines = + absl::StrSplit(partial.solver_specific_state, '\n'); + for (int i = 0; i < lines.size(); i++) { + if (lines[i] == kSerializeSolverRNGSectionHeader) { + SPIEL_CHECK_EQ(current_section, kInvalid); + current_section = kRNG; + } else if (lines[i] == kSerializeSolverEpsilonSectionHeader) { + SPIEL_CHECK_EQ(current_section, kRNG); + current_section = kEpsilon; + } else if (lines[i] == kSerializeSolverDefaultPolicySectionHeader) { + SPIEL_CHECK_EQ(current_section, kEpsilon); + current_section = kDefaultPolicy; + } else { + SPIEL_CHECK_NE(current_section, kInvalid); + absl::StrAppend(§ion_strings[current_section], lines[i], "\n"); + } + } + // Remove trailing newline characters + for (int i = 0; i < section_strings.size(); i++) { + section_strings[i].erase(section_strings[i].length() - 1); + } + + // Do the actual deserialization for all internal state values + std::mt19937 rng = std::mt19937(); + std::istringstream rng_stream(section_strings[kRNG]); + rng_stream >> rng; + + // First scalar is epsilon, second is update_player + std::string epsilon = section_strings[kEpsilon]; + + auto solver = std::make_unique( + partial.game, DeserializePolicy(section_strings[kDefaultPolicy]), + std::stod(epsilon), rng); + DeserializeCFRInfoStateValuesTable(partial.serialized_cfr_values_table, + &solver->InfoStateValuesTable(), + delimiter); + return solver; +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/outcome_sampling_mccfr.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/outcome_sampling_mccfr.h new file mode 100644 index 0000000..1004cf6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/outcome_sampling_mccfr.h @@ -0,0 +1,118 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_OUTCOME_SAMPLING_MCCFR_H_ +#define OPEN_SPIEL_ALGORITHMS_OUTCOME_SAMPLING_MCCFR_H_ + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/random/uniform_real_distribution.h" +#include "open_spiel/algorithms/cfr.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" + +// An implementation of outcome sampling Monte Carlo Counterfactual Regret +// Minimization (CFR). This version is implemented in a way that is closer to +// VR-MCCFR, so that it is compatible with the use of baselines to reduce +// variance (baseline of 0 is equivalent to the original outcome sampling). +// +// Lanctot et al. '09: http://mlanctot.info/files/papers/nips09mccfr.pdf +// Lanctot, 2013: http://mlanctot.info/files/papers/PhD_Thesis_MarcLanctot.pdf +// Schmid et al. '18: https://arxiv.org/abs/1809.03057 +// Davis, Schmid, & Bowling '19. https://arxiv.org/abs/1907.09633 + +namespace open_spiel { +namespace algorithms { + +class OutcomeSamplingMCCFRSolver { + public: + static inline constexpr double kInitialTableValues = 0.000001; + static inline constexpr double kDefaultEpsilon = 0.6; + + // Creates a solver with a specific seed, average type and an explicit + // default uniform policy for states that have not been visited. + OutcomeSamplingMCCFRSolver(const Game& game, double epsilon = kDefaultEpsilon, + int seed = -1); + + // Creates a solver with a specific seed and average type, and also allows + // for a custom default policy for states that have not been visited. + OutcomeSamplingMCCFRSolver(const Game& game, + std::shared_ptr default_policy, + double epsilon = kDefaultEpsilon, int seed = -1); + + // The constructor below is meant mainly for deserialization purposes and + // should not be used directly. + OutcomeSamplingMCCFRSolver(std::shared_ptr game, + std::shared_ptr default_policy, + double epsilon, std::mt19937 rng); + + // Performs one iteration of outcome sampling. + void RunIteration() { RunIteration(&rng_); } + + // Same as above, but uses the specified random number generator instead. + void RunIteration(std::mt19937* rng); + + CFRInfoStateValuesTable& InfoStateValuesTable() { return info_states_; } + + // Computes the average policy, containing the policy for all players. + // The returned policy instance should only be used during the lifetime of + // the CFRSolver object. + std::shared_ptr AveragePolicy() const { + return std::make_shared(info_states_, default_policy_); + } + // Note: This can be quite large. + TabularPolicy TabularAveragePolicy() const { + CFRAveragePolicy policy(info_states_, nullptr); + return TabularPolicy(*game_, policy); + } + + // See comments above CFRInfoStateValues::Serialize(double_precision) for + // notes about the double_precision parameter. + std::string Serialize(int double_precision = -1, + std::string delimiter = "<~>") const; + + private: + double SampleEpisode(State* state, Player update_player, std::mt19937* rng, + double my_reach, double opp_reach, double sample_reach); + std::vector SamplePolicy(const CFRInfoStateValues& info_state) const; + + // The b_i function from Schmid et al. '19. + double Baseline(const State& state, const CFRInfoStateValues& info_state, + int aidx) const; + + // Applies Eq. 9 of Schmid et al. 
'19 + double BaselineCorrectedChildValue(const State& state, + const CFRInfoStateValues& info_state, + int sampled_aidx, int aidx, + double child_value, + double sample_prob) const; + + std::shared_ptr game_; + double epsilon_; + CFRInfoStateValuesTable info_states_; + std::mt19937 rng_; + absl::uniform_real_distribution dist_; + std::shared_ptr default_policy_; +}; + +std::unique_ptr +DeserializeOutcomeSamplingMCCFRSolver(const std::string& serialized, + std::string delimiter = "<~>"); + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_OUTCOME_SAMPLING_MCCFR_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/outcome_sampling_mccfr_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/outcome_sampling_mccfr_test.cc new file mode 100644 index 0000000..cdbd7fe --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/outcome_sampling_mccfr_test.cc @@ -0,0 +1,93 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/outcome_sampling_mccfr.h" + +#include +#include +#include + +#include "open_spiel/algorithms/tabular_exploitability.h" +#include "open_spiel/games/kuhn_poker/kuhn_poker.h" +#include "open_spiel/games/leduc_poker/leduc_poker.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace { + +constexpr int kSeed = 230398247; + +void MCCFR_2PGameTest(const std::string& game_name, std::mt19937* rng, + int iterations, double nashconv_upperbound) { + std::shared_ptr game = LoadGame(game_name); + OutcomeSamplingMCCFRSolver solver(*game); + for (int i = 0; i < iterations; i++) { + solver.RunIteration(rng); + } + const std::shared_ptr average_policy = solver.AveragePolicy(); + double nash_conv = NashConv(*game, *average_policy, true); + std::cout << "Game: " << game_name << ", iters = " << iterations + << ", NashConv: " << nash_conv << std::endl; + SPIEL_CHECK_LE(nash_conv, nashconv_upperbound); +} + +void MCCFR_SerializationTest() { + auto game = LoadGame("kuhn_poker"); + OutcomeSamplingMCCFRSolver solver = OutcomeSamplingMCCFRSolver(*game); + double exploitability0 = Exploitability(*game, *solver.AveragePolicy()); + + for (int i = 0; i < 500; i++) { + solver.RunIteration(); + } + double exploitability1 = Exploitability(*game, *solver.AveragePolicy()); + SPIEL_CHECK_GT(exploitability0, exploitability1); + + std::string serialized = solver.Serialize(); + std::unique_ptr deserialized_solver = + DeserializeOutcomeSamplingMCCFRSolver(serialized); + SPIEL_CHECK_EQ(solver.InfoStateValuesTable().size(), + deserialized_solver->InfoStateValuesTable().size()); + double exploitability2 = + Exploitability(*game, *deserialized_solver->AveragePolicy()); + SPIEL_CHECK_FLOAT_NEAR(exploitability1, exploitability2, 1e-15); + + for (int i = 0; i < 500; i++) { + deserialized_solver->RunIteration(); + } + double exploitability3 = + Exploitability(*game, 
*deserialized_solver->AveragePolicy()); + SPIEL_CHECK_GT(exploitability2, exploitability3); +} + +} // namespace +} // namespace algorithms +} // namespace open_spiel + +namespace algorithms = open_spiel::algorithms; + +int main(int argc, char** argv) { + std::mt19937 rng(algorithms::kSeed); + // Convergence results change depending on + // the seed specified for running the tests. + // For this reason, test thresholds have been adapted + // taking the maximum Nash exploitability value obtained + // at iteration 10000 from multiple runs. + // For more details see https://github.com/deepmind/open_spiel/pull/458 + algorithms::MCCFR_2PGameTest("kuhn_poker", &rng, 10000, 0.17); + algorithms::MCCFR_2PGameTest("leduc_poker", &rng, 10000, 3.07); + algorithms::MCCFR_2PGameTest("liars_dice", &rng, 10000, 1.45); + algorithms::MCCFR_SerializationTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/policy_iteration.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/policy_iteration.cc new file mode 100644 index 0000000..c7621a8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/policy_iteration.cc @@ -0,0 +1,205 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
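+//
+// Implementation of policy iteration (see policy_iteration.h): the game is
+// expanded into an MDP over all reachable states, and policy evaluation and
+// policy improvement are alternated until the greedy policy is stable.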
+ +#include "open_spiel/algorithms/policy_iteration.h" + +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace { + +using std::vector; + +struct MDPState { + std::unique_ptr state; // The state of the MDP + double value; // The value of this state + absl::flat_hash_map>> + transitions; // The transitions from this state, for each action, with + // the correponding probability + Action optimal_action; // The optimal action from this state +}; + +// Adds transitions and transition probability from a given state +void AddTransition( + absl::flat_hash_map>>* + transitions, + const std::string& key, const std::unique_ptr& state) { + for (Action action : state->LegalActions()) { + std::unique_ptr next_state = state->Child(action); + vector> possibilities; + if (next_state->IsChanceNode()) { + // For a chance node, record the transition probabilities + for (const auto& actionprob : next_state->ChanceOutcomes()) { + std::unique_ptr realized_next_state = next_state->Child(action); + possibilities.emplace_back(realized_next_state->ToString(), + actionprob.second); + } + } else { + // A non-chance node is equivalent to transition with probability 1 + possibilities.emplace_back(next_state->ToString(), 1.0); + } + (*transitions)[action] = possibilities; + } +} + +// Initialize transition map and value map +void InitializeMaps( + const std::map>& states, + absl::flat_hash_map* mdp_state_nodes) { + for (const auto& kv : states) { + const std::string& key = kv.first; + if (kv.second->IsTerminal()) { + // For both 1-player and 2-player zero sum games, suffices to look at + // player 0's utility + (*mdp_state_nodes)[key].value = kv.second->PlayerReturn(Player{0}); + // No action possible from a terminal state. + (*mdp_state_nodes)[key].optimal_action = kInvalidAction; + } else { + absl::flat_hash_map>>& + transitions = (*mdp_state_nodes)[key].transitions; + AddTransition(&transitions, key, kv.second); + (*mdp_state_nodes)[key].value = 0; + // Assign any random action as the optimal action, initially. + (*mdp_state_nodes)[key].optimal_action = kv.second->LegalActions()[0]; + } + } +} + +double QValue(const absl::flat_hash_map& mdp_state_nodes, + const std::unique_ptr& state, const Action& action) { + if (!mdp_state_nodes.contains(state->ToString()) || + !mdp_state_nodes.at(state->ToString()).transitions.contains(action)) { + // This action is not possible from this state. + return 0; + } + + double value = 0; + const vector>& possibilities = + mdp_state_nodes.at(state->ToString()).transitions.at(action); + for (const auto& outcome : possibilities) { + if (mdp_state_nodes.contains(outcome.first)) { + value += outcome.second * mdp_state_nodes.at(outcome.first).value; + } + } + return value; +} + +// Given a player and a state, gets the best possible action from this state +Action GetBestAction( + const absl::flat_hash_map& mdp_state_nodes, + const std::unique_ptr& state, const Player& player, + const double& min_utility, const double& max_utility) { + vector legal_actions = state->LegalActions(); + Action optimal_action = kInvalidAction; + + // Initialize value to be the minimum utility if current player + // is the maximizing player (i.e. player 0), and to maximum utility + // if current player is the minimizing player (i.e. player 1). + double value = (player == Player{0}) ? 
min_utility : max_utility; + for (Action action : legal_actions) { + double q_val = QValue(mdp_state_nodes, state, action); + bool is_best_so_far = (player == Player{0} && q_val >= value) || + (player == Player{1} && q_val <= value); + if (is_best_so_far) { + value = q_val; + optimal_action = action; + } + } + return optimal_action; +} + +} // namespace + +absl::flat_hash_map PolicyIteration(const Game& game, + int depth_limit, + double threshold) { + // Currently only supports 1-player or 2-player zero sum games + SPIEL_CHECK_TRUE(game.NumPlayers() == 1 || game.NumPlayers() == 2); + if (game.NumPlayers() == 2) { + SPIEL_CHECK_EQ(game.GetType().utility, GameType::Utility::kZeroSum); + } + + // No support for simultaneous games (needs an LP solver). And so also must + // be a perfect information game. + SPIEL_CHECK_EQ(game.GetType().dynamics, GameType::Dynamics::kSequential); + SPIEL_CHECK_EQ(game.GetType().information, + GameType::Information::kPerfectInformation); + + std::map> states = + GetAllStates(game, depth_limit, /*include_terminals=*/true, + /*include_chance_states=*/false); + absl::flat_hash_map mdp_state_nodes; + + InitializeMaps(states, &mdp_state_nodes); + + bool policy_stable; + do { + // Policy evaluation done in place + double error; + do { + error = 0; + for (const auto& kv : states) { + const std::string& key = kv.first; + if (kv.second->IsTerminal()) continue; + + // Evaluate the state value function + Action curr_optimal_action = mdp_state_nodes.at(key).optimal_action; + double value = QValue(mdp_state_nodes, kv.second, curr_optimal_action); + + double* stored_value = &mdp_state_nodes.at(key).value; + error = std::max(std::abs(*stored_value - value), error); + *stored_value = value; + } + } while (error > threshold); + + // Policy improvement + double min_utility = game.MinUtility(); + double max_utility = game.MaxUtility(); + policy_stable = true; + for (const auto& kv : states) { + const std::string& key = kv.first; + if (kv.second->IsTerminal()) continue; + + Player player = kv.second->CurrentPlayer(); + + // Choose the action with the highest possible action value function + Action curr_optimal_action = GetBestAction( + mdp_state_nodes, kv.second, player, min_utility, max_utility); + + double curr_value = + QValue(mdp_state_nodes, kv.second, curr_optimal_action); + + double* stored_value = &mdp_state_nodes.at(key).value; + Action* stored_optimal_action = &mdp_state_nodes.at(key).optimal_action; + if (std::abs(*stored_value - curr_value) > threshold) { + policy_stable = false; + *stored_optimal_action = curr_optimal_action; + } + } + } while (!policy_stable); + + absl::flat_hash_map values; + for (const auto& kv : states) { + std::string state_string = kv.first; + values[state_string] = mdp_state_nodes[state_string].value; + } + return values; +} +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/policy_iteration.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/policy_iteration.h new file mode 100644 index 0000000..61926c0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/policy_iteration.h @@ -0,0 +1,43 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_POLICY_ITERATION_H_ +#define OPEN_SPIEL_ALGORITHMS_POLICY_ITERATION_H_ + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/algorithms/get_all_states.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace algorithms { + +// Policy iteration algorithm: solves for the optimal value function of a game. +// The value function is solved with maximum error less than threshold, +// and it considers all states with depth at most depth_limit from the +// initial state (so if depth_limit is 0, only the root is considered). +// If depth limit is negative, all states are considered. +// +// Currently works for sequential 1-player or 2-player zero-sum games, +// with or without chance nodes. +// +// Based on the implementation in Sutton & Barto '18. + +absl::flat_hash_map PolicyIteration(const Game& game, + int depth_limit, + double threshold); + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_POLICY_ITERATION_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/state_distribution.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/state_distribution.cc new file mode 100644 index 0000000..9ac5d6b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/state_distribution.cc @@ -0,0 +1,350 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
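+//
+// Routines for computing the distribution over histories (world states) that
+// are consistent with a player's information state under a given opponent
+// policy, and for updating such a distribution incrementally as play
+// progresses.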
+ +#include "open_spiel/algorithms/state_distribution.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/container/btree_map.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/simultaneous_move_game.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel::algorithms { +namespace { + +int GetBeliefHistorySize(const HistoryDistribution& beliefs) { + int belief_history_size = 0; + for (int i = 0; i < beliefs.first.size(); ++i) { + belief_history_size = + std::max(belief_history_size, + static_cast(beliefs.first[i]->FullHistory().size())); + } + return belief_history_size; +} + +std::unique_ptr AdvanceBeliefHistoryOneAction( + std::unique_ptr previous, Action action, + Player player_id, const Policy& opponent_policy) { + auto dist = absl::make_unique(); + for (int i = 0; i < previous->first.size(); ++i) { + std::unique_ptr& state = previous->first[i]; + const double& prob = previous->second[i]; + if (Near(prob, 0.)) continue; + switch (state->GetType()) { + case StateType::kChance: { + // If we can't find the action in the policy, then set it to 0. + const double action_prob = GetProb(state->ChanceOutcomes(), action); + + // Then, skip all actions with 0 probability, as they don't matter + // moving forward. + if (Near(std::max(action_prob, 0.0), 0.0)) continue; + SPIEL_CHECK_PROB(action_prob); + + // If we don't find the chance outcome, then the state we're in is + // impossible, so we set it to zero. + state->ApplyAction(action); + + dist->first.push_back(std::move(state)); + dist->second.push_back(prob * std::max(0.0, action_prob)); + break; + } + case StateType::kDecision: { + if (state->CurrentPlayer() == player_id) { + state->ApplyAction(action); + dist->first.push_back(std::move(state)); + dist->second.push_back(prob); + } else { + // We have to add all actions as we don't know if the opponent is + // taking a private or public action. + // TODO(author1): Add method to open_spiel::State that lets us + // only loop over the actions that are consistent with a given private + // action. + for (const auto& [candidate, action_prob] : + opponent_policy.GetStatePolicy(*state)) { + if (Near(std::max(0.0, action_prob), 0.0)) continue; + SPIEL_CHECK_PROB(action_prob); + std::unique_ptr child = state->Child(candidate); + if (child->IsTerminal()) continue; + dist->first.push_back(std::move(child)); + dist->second.push_back(prob * action_prob); + } + } + break; + } + case StateType::kTerminal: + // If the state is terminal, and we have to advance by an action, we + // discard the terminal histories from our beliefs. + continue; + // SpielFatalError("State is terminal, should not call + // AdvanceBeliefs."); + default: + SpielFatalError(absl::StrCat("Unknown state type: ", state->GetType(), + ", state: ", state->ToString())); + } + } + return dist; +} + +// Filters out all beliefs that do not belong to infostate. 
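+// Keeps only the histories whose information state string for player_id
+// matches that of `state`; their weights are carried over unchanged.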
+std::unique_ptr FilterOutBeliefs( + const State& state, std::unique_ptr dist, + int player_id) { + const std::string infostate = state.InformationStateString(player_id); + auto new_dist = absl::make_unique(); + std::vector good_indices; + for (int i = 0; i < dist->first.size(); ++i) { + if (dist->first[i]->InformationStateString(player_id) == infostate) { + good_indices.push_back(i); + } + } + new_dist->first.reserve(good_indices.size()); + new_dist->second.reserve(good_indices.size()); + for (int i : good_indices) { + new_dist->first.push_back(std::move(dist->first[i])); + new_dist->second.push_back(dist->second[i]); + } + return new_dist; +} + +} // namespace + +std::unique_ptr CloneBeliefs( + const open_spiel::HistoryDistribution& beliefs) { + auto beliefs_copy = absl::make_unique(); + for (int i = 0; i < beliefs.first.size(); ++i) { + beliefs_copy->first.push_back(beliefs.first[i]->Clone()); + beliefs_copy->second.push_back(beliefs.second[i]); + } + return beliefs_copy; +} + +HistoryDistribution GetStateDistribution(const State& state, + const Policy& opponent_policy) { + std::shared_ptr game = state.GetGame(); + GameType game_type = game->GetType(); + if (game_type.information == GameType::Information::kPerfectInformation) { + HistoryDistribution dist; + // We can't use brace initialization here as it triggers the copy ctor. + dist.first.push_back(state.Clone()); + dist.second.push_back(1.); + return dist; + } + SPIEL_CHECK_EQ(game_type.information, + GameType::Information::kImperfectInformation); + SPIEL_CHECK_EQ(game_type.dynamics, GameType::Dynamics::kSequential); + SPIEL_CHECK_NE(game_type.chance_mode, + GameType::ChanceMode::kSampledStochastic); + SPIEL_CHECK_FALSE(state.IsChanceNode()); + SPIEL_CHECK_FALSE(state.IsTerminal()); + + Player player = state.CurrentPlayer(); + std::string info_state_string = state.InformationStateString(); + + // Generate the (info state, action) map for the current player using + // the state's history. + absl::btree_map infostate_action_map; + std::vector history = state.History(); + std::unique_ptr tmp_state = game->NewInitialState(); + for (Action action : history) { + if (tmp_state->CurrentPlayer() == player) { + infostate_action_map[tmp_state->InformationStateString()] = action; + } + tmp_state->ApplyAction(action); + } + // Add the current one to this list with an invalid action so that the + // information state is included. + infostate_action_map[info_state_string] = kInvalidAction; + + // Should get to the exact same state by re-applying the history. + SPIEL_CHECK_EQ(tmp_state->ToString(), state.ToString()); + + // Now, do a breadth-first search of all the candidate histories, removing + // them whenever their (infostate, action) is not contained in the map above. + // The search finishes when all the information states of the states in + // the list have been found. We use two lists: final_states contains the ones + // that have been found, while states are the current candidates. + std::vector> final_states; + std::vector final_probs; + std::vector> states; + std::vector probs; + states.push_back(game->NewInitialState()); + probs.push_back(1.0); + + while (!states.empty()) { + for (int idx = 0; idx < states.size();) { + if (states[idx]->IsTerminal()) { + // Terminal cannot be a valid history in an information state, so stop + // considering this line. + } else if (states[idx]->IsChanceNode()) { + // At chance nodes, just add all the children and delete the state. 
+ for (std::pair action_and_prob : + states[idx]->ChanceOutcomes()) { + states.push_back(states[idx]->Child(action_and_prob.first)); + probs.push_back(probs[idx] * action_and_prob.second); + } + } else if (states[idx]->CurrentPlayer() != player) { + // At opponent nodes, similar to chance nodes but get the probability + // from the policy instead. + std::string opp_infostate_str = states[idx]->InformationStateString(); + ActionsAndProbs state_policy = + opponent_policy.GetStatePolicy(*states[idx]); + for (Action action : states[idx]->LegalActions()) { + double action_prob = GetProb(state_policy, action); + states.push_back(states[idx]->Child(action)); + probs.push_back(probs[idx] * action_prob); + } + } else if (states[idx]->CurrentPlayer() == player) { + std::string my_infostate_str = states[idx]->InformationStateString(); + // First check if this state is in the target information state. If + // add it to the final set and don't check for expansion. + if (my_infostate_str == info_state_string) { + final_states.push_back(states[idx]->Clone()); + final_probs.push_back(probs[idx]); + } else { + // Check for expansion of this candidate. To expand this candidate, + // the (infostate, action) pair must be contained in the map. + auto iter = infostate_action_map.find(my_infostate_str); + if (iter != infostate_action_map.end() && iter->second) { + states.push_back(states[idx]->Child(iter->second)); + probs.push_back(probs[idx]); + } + } + } else { + SpielFatalError( + absl::StrCat("Unknown player: ", states[idx]->CurrentPlayer())); + } + + // Delete entries at the index i. Rather than call erase, which would + // shift everything, simply swap with the last element and call + // pop_back(), which can be done in constant time. + std::swap(states[idx], states.back()); + std::swap(probs[idx], probs.back()); + states.pop_back(); + probs.pop_back(); + + // Do not increment the counter index here because the current one points + // to a valid state that was just expanded. + } + } + + // Now normalize the probs + Normalize(absl::MakeSpan(final_probs)); + HistoryDistribution dist = {std::move(final_states), std::move(final_probs)}; + + // Note: We do not call CheckBeliefs here as the beliefs are _wrong_ until we + // perform the filter step. + return dist; +} + +std::unique_ptr UpdateIncrementalStateDistribution( + const State& state, const Policy& opponent_policy, int player_id, + std::unique_ptr previous) { + std::unique_ptr dist; + if (previous) { + dist = std::move(previous); + } + // If we don't have a previous set of beliefs, create it. + if (!dist || dist->first.empty()) { + // This allows for games to special case this scenario. It only works if + // this is only called at the first decision node after chance nodes. We + // leave it to the caller to verify this is the case. + dist = state.GetHistoriesConsistentWithInfostate(); + + // If the game didn't implement GetHistoriesConsistentWithInfostate, then + // this is empty, otherwise, we're good. + if (!dist || dist->first.empty()) { + // If the previous pair is empty, then we have to do a BFS to find all + // relevant nodes: + dist = absl::make_unique( + GetStateDistribution(state, opponent_policy)); + } + } + // Now, we verify that the beliefs match the current infostate. 
+ const std::vector& history = state.FullHistory(); + int belief_history_size = GetBeliefHistorySize(*dist); + std::unique_ptr new_state = state.GetGame()->NewInitialState(); + for (int i = 0; i < belief_history_size; ++i) { + new_state->ApplyAction(history[i].action); + } + SPIEL_DCHECK_TRUE(CheckBeliefs(*new_state, *dist, player_id)); + while (belief_history_size < history.size()) { + dist = AdvanceBeliefHistoryOneAction(std::move(dist), + history[belief_history_size].action, + player_id, opponent_policy); + new_state->ApplyAction(history[belief_history_size].action); + dist = FilterOutBeliefs(*new_state, std::move(dist), player_id); + SPIEL_CHECK_FALSE(dist->first.empty()); + if (!new_state->IsChanceNode()) { + SPIEL_DCHECK_TRUE(CheckBeliefs(*new_state, *dist, player_id)); + } + const int new_belief_history_size = GetBeliefHistorySize(*dist); + SPIEL_CHECK_LT(belief_history_size, new_belief_history_size); + belief_history_size = new_belief_history_size; + } + SPIEL_CHECK_EQ(belief_history_size, history.size()); + SPIEL_CHECK_EQ(new_state->FullHistory(), state.FullHistory()); + dist = FilterOutBeliefs(state, std::move(dist), player_id); + SPIEL_CHECK_FALSE(dist->first.empty()); + + // We only normalize after filtering out invalid infostates. + Normalize(absl::MakeSpan(dist->second)); + + SPIEL_DCHECK_TRUE(CheckBeliefs(state, *dist, player_id)); + return dist; +} + +std::string PrintBeliefs(const HistoryDistribution& beliefs, int player_id) { + const int num_states = beliefs.first.size(); + SPIEL_CHECK_EQ(num_states, beliefs.second.size()); + std::string str; + for (int i = 0; i < num_states; ++i) { + absl::StrAppend( + &str, + absl::StrFormat("(%s, %f)", + beliefs.first[i]->InformationStateString(player_id), + beliefs.second[i])); + if (i < num_states - 1) absl::StrAppend(&str, "\n"); + } + return str; +} + +bool CheckBeliefs(const State& ground_truth_state, + const HistoryDistribution& beliefs, int player_id) { + const std::string infostate = + ground_truth_state.InformationStateString(player_id); + for (int i = 0; i < beliefs.first.size(); ++i) { + if (Near(beliefs.second[i], 0.0, 1e-5)) { + continue; + } + SPIEL_CHECK_EQ(infostate, + beliefs.first[i]->InformationStateString(player_id)); + SPIEL_CHECK_EQ(ground_truth_state.FullHistory().size(), + beliefs.first[i]->FullHistory().size()); + SPIEL_CHECK_EQ(ground_truth_state.IsTerminal(), + beliefs.first[i]->IsTerminal()); + } + return true; +} + +} // namespace open_spiel::algorithms diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/state_distribution.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/state_distribution.h new file mode 100644 index 0000000..41e11f5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/state_distribution.h @@ -0,0 +1,76 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#ifndef OPEN_SPIEL_ALGORITHMS_STATE_DISTRIBUTION_H_
+#define OPEN_SPIEL_ALGORITHMS_STATE_DISTRIBUTION_H_
+
+#include <memory>
+
+#include "open_spiel/policy.h"
+#include "open_spiel/spiel.h"
+
+namespace open_spiel {
+namespace algorithms {
+
+// Returns a distribution over states at the information state containing the
+// specified state, given the opponents' policies. That is, it returns
+// Pr(h | s, \pi_{-i}) by normalizing the opponents' reach probabilities over
+// all h \in s, as described in Section 3.2 of Srinivasan et al. 2018
+// https://arxiv.org/abs/1810.09026. Computing this distribution relies
+// strongly on the fact that InformationStateString must abide by perfect
+// recall.
+//
+// This is a game-independent implementation that does a breadth-first search
+// from the start of the game to enumerate all possible histories consistent
+// with the information state, trimming out histories as they become invalid.
+// As such, it may not be very fast.
+//
+// The returned vectors have an arbitrary ordering, and will include
+// zero-probability histories if there are any. If the probability of reaching
+// the information state under the given policy is zero (e.g. the Bayes
+// normalization term is zero) then a uniform random distribution is returned
+// instead.
+//
+// Note: currently only works for turn-based games of imperfect information,
+// and does not work with kSampledStochastic chance modes.
+HistoryDistribution GetStateDistribution(const State& state,
+                                         const Policy& opponent_policy);
+
+// Clones a HistoryDistribution.
+std::unique_ptr<HistoryDistribution> CloneBeliefs(
+    const open_spiel::HistoryDistribution& beliefs);
+
+// Incrementally builds the state distribution vectors. Must be called at each
+// state in a trajectory. All of the states should correspond to the same
+// information state (i.e. all states should have identical
+// InformationStateString values, although this is not double-checked). If
+// previous is empty, calls the non-incremental version. This must be called
+// for each state in order, starting from the first non-chance node, or it
+// will be wrong.
+// Takes ownership of previous.
+std::unique_ptr<HistoryDistribution> UpdateIncrementalStateDistribution(
+    const State& state, const Policy& opponent_policy, int player_id,
+    std::unique_ptr<HistoryDistribution> previous);
+
+std::string PrintBeliefs(const HistoryDistribution& beliefs, int player_id);
+
+// Runs a set of sanity checks on the beliefs, verifying that they hold the
+// properties we expect. Returns true if the checks pass; otherwise, dies with
+// a CHECK failure.
+bool CheckBeliefs(const State& ground_truth_state,
+                  const HistoryDistribution& beliefs, int player_id);
+
+}  // namespace algorithms
+}  // namespace open_spiel
+
+#endif  // OPEN_SPIEL_ALGORITHMS_STATE_DISTRIBUTION_H_
diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/state_distribution_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/state_distribution_test.cc
new file mode 100644
index 0000000..aabf635
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/state_distribution_test.cc
@@ -0,0 +1,266 @@
+// Copyright 2021 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/state_distribution.h" + +#include "open_spiel/canonical_game_strings.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace { + + +void KuhnStateDistributionTest() { + std::shared_ptr game = LoadGame("kuhn_poker"); + std::unique_ptr state = game->NewInitialState(); + TabularPolicy uniform_policy = GetUniformPolicy(*game); + + // Construct the state 1b + state->ApplyAction(0); // p0 card: jack + state->ApplyAction(1); // p1 card: queen + state->ApplyAction(1); // player 0 bet + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_EQ(state->InformationStateString(), "1b"); + HistoryDistribution dist = GetStateDistribution(*state, uniform_policy); + + SPIEL_CHECK_EQ(dist.first.size(), 2); + SPIEL_CHECK_EQ(dist.second.size(), 2); + + // Check that sampled states have private cards jack and king for the opponent + SPIEL_CHECK_TRUE(dist.first[0]->InformationStateString(0) == "0b" || + dist.first[0]->InformationStateString(0) == "2b"); + SPIEL_CHECK_TRUE(dist.first[1]->InformationStateString(0) == "0b" || + dist.first[1]->InformationStateString(0) == "2b"); + SPIEL_CHECK_NE(dist.first[0]->InformationStateString(0), + dist.first[1]->InformationStateString(0)); + + // Check that they are equally likely, and sum to 1 + SPIEL_CHECK_EQ(dist.second[0], 0.5); + SPIEL_CHECK_EQ(dist.second[0], 0.5); +} + +void CompareDists(const HistoryDistribution& lhs, + const HistoryDistribution& rhs) { + for (int i = 0; i < lhs.first.size(); ++i) { + std::cerr << "lhs[" << i << "]: " << lhs.first[i]->HistoryString() + << ", p: " << lhs.second[i] << std::endl; + std::cerr << "rhs[" << i << "]: " << rhs.first[i]->HistoryString() + << ", p: " << rhs.second[i] << std::endl; + } + for (int i = 0; i < lhs.first.size(); ++i) { + for (int j = 0; j < rhs.first.size(); ++j) { + if (lhs.first[i]->History() == rhs.first[j]->History()) { + SPIEL_CHECK_FLOAT_EQ(lhs.second[i], rhs.second[j]); + break; + } + } + } +} + +void CheckDistHasSameInfostate(const HistoryDistribution& dist, + const State& state, int player_id) { + for (int i = 0; i < dist.first.size(); ++i) { + if (dist.second[i] > 0) { + SPIEL_CHECK_EQ(dist.first[i]->InformationStateString(player_id), + state.InformationStateString(player_id)); + } + } +} + +void LeducStateDistributionTest() { + std::shared_ptr game = LoadGame("leduc_poker"); + std::unique_ptr state = game->NewInitialState(); + TabularPolicy uniform_policy = GetUniformPolicy(*game); + state->ApplyAction(0); // p0 card: jack of first suit + state->ApplyAction(1); // p1 card: queen of first suit + state->ApplyAction(1); // player 0 bet + std::string info_state_string = state->InformationStateString(); + std::string state_history_string = state->HistoryString(); + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + HistoryDistribution dist = GetStateDistribution(*state, uniform_policy); + std::cerr << "Check infostates..." 
<< std::endl; + CheckDistHasSameInfostate(dist, *state, /*player_id=*/1); + + std::unique_ptr incremental_dist = + UpdateIncrementalStateDistribution(*state, uniform_policy, + /*player_id=*/1, nullptr); + std::cerr << "Comparing dists 1..." << std::endl; + SPIEL_CHECK_TRUE(incremental_dist); + CompareDists(dist, *incremental_dist); + CompareDists(dist, *CloneBeliefs(dist)); + std::cerr << "Check infostates2..." << std::endl; + CheckDistHasSameInfostate(*incremental_dist, *state, /*player_id=*/1); + + std::vector correct_distribution(5, 0.2); + SPIEL_CHECK_EQ(dist.first.size(), 5); + SPIEL_CHECK_EQ(dist.second, correct_distribution); + + // Check that none of the states are equal, that one of them is equal to the + // state used to generate the distribution, and that they are all equally + // likely with probability 0.2. + int state_matches = 0; + for (int i = 0; i < dist.first.size(); ++i) { + SPIEL_CHECK_EQ(dist.first[i]->InformationStateString(), info_state_string); + if (dist.first[i]->HistoryString() == state_history_string) { + state_matches++; + } + for (int j = i + 1; j < dist.first.size(); ++j) { + SPIEL_CHECK_NE(dist.first[i]->HistoryString(), + dist.first[j]->HistoryString()); + } + } + SPIEL_CHECK_EQ(state_matches, 1); + + // Now, it's a chance node... + state->ApplyAction(state->LegalActions()[0]); + incremental_dist = UpdateIncrementalStateDistribution( + *state, uniform_policy, + /*player_id=*/1, std::move(incremental_dist)); + std::cerr << "Check infostates2a..." << std::endl; + CheckDistHasSameInfostate(*incremental_dist, *state, /*player_id=*/1); + state->ApplyAction(state->LegalActions()[0]); + dist = GetStateDistribution(*state, uniform_policy); + incremental_dist = UpdateIncrementalStateDistribution( + *state, uniform_policy, + /*player_id=*/1, std::move(incremental_dist)); + std::cerr << "Check infostates3..." << std::endl; + CheckDistHasSameInfostate(*incremental_dist, *state, /*player_id=*/1); + + std::cerr << "Comparing dists 2..." << std::endl; + CompareDists(dist, *incremental_dist); + CompareDists(dist, *CloneBeliefs(dist)); +} + +constexpr absl::string_view kHUNLGameString = + ("universal_poker(betting=limit,numPlayers=2,numRounds=4,stack=1200 " + "1200,blind=50 100,firstPlayer=2 " + "1,numSuits=4,numRanks=13,numHoleCards=2,numBoardCards=0 3 1 " + "1,raiseSize=100 100 100 100)"); + +void HUNLIncrementalTest() { + // universal_poker requires ACPC, which is an optional dependency. + // Skip this test if the game is not registered. + if (!IsGameRegistered(std::string(kHUNLGameString))) { return; } + std::shared_ptr game = LoadGame(std::string(kHUNLGameString)); + std::unique_ptr state = game->NewInitialState(); + state->ApplyAction(14); // p0 card: 5h + state->ApplyAction(46); // p0 card: Kh5h + state->ApplyAction(7); // p1 card: 3s + state->ApplyAction(19); // p1 cards: 6s3s + UniformPolicy uniform_policy; + std::cerr << "Checking first call..." << std::endl; + std::unique_ptr incremental_dist = + UpdateIncrementalStateDistribution(*state, uniform_policy, + /*player_id=*/0, /*previous=*/nullptr); + CheckDistHasSameInfostate(*incremental_dist, *state, /*player_id=*/0); + std::cerr << "First call passed!" << std::endl; + state->ApplyAction(1); // p0 bets pot. 
+ incremental_dist = UpdateIncrementalStateDistribution( + *state, uniform_policy, /*player_id=*/0, std::move(incremental_dist)); + CheckDistHasSameInfostate(*incremental_dist, *state, /*player_id=*/0); +} + +void LeducRegressionTest() { + std::shared_ptr game = LoadGame("leduc_poker"); + std::unique_ptr state = game->NewInitialState(); + UniformPolicy opponent_policy; + const int player_id = 1; + std::unique_ptr dist; + for (const Action action : {0, 5, 1, 2, 1, 4}) { + if (state->CurrentPlayer() == player_id) { + dist = UpdateIncrementalStateDistribution(*state, opponent_policy, + player_id, std::move(dist)); + algorithms::CheckBeliefs(*state, *dist, player_id); + } + state->ApplyAction(action); + } + dist = UpdateIncrementalStateDistribution(*state, opponent_policy, player_id, + std::move(dist)); + algorithms::CheckBeliefs(*state, *dist, player_id); +} + +void LeducRegressionTestPerPlayer(int player_id) { + std::shared_ptr game = LoadGame("leduc_poker"); + std::unique_ptr state = game->NewInitialState(); + UniformPolicy opponent_policy; + std::unique_ptr dist; + + // The first two actions are chance actions, then both players call. This was + // found to cause CheckBeliefs to fail previously, so we add a test verifying + // that doesn't happen. + for (const Action action : {4, 0, 2, 2}) { + if (state->CurrentPlayer() == player_id) { + dist = UpdateIncrementalStateDistribution(*state, opponent_policy, + player_id, std::move(dist)); + algorithms::CheckBeliefs(*state, *dist, player_id); + } + state->ApplyAction(action); + } + dist = UpdateIncrementalStateDistribution(*state, opponent_policy, player_id, + std::move(dist)); + algorithms::CheckBeliefs(*state, *dist, player_id); +} + +void HunlRegressionTest() { + // universal_poker requires ACPC, which is an optional dependency. + // Skip this test if the game is not registered. 
+ if (!IsGameRegistered(HunlGameString("fcpa"))) { return; } + std::shared_ptr game = LoadGame(HunlGameString("fcpa")); + std::unique_ptr state = game->NewInitialState(); + for (const Action action : {0, 27, 43, 44, 2}) state->ApplyAction(action); + UniformPolicy opponent_policy; + std::unique_ptr dist = + UpdateIncrementalStateDistribution(*state, opponent_policy, + state->CurrentPlayer(), nullptr); + algorithms::CheckBeliefs(*state, *dist, state->CurrentPlayer()); +} + +void GoofspielDistributionTest() { + std::shared_ptr game = + LoadGame(TurnBasedGoofspielGameString(/*num_cards=*/4)); + std::unique_ptr state = game->NewInitialState(); + std::unique_ptr dist; + UniformPolicy opponent_policy; + for (const Action action : {3, 3, 2, 1, 1}) { + dist = UpdateIncrementalStateDistribution(*state, opponent_policy, + /*player_id=*/0, std::move(dist)); + algorithms::CheckBeliefs(*state, *dist, state->CurrentPlayer()); + state->ApplyAction(action); + } + dist = UpdateIncrementalStateDistribution(*state, opponent_policy, + /*player_id=*/0, std::move(dist)); + algorithms::CheckBeliefs(*state, *dist, /*player_id=*/0); +} + + +} // namespace +} // namespace algorithms +} // namespace open_spiel + +namespace algorithms = open_spiel::algorithms; + +int main(int argc, char** argv) { + algorithms::KuhnStateDistributionTest(); + algorithms::LeducStateDistributionTest(); + algorithms::HUNLIncrementalTest(); + algorithms::HunlRegressionTest(); + algorithms::GoofspielDistributionTest(); + algorithms::LeducRegressionTest(); + algorithms::LeducRegressionTestPerPlayer(/*player_id=*/0); + algorithms::LeducRegressionTestPerPlayer(/*player_id=*/1); + +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_best_response_mdp.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_best_response_mdp.cc new file mode 100644 index 0000000..6fc98ae --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_best_response_mdp.cc @@ -0,0 +1,411 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/algorithms/tabular_best_response_mdp.h" + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/memory/memory.h" +#include "open_spiel/algorithms/expected_returns.h" +#include "open_spiel/policy.h" +#include "open_spiel/simultaneous_move_game.h" +#include "open_spiel/spiel_globals.h" + +namespace open_spiel { +namespace algorithms { +namespace { +constexpr double kSolveTolerance = 1e-12; +} // namespace + +MDPNode::MDPNode(const std::string& node_key) + : terminal_(false), total_weight_(0), children_(), value_(0), + node_key_(node_key) {} + +void MDPNode::IncTransitionWeight(Action a, MDPNode *child, double weight) { + SPIEL_CHECK_TRUE(child != nullptr); + children_[a][child] += weight; +} + +MDP::MDP() : terminal_node_uid_(0), num_nonterminal_nodes_(0) { + node_map_[kRootKey] = absl::make_unique(std::string(kRootKey)); + node_map_[kRootKey]->add_weight(1.0); +} + +MDPNode *MDP::CreateTerminalNode(const std::string &node_key) { + ++terminal_node_uid_; + MDPNode *terminal_node = LookupOrCreateNode(node_key, true); + terminal_node->set_terminal(true); + return terminal_node; +} + +MDPNode *MDP::LookupOrCreateNode(const std::string &node_key, bool terminal) { + const auto &iter = node_map_.find(node_key); + if (iter != node_map_.end()) { + return iter->second.get(); + } else { + MDPNode *new_node = new MDPNode(node_key); + node_map_[node_key].reset(new_node); + if (!terminal) { + num_nonterminal_nodes_++; + } + return new_node; + } +} + +double MDP::Solve(double tolerance, TabularPolicy *br_policy) { + double delta = 0; + + do { + delta = 0.0; + for (auto &key_and_node : node_map_) { + MDPNode *node = key_and_node.second.get(); + + if (node->terminal()) { + continue; + } + + double max_value = -std::numeric_limits::infinity(); + Action max_action = kInvalidAction; + double node_weight = node->total_weight(); + SPIEL_CHECK_GE(node_weight, 0.0); + + // Compute Bellman value from children. + for (const auto &action_and_child : node->children()) { + double action_value = 0.0; + Action action = action_and_child.first; + + for (auto &child_value : node->children()[action]) { + MDPNode *child = child_value.first; + double transition_weight = child_value.second; + SPIEL_CHECK_TRUE(child != nullptr); + double prob = transition_weight / node_weight; + if (std::isnan(prob)) { + // When transition_weight = node_weight = 0, set to 0 + prob = 0.0; + } + + SPIEL_CHECK_PROB(prob); + action_value += prob * child->value(); + } + + if (action_value > max_value) { + max_value = action_value; + max_action = action; + } + } + + SPIEL_CHECK_NE(max_action, kInvalidAction); + delta += std::abs(node->value() - max_value); + node->set_value(max_value); + + // Set the best response to the maximum-value action, if it's non-null. + if (node->node_key() != kRootKey) { + ActionsAndProbs br_state_policy; + for (const auto &[action, child] : node->children()) { + SetProb(&br_state_policy, action, action == max_action ? 
1.0 : 0.0); + } + br_policy->SetStatePolicy(node->node_key(), br_state_policy); + } + } + } while (delta > tolerance); + + return RootNode()->value(); +} + +double +TabularBestResponseMDP::OpponentReach(const std::vector& reach_probs, + Player p) const { + double product = 1.0; + for (int i = 0; i < reach_probs.size(); i++) { + if (p != i) { + product *= reach_probs[i]; + } + } + return product; +} + +void TabularBestResponseMDP::BuildMDPs( + const State &state, const std::vector& reach_probs, + const std::vector& parent_nodes, + const std::vector& parent_actions, Player only_for_player) { + if (state.IsTerminal()) { + std::vector terminal_values = state.Returns(); + for (Player p = 0; p < game_.NumPlayers(); ++p) { + if (only_for_player == kInvalidPlayer || only_for_player == p) { + std::string node_key = state.ToString(); + MDPNode *node = mdps_.at(p)->CreateTerminalNode(node_key); + node->set_value(terminal_values[p]); + double opponent_reach = OpponentReach(reach_probs, p); + SPIEL_CHECK_GE(opponent_reach, 0.0); + SPIEL_CHECK_LE(opponent_reach, 1.0); + // Following line is not actually necessary because the weight of a leaf + // is never in a denominator for a transition probability, but we + // include it to keep the semantics of the values consistent across the + // ISMDP. + node->add_weight(opponent_reach); + MDPNode *parent_node = parent_nodes[p]; + SPIEL_CHECK_TRUE(parent_node != nullptr); + parent_node->IncTransitionWeight(parent_actions[p], node, + opponent_reach); + } + } + } else if (state.IsChanceNode()) { + ActionsAndProbs outcomes_and_probs = state.ChanceOutcomes(); + for (const auto &[outcome, prob] : outcomes_and_probs) { + std::unique_ptr state_copy = state.Clone(); + state_copy->ApplyAction(outcome); + std::vector new_reach_probs = reach_probs; + // Chance prob is at the end of the vector. + new_reach_probs[game_.NumPlayers()] *= prob; + BuildMDPs(*state_copy, new_reach_probs, parent_nodes, parent_actions, + only_for_player); + } + } else if (state.IsSimultaneousNode()) { + // Several nodes are created: one for each player as the maximizer. + std::vector node_keys(num_players_); + std::vector nodes(num_players_, nullptr); + std::vector opponent_reaches(num_players_, 1.0); + std::vector fixed_state_policies(num_players_); + + for (Player player = 0; player < num_players_; ++player) { + if (only_for_player == kInvalidPlayer || only_for_player == player) { + node_keys[player] = GetNodeKey(state, player); + nodes[player] = mdps_.at(player)->LookupOrCreateNode(node_keys[player]); + opponent_reaches[player] = OpponentReach(reach_probs, player); + + SPIEL_CHECK_GE(opponent_reaches[player], 0.0); + SPIEL_CHECK_LE(opponent_reaches[player], 1.0); + nodes[player]->add_weight(opponent_reaches[player]); + + MDPNode* parent_node = parent_nodes[player]; + SPIEL_CHECK_TRUE(parent_node != nullptr); + parent_node->IncTransitionWeight(parent_actions[player], nodes[player], + opponent_reaches[player]); + } + + if (only_for_player == kInvalidPlayer || only_for_player != player) { + fixed_state_policies[player] = + fixed_policy_.GetStatePolicy(state, player); + } + } + + // Traverse over the list of joint actions. For each one, first deconstruct + // the actions, and then recurse once for each player as the maximizer with + // the others as the fixed policies. 
+ const auto& sim_move_state = down_cast(state); + for (Action joint_action : state.LegalActions()) { + std::vector actions = + sim_move_state.FlatJointActionToActions(joint_action); + + std::unique_ptr state_copy = state.Clone(); + state_copy->ApplyAction(joint_action); + + std::vector new_reach_probs = reach_probs; + std::vector new_parent_nodes = parent_nodes; + std::vector new_parent_actions = parent_actions; + for (Player player = 0; player < num_players_; ++player) { + if (only_for_player == kInvalidPlayer || only_for_player != player) { + double action_prob = GetProb(fixed_state_policies[player], + actions[player]); + SPIEL_CHECK_PROB(action_prob); + new_reach_probs[player] *= action_prob; + } + + if (only_for_player == kInvalidPlayer || only_for_player == player) { + new_parent_nodes[player] = nodes[player]; + } + + new_parent_actions[player] = actions[player]; + } + + BuildMDPs(*state_copy, new_reach_probs, new_parent_nodes, + new_parent_actions, only_for_player); + } + } else { + // Normal decisions node. + std::vector legal_actions = state.LegalActions(); + Player player = state.CurrentPlayer(); + ActionsAndProbs state_policy; // Fixed joint policy we're responding to. + MDPNode* node = nullptr; + + // Check to see if we need to build this node. + if (only_for_player == kInvalidPlayer || only_for_player == player) { + std::string node_key = GetNodeKey(state, player); + + node = mdps_.at(player)->LookupOrCreateNode(node_key); + double opponent_reach = OpponentReach(reach_probs, player); + + SPIEL_CHECK_GE(opponent_reach, 0.0); + SPIEL_CHECK_LE(opponent_reach, 1.0); + node->add_weight(opponent_reach); + MDPNode *parent_node = parent_nodes[player]; + SPIEL_CHECK_TRUE(parent_node != nullptr); + parent_node->IncTransitionWeight(parent_actions[player], node, + opponent_reach); + } + + // Get the fixed policy all the time if building all MDPs, or only at + // opponent nodes otherwise + if (only_for_player == kInvalidPlayer || only_for_player != player) { + state_policy = fixed_policy_.GetStatePolicy(state); + } + + for (Action action : legal_actions) { + std::unique_ptr state_copy = state.Clone(); + state_copy->ApplyAction(action); + + std::vector new_reach_probs = reach_probs; + std::vector new_parent_nodes = parent_nodes; + + // If building all MDPs at once, modify reach probs in all cases. + // Otherwise, only at opponent nodes. + if (only_for_player == kInvalidPlayer || only_for_player != player) { + double action_prob = GetProb(state_policy, action); + SPIEL_CHECK_PROB(action_prob); + new_reach_probs[player] *= action_prob; + } + + // If building all MDPs at once, modify parent nodes for that MDP. + // Otherwise, only do it for the player we're building the MDP for. 
+ if (only_for_player == kInvalidPlayer || only_for_player == player) { + new_parent_nodes[player] = node; + } + + std::vector new_parent_actions = parent_actions; + new_parent_actions[player] = action; + + BuildMDPs(*state_copy, new_reach_probs, new_parent_nodes, + new_parent_actions, only_for_player); + } + } +} + +std::string TabularBestResponseMDP::GetNodeKey(const State &state, + Player player) const { + switch (game_.GetType().information) { + case GameType::Information::kImperfectInformation: + case GameType::Information::kOneShot: + return state.InformationStateString(player); + case GameType::Information::kPerfectInformation: + return state.ObservationString(player); + default: + SpielFatalError("Information type not supported."); + } +} + +TabularBestResponseMDP::TabularBestResponseMDP(const Game &game, + const Policy &fixed_policy) + : game_(game), fixed_policy_(fixed_policy), + num_players_(game.NumPlayers()) {} + +int TabularBestResponseMDP::TotalNumNonterminals() const { + int total_num_nonterminals = 0; + for (Player p = 0; p < num_players_; ++p) { + total_num_nonterminals += mdps_[p]->NumNonTerminalNodes(); + } + return total_num_nonterminals; +} + +int TabularBestResponseMDP::TotalSize() const { + int total_size = 0; + for (Player p = 0; p < num_players_; ++p) { + total_size += mdps_[p]->TotalSize(); + } + return total_size; +} + +TabularBestResponseMDPInfo TabularBestResponseMDP::ComputeBestResponses() { + TabularBestResponseMDPInfo br_info(num_players_); + + // Initialize IS-MDPs for each player, if necessary. + if (mdps_.empty()) { + for (Player p = 0; p < num_players_; p++) { + mdps_.push_back(absl::make_unique()); + } + } + + std::vector parent_nodes; + parent_nodes.reserve(num_players_); + for (Player p = 0; p < num_players_; p++) { + parent_nodes.push_back(mdps_[p]->RootNode()); + } + std::vector reach_probs(num_players_ + 1, 1.0); // include chance. + std::vector parent_actions(num_players_, 0); + + std::unique_ptr initial_state = game_.NewInitialState(); + BuildMDPs(*initial_state, reach_probs, parent_nodes, parent_actions); + + for (Player p = 0; p < num_players_; p++) { + br_info.br_values[p] = + mdps_[p]->Solve(kSolveTolerance, &br_info.br_policies[p]); + } + + return br_info; +} + +TabularBestResponseMDPInfo +TabularBestResponseMDP::ComputeBestResponse(Player max_player) { + TabularBestResponseMDPInfo br_info(num_players_); + + if (mdps_.empty()) { + mdps_.resize(num_players_); + mdps_[max_player] = absl::make_unique(); + } + + std::vector parent_nodes(num_players_, nullptr); + parent_nodes[max_player] = mdps_[max_player]->RootNode(); + std::vector reach_probs(num_players_ + 1, 1.0); // include chance. 
+  std::vector<Action> parent_actions(num_players_, 0);
+
+  std::unique_ptr<State> initial_state = game_.NewInitialState();
+  BuildMDPs(*initial_state, reach_probs, parent_nodes, parent_actions,
+            max_player);
+
+  br_info.br_values[max_player] =
+      mdps_[max_player]->Solve(kSolveTolerance,
+                               &br_info.br_policies[max_player]);
+  return br_info;
+}
+
+TabularBestResponseMDPInfo TabularBestResponseMDP::NashConv() {
+  TabularBestResponseMDPInfo br_info = ComputeBestResponses();
+  std::unique_ptr<State> state = game_.NewInitialState();
+  br_info.on_policy_values =
+      ExpectedReturns(*state, fixed_policy_,
+                      /*depth_limit*/ -1, /*use_infostate_get_policy*/ false);
+  for (Player p = 0; p < num_players_; ++p) {
+    br_info.deviation_incentives[p] =
+        br_info.br_values[p] - br_info.on_policy_values[p];
+    br_info.nash_conv += br_info.deviation_incentives[p];
+  }
+  return br_info;
+}
+
+TabularBestResponseMDPInfo TabularBestResponseMDP::Exploitability() {
+  SPIEL_CHECK_TRUE(game_.GetType().utility == GameType::Utility::kZeroSum ||
+                   game_.GetType().utility == GameType::Utility::kConstantSum);
+  TabularBestResponseMDPInfo br_info = ComputeBestResponses();
+  br_info.nash_conv = absl::c_accumulate(br_info.br_values, 0.0);
+  br_info.exploitability =
+      (br_info.nash_conv - *game_.UtilitySum()) / num_players_;
+  return br_info;
+}
+
+}  // namespace algorithms
+}  // namespace open_spiel
diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_best_response_mdp.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_best_response_mdp.h
new file mode 100644
index 0000000..1350fa6
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_best_response_mdp.h
@@ -0,0 +1,185 @@
+// Copyright 2021 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef OPEN_SPIEL_ALGORITHMS_TABULAR_BEST_RESPONSE_MDP_H_
+#define OPEN_SPIEL_ALGORITHMS_TABULAR_BEST_RESPONSE_MDP_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h"
+#include "open_spiel/policy.h"
+#include "open_spiel/spiel.h"
+#include "open_spiel/spiel_globals.h"
+
+// A tabular best response algorithm based on building an information set
+// Markov decision process (IS-MDP) and then solving it using value iteration.
+// In computing a best response, there is a maximizing player and a number of
+// fixed players (who use fixed policies, normally represented as one fixed
+// joint policy). An IS-MDP is an MDP whose nodes have a one-to-one
+// correspondence with the maximizing player's information states. Transitions
+// from IS-MDP states to other IS-MDP states are functions of the reach
+// probabilities given the possible histories in the information states,
+// chance node distributions, and policies of the other players.
+//
+// The keys used to uniquely identify states in the MDP are
+// State::InformationStateString for one-shot games and imperfect information
+// games, and State::ObservationString for perfect information games. In the
+// case of perfect information games (including simultaneous move games), this
+// implementation requires that ObservationString is a sufficient Markovian
+// description of the state; it does not need to be perfect recall, but it
+// must not merge states that might have different expected values under the
+// policies using these keys as state descriptions. As an example: in
+// Goofspiel, it is insufficient for the observation to only include the
+// current point card, because which point cards remain in the point card deck
+// is important for determining the expected value of the state (but the
+// particular order they were played is not).
+//
+// This implementation has several advantages over best_response.* and
+// tabular_exploitability.*:
+//   - It supports perfect information games using State::ObservationString as
+//     MDP node keys (rather than artificial blow-ups using info state strings)
+//   - It supports abstract games that have imperfect recall due to actions
+//   - It supports simultaneous move games without having to transform them
+//     via TurnBasedSimultaneousGame
+//   - The constraint on the game's State::ToString is lighter; it is only
+//     used as a key for terminal states in the MDP.
+//   - The memory requirement is linear in the number of information states
+//     (or observations) plus the number of unique terminal states, rather
+//     than in the total number of histories.
+//
+// There are some disadvantages:
+//   - It is not nearly as thoroughly tested
+//   - History-level expected values are not stored nor retrievable
+//
+// Currently no performance comparisons have been done to compare the
+// implementations.
+//
+// This implementation is exposed to Python. See
+// test_cfr_plus_solver_best_response_mdp in cfr_test.py for an example use.
+namespace open_spiel {
+namespace algorithms {
+
+class MDPNode {
+ public:
+  explicit MDPNode(const std::string& node_key);
+
+  bool terminal() const { return terminal_; }
+  double total_weight() const { return total_weight_; }
+  double value() const { return value_; }
+  std::string node_key() const { return node_key_; }
+  void set_terminal(bool terminal) { terminal_ = terminal; }
+  void add_weight(double weight) { total_weight_ += weight; }
+  void set_value(double value) { value_ = value; }
+
+  absl::flat_hash_map<Action, absl::flat_hash_map<MDPNode*, double>>&
+  children() {
+    return children_;
+  }
+
+  void IncTransitionWeight(Action a, MDPNode* child, double weight);
+
+ private:
+  bool terminal_;
+  double total_weight_;
+  // Children nodes {s'} from (s,a). The double value is the weight
+  // (probability) assigned to the transition (s,a,s').
+  absl::flat_hash_map<Action, absl::flat_hash_map<MDPNode*, double>> children_;
+  double value_;
+  std::string node_key_;
+};
+
+class MDP {
+ public:
+  MDP();
+  MDPNode* CreateTerminalNode(const std::string& node_key);
+  MDPNode* LookupOrCreateNode(const std::string& node_key,
+                              bool terminal = false);
+  MDPNode* RootNode() { return node_map_[kRootKey].get(); }
+
+  double Solve(double tolerance, TabularPolicy* br_policy);
+  int NumNonTerminalNodes() const { return num_nonterminal_nodes_; }
+  int TotalSize() const { return node_map_.size(); }
+
+ private:
+  constexpr static const char* kRootKey = "**&!@ INFOSET_MDP ROOT KEY";
+  constexpr static const char* kTerminalKeyPrefix = "**&!@ ISMDP TERMINAL KEY";
+  absl::flat_hash_map<std::string, std::unique_ptr<MDPNode>> node_map_;
+  int terminal_node_uid_;
+  int num_nonterminal_nodes_;
+  int num_terminal_nodes_;
+};
+
+// Information returned by the best response computation.
+struct TabularBestResponseMDPInfo {
+  std::vector<double> br_values;
+  std::vector<TabularPolicy> br_policies;
+  std::vector<double> on_policy_values;
+  std::vector<double> deviation_incentives;
+  double nash_conv;
+  double exploitability;
+
+  TabularBestResponseMDPInfo(int num_players)
+      : br_values(num_players, 0), br_policies(num_players),
+        on_policy_values(num_players, 0), deviation_incentives(num_players, 0),
+        nash_conv(0), exploitability(0) {}
+};
+
+class TabularBestResponseMDP {
+ public:
+  TabularBestResponseMDP(const Game& game, const Policy& fixed_policy);
+
+  // Compute best responses for all players.
+  TabularBestResponseMDPInfo ComputeBestResponses();
+
+  // Compute best responses for all players, and compute the specified metric
+  // based on those responses. In the case of exploitability (only supported
+  // for constant-sum games), the on-policy values are not necessary and hence
+  // are not returned.
+  TabularBestResponseMDPInfo NashConv();
+  TabularBestResponseMDPInfo Exploitability();
+
+  // Build only one MDP and compute only the response for the specified player.
+  TabularBestResponseMDPInfo ComputeBestResponse(Player max_player);
+
+  int TotalNumNonterminals() const;
+  int TotalSize() const;
+
+ private:
+  // This function builds all the players' information set MDPs in a single
+  // tree traversal. There is a distribution of world states h for each s
+  // determined by the opponents' policies. The transition probabilities are
+  // obtained by summing the weights (h, a, h') that satisfy (s, a, s') and
+  // normalizing by the weight obtained by the condition of having reached s.
+  void BuildMDPs(const State& state, const std::vector<double>& reach_probs,
+                 const std::vector<MDPNode*>& parent_nodes,
+                 const std::vector<Action>& parent_actions,
+                 Player only_for_player = kInvalidPlayer);
+
+  std::string GetNodeKey(const State& state, Player player) const;
+
+  double OpponentReach(const std::vector<double>& reach_probs, Player p) const;
+
+  std::vector<std::unique_ptr<MDP>> mdps_;
+  const Game& game_;
+  const Policy& fixed_policy_;
+  const int num_players_;
+};
+
+}  // namespace algorithms
+}  // namespace open_spiel
+
+#endif  // OPEN_SPIEL_ALGORITHMS_TABULAR_BEST_RESPONSE_MDP_H_
diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_best_response_mdp_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_best_response_mdp_test.cc
new file mode 100644
index 0000000..05b0750
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_best_response_mdp_test.cc
@@ -0,0 +1,215 @@
+// Copyright 2021 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "open_spiel/algorithms/tabular_best_response_mdp.h" + +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/algorithms/cfr.h" +#include "open_spiel/algorithms/tabular_exploitability.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace { +constexpr int kNumLiarsDiceCFRIterations = 1; +constexpr double kFloatTolerance = 1e-12; + +double NashConvTest(const std::string& game_string, const Policy& policy, + absl::optional expected_nash_conv = absl::nullopt) { + std::shared_ptr game = LoadGame(game_string); + TabularBestResponseMDP tbr(*game, policy); + TabularBestResponseMDPInfo br_info = tbr.NashConv(); + if (expected_nash_conv.has_value()) { + SPIEL_CHECK_FLOAT_NEAR(br_info.nash_conv, expected_nash_conv.value(), + kFloatTolerance); + } + return br_info.nash_conv; +} + +void KuhnNashConvTests() { + UniformPolicy uniform_policy; + NashConvTest("kuhn_poker", uniform_policy, 0.916666666666667); + FirstActionPolicy first_action_policy; + NashConvTest("kuhn_poker", first_action_policy, 2.0); +} + +void LeducNashConvTests() { + UniformPolicy uniform_policy; + NashConvTest("leduc_poker", uniform_policy, 4.747222222222222); + FirstActionPolicy first_action_policy; + NashConvTest("leduc_poker", first_action_policy, 2.0); +} + +void OnlyFirstPlayerTests() { + UniformPolicy uniform_policy; + + for (std::string game_string : { "leduc_poker", "kuhn_poker(players=3)", + "matrix_pd", "goofspiel(num_cards=3)" }) + { + std::shared_ptr game = LoadGame(game_string); + TabularBestResponseMDP tbr(*game, uniform_policy); + TabularBestResponseMDPInfo br_info = tbr.ComputeBestResponse(0); + SPIEL_CHECK_GT(br_info.br_values[0], 0); + } +} + + +void KuhnLeduc3pTests() { + UniformPolicy uniform_policy; + NashConvTest("kuhn_poker(players=3)", uniform_policy, 2.0625); + // Disabled, as it makes the test take a very long time. + // NashConvTest("leduc_poker(players=3)", uniform_policy, 12.611221340388003); +} + +void TicTacToeTests() { + UniformPolicy uniform_policy; + std::shared_ptr game = LoadGame("tic_tac_toe"); + TabularBestResponseMDP tbr1(*game, uniform_policy); + TabularBestResponseMDPInfo br_info = tbr1.NashConv(); + SPIEL_CHECK_EQ(tbr1.TotalNumNonterminals(), 4520); + + // This will be < 2 because there are drawing lines with nonzero + // probability. Verified with other best-response algorithm. + SPIEL_CHECK_FLOAT_NEAR(br_info.nash_conv, 1.919659391534391, kFloatTolerance); + + // First action policy is fully exploitable (easy to check by hand). 
+ FirstActionPolicy first_action_policy; + TabularBestResponseMDP tbr2(*game, first_action_policy); + TabularBestResponseMDPInfo br_info2 = tbr2.NashConv(); + SPIEL_CHECK_FLOAT_NEAR(br_info2.nash_conv, 2.0, kFloatTolerance); +} + +void RPSGameTests() { + UniformPolicy uniform_policy; + FirstActionPolicy first_action_policy; + + std::shared_ptr game = LoadGame("matrix_rps"); + TabularBestResponseMDP tbr1(*game, uniform_policy); + TabularBestResponseMDPInfo br_info = tbr1.NashConv(); + SPIEL_CHECK_FLOAT_NEAR(br_info.nash_conv, 0.0, kFloatTolerance); + + TabularBestResponseMDP tbr2(*game, first_action_policy); + TabularBestResponseMDPInfo br_info2 = tbr2.NashConv(); + SPIEL_CHECK_FLOAT_NEAR(br_info2.nash_conv, 2.0, kFloatTolerance); +} + +void GoofspielGameTests() { + UniformPolicy uniform_policy; + FirstActionPolicy first_action_policy; + + std::vector game_strings = { + "goofspiel(num_cards=3)", + "goofspiel(num_cards=3,points_order=descending)", + }; + + for (const std::string& game_string : game_strings) { + std::string tbs_game_string = + absl::StrCat("turn_based_simultaneous_game(game=", game_string, ")"); + std::shared_ptr tbs_game = LoadGame(tbs_game_string); + double uniform_nash_conv = NashConv(*tbs_game, uniform_policy, true); + double first_action_nash_conv = NashConv(*tbs_game, first_action_policy, + true); + + std::shared_ptr game = LoadGame(game_string); + TabularBestResponseMDP tbr1(*game, uniform_policy); + TabularBestResponseMDPInfo br_info = tbr1.NashConv(); + SPIEL_CHECK_FLOAT_NEAR(br_info.nash_conv, uniform_nash_conv, + kFloatTolerance); + + TabularBestResponseMDP tbr2(*game, first_action_policy); + TabularBestResponseMDPInfo br_info2 = tbr2.NashConv(); + SPIEL_CHECK_FLOAT_NEAR(br_info2.nash_conv, first_action_nash_conv, + kFloatTolerance); + } +} + +void OshiZumoGameTests() { + UniformPolicy uniform_policy; + FirstActionPolicy first_action_policy; + + // Numbers verified against algorithms::NashConv using + // turn_based_simultaneous_game. + + std::shared_ptr game = LoadGame( + "oshi_zumo(coins=10,size=3,min_bid=1)"); + TabularBestResponseMDP tbr1(*game, uniform_policy); + TabularBestResponseMDPInfo br_info = tbr1.NashConv(); + SPIEL_CHECK_FLOAT_NEAR(br_info.nash_conv, 1.988311287477953, kFloatTolerance); + + TabularBestResponseMDP tbr2(*game, first_action_policy); + TabularBestResponseMDPInfo br_info2 = tbr2.NashConv(); + SPIEL_CHECK_FLOAT_NEAR(br_info2.nash_conv, 2.0, kFloatTolerance); +} + +void ImperfectRecallLiarsDiceGameTests() { + std::shared_ptr ir_game = LoadGame("liars_dice_ir"); + std::shared_ptr pr_game = LoadGame("liars_dice"); + + std::cout << ir_game->GetType().short_name << std::endl; + + { + UniformPolicy uniform_policy; + TabularBestResponseMDP tbr1(*pr_game, uniform_policy); + TabularBestResponseMDPInfo br_info1 = tbr1.NashConv(); + std::cout << "PR uniform: " << br_info1.nash_conv << std::endl; + SPIEL_CHECK_FLOAT_NEAR(br_info1.nash_conv, 1.561488646384479, + kFloatTolerance); + + TabularBestResponseMDP tbr2(*ir_game, uniform_policy); + TabularBestResponseMDPInfo br_info2 = tbr2.NashConv(); + std::cout << "IR uniform: " << br_info2.nash_conv << std::endl; + } + + // For a reference, see Figure 1 from Lanctot et al. 
'12 + // http://mlanctot.info/files/papers/12icml-ir.pdf + CFRSolver pr_solver(*pr_game); + CFRSolver ir_solver(*ir_game); + for (int i = 0; i < kNumLiarsDiceCFRIterations; i++) { + pr_solver.EvaluateAndUpdatePolicy(); + ir_solver.EvaluateAndUpdatePolicy(); + if (i % 10 == 0) { + const std::shared_ptr pr_avg_policy = pr_solver.AveragePolicy(); + const std::shared_ptr ir_avg_policy = ir_solver.AveragePolicy(); + + TabularBestResponseMDP pr_tbr(*pr_game, *pr_avg_policy); + TabularBestResponseMDPInfo pr_br_info = pr_tbr.NashConv(); + + TabularBestResponseMDP ir_tbr(*ir_game, *ir_avg_policy); + TabularBestResponseMDPInfo ir_br_info = ir_tbr.NashConv(); + + printf("%3d %0.15lf %0.15lf\n", i, pr_br_info.nash_conv, + ir_br_info.nash_conv); + } + } +} + +} // namespace +} // namespace algorithms +} // namespace open_spiel + +int main(int argc, char **argv) { + open_spiel::algorithms::TicTacToeTests(); + open_spiel::algorithms::KuhnNashConvTests(); + open_spiel::algorithms::LeducNashConvTests(); + open_spiel::algorithms::OnlyFirstPlayerTests(); + open_spiel::algorithms::KuhnLeduc3pTests(); + open_spiel::algorithms::RPSGameTests(); + open_spiel::algorithms::OshiZumoGameTests(); + open_spiel::algorithms::GoofspielGameTests(); + open_spiel::algorithms::ImperfectRecallLiarsDiceGameTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_exploitability.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_exploitability.cc new file mode 100644 index 0000000..dcf775d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_exploitability.cc @@ -0,0 +1,99 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/algorithms/tabular_exploitability.h" + +#include +#include +#include + +#include "open_spiel/algorithms/best_response.h" +#include "open_spiel/algorithms/expected_returns.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { + +double Exploitability(const Game& game, const Policy& policy) { + GameType game_type = game.GetType(); + if (game_type.dynamics != GameType::Dynamics::kSequential) { + SpielFatalError("The game must be turn-based."); + } + if (game_type.utility != GameType::Utility::kZeroSum && + game_type.utility != GameType::Utility::kConstantSum) { + SpielFatalError("The game must have zero- or constant-sum utility."); + } + + std::unique_ptr root = game.NewInitialState(); + double nash_conv = 0; + for (auto i = Player{0}; i < game.NumPlayers(); ++i) { + TabularBestResponse best_response(game, i, &policy); + nash_conv += best_response.Value(*root); + } + return (nash_conv - *game.UtilitySum()) / game.NumPlayers(); +} + +double Exploitability( + const Game& game, + const std::unordered_map& policy) { + TabularPolicy tabular_policy(policy); + return Exploitability(game, tabular_policy); +} + +double NashConv(const Game& game, const Policy& policy) { + return NashConv(game, policy, false); +} + +double NashConv(const Game& game, const Policy& policy, + bool use_state_get_policy) { + GameType game_type = game.GetType(); + if (game_type.dynamics != GameType::Dynamics::kSequential) { + SpielFatalError("The game must be turn-based."); + } + + std::unique_ptr root = game.NewInitialState(); + std::vector best_response_values(game.NumPlayers()); + for (auto p = Player{0}; p < game.NumPlayers(); ++p) { + TabularBestResponse best_response(game, p, &policy); + best_response_values[p] = best_response.Value(*root); + } + std::vector on_policy_values = + ExpectedReturns(*root, policy, -1, !use_state_get_policy); + SPIEL_CHECK_EQ(best_response_values.size(), on_policy_values.size()); + double nash_conv = 0; + for (auto p = Player{0}; p < game.NumPlayers(); ++p) { + double deviation_incentive = best_response_values[p] - on_policy_values[p]; + if (deviation_incentive < -FloatingPointDefaultTolerance()) { + SpielFatalError( + absl::StrCat("Negative Nash deviation incentive for player ", p, ": ", + deviation_incentive, ". Does you game have imperfect ", + "recall, or does State::ToString() not distinguish ", + "between unique states?")); + } + nash_conv += deviation_incentive; + } + return nash_conv; +} + +double NashConv( + const Game& game, + const std::unordered_map& policy) { + TabularPolicy tabular_policy(policy); + return NashConv(game, tabular_policy); +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_exploitability.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_exploitability.h new file mode 100644 index 0000000..b03ec68 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_exploitability.h @@ -0,0 +1,68 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_TABULAR_EXPLOITABILITY_H_ +#define OPEN_SPIEL_ALGORITHMS_TABULAR_EXPLOITABILITY_H_ + +#include +#include + +#include "open_spiel/algorithms/history_tree.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { + +// Returns the average utility that a best responder wins when when playing +// against the opponents' policies, each of which are assumed to be contained +// within the specified (joint) policy. +// This only works for zero- or constant-sum sequential games, otherwise raises +// an error. +double Exploitability(const Game& game, const Policy& policy); + +// Same function provided for easy Python compatibility. +double Exploitability( + const Game& game, + const std::unordered_map& policy); + +// Calculates a measure of how far the given policy is from a Nash equilibrium +// by returning the sum of the improvements in the value that each player could +// obtain by unilaterally changing their strategy while the opposing player +// maintains their current strategy (which for a Nash equilibrium, this value +// is 0). This function only works for sequential games. Note: in zero-sum and +// constant-sum games, exploitability is equal to NashConv / (num. of players). +// The use_state_get_policy flag indicates whether to call +// Policy::GetStatePolicy(const State&) instead of +// Policy::GetStatePolicy(const std::string&) in the computation of the expected +// values of the joint policy. +// +// Note: these functions make use of TabularBestResponse, which assumes perfect +// recall games, and uses the game's State::ToString as a unique identifier for +// representing the value of the state. +double NashConv(const Game& game, const Policy& policy, + bool use_state_get_policy); + +// Same as above with use_state_get_policy set to false. +double NashConv(const Game& game, const Policy& policy); + +// Same function provided for easy Python compatibility. +double NashConv(const Game& game, + const std::unordered_map& policy); + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_TABULAR_EXPLOITABILITY_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_exploitability_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_exploitability_test.cc new file mode 100644 index 0000000..2e5891d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_exploitability_test.cc @@ -0,0 +1,500 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
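A minimal usage sketch for the two entry points declared above (illustrative only, not part of the patch; the expected values come from the regression tests in tabular_exploitability_test.cc below):

```
// Sketch only: exploitability and NashConv of the uniform policy in Kuhn
// poker; the tests below expect ~0.4583 and ~0.9167 respectively.
#include <cstdio>
#include <memory>

#include "open_spiel/algorithms/tabular_exploitability.h"
#include "open_spiel/policy.h"
#include "open_spiel/spiel.h"

int main() {
  std::shared_ptr<const open_spiel::Game> game =
      open_spiel::LoadGame("kuhn_poker");
  open_spiel::TabularPolicy uniform = open_spiel::GetUniformPolicy(*game);
  double exploitability =
      open_spiel::algorithms::Exploitability(*game, uniform);
  double nash_conv = open_spiel::algorithms::NashConv(*game, uniform);
  // In a two-player zero-sum game, exploitability == NashConv / 2.
  std::printf("exploitability=%f nash_conv=%f\n", exploitability, nash_conv);
  return 0;
}
```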
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/tabular_exploitability.h" + +#include +#include +#include +#include +#include + +#include "open_spiel/algorithms/best_response.h" +#include "open_spiel/algorithms/minimax.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/games/goofspiel/goofspiel.h" +#include "open_spiel/games/kuhn_poker/kuhn_poker.h" +#include "open_spiel/games/leduc_poker/leduc_poker.h" +#include "open_spiel/games/liars_dice/liars_dice.h" +#include "open_spiel/games/tic_tac_toe/tic_tac_toe.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace { + +using InfostatesAndActions = std::vector>; + +// Correct values come from the existing Python implementation in +// open_spiel/python/algorithms/exploitability.py. +std::vector> +GetKuhnUniformBestResponseValuesPid0() { + std::vector> history_and_probs = { + {"2", 1.5}, {"2 1 bb", 2.0}, {"2 1 bp", 1.0}, {"2 1 pbp", -1.0}, + {"2 1 pb", 2.0}, {"2 1 pp", 1.0}, {"2 0 bb", 2.0}, {"2 1 p", 1.5}, + {"2 0 pp", 1.0}, {"2 0 pbb", 2.0}, {"2 0 p", 1.5}, {"2 1 b", 1.5}, + {"2 0 bp", 1.0}, {"2 1 pbb", 2.0}, {"2 0", 1.5}, {"2 1", 1.5}, + {"2 0 pb", 2.0}, {"2 0 b", 1.5}, {"2 0 pbp", -1.0}, {"1 0", 1.5}, + {"0", -0.5}, {"1 2", -0.5}, {"0 2 p", -1.0}, {"", 0.5}, + {"0 1", -0.5}, {"0 2", -0.5}, {"1 2 pp", -1.0}, {"0 1 p", -1.0}, + {"1", 0.5}, {"0 2 b", -0.5}, {"1 2 pb", -2.0}, {"0 1 b", -0.5}, + {"1 2 pbb", -2.0}, {"0 2 bb", -2.0}, {"1 2 b", -0.5}, {"0 1 pb", -1.0}, + {"1 0 bp", 1.0}, {"0 2 pp", -1.0}, {"1 2 p", -1.5}, {"0 2 bp", 1.0}, + {"0 1 pp", -1.0}, {"1 0 bb", 2.0}, {"1 2 bp", 1.0}, {"1 0 pbp", -1.0}, + {"0 1 bp", 1.0}, {"1 0 p", 1.5}, {"1 2 pbp", -1.0}, {"0 1 pbp", -1.0}, + {"1 0 pbb", 2.0}, {"1 2 bb", -2.0}, {"0 1 bb", -2.0}, {"1 0 b", 1.5}, + {"0 1 pbb", -2.0}, {"0 2 pb", -1.0}, {"1 0 pp", 1.0}, {"0 2 pbb", -2.0}, + {"1 0 pb", 2.0}, {"0 2 pbp", -1.0}}; + return history_and_probs; +} + +std::vector> +GetKuhnUniformBestResponseValuesPid1() { + std::vector> history_and_probs = { + {"", 0.416666666667}, {"0", 1.75}, {"0 1", 1.75}, + {"0 1 b", 2.0}, {"0 1 bb", 2.0}, {"0 1 bp", -1.0}, + {"0 1 p", 1.5}, {"0 1 pb", 1.5}, {"0 1 pbb", 2.0}, + {"0 1 pbp", 1.0}, {"0 1 pp", 1.0}, {"0 2", 1.75}, + {"0 2 b", 2.0}, {"0 2 bb", 2.0}, {"0 2 bp", -1.0}, + {"0 2 p", 1.5}, {"0 2 pb", 1.5}, {"0 2 pbb", 2.0}, + {"0 2 pbp", 1.0}, {"0 2 pp", 1.0}, {"1", 0.5}, + {"1 0", -0.75}, {"1 0 b", -1.0}, {"1 0 bb", -2.0}, + {"1 0 bp", -1.0}, {"1 0 p", -0.5}, {"1 0 pb", -0.5}, + {"1 0 pbb", -2.0}, {"1 0 pbp", 1.0}, {"1 0 pp", -1.0}, + {"1 2", 1.75}, {"1 2 b", 2.0}, {"1 2 bb", 2.0}, + {"1 2 bp", -1.0}, {"1 2 p", 1.5}, {"1 2 pb", 1.5}, + {"1 2 pbb", 2.0}, {"1 2 pbp", 1.0}, {"1 2 pp", 1.0}, + {"2", -1.0}, {"2 0", -0.75}, {"2 0 b", -1.0}, + {"2 0 bb", -2.0}, {"2 0 bp", -1.0}, {"2 0 p", -0.5}, + {"2 0 pb", -0.5}, {"2 0 pbb", -2.0}, {"2 0 pbp", 1.0}, + {"2 0 pp", -1.0}, {"2 1", -1.25}, {"2 1 b", -2.0}, + {"2 1 bb", -2.0}, {"2 1 bp", -1.0}, {"2 1 p", -0.5}, + {"2 1 pb", -0.5}, {"2 1 pbb", -2.0}, {"2 1 pbp", 1.0}, + {"2 1 pp", -1.0}}; + return history_and_probs; +} + +std::vector> +GetKuhnOptimalBestResponseValuesPid0() { + std::vector> history_and_probs = { + {"", -0.05555555555555558}, + {"1 2 pb", -1.0}, + {"1 2 b", -2.0}, + {"0 2 pp", -1.0}, + {"0 1 bp", 1.0}, + {"2 1 bp", 1.0}, + {"2 0 pb", 2.0}, + {"1 2 pp", -1.0}, + {"2 0 b", 1.0}, + {"0 1 bb", -2.0}, + {"2 0 
pp", 1.0}, + {"2 0 p", 1.3333333333333333}, + {"1 0", 0.3333333333333333}, + {"1 0 bb", 2.0}, + {"1 0 pbp", -1.0}, + {"1 2 bp", 1.0}, + {"2 0 bp", 1.0}, + {"0 1", -1.0}, + {"0 2", -1.0}, + {"1 0 pbb", 2.0}, + {"1 0 bp", 1.0}, + {"2 0 bb", 2.0}, + {"1 2 bb", -2.0}, + {"2 1", 1.0}, + {"2 1 bb", 2.0}, + {"2 0 pbp", -1.0}, + {"1 2 p", -1.0}, + {"0 2 bb", -2.0}, + {"1 0 pp", 1.0}, + {"0 2 b", -2.0}, + {"2 1 pb", 2.0}, + {"1 2 pbb", -2.0}, + {"1 2", -1.0}, + {"0 1 pb", -1.0}, + {"0 2 p", -1.0}, + {"0 2 bp", 1.0}, + {"1 0 pb", -1.0}, + {"1 2 pbp", -1.0}, + {"2 1 pp", 1.0}, + {"0 1 pp", -1.0}, + {"2 1 pbb", 2.0}, + {"2 0", 1.3333333333333333}, + {"1 0 b", 1.0}, + {"0 2 pbp", -1.0}, + {"2 0 pbb", 2.0}, + {"0 1 pbp", -1.0}, + {"0 1 b", 0.0}, + {"2 1 b", 1.3333333333333333}, + {"2 1 pbp", -1.0}, + {"2", 1.1666666666666665}, + {"1", -0.33333333333333337}, + {"0", -1.0}, + {"0 1 p", -1.0}, + {"1 0 p", 0.3333333333333333}, + {"0 2 pbb", -2.0}, + {"0 1 pbb", -2.0}, + {"2 1 p", 1.0}, + {"0 2 pb", -1.0}}; + return history_and_probs; +} + +std::vector> +GetKuhnOptimalBestResponseValuesPid1() { + std::vector> history_and_probs = { + {"", 0.0555555555556}, + {"0", 0.9}, + {"0 1", 0.6}, + {"0 1 b", -1.0}, + {"0 1 bb", 2.0}, + {"0 1 bp", -1.0}, + {"0 1 p", 1.0}, + {"0 1 pb", 1.0}, + {"0 1 pbb", 2.0}, + {"0 1 pbp", 1.0}, + {"0 1 pp", 1.0}, + {"0 2", 1.2}, + {"0 2 b", 2.0}, + {"0 2 bb", 2.0}, + {"0 2 bp", -1.0}, + {"0 2 p", 1.0}, + {"0 2 pb", 1.0}, + {"0 2 pbb", 2.0}, + {"0 2 pbp", 1.0}, + {"0 2 pp", 1.0}, + {"1", 0.266666666667}, + {"1 0", -1.0}, + {"1 0 b", -1.0}, + {"1 0 bb", -2.0}, + {"1 0 bp", -1.0}, + {"1 0 p", -1.0}, + {"1 0 pb", -0.6}, + {"1 0 pbb", -2.0}, + {"1 0 pbp", 1.0}, + {"1 0 pp", -1.0}, + {"1 2", 1.53333333333}, + {"1 2 b", 2.0}, + {"1 2 bb", 2.0}, + {"1 2 bp", -1.0}, + {"1 2 p", 1.53333333333}, + {"1 2 pb", 1.53333333333}, + {"1 2 pbb", 2.0}, + {"1 2 pbp", 1.0}, + {"1 2 pp", 1.0}, + {"2", -1.0}, + {"2 0", -1.0}, + {"2 0 b", -1.0}, + {"2 0 bb", -2.0}, + {"2 0 bp", -1.0}, + {"2 0 p", -1.0}, + {"2 0 pb", -2.0}, + {"2 0 pbb", -2.0}, + {"2 0 pbp", 1.0}, + {"2 0 pp", -1.0}, + {"2 1", -1.0}, + {"2 1 b", -1.0}, + {"2 1 bb", -2.0}, + {"2 1 bp", -1.0}, + {"2 1 p", -1.0}, + {"2 1 pb", -2.0}, + {"2 1 pbb", -2.0}, + {"2 1 pbp", 1.0}, + {"2 1 pp", -1.0}}; + return history_and_probs; +} + +// The "GetKuhnEdIterNPolicy" functions return the policy that is dumped out by +// the exploitability_descent_test when running exploitability descent for N +// iterations. They are included here as a regression test, +// as the C++ best response code has been unable to replicate the existing +// results due to erroneously included state. This is fixed as of cl/238531924. 
+TabularPolicy GetKuhnEdIter1Policy() { + return TabularPolicy({{"0", {{0, 0.5}, {1, 0.5}}}, + {"0b", {{0, 0.5}, {1, 0.5}}}, + {"0p", {{0, 0.5}, {1, 0.5}}}, + {"0pb", {{0, 0.5}, {1, 0.5}}}, + {"1", {{0, 0.5}, {1, 0.5}}}, + {"1b", {{0, 0.5}, {1, 0.5}}}, + {"1p", {{0, 0.5}, {1, 0.5}}}, + {"1pb", {{0, 0.5}, {1, 0.5}}}, + {"2", {{0, 0.5}, {1, 0.5}}}, + {"2b", {{0, 0.5}, {1, 0.5}}}, + {"2p", {{0, 0.5}, {1, 0.5}}}, + {"2pb", {{0, 0.5}, {1, 0.5}}}}); +} + +TabularPolicy GetKuhnEdIter4Policy() { + return TabularPolicy({{"0", {{0, 0.567034158868}, {1, 0.432965841132}}}, + {"0b", {{0, 0.602000197743}, {1, 0.397999802257}}}, + {"0p", {{0, 0.520821285373}, {1, 0.479178714627}}}, + {"0pb", {{0, 0.621126761233}, {1, 0.378873238767}}}, + {"1", {{0, 0.505160629764}, {1, 0.494839370236}}}, + {"1b", {{0, 0.360357968472}, {1, 0.639642031528}}}, + {"1p", {{0, 0.520821285373}, {1, 0.479178714627}}}, + {"1pb", {{0, 0.378873238767}, {1, 0.621126761233}}}, + {"2", {{0, 0.419580194883}, {1, 0.580419805117}}}, + {"2b", {{0, 0.202838286881}, {1, 0.797161713119}}}, + {"2p", {{0, 0.5}, {1, 0.5}}}, + {"2pb", {{0, 0.202838286881}, {1, 0.797161713119}}}}); +} + +void CheckBestResponsesAgaintGoldenResponses( + const InfostatesAndActions& golden_actions, + std::unordered_map& best_responses) { + SPIEL_CHECK_EQ(best_responses.size(), golden_actions.size()); + for (const auto& infostate_and_best_response : golden_actions) { + const std::string& infostate = infostate_and_best_response.first; + Action action = infostate_and_best_response.second; + auto it = best_responses.find(infostate); + if (it == best_responses.end()) + SpielFatalError(absl::StrCat("Infostate ", infostate, + " not found in best_responses.")); + if (it->second != action) { + SpielFatalError(absl::StrCat( + "Wrong best response at infostate ", infostate, "; expected ", action, + " but received ", best_responses[infostate])); + } + } +} + +void CheckBestResponseAgainstGoldenPolicy( + const Game& game, Player best_responder, const TabularPolicy& policy, + const InfostatesAndActions& golden_actions) { + TabularBestResponse best_response(game, best_responder, &policy); + best_response.Value(game.NewInitialState()->ToString()); + std::unordered_map best_responses = + best_response.GetBestResponseActions(); + CheckBestResponsesAgaintGoldenResponses(golden_actions, best_responses); +} + +InfostatesAndActions GetKuhnUniformBestResponsePid0() { + return InfostatesAndActions( + {{"0", 1}, {"0pb", 0}, {"1", 1}, {"1pb", 1}, {"2", 0}, {"2pb", 1}}); +} + +InfostatesAndActions GetKuhnUniformBestResponsePid1() { + return InfostatesAndActions( + {{"0b", 0}, {"0p", 1}, {"1b", 1}, {"1p", 1}, {"2b", 1}, {"2p", 1}}); +} + +// The best response values are taken from the existing Python implementation in +// open_spiel/algorithms/exploitability.py. +void KuhnPokerUniformBestResponsePid0() { + std::shared_ptr game = LoadGame("kuhn_poker"); + TabularPolicy policy = GetUniformPolicy(*game); + CheckBestResponseAgainstGoldenPolicy(*game, /*best_responder=*/Player{0}, + policy, + GetKuhnUniformBestResponsePid0()); +} + +// The best response values are taken from the existing Python implementation in +// open_spiel/algorithms/exploitability.py. +void KuhnPokerUniformBestResponsePid1() { + std::shared_ptr game = LoadGame("kuhn_poker"); + TabularPolicy policy = GetUniformPolicy(*game); + CheckBestResponseAgainstGoldenPolicy(*game, /*best_responder=*/Player{1}, + policy, + GetKuhnUniformBestResponsePid1()); +} + +// The following are regression tests. 
They should produce the same result, but +// didn't previously due to a caching bug. +InfostatesAndActions GetExploitabilityDescentBestResponses() { + return InfostatesAndActions( + {{"0b", 0}, {"0p", 0}, {"1b", 1}, {"1p", 1}, {"2b", 1}, {"2p", 1}}); +} + +void KuhnPokerExploitabilityDescentIteration4BestResponsePid0() { + std::shared_ptr game = LoadGame("kuhn_poker"); + CheckBestResponseAgainstGoldenPolicy(*game, /*best_responder=*/Player{1}, + GetKuhnEdIter4Policy(), + GetExploitabilityDescentBestResponses()); +} + +void KuhnPokerUniformBestResponseAfterSwitchingPolicies() { + std::shared_ptr game = LoadGame("kuhn_poker"); + TabularPolicy policy = GetKuhnEdIter4Policy(); + TabularBestResponse response(*game, Player{1}, &policy); + + // Check that it's good + InfostatesAndActions ed_golden_actions = + GetExploitabilityDescentBestResponses(); + std::unordered_map best_responses = + response.GetBestResponseActions(); + CheckBestResponsesAgaintGoldenResponses(ed_golden_actions, best_responses); + + // Swap policies, and check again. + policy = GetUniformPolicy(*game); + response.SetPolicy(&policy); + + // Check that this equals + InfostatesAndActions actual_best_responses = GetKuhnUniformBestResponsePid1(); + best_responses = response.GetBestResponseActions(); + CheckBestResponsesAgaintGoldenResponses(actual_best_responses, + best_responses); +} + +// The best response values are taken from the existing Python implementation in +// open_spiel/algorithms/exploitability.py. +void KuhnPokerOptimalBestResponsePid0() { + std::shared_ptr game = LoadGame("kuhn_poker"); + TabularPolicy policy = kuhn_poker::GetOptimalPolicy(/*alpha=*/0.2); + InfostatesAndActions actual_best_responses = { + {"0", 0}, {"0pb", 0}, {"1", 0}, {"1pb", 0}, {"2", 0}, {"2pb", 1}}; + CheckBestResponseAgainstGoldenPolicy(*game, /*best_responder=*/Player{0}, + policy, actual_best_responses); +} + +// The best response values are taken from the existing Python implementation in +// open_spiel/algorithms/exploitability.py. +void KuhnPokerOptimalBestResponsePid1() { + std::shared_ptr game = LoadGame("kuhn_poker"); + TabularPolicy policy = kuhn_poker::GetOptimalPolicy(/*alpha=*/0.2); + InfostatesAndActions actual_best_responses = { + {"0b", 0}, {"0p", 0}, {"1p", 0}, {"1b", 0}, {"2p", 1}, {"2b", 1}}; + CheckBestResponseAgainstGoldenPolicy(*game, /*best_responder=*/Player{1}, + policy, actual_best_responses); +} + +void KuhnPokerExploitabilityDescentMinimalSimulationPid0() { + std::shared_ptr game = LoadGame("kuhn_poker"); + auto best_responder = Player{1}; + + // We create a best responder with one policy... + TabularPolicy kuhn_ed_iter1_policy = GetKuhnEdIter1Policy(); + TabularBestResponse best_response(*game, best_responder, + &kuhn_ed_iter1_policy); + + // Calculate all the best responses... + best_response.Value(game->NewInitialState()->ToString()); + + // And then set a new policy. This *shouldn't* change the result- it should + // produce the same result as in the test above which does this calculation + // with best_response initialized with the GetKuhnEdIter4Policy, but due to + // improperly resetting the caches, that was not the case previously. 
+ TabularPolicy kuhn_ed_iter4_policy = GetKuhnEdIter4Policy(); + best_response.SetPolicy(&kuhn_ed_iter4_policy); + best_response.Value(game->NewInitialState()->ToString()); + auto best_responses = best_response.GetBestResponseActions(); + auto actual_best_responses = GetExploitabilityDescentBestResponses(); + SPIEL_CHECK_EQ(best_responses.size(), actual_best_responses.size()); + for (const auto& infostate_and_action : actual_best_responses) { + const std::string& infostate = infostate_and_action.first; + Action action = infostate_and_action.second; + auto it = best_responses.find(infostate); + if (it == best_responses.end()) + SpielFatalError(absl::StrCat("Infostate ", infostate, + " not found in best_responses.")); + if (it->second != action) { + SpielFatalError(absl::StrCat( + "Wrong best response at infostate ", infostate, "; expected ", action, + " but received ", best_responses[infostate])); + } + } +} + +void CheckBestResponseValuesAgainstGoldenValues( + const Game& game, Player best_responder, const TabularPolicy& policy, + const std::vector>& golden_values) { + TabularBestResponse best_response(game, best_responder, &policy); + for (const auto& history_and_value : golden_values) { + const std::string& history = history_and_value.first; + if (!Near(best_response.Value(history), history_and_value.second)) { + SpielFatalError(absl::StrCat("Value calculated for history '", history, + "' is equal to ", + best_response.Value(history), " but ", + history_and_value.second, " was expected.")); + } + } +} + +void TestLeducPolicyFindsIllegalAction() { + std::shared_ptr game = LoadGame("leduc_poker"); + TabularPolicy policy = open_spiel::GetUniformPolicy(*game); + const std::string troublesome_infostate = + "[Round 2][Player: 0][Pot: 14][Money: 95 91[Private: 0]][Round1]: 1 " + "1[Public: 2]\nRound 2 sequence: 2 2"; + policy.PolicyTable()[troublesome_infostate] = + ActionsAndProbs({{0, 0.5}, {1, 0.5}, {2, 0}}); + double exploitability = Exploitability(*game, policy); + SPIEL_CHECK_GE(exploitability, 0); +} + +void TestExploitability( + const std::string& game_name, + std::function policy_factory, + double expected_value) { + std::shared_ptr game = LoadGame(game_name); + TabularPolicy policy = policy_factory(*game); + double exploitability = Exploitability(*game, policy); + if (!Near(exploitability, expected_value)) { + SpielFatalError(absl::StrCat("Exploitability was ", exploitability, + " but expected ", expected_value)); + } +} + +void TestNashConv(const std::string& game_name, + std::function policy_factory, + double expected_value) { + std::shared_ptr game = LoadGame(game_name); + TabularPolicy policy = policy_factory(*game); + double nash_conv = NashConv(*game, policy); + if (!Near(nash_conv, expected_value)) { + SpielFatalError(absl::StrCat("In game ", game_name, " NashConv was ", + nash_conv, " but expected ", expected_value)); + } +} + +} // namespace +} // namespace algorithms +} // namespace open_spiel + +int main(int argc, char** argv) { + // The optimal policy is a Nash equilibrium, so there are 0 gains available + // for either player by switching. + auto optimal_factory = [](const open_spiel::Game&) { + return open_spiel::kuhn_poker::GetOptimalPolicy(/*alpha=*/0.2); + }; + open_spiel::algorithms::TestExploitability("kuhn_poker", optimal_factory, 0.); + open_spiel::algorithms::TestNashConv("kuhn_poker", optimal_factory, 0.); + + // Smoke tests to verify that we can calculate exploitability in Leduc; this + // is a regression test, as we previously had some bugs in Leduc. 
+ open_spiel::algorithms::TestLeducPolicyFindsIllegalAction(); + + // NashConv values for the uniform policies verified against multiple + // existing implementations. + open_spiel::algorithms::TestExploitability( + "kuhn_poker", open_spiel::GetUniformPolicy, 0.4583333333333335); + open_spiel::algorithms::TestExploitability( + "leduc_poker", open_spiel::GetUniformPolicy, 2.373611111111111); + open_spiel::algorithms::TestNashConv( + "kuhn_poker", open_spiel::GetUniformPolicy, 0.916666666666667); + open_spiel::algorithms::TestNashConv( + "leduc_poker", open_spiel::GetUniformPolicy, 4.747222222222222); + + // The first action policy is AlwaysFold in poker. If you always fold, you win + // 0 chips, but if you switch to AlwaysBet, you win 1 chip every time if + // playing against a player who always folds, so NashConv is 1 + 1 = 2, + // leading to exploitability of 2/2 = 1. + open_spiel::algorithms::TestExploitability( + "kuhn_poker", open_spiel::GetFirstActionPolicy, 1.); + open_spiel::algorithms::TestExploitability( + "leduc_poker", open_spiel::GetFirstActionPolicy, 1.); + open_spiel::algorithms::TestNashConv("kuhn_poker", + open_spiel::GetFirstActionPolicy, 2.); + open_spiel::algorithms::TestNashConv("leduc_poker", + open_spiel::GetFirstActionPolicy, 2.); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_q_learning.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_q_learning.cc new file mode 100644 index 0000000..dd784d3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_q_learning.cc @@ -0,0 +1,193 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/tabular_q_learning.h" + +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/abseil-cpp/absl/random/random.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { + +using std::vector; + +Action TabularQLearningSolver::GetBestAction(const State& state, + double min_utility) { + vector legal_actions = state.LegalActions(); + SPIEL_CHECK_GT(legal_actions.size(), 0); + const auto state_str = state.ToString(); + + Action best_action = legal_actions[0]; + double value = min_utility; + for (const Action& action : legal_actions) { + double q_val = values_[{state_str, action}]; + if (q_val >= value) { + value = q_val; + best_action = action; + } + } + return best_action; +} + +double TabularQLearningSolver::GetBestActionValue(const State& state, + double min_utility) { + if (state.IsTerminal()) { + // q(s,a) is 0 when s is terminal. 
+ return 0; + } + return values_[{state.ToString(), GetBestAction(state, min_utility)}]; +} + +std::pair +TabularQLearningSolver::SampleActionFromEpsilonGreedyPolicy( + const State& state, double min_utility) { + vector legal_actions = state.LegalActions(); + if (legal_actions.empty()) { + return {kInvalidAction, false}; + } + + if (absl::Uniform(rng_, 0.0, 1.0) < epsilon_) { + // Choose a random action + return {legal_actions[absl::Uniform(rng_, 0, legal_actions.size())], + true}; + } + // Choose the best action + return {GetBestAction(state, min_utility), false}; +} + +void TabularQLearningSolver::SampleUntilNextStateOrTerminal(State* state) { + // Repeatedly sample while chance node, so that we end up at a decision node + while (state->IsChanceNode() && !state->IsTerminal()) { + std::vector> outcomes = state->ChanceOutcomes(); + state->ApplyAction(SampleAction(outcomes, rng_).first); + } +} + +TabularQLearningSolver::TabularQLearningSolver(std::shared_ptr game) + : game_(game), + depth_limit_(kDefaultDepthLimit), + epsilon_(kDefaultEpsilon), + learning_rate_(kDefaultLearningRate), + discount_factor_(kDefaultDiscountFactor), + lambda_(kDefaultLambda) { + SPIEL_CHECK_LE(lambda_, 1); + SPIEL_CHECK_GE(lambda_, 0); + + // Currently only supports 1-player or 2-player zero sum games + SPIEL_CHECK_TRUE(game_->NumPlayers() == 1 || game_->NumPlayers() == 2); + if (game_->NumPlayers() == 2) { + SPIEL_CHECK_EQ(game_->GetType().utility, GameType::Utility::kZeroSum); + } + + // No support for simultaneous games (needs an LP solver). And so also must + // be a perfect information game. + SPIEL_CHECK_EQ(game_->GetType().dynamics, GameType::Dynamics::kSequential); + SPIEL_CHECK_EQ(game_->GetType().information, + GameType::Information::kPerfectInformation); +} + +TabularQLearningSolver::TabularQLearningSolver( + std::shared_ptr game, double depth_limit, double epsilon, + double learning_rate, double discount_factor, double lambda) + : game_(game), + depth_limit_(depth_limit), + epsilon_(epsilon), + learning_rate_(learning_rate), + discount_factor_(discount_factor), + lambda_(lambda) { + SPIEL_CHECK_LE(lambda_, 1); + SPIEL_CHECK_GE(lambda_, 0); + + // Currently only supports 1-player or 2-player zero sum games + SPIEL_CHECK_TRUE(game_->NumPlayers() == 1 || game_->NumPlayers() == 2); + if (game_->NumPlayers() == 2) { + SPIEL_CHECK_EQ(game_->GetType().utility, GameType::Utility::kZeroSum); + } + + // No support for simultaneous games (needs an LP solver). And so also must + // be a perfect information game. 
+ SPIEL_CHECK_EQ(game_->GetType().dynamics, GameType::Dynamics::kSequential); + SPIEL_CHECK_EQ(game_->GetType().information, + GameType::Information::kPerfectInformation); +} + +const absl::flat_hash_map, double>& +TabularQLearningSolver::GetQValueTable() const { + return values_; +} + +void TabularQLearningSolver::RunIteration() { + const double min_utility = game_->MinUtility(); + // Choose start state + std::unique_ptr curr_state = game_->NewInitialState(); + SampleUntilNextStateOrTerminal(curr_state.get()); + + while (!curr_state->IsTerminal()) { + const Player player = curr_state->CurrentPlayer(); + + // Sample action from the state using an epsilon-greedy policy + auto [curr_action, chosen_uniformly] = + SampleActionFromEpsilonGreedyPolicy(*curr_state, min_utility); + + std::unique_ptr next_state = curr_state->Child(curr_action); + SampleUntilNextStateOrTerminal(next_state.get()); + + const double reward = next_state->Rewards()[player]; + // Next q-value in perspective of player to play at curr_state (important + // note: exploits property of two-player zero-sum) + const double next_q_value = + (player != next_state->CurrentPlayer() ? -1 : 1) * + GetBestActionValue(*next_state, min_utility); + + // Update the q value + std::string key = curr_state->ToString(); + double new_q_value = reward + discount_factor_ * next_q_value; + + double prev_q_val = values_[{key, curr_action}]; + if (lambda_ == 0) { + // If lambda_ is equal to zero run Q-learning as usual. + // It's not necessary to update eligibility traces. + values_[{key, curr_action}] += + learning_rate_ * (new_q_value - prev_q_val); + } else { + double lambda = + player != next_state->CurrentPlayer() ? -lambda_ : lambda_; + eligibility_traces_[{key, curr_action}] += 1; + + for (const auto& q_cell : values_) { + std::string state = q_cell.first.first; + Action action = q_cell.first.second; + + values_[{state, action}] += learning_rate_ * + (new_q_value - prev_q_val) * + eligibility_traces_[{state, action}]; + if (chosen_uniformly) { + eligibility_traces_[{state, action}] = 0; + } else { + eligibility_traces_[{state, action}] *= discount_factor_ * lambda; + } + } + } + + curr_state = std::move(next_state); + } +} +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_q_learning.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_q_learning.h new file mode 100644 index 0000000..2188612 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_q_learning.h @@ -0,0 +1,98 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
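For orientation, a minimal training loop over the solver implemented above (illustrative sketch only; the `TrainCatch` helper name is hypothetical, and `catch` with the default hyper-parameters mirrors the accompanying test file):

```
// Sketch only: plain tabular Q-learning (lambda = 0) on the single-player
// "catch" game, then read back the learned state-action values.
#include <cstdio>
#include <memory>

#include "open_spiel/algorithms/tabular_q_learning.h"
#include "open_spiel/spiel.h"

void TrainCatch() {  // hypothetical helper
  std::shared_ptr<const open_spiel::Game> game = open_spiel::LoadGame("catch");
  // Single-argument constructor uses the defaults declared in the header:
  // epsilon = 0.01, learning_rate = 0.01, discount_factor = 0.99, lambda = 0.
  open_spiel::algorithms::TabularQLearningSolver solver(game);
  for (int i = 0; i < 100; ++i) {
    solver.RunIteration();  // one full episode from the initial state
  }
  // Keys are (State::ToString(), Action) pairs.
  const auto& q_values = solver.GetQValueTable();
  std::printf("learned %zu state-action values\n", q_values.size());
}
```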
+ +#ifndef OPEN_SPIEL_ALGORITHMS_TABULAR_Q_LEARNING_H_ +#define OPEN_SPIEL_ALGORITHMS_TABULAR_Q_LEARNING_H_ + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/abseil-cpp/absl/random/random.h" +#include "open_spiel/algorithms/get_all_states.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace algorithms { + +// Tabular Q-learning algorithm: solves for the optimal action value function +// of a game. +// It considers all states with depth at most depth_limit from the +// initial state (so if depth_limit is 0, only the root is considered). +// If depth limit is negative, all states are considered. +// +// Currently works for sequential 1-player or 2-player zero-sum games. +// +// Based on the implementation in Sutton and Barto, Intro to RL. Second Edition, +// 2018. Section 6.5. +// +// Includes implementation of Watkins’s Q(lambda) which can be found in +// Sutton and Barto, Intro to RL. Second Edition, 2018. Section 12.10. +// (E.g. https://www.andrew.cmu.edu/course/10-703/textbook/BartoSutton.pdf) +// Eligibility traces are implemented with the "accumulate" +// method (+1 at each iteration) instead of "replace" implementation +// (doesn't sum trace values). Parameter lambda_ determines the level +// of bootstraping. + +class TabularQLearningSolver { + static inline constexpr double kDefaultDepthLimit = -1; + static inline constexpr double kDefaultEpsilon = 0.01; + static inline constexpr double kDefaultLearningRate = 0.01; + static inline constexpr double kDefaultDiscountFactor = 0.99; + static inline constexpr double kDefaultLambda = 0; + + public: + TabularQLearningSolver(std::shared_ptr game); + + TabularQLearningSolver(std::shared_ptr game, double depth_limit, + double epsilon, double learning_rate, + double discount_factor, double lambda); + + void RunIteration(); + + const absl::flat_hash_map, double>& + GetQValueTable() const; + + private: + // Given a player and a state, gets the best possible action from this state + Action GetBestAction(const State& state, double min_utility); + + // Given a state, gets the best possible action value from this state + double GetBestActionValue(const State& state, double min_utility); + + // Given a player and a state, gets the action, sampled from an epsilon-greedy + // policy. Returns where the second element + // indicates whether an action was chosen uniformly (which occurs with epsilon + // chance). 
+ std::pair SampleActionFromEpsilonGreedyPolicy( + const State& state, double min_utility); + + // Moves a chance node to the next decision/terminal node by sampling from + // the legal actions repeatedly + void SampleUntilNextStateOrTerminal(State* state); + + std::shared_ptr game_; + int depth_limit_; + double epsilon_; + double learning_rate_; + double discount_factor_; + double lambda_; + std::mt19937 rng_; + absl::flat_hash_map, double> values_; + absl::flat_hash_map, double> + eligibility_traces_; +}; + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_TABULAR_Q_LEARNING_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_q_learning_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_q_learning_test.cc new file mode 100644 index 0000000..d1ecc2a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_q_learning_test.cc @@ -0,0 +1,228 @@ +// Copyright 2023 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/tabular_q_learning.h" + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/abseil-cpp/absl/random/random.h" +#include "open_spiel/games/catch/catch.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace { + +Action GetOptimalAction( + absl::flat_hash_map, double> q_values, + const std::unique_ptr &state) { + std::vector legal_actions = state->LegalActions(); + const auto state_str = state->ToString(); + + Action optimal_action = open_spiel::kInvalidAction; + double value = -1; + for (const Action &action : legal_actions) { + double q_val = q_values[{state_str, action}]; + if (q_val >= value) { + value = q_val; + optimal_action = action; + } + } + return optimal_action; +} + +Action GetRandomAction(const std::unique_ptr &state, int seed) { + std::vector legal_actions = state->LegalActions(); + if (legal_actions.empty()) { + return kInvalidAction; + } + std::mt19937 rng(seed); + return legal_actions[absl::Uniform(rng, 0, legal_actions.size())]; +} + +double PlayCatch( + absl::flat_hash_map, double> q_values, + const std::unique_ptr &state, double seed) { + // First action determines the starting column. Do the first action before the + // main loop, where the optimal action is chosen. + // Example: Initial state with random seed 42 + // ...o. + // ..... + // ..... + // ..... + // ..... + // ..... + // ..... + // ..... + // ..... + // ..x.. 
+ std::mt19937 gen(seed); + std::uniform_int_distribution distribution(0, + catch_::kDefaultColumns - 1); + int ball_starting_column = distribution(gen); + state->ApplyAction(ball_starting_column); + + while (!state->IsTerminal()) { + Action optimal_action = GetOptimalAction(q_values, state); + state->ApplyAction(optimal_action); + } + + return state->Rewards()[0]; +} + +std::unique_ptr QLearningSolver( + std::shared_ptr game, double lambda) { + return std::make_unique( + /*game=*/game, + /*depth_limit=*/-1.0, + /*epsilon=*/0.1, + /*learning_rate=*/0.01, + /*discount_factor=*/0.99, + /*lambda=*/lambda); +} + +void TabularQLearningTest_Catch_Lambda00_Loss() { + // Classic Q-learning. No bootstraping (lambda=0.0) + // Player loses after only 1 train iteration. + std::shared_ptr game = LoadGame("catch"); + auto tabular_q_learning_solver = QLearningSolver(game, 0); + + tabular_q_learning_solver->RunIteration(); + const absl::flat_hash_map, double>& q_values = + tabular_q_learning_solver->GetQValueTable(); + std::unique_ptr state = game->NewInitialState(); + + double reward = PlayCatch(q_values, state, 42); + SPIEL_CHECK_EQ(reward, -1); +} + +void TabularQLearningTest_Catch_Lambda00_Win() { + // Classic Q-learning. No bootstraping (lambda=0.0) + // Player wins after 100 train iterations + std::shared_ptr game = LoadGame("catch"); + auto tabular_q_learning_solver = QLearningSolver(game, 0); + + for (int i = 1; i < 100; i++) { + tabular_q_learning_solver->RunIteration(); + } + const absl::flat_hash_map, double>& q_values = + tabular_q_learning_solver->GetQValueTable(); + std::unique_ptr state = game->NewInitialState(); + + double reward = PlayCatch(q_values, state, 42); + SPIEL_CHECK_EQ(reward, 1); +} + +void TabularQLearningTest_Catch_Lambda01_Win() { + // Player wins after 100 train iterations + std::shared_ptr game = LoadGame("catch"); + auto tabular_q_learning_solver = QLearningSolver(game, 0.1); + + for (int i = 1; i < 100; i++) { + tabular_q_learning_solver->RunIteration(); + } + const absl::flat_hash_map, double>& q_values = + tabular_q_learning_solver->GetQValueTable(); + std::unique_ptr state = game->NewInitialState(); + + double reward = PlayCatch(q_values, state, 42); + SPIEL_CHECK_EQ(reward, 1); +} + +void TabularQLearningTest_Catch_Lambda01FasterThanLambda00() { + // Eligibility traces (lambda > 0.0) always achieves victory with less + // training steps w.r.t. 
Q-learning(lambda=0.0) + std::shared_ptr game = LoadGame("catch"); + auto tabular_q_learning_solver_lambda00 = QLearningSolver(game, 0); + auto tabular_q_learning_solver_lambda01 = QLearningSolver(game, 0.1); + + for (int seed = 0; seed < 100; seed++) { + int lambda_00_train_iter = 0; + int lambda_01_train_iter = 0; + double lambda_00_reward = -1.0; + double lambda_01_reward = -1.0; + + while (lambda_00_reward == -1.0) { + tabular_q_learning_solver_lambda00->RunIteration(); + std::unique_ptr state = game->NewInitialState(); + lambda_00_reward = PlayCatch( + tabular_q_learning_solver_lambda00->GetQValueTable(), state, seed); + lambda_00_train_iter++; + } + while (lambda_01_reward == -1.0) { + tabular_q_learning_solver_lambda01->RunIteration(); + std::unique_ptr state = game->NewInitialState(); + lambda_01_reward = PlayCatch( + tabular_q_learning_solver_lambda01->GetQValueTable(), state, seed); + lambda_01_train_iter++; + } + SPIEL_CHECK_GE(lambda_00_train_iter, lambda_01_train_iter); + } +} + +void TabularQLearningTest_TicTacToe_Lambda01_Win() { + std::shared_ptr game = open_spiel::LoadGame("tic_tac_toe"); + auto tabular_q_learning_solver = QLearningSolver(game, 0.1); + + for (int i = 1; i < 100; i++) { + tabular_q_learning_solver->RunIteration(); + } + + const absl::flat_hash_map, double>& q_values = + tabular_q_learning_solver->GetQValueTable(); + std::unique_ptr state = game->NewInitialState(); + + while (!state->IsTerminal()) { + Action random_action = GetRandomAction(state, 42); + state->ApplyAction(random_action); // player 0 + if (random_action == kInvalidAction) break; + state->ApplyAction(GetOptimalAction(q_values, state)); // player 1 + } + + SPIEL_CHECK_EQ(state->Rewards()[0], -1); +} + +void TabularQLearningTest_TicTacToe_Lambda01_Tie() { + std::shared_ptr game = open_spiel::LoadGame("tic_tac_toe"); + auto tabular_q_learning_solver = QLearningSolver(game, 0.1); + + for (int i = 1; i < 1000; i++) { + tabular_q_learning_solver->RunIteration(); + } + + const absl::flat_hash_map, double>& q_values = + tabular_q_learning_solver->GetQValueTable(); + std::unique_ptr state = game->NewInitialState(); + + while (!state->IsTerminal()) { + state->ApplyAction(GetOptimalAction(q_values, state)); + } + + SPIEL_CHECK_EQ(state->Rewards()[0], 0); +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char **argv) { + open_spiel::TabularQLearningTest_Catch_Lambda00_Loss(); + open_spiel::TabularQLearningTest_Catch_Lambda00_Win(); + open_spiel::TabularQLearningTest_Catch_Lambda01_Win(); + open_spiel::TabularQLearningTest_Catch_Lambda01FasterThanLambda00(); + open_spiel::TabularQLearningTest_TicTacToe_Lambda01_Win(); + open_spiel::TabularQLearningTest_TicTacToe_Lambda01_Tie(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_sarsa.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_sarsa.cc new file mode 100644 index 0000000..ece8eae --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_sarsa.cc @@ -0,0 +1,193 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/tabular_sarsa.h" + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/abseil-cpp/absl/random/random.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { + +using std::vector; + +Action TabularSarsaSolver::GetBestAction(const State& state, + double min_utility) { + vector legal_actions = state.LegalActions(); + SPIEL_CHECK_GT(legal_actions.size(), 0); + const auto state_str = state.ToString(); + + Action best_action = legal_actions[0]; + double value = min_utility; + for (const Action& action : legal_actions) { + double q_val = values_[{state_str, action}]; + if (q_val >= value) { + value = q_val; + best_action = action; + } + } + return best_action; +} + +Action TabularSarsaSolver::SampleActionFromEpsilonGreedyPolicy( + const State& state, double min_utility) { + vector legal_actions = state.LegalActions(); + if (legal_actions.empty()) { + return kInvalidAction; + } + + if (absl::Uniform(rng_, 0.0, 1.0) < epsilon_) { + // Choose a random action + return legal_actions[absl::Uniform(rng_, 0, legal_actions.size())]; + } + // Choose the best action + return GetBestAction(state, min_utility); +} + +void TabularSarsaSolver::SampleUntilNextStateOrTerminal(State* state) { + // Repeatedly sample while chance node, so that we end up at a decision node + while (state->IsChanceNode() && !state->IsTerminal()) { + std::vector> outcomes = state->ChanceOutcomes(); + state->ApplyAction(SampleAction(outcomes, rng_).first); + } +} + +TabularSarsaSolver::TabularSarsaSolver(std::shared_ptr game) + : game_(game), + depth_limit_(kDefaultDepthLimit), + epsilon_(kDefaultEpsilon), + learning_rate_(kDefaultLearningRate), + discount_factor_(kDefaultDiscountFactor), + lambda_(kDefaultLambda) { + SPIEL_CHECK_LE(lambda_, 1); + SPIEL_CHECK_GE(lambda_, 0); + + // Currently only supports 1-player or 2-player zero sum games + SPIEL_CHECK_TRUE(game_->NumPlayers() == 1 || game_->NumPlayers() == 2); + if (game_->NumPlayers() == 2) { + SPIEL_CHECK_EQ(game_->GetType().utility, GameType::Utility::kZeroSum); + } + + // No support for simultaneous games (needs an LP solver). And so also must + // be a perfect information game. + SPIEL_CHECK_EQ(game_->GetType().dynamics, GameType::Dynamics::kSequential); + SPIEL_CHECK_EQ(game_->GetType().information, + GameType::Information::kPerfectInformation); +} + +TabularSarsaSolver::TabularSarsaSolver(std::shared_ptr game, + double depth_limit, double epsilon, + double learning_rate, + double discount_factor, double lambda) + : game_(game), + depth_limit_(depth_limit), + epsilon_(epsilon), + learning_rate_(learning_rate), + discount_factor_(discount_factor), + lambda_(lambda) { + SPIEL_CHECK_LE(lambda_, 1); + SPIEL_CHECK_GE(lambda_, 0); + + // Currently only supports 1-player or 2-player zero sum games + SPIEL_CHECK_TRUE(game_->NumPlayers() == 1 || game_->NumPlayers() == 2); + if (game_->NumPlayers() == 2) { + SPIEL_CHECK_EQ(game_->GetType().utility, GameType::Utility::kZeroSum); + } + + // No support for simultaneous games (needs an LP solver). And so also must + // be a perfect information game. 
+ SPIEL_CHECK_EQ(game_->GetType().dynamics, GameType::Dynamics::kSequential); + SPIEL_CHECK_EQ(game_->GetType().information, + GameType::Information::kPerfectInformation); +} + +const absl::flat_hash_map, double>& +TabularSarsaSolver::GetQValueTable() const { + return values_; +} + +void TabularSarsaSolver::RunIteration() { + double min_utility = game_->MinUtility(); + // Choose start state + std::unique_ptr curr_state = game_->NewInitialState(); + SampleUntilNextStateOrTerminal(curr_state.get()); + + Player player = curr_state->CurrentPlayer(); + // Sample action from the state using an epsilon-greedy policy + Action curr_action = + SampleActionFromEpsilonGreedyPolicy(*curr_state, min_utility); + std::unique_ptr next_state; + SPIEL_CHECK_NE(curr_action, kInvalidAction); + + while (!curr_state->IsTerminal()) { + player = curr_state->CurrentPlayer(); + + next_state = curr_state->Child(curr_action); + SampleUntilNextStateOrTerminal(next_state.get()); + const double reward = next_state->Rewards()[player]; + + const Action next_action = + next_state->IsTerminal() + ? kInvalidAction + : SampleActionFromEpsilonGreedyPolicy(*next_state, min_utility); + + // Update the new q value + std::string key = curr_state->ToString(); + // Next q-value in perspective of player to play at curr_state (important + // note: exploits property of two-player zero-sum). Define the value of + // q(s', a') to be 0 if s' is terminal. + const double future_value = + next_state->IsTerminal() + ? 0 + : values_[{next_state->ToString(), next_action}]; + const double next_q_value = + (player != next_state->CurrentPlayer() ? -1 : 1) * future_value; + double new_q_value = reward + discount_factor_ * next_q_value; + + double prev_q_val = values_[{key, curr_action}]; + if (lambda_ == 0) { + // If lambda_ is equal to zero, run sarsa as usual. It's not necessary + // to update eligibility traces. + values_[{key, curr_action}] += + learning_rate_ * (new_q_value - prev_q_val); + } else { + double lambda = + player != next_state->CurrentPlayer() ? -lambda_ : lambda_; + eligibility_traces_[{key, curr_action}] += 1; + + for (const auto& q_cell : values_) { + std::string state = q_cell.first.first; + Action action = q_cell.first.second; + + values_[{state, action}] += learning_rate_ * + (new_q_value - prev_q_val) * + eligibility_traces_[{state, action}]; + eligibility_traces_[{state, action}] *= discount_factor_ * lambda; + } + } + + curr_state = std::move(next_state); + curr_action = next_action; + } +} +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_sarsa.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_sarsa.h new file mode 100644 index 0000000..033fa91 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_sarsa.h @@ -0,0 +1,94 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
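The SARSA update above is on-policy: it bootstraps from the epsilon-greedy action actually sampled at the next state rather than from the greedy maximum used by the Q-learning solver. A minimal usage sketch (illustrative only; the `TrainCatchSarsa` helper name is hypothetical, and the hyper-parameters mirror the accompanying test):

```
// Sketch only: SARSA(lambda) on "catch" with a small accumulating trace.
#include <cstdio>
#include <memory>

#include "open_spiel/algorithms/tabular_sarsa.h"
#include "open_spiel/spiel.h"

void TrainCatchSarsa() {  // hypothetical helper
  std::shared_ptr<const open_spiel::Game> game = open_spiel::LoadGame("catch");
  open_spiel::algorithms::TabularSarsaSolver solver(
      /*game=*/game,
      /*depth_limit=*/-1.0,
      /*epsilon=*/0.1,
      /*learning_rate=*/0.01,
      /*discount_factor=*/0.99,
      /*lambda=*/0.1);
  for (int i = 0; i < 100; ++i) {
    solver.RunIteration();
  }
  std::printf("learned %zu state-action values\n",
              solver.GetQValueTable().size());
}
```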
+ +#ifndef OPEN_SPIEL_ALGORITHMS_TABULAR_SARSA_H_ +#define OPEN_SPIEL_ALGORITHMS_TABULAR_SARSA_H_ + +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/abseil-cpp/absl/random/random.h" +#include "open_spiel/algorithms/get_all_states.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace algorithms { + +// SARSA algorithm: solves for the optimal action value function of a game. +// It considers all states with depth at most depth_limit from the +// initial state (so if depth_limit is 0, only the root is considered). +// If depth limit is negative, all states are considered. +// +// Currently works for sequential 1-player or 2-player zero-sum games. +// +// Based on the implementation in Sutton and Barto, Intro to RL. Second Edition, +// 2018. Section 6.4. +// +// Includes implementation of SARSA(lambda) which can be found in +// Sutton and Barto, Intro to RL. Second Edition, 2018. Section 12.7. +// (E.g. https://www.andrew.cmu.edu/course/10-703/textbook/BartoSutton.pdf) +// Eligibility traces are implemented with the "accumulate" +// method (+1 at each iteration) instead of "replace" implementation +// (doesn't sum trace values). Parameter lambda_ determines the level +// of bootstraping. + +class TabularSarsaSolver { + static inline constexpr double kDefaultDepthLimit = -1; + static inline constexpr double kDefaultEpsilon = 0.1; + static inline constexpr double kDefaultLearningRate = 0.01; + static inline constexpr double kDefaultDiscountFactor = 0.99; + static inline constexpr double kDefaultLambda = 0; + + public: + TabularSarsaSolver(std::shared_ptr game); + + TabularSarsaSolver(std::shared_ptr game, double depth_limit, + double epsilon, double learning_rate, + double discount_factor, double lambda); + + void RunIteration(); + + const absl::flat_hash_map, double>& + GetQValueTable() const; + + private: + // Given a player and a state, gets the best possible action from this state + Action GetBestAction(const State& state, double min_utility); + + // Given a player and a state, gets the action, sampled from an epsilon-greedy + // policy + Action SampleActionFromEpsilonGreedyPolicy(const State& state, + double min_utility); + + // Moves a chance node to the next decision/terminal node by sampling from + // the legal actions repeatedly + void SampleUntilNextStateOrTerminal(State* state); + + std::shared_ptr game_; + int depth_limit_; + double epsilon_; + double learning_rate_; + double discount_factor_; + double lambda_; + std::mt19937 rng_; + absl::flat_hash_map, double> values_; + absl::flat_hash_map, double> + eligibility_traces_; +}; + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_TABULAR_SARSA_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_sarsa_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_sarsa_test.cc new file mode 100644 index 0000000..a40ff5f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tabular_sarsa_test.cc @@ -0,0 +1,229 @@ +// Copyright 2023 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/tabular_sarsa.h" + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/abseil-cpp/absl/random/random.h" +#include "open_spiel/games/catch/catch.h" + +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace { + +Action GetOptimalAction( + absl::flat_hash_map, double> q_values, + const std::unique_ptr &state) { + std::vector legal_actions = state->LegalActions(); + Action optimal_action = open_spiel::kInvalidAction; + + double value = -1; + const auto state_str = state->ToString(); + for (const Action &action : legal_actions) { + double q_val = q_values[{state_str, action}]; + if (q_val >= value) { + value = q_val; + optimal_action = action; + } + } + return optimal_action; +} + +Action GetRandomAction(const std::unique_ptr &state, int seed) { + std::vector legal_actions = state->LegalActions(); + if (legal_actions.empty()) { + return kInvalidAction; + } + std::mt19937 rng(seed); + return legal_actions[absl::Uniform(rng, 0, legal_actions.size())]; +} + +double PlayCatch( + absl::flat_hash_map, double> q_values, + const std::unique_ptr &state, double seed) { + // First action determines the starting column. Do the first action before the + // main loop, where the optimal action is chosen. + // Example: Initial state with random seed 42 + // ...o. + // ..... + // ..... + // ..... + // ..... + // ..... + // ..... + // ..... + // ..... + // ..x.. + std::mt19937 gen(seed); + std::uniform_int_distribution distribution(0, + catch_::kDefaultColumns - 1); + int ball_starting_column = distribution(gen); + state->ApplyAction(ball_starting_column); + + while (!state->IsTerminal()) { + Action optimal_action = GetOptimalAction(q_values, state); + state->ApplyAction(optimal_action); + } + + return state->Rewards()[0]; +} + +std::unique_ptr SarsaSolver( + std::shared_ptr game, double lambda) { + return std::make_unique( + /*game=*/game, + /*depth_limit=*/-1.0, + /*epsilon=*/0.1, + /*learning_rate=*/0.01, + /*discount_factor=*/0.99, + /*lambda=*/lambda); +} + +void TabularSarsaTest_Catch_Lambda00_Loss() { + // Classic SARSA. No bootstraping (lambda=0.0) + // Player loses after only 1 train iteration. + std::shared_ptr game = LoadGame("catch"); + auto tabular_sarsa_solver = SarsaSolver(game, 0); + + tabular_sarsa_solver->RunIteration(); + const absl::flat_hash_map, double> &q_values = + tabular_sarsa_solver->GetQValueTable(); + std::unique_ptr state = game->NewInitialState(); + + double reward = PlayCatch(q_values, state, 42); + SPIEL_CHECK_EQ(reward, -1); +} + +void TabularSarsaTest_Catch_Lambda00_Win() { + // Classic SARSA. 
No bootstraping (lambda=0.0) + // Player wins after 100 train iterations + std::shared_ptr game = LoadGame("catch"); + auto tabular_sarsa_solver = SarsaSolver(game, 0); + + for (int i = 1; i < 100; i++) { + tabular_sarsa_solver->RunIteration(); + } + const absl::flat_hash_map, double> &q_values = + tabular_sarsa_solver->GetQValueTable(); + std::unique_ptr state = game->NewInitialState(); + + double reward = PlayCatch(q_values, state, 42); + SPIEL_CHECK_EQ(reward, 1); +} + +void TabularSarsaTest_Catch_Lambda01_Win() { + // Player wins after 100 train iterations + std::shared_ptr game = LoadGame("catch"); + auto tabular_sarsa_solver = SarsaSolver(game, 0.1); + + for (int i = 1; i < 100; i++) { + tabular_sarsa_solver->RunIteration(); + } + const absl::flat_hash_map, double> &q_values = + tabular_sarsa_solver->GetQValueTable(); + std::unique_ptr state = game->NewInitialState(); + + double reward = PlayCatch(q_values, state, 42); + SPIEL_CHECK_EQ(reward, 1); +} + +void TabularSarsaTest_Catch_Lambda01FasterThanLambda00() { + // Eligibility traces (lambda > 0.0) always achieves victory with less + // training steps w.r.t. SARSA(lambda=0.0) + std::shared_ptr game = LoadGame("catch"); + auto tabular_sarsa_solve_lambda00 = SarsaSolver(game, 0); + auto tabular_sarsa_solve_lambda01 = SarsaSolver(game, 0.1); + + for (int seed = 0; seed < 100; seed++) { + int lambda_00_train_iter = 0; + int lambda_01_train_iter = 0; + double lambda_00_reward = -1.0; + double lambda_01_reward = -1.0; + + while (lambda_00_reward == -1.0) { + tabular_sarsa_solve_lambda00->RunIteration(); + std::unique_ptr state = game->NewInitialState(); + lambda_00_reward = PlayCatch( + tabular_sarsa_solve_lambda00->GetQValueTable(), state, seed); + lambda_00_train_iter++; + } + while (lambda_01_reward == -1.0) { + tabular_sarsa_solve_lambda01->RunIteration(); + std::unique_ptr state = game->NewInitialState(); + lambda_01_reward = PlayCatch( + tabular_sarsa_solve_lambda01->GetQValueTable(), state, seed); + lambda_01_train_iter++; + } + SPIEL_CHECK_GE(lambda_00_train_iter, lambda_01_train_iter); + } +} + +void TabularSarsaTest_TicTacToe_Lambda01_Win() { + std::shared_ptr game = open_spiel::LoadGame("tic_tac_toe"); + auto tabular_sarsa_solver = SarsaSolver(game, 0.1); + + for (int i = 1; i < 100; i++) { + tabular_sarsa_solver->RunIteration(); + } + + const absl::flat_hash_map, double> &q_values = + tabular_sarsa_solver->GetQValueTable(); + std::unique_ptr state = game->NewInitialState(); + + while (!state->IsTerminal()) { + Action random_action = GetRandomAction(state, 42); + state->ApplyAction(random_action); // player 0 + if (random_action == kInvalidAction) break; + state->ApplyAction(GetOptimalAction(q_values, state)); // player 1 + } + + SPIEL_CHECK_EQ(state->Rewards()[0], -1); +} + +void TabularSarsaTest_TicTacToe_Lambda01_Tie() { + std::shared_ptr game = open_spiel::LoadGame("tic_tac_toe"); + auto tabular_sarsa_solver = SarsaSolver(game, 0.1); + + for (int i = 1; i < 1000; i++) { + tabular_sarsa_solver->RunIteration(); + } + + const absl::flat_hash_map, double> &q_values = + tabular_sarsa_solver->GetQValueTable(); + std::unique_ptr state = game->NewInitialState(); + + while (!state->IsTerminal()) { + state->ApplyAction(GetOptimalAction(q_values, state)); + } + + SPIEL_CHECK_EQ(state->Rewards()[0], 0); +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char **argv) { + open_spiel::TabularSarsaTest_Catch_Lambda00_Loss(); + open_spiel::TabularSarsaTest_Catch_Lambda00_Win(); + 
open_spiel::TabularSarsaTest_Catch_Lambda01_Win(); + open_spiel::TabularSarsaTest_Catch_Lambda01FasterThanLambda00(); + open_spiel::TabularSarsaTest_TicTacToe_Lambda01_Win(); + open_spiel::TabularSarsaTest_TicTacToe_Lambda01_Tie(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/tensor_game_utils.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tensor_game_utils.cc new file mode 100644 index 0000000..f3e7753 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tensor_game_utils.cc @@ -0,0 +1,94 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/tensor_game_utils.h" + +#include "open_spiel/algorithms/deterministic_policy.h" +#include "open_spiel/algorithms/expected_returns.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace algorithms { + +using open_spiel::tensor_game::TensorGame; + +std::shared_ptr LoadTensorGame(const std::string& name) { + std::shared_ptr game = LoadGame(name); + // Make sure it is indeed a tensor game. + const TensorGame* tensor_game = dynamic_cast(game.get()); + if (tensor_game == nullptr) { + // If it is not already a tensor game, check if it is an NFG. + // If so, convert it. 
+ const NormalFormGame* nfg = dynamic_cast(game.get()); + if (nfg != nullptr) { + return AsTensorGame(nfg); + } else { + SpielFatalError(absl::StrCat("Cannot load ", name, " as a tensor game.")); + } + } + return std::static_pointer_cast(game); +} + +std::shared_ptr AsTensorGame(const Game* game) { + const NormalFormGame* nfg = dynamic_cast(game); + SPIEL_CHECK_TRUE(nfg); + return AsTensorGame(nfg); +} + +std::shared_ptr AsTensorGame(const NormalFormGame* game) { + const int num_players = game->NumPlayers(); + std::unique_ptr initial_state = game->NewInitialState(); + std::vector> legal_actions(num_players); + std::vector> action_names(num_players); + for (Player player = 0; player < num_players; ++player) { + legal_actions[player] = initial_state->LegalActions(player); + for (const Action& action : legal_actions[player]) { + action_names[player].push_back( + initial_state->ActionToString(player, action)); + } + } + std::vector> utils(num_players); + + GameType type = game->GetType(); + type.min_num_players = num_players; + type.max_num_players = num_players; + + std::vector actions(num_players); + bool last_entry; + do { + std::unique_ptr clone = initial_state->Clone(); + clone->ApplyActions(actions); + SPIEL_CHECK_TRUE(clone->IsTerminal()); + std::vector returns = clone->Returns(); + SPIEL_CHECK_EQ(returns.size(), num_players); + for (Player player = 0; player < num_players; ++player) { + utils[player].push_back(returns[player]); + } + last_entry = true; + for (Player player = num_players - 1; player >= 0; --player) { + if (++actions[player] < legal_actions[player].size()) { + last_entry = false; + break; + } else { + actions[player] = 0; + } + } + } while (!last_entry); + + return std::shared_ptr( + new TensorGame(type, {}, action_names, utils)); +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/tensor_game_utils.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tensor_game_utils.h new file mode 100644 index 0000000..f0d903a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tensor_game_utils.h @@ -0,0 +1,43 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_TENSOR_GAMES_UTILS_H_ +#define OPEN_SPIEL_ALGORITHMS_TENSOR_GAMES_UTILS_H_ + +#include +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/tensor_game.h" + +namespace open_spiel { +namespace algorithms { + +// Similar to open_spiel::LoadGame but returns specifically a tensor game type +// so that the subclass's specific methods are accessible. + +std::shared_ptr LoadTensorGame( + const std::string& name); + +// Clones a normal-form game and returns it as a TensorGame. 
+ +std::shared_ptr AsTensorGame( + const NormalFormGame* game); + +std::shared_ptr AsTensorGame(const Game* game); + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_TENSOR_GAME_UTILS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/tensor_game_utils_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tensor_game_utils_test.cc new file mode 100644 index 0000000..93c30e5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/tensor_game_utils_test.cc @@ -0,0 +1,42 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/tensor_game_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace { + +void ConvertToTensorGameTest() { + std::shared_ptr blotto = LoadGame("blotto(players=3)"); + std::shared_ptr tensor_blotto = + AsTensorGame(blotto.get()); + SPIEL_CHECK_EQ(tensor_blotto->Shape()[0], 66); + SPIEL_CHECK_EQ(tensor_blotto->Shape()[1], 66); + SPIEL_CHECK_EQ(tensor_blotto->Shape()[2], 66); + std::cout << "Blotto 0,15,3 = " << tensor_blotto->ActionName(Player{0}, 0) + << " vs " << tensor_blotto->ActionName(Player{1}, 15) << " vs " + << tensor_blotto->ActionName(Player{2}, 3) << " -> utils: " + << tensor_blotto->PlayerUtility(Player{0}, {0, 15, 3}) << "," + << tensor_blotto->PlayerUtility(Player{1}, {0, 15, 3}) << "," + << tensor_blotto->PlayerUtility(Player{2}, {0, 15, 3}) << std::endl; +} + +} // namespace +} // namespace algorithms +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::algorithms::ConvertToTensorGameTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/trajectories.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/trajectories.cc new file mode 100644 index 0000000..a7d2ce2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/trajectories.cc @@ -0,0 +1,234 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
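The tensor_game_utils files above expose two entry points, LoadTensorGame and AsTensorGame, but the accompanying test only exercises AsTensorGame. Below is a minimal sketch (not part of the vendored OpenSpiel sources) of the LoadTensorGame path; the game name "matrix_rps" and the joint action {0, 1} are illustrative assumptions.

```cpp
#include <iostream>

#include "open_spiel/algorithms/tensor_game_utils.h"
#include "open_spiel/spiel.h"

int main() {
  // LoadTensorGame loads a game and, when it is a normal-form game rather
  // than an explicit tensor game, converts it by enumerating joint actions.
  // "matrix_rps" (matrix rock-paper-scissors) is an assumed example game.
  auto rps = open_spiel::algorithms::LoadTensorGame("matrix_rps");

  // Shape()[p] is the number of pure strategies available to player p.
  std::cout << "Strategies: " << rps->Shape()[0] << " x " << rps->Shape()[1]
            << std::endl;

  // PlayerUtility(p, joint_action) indexes the payoff tensor directly.
  std::cout << rps->ActionName(open_spiel::Player{0}, 0) << " vs "
            << rps->ActionName(open_spiel::Player{1}, 1) << " -> "
            << rps->PlayerUtility(open_spiel::Player{0}, {0, 1}) << std::endl;
  return 0;
}
```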
+ +#include "open_spiel/algorithms/trajectories.h" + +#include +#include // NOLINT +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace { +std::string StateKey(const Game& game, const State& state, + Player player = kInvalidPlayer) { + if (game.GetType().provides_information_state_string) { + if (player == kInvalidPlayer) return state.InformationStateString(); + return state.InformationStateString(player); + } else if (game.GetType().provides_observation_string) { + if (player == kInvalidPlayer) return state.ObservationString(); + return state.ObservationString(player); + } + return state.ToString(); +} +} // namespace + +// Initializes a BatchedTrajectory of size [batch_size, T]. +BatchedTrajectory::BatchedTrajectory(int batch_size) : batch_size(batch_size) { + observations.resize(batch_size); + state_indices.resize(batch_size); + legal_actions.resize(batch_size); + actions.resize(batch_size); + player_policies.resize(batch_size); + player_ids.resize(batch_size); + rewards.resize(batch_size); + next_is_terminal.resize(batch_size); + valid.resize(batch_size); +} + +// Moves a trajectory of size [1, T] into the current trajectory at index. +void BatchedTrajectory::MoveTrajectory(int index, + BatchedTrajectory* trajectory) { + // The passed trajectory must have a batch size of 1. + SPIEL_CHECK_EQ(trajectory->batch_size, 1); + max_trajectory_length = + std::max(max_trajectory_length, trajectory->max_trajectory_length); + observations[index] = std::move(trajectory->observations[0]); + state_indices[index] = std::move(trajectory->state_indices[0]); + legal_actions[index] = std::move(trajectory->legal_actions[0]); + actions[index] = std::move(trajectory->actions[0]); + player_policies[index] = std::move(trajectory->player_policies[0]); + player_ids[index] = std::move(trajectory->player_ids[0]); + rewards[index] = trajectory->rewards[0]; + next_is_terminal[index] = std::move(trajectory->next_is_terminal[0]); + valid[index] = std::move(trajectory->valid[0]); +} + +// Pads fields to make sure that they're all the same shape, i.e. [B, T, N], +// where N = the size of each field. +void BatchedTrajectory::ResizeFields(int length) { + if (length > 0) { + SPIEL_CHECK_GE(length, max_trajectory_length); + // We adjust max_trajectory_length as it's no longer correct. + max_trajectory_length = length; + } + // Only works for batches with at least one trajectory as otherwise we can't + // infer the field size. + SPIEL_CHECK_GT(batch_size, 0); + // TODO(author1): Replace this with a multi-threaded version. + for (int i = 0; i < batch_size; ++i) { + // Each field has shape [B, T, field_size], where N is a parameter that is + // fixed for each (game, field) pair. We thus have to get the size of N from + // the existing vectors. + if (!observations[0].empty()) { + observations[i].resize(max_trajectory_length, + std::vector(observations[0][0].size(), 0)); + } + state_indices[i].resize(max_trajectory_length, 0); + legal_actions[i].resize(max_trajectory_length, + std::vector(legal_actions[0][0].size(), 1)); + + // Actions has shape [B, T, 1] + actions[i].resize(max_trajectory_length, 0); + + // legal_actions has shape [B, T, num_distinct_actions], while + // player_policies[0][0].size() <= num_distinct_actions. 
+ player_policies[i].resize( + max_trajectory_length, + std::vector(legal_actions[0][0].size(), 1)); + player_ids[i].resize(max_trajectory_length, 0); + next_is_terminal[i].resize(max_trajectory_length, false); + valid[i].resize(max_trajectory_length, false); + } +} + +BatchedTrajectory RecordBatchedTrajectory( + const Game& game, const std::vector& policies, + const State& initial_state, + const std::unordered_map& state_to_index, int batch_size, + bool include_full_observations, std::mt19937* rng_ptr, + int max_unroll_length) { + SPIEL_CHECK_GT(batch_size, 0); + if (state_to_index.empty()) SPIEL_CHECK_TRUE(include_full_observations); + BatchedTrajectory batched_trajectory(batch_size); + // TODO(author1): Replace this with a multi-threaded version. + for (int i = 0; i < batch_size; ++i) { + BatchedTrajectory trajectory = + RecordTrajectory(game, policies, initial_state, state_to_index, + include_full_observations, rng_ptr); + SPIEL_CHECK_FALSE(trajectory.rewards[0].empty()); + batched_trajectory.MoveTrajectory(i, &trajectory); + } + batched_trajectory.ResizeFields(max_unroll_length); + return batched_trajectory; +} + +BatchedTrajectory RecordTrajectory( + const Game& game, const std::vector& policies, + const State& initial_state, + const std::unordered_map& state_to_index, + bool include_full_observations, std::mt19937* rng) { + if (state_to_index.empty()) SPIEL_CHECK_TRUE(include_full_observations); + BatchedTrajectory trajectory(/*batch_size=*/1); + std::unique_ptr state = initial_state.Clone(); + bool find_index = !state_to_index.empty(); + while (!state->IsTerminal()) { + Action action = kInvalidAction; + if (state->IsChanceNode()) { + action = open_spiel::SampleAction( + state->ChanceOutcomes(), + std::uniform_real_distribution(0.0, 1.0)(*rng)) + .first; + } else if (state->IsSimultaneousNode()) { + open_spiel::SpielFatalError( + "We do not support games with simultaneous actions."); + } else { + // Then we're at a decision node. 
+ trajectory.legal_actions[0].push_back(state->LegalActionsMask()); + if (find_index) { + auto it = state_to_index.find(StateKey(game, *state)); + SPIEL_CHECK_TRUE(it != state_to_index.end()); + trajectory.state_indices[0].push_back(it->second); + } else { + trajectory.observations[0].push_back(state->InformationStateTensor()); + } + ActionsAndProbs policy = + policies.at(state->CurrentPlayer()) + .GetStatePolicy(state->InformationStateString()); + if (policy.size() > state->LegalActions().size()) { + std::string policy_str = ""; + for (const auto& item : policy) { + absl::StrAppend(&policy_str, "(", item.first, ",", item.second, ") "); + } + SpielFatalError(absl::StrCat( + "There are more actions than legal actions from ", + typeid(policies.at(state->CurrentPlayer())).name(), + "\n Legal actions are: ", absl::StrJoin(state->LegalActions(), " "), + " \n Available probabilities were:", policy_str)); + } + std::vector probs(game.NumDistinctActions(), 0.); + for (const std::pair& pair : policy) { + probs[pair.first] = pair.second; + } + trajectory.player_policies[0].push_back(probs); + trajectory.player_ids[0].push_back(state->CurrentPlayer()); + action = SampleAction(policy, *rng).first; + trajectory.actions[0].push_back(action); + } + SPIEL_CHECK_NE(action, kInvalidAction); + state->ApplyAction(action); + } + trajectory.valid[0] = std::vector(trajectory.actions[0].size(), true); + trajectory.rewards[0] = state->Returns(); + trajectory.next_is_terminal[0].resize(trajectory.actions[0].size(), false); + trajectory.next_is_terminal[0][trajectory.next_is_terminal[0].size() - 1] = + true; + + // We arbitrarily set max_trajectory_length based on the actions field. All + // the fields should have the same length. + trajectory.max_trajectory_length = trajectory.actions[0].size(); + return trajectory; +} + +BatchedTrajectory RecordBatchedTrajectory( + const Game& game, const std::vector& policies, + const std::unordered_map& state_to_index, int batch_size, + bool include_full_observations, std::mt19937* rng_ptr, + int max_unroll_length) { + if (state_to_index.empty()) SPIEL_CHECK_TRUE(include_full_observations); + std::unique_ptr state = game.NewInitialState(); + return RecordBatchedTrajectory(game, policies, *state, state_to_index, + batch_size, include_full_observations, rng_ptr, + max_unroll_length); +} + +BatchedTrajectory RecordBatchedTrajectory( + const Game& game, const std::vector& policies, + const std::unordered_map& state_to_index, int batch_size, + bool include_full_observations, int seed, int max_unroll_length) { + std::mt19937 rng(seed); + return RecordBatchedTrajectory(game, policies, state_to_index, batch_size, + include_full_observations, &rng, + max_unroll_length); +} + +BatchedTrajectory RecordTrajectory( + const Game& game, const std::vector& policies, + const std::unordered_map& state_to_index, + bool include_full_observations, std::mt19937* rng_ptr) { + if (state_to_index.empty()) SPIEL_CHECK_TRUE(include_full_observations); + std::unique_ptr state = game.NewInitialState(); + return RecordTrajectory(game, policies, *state, state_to_index, + include_full_observations, rng_ptr); +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/trajectories.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/trajectories.h new file mode 100644 index 0000000..30391b2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/trajectories.h @@ -0,0 +1,146 @@ +// Copyright 2021 DeepMind Technologies 
Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_TRAJECTORIES_H_ +#define OPEN_SPIEL_ALGORITHMS_TRAJECTORIES_H_ + +#include + +#include +#include +#include +#include +#include + +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { + +// The content of a trajectory. The idea is to represent a batch of trajectories +// of shape [B, T]. Each trajectory will be padded to have the same length, +// which is equal to the length of the longest episode in the batch. +struct BatchedTrajectory { + // Initializes a BatchedTrajectory of size [batch_size, T]. + BatchedTrajectory(int batch_size); + + // Moves the trajectory fields into the current trajectory. + void MoveTrajectory(int index, BatchedTrajectory* trajectory); + + // Pads fields to make sure that they're all the same shape, i.e. [B, T, N], + // where N = the size of each field. If size is -1, i.e. the default, then + // we resize to the max trajectory length in the batch. + void ResizeFields(int length = -1); + + int batch_size; + + // Observations is an optional field that corresponds to the results of + // calling State::InformationStateTensor. Only one of observations + // and state_indices will be filled out for any given instance of + // BatchedTrajectory. + std::vector>> observations; + + // The indices corresponding to the viewed state. + std::vector> state_indices; + + // Stores the result of open_spiel::State::LegalActionMask. + std::vector>> legal_actions; + std::vector> actions; + std::vector>> player_policies; + std::vector> player_ids; + + // This is a tensor of shape [B, T], where rewards[b][n] is the terminal + // reward for episode b for player n. + std::vector> rewards; + + // Tensor of shape [B, T]. valid[b][n] is true if actions[b][n] was actually + // taken during a rollout, and false if it is just padding. + std::vector> valid; + + // This is false everywhere except for the last state of the trajectory. + std::vector> next_is_terminal; + uint64_t max_trajectory_length = 0; +}; + +// If include_full_observations is true, then we record the result of +// open_spiel::State::InformationStateTensor(); otherwise, we store +// the index (taken from state_to_index). 
+BatchedTrajectory RecordTrajectory( + const Game& game, const std::vector& policies, + const State& initial_state, + const std::unordered_map& state_to_index, + bool include_full_observations, std::mt19937* rng_ptr); + +BatchedTrajectory RecordBatchedTrajectory( + const Game& game, const std::vector& policies, + const State& initial_state, + const std::unordered_map& state_to_index, int batch_size, + bool include_full_observations, std::mt19937* rng_ptr, + int max_unroll_length = -1); + +BatchedTrajectory RecordTrajectory( + const Game& game, const std::vector& policies, + const std::unordered_map& state_to_index, + bool include_full_observations, std::mt19937* rng_ptr); + +BatchedTrajectory RecordBatchedTrajectory( + const Game& game, const std::vector& policies, + const std::unordered_map& state_to_index, int batch_size, + bool include_full_observations, std::mt19937* rng_ptr, + int max_unroll_length = -1); + +BatchedTrajectory RecordBatchedTrajectory( + const Game& game, const std::vector& policies, + const std::unordered_map& state_to_index, int batch_size, + bool include_full_observations, int seed, int max_unroll_length = -1); + +// Stateful version of RecordTrajectory. There are several optimisations that +// this allows. Currently, the only optimisation is preventing making multiple +// copies of the state_to_index class. When state_to_index.empty() is false, +// then we default to setting the full observations field and not setting the +// state_indices field. +class TrajectoryRecorder { + public: + TrajectoryRecorder(const Game& game, + const std::unordered_map& state_to_index, + int seed) + : game_(game.shared_from_this()), + state_to_index_(state_to_index), + rng_(std::mt19937(seed)) {} + + BatchedTrajectory RecordBatch(const std::vector& policies, + int batch_size, int max_unroll_length) { + const bool include_full_observations = state_to_index_.empty(); + std::unique_ptr root = game_->NewInitialState(); + return RecordBatchedTrajectory(*game_, policies, *root, state_to_index_, + batch_size, include_full_observations, &rng_, + max_unroll_length); + } + + private: + std::shared_ptr game_; + + // Note: The key here depends on the game, and is implemented by the + // StateKey method. + std::unordered_map state_to_index_; + + std::mt19937 rng_; +}; + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_TRAJECTORIES_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/trajectories_test.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/trajectories_test.cc new file mode 100644 index 0000000..64e7f23 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/trajectories_test.cc @@ -0,0 +1,280 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
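trajectories.h above also declares a stateful TrajectoryRecorder, which the test file that follows does not exercise (it only uses the free RecordTrajectory / RecordBatchedTrajectory functions). Here is a hedged usage sketch, assuming the TabularPolicy element type used throughout these tests; the game name, seed, and batch size are arbitrary choices and not part of this diff.

```cpp
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "open_spiel/algorithms/trajectories.h"
#include "open_spiel/policy.h"
#include "open_spiel/spiel.h"

int main() {
  using open_spiel::algorithms::BatchedTrajectory;
  using open_spiel::algorithms::TrajectoryRecorder;

  std::shared_ptr<const open_spiel::Game> game =
      open_spiel::LoadGame("kuhn_poker");

  // An empty state_to_index map makes the recorder store full observation
  // tensors instead of state indices (see the comment above the class).
  std::unordered_map<std::string, int> state_to_index;
  TrajectoryRecorder recorder(*game, state_to_index, /*seed=*/42);

  // Uniform random tabular policies for both players, as in the tests above.
  std::vector<open_spiel::TabularPolicy> policies(
      2, open_spiel::GetUniformPolicy(*game));

  BatchedTrajectory batch = recorder.RecordBatch(
      policies, /*batch_size=*/8, /*max_unroll_length=*/-1);

  std::cout << "Longest episode: " << batch.max_trajectory_length << "\n"
            << "Player 0 return in episode 0: " << batch.rewards[0][0]
            << std::endl;
  return 0;
}
```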
+ +#include "open_spiel/algorithms/trajectories.h" + +#include + +#include "open_spiel/policy.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace { + +constexpr int kBatchSize = 32; + +std::unordered_map GetStatesToIndices(const Game& game) { + std::unordered_map state_index; + std::vector> to_visit; + to_visit.push_back(game.NewInitialState()); + int index = 0; + while (!to_visit.empty()) { + std::unique_ptr state = std::move(to_visit.back()); + to_visit.pop_back(); + if (!state->IsChanceNode() && !state->IsTerminal()) { + state_index[state->InformationStateString()] = index; + } + ++index; + for (Action action : state->LegalActions()) { + to_visit.push_back(state->Child(action)); + } + } + return state_index; +} + +void RecordTrajectoryEveryFieldHasSameLength(const std::string& game_name) { + std::shared_ptr game = LoadGame(game_name); + std::unordered_map states_to_indices = + GetStatesToIndices(*game); + std::vector policies(2, GetUniformPolicy(*game)); + std::mt19937 rng; + BatchedTrajectory trajectory = + RecordTrajectory(*game, policies, states_to_indices, + /*include_full_observations=*/false, &rng); + int num_steps = trajectory.state_indices[0].size(); + SPIEL_CHECK_EQ(num_steps, trajectory.legal_actions[0].size()); + SPIEL_CHECK_EQ(num_steps, trajectory.actions[0].size()); + SPIEL_CHECK_EQ(num_steps, trajectory.player_policies[0].size()); + SPIEL_CHECK_EQ(num_steps, trajectory.player_ids[0].size()); + SPIEL_CHECK_EQ(num_steps, trajectory.next_is_terminal[0].size()); + SPIEL_CHECK_EQ(num_steps, trajectory.valid[0].size()); + SPIEL_CHECK_EQ(trajectory.rewards.size(), 1); +} + +void RecordTrajectoryLegalActionsIsCorrect(const std::string& game_name) { + std::shared_ptr game = LoadGame(game_name); + std::vector policies(2, GetUniformPolicy(*game)); + std::unordered_map states_to_indices = + GetStatesToIndices(*game); + std::mt19937 rng; + + BatchedTrajectory trajectory = + RecordTrajectory(*game, policies, states_to_indices, + /*include_full_observations=*/false, &rng); + std::unique_ptr state = game->NewInitialState(); + for (int i = 0; i < trajectory.actions[0].size(); ++i) { + while (state->IsChanceNode()) state->ApplyAction(state->LegalActions()[0]); + if (!state->IsTerminal() && !state->IsChanceNode()) { + SPIEL_CHECK_EQ(state->LegalActionsMask(), trajectory.legal_actions[0][i]); + } + state->ApplyAction(trajectory.actions[0][i]); + } +} + +void RecordTrajectoryNextIsTerminalIsCorrect(const std::string& game_name) { + std::shared_ptr game = LoadGame(game_name); + std::vector policies(2, GetUniformPolicy(*game)); + std::mt19937 rng; + std::unordered_map states_to_indices = + GetStatesToIndices(*game); + BatchedTrajectory trajectory = + RecordTrajectory(*game, policies, states_to_indices, + /*include_full_observations=*/false, &rng); + std::unique_ptr state = game->NewInitialState(); + for (int i = 0; i < trajectory.actions[0].size(); ++i) { + while (state->IsChanceNode()) state->ApplyAction(state->LegalActions()[0]); + state->ApplyAction(trajectory.actions[0][i]); + SPIEL_CHECK_EQ(state->IsTerminal(), trajectory.next_is_terminal[0][i]); + } +} + +void RecordTrajectoryPlayerIdsIsCorrect(const std::string& game_name) { + std::shared_ptr game = LoadGame(game_name); + std::vector policies(2, GetUniformPolicy(*game)); + std::mt19937 rng; + std::unordered_map states_to_indices = + GetStatesToIndices(*game); + + BatchedTrajectory trajectory = + RecordTrajectory(*game, policies, states_to_indices, + /*include_full_observations=*/false, 
&rng); + std::unique_ptr state = game->NewInitialState(); + for (int i = 0; i < trajectory.actions[0].size(); ++i) { + while (state->IsChanceNode()) state->ApplyAction(state->LegalActions()[0]); + if (!state->IsTerminal() && !state->IsChanceNode()) { + SPIEL_CHECK_EQ(trajectory.player_ids[0][i], state->CurrentPlayer()); + } + state->ApplyAction(trajectory.actions[0][i]); + } +} + +void RecordBatchedTrajectoryEveryFieldHasSameLength( + const std::string& game_name) { + std::shared_ptr game = LoadGame(game_name); + std::vector policies(2, GetUniformPolicy(*game)); + std::unordered_map states_to_indices = + GetStatesToIndices(*game); + std::mt19937 rng; + BatchedTrajectory trajectory = RecordBatchedTrajectory( + *game, policies, states_to_indices, kBatchSize, + /*include_full_observations=*/false, /*rng_ptr=*/&rng); + int batch_size = trajectory.batch_size; + SPIEL_CHECK_EQ(batch_size, trajectory.legal_actions.size()); + SPIEL_CHECK_EQ(batch_size, trajectory.actions.size()); + SPIEL_CHECK_EQ(batch_size, trajectory.player_policies.size()); + SPIEL_CHECK_EQ(batch_size, trajectory.player_ids.size()); + SPIEL_CHECK_EQ(batch_size, trajectory.next_is_terminal.size()); +} + +void RecordBatchedTrajectoryLegalActionsIsCorrect( + const std::string& game_name) { + std::shared_ptr game = LoadGame(game_name); + std::vector policies(2, GetUniformPolicy(*game)); + std::unordered_map states_to_indices = + GetStatesToIndices(*game); + std::mt19937 rng; + BatchedTrajectory trajectory = RecordBatchedTrajectory( + *game, policies, states_to_indices, kBatchSize, + /*include_full_observations=*/false, /*rng_ptr=*/&rng); + for (int t = 0; t < trajectory.batch_size; ++t) { + std::unique_ptr state = game->NewInitialState(); + for (int i = 0; i < trajectory.actions[t].size(); ++i) { + while (state->IsChanceNode()) { + state->ApplyAction(state->LegalActions()[0]); + } + if (!state->IsTerminal() && !state->IsChanceNode()) { + SPIEL_CHECK_EQ(state->LegalActionsMask(), + trajectory.legal_actions[t][i]); + } + state->ApplyAction(trajectory.actions[t][i]); + if (state->IsTerminal()) break; + } + } +} + +void RecordBatchedTrajectoryNextIsTerminalIsCorrect( + const std::string& game_name) { + std::shared_ptr game = LoadGame(game_name); + std::vector policies(2, GetUniformPolicy(*game)); + std::unordered_map states_to_indices = + GetStatesToIndices(*game); + std::mt19937 rng; + BatchedTrajectory trajectory = RecordBatchedTrajectory( + *game, policies, states_to_indices, kBatchSize, + /*include_full_observations=*/false, /*rng_ptr=*/&rng); + for (int t = 0; t < trajectory.batch_size; ++t) { + std::unique_ptr state = game->NewInitialState(); + for (int i = 0; i < trajectory.actions[t].size(); ++i) { + while (state->IsChanceNode()) { + state->ApplyAction(state->LegalActions()[0]); + } + state->ApplyAction(trajectory.actions[t][i]); + SPIEL_CHECK_EQ(state->IsTerminal(), trajectory.next_is_terminal[t][i]); + if (state->IsTerminal()) break; + } + } +} + +void RecordBatchedTrajectoryPlayerIdsIsCorrect(const std::string& game_name) { + std::shared_ptr game = LoadGame(game_name); + std::vector policies(2, GetUniformPolicy(*game)); + std::unordered_map states_to_indices = + GetStatesToIndices(*game); + std::mt19937 rng; + BatchedTrajectory trajectory = RecordBatchedTrajectory( + *game, policies, states_to_indices, kBatchSize, + /*include_full_observations=*/false, /*rng_ptr=*/&rng); + for (int t = 0; t < trajectory.batch_size; ++t) { + std::unique_ptr state = game->NewInitialState(); + for (int i = 0; i < trajectory.actions[t].size(); 
++i) { + while (state->IsChanceNode()) + state->ApplyAction(state->LegalActions()[0]); + if (!state->IsTerminal() && !state->IsChanceNode()) { + SPIEL_CHECK_EQ(trajectory.player_ids[t][i], state->CurrentPlayer()); + } + state->ApplyAction(trajectory.actions[t][i]); + if (state->IsTerminal()) break; + } + } +} + +void BatchedTrajectoryResizesCorrectly(const std::string& game_name) { + std::shared_ptr game = LoadGame(game_name); + const std::vector policies(2, GetUniformPolicy(*game)); + std::unordered_map states_to_indices = + GetStatesToIndices(*game); + std::mt19937 rng; + BatchedTrajectory trajectory = RecordBatchedTrajectory( + *game, policies, states_to_indices, kBatchSize, + /*include_full_observations=*/false, /*rng_ptr=*/&rng); + for (int b = 0; b < trajectory.batch_size; ++b) { + SPIEL_CHECK_EQ(trajectory.valid[b].size(), trajectory.actions[b].size()); + } + trajectory.ResizeFields(game->MaxGameLength()); + SPIEL_CHECK_EQ(trajectory.batch_size, kBatchSize); + SPIEL_CHECK_EQ(trajectory.actions.size(), kBatchSize); + SPIEL_CHECK_EQ(trajectory.player_ids.size(), kBatchSize); + SPIEL_CHECK_EQ(trajectory.rewards.size(), kBatchSize); + SPIEL_CHECK_EQ(trajectory.legal_actions.size(), kBatchSize); + SPIEL_CHECK_EQ(trajectory.player_policies.size(), kBatchSize); + SPIEL_CHECK_EQ(trajectory.next_is_terminal.size(), kBatchSize); + SPIEL_CHECK_EQ(trajectory.valid.size(), kBatchSize); + for (int b = 0; b < trajectory.batch_size; ++b) { + SPIEL_CHECK_EQ(trajectory.actions[b].size(), + trajectory.max_trajectory_length); + SPIEL_CHECK_EQ(trajectory.valid[b].size(), + trajectory.max_trajectory_length); + SPIEL_CHECK_EQ(trajectory.player_ids[b].size(), + trajectory.max_trajectory_length); + SPIEL_CHECK_EQ(trajectory.next_is_terminal[b].size(), + trajectory.max_trajectory_length); + SPIEL_CHECK_EQ(trajectory.rewards[b].size(), game->NumPlayers()); + for (int t = 0; t < trajectory.max_trajectory_length; ++t) { + SPIEL_CHECK_EQ(trajectory.legal_actions[b][t].size(), + game->NumDistinctActions()); + + // We have to check for <= as some policies omit actions with zero + // probability. + SPIEL_CHECK_LE(trajectory.player_policies[b][t].size(), + game->NumDistinctActions()); + } + } +} + +} // namespace +} // namespace algorithms +} // namespace open_spiel + +namespace alg = open_spiel::algorithms; +int main() { + // We test these games as they're all games that have implemented the + // necessary methods. 
tic_tac_toe, for instance, has not. + for (const std::string& game_name : + {"kuhn_poker", "leduc_poker", "liars_dice"}) { + alg::RecordTrajectoryEveryFieldHasSameLength(game_name); + alg::RecordTrajectoryLegalActionsIsCorrect(game_name); + alg::RecordTrajectoryPlayerIdsIsCorrect(game_name); + alg::RecordTrajectoryNextIsTerminalIsCorrect(game_name); + alg::RecordTrajectoryEveryFieldHasSameLength(game_name); + alg::RecordTrajectoryLegalActionsIsCorrect(game_name); + alg::RecordTrajectoryPlayerIdsIsCorrect(game_name); + alg::RecordTrajectoryNextIsTerminalIsCorrect(game_name); + alg::RecordBatchedTrajectoryEveryFieldHasSameLength(game_name); + alg::RecordBatchedTrajectoryLegalActionsIsCorrect(game_name); + alg::RecordBatchedTrajectoryPlayerIdsIsCorrect(game_name); + alg::RecordBatchedTrajectoryNextIsTerminalIsCorrect(game_name); + alg::BatchedTrajectoryResizesCorrectly(game_name); + } +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/value_iteration.cc b/scenarios/bargaining/open_spiel/open_spiel/algorithms/value_iteration.cc new file mode 100644 index 0000000..c3bd007 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/value_iteration.cc @@ -0,0 +1,138 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
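The value_iteration.cc implementation that begins here performs repeated Bellman backups over all enumerated states until the maximum change falls below a threshold; the public entry point is declared in value_iteration.h further below. A minimal calling sketch (not part of the vendored sources), where the game name, depth limit, and threshold are illustrative choices:

```cpp
#include <iostream>
#include <map>
#include <memory>
#include <string>

#include "open_spiel/algorithms/value_iteration.h"
#include "open_spiel/spiel.h"

int main() {
  std::shared_ptr<const open_spiel::Game> game =
      open_spiel::LoadGame("tic_tac_toe");

  // depth_limit < 0 enumerates all reachable states; threshold bounds the
  // remaining Bellman error at convergence. Values are from player 0's view.
  std::map<std::string, double> values =
      open_spiel::algorithms::ValueIteration(*game, /*depth_limit=*/-1,
                                             /*threshold=*/0.01);

  // Tic-tac-toe is a draw under optimal play, so the root value should be 0.
  std::cout << values.at(game->NewInitialState()->ToString()) << std::endl;
  return 0;
}
```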
+ +#include "open_spiel/algorithms/value_iteration.h" + +#include + +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace { + +using std::map; +using std::vector; +using state_pointer = std::unique_ptr; +using state_action = std::pair; +using state_prob = std::pair; + +// Adds transitions and transition probability from a given state +void AddTransition(map>* transitions, + std::string key, const state_pointer& state) { + for (auto action : state->LegalActions()) { + auto next_state = state->Clone(); + next_state->ApplyAction(action); + vector possibilities; + if (next_state->IsChanceNode()) { + // For a chance node, record the transition probabilities + for (const auto& actionprob : next_state->ChanceOutcomes()) { + auto realized_next_state = next_state->Clone(); + realized_next_state->ApplyAction(actionprob.first); + possibilities.emplace_back(realized_next_state->ToString(), + actionprob.second); + } + } else { + // A non-chance node is equivalent to transition with probability 1 + possibilities.emplace_back(next_state->ToString(), 1.0); + } + (*transitions)[std::make_pair(key, action)] = possibilities; + } +} + +// Initialize transition map and value map +void InitializeMaps(const map& states, + map* values, + map>* transitions) { + for (const auto& kv : states) { + auto key = kv.first; + if (kv.second->IsTerminal()) { + // For both 1-player and 2-player zero sum games, suffices to look at + // player 0's utility + (*values)[key] = kv.second->PlayerReturn(Player{0}); + } else { + (*values)[key] = 0; + AddTransition(transitions, key, kv.second); + } + } +} + +} // namespace + +std::map ValueIteration(const Game& game, int depth_limit, + double threshold) { + using state_action = std::pair; + using state_prob = std::pair; + + // Currently only supports 1-player or 2-player zero sum games + SPIEL_CHECK_TRUE(game.NumPlayers() == 1 || game.NumPlayers() == 2); + if (game.NumPlayers() == 2) { + SPIEL_CHECK_EQ(game.GetType().utility, GameType::Utility::kZeroSum); + } + + // No support for simultaneous games (needs an LP solver). And so also must + // be a perfect information game. + SPIEL_CHECK_EQ(game.GetType().dynamics, GameType::Dynamics::kSequential); + SPIEL_CHECK_EQ(game.GetType().information, + GameType::Information::kPerfectInformation); + + auto states = GetAllStates(game, depth_limit, /*include_terminals=*/true, + /*include_chance_states=*/false, + /*stop_at_duplicates*/true); + std::map values; + std::map> transitions; + + InitializeMaps(states, &values, &transitions); + + double error; + double min_utility = game.MinUtility(); + double max_utility = game.MaxUtility(); + do { + error = 0; + for (const auto& kv : states) { + auto key = kv.first; + + if (kv.second->IsTerminal()) continue; + + auto player = kv.second->CurrentPlayer(); + + // Initialize value to be the minimum utility if current player + // is the maximizing player (i.e. player 0), and to maximum utility + // if current player is the minimizing player (i.e. player 1). + double value = (player == Player{0}) ? min_utility : max_utility; + for (auto action : kv.second->LegalActions()) { + auto possibilities = transitions[std::make_pair(key, action)]; + double q_value = 0; + for (const auto& outcome : possibilities) { + q_value += outcome.second * values[outcome.first]; + } + // Player 0 is maximizing the value (which is w.r.t. 
player 0) + // Player 1 is minimizing the value + if (player == Player{0}) + value = std::max(value, q_value); + else + value = std::min(value, q_value); + } + + double* stored_value = &values[key]; + error = std::max(std::abs(*stored_value - value), error); + *stored_value = value; + } + } while (error > threshold); + + return values; +} + +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/algorithms/value_iteration.h b/scenarios/bargaining/open_spiel/open_spiel/algorithms/value_iteration.h new file mode 100644 index 0000000..fd10482 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/algorithms/value_iteration.h @@ -0,0 +1,39 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ALGORITHMS_VALUE_ITERATION_H_ +#define OPEN_SPIEL_ALGORITHMS_VALUE_ITERATION_H_ + +#include "open_spiel/algorithms/get_all_states.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace algorithms { + +// Value iteration algorithm: solves for the optimal value function of a game. +// The value function is solved with maximum error less than threshold, +// and it considers all states with depth at most depth_limit from the +// initial state (so if depth_limit is 0, only the root is considered). +// If depth limit is negative, all states are considered. +// +// Currently works for sequential 1-player or 2-player zero-sum games, +// with or without chance nodes. + +std::map ValueIteration(const Game& game, int depth_limit, + double threshold); + +} // namespace algorithms +} // namespace open_spiel + +#endif // OPEN_SPIEL_ALGORITHMS_VALUE_ITERATION_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/CMakeLists.txt b/scenarios/bargaining/open_spiel/open_spiel/bots/CMakeLists.txt new file mode 100644 index 0000000..26708ec --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/CMakeLists.txt @@ -0,0 +1,33 @@ +set (BOT_SOURCES + gin_rummy/simple_gin_rummy_bot.cc + gin_rummy/simple_gin_rummy_bot.h + human/human_bot.cc + human/human_bot.h +) + +if (NOT WIN32) + # UCI bot not supported on Windows. + set (BOT_SOURCES ${BOT_SOURCES} + uci/uci_bot.cc + uci/uci_bot.h + ) +endif() + +add_library (bots OBJECT ${BOT_SOURCES}) + +add_subdirectory(gin_rummy) +add_subdirectory(human) + +if (NOT WIN32) + # UCI bot not supported on Windows. 
+ add_subdirectory(uci) +endif() + +if (OPEN_SPIEL_BUILD_WITH_ROSHAMBO) + add_subdirectory(roshambo) +endif() + +if (OPEN_SPIEL_BUILD_WITH_XINXIN) + add_subdirectory(xinxin) +endif() + diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/gin_rummy/CMakeLists.txt b/scenarios/bargaining/open_spiel/open_spiel/bots/gin_rummy/CMakeLists.txt new file mode 100644 index 0000000..e208a82 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/gin_rummy/CMakeLists.txt @@ -0,0 +1,6 @@ +add_executable (simple_gin_rummy_bot_example simple_gin_rummy_bot_example.cc + ${OPEN_SPIEL_OBJECTS}) + +add_executable (simple_gin_rummy_bot_test simple_gin_rummy_bot_test.cc + ${OPEN_SPIEL_OBJECTS} $) +add_test(simple_gin_rummy_bot_test simple_gin_rummy_bot_test) diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/gin_rummy/simple_gin_rummy_bot.cc b/scenarios/bargaining/open_spiel/open_spiel/bots/gin_rummy/simple_gin_rummy_bot.cc new file mode 100644 index 0000000..a6421e9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/gin_rummy/simple_gin_rummy_bot.cc @@ -0,0 +1,244 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/bots/gin_rummy/simple_gin_rummy_bot.h" + +#include +#include +#include + +#include "open_spiel/games/gin_rummy/gin_rummy.h" +#include "open_spiel/games/gin_rummy/gin_rummy_utils.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel::gin_rummy { + +SimpleGinRummyBot::SimpleGinRummyBot(GameParameters params, + const Player player_id) + : params_(std::move(params)), + player_id_(player_id), + hand_size_(params_["hand_size"].int_value()), + utils_(params_["num_ranks"].int_value(), params_["num_suits"].int_value(), + params_["hand_size"].int_value()) {} + +void SimpleGinRummyBot::Restart() { + knocked_ = false; + next_actions_ = {}; +} + +ActionsAndProbs SimpleGinRummyBot::GetPolicy(const State& state) { + ActionsAndProbs policy; + auto legal_actions = state.LegalActions(player_id_); + auto chosen_action = Step(state); + for (auto action : legal_actions) + policy.emplace_back(action, action == chosen_action ? 1.0 : 0.0); + return policy; +} + +std::pair SimpleGinRummyBot::StepWithPolicy( + const State& state) { + ActionsAndProbs policy; + auto legal_actions = state.LegalActions(player_id_); + auto chosen_action = Step(state); + for (auto action : legal_actions) + policy.emplace_back(action, action == chosen_action ? 1.0 : 0.0); + return {policy, chosen_action}; +} + +Action SimpleGinRummyBot::Step(const State& state) { + std::vector observation; + state.ObservationTensor(player_id_, &observation); + + std::vector hand; + std::vector layed_melds; + std::vector discard_pile; + absl::optional upcard = absl::nullopt; + int knock_card = 0; + int stock_size = 0; + + // Decode observation tensor. + int offset = 0; + SPIEL_CHECK_TRUE(observation[player_id_] == 1); + offset += kNumPlayers; + // Player hand. 
+ if (player_id_ == 1) offset += kDefaultNumCards; + for (int i = 0; i < kDefaultNumCards; ++i) { + if (observation[offset + i] == 1) hand.push_back(i); + } + offset += kDefaultNumCards; + if (player_id_ == 0) offset += kDefaultNumCards; + // Current player. + SPIEL_CHECK_EQ(observation[offset + player_id_], 1); + offset += kNumPlayers; + // Knock card. + for (int i = 0; i < kDefaultKnockCard; ++i) { + if (observation[offset + i] == 1) knock_card += 1; + } + offset += kDefaultKnockCard; + // Upcard. + for (int i = 0; i < kDefaultNumCards; ++i) { + if (observation[offset + i] == 1) upcard = i; + } + offset += kDefaultNumCards; + // Discard pile. + for (int i = 0; i < kDefaultNumCards; ++i) { + if (observation[offset + i] == 1) discard_pile.push_back(i); + } + offset += kDefaultNumCards; + // Stock size. + for (int i = 0; i < kDefaultNumCards; ++i) { + if (observation[offset + i] == 1) stock_size += 1; + } + offset += kDefaultNumCards; + // Layed melds. Player 0 looks at player 1's layed melds and vice versa. + if (player_id_ == 0) offset += kNumMeldActions; + for (int i = 0; i < kNumMeldActions; ++i) { + if (observation[offset + i] == 1) { + layed_melds.push_back(i); + knocked_ = true; + } + } // Completed decoding observation. + + auto legal_actions = state.LegalActions(player_id_); + // Next actions must be legal, in order from back to front. + if (!next_actions_.empty()) { + Action action = next_actions_.back(); + if (std::find(legal_actions.begin(), legal_actions.end(), action) == + legal_actions.end()) { + std::cerr << "Game state:" << std::endl; + std::cerr << state.ToString() << std::endl; + std::cerr << "Legal actions: " << legal_actions << std::endl; + std::cerr << "Bot next actions: " << next_actions_ << std::endl; + SpielFatalError("Previously determined next action is illegal."); + } + next_actions_.pop_back(); + return action; + } + + // When knocking, bot decides how to lay the hand all at once and saves the + // corresponding meld actions in next_actions_. + if (knocked_) { + if (!layed_melds.empty()) { + // Opponent knocked. + next_actions_.push_back(kPassAction); // Bot never lays off. + for (int meld_id : GetMelds(hand)) { + next_actions_.push_back(kMeldActionBase + meld_id); + } + next_actions_.push_back(kPassAction); + } else { + next_actions_.push_back(kPassAction); + std::vector melds_to_lay = GetMelds(hand); + for (int meld_id : melds_to_lay) { + next_actions_.push_back(kMeldActionBase + meld_id); + } + int best_discard = GetDiscard(hand); + next_actions_.push_back(best_discard); + } + Action action = next_actions_.back(); + SPIEL_CHECK_TRUE(std::find(legal_actions.begin(), + legal_actions.end(), action) != legal_actions.end()); + next_actions_.pop_back(); + return action; + } else if (!upcard.has_value()) { + // MoveType kDiscard + if (hand.size() != hand_size_ + 1) { + std::cerr << "Game state:" << std::endl; + std::cerr << state.ToString() << std::endl; + std::cerr << "Bot hand:" << std::endl; + std::cerr << utils_.HandToString(hand); + SpielFatalError("Discarding with an insufficient number of cards."); + } + int deadwood = utils_.MinDeadwood(hand); + if (deadwood <= knock_card && !knocked_) { + knocked_ = true; + return kKnockAction; + } else { + int best_discard = GetDiscard(hand); + if (best_discard >= 0) { + return best_discard; + } else { + return legal_actions[0]; + } + } + } else { + // MoveType kDraw + if (stock_size == kWallStockSize) { + // Special rules apply when we've reached the wall. 
+ if (legal_actions.back() == kKnockAction) { + knocked_ = true; + return kKnockAction; + } else { + return kPassAction; + } + } else if (utils_.MinDeadwood(hand, upcard) <= knock_card || + !absl::c_linear_search(GetBestDeadwood(hand, upcard), upcard)) { + // Draw upcard if doing so permits a knock, or if the upcard would not be + // in the "best" deadwood (=> upcard would be in a "best" meld). + return kDrawUpcardAction; + } else { + return legal_actions.back(); // Draw from stock or pass. + } + } +} + +// Returns the "best" deadwood, i.e. the cards that do not belong to one of the +// "best" melds. Here "best" means any meld group that achieves the lowest +// possible deadwood count for the given hand. In general this is non-unique. +std::vector SimpleGinRummyBot::GetBestDeadwood(std::vector hand, + const absl::optional card) const { + if (card.has_value()) hand.push_back(card.value()); + for (const auto& meld : utils_.BestMeldGroup(hand)) { + for (auto card : meld) { + hand.erase(remove(hand.begin(), hand.end(), card), hand.end()); + } + } + return hand; +} + +int SimpleGinRummyBot::GetDiscard(const std::vector &hand) const { + std::vector deadwood = GetBestDeadwood(hand); + if (!deadwood.empty()) { + std::sort(deadwood.begin(), deadwood.end(), + RankComparator(kDefaultNumRanks)); + return deadwood.back(); + } else { + // 11 card gin. All cards are melded so there is no deadwood to throw from. + // Must be careful to throw a card from a meld that does not break up that + // meld. E.g. consider an 11 card gin containing the meld As2s3s4s. With a + // knock card of 10, all of these cards are legal discards following a + // knock, but only the As and 4s preserve gin. + for (int i = 0; i < hand.size(); ++i) { + std::vector hand_copy = hand; + hand_copy.erase(hand_copy.begin() + i); + if (utils_.MinDeadwood(hand_copy) == 0) + return hand[i]; + } + SpielFatalError("11 card gin error."); + } +} + +std::vector SimpleGinRummyBot::GetMelds(std::vector hand) const { + if (hand.size() == hand_size_ + 1 && utils_.MinDeadwood(hand) == 0) { + // 11 card gin. Must select discard that preserves gin. See GetDiscard(). + hand.erase(remove(hand.begin(), hand.end(), GetDiscard(hand)), hand.end()); + } + std::vector rv; + for (const auto& meld : utils_.BestMeldGroup(hand)) { + rv.push_back(utils_.meld_to_int.at(meld)); + } + return rv; +} + +} // namespace open_spiel::gin_rummy diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/gin_rummy/simple_gin_rummy_bot.h b/scenarios/bargaining/open_spiel/open_spiel/bots/gin_rummy/simple_gin_rummy_bot.h new file mode 100644 index 0000000..95511ab --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/gin_rummy/simple_gin_rummy_bot.h @@ -0,0 +1,100 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
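The SimpleGinRummyBot implemented above (its declaration and a summary of its strategy follow in simple_gin_rummy_bot.h) plugs into the standard Bot interface. Below is a hedged driver sketch mirroring the fuller example in simple_gin_rummy_bot_example.cc later in this diff; the uniform-random opponent and the fixed seeds are arbitrary assumptions.

```cpp
#include <memory>
#include <random>
#include <vector>

#include "open_spiel/bots/gin_rummy/simple_gin_rummy_bot.h"
#include "open_spiel/spiel.h"
#include "open_spiel/spiel_bots.h"
#include "open_spiel/spiel_utils.h"

int main() {
  std::shared_ptr<const open_spiel::Game> game =
      open_spiel::LoadGame("gin_rummy");
  std::mt19937 rng(1234);

  // Player 0: the simple gin rummy bot; player 1: a uniform random baseline.
  std::vector<std::unique_ptr<open_spiel::Bot>> bots;
  bots.push_back(std::make_unique<open_spiel::gin_rummy::SimpleGinRummyBot>(
      game->GetParameters(), open_spiel::Player{0}));
  bots.push_back(open_spiel::MakeUniformRandomBot(open_spiel::Player{1},
                                                  /*seed=*/5678));

  std::unique_ptr<open_spiel::State> state = game->NewInitialState();
  while (!state->IsTerminal()) {
    if (state->IsChanceNode()) {
      // Chance nodes (the deal, stock draws) are resolved outside the bots.
      state->ApplyAction(
          open_spiel::SampleAction(state->ChanceOutcomes(), rng).first);
    } else {
      state->ApplyAction(bots[state->CurrentPlayer()]->Step(*state));
    }
  }
  return 0;
}
```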
+ +#ifndef OPEN_SPIEL_BOTS_GIN_RUMMY_SIMPLE_GIN_RUMMY_BOT_H_ +#define OPEN_SPIEL_BOTS_GIN_RUMMY_SIMPLE_GIN_RUMMY_BOT_H_ + +// This bot plays about the simplest possible strategy that performs reasonably +// well and effectively explores the game tree. It's useful both as a test +// of the game implementation and as a benchmark of playing strength. +// +// The strategy can be summarized as follows: +// +// If phase == kDraw: +// Draw the upcard under either of the following two conditions, otherwise +// draw from the stock: +// 1) If doing so allows for an immediate knock. +// 2) If the upcard belongs to a meld, and that meld lowers the deadwood +// count of the hand. The second part of this condition in relevant in +// the following example where we would not want to pick up the Js even +// though it makes three jacks, because it breaks up a better meld +// thereby increasing the total deadwood count. +// +// Upcard: Js +// +--------------------------+ +// | | +// |Ac2c3c4c | +// | 9dTdJdQd | +// | 3h Jh | +// +--------------------------+ +// +// If phase == kDiscard: +// Always knock if legal, otherwise throw the deadwood card worth the most +// points, with ties being broken arbitrarily. +// +// If phase == kKnock: +// When laying the hand, the meld arrangement is chosen that minimizes the +// total deadwood count. If two different meld arrangements are equal in this +// regard, one is chosen arbitrarily. No layoffs are made if opponent knocks. + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/games/gin_rummy/gin_rummy_utils.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_bots.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace gin_rummy { + +class SimpleGinRummyBot : public Bot { + public: + SimpleGinRummyBot(GameParameters params, Player player_id); + + void Restart() override; + Action Step(const State& state) override; + + bool ProvidesPolicy() override { return true; } + std::pair StepWithPolicy( + const State& state) override; + ActionsAndProbs GetPolicy(const State& state) override; + + bool IsClonable() const override { return true; } + std::unique_ptr Clone() override { + return std::make_unique(*this); + } + SimpleGinRummyBot(const SimpleGinRummyBot& other) = default; + + private: + GameParameters params_; + const Player player_id_; + const int hand_size_; + const GinRummyUtils utils_; + + bool knocked_ = false; + std::vector next_actions_; + + std::vector GetBestDeadwood( + std::vector hand, absl::optional card = absl::nullopt) const; + int GetDiscard(const std::vector& hand) const; + std::vector GetMelds(std::vector hand) const; +}; + +} // namespace gin_rummy +} // namespace open_spiel + +#endif // OPEN_SPIEL_BOTS_GIN_RUMMY_SIMPLE_GIN_RUMMY_BOT_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/gin_rummy/simple_gin_rummy_bot_example.cc b/scenarios/bargaining/open_spiel/open_spiel/bots/gin_rummy/simple_gin_rummy_bot_example.cc new file mode 100644 index 0000000..6153345 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/gin_rummy/simple_gin_rummy_bot_example.cc @@ -0,0 +1,191 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/flags/flag.h" +#include "open_spiel/abseil-cpp/absl/flags/parse.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/time/clock.h" +#include "open_spiel/abseil-cpp/absl/time/time.h" +#include "open_spiel/bots/gin_rummy/simple_gin_rummy_bot.h" +#include "open_spiel/games/gin_rummy/gin_rummy.h" +#include "open_spiel/games/gin_rummy/gin_rummy_utils.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/spiel_bots.h" + +ABSL_FLAG(std::string, game_string, "gin_rummy", + "Short name plus optional params."); +ABSL_FLAG(std::string, player0, "simple_gin_rummy_bot", + "Who controls player 0."); +ABSL_FLAG(std::string, player1, "random", "Who controls player 1."); +ABSL_FLAG(int, num_games, 1, "How many games to play."); +ABSL_FLAG(uint_fast32_t, seed, 0, "Seed for rng."); +ABSL_FLAG(bool, verbose, false, "Log gameplay."); +ABSL_FLAG(bool, show_legals, false, "Sets verbose=true & shows legal actions."); +ABSL_FLAG(bool, log_histories, false, "Log action histories."); +ABSL_FLAG(bool, log_returns, false, "Log returns."); +ABSL_FLAG(bool, log_reach_probs, false, "Log reach probabilities."); +ABSL_FLAG(std::string, path, "/tmp/gin_rummy_logs.txt", + "Where to output the logs."); + +uint_fast32_t Seed() { + uint_fast32_t seed = absl::GetFlag(FLAGS_seed); + return seed != 0 ? seed : absl::ToUnixMicros(absl::Now()); +} + +std::unique_ptr InitBot( + std::string type, const open_spiel::Game& game, open_spiel::Player player) { + if (type == "random") { + return open_spiel::MakeUniformRandomBot(player, Seed()); + } + if (type == "simple_gin_rummy_bot") { + return std::make_unique( + game.GetParameters(), player); + } + open_spiel::SpielFatalError( + "Bad player type. 
Known types: simple_gin_rummy_bot, random"); +} + +std::vector PlayGame(const open_spiel::Game& game, + const std::vector>& bots, + std::mt19937* rng, std::ostream& os, bool verbose, bool show_legals, + bool log_histories, bool log_returns, bool log_reach_probs) { + std::unique_ptr state = game.NewInitialState(); + for (open_spiel::Player p = 0; p < open_spiel::gin_rummy::kNumPlayers; ++p) + bots[p]->Restart(); + std::vector players_reach(2, 1.0); + double chance_reach = 1.0; + + while (!state->IsTerminal()) { + open_spiel::Player player = state->CurrentPlayer(); + + if (verbose) os << "Player turn: " << player << std::endl; + if (show_legals) { + os << "Legal moves for player " << player << ":" << std::endl; + for (open_spiel::Action action : state->LegalActions(player)) + os << " " << state->ActionToString(player, action) << std::endl; + } + + open_spiel::Action action; + if (state->IsChanceNode()) { + std::pair outcome_and_prob = + open_spiel::SampleAction(state->ChanceOutcomes(), *rng); + action = outcome_and_prob.first; + SPIEL_CHECK_PROB(outcome_and_prob.second); + SPIEL_CHECK_GT(outcome_and_prob.second, 0); + SPIEL_CHECK_PROB(chance_reach); + chance_reach *= outcome_and_prob.second; + if (verbose) { + os << "Sampled action: " << state->ActionToString(player, action) + << std::endl; + } + } else { + std::pair outcome_and_prob = + open_spiel::SampleAction(bots[player]->GetPolicy(*state), *rng); + action = outcome_and_prob.first; + SPIEL_CHECK_PROB(outcome_and_prob.second); + SPIEL_CHECK_GT(outcome_and_prob.second, 0); + SPIEL_CHECK_PROB(players_reach[player]); + players_reach[player] *= outcome_and_prob.second; + if (verbose) { + os << "Chose action: " << state->ActionToString(player, action) + << std::endl; + } + } + if (!absl::c_binary_search(state->LegalActions(), action)) { + std::cerr << "State: " << std::endl << state->ToString() << std::endl + << "History: " << absl::StrJoin(state->History(), " ") + << std::endl << "Legal actions: " + << absl::StrJoin(state->LegalActions(), " ") << std::endl; + open_spiel::SpielFatalError("Illegal bot action."); + } + state->ApplyAction(action); + if (verbose) os << "State: " << std::endl << state->ToString() << std::endl; + } + if (verbose) { + os << "Returns: " << absl::StrJoin(state->Returns(), ",") << std::endl + << "History: " << absl::StrJoin(state->History(), " ") << std::endl; + } else if (log_histories) { + os << absl::StrJoin(state->History(), " ") << std::endl; + } else if (log_returns) { + os << absl::StrJoin(state->Returns(), " ") << " "; + if (log_reach_probs) { + os << absl::StrJoin(players_reach, " ") << " " << chance_reach; + } + os << std::endl; + } + return state->Returns(); +} + +int main(int argc, char** argv) { + std::vector positional_args = absl::ParseCommandLine(argc, argv); + std::mt19937 rng(Seed()); + + std::string game_string = absl::GetFlag(FLAGS_game_string); + std::cout << "Game string: " << game_string << std::endl; + std::shared_ptr game = + open_spiel::LoadGame(game_string); + + std::vector> bots; + bots.push_back(InitBot(absl::GetFlag(FLAGS_player0), *game, 0)); + bots.push_back(InitBot(absl::GetFlag(FLAGS_player1), *game, 1)); + + int num_games = absl::GetFlag(FLAGS_num_games); + bool show_legals = absl::GetFlag(FLAGS_show_legals); + bool verbose = absl::GetFlag(FLAGS_verbose) || show_legals; + bool log_histories = absl::GetFlag(FLAGS_log_histories); + bool log_returns = absl::GetFlag(FLAGS_log_returns); + bool log_reach_probs = absl::GetFlag(FLAGS_log_reach_probs); + std::string path = 
absl::GetFlag(FLAGS_path); + + std::ofstream os(path); + std::vector overall_returns(2, 0); + std::vector overall_wins(2, 0); + int percent = 0; + int refresh_threshold = 0; + absl::Time start = absl::Now(); + for (int game_num = 0; game_num < num_games; ++game_num) { + percent = (100 * (game_num + 1)) / num_games; + if (percent >= refresh_threshold) { + // Progress bar. + std::cout << "\r" << "[" << std::string(percent / 5, '=') + << std::string(100 / 5 - percent / 5, ' ') << "]" << percent + << "%" << " [Game " << game_num + 1 << " of " << num_games + << "]"; + std::cout.flush(); + ++refresh_threshold; + } + std::vector returns = PlayGame(*game, bots, &rng, os, verbose, + show_legals, log_histories, log_returns, log_reach_probs); + for (int i = 0; i < returns.size(); ++i) { + double v = returns[i]; + overall_returns[i] += v; + if (v > 0) overall_wins[i] += 1; + } + } + absl::Time end = absl::Now(); + double seconds = absl::ToDoubleSeconds(end - start); + + std::cout << std::endl << "Number of games played: " << num_games << std::endl + << "Overall wins: " << absl::StrJoin(overall_wins, ",") << std::endl + << "Overall returns: " << absl::StrJoin(overall_returns, ",") + << std::endl << "Seconds: " << seconds << std::endl; + if (verbose || log_histories || log_returns) + std::cout << "Game histories logged to " << path << std::endl; +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/gin_rummy/simple_gin_rummy_bot_test.cc b/scenarios/bargaining/open_spiel/open_spiel/bots/gin_rummy/simple_gin_rummy_bot_test.cc new file mode 100644 index 0000000..2e9fc07 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/gin_rummy/simple_gin_rummy_bot_test.cc @@ -0,0 +1,63 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
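The upcard rule spelled out in the SimpleGinRummyBot header comment above (take the upcard only when doing so actually lowers the hand's best achievable deadwood, as in the Js example) can be condensed into a few lines. The sketch below is illustrative only, not the bot's actual implementation: `Card` and the `min_deadwood` callback stand in for the card representation and the meld/deadwood utilities in gin_rummy_utils.

#include <algorithm>
#include <functional>
#include <vector>

namespace sketch {

using Card = int;  // stand-in for the card index type used by gin_rummy_utils

// Returns true if drawing `upcard` (and then discarding optimally) strictly
// lowers the minimal deadwood of `hand`. `min_deadwood` is an assumed helper
// that returns the best deadwood count over all meld arrangements of a hand.
bool ShouldDrawUpcard(
    std::vector<Card> hand, Card upcard,
    const std::function<int(const std::vector<Card>&)>& min_deadwood) {
  const int before = min_deadwood(hand);
  hand.push_back(upcard);
  int best_after = before;  // discarding the upcard reproduces the old hand
  for (size_t i = 0; i < hand.size(); ++i) {
    std::vector<Card> candidate = hand;
    candidate.erase(candidate.begin() + i);
    best_after = std::min(best_after, min_deadwood(candidate));
  }
  // Only pick up the upcard when it strictly improves the hand.
  return best_after < before;
}

}  // namespace sketch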
+ +#include +#include +#include + +#include "open_spiel/bots/gin_rummy/simple_gin_rummy_bot.h" +#include "open_spiel/games/gin_rummy/gin_rummy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_bots.h" + +namespace open_spiel { +namespace gin_rummy { +namespace { + +void SimpleGinRummyBotSelfPlayTest() { + const int num_games = 3; + std::mt19937 rng(time(nullptr)); + auto game = LoadGame("gin_rummy"); + std::vector> bots; + + for (Player p = 0; p < kNumPlayers; ++p) { + bots.push_back( + std::make_unique(game->GetParameters(), p)); + } + + for (int i = 0; i < num_games; i++) { + for (Player p = 0; p < gin_rummy::kNumPlayers; ++p) bots[p]->Restart(); + std::unique_ptr state = game->NewInitialState(); + while (!state->IsTerminal()) { + Player player = state->CurrentPlayer(); + Action action; + if (state->IsChanceNode()) { + ActionsAndProbs outcomes = state->ChanceOutcomes(); + action = SampleAction(outcomes, + std::uniform_real_distribution(0.0, 1.0)(rng)).first; + } else { + action = bots[player]->Step(*state); + } + state->ApplyAction(action); + } + } +} + +} // namespace +} // namespace gin_rummy +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::gin_rummy::SimpleGinRummyBotSelfPlayTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/human/CMakeLists.txt b/scenarios/bargaining/open_spiel/open_spiel/bots/human/CMakeLists.txt new file mode 100644 index 0000000..77ff42e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/human/CMakeLists.txt @@ -0,0 +1,3 @@ +add_executable (human_bot_test human_bot_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(human_bot_test human_bot_test) diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/human/human_bot.cc b/scenarios/bargaining/open_spiel/open_spiel/bots/human/human_bot.cc new file mode 100644 index 0000000..e46ba9d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/human/human_bot.cc @@ -0,0 +1,137 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
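The gin rummy self-play test above and the PIMC bot test further below drive games with essentially the same loop: at chance nodes an outcome is sampled from ChanceOutcomes(), otherwise the current player's bot picks a move via Step(). A condensed generic version of that recurring pattern, assuming only the standard OpenSpiel Game/State/Bot interfaces:

#include <memory>
#include <random>
#include <vector>

#include "open_spiel/spiel.h"
#include "open_spiel/spiel_bots.h"

// Plays one game to completion and returns the final returns. `bots` holds
// one bot per player; chance outcomes are sampled with `rng`.
std::vector<double> PlayOneGame(const open_spiel::Game& game,
                                std::vector<open_spiel::Bot*>& bots,
                                std::mt19937* rng) {
  std::unique_ptr<open_spiel::State> state = game.NewInitialState();
  std::uniform_real_distribution<double> uniform(0.0, 1.0);
  while (!state->IsTerminal()) {
    open_spiel::Action action;
    if (state->IsChanceNode()) {
      // Sample a chance outcome according to its probability.
      action = open_spiel::SampleAction(state->ChanceOutcomes(),
                                        uniform(*rng)).first;
    } else {
      // Ask the bot controlling the current player for its move.
      action = bots[state->CurrentPlayer()]->Step(*state);
    }
    state->ApplyAction(action);
  }
  return state->Returns();
}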
+ +#include "open_spiel/bots/human/human_bot.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/abseil-cpp/absl/strings/numbers.h" + +namespace open_spiel { +namespace { + +const int kMaxWidth = 80; +const int kPadding = 2; + +void PrintColumns(const std::vector &strings) { + std::string padding_string(kPadding, ' '); + + int longest_string_length = 0; + for (const std::string &string : strings) { + if (string.length() > longest_string_length) { + longest_string_length = string.length(); + } + } + + int max_columns = (kMaxWidth - 1) / (longest_string_length + 2 * kPadding); + int rows = ceil((float)strings.size() / (float)max_columns); + int columns = ceil((float)strings.size() / (float)rows); + for (int row = 0; row < rows; ++row) { + for (int column = 0; column < columns; ++column) { + int index = row + column * rows; + if (index < strings.size()) { + std::cout << std::left << std::setw(longest_string_length + kPadding) + << padding_string << strings[index]; + } + } + std::cout << std::endl; + } +} + +} // namespace + +Action HumanBot::Step(const State &state) { + std::vector legal_actions = state.LegalActions(state.CurrentPlayer()); + + if (legal_actions.empty()) { + return kInvalidAction; + } + + absl::flat_hash_map action_map; + for (Action legal_action : legal_actions) { + action_map[state.ActionToString(legal_action)] = legal_action; + } + + while (true) { + Action action; + std::string action_string = ""; + + std::cout << "Choose an action (empty to print legal actions): "; + std::getline(std::cin, action_string); + + // Print the legal actions if no action is given. + if (action_string.empty()) { + std::cout << "Legal action(s):" << std::endl; + + std::vector legal_action_strings; + std::vector> sorted_action_map( + action_map.begin(), action_map.end()); + + std::sort(sorted_action_map.begin(), sorted_action_map.end(), + [](const auto &left, const auto &right) { + return left.first < right.first; + }); + + int longest_action_length = 0; + for (const Action &legal_action : legal_actions) { + int action_length = std::to_string(legal_action).length(); + if (action_length > longest_action_length) { + longest_action_length = action_length; + } + } + + for (const auto &string_action_pair : sorted_action_map) { + std::string action_string = string_action_pair.first; + std::string action_int_string = + std::to_string(string_action_pair.second); + std::string action_padding( + longest_action_length - action_int_string.length(), ' '); + legal_action_strings.push_back(absl::StrCat( + action_padding, action_int_string, ": ", action_string)); + } + PrintColumns(legal_action_strings); + continue; + } + + // Return the action if a valid string is given. + if (action_map.find(action_string) != action_map.end()) { + return action_map[action_string]; + } + + // Return the action if a valid integer is given. + bool parse_succeeded = absl::SimpleAtoi(action_string, &action); + if (!parse_succeeded) { + std::cout << "Could not parse the action: " << action_string << std::endl; + continue; + } + + for (Action legal_action : legal_actions) { + if (action == legal_action) { + return action; + } + } + + // The input was not valid. 
+ std::cout << "Illegal action selected: " << action_string << std::endl; + } +} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/human/human_bot.h b/scenarios/bargaining/open_spiel/open_spiel/bots/human/human_bot.h new file mode 100644 index 0000000..9391cdd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/human/human_bot.h @@ -0,0 +1,29 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_BOTS_HUMAN_HUMAN_BOT_H_ +#define OPEN_SPIEL_BOTS_HUMAN_HUMAN_BOT_H_ + +#include "open_spiel/spiel_bots.h" + +namespace open_spiel { + +class HumanBot : public Bot { + public: + Action Step(const State &state); +}; + +} // namespace open_spiel + +#endif // OPEN_SPIEL_BOTS_HUMAN_HUMAN_BOT_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/human/human_bot_test.cc b/scenarios/bargaining/open_spiel/open_spiel/bots/human/human_bot_test.cc new file mode 100644 index 0000000..40d0b2b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/human/human_bot_test.cc @@ -0,0 +1,159 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/bots/human/human_bot.h" + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/numbers.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace { + +Action GetActionFromString(const State &state, + const std::string &action_string) { + for (Action action : state.LegalActions(state.CurrentPlayer())) { + if (action_string == state.ActionToString(action)) { + return action; + } + } + return kInvalidAction; +} + +Action StepHumanBotWithInputs(HumanBot &human_bot, + const std::vector &inputs, + const State &state) { + // Add a newline character to each input. + std::string human_bot_inputs = absl::StrJoin(inputs, "\n") + "\n"; + std::istringstream human_bot_input_stream(human_bot_inputs); + + // Allow the human bot to access the input through std::cin. 
+ std::cin.rdbuf(human_bot_input_stream.rdbuf()); + + return human_bot.Step(state); +} + +void EmptyActionTest() { + HumanBot human_bot; + std::shared_ptr game = LoadGame("tic_tac_toe"); + std::unique_ptr state = game->NewInitialState(); + std::string empty_action_string = ""; + std::string legal_action_string = "0"; + + // Have the human bot receive a empty action, then a legal action. + Action human_bot_action = StepHumanBotWithInputs( + human_bot, {empty_action_string, legal_action_string}, *state); + Action legal_action = kInvalidAction; + SPIEL_CHECK_TRUE(absl::SimpleAtoi(legal_action_string, &legal_action)); + SPIEL_CHECK_TRUE(human_bot_action == legal_action); +} + +void TerminalActionTest() { + HumanBot human_bot; + std::shared_ptr game = LoadGame("tic_tac_toe"); + std::unique_ptr state = game->NewInitialState(); + + // Apply actions to get a terminal state. + state->ApplyAction(0); + state->ApplyAction(3); + state->ApplyAction(1); + state->ApplyAction(4); + state->ApplyAction(2); + + // Ensure the human bot handles the terminal state case before trying to parse + // an action. + Action human_bot_action = StepHumanBotWithInputs(human_bot, {""}, *state); + + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_TRUE(human_bot_action == kInvalidAction); +} + +void LegalStringActionTest() { + HumanBot human_bot; + std::shared_ptr game = LoadGame("tic_tac_toe"); + std::unique_ptr state = game->NewInitialState(); + std::string legal_action_string = "x(0,0)"; + + // Have the human bot receive a legal string action. + Action human_bot_action = + StepHumanBotWithInputs(human_bot, {legal_action_string}, *state); + Action legal_action = GetActionFromString(*state, legal_action_string); + + SPIEL_CHECK_TRUE(human_bot_action == legal_action); +} + +void LegalIntActionTest() { + HumanBot human_bot; + std::shared_ptr game = LoadGame("tic_tac_toe"); + std::unique_ptr state = game->NewInitialState(); + std::string legal_action_string = "0"; + + // Have the human bot receive a legal integer action. + Action human_bot_action = + StepHumanBotWithInputs(human_bot, {legal_action_string}, *state); + Action legal_action = kInvalidAction; + SPIEL_CHECK_TRUE(absl::SimpleAtoi(legal_action_string, &legal_action)); + SPIEL_CHECK_TRUE(human_bot_action == legal_action); +} + +void IllegalStringActionTest() { + HumanBot human_bot; + std::shared_ptr game = LoadGame("tic_tac_toe"); + std::unique_ptr state = game->NewInitialState(); + std::string illegal_action_string = "illegal_action_string"; + std::string legal_action_string = "x(0,0)"; + + // Have the human bot first receive an illegal string action, then a legal + // string action. + Action human_bot_action = StepHumanBotWithInputs( + human_bot, {illegal_action_string, legal_action_string}, *state); + Action legal_action = GetActionFromString(*state, legal_action_string); + + SPIEL_CHECK_TRUE(human_bot_action == legal_action); +} + +void IllegalIntActionTest() { + HumanBot human_bot; + std::shared_ptr game = LoadGame("tic_tac_toe"); + std::unique_ptr state = game->NewInitialState(); + std::string illegal_action_string = "12345"; + std::string legal_action_string = "0"; + + // Have the human bot first receive an illegal integer action, then a legal + // integer action. 
+ Action human_bot_action = StepHumanBotWithInputs( + human_bot, {illegal_action_string, legal_action_string}, *state); + Action legal_action = kInvalidAction; + SPIEL_CHECK_TRUE(absl::SimpleAtoi(legal_action_string, &legal_action)); + SPIEL_CHECK_TRUE(human_bot_action == legal_action); +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char **argv) { + open_spiel::EmptyActionTest(); + open_spiel::TerminalActionTest(); + open_spiel::LegalStringActionTest(); + open_spiel::LegalIntActionTest(); + open_spiel::IllegalStringActionTest(); + open_spiel::IllegalIntActionTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/pimc_bot.cc b/scenarios/bargaining/open_spiel/open_spiel/bots/pimc_bot.cc new file mode 100644 index 0000000..ed9519e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/pimc_bot.cc @@ -0,0 +1,135 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/bots/pimc_bot.h" + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/algorithms/maxn.h" +#include "open_spiel/algorithms/minimax.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { + +PIMCBot::PIMCBot( + std::function value_function, + Player player_id, uint32_t seed, int num_determinizations, int depth_limit) + : rng_(seed), + value_function_(value_function), + player_id_(player_id), + num_determinizations_(num_determinizations), + depth_limit_(depth_limit) {} + +Action PIMCBot::Step(const State& state) { + std::pair, Action> search_result = Search(state); + return search_result.second; +} + +std::pair PIMCBot::StepWithPolicy(const State& state) { + std::pair, Action> search_result = Search(state); + return {PolicyFromBestAction(state, search_result.second), + search_result.second}; +} + +ActionsAndProbs PIMCBot::GetPolicy(const State& state) { + std::pair, Action> search_result = Search(state); + return PolicyFromBestAction(state, search_result.second); +} + +ActionsAndProbs PIMCBot::PolicyFromBestAction(const State& state, + Action best_action) const { + ActionsAndProbs actions_and_probs; + for (Action action : state.LegalActions()) { + if (action == best_action) { + actions_and_probs.push_back({action, 1.0}); + } else { + actions_and_probs.push_back({action, 0.0}); + } + } + return actions_and_probs; +} + +std::pair, Action> PIMCBot::Search(const State& root_state) { + int num_determinizations = num_determinizations_; + + GameType type = root_state.GetGame()->GetType(); + if (type.information == GameType::Information::kPerfectInformation) { + num_determinizations = 1; + // TODO(author5): drop down to expectimax or alpha-beta if 2-player + } + + Player player = root_state.CurrentPlayer(); + std::vector legal_actions = root_state.LegalActions(); + const int 
num_legal_actions = legal_actions.size(); + std::vector counts(num_legal_actions, 0); + absl::flat_hash_map action_counts; + for (Action action : legal_actions) { + action_counts[action] = 0; + } + + auto rng_func = [this]() { + return absl::Uniform(this->rng_, 0.0, 1.0); + }; + + for (int i = 0; i < num_determinizations; ++i) { + std::unique_ptr state = nullptr; + + if (num_determinizations == 1) { + state = root_state.Clone(); + } else { + state = root_state.ResampleFromInfostate(player, rng_func); + } + + if (type.utility == GameType::Utility::kZeroSum && + type.chance_mode == GameType::ChanceMode::kDeterministic && + root_state.NumPlayers() == 2) { + // Special case for two-player zero-sum deterministic games: use + // alpha-beta. + std::pair search_result = algorithms::AlphaBetaSearch( + *state->GetGame(), state.get(), + [this, player](const State& state) { + return this->value_function_(state, player); + }, + depth_limit_, player, /*use_undo*/ false); + action_counts[search_result.second] += 1; + } else { + std::pair, Action> search_result = + algorithms::MaxNSearch(*state->GetGame(), state.get(), + value_function_, depth_limit_); + action_counts[search_result.second] += 1; + } + } + + Action best_action = kInvalidAction; + int highest_count = -1; + for (int aidx = 0; aidx < num_legal_actions; ++aidx) { + Action action = legal_actions[aidx]; + counts[aidx] = action_counts[action]; + if (counts[aidx] > highest_count) { + highest_count = counts[aidx]; + best_action = action; + } + } + + return {counts, best_action}; +} +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/pimc_bot.h b/scenarios/bargaining/open_spiel/open_spiel/bots/pimc_bot.h new file mode 100644 index 0000000..56b3c16 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/pimc_bot.h @@ -0,0 +1,61 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
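PIMCBot::Search above implements the classic Perfect Information Monte Carlo recipe: repeatedly resample a fully specified world consistent with the current infostate, solve each sample with a perfect-information search (alpha-beta for two-player zero-sum deterministic games, max^n otherwise), and play the action recommended most often. A condensed, generic sketch of that determinize-and-vote loop, with the resampling and the solver abstracted behind callbacks (the names here are illustrative, not OpenSpiel API):

#include <functional>
#include <map>
#include <memory>

// StateT/ActionT stand in for open_spiel::State / open_spiel::Action.
template <typename StateT, typename ActionT>
ActionT DeterminizeAndVote(
    const StateT& root, int num_determinizations,
    const std::function<std::unique_ptr<StateT>(const StateT&)>& resample,
    const std::function<ActionT(const StateT&)>& solve_perfect_info) {
  std::map<ActionT, int> votes;
  for (int i = 0; i < num_determinizations; ++i) {
    // Sample one world consistent with the acting player's information state.
    std::unique_ptr<StateT> world = resample(root);
    // Solve the sampled world as if it were perfect information and tally the
    // root action it recommends.
    ++votes[solve_perfect_info(*world)];
  }
  // Return the most frequently recommended action (ties broken arbitrarily).
  ActionT best{};
  int best_count = -1;
  for (const auto& [action, count] : votes) {
    if (count > best_count) {
      best_count = count;
      best = action;
    }
  }
  return best;
}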
+ +#ifndef OPEN_SPIEL_BOTS_PIMC_BOT_H_ +#define OPEN_SPIEL_BOTS_PIMC_BOT_H_ + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/games/gin_rummy/gin_rummy_utils.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_bots.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { + +class PIMCBot : public Bot { + public: + PIMCBot(std::function value_function, + Player player_id, uint32_t seed, int num_determinizations, + int depth_limit); + + Action Step(const State& state) override; + + bool ProvidesPolicy() override { return true; } + std::pair StepWithPolicy( + const State& state) override; + ActionsAndProbs GetPolicy(const State& state) override; + + bool IsClonable() const override { return false; } + + private: + ActionsAndProbs PolicyFromBestAction(const State& state, + Action best_action) const; + std::pair, Action> Search(const State& root_state); + + std::mt19937 rng_; + std::function value_function_; + const Player player_id_; + const int num_determinizations_; + const int depth_limit_; +}; + +} // namespace open_spiel + +#endif // OPEN_SPIEL_BOTS_GIN_RUMMY_SIMPLE_GIN_RUMMY_BOT_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/pimc_bot_test.cc b/scenarios/bargaining/open_spiel/open_spiel/bots/pimc_bot_test.cc new file mode 100644 index 0000000..be7654c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/pimc_bot_test.cc @@ -0,0 +1,79 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/bots/pimc_bot.h" + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/games/hearts/hearts.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_bots.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace { + +constexpr uint32_t kSeed = 18713687; + +double hearts_value_function(const State& state, Player p) { + const auto& hearts_state = + open_spiel::down_cast(state); + return hearts::kTotalPositivePoints - hearts_state.Points(p); +} + +void SimpleSelfPlayTest() { + const int num_games = 3; + std::mt19937 rng(time(nullptr)); + auto game = LoadGame("hearts"); + std::vector> bots; + const int num_players = game->NumPlayers(); + + for (Player p = 0; p < num_players; ++p) { + bots.push_back( + std::make_unique(hearts_value_function, p, kSeed + p, 10, 2)); + } + + for (int i = 0; i < num_games; i++) { + int turn = 0; + std::unique_ptr state = game->NewInitialState(); + while (!state->IsTerminal()) { + turn += 1; + std::cout << "Game " << i << ", turn " << turn << std::endl; + std::cout << "State:" << std::endl << state->ToString() << std::endl; + Player player = state->CurrentPlayer(); + Action action; + if (state->IsChanceNode()) { + ActionsAndProbs outcomes = state->ChanceOutcomes(); + action = SampleAction(outcomes, std::uniform_real_distribution( + 0.0, 1.0)(rng)) + .first; + } else { + action = bots[player]->Step(*state); + } + std::cout << "Chose action: " << state->ActionToString(action) + << std::endl; + state->ApplyAction(action); + } + } +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char** argv) { open_spiel::SimpleSelfPlayTest(); } diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/roshambo/CMakeLists.txt b/scenarios/bargaining/open_spiel/open_spiel/bots/roshambo/CMakeLists.txt new file mode 100644 index 0000000..a4e38ec --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/roshambo/CMakeLists.txt @@ -0,0 +1,58 @@ +add_library(roshambo OBJECT + roshambo/BotClasses/actr_lag2_decay.h + roshambo/BotClasses/adddriftbot2.h + roshambo/BotClasses/addshiftbot3.h + roshambo/BotClasses/antiflatbot.h + roshambo/BotClasses/antirotnbot.h + roshambo/BotClasses/biopic.h + roshambo/BotClasses/boom.h + roshambo/BotClasses/copybot.h + roshambo/BotClasses/debruijn81.h + roshambo/BotClasses/driftbot.h + roshambo/BotClasses/flatbot3.h + roshambo/BotClasses/foxtrotbot.h + roshambo/BotClasses/freqbot.h + roshambo/BotClasses/granite.h + roshambo/BotClasses/greenberg.h + roshambo/BotClasses/halbot.h + roshambo/BotClasses/inocencio.h + roshambo/BotClasses/iocainebot.h + roshambo/BotClasses/marble.h + roshambo/BotClasses/markov5.h + roshambo/BotClasses/mixed_strategy.h + roshambo/BotClasses/mod1bot.h + roshambo/BotClasses/multibot.cc + roshambo/BotClasses/multibot.h + roshambo/BotClasses/peterbot.h + roshambo/BotClasses/phasenbott.cc + roshambo/BotClasses/phasenbott.h + roshambo/BotClasses/pibot.h + roshambo/BotClasses/piedra.h + roshambo/BotClasses/predbot.h + roshambo/BotClasses/r226bot.h + roshambo/BotClasses/randbot.h + roshambo/BotClasses/robertot.h + roshambo/BotClasses/rockbot.h + roshambo/BotClasses/rotatebot.h + roshambo/BotClasses/rsb_bot.h + roshambo/BotClasses/russrocker4.h + roshambo/BotClasses/shofar.cc + roshambo/BotClasses/shofar.h + roshambo/BotClasses/suncrazybot.h + roshambo/BotClasses/sunnervebot.h + roshambo/BotClasses/sweetrock.h + roshambo/BotClasses/switchalot.h + roshambo/BotClasses/switchbot.h + roshambo/BotClasses/textbot.h + 
roshambo/BotClasses/zqmove.h + roshambo/bot_map.cc + roshambo/bot_map.h + roshambo_bot.cc + roshambo_bot.h +) +target_compile_options(roshambo PUBLIC -w) +target_include_directories(roshambo PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + +add_executable(roshambo_bot_test roshambo_bot_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(roshambo_bot_test roshambo_bot_test) diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/roshambo/README.md b/scenarios/bargaining/open_spiel/open_spiel/bots/roshambo/README.md new file mode 100644 index 0000000..50754c2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/roshambo/README.md @@ -0,0 +1,16 @@ +# RoShamBo Bots + +Bots from the International RoShamBo Programming Competition, available as an +optional dependency. See `open_spiel/scripts/global_variables.sh` to configure +conditional dependencies and enable this. + +The competition was held in 1999 and 2000 by Darse Billings at the University of +Alberta. The player pool was seeded with dummy bots, requiring competitive +entrants to exploit sub-optimal play while avoiding getting exploited by other +sophisticated bots. For more information, and all code used in the first +competition, see https://webdocs.cs.ualberta.ca/~darse/rsbpc.html. + +No additional code from the second competition was officially released, but +Andrzej Nagorko, author of the winning entrant Greenberg, independently released +the bot’s source code http://www.mathpuzzle.com/older.htm. Greenberg is also +included as an OpenSpiel bot. diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/roshambo/roshambo_bot.cc b/scenarios/bargaining/open_spiel/open_spiel/bots/roshambo/roshambo_bot.cc new file mode 100644 index 0000000..9da9f03 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/roshambo/roshambo_bot.cc @@ -0,0 +1,61 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/bots/roshambo/roshambo_bot.h" + +namespace open_spiel { +namespace roshambo { + +using ::roshambo_tournament::bot_map; + +RoshamboBot::RoshamboBot(Player player_id, std::string bot_name, int num_throws) + : player_id_(player_id), opponent_id_(1 - player_id), bot_name_(bot_name) { + if (auto bot_it = bot_map.find(bot_name); bot_it == bot_map.end()) { + SpielFatalError("Invalid bot name!"); + } else { + bot_ = bot_it->second(num_throws); + } +} + +Action RoshamboBot::Step(const State& state) { + // Every step must synchronize histories between the OpenSpiel wrapper + // bot and the RoShamBo bot. 
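// Concretely: the OpenSpiel history holds one action per player per completed
// throw, so history.size() / 2 is the number of throws played so far, while
// the underlying RoShamBo bot still lags one throw behind. Before asking it
// for a new move, it is caught up by passing it both players' actions from
// the most recent throw via RecordTrial(); on the very first throw there is
// nothing to record and GetAction() is called directly.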
+ std::vector history = state.History(); + if (history.empty()) { + SPIEL_CHECK_EQ(bot_->CurrentMatchLength(), 0); + } else { + const int throw_num = history.size() / 2; + SPIEL_CHECK_EQ(bot_->CurrentMatchLength() + 1, throw_num); + bot_->RecordTrial(history[((throw_num - 1) * 2) + player_id_], + history[((throw_num - 1) * 2) + opponent_id_]); + } + return bot_->GetAction(); +} + +std::unique_ptr MakeRoshamboBot(int player_id, std::string bot_name, + int num_throws) { + return std::make_unique(player_id, bot_name, num_throws); +} + +std::vector RoshamboBotNames() { + std::vector names; + names.reserve(bot_map.size()); + for (const auto& iter : bot_map) { + names.push_back(iter.first); + } + return names; +} + +} // namespace roshambo +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/roshambo/roshambo_bot.h b/scenarios/bargaining/open_spiel/open_spiel/bots/roshambo/roshambo_bot.h new file mode 100644 index 0000000..c6db56d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/roshambo/roshambo_bot.h @@ -0,0 +1,75 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_BOTS_ROSHAMBO_ROSHAMBO_BOT_H_ +#define OPEN_SPIEL_BOTS_ROSHAMBO_ROSHAMBO_BOT_H_ + +// Bots from the International Roshambo Programming Competition. +// https://webdocs.cs.ualberta.ca/~darse/rsbpc.html +// This OpenSpiel bot provides an interface to all of the bots in the 1999 +// competition, as well as Greenberg, the winner of the 2000 competition, +// written by Andrzej Nagorko. +// http://www.mathpuzzle.com/older.htm +// http://www.mathpuzzle.com/greenberg.c + +#include +#include +#include +#include + +#include "open_spiel/spiel_bots.h" +#include "open_spiel/bots/roshambo/roshambo/BotClasses/rsb_bot.h" +#include "open_spiel/bots/roshambo/roshambo/bot_map.h" + +namespace open_spiel { +namespace roshambo { + +using roshambo_tournament::RSBBot; + +// The underlying C code requires that the number of throws in a game be +// specified at compile time. Changing it requires modifying the file +// rsb-ts1-modified.c. Set the constant 'trials' on line 42 to the desired +// number of throws. Then set kNumThrows below to the same number, and rebuild +// OpenSpiel by running the script build_and_run_tests.sh. + +// Note that in his discussion of the results of the first competition, Darse +// Billings observed that match length was not particularly important: "The +// results were remarkably robust, and increasing the match length to 10000 +// turns or decreasing it to 400 turns had a negligible effect." 
+// https://webdocs.cs.ualberta.ca/~darse/rsb-results1.html +inline constexpr int kNumThrows = RSBBot::kCompetitionMatchLength; +inline constexpr int kNumBots = 43; + +class RoshamboBot : public Bot { + public: + explicit RoshamboBot(int player_id, std::string bot_name, + int num_throws = kNumThrows); + Action Step(const State& state) override; + void Restart() override { bot_->Reset(); } + + private: + Player player_id_; + Player opponent_id_; + std::string bot_name_; + std::unique_ptr bot_; +}; + +std::unique_ptr MakeRoshamboBot(int player_id, std::string bot_name, + int num_throws = kNumThrows); +std::vector RoshamboBotNames(); + +} // namespace roshambo +} // namespace open_spiel + +#endif // OPEN_SPIEL_BOTS_ROSHAMBO_ROSHAMBO_BOT_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/roshambo/roshambo_bot_test.cc b/scenarios/bargaining/open_spiel/open_spiel/bots/roshambo/roshambo_bot_test.cc new file mode 100644 index 0000000..5c9f489 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/roshambo/roshambo_bot_test.cc @@ -0,0 +1,108 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/bots/roshambo/roshambo_bot.h" + +#include +#include + +#include "open_spiel/abseil-cpp/absl/time/clock.h" +#include "open_spiel/abseil-cpp/absl/time/time.h" +#include "open_spiel/game_transforms/repeated_game.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_bots.h" +#include "open_spiel/bots/roshambo/roshambo/bot_map.h" + +namespace open_spiel { +namespace { + +uint_fast32_t Seed() { return absl::ToUnixMicros(absl::Now()); } + +void MakeAllRoshamboBots() { + std::vector> bots; + for (const auto& [name, factory] : ::roshambo_tournament::bot_map) { + bots.push_back(roshambo::MakeRoshamboBot(0, name)); + } + SPIEL_CHECK_EQ(bots.size(), roshambo::kNumBots); +} + +// This matchup is deterministic and both bots utilize the match history so +// we can test that the bots are perceiving the game correctly. +void RoshamboBotHistoryTest() { + GameParameters params; + params["num_repetitions"] = GameParameter(roshambo::kNumThrows); + std::shared_ptr game = CreateRepeatedGame("matrix_rps", params); + + std::vector> bots; + bots.push_back(roshambo::MakeRoshamboBot(0, "rotatebot")); + bots.push_back(roshambo::MakeRoshamboBot(1, "copybot")); + std::unique_ptr state = game->NewInitialState(); + + const int num_players = bots.size(); + std::vector joint_actions(bots.size()); + for (int i = 0; i < roshambo::kNumThrows; ++i) { + for (Player p = 0; p < num_players; ++p) + joint_actions[p] = bots[p]->Step(*state); + state->ApplyActions(joint_actions); + if (i == 0) { + // Copybot wins the first round. + SPIEL_CHECK_EQ(state->PlayerReturn(0), -1); + SPIEL_CHECK_EQ(state->PlayerReturn(1), 1); + } else { + // All subsequent rounds are draws. 
+ SPIEL_CHECK_EQ(state->PlayerReward(0), 0); + SPIEL_CHECK_EQ(state->PlayerReward(1), 0); + } + } + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_EQ(state->PlayerReturn(0), -1); + SPIEL_CHECK_EQ(state->PlayerReturn(1), 1); +} + +// Matchup between 1999 and 2000 tournament winners! +void RoshamboBotBasicPlayGame() { + int num_games = 5; + std::mt19937 rng(Seed()); + GameParameters params; + params["num_repetitions"] = GameParameter(roshambo::kNumThrows); + std::shared_ptr game = CreateRepeatedGame("matrix_rps", params); + std::vector> bots; + + bots.push_back(roshambo::MakeRoshamboBot(0, "greenberg")); + bots.push_back(roshambo::MakeRoshamboBot(1, "iocainebot")); + + for (int i = 0; i < num_games; i++) { + // Set seed for the underlying C code + srandom(Seed()); + std::unique_ptr state = game->NewInitialState(); + + const int num_players = bots.size(); + std::vector joint_actions(bots.size()); + for (Player p = 0; p < num_players; ++p) bots[p]->Restart(); + while (!state->IsTerminal()) { + for (Player p = 0; p < num_players; ++p) + joint_actions[p] = bots[p]->Step(*state); + state->ApplyActions(joint_actions); + } + } +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char **argv) { + open_spiel::MakeAllRoshamboBots(); + open_spiel::RoshamboBotHistoryTest(); + open_spiel::RoshamboBotBasicPlayGame(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/uci/CMakeLists.txt b/scenarios/bargaining/open_spiel/open_spiel/bots/uci/CMakeLists.txt new file mode 100644 index 0000000..5ce42fa --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/uci/CMakeLists.txt @@ -0,0 +1,9 @@ + +add_executable(random_uci_bot random_uci_bot.cc ${OPEN_SPIEL_OBJECTS} + $) + +add_executable(uci_bot_test uci_bot_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(NAME uci_bot_test + COMMAND ${CMAKE_CURRENT_BINARY_DIR}/uci_bot_test + --binary ${CMAKE_CURRENT_BINARY_DIR}/random_uci_bot) diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/uci/random_uci_bot.cc b/scenarios/bargaining/open_spiel/open_spiel/bots/uci/random_uci_bot.cc new file mode 100644 index 0000000..c295da0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/uci/random_uci_bot.cc @@ -0,0 +1,111 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/flags/flag.h" +#include "open_spiel/abseil-cpp/absl/flags/parse.h" +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/abseil-cpp/absl/strings/match.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/games/chess/chess.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/init.h" + +ABSL_FLAG(int, seed, 0, "The seed to use."); + +namespace open_spiel { +namespace uci { + +void RandomUciBot() { + int seed = absl::GetFlag(FLAGS_seed); + std::mt19937 rng(seed); + std::unique_ptr state; + chess::ChessState* chess_state = nullptr; + std::shared_ptr game = LoadGame("chess"); + + for (std::string line; std::getline(std::cin, line);) { + if (line == "uci") { + std::cout << "uciok" << std::endl; + } else if (line == "isready") { + std::cout << "readyok" << std::endl; + } else if (line == "ucinewgame") { + state = game->NewInitialState(); + chess_state = down_cast(state.get()); + } else if (absl::StartsWith(line, "position fen ")) { + // This command has following syntax: + // position fen moves + std::vector tokens = absl::StrSplit(line, ' '); + // Build up the which can contain spaces. + std::stringstream fen; + int pos = 2; + bool has_moves = false; + while (pos < tokens.size()) { + if (tokens[pos] == "moves") { + has_moves = true; + ++pos; + break; + } + if (pos > 2) fen << ' '; + fen << tokens[pos]; + ++pos; + } + + state = game->NewInitialState(fen.str()); + chess_state = down_cast(state.get()); + + if (has_moves) { + while (pos < tokens.size()) { + Action action = chess_state->ParseMoveToAction(tokens[pos]); + state->ApplyAction(action); + ++pos; + } + } + // Bot should return a move given all types of go commands + } else if (absl::StartsWith(line, "go movetime") || + absl::StartsWith(line, "go depth") || + absl::StartsWith(line, "go nodes") || + absl::StartsWith(line, "go mate")) { + std::cout << "info string Random uci bot uci info statistics may not be " + "accurate.\n"; + std::vector legal_actions = state->LegalActions(); + int index = absl::Uniform(rng, 0, legal_actions.size()); + Action action = legal_actions[index]; + chess::Move move = ActionToMove(action, chess_state->Board()); + std::cout << "info depth 1 seldepth 1 multipv 1 nodes 1 nps 1000 " + "hashfull 0 tbhits 0 time 1 pv " + << move.ToLAN() << "\n"; + std::cout << "bestmove " << move.ToLAN() << std::endl; + } else if (line == "quit") { + return; + } else { + std::cout << "Unrecognized command: " << line << std::endl; + } + } +} + +} // namespace uci +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::Init("", &argc, &argv, false); + absl::ParseCommandLine(argc, argv); + open_spiel::uci::RandomUciBot(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/uci/uci_bot.cc b/scenarios/bargaining/open_spiel/open_spiel/bots/uci/uci_bot.cc new file mode 100644 index 0000000..55cb31e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/uci/uci_bot.cc @@ -0,0 +1,346 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/bots/uci/uci_bot.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/ascii.h" +#include "open_spiel/abseil-cpp/absl/strings/match.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/games/chess/chess.h" +#include "open_spiel/games/chess/chess_board.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_bots.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/file.h" + +namespace open_spiel { +namespace uci { + +UCIBot::UCIBot(const std::string& bot_binary_path, int search_limit_value, + bool ponder, const Options& options, + SearchLimitType search_limit_type, + bool use_game_history_for_position) + : ponder_(ponder), + use_game_history_for_position_(use_game_history_for_position) { + SPIEL_CHECK_GT(search_limit_value, 0); + SPIEL_CHECK_GT(bot_binary_path.size(), 0); + search_limit_type_ = search_limit_type; + search_limit_value_ = search_limit_value; + if (search_limit_type_ == SearchLimitType::kMoveTime) { + search_limit_string_ = "movetime " + std::to_string(search_limit_value_); + } else if (search_limit_type_ == SearchLimitType::kNodes) { + search_limit_string_ = "nodes " + std::to_string(search_limit_value_); + } else if (search_limit_type_ == SearchLimitType::kDepth) { + search_limit_string_ = "depth " + std::to_string(search_limit_value_); + } else { + SpielFatalError("Unsupported search limit type"); + } + + StartProcess(bot_binary_path); + Uci(); + for (auto const& [name, value] : options) { + SetOption(name, value); + } + IsReady(); + UciNewGame(); +} + +UCIBot::~UCIBot() { + Quit(); + int status; + while (waitpid(pid_, &status, 0) == -1) { + // Do nothing. + } + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { + std::cerr << "Uci sub-process failed" << std::endl; + } + + // Close the input stream + fclose(input_stream_); + // Free the input stream buffer allocated in ReadLine + free(input_stream_buffer_); + // Close the output pipe + close(output_fd_); +} + +void UCIBot::PositionFromState(const chess::ChessState& state, + const std::vector& extra_moves) { + if (use_game_history_for_position_) { + std::pair> fen_and_moves = + state.ExtractFenAndMaybeMoves(); + fen_and_moves.second.insert(fen_and_moves.second.end(), + extra_moves.begin(), extra_moves.end()); + Position(fen_and_moves.first, fen_and_moves.second); + } else { + Position(state.Board().ToFEN(), extra_moves); + } +} + +Action UCIBot::Step(const State& state) { return StepVerbose(state).first; } + +std::pair UCIBot::StepVerbose(const State& state) { + std::string move_str; + std::string info_str; // Contains the last info string from the bot. 
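// Pondering control flow: at the end of the previous Step() the engine was
// told to keep searching the predicted reply (GoPonder). If InformAction()
// saw the opponent actually play that prediction, PonderHit() was sent and
// the ongoing search is reused, so only ReadBestMove() is needed here;
// otherwise the stale search is stopped and a fresh one is started from the
// current position.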
+ auto chess_state = down_cast(state); + auto chess_game = down_cast(state.GetGame().get()); + if (ponder_ && ponder_move_) { + if (!was_ponder_hit_) { + Stop(); + PositionFromState(chess_state); + tie(move_str, ponder_move_) = Go(&info_str); + } else { + tie(move_str, ponder_move_) = ReadBestMove(&info_str); + } + } else { + PositionFromState(chess_state); + tie(move_str, ponder_move_) = Go(&info_str); + } + was_ponder_hit_ = false; + auto move = chess_state.Board().ParseLANMove(move_str, + chess_game->IsChess960()); + if (!move) { + SpielFatalError("Uci sub-process returned an illegal or invalid move"); + } + + if (ponder_ && ponder_move_) { + PositionFromState(chess_state, {move_str, *ponder_move_}); + GoPonder(); + } + + Action action = chess::MoveToAction(*move, chess_state.BoardSize()); + return {action, info_str}; +} + +void UCIBot::Restart() { + ponder_move_ = absl::nullopt; + was_ponder_hit_ = false; + UciNewGame(); +} + +void UCIBot::RestartAt(const State& state) { + ponder_move_ = absl::nullopt; + was_ponder_hit_ = false; + auto chess_state = down_cast(state); + PositionFromState(chess_state); +} + +void UCIBot::InformAction(const State& state, Player player_id, Action action) { + auto chess_state = down_cast(state); + auto chess_game = down_cast(state.GetGame().get()); + chess::Move move = chess::ActionToMove(action, chess_state.Board()); + std::string move_str = move.ToLAN(chess_game->IsChess960(), + &chess_state.Board()); + if (ponder_ && move_str == ponder_move_) { + PonderHit(); + was_ponder_hit_ = true; + } +} + +void UCIBot::StartProcess(const std::string& bot_binary_path) { + int output_pipe[2]; + int input_pipe[2]; + + if (pipe(output_pipe) || pipe(input_pipe)) { + SpielFatalError("Creating pipes failed"); + } + + pid_ = fork(); + if (pid_ < 0) { + SpielFatalError("Forking failed"); + } + + if (pid_ > 0) { // parent + close(output_pipe[0]); + close(input_pipe[1]); + + output_fd_ = output_pipe[1]; + input_stream_ = fdopen(input_pipe[0], "r"); + if (input_stream_ == nullptr) { + SpielFatalError("Opening the UCI input pipe as a file stream failed"); + } + + } else { // child + dup2(output_pipe[0], STDIN_FILENO); + dup2(input_pipe[1], STDOUT_FILENO); + dup2(input_pipe[1], STDERR_FILENO); + + close(output_pipe[1]); + close(input_pipe[0]); + + std::string real_binary_path = open_spiel::file::RealPath(bot_binary_path); + execlp(real_binary_path.c_str(), real_binary_path.c_str(), (char*)nullptr); + // See /usr/include/asm-generic/errno-base.h for error codes. 
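// execlp() only returns if launching the engine failed, in which case errno
// explains why; ENOENT (binary not found) gets a dedicated message below and
// every other code is reported numerically.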
+ switch (errno) { + case ENOENT: + SpielFatalError( + absl::StrCat("Executing uci bot sub-process failed: file '", + real_binary_path, "' not found.")); + default: + SpielFatalError(absl::StrCat( + "Executing uci bot sub-process failed: Error ", errno)); + } + } +} + +void UCIBot::Uci() { + Write("uci"); + while (true) { + std::string response = ReadLine(); + if (!response.empty()) { + if (absl::StartsWith(response, "id") || + absl::StartsWith(response, "option")) { + continue; // Don't print options and ids + } + if (absl::StrContains(response, "uciok")) { + return; + } else { + std::cerr << "Bot: " << response << std::endl; + } + } + } +} + +void UCIBot::SetOption(const std::string& name, const std::string& value) { + std::string msg = "setoption name " + name + " value " + value; + Write(msg); +} + +void UCIBot::UciNewGame() { Write("ucinewgame"); } + +void UCIBot::IsReady() { + Write("isready"); + while (true) { + std::string response = ReadLine(); + if (!response.empty()) { + if (absl::StrContains(response, "readyok")) { + return; + } else { + std::cerr << "Bot: " << response << std::endl; + } + } + } +} + +void UCIBot::Position(const std::string& fen, + const std::vector& moves) { + std::string msg = "position fen " + fen; + if (!moves.empty()) { + std::string moves_str = absl::StrJoin(moves, " "); + msg += " moves " + moves_str; + } + Write(msg); +} + +std::pair> UCIBot::Go( + absl::optional info_string) { + Write("go " + search_limit_string_); + return ReadBestMove(info_string); +} + +void UCIBot::GoPonder() { Write("go ponder " + search_limit_string_); } + +void UCIBot::PonderHit() { Write("ponderhit"); } + +std::pair> UCIBot::Stop() { + Write("stop"); + return ReadBestMove(); +} + +void UCIBot::Quit() { Write("quit"); } + +std::pair> UCIBot::ReadBestMove( + absl::optional info_string) { + while (true) { + // istringstream can't use a string_view so we need to copy to a string. + std::string response = ReadLine(); + // Save the most recent info string if requested. Specifying that the string + // contains the number of nodes makes sure that we don't save strings of the + // form "info depth 30 currmove c2c1 currmovenumber 22", we want the ones + // with metadata about the search. + if (info_string.has_value() && absl::StartsWith(response, "info") && + absl::StrContains(response, "nodes")) { + *info_string.value() = response; + } + std::istringstream response_line(response); + std::string token; + std::string move_str; + absl::optional ponder_str = absl::nullopt; + response_line >> std::skipws; + while (response_line >> token) { + if (token == "bestmove") { + response_line >> move_str; + } else if (token == "ponder") { + response_line >> token; + ponder_str = token; + } + } + if (!move_str.empty()) { + return std::make_pair(move_str, ponder_str); + } + } +} + +void UCIBot::Write(const std::string& msg) const { + if (write(output_fd_, (msg + "\n").c_str(), msg.size() + 1) != + msg.size() + 1) { + SpielFatalError("Sending a command to uci sub-process failed"); + } +} + +std::string UCIBot::ReadLine() { + if (auto bytes_read = ::getline(&input_stream_buffer_, + &input_stream_buffer_size_, input_stream_); + bytes_read != -1) { + absl::string_view response = + absl::string_view(input_stream_buffer_, bytes_read); + // Remove the trailing newline that getline left in the string. + // Using a string_view as input saves us from copying the string. 
+ return std::string(absl::StripTrailingAsciiWhitespace(response)); + } + std::cerr << "Failed to read from input stream: " << std::strerror(errno) + << "\n"; + SpielFatalError("Reading a line from uci sub-process failed"); +} + +std::unique_ptr MakeUCIBot(const std::string& bot_binary_path, + int search_limit_value, bool ponder, + const Options& options, + SearchLimitType search_limit_type, + bool use_game_history_for_position) { + return std::make_unique(bot_binary_path, search_limit_value, ponder, + options, search_limit_type, + use_game_history_for_position); +} + +} // namespace uci +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/uci/uci_bot.h b/scenarios/bargaining/open_spiel/open_spiel/bots/uci/uci_bot.h new file mode 100644 index 0000000..4f35789 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/uci/uci_bot.h @@ -0,0 +1,133 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_BOTS_UCI_BOT_H_ +#define OPEN_SPIEL_BOTS_UCI_BOT_H_ + +#include // for size_t, needed by ::getline +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_bots.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/games/chess/chess.h" + +// **IMPORTANT NOTE** The basic test currently hangs, so consider this bot +// currently experimental. The original authors claimed to have verified it with +// external engines: +// https://github.com/deepmind/open_spiel/pull/496#issuecomment-791578615 See +// https://github.com/deepmind/open_spiel/issues/681 for details. +namespace open_spiel { +namespace uci { + +using Options = std::map; + +enum class SearchLimitType { + kMoveTime, + kNodes, + kDepth, + kMate, +}; + +class UCIBot : public Bot { + public: + // Search limit value is the argument sent to either "go movetime", + // "go depth", or "go nodes". + UCIBot(const std::string& bot_binary_path, int search_limit_value, + bool ponder, const Options& options, + SearchLimitType search_limit_type = SearchLimitType::kMoveTime, + bool use_game_history_for_position = false); + ~UCIBot() override; + + Action Step(const State& state) override; + + std::pair StepVerbose(const State& state) override; + + void Restart() override; + void RestartAt(const State& state) override; + + void InformAction(const State& state, Player player_id, + Action action) override; + + void Write(const std::string& msg) const; + // Always blocks until a line is read. 
+ std::string ReadLine(); + + void Position(const std::string& fen, + const std::vector<std::string>& moves = {}); + + private: + void StartProcess(const std::string& bot_binary_path); + void Uci(); + void SetOption(const std::string& name, const std::string& value); + void UciNewGame(); + void IsReady(); + std::pair<std::string, absl::optional<std::string>> Go( + absl::optional<std::string*> info_string = absl::nullopt); + void GoPonder(); + void PonderHit(); + std::pair<std::string, absl::optional<std::string>> Stop(); + void Quit(); + std::pair<std::string, absl::optional<std::string>> ReadBestMove( + absl::optional<std::string*> info_string = absl::nullopt); + void PositionFromState(const chess::ChessState& state, + const std::vector<std::string>& extra_moves = {}); + + pid_t pid_ = -1; + int output_fd_ = -1; + SearchLimitType search_limit_type_; + int search_limit_value_; + std::string search_limit_string_; + absl::optional<std::string> ponder_move_ = absl::nullopt; + bool was_ponder_hit_ = false; + + bool ponder_; + bool use_game_history_for_position_ = false; + + // Input stream member variables for the bot. + FILE* input_stream_ = nullptr; + char* input_stream_buffer_ = nullptr; + size_t input_stream_buffer_size_ = 0; +}; + +/** + * @param bot_binary_path Path to the uci engine executable that is going to be + * run in a new process. + * @param move_time Time limit per move in millis. Right now chess lacks any + * kind of time control so it is needed to provide at least this. Without any + * time control, the uci engine behaviour is undefined (e.g. Ethereal searches + * to depth 1, but Stockfish searches until explicitly stopped) + * @param ponder Boolean indicating whether this bot should make the uci engine + * ponder (think even when it is its opponent's turn). In some engines, this should + * be accompanied with an option (see param options) so that the engine can + * adapt time control. + * @param options Additional options to set in the engine. There might be + * different options available for each engine. + * @return unique_ptr to a UCIBot + */ +std::unique_ptr<Bot> MakeUCIBot( + const std::string& bot_binary_path, int search_limit_value, + bool ponder = false, const Options& options = {}, + SearchLimitType search_limit_type = SearchLimitType::kMoveTime, + bool use_game_history_for_position = false); + +} // namespace uci +} // namespace open_spiel + +#endif // OPEN_SPIEL_BOTS_UCI_BOT_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/uci/uci_bot_test.cc b/scenarios/bargaining/open_spiel/open_spiel/bots/uci/uci_bot_test.cc new file mode 100644 index 0000000..3862829 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/uci/uci_bot_test.cc @@ -0,0 +1,83 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#include "open_spiel/bots/uci/uci_bot.h" + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/flags/flag.h" +#include "open_spiel/abseil-cpp/absl/flags/parse.h" +#include "open_spiel/abseil-cpp/absl/strings/match.h" +#include "open_spiel/algorithms/evaluate_bots.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_bots.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/init.h" + +ABSL_FLAG(std::string, binary, "random_uci_bot", + "Name of the binary to run for chess."); + +namespace open_spiel { +namespace uci { +namespace { + +inline constexpr const int kNumGames = 3; +inline constexpr const int kSeed = 12874681; + +void RandomUciBotTest(bool use_game_history_for_position) { + std::string binary = absl::GetFlag(FLAGS_binary); + std::shared_ptr<const Game> game = LoadGame("chess"); + Options options = {}; + auto bot1 = std::make_unique<UCIBot>(binary, /*move_time*/ 10, + /*ponder*/ false, /*options*/ options, + /*search_limit_type*/ SearchLimitType::kMoveTime, + use_game_history_for_position); + auto bot2 = std::make_unique<UCIBot>(binary, /*move_time*/ 10, + /*ponder*/ false, /*options*/ options, + /*search_limit_type*/ SearchLimitType::kMoveTime, + use_game_history_for_position); + std::vector<Bot*> bots = {bot1.get(), bot2.get()}; + for (int i = 0; i < kNumGames; ++i) { + std::unique_ptr<State> state = game->NewInitialState(); + EvaluateBots(state.get(), bots, kSeed); + std::cout << "Game over: " << state->HistoryString() << std::endl; + } +} + +void CheckVerboseOutput() { + std::string binary = absl::GetFlag(FLAGS_binary); + std::shared_ptr<const Game> game = LoadGame("chess"); + auto bot = UCIBot(binary, /*move_time*/ 10, + /*ponder*/ false, /*options*/ {}); + std::unique_ptr<State> state = game->NewInitialState(); + auto [action, info] = bot.StepVerbose(*state); + + SPIEL_CHECK_TRUE(absl::StrContains(info, "info")); + std::cout << "Verbose output: " << info << std::endl; +} + +} // namespace +} // namespace uci +} // namespace open_spiel + +int main(int argc, char **argv) { + open_spiel::Init("", &argc, &argv, false); + absl::ParseCommandLine(argc, argv); + open_spiel::uci::CheckVerboseOutput(); + open_spiel::uci::RandomUciBotTest(/*use_history*/false); + open_spiel::uci::RandomUciBotTest(/*use_history*/true); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/.gitignore b/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/.gitignore new file mode 100644 index 0000000..9e7ce9b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/.gitignore @@ -0,0 +1,2 @@ +# This directory is fetched during install, do not include in OpenSpiel.
+hearts/ diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/CMakeLists.txt b/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/CMakeLists.txt new file mode 100644 index 0000000..7d0812a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/CMakeLists.txt @@ -0,0 +1,50 @@ +add_library(xinxin OBJECT + hearts/Algorithm.cpp + hearts/Algorithm.h + hearts/CardGameState.cpp + hearts/CardGameState.h + hearts/CardProbabilityData.cpp + hearts/CardProbabilityData.h + hearts/Game.cpp + hearts/Game.h + hearts/GameState.cpp + hearts/GameState.h + hearts/Hearts.cpp + hearts/Hearts.h + hearts/HeartsGameData.cpp + hearts/HeartsGameData.h + hearts/HeartsGameHistories.cpp + hearts/HeartsGameHistories.h + hearts/Player.cpp + hearts/Player.h + hearts/ProblemState.cpp + hearts/ProblemState.h + hearts/States.cpp + hearts/States.h + hearts/Timer.cpp + hearts/Timer.h + hearts/UCT.cpp + hearts/UCT.h + hearts/algorithmStates.cpp + hearts/algorithmStates.h + hearts/fpUtil.cpp + hearts/fpUtil.h + hearts/hash.cpp + hearts/hash.h + hearts/iiGameState.cpp + hearts/iiGameState.h + hearts/iiMonteCarlo.cpp + hearts/iiMonteCarlo.h + hearts/mt_random.cpp + hearts/mt_random.h + xinxin_bot.cc + xinxin_bot.h +) + +target_include_directories (xinxin PUBLIC hearts) + +add_executable (xinxin_bot_test xinxin_bot_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(xinxin_bot_test xinxin_bot_test) + +add_executable (xinxin_game_generator xinxin_game_generator.cc ${OPEN_SPIEL_OBJECTS}) diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/README.md b/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/README.md new file mode 100644 index 0000000..87d10b3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/README.md @@ -0,0 +1,9 @@ +# Hearts + +OpenSpiel can support playing against Nathan Sturtevant's state of the art +Hearts program xinxin (pronounced "sheen-sheen"). To enable this option, see +`open_spiel/scripts/global_variables.sh`. + +For more information about xinxin, see its +[github page](https://github.com/nathansttt/hearts) and/or +[Nathan's Hearts research page](https://webdocs.cs.ualberta.ca/~nathanst/hearts.html). diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/xinxin_bot.cc b/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/xinxin_bot.cc new file mode 100644 index 0000000..e9e8fb8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/xinxin_bot.cc @@ -0,0 +1,239 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/bots/xinxin/xinxin_bot.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace hearts { +namespace { + +constexpr Suit kXinxinSuits[] = {Suit::kSpades, Suit::kDiamonds, Suit::kClubs, + Suit::kHearts}; +constexpr ::hearts::tPassDir kXinxinPassDir[] = { + ::hearts::tPassDir::kHold, ::hearts::tPassDir::kLeftDir, + ::hearts::tPassDir::kAcrossDir, ::hearts::tPassDir::kRightDir}; + +} // namespace + +Action GetOpenSpielAction(::hearts::card card) { + // xinxin keeps ranks in the reverse order of open_spiel + int rank = kNumCardsPerSuit - ::hearts::Deck::getrank(card) - 1; + Suit suit = kXinxinSuits[::hearts::Deck::getsuit(card)]; + return Card(suit, rank); +} + +::hearts::card GetXinxinAction(Action action) { + int rank = kNumCardsPerSuit - CardRank(action) - 1; + int suit = + std::find(kXinxinSuits, kXinxinSuits + kNumSuits, CardSuit(action)) - + kXinxinSuits; + return ::hearts::Deck::getcard(suit, rank); +} + +std::unique_ptr<::hearts::SafeSimpleHeartsPlayer> XinxinBot::CreatePlayer() { + xinxin_uct_.push_back( + std::make_unique<::hearts::UCT>(uct_num_runs_, uct_c_val_)); + xinxin_playouts_.push_back(std::make_unique<::hearts::HeartsPlayout>()); + xinxin_uct_.back()->setPlayoutModule(xinxin_playouts_.back().get()); + xinxin_mc_.push_back(std::make_unique<::hearts::iiMonteCarlo>( + xinxin_uct_.back().get(), iimc_num_worlds_)); + xinxin_mc_.back()->setUseThreads(use_threads_); + auto player = std::make_unique<::hearts::SafeSimpleHeartsPlayer>( + xinxin_mc_.back().get()); + player->setModelLevel(2); + return player; +} + +XinxinBot::XinxinBot(int rules, int uct_num_runs, double uct_c_val, + int iimc_num_worlds, bool use_threads) + : uct_num_runs_(uct_num_runs), + uct_c_val_(uct_c_val), + iimc_num_worlds_(iimc_num_worlds), + use_threads_(use_threads), + initial_state_(nullptr) { + pass_dir_ = ::hearts::tPassDir::kHold; + num_cards_dealt_ = 0; + game_state_ = std::make_unique<::hearts::HeartsGameState>(); + game_state_->setRules(rules); + game_state_->deletePlayers(); + for (int i = 0; i < kNumPlayers; i++) { + initial_deal_.push_back(std::vector<::hearts::card>()); + // The game state destructor deletes the players, so we do not manage their + // memory in this bot. + std::unique_ptr<::hearts::SafeSimpleHeartsPlayer> player = CreatePlayer(); + ::hearts::SafeSimpleHeartsPlayer* released_player = player.release(); + game_state_->addPlayer(released_player); + released_player->setGameState(game_state_.get()); + } + SPIEL_CHECK_EQ(xinxin_uct_.size(), kNumPlayers); + SPIEL_CHECK_EQ(xinxin_mc_.size(), kNumPlayers); + SPIEL_CHECK_EQ(xinxin_playouts_.size(), kNumPlayers); +} + +void XinxinBot::Restart() { + game_state_->Reset(); + pass_dir_ = ::hearts::tPassDir::kHold; + num_cards_dealt_ = 0; + for (auto& hand : initial_deal_) { + hand.clear(); + } +} + +void XinxinBot::RestartAt(const State& state) { + if (initial_state_ == nullptr) { + initial_state_ = state.GetGame()->NewInitialState(); + } + + // TODO(author5): define a default operator== in State. + if (state.ToString() != initial_state_->ToString()) { + SpielFatalError("XinxinBot::RestartAt only supports restarts from the " + "initial state."); + } + + Restart(); +} + +void XinxinBot::NewDeal(std::vector>* initial_cards, + ::hearts::tPassDir pass_dir, int first_player) { + // the order in which these are called matters (e.g. 
setting the cards unsets + // the pass dir) + game_state_->Reset(); + game_state_->SetInitialCards(*initial_cards); + game_state_->setPassDir(pass_dir); + game_state_->setFirstPlayer(first_player); +} + +void XinxinBot::LogStateMismatchError(const State& state, std::string msg) { + std::cout << "Begin error message: " << std::endl; + std::cout << "xinxin game state: " << std::endl; + game_state_->Print(); + std::cout << "xinxin legal moves: " << std::endl; + ::hearts::Move* all_moves = game_state_->getAllMoves(); + if (all_moves != nullptr) all_moves->Print(1); + std::cout << "xinxin points (N E S W): " << std::endl; + for (Player p = 0; p < game_state_->getNumPlayers(); p++) + std::cout << game_state_->score(p) << " "; + std::cout << std::endl; + std::cout << "OpenSpiel game state: " << std::endl; + std::cout << state.ToString() << std::endl; + std::cout << "OpenSpiel legal actions: " << std::endl; + std::cout << state.LegalActions() << std::endl; + std::cout << "OpenSpiel history: " << std::endl; + std::cout << state.History() << std::endl; + SpielFatalError(msg); +} + +Action XinxinBot::Step(const State& state) { + // check that xinxin and open_spiel agree on legal actions + ::hearts::Move* all_moves = game_state_->getAllMoves(); + std::vector xinxin_actions; + while (all_moves != nullptr) { + ::hearts::card card = static_cast<::hearts::CardMove*>(all_moves)->c; + xinxin_actions.push_back(GetOpenSpielAction(card)); + all_moves = all_moves->next; + } + absl::c_sort(xinxin_actions); + std::vector legal_actions = state.LegalActions(); + if (legal_actions != xinxin_actions) { + LogStateMismatchError(state, + "xinxin legal actions != OpenSpiel legal actions."); + } + // test passed! + ::hearts::CardMove* move = + static_cast<::hearts::CardMove*>(game_state_->getNextPlayer()->Play()); + game_state_->ApplyMove(move); + Action act = GetOpenSpielAction(move->c); + game_state_->freeMove(move); + SPIEL_CHECK_TRUE(absl::c_binary_search(legal_actions, act)); + return act; +} + +void XinxinBot::InformAction(const State& state, Player player_id, + Action action) { + if (player_id == kChancePlayerId) { + if (state.ChanceOutcomes().size() == 4 && num_cards_dealt_ == 0) { + // this is guaranteed to be the pass dir selection action as long as + // that remains as the first optional chance node in the Hearts + // implementation + pass_dir_ = kXinxinPassDir[action]; + } else { + ::hearts::card card = GetXinxinAction(action); + initial_deal_[num_cards_dealt_ % kNumPlayers].push_back(card); + if (++num_cards_dealt_ == kNumCards) { + NewDeal(&initial_deal_, pass_dir_, 0); + } + } + } else { + if (state.IsTerminal()) { + if (!game_state_->Done()) { + LogStateMismatchError(state, "xinxin state is not terminal."); + } + std::vector returns = state.Returns(); + for (Player p = 0; p < returns.size(); p++) { + // returns in open_spiel hearts are transformed from the score + // to reflect that getting the least number of total points is better + if (returns[p] != kTotalPositivePoints - game_state_->score(p)) { + LogStateMismatchError(state, "xinxin score != OpenSpiel score"); + } + } + } else { + ::hearts::Move* move = + new ::hearts::CardMove(GetXinxinAction(action), player_id); + game_state_->ApplyMove(move); + game_state_->freeMove(move); + } + } +} + +void XinxinBot::ForceAction(const State& state, Action action) { + ::hearts::Move* move = new ::hearts::CardMove( + GetXinxinAction(action), game_state_->getNextPlayerNum()); + game_state_->ApplyMove(move); + game_state_->freeMove(move); +} + +int 
XinxinBot::XinxinRules(GameParameters params) { + int rules = ::hearts::kQueenPenalty; + if (params["pass_cards"].bool_value()) rules |= ::hearts::kDoPassCards; + if (params["no_pts_on_first_trick"].bool_value()) + rules |= ::hearts::kNoHeartsFirstTrick | ::hearts::kNoQueenFirstTrick; + if (params["can_lead_any_club"].bool_value()) { + rules |= ::hearts::kLeadClubs; + } else { + rules |= ::hearts::kLead2Clubs; + } + if (params["jd_bonus"].bool_value()) rules |= ::hearts::kJackBonus; + if (params["avoid_all_tricks_bonus"].bool_value()) + rules |= ::hearts::kNoTrickBonus; + if (params["qs_breaks_hearts"].bool_value()) + rules |= ::hearts::kQueenBreaksHearts; + if (params["must_break_hearts"].bool_value()) + rules |= ::hearts::kMustBreakHearts; + if (params["can_lead_hearts_instead_of_qs"].bool_value()) { + SpielFatalError("Xinxin does not support leading hearts instead of qs"); + } + return rules; +} + +std::unique_ptr<Bot> MakeXinxinBot(GameParameters params, int uct_num_runs, + double uct_c_val, int iimc_num_worlds, + bool use_threads) { + int rules = XinxinBot::XinxinRules(params); + return std::make_unique<XinxinBot>(rules, uct_num_runs, uct_c_val, + iimc_num_worlds, use_threads); +} + +} // namespace hearts +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/xinxin_bot.h b/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/xinxin_bot.h new file mode 100644 index 0000000..1e1bb6d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/xinxin_bot.h @@ -0,0 +1,85 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#ifndef OPEN_SPIEL_BOTS_XINXIN_XINXIN_BOT_H_ +#define OPEN_SPIEL_BOTS_XINXIN_XINXIN_BOT_H_ + +#include <memory> +#include <string> +#include <vector> + +#include "open_spiel/bots/xinxin/hearts/Hearts.h" +#include "open_spiel/bots/xinxin/hearts/iiMonteCarlo.h" +#include "open_spiel/games/hearts/hearts.h" +#include "open_spiel/spiel_bots.h" + +namespace open_spiel { +namespace hearts { + +Action GetOpenSpielAction(::hearts::card card); +::hearts::card GetXinxinAction(Action action); + +class XinxinBot : public Bot { + public: + explicit XinxinBot(int rules, int uct_num_runs, double uct_c_val, + int iimc_num_worlds, bool use_threads); + + Action Step(const State& state) override; + void InformAction(const State& state, Player player_id, + Action action) override; + void Restart() override; + void RestartAt(const State& state) override; // Currently just restarts. + bool ProvidesForceAction() override { return true; } + void ForceAction(const State& state, Action action) override; + + static int XinxinRules(GameParameters params); + + private: + int uct_num_runs_; + double uct_c_val_; + int iimc_num_worlds_; + bool use_threads_; + std::unique_ptr<::hearts::SafeSimpleHeartsPlayer> CreatePlayer(); + + int num_cards_dealt_; + ::hearts::tPassDir pass_dir_; + std::vector<std::vector<::hearts::card>> initial_deal_; + + // Keep a copy of the initial state around, to check that RestartAt only takes + // place from the initial state.
+ std::unique_ptr<State> initial_state_; + + // A number of pointers to objects need to be created externally, and sent + // into the xinxin. We use these containers to store them. The vectors are + // indexed by player number. + std::unique_ptr<::hearts::HeartsGameState> game_state_; + std::vector<std::unique_ptr<::hearts::UCT>> xinxin_uct_; + std::vector<std::unique_ptr<::hearts::iiMonteCarlo>> xinxin_mc_; + std::vector<std::unique_ptr<::hearts::HeartsPlayout>> xinxin_playouts_; + + void NewDeal(std::vector<std::vector<::hearts::card>>* initial_cards, + ::hearts::tPassDir pass_dir, int first_player); + void LogStateMismatchError(const State& state, std::string msg); +}; + +std::unique_ptr<Bot> MakeXinxinBot(GameParameters params, int uct_num_runs = 50, + double uct_c_val = 0.4, + int iimc_num_worlds = 20, + bool use_threads = true); + +} // namespace hearts +} // namespace open_spiel + +#endif // OPEN_SPIEL_BOTS_XINXIN_XINXIN_BOT_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/xinxin_bot_test.cc b/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/xinxin_bot_test.cc new file mode 100644 index 0000000..8bd5d23 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/xinxin_bot_test.cc @@ -0,0 +1,71 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/bots/xinxin/xinxin_bot.h" + +#include +#include + +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/abseil-cpp/absl/time/clock.h" +#include "open_spiel/abseil-cpp/absl/time/time.h" +#include "open_spiel/bots/xinxin/hearts/Hearts.h" +#include "open_spiel/algorithms/evaluate_bots.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_bots.h" + +namespace open_spiel { +namespace { + +uint_fast32_t Seed() { return absl::ToUnixMicros(absl::Now()); } + +void XinxinBot_BasicPlayGame() { + int num_games = 5; + std::mt19937 rng(Seed()); + auto game = open_spiel::LoadGame("hearts"); + std::vector<std::unique_ptr<Bot>> bots; + std::vector<Bot*> bot_ptrs; + + for (int i = 0; i < hearts::kNumPlayers; i++) { + bots.push_back(open_spiel::hearts::MakeXinxinBot(game->GetParameters())); + bot_ptrs.push_back(bots.back().get()); + } + + for (int i = 0; i < num_games; i++) { + std::unique_ptr<State> state = game->NewInitialState(); + EvaluateBots(state.get(), bot_ptrs, + absl::Uniform(rng, 0, std::numeric_limits<int>::max())); + // call xinxinbot with terminal state so that xinxin's internal state's + // returns can be checked against the OpenSpiel returns + for (auto bot : bot_ptrs) + bot->InformAction(*state, kTerminalPlayerId, kInvalidAction); + } +} + +void XinxinBot_CardActionTransformationTest() { + // exhaustively check if action mapping is a bijection + for (Action action = 0; action < hearts::kNumCards; action++) { + ::hearts::card card = hearts::GetXinxinAction(action); + Action transformed = hearts::GetOpenSpielAction(card); + SPIEL_CHECK_EQ(action, transformed); + } +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char **argv) { + open_spiel::XinxinBot_CardActionTransformationTest(); + open_spiel::XinxinBot_BasicPlayGame(); +} diff --git
a/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/xinxin_bot_test.py b/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/xinxin_bot_test.py new file mode 100644 index 0000000..12c1100 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/xinxin_bot_test.py @@ -0,0 +1,41 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Unit test for XinXin MCTS bot.""" + +from absl.testing import absltest +import numpy as np + +from open_spiel.python.algorithms import evaluate_bots +import pyspiel + +SEED = 12983641 + + +class ISMCTSBotTest(absltest.TestCase): + + def xinxin_play_game(self, game): + bots = [] + for _ in range(4): + bots.append(pyspiel.make_xinxin_bot(game.get_parameters())) + evaluate_bots.evaluate_bots(game.new_initial_state(), bots, np.random) + + def test_basic_xinxin_selfplay(self): + game = pyspiel.load_game("hearts") + self.xinxin_play_game(game) + + +if __name__ == "__main__": + np.random.seed(SEED) + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/xinxin_game_generator.cc b/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/xinxin_game_generator.cc new file mode 100644 index 0000000..5c8b05c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/xinxin_game_generator.cc @@ -0,0 +1,99 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/flags/flag.h" +#include "open_spiel/abseil-cpp/absl/flags/parse.h" +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/abseil-cpp/absl/time/clock.h" +#include "open_spiel/abseil-cpp/absl/time/time.h" +#include "open_spiel/bots/xinxin/hearts/Hearts.h" +#include "open_spiel/algorithms/evaluate_bots.h" +#include "open_spiel/bots/xinxin/xinxin_bot.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_bots.h" + +ABSL_FLAG(int, num_games, 5, "How many games to run."); +ABSL_FLAG(std::string, path, "/tmp/xinxin_logs.txt", + "Where to output the logs."); +ABSL_FLAG(int, uct_num_runs, 50, "Number of MCTS simulations."); +ABSL_FLAG(double, uct_c_val, 0.4, "UCT exploration parameter."); +ABSL_FLAG(int, iimc_num_worlds, 20, + "Num of worlds to sample from at the start of each simulation."); +ABSL_FLAG(bool, use_threads, true, "Use multiple threads for MCTS."); + +namespace open_spiel { +namespace { + +uint_fast32_t Seed() { return absl::ToUnixMicros(absl::Now()); } + +void XinxinBot_GenerateGames(int num_games, std::string path, int uct_num_runs, + double uct_c_val, int iimc_num_worlds, + bool use_threads) { + std::mt19937 rng(Seed()); + std::ofstream game_logs(path); + auto game = open_spiel::LoadGame("hearts"); + std::vector> bots; + std::vector bot_ptrs; + + for (int i = 0; i < hearts::kNumPlayers; i++) { + bots.push_back(open_spiel::hearts::MakeXinxinBot( + game->GetParameters(), uct_num_runs, uct_c_val, iimc_num_worlds, + use_threads)); + bot_ptrs.push_back(bots.back().get()); + } + + for (int i = 0; i < num_games; i++) { + std::unique_ptr state = game->NewInitialState(); + const int num_players = bots.size(); + for (auto bot : bot_ptrs) bot->Restart(); + while (!state->IsTerminal()) { + if (state->IsChanceNode()) { + Action action = SampleAction(state->ChanceOutcomes(), rng).first; + for (auto bot : bot_ptrs) + bot->InformAction(*state, kChancePlayerId, action); + state->ApplyAction(action); + } else { + Player current_player = state->CurrentPlayer(); + Action action = bots[current_player]->Step(*state); + for (Player p = 0; p < num_players; ++p) { + if (p != current_player) { + bots[p]->InformAction(*state, current_player, action); + } + } + state->ApplyAction(action); + } + } + for (Player p = 0; p < num_players; ++p) { + // allows checking for differences in the returns + bots[p]->InformAction(*state, kTerminalPlayerId, kInvalidAction); + } + game_logs << absl::StrJoin(state->History(), " ") << "\n"; + } + game_logs.close(); +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char **argv) { + absl::ParseCommandLine(argc, argv); + open_spiel::XinxinBot_GenerateGames( + absl::GetFlag(FLAGS_num_games), absl::GetFlag(FLAGS_path), + absl::GetFlag(FLAGS_uct_num_runs), absl::GetFlag(FLAGS_uct_c_val), + absl::GetFlag(FLAGS_iimc_num_worlds), absl::GetFlag(FLAGS_use_threads)); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/xinxin_pybind11.cc b/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/xinxin_pybind11.cc new file mode 100644 index 0000000..57643b6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/xinxin_pybind11.cc @@ -0,0 +1,30 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/bots/xinxin/xinxin_bot.h" +#include "open_spiel/spiel_bots.h" +#include "open_spiel/python/pybind11/pybind11.h" + +namespace open_spiel { + +namespace py = ::pybind11; + +void init_pyspiel_xinxin(::pybind11::module& m) { + m.def("make_xinxin_bot", open_spiel::hearts::MakeXinxinBot, py::arg("params"), + py::arg("uct_num_runs") = 50, py::arg("uct_c_val") = 0.4, + py::arg("iimc_num_worlds") = 20, py::arg("use_threads") = true, + "Make the XinXin bot."); +} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/xinxin_pybind11.h b/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/xinxin_pybind11.h new file mode 100644 index 0000000..463af98 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/bots/xinxin/xinxin_pybind11.h @@ -0,0 +1,24 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_BOTS_XINXIN_XINXIN_PYBIND11_H_ +#define OPEN_SPIEL_BOTS_XINXIN_XINXIN_PYBIND11_H_ + +#include "open_spiel/python/pybind11/pybind11.h" + +namespace open_spiel { +void init_pyspiel_xinxin(::pybind11::module& m); +} // namespace open_spiel + +#endif // OPEN_SPIEL_BOTS_XINXIN_XINXIN_PYBIND11_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/canonical_game_strings.cc b/scenarios/bargaining/open_spiel/open_spiel/canonical_game_strings.cc new file mode 100644 index 0000000..9558628 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/canonical_game_strings.cc @@ -0,0 +1,71 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/canonical_game_strings.h" + +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" + +namespace open_spiel { + +std::string HunlGameString(const std::string &betting_abstraction) { + return absl::StrFormat( + "universal_poker(betting=nolimit,numPlayers=2,numRounds=4,blind=100 50," + "firstPlayer=2 1 1 1,numSuits=4,numRanks=13,numHoleCards=2," + "numBoardCards=0 3 1 1,stack=20000 20000,bettingAbstraction=%s)", + betting_abstraction); +} + +// Note: Limit games do not support the 'stack' input. +std::string HulhGameString(const std::string &betting_abstraction) { + return absl::StrFormat( + "universal_poker(betting=limit,numPlayers=2,numRounds=4,blind=10 5," + "firstPlayer=2 1,numSuits=4,numRanks=13,numHoleCards=2," + "numBoardCards=0 3 1 1,raiseSize=10 10 20 20," + "maxRaises=3 4 4 4,bettingAbstraction=%s)", + betting_abstraction); +} + +std::string Multiway3max_1_2GameString(const std::string &betting_abstraction, + int sb_stack, int bb_stack, + int dealer_stack) { + return absl::StrFormat( + "universal_poker(betting=nolimit,numPlayers=3,numRounds=4,blind=1 2 0," + // Standard turn order: D->SB->BB, then SB->BB->D + "firstPlayer=3 1 1 1,numSuits=4,numRanks=13,numHoleCards=2," + "numBoardCards=0 3 1 1,stack=%i %i %i,bettingAbstraction=%s)", + sb_stack, bb_stack, dealer_stack, betting_abstraction); +} + +std::string Multiway6max_1_2GameString(const std::string &betting_abstraction, + int buy_in) { + return absl::StrFormat( + "universal_poker(betting=nolimit,numPlayers=6,numRounds=4," + "blind=1 2 0 0 0 0," + // Standard turn order: UTG->...->D->SB->BB, then SB->BB->UTG->...->D + "firstPlayer=3 1 1 1,numSuits=4,numRanks=13,numHoleCards=2," + "numBoardCards=0 3 1 1,stack=%i %i %i %i %i %i,bettingAbstraction=%s)", + buy_in, buy_in, buy_in, buy_in, buy_in, buy_in, betting_abstraction); +} + +std::string TurnBasedGoofspielGameString(int num_cards) { + return absl::StrFormat( + "turn_based_simultaneous_game(game=goofspiel(" + "imp_info=true,num_cards=%i,players=2," + "points_order=descending,returns_type=win_loss))", + num_cards); +} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/canonical_game_strings.h b/scenarios/bargaining/open_spiel/open_spiel/canonical_game_strings.h new file mode 100644 index 0000000..3eb6872 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/canonical_game_strings.h @@ -0,0 +1,49 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_CANONICAL_GAME_STRINGS_H_ +#define OPEN_SPIEL_CANONICAL_GAME_STRINGS_H_ + +#include + +// A place to store functions that return canonical game strings. These strings +// can sent to LoadGame to load the game. + +namespace open_spiel { + +// Returns the "canonical" definition of Heads-up No-limit Texas Hold'em and +// Heads-up Limit Texas Hold'em according to the ACPC: +// http://www.computerpokercompetition.org/. 
+// Valid values for betting_abstraction are "fc" for fold-call,"fcpa" for +// fold, call, pot, all-in, "fchpa" for fold, call, half-pot, pot, all-in, and +// "fullgame" for the unabstracted game. These indicate the actions that are +// allowed. Note that in limit poker, "fcpa" & "fcpha" are just the full game. +// The string returned can be passed directly to LoadGame. +std::string HunlGameString(const std::string &betting_abstraction); +std::string HulhGameString(const std::string &betting_abstraction); +// Additional helper functions for other common Texas Hold'em games: +// 3 players with blinds at 1/2 (SB / BB), using differing stack sizes +std::string Multiway3max_1_2GameString(const std::string &betting_abstraction, + int sb_stack, int bb_stack, + int dealer_stack); +// 6 players with blinds at 1/2 (SB / BB), all using the same input stack size +std::string Multiway6max_1_2GameString(const std::string &betting_abstraction, + int buy_in); + +// Turn based goofspiel w/ imperfect information and descending points order. +std::string TurnBasedGoofspielGameString(int num_cards); + +} // namespace open_spiel + +#endif // OPEN_SPIEL_SPIEL_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/colabs/CFR_and_REINFORCE.ipynb b/scenarios/bargaining/open_spiel/open_spiel/colabs/CFR_and_REINFORCE.ipynb new file mode 100644 index 0000000..71ac04f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/colabs/CFR_and_REINFORCE.ipynb @@ -0,0 +1,369 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "gNlXrRnVGmb8" + }, + "source": [ + "This colab has an [accompanying video](https://www.youtube.com/watch?v=o6JNHoGUXCo)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RXVkGtU5Hq9E" + }, + "outputs": [], + "source": [ + "import itertools as it\n", + "from matplotlib import pyplot as plt\n", + "import numpy as np\n", + "import pyspiel\n", + "\n", + "from open_spiel.python.algorithms import exploitability\n", + "from open_spiel.python import policy as policy_lib\n", + "\n", + "np.set_printoptions(precision=3, suppress=True, floatmode='fixed')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "shtvgpHzMrcN" + }, + "outputs": [], + "source": [ + "game = pyspiel.load_game('tic_tac_toe')\n", + "state = game.new_initial_state()\n", + "\n", + "print(state)\n", + "while not state.is_terminal():\n", + " action = np.random.choice(state.legal_actions())\n", + " print(f'Taking action {action} {state.action_to_string(action)}')\n", + " state.apply_action(action)\n", + " print(state)\n", + "print(f'Game over; returns {state.returns()}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VOaIgtInHyUs" + }, + "outputs": [], + "source": [ + "game = pyspiel.load_game('kuhn_poker')\n", + "print(game.get_type().pretty_print())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PwNaIqi4IlfL" + }, + "outputs": [], + "source": [ + "policy = policy_lib.TabularPolicy(game)\n", + "print(policy.states_per_player)\n", + "print(policy.action_probability_array)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WJJPURUH_n64" + }, + "outputs": [], + "source": [ + "def print_policy(policy):\n", + " for state, probs in zip(it.chain(*policy.states_per_player),\n", + " policy.action_probability_array):\n", + " print(f'{state:6} p={probs}')\n", + "\n", + "print_policy(policy)" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "id": "qbEt7PpRkrpq" + }, + "outputs": [], + "source": [ + "print(exploitability.nash_conv(game, policy))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bYkVJGQnIxx4" + }, + "outputs": [], + "source": [ + "def new_reach(so_far, player, action_prob):\n", + " \"\"\"Returns new reach probabilities.\"\"\"\n", + " new = np.array(so_far)\n", + " new[player] *= action_prob\n", + " return new\n", + "\n", + "def calc_cfr(state, reach):\n", + " \"\"\"Updates regrets; returns utility for all players.\"\"\"\n", + " if state.is_terminal():\n", + " return state.returns()\n", + " elif state.is_chance_node():\n", + " return sum(prob * calc_cfr(state.child(action), new_reach(reach, -1, prob))\n", + " for action, prob in state.chance_outcomes())\n", + " else:\n", + " # We are at a player decision point.\n", + " player = state.current_player()\n", + " index = policy.state_index(state)\n", + " \n", + " # Compute utilities after each action, updating regrets deeper in the tree.\n", + " utility = np.zeros((game.num_distinct_actions(), game.num_players()))\n", + " for action in state.legal_actions():\n", + " prob = curr_policy[index][action]\n", + " utility[action] = calc_cfr(state.child(action), new_reach(reach, player, prob))\n", + "\n", + " # Compute regrets at this state.\n", + " cfr_prob = np.prod(reach[:player]) * np.prod(reach[player+1:])\n", + " value = np.einsum('ap,a-\u003ep', utility, curr_policy[index])\n", + " for action in state.legal_actions():\n", + " regrets[index][action] += cfr_prob * (utility[action][player] - value[player])\n", + "\n", + " # Return the value of this state for all players.\n", + " return value" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ueprrRS9NbO_" + }, + "outputs": [], + "source": [ + "game = pyspiel.load_game('kuhn_poker')\n", + "# game = pyspiel.load_game('turn_based_simultaneous_game(game=goofspiel(imp_info=true,num_cards=4,players=2,points_order=descending))')\n", + "policy = policy_lib.TabularPolicy(game)\n", + "initial_state = game.new_initial_state()\n", + "curr_policy = policy.action_probability_array.copy()\n", + "regrets = np.zeros_like(policy.action_probability_array)\n", + "eval_steps = []\n", + "eval_nash_conv = []\n", + "for step in range(129):\n", + " # Compute regrets\n", + " calc_cfr(initial_state, np.ones(1 + game.num_players()))\n", + "\n", + " # Find the new regret-matching policy\n", + " floored_regrets = np.maximum(regrets, 1e-16)\n", + " sum_floored_regrets = np.sum(floored_regrets, axis=1, keepdims=True)\n", + " curr_policy = floored_regrets / sum_floored_regrets\n", + "\n", + " # Update the average policy\n", + " lr = 1 / (1 + step)\n", + " policy.action_probability_array *= (1 - lr)\n", + " policy.action_probability_array += curr_policy * lr\n", + "\n", + " # Evaluate the average policy\n", + " if step \u0026 (step-1) == 0:\n", + " nc = exploitability.nash_conv(game, policy)\n", + " eval_steps.append(step)\n", + " eval_nash_conv.append(nc)\n", + " print(f'Nash conv after step {step} is {nc}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3p1_k0gMocEp" + }, + "outputs": [], + "source": [ + "fig, ax = plt.subplots()\n", + "ax.set_title(\"NashConv by CFR Iteration\")\n", + "ax.plot(eval_steps, eval_nash_conv)\n", + "fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qZI7PtXFoq0l" + }, + "outputs": [], + "source": [ + "fig, ax = 
plt.subplots()\n", + "ax.set_title(\"NashConv by CFR Iteration (log-log scale)\")\n", + "ax.loglog(eval_steps, eval_nash_conv)\n", + "fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "orVhRjWBsBWx" + }, + "outputs": [], + "source": [ + "# Display the whole policy\n", + "print_policy(policy)\n", + "\n", + "# How likely are we to bet with a Jack?\n", + "alpha = policy.action_probability_array[policy.state_lookup['0']][1]\n", + "print(f'P(bet with Jack) = alpha = {alpha:.3}')\n", + "\n", + "# How likely are we to bet with a King?\n", + "pK = policy.action_probability_array[policy.state_lookup['2']][1]\n", + "print(f'P(bet with King) = {pK:.3}, cf {alpha * 3:.3}')\n", + "\n", + "# How likely are we to call with a Queen?\n", + "pQ = policy.action_probability_array[policy.state_lookup['1pb']][1]\n", + "print(f'P(call with Queen after checking) = {pQ:.3}, cf {alpha + 1/3:.3}')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_rflT-jousiH" + }, + "outputs": [], + "source": [ + "def sample(actions_and_probs):\n", + " actions, probs = zip(*actions_and_probs)\n", + " return np.random.choice(actions, p=probs)\n", + "\n", + "def policy_as_list(policy, state):\n", + " return list(enumerate(policy.policy_for_key(state.information_state_string())))\n", + "\n", + "def env_action(state):\n", + " if state.is_chance_node():\n", + " p = state.chance_outcomes()\n", + " else:\n", + " p = policy_as_list(fixed_policy, state)\n", + " return sample(p)\n", + "\n", + "def softmax(x):\n", + " x = np.exp(x - np.max(x, axis=-1, keepdims=True))\n", + " return x / np.sum(x, axis=-1, keepdims=True)\n", + "\n", + "def generate_trajectory(state, player):\n", + " trajectory = []\n", + " while not state.is_terminal():\n", + " if state.current_player() == player:\n", + " action = sample(policy_as_list(rl_policy, state))\n", + " trajectory.append((rl_policy.state_index(state), action))\n", + " else:\n", + " action = env_action(state)\n", + " state.apply_action(action)\n", + " return trajectory, state.returns()[player]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Y9Sn8g147_90" + }, + "outputs": [], + "source": [ + "fixed_policy = policy_lib.TabularPolicy(game)\n", + "rl_policy = policy_lib.TabularPolicy(game)\n", + "for _ in range(5):\n", + " print(generate_trajectory(game.new_initial_state(), player=0))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "R_L2KQ5u9Nc3" + }, + "outputs": [], + "source": [ + "# Run REINFORCE\n", + "N = 10000\n", + "lr = 0.01\n", + "for step in range(N):\n", + " for player in (0, 1):\n", + " trajectory, reward = generate_trajectory(game.new_initial_state(), player)\n", + " for s, a in trajectory:\n", + " logits = np.log(rl_policy.action_probability_array[s])\n", + " logits[a] += lr * reward\n", + " rl_policy.action_probability_array[s] = softmax(logits)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8-25i6xFRUqC" + }, + "outputs": [], + "source": [ + "# Evaluate the policy\n", + "def evaluate(state, rl_policy, player):\n", + " if state.is_terminal():\n", + " return state.returns()[player]\n", + " elif state.current_player() == player:\n", + " ap = policy_as_list(rl_policy, state)\n", + " elif state.is_chance_node():\n", + " ap = state.chance_outcomes()\n", + " else:\n", + " ap = policy_as_list(fixed_policy, state)\n", + " return sum(p * evaluate(state.child(a), rl_policy, player) for a, 
p in ap)\n", + "\n", + "def eval(rl_policy):\n", + " return (evaluate(game.new_initial_state(), rl_policy, player=0)\n", + " + evaluate(game.new_initial_state(), rl_policy, player=1))\n", + "\n", + "print_policy(rl_policy)\n", + "eval(rl_policy)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IZX8G1sGLSBT" + }, + "outputs": [], + "source": [ + "# Evaluate the greedy policy\n", + "greedy_policy = policy_lib.TabularPolicy(game)\n", + "greedy_policy.action_probability_array = (np.eye(game.num_distinct_actions())\n", + " [np.argmax(rl_policy.action_probability_array, axis=-1)])\n", + "\n", + "print_policy(greedy_policy)\n", + "eval(greedy_policy)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "CFR_and_REINFORCE.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/colabs/OpenSpielTutorial.ipynb b/scenarios/bargaining/open_spiel/open_spiel/colabs/OpenSpielTutorial.ipynb new file mode 100644 index 0000000..6cab02c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/colabs/OpenSpielTutorial.ipynb @@ -0,0 +1,524 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "OpenSpielTutorial.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "cells": [ + { + "cell_type": "code", + "metadata": { + "id": "odj1Coq5H080" + }, + "source": [ + "#@title ##### License { display-mode: \"form\" }\n", + "# Copyright 2019 DeepMind Technologies Ltd. All rights reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dOOzDGYAZcW3" + }, + "source": [ + "# OpenSpiel\n", + "\n", + "* This Colab gets you started the basics of OpenSpiel.\n", + "* OpenSpiel is a framework for reinforcement learning in games. The code is hosted [on github](https://github.com/deepmind/open_spiel/).\n", + "* There is an accompanying video tutorial that works through this colab. It will be linked here once it is live.\n", + "* There is also an [OpenSpiel paper](https://arxiv.org/abs/1908.09453) with more detail." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XC6kQBzWahEF" + }, + "source": [ + "## Install" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-2_Vbijh4FlZ" + }, + "source": [ + "The following command will install OpenSpiel via pip.\n", + "\n", + "Only the required dependencies are installed. You may need other dependencies if you use some of the algorithms. 
There is a [the complete list of packages and versions](https://github.com/deepmind/open_spiel/blob/master/open_spiel/scripts/python_extra_deps.sh) we install for the CI tests, which can be installed as necessary.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "lQc12Xrn4CXU" + }, + "source": [ + "!pip install --upgrade open_spiel" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jUtlXZ8FBnAL" + }, + "source": [ + "# Part 1. OpenSpiel API Basics." + ] + }, + { + "cell_type": "code", + "source": [ + "# Importing pyspiel and showing the list of supported games.\n", + "import pyspiel\n", + "print(pyspiel.registered_names())" + ], + "metadata": { + "id": "bDXdNLJbsZaD" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Loading a game (with no/default parameters).\n", + "game = pyspiel.load_game(\"tic_tac_toe\")\n", + "print(game)" + ], + "metadata": { + "id": "74glfO8dsmPn" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Some properties of the games.\n", + "print(game.num_players())\n", + "print(game.max_utility())\n", + "print(game.min_utility())\n", + "print(game.num_distinct_actions())" + ], + "metadata": { + "id": "tthnjDQxuuW1" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Creating initial states.\n", + "state = game.new_initial_state()\n", + "print(state)" + ], + "metadata": { + "id": "po2CYySVu-rC" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Basic information about states.\n", + "print(state.current_player())\n", + "print(state.is_terminal())\n", + "print(state.returns())\n", + "print(state.legal_actions())" + ], + "metadata": { + "id": "ZxXCiDjXvNMQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Playing the game: applying actions.\n", + "state = game.new_initial_state()\n", + "state.apply_action(1)\n", + "print(state)\n", + "print(state.current_player())\n", + "state.apply_action(2)\n", + "state.apply_action(4)\n", + "state.apply_action(0)\n", + "state.apply_action(7)\n", + "print(state)\n", + "print(state.is_terminal())\n", + "print(state.player_return(0)) # win for x (player 0)\n", + "print(state.current_player())" + ], + "metadata": { + "id": "GQypywhgvh6t" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Different game: Breakthrough with default parameters (number of rows and columns are both 8)\n", + "game = pyspiel.load_game(\"breakthrough\")\n", + "state = game.new_initial_state()\n", + "print(state)" + ], + "metadata": { + "id": "fxu3ZTxxvmrW" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Parameterized games: loading a 6x6 Breakthrough.\n", + "game = pyspiel.load_game(\"breakthrough(rows=6,columns=6)\")\n", + "state = game.new_initial_state()\n", + "print(state)\n", + "print(state.legal_actions())\n", + "print(game.num_distinct_actions())\n", + "for action in state.legal_actions():\n", + " print(f\"{action} {state.action_to_string(action)}\")" + ], + "metadata": { + "id": "rQV0169-wuLI" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Part 2. Normal-form Games and Evolutionary Dynamics in OpenSpiel." 
+ ], + "metadata": { + "id": "PeB3zc8AzDlZ" + } + }, + { + "cell_type": "code", + "source": [ + "import pyspiel\n", + "game = pyspiel.create_matrix_game([[1, -1], [-1, 1]], [[-1, 1], [1, -1]])\n", + "print(game) # name not provided: uses a default\n", + "state = game.new_initial_state()\n", + "print(state) # action names also not provided; defaults used" + ], + "metadata": { + "id": "u2eRTZr4zm_G" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Normal-form games are 1-step simultaneous-move games.\n", + "print(state.current_player()) # special player id \n", + "print(state.legal_actions(0)) # query legal actions for each player\n", + "print(state.legal_actions(1))\n", + "print(state.is_terminal())\n" + ], + "metadata": { + "id": "N6E0hG4J0TaI" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Applying a joint action (one action per player)\n", + "state.apply_actions([0, 0])\n", + "print(state.is_terminal())\n", + "print(state.returns())" + ], + "metadata": { + "id": "RPfvosEU0pt9" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Evolutionary dynamics in Rock, Paper, Scissors\n", + "from open_spiel.python.egt import dynamics\n", + "from open_spiel.python.egt.utils import game_payoffs_array\n", + "import numpy as np\n", + "\n", + "game = pyspiel.load_matrix_game(\"matrix_rps\") # load the Rock, Paper, Scissors matrix game\n", + "payoff_matrix = game_payoffs_array(game) # convert any normal-form game to a numpy payoff matrix\n", + "\n", + "dyn = dynamics.SinglePopulationDynamics(payoff_matrix, dynamics.replicator)\n", + "x = np.array([0.2, 0.2, 0.6]) # population heavily-weighted toward scissors\n", + "dyn(x)" + ], + "metadata": { + "id": "fq4NRSrz04xe" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Choose a step size and apply the dynamic\n", + "alpha = 0.01\n", + "x += alpha * dyn(x)\n", + "print(x)\n", + "x += alpha * dyn(x)\n", + "print(x)\n", + "x += alpha * dyn(x)\n", + "x += alpha * dyn(x)\n", + "x += alpha * dyn(x)\n", + "x += alpha * dyn(x)\n", + "print(x)" + ], + "metadata": { + "id": "jPzX2HWK1VvJ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Part 3. Chance Nodes and Partially-Observable Games." 
+ ], + "metadata": { + "id": "p-i_tT8HzLU1" + } + }, + { + "cell_type": "code", + "source": [ + "# Kuhn poker: simplified poker with a 3-card deck (https://en.wikipedia.org/wiki/Kuhn_poker)\n", + "import pyspiel\n", + "game = pyspiel.load_game(\"kuhn_poker\")\n", + "print(game.num_distinct_actions()) # bet and fold\n" + ], + "metadata": { + "id": "bA6hgOQW2iUz" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Chance nodes.\n", + "state = game.new_initial_state()\n", + "print(state.current_player()) # special chance player id\n", + "print(state.is_chance_node())\n", + "print(state.chance_outcomes()) # distibution over outcomes as a list of (outcome, probability) pairs" + ], + "metadata": { + "id": "RxVzdLjU2zWM" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Applying chance node outcomes: same function as applying actions.\n", + "state.apply_action(0) # let's choose the first card (jack)\n", + "print(state.is_chance_node()) # still at a chance node (player 2's card).\n", + "print(state.chance_outcomes()) # jack no longer a possible outcome\n", + "state.apply_action(1) # second player gets the queen\n", + "print(state.current_player()) # no longer chance node, time to play!" + ], + "metadata": { + "id": "avTQrpRA3OOQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# States vs. information states\n", + "print(state) # ground/world state (all information open)\n", + "print(state.legal_actions())\n", + "for action in state.legal_actions():\n", + " print(state.action_to_string(action))\n", + "print(state.information_state_string()) # only current player's information!" + ], + "metadata": { + "id": "UHZ7vU_V4SZm" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Take an action (pass / check), second player's turn.\n", + "# Information state tensor is vector of floats (often bits) representing the information state.\n", + "state.apply_action(0)\n", + "print(state.current_player())\n", + "print(state.information_state_string()) # now contains second player's card and the public action sequence\n", + "print(state.information_state_tensor())" + ], + "metadata": { + "id": "RuzH-yOK4xmg" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Leduc poker is a larger game (6 cards, two suits), 3 actions: fold, check/call, raise.\n", + "game = pyspiel.load_game(\"leduc_poker\")\n", + "print(game.num_distinct_actions())\n", + "state = game.new_initial_state()\n", + "print(state)\n", + "state.apply_action(0) # first player gets first jack \n", + "state.apply_action(1) # second player gets second jack\n", + "print(state.current_player())\n", + "print(state.information_state_string())\n", + "print(state.information_state_tensor())\n" + ], + "metadata": { + "id": "tmJbLdme5P8a" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Let's check until the second round.\n", + "print(state.legal_actions_mask()) # Helper function for neural networks.\n", + "state.apply_action(1) # check\n", + "state.apply_action(1) # check\n", + "print(state)\n", + "print(state.chance_outcomes()) # public card (4 left in the deck)\n", + "state.apply_action(2)\n", + "print(state.information_state_string()) # player 0's turn again." 
+ ], + "metadata": { + "id": "4MwssaTo58yO" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Part 4. Basic RL: Self-play Q-Learning in Tic-Tac-Toe." + ], + "metadata": { + "id": "3PGnADszzbNP" + } + }, + { + "cell_type": "code", + "source": [ + "# Let's do independent Q-learning in Tic-Tac-Toe, and play it against random.\n", + "# RL is based on python/examples/independent_tabular_qlearning.py\n", + "from open_spiel.python import rl_environment\n", + "from open_spiel.python import rl_tools\n", + "from open_spiel.python.algorithms import tabular_qlearner\n", + "\n", + "# Create the environment\n", + "env = rl_environment.Environment(\"tic_tac_toe\")\n", + "num_players = env.num_players\n", + "num_actions = env.action_spec()[\"num_actions\"]\n", + "\n", + "# Create the agents\n", + "agents = [\n", + " tabular_qlearner.QLearner(player_id=idx, num_actions=num_actions)\n", + " for idx in range(num_players)\n", + "]" + ], + "metadata": { + "id": "EnfdHFr7621m" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Train the Q-learning agents in self-play.\n", + "for cur_episode in range(25000):\n", + " if cur_episode % 1000 == 0:\n", + " print(f\"Episodes: {cur_episode}\")\n", + " time_step = env.reset()\n", + " while not time_step.last():\n", + " player_id = time_step.observations[\"current_player\"]\n", + " agent_output = agents[player_id].step(time_step)\n", + " time_step = env.step([agent_output.action])\n", + " # Episode is over, step all agents with final info state.\n", + " for agent in agents:\n", + " agent.step(time_step)\n", + "print(\"Done!\")" + ], + "metadata": { + "id": "mDgnvsjZ7vZI" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Evaluate the Q-learning agent against a random agent.\n", + "from open_spiel.python.algorithms import random_agent\n", + "eval_agents = [agents[0], random_agent.RandomAgent(1, num_actions, \"Entropy Master 2000\") ]\n", + "\n", + "time_step = env.reset()\n", + "while not time_step.last():\n", + " print(\"\")\n", + " print(env.get_state)\n", + " player_id = time_step.observations[\"current_player\"]\n", + " # Note the evaluation flag. A Q-learner will set epsilon=0 here.\n", + " agent_output = eval_agents[player_id].step(time_step, is_evaluation=True)\n", + " print(f\"Agent {player_id} chooses {env.get_state.action_to_string(agent_output.action)}\")\n", + " time_step = env.step([agent_output.action])\n", + "\n", + "print(\"\")\n", + "print(env.get_state)\n", + "print(time_step.rewards)\n" + ], + "metadata": { + "id": "3GPNio828vyg" + }, + "execution_count": null, + "outputs": [] + } + ] +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/colabs/crowd_modelling_4rooms_MFGsurvey.ipynb b/scenarios/bargaining/open_spiel/open_spiel/colabs/crowd_modelling_4rooms_MFGsurvey.ipynb new file mode 100644 index 0000000..679116c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/colabs/crowd_modelling_4rooms_MFGsurvey.ipynb @@ -0,0 +1,628 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "0kNT4QZ3k6tk" + }, + "source": [ + "# Setup\n", + "\n", + "We use [OpenSpiel](https://github.com/deepmind/open_spiel) library for this setting. OpenSpiel is a collection of environments and algorithms for research in general reinforcement learning and search/planning in games." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NKAod1ARM0vi" + }, + "source": [ + "## Imports\n", + "\n", + "Import the OpenSpiel and other auxiliary libraries." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qeLv5Ukxj8sR" + }, + "outputs": [], + "source": [ + "\"\"\"Useful imports\"\"\"\n", + "\n", + "!pip install --upgrade open_spiel" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1G9298ghC6f7" + }, + "outputs": [], + "source": [ + "\n", + "import dataclasses\n", + "import math\n", + "import re\n", + "from typing import Dict, List, Optional, Tuple\n", + "\n", + "\n", + "import datetime\n", + "from matplotlib import animation\n", + "from matplotlib import cm\n", + "from matplotlib import pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import time\n", + "\n", + "from IPython.display import HTML\n", + "\n", + "from open_spiel.python import policy\n", + "from open_spiel.python import policy as policy_std\n", + "from open_spiel.python.mfg import distribution as distribution_std\n", + "from open_spiel.python.mfg import value as value_std\n", + "from open_spiel.python.mfg.algorithms import best_response_value\n", + "from open_spiel.python.mfg.algorithms import boltzmann_policy_iteration\n", + "from open_spiel.python.mfg.algorithms import distribution\n", + "from open_spiel.python.mfg.algorithms import fictitious_play\n", + "from open_spiel.python.mfg.algorithms import fixed_point\n", + "from open_spiel.python.mfg.algorithms import greedy_policy\n", + "from open_spiel.python.mfg.algorithms import mirror_descent\n", + "from open_spiel.python.mfg.algorithms import munchausen_mirror_descent\n", + "from open_spiel.python.mfg.algorithms import nash_conv\n", + "from open_spiel.python.mfg.algorithms import policy_value\n", + "from open_spiel.python.mfg.games import factory\n", + "import pyspiel" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vaPOvThZRCB4" + }, + "source": [ + "## Forbidden states" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8d_Z8Dq_RDKH" + }, + "outputs": [], + "source": [ + "forbidden_states_grid = [\n", + " '#############',\n", + " '# # #',\n", + " '# # #',\n", + " '# #',\n", + " '# # #',\n", + " '# # #',\n", + " '### ##### ###',\n", + " '# # #',\n", + " '# # #',\n", + " '# #',\n", + " '# # #',\n", + " '# # #',\n", + " '#############',\n", + "]\n", + "\n", + "def grid_to_forbidden_states(grid):\n", + " \"\"\"Converts a grid into string representation of forbidden states.\n", + "\n", + " Args:\n", + " grid: Rows of the grid. '#' character denotes a forbidden state. All rows\n", + " should have the same number of columns, i.e. cells.\n", + "\n", + " Returns:\n", + " String representation of forbidden states in the form of x (column) and y\n", + " (row) pairs, e.g. 
[1|1;0|2].\n", + " \"\"\"\n", + " forbidden_states = []\n", + " num_cols = len(grid[0])\n", + " for y, row in enumerate(grid):\n", + " assert len(row) == num_cols, f'Number of columns should be {num_cols}.'\n", + " for x, cell in enumerate(row):\n", + " if cell == '#':\n", + " forbidden_states.append(f'{x}|{y}')\n", + " return '[' + ';'.join(forbidden_states) + ']'\n", + "\n", + "FOUR_ROOMS_FORBIDDEN_STATES = grid_to_forbidden_states(forbidden_states_grid)\n", + "forbidden_states_indicator = np.array([[math.nan if c=='#' else 0 for c in [*row]] for row in forbidden_states_grid])\n", + "\n", + "four_rooms_default_setting = {\n", + " 'forbidden_states': FOUR_ROOMS_FORBIDDEN_STATES,\n", + " 'horizon': 41,\n", + " 'initial_distribution': '[1|1]',\n", + " 'initial_distribution_value': '[1.0]',\n", + " 'size': 13,\n", + " 'only_distribution_reward': True,\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qmiZH91CQpcL" + }, + "source": [ + "## Helper methods for visualization\n", + "\n", + "The state representation and distribution of each game would be different. OpenSpiel does not provide any built in visualization capabilities. We define some basic methods for displaying the two-dimensional grid and the distribution for our game." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "I_znsAseM7zD" + }, + "outputs": [], + "source": [ + "\"\"\"Helper methods for visualization. These are game specific.\"\"\"\n", + "\n", + "\n", + "def decode_distribution(game: pyspiel.Game,\n", + " dist: Dict[str, float],\n", + " nans: bool = True) -\u003e np.ndarray:\n", + " \"\"\"Decodes the distribution of a 2D crowd modelling game from a dictionary.\"\"\"\n", + " # Extract the size of the distribution from the game parameters. 
Time, i.e.\n", + " # horizon is the leading dimension so that we can easily present the temporal\n", + " # aspect.\n", + " params = game.get_parameters()\n", + " dist_size = (params['horizon'], params['size'], params['size'])\n", + " decoded = np.zeros(dist_size)\n", + "\n", + " for key, value in dist.items():\n", + " m = re.fullmatch(r'\\((?P\u003cx\u003e\\d+),\\s*(?P\u003cy\u003e\\d+),\\s*(?P\u003ct\u003e\\d+)\\)', key)\n", + " if m:\n", + " g = m.group\n", + " decoded[(int(g('t')), int(g('y')), int(g('x')))] = value\n", + "\n", + " return decoded\n", + "\n", + "\n", + "def get_policy_distribution(game: pyspiel.Game,\n", + " policy: policy_std.Policy) -\u003e np.ndarray:\n", + " \"\"\"Returns the distribution of the policy.\"\"\"\n", + " dist_policy = distribution.DistributionPolicy(game, policy)\n", + " return decode_distribution(game, dist_policy.distribution)\n", + "\n", + "\n", + "def animate_distributions(dists: np.ndarray,\n", + " fixed_cbar: bool = False) -\u003e animation.FuncAnimation:\n", + " \"\"\"Animates the given distributions.\n", + "\n", + " Args:\n", + " dists: An np.ndarray of batched distributions.\n", + " fixed_cbar: If true, then the color bar will have a fixed scale over all\n", + " distributions.\n", + "\n", + " Returns:\n", + " A function animation.\n", + " \"\"\"\n", + " if fixed_cbar:\n", + " vmin = np.min(dists)\n", + " vmax = np.max(dists)\n", + " else:\n", + " vmin, vmax = None, None\n", + "\n", + " def frame(i):\n", + " ax.cla()\n", + " sns.heatmap(\n", + " dists[i, ...],\n", + " square=True,\n", + " cmap=plt.cm.viridis,\n", + " linecolor='white',\n", + " linewidths=0.1,\n", + " ax=ax,\n", + " cbar=True,\n", + " cbar_ax=cbar_ax,\n", + " vmin=vmin,\n", + " vmax=vmax)\n", + "\n", + " grid_kws = {'width_ratios': (0.9, 0.05), 'wspace': 0.2}\n", + " fig, (ax, cbar_ax) = plt.subplots(1, 2, gridspec_kw=grid_kws, figsize=(7, 5))\n", + " anim = animation.FuncAnimation(\n", + " fig=fig, func=frame, frames=dists.shape[0], interval=50, blit=False)\n", + " # This prevents plot output at each frame.\n", + " plt.close()\n", + " return anim\n", + "\n", + "\n", + "@dataclasses.dataclass\n", + "class RunResult:\n", + " \"\"\"Holds the result of running an algorithm.\n", + "\n", + " Attributes:\n", + " policy: The resulting policy.\n", + " dists: An np.ndarray that contains the distributions at horizon for each\n", + " iteration.\n", + " nash_convs: Nash Conv metrics at each iteration.\n", + " last_dist: The distribution for the last iteration of the algorithm.\n", + " \"\"\"\n", + " policy: policy_std.Policy\n", + " dists: np.ndarray\n", + " nash_convs: np.ndarray\n", + " last_dist: np.ndarray\n", + "\n", + "\n", + "\n", + "def run_algorithm(game: pyspiel.Game, algo, num_iterations: int,\n", + " learning_rate=None, init_policy=None):\n", + " \"\"\"Runs the algorithm for specified number of iterations.\n", + "\n", + " Args:\n", + " game: An MFG.\n", + " algo: Algorithm to use.\n", + " num_iterations: Number of iterations.\n", + "\n", + " Returns:\n", + " The final policy and the Nash Conv values at each iteration.\n", + " \"\"\"\n", + " nash_convs = []\n", + " dists = []\n", + " current_policy = init_policy\n", + " dist = None\n", + " # Added to save the initialization\n", + " startt = time.time()\n", + " if not current_policy:\n", + " current_policy = algo.get_policy()\n", + " nash_convs.append(nash_conv.NashConv(game, current_policy).nash_conv())\n", + " dist = get_policy_distribution(game, current_policy)\n", + " # dists.append(dist[-1, :]) # if single population\n", + " 
dists.append(dist)\n", + " print(\"Done iteration = 0, \\ttime = \", time.time() - startt, \"\\tnash_conv = \", nash_convs[-1])\n", + " for i in range(num_iterations):\n", + " startt = time.time()\n", + " if learning_rate:\n", + " algo.iteration(learning_rate=learning_rate)\n", + " else:\n", + " algo.iteration()\n", + " current_policy = algo.get_policy()\n", + " nash_convs.append(nash_conv.NashConv(game, current_policy).nash_conv())\n", + " dist = get_policy_distribution(game, current_policy)\n", + " dists.append(dist)\n", + " if (i+1)%2==0:\n", + " print(\"Done iteration = \", i+1, \"\\ttime = \", time.time() - startt, \"\\tnash_conv = \", nash_convs[-1])\n", + " # print(\"run_algorithm: distribution: \", dists[-1])\n", + "\n", + " return RunResult(\n", + " policy=current_policy,\n", + " dists=np.stack(dists),\n", + " nash_convs=np.array(nash_convs),\n", + " last_dist=dist)\n", + "\n", + "\n", + "def display_result(result: RunResult):\n", + " \"\"\"Displays the run results.\"\"\"\n", + " sns.set(rc={'figure.figsize':(10, 6)})\n", + " fig, ax = plt.subplots()\n", + " ax.plot(result.nash_convs)\n", + " ax.set_xlabel('iteration')\n", + " ax.set_ylabel('Nash Conv')\n", + " return HTML(animate_distributions(result.dists).to_jshtml())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5qeYHadHRvP_" + }, + "outputs": [], + "source": [ + "# Exploitability\n", + "# Comparison of exploitability.\n", + "ft_size = 20\n", + "def display_exploitability(results: Dict[str, RunResult]):\n", + " fig_exploitabilities = plt.gcf()\n", + " nash_conv_df = pd.DataFrame.from_dict({name: result.nash_convs for name, result in results.items()})\n", + "\n", + " sns.set(rc={'figure.figsize':(15,8)})\n", + " sns.set_theme(style=\"whitegrid\")\n", + " ax = sns.lineplot(data=nash_conv_df, palette=\"tab10\", linewidth=2.5)\n", + " ax.set_yscale('log')\n", + " ax.set_xlabel('iterations', fontsize=ft_size)\n", + " ax.set_ylabel('exploitability', fontsize=ft_size)\n", + " plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0, fontsize=ft_size)\n", + " ax.set_xticklabels(ax.get_xticks(), size = ft_size)\n", + " ax.set_yticklabels(ax.get_yticks(), size = ft_size)\n", + " fig_exploitabilities.tight_layout()\n", + " return fig_exploitabilities\n", + "# Usage:\n", + "# display_exploitability(results)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9fT1ChrlRxW3" + }, + "outputs": [], + "source": [ + "# Usage:\n", + "# n_steps = game.get_parameters()['horizon']\n", + "# steps = range(0,n_steps,2)\n", + "# fig_distributions = display_distribution_at_steps(results, steps, size=2)\n", + "ft_size = 20\n", + "def display_distribution_at_steps(results, steps, size=4, forbidden_states_indicator=None):\n", + " num_steps = len(steps)\n", + " num_results = len(results)\n", + " fig, axs = plt.subplots(\n", + " num_results,\n", + " num_steps,\n", + " sharex='col',\n", + " sharey='row',\n", + " figsize=(num_steps * size, num_results * size))\n", + " for row, (name, result) in enumerate(results.items()):\n", + " for i, step in enumerate(steps):\n", + " d = result.last_dist[step]\n", + " minval = round(np.amin(d), 3)\n", + " maxval=round(np.amax(d), 3)\n", + " if forbidden_states_indicator is not None:\n", + " d = d + forbidden_states_indicator\n", + " masked_array = np.ma.array (d, mask=np.isnan(d))\n", + " cmap = plt.cm.viridis\n", + " cmap.set_bad('grey',1.)\n", + " ax = axs[row][i]\n", + " ax.axis('off')\n", + " ax.set_title(str(name) + \"\\n\" + 
str(i) if not i else str(step), size = ft_size)\n", + " im = ax.imshow(\n", + " d,\n", + " interpolation='nearest',\n", + " cmap=plt.cm.viridis, vmin=minval, vmax=maxval)\n", + " ticks=[round(minval + i*(maxval-minval)/4.0, 3) for i in range(5)]\n", + " cbar = plt.colorbar(im, ax=ax, fraction=0.046, ticks=ticks)\n", + " cbar.ax.tick_params(labelsize=ft_size)\n", + " ax.set_xticklabels(ax.get_xticks(), size = ft_size)\n", + " ax.set_yticklabels(ax.get_yticks(), size = ft_size)\n", + "\n", + " fig.tight_layout()\n", + " return fig\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dyfIW0FbF_9J" + }, + "source": [ + "# Run algos" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QArHwBjvvkyJ" + }, + "outputs": [], + "source": [ + "settings = {\n", + " # \"with_large_noise\": {\"noise_intensity\": 1.0},\n", + " # \"with_medium_noise\": {\"noise_intensity\": 0.5},\n", + " \"with_small_noise\": {\"noise_intensity\": 0.1},\n", + " # \"with_no_noise\": {\"noise_intensity\": 0.0},\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zq_nBAh9F_eE" + }, + "outputs": [], + "source": [ + "num_iterations = 300\n", + "\n", + "setting_results = {}\n", + "\n", + "for (sk,sv) in settings.items():\n", + " print(\"\\n\\n\\n Setting {}: noise_intensity={}\\n\\n\\n\".format(sk, sv.get(\"noise_intensity\")))\n", + "\n", + " four_rooms_default_setting.update([(\"noise_intensity\", sv.get(\"noise_intensity\"))])\n", + " game_name = 'mfg_crowd_modelling_2d'\n", + " game_name_setting = 'mfg_crowd_modelling_2d_four_rooms_exploration'\n", + " game = pyspiel.load_game(game_name, four_rooms_default_setting)\n", + " init_policy = None\n", + " #####\n", + " print(\"start_time = \", datetime.datetime.now())\n", + " start_time = time.time()\n", + " print(\"start_time = \", start_time)\n", + " ######\n", + " start_time = time.time()\n", + " fp = fictitious_play.FictitiousPlay(game)\n", + " fp_result = run_algorithm(game, fp, num_iterations, init_policy=init_policy)\n", + " print(\"FP DONE, time = \", time.time() - start_time)\n", + " start_time = time.time()\n", + " md = mirror_descent.MirrorDescent(game, lr=0.05)\n", + " md_result = run_algorithm(game, md, num_iterations, init_policy=init_policy)\n", + " print(\"OMD LR 0.1 DONE, time = \", time.time() - start_time)\n", + " # start_time = time.time()\n", + " # munchausen_md = munchausen_mirror_descent.MunchausenMirrorDescent(game, lr=0.1)\n", + " # munchausen_md_result = run_algorithm(game, munchausen_md, num_iterations, init_policy=init_policy)\n", + " # print(\"MOMD DONE, time = \", time.time() - start_time)\n", + " start_time = time.time()\n", + " fixedp = fixed_point.FixedPoint(game)\n", + " fixedp_result = run_algorithm(game, fixedp, num_iterations, init_policy=init_policy)\n", + " print(\"FixedP DONE, time = \", time.time() - start_time)\n", + " start_time = time.time()\n", + " fpd = fictitious_play.FictitiousPlay(game, lr=0.01)\n", + " fpd_result = run_algorithm(game, fpd, num_iterations, init_policy=init_policy)\n", + " print(\"Damped FP DONE, time = \", time.time() - start_time)\n", + " start_time = time.time()\n", + " fixedp_softmax = fixed_point.FixedPoint(game, temperature=0.1)\n", + " fixedp_softmax_result = run_algorithm(game, fixedp_softmax, num_iterations, init_policy=init_policy)\n", + " print(\"FixedP softmax DONE, time = \", time.time() - start_time)\n", + " start_time = time.time()\n", + " fpsoft = fictitious_play.FictitiousPlay(game, temperature=0.1)\n", + 
" fpsoft_result = run_algorithm(game, fpsoft, num_iterations, init_policy=init_policy)\n", + " print(\"FP softmax DONE, time = \", time.time() - start_time)\n", + " start_time = time.time()\n", + " bpi = boltzmann_policy_iteration.BoltzmannPolicyIteration(game, lr=0.1)\n", + " bpi_result = run_algorithm(game, bpi, num_iterations, init_policy=init_policy)\n", + " print(\"BPI DONE, time = \", time.time() - start_time)\n", + " ###\n", + " results = {\n", + " 'Fictitious Play': fp_result,\n", + " 'Online Mirror Descent': md_result,\n", + " # 'Munchausen OMD': munchausen_md_result,\n", + " 'Fixed Point': fixedp_result,\n", + " 'Damped Fixed Point': fpd_result,\n", + " 'Softmax Fixed Point': fixedp_softmax_result,\n", + " 'Softmax Fictitious Play': fpsoft_result,\n", + " 'Boltzmann Policy Iteration': bpi_result,\n", + " }\n", + " setting_results.update([(sk, results)])\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G0zxyA1xDFBZ" + }, + "source": [ + "# Plots" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5dOCKlc_UdNf" + }, + "source": [ + "## Save data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YY1kHvSFM7vl" + }, + "outputs": [], + "source": [ + "from colabtools import fileedit\n", + "\n", + "\n", + "# # Downloading the results\n", + "# np.savez('/tmp/{}-setting_results.npz'.format(game_name_setting), setting_results=setting_results)\n", + "# # %download_file /tmp/setting_results.npz\n", + "# fileedit.download_file('/tmp/{}-setting_results.npz'.format(game_name_setting), ephemeral=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GCzslCs0UeU5" + }, + "source": [ + "## Exploitability\n", + "\n", + "It seems that we need to run this piece of code twice in order to have the correct figure size. The first time, the figure is smaller than expected. I suspect that the size is not well defined / fixed in the function display_exploitability." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "j1_SFNYYDIjC" + }, + "outputs": [], + "source": [ + "\n", + "\n", + "\n", + "# Plotting the results\n", + "for (sk, results) in setting_results.items():\n", + " print(\"\\n\\n\\n Setting {}\\n\\n\\n\".format(sk))\n", + " s_sk = settings[sk]\n", + " fig_exploitabilities = display_exploitability(results)\n", + " fig_exploitabilities.savefig('/tmp/{}-noise{}_exploitabilities.pdf'.format(game_name_setting, s_sk.get(\"noise_intensity\")))\n", + " fileedit.download_file('/tmp/{}-noise{}_exploitabilities.pdf'.format(game_name_setting, s_sk.get(\"noise_intensity\")), ephemeral=True)\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a4jYHQmjUgHV" + }, + "source": [ + "## Distributions\n", + "\n", + "In this version, the plotting function has been modified to take extra parameters for the colorbar. If no parameters are given, then we are going to use the smallest and largest values of the distribution (beware that if there is a forbidden state, the smallest value is always 0 because there is no mass on forbidden states)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wSYRJvn6DKRs" + }, + "outputs": [], + "source": [ + "# Plotting the results\n", + "for (sk, results) in setting_results.items():\n", + " print(\"\\n\\n\\n Setting {}\\n\\n\\n\".format(sk))\n", + " s_sk = settings[sk]\n", + " fig_distributions = display_distribution_at_steps(results, range(0, 41, 5), 5, forbidden_states_indicator)\n", + " fig_distributions.savefig('/tmp/{}-noise{}_distributions.pdf'.format(game_name_setting, s_sk.get(\"noise_intensity\")))\n", + " fileedit.download_file('/tmp/{}-noise{}_distributions.pdf'.format(game_name_setting, s_sk.get(\"noise_intensity\")), ephemeral=True)\n", + " plt.show()" + ] + } + ], + "metadata": { + "colab": { + "last_runtime": { + "build_target": "", + "kind": "local" + }, + "private_outputs": true, + "provenance": [ + { + "file_id": "10Pq-xQltz7r9F9ms_rdOcmedUJg4sxPk", + "timestamp": 1703171920274 + }, + { + "file_id": "1D-v9ERt1IYFNe_2stvBbNurI54Gmrm0p", + "timestamp": 1703167054504 + }, + { + "file_id": "1_HpSbPqfF4iehxIzgQ8bpHmEEN0JNx_U", + "timestamp": 1689468319981 + }, + { + "file_id": "1Hyiw9oWOqMrVDBFfzSDOAdt0L9m2jaYp", + "timestamp": 1689453000205 + }, + { + "file_id": "1MsoPiJKf05k7civpTndix3YYgoVOhf4G", + "timestamp": 1688043948116 + } + ], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/colabs/install_open_spiel.ipynb b/scenarios/bargaining/open_spiel/open_spiel/colabs/install_open_spiel.ipynb new file mode 100644 index 0000000..7fca2a3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/colabs/install_open_spiel.ipynb @@ -0,0 +1,117 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "install_open_spiel.ipynb", + "provenance": [ + { + "file_id": "install_open_spiel.ipynb", + "timestamp": 1629100659918 + } + ], + "collapsed_sections": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "cells": [ + { + "cell_type": "code", + "metadata": { + "id": "odj1Coq5H080" + }, + "source": [ + "#@title ##### License { display-mode: \"form\" }\n", + "# Copyright 2019 DeepMind Technologies Ltd. All rights reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dOOzDGYAZcW3" + }, + "source": [ + "# OpenSpiel\n", + "\n", + "* This Colab gets you started with installing OpenSpiel and its dependencies.\n", + "* OpenSpiel is a framework for reinforcement learning in games.\n", + "* The instructions are adapted from [here](https://github.com/deepmind/open_spiel/blob/master/docs/install.md)." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XC6kQBzWahEF" + }, + "source": [ + "## Install" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-2_Vbijh4FlZ" + }, + "source": [ + "Install OpenSpiel via pip:\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "lQc12Xrn4CXU" + }, + "source": [ + "!pip install --upgrade open_spiel" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jUtlXZ8FBnAL" + }, + "source": [ + "# It's play time!" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ewMXCaUw8d9Q" + }, + "source": [ + "import numpy as np\n", + "import pyspiel\n", + "\n", + "game = pyspiel.load_game(\"tic_tac_toe\")\n", + "state = game.new_initial_state()\n", + "\n", + "while not state.is_terminal():\n", + " state.apply_action(np.random.choice(state.legal_actions()))\n", + " print(str(state) + '\\n')" + ], + "execution_count": null, + "outputs": [] + } + ] +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/colabs/rcfr_pytorch.ipynb b/scenarios/bargaining/open_spiel/open_spiel/colabs/rcfr_pytorch.ipynb new file mode 100644 index 0000000..119f59b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/colabs/rcfr_pytorch.ipynb @@ -0,0 +1,288 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pyspiel \n", + "import tensorflow.compat.v1 as tf\n", + "import torch \n", + "import torch.nn as nn\n", + "\n", + "import algorithms.rcfr as rcfr_tf\n", + "import pytorch.rcfr as rcfr_pt\n", + "tf.disable_v2_behavior()\n", + "\n", + "tf.enable_eager_execution()\n", + "\n", + "_GAME = pyspiel.load_game('kuhn_poker')\n", + "_BATCH_SIZE = 12" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def tnsorflow_example(game_name, num_epochs, iterations):\n", + " game = pyspiel.load_game(game_name)\n", + "\n", + " models = []\n", + " for _ in range(game.num_players()):\n", + " models.append(\n", + " rcfr_tf.DeepRcfrModel(\n", + " game,\n", + " num_hidden_layers=1,\n", + " num_hidden_units=13,\n", + " num_hidden_factors=8,\n", + " use_skip_connections=True))\n", + "\n", + " buffer_size = -1\n", + " truncate_negative = False\n", + " bootstrap = False\n", + " if buffer_size > 0:\n", + " solver = rcfr_tf.ReservoirRcfrSolver(\n", + " game,\n", + " models,\n", + " buffer_size,\n", + " truncate_negative=truncate_negative)\n", + " else:\n", + " solver = rcfr_tf.RcfrSolver(\n", + " game,\n", + " models,\n", + " truncate_negative=truncate_negative,\n", + " bootstrap=bootstrap)\n", + "\n", + " def _train_fn(model, data):\n", + " \"\"\"Train `model` on `data`.\"\"\"\n", + " batch_size = 100\n", + " step_size = 0.01\n", + " data = data.shuffle(batch_size * 10)\n", + " data = data.batch(batch_size)\n", + " data = data.repeat(num_epochs)\n", + "\n", + " optimizer = tf.keras.optimizers.Adam(lr=step_size, amsgrad=True)\n", + "\n", + " @tf.function\n", + " def _train():\n", + " for x, y in data:\n", + " optimizer.minimize(\n", + " lambda: tf.losses.huber_loss(y, model(x), delta=0.01), # pylint: disable=cell-var-from-loop\n", + " model.trainable_variables)\n", + "\n", + " _train()\n", + "\n", + " # End of _train_fn\n", + " result = []\n", + " for i in range(iterations):\n", + " solver.evaluate_and_update_policy(_train_fn)\n", + " if i % 10 == 0:\n", + " conv = pyspiel.exploitability(game, solver.average_policy())\n", + " 
result.append(conv)\n", + " # print(\"Iteration {} exploitability {}\".format(i, conv))\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def pytorch_example(game_name, num_epochs, iterations):\n", + " game = pyspiel.load_game(game_name)\n", + "\n", + " models = []\n", + " for _ in range(game.num_players()):\n", + " models.append(\n", + " rcfr_pt.DeepRcfrModel(\n", + " game,\n", + " num_hidden_layers=1,\n", + " num_hidden_units=13,\n", + " num_hidden_factors=8,\n", + " use_skip_connections=True))\n", + "\n", + " buffer_size = -1\n", + " truncate_negative = False\n", + " bootstrap = False\n", + " if buffer_size > 0:\n", + " solver = rcfr_pt.ReservoirRcfrSolver(\n", + " game,\n", + " models,\n", + " buffer_size,\n", + " truncate_negative=truncate_negative)\n", + " else:\n", + " solver = rcfr_pt.RcfrSolver(\n", + " game,\n", + " models,\n", + " truncate_negative=truncate_negative,\n", + " bootstrap=bootstrap)\n", + "\n", + " def _train_fn(model, data):\n", + " \"\"\"Train `model` on `data`.\"\"\"\n", + " batch_size = 100\n", + " num_epochs = 20\n", + " step_size = 0.01\n", + " \n", + " data = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=True)\n", + " loss_fn = nn.SmoothL1Loss()\n", + " optimizer = torch.optim.Adam(model.parameters(), lr=step_size, amsgrad=True)\n", + "\n", + " def _train(model, data):\n", + " for epoch in range(num_epochs):\n", + " for x, y in data:\n", + " optimizer.zero_grad()\n", + " output = model(x)\n", + " loss = loss_fn(output, y)\n", + " loss.backward()\n", + " optimizer.step()\n", + "\n", + " _train(model, data)\n", + "\n", + " # End of _train_fn\n", + " result = []\n", + " for i in range(iterations):\n", + " solver.evaluate_and_update_policy(_train_fn)\n", + " if i % 10 == 0:\n", + " conv = pyspiel.exploitability(game, solver.average_policy())\n", + " result.append(conv)\n", + " # print(\"Iteration {} exploitability {}\".format(i, conv))\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tensorflow_rcfr = []\n", + "pytorch_rcfr = []\n", + "num_epochs, iterations = 20, 100\n", + "for _ in range(10):\n", + " tensorflow_rcfr.append(tnsorflow_example('kuhn_poker', num_epochs, iterations))\n", + " pytorch_rcfr.append(pytorch_example('kuhn_poker', num_epochs, iterations))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "x = [i for i in range(10)]\n", + "tf_exploitability = [sum(tfe) for tfe in zip(*tensorflow_rcfr)]\n", + "pt_exploitability = [sum(pte) for pte in zip(*pytorch_rcfr)]\n", + "\n", + "plt.plot(x, tf_exploitability, label=\"tensorflow\")\n", + "plt.plot(x, pt_exploitability, label=\"pytorch\")\n", + "\n", + "plt.legend()\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tensorflow_rcfr = []\n", + "pytorch_rcfr = []\n", + "num_epochs, iterations = 200, 100\n", + "for _ in range(10):\n", + " tensorflow_rcfr.append(tnsorflow_example('kuhn_poker', num_epochs, iterations))\n", + " pytorch_rcfr.append(pytorch_example('kuhn_poker', num_epochs, iterations))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "x = [i for i in range(10)]\n", + "tf_exploitability = 
[sum(tfe) for tfe in zip(*tensorflow_rcfr)]\n", + "pt_exploitability = [sum(pte) for pte in zip(*pytorch_rcfr)]\n", + "\n", + "plt.plot(x, tf_exploitability, label=\"tensorflow\")\n", + "plt.plot(x, pt_exploitability, label=\"pytorch\")\n", + "\n", + "plt.legend()\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tensorflow_rcfr = []\n", + "pytorch_rcfr = []\n", + "num_epochs, iterations = 20, 100\n", + "for _ in range(10):\n", + " tensorflow_rcfr.append(tnsorflow_example('leduc_poker', num_epochs, iterations))\n", + " pytorch_rcfr.append(pytorch_example('leduc_poker', num_epochs, iterations))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "x = [i for i in range(10)]\n", + "tf_exploitability = [sum(tfe) for tfe in zip(*tensorflow_rcfr)]\n", + "pt_exploitability = [sum(pte) for pte in zip(*pytorch_rcfr)]\n", + "\n", + "plt.plot(x, tf_exploitability, label=\"tensorflow\")\n", + "plt.plot(x, pt_exploitability, label=\"pytorch\")\n", + "\n", + "plt.legend()\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/colabs/research_dqn_pytorch.ipynb b/scenarios/bargaining/open_spiel/open_spiel/colabs/research_dqn_pytorch.ipynb new file mode 100644 index 0000000..2dd42da --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/colabs/research_dqn_pytorch.ipynb @@ -0,0 +1,712 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From /usr/local/lib/python3.8/dist-packages/tensorflow/python/compat/v2_compat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "non-resource variables are not supported in the long term\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import tensorflow.compat.v1 as tf\n", + "\n", + "from open_spiel.python import rl_environment\n", + "from open_spiel.python.pytorch import dqn as dqn_pt\n", + "from open_spiel.python.algorithms import dqn\n", + "from open_spiel.python.algorithms import random_agent" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def eval_against_random_bots(env, trained_agents, random_agents, num_episodes):\n", + " \"\"\"Evaluates `trained_agents` against `random_agents` for `num_episodes`.\"\"\"\n", + " num_players = len(trained_agents)\n", + " sum_episode_rewards = np.zeros(num_players)\n", + " for player_pos in range(num_players):\n", + " cur_agents = random_agents[:]\n", + " cur_agents[player_pos] = trained_agents[player_pos]\n", + " for _ in range(num_episodes):\n", + " time_step = env.reset()\n", + " episode_rewards = 0\n", + " while not time_step.last():\n", + " 
player_id = time_step.observations[\"current_player\"]\n", + " if env.is_turn_based:\n", + " agent_output = cur_agents[player_id].step(\n", + " time_step, is_evaluation=True)\n", + " action_list = [agent_output.action]\n", + " else:\n", + " agents_output = [\n", + " agent.step(time_step, is_evaluation=True) for agent in cur_agents\n", + " ]\n", + " action_list = [agent_output.action for agent_output in agents_output]\n", + " time_step = env.step(action_list)\n", + " episode_rewards += time_step.rewards[player_pos]\n", + " sum_episode_rewards[player_pos] += episode_rewards\n", + " return sum_episode_rewards / num_episodes" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def pt_main(game,\n", + " config,\n", + " checkpoint_dir,\n", + " num_train_episodes,\n", + " eval_every,\n", + " hidden_layers_sizes,\n", + " replay_buffer_capacity,\n", + " batch_size):\n", + " num_players = 2\n", + "\n", + " env = rl_environment.Environment(game, **config)\n", + " info_state_size = env.observation_spec()[\"info_state\"][0]\n", + " num_actions = env.action_spec()[\"num_actions\"]\n", + "\n", + " # random agents for evaluation\n", + " random_agents = [\n", + " random_agent.RandomAgent(player_id=idx, num_actions=num_actions)\n", + " for idx in range(num_players)\n", + " ]\n", + "\n", + "\n", + " hidden_layers_sizes = [int(l) for l in hidden_layers_sizes]\n", + " # pylint: disable=g-complex-comprehension\n", + " agents = [\n", + " dqn_pt.DQN(\n", + " player_id=idx,\n", + " state_representation_size=info_state_size,\n", + " num_actions=num_actions,\n", + " hidden_layers_sizes=hidden_layers_sizes,\n", + " replay_buffer_capacity=replay_buffer_capacity,\n", + " batch_size=batch_size) for idx in range(num_players)\n", + " ]\n", + " result = []\n", + " for ep in range(num_train_episodes):\n", + " if (ep + 1) % eval_every == 0:\n", + " r_mean = eval_against_random_bots(env, agents, random_agents, 1000)\n", + " result.append(r_mean)\n", + " print(\"[%s] Mean episode rewards %s\" %(ep + 1, r_mean))\n", + "\n", + " time_step = env.reset()\n", + " while not time_step.last():\n", + " player_id = time_step.observations[\"current_player\"]\n", + " if env.is_turn_based:\n", + " agent_output = agents[player_id].step(time_step)\n", + " action_list = [agent_output.action]\n", + " else:\n", + " agents_output = [agent.step(time_step) for agent in agents]\n", + " action_list = [agent_output.action for agent_output in agents_output]\n", + " time_step = env.step(action_list)\n", + "\n", + " # Episode is over, step all agents with final info state.\n", + " for agent in agents:\n", + " agent.step(time_step)\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def tf_main(game,\n", + " config,\n", + " checkpoint_dir,\n", + " num_train_episodes,\n", + " eval_every,\n", + " hidden_layers_sizes,\n", + " replay_buffer_capacity,batch_size):\n", + " num_players = 2\n", + "\n", + " env = rl_environment.Environment(game, **config)\n", + " info_state_size = env.observation_spec()[\"info_state\"][0]\n", + " num_actions = env.action_spec()[\"num_actions\"]\n", + "\n", + " # random agents for evaluation\n", + " random_agents = [\n", + " random_agent.RandomAgent(player_id=idx, num_actions=num_actions)\n", + " for idx in range(num_players)\n", + " ]\n", + "\n", + " with tf.Session() as sess:\n", + " hidden_layers_sizes = [int(l) for l in hidden_layers_sizes]\n", + " # pylint: 
disable=g-complex-comprehension\n", + " agents = [\n", + " dqn.DQN(\n", + " session=sess,\n", + " player_id=idx,\n", + " state_representation_size=info_state_size,\n", + " num_actions=num_actions,\n", + " hidden_layers_sizes=hidden_layers_sizes,\n", + " replay_buffer_capacity=replay_buffer_capacity,\n", + " batch_size=batch_size) for idx in range(num_players)\n", + " ]\n", + " saver = tf.train.Saver()\n", + " sess.run(tf.global_variables_initializer())\n", + " \n", + " result_tf = []\n", + " for ep in range(num_train_episodes):\n", + " if (ep + 1) % eval_every == 0:\n", + " r_mean = eval_against_random_bots(env, agents, random_agents, 1000)\n", + " result_tf.append(r_mean)\n", + " print(\"[%s] Mean episode rewards %s\" %(ep + 1, r_mean))\n", + "\n", + " time_step = env.reset()\n", + " while not time_step.last():\n", + " player_id = time_step.observations[\"current_player\"]\n", + " if env.is_turn_based:\n", + " agent_output = agents[player_id].step(time_step)\n", + " action_list = [agent_output.action]\n", + " else:\n", + " agents_output = [agent.step(time_step) for agent in agents]\n", + " action_list = [agent_output.action for agent_output in agents_output]\n", + " time_step = env.step(action_list)\n", + "\n", + " # Episode is over, step all agents with final info state.\n", + " for agent in agents:\n", + " agent.step(time_step)\n", + " return result_tf" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "checkpoint_dir = \"/tmp/dqn_test\"\n", + "num_train_episodes = 10000\n", + "eval_every = 100\n", + "\n", + "hidden_layers_sizes = [64, 64]\n", + "replay_buffer_capacity = int(1e5)\n", + "batch_size = 32" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# BREAKTHROUGH" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "game = \"breakthrough\"\n", + "config = {\"columns\": 5, \"rows\": 5}" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[100] Mean episode rewards [0.396 0.44 ]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/torch/autograd/__init__.py:130: UserWarning: CUDA initialization: Found no NVIDIA driver on your system. 
Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx (Triggered internally at /pytorch/c10/cuda/CUDAFunctions.cpp:100.)\n", + " Variable._execution_engine.run_backward(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[200] Mean episode rewards [0.396 0.472]\n", + "[300] Mean episode rewards [0.522 0.49 ]\n", + "[400] Mean episode rewards [0.664 0.572]\n", + "[500] Mean episode rewards [0.676 0.578]\n", + "[600] Mean episode rewards [0.686 0.578]\n", + "[700] Mean episode rewards [0.72 0.484]\n", + "[800] Mean episode rewards [0.772 0.432]\n", + "[900] Mean episode rewards [0.664 0.384]\n", + "[1000] Mean episode rewards [0.616 0.332]\n", + "[1100] Mean episode rewards [0.646 0.338]\n", + "[1200] Mean episode rewards [0.586 0.348]\n", + "[1300] Mean episode rewards [0.512 0.328]\n", + "[1400] Mean episode rewards [0.474 0.324]\n", + "[1500] Mean episode rewards [0.416 0.316]\n", + "[1600] Mean episode rewards [0.548 0.266]\n", + "[1700] Mean episode rewards [0.64 0.174]\n", + "[1800] Mean episode rewards [0.622 0.226]\n", + "[1900] Mean episode rewards [0.524 0.3 ]\n", + "[2000] Mean episode rewards [0.568 0.132]\n", + "[2100] Mean episode rewards [0.544 0.14 ]\n", + "[2200] Mean episode rewards [0.698 0.06 ]\n", + "[2300] Mean episode rewards [0.696 0.188]\n", + "[2400] Mean episode rewards [0.726 0.346]\n", + "[2500] Mean episode rewards [0.792 0.404]\n", + "[2600] Mean episode rewards [0.876 0.512]\n", + "[2700] Mean episode rewards [0.902 0.464]\n", + "[2800] Mean episode rewards [0.802 0.444]\n", + "[2900] Mean episode rewards [0.866 0.684]\n", + "[3000] Mean episode rewards [0.884 0.654]\n", + "[3100] Mean episode rewards [0.822 0.626]\n", + "[3200] Mean episode rewards [0.836 0.716]\n", + "[3300] Mean episode rewards [0.76 0.466]\n", + "[3400] Mean episode rewards [0.662 0.708]\n", + "[3500] Mean episode rewards [0.752 0.782]\n", + "[3600] Mean episode rewards [0.648 0.662]\n", + "[3700] Mean episode rewards [0.832 0.754]\n", + "[3800] Mean episode rewards [0.794 0.792]\n", + "[3900] Mean episode rewards [0.732 0.724]\n", + "[4000] Mean episode rewards [0.882 0.648]\n", + "[4100] Mean episode rewards [0.828 0.566]\n", + "[4200] Mean episode rewards [0.904 0.654]\n", + "[4300] Mean episode rewards [0.882 0.434]\n", + "[4400] Mean episode rewards [0.886 0.636]\n", + "[4500] Mean episode rewards [0.914 0.728]\n", + "[4600] Mean episode rewards [0.954 0.712]\n", + "[4700] Mean episode rewards [0.926 0.656]\n", + "[4800] Mean episode rewards [0.888 0.78 ]\n", + "[4900] Mean episode rewards [0.93 0.77]\n", + "[5000] Mean episode rewards [0.95 0.764]\n", + "[5100] Mean episode rewards [0.944 0.848]\n", + "[5200] Mean episode rewards [0.978 0.642]\n", + "[5300] Mean episode rewards [0.928 0.948]\n", + "[5400] Mean episode rewards [0.952 0.804]\n", + "[5500] Mean episode rewards [0.976 0.928]\n", + "[5600] Mean episode rewards [0.98 0.916]\n", + "[5700] Mean episode rewards [0.952 0.924]\n", + "[5800] Mean episode rewards [0.962 0.94 ]\n", + "[5900] Mean episode rewards [0.946 0.948]\n", + "[6000] Mean episode rewards [0.958 0.914]\n", + "[6100] Mean episode rewards [0.936 0.962]\n", + "[6200] Mean episode rewards [0.95 0.962]\n", + "[6300] Mean episode rewards [0.972 0.962]\n", + "[6400] Mean episode rewards [0.91 0.952]\n", + "[6500] Mean episode rewards [0.956 0.956]\n", + "[6600] Mean episode rewards [0.976 0.932]\n", + "[6700] Mean episode rewards [0.968 0.948]\n", + "[6800] Mean episode rewards [0.98 
0.946]\n", + "[6900] Mean episode rewards [0.976 0.952]\n", + "[7000] Mean episode rewards [0.982 0.95 ]\n", + "[7100] Mean episode rewards [0.988 0.956]\n", + "[7200] Mean episode rewards [0.984 0.948]\n", + "[7300] Mean episode rewards [0.968 0.96 ]\n", + "[7400] Mean episode rewards [0.978 0.97 ]\n", + "[7500] Mean episode rewards [0.96 0.942]\n", + "[7600] Mean episode rewards [0.966 0.968]\n", + "[7700] Mean episode rewards [0.956 0.948]\n", + "[7800] Mean episode rewards [0.976 0.962]\n", + "[7900] Mean episode rewards [0.958 0.964]\n", + "[8000] Mean episode rewards [0.966 0.942]\n", + "[8100] Mean episode rewards [0.934 0.948]\n", + "[8200] Mean episode rewards [0.95 0.952]\n", + "[8300] Mean episode rewards [0.946 0.958]\n", + "[8400] Mean episode rewards [0.974 0.94 ]\n", + "[8500] Mean episode rewards [0.94 0.934]\n", + "[8600] Mean episode rewards [0.958 0.952]\n", + "[8700] Mean episode rewards [0.93 0.966]\n", + "[8800] Mean episode rewards [0.968 0.94 ]\n", + "[8900] Mean episode rewards [0.962 0.942]\n", + "[9000] Mean episode rewards [0.946 0.95 ]\n", + "[9100] Mean episode rewards [0.968 0.938]\n", + "[9200] Mean episode rewards [0.962 0.95 ]\n", + "[9300] Mean episode rewards [0.976 0.94 ]\n", + "[9400] Mean episode rewards [0.98 0.948]\n", + "[9500] Mean episode rewards [0.964 0.934]\n", + "[9600] Mean episode rewards [0.97 0.922]\n", + "[9700] Mean episode rewards [0.972 0.936]\n", + "[9800] Mean episode rewards [0.966 0.932]\n", + "[9900] Mean episode rewards [0.974 0.94 ]\n", + "[10000] Mean episode rewards [0.966 0.928]\n" + ] + } + ], + "source": [ + "pt_result = pt_main(game,\n", + " config,\n", + " checkpoint_dir,\n", + " num_train_episodes,\n", + " eval_every,\n", + " hidden_layers_sizes,\n", + " replay_buffer_capacity,batch_size)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "ep = [x for x in range(len(pt_result))]\n", + "pt_r_mean0 = [y[0] for y in pt_result]\n", + "pt_r_mean1 = [y[1] for y in pt_result]\n", + "\n", + "plt.plot(ep,pt_r_mean0, c='red')\n", + "plt.plot(ep,pt_r_mean1, c='blue')\n", + "plt.xlabel('Episode')\n", + "plt.ylabel('Mean episode rewards')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[100] Mean episode rewards [0.668 0.166]\n", + "[200] Mean episode rewards [0.582 0.026]\n", + "[300] Mean episode rewards [ 0.674 -0.1 ]\n", + "[400] Mean episode rewards [ 0.556 -0.128]\n", + "[500] Mean episode rewards [ 0.446 -0.142]\n", + "[600] Mean episode rewards [ 0.322 -0.142]\n", + "[700] Mean episode rewards [ 0.23 -0.214]\n", + "[800] Mean episode rewards [ 0.292 -0.236]\n", + "[900] Mean episode rewards [ 0.28 -0.298]\n", + "[1000] Mean episode rewards [ 0.224 -0.298]\n", + "[1100] Mean episode rewards [ 0.238 -0.306]\n", + "[1200] Mean episode rewards [ 0.224 -0.37 ]\n", + "[1300] Mean episode rewards [ 0.18 -0.404]\n", + "[1400] Mean episode rewards [ 0.24 -0.388]\n", + "[1500] Mean episode rewards [ 0.212 -0.36 ]\n", + "[1600] Mean episode rewards [ 0.29 -0.302]\n", + "[1700] Mean episode rewards [ 0.354 -0.37 ]\n", + "[1800] Mean episode rewards [ 0.524 -0.33 ]\n", + "[1900] Mean episode rewards [ 0.546 -0.242]\n", + "[2000] Mean episode rewards [ 0.65 -0.162]\n", + "[2100] Mean episode rewards [ 0.574 -0.158]\n", + "[2200] Mean episode rewards [ 0.512 -0.068]\n", + "[2300] Mean episode rewards [ 
0.488 -0.204]\n", + "[2400] Mean episode rewards [ 0.382 -0.004]\n", + "[2500] Mean episode rewards [ 0.164 -0.006]\n", + "[2600] Mean episode rewards [0.244 0.076]\n", + "[2700] Mean episode rewards [0.332 0.21 ]\n", + "[2800] Mean episode rewards [0.246 0.26 ]\n", + "[2900] Mean episode rewards [0.23 0.572]\n", + "[3000] Mean episode rewards [0.47 0.424]\n", + "[3100] Mean episode rewards [0.524 0.35 ]\n", + "[3200] Mean episode rewards [0.566 0.448]\n", + "[3300] Mean episode rewards [0.494 0.396]\n", + "[3400] Mean episode rewards [0.644 0.412]\n", + "[3500] Mean episode rewards [0.666 0.606]\n", + "[3600] Mean episode rewards [0.618 0.528]\n", + "[3700] Mean episode rewards [0.676 0.734]\n", + "[3800] Mean episode rewards [0.682 0.668]\n", + "[3900] Mean episode rewards [0.794 0.784]\n", + "[4000] Mean episode rewards [0.86 0.68]\n", + "[4100] Mean episode rewards [0.768 0.82 ]\n", + "[4200] Mean episode rewards [0.854 0.754]\n", + "[4300] Mean episode rewards [0.912 0.768]\n", + "[4400] Mean episode rewards [0.946 0.832]\n", + "[4500] Mean episode rewards [0.918 0.744]\n", + "[4600] Mean episode rewards [0.934 0.778]\n", + "[4700] Mean episode rewards [0.95 0.792]\n", + "[4800] Mean episode rewards [0.88 0.79]\n", + "[4900] Mean episode rewards [0.956 0.83 ]\n", + "[5000] Mean episode rewards [0.934 0.838]\n", + "[5100] Mean episode rewards [0.948 0.882]\n", + "[5200] Mean episode rewards [0.936 0.828]\n", + "[5300] Mean episode rewards [0.906 0.848]\n", + "[5400] Mean episode rewards [0.942 0.836]\n", + "[5500] Mean episode rewards [0.94 0.876]\n", + "[5600] Mean episode rewards [0.944 0.866]\n", + "[5700] Mean episode rewards [0.954 0.868]\n", + "[5800] Mean episode rewards [0.954 0.856]\n", + "[5900] Mean episode rewards [0.95 0.86]\n", + "[6000] Mean episode rewards [0.956 0.826]\n", + "[6100] Mean episode rewards [0.938 0.888]\n", + "[6200] Mean episode rewards [0.964 0.892]\n", + "[6300] Mean episode rewards [0.956 0.902]\n", + "[6400] Mean episode rewards [0.938 0.88 ]\n", + "[6500] Mean episode rewards [0.972 0.854]\n", + "[6600] Mean episode rewards [0.942 0.844]\n", + "[6700] Mean episode rewards [0.936 0.868]\n", + "[6800] Mean episode rewards [0.952 0.878]\n", + "[6900] Mean episode rewards [0.944 0.904]\n", + "[7000] Mean episode rewards [0.96 0.932]\n", + "[7100] Mean episode rewards [0.954 0.892]\n", + "[7200] Mean episode rewards [0.948 0.944]\n", + "[7300] Mean episode rewards [0.968 0.902]\n", + "[7400] Mean episode rewards [0.936 0.898]\n", + "[7500] Mean episode rewards [0.966 0.898]\n", + "[7600] Mean episode rewards [0.954 0.908]\n", + "[7700] Mean episode rewards [0.974 0.902]\n", + "[7800] Mean episode rewards [0.966 0.888]\n", + "[7900] Mean episode rewards [0.966 0.888]\n", + "[8000] Mean episode rewards [0.954 0.914]\n", + "[8100] Mean episode rewards [0.962 0.928]\n", + "[8200] Mean episode rewards [0.954 0.924]\n", + "[8300] Mean episode rewards [0.92 0.902]\n", + "[8400] Mean episode rewards [0.932 0.936]\n", + "[8500] Mean episode rewards [0.964 0.908]\n", + "[8600] Mean episode rewards [0.956 0.926]\n", + "[8700] Mean episode rewards [0.932 0.916]\n", + "[8800] Mean episode rewards [0.938 0.918]\n", + "[8900] Mean episode rewards [0.926 0.948]\n", + "[9000] Mean episode rewards [0.912 0.944]\n", + "[9100] Mean episode rewards [0.926 0.944]\n", + "[9200] Mean episode rewards [0.916 0.952]\n", + "[9300] Mean episode rewards [0.926 0.94 ]\n", + "[9400] Mean episode rewards [0.884 0.906]\n", + "[9500] Mean episode rewards [0.914 0.922]\n", + "[9600] Mean 
episode rewards [0.918 0.922]\n", + "[9700] Mean episode rewards [0.928 0.936]\n", + "[9800] Mean episode rewards [0.946 0.934]\n", + "[9900] Mean episode rewards [0.926 0.924]\n", + "[10000] Mean episode rewards [0.926 0.918]\n" + ] + } + ], + "source": [ + "result_tf = tf_main(game,\n", + " config,\n", + " checkpoint_dir,\n", + " num_train_episodes,\n", + " eval_every,\n", + " hidden_layers_sizes,\n", + " replay_buffer_capacity,batch_size)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ep = [x for x in range(len(result_tf))]\n", + "tf_r_mean0 = [y[0] for y in result_tf]\n", + "tf_r_mean1 = [y[1] for y in result_tf]\n", + "\n", + "plt.plot(ep,tf_r_mean0, c='red')\n", + "plt.plot(ep,tf_r_mean1, c='blue')\n", + "plt.xlabel('Episode')\n", + "plt.ylabel('Mean episode rewards')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.plot(ep,pt_r_mean0, c='skyblue')\n", + "plt.plot(ep,pt_r_mean1, c='skyblue', linestyle='dashed')\n", + "plt.plot(ep,tf_r_mean0, c='pink')\n", + "plt.plot(ep,tf_r_mean1, c='pink', linestyle='dashed')\n", + "plt.xlabel('Episode')\n", + "plt.ylabel('Mean episode rewards')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# TIC-TAC-TOE" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "game = \"tic_tac_toe\"\n", + "config = {}\n", + "num_train_episodes = 20000\n", + "eval_every = 1000" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1000] Mean episode rewards [0.823 0.112]\n", + "[2000] Mean episode rewards [0.769 0.089]\n", + "[3000] Mean episode rewards [0.883 0.161]\n", + "[4000] Mean episode rewards [0.723 0.229]\n", + "[5000] Mean episode rewards [0.424 0.125]\n", + "[6000] Mean episode rewards [0.54 0.246]\n", + "[7000] Mean episode rewards [0.637 0.244]\n", + "[8000] Mean episode rewards [0.794 0.236]\n", + "[9000] Mean episode rewards [0.643 0.148]\n", + "[10000] Mean episode rewards [0.813 0.148]\n", + "[11000] Mean episode rewards [0.626 0.17 ]\n", + "[12000] Mean episode rewards [0.622 0.188]\n", + "[13000] Mean episode rewards [0.874 0.244]\n", + "[14000] Mean episode rewards [0.856 0.183]\n", + "[15000] Mean episode rewards [0.825 0.28 ]\n", + "[16000] Mean episode rewards [0.82 0.361]\n", + "[17000] Mean episode rewards [0.847 0.241]\n", + "[18000] Mean episode rewards [0.869 0.296]\n", + "[19000] Mean episode rewards [0.904 0.261]\n", + "[20000] Mean episode rewards [0.858 0.277]\n" + ] + } + ], + "source": [ + "pt_result = pt_main(game,\n", + " config,\n", + " checkpoint_dir,\n", + " num_train_episodes,\n", + " eval_every,\n", + " hidden_layers_sizes,\n", + " replay_buffer_capacity,batch_size)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "ep = [x for x in range(len(pt_result))]\n", + "pt_r_mean0 = [y[0] for y in pt_result]\n", + "pt_r_mean1 = [y[1] for y in pt_result]\n", + "\n", + "plt.plot(ep,pt_r_mean0, c='red')\n", + "plt.plot(ep,pt_r_mean1, c='blue')\n", + "plt.xlabel('Episode')\n", + "plt.ylabel('Mean episode rewards')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "[1000] Mean episode rewards [ 0.493 -0.204]\n", + "[2000] Mean episode rewards [ 0.346 -0.244]\n", + "[3000] Mean episode rewards [ 0.537 -0.054]\n", + "[4000] Mean episode rewards [ 0.464 -0.059]\n", + "[5000] Mean episode rewards [0.46 0.144]\n", + "[6000] Mean episode rewards [0.442 0.08 ]\n", + "[7000] Mean episode rewards [0.606 0.068]\n", + "[8000] Mean episode rewards [0.447 0.165]\n", + "[9000] Mean episode rewards [0.702 0.196]\n", + "[10000] Mean episode rewards [0.694 0.227]\n", + "[11000] Mean episode rewards [0.757 0.213]\n", + "[12000] Mean episode rewards [0.829 0.149]\n", + "[13000] Mean episode rewards [0.733 0.186]\n", + "[14000] Mean episode rewards [0.8 0.318]\n", + "[15000] Mean episode rewards [0.849 0.308]\n", + "[16000] Mean episode rewards [0.789 0.198]\n", + "[17000] Mean episode rewards [0.825 0.353]\n", + "[18000] Mean episode rewards [0.804 0.367]\n", + "[19000] Mean episode rewards [0.827 0.355]\n", + "[20000] Mean episode rewards [0.796 0.368]\n" + ] + } + ], + "source": [ + "result_tf = tf_main(game,\n", + " config,\n", + " checkpoint_dir,\n", + " num_train_episodes,\n", + " eval_every,\n", + " hidden_layers_sizes,\n", + " replay_buffer_capacity,batch_size)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ep = [x for x in range(len(result_tf))]\n", + "tf_r_mean0 = [y[0] for y in result_tf]\n", + "tf_r_mean1 = [y[1] for y in result_tf]\n", + "\n", + "plt.plot(ep,tf_r_mean0, c='red')\n", + "plt.plot(ep,tf_r_mean1, c='blue')\n", + "plt.xlabel('Episode')\n", + "plt.ylabel('Mean episode rewards')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.plot(ep,pt_r_mean0, c='skyblue')\n", + "plt.plot(ep,pt_r_mean1, c='skyblue', linestyle='dashed')\n", + "plt.plot(ep,tf_r_mean0, c='pink')\n", + "plt.plot(ep,tf_r_mean1, c='pink', linestyle='dashed')\n", + "plt.xlabel('Episode')\n", + "plt.ylabel('Mean episode rewards')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/colabs/research_nfsp_tf_pt.ipynb b/scenarios/bargaining/open_spiel/open_spiel/colabs/research_nfsp_tf_pt.ipynb new file mode 100644 index 0000000..de95ca0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/colabs/research_nfsp_tf_pt.ipynb @@ -0,0 +1,307 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from absl import logging\n", + "import tensorflow.compat.v1 as tf\n", + "\n", + "from open_spiel.python import policy\n", + "from open_spiel.python import rl_environment\n", + "from open_spiel.python.algorithms import exploitability\n", + "from open_spiel.python.algorithms import nfsp\n", + "from open_spiel.python.pytorch import nfsp as nfsp_pt\n", + "\n", + "class NFSPPolicies(policy.Policy):\n", + " \"\"\"Joint policy to be evaluated.\"\"\"\n", + "\n", + " def __init__(self, env, nfsp_policies, 
mode):\n", + " game = env.game\n", + " player_ids = [0, 1]\n", + " super(NFSPPolicies, self).__init__(game, player_ids)\n", + " self._policies = nfsp_policies\n", + " self._mode = mode\n", + " self._obs = {\"info_state\": [None, None], \"legal_actions\": [None, None]}\n", + "\n", + " def action_probabilities(self, state, player_id=None):\n", + " cur_player = state.current_player()\n", + " legal_actions = state.legal_actions(cur_player)\n", + "\n", + " self._obs[\"current_player\"] = cur_player\n", + " self._obs[\"info_state\"][cur_player] = (\n", + " state.information_state_tensor(cur_player))\n", + " self._obs[\"legal_actions\"][cur_player] = legal_actions\n", + "\n", + " info_state = rl_environment.TimeStep(\n", + " observations=self._obs, rewards=None, discounts=None, step_type=None)\n", + "\n", + " with self._policies[cur_player].temp_mode_as(self._mode):\n", + " p = self._policies[cur_player].step(info_state, is_evaluation=True).probs\n", + " prob_dict = {action: p[action] for action in legal_actions}\n", + " return prob_dict\n", + "\n", + "\n", + "def tf_main(game,\n", + " env_config,\n", + " num_train_episodes,\n", + " eval_every,\n", + " hidden_layers_sizes,\n", + " replay_buffer_capacity,\n", + " reservoir_buffer_capacity,\n", + " anticipatory_param):\n", + " env = rl_environment.Environment(game, **env_configs)\n", + " info_state_size = env.observation_spec()[\"info_state\"][0]\n", + " num_actions = env.action_spec()[\"num_actions\"]\n", + "\n", + " hidden_layers_sizes = [int(l) for l in hidden_layers_sizes]\n", + " kwargs = {\n", + " \"replay_buffer_capacity\": replay_buffer_capacity,\n", + " \"epsilon_decay_duration\": num_train_episodes,\n", + " \"epsilon_start\": 0.06,\n", + " \"epsilon_end\": 0.001,\n", + " }\n", + " expl_list = []\n", + " with tf.Session() as sess:\n", + " # pylint: disable=g-complex-comprehension\n", + " agents = [\n", + " nfsp.NFSP(sess, idx, info_state_size, num_actions, hidden_layers_sizes,\n", + " reservoir_buffer_capacity, anticipatory_param,\n", + " **kwargs) for idx in range(num_players)\n", + " ]\n", + " expl_policies_avg = NFSPPolicies(env, agents, nfsp.MODE.average_policy)\n", + "\n", + " sess.run(tf.global_variables_initializer())\n", + " for ep in range(num_train_episodes):\n", + " if (ep + 1) % eval_every == 0:\n", + " losses = [agent.loss for agent in agents]\n", + " print(\"Losses: %s\" %losses)\n", + " expl = exploitability.exploitability(env.game, expl_policies_avg)\n", + " expl_list.append(expl)\n", + " print(\"[%s] Exploitability AVG %s\" %(ep + 1, expl))\n", + " print(\"_____________________________________________\")\n", + "\n", + " time_step = env.reset()\n", + " while not time_step.last():\n", + " player_id = time_step.observations[\"current_player\"]\n", + " agent_output = agents[player_id].step(time_step)\n", + " action_list = [agent_output.action]\n", + " time_step = env.step(action_list)\n", + "\n", + " # Episode is over, step all agents with final info state.\n", + " for agent in agents:\n", + " agent.step(time_step)\n", + " return expl_list\n", + " \n", + "def pt_main(game,\n", + " env_config,\n", + " num_train_episodes,\n", + " eval_every,\n", + " hidden_layers_sizes,\n", + " replay_buffer_capacity,\n", + " reservoir_buffer_capacity,\n", + " anticipatory_param):\n", + " env = rl_environment.Environment(game, **env_configs)\n", + " info_state_size = env.observation_spec()[\"info_state\"][0]\n", + " num_actions = env.action_spec()[\"num_actions\"]\n", + "\n", + " hidden_layers_sizes = [int(l) for l in hidden_layers_sizes]\n", + 
" kwargs = {\n", + " \"replay_buffer_capacity\": replay_buffer_capacity,\n", + " \"epsilon_decay_duration\": num_train_episodes,\n", + " \"epsilon_start\": 0.06,\n", + " \"epsilon_end\": 0.001,\n", + " }\n", + " expl_list = []\n", + " agents = [\n", + " nfsp_pt.NFSP(idx, info_state_size, num_actions, hidden_layers_sizes,\n", + " reservoir_buffer_capacity, anticipatory_param,\n", + " **kwargs) for idx in range(num_players)\n", + " ]\n", + " expl_policies_avg = NFSPPolicies(env, agents, nfsp_pt.MODE.average_policy) \n", + " for ep in range(num_train_episodes):\n", + " if (ep + 1) % eval_every == 0:\n", + " losses = [agent.loss.item() for agent in agents]\n", + " print(\"Losses: %s\" %losses)\n", + " expl = exploitability.exploitability(env.game, expl_policies_avg)\n", + " expl_list.append(expl)\n", + " print(\"[%s] Exploitability AVG %s\" %(ep + 1, expl))\n", + " print(\"_____________________________________________\") \n", + " time_step = env.reset()\n", + " while not time_step.last():\n", + " player_id = time_step.observations[\"current_player\"]\n", + " agent_output = agents[player_id].step(time_step)\n", + " action_list = [agent_output.action]\n", + " time_step = env.step(action_list) \n", + " # Episode is over, step all agents with final info state.\n", + " for agent in agents:\n", + " agent.step(time_step)\n", + " return expl_list" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "game = \"kuhn_poker\"\n", + "num_players = 2\n", + "env_configs = {\"players\": num_players}\n", + "num_train_episodes = int(3e6)\n", + "eval_every = 10000\n", + "hidden_layers_sizes = [128]\n", + "replay_buffer_capacity = int(2e5)\n", + "reservoir_buffer_capacity = int(2e6)\n", + "anticipatory_param = 0.1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tf_kuhn_result = tf_main(game, \n", + " env_configs,\n", + " num_train_episodes,\n", + " eval_every,\n", + " hidden_layers_sizes,\n", + " replay_buffer_capacity,\n", + " reservoir_buffer_capacity,\n", + " anticipatory_param)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pt_kuhn_result = pt_main(game, \n", + " env_configs,\n", + " num_train_episodes,\n", + " eval_every,\n", + " hidden_layers_sizes,\n", + " replay_buffer_capacity,\n", + " reservoir_buffer_capacity,\n", + " anticipatory_param)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "x = [i*1000 for i in range(len(tf_kuhn_result))]\n", + "\n", + "plt.plot(x, tf_kuhn_result, label='tensorflow')\n", + "plt.plot(x, pt_kuhn_result, label='pytorch')\n", + "plt.title('Kuhn Poker')\n", + "plt.xlabel('Episodes')\n", + "plt.ylabel('Exploitability')\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "game = \"leduc_poker\"\n", + "num_players = 2\n", + "env_configs = {\"players\": num_players}\n", + "num_train_episodes = int(3e6)\n", + "eval_every = 100000\n", + "hidden_layers_sizes = [128]\n", + "replay_buffer_capacity = int(2e5)\n", + "reservoir_buffer_capacity = int(2e6)\n", + "anticipatory_param = 0.1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tf_leduc_result = tf_main(game, \n", + " env_configs,\n", + " num_train_episodes,\n", + 
" eval_every,\n", + " hidden_layers_sizes,\n", + " replay_buffer_capacity,\n", + " reservoir_buffer_capacity,\n", + " anticipatory_param)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pt_leduc_result = pt_main(game, \n", + " env_configs,\n", + " num_train_episodes,\n", + " eval_every,\n", + " hidden_layers_sizes,\n", + " replay_buffer_capacity,\n", + " reservoir_buffer_capacity,\n", + " anticipatory_param)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x = [i * 10000 for i in range(len(tf_leduc_result))]\n", + "\n", + "plt.plot(x, tf_leduc_result, label='tensorflow')\n", + "plt.plot(x, pt_leduc_result, label='pytorch')\n", + "plt.title('Leduc Poker')\n", + "plt.xlabel('Episodes')\n", + "plt.ylabel('Exploitability')\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/colabs/test_universal_poker.ipynb b/scenarios/bargaining/open_spiel/open_spiel/colabs/test_universal_poker.ipynb new file mode 100644 index 0000000..8018ed7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/colabs/test_universal_poker.ipynb @@ -0,0 +1,313 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "v8KR9V4Hy-vw" + }, + "source": [ + "# Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "idfu7sA0vExR" + }, + "outputs": [], + "source": [ + "from __future__ import absolute_import\n", + "from __future__ import division\n", + "from __future__ import print_function\n", + "\n", + "import sys\n", + "assert sys.version_info.major == 3\n", + "import os\n", + "\n", + "add_paths = True\n", + "if add_paths:\n", + " sys.path.insert(0, os.path.join(os.path.abspath(os.getcwd()), '..', '..'))\n", + " sys.path.insert(\n", + " 0,\n", + " os.path.join(os.path.abspath(os.getcwd()), '..', '..', 'build', 'python'))\n", + " import pyspiel\n", + " from pyspiel.universal_poker import load_universal_poker_from_acpc_gamedef\n", + "\n", + "\n", + "from open_spiel.python.algorithms import cfr\n", + "from open_spiel.python.algorithms import exploitability\n", + "from open_spiel.python.algorithms import expected_game_score\n", + "from open_spiel.python.bots import uniform_random\n", + "from open_spiel.python.visualizations import treeviz" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HLXNc0ZCvExt" + }, + "outputs": [], + "source": [ + "games_list = pyspiel.registered_names()\n", + "\n", + "print(\"Registered games:\")\n", + "print(games_list)\n", + "\n", + "game = pyspiel.load_game(\"universal_poker\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vqyfMHs2vEx7" + }, + "outputs": [], + "source": [ + "\"\"\"Test that Python and C++ bots can be called by a C++ algorithm.\"\"\"\n", + "\n", + "from absl.testing import absltest\n", + "import numpy as np\n", + "\n", + 
"from open_spiel.python.bots import uniform_random\n", + "\n", + "game = pyspiel.load_game(\"leduc_poker\")\n", + "bots = [\n", + " pyspiel.make_uniform_random_bot(0, 1234),\n", + " uniform_random.UniformRandomBot(1, np.random.RandomState(4321)),\n", + "]\n", + "results = np.array([\n", + " pyspiel.evaluate_bots(game.new_initial_state(), bots, iteration)\n", + " for iteration in range(10000)\n", + "])\n", + "leduc_average_results = np.mean(results, axis=0)\n", + "print(leduc_average_results)\n", + "\n", + "game = pyspiel.load_game(\"universal_poker\")\n", + "bots = [\n", + " pyspiel.make_uniform_random_bot(0, 1234),\n", + " uniform_random.UniformRandomBot(1, np.random.RandomState(4321)),\n", + "]\n", + "results = np.array([\n", + " pyspiel.evaluate_bots(game.new_initial_state(), bots, iteration)\n", + " for iteration in range(10000)\n", + "])\n", + "universal_poker_average_results = np.mean(results, axis=0)\n", + "print(universal_poker_average_results)\n", + "\n", + "#np.testing.assert_allclose(universal_poker_average_results, leduc_average_results, atol=0.1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RhI6kVnkvEyE" + }, + "outputs": [], + "source": [ + "universal_poker_kuhn_limit_3p = \"\"\"\\\n", + "GAMEDEF\n", + "limit\n", + "numPlayers = 3\n", + "numRounds = 1\n", + "blind = 1 1 1\n", + "raiseSize = 1\n", + "firstPlayer = 1\n", + "maxRaises = 1\n", + "numSuits = 1\n", + "numRanks = 4\n", + "numHoleCards = 1\n", + "numBoardCards = 0\n", + "END GAMEDEF\n", + "\"\"\"\n", + "\n", + "game = load_universal_poker_from_acpc_gamedef(universal_poker_kuhn_limit_3p)\n", + "str(game)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lpLJhzBEvEyM" + }, + "outputs": [], + "source": [ + "# Compare exloitability for two games\n", + "players = 2\n", + "iterations = 10\n", + "print_freq = 1\n", + "\n", + "def compare_exploitability(game_1, game_2):\n", + " cfr_solver_1 = cfr.CFRSolver(game_1)\n", + " cfr_solver_2 = cfr.CFRSolver(game_2)\n", + " for i in range(iterations):\n", + " cfr_solver_1.evaluate_and_update_policy()\n", + " cfr_solver_2.evaluate_and_update_policy()\n", + " if i % print_freq == 0:\n", + " conv_1 = exploitability.exploitability(game_1,\n", + " cfr_solver_1.average_policy())\n", + " conv_2 = exploitability.exploitability(game_2,\n", + " cfr_solver_2.average_policy())\n", + "\n", + " print(\"Iteration {} exploitability of the {} vs: {}\".format(\n", + " i, conv_1, conv_2))\n", + "\n", + " print(\"Final exploitability is {} vs {}\".format(conv_1, conv_2))\n", + "\n", + "\n", + "game_1 = pyspiel.load_game(\"kuhn_poker\",\n", + " {\"players\": 2})\n", + "\n", + "universal_poker_kuhn_limit_2p = \"\"\"\\\n", + "GAMEDEF\n", + "limit\n", + "numPlayers = 2\n", + "numRounds = 1\n", + "blind = 1 1\n", + "raiseSize = 1\n", + "firstPlayer = 1\n", + "maxRaises = 1\n", + "numSuits = 1\n", + "numRanks = 3\n", + "numHoleCards = 1\n", + "numBoardCards = 0\n", + "END GAMEDEF\n", + "\"\"\"\n", + "game_2 = load_universal_poker_from_acpc_gamedef(universal_poker_kuhn_limit_2p)\n", + "\n", + "compare_exploitability(game_1, game_2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0Zltqy5PNM8P" + }, + "outputs": [], + "source": [ + "game_1 = pyspiel.load_game(\"leduc_poker\",\n", + " {\"players\": 2})\n", + "# Taken verbatim from the linked paper above: \"In Leduc hold'em, the deck\n", + "# consists of two suits with three cards in each suit. 
There are two rounds.\n", + "# In the first round a single private card is dealt to each player. In the\n", + "# second round a single board card is revealed. There is a two-bet maximum,\n", + "# with raise amounts of 2 and 4 in the first and second round, respectively.\n", + "# Both players start the first round with 1 already in the pot.\n", + "\n", + "universal_poker_leduc_limit_2p = \"\"\"\\\n", + "GAMEDEF\n", + "limit\n", + "numPlayers = 2\n", + "numRounds = 2\n", + "blind = 1 1\n", + "raiseSize = 1 1\n", + "firstPlayer = 1 1\n", + "maxRaises = 2 2\n", + "raiseSize = 2 4\n", + "numSuits = 2\n", + "numRanks = 3\n", + "numHoleCards = 1\n", + "numBoardCards = 0 1\n", + "END GAMEDEF\n", + "\"\"\"\n", + "game_2 = load_universal_poker_from_acpc_gamedef(universal_poker_leduc_limit_2p)\n", + "\n", + "compare_exploitability(game_1, game_2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zk4rz8mvvEyb" + }, + "outputs": [], + "source": [ + "game = \"universal_poker\"\n", + "out = \"/tmp/gametree.png\"\n", + "prog = \"dot\"\n", + "group_infosets = False\n", + "group_terminal = False\n", + "verbose = False\n", + "\n", + "\n", + "def _zero_sum_node_decorator(state):\n", + " \"\"\"Custom node decorator that only shows the return of the first player.\"\"\"\n", + " attrs = treeviz.default_node_decorator(state) # get default attributes\n", + " if state.is_terminal():\n", + " attrs[\"label\"] = str(int(state.returns()[0]))\n", + " return attrs\n", + "\n", + "game = load_universal_poker_from_acpc_gamedef(universal_poker_kuhn_limit_2p)\n", + "game_type = game.get_type()\n", + "\n", + "if game_type.dynamics != pyspiel.GameType.Dynamics.SEQUENTIAL:\n", + " raise ValueError(\"Game must be sequential, not {}\".format(game_type.dynamics))\n", + "\n", + "if (game_type.utility == pyspiel.GameType.Utility.ZERO_SUM and\n", + " game.num_players() == 2):\n", + " gametree = treeviz.GameTree(\n", + " game,\n", + " node_decorator=_zero_sum_node_decorator,\n", + " group_infosets=group_infosets,\n", + " group_terminal=group_terminal)\n", + "else:\n", + " gametree = treeviz.GameTree(game) # use default decorators\n", + "\n", + "if verbose:\n", + " logging.info(\"Game tree:\\n%s\", gametree.to_string())\n", + "\n", + "gametree.draw(out, prog=prog)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4rvvGu65M1jk" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "last_runtime": { + "build_target": "//research/colab/notebook:notebook_backend_py3", + "kind": "private" + }, + "name": "test_universal_poker.ipynb", + "provenance": [ + { + "file_id": "1ZX9X01BBrKZp5EAIEXTLwzxuTbEj0rTJ", + "timestamp": 1575292378817 + } + ] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/data/paper_data/README.md b/scenarios/bargaining/open_spiel/open_spiel/data/paper_data/README.md new file mode 100644 index 0000000..83e3e03 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/data/paper_data/README.md @@ -0,0 +1,4 @@ +# Paper data + +This directory hosts data for reproducing paper results. 
Each paper should have +an associated directory, with data contained within. diff --git a/scenarios/bargaining/open_spiel/open_spiel/data/paper_data/pbe_rrps/README.md b/scenarios/bargaining/open_spiel/open_spiel/data/paper_data/pbe_rrps/README.md new file mode 100644 index 0000000..6329127 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/data/paper_data/pbe_rrps/README.md @@ -0,0 +1,6 @@ +The `bot_table_file.txt` is a data set described in +[Population-based Evaluation in Repeated RPS as a Benchmark for Multiagent RL](https://arxiv.org/abs/2303.03196) +and parsed by `python/examples/roshambo_population_example.py`. + +It contains a cross-table of the expected values for all possible match-ups +between the 43 RRPS bots, using an average of 1000 games per cell. diff --git a/scenarios/bargaining/open_spiel/open_spiel/data/paper_data/pbe_rrps/bot_table_file.txt b/scenarios/bargaining/open_spiel/open_spiel/data/paper_data/pbe_rrps/bot_table_file.txt new file mode 100644 index 0000000..cd206ea --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/data/paper_data/pbe_rrps/bot_table_file.txt @@ -0,0 +1,1849 @@ +('inocencio', 'addshiftbot3', 74.986) +('rotatebot', 'inocencio', -980.042) +('granite', 'copybot', 992.803) +('antiflatbot', 'addshiftbot3', -23.573) +('piedra', 'russrocker4', -40.917) +('halbot', 'piedra', 40.557) +('rockbot', 'sunNervebot', -976.261) +('textbot', 'biopic', -134.751) +('freqbot2', 'iocainebot', -914.753) +('predbot', 'mod1bot', -267.687) +('antiflatbot', 'inocencio', -978.604) +('markov5', 'inocencio', 48.798) +('debruijn81', 'switchbot', -1.321) +('mixed_strategy', 'shofar', -48.243) +('multibot', 'zq_move', -228.091) +('copybot', 'rotatebot', 1.0) +('pibot', 'actr_lag2_decay', 11.487) +('markov5', 'markovbails', -0.414) +('switchalot', 'marble', -149.899) +('greenberg', 'textbot', 122.033) +('pibot', 'zq_move', 15.64) +('foxtrotbot', 'zq_move', -20.823) +('rotatebot', 'sunCrazybot', -541.695) +('biopic', 'sunCrazybot', 505.011) +('mixed_strategy', 'zq_move', -3.045) +('piedra', 'antirotnbot', 39.819) +('multibot', 'actr_lag2_decay', -266.296) +('adddriftbot2', 'pibot', -0.706) +('copybot', 'multibot', -997.0) +('russrocker4', 'mod1bot', 9.249) +('debruijn81', 'markovbails', 10.269) +('copybot', 'addshiftbot3', -8.024) +('textbot', 'rotatebot', -11.0) +('rockbot', 'marble', -998.967) +('predbot', 'debruijn81', -71.182) +('driftbot', 'antiflatbot', 21.703) +('freqbot2', 'marble', -592.626) +('adddriftbot2', 'antirotnbot', -0.658) +('driftbot', 'textbot', 16.593) +('boom', 'markov5', -22.56) +('textbot', 'foxtrotbot', 0.359) +('robertot', 'multibot', 250.421) +('marble', 'copybot', 992.669) +('robertot', 'shofar', -1.559) +('predbot', 'marble', 20.408) +('multibot', 'freqbot2', 999.0) +('driftbot', 'russrocker4', -129.913) +('actr_lag2_decay', 'boom', 27.356) +('copybot', 'boom', -983.203) +('rockbot', 'boom', -997.0) +('markovbails', 'markov5', -1.407) +('textbot', 'russrocker4', -70.283) +('inocencio', 'flatbot3', 144.6) +('r226bot', 'addshiftbot3', 0.072) +('markov5', 'antiflatbot', 989.5) +('shofar', 'iocainebot', 5.55) +('rockbot', 'actr_lag2_decay', -996.832) +('iocainebot', 'sunNervebot', 18.537) +('foxtrotbot', 'sweetrock', -7.741) +('randbot', 'addshiftbot3', 0.272) +('flatbot3', 'flatbot3', -0.175) +('phasenbott', 'rotatebot', 991.493) +('rockbot', 'randbot', -1.072) +('pibot', 'predbot', 5.177) +('driftbot', 'rotatebot', 0.874) +('robertot', 'copybot', 934.836) +('sunCrazybot', 'zq_move', -287.442) +('greenberg', 'copybot', 992.93) 
+('flatbot3', 'actr_lag2_decay', -127.698) +('driftbot', 'switchbot', -1.036) +('robertot', 'antirotnbot', 51.531) +('copybot', 'biopic', -994.207) +('antirotnbot', 'copybot', 549.998) +('driftbot', 'flatbot3', -0.125) +('markov5', 'actr_lag2_decay', -0.87) +('pibot', 'sunNervebot', 1.163) +('adddriftbot2', 'robertot', -17.538) +('sunCrazybot', 'robertot', -417.373) +('predbot', 'iocainebot', -179.145) +('shofar', 'rockbot', 980.102) +('antirotnbot', 'sweetrock', -40.685) +('antirotnbot', 'robertot', -52.327) +('marble', 'peterbot', 896.913) +('mixed_strategy', 'driftbot', 35.392) +('adddriftbot2', 'predbot', -283.91) +('iocainebot', 'rockbot', 994.041) +('boom', 'peterbot', 421.005) +('markovbails', 'antiflatbot', 989.293) +('phasenbott', 'boom', 23.496) +('rotatebot', 'shofar', -964.207) +('switchalot', 'pibot', 0.948) +('switchalot', 'foxtrotbot', 0.599) +('inocencio', 'mod1bot', -449.134) +('freqbot2', 'peterbot', -434.713) +('foxtrotbot', 'debruijn81', 0.977) +('biopic', 'pibot', -1.328) +('robertot', 'piedra', 39.869) +('piedra', 'granite', -39.981) +('greenberg', 'antiflatbot', 994.021) +('russrocker4', 'russrocker4', -1.075) +('randbot', 'peterbot', 0.905) +('biopic', 'boom', 7.617) +('biopic', 'copybot', 994.309) +('switchbot', 'addshiftbot3', -1.049) +('russrocker4', 'multibot', 262.201) +('biopic', 'addshiftbot3', 58.854) +('phasenbott', 'flatbot3', 84.362) +('boom', 'addshiftbot3', 93.738) +('driftbot', 'adddriftbot2', -0.551) +('randbot', 'boom', -0.138) +('foxtrotbot', 'foxtrotbot', 0.43) +('halbot', 'marble', 242.104) +('inocencio', 'actr_lag2_decay', -262.067) +('piedra', 'multibot', 97.701) +('sunCrazybot', 'r226bot', 48.169) +('markovbails', 'r226bot', 155.955) +('iocainebot', 'halbot', 175.765) +('biopic', 'peterbot', 785.663) +('greenberg', 'inocencio', 282.051) +('multibot', 'randbot', -2.351) +('switchalot', 'multibot', -286.109) +('biopic', 'randbot', -0.518) +('randbot', 'inocencio', 0.1) +('antirotnbot', 'boom', -50.987) +('boom', 'boom', 0.0) +('inocencio', 'phasenbott', -138.173) +('iocainebot', 'boom', 19.331) +('randbot', 'robertot', 0.632) +('rockbot', 'mod1bot', -998.014) +('peterbot', 'granite', -899.573) +('zq_move', 'robertot', -86.489) +('foxtrotbot', 'biopic', -404.004) +('zq_move', 'pibot', -14.351) +('actr_lag2_decay', 'antiflatbot', 994.095) +('rockbot', 'markovbails', -994.021) +('halbot', 'addshiftbot3', 285.056) +('textbot', 'markov5', -28.733) +('antirotnbot', 'switchalot', 315.476) +('addshiftbot3', 'freqbot2', 0.174) +('actr_lag2_decay', 'predbot', 50.951) +('mixed_strategy', 'predbot', -54.997) +('robertot', 'halbot', -34.525) +('switchalot', 'freqbot2', 15.971) +('iocainebot', 'peterbot', 892.548) +('debruijn81', 'sunNervebot', 11.705) +('switchbot', 'sweetrock', -236.905) +('actr_lag2_decay', 'rockbot', 996.855) +('randbot', 'predbot', 0.772) +('addshiftbot3', 'marble', -93.983) +('halbot', 'copybot', 988.74) +('shofar', 'marble', 48.727) +('sunNervebot', 'marble', 83.646) +('addshiftbot3', 'peterbot', -33.928) +('piedra', 'peterbot', 529.826) +('piedra', 'markov5', -35.695) +('shofar', 'addshiftbot3', 14.768) +('predbot', 'biopic', -48.409) +('phasenbott', 'rockbot', 994.633) +('freqbot2', 'robertot', -846.262) +('zq_move', 'predbot', -168.205) +('mod1bot', 'sweetrock', 40.752) +('debruijn81', 'marble', 64.184) +('antirotnbot', 'halbot', -58.058) +('freqbot2', 'adddriftbot2', 1.256) +('predbot', 'rotatebot', 988.359) +('greenberg', 'marble', 193.46) +('mod1bot', 'driftbot', 241.005) +('debruijn81', 'antiflatbot', -109.155) +('marble', 
'greenberg', -194.057) +('r226bot', 'r226bot', -1.256) +('antirotnbot', 'marble', -55.246) +('markov5', 'biopic', 5.466) +('markovbails', 'addshiftbot3', 5.038) +('markovbails', 'halbot', 10.779) +('switchbot', 'copybot', 500.06) +('rotatebot', 'boom', -989.57) +('antiflatbot', 'randbot', -0.691) +('peterbot', 'marble', -898.091) +('actr_lag2_decay', 'markovbails', -0.024) +('mixed_strategy', 'sunCrazybot', 166.013) +('markovbails', 'textbot', 27.34) +('rockbot', 'zq_move', -996.729) +('textbot', 'halbot', -155.089) +('rockbot', 'halbot', -998.959) +('copybot', 'sweetrock', -992.488) +('switchbot', 'inocencio', -263.203) +('sunCrazybot', 'driftbot', 51.435) +('granite', 'actr_lag2_decay', -120.592) +('halbot', 'flatbot3', 129.746) +('inocencio', 'driftbot', 95.001) +('flatbot3', 'peterbot', -175.047) +('debruijn81', 'debruijn81', 0.0) +('switchbot', 'randbot', 1.705) +('shofar', 'granite', 47.47) +('antiflatbot', 'sweetrock', -995.145) +('inocencio', 'pibot', -21.862) +('sunNervebot', 'debruijn81', -11.73) +('switchbot', 'adddriftbot2', -0.48) +('sunNervebot', 'antiflatbot', 980.592) +('predbot', 'addshiftbot3', 123.747) +('flatbot3', 'boom', -38.833) +('actr_lag2_decay', 'russrocker4', -9.464) +('mod1bot', 'flatbot3', 91.972) +('textbot', 'granite', -178.163) +('rockbot', 'markov5', -994.102) +('adddriftbot2', 'boom', -39.16) +('greenberg', 'multibot', 307.102) +('antiflatbot', 'multibot', -998.05) +('rotatebot', 'flatbot3', 0.576) +('driftbot', 'boom', -35.968) +('markovbails', 'mod1bot', 6.728) +('russrocker4', 'inocencio', 193.319) +('switchbot', 'phasenbott', -238.177) +('debruijn81', 'piedra', 24.627) +('mixed_strategy', 'switchbot', 145.903) +('actr_lag2_decay', 'adddriftbot2', 8.403) +('sweetrock', 'adddriftbot2', 43.32) +('debruijn81', 'peterbot', -9.877) +('marble', 'inocencio', 563.134) +('rotatebot', 'sweetrock', -992.099) +('mod1bot', 'debruijn81', -34.368) +('russrocker4', 'phasenbott', -529.751) +('driftbot', 'debruijn81', 8.603) +('iocainebot', 'debruijn81', 21.828) +('sweetrock', 'antiflatbot', 994.798) +('greenberg', 'actr_lag2_decay', 234.315) +('marble', 'phasenbott', -221.485) +('sunNervebot', 'russrocker4', -12.265) +('r226bot', 'textbot', -5.226) +('granite', 'shofar', -48.617) +('sunNervebot', 'rockbot', 977.215) +('mod1bot', 'switchalot', 231.318) +('sunCrazybot', 'inocencio', -219.184) +('predbot', 'sunNervebot', 24.996) +('russrocker4', 'switchalot', 124.768) +('peterbot', 'sweetrock', -510.124) +('switchbot', 'freqbot2', 25.819) +('rockbot', 'adddriftbot2', 1.913) +('sunCrazybot', 'rockbot', 963.854) +('markovbails', 'granite', 30.528) +('sweetrock', 'rotatebot', 992.119) +('halbot', 'greenberg', -157.71) +('sunNervebot', 'sunCrazybot', 133.172) +('markovbails', 'sunNervebot', 3.273) +('markovbails', 'marble', 30.599) +('boom', 'sweetrock', 37.85) +('phasenbott', 'freqbot2', 929.019) +('halbot', 'multibot', 206.598) +('mod1bot', 'shofar', -3.194) +('russrocker4', 'driftbot', 132.611) +('pibot', 'inocencio', 21.85) +('granite', 'mod1bot', -84.072) +('piedra', 'predbot', -40.677) +('markov5', 'textbot', 28.942) +('sweetrock', 'actr_lag2_decay', -38.601) +('inocencio', 'robertot', -37.277) +('r226bot', 'mixed_strategy', -373.621) +('rockbot', 'flatbot3', -0.06) +('switchalot', 'halbot', -235.782) +('flatbot3', 'granite', -168.301) +('adddriftbot2', 'driftbot', 0.589) +('rotatebot', 'halbot', -990.693) +('foxtrotbot', 'shofar', 0.743) +('shofar', 'driftbot', 66.647) +('mixed_strategy', 'copybot', 967.076) +('markov5', 'sunNervebot', 2.988) +('zq_move', 'inocencio', 
263.952) +('markov5', 'mod1bot', 7.27) +('sunCrazybot', 'pibot', -3.519) +('sunNervebot', 'actr_lag2_decay', -6.683) +('mod1bot', 'antiflatbot', 995.511) +('granite', 'antirotnbot', 55.111) +('predbot', 'pibot', -5.465) +('antirotnbot', 'greenberg', -58.009) +('robertot', 'markovbails', -5.699) +('switchalot', 'r226bot', 0.392) +('russrocker4', 'shofar', 1.468) +('marble', 'marble', -1.052) +('foxtrotbot', 'iocainebot', -349.675) +('switchbot', 'shofar', -442.3) +('rockbot', 'peterbot', -999.12) +('mixed_strategy', 'granite', -16.665) +('addshiftbot3', 'antirotnbot', -10.59) +('antirotnbot', 'flatbot3', 202.873) +('rotatebot', 'markov5', -991.894) +('iocainebot', 'phasenbott', 112.593) +('debruijn81', 'biopic', 14.807) +('greenberg', 'mixed_strategy', 45.237) +('markovbails', 'sunCrazybot', 218.197) +('russrocker4', 'freqbot2', 614.332) +('peterbot', 'textbot', -23.181) +('sweetrock', 'phasenbott', -38.827) +('debruijn81', 'addshiftbot3', 0.803) +('granite', 'switchbot', 244.141) +('switchalot', 'copybot', 320.069) +('mixed_strategy', 'freqbot2', 489.957) +('biopic', 'markov5', -5.219) +('driftbot', 'freqbot2', -7.897) +('biopic', 'debruijn81', -15.576) +('boom', 'inocencio', 162.363) +('adddriftbot2', 'granite', -27.082) +('iocainebot', 'mixed_strategy', 60.377) +('multibot', 'adddriftbot2', 7.514) +('granite', 'phasenbott', -221.125) +('markov5', 'addshiftbot3', 4.028) +('mod1bot', 'iocainebot', -38.208) +('antirotnbot', 'debruijn81', -52.703) +('marble', 'shofar', -48.493) +('pibot', 'driftbot', -5.879) +('biopic', 'shofar', -12.502) +('robertot', 'debruijn81', -13.148) +('shofar', 'antiflatbot', 973.123) +('multibot', 'rotatebot', 997.0) +('predbot', 'driftbot', 260.117) +('markovbails', 'switchalot', 98.824) +('phasenbott', 'inocencio', 140.959) +('markovbails', 'inocencio', 49.751) +('halbot', 'phasenbott', -155.467) +('shofar', 'markov5', -3.0) +('switchbot', 'halbot', -441.035) +('rockbot', 'russrocker4', -999.003) +('marble', 'predbot', -19.475) +('sunNervebot', 'boom', 17.915) +('antiflatbot', 'copybot', -997.71) +('r226bot', 'adddriftbot2', -0.378) +('rotatebot', 'textbot', 11.0) +('textbot', 'antirotnbot', -55.972) +('peterbot', 'robertot', -683.279) +('predbot', 'sunCrazybot', 272.557) +('switchbot', 'switchbot', -2.026) +('greenberg', 'antirotnbot', 58.399) +('iocainebot', 'zq_move', 298.279) +('phasenbott', 'foxtrotbot', 380.192) +('greenberg', 'markovbails', -2.396) +('flatbot3', 'predbot', -86.64) +('antirotnbot', 'markov5', -1.759) +('peterbot', 'copybot', 975.877) +('halbot', 'mixed_strategy', 58.57) +('piedra', 'zq_move', -40.109) +('r226bot', 'multibot', -386.112) +('foxtrotbot', 'antirotnbot', 15.45) +('phasenbott', 'halbot', 156.105) +('textbot', 'mixed_strategy', -158.961) +('robertot', 'rockbot', 997.068) +('shofar', 'flatbot3', 26.657) +('boom', 'halbot', -24.642) +('iocainebot', 'actr_lag2_decay', 60.087) +('addshiftbot3', 'halbot', -285.29) +('sunCrazybot', 'sunCrazybot', 0.161) +('boom', 'r226bot', 380.127) +('copybot', 'switchbot', -499.862) +('copybot', 'rockbot', 1000.0) +('greenberg', 'sunNervebot', 36.804) +('zq_move', 'antiflatbot', 995.22) +('rotatebot', 'foxtrotbot', -0.453) +('adddriftbot2', 'actr_lag2_decay', -9.333) +('markov5', 'granite', 31.311) +('markovbails', 'greenberg', 1.9) +('phasenbott', 'markovbails', 16.492) +('randbot', 'sunCrazybot', -0.87) +('predbot', 'flatbot3', 87.919) +('freqbot2', 'markovbails', -456.1) +('zq_move', 'adddriftbot2', 55.699) +('sunCrazybot', 'shofar', -136.738) +('addshiftbot3', 'rotatebot', 5.71) 
+('actr_lag2_decay', 'biopic', -31.833) +('iocainebot', 'foxtrotbot', 349.138) +('debruijn81', 'randbot', -1.099) +('predbot', 'freqbot2', 589.059) +('robertot', 'adddriftbot2', 18.887) +('debruijn81', 'greenberg', -301.679) +('addshiftbot3', 'driftbot', -1.423) +('sunNervebot', 'pibot', -1.033) +('randbot', 'foxtrotbot', 0.334) +('sunCrazybot', 'switchalot', -2.263) +('inocencio', 'adddriftbot2', 10.441) +('zq_move', 'halbot', -255.469) +('r226bot', 'markov5', -155.536) +('boom', 'biopic', -6.006) +('robertot', 'driftbot', 71.572) +('mixed_strategy', 'iocainebot', -59.17) +('russrocker4', 'markov5', 1.281) +('sunNervebot', 'inocencio', 37.59) +('piedra', 'textbot', 167.976) +('robertot', 'foxtrotbot', -3.53) +('markov5', 'antirotnbot', 0.864) +('pibot', 'markovbails', -10.86) +('foxtrotbot', 'sunNervebot', 4.676) +('halbot', 'predbot', 48.01) +('debruijn81', 'boom', -2.844) +('rotatebot', 'robertot', -994.398) +('driftbot', 'inocencio', -94.575) +('markov5', 'pibot', 11.858) +('r226bot', 'antiflatbot', 203.245) +('adddriftbot2', 'peterbot', -15.675) +('adddriftbot2', 'sunNervebot', -98.098) +('peterbot', 'zq_move', -906.998) +('randbot', 'driftbot', 0.861) +('boom', 'russrocker4', -27.928) +('switchalot', 'sunCrazybot', -0.544) +('randbot', 'granite', 0.33) +('russrocker4', 'flatbot3', 105.254) +('shofar', 'sunNervebot', 4.031) +('predbot', 'granite', 22.405) +('antiflatbot', 'greenberg', -993.912) +('robertot', 'textbot', 172.194) +('antiflatbot', 'switchbot', 26.511) +('actr_lag2_decay', 'shofar', 3.029) +('sunNervebot', 'copybot', 946.031) +('zq_move', 'debruijn81', -34.458) +('multibot', 'sweetrock', -100.272) +('greenberg', 'rockbot', 998.086) +('actr_lag2_decay', 'switchbot', 247.311) +('halbot', 'mod1bot', -23.379) +('markovbails', 'adddriftbot2', 2.179) +('rotatebot', 'phasenbott', -991.563) +('pibot', 'randbot', -0.197) +('shofar', 'biopic', 12.269) +('russrocker4', 'boom', 28.817) +('piedra', 'flatbot3', 142.157) +('copybot', 'randbot', 0.671) +('rockbot', 'addshiftbot3', -1.537) +('greenberg', 'boom', 22.614) +('foxtrotbot', 'addshiftbot3', 0.272) +('piedra', 'halbot', -40.168) +('mod1bot', 'greenberg', -90.158) +('r226bot', 'sweetrock', -391.386) +('predbot', 'randbot', -1.427) +('shofar', 'inocencio', 149.271) +('driftbot', 'phasenbott', -86.747) +('peterbot', 'iocainebot', -893.259) +('greenberg', 'rotatebot', 996.119) +('russrocker4', 'copybot', 992.347) +('driftbot', 'randbot', 0.459) +('antiflatbot', 'rotatebot', 665.371) +('marble', 'robertot', -55.265) +('biopic', 'zq_move', 98.029) +('antirotnbot', 'textbot', 55.913) +('rotatebot', 'biopic', -995.105) +('pibot', 'pibot', 0.0) +('copybot', 'mixed_strategy', -967.329) +('mixed_strategy', 'antiflatbot', 980.036) +('robertot', 'antiflatbot', 995.54) +('addshiftbot3', 'boom', -93.231) +('flatbot3', 'biopic', -148.989) +('granite', 'greenberg', -193.603) +('switchalot', 'sweetrock', -150.169) +('switchbot', 'piedra', -241.381) +('textbot', 'iocainebot', -108.38) +('freqbot2', 'pibot', -30.0) +('antiflatbot', 'actr_lag2_decay', -993.711) +('adddriftbot2', 'copybot', 1.413) +('antiflatbot', 'granite', -996.134) +('piedra', 'antiflatbot', 994.631) +('flatbot3', 'adddriftbot2', 0.115) +('rotatebot', 'peterbot', -998.121) +('freqbot2', 'antirotnbot', -575.872) +('switchalot', 'randbot', -0.608) +('sunNervebot', 'randbot', 1.357) +('greenberg', 'granite', 190.894) +('flatbot3', 'inocencio', -143.615) +('zq_move', 'copybot', 992.526) +('multibot', 'biopic', -247.977) +('textbot', 'pibot', 81.0) +('flatbot3', 'rotatebot', 0.206) 
+('zq_move', 'addshiftbot3', 306.45) +('phasenbott', 'zq_move', 264.645) +('rockbot', 'greenberg', -998.136) +('rockbot', 'piedra', -996.64) +('mixed_strategy', 'addshiftbot3', 46.723) +('greenberg', 'pibot', 8.437) +('multibot', 'piedra', -95.667) +('shofar', 'pibot', -3.514) +('predbot', 'copybot', 985.008) +('switchalot', 'piedra', -150.679) +('driftbot', 'granite', -45.844) +('russrocker4', 'peterbot', 927.462) +('sweetrock', 'biopic', -39.159) +('randbot', 'rotatebot', -0.096) +('boom', 'antirotnbot', 50.974) +('sweetrock', 'flatbot3', 137.116) +('inocencio', 'shofar', -144.287) +('russrocker4', 'debruijn81', -33.719) +('markov5', 'iocainebot', -16.306) +('sweetrock', 'antirotnbot', 39.976) +('multibot', 'granite', -285.364) +('addshiftbot3', 'antiflatbot', 24.676) +('textbot', 'switchbot', 0.636) +('multibot', 'peterbot', 345.733) +('antirotnbot', 'mod1bot', -54.594) +('phasenbott', 'sweetrock', 39.178) +('switchalot', 'russrocker4', -124.459) +('zq_move', 'sunCrazybot', 287.223) +('shofar', 'peterbot', 115.846) +('mod1bot', 'boom', 20.644) +('granite', 'robertot', -52.228) +('boom', 'mixed_strategy', 29.01) +('sunNervebot', 'phasenbott', -30.452) +('addshiftbot3', 'multibot', -29.366) +('marble', 'rockbot', 998.986) +('phasenbott', 'shofar', 3.282) +('sunCrazybot', 'multibot', -39.99) +('mixed_strategy', 'phasenbott', -67.347) +('freqbot2', 'flatbot3', -236.248) +('switchbot', 'textbot', -1.029) +('piedra', 'boom', -38.102) +('zq_move', 'peterbot', 907.788) +('sweetrock', 'russrocker4', -39.003) +('markov5', 'multibot', 170.387) +('iocainebot', 'shofar', -5.006) +('switchbot', 'boom', -412.728) +('markovbails', 'boom', 22.876) +('mixed_strategy', 'debruijn81', -57.472) +('russrocker4', 'halbot', -96.655) +('antirotnbot', 'adddriftbot2', 0.929) +('pibot', 'flatbot3', -0.352) +('halbot', 'textbot', 154.466) +('granite', 'pibot', -18.023) +('textbot', 'multibot', -123.0) +('randbot', 'zq_move', 0.344) +('copybot', 'markovbails', -6.28) +('sunNervebot', 'mod1bot', -45.49) +('sweetrock', 'textbot', 164.784) +('sunNervebot', 'addshiftbot3', 94.403) +('iocainebot', 'iocainebot', -1.873) +('boom', 'multibot', 229.05) +('piedra', 'piedra', -1.089) +('piedra', 'actr_lag2_decay', -37.799) +('foxtrotbot', 'rockbot', 0.355) +('predbot', 'multibot', 197.283) +('boom', 'adddriftbot2', 37.165) +('antiflatbot', 'debruijn81', 108.829) +('switchalot', 'switchalot', -1.077) +('rockbot', 'driftbot', 1.458) +('mixed_strategy', 'halbot', -58.768) +('freqbot2', 'addshiftbot3', -0.603) +('boom', 'switchalot', 157.399) +('marble', 'foxtrotbot', 49.715) +('mixed_strategy', 'sweetrock', 7.039) +('biopic', 'multibot', 246.758) +('peterbot', 'driftbot', 28.053) +('adddriftbot2', 'mixed_strategy', -9.253) +('multibot', 'predbot', -197.355) +('boom', 'rotatebot', 989.358) +('antirotnbot', 'piedra', -39.696) +('iocainebot', 'russrocker4', 520.905) +('halbot', 'debruijn81', -65.983) +('driftbot', 'shofar', -66.386) +('granite', 'zq_move', 27.18) +('zq_move', 'russrocker4', -165.709) +('switchbot', 'flatbot3', -0.263) +('markov5', 'boom', 22.707) +('iocainebot', 'greenberg', 0.418) +('inocencio', 'switchbot', 261.977) +('peterbot', 'mod1bot', -579.956) +('sunNervebot', 'greenberg', -34.908) +('actr_lag2_decay', 'granite', 121.419) +('antirotnbot', 'shofar', -43.94) +('switchbot', 'antiflatbot', -25.581) +('predbot', 'foxtrotbot', -18.573) +('predbot', 'antirotnbot', 49.174) +('biopic', 'foxtrotbot', 403.723) +('sweetrock', 'copybot', 992.417) +('sunCrazybot', 'peterbot', 101.162) +('textbot', 'shofar', -110.914) 
+('sunCrazybot', 'debruijn81', -3.147) +('zq_move', 'foxtrotbot', 21.617) +('sweetrock', 'pibot', -14.019) +('mixed_strategy', 'mixed_strategy', -1.322) +('foxtrotbot', 'driftbot', 0.868) +('inocencio', 'halbot', -243.439) +('sunNervebot', 'sweetrock', 40.36) +('driftbot', 'sweetrock', -24.078) +('rotatebot', 'driftbot', -0.62) +('adddriftbot2', 'adddriftbot2', 0.256) +('biopic', 'sunNervebot', -7.633) +('switchbot', 'switchalot', -0.925) +('shofar', 'switchbot', 442.856) +('piedra', 'marble', -41.441) +('textbot', 'switchalot', -0.203) +('predbot', 'actr_lag2_decay', -50.555) +('markov5', 'russrocker4', -0.935) +('rotatebot', 'debruijn81', -21.0) +('antirotnbot', 'rotatebot', 997.968) +('russrocker4', 'granite', 149.44) +('antiflatbot', 'peterbot', -992.82) +('addshiftbot3', 'rockbot', 2.125) +('antiflatbot', 'zq_move', -994.708) +('switchalot', 'markovbails', -98.013) +('robertot', 'markov5', -6.678) +('driftbot', 'iocainebot', -181.916) +('piedra', 'sunCrazybot', 175.389) +('phasenbott', 'russrocker4', 530.433) +('shofar', 'copybot', 963.272) +('mixed_strategy', 'rockbot', 991.823) +('textbot', 'peterbot', 23.38) +('foxtrotbot', 'marble', -50.117) +('phasenbott', 'antiflatbot', 989.027) +('antiflatbot', 'textbot', -111.985) +('antirotnbot', 'russrocker4', -58.616) +('antirotnbot', 'biopic', -45.083) +('markovbails', 'freqbot2', 454.959) +('foxtrotbot', 'sunCrazybot', 0.721) +('driftbot', 'markovbails', 0.092) +('piedra', 'pibot', -14.905) +('sunNervebot', 'biopic', 7.186) +('antiflatbot', 'flatbot3', 416.917) +('addshiftbot3', 'switchalot', 1.309) +('boom', 'phasenbott', -23.551) +('greenberg', 'randbot', 0.856) +('foxtrotbot', 'robertot', 3.277) +('rotatebot', 'r226bot', 0.762) +('robertot', 'biopic', -23.654) +('sweetrock', 'sweetrock', -0.579) +('predbot', 'r226bot', 396.94) +('freqbot2', 'biopic', -654.456) +('russrocker4', 'switchbot', 247.719) +('textbot', 'debruijn81', -23.0) +('zq_move', 'mixed_strategy', 4.836) +('textbot', 'freqbot2', -185.0) +('antiflatbot', 'shofar', -972.904) +('inocencio', 'antirotnbot', -408.111) +('inocencio', 'inocencio', 0.136) +('debruijn81', 'rotatebot', 21.0) +('phasenbott', 'marble', 221.816) +('sunCrazybot', 'textbot', 8.585) +('mixed_strategy', 'textbot', 158.792) +('debruijn81', 'antirotnbot', 51.567) +('granite', 'inocencio', 574.91) +('granite', 'addshiftbot3', 94.634) +('mixed_strategy', 'piedra', 10.232) +('freqbot2', 'driftbot', 8.108) +('debruijn81', 'robertot', 13.821) +('textbot', 'robertot', -172.426) +('textbot', 'r226bot', 6.365) +('copybot', 'antiflatbot', 997.682) +('sunCrazybot', 'rotatebot', 536.059) +('robertot', 'addshiftbot3', 79.207) +('flatbot3', 'sunNervebot', -43.764) +('antirotnbot', 'phasenbott', -57.805) +('multibot', 'phasenbott', -223.051) +('phasenbott', 'mod1bot', 34.873) +('freqbot2', 'switchalot', -15.696) +('foxtrotbot', 'randbot', -0.37) +('peterbot', 'sunNervebot', -224.797) +('mixed_strategy', 'robertot', -47.875) +('rotatebot', 'multibot', -997.0) +('randbot', 'antirotnbot', 0.155) +('addshiftbot3', 'greenberg', -328.311) +('r226bot', 'piedra', -392.594) +('boom', 'piedra', 37.773) +('freqbot2', 'debruijn81', -128.0) +('multibot', 'marble', -283.166) +('granite', 'multibot', 283.657) +('greenberg', 'addshiftbot3', 328.737) +('textbot', 'marble', -178.161) +('foxtrotbot', 'phasenbott', -378.512) +('markov5', 'freqbot2', 455.231) +('sunCrazybot', 'markovbails', -216.978) +('sunNervebot', 'switchbot', 235.712) +('addshiftbot3', 'markov5', -4.098) +('randbot', 'switchalot', 0.563) +('mod1bot', 'actr_lag2_decay', 
3.058) +('sunNervebot', 'zq_move', 170.207) +('russrocker4', 'pibot', -7.233) +('copybot', 'inocencio', -781.895) +('sunNervebot', 'halbot', -8.581) +('sunCrazybot', 'adddriftbot2', 6.209) +('rotatebot', 'pibot', 11.0) +('piedra', 'foxtrotbot', 5.906) +('driftbot', 'rockbot', 0.297) +('switchalot', 'mod1bot', -231.509) +('halbot', 'inocencio', 253.26) +('halbot', 'driftbot', 66.411) +('randbot', 'adddriftbot2', 0.511) +('driftbot', 'predbot', -260.84) +('phasenbott', 'greenberg', -48.322) +('randbot', 'actr_lag2_decay', -0.418) +('inocencio', 'greenberg', -282.251) +('pibot', 'textbot', -81.0) +('mixed_strategy', 'rotatebot', 957.22) +('switchbot', 'foxtrotbot', 0.156) +('flatbot3', 'sweetrock', -139.009) +('freqbot2', 'rotatebot', 0.0) +('halbot', 'switchbot', 440.276) +('piedra', 'switchalot', 150.692) +('antirotnbot', 'r226bot', 153.149) +('r226bot', 'switchalot', -0.253) +('randbot', 'halbot', -1.197) +('markov5', 'markov5', 0.262) +('r226bot', 'flatbot3', -0.688) +('driftbot', 'foxtrotbot', -0.749) +('debruijn81', 'copybot', -1.0) +('markovbails', 'multibot', 170.415) +('marble', 'piedra', 40.022) +('rockbot', 'switchalot', -0.167) +('mod1bot', 'marble', 82.666) +('shofar', 'shofar', 0.083) +('iocainebot', 'switchbot', 222.829) +('inocencio', 'sweetrock', -204.154) +('adddriftbot2', 'antiflatbot', -1.458) +('antirotnbot', 'markovbails', -0.786) +('mixed_strategy', 'marble', -18.939) +('sunCrazybot', 'actr_lag2_decay', -510.687) +('debruijn81', 'rockbot', 0.0) +('markov5', 'r226bot', 156.505) +('flatbot3', 'phasenbott', -84.968) +('peterbot', 'rockbot', 999.108) +('mod1bot', 'robertot', 5.194) +('antirotnbot', 'sunNervebot', -44.468) +('switchalot', 'peterbot', -122.892) +('addshiftbot3', 'biopic', -56.939) +('markov5', 'halbot', 13.578) +('adddriftbot2', 'greenberg', -247.288) +('biopic', 'biopic', 0.67) +('freqbot2', 'mod1bot', -592.258) +('marble', 'addshiftbot3', 92.75) +('switchalot', 'iocainebot', -45.296) +('freqbot2', 'sunCrazybot', -136.343) +('switchbot', 'peterbot', -247.958) +('antirotnbot', 'freqbot2', 574.402) +('switchbot', 'rockbot', -0.665) +('peterbot', 'adddriftbot2', 15.152) +('greenberg', 'driftbot', 263.915) +('russrocker4', 'predbot', 93.775) +('randbot', 'markov5', 0.518) +('marble', 'sunNervebot', -83.045) +('driftbot', 'switchalot', 0.437) +('flatbot3', 'multibot', -159.996) +('shofar', 'mixed_strategy', 48.091) +('piedra', 'inocencio', 216.275) +('iocainebot', 'biopic', 36.492) +('actr_lag2_decay', 'randbot', 0.37) +('pibot', 'switchalot', 0.013) +('sunCrazybot', 'antirotnbot', -66.871) +('r226bot', 'actr_lag2_decay', -308.935) +('piedra', 'freqbot2', 592.155) +('boom', 'robertot', 0.74) +('phasenbott', 'switchbot', 238.118) +('phasenbott', 'randbot', 0.189) +('mixed_strategy', 'antirotnbot', 12.087) +('sweetrock', 'peterbot', 502.027) +('greenberg', 'zq_move', 369.888) +('r226bot', 'rotatebot', -0.84) +('markovbails', 'mixed_strategy', 32.441) +('pibot', 'r226bot', 1.923) +('antiflatbot', 'sunNervebot', -979.541) +('driftbot', 'robertot', -71.103) +('russrocker4', 'markovbails', 2.653) +('predbot', 'inocencio', 472.975) +('debruijn81', 'pibot', -1.0) +('copybot', 'pibot', -22.0) +('peterbot', 'mixed_strategy', -214.448) +('sweetrock', 'r226bot', 391.638) +('r226bot', 'pibot', -3.003) +('markov5', 'randbot', -1.313) +('switchalot', 'greenberg', -278.209) +('piedra', 'greenberg', -39.292) +('freqbot2', 'copybot', -600.0) +('sunNervebot', 'iocainebot', -19.102) +('multibot', 'multibot', 0.0) +('halbot', 'rotatebot', 990.679) +('halbot', 'antiflatbot', 996.73) 
+('peterbot', 'inocencio', -125.115) +('iocainebot', 'inocencio', 241.425) +('marble', 'debruijn81', -64.492) +('freqbot2', 'freqbot2', 0.0) +('pibot', 'peterbot', 16.925) +('actr_lag2_decay', 'actr_lag2_decay', -1.195) +('adddriftbot2', 'piedra', -40.135) +('rotatebot', 'zq_move', -992.184) +('sweetrock', 'markovbails', -36.595) +('biopic', 'inocencio', 132.579) +('antirotnbot', 'switchbot', 497.3) +('biopic', 'piedra', 39.245) +('adddriftbot2', 'debruijn81', -0.515) +('actr_lag2_decay', 'switchalot', 141.738) +('multibot', 'russrocker4', -265.404) +('mixed_strategy', 'adddriftbot2', 8.915) +('predbot', 'sweetrock', 40.069) +('flatbot3', 'shofar', -26.527) +('russrocker4', 'antiflatbot', 997.549) +('driftbot', 'biopic', -71.626) +('r226bot', 'russrocker4', -308.653) +('piedra', 'mixed_strategy', -8.75) +('markovbails', 'driftbot', -0.522) +('markovbails', 'antirotnbot', 1.18) +('rockbot', 'switchbot', 0.015) +('actr_lag2_decay', 'halbot', 2.408) +('sunCrazybot', 'sweetrock', -188.576) +('sweetrock', 'robertot', -39.824) +('debruijn81', 'flatbot3', 0.248) +('textbot', 'inocencio', -132.5) +('russrocker4', 'randbot', -0.121) +('zq_move', 'greenberg', -368.744) +('markovbails', 'pibot', 10.455) +('boom', 'antiflatbot', 995.078) +('foxtrotbot', 'flatbot3', -0.677) +('mod1bot', 'randbot', -0.05) +('sweetrock', 'piedra', 2.002) +('switchalot', 'mixed_strategy', -71.114) +('halbot', 'iocainebot', -176.229) +('freqbot2', 'sunNervebot', -392.087) +('boom', 'pibot', -8.522) +('zq_move', 'piedra', 39.745) +('sweetrock', 'switchalot', 148.428) +('robertot', 'r226bot', 392.6) +('sunCrazybot', 'halbot', -376.017) +('mod1bot', 'pibot', -6.309) +('halbot', 'actr_lag2_decay', -4.23) +('randbot', 'mixed_strategy', -1.064) +('marble', 'driftbot', 45.902) +('shofar', 'piedra', 38.15) +('boom', 'switchbot', 410.67) +('copybot', 'zq_move', -992.679) +('mod1bot', 'foxtrotbot', -11.726) +('antiflatbot', 'foxtrotbot', 0.244) +('copybot', 'phasenbott', -986.007) +('boom', 'copybot', 983.835) +('phasenbott', 'copybot', 986.05) +('antirotnbot', 'driftbot', 6.688) +('addshiftbot3', 'sunNervebot', -94.016) +('debruijn81', 'markov5', 10.463) +('actr_lag2_decay', 'flatbot3', 128.568) +('halbot', 'zq_move', 254.938) +('foxtrotbot', 'granite', -49.675) +('piedra', 'markovbails', -35.172) +('textbot', 'antiflatbot', 112.001) +('markov5', 'peterbot', 21.161) +('rockbot', 'debruijn81', 0.0) +('markovbails', 'flatbot3', 78.103) +('phasenbott', 'switchalot', 83.403) +('russrocker4', 'biopic', 9.535) +('actr_lag2_decay', 'piedra', 40.055) +('foxtrotbot', 'piedra', -7.805) +('iocainebot', 'antirotnbot', 57.557) +('mod1bot', 'switchbot', 444.73) +('freqbot2', 'phasenbott', -929.5) +('randbot', 'shofar', 0.854) +('robertot', 'robertot', 1.015) +('addshiftbot3', 'mixed_strategy', -45.56) +('phasenbott', 'mixed_strategy', 70.992) +('switchbot', 'rotatebot', -0.782) +('phasenbott', 'peterbot', 922.36) +('robertot', 'flatbot3', 61.97) +('randbot', 'r226bot', 2.04) +('antirotnbot', 'foxtrotbot', -16.926) +('boom', 'markovbails', -22.777) +('textbot', 'sweetrock', -164.545) +('biopic', 'rockbot', 997.507) +('antiflatbot', 'markovbails', -989.257) +('shofar', 'boom', 20.873) +('iocainebot', 'rotatebot', 986.535) +('multibot', 'shofar', -117.231) +('debruijn81', 'inocencio', 38.486) +('markov5', 'piedra', 36.123) +('rockbot', 'antirotnbot', -998.028) +('predbot', 'peterbot', 576.97) +('phasenbott', 'predbot', 130.472) +('greenberg', 'greenberg', 0.992) +('sweetrock', 'sunNervebot', -38.773) +('antirotnbot', 'antiflatbot', 994.231) 
+('switchbot', 'actr_lag2_decay', -249.548) +('switchbot', 'marble', -244.6) +('greenberg', 'robertot', 28.528) +('switchalot', 'actr_lag2_decay', -144.796) +('greenberg', 'predbot', 240.646) +('sunNervebot', 'flatbot3', 42.951) +('granite', 'halbot', -241.84) +('mixed_strategy', 'russrocker4', -55.644) +('peterbot', 'rotatebot', 998.101) +('switchalot', 'shofar', -171.876) +('inocencio', 'zq_move', -272.622) +('pibot', 'markov5', -13.074) +('copybot', 'robertot', -935.121) +('actr_lag2_decay', 'marble', 121.013) +('flatbot3', 'textbot', -0.114) +('mixed_strategy', 'foxtrotbot', -4.642) +('freqbot2', 'actr_lag2_decay', -574.953) +('zq_move', 'sweetrock', 39.856) +('r226bot', 'predbot', -396.929) +('addshiftbot3', 'pibot', 0.065) +('biopic', 'driftbot', 71.939) +('marble', 'randbot', -0.083) +('granite', 'foxtrotbot', 49.583) +('multibot', 'driftbot', 249.419) +('pibot', 'phasenbott', 6.554) +('multibot', 'halbot', -205.807) +('predbot', 'rockbot', 994.599) +('antiflatbot', 'pibot', 10.962) +('phasenbott', 'granite', 221.777) +('russrocker4', 'antirotnbot', 58.458) +('textbot', 'mod1bot', -134.542) +('iocainebot', 'mod1bot', 39.145) +('predbot', 'phasenbott', -130.389) +('adddriftbot2', 'foxtrotbot', 0.466) +('flatbot3', 'switchbot', -0.419) +('debruijn81', 'mod1bot', 35.206) +('biopic', 'rotatebot', 995.155) +('russrocker4', 'addshiftbot3', 340.883) +('granite', 'russrocker4', -147.534) +('zq_move', 'rockbot', 996.737) +('sunNervebot', 'piedra', 40.035) +('pibot', 'granite', 18.414) +('marble', 'biopic', -126.452) +('antiflatbot', 'phasenbott', -989.145) +('boom', 'freqbot2', 753.0) +('randbot', 'multibot', 1.338) +('copybot', 'antirotnbot', -550.898) +('biopic', 'phasenbott', -31.641) +('debruijn81', 'r226bot', -0.145) +('russrocker4', 'sweetrock', 40.01) +('switchbot', 'mixed_strategy', -141.764) +('debruijn81', 'multibot', 50.0) +('freqbot2', 'zq_move', -592.551) +('flatbot3', 'switchalot', -0.06) +('multibot', 'textbot', 123.0) +('phasenbott', 'biopic', 31.029) +('zq_move', 'rotatebot', 992.339) +('copybot', 'switchalot', -319.107) +('actr_lag2_decay', 'markov5', 0.026) +('pibot', 'addshiftbot3', -0.149) +('mixed_strategy', 'biopic', -46.556) +('mod1bot', 'predbot', 269.727) +('r226bot', 'biopic', -385.01) +('multibot', 'markov5', -170.65) +('russrocker4', 'robertot', 30.74) +('textbot', 'zq_move', -157.625) +('randbot', 'freqbot2', -0.457) +('actr_lag2_decay', 'pibot', -11.448) +('pibot', 'antiflatbot', -10.921) +('debruijn81', 'textbot', 23.0) +('actr_lag2_decay', 'foxtrotbot', -23.604) +('copybot', 'freqbot2', 600.0) +('zq_move', 'switchalot', 155.01) +('granite', 'markovbails', -31.167) +('piedra', 'sunNervebot', -38.505) +('addshiftbot3', 'shofar', -14.423) +('antiflatbot', 'marble', -995.877) +('marble', 'antiflatbot', 995.843) +('flatbot3', 'freqbot2', 236.239) +('russrocker4', 'rotatebot', 993.021) +('switchbot', 'antirotnbot', -497.182) +('zq_move', 'shofar', -60.171) +('adddriftbot2', 'sunCrazybot', -7.418) +('rotatebot', 'russrocker4', -993.018) +('textbot', 'sunCrazybot', -8.919) +('foxtrotbot', 'boom', -1.087) +('randbot', 'piedra', 1.841) +('debruijn81', 'zq_move', 34.169) +('freqbot2', 'greenberg', -997.074) +('randbot', 'greenberg', 0.398) +('sweetrock', 'greenberg', -40.998) +('granite', 'driftbot', 43.585) +('iocainebot', 'driftbot', 179.092) +('driftbot', 'multibot', -249.329) +('greenberg', 'switchbot', 474.015) +('halbot', 'foxtrotbot', 70.381) +('iocainebot', 'r226bot', 376.008) +('sweetrock', 'foxtrotbot', 8.773) +('piedra', 'mod1bot', -38.937) +('shofar', 
'predbot', 14.59) +('switchbot', 'debruijn81', 0.373) +('boom', 'rockbot', 997.0) +('mod1bot', 'markovbails', -7.07) +('switchalot', 'switchbot', -0.847) +('rockbot', 'inocencio', -980.026) +('foxtrotbot', 'inocencio', -309.139) +('granite', 'switchalot', 149.213) +('freqbot2', 'textbot', 185.0) +('textbot', 'driftbot', -16.902) +('mod1bot', 'phasenbott', -34.718) +('adddriftbot2', 'halbot', -188.34) +('pibot', 'boom', 9.376) +('switchbot', 'sunCrazybot', -3.853) +('addshiftbot3', 'debruijn81', -0.954) +('peterbot', 'markovbails', -20.511) +('pibot', 'shofar', 3.288) +('boom', 'textbot', 124.624) +('debruijn81', 'foxtrotbot', -0.399) +('debruijn81', 'shofar', 17.0) +('sunNervebot', 'driftbot', 42.554) +('shofar', 'randbot', 0.843) +('predbot', 'russrocker4', -94.06) +('rockbot', 'copybot', -1000.0) +('r226bot', 'marble', -396.742) +('biopic', 'halbot', -15.126) +('robertot', 'mixed_strategy', 48.231) +('multibot', 'robertot', -252.265) +('mod1bot', 'rotatebot', 993.004) +('biopic', 'antirotnbot', 45.471) +('greenberg', 'iocainebot', -1.846) +('debruijn81', 'switchalot', 0.754) +('foxtrotbot', 'actr_lag2_decay', 26.12) +('foxtrotbot', 'pibot', 0.437) +('marble', 'freqbot2', 592.632) +('granite', 'flatbot3', 166.318) +('switchalot', 'rockbot', 0.389) +('phasenbott', 'robertot', 49.344) +('actr_lag2_decay', 'sweetrock', 41.108) +('iocainebot', 'pibot', -1.552) +('robertot', 'randbot', -0.795) +('sweetrock', 'multibot', 101.884) +('rotatebot', 'actr_lag2_decay', -994.283) +('multibot', 'antiflatbot', 997.942) +('zq_move', 'zq_move', 1.981) +('randbot', 'switchbot', 1.115) +('rotatebot', 'randbot', 0.549) +('rockbot', 'rotatebot', 0.0) +('zq_move', 'antirotnbot', 57.59) +('granite', 'adddriftbot2', 25.612) +('multibot', 'greenberg', -307.065) +('rotatebot', 'rotatebot', 0.0) +('robertot', 'sweetrock', 40.492) +('actr_lag2_decay', 'mixed_strategy', 51.744) +('flatbot3', 'foxtrotbot', -0.112) +('marble', 'markovbails', -31.035) +('predbot', 'predbot', -0.011) +('antiflatbot', 'halbot', -996.502) +('inocencio', 'piedra', -229.048) +('switchalot', 'driftbot', -0.119) +('robertot', 'marble', 52.034) +('sweetrock', 'iocainebot', -41.207) +('randbot', 'copybot', -0.288) +('textbot', 'flatbot3', -0.42) +('mixed_strategy', 'greenberg', -44.557) +('flatbot3', 'halbot', -130.022) +('multibot', 'addshiftbot3', 27.877) +('markov5', 'switchbot', 247.007) +('sunNervebot', 'markov5', -3.466) +('freqbot2', 'multibot', -999.0) +('rotatebot', 'marble', -994.322) +('granite', 'marble', 1.174) +('rotatebot', 'mod1bot', -992.96) +('flatbot3', 'robertot', -63.357) +('freqbot2', 'switchbot', -25.423) +('sunNervebot', 'shofar', -2.775) +('marble', 'halbot', -240.988) +('inocencio', 'textbot', 132.22) +('marble', 'textbot', 178.347) +('antiflatbot', 'mixed_strategy', -981.097) +('sunNervebot', 'adddriftbot2', 100.308) +('mixed_strategy', 'peterbot', 209.847) +('granite', 'biopic', -124.679) +('actr_lag2_decay', 'zq_move', 93.685) +('rotatebot', 'rockbot', 0.0) +('markov5', 'shofar', 3.32) +('driftbot', 'greenberg', -263.493) +('inocencio', 'sunCrazybot', 215.446) +('rotatebot', 'antiflatbot', -666.212) +('switchalot', 'predbot', -210.068) +('biopic', 'antiflatbot', 994.523) +('addshiftbot3', 'phasenbott', -324.564) +('switchalot', 'inocencio', -93.802) +('marble', 'boom', -40.14) +('r226bot', 'markovbails', -155.538) +('sunNervebot', 'antirotnbot', 45.165) +('copybot', 'piedra', -992.438) +('mod1bot', 'halbot', 23.846) +('debruijn81', 'iocainebot', -21.083) +('randbot', 'phasenbott', -0.338) +('antirotnbot', 'pibot', 
-45.158) +('flatbot3', 'rockbot', 0.003) +('switchbot', 'russrocker4', -246.751) +('russrocker4', 'foxtrotbot', 175.617) +('multibot', 'iocainebot', -268.669) +('adddriftbot2', 'sweetrock', -43.466) +('textbot', 'phasenbott', -86.888) +('phasenbott', 'textbot', 86.658) +('flatbot3', 'iocainebot', -194.56) +('multibot', 'foxtrotbot', -4.622) +('predbot', 'markovbails', -20.685) +('granite', 'peterbot', 899.322) +('halbot', 'granite', 241.007) +('predbot', 'markov5', -21.298) +('predbot', 'halbot', -48.602) +('peterbot', 'switchalot', 123.726) +('halbot', 'randbot', -0.598) +('antirotnbot', 'mixed_strategy', -10.723) +('foxtrotbot', 'textbot', -0.452) +('zq_move', 'randbot', -0.415) +('markovbails', 'robertot', 4.955) +('halbot', 'halbot', 0.134) +('russrocker4', 'rockbot', 998.985) +('pibot', 'switchbot', -0.527) +('granite', 'iocainebot', -236.096) +('sunCrazybot', 'freqbot2', 138.625) +('foxtrotbot', 'mod1bot', 11.846) +('markov5', 'greenberg', 2.44) +('textbot', 'copybot', -74.0) +('pibot', 'russrocker4', 8.991) +('mod1bot', 'markov5', -6.214) +('mod1bot', 'antirotnbot', 54.426) +('markovbails', 'phasenbott', -17.601) +('predbot', 'zq_move', 166.454) +('robertot', 'sunCrazybot', 416.462) +('peterbot', 'halbot', -904.476) +('antiflatbot', 'russrocker4', -997.571) +('randbot', 'debruijn81', -0.431) +('copybot', 'peterbot', -975.999) +('predbot', 'switchalot', 210.212) +('switchalot', 'textbot', -0.781) +('addshiftbot3', 'russrocker4', -342.42) +('iocainebot', 'adddriftbot2', 140.111) +('sunCrazybot', 'predbot', -272.2) +('sweetrock', 'granite', -40.188) +('multibot', 'sunCrazybot', 37.543) +('pibot', 'robertot', 14.037) +('shofar', 'mod1bot', 3.379) +('pibot', 'sweetrock', 14.548) +('peterbot', 'shofar', -117.374) +('r226bot', 'boom', -380.336) +('freqbot2', 'granite', -592.622) +('driftbot', 'antirotnbot', -7.044) +('piedra', 'rotatebot', 992.211) +('driftbot', 'halbot', -66.485) +('addshiftbot3', 'flatbot3', -0.135) +('rockbot', 'rockbot', 0.0) +('shofar', 'robertot', 1.09) +('iocainebot', 'antiflatbot', 988.604) +('rotatebot', 'predbot', -988.283) +('biopic', 'r226bot', 384.777) +('boom', 'sunNervebot', -19.383) +('switchbot', 'iocainebot', -222.704) +('mixed_strategy', 'markovbails', -33.029) +('granite', 'antiflatbot', 995.772) +('mod1bot', 'copybot', 991.655) +('adddriftbot2', 'rotatebot', 0.155) +('mixed_strategy', 'mod1bot', -83.488) +('sunCrazybot', 'markov5', -216.733) +('zq_move', 'multibot', 229.723) +('sunCrazybot', 'randbot', -0.5) +('peterbot', 'markov5', -20.037) +('antiflatbot', 'predbot', -996.842) +('adddriftbot2', 'iocainebot', -141.412) +('marble', 'zq_move', 25.996) +('phasenbott', 'debruijn81', 40.069) +('sunCrazybot', 'foxtrotbot', 0.41) +('piedra', 'switchbot', 245.883) +('markov5', 'switchalot', 98.062) +('debruijn81', 'adddriftbot2', -0.783) +('antiflatbot', 'boom', -995.458) +('peterbot', 'pibot', -16.741) +('debruijn81', 'sweetrock', 25.47) +('peterbot', 'antirotnbot', -179.519) +('granite', 'rockbot', 999.001) +('mixed_strategy', 'flatbot3', 18.746) +('iocainebot', 'markov5', 14.304) +('flatbot3', 'driftbot', 0.19) +('mixed_strategy', 'randbot', 0.762) +('foxtrotbot', 'predbot', 17.611) +('freqbot2', 'r226bot', 399.151) +('peterbot', 'boom', -425.322) +('mod1bot', 'piedra', 40.576) +('markovbails', 'iocainebot', -15.638) +('driftbot', 'sunNervebot', -41.654) +('freqbot2', 'markov5', -454.507) +('mixed_strategy', 'inocencio', 115.576) +('freqbot2', 'antiflatbot', 997.667) +('debruijn81', 'freqbot2', 128.0) +('halbot', 'peterbot', 904.334) +('switchalot', 
'rotatebot', 0.463) +('addshiftbot3', 'robertot', -77.571) +('peterbot', 'biopic', -791.486) +('markov5', 'mixed_strategy', 33.733) +('zq_move', 'iocainebot', -297.77) +('actr_lag2_decay', 'iocainebot', -62.18) +('markovbails', 'shofar', 2.846) +('piedra', 'driftbot', 24.307) +('greenberg', 'markov5', -2.09) +('antiflatbot', 'r226bot', -206.98) +('antiflatbot', 'iocainebot', -988.1) +('inocencio', 'granite', -579.868) +('freqbot2', 'sweetrock', -592.206) +('marble', 'russrocker4', -147.542) +('debruijn81', 'halbot', 66.319) +('marble', 'switchbot', 245.74) +('phasenbott', 'phasenbott', 0.891) +('markovbails', 'predbot', 20.482) +('adddriftbot2', 'marble', -26.437) +('boom', 'iocainebot', -19.119) +('robertot', 'rotatebot', 994.435) +('robertot', 'granite', 51.423) +('textbot', 'piedra', -168.529) +('shofar', 'rotatebot', 964.488) +('granite', 'randbot', 0.901) +('pibot', 'multibot', 20.0) +('biopic', 'freqbot2', 660.249) +('predbot', 'boom', 6.926) +('antiflatbot', 'markov5', -989.311) +('r226bot', 'mod1bot', -390.516) +('iocainebot', 'marble', 234.82) +('russrocker4', 'greenberg', -357.017) +('switchalot', 'sunNervebot', -106.722) +('zq_move', 'biopic', -98.353) +('boom', 'foxtrotbot', 0.464) +('robertot', 'inocencio', 31.23) +('boom', 'marble', 41.688) +('foxtrotbot', 'rotatebot', 0.435) +('boom', 'sunCrazybot', 441.276) +('pibot', 'piedra', 13.291) +('markovbails', 'foxtrotbot', 14.6) +('rotatebot', 'greenberg', -996.167) +('sweetrock', 'switchbot', 238.669) +('adddriftbot2', 'phasenbott', -114.798) +('r226bot', 'sunCrazybot', -47.474) +('halbot', 'markovbails', -13.02) +('randbot', 'antiflatbot', -0.144) +('r226bot', 'freqbot2', -399.221) +('addshiftbot3', 'randbot', 0.159) +('greenberg', 'adddriftbot2', 246.115) +('sunCrazybot', 'addshiftbot3', 37.249) +('textbot', 'greenberg', -122.006) +('pibot', 'greenberg', -7.932) +('antirotnbot', 'predbot', -48.806) +('marble', 'pibot', -18.304) +('antiflatbot', 'mod1bot', -995.538) +('rotatebot', 'copybot', -1.0) +('boom', 'mod1bot', -21.181) +('addshiftbot3', 'predbot', -122.852) +('peterbot', 'sunCrazybot', -93.843) +('piedra', 'r226bot', 391.29) +('sweetrock', 'randbot', -0.857) +('switchalot', 'boom', -159.019) +('halbot', 'shofar', -20.634) +('sunCrazybot', 'marble', -315.408) +('driftbot', 'actr_lag2_decay', -7.072) +('shofar', 'actr_lag2_decay', -4.119) +('shofar', 'sunCrazybot', 134.267) +('actr_lag2_decay', 'copybot', 369.692) +('peterbot', 'flatbot3', 175.307) +('peterbot', 'antiflatbot', 992.478) +('sweetrock', 'debruijn81', -25.386) +('zq_move', 'boom', -50.773) +('multibot', 'switchalot', 284.739) +('pibot', 'marble', 17.139) +('flatbot3', 'copybot', 208.248) +('foxtrotbot', 'switchalot', 0.08) +('foxtrotbot', 'adddriftbot2', -0.842) +('greenberg', 'flatbot3', 370.9) +('switchalot', 'antirotnbot', -315.612) +('peterbot', 'randbot', -0.475) +('flatbot3', 'antiflatbot', -416.524) +('rockbot', 'predbot', -994.659) +('robertot', 'boom', -0.931) +('pibot', 'mod1bot', 6.512) +('foxtrotbot', 'multibot', 4.867) +('sweetrock', 'predbot', -40.629) +('antirotnbot', 'zq_move', -57.543) +('addshiftbot3', 'foxtrotbot', 0.101) +('switchalot', 'addshiftbot3', -1.865) +('biopic', 'mixed_strategy', 45.303) +('actr_lag2_decay', 'inocencio', 281.581) +('russrocker4', 'piedra', 38.714) +('biopic', 'robertot', 23.594) +('sunNervebot', 'peterbot', 232.013) +('inocencio', 'r226bot', 383.072) +('markov5', 'driftbot', 0.753) +('sweetrock', 'mixed_strategy', -5.905) +('debruijn81', 'granite', 63.799) +('mod1bot', 'adddriftbot2', 243.255) +('russrocker4', 
'marble', 148.478) +('markov5', 'flatbot3', 79.115) +('zq_move', 'flatbot3', 152.371) +('zq_move', 'freqbot2', 592.482) +('rockbot', 'sweetrock', -996.65) +('phasenbott', 'actr_lag2_decay', 60.069) +('greenberg', 'phasenbott', 50.157) +('r226bot', 'shofar', -352.879) +('russrocker4', 'textbot', 69.488) +('rockbot', 'foxtrotbot', 0.732) +('r226bot', 'randbot', 0.516) +('flatbot3', 'marble', -165.44) +('inocencio', 'marble', -556.419) +('sweetrock', 'halbot', -39.765) +('randbot', 'randbot', 0.327) +('granite', 'debruijn81', -63.727) +('flatbot3', 'piedra', -140.209) +('rotatebot', 'sunNervebot', -945.585) +('rotatebot', 'antirotnbot', -997.987) +('piedra', 'biopic', -40.085) +('iocainebot', 'markovbails', 16.554) +('phasenbott', 'pibot', -6.867) +('sunNervebot', 'robertot', 3.861) +('r226bot', 'foxtrotbot', -0.614) +('multibot', 'rockbot', 999.0) +('peterbot', 'piedra', -518.123) +('r226bot', 'copybot', -161.473) +('iocainebot', 'multibot', 269.589) +('markovbails', 'peterbot', 23.079) +('iocainebot', 'robertot', 28.435) +('copybot', 'granite', -992.79) +('greenberg', 'debruijn81', 301.541) +('switchbot', 'predbot', -403.224) +('sweetrock', 'mod1bot', -38.93) +('debruijn81', 'mixed_strategy', 56.495) +('actr_lag2_decay', 'multibot', 266.242) +('textbot', 'boom', -124.269) +('pibot', 'debruijn81', 1.0) +('textbot', 'markovbails', -29.711) +('randbot', 'flatbot3', -0.5) +('granite', 'r226bot', 398.196) +('switchbot', 'greenberg', -473.663) +('addshiftbot3', 'piedra', -248.758) +('boom', 'driftbot', 35.946) +('peterbot', 'phasenbott', -919.713) +('mod1bot', 'mod1bot', 0.486) +('multibot', 'inocencio', -105.604) +('copybot', 'predbot', -984.989) +('iocainebot', 'randbot', -0.159) +('mod1bot', 'inocencio', 445.944) +('switchbot', 'granite', -244.124) +('antirotnbot', 'rockbot', 998.0) +('adddriftbot2', 'shofar', -2.651) +('marble', 'adddriftbot2', 25.593) +('foxtrotbot', 'halbot', -69.724) +('phasenbott', 'iocainebot', -111.708) +('mixed_strategy', 'markov5', -34.049) +('copybot', 'halbot', -988.776) +('randbot', 'sweetrock', 0.726) +('robertot', 'switchbot', 464.094) +('shofar', 'russrocker4', -1.519) +('sweetrock', 'sunCrazybot', 186.463) +('mod1bot', 'sunNervebot', 45.357) +('halbot', 'rockbot', 998.987) +('mixed_strategy', 'switchalot', 73.109) +('markovbails', 'markovbails', 1.089) +('antirotnbot', 'actr_lag2_decay', -27.882) +('robertot', 'freqbot2', 845.404) +('pibot', 'halbot', 10.36) +('russrocker4', 'iocainebot', -520.167) +('driftbot', 'r226bot', 1.272) +('inocencio', 'antiflatbot', 978.902) +('mixed_strategy', 'r226bot', 374.67) +('marble', 'granite', -2.439) +('inocencio', 'multibot', 106.362) +('multibot', 'mixed_strategy', -33.33) +('flatbot3', 'antirotnbot', -203.857) +('biopic', 'switchalot', 150.412) +('rotatebot', 'iocainebot', -986.743) +('rotatebot', 'addshiftbot3', -4.027) +('sunNervebot', 'switchalot', 103.5) +('flatbot3', 'greenberg', -371.768) +('piedra', 'randbot', -0.609) +('addshiftbot3', 'markovbails', -4.421) +('sweetrock', 'rockbot', 996.719) +('robertot', 'greenberg', -29.167) +('rockbot', 'biopic', -997.542) +('switchbot', 'pibot', 0.346) +('randbot', 'sunNervebot', 0.496) +('russrocker4', 'mixed_strategy', 60.593) +('inocencio', 'foxtrotbot', 307.939) +('adddriftbot2', 'switchalot', 0.149) +('halbot', 'r226bot', 379.561) +('halbot', 'switchalot', 233.927) +('iocainebot', 'sunCrazybot', 567.452) +('markovbails', 'debruijn81', -10.732) +('piedra', 'addshiftbot3', 244.792) +('boom', 'flatbot3', 39.552) +('sunNervebot', 'multibot', 112.672) +('shofar', 
'adddriftbot2', 3.713) +('marble', 'antirotnbot', 54.74) +('mod1bot', 'r226bot', 390.048) +('sunCrazybot', 'switchbot', 1.756) +('r226bot', 'sunNervebot', -182.596) +('iocainebot', 'freqbot2', 914.364) +('pibot', 'adddriftbot2', 1.551) +('antirotnbot', 'multibot', 237.58) +('russrocker4', 'actr_lag2_decay', 8.425) +('r226bot', 'debruijn81', 0.161) +('robertot', 'sunNervebot', -4.19) +('sunCrazybot', 'phasenbott', -394.011) +('rotatebot', 'adddriftbot2', -1.041) +('predbot', 'greenberg', -237.24) +('addshiftbot3', 'switchbot', 1.196) +('copybot', 'markov5', -0.585) +('sunCrazybot', 'greenberg', -578.089) +('multibot', 'flatbot3', 155.591) +('peterbot', 'freqbot2', 434.978) +('rockbot', 'iocainebot', -994.101) +('piedra', 'shofar', -39.43) +('rockbot', 'robertot', -997.085) +('russrocker4', 'r226bot', 309.386) +('peterbot', 'actr_lag2_decay', -231.251) +('adddriftbot2', 'freqbot2', -1.471) +('actr_lag2_decay', 'peterbot', 239.645) +('inocencio', 'sunNervebot', -37.322) +('marble', 'multibot', 284.711) +('switchbot', 'biopic', -323.253) +('actr_lag2_decay', 'textbot', 80.959) +('mod1bot', 'freqbot2', 592.285) +('markovbails', 'sweetrock', 36.682) +('sunNervebot', 'textbot', 44.383) +('markovbails', 'biopic', 6.599) +('addshiftbot3', 'inocencio', -76.046) +('pibot', 'copybot', 22.0) +('peterbot', 'addshiftbot3', 33.692) +('markov5', 'phasenbott', -18.72) +('sunCrazybot', 'mod1bot', -257.284) +('randbot', 'mod1bot', 0.594) +('rockbot', 'r226bot', 399.755) +('shofar', 'greenberg', 0.772) +('freqbot2', 'shofar', -571.894) +('rotatebot', 'piedra', -992.232) +('robertot', 'pibot', -14.277) +('boom', 'debruijn81', 1.059) +('sunNervebot', 'rotatebot', 947.317) +('peterbot', 'greenberg', -907.882) +('multibot', 'r226bot', 386.047) +('zq_move', 'marble', -26.978) +('adddriftbot2', 'flatbot3', 0.068) +('greenberg', 'switchalot', 278.519) +('inocencio', 'iocainebot', -221.056) +('driftbot', 'peterbot', -26.934) +('greenberg', 'piedra', 41.27) +('switchalot', 'robertot', -100.42) +('iocainebot', 'textbot', 107.826) +('randbot', 'marble', -0.749) +('driftbot', 'mod1bot', -241.447) +('pibot', 'freqbot2', 30.0) +('switchalot', 'flatbot3', -0.807) +('marble', 'sunCrazybot', 315.423) +('sunCrazybot', 'mixed_strategy', -169.232) +('peterbot', 'foxtrotbot', 26.407) +('addshiftbot3', 'textbot', -1.107) +('actr_lag2_decay', 'freqbot2', 575.176) +('addshiftbot3', 'copybot', 8.595) +('sunNervebot', 'foxtrotbot', -2.603) +('zq_move', 'granite', -26.075) +('greenberg', 'r226bot', 361.007) +('inocencio', 'mixed_strategy', -127.022) +('foxtrotbot', 'switchbot', -0.084) +('textbot', 'addshiftbot3', -0.159) +('biopic', 'switchbot', 323.677) +('greenberg', 'halbot', 157.735) +('randbot', 'markovbails', -0.563) +('mod1bot', 'rockbot', 997.891) +('sweetrock', 'freqbot2', 592.319) +('antiflatbot', 'driftbot', -21.852) +('flatbot3', 'debruijn81', -0.949) +('predbot', 'mixed_strategy', 55.107) +('granite', 'sweetrock', 39.446) +('sweetrock', 'boom', -37.256) +('biopic', 'predbot', 48.206) +('antiflatbot', 'biopic', -994.619) +('pibot', 'mixed_strategy', 31.576) +('rockbot', 'phasenbott', -994.735) +('shofar', 'markovbails', -1.95) +('adddriftbot2', 'zq_move', -56.744) +('markov5', 'rotatebot', 991.88) +('predbot', 'textbot', 156.412) +('robertot', 'mod1bot', -6.389) +('foxtrotbot', 'markov5', -13.905) +('mod1bot', 'zq_move', 292.915) +('greenberg', 'peterbot', 906.322) +('greenberg', 'biopic', 28.595) +('halbot', 'sunCrazybot', 373.953) +('textbot', 'sunNervebot', -42.026) +('peterbot', 'russrocker4', -927.986) +('zq_move', 
'switchbot', 249.102) +('antirotnbot', 'iocainebot', -58.096) +('driftbot', 'sunCrazybot', -53.528) +('greenberg', 'russrocker4', 354.403) +('robertot', 'switchalot', 99.086) +('textbot', 'adddriftbot2', 0.238) +('robertot', 'zq_move', 87.823) +('biopic', 'markovbails', -6.513) +('copybot', 'adddriftbot2', 0.434) +('randbot', 'textbot', 0.688) +('debruijn81', 'actr_lag2_decay', 69.303) +('addshiftbot3', 'sunCrazybot', -38.413) +('shofar', 'debruijn81', -16.865) +('biopic', 'actr_lag2_decay', 30.698) +('peterbot', 'predbot', -564.03) +('adddriftbot2', 'rockbot', -0.897) +('marble', 'r226bot', 397.249) +('markov5', 'debruijn81', -10.743) +('r226bot', 'iocainebot', -377.54) +('multibot', 'debruijn81', -50.0) +('shofar', 'foxtrotbot', -0.588) +('peterbot', 'switchbot', 247.467) +('biopic', 'russrocker4', -9.34) +('zq_move', 'actr_lag2_decay', -94.484) +('inocencio', 'randbot', 0.226) +('actr_lag2_decay', 'sunNervebot', 8.587) +('markov5', 'rockbot', 993.929) +('phasenbott', 'sunCrazybot', 395.601) +('phasenbott', 'markov5', 17.96) +('sunNervebot', 'freqbot2', 391.161) +('rockbot', 'mixed_strategy', -991.602) +('zq_move', 'driftbot', 42.012) +('mod1bot', 'sunCrazybot', 257.664) +('multibot', 'pibot', -20.0) +('sunCrazybot', 'antiflatbot', 980.429) +('shofar', 'zq_move', 60.217) +('copybot', 'r226bot', 159.326) +('predbot', 'antiflatbot', 996.921) +('greenberg', 'shofar', -3.648) +('adddriftbot2', 'mod1bot', -245.661) +('markovbails', 'rockbot', 993.946) +('antiflatbot', 'switchalot', 15.172) +('markovbails', 'rotatebot', 991.839) +('phasenbott', 'sunNervebot', 30.462) +('switchbot', 'multibot', -478.832) +('rockbot', 'multibot', -999.0) +('granite', 'markov5', -31.529) +('sweetrock', 'zq_move', -39.792) +('granite', 'freqbot2', 592.682) +('biopic', 'iocainebot', -36.665) +('iocainebot', 'copybot', 988.452) +('antiflatbot', 'piedra', -995.027) +('mod1bot', 'textbot', 134.658) +('debruijn81', 'russrocker4', 31.917) +('sunCrazybot', 'sunNervebot', -135.117) +('flatbot3', 'mod1bot', -91.054) +('boom', 'zq_move', 50.475) +('mod1bot', 'addshiftbot3', 106.948) +('sunNervebot', 'predbot', -25.924) +('russrocker4', 'sunCrazybot', 480.786) +('r226bot', 'rockbot', -399.845) +('flatbot3', 'randbot', 0.267) +('adddriftbot2', 'addshiftbot3', -28.776) +('antiflatbot', 'adddriftbot2', 1.486) +('switchbot', 'markov5', -245.83) +('mixed_strategy', 'boom', -27.859) +('randbot', 'rockbot', -1.107) +('r226bot', 'zq_move', -387.701) +('multibot', 'markovbails', -170.125) +('halbot', 'antirotnbot', 57.923) +('mod1bot', 'biopic', 9.829) +('mixed_strategy', 'sunNervebot', -49.396) +('robertot', 'russrocker4', -31.057) +('piedra', 'sweetrock', -2.248) +('driftbot', 'addshiftbot3', 0.805) +('rockbot', 'antiflatbot', 999.002) +('adddriftbot2', 'biopic', -7.041) +('copybot', 'sunNervebot', -945.09) +('copybot', 'driftbot', -1.702) +('zq_move', 'sunNervebot', -169.577) +('russrocker4', 'sunNervebot', 11.901) +('adddriftbot2', 'switchbot', 1.275) +('shofar', 'halbot', 21.914) +('r226bot', 'granite', -397.201) +('debruijn81', 'driftbot', -8.312) +('iocainebot', 'granite', 235.398) +('freqbot2', 'boom', -753.0) +('switchbot', 'mod1bot', -445.086) +('mixed_strategy', 'multibot', 34.363) +('copybot', 'marble', -992.77) +('antiflatbot', 'antirotnbot', -994.158) +('freqbot2', 'russrocker4', -612.097) +('inocencio', 'switchalot', 93.396) +('marble', 'mod1bot', -85.186) +('flatbot3', 'r226bot', -0.353) +('antiflatbot', 'antiflatbot', 0.014) +('copybot', 'actr_lag2_decay', -391.481) +('iocainebot', 'addshiftbot3', 304.531) 
+('r226bot', 'phasenbott', -144.451) +('rotatebot', 'granite', -994.393) +('inocencio', 'copybot', 802.373) +('copybot', 'mod1bot', -991.549) +('adddriftbot2', 'r226bot', 0.067) +('addshiftbot3', 'actr_lag2_decay', -47.234) +('inocencio', 'biopic', -132.373) +('mod1bot', 'peterbot', 569.936) +('boom', 'randbot', 0.586) +('marble', 'sweetrock', 39.356) +('inocencio', 'predbot', -470.339) +('sweetrock', 'markov5', -36.37) +('multibot', 'mod1bot', -191.371) +('driftbot', 'mixed_strategy', -36.679) +('biopic', 'greenberg', -28.859) +('freqbot2', 'rockbot', 999.0) +('driftbot', 'piedra', -23.973) +('halbot', 'robertot', 34.186) +('switchalot', 'biopic', -149.888) +('sunCrazybot', 'biopic', -506.738) +('adddriftbot2', 'randbot', 0.323) +('copybot', 'sunCrazybot', -832.605) +('iocainebot', 'flatbot3', 193.923) +('pibot', 'iocainebot', -1.753) +('markov5', 'sweetrock', 36.489) +('russrocker4', 'adddriftbot2', 131.565) +('shofar', 'antirotnbot', 45.246) +('inocencio', 'markovbails', -51.083) +('r226bot', 'peterbot', -392.899) +('mod1bot', 'multibot', 191.227) +('freqbot2', 'mixed_strategy', -493.132) +('sweetrock', 'addshiftbot3', 242.166) +('actr_lag2_decay', 'addshiftbot3', 45.205) +('markov5', 'marble', 30.625) +('antirotnbot', 'randbot', 0.547) +('rockbot', 'shofar', -979.999) +('granite', 'piedra', 39.238) +('antirotnbot', 'antirotnbot', -0.128) +('flatbot3', 'addshiftbot3', 1.692) +('markovbails', 'zq_move', 43.772) +('driftbot', 'pibot', 6.02) +('sweetrock', 'marble', -39.111) +('inocencio', 'freqbot2', 361.377) +('freqbot2', 'inocencio', -360.385) +('r226bot', 'greenberg', -361.747) +('addshiftbot3', 'sweetrock', -241.28) +('addshiftbot3', 'mod1bot', -107.141) +('addshiftbot3', 'zq_move', -310.393) +('foxtrotbot', 'antiflatbot', 0.253) +('foxtrotbot', 'freqbot2', 0.347) +('rockbot', 'sunCrazybot', -965.832) +('markov5', 'foxtrotbot', 14.824) +('markov5', 'adddriftbot2', 0.194) +('greenberg', 'sunCrazybot', 577.629) +('randbot', 'pibot', -0.272) +('pibot', 'foxtrotbot', -0.647) +('halbot', 'biopic', 13.966) +('peterbot', 'multibot', -349.001) +('antirotnbot', 'peterbot', 197.015) +('multibot', 'sunNervebot', -111.714) +('inocencio', 'peterbot', 125.941) +('addshiftbot3', 'addshiftbot3', -1.728) +('multibot', 'antirotnbot', -240.235) +('zq_move', 'markovbails', -42.287) +('addshiftbot3', 'adddriftbot2', 30.299) +('copybot', 'copybot', 0.0) +('biopic', 'mod1bot', -8.359) +('sunNervebot', 'r226bot', 181.529) +('biopic', 'marble', 126.302) +('inocencio', 'russrocker4', -199.3) +('rotatebot', 'freqbot2', 0.0) +('iocainebot', 'predbot', 179.022) +('sunCrazybot', 'copybot', 839.416) +('robertot', 'predbot', 28.966) +('driftbot', 'markov5', -0.782) +('predbot', 'robertot', -29.804) +('iocainebot', 'switchalot', 46.504) +('sunCrazybot', 'flatbot3', -10.891) +('mixed_strategy', 'actr_lag2_decay', -53.965) +('markovbails', 'copybot', 4.185) +('rockbot', 'freqbot2', -999.0) +('robertot', 'phasenbott', -50.154) +('antiflatbot', 'sunCrazybot', -978.76) +('mod1bot', 'russrocker4', -7.797) +('sunNervebot', 'granite', 82.509) +('markov5', 'sunCrazybot', 216.736) +('phasenbott', 'r226bot', 144.448) +('halbot', 'pibot', -8.168) +('adddriftbot2', 'markov5', 0.624) +('halbot', 'adddriftbot2', 189.02) +('foxtrotbot', 'markovbails', -14.231) +('rockbot', 'granite', -998.981) +('shofar', 'freqbot2', 572.472) +('freqbot2', 'randbot', -0.497) +('sunNervebot', 'mixed_strategy', 49.687) +('piedra', 'rockbot', 996.562) +('foxtrotbot', 'r226bot', 0.193) +('piedra', 'adddriftbot2', 40.197) +('switchbot', 'markovbails', 
-246.94) +('marble', 'mixed_strategy', 15.631) +('inocencio', 'rockbot', 979.704) +('greenberg', 'mod1bot', 88.647) +('piedra', 'copybot', 992.449) +('sweetrock', 'driftbot', 25.199) +('mod1bot', 'mixed_strategy', 82.028) +('biopic', 'textbot', 134.496) +('phasenbott', 'adddriftbot2', 113.8) +('actr_lag2_decay', 'driftbot', 8.395) +('granite', 'granite', -0.194) +('antirotnbot', 'addshiftbot3', 11.065) +('russrocker4', 'zq_move', 165.291) +('flatbot3', 'pibot', 0.65) +('sunNervebot', 'markovbails', -3.36) +('markov5', 'zq_move', 43.641) +('antiflatbot', 'robertot', -995.263) +('actr_lag2_decay', 'debruijn81', -70.409) +('switchalot', 'zq_move', -156.743) +('markovbails', 'switchbot', 246.483) +('markov5', 'copybot', 5.299) +('zq_move', 'markov5', -44.355) +('rotatebot', 'switchbot', 1.608) +('predbot', 'shofar', -14.737) +('debruijn81', 'predbot', 71.53) +('textbot', 'actr_lag2_decay', -81.785) +('adddriftbot2', 'markovbails', 0.45) +('driftbot', 'marble', -46.551) +('pibot', 'rockbot', -11.0) +('marble', 'rotatebot', 994.354) +('foxtrotbot', 'russrocker4', -174.717) +('biopic', 'flatbot3', 145.909) +('freqbot2', 'predbot', -588.971) +('granite', 'rotatebot', 994.363) +('boom', 'predbot', -7.285) +('granite', 'predbot', -20.3) +('mod1bot', 'granite', 85.22) +('actr_lag2_decay', 'greenberg', -236.865) +('piedra', 'robertot', -39.751) +('peterbot', 'peterbot', 0.233) +('actr_lag2_decay', 'phasenbott', -58.938) +('phasenbott', 'multibot', 220.024) +('inocencio', 'rotatebot', 980.099) +('shofar', 'multibot', 118.43) +('markovbails', 'russrocker4', -0.335) +('antiflatbot', 'rockbot', -999.002) +('switchbot', 'sunNervebot', -238.44) +('marble', 'actr_lag2_decay', -123.119) +('rotatebot', 'switchalot', -0.886) +('sunCrazybot', 'piedra', -175.443) +('granite', 'textbot', 178.258) +('adddriftbot2', 'textbot', 0.405) +('copybot', 'iocainebot', -988.421) +('pibot', 'antirotnbot', 44.82) +('greenberg', 'foxtrotbot', 408.416) +('actr_lag2_decay', 'sunCrazybot', 511.358) +('multibot', 'copybot', 997.0) +('inocencio', 'markov5', -51.674) +('copybot', 'flatbot3', -208.369) +('copybot', 'foxtrotbot', -0.92) +('shofar', 'phasenbott', -3.292) +('piedra', 'phasenbott', -39.546) +('mixed_strategy', 'pibot', -29.639) +('actr_lag2_decay', 'rotatebot', 994.276) +('phasenbott', 'addshiftbot3', 323.705) +('switchalot', 'debruijn81', 1.728) +('greenberg', 'freqbot2', 997.188) +('robertot', 'actr_lag2_decay', -12.316) +('granite', 'mixed_strategy', 17.569) +('r226bot', 'inocencio', -383.041) +('robertot', 'peterbot', 684.491) +('foxtrotbot', 'greenberg', -407.418) +('rotatebot', 'markovbails', -991.913) +('adddriftbot2', 'inocencio', -10.419) +('copybot', 'debruijn81', 1.0) +('markov5', 'predbot', 19.921) +('peterbot', 'debruijn81', 9.634) +('markovbails', 'actr_lag2_decay', -1.043) +('piedra', 'debruijn81', -25.419) +('multibot', 'boom', -232.449) +('boom', 'shofar', -21.047) +('granite', 'boom', -39.466) +('switchalot', 'phasenbott', -82.68) +('foxtrotbot', 'copybot', 0.557) +('copybot', 'russrocker4', -992.285) +('markovbails', 'piedra', 37.263) +('shofar', 'switchalot', 171.219) +('addshiftbot3', 'granite', -93.53) +('shofar', 'textbot', 110.921) +('phasenbott', 'antirotnbot', 57.795) +('textbot', 'textbot', 0.0) +('predbot', 'piedra', 40.546) +('zq_move', 'phasenbott', -265.865) +('rockbot', 'pibot', 11.0) +('phasenbott', 'piedra', 41.249) +('textbot', 'randbot', 0.375) +('zq_move', 'mod1bot', -293.278) +('halbot', 'sunNervebot', 6.879) +('predbot', 'switchbot', 405.305) +('marble', 'markov5', -31.878) 
+('marble', 'iocainebot', -233.948) +('freqbot2', 'halbot', -948.402) +('halbot', 'boom', 24.315) +('sunCrazybot', 'granite', -315.229) +('pibot', 'rotatebot', -11.0) +('switchalot', 'markov5', -98.427) +('flatbot3', 'mixed_strategy', -16.113) +('freqbot2', 'piedra', -592.133) +('robertot', 'iocainebot', -28.816) +('halbot', 'russrocker4', 99.25) +('r226bot', 'halbot', -379.776) +('driftbot', 'copybot', 0.293) +('antirotnbot', 'granite', -55.269) +('switchbot', 'r226bot', -0.282) +('markov5', 'robertot', 5.339) +('zq_move', 'r226bot', 387.995) +('inocencio', 'debruijn81', -37.287) +('phasenbott', 'driftbot', 86.968) +('randbot', 'biopic', 0.378) +('addshiftbot3', 'r226bot', 1.772) +('granite', 'sunNervebot', -82.736) +('marble', 'flatbot3', 167.169) +('iocainebot', 'sweetrock', 40.561) +('flatbot3', 'zq_move', -150.93) +('sunCrazybot', 'iocainebot', -568.022) +('actr_lag2_decay', 'robertot', 11.052) +('multibot', 'switchbot', 479.189) +('boom', 'greenberg', -21.004) +('markovbails', 'randbot', -0.078) +('peterbot', 'r226bot', 394.15) +('switchalot', 'granite', -149.762) +('zq_move', 'textbot', 157.888) +('halbot', 'sweetrock', 40.15) +('r226bot', 'driftbot', 0.961) +('biopic', 'adddriftbot2', 5.671) +('textbot', 'rockbot', -185.0) +('switchbot', 'driftbot', -0.458) +('debruijn81', 'sunCrazybot', 4.041) +('adddriftbot2', 'russrocker4', -131.849) +('textbot', 'predbot', -156.443) +('adddriftbot2', 'multibot', -7.795) +('pibot', 'biopic', 0.784) +('switchalot', 'antiflatbot', -14.871) +('sunCrazybot', 'boom', -444.062) +('freqbot2', 'foxtrotbot', -0.817) +('flatbot3', 'russrocker4', -106.062) +('r226bot', 'antirotnbot', -152.954) +('marble', 'switchalot', 148.029) +('sweetrock', 'shofar', -39.646) +('boom', 'actr_lag2_decay', -26.029) +('piedra', 'iocainebot', -40.133) +('sunNervebot', 'sunNervebot', -0.345) +('foxtrotbot', 'peterbot', -26.886) +('boom', 'granite', 41.356) +('flatbot3', 'markovbails', -77.981) +('copybot', 'textbot', 74.0) +('inocencio', 'boom', -140.531) +('antiflatbot', 'freqbot2', -997.707) +('switchalot', 'adddriftbot2', 1.177) +('flatbot3', 'markov5', -78.113) +('antirotnbot', 'inocencio', 393.812) +('debruijn81', 'phasenbott', -39.554) +('r226bot', 'robertot', -392.766) +('shofar', 'sweetrock', 38.097) +('granite', 'sunCrazybot', 312.701) +('flatbot3', 'sunCrazybot', 10.019) +('copybot', 'shofar', -963.485) +('r226bot', 'switchbot', 0.141) +('randbot', 'russrocker4', 0.518) +('biopic', 'sweetrock', 41.193) +('greenberg', 'sweetrock', 42.033) +('randbot', 'iocainebot', 1.065) +('antirotnbot', 'sunCrazybot', 67.989) +('switchbot', 'robertot', -464.63) +('sweetrock', 'inocencio', 239.356) +('halbot', 'markov5', -12.162) +('sunCrazybot', 'russrocker4', -480.951) +('halbot', 'freqbot2', 947.932) +('biopic', 'granite', 125.431) +('driftbot', 'zq_move', -41.017) +('actr_lag2_decay', 'mod1bot', -1.928) +('pibot', 'sunCrazybot', 1.074) +('shofar', 'r226bot', 351.728) +('foxtrotbot', 'mixed_strategy', 3.541) +('addshiftbot3', 'iocainebot', -305.526) +('rockbot', 'textbot', 185.0) +('actr_lag2_decay', 'antirotnbot', 27.52) +('predbot', 'adddriftbot2', 284.617) +('actr_lag2_decay', 'r226bot', 309.804) +('driftbot', 'driftbot', -0.571) +('iocainebot', 'piedra', 40.344) +('switchbot', 'zq_move', -247.456) +('copybot', 'greenberg', -992.835) +('rotatebot', 'mixed_strategy', -957.369) diff --git a/scenarios/bargaining/open_spiel/open_spiel/data/paper_data/response_graph_ucb/soccer.txt b/scenarios/bargaining/open_spiel/open_spiel/data/paper_data/response_graph_ucb/soccer.txt new file 
mode 100644 index 0000000..aff2426 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/data/paper_data/response_graph_ucb/soccer.txt @@ -0,0 +1,10 @@ +5.000000000000000000e-01 4.367475699999999739e-01 6.905596499999999693e-01 4.947496200000000011e-01 3.813060999999999812e-01 5.866810850000000466e-01 5.643643200000000304e-01 4.877253400000000072e-01 2.544107650000000387e-01 4.266350099999999812e-01 +5.632524300000000261e-01 5.000000000000000000e-01 6.165377300000000060e-01 5.190367849999999450e-01 5.132084150000000289e-01 5.294269299999999623e-01 6.031941549999999541e-01 4.251502099999999729e-01 5.585935800000000340e-01 3.684599600000000308e-01 +3.094403500000000307e-01 3.834622699999999940e-01 5.000000000000000000e-01 3.762548900000000085e-01 3.782787850000000063e-01 3.144149499999999708e-01 3.940367049999999871e-01 2.261176149999999940e-01 4.171005400000000196e-01 1.819202500000000056e-01 +5.052503799999999989e-01 4.809632149999999995e-01 6.237451099999999915e-01 5.000000000000000000e-01 4.960451399999999955e-01 5.131155450000000062e-01 5.810390599999999406e-01 3.882810250000000019e-01 5.516879150000000287e-01 3.419032850000000012e-01 +6.186939000000000188e-01 4.867915850000000266e-01 6.217212149999999937e-01 5.039548600000000045e-01 5.000000000000000000e-01 6.013259499999999980e-01 6.620943150000000443e-01 5.096754449999999492e-01 5.412645149999999461e-01 4.434359999999999968e-01 +4.133189150000000089e-01 4.705730699999999822e-01 6.855850500000000292e-01 4.868844549999999938e-01 3.986740500000000020e-01 5.000000000000000000e-01 5.282975949999999532e-01 3.148352050000000069e-01 2.363577649999999974e-01 3.951892550000000170e-01 +4.356356799999999696e-01 3.968058449999999904e-01 6.059632950000000129e-01 4.189609400000000039e-01 3.379056850000000112e-01 4.717024049999999913e-01 5.000000000000000000e-01 3.236530899999999766e-01 1.910987299999999944e-01 3.638533499999999643e-01 +5.122746599999999928e-01 5.748497900000000271e-01 7.738823850000000615e-01 6.117189750000000537e-01 4.903245549999999953e-01 6.851647949999999376e-01 6.763469100000000234e-01 5.000000000000000000e-01 3.202538999999999803e-01 4.136784749999999899e-01 +7.455892349999999613e-01 4.414064200000000215e-01 5.828994600000000359e-01 4.483120849999999713e-01 4.587354849999999984e-01 7.636422350000000581e-01 8.089012700000000056e-01 6.797461000000000197e-01 5.000000000000000000e-01 7.074556100000000125e-01 +5.733649899999999633e-01 6.315400399999999692e-01 8.180797499999999944e-01 6.580967149999999988e-01 5.565640000000000587e-01 6.048107449999999830e-01 6.361466500000000357e-01 5.863215250000000101e-01 2.925443899999999875e-01 5.000000000000000000e-01 diff --git a/scenarios/bargaining/open_spiel/open_spiel/data/paper_data/routing_game_experiments/Experiments.ipynb b/scenarios/bargaining/open_spiel/open_spiel/data/paper_data/routing_game_experiments/Experiments.ipynb new file mode 100644 index 0000000..4406e25 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/data/paper_data/routing_game_experiments/Experiments.ipynb @@ -0,0 +1,792 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "VK1t9uV4CvWM" + }, + "source": [ + "# Experiment mean field routing game\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PcTLgnTsCvWP" + }, + "source": [ + "This notebook is the notebook used to produce the figures in the article [*Solving N-player dynamic routing games with congestion: a mean field approach, Cabannes et. 
al.*](https://arxiv.org/pdf/2110.11943.pdf).\n", + "\n", + "### Outline of the notebook:\n", + "1. [Reproducing the Braess paradox](#braess_paradox)\n", + "2. [Computation time of algorithms to compute Nash equilibrium in N-player and mean field games as a function of the number of players](#efficiency)\n", + "3. [Sioux Falls, 14,000 vehicles with MFG](#sioux_falls)\n", + "4. [Augmented Braess network with multiple origin destinations](#multiple_destinations)\n", + "5. [Average deviation of the mean field equilibrium policy in the N-player Pigou network game as a function of N](#pigou_deviation)\n", + "6. [Average deviation of the mean field equilibrium policy in the N-player Braess network game as a function of N](#braess_deviation)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yIVbyt9iCvWQ" + }, + "source": [ + "## 0. Importing libraries\n", + "If the import does not work, please download and compile OpenSpiel from source and check that you have all the required libraries." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pC1BCSRvCvWR" + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zZDg_IQYCvWS" + }, + "outputs": [], + "source": [ + "from open_spiel.python import policy as policy_module\n", + "from open_spiel.python.algorithms import best_response as best_response_module\n", + "from open_spiel.python.algorithms import expected_game_score\n", + "from open_spiel.python.games import dynamic_routing_to_mean_field_game\n", + "from open_spiel.python.games import dynamic_routing_data\n", + "from open_spiel.python.mfg.algorithms import distribution as distribution_module\n", + "from open_spiel.python.mfg.algorithms import nash_conv as nash_conv_module\n", + "from open_spiel.python.mfg.algorithms import policy_value\n", + "from open_spiel.python.mfg.games import dynamic_routing as mean_field_routing_game\n", + "\n", + "from open_spiel.data.paper_data.routing_game_experiments.utils import *" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NT_nE0gNCvWT" + }, + "source": [ + "\u003ca name='braess_paradox'\u003e\u003c/a\u003e\n", + "\n", + "## 1. Reproducing the Braess paradox with the mean field routing game\n", + "\n", + "This is used to produce figure 1 of the article."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vtMBGtGUCvWT" + }, + "outputs": [], + "source": [ + "BRAESS_NUM_VEHICLES = 4\n", + "BRAESS_ORIGIN = 'A-\u003eB'\n", + "BRAESS_DESTINATION = 'E-\u003eF'\n", + "BRAESS_TIME_STEP_LENGTH = 0.25\n", + "BRAESS_MAX_TIME_STEP = int(4.0/BRAESS_TIME_STEP_LENGTH) + 1\n", + "\n", + "BRAESS_GRAPH = create_braess_network(BRAESS_NUM_VEHICLES)\n", + "plot_network_n_player_game(BRAESS_GRAPH)\n", + "\n", + "BRAESS_GAME, BRAESS_SEQ_GAME, BRAESS_MFG_GAME = create_games(\n", + " BRAESS_ORIGIN, BRAESS_DESTINATION, BRAESS_NUM_VEHICLES, BRAESS_GRAPH, BRAESS_MAX_TIME_STEP,\n", + " BRAESS_TIME_STEP_LENGTH)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tyEu3c8TCvWU" + }, + "outputs": [], + "source": [ + "# Online Mirror Descent\n", + "\n", + "md_p_init = mirror_descent.MirrorDescent(BRAESS_MFG_GAME, lr=1)\n", + "mfmd_timing, mfmd_policy, mfmd_nash_conv, mfmd_policy_value, md_p = online_mirror_descent(\n", + " BRAESS_MFG_GAME, 10, compute_metrics=True, return_policy=True, md_p=md_p_init)\n", + "evolve_mean_field_game(BRAESS_MFG_GAME, mfmd_policy, BRAESS_GRAPH)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uQBmhHRJCvWV" + }, + "source": [ + "\u003ca name='efficiency'\u003e\u003c/a\u003e\n", + "## 2. Computation time of algorithms to compute Nash equibrium in N-player and mean field games as a function of the number of players.\n", + "\n", + "This is used to produce figure 2 of the article.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9X2sHRxcCvWV" + }, + "outputs": [], + "source": [ + "timing_n_player_results = {}\n", + "timing_mean_field_results = {}\n", + "NUM_ALGO_ITERATIONS = 10\n", + "\n", + "for num_vehicles in range(5, 45, 5):\n", + " braess_game, braess_seq_game, braess_mfg_game = create_games(\n", + " BRAESS_ORIGIN, BRAESS_DESTINATION, num_vehicles, BRAESS_GRAPH, BRAESS_MAX_TIME_STEP,\n", + " BRAESS_TIME_STEP_LENGTH)\n", + " ext_cfr_timing, ext_cfr_policy = external_sampling_monte_carlo_counterfactual_regret_minimization(braess_seq_game, NUM_ALGO_ITERATIONS)\n", + " mfmd_timing, mfmd_policy = online_mirror_descent(braess_mfg_game, NUM_ALGO_ITERATIONS, compute_metrics=False)\n", + " timing_n_player_results[num_vehicles] = ext_cfr_timing\n", + " timing_mean_field_results[num_vehicles] = mfmd_timing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_wQc6WejCvWW" + }, + "outputs": [], + "source": [ + "plt.plot(list(timing_mean_field_results), list(timing_mean_field_results.values()), '-o', label=f'{NUM_ALGO_ITERATIONS} iterations of MFG OMD')\n", + "plt.plot(list(timing_n_player_results), list(timing_n_player_results.values()), '--xr', label=f'{NUM_ALGO_ITERATIONS} iterations of N-player CFR')\n", + "plt.legend()\n", + "plt.yscale('log')\n", + "plt.xlabel('Number of players')\n", + "plt.ylabel('Computation time')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dl-LhvaOCvWW" + }, + "source": [ + "\u003ca name='sioux_falls'\u003e\u003c/a\u003e\n", + "## 3. Solving large games with mean field online mirror descent algorithm: 14,000 vehicles in the Sioux Falls network\n", + "\n", + "This is used to produce figure 4 and 5 of the article.\n", + "Depending on the computer used, the computation can take a long time. 
On the MacBook Pro 2019 with macOS Big Sur 11.6 it tooks around 10 hours.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EBk50rMbCvWW" + }, + "outputs": [], + "source": [ + "plot_network_n_player_game(dynamic_routing_data.SIOUX_FALLS_NETWORK)\n", + "\n", + "SIOUX_FALLS_TIME_STEP_LENGTH = 0.5 # 0.2\n", + "SIOUX_FALLS_MAX_TIME_STEP = int(40.0/SIOUX_FALLS_TIME_STEP_LENGTH) + 1 # 0.25\n", + "\n", + "SIOUX_MFG_GAME = mean_field_routing_game.MeanFieldRoutingGame(\n", + " {\"max_num_time_step\": SIOUX_FALLS_MAX_TIME_STEP, \"time_step_length\": SIOUX_FALLS_TIME_STEP_LENGTH},\n", + " network=dynamic_routing_data.SIOUX_FALLS_NETWORK,\n", + " od_demand=dynamic_routing_data.SIOUX_FALLS_DUMMY_OD_DEMAND)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vu74Z59BCvWX" + }, + "outputs": [], + "source": [ + "def online_mirror_descent_sioux_falls(mfg_game,\n", + " number_of_iterations,\n", + " md_p=None):\n", + " nash_conv_dict = {}\n", + " md = md_p if md_p else mirror_descent.MirrorDescent(mfg_game)\n", + " tick_time = time.time()\n", + " for i in range(number_of_iterations):\n", + " if i \u003c 32:\n", + " md.iteration(learning_rate=1)\n", + " elif i \u003c 64:\n", + " md.iteration(learning_rate=0.1)\n", + " else:\n", + " md.iteration(learning_rate=0.01)\n", + " md_policy = md.get_policy()\n", + " nash_conv_md = nash_conv_module.NashConv(mfg_game, md_policy)\n", + " nash_conv_dict[i] = nash_conv_md.nash_conv()\n", + " print((f\"Iteration {i}, Nash conv: {nash_conv_md.nash_conv()}, \"\n", + " f\"time: {time.time() - tick_time}\"))\n", + " timing = time.time() - tick_time\n", + " md_policy = md.get_policy()\n", + " distribution_mfg = distribution_module.DistributionPolicy(mfg_game, md_policy)\n", + " policy_value_ = policy_value.PolicyValue(\n", + " mfg_game, distribution_mfg, md_policy).value(mfg_game.new_initial_state())\n", + " nash_conv_md = nash_conv_module.NashConv(mfg_game, md_policy)\n", + " return timing, md_policy, nash_conv_md, policy_value_, md, nash_conv_dict\n", + "\n", + "md_p_init = mirror_descent.MirrorDescent(SIOUX_MFG_GAME, lr=1)\n", + "mfmd_timing, mfmd_policy, mfmd_nash_conv, mfmd_policy_value, md_p, nash_conv_dict = online_mirror_descent_sioux_falls(\n", + " SIOUX_MFG_GAME, 100, md_p=md_p_init)\n", + "\n", + "print(f\"Online mirror descent nash conv: {mfmd_nash_conv.nash_conv()}\")\n", + "print(f\"Online mirror descent timing: {mfmd_timing}\")\n", + "\n", + "tick_time = time.time()\n", + "evolve_mean_field_game(SIOUX_MFG_GAME, mfmd_policy, dynamic_routing_data.SIOUX_FALLS_NETWORK)\n", + "print(time.time() - tick_time)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nHRzB7GwCvWX" + }, + "outputs": [], + "source": [ + "plt.plot(list(nash_conv_dict), list(nash_conv_dict.values()), 'x') #, label='Online mirror descent')\n", + "plt.legend()\n", + "plt.xlabel('Number of iterations')\n", + "plt.ylabel('Average deviation incentive')\n", + "plt.show()\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YEJibGoUCvWX" + }, + "source": [ + "\u003ca name='multiple_destinations'\u003e\u003c/a\u003e\n", + "## 4. Augmented Braess network with multiple origin destinations.\n", + "\n", + "This is used to produce figure 7 of the article." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gqdZ0556CvWY" + }, + "outputs": [], + "source": [ + "AUG_BRAESS_GRAPH = create_augmented_braess_network(150)\n", + "plot_network_n_player_game(AUG_BRAESS_GRAPH)\n", + "\n", + "AUG_BRAESS_OD_DEMAND = [\n", + " dynamic_routing_utils.OriginDestinationDemand('A-\u003eB', 'E-\u003eF', 0, 50),\n", + " dynamic_routing_utils.OriginDestinationDemand('A-\u003eB', 'E-\u003eF', 0.5, 50),\n", + " dynamic_routing_utils.OriginDestinationDemand('A-\u003eB', 'E-\u003eF', 1, 50),\n", + " dynamic_routing_utils.OriginDestinationDemand('A-\u003eB', 'D-\u003eG', 0, 50),\n", + " dynamic_routing_utils.OriginDestinationDemand('A-\u003eB', 'D-\u003eG', 1, 50)]\n", + "\n", + "AUG_BRAESS_TIME_STEP_LENGTH = 0.05\n", + "AUG_BRAESS_MAX_TIME_STEP = int(8.0/AUG_BRAESS_TIME_STEP_LENGTH) + 1\n", + "\n", + "AUG_BRAESS_MFG_GAME = mean_field_routing_game.MeanFieldRoutingGame(\n", + " {\"max_num_time_step\": AUG_BRAESS_MAX_TIME_STEP, \"time_step_length\": AUG_BRAESS_TIME_STEP_LENGTH},\n", + " network=AUG_BRAESS_GRAPH, od_demand=AUG_BRAESS_OD_DEMAND)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nyQMVKrgCvWY" + }, + "outputs": [], + "source": [ + "# Online Mirror Descent\n", + "\n", + "md_p_init = mirror_descent.MirrorDescent(AUG_BRAESS_MFG_GAME, lr=1)\n", + "mfmd_timing, mfmd_policy, mfmd_nash_conv, mfmd_policy_value, md_p = online_mirror_descent(\n", + " AUG_BRAESS_MFG_GAME, 20, compute_metrics=True, return_policy=True, md_p=md_p_init)\n", + "evolve_mean_field_game(AUG_BRAESS_MFG_GAME, mfmd_policy, AUG_BRAESS_GRAPH)\n", + "\n", + "print(f\"Online mirror descent nash conv: {mfmd_nash_conv.nash_conv()}\")\n", + "print(f\"Online mirror descent timing: {mfmd_timing}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2Qiv3_1DCvWY" + }, + "source": [ + "\u003ca name='pigou_deviation'\u003e\u003c/a\u003e\n", + "## 5. Average deviation of the mean field equilibrium policy in the N-player Pigou network game as a function of N.\n", + "\n", + "This is used to produce figure 3 of the article." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-rwrioOmCvWY" + }, + "outputs": [], + "source": [ + "def create_series_parallel_network(num_network_in_series,\n", + " time_step_length=1,\n", + " capacity=1):\n", + " i = 0\n", + " origin = \"A_0-\u003eB_0\"\n", + " graph_dict = {}\n", + " while i \u003c num_network_in_series:\n", + " graph_dict.update({\n", + " f\"A_{i}\": {\n", + " \"connection\": {\n", + " f\"B_{i}\": {\n", + " \"a\": 0,\n", + " \"b\": 1.0,\n", + " \"capacity\": capacity,\n", + " \"free_flow_travel_time\": time_step_length\n", + " }\n", + " },\n", + " \"location\": [0 + 3 * i, 0]\n", + " },\n", + " f\"B_{i}\": {\n", + " \"connection\": {\n", + " f\"C_{i}\": {\n", + " \"a\": 0.0,\n", + " \"b\": 1.0,\n", + " \"capacity\": capacity,\n", + " \"free_flow_travel_time\": 2.0\n", + " },\n", + " f\"D_{i}\": {\n", + " \"a\": 2.0,\n", + " \"b\": 1.0,\n", + " \"capacity\": capacity,\n", + " \"free_flow_travel_time\": 1.0\n", + " }\n", + " },\n", + " \"location\": [1 + 3 * i, 0]\n", + " },\n", + " f\"C_{i}\": {\n", + " \"connection\": {\n", + " f\"A_{i+1}\": {\n", + " \"a\": 0,\n", + " \"b\": 1.0,\n", + " \"capacity\": capacity,\n", + " \"free_flow_travel_time\": time_step_length\n", + " }\n", + " },\n", + " \"location\": [2 + 3 * i, 1]\n", + " },\n", + " f\"D_{i}\": {\n", + " \"connection\": {\n", + " f\"A_{i+1}\": {\n", + " \"a\": 0,\n", + " \"b\": 1.0,\n", + " \"capacity\": capacity,\n", + " \"free_flow_travel_time\": time_step_length\n", + " }\n", + " },\n", + " \"location\": [2 + 3 * i, -1]\n", + " }\n", + " })\n", + " i += 1\n", + " graph_dict[f\"A_{i}\"] = {\n", + " \"connection\": {\n", + " \"END\": {\n", + " \"a\": 0,\n", + " \"b\": 1.0,\n", + " \"capacity\": capacity,\n", + " \"free_flow_travel_time\": time_step_length\n", + " }\n", + " },\n", + " \"location\": [0 + 3 * i, 0]\n", + " }\n", + " graph_dict[\"END\"] = {\"connection\": {}, \"location\": [1 + 3 * i, 0]}\n", + " time_horizon = int(5.0 * (num_network_in_series + 1) / time_step_length)\n", + " destination = f\"A_{i}-\u003eEND\"\n", + " adjacency_list = {\n", + " key: list(value[\"connection\"].keys())\n", + " for key, value in graph_dict.items()\n", + " }\n", + " bpr_a_coefficient = {}\n", + " bpr_b_coefficient = {}\n", + " capacity = {}\n", + " free_flow_travel_time = {}\n", + " for o_node, value_dict in graph_dict.items():\n", + " for d_node, section_dict in value_dict[\"connection\"].items():\n", + " road_section = dynamic_routing_utils._road_section_from_nodes(\n", + " origin=o_node, destination=d_node)\n", + " bpr_a_coefficient[road_section] = section_dict[\"a\"]\n", + " bpr_b_coefficient[road_section] = section_dict[\"b\"]\n", + " capacity[road_section] = section_dict[\"capacity\"]\n", + " free_flow_travel_time[road_section] = section_dict[\n", + " \"free_flow_travel_time\"]\n", + " node_position = {key: value[\"location\"] for key, value in graph_dict.items()}\n", + " return dynamic_routing_utils.Network(\n", + " adjacency_list,\n", + " node_position=node_position,\n", + " bpr_a_coefficient=bpr_a_coefficient,\n", + " bpr_b_coefficient=bpr_b_coefficient,\n", + " capacity=capacity,\n", + " free_flow_travel_time=free_flow_travel_time\n", + " ), origin, destination, time_horizon" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "r9oB7fYJCvWZ" + }, + "outputs": [], + "source": [ + "class GoUp(PurePolicyResponse):\n", + "\n", + " def pure_action(self, state):\n", + " location = 
state.get_current_vehicle_locations()[self.player_id].split(\n", + " \"-\u003e\")[1]\n", + " if location == \"B_0\":\n", + " return state.get_game().network.get_action_id_from_movement(\"B_0\", \"C_0\")\n", + " else:\n", + " return 0\n", + "\n", + "def compute_regret_policy_against_pure_policy_pigou_sim_game(game,\n", + " policy,\n", + " compute_true_value=False,\n", + " num_sample=100):\n", + " time_tick = time.time()\n", + " if compute_true_value:\n", + " expected_value_policy = expected_game_score.policy_value(\n", + " game.new_initial_state(), policy)[0]\n", + " else:\n", + " expected_value_policy = get_expected_value_sim_game(game, policy, num_sample)\n", + " worse_regret = 0\n", + " deviation_policy = GoUp(game, policy, 0)\n", + " if compute_true_value:\n", + " expected_value_noise = expected_game_score.policy_value(\n", + " game.new_initial_state(), deviation_policy)[0]\n", + " else:\n", + " expected_value_noise = get_expected_value_sim_game(\n", + " game, deviation_policy, num_sample, player=0)\n", + " approximate_regret = expected_value_noise - expected_value_policy\n", + " worse_regret = max(worse_regret, approximate_regret)\n", + " return worse_regret, time.time() - time_tick" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NQc2CSdVCvWZ" + }, + "outputs": [], + "source": [ + "num_of_tests = 5\n", + "\n", + "computation_time_pure_policy_nash_conv_dict_large = {}\n", + "pure_policy_nash_conv_n_player_dict_large = {}\n", + "\n", + "PIGOU_TIME_STEP_LENGTH = 0.05\n", + "\n", + "for pigou_num_vehicle in [x for x in range(1, 10, 1)] + [x for x in range(10, 100, 10)]:\n", + " PIGOU_GRAPH, PIGOU_ORIGIN, PIGOU_DESTINATION, PIGOU_MAX_TIME_STEP = create_series_parallel_network(\n", + " 1, time_step_length=PIGOU_TIME_STEP_LENGTH, capacity=pigou_num_vehicle)\n", + "\n", + " PIGOU_GAME, PIGOU_SEQ_GAME, PIGOU_MFG_GAME = create_games(\n", + " PIGOU_ORIGIN, PIGOU_DESTINATION, pigou_num_vehicle, PIGOU_GRAPH, PIGOU_MAX_TIME_STEP,\n", + " PIGOU_TIME_STEP_LENGTH)\n", + "\n", + " md_p_init = mirror_descent.MirrorDescent(PIGOU_MFG_GAME, lr=1)\n", + " mfmd_timing, mfmd_policy, mfmd_nash_conv, mfmd_policy_value, md_p = online_mirror_descent(\n", + " PIGOU_MFG_GAME, 10, compute_metrics=True, return_policy=True, md_p=md_p_init)\n", + " print(f\"Online mirror descent nash conv: {mfmd_nash_conv.nash_conv()}\")\n", + " mfmd_policy_n_player_derived = dynamic_routing_to_mean_field_game.DerivedNPlayerPolicyFromMeanFieldPolicy(\n", + " PIGOU_GAME, mfmd_policy)\n", + "\n", + " nash_conv_n_player_list = []\n", + " computation_time_list = []\n", + "\n", + " # nash_conv_n_player, computation_time = compute_regret_policy_against_pure_policy_pigou_sim_game(\n", + " # PIGOU_GAME, mfmd_policy_n_player_derived, compute_true_value=True)\n", + " for _ in range(num_of_tests):\n", + " nash_conv_n_player, computation_time = compute_regret_policy_against_pure_policy_pigou_sim_game(\n", + " PIGOU_GAME, mfmd_policy_n_player_derived, compute_true_value=False)\n", + " nash_conv_n_player_list.append(nash_conv_n_player)\n", + " computation_time_list.append(computation_time)\n", + " print(f\"Sampled exploitability: {nash_conv_n_player}, computed in {computation_time}\")\n", + " computation_time_pure_policy_nash_conv_dict_large[pigou_num_vehicle] = computation_time_list\n", + " pure_policy_nash_conv_n_player_dict_large[pigou_num_vehicle] = nash_conv_n_player_list\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hbGervxSCvWZ" + }, + "outputs": [], 
+ "source": [ + "import scipy.special\n", + "import matplotlib.pyplot as plt\n", + "pigou_true_average_deviation_incentive = {}\n", + "for num_player in range(1, 100):\n", + " probs = {}\n", + "\n", + " for x in range(num_player):\n", + " probs[(x+1)/num_player] = scipy.special.binom(num_player-1, x)*(0.5**(num_player-1))\n", + "\n", + " assert abs(sum(probs.values())-1) \u003c 1e-4\n", + " e_tt = sum(p*(1.05+2*x) for x, p in probs.items())\n", + " pigou_true_average_deviation_incentive[num_player] = (e_tt-2.05)/2\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "L3SkaTmyCvWa" + }, + "outputs": [], + "source": [ + "\n", + "plt.errorbar(\n", + " list(pure_policy_nash_conv_n_player_dict_large),\n", + " [sum(x)/len(x) for x in pure_policy_nash_conv_n_player_dict_large.values()],\n", + " yerr=[(max(x)-min(x))/2 for x in pure_policy_nash_conv_n_player_dict_large.values()], fmt='-xr', # ls='none',\n", + " label='Sampled') # (mean, min and max, 100 sampled, 5 times)\n", + "plt.plot(list(pigou_true_average_deviation_incentive), list(pigou_true_average_deviation_incentive.values()), '--', label='True Value')\n", + "plt.legend()\n", + "plt.xlabel('Number of players')\n", + "plt.ylabel('Average deviation incentive') # of mean field equilibrium policy\n", + "plt.show()\n", + "\n", + "plt.plot(list(computation_time_pure_policy_nash_conv_dict_large), list([sum(x)/len(x) for x in computation_time_pure_policy_nash_conv_dict_large.values()]), label='Computation time sampled Nash conv')\n", + "plt.legend()\n", + "plt.xlabel('Number of players')\n", + "plt.ylabel('Average deviation incentive computation time')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hXGkE1j-CvWa" + }, + "source": [ + "\u003ca name='braess_deviation'\u003e\u003c/a\u003e\n", + "## 6. Average deviation of the mean field equilibrium policy in the N-player Braess network game as a function of N.\n", + "\n", + "This is used to produce figure 6 of the article." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YFyzZ4GBCvWa" + }, + "outputs": [], + "source": [ + "import scipy.special\n", + "\n", + "p_middle = 0.50\n", + "p_up = 0.25\n", + "p_down = 0.25\n", + "prob_paths = {'up': 0.25, 'middle': 0.5, 'down': 0.25}\n", + "\n", + "time_step = 0.1\n", + "average_deviation_incentive_braess = {}\n", + "for num_other_player in range(1, 60):\n", + " # print(num_other_player)\n", + " def count_lien(location, volume):\n", + " if location == 'B-\u003eC' or location == 'D-\u003eE':\n", + " return 1 + volume/(num_other_player+1)\n", + " elif location == 'A-\u003eB' or new_location == 'E-\u003eF':\n", + " return 0\n", + " elif location == 'C-\u003eD':\n", + " return 0.25\n", + " elif location == 'B-\u003eD' or location == 'C-\u003eE':\n", + " return 2\n", + " raise ValueError()\n", + " probs_go_up = {}\n", + " probs_go_middle = {}\n", + " probs_each_path = {}\n", + "\n", + " for x in range(num_other_player):\n", + " probs_go_up[x] = scipy.special.binom(num_other_player-1, x) * ((p_up+p_middle)**x) * ((p_down)**(num_other_player-1-x))\n", + " for y in range(num_other_player):\n", + " probs_go_middle[(y,x)] = scipy.special.binom(x, y) * ((p_middle/(p_up+p_middle))**y) * ((p_up/(p_up+p_middle))**(x-y))\n", + " if x-y \u003e= 0:\n", + " probs_each_path[(x-y, y, num_other_player-x)] = probs_go_up[x] * probs_go_middle[(y,x)]\n", + "\n", + " returns_per_policy = {}\n", + " for policy_tested in range(3):\n", + " returns = 0\n", + " for key in probs_each_path:\n", + " rewards = {}\n", + " # Do the simulation if the person was on path up\n", + " num_paths_up, num_paths_middle, num_paths_down = key\n", + " if policy_tested == 0:\n", + " path_taken = 'up'\n", + " num_paths_up += 1\n", + " if policy_tested == 1:\n", + " path_taken = 'middle'\n", + " num_paths_middle += 1\n", + " if policy_tested == 2:\n", + " path_taken = 'down'\n", + " num_paths_down += 1\n", + " states = {'A-\u003eB_up': 0.0, 'A-\u003eB_middlemilieu': 0.0, 'A-\u003eB_down': 0.0}\n", + " current_time_step = 0.0\n", + " while True:\n", + " min_waiting_time = min((x for x in states.items() if x[1]\u003e0 or 'E-\u003eF' not in x[0]), key=lambda x: x[1])[1]\n", + " # print(min_waiting_time)\n", + " current_time_step += min_waiting_time\n", + " new_locations = {}\n", + " new_states = {}\n", + " for location_path, waiting_time in states.items():\n", + " location, path = location_path.split('_')\n", + " if path == 'up':\n", + " if waiting_time == min_waiting_time:\n", + " if location == 'A-\u003eB':\n", + " new_location = 'B-\u003eC'\n", + " elif location == 'B-\u003eC':\n", + " new_location = 'C-\u003eE'\n", + " elif location == 'C-\u003eE':\n", + " new_location = 'E-\u003eF'\n", + " elif location == 'E-\u003eF':\n", + " new_location = 'E-\u003eF'\n", + " else:\n", + " raise ValueError()\n", + " new_states[f\"{new_location}_up\"] = -1\n", + " else:\n", + " new_location = location\n", + " new_states[f\"{new_location}_uphaut\"] = waiting_time-min_waiting_time\n", + " if not new_location in new_locations:\n", + " new_locations[new_location] = 0\n", + " new_locations[new_location] += num_paths_up\n", + " elif path == 'middle':\n", + " if waiting_time == min_waiting_time:\n", + " if location == 'A-\u003eB':\n", + " new_location = 'B-\u003eC'\n", + " elif location == 'B-\u003eC':\n", + " new_location = 'C-\u003eD'\n", + " elif location == 'C-\u003eD':\n", + " new_location = 'D-\u003eE'\n", + " elif location == 'D-\u003eE':\n", + " new_location = 'E-\u003eF'\n", + " elif location 
== 'E-\u003eF':\n", + " new_location = 'E-\u003eF'\n", + " else:\n", + " raise ValueError()\n", + " new_states[f\"{new_location}_middle\"] = -1\n", + " else:\n", + " new_location = location\n", + " new_states[f\"{new_location}_middle\"] = waiting_time-min_waiting_time\n", + " if not new_location in new_locations:\n", + " new_locations[new_location] = 0\n", + " new_locations[new_location] += num_paths_middle\n", + " elif path == 'down':\n", + " if waiting_time == min_waiting_time:\n", + " if location == 'A-\u003eB':\n", + " new_location = 'B-\u003eD'\n", + " elif location == 'B-\u003eD':\n", + " new_location = 'D-\u003eE'\n", + " elif location == 'D-\u003eE':\n", + " new_location = 'E-\u003eF'\n", + " elif location == 'E-\u003eF':\n", + " new_location = 'E-\u003eF'\n", + " else:\n", + " raise ValueError()\n", + " new_states[f\"{new_location}_down\"] = -1\n", + " else:\n", + " new_location = location\n", + " new_states[f\"{new_location}_down\"] = waiting_time-min_waiting_time\n", + " if not new_location in new_locations:\n", + " new_locations[new_location] = 0\n", + " new_locations[new_location] += num_paths_down\n", + " should_stop = True\n", + " for location_path, waiting_time in new_states.items():\n", + " if location_path.split('_')[0] != 'E-\u003eF':\n", + " should_stop = False\n", + " else:\n", + " path = location_path.split('_')[1]\n", + " if path not in rewards:\n", + " rewards[path] = current_time_step\n", + " if waiting_time == -1:\n", + " new_location = location_path.split('_')[0]\n", + " new_states[location_path] = count_lien(new_location, new_locations[new_location])\n", + " states = new_states\n", + " if should_stop:\n", + " break\n", + " returns += probs_each_path[key] * rewards[path_taken]\n", + " returns_per_policy[path_taken] = returns\n", + " returns = 0\n", + " for k, v in returns_per_policy.items():\n", + " returns += v * prob_paths[k]\n", + " average_deviation_incentive_braess[num_other_player+1] = returns - min(returns_per_policy.values())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Dbnd7ZysCvWa" + }, + "outputs": [], + "source": [ + "plt.plot(list(average_deviation_incentive_braess), list(average_deviation_incentive_braess.values()), 'x', label='mean field policy in N player')\n", + "plt.legend()\n", + "# plt.title('Average deviation incentive of the mean field policy in the N player game as a function of N.')\n", + "plt.xlabel('Number of players')\n", + "plt.ylabel('Average deviation incentive')\n", + "plt.show()" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "last_runtime": { + "build_target": "//experimental/cabannes:colab", + "kind": "private" + }, + "name": "Experiments.ipynb", + "private_outputs": true, + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/data/paper_data/routing_game_experiments/readme.md b/scenarios/bargaining/open_spiel/open_spiel/data/paper_data/routing_game_experiments/readme.md new file mode 100644 index 0000000..ffeed4d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/data/paper_data/routing_game_experiments/readme.md @@ -0,0 +1,24 @@ +# 
Reproducing routing game experiments + +To reproduce the experiments done in [*Solving N-player dynamic routing games +with congestion: a mean field approach, Cabannes et +al.*](https://dl.acm.org/doi/10.5555/3535850.3536033): + +1. If you have not, download [Python](https://www.python.org/downloads/) and an + IDE that can run IPython notebooks (either [jupyter](https://jupyter.org) or + [VSCode](https://code.visualstudio.com)). +2. Install OpenSpiel using + [pip install open_spiel](https://github.com/deepmind/open_spiel/blob/master/docs/install.md) + or from + [source](https://github.com/deepmind/open_spiel/blob/master/docs/install.md#installation-from-source). +3. Download the + [`Experiments.ipynb` IPython notebook](https://github.com/deepmind/open_spiel/tree/master/open_spiel/data/paper_data/routing_game_experiments/Experiments.ipynb). +4. Run the IPython notebook. You might need to install the dependent Python + libraries. + +# License + +This code is under the Open Spiel license. Please cite the paper [*Solving +N-player dynamic routing games with congestion: a mean field approach, Cabannes +et al.*](https://dl.acm.org/doi/10.5555/3535850.3536033) when re-using this +code. Feel free to send an email to theophile@berkeley.edu for any questions. diff --git a/scenarios/bargaining/open_spiel/open_spiel/data/paper_data/routing_game_experiments/utils.py b/scenarios/bargaining/open_spiel/open_spiel/data/paper_data/routing_game_experiments/utils.py new file mode 100644 index 0000000..5518780 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/data/paper_data/routing_game_experiments/utils.py @@ -0,0 +1,1113 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
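Before running `Experiments.ipynb` from the readme above, it can be worth confirming that the OpenSpiel Python modules the notebook relies on actually import. The sketch below is not part of the upstream files; it only reuses the `MeanFieldRoutingGame` constructor arguments and `dynamic_routing_data` networks that already appear in the notebook, and the small `max_num_time_step` value is an arbitrary choice for a quick smoke test.

```python
# Minimal smoke test (a sketch, not part of the upstream repo): confirm the
# OpenSpiel modules used by Experiments.ipynb import cleanly and that a small
# mean field routing game can be instantiated. max_num_time_step=10 is an
# arbitrary value chosen only to keep the check fast.
from open_spiel.python.games import dynamic_routing_data
from open_spiel.python.mfg.games import dynamic_routing as mean_field_routing_game

game = mean_field_routing_game.MeanFieldRoutingGame(
    {"max_num_time_step": 10, "time_step_length": 0.5},
    network=dynamic_routing_data.SIOUX_FALLS_NETWORK,
    od_demand=dynamic_routing_data.SIOUX_FALLS_DUMMY_OD_DEMAND)
print(type(game).__name__)  # A usable install should print the game class name.
```

If this fails, installing OpenSpiel from source (step 2 of the readme) before opening the notebook is the likely fix.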
+ +"""Utils function for routing game experiment.""" +# pylint:disable=too-many-lines,import-error,missing-function-docstring,protected-access,too-many-locals,invalid-name,too-many-arguments,too-many-branches,missing-class-docstring,too-few-public-methods + +# pylint:disable=line-too-long +import random +import time + +import matplotlib.pyplot as plt +import numpy as np +import tensorflow.compat.v1 as tf + +from open_spiel.python import policy as policy_module +from open_spiel.python import rl_environment +from open_spiel.python.algorithms import cfr +from open_spiel.python.algorithms import expected_game_score +from open_spiel.python.algorithms import exploitability +from open_spiel.python.algorithms import external_sampling_mccfr as external_mccfr +from open_spiel.python.algorithms import fictitious_play +from open_spiel.python.algorithms import nfsp +from open_spiel.python.algorithms import noisy_policy +from open_spiel.python.games import dynamic_routing +from open_spiel.python.games import dynamic_routing_utils +from open_spiel.python.mfg.algorithms import distribution as distribution_module +from open_spiel.python.mfg.algorithms import fictitious_play as mean_field_fictitious_play_module +from open_spiel.python.mfg.algorithms import mirror_descent +from open_spiel.python.mfg.algorithms import nash_conv as nash_conv_module +from open_spiel.python.mfg.algorithms import policy_value +from open_spiel.python.mfg.games import dynamic_routing as mean_field_routing_game +import pyspiel +# pylint:enable=line-too-long + + +def create_games(origin, + destination, + num_vehicles, + graph, + max_time_step, + time_step_length=1.0, + departure_time=None): + if departure_time is not None: + raise NotImplementedError("To do.") + list_of_vehicles = [ + dynamic_routing_utils.Vehicle(origin, destination) + for _ in range(num_vehicles) + ] + game = dynamic_routing.DynamicRoutingGame( + { + "max_num_time_step": max_time_step, + "time_step_length": time_step_length + }, + network=graph, + vehicles=list_of_vehicles) + seq_game = pyspiel.convert_to_turn_based(game) + od_demand = [ + dynamic_routing_utils.OriginDestinationDemand(origin, destination, 0, + num_vehicles) + ] + mfg_game = mean_field_routing_game.MeanFieldRoutingGame( + { + "max_num_time_step": max_time_step, + "time_step_length": time_step_length + }, + network=graph, + od_demand=od_demand) + return game, seq_game, mfg_game + + +def create_braess_network(capacity): + graph_dict = { + "A": { + "connection": { + "B": { + "a": 0, + "b": 1.0, + "capacity": capacity, + "free_flow_travel_time": 0 + } + }, + "location": [0, 0] + }, + "B": { + "connection": { + "C": { + "a": 1.0, + "b": 1.0, + "capacity": capacity, + "free_flow_travel_time": 1.0 + }, + "D": { + "a": 0, + "b": 1.0, + "capacity": capacity, + "free_flow_travel_time": 2.0 + } + }, + "location": [1, 0] + }, + "C": { + "connection": { + "D": { + "a": 0, + "b": 1.0, + "capacity": capacity, + "free_flow_travel_time": 0.25 + }, + "E": { + "a": 0, + "b": 1.0, + "capacity": capacity, + "free_flow_travel_time": 2.0 + } + }, + "location": [2, 1] + }, + "D": { + "connection": { + "E": { + "a": 1, + "b": 1.0, + "capacity": capacity, + "free_flow_travel_time": 1.0 + } + }, + "location": [2, -1] + }, + "E": { + "connection": { + "F": { + "a": 0, + "b": 1.0, + "capacity": capacity, + "free_flow_travel_time": 0.0 + } + }, + "location": [3, 0] + }, + "F": { + "connection": {}, + "location": [4, 0] + } + } + adjacency_list = { + key: list(value["connection"].keys()) + for key, value in graph_dict.items() + } 
+ bpr_a_coefficient = {} + bpr_b_coefficient = {} + capacity = {} + free_flow_travel_time = {} + for o_node, value_dict in graph_dict.items(): + for d_node, section_dict in value_dict["connection"].items(): + road_section = dynamic_routing_utils._road_section_from_nodes( + origin=o_node, destination=d_node) + bpr_a_coefficient[road_section] = section_dict["a"] + bpr_b_coefficient[road_section] = section_dict["b"] + capacity[road_section] = section_dict["capacity"] + free_flow_travel_time[road_section] = section_dict[ + "free_flow_travel_time"] + node_position = {key: value["location"] for key, value in graph_dict.items()} + return dynamic_routing_utils.Network( + adjacency_list, + node_position=node_position, + bpr_a_coefficient=bpr_a_coefficient, + bpr_b_coefficient=bpr_b_coefficient, + capacity=capacity, + free_flow_travel_time=free_flow_travel_time) + + +def create_augmented_braess_network(capacity): + graph_dict = { + "A": { + "connection": { + "B": { + "a": 0, + "b": 1.0, + "capacity": capacity, + "free_flow_travel_time": 0 + } + }, + "location": [0, 0] + }, + "B": { + "connection": { + "C": { + "a": 1.0, + "b": 1.0, + "capacity": capacity, + "free_flow_travel_time": 1.0 + }, + "D": { + "a": 0, + "b": 1.0, + "capacity": capacity, + "free_flow_travel_time": 2.0 + } + }, + "location": [1, 0] + }, + "C": { + "connection": { + "D": { + "a": 0, + "b": 1.0, + "capacity": capacity, + "free_flow_travel_time": 0.25 + }, + "E": { + "a": 0, + "b": 1.0, + "capacity": capacity, + "free_flow_travel_time": 2.0 + } + }, + "location": [2, 1] + }, + "D": { + "connection": { + "E": { + "a": 1, + "b": 1.0, + "capacity": capacity, + "free_flow_travel_time": 1.0 + }, + "G": { + "a": 0, + "b": 1.0, + "capacity": capacity, + "free_flow_travel_time": 0.0 + } + }, + "location": [2, -1] + }, + "E": { + "connection": { + "F": { + "a": 0, + "b": 1.0, + "capacity": capacity, + "free_flow_travel_time": 0.0 + } + }, + "location": [3, 0] + }, + "F": { + "connection": {}, + "location": [4, 0] + }, + "G": { + "connection": {}, + "location": [3, -1] + } + } + adjacency_list = { + key: list(value["connection"].keys()) + for key, value in graph_dict.items() + } + bpr_a_coefficient = {} + bpr_b_coefficient = {} + capacity = {} + free_flow_travel_time = {} + for o_node, value_dict in graph_dict.items(): + for d_node, section_dict in value_dict["connection"].items(): + road_section = dynamic_routing_utils._road_section_from_nodes( + origin=o_node, destination=d_node) + bpr_a_coefficient[road_section] = section_dict["a"] + bpr_b_coefficient[road_section] = section_dict["b"] + capacity[road_section] = section_dict["capacity"] + free_flow_travel_time[road_section] = section_dict[ + "free_flow_travel_time"] + node_position = {key: value["location"] for key, value in graph_dict.items()} + return dynamic_routing_utils.Network( + adjacency_list, + node_position=node_position, + bpr_a_coefficient=bpr_a_coefficient, + bpr_b_coefficient=bpr_b_coefficient, + capacity=capacity, + free_flow_travel_time=free_flow_travel_time) + + +def create_series_parallel_network(num_network_in_series, + time_step_length=1, + capacity=1): + i = 0 + origin = "A_0->B_0" + graph_dict = {} + while i < num_network_in_series: + tt_up = random.random() + time_step_length + tt_down = random.random() + time_step_length + graph_dict.update({ + f"A_{i}": { + "connection": { + f"B_{i}": { + "a": 0, + "b": 1.0, + "capacity": capacity, + "free_flow_travel_time": time_step_length + } + }, + "location": [0 + 3 * i, 0] + }, + f"B_{i}": { + "connection": { + f"C_{i}": { 
+ "a": 1.0, + "b": 1.0, + "capacity": capacity, + "free_flow_travel_time": tt_up + }, + f"D_{i}": { + "a": 1.0, + "b": 1.0, + "capacity": capacity, + "free_flow_travel_time": tt_down + } + }, + "location": [1 + 3 * i, 0] + }, + f"C_{i}": { + "connection": { + f"A_{i+1}": { + "a": 0, + "b": 1.0, + "capacity": capacity, + "free_flow_travel_time": time_step_length + } + }, + "location": [2 + 3 * i, 1] + }, + f"D_{i}": { + "connection": { + f"A_{i+1}": { + "a": 0, + "b": 1.0, + "capacity": capacity, + "free_flow_travel_time": time_step_length + } + }, + "location": [2 + 3 * i, -1] + } + }) + i += 1 + graph_dict[f"A_{i}"] = { + "connection": { + "END": { + "a": 0, + "b": 1.0, + "capacity": capacity, + "free_flow_travel_time": time_step_length + } + }, + "location": [0 + 3 * i, 0] + } + graph_dict["END"] = {"connection": {}, "location": [1 + 3 * i, 0]} + time_horizon = int(3.0 * (num_network_in_series + 1) / time_step_length) + destination = f"A_{i}->END" + adjacency_list = { + key: list(value["connection"].keys()) + for key, value in graph_dict.items() + } + bpr_a_coefficient = {} + bpr_b_coefficient = {} + capacity = {} + free_flow_travel_time = {} + for o_node, value_dict in graph_dict.items(): + for d_node, section_dict in value_dict["connection"].items(): + road_section = dynamic_routing_utils._road_section_from_nodes( + origin=o_node, destination=d_node) + bpr_a_coefficient[road_section] = section_dict["a"] + bpr_b_coefficient[road_section] = section_dict["b"] + capacity[road_section] = section_dict["capacity"] + free_flow_travel_time[road_section] = section_dict[ + "free_flow_travel_time"] + node_position = {key: value["location"] for key, value in graph_dict.items()} + return dynamic_routing_utils.Network( + adjacency_list, + node_position=node_position, + bpr_a_coefficient=bpr_a_coefficient, + bpr_b_coefficient=bpr_b_coefficient, + capacity=capacity, + free_flow_travel_time=free_flow_travel_time + ), origin, destination, time_horizon + + +def plot_network_n_player_game(g: dynamic_routing_utils.Network, + vehicle_locations=None): + """Plot the network. + + Args: + g: network to plot + vehicle_locations: vehicle location + """ + _, ax = plt.subplots() + o_xs, o_ys, d_xs, d_ys = g.return_list_for_matplotlib_quiver() + ax.quiver( + o_xs, + o_ys, + np.subtract(d_xs, o_xs), + np.subtract(d_ys, o_ys), + color="b", + angles="xy", + scale_units="xy", + scale=1) + ax.set_xlim([ + np.min(np.concatenate((o_xs, d_xs))) - 0.5, + np.max(np.concatenate((o_xs, d_xs))) + 0.5 + ]) + ax.set_ylim([ + np.min(np.concatenate((o_ys, d_ys))) - 0.5, + np.max(np.concatenate((o_ys, d_ys))) + 0.5 + ]) + + if vehicle_locations is not None: + num_vehicle = len(vehicle_locations) + dict_location = {} + for vehicle_location in vehicle_locations: + if vehicle_location not in dict_location: + dict_location[vehicle_location] = 0.0 + dict_location[vehicle_location] += 0.3 / num_vehicle + for point, width in dict_location.items(): + circle = plt.Circle(point, width, color="r") + ax.add_patch(circle) + + +def plot_network_mean_field_game(g: dynamic_routing_utils.Network, + distribution=None, + scaling=1): + """Plot the network. + + Args: + g: network to plot + distribution: the distribution. + scaling: scaling factor. for plot rendering. 
+ """ + _, ax = plt.subplots() + o_xs, o_ys, d_xs, d_ys = g.return_list_for_matplotlib_quiver() + ax.quiver( + o_xs, + o_ys, + np.subtract(d_xs, o_xs), + np.subtract(d_ys, o_ys), + color="b", + angles="xy", + scale_units="xy", + scale=1) + ax.set_xlim([ + np.min(np.concatenate((o_xs, d_xs))) - 0.5, + np.max(np.concatenate((o_xs, d_xs))) + 0.5 + ]) + ax.set_ylim([ + np.min(np.concatenate((o_ys, d_ys))) - 0.5, + np.max(np.concatenate((o_ys, d_ys))) + 0.5 + ]) + + if distribution is not None: + for x, prob_of_position in distribution.items(): + point = g.return_position_of_road_section(x) + width = 0.3 * scaling * prob_of_position + circle = plt.Circle(point, width, color="r") + ax.add_patch(circle) + + +def evolve_n_player_simultaneous_game(game, policy, graph): + state = game.new_initial_state() + i = 0 + while not state.is_terminal(): + i += 1 + if state.is_chance_node(): + # Sample a chance event outcome. + outcomes_with_probs = state.chance_outcomes() + action_list, prob_list = zip(*outcomes_with_probs) + action = np.random.choice(action_list, p=prob_list) + state.apply_action(action) + elif state.is_simultaneous_node(): + # Simultaneous node: sample actions for all players. + chosen_actions = [] + for i in range(game.num_players()): + legal_actions = state.legal_actions(i) + state_policy = policy(state, i) + assert len(legal_actions) == len(state_policy), ( + f"{legal_actions} not same length than {state_policy}") + chosen_actions.append( + random.choices(legal_actions, + [state_policy[a] for a in legal_actions])[0]) + state.apply_actions(chosen_actions) + else: + raise ValueError( + "State should either be simultaneous node or change node.") + plot_network_n_player_game(graph, [ + graph.return_position_of_road_section(x) + for x in state.get_current_vehicle_locations() + ]) + print(f"Travel times: {[-x for x in state.returns()]}") + + +def evolve_n_player_sequential_game(seq_game, policy, graph, debug=False): + state = seq_game.new_initial_state() + while not state.is_terminal(): + legal_actions = state.legal_actions() + if state.is_chance_node(): + # Sample a chance event outcome. 
+ outcomes_with_probs = state.chance_outcomes() + action_list, prob_list = zip(*outcomes_with_probs) + action = np.random.choice(action_list, p=prob_list) + if debug: + print("------------ Change node ------------") + print( + (f"Possible chance actions: {outcomes_with_probs}, the one taken: " + f"{action}.")) + state.apply_action(action) + else: + if debug: + print("------------ Sequential action node ------------") + print(state.information_state_tensor()) + print(state.observation_tensor()) + print(state.information_state_string()) + if policy is not None: + state_policy = policy(state) + vehicle_location = [ + s.replace("'", "") + for s in str(state).split("[")[1].split("]")[0].split(", ") + ] + if debug: + print((f"Policy for player {state.current_player()} at location " + f"{vehicle_location[state.current_player()]}: ") + + str([(str(graph.get_road_section_from_action_id(k)) + + f"with probability {v}") + for k, v in state_policy.items()])) + assert set(state_policy) == set(legal_actions) + action = random.choices(legal_actions, + [state_policy[a] for a in legal_actions]) + assert len(action) == 1 + action = action[0] + else: + action = random.choice(legal_actions) + state.apply_action(action) + vehicle_location = [ + s.replace("'", "") + for s in str(state).split("[")[1].split("]")[0].split(", ") + ] + if debug: + print(vehicle_location) + plot_network_n_player_game( + graph, + [graph.return_position_of_road_section(x) for x in vehicle_location]) + if debug: + print(f"Travel times: {[-x for x in state.returns()]}") + + +def evolve_mean_field_game(mfg_game, + policy, + graph, + scaling=1, + frequency_printing=1): + distribution_mfg = distribution_module.DistributionPolicy(mfg_game, policy) + root_state = mfg_game.new_initial_state() + listing_states = [root_state] + + # plot_network_mean_field_game(graph, {origin: 1}) + i = 0 + while not listing_states[0].is_terminal() and not all( + state._vehicle_without_legal_action for state in listing_states): # pylint:disable=protected-access + assert abs(sum(map(distribution_mfg.value, listing_states)) - 1) < 1e-4, ( + f"{list(map(distribution_mfg.value, listing_states))}") + new_listing_states = [] + list_of_state_seen = set() + # In case chance node: + if listing_states[0].current_player() == pyspiel.PlayerId.CHANCE: + for mfg_state in listing_states: + for action, _ in mfg_state.chance_outcomes(): + new_mfg_state = mfg_state.child(action) + # Do not append twice the same file. + if str(new_mfg_state) not in list_of_state_seen: + new_listing_states.append(new_mfg_state) + list_of_state_seen.add(str(new_mfg_state)) + current_distribution = {} + for mfg_state in new_listing_states: + location = mfg_state._vehicle_location # pylint:disable=protected-access + if location not in current_distribution: + current_distribution[location] = 0 + current_distribution[location] += distribution_mfg.value(mfg_state) + plot_network_mean_field_game(graph, current_distribution, scaling=scaling) + + # In case mean field node: + elif listing_states[0].current_player() == pyspiel.PlayerId.MEAN_FIELD: + for mfg_state in listing_states: + dist_to_register = mfg_state.distribution_support() + + def get_probability_for_state(str_state): + try: + return distribution_mfg.value_str(str_state) + except ValueError: + return 0 + + dist = [ + get_probability_for_state(str_state) + for str_state in dist_to_register + ] + new_mfg_state = mfg_state.clone() + new_mfg_state.update_distribution(dist) + # Do not append twice the same file. 
+ if str(new_mfg_state) not in list_of_state_seen: + new_listing_states.append(new_mfg_state) + list_of_state_seen.add(str(new_mfg_state)) + + # In case action node: + else: + assert (listing_states[0].current_player() == + pyspiel.PlayerId.DEFAULT_PLAYER_ID), "The player id should be 0" + for mfg_state in listing_states: + for action, _ in policy.action_probabilities(mfg_state).items(): + new_mfg_state = mfg_state.child(action) + # Do not append twice the same file. + if str(new_mfg_state) not in list_of_state_seen: + new_listing_states.append(new_mfg_state) + list_of_state_seen.add(str(new_mfg_state)) + current_distribution = {} + for mfg_state in new_listing_states: + location = mfg_state._vehicle_location # pylint:disable=protected-access + if location not in current_distribution: + current_distribution[location] = 0 + current_distribution[location] += distribution_mfg.value(mfg_state) + assert abs(sum(current_distribution.values()) - 1) < 1e-4, ( + f"{current_distribution}") + i += 1 + if i % frequency_printing == 0: + plot_network_mean_field_game( + graph, current_distribution, scaling=scaling) + listing_states = new_listing_states + + +def uniform_policy_n_player(seq_game): + return policy_module.UniformRandomPolicy(seq_game) + + +def first_action_policy_n_player(seq_game): + return policy_module.FirstActionPolicy(seq_game) + + +def ficticious_play(seq_game, number_of_iterations, compute_metrics=False): + xfp_solver = fictitious_play.XFPSolver(seq_game) + tick_time = time.time() + for _ in range(number_of_iterations): + xfp_solver.iteration() + timing = time.time() - tick_time + # print('done') + # average_policies = xfp_solver.average_policy_tables() + tabular_policy = policy_module.TabularPolicy(seq_game) + if compute_metrics: + nash_conv = exploitability.nash_conv(seq_game, xfp_solver.average_policy()) + average_policy_values = expected_game_score.policy_value( + seq_game.new_initial_state(), [tabular_policy]) + return timing, tabular_policy, nash_conv, average_policy_values + return timing, tabular_policy + + +def counterfactual_regret_minimization(seq_game, + number_of_iterations, + compute_metrics=False): + # freq_iteration_printing = number_of_iterations // 10 + cfr_solver = cfr.CFRSolver(seq_game) + tick_time = time.time() + # print("CFRSolver initialized.") + for _ in range(number_of_iterations): + cfr_solver.evaluate_and_update_policy() + # if i % freq_iteration_printing == 0: + # print(f"Iteration {i}") + timing = time.time() - tick_time + # print("Finish.") + if compute_metrics: + nash_conv = exploitability.nash_conv(seq_game, cfr_solver.average_policy()) + return timing, cfr_solver.average_policy(), nash_conv + return timing, cfr_solver.average_policy() + + +def external_sampling_monte_carlo_counterfactual_regret_minimization( + seq_game, number_of_iterations, compute_metrics=False): + cfr_solver = external_mccfr.ExternalSamplingSolver( + seq_game, external_mccfr.AverageType.SIMPLE) + tick_time = time.time() + # print("CFRSolver initialized.") + for _ in range(number_of_iterations): + cfr_solver.iteration() + timing = time.time() - tick_time + # print("Finish.") + if compute_metrics: + nash_conv = exploitability.nash_conv(seq_game, cfr_solver.average_policy()) + return timing, cfr_solver.average_policy(), nash_conv + return timing, cfr_solver.average_policy() + + +class NFSPPolicies(policy_module.Policy): + """Joint policy to be evaluated.""" + + def __init__(self, env, nfsp_policies, mode): + game = env.game + num_players = env.num_players + player_ids = 
list(range(num_players)) + super().__init__(game, player_ids) + self._policies = nfsp_policies + self._mode = mode + self._obs = { + "info_state": [None] * num_players, + "legal_actions": [None] * num_players + } + + def action_probabilities(self, state, player_id=None): + del player_id + cur_player = state.current_player() + legal_actions = state.legal_actions(cur_player) + + self._obs["current_player"] = cur_player + self._obs["info_state"][cur_player] = ( + state.information_state_tensor(cur_player)) + self._obs["legal_actions"][cur_player] = legal_actions + + info_state = rl_environment.TimeStep( + observations=self._obs, rewards=None, discounts=None, step_type=None) + + with self._policies[cur_player].temp_mode_as(self._mode): + p = self._policies[cur_player].step(info_state, is_evaluation=True).probs + prob_dict = {action: p[action] for action in legal_actions} + return prob_dict + + +def neural_ficticious_self_play(seq_game, + num_epoch, + sess, + compute_metrics=False): + env = rl_environment.Environment(seq_game) + # Parameters from the game. + num_players = env.num_players + num_actions = env.action_spec()["num_actions"] + info_state_size = env.observation_spec()["info_state"][0] + + # Parameters for the algorithm. + hidden_layers_sizes = [int(l) for l in [128]] + + kwargs = { + "replay_buffer_capacity": int(2e5), + "reservoir_buffer_capacity": int(2e6), + "min_buffer_size_to_learn": 1000, + "anticipatory_param": 0.1, + "batch_size": 128, + "learn_every": 64, + "rl_learning_rate": 0.01, + "sl_learning_rate": 0.01, + "optimizer_str": "sgd", + "loss_str": "mse", + "update_target_network_every": 19200, + "discount_factor": 1.0, + "epsilon_decay_duration": int(20e6), + "epsilon_start": 0.06, + "epsilon_end": 0.001, + } + + # freq_epoch_printing = num_epoch // 10 + agents = [ + nfsp.NFSP(sess, idx, info_state_size, num_actions, hidden_layers_sizes, + **kwargs) for idx in range(num_players) + ] + joint_avg_policy = NFSPPolicies(env, agents, nfsp.MODE.average_policy) + + sess.run(tf.global_variables_initializer()) + # print("TF initialized.") + tick_time = time.time() + for _ in range(num_epoch): + # if ep % freq_epoch_printing == 0: + # print(f"Iteration {ep}") + time_step = env.reset() + while not time_step.last(): + player_id = time_step.observations["current_player"] + agent_output = agents[player_id].step(time_step) + action_list = [agent_output.action] + time_step = env.step(action_list) + + # Episode is over, step all agents with final info state. 
+ for agent in agents: + agent.step(time_step) + timing = time.time() - tick_time + # print("Finish.") + if compute_metrics: + tabular_policy = joint_avg_policy.TabularPolicy(seq_game) + average_policy_values = expected_game_score.policy_value( + seq_game.new_initial_state(), [tabular_policy]) + nash_conv = exploitability.nash_conv(env.game, joint_avg_policy) + return timing, joint_avg_policy, average_policy_values, nash_conv + return timing, joint_avg_policy + + +def mean_field_uniform_policy(mfg_game, + number_of_iterations, + compute_metrics=False): + del number_of_iterations + uniform_policy = policy_module.UniformRandomPolicy(mfg_game) + if compute_metrics: + distribution_mfg = distribution_module.DistributionPolicy( + mfg_game, uniform_policy) + policy_value_ = policy_value.PolicyValue(mfg_game, distribution_mfg, + uniform_policy).value( + mfg_game.new_initial_state()) + return uniform_policy, policy_value_ + return uniform_policy + + +def mean_field_fictitious_play(mfg_game, + number_of_iterations, + compute_metrics=False): + fp = mean_field_fictitious_play_module.FictitiousPlay(mfg_game) + tick_time = time.time() + for _ in range(number_of_iterations): + fp.iteration() + timing = time.time() - tick_time + fp_policy = fp.get_policy() + # print('learning done') + if compute_metrics: + distribution_mfg = distribution_module.DistributionPolicy( + mfg_game, fp_policy) + # print('distribution done') + policy_value_ = policy_value.PolicyValue(mfg_game, distribution_mfg, + fp_policy).value( + mfg_game.new_initial_state()) + nash_conv_fp = nash_conv_module.NashConv(mfg_game, fp_policy) + return timing, fp_policy, nash_conv_fp, policy_value_ + return timing, fp_policy + + +def online_mirror_descent(mfg_game, + number_of_iterations, + compute_metrics=False, + return_policy=False, + md_p=None): + md = md_p if md_p else mirror_descent.MirrorDescent(mfg_game) + tick_time = time.time() + for _ in range(number_of_iterations): + md.iteration() + timing = time.time() - tick_time + md_policy = md.get_policy() + if compute_metrics: + distribution_mfg = distribution_module.DistributionPolicy( + mfg_game, md_policy) + # print('distribution done') + policy_value_ = policy_value.PolicyValue(mfg_game, distribution_mfg, + md_policy).value( + mfg_game.new_initial_state()) + nash_conv_md = nash_conv_module.NashConv(mfg_game, md_policy) + if return_policy: + return timing, md_policy, nash_conv_md, policy_value_, md + return timing, md_policy, nash_conv_md, policy_value_ + return timing, md_policy + + +class RandomPolicyDeviation: + + def __init__(self): + self.policy_deviation = {} + + def get_policy_deviation(self, state, player_id): + key = (str(state), player_id) + if key not in self.policy_deviation: + assert player_id == state.current_player() + action_probability = [random.random() for a in state.legal_actions()] + self.policy_deviation[key] = [ + x / sum(action_probability) for x in action_probability + ] + return self.policy_deviation[key] + + +def get_results_n_player_sequential_game(seq_game, policy): + state = seq_game.new_initial_state() + while not state.is_terminal(): + legal_actions = state.legal_actions() + if state.is_chance_node(): + outcomes_with_probs = state.chance_outcomes() + action_list, prob_list = zip(*outcomes_with_probs) + action = np.random.choice(action_list, p=prob_list) + else: + state_policy = policy(state) + assert set(state_policy) == set(legal_actions) + action = random.choices(legal_actions, + [state_policy[a] for a in legal_actions]) + assert len(action) == 1 + action = 
action[0] + state.apply_action(action) + return state.returns() + + +def get_list_results_n_player_game(seq_game, policy, num_sample=10): + return [ + get_results_n_player_sequential_game(seq_game, policy) + for _ in range(num_sample) + ] + + +def get_average_results_n_player_game(seq_game, policy, num_sample=10): + result_array = get_list_results_n_player_game(seq_game, policy, num_sample) + return sum([sum(i) / len(i) for i in zip(*result_array)]) / len(result_array) + + +def get_results_n_player_simultaneous_game(game, policy): + state = game.new_initial_state() + i = 0 + while not state.is_terminal(): + i += 1 + if state.is_chance_node(): + # Sample a chance event outcome. + outcomes_with_probs = state.chance_outcomes() + action_list, prob_list = zip(*outcomes_with_probs) + action = np.random.choice(action_list, p=prob_list) + state.apply_action(action) + elif state.is_simultaneous_node(): + # Simultaneous node: sample actions for all players. + chosen_actions = [] + for i in range(game.num_players()): + legal_actions = state.legal_actions(i) + state_policy = policy(state, player_id=i) + assert abs(sum([state_policy[a] for a in legal_actions]) - 1) < 1e-4 + chosen_actions.append( + random.choices(legal_actions, + [state_policy[a] for a in legal_actions])[0]) + state.apply_actions(chosen_actions) + else: + raise ValueError( + "State should either be simultaneous node or change node.") + return state.returns() + + +def get_list_results_n_player_simulataneous_game(game, policy, num_sample=10): + return [ + get_results_n_player_simultaneous_game(game, policy) + for _ in range(num_sample) + ] + + +def get_expected_value(seq_game, policy, num_sample, player=0): + results = get_list_results_n_player_game( + seq_game, policy, num_sample=num_sample) + expected_value = sum(x[player] for x in results) / num_sample + # num_vehicle = len(results[0]) + # error_bar = abs(sum([x[1] for x in results]) - sum( + # [x[2] for x in results])) / num_sample_trajectories + # expected_value_policy = sum(sum(x[i] for x in results) for i in range( + # 1, BRAESS_NUM_VEHICLES)) / ((BRAESS_NUM_VEHICLES-1)*num_sample_trajectories) + return expected_value + + +def compute_regret_policy(game, + policy, + num_random_policy_tested=10, + num_sample=100): + time_tick = time.time() + expected_value_policy = get_expected_value(game, policy, num_sample) + worse_regret = 0 + for _ in range(num_random_policy_tested): + noisy_n_policy = noisy_policy.NoisyPolicy(policy, player_id=0, alpha=1) + expected_value_noise = get_expected_value( + game, noisy_n_policy, num_sample, player=0) + approximate_regret = expected_value_noise - expected_value_policy + worse_regret = max(worse_regret, approximate_regret) + return worse_regret, time.time() - time_tick + + +def get_expected_value_sim_game(game, policy, num_sample, player=0): + results = get_list_results_n_player_simulataneous_game( + game, policy, num_sample=num_sample) + assert len(results) == num_sample + expected_value = sum(x[player] for x in results) / num_sample + # num_vehicle = len(results[0]) + # error_bar = abs(sum([x[1] for x in results]) - sum( + # [x[2] for x in results])) / num_sample_trajectories + # expected_value_policy = sum(sum(x[i] for x in results) for i in range( + # 1, BRAESS_NUM_VEHICLES)) / ((BRAESS_NUM_VEHICLES-1)*num_sample_trajectories) + return expected_value + + +def compute_regret_policy_random_noise_sim_game(game, + policy, + num_random_policy_tested=10, + num_sample=100): + time_tick = time.time() + expected_value_policy = 
get_expected_value_sim_game(game, policy, num_sample) + worse_regret = 0 + for _ in range(num_random_policy_tested): + noisy_n_policy = noisy_policy.NoisyPolicy(policy, player_id=0, alpha=1) + expected_value_noise = get_expected_value_sim_game( + game, noisy_n_policy, num_sample, player=0) + approximate_regret = expected_value_noise - expected_value_policy + worse_regret = max(worse_regret, approximate_regret) + return worse_regret, time.time() - time_tick + + +class PurePolicyResponse(policy_module.Policy): + + def __init__(self, game, policy, player_id): + self.game = game + self.player_id = player_id + self.policy = policy + + def pure_action(self, state): + raise NotImplementedError() + + def action_probabilities(self, state, player_id=None): + assert player_id is not None + if player_id == self.player_id: + legal_actions = state.legal_actions(self.player_id) + if not legal_actions: + return {0: 1.0} + if len(legal_actions) == 1: + return {legal_actions[0]: 1.0} + answer = {action: 0.0 for action in legal_actions} + pure_a = self.pure_action(state) + assert pure_a in answer + answer[pure_a] = 1.0 + return answer + return self.policy.action_probabilities(state, player_id) + + +class PathBCEResponse(PurePolicyResponse): + + def pure_action(self, state): + location = state.get_current_vehicle_locations()[self.player_id].split( + "->")[1] + if location == "B": + return state.get_game().network.get_action_id_from_movement("B", "C") + if location == "C": + return state.get_game().network.get_action_id_from_movement("C", "E") + return 0 + + +class PathBCDEResponse(PurePolicyResponse): + + def pure_action(self, state): + location = state.get_current_vehicle_locations()[self.player_id].split( + "->")[1] + if location == "B": + return state.get_game().network.get_action_id_from_movement("B", "C") + if location == "C": + return state.get_game().network.get_action_id_from_movement("C", "D") + return 0 + + +class PathBDEResponse(PurePolicyResponse): + + def pure_action(self, state): + location = state.get_current_vehicle_locations()[self.player_id].split( + "->")[1] + if location == "B": + return state.get_game().network.get_action_id_from_movement("B", "D") + return 0 + + +def compute_regret_policy_against_pure_policy_sim_game(game, + policy, + compute_true_value=False, + num_sample=100): + time_tick = time.time() + if compute_true_value: + expected_value_policy = expected_game_score.policy_value( + game.new_initial_state(), policy)[0] + else: + expected_value_policy = get_expected_value_sim_game(game, policy, + num_sample) + worse_regret = 0 + policies = [ + PathBCEResponse(game, policy, 0), + PathBCDEResponse(game, policy, 0), + PathBDEResponse(game, policy, 0) + ] + for deviation_policy in policies: + if compute_true_value: + expected_value_noise = expected_game_score.policy_value( + game.new_initial_state(), deviation_policy)[0] + else: + expected_value_noise = get_expected_value_sim_game( + game, deviation_policy, num_sample, player=0) + approximate_regret = expected_value_noise - expected_value_policy + worse_regret = max(worse_regret, approximate_regret) + return worse_regret, time.time() - time_tick + + +def online_mirror_descent_sioux_falls(mfg_game, + number_of_iterations, + md_p=None): + nash_conv_dict = {} + md = md_p if md_p else mirror_descent.MirrorDescent(mfg_game) + tick_time = time.time() + for i in range(number_of_iterations): + md.iteration() + md_policy = md.get_policy() + nash_conv_md = nash_conv_module.NashConv(mfg_game, md_policy) + nash_conv_dict[i] = 
nash_conv_md.nash_conv()
+    print((f"Iteration {i}, Nash conv: {nash_conv_md.nash_conv()}, "
+           f"time: {time.time() - tick_time}"))
+  timing = time.time() - tick_time
+  md_policy = md.get_policy()
+  distribution_mfg = distribution_module.DistributionPolicy(mfg_game, md_policy)
+  policy_value_ = policy_value.PolicyValue(
+      mfg_game, distribution_mfg, md_policy).value(mfg_game.new_initial_state())
+  nash_conv_md = nash_conv_module.NashConv(mfg_game, md_policy)
+  return timing, md_policy, nash_conv_md, policy_value_, md, nash_conv_dict
diff --git a/scenarios/bargaining/open_spiel/open_spiel/evaluation/CMakeLists.txt b/scenarios/bargaining/open_spiel/open_spiel/evaluation/CMakeLists.txt
new file mode 100644
index 0000000..d62cd81
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/evaluation/CMakeLists.txt
@@ -0,0 +1,9 @@
+add_library (evaluation OBJECT
+  soft_condorcet_optimization.cc
+  soft_condorcet_optimization.h
+)
+target_include_directories (evaluation PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+
+add_executable(soft_condorcet_optimization_test soft_condorcet_optimization_test.cc
+               $<TARGET_OBJECTS:evaluation> ${OPEN_SPIEL_OBJECTS})
+add_test(soft_condorcet_optimization_test soft_condorcet_optimization_test)
diff --git a/scenarios/bargaining/open_spiel/open_spiel/evaluation/soft_condorcet_optimization.cc b/scenarios/bargaining/open_spiel/open_spiel/evaluation/soft_condorcet_optimization.cc
new file mode 100644
index 0000000..f2f9c08
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/evaluation/soft_condorcet_optimization.cc
@@ -0,0 +1,212 @@
+// Copyright 2021 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
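+//
+// Gradient sketch for the sigmoid-based optimizer implemented below: for every
+// sampled vote and every ordered pair (a, b) with a ranked above b,
+// SoftCondorcetOptimizer::ComputeGradient() forms
+//     delta_ab = (ratings[b] - ratings[a]) / temperature
+//     sigma_ab = 1 / (1 + exp(-delta_ab))
+// and accumulates -weight * sigma_ab * (1 - sigma_ab) / temperature into
+// gradient[a] (and the same magnitude with opposite sign into gradient[b]),
+// i.e. the vote weight times the derivative of sigma_ab with respect to each
+// rating. The batch average is then applied as a projected gradient step in
+// Optimizer::Step().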
+ +#include "open_spiel/evaluation/soft_condorcet_optimization.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace evaluation { +namespace { +template +std::vector sort_indices(const std::vector& v) { + std::vector indices(v.size()); + std::iota(indices.begin(), indices.end(), 0); + std::stable_sort(indices.begin(), indices.end(), + [&v](int i1, int i2) { return v[i1] < v[i2]; }); + return indices; +} +} // namespace + +void PreferenceProfile::AddVote(const WeightedVote& vote) { + votes_.push_back(vote); + for (const std::string& alt : vote.vote) { + RegisterAlternative(alt); + } +} + +void PreferenceProfile::AddVote(const std::vector& vote, + int weight) { + AddVote(WeightedVote{weight, vote}); +} + +void PreferenceProfile::AddVoteUngrouped(const std::vector& vote, + int weight) { + for (int i = 0; i < weight; ++i) { + AddVote(WeightedVote{1, vote}); + } +} + +int PreferenceProfile::RegisterAlternative(const std::string& alternative) { + auto iter = alternatives_dict_.find(alternative); + if (iter != alternatives_dict_.end()) { + return iter->second; + } + alternatives_dict_[alternative] = alternatives_.size(); + alternatives_.push_back(alternative); + return alternatives_.size() - 1; +} + +Optimizer::Optimizer( + const std::vector>>& votes, + double rating_lower_bound, double rating_upper_bound, int batch_size, + int rng_seed, int compute_norm_freq, double initial_param_noise, + const std::vector& alternative_names) + : rng_(rng_seed), + rating_lower_bound_(rating_lower_bound), + rating_upper_bound_(rating_upper_bound), + batch_size_(batch_size), + compute_norm_freq_(compute_norm_freq), + initial_param_noise_(initial_param_noise), + total_iterations_(0) { + SPIEL_CHECK_GT(batch_size_, 0); + SPIEL_CHECK_GT(rating_upper_bound, rating_lower_bound); + + if (!alternative_names.empty()) { + for (const std::string& alt : alternative_names) { + profile_.RegisterAlternative(alt); + } + } + + for (const auto& vote : votes) { + profile_.AddVoteUngrouped(vote.second, vote.first); + } + double midpoint_rating = + (rating_upper_bound - rating_lower_bound) / 2.0 + rating_lower_bound; + num_alternatives_ = profile_.num_alternatives(); + ratings_.resize(num_alternatives_, midpoint_rating); + gradient_.resize(num_alternatives_, 0.0); +} + +void Optimizer::Step(double learning_rate, const std::vector& batch) { + ComputeGradient(batch); + for (int a = 0; a < num_alternatives_; ++a) { + ratings_[a] -= learning_rate * gradient_[a]; + ratings_[a] = + std::clamp(ratings_[a], rating_lower_bound_, rating_upper_bound_); + } +} + +void Optimizer::RunSolver(int iterations, double learning_rate) { + std::vector batch(batch_size_); + for (int i = 0; i < iterations; ++i) { + for (int b = 0; b < batch_size_; ++b) { + batch[b] = absl::Uniform(rng_, 0, profile_.num_votes()); + } + Step(learning_rate, batch); + total_iterations_++; + } +} + +void Optimizer::divide_gradient() { + for (int a = 0; a < num_alternatives_; ++a) { + gradient_[a] /= batch_size_; + } +} + +std::map Optimizer::ratings() const { + std::map ratings; + for (int a = 0; a < num_alternatives_; ++a) { + ratings[profile_.get_alternative(a)] = ratings_[a]; + } + return ratings; +} + +SoftCondorcetOptimizer::SoftCondorcetOptimizer( + const TupleListVote& votes, double rating_lower_bound, + double rating_upper_bound, int batch_size, double temperature, int rng_seed, + int compute_norm_freq, 
double initial_param_noise, + const std::vector& alternative_names) + : Optimizer(votes, rating_lower_bound, rating_upper_bound, batch_size, + rng_seed, compute_norm_freq, initial_param_noise, + alternative_names), + temperature_(temperature) { + SPIEL_CHECK_GT(temperature_, 0); +} + +void SoftCondorcetOptimizer::ComputeGradient(const std::vector& batch) { + std::fill(gradient_.begin(), gradient_.end(), 0.0); + for (int vote_idx : batch) { + const WeightedVote& vote = profile_.votes()[vote_idx]; + int vote_len = vote.vote.size(); + double weight = vote.weight; + for (int i = 0; i < vote_len; ++i) { + int a_idx = profile_.alternatives_dict().at(vote.vote[i]); + for (int j = i + 1; j < vote_len; ++j) { + int b_idx = profile_.alternatives_dict().at(vote.vote[j]); + double delta_ab = ((ratings_[b_idx] - ratings_[a_idx]) / temperature_); + // double sigma_ab = sigmoid(delta_ab); + double sigma_ab = 1.0 / (1.0 + std::exp(-delta_ab)); + gradient_[a_idx] -= + (weight * sigma_ab * (1.0 - sigma_ab) / temperature_); + gradient_[b_idx] += + (weight * sigma_ab * (1.0 - sigma_ab) / temperature_); + } + } + } + divide_gradient(); +} + +FenchelYoungOptimizer::FenchelYoungOptimizer( + const TupleListVote& votes, double rating_lower_bound, + double rating_upper_bound, int batch_size, int rng_seed, + int compute_norm_freq, double initial_param_noise, double sigma, + const std::vector& alternative_names) + : Optimizer(votes, rating_lower_bound, rating_upper_bound, batch_size, + rng_seed, compute_norm_freq, initial_param_noise, + alternative_names), + sigma_(sigma), + gumbel_dist_{0.0, 1.0} {} + +void FenchelYoungOptimizer::ComputeGradient(const std::vector& batch) { + std::fill(gradient_.begin(), gradient_.end(), 0.0); + for (int vote_idx : batch) { + const WeightedVote& vote = profile_.votes()[vote_idx]; + SPIEL_CHECK_EQ(vote.weight, 1); // Fenchel Young only works with weight 1. + int vote_len = vote.vote.size(); + std::vector alternative_ids(vote_len); + std::vector predicted_ratings(vote_len); + std::vector target_ranking(vote_len); + std::vector local_grad(vote_len); + for (int i = 0; i < vote_len; ++i) { + target_ranking[i] = i; + alternative_ids[i] = profile_.alternatives_dict().at(vote.vote[i]); + predicted_ratings[i] = + ratings_[alternative_ids[i]] + gumbel_dist_(rng_) * sigma_; + // Need to do this here to assemble -\tilde{\theta}_v needed below. + predicted_ratings[i] = -predicted_ratings[i]; + } + // ArgSort(ArgSort(-\tilde{\theta}_v)) + std::vector predicted_ranking = + sort_indices(sort_indices(predicted_ratings)); + for (int i = 0; i < vote_len; ++i) { + local_grad[i] = predicted_ranking[i] - target_ranking[i]; + gradient_[alternative_ids[i]] += -local_grad[i]; + } + } + divide_gradient(); +} + +} // namespace evaluation +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/evaluation/soft_condorcet_optimization.h b/scenarios/bargaining/open_spiel/open_spiel/evaluation/soft_condorcet_optimization.h new file mode 100644 index 0000000..ffd44a7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/evaluation/soft_condorcet_optimization.h @@ -0,0 +1,130 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_UTILS_SOFT_CONDORCET_OPTIMIZATION_H_ +#define OPEN_SPIEL_UTILS_SOFT_CONDORCET_OPTIMIZATION_H_ + +// A C++ implementation of Soft Condorcet optimizer (see Lanctot et al. '24. +// https://arxiv.org/abs/2411.00119). This is functionally equivalent to the +// Python implementation in python/voting/soft_condorcet_optimization.py but +// runs faster. + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" + +namespace open_spiel { +namespace evaluation { + +using WeightedVotePair = std::pair>; +using TupleListVote = std::vector; + +struct WeightedVote { + int weight; + std::vector vote; +}; + +class PreferenceProfile { + public: + PreferenceProfile() = default; + void AddVote(const WeightedVote& vote); + void AddVote(const std::vector& vote, int weight = 1); + void AddVoteUngrouped(const std::vector& vote, int weight = 1); + int RegisterAlternative(const std::string& alternative); + const std::vector& votes() const { return votes_; } + const std::vector& alternatives() const { return alternatives_; } + std::string get_alternative(int idx) const { return alternatives_[idx]; } + int num_votes() const { return votes_.size(); } + int num_alternatives() const { return alternatives_.size(); } + const absl::flat_hash_map& alternatives_dict() const { + return alternatives_dict_; + } + + private: + // Alternative name -> index map. 
+ std::vector alternatives_; + absl::flat_hash_map alternatives_dict_; + std::vector votes_; +}; + +class Optimizer { + public: + Optimizer(const TupleListVote& votes, double rating_lower_bound, + double rating_upper_bound, int batch_size, int rng_seed = 0, + int compute_norm_freq = 1000, double initial_param_noise = 0.0, + const std::vector& alternative_names = {}); + virtual ~Optimizer() = default; + + void Step(double learning_rate, const std::vector& batch); + void RunSolver(int iterations = 1000, double learning_rate = 0.01); + std::map ratings() const; + + protected: + virtual void ComputeGradient(const std::vector& batch) = 0; + void divide_gradient(); + + PreferenceProfile profile_; + std::mt19937_64 rng_; + double rating_lower_bound_; + double rating_upper_bound_; + int batch_size_; + int compute_norm_freq_; + double initial_param_noise_; + int total_iterations_; + int num_alternatives_; + std::vector ratings_; + std::vector gradient_; +}; + +class SoftCondorcetOptimizer : public Optimizer { + public: + SoftCondorcetOptimizer( + const TupleListVote& votes, double rating_lower_bound, + double rating_upper_bound, int batch_size, double temperature = 1.0, + int rng_seed = 0, int compute_norm_freq = 1000, + double initial_param_noise = 0, + const std::vector& alternative_names = {}); + + virtual ~SoftCondorcetOptimizer() = default; + + void ComputeGradient(const std::vector& batch) override; + + private: + double temperature_ = 1.0; +}; + +class FenchelYoungOptimizer : public Optimizer { + public: + FenchelYoungOptimizer(const TupleListVote& votes, double rating_lower_bound, + double rating_upper_bound, int batch_size, + int rng_seed = 0, int compute_norm_freq = 1000, + double initial_param_noise = 0, double sigma = 100.0, + const std::vector& alternative_names = {}); + virtual ~FenchelYoungOptimizer() = default; + + void ComputeGradient(const std::vector& batch) override; + + private: + double sigma_; + std::extreme_value_distribution<> gumbel_dist_; +}; + +} // namespace evaluation +} // namespace open_spiel + +#endif // OPEN_SPIEL_UTILS_SOFT_CONDORCET_OPTIMIZATION_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/evaluation/soft_condorcet_optimization_test.cc b/scenarios/bargaining/open_spiel/open_spiel/evaluation/soft_condorcet_optimization_test.cc new file mode 100644 index 0000000..6422a73 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/evaluation/soft_condorcet_optimization_test.cc @@ -0,0 +1,122 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
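+//
+// A minimal sketch for running these tests, assuming the standard OpenSpiel
+// CMake build (the test target is declared in evaluation/CMakeLists.txt):
+//   ctest -R soft_condorcet_optimization_test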
+ +#include "open_spiel/evaluation/soft_condorcet_optimization.h" + +#include // NOLINT (used by std::cout) +#include +#include + +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace { + +void TestSimpleCaseSigmoid() { + std::cout << "TestSimpleCaseSigmoid" << std::endl; + evaluation::SoftCondorcetOptimizer sco_optimizer({{1, {"a", "b", "c"}}}, + -100.0, 100.0, 4, 1.0); + sco_optimizer.RunSolver(1000, 0.01); + std::map ratings = sco_optimizer.ratings(); + for (const auto& [alt, rating] : ratings) { + std::cout << alt << ": " << rating << std::endl; + } + SPIEL_CHECK_GT(ratings["a"], ratings["b"]); + SPIEL_CHECK_GT(ratings["b"], ratings["c"]); +} + +void TestMeeplePentathlonSigmoid() { + std::cout << "TestMeeplePentathlonSigmoid" << std::endl; + evaluation::SoftCondorcetOptimizer sco_optimizer({{1, {"a", "b", "c"}}, + {1, {"a", "c", "b"}}, + {2, {"c", "a", "b"}}, + {1, {"b", "c", "a"}}}, + -100.0, 100.0, 4, 1.0); + sco_optimizer.RunSolver(1000, 0.01); + std::map ratings = sco_optimizer.ratings(); + for (const auto& [alt, rating] : ratings) { + std::cout << alt << ": " << rating << std::endl; + } + SPIEL_CHECK_GT(ratings["c"], ratings["a"]); + SPIEL_CHECK_GT(ratings["a"], ratings["b"]); +} + +void TestSec41ExampleSigmoid() { + std::cout << "TestSec41ExampleSigmoid" << std::endl; + evaluation::SoftCondorcetOptimizer sco_optimizer( + {{2, {"a", "b", "c"}}, {3, {"c", "a", "b"}}}, -100.0, 100.0, 4, 1.0); + sco_optimizer.RunSolver(10000, 0.01); + std::map ratings = sco_optimizer.ratings(); + for (const auto& [alt, rating] : ratings) { + std::cout << alt << ": " << rating << std::endl; + } + SPIEL_CHECK_GT(ratings["c"], ratings["a"]); + SPIEL_CHECK_GT(ratings["a"], ratings["b"]); +} + +void TestSimpleCaseFenchelYoung() { + std::cout << "TestSimpleCaseFenchelYoung" << std::endl; + evaluation::FenchelYoungOptimizer fy_optimizer({{1, {"a", "b", "c"}}}, -100.0, + 100.0, 4, 1.0); + fy_optimizer.RunSolver(1000, 0.01); + std::map ratings = fy_optimizer.ratings(); + for (const auto& [alt, rating] : ratings) { + std::cout << alt << ": " << rating << std::endl; + } + SPIEL_CHECK_GT(ratings["a"], ratings["b"]); + SPIEL_CHECK_GT(ratings["b"], ratings["c"]); +} + +void TestMeeplePentathlonFenchelYoung() { + std::cout << "TestMeeplePentathlonFenchelYoung" << std::endl; + evaluation::FenchelYoungOptimizer fy_optimizer({{1, {"a", "b", "c"}}, + {1, {"a", "c", "b"}}, + {2, {"c", "a", "b"}}, + {1, {"b", "c", "a"}}}, + -100.0, 100.0, 4, 1.0); + fy_optimizer.RunSolver(1000, 0.01); + std::map ratings = fy_optimizer.ratings(); + for (const auto& [alt, rating] : ratings) { + std::cout << alt << ": " << rating << std::endl; + } + // Not necessarily C > A > B! C ~= A just like with Elo. + SPIEL_CHECK_GT(ratings["c"], ratings["b"]); + SPIEL_CHECK_GT(ratings["a"], ratings["b"]); +} + +void TestSec41ExampleFenchelYoung() { + std::cout << "TestSec41ExampleFenchelYoung" << std::endl; + evaluation::FenchelYoungOptimizer fy_optimizer( + {{2, {"a", "b", "c"}}, {3, {"c", "a", "b"}}}, -100.0, 100.0, 4, 1.0); + fy_optimizer.RunSolver(10000, 0.01); + std::map ratings = fy_optimizer.ratings(); + for (const auto& [alt, rating] : ratings) { + std::cout << alt << ": " << rating << std::endl; + } + // Like Elo, this should result in A > C > B. 
+ SPIEL_CHECK_GT(ratings["a"], ratings["c"]); + SPIEL_CHECK_GT(ratings["c"], ratings["b"]); +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::TestSimpleCaseSigmoid(); + open_spiel::TestMeeplePentathlonSigmoid(); + open_spiel::TestSec41ExampleSigmoid(); + open_spiel::TestSimpleCaseFenchelYoung(); + open_spiel::TestMeeplePentathlonFenchelYoung(); + open_spiel::TestSec41ExampleFenchelYoung(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/examples/CMakeLists.txt b/scenarios/bargaining/open_spiel/open_spiel/examples/CMakeLists.txt new file mode 100644 index 0000000..92dc3ff --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/examples/CMakeLists.txt @@ -0,0 +1,74 @@ +add_executable(benchmark_game benchmark_game.cc ${OPEN_SPIEL_OBJECTS}) +add_test(benchmark_game_test benchmark_game --game=tic_tac_toe --sims=100 --attempts=2) + +add_executable(cfr_example cfr_example.cc ${OPEN_SPIEL_OBJECTS}) +add_test(cfr_example_test cfr_example) + +if (OPEN_SPIEL_BUILD_WITH_ACPC) +add_executable(universal_poker_mccfr_acpc_gamedef_example universal_poker_mccfr_acpc_gamedef_example.cc ${OPEN_SPIEL_OBJECTS}) +add_test(universal_poker_mccfr_acpc_gamedef_example_test universal_poker_mccfr_acpc_gamedef_example) +endif() + +add_executable(cfr_multi_equilibria_example cfr_multi_equilibria_example.cc + ${OPEN_SPIEL_OBJECTS}) + +add_executable(imperfect_recall_mccfr imperfect_recall_mccfr.cc + ${OPEN_SPIEL_OBJECTS}) + +add_executable(example example.cc ${OPEN_SPIEL_OBJECTS}) +add_test(example_test example --game=tic_tac_toe --seed=0) + +add_executable(fsicfr_liars_dice fsicfr_liars_dice.cc ${OPEN_SPIEL_OBJECTS}) + +add_executable(gtp gtp.cc ${OPEN_SPIEL_OBJECTS}) + +add_executable(is_mcts_gwhist is_mcts_gwhist.cc ${OPEN_SPIEL_OBJECTS}) + +add_executable(matrix_example matrix_example.cc ${OPEN_SPIEL_OBJECTS}) +add_test(matrix_example_test matrix_example) + +add_executable(mcts_example mcts_example.cc ${OPEN_SPIEL_OBJECTS}) +add_test(mcts_example_test mcts_example) + +add_executable(minimax_example minimax_example.cc ${OPEN_SPIEL_OBJECTS}) +add_test(minimax_example_test minimax_example) + +add_executable(policy_iteration_example policy_iteration_example.cc ${OPEN_SPIEL_OBJECTS}) +add_test(policy_iteration_example_test policy_iteration_example) + +add_executable(value_iteration_example value_iteration_example.cc ${OPEN_SPIEL_OBJECTS}) +add_test(value_iteration_example_test value_iteration_example) + +add_executable(tabular_sarsa_example tabular_sarsa_example.cc ${OPEN_SPIEL_OBJECTS}) + +add_executable(tabular_q_learning_example tabular_q_learning_example.cc ${OPEN_SPIEL_OBJECTS}) + +add_executable(count_all_states count_all_states.cc ${OPEN_SPIEL_OBJECTS}) + +if (OPEN_SPIEL_BUILD_WITH_LIBTORCH) + add_executable(alpha_zero_torch_example alpha_zero_torch_example.cc + ${OPEN_SPIEL_OBJECTS} + $) + target_link_libraries (alpha_zero_torch_example ${TORCH_LIBRARIES}) + + add_executable(alpha_zero_torch_game_example alpha_zero_torch_game_example.cc + ${OPEN_SPIEL_OBJECTS} + $ + $) + target_link_libraries (alpha_zero_torch_game_example ${TORCH_LIBRARIES}) + + add_executable(dqn_torch_example dqn_torch_example.cc + ${OPEN_SPIEL_OBJECTS} + $) + target_link_libraries (dqn_torch_example ${TORCH_LIBRARIES}) +endif () + +if (BUILD_SHARED_LIB) + if (WIN32) + add_executable(shared_library_example shared_library_example.cc ${OPEN_SPIEL_OBJECTS}) + else() + add_executable(shared_library_example shared_library_example.cc) + endif() + 
target_link_libraries(shared_library_example open_spiel) + add_test(shared_lib_test shared_lib_test) +endif() diff --git a/scenarios/bargaining/open_spiel/open_spiel/examples/alpha_zero_torch_example.cc b/scenarios/bargaining/open_spiel/open_spiel/examples/alpha_zero_torch_example.cc new file mode 100644 index 0000000..4a4c2a9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/examples/alpha_zero_torch_example.cc @@ -0,0 +1,166 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/flags/flag.h" +#include "open_spiel/abseil-cpp/absl/flags/parse.h" +#include "open_spiel/algorithms/alpha_zero_torch/alpha_zero.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/file.h" +#include "open_spiel/utils/init.h" +#include "open_spiel/utils/json.h" +#include "open_spiel/utils/thread.h" + +ABSL_FLAG(std::string, game, "tic_tac_toe", "The name of the game to play."); +ABSL_FLAG(std::string, path, "/tmp/az", "Where to output the logs."); +ABSL_FLAG(std::string, graph_def, "", + ("Where to get the graph. This could be from export_model.py, or " + "from a checkpoint. If this is empty it'll create one.")); +ABSL_FLAG(std::string, nn_model, "resnet", + "Model torso type, can be resnet or mlp."); +ABSL_FLAG(int, nn_width, 128, "Width of the model, passed to export_model.py."); +ABSL_FLAG(int, nn_depth, 10, "Depth of the model, passed to export_model.py."); +ABSL_FLAG(double, uct_c, 2, "UCT exploration constant."); +ABSL_FLAG(double, temperature, 1, + "Temperature for final move selection for early moves in training."); +ABSL_FLAG(double, temperature_drop, 10, // Smaller than AZ due to short games. 
+ "Drop the temperature to 0 after this many moves."); +ABSL_FLAG(double, cutoff_probability, 0.8, + ("Cut off rollouts early when above the cutoff value with this " + "probability.")); +ABSL_FLAG(double, cutoff_value, 0.95, + "Cut off rollouts early when above this value."); +ABSL_FLAG(double, learning_rate, 0.0001, "Learning rate."); +ABSL_FLAG(double, weight_decay, 0.0001, "Weight decay."); +ABSL_FLAG(double, policy_alpha, 1, "What dirichlet noise alpha to use."); +ABSL_FLAG(double, policy_epsilon, 0.25, "What dirichlet noise epsilon to use."); +ABSL_FLAG(int, replay_buffer_size, 1 << 16, + "How many states to store in the replay buffer."); +ABSL_FLAG(double, replay_buffer_reuse, 3, + "How many times to reuse each state in the replay buffer."); +ABSL_FLAG(int, checkpoint_freq, 100, "Save a checkpoint every N steps."); +ABSL_FLAG(int, max_simulations, 300, "How many simulations to run."); +ABSL_FLAG(int, train_batch_size, 1 << 10, + "How many states to learn from per batch."); +ABSL_FLAG(int, inference_batch_size, 1, + "How many threads to wait for for inference."); +ABSL_FLAG(int, inference_threads, 0, "How many threads to run inference."); +ABSL_FLAG(int, inference_cache, 1 << 18, + "Whether to cache the results from inference."); +ABSL_FLAG(std::string, devices, "/cpu:0", + "Comma separated list of devices. The first device listed is used " + "also as the learner. Allowable device names: cpu, cuda:0, cuda:1, " + "cuda:2, cuda:3, ... Where cuda:n implies the n'th GPU resource."); +ABSL_FLAG(bool, explicit_learning, false, + "The way the first device handles learning, either 'false' " + "(while learning, the first device also takes on inference " + "requests), or 'true' (while learning, the first device does " + "not take on inference requests) which can only be used when " + "multiple devices are available)."); +ABSL_FLAG(bool, verbose, false, "Show the MCTS stats of possible moves."); +ABSL_FLAG(int, actors, 4, "How many actors to run."); +ABSL_FLAG(int, evaluators, 2, "How many evaluators to run."); +ABSL_FLAG(int, eval_levels, 7, + ("Play evaluation games vs MCTS+Solver, with max_simulations*10^(n/2)" + " simulations for n in range(eval_levels). Default of 7 means " + "running mcts with up to 1000 times more simulations.")); +ABSL_FLAG(int, max_steps, 0, "How many learn steps to run."); +ABSL_FLAG(int, evaluation_window, 100, + "Number of games to average results over."); + +open_spiel::StopToken stop_token; + +void signal_handler(int s) { + if (stop_token.StopRequested()) { + exit(1); + } else { + stop_token.Stop(); + } +} + +void signal_installer() { + struct sigaction sigIntHandler; + sigIntHandler.sa_handler = signal_handler; + sigemptyset(&sigIntHandler.sa_mask); + sigIntHandler.sa_flags = 0; + sigaction(SIGINT, &sigIntHandler, nullptr); +} + +int main(int argc, char** argv) { + open_spiel::Init("", &argc, &argv, true); + + std::vector positional_args = absl::ParseCommandLine(argc, argv); + signal_installer(); + + bool resuming; + open_spiel::algorithms::torch_az::AlphaZeroConfig config; + + if (positional_args.size() > 1) { + // Resume training from a checkpoint. 
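+    // Hypothetical resume invocation (the path is illustrative only, assuming
+    // a config.json previously written under --path):
+    //   alpha_zero_torch_example /tmp/az/config.json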
+ resuming = true; + + if (positional_args.size() > 2) { + open_spiel::SpielFatalError( + "Specify only a path to a config.json to resume training."); + } + + open_spiel::file::File config_file(positional_args[1], "r"); + std::string config_string = config_file.ReadContents(); + open_spiel::json::Object config_json = open_spiel::json::FromString( + config_string).value().GetObject(); + + config.FromJson(config_json); + } else { + // Start training from scratch. + resuming = false; + + config.game = absl::GetFlag(FLAGS_game); + config.path = absl::GetFlag(FLAGS_path); + config.graph_def = absl::GetFlag(FLAGS_graph_def); + config.nn_model = absl::GetFlag(FLAGS_nn_model); + config.nn_width = absl::GetFlag(FLAGS_nn_width); + config.nn_depth = absl::GetFlag(FLAGS_nn_depth); + config.devices = absl::GetFlag(FLAGS_devices); + config.explicit_learning = absl::GetFlag(FLAGS_explicit_learning); + config.learning_rate = absl::GetFlag(FLAGS_learning_rate); + config.weight_decay = absl::GetFlag(FLAGS_weight_decay); + config.train_batch_size = absl::GetFlag(FLAGS_train_batch_size); + config.replay_buffer_size = absl::GetFlag(FLAGS_replay_buffer_size); + config.replay_buffer_reuse = absl::GetFlag(FLAGS_replay_buffer_reuse); + config.checkpoint_freq = absl::GetFlag(FLAGS_checkpoint_freq); + config.evaluation_window = absl::GetFlag(FLAGS_evaluation_window); + config.uct_c = absl::GetFlag(FLAGS_uct_c); + config.max_simulations = absl::GetFlag(FLAGS_max_simulations); + config.train_batch_size = absl::GetFlag(FLAGS_train_batch_size); + config.inference_batch_size = absl::GetFlag(FLAGS_inference_batch_size); + config.inference_threads = absl::GetFlag(FLAGS_inference_threads); + config.inference_cache = absl::GetFlag(FLAGS_inference_cache); + config.policy_alpha = absl::GetFlag(FLAGS_policy_alpha); + config.policy_epsilon = absl::GetFlag(FLAGS_policy_epsilon); + config.temperature = absl::GetFlag(FLAGS_temperature); + config.temperature_drop = absl::GetFlag(FLAGS_temperature_drop); + config.cutoff_probability = absl::GetFlag(FLAGS_cutoff_probability); + config.cutoff_value = absl::GetFlag(FLAGS_cutoff_value); + config.actors = absl::GetFlag(FLAGS_actors); + config.evaluators = absl::GetFlag(FLAGS_evaluators); + config.eval_levels = absl::GetFlag(FLAGS_eval_levels); + config.max_steps = absl::GetFlag(FLAGS_max_steps); + } + + return !AlphaZero(config, &stop_token, resuming); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/examples/alpha_zero_torch_game_example.cc b/scenarios/bargaining/open_spiel/open_spiel/examples/alpha_zero_torch_game_example.cc new file mode 100644 index 0000000..c9fd150 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/examples/alpha_zero_torch_game_example.cc @@ -0,0 +1,252 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/flags/flag.h" +#include "open_spiel/abseil-cpp/absl/flags/parse.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/time/clock.h" +#include "open_spiel/abseil-cpp/absl/time/time.h" +#include "open_spiel/algorithms/alpha_zero_torch/device_manager.h" +#include "open_spiel/algorithms/alpha_zero_torch/vpevaluator.h" +#include "open_spiel/algorithms/alpha_zero_torch/vpnet.h" +#include "open_spiel/algorithms/mcts.h" +#include "open_spiel/bots/human/human_bot.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +ABSL_FLAG(std::string, game, "tic_tac_toe", "The name of the game to play."); +ABSL_FLAG(std::string, player1, "az", "Who controls player1."); +ABSL_FLAG(std::string, player2, "random", "Who controls player2."); +ABSL_FLAG(std::string, az_path, "", "Path to AZ experiment."); +ABSL_FLAG(std::string, az_graph_def, "vpnet.pb", + "AZ graph definition file name."); +ABSL_FLAG(double, uct_c, 2, "UCT exploration constant."); +ABSL_FLAG(int, rollout_count, 10, "How many rollouts per evaluation."); +ABSL_FLAG(int, max_simulations, 10000, "How many simulations to run."); +ABSL_FLAG(int, num_games, 1, "How many games to play."); +ABSL_FLAG(int, max_memory_mb, 1000, + "The maximum memory used before cutting the search short."); +ABSL_FLAG(int, az_checkpoint, -1, "Checkpoint of AZ model."); +ABSL_FLAG(int, az_batch_size, 1, "Batch size of AZ inference."); +ABSL_FLAG(int, az_threads, 1, "Number of threads to run for AZ inference."); +ABSL_FLAG(int, az_cache_size, 16384, "Cache size of AZ algorithm."); +ABSL_FLAG(int, az_cache_shards, 1, "Cache shards of AZ algorithm."); +ABSL_FLAG(bool, solve, true, "Whether to use MCTS-Solver."); +ABSL_FLAG(uint_fast32_t, seed, 0, "Seed for MCTS."); +ABSL_FLAG(bool, verbose, false, "Show the MCTS stats of possible moves."); +ABSL_FLAG(bool, quiet, false, "Show the MCTS stats of possible moves."); + +uint_fast32_t Seed() { + uint_fast32_t seed = absl::GetFlag(FLAGS_seed); + return seed != 0 ? seed : absl::ToUnixMicros(absl::Now()); +} + +std::unique_ptr +InitBot(std::string type, const open_spiel::Game &game, + open_spiel::Player player, + std::shared_ptr evaluator, + std::shared_ptr + az_evaluator) { + if (type == "az") { + return std::make_unique( + game, std::move(az_evaluator), absl::GetFlag(FLAGS_uct_c), + absl::GetFlag(FLAGS_max_simulations), + absl::GetFlag(FLAGS_max_memory_mb), absl::GetFlag(FLAGS_solve), Seed(), + absl::GetFlag(FLAGS_verbose), + open_spiel::algorithms::ChildSelectionPolicy::PUCT, 0, 0, + /*dont_return_chance_node=*/true); + } + if (type == "human") { + return std::make_unique(); + } + if (type == "mcts") { + return std::make_unique( + game, std::move(evaluator), absl::GetFlag(FLAGS_uct_c), + absl::GetFlag(FLAGS_max_simulations), + absl::GetFlag(FLAGS_max_memory_mb), absl::GetFlag(FLAGS_solve), Seed(), + absl::GetFlag(FLAGS_verbose)); + } + if (type == "random") { + return open_spiel::MakeUniformRandomBot(player, Seed()); + } + + open_spiel::SpielFatalError( + "Bad player type. 
Known types: az, human, mcts, random"); +} + +open_spiel::Action GetAction(const open_spiel::State &state, + std::string action_str) { + for (open_spiel::Action action : state.LegalActions()) { + if (action_str == state.ActionToString(state.CurrentPlayer(), action)) + return action; + } + return open_spiel::kInvalidAction; +} + +std::pair, std::vector> +PlayGame(const open_spiel::Game &game, + std::vector> &bots, std::mt19937 &rng, + const std::vector &initial_actions) { + bool quiet = absl::GetFlag(FLAGS_quiet); + std::unique_ptr state = game.NewInitialState(); + std::vector history; + + if (!quiet) + std::cerr << "Initial state:\n" << state << std::endl; + + // Play the initial actions (if there are any). + for (const auto &action_str : initial_actions) { + open_spiel::Player current_player = state->CurrentPlayer(); + open_spiel::Action action = GetAction(*state, action_str); + + if (action == open_spiel::kInvalidAction) + open_spiel::SpielFatalError(absl::StrCat("Invalid action: ", action_str)); + + history.push_back(action_str); + state->ApplyAction(action); + + if (!quiet) { + std::cerr << "Player " << current_player + << " forced action: " << action_str << std::endl; + std::cerr << "Next state:\n" << state->ToString() << std::endl; + } + } + + while (!state->IsTerminal()) { + open_spiel::Player player = state->CurrentPlayer(); + + open_spiel::Action action; + if (state->IsChanceNode()) { + // Chance node; sample one according to underlying distribution. + open_spiel::ActionsAndProbs outcomes = state->ChanceOutcomes(); + action = open_spiel::SampleAction(outcomes, rng).first; + } else { + // The state must be a decision node, ask the right bot to make its + // action. + action = bots[player]->Step(*state); + } + if (!quiet) + std::cerr << "Player " << player + << " chose action: " << state->ActionToString(player, action) + << std::endl; + + // Inform the other bot of the action performed. + for (open_spiel::Player p = 0; p < bots.size(); ++p) { + if (p != player) { + bots[p]->InformAction(*state, player, action); + } + } + + // Update history and get the next state. + history.push_back(state->ActionToString(player, action)); + state->ApplyAction(action); + + if (!quiet) + std::cerr << "Next state:\n" << state->ToString() << std::endl; + } + + std::cerr << "Returns: " << absl::StrJoin(state->Returns(), ", ") + << std::endl; + std::cerr << "Game actions: " << absl::StrJoin(history, ", ") << std::endl; + + return {state->Returns(), history}; +} + +int main(int argc, char **argv) { + std::vector positional_args = absl::ParseCommandLine(argc, argv); + std::mt19937 rng(Seed()); // Random number generator. + + // Create the game. + std::string game_name = absl::GetFlag(FLAGS_game); + std::cerr << "Game: " << game_name << std::endl; + std::shared_ptr game = + open_spiel::LoadGame(game_name); + + // Ensure the game is AlphaZero-compatible and arguments are compatible. 
+ open_spiel::GameType game_type = game->GetType(); + if (game->NumPlayers() != 2) + open_spiel::SpielFatalError("AlphaZero can only handle 2-player games."); + if (game_type.reward_model != open_spiel::GameType::RewardModel::kTerminal) + open_spiel::SpielFatalError("Game must have terminal rewards."); + if (game_type.dynamics != open_spiel::GameType::Dynamics::kSequential) + open_spiel::SpielFatalError("Game must have sequential turns."); + if (absl::GetFlag(FLAGS_az_path).empty()) + open_spiel::SpielFatalError("AlphaZero path must be specified."); + if (absl::GetFlag(FLAGS_player1) != "az" && + absl::GetFlag(FLAGS_player2) != "az") + open_spiel::SpielFatalError("One of the players must be AlphaZero."); + + open_spiel::algorithms::torch_az::DeviceManager device_manager; + device_manager.AddDevice(open_spiel::algorithms::torch_az::VPNetModel( + *game, absl::GetFlag(FLAGS_az_path), absl::GetFlag(FLAGS_az_graph_def), + "/cpu:0")); + device_manager.Get(0, 0)->LoadCheckpoint(absl::GetFlag(FLAGS_az_checkpoint)); + auto az_evaluator = + std::make_shared( + /*device_manager=*/&device_manager, + /*batch_size=*/absl::GetFlag(FLAGS_az_batch_size), + /*threads=*/absl::GetFlag(FLAGS_az_threads), + /*cache_size=*/absl::GetFlag(FLAGS_az_cache_size), + /*cache_shards=*/absl::GetFlag(FLAGS_az_cache_shards)); + auto evaluator = + std::make_shared( + absl::GetFlag(FLAGS_rollout_count), Seed()); + + std::vector> bots; + bots.push_back( + InitBot(absl::GetFlag(FLAGS_player1), *game, 0, evaluator, az_evaluator)); + bots.push_back( + InitBot(absl::GetFlag(FLAGS_player2), *game, 1, evaluator, az_evaluator)); + + std::vector initial_actions; + for (int i = 1; i < positional_args.size(); ++i) { + initial_actions.push_back(positional_args[i]); + } + + std::map histories; + std::vector overall_returns(2, 0); + std::vector overall_wins(2, 0); + int num_games = absl::GetFlag(FLAGS_num_games); + for (int game_num = 0; game_num < num_games; ++game_num) { + auto [returns, history] = PlayGame(*game, bots, rng, initial_actions); + histories[absl::StrJoin(history, " ")] += 1; + for (int i = 0; i < returns.size(); ++i) { + double v = returns[i]; + overall_returns[i] += v; + if (v > 0) { + overall_wins[i] += 1; + } + } + } + + std::cerr << "Number of games played: " << num_games << std::endl; + std::cerr << "Number of distinct games played: " << histories.size() + << std::endl; + std::cerr << "Players: " << absl::GetFlag(FLAGS_player1) << ", " + << absl::GetFlag(FLAGS_player2) << std::endl; + std::cerr << "Overall wins: " << absl::StrJoin(overall_wins, ", ") + << std::endl; + std::cerr << "Overall returns: " << absl::StrJoin(overall_returns, ", ") + << std::endl; + + return 0; +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/examples/benchmark_game.cc b/scenarios/bargaining/open_spiel/open_spiel/examples/benchmark_game.cc new file mode 100644 index 0000000..69f3093 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/examples/benchmark_game.cc @@ -0,0 +1,153 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/flags/flag.h" +#include "open_spiel/abseil-cpp/absl/flags/parse.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/spiel.h" + +ABSL_FLAG(std::string, game, "tic_tac_toe", "The name of the game to play."); +ABSL_FLAG(int, sims, 1000, "How many simulations to run."); +ABSL_FLAG(int, attempts, 5, "How many sets of simulations to run."); +ABSL_FLAG(bool, verbose, false, + "Boolean flag indicating whether to print all simulation info."); + +namespace open_spiel { + +int RandomSimulation(std::mt19937* rng, const Game& game, bool verbose) { + std::unique_ptr state = game.NewInitialState(); + + if (verbose) { + std::cout << "Initial state:" << std::endl + << "State:" << std::endl + << state->ToString() << std::endl; + } + + bool provides_info_state_tensor = + game.GetType().provides_information_state_tensor; + bool provides_observations_tensor = + game.GetType().provides_observation_tensor; + std::vector obs; + if (provides_info_state_tensor) { + obs = std::vector(game.InformationStateTensorSize()); + } else if (provides_observations_tensor) { + obs = std::vector(game.ObservationTensorSize()); + } + + int game_length = 0; + while (!state->IsTerminal()) { + if (provides_info_state_tensor && state->CurrentPlayer() >= 0) { + state->InformationStateTensor(state->CurrentPlayer(), + absl::MakeSpan(obs)); + } else if (provides_observations_tensor && state->CurrentPlayer() >= 0) { + state->ObservationTensor(state->CurrentPlayer(), absl::MakeSpan(obs)); + } + ++game_length; + if (state->IsChanceNode()) { + std::vector> outcomes = state->ChanceOutcomes(); + Action action; + if (game.GetType().chance_mode == + GameType::ChanceMode::kSampledStochastic) { + action = outcomes.front().first; + } else { + // Explicit chance node; sample one according to underlying + // distribution. + action = SampleAction(outcomes, *rng).first; + } + if (verbose) { + std::cout << "Sampled outcome: " + << state->ActionToString(kChancePlayerId, action) + << std::endl; + } + state->ApplyAction(action); + } else if (state->CurrentPlayer() == kSimultaneousPlayerId) { + // Sample an action for each player + std::vector joint_action; + for (int p = 0; p < game.NumPlayers(); p++) { + std::vector actions; + actions = state->LegalActions(p); + Action action = 0; + if (!actions.empty()) { + std::uniform_int_distribution dis(0, actions.size() - 1); + action = actions[dis(*rng)]; + } + joint_action.push_back(action); + if (verbose) { + std::cout << "Player " << p + << " chose action:" << state->ActionToString(p, action) + << std::endl; + } + } + state->ApplyActions(joint_action); + } else { + // Sample an action uniformly. + std::vector actions = state->LegalActions(); + std::uniform_int_distribution dis(0, actions.size() - 1); + Action action = actions[dis(*rng)]; + if (verbose) { + int p = state->CurrentPlayer(); + std::cout << "Player " << p + << " chose action: " << state->ActionToString(p, action) + << std::endl; + } + state->ApplyAction(action); + } + if (verbose) { + std::cout << "State: " << std::endl << state->ToString() << std::endl; + std::cout << "Observation: " << obs << std::endl; + } + } + return game_length; +} + +// Perform num_sims random simulations of the specified game, and output the +// time taken. 
+void RandomSimBenchmark(const std::string& game_def, int num_sims, + bool verbose) { + std::mt19937 rng; + std::cout << absl::StrFormat("Benchmark: game: %s, num_sims: %d. ", game_def, + num_sims); + + auto game = LoadGame(game_def); + + absl::Time start = absl::Now(); + int num_moves = 0; + for (int sim = 0; sim < num_sims; ++sim) { + num_moves += RandomSimulation(&rng, *game, verbose); + } + absl::Time end = absl::Now(); + double seconds = absl::ToDoubleSeconds(end - start); + + std::cout << absl::StrFormat( + "Finished %d moves in %.1f ms: %.1f sim/s, %.1f moves/s", + num_moves, seconds * 1000, num_sims / seconds, + num_moves / seconds) + << std::endl; +} + +} // namespace open_spiel + +int main(int argc, char** argv) { + absl::ParseCommandLine(argc, argv); + + for (int i = 0; i < absl::GetFlag(FLAGS_attempts); ++i) { + open_spiel::RandomSimBenchmark(absl::GetFlag(FLAGS_game), + absl::GetFlag(FLAGS_sims), + absl::GetFlag(FLAGS_verbose)); + } +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/examples/cfr_example.cc b/scenarios/bargaining/open_spiel/open_spiel/examples/cfr_example.cc new file mode 100644 index 0000000..924ae94 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/examples/cfr_example.cc @@ -0,0 +1,47 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "open_spiel/abseil-cpp/absl/flags/flag.h" +#include "open_spiel/abseil-cpp/absl/flags/parse.h" +#include "open_spiel/algorithms/cfr.h" +#include "open_spiel/algorithms/tabular_exploitability.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +ABSL_FLAG(std::string, game_name, "kuhn_poker", "Game to run CFR on."); +ABSL_FLAG(int, num_iters, 1000, "How many iters to run for."); +ABSL_FLAG(int, report_every, 100, "How often to report exploitability."); + +// Example code for using CFR+ to solve Kuhn Poker. +int main(int argc, char** argv) { + absl::ParseCommandLine(argc, argv); + std::shared_ptr game = + open_spiel::LoadGame(absl::GetFlag(FLAGS_game_name)); + open_spiel::algorithms::CFRSolver solver(*game); + std::cerr << "Starting CFR on " << game->GetType().short_name + << "..." 
<< std::endl; + + for (int i = 0; i < absl::GetFlag(FLAGS_num_iters); ++i) { + solver.EvaluateAndUpdatePolicy(); + if (i % absl::GetFlag(FLAGS_report_every) == 0 || + i == absl::GetFlag(FLAGS_num_iters) - 1) { + double exploitability = open_spiel::algorithms::Exploitability( + *game, *solver.AveragePolicy()); + std::cerr << "Iteration " << i << " exploitability=" << exploitability + << std::endl; + } + } +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/examples/cfr_multi_equilibria_example.cc b/scenarios/bargaining/open_spiel/open_spiel/examples/cfr_multi_equilibria_example.cc new file mode 100644 index 0000000..10dd435 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/examples/cfr_multi_equilibria_example.cc @@ -0,0 +1,85 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "open_spiel/abseil-cpp/absl/flags/flag.h" +#include "open_spiel/abseil-cpp/absl/flags/parse.h" +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/algorithms/cfr.h" +#include "open_spiel/algorithms/outcome_sampling_mccfr.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/init.h" +#include "open_spiel/utils/file.h" + +ABSL_FLAG(std::string, game, "kuhn_poker(players=3)", "Game to run CFR on."); +ABSL_FLAG(std::string, file_prefix, "/tmp", "Path prefix for file writing."); +ABSL_FLAG(int, seed, 39827891, "Seed to use for randomization."); +ABSL_FLAG(int, repeats, 3, "How many iters to run for."); +ABSL_FLAG(int, num_cfr_iters, 1000, "How many iters of CFR to run for."); +ABSL_FLAG(int, num_cfros_iters, 100000, + "How many iters of Outcome Sample MCCFR to run for."); + +// This example checks to see how different the approximate equilibria CFR find +// are, based on random initial regrets and Monte Carlo sampling. + +// Example code for using CFR+ to solve Kuhn Poker.
+int main(int argc, char** argv) { + open_spiel::Init("", &argc, &argv, false); + absl::ParseCommandLine(argc, argv); + std::shared_ptr game = + open_spiel::LoadGame(absl::GetFlag(FLAGS_game)); + std::mt19937 rng(absl::GetFlag(FLAGS_seed)); + absl::uniform_int_distribution dist; + std::string file_prefix = absl::GetFlag(FLAGS_file_prefix); + + // Random initial regrets + for (int i = 0; i < absl::GetFlag(FLAGS_repeats); ++i) { + std::string filename = absl::StrCat(file_prefix, "/cfr_rir_", i, ".txt"); + std::cout << "Random initial regrets, repeat number " << i + << ", generating " << filename << std::endl; + int seed = dist(rng); + open_spiel::algorithms::CFRSolverBase solver(*game, + /*alternating_updates*/true, /*linear_averaging*/false, + /*regret_matching_plus*/false, /*random_initial_regrets*/true, + /*seed*/seed); + + for (int i = 0; i < absl::GetFlag(FLAGS_num_cfr_iters); ++i) { + solver.EvaluateAndUpdatePolicy(); + } + open_spiel::TabularPolicy avg_policy = solver.TabularAveragePolicy(); + open_spiel::file::File outfile(filename, "w"); + outfile.Write(avg_policy.ToStringSorted()); + } + + // Outcome Sampling MCCFR + for (int i = 0; i < absl::GetFlag(FLAGS_repeats); ++i) { + std::string filename = absl::StrCat(file_prefix, "/cfr_cfros_", i, ".txt"); + std::cout << "Outcome Sampling MCCFR, repeat number " << i + << ", generating " << filename << std::endl; + int seed = dist(rng); + open_spiel::algorithms::OutcomeSamplingMCCFRSolver solver(*game, + /*epsilon*/0.6, seed); + + for (int i = 0; i < absl::GetFlag(FLAGS_num_cfros_iters); ++i) { + solver.RunIteration(); + } + open_spiel::TabularPolicy avg_policy = solver.TabularAveragePolicy(); + open_spiel::file::File outfile(filename, "w"); + outfile.Write(avg_policy.ToStringSorted()); + } +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/examples/count_all_states.cc b/scenarios/bargaining/open_spiel/open_spiel/examples/count_all_states.cc new file mode 100644 index 0000000..7ed49a4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/examples/count_all_states.cc @@ -0,0 +1,87 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/flags/flag.h" +#include "open_spiel/abseil-cpp/absl/flags/parse.h" +#include "open_spiel/abseil-cpp/absl/container/flat_hash_set.h" +#include "open_spiel/algorithms/get_all_histories.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +ABSL_FLAG(std::string, game_string, "kuhn_poker", "Game to count states for."); + + +using open_spiel::GameType; +using open_spiel::StateType; +using open_spiel::algorithms::GetAllHistories; + +// Counts the number of states in the game according to various measures. +// - histories is a sequence of moves (for all players) and chance outcomes +// - states is for imperfect information games, information states (i.e. 
+// sets of histories which are indistinguishable to the acting player); +// for example in poker, the acting player's private cards plus the sequence +// of bets and public cards, for perfect information games, Markov states +// (i.e. sets of histories which yield the same result with the same actions +// applied), e.g. in tic-tac-toe the current state of the board, regardless +// of the order in which the moves were played. +int main(int argc, char** argv) { + absl::ParseCommandLine(argc, argv); + std::string game_name = absl::GetFlag(FLAGS_game_string); + std::shared_ptr game = + open_spiel::LoadGame(game_name); + std::vector> all_histories = + GetAllHistories(*game, /*depth_limit=*/-1, /*include_terminals=*/true, + /*include_chance_states=*/true); + absl::flat_hash_set nonterminal_states; + absl::flat_hash_set terminal_states; + const int num_histories = all_histories.size(); + int num_terminal_histories = 0; + int num_chance_nodes = 0; + for (const auto& state : all_histories) { + switch (state->GetType()) { + case StateType::kDecision: + if (game->GetType().information == + GameType::Information::kPerfectInformation) { + nonterminal_states.insert(state->ToString()); + } else { + nonterminal_states.insert(state->InformationStateString()); + } + break; + case StateType::kTerminal: + ++num_terminal_histories; + terminal_states.insert(state->ToString()); + break; + case StateType::kChance: + ++num_chance_nodes; + break; + case StateType::kMeanField: + open_spiel::SpielFatalError("kMeanField not handeled."); + } + } + const int num_nonterminal_states = nonterminal_states.size(); + const int num_terminal_states = terminal_states.size(); + std::cout << "Game: " << game_name + << ", num_histories: " << num_histories + << ", num_terminal_histories: " << num_terminal_histories + << ", num_chance_nodes: " << num_chance_nodes + << ", num_nonterminal_states: " << num_nonterminal_states + << ", num_terminal_states: " << num_terminal_states + << std::endl; +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/examples/dqn_torch_example.cc b/scenarios/bargaining/open_spiel/open_spiel/examples/dqn_torch_example.cc new file mode 100644 index 0000000..c424e25 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/examples/dqn_torch_example.cc @@ -0,0 +1,228 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/algorithms/dqn_torch/dqn.h" + +#include +#include + +#include "open_spiel/abseil-cpp/absl/flags/flag.h" +#include "open_spiel/abseil-cpp/absl/flags/parse.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +ABSL_FLAG(int, seed, 8263487, "Seed to use for random number generation."); + +namespace open_spiel { +namespace algorithms { +namespace torch_dqn { +namespace { + +void DQNCatch(int seed, int total_episodes, int report_every, + int num_eval_episodes) { + std::cout << "Running DQN on catch" << std::endl; + std::mt19937 rng(seed); + std::shared_ptr game = open_spiel::LoadGame("catch"); + + int dqn_agent_seed = absl::Uniform(rng, 0, 1000000); + + // Values copied from: python/examples/single_agent_catch.py + DQNSettings settings = { + /*seed*/ dqn_agent_seed, + /*use_observation*/ game->GetType().provides_observation_tensor, + /*player_id*/ 0, + /*state_representation_size*/ game->ObservationTensorSize(), + /*num_actions*/ game->NumDistinctActions(), + /*hidden_layers_sizes*/ {32, 32}, + /*replay_buffer_capacity*/ 10000, + /*batch_size*/ 128, + /*learning_rate*/ 0.1, + /*update_target_network_every*/ 250, + /*learn_every*/ 10, + /*discount_factor*/ 0.99, + /*min_buffer_size_to_learn*/ 1000, + /*epsilon_start*/ 1.0, + /*epsilon_end*/ 0.1, + /*epsilon_decay_duration*/ 2000}; + auto dqn = std::make_unique(settings); + std::vector agents = {dqn.get()}; + + for (int num_episodes = 0; num_episodes < total_episodes; + num_episodes += report_every) { + // Training + RunEpisodes(&rng, *game, agents, + /*num_episodes*/ report_every, /*is_evaluation*/ false); + + std::vector avg_returns = + RunEpisodes(&rng, *game, agents, + /*num_episodes*/ num_eval_episodes, /*is_evaluation*/ true); + + std::cout << num_episodes + report_every << " " << avg_returns[0] + << std::endl; + } +} + +void SelfPlayTicTacToe(int seed, int total_episodes, int report_every, + int num_eval_episodes) { + std::cout << "Running self-play Tic-Tac-Toe" << std::endl; + std::mt19937 rng(seed); + std::shared_ptr game = open_spiel::LoadGame("tic_tac_toe"); + + std::vector> dqn_agents; + std::vector> random_agents; + std::vector agents(game->NumPlayers(), nullptr); + + for (Player p = 0; p < game->NumPlayers(); ++p) { + int dqn_agent_seed = absl::Uniform(rng, 0, 1000000); + DQNSettings settings = { + /*seed*/ dqn_agent_seed, + /*use_observation*/ game->GetType().provides_observation_tensor, + /*player_id*/ p, + /*state_representation_size*/ game->ObservationTensorSize(), + /*num_actions*/ game->NumDistinctActions(), + /*hidden_layers_sizes*/ {32, 32}, + /*replay_buffer_capacity*/ 100000, + /*batch_size*/ 128, + /*learning_rate*/ 0.01, + /*update_target_network_every*/ 250, + /*learn_every*/ 10, + /*discount_factor*/ 0.99, + /*min_buffer_size_to_learn*/ 1000, + /*epsilon_start*/ 1.0, + /*epsilon_end*/ 0.1, + /*epsilon_decay_duration*/ 50000}; + dqn_agents.push_back(std::make_unique(settings)); + int rand_agent_seed = absl::Uniform(rng, 0, 1000000); + random_agents.push_back(std::make_unique(p, rand_agent_seed)); + } + + for (int num_episodes = 0; num_episodes < total_episodes; + num_episodes += report_every) { + for (Player p = 0; p < game->NumPlayers(); ++p) { + agents[p] = dqn_agents[p].get(); + } + + // Training + RunEpisodes(&rng, *game, agents, + /*num_episodes*/ report_every, /*is_evaluation*/ false); + + // Self-play eval. 
+ std::vector avg_self_play_returns = + RunEpisodes(&rng, *game, agents, + /*num_episodes*/ num_eval_episodes, /*is_evaluation*/ true); + + std::vector avg_returns_vs_random(game->NumPlayers(), 0); + // Eval vs. random. + for (Player p = 0; p < game->NumPlayers(); ++p) { + for (Player pp = 0; pp < game->NumPlayers(); ++pp) { + if (pp == p) { + agents[pp] = dqn_agents[pp].get(); + } else { + agents[pp] = random_agents[pp].get(); + } + } + std::vector avg_returns = RunEpisodes( + &rng, *game, agents, + /*num_episodes*/ num_eval_episodes, /*is_evaluation*/ true); + avg_returns_vs_random[p] = avg_returns[p]; + } + + std::cout << num_episodes + report_every << " self-play returns: "; + for (Player p = 0; p < game->NumPlayers(); ++p) { + std::cout << avg_self_play_returns[p] << " "; + } + std::cout << "returns vs random: "; + for (Player p = 0; p < game->NumPlayers(); ++p) { + std::cout << avg_returns_vs_random[p] << " "; + } + std::cout << std::endl; + } +} + +void SelfPlayPrisonersDilemma(int seed, int total_episodes, int report_every, + int num_eval_episodes) { + std::cout << "Running self-play prisoner's dilemma" << std::endl; + std::mt19937 rng(seed); + std::shared_ptr game = open_spiel::LoadGame("matrix_pd"); + + std::cout << "Example initial state:" << std::endl; + std::cout << game->NewInitialState()->ToString(); + + std::vector> dqn_agents; + std::vector agents(game->NumPlayers(), nullptr); + + for (Player p = 0; p < game->NumPlayers(); ++p) { + int dqn_agent_seed = absl::Uniform(rng, 0, 1000000); + DQNSettings settings = { + /*seed*/ dqn_agent_seed, + /*use_observation*/ game->GetType().provides_observation_tensor, + /*player_id*/ p, + /*state_representation_size*/ game->ObservationTensorSize(), + /*num_actions*/ game->NumDistinctActions(), + /*hidden_layers_sizes*/ {16}, + /*replay_buffer_capacity*/ 10000, + /*batch_size*/ 32, + /*learning_rate*/ 0.1, + /*update_target_network_every*/ 10, + /*learn_every*/ 10, + /*discount_factor*/ 0.99, + /*min_buffer_size_to_learn*/ 1000, + /*epsilon_start*/ 1.0, + /*epsilon_end*/ 0.1, + /*epsilon_decay_duration*/ 10000}; + dqn_agents.push_back(std::make_unique(settings)); + agents[p] = dqn_agents[p].get(); + } + + for (int num_episodes = 0; num_episodes < total_episodes; + num_episodes += report_every) { + // Training + RunEpisodes(&rng, *game, agents, + /*num_episodes*/ report_every, /*is_evaluation*/ false); + + // Self-play eval. 
+ std::vector avg_self_play_returns = + RunEpisodes(&rng, *game, agents, + /*num_episodes*/ num_eval_episodes, /*is_evaluation*/ true); + + std::cout << num_episodes + report_every << " " + << " epsilon=" << dqn_agents[0]->GetEpsilon(false) << " " + << avg_self_play_returns[0] << " " << avg_self_play_returns[1] + << std::endl; + } +} + +} // namespace +} // namespace torch_dqn +} // namespace algorithms +} // namespace open_spiel + +namespace torch_dqn = open_spiel::algorithms::torch_dqn; + +int main(int argc, char** argv) { + absl::ParseCommandLine(argc, argv); + int seed = absl::GetFlag(FLAGS_seed); + torch::manual_seed(seed); + torch_dqn::DQNCatch(seed, /*total_episodes*/ 2000, + /*report_every*/ 250, /*num_eval_episodes*/ 100); + torch_dqn::SelfPlayTicTacToe(seed, + /*total_episodes*/ 100000, + /*report_every*/ 1000, + /*num_eval_episodes*/ 100); + torch_dqn::SelfPlayPrisonersDilemma(seed, /*total_episodes*/ 50000, + /*report_every*/ 100, + /*num_eval_episodes*/ 1); + return 0; +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/examples/example.cc b/scenarios/bargaining/open_spiel/open_spiel/examples/example.cc new file mode 100644 index 0000000..0eb977a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/examples/example.cc @@ -0,0 +1,172 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "open_spiel/abseil-cpp/absl/flags/flag.h" +#include "open_spiel/abseil-cpp/absl/flags/parse.h" +#include "open_spiel/abseil-cpp/absl/random/uniform_int_distribution.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +ABSL_FLAG(std::string, game, "tic_tac_toe", "The name of the game to play."); +ABSL_FLAG(int, players, 0, "How many players in this game, 0 for default."); +ABSL_FLAG(bool, show_infostate, false, "Show the information state."); +ABSL_FLAG(int, seed, 0, "Seed for the random number generator. 0 for auto."); +ABSL_FLAG(bool, show_legals, false, "Show the legal moves."); + +void PrintLegalActions(const open_spiel::State& state, + open_spiel::Player player, + const std::vector& movelist) { + std::cerr << "Legal moves for player " << player << ":" << std::endl; + for (open_spiel::Action action : movelist) { + std::cerr << " " << state.ActionToString(player, action) << std::endl; + } +} + +int main(int argc, char** argv) { + absl::ParseCommandLine(argc, argv); + + std::string game_name = absl::GetFlag(FLAGS_game); + auto players = absl::GetFlag(FLAGS_players); + bool show_infostate = absl::GetFlag(FLAGS_show_infostate); + int seed = absl::GetFlag(FLAGS_seed); + bool show_legals = absl::GetFlag(FLAGS_show_legals); + + // Print out registered games. + std::cerr << "Registered games:" << std::endl; + std::vector names = open_spiel::RegisteredGames(); + for (const std::string& name : names) { + std::cerr << name << std::endl; + } + + // Random number generator. + std::mt19937 rng(seed ? seed : time(0)); + + // Create the game. 
+ std::cerr << "Creating game..\n" << std::endl; + + // Add any specified parameters to override the defaults. + open_spiel::GameParameters params; + if (players > 0) { + params["players"] = open_spiel::GameParameter(players); + } + std::shared_ptr game = + open_spiel::LoadGame(game_name, params); + + if (!game) { + std::cerr << "problem with loading game, exiting..." << std::endl; + return -1; + } + + std::cerr << "Starting new game..." << std::endl; + std::unique_ptr state = game->NewInitialState(); + + std::cerr << "Initial state:" << std::endl; + std::cerr << "State:" << std::endl << state->ToString() << std::endl; + + while (!state->IsTerminal()) { + std::cerr << "player " << state->CurrentPlayer() << std::endl; + + if (state->IsChanceNode()) { + // Chance node; sample one according to underlying distribution. + std::vector> outcomes = + state->ChanceOutcomes(); + open_spiel::Action action = open_spiel::SampleAction(outcomes, rng).first; + std::cerr << "sampled outcome: " + << state->ActionToString(open_spiel::kChancePlayerId, action) + << std::endl; + state->ApplyAction(action); + } else if (state->IsMeanFieldNode()) { + int num_states_distribution = state->DistributionSupport().size(); + state->UpdateDistribution(std::vector( + num_states_distribution, + num_states_distribution > 0 ? 1.0 / num_states_distribution : 1.0)); + std::cerr << "Call update distribution on a uniform distribution of " + << num_states_distribution << " states (length of " + << "DistributionSupport" << std::endl; + } else if (state->IsSimultaneousNode()) { + // open_spiel::Players choose simultaneously? + std::vector joint_action; + std::vector infostate(game->InformationStateTensorSize()); + + // Sample a action for each player + for (auto player = open_spiel::Player{0}; player < game->NumPlayers(); + ++player) { + if (show_infostate) { + if (game->GetType().provides_information_state_tensor) { + state->InformationStateTensor(player, absl::MakeSpan(infostate)); + std::cerr << "player " << player << ": " + << absl::StrJoin(infostate, " ") << std::endl; + } + if (game->GetType().provides_information_state_string) { + std::cerr << "player " << player << ": " + << state->InformationStateString(player) << std::endl; + } + } + + std::vector actions = state->LegalActions(player); + if (show_legals) { + PrintLegalActions(*state, player, actions); + } + + open_spiel::Action action = 0; + if (!actions.empty()) { + absl::uniform_int_distribution<> dis(0, actions.size() - 1); + action = actions[dis(rng)]; + } + joint_action.push_back(action); + std::cerr << "player " << player << " chose " + << state->ActionToString(player, action) << std::endl; + } + + state->ApplyActions(joint_action); + } else { + // Decision node, sample one uniformly. 
+ auto player = state->CurrentPlayer(); + if (show_infostate) { + if (game->GetType().provides_information_state_tensor) { + std::vector infostate; + state->InformationStateTensor(player, absl::MakeSpan(infostate)); + std::cerr << "player " << player << ": " + << absl::StrJoin(infostate, " ") << std::endl; + } + if (game->GetType().provides_information_state_string) { + std::cerr << "player " << player << ": " + << state->InformationStateString(player) << std::endl; + } + } + + std::vector actions = state->LegalActions(); + if (show_legals) { + PrintLegalActions(*state, player, actions); + } + + absl::uniform_int_distribution<> dis(0, actions.size() - 1); + auto action = actions[dis(rng)]; + std::cerr << "chose action: " << state->ActionToString(player, action) + << std::endl; + state->ApplyAction(action); + } + + std::cerr << "State: " << std::endl << state->ToString() << std::endl; + } + + auto returns = state->Returns(); + for (auto p = open_spiel::Player{0}; p < game->NumPlayers(); p++) { + std::cerr << "Final return to player " << p << " is " << returns[p] + << std::endl; + } +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/examples/fsicfr_liars_dice.cc b/scenarios/bargaining/open_spiel/open_spiel/examples/fsicfr_liars_dice.cc new file mode 100644 index 0000000..c89c715 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/examples/fsicfr_liars_dice.cc @@ -0,0 +1,130 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/algorithms/fsicfr.h" +#include "open_spiel/algorithms/tabular_best_response_mdp.h" +#include "open_spiel/games/liars_dice/liars_dice.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace { + +constexpr const int kSeed = 1873561; + +using liars_dice::LiarsDiceState; + +void BuildGraph(FSICFRGraph* graph, const State& state, Action chance_id_0, + Action chance_id_1, int max_predecessors, int parent_node_id, + Action parent_action, int parent_other_player_chance_id) { + if (state.IsTerminal()) { + const auto& ld_state = static_cast(state); + std::string terminal_key = + absl::StrCat("terminal ", ld_state.dice_outcome(0, 0), " ", + ld_state.dice_outcome(1, 0), " ", + ld_state.calling_player(), " ", ld_state.last_bid()); + FSICFRNode* node = graph->GetOrCreateTerminalNode( + terminal_key, state.PlayerReturn(0), max_predecessors); + FSICFRNode* parent_node = graph->GetNode(parent_node_id); + SPIEL_CHECK_TRUE(parent_node != nullptr); + // Connect to the parent. 
+ parent_node->AddChild(parent_action, parent_other_player_chance_id, node); + } else if (state.IsChanceNode()) { + std::vector legal_actions = state.LegalActions(); + for (Action outcome : legal_actions) { + Action next_chance_id_0 = chance_id_0; + Action next_chance_id_1 = chance_id_1; + if (chance_id_0 == kInvalidAction) { + next_chance_id_0 = outcome; + } else { + next_chance_id_1 = outcome; + } + std::unique_ptr next_state = state.Child(outcome); + BuildGraph(graph, *next_state, next_chance_id_0, next_chance_id_1, + max_predecessors, parent_node_id, parent_action, + parent_other_player_chance_id); + } + } else { + std::string info_state_string = state.InformationStateString(); + Player player = state.CurrentPlayer(); + int my_chance_id = player == 0 ? chance_id_0 : chance_id_1; + int other_chance_id = player == 0 ? chance_id_1 : chance_id_0; + std::vector legal_actions = state.LegalActions(); + + FSICFRNode* node = + graph->GetOrCreateDecisionNode(legal_actions, info_state_string, player, + max_predecessors, my_chance_id); + int next_max_predecessors = node->max_predecessors + 1; + int node_id = node->id; + + node->max_predecessors = std::max(max_predecessors, node->max_predecessors); + + FSICFRNode* parent_node = graph->GetNode(parent_node_id); + + // Connect it to the parent. + if (parent_node != nullptr) { + parent_node->AddChild(parent_action, parent_other_player_chance_id, node); + } + + // Recrusively build the graph from the children. + for (Action action : legal_actions) { + std::unique_ptr next_state = state.Child(action); + BuildGraph(graph, *next_state, chance_id_0, chance_id_1, + next_max_predecessors, node_id, action, other_chance_id); + } + } +} + +void RunFSICFR() { + std::unique_ptr graph = std::make_unique(); + std::shared_ptr game = LoadGame("liars_dice_ir"); + std::unique_ptr initial_state = game->NewInitialState(); + std::cout << "Building the graph." << std::endl; + BuildGraph(graph.get(), *initial_state, kInvalidAction, kInvalidAction, 0, -1, + kInvalidAction, -1); + std::cout << "Graph has " << graph->size() << " nodes." << std::endl; + std::cout << "Topologically sorting the nodes." << std::endl; + graph->TopSort(); + FSICFRSolver solver(*game, kSeed, {6, 6}, graph.get()); + + std::cout << "Running iterations" << std::endl; + int max_iterations = 1000000; + int total_iterations = 0; + int num_iterations = 0; + + // solver.RunIteration(); + // std::exit(-1); + + while (total_iterations < max_iterations) { + solver.RunIterations(num_iterations); + // Must use the best response MDP since it supports imperfect recall. + TabularPolicy average_policy = solver.GetAveragePolicy(); + TabularBestResponseMDP tbr(*game, average_policy); + TabularBestResponseMDPInfo br_info = tbr.NashConv(); + total_iterations += num_iterations; + std::cout << total_iterations << " " << br_info.nash_conv << std::endl; + num_iterations = (num_iterations == 0 ? 10 : total_iterations); + } +} + +} // namespace +} // namespace algorithms +} // namespace open_spiel + +int main(int argc, char** argv) { open_spiel::algorithms::RunFSICFR(); } diff --git a/scenarios/bargaining/open_spiel/open_spiel/examples/gtp.cc b/scenarios/bargaining/open_spiel/open_spiel/examples/gtp.cc new file mode 100644 index 0000000..2083627 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/examples/gtp.cc @@ -0,0 +1,212 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/algorithms/mcts.h" +#include "open_spiel/abseil-cpp/absl/flags/flag.h" +#include "open_spiel/abseil-cpp/absl/flags/parse.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +ABSL_FLAG(std::string, game, "tic_tac_toe", "The name of the game to play."); + +std::string Success() { return "=\n\n"; } +std::string Success(const std::string& s) { + return absl::StrCat("= ", s, "\n\n"); +} +std::string Failure(const std::string& s) { + return absl::StrCat("? ", s, "\n\n"); +} + +std::unique_ptr MakeBot( + const open_spiel::Game& game, + std::shared_ptr evaluator) { + return std::make_unique( + game, std::move(evaluator), /*uct_c=*/2, /*max_simulations=*/1000, + /*max_memory_mb=*/0, /*solve=*/true, /*seed=*/0, /*verbose=*/false); +} + +// Implements the Go Text Protocol, GTP, which is a text based protocol for +// communication with computer Go programs +// (https://www.lysator.liu.se/~gunnar/gtp/). This offers the open_spiel games +// and the mcts bot as a command line gtp server, which can be played against +// third party programs, or used on the command line directly. +int main(int argc, char** argv) { + absl::ParseCommandLine(argc, argv); + + std::string game_name = absl::GetFlag(FLAGS_game); + std::shared_ptr game = + open_spiel::LoadGame(game_name); + + std::unique_ptr state = game->NewInitialState(); + + auto evaluator = + std::make_shared( + /*n_rollouts=*/1, /*seed=*/0); + std::unique_ptr bot = MakeBot(*game, evaluator); + + using Args = std::vector; + std::map> cmds = { + {"name", [](const Args&) { return Success("open_spiel"); }}, + {"version", [](const Args&) { return Success("unknown"); }}, + {"protocol_version", [](const Args&) { return Success("2"); }}, + {"quit", [](const Args&) { return Success(); }}, + {"list_commands", [&cmds](const Args& args) { + std::vector keys; + keys.reserve(cmds.size()); + for (auto const& item : cmds) { + keys.push_back(item.first); + } + return Success(absl::StrJoin(keys, " ")); + }}, + {"known_command", [&cmds](const Args& args) { + if (args.empty()) { + return Failure("Not enough args"); + } + return Success(cmds.find(args[0]) == cmds.end() ? 
"false" : "true"); + }}, + {"known_games", [](const Args& args) { + return Success(absl::StrJoin(open_spiel::RegisteredGames(), " ")); + }}, + {"game", [&bot, &game, &state, &evaluator](const Args& args) { + if (args.empty()) { + return Success(game->ToString()); + } + game = open_spiel::LoadGame(args[0]); + state = game->NewInitialState(); + bot = MakeBot(*game, evaluator); + return Success(game->ToString()); + }}, + {"boardsize", [&bot, &game, &state, &evaluator](const Args& args) { + open_spiel::GameParameters params = game->GetParameters(); + if (params.find("board_size") == params.end()) { + return Failure("Game doesn't support setting the board size"); + } + if (args.empty()) { + return Success(params["board_size"].ToString()); + } + int board_size; + if (!absl::SimpleAtoi(args[0], &board_size)) { + return Failure("Failed to parse first arg as an int"); + } + params["board_size"] = open_spiel::GameParameter(board_size); + game = open_spiel::LoadGame(game->GetType().short_name, params); + state = game->NewInitialState(); + bot = MakeBot(*game, evaluator); + return Success(); + }}, + {"play", [&bot, &state](const Args& args) { + if (args.size() < 2) { + return Failure("Not enough args"); + } + // Ignore player arg, assume it's always the current player. + const std::string& action_str = args[1]; + for (const open_spiel::Action action : state->LegalActions()) { + if (action_str == state->ActionToString(action)) { + bot->InformAction(*state, state->CurrentPlayer(), action); + state->ApplyAction(action); + return Success(); + } + } + return Failure("Invalid action"); + }}, + {"genmove", [&bot, &state](const Args& args) { + if (state->IsTerminal()) { + return Failure("Game is already over"); + } + // Ignore player arg, assume it's always the current player. + open_spiel::Action action = bot->Step(*state); + std::string action_str = state->ActionToString(action); + state->ApplyAction(action); + return Success(action_str); + }}, + {"clear_board", [&bot, &game, &state](const Args& args) { + state = game->NewInitialState(); + bot->Restart(); + return Success(); + }}, + {"undo", [&bot, &game, &state](const Args& args) { + std::vector history = state->History(); + int count = 1; + if (!args.empty() && !absl::SimpleAtoi(args[0], &count)) { + return Failure("Failed to parse first arg as an int"); + } + if (history.size() < count) { + return Failure(absl::StrCat( + "Can't undo ", count, " moves from game of length ", + history.size())); + } + state = game->NewInitialState(); + bot->Restart(); + for (int i = 0; i < history.size() - count; ++i) { + bot->InformAction(*state, state->CurrentPlayer(), history[i]); + state->ApplyAction(history[i]); + } + return Success(); + }}, + {"showboard", [&state](const Args& args) { + return Success("\n" + state->ToString()); + }}, + {"history", [&state](const Args& args) { + return Success(state->HistoryString()); + }}, + {"is_terminal", [&state](const Args& args) { + return Success(state->IsTerminal() ? 
"true" : "false"); + }}, + {"current_player", [&state](const Args& args) { + return Success(absl::StrCat(state->CurrentPlayer())); + }}, + {"returns", [&state](const Args& args) { + return Success(absl::StrJoin(state->Returns(), " ")); + }}, + {"legal_actions", [&state](const Args& args) { + std::vector actions; + std::vector legal_actions = state->LegalActions(); + actions.reserve(legal_actions.size()); + for (const open_spiel::Action action : legal_actions) { + actions.push_back(state->ActionToString(action)); + } + return Success(absl::StrJoin(actions, " ")); + }}, + }; + + std::cerr << "Welcome to OpenSpiel GTP interface. Try `list_commands`." + << std::endl << std::endl; + for (std::string line; std::getline(std::cin, line);) { + std::vector parts = absl::StrSplit(line, ' '); + if (parts.empty()) continue; + std::string& cmd = parts[0]; + + auto cmd_it = cmds.find(cmd); + if (cmd_it == cmds.end()) { + std::cout << Failure("unknown command"); + continue; + } + + Args args(parts.begin() + 1, parts.end()); + std::cout << cmd_it->second(args); + if (cmd == "quit") { + break; + } + } + return 0; +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/examples/imperfect_recall_mccfr.cc b/scenarios/bargaining/open_spiel/open_spiel/examples/imperfect_recall_mccfr.cc new file mode 100644 index 0000000..8ea5b48 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/examples/imperfect_recall_mccfr.cc @@ -0,0 +1,64 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "open_spiel/abseil-cpp/absl/flags/flag.h" +#include "open_spiel/abseil-cpp/absl/flags/parse.h" +#include "open_spiel/algorithms/tabular_best_response_mdp.h" +#include "open_spiel/algorithms/external_sampling_mccfr.h" +#include "open_spiel/algorithms/outcome_sampling_mccfr.h" +#include "open_spiel/games/phantom_ttt/phantom_ttt.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// E.g. another choice: dark_hex_ir(board_size=2) +ABSL_FLAG(std::string, game, "liars_dice_ir", "Game string"); +ABSL_FLAG(int, num_iters, 1000000, "How many iters to run for."); +ABSL_FLAG(int, report_every, 1000, "How often to report."); + +namespace open_spiel { +namespace { + +using algorithms::TabularBestResponseMDP; +using algorithms::TabularBestResponseMDPInfo; + +void ImperfectRecallMCCFR() { + std::shared_ptr game = + open_spiel::LoadGame(absl::GetFlag(FLAGS_game)); + // algorithms::ExternalSamplingMCCFRSolver solver(*game); + algorithms::OutcomeSamplingMCCFRSolver solver(*game); + + for (int i = 0; i < absl::GetFlag(FLAGS_num_iters); ++i) { + solver.RunIteration(); + + if (i % absl::GetFlag(FLAGS_report_every) == 0 || + i == absl::GetFlag(FLAGS_num_iters) - 1) { + // Must use tabular best response MDP as it supports imperfect recall + // games. 
+ std::shared_ptr average_policy = solver.AveragePolicy(); + TabularBestResponseMDP tbr(*game, *average_policy); + TabularBestResponseMDPInfo br_info = tbr.NashConv(); + std::cout << i << " " << br_info.nash_conv << std::endl; + } + } +} +} // namespace +} // namespace open_spiel + +int main(int argc, char** argv) { + absl::ParseCommandLine(argc, argv); + open_spiel::ImperfectRecallMCCFR(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/examples/is_mcts_gwhist.cc b/scenarios/bargaining/open_spiel/open_spiel/examples/is_mcts_gwhist.cc new file mode 100644 index 0000000..01410f8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/examples/is_mcts_gwhist.cc @@ -0,0 +1,85 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/algorithms/is_mcts.h" +#include "open_spiel/algorithms/mcts.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_bots.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace { + +constexpr const int kSeed = 9492110; // 93879211; + +void PlayGWhist(int human_player, std::mt19937* rng, int num_rollouts) { + std::shared_ptr game = LoadGame("german_whist_foregame"); + std::random_device rd; + int eval_seed = rd(); + int bot_seed = rd(); + auto evaluator = + std::make_shared(1, eval_seed); + auto bot = std::make_unique( + bot_seed, evaluator, 0.7 * 13, num_rollouts, + algorithms::kUnlimitedNumWorldSamples, + algorithms::ISMCTSFinalPolicyType::kMaxVisitCount, true, false); + std::unique_ptr state = game->NewInitialState(); + while (!state->IsTerminal()) { + Action chosen_action = kInvalidAction; + if (state->IsChanceNode()) { + chosen_action = + SampleAction(state->ChanceOutcomes(), absl::Uniform(*rng, 0.0, 1.0)) + .first; + } else if (state->CurrentPlayer() != human_player) { + chosen_action = bot->Step(*state); + } else { + std::cout << state->InformationStateString(human_player) << std::endl; + auto legal_actions = state->LegalActions(); + for (int i = 0; i < legal_actions.size(); ++i) { + std::cout << state->ActionToString(legal_actions[i]) << ","; + } + std::cout << std::endl; + std::cout << "Input action:"; + std::string input; + std::cin >> input; + chosen_action = state->StringToAction(input); + std::cout << std::endl; + } + state->ApplyAction(chosen_action); + } + + std::cout << "Terminal state:" << std::endl; + std::cout << state->ToString() << std::endl; + std::cout << "Returns: " << absl::StrJoin(state->Returns(), " ") << std::endl; +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char** argv) { + std::random_device rd; + std::mt19937 rng(rd()); + int human_player; + int num_rollouts; + std::cout << "human_player:"; + std::cin >> human_player; + std::cout << "\n"; + std::cout << "num_rollouts:"; + std::cin >> num_rollouts; + std::cout << "\n"; + open_spiel::PlayGWhist(human_player, &rng, num_rollouts); +} diff --git 
a/scenarios/bargaining/open_spiel/open_spiel/examples/matrix_example.cc b/scenarios/bargaining/open_spiel/open_spiel/examples/matrix_example.cc
new file mode 100644
index 0000000..1d1b5c2
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/examples/matrix_example.cc
@@ -0,0 +1,79 @@
+// Copyright 2021 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+#include
+
+#include "open_spiel/abseil-cpp/absl/random/uniform_int_distribution.h"
+#include "open_spiel/matrix_game.h"
+#include "open_spiel/spiel.h"
+
+int main(int argc, char** argv) {
+ // Random number generator.
+ std::mt19937 rng;
+
+ // Create the game with its default parameter settings.
+ std::cerr << "Creating game..\n" << std::endl;
+ std::shared_ptr<const open_spiel::Game> game(
+ new open_spiel::matrix_game::MatrixGame(
+ {/*short_name=*/"matrix_pd",
+ /*long_name=*/"Prisoner's Dilemma",
+ open_spiel::GameType::Dynamics::kSimultaneous,
+ open_spiel::GameType::ChanceMode::kDeterministic,
+ open_spiel::GameType::Information::kPerfectInformation,
+ open_spiel::GameType::Utility::kGeneralSum,
+ open_spiel::GameType::RewardModel::kTerminal,
+ /*max_num_players=*/2,
+ /*min_num_players=*/2,
+ /*provides_information_state_string=*/true,
+ /*provides_information_state_tensor=*/true,
+ /*parameter_specification=*/{}},
+ {},  // Empty parameters
+ {"Cooperate", "Defect"},  // (Row) Player 0's actions
+ {"Cooperate", "Defect"},  // (Column) Player 1's actions
+ {5, 0, 10, 1},  // Player 0's utilities in row-major order
+ {5, 10, 0, 1}  // Player 1's utilities in row-major order
+ ));
+
+ // Note: matrix games can also be registered through the main factory, just
+ // like the other games in spiel, and then loaded through
+ // open_spiel::LoadGame. See games/matrix_games.cc for how to register matrix
+ // games.
+
+ std::cerr << "Starting new game..." << std::endl;
+ std::unique_ptr<open_spiel::State> state = game->NewInitialState();
+
+ std::vector<open_spiel::Action> row_actions = state->LegalActions(0);
+ std::vector<open_spiel::Action> col_actions = state->LegalActions(1);
+
+ open_spiel::Action row_action =
+ row_actions[absl::uniform_int_distribution<int>(
+ 0, row_actions.size() - 1)(rng)];
+ open_spiel::Action col_action =
+ col_actions[absl::uniform_int_distribution<int>(
+ 0, col_actions.size() - 1)(rng)];
+
+ std::cerr << "Joint action is: (" << state->ActionToString(0, row_action)
+ << "," << state->ActionToString(1, col_action) << ")" << std::endl;
+
+ state->ApplyActions({row_action, col_action});
+
+ SPIEL_CHECK_TRUE(state->IsTerminal());
+
+ auto returns = state->Returns();
+ for (int p = 0; p < game->NumPlayers(); p++) {
+ std::cerr << "Terminal return to player " << p << " is " << returns[p]
+ << std::endl;
+ }
+}
diff --git a/scenarios/bargaining/open_spiel/open_spiel/examples/mcts_example.cc b/scenarios/bargaining/open_spiel/open_spiel/examples/mcts_example.cc
new file mode 100644
index 0000000..4110cc7
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/examples/mcts_example.cc
@@ -0,0 +1,191 @@
+// Copyright 2021 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "open_spiel/abseil-cpp/absl/container/btree_map.h"
+#include "open_spiel/abseil-cpp/absl/flags/flag.h"
+#include "open_spiel/abseil-cpp/absl/flags/parse.h"
+#include "open_spiel/abseil-cpp/absl/strings/str_join.h"
+#include "open_spiel/abseil-cpp/absl/time/clock.h"
+#include "open_spiel/abseil-cpp/absl/time/time.h"
+#include "open_spiel/algorithms/mcts.h"
+#include "open_spiel/spiel.h"
+#include "open_spiel/spiel_utils.h"
+
+ABSL_FLAG(std::string, game, "tic_tac_toe", "The name of the game to play.");
+ABSL_FLAG(std::string, player1, "mcts", "Who controls player1.");
+ABSL_FLAG(std::string, player2, "random", "Who controls player2.");
+ABSL_FLAG(double, uct_c, 2, "UCT exploration constant.");
+ABSL_FLAG(int, rollout_count, 10, "How many rollouts per evaluation.");
+ABSL_FLAG(int, max_simulations, 10000, "How many simulations to run.");
+ABSL_FLAG(int, num_games, 1, "How many games to play.");
+ABSL_FLAG(int, max_memory_mb, 1000,
+ "The maximum memory used before cutting the search short.");
+ABSL_FLAG(bool, solve, true, "Whether to use MCTS-Solver.");
+ABSL_FLAG(uint_fast32_t, seed, 0, "Seed for MCTS.");
+ABSL_FLAG(bool, verbose, false, "Show the MCTS stats of possible moves.");
+ABSL_FLAG(bool, quiet, false, "Show the MCTS stats of possible moves.");
+
+uint_fast32_t Seed() {
+ uint_fast32_t seed = absl::GetFlag(FLAGS_seed);
+ return seed != 0 ?
seed : absl::ToUnixMicros(absl::Now()); +} + +std::unique_ptr InitBot( + std::string type, const open_spiel::Game& game, open_spiel::Player player, + std::shared_ptr evaluator) { + if (type == "random") { + return open_spiel::MakeUniformRandomBot(player, Seed()); + } + + if (type == "mcts") { + return std::make_unique( + game, std::move(evaluator), absl::GetFlag(FLAGS_uct_c), + absl::GetFlag(FLAGS_max_simulations), + absl::GetFlag(FLAGS_max_memory_mb), absl::GetFlag(FLAGS_solve), Seed(), + absl::GetFlag(FLAGS_verbose)); + } + open_spiel::SpielFatalError("Bad player type. Known types: mcts, random"); +} + +open_spiel::Action GetAction(const open_spiel::State& state, + std::string action_str) { + for (open_spiel::Action action : state.LegalActions()) { + if (action_str == state.ActionToString(state.CurrentPlayer(), action)) + return action; + } + return open_spiel::kInvalidAction; +} + +std::pair, std::vector> PlayGame( + const open_spiel::Game& game, + std::vector>& bots, std::mt19937& rng, + const std::vector& initial_actions) { + bool quiet = absl::GetFlag(FLAGS_quiet); + std::unique_ptr state = game.NewInitialState(); + std::vector history; + + for (const auto& action_str : initial_actions) { + open_spiel::Action action = GetAction(*state, action_str); + if (action == open_spiel::kInvalidAction) + open_spiel::SpielFatalError(absl::StrCat("Invalid action: ", action_str)); + + history.push_back(action_str); + state->ApplyAction(action); + if (!quiet) { + std::cerr << "Forced action" << action_str << std::endl; + std::cerr << "Next state:\n" << state->ToString() << std::endl; + } + } + + while (!state->IsTerminal()) { + open_spiel::Player player = state->CurrentPlayer(); + if (!quiet) std::cerr << "player turn: " << player << std::endl; + + open_spiel::Action action; + if (state->IsChanceNode()) { + // Chance node; sample one according to underlying distribution. + open_spiel::ActionsAndProbs outcomes = state->ChanceOutcomes(); + action = open_spiel::SampleAction(outcomes, rng).first; + if (!quiet) + std::cerr << "Sampled action: " << state->ActionToString(player, action) + << std::endl; + } else if (state->IsSimultaneousNode()) { + open_spiel::SpielFatalError( + "MCTS not supported for games with simultaneous actions."); + } else { + // Decision node, ask the right bot to make its action + action = bots[player]->Step(*state); + if (!quiet) + std::cerr << "Chose action: " << state->ActionToString(player, action) + << std::endl; + } + for (open_spiel::Player p = 0; p < bots.size(); ++p) { + if (p != player) { + bots[p]->InformAction(*state, player, action); + } + } + history.push_back(state->ActionToString(player, action)); + state->ApplyAction(action); + + if (!quiet) + std::cerr << "State: " << std::endl << state->ToString() << std::endl; + } + + std::cerr << "Returns: " << absl::StrJoin(state->Returns(), ",") + << " Game actions: " << absl::StrJoin(history, " ") << std::endl; + return {state->Returns(), history}; +} + +// Example code for using MCTS agent to play a game +int main(int argc, char** argv) { + std::vector positional_args = absl::ParseCommandLine(argc, argv); + std::mt19937 rng(Seed()); // Random number generator. + + // Create the game + std::string game_name = absl::GetFlag(FLAGS_game); + std::cerr << "game: " << game_name << std::endl; + std::shared_ptr game = + open_spiel::LoadGame(game_name); + + // MCTS supports arbitrary number of players, but this example assumes + // 2-player games. 
+ SPIEL_CHECK_TRUE(game->NumPlayers() <= 2); + + auto evaluator = + std::make_shared( + absl::GetFlag(FLAGS_rollout_count), Seed()); + + std::vector> bots; + bots.push_back(InitBot(absl::GetFlag(FLAGS_player1), *game, 0, evaluator)); + bots.push_back(InitBot(absl::GetFlag(FLAGS_player2), *game, 1, evaluator)); + + std::vector initial_actions; + for (int i = 1; i < positional_args.size(); ++i) { + initial_actions.push_back(positional_args[i]); + } + + absl::btree_map histories; + std::vector overall_returns(2, 0); + std::vector overall_wins(2, 0); + int num_games = absl::GetFlag(FLAGS_num_games); + for (int game_num = 0; game_num < num_games; ++game_num) { + auto [returns, history] = PlayGame(*game, bots, rng, initial_actions); + histories[absl::StrJoin(history, " ")] += 1; + for (int i = 0; i < returns.size(); ++i) { + double v = returns[i]; + overall_returns[i] += v; + if (v > 0) { + overall_wins[i] += 1; + } + } + } + + std::cerr << "Number of games played: " << num_games << std::endl; + std::cerr << "Number of distinct games played: " << histories.size() + << std::endl; + std::cerr << "Overall wins: " << absl::StrJoin(overall_wins, ",") + << std::endl; + std::cerr << "Overall returns: " << absl::StrJoin(overall_returns, ",") + << std::endl; + + return 0; +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/examples/minimax_example.cc b/scenarios/bargaining/open_spiel/open_spiel/examples/minimax_example.cc new file mode 100644 index 0000000..d83340c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/examples/minimax_example.cc @@ -0,0 +1,117 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "open_spiel/algorithms/minimax.h" +#include "open_spiel/games/breakthrough/breakthrough.h" +#include "open_spiel/games/pig/pig.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +inline constexpr int kSearchDepth = 2; +inline constexpr int kSearchDepthPig = 10; +inline constexpr int kWinscorePig = 30; +inline constexpr int kDiceoutcomesPig = 2; +inline constexpr int kSeed = 726345721; + +namespace open_spiel { +namespace { + +int BlackPieceAdvantage(const State& state) { + const auto& bstate = down_cast(state); + return bstate.pieces(breakthrough::kBlackPlayerId) - + bstate.pieces(breakthrough::kWhitePlayerId); +} + +void PlayBreakthrough() { + std::shared_ptr game = + LoadGame("breakthrough", {{"rows", GameParameter(6)}, + {"columns", GameParameter(6)}}); + std::unique_ptr state = game->NewInitialState(); + while (!state->IsTerminal()) { + std::cout << std::endl << state->ToString() << std::endl; + + Player player = state->CurrentPlayer(); + std::pair value_action = algorithms::AlphaBetaSearch( + *game, state.get(), [player](const State& state) { + return (player == breakthrough::kBlackPlayerId ? 
+ BlackPieceAdvantage(state) : + -BlackPieceAdvantage(state)); + }, + kSearchDepth, player); + + std::cout << std::endl << "Player " << player << " choosing action " + << state->ActionToString(player, value_action.second) + << " with heuristic value (to black) " << value_action.first + << std::endl; + + state->ApplyAction(value_action.second); + } + + std::cout << "Terminal state: " << std::endl; + std::cout << state->ToString() << std::endl; +} + +int FirstPlayerAdvantage(const State& state) { + const auto& pstate = down_cast(state); + return pstate.score(0) - pstate.score(1); +} + +void PlayPig(std::mt19937& rng) { + std::shared_ptr game = + LoadGame("pig", {{"winscore", GameParameter(kWinscorePig)}, + {"diceoutcomes", GameParameter(kDiceoutcomesPig)}}); + std::unique_ptr state = game->NewInitialState(); + while (!state->IsTerminal()) { + std::cout << std::endl << state->ToString() << std::endl; + + Player player = state->CurrentPlayer(); + if (state->IsChanceNode()) { + // Chance node; sample one according to underlying distribution. + ActionsAndProbs outcomes = state->ChanceOutcomes(); + Action action = open_spiel::SampleAction(outcomes, rng).first; + std::cerr << "Sampled action: " << state->ActionToString(player, action) + << std::endl; + state->ApplyAction(action); + } else { + std::pair value_action = algorithms::ExpectiminimaxSearch( + *game, state.get(), + [player](const State& state) { + return (player == Player{0} ? FirstPlayerAdvantage(state) + : -FirstPlayerAdvantage(state)); + }, + kSearchDepthPig, player); + + std::cout << std::endl + << "Player " << player << " choosing action " + << state->ActionToString(player, value_action.second) + << " with heuristic value " << value_action.first << std::endl; + + state->ApplyAction(value_action.second); + } + } + + std::cout << "Terminal state: " << std::endl; + std::cout << state->ToString() << std::endl; +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char **argv) { + std::mt19937 rng(kSeed); // Random number generator. + open_spiel::PlayBreakthrough(); + open_spiel::PlayPig(rng); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/examples/policy_iteration_example.cc b/scenarios/bargaining/open_spiel/open_spiel/examples/policy_iteration_example.cc new file mode 100644 index 0000000..285d509 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/examples/policy_iteration_example.cc @@ -0,0 +1,42 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "open_spiel/algorithms/policy_iteration.h" +#include "open_spiel/games/tic_tac_toe/tic_tac_toe.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// Example code for using policy iteration algorithm to solve tic-tac-toe. 
+int main(int argc, char** argv) {
+ std::shared_ptr<const open_spiel::Game> game =
+ open_spiel::LoadGame("tic_tac_toe");
+
+ absl::flat_hash_map<std::string, double> solution =
+ open_spiel::algorithms::PolicyIteration(*game, -1, 0.01);
+ for (const auto& kv : solution) {
+ std::cerr << "State: " << std::endl
+ << kv.first << std::endl
+ << "Value: " << kv.second << std::endl;
+ }
+
+ std::string initial_state = "...\n...\n...";
+ std::string cross_win_state = "...\n...\n.ox";
+ std::string naught_win_state = "x..\noo.\nxx.";
+ SPIEL_CHECK_EQ(solution[initial_state], 0);
+ SPIEL_CHECK_EQ(solution[cross_win_state], 1);
+ SPIEL_CHECK_EQ(solution[naught_win_state], -1);
+}
diff --git a/scenarios/bargaining/open_spiel/open_spiel/examples/sbr_blotto/fictitious_play.cc b/scenarios/bargaining/open_spiel/open_spiel/examples/sbr_blotto/fictitious_play.cc
new file mode 100644
index 0000000..2b47c0e
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/examples/sbr_blotto/fictitious_play.cc
@@ -0,0 +1,831 @@
+// Copyright 2021 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "open_spiel/examples/sbr_blotto/fictitious_play.h"
+
+#include
+#include
+
+#include "open_spiel/abseil-cpp/absl/random/discrete_distribution.h"
+#include "open_spiel/abseil-cpp/absl/random/distributions.h"
+#include "open_spiel/abseil-cpp/absl/time/time.h"
+#include "open_spiel/algorithms/corr_dist.h"
+#include "open_spiel/policy.h"
+#include "open_spiel/spiel.h"
+#include "open_spiel/spiel_globals.h"
+#include "open_spiel/spiel_utils.h"
+
+namespace open_spiel {
+namespace algorithms {
+namespace blotto_fp {
+
+inline constexpr const double kTieTolerance = 1e-9;
+
+ActionsAndProbs UniformSequencePolicy(int num_actions) {
+ ActionsAndProbs actions_and_probs;
+ actions_and_probs.reserve(num_actions);
+ for (Action a = 0; a < num_actions; ++a) {
+ actions_and_probs.push_back({a, 1.0 / num_actions});
+ }
+ return actions_and_probs;
+}
+
+ActionsAndProbs RandomStatePolicy(int num_actions,
+ const std::function<double()>& rng) {
+ ActionsAndProbs actions_and_probs;
+ actions_and_probs.reserve(num_actions);
+ double total_weight = 0.0;
+ for (Action a = 0; a < num_actions; ++a) {
+ double weight = rng();
+ total_weight += weight;
+ actions_and_probs.push_back({a, weight});
+ }
+ for (Action a = 0; a < num_actions; ++a) {
+ actions_and_probs[a].second /= total_weight;
+ }
+ return actions_and_probs;
+}
+
+FictitiousPlayProcess::FictitiousPlayProcess(std::shared_ptr<const Game> game,
+ int seed,
+ bool randomize_initial_policies)
+ : rng_(seed),
+ dist_(0.0, 1.0),
+ game_(game),
+ num_players_(game->NumPlayers()),
+ num_actions_(game->NumDistinctActions()),
+ iterations_(0),
+ total_time_(absl::ZeroDuration()) {
+ // Get the info states strings.
+ infostate_strings_.reserve(num_players_); + std::unique_ptr state = game->NewInitialState(); + for (Player p = 0; p < num_players_; ++p) { + SPIEL_CHECK_EQ(state->CurrentPlayer(), p); + std::vector legal_actions = state->LegalActions(); + std::string infostate_str = state->InformationStateString(); + infostate_strings_.push_back(infostate_str); + state->ApplyAction(legal_actions[0]); + } + SPIEL_CHECK_TRUE(state->IsTerminal()); + + // Get number of joint actions + num_joint_actions_ = 1; + for (Player p = 0; p < num_players_; ++p) { + num_joint_actions_ *= num_actions_; + } + + // Set initial cumulative policies to uniform. + if (randomize_initial_policies) { + InitPoliciesRandom(); + } else { + InitPolicies(); + } + + GetMarginalJointPolicy(&joint_policy_); + CheckJointUtilitiesCache(); + + // Make the best response computers (for full FP) + for (int p = 0; p < num_players_; ++p) { + best_response_computers_.push_back(std::unique_ptr( + new TabularBestResponse(*game_, p, &joint_policy_))); + } +} + +void FictitiousPlayProcess::GetMarginalJointPolicy( + TabularPolicy* policy, + const std::vector>* weight_table) const { + if (weight_table == nullptr) { + weight_table = &cumulative_policies_; + } + + for (Player p = 0; p < num_players_; ++p) { + double prob_sum = 0.0; + double total_weight = std::accumulate((*weight_table)[p].begin(), + (*weight_table)[p].end(), 0.0); + for (Action a = 0; a < num_actions_; ++a) { + double prob = (*weight_table)[p][a] / total_weight; + SPIEL_CHECK_PROB(prob); + policy->SetProb(infostate_strings_[p], a, prob); + prob_sum += prob; + } + SPIEL_CHECK_TRUE(Near(prob_sum, 1.0)); + } +} + +// Get the marginalized joint policy by marginalizing the empirical joint +// policy. +void FictitiousPlayProcess::GetMarginalJointPolicyFromEmpirical( + TabularPolicy* policy) const { + std::vector> marginal_weights; + marginal_weights.reserve(num_players_); + for (Player p = 0; p < num_players_; ++p) { + marginal_weights.push_back(std::vector(num_actions_, 0)); + } + + NormalFormCorrelationDevice mu = GetEmpiricalJointPolicy(); + + for (int ja_idx = 0; ja_idx < num_joint_actions_; ++ja_idx) { + for (Player p = 0; p < num_players_; ++p) { + marginal_weights[p][mu[ja_idx].actions[p]] += mu[ja_idx].probability; + } + } + + for (Player p = 0; p < num_players_; ++p) { + double total_weight = std::accumulate(marginal_weights[p].begin(), + marginal_weights[p].end(), 0.0); + for (Action a = 0; a < num_actions_; ++a) { + marginal_weights[p][a] /= total_weight; + } + } + + GetMarginalJointPolicy(policy, &marginal_weights); +} + +TabularPolicy FictitiousPlayProcess::GetLatestPolicy() const { + TabularPolicy policy; + for (Player p = 0; p < num_players_; ++p) { + double prob_sum = 0.0; + for (Action a = 0; a < num_actions_; ++a) { + double prob = std::max(GetProb(past_policies_.back()[p], a), 0.0); + policy.SetProb(infostate_strings_[p], a, prob); + prob_sum += prob; + } + SPIEL_CHECK_TRUE(Near(prob_sum, 1.0)); + } + return policy; +} + +NormalFormCorrelationDevice FictitiousPlayProcess::GetEmpiricalJointPolicy() + const { + double prob_sum = 0.0; + NormalFormCorrelationDevice corr_dev; + corr_dev.reserve(num_joint_actions_); + double total_weight = std::accumulate(cumulative_joint_policy_.begin(), + cumulative_joint_policy_.end(), 0.0); + for (int ja_idx = 0; ja_idx < num_joint_actions_; ++ja_idx) { + double prob = cumulative_joint_policy_[ja_idx] / total_weight; + SPIEL_CHECK_PROB(prob); + corr_dev.push_back({prob, IndexToJointAction(ja_idx)}); + prob_sum += prob; + } + 
SPIEL_CHECK_TRUE(Near(prob_sum, 1.0)); + SPIEL_CHECK_EQ(corr_dev.size(), num_joint_actions_); + return corr_dev; +} + +void FictitiousPlayProcess::InitPoliciesRandom() { + // Cumulative policies per player. + cumulative_policies_.reserve(num_players_); + for (Player p = 0; p < num_players_; ++p) { + std::vector policy(num_actions_, 0); + double total_weight = 0.0; + for (int a = 0; a < num_actions_; ++a) { + policy[a] = dist_(rng_); + total_weight += policy[a]; + } + for (int a = 0; a < num_actions_; ++a) { + policy[a] /= total_weight; + } + cumulative_policies_.push_back(policy); + } + + // Initial, current, and past policies + current_joint_policy_counts_.resize(num_joint_actions_); + std::vector initial_policies; + initial_policies.reserve(num_players_); + for (Player p = 0; p < num_players_; ++p) { + initial_policies.push_back( + RandomStatePolicy(num_actions_, [this]() { return dist_(rng_); })); + } + past_policies_.push_back(initial_policies); + + // Cumulative joint policy. + cumulative_joint_policy_ = std::vector(num_joint_actions_, 0); + double total_weight = 0.0; + for (int idx = 0; idx < num_joint_actions_; ++idx) { + cumulative_joint_policy_[idx] = dist_(rng_); + total_weight += cumulative_joint_policy_[idx]; + } + for (int idx = 0; idx < num_joint_actions_; ++idx) { + cumulative_joint_policy_[idx] /= total_weight; + } +} + +void FictitiousPlayProcess::InitPolicies() { + // Cumulative policies per player. + cumulative_policies_.reserve(num_players_); + for (Player p = 0; p < num_players_; ++p) { + std::vector uniform_policy(num_actions_, 1.0 / num_actions_); + cumulative_policies_.push_back(uniform_policy); + } + + // Initial, current, and past policies + current_joint_policy_counts_.resize(num_joint_actions_); + std::vector initial_policies; + initial_policies.reserve(num_players_); + for (Player p = 0; p < num_players_; ++p) { + initial_policies.push_back(UniformSequencePolicy(num_actions_)); + } + past_policies_.push_back(initial_policies); + + // Cumulative joint policy. 
+ cumulative_joint_policy_ = + std::vector(num_joint_actions_, 1.0 / num_joint_actions_); +} + +// Add appropriate weights given each players' (potentially mixed) policy +void FictitiousPlayProcess::UpdateCumulativeJointPolicy( + const std::vector>& policies) { + double sum_weights = 0.0; + double sum_joint_policy = 0.0; + for (int ja_idx = 0; ja_idx < num_joint_actions_; ++ja_idx) { + std::vector joint_action = IndexToJointAction(ja_idx); + double weight = 1.0; + for (Player p = 0; p < num_players_; ++p) { + Action action = joint_action[p]; + weight *= policies[p][action]; + } + cumulative_joint_policy_[ja_idx] += weight; + sum_weights += weight; + sum_joint_policy += cumulative_joint_policy_[ja_idx]; + } + SPIEL_CHECK_FLOAT_NEAR(sum_weights, 1.0, 1e-12); + SPIEL_CHECK_FLOAT_NEAR(sum_joint_policy, iterations_ + 2.0, 1e-12); +} + +void FictitiousPlayProcess::UpdateCumulativeJointPolicySampled( + const std::vector>& policies, int num_samples) { + double weight = 1.0 / num_samples; + std::vector> dists; + dists.reserve(policies.size()); + for (Player p = 0; p < policies.size(); ++p) { + dists.push_back(absl::discrete_distribution(policies[p].begin(), + policies[p].end())); + } + for (int sample = 0; sample < num_samples; ++sample) { + std::vector joint_action; + joint_action.reserve(num_players_); + for (Player p = 0; p < policies.size(); ++p) { + joint_action.push_back(dists[p](rng_)); + } + int ja_idx = JointActionToIndex(joint_action); + cumulative_joint_policy_[ja_idx] += weight; + } +} + +std::vector FictitiousPlayProcess::Softmax( + const std::vector& values, double lambda) const { + std::vector new_values = values; + for (double& new_value : new_values) { + new_value *= lambda; + } + double max = *std::max_element(new_values.begin(), new_values.end()); + + double denom = 0; + for (int idx = 0; idx < values.size(); ++idx) { + new_values[idx] = std::exp(new_values[idx] - max); + denom += new_values[idx]; + } + + SPIEL_CHECK_GT(denom, 0); + double prob_sum = 0.0; + std::vector policy; + policy.reserve(new_values.size()); + for (int idx = 0; idx < values.size(); ++idx) { + double prob = new_values[idx] / denom; + SPIEL_CHECK_PROB(prob); + prob_sum += prob; + policy.push_back(prob); + } + + SPIEL_CHECK_FLOAT_NEAR(prob_sum, 1.0, 1e-12); + return policy; +} + +Action FictitiousPlayProcess::BestResponseAgainstEmpiricalJointPolicy( + Player player, std::vector* values) { + double best_action_value = -10; + Action best_action = kInvalidAction; + // NormalFormCorrelationDevice mu = GetEmpiricalJointPolicy(); + double total_weight = std::accumulate(cumulative_joint_policy_.begin(), + cumulative_joint_policy_.end(), 0.0); + + for (Action a = 0; a < num_actions_; ++a) { + double value = 0.0; + for (int idx = 0; idx < num_joint_actions_; ++idx) { + // std::vector joint_action = mu[idx].actions; + std::vector joint_action = IndexToJointAction(idx); + joint_action[player] = a; + int new_ja_idx = JointActionToIndex(joint_action); + // value += + // mu[idx].probability * cached_joint_utilities_[player][new_ja_idx]; + value += (cumulative_joint_policy_[idx] * + cached_joint_utilities_[player][new_ja_idx]); + } + value /= total_weight; + if (values != nullptr) { + (*values)[a] = value; + } + if (value > best_action_value) { + best_action_value = value; + best_action = a; + } + } + + return best_action; +} + +Action FictitiousPlayProcess::BestResponseAgainstEmpiricalMarginalizedPolicies( + Player player, std::vector* values) { + TabularPolicy marginalized_joint_policy; + 
GetMarginalJointPolicyFromEmpirical(&marginalized_joint_policy); + best_response_computers_[player]->SetPolicy(&marginalized_joint_policy); + TabularPolicy br = best_response_computers_[player]->GetBestResponsePolicy(); + Action br_action = GetAction(br.GetStatePolicy(infostate_strings_[player])); + if (values != nullptr) { + std::vector> action_vals = + best_response_computers_[player]->BestResponseActionValues( + infostate_strings_[player]); + values->resize(action_vals.size()); + for (const auto& iter : action_vals) { + (*values)[iter.first] = iter.second; + } + } + return br_action; +} + +void FictitiousPlayProcess::CheckJointUtilitiesCache() { + if (cached_joint_utilities_.empty()) { + cached_joint_utilities_.reserve(num_players_); + for (Player p = 0; p < num_players_; ++p) { + cached_joint_utilities_.push_back( + std::vector(num_joint_actions_, 0)); + } + + for (int ja_idx = 0; ja_idx < num_joint_actions_; ++ja_idx) { + std::vector joint_action = IndexToJointAction(ja_idx); + std::unique_ptr state = game_->NewInitialState(); + for (Action action : joint_action) { + state->ApplyAction(action); + } + SPIEL_CHECK_TRUE(state->IsTerminal()); + std::vector returns = state->Returns(); + for (Player p = 0; p < num_players_; ++p) { + cached_joint_utilities_[p][ja_idx] = returns[p]; + } + } + } +} + +double FictitiousPlayProcess::CCEDist() const { + double dist = 0; + std::vector max_deviation(num_players_, -10); + std::vector exp_values(num_players_, 0); + NormalFormCorrelationDevice corr_dev = GetEmpiricalJointPolicy(); + + // First compute expected values for everyone + for (int ja_idx = 0; ja_idx < num_joint_actions_; ++ja_idx) { + if (corr_dev[ja_idx].probability > 0) { + for (Player p = 0; p < num_players_; ++p) { + exp_values[p] += + corr_dev[ja_idx].probability * cached_joint_utilities_[p][ja_idx]; + } + } + } + + // Now for each player, find the maximal deviation + for (Player p = 0; p < num_players_; ++p) { + for (Action a = 0; a < num_actions_; ++a) { + double action_value = 0; + for (int ja_idx = 0; ja_idx < num_joint_actions_; ++ja_idx) { + if (corr_dev[ja_idx].probability > 0) { + // Player p is changing to choose a instead. 
+ std::vector joint_action = IndexToJointAction(ja_idx); + joint_action[p] = a; + int other_index = JointActionToIndex(joint_action); + + action_value += corr_dev[ja_idx].probability * + cached_joint_utilities_[p][other_index]; + } + } + if (action_value > max_deviation[p]) { + max_deviation[p] = action_value; + } + } + } + + for (Player p = 0; p < num_players_; ++p) { + double delta = std::max(max_deviation[p] - exp_values[p], 0.0); + SPIEL_CHECK_GE(delta, 0); + dist += delta; + } + + return dist; +} + +double FictitiousPlayProcess::NashConv() const { + TabularPolicy marginalized_policy; + GetMarginalJointPolicyFromEmpirical(&marginalized_policy); + return open_spiel::algorithms::NashConv(*game_, marginalized_policy); + // TabularPolicy marginalized_policy; + // GetMarginalJointPolicy(&marginalized_policy); + // return open_spiel::algorithms::NashConv(*game_, marginalized_policy); +} + +int FictitiousPlayProcess::JointActionToIndex( + const std::vector& joint_action) const { + // Convert to a number from base num_actions_ + int index = 0; + int digit_value = 1; + for (int i = 0; i < joint_action.size(); ++i) { + index += joint_action[i] * digit_value; + digit_value *= num_actions_; + } + SPIEL_CHECK_LT(index, num_joint_actions_); + return index; +} + +std::vector FictitiousPlayProcess::IndexToJointAction(int index) const { + // Convert to a number in base num_actions_ + std::vector joint_action(num_players_, kInvalidAction); + for (int i = 0; i < num_players_; ++i) { + joint_action[i] = index % num_actions_; + index /= num_actions_; + } + SPIEL_CHECK_EQ(index, 0); + return joint_action; +} + +void FictitiousPlayProcess::IBRIteration() { + absl::Time start = absl::Now(); + + // Compute the joint policy. + GetMarginalJointPolicy(&joint_policy_); + + // Get each player's best response, and add it to the cumulative policy. + std::vector br_policies(num_players_); + + std::vector joint_action(num_players_, kInvalidAction); + + for (Player p = 0; p < num_players_; ++p) { + best_response_computers_[p]->SetPolicy(&joint_policy_); + br_policies[p] = best_response_computers_[p]->GetBestResponsePolicy(); + Action br_action = + GetAction(br_policies[p].GetStatePolicy(infostate_strings_[p])); + SPIEL_CHECK_TRUE(br_action != kInvalidAction); + std::fill(cumulative_policies_[p].begin(), cumulative_policies_[p].end(), + 0); + cumulative_policies_[p][br_action] = 1.0; + joint_action[p] = br_action; + } + + std::fill(cumulative_joint_policy_.begin(), cumulative_joint_policy_.end(), + 0.0); + cumulative_joint_policy_[JointActionToIndex(joint_action)] = 1.0; + + iterations_++; + total_time_ += absl::Now() - start; +} + +void FictitiousPlayProcess::MaxEntIBRIteration() { + absl::Time start = absl::Now(); + + // Compute the joint policy. + GetMarginalJointPolicy(&joint_policy_); + + // Get each player's best response, and add it to the cumulative policy. 
+ std::vector br_policies(num_players_); + + std::vector> policies; + policies.reserve(num_players_); + + for (Player p = 0; p < num_players_; ++p) { + best_response_computers_[p]->SetPolicy(&joint_policy_); + br_policies[p] = best_response_computers_[p]->GetBestResponsePolicy(); + std::vector br_actions = + best_response_computers_[p]->BestResponseActions(infostate_strings_[p], + 1e-10); + SPIEL_CHECK_GT(br_actions.size(), 0); + std::fill(cumulative_policies_[p].begin(), cumulative_policies_[p].end(), + 0); + + std::vector br(num_actions_, 0); + + for (Action action : br_actions) { + double prob = 1.0 / br_actions.size(); + cumulative_policies_[p][action] = prob; + br[action] = prob; + } + + policies.push_back(br); + } + + // Update empirical cumulative dist with these mixed policies + std::fill(cumulative_joint_policy_.begin(), cumulative_joint_policy_.end(), + 0.0); + UpdateCumulativeJointPolicy(policies); + + iterations_++; + total_time_ += absl::Now() - start; +} + +void FictitiousPlayProcess::FullFPIteration() { + absl::Time start = absl::Now(); + + std::vector joint_action(num_players_, kInvalidAction); + + for (Player p = 0; p < num_players_; ++p) { + // Action br_action = BestResponseAgainstEmpiricalMarginalizedPolicies(p); + Action br_action = BestResponseAgainstEmpiricalJointPolicy(p); + SPIEL_CHECK_TRUE(br_action != kInvalidAction); + cumulative_policies_[p][br_action] += 1.0; + joint_action[p] = br_action; + } + + cumulative_joint_policy_[JointActionToIndex(joint_action)] += 1.0; + + iterations_++; + total_time_ += absl::Now() - start; +} + +void FictitiousPlayProcess::SFPIteration(double lambda) { + absl::Time start = absl::Now(); + + std::vector> softmax_brs; + softmax_brs.reserve(num_players_); + + for (Player p = 0; p < num_players_; ++p) { + std::vector values(num_actions_, 0); + // BestResponseAgainstEmpiricalMarginalizedPolicies(p, &values); + BestResponseAgainstEmpiricalJointPolicy(p, &values); + + std::vector softmax_br = Softmax(values, lambda); + softmax_brs.push_back(softmax_br); + } + + for (Player p = 0; p < num_players_; ++p) { + for (int i = 0; i < softmax_brs[p].size(); ++i) { + cumulative_policies_[p][i] += softmax_brs[p][i]; + } + } + + // Update empirical cumulative dist with these mixed policies + UpdateCumulativeJointPolicy(softmax_brs); + + iterations_++; + total_time_ += absl::Now() - start; +} + +// This is FP+SBR in the paper, samples the base profiles from the average +// strategy. +void FictitiousPlayProcess::SBRIteration(int num_base_samples, + int num_candidates) { + absl::Time start = absl::Now(); + + std::vector joint_action(num_players_, kInvalidAction); + + // Sample the base profiles: number by player + std::vector> base_samples; + base_samples.reserve(num_base_samples); + for (int i = 0; i < num_base_samples; ++i) { + std::vector base_profile; + base_profile.reserve(num_players_); + int past_idx = static_cast(dist_(rng_) * past_policies_.size()); + for (Player p = 0; p < num_players_; ++p) { + base_profile.push_back( + SampleAction(past_policies_[past_idx][p], dist_(rng_)).first); + } + base_samples.push_back(base_profile); + } + + // Each player computes a sampled BR. + for (Player p = 0; p < num_players_; ++p) { + double max_return = -std::numeric_limits::infinity(); + Action best_candidate = kInvalidAction; + + for (int i = 0; i < num_candidates; ++i) { + Action sampled_candidate = + absl::Uniform(rng_, 0u, static_cast(num_actions_)); + // Compute the action's expectation. 
+ double return_sum = 0.0; + for (const std::vector& base_joint_action : base_samples) { + std::vector joint_action = base_joint_action; + joint_action[p] = sampled_candidate; + std::unique_ptr state = game_->NewInitialState(); + // Turn-based simultaneous game, so must apply them in order. + for (Player pp = 0; pp < num_players_; ++pp) { + state->ApplyAction(joint_action[pp]); + } + return_sum += state->PlayerReturn(p); + } + if (return_sum / num_base_samples > max_return) { + max_return = return_sum / num_base_samples; + best_candidate = sampled_candidate; + } + } + + SPIEL_CHECK_TRUE(best_candidate != kInvalidAction); + cumulative_policies_[p][best_candidate] += 1.0; + joint_action[p] = best_candidate; + } + + // Add to past policies + std::vector new_policy; + new_policy.reserve(num_players_); + for (Player p = 0; p < num_players_; ++p) { + new_policy.push_back({{joint_action[p], 1.0}}); + } + past_policies_.push_back(new_policy); + + cumulative_joint_policy_[JointActionToIndex(joint_action)] += 1.0; + + iterations_++; + total_time_ += absl::Now() - start; +} + +void FictitiousPlayProcess::AddWeight(ActionsAndProbs* policy, Action action, + double weight) const { + double prob = std::max(0.0, GetProb(*policy, action)); + SetProb(policy, action, prob + weight); +} + +std::vector FictitiousPlayProcess::SampleBaseProfile( + BaseSamplerType sampler_type) { + std::vector base_profile; + base_profile.reserve(num_players_); + + if (sampler_type == BaseSamplerType::kBaseUniform) { + int past_idx = static_cast(dist_(rng_) * past_policies_.size()); + for (Player p = 0; p < num_players_; ++p) { + base_profile.push_back( + SampleAction(past_policies_[past_idx][p], dist_(rng_)).first); + } + return base_profile; + } else if (sampler_type == BaseSamplerType::kBaseLatest) { + int past_idx = past_policies_.size() - 1; + for (Player p = 0; p < num_players_; ++p) { + base_profile.push_back( + SampleAction(past_policies_[past_idx][p], dist_(rng_)).first); + } + return base_profile; + } else { + SpielFatalError("Base sampling method unrecognized."); + } +} + +Action FictitiousPlayProcess::SampleCandidate( + Player player, CandidatesSamplerType sampler_type) { + if (sampler_type == CandidatesSamplerType::kCandidatesInitial) { + return absl::Uniform(rng_, 0u, static_cast(num_actions_)); + } else if (sampler_type == CandidatesSamplerType::kCandidatesUniform || + sampler_type == CandidatesSamplerType::kCandidatesLatest) { + int past_idx = + sampler_type == CandidatesSamplerType::kCandidatesUniform + ? 
absl::Uniform(rng_, 0u, + static_cast(past_policies_.size())) + : past_policies_.size() - 1; + return SampleAction(past_policies_[past_idx][player], dist_(rng_)).first; + } else if (sampler_type == CandidatesSamplerType::kCandidatesInitialUniform) { + int bit = absl::Uniform(rng_, 0u, 2u); + if (bit == 0) { + return SampleCandidate(player, CandidatesSamplerType::kCandidatesInitial); + } else { + return SampleCandidate(player, CandidatesSamplerType::kCandidatesUniform); + } + } else if (sampler_type == CandidatesSamplerType::kCandidatesInitialLatest) { + int bit = absl::Uniform(rng_, 0u, 2u); + if (bit == 0) { + return SampleCandidate(player, CandidatesSamplerType::kCandidatesInitial); + } else { + return SampleCandidate(player, CandidatesSamplerType::kCandidatesLatest); + } + } else { + SpielFatalError("Candidate sampling method unrecognized."); + } +} + +std::vector> FictitiousPlayProcess::SampleBaseProfiles( + BaseSamplerType sampler_type, int num_base_samples) { + std::vector> base_samples; + base_samples.reserve(num_base_samples); + for (int i = 0; i < num_base_samples; ++i) { + base_samples.push_back(SampleBaseProfile(sampler_type)); + } + return base_samples; +} + +Action FictitiousPlayProcess::GetBestCandidate( + Player player, const std::vector>& base_samples, + int num_candidates, CandidatesSamplerType sampler_type) { + std::vector best_candidates; + double max_return = -std::numeric_limits::infinity(); + + for (int i = 0; i < num_candidates; ++i) { + Action sampled_candidate = SampleCandidate(player, sampler_type); + // Compute the action's expectation. + double return_sum = 0.0; + for (const std::vector& base_joint_action : base_samples) { + std::vector ja_prime = base_joint_action; + ja_prime[player] = sampled_candidate; + int ja_idx = JointActionToIndex(ja_prime); + return_sum += cached_joint_utilities_[player][ja_idx]; + } + + // Consider values within [ -kTieTolerance, kTieTolerance ] as tied. + if (return_sum > max_return + kTieTolerance) { + max_return = return_sum; + best_candidates = {sampled_candidate}; + } else if (return_sum > max_return - kTieTolerance) { + best_candidates.push_back(sampled_candidate); + } + } + + SPIEL_CHECK_GE(best_candidates.size(), 0); + if (best_candidates.size() == 1) { + return best_candidates[0]; + } else { + int idx = absl::Uniform(rng_, 0u, + static_cast(best_candidates.size())); + return best_candidates[idx]; + } +} + +// pi_i^t = \frac{1}{N} \sum_{n = 1}^N 1(a_i), where a_i ~ SBR(\pi_b, \pi_c). +void FictitiousPlayProcess::BRPIIteration( + BaseSamplerType base_sampling, CandidatesSamplerType candidates_sampling, + int num_base_samples, int num_candidates, int brpi_N) { + absl::Time start = absl::Now(); + + // Clear policy counts + std::fill(current_joint_policy_counts_.begin(), + current_joint_policy_counts_.end(), 0.0); + + // N trials + for (int n = 0; n < brpi_N; ++n) { + std::vector joint_action(num_players_, kInvalidAction); + + // Sample the base profiles: number by player + std::vector> base_samples = + SampleBaseProfiles(base_sampling, num_base_samples); + + // Each player computes a sampled BR. + for (Player p = 0; p < num_players_; ++p) { + Action best_candidate = GetBestCandidate(p, base_samples, num_candidates, + candidates_sampling); + joint_action[p] = best_candidate; + } + current_joint_policy_counts_[JointActionToIndex(joint_action)] += 1.0; + + // End of trial. + } + + // Apply the 1/N to the emprical estimate of the joint policy and add them to + // the past policies. 
+ std::vector policies(num_players_); + for (int ja_idx = 0; ja_idx < num_joint_actions_; ++ja_idx) { + if (current_joint_policy_counts_[ja_idx] > 0) { + double weight = current_joint_policy_counts_[ja_idx] / brpi_N; + std::vector joint_action = IndexToJointAction(ja_idx); + for (Player p = 0; p < num_players_; ++p) { + AddWeight(&policies[p], joint_action[p], weight); + } + } + } + past_policies_.push_back(policies); + + std::fill(cumulative_joint_policy_.begin(), cumulative_joint_policy_.end(), + 0.0); + for (int ja_idx = 0; ja_idx < num_joint_actions_; ++ja_idx) { + std::vector joint_action = IndexToJointAction(ja_idx); + double joint_prob = 1.0; + for (Player p = 0; p < num_players_ && joint_prob > 0; ++p) { + double prob = std::max(0.0, GetProb(policies[p], joint_action[p])); + if (prob == 0) { + joint_prob = 0; + } else { + joint_prob *= prob; + } + } + if (joint_prob > 0) { + cumulative_joint_policy_[ja_idx] = joint_prob; + } + } + + iterations_++; + total_time_ += absl::Now() - start; +} + +} // namespace blotto_fp +} // namespace algorithms +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/examples/sbr_blotto/fictitious_play.h b/scenarios/bargaining/open_spiel/open_spiel/examples/sbr_blotto/fictitious_play.h new file mode 100644 index 0000000..58f50f7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/examples/sbr_blotto/fictitious_play.h @@ -0,0 +1,162 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_EXAMPLES_SBR_BLOTTO_FICTITIOUS_PLAY_H_ +#define OPEN_SPIEL_EXAMPLES_SBR_BLOTTO_FICTITIOUS_PLAY_H_ + +#include + +#include "open_spiel/abseil-cpp/absl/random/uniform_int_distribution.h" +#include "open_spiel/abseil-cpp/absl/random/uniform_real_distribution.h" +#include "open_spiel/abseil-cpp/absl/time/time.h" +#include "open_spiel/algorithms/best_response.h" +#include "open_spiel/algorithms/corr_dist.h" +#include "open_spiel/algorithms/tabular_exploitability.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace algorithms { +namespace blotto_fp { + +enum class BaseSamplerType { + kBaseUniform, // Uniform past policy + kBaseLatest, +}; + +enum class CandidatesSamplerType { + kCandidatesInitial, // Initial in Blotto is the uniform random policy. + kCandidatesUniform, // This is uniform past policy + kCandidatesLatest, + kCandidatesInitialUniform, + kCandidatesInitialLatest, +}; + +// Returns a policy over the action sequence (0, 1, ..., num_actions - 1) +ActionsAndProbs UniformSequencePolicy(int num_actions); + +// Returns a random policy over the action sequence (0, 1, ..., num_actions - 1) +// generated by normalizing random numbers. 
+ActionsAndProbs RandomStatePolicy(int num_actions, + const std::function& rng); + +class FictitiousPlayProcess { + public: + FictitiousPlayProcess(std::shared_ptr game, int seed, + bool randomize_initial_policies); + + // Get the marginalized joint policy from the marginalized counts being + // maintained in weight tables (defaults to cumulative_policies_ if nullptr, + // which is marginalized separately each iteration). Weight table is indexed + // by (player, action). + void GetMarginalJointPolicy( + TabularPolicy* policy, + const std::vector>* weight_table = nullptr) const; + + // Get the marginalized joint policy by marginalizing the empirical joint + // policy. + void GetMarginalJointPolicyFromEmpirical(TabularPolicy* policy) const; + + void FullFPIteration(); + void SFPIteration(double lambda); + void SBRIteration(int num_base_samples, int num_candidates); + void IBRIteration(); + void MaxEntIBRIteration(); + void BRPIIteration(BaseSamplerType base_sampling, + CandidatesSamplerType candidates_sampling, + int num_base_samples, int num_candidates, int brpi_N); + + TabularPolicy GetMarginalJointPolicy() { + GetMarginalJointPolicy(&joint_policy_); + return joint_policy_; + } + + TabularPolicy GetLatestPolicy() const; + + absl::Duration TotalTime() const { return total_time_; } + + double NashConv() const; + double CCEDist() const; + + private: + void InitPolicies(); + void InitPoliciesRandom(); + int Iterations() const { return iterations_; } + std::vector Softmax(const std::vector& values, + double lambda) const; + + int JointActionToIndex(const std::vector& joint_action) const; + std::vector IndexToJointAction(int index) const; + + NormalFormCorrelationDevice GetEmpiricalJointPolicy() const; + + // Add appropriate weights given each players' (potentially mixed) policy + void UpdateCumulativeJointPolicy( + const std::vector>& policies); + void UpdateCumulativeJointPolicySampled( + const std::vector>& policies, int num_samples); + + void AddWeight(ActionsAndProbs* policy, Action action, double weight) const; + Action BestResponseAgainstEmpiricalJointPolicy( + Player player, std::vector* values = nullptr); + Action BestResponseAgainstEmpiricalMarginalizedPolicies( + Player player, std::vector* values = nullptr); + + void CheckJointUtilitiesCache(); + + std::vector SampleBaseProfile(BaseSamplerType sampler_type); + Action SampleCandidate(Player player, CandidatesSamplerType sampler_type); + std::vector> SampleBaseProfiles( + BaseSamplerType sampler_type, int num_base_samples); + Action GetBestCandidate(Player player, + const std::vector>& base_samples, + int num_candidates, + CandidatesSamplerType sampler_type); + + std::mt19937 rng_; + absl::uniform_real_distribution dist_; + + std::shared_ptr game_; + int num_players_; + int num_actions_; + std::vector infostate_strings_; + + int iterations_; + TabularPolicy joint_policy_; + std::vector> cumulative_policies_; + std::vector> best_response_computers_; + + int num_joint_actions_; + + // Histogram of sampled joint actions. + std::vector current_joint_policy_counts_; + // Each player's policy: time step by player + std::vector> past_policies_; + + // Joint average strategy. Index is an encoding of the joint action in base + // NumDistinctActions (so this vector has size NumDistinctActions^players). 
+ std::vector<double> cumulative_joint_policy_;
+
+ // Player by joint index
+ std::vector<std::vector<double>> cached_joint_utilities_;
+
+ absl::Duration total_time_;
+};
+
+} // namespace blotto_fp
+} // namespace algorithms
+} // namespace open_spiel
+
+#endif // OPEN_SPIEL_EXAMPLES_SBR_BLOTTO_FICTITIOUS_PLAY_H_
diff --git a/scenarios/bargaining/open_spiel/open_spiel/examples/sbr_blotto/sbr_blotto_main.cc b/scenarios/bargaining/open_spiel/open_spiel/examples/sbr_blotto/sbr_blotto_main.cc
new file mode 100644
index 0000000..b90f4f1
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/examples/sbr_blotto/sbr_blotto_main.cc
@@ -0,0 +1,197 @@
+// Copyright 2021 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+
+#include
+#include
+
+#include "open_spiel/abseil-cpp/absl/flags/flag.h"
+#include "open_spiel/abseil-cpp/absl/flags/parse.h"
+#include "open_spiel/abseil-cpp/absl/strings/str_cat.h"
+#include "open_spiel/abseil-cpp/absl/time/time.h"
+#include "open_spiel/examples/sbr_blotto/fictitious_play.h"
+#include "open_spiel/spiel_utils.h"
+#include "open_spiel/utils/file.h"
+#include "open_spiel/utils/init.h"
+
+ABSL_FLAG(int, players, 2, "Number of players.");
+ABSL_FLAG(int, coins, 10, "Number of coins to place.");
+ABSL_FLAG(int, fields, 3, "Number of fields.");
+
+ABSL_FLAG(int, seed, 82368234, "Seed for the random number generator.");
+ABSL_FLAG(int, iterations, -1, "Number of iterations.");
+ABSL_FLAG(std::string, algorithm, "fp",
+ "Algorithm to run (fp|sbr|ibr|meibr|sfp|brpi)");
+ABSL_FLAG(int, sbr_b, 10, "Number of base profiles in SBR");
+ABSL_FLAG(int, sbr_c, 25, "Number of candidates in SBR");
+ABSL_FLAG(int, brpi_n, 1000, "N in BRPI");
+ABSL_FLAG(std::string, base_sampler, "uniform", "Base sampler type.");
+ABSL_FLAG(std::string, candidates_sampler, "initial", "Cand. 
sampler type."); +ABSL_FLAG(double, lambda, 1.0, "Lambda for the softmax"); +ABSL_FLAG(std::string, logdirpref, "/tmp", "Log prefix"); +ABSL_FLAG(std::string, run_name, "run", "Run name"); +ABSL_FLAG(bool, enable_log, true, "Whether to enable logging?"); +ABSL_FLAG(std::string, game, "", "Game string override (if not blotto)"); +ABSL_FLAG(bool, randomize_initial_policies, false, + "Arbitrary initial policies?"); + +using open_spiel::algorithms::blotto_fp::BaseSamplerType; +using open_spiel::algorithms::blotto_fp::CandidatesSamplerType; +using open_spiel::algorithms::blotto_fp::FictitiousPlayProcess; + +BaseSamplerType GetBaseSamplerType(const std::string& str) { + if (str == "uniform") { + return BaseSamplerType::kBaseUniform; + } else if (str == "latest") { + return BaseSamplerType::kBaseLatest; + } else { + open_spiel::SpielFatalError("Unrecognized base sampler type."); + } +} + +CandidatesSamplerType GetCandidatesSamplerType(const std::string& str) { + if (str == "initial") { + return CandidatesSamplerType::kCandidatesInitial; + } else if (str == "uniform") { + return CandidatesSamplerType::kCandidatesUniform; + } else if (str == "latest") { + return CandidatesSamplerType::kCandidatesLatest; + } else if (str == "mixedIU") { + return CandidatesSamplerType::kCandidatesInitialUniform; + } else if (str == "mixedIL") { + return CandidatesSamplerType::kCandidatesInitialLatest; + } else { + open_spiel::SpielFatalError("Unrecognized candidates sampler type."); + } +} + +int main(int argc, char** argv) { + open_spiel::Init(argv[0], &argc, &argv, /*remove_flags=*/true); + + int players = absl::GetFlag(FLAGS_players); + int coins = absl::GetFlag(FLAGS_coins); + int fields = absl::GetFlag(FLAGS_fields); + + absl::ParseCommandLine(argc, argv); + + std::string game_string = ""; + if (absl::GetFlag(FLAGS_game).empty()) { + game_string = absl::StrCat( + "turn_based_simultaneous_game(game=blotto(players=", players, + ",coins=", coins, ",fields=", fields, "))"); + } else { + game_string = absl::StrCat( + "turn_based_simultaneous_game(game=", absl::GetFlag(FLAGS_game), ")"); + } + + std::cout << "game string: " << game_string << std::endl; + std::shared_ptr game = + open_spiel::LoadGame(game_string); + SPIEL_CHECK_TRUE(game != nullptr); + + std::cout << "NumDistinctActions: " << game->NumDistinctActions() + << std::endl; + std::cout << "Size of game: " << std::fixed + << std::pow(game->NumDistinctActions(), game->NumPlayers()) + << std::endl; + + FictitiousPlayProcess fpp(game, absl::GetFlag(FLAGS_seed), + absl::GetFlag(FLAGS_randomize_initial_policies)); + int iterations = absl::GetFlag(FLAGS_iterations); + std::string algo = absl::GetFlag(FLAGS_algorithm); + int sbr_num_base = absl::GetFlag(FLAGS_sbr_b); + int sbr_num_candidates = absl::GetFlag(FLAGS_sbr_c); + int brpi_n = absl::GetFlag(FLAGS_brpi_n); + double lambda = absl::GetFlag(FLAGS_lambda); + BaseSamplerType base_sampler_type = + GetBaseSamplerType(absl::GetFlag(FLAGS_base_sampler)); + CandidatesSamplerType candidates_sampler_type = + GetCandidatesSamplerType(absl::GetFlag(FLAGS_candidates_sampler)); + + std::unique_ptr logfile = nullptr; + if (absl::GetFlag(FLAGS_enable_log)) { + std::cout << "Opening log file.." 
<< std::endl; + + std::string dir = absl::StrCat(absl::GetFlag(FLAGS_logdirpref), "/", + absl::GetFlag(FLAGS_run_name)); + + std::string filename = absl::StrCat(dir, "/blotto_", players, "_", coins, + "_", fields, "_", algo); + + if (absl::GetFlag(FLAGS_randomize_initial_policies)) { + absl::StrAppend(&filename, "_rip"); + } + + absl::StrAppend(&filename, "_seed", absl::GetFlag(FLAGS_seed)); + + if (algo == "sbr") { + absl::StrAppend(&filename, "_", sbr_num_base, "_", sbr_num_candidates); + } else if (algo == "sfp") { + absl::StrAppend(&filename, "_lambda", lambda); + } else if (algo == "brpi") { + absl::StrAppend(&filename, "_", sbr_num_base, "_", sbr_num_candidates, + "_", absl::GetFlag(FLAGS_base_sampler), "_", + absl::GetFlag(FLAGS_candidates_sampler)); + } + + if (!open_spiel::file::Exists(dir)) { + std::cout << "Creating log directory " << dir << std::endl; + SPIEL_CHECK_TRUE(open_spiel::file::Mkdir(dir)); + } + + logfile = std::make_unique(filename, "w"); + } + + std::cout << "Starting." << std::endl; + + int next_br_iter = 1; + for (int i = 1; i < iterations || iterations < 0; ++i) { + if (algo == "fp") { + fpp.FullFPIteration(); + } else if (algo == "sbr") { + fpp.SBRIteration(sbr_num_base, sbr_num_candidates); + } else if (algo == "ibr") { + fpp.IBRIteration(); + } else if (algo == "meibr") { + fpp.MaxEntIBRIteration(); + } else if (algo == "sfp") { + fpp.SFPIteration(lambda); + } else if (algo == "brpi") { + fpp.BRPIIteration(base_sampler_type, candidates_sampler_type, + sbr_num_base, sbr_num_candidates, brpi_n); + } else { + std::cerr << "Unrecognized algorithm. Exiting..."; + exit(-1); + } + + if (i == next_br_iter) { + double cce_dist = fpp.CCEDist(); + double nash_conv = fpp.NashConv(); + + absl::Duration total_time = fpp.TotalTime(); + + std::string outline = + absl::StrCat(i, " ", absl::ToDoubleSeconds(total_time), " ", cce_dist, + " ", nash_conv, "\n"); + std::cout << outline; + if (logfile != nullptr) { + SPIEL_CHECK_TRUE(logfile->Write(outline)); + SPIEL_CHECK_TRUE(logfile->Flush()); + } + + next_br_iter *= 2; + } + } +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/examples/shared_library_example.cc b/scenarios/bargaining/open_spiel/open_spiel/examples/shared_library_example.cc new file mode 100644 index 0000000..3e73a90 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/examples/shared_library_example.cc @@ -0,0 +1,118 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This is a simple example that is used to demonstrate how to use OpenSpiel +// when it is built as a shared library. 
To use OpenSpiel as a library, +// see: https://github.com/deepmind/open_spiel/blob/master/docs/library.md + +#include +#include + +#include "open_spiel/abseil-cpp/absl/random/uniform_int_distribution.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +void PrintLegalActions(const open_spiel::State& state, + open_spiel::Player player, + const std::vector& movelist) { + std::cerr << "Legal moves for player " << player << ":" << std::endl; + for (open_spiel::Action action : movelist) { + std::cerr << " " << state.ActionToString(player, action) << std::endl; + } +} + +int main(int argc, char** argv) { + if (argc < 2) { + std::cerr << "Usage shared_library_example " << std::endl; + return -1; + } + + // Print out registered games. + std::cerr << "Registered games:" << std::endl; + std::vector names = open_spiel::RegisteredGames(); + for (const std::string& name : names) { + std::cerr << name << std::endl; + } + + // Random number generator. + std::mt19937 rng(time(0)); + + // Load the game. + std::cerr << "Loading game..\n" << std::endl; + std::shared_ptr game = open_spiel::LoadGame(argv[1]); + + if (!game) { + std::cerr << "problem with loading game, exiting..." << std::endl; + return -1; + } + + std::cerr << "Starting new game..." << std::endl; + std::unique_ptr state = game->NewInitialState(); + + std::cerr << "Initial state:" << std::endl; + std::cerr << "State:" << std::endl << state->ToString() << std::endl; + + while (!state->IsTerminal()) { + std::cerr << "player " << state->CurrentPlayer() << std::endl; + + if (state->IsChanceNode()) { + // Chance node; sample one according to underlying distribution. + std::vector> outcomes = + state->ChanceOutcomes(); + open_spiel::Action action = open_spiel::SampleAction(outcomes, rng).first; + std::cerr << "sampled outcome: " + << state->ActionToString(open_spiel::kChancePlayerId, action) + << std::endl; + state->ApplyAction(action); + } else if (state->IsSimultaneousNode()) { + // open_spiel::Players choose simultaneously? + std::vector joint_action; + std::vector infostate(game->InformationStateTensorSize()); + + // Sample a action for each player + for (auto player = open_spiel::Player{0}; player < game->NumPlayers(); + ++player) { + std::vector actions = state->LegalActions(player); + PrintLegalActions(*state, player, actions); + + absl::uniform_int_distribution<> dis(0, actions.size() - 1); + open_spiel::Action action = actions[dis(rng)]; + joint_action.push_back(action); + std::cerr << "player " << player << " chose " + << state->ActionToString(player, action) << std::endl; + } + + state->ApplyActions(joint_action); + } else { + // Decision node, sample one uniformly. 
+ auto player = state->CurrentPlayer(); + std::vector actions = state->LegalActions(); + PrintLegalActions(*state, player, actions); + + absl::uniform_int_distribution<> dis(0, actions.size() - 1); + auto action = actions[dis(rng)]; + std::cerr << "chose action: " << state->ActionToString(player, action) + << std::endl; + state->ApplyAction(action); + } + + std::cerr << "State: " << std::endl << state->ToString() << std::endl; + } + + auto returns = state->Returns(); + for (auto p = open_spiel::Player{0}; p < game->NumPlayers(); p++) { + std::cerr << "Final return to player " << p << " is " << returns[p] + << std::endl; + } +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/examples/tabular_q_learning_example.cc b/scenarios/bargaining/open_spiel/open_spiel/examples/tabular_q_learning_example.cc new file mode 100644 index 0000000..8378761 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/examples/tabular_q_learning_example.cc @@ -0,0 +1,144 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/algorithms/tabular_q_learning.h" +#include "open_spiel/games/tic_tac_toe/tic_tac_toe.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +using open_spiel::Action; +using open_spiel::Game; +using open_spiel::Player; +using open_spiel::State; + +Action GetOptimalAction( + absl::flat_hash_map, double> q_values, + const std::unique_ptr& state) { + std::vector legal_actions = state->LegalActions(); + Action optimal_action = open_spiel::kInvalidAction; + + double value = -1; + for (const Action& action : legal_actions) { + double q_val = q_values[{state->ToString(), action}]; + if (q_val >= value) { + value = q_val; + optimal_action = action; + } + } + return optimal_action; +} + +void SolveTicTacToe() { + std::shared_ptr game = open_spiel::LoadGame("tic_tac_toe"); + open_spiel::algorithms::TabularQLearningSolver tabular_q_learning_solver( + game); + + int iter = 100000; + while (iter-- > 0) { + tabular_q_learning_solver.RunIteration(); + } + + const absl::flat_hash_map, double>& q_values = + tabular_q_learning_solver.GetQValueTable(); + std::unique_ptr state = game->NewInitialState(); + while (!state->IsTerminal()) { + Action optimal_action = GetOptimalAction(q_values, state); + state->ApplyAction(optimal_action); + } + + // Tie. 
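+  // Both players acting greedily with respect to the learned Q-values is
+  // expected to end in a draw, hence a final reward of 0 for each player.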
+ SPIEL_CHECK_EQ(state->Rewards()[0], 0); + SPIEL_CHECK_EQ(state->Rewards()[1], 0); +} + +void SolveTicTacToeEligibilityTraces() { + std::shared_ptr game = open_spiel::LoadGame("tic_tac_toe"); + open_spiel::algorithms::TabularQLearningSolver + tabular_q_learning_solver_lambda00(game, -1.0, 0.0001, 0.01, 0.99, 0.0); + open_spiel::algorithms::TabularQLearningSolver + tabular_q_learning_solver_lambda01(game, -1.0, 0.0001, 0.001, 0.99, 0.1); + + int count_tie_games_lambda00 = 0; + int count_tie_games_lambda01 = 0; + for (int i = 1; i < 10000; i++) { + tabular_q_learning_solver_lambda00.RunIteration(); + + const absl::flat_hash_map, double>& + q_values_lambda00 = tabular_q_learning_solver_lambda00.GetQValueTable(); + std::unique_ptr state = game->NewInitialState(); + + while (!state->IsTerminal()) { + state->ApplyAction(GetOptimalAction(q_values_lambda00, state)); + } + + count_tie_games_lambda00 += state->Rewards()[0] == 0 ? 1 : 0; + } + + for (int i = 1; i < 10000; i++) { + tabular_q_learning_solver_lambda01.RunIteration(); + + const absl::flat_hash_map, double>& + q_values_lambda01 = tabular_q_learning_solver_lambda01.GetQValueTable(); + std::unique_ptr state = game->NewInitialState(); + + while (!state->IsTerminal()) { + state->ApplyAction(GetOptimalAction(q_values_lambda01, state)); + } + + count_tie_games_lambda01 += state->Rewards()[0] == 0 ? 1 : 0; + } + + // Q-Learning(0.1) gets equilibrium faster than Q-Learning(0.0). + // More ties in the same amount of time. + SPIEL_CHECK_GT(count_tie_games_lambda01, count_tie_games_lambda00); +} + +void SolveCatch() { + std::shared_ptr game = open_spiel::LoadGame("catch"); + open_spiel::algorithms::TabularQLearningSolver tabular_q_learning_solver( + game); + + int training_iter = 100000; + while (training_iter-- > 0) { + tabular_q_learning_solver.RunIteration(); + } + const absl::flat_hash_map, double>& q_values = + tabular_q_learning_solver.GetQValueTable(); + + int eval_iter = 1000; + int total_reward = 0; + while (eval_iter-- > 0) { + std::unique_ptr state = game->NewInitialState(); + while (!state->IsTerminal()) { + Action optimal_action = GetOptimalAction(q_values, state); + state->ApplyAction(optimal_action); + total_reward += state->Rewards()[0]; + } + } + + SPIEL_CHECK_GT(total_reward, 0); +} + +int main(int argc, char** argv) { + SolveTicTacToe(); + SolveTicTacToeEligibilityTraces(); + SolveCatch(); + return 0; +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/examples/tabular_sarsa_example.cc b/scenarios/bargaining/open_spiel/open_spiel/examples/tabular_sarsa_example.cc new file mode 100644 index 0000000..87af6fe --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/examples/tabular_sarsa_example.cc @@ -0,0 +1,154 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
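+
+// Example use of the tabular SARSA solver. SolveTicTacToe checks that greedy
+// play from the learned Q-values draws tic_tac_toe, SolveTicTacToeEligibilityTraces
+// compares the eligibility-trace variants SARSA(0.3) and SARSA(0.0), and
+// SolveCatch checks that the learned policy collects positive reward in catch.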
+ +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/algorithms/tabular_sarsa.h" +#include "open_spiel/games/tic_tac_toe/tic_tac_toe.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +using open_spiel::Action; +using open_spiel::Game; +using open_spiel::Player; +using open_spiel::State; + +Action GetOptimalAction( + absl::flat_hash_map, double> q_values, + const std::unique_ptr& state) { + std::vector legal_actions = state->LegalActions(); + const auto state_str = state->ToString(); + + Action optimal_action = open_spiel::kInvalidAction; + double value = -1; + for (const Action& action : legal_actions) { + double q_val = q_values[{state_str, action}]; + if (q_val >= value) { + value = q_val; + optimal_action = action; + } + } + return optimal_action; +} + +void SolveTicTacToe() { + std::shared_ptr game = open_spiel::LoadGame("tic_tac_toe"); + open_spiel::algorithms::TabularSarsaSolver tabular_sarsa_solver(game); + + int iter = 100000; + while (iter-- > 0) { + tabular_sarsa_solver.RunIteration(); + } + + const absl::flat_hash_map, double>& q_values = + tabular_sarsa_solver.GetQValueTable(); + std::unique_ptr state = game->NewInitialState(); + while (!state->IsTerminal()) { + Action optimal_action = GetOptimalAction(q_values, state); + state->ApplyAction(optimal_action); + } + + // Tie. + SPIEL_CHECK_EQ(state->Rewards()[0], 0); + SPIEL_CHECK_EQ(state->Rewards()[1], 0); +} + +void SolveTicTacToeEligibilityTraces() { + std::shared_ptr game = open_spiel::LoadGame("tic_tac_toe"); + open_spiel::algorithms::TabularSarsaSolver tabular_sarsa_solver_lambda00( + /*game=*/game, + /*depth_limit=*/-1.0, + /*epsilon=*/0.1, + /*learning_rate=*/0.01, + /*discount_factor=*/0.99, + /*lambda=*/0.0); + open_spiel::algorithms::TabularSarsaSolver tabular_sarsa_solver_lambda03( + /*game=*/game, + /*depth_limit=*/-1.0, + /*epsilon=*/0.1, + /*learning_rate=*/0.01, + /*discount_factor=*/0.99, + /*lambda=*/0.3); + + const int runs = 1000; + int count_tie_games_lambda00 = 0; + int count_tie_games_lambda03 = 0; + for (int i = 0; i < runs; ++i) { + tabular_sarsa_solver_lambda00.RunIteration(); + + const absl::flat_hash_map, double> + &q_values_lambda00 = tabular_sarsa_solver_lambda00.GetQValueTable(); + std::unique_ptr state = game->NewInitialState(); + + while (!state->IsTerminal()) { + state->ApplyAction(GetOptimalAction(q_values_lambda00, state)); + } + + count_tie_games_lambda00 += state->Rewards()[0] == 0 ? 1 : 0; + } + + for (int i = 0; i < runs; ++i) { + tabular_sarsa_solver_lambda03.RunIteration(); + + const absl::flat_hash_map, double> + &q_values_lambda01 = tabular_sarsa_solver_lambda03.GetQValueTable(); + std::unique_ptr state = game->NewInitialState(); + + while (!state->IsTerminal()) { + state->ApplyAction(GetOptimalAction(q_values_lambda01, state)); + } + + count_tie_games_lambda03 += state->Rewards()[0] == 0 ? 1 : 0; + } + + // SARSA(0.3) gets equilibrium faster than SARSA(0.0). More ties in the same + // amount of time. 
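+  // The comparison below is statistical: over the same number of training
+  // iterations, the lambda=0.3 learner is expected to produce strictly more
+  // greedy-play draws than the lambda=0.0 learner.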
+ SPIEL_CHECK_GT(count_tie_games_lambda03, count_tie_games_lambda00); +} + +void SolveCatch() { + std::shared_ptr game = open_spiel::LoadGame("catch"); + open_spiel::algorithms::TabularSarsaSolver tabular_sarsa_solver(game); + + int training_iter = 100000; + while (training_iter-- > 0) { + tabular_sarsa_solver.RunIteration(); + } + const absl::flat_hash_map, double>& q_values = + tabular_sarsa_solver.GetQValueTable(); + + int eval_iter = 1000; + int total_reward = 0; + while (eval_iter-- > 0) { + std::unique_ptr state = game->NewInitialState(); + while (!state->IsTerminal()) { + Action optimal_action = GetOptimalAction(q_values, state); + state->ApplyAction(optimal_action); + total_reward += state->Rewards()[0]; + } + } + + SPIEL_CHECK_GT(total_reward, 0); +} + +int main(int argc, char** argv) { + SolveTicTacToe(); + SolveTicTacToeEligibilityTraces(); + SolveCatch(); + return 0; +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/examples/universal_poker_mccfr_acpc_gamedef_example.cc b/scenarios/bargaining/open_spiel/open_spiel/examples/universal_poker_mccfr_acpc_gamedef_example.cc new file mode 100644 index 0000000..c238b64 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/examples/universal_poker_mccfr_acpc_gamedef_example.cc @@ -0,0 +1,88 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/flags/flag.h" +#include "open_spiel/abseil-cpp/absl/flags/parse.h" +#include "open_spiel/algorithms/external_sampling_mccfr.h" +#include "open_spiel/algorithms/tabular_exploitability.h" +#include "open_spiel/games/universal_poker/universal_poker.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +constexpr char kCustom3PlayerAcpcGamedef[] = R"""( +# (Empty lines and lines starting with an '#' are all ignored) + +GAMEDEF +nolimit +numPlayers = 3 +numRounds = 1 +numSuits = 2 +numRanks = 4 +numHoleCards = 1 + +# Set per player, so 3 total +stack = 15 15 15 +blind = 0 1 0 + +# Set per round +firstPlayer = 3 +numBoardCards = 0 + +END GAMEDEF +)"""; + +ABSL_FLAG(std::string, acpc_gamedef, kCustom3PlayerAcpcGamedef, + "ACPC gamedef."); +ABSL_FLAG(int, num_iters, 2000, "How many iters to run for."); +// Note: reporting exploitability too frequently can be expensive! +ABSL_FLAG(int, report_every, 500, "How often to report exploitability."); + +// Example code for using MCCFR on a univeral_poker game loaded from an ACPC +// gamedef (via the wrapper function). +int main(int argc, char** argv) { + absl::ParseCommandLine(argc, argv); + std::cout << "Input ACPC gamedef (raw): " << absl::GetFlag(FLAGS_acpc_gamedef) + << std::endl; + + std::shared_ptr game = + open_spiel::universal_poker::LoadUniversalPokerGameFromACPCGamedef( + absl::GetFlag(FLAGS_acpc_gamedef)); + + // Downcasting to UniversalPokerGame so we can call GetACPCGame(), which isn't + // on the higher level open_spiel::Game. 
+ const open_spiel::universal_poker::UniversalPokerGame& game_down_cast = + open_spiel::down_cast< + const open_spiel::universal_poker::UniversalPokerGame&>(*game); + std::cout << "Resulting ACPC gamedef used for universal_poker:\n" + << game_down_cast.GetACPCGame()->ToString() << std::endl; + + open_spiel::algorithms::ExternalSamplingMCCFRSolver solver(*game); + std::cerr << "Starting MCCFR on " << game->GetType().short_name << "..." + << std::endl; + + for (int i = 0; i < absl::GetFlag(FLAGS_num_iters); ++i) { + solver.RunIteration(); + if (i % absl::GetFlag(FLAGS_report_every) == 0 || + i == absl::GetFlag(FLAGS_num_iters) - 1) { + double exploitability = open_spiel::algorithms::Exploitability( + *game, *solver.AveragePolicy()); + std::cerr << "Iteration " << i << " exploitability=" << exploitability + << std::endl; + } + } +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/examples/value_iteration_example.cc b/scenarios/bargaining/open_spiel/open_spiel/examples/value_iteration_example.cc new file mode 100644 index 0000000..0274876 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/examples/value_iteration_example.cc @@ -0,0 +1,54 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "open_spiel/abseil-cpp/absl/flags/flag.h" +#include "open_spiel/abseil-cpp/absl/flags/parse.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/algorithms/value_iteration.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +ABSL_FLAG(std::string, game, "tic_tac_toe", "The name of the game to play."); +ABSL_FLAG(int, depth_limit, -1, + "Depth limit until which to compute value iteration."); +ABSL_FLAG(double, threshold, 0.01, + "Threshold accuracy at which to stop value iteration."); + +// Example code for using value iteration algorithm to solve tic-tac-toe. 
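+// ValueIteration returns a map from state string to value. For tic_tac_toe,
+// the checks at the end of main() read these values as player 0 (x)'s optimal
+// return: 0 for the empty board (a draw under optimal play), +1 for a position
+// from which x can force a win, and -1 for a position where o wins.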
+int main(int argc, char** argv) { + absl::ParseCommandLine(argc, argv); + + std::shared_ptr game = + open_spiel::LoadGame(absl::GetFlag(FLAGS_game)); + + auto solution = open_spiel::algorithms::ValueIteration( + *game, absl::GetFlag(FLAGS_depth_limit), absl::GetFlag(FLAGS_threshold)); + for (const auto& kv : solution) { + std::cerr << "State: " << std::endl + << kv.first << std::endl + << "Value: " << kv.second << std::endl; + } + + if (absl::GetFlag(FLAGS_game) == "tic_tac_toe") { + std::string initial_state = "...\n...\n..."; + std::string cross_win_state = "...\n...\n.ox"; + std::string naught_win_state = "x..\noo.\nxx."; + SPIEL_CHECK_EQ(solution[initial_state], 0); + SPIEL_CHECK_EQ(solution[cross_win_state], 1); + SPIEL_CHECK_EQ(solution[naught_win_state], -1); + } +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_parameters.cc b/scenarios/bargaining/open_spiel/open_spiel/game_parameters.cc new file mode 100644 index 0000000..97194dd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_parameters.cc @@ -0,0 +1,317 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/game_parameters.h" + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/numbers.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/strings/str_replace.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace { +constexpr const char* kSerializedNewline = "\\\\n"; +} + +std::string GameParameter::ToReprString() const { + switch (type_) { + case Type::kInt: + return absl::StrCat("GameParameter(int_value=", int_value(), ")"); + case Type::kDouble: + return absl::StrCat("GameParameter(double_value=", double_value(), ")"); + case Type::kString: + return absl::StrCat("GameParameter(string_value='", string_value(), "')"); + case Type::kBool: + return absl::StrCat( + "GameParameter(bool_value=", bool_value() ? "True" : "False", ")"); + case Type::kUnset: + return absl::StrCat("GameParameter()"); + case Type::kGame: + return absl::StrCat("GameParameter(game_value=", + GameParametersToString(game_value())); + default: + SpielFatalError("Unknown type."); + return "This will never return."; + } +} + +std::string GameParameter::ToString() const { + switch (type_) { + case Type::kInt: + return absl::StrCat(int_value()); + case Type::kDouble: + return FormatDouble(double_value()); + case Type::kString: + return string_value(); + case Type::kBool: + return bool_value() ? 
std::string("True") : std::string("False"); + case Type::kUnset: + return absl::StrCat("unset"); + case Type::kGame: + return GameParametersToString(game_value()); + default: + SpielFatalError("Unknown type."); + return "This will never return."; + } +} + +std::string GameParameter::Serialize(const std::string& delimiter) const { + std::string val; + switch (type_) { + case Type::kString: + val = absl::StrReplaceAll(ToString(), {{"\n", kSerializedNewline}}); + break; + case Type::kGame: + val = SerializeGameParameters(game_value()); + break; + default: + val = ToString(); + } + return absl::StrCat(GameParameterTypeToString(type_), delimiter, val, + delimiter, is_mandatory() ? "true" : "false"); +} + +GameParameter DeserializeGameParameter(const std::string& data, + const std::string& delimiter) { + std::vector parts = absl::StrSplit(data, delimiter); + SPIEL_CHECK_EQ(parts.size(), 3); + bool mandatory = (parts[2] == "True" || parts[2] == "true"); + if (parts[0] == "kUnset") { + return GameParameter(GameParameter::Type::kUnset, mandatory); + } else if (parts[0] == "kInt") { + int value; + SPIEL_CHECK_TRUE(absl::SimpleAtoi(parts[1], &value)); + return GameParameter(value, mandatory); + } else if (parts[0] == "kDouble") { + double value; + SPIEL_CHECK_TRUE(absl::SimpleAtod(parts[1], &value)); + return GameParameter(value, mandatory); + } else if (parts[0] == "kString") { + return GameParameter( + absl::StrReplaceAll(parts[1], {{kSerializedNewline, "\n"}}), mandatory); + } else if (parts[0] == "kBool") { + return GameParameter(parts[1] == "True" || parts[1] == "true", mandatory); + } else if (parts[0] == "kGame") { + return GameParameter(DeserializeGameParameters(parts[1]), mandatory); + } else { + SpielFatalError(absl::StrCat("Unrecognized type: ", parts[0])); + } +} + +std::string SerializeGameParameters(const GameParameters& game_params, + const std::string& name_delimiter, + const std::string& parameter_delimeter) { + std::list serialized_params; + + for (const auto& key_val : game_params) { + std::string name = key_val.first; + GameParameter parameter = key_val.second; + + serialized_params.push_back( + absl::StrCat(name, name_delimiter, parameter.Serialize())); + } + + return absl::StrJoin(serialized_params, parameter_delimeter); +} + +GameParameters DeserializeGameParameters( + const std::string& data, const std::string& name_delimiter, + const std::string& parameter_delimeter) { + GameParameters game_params; + std::vector parts = absl::StrSplit(data, parameter_delimeter); + + for (const auto& part : parts) { + if (!part.empty()) { + std::pair pair = + absl::StrSplit(part, name_delimiter); + game_params.insert(std::pair( + pair.first, DeserializeGameParameter(pair.second))); + } + } + return game_params; +} + +std::string GameParametersToString(const GameParameters& game_params) { + std::string str; + if (game_params.empty()) return ""; + if (game_params.count("name")) str = game_params.at("name").string_value(); + str.push_back('('); + bool first = true; + for (const auto& key_val : game_params) { + if (key_val.first != "name") { + if (!first) str.push_back(','); + str.append(key_val.first); + str.append("="); + str.append(key_val.second.ToString()); + first = false; + } + } + str.push_back(')'); + return str; +} + +GameParameter GameParameterFromString(const std::string& str) { + if (str == "True" || str == "true") { + return GameParameter(true); + } else if (str == "False" || str == "false") { + return GameParameter(false); + } else if (str.find_first_not_of("+-0123456789") == 
std::string::npos) { + int value; + bool success = absl::SimpleAtoi(str, &value); + SPIEL_CHECK_TRUE(success); + return GameParameter(value); + } else if (str.find_first_not_of("+-0123456789.") == std::string::npos) { + double value; + bool success = absl::SimpleAtod(str, &value); + SPIEL_CHECK_TRUE(success); + return GameParameter(value); + } else if (str.back() == ')') { + return GameParameter(GameParametersFromString(str)); + } else { + return GameParameter(str); + } +} + +GameParameters GameParametersFromString(const std::string& game_string) { + GameParameters params; + if (game_string.empty()) return params; + int first_paren = game_string.find('('); + if (first_paren == std::string::npos) { + params["name"] = GameParameter(game_string); + return params; + } + params["name"] = GameParameter(game_string.substr(0, first_paren)); + int start = first_paren + 1; + int parens = 1; + int equals = -1; + for (int i = start; i < game_string.length(); ++i) { + if (game_string[i] == '(') { + ++parens; + } else if (game_string[i] == ')') { + --parens; + } else if (game_string[i] == '=' && parens == 1) { + equals = i; + } + if ((game_string[i] == ',' && parens == 1) || + (game_string[i] == ')' && parens == 0 && i > start + 1)) { + params[game_string.substr(start, equals - start)] = + GameParameterFromString( + game_string.substr(equals + 1, i - equals - 1)); + start = i + 1; + equals = -1; + } + } + if (parens > 0) SpielFatalError("Missing closing bracket ')'."); + return params; +} + +std::string GameParameterTypeToString(const GameParameter::Type& type) { + switch (type) { + case GameParameter::Type::kUnset: + return "kUnset"; + case GameParameter::Type::kInt: + return "kInt"; + case GameParameter::Type::kDouble: + return "kDouble"; + case GameParameter::Type::kString: + return "kString"; + case GameParameter::Type::kBool: + return "kBool"; + case GameParameter::Type::kGame: + return "kGame"; + default: + SpielFatalError("Invalid GameParameter"); + } +} + +template <> +int GameParameter::value() const { + return int_value(); +} +template <> +double GameParameter::value() const { + return double_value(); +} +template <> +const std::string& GameParameter::value() const { + return string_value(); +} +template <> +std::string GameParameter::value() const { + return string_value(); +} +template <> +bool GameParameter::value() const { + return bool_value(); +} +template <> +const GameParameters& GameParameter::value() const { + return game_value(); +} +template <> +GameParameters GameParameter::value() const { + return game_value(); +} + +template <> +int GameParameter::value_with_default(int default_value) const { + if (has_int_value()) { + return int_value(); + } else { + return default_value; + } +} +template <> +double GameParameter::value_with_default(double default_value) const { + if (has_double_value()) { + return double_value(); + } else { + return default_value; + } +} +template <> +const std::string& GameParameter::value_with_default( + const std::string& default_value) const { + if (has_string_value()) { + return string_value(); + } else { + return default_value; + } +} +template <> +std::string GameParameter::value_with_default(std::string default_value) const { + if (has_string_value()) { + return string_value(); + } else { + return default_value; + } +} +template <> +bool GameParameter::value_with_default(bool default_value) const { + if (has_bool_value()) { + return bool_value(); + } else { + return default_value; + } +} + +} // namespace open_spiel diff --git 
a/scenarios/bargaining/open_spiel/open_spiel/game_parameters.h b/scenarios/bargaining/open_spiel/open_spiel/game_parameters.h new file mode 100644 index 0000000..a7684f1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_parameters.h @@ -0,0 +1,225 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAME_PARAMETERS_H_ +#define OPEN_SPIEL_GAME_PARAMETERS_H_ + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { + +// A GameParameter can be used in 3 contexts: +// - when defining the parameters for a game, with their optional default value +// and whether they are mandatory or not. +// - when specifying in Python a parameter value. +// - when passing a parametrization of additional Observer fields. +// +class GameParameter; + +using GameParameters = std::map; +std::string GameParametersToString(const GameParameters& game_params); +GameParameter GameParameterFromString(const std::string& str); +GameParameters GameParametersFromString(const std::string& game_string); + +inline constexpr const char* kDefaultNameDelimiter = "="; +inline constexpr const char* kDefaultParameterDelimiter = "|||"; +inline constexpr const char* kDefaultInternalDelimiter = "***"; + +class GameParameter { + public: + enum class Type { kUnset = -1, kInt, kDouble, kString, kBool, kGame }; + + explicit GameParameter(Type type = Type::kUnset, bool is_mandatory = false) + : is_mandatory_(is_mandatory), type_(type) {} + + explicit GameParameter(int value, bool is_mandatory = false) + : is_mandatory_(is_mandatory), int_value_(value), type_(Type::kInt) {} + + explicit GameParameter(double value, bool is_mandatory = false) + : is_mandatory_(is_mandatory), + double_value_(value), + type_(Type::kDouble) {} + + explicit GameParameter(std::string value, bool is_mandatory = false) + : is_mandatory_(is_mandatory), + string_value_(value), + type_(Type::kString) {} + + // Allows construction of a `GameParameter` from a string literal. This method + // is not subsumed by the previous method, even if value can be converted to a + // std::string, because the [C++ standard][iso] requires that the *standard + // conversion sequence* (see §13.3.3.1.1) + // `(const char[]) -> const char* -> bool` take precedence over the + // *user-defined conversion sequence* + // `(const char[]) -> const char* -> std::string` defined in the standard + // library. 
+ // [iso]: http://www.open-std.org/JTC1/SC22/WG21/docs/papers/2011/n3242.pdf + explicit GameParameter(const char* value, bool is_mandatory = false) + : is_mandatory_(is_mandatory), + string_value_(value), + type_(Type::kString) {} + + explicit GameParameter(bool value, bool is_mandatory = false) + : is_mandatory_(is_mandatory), bool_value_(value), type_(Type::kBool) {} + + explicit GameParameter(GameParameters value, + bool is_mandatory = false) + : is_mandatory_(is_mandatory), + game_value_(std::move(value)), + type_(Type::kGame) {} + + bool has_int_value() const { return type_ == Type::kInt; } + bool has_double_value() const { return type_ == Type::kDouble; } + bool has_string_value() const { return type_ == Type::kString; } + bool has_bool_value() const { return type_ == Type::kBool; } + bool has_game_value() const { return type_ == Type::kGame; } + Type type() const { return type_; } + + bool is_mandatory() const { return is_mandatory_; } + + // A readable string format, for display purposes; does not distinguish + // types in ambiguous cases, e.g. string True vs boolean True. + std::string ToString() const; + + // An unambiguous string representation, including type information. + // Used for __repr__ in the Python interface. + std::string ToReprString() const; + + // Everything necessary to reconstruct the parameter in string form: + // type value is_mandatory. + std::string Serialize( + const std::string& delimiter = kDefaultInternalDelimiter) const; + + int int_value() const { + SPIEL_CHECK_TRUE(type_ == Type::kInt); + return int_value_; + } + + double double_value() const { + SPIEL_CHECK_TRUE(type_ == Type::kDouble); + return double_value_; + } + + const std::string& string_value() const { + SPIEL_CHECK_TRUE(type_ == Type::kString); + return string_value_; + } + + bool bool_value() const { + SPIEL_CHECK_TRUE(type_ == Type::kBool); + return bool_value_; + } + + const GameParameters& game_value() const { + SPIEL_CHECK_TRUE(type_ == Type::kGame); + return game_value_; + } + + // Access values via param.value(). + // There are explicit specializations of this function that call the + // ***_value() functions above, however they are defined in game_parameters.cc + // to avoid compilation problems on some older compilers. + template + T value() const; + + template + T value_with_default(T default_value) const; + + bool operator==(const GameParameter& rhs) const { + switch (type_) { + case Type::kInt: + return rhs.has_int_value() && int_value_ == rhs.int_value(); + case Type::kDouble: + return rhs.has_double_value() && double_value_ == rhs.double_value(); + case Type::kString: + return rhs.has_string_value() && string_value_ == rhs.string_value(); + case Type::kBool: + return rhs.has_bool_value() && bool_value_ == rhs.bool_value(); + case Type::kGame: + return rhs.has_game_value() && game_value_ == rhs.game_value(); + case Type::kUnset: + return rhs.type_ == Type::kUnset; + } + std::cerr << "Unrecognized parameter type in operator==" + << ", returning false." << std::endl; + return false; + } + bool operator!=(const GameParameter& rhs) const { return !(*this == rhs); } + + private: + bool is_mandatory_; + + // Default initializations are required here. This is because some games mark + // parameters as not mandatory and also do not specify default values when + // registering the game type.. instead, setting the documented defaults upon + // game creation (often due to missing information at registration time). 
+ // This causes a problem when inspecting the game types themselves, even after + // the game is created via Game::GetType(), which returns the type as it was + // when it was registered. These initial values are used for those cases. + int int_value_ = 0; + double double_value_ = 0.0; + std::string string_value_ = ""; + bool bool_value_ = false; + GameParameters game_value_ = {}; + Type type_; +}; + +std::string GameParameterTypeToString(const GameParameter::Type& type); + +// Game Parameters and Game Parameter Serialization/Deserialization form: +// param_name=type/value/is_mandatory|param_name_2=type2/value2/is_mandatory2 +// assumes none of the delimeters appears in the string values +std::string SerializeGameParameters( + const GameParameters& game_params, + const std::string& name_delimiter = kDefaultNameDelimiter, + const std::string& parameter_delimeter = kDefaultParameterDelimiter); +GameParameters DeserializeGameParameters( + const std::string& data, + const std::string& name_delimiter = kDefaultNameDelimiter, + const std::string& parameter_delimeter = kDefaultParameterDelimiter); +GameParameter DeserializeGameParameter( + const std::string& data, + const std::string& delimiter = kDefaultInternalDelimiter); + +inline bool IsParameterSpecified(const GameParameters& table, + const std::string& key) { + return table.find(key) != table.end(); +} + +template +T ParameterValue(const GameParameters& params, const std::string& key, + absl::optional default_value = absl::nullopt) { + auto iter = params.find(key); + if (iter == params.end()) { + if (!default_value.has_value()) { + SpielFatalError(absl::StrCat("Cannot find parameter and no default " + "value passed for key: ", key)); + } + + return *default_value; + } else { + return iter->second.value(); + } +} + +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAME_PARAMETERS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/CMakeLists.txt b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/CMakeLists.txt new file mode 100644 index 0000000..a04d56e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/CMakeLists.txt @@ -0,0 +1,91 @@ +add_library (game_transforms OBJECT + add_noise.cc + add_noise.h + cached_tree.cc + cached_tree.h + coop_to_1p.cc + coop_to_1p.h + efg_writer.cc + efg_writer.h + misere.cc + misere.h + normal_form_extensive_game.cc + normal_form_extensive_game.h + repeated_game.cc + repeated_game.h + restricted_nash_response.cc + restricted_nash_response.h + start_at.cc + start_at.h + turn_based_simultaneous_game.cc + turn_based_simultaneous_game.h + zerosum.cc + zerosum.h +) +target_include_directories (game_transforms PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + +add_executable(restricted_nash_response_test + restricted_nash_response_test.cc + ${OPEN_SPIEL_OBJECTS} + $) +add_test(restricted_nash_response_test restricted_nash_response_test) + +add_executable(turn_based_simultaneous_game_test + turn_based_simultaneous_game_test.cc + ${OPEN_SPIEL_OBJECTS} + $) +add_test(turn_based_simultaneous_game_test turn_based_simultaneous_game_test) + +add_executable(misere_test + misere_test.cc + ${OPEN_SPIEL_OBJECTS} + $) +add_test(misere_test misere_test) + +add_executable(add_noise_test + add_noise_test.cc + ${OPEN_SPIEL_OBJECTS} + $) +add_test(add_noise_test add_noise_test) + +add_executable(cached_tree_test + cached_tree_test.cc + ${OPEN_SPIEL_OBJECTS} + $) +add_test(cached_tree_test cached_tree_test) + +add_executable(coop_to_1p_test + coop_to_1p_test.cc + ${OPEN_SPIEL_OBJECTS} 
+ $) +add_test(coop_to_1p_test coop_to_1p_test) + +add_executable(efg_writer_test + efg_writer_test.cc + ${OPEN_SPIEL_OBJECTS} + $) +add_test(efg_writer_test efg_writer_test) + +add_executable(normal_form_extensive_game_test + normal_form_extensive_game_test.cc + ${OPEN_SPIEL_OBJECTS} + $) +add_test(normal_form_extensive_game_test normal_form_extensive_game_test) + +add_executable(repeated_game_test + repeated_game_test.cc + ${OPEN_SPIEL_OBJECTS} + $) +add_test(repeated_game_test repeated_game_test) + +add_executable(start_at_test + start_at_test.cc + ${OPEN_SPIEL_OBJECTS} + $) +add_test(start_at_test start_at_test) + +add_executable(zerosum_test + zerosum_test.cc + ${OPEN_SPIEL_OBJECTS} + $) +add_test(zerosum_test zerosum_test) diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/add_noise.cc b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/add_noise.cc new file mode 100644 index 0000000..14fd4b6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/add_noise.cc @@ -0,0 +1,134 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/game_transforms/add_noise.h" + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/game_transforms/game_wrapper.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace add_noise { +namespace { + +// These parameters are the most-general case. The actual game may be simpler. +const GameType kGameType{ + /*short_name=*/"add_noise", + /*long_name=*/"Add noise to terminal utilities.", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kSampledStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kRewards, + /*max_num_players=*/100, + /*min_num_players=*/1, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + {{"game", GameParameter(GameParameter::Type::kGame, /*is_mandatory=*/true)}, + {"epsilon", GameParameter(1.0, /*is_mandatory=*/true)}, + {"seed", GameParameter(1, /*is_mandatory=*/true)}}, + /*default_loadable=*/false, + /*provides_factored_observation_string=*/true, + /*is_concrete=*/false}; + +std::shared_ptr Factory(const GameParameters& params) { + auto game = LoadGame(params.at("game").game_value()); + GameType game_type = game->GetType(); + // Only terminal reward models are supported. 
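+  // (AddNoiseState adds the sampled noise to the two players' terminal
+  // returns only, so wrapped games must use a terminal reward model.)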
+ SPIEL_CHECK_EQ(game_type.reward_model, GameType::RewardModel::kTerminal); + + game_type.short_name = kGameType.short_name; + game_type.long_name = + absl::StrCat("Add noise to", " game=", game_type.long_name, + " epsilon=", params.at("epsilon").double_value(), + " seed=", params.at("seed").int_value()); + return std::make_shared(game, game_type, params); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +} // namespace + +AddNoiseGame::AddNoiseGame(std::shared_ptr game, GameType game_type, + GameParameters game_parameters) + : WrappedGame(game, game_type, game_parameters), + epsilon_(ParameterValue("epsilon")), + rng_(ParameterValue("seed")) {} + +std::unique_ptr AddNoiseGame::NewInitialState() const { + return std::make_unique(shared_from_this(), + game_->NewInitialState()); +} + +double AddNoiseGame::GetNoise(const AddNoiseState& state) { + std::string state_str = state.HistoryString(); + auto it = noise_table_.find(state_str); + if (it != noise_table_.end()) { + return it->second; + } + + std::uniform_real_distribution dist(-epsilon_, epsilon_); + double noise = dist(rng_); + noise_table_[state_str] = noise; + return noise; +} + +double AddNoiseGame::MaxUtility() const { + return WrappedGame::MaxUtility() + epsilon_; +} + +double AddNoiseGame::MinUtility() const { + return WrappedGame::MinUtility() - epsilon_; +} + +AddNoiseState::AddNoiseState(std::shared_ptr transformed_game, + std::unique_ptr state) + : WrappedState(transformed_game, std::move(state)) {} + +std::vector AddNoiseState::Returns() const { + std::vector returns = state_->Returns(); + SPIEL_CHECK_EQ(returns.size(), 2); + + if (state_->IsTerminal()) { + auto const_noise_game = down_cast(game_.get()); + AddNoiseGame* noise_game = const_cast(const_noise_game); + double noise = noise_game->GetNoise(*this); + returns[0] += noise; + returns[1] -= noise; + } + + return returns; +} + +std::vector AddNoiseState::Rewards() const { + if (IsTerminal()) { + return Returns(); + } else { + SPIEL_CHECK_FALSE(IsChanceNode()); + return std::vector(num_players_, 0.0); + } +} + +} // namespace add_noise +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/add_noise.h b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/add_noise.h new file mode 100644 index 0000000..ca0e18e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/add_noise.h @@ -0,0 +1,64 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAME_TRANSFORMS_ADD_NOISE_H_ +#define OPEN_SPIEL_GAME_TRANSFORMS_ADD_NOISE_H_ + +#include + +#include "open_spiel/game_transforms/game_wrapper.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// Transforms game by adding noise to the original utilities. +// +// The noise is sampled from uniform distribution of [-epsilon, epsilon] +// independently for each terminal history. +// The transformation can be seeded for reproducibility. 
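+//
+// Example game string (the form used in add_noise_test.cc):
+//   add_noise(epsilon=1.,seed=1,game=kuhn_poker())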
+ +namespace open_spiel { +namespace add_noise { + +class AddNoiseState : public WrappedState { + public: + AddNoiseState(std::shared_ptr game, std::unique_ptr state); + AddNoiseState(const AddNoiseState& other) = default; + std::unique_ptr Clone() const override { + return std::make_unique(*this); + } + std::vector Returns() const override; + std::vector Rewards() const override; +}; + +class AddNoiseGame : public WrappedGame { + public: + AddNoiseGame(std::shared_ptr game, GameType game_type, + GameParameters game_parameters); + std::unique_ptr NewInitialState() const override; + double GetNoise(const AddNoiseState& state); + + double MinUtility() const override; + + double MaxUtility() const override; + + private: + const double epsilon_; + std::mt19937 rng_; + std::unordered_map noise_table_; +}; + +} // namespace add_noise +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAME_TRANSFORMS_ADD_NOISE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/add_noise_test.cc b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/add_noise_test.cc new file mode 100644 index 0000000..be6393d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/add_noise_test.cc @@ -0,0 +1,36 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/game_transforms/add_noise.h" + +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace add_noise { +namespace { + +namespace testing = open_spiel::testing; + +void BasicTests() { + testing::LoadGameTest("add_noise(epsilon=1.,seed=1,game=kuhn_poker())"); + testing::RandomSimTest( + *LoadGame("add_noise(epsilon=1.,seed=1,game=kuhn_poker())"), 100); +} + +} // namespace +} // namespace add_noise +} // namespace open_spiel + +int main(int argc, char** argv) { open_spiel::add_noise::BasicTests(); } diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/cached_tree.cc b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/cached_tree.cc new file mode 100644 index 0000000..c6ca617 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/cached_tree.cc @@ -0,0 +1,320 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
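+
+// Implementation of the cached_tree transform declared in cached_tree.h: each
+// state reached during play is wrapped in a Node, and quantities such as legal
+// actions, returns, terminality and string representations are computed once
+// and then reused from the cache on later visits.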
+ +#include "open_spiel/game_transforms/cached_tree.h" + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/game_transforms/game_wrapper.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace cached_tree { + +namespace { +// These parameters reflect the most-general game, with the maximum +// API coverage. The actual game may be simpler and might not provide +// all the interfaces. +// This is used as a placeholder for game registration. The actual instantiated +// game will have more accurate information. +const GameType kGameType{/*short_name=*/"cached_tree", + /*long_name=*/"Cached Tree Game Transform", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kSampledStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kRewards, + /*max_num_players=*/100, + /*min_num_players=*/1, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + {{"game", GameParameter(GameParameter::Type::kGame, + /*is_mandatory=*/true)}}, + /*default_loadable=*/false, + /*provides_factored_observation_string=*/false, + /*is_concrete=*/false}; + +std::shared_ptr Factory(const GameParameters& params) { + return ConvertToCachedTree(*LoadGame(params.at("game").game_value())); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +GameType ConvertType(GameType type) { + type.dynamics = GameType::Dynamics::kSequential; + type.information = GameType::Information::kImperfectInformation; + type.short_name = kGameType.short_name; + type.long_name = "Turn-based " + type.long_name; + type.parameter_specification = kGameType.parameter_specification; + return type; +} + +GameParameters ConvertParams(const GameType& type, GameParameters params) { + params["name"] = GameParameter(type.short_name); + GameParameters new_params{{"game", GameParameter{params}}}; + return new_params; +} + +} // namespace + +// Note: overridden to use the wrapped state inside the node. +const State& CachedTreeState::GetWrappedState() const { + return *(node_->state); +} + +CachedTreeState::CachedTreeState(std::shared_ptr game, Node* node) + : WrappedState(game, nullptr), + parent_game_(down_cast(*game)), + node_(node) {} + +CachedTreeState::CachedTreeState(const CachedTreeState& other) + : WrappedState(other, nullptr), + parent_game_(other.parent_game_), + node_(other.node_) {} + +void CachedTreeState::DoApplyAction(Action action_id) { + auto iter = node_->children.find(action_id); + if (iter != node_->children.end()) { + node_ = iter->second; + return; + } + + // If we get here, the child does not exist. Create it and connect it. + node_ = parent_game_.CreateChildNode(node_, this, action_id); +} + +void CachedTreeState::DoApplyActions(const std::vector& actions) { + auto iter = node_->joint_action_children.find(actions); + if (iter != node_->joint_action_children.end()) { + node_ = iter->second; + return; + } + + // If we get here, the child does not exist. Create it and connect it. 
+ node_ = parent_game_.CreateChildNode(node_, this, actions); +} + +std::unique_ptr CachedTreeState::Clone() const { + return std::make_unique(*this); +} + +Player CachedTreeState::CurrentPlayer() const { + if (node_->current_player == kInvalidPlayer) { + node_->current_player = node_->state->CurrentPlayer(); + } + return node_->current_player; +} + +std::vector CachedTreeState::LegalActions(Player player) const { + auto iter = node_->legal_actions.find(player); + if (iter != node_->legal_actions.end()) { + return iter->second; + } + std::vector legal_actions = node_->state->LegalActions(player); + node_->legal_actions[player] = legal_actions; + return legal_actions; +} + +std::vector CachedTreeState::LegalActions() const { + return LegalActions(CurrentPlayer()); +} + +std::string CachedTreeState::ActionToString(Player player, + Action action_id) const { + auto key = std::make_pair(player, action_id); + auto iter = node_->action_to_string.find(key); + if (iter != node_->action_to_string.end()) { + return iter->second; + } + std::string action_string = node_->state->ActionToString(player, action_id); + node_->action_to_string[key] = action_string; + return action_string; +} + +std::string CachedTreeState::ToString() const { + if (node_->to_string.has_value()) { + return node_->to_string.value(); + } + node_->to_string = node_->state->ToString(); + return node_->to_string.value(); +} + +bool CachedTreeState::IsTerminal() const { + if (node_->terminal.has_value()) { + return node_->terminal.value(); + } + node_->terminal = node_->state->IsTerminal(); + return node_->terminal.value(); +} + +std::vector CachedTreeState::Rewards() const { + if (node_->rewards.empty()) { + node_->rewards = node_->state->Rewards(); + } + return node_->rewards; +} + +std::vector CachedTreeState::Returns() const { + if (node_->returns.empty()) { + node_->returns = node_->state->Returns(); + } + return node_->returns; +} + +std::string CachedTreeState::InformationStateString(Player player) const { + auto iter = node_->information_state_string.find(player); + if (iter != node_->information_state_string.end()) { + return iter->second; + } + std::string information_state_string = + node_->state->InformationStateString(player); + node_->information_state_string[player] = information_state_string; + return information_state_string; +} + +void CachedTreeState::InformationStateTensor(Player player, + absl::Span values) const { + node_->state->InformationStateTensor(player, values); +} + +std::string CachedTreeState::ObservationString(Player player) const { + auto iter = node_->observation_string.find(player); + if (iter != node_->observation_string.end()) { + return iter->second; + } + std::string observation_string = node_->state->ObservationString(player); + node_->observation_string[player] = observation_string; + return observation_string; +} + +void CachedTreeState::ObservationTensor(Player player, + absl::Span values) const { + node_->state->ObservationTensor(player, values); +} + +void CachedTreeState::UndoAction(Player player, Action action) { + node_->state->UndoAction(player, action); + history_.pop_back(); +} + +ActionsAndProbs CachedTreeState::ChanceOutcomes() const { + if (node_->chance_outcomes.empty()) { + node_->chance_outcomes = node_->state->ChanceOutcomes(); + } + return node_->chance_outcomes; +} + +std::vector CachedTreeState::LegalChanceOutcomes() const { + return LegalActions(kChancePlayerId); +} + +std::vector CachedTreeState::ActionsConsistentWithInformationFrom( + Action action) const { + auto iter 
= + node_->legal_actions_consistent_with_information_from.find(action); + if (iter != node_->legal_actions_consistent_with_information_from.end()) { + return iter->second; + } + std::vector legal_actions_consistent_with_information_from = + node_->state->ActionsConsistentWithInformationFrom(action); + node_->legal_actions_consistent_with_information_from[action] = + legal_actions_consistent_with_information_from; + return legal_actions_consistent_with_information_from; +} + +Node* CachedTreeGame::CreateChildNode(Node* parent, + const CachedTreeState* state, + Action action) const { + SPIEL_CHECK_TRUE(parent != nullptr); + SPIEL_CHECK_TRUE(state != nullptr); + SPIEL_CHECK_TRUE(action != kInvalidAction); + nodes_.push_back(std::make_unique()); + Node* child_node = nodes_.back().get(); + child_node->state = parent->state->Child(action); + parent->children[action] = child_node; + return child_node; +} + +Node* CachedTreeGame::CreateChildNode( + Node* parent, + const CachedTreeState* state, + const std::vector& joint_action) const { + SPIEL_CHECK_TRUE(parent != nullptr); + SPIEL_CHECK_TRUE(state != nullptr); + SPIEL_CHECK_FALSE(joint_action.empty()); + nodes_.push_back(std::make_unique()); + Node* child_node = nodes_.back().get(); + auto actual_child_state = parent->state->Clone(); + actual_child_state->ApplyActions(joint_action); + child_node->state = std::move(actual_child_state); + parent->joint_action_children[joint_action] = child_node; + return child_node; +} + +std::unique_ptr CachedTreeGame::NewInitialState() const { + if (root_ == nullptr) { + SPIEL_CHECK_EQ(nodes_.size(), 0); + nodes_.push_back(std::make_unique()); + root_ = nodes_.back().get(); + root_->state = game_->NewInitialState(); + } + return std::make_unique(shared_from_this(), root_); +} + +double CachedTreeGame::MinUtility() const { + if (!min_utility_.has_value()) { + min_utility_ = game_->MinUtility(); + } + return min_utility_.value(); +} + +double CachedTreeGame::MaxUtility() const { + if (!max_utility_.has_value()) { + max_utility_ = game_->MaxUtility(); + } + return max_utility_.value(); +} + +CachedTreeGame::CachedTreeGame(std::shared_ptr game) + : WrappedGame(game, ConvertType(game->GetType()), + ConvertParams(game->GetType(), game->GetParameters())) {} + +std::shared_ptr ConvertToCachedTree(const Game& game) { + return std::shared_ptr( + new CachedTreeGame(game.shared_from_this())); +} + +std::shared_ptr LoadGameAsCachedTree(const std::string& name) { + auto game = LoadGame(name); + return ConvertToCachedTree(*game); +} + +std::shared_ptr LoadGameAsCachedTree(const std::string& name, + const GameParameters& params) { + auto game = LoadGame(name, params); + return ConvertToCachedTree(*game); +} + +} // namespace cached_tree +} // namespace open_spiel + diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/cached_tree.h b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/cached_tree.h new file mode 100644 index 0000000..22c8df4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/cached_tree.h @@ -0,0 +1,134 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAME_TRANSFORMS_CACHED_TREE_H_ +#define OPEN_SPIEL_GAME_TRANSFORMS_CACHED_TREE_H_ + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/game_transforms/game_wrapper.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/spiel_globals.h" + +// A tree built dynamically built and cached in memory. This wrapper can be used +// to speed up the traversals of the game tree and corresponding functions like +// information state keys and tensors for games whose tree is not too large. + +namespace open_spiel { +namespace cached_tree { + +class CachedTreeState; +class CachedTreeGame; + +// A node corresponds to a state in the game. +struct Node { + Player current_player = kInvalidPlayer; + std::unique_ptr state; + absl::optional to_string; + ActionsAndProbs chance_outcomes; + std::vector returns; + std::vector rewards; + absl::optional terminal; + absl::flat_hash_map children; + absl::flat_hash_map, Node*> joint_action_children; + absl::flat_hash_map, std::string> action_to_string; + absl::flat_hash_map> legal_actions; + absl::flat_hash_map information_state_string; + absl::flat_hash_map observation_string; + absl::flat_hash_map> + legal_actions_consistent_with_information_from; +}; + + +class CachedTreeState : public WrappedState { + public: + CachedTreeState(std::shared_ptr game, Node* node); + CachedTreeState(const CachedTreeState& other); + + // Note: overridden to use the wrapped state inside the node. + const State& GetWrappedState() const override; + + // Must override all the methods of the WrappedState. This is because this + // wrapper bypasses using the state_ pointer inside WrappedState. 
+ Player CurrentPlayer() const override; + std::vector LegalActions(Player player) const override; + std::vector LegalActions() const override; + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Rewards() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + void InformationStateTensor(Player player, + absl::Span values) const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + void UndoAction(Player player, Action action) override; + ActionsAndProbs ChanceOutcomes() const override; + std::vector LegalChanceOutcomes() const override; + std::vector ActionsConsistentWithInformationFrom( + Action action) const override; + + protected: + void DoApplyAction(Action action_id) override; + void DoApplyActions(const std::vector& actions) override; + + private: + const CachedTreeGame& parent_game_; + Node* node_ = nullptr; +}; + +class CachedTreeGame : public WrappedGame { + public: + explicit CachedTreeGame(std::shared_ptr game); + std::unique_ptr NewInitialState() const override; + double MinUtility() const override; + double MaxUtility() const override; + + Node* CreateChildNode(Node* parent, const CachedTreeState* state, + Action action) const; + Node* CreateChildNode(Node* parent, const CachedTreeState* state, + const std::vector& joint_action) const; + + + private: + // protected member game_ is inherited from WrappedGame. + mutable absl::optional min_utility_; + mutable absl::optional max_utility_; + mutable Node* root_ = nullptr; + mutable std::vector> nodes_; +}; + +// Helper function to convert +std::shared_ptr ConvertToCachedTree(const Game& game); +std::shared_ptr LoadGameAsCachedTree(const std::string& name); +std::shared_ptr LoadGameAsCachedTree(const std::string& name, + const GameParameters& params); + + +} // namespace cached_tree +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAME_TRANSFORMS_CACHED_TREE_H_ + diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/cached_tree_test.cc b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/cached_tree_test.cc new file mode 100644 index 0000000..a68cdac --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/cached_tree_test.cc @@ -0,0 +1,88 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
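A minimal usage sketch for the cached-tree wrapper declared above (illustrative only; it assumes the standard OpenSpiel LoadGame/State API together with the LoadGameAsCachedTree helper from cached_tree.h):

    #include <iostream>
    #include <memory>

    #include "open_spiel/game_transforms/cached_tree.h"
    #include "open_spiel/spiel.h"

    int main() {
      // Wrap Kuhn poker in the caching transform; the wrapper plays
      // identically to the base game but memoizes per-node queries.
      std::shared_ptr<const open_spiel::Game> game =
          open_spiel::cached_tree::LoadGameAsCachedTree("kuhn_poker");
      std::unique_ptr<open_spiel::State> state = game->NewInitialState();
      // The first call computes and stores the string inside the node;
      // the second call returns the cached value.
      std::cout << state->ToString() << std::endl;
      std::cout << state->ToString() << std::endl;
      return 0;
    }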
+ +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/algorithms/cfr.h" +#include "open_spiel/algorithms/tabular_exploitability.h" +#include "open_spiel/algorithms/expected_returns.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/init.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace cached_tree { +namespace { + +void BasicTests() { + testing::LoadGameTest("cached_tree(game=kuhn_poker())"); + testing::RandomSimTest(*LoadGame("cached_tree(game=kuhn_poker())"), 10); +} + +void CFRTest(const Game& game, + int iterations, + absl::optional nash_value, + absl::optional nash_value_eps, + absl::optional exploitability_upper_bound) { + std::cout << "Running CFR for " << iterations << " iterations on " << + game.ToString() << std::endl; + algorithms::CFRSolver solver(game); + for (int i = 0; i < iterations; i++) { + solver.EvaluateAndUpdatePolicy(); + } + const std::shared_ptr average_policy = solver.AveragePolicy(); + + const std::vector game_value = + algorithms::ExpectedReturns(*game.NewInitialState(), *average_policy, + -1); + + if (nash_value.has_value()) { + SPIEL_CHECK_EQ(2, game_value.size()); + SPIEL_CHECK_FLOAT_NEAR((float)game_value[0], nash_value.value(), + nash_value_eps.value()); + SPIEL_CHECK_FLOAT_NEAR((float)game_value[1], -nash_value.value(), + nash_value_eps.value()); + } + + if (exploitability_upper_bound.has_value()) { + double exploitability = algorithms::Exploitability(game, *average_policy); + std::cout << "Exploitability: " << exploitability << std::endl; + SPIEL_CHECK_LE(exploitability, exploitability_upper_bound.value()); + } +} + +void CFRTest_KuhnPoker() { + CFRTest(*LoadGame("cached_tree(game=kuhn_poker())"), 300, -1.0 / 18.0, 0.001, + 0.05); +} + +void CFRTest_LeducPoker() { + CFRTest(*LoadGame("cached_tree(game=leduc_poker())"), 300, -0.08, 0.05, 0.1); +} + +} // namespace +} // namespace cached_tree +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::Init("", &argc, &argv, false); + open_spiel::cached_tree::BasicTests(); + open_spiel::cached_tree::CFRTest_KuhnPoker(); + open_spiel::cached_tree::CFRTest_LeducPoker(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/coop_to_1p.cc b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/coop_to_1p.cc new file mode 100644 index 0000000..fa5f32c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/coop_to_1p.cc @@ -0,0 +1,247 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
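For context on the CFR check above: -1.0 / 18.0 is the game value of Kuhn poker for the first player, so the test requires the average policy's expected returns to satisfy E[u_0] ≈ -1/18 ≈ -0.0556 and, by the zero-sum property, E[u_1] ≈ +1/18 (each within 0.001), and to be exploitable by at most 0.05.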
+ +#include "open_spiel/game_transforms/coop_to_1p.h" + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace coop_to_1p { +namespace { + +// These parameters are the general case. +const GameType kGameType{/*short_name=*/"coop_to_1p", + /*long_name=*/"Cooperative Game As Single-Player", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kRewards, + /*max_num_players=*/1, + /*min_num_players=*/1, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + {{"game", GameParameter(GameParameter::Type::kGame)}}, + /*default_loadable=*/false, + /*provides_factored_observation_string=*/false, + /*is_concrete=*/false}; + +GameType CoopTo1pGameType(GameType underlying_game_type) { + GameType game_type = kGameType; + game_type.long_name = + absl::StrCat("1p(", underlying_game_type.long_name, ")"); + game_type.reward_model = underlying_game_type.reward_model; + return game_type; +} + +std::unique_ptr Factory(const GameParameters& params) { + auto game = params.count("game") ? LoadGame(params.at("game").game_value()) + : LoadGame("tiny_hanabi"); + GameType game_type = CoopTo1pGameType(game->GetType()); + return std::unique_ptr( + new CoopTo1pGame(std::move(game), game_type, params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +} // namespace + +CoopTo1pGame::CoopTo1pGame(std::shared_ptr game, GameType game_type, + GameParameters game_parameters) + : Game(game_type, game_parameters), game_(game) {} + +std::unique_ptr CoopTo1pState::Clone() const { + return std::unique_ptr(new CoopTo1pState(*this)); +} + +std::unique_ptr CoopTo1pGame::NewInitialState() const { + return std::unique_ptr(new CoopTo1pState( + shared_from_this(), NumPrivates(), game_->NewInitialState())); +} + +std::string CoopTo1pState::ActionToString(Player player, + Action action_id) const { + if (player == kChancePlayerId) { + return state_->ActionToString(player, action_id); + } else { + Player pl = state_->CurrentPlayer(); + return absl::StrCat(privates_[pl].names[privates_[pl].next_unassigned], + "->", state_->ActionToString(pl, action_id)); + } +} + +std::string CoopTo1pState::AssignmentToString(Player player, + Action assignment) const { + switch (assignment) { + case PlayerPrivate::kImpossible: + return "impossible"; + case PlayerPrivate::kUnassigned: + return "unassigned"; + default: + return state_->ActionToString(player, assignment); + } +} + +// String representation of the current possible hands for every player and the +// assignment of hands to actions for the current player. 
+std::string CoopTo1pState::Assignments() const { + std::string str = ""; + Player current_player = state_->CurrentPlayer(); + for (int player = 0; player < privates_.size(); ++player) { + auto possible_assignments = state_->LegalActions(player); + possible_assignments.push_back(PlayerPrivate::kUnassigned); + for (auto asignment : possible_assignments) { + absl::StrAppend(&str, "Player ", player); + if (player == current_player) { + absl::StrAppend(&str, " ", AssignmentToString(player, asignment), ":"); + } else { + absl::StrAppend(&str, " possible:"); + } + bool found = false; + for (int pvt = 0; pvt < privates_[player].assignments.size(); ++pvt) { + if (privates_[player].assignments[pvt] == asignment) { + absl::StrAppend(&str, " ", privates_[player].names[pvt]); + found = true; + } + } + if (!found) absl::StrAppend(&str, " none"); + absl::StrAppend(&str, "\n"); + } + } + return str; +} + +// For debug purposes only. This reveals the state of the underlying game, which +// should be hidden from the player in the 1p game. +std::string CoopTo1pState::ToString() const { + return absl::StrCat(state_->ToString(), "\n", Assignments()); +} + +// The relevant public Markov state of the underlying game (i.e. the last action +// if any). +std::string CoopTo1pState::PublicStateString() const { + if (prev_action_ == kInvalidAction) { + return "New Game"; + } else { + return state_->ActionToString(prev_player_, prev_action_); + } +} + +// Represents a decision point; contains the last action (if any) in the +// underlying game and the current valid hands and their assignments. +std::string CoopTo1pState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return absl::StrCat("Player ", player, "\n", PublicStateString(), "\n", + Assignments()); +} + +void CoopTo1pState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + const int num_actions = state_->NumDistinctActions(); + const int num_players = state_->NumPlayers(); + SPIEL_CHECK_EQ(values.size(), + num_privates_ * (num_players + num_actions + 1) + num_actions); + std::fill(values.begin(), values.end(), 0); + if (IsChanceNode()) return; + + // Last action in the underlying game + int base = 0; + if (prev_action_ != kInvalidAction) values.at(prev_action_) = 1; + base += num_actions; + + // Possible privates for every player (multi-hot) + for (int p = 0; p < num_players; ++p) { + const auto& pvt = privates_[p]; + for (int i = 0; i < num_privates_; ++i) { + values.at(base + i) = (pvt.assignments[i] != PlayerPrivate::kImpossible); + } + base += num_privates_; + } + + // For terminal states, we don't need anything else. + if (state_->IsTerminal()) return; + + // Currently-assigned privates for every action (multi-hot) + Player current_player = state_->CurrentPlayer(); + const auto& pvt = privates_[current_player]; + for (Action a = 0; a < num_actions; ++a) { + for (int i = 0; i < num_privates_; ++i) { + values.at(base + i) = (pvt.assignments[i] == a); + } + base += num_privates_; + } + + // The private we are currently considering (one-hot) + if (!pvt.AssignmentsComplete()) values.at(base + pvt.next_unassigned) = 1; + base += num_privates_; +} + +void CoopTo1pState::DoApplyAction(Action action_id) { + if (IsChanceNode()) { + // Assume this is the dealing of a private state. Capture info on possible + // privates here. 
+ privates_.push_back(PlayerPrivate(num_privates_)); + actual_private_.push_back(action_id); + for (int i = 0; i < num_privates_; ++i) { + privates_.back().names[i] = state_->ActionToString(kChancePlayerId, i); + } + state_->ApplyAction(action_id); + } else { + // Update the assignment and maybe act in the underlying game. + Player player = state_->CurrentPlayer(); + privates_[player].Assign(action_id); + if (privates_[player].AssignmentsComplete()) { + Action underlying_action = + privates_[player].assignments[actual_private_[player]]; + state_->ApplyAction(underlying_action); + prev_player_ = player; + prev_action_ = underlying_action; + privates_[player].Reset(underlying_action); + } + } +} + +std::vector CoopTo1pGame::ObservationTensorShape() const { + // State of the underlying game (represented as the last action) + // Possible privates for every player (multi-hot) + // Currently-assigned privates for every action (multi-hot) + // The private we are currently considering (one-hot) + const int num_actions = game_->NumDistinctActions(); + const int num_players = game_->NumPlayers(); + return {NumPrivates() * (num_players + num_actions + 1) + num_actions}; +} + +int CoopTo1pGame::MaxGameLength() const { + // Every choice is potentially duplicated for every private state. + return game_->MaxGameLength() * NumPrivates(); +} + +} // namespace coop_to_1p +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/coop_to_1p.h b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/coop_to_1p.h new file mode 100644 index 0000000..65e2c36 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/coop_to_1p.h @@ -0,0 +1,186 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAME_TRANSFORMS_COOP_TO_1P_H_ +#define OPEN_SPIEL_GAME_TRANSFORMS_COOP_TO_1P_H_ + +#include + +#include "open_spiel/game_transforms/game_wrapper.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// Transforms a co-operative game into a 1-player environment, in which +// actions build a policy in the underlying game. +// +// We make very strong assumptions about the underlying game: +// - The initial num_players actions must be chance actions, one per player. +// These are assumed to map 1:1 to the private state for that player. +// - The public state of the game is determined solely by the last non-chance +// action. +// +// These assumptions hold for tiny_hanabi and tiny_bridge_2p, but are unlikely +// to hold in other games. + +namespace open_spiel { +namespace coop_to_1p { + +// Information we have about each player's private state. +struct PlayerPrivate { + // Each private state may either - have a valid action assigned, be waiting + // for an action assignment, or have been ruled out by prior play. 
+ static inline constexpr Action kImpossible = -100; + static inline constexpr Action kUnassigned = -99; + std::vector assignments; + int next_unassigned; // index into assignments + + // Name of each private state. + std::vector names; + + PlayerPrivate(int num_privates) + : assignments(num_privates, kUnassigned), + next_unassigned(0), + names(num_privates) {} + + // Assign the next unassigned private. + void Assign(Action action) { + assignments[next_unassigned++] = action; + while (next_unassigned < assignments.size() && + assignments[next_unassigned] != kUnassigned) + ++next_unassigned; + } + + // Have all assignments been made? + bool AssignmentsComplete() const { + return next_unassigned == assignments.size(); + } + + // Reset assignments for the next action choice. + void Reset(Action action) { + next_unassigned = assignments.size(); + for (int i = 0; i < assignments.size(); ++i) { + if (assignments[i] == action) { + if (next_unassigned == assignments.size()) next_unassigned = i; + assignments[i] = kUnassigned; + } else { + assignments[i] = kImpossible; + } + } + } +}; + +// This is a single player game. +inline constexpr Player kPlayerId = 0; + +// The state is mostly a wrapper over the imperfect information state. +class CoopTo1pState : public State { + public: + CoopTo1pState(std::shared_ptr game, int num_privates, + std::unique_ptr state) + : State(game), + state_(std::move(state)), + num_privates_(num_privates), + prev_player_(kInvalidPlayer), + prev_action_(kInvalidAction) {} + CoopTo1pState(const CoopTo1pState& other) + : State(other), + state_(other.state_->Clone()), + num_privates_(other.num_privates_), + privates_(other.privates_), + actual_private_(other.actual_private_), + prev_player_(other.prev_player_), + prev_action_(other.prev_action_) {} + Player CurrentPlayer() const override { + Player underlying_player = state_->CurrentPlayer(); + return underlying_player < 0 ? 
underlying_player : kPlayerId; + } + std::vector LegalActions(Player player) const override { + if (player == CurrentPlayer()) + return LegalActions(); + else + return {}; + } + std::vector LegalActions() const override { + return state_->LegalActions(state_->CurrentPlayer()); + } + std::vector LegalActionsMask() const { + return state_->LegalActionsMask(state_->CurrentPlayer()); + } + bool IsTerminal() const override { return state_->IsTerminal(); } + std::vector Rewards() const override { + return {state_->Rewards().front()}; + } + std::vector Returns() const override { + return {state_->Returns().front()}; + } + std::unique_ptr Clone() const override; + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + ActionsAndProbs ChanceOutcomes() const override { + return state_->ChanceOutcomes(); + } + std::vector LegalChanceOutcomes() const override { + return state_->LegalChanceOutcomes(); + } + + protected: + void DoApplyAction(Action action_id) override; + + private: + std::unique_ptr state_; + int num_privates_; + std::vector privates_; + std::vector actual_private_; + Player prev_player_; + Action prev_action_; + + std::string Assignments() const; + std::string PublicStateString() const; + std::string AssignmentToString(Player player, Action assignment) const; +}; + +class CoopTo1pGame : public Game { + public: + CoopTo1pGame(std::shared_ptr game, GameType game_type, + GameParameters game_parameters); + std::unique_ptr NewInitialState() const override; + int NumPlayers() const override { return 1; } + std::vector ObservationTensorShape() const override; + int MaxGameLength() const override; + int MaxChanceNodesInHistory() const override { + return game_->MaxGameLength(); + } + + int NumDistinctActions() const override { + return game_->NumDistinctActions(); + } + int MaxChanceOutcomes() const override { return game_->MaxChanceOutcomes(); } + double MinUtility() const override { return game_->MinUtility(); } + double MaxUtility() const override { return game_->MaxUtility(); } + absl::optional UtilitySum() const override { + return game_->UtilitySum(); + } + + private: + std::shared_ptr game_; + int NumPrivates() const { return game_->MaxChanceOutcomes(); } +}; + +} // namespace coop_to_1p +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAME_TRANSFORMS_COOP_TO_1P_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/coop_to_1p_test.cc b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/coop_to_1p_test.cc new file mode 100644 index 0000000..30a3f52 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/coop_to_1p_test.cc @@ -0,0 +1,36 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
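A short loading sketch for the transform declared above (illustrative; it uses the "coop_to_1p(game=...)" registration from coop_to_1p.cc and the standard OpenSpiel API):

    #include <memory>

    #include "open_spiel/spiel.h"
    #include "open_spiel/spiel_utils.h"

    void CoopTo1pSketch() {
      // Wrap the cooperative game tiny_hanabi as a single-player game, as in
      // the test below.
      std::shared_ptr<const open_spiel::Game> game =
          open_spiel::LoadGame("coop_to_1p(game=tiny_hanabi())");
      // The wrapper always reports exactly one decision player.
      SPIEL_CHECK_EQ(game->NumPlayers(), 1);

      // The first moves are the chance deals of each player's private state;
      // afterwards, every decision assigns an underlying action to the next
      // unassigned private, building a policy move by move.
      std::unique_ptr<open_spiel::State> state = game->NewInitialState();
      while (state->IsChanceNode()) {
        state->ApplyAction(state->LegalActions().front());
      }
    }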
+ +#include "open_spiel/game_transforms/coop_to_1p.h" + +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace coop_to_1p { +namespace { + +namespace testing = open_spiel::testing; + +void BasicTests() { + testing::LoadGameTest("coop_to_1p(game=tiny_hanabi())"); + testing::RandomSimTest(*LoadGame("coop_to_1p(game=tiny_hanabi())"), + 100); +} + +} // namespace +} // namespace coop_to_1p +} // namespace open_spiel + +int main(int argc, char** argv) { open_spiel::coop_to_1p::BasicTests(); } diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/efg_writer.cc b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/efg_writer.cc new file mode 100644 index 0000000..19a4c87 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/efg_writer.cc @@ -0,0 +1,112 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/game_transforms/efg_writer.h" + +#include +#include +#include +#include + +#include "open_spiel/spiel.h" + +namespace open_spiel { + +EFGWriter::EFGWriter(const Game& game, const std::string filename, + bool action_names, bool separate_infostate_numbers) + : game_(game), + filename_(filename), + action_names_(action_names), + separate_infostate_numbers_(separate_infostate_numbers), + // Node indices start at 1. + chance_node_counter_(1), + terminal_node_counter_(1) { + const auto& info = game_.GetType(); + SPIEL_CHECK_EQ(info.dynamics, GameType::Dynamics::kSequential); + SPIEL_CHECK_EQ(info.reward_model, GameType::RewardModel::kTerminal); + SPIEL_CHECK_NE(info.chance_mode, GameType::ChanceMode::kSampledStochastic); +} + +void EFGWriter::Write() { + std::ofstream efg_file(filename_); + efg_file << "EFG 2 R"; + GameParameters params = game_.GetParameters(); + efg_file << " \"" << game_.ToString() << "\" { "; + for (int i = 1; i <= game_.NumPlayers(); i++) { + // EFG player index starts at 1. + efg_file << '"' << "Player " << i << "\" "; + infostate_numbers_.push_back(std::map()); + } + efg_file << "}\n"; + + // Get the root state. + Write(efg_file, *game_.NewInitialState()); + efg_file.close(); +} + +void EFGWriter::Write(std::ostream& f, const State& state) { + if (state.IsTerminal()) { + f << "t \"\" "; + f << terminal_node_counter_; + terminal_node_counter_++; + f << " \"\" "; + f << "{ "; + for (auto r : state.Returns()) { + f << r << " "; + } + f << "}\n"; + return; + } else if (state.IsChanceNode()) { + f << "c \"\" "; + f << chance_node_counter_; + chance_node_counter_++; + f << " \"\" "; + f << "{ "; + for (auto action_and_probs : state.ChanceOutcomes()) { + if (action_names_) { + f << '"' << state.ActionToString(action_and_probs.first) << "\" "; + } else { + f << '"' << action_and_probs.first << "\" "; + } + f << std::setprecision(10) << action_and_probs.second << " "; + } + f << "} 0\n"; + } else { + int p = state.CurrentPlayer(); + f << "p \"\" " << p + 1 << " "; // EFG player index starts at 1. 
+ + std::string key = state.InformationStateString(); + int idx = state.CurrentPlayer(); + if (!separate_infostate_numbers_) idx = 0; // Only use one map. + + if (infostate_numbers_[idx].find(key) == infostate_numbers_[idx].end()) { + infostate_numbers_[idx][key] = infostate_numbers_[idx].size(); + } + f << infostate_numbers_[idx][key] + 1; // Infostate numbering starts at 1. + f << " \"\" { "; + for (auto action : state.LegalActions()) { + if (action_names_) { + f << '"' << state.ActionToString(action) << "\" "; + } else { + f << '"' << action << "\" "; + } + } + f << "} 0\n"; + } + for (auto action : state.LegalActions()) { + Write(f, *state.Child(action)); + } +} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/efg_writer.h b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/efg_writer.h new file mode 100644 index 0000000..b236842 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/efg_writer.h @@ -0,0 +1,58 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAME_TRANSFORMS_EFG_WRITER_H_ +#define OPEN_SPIEL_GAME_TRANSFORMS_EFG_WRITER_H_ + +#include +#include +#include + +#include "open_spiel/spiel.h" + +namespace open_spiel { + +// Takes an OpenSpiel game and converts it to the .efg format used by Gambit: +// http://www.gambit-project.org/gambit14/formats.html +// +// USE WITH CAUTION! For small games only. This could easily fill up disk +// space for large games. +// +// Note: Currently only supports sequential games and terminal rewards. + +class EFGWriter { + public: + EFGWriter(const Game& game, const std::string filename, + bool action_names = true, bool separate_infostate_numbers = true); + void Write(); + + private: + const Game& game_; + const std::string filename_; + // Use descriptive action names. If false, action ints are used. + bool action_names_; + // Keep track of infostate numbers for each player separately. In general, + // the same integer will specify different information sets for different + // players. + bool separate_infostate_numbers_; + int chance_node_counter_; + int terminal_node_counter_; + std::vector> infostate_numbers_; + + void Write(std::ostream& f, const State& state); +}; + +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAME_TRANSFORMS_EFG_WRITER_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/efg_writer_test.cc b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/efg_writer_test.cc new file mode 100644 index 0000000..c433c5e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/efg_writer_test.cc @@ -0,0 +1,63 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#include "open_spiel/game_transforms/efg_writer.h" + +#include +#include +#include + + +#include "open_spiel/game_transforms/turn_based_simultaneous_game.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" +#include "open_spiel/utils/file.h" + +namespace open_spiel { + +void WriteAndLoadGame(std::string game_name) { + std::string filename = + absl::StrCat(file::GetTmpDir(), "/open_spiel_test_", game_name, ".efg"); + std::shared_ptr base_game = LoadGame(game_name); + + EFGWriter(*base_game, filename).Write(); + + std::shared_ptr efg_game = + LoadGame("efg_game", {{"filename", GameParameter(filename)}}); + SPIEL_CHECK_TRUE(efg_game != nullptr); + GameType base_game_type = base_game->GetType(); + GameType efg_game_type = efg_game->GetType(); + SPIEL_CHECK_EQ(efg_game_type.dynamics, GameType::Dynamics::kSequential); + SPIEL_CHECK_EQ(efg_game_type.information, base_game_type.information); + SPIEL_CHECK_EQ(efg_game_type.utility, base_game_type.utility); + SPIEL_CHECK_EQ(efg_game_type.chance_mode, base_game_type.chance_mode); + SPIEL_CHECK_EQ(efg_game->NumDistinctActions(), + base_game->NumDistinctActions()); + // TODO(author11) More comprehensive tests that the games are the same. + testing::RandomSimTestNoSerialize(*efg_game, 100); + + SPIEL_CHECK_TRUE(file::Exists(filename)); + SPIEL_CHECK_TRUE(file::Remove(filename)); + SPIEL_CHECK_FALSE(file::Remove(filename)); // Already gone + SPIEL_CHECK_FALSE(file::Exists(filename)); +} + +} // namespace open_spiel + +int main(int argc, char **argv) { + open_spiel::WriteAndLoadGame("kuhn_poker"); + open_spiel::WriteAndLoadGame("leduc_poker"); + open_spiel::WriteAndLoadGame("liars_dice"); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/game_wrapper.h b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/game_wrapper.h new file mode 100644 index 0000000..9dadaf2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/game_wrapper.h @@ -0,0 +1,169 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAME_TRANSFORMS_GAME_WRAPPER_H_ +#define OPEN_SPIEL_GAME_TRANSFORMS_GAME_WRAPPER_H_ + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/spiel_globals.h" + +// Wraps a game, forwarding everything to the original implementation. 
+// Transforms can inherit from this, overriding only what they need. + +namespace open_spiel { + +class WrappedState : public State { + public: + WrappedState(std::shared_ptr game, std::unique_ptr state) + : State(game), state_(std::move(state)) {} + WrappedState(const WrappedState& other) + : State(other), state_(other.state_->Clone()) {} + + Player CurrentPlayer() const override { return state_->CurrentPlayer(); } + + std::vector LegalActions(Player player) const override { + return state_->LegalActions(player); + } + + std::vector LegalActions() const override { + return state_->LegalActions(); + } + + std::string ActionToString(Player player, Action action_id) const override { + return state_->ActionToString(player, action_id); + } + + std::string ToString() const override { return state_->ToString(); } + + bool IsTerminal() const override { return state_->IsTerminal(); } + + std::vector Rewards() const override { return state_->Rewards(); } + + std::vector Returns() const override { return state_->Returns(); } + + std::string InformationStateString(Player player) const override { + return state_->InformationStateString(player); + } + + void InformationStateTensor(Player player, + absl::Span values) const override { + state_->InformationStateTensor(player, values); + } + + std::string ObservationString(Player player) const override { + return state_->ObservationString(player); + } + + void ObservationTensor(Player player, + absl::Span values) const override { + state_->ObservationTensor(player, values); + } + + std::unique_ptr Clone() const override = 0; + + void UndoAction(Player player, Action action) override { + state_->UndoAction(player, action); + history_.pop_back(); + } + + ActionsAndProbs ChanceOutcomes() const override { + return state_->ChanceOutcomes(); + } + + std::vector LegalChanceOutcomes() const override { + return state_->LegalChanceOutcomes(); + } + + virtual const State& GetWrappedState() const { return *state_; } + + std::vector ActionsConsistentWithInformationFrom( + Action action) const override { + return state_->ActionsConsistentWithInformationFrom(action); + } + + protected: + // Another copy constructor usable by subclasses. Currently used by the cached + // tree game wrapper. 
+ WrappedState(const WrappedState& other, std::unique_ptr state) + : State(other), state_(std::move(state)) {} + + void DoApplyAction(Action action_id) override { + state_->ApplyAction(action_id); + } + + void DoApplyActions(const std::vector& actions) override { + state_->ApplyActions(actions); + } + + std::unique_ptr state_; +}; + +class WrappedGame : public Game { + public: + WrappedGame(std::shared_ptr game, GameType game_type, + GameParameters game_parameters) + : Game(game_type, game_parameters), game_(game) {} + + int NumDistinctActions() const override { + return game_->NumDistinctActions(); + } + + std::unique_ptr NewInitialState() const override = 0; + + int MaxChanceOutcomes() const override { return game_->MaxChanceOutcomes(); } + int NumPlayers() const override { return game_->NumPlayers(); } + double MinUtility() const override { return game_->MinUtility(); } + double MaxUtility() const override { return game_->MaxUtility(); } + absl::optional UtilitySum() const override { + return game_->UtilitySum(); + } + + std::vector InformationStateTensorShape() const override { + return game_->InformationStateTensorShape(); + } + + std::vector ObservationTensorShape() const override { + return game_->ObservationTensorShape(); + } + + TensorLayout InformationStateTensorLayout() const override { + return game_->InformationStateTensorLayout(); + } + TensorLayout ObservationTensorLayout() const override { + return game_->ObservationTensorLayout(); + } + std::vector PolicyTensorShape() const override { + return game_->PolicyTensorShape(); + } + int MaxGameLength() const override { return game_->MaxGameLength(); } + int MaxChanceNodesInHistory() const override { + return game_->MaxChanceNodesInHistory(); + } + + protected: + std::shared_ptr game_; +}; + +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAME_TRANSFORMS_GAME_WRAPPER_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/misere.cc b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/misere.cc new file mode 100644 index 0000000..3fc533d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/misere.cc @@ -0,0 +1,67 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/game_transforms/misere.h" + +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/game_transforms/game_wrapper.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace { + +// These parameters are the most-general case. The actual game may be simpler. 
+const GameType kGameType{/*short_name=*/"misere", + /*long_name=*/"Misere Version of a Regular Game", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kSampledStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kRewards, + /*max_num_players=*/100, + /*min_num_players=*/1, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + {{"game", GameParameter(GameParameter::Type::kGame, + /*is_mandatory=*/true)}}, + /*default_loadable=*/false, + /*provides_factored_observation_string=*/true, + /*is_concrete=*/false}; + +GameType MisereGameType(GameType game_type) { + game_type.short_name = kGameType.short_name; + game_type.long_name = absl::StrCat("Misere ", game_type.long_name); + return game_type; +} + +std::shared_ptr Factory(const GameParameters& params) { + auto game = LoadGame(params.at("game").game_value()); + GameType game_type = MisereGameType(game->GetType()); + return std::shared_ptr(new MisereGame(game, game_type, params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +} // namespace + +MisereGame::MisereGame(std::shared_ptr game, GameType game_type, + GameParameters game_parameters) + : WrappedGame(game, game_type, game_parameters) {} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/misere.h b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/misere.h new file mode 100644 index 0000000..df89f90 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/misere.h @@ -0,0 +1,76 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAME_TRANSFORMS_MISERE_H_ +#define OPEN_SPIEL_GAME_TRANSFORMS_MISERE_H_ + +#include "open_spiel/game_transforms/game_wrapper.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// Transforms a game into its Misere version by inverting the sign of the +// rewards / utilities. This is a self-inverse operation. +// https://en.wikipedia.org/wiki/Mis%C3%A8re + +namespace open_spiel { + +// Flips the sign of a vector. 
+inline std::vector<double> Negative(std::vector<double>&& vector) {
+  std::vector<double> neg = std::move(vector);
+  for (auto& item : neg) item = -item;
+  return neg;
+}
+
+class MisereState : public WrappedState {
+ public:
+  MisereState(std::shared_ptr<const Game> game, std::unique_ptr<State> state)
+      : WrappedState(game, std::move(state)) {}
+  MisereState(const MisereState& other) = default;
+
+  std::vector<double> Rewards() const override {
+    return Negative(state_->Rewards());
+  }
+
+  std::vector<double> Returns() const override {
+    return Negative(state_->Returns());
+  }
+
+  std::unique_ptr<State> Clone() const override {
+    return std::unique_ptr<State>(new MisereState(*this));
+  }
+};
+
+class MisereGame : public WrappedGame {
+ public:
+  MisereGame(std::shared_ptr<const Game> game, GameType game_type,
+             GameParameters game_parameters);
+  MisereGame(const MisereGame& other) = default;
+
+  std::unique_ptr<State> NewInitialState() const override {
+    return std::unique_ptr<State>(
+        new MisereState(shared_from_this(), game_->NewInitialState()));
+  }
+
+  double MinUtility() const override { return -game_->MaxUtility(); }
+  double MaxUtility() const override { return -game_->MinUtility(); }
+  absl::optional<double> UtilitySum() const override {
+    auto base_game_utility_sum = game_->UtilitySum();
+    return !base_game_utility_sum.has_value() ? base_game_utility_sum
+                                              : -base_game_utility_sum.value();
+  }
+};
+
+}  // namespace open_spiel
+
+#endif  // OPEN_SPIEL_GAME_TRANSFORMS_MISERE_H_
diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/misere_test.cc b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/misere_test.cc
new file mode 100644
index 0000000..c0ee6a2
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/misere_test.cc
@@ -0,0 +1,36 @@
+// Copyright 2021 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "open_spiel/game_transforms/misere.h"
+
+#include "open_spiel/spiel.h"
+#include "open_spiel/tests/basic_tests.h"
+
+namespace open_spiel {
+namespace misere {
+namespace {
+
+namespace testing = open_spiel::testing;
+
+void BasicMisereTests() {
+  testing::LoadGameTest("misere(game=kuhn_poker())");
+  testing::NoChanceOutcomesTest(*LoadGame("misere(game=tic_tac_toe())"));
+  testing::RandomSimTest(*LoadGame("misere(game=leduc_poker())"), 100);
+}
+
+}  // namespace
+}  // namespace misere
+}  // namespace open_spiel
+
+int main(int argc, char** argv) { open_spiel::misere::BasicMisereTests(); }
diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/normal_form_extensive_game.cc b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/normal_form_extensive_game.cc
new file mode 100644
index 0000000..d970e11
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/normal_form_extensive_game.cc
@@ -0,0 +1,107 @@
+// Copyright 2021 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
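A small sketch of the sign flip implemented by the misère wrapper above (illustrative; assumes the standard OpenSpiel API): the same action sequence played in the base game and in its misère version yields element-wise negated returns, and the utility bounds swap accordingly.

    #include <memory>
    #include <vector>

    #include "open_spiel/spiel.h"
    #include "open_spiel/spiel_utils.h"

    void MisereSketch() {
      std::shared_ptr<const open_spiel::Game> base =
          open_spiel::LoadGame("tic_tac_toe");
      std::shared_ptr<const open_spiel::Game> misere =
          open_spiel::LoadGame("misere(game=tic_tac_toe())");

      // MinUtility/MaxUtility are swapped and negated by the wrapper.
      SPIEL_CHECK_EQ(misere->MaxUtility(), -base->MinUtility());

      // Apply the same (arbitrary) moves to both games until terminal.
      std::unique_ptr<open_spiel::State> s1 = base->NewInitialState();
      std::unique_ptr<open_spiel::State> s2 = misere->NewInitialState();
      while (!s1->IsTerminal()) {
        open_spiel::Action a = s1->LegalActions().front();
        s1->ApplyAction(a);
        s2->ApplyAction(a);
      }
      // Terminal returns in the misère game are the negated base returns.
      std::vector<double> r1 = s1->Returns();
      std::vector<double> r2 = s2->Returns();
      for (int p = 0; p < base->NumPlayers(); ++p) {
        SPIEL_CHECK_EQ(r2[p], -r1[p]);
      }
    }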
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/game_transforms/normal_form_extensive_game.h" + +#include +#include +#include + +#include "open_spiel/algorithms/deterministic_policy.h" +#include "open_spiel/algorithms/expected_returns.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tensor_game.h" + +namespace open_spiel { + +using open_spiel::tensor_game::TensorGame; + +// These parameters reflect the most-general game, with the maximum +// API coverage. The actual game may be simpler and might not provide +// all the interfaces. +// This is used as a placeholder for game registration. The actual instantiated +// game will have more accurate information. +const GameType kGameType{ + /*short_name=*/"normal_form_extensive_game", + /*long_name=*/"Normal-Form Version of an Extensive Game", + GameType::Dynamics::kSimultaneous, + GameType::ChanceMode::kDeterministic, + GameType::Information::kOneShot, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/100, + /*min_num_players=*/1, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + {{"game", + GameParameter(GameParameter::Type::kGame, /*is_mandatory=*/true)}}, + /*default_loadable=*/false, + /*provides_factored_observation_string=*/true, + /*is_concrete=*/false}; + +std::shared_ptr Factory(const GameParameters& params) { + return ExtensiveToTensorGame(*LoadGame(params.at("game").game_value())); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +std::shared_ptr ExtensiveToTensorGame(const Game& game) { + std::vector> action_names(game.NumPlayers()); + + GameType type = game.GetType(); + + std::vector policies; + for (Player player = 0; player < game.NumPlayers(); ++player) { + algorithms::DeterministicTabularPolicy policy(game, player); + do { + action_names[player].push_back(policy.ToString(/*delimiter=*/" --- ")); + } while (policy.NextPolicy()); + policy.ResetDefaultPolicy(); + policies.push_back(policy); + } + std::vector policy_ptrs(policies.size()); + for (Player player = 0; player < game.NumPlayers(); ++player) { + policy_ptrs[player] = &policies[player]; + } + const std::unique_ptr initial_state = game.NewInitialState(); + std::vector> utils(game.NumPlayers()); + bool last_entry; + do { + std::vector returns = algorithms::ExpectedReturns( + *initial_state, policy_ptrs, /*depth_limit=*/-1); + for (Player player = 0; player < game.NumPlayers(); ++player) { + utils[player].push_back(returns[player]); + } + last_entry = true; + for (auto policy = policies.rbegin(); policy != policies.rend(); ++policy) { + if (policy->NextPolicy()) { + last_entry = false; + break; + } else { + policy->ResetDefaultPolicy(); + } + } + } while (!last_entry); + + return tensor_game::CreateTensorGame(kGameType.short_name, + "Normal-form " + type.long_name, + action_names, utils); +} + +} // namespace open_spiel diff --git 
a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/normal_form_extensive_game.h b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/normal_form_extensive_game.h new file mode 100644 index 0000000..273f958 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/normal_form_extensive_game.h @@ -0,0 +1,46 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAME_TRANSFORMS_NORMAL_FORM_EXTENSIVE_GAME_H +#define OPEN_SPIEL_GAME_TRANSFORMS_NORMAL_FORM_EXTENSIVE_GAME_H + +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/tensor_game.h" + +namespace open_spiel { + +// Creates an extensive-form game (EFG)'s equivalent tensor game. +// +// Note that this tensor game will have a row (or axis-1 slice, etc) for each +// deterministic policy in the extensive-form game. As such, it will be +// exponentially larger than the extensive-form game. In particular, if S_i is +// number of information states for player i, and A(s_i) for s_i in S_i is the +// set of legal actions at s_i, then the number of deterministic policies is +// the product \Prod_{s_i in S_i) |A(s_i)|, and can include many redundant +// policies that differ, e.g., only in unreachable states. See Chapter 5 of +// (Shoham and Leyton-Brown, Multiagent Systems Algorithmic, Game-Theoretic, and +// Logical Foundations, 2009, http://masfoundations.org/) for more detail, +// including examples of the transformations. +// +// Hence, this method should only be used for small games! For example, Kuhn +// poker has 64 deterministic policies, resulting in a 64-by-64 matrix. + +std::shared_ptr ExtensiveToTensorGame( + const Game& game); + +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAME_TRANSFORMS_NORMAL_FORM_EXTENSIVE_GAME_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/normal_form_extensive_game_test.cc b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/normal_form_extensive_game_test.cc new file mode 100644 index 0000000..60372db --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/normal_form_extensive_game_test.cc @@ -0,0 +1,36 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/game_transforms/normal_form_extensive_game.h" + +namespace open_spiel { +namespace { + +void ExtensiveToTensorGameTest() { + // This just does a conversion and checks the sizes. 
+ std::shared_ptr auction_game = + LoadGame("first_sealed_auction(players=3,max_value=4)"); + std::shared_ptr auction_tensor_game = + ExtensiveToTensorGame(*auction_game); + SPIEL_CHECK_EQ(auction_tensor_game->Shape()[0], 24); + SPIEL_CHECK_EQ(auction_tensor_game->Shape()[1], 24); + SPIEL_CHECK_EQ(auction_tensor_game->Shape()[2], 24); +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::ExtensiveToTensorGameTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/repeated_game.cc b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/repeated_game.cc new file mode 100644 index 0000000..f137995 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/repeated_game.cc @@ -0,0 +1,313 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/game_transforms/repeated_game.h" + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/simultaneous_move_game.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace { + +constexpr bool kDefaultEnableInformationState = false; +constexpr int kDefaultRecall = 1; + +// These parameters represent the most general case. Game specific params are +// parsed once the actual stage game is supplied. 
+const GameType kGameType{ + /*short_name=*/"repeated_game", + /*long_name=*/"Repeated Normal-Form Game", + GameType::Dynamics::kSimultaneous, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kRewards, + /*max_num_players=*/100, + /*min_num_players=*/1, + /*provides_information_state_string=*/kDefaultEnableInformationState, + /*provides_information_state_tensor=*/kDefaultEnableInformationState, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"stage_game", + GameParameter(GameParameter::Type::kGame, /*is_mandatory=*/true)}, + {"num_repetitions", + GameParameter(GameParameter::Type::kInt, /*is_mandatory=*/true)}, + {"recall", GameParameter(kDefaultRecall)}}, + /*default_loadable=*/false, + /*provides_factored_observation_string=*/false, + /*is_concrete=*/false}; + +std::shared_ptr Factory(const GameParameters& params) { + return CreateRepeatedGame(*LoadGame(params.at("stage_game").game_value()), + params); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +} // namespace + +RepeatedState::RepeatedState(std::shared_ptr game, + std::shared_ptr stage_game, + int num_repetitions, + int recall) + : SimMoveState(game), + stage_game_(stage_game), + stage_game_state_(stage_game->NewInitialState()), + num_repetitions_(num_repetitions), + recall_(recall) { + actions_history_.reserve(num_repetitions_); + rewards_history_.reserve(num_repetitions_); +} + +void RepeatedState::DoApplyActions(const std::vector& actions) { + SPIEL_CHECK_EQ(actions.size(), num_players_); + // Faster to clone the reference stage_game_state_ than call + // game_->NewInitialState(). + std::unique_ptr stage_game_state = stage_game_state_->Clone(); + stage_game_state->ApplyActions(actions); + SPIEL_CHECK_TRUE(stage_game_state->IsTerminal()); + actions_history_.push_back(actions); + rewards_history_.push_back(stage_game_state->Returns()); +} + +std::string RepeatedState::ToString() const { + std::string rv; + for (int i = 0; i < actions_history_.size(); ++i) { + absl::StrAppend(&rv, "Round ", i, ":\n"); + absl::StrAppend(&rv, "Actions: "); + for (int j = 0; j < num_players_; ++j) { + absl::StrAppend( + &rv, stage_game_state_->ActionToString(j, actions_history_[i][j]), + " "); + } + absl::StrAppend(&rv, "\n"); + absl::StrAppend(&rv, "Rewards: "); + for (auto player_reward : rewards_history_[i]) + absl::StrAppend(&rv, player_reward, " "); + absl::StrAppend(&rv, "\n"); + } + absl::StrAppend(&rv, "Total Returns: "); + for (auto player_return : Returns()) absl::StrAppend(&rv, player_return, " "); + return rv; +} + +bool RepeatedState::IsTerminal() const { + return actions_history_.size() == num_repetitions_; +} + +std::vector RepeatedState::Rewards() const { + return rewards_history_.empty() ? 
std::vector(num_players_, 0.0) + : rewards_history_.back(); +} + +std::vector RepeatedState::Returns() const { + std::vector returns(num_players_, 0.0); + for (auto rewards : rewards_history_) { + for (int i = 0; i < rewards.size(); ++i) { + returns[i] += rewards[i]; + } + } + return returns; +} + +std::string RepeatedState::InformationStateString(Player /*player*/) const { + std::string rv; + if (actions_history_.empty()) return rv; + for (int j = 0; j < actions_history_.size(); ++j) { + for (int i = 0; i < num_players_; ++i) { + absl::StrAppend( + &rv, stage_game_state_->ActionToString(i, actions_history_[j][i]), + " "); + } + absl::StrAppend(&rv, ";"); + } + return rv; +} + +std::string RepeatedState::ObservationString(Player /*player*/) const { + std::string rv; + if (actions_history_.empty()) { return rv; } + + // Starting from the back of the history, show each player's moves: + for (int j = 0; + j < recall_ && static_cast(actions_history_.size()) - 1 - j >= 0; + ++j) { + int hist_idx = actions_history_.size() - 1 - j; + SPIEL_CHECK_GE(hist_idx, 0); + SPIEL_CHECK_LT(hist_idx, actions_history_.size()); + for (int i = 0; i < num_players_; ++i) { + absl::StrAppend(&rv, + stage_game_state_->ActionToString(i, actions_history_[hist_idx][i]), + " "); + } + } + return rv; +} + +void RepeatedState::InformationStateTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + SPIEL_CHECK_EQ(values.size(), game_->InformationStateTensorSize()); + std::fill(values.begin(), values.end(), 0.0); + if (actions_history_.empty()) return; + + auto ptr = values.begin(); + for (int j = 0; j < actions_history_.size(); ++j) { + for (int i = 0; i < num_players_; ++i) { + ptr[actions_history_[j][i]] = 1; + ptr += stage_game_state_->LegalActions(i).size(); + } + } +} + +void RepeatedState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + SPIEL_CHECK_EQ(values.size(), game_->ObservationTensorSize()); + std::fill(values.begin(), values.end(), 0.0); + if (actions_history_.empty()) return; + + auto ptr = values.begin(); + // Starting from the back of the history, show each player's moves: + for (int j = 0; + j < recall_ && static_cast(actions_history_.size()) - 1 - j >= 0; + j++) { + int hist_idx = static_cast(actions_history_.size()) - 1 - j; + SPIEL_CHECK_GE(hist_idx, 0); + SPIEL_CHECK_LT(hist_idx, actions_history_.size()); + for (int i = 0; i < num_players_; ++i) { + ptr[actions_history_[hist_idx][i]] = 1; + ptr += stage_game_state_->LegalActions(i).size(); + } + } + + SPIEL_CHECK_LE(ptr, values.end()); +} + +void RepeatedState::ObliviousObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + SPIEL_CHECK_EQ(values.size(), game_->ObservationTensorSize()); + std::fill(values.begin(), values.end(), 1.0); + if (actions_history_.empty()) return; +} + +std::vector RepeatedState::LegalActions(Player player) const { + if (IsTerminal()) return {}; + return stage_game_state_->LegalActions(player); +} + +std::string RepeatedState::ActionToString(Player player, + Action action_id) const { + return stage_game_state_->ActionToString(player, action_id); +} + +std::unique_ptr RepeatedState::Clone() const { + return std::unique_ptr(new RepeatedState(*this)); +} + +namespace { +GameType ConvertType(GameType type, bool enable_infostate) { + type.short_name = kGameType.short_name; + type.long_name = 
"Repeated " + type.long_name; + type.dynamics = kGameType.dynamics; + type.information = kGameType.information; + type.reward_model = kGameType.reward_model; + type.parameter_specification = kGameType.parameter_specification; + type.provides_information_state_string = enable_infostate; + type.provides_information_state_tensor = enable_infostate; + type.provides_observation_string = true; + type.provides_observation_tensor = true; + return type; +} +} // namespace + +RepeatedGame::RepeatedGame(std::shared_ptr stage_game, + const GameParameters& params) + : SimMoveGame( + ConvertType( + stage_game->GetType(), + open_spiel::ParameterValue( + params, "enable_infostate", + absl::optional(kDefaultEnableInformationState))), + params), + stage_game_(stage_game), + num_repetitions_(ParameterValue("num_repetitions")), + recall_(ParameterValue("recall", kDefaultRecall)) { + SPIEL_CHECK_GE(recall_, 1); +} + +std::shared_ptr CreateRepeatedGame(const Game& stage_game, + const GameParameters& params) { + // The stage game must be a deterministic normal-form (one-shot) game. + SPIEL_CHECK_EQ(stage_game.MaxGameLength(), 1); + SPIEL_CHECK_EQ(stage_game.GetType().dynamics, + GameType::Dynamics::kSimultaneous); + SPIEL_CHECK_EQ(stage_game.GetType().chance_mode, + GameType::ChanceMode::kDeterministic); + return std::make_shared(stage_game.shared_from_this(), + params); +} + +std::shared_ptr CreateRepeatedGame( + const std::string& stage_game_name, const GameParameters& params) { + auto game = LoadGame(stage_game_name); + // The stage game must be a deterministic normal-form (one-shot) game. + SPIEL_CHECK_EQ(game->MaxGameLength(), 1); + SPIEL_CHECK_EQ(game->GetType().dynamics, GameType::Dynamics::kSimultaneous); + SPIEL_CHECK_EQ(game->GetType().chance_mode, + GameType::ChanceMode::kDeterministic); + return CreateRepeatedGame(*game, params); +} + +std::unique_ptr RepeatedGame::NewInitialState() const { + return std::unique_ptr( + new RepeatedState(shared_from_this(), stage_game_, + num_repetitions_, recall_)); +} + +std::vector RepeatedGame::InformationStateTensorShape() const { + int player_actions_size = 0; + for (int i = 0; i < NumPlayers(); ++i) { + player_actions_size += + stage_game_->NewInitialState()->LegalActions(i).size(); + } + return {num_repetitions_ * player_actions_size}; +} + +std::vector RepeatedGame::ObservationTensorShape() const { + int size = 0; + for (int i = 0; i < NumPlayers(); ++i) + size += recall_ * stage_game_->NewInitialState()->LegalActions(i).size(); + return {size}; +} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/repeated_game.h b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/repeated_game.h new file mode 100644 index 0000000..709f3e3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/repeated_game.h @@ -0,0 +1,121 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef OPEN_SPIEL_GAME_TRANSFORMS_REPEATED_GAME_H_ +#define OPEN_SPIEL_GAME_TRANSFORMS_REPEATED_GAME_H_ + +#include +#include +#include + +#include "open_spiel/simultaneous_move_game.h" +#include "open_spiel/spiel.h" + +// Transform for creating a repeated game from a normal-form game. +// https://en.wikipedia.org/wiki/Repeated_game. +// +// Parameters: +// "enable_infostate" bool Enable the sequence of round outcomes as the +// information state tensor and string (default: +// false). +// "stage_game" game The game that will be repeated. +// "num_repetitions" int Number of times that the game is repeated. +// "recall" int Number of previous steps that defines the +// observations when enable_infostate is false +// (default: 1). + +namespace open_spiel { + +class RepeatedState : public SimMoveState { + public: + RepeatedState(std::shared_ptr game, + std::shared_ptr stage_game, int num_repetitions, + int recall); + + Player CurrentPlayer() const override { + return IsTerminal() ? kTerminalPlayerId : kSimultaneousPlayerId; + } + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Rewards() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + void InformationStateTensor(Player player, + absl::Span values) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + std::vector LegalActions(Player player) const override; + + protected: + void DoApplyActions(const std::vector& actions) override; + + private: + void ObliviousObservationTensor(Player player, + absl::Span values) const; + + std::shared_ptr stage_game_; + // Store a reference initial state of the stage game for efficient calls + // to state functions (e.g. LegalActions()). + std::shared_ptr stage_game_state_; + int num_repetitions_; + int recall_; + std::vector> actions_history_{}; + std::vector> rewards_history_{}; +}; + +class RepeatedGame : public SimMoveGame { + public: + RepeatedGame(std::shared_ptr stage_game, + const GameParameters& params); + std::unique_ptr NewInitialState() const override; + int MaxGameLength() const override { return num_repetitions_; } + int NumPlayers() const override { return stage_game_->NumPlayers(); } + int NumDistinctActions() const override { + return stage_game_->NumDistinctActions(); + } + double MinUtility() const override { + return stage_game_->MinUtility() * num_repetitions_; + } + double MaxUtility() const override { + return stage_game_->MaxUtility() * num_repetitions_; + } + absl::optional UtilitySum() const override { + auto per_stage_utility_sum = stage_game_->UtilitySum(); + return !per_stage_utility_sum.has_value() + ? per_stage_utility_sum + : per_stage_utility_sum.value() * num_repetitions_; + } + std::vector InformationStateTensorShape() const override; + std::vector ObservationTensorShape() const override; + + const Game* StageGame() const { return stage_game_.get(); } + + private: + std::shared_ptr stage_game_; + const int num_repetitions_; + const int recall_; +}; + +// Creates a repeated game based on the stage game. 
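+// For instance (an illustrative sketch, not part of the upstream header), a
+// ten-round repeated rock-paper-scissors game can be built either from a game
+// string or from explicit parameters:
+//
+//   auto rps10 = LoadGame(
+//       "repeated_game(stage_game=matrix_rps(),num_repetitions=10)");
+//   GameParameters params;
+//   params["num_repetitions"] = GameParameter(10);
+//   auto also_rps10 = CreateRepeatedGame("matrix_rps", params);
+//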
+std::shared_ptr CreateRepeatedGame(const Game& stage_game, + const GameParameters& params); +std::shared_ptr CreateRepeatedGame( + const std::string& stage_game_name, const GameParameters& params); + +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAME_TRANSFORMS_REPEATED_GAME_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/repeated_game_test.cc b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/repeated_game_test.cc new file mode 100644 index 0000000..aeb3e05 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/repeated_game_test.cc @@ -0,0 +1,218 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/game_transforms/repeated_game.h" + +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace { + +void BasicRepeatedGameTest() { + std::string game_string = + "repeated_game(stage_game=matrix_rps(),num_repetitions=10)"; + open_spiel::testing::LoadGameTest(game_string); + open_spiel::testing::NoChanceOutcomesTest(*LoadGame(game_string)); + open_spiel::testing::RandomSimTest(*LoadGame(game_string), 10); + // Test loading from a pre-loaded stage game. + std::shared_ptr stage_game = LoadGame("matrix_rps"); + GameParameters params; + params["num_repetitions"] = GameParameter(10); + std::shared_ptr repeated_game = + CreateRepeatedGame(*stage_game, params); + SPIEL_CHECK_TRUE(repeated_game != nullptr); + // Test loading from a stage game string. 
+ repeated_game = CreateRepeatedGame("matrix_pd", params); + SPIEL_CHECK_TRUE(repeated_game != nullptr); +} + +void RepeatedRockPaperScissorsTest(std::shared_ptr repeated_game) { + std::unique_ptr state = repeated_game->NewInitialState(); + SPIEL_CHECK_EQ(state->LegalActions(0), state->LegalActions(1)); + SPIEL_CHECK_EQ(state->ActionToString(0, 0), "Rock"); + SPIEL_CHECK_EQ(state->ActionToString(0, 1), "Paper"); + SPIEL_CHECK_EQ(state->ActionToString(0, 2), "Scissors"); + SPIEL_CHECK_EQ(state->ActionToString(1, 0), "Rock"); + SPIEL_CHECK_EQ(state->ActionToString(1, 1), "Paper"); + SPIEL_CHECK_EQ(state->ActionToString(1, 2), "Scissors"); + + state->ApplyActions({0, 1}); + SPIEL_CHECK_EQ(state->PlayerReward(0), -1); + SPIEL_CHECK_EQ(state->PlayerReward(1), 1); + SPIEL_CHECK_EQ(state->ObservationString(), "Rock Paper "); + SPIEL_CHECK_TRUE(absl::c_equal(state->ObservationTensor(0), + std::vector{1, 0, 0, 0, 1, 0})); + state->ApplyActions({1, 0}); + SPIEL_CHECK_EQ(state->PlayerReward(0), 1); + SPIEL_CHECK_EQ(state->PlayerReward(1), -1); + SPIEL_CHECK_EQ(state->ObservationString(), "Paper Rock "); + SPIEL_CHECK_TRUE(absl::c_equal(state->ObservationTensor(0), + std::vector{0, 1, 0, 1, 0, 0})); + state->ApplyActions({2, 2}); + SPIEL_CHECK_EQ(state->PlayerReward(0), 0); + SPIEL_CHECK_EQ(state->PlayerReward(1), 0); + SPIEL_CHECK_EQ(state->ObservationString(), "Scissors Scissors "); + SPIEL_CHECK_TRUE(absl::c_equal(state->ObservationTensor(0), + std::vector{0, 0, 1, 0, 0, 1})); + SPIEL_CHECK_TRUE(state->IsTerminal()); +} + +void RepeatedRockPaperScissorsDefaultsTest() { + GameParameters params; + params["num_repetitions"] = GameParameter(3); + std::shared_ptr repeated_game = + CreateRepeatedGame("matrix_rps", params); + SPIEL_CHECK_EQ(repeated_game->GetType().max_num_players, 2); + SPIEL_CHECK_EQ(repeated_game->GetType().min_num_players, 2); + SPIEL_CHECK_EQ(repeated_game->GetType().utility, GameType::Utility::kZeroSum); + SPIEL_CHECK_EQ(repeated_game->GetType().reward_model, + GameType::RewardModel::kRewards); + SPIEL_CHECK_TRUE(repeated_game->GetType().provides_observation_tensor); + SPIEL_CHECK_FALSE(repeated_game->GetType().provides_information_state_tensor); + + // One-hot encoding of each player's previous action. + SPIEL_CHECK_EQ(repeated_game->ObservationTensorShape()[0], 6); + + RepeatedRockPaperScissorsTest(repeated_game); +} + +void RepeatedRockPaperScissorsRecallTwoTest() { + GameParameters params; + params["num_repetitions"] = GameParameter(1000); + params["recall"] = GameParameter(2); + std::shared_ptr repeated_game = + CreateRepeatedGame("matrix_rps", params); + SPIEL_CHECK_EQ(repeated_game->GetType().max_num_players, 2); + SPIEL_CHECK_EQ(repeated_game->GetType().min_num_players, 2); + SPIEL_CHECK_EQ(repeated_game->GetType().utility, GameType::Utility::kZeroSum); + SPIEL_CHECK_EQ(repeated_game->GetType().reward_model, + GameType::RewardModel::kRewards); + SPIEL_CHECK_TRUE(repeated_game->GetType().provides_observation_tensor); + SPIEL_CHECK_FALSE(repeated_game->GetType().provides_information_state_tensor); + + // One-hot encoding of each player's previous action. + SPIEL_CHECK_EQ(repeated_game->ObservationTensorShape()[0], 12); + + std::vector> observation_tensors = { + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // first + {1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0}, // second + {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0} // subsequent... + }; + std::vector observation_strings = { + "", // first observation + "Rock Rock ", // second + "Rock Rock Rock Rock " // subsequent... 
+ }; + + std::unique_ptr state = repeated_game->NewInitialState(); + int step = 0; + while (!state->IsTerminal()) { + int obs_idx = std::min(step, 2); + SPIEL_CHECK_EQ(state->ObservationString(0), observation_strings[obs_idx]); + SPIEL_CHECK_EQ(state->ObservationString(1), observation_strings[obs_idx]); + SPIEL_CHECK_TRUE(absl::c_equal(state->ObservationTensor(0), + observation_tensors[obs_idx])); + SPIEL_CHECK_TRUE(absl::c_equal(state->ObservationTensor(1), + observation_tensors[obs_idx])); + state->ApplyActions({0, 0}); + step += 1; + } + + SPIEL_CHECK_EQ(step, 1000); +} + +void RepeatedRockPaperScissorsInfoStateEnabledTest() { + GameParameters params; + params["num_repetitions"] = GameParameter(3); + params["enable_infostate"] = GameParameter(true); + std::shared_ptr repeated_game = + CreateRepeatedGame("matrix_rps", params); + SPIEL_CHECK_EQ(repeated_game->GetType().max_num_players, 2); + SPIEL_CHECK_EQ(repeated_game->GetType().min_num_players, 2); + SPIEL_CHECK_EQ(repeated_game->GetType().utility, GameType::Utility::kZeroSum); + SPIEL_CHECK_EQ(repeated_game->GetType().reward_model, + GameType::RewardModel::kRewards); + SPIEL_CHECK_TRUE(repeated_game->GetType().provides_observation_tensor); + SPIEL_CHECK_TRUE(repeated_game->GetType().provides_information_state_tensor); + SPIEL_CHECK_TRUE(repeated_game->GetType().provides_information_state_string); + + // One-hot encoding of each player's previous action. + SPIEL_CHECK_EQ(repeated_game->ObservationTensorShape()[0], 6); + + // One-hot encoding of each player's previous action times num_repetitions. + SPIEL_CHECK_EQ(repeated_game->InformationStateTensorShape()[0], 18); + + // Check information_state_string + std::unique_ptr state = repeated_game->NewInitialState(); + SPIEL_CHECK_EQ(state->InformationStateString(), ""); + state->ApplyActions({0, 0}); + SPIEL_CHECK_EQ(state->InformationStateString(), "Rock Rock ;"); + state->ApplyActions({1, 2}); + SPIEL_CHECK_EQ(state->InformationStateString(), + "Rock Rock ;Paper Scissors ;"); + + RepeatedRockPaperScissorsTest(repeated_game); +} + + +void RepeatedPrisonersDilemaTest() { + GameParameters params; + params["num_repetitions"] = GameParameter(2); + std::shared_ptr repeated_game = + CreateRepeatedGame("matrix_pd", params); + SPIEL_CHECK_EQ(repeated_game->GetType().max_num_players, 2); + SPIEL_CHECK_EQ(repeated_game->GetType().min_num_players, 2); + SPIEL_CHECK_EQ(repeated_game->GetType().utility, + GameType::Utility::kGeneralSum); + // repeated_game->UtilitySum() should raise an error. This is checked in + // game_transforms_test.py as it's simpler to catch the error from Python. 
+ SPIEL_CHECK_EQ(repeated_game->GetType().reward_model, + GameType::RewardModel::kRewards); + + std::unique_ptr state = repeated_game->NewInitialState(); + SPIEL_CHECK_EQ(state->LegalActions(0), state->LegalActions(1)); + SPIEL_CHECK_EQ(state->ActionToString(0, 0), "Cooperate"); + SPIEL_CHECK_EQ(state->ActionToString(0, 1), "Defect"); + SPIEL_CHECK_EQ(state->ActionToString(1, 0), "Cooperate"); + SPIEL_CHECK_EQ(state->ActionToString(1, 1), "Defect"); + + state->ApplyActions({0, 1}); + SPIEL_CHECK_EQ(state->PlayerReward(0), 0); + SPIEL_CHECK_EQ(state->PlayerReward(1), 10); + SPIEL_CHECK_EQ(state->ObservationString(), "Cooperate Defect "); + SPIEL_CHECK_TRUE( + absl::c_equal(state->ObservationTensor(0), std::vector{1, 0, 0, 1})); + state->ApplyActions({1, 0}); + SPIEL_CHECK_EQ(state->PlayerReward(0), 10); + SPIEL_CHECK_EQ(state->PlayerReward(1), 0); + SPIEL_CHECK_EQ(state->ObservationString(), "Defect Cooperate "); + SPIEL_CHECK_TRUE( + absl::c_equal(state->ObservationTensor(1), std::vector{0, 1, 1, 0})); + SPIEL_CHECK_TRUE(state->IsTerminal()); +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::BasicRepeatedGameTest(); + open_spiel::RepeatedRockPaperScissorsDefaultsTest(); + open_spiel::RepeatedRockPaperScissorsRecallTwoTest(); + open_spiel::RepeatedRockPaperScissorsInfoStateEnabledTest(); + open_spiel::RepeatedPrisonersDilemaTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/restricted_nash_response.cc b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/restricted_nash_response.cc new file mode 100644 index 0000000..7b1e801 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/restricted_nash_response.cc @@ -0,0 +1,364 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/game_transforms/restricted_nash_response.h" + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/game_transforms/game_wrapper.h" +#include "open_spiel/observer.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { + +namespace { +const GameType kGameType{ + /*short_name=*/"restricted_nash_response", + /*long_name=*/"Restricted Nash Response Modification of a Game", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kSampledStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kRewards, + /*max_num_players=*/100, + /*min_num_players=*/1, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + {{"game", GameParameter(GameParameter::Type::kGame, /*is_mandatory=*/true)}, + {"fixed_player", GameParameter(kDefaultFixedPlayer)}, + {"p", GameParameter(kDefaultP)}}, + /*default_loadable=*/false, + /*provides_factored_observation_string=*/false, + /*is_concrete=*/false}; + +std::shared_ptr Factory(const GameParameters& params) { + return ConvertToRNR( + *LoadGame(params.at("game").game_value()), + ParameterValue(params, "fixed_player", kDefaultFixedPlayer), + ParameterValue(params, "p", kDefaultP), + std::make_shared()); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); +} // namespace + +class RestrictedNashResponseObserver : public Observer { + public: + RestrictedNashResponseObserver(IIGObservationType iig_obs_type) + : Observer(/*has_string=*/true, /*has_tensor=*/true), + iig_obs_type_(iig_obs_type) {} + + // Writes the complete observation in tensor form. + // The supplied allocator is responsible for providing memory to write the + // observation into. + void WriteTensor(const State& observed_state, int player, + Allocator *allocator) const override { + auto& state = open_spiel::down_cast( + observed_state); + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, state.NumPlayers()); + + std::shared_ptr original_game = state.GetOriginalGame(); + GameParameters params; + std::shared_ptr observer = + original_game->MakeObserver(iig_obs_type_, params); + // Observing player. + auto out = allocator->Get("initial_and_fixed", {2}); + if (iig_obs_type_.public_info) { + if (state.IsRestrictedNashResponseInitialState()) { + out.at(0) = 1; + } + } + if (iig_obs_type_.private_info == PrivateInfoType::kSinglePlayer) { + if (state.IsPlayerFixed(player)) { + out.at(1) = state.IsStateFixed(); + } else { + out.at(1) = 0; + } + } else if (iig_obs_type_.private_info == PrivateInfoType::kAllPlayers) { + out.at(1) = state.IsStateFixed(); + } + observer->WriteTensor(*state.GetOriginalState(), player, allocator); + } + + // Writes an observation in string form. It would be possible just to + // turn the tensor observation into a string, but we prefer something + // somewhat human-readable. 
+ + std::string StringFrom(const State &observed_state, + int player) const override { + auto& state = open_spiel::down_cast( + observed_state); + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, state.NumPlayers()); + std::string result; + + std::shared_ptr original_game = state.GetOriginalGame(); + GameParameters params; + std::shared_ptr observer = + original_game->MakeObserver(iig_obs_type_, params); + if (iig_obs_type_.public_info) { + if (state.IsRestrictedNashResponseInitialState()) { + return "Initial"; + } + } + if (iig_obs_type_.private_info == PrivateInfoType::kSinglePlayer) { + if (state.IsPlayerFixed(player)) { + result += state.IsStateFixed() ? "[Rnr: fixed]" : "[Rnr: free]"; + } + } else if (iig_obs_type_.private_info == PrivateInfoType::kAllPlayers) { + result += state.IsStateFixed() ? "[Rnr: fixed]" : "[Rnr: free]"; + } + + result += observer->StringFrom(*state.GetOriginalState(), player); + return result; + } + + private: + IIGObservationType iig_obs_type_; +}; + +RestrictedNashResponseState::RestrictedNashResponseState( + std::shared_ptr game, std::unique_ptr state, bool fixed, + Player fixed_player, bool initial_state, double p, + std::shared_ptr fixed_policy) + : State(std::move(game)), + state_(std::move(state)), + is_initial_(initial_state), + fixed_(fixed), + p_(p), + fixed_player_(fixed_player), + fixed_policy_(fixed_policy), + use_fixed_policy_(fixed_policy) {} + +Player RestrictedNashResponseState::CurrentPlayer() const { + if (is_initial_) { + return kChancePlayerId; + } else { + if (use_fixed_policy_ && fixed_ && + state_->CurrentPlayer() == fixed_player_) { + return kChancePlayerId; + } else { + return state_->CurrentPlayer(); + } + } +} + +void RestrictedNashResponseState::DoApplyAction(Action action_id) { + if (is_initial_) { + is_initial_ = false; + fixed_ = action_id == kFixedAction; + } else { + state_->ApplyAction(action_id); + } +} + +void RestrictedNashResponseState::DoApplyActions( + const std::vector& actions) { + SPIEL_CHECK_EQ(game_->GetType().dynamics, GameType::Dynamics::kSimultaneous); + SPIEL_CHECK_EQ(is_initial_, false); + state_->ApplyActions(actions); +} + +std::vector> +RestrictedNashResponseState::ChanceOutcomes() const { + if (is_initial_) { + return {{Action(kFixedAction), p_}, {Action(kFreeAction), 1 - p_}}; + } else { + if (state_->IsChanceNode()) { + return state_->ChanceOutcomes(); + } else if (use_fixed_policy_ && fixed_ && + state_->CurrentPlayer() == fixed_player_) { + return fixed_policy_->GetStatePolicy(*state_); + } + } + return {}; +} + +std::vector RestrictedNashResponseState::LegalActions() const { + if (is_initial_) { + return {Action(kFixedAction), Action(kFreeAction)}; + } else { + return state_->LegalActions(); + } +} + +std::vector RestrictedNashResponseState::LegalActions( + Player player) const { + // Initial state only has two actions to fixed or free tree + if (is_initial_) { + if (player == kChancePlayerId) { + return {Action(kFixedAction), Action(kFreeAction)}; + } else { + return {}; + } + } else { + if (use_fixed_policy_ && fixed_ && + state_->CurrentPlayer() == fixed_player_) { + // In other states if we exchanged fixed player nodes for chance node we + // return action for chance player + if (player == kChancePlayerId) { + return state_->LegalActions(fixed_player_); + } else { + return {}; + } + } else { + // Otherwise we just use original legal actions + return state_->LegalActions(player); + } + } +} + +std::string RestrictedNashResponseState::ActionToString( + Player player, Action action_id) const { + 
if (is_initial_) { + SPIEL_CHECK_EQ(player, kChancePlayerId); + return (action_id == kFixedAction ? "Fixed" : "Free"); + } else { + Player action_player = player; + if (action_player == kChancePlayerId && use_fixed_policy_ && fixed_ && + state_->CurrentPlayer() == fixed_player_) { + // This is a chance node in the RNR game, but a regular player node + // in the underlying game, so we need to use the player's true identity + // at this node. + action_player = state_->CurrentPlayer(); + } + return state_->ActionToString(action_player, action_id); + } +} + +std::string RestrictedNashResponseState::ToString() const { + if (is_initial_) { + return "Initial restricted Nash response state."; + } else { + std::string state_string = "Rnr state string of state in "; + state_string += (fixed_ ? "fixed" : "free"); + state_string += " part with underlying state:\n"; + return state_string + state_->ToString(); + } +} + +bool RestrictedNashResponseState::IsTerminal() const { + if (is_initial_) { + return false; + } else { + return state_->IsTerminal(); + } +} + +std::vector RestrictedNashResponseState::Returns() const { + if (is_initial_) { + return std::vector(num_players_, 0.0); + } + return state_->Returns(); +} + +// old observation API +std::string RestrictedNashResponseState::InformationStateString( + Player player) const { + const auto& game = + open_spiel::down_cast(*game_); + return game.info_state_observer_->StringFrom(*this, player); +} + +void RestrictedNashResponseState::InformationStateTensor( + Player player, absl::Span values) const { + ContiguousAllocator allocator(values); + const auto &game = + open_spiel::down_cast(*game_); + game.info_state_observer_->WriteTensor(*this, player, &allocator); +} + +std::string RestrictedNashResponseState::ObservationString( + Player player) const { + const auto& game = + open_spiel::down_cast(*game_); + return game.default_observer_->StringFrom(*this, player); +} + +void RestrictedNashResponseState::ObservationTensor( + Player player, absl::Span values) const { + ContiguousAllocator allocator(values); + const auto &game = + open_spiel::down_cast(*game_); + game.default_observer_->WriteTensor(*this, player, &allocator); +} + +RestrictedNashResponseState::RestrictedNashResponseState( + const RestrictedNashResponseState &other) + : State(other), + state_(other.state_->Clone()), + is_initial_(other.is_initial_), + fixed_(other.fixed_), + p_(other.p_), + fixed_player_(other.fixed_player_), + fixed_policy_(other.fixed_policy_), + use_fixed_policy_(other.use_fixed_policy_) {} + +std::unique_ptr RestrictedNashResponseState::Clone() const { + return std::unique_ptr(new RestrictedNashResponseState(*this)); +} + +namespace { +GameType ConvertType(GameType type) { + type.short_name = "rnr_" + type.short_name; + type.long_name = "Restricted Nash Response " + type.long_name; + return type; +} +} // namespace + +RestrictedNashResponseGame::RestrictedNashResponseGame( + std::shared_ptr game, Player fixed_player, double p, + std::shared_ptr fixed_policy) + : WrappedGame(game, ConvertType(game->GetType()), game->GetParameters()), + fixed_player_(fixed_player), + p_(p), + fixed_policy_(std::move(fixed_policy)) { + default_observer_ = + std::make_shared(kDefaultObsType); + info_state_observer_ = + std::make_shared(kInfoStateObsType); +} + +std::shared_ptr ConvertToRNR( + const Game& game, Player fixed_player, double p, + std::shared_ptr fixed_policy) { + return std::shared_ptr( + new RestrictedNashResponseGame(game.shared_from_this(), fixed_player, p, + fixed_policy)); +} 
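+
+// Example (illustrative sketch; not from the upstream OpenSpiel sources).
+// ConvertToRNR wraps an existing game behind an initial chance node that
+// routes play into a "fixed" copy with probability p and a "free" copy with
+// probability 1 - p; when a fixed_policy is supplied, the fixed player's
+// decisions in the fixed copy are resolved by that policy as chance outcomes.
+// Assuming the standard LoadGame API, a caller might write:
+//
+//   std::shared_ptr<const Game> base = LoadGame("kuhn_poker");
+//   std::shared_ptr<Policy> prior = std::make_shared<UniformPolicy>();
+//   std::shared_ptr<const Game> rnr =
+//       ConvertToRNR(*base, /*fixed_player=*/0, /*p=*/0.5, prior);
+//   std::unique_ptr<State> initial = rnr->NewInitialState();  // chance node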
+ +// Observer creation +std::shared_ptr RestrictedNashResponseGame::MakeObserver( + absl::optional iig_obs_type, + const GameParameters& params) const { + if (params.empty()) { + return std::make_shared( + iig_obs_type.value_or(kDefaultObsType)); + } else { + return MakeRegisteredObserver(iig_obs_type, params); + } +} +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/restricted_nash_response.h b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/restricted_nash_response.h new file mode 100644 index 0000000..77046e9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/restricted_nash_response.h @@ -0,0 +1,196 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAME_TRANSFORMS_RESTRICTED_NASH_RESPONSE_H_ +#define OPEN_SPIEL_GAME_TRANSFORMS_RESTRICTED_NASH_RESPONSE_H_ + +#include +#include +#include +#include +#include + +#include "open_spiel/game_transforms/game_wrapper.h" +#include "open_spiel/spiel.h" +#include "open_spiel/policy.h" + +// An implementation of Restricted Nash Response by Johanson et al. '08: +// http://www.johanson.ca/publications/poker/2007-nips-rnash/2007-nips-rnash.html + +namespace open_spiel { + +constexpr Player kDefaultFixedPlayer = 0; +constexpr double kDefaultP = 0.5; + +enum { kFixedAction = 0, kFreeAction = 1 }; + +class RestrictedNashResponseObserver; + +class RestrictedNashResponseState : public State { + public: + RestrictedNashResponseState(std::shared_ptr game, + std::unique_ptr state, bool fixed, + Player fixed_player, bool initial_state, double p, + std::shared_ptr fixed_policy); + + RestrictedNashResponseState(const RestrictedNashResponseState &other); + + Player CurrentPlayer() const override; + + std::string ActionToString(Player player, Action action_id) const override; + + std::string ToString() const override; + + bool IsTerminal() const override; + + std::vector Returns() const override; + + std::string InformationStateString(Player player) const override; + + void InformationStateTensor(Player player, + absl::Span values) const override; + + std::string ObservationString(Player player) const override; + + void ObservationTensor(Player player, + absl::Span values) const override; + + std::unique_ptr Clone() const override; + + std::vector> ChanceOutcomes() const override; + + std::vector LegalActions(Player player) const override; + + std::vector LegalActions() const override; + + std::shared_ptr GetOriginalGame() const { + return state_->GetGame(); + } + + bool IsPlayerFixed(Player player) const { return player == fixed_player_; } + + bool IsStateFixed() const { return fixed_; } + + std::shared_ptr GetOriginalState() const { return state_; } + + bool IsRestrictedNashResponseInitialState() const { return is_initial_; } + + protected: + void DoApplyAction(Action action_id) override; + void DoApplyActions(const std::vector &actions) override; + + private: + // underlying state + 
std::shared_ptr state_; + + // Variables showing if we are in the initial state and if not whether this + // part is fixed or not. + bool is_initial_; + bool fixed_; + // Constants representing p value and the player who is fixed. + const double p_; + const Player fixed_player_; + // Constants for the fixed strategy and if we use explicit fixed strategy + std::shared_ptr fixed_policy_; + const bool use_fixed_policy_; +}; + +class RestrictedNashResponseGame : public WrappedGame { + public: + explicit RestrictedNashResponseGame( + std::shared_ptr game, Player fixed_player, double p, + std::shared_ptr fixed_policy = nullptr); + std::shared_ptr MakeObserver( + absl::optional iig_obs_type, + const GameParameters& params) const; + + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new RestrictedNashResponseState( + shared_from_this(), game_->NewInitialState(), false, fixed_player_, + true, p_, fixed_policy_)); + } + + int NumDistinctActions() const override { + return game_->NumDistinctActions(); + } + + int MaxChanceOutcomes() const override { + if (fixed_policy_) { + // If a fixed policy is set, then we have a real RNR game, which means + // there is at least one chance node with 2 outcomes. But also, the + // fixed player actions are also treated as chance nodes, so the number + // of distinct actions can also determine the maximum number of chance + // outcomes. + std::vector candidates = { + game_->MaxChanceOutcomes(), 2, game_->NumDistinctActions() + }; + return *std::max_element(candidates.begin(), candidates.end()); + } else { + // Otherwise, it's the normal game. + return game_->MaxChanceOutcomes(); + } + } + + int NumPlayers() const override { return game_->NumPlayers(); } + + double MinUtility() const override { return game_->MinUtility(); } + + double MaxUtility() const override { return game_->MaxUtility(); } + + absl::optional UtilitySum() const override { + return game_->UtilitySum(); + } + + std::vector InformationStateTensorShape() const override { + // Underlying game plus + return {2 + game_->InformationStateTensorSize()}; + } + + std::vector ObservationTensorShape() const override { + // We flatten the representation of the underlying game and add one-hot + // indications of the to-play player and the observing player. + return {2 + game_->ObservationTensorSize()}; + } + + int MaxGameLength() const override { return game_->MaxGameLength() + 1; } + + int MaxChanceNodesInHistory() const override { + if (fixed_policy_) { + // If a fixed policy is set, then we have a real RNR game, which has an + // extra chance node. + return game_->MaxChanceNodesInHistory() + 1; + } else { + // Otherwise, it's just the normal game. + return game_->MaxChanceNodesInHistory(); + } + } + // old observation API + std::shared_ptr default_observer_; + std::shared_ptr info_state_observer_; + + private: + // Fixed player and p constants to be passed to the initial state + const Player fixed_player_; + const double p_; + // Constants for the fixed strategy and if we use explicit fixed strategy + std::shared_ptr fixed_policy_; +}; + +// Return back a transformed clone of the game. 
+std::shared_ptr ConvertToRNR( + const Game& game, Player fixed_player, double p, + std::shared_ptr fixed_policy = nullptr); +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAME_TRANSFORMS_RESTRICTED_NASH_RESPONSE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/restricted_nash_response_test.cc b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/restricted_nash_response_test.cc new file mode 100644 index 0000000..260da53 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/restricted_nash_response_test.cc @@ -0,0 +1,249 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/game_transforms/restricted_nash_response.h" + +#include + +#include "open_spiel/abseil-cpp/absl/random/uniform_int_distribution.h" +#include "open_spiel/algorithms/cfr.h" +#include "open_spiel/game_transforms/turn_based_simultaneous_game.h" +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" + +namespace open_spiel { +namespace { + +void SimulateGame(std::mt19937 *rng, const Game &game, + std::unique_ptr normal_state, + std::unique_ptr rnr_state, bool fixed, + Player fixed_player) { + // Now check that the states are identical via the ToString(). + std::string state_prefix = + fixed ? "Rnr state string of state in fixed part with underlying state:\n" + : "Rnr state string of state in free part with underlying state:\n"; + std::string infostate_prefix = fixed ? "[Rnr: fixed]" : "[Rnr: free]"; + while (!normal_state->IsTerminal()) { + SPIEL_CHECK_EQ(state_prefix + normal_state->ToString(), + rnr_state->ToString()); + if (game.GetType().provides_information_state_string) { + // Check the information states to each player are consistent. + for (auto p = Player{0}; p < game.NumPlayers(); p++) { + SPIEL_CHECK_EQ((p == fixed_player ? infostate_prefix : "") + + normal_state->InformationStateString(p), + rnr_state->InformationStateString(p)); + } + } + + if (normal_state->IsChanceNode()) { + SPIEL_CHECK_TRUE(rnr_state->IsChanceNode()); + + // Chance node; sample one according to underlying distribution + std::vector> outcomes = + normal_state->ChanceOutcomes(); + Action action = + open_spiel::SampleAction( + outcomes, std::uniform_real_distribution(0.0, 1.0)(*rng)) + .first; + + normal_state->ApplyAction(action); + rnr_state->ApplyAction(action); + } else if (normal_state->CurrentPlayer() == kSimultaneousPlayerId) { + SPIEL_CHECK_EQ(rnr_state->CurrentPlayer(), kSimultaneousPlayerId); + + // Players choose simultaneously. 
+ std::vector joint_action; + + // Sample an action for each player + for (auto p = Player{0}; p < game.NumPlayers(); p++) { + std::vector actions; + actions = normal_state->LegalActions(p); + absl::uniform_int_distribution<> dis(0, actions.size() - 1); + Action action = actions[dis(*rng)]; + joint_action.push_back(action); + } + + normal_state->ApplyActions(joint_action); + rnr_state->ApplyActions(joint_action); + } else { + // Chance or player node + SPIEL_CHECK_EQ(normal_state->CurrentPlayer(), rnr_state->CurrentPlayer()); + + Player p = normal_state->CurrentPlayer(); + + std::vector actions; + actions = normal_state->LegalActions(p); + absl::uniform_int_distribution<> dis(0, actions.size() - 1); + Action action = actions[dis(*rng)]; + + normal_state->ApplyAction(action); + rnr_state->ApplyAction(action); + } + } + + SPIEL_CHECK_TRUE(rnr_state->IsTerminal()); + + auto sim_returns = normal_state->Returns(); + auto turn_returns = rnr_state->Returns(); + + for (auto player = Player{0}; player < sim_returns.size(); player++) { + double utility = sim_returns[player]; + SPIEL_CHECK_GE(utility, game.MinUtility()); + SPIEL_CHECK_LE(utility, game.MaxUtility()); + + double other_utility = turn_returns[player]; + SPIEL_CHECK_EQ(utility, other_utility); + } +} + +void BasicRNRTests() { + for (const std::string& name : + {"blotto", "goofspiel", "kuhn_poker", "tiny_hanabi", "phantom_ttt", + "matrix_rps", "leduc_poker"}) { + std::cout << "Basic RNR Test for " << name << std::endl; + std::string full_game_str = + absl::StrCat("restricted_nash_response(game=", + name, "())"); + testing::RandomSimTest(*LoadGame(full_game_str), 10, /*serialize*/false, + /*verbose*/false, /*mask_test*/true); + } +} + +void TestBasicCreation() { + std::mt19937 rng; + + // Create different games for RNR and check the simulation + for (const std::string& name : + {"blotto", "goofspiel", "kuhn_poker", "tiny_hanabi", "phantom_ttt", + "matrix_rps", "leduc_poker"}) { + std::cout << "RestrictedNashResponse: Testing " << name << std::endl; + for (Player fixed_player = 0; fixed_player < 2; fixed_player++) { + for (int i = 0; i < 100; ++i) { + std::shared_ptr normal_game_game = LoadGame(name); + std::shared_ptr rnr_game = + ConvertToRNR(*LoadGame(name), fixed_player, 0.5); + auto normal_init_fixed = normal_game_game->NewInitialState(); + auto rnr_init_fixed = rnr_game->NewInitialState(); + rnr_init_fixed->ApplyAction(Action(kFixedAction)); + SimulateGame(&rng, *normal_game_game, std::move(normal_init_fixed), + std::move(rnr_init_fixed), true, fixed_player); + + auto rnr_init_free = rnr_game->NewInitialState(); + auto normal_init_free = normal_game_game->NewInitialState(); + rnr_init_free->ApplyAction(Action(kFreeAction)); + SimulateGame(&rng, *normal_game_game, std::move(normal_init_free), + std::move(rnr_init_free), false, fixed_player); + } + } + } +} + +void TestMatchingPenniesCreation() { + // Check the creation of matching pennies game + Player fixed_player = 1; + std::shared_ptr game = LoadGame("matrix_mp"); + std::shared_ptr rnr_game = ConvertToRNR(*game, fixed_player, 0.4); + SPIEL_CHECK_EQ(game->MaxGameLength() + 1, rnr_game->MaxGameLength()); + SPIEL_CHECK_EQ(rnr_game->NumPlayers(), game->NumPlayers()); + SPIEL_CHECK_EQ(rnr_game->MaxUtility(), game->MaxUtility()); + SPIEL_CHECK_EQ(rnr_game->MinUtility(), game->MinUtility()); + auto state = rnr_game->NewInitialState(); + SPIEL_CHECK_EQ("Initial restricted Nash response state.", state->ToString()); + SPIEL_CHECK_EQ(state->LegalActions().size(), 2); + + auto chance_outcomes = 
state->ChanceOutcomes(); + SPIEL_CHECK_EQ(chance_outcomes[0].second, 0.4); + SPIEL_CHECK_EQ(chance_outcomes[1].second, 0.6); + + // Fixed part + auto fixed_child = state->Child(kFixedAction); + SPIEL_CHECK_EQ(fixed_child->CurrentPlayer(), kSimultaneousPlayerId); + + // Free part + auto free_child = state->Child(kFreeAction); + SPIEL_CHECK_EQ(free_child->CurrentPlayer(), kSimultaneousPlayerId); + + for (Action joint_action : free_child->LegalActions()) { + auto new_fixed_child = fixed_child->Child(joint_action); + auto new_free_child = free_child->Child(joint_action); + SPIEL_CHECK_EQ(new_fixed_child->Rewards(), new_free_child->Rewards()); + SPIEL_CHECK_EQ(new_fixed_child->InformationStateString(1 - fixed_player), + new_free_child->InformationStateString(1 - fixed_player)); + SPIEL_CHECK_NE(new_fixed_child->InformationStateString(fixed_player), + new_free_child->InformationStateString(fixed_player)); + } +} + +void TestFixedPolicyGame() { + // Check the RNR which automatically puts the strategy in the game as chance + // nodes Setup + Player fixed_player = 1; + std::shared_ptr game = LoadGameAsTurnBased("matrix_mp"); + std::shared_ptr fixed_policy = + std::make_shared(*game); + auto initial_state = game->NewInitialState(); + initial_state->ApplyAction(0); + fixed_policy->SetStatePolicy(initial_state->InformationStateString(), + {{0, 1}, {1, 0}}); + // P 0.6 case when the resulting strategy is pure + std::shared_ptr rnr_game = + ConvertToRNR(*game, fixed_player, 0.6, fixed_policy); + algorithms::CFRPlusSolver solver(*rnr_game); + for (int i = 0; i < 1000; i++) { + solver.EvaluateAndUpdatePolicy(); + } + const std::shared_ptr average_policy = solver.AveragePolicy(); + auto player_two_policy = average_policy->GetStatePolicy( + "[Rnr: free]Current player: 1\nObserving player: 1. Non-terminal"); + for (int i = 0; i < player_two_policy.size(); i++) { + SPIEL_CHECK_FLOAT_NEAR(player_two_policy[i].second, i, 0.001); + } + auto player_one_policy = average_policy->GetStatePolicy( + "Current player: 0\nObserving player: 0. Non-terminal"); + for (int i = 0; i < player_one_policy.size(); i++) { + SPIEL_CHECK_FLOAT_NEAR(player_one_policy[i].second, 1 - i, 0.001); + } + // P 0.6 case when the resulting strategy is pure + rnr_game = ConvertToRNR(*game, fixed_player, 0.4, fixed_policy); + algorithms::CFRPlusSolver solver_two(*rnr_game); + for (int i = 0; i < 1000; i++) { + solver_two.EvaluateAndUpdatePolicy(); + } + const std::shared_ptr average_policy_two = solver_two.AveragePolicy(); + auto player_two_policy_two = average_policy_two->GetStatePolicy( + "[Rnr: free]Current player: 1\nObserving player: 1. Non-terminal"); + double check_policy[] = {1. / 6, 5. / 6}; + for (int i = 0; i < player_two_policy_two.size(); i++) { + SPIEL_CHECK_FLOAT_NEAR(player_two_policy_two[i].second, check_policy[i], + 0.001); + } + auto player_one_policy_two = average_policy_two->GetStatePolicy( + "Current player: 0\nObserving player: 0. 
Non-terminal"); + check_policy[0] = check_policy[1] = 0.5; + for (int i = 0; i < player_one_policy_two.size(); i++) { + SPIEL_CHECK_FLOAT_NEAR(player_one_policy_two[i].second, check_policy[i], + 0.001); + } +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char **argv) { + open_spiel::BasicRNRTests(); + open_spiel::TestBasicCreation(); + open_spiel::TestMatchingPenniesCreation(); + open_spiel::TestFixedPolicyGame(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/start_at.cc b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/start_at.cc new file mode 100644 index 0000000..8eb010b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/start_at.cc @@ -0,0 +1,123 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/game_transforms/start_at.h" + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/numbers.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/game_transforms/game_wrapper.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace { + +constexpr char kActionSeparator = ';'; + +// These parameters are the most-general case. The actual game may be simpler. +const GameType kGameType{ + /*short_name=*/"start_at", + /*long_name=*/"Start at specified subgame of a regular game.", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kSampledStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kRewards, + /*max_num_players=*/100, + /*min_num_players=*/1, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + {{"game", GameParameter(GameParameter::Type::kGame, /*is_mandatory=*/true)}, + {"history", + GameParameter(GameParameter::Type::kString, /*is_mandatory=*/true)}}, + /*default_loadable=*/false, + /*provides_factored_observation_string=*/true, + /*is_concrete=*/false}; + +std::shared_ptr Factory(const GameParameters& params) { + auto game = LoadGame(params.at("game").game_value()); + GameType game_type = game->GetType(); + game_type.short_name = kGameType.short_name; + game_type.long_name = + absl::StrCat("StartAt history=", params.at("history").string_value(), + " game=", game_type.long_name); + return std::make_shared(game, game_type, params); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +} // namespace + +// History is represented by a string of numbers separated by kActionSeparator. +std::vector HistoryFromString(const std::string& str) { + std::vector history; + if (str.empty()) return history; // Identity transformation. 
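+  // For example, a history string of "0;1;0" is parsed into the action
+  // sequence {0, 1, 0} by the loop below.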
+ + std::vector str_actions = + absl::StrSplit(str, kActionSeparator); + for (const auto& str_action : str_actions) { + Action a; + if (!absl::SimpleAtoi(str_action, &a)) { + SpielFatalError( + absl::StrCat("Error when parsing the action: ", str_action)); + } + history.push_back(a); + } + return history; +} + +std::unique_ptr StateFromHistory(std::shared_ptr game, + const std::vector& history) { + std::unique_ptr state = game->NewInitialState(); + for (const Action& a : history) state->ApplyAction(a); + return state; +} + +StartAtTransformationGame::StartAtTransformationGame( + std::shared_ptr game, GameType game_type, + GameParameters game_parameters) + : WrappedGame(game, game_type, game_parameters), + start_state_(StateFromHistory( + game, + HistoryFromString(game_parameters.at("history").string_value()))) {} + +std::unique_ptr StartAtTransformationGame::NewInitialState() const { + return std::make_unique(shared_from_this(), + start_state_->Clone()); +} + +StartAtTransformationState::StartAtTransformationState( + std::shared_ptr transformed_game, std::unique_ptr state) + : WrappedState(transformed_game, std::move(state)) { + const auto* start_at_game = open_spiel::down_cast< + const StartAtTransformationGame*>(game_.get()); + const std::vector start_history = + start_at_game->StartAtHistory(); + const std::vector wrap_history = state_->FullHistory(); + SPIEL_DCHECK_TRUE(std::equal(start_history.begin(), start_history.end(), + wrap_history.begin())); +} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/start_at.h b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/start_at.h new file mode 100644 index 0000000..cddc517 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/start_at.h @@ -0,0 +1,53 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAME_TRANSFORMS_START_AT_H_ +#define OPEN_SPIEL_GAME_TRANSFORMS_START_AT_H_ + +#include "open_spiel/game_transforms/game_wrapper.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// Transforms a game such that `transformed_game.new_initial_state()` already +// starts at the specified history. This is useful if you want to restrict +// algorithms only to some specific subgames. 
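+//
+// Example (illustrative sketch, assuming the standard LoadGame API; not part
+// of the upstream header):
+//
+//   // Kuhn poker after the two opening chance deals 0 and 1:
+//   std::shared_ptr<const Game> subgame =
+//       LoadGame("start_at(history=0;1,game=kuhn_poker())");
+//   std::unique_ptr<State> state = subgame->NewInitialState();
+//   // state->History() is empty, but the wrapped underlying state has
+//   // already applied actions 0 and 1.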
+ +namespace open_spiel { + +class StartAtTransformationState : public WrappedState { + public: + StartAtTransformationState(std::shared_ptr game, + std::unique_ptr state); + StartAtTransformationState(const StartAtTransformationState& other) = default; + std::unique_ptr Clone() const override { + return std::make_unique(*this); + } +}; + +class StartAtTransformationGame : public WrappedGame { + public: + StartAtTransformationGame(std::shared_ptr game, + GameType game_type, GameParameters game_parameters); + std::unique_ptr NewInitialState() const override; + std::vector StartAtHistory() const { + return start_state_->FullHistory(); + } + + private: + const std::unique_ptr start_state_; +}; + +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAME_TRANSFORMS_START_AT_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/start_at_test.cc b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/start_at_test.cc new file mode 100644 index 0000000..631cfdc --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/start_at_test.cc @@ -0,0 +1,64 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/game_transforms/start_at.h" +#include + +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace start_at { +namespace { + +namespace testing = open_spiel::testing; + +void BasicStartAtTests() { + testing::LoadGameTest("start_at(history=0;1;0,game=kuhn_poker())"); + testing::LoadGameTest( + "start_at(history=4;3;3;2;0;4;4;4;4;0,game=connect_four())"); + testing::RandomSimTest( + *LoadGame("start_at(history=0;1,game=kuhn_poker())"), 100); +} + +void StartsAtCorrectHistoryTest() { + std::shared_ptr game = LoadGame( + "start_at(history=0;1;0;0,game=kuhn_poker())"); + std::unique_ptr initial_state = game->NewInitialState(); + const StartAtTransformationState& state = open_spiel::down_cast< + const StartAtTransformationState&>(*initial_state); + + const std::string expected_observation_string = "011"; + { + const std::vector expected_history = {}; + SPIEL_CHECK_EQ(state.History(), expected_history); + SPIEL_CHECK_EQ(state.ObservationString(0), expected_observation_string); + } + { + const std::vector expected_history = {0, 1, 0, 0}; + SPIEL_CHECK_EQ(state.GetWrappedState().History(), expected_history); + SPIEL_CHECK_EQ(state.GetWrappedState().ObservationString(0), + expected_observation_string); + } +} + + +} // namespace +} // namespace start_at +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::start_at::BasicStartAtTests(); + open_spiel::start_at::StartsAtCorrectHistoryTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/turn_based_simultaneous_game.cc b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/turn_based_simultaneous_game.cc new file mode 100644 index 0000000..01014e9 --- /dev/null +++ 
b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/turn_based_simultaneous_game.cc @@ -0,0 +1,314 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/game_transforms/turn_based_simultaneous_game.h" + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { + +namespace { +// These parameters reflect the most-general game, with the maximum +// API coverage. The actual game may be simpler and might not provide +// all the interfaces. +// This is used as a placeholder for game registration. The actual instantiated +// game will have more accurate information. +const GameType kGameType{ + /*short_name=*/"turn_based_simultaneous_game", + /*long_name=*/"Turn-Based Version of a Simultaneous-Move Game", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kSampledStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kRewards, + /*max_num_players=*/100, + /*min_num_players=*/1, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + {{"game", + GameParameter(GameParameter::Type::kGame, /*is_mandatory=*/true)}}, + /*default_loadable=*/false, + /*provides_factored_observation_string=*/false, + /*is_concrete=*/false}; + +std::shared_ptr Factory(const GameParameters& params) { + return ConvertToTurnBased(*LoadGame(params.at("game").game_value())); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); +} // namespace + +TurnBasedSimultaneousState::TurnBasedSimultaneousState( + std::shared_ptr game, std::unique_ptr state) + : State(game), + state_(std::move(state)), + action_vector_(game->NumPlayers()), + rollout_mode_(kNoRollout) { + DetermineWhoseTurn(); +} + +Player TurnBasedSimultaneousState::CurrentPlayer() const { + return current_player_; +} + +void TurnBasedSimultaneousState::DetermineWhoseTurn() { + if (state_->CurrentPlayer() == kSimultaneousPlayerId) { + // When the underlying game's node is at a simultaneous move node, they get + // rolled out as turn-based, starting with player 0. + current_player_ = -1; + rollout_mode_ = kStartRollout; + RolloutModeIncrementCurrentPlayer(); + // If the rollout mode is used, then at least one player should have a valid + // action. + SPIEL_CHECK_LT(current_player_, num_players_); + } else { + // Otherwise, just execute it normally. + current_player_ = state_->CurrentPlayer(); + rollout_mode_ = kNoRollout; + } +} + +void TurnBasedSimultaneousState::RolloutModeIncrementCurrentPlayer() { + current_player_++; + + // Make sure to skip over the players that do not have legal actions. 
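+  // (Players whose legal-action set is empty at this simultaneous node are
+  // skipped entirely rather than being given a dummy turn; this case is
+  // exercised by SomePlayersHaveNoLegalActionsTests in the test file below.)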
+ while (current_player_ < num_players_ && + state_->LegalActions(current_player_).empty()) { + // Unnecessary to set an action here, but leads to a nicer ToString. + action_vector_[current_player_] = 0; + current_player_++; + } +} + +void TurnBasedSimultaneousState::DoApplyAction(Action action_id) { + if (state_->IsChanceNode()) { + SPIEL_CHECK_FALSE(rollout_mode_); + state_->ApplyAction(action_id); + DetermineWhoseTurn(); + } else { + if (rollout_mode_) { + // If we are currently rolling out a simultaneous move node, then simply + // buffer the action in the action vector. + rollout_mode_ = kMidRollout; + action_vector_[current_player_] = action_id; + RolloutModeIncrementCurrentPlayer(); + // Check if we then need to apply it. + if (current_player_ == num_players_) { + state_->ApplyActions(action_vector_); + DetermineWhoseTurn(); + } + } else { + SPIEL_CHECK_NE(state_->CurrentPlayer(), kSimultaneousPlayerId); + state_->ApplyAction(action_id); + DetermineWhoseTurn(); + } + } +} + +std::vector> +TurnBasedSimultaneousState::ChanceOutcomes() const { + return state_->ChanceOutcomes(); +} + +std::vector TurnBasedSimultaneousState::LegalActions() const { + return state_->LegalActions(CurrentPlayer()); +} + +std::string TurnBasedSimultaneousState::ActionToString(Player player, + Action action_id) const { + return state_->ActionToString(player, action_id); +} + +std::string TurnBasedSimultaneousState::ToString() const { + std::string partial_action = ""; + if (rollout_mode_) { + partial_action = "Partial joint action: "; + for (auto p = Player{0}; p < current_player_; ++p) { + absl::StrAppend(&partial_action, action_vector_[p]); + partial_action.push_back(' '); + } + partial_action.push_back('\n'); + } + return partial_action + state_->ToString(); +} + +bool TurnBasedSimultaneousState::IsTerminal() const { + return state_->IsTerminal(); +} + +std::vector TurnBasedSimultaneousState::Returns() const { + return state_->Returns(); +} + +std::vector TurnBasedSimultaneousState::Rewards() const { + return rollout_mode_ == kMidRollout ? std::vector(num_players_, 0) + : state_->Rewards(); +} + +std::string TurnBasedSimultaneousState::InformationStateString( + Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + std::string extra_info = ""; + extra_info = "Current player: "; + absl::StrAppend(&extra_info, current_player_); + extra_info.push_back('\n'); + if (rollout_mode_) { + // Include the player's action if they have take one already. + if (player < current_player_) { + absl::StrAppend(&extra_info, "Observer's action this turn: "); + absl::StrAppend(&extra_info, action_vector_[player]); + extra_info.push_back('\n'); + } + } + return extra_info + state_->InformationStateString(player); +} + +void TurnBasedSimultaneousState::InformationStateTensor( + Player player, absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + SPIEL_CHECK_EQ(values.size(), game_->InformationStateTensorSize()); + auto value_it = values.begin(); + + // First, get the 2 * num_players bits to encode whose turn it is and who + // the observer is. + for (auto p = Player{0}; p < num_players_; ++p) { + *value_it++ = (p == current_player_ ? 1 : 0); + } + for (auto p = Player{0}; p < num_players_; ++p) { + *value_it++ = (p == player ? 
1 : 0); + } + + // Then get the underlying observation + state_->InformationStateTensor(player, + absl::MakeSpan(value_it, values.end())); +} + +std::string TurnBasedSimultaneousState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + std::string extra_info = ""; + extra_info = "Current player: "; + absl::StrAppend(&extra_info, current_player_); + extra_info.push_back('\n'); + if (rollout_mode_) { + // Include the player's action if they have take one already. + if (player < current_player_) { + absl::StrAppend(&extra_info, "Observer's action this turn: "); + absl::StrAppend(&extra_info, action_vector_[player]); + extra_info.push_back('\n'); + } + } + return extra_info + state_->ObservationString(player); +} + +void TurnBasedSimultaneousState::ObservationTensor( + Player player, absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + SPIEL_CHECK_EQ(values.size(), game_->ObservationTensorSize()); + auto value_it = values.begin(); + + // First, get the 2 * num_players bits to encode whose turn it is and who + // the observer is. + for (auto p = Player{0}; p < num_players_; ++p) { + *value_it++ = (p == current_player_ ? 1 : 0); + } + for (auto p = Player{0}; p < num_players_; ++p) { + *value_it++ = (p == player ? 1 : 0); + } + + // Then get the underlying observation + state_->ObservationTensor(player, absl::MakeSpan(value_it, values.end())); +} + +TurnBasedSimultaneousState::TurnBasedSimultaneousState( + const TurnBasedSimultaneousState& other) + : State(other), + state_(other.state_->Clone()), + action_vector_(other.action_vector_), + current_player_(other.current_player_), + rollout_mode_(other.rollout_mode_) {} + +std::unique_ptr TurnBasedSimultaneousState::Clone() const { + return std::unique_ptr(new TurnBasedSimultaneousState(*this)); +} + +namespace { +GameType ConvertType(GameType type) { + type.dynamics = GameType::Dynamics::kSequential; + type.information = GameType::Information::kImperfectInformation; + type.short_name = kGameType.short_name; + type.long_name = "Turn-based " + type.long_name; + type.parameter_specification = kGameType.parameter_specification; + return type; +} + +GameParameters ConvertParams(const GameType& type, GameParameters params) { + params["name"] = GameParameter(type.short_name); + GameParameters new_params{{"game", GameParameter{params}}}; + return new_params; +} +} // namespace + +TurnBasedSimultaneousGame::TurnBasedSimultaneousGame( + std::shared_ptr game) + : Game(ConvertType(game->GetType()), + ConvertParams(game->GetType(), game->GetParameters())), + game_(game) {} + +std::shared_ptr ConvertToTurnBased(const Game& game) { + SPIEL_CHECK_EQ(game.GetType().dynamics, GameType::Dynamics::kSimultaneous); + return std::shared_ptr( + new TurnBasedSimultaneousGame(game.shared_from_this())); +} + +std::shared_ptr LoadGameAsTurnBased(const std::string& name) { + auto game = LoadGame(name); + if (game->GetType().dynamics == GameType::Dynamics::kSimultaneous) { + return ConvertToTurnBased(*game); + } else { + return game; + } +} + +std::shared_ptr LoadGameAsTurnBased(const std::string& name, + const GameParameters& params) { + auto game = LoadGame(name, params); + if (game->GetType().dynamics == GameType::Dynamics::kSimultaneous) { + return ConvertToTurnBased(*game); + } else { + return game; + } +} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/turn_based_simultaneous_game.h 
b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/turn_based_simultaneous_game.h new file mode 100644 index 0000000..9262ab3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/turn_based_simultaneous_game.h @@ -0,0 +1,134 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAME_TRANSFORMS_TURN_BASED_SIMULTANEOUS_GAME_H_ +#define OPEN_SPIEL_GAME_TRANSFORMS_TURN_BASED_SIMULTANEOUS_GAME_H_ + +#include +#include +#include +#include + +#include "open_spiel/spiel.h" + +// This wrapper turns any n-player simultaneous move game into an equivalent +// turn-based game where simultaneous move nodes are encoded as n turns. +// +// The underlying game must provide InformationStateString and +// InformationStateTensor for the wrapped functions to work. +// +// TODO: +// - implement UndoAction for these games. +// - generalize to use Observation as well as Information state + +namespace open_spiel { + +class TurnBasedSimultaneousState : public State { + public: + TurnBasedSimultaneousState(std::shared_ptr game, + std::unique_ptr state); + TurnBasedSimultaneousState(const TurnBasedSimultaneousState& other); + + Player CurrentPlayer() const override; + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::vector Rewards() const override; + std::string InformationStateString(Player player) const override; + void InformationStateTensor(Player player, + absl::Span values) const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + std::vector> ChanceOutcomes() const override; + + // Access to the wrapped state, used for debugging and in the tests. + const State* SimultaneousGameState() const { return state_.get(); } + std::vector LegalActions() const override; + + protected: + void DoApplyAction(Action action_id) override; + + private: + void DetermineWhoseTurn(); + void RolloutModeIncrementCurrentPlayer(); + + std::unique_ptr state_; + + // A vector of actions that is used primarily to store the intermediate + // actions taken by the players when extending the simultaneous move nodes + // to be turn-based. + std::vector action_vector_; + + // The current player (which will never be kSimultaneousPlayerId). + Player current_player_; + + // Are we currently rolling out a simultaneous move node? 
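+  // kNoRollout: the wrapped state is at an ordinary (single-player or chance)
+  // node. kStartRollout: a simultaneous node has just been reached and the
+  // first rolled-out turn is about to be taken. kMidRollout: one or more
+  // players' actions are buffered in action_vector_ but the joint action has
+  // not yet been applied to the underlying state.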
+ enum { kNoRollout = 0, kStartRollout, kMidRollout } rollout_mode_; +}; + +class TurnBasedSimultaneousGame : public Game { + public: + explicit TurnBasedSimultaneousGame(std::shared_ptr game); + + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new TurnBasedSimultaneousState( + shared_from_this(), game_->NewInitialState())); + } + + int NumDistinctActions() const override { + return game_->NumDistinctActions(); + } + int MaxChanceOutcomes() const override { return game_->MaxChanceOutcomes(); } + int NumPlayers() const override { return game_->NumPlayers(); } + double MinUtility() const override { return game_->MinUtility(); } + double MaxUtility() const override { return game_->MaxUtility(); } + absl::optional UtilitySum() const override { + return game_->UtilitySum(); + } + std::vector InformationStateTensorShape() const override { + // We flatten the representation of the underlying game and add one-hot + // indications of the to-play player and the observing player. + return {2 * NumPlayers() + game_->InformationStateTensorSize()}; + } + std::vector ObservationTensorShape() const override { + // We flatten the representation of the underlying game and add one-hot + // indications of the to-play player and the observing player. + return {2 * NumPlayers() + game_->ObservationTensorSize()}; + } + int MaxGameLength() const override { + return game_->MaxGameLength() * NumPlayers(); + } + int MaxChanceNodesInHistory() const override { + return game_->MaxChanceNodesInHistory(); + } + + private: + std::shared_ptr game_; +}; + +// Return back a transformed clone of the game. +std::shared_ptr ConvertToTurnBased(const Game& game); + +// These are equivalent to LoadGame but converts the game to turn-based if it is +// not already one. They are simple wrappers provided for the Python API. +std::shared_ptr LoadGameAsTurnBased(const std::string& name); +std::shared_ptr LoadGameAsTurnBased(const std::string& name, + const GameParameters& params); + +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAME_TRANSFORMS_TURN_BASED_SIMULTANEOUS_GAME_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/turn_based_simultaneous_game_test.cc b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/turn_based_simultaneous_game_test.cc new file mode 100644 index 0000000..fdae12d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/turn_based_simultaneous_game_test.cc @@ -0,0 +1,305 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
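+
+// The tests below cross-check a simultaneous-move game against its turn-based
+// conversion: SimulateGames() plays random trajectories through both games in
+// lockstep, asserting at every step that the wrapped state, each player's
+// information state string, and the final returns agree.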
+ +#include "open_spiel/game_transforms/turn_based_simultaneous_game.h" + +#include +#include + +#include "open_spiel/abseil-cpp/absl/random/uniform_int_distribution.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/algorithms/cfr.h" +#include "open_spiel/algorithms/expected_returns.h" +#include "open_spiel/observer.h" +#include "open_spiel/simultaneous_move_game.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/tests/basic_tests.h" +#include "open_spiel/utils/init.h" + +namespace open_spiel { +namespace { + +namespace testing = open_spiel::testing; + +// An n-player version of (repeated) matching pennies with n rounds. On round +// j, the j^th player has no legal moves, and every other player can play heads +// or tails. On the j^th round: a matching pennies games is being played between +// players j+1 (mod n) and j+2 (mod n), i.e. cumulative utilities are adjusted +// based on the matching pennies matrix betwen those players, and the other +// players' actions have no effect on the return. +// +// This game was specifically designed to test the case of some players not +// having any legal actions at simultaneous nodes, which are then skipped over +// when converted to turn-based games. +const GameType kGameType{/*short_name=*/"mprmp", + /*long_name=*/"Missing Player Repeated MP", + GameType::Dynamics::kSimultaneous, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/false, + /*provides_observation_tensor=*/false, + /*parameter_specification=*/ + {{"num_players", GameParameter(4)}}}; + +class MissingPlayerRepeatedMatchingPenniesState : public SimMoveState { + public: + MissingPlayerRepeatedMatchingPenniesState(std::shared_ptr game) + : SimMoveState(game), turns_(0), returns_(game->NumPlayers(), 0.0) {} + + Player CurrentPlayer() const override { + return IsTerminal() ? kTerminalPlayerId : kSimultaneousPlayerId; + } + + std::string ActionToString(Player player, Action action_id) const override { + return absl::StrCat("Player ", player, " action: ", action_id); + } + + std::string ToString() const override { return HistoryString(); } + bool IsTerminal() const override { return turns_ == num_players_; }; + std::vector Returns() const override { + return IsTerminal() ? returns_ : std::vector(num_players_, 0.); + } + std::string InformationStateString(Player player) const override { + return absl::StrCat(HistoryString(), " P:", player); + } + std::unique_ptr Clone() const override { + return std::make_unique(*this); + } + std::vector LegalActions(Player player) const override { + if (player == turns_) { + return {}; + } else { + return {0, 1}; + } + } + + protected: + void DoApplyActions(const std::vector& actions) override { + int missing_pid = turns_; + int row_pid = (missing_pid + 1) % num_players_; + int col_pid = (row_pid + 1) % num_players_; + SPIEL_CHECK_NE(actions[row_pid], kInvalidAction); + SPIEL_CHECK_NE(actions[col_pid], kInvalidAction); + if (actions[row_pid] == actions[col_pid]) { + // Match. Win for row player. + returns_[row_pid] += 1.0; + returns_[col_pid] -= 1.0; + } else { + // No match. Win for col player. 
+ returns_[row_pid] -= 1.0; + returns_[col_pid] += 1.0; + } + turns_++; + } + + private: + int turns_; + std::vector returns_; +}; + +class MissingPlayerRepeatedMatchingPenniesGame : public SimMoveGame { + public: + explicit MissingPlayerRepeatedMatchingPenniesGame( + const GameParameters& params) + : SimMoveGame(kGameType, params), + num_players_(ParameterValue("num_players", 4)) {} + + int NumDistinctActions() const override { return 2; } + std::unique_ptr NewInitialState() const override { + return std::make_unique( + shared_from_this()); + } + int MaxChanceOutcomes() const override { return 0; } + int NumPlayers() const override { return num_players_; } + double MinUtility() const override { return -num_players_; } + double MaxUtility() const override { return num_players_; } + absl::optional UtilitySum() const override { return 0; } + int MaxGameLength() const override { return num_players_; } + + private: + const int num_players_; +}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr( + new MissingPlayerRepeatedMatchingPenniesGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +void SimulateGames(std::mt19937* rng, const Game& game, State* sim_state, + State* turn_based_state) { + while (!sim_state->IsTerminal()) { + const State* wrapped_sim_state = + dynamic_cast(turn_based_state) + ->SimultaneousGameState(); + + // Now check that the states are identical via the ToString(). + SPIEL_CHECK_EQ(sim_state->ToString(), wrapped_sim_state->ToString()); + + if (sim_state->IsChanceNode()) { + SPIEL_CHECK_TRUE(turn_based_state->IsChanceNode()); + + // Chance node; sample one according to underlying distribution + std::vector> outcomes = + sim_state->ChanceOutcomes(); + Action action = + open_spiel::SampleAction( + outcomes, std::uniform_real_distribution(0.0, 1.0)(*rng)) + .first; + + std::cout << "sampled outcome: %s\n" + << sim_state->ActionToString(kChancePlayerId, action) + << std::endl; + + sim_state->ApplyAction(action); + turn_based_state->ApplyAction(action); + } else if (sim_state->CurrentPlayer() == kSimultaneousPlayerId) { + SPIEL_CHECK_EQ(wrapped_sim_state->CurrentPlayer(), kSimultaneousPlayerId); + + // Players choose simultaneously. + std::vector joint_action; + + // Sample an action for each player + for (auto p = Player{0}; p < game.NumPlayers(); p++) { + if (game.GetType().provides_information_state_string) { + // Check the information states to each player are consistent. + SPIEL_CHECK_EQ(sim_state->InformationStateString(p), + wrapped_sim_state->InformationStateString(p)); + } + + std::vector actions; + actions = sim_state->LegalActions(p); + absl::uniform_int_distribution<> dis(0, actions.size() - 1); + Action action = actions[dis(*rng)]; + joint_action.push_back(action); + std::cout << "player " << p << " chose " + << sim_state->ActionToString(p, action) << std::endl; + SPIEL_CHECK_EQ(p, turn_based_state->CurrentPlayer()); + turn_based_state->ApplyAction(action); + } + + sim_state->ApplyActions(joint_action); + } else { + SPIEL_CHECK_EQ(sim_state->CurrentPlayer(), + wrapped_sim_state->CurrentPlayer()); + SPIEL_CHECK_EQ(sim_state->CurrentPlayer(), + turn_based_state->CurrentPlayer()); + + Player p = sim_state->CurrentPlayer(); + + if (game.GetType().provides_information_state_string) { + // Check the information states to each player are consistent. 
+ SPIEL_CHECK_EQ(sim_state->InformationStateString(p), + wrapped_sim_state->InformationStateString(p)); + } + + std::vector actions; + actions = sim_state->LegalActions(p); + absl::uniform_int_distribution<> dis(0, actions.size() - 1); + Action action = actions[dis(*rng)]; + + std::cout << "player " << p << " chose " + << sim_state->ActionToString(p, action) << std::endl; + + turn_based_state->ApplyAction(action); + sim_state->ApplyAction(action); + } + + std::cout << "State: " << std::endl << sim_state->ToString() << std::endl; + } + + SPIEL_CHECK_TRUE(turn_based_state->IsTerminal()); + + auto sim_returns = sim_state->Returns(); + auto turn_returns = turn_based_state->Returns(); + + for (auto player = Player{0}; player < game.NumPlayers(); player++) { + double utility = sim_returns[player]; + SPIEL_CHECK_GE(utility, game.MinUtility()); + SPIEL_CHECK_LE(utility, game.MaxUtility()); + std::cout << "Utility to player " << player << " is " << utility + << std::endl; + + double other_utility = turn_returns[player]; + SPIEL_CHECK_EQ(utility, other_utility); + } +} + +void BasicTurnBasedSimultaneousTests() { + std::mt19937 rng; + + for (const GameType& type : RegisteredGameTypes()) { + if (!type.ContainsRequiredParameters() && type.default_loadable) { + std::string name = type.short_name; + if (type.dynamics == GameType::Dynamics::kSimultaneous) { + std::cout << "TurnBasedSimultaneous: Testing " << name << std::endl; + for (int i = 0; i < 100; ++i) { + std::shared_ptr sim_game = LoadGame(name); + std::shared_ptr turn_based_game = + ConvertToTurnBased(*LoadGame(name)); + auto sim_state = sim_game->NewInitialState(); + auto turn_based_state = turn_based_game->NewInitialState(); + SimulateGames(&rng, *sim_game, sim_state.get(), + turn_based_state.get()); + } + } + } + } +} + +void SomePlayersHaveNoLegalActionsTests() { + std::shared_ptr game( + new MissingPlayerRepeatedMatchingPenniesGame({})); + testing::RandomSimTest(*game, 10); + + std::shared_ptr turn_based_game = ConvertToTurnBased(*game); + testing::RandomSimTest(*turn_based_game, 10); + + // Hey, while we're here, why not try CFR? + algorithms::CFRSolverBase solver(*turn_based_game, + /*alternating_updates*/true, + /*linear_averaging*/false, + /*regret_matching_plus*/false, + /*random_initial_regrets*/true, + /*seed*/78846817); + for (int i = 0; i < 5; i++) { + solver.EvaluateAndUpdatePolicy(); + const std::shared_ptr average_policy = solver.AveragePolicy(); + std::vector expected_returns = algorithms::ExpectedReturns( + *turn_based_game->NewInitialState(), *average_policy, + /*depth_limit*/-1); + std::cout << "Iter " << i << ", expected returns:"; + for (double val : expected_returns) { + std::cout << " " << val; + } + std::cout << std::endl; + } +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::Init("", &argc, &argv, true); + // open_spiel::BasicTurnBasedSimultaneousTests(); + open_spiel::SomePlayersHaveNoLegalActionsTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/zerosum.cc b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/zerosum.cc new file mode 100644 index 0000000..99907f3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/zerosum.cc @@ -0,0 +1,71 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/game_transforms/zerosum.h" + +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/game_transforms/game_wrapper.h" +#include "open_spiel/observer.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace { + +// These parameters are the most-general case, except for utility which is +// zero-sum. The actual game may be simpler. +const GameType kGameType{/*short_name=*/"zerosum", + /*long_name=*/"ZeroSum Version of a Regular Game", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kSampledStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kRewards, + /*max_num_players=*/100, + /*min_num_players=*/1, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + {{"game", GameParameter(GameParameter::Type::kGame, + /*is_mandatory=*/true)}}, + /*default_loadable=*/false, + /*provides_factored_observation_string=*/true, + /*is_concrete=*/false}; + +GameType ZeroSumGameType(GameType game_type) { + game_type.short_name = kGameType.short_name; + game_type.long_name = absl::StrCat("ZeroSum ", game_type.long_name); + game_type.utility = GameType::Utility::kZeroSum; + return game_type; +} + +std::shared_ptr Factory(const GameParameters& params) { + auto game = LoadGame(params.at("game").game_value()); + GameType game_type = ZeroSumGameType(game->GetType()); + return std::shared_ptr(new ZeroSumGame(game, game_type, params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); +} // namespace + +ZeroSumGame::ZeroSumGame(std::shared_ptr game, GameType game_type, + GameParameters game_parameters) + : WrappedGame(game, game_type, game_parameters) {} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/zerosum.h b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/zerosum.h new file mode 100644 index 0000000..5f5497f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/zerosum.h @@ -0,0 +1,83 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
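+
+// Worked example of the transform defined below: with three players and
+// underlying returns {4, 1, 1}, SubtractMean() removes the mean of 2 to give
+// {2, -1, -1}, which sums to zero. Accordingly, the wrapped game's
+// MaxUtility() is (MaxUtility() - MinUtility()) * (n - 1) / n of the original,
+// attained when one player receives the maximum and every other player the
+// minimum.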
+ +#ifndef OPEN_SPIEL_GAME_TRANSFORMS_ZEROSUM_H_ +#define OPEN_SPIEL_GAME_TRANSFORMS_ZEROSUM_H_ + +#include +#include "open_spiel/game_transforms/game_wrapper.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// Transforms a general sum game into a zero sum one by subtracting the mean +// of the rewards and final returns. + +namespace open_spiel { + +inline std::vector SubtractMean(std::vector&& vec) { + double mean = std::accumulate(vec.begin(), vec.end(), 0.0) / vec.size(); + std::vector result = std::move(vec); + for (auto& item : result) item -= mean; + return result; +} + +class ZeroSumState : public WrappedState { + public: + ZeroSumState(std::shared_ptr game, std::unique_ptr state) + : WrappedState(game, std::move(state)) {} + ZeroSumState(const ZeroSumState& other) = default; + + std::vector Rewards() const override { + return SubtractMean(state_->Rewards()); + } + + std::vector Returns() const override { + return SubtractMean(state_->Returns()); + } + + std::unique_ptr Clone() const override { + return std::unique_ptr(new ZeroSumState(*this)); + } +}; + +class ZeroSumGame : public WrappedGame { + public: + ZeroSumGame(std::shared_ptr game, GameType game_type, + GameParameters game_parameters); + ZeroSumGame(const ZeroSumGame& other) = default; + + std::unique_ptr NewInitialState() const override { + return std::unique_ptr( + new ZeroSumState(shared_from_this(), game_->NewInitialState())); + } + + double MaxUtility() const override { + // The maximum utility is obtained if, in the original game, + // one player gains game_->MaxUtility() while all other players + // obtain game_->MinUtility(), because the mean is subtracted. + double n = static_cast(game_->NumPlayers()); + return (game_->MaxUtility() - game_->MinUtility()) * (n - 1) / n; + } + double MinUtility() const override { + // By symmetry: + return - MaxUtility(); + } + absl::optional UtilitySum() const override { + return 0.0; + } +}; + +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAME_TRANSFORMS_ZEROSUM_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/game_transforms/zerosum_test.cc b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/zerosum_test.cc new file mode 100644 index 0000000..09a274f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/game_transforms/zerosum_test.cc @@ -0,0 +1,37 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/game_transforms/zerosum.h" + +#include "open_spiel/games/oh_hell/oh_hell.h" +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace zerosum { +namespace { + +namespace testing = open_spiel::testing; + +void BasicZeroSumTests() { + testing::LoadGameTest("zerosum(game=oh_hell(off_bid_penalty=true))"); + testing::RandomSimTest( + *LoadGame("zerosum(game=oh_hell(off_bid_penalty=true))"), 10); +} + +} // namespace +} // namespace zerosum +} // namespace open_spiel + +int main(int argc, char** argv) { open_spiel::zerosum::BasicZeroSumTests(); } diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/CMakeLists.txt b/scenarios/bargaining/open_spiel/open_spiel/games/CMakeLists.txt new file mode 100644 index 0000000..ed1a3fa --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/CMakeLists.txt @@ -0,0 +1,668 @@ +set(GAME_SOURCES + amazons/amazons.cc + amazons/amazons.h + backgammon/backgammon.cc + backgammon/backgammon.h + bargaining/bargaining.cc + bargaining/bargaining.h + bargaining/bargaining_instances1000.cc + battleship/battleship.cc + battleship/battleship.h + battleship/battleship_types.h + battleship/battleship_types.cc + blackjack/blackjack.cc + blackjack/blackjack.h + blotto/blotto.cc + blotto/blotto.h + breakthrough/breakthrough.cc + breakthrough/breakthrough.h + bridge/bridge.cc + bridge/bridge.h + bridge/bridge_scoring.cc + bridge/bridge_scoring.h + bridge/bridge_uncontested_bidding.cc + bridge/bridge_uncontested_bidding.h + catch/catch.cc + catch/catch.h + checkers/checkers.cc + checkers/checkers.h + chess/chess.cc + chess/chess.h + chess/chess_board.cc + chess/chess_board.h + chess/chess_common.cc + chess/chess_common.h + chess/chess960_starting_positions.cc + cliff_walking/cliff_walking.cc + cliff_walking/cliff_walking.h + clobber/clobber.cc + clobber/clobber.h + coin_game/coin_game.cc + coin_game/coin_game.h + colored_trails/colored_trails.cc + colored_trails/colored_trails.h + colored_trails/colored_trails_utils.cc + connect_four/connect_four.cc + connect_four/connect_four.h + coop_box_pushing/coop_box_pushing.cc + coop_box_pushing/coop_box_pushing.h + coordinated_mp/coordinated_mp.cc + coordinated_mp/coordinated_mp.h + crazy_eights/crazy_eights.cc + crazy_eights/crazy_eights.h + cribbage/cribbage.cc + cribbage/cribbage.h + cursor_go/cursor_go.cc + cursor_go/cursor_go.h + dark_chess/dark_chess.cc + dark_chess/dark_chess.h + dark_hex/dark_hex.cc + dark_hex/dark_hex.h + deep_sea/deep_sea.cc + deep_sea/deep_sea.h + dots_and_boxes/dots_and_boxes.cc + dots_and_boxes/dots_and_boxes.h + dynamic_routing/dynamic_routing_data.cc + dynamic_routing/dynamic_routing_data.h + dynamic_routing/dynamic_routing_utils.cc + dynamic_routing/dynamic_routing_utils.h + dou_dizhu/dou_dizhu.cc + dou_dizhu/dou_dizhu.h + dou_dizhu/dou_dizhu_utils.cc + dou_dizhu/dou_dizhu_utils.h + efg_game/efg_game.cc + efg_game/efg_game.h + efg_game/efg_game_data.cc + efg_game/efg_game_data.h + einstein_wurfelt_nicht/einstein_wurfelt_nicht.cc + einstein_wurfelt_nicht/einstein_wurfelt_nicht.h + euchre/euchre.cc + euchre/euchre.h + first_sealed_auction/first_sealed_auction.cc + first_sealed_auction/first_sealed_auction.h + gin_rummy/gin_rummy.cc + gin_rummy/gin_rummy.h + gin_rummy/gin_rummy_utils.cc + gin_rummy/gin_rummy_utils.h + go/go.cc + go/go.h + go/go_board.cc + go/go_board.h + goofspiel/goofspiel.cc + goofspiel/goofspiel.h + havannah/havannah.cc + havannah/havannah.h + hearts/hearts.cc + hearts/hearts.h + hex/hex.cc + 
hex/hex.h + hive/hive.cc + hive/hive.h + hive/hive_board.cc + hive/hive_board.h + kriegspiel/kriegspiel.cc + kriegspiel/kriegspiel.h + kuhn_poker/kuhn_poker.cc + kuhn_poker/kuhn_poker.h + laser_tag/laser_tag.cc + laser_tag/laser_tag.h + leduc_poker/leduc_poker.cc + leduc_poker/leduc_poker.h + lewis_signaling/lewis_signaling.cc + lewis_signaling/lewis_signaling.h + liars_dice/liars_dice.cc + liars_dice/liars_dice.h + maedn/maedn.cc + maedn/maedn.h + mancala/mancala.cc + mancala/mancala.h + markov_soccer/markov_soccer.cc + markov_soccer/markov_soccer.h + matching_pennies_3p/matching_pennies_3p.cc + matching_pennies_3p/matching_pennies_3p.h + matrix_games/matrix_games.cc + mfg/crowd_modelling.cc + mfg/crowd_modelling.h + mfg/crowd_modelling_2d.cc + mfg/crowd_modelling_2d.h + mfg/dynamic_routing.cc + mfg/dynamic_routing.h + mfg/garnet.cc + mfg/garnet.h + mnk/mnk.cc + mnk/mnk.h + morpion_solitaire/morpion_solitaire.cc + morpion_solitaire/morpion_solitaire.h + negotiation/negotiation.cc + negotiation/negotiation.h + nfg_game/nfg_game.cc + nfg_game/nfg_game.h + nine_mens_morris/nine_mens_morris.cc + nine_mens_morris/nine_mens_morris.h + nim/nim.cc + nim/nim.h + oh_hell/oh_hell.cc + oh_hell/oh_hell.h + oshi_zumo/oshi_zumo.cc + oshi_zumo/oshi_zumo.h + othello/othello.cc + othello/othello.h + oware/oware.cc + oware/oware.h + oware/oware_board.cc + oware/oware_board.h + pathfinding/pathfinding.cc + pathfinding/pathfinding.h + pentago/pentago.cc + pentago/pentago.h + phantom_go/phantom_go.h + phantom_go/phantom_go.cc + phantom_go/phantom_go_board.h + phantom_go/phantom_go_board.cc + phantom_ttt/phantom_ttt.cc + phantom_ttt/phantom_ttt.h + pig/pig.cc + pig/pig.h + quoridor/quoridor.cc + quoridor/quoridor.h + rbc/rbc.cc + rbc/rbc.h + sheriff/sheriff.cc + sheriff/sheriff.h + skat/skat.cc + skat/skat.h + solitaire/solitaire.cc + solitaire/solitaire.h + spades/spades.cc + spades/spades.h + spades/spades_scoring.cc + spades/spades_scoring.h + stones_and_gems/stones_and_gems.cc + stones_and_gems/stones_and_gems.h + tarok/tarok.cc + tarok/tarok.h + tarok/cards.cc + tarok/cards.h + tarok/contracts.cc + tarok/contracts.h + tic_tac_toe/tic_tac_toe.cc + tic_tac_toe/tic_tac_toe.h + tiny_bridge/tiny_bridge.cc + tiny_bridge/tiny_bridge.h + tiny_hanabi/tiny_hanabi.cc + tiny_hanabi/tiny_hanabi.h + trade_comm/trade_comm.cc + trade_comm/trade_comm.h + twenty_forty_eight/2048.cc + twenty_forty_eight/2048.h + twixt/twixt.cc + twixt/twixt.h + twixt/twixtboard.cc + twixt/twixtboard.h + twixt/twixtcell.h + ultimate_tic_tac_toe/ultimate_tic_tac_toe.h + ultimate_tic_tac_toe/ultimate_tic_tac_toe.cc + y/y.cc + y/y.h +) + +if (${OPEN_SPIEL_BUILD_WITH_HANABI}) + set(GAME_SOURCES ${GAME_SOURCES} hanabi/hanabi.cc hanabi/hanabi.h) +endif() +if (${OPEN_SPIEL_BUILD_WITH_ACPC}) + set(GAME_SOURCES ${GAME_SOURCES} universal_poker/universal_poker.cc universal_poker/universal_poker.h) +endif() + + +add_library (games OBJECT ${GAME_SOURCES}) + +target_include_directories (games PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + +if (${OPEN_SPIEL_BUILD_WITH_HANABI}) + add_subdirectory(hanabi) +endif() +if (${OPEN_SPIEL_BUILD_WITH_ACPC}) + add_subdirectory(universal_poker) +endif() +if (${OPEN_SPIEL_BUILD_WITH_GAMUT}) + add_subdirectory(gamut) +endif() + +add_library(bridge_double_dummy_solver OBJECT + bridge/double_dummy_solver/include/dll.h + bridge/double_dummy_solver/include/portab.h + bridge/double_dummy_solver/src/ABsearch.cpp + bridge/double_dummy_solver/src/ABsearch.h + bridge/double_dummy_solver/src/ABstats.cpp + 
bridge/double_dummy_solver/src/ABstats.h + bridge/double_dummy_solver/src/CalcTables.cpp + bridge/double_dummy_solver/src/CalcTables.h + bridge/double_dummy_solver/src/dds.cpp + bridge/double_dummy_solver/src/dds.h + bridge/double_dummy_solver/src/DealerPar.cpp + bridge/double_dummy_solver/src/debug.h + bridge/double_dummy_solver/src/dump.cpp + bridge/double_dummy_solver/src/dump.h + bridge/double_dummy_solver/src/File.cpp + bridge/double_dummy_solver/src/File.h + bridge/double_dummy_solver/src/Init.cpp + bridge/double_dummy_solver/src/Init.h + bridge/double_dummy_solver/src/LaterTricks.cpp + bridge/double_dummy_solver/src/LaterTricks.h + bridge/double_dummy_solver/src/Memory.cpp + bridge/double_dummy_solver/src/Memory.h + bridge/double_dummy_solver/src/Moves.cpp + bridge/double_dummy_solver/src/Moves.h + bridge/double_dummy_solver/src/Par.cpp + bridge/double_dummy_solver/src/parallel.h + bridge/double_dummy_solver/src/PBN.cpp + bridge/double_dummy_solver/src/PBN.h + bridge/double_dummy_solver/src/PlayAnalyser.cpp + bridge/double_dummy_solver/src/PlayAnalyser.h + bridge/double_dummy_solver/src/QuickTricks.cpp + bridge/double_dummy_solver/src/QuickTricks.h + bridge/double_dummy_solver/src/Scheduler.cpp + bridge/double_dummy_solver/src/Scheduler.h + bridge/double_dummy_solver/src/SolveBoard.cpp + bridge/double_dummy_solver/src/SolveBoard.h + bridge/double_dummy_solver/src/SolverIF.cpp + bridge/double_dummy_solver/src/SolverIF.h + bridge/double_dummy_solver/src/System.cpp + bridge/double_dummy_solver/src/System.h + bridge/double_dummy_solver/src/ThreadMgr.cpp + bridge/double_dummy_solver/src/ThreadMgr.h + bridge/double_dummy_solver/src/Timer.cpp + bridge/double_dummy_solver/src/Timer.h + bridge/double_dummy_solver/src/TimerGroup.cpp + bridge/double_dummy_solver/src/TimerGroup.h + bridge/double_dummy_solver/src/TimerList.cpp + bridge/double_dummy_solver/src/TimerList.h + bridge/double_dummy_solver/src/TimeStat.cpp + bridge/double_dummy_solver/src/TimeStat.h + bridge/double_dummy_solver/src/TimeStatList.cpp + bridge/double_dummy_solver/src/TimeStatList.h + bridge/double_dummy_solver/src/TransTable.h + bridge/double_dummy_solver/src/TransTableL.cpp + bridge/double_dummy_solver/src/TransTableL.h + bridge/double_dummy_solver/src/TransTableS.cpp + bridge/double_dummy_solver/src/TransTableS.h +) +target_include_directories (bridge_double_dummy_solver PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_compile_definitions(bridge_double_dummy_solver PUBLIC DDS_NO_STATIC_INIT) + +add_executable(2048_test twenty_forty_eight/2048_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(2048_test 2048_test) + +add_executable(amazons_test amazons/amazons_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(amazons_test amazons_test) + +add_executable(backgammon_test backgammon/backgammon_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(backgammon_test backgammon_test) + +add_executable(bargaining_instance_generator bargaining/bargaining_instance_generator.cc + ${OPEN_SPIEL_OBJECTS}) +add_executable(bargaining_test bargaining/bargaining_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(bargaining_test bargaining_test) + +add_executable(battleship_test battleship/battleship_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(battleship_test battleship_test) + +add_executable(blackjack_test blackjack/blackjack_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(blackjack_test blackjack_test) + +add_executable(blotto_test blotto/blotto_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(blotto_test blotto_test) + +add_executable(breakthrough_test 
breakthrough/breakthrough_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(breakthrough_test breakthrough_test) + +add_executable(bridge_test bridge/bridge_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(bridge_test bridge_test) + +add_executable(catch_test catch/catch_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(catch_test catch_test) + +add_executable(checkers_test checkers/checkers_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(checkers_test checkers_test) + +add_executable(chess_test chess/chess_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(chess_test chess_test) + +add_executable(cliff_walking_test cliff_walking/cliff_walking_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(cliff_walking_test cliff_walking_test) + +add_executable(clobber_test clobber/clobber_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(clobber_test clobber_test) + +add_executable(coin_game_test coin_game/coin_game_test.cc ${OPEN_SPIEL_OBJECTS} + $ + $) +add_test(coin_game_test coin_game_test) + +add_executable(colored_trails_board_generator + colored_trails/colored_trails_board_generator.cc + ${OPEN_SPIEL_OBJECTS} $) + +add_executable(colored_trails_test colored_trails/colored_trails_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(colored_trails_test colored_trails_test) + +add_executable(connect_four_test connect_four/connect_four_test.cc ${OPEN_SPIEL_OBJECTS} + $ + $) +add_test(connect_four_test connect_four_test) + +add_executable(coop_box_pushing_test coop_box_pushing/coop_box_pushing_test.cc + ${OPEN_SPIEL_OBJECTS} + $) +add_test(coop_box_pushing_test coop_box_pushing_test) + +add_executable(coordinated_mp_test coordinated_mp/coordinated_mp_test.cc ${OPEN_SPIEL_OBJECTS} + $ + $) +add_test(coordinated_mp_test coordinated_mp_test) + +add_executable(crazy_eights_test crazy_eights/crazy_eights_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(crazy_eights_test crazy_eights_test) + +add_executable(cribbage_test cribbage/cribbage_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(cribbage_test cribbage_test) + +add_executable(crowd_modelling_test mfg/crowd_modelling_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(crowd_modelling_test crowd_modelling_test) + +add_executable(crowd_modelling_2d_test mfg/crowd_modelling_2d_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(crowd_modelling_2d_test crowd_modelling_2d_test) + +add_executable(cursor_go_test cursor_go/cursor_go_test.cc + ${OPEN_SPIEL_OBJECTS} + $) +add_test(cursor_go_test cursor_go_test) + +add_executable(dark_chess_test dark_chess/dark_chess_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(dark_chess_test dark_chess_test) + +add_executable(dark_hex_test dark_hex/dark_hex_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(dark_hex_test dark_hex_test) + +add_executable(deep_sea_test deep_sea/deep_sea_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(deep_sea_test deep_sea_test) + +add_executable(dots_and_boxes_test dots_and_boxes/dots_and_boxes_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(dots_and_boxes_test dots_and_boxes_test) + +add_executable(dynamic_routing_data_test dynamic_routing/dynamic_routing_data_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(dynamic_routing_data_test dynamic_routing_data_test) + +add_executable(dynamic_routing_test mfg/dynamic_routing_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(dynamic_routing_test dynamic_routing_test) + +add_executable(dynamic_routing_utils_test dynamic_routing/dynamic_routing_utils_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(dynamic_routing_utils_test dynamic_routing_utils_test) + +add_executable(dou_dizhu_test dou_dizhu/dou_dizhu_test.cc ${OPEN_SPIEL_OBJECTS} + 
$) +add_test(dou_dizhu_test dou_dizhu_test) + +add_executable(dou_dizhu_utils_test dou_dizhu/dou_dizhu_utils_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(dou_dizhu_utils_test dou_dizhu_utils_test) + +add_executable(efg_game_test efg_game/efg_game_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(efg_game_test efg_game_test) + +add_executable(einstein_wurfelt_nicht_test einstein_wurfelt_nicht/einstein_wurfelt_nicht_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(einstein_wurfelt_nicht_test einstein_wurfelt_nicht_test) + +add_executable(euchre_test euchre/euchre_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(euchre_test euchre_test) + +add_executable(first_sealed_auction_test first_sealed_auction/first_sealed_auction_test.cc + ${OPEN_SPIEL_OBJECTS} + $) +add_test(first_sealed_auction_test first_sealed_auction_test) + +add_executable(garnet_test mfg/garnet_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(garnet_test garnet_test) + +add_executable(gin_rummy_test gin_rummy/gin_rummy_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(gin_rummy_test gin_rummy_test) + +add_executable(go_test go/go_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(go_test go_test) + +add_executable(phantom_go_test phantom_go/phantom_go_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(phantom_go_test phantom_go_test) + +add_executable(goofspiel_test goofspiel/goofspiel_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(goofspiel_test goofspiel_test) + +add_executable(havannah_test havannah/havannah_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(havannah_test havannah_test) + +add_executable(hearts_test hearts/hearts_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(hearts_test hearts_test) + +add_executable(hex_test hex/hex_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(hex_test hex_test) + +add_executable(hive_test hive/hive_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(hive_test hive_test) + +add_executable(kriegspiel_test kriegspiel/kriegspiel_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(kriegspiel_test kriegspiel_test) + +add_executable(kuhn_poker_test kuhn_poker/kuhn_poker_test.cc ${OPEN_SPIEL_OBJECTS} + $ + $) +add_test(kuhn_poker_test kuhn_poker_test) + +add_executable(leduc_poker_test leduc_poker/leduc_poker_test.cc ${OPEN_SPIEL_OBJECTS} + $ + $) +add_test(leduc_poker_test leduc_poker_test) + +add_executable(lewis_signaling_test lewis_signaling/lewis_signaling_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(lewis_signaling_test lewis_signaling_test) + +add_executable(liars_dice_test liars_dice/liars_dice_test.cc ${OPEN_SPIEL_OBJECTS} + $ + $) +add_test(liars_dice_test liars_dice_test) + +add_executable(maedn_test maedn/maedn_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(maedn_test maedn_test) + +add_executable(mancala_test mancala/mancala_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(mancala_test mancala_test) + +add_executable(markov_soccer_test markov_soccer/markov_soccer_test.cc ${OPEN_SPIEL_OBJECTS} + $ + $) +add_test(markov_soccer_test markov_soccer_test) + +add_executable(matching_pennies_3p_test matching_pennies_3p/matching_pennies_3p_test.cc + ${OPEN_SPIEL_OBJECTS} + $ + $) +add_test(matching_pennies_3p_test matching_pennies_3p_test) + +add_executable(matrix_games_test matrix_games/matrix_games_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(matrix_games_test matrix_games_test) + +add_executable(mnk_test mnk/mnk_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(mnk_test mnk_test) + +add_executable(morpion_solitaire_test morpion_solitaire/morpion_solitaire_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(morpion_solitaire_test morpion_solitaire_test) + 
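+
+# (Each game test above and below follows the same pattern: add_executable()
+# compiles the game's *_test.cc together with ${OPEN_SPIEL_OBJECTS} and the
+# required object-library targets, and add_test() registers the resulting
+# binary with CTest.)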
+add_executable(negotiation_test negotiation/negotiation_test.cc ${OPEN_SPIEL_OBJECTS} + $ + $) +add_test(negotiation_test negotiation_test) + +add_executable(nfg_game_test nfg_game/nfg_game_test.cc ${OPEN_SPIEL_OBJECTS} + $ + $) +add_test(nfg_game_test nfg_game_test) + +add_executable(nim_test nim/nim_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(nim_test nim_test) + +add_executable(nine_mens_morris_test nine_mens_morris/nine_mens_morris_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(nine_mens_morris_test nine_mens_morris_test) + +add_executable(oh_hell_test oh_hell/oh_hell_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(oh_hell_test oh_hell_test) + +add_executable(oshi_zumo_test oshi_zumo/oshi_zumo_test.cc ${OPEN_SPIEL_OBJECTS} + $ + $) +add_test(oshi_zumo_test oshi_zumo_test) + +add_executable(othello_test othello/othello_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(othello_test othello_test) + +add_executable(oware_test oware/oware_test.cc ${OPEN_SPIEL_OBJECTS} + $ + $) +add_test(oware_test oware_test) + +add_executable(pathfinding_test pathfinding/pathfinding_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(pathfinding_test pathfinding_test) + +add_executable(pentago_test pentago/pentago_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(pentago_test pentago_test) + +add_executable(phantom_ttt_test phantom_ttt/phantom_ttt_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(phantom_ttt_test phantom_ttt_test) + +add_executable(pig_test pig/pig_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(pig_test pig_test) + +add_executable(quoridor_test quoridor/quoridor_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(quoridor_test quoridor_test) + +add_executable(rbc_test rbc/rbc_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(rbc_test rbc_test) + +add_executable(sheriff_test sheriff/sheriff_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(sheriff_test sheriff_test) + +add_executable(skat_test skat/skat_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(skat_test skat_test) + +add_executable(solitaire_test solitaire/solitaire_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(solitaire_test solitaire_test) + +add_executable(spades_test spades/spades_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(spades_test spades_test) + +add_executable(stones_and_gems_test stones_and_gems/stones_and_gems_test.cc + ${OPEN_SPIEL_OBJECTS} + $) +add_test(stones_and_gems_test stones_and_gems_test) + +add_executable(tarok_test tarok/tarok_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(tarok_test tarok_test) + +add_executable(tic_tac_toe_test tic_tac_toe/tic_tac_toe_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(tic_tac_toe_test tic_tac_toe_test) + +add_executable(laser_tag_test laser_tag/laser_tag_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(laser_tag_test laser_tag_test) + +add_executable(tiny_bridge_test tiny_bridge/tiny_bridge_test.cc ${OPEN_SPIEL_OBJECTS} + $ + $) +add_test(tiny_bridge_test tiny_bridge_test) + +add_executable(tiny_hanabi_test tiny_hanabi/tiny_hanabi_test.cc ${OPEN_SPIEL_OBJECTS} + $ + $) +add_test(tiny_hanabi_test tiny_hanabi_test) + +add_executable(trade_comm_test trade_comm/trade_comm_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(trade_comm_test trade_comm_test) + +add_executable(twixt_test twixt/twixt_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(twixt_test twixt_test) + +add_executable(ultimate_tic_tac_toe_test ultimate_tic_tac_toe/ultimate_tic_tac_toe_test.cc + ${OPEN_SPIEL_OBJECTS} $) +add_test(ultimate_tic_tac_toe_test ultimate_tic_tac_toe_test) + +if (${OPEN_SPIEL_BUILD_WITH_ACPC}) + add_executable(universal_poker_test 
universal_poker/universal_poker_test.cc ${OPEN_SPIEL_OBJECTS} + $ + $) + add_test(universal_poker_test universal_poker_test + --subgames_data_dir=${CMAKE_CURRENT_SOURCE_DIR}/universal_poker/endgames) +endif() + +add_executable(y_test y/y_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(y_test y_test) diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/amazons/amazons.cc b/scenarios/bargaining/open_spiel/open_spiel/games/amazons/amazons.cc new file mode 100644 index 0000000..6c98c03 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/amazons/amazons.cc @@ -0,0 +1,470 @@ +// Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/amazons/amazons.h" + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/tensor_view.h" + +namespace open_spiel { +namespace amazons { +namespace { +// Facts about the game. +const GameType kGameType{ + /*short_name=*/"amazons", + /*long_name=*/"Amazons", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/{} // no parameters +}; + +std::shared_ptr Factory(const GameParameters ¶ms) { + return std::shared_ptr(new AmazonsGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +} // namespace + +CellState PlayerToState(Player player) { + switch (player) { + case 0: + return CellState::kCross; + case 1: + return CellState::kNought; + default: + SpielFatalError(absl::StrCat("Invalid player id ", player)); + return CellState::kEmpty; + } +} + +std::string StateToString(CellState state) { + switch (state) { + case CellState::kEmpty: + return "."; + case CellState::kNought: + return "O"; + case CellState::kCross: + return "X"; + case CellState::kBlock: + return "#"; + default: + SpielFatalError("Unknown state."); + } +} + +/* Move generation functions */ +std::vector AmazonsState::GetHorizontalMoves(Action cell) const { + std::vector horizontalMoves; + + unsigned char col = cell % kNumRows; // The column the cell is in + unsigned char left = + col; // The maximum amount of spaces to check left of given cell + unsigned char right = + kNumCols - col - + 1; // The maximal amount of spaces to check right of given cell + Action focus; + + // <-----X + // Walk until we encounter a blocking piece or end of row + int count = 1; + while (count <= left) { + focus = cell - count; + if (board_[focus] == CellState::kEmpty) { + horizontalMoves.push_back(focus); + count++; + } else { + // We have encountered 
a blocking piece + break; + } + } + + // X----> + // Walk until we encounter a blocking piece or end of row + count = 1; + while (count <= right) { + focus = cell + count; + if (board_[focus] == CellState::kEmpty) { + horizontalMoves.push_back(focus); + count++; + } else { + // We have encountered a blocking piece + break; + } + } + + return horizontalMoves; +} + +std::vector AmazonsState::GetVerticalMoves(Action cell) const { + std::vector verticalMoves; + + unsigned char row = cell / kNumRows; // The row the cell is in + unsigned char up = + row; // The maximum amount of spaces to check up of given cell + unsigned char down = + kNumRows - row - + 1; // The maximal amount of spaces to check down of given cell + Action focus; + + // ^ + // | + // | + // X + // Walk until we encounter a blocking piece or end of column + int count = 1; + focus = cell; + while (count <= up) { + focus -= kNumRows; + if (board_[focus] == CellState::kEmpty) { + verticalMoves.push_back(focus); + count++; + } else { + // We have encountered a blocking piece + break; + } + } + + // X + // | + // | + // V + // Walk until we encounter a blocking piece or end of column + count = 1; + focus = cell; + while (count <= down) { + focus += kNumRows; + if (board_[focus] == CellState::kEmpty) { + verticalMoves.push_back(focus); + count++; + } else { + // We have encountered a blocking piece + break; + } + } + + return verticalMoves; +} + +std::vector AmazonsState::GetDiagonalMoves(Action cell) const { + std::vector diagonalMoves; + unsigned char col = cell % kNumCols; // The column the cell is in + unsigned char row = cell / kNumRows; // The row the cell is in + unsigned char upLeft = std::min( + row, + col); // The maximum amount of spaces to check up and left of given cell + unsigned char upRight = std::min( + row, + (unsigned char)(kNumCols - col - 1)); // The maximum amount of spaces to + // check up and right of given cell + // The maximum amount of spaces to check + // down and left of given cell + unsigned char downLeft = + std::min(static_cast(kNumRows - row - 1), col); + // The maximum amount of spaces to check down + // and right of given cell + unsigned char downRight = + std::min(static_cast(kNumRows - row - 1), + static_cast(kNumCols - col - 1)); + Action focus; + + // Up and left + int count = 1; + focus = cell; + while (count <= upLeft) { + focus -= (kNumRows + 1); + if (board_[focus] == CellState::kEmpty) { + diagonalMoves.push_back(focus); + count++; + } else { + // We have encountered a blocking piece + break; + } + } + + // Up and right + count = 1; + focus = cell; + while (count <= upRight) { + focus -= (kNumRows - 1); + if (board_[focus] == CellState::kEmpty) { + diagonalMoves.push_back(focus); + count++; + } else { + // We have encountered a blocking piece + break; + } + } + + // Down and left + count = 1; + focus = cell; + while (count <= downLeft) { + focus += (kNumRows - 1); + if (board_[focus] == CellState::kEmpty) { + diagonalMoves.push_back(focus); + count++; + } else { + // We have encountered a blocking piece + break; + } + } + + // Down and right + count = 1; + focus = cell; + while (count <= downRight) { + focus += (kNumRows + 1); + if (board_[focus] == CellState::kEmpty) { + diagonalMoves.push_back(focus); + count++; + } else { + // We have encountered a blocking piece + break; + } + } + + return diagonalMoves; +} + +std::vector AmazonsState::GetAllMoves(Action cell) const { + std::vector horizontals = GetHorizontalMoves(cell); + std::vector verticals = GetVerticalMoves(cell); + std::vector 
diagonals = GetDiagonalMoves(cell); + + std::vector acc = horizontals; + + acc.insert(acc.end(), verticals.begin(), verticals.end()); + acc.insert(acc.end(), diagonals.begin(), diagonals.end()); + + return acc; +} + +void AmazonsState::DoApplyAction(Action action) { + switch (state_) { + case amazon_select: { + SPIEL_CHECK_EQ(board_[action], PlayerToState(CurrentPlayer())); + from_ = action; + board_[from_] = CellState::kEmpty; + state_ = destination_select; + } + break; + + case destination_select: { + SPIEL_CHECK_EQ(board_[action], CellState::kEmpty); + to_ = action; + board_[to_] = PlayerToState(CurrentPlayer()); + state_ = shot_select; + break; + } + + case shot_select: { + SPIEL_CHECK_EQ(board_[action], CellState::kEmpty); + shoot_ = action; + board_[shoot_] = CellState::kBlock; + current_player_ = 1 - current_player_; + state_ = amazon_select; + // Check if game is over + if (LegalActions().empty()) { + // outcome = winner + outcome_ = 1 - current_player_; + } + } + break; + } + ++num_moves_; +} + +void AmazonsState::UndoAction(Player player, Action move) { + switch (state_) { + case amazon_select: { + shoot_ = move; + board_[shoot_] = CellState::kEmpty; + current_player_ = player; + outcome_ = kInvalidPlayer; + state_ = shot_select; + } + break; + + case destination_select: { + from_ = move; + board_[from_] = PlayerToState(player); + state_ = amazon_select; + } + break; + + case shot_select: { + to_ = move; + board_[to_] = CellState::kEmpty; + state_ = destination_select; + } + break; + } + + --num_moves_; + --move_number_; + history_.pop_back(); +} + +std::vector AmazonsState::LegalActions() const { + if (IsTerminal()) return {}; + + std::vector actions; + + switch (state_) { + case amazon_select: + for (int i = 0; i < board_.size(); i++) { + if (board_[i] == PlayerToState(CurrentPlayer())) { + // check if the selected amazon has a possible move + if (GetAllMoves(i).empty()) continue; + + actions.push_back(i); + } + } + + break; + + case destination_select: + actions = GetAllMoves(from_); + break; + + case shot_select: + actions = GetAllMoves(to_); + break; + } + + sort(actions.begin(), actions.end()); + + return actions; +} + +std::string AmazonsState::ActionToString(Player player, Action action) const { + std::string str = absl::StrCat("(", (action / kNumRows) + 1, ", ", + (action % kNumRows) + 1, ")"); + + switch (state_) { + case amazon_select: + return absl::StrCat(StateToString(PlayerToState(player)), " From ", str); + + case destination_select: + return absl::StrCat(StateToString(PlayerToState(player)), " To ", str); + + case shot_select: + return absl::StrCat(StateToString(PlayerToState(player)), + " Shoot: ", str); + } + + std::cerr << "Unhandled case in AmazonState::ActionToString, " + << "returning empty string." 
<< std::endl; + return ""; +} + +// Looks okay +AmazonsState::AmazonsState(std::shared_ptr game) : State(game) { + std::fill(begin(board_), end(board_), CellState::kEmpty); + switch (kNumRows) { + case 6: + board_[1] = board_[4] = board_[6] = board_[11] = CellState::kCross; + board_[24] = board_[29] = board_[31] = board_[34] = CellState::kNought; + break; + + case 8: + board_[2] = board_[5] = board_[16] = board_[23] = CellState::kCross; + board_[40] = board_[47] = board_[58] = board_[61] = CellState::kNought; + break; + } +} + +void AmazonsState::SetState(int cur_player, MoveState move_state, + const std::array& board) { + current_player_ = cur_player; + state_ = move_state; + board_ = board; +} + +std::string AmazonsState::ToString() const { + std::string str; + for (int r = 0; r < kNumRows; ++r) { + for (int c = 0; c < kNumCols; ++c) { + absl::StrAppend(&str, StateToString(BoardAt(r, c))); + } + if (r < (kNumRows - 1)) { + absl::StrAppend(&str, "\n"); + } + } + return str; +} + +bool AmazonsState::IsTerminal() const { return outcome_ != kInvalidPlayer; } + +std::vector AmazonsState::Returns() const { + if (outcome_ == (Player{0})) { + return {1.0, -1.0}; + } else if (outcome_ == (Player{1})) { + return {-1.0, 1.0}; + } else { + return {0.0, 0.0}; + } +} + +std::string AmazonsState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return HistoryString(); +} + +std::string AmazonsState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void AmazonsState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + // Treat `values` as a 2-d tensor. + TensorView<2> view(values, {kCellStates, kNumCells}, true); + for (int cell = 0; cell < kNumCells; ++cell) { + view[{static_cast(board_[cell]), cell}] = 1.0; + } +} + +std::unique_ptr AmazonsState::Clone() const { + return std::unique_ptr(new AmazonsState(*this)); +} + +AmazonsGame::AmazonsGame(const GameParameters ¶ms) + : Game(kGameType, params) {} + +} // namespace amazons +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/amazons/amazons.h b/scenarios/bargaining/open_spiel/open_spiel/games/amazons/amazons.h new file mode 100644 index 0000000..1f44e99 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/amazons/amazons.h @@ -0,0 +1,150 @@ +// Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_AMAZONS_H_ +#define OPEN_SPIEL_GAMES_AMAZONS_H_ + +#include +#include +#include +#include +#include + +#include "open_spiel/spiel.h" + +// The "Game of Amazons": +// https://en.wikipedia.org/wiki/Game_of_the_Amazons +// +// Parameters: TODO: let the user choose the dimension + +namespace open_spiel { +namespace amazons { + +// Constants. 
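+// Note on indexing (derived from the implementation above): the board is
+// fixed at 6x6 here, and every Action is a flat cell index in
+// [0, kNumCells). For example, action 14 maps to row (14 / kNumRows) + 1 = 3
+// and column (14 % kNumRows) + 1 = 3, which ActionToString prints as
+// "(3, 3)". A full Amazons turn is entered as three consecutive actions:
+// select an amazon, select its destination, then shoot the blocking arrow.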
+ +inline constexpr int kNumPlayers = 2; +inline constexpr int kNumRows = 6; +inline constexpr int kNumCols = 6; +inline constexpr int kNumCells = kNumRows * kNumCols; +inline constexpr int kCellStates = 4; // empty, 'X', 'O', '@'. + +// Hensgens et al = 10e40 for 10x10 +inline constexpr int kNumberStates = 1000000000; + +// State of a cell. +enum class CellState { kEmpty, kNought, kCross, kBlock }; + +class AmazonsGame; + +// State of an in-play game. +class AmazonsState : public State { + public: + enum MoveState { amazon_select, destination_select, shot_select }; + + AmazonsState(std::shared_ptr game); + + AmazonsState(const AmazonsState&) = default; + + AmazonsState& operator=(const AmazonsState&) = default; + + Player CurrentPlayer() const override { + return IsTerminal() ? kTerminalPlayerId : current_player_; + } + std::string ActionToString(Player player, Action action_id) const override; + + std::string ToString() const override; + + bool IsTerminal() const override; + + void SetState(int cur_player, MoveState move_state, + const std::array& board); + + std::vector Returns() const override; + + std::string InformationStateString(Player player) const override; + + std::string ObservationString(Player player) const override; + + void ObservationTensor(Player player, + absl::Span values) const override; + + std::unique_ptr Clone() const override; + + void UndoAction(Player player, Action move) override; + + std::vector LegalActions() const override; + + CellState BoardAt(int cell) const { return board_[cell]; } + CellState BoardAt(int row, int column) const { + return board_[row * kNumCols + column]; + } + + protected: + std::array board_; + + void DoApplyAction(Action action) override; + + private: + MoveState state_ = amazon_select; + int from_ = 0; + int to_ = 0; + int shoot_ = 0; + + std::vector GetAllMoves(Action) const; + std::vector GetDiagonalMoves(Action) const; + std::vector GetVerticalMoves(Action) const; + std::vector GetHorizontalMoves(Action) const; + + bool IsGameOver() const; + + Player current_player_ = 0; // Player zero goes first + Player outcome_ = kInvalidPlayer; // Outcome unclear at init + int num_moves_ = 0; +}; + +// Game object. 
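+// A minimal usage sketch through the generic OpenSpiel Game/State interface
+// declared in spiel.h (assuming the game is registered under the short name
+// "amazons", as done in amazons.cc):
+//
+//   std::shared_ptr<const Game> game = LoadGame("amazons");
+//   std::unique_ptr<State> state = game->NewInitialState();
+//   while (!state->IsTerminal()) {
+//     std::vector<Action> legal = state->LegalActions();
+//     state->ApplyAction(legal[0]);  // three actions complete one turn
+//   }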
+class AmazonsGame : public Game { + public: + explicit AmazonsGame(const GameParameters& params); + + int NumDistinctActions() const override { return kNumCells; } + + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new AmazonsState(shared_from_this())); + } + + int NumPlayers() const override { return kNumPlayers; } + + double MinUtility() const override { return -1; } + absl::optional UtilitySum() const override { return 0; } + double MaxUtility() const override { return 1; } + + std::vector ObservationTensorShape() const override { + return {kCellStates, kNumRows, kNumCols}; + } + + int MaxGameLength() const override { return 3 * kNumCells; } +}; + +CellState PlayerToState(Player player); +std::string StateToString(CellState state); + +inline std::ostream& operator<<(std::ostream& stream, const CellState& state) { + return stream << StateToString(state); +} + +} // namespace amazons +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_AMAZONS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/amazons/amazons_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/amazons/amazons_test.cc new file mode 100644 index 0000000..9a52841 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/amazons/amazons_test.cc @@ -0,0 +1,166 @@ +// Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/amazons/amazons.h" + +#include +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace amazons { +namespace { + +namespace testing = open_spiel::testing; + +void BasicSpielTests() { + testing::LoadGameTest("amazons"); + testing::RandomSimTest(*LoadGame("amazons"), 100); + testing::RandomSimTestWithUndo(*LoadGame("amazons"), 5); +} + +// Test the given configuration for player 1 win: +// Player 1 = Cross, Player 2 = Nought +// |O # X _ ### ...| +void PlayerOneSimpleWinTest() { + std::shared_ptr game = LoadGame("amazons"); + std::unique_ptr state = game->NewInitialState(); + AmazonsState* astate = static_cast(state.get()); + + std::array board = {}; + for (int i = 0; i < board.size(); i++) { + board[i] = CellState::kBlock; + } + board[0] = CellState::kNought; + board[2] = CellState::kCross; + board[3] = CellState::kEmpty; + + astate->SetState(1, AmazonsState::MoveState::amazon_select, board); + + std::cout << "PlayerOneWinTest: \n" << astate->ToString() << "\n"; + + SPIEL_CHECK_TRUE(astate->LegalActions().empty()); + + std::cout << "Success!" 
+ << "\n\n"; +} + +// Test the given configuration for player 2 win: +// Player 1 = Cross, Player 2 = Nought +// |X # O _ ### ...| +void PlayerTwoSimpleWinTest() { + std::shared_ptr game = LoadGame("amazons"); + std::unique_ptr state = game->NewInitialState(); + AmazonsState* astate = static_cast(state.get()); + + std::array board = {}; + for (int i = 0; i < board.size(); i++) { + board[i] = CellState::kBlock; + } + board[0] = CellState::kCross; + board[2] = CellState::kNought; + board[3] = CellState::kEmpty; + + astate->SetState(0, AmazonsState::MoveState::amazon_select, board); + + std::cout << "PlayerTwoWinTest: \n" << astate->ToString() << "\n"; + + SPIEL_CHECK_TRUE(astate->LegalActions().empty()); + + std::cout << "Success!" + << "\n\n"; +} + +// Test given configuration for player 1 no moves +// ....... +// ..OOO.. +// ..OXO.. +// ..OOO.. +// ....... +void PlayerOneTrappedByAmazonsTest() { + std::shared_ptr game = LoadGame("amazons"); + std::unique_ptr state = game->NewInitialState(); + AmazonsState* astate = static_cast(state.get()); + + std::array board = {}; + for (int i = 0; i < board.size(); i++) { + board[i] = CellState::kEmpty; + } + int center = kNumCells / 2 + kNumRows / 2; + board[center] = CellState::kCross; + board[center - 1] = board[center + 1] = CellState::kNought; + board[center - kNumRows] = board[center - kNumRows - 1] = + board[center - kNumRows + 1] = CellState::kNought; + board[center + kNumRows] = board[center + kNumRows - 1] = + board[center + kNumRows + 1] = CellState::kNought; + + astate->SetState(0, AmazonsState::MoveState::amazon_select, board); + + std::cout << "PlayerOneTrappedByAmazonsTest: \n" + << astate->ToString() << "\n"; + + SPIEL_CHECK_TRUE(astate->LegalActions().empty()); + + std::cout << "Success!" + << "\n\n"; +} +// Test given configuration for player 1 no moves +// ....... +// ..###.. +// ..#X#.. +// ..###.. +// ....... +void PlayerOneTrappedByBlocksTest() { + std::shared_ptr game = LoadGame("amazons"); + std::unique_ptr state = game->NewInitialState(); + AmazonsState* astate = static_cast(state.get()); + + std::array board = {}; + for (int i = 0; i < board.size(); i++) { + board[i] = CellState::kEmpty; + } + int center = kNumCells / 2 + kNumRows / 2; + board[center] = CellState::kCross; + board[center - 1] = board[center + 1] = CellState::kBlock; + board[center - kNumRows] = board[center - kNumRows - 1] = + board[center - kNumRows + 1] = CellState::kBlock; + board[center + kNumRows] = board[center + kNumRows - 1] = + board[center + kNumRows + 1] = CellState::kBlock; + + astate->SetState(0, AmazonsState::MoveState::amazon_select, board); + + std::cout << "PlayerOneTrappedByBlocksTest: \n" << astate->ToString() << "\n"; + + SPIEL_CHECK_TRUE(astate->LegalActions().empty()); + + std::cout << "Success!" 
+ << "\n\n"; +} + +} // namespace +} // namespace amazons +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::amazons::BasicSpielTests(); + + // These tests check whether certain board configurations indicate the correct + // number of moves + open_spiel::amazons::PlayerOneSimpleWinTest(); + open_spiel::amazons::PlayerTwoSimpleWinTest(); + open_spiel::amazons::PlayerOneTrappedByAmazonsTest(); + open_spiel::amazons::PlayerOneTrappedByBlocksTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/backgammon/backgammon.cc b/scenarios/bargaining/open_spiel/open_spiel/games/backgammon/backgammon.cc new file mode 100644 index 0000000..85b778c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/backgammon/backgammon.cc @@ -0,0 +1,1366 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/backgammon/backgammon.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_set.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/observer.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace backgammon { +namespace { + +// A few constants to help with the conversion to human-readable string formats. +// TODO: remove these once we've changed kBarPos and kScorePos (see TODO in +// header). 
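+// For display only: the helpers below map the bar to 25 and borne-off
+// checkers to -2, so PositionToStringHumanReadable(25) yields "Bar" and
+// PositionToStringHumanReadable(-2) yields "Off"; ordinary board positions
+// are printed as their number.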
+constexpr int kNumBarPosHumanReadable = 25; +constexpr int kNumOffPosHumanReadable = -2; +constexpr int kNumNonDoubleOutcomes = 15; + +const std::vector> kChanceOutcomes = { + std::pair(0, 1.0 / 18), + std::pair(1, 1.0 / 18), + std::pair(2, 1.0 / 18), + std::pair(3, 1.0 / 18), + std::pair(4, 1.0 / 18), + std::pair(5, 1.0 / 18), + std::pair(6, 1.0 / 18), + std::pair(7, 1.0 / 18), + std::pair(8, 1.0 / 18), + std::pair(9, 1.0 / 18), + std::pair(10, 1.0 / 18), + std::pair(11, 1.0 / 18), + std::pair(12, 1.0 / 18), + std::pair(13, 1.0 / 18), + std::pair(14, 1.0 / 18), + std::pair(15, 1.0 / 36), + std::pair(16, 1.0 / 36), + std::pair(17, 1.0 / 36), + std::pair(18, 1.0 / 36), + std::pair(19, 1.0 / 36), + std::pair(20, 1.0 / 36), +}; + +const std::vector> kChanceOutcomeValues = { + {1, 2}, {1, 3}, {1, 4}, {1, 5}, {1, 6}, {2, 3}, {2, 4}, + {2, 5}, {2, 6}, {3, 4}, {3, 5}, {3, 6}, {4, 5}, {4, 6}, + {5, 6}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}}; + +int NumCheckersPerPlayer(const Game* game) { + return static_cast(game)->NumCheckersPerPlayer(); +} + +// Facts about the game +const GameType kGameType{ + /*short_name=*/"backgammon", + /*long_name=*/"Backgammon", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*min_num_players=*/2, + /*max_num_players=*/2, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"hyper_backgammon", GameParameter(kDefaultHyperBackgammon)}, + {"scoring_type", + GameParameter(static_cast(kDefaultScoringType))}}}; + +static std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new BackgammonGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); +} // namespace + +ScoringType ParseScoringType(const std::string& st_str) { + if (st_str == "winloss_scoring") { + return ScoringType::kWinLossScoring; + } else if (st_str == "enable_gammons") { + return ScoringType::kEnableGammons; + } else if (st_str == "full_scoring") { + return ScoringType::kFullScoring; + } else { + SpielFatalError("Unrecognized scoring_type parameter: " + st_str); + } +} + +std::string PositionToString(int pos) { + switch (pos) { + case kBarPos: + return "Bar"; + case kScorePos: + return "Score"; + case -1: + return "Pass"; + default: + return absl::StrCat(pos); + } +} + +std::string CurPlayerToString(Player cur_player) { + switch (cur_player) { + case kXPlayerId: + return "x"; + case kOPlayerId: + return "o"; + case kChancePlayerId: + return "*"; + case kTerminalPlayerId: + return "T"; + default: + SpielFatalError(absl::StrCat("Unrecognized player id: ", cur_player)); + } +} + +std::string PositionToStringHumanReadable(int pos) { + if (pos == kNumBarPosHumanReadable) { + return "Bar"; + } else if (pos == kNumOffPosHumanReadable) { + return "Off"; + } else { + return PositionToString(pos); + } +} + +int BackgammonState::AugmentCheckerMove(CheckerMove* cmove, int player, + int start) const { + int end = cmove->num; + if (end != kPassPos) { + // Not a pass, so work out where the piece finished + end = start - cmove->num; + if (end <= 0) { + end = kNumOffPosHumanReadable; // Off + } else if (board_[Opponent(player)] + [player == kOPlayerId ? 
(end - 1) : (kNumPoints - end)] == + 1) { + cmove->hit = true; // Check to see if move is a hit + } + } + return end; +} + +std::string BackgammonState::ActionToString(Player player, + Action move_id) const { + if (player == kChancePlayerId) { + if (turns_ >= 0) { + // Normal chance roll. + return absl::StrCat("chance outcome ", move_id, + " (roll: ", kChanceOutcomeValues[move_id][0], + kChanceOutcomeValues[move_id][1], ")"); + } else { + // Initial roll to determine who starts. + const char* starter = (move_id < kNumNonDoubleOutcomes ? + "X starts" : "O starts"); + if (move_id >= kNumNonDoubleOutcomes) { + move_id -= kNumNonDoubleOutcomes; + } + return absl::StrCat("chance outcome ", move_id, " ", starter, ", ", + "(roll: ", kChanceOutcomeValues[move_id][0], + kChanceOutcomeValues[move_id][1], ")"); + } + } else { + // Assemble a human-readable string representation of the move using + // standard backgammon notation: + // + // - Always show the numbering going from Bar->24->0->Off, irrespective of + // which player is moving. + // - Show the start position followed by end position. + // - Show hits with an asterisk, e.g. 9/7*. + // - Order the moves by highest number first, e.g. 22/7 10/8 not 10/8 22/7. + // Not an official requirement, but seems to be standard convention. + // - Show duplicate moves as 10/8(2). + // - Show moves on a single piece as 10/8/5 not 10/8 8/5 + // + // Note that there are tests to ensure the ActionToString follows this + // output format. Any changes would need to be reflected in the tests as + // well. + std::vector cmoves = SpielMoveToCheckerMoves(player, move_id); + + int cmove0_start; + int cmove1_start; + if (player == kOPlayerId) { + cmove0_start = (cmoves[0].pos == kBarPos ? kNumBarPosHumanReadable + : cmoves[0].pos + 1); + cmove1_start = (cmoves[1].pos == kBarPos ? kNumBarPosHumanReadable + : cmoves[1].pos + 1); + } else { + // swap the board numbering round for Player X so player is moving + // from 24->0 + cmove0_start = (cmoves[0].pos == kBarPos ? kNumBarPosHumanReadable + : kNumPoints - cmoves[0].pos); + cmove1_start = (cmoves[1].pos == kBarPos ? kNumBarPosHumanReadable + : kNumPoints - cmoves[1].pos); + } + + // Add hit information and compute whether the moves go off the board. + int cmove0_end = AugmentCheckerMove(&cmoves[0], player, cmove0_start); + int cmove1_end = AugmentCheckerMove(&cmoves[1], player, cmove1_start); + + // check for 2 pieces hitting on the same point. + bool double_hit = + (cmoves[1].hit && cmoves[0].hit && cmove1_end == cmove0_end); + + std::string returnVal = ""; + if (cmove0_start == cmove1_start && + cmove0_end == cmove1_end) { // same move, show as (2). + if (cmoves[1].num == kPassPos) { // Player can't move at all! + returnVal = "Pass"; + } else { + returnVal = absl::StrCat(PositionToStringHumanReadable(cmove0_start), + "/", PositionToStringHumanReadable(cmove0_end), + cmoves[0].hit ? "*" : "", "(2)"); + } + } else if ((cmove0_start < cmove1_start || + (cmove0_start == cmove1_start && cmove0_end < cmove1_end) || + cmoves[0].num == kPassPos) && + cmoves[1].num != kPassPos) { + // tradition to start with higher numbers first, + // so swap moves round if this not the case. If + // there is a pass move, put it last. + if (cmove1_end == cmove0_start) { + // Check to see if the same piece is moving for both + // moves, as this changes the format of the output. + returnVal = absl::StrCat( + PositionToStringHumanReadable(cmove1_start), "/", + PositionToStringHumanReadable(cmove1_end), cmoves[1].hit ? 
"*" : "", + "/", PositionToStringHumanReadable(cmove0_end), + cmoves[0].hit ? "*" : ""); + } else { + returnVal = absl::StrCat( + PositionToStringHumanReadable(cmove1_start), "/", + PositionToStringHumanReadable(cmove1_end), cmoves[1].hit ? "*" : "", + " ", + (cmoves[0].num != kPassPos) + ? PositionToStringHumanReadable(cmove0_start) + : "", + (cmoves[0].num != kPassPos) ? "/" : "", + PositionToStringHumanReadable(cmove0_end), + (cmoves[0].hit && !double_hit) ? "*" : ""); + } + } else { + if (cmove0_end == cmove1_start) { + // Check to see if the same piece is moving for both + // moves, as this changes the format of the output. + returnVal = absl::StrCat( + PositionToStringHumanReadable(cmove0_start), "/", + PositionToStringHumanReadable(cmove0_end), cmoves[0].hit ? "*" : "", + "/", PositionToStringHumanReadable(cmove1_end), + cmoves[1].hit ? "*" : ""); + } else { + returnVal = absl::StrCat( + PositionToStringHumanReadable(cmove0_start), "/", + PositionToStringHumanReadable(cmove0_end), cmoves[0].hit ? "*" : "", + " ", + (cmoves[1].num != kPassPos) + ? PositionToStringHumanReadable(cmove1_start) + : "", + (cmoves[1].num != kPassPos) ? "/" : "", + PositionToStringHumanReadable(cmove1_end), + (cmoves[1].hit && !double_hit) ? "*" : ""); + } + } + + return returnVal; + } +} + +std::string BackgammonState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void BackgammonState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + int opponent = Opponent(player); + SPIEL_CHECK_EQ(values.size(), kStateEncodingSize); + auto value_it = values.begin(); + // The format of this vector is described in Section 3.4 of "G. Tesauro, + // Practical issues in temporal-difference learning, 1994." + // https://link.springer.com/article/10.1007/BF00992697 + // The values of the dice are added in the last two positions of the vector. + for (int count : board_[player]) { + *value_it++ = ((count == 1) ? 1 : 0); + *value_it++ = ((count == 2) ? 1 : 0); + *value_it++ = ((count == 3) ? 1 : 0); + *value_it++ = ((count > 3) ? (count - 3) : 0); + } + for (int count : board_[opponent]) { + *value_it++ = ((count == 1) ? 1 : 0); + *value_it++ = ((count == 2) ? 1 : 0); + *value_it++ = ((count == 3) ? 1 : 0); + *value_it++ = ((count > 3) ? (count - 3) : 0); + } + *value_it++ = (bar_[player]); + *value_it++ = (scores_[player]); + *value_it++ = ((cur_player_ == player) ? 1 : 0); + + *value_it++ = (bar_[opponent]); + *value_it++ = (scores_[opponent]); + *value_it++ = ((cur_player_ == opponent) ? 1 : 0); + + *value_it++ = ((!dice_.empty()) ? dice_[0] : 0); + *value_it++ = ((dice_.size() > 1) ? dice_[1] : 0); + + SPIEL_CHECK_EQ(value_it, values.end()); +} + +BackgammonState::BackgammonState(std::shared_ptr game, + ScoringType scoring_type, + bool hyper_backgammon) + : State(game), + scoring_type_(scoring_type), + hyper_backgammon_(hyper_backgammon), + cur_player_(kChancePlayerId), + prev_player_(kChancePlayerId), + turns_(-1), + x_turns_(0), + o_turns_(0), + double_turn_(false), + dice_({}), + bar_({0, 0}), + scores_({0, 0}), + board_( + {std::vector(kNumPoints, 0), std::vector(kNumPoints, 0)}), + turn_history_info_({}) { + SetupInitialBoard(); +} + +void BackgammonState::SetupInitialBoard() { + if (hyper_backgammon_) { + // https://bkgm.com/variants/HyperBackgammon.html + // Each player has one checker on each of the furthest points. 
+ board_[kXPlayerId][0] = board_[kXPlayerId][1] = board_[kXPlayerId][2] = 1; + board_[kOPlayerId][23] = board_[kOPlayerId][22] = board_[kOPlayerId][21] = + 1; + } else { + // Setup the board. First, XPlayer. + board_[kXPlayerId][0] = 2; + board_[kXPlayerId][11] = 5; + board_[kXPlayerId][16] = 3; + board_[kXPlayerId][18] = 5; + // OPlayer. + board_[kOPlayerId][23] = 2; + board_[kOPlayerId][12] = 5; + board_[kOPlayerId][7] = 3; + board_[kOPlayerId][5] = 5; + } +} + +int BackgammonState::board(int player, int pos) const { + if (pos == kBarPos) { + return bar_[player]; + } else { + SPIEL_CHECK_GE(pos, 0); + SPIEL_CHECK_LT(pos, kNumPoints); + return board_[player][pos]; + } +} + +Player BackgammonState::CurrentPlayer() const { + return IsTerminal() ? kTerminalPlayerId : Player{cur_player_}; +} + +int BackgammonState::Opponent(int player) const { return 1 - player; } + +void BackgammonState::RollDice(int outcome) { + dice_.push_back(kChanceOutcomeValues[outcome][0]); + dice_.push_back(kChanceOutcomeValues[outcome][1]); +} + +int BackgammonState::DiceValue(int i) const { + SPIEL_CHECK_GE(i, 0); + SPIEL_CHECK_LT(i, dice_.size()); + + if (dice_[i] >= 1 && dice_[i] <= 6) { + return dice_[i]; + } else if (dice_[i] >= 7 && dice_[i] <= 12) { + // This die is marked as chosen, so return its proper value. + // Note: dice are only marked as chosen during the legal moves enumeration. + return dice_[i] - 6; + } else { + SpielFatalError(absl::StrCat("Bad dice value: ", dice_[i])); + } +} + +void BackgammonState::DoApplyAction(Action move) { + if (IsChanceNode()) { + turn_history_info_.push_back(TurnHistoryInfo(kChancePlayerId, prev_player_, + dice_, move, double_turn_, + false, false)); + + if (turns_ == -1) { + // The first chance node determines who goes first: X or O. + // The move is between 0 and 29 and the range determines whether X starts + // or O starts. The value is then converted to a number between 0 and 15, + // which represents the non-double chance outcome that the first player + // starts with (see RollDice(move) below). These 30 possibilities are + // constructed in GetChanceOutcomes(). + SPIEL_CHECK_TRUE(dice_.empty()); + if (move < kNumNonDoubleOutcomes) { + // X starts. + cur_player_ = prev_player_ = kXPlayerId; + } else { + // O Starts + cur_player_ = prev_player_ = kOPlayerId; + move -= kNumNonDoubleOutcomes; + } + RollDice(move); + turns_ = 0; + return; + } else { + // Normal chance node. + SPIEL_CHECK_TRUE(dice_.empty()); + RollDice(move); + cur_player_ = Opponent(prev_player_); + return; + } + } + + // Normal move action. + std::vector moves = SpielMoveToCheckerMoves(cur_player_, move); + bool first_move_hit = ApplyCheckerMove(cur_player_, moves[0]); + bool second_move_hit = ApplyCheckerMove(cur_player_, moves[1]); + + turn_history_info_.push_back( + TurnHistoryInfo(cur_player_, prev_player_, dice_, move, double_turn_, + first_move_hit, second_move_hit)); + + if (!double_turn_) { + turns_++; + if (cur_player_ == kXPlayerId) { + x_turns_++; + } else if (cur_player_ == kOPlayerId) { + o_turns_++; + } + } + + prev_player_ = cur_player_; + + // Check for doubles. + bool extra_turn = false; + if (!double_turn_ && dice_[0] == dice_[1]) { + // Check the dice, and unuse them if they are used. + int dice_used = 0; + for (int i = 0; i < 2; i++) { + if (dice_[i] > 6) { + dice_[i] -= 6; + dice_used++; + } + SPIEL_CHECK_GE(dice_[i], 1); + SPIEL_CHECK_LE(dice_[i], 6); + } + + if (dice_used == 2) { + extra_turn = true; + } + } + + if (extra_turn) { + // Dice have been unused above. 
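+    // For example, on a roll of (5, 5): if both 5s were used this ply, they
+    // were un-marked just above and double_turn_ is set here, so the same
+    // player immediately plays a second two-move ply with the same dice
+    // before control returns to the chance node. Doubles therefore allow up
+    // to four checker moves per roll, split across two consecutive plies.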
+ double_turn_ = true; + } else { + cur_player_ = kChancePlayerId; + dice_.clear(); + double_turn_ = false; + } +} + +void BackgammonState::UndoAction(int player, Action action) { + { + const TurnHistoryInfo& thi = turn_history_info_.back(); + SPIEL_CHECK_EQ(thi.player, player); + SPIEL_CHECK_EQ(action, thi.action); + cur_player_ = thi.player; + prev_player_ = thi.prev_player; + dice_ = thi.dice; + double_turn_ = thi.double_turn; + if (player != kChancePlayerId) { + std::vector moves = SpielMoveToCheckerMoves(player, action); + SPIEL_CHECK_EQ(moves.size(), 2); + moves[0].hit = thi.first_move_hit; + moves[1].hit = thi.second_move_hit; + UndoCheckerMove(player, moves[1]); + UndoCheckerMove(player, moves[0]); + turns_--; + if (!double_turn_) { + if (player == kXPlayerId) { + x_turns_--; + } else if (player == kOPlayerId) { + o_turns_--; + } + } + } + } + turn_history_info_.pop_back(); + history_.pop_back(); + --move_number_; +} + +bool BackgammonState::IsHit(Player player, int from_pos, int num) const { + if (from_pos != kPassPos) { + int to = PositionFrom(player, from_pos, num); + return to != kScorePos && board(Opponent(player), to) == 1; + } else { + return false; + } +} + +Action BackgammonState::TranslateAction(int from1, int from2, + bool use_high_die_first) const { + int player = CurrentPlayer(); + int num1 = use_high_die_first ? dice_.at(1) : dice_.at(0); + int num2 = use_high_die_first ? dice_.at(0) : dice_.at(1); + bool hit1 = IsHit(player, from1, num1); + bool hit2 = IsHit(player, from2, num2); + std::vector moves = {{from1, num1, hit1}, {from2, num2, hit2}}; + return CheckerMovesToSpielMove(moves); +} + +Action BackgammonState::EncodedBarMove() const { return 24; } + +Action BackgammonState::EncodedPassMove() const { return 25; } + +Action BackgammonState::CheckerMovesToSpielMove( + const std::vector& moves) const { + SPIEL_CHECK_LE(moves.size(), 2); + int dig0 = EncodedPassMove(); + int dig1 = EncodedPassMove(); + bool high_roll_first = false; + int high_roll = DiceValue(0) >= DiceValue(1) ? DiceValue(0) : DiceValue(1); + + if (!moves.empty()) { + int pos1 = moves[0].pos; + if (pos1 == kBarPos) { + pos1 = EncodedBarMove(); + } + if (pos1 != kPassPos) { + int num1 = moves[0].num; + dig0 = pos1; + high_roll_first = num1 == high_roll; + } + } + + if (moves.size() > 1) { + int pos2 = moves[1].pos; + if (pos2 == kBarPos) { + pos2 = EncodedBarMove(); + } + if (pos2 != kPassPos) { + dig1 = pos2; + } + } + + Action move = dig1 * 26 + dig0; + if (!high_roll_first) { + move += 676; // 26**2 + } + SPIEL_CHECK_GE(move, 0); + SPIEL_CHECK_LT(move, kNumDistinctActions); + return move; +} + +std::vector BackgammonState::SpielMoveToCheckerMoves( + int player, Action spiel_move) const { + SPIEL_CHECK_GE(spiel_move, 0); + SPIEL_CHECK_LT(spiel_move, kNumDistinctActions); + + bool high_roll_first = spiel_move < 676; + if (!high_roll_first) { + spiel_move -= 676; + } + + std::vector digits = {spiel_move % 26, spiel_move / 26}; + std::vector cmoves; + int high_roll = DiceValue(0) >= DiceValue(1) ? DiceValue(0) : DiceValue(1); + int low_roll = DiceValue(0) < DiceValue(1) ? DiceValue(0) : DiceValue(1); + + for (int i = 0; i < 2; ++i) { + SPIEL_CHECK_GE(digits[i], 0); + SPIEL_CHECK_LE(digits[i], 25); + + int num = -1; + if (i == 0) { + num = high_roll_first ? high_roll : low_roll; + } else { + num = high_roll_first ? 
low_roll : high_roll; + } + SPIEL_CHECK_GE(num, 1); + SPIEL_CHECK_LE(num, 6); + + if (digits[i] == EncodedPassMove()) { + cmoves.push_back(CheckerMove(kPassPos, -1, false)); + } else { + cmoves.push_back(CheckerMove( + digits[i] == EncodedBarMove() ? kBarPos : digits[i], num, false)); + } + } + + return cmoves; +} + +std::vector BackgammonState::AugmentWithHitInfo( + int player, const std::vector &cmoves) const { + std::vector new_cmoves = cmoves; + for (int i = 0; i < 2; ++i) { + new_cmoves[i].hit = IsHit(player, cmoves[i].pos, cmoves[i].num); + } + return new_cmoves; +} + +bool BackgammonState::IsPosInHome(int player, int pos) const { + switch (player) { + case kXPlayerId: + return (pos >= 18 && pos <= 23); + case kOPlayerId: + return (pos >= 0 && pos <= 5); + default: + SpielFatalError(absl::StrCat("Unknown player ID: ", player)); + } +} + +int BackgammonState::CheckersInHome(int player) const { + int c = 0; + for (int i = 0; i < 6; i++) { + c += board(player, (player == kXPlayerId ? (23 - i) : i)); + } + return c; +} + +bool BackgammonState::AllInHome(int player) const { + if (bar_[player] > 0) { + return false; + } + + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LE(player, 1); + + // Looking for any checkers outside home. + // --> XPlayer scans 0-17. + // --> OPlayer scans 6-23. + int scan_start = (player == kXPlayerId ? 0 : 6); + int scan_end = (player == kXPlayerId ? 17 : 23); + + for (int i = scan_start; i <= scan_end; ++i) { + if (board_[player][i] > 0) { + return false; + } + } + + return true; +} + +int BackgammonState::HighestUsableDiceOutcome() const { + if (UsableDiceOutcome(dice_[1])) { + return dice_[1]; + } else if (UsableDiceOutcome(dice_[0])) { + return dice_[0]; + } else { + return -1; + } +} + +int BackgammonState::FurthestCheckerInHome(int player) const { + // Looking for any checkers in home. + // --> XPlayer scans 23 -> 18 + // --> OPlayer scans 0 -> 5 + int scan_start = (player == kXPlayerId ? 23 : 0); + int scan_end = (player == kXPlayerId ? 17 : 6); + int inc = (player == kXPlayerId ? -1 : 1); + + int furthest = (player == kXPlayerId ? 24 : -1); + + for (int i = scan_start; i != scan_end; i += inc) { + if (board_[player][i] > 0) { + furthest = i; + } + } + + if (furthest == 24 || furthest == -1) { + return -1; + } else { + return furthest; + } +} + +bool BackgammonState::UsableDiceOutcome(int outcome) const { + return (outcome >= 1 && outcome <= 6); +} + +int BackgammonState::PositionFromBar(int player, int spaces) const { + if (player == kXPlayerId) { + return -1 + spaces; + } else if (player == kOPlayerId) { + return 24 - spaces; + } else { + SpielFatalError(absl::StrCat("Invalid player: ", player)); + } +} + +int BackgammonState::PositionFrom(int player, int pos, int spaces) const { + if (pos == kBarPos) { + return PositionFromBar(player, spaces); + } + + if (player == kXPlayerId) { + int new_pos = pos + spaces; + return (new_pos > 23 ? kScorePos : new_pos); + } else if (player == kOPlayerId) { + int new_pos = pos - spaces; + return (new_pos < 0 ? 
kScorePos : new_pos); + } else { + SpielFatalError(absl::StrCat("Invalid player: ", player)); + } +} + +int BackgammonState::NumOppCheckers(int player, int pos) const { + return board_[Opponent(player)][pos]; +} + +int BackgammonState::GetDistance(int player, int from, int to) const { + SPIEL_CHECK_NE(from, kScorePos); + SPIEL_CHECK_NE(to, kScorePos); + if (from == kBarPos && player == kXPlayerId) { + from = -1; + } else if (from == kBarPos && player == kOPlayerId) { + from = 24; + } + return std::abs(to - from); +} + +bool BackgammonState::IsOff(int player, int pos) const { + // Returns if an absolute position is off the board. + return ((player == kXPlayerId && pos > 23) || + (player == kOPlayerId && pos < 0)); +} + +bool BackgammonState::IsFurther(int player, int pos1, int pos2) const { + if (pos1 == pos2) { + return false; + } + + if (pos1 == kBarPos) { + return true; + } + + if (pos2 == kBarPos) { + return false; + } + + if (pos1 == kPassPos) { + return false; + } + + if (pos2 == kPassPos) { + return false; + } + + return ((player == kXPlayerId && pos1 < pos2) || + (player == kOPlayerId && pos1 > pos2)); +} + +int BackgammonState::GetToPos(int player, int from_pos, int pips) const { + if (player == kXPlayerId) { + return (from_pos == kBarPos ? -1 : from_pos) + pips; + } else if (player == kOPlayerId) { + return (from_pos == kBarPos ? 24 : from_pos) - pips; + } else { + SpielFatalError(absl::StrCat("Player (", player, ") unrecognized.")); + } +} + +// Basic from_to check (including bar checkers). +bool BackgammonState::IsLegalFromTo(int player, int from_pos, int to_pos, + int my_checkers_from, + int opp_checkers_to) const { + // Must have at least one checker the from position. + if (my_checkers_from == 0) { + return false; + } + + if (opp_checkers_to > 1) { + return false; + } + + // Quick validity checks out of the way. This appears to be a valid move. + // Now, must check: if there are moves on this player's bar, they must move + // them first, and if there are no legal moves out of the bar, the player + // loses their turn. + int my_bar_checkers = board(player, kBarPos); + if (my_bar_checkers > 0 && from_pos != kBarPos) { + return false; + } + + // If this is a scoring move, then check that all this player's checkers are + // either scored or home. + if (to_pos < 0 || to_pos > 23) { + if ((CheckersInHome(player) + scores_[player]) != 15) { + return false; + } + + // If it's not *exactly* the right amount, then we have to do a check to see + // if there exist checkers further from home, as those must be moved first. 
+ if (player == kXPlayerId && to_pos > 24) { + for (int pos = from_pos - 1; pos >= 18; pos--) { + if (board(player, pos) > 0) { + return false; + } + } + } else if (player == kOPlayerId && to_pos < -1) { + for (int pos = from_pos + 1; pos <= 5; pos++) { + if (board(player, pos) > 0) { + return false; + } + } + } + } + + return true; +} + +std::string BackgammonState::DiceToString(int outcome) const { + if (outcome > 6) { + return std::to_string(outcome - 6) + "u"; + } else { + return std::to_string(outcome); + } +} + +int BackgammonState::CountTotalCheckers(int player) const { + int total = 0; + for (int i = 0; i < 24; ++i) { + SPIEL_CHECK_GE(board_[player][i], 0); + total += board_[player][i]; + } + SPIEL_CHECK_GE(bar_[player], 0); + total += bar_[player]; + SPIEL_CHECK_GE(scores_[player], 0); + total += scores_[player]; + return total; +} + +int BackgammonState::IsGammoned(int player) const { + if (hyper_backgammon_) { + // TODO(author5): remove this when the doubling cube is implemented. + // In Hyper-backgammon, gammons and backgammons only multiply when the cube + // has been offered and accepted. However, we do not yet support the cube. + return false; + } + + // Does the player not have any checkers borne off? + return scores_[player] == 0; +} + +int BackgammonState::IsBackgammoned(int player) const { + if (hyper_backgammon_) { + // TODO(author5): remove this when the doubling cube is implemented. + // In Hyper-backgammon, gammons and backgammons only multiply when the cube + // has been offered and accepted. However, we do not yet support the cube. + return false; + } + + // Does the player not have any checkers borne off and either has a checker + // still in the bar or still in the opponent's home? + if (scores_[player] > 0) { + return false; + } + + if (bar_[player] > 0) { + return true; + } + + // XPlayer scans 0-5. + // OPlayer scans 18-23. + int scan_start = (player == kXPlayerId ? 0 : 18); + int scan_end = (player == kXPlayerId ? 5 : 23); + + for (int i = scan_start; i <= scan_end; ++i) { + if (board_[player][i] > 0) { + return true; + } + } + + return false; +} + +std::set BackgammonState::LegalCheckerMoves(int player) const { + std::set moves; + + if (bar_[player] > 0) { + // If there are any checkers are the bar, must move them out first. + for (int outcome : dice_) { + if (UsableDiceOutcome(outcome)) { + int pos = PositionFromBar(player, outcome); + if (NumOppCheckers(player, pos) <= 1) { + bool hit = NumOppCheckers(player, pos) == 1; + moves.insert(CheckerMove(kBarPos, outcome, hit)); + } + } + } + return moves; + } + + // Regular board moves. + bool all_in_home = AllInHome(player); + for (int i = 0; i < kNumPoints; ++i) { + if (board_[player][i] > 0) { + for (int outcome : dice_) { + if (UsableDiceOutcome(outcome)) { + int pos = PositionFrom(player, i, outcome); + if (pos == kScorePos && all_in_home) { + // Check whether a bear off move is legal. + + // It is ok to bear off if all the checkers are at home and the + // point being used to move from exactly matches the distance from + // just stepping off the board. + if ((player == kXPlayerId && i + outcome == 24) || + (player == kOPlayerId && i - outcome == -1)) { + moves.insert(CheckerMove(i, outcome, false)); + } else { + // Otherwise, a die can only be used to move a checker off if + // there are no checkers further than it in the player's home. 
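+                // For example, with all of X's checkers home and sitting
+                // only on indices 20 and 22, FurthestCheckerInHome returns
+                // 20, so a rolled 6 may bear off from index 20
+                // (20 + 6 > 24) but not from index 22, while a rolled 2
+                // bears off from index 22 exactly (22 + 2 == 24) via the
+                // branch above.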
+ if (i == FurthestCheckerInHome(player)) { + moves.insert(CheckerMove(i, outcome, false)); + } + } + } else if (pos != kScorePos && NumOppCheckers(player, pos) <= 1) { + // Regular move. + bool hit = NumOppCheckers(player, pos) == 1; + moves.insert(CheckerMove(i, outcome, hit)); + } + } + } + } + } + return moves; +} + +bool BackgammonState::ApplyCheckerMove(int player, const CheckerMove& move) { + // Pass does nothing. + if (move.pos < 0) { + return false; + } + + // First, remove the checker. + int next_pos = -1; + if (move.pos == kBarPos) { + bar_[player]--; + next_pos = PositionFromBar(player, move.num); + } else { + board_[player][move.pos]--; + next_pos = PositionFrom(player, move.pos, move.num); + } + + // Mark the die as used. + for (int i = 0; i < 2; ++i) { + if (dice_[i] == move.num) { + dice_[i] += 6; + break; + } + } + + // Now add the checker (or score). + if (next_pos == kScorePos) { + scores_[player]++; + } else { + board_[player][next_pos]++; + } + + bool hit = false; + // If there was a hit, remove opponent's piece and add to bar. + // Note: the move.hit will only be properly set during the legal moves search, + // so we have to also check here if there is a hit candidate. + if (move.hit || + (next_pos != kScorePos && board_[Opponent(player)][next_pos] == 1)) { + hit = true; + board_[Opponent(player)][next_pos]--; + bar_[Opponent(player)]++; + } + + return hit; +} + +// Undoes a checker move. Important note: this checkermove needs to have +// move.hit set from the history to properly undo a move (this information is +// not tracked in the action value). +void BackgammonState::UndoCheckerMove(int player, const CheckerMove& move) { + // Undoing a pass does nothing + if (move.pos < 0) { + return; + } + + // First, figure out the next position. + int next_pos = -1; + if (move.pos == kBarPos) { + next_pos = PositionFromBar(player, move.num); + } else { + next_pos = PositionFrom(player, move.pos, move.num); + } + + // If there was a hit, take it out of the opponent's bar and put it back + // onto the next position. + if (move.hit) { + bar_[Opponent(player)]--; + board_[Opponent(player)][next_pos]++; + } + + // Remove the moved checker or decrement score. + if (next_pos == kScorePos) { + scores_[player]--; + } else { + board_[player][next_pos]--; + } + + // Mark the die as unused. + for (int i = 0; i < 2; ++i) { + if (dice_[i] == move.num + 6) { + dice_[i] -= 6; + break; + } + } + + // Finally, return back the checker to its original position. + if (move.pos == kBarPos) { + bar_[player]++; + } else { + board_[player][move.pos]++; + } +} + +// Returns the maximum move size (2, 1, or 0) +int BackgammonState::RecLegalMoves( + std::vector moveseq, + std::set>* movelist) { + if (moveseq.size() == 2) { + movelist->insert(moveseq); + return moveseq.size(); + } + + std::set moves_here = LegalCheckerMoves(cur_player_); + + if (moves_here.empty()) { + movelist->insert(moveseq); + return moveseq.size(); + } + + int max_moves = -1; + for (const auto& move : moves_here) { + moveseq.push_back(move); + ApplyCheckerMove(cur_player_, move); + int child_max = RecLegalMoves(moveseq, movelist); + UndoCheckerMove(cur_player_, move); + max_moves = std::max(child_max, max_moves); + moveseq.pop_back(); + } + + return max_moves; +} + +std::vector BackgammonState::ProcessLegalMoves( + int max_moves, const std::set>& movelist) const { + if (max_moves == 0) { + SPIEL_CHECK_EQ(movelist.size(), 1); + SPIEL_CHECK_TRUE(movelist.begin()->empty()); + + // Passing is always a legal move! 
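+    // Working through the encoding: both pass digits stay at
+    // EncodedPassMove() == 25 and high_roll_first stays false, so
+    //   CheckerMovesToSpielMove({{kPassPos, -1, false}, {kPassPos, -1, false}})
+    // evaluates to 25 * 26 + 25 + 676 = 1351, the largest of the
+    // kNumDistinctActions (1352) action ids.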
+ return {CheckerMovesToSpielMove( + {{kPassPos, -1, false}, {kPassPos, -1, false}})}; + } + + absl::flat_hash_set legal_action_strings; + + // Rule 2 in Movement of Checkers: + // A player must use both numbers of a roll if this is legally possible (or + // all four numbers of a double). When only one number can be played, the + // player must play that number. Or if either number can be played but not + // both, the player must play the larger one. When neither number can be used, + // the player loses his turn. In the case of doubles, when all four numbers + // cannot be played, the player must play as many numbers as he can. + + // TODO(author5): below we filter out actions that are mapped to the same + // string representation as they have the same effect, even when applied in + // different orders. A better fix would be to remove the duplicate actions + // from the action space altogether. + std::vector legal_actions; + int max_roll = -1; + for (const auto& move : movelist) { + if (max_moves == 2) { + // Only add moves that are size 2. + if (move.size() == 2) { + int action = CheckerMovesToSpielMove(move); + std::string action_string = ActionToString(CurrentPlayer(), action); + // Do not add duplicate actions. E.g. 24/21 24/20 and 24/20 24/21 are + // represented as two different integer actions, but due to the logic + // in the action string they have the same string representation. + // So we only include one of them in the legal action set. + if (!legal_action_strings.contains(action_string)) { + legal_action_strings.insert(action_string); + legal_actions.push_back(action); + } + } + } else if (max_moves == 1) { + // We are just finding the maximum roll. + max_roll = std::max(max_roll, move[0].num); + } + } + + if (max_moves == 1) { + // Another round to add those that have the max die roll. + for (const auto& move : movelist) { + if (move[0].num == max_roll) { + int action = CheckerMovesToSpielMove(move); + std::string action_string = ActionToString(CurrentPlayer(), action); + // Do not add duplicate actions. E.g. 24/21 24/20 and 24/20 24/21 are + // represented as two different integer actions, but due to the logic + // in the action string they have the same string representation. + // So we only include one of them in the legal action set. + if (!legal_action_strings.contains(action_string)) { + legal_action_strings.insert(action_string); + legal_actions.push_back(action); + } + } + } + } + + SPIEL_CHECK_FALSE(legal_actions.empty()); + return legal_actions; +} + +std::vector BackgammonState::LegalActions() const { + if (IsChanceNode()) return LegalChanceOutcomes(); + if (IsTerminal()) return {}; + + SPIEL_CHECK_EQ(CountTotalCheckers(kXPlayerId), + NumCheckersPerPlayer(game_.get())); + SPIEL_CHECK_EQ(CountTotalCheckers(kOPlayerId), + NumCheckersPerPlayer(game_.get())); + + std::unique_ptr cstate = this->Clone(); + BackgammonState* state = dynamic_cast(cstate.get()); + std::set> movelist; + int max_moves = state->RecLegalMoves({}, &movelist); + SPIEL_CHECK_GE(max_moves, 0); + SPIEL_CHECK_LE(max_moves, 2); + std::vector legal_actions = ProcessLegalMoves(max_moves, movelist); + std::sort(legal_actions.begin(), legal_actions.end()); + return legal_actions; +} + +std::vector> BackgammonState::ChanceOutcomes() const { + SPIEL_CHECK_TRUE(IsChanceNode()); + if (turns_ == -1) { + // Doubles not allowed for the initial roll to determine who goes first. + // Range 0-14: X goes first, range 15-29: O goes first. 
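+    // The opening chance node therefore has 30 equally likely outcomes, each
+    // with probability 1/30: outcome k < 15 means X starts with non-double
+    // roll k, and outcome k >= 15 means O starts with roll k - 15. Every
+    // later chance node instead uses kChanceOutcomes above: 15 non-double
+    // rolls at 1/18 each plus 6 doubles at 1/36 each, which sums to
+    // 15/18 + 6/36 = 1.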
+ std::vector> outcomes; + int num_outcomes = kNumNonDoubleOutcomes * 2; + outcomes.reserve(num_outcomes); + const double uniform_prob = 1.0 / num_outcomes; + for (Action action = 0; action < num_outcomes; ++action) { + outcomes.push_back({action, uniform_prob}); + } + return outcomes; + } else { + return kChanceOutcomes; + } +} + +std::string BackgammonState::ToString() const { + std::vector board_array = { + "+------|------+", "|......|......|", "|......|......|", + "|......|......|", "|......|......|", "|......|......|", + "| | |", "|......|......|", "|......|......|", + "|......|......|", "|......|......|", "|......|......|", + "+------|------+"}; + + // Fill the board. + for (int pos = 0; pos < 24; pos++) { + if (board_[kXPlayerId][pos] > 0 || board_[kOPlayerId][pos] > 0) { + int start_row = (pos < 12 ? 11 : 1); + int col = (pos < 12 ? (pos >= 6 ? 12 - pos : 13 - pos) + : (pos < 18 ? pos - 11 : pos - 10)); + + int row_offset = (pos < 12 ? -1 : 1); + + int owner = board_[kXPlayerId][pos] > 0 ? kXPlayerId : kOPlayerId; + char piece = (owner == kXPlayerId ? 'x' : 'o'); + int my_checkers = board_[owner][pos]; + + for (int i = 0; i < 5 && i < my_checkers; i++) { + board_array[start_row + i * row_offset][col] = piece; + } + + // Check for special display of >= 10 and >5 pieces + if (my_checkers >= 10) { + char lsd = std::to_string(my_checkers % 10)[0]; + // Make sure it reads downward. + if (pos < 12) { + board_array[start_row + row_offset][col] = '1'; + board_array[start_row][col] = lsd; + } else { + board_array[start_row][col] = '1'; + board_array[start_row + row_offset][col] = lsd; + } + } else if (my_checkers > 5) { + board_array[start_row][col] = std::to_string(my_checkers)[0]; + } + } + } + + std::string board_str = absl::StrJoin(board_array, "\n") + "\n"; + + // Extra info like whose turn it is etc. + absl::StrAppend(&board_str, "Turn: "); + absl::StrAppend(&board_str, CurPlayerToString(cur_player_)); + absl::StrAppend(&board_str, "\n"); + absl::StrAppend(&board_str, "Dice: "); + absl::StrAppend(&board_str, !dice_.empty() ? DiceToString(dice_[0]) : ""); + absl::StrAppend(&board_str, dice_.size() > 1 ? DiceToString(dice_[1]) : ""); + absl::StrAppend(&board_str, "\n"); + absl::StrAppend(&board_str, "Bar:"); + absl::StrAppend(&board_str, + (bar_[kXPlayerId] > 0 || bar_[kOPlayerId] > 0 ? " " : "")); + for (int p = 0; p < 2; p++) { + for (int n = 0; n < bar_[p]; n++) { + absl::StrAppend(&board_str, (p == kXPlayerId ? "x" : "o")); + } + } + absl::StrAppend(&board_str, "\n"); + absl::StrAppend(&board_str, "Scores, X: ", scores_[kXPlayerId]); + absl::StrAppend(&board_str, ", O: ", scores_[kOPlayerId], "\n"); + + return board_str; +} + +bool BackgammonState::IsTerminal() const { + return (scores_[kXPlayerId] == NumCheckersPerPlayer(game_.get()) || + scores_[kOPlayerId] == NumCheckersPerPlayer(game_.get())); +} + +std::vector BackgammonState::Returns() const { + int winner = -1; + int loser = -1; + if (scores_[kXPlayerId] == 15) { + winner = kXPlayerId; + loser = kOPlayerId; + } else if (scores_[kOPlayerId] == 15) { + winner = kOPlayerId; + loser = kXPlayerId; + } else { + return {0.0, 0.0}; + } + + // Magnify the util based on the scoring rules for this game. + int util_mag = 1; + switch (scoring_type_) { + case ScoringType::kWinLossScoring: + default: + break; + + case ScoringType::kEnableGammons: + util_mag = (IsGammoned(loser) ? 2 : 1); + break; + + case ScoringType::kFullScoring: + util_mag = (IsBackgammoned(loser) ? 3 : IsGammoned(loser) ? 
2 : 1); + break; + } + + std::vector returns(kNumPlayers); + returns[winner] = util_mag; + returns[loser] = -util_mag; + return returns; +} + +std::unique_ptr BackgammonState::Clone() const { + return std::unique_ptr(new BackgammonState(*this)); +} + +void BackgammonState::SetState(int cur_player, bool double_turn, + const std::vector& dice, + const std::vector& bar, + const std::vector& scores, + const std::vector>& board) { + cur_player_ = cur_player; + double_turn_ = double_turn; + dice_ = dice; + bar_ = bar; + scores_ = scores; + board_ = board; + + SPIEL_CHECK_EQ(CountTotalCheckers(kXPlayerId), + NumCheckersPerPlayer(game_.get())); + SPIEL_CHECK_EQ(CountTotalCheckers(kOPlayerId), + NumCheckersPerPlayer(game_.get())); +} + +BackgammonGame::BackgammonGame(const GameParameters& params) + : Game(kGameType, params), + scoring_type_( + ParseScoringType(ParameterValue("scoring_type"))), + hyper_backgammon_(ParameterValue("hyper_backgammon")) {} + +double BackgammonGame::MaxUtility() const { + if (hyper_backgammon_) { + // We do not have the cube implemented, so Hyper-backgammon us currently + // restricted to a win-loss game regardless of the scoring type. + return 1; + } + + switch (scoring_type_) { + case ScoringType::kWinLossScoring: + return 1; + case ScoringType::kEnableGammons: + return 2; + case ScoringType::kFullScoring: + return 3; + default: + SpielFatalError("Unknown scoring_type"); + } +} + +int BackgammonGame::NumCheckersPerPlayer() const { + if (hyper_backgammon_) { + return 3; + } else { + return kNumCheckersPerPlayer; + } +} + +} // namespace backgammon +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/backgammon/backgammon.h b/scenarios/bargaining/open_spiel/open_spiel/games/backgammon/backgammon.h new file mode 100644 index 0000000..994aa5d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/backgammon/backgammon.h @@ -0,0 +1,322 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_BACKGAMMON_H_ +#define OPEN_SPIEL_GAMES_BACKGAMMON_H_ + +#include +#include +#include +#include +#include + +#include "open_spiel/spiel.h" + +// An implementation of the classic: https://en.wikipedia.org/wiki/Backgammon +// using rule set from +// http://usbgf.org/learn-backgammon/backgammon-rules-and-terms/rules-of-backgammon/ +// where red -> 'x' (player 0) and white -> 'o' (player 1). +// +// Currently does not support the doubling cube nor "matches" (multiple games +// where outcomes are scored and tallied to 21). +// +// Parameters: +// "hyper_backgammon" bool Use Hyper-backgammon variant [1] (def: false) +// "scoring_type" string Type of scoring for the game: "winloss_scoring" +// (default), "enable_gammons", or "full_scoring" +// +// [1] https://bkgm.com/variants/HyperBackgammon.html. Hyper-backgammon is a +// simplified backgammon start setup which is small enough to solve. 
Note that
+// it is not the full Hyper-backgammon since the doubling cube is not implemented.
+
+namespace open_spiel {
+namespace backgammon {
+
+inline constexpr const int kNumPlayers = 2;
+inline constexpr const int kNumChanceOutcomes = 21;
+inline constexpr const int kNumPoints = 24;
+inline constexpr const int kNumDiceOutcomes = 6;
+inline constexpr const int kXPlayerId = 0;
+inline constexpr const int kOPlayerId = 1;
+inline constexpr const int kPassPos = -1;
+
+// Number of checkers per player in the standard game. For variants, use
+// BackgammonGame::NumCheckersPerPlayer.
+inline constexpr const int kNumCheckersPerPlayer = 15;
+
+// TODO: look into whether these can be set to 25 and -2 to avoid having a
+// separate helper function (PositionToStringHumanReadable) to convert moves
+// to strings.
+inline constexpr const int kBarPos = 100;
+inline constexpr const int kScorePos = 101;
+
+// The action encoding stores a number in { 0, 1, ..., 1351 }. If the high
+// roll is to move first, then the number is encoded as a 2-digit number in
+// base 26 ({0, 1, .., 23, kBarPos, Pass}) (=> first 676 numbers). Otherwise,
+// the low die is to move first and 676 is subtracted and then again the
+// number is encoded as a 2-digit number in base 26.
+inline constexpr const int kNumDistinctActions = 1352;
+
+// See ObservationTensorShape for details.
+inline constexpr const int kBoardEncodingSize = 4 * kNumPoints * kNumPlayers;
+inline constexpr const int kStateEncodingSize =
+ 3 * kNumPlayers + kBoardEncodingSize + 2;
+inline constexpr const char* kDefaultScoringType = "winloss_scoring";
+inline constexpr bool kDefaultHyperBackgammon = false;
+
+// Game scoring type, whether to score gammons/backgammons specially.
+enum class ScoringType {
+ kWinLossScoring, // "winloss_scoring": Score only 1 point per player win.
+ kEnableGammons, // "enable_gammons": Score 2 points for a "gammon".
+ kFullScoring, // "full_scoring": Score gammons as well as 3 points for a
+ // "backgammon".
+};
+
+struct CheckerMove {
+ // Pass is encoded as (pos, num, hit) = (-1, -1, false).
+ int pos; // 0-24 (0-23 for locations on the board and kBarPos)
+ int num; // 1-6
+ bool hit;
+ CheckerMove(int _pos, int _num, bool _hit)
+ : pos(_pos), num(_num), hit(_hit) {}
+ bool operator<(const CheckerMove& rhs) const {
+ return (pos * 6 + (num - 1)) < (rhs.pos * 6 + rhs.num - 1);
+ }
+};
+
+// This is a small helper to track historical turn info not stored in the moves.
+// It is only needed for proper implementation of Undo.
+struct TurnHistoryInfo { + int player; + int prev_player; + std::vector dice; + Action action; + bool double_turn; + bool first_move_hit; + bool second_move_hit; + TurnHistoryInfo(int _player, int _prev_player, std::vector _dice, + int _action, bool _double_turn, bool fmh, bool smh) + : player(_player), + prev_player(_prev_player), + dice(_dice), + action(_action), + double_turn(_double_turn), + first_move_hit(fmh), + second_move_hit(smh) {} +}; + +class BackgammonGame; + +class BackgammonState : public State { + public: + BackgammonState(const BackgammonState&) = default; + BackgammonState(std::shared_ptr, ScoringType scoring_type, + bool hyper_backgammone); + + Player CurrentPlayer() const override; + void UndoAction(Player player, Action action) override; + std::vector LegalActions() const override; + std::string ActionToString(Player player, Action move_id) const override; + std::vector> ChanceOutcomes() const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + + // Setter function used for debugging and tests. Note: this does not set the + // historical information properly, so Undo likely will not work on states + // set this way! + void SetState(int cur_player, bool double_turn, const std::vector& dice, + const std::vector& bar, const std::vector& scores, + const std::vector>& board); + + // Returns the opponent of the specified player. + int Opponent(int player) const; + + // Compute a distance between 'from' and 'to'. The from can be kBarPos. The + // to can be a number below 0 or above 23, but do not use kScorePos directly. + int GetDistance(int player, int from, int to) const; + + // Is this position off the board, i.e. >23 or <0? + bool IsOff(int player, int pos) const; + + // Returns whether pos2 is further (closer to scoring) than pos1 for the + // specifed player. + bool IsFurther(int player, int pos1, int pos2) const; + + // Is this a legal from -> to checker move? Here, the to_pos can be a number + // that is outside {0, ..., 23}; if so, it is counted as "off the board" for + // the corresponding player (i.e. >23 is a bear-off move for XPlayerId, and + // <0 is a bear-off move for OPlayerId). + bool IsLegalFromTo(int player, int from_pos, int to_pos, int my_checkers_from, + int opp_checkers_to) const; + + // Get the To position for this play given the from position and number of + // pips on the die. This function simply adds the values: the return value + // will be a position that might be off the the board (<0 or >23). + int GetToPos(int player, int from_pos, int pips) const; + + // Count the total number of checkers for this player (on the board, in the + // bar, and have borne off). Should be 15 for the standard game. + int CountTotalCheckers(int player) const; + + // Returns if moving from the position for the number of spaces is a hit. + bool IsHit(Player player, int from_pos, int num) const; + + // Accessor functions for some of the specific data. + int player_turns() const { return turns_; } + int player_turns(int player) const { + return (player == kXPlayerId ? 
x_turns_ : o_turns_); + } + int bar(int player) const { return bar_[player]; } + int score(int player) const { return scores_[player]; } + int dice(int i) const { return dice_[i]; } + bool double_turn() const { return double_turn_; } + + // Get the number of checkers on the board in the specified position belonging + // to the specified player. The position can be kBarPos or any valid position + // on the main part of the board, but kScorePos (use score() to get the number + // of checkers born off). + int board(int player, int pos) const; + + // Action encoding / decoding functions. Note, the converted checker moves + // do not contain the hit information; use the AddHitInfo function to get the + // hit information. + Action CheckerMovesToSpielMove(const std::vector& moves) const; + std::vector SpielMoveToCheckerMoves(int player, + Action spiel_move) const; + Action TranslateAction(int from1, int from2, bool use_high_die_first) const; + + // Return checker moves with extra hit information. + std::vector + AugmentWithHitInfo(Player player, + const std::vector &cmoves) const; + + protected: + void DoApplyAction(Action move_id) override; + + private: + void SetupInitialBoard(); + void RollDice(int outcome); + bool IsPosInHome(int player, int pos) const; + bool AllInHome(int player) const; + int CheckersInHome(int player) const; + bool UsableDiceOutcome(int outcome) const; + int PositionFromBar(int player, int spaces) const; + int PositionFrom(int player, int pos, int spaces) const; + int NumOppCheckers(int player, int pos) const; + std::string DiceToString(int outcome) const; + int IsGammoned(int player) const; + int IsBackgammoned(int player) const; + int DiceValue(int i) const; + int HighestUsableDiceOutcome() const; + Action EncodedPassMove() const; + Action EncodedBarMove() const; + + // A helper function used by ActionToString to add necessary hit information + // and compute whether the move goes off the board. + int AugmentCheckerMove(CheckerMove* cmove, int player, int start) const; + + // Returns the position of the furthest checker in the home of this player. + // Returns -1 if none found. + int FurthestCheckerInHome(int player) const; + + bool ApplyCheckerMove(int player, const CheckerMove& move); + void UndoCheckerMove(int player, const CheckerMove& move); + std::set LegalCheckerMoves(int player) const; + int RecLegalMoves(std::vector moveseq, + std::set>* movelist); + std::vector ProcessLegalMoves( + int max_moves, const std::set>& movelist) const; + + ScoringType scoring_type_; // Which rules apply when scoring the game. + bool hyper_backgammon_; // Is the Hyper-backgammon variant enabled? + + Player cur_player_; + Player prev_player_; + int turns_; + int x_turns_; + int o_turns_; + bool double_turn_; + std::vector dice_; // Current dice. + std::vector bar_; // Checkers of each player in the bar. + std::vector scores_; // Checkers returned home by each player. + std::vector> board_; // Checkers for each player on points. + std::vector turn_history_info_; // Info needed for Undo. +}; + +class BackgammonGame : public Game { + public: + explicit BackgammonGame(const GameParameters& params); + + int NumDistinctActions() const override { return kNumDistinctActions; } + + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new BackgammonState( + shared_from_this(), scoring_type_, hyper_backgammon_)); + } + + // On the first turn there are 30 outcomes: 15 for each player (rolls without + // the doubles). 
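+ // (Illustrative note, not in the upstream header: after the first turn only
+ // kNumChanceOutcomes = 21 rolls remain (15 non-doubles + 6 doubles), so 30,
+ // the size of the initial who-goes-first roll, is the maximum.)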
+ int MaxChanceOutcomes() const override { return 30; } + + // There is arbitrarily chosen number to ensure the game is finite. + int MaxGameLength() const override { return 1000; } + + // Upper bound: chance node per move, with an initial chance node for + // determining starting player. + int MaxChanceNodesInHistory() const override { return MaxGameLength() + 1; } + + int NumPlayers() const override { return 2; } + double MinUtility() const override { return -MaxUtility(); } + absl::optional UtilitySum() const override { return 0; } + double MaxUtility() const override; + + std::vector ObservationTensorShape() const override { + // Encode each point on the board as four doubles: + // - One double for whether there is one checker or not (1 or 0). + // - One double for whether there are two checkers or not (1 or 0). + // - One double for whether there are three checkers or not (1 or 0). + // - One double if there are more than 3 checkers, the number of checkers. + // more than three that are on that point. + // + // Return a vector encoding: + // Every point listed for the current player. + // Every point listed for the opponent. + // One double for the number of checkers on the bar for the current player. + // One double for the number of checkers scored for the current player. + // One double for whether it's the current player's turn (1 or 0). + // One double for the number of checkers on the bar for the opponent. + // One double for the number of checkers scored for the opponent. + // One double for whether it's the opponent's turn (1 or 0). + // One double for the first dice's value. + // One double for the second dice's value. + + return {kStateEncodingSize}; + } + + int NumCheckersPerPlayer() const; + + private: + ScoringType scoring_type_; // Which rules apply when scoring the game. + bool hyper_backgammon_; // Is hyper-backgammon variant enabled? +}; + +} // namespace backgammon +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_BACKGAMMON_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/backgammon/backgammon_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/backgammon/backgammon_test.cc new file mode 100644 index 0000000..91369d0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/backgammon/backgammon_test.cc @@ -0,0 +1,590 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/backgammon/backgammon.h" + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace backgammon { +namespace { + +namespace testing = open_spiel::testing; + +bool ActionsContains(const std::vector& legal_actions, Action action) { + return std::find(legal_actions.begin(), legal_actions.end(), action) != + legal_actions.end(); +} + +void CheckHits(const State &state) { + if (state.IsChanceNode() || state.IsTerminal()) { + return; + } + Player player = state.CurrentPlayer(); + const auto &bstate = down_cast(state); + for (Action action : bstate.LegalActions()) { + std::vector cmoves = bstate.AugmentWithHitInfo( + player, bstate.SpielMoveToCheckerMoves(player, action)); + std::cout << bstate.ActionToString(player, action) << std::endl; + for (CheckerMove cmove : cmoves) { + const int to_pos = bstate.GetToPos(player, cmove.pos, cmove.num); + // If the to position is on the board and there is only 1 checker, this + // has to be a hit. + if (cmove.pos != kPassPos && !bstate.IsOff(player, to_pos) && + bstate.board(bstate.Opponent(player), to_pos) == 1) { + SPIEL_CHECK_TRUE(cmove.hit); + } + + // Now, check the converse. + if (cmove.hit) { + SPIEL_CHECK_TRUE(cmove.pos != kPassPos && + !bstate.IsOff(player, to_pos) && + bstate.board(bstate.Opponent(player), to_pos) == 1); + } + + // No need to apply the intermediate checker move, as it does not make + // any difference for what we're checking. + } + } +} + +void BasicBackgammonTestsCheckHits() { + std::shared_ptr game = LoadGame("backgammon"); + testing::RandomSimTest(*game, 10, true, true, &CheckHits); +} + +void BasicBackgammonTestsVaryScoring() { + for (std::string scoring : + {"winloss_scoring", "enable_gammons", "full_scoring"}) { + auto game = + LoadGame("backgammon", {{"scoring_type", GameParameter(scoring)}}); + testing::ChanceOutcomesTest(*game); + testing::RandomSimTestWithUndo(*game, 10); + testing::RandomSimTest(*game, 10); + } +} + +void BasicHyperBackgammonTestsVaryScoring() { + for (std::string scoring : + {"winloss_scoring", "enable_gammons", "full_scoring"}) { + auto game = + LoadGame("backgammon", {{"scoring_type", GameParameter(scoring)}, + {"hyper_backgammon", GameParameter(true)}}); + testing::ChanceOutcomesTest(*game); + testing::RandomSimTestWithUndo(*game, 10); + testing::RandomSimTest(*game, 10); + } +} + +void BasicBackgammonTestsDoNotStartWithDoubles() { + std::mt19937 rng; + for (int i = 0; i < 100; ++i) { + auto game = LoadGame("backgammon"); + std::unique_ptr state = game->NewInitialState(); + + while (state->IsChanceNode()) { + Action outcome = + SampleAction(state->ChanceOutcomes(), + std::uniform_real_distribution(0.0, 1.0)(rng)) + .first; + state->ApplyAction(outcome); + } + BackgammonState* backgammon_state = + dynamic_cast(state.get()); + // The dice should contain two different numbers, + // because a tie would not select a starting player. + SPIEL_CHECK_NE(backgammon_state->dice(0), backgammon_state->dice(1)); + } +} + +// Must bear-off furthest checker first. 
+// Should have exactly one legal move here (since double moves are +// two separate turns): 1-5, 0-5 +// +------|------+ +// |..xx..|..x6x.| +// |...x..|...xx.| +// |......|...x..| +// |......|...x..| +// |......|...x..| +// | | | +// |......|......| +// |......|......| +// |......|......| +// |......|x....o| +// |..x...|x...oo| +// +------|------+ +// Turn: o +// Dice: 55 +// Bar: +// Scores, X: 0, O: 12 +void BearOffFurthestFirstTest() { + std::shared_ptr game = LoadGame("backgammon"); + std::unique_ptr state = game->NewInitialState(); + BackgammonState* bstate = static_cast(state.get()); + bstate->SetState( + kOPlayerId, false, {5, 5}, {0, 0}, {0, 12}, + {{0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 1, 6, 2, 0}, + {2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}); + std::cout << bstate->ToString(); + + // Check for exactly one legal move. + std::vector legal_actions = bstate->LegalActions(); + SPIEL_CHECK_EQ(legal_actions.size(), 1); + + // Check that it's 1-5 0-5 + std::vector checker_moves = + bstate->SpielMoveToCheckerMoves(kOPlayerId, legal_actions[0]); + SPIEL_CHECK_EQ(checker_moves[0].pos, 1); + SPIEL_CHECK_EQ(checker_moves[0].num, 5); + SPIEL_CHECK_EQ(checker_moves[1].pos, 0); + SPIEL_CHECK_EQ(checker_moves[1].num, 5); +} + +// +------|------+ +// |......|x.xxx9| +// |......|..xx.x| +// |......|.....x| +// |......|.....x| +// |......|.....x| +// | | | +// |......|.....o| +// |......|.....o| +// |......|.....o| +// |......|.....o| +// |......|.....7| +// +------|------+ +// Turn: x +// Dice: 16 +// Bar: +// Scores, X: 0, O: 8 +void NormalBearOffSituation() { + std::shared_ptr game = LoadGame("backgammon"); + std::unique_ptr state = game->NewInitialState(); + BackgammonState* bstate = static_cast(state.get()); + bstate->SetState( + kXPlayerId, false, {1, 6}, {0, 0}, {0, 8}, + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 2, 1, 9}, + {7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}); + std::cout << bstate->ToString(); + + std::vector legal_actions = bstate->LegalActions(); + std::cout << "Legal actions:" << std::endl; + for (Action action : legal_actions) { + std::cout << bstate->ActionToString(kXPlayerId, action) << std::endl; + } + + // Legal actions here are: + // (18-1 19-6) + // (18-6 20-1) + // (18-6 21-1) + // (18-6 22-1) + // (18-6 23-1) + // (20-1 18-6) // 20-1 is a duplicate of 18-6 20-1 (not listed) + // (21-1 18-6) // 21-1 is a duplicate of 18-6 21-1 (not listed) + // (22-1 18-6) // 22-1 is a duplicate of 18-6 22-1 (not listed) + // (23-1 18-6) // 23-1 is a duplicate of 18-6 23-1 (not listed) + SPIEL_CHECK_EQ(legal_actions.size(), 5); + SPIEL_CHECK_TRUE(ActionsContains( + legal_actions, + bstate->CheckerMovesToSpielMove({{18, 1, false}, {19, 6, false}}))); + SPIEL_CHECK_TRUE(ActionsContains( + legal_actions, + bstate->CheckerMovesToSpielMove({{18, 6, false}, {20, 1, false}}))); + SPIEL_CHECK_TRUE(ActionsContains( + legal_actions, + bstate->CheckerMovesToSpielMove({{18, 6, false}, {21, 1, false}}))); + SPIEL_CHECK_TRUE(ActionsContains( + legal_actions, + bstate->CheckerMovesToSpielMove({{18, 6, false}, {22, 1, false}}))); + SPIEL_CHECK_TRUE(ActionsContains( + legal_actions, + bstate->CheckerMovesToSpielMove({{18, 6, false}, {23, 1, false}}))); +} + +// +------|------+ +// |.....o|x.xx9o| +// |......|..xxx.| +// |......|..x.x.| +// |......|....x.| +// |......|....x.| +// | | | +// |......|.....o| +// |......|.....o| +// |......|.....o| +// |......|..o.oo| +// |o.....|..o.o8| +// 
+------|------+ +// Turn: x +// Dice: 44 +// Bar: +// Scores, X: 0, O: 0 +void NormalBearOffSituation2() { + std::shared_ptr game = LoadGame("backgammon"); + std::unique_ptr state = game->NewInitialState(); + BackgammonState* bstate = static_cast(state.get()); + bstate->SetState( + kXPlayerId, false, {4, 4}, {0, 0}, {0, 0}, + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 9, 0}, + {8, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1}}); + std::cout << bstate->ToString(); + + std::vector legal_actions = bstate->LegalActions(); + std::cout << "Legal actions:" << std::endl; + for (Action action : legal_actions) { + std::cout << bstate->ActionToString(kXPlayerId, action) << std::endl; + } + + // Legal actions here are: + // (18-4 20-4) + // (20-4 20-4) + SPIEL_CHECK_EQ(legal_actions.size(), 2); + SPIEL_CHECK_TRUE(ActionsContains( + legal_actions, + bstate->CheckerMovesToSpielMove({{18, 4, false}, {20, 4, false}}))); + SPIEL_CHECK_TRUE(ActionsContains( + legal_actions, + bstate->CheckerMovesToSpielMove({{20, 4, false}, {20, 4, false}}))); +} + +// +------|------+ +// |.....x|x.xx9o| +// |......|..xxx.| +// |......|....x.| +// |......|....x.| +// |......|....x.| +// | | | +// |......|.....o| +// |......|.....o| +// |......|..o..o| +// |......|..o.oo| +// |o.....|..o.o8| +// +------|------+ +// Turn: x +// Dice: 16 +// Bar: +// Scores, X: 0, O: 0 +void BearOffOutsideHome() { + std::shared_ptr game = LoadGame("backgammon"); + std::unique_ptr state = game->NewInitialState(); + BackgammonState* bstate = static_cast(state.get()); + bstate->SetState( + kXPlayerId, false, {1, 6}, {0, 0}, {0, 0}, + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 2, 2, 9, 0}, + {8, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}}); + std::cout << bstate->ToString(); + + std::vector legal_actions = bstate->LegalActions(); + std::cout << "Legal actions:" << std::endl; + for (Action action : legal_actions) { + std::cout << bstate->ActionToString(kXPlayerId, action) << std::endl; + } + + // Check that the one outside can be born off with this roll. + SPIEL_CHECK_TRUE(ActionsContains( + legal_actions, + bstate->CheckerMovesToSpielMove({{17, 6, true}, {23, 1, false}}))); + SPIEL_CHECK_TRUE(ActionsContains( + legal_actions, + bstate->CheckerMovesToSpielMove({{17, 1, false}, {18, 6, false}}))); +} + +// +------|------+ +// |o...x.|xxxxox| +// |....x.|xxxxox| +// |......|x.xx..| +// |......|......| +// |......|......| +// | | | +// |......|......| +// |......|......| +// |......|......| +// |......|o.o.oo| +// |oo..o.|ooo.oo| +// +------|------+ +// Turn: x +// Dice: 44 +// Bar: +// Scores, X: 0, O: 0 +void DoublesBearOffOutsideHome() { + std::shared_ptr game = LoadGame("backgammon"); + std::unique_ptr state = game->NewInitialState(); + BackgammonState* bstate = static_cast(state.get()); + bstate->SetState( + kXPlayerId, false, {4, 4}, {0, 0}, {0, 0}, + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 3, 2, 3, 3, 0, 2}, + {2, 2, 0, 2, 1, 2, 0, 1, 0, 0, 1, 1, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0}}); + std::cout << bstate->ToString(); + + // First part of double turn. + SPIEL_CHECK_FALSE(bstate->double_turn()); + + std::vector legal_actions = bstate->LegalActions(); + std::cout << "Legal actions:" << std::endl; + for (Action action : legal_actions) { + std::cout << bstate->ActionToString(kXPlayerId, action) << std::endl; + } + + // Check that we can bear off the two X checkers outside the home area (using + // two turns. 
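+ // (Illustrative note, not in the upstream test: doubles are played as two
+ // consecutive actions by the same player. Each action encodes at most two
+ // checker moves, and double_turn_ flags that the same player moves again,
+ // which is checked below.)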
+ Action action = + bstate->CheckerMovesToSpielMove({{16, 4, false}, {16, 4, false}}); + SPIEL_CHECK_TRUE(ActionsContains(legal_actions, action)); + bstate->ApplyAction(action); + + std::cout << bstate->ToString(); + legal_actions = bstate->LegalActions(); + std::cout << "Legal actions:" << std::endl; + for (Action action : legal_actions) { + std::cout << bstate->ActionToString(kXPlayerId, action) << std::endl; + } + + // Second part of double turn, so make sure the same player goes again. + SPIEL_CHECK_TRUE(bstate->double_turn()); + SPIEL_CHECK_EQ(bstate->CurrentPlayer(), kXPlayerId); + + // Now, bearing off from 20 should be allowed. + action = bstate->CheckerMovesToSpielMove({{20, 4, false}, {20, 4, false}}); + SPIEL_CHECK_TRUE(ActionsContains(legal_actions, action)); +} + +void HumanReadableNotation() { + std::shared_ptr game = LoadGame("backgammon"); + std::unique_ptr state = game->NewInitialState(); + BackgammonState* bstate = static_cast(state.get()); + + // Check double repeated move and moving on from Bar displayed correctly + bstate->SetState( + kXPlayerId, false, {1, 1}, {13, 5}, {0, 0}, + {{2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}); + std::cout << bstate->ToString(); + std::vector legal_actions = bstate->LegalActions(); + std::cout << "First legal action:" << std::endl; + std::string notation = bstate->ActionToString(kXPlayerId, legal_actions[0]); + std::cout << notation << std::endl; + SPIEL_CHECK_EQ(notation, "Bar/24(2)"); + + // Check hits displayed correctly + bstate->SetState( + kXPlayerId, false, {2, 1}, {13, 5}, {0, 0}, + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1}, + {1, 1, 1, 1, 1, 5, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}); + std::cout << bstate->ToString(); + legal_actions = bstate->LegalActions(); + std::cout << "First legal action:" << std::endl; + notation = bstate->ActionToString(kXPlayerId, legal_actions[0]); + std::cout << notation << std::endl; + SPIEL_CHECK_EQ(notation, "Bar/24* Bar/23*"); + + // Check moving off displayed correctly + bstate->SetState( + kXPlayerId, false, {2, 1}, {0, 0}, {13, 5}, + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1}, + {0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}); + std::cout << bstate->ToString(); + legal_actions = bstate->LegalActions(); + std::cout << "First legal action:" << std::endl; + notation = bstate->ActionToString(kXPlayerId, legal_actions[0]); + std::cout << notation << std::endl; + SPIEL_CHECK_EQ(notation, "2/Off 1/Off"); + + // Check die order doesn't impact narrative + bstate->SetState( + kXPlayerId, false, {1, 2}, {0, 0}, {13, 5}, + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1}, + {0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}); + std::cout << bstate->ToString(); + legal_actions = bstate->LegalActions(); + std::cout << "First legal action:" << std::endl; + notation = bstate->ActionToString(kXPlayerId, legal_actions[0]); + std::cout << notation << std::endl; + SPIEL_CHECK_EQ(notation, "2/Off 1/Off"); + + // Check double move + bstate->SetState( + kXPlayerId, false, {6, 5}, {0, 0}, {13, 5}, + {{2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}); + std::cout << bstate->ToString(); + legal_actions = bstate->LegalActions(); + std::cout << "First legal 
action" << std::endl; + notation = bstate->ActionToString(kXPlayerId, legal_actions[0]); + std::cout << notation << std::endl; + SPIEL_CHECK_EQ(notation, "24/18/13"); + + // Check double move with hit + bstate->SetState( + kXPlayerId, false, {6, 5}, {0, 0}, {13, 4}, + {{2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 2, 2, 2, 2, 2, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}); + std::cout << bstate->ToString(); + legal_actions = bstate->LegalActions(); + std::cout << "First legal action:" << std::endl; + notation = bstate->ActionToString(kXPlayerId, legal_actions[0]); + std::cout << notation << std::endl; + SPIEL_CHECK_EQ(notation, "24/18*/13"); + + // Check double move with double hit + bstate->SetState( + kXPlayerId, false, {6, 5}, {0, 0}, {13, 3}, + {{2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 2, 2, 2, 2, 2, 1, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}); + std::cout << bstate->ToString(); + legal_actions = bstate->LegalActions(); + std::cout << "First legal action:" << std::endl; + notation = bstate->ActionToString(kXPlayerId, legal_actions[0]); + std::cout << notation << std::endl; + SPIEL_CHECK_EQ(notation, "24/18*/13*"); + + // Check ordinary move! + bstate->SetState( + kXPlayerId, false, {6, 5}, {0, 0}, {13, 3}, + {{2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 2, 2, 2, 4, 0, 0, 0, 0, 0, 0, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}); + std::cout << bstate->ToString(); + legal_actions = bstate->LegalActions(); + std::cout << "First legal action:" << std::endl; + notation = bstate->ActionToString(kXPlayerId, legal_actions[0]); + std::cout << notation << std::endl; + SPIEL_CHECK_EQ(notation, "24/19 24/18"); + + // Check ordinary move with die reversed + bstate->SetState( + kXPlayerId, false, {5, 6}, {0, 0}, {13, 3}, + {{2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 2, 2, 2, 4, 0, 0, 0, 0, 0, 0, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}); + std::cout << bstate->ToString(); + legal_actions = bstate->LegalActions(); + std::cout << "First legal actions:" << std::endl; + notation = bstate->ActionToString(kXPlayerId, legal_actions[0]); + std::cout << notation << std::endl; + SPIEL_CHECK_EQ(notation, "24/19 24/18"); + + // Check ordinary move with 1st hit + bstate->SetState( + kXPlayerId, false, {6, 5}, {0, 0}, {13, 3}, + {{2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 2, 2, 2, 3, 1, 0, 0, 0, 0, 0, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}); + std::cout << bstate->ToString(); + legal_actions = bstate->LegalActions(); + std::cout << "First legal action:" << std::endl; + notation = bstate->ActionToString(kXPlayerId, legal_actions[0]); + std::cout << notation << std::endl; + SPIEL_CHECK_EQ(notation, "24/19* 24/18"); + + // Check ordinary move with 2nd hit + bstate->SetState( + kXPlayerId, false, {5, 6}, {0, 0}, {13, 3}, + {{2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 2, 2, 2, 3, 0, 1, 0, 0, 0, 0, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}); + std::cout << bstate->ToString(); + legal_actions = bstate->LegalActions(); + std::cout << "First legal action:" << std::endl; + notation = bstate->ActionToString(kXPlayerId, legal_actions[0]); + std::cout << notation << std::endl; + SPIEL_CHECK_EQ(notation, "24/19 24/18*"); + + // Check ordinary move with double hit + bstate->SetState( + kXPlayerId, false, {5, 6}, {0, 0}, {13, 3}, + {{2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0}, + {0, 2, 2, 2, 2, 1, 1, 0, 0, 0, 0, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}); + std::cout << bstate->ToString(); + legal_actions = bstate->LegalActions(); + std::cout << "First legal action:" << std::endl; + notation = bstate->ActionToString(kXPlayerId, legal_actions[0]); + std::cout << notation << std::endl; + SPIEL_CHECK_EQ(notation, "24/19* 24/18*"); + + // Check double pass + bstate->SetState( + kXPlayerId, false, {5, 3}, {0, 0}, {13, 3}, + {{2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}); + std::cout << bstate->ToString(); + legal_actions = bstate->LegalActions(); + std::cout << "First legal action:" << std::endl; + notation = bstate->ActionToString(kXPlayerId, legal_actions[0]); + std::cout << notation << std::endl; + SPIEL_CHECK_EQ(notation, "Pass"); + + // Check single pass + bstate->SetState( + kXPlayerId, false, {5, 6}, {0, 0}, {13, 3}, + {{2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}); + std::cout << bstate->ToString(); + legal_actions = bstate->LegalActions(); + std::cout << "First legal action:" << std::endl; + notation = bstate->ActionToString(kXPlayerId, legal_actions[0]); + std::cout << notation << std::endl; + SPIEL_CHECK_EQ(notation, "24/18 Pass"); +} + +void BasicHyperBackgammonTest() { + std::shared_ptr game = + LoadGame("backgammon", {{"hyper_backgammon", GameParameter(true)}}); + std::unique_ptr state = game->NewInitialState(); + BackgammonState* bstate = static_cast(state.get()); + SPIEL_CHECK_EQ(bstate->CountTotalCheckers(kXPlayerId), 3); + SPIEL_CHECK_EQ(bstate->CountTotalCheckers(kOPlayerId), 3); +} + +} // namespace +} // namespace backgammon +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::testing::LoadGameTest("backgammon"); + open_spiel::backgammon::BasicBackgammonTestsCheckHits(); + open_spiel::backgammon::BasicBackgammonTestsDoNotStartWithDoubles(); + open_spiel::backgammon::BasicBackgammonTestsVaryScoring(); + open_spiel::backgammon::BasicHyperBackgammonTestsVaryScoring(); + open_spiel::backgammon::BearOffFurthestFirstTest(); + open_spiel::backgammon::NormalBearOffSituation(); + open_spiel::backgammon::NormalBearOffSituation2(); + open_spiel::backgammon::BearOffOutsideHome(); + open_spiel::backgammon::DoublesBearOffOutsideHome(); + open_spiel::backgammon::HumanReadableNotation(); + open_spiel::backgammon::BasicHyperBackgammonTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/bargaining/bargaining.cc b/scenarios/bargaining/open_spiel/open_spiel/games/bargaining/bargaining.cc new file mode 100644 index 0000000..0bf36cf --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/bargaining/bargaining.cc @@ -0,0 +1,613 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "open_spiel/games/bargaining/bargaining.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/file.h" + +namespace open_spiel { +namespace bargaining { + +namespace { + +// Facts about the game +const GameType kGameType{/*short_name=*/"bargaining", + /*long_name=*/"Bargaining", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/kNumPlayers, + /*min_num_players=*/kNumPlayers, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"instances_file", + GameParameter("")}, + {"max_turns", GameParameter(kDefaultMaxTurns)}, + {"discount", GameParameter(kDefaultDiscount)}, + {"prob_end", GameParameter(kDefaultProbEnd)}}}; + +static std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new BargainingGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); +} // namespace + + +std::string format_values(const std::vector& values) { + return absl::StrCat("Book: ", values[0], ", ", + "Hat: ", values[1], ", ", + "Basketball: ", values[2]); +} + +std::string Instance::ToString() const { + return absl::StrCat(format_values(pool), " ", + format_values(values[0]), " ", + format_values(values[1])); +} + +std::string Instance::ToPrettyString() const { + return absl::StrCat("Pool: ", format_values(pool), "\n", + "P0 vals: ", format_values(values[0]), "\n", + "P1 vals: ", format_values(values[1]), "\n"); +} + +std::string Offer::ToString() const { + return absl::StrCat("Offer: ", format_values(quantities)); +} + +std::string BargainingState::ActionToString(Player player, + Action move_id) const { + return parent_game_->ActionToString(player, move_id); +} + +bool BargainingState::IsTerminal() const { + return agreement_reached_ || game_ended_ || + offers_.size() >= parent_game_->max_turns(); +} + +std::vector BargainingState::Returns() const { + if (agreement_reached_) { + int proposing_player = (offers_.size() + 1) % kNumPlayers; + int other_player = 1 - proposing_player; + std::vector returns(kNumPlayers, 0); + for (int i = 0; i < kNumItemTypes; ++i) { + returns[proposing_player] += + instance_.values[proposing_player][i] * offers_.back().quantities[i]; + returns[other_player] += + instance_.values[other_player][i] * + (instance_.pool[i] - offers_.back().quantities[i]); + } + // Apply discount. 
+ if (discount_ < 1.0) { + for (Player p = 0; p < num_players_; ++p) { + returns[p] *= discount_; + } + } + return returns; + } else { + return std::vector(kNumPlayers, 0); + } +} + +std::string BargainingState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + if (IsChanceNode()) { + return "Initial chance node"; + } + + std::string str = absl::StrCat("Pool: ", format_values(instance_.pool)); + absl::StrAppend(&str, + "\nMy values: ", format_values(instance_.values[player]), + "\n"); + absl::StrAppend(&str, "Agreement reached? ", agreement_reached_, "\n"); + absl::StrAppend(&str, "Number of offers: ", offers_.size(), "\n"); + if (!offers_.empty()) { + // Only the most recent offer. + absl::StrAppend(&str, "P", (offers_.size() + 1) % 2, + " offers: ", offers_.back().ToString(), "\n"); + } + return str; +} + +std::string BargainingState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + if (IsChanceNode()) { + return "Initial chance node"; + } + + std::string str = absl::StrCat("Pool: ", format_values(instance_.pool)); + absl::StrAppend(&str, + "\nMy values: ", format_values(instance_.values[player]), + "\n"); + absl::StrAppend(&str, "Agreement reached? ", agreement_reached_, "\n"); + for (int i = 0; i < offers_.size(); ++i) { + int proposer = i % 2; + absl::StrAppend(&str, "P", proposer, " offers: ", offers_[i].ToString(), + "\n"); + } + return str; +} + +std::unique_ptr BargainingState::ResampleFromInfostate( + int player_id, std::function rng) const { + std::vector valid_indices; + const int num_instances = parent_game_->AllInstances().size(); + for (int i = 0; i < num_instances; ++i) { + const Instance& instance = parent_game_->GetInstance(i); + if (instance_.pool == instance.pool && + instance_.values[player_id] == instance.values[player_id]) { + valid_indices.push_back(i); + } + } + + SPIEL_CHECK_FALSE(valid_indices.empty()); + int idx = static_cast(rng() * valid_indices.size()); + SPIEL_CHECK_GE(idx, 0); + SPIEL_CHECK_LT(idx, valid_indices.size()); + + int instance_idx = valid_indices[idx]; + std::unique_ptr state = parent_game_->NewInitialState(); + for (Action action : History()) { + if (state->IsChanceNode()) { + state->ApplyAction(instance_idx); + } else { + state->ApplyAction(action); + } + } + return state; +} + +void BargainingState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + SPIEL_CHECK_EQ(values.size(), game_->ObservationTensorSize()); + std::fill(values.begin(), values.end(), 0); + + if (IsChanceNode()) { + // No observations at chance nodes. + return; + } + + int offset = 0; + + // Agreement reached? + if (agreement_reached_) { + values[offset] = 1; + } + offset += 1; + + // How many trade offers have happened? 
+ values[offers_.size()] = 1; + offset += parent_game_->max_turns() + 1; + + // Pool + for (int i = 0; i < kNumItemTypes; ++i) { + for (int j = 0; j <= instance_.pool[i]; ++j) { + values[offset + j] = 1; + } + offset += kPoolMaxNumItems + 1; + } + + // My values + for (int i = 0; i < kNumItemTypes; ++i) { + for (int j = 0; j <= instance_.values[player][i]; ++j) { + values[offset + j] = 1; + } + offset += kTotalValueAllItems + 1; + } + + // Just the last offer + if (!offers_.empty()) { + for (int i = 0; i < kNumItemTypes; ++i) { + for (int j = 0; j <= offers_.back().quantities[i]; ++j) { + values[offset + j] = 1; + } + offset += kPoolMaxNumItems + 1; + } + } else { + offset += (kPoolMaxNumItems + 1) * kNumItemTypes; + } + + SPIEL_CHECK_EQ(offset, values.size()); +} + +void BargainingState::InformationStateTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + SPIEL_CHECK_EQ(values.size(), game_->InformationStateTensorSize()); + std::fill(values.begin(), values.end(), 0); + + if (IsChanceNode()) { + // No observations at chance nodes. + return; + } + + int offset = 0; + + // Agreement reached? + if (agreement_reached_) { + values[offset] = 1; + } + offset += 1; + + // How many trade offers have happened? + values[offers_.size()] = 1; + offset += parent_game_->max_turns() + 1; + + // Pool + for (int i = 0; i < kNumItemTypes; ++i) { + for (int j = 0; j <= instance_.pool[i]; ++j) { + values[offset + j] = 1; + } + offset += kPoolMaxNumItems + 1; + } + + // My values + for (int i = 0; i < kNumItemTypes; ++i) { + for (int j = 0; j <= instance_.values[player][i]; ++j) { + values[offset + j] = 1; + } + offset += kTotalValueAllItems + 1; + } + + // Offers + for (int k = 0; k < parent_game_->max_turns(); ++k) { + if (k < offers_.size()) { + for (int i = 0; i < kNumItemTypes; ++i) { + for (int j = 0; j <= offers_[k].quantities[i]; ++j) { + values[offset + j] = 1; + } + offset += kPoolMaxNumItems + 1; + } + } else { + offset += (kPoolMaxNumItems + 1) * kNumItemTypes; + } + } + + SPIEL_CHECK_EQ(offset, values.size()); +} + +void BargainingState::SetInstance(Instance instance) { + instance_ = instance; + // TODO(author5): we could (should?) add the ability to check if the instance + // abides by the rules of the game here (refactoring that logic out of the + // instance generator into a general helper function). + + // Check if this is at the start of the game. If so, make it no longer the + // chance player. + if (IsChanceNode()) { + SPIEL_CHECK_TRUE(offers_.empty()); + cur_player_ = 0; + } +} + +BargainingState::BargainingState(std::shared_ptr game) + : State(game), + cur_player_(kChancePlayerId), + agreement_reached_(false), + parent_game_(down_cast(game.get())), + next_player_(0), + discount_(1.0), + game_ended_(false) {} + +int BargainingState::CurrentPlayer() const { + return IsTerminal() ? kTerminalPlayerId : cur_player_; +} + +Action BargainingState::AgreeAction() const { + return parent_game_->AllOffers().size(); +} + +void BargainingState::DoApplyAction(Action action) { + if (IsChanceNode()) { + if (move_number_ == 0) { + instance_ = parent_game_->GetInstance(action); + cur_player_ = 0; + } else { + if (action == parent_game_->ContinueOutcome()) { + cur_player_ = next_player_; + } else { + SPIEL_CHECK_EQ(action, parent_game_->EndOutcome()); + game_ended_ = true; + cur_player_ = kTerminalPlayerId; + } + } + } else { + // Check to apply discount. 
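+ // (Illustrative note, not in the upstream source: move 0 is the chance node
+ // that samples the instance and moves 1-2 are the first action from each
+ // player, so with prob_end == 0 the discount only starts compounding from
+ // the third player action onward.)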
+ if (move_number_ >= 3 && parent_game_->discount() < 1.0) { + discount_ *= parent_game_->discount(); + } + + const std::vector& all_offers = parent_game_->AllOffers(); + if (action != AgreeAction()) { + offers_.push_back(all_offers.at(action)); + + if (move_number_ >= 2 && parent_game_->prob_end() > 0.0) { + next_player_ = 1 - cur_player_; + cur_player_ = kChancePlayerId; + } else { + cur_player_ = 1 - cur_player_; + } + } else { + // Agree action. + SPIEL_CHECK_EQ(action, AgreeAction()); + agreement_reached_ = true; + } + } +} + +bool BargainingState::IsLegalOffer(const Offer& offer) const { + // An offer is legal if it's a proper subset of the current pool. + for (int i = 0; i < kNumItemTypes; ++i) { + if (offer.quantities[i] > instance_.pool[i]) { + return false; + } + } + return true; +} + +std::vector BargainingState::LegalActions() const { + if (IsChanceNode()) { + return LegalChanceOutcomes(); + } else if (IsTerminal()) { + return {}; + } else { + const std::vector& all_offers = parent_game_->AllOffers(); + std::vector legal_actions; + for (int i = 0; i < all_offers.size(); ++i) { + if (IsLegalOffer(all_offers.at(i))) { + legal_actions.push_back(i); + } + } + // Add the agree action if there's at least one offer on the table. + if (!offers_.empty()) { + legal_actions.push_back(all_offers.size()); + } + return legal_actions; + } +} + +std::vector> BargainingState::ChanceOutcomes() const { + SPIEL_CHECK_TRUE(IsChanceNode()); + std::vector> outcomes; + const int num_boards = parent_game_->AllInstances().size(); + + if (move_number_ == 0) { + // First chance move of the game. This is for determining the instance. + outcomes.reserve(num_boards); + double uniform_prob = 1.0 / num_boards; + for (int i = 0; i < num_boards; ++i) { + outcomes.push_back({i, uniform_prob}); + } + } else { + const double prob_end = parent_game_->prob_end(); + SPIEL_CHECK_TRUE(move_number_ >= 3); + outcomes = {{parent_game_->ContinueOutcome(), 1.0 - prob_end}, + {parent_game_->EndOutcome(), prob_end}}; + } + return outcomes; +} + +std::string BargainingState::ToString() const { + if (IsChanceNode()) { + return "Initial chance node"; + } + + std::string str = instance_.ToPrettyString(); + absl::StrAppend(&str, "Agreement reached? 
", agreement_reached_, "\n"); + for (int i = 0; i < offers_.size(); ++i) { + int proposer = i % 2; + absl::StrAppend(&str, "P", proposer, " offers: ", offers_[i].ToString(), + "\n"); + } + return str; +} + +std::unique_ptr BargainingState::Clone() const { + return std::unique_ptr(new BargainingState(*this)); +} + +void BargainingGame::ParseInstancesFile(const std::string& filename) { + open_spiel::file::File infile(filename, "r"); + std::string contents = infile.ReadContents(); + ParseInstancesString(contents); +} + +void BargainingGame::ParseInstancesString(const std::string& instances_string) { + std::vector lines = absl::StrSplit(instances_string, '\n'); + SPIEL_CHECK_GT(lines.size(), 1); + for (const std::string& line : lines) { + if (!line.empty()) { + std::vector parts = absl::StrSplit(line, ' '); + SPIEL_CHECK_EQ(parts.size(), kNumItemTypes); + Instance instance; + // pool + std::vector pool_parts = absl::StrSplit(parts[0], ','); + for (int i = 0; i < kNumItemTypes; ++i) { + SPIEL_CHECK_TRUE(absl::SimpleAtoi(pool_parts[i], &instance.pool[i])); + } + // p1 values + std::vector p1values_parts = absl::StrSplit(parts[1], ','); + for (int i = 0; i < kNumItemTypes; ++i) { + SPIEL_CHECK_TRUE( + absl::SimpleAtoi(p1values_parts[i], &instance.values[0][i])); + } + // p2 values + std::vector p2values_parts = absl::StrSplit(parts[2], ','); + for (int i = 0; i < kNumItemTypes; ++i) { + SPIEL_CHECK_TRUE( + absl::SimpleAtoi(p2values_parts[i], &instance.values[1][i])); + } + all_instances_.push_back(instance); + instance_map_[instance.ToString()] = all_instances_.size() - 1; + std::vector> player_data = { + {0, absl::StrCat("player_0,", parts[0], ",", parts[1])}, + {1, absl::StrCat("player_1,", parts[0], ",", parts[2])} + }; + + for (const auto& [player, key] : player_data) { + if (possible_opponent_values_.contains(key)) { + possible_opponent_values_[key].push_back(instance.values[1-player]); + } else { + possible_opponent_values_[key] = {instance.values[1-player]}; + } + } + } + } +} + +void BargainingGame::CreateOffers() { + std::vector cur_offer(kNumItemTypes, 0); + bool done = false; + do { + if (std::accumulate(cur_offer.begin(), cur_offer.end(), 0) <= + kPoolMaxNumItems) { + all_offers_.push_back(Offer(cur_offer)); + offer_map_[all_offers_.back().ToString()] = all_offers_.size() - 1; + } + + // Try adding a digit to the left-most, keep going until you can. Then + // set everything to the left of it to 0. 
+ done = true; + for (int i = 0; i < kNumItemTypes; ++i) { + if (cur_offer[i] < kPoolMaxNumItems) { + done = false; + cur_offer[i]++; + for (int j = i - 1; j >= 0; j--) { + cur_offer[j] = 0; + } + break; + } + } + } while (!done); +} + +BargainingGame::BargainingGame(const GameParameters& params) + : Game(kGameType, params), + max_turns_(ParameterValue("max_turns", kDefaultMaxTurns)), + discount_(ParameterValue("discount", kDefaultDiscount)), + prob_end_(ParameterValue("prob_end", kDefaultProbEnd)) { + std::string filename = ParameterValue( + "instances_file", "" + ); + if (open_spiel::file::Exists(filename)) { + ParseInstancesFile(filename); + } else { + if (!filename.empty()) { + std::cerr << "Failed to parse instances file: " << filename << " "; + } + ParseInstancesString(BargainingInstances1000()); + } + CreateOffers(); +} + +std::string BargainingGame::ActionToString(Player player, + Action move_id) const { + if (player == kChancePlayerId) { + if (move_id == ContinueOutcome()){ + return "Continue"; + } + if (move_id == EndOutcome()) { + return "End"; + } + const Instance& instance = GetInstance(move_id); + return absl::StrCat("Sample game instance:\n", instance.ToPrettyString()); + } else if (move_id < all_offers_.size()) { + return all_offers_[move_id].ToString(); + } else { + SPIEL_CHECK_EQ(move_id, all_offers_.size()); + return "Agree"; + } +} + +int BargainingGame::NumDistinctActions() const { + // All offers + agree. + return all_offers_.size() + 1; +} + +std::pair BargainingGame::GetOfferByQuantities( + const std::vector& quantities) const { + for (int i = 0; i < all_offers_.size(); ++i) { + if (quantities == all_offers_[i].quantities) { + return {all_offers_[i], i}; + } + } + return {Offer(), kInvalidAction}; +} + + +std::vector BargainingGame::ObservationTensorShape() const { + return { + 1 + // Agreement reached? + max_turns_ + 1 + // How many offers have happened + (kPoolMaxNumItems + 1) * kNumItemTypes + // Pool + (kTotalValueAllItems + 1) * kNumItemTypes + // My values + (kPoolMaxNumItems + 1) * kNumItemTypes // Most recent offer + }; +} + +std::vector BargainingGame::InformationStateTensorShape() const { + return { + 1 + // Agreement reached? + max_turns_ + 1 + // How many offers have happened + (kPoolMaxNumItems + 1) * kNumItemTypes + // Pool + (kTotalValueAllItems + 1) * kNumItemTypes + // My values + max_turns_ * (kPoolMaxNumItems + 1) * kNumItemTypes // Offers + }; +} + +std::vector> BargainingGame::GetPossibleOpponentValues( + int player_id, + const std::vector& pool, + const std::vector& values) const { + std::string key = absl::StrCat( + "player_", player_id, ",", absl::StrJoin(pool, ","), + ",", absl::StrJoin(values, "," )); + if (possible_opponent_values_.contains(key)) { + return possible_opponent_values_.at(key); + } + return {}; +} +} // namespace bargaining +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/bargaining/bargaining.h b/scenarios/bargaining/open_spiel/open_spiel/games/bargaining/bargaining.h new file mode 100644 index 0000000..7deaad4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/bargaining/bargaining.h @@ -0,0 +1,213 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_BARGAINING_H_ +#define OPEN_SPIEL_GAMES_BARGAINING_H_ + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/game_parameters.h" + +// A simple multi-issue bargaining game, based on [1,2]. The rules are based +// on the description of Section 2.2 of [1]: +// +// "Each agent is given a different randomly generated value function, which +// gives a non-negative value for each item. The value functions are +// constrained so that: (1) the total value for a user of all items is 10; +// (2) each item has non-zero value to at least one user; and (3) some items +// have nonzero value to both users. These constraints enforce that it is not +// possible for both agents to receive a maximum score, and that no item is +// worthless to both agents, so the negotiation will be competitive. After 10 +// turns, we allow agents the option to complete the negotiation with no +// agreement, which is worth 0 points to both users. We use 3 item types +// (books, hats, balls), and between 5 and 7 total items in the pool." +// +// [1] Lewis et al., Deal or no deal? End-to-end learning of negotiation +// dialogues, 2017. https://arxiv.org/abs/1706.05125 +// [2] David DeVault, Johnathan Mell, and Jonathan Gratch. +// 2015. Toward Natural Turn-taking in a Virtual Human Negotiation Agent +// +// Parameters: +// "instances_file" string The file containing the boards (default: "") +// "discount" double Discount factor multiplied each turn after +// turn 2, applied to (multiplied to reduce) the +// returns (default = 1.0). +// "max_turns" integer Maximum total turns before the game ends +// (default = 10). +// "prob_end" double Probability of the game ending after each +// action (only after each player has taken +// one turn each) (default = 0.0). + +namespace open_spiel { +namespace bargaining { + +constexpr int kNumItemTypes = 3; +constexpr int kPoolMinNumItems = 5; +constexpr int kPoolMaxNumItems = 7; +constexpr int kTotalValueAllItems = 10; +constexpr int kNumPlayers = 2; +constexpr double kDefaultDiscount = 1.0; +constexpr int kDefaultMaxTurns = 10; +constexpr double kDefaultProbEnd = 0.0; +constexpr int kDefaultNumInstances = 1000; +// Default 1000-instance database. See +// bargaining_instances1000.cc to create your own. +// Format is: pool items, p1 values, p2 values. +const char* BargainingInstances1000(); + +struct Instance { + std::vector> values; + std::vector pool; + Instance() + : values({std::vector(kNumItemTypes, 0), + std::vector(kNumItemTypes, 0)}), + pool(kNumItemTypes, 0) {} + std::string ToString() const; + std::string ToPrettyString() const; +}; + +struct Offer { + std::vector quantities; + Offer() : quantities(kNumItemTypes, 0) {} + Offer(const std::vector& _quantities) : quantities(_quantities) {} + std::string ToString() const; +}; + +class BargainingGame; // Forward definition necessary for parent pointer. 
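+// (Illustrative usage sketch, not part of the upstream header: the parameters
+// documented above can be passed through LoadGame the same way the tests in
+// this diff do for backgammon, e.g.
+//   std::shared_ptr<const Game> game = LoadGame(
+//       "bargaining", {{"max_turns", GameParameter(4)},
+//                      {"discount", GameParameter(0.95)},
+//                      {"prob_end", GameParameter(0.1)}});
+// )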
+
+class BargainingState : public State {
+ public:
+  BargainingState(std::shared_ptr<const Game> game);
+  BargainingState(const BargainingState&) = default;
+
+  Player CurrentPlayer() const override;
+  std::string ActionToString(Player player, Action move_id) const override;
+  std::vector<std::pair<Action, double>> ChanceOutcomes() const override;
+  std::string ToString() const override;
+  bool IsTerminal() const override;
+  std::vector<double> Returns() const override;
+  void InformationStateTensor(Player player,
+                              absl::Span<float> values) const override;
+  std::string InformationStateString(Player player) const override;
+  void ObservationTensor(Player player,
+                         absl::Span<float> values) const override;
+  std::string ObservationString(Player player) const override;
+
+  std::unique_ptr<State> Clone() const override;
+  std::vector<Action> LegalActions() const override;
+
+  std::unique_ptr<State> ResampleFromInfostate(
+      int player_id, std::function<double()> rng) const override;
+
+  // Extra methods not part of the general API.
+  Instance GetInstance() const { return instance_; }
+  void SetInstance(Instance instance);
+
+  std::vector<Offer> Offers() const { return offers_; }
+
+  Action AgreeAction() const;
+
+ protected:
+  void DoApplyAction(Action action) override;
+
+ private:
+  bool IsLegalOffer(const Offer& offer) const;
+
+  Player cur_player_;
+  bool agreement_reached_;
+  const BargainingGame* parent_game_;
+  Instance instance_;
+  std::vector<Offer> offers_;
+  Player next_player_;
+  double discount_;
+  bool game_ended_;
+};
+
+class BargainingGame : public Game {
+ public:
+  explicit BargainingGame(const GameParameters& params);
+
+  int NumDistinctActions() const override;
+  std::unique_ptr<State> NewInitialState() const override {
+    return std::unique_ptr<State>(new BargainingState(shared_from_this()));
+  }
+  int MaxChanceOutcomes() const override { return all_instances_.size() + 2; }
+  std::string ActionToString(Player player, Action move_id) const override;
+
+  int MaxGameLength() const override { return max_turns_; }
+  int MaxChanceNodesInHistory() const override { return 1 + (max_turns_ - 2); }
+
+  int NumPlayers() const override { return kNumPlayers; }
+  double MaxUtility() const override { return kTotalValueAllItems; }
+  double MinUtility() const override { return 0; }
+  std::vector<int> ObservationTensorShape() const override;
+  std::vector<int> InformationStateTensorShape() const override;
+
+  int max_turns() const { return max_turns_; }
+  double discount() const { return discount_; }
+  double prob_end() const { return prob_end_; }
+
+  Action ContinueOutcome() const { return all_instances_.size(); }
+  Action EndOutcome() const { return all_instances_.size() + 1; }
+
+  const std::vector<Instance>& AllInstances() const { return all_instances_; }
+  const std::vector<Offer>& AllOffers() const { return all_offers_; }
+  const Instance& GetInstance(int num) const { return all_instances_[num]; }
+  const Offer& GetOffer(int num) const { return all_offers_[num]; }
+  std::pair<Offer, Action> GetOfferByQuantities(
+      const std::vector<int>& quantities) const;
+  int GetInstanceIndex(const Instance& instance) const {
+    if (!instance_map_.contains(instance.ToString())) {
+      return -1;
+    }
+    return instance_map_.at(instance.ToString());
+  }
+  int GetOfferIndex(const Offer& offer) const {
+    if (!offer_map_.contains(offer.ToString())) {
+      return -1;
+    }
+    return offer_map_.at(offer.ToString());
+  }
+  std::vector<std::vector<int>> GetPossibleOpponentValues(
+      int player_id, const std::vector<int>& pool,
+      const std::vector<int>& values) const;
+
+ private:
+  void ParseInstancesFile(const std::string& filename);
+  void ParseInstancesString(const std::string& instances_string);
+  void 
CreateOffers(); + + std::vector all_instances_; + std::vector all_offers_; + absl::flat_hash_map offer_map_; + absl::flat_hash_map instance_map_; + absl::flat_hash_map>> + possible_opponent_values_; + const int max_turns_; + const double discount_; + const double prob_end_; +}; + +} // namespace bargaining +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_BARGAINING_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/bargaining/bargaining_instance_generator.cc b/scenarios/bargaining/open_spiel/open_spiel/games/bargaining/bargaining_instance_generator.cc new file mode 100644 index 0000000..45e107a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/bargaining/bargaining_instance_generator.cc @@ -0,0 +1,130 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This generates strategically interesting instances of Colored Trails +// according to the criteria of Sec 5 of Jong et al', 2011, Metastrategies in +// the Colored Trails Game. +// https://www.ifaamas.org/Proceedings/aamas2011/papers/C4_R57.pdf + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/flags/flag.h" +#include "open_spiel/abseil-cpp/absl/flags/parse.h" +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/games/bargaining/bargaining.h" +#include "open_spiel/utils/file.h" +#include "open_spiel/utils/init.h" + +ABSL_FLAG(int, seed, 0, "Seed to use"); +ABSL_FLAG(int, num_instances, 1000, "Number of boards to generate."); +ABSL_FLAG(std::string, filename, "/tmp/instances.txt", + "File to save boards to."); + +namespace open_spiel { +namespace bargaining { +namespace { + +// TODO(author5): the efficiency can be greatly improved :) +Instance GenerateInstance(std::mt19937* rng) { + Instance instance; + bool valid = false; + while (!valid) { + valid = true; + for (int i = 0; i < kNumItemTypes; ++i) { + instance.pool[i] = absl::Uniform(*rng, 1, kPoolMaxNumItems + 1); + } + int num_pool_items = + std::accumulate(instance.pool.begin(), instance.pool.end(), 0); + if (!(num_pool_items >= kPoolMinNumItems && + num_pool_items <= kPoolMaxNumItems)) { + valid = false; + continue; + } + + // total value to each user is 10 + // every item has nonzero value to at least one player + // some items have nonzero value to both players + bool exists_valuable_to_both = false; + std::array total_values = {0, 0}; + for (int i = 0; i < kNumItemTypes && valid; ++i) { + for (Player p : {0, 1}) { + instance.values[p][i] = + absl::Uniform(*rng, 0, kTotalValueAllItems + 1); + } + + if (instance.values[0][i] == 0 && instance.values[1][i] == 0) { + valid = false; + break; + } else if (instance.values[0][i] > 0 && instance.values[1][i] > 0) { + exists_valuable_to_both = true; + } + + for (Player p : {0, 1}) { + total_values[p] += instance.values[p][i] * instance.pool[i]; + if (total_values[p] > kTotalValueAllItems) { + valid = false; + break; + } + } + } + + if (!valid) { + continue; + } + + if 
(!(total_values[0] == kTotalValueAllItems && + total_values[1] == kTotalValueAllItems && exists_valuable_to_both)) { + valid = false; + } + } + + return instance; +} + +void GenerateInstances(int num) { + std::string filename = absl::GetFlag(FLAGS_filename); + int seed = absl::GetFlag(FLAGS_seed); + std::mt19937 rng(seed); + std::unordered_set generated_instance_strings; + + std::cout << "Opening file: " << filename << std::endl; + open_spiel::file::File outfile(filename, "w"); + int num_generated_instances = 0; + while (num_generated_instances < num) { + Instance instance = GenerateInstance(&rng); + std::string instance_line = instance.ToString(); + if (generated_instance_strings.find(instance_line) == + generated_instance_strings.end()) { + generated_instance_strings.insert(instance_line); + num_generated_instances++; + instance_line.push_back('\n'); + std::cout << num_generated_instances << std::endl << instance_line; + outfile.Write(instance_line); + } +} + std::cout << "Wrote to file: " << filename << std::endl; +} + +} // namespace +} // namespace bargaining +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::Init("", &argc, &argv, false); + absl::ParseCommandLine(argc, argv); + open_spiel::bargaining::GenerateInstances(absl::GetFlag(FLAGS_num_instances)); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/bargaining/bargaining_instances1000.cc b/scenarios/bargaining/open_spiel/open_spiel/games/bargaining/bargaining_instances1000.cc new file mode 100644 index 0000000..2638b06 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/bargaining/bargaining_instances1000.cc @@ -0,0 +1,1028 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
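// Editorial note on the instance database that follows (format documented in
// bargaining.h above as "pool items, p1 values, p2 values"): each line holds
// three comma-separated triples over the three item types. For example, the
// first entry "1,2,3 8,1,0 4,0,2" describes a pool of 1/2/3 items whose
// per-item values are 8/1/0 for player 0 and 4/0/2 for player 1, so both
// value functions total 10, as the generation criteria require:
//   1*8 + 2*1 + 3*0 = 10    and    1*4 + 2*0 + 3*2 = 10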
+ + +namespace open_spiel { +namespace bargaining { +constexpr const char* kDefaultInstances1000 = + R"(1,2,3 8,1,0 4,0,2 +1,4,1 4,1,2 2,2,0 +2,2,1 1,1,6 0,4,2 +1,4,1 9,0,1 2,2,0 +1,4,1 5,1,1 0,1,6 +4,1,1 2,1,1 1,0,6 +3,1,1 1,4,3 0,2,8 +1,1,3 0,1,3 1,3,2 +1,3,1 2,2,2 10,0,0 +1,2,2 2,3,1 4,0,3 +1,4,1 6,1,0 8,0,2 +1,1,3 7,3,0 0,4,2 +1,5,1 4,0,6 3,1,2 +3,3,1 3,0,1 0,2,4 +1,2,3 8,1,0 7,0,1 +4,1,2 0,6,2 2,2,0 +2,1,2 3,2,1 4,2,0 +1,3,1 4,2,0 8,0,2 +2,1,3 3,1,1 0,10,0 +1,3,1 6,1,1 4,1,3 +2,2,1 3,0,4 2,1,4 +3,3,1 1,1,4 3,0,1 +1,2,3 0,5,0 3,2,1 +1,3,1 1,2,3 3,1,4 +4,1,1 0,0,10 1,3,3 +2,4,1 2,1,2 2,1,2 +4,1,2 1,6,0 1,2,2 +1,1,4 4,2,1 4,6,0 +1,5,1 2,0,8 5,1,0 +1,3,1 0,1,7 6,0,4 +1,1,4 4,6,0 0,2,2 +1,1,5 3,2,1 2,8,0 +1,3,2 7,1,0 4,0,3 +2,1,3 1,2,2 2,3,1 +1,3,1 0,1,7 7,0,3 +1,3,1 2,2,2 1,2,3 +1,5,1 9,0,1 0,1,5 +4,1,1 0,4,6 1,5,1 +2,2,1 0,2,6 4,1,0 +3,1,1 2,1,3 0,6,4 +1,1,3 10,0,0 1,3,2 +3,2,1 2,1,2 1,3,1 +1,3,1 5,1,2 3,0,7 +1,4,1 1,2,1 3,0,7 +4,2,1 1,3,0 0,3,4 +2,2,1 1,3,2 5,0,0 +1,3,1 4,2,0 1,1,6 +1,1,3 6,1,1 0,1,3 +2,1,2 3,4,0 3,2,1 +1,4,1 2,1,4 9,0,1 +2,2,2 0,3,2 1,3,1 +3,3,1 0,2,4 1,0,7 +3,1,1 1,0,7 0,8,2 +4,1,1 1,4,2 2,1,1 +1,3,1 0,0,10 1,1,6 +2,2,1 3,0,4 2,3,0 +2,2,2 2,3,0 0,4,1 +2,1,2 3,4,0 1,2,3 +3,1,1 2,2,2 0,2,8 +1,2,2 4,0,3 2,1,3 +2,2,2 2,1,2 2,2,1 +2,2,2 1,1,3 0,5,0 +3,1,1 1,2,5 1,0,7 +1,1,5 3,2,1 8,2,0 +3,3,1 2,1,1 2,1,1 +2,1,4 1,8,0 3,0,1 +1,2,2 6,1,1 8,1,0 +1,1,3 1,3,2 0,10,0 +1,3,1 1,2,3 3,0,7 +2,1,2 2,2,2 1,8,0 +1,4,2 10,0,0 2,1,2 +1,4,1 5,1,1 2,0,8 +3,1,1 2,4,0 3,0,1 +2,2,2 2,2,1 3,1,1 +1,1,3 2,5,1 6,4,0 +2,1,2 1,8,0 1,6,1 +1,3,1 3,1,4 10,0,0 +1,3,1 1,3,0 7,0,3 +3,1,1 0,8,2 1,6,1 +5,1,1 0,9,1 1,1,4 +3,1,1 2,1,3 0,7,3 +3,1,1 0,5,5 3,0,1 +3,1,1 1,0,7 2,4,0 +2,2,1 2,1,4 2,3,0 +1,2,2 4,2,1 0,3,2 +1,2,3 2,1,2 0,2,2 +2,3,1 1,2,2 2,1,3 +3,1,1 0,3,7 1,1,6 +2,1,4 0,2,2 2,2,1 +1,3,1 2,0,8 0,3,1 +4,2,1 1,0,6 0,2,6 +2,3,1 0,3,1 2,2,0 +1,1,4 0,6,1 1,5,1 +1,1,5 10,0,0 3,2,1 +3,1,1 1,5,2 1,5,2 +4,1,1 0,0,10 1,2,4 +1,1,3 1,9,0 7,0,1 +2,1,2 1,4,2 3,2,1 +2,1,4 3,0,1 2,6,0 +1,1,5 1,4,1 4,1,1 +2,2,1 1,3,2 3,2,0 +2,2,1 3,0,4 0,2,6 +3,1,1 2,2,2 0,8,2 +2,1,2 3,2,1 3,4,0 +1,1,3 3,4,1 1,9,0 +2,4,1 2,1,2 2,0,6 +2,2,2 4,1,0 1,2,2 +3,1,1 0,1,9 2,4,0 +1,1,4 1,1,2 5,5,0 +3,1,1 3,1,0 2,0,4 +1,4,2 4,1,1 4,1,1 +1,2,2 6,1,1 0,1,4 +2,3,1 0,2,4 4,0,2 +3,1,1 3,1,0 0,3,7 +2,1,4 5,0,0 1,4,1 +4,1,1 1,5,1 0,4,6 +2,2,1 1,1,6 3,1,2 +1,3,1 4,2,0 3,1,4 +3,1,1 0,2,8 1,1,6 +3,1,1 1,3,4 2,4,0 +4,1,1 1,3,3 0,6,4 +5,1,1 1,1,4 1,1,4 +1,1,3 3,1,2 2,2,2 +1,3,2 8,0,1 1,3,0 +1,1,5 0,5,1 8,2,0 +1,5,1 8,0,2 2,1,3 +1,3,1 4,2,0 5,0,5 +1,3,1 0,2,4 2,1,5 +1,3,1 4,1,3 3,2,1 +2,3,1 1,1,5 1,2,2 +2,2,1 2,0,6 0,1,8 +3,3,1 0,1,7 2,0,4 +1,3,3 4,0,2 1,1,2 +1,4,1 1,2,1 2,2,0 +4,1,1 1,6,0 1,4,2 +2,2,2 1,2,2 2,1,2 +5,1,1 1,5,0 1,1,4 +3,3,1 2,1,1 0,1,7 +2,1,3 0,1,3 3,1,1 +2,1,3 2,0,2 3,1,1 +2,3,2 1,2,1 1,2,1 +4,1,2 0,8,1 1,2,2 +1,1,3 0,10,0 3,4,1 +4,2,1 0,2,6 2,1,0 +1,4,2 6,1,0 0,2,1 +1,2,3 0,2,2 2,1,2 +2,2,1 3,1,2 3,2,0 +1,1,3 2,2,2 3,1,2 +3,1,1 0,4,6 2,0,4 +1,3,1 4,0,6 0,3,1 +2,1,2 1,8,0 2,4,1 +1,5,1 3,1,2 4,1,1 +1,2,2 0,4,1 4,1,2 +3,1,1 1,1,6 1,0,7 +1,3,1 1,1,6 4,1,3 +3,1,1 2,0,4 1,7,0 +2,1,2 5,0,0 1,2,3 +3,1,2 1,1,3 2,2,1 +2,2,2 0,2,3 1,4,0 +1,1,4 5,1,1 2,4,1 +1,1,3 5,5,0 1,0,3 +3,3,1 2,0,4 0,3,1 +1,1,3 6,1,1 0,4,2 +2,2,2 0,2,3 3,0,2 +2,1,2 5,0,0 2,4,1 +1,1,3 9,1,0 6,1,1 +1,3,1 0,0,10 4,1,3 +1,1,3 1,3,2 4,6,0 +2,2,2 5,0,0 1,1,3 +1,1,3 7,0,1 1,6,1 +3,2,1 1,2,3 2,2,0 +3,1,1 0,4,6 2,1,3 +1,3,1 3,0,7 2,1,5 +2,1,2 0,2,4 4,2,0 +1,1,5 5,0,1 5,5,0 +3,1,1 0,5,5 1,2,5 +1,2,3 10,0,0 5,1,1 +1,4,1 0,1,6 9,0,1 +1,1,5 2,3,1 7,3,0 +1,5,1 2,1,3 0,1,5 +1,3,1 2,1,5 0,3,1 
+2,2,2 2,0,3 0,3,2 +2,4,1 3,0,4 3,1,0 +5,1,1 0,2,8 1,3,2 +3,2,1 3,0,1 0,1,8 +1,1,4 5,1,1 7,3,0 +1,3,1 1,3,0 3,1,4 +3,3,1 2,1,1 3,0,1 +1,1,3 6,1,1 1,3,2 +2,1,3 4,2,0 2,0,2 +3,1,1 1,2,5 0,4,6 +2,1,2 0,4,3 2,0,3 +2,1,2 0,8,1 4,2,0 +2,4,1 4,0,2 1,1,4 +1,3,1 6,1,1 3,1,4 +1,2,3 5,1,1 1,0,3 +1,2,4 4,1,1 6,0,1 +4,2,1 0,1,8 1,2,2 +2,2,1 1,4,0 2,0,6 +1,2,3 6,2,0 5,1,1 +3,1,1 1,7,0 0,2,8 +1,3,1 4,1,3 2,0,8 +1,1,3 1,0,3 2,5,1 +1,1,3 3,4,1 1,3,2 +3,1,3 1,4,1 1,1,2 +5,1,1 0,6,4 1,1,4 +2,2,1 0,3,4 1,2,4 +5,1,1 0,3,7 1,2,3 +1,2,3 1,3,1 10,0,0 +2,2,2 1,0,4 3,1,1 +1,2,2 4,0,3 2,3,1 +1,2,3 7,0,1 3,2,1 +1,4,1 3,0,7 0,1,6 +2,1,2 2,4,1 3,0,2 +2,1,3 2,6,0 0,1,3 +3,1,1 0,5,5 1,6,1 +1,5,1 5,1,0 2,0,8 +4,2,1 0,1,8 2,0,2 +2,2,1 0,3,4 4,0,2 +2,2,2 0,4,1 2,0,3 +2,2,2 0,1,4 2,3,0 +3,1,1 1,0,7 1,5,2 +2,1,2 4,2,0 1,0,4 +4,1,2 1,2,2 1,6,0 +2,3,2 4,0,1 1,2,1 +1,2,2 6,1,1 0,4,1 +1,5,1 5,0,5 3,1,2 +2,1,2 0,8,1 3,0,2 +4,1,1 1,2,4 1,0,6 +5,1,1 0,7,3 1,2,3 +2,1,2 4,2,0 0,2,4 +1,2,2 0,1,4 8,0,1 +2,1,4 3,4,0 2,2,1 +4,1,2 1,6,0 2,0,1 +2,1,3 3,4,0 1,5,1 +4,1,2 0,6,2 1,6,0 +1,2,2 2,2,2 2,2,2 +3,1,3 2,4,0 0,1,3 +3,2,1 1,2,3 2,1,2 +1,4,1 9,0,1 0,2,2 +2,2,1 0,3,4 1,0,8 +4,1,1 1,0,6 0,1,9 +2,2,1 3,1,2 1,1,6 +2,2,1 2,2,2 2,1,4 +2,2,2 1,4,0 1,0,4 +4,1,1 2,2,0 1,0,6 +1,3,1 4,2,0 5,1,2 +1,2,4 0,5,0 4,1,1 +2,1,2 1,0,4 1,6,1 +1,1,4 1,5,1 4,6,0 +1,1,4 1,5,1 0,6,1 +3,1,1 1,3,4 1,5,2 +1,5,1 2,1,3 5,0,5 +1,4,1 1,1,5 5,1,1 +1,3,1 0,1,7 5,1,2 +1,2,2 8,0,1 4,1,2 +1,5,1 0,2,0 4,1,1 +3,3,1 0,2,4 1,2,1 +1,4,1 6,1,0 2,1,4 +1,2,4 4,1,1 0,1,2 +3,2,1 1,0,7 2,2,0 +2,1,3 1,5,1 0,10,0 +1,2,2 0,1,4 6,1,1 +1,4,1 8,0,2 2,2,0 +3,1,1 0,3,7 1,3,4 +3,1,2 0,10,0 1,3,2 +1,2,4 0,1,2 2,0,2 +2,1,4 3,4,0 1,4,1 +2,2,2 1,3,1 0,2,3 +1,1,4 0,10,0 5,1,1 +3,1,3 1,7,0 1,4,1 +2,4,1 1,0,8 0,2,2 +1,1,4 4,2,1 1,1,2 +2,1,2 3,2,1 5,0,0 +1,1,3 3,4,1 1,0,3 +1,3,1 9,0,1 0,1,7 +2,3,2 2,2,0 0,2,2 +4,1,1 2,0,2 1,4,2 +1,4,1 7,0,3 4,1,2 +3,1,1 1,7,0 0,4,6 +3,2,2 2,1,1 2,0,2 +2,2,1 1,3,2 3,0,4 +1,1,3 0,10,0 2,2,2 +3,1,1 3,1,0 0,1,9 +1,1,3 3,7,0 3,4,1 +2,2,2 1,0,4 1,1,3 +1,3,1 7,1,0 9,0,1 +1,4,2 2,1,2 2,2,0 +3,1,2 2,0,2 2,2,1 +1,3,1 3,2,1 0,1,7 +1,1,3 2,8,0 4,0,2 +2,3,1 0,1,7 2,0,6 +1,2,2 4,1,2 8,0,1 +1,4,1 0,1,6 6,0,4 +1,1,4 0,2,2 2,8,0 +1,2,4 2,0,2 2,4,0 +3,1,1 1,0,7 1,4,3 +1,4,1 1,2,1 1,1,5 +1,1,3 9,1,0 3,4,1 +2,2,1 1,4,0 2,2,2 +3,1,1 0,1,9 1,5,2 +3,1,1 0,1,9 2,2,2 +1,3,3 4,2,0 1,1,2 +1,1,3 1,0,3 5,5,0 +4,2,1 1,2,2 0,1,8 +1,4,1 4,1,2 0,1,6 +1,3,1 1,1,6 2,2,2 +2,2,2 2,2,1 0,2,3 +2,2,2 1,2,2 2,3,0 +1,1,4 4,2,1 9,1,0 +4,2,1 1,3,0 1,2,2 +4,1,2 1,2,2 1,2,2 +1,4,2 2,1,2 2,0,4 +4,1,1 1,3,3 0,7,3 +3,1,3 2,1,1 0,1,3 +2,1,2 0,4,3 3,4,0 +1,4,1 1,0,9 4,1,2 +5,1,1 0,1,9 1,2,3 +1,1,4 5,1,1 4,6,0 +1,4,2 0,0,5 4,1,1 +1,3,1 0,3,1 2,2,2 +3,1,2 1,1,3 0,2,4 +2,2,3 0,2,2 2,3,0 +2,4,1 0,2,2 1,1,4 +3,1,2 3,1,0 0,8,1 +5,1,1 1,2,3 0,1,9 +4,2,1 1,1,4 0,4,2 +1,5,1 0,0,10 3,1,2 +1,2,2 2,0,4 6,1,1 +1,1,4 3,3,1 8,2,0 +1,2,2 6,0,2 8,1,0 +4,2,1 0,4,2 1,3,0 +2,1,2 0,4,3 2,4,1 +1,4,1 1,1,5 1,1,5 +1,4,1 0,1,6 8,0,2 +2,2,2 4,1,0 2,0,3 +2,4,1 1,2,0 3,0,4 +3,1,1 1,3,4 0,8,2 +3,1,2 2,0,2 1,7,0 +1,4,1 1,2,1 3,1,3 +1,1,3 4,3,1 2,8,0 +4,1,2 0,8,1 2,2,0 +4,2,1 0,3,4 2,0,2 +3,1,1 1,6,1 1,5,2 +2,1,4 3,0,1 1,8,0 +1,1,3 4,0,2 6,4,0 +2,2,1 0,3,4 1,3,2 +4,1,1 1,4,2 0,3,7 +4,2,1 1,2,2 1,0,6 +3,1,2 0,10,0 2,2,1 +3,2,1 2,2,0 1,2,3 +1,3,1 1,2,3 4,2,0 +2,4,1 1,2,0 0,2,2 +3,1,1 2,4,0 2,3,1 +2,1,2 2,4,1 0,0,5 +1,1,3 0,7,1 3,1,2 +2,1,2 2,4,1 2,6,0 +1,1,3 2,5,1 7,0,1 +1,3,1 0,0,10 2,2,2 +2,2,1 2,1,4 5,0,0 +2,3,1 3,1,1 1,0,8 +1,1,3 3,4,1 3,7,0 +1,4,1 5,1,1 1,2,1 +1,4,1 6,1,0 1,2,1 +1,3,2 3,1,2 6,0,2 +1,5,1 3,0,7 2,1,3 +4,1,2 1,2,2 0,0,5 +1,1,4 6,0,1 
2,8,0 +2,2,1 1,3,2 2,2,2 +1,1,3 3,1,2 9,1,0 +2,1,4 2,2,1 3,0,1 +2,4,1 2,0,6 3,1,0 +2,2,2 0,2,3 1,0,4 +1,1,3 1,9,0 4,3,1 +4,1,1 1,2,4 0,2,8 +1,1,3 6,1,1 0,10,0 +2,2,1 1,2,4 2,3,0 +4,1,2 1,6,0 1,4,1 +1,2,3 5,1,1 1,3,1 +3,1,1 1,1,6 0,6,4 +1,3,1 1,3,0 1,0,9 +2,2,2 2,2,1 3,0,2 +3,1,2 0,0,5 1,5,1 +1,3,3 4,0,2 4,2,0 +1,2,2 4,2,1 6,1,1 +2,1,2 3,4,0 0,4,3 +3,2,2 0,5,0 2,1,1 +1,5,1 5,1,0 0,1,5 +1,2,2 8,0,1 6,1,1 +2,1,2 1,2,3 2,6,0 +2,1,4 1,4,1 2,2,1 +1,1,3 6,1,1 5,2,1 +1,1,4 2,8,0 0,6,1 +2,1,2 2,2,2 4,0,1 +3,1,3 0,10,0 1,4,1 +1,2,4 2,2,1 10,0,0 +1,3,1 4,2,0 0,1,7 +1,3,2 10,0,0 5,1,1 +2,1,2 3,4,0 0,8,1 +1,4,2 4,1,1 4,0,3 +3,1,2 1,3,2 2,4,0 +2,2,2 1,4,0 0,4,1 +1,1,3 1,0,3 1,9,0 +1,4,1 3,0,7 3,1,3 +2,2,2 3,1,1 2,1,2 +2,1,2 3,2,1 1,6,1 +1,3,3 1,1,2 4,1,1 +1,5,1 6,0,4 3,1,2 +1,3,1 0,1,7 7,1,0 +2,2,1 1,1,6 0,3,4 +1,1,3 1,0,3 1,3,2 +1,2,2 6,1,1 2,0,4 +1,3,2 3,1,2 2,2,1 +2,2,1 1,2,4 2,0,6 +1,4,1 2,2,0 5,1,1 +2,1,3 2,0,2 3,4,0 +2,1,4 1,0,2 0,2,2 +3,1,1 0,9,1 3,1,0 +1,5,1 3,0,7 1,1,4 +1,4,1 1,2,1 9,0,1 +1,4,2 6,1,0 6,0,2 +1,3,2 4,2,0 2,0,4 +3,1,1 0,10,0 1,2,5 +1,3,2 3,1,2 7,1,0 +1,1,4 0,2,2 3,7,0 +2,2,2 4,0,1 2,3,0 +1,1,5 0,5,1 2,3,1 +3,1,1 1,2,5 0,1,9 +1,1,3 3,1,2 10,0,0 +1,1,3 6,4,0 0,4,2 +2,2,1 1,0,8 1,3,2 +4,1,1 1,0,6 1,1,5 +1,1,3 0,1,3 2,5,1 +1,4,1 8,0,2 2,1,4 +1,1,4 7,3,0 1,1,2 +1,3,1 2,2,2 7,1,0 +3,1,1 1,0,7 3,1,0 +2,2,1 3,2,0 1,0,8 +1,3,1 1,1,6 6,1,1 +1,3,3 1,2,1 4,0,2 +3,1,1 0,10,0 1,3,4 +3,1,1 1,7,0 2,2,2 +1,5,1 8,0,2 0,1,5 +2,1,4 2,2,1 1,0,2 +1,4,1 0,2,2 1,0,9 +5,1,1 0,4,6 1,5,0 +1,1,5 8,2,0 1,4,1 +1,2,4 4,1,1 8,1,0 +1,4,1 1,1,5 3,0,7 +5,1,1 0,6,4 1,0,5 +3,1,1 0,0,10 1,1,6 +1,3,1 4,1,3 7,0,3 +1,2,4 2,0,2 8,1,0 +1,1,3 2,2,2 6,1,1 +1,1,3 6,1,1 2,2,2 +1,2,2 6,0,2 2,3,1 +3,3,1 0,0,10 1,2,1 +3,2,1 2,1,2 1,2,3 +1,3,1 8,0,2 7,1,0 +1,2,3 1,0,3 4,3,0 +1,2,2 0,3,2 8,1,0 +2,2,2 1,4,0 1,2,2 +1,4,2 0,2,1 4,0,3 +1,4,1 1,2,1 6,1,0 +1,2,4 4,1,1 6,2,0 +3,2,1 0,0,10 1,3,1 +3,1,1 1,4,3 0,0,10 +2,1,2 3,2,1 3,0,2 +2,2,2 2,3,0 1,3,1 +1,2,2 8,1,0 0,3,2 +1,3,1 2,1,5 3,2,1 +1,1,4 5,5,0 3,3,1 +2,1,2 3,0,2 3,4,0 +1,3,1 7,1,0 6,0,4 +3,3,1 0,3,1 1,1,4 +2,4,1 2,0,6 0,2,2 +1,1,3 2,8,0 3,1,2 +1,1,3 7,0,1 0,7,1 +2,3,1 2,1,3 3,1,1 +1,4,1 0,2,2 4,1,2 +1,1,5 9,1,0 1,4,1 +1,1,4 1,9,0 4,2,1 +3,2,1 0,1,8 1,1,5 +4,1,1 0,4,6 1,3,3 +1,4,1 4,1,2 6,0,4 +3,1,3 0,7,1 1,7,0 +3,1,2 1,5,1 3,1,0 +2,2,1 2,0,6 0,2,6 +2,2,2 0,4,1 1,2,2 +1,4,1 6,0,4 0,2,2 +1,2,2 4,2,1 6,2,0 +3,1,3 1,4,1 2,4,0 +1,2,3 1,3,1 4,3,0 +1,1,5 2,3,1 6,4,0 +2,1,2 1,4,2 3,4,0 +1,1,4 4,2,1 2,8,0 +1,3,1 6,1,1 4,2,0 +1,2,2 4,0,3 0,3,2 +1,3,1 3,0,7 7,1,0 +4,1,1 1,1,5 0,10,0 +1,1,4 1,5,1 1,1,2 +1,1,5 7,3,0 1,4,1 +4,2,1 2,1,0 0,1,8 +1,2,3 2,1,2 2,4,0 +1,2,2 6,1,1 2,2,2 +2,2,2 0,4,1 2,3,0 +1,4,1 3,1,3 5,0,5 +3,2,1 0,4,2 3,0,1 +2,4,1 2,1,2 3,0,4 +2,3,1 2,1,3 3,0,4 +2,3,1 4,0,2 1,2,2 +1,1,5 0,10,0 1,4,1 +1,1,3 3,7,0 6,1,1 +2,3,1 1,2,2 0,3,1 +3,1,1 0,7,3 1,0,7 +1,2,2 0,3,2 4,0,3 +1,4,1 0,1,6 5,0,5 +2,2,2 3,1,1 2,2,1 +2,4,1 1,1,4 3,0,4 +2,1,3 4,2,0 1,5,1 +1,2,2 6,1,1 10,0,0 +4,1,1 0,7,3 1,0,6 +2,1,3 1,8,0 1,2,2 +2,2,2 1,1,3 0,4,1 +1,3,2 2,2,1 8,0,1 +1,4,2 2,2,0 4,1,1 +2,1,2 1,6,1 2,6,0 +1,1,5 1,4,1 10,0,0 +2,2,2 0,1,4 3,1,1 +1,1,4 8,2,0 4,2,1 +3,2,1 1,0,7 0,1,8 +2,2,1 2,3,0 0,3,4 +2,2,1 3,1,2 2,2,2 +3,1,1 1,4,3 1,5,2 +1,1,3 3,1,2 1,3,2 +2,1,3 2,0,2 1,8,0 +1,4,1 3,1,3 1,1,5 +2,1,4 2,2,1 3,4,0 +1,3,1 5,1,2 0,3,1 +2,1,3 3,1,1 1,2,2 +4,2,1 0,2,6 1,0,6 +1,1,3 6,1,1 5,5,0 +2,1,2 1,0,4 4,2,0 +1,4,1 5,0,5 0,1,6 +1,5,1 2,1,3 10,0,0 +1,3,1 7,1,0 4,1,3 +4,2,1 1,2,2 1,1,4 +1,5,1 0,1,5 3,0,7 +2,2,1 0,2,6 1,4,0 +5,1,1 1,5,0 1,2,3 +2,1,2 2,4,1 2,4,1 +2,3,1 0,2,4 2,1,3 +1,2,4 6,2,0 0,1,2 +2,1,3 3,4,0 
2,3,1 +3,1,2 0,2,4 1,5,1 +2,1,2 2,0,3 4,2,0 +2,1,2 1,6,1 2,4,1 +2,1,3 1,5,1 2,3,1 +1,3,3 1,1,2 1,0,3 +1,1,3 3,1,2 6,1,1 +2,1,2 5,0,0 3,2,1 +1,1,3 1,9,0 4,0,2 +1,1,3 3,1,2 1,6,1 +4,1,1 1,4,2 0,5,5 +1,3,1 0,0,10 5,1,2 +2,2,1 0,1,8 2,1,4 +1,4,1 1,2,1 0,1,6 +1,2,2 8,1,0 4,0,3 +1,3,1 4,2,0 1,0,9 +1,1,3 1,6,1 0,10,0 +2,2,2 4,1,0 2,1,2 +2,3,1 1,0,8 1,1,5 +3,3,1 1,1,4 1,2,1 +3,1,2 1,7,0 1,1,3 +1,3,1 6,1,1 6,0,4 +1,1,4 4,2,1 1,9,0 +1,4,1 4,0,6 0,1,6 +1,1,4 3,7,0 4,2,1 +3,1,1 1,3,4 1,6,1 +3,1,1 0,1,9 1,0,7 +2,2,2 3,0,2 1,1,3 +2,4,1 0,1,6 1,2,0 +1,1,4 5,1,1 6,0,1 +5,1,1 0,5,5 1,0,5 +2,2,2 0,2,3 2,0,3 +2,1,2 4,2,0 1,2,3 +1,4,1 4,1,2 5,1,1 +1,3,1 5,0,5 1,1,6 +3,1,1 0,4,6 1,1,6 +2,2,2 1,3,1 2,0,3 +3,1,2 2,4,0 0,2,4 +2,2,1 2,2,2 4,1,0 +1,1,4 1,9,0 6,0,1 +1,4,1 6,1,0 4,1,2 +3,2,2 2,1,1 0,1,4 +4,2,1 1,1,4 0,2,6 +4,1,2 2,2,0 0,8,1 +3,1,1 0,2,8 2,1,3 +4,1,1 1,2,4 0,5,5 +5,1,1 1,4,1 1,1,4 +1,3,1 7,0,3 1,2,3 +1,1,3 4,0,2 5,5,0 +2,1,4 4,2,0 2,2,1 +2,2,2 3,2,0 0,2,3 +1,1,3 0,1,3 7,0,1 +2,1,3 1,5,1 1,8,0 +5,1,1 1,5,0 1,0,5 +3,1,1 2,0,4 0,6,4 +4,1,2 1,0,3 1,4,1 +2,1,2 2,4,1 2,2,2 +1,1,3 1,3,2 0,4,2 +1,3,1 1,1,6 3,2,1 +1,4,1 3,0,7 1,1,5 +1,3,1 4,0,6 5,1,2 +3,1,1 2,0,4 0,7,3 +1,4,1 0,1,6 2,0,8 +4,1,1 1,1,5 1,4,2 +3,1,1 0,0,10 1,4,3 +1,2,4 0,3,1 2,4,0 +4,2,1 0,3,4 1,1,4 +3,1,1 0,2,8 2,3,1 +4,2,1 1,2,2 1,2,2 +1,1,4 2,4,1 0,10,0 +1,1,5 5,0,1 1,9,0 +1,2,2 0,4,1 4,3,0 +2,1,3 0,7,1 1,2,2 +3,1,1 0,10,0 2,3,1 +1,3,2 1,3,0 1,1,3 +1,1,5 4,1,1 5,5,0 +1,2,4 6,2,0 6,0,1 +4,1,1 1,6,0 1,1,5 +3,3,1 0,2,4 2,1,1 +1,1,3 3,4,1 0,4,2 +3,1,1 0,6,4 2,3,1 +5,1,1 1,1,4 1,5,0 +4,2,1 0,2,6 1,2,2 +2,1,2 3,2,1 0,6,2 +1,1,3 1,6,1 4,3,1 +1,3,1 0,3,1 2,0,8 +3,1,2 1,3,2 1,3,2 +1,4,1 6,0,4 5,1,1 +1,2,2 2,0,4 0,4,1 +3,2,2 0,1,4 2,0,2 +3,2,1 1,0,7 0,4,2 +2,2,2 2,0,3 3,2,0 +4,1,2 2,2,0 0,4,3 +2,1,2 0,6,2 1,6,1 +2,3,2 0,0,5 1,2,1 +2,1,4 3,4,0 0,2,2 +1,3,1 6,1,1 8,0,2 +2,1,2 1,8,0 4,0,1 +1,1,3 5,5,0 2,5,1 +1,4,2 8,0,1 4,1,1 +1,4,2 0,2,1 6,1,0 +3,1,1 1,6,1 2,2,2 +5,1,1 1,4,1 0,0,10 +3,1,3 2,1,1 0,4,2 +1,2,3 4,0,2 2,1,2 +4,1,1 1,1,5 0,1,9 +1,3,2 5,1,1 2,2,1 +2,2,1 1,1,6 1,3,2 +1,1,3 3,4,1 6,4,0 +1,1,4 2,8,0 1,5,1 +3,1,1 0,5,5 1,1,6 +2,1,2 1,6,1 5,0,0 +1,3,2 1,3,0 2,2,1 +2,2,1 0,2,6 3,1,2 +1,1,4 1,5,1 2,4,1 +3,2,1 0,3,4 1,1,5 +1,2,2 4,0,3 6,2,0 +5,1,1 0,9,1 1,4,1 +1,2,2 4,1,2 0,4,1 +5,1,1 0,1,9 1,1,4 +1,4,1 3,0,7 6,1,0 +1,3,1 8,0,2 2,1,5 +3,1,3 1,1,2 2,1,1 +1,5,1 1,0,9 1,1,4 +1,1,5 2,3,1 8,2,0 +1,1,3 7,3,0 7,0,1 +1,1,3 2,8,0 1,0,3 +4,1,2 1,2,2 0,8,1 +1,5,1 3,1,2 0,0,10 +2,2,1 2,3,0 4,0,2 +1,2,2 0,3,2 2,3,1 +1,1,3 6,1,1 4,6,0 +1,1,5 3,2,1 10,0,0 +1,3,1 0,2,4 4,1,3 +1,4,1 8,0,2 0,2,2 +2,2,1 2,0,6 2,2,2 +1,1,4 8,2,0 6,0,1 +2,2,1 1,4,0 3,1,2 +1,3,1 3,1,4 7,1,0 +1,3,1 4,1,3 3,1,4 +4,1,2 2,0,1 0,8,1 +1,4,2 6,1,0 0,1,3 +1,3,3 4,1,1 4,1,1 +1,1,3 7,3,0 1,0,3 +2,2,2 3,1,1 1,2,2 +1,1,3 5,2,1 3,7,0 +1,1,3 0,4,2 4,0,2 +1,2,4 6,0,1 4,1,1 +2,3,1 3,0,4 1,1,5 +1,3,2 7,1,0 0,2,2 +1,3,3 1,1,2 4,0,2 +1,5,1 4,1,1 3,0,7 +3,1,1 3,0,1 1,2,5 +1,1,5 2,3,1 5,5,0 +3,1,1 0,10,0 1,6,1 +1,4,1 2,1,4 1,0,9 +3,1,1 3,0,1 1,5,2 +1,3,1 3,0,7 1,1,6 +3,1,1 1,5,2 0,8,2 +1,4,1 10,0,0 1,1,5 +3,1,1 1,2,5 3,1,0 +2,2,1 1,0,8 0,3,4 +1,1,3 3,7,0 4,3,1 +1,3,1 7,0,3 0,2,4 +1,1,3 0,7,1 6,4,0 +3,1,1 3,0,1 0,5,5 +3,1,1 0,8,2 1,2,5 +1,2,2 4,3,0 4,2,1 +1,1,3 0,1,3 9,1,0 +2,1,3 0,4,2 3,4,0 +1,1,4 3,3,1 10,0,0 +2,1,2 3,0,2 4,2,0 +1,2,4 0,1,2 8,1,0 +1,2,3 1,0,3 1,3,1 +1,1,4 8,2,0 0,2,2 +2,1,2 0,10,0 1,6,1 +1,3,1 6,1,1 1,1,6 +1,1,3 2,5,1 10,0,0 +2,1,4 2,6,0 2,2,1 +3,1,1 3,1,0 1,3,4 +2,2,2 0,2,3 2,1,2 +1,1,3 0,10,0 5,2,1 +2,2,2 0,1,4 1,4,0 +3,1,3 0,1,3 2,4,0 +1,1,4 8,2,0 0,6,1 +2,2,1 2,1,4 1,4,0 +1,3,1 0,2,4 4,0,6 
+1,3,1 6,0,4 4,1,3 +1,3,1 6,1,1 0,3,1 +4,1,1 1,5,1 2,0,2 +3,1,1 1,6,1 0,7,3 +1,3,1 4,1,3 2,2,2 +3,1,2 2,4,0 1,1,3 +2,1,2 2,0,3 1,4,2 +2,2,2 1,1,3 2,3,0 +1,3,2 4,0,3 0,2,2 +1,3,1 0,3,1 4,1,3 +2,1,2 2,2,2 0,6,2 +1,4,1 2,2,0 1,1,5 +4,1,1 1,5,1 1,1,5 +2,2,2 2,1,2 5,0,0 +4,2,1 0,4,2 1,1,4 +2,2,2 4,0,1 1,3,1 +3,1,2 1,1,3 1,7,0 +2,3,2 2,2,0 1,2,1 +2,1,3 3,4,0 1,2,2 +2,3,1 3,0,4 2,1,3 +1,5,1 0,1,5 1,1,4 +3,1,1 0,4,6 1,2,5 +1,2,2 4,1,2 6,2,0 +1,1,3 7,0,1 9,1,0 +1,1,5 2,3,1 1,4,1 +4,1,1 1,6,0 0,9,1 +1,2,2 4,2,1 2,4,0 +1,1,4 4,6,0 0,6,1 +2,4,1 3,0,4 1,2,0 +1,1,4 5,5,0 2,4,1 +1,1,3 0,4,2 9,1,0 +1,1,4 1,1,2 1,5,1 +1,5,1 1,0,9 4,1,1 +2,2,1 1,3,2 4,0,2 +2,1,2 1,6,1 0,0,5 +1,2,4 2,4,0 2,0,2 +2,2,2 1,0,4 0,3,2 +1,3,2 3,1,2 1,1,3 +1,4,1 1,2,1 2,0,8 +4,1,1 1,1,5 1,5,1 +2,2,2 1,2,2 1,2,2 +3,1,1 1,4,3 1,3,4 +4,1,1 1,0,6 2,2,0 +1,1,4 4,2,1 6,0,1 +1,2,2 8,1,0 6,1,1 +1,2,3 3,2,1 4,3,0 +1,3,2 4,0,3 1,1,3 +2,1,2 1,2,3 0,6,2 +1,3,1 2,2,2 1,0,9 +1,2,2 6,1,1 6,1,1 +2,1,3 0,10,0 3,1,1 +1,2,4 4,3,0 0,1,2 +1,1,4 1,1,2 8,2,0 +3,3,1 1,1,4 1,0,7 +2,2,1 0,2,6 4,0,2 +3,1,1 1,3,4 0,3,7 +1,2,2 6,2,0 4,1,2 +4,1,1 1,3,3 1,3,3 +1,3,2 1,3,0 2,0,4 +1,1,4 2,0,2 0,2,2 +4,1,1 1,2,4 0,10,0 +3,1,1 1,4,3 1,4,3 +3,2,1 2,1,2 2,0,4 +1,5,1 0,1,5 2,1,3 +2,1,3 1,8,0 0,1,3 +3,1,3 2,4,0 1,1,2 +3,2,2 2,0,2 0,2,3 +4,1,1 2,2,0 0,2,8 +4,2,1 1,2,2 0,3,4 +3,2,1 2,0,4 1,3,1 +2,2,2 1,2,2 1,4,0 +2,1,4 4,2,0 0,6,1 +1,1,3 3,7,0 1,6,1 +1,1,4 1,9,0 1,1,2 +4,1,1 2,0,2 0,1,9 +1,4,2 0,1,3 2,2,0 +3,1,1 0,2,8 2,2,2 +2,1,2 2,4,1 0,2,4 +1,2,3 7,0,1 5,1,1 +1,4,2 8,0,1 6,1,0 +3,1,1 0,8,2 1,3,4 +1,3,3 1,0,3 1,3,0 +2,2,2 3,1,1 0,0,5 +1,1,4 2,8,0 1,1,2 +2,1,3 1,8,0 3,1,1 +1,3,1 10,0,0 1,1,6 +1,2,3 1,0,3 2,1,2 +1,2,2 4,0,3 4,2,1 +5,1,1 1,2,3 1,4,1 +1,5,1 4,1,1 10,0,0 +2,2,1 2,1,4 2,1,4 +3,1,1 0,10,0 1,1,6 +1,4,1 4,0,6 3,1,3 +3,2,2 2,1,1 0,2,3 +1,5,1 2,1,3 4,1,1 +4,1,1 0,2,8 1,6,0 +1,3,1 0,3,1 2,1,5 +2,2,2 2,0,3 0,1,4 +3,2,1 0,2,6 2,0,4 +1,3,1 0,1,7 6,1,1 +4,1,1 0,1,9 1,0,6 +1,1,5 0,5,1 9,1,0 +2,2,1 4,1,0 3,0,4 +3,1,1 3,1,0 0,6,4 +1,3,1 3,2,1 6,1,1 +3,1,1 1,6,1 1,0,7 +1,3,1 1,3,0 5,1,2 +3,1,1 2,3,1 3,1,0 +1,1,4 9,1,0 1,5,1 +1,2,2 2,1,3 0,3,2 +4,1,2 0,8,1 1,4,1 +2,1,2 3,2,1 1,4,2 +1,3,1 0,2,4 4,2,0 +4,2,1 0,5,0 1,1,4 +1,1,3 1,0,3 6,1,1 +1,2,4 2,2,1 4,1,1 +1,1,3 2,8,0 2,5,1 +1,1,5 5,5,0 2,3,1 +1,3,1 1,0,9 7,1,0 +1,2,3 5,1,1 7,0,1 +1,1,5 0,5,1 5,0,1 +1,2,2 8,0,1 2,3,1 +5,1,1 0,9,1 1,5,0 +3,1,2 1,3,2 0,10,0 +3,1,2 1,5,1 0,4,3 +1,1,3 6,1,1 6,1,1 +1,1,3 1,6,1 3,7,0 +2,2,1 2,2,2 3,0,4 +1,3,1 1,0,9 0,1,7 +4,1,1 1,0,6 0,6,4 +1,4,1 1,1,5 4,1,2 +1,2,2 2,2,2 0,0,5 +4,1,1 2,1,1 0,10,0 +4,2,1 2,0,2 1,1,4 +2,3,1 2,1,3 0,0,10 +1,1,4 2,8,0 2,0,2 +3,1,1 1,1,6 1,4,3 +2,2,1 0,3,4 3,0,4 +3,1,1 3,0,1 1,7,0 +1,2,3 6,2,0 1,3,1 +3,2,1 0,4,2 1,1,5 +1,2,4 4,3,0 2,2,1 +1,3,1 0,2,4 6,1,1 +1,3,1 1,2,3 3,2,1 +3,3,1 1,2,1 0,3,1 +1,2,4 6,0,1 2,4,0 +1,2,2 6,0,2 4,3,0 +2,1,3 2,3,1 3,4,0 +2,1,2 1,0,4 2,6,0 +2,3,1 5,0,0 1,1,5 +1,1,3 1,6,1 10,0,0 +4,2,1 2,0,2 0,4,2 +3,1,1 1,2,5 1,4,3 +3,3,1 0,0,10 1,1,4 +1,3,1 5,1,2 5,1,2 +1,4,1 2,1,4 6,1,0 +1,1,4 7,3,0 2,4,1 +1,1,3 4,0,2 9,1,0)" +// MSVC can't handle string longer than 16,384 bytes +R"( +2,4,1 1,0,8 1,1,4 +1,4,1 3,1,3 6,1,0 +1,1,5 2,3,1 10,0,0 +1,2,3 8,1,0 1,3,1 +1,3,2 6,0,2 5,1,1 +2,2,2 0,3,2 2,1,2 +2,1,3 2,0,2 4,2,0 +1,3,3 1,2,1 10,0,0 +3,1,2 3,1,0 0,2,4 +1,5,1 4,1,1 5,0,5 +2,2,1 2,0,6 3,1,2 +4,1,2 0,0,5 1,2,2 +2,3,1 2,1,3 0,1,7 +2,2,1 0,3,4 2,3,0 +2,1,2 2,2,2 1,2,3 +1,3,1 10,0,0 1,2,3 +1,3,1 1,0,9 5,1,2 +1,2,2 6,0,2 2,2,2 +1,1,5 1,4,1 3,2,1 +2,1,2 1,8,0 0,2,4 +2,3,1 0,0,10 3,1,1 +1,3,2 2,2,1 4,0,3 +1,3,1 9,0,1 2,2,2 +1,2,4 10,0,0 2,2,1 +1,2,2 10,0,0 6,1,1 
+2,1,3 1,2,2 4,2,0 +1,4,1 1,1,5 3,1,3 +3,1,1 2,2,2 1,6,1 +5,1,1 1,5,0 0,3,7 +3,2,2 0,2,3 2,1,1 +1,3,1 5,1,2 0,2,4 +2,2,2 4,0,1 3,2,0 +2,1,2 5,0,0 2,2,2 +1,2,2 8,1,0 2,0,4 +3,1,2 0,8,1 1,7,0 +1,1,3 1,0,3 3,4,1 +1,2,4 2,2,1 2,4,0 +2,1,4 0,2,2 2,6,0 +1,1,3 0,4,2 1,9,0 +2,2,1 2,1,4 3,2,0 +1,2,4 2,2,1 0,3,1 +1,3,1 3,2,1 7,0,3 +4,1,1 0,3,7 2,0,2 +3,1,3 1,1,2 0,7,1 +2,3,1 1,2,2 5,0,0 +1,2,2 2,4,0 2,3,1 +1,3,3 1,3,0 1,0,3 +1,1,3 5,2,1 8,2,0 +1,2,3 4,3,0 3,2,1 +1,2,4 8,1,0 2,2,1 +1,1,3 0,10,0 6,1,1 +2,2,1 1,4,0 1,2,4 +1,3,1 1,0,9 2,1,5 +2,1,2 0,10,0 1,4,2 +1,1,3 0,7,1 2,5,1 +1,4,1 2,0,8 4,1,2 +3,2,1 0,5,0 2,1,2 +2,1,3 3,1,1 0,4,2 +1,1,5 5,0,1 9,1,0 +1,3,1 5,0,5 1,2,3 +2,4,1 1,2,0 2,1,2 +2,1,2 4,0,1 2,2,2 +3,1,2 0,8,1 1,5,1 +1,2,3 0,2,2 1,0,3 +1,5,1 1,1,4 5,0,5 +2,3,1 3,1,1 0,3,1 +2,2,1 4,0,2 1,1,6 +2,1,2 1,8,0 3,2,1 +1,2,3 2,1,2 1,3,1 +1,1,5 4,1,1 4,6,0 +2,4,1 2,1,2 4,0,2 +1,4,1 9,0,1 6,1,0 +4,1,2 0,6,2 1,2,2 +2,1,2 0,6,2 1,4,2 +4,2,1 0,4,2 2,0,2 +1,1,3 10,0,0 1,6,1 +1,3,1 0,0,10 6,1,1 +3,2,1 1,3,1 2,1,2 +1,1,3 1,0,3 2,2,2 +1,2,2 4,3,0 2,0,4 +3,1,1 1,3,4 0,0,10 +1,3,1 7,1,0 1,1,6 +1,3,1 3,2,1 7,1,0 +1,2,3 1,3,1 0,2,2 +3,1,3 3,1,0 0,4,2 +3,2,1 1,1,5 0,4,2 +1,2,3 0,5,0 5,1,1 +4,1,1 0,6,4 1,4,2 +3,1,1 3,1,0 2,2,2 +1,1,3 5,2,1 3,1,2 +4,1,1 2,0,2 0,3,7 +2,2,1 3,2,0 1,2,4 +2,3,1 4,0,2 2,1,3 +1,1,3 0,4,2 5,2,1 +4,1,2 2,0,1 0,6,2 +1,1,3 6,4,0 4,0,2 +2,2,1 2,3,0 2,0,6 +2,2,1 0,4,2 3,2,0 +3,1,1 1,2,5 0,9,1 +4,2,1 0,3,4 2,1,0 +1,2,2 2,4,0 6,1,1 +1,5,1 3,1,2 4,0,6 +)"; + +const char* BargainingInstances1000() { + return kDefaultInstances1000; +} + +} // namespace bargaining +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/bargaining/bargaining_instances1000.txt b/scenarios/bargaining/open_spiel/open_spiel/games/bargaining/bargaining_instances1000.txt new file mode 100644 index 0000000..d4442c5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/bargaining/bargaining_instances1000.txt @@ -0,0 +1,1000 @@ +1,2,3 8,1,0 4,0,2 +1,4,1 4,1,2 2,2,0 +2,2,1 1,1,6 0,4,2 +1,4,1 9,0,1 2,2,0 +1,4,1 5,1,1 0,1,6 +4,1,1 2,1,1 1,0,6 +3,1,1 1,4,3 0,2,8 +1,1,3 0,1,3 1,3,2 +1,3,1 2,2,2 10,0,0 +1,2,2 2,3,1 4,0,3 +1,4,1 6,1,0 8,0,2 +1,1,3 7,3,0 0,4,2 +1,5,1 4,0,6 3,1,2 +3,3,1 3,0,1 0,2,4 +1,2,3 8,1,0 7,0,1 +4,1,2 0,6,2 2,2,0 +2,1,2 3,2,1 4,2,0 +1,3,1 4,2,0 8,0,2 +2,1,3 3,1,1 0,10,0 +1,3,1 6,1,1 4,1,3 +2,2,1 3,0,4 2,1,4 +3,3,1 1,1,4 3,0,1 +1,2,3 0,5,0 3,2,1 +1,3,1 1,2,3 3,1,4 +4,1,1 0,0,10 1,3,3 +2,4,1 2,1,2 2,1,2 +4,1,2 1,6,0 1,2,2 +1,1,4 4,2,1 4,6,0 +1,5,1 2,0,8 5,1,0 +1,3,1 0,1,7 6,0,4 +1,1,4 4,6,0 0,2,2 +1,1,5 3,2,1 2,8,0 +1,3,2 7,1,0 4,0,3 +2,1,3 1,2,2 2,3,1 +1,3,1 0,1,7 7,0,3 +1,3,1 2,2,2 1,2,3 +1,5,1 9,0,1 0,1,5 +4,1,1 0,4,6 1,5,1 +2,2,1 0,2,6 4,1,0 +3,1,1 2,1,3 0,6,4 +1,1,3 10,0,0 1,3,2 +3,2,1 2,1,2 1,3,1 +1,3,1 5,1,2 3,0,7 +1,4,1 1,2,1 3,0,7 +4,2,1 1,3,0 0,3,4 +2,2,1 1,3,2 5,0,0 +1,3,1 4,2,0 1,1,6 +1,1,3 6,1,1 0,1,3 +2,1,2 3,4,0 3,2,1 +1,4,1 2,1,4 9,0,1 +2,2,2 0,3,2 1,3,1 +3,3,1 0,2,4 1,0,7 +3,1,1 1,0,7 0,8,2 +4,1,1 1,4,2 2,1,1 +1,3,1 0,0,10 1,1,6 +2,2,1 3,0,4 2,3,0 +2,2,2 2,3,0 0,4,1 +2,1,2 3,4,0 1,2,3 +3,1,1 2,2,2 0,2,8 +1,2,2 4,0,3 2,1,3 +2,2,2 2,1,2 2,2,1 +2,2,2 1,1,3 0,5,0 +3,1,1 1,2,5 1,0,7 +1,1,5 3,2,1 8,2,0 +3,3,1 2,1,1 2,1,1 +2,1,4 1,8,0 3,0,1 +1,2,2 6,1,1 8,1,0 +1,1,3 1,3,2 0,10,0 +1,3,1 1,2,3 3,0,7 +2,1,2 2,2,2 1,8,0 +1,4,2 10,0,0 2,1,2 +1,4,1 5,1,1 2,0,8 +3,1,1 2,4,0 3,0,1 +2,2,2 2,2,1 3,1,1 +1,1,3 2,5,1 6,4,0 +2,1,2 1,8,0 1,6,1 +1,3,1 3,1,4 10,0,0 +1,3,1 1,3,0 7,0,3 +3,1,1 0,8,2 1,6,1 +5,1,1 0,9,1 1,1,4 +3,1,1 2,1,3 0,7,3 +3,1,1 0,5,5 3,0,1 +3,1,1 1,0,7 
2,4,0 +2,2,1 2,1,4 2,3,0 +1,2,2 4,2,1 0,3,2 +1,2,3 2,1,2 0,2,2 +2,3,1 1,2,2 2,1,3 +3,1,1 0,3,7 1,1,6 +2,1,4 0,2,2 2,2,1 +1,3,1 2,0,8 0,3,1 +4,2,1 1,0,6 0,2,6 +2,3,1 0,3,1 2,2,0 +1,1,4 0,6,1 1,5,1 +1,1,5 10,0,0 3,2,1 +3,1,1 1,5,2 1,5,2 +4,1,1 0,0,10 1,2,4 +1,1,3 1,9,0 7,0,1 +2,1,2 1,4,2 3,2,1 +2,1,4 3,0,1 2,6,0 +1,1,5 1,4,1 4,1,1 +2,2,1 1,3,2 3,2,0 +2,2,1 3,0,4 0,2,6 +3,1,1 2,2,2 0,8,2 +2,1,2 3,2,1 3,4,0 +1,1,3 3,4,1 1,9,0 +2,4,1 2,1,2 2,0,6 +2,2,2 4,1,0 1,2,2 +3,1,1 0,1,9 2,4,0 +1,1,4 1,1,2 5,5,0 +3,1,1 3,1,0 2,0,4 +1,4,2 4,1,1 4,1,1 +1,2,2 6,1,1 0,1,4 +2,3,1 0,2,4 4,0,2 +3,1,1 3,1,0 0,3,7 +2,1,4 5,0,0 1,4,1 +4,1,1 1,5,1 0,4,6 +2,2,1 1,1,6 3,1,2 +1,3,1 4,2,0 3,1,4 +3,1,1 0,2,8 1,1,6 +3,1,1 1,3,4 2,4,0 +4,1,1 1,3,3 0,6,4 +5,1,1 1,1,4 1,1,4 +1,1,3 3,1,2 2,2,2 +1,3,2 8,0,1 1,3,0 +1,1,5 0,5,1 8,2,0 +1,5,1 8,0,2 2,1,3 +1,3,1 4,2,0 5,0,5 +1,3,1 0,2,4 2,1,5 +1,3,1 4,1,3 3,2,1 +2,3,1 1,1,5 1,2,2 +2,2,1 2,0,6 0,1,8 +3,3,1 0,1,7 2,0,4 +1,3,3 4,0,2 1,1,2 +1,4,1 1,2,1 2,2,0 +4,1,1 1,6,0 1,4,2 +2,2,2 1,2,2 2,1,2 +5,1,1 1,5,0 1,1,4 +3,3,1 2,1,1 0,1,7 +2,1,3 0,1,3 3,1,1 +2,1,3 2,0,2 3,1,1 +2,3,2 1,2,1 1,2,1 +4,1,2 0,8,1 1,2,2 +1,1,3 0,10,0 3,4,1 +4,2,1 0,2,6 2,1,0 +1,4,2 6,1,0 0,2,1 +1,2,3 0,2,2 2,1,2 +2,2,1 3,1,2 3,2,0 +1,1,3 2,2,2 3,1,2 +3,1,1 0,4,6 2,0,4 +1,3,1 4,0,6 0,3,1 +2,1,2 1,8,0 2,4,1 +1,5,1 3,1,2 4,1,1 +1,2,2 0,4,1 4,1,2 +3,1,1 1,1,6 1,0,7 +1,3,1 1,1,6 4,1,3 +3,1,1 2,0,4 1,7,0 +2,1,2 5,0,0 1,2,3 +3,1,2 1,1,3 2,2,1 +2,2,2 0,2,3 1,4,0 +1,1,4 5,1,1 2,4,1 +1,1,3 5,5,0 1,0,3 +3,3,1 2,0,4 0,3,1 +1,1,3 6,1,1 0,4,2 +2,2,2 0,2,3 3,0,2 +2,1,2 5,0,0 2,4,1 +1,1,3 9,1,0 6,1,1 +1,3,1 0,0,10 4,1,3 +1,1,3 1,3,2 4,6,0 +2,2,2 5,0,0 1,1,3 +1,1,3 7,0,1 1,6,1 +3,2,1 1,2,3 2,2,0 +3,1,1 0,4,6 2,1,3 +1,3,1 3,0,7 2,1,5 +2,1,2 0,2,4 4,2,0 +1,1,5 5,0,1 5,5,0 +3,1,1 0,5,5 1,2,5 +1,2,3 10,0,0 5,1,1 +1,4,1 0,1,6 9,0,1 +1,1,5 2,3,1 7,3,0 +1,5,1 2,1,3 0,1,5 +1,3,1 2,1,5 0,3,1 +2,2,2 2,0,3 0,3,2 +2,4,1 3,0,4 3,1,0 +5,1,1 0,2,8 1,3,2 +3,2,1 3,0,1 0,1,8 +1,1,4 5,1,1 7,3,0 +1,3,1 1,3,0 3,1,4 +3,3,1 2,1,1 3,0,1 +1,1,3 6,1,1 1,3,2 +2,1,3 4,2,0 2,0,2 +3,1,1 1,2,5 0,4,6 +2,1,2 0,4,3 2,0,3 +2,1,2 0,8,1 4,2,0 +2,4,1 4,0,2 1,1,4 +1,3,1 6,1,1 3,1,4 +1,2,3 5,1,1 1,0,3 +1,2,4 4,1,1 6,0,1 +4,2,1 0,1,8 1,2,2 +2,2,1 1,4,0 2,0,6 +1,2,3 6,2,0 5,1,1 +3,1,1 1,7,0 0,2,8 +1,3,1 4,1,3 2,0,8 +1,1,3 1,0,3 2,5,1 +1,1,3 3,4,1 1,3,2 +3,1,3 1,4,1 1,1,2 +5,1,1 0,6,4 1,1,4 +2,2,1 0,3,4 1,2,4 +5,1,1 0,3,7 1,2,3 +1,2,3 1,3,1 10,0,0 +2,2,2 1,0,4 3,1,1 +1,2,2 4,0,3 2,3,1 +1,2,3 7,0,1 3,2,1 +1,4,1 3,0,7 0,1,6 +2,1,2 2,4,1 3,0,2 +2,1,3 2,6,0 0,1,3 +3,1,1 0,5,5 1,6,1 +1,5,1 5,1,0 2,0,8 +4,2,1 0,1,8 2,0,2 +2,2,1 0,3,4 4,0,2 +2,2,2 0,4,1 2,0,3 +2,2,2 0,1,4 2,3,0 +3,1,1 1,0,7 1,5,2 +2,1,2 4,2,0 1,0,4 +4,1,2 1,2,2 1,6,0 +2,3,2 4,0,1 1,2,1 +1,2,2 6,1,1 0,4,1 +1,5,1 5,0,5 3,1,2 +2,1,2 0,8,1 3,0,2 +4,1,1 1,2,4 1,0,6 +5,1,1 0,7,3 1,2,3 +2,1,2 4,2,0 0,2,4 +1,2,2 0,1,4 8,0,1 +2,1,4 3,4,0 2,2,1 +4,1,2 1,6,0 2,0,1 +2,1,3 3,4,0 1,5,1 +4,1,2 0,6,2 1,6,0 +1,2,2 2,2,2 2,2,2 +3,1,3 2,4,0 0,1,3 +3,2,1 1,2,3 2,1,2 +1,4,1 9,0,1 0,2,2 +2,2,1 0,3,4 1,0,8 +4,1,1 1,0,6 0,1,9 +2,2,1 3,1,2 1,1,6 +2,2,1 2,2,2 2,1,4 +2,2,2 1,4,0 1,0,4 +4,1,1 2,2,0 1,0,6 +1,3,1 4,2,0 5,1,2 +1,2,4 0,5,0 4,1,1 +2,1,2 1,0,4 1,6,1 +1,1,4 1,5,1 4,6,0 +1,1,4 1,5,1 0,6,1 +3,1,1 1,3,4 1,5,2 +1,5,1 2,1,3 5,0,5 +1,4,1 1,1,5 5,1,1 +1,3,1 0,1,7 5,1,2 +1,2,2 8,0,1 4,1,2 +1,5,1 0,2,0 4,1,1 +3,3,1 0,2,4 1,2,1 +1,4,1 6,1,0 2,1,4 +1,2,4 4,1,1 0,1,2 +3,2,1 1,0,7 2,2,0 +2,1,3 1,5,1 0,10,0 +1,2,2 0,1,4 6,1,1 +1,4,1 8,0,2 2,2,0 +3,1,1 0,3,7 1,3,4 +3,1,2 0,10,0 1,3,2 +1,2,4 0,1,2 2,0,2 +2,1,4 3,4,0 1,4,1 +2,2,2 1,3,1 0,2,3 +1,1,4 
0,10,0 5,1,1 +3,1,3 1,7,0 1,4,1 +2,4,1 1,0,8 0,2,2 +1,1,4 4,2,1 1,1,2 +2,1,2 3,2,1 5,0,0 +1,1,3 3,4,1 1,0,3 +1,3,1 9,0,1 0,1,7 +2,3,2 2,2,0 0,2,2 +4,1,1 2,0,2 1,4,2 +1,4,1 7,0,3 4,1,2 +3,1,1 1,7,0 0,4,6 +3,2,2 2,1,1 2,0,2 +2,2,1 1,3,2 3,0,4 +1,1,3 0,10,0 2,2,2 +3,1,1 3,1,0 0,1,9 +1,1,3 3,7,0 3,4,1 +2,2,2 1,0,4 1,1,3 +1,3,1 7,1,0 9,0,1 +1,4,2 2,1,2 2,2,0 +3,1,2 2,0,2 2,2,1 +1,3,1 3,2,1 0,1,7 +1,1,3 2,8,0 4,0,2 +2,3,1 0,1,7 2,0,6 +1,2,2 4,1,2 8,0,1 +1,4,1 0,1,6 6,0,4 +1,1,4 0,2,2 2,8,0 +1,2,4 2,0,2 2,4,0 +3,1,1 1,0,7 1,4,3 +1,4,1 1,2,1 1,1,5 +1,1,3 9,1,0 3,4,1 +2,2,1 1,4,0 2,2,2 +3,1,1 0,1,9 1,5,2 +3,1,1 0,1,9 2,2,2 +1,3,3 4,2,0 1,1,2 +1,1,3 1,0,3 5,5,0 +4,2,1 1,2,2 0,1,8 +1,4,1 4,1,2 0,1,6 +1,3,1 1,1,6 2,2,2 +2,2,2 2,2,1 0,2,3 +2,2,2 1,2,2 2,3,0 +1,1,4 4,2,1 9,1,0 +4,2,1 1,3,0 1,2,2 +4,1,2 1,2,2 1,2,2 +1,4,2 2,1,2 2,0,4 +4,1,1 1,3,3 0,7,3 +3,1,3 2,1,1 0,1,3 +2,1,2 0,4,3 3,4,0 +1,4,1 1,0,9 4,1,2 +5,1,1 0,1,9 1,2,3 +1,1,4 5,1,1 4,6,0 +1,4,2 0,0,5 4,1,1 +1,3,1 0,3,1 2,2,2 +3,1,2 1,1,3 0,2,4 +2,2,3 0,2,2 2,3,0 +2,4,1 0,2,2 1,1,4 +3,1,2 3,1,0 0,8,1 +5,1,1 1,2,3 0,1,9 +4,2,1 1,1,4 0,4,2 +1,5,1 0,0,10 3,1,2 +1,2,2 2,0,4 6,1,1 +1,1,4 3,3,1 8,2,0 +1,2,2 6,0,2 8,1,0 +4,2,1 0,4,2 1,3,0 +2,1,2 0,4,3 2,4,1 +1,4,1 1,1,5 1,1,5 +1,4,1 0,1,6 8,0,2 +2,2,2 4,1,0 2,0,3 +2,4,1 1,2,0 3,0,4 +3,1,1 1,3,4 0,8,2 +3,1,2 2,0,2 1,7,0 +1,4,1 1,2,1 3,1,3 +1,1,3 4,3,1 2,8,0 +4,1,2 0,8,1 2,2,0 +4,2,1 0,3,4 2,0,2 +3,1,1 1,6,1 1,5,2 +2,1,4 3,0,1 1,8,0 +1,1,3 4,0,2 6,4,0 +2,2,1 0,3,4 1,3,2 +4,1,1 1,4,2 0,3,7 +4,2,1 1,2,2 1,0,6 +3,1,2 0,10,0 2,2,1 +3,2,1 2,2,0 1,2,3 +1,3,1 1,2,3 4,2,0 +2,4,1 1,2,0 0,2,2 +3,1,1 2,4,0 2,3,1 +2,1,2 2,4,1 0,0,5 +1,1,3 0,7,1 3,1,2 +2,1,2 2,4,1 2,6,0 +1,1,3 2,5,1 7,0,1 +1,3,1 0,0,10 2,2,2 +2,2,1 2,1,4 5,0,0 +2,3,1 3,1,1 1,0,8 +1,1,3 3,4,1 3,7,0 +1,4,1 5,1,1 1,2,1 +1,4,1 6,1,0 1,2,1 +1,3,2 3,1,2 6,0,2 +1,5,1 3,0,7 2,1,3 +4,1,2 1,2,2 0,0,5 +1,1,4 6,0,1 2,8,0 +2,2,1 1,3,2 2,2,2 +1,1,3 3,1,2 9,1,0 +2,1,4 2,2,1 3,0,1 +2,4,1 2,0,6 3,1,0 +2,2,2 0,2,3 1,0,4 +1,1,3 1,9,0 4,3,1 +4,1,1 1,2,4 0,2,8 +1,1,3 6,1,1 0,10,0 +2,2,1 1,2,4 2,3,0 +4,1,2 1,6,0 1,4,1 +1,2,3 5,1,1 1,3,1 +3,1,1 1,1,6 0,6,4 +1,3,1 1,3,0 1,0,9 +2,2,2 2,2,1 3,0,2 +3,1,2 0,0,5 1,5,1 +1,3,3 4,0,2 4,2,0 +1,2,2 4,2,1 6,1,1 +2,1,2 3,4,0 0,4,3 +3,2,2 0,5,0 2,1,1 +1,5,1 5,1,0 0,1,5 +1,2,2 8,0,1 6,1,1 +2,1,2 1,2,3 2,6,0 +2,1,4 1,4,1 2,2,1 +1,1,3 6,1,1 5,2,1 +1,1,4 2,8,0 0,6,1 +2,1,2 2,2,2 4,0,1 +3,1,3 0,10,0 1,4,1 +1,2,4 2,2,1 10,0,0 +1,3,1 4,2,0 0,1,7 +1,3,2 10,0,0 5,1,1 +2,1,2 3,4,0 0,8,1 +1,4,2 4,1,1 4,0,3 +3,1,2 1,3,2 2,4,0 +2,2,2 1,4,0 0,4,1 +1,1,3 1,0,3 1,9,0 +1,4,1 3,0,7 3,1,3 +2,2,2 3,1,1 2,1,2 +2,1,2 3,2,1 1,6,1 +1,3,3 1,1,2 4,1,1 +1,5,1 6,0,4 3,1,2 +1,3,1 0,1,7 7,1,0 +2,2,1 1,1,6 0,3,4 +1,1,3 1,0,3 1,3,2 +1,2,2 6,1,1 2,0,4 +1,3,2 3,1,2 2,2,1 +2,2,1 1,2,4 2,0,6 +1,4,1 2,2,0 5,1,1 +2,1,3 2,0,2 3,4,0 +2,1,4 1,0,2 0,2,2 +3,1,1 0,9,1 3,1,0 +1,5,1 3,0,7 1,1,4 +1,4,1 1,2,1 9,0,1 +1,4,2 6,1,0 6,0,2 +1,3,2 4,2,0 2,0,4 +3,1,1 0,10,0 1,2,5 +1,3,2 3,1,2 7,1,0 +1,1,4 0,2,2 3,7,0 +2,2,2 4,0,1 2,3,0 +1,1,5 0,5,1 2,3,1 +3,1,1 1,2,5 0,1,9 +1,1,3 3,1,2 10,0,0 +1,1,3 6,4,0 0,4,2 +2,2,1 1,0,8 1,3,2 +4,1,1 1,0,6 1,1,5 +1,1,3 0,1,3 2,5,1 +1,4,1 8,0,2 2,1,4 +1,1,4 7,3,0 1,1,2 +1,3,1 2,2,2 7,1,0 +3,1,1 1,0,7 3,1,0 +2,2,1 3,2,0 1,0,8 +1,3,1 1,1,6 6,1,1 +1,3,3 1,2,1 4,0,2 +3,1,1 0,10,0 1,3,4 +3,1,1 1,7,0 2,2,2 +1,5,1 8,0,2 0,1,5 +2,1,4 2,2,1 1,0,2 +1,4,1 0,2,2 1,0,9 +5,1,1 0,4,6 1,5,0 +1,1,5 8,2,0 1,4,1 +1,2,4 4,1,1 8,1,0 +1,4,1 1,1,5 3,0,7 +5,1,1 0,6,4 1,0,5 +3,1,1 0,0,10 1,1,6 +1,3,1 4,1,3 7,0,3 +1,2,4 2,0,2 8,1,0 +1,1,3 2,2,2 6,1,1 +1,1,3 6,1,1 2,2,2 +1,2,2 6,0,2 
2,3,1 +3,3,1 0,0,10 1,2,1 +3,2,1 2,1,2 1,2,3 +1,3,1 8,0,2 7,1,0 +1,2,3 1,0,3 4,3,0 +1,2,2 0,3,2 8,1,0 +2,2,2 1,4,0 1,2,2 +1,4,2 0,2,1 4,0,3 +1,4,1 1,2,1 6,1,0 +1,2,4 4,1,1 6,2,0 +3,2,1 0,0,10 1,3,1 +3,1,1 1,4,3 0,0,10 +2,1,2 3,2,1 3,0,2 +2,2,2 2,3,0 1,3,1 +1,2,2 8,1,0 0,3,2 +1,3,1 2,1,5 3,2,1 +1,1,4 5,5,0 3,3,1 +2,1,2 3,0,2 3,4,0 +1,3,1 7,1,0 6,0,4 +3,3,1 0,3,1 1,1,4 +2,4,1 2,0,6 0,2,2 +1,1,3 2,8,0 3,1,2 +1,1,3 7,0,1 0,7,1 +2,3,1 2,1,3 3,1,1 +1,4,1 0,2,2 4,1,2 +1,1,5 9,1,0 1,4,1 +1,1,4 1,9,0 4,2,1 +3,2,1 0,1,8 1,1,5 +4,1,1 0,4,6 1,3,3 +1,4,1 4,1,2 6,0,4 +3,1,3 0,7,1 1,7,0 +3,1,2 1,5,1 3,1,0 +2,2,1 2,0,6 0,2,6 +2,2,2 0,4,1 1,2,2 +1,4,1 6,0,4 0,2,2 +1,2,2 4,2,1 6,2,0 +3,1,3 1,4,1 2,4,0 +1,2,3 1,3,1 4,3,0 +1,1,5 2,3,1 6,4,0 +2,1,2 1,4,2 3,4,0 +1,1,4 4,2,1 2,8,0 +1,3,1 6,1,1 4,2,0 +1,2,2 4,0,3 0,3,2 +1,3,1 3,0,7 7,1,0 +4,1,1 1,1,5 0,10,0 +1,1,4 1,5,1 1,1,2 +1,1,5 7,3,0 1,4,1 +4,2,1 2,1,0 0,1,8 +1,2,3 2,1,2 2,4,0 +1,2,2 6,1,1 2,2,2 +2,2,2 0,4,1 2,3,0 +1,4,1 3,1,3 5,0,5 +3,2,1 0,4,2 3,0,1 +2,4,1 2,1,2 3,0,4 +2,3,1 2,1,3 3,0,4 +2,3,1 4,0,2 1,2,2 +1,1,5 0,10,0 1,4,1 +1,1,3 3,7,0 6,1,1 +2,3,1 1,2,2 0,3,1 +3,1,1 0,7,3 1,0,7 +1,2,2 0,3,2 4,0,3 +1,4,1 0,1,6 5,0,5 +2,2,2 3,1,1 2,2,1 +2,4,1 1,1,4 3,0,4 +2,1,3 4,2,0 1,5,1 +1,2,2 6,1,1 10,0,0 +4,1,1 0,7,3 1,0,6 +2,1,3 1,8,0 1,2,2 +2,2,2 1,1,3 0,4,1 +1,3,2 2,2,1 8,0,1 +1,4,2 2,2,0 4,1,1 +2,1,2 1,6,1 2,6,0 +1,1,5 1,4,1 10,0,0 +2,2,2 0,1,4 3,1,1 +1,1,4 8,2,0 4,2,1 +3,2,1 1,0,7 0,1,8 +2,2,1 2,3,0 0,3,4 +2,2,1 3,1,2 2,2,2 +3,1,1 1,4,3 1,5,2 +1,1,3 3,1,2 1,3,2 +2,1,3 2,0,2 1,8,0 +1,4,1 3,1,3 1,1,5 +2,1,4 2,2,1 3,4,0 +1,3,1 5,1,2 0,3,1 +2,1,3 3,1,1 1,2,2 +4,2,1 0,2,6 1,0,6 +1,1,3 6,1,1 5,5,0 +2,1,2 1,0,4 4,2,0 +1,4,1 5,0,5 0,1,6 +1,5,1 2,1,3 10,0,0 +1,3,1 7,1,0 4,1,3 +4,2,1 1,2,2 1,1,4 +1,5,1 0,1,5 3,0,7 +2,2,1 0,2,6 1,4,0 +5,1,1 1,5,0 1,2,3 +2,1,2 2,4,1 2,4,1 +2,3,1 0,2,4 2,1,3 +1,2,4 6,2,0 0,1,2 +2,1,3 3,4,0 2,3,1 +3,1,2 0,2,4 1,5,1 +2,1,2 2,0,3 4,2,0 +2,1,2 1,6,1 2,4,1 +2,1,3 1,5,1 2,3,1 +1,3,3 1,1,2 1,0,3 +1,1,3 3,1,2 6,1,1 +2,1,2 5,0,0 3,2,1 +1,1,3 1,9,0 4,0,2 +1,1,3 3,1,2 1,6,1 +4,1,1 1,4,2 0,5,5 +1,3,1 0,0,10 5,1,2 +2,2,1 0,1,8 2,1,4 +1,4,1 1,2,1 0,1,6 +1,2,2 8,1,0 4,0,3 +1,3,1 4,2,0 1,0,9 +1,1,3 1,6,1 0,10,0 +2,2,2 4,1,0 2,1,2 +2,3,1 1,0,8 1,1,5 +3,3,1 1,1,4 1,2,1 +3,1,2 1,7,0 1,1,3 +1,3,1 6,1,1 6,0,4 +1,1,4 4,2,1 1,9,0 +1,4,1 4,0,6 0,1,6 +1,1,4 3,7,0 4,2,1 +3,1,1 1,3,4 1,6,1 +3,1,1 0,1,9 1,0,7 +2,2,2 3,0,2 1,1,3 +2,4,1 0,1,6 1,2,0 +1,1,4 5,1,1 6,0,1 +5,1,1 0,5,5 1,0,5 +2,2,2 0,2,3 2,0,3 +2,1,2 4,2,0 1,2,3 +1,4,1 4,1,2 5,1,1 +1,3,1 5,0,5 1,1,6 +3,1,1 0,4,6 1,1,6 +2,2,2 1,3,1 2,0,3 +3,1,2 2,4,0 0,2,4 +2,2,1 2,2,2 4,1,0 +1,1,4 1,9,0 6,0,1 +1,4,1 6,1,0 4,1,2 +3,2,2 2,1,1 0,1,4 +4,2,1 1,1,4 0,2,6 +4,1,2 2,2,0 0,8,1 +3,1,1 0,2,8 2,1,3 +4,1,1 1,2,4 0,5,5 +5,1,1 1,4,1 1,1,4 +1,3,1 7,0,3 1,2,3 +1,1,3 4,0,2 5,5,0 +2,1,4 4,2,0 2,2,1 +2,2,2 3,2,0 0,2,3 +1,1,3 0,1,3 7,0,1 +2,1,3 1,5,1 1,8,0 +5,1,1 1,5,0 1,0,5 +3,1,1 2,0,4 0,6,4 +4,1,2 1,0,3 1,4,1 +2,1,2 2,4,1 2,2,2 +1,1,3 1,3,2 0,4,2 +1,3,1 1,1,6 3,2,1 +1,4,1 3,0,7 1,1,5 +1,3,1 4,0,6 5,1,2 +3,1,1 2,0,4 0,7,3 +1,4,1 0,1,6 2,0,8 +4,1,1 1,1,5 1,4,2 +3,1,1 0,0,10 1,4,3 +1,2,4 0,3,1 2,4,0 +4,2,1 0,3,4 1,1,4 +3,1,1 0,2,8 2,3,1 +4,2,1 1,2,2 1,2,2 +1,1,4 2,4,1 0,10,0 +1,1,5 5,0,1 1,9,0 +1,2,2 0,4,1 4,3,0 +2,1,3 0,7,1 1,2,2 +3,1,1 0,10,0 2,3,1 +1,3,2 1,3,0 1,1,3 +1,1,5 4,1,1 5,5,0 +1,2,4 6,2,0 6,0,1 +4,1,1 1,6,0 1,1,5 +3,3,1 0,2,4 2,1,1 +1,1,3 3,4,1 0,4,2 +3,1,1 0,6,4 2,3,1 +5,1,1 1,1,4 1,5,0 +4,2,1 0,2,6 1,2,2 +2,1,2 3,2,1 0,6,2 +1,1,3 1,6,1 4,3,1 +1,3,1 0,3,1 2,0,8 +3,1,2 1,3,2 1,3,2 +1,4,1 6,0,4 5,1,1 +1,2,2 2,0,4 0,4,1 
+3,2,2 0,1,4 2,0,2 +3,2,1 1,0,7 0,4,2 +2,2,2 2,0,3 3,2,0 +4,1,2 2,2,0 0,4,3 +2,1,2 0,6,2 1,6,1 +2,3,2 0,0,5 1,2,1 +2,1,4 3,4,0 0,2,2 +1,3,1 6,1,1 8,0,2 +2,1,2 1,8,0 4,0,1 +1,1,3 5,5,0 2,5,1 +1,4,2 8,0,1 4,1,1 +1,4,2 0,2,1 6,1,0 +3,1,1 1,6,1 2,2,2 +5,1,1 1,4,1 0,0,10 +3,1,3 2,1,1 0,4,2 +1,2,3 4,0,2 2,1,2 +4,1,1 1,1,5 0,1,9 +1,3,2 5,1,1 2,2,1 +2,2,1 1,1,6 1,3,2 +1,1,3 3,4,1 6,4,0 +1,1,4 2,8,0 1,5,1 +3,1,1 0,5,5 1,1,6 +2,1,2 1,6,1 5,0,0 +1,3,2 1,3,0 2,2,1 +2,2,1 0,2,6 3,1,2 +1,1,4 1,5,1 2,4,1 +3,2,1 0,3,4 1,1,5 +1,2,2 4,0,3 6,2,0 +5,1,1 0,9,1 1,4,1 +1,2,2 4,1,2 0,4,1 +5,1,1 0,1,9 1,1,4 +1,4,1 3,0,7 6,1,0 +1,3,1 8,0,2 2,1,5 +3,1,3 1,1,2 2,1,1 +1,5,1 1,0,9 1,1,4 +1,1,5 2,3,1 8,2,0 +1,1,3 7,3,0 7,0,1 +1,1,3 2,8,0 1,0,3 +4,1,2 1,2,2 0,8,1 +1,5,1 3,1,2 0,0,10 +2,2,1 2,3,0 4,0,2 +1,2,2 0,3,2 2,3,1 +1,1,3 6,1,1 4,6,0 +1,1,5 3,2,1 10,0,0 +1,3,1 0,2,4 4,1,3 +1,4,1 8,0,2 0,2,2 +2,2,1 2,0,6 2,2,2 +1,1,4 8,2,0 6,0,1 +2,2,1 1,4,0 3,1,2 +1,3,1 3,1,4 7,1,0 +1,3,1 4,1,3 3,1,4 +4,1,2 2,0,1 0,8,1 +1,4,2 6,1,0 0,1,3 +1,3,3 4,1,1 4,1,1 +1,1,3 7,3,0 1,0,3 +2,2,2 3,1,1 1,2,2 +1,1,3 5,2,1 3,7,0 +1,1,3 0,4,2 4,0,2 +1,2,4 6,0,1 4,1,1 +2,3,1 3,0,4 1,1,5 +1,3,2 7,1,0 0,2,2 +1,3,3 1,1,2 4,0,2 +1,5,1 4,1,1 3,0,7 +3,1,1 3,0,1 1,2,5 +1,1,5 2,3,1 5,5,0 +3,1,1 0,10,0 1,6,1 +1,4,1 2,1,4 1,0,9 +3,1,1 3,0,1 1,5,2 +1,3,1 3,0,7 1,1,6 +3,1,1 1,5,2 0,8,2 +1,4,1 10,0,0 1,1,5 +3,1,1 1,2,5 3,1,0 +2,2,1 1,0,8 0,3,4 +1,1,3 3,7,0 4,3,1 +1,3,1 7,0,3 0,2,4 +1,1,3 0,7,1 6,4,0 +3,1,1 3,0,1 0,5,5 +3,1,1 0,8,2 1,2,5 +1,2,2 4,3,0 4,2,1 +1,1,3 0,1,3 9,1,0 +2,1,3 0,4,2 3,4,0 +1,1,4 3,3,1 10,0,0 +2,1,2 3,0,2 4,2,0 +1,2,4 0,1,2 8,1,0 +1,2,3 1,0,3 1,3,1 +1,1,4 8,2,0 0,2,2 +2,1,2 0,10,0 1,6,1 +1,3,1 6,1,1 1,1,6 +1,1,3 2,5,1 10,0,0 +2,1,4 2,6,0 2,2,1 +3,1,1 3,1,0 1,3,4 +2,2,2 0,2,3 2,1,2 +1,1,3 0,10,0 5,2,1 +2,2,2 0,1,4 1,4,0 +3,1,3 0,1,3 2,4,0 +1,1,4 8,2,0 0,6,1 +2,2,1 2,1,4 1,4,0 +1,3,1 0,2,4 4,0,6 +1,3,1 6,0,4 4,1,3 +1,3,1 6,1,1 0,3,1 +4,1,1 1,5,1 2,0,2 +3,1,1 1,6,1 0,7,3 +1,3,1 4,1,3 2,2,2 +3,1,2 2,4,0 1,1,3 +2,1,2 2,0,3 1,4,2 +2,2,2 1,1,3 2,3,0 +1,3,2 4,0,3 0,2,2 +1,3,1 0,3,1 4,1,3 +2,1,2 2,2,2 0,6,2 +1,4,1 2,2,0 1,1,5 +4,1,1 1,5,1 1,1,5 +2,2,2 2,1,2 5,0,0 +4,2,1 0,4,2 1,1,4 +2,2,2 4,0,1 1,3,1 +3,1,2 1,1,3 1,7,0 +2,3,2 2,2,0 1,2,1 +2,1,3 3,4,0 1,2,2 +2,3,1 3,0,4 2,1,3 +1,5,1 0,1,5 1,1,4 +3,1,1 0,4,6 1,2,5 +1,2,2 4,1,2 6,2,0 +1,1,3 7,0,1 9,1,0 +1,1,5 2,3,1 1,4,1 +4,1,1 1,6,0 0,9,1 +1,2,2 4,2,1 2,4,0 +1,1,4 4,6,0 0,6,1 +2,4,1 3,0,4 1,2,0 +1,1,4 5,5,0 2,4,1 +1,1,3 0,4,2 9,1,0 +1,1,4 1,1,2 1,5,1 +1,5,1 1,0,9 4,1,1 +2,2,1 1,3,2 4,0,2 +2,1,2 1,6,1 0,0,5 +1,2,4 2,4,0 2,0,2 +2,2,2 1,0,4 0,3,2 +1,3,2 3,1,2 1,1,3 +1,4,1 1,2,1 2,0,8 +4,1,1 1,1,5 1,5,1 +2,2,2 1,2,2 1,2,2 +3,1,1 1,4,3 1,3,4 +4,1,1 1,0,6 2,2,0 +1,1,4 4,2,1 6,0,1 +1,2,2 8,1,0 6,1,1 +1,2,3 3,2,1 4,3,0 +1,3,2 4,0,3 1,1,3 +2,1,2 1,2,3 0,6,2 +1,3,1 2,2,2 1,0,9 +1,2,2 6,1,1 6,1,1 +2,1,3 0,10,0 3,1,1 +1,2,4 4,3,0 0,1,2 +1,1,4 1,1,2 8,2,0 +3,3,1 1,1,4 1,0,7 +2,2,1 0,2,6 4,0,2 +3,1,1 1,3,4 0,3,7 +1,2,2 6,2,0 4,1,2 +4,1,1 1,3,3 1,3,3 +1,3,2 1,3,0 2,0,4 +1,1,4 2,0,2 0,2,2 +4,1,1 1,2,4 0,10,0 +3,1,1 1,4,3 1,4,3 +3,2,1 2,1,2 2,0,4 +1,5,1 0,1,5 2,1,3 +2,1,3 1,8,0 0,1,3 +3,1,3 2,4,0 1,1,2 +3,2,2 2,0,2 0,2,3 +4,1,1 2,2,0 0,2,8 +4,2,1 1,2,2 0,3,4 +3,2,1 2,0,4 1,3,1 +2,2,2 1,2,2 1,4,0 +2,1,4 4,2,0 0,6,1 +1,1,3 3,7,0 1,6,1 +1,1,4 1,9,0 1,1,2 +4,1,1 2,0,2 0,1,9 +1,4,2 0,1,3 2,2,0 +3,1,1 0,2,8 2,2,2 +2,1,2 2,4,1 0,2,4 +1,2,3 7,0,1 5,1,1 +1,4,2 8,0,1 6,1,0 +3,1,1 0,8,2 1,3,4 +1,3,3 1,0,3 1,3,0 +2,2,2 3,1,1 0,0,5 +1,1,4 2,8,0 1,1,2 +2,1,3 1,8,0 3,1,1 +1,3,1 10,0,0 1,1,6 +1,2,3 1,0,3 2,1,2 +1,2,2 4,0,3 4,2,1 +5,1,1 
1,2,3 1,4,1 +1,5,1 4,1,1 10,0,0 +2,2,1 2,1,4 2,1,4 +3,1,1 0,10,0 1,1,6 +1,4,1 4,0,6 3,1,3 +3,2,2 2,1,1 0,2,3 +1,5,1 2,1,3 4,1,1 +4,1,1 0,2,8 1,6,0 +1,3,1 0,3,1 2,1,5 +2,2,2 2,0,3 0,1,4 +3,2,1 0,2,6 2,0,4 +1,3,1 0,1,7 6,1,1 +4,1,1 0,1,9 1,0,6 +1,1,5 0,5,1 9,1,0 +2,2,1 4,1,0 3,0,4 +3,1,1 3,1,0 0,6,4 +1,3,1 3,2,1 6,1,1 +3,1,1 1,6,1 1,0,7 +1,3,1 1,3,0 5,1,2 +3,1,1 2,3,1 3,1,0 +1,1,4 9,1,0 1,5,1 +1,2,2 2,1,3 0,3,2 +4,1,2 0,8,1 1,4,1 +2,1,2 3,2,1 1,4,2 +1,3,1 0,2,4 4,2,0 +4,2,1 0,5,0 1,1,4 +1,1,3 1,0,3 6,1,1 +1,2,4 2,2,1 4,1,1 +1,1,3 2,8,0 2,5,1 +1,1,5 5,5,0 2,3,1 +1,3,1 1,0,9 7,1,0 +1,2,3 5,1,1 7,0,1 +1,1,5 0,5,1 5,0,1 +1,2,2 8,0,1 2,3,1 +5,1,1 0,9,1 1,5,0 +3,1,2 1,3,2 0,10,0 +3,1,2 1,5,1 0,4,3 +1,1,3 6,1,1 6,1,1 +1,1,3 1,6,1 3,7,0 +2,2,1 2,2,2 3,0,4 +1,3,1 1,0,9 0,1,7 +4,1,1 1,0,6 0,6,4 +1,4,1 1,1,5 4,1,2 +1,2,2 2,2,2 0,0,5 +4,1,1 2,1,1 0,10,0 +4,2,1 2,0,2 1,1,4 +2,3,1 2,1,3 0,0,10 +1,1,4 2,8,0 2,0,2 +3,1,1 1,1,6 1,4,3 +2,2,1 0,3,4 3,0,4 +3,1,1 3,0,1 1,7,0 +1,2,3 6,2,0 1,3,1 +3,2,1 0,4,2 1,1,5 +1,2,4 4,3,0 2,2,1 +1,3,1 0,2,4 6,1,1 +1,3,1 1,2,3 3,2,1 +3,3,1 1,2,1 0,3,1 +1,2,4 6,0,1 2,4,0 +1,2,2 6,0,2 4,3,0 +2,1,3 2,3,1 3,4,0 +2,1,2 1,0,4 2,6,0 +2,3,1 5,0,0 1,1,5 +1,1,3 1,6,1 10,0,0 +4,2,1 2,0,2 0,4,2 +3,1,1 1,2,5 1,4,3 +3,3,1 0,0,10 1,1,4 +1,3,1 5,1,2 5,1,2 +1,4,1 2,1,4 6,1,0 +1,1,4 7,3,0 2,4,1 +1,1,3 4,0,2 9,1,0 +2,4,1 1,0,8 1,1,4 +1,4,1 3,1,3 6,1,0 +1,1,5 2,3,1 10,0,0 +1,2,3 8,1,0 1,3,1 +1,3,2 6,0,2 5,1,1 +2,2,2 0,3,2 2,1,2 +2,1,3 2,0,2 4,2,0 +1,3,3 1,2,1 10,0,0 +3,1,2 3,1,0 0,2,4 +1,5,1 4,1,1 5,0,5 +2,2,1 2,0,6 3,1,2 +4,1,2 0,0,5 1,2,2 +2,3,1 2,1,3 0,1,7 +2,2,1 0,3,4 2,3,0 +2,1,2 2,2,2 1,2,3 +1,3,1 10,0,0 1,2,3 +1,3,1 1,0,9 5,1,2 +1,2,2 6,0,2 2,2,2 +1,1,5 1,4,1 3,2,1 +2,1,2 1,8,0 0,2,4 +2,3,1 0,0,10 3,1,1 +1,3,2 2,2,1 4,0,3 +1,3,1 9,0,1 2,2,2 +1,2,4 10,0,0 2,2,1 +1,2,2 10,0,0 6,1,1 +2,1,3 1,2,2 4,2,0 +1,4,1 1,1,5 3,1,3 +3,1,1 2,2,2 1,6,1 +5,1,1 1,5,0 0,3,7 +3,2,2 0,2,3 2,1,1 +1,3,1 5,1,2 0,2,4 +2,2,2 4,0,1 3,2,0 +2,1,2 5,0,0 2,2,2 +1,2,2 8,1,0 2,0,4 +3,1,2 0,8,1 1,7,0 +1,1,3 1,0,3 3,4,1 +1,2,4 2,2,1 2,4,0 +2,1,4 0,2,2 2,6,0 +1,1,3 0,4,2 1,9,0 +2,2,1 2,1,4 3,2,0 +1,2,4 2,2,1 0,3,1 +1,3,1 3,2,1 7,0,3 +4,1,1 0,3,7 2,0,2 +3,1,3 1,1,2 0,7,1 +2,3,1 1,2,2 5,0,0 +1,2,2 2,4,0 2,3,1 +1,3,3 1,3,0 1,0,3 +1,1,3 5,2,1 8,2,0 +1,2,3 4,3,0 3,2,1 +1,2,4 8,1,0 2,2,1 +1,1,3 0,10,0 6,1,1 +2,2,1 1,4,0 1,2,4 +1,3,1 1,0,9 2,1,5 +2,1,2 0,10,0 1,4,2 +1,1,3 0,7,1 2,5,1 +1,4,1 2,0,8 4,1,2 +3,2,1 0,5,0 2,1,2 +2,1,3 3,1,1 0,4,2 +1,1,5 5,0,1 9,1,0 +1,3,1 5,0,5 1,2,3 +2,4,1 1,2,0 2,1,2 +2,1,2 4,0,1 2,2,2 +3,1,2 0,8,1 1,5,1 +1,2,3 0,2,2 1,0,3 +1,5,1 1,1,4 5,0,5 +2,3,1 3,1,1 0,3,1 +2,2,1 4,0,2 1,1,6 +2,1,2 1,8,0 3,2,1 +1,2,3 2,1,2 1,3,1 +1,1,5 4,1,1 4,6,0 +2,4,1 2,1,2 4,0,2 +1,4,1 9,0,1 6,1,0 +4,1,2 0,6,2 1,2,2 +2,1,2 0,6,2 1,4,2 +4,2,1 0,4,2 2,0,2 +1,1,3 10,0,0 1,6,1 +1,3,1 0,0,10 6,1,1 +3,2,1 1,3,1 2,1,2 +1,1,3 1,0,3 2,2,2 +1,2,2 4,3,0 2,0,4 +3,1,1 1,3,4 0,0,10 +1,3,1 7,1,0 1,1,6 +1,3,1 3,2,1 7,1,0 +1,2,3 1,3,1 0,2,2 +3,1,3 3,1,0 0,4,2 +3,2,1 1,1,5 0,4,2 +1,2,3 0,5,0 5,1,1 +4,1,1 0,6,4 1,4,2 +3,1,1 3,1,0 2,2,2 +1,1,3 5,2,1 3,1,2 +4,1,1 2,0,2 0,3,7 +2,2,1 3,2,0 1,2,4 +2,3,1 4,0,2 2,1,3 +1,1,3 0,4,2 5,2,1 +4,1,2 2,0,1 0,6,2 +1,1,3 6,4,0 4,0,2 +2,2,1 2,3,0 2,0,6 +2,2,1 0,4,2 3,2,0 +3,1,1 1,2,5 0,9,1 +4,2,1 0,3,4 2,1,0 +1,2,2 2,4,0 6,1,1 +1,5,1 3,1,2 4,0,6 diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/bargaining/bargaining_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/bargaining/bargaining_test.cc new file mode 100644 index 0000000..a995c89 --- /dev/null +++ 
b/scenarios/bargaining/open_spiel/open_spiel/games/bargaining/bargaining_test.cc
@@ -0,0 +1,195 @@
+// Copyright 2019 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "open_spiel/games/bargaining/bargaining.h"
+
+#include
+#include
+#include
+
+#include "open_spiel/abseil-cpp/absl/flags/flag.h"
+#include "open_spiel/abseil-cpp/absl/flags/parse.h"
+#include "open_spiel/abseil-cpp/absl/strings/str_cat.h"
+#include "open_spiel/spiel.h"
+#include "open_spiel/tests/basic_tests.h"
+#include "open_spiel/utils/init.h"
+
+// This is set to false by default because it complicates tests on github CI.
+ABSL_FLAG(bool, enable_instances_file_test, false,
+          "Whether to test loading of an instances file.");
+
+namespace open_spiel {
+namespace bargaining {
+namespace {
+
+constexpr const char* kInstancesFilename =
+    "open_spiel/games/bargaining/bargaining_instances1000.txt";
+constexpr int kFileNumInstances = 1000;
+
+namespace testing = open_spiel::testing;
+
+void BasicBargainingTests() {
+  testing::LoadGameTest("bargaining");
+  testing::RandomSimTest(*LoadGame("bargaining"), 10);
+  testing::RandomSimTest(*LoadGame("bargaining(prob_end=0.1)"), 10);
+  testing::RandomSimTest(*LoadGame("bargaining(discount=0.9)"), 10);
+  testing::RandomSimTest(*LoadGame("bargaining(max_turns=200)"), 10);
+}
+
+void BargainingMaxTurnsTest() {
+  std::shared_ptr<const Game> game = LoadGame("bargaining(max_turns=200)");
+  std::unique_ptr<State> state = game->NewInitialState();
+  int num_turns = 200;
+  while (num_turns > 0) {
+    if (state->IsChanceNode()) {
+      ActionsAndProbs chance_outcomes = state->ChanceOutcomes();
+      state->ApplyAction(chance_outcomes[0].first);
+    } else {
+      SPIEL_CHECK_TRUE(!state->IsTerminal());
+      num_turns--;
+      std::vector<Action> legal_actions = state->LegalActions();
+      state->ApplyAction(legal_actions[0]);
+    }
+  }
+  SPIEL_CHECK_TRUE(state->IsTerminal());
+}
+
+void BargainingDiscountTest() {
+  std::shared_ptr<const Game> game = LoadGame("bargaining(discount=0.9)");
+  std::unique_ptr<State> state = game->NewInitialState();
+  BargainingState* bargaining_state =
+      static_cast<BargainingState*>(state.get());
+  ActionsAndProbs chance_outcomes = state->ChanceOutcomes();
+  state->ApplyAction(chance_outcomes[0].first);
+  std::vector<Action> legal_actions = state->LegalActions();
+  state->ApplyAction(legal_actions[0]);
+  state->ApplyAction(legal_actions[0]);
+  state->ApplyAction(legal_actions[0]);
+  state->ApplyAction(legal_actions[0]);
+  state->ApplyAction(bargaining_state->AgreeAction());
+  // P0 offers [0,0,0] then P1, then P0, then P1, then P0 agrees.
+  // P0 would get 10, but it's discounted by 0.9 three times.
+  SPIEL_CHECK_FLOAT_EQ(state->PlayerReturn(0), 0.9 * 0.9 * 0.9 * 10);
+  SPIEL_CHECK_FLOAT_EQ(state->PlayerReturn(1), 0.0);
+}
+
+void BargainingProbEndContinueTest() {
+  std::shared_ptr<const Game> game = LoadGame("bargaining(prob_end=0.1)");
+  std::unique_ptr<State> state = game->NewInitialState();
+  state->ApplyAction(state->ChanceOutcomes()[0].first);
+  std::vector<Action> legal_actions = state->LegalActions();
+  state->ApplyAction(legal_actions[0]);
+  state->ApplyAction(legal_actions[0]);
+  for (int i = 0; i < (bargaining::kDefaultMaxTurns - 2); ++i) {
+    SPIEL_CHECK_TRUE(state->IsChanceNode());
+    state->ApplyAction(state->ChanceOutcomes()[0].first);
+    SPIEL_CHECK_TRUE(!state->IsChanceNode());
+    legal_actions = state->LegalActions();
+    state->ApplyAction(legal_actions[0]);
+  }
+  SPIEL_CHECK_TRUE(state->IsTerminal());
+}
+
+void BargainingProbEndEndTest() {
+  std::shared_ptr<const Game> game = LoadGame("bargaining(prob_end=0.1)");
+  std::unique_ptr<State> state = game->NewInitialState();
+  state->ApplyAction(state->ChanceOutcomes()[0].first);
+  std::vector<Action> legal_actions = state->LegalActions();
+  state->ApplyAction(legal_actions[0]);
+  state->ApplyAction(legal_actions[0]);
+  for (int i = 0; i < (bargaining::kDefaultMaxTurns - 4); ++i) {
+    SPIEL_CHECK_TRUE(state->IsChanceNode());
+    state->ApplyAction(state->ChanceOutcomes()[0].first);
+    SPIEL_CHECK_TRUE(!state->IsChanceNode());
+    legal_actions = state->LegalActions();
+    state->ApplyAction(legal_actions[0]);
+  }
+  SPIEL_CHECK_TRUE(state->IsChanceNode());
+  SPIEL_CHECK_TRUE(!state->IsTerminal());
+  state->ApplyAction(state->ChanceOutcomes()[1].first);
+  SPIEL_CHECK_TRUE(state->IsTerminal());
+  SPIEL_CHECK_FLOAT_EQ(state->PlayerReturn(0), 0.0);
+  SPIEL_CHECK_FLOAT_EQ(state->PlayerReturn(1), 0.0);
+}
+
+void BasicBargainingFromInstancesFileTests() {
+  // Game creation and legal actions are fairly heavy, so only run 1 sim.
+  std::shared_ptr<const Game> game = LoadGame(
+      absl::StrCat("bargaining(instances_file=", kInstancesFilename, ")"));
+
+  const auto* bargaining_game = static_cast<const BargainingGame*>(game.get());
+  SPIEL_CHECK_EQ(bargaining_game->AllInstances().size(), kFileNumInstances);
+
+  testing::RandomSimTest(*game, 100);
+}
+
+void BasicBargainingFromCCInstancesTests() {
+  std::shared_ptr<const Game> game = LoadGame("bargaining");
+
+  const auto* bargaining_game = static_cast<const BargainingGame*>(game.get());
+  SPIEL_CHECK_EQ(bargaining_game->AllInstances().size(), kDefaultNumInstances);
+}
+
+void BasicBargainingInstanceMapTests() {
+  std::shared_ptr<const Game> game = LoadGame("bargaining");
+  const auto* bargaining_game = static_cast<const BargainingGame*>(game.get());
+  for (int i = 0; i < bargaining_game->AllInstances().size(); ++i) {
+    const Instance& instance = bargaining_game->GetInstance(i);
+    SPIEL_CHECK_EQ(bargaining_game->GetInstanceIndex(instance), i);
+  }
+}
+
+void BasicBargainingOfferMapTests() {
+  std::shared_ptr<const Game> game = LoadGame("bargaining");
+  const auto* bargaining_game = static_cast<const BargainingGame*>(game.get());
+  for (int i = 0; i < bargaining_game->AllOffers().size(); ++i) {
+    const Offer& offer = bargaining_game->GetOffer(i);
+    SPIEL_CHECK_EQ(bargaining_game->GetOfferIndex(offer), i);
+  }
+}
+
+void BasicBargainingOpponentValuesTests() {
+  std::shared_ptr<const Game> game = LoadGame("bargaining");
+  const auto* bargaining_game = static_cast<const BargainingGame*>(game.get());
+  std::vector<std::vector<int>> expected_values = {
+      {4, 0, 2}, {7, 0, 1}, {1, 3, 1}
+  };
+  std::vector<int> player_values = {1, 2, 3};
+  std::vector<int> opponent_values = {8, 1, 0};
+  std::vector<std::vector<int>> actual_values =
+      bargaining_game->GetPossibleOpponentValues(
+          0, player_values, opponent_values);
+  SPIEL_CHECK_EQ(actual_values, expected_values);
+}
+
+}  // namespace
+}  // namespace bargaining
+}  // namespace open_spiel
+
+int main(int argc, char** argv) {
+  open_spiel::Init("", &argc, &argv, false);
+  absl::ParseCommandLine(argc, argv);
+  open_spiel::bargaining::BasicBargainingTests();
+  if (absl::GetFlag(FLAGS_enable_instances_file_test)) {
+    open_spiel::bargaining::BasicBargainingFromInstancesFileTests();
+  }
+  open_spiel::bargaining::BargainingMaxTurnsTest();
+  open_spiel::bargaining::BargainingDiscountTest();
+  open_spiel::bargaining::BargainingProbEndContinueTest();
+  open_spiel::bargaining::BargainingProbEndEndTest();
+  open_spiel::bargaining::BasicBargainingFromCCInstancesTests();
+  open_spiel::bargaining::BasicBargainingInstanceMapTests();
+  open_spiel::bargaining::BasicBargainingOfferMapTests();
+  open_spiel::bargaining::BasicBargainingOpponentValuesTests();
+}
diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/battleship/battleship.cc b/scenarios/bargaining/open_spiel/open_spiel/games/battleship/battleship.cc
new file mode 100644
index 0000000..0e7fc7c
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/games/battleship/battleship.cc
@@ -0,0 +1,1099 @@
+// Copyright 2019 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "open_spiel/games/battleship/battleship.h" + +#include "open_spiel/abseil-cpp/absl/strings/ascii.h" +#include "open_spiel/abseil-cpp/absl/strings/numbers.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/abseil-cpp/absl/strings/strip.h" + +namespace open_spiel { +namespace battleship { +namespace { +constexpr double kFloatTolerance = 1e-9; +} + +BattleshipState::BattleshipState( + const std::shared_ptr bs_game) + : State(bs_game), bs_game_(bs_game) {} + +Player BattleshipState::CurrentPlayer() const { + const BattleshipConfiguration& conf = bs_game_->conf; + + // The players place the ships on the board in turns, starting from + // Player 1. + // + // NOTE: It is important whether or not the players place all their ships at + // once or not for correlated equilibria purposes. This is because in + // correlated equilibria, the recommender can stop issuing recommendations + // after a player deviates from a recommended *action*. + if (!AllShipsPlaced()) { + // In this case, if an even number (possibly 0) of ships have been placed, + // then it is Player 1's turn to act next. Else, it is Player 2's. + if (NumShipsPlaced() % 2 == 0) { + return Player{0}; + } else { + return Player{1}; + } + } else { + // In this case, all ships have been placed. The players can take turns + // for their next moves, starting from Player 1. + + // First, we check whether the game is over. + // + // The game is over only in two cases: + // * Both players have taken `conf.num_shots` shots; or + // * At least one player has lost all of their ships. + if (moves_.size() == 2 * conf.ships.size() + 2 * conf.num_shots) { + return kTerminalPlayerId; + } else if (AllPlayersShipsSank(Player{0}) || + AllPlayersShipsSank(Player{1})) { + return kTerminalPlayerId; + } + + // If we are here, the game is not over yet. + if (moves_.size() % 2 == 0) { + return Player{0}; + } else { + return Player{1}; + } + } +} + +std::vector BattleshipState::LegalActions() const { + if (IsTerminal()) { + return {}; + } else { + const Player player = CurrentPlayer(); + const BattleshipConfiguration& conf = bs_game_->conf; + + std::vector action_ids; + action_ids.reserve(NumDistinctActions()); + + if (!AllShipsPlaced()) { + std::vector partial_placement; + for (const auto& move : moves_) { + if (move.player == player && + absl::holds_alternative(move.action)) { + partial_placement.push_back(absl::get(move.action)); + } + } + + // If we are here, we still have some ships to place on the board. + // + // First, we find the first ship that hasn't been placed on the board + // yet. + const Ship next_ship = NextShipToPlace(player); + + // Horizontal placement. + if (next_ship.length <= conf.board_width) { + for (int row = 0; row < conf.board_height; ++row) { + for (int col = 0; col < conf.board_width - next_ship.length + 1; + ++col) { + const ShipPlacement placement(ShipPlacement::Direction::Horizontal, + /* ship = */ next_ship, + /* tl_corner = */ Cell{row, col}); + partial_placement.push_back(placement); + if (PlacementDoesNotOverlap(placement, player) && + ExistsFeasiblePlacement(conf, &partial_placement)) { + action_ids.push_back( + bs_game_->SerializeShipPlacementAction(placement)); + } + partial_placement.pop_back(); + } + } + } + + // Vertical placement. + // + // NOTE: vertical placement is defined only for ships with length more + // than one. This avoids duplicating placement actions for 1x1 + // ships. 
+ if (next_ship.length > 1 && next_ship.length <= conf.board_height) { + for (int row = 0; row < conf.board_height - next_ship.length + 1; + ++row) { + for (int col = 0; col < conf.board_width; ++col) { + const ShipPlacement placement(ShipPlacement::Direction::Vertical, + /* ship = */ next_ship, + /* tl_corner = */ Cell{row, col}); + partial_placement.push_back(placement); + if (PlacementDoesNotOverlap(placement, player) && + ExistsFeasiblePlacement(conf, &partial_placement)) { + action_ids.push_back( + bs_game_->SerializeShipPlacementAction(placement)); + } + partial_placement.pop_back(); + } + } + } + + // Since the constructor of the game checks that there exists a feasible + // placement of ships for each player, and since we only consider + // placement actions that preserve feasibility, it is impossible that all + // of a sudden we find ourselves painted in a corner where no placement + // can be performed. + SPIEL_CHECK_GT(action_ids.size(), 0); + } else { + // In this case, the only thing the player can do is to shoot on a cell + // + // Depending on whether repeated shots are allowed or not, we might + // filter out some cells. + for (int row = 0; row < conf.board_height; ++row) { + for (int col = 0; col < conf.board_width; ++col) { + if (!conf.allow_repeated_shots && + AlreadyShot(Cell{row, col}, CurrentPlayer())) { + // We do not duplicate the shot, so nothing to do here... + } else { + action_ids.push_back(bs_game_->SerializeShotAction(Shot{row, col})); + } + } + } + + // SAFETY: The assert below can never fail, because when + // allow_repeated_shot is false, we check at game construction time + // that the number of shots per player is <= the number of cells in + // the board. + SPIEL_DCHECK_FALSE(action_ids.empty()); + } + + return action_ids; + } +} + +std::string BattleshipState::ActionToString(Player player, + Action action_id) const { + return bs_game_->ActionToString(player, action_id); +} + +std::string BattleshipState::ToString() const { + std::string state_str = ToPrettyString(); + + // The board representation returned by `ToPrettyString` does not distinguish + // between the order of moves. To disambiguate and have ToString fully capture + // the state, we also include the order of the moves in a separate line. + + absl::StrAppend(&state_str, "\nFull history: "); + for (const auto& move : moves_) { + if (move.player == Player{0}) { + absl::StrAppend(&state_str, "/0:"); + } else { + absl::StrAppend(&state_str, "/1:"); + } + if (absl::holds_alternative(move.action)) { + absl::StrAppend(&state_str, + absl::get(move.action).ToString()); + } else { + SPIEL_DCHECK_TRUE(absl::holds_alternative(move.action)); + absl::StrAppend(&state_str, absl::get(move.action).ToString()); + } + } + absl::StrAppend(&state_str, "\n"); + return state_str; +} + +bool BattleshipState::IsTerminal() const { + return CurrentPlayer() == kTerminalPlayerId; +} + +std::vector BattleshipState::Returns() const { + if (!IsTerminal()) { + return {0.0, 0.0}; + } else { + const BattleshipConfiguration& conf = bs_game_->conf; + + // The description of the game in the header file contains more details + // about how the payoffs for the players are computed at the end of the + // game, as well as the meaning of the `loss_multiplier`. 
+ const double loss_multiplier = conf.loss_multiplier; + + double damage_pl1 = 0.0; + double damage_pl2 = 0.0; + for (const Ship& ship : conf.ships) { + if (DidShipSink(ship, Player{0})) damage_pl1 += ship.value; + if (DidShipSink(ship, Player{1})) damage_pl2 += ship.value; + } + + return {damage_pl2 - loss_multiplier * damage_pl1, + damage_pl1 - loss_multiplier * damage_pl2}; + } +} + +std::unique_ptr BattleshipState::Clone() const { + return std::make_unique(*this); +} + +std::string BattleshipState::InformationStateString(Player player) const { + SPIEL_CHECK_TRUE(player >= 0 && player < NumPlayers()); + + const BattleshipConfiguration& conf = bs_game_->conf; + const Player opponent = (player == Player{0}) ? Player{1} : Player{0}; + + // We will need to figure out whether each of the player's shots (i) hit the + // water, (ii) damaged but did not sink yet one of the opponent's ships, or + // (iii) damaged and sank one of the opponent's ships. + // + // To be able to figure that out, we will keep track of the damage that each + // of the opponent's ship has received so far. The vector `ship_damage` + // contains and updates this information as each player's shot is processed + // in order. Position i corresponds to the damage that the opponent's ship + // in position i of bs_game->conf.ships has suffered. + std::vector ship_damage(conf.ships.size(), 0); + // Since in general we might have repeated shots, we cannot simply increase + // the ship damage every time a shot hits a ship. For that, we keep track of + // whether a cell was already hit in the past. We reuse the + // serialization/deserialization routines for shots to map from (r, c) to + // cell index r * board_width + c. + std::vector cell_hit(conf.board_width * conf.board_height, false); + + // NOTE: OpenSpiel's automatic observation consistency checks require that + // agents be able to distinguish that someone else has moved (though the + // move itself might not be observed). So, the information_state string + // we return has to be able to distinguish between, e.g., "Player 1 + // still hasn't placed the first ship" and "Player 1 has placed the ship + // and it's now my turn" in the first two moves. + // + // For that reason, we prepend the move number in the information state to + // resolve the ambiguity. + std::string information_state = absl::StrCat("T=", MoveNumber(), " "); + for (const auto& move : moves_) { + if (absl::holds_alternative(move.action)) { + // The player observed *their own* ship placements. + if (move.player == player) { + absl::StrAppend(&information_state, "/"); + absl::StrAppend(&information_state, + absl::get(move.action).ToString()); + } + } else { + const Shot& shot = absl::get(move.action); + + if (move.player != player) { + // If the shot came from the opponent, the player has seen it. + absl::StrAppend(&information_state, "/oppshot_", shot.ToString()); + } else { + const int cell_index = bs_game_->SerializeShotAction(shot); + + char shot_outcome = 'W'; // For 'water'. + for (int ship_index = 0; ship_index < conf.ships.size(); ++ship_index) { + const Ship& ship = conf.ships.at(ship_index); + + // SAFETY: the call to FindShipPlacement_ is safe, because if we are + // here it means that all ships have been placed. + const ShipPlacement ship_placement = + FindShipPlacement(ship, opponent); + + if (ship_placement.CoversCell(shot)) { + if (!cell_hit[cell_index]) { + // This is a new hit: we have to increas the ship damage and + // mark the cell as already hit. 
+            ++ship_damage.at(ship_index);
+            cell_hit.at(cell_index) = true;
+          }
+          if (ship_damage.at(ship_index) == ship.length) {
+            shot_outcome = 'S';  // For 'sunk'.
+          } else {
+            shot_outcome = 'H';  // For 'hit' (but not sunk).
+          }
+        }
+      }
+
+      // Otherwise, the player knows they shot, but also knows whether the
+      // shot hit the water, hit a ship (but did not sink it), or sank a
+      // ship.
+      absl::StrAppend(&information_state, "/shot_", shot.ToString(), ":");
+      information_state.push_back(shot_outcome);
+    }
+  }
+
+  return information_state;
+}
+
+void BattleshipState::InformationStateTensor(
+    Player player, absl::Span<float> values) const {
+  SPIEL_CHECK_GE(player, 0);
+  SPIEL_CHECK_LT(player, num_players_);
+  SPIEL_CHECK_EQ(values.size(), game_->InformationStateTensorSize());
+  std::fill(values.begin(), values.end(), 0);
+
+  int offset = 0;
+  const BattleshipConfiguration& conf = bs_game_->conf;
+  const Player opponent = (player == Player{0}) ? Player{1} : Player{0};
+  const int height = conf.board_height;
+  const int width = conf.board_width;
+  std::vector<int> ship_damage(conf.ships.size(), 0);
+  std::vector<bool> cell_hit(conf.board_width * conf.board_height, false);
+
+  if (IsTerminal()) {
+    values[offset] = 1;
+  }
+  offset += 1;
+
+  values[offset + player] = 1;
+  offset += 2;
+
+  if (!IsTerminal()) {
+    values[offset + CurrentPlayer()] = 1;
+  }
+  offset += 2;
+
+  for (const auto& move : moves_) {
+    if (absl::holds_alternative<ShipPlacement>(move.action)) {
+      // The player observed *their own* ship placements.
+      if (move.player == player) {
+        const ShipPlacement& placement = absl::get<ShipPlacement>(move.action);
+        if (placement.direction == CellAndDirection::Horizontal) {
+          values[offset] = 1;
+        } else {
+          values[offset + 1] = 1;
+        }
+        offset += 2;
+
+        values[offset + placement.TopLeftCorner().row] = 1;
+        offset += height;
+        values[offset + placement.TopLeftCorner().col] = 1;
+        offset += width;
+      }
+    } else {
+      const Shot& shot = absl::get<Shot>(move.action);
+
+      values[offset + move.player] = 1;
+      offset += bs_game_->NumPlayers();
+
+      values[offset + shot.row] = 1;
+      offset += height;
+      values[offset + shot.col] = 1;
+      offset += width;
+
+      // Add info of hit, shot, or sunk only for my shots (same as in the
+      // info state string).
+      if (move.player == player) {
+        const int cell_index = bs_game_->SerializeShotAction(shot);
+
+        char shot_outcome = 'W';  // For 'water'.
+        for (int ship_index = 0; ship_index < conf.ships.size(); ++ship_index) {
+          const Ship& ship = conf.ships.at(ship_index);
+
+          // SAFETY: the call to FindShipPlacement_ is safe, because if we are
+          // here it means that all ships have been placed.
+          const ShipPlacement ship_placement =
+              FindShipPlacement(ship, opponent);
+
+          if (ship_placement.CoversCell(shot)) {
+            if (!cell_hit[cell_index]) {
+              // This is a new hit: we have to increase the ship damage and
+              // mark the cell as already hit.
+              ++ship_damage.at(ship_index);
+              cell_hit.at(cell_index) = true;
+            }
+            if (ship_damage.at(ship_index) == ship.length) {
+              shot_outcome = 'S';  // For 'sunk'.
+            } else {
+              shot_outcome = 'H';  // For 'hit' (but not sunk).
+            }
+          }
+        }
+
+        switch (shot_outcome) {
+          case 'W': values[offset] = 1; break;
+          case 'H': values[offset + 1] = 1; break;
+          case 'S': values[offset + 2] = 1; break;
+          default:
+            std::string error = "Bad shot outcome: ";
+            error.push_back(shot_outcome);
+            SpielFatalError(error);
+        }
+      }
+
+      // Bits for W/H/S.
+ offset += 3; + } + } + + SPIEL_CHECK_LE(offset, values.size()); +} + +std::string BattleshipState::ObservationString(Player player) const { + std::string output = "State of player's ships:\n"; + absl::StrAppend(&output, OwnBoardString(player)); + absl::StrAppend(&output, "\nPlayer's shot outcomes:\n"); + absl::StrAppend(&output, ShotsBoardString(player)); + return output; +} + +void BattleshipState::UndoAction(Player player, Action action_id) { + SPIEL_CHECK_GT(moves_.size(), 0); + // XXX(gfarina): It looks like SPIEL_CHECK_EQ wants to print a PlayerAction + // on failure, but std::cout was not overloaded. For now I moved to a + // SPIEL_CHECK_TRUE. + SPIEL_CHECK_TRUE((history_.back() == PlayerAction{player, action_id})); + + history_.pop_back(); + moves_.pop_back(); + --move_number_; +} + +std::string BattleshipState::OwnBoardString(const Player player) const { + SPIEL_CHECK_TRUE(player >= 0 && player < NumPlayers()); + + const Player opponent = (player == Player{0}) ? Player{1} : Player{0}; + const BattleshipConfiguration& conf = bs_game_->conf; + + // We keep the board in memory as vectors of strings. Initially, all strings + // only contain whitespace. + std::vector player_board(conf.board_height, + std::string(conf.board_width, ' ')); + + // We start by drawing the ships on the player's board. For now, we do not + // include any information about where the opponent shot. + char ship_id = 'a'; + for (const auto& move : moves_) { + if (move.player == player && + absl::holds_alternative(move.action)) { + const ShipPlacement& placement = absl::get(move.action); + + // We now iterate over all the cells that the ship covers on the board + // and fill in the `player_board` string representation. + Cell cell = placement.TopLeftCorner(); + for (int i = 0; i < placement.ship.length; ++i) { + SPIEL_DCHECK_TRUE(cell.row >= 0 && cell.row < conf.board_height); + SPIEL_DCHECK_TRUE(cell.col >= 0 && cell.col < conf.board_width); + + // The ships do not overlap. + SPIEL_DCHECK_EQ(player_board[cell.row][cell.col], ' '); + player_board[cell.row][cell.col] = ship_id; + + if (placement.direction == ShipPlacement::Direction::Horizontal) { + ++cell.col; + } else { + SPIEL_DCHECK_TRUE(placement.direction == + ShipPlacement::Direction::Vertical); + ++cell.row; + } + } + + ++ship_id; + } + } + // It is impossible that the player placed more ships than they own. + SPIEL_DCHECK_LE(ship_id, 'a' + conf.ships.size()); + + // We now include the opponent's shots on the player's board. + for (const auto& move : moves_) { + if (move.player == opponent && absl::holds_alternative(move.action)) { + const Shot& shot = absl::get(move.action); + + if (player_board[shot.row][shot.col] == ' ' || + player_board[shot.row][shot.col] == '*') { + // If the cell contains a '*' it means that we have already shot at that + // cell before. That can only happen if repeated shots are allowed. + SPIEL_DCHECK_TRUE(conf.allow_repeated_shots || + player_board[shot.row][shot.col] == ' '); + + player_board[shot.row][shot.col] = '*'; + } else { + SPIEL_DCHECK_TRUE(std::isalpha(player_board[shot.row][shot.col])); + + // The shot hit one of the ships. In this case, we use the uppercase + // letter corresponding to the ship. 
+ player_board[shot.row][shot.col] = + std::toupper(player_board[shot.row][shot.col]); + } + } + } + + std::string output; + absl::StrAppend(&output, "+", std::string(conf.board_width, '-'), "+\n"); + for (const auto& row : player_board) { + absl::StrAppend(&output, "|", row, "|\n"); + } + absl::StrAppend(&output, "+", std::string(conf.board_width, '-'), "+\n"); + return output; +} + +std::string BattleshipState::ShotsBoardString(const Player player) const { + SPIEL_CHECK_TRUE(player >= 0 && player < NumPlayers()); + + const Player opponent = (player == Player{0}) ? Player{1} : Player{0}; + const BattleshipConfiguration& conf = bs_game_->conf; + + // We keep the board in memory as vectors of strings. Initially, all strings + // only contain whitespace. + std::vector shots_board(conf.board_height, + std::string(conf.board_width, ' ')); + + // We fill in the board that represents the outcome of the player's + // shots. + // + // We start by adding a '@' to all the positions where the player shot. + // That corresponds to marking all shots as 'misses'. We will promote them + // to ship-hit marks '#' in a shortly. + for (const auto& move : moves_) { + if (move.player == player && absl::holds_alternative(move.action)) { + const Shot& shot = absl::get(move.action); + + if (conf.allow_repeated_shots) { + SPIEL_DCHECK_TRUE(shots_board[shot.row][shot.col] == ' ' || + shots_board[shot.row][shot.col] == '@'); + } else { + SPIEL_DCHECK_EQ(shots_board[shot.row][shot.col], ' '); + } + shots_board[shot.row][shot.col] = '@'; + } + } + + // Now, we iterate through the ship placements of the opponent. If a ship + // has been hit, then we will promote '@' to '#'. + for (const auto& move : moves_) { + if (move.player == opponent && + absl::holds_alternative(move.action)) { + const ShipPlacement& placement = absl::get(move.action); + + // We now iterate over all the cells that the ship covers on the board + // and fill in the `player_board` string representation. + Cell cell = placement.TopLeftCorner(); + for (int i = 0; i < placement.ship.length; ++i) { + SPIEL_DCHECK_TRUE(cell.row >= 0 && cell.row < conf.board_height); + SPIEL_DCHECK_TRUE(cell.col >= 0 && cell.col < conf.board_width); + + if (shots_board[cell.row][cell.col] == '@') { + shots_board[cell.row][cell.col] = '#'; + } else { + // Ships cannot intersect, so it's impossible that we would go over + // a '#'. 
+ SPIEL_DCHECK_EQ(shots_board[cell.row][cell.col], ' '); + } + + if (placement.direction == ShipPlacement::Direction::Horizontal) { + ++cell.col; + } else { + SPIEL_DCHECK_TRUE(placement.direction == + ShipPlacement::Direction::Vertical); + ++cell.row; + } + } + } + } + + std::string output; + absl::StrAppend(&output, "+", std::string(conf.board_width, '-'), "+\n"); + for (const auto& row : shots_board) { + absl::StrAppend(&output, "|", row, "|\n"); + } + absl::StrAppend(&output, "+", std::string(conf.board_width, '-'), "+\n"); + return output; +} + +std::string BattleshipState::ToPrettyString() const { + std::string state_str; + + absl::StrAppend(&state_str, "Player 0's board:\n"); + absl::StrAppend(&state_str, OwnBoardString(Player{0})); + absl::StrAppend(&state_str, "\nPlayer 1's board:\n"); + absl::StrAppend(&state_str, OwnBoardString(Player{1})); + + return state_str; +} + +void BattleshipState::DoApplyAction(Action action_id) { + SPIEL_CHECK_FALSE(IsTerminal()); + + const Player player = CurrentPlayer(); + const auto legal_action_ids = LegalActions(); + + // Instead of validating the input action, we simply check that it is one + // of the legal actions. This effectively moves all the burden of validation + // onto `LegalActions`. + SPIEL_CHECK_EQ( + std::count(legal_action_ids.begin(), legal_action_ids.end(), action_id), + 1); + + const absl::variant action = + bs_game_->DeserializeAction(action_id); + + if (absl::holds_alternative(action)) { + const CellAndDirection& cell_and_dir = absl::get(action); + const ShipPlacement placement( + /* direction = */ cell_and_dir.direction, + /* ship = */ NextShipToPlace(player), + /* tl_corner = */ cell_and_dir.TopLeftCorner()); + + moves_.push_back(GameMove{CurrentPlayer(), placement}); + } else { + SPIEL_DCHECK_TRUE(absl::holds_alternative(action)); + + moves_.push_back(GameMove{CurrentPlayer(), absl::get(action)}); + } +} // namespace battleship + +int BattleshipState::NumShipsPlaced() const { + return static_cast( + std::count_if(moves_.begin(), moves_.end(), [](const GameMove& move) { + return absl::holds_alternative(move.action); + })); +} + +bool BattleshipState::AllShipsPlaced() const { + const BattleshipConfiguration& conf = bs_game_->conf; + + return NumShipsPlaced() == 2 * conf.ships.size(); +} + +bool BattleshipState::IsShipPlaced(const Ship& ship, + const Player player) const { + SPIEL_DCHECK_TRUE(player == Player{0} || player == Player{1}); + + for (const auto& move : moves_) { + if (move.player == player && + absl::holds_alternative(move.action) && + absl::get(move.action).ship.id == ship.id) { + return true; + } + } + return false; +} + +Ship BattleshipState::NextShipToPlace(const Player player) const { + SPIEL_DCHECK_TRUE(player == Player{0} || player == Player{1}); + + const BattleshipConfiguration& conf = bs_game_->conf; + const auto next_ship = std::find_if_not( + conf.ships.begin(), conf.ships.end(), [this, player](const Ship& ship) { + return this->IsShipPlaced(ship, player); + }); + + SPIEL_DCHECK_TRUE(next_ship != conf.ships.end()); + return *next_ship; +} + +ShipPlacement BattleshipState::FindShipPlacement(const Ship& ship, + const Player player) const { + SPIEL_DCHECK_TRUE(player == Player{0} || player == Player{1}); + + // NOTE: for now, this function is intended to be called only after all the + // ships have been placed. + SPIEL_DCHECK_TRUE(AllShipsPlaced()); + + // We iterate through the moves of the player, filtering those that belong + // to the requested one. We match ships based on their unique id. 
+ for (const auto& move : moves_) { + if (move.player == player && + absl::holds_alternative(move.action)) { + const ShipPlacement& placement = absl::get(move.action); + if (placement.ship.id == ship.id) { + return placement; + } + } + } + + SpielFatalError("Unreachable"); +} + +bool BattleshipState::PlacementDoesNotOverlap(const ShipPlacement& proposed, + const Player player) const { + const BattleshipConfiguration& conf = bs_game_->conf; + + SPIEL_CHECK_GE(proposed.TopLeftCorner().row, 0); + SPIEL_CHECK_LT(proposed.TopLeftCorner().row, conf.board_height); + SPIEL_CHECK_GE(proposed.TopLeftCorner().col, 0); + SPIEL_CHECK_LT(proposed.TopLeftCorner().col, conf.board_width); + + SPIEL_CHECK_GE(proposed.BottomRightCorner().row, 0); + SPIEL_CHECK_LT(proposed.BottomRightCorner().row, conf.board_height); + SPIEL_CHECK_GE(proposed.BottomRightCorner().col, 0); + SPIEL_CHECK_LT(proposed.BottomRightCorner().col, conf.board_width); + + for (const auto& move : moves_) { + if (move.player == player && + absl::holds_alternative(move.action)) { + const ShipPlacement& prior_placement = + absl::get(move.action); + + if (proposed.OverlapsWith(prior_placement)) { + return false; + } + } + } + return true; +} + +bool BattleshipState::DidShipSink(const Ship& ship, const Player player) const { + SPIEL_DCHECK_TRUE(player == Player{0} || player == Player{1}); + + // NOTE: for now, this function is intended to be called only after all the + // ships have been placed. + SPIEL_DCHECK_TRUE(AllShipsPlaced()); + + const BattleshipConfiguration& conf = bs_game_->conf; + + // We go through the history of shots by the opponent, and filter those that + // intersect with the ship. + std::vector hits; + const ShipPlacement placement = FindShipPlacement(ship, player); + for (const auto& move : moves_) { + if (move.player != player && absl::holds_alternative(move.action)) { + const Shot& shot = absl::get(move.action); + if (placement.CoversCell(shot)) { + hits.push_back(shot); + } + } + } + + // We need to account for the possibility that the opponent hit the same + // cell more than once, when `allow_repeated_shots = true`. For this, we + // de-duplicate the vector of hits. + std::sort(hits.begin(), hits.end()); + const auto new_end = std::unique(hits.begin(), hits.end()); + SPIEL_CHECK_TRUE(new_end == hits.end() || conf.allow_repeated_shots); + + const size_t num_unique_shots = std::distance(hits.begin(), new_end); + SPIEL_DCHECK_LE(num_unique_shots, ship.length); + + return num_unique_shots == ship.length; +} + +bool BattleshipState::AllPlayersShipsSank(const Player player) const { + SPIEL_DCHECK_TRUE(player == Player{0} || player == Player{1}); + + const BattleshipConfiguration& conf = bs_game_->conf; + + for (const Ship& ship : conf.ships) { + if (!DidShipSink(ship, player)) return false; + } + return true; +} + +bool BattleshipState::AlreadyShot(const Shot& shot, const Player player) const { + SPIEL_DCHECK_TRUE(player == Player{0} || player == Player{1}); + + return std::find_if(moves_.begin(), moves_.end(), + [player, shot](const GameMove& move) { + return move.player == player && + absl::holds_alternative(move.action) && + absl::get(move.action) == shot; + }) != moves_.end(); +} + +// Facts about the game +// +// NOTE: The utility type is overridden in the game constructor and set to +// `kZeroSum` when the loss multiplier is 1.0. 
+const GameType kGameType{ + /* short_name = */ "battleship", + /* long_name = */ "Battleship", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kImperfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kTerminal, + /* max_num_players = */ 2, + /* min_num_players = */ 2, + /* provides_information_state_string = */ true, + /* provides_information_state_tensor = */ true, + /* provides_observation_string = */ true, + /* provides_observation_tensor = */ false, + /* parameter_specification = */ + {{"board_width", GameParameter(kDefaultBoardWidth)}, + {"board_height", GameParameter(kDefaultBoardHeight)}, + {"ship_sizes", GameParameter(kDefaultShipSizes)}, + {"ship_values", GameParameter(kDefaultShipValues)}, + {"num_shots", GameParameter(kDefaultNumShots)}, + {"allow_repeated_shots", GameParameter(kDefaultAllowRepeatedShots)}, + {"loss_multiplier", GameParameter(kDefaultLossMultiplier)}}}; + +constexpr int kMaxDimension = 10; + +std::shared_ptr Factory(const GameParameters& params) { + return std::make_shared(params); +} +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +BattleshipGame::BattleshipGame(const GameParameters& params) + : Game(kGameType, params) { + conf.board_width = ParameterValue("board_width"); + SPIEL_CHECK_GE(conf.board_width, 0); + SPIEL_CHECK_LE(conf.board_width, kMaxDimension); + + conf.board_height = ParameterValue("board_height"); + SPIEL_CHECK_GE(conf.board_height, 0); + SPIEL_CHECK_LE(conf.board_height, kMaxDimension); + + // NOTE: It is *very* important to clone ship_sizes and ship_values onto the + // stack, otherwise we would run into undefined behavior without + // warning, because ParameterValue() returns a temporary without + // storage, and absl::string_view would amount to a fat pointer to a + // temporary. + const std::string ship_sizes_param_str = + ParameterValue("ship_sizes"); + const std::string ship_values_param_str = + ParameterValue("ship_values"); + + // First, we check that the list starts with '[' and ends with ']'. + absl::string_view ship_sizes_param = + absl::StripAsciiWhitespace(ship_sizes_param_str); + SPIEL_CHECK_TRUE(absl::ConsumePrefix(&ship_sizes_param, "[")); + SPIEL_CHECK_TRUE(absl::ConsumeSuffix(&ship_sizes_param, "]")); + + absl::string_view ship_values_param = + absl::StripAsciiWhitespace(ship_values_param_str); + SPIEL_CHECK_TRUE(absl::ConsumePrefix(&ship_values_param, "[")); + SPIEL_CHECK_TRUE(absl::ConsumeSuffix(&ship_values_param, "]")); + + const std::vector ship_sizes = + absl::StrSplit(ship_sizes_param, ';'); + const std::vector ship_values = + absl::StrSplit(ship_values_param, ';'); + SPIEL_CHECK_EQ(ship_sizes.size(), ship_values.size()); + + for (size_t ship_index = 0; ship_index < ship_sizes.size(); ++ship_index) { + Ship ship; + ship.id = ship_index; + + SPIEL_CHECK_TRUE(absl::SimpleAtoi(ship_sizes.at(ship_index), &ship.length)); + SPIEL_CHECK_TRUE(absl::SimpleAtod(ship_values.at(ship_index), &ship.value)); + + SPIEL_CHECK_TRUE(ship.length <= conf.board_width || + ship.length <= conf.board_height); + SPIEL_CHECK_GE(ship.value, 0.0); + + conf.ships.push_back(ship); + } + SPIEL_CHECK_GT(conf.ships.size(), 0); + + // XXX(gfarina): The next restriction is not really intrinsic in the game, + // but we need it to pretty print the board status in + // `ObservationString`, since we use ASCII letters (a-z) to identify the + // ships. 
+ SPIEL_CHECK_LE(conf.ships.size(), 26); + + std::vector partial_placement; + if (!ExistsFeasiblePlacement(conf, &partial_placement)) { + SpielFatalError( + "Battleship: it is NOT possible to fit all the ships on the " + "board!"); + } + + conf.num_shots = ParameterValue("num_shots"); + SPIEL_CHECK_GT(conf.num_shots, 0); + + conf.allow_repeated_shots = ParameterValue("allow_repeated_shots"); + if (!conf.allow_repeated_shots) { + SPIEL_CHECK_LE(conf.num_shots, conf.board_width * conf.board_height); + } + + conf.loss_multiplier = ParameterValue("loss_multiplier"); + + if (std::abs(conf.loss_multiplier - 1.0) < kFloatTolerance) { + game_type_.utility = GameType::Utility::kZeroSum; + } +} + +int BattleshipGame::NumDistinctActions() const { + // See comment about (de)serialization of actions in `BattleshipGame`. + return 3 * conf.board_width * conf.board_height; +} + +std::unique_ptr BattleshipGame::NewInitialState() const { + const auto ptr = + std::dynamic_pointer_cast(shared_from_this()); + return std::make_unique(ptr); +} + +double BattleshipGame::MinUtility() const { + // The final payoff is a sum of values of ships we destroyed, minus sum of + // our own destroyed ships multiplied by the loss multiplier. + // + // So, here we take the worst possible case: we destroy no ship and all of + // our ships are destroyed. + // + // Note: the implementation below is only correct if the ship values are >= + // 0. That condition is checked at game construction time. However, we allow + // for a negative loss_multiplier. + double min_utility = 0.0; + if (conf.loss_multiplier > 0.0) { + for (const Ship& ship : conf.ships) { + SPIEL_DCHECK_GE(ship.value, 0.0); + min_utility -= conf.loss_multiplier * ship.value; + } + } + + return min_utility; +} + +double BattleshipGame::MaxUtility() const { + // The final payoff is a sum of values of ships we destroyed, minus sum of + // our own destroyed ships multiplied by the loss multiplier. + // + // So, here we take the best possible case: we destroy all of the opponent's + // ship and have none of ours sunk. + // + // Note: the implementation below is only correct if the ship values are >= + // 0. That condition is checked at game construction time. However, we allow + // for a negative loss_multiplier. + double max_utility = 0.0; + for (const Ship& ship : conf.ships) { + SPIEL_DCHECK_GE(ship.value, 0.0); + max_utility += ship.value; + } + + if (conf.loss_multiplier < 0.0) { + max_utility *= (1.0 - conf.loss_multiplier); + } + + return max_utility; +} + +absl::optional BattleshipGame::UtilitySum() const { + if (std::abs(conf.loss_multiplier - 1.0) < kFloatTolerance) { + return 0.0; + } else { + return absl::nullopt; + } +} + +int BattleshipGame::MaxGameLength() const { + // Each player has to place their ships, plus potentially as many turns as + // the number of shots + return 2 * (conf.ships.size() + conf.num_shots); +} + +std::vector BattleshipGame::InformationStateTensorShape() const { + // The information set is a sequence of placements followed by a + // a sequence of shots. 
+ // + // Each placement has: + // - two bits for one-hot horizontal/vertical + // - rows bits for one-hot row + // - cols bits for one-hot col + const int bits_for_placement = conf.ships.size() * + (2 + conf.board_height + conf.board_width); + + // Each shot has: + // - two bits for the one-hot player + // - three bits for one-hot W/H/S + // - rows bits for the one-hot row + // - cols bits for the one-hot col + const int bits_for_shots = conf.num_shots * NumPlayers() * + (2 + 3 + conf.board_height + conf.board_width); + + // 1 bit for terminal?, 2 bits each for observing player and current player + return {1 + NumPlayers() + NumPlayers() + + bits_for_placement + bits_for_shots}; +} + +std::string BattleshipGame::ActionToString(Player player, + Action action_id) const { + SPIEL_DCHECK_TRUE(player == Player{0} || player == Player{1}); + + const absl::variant action = + DeserializeAction(action_id); + + if (absl::holds_alternative(action)) { + const Shot& shot = absl::get(action); + return absl::StrCat("Pl", player, ": shoot at (", shot.row, ", ", shot.col, + ")"); + } else { + SPIEL_DCHECK_TRUE(absl::holds_alternative(action)); + const CellAndDirection& cell_and_dir = absl::get(action); + absl::string_view direction_str; + if (cell_and_dir.direction == CellAndDirection::Direction::Horizontal) { + direction_str = "horizontally"; + } else { + SPIEL_DCHECK_EQ(cell_and_dir.direction, + CellAndDirection::Direction::Vertical); + direction_str = "vertically"; + } + + return absl::StrCat("Pl", player, ": place ship ", direction_str, + " with top-left corner in (", + cell_and_dir.TopLeftCorner().row, ", ", + cell_and_dir.TopLeftCorner().col, ")"); + } +} + +Action BattleshipGame::SerializeShipPlacementAction( + const CellAndDirection& cell_and_dir) const { + SPIEL_CHECK_GE(cell_and_dir.TopLeftCorner().row, 0); + SPIEL_CHECK_GE(cell_and_dir.TopLeftCorner().col, 0); + SPIEL_CHECK_LT(cell_and_dir.TopLeftCorner().row, conf.board_height); + SPIEL_CHECK_LT(cell_and_dir.TopLeftCorner().col, conf.board_width); + + Action shift = 0; + if (cell_and_dir.direction == CellAndDirection::Direction::Horizontal) { + shift = conf.board_width * conf.board_height; + } else { + SPIEL_DCHECK_EQ(cell_and_dir.direction, + CellAndDirection::Direction::Vertical); + shift = 2 * conf.board_width * conf.board_height; + } + + return shift + SerializeShotAction(cell_and_dir.TopLeftCorner()); +} + +Action BattleshipGame::SerializeShotAction(const Shot& shot) const { + SPIEL_CHECK_GE(shot.row, 0); + SPIEL_CHECK_GE(shot.col, 0); + SPIEL_CHECK_LT(shot.row, conf.board_height); + SPIEL_CHECK_LT(shot.col, conf.board_width); + + return shot.row * conf.board_width + shot.col; +} + +absl::variant BattleshipGame::DeserializeAction( + const Action action_id) const { + SPIEL_CHECK_GE(action_id, 0); + SPIEL_CHECK_LT(action_id, NumDistinctActions()); + + if (action_id >= conf.board_width * conf.board_height) { + // If we are here, the action_id represents a `CellAndDirection`. + return DeserializeShipPlacementAction(action_id); + } else { + // Otherwise, the action_id is a `Shot`. 
+ return DeserializeShotAction(action_id); + } +} + +CellAndDirection BattleshipGame::DeserializeShipPlacementAction( + const Action action_id) const { + SPIEL_DCHECK_GE(action_id, conf.board_width * conf.board_height); + SPIEL_DCHECK_LT(action_id, 3 * conf.board_width * conf.board_height); + + CellAndDirection::Direction direction; + Cell tl_corner; + if (action_id >= 2 * conf.board_width * conf.board_height) { + direction = CellAndDirection::Direction::Vertical; + tl_corner = DeserializeShotAction(action_id - + 2 * conf.board_width * conf.board_height); + } else { + direction = CellAndDirection::Direction::Horizontal; + tl_corner = + DeserializeShotAction(action_id - conf.board_width * conf.board_height); + } + + return CellAndDirection(/* direction */ direction, + /* tl_corner = */ tl_corner); +} + +Shot BattleshipGame::DeserializeShotAction(const Action action_id) const { + SPIEL_DCHECK_GE(action_id, 0); + SPIEL_DCHECK_LT(action_id, conf.board_width * conf.board_height); + return Shot{/* row = */ static_cast(action_id / conf.board_width), + /* col = */ static_cast(action_id % conf.board_width)}; +} + +} // namespace battleship +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/battleship/battleship.h b/scenarios/bargaining/open_spiel/open_spiel/games/battleship/battleship.h new file mode 100644 index 0000000..6cf45df --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/battleship/battleship.h @@ -0,0 +1,402 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Parametric two-player Battleship game, as introduced in [1]. It is inspired +// by the famous [board game][wikipedia]. +// +// +// Game dynamics +// ============= +// +// > The following description is loosely taken from the +// > [Wikipedia page][wikipedia] for the Battlship game. +// +// The game is played on two grids, one for each player. The grids have equal +// size, specifically `board_height` rows and `board_width` columns, where +// `board_height` and `board_width` are parameters passed to the generator. +// +// [wikipedia]: https://en.wikipedia.org/wiki/Battleship_(game) +// +// The game has two phases: ship placement and war: +// +// Ship placement phase +// -------------------- +// +// During the ship placement phase, each player secretly arranges their ships +// on their grid. The players alternate placing one ship at a time, without +// revealing the ships' positions. +// +// Each ship occupies a number of consecutive squares on the grid, arranged +// either horizontally or vertically. The number of squares for each ship is +// determined by the type of the ship. The ships cannot overlap (i.e., only one +// ship can occupy any given square in the grid). The lengths and values of +// ships are the same for each player. +// +// The number of ships, as well as their lengths and values are parameters that +// can be specified at game generation time. 
+
+//
+// War phase
+// ---------
+//
+// After the ships have been positioned, the game proceeds with `num_shots`
+// rounds. In each round, each player (starting from Player 1) takes a turn to
+// announce a target square in the opponent's grid which is to be shot at.
+// Depending on the flags passed to the generator, a player might or might not
+// be able to shoot at a previously-selected position (by default repeated
+// shots are allowed; see `allow_repeated_shots` below).
+//
+// The opponent announces whether or not the square is occupied by a ship. When
+// all of the squares of a ship have been hit, the ship's owner announces the
+// sinking of the ship.
+//
+// If all of a player's ships have been sunk, the game is over.
+//
+// Payoff computation
+// ------------------
+//
+// The game payoffs are computed based on the following two quantities:
+//
+// - `damage_pl1`: this is the sum of values of Player 1's ships that have been
+//   sunk by Player 2.
+// - `damage_pl2`: this is the sum of values of Player 2's ships that have been
+//   sunk by Player 1.
+//
+// The payoff for Player 1 is computed as `damage_pl2 - loss_multiplier *
+// damage_pl1`, while the payoff for Player 2 is symmetrically computed as
+// `damage_pl1 - loss_multiplier * damage_pl2`, where `loss_multiplier` is a
+// parameter to the generator.
+//
+// When `loss_multiplier = 1`, the game is zero-sum.
+// Note that currently no partial credit is awarded to a player that hit but
+// did not sink a ship.
+//
+// Game size
+// ---------
+//
+// The number of states / information sets / sequences in Battleship is
+// exponential in the number of cells on the boards, as well as in the number
+// of turns.
+//
+// For example, on a 10x10 board, each player has 100^{number of turns}
+// possible actions. On the game with default parameters (see below), the
+// number of states is in the order of (at least) 10^200.
+//
+//
+// Even apparently small games are very complex and exhibit a large number of states.
For example, using allow_repeated_shots = False: +// +// +-------+-------+-------+-----------------+----------------+----------+ +// | Grid | Shots | Ship | Num sequences | Num infosets | Terminal | +// | | | sizes | pl 0 | pl 1 | pl 0 | pl 1 | states | +// +-------+-------+-------+--------+--------+-------+--------+----------+ +// | 2 x 2 | 2 | [1] | 165 | 341 | 53 | 109 | 1072 | +// | 2 x 2 | 3 | [1] | 741 | 917 | 341 | 397 | 2224 | +// | 2 x 2 | 2 | [1;2] | 1197 | 3597 | 397 | 1189 | 9216 | +// | 2 x 2 | 3 | [1;2] | 13485 | 22029 | 6541 | 10405 | 32256 | +// +-------+-------+-------+--------+--------+-------+--------+----------+ +// | 2 x 3 | 2 | [1] | 943 | 3787 | 187 | 751 | 19116 | +// | 2 x 3 | 3 | [1] | 15343 | 46987 | 3787 | 11551 | 191916 | +// | 2 x 3 | 4 | [1] | 144943 | 306187 | 46987 | 97951 | 969516 | +// +-------+-------+-------+--------+--------+-------+--------+----------+ +// +// +// +// Game parameters +// =============== +// +// "board_width" int Number of columns of the game board for +// each player (default = 10) +// "board_height" int Number of rows of the game board for +// each player (default = 10) +// "ship_sizes" [int] Length of the ships each player has +// (default = [2;3;3;4;5]) +// "ship_values" [double] Value of the ships each player has +// (default = [1;1;1;1;1]) +// "num_shots" int Number of shots available to each +// player (default = 50) +// "allow_repeated_shots" bool If false, the players will be prevented +// from shooting multiple times at the same +// cell of the board (default = true) +// "loss_multiplier" double Loss multiplier (see above). The game is +// zero-sum iff the loss multiplier is 1.0 +// (default = 1.0) +// +// NOTE: The list parameters must be supplied as a string of semicolon-separated +// values, wrapped in square brackets. For example: "[1;2]" is a list with +// elements `1` and `2`. "[1]" is a list with only one element. 
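+//
+// For illustration, a game with custom parameters can be loaded as follows
+// (the parameter values here are arbitrary; the same LoadGame/GameParameter
+// API is exercised in battleship_test.cc):
+//
+//   std::shared_ptr<const Game> game = LoadGame(
+//       "battleship", {{"board_width", GameParameter(2)},
+//                      {"board_height", GameParameter(2)},
+//                      {"ship_sizes", GameParameter("[1;2]")},
+//                      {"ship_values", GameParameter("[1.0;2.0]")},
+//                      {"num_shots", GameParameter(2)},
+//                      {"allow_repeated_shots", GameParameter(false)},
+//                      {"loss_multiplier", GameParameter(2.0)}});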
+// +// References +// ========== +// +// [1]: +// https://papers.nips.cc/paper/9122-correlation-in-extensive-form-games-saddle-point-formulation-and-benchmarks.pdf +// +// If you want to reference the paper that introduced the benchmark game, here +// is a Bibtex citation: +// +// ``` +// @inproceedings{Farina19:Correlation, +// title= {Correlation in Extensive-Form Games: Saddle-Point Formulation +// and Benchmarks}, +// author= {Farina, Gabriele and Ling, Chun Kai and Fang, Fei and +// Sandholm, Tuomas}, +// booktitle={Conference on Neural Information Processing Systems +// (NeurIPS)}, +// year={2019} +// } +// ``` + +#ifndef OPEN_SPIEL_GAMES_BATTLESHIP_H_ +#define OPEN_SPIEL_GAMES_BATTLESHIP_H_ + +#include +#include + +#include "open_spiel/games/battleship/battleship_types.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace battleship { + +inline constexpr int kDefaultBoardWidth = 10; +inline constexpr int kDefaultBoardHeight = 10; +inline constexpr const char* kDefaultShipSizes = "[2;3;3;4;5]"; +inline constexpr const char* kDefaultShipValues = "[1.0;1.0;1.0;1.0;1.0]"; +inline constexpr int kDefaultNumShots = 50; +inline constexpr bool kDefaultAllowRepeatedShots = true; +inline constexpr double kDefaultLossMultiplier = 1.0; + +class BattleshipGame final : public Game { + public: + explicit BattleshipGame(const GameParameters& params); + + // Virtual functions inherited by OpenSpiel's `Game` interface + int NumDistinctActions() const override; + std::unique_ptr NewInitialState() const override; + int MaxChanceOutcomes() const override { return 0; } + int NumPlayers() const override { return 2; } + double MinUtility() const override; + double MaxUtility() const override; + absl::optional UtilitySum() const override; + int MaxGameLength() const override; + std::string ActionToString(Player player, Action action_id) const override; + std::vector InformationStateTensorShape() const override; + + // Action (de)serialization routines + // ================================= + // + // A cell (r, c) is serialized to action_id r * board_width + c. + // A ship placement with top-left corner (r, c) and direction d is serialized + // as follows: + // * If d is horizontal, then we serialize cell (r, c) and add shift + // board_width * board_height. + // * If d is vertical, then we serialize cell (r, c) and add shift + // 2 * board_width * board_height. + // Since the ship placement action serialization does not depend on the + // specific ship that is being placed, the serialization/deserialization + // routines take in and return `CellAndDirection` objects. + // + // This means that the highes possible action_id is + // 3 * board_width * board_height + + // Converts a `ShipPlacement` action into a unique action_id, as required + // by OpenSpiel's interface. + // + // See above for details about our serialization scheme. + Action SerializeShipPlacementAction( + const CellAndDirection& cell_and_dir) const; + + // Converts a `Shot` action into a unique action_id, as required + // by OpenSpiel's interface. + Action SerializeShotAction(const Shot& shot) const; + + // Converts an action id to the action (Ship Placement or Shot) it represents. + absl::variant DeserializeAction( + const Action action_id) const; + + // Members + // ======= + + BattleshipConfiguration conf; + + private: + // Converts an action_id to a `CellAndDirection` action. + // + // See above for details about our serialization scheme. 
+  CellAndDirection DeserializeShipPlacementAction(const Action action_id) const;
+
+  // Converts an action_id to a `Shot` action.
+  //
+  // See above for details about our serialization scheme.
+  Shot DeserializeShotAction(const Action action_id) const;
+};
+
+class BattleshipState final : public State {
+ public:
+  explicit BattleshipState(const std::shared_ptr<const BattleshipGame> bs_game);
+  ~BattleshipState() = default;
+
+  // Virtual functions inherited by OpenSpiel's `State` interface
+  Player CurrentPlayer() const override;
+  std::vector<Action> LegalActions() const override;
+  std::string ActionToString(Player player, Action action_id) const override;
+  std::string ToString() const override;
+  bool IsTerminal() const override;
+  std::vector<double> Returns() const override;
+  std::unique_ptr<State> Clone() const override;
+  std::string InformationStateString(Player player) const override;
+  void InformationStateTensor(Player player,
+                              absl::Span<float> values) const override;
+  std::string ObservationString(Player player) const override;
+  void UndoAction(Player player, Action action_id) override;
+
+  // Draws the board of a player.
+  //
+  // The board is drawn as a rectangular grid of characters, with the following
+  // conventions:
+  // - Ships are identified with letters, starting from 'a'. For each ship, we
+  //   mark all of the cells it occupies with the same letter.
+  // - Lowercase letters denote that the cell was never hit by the opponent.
+  // - Uppercase letters denote that the cell was hit by the opponent.
+  // - Cells marked with '*' denote shots by the opponent that hit water.
+  // - All other cells are empty, that is, filled with a space ' ' character.
+  //
+  //
+  // Example
+  // -------
+  //
+  // This is what a typical 3x6 board string might look like after 4 shots by
+  // the opponent.
+  //
+  // ```
+  // +------+
+  // |*a    |
+  // | A*   |
+  // |   bbB|
+  // +------+
+  // ```
+  std::string OwnBoardString(const Player player) const;
+
+  // Draws the state of the player's shots so far.
+  //
+  // This corresponds to the incremental board that the player builds over time
+  // by shooting at the opponent.
+  //
+  // The board is drawn as a rectangular grid of characters, with the following
+  // conventions:
+  // - Shots that hit a ship are marked with '#'.
+  // - Shots that hit the water are marked with '@'.
+  // - All other cells are empty, that is, filled with a space ' ' character.
+  //
+  //
+  // Example
+  // -------
+  //
+  // This is what the opponent of the player in the example provided in
+  // `OwnBoardString` will see:
+  //
+  // ```
+  // +------+
+  // |@     |
+  // | #@   |
+  // |     #|
+  // +------+
+  // ```
+  std::string ShotsBoardString(const Player player) const;
+
+  // Outputs a pretty representation of the boards of the two players.
+  std::string ToPrettyString() const;
+
+ protected:
+  void DoApplyAction(Action action_id) override;
+
+ private:
+  // Ship placement methods
+  // ======================
+
+  // Returns how many ships have been placed collectively by the two players.
+  int NumShipsPlaced() const;
+
+  // Checks whether both players have placed all of their ships.
+  bool AllShipsPlaced() const;
+
+  // Checks whether the given ship has already been placed on the board by the
+  // given player.
+  bool IsShipPlaced(const Ship& ship, const Player player) const;
+
+  // Returns the ship that the given player should place on the board next.
+  //
+  // Ships are placed in the order they are defined in the
+  // `BattleshipConfiguration` object.
+ // + // NOTE: assumes (and checks in debug mode) that the player has not yet placed + // all of their ships on the board. + Ship NextShipToPlace(const Player player) const; + + // Returns the placement associated with the given ship of the given player. + // + // NOTE: assumes (and checks in debug mode) that the player has already placed + // the ship on the board. + ShipPlacement FindShipPlacement(const Ship& ship, const Player player) const; + + // Checks whether the proposed ship placement would overlap with the ships + // that the player has placed so far. + bool PlacementDoesNotOverlap(const ShipPlacement& proposed, + const Player player) const; + + // Sunken ship methods + // =================== + + // Checks whether the given ship of the given player has been sunk by the + // opponent. + // + // NOTE: assumes (and checks in debug mode) that *all* ships (of both players) + // have already been placed. + bool DidShipSink(const Ship& ship, const Player player) const; + + // Checks whether all of the given player's ships have been sunk by the + // opponent. + // + // NOTE: assumes (and checks in debug mode) that *all* ships (of both players) + // have already been placed. + bool AllPlayersShipsSank(const Player player) const; + + // Shot methods + // ============ + + // Checks whether the given player has already shot the given cell. + bool AlreadyShot(const Shot& shot, const Player player) const; + + // Members + // ======= + + // In addition to OpenSpiel's `game` pointer, which is of type `Game`, we also + // store a more specialized `bs_game_` back-pointer to the Battleship game + // that generated the state. + // + // This is useful to avoid having to dynamic cast game_ to retrieve the + // `BattleshipConfiguration` object. + std::shared_ptr bs_game_; + + // In addition to OpenSpiel's `history_` protected member defined in `State`, + // which is a vector of serialized action numbers, we store a friendlier + // representation of moves that happened so far. + // + // The two representations will always be in sync. + std::vector moves_; +}; + +} // namespace battleship +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_BATTLESHIP_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/battleship/battleship_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/battleship/battleship_test.cc new file mode 100644 index 0000000..aaea85d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/battleship/battleship_test.cc @@ -0,0 +1,457 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
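+
+// Note on the raw action ids used in the tests below (see the serialization
+// scheme documented in battleship.h): a shot at cell (r, c) has id
+// r * board_width + c, a horizontal placement with top-left corner (r, c)
+// adds a shift of board_width * board_height, and a vertical placement adds
+// 2 * board_width * board_height. For example, on the 4x1 grid of
+// TestTightLayout1 (board_width = 4, board_height = 1), a horizontal
+// placement at (0, 0) serializes to 4 * 1 + 0 = 4 and one at (0, 2) to
+// 4 + 2 = 6, which is why the expected legal actions there are {4, 6}
+// (and similarly {6, 7, 9, 10} on the 2x3 grid of TestTightLayout2).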
+ +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_set.h" +#include "open_spiel/abseil-cpp/absl/flags/flag.h" +#include "open_spiel/abseil-cpp/absl/flags/parse.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/algorithms/expected_returns.h" +#include "open_spiel/algorithms/get_all_states.h" +#include "open_spiel/algorithms/tabular_exploitability.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +ABSL_FLAG(int, num_sims, 20, "Number of simulations in the basic tests."); +ABSL_FLAG(bool, enable_game_sizes_test, false, + "Whether to test the game sizes."); + +namespace open_spiel { +namespace battleship { +namespace { + +namespace testing = open_spiel::testing; + +void BasicBattleshipTest() { + // Some basic tests on a small 2x2 instance. + for (int num_shots = 1; num_shots <= 3; ++num_shots) { + const std::shared_ptr game = + LoadGame("battleship", {{"board_width", GameParameter(2)}, + {"board_height", GameParameter(2)}, + {"ship_sizes", GameParameter("[1;2]")}, + {"ship_values", GameParameter("[1;2]")}, + {"num_shots", GameParameter(num_shots)}, + {"allow_repeated_shots", GameParameter(false)}, + {"loss_multiplier", GameParameter(2.0)}}); + testing::RandomSimTestWithUndo(*game, absl::GetFlag(FLAGS_num_sims)); + testing::NoChanceOutcomesTest(*game); + } +} + +void RandomTestsOnLargeBoards() { + // Allow repeated shots. + std::shared_ptr game = + LoadGame("battleship", {{"board_width", GameParameter(10)}, + {"board_height", GameParameter(10)}, + {"ship_sizes", GameParameter("[2;3;3;4;5]")}, + {"ship_values", GameParameter("[1;1;1;1;1]")}, + {"num_shots", GameParameter(50)}, + {"allow_repeated_shots", GameParameter(true)}, + {"loss_multiplier", GameParameter(1.0)}}); + testing::NoChanceOutcomesTest(*game); + testing::RandomSimTestWithUndo(*game, absl::GetFlag(FLAGS_num_sims)); + + // Repeated shots not allowed. + game = LoadGame("battleship", {{"board_width", GameParameter(10)}, + {"board_height", GameParameter(10)}, + {"ship_sizes", GameParameter("[2;3;3;4;5]")}, + {"ship_values", GameParameter("[1;1;1;1;1]")}, + {"num_shots", GameParameter(50)}, + {"allow_repeated_shots", GameParameter(false)}, + {"loss_multiplier", GameParameter(1.0)}}); + testing::NoChanceOutcomesTest(*game); + testing::RandomSimTestWithUndo(*game, absl::GetFlag(FLAGS_num_sims)); +} + +void TestZeroSumTrait() { + // We check that when the loss multiplier is 1.0, the game is registered as + // zero sum. + std::shared_ptr game = + LoadGame("battleship", {{"loss_multiplier", GameParameter(2.0)}}); + SPIEL_CHECK_EQ(game->GetType().utility, GameType::Utility::kGeneralSum); + + game = LoadGame("battleship", {{"loss_multiplier", GameParameter(1.0)}}); + SPIEL_CHECK_EQ(game->GetType().utility, GameType::Utility::kZeroSum); +} + +void TestTightLayout1() { + // We construct a 4x1 grid with 2 ships of length 2 each. We want to make sure + // that the the first ship is not placed at the center of the board. 
+ + const std::shared_ptr game = + LoadGame("battleship", {{"board_width", GameParameter(4)}, + {"board_height", GameParameter(1)}, + {"ship_sizes", GameParameter("[2;2]")}, + {"ship_values", GameParameter("[1;1]")}}); + std::unique_ptr state = game->NewInitialState(); + SPIEL_CHECK_EQ(state->CurrentPlayer(), Player{0}); + { + const std::vector actions = state->LegalActions(); + SPIEL_CHECK_EQ(actions, std::vector({4, 6})); + SPIEL_CHECK_EQ( + state->ActionToString(actions[0]), + "Pl0: place ship horizontally with top-left corner in (0, 0)"); + SPIEL_CHECK_EQ( + state->ActionToString(actions[1]), + "Pl0: place ship horizontally with top-left corner in (0, 2)"); + } + + // We now place the first ship to the left, which corresponds to the + // serialized id 4 as checked above. + state->ApplyAction(4); + + // We repeat the check for Player 1. + SPIEL_CHECK_EQ(state->CurrentPlayer(), Player{1}); + { + const std::vector actions = state->LegalActions(); + SPIEL_CHECK_EQ(actions, std::vector({4, 6})); + SPIEL_CHECK_EQ( + state->ActionToString(actions[0]), + "Pl1: place ship horizontally with top-left corner in (0, 0)"); + SPIEL_CHECK_EQ( + state->ActionToString(actions[1]), + "Pl1: place ship horizontally with top-left corner in (0, 2)"); + } + + // We place Player 1's ship to the right. + state->ApplyAction(Action{6}); + + // Now, we need to check that the only remaining action for Player 0 is to + // place the second ship to the right. + SPIEL_CHECK_EQ(state->CurrentPlayer(), Player{0}); + { + const std::vector actions = state->LegalActions(); + SPIEL_CHECK_EQ(actions, std::vector({6})); + SPIEL_CHECK_EQ( + state->ActionToString(actions[0]), + "Pl0: place ship horizontally with top-left corner in (0, 2)"); + } + state->ApplyAction(Action{6}); + + //... While for Player 1 the only remaining action is to place the ship to the + // left. + SPIEL_CHECK_EQ(state->CurrentPlayer(), Player{1}); + { + const std::vector actions = state->LegalActions(); + SPIEL_CHECK_EQ(actions, std::vector({4})); + SPIEL_CHECK_EQ( + state->ActionToString(actions[0]), + "Pl1: place ship horizontally with top-left corner in (0, 0)"); + } + state->ApplyAction(Action{4}); + + SPIEL_CHECK_FALSE(state->IsTerminal()); +} + +void TestTightLayout2() { + // We construct a 2x3 grid with 2 ships of length 2 and 3 respectively. We + // want to make sure that the the first ship is not placed anywhere + // vertically. + + const std::shared_ptr game = + LoadGame("battleship", {{"board_width", GameParameter(3)}, + {"board_height", GameParameter(2)}, + {"ship_sizes", GameParameter("[2;3]")}, + {"ship_values", GameParameter("[1;1]")}}); + std::unique_ptr state = game->NewInitialState(); + SPIEL_CHECK_EQ(state->CurrentPlayer(), Player{0}); + { + const std::vector actions = state->LegalActions(); + SPIEL_CHECK_EQ(actions, std::vector({6, 7, 9, 10})); + + SPIEL_CHECK_EQ( + state->ActionToString(actions[0]), + "Pl0: place ship horizontally with top-left corner in (0, 0)"); + SPIEL_CHECK_EQ( + state->ActionToString(actions[1]), + "Pl0: place ship horizontally with top-left corner in (0, 1)"); + SPIEL_CHECK_EQ( + state->ActionToString(actions[2]), + "Pl0: place ship horizontally with top-left corner in (1, 0)"); + SPIEL_CHECK_EQ( + state->ActionToString(actions[3]), + "Pl0: place ship horizontally with top-left corner in (1, 1)"); + } + + // We now place the first ship to the right on the first row, which + // corresponds to the serialized index 1 as checked above. + state->ApplyAction(Action{7}); + + // We repeat the check for Player 1. 
+ SPIEL_CHECK_EQ(state->CurrentPlayer(), Player{1}); + { + const std::vector actions = state->LegalActions(); + SPIEL_CHECK_EQ(actions, std::vector({6, 7, 9, 10})); + + SPIEL_CHECK_EQ( + state->ActionToString(actions[0]), + "Pl1: place ship horizontally with top-left corner in (0, 0)"); + SPIEL_CHECK_EQ( + state->ActionToString(actions[1]), + "Pl1: place ship horizontally with top-left corner in (0, 1)"); + SPIEL_CHECK_EQ( + state->ActionToString(actions[2]), + "Pl1: place ship horizontally with top-left corner in (1, 0)"); + SPIEL_CHECK_EQ( + state->ActionToString(actions[3]), + "Pl1: place ship horizontally with top-left corner in (1, 1)"); + } + + // We place Player 1's ship to the left on the second row. + state->ApplyAction(Action{9}); + + // Now, we need to check that the only remaining action for Player 0 is to + // place the second ship on the second row. + SPIEL_CHECK_EQ(state->CurrentPlayer(), Player{0}); + { + const std::vector actions = state->LegalActions(); + SPIEL_CHECK_EQ(actions, std::vector({9})); + + SPIEL_CHECK_EQ( + state->ActionToString(actions[0]), + "Pl0: place ship horizontally with top-left corner in (1, 0)"); + } + state->ApplyAction(Action{9}); + + //... While for Player 1 the only remaining action is to place the second ship + // on the first row. + SPIEL_CHECK_EQ(state->CurrentPlayer(), Player{1}); + { + const std::vector actions = state->LegalActions(); + SPIEL_CHECK_EQ(actions, std::vector({6})); + + SPIEL_CHECK_EQ( + state->ActionToString(actions[0]), + "Pl1: place ship horizontally with top-left corner in (0, 0)"); + } + state->ApplyAction(Action{6}); + + SPIEL_CHECK_FALSE(state->IsTerminal()); +} + +void TestNashEquilibriumInSmallBoard() { + // We replicate the same setting as page 7 of [1]. + // + // There, each player has a 1x3 board with a single ship of size 1 and value + // 1.0. Each player has two shots available. The loss multiplier is 2.0, so + // this is a *general-sum* game. + // + // The only Nash equilibrium of the game is for all players to place their + // ship at random, and then shoot at random. + // + // + // [1]: + // https://papers.nips.cc/paper/9122-correlation-in-extensive-form-games-saddle-point-formulation-and-benchmarks.pdf#page=7 + + const std::shared_ptr game = + LoadGame("battleship", {{"board_width", GameParameter(3)}, + {"board_height", GameParameter(1)}, + {"ship_sizes", GameParameter("[1]")}, + {"ship_values", GameParameter("[1.0]")}, + {"num_shots", GameParameter(2)}, + {"allow_repeated_shots", GameParameter(false)}, + {"loss_multiplier", GameParameter(2.0)}}); + SPIEL_CHECK_EQ(game->GetType().utility, GameType::Utility::kGeneralSum); + + const TabularPolicy policy = GetUniformPolicy(*game); + const std::vector expected_utilities = algorithms::ExpectedReturns( + *game->NewInitialState(), policy, + /* depth_limit = */ std::numeric_limits::max()); + + // Under the uniformly random policy, we expect that Player 0 and Player 1 + // will sink their opponent's ship with probability 5/9 and 1/3, respectively. + // + // Correspondingly, Player 0's expected utility is 5/9 - 2 * 1/3 = -1/9 (the 2 + // comes from the loss multiplier), while Player 1's expected utility is 1/3 - + // 2 * 5/9 = -7/9. + SPIEL_CHECK_FLOAT_EQ(expected_utilities[Player{0}], -1.0 / 9); + SPIEL_CHECK_FLOAT_EQ(expected_utilities[Player{1}], -7.0 / 9); + + // We check that this joint policy is a best response, by computing the Nash + // gap. 
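+  // NashConv sums, over all players, the amount each could gain by switching
+  // to a best response; it is zero exactly when the joint policy is a Nash
+  // equilibrium, which is what the check below asserts.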
+ SPIEL_CHECK_FLOAT_NEAR(algorithms::NashConv(*game, policy), 0.0, 1e-9); + + // TODO(gfarina): When OpenSpiel implements algorithms for optimal + // EFCE/EFCCE/NFCCE,finish checking that we exactly replicate the same + // results as [1] in this game. +} + +struct GameSize { + uint32_t num_sequences[2] = {0, 0}; // Layout: [Pl.0, Pl.1]. + uint32_t num_infostates[2] = {0, 0}; // Layout: [Pl.0, Pl.1]. + uint32_t num_terminal_states = 0; +}; + +GameSize ComputeGameSize(const std::shared_ptr game) { + std::map> all_states = + open_spiel::algorithms::GetAllStates( + *game, /* depth_limit = */ std::numeric_limits::max(), + /* include_terminals = */ true, + /* include_chance_states = */ false); + + GameSize size; + + // Account for empty sequence. + size.num_sequences[Player{0}] = 1; + size.num_sequences[Player{1}] = 1; + + absl::flat_hash_set infosets; + for (const auto& [_, state] : all_states) { + if (state->IsTerminal()) { + ++size.num_terminal_states; + } else { + const Player player = state->CurrentPlayer(); + SPIEL_CHECK_TRUE(player == Player{0} || player == Player{1}); + + // NOTE: there is no requirement that infostates strings be unique across + // players. So, we disambiguate the player by prepending it. + const std::string infostate_string = + absl::StrCat(player, state->InformationStateString()); + + if (infosets.insert(infostate_string).second) { + // The infostate string was not present in the hash set. We update the + // tally of infosets and sequences for the player. + size.num_infostates[player] += 1; + size.num_sequences[player] += state->LegalActions().size(); + } + } + } + + return size; +} + +void TestGameSizes() { + // We expect these game sizes when using allow_repeated_shots = False: + // + // +-------+-------+-------+-----------------+----------------+----------+ + // | Grid | Shots | Ship | Num sequences | Num infosets | Terminal | + // | | | sizes | pl 0 | pl 1 | pl 0 | pl 1 | states | + // +-------+-------+-------+--------+--------+-------+--------+----------+ + // | 2 x 2 | 2 | [1] | 165 | 341 | 53 | 109 | 1072 | + // | 2 x 2 | 3 | [1] | 741 | 917 | 341 | 397 | 2224 | + // | 2 x 2 | 2 | [1;2] | 1197 | 3597 | 397 | 1189 | 9216 | + // | 2 x 2 | 3 | [1;2] | 13485 | 22029 | 6541 | 10405 | 32256 | + // +-------+-------+-------+--------+--------+-------+--------+----------+ + // | 2 x 3 | 2 | [1] | 943 | 3787 | 187 | 751 | 19116 | + // | 2 x 3 | 3 | [1] | 15343 | 46987 | 3787 | 11551 | 191916 | + // | 2 x 3 | 4 | [1] | 144943 | 306187 | 46987 | 97951 | 969516 | + // +-------+-------+-------+--------+--------+-------+--------+----------+ + + // To simplify the construction of game instance we introduce a lambda. + // + // Since the value of the ships and the loss multiplier do not affect the game + // size, the lambda fills those parameters with 2 + const auto ConstructInstance = + [](const std::string& grid, const int num_shots, + const std::string& ship_sizes_str) -> std::shared_ptr { + std::vector grid_dimensions = absl::StrSplit(grid, 'x'); + SPIEL_CHECK_EQ(grid_dimensions.size(), 2); + + const GameParameter board_width(std::stoi(grid_dimensions[1])); + const GameParameter board_height(std::stoi(grid_dimensions[0])); + const GameParameter ship_sizes(ship_sizes_str); + + // We reuse the ship sizes as ship values. The values of the ships do not + // affect the game size. 
+    const GameParameter ship_values(ship_sizes_str);
+
+    return LoadGame("battleship",
+                    {{"board_width", board_width},
+                     {"board_height", board_height},
+                     {"ship_sizes", ship_sizes},
+                     {"ship_values", ship_values},
+                     {"num_shots", GameParameter(num_shots)},
+                     {"allow_repeated_shots", GameParameter(false)},
+                     {"loss_multiplier", GameParameter(2.0)}});
+  };
+
+  // 2x2 grid, 2 shots, ship sizes [1].
+  GameSize size = ComputeGameSize(ConstructInstance("2x2", 2, "[1]"));
+  SPIEL_CHECK_EQ(size.num_sequences[Player{0}], 165);
+  SPIEL_CHECK_EQ(size.num_sequences[Player{1}], 341);
+  SPIEL_CHECK_EQ(size.num_infostates[Player{0}], 53);
+  SPIEL_CHECK_EQ(size.num_infostates[Player{1}], 109);
+  SPIEL_CHECK_EQ(size.num_terminal_states, 1072);
+
+  // 2x2 grid, 3 shots, ship sizes [1].
+  size = ComputeGameSize(ConstructInstance("2x2", 3, "[1]"));
+  SPIEL_CHECK_EQ(size.num_sequences[Player{0}], 741);
+  SPIEL_CHECK_EQ(size.num_sequences[Player{1}], 917);
+  SPIEL_CHECK_EQ(size.num_infostates[Player{0}], 341);
+  SPIEL_CHECK_EQ(size.num_infostates[Player{1}], 397);
+  SPIEL_CHECK_EQ(size.num_terminal_states, 2224);
+
+  // 2x2 grid, 2 shots, ship sizes [1;2].
+  size = ComputeGameSize(ConstructInstance("2x2", 2, "[1;2]"));
+  SPIEL_CHECK_EQ(size.num_sequences[Player{0}], 1197);
+  SPIEL_CHECK_EQ(size.num_sequences[Player{1}], 3597);
+  SPIEL_CHECK_EQ(size.num_infostates[Player{0}], 397);
+  SPIEL_CHECK_EQ(size.num_infostates[Player{1}], 1189);
+  SPIEL_CHECK_EQ(size.num_terminal_states, 9216);
+
+  // 2x2 grid, 3 shots, ship sizes [1;2].
+  size = ComputeGameSize(ConstructInstance("2x2", 3, "[1;2]"));
+  SPIEL_CHECK_EQ(size.num_sequences[Player{0}], 13485);
+  SPIEL_CHECK_EQ(size.num_sequences[Player{1}], 22029);
+  SPIEL_CHECK_EQ(size.num_infostates[Player{0}], 6541);
+  SPIEL_CHECK_EQ(size.num_infostates[Player{1}], 10405);
+  SPIEL_CHECK_EQ(size.num_terminal_states, 32256);
+
+  // 2x3 grid, 2 shots, ship sizes [1].
+  size = ComputeGameSize(ConstructInstance("2x3", 2, "[1]"));
+  SPIEL_CHECK_EQ(size.num_sequences[Player{0}], 943);
+  SPIEL_CHECK_EQ(size.num_sequences[Player{1}], 3787);
+  SPIEL_CHECK_EQ(size.num_infostates[Player{0}], 187);
+  SPIEL_CHECK_EQ(size.num_infostates[Player{1}], 751);
+  SPIEL_CHECK_EQ(size.num_terminal_states, 19116);
+
+  // 2x3 grid, 3 shots, ship sizes [1].
+  size = ComputeGameSize(ConstructInstance("2x3", 3, "[1]"));
+  SPIEL_CHECK_EQ(size.num_sequences[Player{0}], 15343);
+  SPIEL_CHECK_EQ(size.num_sequences[Player{1}], 46987);
+  SPIEL_CHECK_EQ(size.num_infostates[Player{0}], 3787);
+  SPIEL_CHECK_EQ(size.num_infostates[Player{1}], 11551);
+  SPIEL_CHECK_EQ(size.num_terminal_states, 191916);
+
+  // 2x3 grid, 4 shots, ship sizes [1].
+ size = ComputeGameSize(ConstructInstance("2x3", 4, "[1]")); + SPIEL_CHECK_EQ(size.num_sequences[Player{0}], 144943); + SPIEL_CHECK_EQ(size.num_sequences[Player{1}], 306187); + SPIEL_CHECK_EQ(size.num_infostates[Player{0}], 46987); + SPIEL_CHECK_EQ(size.num_infostates[Player{1}], 97951); + SPIEL_CHECK_EQ(size.num_terminal_states, 969516); +} +} // namespace +} // namespace battleship +} // namespace open_spiel + +int main(int argc, char** argv) { + absl::ParseCommandLine(argc, argv); + + open_spiel::testing::LoadGameTest("battleship"); + open_spiel::battleship::BasicBattleshipTest(); + open_spiel::battleship::RandomTestsOnLargeBoards(); + open_spiel::battleship::TestZeroSumTrait(); + open_spiel::battleship::TestTightLayout1(); + open_spiel::battleship::TestTightLayout2(); + open_spiel::battleship::TestNashEquilibriumInSmallBoard(); + + if (absl::GetFlag(FLAGS_enable_game_sizes_test)) { + open_spiel::battleship::TestGameSizes(); + } +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/battleship/battleship_types.cc b/scenarios/bargaining/open_spiel/open_spiel/games/battleship/battleship_types.cc new file mode 100644 index 0000000..34ee7ea --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/battleship/battleship_types.cc @@ -0,0 +1,177 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/battleship/battleship_types.h" + +namespace open_spiel { +namespace battleship { + +namespace { +bool IsOverlappingPlacement(const std::vector& placement) { + for (int index = 1; index < placement.size(); ++index) { + for (int other = 0; other < index; ++other) { + if (placement.at(index).OverlapsWith(placement.at(other))) { + return true; + } + } + } + + return false; +} +} // namespace + +bool ExistsFeasiblePlacement(const BattleshipConfiguration& conf, + std::vector* partial_placement) { + // Debug-time check of preconditions. + SPIEL_DCHECK_LE(partial_placement->size(), conf.ships.size()); + SPIEL_DCHECK_FALSE(IsOverlappingPlacement(*partial_placement)); + for (int index = 0; index < partial_placement->size(); ++index) { + const ShipPlacement& placement = partial_placement->at(index); + + SPIEL_CHECK_EQ(placement.ship.id, conf.ships.at(index).id); + SPIEL_CHECK_TRUE( + placement.IsWithinBounds(conf.board_width, conf.board_height)); + } + + if (partial_placement->size() == conf.ships.size()) { + // All ships have been placed. The placement is valid because of the + // precondition. + return true; + } else { + // We try to place the next ship in the board. We start by trying to place + // the ship horizontally. + // + // Because of the precondition, partial_placement is a placement of a prefix + // of the ships in conf.ships. Hence, the next ship that needs to be placed + // is simply: + const Ship& ship = conf.ships.at(partial_placement->size()); + + // -- Horizontal placement. 
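+    // Try every top-left corner from which a horizontal ship of this length
+    // still fits on the board, and recurse to verify that the remaining ships
+    // can also be placed.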
+ for (int row = 0; row < conf.board_height; ++row) { + for (int col = 0; col < conf.board_width - ship.length + 1; ++col) { + // First, we append the placement of the next ship to the partial + // placement vector. + partial_placement->push_back({ShipPlacement::Direction::Horizontal, + /* ship = */ ship, + /* tl_corner = */ Cell{row, col}}); + if (!IsOverlappingPlacement(*partial_placement) && + ExistsFeasiblePlacement(conf, partial_placement)) { + // The new partial placement led to a solution. We honor the + // postcondition and early-return sucess. + partial_placement->pop_back(); + return true; + } else { + // The new partial placement does not lead to a solution. We remove + // the placement and continue with the next placement. + partial_placement->pop_back(); + } + } + } + + // -- Vertical placement. + for (int row = 0; row < conf.board_height - ship.length + 1; ++row) { + for (int col = 0; col < conf.board_width; ++col) { + // First, we append the placement of the next ship to the partial + // placement vector. + partial_placement->push_back({ShipPlacement::Direction::Vertical, + /* ship = */ ship, + /* tl_corner = */ Cell{row, col}}); + if (!IsOverlappingPlacement(*partial_placement) && + ExistsFeasiblePlacement(conf, partial_placement)) { + // The new partial placement led to a solution. We honor the + // postcondition and early-return sucess. + partial_placement->pop_back(); + return true; + } else { + // The new partial placement does not lead to a solution. We remove + // the placement and continue with the next placement. + partial_placement->pop_back(); + } + } + } + } + + return false; +} + +CellAndDirection::CellAndDirection(const Direction direction, + const Cell& tl_corner) + : direction(direction), tl_corner_(tl_corner) { + SPIEL_CHECK_GE(tl_corner.row, 0); + SPIEL_CHECK_GE(tl_corner.col, 0); +} + +ShipPlacement::ShipPlacement(const Direction dir, const Ship& ship, + const Cell& tl_corner) + : CellAndDirection(dir, tl_corner), ship(ship) { + SPIEL_CHECK_GE(ship.length, 1); +} + +bool ShipPlacement::CoversCell(const Cell& cell) const { + if (direction == Direction::Horizontal) { + return cell.row == tl_corner_.row && cell.col >= tl_corner_.col && + cell.col < tl_corner_.col + ship.length; + } else { + SPIEL_CHECK_EQ(direction, Direction::Vertical); + return cell.col == tl_corner_.col && cell.row >= tl_corner_.row && + cell.row < tl_corner_.row + ship.length; + } +} + +Cell ShipPlacement::BottomRightCorner() const { + if (direction == Direction::Horizontal) { + return Cell{tl_corner_.row, tl_corner_.col + ship.length - 1}; + } else { + SPIEL_CHECK_EQ(direction, Direction::Vertical); + return Cell{tl_corner_.row + ship.length - 1, tl_corner_.col}; + } +} + +bool ShipPlacement::OverlapsWith(const ShipPlacement& other) const { + if (other.BottomRightCorner().row < TopLeftCorner().row) { + // `other` is completely above `this`. + return false; + } else if (other.TopLeftCorner().row > BottomRightCorner().row) { + // `other` is completely below `this`. + return false; + } else if (other.BottomRightCorner().col < TopLeftCorner().col) { + // `other` is completely to the left of `this`. + return false; + } else if (other.TopLeftCorner().col > BottomRightCorner().col) { + // `other` is completely to the right of `this`. 
+ return false; + } + return true; +} + +bool ShipPlacement::IsWithinBounds(const int board_width, + const int board_height) const { + const Cell tl_corner = TopLeftCorner(); + const Cell br_corner = BottomRightCorner(); + + return (tl_corner.row >= 0 && tl_corner.row < board_height) && + (br_corner.row >= 0 && br_corner.row < board_height) && + (tl_corner.col >= 0 && tl_corner.col < board_width) && + (br_corner.col >= 0 && br_corner.col < board_width); +} + +std::string ShipPlacement::ToString() const { + const char direction_char = direction == Direction::Horizontal ? 'h' : 'v'; + + return absl::StrFormat("%c_%d_%d", direction_char, tl_corner_.row, + tl_corner_.col); +} + +} // namespace battleship +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/battleship/battleship_types.h b/scenarios/bargaining/open_spiel/open_spiel/games/battleship/battleship_types.h new file mode 100644 index 0000000..223dfa6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/battleship/battleship_types.h @@ -0,0 +1,149 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_BATTLESHIP_TYPES_H_ +#define OPEN_SPIEL_GAMES_BATTLESHIP_TYPES_H_ + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/types/variant.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace battleship { + +// A cell of a generic player's board. +struct Cell { + int row; + int col; + + std::string ToString() const { return absl::StrFormat("%d_%d", row, col); } + bool operator==(const Cell& other) const { + return row == other.row && col == other.col; + } + bool operator<(const Cell& other) const { + return (row < other.row) || (row == other.row && col < other.col); + } +}; + +// Represents a shot action. We store the coordinates of the cell that was hit. +using Shot = Cell; + +struct Ship { + int id; // Globally unique identifier of the ship + int length; + double value; +}; + +class CellAndDirection { + public: + enum Direction { Horizontal = 0, Vertical = 1 }; + + CellAndDirection(const Direction direction, const Cell& tl_corner); + Cell TopLeftCorner() const { return tl_corner_; } + Direction direction; + + protected: + Cell tl_corner_; +}; + +// Represents the placement of a ship. +// +// Ships can be placed either horizontal or vertical on the board. When the ship +// is placed horizontally, we store the *leftmost* cell of the placement as the +// corner. When the ship is placed vertically, the corner is the *topmost*. +class ShipPlacement final : public CellAndDirection { + public: + using CellAndDirection::Direction; + + ShipPlacement(const Direction direction, const Ship& ship, + const Cell& tl_corner); + + // Returns true if the the ship falls over a specific cell. 
+ bool CoversCell(const Cell& cell) const; + + // Returns the bottom-right corner of the ship when placed according to this + // placement. + Cell BottomRightCorner() const; + + // Checks whether two ship placements intersect on at least one cell. + bool OverlapsWith(const ShipPlacement& other) const; + + // Checkes whether the ship placement fits within a board of given heigth and + // width. + bool IsWithinBounds(const int board_width, const int board_height) const; + + // Gives a string representation of the ship placement, useful to the + // ActionToString method. + // + // For a ship placed horizontally with the top left corner in (2,3), the + // string representation is `h_2_3`. For vertical placements, the first + // character is a `v` instead of an `h`. + std::string ToString() const; + + Ship ship; +}; + +struct GameMove { + Player player; + absl::variant action; +}; + +struct BattleshipConfiguration { + int board_width; + int board_height; + + // It is assumed that each agent has the same set of ships. So, each ship is + // only included once instead of being duplicated for each player. + std::vector ships; + + // Number of shots **each player** can use. + int num_shots; + + // If false, players are forbidden from shooting the same cell more than once. + bool allow_repeated_shots; + + // See the description of the game in `battleship.h` for details of how + // the payoffs of the players are computed. + double loss_multiplier; +}; + +// Returns whether there is still enough space to finish placing ships in a +// partially-filled-in board. +// +// This method receives a vector of current ship placements, and +// returns `true` if and only if there is at least one way to place the +// remaining ship on the board without overlapping ships. +// +// This method is used when deciding the set of placement actions a player has +// a given point in time, as well as to validate at construction time whether +// the given Battleship configuration is feasible. +// +// +// Inductive contract +// ------------------ +// +// The correctness of this method relies on the following inductive contract. +// The preconditions are checked in debug mode. +// - Precondition: partial_placement contains a valid (that is, within the +// bounds of the board, and non-overlapping) placement of a prefix of ships +// defined in `conf.ships`. +// - Postcondition: by the time the function returns, partial_placement is +// exactly the same vector as at the time of entering the call. +bool ExistsFeasiblePlacement(const BattleshipConfiguration& conf, + std::vector* partial_placement); +} // namespace battleship +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_BATTLESHIP_TYPES_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/blackjack/blackjack.cc b/scenarios/bargaining/open_spiel/open_spiel/games/blackjack/blackjack.cc new file mode 100644 index 0000000..a46cd02 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/blackjack/blackjack.cc @@ -0,0 +1,494 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/blackjack/blackjack.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/observer.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace blackjack { + +constexpr int kPlayerId = 0; +constexpr int kAceValue = 1; +// The max score to approach for any player, i.e. as close to this as possible +// without exceeding it. +constexpr int kApproachScore = 21; +constexpr int kInitialCardsPerPlayer = 2; + +const char kSuitNames[kNumSuits + 1] = "CDHS"; +const char kRanks[kCardsPerSuit + 1] = "A23456789TJQK"; + +namespace { +// Facts about the game +const GameType kGameType{/*short_name=*/"blackjack", + /*long_name=*/"Blackjack", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/1, + /*min_num_players=*/1, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/{}}; + +static std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new BlackjackGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +std::vector SetToSortedVector(const std::set& set) { + std::vector vec; + vec.reserve(set.size()); + for (int i : set) { + vec.push_back(i); + } + std::sort(vec.begin(), vec.end()); + return vec; +} + +} // namespace + +std::string PhaseToString(Phase phase) { + switch (phase) { + case kInitialDeal: + return "Initial Deal"; + case kPlayerTurn: + return "Player Turn"; + case kDealerTurn: + return "Dealer Turn"; + default: + SpielFatalError("Unknown phase"); + } +} + +std::string CardToString(int card) { + return std::string(1, kSuitNames[card / kCardsPerSuit]) + + std::string(1, kRanks[card % kCardsPerSuit]); +} + +int GetCardByString(std::string card_string) { + if (card_string.length() != 2) { + return -1; + } + int suit_idx = std::string(kSuitNames).find(card_string[0]); + int rank_idx = std::string(kRanks).find(card_string[1]); + if (suit_idx == std::string::npos || rank_idx == std::string::npos) { + return -1; + } + return suit_idx * kCardsPerSuit + rank_idx; +} + +std::vector CardsToStrings(const std::vector& cards, + int start_index) { + std::vector card_strings; + card_strings.reserve(cards.size()); + for (int i = 0; i < cards.size(); ++i) { + if (i < start_index) { + card_strings.push_back(kHiddenCardStr); + } else { + card_strings.push_back(CardToString(cards[i])); + } + } + return card_strings; +} + +std::string BlackjackState::ActionToString(Player player, + Action move_id) const { + if (player == kChancePlayerId) { + return CardToString(move_id); + } else if (move_id == ActionType::kHit) { + return "Hit"; + } else { + return "Stand"; + } +} + +std::string BlackjackState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + 
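+  // The information state is the full action history with the dealer's
+  // face-down card (the dealer's first card) removed, since the player
+  // cannot observe it.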
std::vector history = History(); + + if (!cards_[DealerId()].empty()) { + int dealer_first_card_index = 2 * DealerId(); + SPIEL_CHECK_EQ(history[dealer_first_card_index], cards_[DealerId()][0]); + history.erase(history.begin() + dealer_first_card_index); + } + return absl::StrJoin(history, " "); +} + +std::unique_ptr BlackjackState::ResampleFromInfostate( + int player_id, std::function rng) const { + if (IsTerminal() || cards_[DealerId()].empty()) { + return Clone(); + } + + // The possible cards to choose from are the cards in the deck, plus the + // dealer's current card. + std::vector possible_cards = deck_; + int dealer_down_card = cards_[DealerId()][0]; + possible_cards.push_back(dealer_down_card); + + double z = rng(); + int sampled_index = static_cast(z * possible_cards.size()); + int dealer_new_down_card = possible_cards[sampled_index]; + + std::unique_ptr new_state = game_->NewInitialState(); + std::vector history = History(); + + // The dealer down card is always the third action in the history. + int dealer_down_card_index = 2; + SPIEL_CHECK_EQ(history[dealer_down_card_index], dealer_down_card); + for (int i = 0; i < history.size(); ++i) { + if (i == dealer_down_card_index) { + new_state->ApplyAction(dealer_new_down_card); + } else { + new_state->ApplyAction(history[i]); + } + } + return new_state; +} + +bool BlackjackState::IsTerminal() const { return turn_over_[DealerId()]; } + +int BlackjackState::DealerId() const { return game_->NumPlayers(); } + +std::vector BlackjackState::Returns() const { + if (!IsTerminal()) { + return {0}; + } + + int player_total = GetBestPlayerTotal(kPlayerId); + int dealer_total = GetBestPlayerTotal(DealerId()); + if (player_total > kApproachScore) { + // Bust. + return {-1}; + } else if (dealer_total > kApproachScore) { + // Bust. + return {+1}; + } else if (player_total > dealer_total) { + return {+1}; + } else if (player_total < dealer_total) { + return {-1}; + } else { + // Tie. + return {0}; + } +} + +std::string BlackjackState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, game_->NumPlayers()); + if (IsTurnOver(player)) { + // Show dealer's face-down card after player's hand is settled. + return StateToString(/*show_all_dealers_card=*/true); + } else { + return StateToString(/*show_all_dealers_card=*/false); + } +} + +std::string BlackjackState::StateToString(bool show_all_dealers_card) const { + std::vector players; + std::string result = absl::StrCat( + "Current Phase: ", PhaseToString(phase_), "\n", + "Current Player: ", cur_player_, "\n"); + + for (int p = 0; p <= NumPlayers(); ++p) { + absl::StrAppend( + &result, p == DealerId() ? "Dealer" : absl::StrCat("Player ", p), ": "); + // Don't show dealer's first card if we're not showing all of them. + int start_index = (p == 1 && !show_all_dealers_card ? 1 : 0); + absl::StrAppend(&result, "Cards: ", + absl::StrJoin(CardsToStrings(cards_[p], start_index), " "), + "\n"); + } + + return result; +} + +std::string BlackjackState::ToString() const { + return StateToString(/*show_all_dealers_card=*/true); +} + +void BlackjackState::ObservationTensor(Player player, + absl::Span values) const { + std::fill(values.begin(), values.end(), 0); + int offset = 0; + + // Whose turn is it? + if (cur_player_ + 1 >= 0) { // do not support kTerminalPlayerId + values[cur_player_ + 1] = 1; // to support kChancePlayerId (equals to -1) + } + offset += game_->NumPlayers() + 1; + + // Terminal? + values[offset] = IsTerminal() ? 
1 : 0; + offset += 1; + + // Player's best sum (thermometer of ones up to the value) + int player_best_sum = GetBestPlayerTotal(player); + for (int i = 0; i < kMaxSum; ++i) { + values[offset + i] = i <= player_best_sum ? 1 : 0; + } + offset += kMaxSum; + + // Dealer's initial visible card + if (cards_[DealerId()].size() > 1) { + values[offset + cards_[DealerId()][1]] = 1; + } + offset += kDeckSize; + + // Show each player's cards that are visible. + bool show_all_dealers_cards = player == kChancePlayerId || IsTurnOver(player); + + for (std::size_t player_id = 0; player_id < cards_.size(); player_id++) { + int start_index = 0; + if (player_id == DealerId() && !show_all_dealers_cards) { + start_index = 1; + } + for (int i = start_index; i < cards_[player_id].size(); ++i) { + int card = cards_[player_id][i]; + values[offset + card] = 1; + } + offset += kDeckSize; + } + + SPIEL_CHECK_EQ(offset, values.size()); +} + +bool BlackjackState::InitialCardsDealt(int player) const { + return cards_[player].size() >= kInitialCardsPerPlayer; +} + +int BlackjackState::CardValue(int card) const { + // Cards are indexed from 0 to kDeckSize-1; + const int rank = card % kCardsPerSuit; + if (rank == 0) { + return kAceValue; + } else if (rank <= 9) { + return rank + 1; + } else { + // Ten or a face card. + return 10; + } +} + +void BlackjackState::DealCardToPlayer(int player, int card) { + // Remove card from deck. + auto new_end = std::remove(deck_.begin(), deck_.end(), card); + if (new_end == deck_.end()) SpielFatalError("Card not present in deck"); + deck_.erase(new_end, deck_.end()); + + cards_[player].push_back(card); + const int value = CardValue(card); + if (value == kAceValue) { + num_aces_[player]++; + } else { + non_ace_total_[player] += value; + } +} + +BlackjackState::BlackjackState(std::shared_ptr game) : State(game) { + phase_ = kInitialDeal; + total_moves_ = 0; + cur_player_ = kChancePlayerId; + turn_player_ = kPlayerId; + live_players_ = 1; + + // The values are stored for the dealer as well, whose id is NumPlayers. + // See DealerId(). + non_ace_total_.resize(game_->NumPlayers() + 1, 0); + num_aces_.resize(game_->NumPlayers() + 1, 0); + turn_over_.resize(game_->NumPlayers() + 1, false); + cards_.resize(game_->NumPlayers() + 1); + + deck_.resize(kDeckSize); + std::iota(deck_.begin(), deck_.end(), 0); +} + +int BlackjackState::GetBestPlayerTotal(int player) const { + // Return the max possible total <= kApproachScore, depending on hard or soft + // aces. 'Best' refers to the max non-bust score possible for the player. + // If it is not possible, some value > kApproachScore is returned. + int total = non_ace_total_[player] + num_aces_[player]; + for (int i = 1; i <= num_aces_[player]; i++) { + int soft_total = + non_ace_total_[player] + i * 11 + (num_aces_[player] - i) * 1; + if (soft_total <= kApproachScore) { + total = std::max(total, soft_total); + } + } + return total; +} + +int BlackjackState::CurrentPlayer() const { return cur_player_; } + +int BlackjackState::NextTurnPlayer() const { + if (IsTerminal()) { + return kTerminalPlayerId; + } + return turn_over_[kPlayerId] ? DealerId() : kPlayerId; +} + +void BlackjackState::EndPlayerTurn(int player) { + turn_over_[player] = true; + turn_player_ = NextTurnPlayer(); + cur_player_ = turn_player_; + phase_ = kDealerTurn; +} + +void BlackjackState::DoApplyAction(Action move) { + SPIEL_CHECK_EQ(IsTerminal(), false); + + if (!InitialCardsDealt(DealerId())) { + // Still in the initial dealing phase. Deal the 'move' card to turn_player_. 
+ SPIEL_CHECK_EQ(IsChanceNode(), true); + + DealCardToPlayer(turn_player_, move); + cur_player_ = kChancePlayerId; + if (InitialCardsDealt(turn_player_)) { + // Next player. + turn_player_++; + if (InitialCardsDealt(DealerId())) { + // Hit/stand part of the game commences. + turn_player_ = kPlayerId; + cur_player_ = kPlayerId; + phase_ = kPlayerTurn; + } + } + return; + } + + if (IsChanceNode()) { + // Deal the 'move' card to turn_player_. + DealCardToPlayer(turn_player_, move); + cur_player_ = turn_player_; + if (GetBestPlayerTotal(turn_player_) > kApproachScore) { + if (turn_player_ != DealerId()) --live_players_; + EndPlayerTurn(turn_player_); + } + MaybeApplyDealerAction(); + return; + } + + total_moves_++; + if (move == kHit) { + cur_player_ = kChancePlayerId; + } else if (move == kStand) { + EndPlayerTurn(turn_player_); + MaybeApplyDealerAction(); + } +} + +void BlackjackState::MaybeApplyDealerAction() { + // If there are no players still live, dealer doesn't play. + if (live_players_ == 0) { + EndPlayerTurn(DealerId()); + } + + // Otherwise, hits 16 or less, stands on 17 or more. + if (cur_player_ == DealerId()) { + if (GetBestPlayerTotal(DealerId()) <= 16) { + cur_player_ = kChancePlayerId; + } else { + EndPlayerTurn(cur_player_); + } + } +} + +std::vector BlackjackState::LegalActions() const { + SPIEL_CHECK_NE(cur_player_, DealerId()); + if (IsChanceNode()) { + return LegalChanceOutcomes(); + } else if (IsTerminal()) { + return {}; + } else { + return {kHit, kStand}; + } +} + +ActionsAndProbs BlackjackState::ChanceOutcomes() const { + SPIEL_CHECK_TRUE(IsChanceNode()); + ActionsAndProbs outcomes; + outcomes.reserve(deck_.size()); + for (int card : deck_) { + outcomes.emplace_back(card, 1.0 / deck_.size()); + } + return outcomes; +} + +std::set BlackjackState::VisibleCards() const { + std::set visible_cards; + for (int i = 0; i < cards_.size(); ++i) { + for (int card_idx = 0; card_idx < cards_[i].size(); ++card_idx) { + // Hide dealer's first card if the game is not over. + if (IsTerminal() || i != DealerId() || card_idx != 0) { + visible_cards.insert(cards_[i][card_idx]); + } + } + } + return visible_cards; +} + +std::vector BlackjackState::VisibleCardsSortedVector() const { + return SetToSortedVector(VisibleCards()); +} + +int BlackjackState::DealersVisibleCard() const { + if (cards_[DealerId()].size() < 2) { + return -1; + } else { + return cards_[DealerId()][1]; + } +} + +std::vector BlackjackState::PlayerCardsSortedVector() const { + std::vector player_visible_cards = cards_[0]; + std::sort(player_visible_cards.begin(), player_visible_cards.end()); + return player_visible_cards; +} + +std::unique_ptr BlackjackState::Clone() const { + return std::unique_ptr(new BlackjackState(*this)); +} + +BlackjackGame::BlackjackGame(const GameParameters& params) + : Game(kGameType, params) {} + +} // namespace blackjack +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/blackjack/blackjack.h b/scenarios/bargaining/open_spiel/open_spiel/games/blackjack/blackjack.h new file mode 100644 index 0000000..a82a180 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/blackjack/blackjack.h @@ -0,0 +1,150 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_BLACKJACK_H_ +#define OPEN_SPIEL_GAMES_BLACKJACK_H_ + +#include +#include +#include +#include +#include + +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// A simple game that includes chance and imperfect information +// http://en.wikipedia.org/wiki/Blackjack +// Currently, it supports only a single player against the dealer. + +namespace open_spiel { +namespace blackjack { + +constexpr int kNumSuits = 4; +constexpr int kCardsPerSuit = 13; +constexpr int kDeckSize = kCardsPerSuit * kNumSuits; +constexpr int kMaxSum = 30; // player busts by hitting on 20 and receiving a 10 +constexpr const char* kHiddenCardStr = "??"; + +// Moves. +enum ActionType { kHit = 0, kStand = 1 }; + +enum Phase { kInitialDeal = 0, kPlayerTurn = 1, kDealerTurn = 2 }; + +class BlackjackGame; + +class BlackjackState : public State { + public: + BlackjackState(const BlackjackState&) = default; + BlackjackState(std::shared_ptr game); + + Player CurrentPlayer() const override; + std::string ActionToString(Player player, Action move_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + ActionsAndProbs ChanceOutcomes() const override; + + std::unique_ptr Clone() const override; + + std::vector LegalActions() const override; + + int GetBestPlayerTotal(int player) const; + int DealerId() const; + int NextTurnPlayer() const; + bool InitialCardsDealt(int player) const; + int CardValue(int card) const; + void EndPlayerTurn(int player); + void DealCardToPlayer(int player, int card); + bool IsTurnOver(int player) const { return turn_over_[player]; } + std::vector cards(int player) const { return cards_[player]; } + std::string InformationStateString(Player player) const; + std::unique_ptr ResampleFromInfostate( + int player_id, std::function rng) const override; + + Phase phase() const { return phase_; } + std::set VisibleCards() const; + std::vector VisibleCardsSortedVector() const; + std::vector PlayerCardsSortedVector() const; + + // Returns the dealer's initial visible card, if it's been dealt. Otherwise + // returns -1. + int DealersVisibleCard() const; + + protected: + void DoApplyAction(Action move_id) override; + + private: + void MaybeApplyDealerAction(); + std::string StateToString(bool show_all_dealers_card) const; + + // Initialize to bad/invalid values. Use open_spiel::NewInitialState() + + Phase phase_ = kInitialDeal; + int total_moves_ = -1; // Total num moves taken during the game. + Player cur_player_ = -1; // Player to play. + int turn_player_ = -1; // Whose actual turn is it. At chance nodes, we need + // to remember whose is playing for next turns. + int live_players_ = 0; // Number of players who haven't yet bust. + std::vector + non_ace_total_; // Total value of cards for each player, excluding aces. + std::vector num_aces_; // Number of aces owned by each player. 
+ std::vector turn_over_; // Whether each player's turn is over. + std::vector deck_; // Remaining cards in the deck. + std::vector> cards_; // Cards dealt to each player. +}; + +class BlackjackGame : public Game { + public: + explicit BlackjackGame(const GameParameters& params); + + int NumDistinctActions() const override { return 2; } + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new BlackjackState(shared_from_this())); + } + int MaxChanceOutcomes() const override { return kDeckSize; } + int MaxGameLength() const { return 12; } + + int NumPlayers() const override { return 1; } + double MinUtility() const override { return -1; } + double MaxUtility() const override { return +1; } + std::vector ObservationTensorShape() const override { + return { + NumPlayers() + 1 + // turn (incl. chance) + 1 + // is terminal? + kMaxSum + // player best sum + kDeckSize + // dealer's initial visible card + kDeckSize * (NumPlayers() + 1) // many-hot of the visible cards + }; + }; +}; + +std::string CardToString(int card); +std::string PhaseToString(Phase phase); +std::vector CardsToStrings(const std::vector& cards, + int start_index = 0); + +// Gets a card id from a string representation. Returns -1 if the string is not +// a valid card. +int GetCardByString(std::string card_string); + + +} // namespace blackjack +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_BLACKJACK_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/blackjack/blackjack_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/blackjack/blackjack_test.cc new file mode 100644 index 0000000..77e616d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/blackjack/blackjack_test.cc @@ -0,0 +1,224 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/blackjack/blackjack.h" + +#include +#include +#include +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace blackjack { +namespace { + +namespace testing = open_spiel::testing; + +void NoBustPlayerWinTest() { + // Cards are indexed from 0 to 51. + std::shared_ptr game = LoadGame("blackjack"); + std::unique_ptr state = game->NewInitialState(); + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(0); // Deal CA to Player. + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(13); // Deal DA to Player. + + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(11); // Deal CQ to Dealer. + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(4); // Deal C5 to Dealer. + + SPIEL_CHECK_TRUE(!state->IsChanceNode()); + state->ApplyAction(0); // Player hits. + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(8); // Deal C9 to Player. + + SPIEL_CHECK_TRUE(!state->IsChanceNode()); + state->ApplyAction(1); // Player stands. 
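+  // The dealer holds Q+5 = 15 and, hitting on 16 or less, must draw again.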
+ + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(2); // Deal C3 to Dealer. + + SPIEL_CHECK_TRUE(state->IsTerminal()); // Dealer stands. + + // Player wins. + SPIEL_CHECK_EQ(state->PlayerReturn(0), 1); +} + +void DealerBustTest() { + // Cards are indexed from 0 to 51. + std::shared_ptr game = LoadGame("blackjack"); + std::unique_ptr state = game->NewInitialState(); + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(8); // Deal C9 to Player. + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(4); // Deal C5 to Player. + + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(10); // Deal CJ to Dealer. + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(2); // Deal C3 to Dealer. + + SPIEL_CHECK_TRUE(!state->IsChanceNode()); + state->ApplyAction(1); // Player stands. + + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(21); // Deal D9 to Dealer. + + // Player wins. + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_EQ(state->PlayerReturn(0), 1); +} + +void PlayerBustTest() { + // Cards are indexed from 0 to 51. + std::shared_ptr game = LoadGame("blackjack"); + std::unique_ptr state = game->NewInitialState(); + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(9); // Deal C10 to Player. + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(22); // Deal D10 to Player. + + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(8); // Deal C9 to Dealer. + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(4); // Deal C5 to Dealer. + + SPIEL_CHECK_TRUE(!state->IsChanceNode()); + state->ApplyAction(0); // Player hits. + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(21); // Deal D9 to Player. + + // Player loses. + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_EQ(state->PlayerReturn(0), -1); +} + +void DealersFirstCardHiddenTest() { + std::shared_ptr game = LoadGame("blackjack"); + std::unique_ptr state = game->NewInitialState(); + + // Deal cards to player. + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(9); + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(22); + + // Deal cards to dealer. + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(8); + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(4); + + // Cast state to blackjack state. + auto blackjack_state = dynamic_cast(state.get()); + SPIEL_CHECK_TRUE(blackjack_state != nullptr); + std::set visible_cards = blackjack_state->VisibleCards(); + + // Dealer's first card. + SPIEL_CHECK_TRUE(visible_cards.find(8) == visible_cards.end()); + + // Remaining cards. + SPIEL_CHECK_TRUE(visible_cards.find(9) != visible_cards.end()); + SPIEL_CHECK_TRUE(visible_cards.find(22) != visible_cards.end()); + SPIEL_CHECK_TRUE(visible_cards.find(4) != visible_cards.end()); +} + +void InfoStateStringDoesNotContainDealerFirstCardTest() { + std::shared_ptr game = LoadGame("blackjack"); + std::unique_ptr state = game->NewInitialState(); + + // Deal cards to player. + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(9); + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(22); + + // Deal cards to dealer. 
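+  // The dealer's face-down card (action 8 below) must not appear in the
+  // information state string checked at the end of this test.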
+ SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(8); + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(4); + + std::string info_state_string = state->InformationStateString(0); + SPIEL_CHECK_EQ(info_state_string, "9 22 4"); +} + +void ResamplingHistoryTest() { + std::shared_ptr game = LoadGame("blackjack"); + std::unique_ptr state = game->NewInitialState(); + + // Deal cards to player. + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(9); + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(22); + + // Deal cards to dealer. + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(8); + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(4); + + auto b_original = dynamic_cast(state.get()); + std::string original_observation_string = b_original->ObservationString(0); + + std::vector random_seeds = {0.12345, 0.6123, 0.0101}; + // Resample from infostate. + for (float seed : random_seeds) { + std::unique_ptr resampled_state = + state->ResampleFromInfostate(0, [seed]() { return seed; }); + + auto b_resampled = dynamic_cast(resampled_state.get()); + + // All cards should be the same except for the dealer's first card. + SPIEL_CHECK_EQ(b_original->cards(0)[0], b_resampled->cards(0)[0]); + SPIEL_CHECK_EQ(b_original->cards(0)[0], b_resampled->cards(0)[0]); + SPIEL_CHECK_EQ(b_original->cards(1)[1], b_resampled->cards(1)[1]); + + SPIEL_CHECK_NE(b_original->cards(1)[0], b_resampled->cards(1)[0]); + + // Check that dealer's first card is not visible. + std::set visible_cards = b_resampled->VisibleCards(); + SPIEL_CHECK_TRUE(visible_cards.find(b_resampled->cards(1)[0]) == + visible_cards.end()); + + // Observation strings should be the same. + SPIEL_CHECK_TRUE(original_observation_string == + b_resampled->ObservationString(0)); + } +} + +void BasicBlackjackTests() { + testing::LoadGameTest("blackjack"); + testing::RandomSimTest(*LoadGame("blackjack"), 100); + NoBustPlayerWinTest(); + PlayerBustTest(); + DealerBustTest(); + DealersFirstCardHiddenTest(); + InfoStateStringDoesNotContainDealerFirstCardTest(); + ResamplingHistoryTest(); +} + +} // namespace +} // namespace blackjack +} // namespace open_spiel + +int main(int argc, char **argv) { + open_spiel::blackjack::BasicBlackjackTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/blotto/blotto.cc b/scenarios/bargaining/open_spiel/open_spiel/games/blotto/blotto.cc new file mode 100644 index 0000000..6741398 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/blotto/blotto.cc @@ -0,0 +1,204 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/blotto/blotto.h" + +#include + +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace blotto { + +constexpr const int kDefaultNumCoins = 10; +constexpr const int kDefaultNumFields = 3; +constexpr const int kDefaultNumPlayers = 2; + +namespace { + +const GameType kGameType{/*short_name=*/"blotto", + /*long_name=*/"Blotto", + GameType::Dynamics::kSimultaneous, + GameType::ChanceMode::kDeterministic, + GameType::Information::kOneShot, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/10, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"coins", GameParameter(kDefaultNumCoins)}, + {"fields", GameParameter(kDefaultNumFields)}, + {"players", GameParameter(kDefaultNumPlayers)}}}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new BlottoGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); +} // namespace + +BlottoState::BlottoState(std::shared_ptr game, int coins, + int fields, const ActionMap* action_map, + const std::vector* legal_actions) + : NFGState(game), + coins_(coins), + fields_(fields), + joint_action_({}), + action_map_(action_map), + legal_actions_(legal_actions), + returns_({}) {} + +void BlottoState::DoApplyActions(const std::vector& actions) { + joint_action_ = actions; + + // Now determine returns. + returns_.resize(num_players_, 0); + std::vector scores(num_players_, 0); + std::vector> player_actions; + + for (int f = 0; f < fields_; ++f) { + int num_winners = 0; + int winner = 0; + int max_value = -1; + + for (auto p = Player{0}; p < num_players_; ++p) { + // Get the expanded action if necessary. + if (p >= player_actions.size()) { + player_actions.push_back(action_map_->at(joint_action_[p])); + } + + if (player_actions[p][f] > max_value) { + num_winners = 1; + winner = p; + max_value = player_actions[p][f]; + } else if (player_actions[p][f] == max_value) { + num_winners++; + } + } + + // Give the winner of this field one point. Draw if tied. + if (num_winners == 1) { + scores[winner]++; + } + } + + // Find the global winner(s). + std::set winners; + int max_points = 0; + for (auto p = Player{0}; p < num_players_; ++p) { + if (scores[p] > max_points) { + max_points = scores[p]; + winners = {p}; + } else if (scores[p] == max_points) { + winners.insert(p); + } + } + + // Finally, assign returns. Each winner gets 1/num_winners, each loser gets + // -1 / num_losers. + for (auto p = Player{0}; p < num_players_; ++p) { + if (winners.size() == num_players_) { + // All players won same number of fields. Draw. + returns_[p] = 0; + } else if (winners.find(p) != winners.end()) { + SPIEL_CHECK_GE(winners.size(), 1); + returns_[p] = 1.0 / winners.size(); + } else { + SPIEL_CHECK_GE(num_players_ - winners.size(), 1); + returns_[p] = -1.0 / (num_players_ - winners.size()); + } + } +} + +std::vector BlottoState::LegalActions(Player player) const { + if (IsTerminal()) return {}; + return (*legal_actions_); +} + +std::string BlottoState::ActionToString(Player player, Action move_id) const { + return game_->ActionToString(player, move_id); +} + +std::string BlottoState::ToString() const { + std::string str = ""; + absl::StrAppend(&str, "Terminal? 
", IsTerminal(), "\n"); + for (int p = 0; p < joint_action_.size(); ++p) { + absl::StrAppend(&str, "P", p, + " action: ", ActionToString(p, joint_action_[p]), "\n"); + } + return str; +} + +bool BlottoState::IsTerminal() const { return !joint_action_.empty(); } + +std::vector BlottoState::Returns() const { + return IsTerminal() ? returns_ : std::vector(num_players_, 0.); +} + +std::unique_ptr BlottoState::Clone() const { + return std::unique_ptr(new BlottoState(*this)); +} + +std::string BlottoGame::ActionToString(Player player, Action action) const { + return "[" + absl::StrJoin(action_map_->at(action), ",") + "]"; +} + +int BlottoGame::NumDistinctActions() const { return num_distinct_actions_; } + +void BlottoGame::CreateActionMapRec(int* count, int coins_left, + const std::vector& action) { + if (action.size() == fields_) { + if (coins_left == 0) { + // All coins used, valid move. + (*action_map_)[*count] = action; + (*count)++; + return; + } else { + // Not all coins used, invalid move. + return; + } + } else { + for (int num_coins = 0; num_coins <= coins_left; ++num_coins) { + std::vector new_action = action; + new_action.push_back(num_coins); + CreateActionMapRec(count, coins_left - num_coins, new_action); + } + } +} + +BlottoGame::BlottoGame(const GameParameters& params) + : NormalFormGame(kGameType, params), + num_distinct_actions_(0), // Set properly after CreateActionMap. + coins_(ParameterValue("coins")), + fields_(ParameterValue("fields")), + players_(ParameterValue("players")) { + action_map_.reset(new ActionMap()); + CreateActionMapRec(&num_distinct_actions_, coins_, {}); + + // The action set is static for all states, so create it only once. + legal_actions_.reset(new std::vector(num_distinct_actions_)); + for (Action action = 0; action < num_distinct_actions_; ++action) { + (*legal_actions_)[action] = action; + } +} + +} // namespace blotto +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/blotto/blotto.h b/scenarios/bargaining/open_spiel/open_spiel/games/blotto/blotto.h new file mode 100644 index 0000000..e2d5d39 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/blotto/blotto.h @@ -0,0 +1,102 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_BLOTTO_H_ +#define OPEN_SPIEL_GAMES_BLOTTO_H_ + +#include +#include +#include +#include + +#include "open_spiel/normal_form_game.h" + +// An implementation of the Blotto: https://en.wikipedia.org/wiki/Blotto_game +// This version supports n >= 2 players. Each player distributes M coins on N +// fields. Each field is won by at most one player: the one with the most +// coins on the specific field; if there is a draw, the field is considered +// drawn (not won by any player), and hence ignored in the scoring. The winner +// is the player with the most won fields: all player have won the same number +// of fields, they each receive 0. 
Otherwise, the winners share 1 / (number of +// winners) and losers share -1 / (number of losers), reducing to {-1,0,1} in +// the 2-player case. +// +// Parameters: +// "coins" int number of coins each player starts with (default: 10) +// "fields" int number of fields (default: 3) +// "players" int number of players (default: 2) + +namespace open_spiel { +namespace blotto { + +using ActionMap = std::unordered_map>; + +class BlottoState : public NFGState { + public: + BlottoState(std::shared_ptr game, int coins, int fields, + const ActionMap* action_map, + const std::vector* legal_actions_); + + std::vector LegalActions(Player player) const override; + std::string ActionToString(Player player, Action move_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::unique_ptr Clone() const override; + + protected: + void DoApplyActions(const std::vector& actions) override; + + private: + int coins_; + int fields_; + std::vector joint_action_; // The action taken by all the players. + const ActionMap* action_map_; + const std::vector* legal_actions_; + std::vector returns_; +}; + +class BlottoGame : public NormalFormGame { + public: + explicit BlottoGame(const GameParameters& params); + + int NumDistinctActions() const override; + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new BlottoState(shared_from_this(), coins_, + fields_, action_map_.get(), + legal_actions_.get())); + } + + int NumPlayers() const override { return players_; } + double MinUtility() const override { return -1; } + absl::optional UtilitySum() const override { return 0; } + double MaxUtility() const override { return +1; } + std::string ActionToString(Player player, Action action) const override; + + private: + void CreateActionMapRec(int* count, int coins_left, + const std::vector& action); + + int num_distinct_actions_; + int coins_; + int fields_; + int players_; + std::unique_ptr action_map_; + std::unique_ptr> legal_actions_; +}; + +} // namespace blotto +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_BLOTTO_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/blotto/blotto_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/blotto/blotto_test.cc new file mode 100644 index 0000000..2d838fc --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/blotto/blotto_test.cc @@ -0,0 +1,38 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
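+
+// Note on the game under test (a worked example, assuming the default
+// parameters coins=10 and fields=3 defined in blotto.cc): there are
+// C(12, 2) = 66 ways to split 10 coins over 3 fields, so each player has 66
+// distinct actions. Payoffs follow blotto.cc: e.g. with 3 players where
+// players 0 and 1 tie for the most fields won, each of them scores +1/2 and
+// player 2 scores -1.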
+ +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace blotto { +namespace { + +namespace testing = open_spiel::testing; + +void BasicBlottoTests() { + testing::LoadGameTest("blotto"); + testing::NoChanceOutcomesTest(*LoadGame("blotto")); + testing::RandomSimTest(*LoadGame("blotto"), 100); + for (Player players = 3; players <= 5; players++) { + testing::RandomSimTest( + *LoadGame("blotto", {{"players", GameParameter(players)}}), 100); + } +} + +} // namespace +} // namespace blotto +} // namespace open_spiel + +int main(int argc, char** argv) { open_spiel::blotto::BasicBlottoTests(); } diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/breakthrough/breakthrough.cc b/scenarios/bargaining/open_spiel/open_spiel/games/breakthrough/breakthrough.cc new file mode 100644 index 0000000..ad61661 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/breakthrough/breakthrough.cc @@ -0,0 +1,446 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/breakthrough/breakthrough.h" + +#include +#include +#include +#include + +#include "open_spiel/game_parameters.h" +#include "open_spiel/utils/tensor_view.h" + +namespace open_spiel { +namespace breakthrough { +namespace { + +// Number of unique directions each piece can take. +constexpr int kNumDirections = 6; + +// Numbers of rows needed to have 2 full rows of pieces. +constexpr int kNumRowsForFullPieces = 6; + +// Direction offsets for black, then white. 
+constexpr std::array kDirRowOffsets = { + {1, 1, 1, -1, -1, -1}}; + +constexpr std::array kDirColOffsets = { + {-1, 0, 1, -1, 0, 1}}; + +// Facts about the game +const GameType kGameType{/*short_name=*/"breakthrough", + /*long_name=*/"Breakthrough", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"rows", GameParameter(kDefaultRows)}, + {"columns", GameParameter(kDefaultColumns)}}}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new BreakthroughGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +int StateToPlayer(CellState state) { + switch (state) { + case CellState::kBlack: + return 0; + case CellState::kWhite: + return 1; + default: + SpielFatalError("No player id for this cell state"); + } +} + +CellState PlayerToState(Player player) { + switch (player) { + case 0: + return CellState::kBlack; + case 1: + return CellState::kWhite; + default: + SpielFatalError("No cell state for this player id"); + } +} + +std::string CellToString(CellState state) { + switch (state) { + case CellState::kBlack: + return "b"; + case CellState::kWhite: + return "w"; + case CellState::kEmpty: + return "."; + default: + SpielFatalError("Unrecognized cell state"); + } +} + +CellState OpponentState(CellState state) { + return PlayerToState(1 - StateToPlayer(state)); +} + +std::string RowLabel(int rows, int row) { + std::string label = ""; + label += static_cast('1' + (rows - 1 - row)); + return label; +} + +std::string ColLabel(int col) { + std::string label = ""; + label += static_cast('a' + col); + return label; +} + +} // namespace + +BreakthroughState::BreakthroughState(std::shared_ptr game, int rows, + int cols) + : State(game), rows_(rows), cols_(cols) { + SPIEL_CHECK_GT(rows_, 1); + SPIEL_CHECK_GT(cols_, 1); + + board_ = std::vector(rows_ * cols_, CellState::kEmpty); + for (int r = 0; r < rows_; r++) { + for (int c = 0; c < cols_; c++) { + // Only use two rows if there are at least 6 rows. + if (r == 0 || (rows_ >= kNumRowsForFullPieces && r == 1)) { + SetBoard(r, c, CellState::kBlack); + } else if (r == (rows_ - 1) || + (rows_ >= kNumRowsForFullPieces && r == (rows_ - 2))) { + SetBoard(r, c, CellState::kWhite); + } + } + } + + winner_ = kInvalidPlayer; + pieces_[0] = pieces_[1] = cols_ * (rows_ >= kNumRowsForFullPieces ? 2 : 1); + cur_player_ = 0; + total_moves_ = 0; +} + +int BreakthroughState::CurrentPlayer() const { + if (IsTerminal()) { + return kTerminalPlayerId; + } else { + return cur_player_; + } +} + +void BreakthroughState::DoApplyAction(Action action) { + std::vector values = + UnrankActionMixedBase(action, {rows_, cols_, kNumDirections, 2}); + int r1 = values[0]; + int c1 = values[1]; + int dir = values[2]; + bool capture = values[3] == 1; + int r2 = r1 + kDirRowOffsets[dir]; + int c2 = c1 + kDirColOffsets[dir]; + + SPIEL_CHECK_TRUE(InBounds(r1, c1)); + SPIEL_CHECK_TRUE(InBounds(r2, c2)); + + // Remove pieces if captured. 
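+  // If the destination square holds a white piece the mover must be black
+  // (and vice versa); the captured side's piece count is decremented and the
+  // SPIEL_CHECKs assert that the colours are consistent with the player to
+  // move.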
+ if (board(r2, c2) == CellState::kWhite) { + pieces_[StateToPlayer(CellState::kWhite)]--; + SPIEL_CHECK_EQ(board(r1, c1), CellState::kBlack); + SPIEL_CHECK_EQ(cur_player_, StateToPlayer(CellState::kBlack)); + } else if (board(r2, c2) == CellState::kBlack) { + pieces_[StateToPlayer(CellState::kBlack)]--; + SPIEL_CHECK_EQ(board(r1, c1), CellState::kWhite); + SPIEL_CHECK_EQ(cur_player_, StateToPlayer(CellState::kWhite)); + } + + // Move the piece. + if (capture) { + SPIEL_CHECK_EQ(board(r2, c2), OpponentState(board(r1, c1))); + } + SetBoard(r2, c2, board(r1, c1)); + SetBoard(r1, c1, CellState::kEmpty); + + // Check for winner. + if (cur_player_ == 0 && r2 == (rows_ - 1)) { + winner_ = 0; + } else if (cur_player_ == 1 && r2 == 0) { + winner_ = 1; + } + + cur_player_ = NextPlayerRoundRobin(cur_player_, kNumPlayers); + total_moves_++; +} + +std::string BreakthroughState::ActionToString(Player player, + Action action) const { + std::vector values = + UnrankActionMixedBase(action, {rows_, cols_, kNumDirections, 2}); + int r1 = values[0]; + int c1 = values[1]; + int dir = values[2]; + bool capture = values[3] == 1; + int r2 = r1 + kDirRowOffsets[dir]; + int c2 = c1 + kDirColOffsets[dir]; + + std::string action_string = ""; + absl::StrAppend(&action_string, ColLabel(c1)); + absl::StrAppend(&action_string, RowLabel(rows_, r1)); + absl::StrAppend(&action_string, ColLabel(c2)); + absl::StrAppend(&action_string, RowLabel(rows_, r2)); + if (capture) { + absl::StrAppend(&action_string, "*"); + } + + return action_string; +} + +std::vector BreakthroughState::LegalActions() const { + std::vector movelist; + if (IsTerminal()) return movelist; + const Player player = CurrentPlayer(); + CellState mystate = PlayerToState(player); + std::vector action_bases = {rows_, cols_, kNumDirections, 2}; + std::vector action_values = {0, 0, 0, 0}; + + for (int r = 0; r < rows_; r++) { + for (int c = 0; c < cols_; c++) { + if (board(r, c) == mystate) { + for (int o = 0; o < kNumDirections / 2; o++) { + int dir = player * kNumDirections / 2 + o; + int rp = r + kDirRowOffsets[dir]; + int cp = c + kDirColOffsets[dir]; + + if (InBounds(rp, cp)) { + action_values[0] = r; + action_values[1] = c; + action_values[2] = dir; + if (board(rp, cp) == CellState::kEmpty) { + // Regular move. 
+ action_values[3] = 0; + movelist.push_back( + RankActionMixedBase(action_bases, action_values)); + } else if ((o == 0 || o == 2) && + board(rp, cp) == OpponentState(mystate)) { + // Capture move (can only capture diagonally) + action_values[3] = 1; + movelist.push_back( + RankActionMixedBase(action_bases, action_values)); + } + } + } + } + } + } + + return movelist; +} + +bool BreakthroughState::InBounds(int r, int c) const { + return (r >= 0 && r < rows_ && c >= 0 && c < cols_); +} + +std::string BreakthroughState::ToString() const { + std::string result = ""; + + for (int r = 0; r < rows_; r++) { + absl::StrAppend(&result, RowLabel(rows_, r)); + + for (int c = 0; c < cols_; c++) { + absl::StrAppend(&result, CellToString(board(r, c))); + } + + result.append("\n"); + } + + absl::StrAppend(&result, " "); + for (int c = 0; c < cols_; c++) { + absl::StrAppend(&result, ColLabel(c)); + } + absl::StrAppend(&result, "\n"); + + return result; +} + +int BreakthroughState::observation_plane(int r, int c) const { + int plane = -1; + switch (board(r, c)) { + case CellState::kBlack: + plane = 0; + break; + case CellState::kWhite: + plane = 1; + break; + case CellState::kEmpty: + plane = 2; + break; + default: + std::cerr << "Invalid character on board: " << CellToString(board(r, c)) + << std::endl; + plane = -1; + break; + } + + return plane; +} + +bool BreakthroughState::IsTerminal() const { + return (winner_ >= 0 || (pieces_[0] == 0 || pieces_[1] == 0)); +} + +std::vector BreakthroughState::Returns() const { + if (winner_ == 0 || pieces_[1] == 0) { + return {1.0, -1.0}; + } else if (winner_ == 1 || pieces_[0] == 0) { + return {-1.0, 1.0}; + } else { + return {0.0, 0.0}; + } +} + +std::string BreakthroughState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void BreakthroughState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + TensorView<3> view(values, {kCellStates, rows_, cols_}, true); + + for (int r = 0; r < rows_; r++) { + for (int c = 0; c < cols_; c++) { + int plane = observation_plane(r, c); + SPIEL_CHECK_TRUE(plane >= 0 && plane < kCellStates); + view[{plane, r, c}] = 1.0; + } + } +} + +void BreakthroughState::UndoAction(Player player, Action action) { + std::vector values = + UnrankActionMixedBase(action, {rows_, cols_, kNumDirections, 2}); + int r1 = values[0]; + int c1 = values[1]; + int dir = values[2]; + bool capture = values[3] == 1; + int r2 = r1 + kDirRowOffsets[dir]; + int c2 = c1 + kDirColOffsets[dir]; + + cur_player_ = PreviousPlayerRoundRobin(cur_player_, 2); + total_moves_--; + + // Undo win status. + winner_ = kInvalidPlayer; + + // Move back the piece, and put back the opponent's piece if necessary. + // The move is (r1, c1) -> (r2, c2) where r is row and c is column. + SetBoard(r1, c1, board(r2, c2)); + SetBoard(r2, c2, CellState::kEmpty); + if (capture) { + if (board(r1, c1) == CellState::kWhite) { + // It was a white move: put back the black piece. + SetBoard(r2, c2, CellState::kBlack); + pieces_[kBlackPlayerId]++; + } else if (board(r1, c1) == CellState::kBlack) { + // It was a black move: put back the white piece. 
+ SetBoard(r2, c2, CellState::kWhite); + pieces_[kWhitePlayerId]++; + } + } + history_.pop_back(); + --move_number_; +} + +std::unique_ptr BreakthroughState::Clone() const { + return std::unique_ptr(new BreakthroughState(*this)); +} + +BreakthroughGame::BreakthroughGame(const GameParameters& params) + : Game(kGameType, params), + rows_(ParameterValue("rows")), + cols_(ParameterValue("columns")) {} + +int BreakthroughGame::NumDistinctActions() const { + return rows_ * cols_ * kNumDirections * 2; +} + +std::string BreakthroughState::Serialize() const { + std::string str = ""; + // Serialize the board state. + for (int r = 0; r < rows_; r++) { + for (int c = 0; c < cols_; c++) { + absl::StrAppend(&str, CellToString(board(r, c))); + } + } + // Append current player information. + absl::StrAppend(&str, std::to_string(cur_player_)); + return str; +} + +std::unique_ptr BreakthroughGame::DeserializeState( + const std::string& str) const { + std::unique_ptr state = NewInitialState(); + + if (str.length() != rows_ * cols_ + 1) { + SpielFatalError("Incorrect number of characters in string."); + return std::unique_ptr(); + } + + BreakthroughState* bstate = dynamic_cast(state.get()); + + bstate->SetPieces(0, 0); + bstate->SetPieces(1, 0); + int i = 0; + for (int r = 0; r < rows_; r++) { + for (int c = 0; c < cols_; c++) { + if (str.at(i) == 'b') { + bstate->SetPieces(0, bstate->pieces(0) + 1); + bstate->SetBoard(r, c, CellState::kBlack); + } else if (str.at(i) == 'w') { + bstate->SetPieces(1, bstate->pieces(1) + 1); + bstate->SetBoard(r, c, CellState::kWhite); + } else if (str.at(i) == '.') { + bstate->SetBoard(r, c, CellState::kEmpty); + } else { + std::string error = "Invalid character in std::string: "; + error += str.at(i); + SpielFatalError(error); + return std::unique_ptr(); + } + + i++; + } + } + + // -'0' to get the int value. + bstate->Set_cur_player(str.at(i) - '0'); + return state; +} + +} // namespace breakthrough +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/breakthrough/breakthrough.h b/scenarios/bargaining/open_spiel/open_spiel/games/breakthrough/breakthrough.h new file mode 100644 index 0000000..dfea896 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/breakthrough/breakthrough.h @@ -0,0 +1,147 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
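+
+// Note (worked example, not part of the original header comment): actions are
+// ranked in the mixed radix {rows, cols, kNumDirections, 2} -- origin row,
+// origin column, direction, capture flag -- so the default 8x8 board has
+// 8 * 8 * 6 * 2 = 768 distinct actions.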
+ +#ifndef OPEN_SPIEL_GAMES_BREAKTHROUGH_H_ +#define OPEN_SPIEL_GAMES_BREAKTHROUGH_H_ + +#include +#include +#include +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// Breakthrough, a game used in the general game-play competition +// http://en.wikipedia.org/wiki/Breakthrough_%28board_game%29 +// +// Parameters: +// "columns" int number of columns on the board (default = 8) +// "rows" int number of rows on the board (default = 8) + +namespace open_spiel { +namespace breakthrough { + +inline constexpr int kNumPlayers = 2; +inline constexpr int kBlackPlayerId = 0; +inline constexpr int kWhitePlayerId = 1; +inline constexpr int kCellStates = + 1 + kNumPlayers; // player 0, player 1, empty. +inline constexpr int kDefaultRows = 8; +inline constexpr int kDefaultColumns = 8; + +// State of a cell. +enum class CellState { + kEmpty, + kBlack, + kWhite, +}; + +class BreakthroughState : public State { + public: + explicit BreakthroughState(std::shared_ptr game, int rows, + int cols); + Player CurrentPlayer() const override; + std::string ActionToString(Player player, Action action) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + void UndoAction(Player player, Action action) override; + + bool InBounds(int r, int c) const; + void SetBoard(int r, int c, CellState cs) { board_[r * cols_ + c] = cs; } + void SetPieces(int idx, int value) { pieces_[idx] = value; } + void Set_cur_player(int player) { cur_player_ = player; } + CellState board(int row, int col) const { return board_[row * cols_ + col]; } + int pieces(int idx) const { return pieces_[idx]; } + int rows() const { return rows_; } + int cols() const { return cols_; } + std::vector LegalActions() const override; + std::string Serialize() const override; + + protected: + void DoApplyAction(Action action) override; + + private: + int observation_plane(int r, int c) const; + + // Fields sets to bad/invalid values. Use Game::NewInitialState(). + Player cur_player_ = kInvalidPlayer; + int winner_ = kInvalidPlayer; + int total_moves_ = -1; + std::array pieces_; + int rows_ = -1; + int cols_ = -1; + std::vector board_; // for (row,col) we use row*cols_ + col. +}; + +class BreakthroughGame : public Game { + public: + explicit BreakthroughGame(const GameParameters& params); + int NumDistinctActions() const override; + std::unique_ptr NewInitialState() const override { + return std::unique_ptr( + new BreakthroughState(shared_from_this(), rows_, cols_)); + } + std::unique_ptr NewInitialState(const std::string& str) + const override { + return DeserializeState(str); + } + + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { return -1; } + absl::optional UtilitySum() const override { return 0; } + double MaxUtility() const override { return 1; } + std::vector ObservationTensorShape() const override { + return {kCellStates, rows_, cols_}; + } + + // Each piece must move forward from its current position, so the second last + // row on the opponent's side is a distance of rows_ - 2 for the front row, + // and rows_ - 1 for the back row (= 2*rows_ - 3). This can be done for each + // column, and for both players, and there is one final move to step onto the + // last winning row. 
As such, the formula for maximum game length is: + int MaxGameLength() const override { + return (2 * (2 * rows_ - 3) * cols_) + 1; + } + + std::unique_ptr DeserializeState( + const std::string& str) const override; + + private: + int rows_ = -1; + int cols_ = -1; +}; + +inline std::ostream& operator<<(std::ostream& stream, const CellState& state) { + switch (state) { + case CellState::kBlack: + return stream << "Black"; + case CellState::kWhite: + return stream << "White"; + case CellState::kEmpty: + return stream << "Empty"; + default: + SpielFatalError("Unknown cell state"); + } +} + +} // namespace breakthrough +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_BREAKTHROUGH_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/breakthrough/breakthrough_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/breakthrough/breakthrough_test.cc new file mode 100644 index 0000000..30007c8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/breakthrough/breakthrough_test.cc @@ -0,0 +1,47 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/breakthrough/breakthrough.h" + +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace breakthrough { +namespace { + +namespace testing = open_spiel::testing; + +void BasicSerializationTest() { + std::shared_ptr game = LoadGame("breakthrough"); + std::unique_ptr state = game->NewInitialState(); + std::unique_ptr state2 = game->DeserializeState(state->Serialize()); + SPIEL_CHECK_EQ(state->ToString(), state2->ToString()); +} + +void BasicBreakthroughTests() { + testing::LoadGameTest("breakthrough"); + testing::NoChanceOutcomesTest(*LoadGame("breakthrough")); + testing::RandomSimTest(*LoadGame("breakthrough"), 100); + testing::RandomSimTestWithUndo(*LoadGame("breakthrough"), 1); +} + +} // namespace +} // namespace breakthrough +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::breakthrough::BasicSerializationTest(); + open_spiel::breakthrough::BasicBreakthroughTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/bridge/bridge.cc b/scenarios/bargaining/open_spiel/open_spiel/games/bridge/bridge.cc new file mode 100644 index 0000000..1823ac7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/bridge/bridge.cc @@ -0,0 +1,1046 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +#define NOMINMAX +#include "open_spiel/games/bridge/bridge.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/base/attributes.h" +#include "open_spiel/abseil-cpp/absl/base/const_init.h" +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/abseil-cpp/absl/synchronization/mutex.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/games/bridge/double_dummy_solver/include/dll.h" +#include "open_spiel/games/bridge/double_dummy_solver/src/Memory.h" +#include "open_spiel/games/bridge/double_dummy_solver/src/SolverIF.h" +#include "open_spiel/games/bridge/double_dummy_solver/src/TransTable.h" +#include "open_spiel/games/bridge/double_dummy_solver/src/TransTableL.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/games/bridge/bridge_scoring.h" +#include "open_spiel/observer.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +// Our preferred version of the double_dummy_solver defines a DDS_EXTERNAL +// macro to add a prefix to the exported symbols to avoid name clashes. +// In order to compile with versions of the double_dummy_solver which do not +// do this, we define DDS_EXTERNAL as an identity if it isn't already defined. +#ifndef DDS_EXTERNAL +#define DDS_EXTERNAL(x) x +#endif + +namespace open_spiel { +namespace bridge { +namespace { + +enum Seat { kNorth, kEast, kSouth, kWest }; + +const GameType kGameType{/*short_name=*/"bridge", + /*long_name=*/"Contract Bridge", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/kNumPlayers, + /*min_num_players=*/kNumPlayers, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + { + // If true, replace the play phase with a computed + // result based on perfect-information play. + {"use_double_dummy_result", GameParameter(true)}, + // If true, the dealer's side is vulnerable. + {"dealer_vul", GameParameter(false)}, + // If true, the non-dealer's side is vulnerable. + {"non_dealer_vul", GameParameter(false)}, + // Number of played tricks in observation tensor + {"num_tricks", GameParameter(2)}, + }}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new BridgeGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +// A call is one of Pass, Double, Redouble, or a bid. +// Bids are a combination of a number of tricks (level + 6) and denomination +// (trump suit or no-trumps). +// The calls are represented in sequence: Pass, Dbl, RDbl, 1C, 1D, 1H, 1S, etc. 
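+// Worked example (assuming the usual Denomination ordering clubs < diamonds <
+// hearts < spades < no-trump from bridge_scoring.h): Bid() below maps 1C to
+// call index 3 and 7NT to call index 37, giving 38 distinct calls including
+// Pass, Dbl and RDbl; within the full action space each call is offset by
+// kBiddingActionBase = 52, so the bid 1C is action 55.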
+enum Calls { kPass = 0, kDouble = 1, kRedouble = 2 }; +inline constexpr int kFirstBid = kRedouble + 1; +int Bid(int level, Denomination denomination) { + return (level - 1) * kNumDenominations + denomination + kFirstBid; +} +int BidLevel(int bid) { return 1 + (bid - kNumOtherCalls) / kNumDenominations; } +Denomination BidSuit(int bid) { + return Denomination((bid - kNumOtherCalls) % kNumDenominations); +} + +// Cards are represented as rank * kNumSuits + suit. +Suit CardSuit(int card) { return Suit(card % kNumSuits); } +int CardRank(int card) { return card / kNumSuits; } +int Card(Suit suit, int rank) { + return rank * kNumSuits + static_cast(suit); +} + +constexpr char kRankChar[] = "23456789TJQKA"; +constexpr char kSuitChar[] = "CDHS"; + +// Ours, Left hand opponent, Partner, Right hand opponent +constexpr std::array kRelativePlayer{ // NOLINT + "Us", "LH", "Pd", "RH"}; + +std::string CardString(int card) { + return {kSuitChar[static_cast(CardSuit(card))], + kRankChar[CardRank(card)]}; +} + +constexpr char kLevelChar[] = "-1234567"; +std::string BidString(int bid) { + if (bid == kPass) return "Pass"; + if (bid == kDouble) return "Dbl"; + if (bid == kRedouble) return "RDbl"; + return {kLevelChar[BidLevel(bid)], kDenominationChar[BidSuit(bid)]}; +} + +// There are two partnerships: players 0 and 2 versus players 1 and 3. +// We call 0 and 2 partnership 0, and 1 and 3 partnership 1. +int Partnership(Player player) { return player & 1; } +int Partner(Player player) { return player ^ 2; } +} // namespace + +BridgeGame::BridgeGame(const GameParameters& params) + : Game(kGameType, params) {} + +BridgeState::BridgeState(std::shared_ptr game, + bool use_double_dummy_result, + bool is_dealer_vulnerable, + bool is_non_dealer_vulnerable, int num_tricks) + : State(game), + use_double_dummy_result_(use_double_dummy_result), + is_vulnerable_{is_dealer_vulnerable, is_non_dealer_vulnerable}, + num_tricks_(num_tricks) { + possible_contracts_.fill(true); +} + +std::string BridgeState::ActionToString(Player player, Action action) const { + return (action < kBiddingActionBase) ? CardString(action) + : BidString(action - kBiddingActionBase); +} + +std::string BridgeState::ToString() const { + std::string rv = absl::StrCat(FormatVulnerability(), FormatDeal()); + if (history_.size() > kNumCards) + absl::StrAppend(&rv, FormatAuction(/*trailing_query=*/false)); + if (num_cards_played_ > 0) absl::StrAppend(&rv, FormatPlay()); + if (IsTerminal()) absl::StrAppend(&rv, FormatResult()); + return rv; +} + +std::array FormatHand( + int player, bool mark_voids, + const std::array, kNumCards>& deal) { + std::array cards; + for (int suit = 0; suit < kNumSuits; ++suit) { + cards[suit].push_back(kSuitChar[suit]); + cards[suit].push_back(' '); + bool is_void = true; + for (int rank = kNumCardsPerSuit - 1; rank >= 0; --rank) { + if (player == deal[Card(Suit(suit), rank)]) { + cards[suit].push_back(kRankChar[rank]); + is_void = false; + } + } + if (is_void && mark_voids) absl::StrAppend(&cards[suit], "none"); + } + return cards; +} + +std::unique_ptr BridgeState::ResampleFromInfostate( + int player_id, std::function rng) const { + // Only works in the auction phase for now. 
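+  // The resampled deal keeps player_id's own 13 cards on the same seat, deals
+  // the remaining cards uniformly at random to the other seats, and then
+  // replays every post-deal action from the recorded history.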
+ SPIEL_CHECK_TRUE(phase_ == Phase::kAuction); + std::vector our_cards; + std::vector other_cards; + for (int i = 0; i < kNumCards; ++i) { + if (holder_[i] == player_id) our_cards.push_back(i); + else if (holder_[i].has_value()) other_cards.push_back(i); + } + std::unique_ptr new_state = GetGame()->NewInitialState(); + for (int i = 0; i < kNumCards; ++i) { + if (i % kNumPlayers == player_id) { + new_state->ApplyAction(our_cards.back()); + our_cards.pop_back(); + } else { + const int k = static_cast(rng() * other_cards.size()); + new_state->ApplyAction(other_cards[k]); + other_cards[k] = other_cards.back(); + other_cards.pop_back(); + } + } + for (int i = kNumCards; i < history_.size(); ++i) { + new_state->ApplyAction(history_[i].action); + } + return new_state; +} + +std::string BridgeState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + if (IsTerminal()) return ToString(); + std::string rv = FormatVulnerability(); + auto cards = FormatHand(player, /*mark_voids=*/true, holder_); + for (int suit = kNumSuits - 1; suit >= 0; --suit) + absl::StrAppend(&rv, cards[suit], "\n"); + if (history_.size() > kNumCards) + absl::StrAppend( + &rv, FormatAuction(/*trailing_query=*/phase_ == Phase::kAuction && + player == CurrentPlayer())); + if (num_cards_played_ > 0) absl::StrAppend(&rv, FormatPlay()); + return rv; +} + +std::string BridgeState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + if (IsTerminal()) return ToString(); + std::string rv = FormatVulnerability(); + auto cards = FormatHand(player, /*mark_voids=*/true, holder_); + for (int suit = kNumSuits - 1; suit >= 0; --suit) + absl::StrAppend(&rv, cards[suit], "\n"); + if (phase_ == Phase::kPlay) { + absl::StrAppend(&rv, "Contract: ", contract_.ToString(), "\n"); + } else if (phase_ == Phase::kAuction && history_.size() > kNumCards) { + absl::StrAppend( + &rv, FormatAuction(/*trailing_query=*/player == CurrentPlayer())); + } + if (num_cards_played_ > 0) { + absl::StrAppend(&rv, FormatPlayObservation(/*trailing_query=*/player == + CurrentPlayer())); + } + return rv; +} + +std::array, kNumCards> BridgeState::OriginalDeal() + const { + SPIEL_CHECK_GE(history_.size(), kNumCards); + std::array, kNumCards> deal; + for (int i = 0; i < kNumCards; ++i) + deal[history_[i].action] = (i % kNumPlayers); + return deal; +} + +std::string BridgeState::FormatDeal() const { + std::array, kNumPlayers> cards; + if (IsTerminal()) { + // Include all cards in the terminal state to make reviewing the deal easier + auto deal = OriginalDeal(); + for (auto player : {kNorth, kEast, kSouth, kWest}) { + cards[player] = FormatHand(player, /*mark_voids=*/false, deal); + } + } else { + for (auto player : {kNorth, kEast, kSouth, kWest}) { + cards[player] = FormatHand(player, /*mark_voids=*/false, holder_); + } + } + constexpr int kColumnWidth = 8; + std::string padding(kColumnWidth, ' '); + std::string rv; + for (int suit = kNumSuits - 1; suit >= 0; --suit) + absl::StrAppend(&rv, padding, cards[kNorth][suit], "\n"); + for (int suit = kNumSuits - 1; suit >= 0; --suit) + absl::StrAppend(&rv, absl::StrFormat("%-8s", cards[kWest][suit]), padding, + cards[kEast][suit], "\n"); + for (int suit = kNumSuits - 1; suit >= 0; --suit) + absl::StrAppend(&rv, padding, cards[kSouth][suit], "\n"); + return rv; +} + +std::string BridgeState::FormatVulnerability() const { + return absl::StrCat("Vul: ", + is_vulnerable_[0] ? (is_vulnerable_[1] ? 
"All" : "N/S") + : (is_vulnerable_[1] ? "E/W" : "None"), + "\n"); +} + +std::string BridgeState::FormatAuction(bool trailing_query) const { + SPIEL_CHECK_GT(history_.size(), kNumCards); + std::string rv = "\nWest North East South\n "; + for (int i = kNumCards; i < history_.size() - num_cards_played_; ++i) { + if (i % kNumPlayers == kNumPlayers - 1) rv.push_back('\n'); + absl::StrAppend( + &rv, absl::StrFormat( + "%-6s", BidString(history_[i].action - kBiddingActionBase))); + } + if (trailing_query) { + if ((history_.size() - num_cards_played_) % kNumPlayers == kNumPlayers - 1) + rv.push_back('\n'); + rv.push_back('?'); + } + return rv; +} + +std::string BridgeState::FormatPlay() const { + SPIEL_CHECK_GT(num_cards_played_, 0); + std::string rv = "\n\nN E S W N E S"; + Trick trick{kInvalidPlayer, kNoTrump, 0}; + Player player = (1 + contract_.declarer) % kNumPlayers; + for (int i = 0; i < num_cards_played_; ++i) { + if (i % kNumPlayers == 0) { + if (i > 0) player = trick.Winner(); + absl::StrAppend(&rv, "\n", std::string(3 * player, ' ')); + } else { + player = (1 + player) % kNumPlayers; + } + const int card = history_[history_.size() - num_cards_played_ + i].action; + if (i % kNumPlayers == 0) { + trick = Trick(player, contract_.trumps, card); + } else { + trick.Play(player, card); + } + absl::StrAppend(&rv, CardString(card), " "); + } + absl::StrAppend(&rv, "\n\nDeclarer tricks: ", num_declarer_tricks_); + return rv; +} + +std::string BridgeState::FormatPlayObservation(bool trailing_query) const { + SPIEL_CHECK_GT(num_cards_played_, 0); + std::string rv; + Trick trick{kInvalidPlayer, kNoTrump, 0}; + Player player = (1 + contract_.declarer) % kNumPlayers; + // Previous tricks + const int completed_tricks = num_cards_played_ / kNumPlayers; + for (int i = 0; i < completed_tricks * kNumPlayers; ++i) { + if (i % kNumPlayers == 0) { + if (i > 0) player = trick.Winner(); + } else { + player = (1 + player) % kNumPlayers; + } + const int card = history_[history_.size() - num_cards_played_ + i].action; + if (i % kNumPlayers == 0) { + trick = Trick(player, contract_.trumps, card); + } else { + trick.Play(player, card); + } + if (i % kNumPlayers == 0 && i > 0) + absl::StrAppend(&rv, "Trick ", (i / kNumPlayers), " won by "); + if (Partnership(trick.Winner()) == Partnership(contract_.declarer)) + absl::StrAppend(&rv, "declarer\n"); + else + absl::StrAppend(&rv, "defence\n"); + } + // Current trick + absl::StrAppend(&rv, "Current trick: "); + for (int i = completed_tricks * kNumPlayers; i < num_cards_played_; ++i) { + const int card = history_[history_.size() - num_cards_played_ + i].action; + absl::StrAppend(&rv, CardString(card), " "); + } + if (trailing_query) absl::StrAppend(&rv, "?"); + return rv; +} + +std::string BridgeState::FormatResult() const { + SPIEL_CHECK_TRUE(IsTerminal()); + std::string rv; + if (use_double_dummy_result_ && contract_.level) { + absl::StrAppend(&rv, "\n\nDeclarer tricks: ", num_declarer_tricks_); + } + absl::StrAppend(&rv, "\nScore: N/S ", returns_[kNorth], " E/W ", + returns_[kEast]); + return rv; +} + +void BridgeState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_EQ(values.size(), game_->ObservationTensorSize()); + WriteObservationTensor(player, values); +} + +void BridgeState::InformationStateTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_EQ(values.size(), game_->ObservationTensorSize()); + WriteObservationTensor(player, values); +} + +void BridgeState::WriteObservationTensor(Player player, + absl::Span values) const { 
+ SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + std::fill(values.begin(), values.end(), 0.0); + if (phase_ == Phase::kDeal) return; + int partnership = Partnership(player); + auto ptr = values.begin(); + if (num_cards_played_ > 0) { + // Observation for play phase + const bool defending = (partnership != Partnership(contract_.declarer)); + if (phase_ == Phase::kPlay) ptr[2 + defending] = 1; + ptr += kNumObservationTypes; + + // Contract + ptr[contract_.level - 1] = 1; + ptr += kNumBidLevels; + + // Trump suit + ptr[contract_.trumps] = 1; + ptr += kNumDenominations; + + // Double status + *ptr++ = contract_.double_status == DoubleStatus::kUndoubled; + *ptr++ = contract_.double_status == DoubleStatus::kDoubled; + *ptr++ = contract_.double_status == DoubleStatus::kRedoubled; + + // Identity of the declarer. + ptr[(contract_.declarer + kNumPlayers - player) % kNumPlayers] = 1; + ptr += kNumPlayers; + + // Vulnerability. + ptr[is_vulnerable_[Partnership(contract_.declarer)]] = 1.0; + ptr += kNumVulnerabilities; + + // Our remaining cards. + for (int i = 0; i < kNumCards; ++i) + if (holder_[i] == player) ptr[i] = 1; + ptr += kNumCards; + + // Dummy's remaining cards. + const int dummy = Partner(contract_.declarer); + for (int i = 0; i < kNumCards; ++i) + if (holder_[i] == dummy) ptr[i] = 1; + ptr += kNumCards; + + // Indexing into history for recent tricks. + int current_trick = num_cards_played_ / kNumPlayers; + int this_trick_cards_played = num_cards_played_ % kNumPlayers; + int this_trick_start = history_.size() - this_trick_cards_played; + + // Current trick + if (phase_ != Phase::kGameOver) { + int leader = tricks_[current_trick].Leader(); + for (int i = 0; i < this_trick_cards_played; ++i) { + int card = history_[this_trick_start + i].action; + int relative_player = (i + leader + kNumPlayers - player) % kNumPlayers; + ptr[relative_player * kNumCards + card] = 1; + } + } + + ptr += kNumPlayers * kNumCards; + + // Previous tricks + for (int j = current_trick - 1; + j >= std::max(0, current_trick - num_tricks_ + 1); --j) { + int leader = tricks_[j].Leader(); + for (int i = 0; i < kNumPlayers; ++i) { + int card = + history_[this_trick_start - kNumPlayers * (current_trick - j) + i] + .action; + int relative_player = (i + leader + kNumPlayers - player) % kNumPlayers; + ptr[relative_player * kNumCards + card] = 1; + } + ptr += kNumPlayers * kNumCards; + } + + // Move pointer for future tricks to have a fixed size tensor + if (num_tricks_ > current_trick + 1) { + ptr += kNumPlayers * kNumCards * (num_tricks_ - current_trick - 1); + } + + // Number of tricks taken by each side. + ptr[num_declarer_tricks_] = 1; + ptr += kNumTricks; + ptr[num_cards_played_ / 4 - num_declarer_tricks_] = 1; + ptr += kNumTricks; + + int kPlayTensorSize = BridgeGame::GetPlayTensorSize(num_tricks_); + SPIEL_CHECK_EQ(std::distance(values.begin(), ptr), + kPlayTensorSize + kNumObservationTypes); + SPIEL_CHECK_LE(std::distance(values.begin(), ptr), values.size()); + } else { + // Observation for auction or opening lead. + ptr[phase_ == Phase::kPlay ? 
1 : 0] = 1; + ptr += kNumObservationTypes; + ptr[is_vulnerable_[partnership]] = 1; + ptr += kNumVulnerabilities; + ptr[is_vulnerable_[1 - partnership]] = 1; + ptr += kNumVulnerabilities; + int last_bid = 0; + for (int i = kNumCards; i < history_.size(); ++i) { + int this_call = history_[i].action - kBiddingActionBase; + int relative_bidder = (i + kNumPlayers - player) % kNumPlayers; + if (last_bid == 0 && this_call == kPass) ptr[relative_bidder] = 1; + if (this_call == kDouble) { + ptr[kNumPlayers + (last_bid - kFirstBid) * kNumPlayers * 3 + + kNumPlayers + relative_bidder] = 1; + } else if (this_call == kRedouble) { + ptr[kNumPlayers + (last_bid - kFirstBid) * kNumPlayers * 3 + + kNumPlayers * 2 + relative_bidder] = 1; + } else if (this_call != kPass) { + last_bid = this_call; + ptr[kNumPlayers + (last_bid - kFirstBid) * kNumPlayers * 3 + + relative_bidder] = 1; + } + } + ptr += kNumPlayers * (1 + 3 * kNumBids); + for (int i = 0; i < kNumCards; ++i) + if (holder_[i] == player) ptr[i] = 1; + ptr += kNumCards; + SPIEL_CHECK_EQ(std::distance(values.begin(), ptr), + kAuctionTensorSize + kNumObservationTypes); + SPIEL_CHECK_LE(std::distance(values.begin(), ptr), values.size()); + } +} + +std::vector BridgeState::PublicObservationTensor() const { + SPIEL_CHECK_TRUE(phase_ == Phase::kAuction); + std::vector rv(kPublicInfoTensorSize); + auto ptr = rv.begin(); + ptr[is_vulnerable_[0]] = 1; + ptr += kNumVulnerabilities; + ptr[is_vulnerable_[1]] = 1; + ptr += kNumVulnerabilities; + auto bidding = ptr + 2 * kNumPlayers; // initial and recent passes + int last_bid = 0; + for (int i = kNumCards; i < history_.size(); ++i) { + const int player = i % kNumPlayers; + const int this_call = history_[i].action - kBiddingActionBase; + if (this_call == kPass) { + if (last_bid == 0) ptr[player] = 1; // Leading passes + ptr[kNumPlayers + player] = 1; // Trailing passes + } else { + // Call is a non-Pass, so clear the trailing pass markers. 
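+      // After the auction is processed, the second block of kNumPlayers bits
+      // therefore marks the players who have passed since the most recent
+      // non-Pass call.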
+ for (int i = 0; i < kNumPlayers; ++i) ptr[kNumPlayers + i] = 0; + if (this_call == kDouble) { + auto base = bidding + (last_bid - kFirstBid) * kNumPlayers * 3; + base[kNumPlayers + player] = 1; + } else if (this_call == kRedouble) { + auto base = bidding + (last_bid - kFirstBid) * kNumPlayers * 3; + base[kNumPlayers * 2 + player] = 1; + } else { + last_bid = this_call; + auto base = bidding + (last_bid - kFirstBid) * kNumPlayers * 3; + base[player] = 1; + } + } + } + return rv; +} + +std::vector BridgeState::PrivateObservationTensor(Player player) const { + std::vector rv(kNumCards); + for (int i = 0; i < kNumCards; ++i) + if (holder_[i] == player) rv[i] = 1; + return rv; +} + +void BridgeState::SetDoubleDummyResults(ddTableResults double_dummy_results) { + double_dummy_results_ = double_dummy_results; + ComputeScoreByContract(); +} + +ABSL_CONST_INIT absl::Mutex dds_mutex(absl::kConstInit); + +void BridgeState::ComputeDoubleDummyTricks() const { + if (!double_dummy_results_.has_value()) { + absl::MutexLock lock(&dds_mutex); // TODO(author11) Make DDS code thread-safe + double_dummy_results_ = ddTableResults{}; + ddTableDeal dd_table_deal{}; + for (int suit = 0; suit < kNumSuits; ++suit) { + for (int rank = 0; rank < kNumCardsPerSuit; ++rank) { + const int player = holder_[Card(Suit(suit), rank)].value(); + dd_table_deal.cards[player][suit] += 1 << (2 + rank); + } + } + DDS_EXTERNAL(SetMaxThreads)(0); + const int return_code = DDS_EXTERNAL(CalcDDtable)( + dd_table_deal, &double_dummy_results_.value()); + if (return_code != RETURN_NO_FAULT) { + char error_message[80]; + DDS_EXTERNAL(ErrorMessage)(return_code, error_message); + SpielFatalError(absl::StrCat("double_dummy_solver:", error_message)); + } + } + ComputeScoreByContract(); +} + +std::vector BridgeState::ScoreForContracts( + int player, const std::vector& contracts) const { + // Storage for the number of tricks. + std::array, kNumDenominations> dd_tricks; + + if (double_dummy_results_.has_value()) { + // If we have already computed double-dummy results, use them. + for (int declarer = 0; declarer < kNumPlayers; ++declarer) { + for (int trumps = 0; trumps < kNumDenominations; ++trumps) { + dd_tricks[trumps][declarer] = + double_dummy_results_->resTable[trumps][declarer]; + } + } + } else { + { + // This performs some sort of global initialization; unclear + // exactly what. + absl::MutexLock lock(&dds_mutex); + DDS_EXTERNAL(SetMaxThreads)(0); + } + + // Working storage for DD calculation. + auto thread_data = std::make_unique(); + auto transposition_table = std::make_unique(); + transposition_table->SetMemoryDefault(95); // megabytes + transposition_table->SetMemoryMaximum(160); // megabytes + transposition_table->MakeTT(); + thread_data->transTable = transposition_table.get(); + + // Which trump suits do we need to handle? + std::set suits; + for (auto index : contracts) { + const auto& contract = kAllContracts[index]; + if (contract.level > 0) suits.emplace(contract.trumps); + } + // Build the deal + ::deal dl{}; + for (int suit = 0; suit < kNumSuits; ++suit) { + for (int rank = 0; rank < kNumCardsPerSuit; ++rank) { + const int player = holder_[Card(Suit(suit), rank)].value(); + dl.remainCards[player][suit] += 1 << (2 + rank); + } + } + for (int k = 0; k <= 2; k++) { + dl.currentTrickRank[k] = 0; + dl.currentTrickSuit[k] = 0; + } + + // Analyze for each trump suit. + for (int suit : suits) { + dl.trump = suit; + transposition_table->ResetMemory(TT_RESET_NEW_TRUMP); + + // Assemble the declarers we need to consider. 
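+      // The first declarer considered for this trump suit is solved with
+      // SolveBoardInternal; later declarers reuse the same board via
+      // SolveSameBoard, with the first result (or its complement for the
+      // opposing partnership) passed as a hint.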
+ std::set declarers; + for (auto index : contracts) { + const auto& contract = kAllContracts[index]; + if (contract.level > 0 && contract.trumps == suit) + declarers.emplace(contract.declarer); + } + + // Analyze the deal for each declarer. + absl::optional first_declarer; + absl::optional first_tricks; + for (int declarer : declarers) { + ::futureTricks fut; + dl.first = (declarer + 1) % kNumPlayers; + if (!first_declarer.has_value()) { + // First time we're calculating this trump suit. + const int return_code = SolveBoardInternal( + thread_data.get(), dl, + /*target=*/-1, // Find max number of tricks + /*solutions=*/1, // Just the tricks (no card-by-card result) + /*mode=*/2, // Unclear + &fut // Output + ); + if (return_code != RETURN_NO_FAULT) { + char error_message[80]; + DDS_EXTERNAL(ErrorMessage)(return_code, error_message); + SpielFatalError( + absl::StrCat("double_dummy_solver:", error_message)); + } + dd_tricks[suit][declarer] = 13 - fut.score[0]; + first_declarer = declarer; + first_tricks = 13 - fut.score[0]; + } else { + // Reuse data from last time. + const int hint = Partnership(declarer) == Partnership(*first_declarer) + ? *first_tricks + : 13 - *first_tricks; + const int return_code = + SolveSameBoard(thread_data.get(), dl, &fut, hint); + if (return_code != RETURN_NO_FAULT) { + char error_message[80]; + DDS_EXTERNAL(ErrorMessage)(return_code, error_message); + SpielFatalError( + absl::StrCat("double_dummy_solver:", error_message)); + } + dd_tricks[suit][declarer] = 13 - fut.score[0]; + } + } + } + } + + // Compute the scores. + std::vector scores; + scores.reserve(contracts.size()); + for (int contract_index : contracts) { + const Contract& contract = kAllContracts[contract_index]; + const int declarer_score = + (contract.level == 0) + ? 0 + : Score(contract, dd_tricks[contract.trumps][contract.declarer], + is_vulnerable_[Partnership(contract.declarer)]); + scores.push_back(Partnership(contract.declarer) == Partnership(player) + ? declarer_score + : -declarer_score); + } + return scores; +} + +std::vector BridgeState::LegalActions() const { + switch (phase_) { + case Phase::kDeal: + return DealLegalActions(); + case Phase::kAuction: + return BiddingLegalActions(); + case Phase::kPlay: + return PlayLegalActions(); + default: + return {}; + } +} + +std::vector BridgeState::DealLegalActions() const { + std::vector legal_actions; + legal_actions.reserve(kNumCards - history_.size()); + for (int i = 0; i < kNumCards; ++i) { + if (!holder_[i].has_value()) legal_actions.push_back(i); + } + return legal_actions; +} + +std::vector BridgeState::BiddingLegalActions() const { + std::vector legal_actions; + legal_actions.reserve(kNumCalls); + legal_actions.push_back(kBiddingActionBase + kPass); + if (contract_.level > 0 && + Partnership(contract_.declarer) != Partnership(current_player_) && + contract_.double_status == kUndoubled) { + legal_actions.push_back(kBiddingActionBase + kDouble); + } + if (contract_.level > 0 && + Partnership(contract_.declarer) == Partnership(current_player_) && + contract_.double_status == kDoubled) { + legal_actions.push_back(kBiddingActionBase + kRedouble); + } + for (int bid = Bid(contract_.level, contract_.trumps) + 1; bid < kNumCalls; + ++bid) { + legal_actions.push_back(kBiddingActionBase + bid); + } + return legal_actions; +} + +std::vector BridgeState::PlayLegalActions() const { + std::vector legal_actions; + legal_actions.reserve(kNumCardsPerHand - num_cards_played_ / kNumPlayers); + + // Check if we can follow suit. 
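+  // If the current player holds any card of the suit that was led, only those
+  // cards are legal (following suit is mandatory); the fall-through below
+  // allows any held card when the player is void in the led suit or is
+  // leading the trick.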
+ if (num_cards_played_ % kNumPlayers != 0) { + auto suit = CurrentTrick().LedSuit(); + for (int rank = 0; rank < kNumCardsPerSuit; ++rank) { + if (holder_[Card(suit, rank)] == current_player_) { + legal_actions.push_back(Card(suit, rank)); + } + } + } + if (!legal_actions.empty()) return legal_actions; + + // Otherwise, we can play any of our cards. + for (int card = 0; card < kNumCards; ++card) { + if (holder_[card] == current_player_) legal_actions.push_back(card); + } + return legal_actions; +} + +std::vector> BridgeState::ChanceOutcomes() const { + std::vector> outcomes; + int num_cards_remaining = kNumCards - history_.size(); + outcomes.reserve(num_cards_remaining); + const double p = 1.0 / static_cast(num_cards_remaining); + for (int card = 0; card < kNumCards; ++card) { + if (!holder_[card].has_value()) outcomes.emplace_back(card, p); + } + return outcomes; +} + +void BridgeState::DoApplyAction(Action action) { + switch (phase_) { + case Phase::kDeal: + return ApplyDealAction(action); + case Phase::kAuction: + return ApplyBiddingAction(action - kBiddingActionBase); + case Phase::kPlay: + return ApplyPlayAction(action); + case Phase::kGameOver: + SpielFatalError("Cannot act in terminal states"); + } +} + +void BridgeState::ApplyDealAction(int card) { + holder_[card] = (history_.size() % kNumPlayers); + if (history_.size() == kNumCards - 1) { + if (use_double_dummy_result_) ComputeDoubleDummyTricks(); + phase_ = Phase::kAuction; + current_player_ = kFirstPlayer; + } +} + +void BridgeState::ApplyBiddingAction(int call) { + // Track the number of consecutive passes since the last bid (if any). + if (call == kPass) { + ++num_passes_; + } else { + num_passes_ = 0; + } + + auto partnership = Partnership(current_player_); + if (call == kDouble) { + SPIEL_CHECK_NE(Partnership(contract_.declarer), partnership); + SPIEL_CHECK_EQ(contract_.double_status, kUndoubled); + SPIEL_CHECK_GT(contract_.level, 0); + possible_contracts_[contract_.Index()] = false; + contract_.double_status = kDoubled; + } else if (call == kRedouble) { + SPIEL_CHECK_EQ(Partnership(contract_.declarer), partnership); + SPIEL_CHECK_EQ(contract_.double_status, kDoubled); + possible_contracts_[contract_.Index()] = false; + contract_.double_status = kRedoubled; + } else if (call == kPass) { + if (num_passes_ == 4) { + // Four consecutive passes can only happen if no-one makes a bid. + // The hand is then over, and each side scores zero points. + phase_ = Phase::kGameOver; + possible_contracts_.fill(false); + possible_contracts_[0] = true; + } else if (num_passes_ == 3 && contract_.level > 0) { + // After there has been a bid, three consecutive passes end the auction. + possible_contracts_.fill(false); + possible_contracts_[contract_.Index()] = true; + if (use_double_dummy_result_) { + SPIEL_CHECK_TRUE(double_dummy_results_.has_value()); + phase_ = Phase::kGameOver; + num_declarer_tricks_ = + double_dummy_results_ + ->resTable[contract_.trumps][contract_.declarer]; + ScoreUp(); + } else { + phase_ = Phase::kPlay; + current_player_ = (contract_.declarer + 1) % kNumPlayers; + return; + } + } + } else { + // A bid was made. + SPIEL_CHECK_TRUE((BidLevel(call) > contract_.level) || + (BidLevel(call) == contract_.level && + BidSuit(call) > contract_.trumps)); + contract_.level = BidLevel(call); + contract_.trumps = BidSuit(call); + contract_.double_status = kUndoubled; + auto partnership = Partnership(current_player_); + if (!first_bidder_[partnership][contract_.trumps].has_value()) { + // Partner cannot declare this denomination. 
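+      // Recording the first bidder of this denomination fixes who would be
+      // declarer for the partnership, so every higher-level contract in this
+      // denomination with the partner as declarer is removed from
+      // possible_contracts_.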
+ first_bidder_[partnership][contract_.trumps] = current_player_; + const int partner = Partner(current_player_); + for (int level = contract_.level + 1; level <= kNumBidLevels; ++level) { + for (DoubleStatus double_status : {kUndoubled, kDoubled, kRedoubled}) { + possible_contracts_[Contract{level, contract_.trumps, double_status, + partner} + .Index()] = false; + } + } + } + contract_.declarer = first_bidder_[partnership][contract_.trumps].value(); + // No lower contract is possible. + std::fill( + possible_contracts_.begin(), + possible_contracts_.begin() + + Contract{contract_.level, contract_.trumps, kUndoubled, 0}.Index(), + false); + // No-one else can declare this precise contract. + for (int player = 0; player < kNumPlayers; ++player) { + if (player != current_player_) { + for (DoubleStatus double_status : {kUndoubled, kDoubled, kRedoubled}) { + possible_contracts_[Contract{contract_.level, contract_.trumps, + double_status, player} + .Index()] = false; + } + } + } + } + current_player_ = (current_player_ + 1) % kNumPlayers; +} + +void BridgeState::ApplyPlayAction(int card) { + SPIEL_CHECK_TRUE(holder_[card] == current_player_); + holder_[card] = absl::nullopt; + if (num_cards_played_ % kNumPlayers == 0) { + CurrentTrick() = Trick(current_player_, contract_.trumps, card); + } else { + CurrentTrick().Play(current_player_, card); + } + const Player winner = CurrentTrick().Winner(); + ++num_cards_played_; + if (num_cards_played_ % kNumPlayers == 0) { + current_player_ = winner; + if (Partnership(winner) == Partnership(contract_.declarer)) + ++num_declarer_tricks_; + } else { + current_player_ = (current_player_ + 1) % kNumPlayers; + } + if (num_cards_played_ == kNumCards) { + phase_ = Phase::kGameOver; + ScoreUp(); + } +} + +Player BridgeState::CurrentPlayer() const { + if (phase_ == Phase::kDeal) { + return kChancePlayerId; + } else if (phase_ == Phase::kGameOver) { + return kTerminalPlayerId; + } else if (phase_ == Phase::kPlay && + Partnership(current_player_) == Partnership(contract_.declarer)) { + // Declarer chooses cards for both players. + return contract_.declarer; + } else { + return current_player_; + } +} + +void BridgeState::ScoreUp() { + int declarer_score = Score(contract_, num_declarer_tricks_, + is_vulnerable_[Partnership(contract_.declarer)]); + for (int pl = 0; pl < kNumPlayers; ++pl) { + returns_[pl] = Partnership(pl) == Partnership(contract_.declarer) + ? declarer_score + : -declarer_score; + } +} + +void BridgeState::ComputeScoreByContract() const { + SPIEL_CHECK_TRUE(double_dummy_results_.has_value()); + for (int i = 0; i < kNumContracts; ++i) { + Contract contract = kAllContracts[i]; + if (contract.level == 0) { + score_by_contract_[i] = 0; + } else { + const int num_declarer_tricks = + double_dummy_results_->resTable[contract.trumps][contract.declarer]; + const int declarer_score = + Score(contract, num_declarer_tricks, + is_vulnerable_[Partnership(contract.declarer)]); + score_by_contract_[i] = Partnership(contract.declarer) == 0 + ? 
declarer_score + : -declarer_score; + } + } +} + +Trick::Trick(Player leader, Denomination trumps, int card) + : trumps_(trumps), + led_suit_(CardSuit(card)), + winning_suit_(CardSuit(card)), + winning_rank_(CardRank(card)), + leader_(leader), + winning_player_(leader) {} + +void Trick::Play(Player player, int card) { + if (CardSuit(card) == winning_suit_) { + if (CardRank(card) > winning_rank_) { + winning_rank_ = CardRank(card); + winning_player_ = player; + } + } else if (CardSuit(card) == Suit(trumps_)) { + winning_suit_ = Suit(trumps_); + winning_rank_ = CardRank(card); + winning_player_ = player; + } +} + +// We have custom State serialization to avoid recomputing double-dummy +// results. +std::string BridgeState::Serialize() const { + std::string serialized = State::Serialize(); + if (use_double_dummy_result_ && double_dummy_results_.has_value()) { + std::string dd; + for (int trumps = 0; trumps < kNumDenominations; ++trumps) { + for (int player = 0; player < kNumPlayers; ++player) { + absl::StrAppend(&dd, double_dummy_results_->resTable[trumps][player], + "\n"); + } + } + absl::StrAppend(&serialized, "Double Dummy Results\n", dd); + } + return serialized; +} + +std::unique_ptr BridgeGame::DeserializeState( + const std::string& str) const { + if (!UseDoubleDummyResult()) return Game::DeserializeState(str); + auto state = std::make_unique( + shared_from_this(), UseDoubleDummyResult(), IsDealerVulnerable(), + IsNonDealerVulnerable(), NumTricks()); + std::vector lines = absl::StrSplit(str, '\n'); + const auto separator = absl::c_find(lines, "Double Dummy Results"); + // Double-dummy results. + if (separator != lines.end()) { + ddTableResults double_dummy_results; + auto it = separator; + int i = 0; + while (++it != lines.end()) { + if (it->empty()) continue; + double_dummy_results.resTable[i / kNumPlayers][i % kNumPlayers] = + std::stol(*it); + ++i; + } + state->SetDoubleDummyResults(double_dummy_results); + } + // Actions in the game. + for (auto it = lines.begin(); it != separator; ++it) { + if (it->empty()) continue; + state->ApplyAction(std::stol(*it)); + } + return state; +} + +int BridgeState::ContractIndex() const { + SPIEL_CHECK_TRUE(phase_ == Phase::kPlay || phase_ == Phase::kGameOver); + return contract_.Index(); +} + +std::string BridgeGame::ContractString(int index) const { + return kAllContracts[index].ToString(); +} + +} // namespace bridge +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/bridge/bridge.h b/scenarios/bargaining/open_spiel/open_spiel/games/bridge/bridge.h new file mode 100644 index 0000000..7d1146a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/bridge/bridge.h @@ -0,0 +1,283 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_BRIDGE_H_ +#define OPEN_SPIEL_GAMES_BRIDGE_H_ + +// The full game of contract bridge. 
+// See https://en.wikipedia.org/wiki/Contract_bridge +// This is played by four players in two partnerships; it consists of a bidding +// phase followed by a play phase. The bidding phase determines the contract for +// the play phase. The contract has four components: +// - Which of the four players is the 'declarer'. (The first play is made by +// the player on declarer's left. Declarer's partner (the 'dummy') then +// places their cards face-up for everyone to see; their plays are chosen +// by declarer.) +// - The trump suit (or no-trumps). +// - The level, i.e. the trick target for the declaring partnership. +// - Whether the contract is doubled or redoubled (increasing the stakes). +// +// There is then a play phase, in which 13 tricks are allocated between the +// two partnerships. The declaring side gets a positive score if they take +// at least as many tricks as contracted for, otherwise their score is negative. +// +// We support an option to replace the play phase with a perfect-information +// solution (the 'double dummy result' in bridge jargon). +// +// The action space is as follows: +// 0..51 Cards, used for both dealing (chance events) and play; +// 52+ Calls (Pass, Dbl, RDbl, and bids), used during the auction phase. +// +// During the play phase, the dummy's cards are played by the declarer (their +// partner). There will thus be 26 turns for declarer, and 13 turns for each +// of the defenders during the play. + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/games/bridge/double_dummy_solver/include/dll.h" +#include "open_spiel/games/bridge/bridge_scoring.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace bridge { + +inline constexpr int kBiddingActionBase = kNumCards; // First bidding action. +inline constexpr int kNumObservationTypes = 4; // Bid, lead, declare, defend +// Because bids always increase, any individual bid can be made at most once. +// Thus for each bid, we only need to track (a) who bid it (if anyone), (b) who +// doubled it (if anyone), and (c) who redoubled it (if anyone). +// We also report the number of passes before the first bid; we could +// equivalently report which player made the first call. +// This is much more compact than storing the auction call-by-call, which +// requires 318 turns * 38 possible calls per turn = 12084 bits (although +// in practice almost all auctions have fewer than 80 calls). +inline constexpr int kAuctionTensorSize = + kNumPlayers * (1 // Did this player pass before the opening bid? + + kNumBids // Did this player make each bid? + + kNumBids // Did this player double each bid? + + kNumBids // Did this player redouble each bid? + ) + + kNumCards // Our hand + + kNumVulnerabilities * kNumPartnerships; // Vulnerability of each side +inline constexpr int kPublicInfoTensorSize = + kAuctionTensorSize // The auction + - kNumCards // But not any player's cards + + kNumPlayers; // Plus trailing passes +inline constexpr int kMaxAuctionLength = + kNumBids * (1 + kNumPlayers * 2) + kNumPlayers; +inline constexpr Player kFirstPlayer = 0; +enum class Suit { kClubs = 0, kDiamonds = 1, kHearts = 2, kSpades = 3 }; + +// State of a single trick. 
+class Trick { + public: + Trick() : Trick{kInvalidPlayer, kNoTrump, 0} {} + Trick(Player leader, Denomination trumps, int card); + void Play(Player player, int card); + Suit LedSuit() const { return led_suit_; } + Player Winner() const { return winning_player_; } + Player Leader() const { return leader_; } + + private: + Denomination trumps_; + Suit led_suit_; + Suit winning_suit_; + int winning_rank_; + Player leader_; + Player winning_player_; +}; + +// State of an in-play game. Can be any phase of the game. +class BridgeState : public State { + public: + BridgeState(std::shared_ptr game, bool use_double_dummy_result, + bool is_dealer_vulnerable, bool is_non_dealer_vulnerable, + int num_tricks); + Player CurrentPlayer() const override; + std::string ActionToString(Player player, Action action) const override; + std::string ToString() const override; + bool IsTerminal() const override { return phase_ == Phase::kGameOver; } + std::vector Returns() const override { return returns_; } + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + void WriteObservationTensor(Player player, absl::Span values) const; + void ObservationTensor(Player player, + absl::Span values) const override; + void InformationStateTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override { + return std::unique_ptr(new BridgeState(*this)); + } + std::vector LegalActions() const override; + std::vector> ChanceOutcomes() const override; + std::string Serialize() const override; + void SetDoubleDummyResults(ddTableResults double_dummy_results); + std::unique_ptr ResampleFromInfostate( + int player_id, std::function rng) const override; + + // If the state is terminal, returns the index of the final contract, into the + // arrays returned by PossibleFinalContracts and ScoreByContract. + int ContractIndex() const; + + // Returns a mask indicating which final contracts are possible. + std::array PossibleContracts() const { + return possible_contracts_; + } + + // Returns the score for each possible final contract. This is computed once + // at the start of the deal, so will include scores for contracts which are + // now impossible. + std::array ScoreByContract() const { + SPIEL_CHECK_TRUE(double_dummy_results_.has_value()); + return score_by_contract_; + } + + // Returns the double-dummy score for a list of contracts from the point + // of view of the specified player. + // Will compute the double-dummy results if needed. + std::vector ScoreForContracts(int player, + const std::vector& contracts) const; + + // Private information tensor per player. + std::vector PrivateObservationTensor(Player player) const; + + // Public information. + std::vector PublicObservationTensor() const; + + // Current phase. 
+ int CurrentPhase() const { return static_cast(phase_); } + + protected: + void DoApplyAction(Action action) override; + + private: + enum class Phase { kDeal, kAuction, kPlay, kGameOver }; + + std::vector DealLegalActions() const; + std::vector BiddingLegalActions() const; + std::vector PlayLegalActions() const; + void ApplyDealAction(int card); + void ApplyBiddingAction(int call); + void ApplyPlayAction(int card); + void ComputeDoubleDummyTricks() const; + void ComputeScoreByContract() const; + void ScoreUp(); + Trick& CurrentTrick() { return tricks_[num_cards_played_ / kNumPlayers]; } + const Trick& CurrentTrick() const { + return tricks_[num_cards_played_ / kNumPlayers]; + } + std::array, kNumCards> OriginalDeal() const; + std::string FormatDeal() const; + std::string FormatVulnerability() const; + std::string FormatAuction(bool trailing_query) const; + std::string FormatPlay() const; + std::string FormatPlayObservation(bool trailing_query) const; + std::string FormatResult() const; + + const bool use_double_dummy_result_; + const bool is_vulnerable_[kNumPartnerships]; + const int num_tricks_; + + int num_passes_ = 0; // Number of consecutive passes since the last non-pass. + int num_declarer_tricks_ = 0; + int num_cards_played_ = 0; + Player current_player_ = 0; // During the play phase, the hand to play. + Phase phase_ = Phase::kDeal; + Contract contract_{0}; + std::array, kNumDenominations>, + kNumPartnerships> + first_bidder_{}; + std::array tricks_{}; + std::vector returns_ = std::vector(kNumPlayers); + std::array, kNumCards> holder_{}; + mutable absl::optional double_dummy_results_{}; + std::array possible_contracts_; + mutable std::array score_by_contract_; +}; + +class BridgeGame : public Game { + public: + explicit BridgeGame(const GameParameters& params); + int NumDistinctActions() const override { + return kBiddingActionBase + kNumCalls; + } + int MaxChanceOutcomes() const override { return kNumCards; } + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new BridgeState( + shared_from_this(), UseDoubleDummyResult(), IsDealerVulnerable(), + IsNonDealerVulnerable(), NumTricks())); + } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { return -kMaxScore; } + double MaxUtility() const override { return kMaxScore; } + absl::optional UtilitySum() const override { return 0; } + + static int GetPlayTensorSize(int num_tricks) { + return kNumBidLevels // What the contract is + + kNumDenominations // What trumps are + + kNumOtherCalls // Undoubled / doubled / redoubled + + kNumPlayers // Who declarer is + + kNumVulnerabilities // Vulnerability of the declaring side + + kNumCards // Our remaining cards + + kNumCards // Dummy's remaining cards + + num_tricks * kNumPlayers * kNumCards // Number of played tricks + + kNumTricks // Number of tricks we have won + + kNumTricks; // Number of tricks they have won + } + + std::vector ObservationTensorShape() const override { + return {kNumObservationTypes + + std::max(GetPlayTensorSize(NumTricks()), kAuctionTensorSize)}; + } + + std::vector InformationStateTensorShape() const override { + return {kNumObservationTypes + + std::max(GetPlayTensorSize(NumTricks()), kAuctionTensorSize)}; + } + + int MaxGameLength() const override { + return UseDoubleDummyResult() ? 
kMaxAuctionLength + : kMaxAuctionLength + kNumCards; + } + int MaxChanceNodesInHistory() const override { return kNumCards; } + + std::unique_ptr DeserializeState( + const std::string& str) const override; + + // How many contracts there are (including declarer and double status). + int NumPossibleContracts() const { return kNumContracts; } + + // A string representation of a contract. + std::string ContractString(int index) const; + + // Extra observation tensors. + int PrivateObservationTensorSize() const { return kNumCards; } + int PublicObservationTensorSize() const { return kPublicInfoTensorSize; } + + private: + bool UseDoubleDummyResult() const { + return ParameterValue("use_double_dummy_result", true); + } + bool IsDealerVulnerable() const { + return ParameterValue("dealer_vul", false); + } + bool IsNonDealerVulnerable() const { + return ParameterValue("non_dealer_vul", false); + } + int NumTricks() const { return ParameterValue("num_tricks", 2); } +}; + +} // namespace bridge +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_BRIDGE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/bridge/bridge_scoring.cc b/scenarios/bargaining/open_spiel/open_spiel/games/bridge/bridge_scoring.cc new file mode 100644 index 0000000..0580b55 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/bridge/bridge_scoring.cc @@ -0,0 +1,121 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/bridge/bridge_scoring.h" + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" + +namespace open_spiel { +namespace bridge { +namespace { +constexpr int kBaseTrickScores[] = {20, 20, 30, 30, 30}; + +int ScoreContract(Contract contract, DoubleStatus double_status) { + int score = contract.level * kBaseTrickScores[contract.trumps]; + if (contract.trumps == kNoTrump) score += 10; + return score * double_status; +} + +// Score for failing to make the contract (will be negative). +int ScoreUndertricks(int undertricks, bool is_vulnerable, + DoubleStatus double_status) { + if (double_status == kUndoubled) { + return (is_vulnerable ? -100 : -50) * undertricks; + } + int score = 0; + if (is_vulnerable) { + score = -200 - 300 * (undertricks - 1); + } else { + if (undertricks == 1) { + score = -100; + } else if (undertricks == 2) { + score = -300; + } else { + // This takes into account the -100 for the fourth and subsequent tricks. + score = -500 - 300 * (undertricks - 3); + } + } + return score * (double_status / 2); +} + +// Score for tricks made in excess of the bid. +int ScoreOvertricks(Denomination trump_suit, int overtricks, bool is_vulnerable, + DoubleStatus double_status) { + if (double_status == kUndoubled) { + return overtricks * kBaseTrickScores[trump_suit]; + } else { + return (is_vulnerable ? 100 : 50) * overtricks * double_status; + } +} + +// Bonus for making a doubled or redoubled contract. 
+int ScoreDoubledBonus(DoubleStatus double_status) { + return 50 * (double_status / 2); +} + +// Bonuses for partscore, game, or slam. +int ScoreBonuses(int level, int contract_score, bool is_vulnerable) { + if (level == 7) { // 1500/1000 for grand slam + 500/300 for game + return is_vulnerable ? 2000 : 1300; + } else if (level == 6) { // 750/500 for small slam + 500/300 for game + return is_vulnerable ? 1250 : 800; + } else if (contract_score >= 100) { // game bonus + return is_vulnerable ? 500 : 300; + } else { // partscore bonus + return 50; + } +} +} // namespace + +int Score(Contract contract, int declarer_tricks, bool is_vulnerable) { + if (contract.level == 0) return 0; + int contracted_tricks = 6 + contract.level; + int contract_result = declarer_tricks - contracted_tricks; + if (contract_result < 0) { + return ScoreUndertricks(-contract_result, is_vulnerable, + contract.double_status); + } else { + int contract_score = ScoreContract(contract, contract.double_status); + int bonuses = ScoreBonuses(contract.level, contract_score, is_vulnerable) + + ScoreDoubledBonus(contract.double_status) + + ScoreOvertricks(contract.trumps, contract_result, + is_vulnerable, contract.double_status); + return contract_score + bonuses; + } +} + +std::string Contract::ToString() const { + if (level == 0) return "Passed Out"; + std::string str = absl::StrCat(level, std::string{kDenominationChar[trumps]}); + if (double_status == kDoubled) absl::StrAppend(&str, "X"); + if (double_status == kRedoubled) absl::StrAppend(&str, "XX"); + absl::StrAppend(&str, " ", std::string{kPlayerChar[declarer]}); + return str; +} + +int Contract::Index() const { + if (level == 0) return 0; + int index = level - 1; + index *= kNumDenominations; + index += static_cast(trumps); + index *= kNumPlayers; + index += static_cast(declarer); + index *= kNumDoubleStates; + if (double_status == kRedoubled) index += 2; + if (double_status == kDoubled) index += 1; + return index + 1; +} + +} // namespace bridge +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/bridge/bridge_scoring.h b/scenarios/bargaining/open_spiel/open_spiel/games/bridge/bridge_scoring.h new file mode 100644 index 0000000..21f93f2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/bridge/bridge_scoring.h @@ -0,0 +1,88 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_BRIDGE_BRIDGE_SCORING_H_ +#define OPEN_SPIEL_GAMES_BRIDGE_BRIDGE_SCORING_H_ + +// Scoring for (duplicate) contract bridge. 
+// See Law 77 of the Laws of Bridge, 2017:
+// http://www.worldbridge.org/wp-content/uploads/2017/03/2017LawsofDuplicateBridge-paginated.pdf
+
+#include <array>
+#include <string>
+
+namespace open_spiel {
+namespace bridge {
+
+enum Denomination { kClubs = 0, kDiamonds, kHearts, kSpades, kNoTrump };
+inline constexpr int kNumDenominations = 5;
+constexpr char kDenominationChar[] = "CDHSN";
+
+enum DoubleStatus { kUndoubled = 1, kDoubled = 2, kRedoubled = 4 };
+inline constexpr int kNumDoubleStates = 3;
+
+inline constexpr int kNumPlayers = 4;
+constexpr char kPlayerChar[] = "NESW";
+
+inline constexpr int kNumSuits = 4;
+inline constexpr int kNumCardsPerSuit = 13;
+inline constexpr int kNumPartnerships = 2;
+inline constexpr int kNumBidLevels = 7; // Bids can be from 7 to 13 tricks.
+inline constexpr int kNumOtherCalls = 3; // Pass, Double, Redouble
+inline constexpr int kNumVulnerabilities = 2; // Vulnerable or non-vulnerable.
+inline constexpr int kNumBids = kNumBidLevels * kNumDenominations;
+inline constexpr int kNumCalls = kNumBids + kNumOtherCalls;
+inline constexpr int kNumCards = kNumSuits * kNumCardsPerSuit;
+inline constexpr int kNumCardsPerHand = kNumCards / kNumPlayers;
+inline constexpr int kNumTricks = kNumCardsPerHand;
+inline constexpr int kMaxScore = 7600; // See http://www.rpbridge.net/2y66.htm
+
+struct Contract {
+  int level = 0;
+  Denomination trumps = kNoTrump;
+  DoubleStatus double_status = kUndoubled;
+  int declarer = -1;
+
+  std::string ToString() const;
+  int Index() const;
+};
+
+int Score(Contract contract, int declarer_tricks, bool is_vulnerable);
+
+// All possible contracts.
+inline constexpr int kNumContracts =
+    kNumBids * kNumPlayers * kNumDoubleStates + 1;
+constexpr std::array<Contract, kNumContracts> AllContracts() {
+  std::array<Contract, kNumContracts> contracts;
+  int i = 0;
+  contracts[i++] = Contract();
+  for (int level : {1, 2, 3, 4, 5, 6, 7}) {
+    for (Denomination trumps :
+         {kClubs, kDiamonds, kHearts, kSpades, kNoTrump}) {
+      for (int declarer = 0; declarer < kNumPlayers; ++declarer) {
+        for (DoubleStatus double_status : {kUndoubled, kDoubled, kRedoubled}) {
+          contracts[i++] = Contract{level, trumps, double_status, declarer};
+        }
+      }
+    }
+  }
+  return contracts;
+}
+inline constexpr std::array<Contract, kNumContracts> kAllContracts =
+    AllContracts();
+
+} // namespace bridge
+} // namespace open_spiel
+
+#endif // OPEN_SPIEL_GAMES_BRIDGE_BRIDGE_SCORING_H_
diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/bridge/bridge_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/bridge/bridge_test.cc
new file mode 100644
index 0000000..2677fa7
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/games/bridge/bridge_test.cc
@@ -0,0 +1,97 @@
+// Copyright 2019 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
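To keep the scoring rules above concrete, here is a small worked example. It is an editorial sketch, not part of the patch: the function name `ScoringByHand` is illustrative, and the expected values simply mirror two of the checks in `ScoringTests` further down, using only the `Score` and `Contract` declarations from `bridge_scoring.h`.

```cpp
#include "open_spiel/games/bridge/bridge_scoring.h"

namespace open_spiel {
namespace bridge {

// Illustrative only: reproduces two ScoringTests expectations by hand.
inline void ScoringByHand() {
  // 4H undoubled, 11 tricks, vulnerable:
  //   trick score 4 * 30 = 120, vulnerable game bonus 500, one overtrick 30
  //   => 650.
  int made_game = Score(Contract{4, kHearts, kUndoubled}, 11, true);  // 650

  // 3NT doubled, 6 tricks (three down), non-vulnerable:
  //   doubled undertricks cost 100 + 200 + 200 => -500.
  int went_down = Score(Contract{3, kNoTrump, kDoubled}, 6, false);  // -500

  // Contract::Index() packs (level, trumps, declarer, double status) into a
  // single integer, e.g. 4H by player 0, undoubled:
  //   ((((4 - 1) * 5 + 2) * 4 + 0) * 3 + 0) + 1 = 205.
  (void)made_game;
  (void)went_down;
}

}  // namespace bridge
}  // namespace open_spiel
```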
+ +#include "open_spiel/games/bridge/bridge.h" + +#include "open_spiel/abseil-cpp/absl/strings/str_replace.h" +#include "open_spiel/games/bridge/bridge_scoring.h" +#include "open_spiel/games/bridge/bridge_uncontested_bidding.h" +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace bridge { +namespace { + +void ScoringTests() { + SPIEL_CHECK_EQ(Score({4, kHearts, kUndoubled}, 11, true), 650); + SPIEL_CHECK_EQ(Score({4, kDiamonds, kUndoubled}, 10, true), 130); + SPIEL_CHECK_EQ(Score({3, kNoTrump, kUndoubled}, 6, false), -150); + SPIEL_CHECK_EQ(Score({3, kNoTrump, kDoubled}, 6, false), -500); + SPIEL_CHECK_EQ(Score({2, kSpades, kDoubled}, 8, true), 670); +} + +void BasicGameTests() { + testing::LoadGameTest("bridge_uncontested_bidding(num_redeals=1)"); + testing::RandomSimTest(*LoadGame("bridge_uncontested_bidding(num_redeals=1)"), + 3); + testing::LoadGameTest("bridge"); + testing::RandomSimTest(*LoadGame("bridge"), 3); + testing::RandomSimTest(*LoadGame("bridge(use_double_dummy_result=false)"), 3); + testing::ResampleInfostateTest(*LoadGame("bridge"), 10); +} + +void DeserializeStateTest() { + auto game = LoadGame("bridge_uncontested_bidding(num_redeals=1)"); + auto state = game->DeserializeState("AKQJ.543.QJ8.T92 97532.A2.9.QJ853"); + SPIEL_CHECK_EQ(state->ToString(), "AKQJ.543.QJ8.T92 97532.A2.9.QJ853 "); +} + +void SerializeDoubleDummyResults() { + auto game = LoadGame("bridge"); + auto state = game->NewInitialState(); + for (auto action : {33, 25, 3, 44, 47, 28, 23, 46, 1, 43, 30, 26, 29, 48, + 24, 42, 13, 21, 17, 8, 5, 34, 6, 7, 37, 49, 11, 38, + 51, 32, 20, 9, 0, 14, 35, 22, 10, 50, 15, 45, 39, 16, + 12, 18, 27, 31, 41, 40, 4, 36, 19, 2, 52, 59, 52, 61}) { + state->ApplyAction(action); + } + auto str = state->Serialize(); + str = absl::StrReplaceAll(str, {{"\n", ","}}); + SPIEL_CHECK_EQ(str, + "33,25,3,44,47,28,23,46,1,43,30,26,29,48," + "24,42,13,21,17,8,5,34,6,7,37,49,11,38,51," + "32,20,9,0,14,35,22,10,50,15,45,39,16,12," + "18,27,31,41,40,4,36,19,2,52,59,52,61," + "Double Dummy Results," + "0,12,0,12,7,5,7,5,0,12,0,12,8,5,8,5,0,7,0,7,"); +} + +void DeserializeDoubleDummyResults() { + auto game = LoadGame("bridge"); + // These results intentionally incorrect to check that the + // implementation is using them rather than wastefully recomputing them. 
+ std::string serialized = + "33,25,3,44,47,28,23,46,1,43,30,26,29,48," + "24,42,13,21,17,8,5,34,6,7,37,49,11,38,51," + "32,20,9,0,14,35,22,10,50,15,45,39,16,12," + "18,27,31,41,40,4,36,19,2,52,59,52,61," + "Double Dummy Results," + "12,12,0,12,7,5,7,5,9,12,0,12,6,5,8,5,3,7,0,7,"; + serialized = absl::StrReplaceAll(serialized, {{",", "\n"}}); + auto new_state = game->DeserializeState(serialized); + SPIEL_CHECK_EQ(serialized, new_state->Serialize()); +} + +} // namespace +} // namespace bridge +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::bridge::DeserializeStateTest(); + open_spiel::bridge::ScoringTests(); + open_spiel::bridge::BasicGameTests(); + open_spiel::bridge::SerializeDoubleDummyResults(); + open_spiel::bridge::DeserializeDoubleDummyResults(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/bridge/bridge_uncontested_bidding.cc b/scenarios/bargaining/open_spiel/open_spiel/games/bridge/bridge_uncontested_bidding.cc new file mode 100644 index 0000000..4240003 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/bridge/bridge_uncontested_bidding.cc @@ -0,0 +1,461 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/bridge/bridge_uncontested_bidding.h" + +#include +#include + +#include "open_spiel/games/bridge/double_dummy_solver/include/dll.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/games/bridge/bridge_scoring.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// For compatibility with versions of the double dummy solver code which +// don't amend exported names. 
+#ifndef DDS_EXTERNAL +#define DDS_EXTERNAL(x) x +#endif + +namespace open_spiel { +namespace bridge_uncontested_bidding { +namespace { + +using open_spiel::bridge::kClubs; +using open_spiel::bridge::kDenominationChar; +using open_spiel::bridge::kDiamonds; +using open_spiel::bridge::kHearts; +using open_spiel::bridge::kNoTrump; +using open_spiel::bridge::kSpades; +using open_spiel::bridge::kUndoubled; + +constexpr int kDefaultNumRedeals = 10; // how many possible layouts to analyse + +const GameType kGameType{ + /*short_name=*/"bridge_uncontested_bidding", + /*long_name=*/"Bridge: Uncontested Bidding", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kSampledStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kIdentical, + GameType::RewardModel::kTerminal, + /*max_num_players=*/kNumPlayers, + /*min_num_players=*/kNumPlayers, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/false, + /*provides_observation_tensor=*/false, + /*parameter_specification=*/ + { + {"subgame", GameParameter(static_cast(""))}, + {"rng_seed", GameParameter(0)}, + {"relative_scoring", GameParameter(false)}, + {"num_redeals", GameParameter(kDefaultNumRedeals)}, + }, +}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new UncontestedBiddingGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +constexpr Action kPass = 0; +constexpr Action k2NT = 10; + +bool Is2NTDeal(const Deal& deal) { + int lengths[kNumSuits] = {0, 0, 0, 0}; + int hcp = 0; + for (int i = 0; i < kNumCardsPerHand; ++i) { + int suit = deal.Suit(i); + int rank = deal.Rank(i); + lengths[suit]++; + if (rank > 8) hcp += (rank - 8); + } + // Balanced means 4333, 4432 or 5332 + bool is_balanced = (lengths[0] * lengths[1] * lengths[2] * lengths[3] >= 90); + return is_balanced && (20 <= hcp) && (hcp <= 21); +} +bool NoFilter(const Deal& deal) { return true; } + +} // namespace + +int UncontestedBiddingState::CurrentPlayer() const { + if (!dealt_) return kChancePlayerId; + if (IsTerminal()) return kTerminalPlayerId; + return actions_.size() % 2; +} + +constexpr bridge::Denomination Denomination(Action bid) { + return bridge::Denomination((bid - 1) % kNumDenominations); +} + +constexpr int Level(Action bid) { return 1 + (bid - 1) / kNumDenominations; } + +std::string UncontestedBiddingState::ActionToString(Player player, + Action action_id) const { + if (player == kChancePlayerId) return "Deal"; + if (action_id == kPass) return "Pass"; + return absl::StrCat( + Level(action_id), + std::string(1, kDenominationChar[Denomination(action_id)])); +} + +Action ActionFromString(const std::string& str) { + if (str == "Pass") return kPass; + SPIEL_CHECK_EQ(str.length(), 2); + auto level = str[0] - '0'; + auto denomination = std::string(kDenominationChar).find(str[1]); + SPIEL_CHECK_NE(denomination, std::string::npos); + return (level - 1) * kNumDenominations + denomination + 1; +} + +std::string Deal::HandString(int begin, int end) const { + bool cards[kNumSuits][kNumCardsPerSuit] = {{false}}; + for (int i = begin; i < end; ++i) { + cards[Suit(i)][Rank(i)] = true; + } + std::string hand; + for (int s = 3; s >= 0; --s) { + for (int r = 12; r >= 0; --r) { + if (cards[s][r]) { + hand.push_back(kRankChar[r]); + } + } + if (s) hand.push_back('.'); + } + return hand; +} + +std::string UncontestedBiddingState::ToString() const { + if (!dealt_) return ""; 
+ std::string rv = absl::StrCat(deal_.HandString(0, 13), " ", + deal_.HandString(13, 26), " ", AuctionString()); + if (IsTerminal()) { + absl::StrAppend(&rv, " Score:", score_); + for (int i = 0; i < reference_contracts_.size(); ++i) { + absl::StrAppend(&rv, " ", reference_contracts_[i].ToString(), ":", + reference_scores_[i]); + } + } + return rv; +} + +bool UncontestedBiddingState::IsTerminal() const { + return dealt_ && actions_.size() >= 2 && actions_.back() == kPass; +} + +std::vector UncontestedBiddingState::Returns() const { + if (!IsTerminal()) return {0, 0}; + double v = score_; + if (reference_scores_.empty()) { + return {v, v}; + } else { + const double datum = + *std::max_element(reference_scores_.begin(), reference_scores_.end()); + return {v, v - datum}; + } +} + +std::string UncontestedBiddingState::AuctionString() const { + std::string actions; + for (const auto action : actions_) { + if (!actions.empty()) actions.push_back('-'); + actions.append(ActionToString(0, action)); + } + return actions; +} + +std::string UncontestedBiddingState::InformationStateString( + Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + if (!dealt_) return ""; + return absl::StrCat(deal_.HandString(player * 13, (player + 1) * 13), " ", + AuctionString()); +} + +void UncontestedBiddingState::InformationStateTensor( + Player player, absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + SPIEL_CHECK_EQ(values.size(), kStateSize); + std::fill(values.begin(), values.end(), 0.); + auto ptr = values.begin(); + + for (int i = kNumCardsPerHand * player; i < kNumCardsPerHand * (1 + player); + ++i) { + ptr[deal_.Card(i)] = 1.; + } + ptr += kNumCards; + + // What actions have been taken, and by whom + for (int i = 0; i < actions_.size(); ++i) { + ptr[actions_[i] * kNumPlayers + (i % kNumPlayers)] = 1; + } + ptr += kNumActions * kNumPlayers; + + // Which player we are + ptr[player] = 1; + ptr += kNumPlayers; +} + +std::unique_ptr UncontestedBiddingState::Clone() const { + return std::unique_ptr(new UncontestedBiddingState(*this)); +} + +std::vector UncontestedBiddingState::LegalActions() const { + if (IsTerminal()) { + return {}; + } else if (dealt_) { + std::vector actions{kPass}; + const Action prev = actions_.empty() ? kPass : actions_.back(); + for (Action a = prev + 1; a < kNumActions; ++a) actions.push_back(a); + return actions; + } else { + return {0}; + } +} + +void UncontestedBiddingState::ScoreDeal() { + // If both Pass, the score is zero. + const bool passed_out = (actions_.size() == 2); + if (passed_out && reference_contracts_.empty()) { + score_ = 0; + return; + } + + // Determine the final contract and declarer + const Action bid = actions_[actions_.size() - 2]; + Contract contract{passed_out ? 0 : Level(bid), + passed_out ? 
kNoTrump : Denomination(bid), kUndoubled}; + for (int i = 0; i < actions_.size(); ++i) { + if (actions_[i] > 0 && Denomination(actions_[i]) == contract.trumps) { + contract.declarer = i % 2; + break; + } + } + + // Populate East-West cards + ddTableDeal dd_table_deal{}; + for (Player player = 0; player < kNumPlayers; ++player) { + for (int i = kNumCardsPerHand * player; i < kNumCardsPerHand * (1 + player); + ++i) { + dd_table_deal.cards[player * 2][deal_.Suit(i)] += 1 + << (2 + deal_.Rank(i)); + } + } + + // Initialize scores to zero + score_ = 0; + reference_scores_.resize(reference_contracts_.size()); + std::fill(reference_scores_.begin(), reference_scores_.end(), 0); + + // For each redeal + for (int ideal = 0; ideal < num_redeals_; ++ideal) { + if (ideal > 0) deal_.Shuffle(&rng_, kNumCardsPerHand * 2, kNumCards); + + // Populate (reshuffled) North-South cards + for (int opponent = 0; opponent < kNumPlayers; ++opponent) { + std::fill(dd_table_deal.cards[1 + opponent * 2], + dd_table_deal.cards[1 + opponent * 2] + 4, 0); + for (int i = kNumCardsPerHand * (2 + opponent); + i < kNumCardsPerHand * (3 + opponent); ++i) { + dd_table_deal.cards[1 + opponent * 2][deal_.Suit(i)] += + 1 << (2 + deal_.Rank(i)); + } + } + + // Analyze the deal. + DDS_EXTERNAL(SetMaxThreads)(0); + struct ddTableResults results; + const int return_code = DDS_EXTERNAL(CalcDDtable)(dd_table_deal, &results); + + // Check for errors. + if (return_code != RETURN_NO_FAULT) { + char error_message[80]; + DDS_EXTERNAL(ErrorMessage)(return_code, error_message); + SpielFatalError(absl::StrCat("double_dummy_solver:", error_message)); + } + + // Compute the score and update the total. + if (!passed_out) { + const int declarer_tricks = + results.resTable[contract.trumps][2 * contract.declarer]; + const int declarer_score = + Score(contract, declarer_tricks, /*is_vulnerable=*/false); + score_ += static_cast(declarer_score) / num_redeals_; + } + + // Compute the scores for reference contracts. 
+ for (int i = 0; i < reference_contracts_.size(); ++i) { + const int declarer_tricks = + results.resTable[reference_contracts_[i].trumps] + [2 * reference_contracts_[i].declarer]; + const int declarer_score = Score(reference_contracts_[i], declarer_tricks, + /*is_vulnerable=*/false); + reference_scores_[i] += + static_cast(declarer_score) / num_redeals_; + } + } +} + +void UncontestedBiddingState::DoApplyAction(Action action_id) { + if (dealt_) { + actions_.push_back(action_id); + if (IsTerminal()) ScoreDeal(); + } else { + do { + deal_.Shuffle(&rng_); + } while (!deal_filter_(deal_)); + dealt_ = true; + } +} + +std::vector> UncontestedBiddingState::ChanceOutcomes() + const { + return {{0, 1.0}}; +} + +UncontestedBiddingGame::UncontestedBiddingGame(const GameParameters& params) + : Game(kGameType, params), + forced_actions_{}, + deal_filter_{NoFilter}, + rng_seed_(ParameterValue("rng_seed")), + num_redeals_(ParameterValue("num_redeals")) { + std::string subgame = ParameterValue("subgame"); + if (subgame == "2NT") { + deal_filter_ = Is2NTDeal; + forced_actions_ = {k2NT}; + if (ParameterValue("relative_scoring")) { + reference_contracts_ = { + {2, kNoTrump, kUndoubled, 0}, {3, kClubs, kUndoubled, 1}, + {3, kDiamonds, kUndoubled, 0}, {3, kDiamonds, kUndoubled, 1}, + {3, kHearts, kUndoubled, 0}, {3, kHearts, kUndoubled, 1}, + {3, kSpades, kUndoubled, 0}, {3, kSpades, kUndoubled, 1}, + {3, kNoTrump, kUndoubled, 0}, {4, kClubs, kUndoubled, 0}, + {4, kHearts, kUndoubled, 0}, {4, kHearts, kUndoubled, 1}, + {4, kSpades, kUndoubled, 0}, {4, kSpades, kUndoubled, 1}, + {5, kClubs, kUndoubled, 0}, {5, kClubs, kUndoubled, 1}, + {5, kDiamonds, kUndoubled, 0}, {5, kDiamonds, kUndoubled, 1}, + {6, kClubs, kUndoubled, 0}, {6, kClubs, kUndoubled, 1}, + {6, kDiamonds, kUndoubled, 0}, {6, kDiamonds, kUndoubled, 1}, + {6, kHearts, kUndoubled, 0}, {6, kHearts, kUndoubled, 1}, + {6, kSpades, kUndoubled, 0}, {6, kSpades, kUndoubled, 1}, + {6, kNoTrump, kUndoubled, 0}, {7, kClubs, kUndoubled, 0}, + {7, kClubs, kUndoubled, 1}, {7, kDiamonds, kUndoubled, 0}, + {7, kDiamonds, kUndoubled, 1}, {7, kHearts, kUndoubled, 0}, + {7, kHearts, kUndoubled, 1}, {7, kSpades, kUndoubled, 0}, + {7, kSpades, kUndoubled, 1}, {7, kNoTrump, kUndoubled, 0}}; + } + } else { + SPIEL_CHECK_EQ(subgame, ""); + if (ParameterValue("relative_scoring")) { + reference_contracts_ = { + {0, kNoTrump, kUndoubled, 0}, {1, kClubs, kUndoubled, 0}, + {1, kClubs, kUndoubled, 1}, {1, kDiamonds, kUndoubled, 0}, + {1, kDiamonds, kUndoubled, 1}, {1, kHearts, kUndoubled, 0}, + {1, kHearts, kUndoubled, 1}, {1, kSpades, kUndoubled, 0}, + {1, kSpades, kUndoubled, 1}, {1, kNoTrump, kUndoubled, 0}, + {1, kNoTrump, kUndoubled, 1}, {3, kNoTrump, kUndoubled, 0}, + {3, kNoTrump, kUndoubled, 1}, {4, kHearts, kUndoubled, 0}, + {4, kHearts, kUndoubled, 1}, {4, kSpades, kUndoubled, 0}, + {4, kSpades, kUndoubled, 1}, {5, kClubs, kUndoubled, 0}, + {5, kClubs, kUndoubled, 1}, {5, kDiamonds, kUndoubled, 0}, + {5, kDiamonds, kUndoubled, 1}, {6, kClubs, kUndoubled, 0}, + {6, kClubs, kUndoubled, 1}, {6, kDiamonds, kUndoubled, 0}, + {6, kDiamonds, kUndoubled, 1}, {6, kHearts, kUndoubled, 0}, + {6, kHearts, kUndoubled, 1}, {6, kSpades, kUndoubled, 0}, + {6, kSpades, kUndoubled, 1}, {6, kNoTrump, kUndoubled, 0}, + {6, kNoTrump, kUndoubled, 1}, {7, kClubs, kUndoubled, 0}, + {7, kClubs, kUndoubled, 1}, {7, kDiamonds, kUndoubled, 0}, + {7, kDiamonds, kUndoubled, 1}, {7, kHearts, kUndoubled, 0}, + {7, kHearts, kUndoubled, 1}, {7, kSpades, kUndoubled, 0}, + {7, kSpades, kUndoubled, 1}, 
{7, kNoTrump, kUndoubled, 0}, + {7, kNoTrump, kUndoubled, 1}}; + } + } +} + +// Deserialize the deal and auction +// e.g. "AKQJ.543.QJ8.T92 97532.A2.9.QJ853 2N-3C" +std::unique_ptr UncontestedBiddingGame::DeserializeState( + const std::string& str) const { + if (str.empty()) { + return absl::make_unique( + shared_from_this(), reference_contracts_, deal_filter_, forced_actions_, + rng_seed_, num_redeals_); + } + SPIEL_CHECK_GE(str.length(), + kNumPlayers * (kNumCardsPerHand + kNumSuits) - 1); + std::array cards{}; + std::array cards_dealt{}; + for (Player player = 0; player < kNumPlayers; ++player) { + int suit = 0; + int start = player * (kNumCardsPerHand + kNumSuits); + for (int i = 0; i < kNumCardsPerHand; ++i) { + char ch = str[start + i + suit]; + while (ch == '.') { + ++suit; + ch = str[start + i + suit]; + } + const int rank = (std::strchr(kRankChar, ch) - kRankChar); + const int card = rank * 4 + (3 - suit); + SPIEL_CHECK_FALSE(cards_dealt[card]); + cards[player * kNumCardsPerHand + i] = card; + cards_dealt[card] = true; + } + } + int i = kNumPlayers * kNumCardsPerHand; + for (int c = 0; c < 52; ++c) { + if (!cards_dealt[c]) cards[i++] = c; + } + + // Get any actions there may be. + std::vector actions; + int start = kNumPlayers * (kNumCardsPerHand + kNumSuits); + while (start < str.length()) { + auto end = str.find('-', start); + if (end == std::string::npos) end = str.length(); + actions.push_back(ActionFromString(str.substr(start, end - start))); + start = end + 1; + } + + // Check that early actions agree with the forced actions in this game. + SPIEL_CHECK_GE(actions.size(), forced_actions_.size()); + for (int i = 0; i < forced_actions_.size(); ++i) { + SPIEL_CHECK_EQ(actions[i], forced_actions_[i]); + } + + return absl::make_unique( + shared_from_this(), reference_contracts_, Deal(cards), actions, rng_seed_, + num_redeals_); +} + +std::string UncontestedBiddingGame::GetRNGState() const { + return std::to_string(rng_seed_); +} + +void UncontestedBiddingGame::SetRNGState(const std::string& rng_state) const { + if (rng_state.empty()) return; + SPIEL_CHECK_TRUE(absl::SimpleAtoi(rng_state, &rng_seed_)); +} + +} // namespace bridge_uncontested_bidding +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/bridge/bridge_uncontested_bidding.h b/scenarios/bargaining/open_spiel/open_spiel/games/bridge/bridge_uncontested_bidding.h new file mode 100644 index 0000000..346f76f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/bridge/bridge_uncontested_bidding.h @@ -0,0 +1,245 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_BRIDGE_UNCONTESTED_BIDDING_H_ +#define OPEN_SPIEL_GAMES_BRIDGE_UNCONTESTED_BIDDING_H_ + +#include + +// Uncontested bridge bidding. A two-player purely cooperative game. +// +// The full game of contract bridge is played by four players in two +// partnerships; it consists of a bidding phase followed by a play phase. 
The +// bidding phase determines the contract for the play phase. The contract has +// four components: +// - Which of the four players is the 'declarer'. (The first play is made by +// the player on declarer's left. Declarer's partner (the 'dummy') then +// places their cards face-up for everyone to see; their plays are chosen +// by declarer.) +// - The trump suit (or no-trumps). +// - The level, i.e. the trick target for the declaring partnership. +// - Whether the contract is doubled or redoubled (increasing the stakes). +// +// In 'uncontested bidding', we simplify the game in two ways: +// 1. Only one partnership may bid during the auction phase (hence +// 'uncontested'). +// 2. Rather than play out the play phase, we generate several (e.g. 10) +// layouts of the opponents' cards, solve for the number of tricks that +// would be taken with perfect perfect-information play by both sides +// on each deal, and use the average score over these deals. (This +// perfect information solution is called 'double dummy', because it is +// equivalent to one player of each partnerships being 'dummy' in the sense +// described above). +// +// Since the other partnership has no actions available, this is +// a two-player cooperative game. It is widely used by partnerships +// to practice their bidding. See for example this on-line tool: +// http://www.bridgebase.com/help/v2help/partnership_bidding.html +// Or these pre-constructed hands: +// http://rpbridge.net/rpbp.htm (here the scores are generated using human +// judgement rather than the automated procedure given above). +// +// We support two variations: +// 1. Any deal permitted, auction starts normally. +// In this variant, WBridge5 scores +95.1 absolute, std err 2.2 +// Its relative score (compared to the best-possible score on each deal) +// is -68.8, std err 1.3 (both results from n=8750 deals). +// 2. First player is dealt a hand suitable for a 2NT opening (i.e. a bid +// of 8 tricks with no trumps), and is forced to bid 2NT. +// A 2NT opening is typically played as showing a very strong balanced +// hand. 'Balanced' means that the distribution of cards between the +// suits is 4-3-3-3, 4-4-3-2, or 5-3-3-2 (some might also incude some +// 6-3-2-2 or 5-4-2-2 hands, but we do not). +// Strength is typically measured using 'high card points' (A=4, K=3, Q=2, +// J=1). A 2NT opening on this scale might be 20-22, 20-21, 21-22, or +// similar depending on agreement. We use 20-21, in line with the +// 'Standard American Yellow Card' system: +// http://web2.acbl.org/documentlibrary/play/SP3%20(bk)%20single%20pages.pdf +// Expert players may adjust hand valuation up or down slightly depending +// on the location of their high cards; we do not attempt to replicate +// this. +// +// The 2NT variant is both a smaller game, and also a fairer comparison +// with existing bots, since in practice auctions which start with 2NT are +// almost always uncontested. This means that bidding is generally conducted +// without worrying that the opponents might bid. This is in contrast to the +// full game where many bids are designed in anticipation of the possibility +// that the opponents may bid - a constraint that we do not have in this game. +// +// We treat the initial deal as a single sampled stochastic chance event; that +// is, the game tree has a single chance event with a single possible outcome, +// but when applying this outcome, the game state evolves stochastically, +// reflecting the full deal that has taken place. 
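The "balanced hand with 20-21 HCP" filter described above is what `Is2NTDeal` in `bridge_uncontested_bidding.cc` implements, using a compact product test on the four suit lengths. The sketch below is editorial (the helper name `IsBalanced2NTRange` is illustrative, not part of the patch); it spells out why a threshold of 90 on the product selects exactly the 4-3-3-3, 4-4-3-2 and 5-3-3-2 shapes.

```cpp
// For 13 cards split across four suits, the product of the suit lengths is
// largest for the most even splits:
//   4*3*3*3 = 108,  4*4*3*2 = 96,  5*3*3*2 = 90,
// and every other shape falls below 90, e.g. 5*4*2*2 = 80, 6*3*2*2 = 72,
// 4*4*4*1 = 64, 5*4*3*1 = 60. So "product >= 90" is exactly "balanced".
inline bool IsBalanced2NTRange(const int suit_lengths[4], int high_card_points) {
  const bool balanced = suit_lengths[0] * suit_lengths[1] *
                        suit_lengths[2] * suit_lengths[3] >= 90;
  return balanced && high_card_points >= 20 && high_card_points <= 21;
}
```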
+// +// We could have explicit chance in case (1), e.g. with one chnce node for each +// card being dealt, but this would be hard in case (2), and we choose to +// treat both consistently. +// +// The score for player 0 will always be the raw point score for the contract +// reached. If the parameter `relative_scoring` is set to true, then the score +// for player 1 will be the score relative to the best-scoring of the possible +// contracts (so 0 if the contract reached is the best-scoring contract, +// otherwise negative). + +#include "open_spiel/games/bridge/bridge_scoring.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace bridge_uncontested_bidding { + +using bridge::Contract; + +inline constexpr int kNumSuits = 4; +inline constexpr int kNumDenominations = 1 + kNumSuits; +inline constexpr int kMaxBid = 7; +inline constexpr int kNumBids = kMaxBid * kNumDenominations; +inline constexpr int kNumActions = kNumBids + 1; +inline constexpr int kNumCardsPerSuit = 13; +inline constexpr int kNumCards = kNumSuits * kNumCardsPerSuit; +inline constexpr int kNumPlayers = 2; +inline constexpr int kNumHands = 4; +inline constexpr int kNumCardsPerHand = 13; +inline constexpr int kMinScore = -650; // 13 undertricks, at 50 each +inline constexpr int kMaxScore = 1520; // 7NT making +inline constexpr int kStateSize = + kNumCards + kNumPlayers * kNumActions + kNumPlayers; +inline constexpr char kRankChar[] = "23456789TJQKA"; + +class Deal { + public: + Deal() { std::iota(std::begin(cards_), std::end(cards_), 0); } + void Shuffle(std::mt19937* rng, int begin = 0, int end = kNumCards) { + for (int i = begin; i < end - 1; ++i) { + // We don't use absl::uniform_int_distribution because it behaves + // differently in different versions of C++, and we want reproducible + // tests. 
+ int j = i + (*rng)() % (end - i); + std::swap(cards_[i], cards_[j]); + } + } + Deal(const std::array& cards) : cards_(cards) {} + int Card(int i) const { return cards_[i]; } + int Suit(int i) const { return cards_[i] % kNumSuits; } + int Rank(int i) const { return cards_[i] / kNumSuits; } + std::string HandString(int begin, int end) const; + + private: + std::array cards_; // 0..12 are West's, then E, N, S +}; + +class UncontestedBiddingState : public State { + public: + UncontestedBiddingState(std::shared_ptr game, + std::vector reference_contracts, + std::function deal_filter, + std::vector actions, int rng_seed, + int num_redeals) + : State(game), + reference_contracts_(std::move(reference_contracts)), + actions_(std::move(actions)), + deal_filter_(deal_filter), + rng_(rng_seed), + num_redeals_(num_redeals), + dealt_(false) {} + UncontestedBiddingState(std::shared_ptr game, + std::vector reference_contracts, + const Deal& deal, std::vector actions, + int rng_seed, int num_redeals) + : State(game), + reference_contracts_(std::move(reference_contracts)), + actions_(std::move(actions)), + rng_(rng_seed), + num_redeals_(num_redeals), + deal_(deal), + dealt_(true) { + if (IsTerminal()) ScoreDeal(); + } + UncontestedBiddingState(const UncontestedBiddingState&) = default; + + Player CurrentPlayer() const override; + std::string ActionToString(Player player, Action action_id) const override; + std::string AuctionString() const; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + void InformationStateTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + std::vector LegalActions() const override; + std::vector> ChanceOutcomes() const override; + std::string Serialize() const override { return ToString(); } + + protected: + void DoApplyAction(Action action_id) override; + void ScoreDeal(); + + private: + // If non-empty, the score for player 1 will be relative to the best-scoring + // of these contracts. This may be useful to reduce variance, or to provide a + // signal for how suboptimal the outcome achieved is. + std::vector reference_contracts_; + std::vector actions_; + // This function is used to select possible deals. We repeatedly shuffle the + // deck until this function returns true. It may always return true if no + // filtering is required, or it may check that the opening bidder has a + // balanced hand with 20-21 HCP (a 2NT opener - see above). + std::function deal_filter_; + mutable std::mt19937 rng_; + const int num_redeals_; + mutable Deal deal_; + bool dealt_; + double score_; // score for the achieved contract + std::vector reference_scores_; // scores for the reference_contracts +}; + +class UncontestedBiddingGame : public Game { + public: + explicit UncontestedBiddingGame(const GameParameters& params); + int NumDistinctActions() const override { return kNumActions; } + int MaxChanceOutcomes() const override { return 1; } + std::unique_ptr NewInitialState() const override { + return absl::make_unique( + shared_from_this(), reference_contracts_, deal_filter_, forced_actions_, + ++rng_seed_, num_redeals_); + } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { + return reference_contracts_.empty() ? kMinScore : kMinScore - kMaxScore; + } + double MaxUtility() const override { + return reference_contracts_.empty() ? 
kMaxScore : 0; + } + std::vector InformationStateTensorShape() const override { + return {kStateSize}; + } + int MaxGameLength() const override { return kNumActions; } + int MaxChanceNodesInHistory() const override { return 1; } + std::unique_ptr DeserializeState( + const std::string& str) const override; + std::string GetRNGState() const; + void SetRNGState(const std::string& rng_state) const; + + private: + std::vector reference_contracts_; + std::vector forced_actions_; + std::function deal_filter_; + mutable int rng_seed_; + const int num_redeals_; +}; + +} // namespace bridge_uncontested_bidding +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_BRIDGE_UNCONTESTED_BIDDING_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/catan/random_bot_test.py b/scenarios/bargaining/open_spiel/open_spiel/games/catan/random_bot_test.py new file mode 100644 index 0000000..4f093be --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/catan/random_bot_test.py @@ -0,0 +1,56 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np + +from open_spiel.games.catan import py_catan # pylint: disable=unused-import +from open_spiel.python.bots import uniform_random +import pyspiel + + +class RandomBotTest(parameterized.TestCase): + + @parameterized.parameters([ + dict(seed=34239871, max_turns=int(1e2)), + ]) + def test_random_bot_plays(self, seed, max_turns): + np.random.seed(seed) + + game = pyspiel.load_game('catan') + state = game.new_initial_state() + + # Load bot + bots = [uniform_random.UniformRandomBot(player_id=i, rng=np.random) + for i in range(4)] + + while not state.is_terminal() and state.player_turns() < max_turns: + if state.is_chance_node(): + # Chance node: sample an outcome + outcomes = state.chance_outcomes() + action_list, prob_list = zip(*outcomes) + action = np.random.choice(action_list, p=prob_list) + state.apply_action(action) + else: + # Decision node: sample action for the single current player + action = bots[state.current_player()].step(state) + state.apply_action(action) + + valid_exit = state.is_terminal() or (state.player_turns() >= max_turns) + self.assertTrue(valid_exit, 'game loop exited prematurely') + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/catan/test_data/longest_road_game1.txt b/scenarios/bargaining/open_spiel/open_spiel/games/catan/test_data/longest_road_game1.txt new file mode 100644 index 0000000..0ff7f82 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/catan/test_data/longest_road_game1.txt @@ -0,0 +1,1930 @@ +# Automatically generated by OpenSpiel SerializeGameAndState +[Meta] +Version: 1 + +[Game] +catan(enable_colors=True,max_num_player_turns=500,max_num_trade_resources=2,players=4) +[State] +15 +3 +4 +0 +3 +2 +4 +0 +1 +1 +2 +4 +2 +3 +4 +1 +3 +0 +1 +5 +4 +2 +5 +5 +5 +5 +0 +1 +3 +121 +68 +104 +44 +106 +37 +126 +66 
+114 +58 +82 +13 +112 +56 +80 +19 +0 +226 +356 +347 +351 +344 +470 +298 +0 +294 +435 +469 +69 +0 +1 +439 +353 +353 +333 +471 +468 +464 +459 +460 +0 +0 +2 +439 +379 +395 +472 +469 +439 +458 +467 +439 +470 +0 +3 +270 +292 +0 +300 +471 +317 +372 +333 +412 +470 +127 +0 +0 +4 +298 +0 +293 +471 +472 +455 +342 +455 +353 +470 +0 +5 +157 +344 +357 +467 +0 +469 +344 +329 +356 +312 +471 +0 +6 +429 +418 +280 +0 +0 +461 +427 +272 +0 +0 +0 +128 +166 +178 +4 +7 +445 +290 +288 +454 +472 +289 +289 +406 +278 +471 +0 +8 +64 +297 +292 +302 +289 +472 +458 +455 +456 +460 +0 +0 +9 +36 +407 +375 +400 +380 +472 +283 +426 +432 +0 +0 +0 +10 +302 +432 +440 +298 +0 +400 +432 +269 +266 +471 +127 +0 +0 +0 +351 +438 +306 +353 +0 +306 +415 +347 +436 +471 +0 +1 +230 +282 +301 +287 +297 +472 +427 +471 +439 +409 +470 +0 +2 +411 +0 +464 +418 +469 +127 +0 +407 +408 +435 +427 +472 +0 +128 +168 +176 +4 +3 +127 +3 +347 +409 +404 +353 +0 +416 +356 +404 +326 +471 +14 +0 +4 +441 +454 +467 +356 +0 +467 +453 +469 +356 +470 +0 +5 +169 +176 +0 +416 +278 +278 +432 +0 +439 +278 +471 +435 +469 +123 +0 +128 +166 +178 +4 +6 +280 +0 +467 +445 +471 +252 +0 +292 +0 +469 +57 +0 +7 +132 +20 +0 +8 +62 +288 +278 +460 +436 +472 +298 +402 +299 +298 +470 +127 +0 +0 +9 +335 +460 +450 +453 +472 +357 +392 +321 +399 +469 +0 +10 +51 +401 +403 +0 +397 +471 +436 +357 +349 +436 +469 +0 +0 +436 +0 +450 +452 +470 +409 +409 +413 +416 +469 +0 +128 +163 +1 +417 +413 +469 +409 +471 +401 +400 +469 +397 +471 +0 +2 +432 +402 +436 +402 +469 +0 +3 +432 +470 +454 +467 +472 +354 +446 +467 +467 +469 +0 +4 +127 +0 +333 +333 +355 +334 +469 +439 +460 +439 +435 +470 +0 +5 +158 +325 +402 +0 +325 +471 +425 +349 +356 +0 +0 +0 +6 +289 +427 +395 +439 +469 +468 +389 +395 +459 +0 +0 +7 +27 +404 +412 +460 +416 +471 +459 +0 +460 +470 +469 +0 +128 +160 +8 +464 +445 +445 +460 +0 +454 +454 +349 +356 +470 +0 +9 +119 +407 +0 +432 +438 +0 +438 +411 +436 +0 +471 +0 +10 +410 +410 +415 +294 +469 +402 +0 +415 +429 +0 +227 +0 +0 +315 +308 +303 +400 +471 +436 +330 +432 +350 +0 +0 +1 +127 +0 +467 +443 +465 +453 +469 +29 +0 +2 +449 +464 +454 +0 +470 +413 +437 +432 +0 +471 +127 +2 +0 +3 +403 +435 +376 +0 +0 +436 +432 +0 +0 +470 +0 +4 +339 +435 +0 +409 +471 +355 +315 +308 +339 +471 +0 +128 +164 +176 +4 +5 +162 +462 +436 +422 +435 +470 +351 +0 +315 +353 +471 +198 +0 +6 +130 +52 +54 +395 +395 +0 +298 +471 +48 +0 +7 +70 +321 +404 +399 +0 +0 +406 +429 +399 +0 +0 +226 +0 +8 +28 +0 +9 +396 +376 +434 +399 +470 +410 +399 +472 +414 +469 +0 +10 +350 +349 +469 +0 +470 +219 +434 +434 +432 +0 +471 +0 +0 +308 +434 +313 +308 +470 +353 +0 +354 +434 +469 +186 +0 +1 +437 +0 +0 +415 +471 +467 +470 +399 +464 +472 +0 +2 +415 +0 +425 +0 +471 +415 +416 +0 +438 +470 +127 +2 +0 +3 +422 +467 +0 +464 +472 +435 +399 +400 +469 +470 +223 +0 +4 +439 +425 +432 +0 +0 +417 +0 +417 +0 +472 +231 +0 +5 +171 +175 +0 +0 +6 +130 +30 +46 +345 +427 +344 +436 +470 +353 +317 +344 +318 +0 +0 +7 +296 +298 +298 +291 +0 +292 +0 +298 +294 +472 +0 +8 +65 +432 +271 +471 +289 +469 +326 +327 +344 +346 +469 +0 +9 +43 +376 +276 +376 +393 +471 +241 +434 +239 +245 +0 +0 +10 +45 +436 +363 +472 +390 +470 +395 +271 +0 +413 +0 +0 +0 +350 +350 +347 +346 +472 +262 +266 +267 +395 +470 +0 +1 +397 +367 +365 +368 +472 +344 +428 +331 +0 +0 +0 +2 +127 +0 +289 +248 +249 +239 +0 +295 +294 +251 +267 +469 +0 +3 +370 +322 +400 +439 +469 +374 +432 +400 +402 +469 +194 +0 +4 +293 +397 +251 +395 +472 +49 +271 +298 +275 +301 +470 +0 +5 +166 +178 +4 +331 +431 +0 +413 +470 +116 +468 +468 +465 +0 +470 +226 +0 +128 +165 +178 +2 +6 +297 +464 +0 +0 +472 +402 +402 +0 +363 +471 +63 +0 
+7 +446 +445 +464 +460 +469 +0 +8 +341 +381 +0 +453 +471 +59 +465 +461 +0 +466 +472 +0 +9 +315 +385 +356 +353 +469 +127 +1 +390 +363 +402 +0 +0 +0 +10 +344 +0 +0 +349 +0 +344 +0 +0 +349 +0 +0 +0 +437 +462 +437 +460 +470 +15 +439 +0 +436 +439 +470 +0 +1 +399 +397 +354 +0 +0 +321 +353 +353 +356 +0 +0 +2 +402 +323 +439 +356 +0 +439 +369 +471 +349 +469 +0 +3 +467 +416 +439 +425 +469 +433 +432 +463 +450 +0 +0 +4 +450 +449 +330 +440 +471 +401 +385 +385 +0 +469 +0 +5 +168 +176 +3 +127 +1 +434 +434 +334 +0 +471 +313 +321 +402 +0 +470 +0 +6 +67 +429 +390 +395 +416 +469 +127 +1 +0 +7 +42 +384 +398 +385 +0 +469 +344 +0 +346 +0 +469 +0 +8 +343 +389 +386 +346 +469 +440 +412 +0 +407 +470 +31 +0 +9 +127 +1 +340 +461 +351 +0 +470 +127 +1 +0 +10 +312 +312 +308 +0 +472 +399 +395 +395 +395 +470 +0 +0 +376 +470 +433 +399 +472 +354 +349 +0 +0 +472 +0 +1 +377 +410 +376 +436 +469 +312 +322 +312 +308 +471 +0 +2 +464 +422 +415 +422 +0 +467 +433 +464 +466 +470 +0 +3 +315 +0 +313 +351 +472 +228 +312 +0 +308 +471 +469 +0 +4 +460 +470 +0 +466 +471 +127 +0 +0 +5 +162 +395 +462 +0 +0 +469 +366 +314 +370 +0 +0 +0 +6 +110 +0 +7 +289 +455 +465 +467 +469 +429 +432 +272 +0 +472 +0 +128 +164 +8 +41 +0 +9 +16 +330 +333 +432 +330 +471 +364 +356 +363 +308 +469 +189 +0 +10 +314 +399 +349 +315 +472 +464 +395 +0 +385 +0 +0 +0 +213 +0 +1 +401 +376 +0 +439 +472 +349 +470 +352 +355 +472 +0 +2 +330 +439 +354 +0 +0 +413 +436 +333 +438 +0 +184 +0 +3 +388 +401 +0 +469 +471 +467 +449 +439 +467 +471 +0 +4 +0 +5 +155 +152 +154 +153 +168 +389 +400 +387 +403 +471 +450 +446 +463 +465 +469 +0 +6 +331 +357 +430 +348 +471 +32 +372 +402 +413 +435 +469 +0 +7 +127 +4 +433 +436 +432 +299 +471 +290 +298 +264 +266 +471 +0 +8 +71 +440 +431 +438 +349 +470 +24 +461 +471 +466 +466 +472 +0 +9 +127 +0 +440 +439 +330 +312 +0 +333 +440 +310 +436 +469 +0 +10 +127 +0 +467 +461 +466 +0 +0 +467 +0 +466 +472 +471 +0 +0 +150 +389 +342 +313 +321 +472 +341 +353 +0 +349 +471 +0 +1 +350 +353 +353 +353 +470 +435 +434 +432 +471 +472 +0 +128 +170 +178 +2 +2 +127 +0 +357 +353 +356 +0 +0 +397 +399 +324 +0 +0 +0 +128 +164 +176 +1 +3 +313 +435 +376 +330 +469 +127 +0 +0 +4 +0 +5 +158 +355 +308 +314 +314 +0 +353 +308 +315 +313 +472 +0 +128 +172 +177 +4 +6 +396 +399 +402 +0 +0 +460 +396 +388 +0 +0 +0 +128 +168 +176 +2 +7 +378 +356 +0 +0 +469 +127 +3 +185 +0 +8 +413 +433 +356 +308 +472 +396 +432 +398 +410 +0 +193 +0 +9 +315 +311 +308 +308 +0 +356 +315 +312 +350 +469 +0 +10 +397 +387 +462 +462 +0 +353 +462 +339 +395 +472 +0 +0 +134 +97 +436 +436 +432 +409 +469 +433 +0 +376 +436 +0 +0 +1 +432 +353 +350 +330 +470 +367 +469 +395 +399 +470 +0 +2 +396 +377 +321 +401 +470 +399 +314 +349 +312 +472 +0 +3 +127 +0 +396 +396 +0 +395 +472 +356 +349 +314 +349 +471 +0 +4 +127 +4 +422 +466 +433 +422 +471 +351 +349 +342 +308 +471 +0 +5 +164 +176 +2 +127 +0 +321 +0 +0 +469 +472 +0 +6 +72 +399 +385 +462 +471 +472 +370 +317 +356 +361 +472 +0 +128 +170 +178 +1 +7 +40 +349 +0 +303 +470 +469 +299 +289 +0 +294 +472 +0 +8 +149 +0 +128 +160 +178 +2 +9 +305 +350 +308 +354 +471 +221 +456 +356 +460 +456 +472 +61 +38 +0 +10 +213 +448 +464 +310 +450 +0 +312 +448 +454 +308 +0 +0 +0 +401 +401 +0 +357 +471 +385 +0 +464 +461 +0 +0 +1 +440 +413 +377 +388 +470 +403 +413 +396 +440 +469 +0 +2 +461 +467 +436 +465 +470 +422 +397 +425 +464 +472 +0 +3 +183 +435 +294 +278 +295 +469 +439 +367 +398 +361 +469 +224 +206 +0 +4 +397 +453 +0 +439 +472 +389 +388 +426 +0 +469 +0 +5 +155 +155 +154 +152 +159 +177 +4 +464 +388 +0 +0 +469 +440 +438 +0 +451 +0 +0 +6 +393 +455 +452 +300 +472 +375 +427 +421 +381 +0 +231 +0 +7 +289 +297 +300 
+294 +0 +300 +471 +302 +296 +469 +0 +8 +94 +396 +362 +390 +397 +471 +372 +470 +363 +0 +469 +0 +9 +264 +418 +289 +396 +471 +86 +455 +464 +465 +387 +470 +0 +10 +274 +390 +402 +244 +472 +374 +436 +438 +428 +471 +190 +0 +0 +439 +434 +393 +275 +470 +11 +359 +399 +0 +402 +469 +0 +1 +402 +394 +426 +0 +472 +276 +464 +464 +284 +472 +0 +2 +23 +425 +465 +269 +437 +470 +285 +464 +269 +393 +0 +0 +3 +366 +370 +400 +383 +0 +371 +363 +393 +455 +0 +0 +4 +465 +342 +462 +339 +0 +403 +347 +346 +339 +469 +0 +5 +154 +151 +155 +152 +171 +175 +1 +101 +451 +450 +439 +426 +469 +452 +439 +464 +425 +472 +195 +0 +6 +391 +401 +455 +358 +469 +0 +7 +399 +294 +391 +294 +472 +390 +0 +358 +370 +470 +0 +8 +25 +401 +360 +322 +349 +0 +354 +360 +351 +349 +0 +0 +9 +35 +361 +368 +392 +363 +469 +50 +401 +370 +0 +363 +0 +187 +0 +10 +82 +462 +245 +464 +381 +0 +462 +285 +282 +390 +469 +0 +0 +402 +387 +381 +284 +471 +60 +436 +436 +0 +439 +472 +0 +1 +465 +390 +359 +370 +472 +459 +266 +294 +402 +472 +0 +2 +436 +418 +439 +470 +472 +296 +427 +439 +275 +0 +0 +3 +88 +97 +440 +379 +408 +372 +471 +273 +380 +264 +382 +0 +0 +4 +457 +465 +460 +455 +472 +295 +280 +457 +302 +0 +0 +5 +153 +154 +155 +155 +163 +176 +1 +304 +320 +402 +320 +470 +89 +430 +397 +440 +394 +472 +0 +6 + + diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/catan/test_data/longest_road_history1.txt b/scenarios/bargaining/open_spiel/open_spiel/games/catan/test_data/longest_road_history1.txt new file mode 100644 index 0000000..f1540dd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/catan/test_data/longest_road_history1.txt @@ -0,0 +1 @@ +[14, 4, 0, 3, 1, 4, 0, 2, 2, 0, 1, 1, 3, 2, 4, 1, 3, 3, 5, 4, 5, 1, 5, 2, 3, 0, 4, 5, 5, 91, 26, 117, 61, 113, 56, 84, 15, 96, 31, 93, 36, 77, 9, 105, 44, 10, 349, 0, 0, 335, 0, 344, 0, 0, 339, 0, 0, 5, 164, 175, 1, 16, 0, 5, 157, 178, 0, 0, 6, 348, 324, 0, 0, 0, 355, 324, 0, 0, 0, 0, 6, 467, 0, 0, 339, 0, 354, 0, 0, 339, 0, 0, 9, 0, 3, 43, 432, 280, 469, 0, 471, 63, 0, 7, 401, 324, 0, 0, 0, 393, 324, 0, 0, 0, 0, 6, 462, 0, 0, 271, 0, 349, 0, 0, 271, 0, 27, 0, 3, 427, 0, 409, 312, 0, 326, 0, 432, 409, 472, 208, 17, 0, 1, 432, 419, 262, 0, 0, 376, 372, 469, 0, 471, 120, 0, 6, 0, 1, 127, 0, 345, 0, 0, 330, 0, 351, 0, 0, 303, 0, 0, 8, 0, 8, 127, 2, 0, 7, 298, 358, 472, 0, 470, 392, 363, 0, 0, 0, 127, 3, 0, 473, 6, 25, 428, 0, 0, 326, 0, 431, 0, 0, 327, 0, 128, 160, 175, 4, 0, 5, 164, 175, 3, 460, 0, 287, 278, 0, 0, 4, 130, 42, 41, 0, 6, 434, 385, 0, 354, 469, 412, 262, 0, 467, 0, 140, 84, 182, 0, 2, 431, 0, 0, 388, 0, 431, 323, 0, 321, 0, 127, 0, 0, 5, 160, 178, 4, 289, 0, 287, 239, 0, 422, 0, 287, 239, 0, 0, 2, 101, 0, 9, 127, 0, 0, 473, 3, 460, 0, 0, 0, 0, 344, 0, 0, 0, 0, 128, 157, 176, 0, 24, 0, 7, 87, 429, 470, 0, 404, 469, 62, 420, 0, 0, 0, 0, 0, 2, 0, 128, 172, 175, 3, 1, 300, 271, 0, 0, 0, 294, 301, 0, 0, 471, 0, 6, 34, 40, 467, 0, 0, 441, 0, 467, 453, 0, 441, 0, 0, 7, 436, 0, 287, 417, 0, 426, 0, 453, 278, 0, 225, 119, 0, 6, 127, 1, 0, 7, 431, 418, 0, 0, 0, 436, 279, 0, 0, 0, 180, 127, 4, 0, 4, 394, 0, 0, 385, 0, 289, 0, 0, 382, 0, 0, 4, 295, 0, 409, 436, 471, 127, 1, 0, 1, 280, 455, 358, 0, 472, 217, 50, 395, 269, 390, 0, 0, 0, 4, 148, 248, 260, 312, 0, 470, 248, 362, 472, 0, 470, 96, 38, 0, 6, 429, 418, 0, 418, 0, 290, 0, 0, 427, 0, 0, 7, 297, 0, 326, 278, 471, 53, 47, 0, 4, 395, 244, 390, 0, 472, 0, 5, 153, 151, 151, 151, 151, 151, 151, 151, 164, 175, 2, 298, 0, 0, 0, 0, 298, 260, 0, 0, 0, 30, 29, 0, 8, 127, 0, 431, 0, 0, 439, 0, 438, 0, 0, 439, 0, 0, 3, 108, 0, 8, 234, 55, 0, 8, 467, 385, 0, 0, 0, 467, 
446, 0, 0, 0, 198, 127, 0, 0, 473, 2, 352, 0, 0, 334, 0, 352, 0, 0, 439, 0, 0, 1, 464, 0, 404, 432, 0, 467, 0, 404, 439, 471, 235, 127, 1, 0, 1, 367, 381, 446, 0, 0, 238, 0, 128, 169, 175, 2, 4, 248, 284, 0, 0, 0, 248, 280, 0, 0, 0, 181, 21, 32, 127, 0, 0, 128, 160, 175, 2, 2, 355, 0, 0, 358, 0, 349, 0, 0, 379, 0, 127, 0, 0, 7, 436, 0, 327, 344, 0, 331, 0, 327, 404, 472, 228, 0, 2, 238, 113, 0, 128, 171, 176, 3, 7, 436, 416, 0, 0, 0, 466, 418, 0, 0, 0, 0, 128, 168, 175, 0, 8, 459, 0, 0, 261, 0, 302, 0, 0, 324, 0, 127, 0, 20, 0, 5, 172, 175, 2, 298, 0, 388, 419, 0, 250, 0, 287, 405, 472, 228, 117, 0, 5, 171, 176, 0, 0, 10, 98, 467, 455, 0, 0, 0, 467, 441, 0, 0, 0, 0, 473, 4, 394, 0, 0, 262, 0, 397, 0, 0, 269, 0, 0, 6, 66, 0, 3, 0, 3, 14, 13, 460, 450, 0, 0, 0, 455, 446, 0, 0, 0, 0, 128, 168, 175, 2, 1, 397, 0, 0, 318, 0, 391, 0, 0, 304, 0, 45, 0, 3, 314, 0, 326, 308, 0, 208, 214, 126, 0, 8, 0, 7, 239, 450, 0, 0, 471, 250, 442, 0, 0, 0, 179, 0, 6, 385, 0, 0, 324, 0, 250, 0, 0, 316, 0, 46, 19, 0, 1, 72, 443, 0, 439, 416, 0, 460, 0, 441, 439, 472, 228, 127, 4, 0, 2, 234, 67, 225, 68, 409, 427, 277, 0, 0, 0, 7, 12, 456, 302, 0, 0, 0, 464, 247, 0, 0, 471, 0, 2, 284, 0, 0, 284, 472, 301, 0, 0, 403, 0, 0, 7, 150, 233, 54, 231, 233, 48, 224, 226, 71, 231, 226, 70, 224, 226, 69, 129] diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/catan/test_data/longest_road_history2.txt b/scenarios/bargaining/open_spiel/open_spiel/games/catan/test_data/longest_road_history2.txt new file mode 100644 index 0000000..fd2caa8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/catan/test_data/longest_road_history2.txt @@ -0,0 +1 @@ +[1, 1, 4, 5, 3, 3, 0, 1, 2, 1, 3, 0, 0, 1, 4, 2, 2, 4, 4, 3, 0, 2, 5, 4, 5, 3, 5, 5, 1, 116, 65, 83, 8, 102, 41, 107, 47, 114, 64, 95, 37, 105, 44, 97, 32, 6, 70, 0, 3, 0, 2, 291, 436, 0, 0, 0, 429, 440, 0, 0, 0, 0, 7, 398, 0, 0, 381, 0, 467, 0, 0, 381, 0, 198, 127, 1, 0, 7, 439, 0, 257, 0, 0, 0, 8, 464, 326, 379, 0, 0, 450, 367, 379, 0, 0, 127, 2, 127, 0, 0, 5, 156, 176, 1, 429, 260, 0, 0, 0, 438, 260, 0, 0, 0, 40, 0, 5, 163, 178, 3, 435, 0, 0, 0, 0, 461, 0, 0, 0, 0, 0, 7, 123, 0, 473, 7, 0, 7, 34, 354, 0, 0, 0, 0, 349, 315, 0, 0, 0, 0, 4, 464, 0, 0, 271, 0, 464, 0, 0, 422, 0, 0, 9, 290, 0, 266, 399, 0, 0, 473, 3, 128, 161, 178, 1, 390, 376, 381, 470, 469, 0, 5, 154, 154, 151, 153, 170, 175, 3, 356, 278, 0, 0, 0, 356, 278, 0, 0, 0, 45, 0, 5, 160, 176, 1, 463, 0, 0, 262, 0, 349, 0, 0, 257, 0, 127, 2, 0, 9, 0, 3, 3, 130, 36, 27, 427, 376, 326, 0, 0, 372, 385, 372, 0, 471, 76, 0, 6, 440, 413, 0, 0, 0, 435, 439, 0, 0, 471, 0, 6, 468, 0, 0, 324, 0, 351, 0, 0, 370, 0, 127, 1, 130, 68, 53, 0, 9, 299, 0, 399, 275, 0, 0, 8, 455, 422, 453, 0, 0, 0, 6, 344, 417, 0, 0, 0, 339, 342, 0, 0, 0, 0, 2, 127, 3, 467, 0, 0, 356, 0, 455, 0, 0, 453, 472, 0, 5, 151, 152, 155, 151, 155, 155, 155, 155, 163, 178, 3, 116, 0, 7, 0, 4, 440, 257, 0, 0, 0, 347, 326, 0, 0, 0, 30, 0, 8, 468, 0, 0, 259, 0, 461, 0, 0, 259, 0, 121, 0, 6, 460, 0, 280, 278, 0, 0, 7, 432, 416, 439, 0, 0, 195, 0, 4, 440, 260, 0, 0, 0, 300, 303, 0, 0, 0, 100, 0, 6, 467, 0, 0, 301, 0, 455, 0, 0, 301, 0, 0, 1, 228, 127, 4, 0, 6, 460, 422, 416, 0, 472, 93, 0, 4, 300, 279, 0, 0, 0, 456, 260, 0, 0, 0, 24, 90, 0, 3, 399, 0, 0, 270, 0, 467, 0, 0, 402, 0, 237, 127, 0, 141, 127, 1, 0, 3, 149, 127, 0, 0, 5, 151, 151, 151, 155, 172, 177, 0, 2, 441, 324, 450, 0, 0, 0, 5, 166, 177, 4, 464, 441, 0, 0, 0, 455, 441, 0, 0, 0, 0, 473, 10, 398, 0, 0, 270, 0, 403, 0, 0, 269, 0, 128, 157, 176, 2, 219, 0, 473, 7, 97, 128, 172, 177, 3, 0, 
5, 163, 178, 1, 467, 325, 303, 0, 471, 453, 324, 385, 0, 0, 0, 5, 160, 176, 2, 392, 386, 0, 0, 0, 401, 385, 0, 0, 0, 0, 4, 61, 353, 0, 0, 344, 0, 353, 0, 0, 0, 0, 0, 4, 180, 127, 0, 0, 4, 0, 9, 389, 418, 0, 0, 0, 287, 271, 0, 0, 0, 189, 95, 0, 5, 151, 151, 153, 155, 163, 178, 2, 66, 297, 0, 0, 0, 0, 394, 0, 0, 269, 0, 0, 128, 172, 177, 0, 0, 0, 1, 74, 0, 3, 293, 425, 0, 0, 0, 399, 253, 0, 0, 0, 25, 127, 0, 0, 2, 398, 0, 0, 390, 0, 398, 0, 0, 381, 0, 0, 10, 33, 250, 469, 317, 253, 470, 99, 0, 10, 20, 13, 0, 128, 174, 175, 2, 9, 127, 0, 298, 276, 0, 0, 0, 302, 276, 0, 0, 0, 0, 4, 368, 0, 0, 464, 0, 302, 0, 0, 0, 0, 0, 6, 182, 0, 0, 464, 413, 470, 0, 471, 464, 441, 426, 0, 0, 127, 3, 127, 0, 0, 128, 156, 176, 4, 2, 295, 437, 0, 0, 0, 392, 421, 0, 0, 0, 127, 0, 182, 127, 1, 0, 2, 396, 0, 0, 284, 0, 394, 0, 0, 450, 0, 198, 127, 0, 0, 8, 185, 127, 0, 0, 128, 172, 177, 2, 9, 464, 454, 375, 0, 0, 127, 0, 0, 128, 160, 176, 3, 6, 431, 417, 0, 0, 471, 127, 0, 467, 0, 0, 0, 0, 0, 473, 2, 468, 0, 0, 379, 0, 435, 0, 0, 379, 0, 127, 4, 0, 128, 163, 178, 4, 2, 23, 238, 88, 298, 0, 0, 260, 0, 71, 0, 473, 2, 197, 140, 83, 0, 128, 157, 176, 4, 2, 455, 0, 0, 0, 0, 460, 0, 0, 0, 0, 0, 128, 160, 176, 4, 4, 461, 0, 0, 418, 0, 437, 0, 0, 437, 472, 237, 127, 1, 0, 5, 152, 152, 152, 151, 152, 152, 163, 178, 2, 38, 48, 0, 473, 4, 0, 6, 302, 278, 0, 0, 0, 464, 418, 0, 0, 0, 181, 0, 7, 294, 0, 0, 301, 0, 0, 5, 155, 155, 155, 155, 168, 178, 3, 127, 0, 0, 473, 4, 464, 356, 336, 0, 0, 128, 163, 178, 2, 454, 339, 441, 0, 0, 0, 4, 298, 327, 0, 0, 0, 298, 372, 0, 0, 0, 127, 0, 26, 0, 6, 467, 0, 0, 455, 0, 393, 0, 0, 390, 0, 0, 128, 161, 178, 0, 4, 226, 199, 49, 39, 239, 0, 0, 251, 0, 0, 9, 464, 330, 334, 0, 0, 450, 381, 439, 0, 0, 197, 0, 128, 157, 176, 3, 7, 245, 333, 0, 0, 0, 355, 379, 0, 0, 0, 100, 0, 3, 389, 0, 0, 385, 0, 294, 0, 0, 390, 0, 149, 201, 114, 0, 0, 199, 18, 0, 8, 105, 455, 446, 333, 0, 472, 344, 308, 357, 0, 472, 0, 9, 340, 303, 0, 0, 0, 346, 344, 0, 0, 0, 19, 42, 0, 5, 154, 155, 155, 155, 151, 151, 153, 151, 155, 154, 154, 155, 152, 152, 156, 176, 1, 118, 296, 0, 0, 298, 0, 298, 0, 0, 301, 0, 0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/catch/catch.cc b/scenarios/bargaining/open_spiel/open_spiel/games/catch/catch.cc new file mode 100644 index 0000000..4af42f3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/catch/catch.cc @@ -0,0 +1,194 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/catch/catch.h" + +#include +#include + +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/tensor_view.h" + +namespace open_spiel { +namespace catch_ { +namespace { + +// Facts about the game. 
+const GameType kGameType{/*short_name=*/"catch",
+                         /*long_name=*/"Catch",
+                         GameType::Dynamics::kSequential,
+                         GameType::ChanceMode::kExplicitStochastic,
+                         GameType::Information::kPerfectInformation,
+                         GameType::Utility::kGeneralSum,
+                         GameType::RewardModel::kTerminal,
+                         /*max_num_players=*/1,
+                         /*min_num_players=*/1,
+                         /*provides_information_state_string=*/false,
+                         /*provides_information_state_tensor=*/false,
+                         /*provides_observation_string=*/true,
+                         /*provides_observation_tensor=*/true,
+                         /*parameter_specification=*/
+                         {{"rows", GameParameter(kDefaultRows)},
+                          {"columns", GameParameter(kDefaultColumns)}}};
+
+std::shared_ptr<const Game> Factory(const GameParameters& params) {
+  return std::shared_ptr<const Game>(new CatchGame(params));
+}
+
+REGISTER_SPIEL_GAME(kGameType, Factory);
+
+RegisterSingleTensorObserver single_tensor(kGameType.short_name);
+
+std::string StateToString(CellState state) {
+  switch (state) {
+    case CellState::kEmpty:
+      return ".";
+    case CellState::kPaddle:
+      return "x";
+    case CellState::kBall:
+      return "o";
+    default:
+      SpielFatalError("Unknown state.");
+      return "This will never return.";
+  }
+}
+
+} // namespace
+
+CatchState::CatchState(std::shared_ptr<const Game> game) : State(game) {
+  const CatchGame& parent_game = static_cast<const CatchGame&>(*game);
+  num_rows_ = parent_game.NumRows();
+  num_columns_ = parent_game.NumColumns();
+}
+
+int CatchState::CurrentPlayer() const {
+  if (!initialized_) return kChancePlayerId;
+  if (IsTerminal()) return kTerminalPlayerId;
+  return 0;
+}
+
+std::vector<Action> CatchState::LegalActions() const {
+  if (IsTerminal()) return {};
+  if (initialized_) {
+    return {0, 1, 2}; // Left, stay, right.
+  }
+  std::vector<Action> moves;
+  moves.reserve(num_columns_);
+  for (int i = 0; i < num_columns_; i++) moves.push_back(i);
+  return moves;
+}
+
+ActionsAndProbs CatchState::ChanceOutcomes() const {
+  SPIEL_CHECK_TRUE(!initialized_);
+  ActionsAndProbs action_and_probs;
+  action_and_probs.reserve(num_columns_);
+  for (int c = 0; c < num_columns_; c++) {
+    action_and_probs.emplace_back(c, 1. / num_columns_);
+  }
+  return action_and_probs;
+}
+
+CellState CatchState::BoardAt(int row, int column) const {
+  if (row == num_rows_ - 1 && column == paddle_col_)
+    return CellState::kPaddle;
+  else if (row == ball_row_ && column == ball_col_)
+    return CellState::kBall;
+  return CellState::kEmpty;
+}
+
+std::string CatchState::ActionToString(Player player, Action action_id) const {
+  if (player == kChancePlayerId)
+    return absl::StrCat("Initialized ball to ", action_id);
+  SPIEL_CHECK_EQ(player, 0);
+  switch (action_id) {
+    case 0:
+      return "LEFT";
+    case 1:
+      return "STAY";
+    case 2:
+      return "RIGHT";
+    default:
+      SpielFatalError("Out of range action");
+  }
+}
+
+std::string CatchState::ToString() const {
+  std::string str;
+  for (int r = 0; r < num_rows_; ++r) {
+    for (int c = 0; c < num_columns_; ++c) {
+      absl::StrAppend(&str, StateToString(BoardAt(r, c)));
+    }
+    absl::StrAppend(&str, "\n");
+  }
+  return str;
+}
+
+bool CatchState::IsTerminal() const {
+  return initialized_ && ball_row_ >= num_rows_ - 1;
+}
+
+std::vector<double> CatchState::Returns() const {
+  if (!IsTerminal()) {
+    return {0.0};
+  } else if (ball_col_ == paddle_col_) {
+    return {1.0};
+  } else {
+    return {-1.0};
+  }
+}
+
+std::string CatchState::ObservationString(Player player) const {
+  SPIEL_CHECK_GE(player, 0);
+  SPIEL_CHECK_LT(player, num_players_);
+  return ToString();
+}
+
+void CatchState::ObservationTensor(Player player,
+                                   absl::Span<float> values) const {
+  SPIEL_CHECK_GE(player, 0);
+  SPIEL_CHECK_LT(player, num_players_);
+
+  TensorView<2> view(values, {num_rows_, num_columns_}, true);
+  if (initialized_) {
+    view[{ball_row_, ball_col_}] = 1.0;
+    view[{num_rows_ - 1, paddle_col_}] = 1.0;
+  }
+}
+
+std::unique_ptr<State> CatchState::Clone() const {
+  return std::unique_ptr<State>(new CatchState(*this));
+}
+
+void CatchState::DoApplyAction(Action move) {
+  if (!initialized_) {
+    initialized_ = true;
+    ball_col_ = move;
+    ball_row_ = 0;
+    paddle_col_ = num_columns_ / 2;
+  } else {
+    ball_row_++;
+    int direction = move - 1;
+    paddle_col_ =
+        std::min(std::max(paddle_col_ + direction, 0), num_columns_ - 1);
+  }
+}
+
+CatchGame::CatchGame(const GameParameters& params)
+    : Game(kGameType, params),
+      num_rows_(ParameterValue<int>("rows")),
+      num_columns_(ParameterValue<int>("columns")) {}
+
+} // namespace catch_
+} // namespace open_spiel
diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/catch/catch.h b/scenarios/bargaining/open_spiel/open_spiel/games/catch/catch.h
new file mode 100644
index 0000000..70fe764
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/games/catch/catch.h
@@ -0,0 +1,122 @@
+// Copyright 2019 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef OPEN_SPIEL_GAMES_CATCH_H_
+#define OPEN_SPIEL_GAMES_CATCH_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "open_spiel/spiel.h"
+
+// Catch is a single player game, often used for unit testing RL algorithms.
+//
+// The player must move a paddle to intercept a falling ball. The initial
+// column of the ball is decided by chance. Each turn, the ball moves downwards
+// while remaining in the initial column.
+//
+// Please note: In each turn, all actions (left, stay, right) are legal. This
+// is different to the Python implementation of the game.
+//
+// References:
+// a) Recurrent models of visual attention, 2014, Mnih et al.
+//    (Advances in Neural Information Processing Systems 27, pages 2204–2212.)
+// b) Behaviour Suite for Reinforcement Learning, 2019, Osband et al.
+//    (https://arxiv.org/abs/1908.03568)
+//
+// Parameters:
+//   "rows"     int  rows of the board     (default = 10)
+//   "columns"  int  columns of the board  (default = 5)
+
+namespace open_spiel {
+namespace catch_ {
+
+// Constants.
+inline constexpr int kNumPlayers = 1;
+inline constexpr int kNumActions = 3;
+inline constexpr int kDefaultRows = 10;
+inline constexpr int kDefaultColumns = 5;
+
+// State of a cell.
+enum class CellState {
+  kEmpty,
+  kBall,
+  kPaddle,
+};
+
+class CatchGame;
+
+// State of an in-play game.
+class CatchState : public State {
+ public:
+  CatchState(std::shared_ptr<const Game> game);
+  CatchState(const CatchState&) = default;
+
+  Player CurrentPlayer() const override;
+  std::string ActionToString(Player player, Action action_id) const override;
+  std::string ToString() const override;
+  bool IsTerminal() const override;
+  std::vector<double> Returns() const override;
+  std::string ObservationString(Player player) const override;
+  void ObservationTensor(Player player,
+                         absl::Span<float> values) const override;
+  std::unique_ptr<State> Clone() const override;
+  std::vector<Action> LegalActions() const override;
+  ActionsAndProbs ChanceOutcomes() const override;
+  CellState BoardAt(int row, int column) const;
+
+ protected:
+  void DoApplyAction(Action move) override;
+
+ private:
+  int num_rows_ = -1;
+  int num_columns_ = -1;
+  bool initialized_ = false;
+  int ball_row_ = -1;
+  int ball_col_ = -1;
+  int paddle_col_ = -1;
+};
+
+// Game object.
+class CatchGame : public Game {
+ public:
+  explicit CatchGame(const GameParameters& params);
+  std::unique_ptr<State> NewInitialState() const override {
+    return std::unique_ptr<State>(new CatchState(shared_from_this()));
+  }
+  std::vector<int> ObservationTensorShape() const override {
+    return {num_rows_, num_columns_};
+  }
+
+  int NumDistinctActions() const override { return kNumActions; }
+  int MaxChanceOutcomes() const override { return num_columns_; }
+  int NumPlayers() const override { return kNumPlayers; }
+  double MaxUtility() const override { return 1; }
+  double MinUtility() const override { return -1; }
+  int MaxGameLength() const override { return num_rows_; }
+  // There is only initial chance.
+  int MaxChanceNodesInHistory() const override { return 1; }
+  int NumRows() const { return num_rows_; }
+  int NumColumns() const { return num_columns_; }
+
+ private:
+  const int num_rows_;
+  const int num_columns_;
+};
+
+} // namespace catch_
+} // namespace open_spiel
+
+#endif // OPEN_SPIEL_GAMES_CATCH_H_
diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/catch/catch_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/catch/catch_test.cc
new file mode 100644
index 0000000..85368a4
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/games/catch/catch_test.cc
@@ -0,0 +1,114 @@
+// Copyright 2019 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/catch/catch.h" + +#include "open_spiel/algorithms/get_all_states.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace catch_ { +namespace { + +namespace testing = open_spiel::testing; + +void BasicCatchTests() { + testing::LoadGameTest("catch"); + testing::ChanceOutcomesTest(*LoadGame("catch")); + testing::RandomSimTest(*LoadGame("catch"), 100); +} + +void GetAllStatesTest() { + auto catch_game = LoadGame("catch"); + auto states = algorithms::GetAllStates(*catch_game, + /*depth_limit=*/-1, + /*include_terminals=*/true, + /*include_chance_states=*/false); + SPIEL_CHECK_EQ( + states.size(), + kDefaultRows * kDefaultColumns * kDefaultColumns - 6 * kDefaultColumns); + + // Verify number of states that lead to win and loss. + int num_wins = 0; + int num_losses = 0; + for (const auto& pair : states) { + const auto& state = pair.second; + if (state->IsTerminal()) { + if (state->PlayerReturn(0) == 1) + num_wins++; + else if (state->PlayerReturn(0) == -1) + num_losses++; + else + SpielFatalError("Unexpected return"); + } + } + SPIEL_CHECK_EQ(num_wins, 5); + SPIEL_CHECK_EQ(num_losses, 20); + + // Verify normalized observation matches string represtation. + for (const auto& pair : states) { + std::vector obs(catch_game->ObservationTensorSize()); + pair.second->ObservationTensor(0, absl::MakeSpan(obs)); + const std::string& str = pair.first; + SPIEL_CHECK_EQ(obs.size(), str.size() - kDefaultRows); + for (int i = 0; i < obs.size(); i++) { + SPIEL_CHECK_EQ(obs[i] == 1, str[i + i / kDefaultColumns] != '.'); + } + } +} + +void PlayAndWinTest() { + auto game = LoadGame("catch"); + auto state = game->NewInitialState(); + SPIEL_CHECK_EQ(state->CurrentPlayer(), kChancePlayerId); + state->ApplyAction(3); + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + state->ApplyAction(2); // Right. + for (int i = 0; i < kDefaultRows - 2; i++) { + SPIEL_CHECK_FALSE(state->IsTerminal()); + state->ApplyAction(1); // Stay. 
+ } + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_EQ(state->PlayerReturn(0), 1); +} + +void ToStringTest() { + auto game = LoadGame("catch"); + auto state = game->NewInitialState(); + state->ApplyAction(3); + SPIEL_CHECK_EQ(state->ToString(), + "...o.\n" + ".....\n" + ".....\n" + ".....\n" + ".....\n" + ".....\n" + ".....\n" + ".....\n" + ".....\n" + "..x..\n"); +} + +} // namespace +} // namespace catch_ +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::catch_::BasicCatchTests(); + open_spiel::catch_::GetAllStatesTest(); + open_spiel::catch_::PlayAndWinTest(); + open_spiel::catch_::ToStringTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/checkers/checkers.cc b/scenarios/bargaining/open_spiel/open_spiel/games/checkers/checkers.cc new file mode 100644 index 0000000..ab9c635 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/checkers/checkers.cc @@ -0,0 +1,574 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/checkers/checkers.h" + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/tensor_view.h" + +namespace open_spiel { +namespace checkers { +namespace { + +// Number of rows with pieces for each player +constexpr int kNumRowsWithPieces = 3; +// Types of moves: normal & capture +constexpr int kNumMoveType = 2; +// Number of unique directions each piece can take. +constexpr int kNumDirections = 4; + +// Index 0: Direction is diagonally up-left. +// Index 1: Direction is diagonally up-right. +// Index 2: Direction is diagonally down-right. +// Index 3: Direction is diagonally down-left. +constexpr std::array kDirRowOffsets = {{-1, -1, 1, 1}}; +constexpr std::array kDirColumnOffsets = {{-1, 1, 1, -1}}; + +// Facts about the game. 
+const GameType kGameType{/*short_name=*/"checkers", + /*long_name=*/"Checkers", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"rows", GameParameter(kDefaultRows)}, + {"columns", GameParameter(kDefaultColumns)}}}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new CheckersGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +int StateToPlayer(CellState state) { + switch (state) { + case CellState::kWhite: + return 0; + case CellState::kBlack: + return 1; + default: + SpielFatalError("No player id for this cell state"); + } +} + +CellState CrownState(CellState state) { + switch (state) { + case CellState::kWhite: + return CellState::kWhiteKing; + case CellState::kBlack: + return CellState::kBlackKing; + default: + SpielFatalError("Invalid state"); + } +} + +PieceType StateToPiece(CellState state) { + switch (state) { + case CellState::kWhite: + case CellState::kBlack: + return PieceType::kMan; + case CellState::kWhiteKing: + case CellState::kBlackKing: + return PieceType::kKing; + default: + SpielFatalError("Invalid state"); + } +} + +CellState PlayerToState(Player player) { + switch (player) { + case 0: + return CellState::kWhite; + case 1: + return CellState::kBlack; + default: + SpielFatalError(absl::StrCat("Invalid player id ", player)); + } +} + +std::string StateToString(CellState state) { + switch (state) { + case CellState::kEmpty: + return "."; + case CellState::kWhite: + return "o"; + case CellState::kBlack: + return "+"; + case CellState::kWhiteKing: + return "8"; + case CellState::kBlackKing: + return "*"; + default: + SpielFatalError("Unknown state."); + } +} + +CellState StringToState(char ch) { + switch (ch) { + case '.': + return CellState::kEmpty; + case 'o': + return CellState::kWhite; + case '+': + return CellState::kBlack; + case '8': + return CellState::kWhiteKing; + case '*': + return CellState::kBlackKing; + default: + std::string error_string = "Unknown state: "; + error_string.push_back(ch); + SpielFatalError(error_string); + } +} + +CellState OpponentState(CellState state) { + return PlayerToState(1 - StateToPlayer(state)); +} + +std::string RowLabel(int rows, int row) { + int row_number = rows - row; + std::string label = std::to_string(row_number); + return label; +} + +std::string ColumnLabel(int column) { + std::string label = ""; + label += static_cast('a' + column); + return label; +} +} // namespace + +std::ostream& operator<<(std::ostream& stream, const CellState& state) { + switch (state) { + case CellState::kWhite: + return stream << "White"; + case CellState::kBlack: + return stream << "Black"; + case CellState::kWhiteKing: + return stream << "WhiteKing"; + case CellState::kBlackKing: + return stream << "BlackKing"; + case CellState::kEmpty: + return stream << "Empty"; + default: + SpielFatalError("Unknown cell state"); + } +} + +CheckersState::CheckersState(std::shared_ptr game, int rows, + int columns) + : State(game), rows_(rows), columns_(columns) { + SPIEL_CHECK_GE(rows_, 1); + SPIEL_CHECK_GE(columns_, 1); + SPIEL_CHECK_LE(rows_, 99); // Only 
supports 1 and 2 digit row numbers. + SPIEL_CHECK_LE(columns_, 26); // Only 26 letters to represent columns. + + moves_without_capture_ = 0; + board_ = std::vector(rows_ * columns_, CellState::kEmpty); + turn_history_info_ = {}; + + for (int row = rows_ - 1; row >= 0; row--) { + for (int column = 0; column < columns_; column++) { + if ((row + column) % 2 == 1) { + if (row >= 0 && row < kNumRowsWithPieces) { + SetBoard(row, column, CellState::kBlack); + } else if (row >= (rows_ - kNumRowsWithPieces)) { + SetBoard(row, column, CellState::kWhite); + } + } + } + } +} + +CellState CheckersState::CrownStateIfLastRowReached(int row, CellState state) { + if (row == 0 && state == CellState::kWhite) { + return CellState::kWhiteKing; + } + if (row == rows_ - 1 && state == CellState::kBlack) { + return CellState::kBlackKing; + } + return state; +} + +void CheckersState::SetCustomBoard(const std::string board_string) { + SPIEL_CHECK_EQ(rows_ * columns_, board_string.length() - 1); + current_player_ = board_string[0] - '0'; + SPIEL_CHECK_GE(current_player_, 0); + SPIEL_CHECK_LE(current_player_, 1); + // Create the board from the board string. The characters 'o', '8' are White + // (first player) & '+', '*' are Black (second player), and the character '.' + // is an Empty cell. Population goes from top left to bottom right. + for (int row = 0; row < rows_; row++) { + for (int column = 0; column < columns_; column++) { + char state_character = board_string[1 + row * columns_ + column]; + CellState state = StringToState(state_character); + SetBoard(row, column, state); + } + } +} + +CheckersAction CheckersState::SpielActionToCheckersAction(Action action) const { + std::vector values = UnrankActionMixedBase( + action, {rows_, columns_, kNumDirections, kNumMoveType}); + return CheckersAction(values[0], values[1], values[2], values[3]); +} + +Action CheckersState::CheckersActionToSpielAction(CheckersAction move) const { + std::vector action_bases = {rows_, columns_, kNumDirections, + kNumMoveType}; + return RankActionMixedBase( + action_bases, {move.row, move.column, move.direction, move.move_type}); +} + +void CheckersState::DoApplyAction(Action action) { + CheckersAction checkers_action = SpielActionToCheckersAction(action); + SPIEL_CHECK_TRUE(InBounds(checkers_action.row, checkers_action.column)); + + int end_row, end_column; + multiple_jump_piece_ = kNoMultipleJumpsPossible; + moves_without_capture_++; + + switch (checkers_action.move_type) { + case MoveType::kNormal: + end_row = checkers_action.row + kDirRowOffsets[checkers_action.direction]; + end_column = + checkers_action.column + kDirColumnOffsets[checkers_action.direction]; + SPIEL_CHECK_TRUE(InBounds(end_row, end_column)); + SPIEL_CHECK_EQ(BoardAt(end_row, end_column), CellState::kEmpty); + turn_history_info_.push_back(TurnHistoryInfo( + action, current_player_, PieceType::kMan, + StateToPiece(BoardAt(checkers_action.row, checkers_action.column)))); + SetBoard( + end_row, end_column, + CrownStateIfLastRowReached( + end_row, BoardAt(checkers_action.row, checkers_action.column))); + SetBoard(checkers_action.row, checkers_action.column, CellState::kEmpty); + break; + case MoveType::kCapture: + end_row = + checkers_action.row + kDirRowOffsets[checkers_action.direction] * 2; + end_column = checkers_action.column + + kDirColumnOffsets[checkers_action.direction] * 2; + SPIEL_CHECK_TRUE(InBounds(end_row, end_column)); + SPIEL_CHECK_EQ(BoardAt(end_row, end_column), CellState::kEmpty); + PieceType captured_piece = + StateToPiece(BoardAt((checkers_action.row 
+ end_row) / 2, + (checkers_action.column + end_column) / 2)); + turn_history_info_.push_back(TurnHistoryInfo( + action, current_player_, captured_piece, + StateToPiece(BoardAt(checkers_action.row, checkers_action.column)))); + SetBoard((checkers_action.row + end_row) / 2, + (checkers_action.column + end_column) / 2, CellState::kEmpty); + CellState end_state = CrownStateIfLastRowReached( + end_row, BoardAt(checkers_action.row, checkers_action.column)); + SetBoard(end_row, end_column, end_state); + bool piece_crowned = + BoardAt(checkers_action.row, checkers_action.column) != end_state; + SetBoard(checkers_action.row, checkers_action.column, CellState::kEmpty); + moves_without_capture_ = 0; + + // Check if multiple jump is possible for the piece that made the + // last capture. If that is the case, then the current player gets + // to move again with LegalActions restricted to multiple jump moves + // for this piece. + if (!piece_crowned) { + std::vector moves = LegalActions(); + for (Action action : moves) { + CheckersAction move = SpielActionToCheckersAction(action); + if (move.row == end_row && move.column == end_column && + move.move_type == MoveType::kCapture) { + multiple_jump_piece_ = end_row * rows_ + end_column; + break; + } + } + } + break; + } + + if (multiple_jump_piece_ == kNoMultipleJumpsPossible) { + current_player_ = 1 - current_player_; + } + + if (LegalActions().empty()) { + outcome_ = 1 - current_player_; + } +} + +std::string CheckersState::ActionToString(Player player, + Action action_id) const { + CheckersAction checkers_action = SpielActionToCheckersAction(action_id); + const int end_row = + checkers_action.row + kDirRowOffsets[checkers_action.direction] * + (checkers_action.move_type + 1); + const int end_column = + checkers_action.column + kDirColumnOffsets[checkers_action.direction] * + (checkers_action.move_type + 1); + + std::string action_string = absl::StrCat( + ColumnLabel(checkers_action.column), RowLabel(rows_, checkers_action.row), + ColumnLabel(end_column), RowLabel(rows_, end_row)); + + return action_string; +} + +std::vector CheckersState::LegalActions() const { + if (moves_without_capture_ >= kMaxMovesWithoutCapture) { + return {}; + } + std::vector move_list, capture_move_list; + CellState current_player_state = PlayerToState(current_player_); + CellState current_player_crowned = CrownState(current_player_state); + + for (int row = 0; row < rows_; row++) { + for (int column = 0; column < columns_; column++) { + if (BoardAt(row, column) == current_player_state || + BoardAt(row, column) == current_player_crowned) { + for (int direction = 0; direction < kNumDirections; direction++) { + // Only crowned pieces can move in all 4 directions. 
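+ // Uncrowned men may only move forward: White (player 0) uses the upward
+ // directions 0-1 and Black (player 1) the downward directions 2-3, so the
+ // check below skips backward directions for them.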
+ if (BoardAt(row, column) == current_player_state && + ((current_player_ == 0 && direction > 1) || + (current_player_ == 1 && direction < 2))) { + continue; + } + int adjacent_row = row + kDirRowOffsets[direction]; + int adjacent_column = column + kDirColumnOffsets[direction]; + + if (InBounds(adjacent_row, adjacent_column)) { + CellState adjacent_state = BoardAt(adjacent_row, adjacent_column); + CellState opponent_state = OpponentState(current_player_state); + CellState opponent_state_crowned = CrownState(opponent_state); + + if (adjacent_state == CellState::kEmpty) { + CheckersAction move = + CheckersAction(row, column, direction, MoveType::kNormal); + move_list.push_back(CheckersActionToSpielAction(move)); + } else if (adjacent_state == opponent_state || + adjacent_state == opponent_state_crowned) { + int jumping_row = adjacent_row + kDirRowOffsets[direction]; + int jumping_column = + adjacent_column + kDirColumnOffsets[direction]; + if (InBounds(jumping_row, jumping_column) && + BoardAt(jumping_row, jumping_column) == CellState::kEmpty) { + CheckersAction move = + CheckersAction(row, column, direction, MoveType::kCapture); + capture_move_list.push_back(CheckersActionToSpielAction(move)); + } + } + } + } + } + } + } + + // If capture moves are possible, it's mandatory to play them. + if (!capture_move_list.empty()) { + if (multiple_jump_piece_ != kNoMultipleJumpsPossible) { + int multiple_jump_piece_row = multiple_jump_piece_ / rows_; + int multiple_jump_piece_column = multiple_jump_piece_ % rows_; + std::vector multiple_move_list; + for (Action action : capture_move_list) { + CheckersAction move = SpielActionToCheckersAction(action); + if (move.row == multiple_jump_piece_row && + move.column == multiple_jump_piece_column) { + multiple_move_list.push_back(action); + } + } + SPIEL_CHECK_GT(multiple_move_list.size(), 0); + return multiple_move_list; + } + return capture_move_list; + } + return move_list; +} + +bool CheckersState::InBounds(int row, int column) const { + return (row >= 0 && row < rows_ && column >= 0 && column < columns_); +} + +std::string CheckersState::ToString() const { + std::string result; + result.reserve((rows_ + 1) * (columns_ + 3)); + + for (int r = 0; r < rows_; r++) { + // Ensure the row labels are aligned. + if (rows_ - r < 10 && rows_ >= 10) { + absl::StrAppend(&result, " "); + } + absl::StrAppend(&result, RowLabel(rows_, r)); + + for (int c = 0; c < columns_; c++) { + absl::StrAppend(&result, StateToString(BoardAt(r, c))); + } + + result.append("\n"); + } + + // Add an extra space to the bottom row + // if the row labels take up two spaces. 
+ if (rows_ >= 10) { + absl::StrAppend(&result, " "); + } + absl::StrAppend(&result, " "); + + for (int c = 0; c < columns_; c++) { + absl::StrAppend(&result, ColumnLabel(c)); + } + absl::StrAppend(&result, "\n"); + + return result; +} + +int CheckersState::ObservationPlane(CellState state, Player player) const { + int state_value; + switch (state) { + case CellState::kWhite: + state_value = 0; + break; + case CellState::kWhiteKing: + state_value = 1; + break; + case CellState::kBlackKing: + state_value = 2; + break; + case CellState::kBlack: + state_value = 3; + break; + case CellState::kEmpty: + default: + return 4; + } + if (player == Player{0}) { + return state_value; + } else { + return 3 - state_value; + } +} + +bool CheckersState::IsTerminal() const { return LegalActions().empty(); } + +std::vector CheckersState::Returns() const { + if (outcome_ == kInvalidPlayer || + moves_without_capture_ >= kMaxMovesWithoutCapture) { + return {0., 0.}; + } else if (outcome_ == Player{0}) { + return {1.0, -1.0}; + } else if (outcome_ == Player{1}) { + return {-1.0, 1.0}; + } + return {0., 0.}; +} + +std::string CheckersState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return HistoryString(); +} + +std::string CheckersState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void CheckersState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + TensorView<3> view(values, {kCellStates, rows_, columns_}, true); + + // Observation Tensor Representation: + // Plane 0: 1's where the current player's pieces are, 0's elsewhere. + // Plane 1: 1's where the oppponent's pieces are, 0's elsewhere. + // Plane 2: 1's where the current player's crowned pieces are, 0's elsewhere. + // Plane 3: 1's where the oppponent's crowned pieces are, 0's elsewhere. + // Plane 4: 1's where the empty cells are, 0's elsewhere. + for (int row = 0; row < rows_; row++) { + for (int column = 0; column < columns_; column++) { + int plane = ObservationPlane(BoardAt(row, column), player); + view[{plane, row, column}] = 1.0; + } + } +} + +CellState GetPieceStateFromTurnHistory(Player player, int piece_type) { + return piece_type == PieceType::kMan ? 
PlayerToState(player) + : CrownState(PlayerToState(player)); +} + +void CheckersState::UndoAction(Player player, Action action) { + CheckersAction move = SpielActionToCheckersAction(action); + const TurnHistoryInfo& thi = turn_history_info_.back(); + SPIEL_CHECK_EQ(thi.player, player); + SPIEL_CHECK_EQ(thi.action, action); + current_player_ = player; + outcome_ = kInvalidPlayer; + move_number_--; + + int end_row, end_column; + CellState player_piece = + GetPieceStateFromTurnHistory(player, thi.player_piece_type); + + switch (move.move_type) { + case MoveType::kNormal: + end_row = move.row + kDirRowOffsets[move.direction]; + end_column = move.column + kDirColumnOffsets[move.direction]; + SetBoard(move.row, move.column, player_piece); + SetBoard(end_row, end_column, CellState::kEmpty); + break; + case MoveType::kCapture: + end_row = move.row + kDirRowOffsets[move.direction] * 2; + end_column = move.column + kDirColumnOffsets[move.direction] * 2; + SetBoard(move.row, move.column, player_piece); + SetBoard(end_row, end_column, CellState::kEmpty); + CellState captured_piece = + GetPieceStateFromTurnHistory(1 - player, thi.captured_piece_type); + SetBoard((move.row + end_row) / 2, (move.column + end_column) / 2, + captured_piece); + break; + } + turn_history_info_.pop_back(); + history_.pop_back(); +} + +CheckersGame::CheckersGame(const GameParameters& params) + : Game(kGameType, params), + rows_(ParameterValue("rows")), + columns_(ParameterValue("columns")) {} + +int CheckersGame::NumDistinctActions() const { + return rows_ * columns_ * kNumDirections * kNumMoveType; +} + +} // namespace checkers +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/checkers/checkers.h b/scenarios/bargaining/open_spiel/open_spiel/games/checkers/checkers.h new file mode 100644 index 0000000..16608bd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/checkers/checkers.h @@ -0,0 +1,181 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_CHECKERS_H_ +#define OPEN_SPIEL_GAMES_CHECKERS_H_ + +// Implementation of the board game Checkers. +// https://en.wikipedia.org/wiki/Checkers +// +// Some notes about this implementation: +// - Capturing: +// When capturing an opponent's piece is possible, capturing is mandatory +// in this implementation. +// - Drawing: +// Game is drawn if no pieces have been removed in 40 moves +// http://www.flyordie.com/games/help/checkers/en/games_rules_checkers.html +// - Custom board dimensions: +// Dimensions of the board can be customised by calling the +// CheckersState(rows, columns) constructer with the desired +// number of rows and columns + +#include +#include +#include + +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace checkers { + +constexpr int kNumPlayers = 2; +constexpr int kDefaultRows = 8; +constexpr int kDefaultColumns = 8; +constexpr int kMaxMovesWithoutCapture = 40; +// Empty, White, WhiteKing, Black and BlackKing. 
+constexpr int kCellStates = 5; +constexpr int kNoMultipleJumpsPossible = -1; + +// State of a cell. +enum class CellState { + kEmpty, // Represented by ' '. + kWhite, // Represented by 'o'. + kBlack, // Represented by '+'. + kWhiteKing, // Represented by '8'. + kBlackKing, // Represented by '*'. +}; + +struct CheckersAction { + int row; + int column; + int direction; + int move_type; + CheckersAction(int _row, int _column, int _direction, int _move_type) + : row(_row), + column(_column), + direction(_direction), + move_type(_move_type) {} +}; + +// Types of moves. +enum MoveType { + kNormal = 0, + kCapture = 1, +}; + +// Types of pieces. +enum PieceType { + kMan = 0, + kKing = 1, +}; + +// This is a small helper to track historical turn info not stored in the moves. +// It is only needed for proper implementation of Undo. +struct TurnHistoryInfo { + Action action; + Player player; + // set to kMan if not a capture move + PieceType captured_piece_type; + PieceType player_piece_type; + TurnHistoryInfo(Action _action, Player _player, + PieceType _captured_piece_type, PieceType _player_piece_type) + : action(_action), + player(_player), + captured_piece_type(_captured_piece_type), + player_piece_type(_player_piece_type) {} +}; + +// State of an in-play game. +class CheckersState : public State { + public: + explicit CheckersState(std::shared_ptr game, int rows, + int columns); + Player CurrentPlayer() const override { + return IsTerminal() ? kTerminalPlayerId : current_player_; + } + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override { + return std::unique_ptr(new CheckersState(*this)); + } + void UndoAction(Player player, Action action) override; + bool InBounds(int row, int column) const; + void SetCustomBoard(const std::string board_string); + CellState CrownStateIfLastRowReached(int row, CellState state); + CheckersAction SpielActionToCheckersAction(Action action) const; + Action CheckersActionToSpielAction(CheckersAction move) const; + void SetBoard(int row, int column, CellState state) { + board_[row * columns_ + column] = state; + } + CellState BoardAt(int row, int column) const { + return board_[row * columns_ + column]; + } + std::vector LegalActions() const override; + int ObservationPlane(CellState state, Player player) const; + int GetRow() const { return rows_; } + int GetCollumn() const { return columns_; } + int GetCellState() const { return kCellStates; } + + protected: + void DoApplyAction(Action action) override; + + private: + Player current_player_ = 0; // Player zero (White, 'o') goes first. + Player outcome_ = kInvalidPlayer; + // Piece in the board who can do multiple jump. + // Represented by row * rows_ + column + int multiple_jump_piece_ = kNoMultipleJumpsPossible; + int rows_; + int columns_; + int moves_without_capture_; + std::vector board_; + std::vector turn_history_info_; // Info needed for Undo. +}; + +// Game object. 
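+// The game object stores the board dimensions and exposes game-level
+// metadata (action space size, utility bounds, observation tensor shape)
+// to the framework.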
+class CheckersGame : public Game { + public: + explicit CheckersGame(const GameParameters& params); + int NumDistinctActions() const override; + std::unique_ptr NewInitialState() const override { + return absl::make_unique(shared_from_this(), rows_, + columns_); + } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { return -1; } + absl::optional UtilitySum() const override { return 0; } + double MaxUtility() const override { return 1; } + std::vector ObservationTensorShape() const override { + return {kCellStates, rows_, columns_}; + } + // There is arbitrarily chosen number to ensure the game is finite. + int MaxGameLength() const override { return 1000; } + + private: + int rows_; + int columns_; +}; + +std::ostream& operator<<(std::ostream& stream, const CellState& state); + +} // namespace checkers +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_CHECKERS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/checkers/checkers_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/checkers/checkers_test.cc new file mode 100644 index 0000000..4545c84 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/checkers/checkers_test.cc @@ -0,0 +1,159 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/checkers/checkers.h" + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace checkers { +namespace { + +namespace testing = open_spiel::testing; + +void BasicSerializationTest() { + std::shared_ptr game = LoadGame("checkers"); + std::unique_ptr state = game->NewInitialState(); + std::unique_ptr state2 = game->DeserializeState(state->Serialize()); + SPIEL_CHECK_EQ(state->ToString(), state2->ToString()); +} + +void RandomSerializationTest() { + std::shared_ptr game = LoadGame("checkers"); + std::unique_ptr state = game->NewInitialState(); + for (int i = 0; i < 20; ++i) { + state->ApplyAction(state->LegalActions()[0]); + } + std::unique_ptr state2 = game->DeserializeState(state->Serialize()); + SPIEL_CHECK_EQ(state->ToString(), state2->ToString()); +} + +void BasicCheckersTests() { + testing::LoadGameTest("checkers"); + testing::NoChanceOutcomesTest(*LoadGame("checkers")); + testing::RandomSimTest(*LoadGame("checkers"), 100); + testing::RandomSimTestWithUndo(*LoadGame("checkers"), 10); + + // 10x10 Board + testing::RandomSimTest( + *LoadGame("checkers", + {{"rows", GameParameter(10)}, {"columns", GameParameter(10)}}), + 100); + testing::RandomSimTestWithUndo( + *LoadGame("checkers", + {{"rows", GameParameter(10)}, {"columns", GameParameter(10)}}), + 10); + + // 12x12 Board + testing::RandomSimTest( + *LoadGame("checkers", + {{"rows", GameParameter(12)}, {"columns", GameParameter(12)}}), + 100); + testing::RandomSimTestWithUndo( + *LoadGame("checkers", + {{"rows", GameParameter(12)}, {"columns", GameParameter(12)}}), + 10); + + auto observer = LoadGame("checkers") + ->MakeObserver(absl::nullopt, + GameParametersFromString("single_tensor")); + testing::RandomSimTestCustomObserver(*LoadGame("checkers"), observer); +} + +// Board: +// 8........ +// 7..*..... +// 6........ +// 5....+.o. +// 4.....o.. +// 3+....... +// 2...+.... +// 1o.o..... +// abcdefgh +// Player 0 should be able to do a double jump and crown a piece at b8 +void MultipleJumpTest() { + std::shared_ptr game = LoadGame("checkers"); + std::unique_ptr state = game->NewInitialState(); + CheckersState* cstate = static_cast(state.get()); + cstate->SetCustomBoard( + "0..........*.................+.o......o..+..........+....o.o....."); + cstate->ApplyAction(cstate->LegalActions()[0]); + // Confirm that player 0 is given only one action (f4 token is in the middle + // of a multiple jump) and there's a capture opportunity for c1 piece as well + // (which cannot be moved in this extra move) + SPIEL_CHECK_EQ(cstate->LegalActions().size(), 1); + cstate->ApplyAction(cstate->LegalActions()[0]); + SPIEL_CHECK_EQ(cstate->BoardAt(0, 1), CellState::kWhiteKing); + SPIEL_CHECK_EQ(cstate->BoardAt(1, 2), CellState::kEmpty); + SPIEL_CHECK_EQ(cstate->BoardAt(3, 4), CellState::kEmpty); +} + +// Board: +// 8...8.... +// 7........ +// 6........ +// 5....+... +// 4........ +// 3+....... +// 2........ +// 1........ 
+// abcdefgh +// Player 0 should be able to move the crowned piece backwards +void CrownedPieceCanMoveBackwardsTest() { + std::shared_ptr game = LoadGame("checkers"); + std::unique_ptr state = game->NewInitialState(); + CheckersState* cstate = static_cast(state.get()); + cstate->SetCustomBoard( + "0...8........................+...........+......................."); + std::vector legal_actions = cstate->LegalActions(); + cstate->ApplyAction(legal_actions[0]); + SPIEL_CHECK_EQ(cstate->BoardAt(1, 4), CellState::kWhiteKing); +} + +// Board: +// 8........ +// 7....+.+. +// 6........ +// 5....+.o. +// 4.....o.. +// 3+....... +// 2........ +// 1o.o..... +// abcdefgh +// Player 0 move should end after piece crowned +void MoveShouldEndAfterPieceCrownedTest() { + std::shared_ptr game = LoadGame("checkers"); + std::unique_ptr state = game->NewInitialState(); + CheckersState* cstate = static_cast(state.get()); + cstate->SetCustomBoard( + "0............+.+.............+.o......o..+...............o.o....."); + cstate->ApplyAction(cstate->LegalActions()[0]); + cstate->ApplyAction(cstate->LegalActions()[0]); + SPIEL_CHECK_EQ(cstate->CurrentPlayer(), 1); +} + +} // namespace +} // namespace checkers +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::checkers::BasicSerializationTest(); + open_spiel::checkers::RandomSerializationTest(); + open_spiel::checkers::BasicCheckersTests(); + open_spiel::checkers::MultipleJumpTest(); + open_spiel::checkers::CrownedPieceCanMoveBackwardsTest(); + open_spiel::checkers::MoveShouldEndAfterPieceCrownedTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/chess/chess.cc b/scenarios/bargaining/open_spiel/open_spiel/games/chess/chess.cc new file mode 100644 index 0000000..504e5d5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/chess/chess.cc @@ -0,0 +1,595 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/chess/chess.h" +#include + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/strings/match.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/games/chess/chess_board.h" +#include "open_spiel/games/chess/chess_common.h" +#include "open_spiel/observer.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace chess { +namespace { + +constexpr int kNumReversibleMovesToDraw = 100; +constexpr int kNumRepetitionsToDraw = 3; + +// Facts about the game +const GameType kGameType{/*short_name=*/"chess", + /*long_name=*/"Chess", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"chess960", GameParameter(kDefaultChess960)}}}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new ChessGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +// Adds a plane to the information state vector corresponding to the presence +// and absence of the given piece type and colour at each square. +void AddPieceTypePlane(Color color, PieceType piece_type, + const ChessBoard& board, + absl::Span::iterator& value_it) { + for (int8_t y = 0; y < kMaxBoardSize; ++y) { + for (int8_t x = 0; x < kMaxBoardSize; ++x) { + Piece piece_on_board = board.at(Square{x, y}); + *value_it++ = + (piece_on_board.color == color && piece_on_board.type == piece_type + ? 1.0 + : 0.0); + } + } +} + +// Adds a uniform scalar plane scaled with min and max. +template +void AddScalarPlane(T val, T min, T max, + absl::Span::iterator& value_it) { + double normalized_val = static_cast(val - min) / (max - min); + for (int i = 0; i < k2dMaxBoardSize; ++i) *value_it++ = normalized_val; +} + +// Adds a binary scalar plane. +void AddBinaryPlane(bool val, absl::Span::iterator& value_it) { + AddScalarPlane(val ? 1 : 0, 0, 1, value_it); +} +} // namespace + +ChessState::ChessState(std::shared_ptr game) + : State(game), + start_board_(MakeDefaultBoard()), + current_board_(start_board_) { + repetitions_[current_board_.HashValue()] = 1; + if (ParentGame()->IsChess960()) { + chess960_random_start_fen_ = "UNINITIALIZED"; + } +} + +ChessState::ChessState(std::shared_ptr game, const std::string& fen) + : State(game) { + auto maybe_board = ChessBoard::BoardFromFEN(fen); + SPIEL_CHECK_TRUE(maybe_board); + start_board_ = *maybe_board; + current_board_ = start_board_; + repetitions_[current_board_.HashValue()] = 1; +} + +Player ChessState::CurrentPlayer() const { + if (ParentGame()->IsChess960() && + chess960_random_start_fen_ == "UNINITIALIZED") { + return kChancePlayerId; + } + return IsTerminal() ? 
kTerminalPlayerId : ColorToPlayer(Board().ToPlay()); +} + +ActionsAndProbs ChessState::ChanceOutcomes() const { + SPIEL_CHECK_TRUE(ParentGame()->IsChess960()); + // One chance outcome for each initial position in chess960. + ActionsAndProbs outcomes; + outcomes.reserve(960); + for (int i = 0; i < 960; ++i) { + outcomes.push_back({i, 1.0 / 960}); + } + return outcomes; +} + +Action ChessState::ParseMoveToAction(const std::string& move_str) const { + bool chess960 = ParentGame()->IsChess960(); + absl::optional move = Board().ParseMove(move_str, chess960); + if (!move.has_value()) { + return kInvalidAction; + } + return MoveToAction(*move, BoardSize()); +} + +void ChessState::DoApplyAction(Action action) { + if (IsChanceNode()) { + SPIEL_CHECK_TRUE(ParentGame()->IsChess960()); + // In chess960, there could be a chance node at the top of the game if the + // initial FEN is not passed in. So here we apply the initial position. + // First, reset the repetitions table. + repetitions_ = RepetitionTable(); + + // Then get the initial fen and set the board. + chess960_random_start_fen_ = ParentGame()->Chess960LookupFEN(action); + auto maybe_board = ChessBoard::BoardFromFEN(chess960_random_start_fen_); + SPIEL_CHECK_TRUE(maybe_board); + start_board_ = *maybe_board; + current_board_ = start_board_; + repetitions_[current_board_.HashValue()] = 1; + cached_legal_actions_.reset(); + return; + } + + Move move = ActionToMove(action, Board()); + moves_history_.push_back(move); + Board().ApplyMove(move); + ++repetitions_[current_board_.HashValue()]; + cached_legal_actions_.reset(); +} + +void ChessState::MaybeGenerateLegalActions() const { + if (!cached_legal_actions_) { + cached_legal_actions_ = std::vector(); + Board().GenerateLegalMoves([this](const Move& move) -> bool { + cached_legal_actions_->push_back(MoveToAction(move, kMaxBoardSize)); + return true; + }); + absl::c_sort(*cached_legal_actions_); + } +} + +std::vector ChessState::LegalActions() const { + if (IsChanceNode()) { + return LegalChanceOutcomes(); + } // chess960. + MaybeGenerateLegalActions(); + if (IsTerminal()) return {}; + return *cached_legal_actions_; +} + +int EncodeMove(const Square& from_square, int destination_index, int board_size, + int num_actions_destinations) { + return (from_square.x * board_size + from_square.y) * + num_actions_destinations + + destination_index; +} + +int8_t ReflectRank(Color to_play, int board_size, int8_t rank) { + return to_play == Color::kBlack ? board_size - 1 - rank : rank; +} + +Color PlayerToColor(Player p) { + SPIEL_CHECK_NE(p, kInvalidPlayer); + return static_cast(p); +} + +Action MoveToAction(const Move& move, int board_size) { + // Special-case for pass move. + if (move == kPassMove) return kPassAction; + + if (move.is_castling()) { + if (move.castle_dir == CastlingDirection::kLeft) { + return kLeftCastlingAction; + } else if (move.castle_dir == CastlingDirection::kRight) { + return kRightCastlingAction; + } else { + SpielFatalError("Invalid castling move."); + } + } + + Color color = move.piece.color; + // We rotate the move to be from player p's perspective. + Move player_move(move); + + // Rotate move to be from player p's perspective. + player_move.from.y = ReflectRank(color, board_size, player_move.from.y); + player_move.to.y = ReflectRank(color, board_size, player_move.to.y); + + // For each starting square, we enumerate 73 actions: + // - 9 possible underpromotions + // - 56 queen moves + // - 8 knight moves + // In total, this results in 64 * 73 = 4672 indices. 
+ // This is the union of all possible moves, by reducing this to the number of + // moves actually available from each starting square this could still be + // reduced a little to 1816 indices. + int starting_index = + EncodeMove(player_move.from, 0, kMaxBoardSize, kNumActionDestinations); + int8_t x_diff = player_move.to.x - player_move.from.x; + int8_t y_diff = player_move.to.y - player_move.from.y; + Offset offset{x_diff, y_diff}; + bool is_under_promotion = move.promotion_type != PieceType::kEmpty && + move.promotion_type != PieceType::kQueen; + if (is_under_promotion) { + // We have to indicate underpromotions as special moves, because in terms of + // from/to they are identical to queen promotions. + // For a given starting square, an underpromotion can have 3 possible + // destination squares (straight, left diagonal, right diagonal) and 3 + // possible piece types. + SPIEL_CHECK_EQ(move.piece.type, PieceType::kPawn); + SPIEL_CHECK_TRUE((move.piece.color == color && + player_move.from.y == board_size - 2 && + player_move.to.y == board_size - 1) || + (move.piece.color == OppColor(color) && + player_move.from.y == 1 && player_move.to.y == 0)); + + int promotion_index; + { + auto itr = absl::c_find(kUnderPromotionIndexToType, move.promotion_type); + SPIEL_CHECK_TRUE(itr != kUnderPromotionIndexToType.end()); + promotion_index = std::distance(kUnderPromotionIndexToType.begin(), itr); + } + + int direction_index; + { + auto itr = absl::c_find_if( + kUnderPromotionDirectionToOffset, + [offset](Offset o) { return o.x_offset == offset.x_offset; }); + SPIEL_CHECK_TRUE(itr != kUnderPromotionDirectionToOffset.end()); + direction_index = + std::distance(kUnderPromotionDirectionToOffset.begin(), itr); + } + return starting_index + + kUnderPromotionDirectionToOffset.size() * promotion_index + + direction_index; + } else { + // For the normal moves, we simply encode starting and destination square. + int destination_index = + OffsetToDestinationIndex(offset, kKnightOffsets, kMaxBoardSize); + SPIEL_CHECK_TRUE(destination_index >= 0 && destination_index < 64); + return starting_index + kNumUnderPromotions + destination_index; + } +} + +std::pair ActionToDestination(int action, int board_size, + int num_actions_destinations) { + const int xy = action / num_actions_destinations; + SPIEL_CHECK_GE(xy, 0); + SPIEL_CHECK_LT(xy, board_size * board_size); + const int8_t x = xy / board_size; + const int8_t y = xy % board_size; + const int destination_index = action % num_actions_destinations; + SPIEL_CHECK_GE(destination_index, 0); + SPIEL_CHECK_LT(destination_index, num_actions_destinations); + return {Square{x, y}, destination_index}; +} + +Move ActionToMove(const Action& action, const ChessBoard& board) { + SPIEL_CHECK_GE(action, 0); + SPIEL_CHECK_LT(action, NumDistinctActions()); + + // Some chess variants (e.g. RBC) allow pass moves. + if (board.AllowPassMove() && action == kPassAction) { + return kPassMove; + } + + // Castle actions. 
+ if (action == kLeftCastlingAction || action == kRightCastlingAction) { + Square king_square = board.find(Piece{board.ToPlay(), PieceType::kKing}); + if (action == kLeftCastlingAction) { + return Move(king_square, Square{2, king_square.y}, + Piece{board.ToPlay(), PieceType::kKing}, PieceType::kEmpty, + CastlingDirection::kLeft); + } else if (action == kRightCastlingAction) { + return Move(king_square, Square{6, king_square.y}, + Piece{board.ToPlay(), PieceType::kKing}, PieceType::kEmpty, + CastlingDirection::kRight); + } else { + SpielFatalError("Invalid castling move."); + } + } + + // The encoded action represents an action encoded from color's perspective. + Color color = board.ToPlay(); + int board_size = board.BoardSize(); + PieceType promotion_type = PieceType::kEmpty; + CastlingDirection castle_dir = CastlingDirection::kNone; + + auto [from_square, destination_index] = + ActionToDestination(action, kMaxBoardSize, kNumActionDestinations); + SPIEL_CHECK_LT(destination_index, kNumActionDestinations); + + bool is_under_promotion = destination_index < kNumUnderPromotions; + Offset offset; + if (is_under_promotion) { + int promotion_index = destination_index / 3; + int direction_index = destination_index % 3; + promotion_type = kUnderPromotionIndexToType[promotion_index]; + offset = kUnderPromotionDirectionToOffset[direction_index]; + } else { + destination_index -= kNumUnderPromotions; + offset = DestinationIndexToOffset(destination_index, kKnightOffsets, + kMaxBoardSize); + } + Square to_square = from_square + offset; + + from_square.y = ReflectRank(color, board_size, from_square.y); + to_square.y = ReflectRank(color, board_size, to_square.y); + + // This uses the current state to infer the piece type. + Piece piece = {board.ToPlay(), board.at(from_square).type}; + + // Check for queen promotion. + if (!is_under_promotion && piece.type == PieceType::kPawn && + ReflectRank(color, board_size, from_square.y) == board_size - 2 && + ReflectRank(color, board_size, to_square.y) == board_size - 1) { + promotion_type = PieceType::kQueen; + } + + Move move(from_square, to_square, piece, promotion_type, castle_dir); + return move; +} + +std::string ChessState::ActionToString(Player player, Action action) const { + if (player == kChancePlayerId) { + // Chess960 has an initial chance node. + SPIEL_CHECK_GE(action, 0); + SPIEL_CHECK_LT(action, 960); + return absl::StrCat("ChanceNodeOutcome_", action); + } + Move move = ActionToMove(action, Board()); + return move.ToSAN(Board()); +} + +std::string ChessState::DebugString() const { + return current_board_.DebugString(ParentGame()->IsChess960()); +} + +std::string ChessState::ToString() const { + return Board().ToFEN(ParentGame()->IsChess960()); +} + +std::vector ChessState::Returns() const { + auto maybe_final_returns = MaybeFinalReturns(); + if (maybe_final_returns) { + return *maybe_final_returns; + } else { + return {0.0, 0.0}; + } +} + +std::string ChessState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return HistoryString(); +} + +std::string ChessState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void ChessState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + auto value_it = values.begin(); + + // Piece configuration. 
+ for (const auto& piece_type : kPieceTypes) { + AddPieceTypePlane(Color::kWhite, piece_type, Board(), value_it); + AddPieceTypePlane(Color::kBlack, piece_type, Board(), value_it); + } + + AddPieceTypePlane(Color::kEmpty, PieceType::kEmpty, Board(), value_it); + + const auto entry = repetitions_.find(Board().HashValue()); + SPIEL_CHECK_FALSE(entry == repetitions_.end()); + int repetitions = entry->second; + + // Num repetitions for the current board. + AddScalarPlane(repetitions, 1, 3, value_it); + + // Side to play. + AddScalarPlane(ColorToPlayer(Board().ToPlay()), 0, 1, value_it); + + // Irreversible move counter. + AddScalarPlane(Board().IrreversibleMoveCounter(), 0, 101, value_it); + + // Castling rights. + AddBinaryPlane(Board().CastlingRight(Color::kWhite, CastlingDirection::kLeft), + value_it); + + AddBinaryPlane( + Board().CastlingRight(Color::kWhite, CastlingDirection::kRight), + value_it); + + AddBinaryPlane(Board().CastlingRight(Color::kBlack, CastlingDirection::kLeft), + value_it); + + AddBinaryPlane( + Board().CastlingRight(Color::kBlack, CastlingDirection::kRight), + value_it); + + SPIEL_CHECK_EQ(value_it, values.end()); +} + +std::unique_ptr ChessState::Clone() const { + return std::unique_ptr(new ChessState(*this)); +} + +void ChessState::UndoAction(Player player, Action action) { + // TODO: Make this fast by storing undo info in another stack. + // Note: only supported after the chance node in Chess960. + SPIEL_CHECK_GE(moves_history_.size(), 1); + --repetitions_[current_board_.HashValue()]; + moves_history_.pop_back(); + history_.pop_back(); + --move_number_; + current_board_ = start_board_; + for (const Move& move : moves_history_) { + current_board_.ApplyMove(move); + } +} + +bool ChessState::IsRepetitionDraw() const { + const auto entry = repetitions_.find(Board().HashValue()); + SPIEL_CHECK_FALSE(entry == repetitions_.end()); + return entry->second >= kNumRepetitionsToDraw; +} + +int ChessState::NumRepetitions(const ChessState& state) const { + uint64_t state_hash_value = state.Board().HashValue(); + const auto entry = repetitions_.find(state_hash_value); + if (entry == repetitions_.end()) { + return 0; + } else { + return entry->second; + } +} + +std::pair> +ChessState::ExtractFenAndMaybeMoves() const { + SPIEL_CHECK_FALSE(IsChanceNode()); + std::string initial_fen = start_board_.ToFEN(ParentGame()->IsChess960()); + std::vector move_lans; + std::unique_ptr state = ParentGame()->NewInitialState(initial_fen); + ChessBoard board = down_cast(*state).Board(); + for (const Move& move : moves_history_) { + move_lans.push_back(move.ToLAN(ParentGame()->IsChess960(), &board)); + board.ApplyMove(move); + } + return std::make_pair(initial_fen, move_lans); +} + +absl::optional> ChessState::MaybeFinalReturns() const { + if (!Board().HasSufficientMaterial()) { + return std::vector{DrawUtility(), DrawUtility()}; + } + + if (IsRepetitionDraw()) { + return std::vector{DrawUtility(), DrawUtility()}; + } + // Compute and cache the legal actions. + MaybeGenerateLegalActions(); + SPIEL_CHECK_TRUE(cached_legal_actions_); + bool have_legal_moves = !cached_legal_actions_->empty(); + + // If we don't have legal moves we are either stalemated or checkmated, + // depending on whether we are in check or not. 
+ if (!have_legal_moves) { + if (!Board().InCheck()) { + return std::vector{DrawUtility(), DrawUtility()}; + } else { + std::vector returns(NumPlayers()); + auto next_to_play = ColorToPlayer(Board().ToPlay()); + returns[next_to_play] = LossUtility(); + returns[OtherPlayer(next_to_play)] = WinUtility(); + return returns; + } + } + + if (Board().IrreversibleMoveCounter() >= kNumReversibleMovesToDraw) { + // This is theoretically a draw that needs to be claimed, but we implement + // it as a forced draw for now. + return std::vector{DrawUtility(), DrawUtility()}; + } + + return absl::nullopt; +} + +std::string ChessState::Serialize() const { + std::string state_str = ""; + // Write the FEN of the start board, then all the moves in the history. + absl::StrAppend(&state_str, "FEN: ", + start_board_.ToFEN(ParentGame()->IsChess960()), "\n"); + // Remove the first chance node from the history because we're storing the + // fen as the start_board_ in DoApplyAction at the chance node. + std::vector history = History(); + if (ParentGame()->IsChess960() && !chess960_random_start_fen_.empty()) { + if (!history.empty()) { + history.erase(history.begin()); + } + } + absl::StrAppend(&state_str, absl::StrJoin(history, "\n"), "\n"); + return state_str; +} + +ChessGame::ChessGame(const GameParameters& params) + : Game(kGameType, params), chess960_(ParameterValue("chess960")) { + if (chess960_) { + initial_fens_ = Chess960StartingPositions(); + SPIEL_CHECK_EQ(initial_fens_.size(), 960); + } +} + +std::unique_ptr ChessGame::DeserializeState( + const std::string& str) const { + const std::string prefix("FEN: "); + if (!absl::StartsWith(str, prefix)) { + // Backward compatibility. + return Game::DeserializeState(str); + } + int line_num = 0; + std::vector lines = absl::StrSplit(str, '\n'); + // Create initial state from FEN (first line of serialized state). + std::unique_ptr state = + NewInitialState(lines[line_num].substr(prefix.length())); + line_num += 1; + for (int i = line_num; i < lines.size(); ++i) { + if (lines[i].empty()) { + break; + } + Action action = static_cast(std::stol(lines[i])); + state->ApplyAction(action); + } + return state; +} + +int ChessGame::MaxChanceOutcomes() const { + if (IsChess960()) { + return 960; + } else { + return 0; + } +} + +} // namespace chess +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/chess/chess.h b/scenarios/bargaining/open_spiel/open_spiel/games/chess/chess.h new file mode 100644 index 0000000..d1ececc --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/chess/chess.h @@ -0,0 +1,293 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef OPEN_SPIEL_GAMES_CHESS_H_ +#define OPEN_SPIEL_GAMES_CHESS_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/abseil-cpp/absl/memory/memory.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/games/chess/chess_board.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// Game of chess: +// https://en.wikipedia.org/wiki/Chess +// +// Parameters: +// "chess960" bool is it a Fischer Random game? (default: false) +// + +namespace open_spiel { +namespace chess { + +// Constants. +inline constexpr int NumPlayers() { return 2; } +inline constexpr double LossUtility() { return -1; } +inline constexpr double DrawUtility() { return 0; } +inline constexpr double WinUtility() { return 1; } + +inline constexpr int NumDistinctActions() { return 4674; } +inline constexpr int kLeftCastlingAction = 4672; +inline constexpr int kRightCastlingAction = 4673; + +// https://math.stackexchange.com/questions/194008/how-many-turns-can-a-chess-game-take-at-maximum +inline constexpr int MaxGameLength() { return 17695; } + +inline const std::vector& ObservationTensorShape() { + static std::vector shape{ + 13 /* piece types * colours + empty */ + 1 /* repetition count */ + + 1 /* side to move */ + 1 /* irreversible move counter */ + + 4 /* castling rights */, + kMaxBoardSize, kMaxBoardSize}; + return shape; +} + +constexpr bool kDefaultChess960 = false; + +// Returns a list of all possible starting positions in chess960. +std::vector Chess960StartingPositions(); + +class ChessGame; + +inline int ColorToPlayer(Color c) { + if (c == Color::kBlack) { + return 0; + } else if (c == Color::kWhite) { + return 1; + } else { + SpielFatalError("Unknown color"); + } +} + +inline int OtherPlayer(Player player) { return player == Player{0} ? 1 : 0; } + +inline constexpr std::array kUnderPromotionIndexToType = { + PieceType::kRook, PieceType::kBishop, PieceType::kKnight}; +inline constexpr std::array kUnderPromotionDirectionToOffset = { + {{0, 1}, {1, 1}, {-1, 1}}}; +inline constexpr int kNumUnderPromotions = + kUnderPromotionIndexToType.size() * kUnderPromotionDirectionToOffset.size(); + +// Reads a bitfield within action, with LSB at offset, and length bits long (up +// to 8). +inline uint8_t GetField(Action action, int offset, int length) { + return (action >> offset) & ((1ULL << length) - 1); +} + +// Sets a bitfield within action, with LSB at offset, and length bits long (up +// to 8) to value. +inline void SetField(int offset, int length, uint8_t value, Action* action) { + uint32_t mask = ((1ULL << length) - 1) << offset; + *action &= ~mask; + *action |= static_cast(value) << offset; +} + +// Returns index (0 ... BoardSize*BoardSize-1) of a square +// ({0, 0} ... {BoardSize-1, BoardSize-1}). +inline uint8_t SquareToIndex(const Square& square, int board_size) { + return square.y * board_size + square.x; +} + +// Returns square ({0, 0} ... {BoardSize-1, BoardSize-1}) from an index +// (0 ... BoardSize*BoardSize-1). 
+inline Square IndexToSquare(uint8_t index, int board_size) { + return Square{static_cast(index % board_size), + static_cast(index / board_size)}; +} + +int EncodeMove(const Square& from_square, int destination_index, int board_size, + int num_actions_destinations); + +inline constexpr int kNumActionDestinations = 73; + +int8_t ReflectRank(Color to_play, int board_size, int8_t rank); + +Color PlayerToColor(Player p); + +std::pair ActionToDestination(int action, int board_size, + int num_actions_destinations); + +Action MoveToAction(const Move& move, int board_size = kDefaultBoardSize); + +Move ActionToMove(const Action& action, const ChessBoard& board); + +// State of an in-play game. +class ChessState : public State { + public: + // Constructs a chess state at the standard start position. + ChessState(std::shared_ptr game); + + // Constructs a chess state at the given position in Forsyth-Edwards Notation. + // https://en.wikipedia.org/wiki/Forsyth%E2%80%93Edwards_Notation + ChessState(std::shared_ptr game, const std::string& fen); + ChessState(const ChessState&) = default; + + ChessState& operator=(const ChessState&) = default; + + Player CurrentPlayer() const override; + std::vector LegalActions() const override; + std::string ActionToString(Player player, Action action) const override; + std::string ToString() const override; + ActionsAndProbs ChanceOutcomes() const override; // for chess960 + + bool IsTerminal() const override { + return static_cast(MaybeFinalReturns()); + } + + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + void UndoAction(Player player, Action action) override; + + // Current board. + ChessBoard& Board() { return current_board_; } + const ChessBoard& Board() const { return current_board_; } + int BoardSize() const { return current_board_.BoardSize(); } + + // Starting board. + ChessBoard& StartBoard() { return start_board_; } + const ChessBoard& StartBoard() const { return start_board_; } + + std::vector& MovesHistory() { return moves_history_; } + const std::vector& MovesHistory() const { return moves_history_; } + + // A prettier board string. + std::string DebugString() const; + + // Returns an action parsed from standard algebraic notation or long + // algebraic notation (using ChessBoard::ParseMove), or kInvalidAction if + // the parsing fails. + Action ParseMoveToAction(const std::string& move_str) const; + + std::string Serialize() const override; + + // Draw can be claimed under the FIDE 3-fold repetition rule (the current + // board position has already appeared twice in the history). + bool IsRepetitionDraw() const; + + // Returns the number of times the specified state has appeared in the + // history. + int NumRepetitions(const ChessState& state) const; + + // Get the FEN for this move and the list of moves in UCI format. + std::pair> ExtractFenAndMaybeMoves() + const; + + const ChessGame* ParentGame() const { + return down_cast(GetGame().get()); + } + + void SetChess960RandomStartFEN(const std::string& fen) { + chess960_random_start_fen_ = fen; + } + + protected: + void DoApplyAction(Action action) override; + + private: + // Calculates legal actions and caches them. This is separate from + // LegalActions() as there are a number of other methods that need the value + // of LegalActions. 
This is a separate method as it's called from + // IsTerminal(), which is also called by LegalActions(). + void MaybeGenerateLegalActions() const; + + absl::optional> MaybeFinalReturns() const; + + // We have to store every move made to check for repetitions and to implement + // undo. We store the current board position as an optimization. + std::vector moves_history_; + // We store the start board for history to support games not starting + // from the start position. + ChessBoard start_board_; + // We store the current board position as an optimization. + ChessBoard current_board_; + + // Used for Chess960. Set to the fen that was randomly chosen at the start of + // the game only when it was drawn randomly using a chance node. This remains + // empty if chance nodes are not used to determine the start position (i.e. + // when the start position passed in using NewInitialState(fen)). + std::string chess960_random_start_fen_; + + // RepetitionTable records how many times the given hash exists in the history + // stack (including the current board). + // We are already indexing by board hash, so there is no need to hash that + // hash again, so we use a custom passthrough hasher. + class PassthroughHash { + public: + std::size_t operator()(uint64_t x) const { + return static_cast(x); + } + }; + using RepetitionTable = absl::flat_hash_map; + RepetitionTable repetitions_; + mutable absl::optional> cached_legal_actions_; +}; + +// Game object. +class ChessGame : public Game { + public: + explicit ChessGame(const GameParameters& params); + int NumDistinctActions() const override { + return chess::NumDistinctActions(); + } + std::unique_ptr NewInitialState( + const std::string& fen) const override { + return absl::make_unique(shared_from_this(), fen); + } + std::unique_ptr NewInitialState() const override { + return absl::make_unique(shared_from_this()); + } + int NumPlayers() const override { return chess::NumPlayers(); } + double MinUtility() const override { return LossUtility(); } + absl::optional UtilitySum() const override { return DrawUtility(); } + double MaxUtility() const override { return WinUtility(); } + std::vector ObservationTensorShape() const override { + return chess::ObservationTensorShape(); + } + int MaxGameLength() const override { return chess::MaxGameLength(); } + int MaxChanceOutcomes() const override; // for chess960 + + std::unique_ptr DeserializeState( + const std::string& str) const override; + + bool IsChess960() const { return chess960_; } + + std::string Chess960LookupFEN(int index) const { + SPIEL_CHECK_GE(index, 0); + SPIEL_CHECK_LT(index, initial_fens_.size()); + return initial_fens_[index]; + } + + private: + bool chess960_; + std::vector initial_fens_; // Used for chess960. +}; + +} // namespace chess +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_CHESS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/chess/chess960_starting_positions.cc b/scenarios/bargaining/open_spiel/open_spiel/games/chess/chess960_starting_positions.cc new file mode 100644 index 0000000..4994f7f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/chess/chess960_starting_positions.cc @@ -0,0 +1,996 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" + +namespace open_spiel { +namespace chess { + +constexpr const char* kChess960StartingFens = + R"(bbqnnrkr/pppppppp/8/8/8/8/PPPPPPPP/BBQNNRKR w KQkq - 0 1 +bbqnrnkr/pppppppp/8/8/8/8/PPPPPPPP/BBQNRNKR w KQkq - 0 1 +bbqnrknr/pppppppp/8/8/8/8/PPPPPPPP/BBQNRKNR w KQkq - 0 1 +bbqnrkrn/pppppppp/8/8/8/8/PPPPPPPP/BBQNRKRN w KQkq - 0 1 +bbqrnnkr/pppppppp/8/8/8/8/PPPPPPPP/BBQRNNKR w KQkq - 0 1 +bbqrnknr/pppppppp/8/8/8/8/PPPPPPPP/BBQRNKNR w KQkq - 0 1 +bbqrnkrn/pppppppp/8/8/8/8/PPPPPPPP/BBQRNKRN w KQkq - 0 1 +bbqrknnr/pppppppp/8/8/8/8/PPPPPPPP/BBQRKNNR w KQkq - 0 1 +bbqrknrn/pppppppp/8/8/8/8/PPPPPPPP/BBQRKNRN w KQkq - 0 1 +bbqrkrnn/pppppppp/8/8/8/8/PPPPPPPP/BBQRKRNN w KQkq - 0 1 +bbnqnrkr/pppppppp/8/8/8/8/PPPPPPPP/BBNQNRKR w KQkq - 0 1 +bbnqrnkr/pppppppp/8/8/8/8/PPPPPPPP/BBNQRNKR w KQkq - 0 1 +bbnqrknr/pppppppp/8/8/8/8/PPPPPPPP/BBNQRKNR w KQkq - 0 1 +bbnqrkrn/pppppppp/8/8/8/8/PPPPPPPP/BBNQRKRN w KQkq - 0 1 +bbrqnnkr/pppppppp/8/8/8/8/PPPPPPPP/BBRQNNKR w KQkq - 0 1 +bbrqnknr/pppppppp/8/8/8/8/PPPPPPPP/BBRQNKNR w KQkq - 0 1 +bbrqnkrn/pppppppp/8/8/8/8/PPPPPPPP/BBRQNKRN w KQkq - 0 1 +bbrqknnr/pppppppp/8/8/8/8/PPPPPPPP/BBRQKNNR w KQkq - 0 1 +bbrqknrn/pppppppp/8/8/8/8/PPPPPPPP/BBRQKNRN w KQkq - 0 1 +bbrqkrnn/pppppppp/8/8/8/8/PPPPPPPP/BBRQKRNN w KQkq - 0 1 +bbnnqrkr/pppppppp/8/8/8/8/PPPPPPPP/BBNNQRKR w KQkq - 0 1 +bbnrqnkr/pppppppp/8/8/8/8/PPPPPPPP/BBNRQNKR w KQkq - 0 1 +bbnrqknr/pppppppp/8/8/8/8/PPPPPPPP/BBNRQKNR w KQkq - 0 1 +bbnrqkrn/pppppppp/8/8/8/8/PPPPPPPP/BBNRQKRN w KQkq - 0 1 +bbrnqnkr/pppppppp/8/8/8/8/PPPPPPPP/BBRNQNKR w KQkq - 0 1 +bbrnqknr/pppppppp/8/8/8/8/PPPPPPPP/BBRNQKNR w KQkq - 0 1 +bbrnqkrn/pppppppp/8/8/8/8/PPPPPPPP/BBRNQKRN w KQkq - 0 1 +bbrkqnnr/pppppppp/8/8/8/8/PPPPPPPP/BBRKQNNR w KQkq - 0 1 +bbrkqnrn/pppppppp/8/8/8/8/PPPPPPPP/BBRKQNRN w KQkq - 0 1 +bbrkqrnn/pppppppp/8/8/8/8/PPPPPPPP/BBRKQRNN w KQkq - 0 1 +bbnnrqkr/pppppppp/8/8/8/8/PPPPPPPP/BBNNRQKR w KQkq - 0 1 +bbnrnqkr/pppppppp/8/8/8/8/PPPPPPPP/BBNRNQKR w KQkq - 0 1 +bbnrkqnr/pppppppp/8/8/8/8/PPPPPPPP/BBNRKQNR w KQkq - 0 1 +bbnrkqrn/pppppppp/8/8/8/8/PPPPPPPP/BBNRKQRN w KQkq - 0 1 +bbrnnqkr/pppppppp/8/8/8/8/PPPPPPPP/BBRNNQKR w KQkq - 0 1 +bbrnkqnr/pppppppp/8/8/8/8/PPPPPPPP/BBRNKQNR w KQkq - 0 1 +bbrnkqrn/pppppppp/8/8/8/8/PPPPPPPP/BBRNKQRN w KQkq - 0 1 +bbrknqnr/pppppppp/8/8/8/8/PPPPPPPP/BBRKNQNR w KQkq - 0 1 +bbrknqrn/pppppppp/8/8/8/8/PPPPPPPP/BBRKNQRN w KQkq - 0 1 +bbrkrqnn/pppppppp/8/8/8/8/PPPPPPPP/BBRKRQNN w KQkq - 0 1 +bbnnrkqr/pppppppp/8/8/8/8/PPPPPPPP/BBNNRKQR w KQkq - 0 1 +bbnrnkqr/pppppppp/8/8/8/8/PPPPPPPP/BBNRNKQR w KQkq - 0 1 +bbnrknqr/pppppppp/8/8/8/8/PPPPPPPP/BBNRKNQR w KQkq - 0 1 +bbnrkrqn/pppppppp/8/8/8/8/PPPPPPPP/BBNRKRQN w KQkq - 0 1 +bbrnnkqr/pppppppp/8/8/8/8/PPPPPPPP/BBRNNKQR w KQkq - 0 1 +bbrnknqr/pppppppp/8/8/8/8/PPPPPPPP/BBRNKNQR w KQkq - 0 1 +bbrnkrqn/pppppppp/8/8/8/8/PPPPPPPP/BBRNKRQN w KQkq - 0 1 +bbrknnqr/pppppppp/8/8/8/8/PPPPPPPP/BBRKNNQR w KQkq - 0 1 +bbrknrqn/pppppppp/8/8/8/8/PPPPPPPP/BBRKNRQN w KQkq - 0 1 +bbrkrnqn/pppppppp/8/8/8/8/PPPPPPPP/BBRKRNQN w KQkq - 0 1 +bbnnrkrq/pppppppp/8/8/8/8/PPPPPPPP/BBNNRKRQ w 
KQkq - 0 1 +bbnrnkrq/pppppppp/8/8/8/8/PPPPPPPP/BBNRNKRQ w KQkq - 0 1 +bbnrknrq/pppppppp/8/8/8/8/PPPPPPPP/BBNRKNRQ w KQkq - 0 1 +bbnrkrnq/pppppppp/8/8/8/8/PPPPPPPP/BBNRKRNQ w KQkq - 0 1 +bbrnnkrq/pppppppp/8/8/8/8/PPPPPPPP/BBRNNKRQ w KQkq - 0 1 +bbrnknrq/pppppppp/8/8/8/8/PPPPPPPP/BBRNKNRQ w KQkq - 0 1 +bbrnkrnq/pppppppp/8/8/8/8/PPPPPPPP/BBRNKRNQ w KQkq - 0 1 +bbrknnrq/pppppppp/8/8/8/8/PPPPPPPP/BBRKNNRQ w KQkq - 0 1 +bbrknrnq/pppppppp/8/8/8/8/PPPPPPPP/BBRKNRNQ w KQkq - 0 1 +bbrkrnnq/pppppppp/8/8/8/8/PPPPPPPP/BBRKRNNQ w KQkq - 0 1 +bqnbnrkr/pppppppp/8/8/8/8/PPPPPPPP/BQNBNRKR w KQkq - 0 1 +bqnbrnkr/pppppppp/8/8/8/8/PPPPPPPP/BQNBRNKR w KQkq - 0 1 +bqnbrknr/pppppppp/8/8/8/8/PPPPPPPP/BQNBRKNR w KQkq - 0 1 +bqnbrkrn/pppppppp/8/8/8/8/PPPPPPPP/BQNBRKRN w KQkq - 0 1 +bqrbnnkr/pppppppp/8/8/8/8/PPPPPPPP/BQRBNNKR w KQkq - 0 1 +bqrbnknr/pppppppp/8/8/8/8/PPPPPPPP/BQRBNKNR w KQkq - 0 1 +bqrbnkrn/pppppppp/8/8/8/8/PPPPPPPP/BQRBNKRN w KQkq - 0 1 +bqrbknnr/pppppppp/8/8/8/8/PPPPPPPP/BQRBKNNR w KQkq - 0 1 +bqrbknrn/pppppppp/8/8/8/8/PPPPPPPP/BQRBKNRN w KQkq - 0 1 +bqrbkrnn/pppppppp/8/8/8/8/PPPPPPPP/BQRBKRNN w KQkq - 0 1 +bnqbnrkr/pppppppp/8/8/8/8/PPPPPPPP/BNQBNRKR w KQkq - 0 1 +bnqbrnkr/pppppppp/8/8/8/8/PPPPPPPP/BNQBRNKR w KQkq - 0 1 +bnqbrknr/pppppppp/8/8/8/8/PPPPPPPP/BNQBRKNR w KQkq - 0 1 +bnqbrkrn/pppppppp/8/8/8/8/PPPPPPPP/BNQBRKRN w KQkq - 0 1 +brqbnnkr/pppppppp/8/8/8/8/PPPPPPPP/BRQBNNKR w KQkq - 0 1 +brqbnknr/pppppppp/8/8/8/8/PPPPPPPP/BRQBNKNR w KQkq - 0 1 +brqbnkrn/pppppppp/8/8/8/8/PPPPPPPP/BRQBNKRN w KQkq - 0 1 +brqbknnr/pppppppp/8/8/8/8/PPPPPPPP/BRQBKNNR w KQkq - 0 1 +brqbknrn/pppppppp/8/8/8/8/PPPPPPPP/BRQBKNRN w KQkq - 0 1 +brqbkrnn/pppppppp/8/8/8/8/PPPPPPPP/BRQBKRNN w KQkq - 0 1 +bnnbqrkr/pppppppp/8/8/8/8/PPPPPPPP/BNNBQRKR w KQkq - 0 1 +bnrbqnkr/pppppppp/8/8/8/8/PPPPPPPP/BNRBQNKR w KQkq - 0 1 +bnrbqknr/pppppppp/8/8/8/8/PPPPPPPP/BNRBQKNR w KQkq - 0 1 +bnrbqkrn/pppppppp/8/8/8/8/PPPPPPPP/BNRBQKRN w KQkq - 0 1 +brnbqnkr/pppppppp/8/8/8/8/PPPPPPPP/BRNBQNKR w KQkq - 0 1 +brnbqknr/pppppppp/8/8/8/8/PPPPPPPP/BRNBQKNR w KQkq - 0 1 +brnbqkrn/pppppppp/8/8/8/8/PPPPPPPP/BRNBQKRN w KQkq - 0 1 +brkbqnnr/pppppppp/8/8/8/8/PPPPPPPP/BRKBQNNR w KQkq - 0 1 +brkbqnrn/pppppppp/8/8/8/8/PPPPPPPP/BRKBQNRN w KQkq - 0 1 +brkbqrnn/pppppppp/8/8/8/8/PPPPPPPP/BRKBQRNN w KQkq - 0 1 +bnnbrqkr/pppppppp/8/8/8/8/PPPPPPPP/BNNBRQKR w KQkq - 0 1 +bnrbnqkr/pppppppp/8/8/8/8/PPPPPPPP/BNRBNQKR w KQkq - 0 1 +bnrbkqnr/pppppppp/8/8/8/8/PPPPPPPP/BNRBKQNR w KQkq - 0 1 +bnrbkqrn/pppppppp/8/8/8/8/PPPPPPPP/BNRBKQRN w KQkq - 0 1 +brnbnqkr/pppppppp/8/8/8/8/PPPPPPPP/BRNBNQKR w KQkq - 0 1 +brnbkqnr/pppppppp/8/8/8/8/PPPPPPPP/BRNBKQNR w KQkq - 0 1 +brnbkqrn/pppppppp/8/8/8/8/PPPPPPPP/BRNBKQRN w KQkq - 0 1 +brkbnqnr/pppppppp/8/8/8/8/PPPPPPPP/BRKBNQNR w KQkq - 0 1 +brkbnqrn/pppppppp/8/8/8/8/PPPPPPPP/BRKBNQRN w KQkq - 0 1 +brkbrqnn/pppppppp/8/8/8/8/PPPPPPPP/BRKBRQNN w KQkq - 0 1 +bnnbrkqr/pppppppp/8/8/8/8/PPPPPPPP/BNNBRKQR w KQkq - 0 1 +bnrbnkqr/pppppppp/8/8/8/8/PPPPPPPP/BNRBNKQR w KQkq - 0 1 +bnrbknqr/pppppppp/8/8/8/8/PPPPPPPP/BNRBKNQR w KQkq - 0 1 +bnrbkrqn/pppppppp/8/8/8/8/PPPPPPPP/BNRBKRQN w KQkq - 0 1 +brnbnkqr/pppppppp/8/8/8/8/PPPPPPPP/BRNBNKQR w KQkq - 0 1 +brnbknqr/pppppppp/8/8/8/8/PPPPPPPP/BRNBKNQR w KQkq - 0 1 +brnbkrqn/pppppppp/8/8/8/8/PPPPPPPP/BRNBKRQN w KQkq - 0 1 +brkbnnqr/pppppppp/8/8/8/8/PPPPPPPP/BRKBNNQR w KQkq - 0 1 +brkbnrqn/pppppppp/8/8/8/8/PPPPPPPP/BRKBNRQN w KQkq - 0 1 +brkbrnqn/pppppppp/8/8/8/8/PPPPPPPP/BRKBRNQN w KQkq - 0 1 +bnnbrkrq/pppppppp/8/8/8/8/PPPPPPPP/BNNBRKRQ w KQkq - 0 1 +bnrbnkrq/pppppppp/8/8/8/8/PPPPPPPP/BNRBNKRQ w KQkq - 0 1 
+bnrbknrq/pppppppp/8/8/8/8/PPPPPPPP/BNRBKNRQ w KQkq - 0 1
+bnrbkrnq/pppppppp/8/8/8/8/PPPPPPPP/BNRBKRNQ w KQkq - 0 1
+brnbnkrq/pppppppp/8/8/8/8/PPPPPPPP/BRNBNKRQ w KQkq - 0 1
+brnbknrq/pppppppp/8/8/8/8/PPPPPPPP/BRNBKNRQ w KQkq - 0 1
+brnbkrnq/pppppppp/8/8/8/8/PPPPPPPP/BRNBKRNQ w KQkq - 0 1
+brkbnnrq/pppppppp/8/8/8/8/PPPPPPPP/BRKBNNRQ w KQkq - 0 1
+brkbnrnq/pppppppp/8/8/8/8/PPPPPPPP/BRKBNRNQ w KQkq - 0 1
+brkbrnnq/pppppppp/8/8/8/8/PPPPPPPP/BRKBRNNQ w KQkq - 0 1
+bqnnrbkr/pppppppp/8/8/8/8/PPPPPPPP/BQNNRBKR w KQkq - 0 1
+bqnrnbkr/pppppppp/8/8/8/8/PPPPPPPP/BQNRNBKR w KQkq - 0 1
+bqnrkbnr/pppppppp/8/8/8/8/PPPPPPPP/BQNRKBNR w KQkq - 0 1
+bqnrkbrn/pppppppp/8/8/8/8/PPPPPPPP/BQNRKBRN w KQkq - 0 1
+bqrnnbkr/pppppppp/8/8/8/8/PPPPPPPP/BQRNNBKR w KQkq - 0 1
+bqrnkbnr/pppppppp/8/8/8/8/PPPPPPPP/BQRNKBNR w KQkq - 0 1
+bqrnkbrn/pppppppp/8/8/8/8/PPPPPPPP/BQRNKBRN w KQkq - 0 1
+bqrknbnr/pppppppp/8/8/8/8/PPPPPPPP/BQRKNBNR w KQkq - 0 1
+bqrknbrn/pppppppp/8/8/8/8/PPPPPPPP/BQRKNBRN w KQkq - 0 1
+bqrkrbnn/pppppppp/8/8/8/8/PPPPPPPP/BQRKRBNN w KQkq - 0 1
+bnqnrbkr/pppppppp/8/8/8/8/PPPPPPPP/BNQNRBKR w KQkq - 0 1
+bnqrnbkr/pppppppp/8/8/8/8/PPPPPPPP/BNQRNBKR w KQkq - 0 1
+bnqrkbnr/pppppppp/8/8/8/8/PPPPPPPP/BNQRKBNR w KQkq - 0 1
+bnqrkbrn/pppppppp/8/8/8/8/PPPPPPPP/BNQRKBRN w KQkq - 0 1
+brqnnbkr/pppppppp/8/8/8/8/PPPPPPPP/BRQNNBKR w KQkq - 0 1
+brqnkbnr/pppppppp/8/8/8/8/PPPPPPPP/BRQNKBNR w KQkq - 0 1
+brqnkbrn/pppppppp/8/8/8/8/PPPPPPPP/BRQNKBRN w KQkq - 0 1
+brqknbnr/pppppppp/8/8/8/8/PPPPPPPP/BRQKNBNR w KQkq - 0 1
+brqknbrn/pppppppp/8/8/8/8/PPPPPPPP/BRQKNBRN w KQkq - 0 1
+brqkrbnn/pppppppp/8/8/8/8/PPPPPPPP/BRQKRBNN w KQkq - 0 1
+bnnqrbkr/pppppppp/8/8/8/8/PPPPPPPP/BNNQRBKR w KQkq - 0 1
+bnrqnbkr/pppppppp/8/8/8/8/PPPPPPPP/BNRQNBKR w KQkq - 0 1
+bnrqkbnr/pppppppp/8/8/8/8/PPPPPPPP/BNRQKBNR w KQkq - 0 1
+bnrqkbrn/pppppppp/8/8/8/8/PPPPPPPP/BNRQKBRN w KQkq - 0 1
+brnqnbkr/pppppppp/8/8/8/8/PPPPPPPP/BRNQNBKR w KQkq - 0 1
+brnqkbnr/pppppppp/8/8/8/8/PPPPPPPP/BRNQKBNR w KQkq - 0 1
+brnqkbrn/pppppppp/8/8/8/8/PPPPPPPP/BRNQKBRN w KQkq - 0 1
+brkqnbnr/pppppppp/8/8/8/8/PPPPPPPP/BRKQNBNR w KQkq - 0 1
+brkqnbrn/pppppppp/8/8/8/8/PPPPPPPP/BRKQNBRN w KQkq - 0 1
+brkqrbnn/pppppppp/8/8/8/8/PPPPPPPP/BRKQRBNN w KQkq - 0 1
+bnnrqbkr/pppppppp/8/8/8/8/PPPPPPPP/BNNRQBKR w KQkq - 0 1
+bnrnqbkr/pppppppp/8/8/8/8/PPPPPPPP/BNRNQBKR w KQkq - 0 1
+bnrkqbnr/pppppppp/8/8/8/8/PPPPPPPP/BNRKQBNR w KQkq - 0 1
+bnrkqbrn/pppppppp/8/8/8/8/PPPPPPPP/BNRKQBRN w KQkq - 0 1
+brnnqbkr/pppppppp/8/8/8/8/PPPPPPPP/BRNNQBKR w KQkq - 0 1
+brnkqbnr/pppppppp/8/8/8/8/PPPPPPPP/BRNKQBNR w KQkq - 0 1
+brnkqbrn/pppppppp/8/8/8/8/PPPPPPPP/BRNKQBRN w KQkq - 0 1
+brknqbnr/pppppppp/8/8/8/8/PPPPPPPP/BRKNQBNR w KQkq - 0 1
+brknqbrn/pppppppp/8/8/8/8/PPPPPPPP/BRKNQBRN w KQkq - 0 1
+brkrqbnn/pppppppp/8/8/8/8/PPPPPPPP/BRKRQBNN w KQkq - 0 1
+bnnrkbqr/pppppppp/8/8/8/8/PPPPPPPP/BNNRKBQR w KQkq - 0 1
+bnrnkbqr/pppppppp/8/8/8/8/PPPPPPPP/BNRNKBQR w KQkq - 0 1
+bnrknbqr/pppppppp/8/8/8/8/PPPPPPPP/BNRKNBQR w KQkq - 0 1
+bnrkrbqn/pppppppp/8/8/8/8/PPPPPPPP/BNRKRBQN w KQkq - 0 1
+brnnkbqr/pppppppp/8/8/8/8/PPPPPPPP/BRNNKBQR w KQkq - 0 1
+brnknbqr/pppppppp/8/8/8/8/PPPPPPPP/BRNKNBQR w KQkq - 0 1
+brnkrbqn/pppppppp/8/8/8/8/PPPPPPPP/BRNKRBQN w KQkq - 0 1
+brknnbqr/pppppppp/8/8/8/8/PPPPPPPP/BRKNNBQR w KQkq - 0 1
+brknrbqn/pppppppp/8/8/8/8/PPPPPPPP/BRKNRBQN w KQkq - 0 1
+brkrnbqn/pppppppp/8/8/8/8/PPPPPPPP/BRKRNBQN w KQkq - 0 1
+bnnrkbrq/pppppppp/8/8/8/8/PPPPPPPP/BNNRKBRQ w KQkq - 0 1
+bnrnkbrq/pppppppp/8/8/8/8/PPPPPPPP/BNRNKBRQ w KQkq - 0 1
+bnrknbrq/pppppppp/8/8/8/8/PPPPPPPP/BNRKNBRQ w KQkq - 0 1
+bnrkrbnq/pppppppp/8/8/8/8/PPPPPPPP/BNRKRBNQ w KQkq - 0 1 +brnnkbrq/pppppppp/8/8/8/8/PPPPPPPP/BRNNKBRQ w KQkq - 0 1 +brnknbrq/pppppppp/8/8/8/8/PPPPPPPP/BRNKNBRQ w KQkq - 0 1 +brnkrbnq/pppppppp/8/8/8/8/PPPPPPPP/BRNKRBNQ w KQkq - 0 1 +brknnbrq/pppppppp/8/8/8/8/PPPPPPPP/BRKNNBRQ w KQkq - 0 1 +brknrbnq/pppppppp/8/8/8/8/PPPPPPPP/BRKNRBNQ w KQkq - 0 1 +brkrnbnq/pppppppp/8/8/8/8/PPPPPPPP/BRKRNBNQ w KQkq - 0 1 +bqnnrkrb/pppppppp/8/8/8/8/PPPPPPPP/BQNNRKRB w KQkq - 0 1 +bqnrnkrb/pppppppp/8/8/8/8/PPPPPPPP/BQNRNKRB w KQkq - 0 1 +bqnrknrb/pppppppp/8/8/8/8/PPPPPPPP/BQNRKNRB w KQkq - 0 1 +bqnrkrnb/pppppppp/8/8/8/8/PPPPPPPP/BQNRKRNB w KQkq - 0 1 +bqrnnkrb/pppppppp/8/8/8/8/PPPPPPPP/BQRNNKRB w KQkq - 0 1 +bqrnknrb/pppppppp/8/8/8/8/PPPPPPPP/BQRNKNRB w KQkq - 0 1 +bqrnkrnb/pppppppp/8/8/8/8/PPPPPPPP/BQRNKRNB w KQkq - 0 1 +bqrknnrb/pppppppp/8/8/8/8/PPPPPPPP/BQRKNNRB w KQkq - 0 1 +bqrknrnb/pppppppp/8/8/8/8/PPPPPPPP/BQRKNRNB w KQkq - 0 1 +bqrkrnnb/pppppppp/8/8/8/8/PPPPPPPP/BQRKRNNB w KQkq - 0 1 +bnqnrkrb/pppppppp/8/8/8/8/PPPPPPPP/BNQNRKRB w KQkq - 0 1 +bnqrnkrb/pppppppp/8/8/8/8/PPPPPPPP/BNQRNKRB w KQkq - 0 1 +bnqrknrb/pppppppp/8/8/8/8/PPPPPPPP/BNQRKNRB w KQkq - 0 1 +bnqrkrnb/pppppppp/8/8/8/8/PPPPPPPP/BNQRKRNB w KQkq - 0 1 +brqnnkrb/pppppppp/8/8/8/8/PPPPPPPP/BRQNNKRB w KQkq - 0 1 +brqnknrb/pppppppp/8/8/8/8/PPPPPPPP/BRQNKNRB w KQkq - 0 1 +brqnkrnb/pppppppp/8/8/8/8/PPPPPPPP/BRQNKRNB w KQkq - 0 1 +brqknnrb/pppppppp/8/8/8/8/PPPPPPPP/BRQKNNRB w KQkq - 0 1 +brqknrnb/pppppppp/8/8/8/8/PPPPPPPP/BRQKNRNB w KQkq - 0 1 +brqkrnnb/pppppppp/8/8/8/8/PPPPPPPP/BRQKRNNB w KQkq - 0 1 +bnnqrkrb/pppppppp/8/8/8/8/PPPPPPPP/BNNQRKRB w KQkq - 0 1 +bnrqnkrb/pppppppp/8/8/8/8/PPPPPPPP/BNRQNKRB w KQkq - 0 1 +bnrqknrb/pppppppp/8/8/8/8/PPPPPPPP/BNRQKNRB w KQkq - 0 1 +bnrqkrnb/pppppppp/8/8/8/8/PPPPPPPP/BNRQKRNB w KQkq - 0 1 +brnqnkrb/pppppppp/8/8/8/8/PPPPPPPP/BRNQNKRB w KQkq - 0 1 +brnqknrb/pppppppp/8/8/8/8/PPPPPPPP/BRNQKNRB w KQkq - 0 1 +brnqkrnb/pppppppp/8/8/8/8/PPPPPPPP/BRNQKRNB w KQkq - 0 1 +brkqnnrb/pppppppp/8/8/8/8/PPPPPPPP/BRKQNNRB w KQkq - 0 1 +brkqnrnb/pppppppp/8/8/8/8/PPPPPPPP/BRKQNRNB w KQkq - 0 1 +brkqrnnb/pppppppp/8/8/8/8/PPPPPPPP/BRKQRNNB w KQkq - 0 1 +bnnrqkrb/pppppppp/8/8/8/8/PPPPPPPP/BNNRQKRB w KQkq - 0 1 +bnrnqkrb/pppppppp/8/8/8/8/PPPPPPPP/BNRNQKRB w KQkq - 0 1 +bnrkqnrb/pppppppp/8/8/8/8/PPPPPPPP/BNRKQNRB w KQkq - 0 1 +bnrkqrnb/pppppppp/8/8/8/8/PPPPPPPP/BNRKQRNB w KQkq - 0 1 +brnnqkrb/pppppppp/8/8/8/8/PPPPPPPP/BRNNQKRB w KQkq - 0 1 +brnkqnrb/pppppppp/8/8/8/8/PPPPPPPP/BRNKQNRB w KQkq - 0 1 +brnkqrnb/pppppppp/8/8/8/8/PPPPPPPP/BRNKQRNB w KQkq - 0 1 +brknqnrb/pppppppp/8/8/8/8/PPPPPPPP/BRKNQNRB w KQkq - 0 1 +brknqrnb/pppppppp/8/8/8/8/PPPPPPPP/BRKNQRNB w KQkq - 0 1 +brkrqnnb/pppppppp/8/8/8/8/PPPPPPPP/BRKRQNNB w KQkq - 0 1 +bnnrkqrb/pppppppp/8/8/8/8/PPPPPPPP/BNNRKQRB w KQkq - 0 1 +bnrnkqrb/pppppppp/8/8/8/8/PPPPPPPP/BNRNKQRB w KQkq - 0 1 +bnrknqrb/pppppppp/8/8/8/8/PPPPPPPP/BNRKNQRB w KQkq - 0 1 +bnrkrqnb/pppppppp/8/8/8/8/PPPPPPPP/BNRKRQNB w KQkq - 0 1 +brnnkqrb/pppppppp/8/8/8/8/PPPPPPPP/BRNNKQRB w KQkq - 0 1 +brnknqrb/pppppppp/8/8/8/8/PPPPPPPP/BRNKNQRB w KQkq - 0 1 +brnkrqnb/pppppppp/8/8/8/8/PPPPPPPP/BRNKRQNB w KQkq - 0 1 +brknnqrb/pppppppp/8/8/8/8/PPPPPPPP/BRKNNQRB w KQkq - 0 1 +brknrqnb/pppppppp/8/8/8/8/PPPPPPPP/BRKNRQNB w KQkq - 0 1 +brkrnqnb/pppppppp/8/8/8/8/PPPPPPPP/BRKRNQNB w KQkq - 0 1 +bnnrkrqb/pppppppp/8/8/8/8/PPPPPPPP/BNNRKRQB w KQkq - 0 1 +bnrnkrqb/pppppppp/8/8/8/8/PPPPPPPP/BNRNKRQB w KQkq - 0 1 +bnrknrqb/pppppppp/8/8/8/8/PPPPPPPP/BNRKNRQB w KQkq - 0 1 +bnrkrnqb/pppppppp/8/8/8/8/PPPPPPPP/BNRKRNQB w KQkq - 0 1 
+brnnkrqb/pppppppp/8/8/8/8/PPPPPPPP/BRNNKRQB w KQkq - 0 1 +brnknrqb/pppppppp/8/8/8/8/PPPPPPPP/BRNKNRQB w KQkq - 0 1 +brnkrnqb/pppppppp/8/8/8/8/PPPPPPPP/BRNKRNQB w KQkq - 0 1 +brknnrqb/pppppppp/8/8/8/8/PPPPPPPP/BRKNNRQB w KQkq - 0 1 +brknrnqb/pppppppp/8/8/8/8/PPPPPPPP/BRKNRNQB w KQkq - 0 1 +brkrnnqb/pppppppp/8/8/8/8/PPPPPPPP/BRKRNNQB w KQkq - 0 1 +qbbnnrkr/pppppppp/8/8/8/8/PPPPPPPP/QBBNNRKR w KQkq - 0 1 +qbbnrnkr/pppppppp/8/8/8/8/PPPPPPPP/QBBNRNKR w KQkq - 0 1 +qbbnrknr/pppppppp/8/8/8/8/PPPPPPPP/QBBNRKNR w KQkq - 0 1 +qbbnrkrn/pppppppp/8/8/8/8/PPPPPPPP/QBBNRKRN w KQkq - 0 1 +qbbrnnkr/pppppppp/8/8/8/8/PPPPPPPP/QBBRNNKR w KQkq - 0 1 +qbbrnknr/pppppppp/8/8/8/8/PPPPPPPP/QBBRNKNR w KQkq - 0 1 +qbbrnkrn/pppppppp/8/8/8/8/PPPPPPPP/QBBRNKRN w KQkq - 0 1 +qbbrknnr/pppppppp/8/8/8/8/PPPPPPPP/QBBRKNNR w KQkq - 0 1 +qbbrknrn/pppppppp/8/8/8/8/PPPPPPPP/QBBRKNRN w KQkq - 0 1 +qbbrkrnn/pppppppp/8/8/8/8/PPPPPPPP/QBBRKRNN w KQkq - 0 1 +nbbqnrkr/pppppppp/8/8/8/8/PPPPPPPP/NBBQNRKR w KQkq - 0 1 +nbbqrnkr/pppppppp/8/8/8/8/PPPPPPPP/NBBQRNKR w KQkq - 0 1 +nbbqrknr/pppppppp/8/8/8/8/PPPPPPPP/NBBQRKNR w KQkq - 0 1 +nbbqrkrn/pppppppp/8/8/8/8/PPPPPPPP/NBBQRKRN w KQkq - 0 1 +rbbqnnkr/pppppppp/8/8/8/8/PPPPPPPP/RBBQNNKR w KQkq - 0 1 +rbbqnknr/pppppppp/8/8/8/8/PPPPPPPP/RBBQNKNR w KQkq - 0 1 +rbbqnkrn/pppppppp/8/8/8/8/PPPPPPPP/RBBQNKRN w KQkq - 0 1 +rbbqknnr/pppppppp/8/8/8/8/PPPPPPPP/RBBQKNNR w KQkq - 0 1 +rbbqknrn/pppppppp/8/8/8/8/PPPPPPPP/RBBQKNRN w KQkq - 0 1 +rbbqkrnn/pppppppp/8/8/8/8/PPPPPPPP/RBBQKRNN w KQkq - 0 1 +nbbnqrkr/pppppppp/8/8/8/8/PPPPPPPP/NBBNQRKR w KQkq - 0 1 +nbbrqnkr/pppppppp/8/8/8/8/PPPPPPPP/NBBRQNKR w KQkq - 0 1 +nbbrqknr/pppppppp/8/8/8/8/PPPPPPPP/NBBRQKNR w KQkq - 0 1 +nbbrqkrn/pppppppp/8/8/8/8/PPPPPPPP/NBBRQKRN w KQkq - 0 1 +rbbnqnkr/pppppppp/8/8/8/8/PPPPPPPP/RBBNQNKR w KQkq - 0 1 +rbbnqknr/pppppppp/8/8/8/8/PPPPPPPP/RBBNQKNR w KQkq - 0 1 +rbbnqkrn/pppppppp/8/8/8/8/PPPPPPPP/RBBNQKRN w KQkq - 0 1 +rbbkqnnr/pppppppp/8/8/8/8/PPPPPPPP/RBBKQNNR w KQkq - 0 1 +rbbkqnrn/pppppppp/8/8/8/8/PPPPPPPP/RBBKQNRN w KQkq - 0 1 +rbbkqrnn/pppppppp/8/8/8/8/PPPPPPPP/RBBKQRNN w KQkq - 0 1 +nbbnrqkr/pppppppp/8/8/8/8/PPPPPPPP/NBBNRQKR w KQkq - 0 1 +nbbrnqkr/pppppppp/8/8/8/8/PPPPPPPP/NBBRNQKR w KQkq - 0 1 +nbbrkqnr/pppppppp/8/8/8/8/PPPPPPPP/NBBRKQNR w KQkq - 0 1 +nbbrkqrn/pppppppp/8/8/8/8/PPPPPPPP/NBBRKQRN w KQkq - 0 1 +rbbnnqkr/pppppppp/8/8/8/8/PPPPPPPP/RBBNNQKR w KQkq - 0 1 +rbbnkqnr/pppppppp/8/8/8/8/PPPPPPPP/RBBNKQNR w KQkq - 0 1 +rbbnkqrn/pppppppp/8/8/8/8/PPPPPPPP/RBBNKQRN w KQkq - 0 1 +rbbknqnr/pppppppp/8/8/8/8/PPPPPPPP/RBBKNQNR w KQkq - 0 1 +rbbknqrn/pppppppp/8/8/8/8/PPPPPPPP/RBBKNQRN w KQkq - 0 1 +rbbkrqnn/pppppppp/8/8/8/8/PPPPPPPP/RBBKRQNN w KQkq - 0 1 +nbbnrkqr/pppppppp/8/8/8/8/PPPPPPPP/NBBNRKQR w KQkq - 0 1 +nbbrnkqr/pppppppp/8/8/8/8/PPPPPPPP/NBBRNKQR w KQkq - 0 1 +nbbrknqr/pppppppp/8/8/8/8/PPPPPPPP/NBBRKNQR w KQkq - 0 1 +nbbrkrqn/pppppppp/8/8/8/8/PPPPPPPP/NBBRKRQN w KQkq - 0 1 +rbbnnkqr/pppppppp/8/8/8/8/PPPPPPPP/RBBNNKQR w KQkq - 0 1 +rbbnknqr/pppppppp/8/8/8/8/PPPPPPPP/RBBNKNQR w KQkq - 0 1 +rbbnkrqn/pppppppp/8/8/8/8/PPPPPPPP/RBBNKRQN w KQkq - 0 1)" +// MSVC can't handle string longer than 16,384 bytes +R"( +rbbknnqr/pppppppp/8/8/8/8/PPPPPPPP/RBBKNNQR w KQkq - 0 1 +rbbknrqn/pppppppp/8/8/8/8/PPPPPPPP/RBBKNRQN w KQkq - 0 1 +rbbkrnqn/pppppppp/8/8/8/8/PPPPPPPP/RBBKRNQN w KQkq - 0 1 +nbbnrkrq/pppppppp/8/8/8/8/PPPPPPPP/NBBNRKRQ w KQkq - 0 1 +nbbrnkrq/pppppppp/8/8/8/8/PPPPPPPP/NBBRNKRQ w KQkq - 0 1 +nbbrknrq/pppppppp/8/8/8/8/PPPPPPPP/NBBRKNRQ w KQkq - 0 1 +nbbrkrnq/pppppppp/8/8/8/8/PPPPPPPP/NBBRKRNQ w KQkq - 0 1 
+rbbnnkrq/pppppppp/8/8/8/8/PPPPPPPP/RBBNNKRQ w KQkq - 0 1 +rbbnknrq/pppppppp/8/8/8/8/PPPPPPPP/RBBNKNRQ w KQkq - 0 1 +rbbnkrnq/pppppppp/8/8/8/8/PPPPPPPP/RBBNKRNQ w KQkq - 0 1 +rbbknnrq/pppppppp/8/8/8/8/PPPPPPPP/RBBKNNRQ w KQkq - 0 1 +rbbknrnq/pppppppp/8/8/8/8/PPPPPPPP/RBBKNRNQ w KQkq - 0 1 +rbbkrnnq/pppppppp/8/8/8/8/PPPPPPPP/RBBKRNNQ w KQkq - 0 1 +qnbbnrkr/pppppppp/8/8/8/8/PPPPPPPP/QNBBNRKR w KQkq - 0 1 +qnbbrnkr/pppppppp/8/8/8/8/PPPPPPPP/QNBBRNKR w KQkq - 0 1 +qnbbrknr/pppppppp/8/8/8/8/PPPPPPPP/QNBBRKNR w KQkq - 0 1 +qnbbrkrn/pppppppp/8/8/8/8/PPPPPPPP/QNBBRKRN w KQkq - 0 1 +qrbbnnkr/pppppppp/8/8/8/8/PPPPPPPP/QRBBNNKR w KQkq - 0 1 +qrbbnknr/pppppppp/8/8/8/8/PPPPPPPP/QRBBNKNR w KQkq - 0 1 +qrbbnkrn/pppppppp/8/8/8/8/PPPPPPPP/QRBBNKRN w KQkq - 0 1 +qrbbknnr/pppppppp/8/8/8/8/PPPPPPPP/QRBBKNNR w KQkq - 0 1 +qrbbknrn/pppppppp/8/8/8/8/PPPPPPPP/QRBBKNRN w KQkq - 0 1 +qrbbkrnn/pppppppp/8/8/8/8/PPPPPPPP/QRBBKRNN w KQkq - 0 1 +nqbbnrkr/pppppppp/8/8/8/8/PPPPPPPP/NQBBNRKR w KQkq - 0 1 +nqbbrnkr/pppppppp/8/8/8/8/PPPPPPPP/NQBBRNKR w KQkq - 0 1 +nqbbrknr/pppppppp/8/8/8/8/PPPPPPPP/NQBBRKNR w KQkq - 0 1 +nqbbrkrn/pppppppp/8/8/8/8/PPPPPPPP/NQBBRKRN w KQkq - 0 1 +rqbbnnkr/pppppppp/8/8/8/8/PPPPPPPP/RQBBNNKR w KQkq - 0 1 +rqbbnknr/pppppppp/8/8/8/8/PPPPPPPP/RQBBNKNR w KQkq - 0 1 +rqbbnkrn/pppppppp/8/8/8/8/PPPPPPPP/RQBBNKRN w KQkq - 0 1 +rqbbknnr/pppppppp/8/8/8/8/PPPPPPPP/RQBBKNNR w KQkq - 0 1 +rqbbknrn/pppppppp/8/8/8/8/PPPPPPPP/RQBBKNRN w KQkq - 0 1 +rqbbkrnn/pppppppp/8/8/8/8/PPPPPPPP/RQBBKRNN w KQkq - 0 1 +nnbbqrkr/pppppppp/8/8/8/8/PPPPPPPP/NNBBQRKR w KQkq - 0 1 +nrbbqnkr/pppppppp/8/8/8/8/PPPPPPPP/NRBBQNKR w KQkq - 0 1 +nrbbqknr/pppppppp/8/8/8/8/PPPPPPPP/NRBBQKNR w KQkq - 0 1 +nrbbqkrn/pppppppp/8/8/8/8/PPPPPPPP/NRBBQKRN w KQkq - 0 1 +rnbbqnkr/pppppppp/8/8/8/8/PPPPPPPP/RNBBQNKR w KQkq - 0 1 +rnbbqknr/pppppppp/8/8/8/8/PPPPPPPP/RNBBQKNR w KQkq - 0 1 +rnbbqkrn/pppppppp/8/8/8/8/PPPPPPPP/RNBBQKRN w KQkq - 0 1 +rkbbqnnr/pppppppp/8/8/8/8/PPPPPPPP/RKBBQNNR w KQkq - 0 1 +rkbbqnrn/pppppppp/8/8/8/8/PPPPPPPP/RKBBQNRN w KQkq - 0 1 +rkbbqrnn/pppppppp/8/8/8/8/PPPPPPPP/RKBBQRNN w KQkq - 0 1 +nnbbrqkr/pppppppp/8/8/8/8/PPPPPPPP/NNBBRQKR w KQkq - 0 1 +nrbbnqkr/pppppppp/8/8/8/8/PPPPPPPP/NRBBNQKR w KQkq - 0 1 +nrbbkqnr/pppppppp/8/8/8/8/PPPPPPPP/NRBBKQNR w KQkq - 0 1 +nrbbkqrn/pppppppp/8/8/8/8/PPPPPPPP/NRBBKQRN w KQkq - 0 1 +rnbbnqkr/pppppppp/8/8/8/8/PPPPPPPP/RNBBNQKR w KQkq - 0 1 +rnbbkqnr/pppppppp/8/8/8/8/PPPPPPPP/RNBBKQNR w KQkq - 0 1 +rnbbkqrn/pppppppp/8/8/8/8/PPPPPPPP/RNBBKQRN w KQkq - 0 1 +rkbbnqnr/pppppppp/8/8/8/8/PPPPPPPP/RKBBNQNR w KQkq - 0 1 +rkbbnqrn/pppppppp/8/8/8/8/PPPPPPPP/RKBBNQRN w KQkq - 0 1 +rkbbrqnn/pppppppp/8/8/8/8/PPPPPPPP/RKBBRQNN w KQkq - 0 1 +nnbbrkqr/pppppppp/8/8/8/8/PPPPPPPP/NNBBRKQR w KQkq - 0 1 +nrbbnkqr/pppppppp/8/8/8/8/PPPPPPPP/NRBBNKQR w KQkq - 0 1 +nrbbknqr/pppppppp/8/8/8/8/PPPPPPPP/NRBBKNQR w KQkq - 0 1 +nrbbkrqn/pppppppp/8/8/8/8/PPPPPPPP/NRBBKRQN w KQkq - 0 1 +rnbbnkqr/pppppppp/8/8/8/8/PPPPPPPP/RNBBNKQR w KQkq - 0 1 +rnbbknqr/pppppppp/8/8/8/8/PPPPPPPP/RNBBKNQR w KQkq - 0 1 +rnbbkrqn/pppppppp/8/8/8/8/PPPPPPPP/RNBBKRQN w KQkq - 0 1 +rkbbnnqr/pppppppp/8/8/8/8/PPPPPPPP/RKBBNNQR w KQkq - 0 1 +rkbbnrqn/pppppppp/8/8/8/8/PPPPPPPP/RKBBNRQN w KQkq - 0 1 +rkbbrnqn/pppppppp/8/8/8/8/PPPPPPPP/RKBBRNQN w KQkq - 0 1 +nnbbrkrq/pppppppp/8/8/8/8/PPPPPPPP/NNBBRKRQ w KQkq - 0 1 +nrbbnkrq/pppppppp/8/8/8/8/PPPPPPPP/NRBBNKRQ w KQkq - 0 1 +nrbbknrq/pppppppp/8/8/8/8/PPPPPPPP/NRBBKNRQ w KQkq - 0 1 +nrbbkrnq/pppppppp/8/8/8/8/PPPPPPPP/NRBBKRNQ w KQkq - 0 1 +rnbbnkrq/pppppppp/8/8/8/8/PPPPPPPP/RNBBNKRQ w KQkq - 0 1 
+rnbbknrq/pppppppp/8/8/8/8/PPPPPPPP/RNBBKNRQ w KQkq - 0 1 +rnbbkrnq/pppppppp/8/8/8/8/PPPPPPPP/RNBBKRNQ w KQkq - 0 1 +rkbbnnrq/pppppppp/8/8/8/8/PPPPPPPP/RKBBNNRQ w KQkq - 0 1 +rkbbnrnq/pppppppp/8/8/8/8/PPPPPPPP/RKBBNRNQ w KQkq - 0 1 +rkbbrnnq/pppppppp/8/8/8/8/PPPPPPPP/RKBBRNNQ w KQkq - 0 1 +qnbnrbkr/pppppppp/8/8/8/8/PPPPPPPP/QNBNRBKR w KQkq - 0 1 +qnbrnbkr/pppppppp/8/8/8/8/PPPPPPPP/QNBRNBKR w KQkq - 0 1 +qnbrkbnr/pppppppp/8/8/8/8/PPPPPPPP/QNBRKBNR w KQkq - 0 1 +qnbrkbrn/pppppppp/8/8/8/8/PPPPPPPP/QNBRKBRN w KQkq - 0 1 +qrbnnbkr/pppppppp/8/8/8/8/PPPPPPPP/QRBNNBKR w KQkq - 0 1 +qrbnkbnr/pppppppp/8/8/8/8/PPPPPPPP/QRBNKBNR w KQkq - 0 1 +qrbnkbrn/pppppppp/8/8/8/8/PPPPPPPP/QRBNKBRN w KQkq - 0 1 +qrbknbnr/pppppppp/8/8/8/8/PPPPPPPP/QRBKNBNR w KQkq - 0 1 +qrbknbrn/pppppppp/8/8/8/8/PPPPPPPP/QRBKNBRN w KQkq - 0 1 +qrbkrbnn/pppppppp/8/8/8/8/PPPPPPPP/QRBKRBNN w KQkq - 0 1 +nqbnrbkr/pppppppp/8/8/8/8/PPPPPPPP/NQBNRBKR w KQkq - 0 1 +nqbrnbkr/pppppppp/8/8/8/8/PPPPPPPP/NQBRNBKR w KQkq - 0 1 +nqbrkbnr/pppppppp/8/8/8/8/PPPPPPPP/NQBRKBNR w KQkq - 0 1 +nqbrkbrn/pppppppp/8/8/8/8/PPPPPPPP/NQBRKBRN w KQkq - 0 1 +rqbnnbkr/pppppppp/8/8/8/8/PPPPPPPP/RQBNNBKR w KQkq - 0 1 +rqbnkbnr/pppppppp/8/8/8/8/PPPPPPPP/RQBNKBNR w KQkq - 0 1 +rqbnkbrn/pppppppp/8/8/8/8/PPPPPPPP/RQBNKBRN w KQkq - 0 1 +rqbknbnr/pppppppp/8/8/8/8/PPPPPPPP/RQBKNBNR w KQkq - 0 1 +rqbknbrn/pppppppp/8/8/8/8/PPPPPPPP/RQBKNBRN w KQkq - 0 1 +rqbkrbnn/pppppppp/8/8/8/8/PPPPPPPP/RQBKRBNN w KQkq - 0 1 +nnbqrbkr/pppppppp/8/8/8/8/PPPPPPPP/NNBQRBKR w KQkq - 0 1 +nrbqnbkr/pppppppp/8/8/8/8/PPPPPPPP/NRBQNBKR w KQkq - 0 1 +nrbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/NRBQKBNR w KQkq - 0 1 +nrbqkbrn/pppppppp/8/8/8/8/PPPPPPPP/NRBQKBRN w KQkq - 0 1 +rnbqnbkr/pppppppp/8/8/8/8/PPPPPPPP/RNBQNBKR w KQkq - 0 1 +rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1 +rnbqkbrn/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBRN w KQkq - 0 1 +rkbqnbnr/pppppppp/8/8/8/8/PPPPPPPP/RKBQNBNR w KQkq - 0 1 +rkbqnbrn/pppppppp/8/8/8/8/PPPPPPPP/RKBQNBRN w KQkq - 0 1 +rkbqrbnn/pppppppp/8/8/8/8/PPPPPPPP/RKBQRBNN w KQkq - 0 1 +nnbrqbkr/pppppppp/8/8/8/8/PPPPPPPP/NNBRQBKR w KQkq - 0 1 +nrbnqbkr/pppppppp/8/8/8/8/PPPPPPPP/NRBNQBKR w KQkq - 0 1 +nrbkqbnr/pppppppp/8/8/8/8/PPPPPPPP/NRBKQBNR w KQkq - 0 1 +nrbkqbrn/pppppppp/8/8/8/8/PPPPPPPP/NRBKQBRN w KQkq - 0 1 +rnbnqbkr/pppppppp/8/8/8/8/PPPPPPPP/RNBNQBKR w KQkq - 0 1 +rnbkqbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBKQBNR w KQkq - 0 1 +rnbkqbrn/pppppppp/8/8/8/8/PPPPPPPP/RNBKQBRN w KQkq - 0 1 +rkbnqbnr/pppppppp/8/8/8/8/PPPPPPPP/RKBNQBNR w KQkq - 0 1 +rkbnqbrn/pppppppp/8/8/8/8/PPPPPPPP/RKBNQBRN w KQkq - 0 1 +rkbrqbnn/pppppppp/8/8/8/8/PPPPPPPP/RKBRQBNN w KQkq - 0 1 +nnbrkbqr/pppppppp/8/8/8/8/PPPPPPPP/NNBRKBQR w KQkq - 0 1 +nrbnkbqr/pppppppp/8/8/8/8/PPPPPPPP/NRBNKBQR w KQkq - 0 1 +nrbknbqr/pppppppp/8/8/8/8/PPPPPPPP/NRBKNBQR w KQkq - 0 1 +nrbkrbqn/pppppppp/8/8/8/8/PPPPPPPP/NRBKRBQN w KQkq - 0 1 +rnbnkbqr/pppppppp/8/8/8/8/PPPPPPPP/RNBNKBQR w KQkq - 0 1 +rnbknbqr/pppppppp/8/8/8/8/PPPPPPPP/RNBKNBQR w KQkq - 0 1 +rnbkrbqn/pppppppp/8/8/8/8/PPPPPPPP/RNBKRBQN w KQkq - 0 1 +rkbnnbqr/pppppppp/8/8/8/8/PPPPPPPP/RKBNNBQR w KQkq - 0 1 +rkbnrbqn/pppppppp/8/8/8/8/PPPPPPPP/RKBNRBQN w KQkq - 0 1 +rkbrnbqn/pppppppp/8/8/8/8/PPPPPPPP/RKBRNBQN w KQkq - 0 1 +nnbrkbrq/pppppppp/8/8/8/8/PPPPPPPP/NNBRKBRQ w KQkq - 0 1 +nrbnkbrq/pppppppp/8/8/8/8/PPPPPPPP/NRBNKBRQ w KQkq - 0 1 +nrbknbrq/pppppppp/8/8/8/8/PPPPPPPP/NRBKNBRQ w KQkq - 0 1 +nrbkrbnq/pppppppp/8/8/8/8/PPPPPPPP/NRBKRBNQ w KQkq - 0 1 +rnbnkbrq/pppppppp/8/8/8/8/PPPPPPPP/RNBNKBRQ w KQkq - 0 1 +rnbknbrq/pppppppp/8/8/8/8/PPPPPPPP/RNBKNBRQ w KQkq - 0 1 
+rnbkrbnq/pppppppp/8/8/8/8/PPPPPPPP/RNBKRBNQ w KQkq - 0 1 +rkbnnbrq/pppppppp/8/8/8/8/PPPPPPPP/RKBNNBRQ w KQkq - 0 1 +rkbnrbnq/pppppppp/8/8/8/8/PPPPPPPP/RKBNRBNQ w KQkq - 0 1 +rkbrnbnq/pppppppp/8/8/8/8/PPPPPPPP/RKBRNBNQ w KQkq - 0 1 +qnbnrkrb/pppppppp/8/8/8/8/PPPPPPPP/QNBNRKRB w KQkq - 0 1 +qnbrnkrb/pppppppp/8/8/8/8/PPPPPPPP/QNBRNKRB w KQkq - 0 1 +qnbrknrb/pppppppp/8/8/8/8/PPPPPPPP/QNBRKNRB w KQkq - 0 1 +qnbrkrnb/pppppppp/8/8/8/8/PPPPPPPP/QNBRKRNB w KQkq - 0 1 +qrbnnkrb/pppppppp/8/8/8/8/PPPPPPPP/QRBNNKRB w KQkq - 0 1 +qrbnknrb/pppppppp/8/8/8/8/PPPPPPPP/QRBNKNRB w KQkq - 0 1 +qrbnkrnb/pppppppp/8/8/8/8/PPPPPPPP/QRBNKRNB w KQkq - 0 1 +qrbknnrb/pppppppp/8/8/8/8/PPPPPPPP/QRBKNNRB w KQkq - 0 1 +qrbknrnb/pppppppp/8/8/8/8/PPPPPPPP/QRBKNRNB w KQkq - 0 1 +qrbkrnnb/pppppppp/8/8/8/8/PPPPPPPP/QRBKRNNB w KQkq - 0 1 +nqbnrkrb/pppppppp/8/8/8/8/PPPPPPPP/NQBNRKRB w KQkq - 0 1 +nqbrnkrb/pppppppp/8/8/8/8/PPPPPPPP/NQBRNKRB w KQkq - 0 1 +nqbrknrb/pppppppp/8/8/8/8/PPPPPPPP/NQBRKNRB w KQkq - 0 1 +nqbrkrnb/pppppppp/8/8/8/8/PPPPPPPP/NQBRKRNB w KQkq - 0 1 +rqbnnkrb/pppppppp/8/8/8/8/PPPPPPPP/RQBNNKRB w KQkq - 0 1 +rqbnknrb/pppppppp/8/8/8/8/PPPPPPPP/RQBNKNRB w KQkq - 0 1 +rqbnkrnb/pppppppp/8/8/8/8/PPPPPPPP/RQBNKRNB w KQkq - 0 1 +rqbknnrb/pppppppp/8/8/8/8/PPPPPPPP/RQBKNNRB w KQkq - 0 1 +rqbknrnb/pppppppp/8/8/8/8/PPPPPPPP/RQBKNRNB w KQkq - 0 1 +rqbkrnnb/pppppppp/8/8/8/8/PPPPPPPP/RQBKRNNB w KQkq - 0 1 +nnbqrkrb/pppppppp/8/8/8/8/PPPPPPPP/NNBQRKRB w KQkq - 0 1 +nrbqnkrb/pppppppp/8/8/8/8/PPPPPPPP/NRBQNKRB w KQkq - 0 1 +nrbqknrb/pppppppp/8/8/8/8/PPPPPPPP/NRBQKNRB w KQkq - 0 1 +nrbqkrnb/pppppppp/8/8/8/8/PPPPPPPP/NRBQKRNB w KQkq - 0 1 +rnbqnkrb/pppppppp/8/8/8/8/PPPPPPPP/RNBQNKRB w KQkq - 0 1 +rnbqknrb/pppppppp/8/8/8/8/PPPPPPPP/RNBQKNRB w KQkq - 0 1 +rnbqkrnb/pppppppp/8/8/8/8/PPPPPPPP/RNBQKRNB w KQkq - 0 1 +rkbqnnrb/pppppppp/8/8/8/8/PPPPPPPP/RKBQNNRB w KQkq - 0 1 +rkbqnrnb/pppppppp/8/8/8/8/PPPPPPPP/RKBQNRNB w KQkq - 0 1 +rkbqrnnb/pppppppp/8/8/8/8/PPPPPPPP/RKBQRNNB w KQkq - 0 1 +nnbrqkrb/pppppppp/8/8/8/8/PPPPPPPP/NNBRQKRB w KQkq - 0 1 +nrbnqkrb/pppppppp/8/8/8/8/PPPPPPPP/NRBNQKRB w KQkq - 0 1 +nrbkqnrb/pppppppp/8/8/8/8/PPPPPPPP/NRBKQNRB w KQkq - 0 1 +nrbkqrnb/pppppppp/8/8/8/8/PPPPPPPP/NRBKQRNB w KQkq - 0 1 +rnbnqkrb/pppppppp/8/8/8/8/PPPPPPPP/RNBNQKRB w KQkq - 0 1 +rnbkqnrb/pppppppp/8/8/8/8/PPPPPPPP/RNBKQNRB w KQkq - 0 1 +rnbkqrnb/pppppppp/8/8/8/8/PPPPPPPP/RNBKQRNB w KQkq - 0 1 +rkbnqnrb/pppppppp/8/8/8/8/PPPPPPPP/RKBNQNRB w KQkq - 0 1 +rkbnqrnb/pppppppp/8/8/8/8/PPPPPPPP/RKBNQRNB w KQkq - 0 1 +rkbrqnnb/pppppppp/8/8/8/8/PPPPPPPP/RKBRQNNB w KQkq - 0 1 +nnbrkqrb/pppppppp/8/8/8/8/PPPPPPPP/NNBRKQRB w KQkq - 0 1 +nrbnkqrb/pppppppp/8/8/8/8/PPPPPPPP/NRBNKQRB w KQkq - 0 1 +nrbknqrb/pppppppp/8/8/8/8/PPPPPPPP/NRBKNQRB w KQkq - 0 1 +nrbkrqnb/pppppppp/8/8/8/8/PPPPPPPP/NRBKRQNB w KQkq - 0 1 +rnbnkqrb/pppppppp/8/8/8/8/PPPPPPPP/RNBNKQRB w KQkq - 0 1 +rnbknqrb/pppppppp/8/8/8/8/PPPPPPPP/RNBKNQRB w KQkq - 0 1 +rnbkrqnb/pppppppp/8/8/8/8/PPPPPPPP/RNBKRQNB w KQkq - 0 1 +rkbnnqrb/pppppppp/8/8/8/8/PPPPPPPP/RKBNNQRB w KQkq - 0 1 +rkbnrqnb/pppppppp/8/8/8/8/PPPPPPPP/RKBNRQNB w KQkq - 0 1 +rkbrnqnb/pppppppp/8/8/8/8/PPPPPPPP/RKBRNQNB w KQkq - 0 1 +nnbrkrqb/pppppppp/8/8/8/8/PPPPPPPP/NNBRKRQB w KQkq - 0 1 +nrbnkrqb/pppppppp/8/8/8/8/PPPPPPPP/NRBNKRQB w KQkq - 0 1 +nrbknrqb/pppppppp/8/8/8/8/PPPPPPPP/NRBKNRQB w KQkq - 0 1 +nrbkrnqb/pppppppp/8/8/8/8/PPPPPPPP/NRBKRNQB w KQkq - 0 1 +rnbnkrqb/pppppppp/8/8/8/8/PPPPPPPP/RNBNKRQB w KQkq - 0 1 +rnbknrqb/pppppppp/8/8/8/8/PPPPPPPP/RNBKNRQB w KQkq - 0 1 +rnbkrnqb/pppppppp/8/8/8/8/PPPPPPPP/RNBKRNQB w KQkq - 0 1 
+rkbnnrqb/pppppppp/8/8/8/8/PPPPPPPP/RKBNNRQB w KQkq - 0 1 +rkbnrnqb/pppppppp/8/8/8/8/PPPPPPPP/RKBNRNQB w KQkq - 0 1 +rkbrnnqb/pppppppp/8/8/8/8/PPPPPPPP/RKBRNNQB w KQkq - 0 1 +qbnnbrkr/pppppppp/8/8/8/8/PPPPPPPP/QBNNBRKR w KQkq - 0 1 +qbnrbnkr/pppppppp/8/8/8/8/PPPPPPPP/QBNRBNKR w KQkq - 0 1 +qbnrbknr/pppppppp/8/8/8/8/PPPPPPPP/QBNRBKNR w KQkq - 0 1 +qbnrbkrn/pppppppp/8/8/8/8/PPPPPPPP/QBNRBKRN w KQkq - 0 1 +qbrnbnkr/pppppppp/8/8/8/8/PPPPPPPP/QBRNBNKR w KQkq - 0 1 +qbrnbknr/pppppppp/8/8/8/8/PPPPPPPP/QBRNBKNR w KQkq - 0 1 +qbrnbkrn/pppppppp/8/8/8/8/PPPPPPPP/QBRNBKRN w KQkq - 0 1 +qbrkbnnr/pppppppp/8/8/8/8/PPPPPPPP/QBRKBNNR w KQkq - 0 1 +qbrkbnrn/pppppppp/8/8/8/8/PPPPPPPP/QBRKBNRN w KQkq - 0 1 +qbrkbrnn/pppppppp/8/8/8/8/PPPPPPPP/QBRKBRNN w KQkq - 0 1 +nbqnbrkr/pppppppp/8/8/8/8/PPPPPPPP/NBQNBRKR w KQkq - 0 1 +nbqrbnkr/pppppppp/8/8/8/8/PPPPPPPP/NBQRBNKR w KQkq - 0 1 +nbqrbknr/pppppppp/8/8/8/8/PPPPPPPP/NBQRBKNR w KQkq - 0 1 +nbqrbkrn/pppppppp/8/8/8/8/PPPPPPPP/NBQRBKRN w KQkq - 0 1 +rbqnbnkr/pppppppp/8/8/8/8/PPPPPPPP/RBQNBNKR w KQkq - 0 1 +rbqnbknr/pppppppp/8/8/8/8/PPPPPPPP/RBQNBKNR w KQkq - 0 1 +rbqnbkrn/pppppppp/8/8/8/8/PPPPPPPP/RBQNBKRN w KQkq - 0 1 +rbqkbnnr/pppppppp/8/8/8/8/PPPPPPPP/RBQKBNNR w KQkq - 0 1 +rbqkbnrn/pppppppp/8/8/8/8/PPPPPPPP/RBQKBNRN w KQkq - 0 1 +rbqkbrnn/pppppppp/8/8/8/8/PPPPPPPP/RBQKBRNN w KQkq - 0 1 +nbnqbrkr/pppppppp/8/8/8/8/PPPPPPPP/NBNQBRKR w KQkq - 0 1 +nbrqbnkr/pppppppp/8/8/8/8/PPPPPPPP/NBRQBNKR w KQkq - 0 1 +nbrqbknr/pppppppp/8/8/8/8/PPPPPPPP/NBRQBKNR w KQkq - 0 1 +nbrqbkrn/pppppppp/8/8/8/8/PPPPPPPP/NBRQBKRN w KQkq - 0 1 +rbnqbnkr/pppppppp/8/8/8/8/PPPPPPPP/RBNQBNKR w KQkq - 0 1 +rbnqbknr/pppppppp/8/8/8/8/PPPPPPPP/RBNQBKNR w KQkq - 0 1 +rbnqbkrn/pppppppp/8/8/8/8/PPPPPPPP/RBNQBKRN w KQkq - 0 1 +rbkqbnnr/pppppppp/8/8/8/8/PPPPPPPP/RBKQBNNR w KQkq - 0 1 +rbkqbnrn/pppppppp/8/8/8/8/PPPPPPPP/RBKQBNRN w KQkq - 0 1 +rbkqbrnn/pppppppp/8/8/8/8/PPPPPPPP/RBKQBRNN w KQkq - 0 1 +nbnrbqkr/pppppppp/8/8/8/8/PPPPPPPP/NBNRBQKR w KQkq - 0 1 +nbrnbqkr/pppppppp/8/8/8/8/PPPPPPPP/NBRNBQKR w KQkq - 0 1 +nbrkbqnr/pppppppp/8/8/8/8/PPPPPPPP/NBRKBQNR w KQkq - 0 1 +nbrkbqrn/pppppppp/8/8/8/8/PPPPPPPP/NBRKBQRN w KQkq - 0 1 +rbnnbqkr/pppppppp/8/8/8/8/PPPPPPPP/RBNNBQKR w KQkq - 0 1 +rbnkbqnr/pppppppp/8/8/8/8/PPPPPPPP/RBNKBQNR w KQkq - 0 1 +rbnkbqrn/pppppppp/8/8/8/8/PPPPPPPP/RBNKBQRN w KQkq - 0 1 +rbknbqnr/pppppppp/8/8/8/8/PPPPPPPP/RBKNBQNR w KQkq - 0 1 +rbknbqrn/pppppppp/8/8/8/8/PPPPPPPP/RBKNBQRN w KQkq - 0 1 +rbkrbqnn/pppppppp/8/8/8/8/PPPPPPPP/RBKRBQNN w KQkq - 0 1 +nbnrbkqr/pppppppp/8/8/8/8/PPPPPPPP/NBNRBKQR w KQkq - 0 1 +nbrnbkqr/pppppppp/8/8/8/8/PPPPPPPP/NBRNBKQR w KQkq - 0 1 +nbrkbnqr/pppppppp/8/8/8/8/PPPPPPPP/NBRKBNQR w KQkq - 0 1 +nbrkbrqn/pppppppp/8/8/8/8/PPPPPPPP/NBRKBRQN w KQkq - 0 1 +rbnnbkqr/pppppppp/8/8/8/8/PPPPPPPP/RBNNBKQR w KQkq - 0 1 +rbnkbnqr/pppppppp/8/8/8/8/PPPPPPPP/RBNKBNQR w KQkq - 0 1 +rbnkbrqn/pppppppp/8/8/8/8/PPPPPPPP/RBNKBRQN w KQkq - 0 1 +rbknbnqr/pppppppp/8/8/8/8/PPPPPPPP/RBKNBNQR w KQkq - 0 1 +rbknbrqn/pppppppp/8/8/8/8/PPPPPPPP/RBKNBRQN w KQkq - 0 1 +rbkrbnqn/pppppppp/8/8/8/8/PPPPPPPP/RBKRBNQN w KQkq - 0 1 +nbnrbkrq/pppppppp/8/8/8/8/PPPPPPPP/NBNRBKRQ w KQkq - 0 1 +nbrnbkrq/pppppppp/8/8/8/8/PPPPPPPP/NBRNBKRQ w KQkq - 0 1 +nbrkbnrq/pppppppp/8/8/8/8/PPPPPPPP/NBRKBNRQ w KQkq - 0 1 +nbrkbrnq/pppppppp/8/8/8/8/PPPPPPPP/NBRKBRNQ w KQkq - 0 1 +rbnnbkrq/pppppppp/8/8/8/8/PPPPPPPP/RBNNBKRQ w KQkq - 0 1 +rbnkbnrq/pppppppp/8/8/8/8/PPPPPPPP/RBNKBNRQ w KQkq - 0 1 +rbnkbrnq/pppppppp/8/8/8/8/PPPPPPPP/RBNKBRNQ w KQkq - 0 1 +rbknbnrq/pppppppp/8/8/8/8/PPPPPPPP/RBKNBNRQ w KQkq - 0 1 
+rbknbrnq/pppppppp/8/8/8/8/PPPPPPPP/RBKNBRNQ w KQkq - 0 1 +rbkrbnnq/pppppppp/8/8/8/8/PPPPPPPP/RBKRBNNQ w KQkq - 0 1 +qnnbbrkr/pppppppp/8/8/8/8/PPPPPPPP/QNNBBRKR w KQkq - 0 1 +qnrbbnkr/pppppppp/8/8/8/8/PPPPPPPP/QNRBBNKR w KQkq - 0 1 +qnrbbknr/pppppppp/8/8/8/8/PPPPPPPP/QNRBBKNR w KQkq - 0 1 +qnrbbkrn/pppppppp/8/8/8/8/PPPPPPPP/QNRBBKRN w KQkq - 0 1 +qrnbbnkr/pppppppp/8/8/8/8/PPPPPPPP/QRNBBNKR w KQkq - 0 1 +qrnbbknr/pppppppp/8/8/8/8/PPPPPPPP/QRNBBKNR w KQkq - 0 1 +qrnbbkrn/pppppppp/8/8/8/8/PPPPPPPP/QRNBBKRN w KQkq - 0 1 +qrkbbnnr/pppppppp/8/8/8/8/PPPPPPPP/QRKBBNNR w KQkq - 0 1 +qrkbbnrn/pppppppp/8/8/8/8/PPPPPPPP/QRKBBNRN w KQkq - 0 1 +qrkbbrnn/pppppppp/8/8/8/8/PPPPPPPP/QRKBBRNN w KQkq - 0 1 +nqnbbrkr/pppppppp/8/8/8/8/PPPPPPPP/NQNBBRKR w KQkq - 0 1 +nqrbbnkr/pppppppp/8/8/8/8/PPPPPPPP/NQRBBNKR w KQkq - 0 1 +nqrbbknr/pppppppp/8/8/8/8/PPPPPPPP/NQRBBKNR w KQkq - 0 1 +nqrbbkrn/pppppppp/8/8/8/8/PPPPPPPP/NQRBBKRN w KQkq - 0 1 +rqnbbnkr/pppppppp/8/8/8/8/PPPPPPPP/RQNBBNKR w KQkq - 0 1 +rqnbbknr/pppppppp/8/8/8/8/PPPPPPPP/RQNBBKNR w KQkq - 0 1 +rqnbbkrn/pppppppp/8/8/8/8/PPPPPPPP/RQNBBKRN w KQkq - 0 1 +rqkbbnnr/pppppppp/8/8/8/8/PPPPPPPP/RQKBBNNR w KQkq - 0 1 +rqkbbnrn/pppppppp/8/8/8/8/PPPPPPPP/RQKBBNRN w KQkq - 0 1 +rqkbbrnn/pppppppp/8/8/8/8/PPPPPPPP/RQKBBRNN w KQkq - 0 1 +nnqbbrkr/pppppppp/8/8/8/8/PPPPPPPP/NNQBBRKR w KQkq - 0 1 +nrqbbnkr/pppppppp/8/8/8/8/PPPPPPPP/NRQBBNKR w KQkq - 0 1 +nrqbbknr/pppppppp/8/8/8/8/PPPPPPPP/NRQBBKNR w KQkq - 0 1 +nrqbbkrn/pppppppp/8/8/8/8/PPPPPPPP/NRQBBKRN w KQkq - 0 1 +rnqbbnkr/pppppppp/8/8/8/8/PPPPPPPP/RNQBBNKR w KQkq - 0 1 +rnqbbknr/pppppppp/8/8/8/8/PPPPPPPP/RNQBBKNR w KQkq - 0 1 +rnqbbkrn/pppppppp/8/8/8/8/PPPPPPPP/RNQBBKRN w KQkq - 0 1 +rkqbbnnr/pppppppp/8/8/8/8/PPPPPPPP/RKQBBNNR w KQkq - 0 1 +rkqbbnrn/pppppppp/8/8/8/8/PPPPPPPP/RKQBBNRN w KQkq - 0 1 +rkqbbrnn/pppppppp/8/8/8/8/PPPPPPPP/RKQBBRNN w KQkq - 0 1 +nnrbbqkr/pppppppp/8/8/8/8/PPPPPPPP/NNRBBQKR w KQkq - 0 1 +nrnbbqkr/pppppppp/8/8/8/8/PPPPPPPP/NRNBBQKR w KQkq - 0 1 +nrkbbqnr/pppppppp/8/8/8/8/PPPPPPPP/NRKBBQNR w KQkq - 0 1 +nrkbbqrn/pppppppp/8/8/8/8/PPPPPPPP/NRKBBQRN w KQkq - 0 1)" +// MSVC can't handle string longer than 16,384 bytes +R"( +rnnbbqkr/pppppppp/8/8/8/8/PPPPPPPP/RNNBBQKR w KQkq - 0 1 +rnkbbqnr/pppppppp/8/8/8/8/PPPPPPPP/RNKBBQNR w KQkq - 0 1 +rnkbbqrn/pppppppp/8/8/8/8/PPPPPPPP/RNKBBQRN w KQkq - 0 1 +rknbbqnr/pppppppp/8/8/8/8/PPPPPPPP/RKNBBQNR w KQkq - 0 1 +rknbbqrn/pppppppp/8/8/8/8/PPPPPPPP/RKNBBQRN w KQkq - 0 1 +rkrbbqnn/pppppppp/8/8/8/8/PPPPPPPP/RKRBBQNN w KQkq - 0 1 +nnrbbkqr/pppppppp/8/8/8/8/PPPPPPPP/NNRBBKQR w KQkq - 0 1 +nrnbbkqr/pppppppp/8/8/8/8/PPPPPPPP/NRNBBKQR w KQkq - 0 1 +nrkbbnqr/pppppppp/8/8/8/8/PPPPPPPP/NRKBBNQR w KQkq - 0 1 +nrkbbrqn/pppppppp/8/8/8/8/PPPPPPPP/NRKBBRQN w KQkq - 0 1 +rnnbbkqr/pppppppp/8/8/8/8/PPPPPPPP/RNNBBKQR w KQkq - 0 1 +rnkbbnqr/pppppppp/8/8/8/8/PPPPPPPP/RNKBBNQR w KQkq - 0 1 +rnkbbrqn/pppppppp/8/8/8/8/PPPPPPPP/RNKBBRQN w KQkq - 0 1 +rknbbnqr/pppppppp/8/8/8/8/PPPPPPPP/RKNBBNQR w KQkq - 0 1 +rknbbrqn/pppppppp/8/8/8/8/PPPPPPPP/RKNBBRQN w KQkq - 0 1 +rkrbbnqn/pppppppp/8/8/8/8/PPPPPPPP/RKRBBNQN w KQkq - 0 1 +nnrbbkrq/pppppppp/8/8/8/8/PPPPPPPP/NNRBBKRQ w KQkq - 0 1 +nrnbbkrq/pppppppp/8/8/8/8/PPPPPPPP/NRNBBKRQ w KQkq - 0 1 +nrkbbnrq/pppppppp/8/8/8/8/PPPPPPPP/NRKBBNRQ w KQkq - 0 1 +nrkbbrnq/pppppppp/8/8/8/8/PPPPPPPP/NRKBBRNQ w KQkq - 0 1 +rnnbbkrq/pppppppp/8/8/8/8/PPPPPPPP/RNNBBKRQ w KQkq - 0 1 +rnkbbnrq/pppppppp/8/8/8/8/PPPPPPPP/RNKBBNRQ w KQkq - 0 1 +rnkbbrnq/pppppppp/8/8/8/8/PPPPPPPP/RNKBBRNQ w KQkq - 0 1 +rknbbnrq/pppppppp/8/8/8/8/PPPPPPPP/RKNBBNRQ w KQkq - 0 1 
+rknbbrnq/pppppppp/8/8/8/8/PPPPPPPP/RKNBBRNQ w KQkq - 0 1 +rkrbbnnq/pppppppp/8/8/8/8/PPPPPPPP/RKRBBNNQ w KQkq - 0 1 +qnnrbbkr/pppppppp/8/8/8/8/PPPPPPPP/QNNRBBKR w KQkq - 0 1 +qnrnbbkr/pppppppp/8/8/8/8/PPPPPPPP/QNRNBBKR w KQkq - 0 1 +qnrkbbnr/pppppppp/8/8/8/8/PPPPPPPP/QNRKBBNR w KQkq - 0 1 +qnrkbbrn/pppppppp/8/8/8/8/PPPPPPPP/QNRKBBRN w KQkq - 0 1 +qrnnbbkr/pppppppp/8/8/8/8/PPPPPPPP/QRNNBBKR w KQkq - 0 1 +qrnkbbnr/pppppppp/8/8/8/8/PPPPPPPP/QRNKBBNR w KQkq - 0 1 +qrnkbbrn/pppppppp/8/8/8/8/PPPPPPPP/QRNKBBRN w KQkq - 0 1 +qrknbbnr/pppppppp/8/8/8/8/PPPPPPPP/QRKNBBNR w KQkq - 0 1 +qrknbbrn/pppppppp/8/8/8/8/PPPPPPPP/QRKNBBRN w KQkq - 0 1 +qrkrbbnn/pppppppp/8/8/8/8/PPPPPPPP/QRKRBBNN w KQkq - 0 1 +nqnrbbkr/pppppppp/8/8/8/8/PPPPPPPP/NQNRBBKR w KQkq - 0 1 +nqrnbbkr/pppppppp/8/8/8/8/PPPPPPPP/NQRNBBKR w KQkq - 0 1 +nqrkbbnr/pppppppp/8/8/8/8/PPPPPPPP/NQRKBBNR w KQkq - 0 1 +nqrkbbrn/pppppppp/8/8/8/8/PPPPPPPP/NQRKBBRN w KQkq - 0 1 +rqnnbbkr/pppppppp/8/8/8/8/PPPPPPPP/RQNNBBKR w KQkq - 0 1 +rqnkbbnr/pppppppp/8/8/8/8/PPPPPPPP/RQNKBBNR w KQkq - 0 1 +rqnkbbrn/pppppppp/8/8/8/8/PPPPPPPP/RQNKBBRN w KQkq - 0 1 +rqknbbnr/pppppppp/8/8/8/8/PPPPPPPP/RQKNBBNR w KQkq - 0 1 +rqknbbrn/pppppppp/8/8/8/8/PPPPPPPP/RQKNBBRN w KQkq - 0 1 +rqkrbbnn/pppppppp/8/8/8/8/PPPPPPPP/RQKRBBNN w KQkq - 0 1 +nnqrbbkr/pppppppp/8/8/8/8/PPPPPPPP/NNQRBBKR w KQkq - 0 1 +nrqnbbkr/pppppppp/8/8/8/8/PPPPPPPP/NRQNBBKR w KQkq - 0 1 +nrqkbbnr/pppppppp/8/8/8/8/PPPPPPPP/NRQKBBNR w KQkq - 0 1 +nrqkbbrn/pppppppp/8/8/8/8/PPPPPPPP/NRQKBBRN w KQkq - 0 1 +rnqnbbkr/pppppppp/8/8/8/8/PPPPPPPP/RNQNBBKR w KQkq - 0 1 +rnqkbbnr/pppppppp/8/8/8/8/PPPPPPPP/RNQKBBNR w KQkq - 0 1 +rnqkbbrn/pppppppp/8/8/8/8/PPPPPPPP/RNQKBBRN w KQkq - 0 1 +rkqnbbnr/pppppppp/8/8/8/8/PPPPPPPP/RKQNBBNR w KQkq - 0 1 +rkqnbbrn/pppppppp/8/8/8/8/PPPPPPPP/RKQNBBRN w KQkq - 0 1 +rkqrbbnn/pppppppp/8/8/8/8/PPPPPPPP/RKQRBBNN w KQkq - 0 1 +nnrqbbkr/pppppppp/8/8/8/8/PPPPPPPP/NNRQBBKR w KQkq - 0 1 +nrnqbbkr/pppppppp/8/8/8/8/PPPPPPPP/NRNQBBKR w KQkq - 0 1 +nrkqbbnr/pppppppp/8/8/8/8/PPPPPPPP/NRKQBBNR w KQkq - 0 1 +nrkqbbrn/pppppppp/8/8/8/8/PPPPPPPP/NRKQBBRN w KQkq - 0 1 +rnnqbbkr/pppppppp/8/8/8/8/PPPPPPPP/RNNQBBKR w KQkq - 0 1 +rnkqbbnr/pppppppp/8/8/8/8/PPPPPPPP/RNKQBBNR w KQkq - 0 1 +rnkqbbrn/pppppppp/8/8/8/8/PPPPPPPP/RNKQBBRN w KQkq - 0 1 +rknqbbnr/pppppppp/8/8/8/8/PPPPPPPP/RKNQBBNR w KQkq - 0 1 +rknqbbrn/pppppppp/8/8/8/8/PPPPPPPP/RKNQBBRN w KQkq - 0 1 +rkrqbbnn/pppppppp/8/8/8/8/PPPPPPPP/RKRQBBNN w KQkq - 0 1 +nnrkbbqr/pppppppp/8/8/8/8/PPPPPPPP/NNRKBBQR w KQkq - 0 1 +nrnkbbqr/pppppppp/8/8/8/8/PPPPPPPP/NRNKBBQR w KQkq - 0 1 +nrknbbqr/pppppppp/8/8/8/8/PPPPPPPP/NRKNBBQR w KQkq - 0 1 +nrkrbbqn/pppppppp/8/8/8/8/PPPPPPPP/NRKRBBQN w KQkq - 0 1 +rnnkbbqr/pppppppp/8/8/8/8/PPPPPPPP/RNNKBBQR w KQkq - 0 1 +rnknbbqr/pppppppp/8/8/8/8/PPPPPPPP/RNKNBBQR w KQkq - 0 1 +rnkrbbqn/pppppppp/8/8/8/8/PPPPPPPP/RNKRBBQN w KQkq - 0 1 +rknnbbqr/pppppppp/8/8/8/8/PPPPPPPP/RKNNBBQR w KQkq - 0 1 +rknrbbqn/pppppppp/8/8/8/8/PPPPPPPP/RKNRBBQN w KQkq - 0 1 +rkrnbbqn/pppppppp/8/8/8/8/PPPPPPPP/RKRNBBQN w KQkq - 0 1 +nnrkbbrq/pppppppp/8/8/8/8/PPPPPPPP/NNRKBBRQ w KQkq - 0 1 +nrnkbbrq/pppppppp/8/8/8/8/PPPPPPPP/NRNKBBRQ w KQkq - 0 1 +nrknbbrq/pppppppp/8/8/8/8/PPPPPPPP/NRKNBBRQ w KQkq - 0 1 +nrkrbbnq/pppppppp/8/8/8/8/PPPPPPPP/NRKRBBNQ w KQkq - 0 1 +rnnkbbrq/pppppppp/8/8/8/8/PPPPPPPP/RNNKBBRQ w KQkq - 0 1 +rnknbbrq/pppppppp/8/8/8/8/PPPPPPPP/RNKNBBRQ w KQkq - 0 1 +rnkrbbnq/pppppppp/8/8/8/8/PPPPPPPP/RNKRBBNQ w KQkq - 0 1 +rknnbbrq/pppppppp/8/8/8/8/PPPPPPPP/RKNNBBRQ w KQkq - 0 1 +rknrbbnq/pppppppp/8/8/8/8/PPPPPPPP/RKNRBBNQ w KQkq - 0 1 
+rkrnbbnq/pppppppp/8/8/8/8/PPPPPPPP/RKRNBBNQ w KQkq - 0 1 +qnnrbkrb/pppppppp/8/8/8/8/PPPPPPPP/QNNRBKRB w KQkq - 0 1 +qnrnbkrb/pppppppp/8/8/8/8/PPPPPPPP/QNRNBKRB w KQkq - 0 1 +qnrkbnrb/pppppppp/8/8/8/8/PPPPPPPP/QNRKBNRB w KQkq - 0 1 +qnrkbrnb/pppppppp/8/8/8/8/PPPPPPPP/QNRKBRNB w KQkq - 0 1 +qrnnbkrb/pppppppp/8/8/8/8/PPPPPPPP/QRNNBKRB w KQkq - 0 1 +qrnkbnrb/pppppppp/8/8/8/8/PPPPPPPP/QRNKBNRB w KQkq - 0 1 +qrnkbrnb/pppppppp/8/8/8/8/PPPPPPPP/QRNKBRNB w KQkq - 0 1 +qrknbnrb/pppppppp/8/8/8/8/PPPPPPPP/QRKNBNRB w KQkq - 0 1 +qrknbrnb/pppppppp/8/8/8/8/PPPPPPPP/QRKNBRNB w KQkq - 0 1 +qrkrbnnb/pppppppp/8/8/8/8/PPPPPPPP/QRKRBNNB w KQkq - 0 1 +nqnrbkrb/pppppppp/8/8/8/8/PPPPPPPP/NQNRBKRB w KQkq - 0 1 +nqrnbkrb/pppppppp/8/8/8/8/PPPPPPPP/NQRNBKRB w KQkq - 0 1 +nqrkbnrb/pppppppp/8/8/8/8/PPPPPPPP/NQRKBNRB w KQkq - 0 1 +nqrkbrnb/pppppppp/8/8/8/8/PPPPPPPP/NQRKBRNB w KQkq - 0 1 +rqnnbkrb/pppppppp/8/8/8/8/PPPPPPPP/RQNNBKRB w KQkq - 0 1 +rqnkbnrb/pppppppp/8/8/8/8/PPPPPPPP/RQNKBNRB w KQkq - 0 1 +rqnkbrnb/pppppppp/8/8/8/8/PPPPPPPP/RQNKBRNB w KQkq - 0 1 +rqknbnrb/pppppppp/8/8/8/8/PPPPPPPP/RQKNBNRB w KQkq - 0 1 +rqknbrnb/pppppppp/8/8/8/8/PPPPPPPP/RQKNBRNB w KQkq - 0 1 +rqkrbnnb/pppppppp/8/8/8/8/PPPPPPPP/RQKRBNNB w KQkq - 0 1 +nnqrbkrb/pppppppp/8/8/8/8/PPPPPPPP/NNQRBKRB w KQkq - 0 1 +nrqnbkrb/pppppppp/8/8/8/8/PPPPPPPP/NRQNBKRB w KQkq - 0 1 +nrqkbnrb/pppppppp/8/8/8/8/PPPPPPPP/NRQKBNRB w KQkq - 0 1 +nrqkbrnb/pppppppp/8/8/8/8/PPPPPPPP/NRQKBRNB w KQkq - 0 1 +rnqnbkrb/pppppppp/8/8/8/8/PPPPPPPP/RNQNBKRB w KQkq - 0 1 +rnqkbnrb/pppppppp/8/8/8/8/PPPPPPPP/RNQKBNRB w KQkq - 0 1 +rnqkbrnb/pppppppp/8/8/8/8/PPPPPPPP/RNQKBRNB w KQkq - 0 1 +rkqnbnrb/pppppppp/8/8/8/8/PPPPPPPP/RKQNBNRB w KQkq - 0 1 +rkqnbrnb/pppppppp/8/8/8/8/PPPPPPPP/RKQNBRNB w KQkq - 0 1 +rkqrbnnb/pppppppp/8/8/8/8/PPPPPPPP/RKQRBNNB w KQkq - 0 1 +nnrqbkrb/pppppppp/8/8/8/8/PPPPPPPP/NNRQBKRB w KQkq - 0 1 +nrnqbkrb/pppppppp/8/8/8/8/PPPPPPPP/NRNQBKRB w KQkq - 0 1 +nrkqbnrb/pppppppp/8/8/8/8/PPPPPPPP/NRKQBNRB w KQkq - 0 1 +nrkqbrnb/pppppppp/8/8/8/8/PPPPPPPP/NRKQBRNB w KQkq - 0 1 +rnnqbkrb/pppppppp/8/8/8/8/PPPPPPPP/RNNQBKRB w KQkq - 0 1 +rnkqbnrb/pppppppp/8/8/8/8/PPPPPPPP/RNKQBNRB w KQkq - 0 1 +rnkqbrnb/pppppppp/8/8/8/8/PPPPPPPP/RNKQBRNB w KQkq - 0 1 +rknqbnrb/pppppppp/8/8/8/8/PPPPPPPP/RKNQBNRB w KQkq - 0 1 +rknqbrnb/pppppppp/8/8/8/8/PPPPPPPP/RKNQBRNB w KQkq - 0 1 +rkrqbnnb/pppppppp/8/8/8/8/PPPPPPPP/RKRQBNNB w KQkq - 0 1 +nnrkbqrb/pppppppp/8/8/8/8/PPPPPPPP/NNRKBQRB w KQkq - 0 1 +nrnkbqrb/pppppppp/8/8/8/8/PPPPPPPP/NRNKBQRB w KQkq - 0 1 +nrknbqrb/pppppppp/8/8/8/8/PPPPPPPP/NRKNBQRB w KQkq - 0 1 +nrkrbqnb/pppppppp/8/8/8/8/PPPPPPPP/NRKRBQNB w KQkq - 0 1 +rnnkbqrb/pppppppp/8/8/8/8/PPPPPPPP/RNNKBQRB w KQkq - 0 1 +rnknbqrb/pppppppp/8/8/8/8/PPPPPPPP/RNKNBQRB w KQkq - 0 1 +rnkrbqnb/pppppppp/8/8/8/8/PPPPPPPP/RNKRBQNB w KQkq - 0 1 +rknnbqrb/pppppppp/8/8/8/8/PPPPPPPP/RKNNBQRB w KQkq - 0 1 +rknrbqnb/pppppppp/8/8/8/8/PPPPPPPP/RKNRBQNB w KQkq - 0 1 +rkrnbqnb/pppppppp/8/8/8/8/PPPPPPPP/RKRNBQNB w KQkq - 0 1 +nnrkbrqb/pppppppp/8/8/8/8/PPPPPPPP/NNRKBRQB w KQkq - 0 1 +nrnkbrqb/pppppppp/8/8/8/8/PPPPPPPP/NRNKBRQB w KQkq - 0 1 +nrknbrqb/pppppppp/8/8/8/8/PPPPPPPP/NRKNBRQB w KQkq - 0 1 +nrkrbnqb/pppppppp/8/8/8/8/PPPPPPPP/NRKRBNQB w KQkq - 0 1 +rnnkbrqb/pppppppp/8/8/8/8/PPPPPPPP/RNNKBRQB w KQkq - 0 1 +rnknbrqb/pppppppp/8/8/8/8/PPPPPPPP/RNKNBRQB w KQkq - 0 1 +rnkrbnqb/pppppppp/8/8/8/8/PPPPPPPP/RNKRBNQB w KQkq - 0 1 +rknnbrqb/pppppppp/8/8/8/8/PPPPPPPP/RKNNBRQB w KQkq - 0 1 +rknrbnqb/pppppppp/8/8/8/8/PPPPPPPP/RKNRBNQB w KQkq - 0 1 +rkrnbnqb/pppppppp/8/8/8/8/PPPPPPPP/RKRNBNQB w KQkq - 0 1 
+qbnnrkbr/pppppppp/8/8/8/8/PPPPPPPP/QBNNRKBR w KQkq - 0 1 +qbnrnkbr/pppppppp/8/8/8/8/PPPPPPPP/QBNRNKBR w KQkq - 0 1 +qbnrknbr/pppppppp/8/8/8/8/PPPPPPPP/QBNRKNBR w KQkq - 0 1 +qbnrkrbn/pppppppp/8/8/8/8/PPPPPPPP/QBNRKRBN w KQkq - 0 1 +qbrnnkbr/pppppppp/8/8/8/8/PPPPPPPP/QBRNNKBR w KQkq - 0 1 +qbrnknbr/pppppppp/8/8/8/8/PPPPPPPP/QBRNKNBR w KQkq - 0 1 +qbrnkrbn/pppppppp/8/8/8/8/PPPPPPPP/QBRNKRBN w KQkq - 0 1 +qbrknnbr/pppppppp/8/8/8/8/PPPPPPPP/QBRKNNBR w KQkq - 0 1 +qbrknrbn/pppppppp/8/8/8/8/PPPPPPPP/QBRKNRBN w KQkq - 0 1 +qbrkrnbn/pppppppp/8/8/8/8/PPPPPPPP/QBRKRNBN w KQkq - 0 1 +nbqnrkbr/pppppppp/8/8/8/8/PPPPPPPP/NBQNRKBR w KQkq - 0 1 +nbqrnkbr/pppppppp/8/8/8/8/PPPPPPPP/NBQRNKBR w KQkq - 0 1 +nbqrknbr/pppppppp/8/8/8/8/PPPPPPPP/NBQRKNBR w KQkq - 0 1 +nbqrkrbn/pppppppp/8/8/8/8/PPPPPPPP/NBQRKRBN w KQkq - 0 1 +rbqnnkbr/pppppppp/8/8/8/8/PPPPPPPP/RBQNNKBR w KQkq - 0 1 +rbqnknbr/pppppppp/8/8/8/8/PPPPPPPP/RBQNKNBR w KQkq - 0 1 +rbqnkrbn/pppppppp/8/8/8/8/PPPPPPPP/RBQNKRBN w KQkq - 0 1 +rbqknnbr/pppppppp/8/8/8/8/PPPPPPPP/RBQKNNBR w KQkq - 0 1 +rbqknrbn/pppppppp/8/8/8/8/PPPPPPPP/RBQKNRBN w KQkq - 0 1 +rbqkrnbn/pppppppp/8/8/8/8/PPPPPPPP/RBQKRNBN w KQkq - 0 1 +nbnqrkbr/pppppppp/8/8/8/8/PPPPPPPP/NBNQRKBR w KQkq - 0 1 +nbrqnkbr/pppppppp/8/8/8/8/PPPPPPPP/NBRQNKBR w KQkq - 0 1 +nbrqknbr/pppppppp/8/8/8/8/PPPPPPPP/NBRQKNBR w KQkq - 0 1 +nbrqkrbn/pppppppp/8/8/8/8/PPPPPPPP/NBRQKRBN w KQkq - 0 1 +rbnqnkbr/pppppppp/8/8/8/8/PPPPPPPP/RBNQNKBR w KQkq - 0 1 +rbnqknbr/pppppppp/8/8/8/8/PPPPPPPP/RBNQKNBR w KQkq - 0 1 +rbnqkrbn/pppppppp/8/8/8/8/PPPPPPPP/RBNQKRBN w KQkq - 0 1 +rbkqnnbr/pppppppp/8/8/8/8/PPPPPPPP/RBKQNNBR w KQkq - 0 1 +rbkqnrbn/pppppppp/8/8/8/8/PPPPPPPP/RBKQNRBN w KQkq - 0 1 +rbkqrnbn/pppppppp/8/8/8/8/PPPPPPPP/RBKQRNBN w KQkq - 0 1 +nbnrqkbr/pppppppp/8/8/8/8/PPPPPPPP/NBNRQKBR w KQkq - 0 1 +nbrnqkbr/pppppppp/8/8/8/8/PPPPPPPP/NBRNQKBR w KQkq - 0 1 +nbrkqnbr/pppppppp/8/8/8/8/PPPPPPPP/NBRKQNBR w KQkq - 0 1 +nbrkqrbn/pppppppp/8/8/8/8/PPPPPPPP/NBRKQRBN w KQkq - 0 1 +rbnnqkbr/pppppppp/8/8/8/8/PPPPPPPP/RBNNQKBR w KQkq - 0 1 +rbnkqnbr/pppppppp/8/8/8/8/PPPPPPPP/RBNKQNBR w KQkq - 0 1 +rbnkqrbn/pppppppp/8/8/8/8/PPPPPPPP/RBNKQRBN w KQkq - 0 1 +rbknqnbr/pppppppp/8/8/8/8/PPPPPPPP/RBKNQNBR w KQkq - 0 1 +rbknqrbn/pppppppp/8/8/8/8/PPPPPPPP/RBKNQRBN w KQkq - 0 1 +rbkrqnbn/pppppppp/8/8/8/8/PPPPPPPP/RBKRQNBN w KQkq - 0 1 +nbnrkqbr/pppppppp/8/8/8/8/PPPPPPPP/NBNRKQBR w KQkq - 0 1 +nbrnkqbr/pppppppp/8/8/8/8/PPPPPPPP/NBRNKQBR w KQkq - 0 1 +nbrknqbr/pppppppp/8/8/8/8/PPPPPPPP/NBRKNQBR w KQkq - 0 1 +nbrkrqbn/pppppppp/8/8/8/8/PPPPPPPP/NBRKRQBN w KQkq - 0 1 +rbnnkqbr/pppppppp/8/8/8/8/PPPPPPPP/RBNNKQBR w KQkq - 0 1 +rbnknqbr/pppppppp/8/8/8/8/PPPPPPPP/RBNKNQBR w KQkq - 0 1 +rbnkrqbn/pppppppp/8/8/8/8/PPPPPPPP/RBNKRQBN w KQkq - 0 1 +rbknnqbr/pppppppp/8/8/8/8/PPPPPPPP/RBKNNQBR w KQkq - 0 1 +rbknrqbn/pppppppp/8/8/8/8/PPPPPPPP/RBKNRQBN w KQkq - 0 1 +rbkrnqbn/pppppppp/8/8/8/8/PPPPPPPP/RBKRNQBN w KQkq - 0 1 +nbnrkrbq/pppppppp/8/8/8/8/PPPPPPPP/NBNRKRBQ w KQkq - 0 1 +nbrnkrbq/pppppppp/8/8/8/8/PPPPPPPP/NBRNKRBQ w KQkq - 0 1 +nbrknrbq/pppppppp/8/8/8/8/PPPPPPPP/NBRKNRBQ w KQkq - 0 1 +nbrkrnbq/pppppppp/8/8/8/8/PPPPPPPP/NBRKRNBQ w KQkq - 0 1 +rbnnkrbq/pppppppp/8/8/8/8/PPPPPPPP/RBNNKRBQ w KQkq - 0 1 +rbnknrbq/pppppppp/8/8/8/8/PPPPPPPP/RBNKNRBQ w KQkq - 0 1 +rbnkrnbq/pppppppp/8/8/8/8/PPPPPPPP/RBNKRNBQ w KQkq - 0 1 +rbknnrbq/pppppppp/8/8/8/8/PPPPPPPP/RBKNNRBQ w KQkq - 0 1 +rbknrnbq/pppppppp/8/8/8/8/PPPPPPPP/RBKNRNBQ w KQkq - 0 1 +rbkrnnbq/pppppppp/8/8/8/8/PPPPPPPP/RBKRNNBQ w KQkq - 0 1 +qnnbrkbr/pppppppp/8/8/8/8/PPPPPPPP/QNNBRKBR w KQkq - 0 1 
+qnrbnkbr/pppppppp/8/8/8/8/PPPPPPPP/QNRBNKBR w KQkq - 0 1 +qnrbknbr/pppppppp/8/8/8/8/PPPPPPPP/QNRBKNBR w KQkq - 0 1 +qnrbkrbn/pppppppp/8/8/8/8/PPPPPPPP/QNRBKRBN w KQkq - 0 1 +qrnbnkbr/pppppppp/8/8/8/8/PPPPPPPP/QRNBNKBR w KQkq - 0 1 +qrnbknbr/pppppppp/8/8/8/8/PPPPPPPP/QRNBKNBR w KQkq - 0 1 +qrnbkrbn/pppppppp/8/8/8/8/PPPPPPPP/QRNBKRBN w KQkq - 0 1 +qrkbnnbr/pppppppp/8/8/8/8/PPPPPPPP/QRKBNNBR w KQkq - 0 1 +qrkbnrbn/pppppppp/8/8/8/8/PPPPPPPP/QRKBNRBN w KQkq - 0 1 +qrkbrnbn/pppppppp/8/8/8/8/PPPPPPPP/QRKBRNBN w KQkq - 0 1 +nqnbrkbr/pppppppp/8/8/8/8/PPPPPPPP/NQNBRKBR w KQkq - 0 1 +nqrbnkbr/pppppppp/8/8/8/8/PPPPPPPP/NQRBNKBR w KQkq - 0 1 +nqrbknbr/pppppppp/8/8/8/8/PPPPPPPP/NQRBKNBR w KQkq - 0 1 +nqrbkrbn/pppppppp/8/8/8/8/PPPPPPPP/NQRBKRBN w KQkq - 0 1 +rqnbnkbr/pppppppp/8/8/8/8/PPPPPPPP/RQNBNKBR w KQkq - 0 1 +rqnbknbr/pppppppp/8/8/8/8/PPPPPPPP/RQNBKNBR w KQkq - 0 1 +rqnbkrbn/pppppppp/8/8/8/8/PPPPPPPP/RQNBKRBN w KQkq - 0 1 +rqkbnnbr/pppppppp/8/8/8/8/PPPPPPPP/RQKBNNBR w KQkq - 0 1 +rqkbnrbn/pppppppp/8/8/8/8/PPPPPPPP/RQKBNRBN w KQkq - 0 1 +rqkbrnbn/pppppppp/8/8/8/8/PPPPPPPP/RQKBRNBN w KQkq - 0 1 +nnqbrkbr/pppppppp/8/8/8/8/PPPPPPPP/NNQBRKBR w KQkq - 0 1 +nrqbnkbr/pppppppp/8/8/8/8/PPPPPPPP/NRQBNKBR w KQkq - 0 1 +nrqbknbr/pppppppp/8/8/8/8/PPPPPPPP/NRQBKNBR w KQkq - 0 1 +nrqbkrbn/pppppppp/8/8/8/8/PPPPPPPP/NRQBKRBN w KQkq - 0 1 +rnqbnkbr/pppppppp/8/8/8/8/PPPPPPPP/RNQBNKBR w KQkq - 0 1 +rnqbknbr/pppppppp/8/8/8/8/PPPPPPPP/RNQBKNBR w KQkq - 0 1 +rnqbkrbn/pppppppp/8/8/8/8/PPPPPPPP/RNQBKRBN w KQkq - 0 1 +rkqbnnbr/pppppppp/8/8/8/8/PPPPPPPP/RKQBNNBR w KQkq - 0 1 +rkqbnrbn/pppppppp/8/8/8/8/PPPPPPPP/RKQBNRBN w KQkq - 0 1 +rkqbrnbn/pppppppp/8/8/8/8/PPPPPPPP/RKQBRNBN w KQkq - 0 1 +nnrbqkbr/pppppppp/8/8/8/8/PPPPPPPP/NNRBQKBR w KQkq - 0 1 +nrnbqkbr/pppppppp/8/8/8/8/PPPPPPPP/NRNBQKBR w KQkq - 0 1 +nrkbqnbr/pppppppp/8/8/8/8/PPPPPPPP/NRKBQNBR w KQkq - 0 1 +nrkbqrbn/pppppppp/8/8/8/8/PPPPPPPP/NRKBQRBN w KQkq - 0 1 +rnnbqkbr/pppppppp/8/8/8/8/PPPPPPPP/RNNBQKBR w KQkq - 0 1 +rnkbqnbr/pppppppp/8/8/8/8/PPPPPPPP/RNKBQNBR w KQkq - 0 1 +rnkbqrbn/pppppppp/8/8/8/8/PPPPPPPP/RNKBQRBN w KQkq - 0 1 +rknbqnbr/pppppppp/8/8/8/8/PPPPPPPP/RKNBQNBR w KQkq - 0 1 +rknbqrbn/pppppppp/8/8/8/8/PPPPPPPP/RKNBQRBN w KQkq - 0 1 +rkrbqnbn/pppppppp/8/8/8/8/PPPPPPPP/RKRBQNBN w KQkq - 0 1 +nnrbkqbr/pppppppp/8/8/8/8/PPPPPPPP/NNRBKQBR w KQkq - 0 1 +nrnbkqbr/pppppppp/8/8/8/8/PPPPPPPP/NRNBKQBR w KQkq - 0 1 +nrkbnqbr/pppppppp/8/8/8/8/PPPPPPPP/NRKBNQBR w KQkq - 0 1 +nrkbrqbn/pppppppp/8/8/8/8/PPPPPPPP/NRKBRQBN w KQkq - 0 1 +rnnbkqbr/pppppppp/8/8/8/8/PPPPPPPP/RNNBKQBR w KQkq - 0 1 +rnkbnqbr/pppppppp/8/8/8/8/PPPPPPPP/RNKBNQBR w KQkq - 0 1 +rnkbrqbn/pppppppp/8/8/8/8/PPPPPPPP/RNKBRQBN w KQkq - 0 1 +rknbnqbr/pppppppp/8/8/8/8/PPPPPPPP/RKNBNQBR w KQkq - 0 1 +rknbrqbn/pppppppp/8/8/8/8/PPPPPPPP/RKNBRQBN w KQkq - 0 1 +rkrbnqbn/pppppppp/8/8/8/8/PPPPPPPP/RKRBNQBN w KQkq - 0 1 +nnrbkrbq/pppppppp/8/8/8/8/PPPPPPPP/NNRBKRBQ w KQkq - 0 1 +nrnbkrbq/pppppppp/8/8/8/8/PPPPPPPP/NRNBKRBQ w KQkq - 0 1 +nrkbnrbq/pppppppp/8/8/8/8/PPPPPPPP/NRKBNRBQ w KQkq - 0 1 +nrkbrnbq/pppppppp/8/8/8/8/PPPPPPPP/NRKBRNBQ w KQkq - 0 1 +rnnbkrbq/pppppppp/8/8/8/8/PPPPPPPP/RNNBKRBQ w KQkq - 0 1 +rnkbnrbq/pppppppp/8/8/8/8/PPPPPPPP/RNKBNRBQ w KQkq - 0 1 +rnkbrnbq/pppppppp/8/8/8/8/PPPPPPPP/RNKBRNBQ w KQkq - 0 1 +rknbnrbq/pppppppp/8/8/8/8/PPPPPPPP/RKNBNRBQ w KQkq - 0 1 +rknbrnbq/pppppppp/8/8/8/8/PPPPPPPP/RKNBRNBQ w KQkq - 0 1 +rkrbnnbq/pppppppp/8/8/8/8/PPPPPPPP/RKRBNNBQ w KQkq - 0 1 +qnnrkbbr/pppppppp/8/8/8/8/PPPPPPPP/QNNRKBBR w KQkq - 0 1 +qnrnkbbr/pppppppp/8/8/8/8/PPPPPPPP/QNRNKBBR w KQkq - 0 1 
+qnrknbbr/pppppppp/8/8/8/8/PPPPPPPP/QNRKNBBR w KQkq - 0 1 +qnrkrbbn/pppppppp/8/8/8/8/PPPPPPPP/QNRKRBBN w KQkq - 0 1 +qrnnkbbr/pppppppp/8/8/8/8/PPPPPPPP/QRNNKBBR w KQkq - 0 1 +qrnknbbr/pppppppp/8/8/8/8/PPPPPPPP/QRNKNBBR w KQkq - 0 1 +qrnkrbbn/pppppppp/8/8/8/8/PPPPPPPP/QRNKRBBN w KQkq - 0 1 +qrknnbbr/pppppppp/8/8/8/8/PPPPPPPP/QRKNNBBR w KQkq - 0 1 +qrknrbbn/pppppppp/8/8/8/8/PPPPPPPP/QRKNRBBN w KQkq - 0 1 +qrkrnbbn/pppppppp/8/8/8/8/PPPPPPPP/QRKRNBBN w KQkq - 0 1 +nqnrkbbr/pppppppp/8/8/8/8/PPPPPPPP/NQNRKBBR w KQkq - 0 1 +nqrnkbbr/pppppppp/8/8/8/8/PPPPPPPP/NQRNKBBR w KQkq - 0 1 +nqrknbbr/pppppppp/8/8/8/8/PPPPPPPP/NQRKNBBR w KQkq - 0 1 +nqrkrbbn/pppppppp/8/8/8/8/PPPPPPPP/NQRKRBBN w KQkq - 0 1 +rqnnkbbr/pppppppp/8/8/8/8/PPPPPPPP/RQNNKBBR w KQkq - 0 1 +rqnknbbr/pppppppp/8/8/8/8/PPPPPPPP/RQNKNBBR w KQkq - 0 1 +rqnkrbbn/pppppppp/8/8/8/8/PPPPPPPP/RQNKRBBN w KQkq - 0 1 +rqknnbbr/pppppppp/8/8/8/8/PPPPPPPP/RQKNNBBR w KQkq - 0 1 +rqknrbbn/pppppppp/8/8/8/8/PPPPPPPP/RQKNRBBN w KQkq - 0 1 +rqkrnbbn/pppppppp/8/8/8/8/PPPPPPPP/RQKRNBBN w KQkq - 0 1 +nnqrkbbr/pppppppp/8/8/8/8/PPPPPPPP/NNQRKBBR w KQkq - 0 1)" +// MSVC can't handle string longer than 16,384 bytes +R"( +nrqnkbbr/pppppppp/8/8/8/8/PPPPPPPP/NRQNKBBR w KQkq - 0 1 +nrqknbbr/pppppppp/8/8/8/8/PPPPPPPP/NRQKNBBR w KQkq - 0 1 +nrqkrbbn/pppppppp/8/8/8/8/PPPPPPPP/NRQKRBBN w KQkq - 0 1 +rnqnkbbr/pppppppp/8/8/8/8/PPPPPPPP/RNQNKBBR w KQkq - 0 1 +rnqknbbr/pppppppp/8/8/8/8/PPPPPPPP/RNQKNBBR w KQkq - 0 1 +rnqkrbbn/pppppppp/8/8/8/8/PPPPPPPP/RNQKRBBN w KQkq - 0 1 +rkqnnbbr/pppppppp/8/8/8/8/PPPPPPPP/RKQNNBBR w KQkq - 0 1 +rkqnrbbn/pppppppp/8/8/8/8/PPPPPPPP/RKQNRBBN w KQkq - 0 1 +rkqrnbbn/pppppppp/8/8/8/8/PPPPPPPP/RKQRNBBN w KQkq - 0 1 +nnrqkbbr/pppppppp/8/8/8/8/PPPPPPPP/NNRQKBBR w KQkq - 0 1 +nrnqkbbr/pppppppp/8/8/8/8/PPPPPPPP/NRNQKBBR w KQkq - 0 1 +nrkqnbbr/pppppppp/8/8/8/8/PPPPPPPP/NRKQNBBR w KQkq - 0 1 +nrkqrbbn/pppppppp/8/8/8/8/PPPPPPPP/NRKQRBBN w KQkq - 0 1 +rnnqkbbr/pppppppp/8/8/8/8/PPPPPPPP/RNNQKBBR w KQkq - 0 1 +rnkqnbbr/pppppppp/8/8/8/8/PPPPPPPP/RNKQNBBR w KQkq - 0 1 +rnkqrbbn/pppppppp/8/8/8/8/PPPPPPPP/RNKQRBBN w KQkq - 0 1 +rknqnbbr/pppppppp/8/8/8/8/PPPPPPPP/RKNQNBBR w KQkq - 0 1 +rknqrbbn/pppppppp/8/8/8/8/PPPPPPPP/RKNQRBBN w KQkq - 0 1 +rkrqnbbn/pppppppp/8/8/8/8/PPPPPPPP/RKRQNBBN w KQkq - 0 1 +nnrkqbbr/pppppppp/8/8/8/8/PPPPPPPP/NNRKQBBR w KQkq - 0 1 +nrnkqbbr/pppppppp/8/8/8/8/PPPPPPPP/NRNKQBBR w KQkq - 0 1 +nrknqbbr/pppppppp/8/8/8/8/PPPPPPPP/NRKNQBBR w KQkq - 0 1 +nrkrqbbn/pppppppp/8/8/8/8/PPPPPPPP/NRKRQBBN w KQkq - 0 1 +rnnkqbbr/pppppppp/8/8/8/8/PPPPPPPP/RNNKQBBR w KQkq - 0 1 +rnknqbbr/pppppppp/8/8/8/8/PPPPPPPP/RNKNQBBR w KQkq - 0 1 +rnkrqbbn/pppppppp/8/8/8/8/PPPPPPPP/RNKRQBBN w KQkq - 0 1 +rknnqbbr/pppppppp/8/8/8/8/PPPPPPPP/RKNNQBBR w KQkq - 0 1 +rknrqbbn/pppppppp/8/8/8/8/PPPPPPPP/RKNRQBBN w KQkq - 0 1 +rkrnqbbn/pppppppp/8/8/8/8/PPPPPPPP/RKRNQBBN w KQkq - 0 1 +nnrkrbbq/pppppppp/8/8/8/8/PPPPPPPP/NNRKRBBQ w KQkq - 0 1 +nrnkrbbq/pppppppp/8/8/8/8/PPPPPPPP/NRNKRBBQ w KQkq - 0 1 +nrknrbbq/pppppppp/8/8/8/8/PPPPPPPP/NRKNRBBQ w KQkq - 0 1 +nrkrnbbq/pppppppp/8/8/8/8/PPPPPPPP/NRKRNBBQ w KQkq - 0 1 +rnnkrbbq/pppppppp/8/8/8/8/PPPPPPPP/RNNKRBBQ w KQkq - 0 1 +rnknrbbq/pppppppp/8/8/8/8/PPPPPPPP/RNKNRBBQ w KQkq - 0 1 +rnkrnbbq/pppppppp/8/8/8/8/PPPPPPPP/RNKRNBBQ w KQkq - 0 1 +rknnrbbq/pppppppp/8/8/8/8/PPPPPPPP/RKNNRBBQ w KQkq - 0 1 +rknrnbbq/pppppppp/8/8/8/8/PPPPPPPP/RKNRNBBQ w KQkq - 0 1 +rkrnnbbq/pppppppp/8/8/8/8/PPPPPPPP/RKRNNBBQ w KQkq - 0 1 +qnnrkrbb/pppppppp/8/8/8/8/PPPPPPPP/QNNRKRBB w KQkq - 0 1 +qnrnkrbb/pppppppp/8/8/8/8/PPPPPPPP/QNRNKRBB w KQkq - 0 1 
+qnrknrbb/pppppppp/8/8/8/8/PPPPPPPP/QNRKNRBB w KQkq - 0 1 +qnrkrnbb/pppppppp/8/8/8/8/PPPPPPPP/QNRKRNBB w KQkq - 0 1 +qrnnkrbb/pppppppp/8/8/8/8/PPPPPPPP/QRNNKRBB w KQkq - 0 1 +qrnknrbb/pppppppp/8/8/8/8/PPPPPPPP/QRNKNRBB w KQkq - 0 1 +qrnkrnbb/pppppppp/8/8/8/8/PPPPPPPP/QRNKRNBB w KQkq - 0 1 +qrknnrbb/pppppppp/8/8/8/8/PPPPPPPP/QRKNNRBB w KQkq - 0 1 +qrknrnbb/pppppppp/8/8/8/8/PPPPPPPP/QRKNRNBB w KQkq - 0 1 +qrkrnnbb/pppppppp/8/8/8/8/PPPPPPPP/QRKRNNBB w KQkq - 0 1 +nqnrkrbb/pppppppp/8/8/8/8/PPPPPPPP/NQNRKRBB w KQkq - 0 1 +nqrnkrbb/pppppppp/8/8/8/8/PPPPPPPP/NQRNKRBB w KQkq - 0 1 +nqrknrbb/pppppppp/8/8/8/8/PPPPPPPP/NQRKNRBB w KQkq - 0 1 +nqrkrnbb/pppppppp/8/8/8/8/PPPPPPPP/NQRKRNBB w KQkq - 0 1 +rqnnkrbb/pppppppp/8/8/8/8/PPPPPPPP/RQNNKRBB w KQkq - 0 1 +rqnknrbb/pppppppp/8/8/8/8/PPPPPPPP/RQNKNRBB w KQkq - 0 1 +rqnkrnbb/pppppppp/8/8/8/8/PPPPPPPP/RQNKRNBB w KQkq - 0 1 +rqknnrbb/pppppppp/8/8/8/8/PPPPPPPP/RQKNNRBB w KQkq - 0 1 +rqknrnbb/pppppppp/8/8/8/8/PPPPPPPP/RQKNRNBB w KQkq - 0 1 +rqkrnnbb/pppppppp/8/8/8/8/PPPPPPPP/RQKRNNBB w KQkq - 0 1 +nnqrkrbb/pppppppp/8/8/8/8/PPPPPPPP/NNQRKRBB w KQkq - 0 1 +nrqnkrbb/pppppppp/8/8/8/8/PPPPPPPP/NRQNKRBB w KQkq - 0 1 +nrqknrbb/pppppppp/8/8/8/8/PPPPPPPP/NRQKNRBB w KQkq - 0 1 +nrqkrnbb/pppppppp/8/8/8/8/PPPPPPPP/NRQKRNBB w KQkq - 0 1 +rnqnkrbb/pppppppp/8/8/8/8/PPPPPPPP/RNQNKRBB w KQkq - 0 1 +rnqknrbb/pppppppp/8/8/8/8/PPPPPPPP/RNQKNRBB w KQkq - 0 1 +rnqkrnbb/pppppppp/8/8/8/8/PPPPPPPP/RNQKRNBB w KQkq - 0 1 +rkqnnrbb/pppppppp/8/8/8/8/PPPPPPPP/RKQNNRBB w KQkq - 0 1 +rkqnrnbb/pppppppp/8/8/8/8/PPPPPPPP/RKQNRNBB w KQkq - 0 1 +rkqrnnbb/pppppppp/8/8/8/8/PPPPPPPP/RKQRNNBB w KQkq - 0 1 +nnrqkrbb/pppppppp/8/8/8/8/PPPPPPPP/NNRQKRBB w KQkq - 0 1 +nrnqkrbb/pppppppp/8/8/8/8/PPPPPPPP/NRNQKRBB w KQkq - 0 1 +nrkqnrbb/pppppppp/8/8/8/8/PPPPPPPP/NRKQNRBB w KQkq - 0 1 +nrkqrnbb/pppppppp/8/8/8/8/PPPPPPPP/NRKQRNBB w KQkq - 0 1 +rnnqkrbb/pppppppp/8/8/8/8/PPPPPPPP/RNNQKRBB w KQkq - 0 1 +rnkqnrbb/pppppppp/8/8/8/8/PPPPPPPP/RNKQNRBB w KQkq - 0 1 +rnkqrnbb/pppppppp/8/8/8/8/PPPPPPPP/RNKQRNBB w KQkq - 0 1 +rknqnrbb/pppppppp/8/8/8/8/PPPPPPPP/RKNQNRBB w KQkq - 0 1 +rknqrnbb/pppppppp/8/8/8/8/PPPPPPPP/RKNQRNBB w KQkq - 0 1 +rkrqnnbb/pppppppp/8/8/8/8/PPPPPPPP/RKRQNNBB w KQkq - 0 1 +nnrkqrbb/pppppppp/8/8/8/8/PPPPPPPP/NNRKQRBB w KQkq - 0 1 +nrnkqrbb/pppppppp/8/8/8/8/PPPPPPPP/NRNKQRBB w KQkq - 0 1 +nrknqrbb/pppppppp/8/8/8/8/PPPPPPPP/NRKNQRBB w KQkq - 0 1 +nrkrqnbb/pppppppp/8/8/8/8/PPPPPPPP/NRKRQNBB w KQkq - 0 1 +rnnkqrbb/pppppppp/8/8/8/8/PPPPPPPP/RNNKQRBB w KQkq - 0 1 +rnknqrbb/pppppppp/8/8/8/8/PPPPPPPP/RNKNQRBB w KQkq - 0 1 +rnkrqnbb/pppppppp/8/8/8/8/PPPPPPPP/RNKRQNBB w KQkq - 0 1 +rknnqrbb/pppppppp/8/8/8/8/PPPPPPPP/RKNNQRBB w KQkq - 0 1 +rknrqnbb/pppppppp/8/8/8/8/PPPPPPPP/RKNRQNBB w KQkq - 0 1 +rkrnqnbb/pppppppp/8/8/8/8/PPPPPPPP/RKRNQNBB w KQkq - 0 1 +nnrkrqbb/pppppppp/8/8/8/8/PPPPPPPP/NNRKRQBB w KQkq - 0 1 +nrnkrqbb/pppppppp/8/8/8/8/PPPPPPPP/NRNKRQBB w KQkq - 0 1 +nrknrqbb/pppppppp/8/8/8/8/PPPPPPPP/NRKNRQBB w KQkq - 0 1 +nrkrnqbb/pppppppp/8/8/8/8/PPPPPPPP/NRKRNQBB w KQkq - 0 1 +rnnkrqbb/pppppppp/8/8/8/8/PPPPPPPP/RNNKRQBB w KQkq - 0 1 +rnknrqbb/pppppppp/8/8/8/8/PPPPPPPP/RNKNRQBB w KQkq - 0 1 +rnkrnqbb/pppppppp/8/8/8/8/PPPPPPPP/RNKRNQBB w KQkq - 0 1 +rknnrqbb/pppppppp/8/8/8/8/PPPPPPPP/RKNNRQBB w KQkq - 0 1 +rknrnqbb/pppppppp/8/8/8/8/PPPPPPPP/RKNRNQBB w KQkq - 0 1 +rkrnnqbb/pppppppp/8/8/8/8/PPPPPPPP/RKRNNQBB w KQkq - 0 1)"; + +std::vector Chess960StartingPositions() { + return absl::StrSplit(kChess960StartingFens, '\n'); +} + +} // namespace chess +} // namespace open_spiel diff --git 
a/scenarios/bargaining/open_spiel/open_spiel/games/chess/chess_board.cc b/scenarios/bargaining/open_spiel/open_spiel/games/chess/chess_board.cc new file mode 100644 index 0000000..3d0d6e2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/chess/chess_board.cc @@ -0,0 +1,2081 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/chess/chess_board.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/ascii.h" +#include "open_spiel/abseil-cpp/absl/strings/match.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/games/chess/chess_common.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace chess { +namespace { +constexpr const char* kShredderWhiteCastlingFiles = "ABCDEFGH"; +constexpr const char* kShredderBlackCastlingFiles = "abcdefgh"; +} + +bool IsMoveCharacter(char c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9'); +} + +std::pair SplitAnnotations(const std::string &move) { + for (int i = 0; i < move.size(); ++i) { + if (!IsMoveCharacter(move[i])) { + return {move.substr(0, i), std::string(absl::ClippedSubstr(move, i))}; + } + } + return {move, ""}; +} + +std::string ColorToString(Color c) { + switch (c) { + case Color::kBlack: + return "black"; + case Color::kWhite: + return "white"; + case Color::kEmpty: + return "empty"; + default: + SpielFatalError(absl::StrCat("Unknown color: ", c)); + return "This will never return."; + } +} + +absl::optional PieceTypeFromChar(char c) { + switch (toupper(c)) { + case 'P': + return PieceType::kPawn; + case 'N': + return PieceType::kKnight; + case 'B': + return PieceType::kBishop; + case 'R': + return PieceType::kRook; + case 'Q': + return PieceType::kQueen; + case 'K': + return PieceType::kKing; + default: + std::cerr << "Invalid piece type: " << c << std::endl; + return absl::nullopt; + } +} + +std::string PieceTypeToString(PieceType p, bool uppercase) { + switch (p) { + case PieceType::kEmpty: + return " "; + case PieceType::kPawn: + return uppercase ? "P" : "p"; + case PieceType::kKnight: + return uppercase ? "N" : "n"; + case PieceType::kBishop: + return uppercase ? "B" : "b"; + case PieceType::kRook: + return uppercase ? "R" : "r"; + case PieceType::kQueen: + return uppercase ? "Q" : "q"; + case PieceType::kKing: + return uppercase ? 
"K" : "k"; + default: + SpielFatalError("Unknown piece."); + return "This will never return."; + } +} + +std::string Piece::ToUnicode() const { + switch (color) { + case Color::kBlack: + switch (type) { + case PieceType::kEmpty: + return " "; + case PieceType::kPawn: + return "♟"; + case PieceType::kKnight: + return "♞"; + case PieceType::kBishop: + return "♝"; + case PieceType::kRook: + return "♜"; + case PieceType::kQueen: + return "♛"; + case PieceType::kKing: + return "♚"; + default: + SpielFatalError("Unknown piece."); + return "This will never return."; + } + case Color::kWhite: + switch (type) { + case PieceType::kEmpty: + return " "; + case PieceType::kPawn: + return "♙"; + case PieceType::kKnight: + return "♘"; + case PieceType::kBishop: + return "♗"; + case PieceType::kRook: + return "♖"; + case PieceType::kQueen: + return "♕"; + case PieceType::kKing: + return "♔"; + default: + SpielFatalError("Unknown piece type."); + return "This will never return."; + } + case Color::kEmpty: + return " "; + default: + SpielFatalError("Unknown color."); + return "This will never return."; + } +} + +std::string Piece::ToString() const { + std::string base = PieceTypeToString(type); + return color == Color::kWhite ? absl::AsciiStrToUpper(base) + : absl::AsciiStrToLower(base); +} + +absl::optional SquareFromString(const std::string &s) { + if (s.size() != 2) return kInvalidSquare; + + auto file = ParseFile(s[0]); + auto rank = ParseRank(s[1]); + if (file && rank) return Square{*file, *rank}; + return absl::nullopt; +} + +bool IsLongDiagonal(const chess::Square &from_sq, const chess::Square &to_sq, + int board_size) { + if (from_sq == to_sq) { + return false; + } + int half_board_size = board_size / 2; + if ((to_sq.y < half_board_size && to_sq.x < half_board_size) || + (to_sq.y >= half_board_size && to_sq.x >= half_board_size)) { + return from_sq.y - to_sq.y == from_sq.x - to_sq.x; + } else { + return from_sq.y - to_sq.y == to_sq.x - from_sq.x; + } +} + +std::string Move::ToString() const { + std::string extra; + if (promotion_type != PieceType::kEmpty) { + absl::StrAppend(&extra, ", promotion to ", + PieceTypeToString(promotion_type)); + } + if (is_castling()) { + absl::StrAppend(&extra, " (castle)"); + } + return absl::StrCat(piece.ToString(), " ", SquareToString(from), " to ", + SquareToString(to), extra); +} + +std::string Move::ToLAN(bool chess960, + const ChessBoard *board_ptr) const { + if (chess960 && is_castling()) { + // In chess960, when castling, the LAN format is different. It includes the + // it is castling with. 
+ SPIEL_CHECK_TRUE(board_ptr != nullptr); + Color to_play = board_ptr->ToPlay(); + absl::optional maybe_rook_sq = board_ptr->MaybeCastlingRookSquare( + to_play, castle_dir); + SPIEL_CHECK_TRUE(maybe_rook_sq.has_value()); + return absl::StrCat(SquareToString(from), + SquareToString(maybe_rook_sq.value())); + } else { + std::string promotion; + if (promotion_type != PieceType::kEmpty) { + promotion = PieceTypeToString(promotion_type, false); + } + return absl::StrCat(SquareToString(from), SquareToString(to), promotion); + } +} + +std::string Move::ToSAN(const ChessBoard &board) const { + std::string move_text; + PieceType piece_type = board.at(from).type; + if (is_castling()) { + if (castle_dir == CastlingDirection::kRight) { + move_text = "O-O"; + } else if (castle_dir == CastlingDirection::kLeft) { + move_text = "O-O-O"; + } else { + SpielFatalError("Unknown castling direction."); + } + } else { + switch (piece_type) { + case PieceType::kKing: + case PieceType::kQueen: + case PieceType::kRook: + case PieceType::kBishop: + case PieceType::kKnight: + move_text += PieceTypeToString(piece_type); + break; + case PieceType::kPawn: + // No piece type required. + break; + case PieceType::kEmpty: + std::cerr << "Move doesn't have a piece type" << std::endl; + } + + // Now we generate all moves from this position, and see if our file and + // rank are unique. + bool file_unique = true; + bool rank_unique = true; + bool disambiguation_required = false; + + board.GenerateLegalMoves([&](const Move &move) -> bool { + if (move.piece.type != piece.type) { + return true; // Continue generating moves. + } + if (move.to != to) { + return true; + } + if (move.from == from) { + // This is either us, or a promotion to a different type. We don't count + // them as ambiguous in either case. + return true; + } + disambiguation_required = true; + if (move.from.x == from.x) { + file_unique = false; + } else if (move.from.y == from.y) { + rank_unique = false; + } + return true; + }); + + bool file_required = false; + bool rank_required = false; + + if (piece_type == PieceType::kPawn && from.x != to.x) { + // Pawn captures always need file, and they will never require rank dis- + // ambiguation. + file_required = true; + } else if (disambiguation_required) { + if (file_unique) { + // This includes when both will disambiguate, in which case we have to + // use file. [FIDE Laws of Chess (2018): C.10.3]. + file_required = true; + } else if (rank_unique) { + rank_required = true; + } else { + // We have neither unique file nor unique rank. This is only possible + // with 3 or more pieces of the same type. + file_required = true; + rank_required = true; + } + } + + if (file_required) { + absl::StrAppend(&move_text, FileToString(from.x)); + } + + if (rank_required) { + absl::StrAppend(&move_text, RankToString(from.y)); + } + + // We have a capture if either 1) the destination square has a piece, or + // 2) we are making a diagonal pawn move (which can also be an en-passant + // capture, where the destination square would not have a piece). + auto piece_at_to_square = board.at(to); + if ((piece_at_to_square.type != PieceType::kEmpty) || + (piece_type == PieceType::kPawn && from.x != to.x)) { + absl::StrAppend(&move_text, "x"); + } + + // Destination square is always fully encoded. + absl::StrAppend(&move_text, SquareToString(to)); + + // Encode the promotion type if we have a promotion. 
+ switch (promotion_type) { + case PieceType::kEmpty: + break; + case PieceType::kQueen: + case PieceType::kRook: + case PieceType::kBishop: + case PieceType::kKnight: + absl::StrAppend(&move_text, "=", PieceTypeToString(promotion_type)); + break; + case PieceType::kKing: + case PieceType::kPawn: + std::cerr << "Cannot promote to " << PieceTypeToString(promotion_type) + << "! Only Q, R, B, N are allowed" << std::endl; + break; + } + } + + // Figure out if this is a check / checkmating move or not. + if (!board.KingInCheckAllowed()) { + auto board_copy = board; + board_copy.ApplyMove(*this); + if (board_copy.InCheck()) { + bool has_escape = false; + board_copy.GenerateLegalMoves([&](const Move &) -> bool { + has_escape = true; + return false; // No need to keep generating moves. + }); + + if (has_escape) { + // Check. + absl::StrAppend(&move_text, "+"); + } else { + // Checkmate. + absl::StrAppend(&move_text, "#"); + } + } + } + + return move_text; +} + +ChessBoard::ChessBoard(int board_size, bool king_in_check_allowed, + bool allow_pass_move) + : board_size_(board_size), + king_in_check_allowed_(king_in_check_allowed), + allow_pass_move_(allow_pass_move), + to_play_(Color::kWhite), + ep_square_(kInvalidSquare), + irreversible_move_counter_(0), + move_number_(1), + zobrist_hash_(0) { + board_.fill(kEmptyPiece); +} + +/*static*/ absl::optional ChessBoard::BoardFromFEN( + const std::string &fen, int board_size, + bool king_in_check_allowed, bool allow_pass_move) { + /* An FEN string includes a board position, side to play, castling + * rights, ep square, 50 moves clock, and full move number. In that order. + * + * Eg. start position is: + * rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1 + * + * Board is described from rank 8 to rank 1, and files from a to h. Empty + * squares are encoded as number of consecutive empty squares. + * + * Many FEN strings don't have the last two fields. + */ + ChessBoard board(board_size, king_in_check_allowed, allow_pass_move); + + std::vector fen_parts = absl::StrSplit(fen, ' '); + + if (fen_parts.size() != 6 && fen_parts.size() != 4) { + std::cerr << "Invalid FEN: " << fen << std::endl; + return absl::nullopt; + } + + std::string &piece_configuration = fen_parts[0]; + std::string &side_to_move = fen_parts[1]; + std::string &castling_rights = fen_parts[2]; + std::string &ep_square = fen_parts[3]; + + // These are defaults if the FEN string doesn't have these fields. + std::string fifty_clock = "0"; + std::string move_number = "1"; + + if (fen_parts.size() == 6) { + fifty_clock = fen_parts[4]; + move_number = fen_parts[5]; + } + + std::vector piece_config_by_rank = + absl::StrSplit(piece_configuration, '/'); + + for (int8_t current_y = board_size - 1; current_y >= 0; --current_y) { + std::string &rank = piece_config_by_rank[board_size - current_y - 1]; + int8_t current_x = 0; + for (char c : rank) { + if (current_x >= board_size) { + std::cerr << "Too many things on FEN rank: " << rank << std::endl; + return absl::nullopt; + } + + if (c >= '1' && c <= '8') { + current_x += c - '0'; + } else { + auto piece_type = PieceTypeFromChar(c); + if (!piece_type) { + std::cerr << "Invalid piece type in FEN: " << c << std::endl; + return absl::nullopt; + } + + Color color = isupper(c) ? 
Color::kWhite : Color::kBlack; + board.set_square(Square{current_x, current_y}, + Piece{color, *piece_type}); + + ++current_x; + } + } + } + + if (side_to_move == "b") { + board.SetToPlay(Color::kBlack); + } else if (side_to_move == "w") { + board.SetToPlay(Color::kWhite); + } else { + std::cerr << "Invalid side to move in FEN: " << side_to_move << std::endl; + return absl::nullopt; + } + + // Castling rights are done differently in standard FEN versus shredder FEN. + // https://www.chessprogramming.org/Forsyth-Edwards_Notation#Shredder-FEN. + // + // If we have a castling right, we look for a rook in that position. In + // chess960 there must be a rook on either side of the king, but all 3 can + // otherwise be in any square. When using the standard notations ("KQkq"): if + // we find one rook on that side, that is used as the castling square. + // Otherwise we use capital letters corresponding to the file of the rook + // that can castle. E.g. "Hkq" would mean white can castle (which side depends + // on which file the white king is on), and black can castle on both sides. + if (castling_rights.find('K') != std::string::npos) { // NOLINT + Square rook_sq = + board.FindRookForCastling(Color::kWhite, CastlingDirection::kRight); + board.SetCastlingRight(Color::kWhite, CastlingDirection::kRight, rook_sq); + } + + if (castling_rights.find('Q') != std::string::npos) { // NOLINT + Square rook_sq = + board.FindRookForCastling(Color::kWhite, CastlingDirection::kLeft); + board.SetCastlingRight(Color::kWhite, CastlingDirection::kLeft, rook_sq); + } + + if (castling_rights.find('k') != std::string::npos) { // NOLINT + Square rook_sq = + board.FindRookForCastling(Color::kBlack, CastlingDirection::kRight); + board.SetCastlingRight(Color::kBlack, CastlingDirection::kRight, rook_sq); + } + + if (castling_rights.find('q') != std::string::npos) { // NOLINT + Square rook_sq = + board.FindRookForCastling(Color::kBlack, CastlingDirection::kLeft); + board.SetCastlingRight(Color::kBlack, CastlingDirection::kLeft, rook_sq); + } + + // Now check each character for the Shredder-based castling rights. These will + // be supported for regular chess but is only necessary for Chess960. + // Checking these here in addition to the above allows for a combination of + // Shredder and standard FEN notation for castling, e.g. "Gkq", which is + // sometimes used (see e.g. the following example): + // https://chess.stackexchange.com/questions/19331/how-does-x-fen-chess960-fen-differentiate-from-traditional-fen-notation + for (char c : castling_rights) { + for (Color color : {Color::kWhite, Color::kBlack}) { + std::string shredder_castling_files( + color == Color::kWhite ? kShredderWhiteCastlingFiles + : kShredderBlackCastlingFiles); + Square king_square = board.find(Piece{color, PieceType::kKing}); + size_t idx = shredder_castling_files.find(c); + if (idx != std::string::npos) { + CastlingDirection direction = idx > king_square.x ? + CastlingDirection::kRight : CastlingDirection::kLeft; + Square rook_sq{static_cast(idx), king_square.y}; + SPIEL_CHECK_TRUE(board.at(rook_sq).type == PieceType::kRook); + SPIEL_CHECK_TRUE(board.at(rook_sq).color == color); + board.SetCastlingRight(color, direction, rook_sq); + } + } + } + + if (ep_square != "-") { + auto maybe_ep_square = SquareFromString(ep_square); + if (!maybe_ep_square) { + std::cerr << "Invalid en passant square in FEN: " << ep_square + << std::endl; + return absl::nullopt; + } + // Only set the en-passant square if it's being threatened. 
This is to + // prevent changing the hash of the board for the purposes of the + // repetition rule. + if (board.EpSquareThreatened(*maybe_ep_square)) { + board.SetEpSquare(*maybe_ep_square); + } + } + + board.SetIrreversibleMoveCounter(std::stoi(fifty_clock)); + board.SetMovenumber(std::stoi(move_number)); + + return board; +} + +Square ChessBoard::find(const Piece &piece) const { + for (int8_t y = 0; y < board_size_; ++y) { + for (int8_t x = 0; x < board_size_; ++x) { + Square sq{x, y}; + if (at(sq) == piece) { + return sq; + } + } + } + + return kInvalidSquare; +} + +void ChessBoard::GenerateLegalMoves(const MoveYieldFn &yield, + Color color) const { + // We do not need to filter moves that would result for King to move / stay + // in check, so we can yield all pseudo legal moves + if (king_in_check_allowed_) { + GeneratePseudoLegalMoves(yield, color); + } else { + auto king_square = find(Piece{color, PieceType::kKing}); + + GeneratePseudoLegalMoves( + [this, &king_square, &yield, color](const Move &move) { + // See if the move is legal by applying, checking whether the king is + // under attack, and undoing the move. + // TODO: Optimize this. + auto board_copy = *this; + board_copy.ApplyMove(move); + + auto ks = + at(move.from).type == PieceType::kKing ? move.to : king_square; + + if (board_copy.UnderAttack(ks, color)) { + return true; + } else { + return yield(move); + } + }, + color); + } +} + +void ChessBoard::GeneratePseudoLegalMoves( + const MoveYieldFn &yield, Color color, + PseudoLegalMoveSettings settings) const { + bool generating = true; + +#define YIELD(move) \ + if (!yield(move)) { \ + generating = false; \ + } + + if (allow_pass_move_) YIELD(kPassMove); + + for (int8_t y = 0; y < board_size_ && generating; ++y) { + for (int8_t x = 0; x < board_size_ && generating; ++x) { + Square sq{x, y}; + auto &piece = at(sq); + if (piece.type != PieceType::kEmpty && piece.color == color) { + switch (piece.type) { + case PieceType::kKing: + GenerateKingDestinations_( + sq, color, + [&yield, &piece, &sq, &generating](const Square &to) { + YIELD(Move(sq, to, piece)); + }); + GenerateCastlingDestinations_( + sq, color, settings, + [&yield, &piece, &sq, &generating](const Square &to) { + if (to.x == 2) { + YIELD(Move(sq, to, piece, PieceType::kEmpty, + CastlingDirection::kLeft)); + } else if (to.x == 6) { + YIELD(Move(sq, to, piece, PieceType::kEmpty, + CastlingDirection::kRight)); + } + }); + break; + case PieceType::kQueen: + GenerateQueenDestinations_( + sq, color, settings, + [&yield, &sq, &piece, &generating](const Square &to) { + YIELD(Move(sq, to, piece)); + }); + break; + case PieceType::kRook: + GenerateRookDestinations_( + sq, color, settings, + [&yield, &sq, &piece, &generating](const Square &to) { + YIELD(Move(sq, to, piece)); + }); + break; + case PieceType::kBishop: + GenerateBishopDestinations_( + sq, color, settings, + [&yield, &sq, &piece, &generating](const Square &to) { + YIELD(Move(sq, to, piece)); + }); + break; + case PieceType::kKnight: + GenerateKnightDestinations_( + sq, color, + [&yield, &sq, &piece, &generating](const Square &to) { + YIELD(Move(sq, to, piece)); + }); + break; + case PieceType::kPawn: + GeneratePawnDestinations_( + sq, color, settings, + [&yield, &sq, &piece, &generating, this](const Square &to) { + if (IsPawnPromotionRank(to)) { + YIELD(Move(sq, to, piece, PieceType::kQueen)); + YIELD(Move(sq, to, piece, PieceType::kRook)); + YIELD(Move(sq, to, piece, PieceType::kBishop)); + YIELD(Move(sq, to, piece, PieceType::kKnight)); + } else { + 
YIELD(Move(sq, to, piece)); + } + }); + GeneratePawnCaptureDestinations_( + sq, color, settings, true, /* include enpassant */ + [&yield, &sq, &piece, &generating, this](const Square &to) { + if (IsPawnPromotionRank(to)) { + YIELD(Move(sq, to, piece, PieceType::kQueen)); + YIELD(Move(sq, to, piece, PieceType::kRook)); + YIELD(Move(sq, to, piece, PieceType::kBishop)); + YIELD(Move(sq, to, piece, PieceType::kKnight)); + } else { + YIELD(Move(sq, to, piece)); + } + }); + break; + default: + std::cerr << "Unknown piece type: " << static_cast(piece.type) + << std::endl; + } + } + } + } + +#undef YIELD +} + +void ChessBoard::GenerateLegalPawnCaptures(const MoveYieldFn &yield, + Color color) const { + // We do not need to filter moves that would result for King to move / stay + // in check, so we can yield all pseudo legal moves + if (king_in_check_allowed_) { + GeneratePseudoLegalPawnCaptures(yield, color); + } else { + auto king_square = find(Piece{color, PieceType::kKing}); + + GeneratePseudoLegalPawnCaptures( + [this, &king_square, &yield, color](const Move &move) { + // See if the move is legal by applying, checking whether the king is + // under attack, and undoing the move. + // TODO: Optimize this. + auto board_copy = *this; + board_copy.ApplyMove(move); + + auto ks = + at(move.from).type == PieceType::kKing ? move.to : king_square; + + if (board_copy.UnderAttack(ks, color)) { + return true; + } else { + return yield(move); + } + }, + color); + } +} + +void ChessBoard::GeneratePseudoLegalPawnCaptures( + const MoveYieldFn &yield, Color color, + PseudoLegalMoveSettings settings) const { + bool generating = true; + +#define YIELD(move) \ + if (!yield(move)) { \ + generating = false; \ + } + + for (int8_t y = 0; y < board_size_ && generating; ++y) { + for (int8_t x = 0; x < board_size_ && generating; ++x) { + Square sq{x, y}; + auto &piece = at(sq); + if (piece.type == PieceType::kPawn && piece.color == color) { + GeneratePawnCaptureDestinations_( + sq, color, settings, true, /* include enpassant */ + [&yield, &sq, &piece, &generating, this](const Square &to) { + if (IsPawnPromotionRank(to)) { + YIELD(Move(sq, to, piece, PieceType::kQueen)); + YIELD(Move(sq, to, piece, PieceType::kRook)); + YIELD(Move(sq, to, piece, PieceType::kBishop)); + YIELD(Move(sq, to, piece, PieceType::kKnight)); + } else { + YIELD(Move(sq, to, piece)); + } + }); + } + } + } + +#undef YIELD +} + +bool ChessBoard::IsBreachingMove(Move tested_move) const { + if (tested_move == kPassMove) return false; + + const Piece& piece = tested_move.piece; + if (piece.type == PieceType::kEmpty) return false; + if (piece.type == PieceType::kKnight) return false; + if (piece.type == PieceType::kPawn) return false; + // King never makes breaching moves: a castling that would be breaching + // is considered an illegal move. + if (piece.type == PieceType::kKing) return false; + + SPIEL_DCHECK_TRUE(piece.type == PieceType::kQueen || + piece.type == PieceType::kRook || + piece.type == PieceType::kBishop); + + // The move is not breaching, if it is generated with + // PseudoLegalMoveSettings::kAcknowledgeEnemyPieces + + bool is_breaching = true; + const auto check_breaching = [&](const Square &to) { + if (to == tested_move.to) is_breaching = false; + }; + + // Queen moves are a combination of rook and bishop moves. 
+ if (piece.type == PieceType::kRook || piece.type == PieceType::kQueen) { + GenerateRookDestinations_(tested_move.from, piece.color, + kAcknowledgeEnemyPieces, check_breaching); + } + if (piece.type == PieceType::kBishop || piece.type == PieceType::kQueen) { + GenerateBishopDestinations_(tested_move.from, piece.color, + kAcknowledgeEnemyPieces, check_breaching); + } + + return is_breaching; +} + +void ChessBoard::BreachingMoveToCaptureMove(Move* move) const { + SPIEL_CHECK_TRUE(move); + SPIEL_DCHECK_TRUE(IsBreachingMove(*move)); + int dx = move->to.x - move->from.x; + int dy = move->to.y - move->from.y; + SPIEL_DCHECK_TRUE(dx == 0 || dy == 0 || std::abs(dx) == std::abs(dy)); + + // Cap values to [-1, 1] range to make a proper step size. + dx = std::max(-1, dx); + dx = std::min(1, dx); + dy = std::max(-1, dy); + dy = std::min(1, dy); + const Offset step{static_cast(dx), + static_cast(dy)}; + + Square sq; + for (sq = move->from + step; sq != move->to; sq += step) { + if (at(sq).type != PieceType::kEmpty) break; + } + move->to = sq; +} + +bool ChessBoard::HasSufficientMaterial() const { + // Try to detect these 4 conditions. + // 1. K vs K + // 2. K+B vs K + // 3. K+N vs K + // 4. K+B* vs K+B* (all bishops on same coloured squares) + + // If king is allowed to move to/stay in check, any material is sufficient + // material. If there is no material, then there is also no opponent king and + // that means the game had already ended. + if (king_in_check_allowed_) { + return true; + } + + // Indexed by colour. + int knights[2] = {0, 0}; + int dark_bishops[2] = {0, 0}; + int light_bishops[2] = {0, 0}; + + for (int8_t y = 0; y < board_size_; ++y) { + for (int8_t x = 0; x < board_size_; ++x) { + const auto &piece = at(Square{x, y}); + // If we have a queen, rook, or pawn, we have sufficient material. + // This is early exit for almost all positions. We check rooks first + // because they tend to appear on the corners of boards. + if (piece.color != Color::kEmpty) { + if (piece.type == PieceType::kRook || piece.type == PieceType::kPawn || + piece.type == PieceType::kQueen) { + return true; + } + + // We don't care about kings. + if (piece.type == PieceType::kKing) { + continue; + } + + if (piece.type == PieceType::kKnight) { + ++knights[static_cast(piece.color)]; + } + + if (piece.type == PieceType::kBishop) { + bool is_dark = ((x + y) % 2 == 0); + if (is_dark) { + ++dark_bishops[static_cast(piece.color)]; + } else { + ++light_bishops[static_cast(piece.color)]; + } + } + } + } + } + + // Having two knights allows helpmate. + if (knights[0] > 1 || knights[1] > 1) { + return true; + } + + if (knights[0] == 1) { + // If we have anything else, mate is possible. + if (light_bishops[0] > 0 || dark_bishops[0] > 0) { + return true; + } else { + // If one side only has a knight, the other side must have something (#3). + return knights[1] > 0 || dark_bishops[1] > 0 || light_bishops[1] > 0; + } + } + + if (knights[1] == 1) { + // If we have anything else, mate is possible. + if (light_bishops[1] > 0 || dark_bishops[1] > 0) { + return true; + } else { + // If one side only has a knight, the other side must have something (#3). + return knights[0] > 0 || dark_bishops[0] > 0 || light_bishops[0] > 0; + } + } + + // Now we only have bishops and kings. We must have two bishops on opposite + // coloured squares (from either side) to not be a draw. + // This covers #1, #2, and #4. 
+ bool dark_bishop_exists = (dark_bishops[0] + dark_bishops[1]) > 0; + bool light_bishop_exists = (light_bishops[0] + light_bishops[1]) > 0; + return dark_bishop_exists && light_bishop_exists; +} + +absl::optional ChessBoard::ParseMove(const std::string &move, + bool chess960) const { + // First see if they are in the long form - + // "anan" (eg. "e2e4") or "anana" (eg. "f7f8q") + // SAN moves will never have this form because an SAN move that starts with + // a lowercase letter must be a pawn move, and pawn moves will never require + // rank disambiguation (meaning the second character will never be a number). + auto lan_move = ParseLANMove(move, chess960); + if (lan_move) { + return lan_move; + } + + auto san_move = ParseSANMove(move); + if (san_move) { + return san_move; + } + + return absl::nullopt; +} + +absl::optional ChessBoard::ParseSANMove( + const std::string &move_str) const { + std::string move = move_str; + + if (move.empty()) return absl::nullopt; + + if (absl::StartsWith(move, "O-O-O")) { + // Queenside / left castling. + std::vector candidates; + GenerateLegalMoves([&candidates](const Move &move) { + if (move.is_castling() && move.to.x == 2) { + candidates.push_back(move); + } + return true; + }); + if (candidates.size() == 1) return candidates[0]; + std::cerr << "Invalid O-O-O" << std::endl; + return absl::nullopt; + } + + if (absl::StartsWith(move, "O-O")) { + // Kingside / right castling. + std::vector candidates; + GenerateLegalMoves([&candidates](const Move &move) { + if (move.is_castling() && move.to.x == 6) { + candidates.push_back(move); + } + return true; + }); + if (candidates.size() == 1) return candidates[0]; + std::cerr << "Invalid O-O" << std::endl; + return absl::nullopt; + } + + auto move_annotation = SplitAnnotations(move); + move = move_annotation.first; + if (move.empty()) { return absl::nullopt; } + + auto annotation = move_annotation.second; + + // A move starts with a single letter identifying the piece. This may be + // omitted for pawns. + PieceType piece_type = PieceType::kPawn; + std::string pieces = "PNBRQK"; + if (pieces.find(move[0]) != std::string::npos) { // NOLINT + auto maybe_piece_type = PieceTypeFromChar(move[0]); + if (!maybe_piece_type) { + std::cerr << "Invalid piece type: " << move[0] << std::endl; + return absl::nullopt; + } + piece_type = *maybe_piece_type; + move = std::string(absl::ClippedSubstr(move, 1)); + } + + // A move always ends with the destination square. + if (move.size() < 2) { + std::cerr << "Missing destination square" << std::endl; + return absl::nullopt; + } + auto destination = std::string(absl::ClippedSubstr(move, move.size() - 2)); + move = move.substr(0, move.size() - 2); + + auto dest_file = ParseFile(destination[0]); + auto dest_rank = ParseRank(destination[1]); + + if (!dest_file || !dest_rank) { + std::cerr << "Failed to parse destination square: " << destination + << std::endl; + return absl::nullopt; + } + + Square destination_square{*dest_file, *dest_rank}; + + // Captures are indicated by a 'x' immediately preceding the destination. + // This is irrelevant for parsing, so we just drop it. + if (!move.empty() && move[move.size() - 1] == 'x') { + move = move.substr(0, move.size() - 1); + } + + // If necessary, source rank and/or file are also included for + // disambiguation. 
+ absl::optional source_file, source_rank; + if (!move.empty()) { + source_file = ParseFile(move[0]); + if (source_file) { + move = std::string(absl::ClippedSubstr(move, 1)); + } + } + if (!move.empty()) { + source_rank = ParseRank(move[0]); + if (source_rank) { + move = std::string(absl::ClippedSubstr(move, 1)); + } + } + + if (!move.empty()) { return absl::nullopt; } + + // Pawn promations are annotated with =Q to indicate the promotion type. + absl::optional promotion_type; + if (!annotation.empty() && annotation[0] == '=') { + if (annotation.size() < 2) { return absl::nullopt; } + auto maybe_piece = PieceTypeFromChar(annotation[1]); + if (!maybe_piece) return absl::optional(); + promotion_type = maybe_piece; + } + + std::vector candidates; + GenerateLegalMoves([&candidates, destination_square, piece_type, source_file, + source_rank, promotion_type, this](const Move &move) { + PieceType moving_piece_type = at(move.from).type; + if (move.to == destination_square && moving_piece_type == piece_type && + (!source_file || move.from.x == *source_file) && + (!source_rank || move.from.y == *source_rank) && + (!promotion_type || move.promotion_type == *promotion_type)) { + candidates.push_back(move); + } + return true; + }); + + if (candidates.size() == 1) return candidates[0]; + std::cerr << "expected exactly one matching move, got " << candidates.size() + << std::endl; + return absl::optional(); +} + +absl::optional ChessBoard::ParseLANMove(const std::string &move, + bool chess960) const { + if (move.empty()) { return absl::nullopt; } + + // Long algebraic notation moves (of the variant we care about) are in one of + // two forms - + // "anan" (eg. "e2e4") or "anana" (eg. "f7f8q") + if (move.size() == 4 || move.size() == 5) { + if (move[0] < 'a' || move[0] >= ('a' + board_size_) || move[1] < '1' || + move[1] >= ('1' + board_size_) || move[2] < 'a' || + move[2] >= ('a' + board_size_) || move[3] < '1' || + move[3] >= ('1' + board_size_)) { + return absl::nullopt; + } + + if (move.size() == 5 && move[4] != 'q' && move[4] != 'r' && + move[4] != 'b' && move[4] != 'n') { + return absl::nullopt; + } + + auto from = SquareFromString(move.substr(0, 2)); + auto to = SquareFromString(std::string(absl::ClippedSubstr(move, 2, 2))); + if (from && to) { + absl::optional promotion_type; + if (move.size() == 5) { + promotion_type = PieceTypeFromChar(move[4]); + if (!promotion_type) { + std::cerr << "Invalid promotion type" << std::endl; + return absl::nullopt; + } + } + + // Castling in chess960 is a special case, expressed in LAN as + // . + if (chess960 && at(*from).color == at(*to).color && + at(*from).type == PieceType::kKing && + at(*to).type == PieceType::kRook) { + std::vector candidates; + GenerateLegalMoves( + [&from, &candidates](const Move &move) { + if (move.from == *from && move.is_castling()) { + candidates.push_back(move); + } + return true; + }); + + Color moving_color = at(*from).color; + for (const Move& move : candidates) { + auto maybe_castle_rook_sq = MaybeCastlingRookSquare( + moving_color, move.castle_dir); + if (maybe_castle_rook_sq.has_value() && + *maybe_castle_rook_sq == *to) { + return move; + } + } + std::cerr << "Could not match chess960 castling move with a legal move " + << move << std::endl; + std::cerr << *this << std::endl; + return Move(); + } + + // Other regular moves. 
+ std::vector candidates; + GenerateLegalMoves( + [&to, &from, &promotion_type, &candidates](const Move &move) { + if (move.from == *from && move.to == *to && + (!promotion_type || (move.promotion_type == *promotion_type))) { + candidates.push_back(move); + } + return true; + }); + + if (chess960) { + // Chess960: Remove the castling moves as we checked for them in the + // special case above. + candidates.erase(std::remove_if(candidates.begin(), candidates.end(), + [](const Move &move) { + return move.is_castling(); + }), + candidates.end()); + } + + if (candidates.empty()) { + std::cerr << "Illegal move - " << move << " on " << ToUnicodeString() + << std::endl; + return Move(); + } else if (candidates.size() > 1) { + std::cerr << "Multiple matches (is promotion type missing?) - " << move + << std::endl; + return Move(); + } + + return candidates[0]; + } + } else { + return absl::nullopt; + } + SpielFatalError("All conditionals failed; this is a bug."); +} + +void ChessBoard::ApplyMove(const Move &move) { + // Skip applying a move if it's a pass. + if (move == kPassMove) { + if (to_play_ == Color::kBlack) ++move_number_; + SetToPlay(OppColor(to_play_)); + SetEpSquare(chess::kInvalidSquare); + return; + } + + // Most moves are simple - we remove the moving piece from the original + // square, and put it on the destination square, overwriting whatever was + // there before, update the 50 move counter, and update castling rights. + // + // There are a few exceptions - castling, en passant, promotions, double pawn + // pushes. They require special adjustments in addition to those things. We + // do them after the basic apply move. + + Piece moving_piece = at(move.from); + Piece destination_piece = at(move.to); + + // We have to do it in this order because in Chess960 the king can castle + // in-place! That's the only possibility for move.from == move.to. + set_square(move.from, kEmptyPiece); + set_square(move.to, moving_piece); + + // Whether the move is irreversible for the purpose of the 50-moves rule. Note + // that although castling (and losing castling rights) should be irreversible, + // it is counted as reversible here. + // Irreversible moves are pawn moves and captures. We don't have to make a + // special case for en passant, since they are pawn moves anyways. + // Note that the capture case has to check that the piece is of the opposite + // color, since in chess960 the king can castle with the rook in the + // destination square. + bool irreversible = + (moving_piece.type == PieceType::kPawn) || // pawn move + (destination_piece.type != PieceType::kEmpty && + destination_piece.color != moving_piece.color); // capture + + if (irreversible) { + SetIrreversibleMoveCounter(0); + } else { + SetIrreversibleMoveCounter(IrreversibleMoveCounter() + 1); + } + + // Castling rights can be lost in a few different ways - + // 1. The king moves (loses both rights), including castling. We do this later + // since we still need the rook locations in case this is a castle. + // 2. A rook moves (loses the right on that side). + // 3. Captures an opponent rook (OPPONENT loses the right on that side). 
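// Illustrative: if the white king-side rook leaves its castling square, only
// white's right-side right is cleared by the first block below; if instead
// black captures that rook on its square, the second block clears the right
// for OppColor(to_play_), i.e. for white, matching case 3 above.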
+ if (moving_piece.type == PieceType::kRook) { + if (castling_rights_[ToInt(to_play_)].left_castle.has_value() && + *castling_rights_[ToInt(to_play_)].left_castle == move.from) { + SetCastlingRight(to_play_, CastlingDirection::kLeft, absl::nullopt); + } else if (castling_rights_[ToInt(to_play_)].right_castle.has_value() && + *castling_rights_[ToInt(to_play_)].right_castle == move.from) { + SetCastlingRight(to_play_, CastlingDirection::kRight, absl::nullopt); + } + } + if (destination_piece.type == PieceType::kRook) { + if (castling_rights_[ToInt(OppColor(to_play_))].left_castle.has_value() && + *castling_rights_[ToInt(OppColor(to_play_))].left_castle == move.to) { + SetCastlingRight(OppColor(to_play_), CastlingDirection::kLeft, + absl::nullopt); + } else if (castling_rights_[ToInt(OppColor(to_play_))] + .right_castle.has_value() && + *castling_rights_[ToInt(OppColor(to_play_))].right_castle == + move.to) { + SetCastlingRight(OppColor(to_play_), CastlingDirection::kRight, + absl::nullopt); + } + } + + // Special cases that require adjustment - + // 1. Castling + if (move.is_castling()) { + SPIEL_CHECK_EQ(moving_piece.type, PieceType::kKing); + // We can tell which side we are castling to using "to" square. This is true + // even in chess960 (destination squares are same as in normal chess). + // However, we have to be careful of the edge case where the king actually + // doesn't move. + int8_t y = to_play_ == Color::kWhite ? 0 : 7; + if (move.to == Square{2, y}) { + // left castle + const auto &maybe_rook_sq = castling_rights_[ToInt(to_play_)].left_castle; + SPIEL_CHECK_TRUE(maybe_rook_sq.has_value()); + set_square(*maybe_rook_sq, kEmptyPiece); + set_square(Square{2, y}, Piece{to_play_, PieceType::kKing}); + set_square(Square{3, y}, Piece{to_play_, PieceType::kRook}); + } else if (move.to == Square{6, y}) { + // right castle + const auto &maybe_rook_sq = + castling_rights_[ToInt(to_play_)].right_castle; + SPIEL_CHECK_TRUE(maybe_rook_sq.has_value()); + set_square(*maybe_rook_sq, kEmptyPiece); + set_square(Square{6, y}, Piece{to_play_, PieceType::kKing}); + set_square(Square{5, y}, Piece{to_play_, PieceType::kRook}); + } else { + std::cerr << "Trying to castle but destination " << move.to.ToString() + << " is not valid." << std::endl; + SPIEL_CHECK_TRUE(false); + } + } + + if (moving_piece.type == PieceType::kKing) { + SetCastlingRight(to_play_, CastlingDirection::kLeft, absl::nullopt); + SetCastlingRight(to_play_, CastlingDirection::kRight, absl::nullopt); + } + + // 2. En-passant + if (moving_piece.type == PieceType::kPawn && move.from.x != move.to.x && + destination_piece.type == PieceType::kEmpty) { + if (move.to != EpSquare()) { + std::cerr << "We are trying to capture an empty square " + << "with a pawn, but the square is not the en passant square:\n" + << *this << "\n" + << "Move: " << move.ToString() << std::endl; + SpielFatalError("Trying to apply an invalid move"); + } + Square captured_pawn_square = move.to; + if (to_play_ == Color::kWhite) { + --captured_pawn_square.y; + } else { + ++captured_pawn_square.y; + } + SPIEL_CHECK_EQ(at(captured_pawn_square), + (Piece{OppColor(to_play_), PieceType::kPawn})); + set_square(captured_pawn_square, kEmptyPiece); + } + + // 3. Promotions + if (moving_piece.type == PieceType::kPawn && IsPawnPromotionRank(move.to)) { + set_square(move.to, Piece{at(move.to).color, move.promotion_type}); + } + + // 4. 
Double push + SetEpSquare(kInvalidSquare); + if (moving_piece.type == PieceType::kPawn && + abs(move.from.y - move.to.y) == 2) { + Square ep_square{move.from.x, + static_cast((move.from.y + move.to.y) / 2)}; + // Only set the en-passant square if it's being threatened. This is to + // prevent changing the hash of the board for the purposes of the + // repetition rule. + if (EpSquareThreatened(ep_square)) { + SetEpSquare(ep_square); + } + } + + if (to_play_ == Color::kBlack) { + ++move_number_; + } + + SetToPlay(OppColor(to_play_)); +} + +bool ChessBoard::TestApplyMove(const Move &move) { + Color color = to_play_; + ApplyMove(move); + return !UnderAttack(find(Piece{color, PieceType::kKing}), color); +} + +bool ChessBoard::UnderAttack(const Square &sq, Color our_color) const { + SPIEL_CHECK_NE(sq, kInvalidSquare); + + bool under_attack = false; + Color opponent_color = OppColor(our_color); + + // We do this by pretending we are a piece of different types, and seeing if + // we can attack opponent pieces. Eg. if we pretend we are a knight, and can + // attack an opponent knight, that means the knight can also attack us. + + // King moves (this is possible because we use this function for checking + // whether we are moving into check, and we can be trying to move the king + // into a square attacked by opponent king). + GenerateKingDestinations_( + sq, our_color, [this, &under_attack, &opponent_color](const Square &to) { + if (at(to) == Piece{opponent_color, PieceType::kKing}) { + under_attack = true; + } + }); + if (under_attack) { + return true; + } + + // Rook moves (for rooks and queens) + GenerateRookDestinations_( + sq, our_color, PseudoLegalMoveSettings::kAcknowledgeEnemyPieces, + [this, &under_attack, &opponent_color](const Square &to) { + if ((at(to) == Piece{opponent_color, PieceType::kRook}) || + (at(to) == Piece{opponent_color, PieceType::kQueen})) { + under_attack = true; + } + }); + if (under_attack) { + return true; + } + + // Bishop moves (for bishops and queens) + GenerateBishopDestinations_( + sq, our_color, PseudoLegalMoveSettings::kAcknowledgeEnemyPieces, + [this, &under_attack, &opponent_color](const Square &to) { + if ((at(to) == Piece{opponent_color, PieceType::kBishop}) || + (at(to) == Piece{opponent_color, PieceType::kQueen})) { + under_attack = true; + } + }); + if (under_attack) { + return true; + } + + // Knight moves + GenerateKnightDestinations_( + sq, our_color, [this, &under_attack, &opponent_color](const Square &to) { + if (at(to) == Piece{opponent_color, PieceType::kKnight}) { + under_attack = true; + } + }); + if (under_attack) { + return true; + } + + // Pawn captures. + GeneratePawnCaptureDestinations_( + sq, our_color, PseudoLegalMoveSettings::kAcknowledgeEnemyPieces, + false /* no ep */, + [this, &under_attack, &opponent_color](const Square &to) { + if (at(to) == Piece{opponent_color, PieceType::kPawn}) { + under_attack = true; + } + }); + if (under_attack) { + return true; + } + + return false; +} + +std::string ChessBoard::DebugString(bool shredder_fen) const { + std::string s; + s = absl::StrCat("FEN: ", ToFEN(shredder_fen), "\n"); + absl::StrAppend(&s, "\n ---------------------------------\n"); + for (int8_t y = board_size_ - 1; y >= 0; --y) { + // Rank label. + absl::StrAppend(&s, RankToString(y), " "); + + // Pieces on the rank. 
+ for (int8_t x = 0; x < board_size_; ++x) { + Square sq{x, y}; + absl::StrAppend(&s, "| ", at(sq).ToString(), " "); + } + absl::StrAppend(&s, "|\n"); + absl::StrAppend(&s, " ---------------------------------\n"); + } + + // File labels. + absl::StrAppend(&s, " "); + for (int8_t x = 0; x < board_size_; ++x) { + absl::StrAppend(&s, FileToString(x), " "); + } + absl::StrAppend(&s, "\n"); + + absl::StrAppend(&s, "To play: ", to_play_ == Color::kWhite ? "W" : "B", "\n"); + absl::StrAppend(&s, "En passant square: ", SquareToString(EpSquare()), "\n"); + absl::StrAppend(&s, "50-moves clock: ", IrreversibleMoveCounter(), "\n"); + absl::StrAppend(&s, "Move number: ", move_number_, "\n\n"); + + absl::StrAppend(&s, "Castling rights:\n"); + absl::StrAppend(&s, "White left (queen-side): ", + CastlingRight(Color::kWhite, CastlingDirection::kLeft), "\n"); + if (CastlingRight(Color::kWhite, CastlingDirection::kLeft)) { + absl::StrAppend( + &s, "White left (queen-side) rook: ", + MaybeCastlingRookSquare(Color::kWhite, CastlingDirection::kLeft) + .value() + .ToString(), + "\n"); + } + absl::StrAppend(&s, "White right (king-side): ", + CastlingRight(Color::kWhite, CastlingDirection::kRight), + "\n"); + if (CastlingRight(Color::kWhite, CastlingDirection::kRight)) { + absl::StrAppend( + &s, "White right (king-side) rook: ", + MaybeCastlingRookSquare(Color::kWhite, CastlingDirection::kRight) + .value() + .ToString(), + "\n"); + } + absl::StrAppend(&s, "Black left (queen-side): ", + CastlingRight(Color::kBlack, CastlingDirection::kLeft), "\n"); + if (CastlingRight(Color::kBlack, CastlingDirection::kLeft)) { + absl::StrAppend( + &s, "Black left (queen-side) rook: ", + MaybeCastlingRookSquare(Color::kBlack, CastlingDirection::kLeft) + .value() + .ToString(), + "\n"); + } + absl::StrAppend(&s, "Black right (king-side): ", + CastlingRight(Color::kBlack, CastlingDirection::kRight), + "\n"); + if (CastlingRight(Color::kBlack, CastlingDirection::kRight)) { + absl::StrAppend( + &s, "Black right (king-side) rook: ", + MaybeCastlingRookSquare(Color::kBlack, CastlingDirection::kRight) + .value() + .ToString(), + "\n"); + } + absl::StrAppend(&s, "\n"); + + return s; +} + +// King moves without castling. +template +void ChessBoard::GenerateKingDestinations_(Square sq, Color color, + const YieldFn &yield) const { + static const std::array kOffsets = { + {{1, 0}, {1, 1}, {1, -1}, {0, 1}, {0, -1}, {-1, 1}, {-1, 0}, {-1, -1}}}; + + for (const auto &offset : kOffsets) { + Square dest = sq + offset; + if (InBoardArea(dest) && IsEmptyOrEnemy(dest, color)) { + yield(dest); + } + } +} + +// Whether all squares between sq1 and sq2 exclusive are empty, and +// optionally safe (not under attack). +// +// The exception_square only set to something in between from_sq and to_sq in +// Chess960 (because it can contain the rook the king is jumping over or the +// king the rook is jumping over). In that case, it does not check for that +// space being occupied to prevent the king from castling. +bool ChessBoard::CanCastleBetween(Square from_sq, Square to_sq, + bool check_safe_from_opponent, + PseudoLegalMoveSettings settings, + Square exception_square) const { + SPIEL_DCHECK_EQ(from_sq.y, to_sq.y); + const int y = from_sq.y; + const Color &our_color = at(from_sq).color; + + const int x_start = std::min(from_sq.x, to_sq.x); + const int x_end = std::max(from_sq.x, to_sq.x); + + // Need to explicitly check the final squares are empty in Chess960. 
The final + // square must be empty (unless it's the piece being jumped over or it's the + // king moving into the same square). + if (to_sq != exception_square && to_sq != from_sq) { + if ((settings == PseudoLegalMoveSettings::kAcknowledgeEnemyPieces && + IsEnemy(to_sq, our_color)) || IsFriendly(to_sq, our_color)) { + return false; + } + } + + for (int x = x_start; x <= x_end; ++x) { + Square test_square{static_cast(x), + static_cast(y)}; + if (check_safe_from_opponent && UnderAttack(test_square, our_color)) + return false; + if (settings == PseudoLegalMoveSettings::kAcknowledgeEnemyPieces && + IsEnemy(test_square, our_color)) + return false; + const bool x_in_between = x > x_start && x < x_end; + if (x_in_between && test_square != exception_square && + IsFriendly(test_square, our_color)) { + return false; + } + } + return true; +} + +template +void ChessBoard::GenerateCastlingDestinations_(Square sq, Color color, + PseudoLegalMoveSettings settings, + const YieldFn &yield) const { + // There are 8 conditions for castling - + // 1. The rook involved must not have moved. + // 2. The king must not have moved. + // 3. The rook involved must still be alive. + // 4. All squares king jumps over must be empty. + // 5. All squares the rook jumps over must be empty. + // 6. The squares the king jumps over must not be under attack. + // 7. The king must not be in check. + // (8). The square the king ends up in must not be under attack. + // + // We don't check for (8) here because this is not unique to castling, and + // we will check for it later. + // + // We use the generalized definition of castling from Chess960, instead of + // hard-coding starting squares. + // By Chess960 rules, the king and rook end up in the same positions as in + // standard chess, but they can start from any squares. + // + // Castling to one side doesn't necessarily mean the king will move towards + // that side. + // Eg. + // |RK...R..| + long castle (to the left) => + // |..KR.R..| + + // castling is not defined for other chessboards than the standard one + if (board_size_ != 8) { + return; + } + + const auto check_castling_conditions = [this, &sq, &color, &settings]( + Square king_sq, + CastlingDirection dir) -> bool { + const auto &rights = castling_rights_[ToInt(color)]; + Square rook_sq = dir == CastlingDirection::kLeft + ? rights.left_castle.value() + : rights.right_castle.value(); + + int8_t rook_final_x = + dir == CastlingDirection::kLeft ? 3 /* d-file */ : 5 /* f-file */; + Square rook_final_sq = Square{rook_final_x, sq.y}; + int8_t king_final_x = + dir == CastlingDirection::kLeft ? 2 /* c-file */ : 6 /* g-file */; + Square king_final_sq = Square{king_final_x, sq.y}; + + // 4. 5. 6. All squares the king and rook jump over, including the final + // squares, must be empty. Squares king jumps over must additionally be + // safe. + const bool make_king_jump_check = + !king_in_check_allowed_ && + settings == PseudoLegalMoveSettings::kAcknowledgeEnemyPieces; + if (!CanCastleBetween(rook_sq, rook_final_sq, false, settings, king_sq) || + !CanCastleBetween(sq, king_final_sq, make_king_jump_check, settings, + rook_sq)) { + return false; + } + + return true; + }; + + // 1. 2. 3. Moving the king, moving the rook, or the rook getting captured + // will reset the flag. 
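// Illustrative check on the standard board: from
// "r3k2r/8/8/8/8/8/8/R3K2R w KQkq - 0 1" all four castling rights are intact
// and conditions 4-7 hold for white, so both flags below come out true and
// the king destinations c1 (left) and g1 (right) are yielded.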
+ bool can_left_castle = + CastlingRight(color, CastlingDirection::kLeft) && + check_castling_conditions(sq, CastlingDirection::kLeft); + bool can_right_castle = + CastlingRight(color, CastlingDirection::kRight) && + check_castling_conditions(sq, CastlingDirection::kRight); + + if (can_left_castle || can_right_castle) { + // 7. No castling to escape from check. + if (UnderAttack(sq, color) && + !(king_in_check_allowed_ || + settings == PseudoLegalMoveSettings::kBreachEnemyPieces)) { + return; + } + if (can_left_castle) { + yield(Square{static_cast(2), sq.y}); + } + + if (can_right_castle) { + yield(Square{static_cast(6), sq.y}); + } + } +} + +template +void ChessBoard::GenerateQueenDestinations_(Square sq, Color color, + PseudoLegalMoveSettings settings, + const YieldFn &yield) const { + GenerateRookDestinations_(sq, color, settings, yield); + GenerateBishopDestinations_(sq, color, settings, yield); +} + +template +void ChessBoard::GenerateRookDestinations_(Square sq, Color color, + PseudoLegalMoveSettings settings, + const YieldFn &yield) const { + GenerateRayDestinations_(sq, color, settings, {1, 0}, yield); + GenerateRayDestinations_(sq, color, settings, {-1, 0}, yield); + GenerateRayDestinations_(sq, color, settings, {0, 1}, yield); + GenerateRayDestinations_(sq, color, settings, {0, -1}, yield); +} + +template +void ChessBoard::GenerateBishopDestinations_(Square sq, Color color, + PseudoLegalMoveSettings settings, + const YieldFn &yield) const { + GenerateRayDestinations_(sq, color, settings, {1, 1}, yield); + GenerateRayDestinations_(sq, color, settings, {-1, 1}, yield); + GenerateRayDestinations_(sq, color, settings, {1, -1}, yield); + GenerateRayDestinations_(sq, color, settings, {-1, -1}, yield); +} + +template +void ChessBoard::GenerateKnightDestinations_(Square sq, Color color, + const YieldFn &yield) const { + for (const auto &offset : kKnightOffsets) { + Square dest = sq + offset; + if (InBoardArea(dest) && IsEmptyOrEnemy(dest, color)) { + yield(dest); + } + } +} + +// Pawn moves without captures. +template +void ChessBoard::GeneratePawnDestinations_(Square sq, Color color, + PseudoLegalMoveSettings settings, + const YieldFn &yield) const { + int8_t y_direction = color == Color::kWhite ? 1 : -1; + Square dest = sq + Offset{0, y_direction}; + if (InBoardArea(dest) && + (IsEmpty(dest) || + (IsEnemy(dest, color) && + settings == PseudoLegalMoveSettings::kBreachEnemyPieces))) { + yield(dest); + + // Test for double move. Only defined on standard board + if (board_size_ == 8 && IsPawnStartingRank(sq, color)) { + dest = sq + Offset{0, static_cast(2 * y_direction)}; + if (IsEmpty(dest) || + (IsEnemy(dest, color) && + settings == PseudoLegalMoveSettings::kBreachEnemyPieces)) { + yield(dest); + } + } + } +} + +// Pawn capture destinations, with or without en passant. +template +void ChessBoard::GeneratePawnCaptureDestinations_( + Square sq, Color color, PseudoLegalMoveSettings settings, bool include_ep, + const YieldFn &yield) const { + int8_t y_direction = color == Color::kWhite ? 
1 : -1; + Square dest = sq + Offset{1, y_direction}; + if (InBoardArea(dest) && + (IsEnemy(dest, color) || (include_ep && dest == EpSquare()) || + (IsEmpty(dest) && + settings == PseudoLegalMoveSettings::kBreachEnemyPieces))) { + yield(dest); + } + + dest = sq + Offset{-1, y_direction}; + if (InBoardArea(dest) && + (IsEnemy(dest, color) || (include_ep && dest == EpSquare()) || + (IsEmpty(dest) && + settings == PseudoLegalMoveSettings::kBreachEnemyPieces))) { + yield(dest); + } +} + +template +void ChessBoard::GenerateRayDestinations_(Square sq, Color color, + PseudoLegalMoveSettings settings, + Offset offset_step, + const YieldFn &yield) const { + for (Square dest = sq + offset_step; InBoardArea(dest); dest += offset_step) { + if (IsEmpty(dest)) { + yield(dest); + } else if (IsEnemy(dest, color)) { + yield(dest); + if (settings == PseudoLegalMoveSettings::kAcknowledgeEnemyPieces) { + break; + } + } else { + // We have a friendly piece. + break; + } + } +} + +std::string ChessBoard::ToUnicodeString() const { + std::string out = "\n"; + for (int8_t rank = board_size_ - 1; rank >= 0; --rank) { + out += std::to_string(rank + 1); + for (int8_t file = 0; file < board_size_; ++file) { + out += at(Square{file, rank}).ToUnicode(); + } + out += "\n"; + } + out += ' '; + for (int8_t file = 0; file < board_size_; ++file) { + out += ('a' + file); + } + out += '\n'; + return out; +} + +char ChessBoard::ShredderCastlingRightChar(Color color, + CastlingDirection dir) const { + absl::optional maybe_rook_sq = MaybeCastlingRookSquare(color, dir); + if (!maybe_rook_sq.has_value()) { + return '-'; + } + Square rook_sq = maybe_rook_sq.value(); + std::string castling_files(color == Color::kWhite ? + kShredderWhiteCastlingFiles : kShredderBlackCastlingFiles); + return castling_files[rook_sq.x]; +} + +std::string ChessBoard::ToFEN(bool shredder) const { + // Example FEN: rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1 + std::string fen; + + // 1. encode the board. + for (int8_t rank = board_size_ - 1; rank >= 0; --rank) { + int num_empty = 0; + for (int8_t file = 0; file < board_size_; ++file) { + auto piece = at(Square{file, rank}); + if (piece == kEmptyPiece) { + ++num_empty; + } else { + if (num_empty > 0) { + absl::StrAppend(&fen, num_empty); + num_empty = 0; + } + absl::StrAppend(&fen, piece.ToString()); + } + } + if (num_empty > 0) { + absl::StrAppend(&fen, num_empty); + } + if (rank > 0) { + fen.push_back('/'); + } + } + + // 2. color to play. + absl::StrAppend(&fen, " ", to_play_ == Color::kWhite ? "w" : "b"); + + // 3. by castling rights. + // Note: Shredder FEN uses different characters (the files of the rooks): + // https://www.chessprogramming.org/Forsyth-Edwards_Notation#Shredder-FEN. + absl::StrAppend(&fen, " "); + std::string castling_rights; + if (CastlingRight(Color::kWhite, CastlingDirection::kRight)) { + castling_rights.push_back( + shredder ? ShredderCastlingRightChar(Color::kWhite, + CastlingDirection::kRight) : 'K'); + } + if (CastlingRight(Color::kWhite, CastlingDirection::kLeft)) { + castling_rights.push_back( + shredder ? ShredderCastlingRightChar(Color::kWhite, + CastlingDirection::kLeft) : 'Q'); + } + if (CastlingRight(Color::kBlack, CastlingDirection::kRight)) { + castling_rights.push_back( + shredder ? ShredderCastlingRightChar(Color::kBlack, + CastlingDirection::kRight) : 'k'); + } + if (CastlingRight(Color::kBlack, CastlingDirection::kLeft)) { + castling_rights.push_back( + shredder ? 
ShredderCastlingRightChar(Color::kBlack, + CastlingDirection::kLeft) : 'q'); + } + absl::StrAppend(&fen, castling_rights.empty() ? "-" : castling_rights); + + // 4. en passant square + absl::StrAppend(&fen, " "); + absl::StrAppend( + &fen, EpSquare() == kInvalidSquare ? "-" : SquareToString(EpSquare())); + + // 5. half-move clock for 50-move rule + absl::StrAppend(&fen, " ", irreversible_move_counter_); + + // 6. full-move clock + absl::StrAppend(&fen, " ", move_number_); + + return fen; +} + +// Used in Dark Chess (see games/dark_chess.{h,cc}) +std::string ChessBoard::ToDarkFEN(const ObservationTable &observability_table, + Color color) const { + std::string fen; + + // 1. encode the board. + for (int8_t rank = board_size_ - 1; rank >= 0; --rank) { + int num_empty = 0; + for (int8_t file = 0; file < board_size_; ++file) { + size_t index = SquareToIndex_(chess::Square{file, rank}); + if (!observability_table[index]) { + if (num_empty > 0) { + fen += std::to_string(num_empty); + num_empty = 0; + } + fen.push_back('?'); + } else { + const Piece &piece = at(chess::Square{file, rank}); + if (piece == chess::kEmptyPiece) { + ++num_empty; + } else { + if (num_empty > 0) { + fen += std::to_string(num_empty); + num_empty = 0; + } + absl::StrAppend(&fen, piece.ToString()); + } + } + } + if (num_empty > 0) { + absl::StrAppend(&fen, num_empty); + } + if (rank > 0) { + fen.push_back('/'); + } + } + + // 2. color to play. + absl::StrAppend(&fen, " ", ToPlay() == chess::Color::kWhite ? "w" : "b"); + + // 3. by castling rights. + absl::StrAppend(&fen, " "); + std::string castling_rights; + if (color == chess::Color::kWhite) { + if (CastlingRight(chess::Color::kWhite, chess::CastlingDirection::kRight)) { + castling_rights.push_back('K'); + } + if (CastlingRight(chess::Color::kWhite, chess::CastlingDirection::kLeft)) { + castling_rights.push_back('Q'); + } + } else { + if (CastlingRight(chess::Color::kBlack, chess::CastlingDirection::kRight)) { + castling_rights.push_back('k'); + } + if (CastlingRight(chess::Color::kBlack, chess::CastlingDirection::kLeft)) { + castling_rights.push_back('q'); + } + } + absl::StrAppend(&fen, castling_rights.empty() ? "-" : castling_rights); + + // 4. en passant square + std::string ep_square = "-"; + if (EpSquare() != kInvalidSquare) { + int8_t reversed_y_direction = color == Color::kWhite ? -1 : 1; + Square from = EpSquare() + Offset{1, reversed_y_direction}; + Piece piece = at(from); + if (piece.color == color && piece.type == PieceType::kPawn) { + ep_square = SquareToString(EpSquare()); + } else { + from = EpSquare() + Offset{-1, reversed_y_direction}; + piece = at(from); + if (piece.color == color && piece.type == PieceType::kPawn) { + ep_square = SquareToString(EpSquare()); + } + } + } + absl::StrAppend(&fen, " ", ep_square); + + // 5. half-move clock for 50-move rule + absl::StrAppend(&fen, " ", IrreversibleMoveCounter()); + + // 6. full-move clock + absl::StrAppend(&fen, " ", move_number_); + + return fen; +} + +void ChessBoard::set_square(Square sq, Piece piece) { + static const ZobristTableU64 kZobristValues( + /*seed=*/2765481); + + // First, remove the current piece from the hash. 
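// Because XOR is self-inverse, toggling the same (square, color, type) entry
// twice restores the previous hash; set_square() therefore keeps
// zobrist_hash_ equal to the XOR of one table entry per square plus the
// side-to-move, castling-rights and en-passant contributions maintained by
// the other setters below.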
+ auto position = SquareToIndex_(sq); + auto current_piece = at(sq); + zobrist_hash_ ^= + kZobristValues[position][static_cast(current_piece.color)] + [static_cast(current_piece.type)]; + + // Then add the new piece + zobrist_hash_ ^= kZobristValues[position][static_cast(piece.color)] + [static_cast(piece.type)]; + + board_[position] = piece; +} + +absl::optional ChessBoard::MaybeCastlingRookSquare( + Color side, CastlingDirection direction) const { + switch (direction) { + case CastlingDirection::kLeft: + return castling_rights_[ToInt(side)].left_castle; + case CastlingDirection::kRight: + return castling_rights_[ToInt(side)].right_castle; + default: + SpielFatalError("Unknown direction."); + return Square{0, 0}; + } +} + +int ToInt(CastlingDirection direction) { + switch (direction) { + case CastlingDirection::kLeft: + return 0; + case CastlingDirection::kRight: + return 1; + case CastlingDirection::kNone: + return 2; + default: + SpielFatalError("Unknown direction."); + return 0; + } +} + +void ChessBoard::SetCastlingRight(Color side, CastlingDirection direction, + absl::optional maybe_rook_square) { + static const ZobristTableU64<2, 2, 2> kZobristValues(/*seed=*/876387212); + + // Remove old value from hash (note that we only use bool for castling rights, + // since all states derived from the same game will have the same initial rook + // squares). + bool can_castle_before = MaybeCastlingRookSquare(side, direction).has_value(); + zobrist_hash_ ^= + kZobristValues[ToInt(side)][ToInt(direction)][can_castle_before]; + + // Then add the new value. + bool can_castle_now = maybe_rook_square.has_value(); + zobrist_hash_ ^= + kZobristValues[ToInt(side)][ToInt(direction)][can_castle_now]; + switch (direction) { + case CastlingDirection::kLeft: + castling_rights_[ToInt(side)].left_castle = maybe_rook_square; + break; + case CastlingDirection::kRight: + castling_rights_[ToInt(side)].right_castle = maybe_rook_square; + break; + case CastlingDirection::kNone: + SpielFatalError("Setting castling right when direction is none."); + } +} + +Square ChessBoard::FindRookForCastling(Color color, + CastlingDirection dir) const { + Square my_king = find(Piece{color, PieceType::kKing}); + Piece rook_to_find{color, PieceType::kRook}; + int canonical_x = dir == CastlingDirection::kLeft ? 0 : (board_size_ - 1); + Square canonical_sq = Square{static_cast(canonical_x), my_king.y}; + if (board_[SquareToIndex_(canonical_sq)] == rook_to_find) { + return canonical_sq; + } else { + // Find all rooks. + int x_offset = dir == CastlingDirection::kLeft ? -1 : 1; + int x = my_king.x + x_offset; + std::set rooks; + while (x < board_size_ && x >= 0) { + auto sq = Square{static_cast(x), my_king.y}; + auto index = SquareToIndex_(sq); + if (board_[index] == rook_to_find) { + rooks.insert(sq); + } + x += x_offset; + } + // Failing here means the FEN is either from chess960 or malformed (the FEN + // says we have castling rights, but there is no rook on the canonical + // square, and more than one rook in the castling direction). This provides + // partial support for chess960, but not for loading a mid-game chess960 + // position where two rooks ended up on the same side, while there's still + // castling right on that side (we can't determine which rook to castle + // with then). Solving this will require implementing a chess960-specific + // FEN format. 
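// Illustrative failure case: white king on e1, white rooks on b1 and c1,
// queen-side right still set and a1 empty. The scan above collects two rooks,
// so the check below aborts because the rook to castle with cannot be
// recovered from a standard FEN.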
+ SPIEL_CHECK_EQ(rooks.size(), 1); + return *rooks.begin(); + } +} + +void ChessBoard::SetToPlay(Color c) { + static const ZobristTableU64<2> kZobristValues(/*seed=*/284628); + + // Remove old color and add new to play. + zobrist_hash_ ^= kZobristValues[ToInt(to_play_)]; + zobrist_hash_ ^= kZobristValues[ToInt(c)]; + to_play_ = c; +} + +void ChessBoard::SetIrreversibleMoveCounter(int c) { + irreversible_move_counter_ = c; +} + +void ChessBoard::SetMovenumber(int move_number) { move_number_ = move_number; } + +bool ChessBoard::EpSquareThreatened(Square ep_square) const { + // If the en-passant square is set, look to see if there are pawns of the + // opponent that could capture via en-passant. + if (ep_square == kInvalidSquare) { + return false; + } + + Color ep_color = Color::kEmpty; + Offset offset1 = {0, 0}; + Offset offset2 = {0, 0}; + if (ep_square.y == 2) { + ep_color = Color::kWhite; + offset1 = {-1, +1}; + offset2 = {+1, +1}; + } else if (ep_square.y == 5) { + ep_color = Color::kBlack; + offset1 = {-1, -1}; + offset2 = {+1, -1}; + } else { + SpielFatalError(absl::StrCat("Invalid en passant square: ", ep_square.y)); + } + + Square sq1 = ep_square + offset1; + if (InBoardArea(sq1) && IsEnemy(sq1, ep_color) && + at(sq1).type == PieceType::kPawn) { + return true; + } + + Square sq2 = ep_square + offset2; + if (InBoardArea(sq2) && IsEnemy(sq2, ep_color) && + at(sq2).type == PieceType::kPawn) { + return true; + } + + return false; +} + +void ChessBoard::SetEpSquare(Square sq) { + static const ZobristTableU64 kZobristValues( + /*seed=*/837261); + + // Only update the hash if the en-passant square is threatened. This is to + // ensure that the state is properly captured for three-fold repetition + // detection. + if (EpSquare() != kInvalidSquare) { + // Remove en passant square if there was one. + zobrist_hash_ ^= kZobristValues[EpSquare().x][EpSquare().y]; + } + if (sq != kInvalidSquare) { + zobrist_hash_ ^= kZobristValues[sq.x][sq.y]; + } + + ep_square_ = sq; +} + +ChessBoard MakeDefaultBoard() { + auto maybe_board = ChessBoard::BoardFromFEN(kDefaultStandardFEN); + SPIEL_CHECK_TRUE(maybe_board); + return *maybe_board; +} + +std::string DefaultFen(int board_size) { + if (board_size == 8) + return chess::kDefaultStandardFEN; + else if (board_size == 4) + return chess::kDefaultSmallFEN; + else + SpielFatalError( + "Only board sizes 4 and 8 have their default chessboards. " + "For other sizes, you have to pass your own FEN."); +} + +} // namespace chess +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/chess/chess_board.h b/scenarios/bargaining/open_spiel/open_spiel/games/chess/chess_board.h new file mode 100644 index 0000000..e6cbd93 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/chess/chess_board.h @@ -0,0 +1,582 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef OPEN_SPIEL_GAMES_IMPL_CHESS_CHESS_BOARD_H_ +#define OPEN_SPIEL_GAMES_IMPL_CHESS_CHESS_BOARD_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/games/chess/chess_common.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace chess { + +using chess_common::kInvalidSquare; // NOLINT +using chess_common::Offset; +using chess_common::Square; +using chess_common::SquareToString; // NOLINT + +template +using ZobristTableU64 = chess_common::ZobristTable; + +enum class Color : int8_t { kBlack = 0, kWhite = 1, kEmpty = 2 }; + +inline int ToInt(Color color) { return color == Color::kWhite ? 1 : 0; } + +inline Color OppColor(Color color) { + return color == Color::kWhite ? Color::kBlack : Color::kWhite; +} + +std::string ColorToString(Color c); + +inline std::ostream& operator<<(std::ostream& stream, Color c) { + return stream << ColorToString(c); +} + +enum class CastlingDirection { kLeft, kRight, kNone }; + +int ToInt(CastlingDirection dir); + +enum class PieceType : int8_t { + kEmpty = 0, + kKing = 1, + kQueen = 2, + kRook = 3, + kBishop = 4, + kKnight = 5, + kPawn = 6 +}; + +static inline constexpr std::array kPieceTypes = { + {PieceType::kKing, PieceType::kQueen, PieceType::kRook, PieceType::kBishop, + PieceType::kKnight, PieceType::kPawn}}; + +// In case all the pieces are represented in the same plane, these values are +// used to represent each piece type. +static inline constexpr std::array kPieceRepresentation = { + {1, 0.8, 0.6, 0.4, 0.2, 0.1}}; + +// Tries to parse piece type from char ('K', 'Q', 'R', 'B', 'N', 'P'). +// Case-insensitive. +absl::optional PieceTypeFromChar(char c); + +// Converts piece type to one character strings - "K", "Q", "R", "B", "N", "P". +// p must be one of the enumerator values of PieceType. +std::string PieceTypeToString(PieceType p, bool uppercase = true); + +struct Piece { + bool operator==(const Piece& other) const { + return type == other.type && color == other.color; + } + + bool operator!=(const Piece& other) const { return !(*this == other); } + + std::string ToUnicode() const; + std::string ToString() const; + + Color color; + PieceType type; +}; + +static inline constexpr Piece kEmptyPiece = + Piece{Color::kEmpty, PieceType::kEmpty}; + +inline std::ostream& operator<<(std::ostream& stream, const Piece& p) { + return stream << p.ToString(); +} + +inline absl::optional ParseRank(char c) { + if (c >= '1' && c <= '8') return c - '1'; + return absl::nullopt; +} + +inline absl::optional ParseFile(char c) { + if (c >= 'a' && c <= 'h') return c - 'a'; + return absl::nullopt; +} + +// Maps y = [0, 7] to rank ["1", "8"]. +inline std::string RankToString(int8_t rank) { + return std::string(1, '1' + rank); +} + +// Maps x = [0, 7] to file ["a", "h"]. +inline std::string FileToString(int8_t file) { + return std::string(1, 'a' + file); +} + +// Offsets for all possible knight moves. +inline constexpr std::array kKnightOffsets = { + {{-2, -1}, {-2, 1}, {-1, -2}, {-1, 2}, {2, -1}, {2, 1}, {1, -2}, {1, 2}}}; + +absl::optional SquareFromString(const std::string& s); + +bool IsLongDiagonal(const chess::Square& from_sq, const chess::Square& to_sq, + int board_size); + +// Forward declare ChessBoard here because it's needed in Move::ToSAN. 
+class ChessBoard; + +struct Move { + Square from; + Square to; + Piece piece; + PieceType promotion_type; + CastlingDirection castle_dir = CastlingDirection::kNone; + + Move() : castle_dir(CastlingDirection::kNone) {} + Move(const Square& from, const Square& to, const Piece& piece, + PieceType promotion_type = PieceType::kEmpty, + CastlingDirection castle_dir = CastlingDirection::kNone) + : from(from), + to(to), + piece(piece), + promotion_type(promotion_type), + castle_dir(castle_dir) {} + + std::string ToString() const; + + // Converts to long algebraic notation, as required by the UCI protocol. + // In the case of chess960, the castling move is converted to the format + // it is castling with so it needs the board. + std::string ToLAN(bool chess960 = false, + const ChessBoard* board_ptr = nullptr) const; + + // Converts to standard algebraic notation, as required by portable game + // notation (PGN). It is a chess move notation that is designed to be + // human-readable and concise. + // + // Unlike the LAN format, generating a SAN string requires the board the move + // is generated from. + // + // There are 3 types of SAN moves - + // 1. O-O (short castle) + // 2. O-O-O (long castle) + // 3. [piece type][from file][from rank][x][to square][=Promo][annotations] + // + // [piece type] is omitted for pawns + // [from file] is only included if 1) move is a pawn capture, or 2) it's + // required for disambiguation (see below). + // [from rank] is only included if it's required for disambiguation. + // [x] is only included for captures + // [to square] is always included + // [=Promo] is only included for promotions ("=N", "=B", "=R", "=Q" depending + // on type promoting to). + // [annotations] are a list of 0 or more characters added with different + // meanings. The ones we care about are '+' for check, and '#' for + // checkmate. All others are optional. + // + // Disambiguation: + // If a move is not uniquely-identified otherwise, file and/or rank of the + // from square is inserted to disambiguate. When either one will disambiguate, + // file should be used. If file is unique, file is used. Otherwise if rank is + // unique, rank is used. If neither is unique (this happens rarely, usually + // after under-promoting to a minor piece with both original pieces still + // intact, or double queen promotions with original queen still intact), both + // are used. + // + // Examples: + // * e4 (pawn to e4) + // * exd5 (pawn on file e capture the piece on d5) + // * Nf3 (knight to f3) + // * Nxd5 (knight captures piece on d5) + // * Bed5 (bishop on file e to d5) + // * B5xc3 (bishop on rank 5 capture piece on c3) + // * Ne5f7 (knight on e5 to f7, when there are 3 knights on the board, one on + // e file, and one on 5th rank) + // * exd8=N#!! 
(pawn on e file capture piece on d8 and promote to knight + // resulting in checkmate in a surprisingly good move) + // * O-O-O!!N+/- (a surprisingly good long castle that is a theoretical + // novelty that gives white a clear but not winning advantage) + std::string ToSAN(const ChessBoard& board) const; + + bool is_castling() const { return castle_dir != CastlingDirection::kNone; } + + bool operator==(const Move& other) const { + return from == other.from && to == other.to && piece == other.piece && + promotion_type == other.promotion_type && + castle_dir == other.castle_dir; + } +}; + +inline std::ostream& operator<<(std::ostream& stream, const Move& m) { + return stream << m.ToString(); +} + +bool IsMoveCharacter(char c); + +std::pair SplitAnnotations(const std::string& move); + +inline constexpr int kMaxBoardSize = 8; +inline constexpr int kDefaultBoardSize = 8; +inline constexpr int k2dMaxBoardSize = kMaxBoardSize * kMaxBoardSize; +inline const std::string kDefaultStandardFEN = + "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"; +inline const std::string kDefaultSmallFEN = "r1kr/pppp/PPPP/R1KR w - - 0 1"; + +using ObservationTable = std::array; + +// Specifies policy for pseudo legal moves generation. +enum PseudoLegalMoveSettings { + // Standard legal moves (do not allow to move past enemy pieces). + kAcknowledgeEnemyPieces, + // Pseudo-legal moves, where a piece can move anywhere (according to the rules + // for that piece), except if it was blocked from doing so by other player's + // pieces. This is used in games, where the player may not know the position + // of an enemy piece (like Kriegspiel or RBC) and it can try to move past the + // enemy (for example a rook can try to move the other side of the board, even + // if it is in fact blocked by an unseen opponent's pawn). + kBreachEnemyPieces, +}; + +// Some chess variants (RBC) allow a "pass" action/move +inline constexpr open_spiel::Action kPassAction = 0; +inline const chess::Move kPassMove = + Move(Square{-1, -1}, Square{-1, -1}, + Piece{Color::kEmpty, PieceType::kEmpty}); + +class ChessBoard { + public: + ChessBoard(int board_size = kDefaultBoardSize, + bool king_in_check_allowed = false, + bool allow_pass_move = false); + + // Constructs a chess board at the given position in Forsyth-Edwards Notation. 
+ // https://en.wikipedia.org/wiki/Forsyth%E2%80%93Edwards_Notation + static absl::optional BoardFromFEN( + const std::string& fen, int board_size = 8, + bool king_in_check_allowed = false, + bool allow_pass_move = false); + + const Piece& at(Square sq) const { return board_[SquareToIndex_(sq)]; } + + void set_square(Square sq, Piece p); + + const std::array& pieces() const { return board_; } + + Color ToPlay() const { return to_play_; } + void SetToPlay(Color c); + + Square EpSquare() const { return ep_square_; } + void SetEpSquare(Square sq); + + int32_t IrreversibleMoveCounter() const { return irreversible_move_counter_; } + int32_t Movenumber() const { return move_number_; } + + absl::optional MaybeCastlingRookSquare( + Color side, CastlingDirection direction) const; + + bool CastlingRight(Color color, CastlingDirection dir) const { + return MaybeCastlingRookSquare(color, dir).has_value(); + } + + char ShredderCastlingRightChar(Color color, CastlingDirection dir) const; + + void SetCastlingRight(Color side, CastlingDirection direction, + absl::optional maybe_rook_square); + + Square FindRookForCastling(Color color, CastlingDirection dir) const; + + // Find the location of any one piece of the given type, or kInvalidSquare. + Square find(const Piece& piece) const; + + // Pseudo-legal moves are moves that may leave the king in check, but are + // otherwise legal. + // The generation functions call yield(move) for each move generated. + // The yield function should return whether generation should continue. + // For performance reasons, we do not guarantee that no more moves will be + // generated if yield returns false. It is only for optimization. + using MoveYieldFn = std::function; + void GenerateLegalMoves(const MoveYieldFn& yield) const { + GenerateLegalMoves(yield, to_play_); + } + void GenerateLegalMoves(const MoveYieldFn& yield, Color color) const; + void GeneratePseudoLegalMoves( + const MoveYieldFn& yield, Color color, + PseudoLegalMoveSettings settings = + PseudoLegalMoveSettings::kAcknowledgeEnemyPieces) const; + + // Optimization for computing number of pawn tries for kriegspiel + void GenerateLegalPawnCaptures(const MoveYieldFn& yield, Color color) const; + void GeneratePseudoLegalPawnCaptures( + const MoveYieldFn& yield, Color color, + PseudoLegalMoveSettings settings = + PseudoLegalMoveSettings::kAcknowledgeEnemyPieces) const; + + bool HasLegalMoves() const { + bool found = false; + GenerateLegalMoves([&found](const Move&) { + found = true; + return false; // We don't need any more moves. + }); + return found; + } + + bool IsMoveLegal(const Move& tested_move) const { + bool found = false; + GenerateLegalMoves([&found, &tested_move](const Move& found_move) { + if (tested_move == found_move) { + found = true; + return false; // We don't need any more moves. + } + return true; + }); + return found; + } + + // Does either side have sufficient material to mate? + // FIDE rules say it must be impossible to mate even with "most unskilled" + // counterplay. This would technically include things like pawns blocking + // either side from making progress. + // Eg. "8/4k3/8/p1p1p1p1/P1P1P1P1/8/4K3/8 w - -". + // However, detecting all such positions will require solving chess... so + // we detect a more generally-accepted subset of positions - those with the + // following material combinations: + // 1. K vs K + // 2. K+B vs K + // 3. K+N vs K + // 4. 
K+B* vs K+B* (all bishops on same coloured squares) + bool HasSufficientMaterial() const; + + // Parses a move in standard algebraic notation or long algebraic notation + // (see below). Returns absl::nullopt on failure. + absl::optional ParseMove(const std::string& move, + bool chess960 = false) const; + + // Parses a move in standard algebraic notation as defined by FIDE. + // https://en.wikipedia.org/wiki/Algebraic_notation_(chess). + // Returns absl::nullopt on failure. + absl::optional ParseSANMove(const std::string& move) const; + + // Parses a move in long algebraic notation. + // Long algebraic notation is not standardized and there are many variants, + // but the one we care about is of the form "e2e4" and "f7f8q". This is the + // form used by chess engine text protocols that are of interest to us. + // Returns absl::nullopt on failure. + absl::optional ParseLANMove(const std::string& move, + bool chess960 = false) const; + + void ApplyMove(const Move& move); + + // Applies a pseudo-legal move and returns whether it's legal. This avoids + // applying and copying the whole board once for legality testing, and once + // for actually applying the move. + bool TestApplyMove(const Move& move); + + bool InBoardArea(const Square& sq) const { + return sq.x >= 0 && sq.x < board_size_ && sq.y >= 0 && sq.y < board_size_; + } + + bool IsEmpty(const Square& sq) const { + const Piece& piece = board_[SquareToIndex_(sq)]; + return piece.type == PieceType::kEmpty; + } + + bool IsEnemy(const Square& sq, Color our_color) const { + const Piece& piece = board_[SquareToIndex_(sq)]; + return piece.type != PieceType::kEmpty && piece.color != our_color; + } + + bool IsFriendly(const Square& sq, Color our_color) const { + const Piece& piece = board_[SquareToIndex_(sq)]; + return piece.color == our_color; + } + + bool IsEmptyOrEnemy(const Square& sq, Color our_color) const { + const Piece& piece = board_[SquareToIndex_(sq)]; + return piece.color != our_color; + } + + /* Whether the square is on the pawn starting rank for our_color. */ + bool IsPawnStartingRank(const Square& sq, Color our_color) const { + return ((our_color == Color::kWhite && sq.y == 1) || + (our_color == Color::kBlack && sq.y == (board_size_ - 2))); + } + + bool IsPawnPromotionRank(const Square& sq) const { + // No need to test for color here because a pawn can't be on the "wrong" + // promotion rank. + return sq.y == 0 || sq.y == (board_size_ - 1); + } + + /* Whether the sq is under attack by the opponent. */ + bool UnderAttack(const Square& sq, Color our_color) const; + + bool InCheck() const { + return UnderAttack(find(Piece{to_play_, PieceType::kKing}), to_play_); + } + + int BoardSize() const { return board_size_; } + + bool KingInCheckAllowed() const { return king_in_check_allowed_; } + + bool AllowPassMove() const { return allow_pass_move_; } + + uint64_t HashValue() const { return zobrist_hash_; } + + std::string DebugString(bool shredder_fen = false) const; + + std::string ToUnicodeString() const; + + // Constructs a string describing the chess board position in Forsyth-Edwards + // Notation. https://en.wikipedia.org/wiki/Forsyth%E2%80%93Edwards_Notation + std::string ToFEN(bool shredder = false) const; + + /* Constructs a string describing the dark chess board position in a notation + * similar to Forsyth-Edwards Notation. + * https://en.wikipedia.org/wiki/Forsyth%E2%80%93Edwards_Notation + * + * There are several key differences to FEN: + * 1) Only observable squares are shown. 
Squares invisible to the observer are + * represented by the character '?' + * 2) Only observer's castling rights are shown + * 3) en passant square is only shown if the observer is capable of performing + * an en passant capture + * + */ + std::string ToDarkFEN(const ObservationTable& observability_table, + Color color) const; + + bool IsBreachingMove(Move move) const; + void BreachingMoveToCaptureMove(Move* move) const; + + private: + size_t SquareToIndex_(Square sq) const { return sq.y * board_size_ + sq.x; } + + /* Generate*Destinations functions call yield(sq) for every potential + * destination generated. + * Eg. + * std::vector knight_moves; + * board.GenerateKnightDestinations(Square{3, 3}, + * [](const Square& sq) { + * Move move{Square{3, 3}, sq, + * Piece{kWhite, kKnight}}; + * knight_moves.push_back(move); + * }); + */ + + /* All the Generate*Destinations functions work in the same slightly strange + * way - + * They assume there's a piece of the type in question at sq, and generate + * potential destinations. Potential destinations may include moves that + * will leave the king exposed, and are therefore illegal. + * This strange semantic is to support reusing these functions for checking + * whether one side is in check, which would otherwise require an almost- + * duplicate move generator. + */ + + // King moves without castling. + template + void GenerateKingDestinations_(Square sq, Color color, + const YieldFn& yield) const; + + template + void GenerateCastlingDestinations_(Square sq, Color color, + PseudoLegalMoveSettings settings, + const YieldFn& yield) const; + bool CanCastle(Square king_sq, Color color, + PseudoLegalMoveSettings settings) const; + bool CanCastleBetween(Square from_sq, Square to_sq, + bool check_safe_from_opponent, + PseudoLegalMoveSettings settings, + Square exception_sq = kInvalidSquare) const; + + template + void GenerateQueenDestinations_(Square sq, Color color, + PseudoLegalMoveSettings settings, + const YieldFn& yield) const; + + template + void GenerateRookDestinations_(Square sq, Color color, + PseudoLegalMoveSettings settings, + const YieldFn& yield) const; + + template + void GenerateBishopDestinations_(Square sq, Color color, + PseudoLegalMoveSettings settings, + const YieldFn& yield) const; + + template + void GenerateKnightDestinations_(Square sq, Color color, + const YieldFn& yield) const; + + template + // Pawn moves without captures. + void GeneratePawnDestinations_(Square sq, Color color, + PseudoLegalMoveSettings settings, + const YieldFn& yield) const; + + template + // Pawn diagonal capture destinations, with or without en passant. + void GeneratePawnCaptureDestinations_(Square sq, Color color, + PseudoLegalMoveSettings settings, + bool include_ep, + const YieldFn& yield) const; + + // Helper function. + template + void GenerateRayDestinations_(Square sq, Color color, + PseudoLegalMoveSettings settings, + Offset offset_step, const YieldFn& yield) const; + + void SetIrreversibleMoveCounter(int c); + void SetMovenumber(int move_number); + bool EpSquareThreatened(Square ep_square) const; + + int board_size_; + bool king_in_check_allowed_; + bool allow_pass_move_; + + std::array board_; + Color to_play_; + Square ep_square_; + int32_t irreversible_move_counter_; + + // This starts at 1, and increments after each black move (a "full move" in + // chess is a "half move" by white followed by a "half move" by black). 
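// Illustrative: after 1. e4 e5 the counter reads 2, which is exactly the
// full-move field ToFEN() emits for the resulting position.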
+ int32_t move_number_; + + // Set to the square of the rook if castling is still possible in that + // direction, otherwise nullopt. + struct { + absl::optional left_castle; // -x direction, AKA long castle + absl::optional right_castle; // +x direction, AKA short castle + } castling_rights_[2]; + + uint64_t zobrist_hash_; +}; + +inline std::ostream& operator<<(std::ostream& stream, const ChessBoard& board) { + return stream << board.DebugString(); +} + +inline std::ostream& operator<<(std::ostream& stream, const PieceType& pt) { + return stream << PieceTypeToString(pt); +} + +ChessBoard MakeDefaultBoard(); +std::string DefaultFen(int board_size); + +} // namespace chess +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_IMPL_CHESS_CHESS_BOARD_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/chess/chess_common.cc b/scenarios/bargaining/open_spiel/open_spiel/games/chess/chess_common.cc new file mode 100644 index 0000000..5db6e54 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/chess/chess_common.cc @@ -0,0 +1,150 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/chess/chess_common.h" + +#include + +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace chess_common { +namespace { + +int DiffToDestinationIndex(int diff, int board_size) { + int destination_index = diff + board_size - 1; + if (diff > 0) --destination_index; + return destination_index; +} + +int DestinationIndexToDiff(int destination_index, int board_size) { + int diff = destination_index - board_size + 1; + if (diff >= 0) ++diff; + return diff; +} + +template +int OffsetToDestinationIndexImpl(const Offset& offset, + const KnightOffsets& knight_offsets, + int board_size) { + // Encodes chess queen moves + knight moves. + int move_type = -1; + int destination_index = -1; + if (offset.x_offset == 0) { + // vertical moves + move_type = 0; + destination_index = DiffToDestinationIndex(offset.y_offset, board_size); + } else if (offset.y_offset == 0) { + // horizontal moves + move_type = 1; + destination_index = DiffToDestinationIndex(offset.x_offset, board_size); + } else if (offset.x_offset == offset.y_offset) { + // left downward or right upward diagonal moves. + move_type = 2; + destination_index = DiffToDestinationIndex(offset.x_offset, board_size); + } else if (offset.x_offset == -offset.y_offset) { + // left upward or right downward diagonal moves. + move_type = 3; + destination_index = DiffToDestinationIndex(offset.x_offset, board_size); + } else { + // knight moves. 
+ move_type = 4; + auto itr = std::find(knight_offsets.begin(), knight_offsets.end(), offset); + if (itr != knight_offsets.end()) { + destination_index = std::distance(knight_offsets.begin(), itr); + } else { + SpielFatalError(absl::StrCat("Unexpected offset (", + static_cast(offset.x_offset), ", ", + static_cast(offset.y_offset), ")")); + } + } + + return move_type * 2 * (board_size - 1) + destination_index; +} + +template +Offset DestinationIndexToOffsetImpl(int destination_index, + const KnightOffsets& knight_offsets, + int board_size) { + int move_type = destination_index / (2 * (board_size - 1)); + destination_index = destination_index % (2 * (board_size - 1)); + int8_t diff = DestinationIndexToDiff(destination_index, board_size); + + if (move_type == 0) { + return {0, diff}; + } else if (move_type == 1) { + return {diff, 0}; + } else if (move_type == 2) { + return {diff, diff}; + } else if (move_type == 3) { + return {diff, static_cast(-diff)}; + } else if (move_type == 4) { + SPIEL_CHECK_GE(destination_index, 0); + SPIEL_CHECK_LT(destination_index, knight_offsets.size()); + return knight_offsets[destination_index]; + } else { + SpielFatalError(absl::StrCat("Unexpected move type (", move_type, ")")); + } +} + +} // namespace + +int OffsetToDestinationIndex(const Offset& offset, + const std::array& knight_offsets, + int board_size) { + return OffsetToDestinationIndexImpl(offset, knight_offsets, board_size); +} + +int OffsetToDestinationIndex(const Offset& offset, + const std::array& knight_offsets, + int board_size) { + return OffsetToDestinationIndexImpl(offset, knight_offsets, board_size); +} + +Offset DestinationIndexToOffset(int destination_index, + const std::array& knight_offsets, + int board_size) { + return DestinationIndexToOffsetImpl(destination_index, knight_offsets, + board_size); +} + +Offset DestinationIndexToOffset(int destination_index, + const std::array& knight_offsets, + int board_size) { + return DestinationIndexToOffsetImpl(destination_index, knight_offsets, + board_size); +} + +std::pair DecodeNetworkTarget(int i, int board_size, + int num_actions_destinations) { + int xy = i / num_actions_destinations; + SPIEL_CHECK_GE(xy, 0); + SPIEL_CHECK_LT(xy, board_size * board_size); + int8_t x = xy / board_size; + int8_t y = xy % board_size; + int destination_index = i % num_actions_destinations; + SPIEL_CHECK_GE(destination_index, 0); + SPIEL_CHECK_LT(destination_index, num_actions_destinations); + return std::make_pair(Square{x, y}, destination_index); +} + +int EncodeNetworkTarget(const Square& from_square, int destination_index, + int board_size, int num_actions_destinations) { + return (from_square.x * board_size + from_square.y) * + num_actions_destinations + + destination_index; +} + +} // namespace chess_common +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/chess/chess_common.h b/scenarios/bargaining/open_spiel/open_spiel/games/chess/chess_common.h new file mode 100644 index 0000000..1491ce1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/chess/chess_common.h @@ -0,0 +1,179 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_IMPL_COMMON_CHESS_COMMON_H_ +#define OPEN_SPIEL_GAMES_IMPL_COMMON_CHESS_COMMON_H_ + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/random/uniform_int_distribution.h" + +namespace open_spiel { +namespace chess_common { + +struct Offset { + int8_t x_offset; + int8_t y_offset; + + bool operator==(const Offset& other) const { + return x_offset == other.x_offset && y_offset == other.y_offset; + } +}; + +// x corresponds to file (column / letter) +// y corresponds to rank (row / number). +struct Square { + Square& operator+=(const Offset& offset) { + x += offset.x_offset; + y += offset.y_offset; + return *this; + } + + bool operator==(const Square& other) const { + return x == other.x && y == other.y; + } + + bool operator!=(const Square& other) const { return !(*this == other); } + + // Required by std::set. + bool operator<(const Square& other) const { + if (x != other.x) { + return x < other.x; + } else { + return y < other.y; + } + } + + std::string ToString() const { + std::string s; + s.push_back('a' + x); + s.push_back('1' + y); + return s; + } + + int8_t x; + int8_t y; +}; + +constexpr Square kInvalidSquare{-1, -1}; + +inline std::string SquareToString(const Square& square) { + if (square == kInvalidSquare) { + return "None"; + } else { + std::string s; + s.push_back('a' + square.x); + s.push_back('1' + square.y); + return s; + } +} + +inline Square operator+(const Square& sq, const Offset& offset) { + int8_t x = sq.x + offset.x_offset; + int8_t y = sq.y + offset.y_offset; + return Square{x, y}; +} + +// This function takes an Offset which represents a relative chess move and +// encodes it into an integer: the DestinationIndex. The encoding enumerates the +// queen moves and then the knight moves. For chess, this results in the +// following mapping: +// - [ 0, 13]: 14 vertical moves +// - [14, 27]: 14 horizontal moves +// - [28, 41]: 14 left downward or right upward diagonal moves +// - [42, 55]: 14 left upward or right downward diagonal moves +// - [56, 63]: 8 knight moves +int OffsetToDestinationIndex(const Offset& offset, + const std::array& knight_offsets, + int board_size); +int OffsetToDestinationIndex(const Offset& offset, + const std::array& knight_offsets, + int board_size); + +// Inverse function of OffsetToDestinationIndex +Offset DestinationIndexToOffset(int destination_index, + const std::array& knight_offsets, + int board_size); +Offset DestinationIndexToOffset(int destination_index, + const std::array& knight_offsets, + int board_size); + +// Encoding is: +// i = (x * board_size + y) * num_actions_destinations + destination_index +// where x,y are the square coordinates. +std::pair DecodeNetworkTarget(int i, int board_size, + int num_actions_destinations); +int EncodeNetworkTarget(const Square& from_square, int destination_index, + int board_size, int num_actions_destinations); + +// n-dimensional array of uniform random numbers. 
+// Example: +// ZobristTable table; +// +// table[a][b][c] is a random int where a < 3, b < 4, c < 5 +// +template +class ZobristTable { + public: + using Generator = std::mt19937_64; + using NestedTable = ZobristTable; + + ZobristTable(Generator::result_type seed) { + Generator generator(seed); + absl::uniform_int_distribution dist; + data_.reserve(InnerDim); + for (std::size_t i = 0; i < InnerDim; ++i) { + data_.emplace_back(dist(generator)); + } + } + + const NestedTable& operator[](std::size_t inner_index) const { + return data_[inner_index]; + } + + private: + std::vector data_; +}; + +// 1-dimensional array of uniform random numbers. +template +class ZobristTable { + public: + using Generator = std::mt19937_64; + + ZobristTable(Generator::result_type seed) : data_(InnerDim) { + Generator generator(seed); + absl::uniform_int_distribution dist; + for (auto& field : data_) { + field = dist(generator); + } + } + + T operator[](std::size_t index) const { return data_[index]; } + + private: + std::vector data_; +}; + +inline std::ostream& operator<<(std::ostream& stream, const Square& sq) { + return stream << SquareToString(sq); +} + +} // namespace chess_common +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_IMPL_COMMON_CHESS_COMMON_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/chess/chess_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/chess/chess_test.cc new file mode 100644 index 0000000..5b7aa76 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/chess/chess_test.cc @@ -0,0 +1,427 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
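To make the destination-index scheme documented in `chess_common.h` above concrete, here is a minimal standalone sketch for an 8x8 board. It does not link against OpenSpiel; the helper name `EncodeQueenMove` is illustrative only, mirroring `DiffToDestinationIndex` and the `move_type * 2 * (board_size - 1) + destination_index` composition.

```
// Standalone sketch of the destination-index encoding described above for a
// standard 8x8 board. EncodeQueenMove is illustrative, not an OpenSpiel symbol.
#include <cstdio>

int DiffToDestinationIndex(int diff, int board_size) {
  // Maps diff in [-(board_size-1), -1] to [0, board_size-2] and
  // diff in [1, board_size-1] to [board_size-1, 2*(board_size-1)-1].
  int destination_index = diff + board_size - 1;
  if (diff > 0) --destination_index;
  return destination_index;
}

int EncodeQueenMove(int move_type, int diff, int board_size) {
  // move_type: 0 vertical, 1 horizontal, 2 and 3 the two diagonals.
  return move_type * 2 * (board_size - 1) +
         DiffToDestinationIndex(diff, board_size);
}

int main() {
  std::printf("%d\n", EncodeQueenMove(0, +1, 8));  // one square up     -> 7
  std::printf("%d\n", EncodeQueenMove(1, -7, 8));  // seven squares left -> 14
  std::printf("%d\n", 4 * 2 * (8 - 1));            // first knight index -> 56
  return 0;
}
```

The printed values land in the ranges listed in the header comment: vertical moves in [0, 13], horizontal in [14, 27], and knight moves starting at 56.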
+ +#include "open_spiel/games/chess/chess.h" + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/random/uniform_int_distribution.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/games/chess/chess_board.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace chess { +namespace { + +namespace testing = open_spiel::testing; + +uint64_t Perft(const ChessBoard& board, int depth) { + std::vector legal_moves; + board.GenerateLegalMoves([&legal_moves](const Move& move) -> bool { + legal_moves.push_back(move); + return true; + }); + if (depth == 1) { + return legal_moves.size(); + } else { + uint64_t ret = 0; + for (const auto& move : legal_moves) { + ChessBoard board_copy = board; + board_copy.ApplyMove(move); + ret += Perft(board_copy, depth - 1); + } + return ret; + } +} + +uint64_t Perft(const char* fen, int depth) { + return Perft(ChessBoard::BoardFromFEN(fen).value(), depth); +} + +void CheckUndo(const char* fen, const char* move_san, const char* fen_after) { + std::shared_ptr game = LoadGame("chess"); + ChessState state(game, fen); + Player player = state.CurrentPlayer(); + absl::optional maybe_move = state.Board().ParseSANMove(move_san); + SPIEL_CHECK_TRUE(maybe_move); + Action action = MoveToAction(*maybe_move, state.BoardSize()); + state.ApplyAction(action); + SPIEL_CHECK_EQ(state.Board().ToFEN(), fen_after); + state.UndoAction(player, action); + SPIEL_CHECK_EQ(state.Board().ToFEN(), fen); +} + +void ApplySANMove(const char* move_san, ChessState* state) { + absl::optional maybe_move = state->Board().ParseSANMove(move_san); + SPIEL_CHECK_TRUE(maybe_move); + state->ApplyAction(MoveToAction(*maybe_move, state->BoardSize())); +} + +void BasicChessTests() { + testing::LoadGameTest("chess"); + testing::NoChanceOutcomesTest(*LoadGame("chess")); + testing::RandomSimTest(*LoadGame("chess"), 10); + testing::RandomSimTestWithUndo(*LoadGame("chess"), 10); +} + +void BasicChess960Tests() { + testing::LoadGameTest("chess(chess960=true)"); + testing::RandomSimTest(*LoadGame("chess(chess960=true)"), 10); + // Undo only works after the chance node in chess960. + // testing::RandomSimTestWithUndo(*LoadGame(chess960_game_string), 10); +} + +void Chess960SerializationRootIsChanceNodeTest() { + std::shared_ptr game = LoadGame("chess(chess960=true)"); + + std::unique_ptr state = game->NewInitialState(); + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(0); + SPIEL_CHECK_FALSE(state->IsChanceNode()); + state->ApplyAction(state->LegalActions()[0]); + SPIEL_CHECK_EQ(state->History().size(), 2); + + // Do one round-trip serialization -> deserialization. + // State should be the same after serialization and deserialization, and + // the chance node should be removed from the history. + std::string state_string = state->ToString(); + std::string serialized_state = state->Serialize(); + std::unique_ptr deserialized_state = + game->DeserializeState(serialized_state); + SPIEL_CHECK_EQ(state_string, deserialized_state->ToString()); + // Chance node is removed from the history. + SPIEL_CHECK_EQ(deserialized_state->History().size(), 1); + + // Do a second round-trip serialization -> deserialization. 
+ // State should be the same after serialization and deserialization, and + // the chance node should still be removed from the history. + serialized_state = deserialized_state->Serialize(); + deserialized_state = game->DeserializeState(serialized_state); + SPIEL_CHECK_EQ(state_string, deserialized_state->ToString()); + SPIEL_CHECK_EQ(deserialized_state->History().size(), 1); +} + +void Chess960SerializationRootIsSpecificStartingPositionTest() { + std::shared_ptr game = LoadGame("chess(chess960=true)"); + + std::unique_ptr state = game->NewInitialState( + "qrbkrnnb/pppppppp/8/8/8/8/PPPPPPPP/QRBKRNNB w KQkq - 0 1" + ); + SPIEL_CHECK_FALSE(state->IsChanceNode()); + state->ApplyAction(state->LegalActions()[0]); + SPIEL_CHECK_EQ(state->History().size(), 1); + + // Do one round-trip serialization -> deserialization. + // State should be the same after serialization and deserialization, and + // the chance node should be removed from the history. + std::string state_string = state->ToString(); + std::string serialized_state = state->Serialize(); + std::unique_ptr deserialized_state = + game->DeserializeState(serialized_state); + SPIEL_CHECK_EQ(state_string, deserialized_state->ToString()); + SPIEL_CHECK_EQ(deserialized_state->History().size(), 1); + + // Do a second round-trip serialization -> deserialization. + // State should be the same after serialization and deserialization, and + // the chance node should still be removed from the history. + serialized_state = deserialized_state->Serialize(); + deserialized_state = game->DeserializeState(serialized_state); + SPIEL_CHECK_EQ(state_string, deserialized_state->ToString()); + SPIEL_CHECK_EQ(deserialized_state->History().size(), 1); +} + + +void MoveGenerationTests() { + // These perft positions and results are from here: + // https://www.chessprogramming.org/Perft_Results + // They are specifically designed to catch move generator bugs. + // Depth chosen for maximum a few seconds run time in debug build. 
+ SPIEL_CHECK_EQ(Perft(MakeDefaultBoard(), 5), 4865609); + SPIEL_CHECK_EQ( + Perft("r3k2r/p1ppqpb1/bn2pnp1/3PN3/1p2P3/2N2Q1p/PPPBBPPP/R3K2R w KQkq -", + 4), + 4085603); + SPIEL_CHECK_EQ(Perft("8/2p5/3p4/KP5r/1R3p1k/8/4P1P1/8 w - -", 5), 674624); + SPIEL_CHECK_EQ( + Perft("r3k2r/Pppp1ppp/1b3nbN/nP6/BBP1P3/q4N2/Pp1P2PP/R2Q1RK1 w kq - 0 1", + 4), + 422333); + SPIEL_CHECK_EQ( + Perft("rnbq1k1r/pp1Pbppp/2p5/8/2B5/8/PPP1NnPP/RNBQK2R w KQ - 1 8", 4), + 2103487); + SPIEL_CHECK_EQ( + Perft( + "r4rk1/1pp1qppp/p1np1n2/2b1p1B1/2B1P1b1/P1NP1N2/1PP1QPPP/R4RK1 w - -", + 4), + 3894594); + + // Rook disambiguation: + // https://github.com/google-deepmind/open_spiel/issues/1125 + SPIEL_CHECK_EQ( + Perft("4k1rr/1b1p3p/nn1p4/P3Np2/3P1bp1/6PP/P5R1/1B1K2N1 b k - 1 37", 1), + 35); +} + +void TerminalReturnTests() { + std::shared_ptr game = LoadGame("chess"); + ChessState checkmate_state( + game, "rnb1kbnr/pppp1ppp/8/4p3/6Pq/5P2/PPPPP2P/RNBQKBNR w KQkq -"); + SPIEL_CHECK_EQ(checkmate_state.IsTerminal(), true); + SPIEL_CHECK_EQ(checkmate_state.Returns(), (std::vector{1.0, -1.0})); + + ChessState stalemate_state(game, "8/8/5k2/1r1r4/8/8/7r/2K5 w - -"); + SPIEL_CHECK_EQ(stalemate_state.IsTerminal(), true); + SPIEL_CHECK_EQ(stalemate_state.Returns(), (std::vector{0.0, 0.0})); + + ChessState fifty_moves_state(game, "8/8/5k2/8/8/8/7r/2K5 w - - 100 1"); + SPIEL_CHECK_EQ(fifty_moves_state.IsTerminal(), true); + SPIEL_CHECK_EQ(fifty_moves_state.Returns(), (std::vector{0.0, 0.0})); + + ChessState ongoing_state(game, "8/8/5k2/8/8/8/7r/2K5 w - - 99 1"); + SPIEL_CHECK_EQ(ongoing_state.IsTerminal(), false); + + ChessState repetition_state(game, "8/8/5k2/8/8/8/7r/2K5 w - - 50 1"); + ApplySANMove("Kd1", &repetition_state); + ApplySANMove("Ra2", &repetition_state); + ApplySANMove("Kc1", &repetition_state); + ApplySANMove("Rh2", &repetition_state); + ApplySANMove("Kd1", &repetition_state); + ApplySANMove("Ra2", &repetition_state); + ApplySANMove("Kc1", &repetition_state); + SPIEL_CHECK_EQ(repetition_state.IsTerminal(), false); + ApplySANMove("Rh2", &repetition_state); + SPIEL_CHECK_EQ(repetition_state.IsTerminal(), true); + SPIEL_CHECK_EQ(repetition_state.Returns(), (std::vector{0.0, 0.0})); +} + +void UndoTests() { + // Promotion + capture. + CheckUndo("r1bqkbnr/pPpppppp/8/6n1/6p1/8/PPPPP1PP/RNBQKBNR w KQkq - 0 1", + "bxa8=Q", + "Q1bqkbnr/p1pppppp/8/6n1/6p1/8/PPPPP1PP/RNBQKBNR b KQk - 0 1"); + + // En passant. + CheckUndo("rnbqkbnr/pppp1p1p/8/4pPp1/8/8/PPPPP1PP/RNBQKBNR w KQkq g6 0 2", + "fxg6", + "rnbqkbnr/pppp1p1p/6P1/4p3/8/8/PPPPP1PP/RNBQKBNR b KQkq - 0 2"); +} + +float ValueAt(const std::vector& v, const std::vector& shape, + int plane, int x, int y) { + return v[plane * shape[1] * shape[2] + y * shape[2] + x]; +} + +float ValueAt(const std::vector& v, const std::vector& shape, + int plane, const std::string& square) { + Square sq = *SquareFromString(square); + return ValueAt(v, shape, plane, sq.x, sq.y); +} + +void ObservationTensorTests() { + std::shared_ptr game = LoadGame("chess"); + ChessState initial_state(game); + auto shape = game->ObservationTensorShape(); + std::vector v(game->ObservationTensorSize()); + initial_state.ObservationTensor(initial_state.CurrentPlayer(), + absl::MakeSpan(v)); + + // For each piece type, check one square that's supposed to be occupied, and + // one that isn't. + // Kings. + SPIEL_CHECK_EQ(ValueAt(v, shape, 0, "e1"), 1.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 0, "d1"), 0.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 1, "e8"), 1.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 1, "e1"), 0.0); + + // Queens. 
+ SPIEL_CHECK_EQ(ValueAt(v, shape, 2, "d1"), 1.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 2, "e1"), 0.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 3, "d8"), 1.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 3, "d1"), 0.0); + + // Rooks. + SPIEL_CHECK_EQ(ValueAt(v, shape, 4, "a1"), 1.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 4, "e8"), 0.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 5, "h8"), 1.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 5, "c5"), 0.0); + + // Bishops. + SPIEL_CHECK_EQ(ValueAt(v, shape, 6, "c1"), 1.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 6, "b1"), 0.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 7, "f8"), 1.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 7, "f7"), 0.0); + + // Knights. + SPIEL_CHECK_EQ(ValueAt(v, shape, 8, "b1"), 1.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 8, "c3"), 0.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 9, "g8"), 1.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 9, "g7"), 0.0); + + // Pawns. + SPIEL_CHECK_EQ(ValueAt(v, shape, 10, "a2"), 1.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 10, "a3"), 0.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 11, "e7"), 1.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 11, "e6"), 0.0); + + // Empty. + SPIEL_CHECK_EQ(ValueAt(v, shape, 12, "e4"), 1.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 12, "e2"), 0.0); + + // Repetition count. + SPIEL_CHECK_EQ(ValueAt(v, shape, 13, 0, 0), 0.0); + + // Side to move. + SPIEL_CHECK_EQ(ValueAt(v, shape, 14, 0, 0), 1.0); + + // Irreversible move counter. + SPIEL_CHECK_EQ(ValueAt(v, shape, 15, 0, 0), 0.0); + + // Castling rights. + SPIEL_CHECK_EQ(ValueAt(v, shape, 16, 0, 0), 1.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 17, 1, 1), 1.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 18, 2, 2), 1.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 19, 3, 3), 1.0); + + ApplySANMove("e4", &initial_state); + ApplySANMove("e5", &initial_state); + ApplySANMove("Ke2", &initial_state); + + initial_state.ObservationTensor(initial_state.CurrentPlayer(), + absl::MakeSpan(v)); + SPIEL_CHECK_EQ(v.size(), game->ObservationTensorSize()); + + // Now it's black to move. + SPIEL_CHECK_EQ(ValueAt(v, shape, 14, 0, 0), 0.0); + + // White king is now on e2. + SPIEL_CHECK_EQ(ValueAt(v, shape, 0, "e1"), 0.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 0, "e2"), 1.0); + + // Irreversible move counter incremented to 1 (king moving and losing castling + // rights is in fact irreversible in this case, but it doesn't reset the + // counter according to FIDE rules). + SPIEL_CHECK_FLOAT_EQ(ValueAt(v, shape, 15, 0, 0), 1.0 / 101.0); + + // And white no longer has castling rights. 
+ SPIEL_CHECK_EQ(ValueAt(v, shape, 16, 0, 0), 0.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 17, 1, 1), 0.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 18, 2, 2), 1.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 19, 3, 3), 1.0); +} + +void MoveConversionTests() { + auto game = LoadGame("chess"); + std::mt19937 rng(23); + for (int i = 0; i < 100; ++i) { + std::unique_ptr state = game->NewInitialState(); + while (!state->IsTerminal()) { + const ChessState* chess_state = + dynamic_cast(state.get()); + std::vector legal_actions = state->LegalActions(); + absl::uniform_int_distribution dist(0, legal_actions.size() - 1); + int action_index = dist(rng); + Action action = legal_actions[action_index]; + Move move = ActionToMove(action, chess_state->Board()); + Action action_from_move = MoveToAction(move, chess_state->BoardSize()); + SPIEL_CHECK_EQ(action, action_from_move); + const ChessBoard& board = chess_state->Board(); + ChessBoard fresh_board = chess_state->StartBoard(); + for (Move move : chess_state->MovesHistory()) { + fresh_board.ApplyMove(move); + } + SPIEL_CHECK_EQ(board.ToFEN(), fresh_board.ToFEN()); + Action action_from_lan = + MoveToAction(*board.ParseLANMove(move.ToLAN()), board.BoardSize()); + SPIEL_CHECK_EQ(action, action_from_lan); + state->ApplyAction(action); + } + } +} + +void SerializaitionTests() { + auto game = LoadGame("chess"); + + // Default board position. + std::unique_ptr state = game->NewInitialState(); + std::shared_ptr deserialized_state = + game->DeserializeState(state->Serialize()); + SPIEL_CHECK_EQ(state->ToString(), deserialized_state->ToString()); + + // Empty string. + deserialized_state = game->DeserializeState(""); + SPIEL_CHECK_EQ(state->ToString(), deserialized_state->ToString()); + + // FEN starting position. + state = game->NewInitialState( + "rnbqkbnr/pp1ppppp/8/2p5/4P3/5N2/PPPP1PPP/RNBQKB1R b KQkq - 1 2"); + deserialized_state = game->DeserializeState(state->Serialize()); + SPIEL_CHECK_EQ(state->ToString(), deserialized_state->ToString()); +} + +void ThreeFoldRepetitionTestWithEnPassant() { + // Example from: + // https://www.chess.com/article/view/think-twice-before-a-threefold-repetition + std::string san_history_str = + "e4 e5 Nf3 Nc6 Bb5 a6 Ba4 Nf6 O-O Be7 Re1 " + "b5 Bb3 d6 c3 O-O h3 Bb7 d4 Re8 Ng5 Rf8 Nf3 Re8 Ng5 Rf8 Nf3"; + std::vector san_history = absl::StrSplit(san_history_str, ' '); + + auto game = LoadGame("chess"); + std::unique_ptr state = game->NewInitialState(); + + for (const std::string& san : san_history) { + SPIEL_CHECK_FALSE(state->IsTerminal()); + Action chosen_action = kInvalidAction; + for (Action action : state->LegalActions()) { + if (state->ActionToString(action) == san) { + chosen_action = action; + break; + } + } + SPIEL_CHECK_NE(chosen_action, kInvalidAction); + state->ApplyAction(chosen_action); + } + + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_TRUE( + down_cast(state.get())->IsRepetitionDraw()); +} + +} // namespace +} // namespace chess +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::chess::BasicChessTests(); + open_spiel::chess::MoveGenerationTests(); + open_spiel::chess::UndoTests(); + open_spiel::chess::TerminalReturnTests(); + open_spiel::chess::ObservationTensorTests(); + open_spiel::chess::MoveConversionTests(); + open_spiel::chess::SerializaitionTests(); + open_spiel::chess::BasicChess960Tests(); + open_spiel::chess::Chess960SerializationRootIsChanceNodeTest(); + open_spiel::chess::Chess960SerializationRootIsSpecificStartingPositionTest(); + 
open_spiel::chess::ThreeFoldRepetitionTestWithEnPassant(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/cliff_walking/cliff_walking.cc b/scenarios/bargaining/open_spiel/open_spiel/games/cliff_walking/cliff_walking.cc new file mode 100644 index 0000000..5f46f09 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/cliff_walking/cliff_walking.cc @@ -0,0 +1,206 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/cliff_walking/cliff_walking.h" + +#include +#include + +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace cliff_walking { +namespace { + +enum CliffWalkingAction { RIGHT = 0, UP = 1, LEFT = 2, DOWN = 3 }; + +// Facts about the game. +const GameType kGameType{/*short_name=*/"cliff_walking", + /*long_name=*/"CliffWalking", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kRewards, + /*max_num_players=*/1, + /*min_num_players=*/1, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"height", GameParameter(kDefaultHeight)}, + {"width", GameParameter(kDefaultWidth)}, + {"horizon", GameParameter(kDefaultHorizon)}}}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new CliffWalkingGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +} // namespace + +CliffWalkingState::CliffWalkingState(std::shared_ptr game) + : State(game) { + const CliffWalkingGame& parent_game = + static_cast(*game); + height_ = parent_game.Height(); + width_ = parent_game.Width(); + horizon_ = parent_game.MaxGameLength(); + player_row_ = parent_game.Height() - 1; +} + +int CliffWalkingState::CurrentPlayer() const { + if (IsTerminal()) return kTerminalPlayerId; + return 0; +} + +std::vector CliffWalkingState::LegalActions() const { + if (IsTerminal()) return {}; + return {RIGHT, UP, LEFT, DOWN}; +} + +std::string CliffWalkingState::ActionToString(int player, + Action action_id) const { + SPIEL_CHECK_EQ(player, 0); + switch (action_id) { + case RIGHT: + return "RIGHT"; + case UP: + return "UP"; + case LEFT: + return "LEFT"; + case DOWN: + return "DOWN"; + default: + SpielFatalError("Out of range action"); + } +} + +std::string CliffWalkingState::ToString() const { + std::string str; + str.reserve(height_ * (width_ + 1)); + for (int r = 0; r < height_; ++r) { + for (int c = 0; c < width_; ++c) { + if (r == player_row_ && c == player_col_) + str += 'P'; + else if (IsCliff(r, c)) + str += 'X'; + else if (IsGoal(r, c)) + str += 'G'; + else + str += '.'; + } + str += '\n'; + } + return str; +} + +bool 
CliffWalkingState::IsTerminal() const { + return time_counter_ >= horizon_ || IsCliff(player_row_, player_col_) || + IsGoal(player_row_, player_col_); +} + +std::vector CliffWalkingState::Rewards() const { + if (IsCliff(player_row_, player_col_)) return {-100.0}; + if (time_counter_ == 0) return {0.0}; + return {-1.0}; +} + +std::vector CliffWalkingState::Returns() const { + if (IsCliff(player_row_, player_col_)) return {-100.0 - time_counter_ + 1}; + return {time_counter_ * -1.0}; +} + +std::string CliffWalkingState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return HistoryString(); +} + +std::string CliffWalkingState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void CliffWalkingState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + SPIEL_CHECK_EQ(values.size(), height_ * width_); + std::fill(values.begin(), values.end(), 0.); + values[player_row_ * width_ + player_col_] = 1.0; +} + +void CliffWalkingState::InformationStateTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + SPIEL_CHECK_EQ(values.size(), kNumActions * horizon_); + for (int i = 0; i < history_.size(); i++) { + values[i * kNumActions + history_[i].action] = 1; + } +} + +std::unique_ptr CliffWalkingState::Clone() const { + return std::unique_ptr(new CliffWalkingState(*this)); +} + +void CliffWalkingState::DoApplyAction(Action move) { + switch (move) { + case RIGHT: + ++player_col_; + break; + case UP: + --player_row_; + break; + case LEFT: + --player_col_; + break; + case DOWN: + ++player_row_; + break; + default: + SpielFatalError("Unexpected action"); + } + player_row_ = std::min(std::max(player_row_, 0), height_ - 1); + player_col_ = std::min(std::max(player_col_, 0), width_ - 1); + ++time_counter_; +} + +bool CliffWalkingState::IsCliff(int row, int col) const { + return col > 0 && col < width_ - 1 && row == height_ - 1; +} + +bool CliffWalkingState::IsGoal(int row, int col) const { + return row == height_ - 1 && col == width_ - 1; +} + +CliffWalkingGame::CliffWalkingGame(const GameParameters& params) + : Game(kGameType, params), + height_(ParameterValue("height")), + width_(ParameterValue("width")), + horizon_(ParameterValue("horizon")) { + SPIEL_CHECK_GE(height_, 2); + SPIEL_CHECK_GE(width_, 3); +} + +} // namespace cliff_walking +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/cliff_walking/cliff_walking.h b/scenarios/bargaining/open_spiel/open_spiel/games/cliff_walking/cliff_walking.h new file mode 100644 index 0000000..ddf4299 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/cliff_walking/cliff_walking.h @@ -0,0 +1,147 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
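As a sanity check on the dynamics implemented in `cliff_walking.cc` above, the following standalone sketch (it does not link against OpenSpiel) replays the shortest safe path on the default 4x8 board using the same clamped moves, cliff/goal tests, and per-step rewards; it ends with a return of -9, matching `MaxUtility() = -width - 1`.

```
// Standalone sketch of the CliffWalking dynamics above: clamped moves,
// cliff/goal predicates, -1 per step and -100 for stepping off the cliff,
// replayed on the default 4x8 board.
#include <algorithm>
#include <cstdio>
#include <vector>

enum Action { RIGHT = 0, UP = 1, LEFT = 2, DOWN = 3 };

int main() {
  const int height = 4, width = 8;
  int row = height - 1, col = 0;  // Start: bottom-left corner.
  double ret = 0.0;

  auto is_cliff = [&](int r, int c) {
    return c > 0 && c < width - 1 && r == height - 1;
  };
  auto is_goal = [&](int r, int c) {
    return r == height - 1 && c == width - 1;
  };

  // Optimal policy: step up, walk along the cliff edge, step back down.
  std::vector<Action> plan = {UP, RIGHT, RIGHT, RIGHT, RIGHT,
                              RIGHT, RIGHT, RIGHT, DOWN};
  for (Action a : plan) {
    if (a == RIGHT) ++col;
    if (a == UP) --row;
    if (a == LEFT) --col;
    if (a == DOWN) ++row;
    row = std::min(std::max(row, 0), height - 1);
    col = std::min(std::max(col, 0), width - 1);
    ret += is_cliff(row, col) ? -100.0 : -1.0;
    if (is_cliff(row, col) || is_goal(row, col)) break;
  }
  std::printf("return = %g\n", ret);  // -9, i.e. -(width + 1)
  return 0;
}
```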
+ +#ifndef OPEN_SPIEL_GAMES_CLIFF_WALKING_H_ +#define OPEN_SPIEL_GAMES_CLIFF_WALKING_H_ + +#include +#include +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// A cliff walking reinforcement learning environment. +// +// This is a deterministic environment that can be used to test RL algorithms. +// Note there are *no illegal moves* in this environment--if the agent is on the +// gridworld boundary and takes an action which would yield an invalid position, +// the action is ignored (as if there were walls surrounding the grid world). +// +// The player spawns at the bottom left and must reach the goal located on the +// bottom right. A game is terminal when the player reaches the goal, the +// maximum episode length has been reached (horizon) or when the player steps +// off the cliff edge (see figure below). The player receives a reward of -1 for +// all transitions except when stepping off the cliff, where a reward of -100 is +// received. +// +// Cliff example for height=3 and width=5: +// +// | | | | | | +// | | | | | | +// | S | x | x | x | G | +// +// where `S` is always the starting position, `G` is always the goal and `x` +// represents the zone of high negative reward to be avoided. For this instance, +// the optimum policy is depicted as follows: +// +// | | | | | | +// |-->|-->|-->|-->|\|/| +// |/|\| x | x | x | G | +// +// yielding a reward of -6 (minus 1 per time step). +// +// See pages 132 of Rich Sutton's book for details: +// http://www.incompleteideas.net/book/bookdraft2018mar21.pdf +// +// Parameters: +// "height" int rows of the board (default = 4) +// "width" int columns of the board (default = 8) +// "horizon" int maximum episode length (default = 100) + +namespace open_spiel { +namespace cliff_walking { + +// Constants. +inline constexpr int kNumPlayers = 1; +inline constexpr int kNumActions = 4; // Right, Up, Left, Down. + +inline constexpr int kDefaultHeight = 4; +inline constexpr int kDefaultWidth = 8; +inline constexpr int kDefaultHorizon = 100; + +class CliffWalkingGame; + +// State of an in-play game. +class CliffWalkingState : public State { + public: + CliffWalkingState(std::shared_ptr game); + CliffWalkingState(const CliffWalkingState&) = default; + + Player CurrentPlayer() const override; + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Rewards() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + void InformationStateTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + std::vector LegalActions() const override; + + protected: + void DoApplyAction(Action move) override; + + private: + // Check if player position is in bottom row between start and goal. + bool IsCliff(int row, int col) const; + + bool IsGoal(int row, int col) const; + + // Copied from CliffWalkingGame. + int height_; + int width_; + int horizon_; + + int player_row_; + int player_col_ = 0; + int time_counter_ = 0; +}; + +// Game object. 
+class CliffWalkingGame : public Game { + public: + explicit CliffWalkingGame(const GameParameters& params); + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new CliffWalkingState(shared_from_this())); + } + std::vector ObservationTensorShape() const override { + return {height_, width_}; + } + std::vector InformationStateTensorShape() const override { + return {kNumActions * horizon_}; + } + + int NumDistinctActions() const override { return kNumActions; } + int NumPlayers() const override { return kNumPlayers; } + double MaxUtility() const override { return -width_ - 1; } + double MinUtility() const override { return -horizon_ + 1 - 100; } + int MaxGameLength() const override { return horizon_; } + int Height() const { return height_; } + int Width() const { return width_; } + + private: + const int height_; + const int width_; + const int horizon_; +}; + +} // namespace cliff_walking +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_CLIFF_WALKING_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/cliff_walking/cliff_walking_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/cliff_walking/cliff_walking_test.cc new file mode 100644 index 0000000..f214e0f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/cliff_walking/cliff_walking_test.cc @@ -0,0 +1,39 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/cliff_walking/cliff_walking.h" + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace cliff_walking { +namespace { + +namespace testing = open_spiel::testing; + +void BasicCliffWalkingTests() { + testing::LoadGameTest("cliff_walking"); + testing::NoChanceOutcomesTest(*LoadGame("cliff_walking")); + testing::RandomSimTest(*LoadGame("cliff_walking"), 100); +} + +} // namespace +} // namespace cliff_walking +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::cliff_walking::BasicCliffWalkingTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/clobber/clobber.cc b/scenarios/bargaining/open_spiel/open_spiel/games/clobber/clobber.cc new file mode 100644 index 0000000..5203582 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/clobber/clobber.cc @@ -0,0 +1,445 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
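For completeness, a short usage sketch through the public OpenSpiel API (this assumes the OpenSpiel build produced by this diff's Dockerfile): it replays the optimal policy from the 3x5 example in the `cliff_walking.h` comment above and should terminate at the goal with a return of -6.

```
// Usage sketch (assumes the OpenSpiel build from this diff): replay the
// optimal policy for the 3x5 example documented in cliff_walking.h.
#include <iostream>
#include <memory>

#include "open_spiel/spiel.h"

int main() {
  std::shared_ptr<const open_spiel::Game> game =
      open_spiel::LoadGame("cliff_walking(height=3,width=5)");
  std::unique_ptr<open_spiel::State> state = game->NewInitialState();

  // Actions: 0 = RIGHT, 1 = UP, 2 = LEFT, 3 = DOWN (see the enum above).
  for (int a : {1, 0, 0, 0, 0, 3}) {
    state->ApplyAction(a);
  }
  std::cout << state->IsTerminal() << "\n";   // 1 (goal reached)
  std::cout << state->Returns()[0] << "\n";   // -6
  return 0;
}
```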
+ +#include "open_spiel/games/clobber/clobber.h" + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/tensor_view.h" + +namespace open_spiel { +namespace clobber { +namespace { + +// Constants. +inline constexpr int kCellStates = 1 + kNumPlayers; // Empty, White, and Black. +inline constexpr int kDefaultRows = 5; +inline constexpr int kDefaultColumns = 6; + +// Number of unique directions each piece can take. +constexpr int kNumDirections = 4; + +// Index 0: Direction is up (north), towards decreasing y. +// Index 1: Direction is right (east), towards increasing x. +// Index 2: Direction is down (south), towards increasing y. +// Index 3: Direction is left (west), towards decreasing x. +constexpr std::array kDirRowOffsets = {{-1, 0, 1, 0}}; +constexpr std::array kDirColumnOffsets = {{0, 1, 0, -1}}; + +// Facts about the game. +const GameType kGameType{/*short_name=*/"clobber", + /*long_name=*/"Clobber", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"rows", GameParameter(kDefaultRows)}, + {"columns", GameParameter(kDefaultColumns)}}}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new ClobberGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +int StateToPlayer(CellState state) { + switch (state) { + case CellState::kWhite: + return 0; + case CellState::kBlack: + return 1; + default: + SpielFatalError("No player id for this cell state"); + } +} + +CellState PlayerToState(Player player) { + switch (player) { + case 0: + return CellState::kWhite; + case 1: + return CellState::kBlack; + default: + SpielFatalError(absl::StrCat("Invalid player id ", player)); + return CellState::kEmpty; + } +} + +std::string StateToString(CellState state) { + switch (state) { + case CellState::kEmpty: + return "."; + case CellState::kWhite: + return "o"; + case CellState::kBlack: + return "x"; + default: + SpielFatalError("Unknown state."); + } +} + +CellState StringToState(std::string str) { + if (str == ".") { + return CellState::kEmpty; + } else if (str == "o") { + return CellState::kWhite; + } else if (str == "x") { + return CellState::kBlack; + } else { + SpielFatalError("Unknown state."); + } +} + +CellState OpponentState(CellState state) { + return PlayerToState(1 - StateToPlayer(state)); +} + +bool IsEven(int num) { return num % 2 == 0; } + +std::string RowLabel(int rows, int row) { + int row_number = 1 + (rows - 1 - row); + std::string label = std::to_string(row_number); + return label; +} + +std::string ColumnLabel(int column) { + std::string label = ""; + label += static_cast('a' + column); + return label; +} +} // namespace + +std::ostream& operator<<(std::ostream& stream, const CellState& state) { + switch (state) { + case CellState::kWhite: + return stream << "White"; + case CellState::kBlack: + return stream << "Black"; + case CellState::kEmpty: + return stream << "Empty"; + default: + SpielFatalError("Unknown cell state"); + } +} + 
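To illustrate the coordinate labels and direction offsets defined above, here is a standalone sketch that builds the same `a1b1`-style move string that `ActionToString` produces further below. The helper `MoveString` is illustrative only and not an OpenSpiel symbol.

```
// Standalone sketch of clobber's coordinate labelling: rows are numbered
// from the bottom starting at 1, columns are lettered from 'a', and a move
// string is start square followed by end square.
#include <array>
#include <cstdio>
#include <string>

constexpr std::array<int, 4> kDirRowOffsets = {-1, 0, 1, 0};     // N, E, S, W
constexpr std::array<int, 4> kDirColumnOffsets = {0, 1, 0, -1};

std::string MoveString(int rows, int start_row, int start_column,
                       int direction) {
  int end_row = start_row + kDirRowOffsets[direction];
  int end_column = start_column + kDirColumnOffsets[direction];
  auto row_label = [rows](int row) { return std::to_string(rows - row); };
  auto column_label = [](int column) {
    return std::string(1, static_cast<char>('a' + column));
  };
  return column_label(start_column) + row_label(start_row) +
         column_label(end_column) + row_label(end_row);
}

int main() {
  // On the default 5x6 board, moving the bottom-left piece east is "a1b1".
  std::printf("%s\n", MoveString(/*rows=*/5, /*start_row=*/4,
                                 /*start_column=*/0, /*direction=*/1).c_str());
  return 0;
}
```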
+ClobberState::ClobberState(std::shared_ptr game, int rows, + int columns) + : State(game), rows_(rows), columns_(columns) { + SPIEL_CHECK_GE(rows_, 1); + SPIEL_CHECK_GE(columns_, 1); + SPIEL_CHECK_LE(rows_, 99); // Only supports 1 and 2 digit row numbers. + SPIEL_CHECK_LE(columns_, 26); // Only 26 letters to represent columns. + + board_ = std::vector(rows_ * columns_, CellState::kEmpty); + + // Put the pieces on the board (checkerboard pattern) starting with + // the first player (White, or 'o') in the bottom left corner. + for (int row = rows_ - 1; row >= 0; row--) { + for (int column = 0; column < columns_; column++) { + if ((IsEven(row + (rows_ - 1)) && IsEven(column)) || + (!IsEven(row + (rows_ - 1)) && !IsEven(column))) { + SetBoard(row, column, CellState::kWhite); + } else { + SetBoard(row, column, CellState::kBlack); + } + } + } +} + +ClobberState::ClobberState(std::shared_ptr game, int rows, + int columns, const std::string& board_string) + : State(game), rows_(rows), columns_(columns) { + SPIEL_CHECK_GE(rows_, 1); + SPIEL_CHECK_GE(columns_, 1); + SPIEL_CHECK_LE(rows_, 99); // Only supports 1 and 2 digit row numbers. + SPIEL_CHECK_LE(columns_, 26); // Only 26 letters to represent columns. + SPIEL_CHECK_GE(board_string[0], '0'); + SPIEL_CHECK_LE(board_string[0], '1'); + SPIEL_CHECK_EQ(rows_ * columns_, board_string.length() - 1); + + board_ = std::vector(rows_ * columns_, CellState::kEmpty); + current_player_ = board_string[0] - '0'; + + // Create the board from the board string. The character 'o' is White + // (first player), 'x' is Black (second player), and the character '.' + // is an Empty cell. Population goes from top left to bottom right. + for (int row = 0; row < rows_; row++) { + for (int column = 0; column < columns_; column++) { + char state_character = board_string[1 + row * columns_ + column]; + CellState state = StringToState(std::string(1, state_character)); + SetBoard(row, column, state); + } + } + + // If the given state is terminal, the current player + // cannot play. Therefore, the other player wins. + if (!MovesRemaining()) { + outcome_ = 1 - current_player_; + } +} + +void ClobberState::DoApplyAction(Action action) { + std::vector values = + UnrankActionMixedBase(action, {rows_, columns_, kNumDirections}); + + const int start_row = values[0]; + const int start_column = values[1]; + const int direction = values[2]; + const int end_row = start_row + kDirRowOffsets[direction]; + const int end_column = start_column + kDirColumnOffsets[direction]; + + SPIEL_CHECK_TRUE(InBounds(start_row, start_column)); + SPIEL_CHECK_TRUE(InBounds(end_row, end_column)); + SPIEL_CHECK_EQ(BoardAt(start_row, start_column), + OpponentState(BoardAt(end_row, end_column))); + + SetBoard(end_row, end_column, BoardAt(start_row, start_column)); + SetBoard(start_row, start_column, CellState::kEmpty); + + // Does the other player have any moves left? 
+ if (!MovesRemaining()) { + outcome_ = current_player_; + } + + current_player_ = 1 - current_player_; + num_moves_++; +} + +std::string ClobberState::ActionToString(Player player, + Action action_id) const { + std::vector values = + UnrankActionMixedBase(action_id, {rows_, columns_, kNumDirections}); + + const int start_row = values[0]; + const int start_column = values[1]; + const int direction = values[2]; + const int end_row = start_row + kDirRowOffsets[direction]; + const int end_column = start_column + kDirColumnOffsets[direction]; + + std::string action_string = + absl::StrCat(ColumnLabel(start_column), RowLabel(rows_, start_row), + ColumnLabel(end_column), RowLabel(rows_, end_row)); + + return action_string; +} + +std::vector ClobberState::LegalActions() const { + std::vector move_list; + + if (IsTerminal()) { + return move_list; + } + + CellState current_player_state = PlayerToState(CurrentPlayer()); + std::vector action_bases = {rows_, columns_, kNumDirections}; + std::vector action_values = {0, 0, 0}; + + for (int row = 0; row < rows_; row++) { + for (int column = 0; column < columns_; column++) { + if (BoardAt(row, column) == current_player_state) { + for (int direction = 0; direction < kNumDirections; direction++) { + int adjacent_row = row + kDirRowOffsets[direction]; + int adjacent_column = column + kDirColumnOffsets[direction]; + + if (InBounds(adjacent_row, adjacent_column)) { + CellState adjacent_state = BoardAt(adjacent_row, adjacent_column); + CellState opponent_state = OpponentState(current_player_state); + + if (adjacent_state == opponent_state) { + // The adjacent cell is in bounds and contains the opponent + // player, therefore playing to this adjacent cell would be + // a valid move. + action_values[0] = row; + action_values[1] = column; + action_values[2] = direction; + + move_list.push_back( + RankActionMixedBase(action_bases, action_values)); + } + } + } + } + } + } + + return move_list; +} + +bool ClobberState::InBounds(int row, int column) const { + return (row >= 0 && row < rows_ && column >= 0 && column < columns_); +} + +std::string ClobberState::ToString() const { + std::string result = ""; + for (int r = 0; r < rows_; r++) { + // Ensure the row labels are aligned. + if (rows_ - r < 10 && rows_ >= 10) { + absl::StrAppend(&result, " "); + } + absl::StrAppend(&result, RowLabel(rows_, r)); + + for (int c = 0; c < columns_; c++) { + absl::StrAppend(&result, StateToString(BoardAt(r, c))); + } + + result.append("\n"); + } + + // Add an extra space to the bottom row + // if the row labels take up two spaces. 
+ if (rows_ >= 10) { + absl::StrAppend(&result, " "); + } + absl::StrAppend(&result, " "); + + for (int c = 0; c < columns_; c++) { + absl::StrAppend(&result, ColumnLabel(c)); + } + absl::StrAppend(&result, "\n"); + + return result; +} + +int ClobberState::ObservationPlane(CellState state, Player player) const { + if (state == CellState::kEmpty) { + return 2; + } + return (StateToPlayer(state) + player) % 2; +} + +bool ClobberState::MovesRemaining() const { + for (int row = 0; row < rows_; row++) { + for (int column = 0; column < columns_; column++) { + CellState current_cell_state = BoardAt(row, column); + + if (current_cell_state == CellState::kEmpty) { + continue; + } + + for (int direction = 0; direction < kNumDirections; direction++) { + int adjacent_row = row + kDirRowOffsets[direction]; + int adjacent_column = column + kDirColumnOffsets[direction]; + + if (InBounds(adjacent_row, adjacent_column)) { + CellState adjacent_state = BoardAt(adjacent_row, adjacent_column); + CellState opponent_state = OpponentState(current_cell_state); + + if (adjacent_state == opponent_state) { + return true; + } + } + } + } + } + + return false; +} + +bool ClobberState::IsTerminal() const { return outcome_ != kInvalidPlayer; } + +std::vector ClobberState::Returns() const { + if (outcome_ == kInvalidPlayer) { + return {0., 0.}; + } else if (outcome_ == Player{0}) { + return {1.0, -1.0}; + } else { + return {-1.0, 1.0}; + } +} + +std::string ClobberState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return HistoryString(); +} + +std::string ClobberState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void ClobberState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + TensorView<3> view(values, {kCellStates, rows_, columns_}, + true); + + // Observation Tensor Representation: + // Plane 0: 1's where the current player's pieces are, 0's elsewhere. + // Plane 1: 1's where the oppponent's pieces are, 0's elsewhere. + // Plane 2: 1's where the empty cells are, 0's elsewhere. 
+ for (int row = 0; row < rows_; row++) { + for (int column = 0; column < columns_; column++) { + int plane = ObservationPlane(BoardAt(row, column), player); + view[{plane, row, column}] = 1.0; + } + } +} + +void ClobberState::UndoAction(Player player, Action action) { + std::vector values = + UnrankActionMixedBase(action, {rows_, columns_, kNumDirections}); + + const int start_row = values[0]; + const int start_column = values[1]; + const int direction = values[2]; + const int end_row = start_row + kDirRowOffsets[direction]; + const int end_column = start_column + kDirColumnOffsets[direction]; + + current_player_ = player; + outcome_ = kInvalidPlayer; + num_moves_--; + + if (BoardAt(end_row, end_column) == CellState::kWhite) { + SetBoard(end_row, end_column, CellState::kBlack); + SetBoard(start_row, start_column, CellState::kWhite); + } else { + SetBoard(end_row, end_column, CellState::kWhite); + SetBoard(start_row, start_column, CellState::kBlack); + } + + history_.pop_back(); +} + +ClobberGame::ClobberGame(const GameParameters& params) + : Game(kGameType, params), + rows_(ParameterValue("rows")), + columns_(ParameterValue("columns")) {} + +int ClobberGame::NumDistinctActions() const { + return rows_ * columns_ * kNumDirections; +} + +} // namespace clobber +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/clobber/clobber.h b/scenarios/bargaining/open_spiel/open_spiel/games/clobber/clobber.h new file mode 100644 index 0000000..bd2d762 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/clobber/clobber.h @@ -0,0 +1,165 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_CLOBBER_H_ +#define OPEN_SPIEL_GAMES_CLOBBER_H_ + +// Implementation of the board game Clobber. +// https://en.wikipedia.org/wiki/Clobber +// +// Some notes about this implementation: +// - The two players: +// Clobber is a two player game. The two players in this +// implementation are 'o' (White, 0) and 'x' (Black, 1). In the +// default board of any size, the bottom left corner is always +// 'o' and continues in a checkerboard pattern from there. 'o' +// moves first in the default board. +// - Custom boards: +// A custom board can be used to initialize a state when calling +// either the ClobberState(rows, columns, board_string) constructer +// or ClobberGame's method NewInitialString(board_string). Where +// 'rows' and 'columns' are the number of rows and columns on the +// board respectively, and 'board_string' is a string representing +// the board. The format of board string is as follows: +// - The first character is either a '0' or '1', this indicates +// which player's turn it is (white or black respectively). +// - The next characters are either 'o', 'x', or '.' which +// represent white pieces, black pieces, or empty cells +// respectively. There must be rows * columns number of these +// characters following the first character. 
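Clobber actions above are `(row, column, direction)` triples packed with `RankActionMixedBase` / `UnrankActionMixedBase` over bases `{rows_, columns_, kNumDirections}`. The standalone round-trip sketch below assumes the usual most-significant-digit-first mixed-radix convention; the `Rank`/`Unrank` helpers are re-derivations for illustration, not the library functions.

```
// Standalone sketch of the mixed-radix packing used for clobber actions:
// action = (row * columns + column) * kNumDirections + direction.
#include <array>
#include <cstdio>

long long Rank(const std::array<int, 3>& bases,
               const std::array<int, 3>& digits) {
  long long action = 0;
  for (int i = 0; i < 3; ++i) action = action * bases[i] + digits[i];
  return action;
}

std::array<int, 3> Unrank(const std::array<int, 3>& bases, long long action) {
  std::array<int, 3> digits{};
  for (int i = 2; i >= 0; --i) {
    digits[i] = static_cast<int>(action % bases[i]);
    action /= bases[i];
  }
  return digits;
}

int main() {
  const std::array<int, 3> bases = {5, 6, 4};  // rows, columns, kNumDirections
  long long a = Rank(bases, {4, 0, 1});        // bottom-left piece, moving east
  std::array<int, 3> d = Unrank(bases, a);
  std::printf("action=%lld -> row=%d col=%d dir=%d\n", a, d[0], d[1], d[2]);
  return 0;
}
```

With the default 5x6 board this packing is consistent with `NumDistinctActions() = rows_ * columns_ * kNumDirections = 120`.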
+// For example, a state initialized from "1x.o.xo.x." on a game with +// 3 rows and 3 columns would have 'x' (Black, 1) play first on a +// 3x3 board with configuration: +// x.o +// .xo +// .x. +// - Observation tensor: +// This version implements a 3-plane observation tensor. Each plane +// has equal dimensions as the board. The first plane contains 1's\ +// where the current player's pieces are, and 0's elsewhere. The +// next plane contains 1's where their opponent's pieces are, and +// 0's elsewhere. Finally, the last plane consists of 1's where the +// empty cells are, and 0's elsewhere. + +#include +#include +#include + +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace clobber { + +inline constexpr int kNumPlayers = 2; + +// State of a cell. +enum class CellState { + kEmpty, // Represented by ' '. + kWhite, // Represented by 'o'. + kBlack, // Represented by 'x'. +}; + +// State of an in-play game. +class ClobberState : public State { + public: + explicit ClobberState(std::shared_ptr game, int rows, + int columns); + explicit ClobberState(std::shared_ptr game, int rows, int columns, + const std::string& board_string); + Player CurrentPlayer() const override { + return IsTerminal() ? kTerminalPlayerId : current_player_; + } + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override { + return std::unique_ptr(new ClobberState(*this)); + } + void UndoAction(Player player, Action action) override; + bool InBounds(int row, int column) const; + void SetBoard(int row, int column, CellState state) { + board_[row * columns_ + column] = state; + } + CellState BoardAt(int row, int column) const { + return board_[row * columns_ + column]; + } + std::vector LegalActions() const override; + + protected: + void DoApplyAction(Action action) override; + + private: + // Returns the appropriate plane for the cell's state and current + // player. If the cell's state is Empty, the plane is 2. Otherwise, the + // plane depends on both the state and the player. This method ensures + // that whichever player's turn it is, their pieces will be on plane 0, + // and their opponents will be on plane 1. + int ObservationPlane(CellState state, Player player) const; + + // This method takes advantage of the fact that in Clobber, a player + // has a move if-and-only-if the oppposing player also has that move. + // Therefore, at each board cell, just check if any adjacent cell has + // the opponent's piece on it. + bool MovesRemaining() const; + + Player current_player_ = 0; // Player zero (White, 'o') goes first. + Player outcome_ = kInvalidPlayer; + int num_moves_ = 0; + int rows_; + int columns_; + std::vector board_; +}; + +// Game object. 
+class ClobberGame : public Game { + public: + explicit ClobberGame(const GameParameters& params); + int NumDistinctActions() const override; + std::unique_ptr NewInitialState( + const std::string& board_string) const override { + return absl::make_unique(shared_from_this(), rows_, columns_, + board_string); + } + std::unique_ptr NewInitialState() const override { + return absl::make_unique(shared_from_this(), rows_, columns_); + } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { return -1; } + absl::optional UtilitySum() const override { return 0; } + double MaxUtility() const override { return 1; } + std::vector ObservationTensorShape() const override { + return {kNumPlayers + 1, rows_, columns_}; + } + // On every turn, one piece is taken out. The longest game occurs + // when the last player takes out the only remaining opponenent's + // piece with their last piece. Therefore, there is still one piece on + // the board. Hence, the maximum number of moves is # of cells - 1. + int MaxGameLength() const override { return rows_ * columns_ - 1; } + + private: + int rows_; + int columns_; +}; + +std::ostream& operator<<(std::ostream& stream, const CellState& state); + +} // namespace clobber +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_CLOBBER_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/clobber/clobber_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/clobber/clobber_test.cc new file mode 100644 index 0000000..d5adaf3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/clobber/clobber_test.cc @@ -0,0 +1,125 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
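Assuming the OpenSpiel build from this diff, the custom board-string constructor documented in `clobber.h` can be exercised as follows, using the 3x3 position `1x.o.xo.x.` from the comment above (Black to move).

```
// Usage sketch (assumes the OpenSpiel build from this diff): create a 3x3
// clobber position from its board string; the leading '1' means Black ('x')
// moves first.
#include <iostream>
#include <memory>

#include "open_spiel/spiel.h"

int main() {
  std::shared_ptr<const open_spiel::Game> game =
      open_spiel::LoadGame("clobber(rows=3,columns=3)");
  std::unique_ptr<open_spiel::State> state =
      game->NewInitialState("1x.o.xo.x.");

  std::cout << state->ToString();  // board rendered with row/column labels
  std::cout << "current player: " << state->CurrentPlayer() << "\n";  // 1
  std::cout << "legal moves: " << state->LegalActions().size() << "\n";
  return 0;
}
```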
+ +#include "open_spiel/games/clobber/clobber.h" + +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace clobber { +namespace { + +namespace testing = open_spiel::testing; + +double ValueAt(const std::vector& v, const std::vector& shape, + int plane, int x, int y) { + return v[plane * shape[1] * shape[2] + y * shape[2] + x]; +} + +void BasicClobberTests() { + testing::LoadGameTest("clobber"); + testing::NoChanceOutcomesTest(*LoadGame("clobber")); + + // Test game simulations on competition boards: + testing::RandomSimTest(*LoadGame("clobber"), 100); + testing::RandomSimTest(*LoadGame("clobber(rows=8,columns=8)"), 50); + testing::RandomSimTest(*LoadGame("clobber(rows=10,columns=10)"), 30); +} + +void TerminalReturnsTests() { + std::shared_ptr clobber2x2 = + LoadGame("clobber(rows=2,columns=2)"); + std::shared_ptr clobber4x4 = + LoadGame("clobber(rows=4,columns=4)"); + std::shared_ptr clobber5x6 = + LoadGame("clobber(rows=5,columns=6)"); + + ClobberState end_state1(clobber2x2, 2, 2, "0xxxx"); + SPIEL_CHECK_EQ(end_state1.IsTerminal(), true); + SPIEL_CHECK_EQ(end_state1.Returns(), (std::vector{-1.0, 1.0})); + + ClobberState end_state2(clobber2x2, 2, 2, "1oooo"); + SPIEL_CHECK_EQ(end_state2.IsTerminal(), true); + SPIEL_CHECK_EQ(end_state2.Returns(), (std::vector{1.0, -1.0})); + + ClobberState end_state3(clobber2x2, 2, 2, "1x.x."); + SPIEL_CHECK_EQ(end_state3.IsTerminal(), true); + SPIEL_CHECK_EQ(end_state3.Returns(), (std::vector{1.0, -1.0})); + + ClobberState end_state4(clobber2x2, 2, 2, "0o..x"); + SPIEL_CHECK_EQ(end_state4.IsTerminal(), true); + SPIEL_CHECK_EQ(end_state4.Returns(), (std::vector{-1.0, 1.0})); + + ClobberState end_state5(clobber4x4, 4, 4, "0o..xo.......x..o"); + SPIEL_CHECK_EQ(end_state5.IsTerminal(), true); + SPIEL_CHECK_EQ(end_state5.Returns(), (std::vector{-1.0, 1.0})); + + ClobberState ongoing_state(clobber5x6, 5, 6, + "0ox..ox..oxoxox..ox..oxoxoxoxox"); + SPIEL_CHECK_EQ(ongoing_state.IsTerminal(), false); +} + +void ObservationTensorTests() { + std::shared_ptr clobber8x8 = + LoadGame("clobber(rows=8,columns=8)"); + std::unique_ptr clobber_state = clobber8x8->NewInitialState(); + auto shape = clobber8x8->ObservationTensorShape(); + auto v = clobber_state->ObservationTensor(clobber_state->CurrentPlayer()); + + SPIEL_CHECK_EQ(ValueAt(v, shape, 0, 4, 4), 0.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 0, 5, 6), 1.0); + + SPIEL_CHECK_EQ(ValueAt(v, shape, 1, 7, 2), 0.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 1, 4, 6), 1.0); + + SPIEL_CHECK_EQ(ValueAt(v, shape, 2, 2, 2), 0.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 2, 1, 6), 0.0); + + std::vector legal_actions = clobber_state->LegalActions(); + bool action_performed = false; + for (Action action : legal_actions) { + if (clobber_state->ActionToString(action) == "a1b1") { + clobber_state->ApplyAction(action); + action_performed = true; + break; + } + } + + if (!action_performed) { + return; + } + + clobber_state->ObservationTensor(clobber_state->CurrentPlayer(), + absl::MakeSpan(v)); + + SPIEL_CHECK_EQ(ValueAt(v, shape, 0, 7, 2), 0.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 0, 4, 6), 1.0); + + SPIEL_CHECK_EQ(ValueAt(v, shape, 1, 4, 4), 0.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 1, 5, 6), 1.0); + + SPIEL_CHECK_EQ(ValueAt(v, shape, 2, 2, 2), 0.0); + SPIEL_CHECK_EQ(ValueAt(v, shape, 2, 0, 7), 1.0); +} + +} // namespace +} // namespace clobber +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::clobber::BasicClobberTests(); + open_spiel::clobber::TerminalReturnsTests(); + 
open_spiel::clobber::ObservationTensorTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/coin_game/coin_game.cc b/scenarios/bargaining/open_spiel/open_spiel/games/coin_game/coin_game.cc new file mode 100644 index 0000000..543f19a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/coin_game/coin_game.cc @@ -0,0 +1,478 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/coin_game/coin_game.h" + +#include + +#include "open_spiel/game_parameters.h" + +namespace open_spiel { +namespace coin_game { + +// Defaults match the paper https://arxiv.org/pdf/1802.09640.pdf +constexpr int kDefaultPlayers = 2; +constexpr int kDefaultRows = 8; +constexpr int kDefaultColumns = 8; +constexpr int kDefaultExtraCoinColors = 1; +constexpr int kDefaultCoinsPerColor = 4; +constexpr int kDefaultEpisodeLength = 20; + +namespace { + +// Facts about the game +const GameType kGameType{ + /*short_name=*/"coin_game", + /*long_name=*/"The Coin Game", + GameType::Dynamics::kSequential, + // Getting a NewInitialState randomly initializes player and coin positions + // and player preferences, but from that point on no chance nodes are + // involved. + GameType::ChanceMode::kExplicitStochastic, + // Imperfect information game because players only know their own preferred + // coin. + GameType::Information::kImperfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/10, + /*min_num_players=*/1, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/false, + /*parameter_specification=*/ + { + {"players", GameParameter(kDefaultPlayers)}, + {"rows", GameParameter(kDefaultRows)}, + {"columns", GameParameter(kDefaultColumns)}, + {"episode_length", GameParameter(kDefaultEpisodeLength)}, + // Number of extra coin colors to use apart from the + // players' preferred color. 
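+         // (The total number of coin colors used is
+         // players + num_extra_coin_colors; see the CoinGame constructor.)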
+ {"num_extra_coin_colors", GameParameter(kDefaultExtraCoinColors)}, + {"num_coins_per_color", GameParameter(kDefaultCoinsPerColor)}, + }}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new CoinGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +std::string GamePhaseToString(GamePhase phase) { + switch (phase) { + case GamePhase::kAssignPreferences: + return "AssignPreferences"; + case GamePhase::kDeployPlayers: + return "DeployPlayers"; + case GamePhase::kDeployCoins: + return "DeployCoins"; + case GamePhase::kPlay: + return "Play"; + default: + SpielFatalError("Unknown phase."); + return "This will never return."; + } +} + +enum struct SymbolType { kEmpty = 0, kCoin = 1, kPlayer = 2 }; +constexpr char kEmptySymbol = ' '; + +SymbolType GetSymbolType(char symbol) { + if (symbol == kEmptySymbol) { + return SymbolType::kEmpty; + } else if ('a' <= symbol && symbol <= 'z') { + return SymbolType::kCoin; + } else if ('0' <= symbol && symbol <= '9') { + return SymbolType::kPlayer; + } + SpielFatalError(absl::StrCat("Unexpected symbol: ", std::string(1, symbol))); +} + +inline char PlayerSymbol(Player player) { + return '0' + static_cast(player); +} +inline char CoinSymbol(int coin) { return 'a' + static_cast(coin); } +int CoinId(char symbol) { return symbol - 'a'; } + +// Movement. +enum MovementType { kUp = 0, kDown = 1, kLeft = 2, kRight = 3, kStand = 4 }; + +constexpr std::array offsets = { + {{-1, 0}, {1, 0}, {0, -1}, {0, 1}, {0, 0}}}; + +Location operator+(const Location& a, const Location& b) { + return {a.first + b.first, a.second + b.second}; +} + +std::set RangeAsSet(int n) { + std::set result; + for (int i = 0; i < n; i++) { + result.insert(i); + } + return result; +} + +std::vector Range(int n) { + std::vector result(n); + for (int i = 0; i < n; i++) { + result[i] = i; + } + return result; +} + +ActionsAndProbs ActionProbRange(const std::set set) { + ActionsAndProbs result; + result.reserve(set.size()); + const double prob = 1.0 / set.size(); + for (int elem : set) { + result.push_back({elem, prob}); + } + return result; +} + +std::vector ActionRange(const std::set set) { + std::vector result; + result.reserve(set.size()); + for (int elem : set) { + result.push_back(elem); + } + return result; +} +} // namespace + +Setup::Setup(int num_rows, int num_columns, int num_coin_colors) + : available_coin_colors_(RangeAsSet(num_coin_colors)), + available_positions_(RangeAsSet(num_rows * num_columns)) {} + +CoinState::CoinState(std::shared_ptr game) + : State(game), + parent_game_(static_cast(*game)), + setup_(parent_game_.NumRows(), parent_game_.NumColumns(), + parent_game_.NumCoinColors()), + player_preferences_(game->NumPlayers()), + player_location_(game->NumPlayers()), + field_(parent_game_.NumRows() * parent_game_.NumColumns(), kEmptySymbol), + player_coins_(game->NumPlayers() * parent_game_.NumCoinColors(), 0) {} + +GamePhase CoinState::GetPhase() const { + if (cur_player_ != kChancePlayerId) { + return GamePhase::kPlay; + } else if (setup_.num_players_assigned_preference < num_players_) { + return GamePhase::kAssignPreferences; + } else if (setup_.num_players_on_field < num_players_) { + return GamePhase::kDeployPlayers; + } else if (setup_.num_coins_on_field < parent_game_.TotalCoins()) { + return GamePhase::kDeployCoins; + } else { + SpielFatalError("Inconsistent setup versus current_player state"); + } +} + +std::vector CoinState::LegalActions() const { + if 
(IsTerminal()) return {}; + switch (GetPhase()) { + case GamePhase::kAssignPreferences: + return ActionRange(setup_.available_coin_colors_); + case GamePhase::kDeployPlayers: + return ActionRange(setup_.available_positions_); + case GamePhase::kDeployCoins: + return ActionRange(setup_.available_positions_); + case GamePhase::kPlay: + return Range(offsets.size()); + default: + SpielFatalError("Unknown phase."); + } +} + +ActionsAndProbs CoinState::ChanceOutcomes() const { + switch (GetPhase()) { + case GamePhase::kAssignPreferences: + return ActionProbRange(setup_.available_coin_colors_); + case GamePhase::kDeployPlayers: + return ActionProbRange(setup_.available_positions_); + case GamePhase::kDeployCoins: + return ActionProbRange(setup_.available_positions_); + case GamePhase::kPlay: + SpielFatalError("ChanceOutcomes invoked in play phase"); + default: + SpielFatalError("Unknown phase."); + return {}; + } +} + +std::string CoinState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + std::ostringstream out; + // A player only learns its own preference. + out << player_preferences_[player] << "\n"; + // Table of how many coins of each type were collected by each player. + PrintCoinsCollected(out); + // Current positions of all coins and players on the board. + PrintBoard(out); + return out.str(); +} + +bool CoinState::InBounds(Location loc) const { + return (loc.first >= 0 && loc.second >= 0 && + loc.first < parent_game_.NumRows() && + loc.second < parent_game_.NumColumns()); +} + +void CoinState::SetField(Location loc, char symbol) { + field_[loc.first * parent_game_.NumColumns() + loc.second] = symbol; +} + +char CoinState::GetField(Location loc) const { + return field_[loc.first * parent_game_.NumColumns() + loc.second]; +} + +Location CoinState::LocationFromIndex(int index) const { + return {index / parent_game_.NumColumns(), index % parent_game_.NumColumns()}; +} + +void CoinState::ApplyAssignPreferenceAction(Action coin_color) { + SPIEL_CHECK_LT(coin_color, parent_game_.NumCoinColors()); + player_preferences_[setup_.num_players_assigned_preference] = coin_color; + ++setup_.num_players_assigned_preference; + setup_.available_coin_colors_.erase(coin_color); +} + +void CoinState::ApplyDeployPlayersAction(Action index) { + SPIEL_CHECK_LT(index, field_.size()); + SPIEL_CHECK_TRUE(GetSymbolType(field_[index]) == SymbolType::kEmpty); + field_[index] = PlayerSymbol(setup_.num_players_on_field); + player_location_[setup_.num_players_on_field] = LocationFromIndex(index); + ++setup_.num_players_on_field; + setup_.available_positions_.erase(index); +} + +void CoinState::ApplyDeployCoinsAction(Action index) { + SPIEL_CHECK_LT(index, field_.size()); + SPIEL_CHECK_TRUE(GetSymbolType(field_[index]) == SymbolType::kEmpty); + + int coin_color = setup_.num_coins_on_field / parent_game_.NumCoinsPerColor(); + field_[index] = CoinSymbol(coin_color); + ++setup_.num_coins_on_field; + setup_.available_positions_.erase(index); + + if (setup_.num_coins_on_field == parent_game_.TotalCoins()) { + // Switch to play phase. + setup_.available_positions_.clear(); // Release memory. + setup_.available_coin_colors_.clear(); // Release memory. 
+ cur_player_ = 0; + } +} + +void CoinState::ApplyPlayAction(Action move) { + ++total_moves_; + + Location old_loc = player_location_[cur_player_]; + SPIEL_CHECK_EQ(GetField(old_loc), PlayerSymbol(cur_player_)); + + Location new_loc = old_loc + offsets[move]; + if (InBounds(new_loc)) { + char target = GetField(new_loc); + SymbolType target_type = GetSymbolType(target); + if (target_type == SymbolType::kCoin) { + IncPlayerCoinCount(cur_player_, CoinId(target)); + } + if (target_type == SymbolType::kCoin || target_type == SymbolType::kEmpty) { + player_location_[cur_player_] = new_loc; + SetField(old_loc, kEmptySymbol); + SetField(new_loc, PlayerSymbol(cur_player_)); + } + } + cur_player_ = (cur_player_ + 1) % num_players_; +} + +void CoinState::DoApplyAction(Action action) { + switch (GetPhase()) { + case GamePhase::kAssignPreferences: + ApplyAssignPreferenceAction(action); + break; + case GamePhase::kDeployPlayers: + ApplyDeployPlayersAction(action); + break; + case GamePhase::kDeployCoins: + ApplyDeployCoinsAction(action); + break; + case GamePhase::kPlay: + ApplyPlayAction(action); + break; + } +} + +void CoinState::IncPlayerCoinCount(Player player, int coin_color) { + player_coins_[player * parent_game_.NumCoinColors() + coin_color]++; +} + +int CoinState::GetPlayerCoinCount(Player player, int coin_color) const { + return player_coins_[player * parent_game_.NumCoinColors() + coin_color]; +} + +std::string CoinState::ActionToString(Player player, Action action_id) const { + if (player == kChancePlayerId) { + return absl::StrCat(action_id); + } else { + if (action_id == kUp) { + return "up"; + } else if (action_id == kDown) { + return "down"; + } else if (action_id == kLeft) { + return "left"; + } else if (action_id == kRight) { + return "right"; + } else if (action_id == kStand) { + return "stand"; + } else { + SpielFatalError(absl::StrCat("Unexpected action ", action_id)); + } + } +} + +void CoinState::PrintCoinsCollected(std::ostream& out) const { + // Prints table with players as rows and coin_colors as columns. 
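+  // Illustrative output (assuming 2 players and 3 coin colors a, b, c, where
+  // player 0 has collected two 'a' coins and player 1 one 'c' coin):
+  //          a b c
+  //  player0 2 0 0
+  //  player1 0 0 1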
+ out << " "; + for (int coint_color = 0; coint_color < parent_game_.NumCoinColors(); + coint_color++) { + out << CoinSymbol(coint_color) << " "; + } + out << "\n"; + for (auto player = Player{0}; player < num_players_; player++) { + out << "player" << player << " "; + for (int coint_color = 0; coint_color < parent_game_.NumCoinColors(); + coint_color++) { + out << GetPlayerCoinCount(player, coint_color) << " "; + } + out << "\n"; + } +} + +void CoinState::PrintPreferences(std::ostream& out) const { + out << "preferences="; + for (Player player = 0; player < setup_.num_players_assigned_preference; + player++) { + out << player << ":" << CoinSymbol(player_preferences_[player]) << " "; + } + out << "\n"; +} + +void CoinState::PrintBoardDelimiterRow(std::ostream& out) const { + out << "+"; + for (int c = 0; c < parent_game_.NumColumns(); c++) { + out << "-"; + } + out << "+\n"; +} + +void CoinState::PrintBoard(std::ostream& out) const { + PrintBoardDelimiterRow(out); + for (int r = 0; r < parent_game_.NumRows(); r++) { + out << "|"; + for (int c = 0; c < parent_game_.NumColumns(); c++) { + out << GetField({r, c}); + } + out << "|\n"; + } + PrintBoardDelimiterRow(out); +} + +std::string CoinState::ToString() const { + std::ostringstream out; + out << "phase=" << GamePhaseToString(GetPhase()) << "\n"; + PrintPreferences(out); + out << "moves=" << total_moves_ << "\n"; + PrintCoinsCollected(out); + PrintBoard(out); + return out.str(); +} + +bool CoinState::IsTerminal() const { + return total_moves_ >= parent_game_.EpisodeLength(); +} + +std::vector CoinState::Returns() const { + if (!IsTerminal()) { + return std::vector(num_players_, 0.0); + } + + int collected_coins = 0; + std::vector coin_count(parent_game_.NumCoinColors()); + for (int coin_color = 0; coin_color < parent_game_.NumCoinColors(); + coin_color++) { + for (auto player = Player{0}; player < num_players_; player++) { + Player player_coins = GetPlayerCoinCount(player, coin_color); + coin_count[coin_color] += player_coins; + collected_coins += player_coins; + } + } + int good_coins = 0; + for (int preference : player_preferences_) { + good_coins += coin_count[preference]; + } + const int bad_coins = collected_coins - good_coins; + std::vector rewards(num_players_); + for (auto player = Player{0}; player < num_players_; player++) { + int self_coins = coin_count[player_preferences_[player]]; + int other_coins = good_coins - self_coins; + rewards[player] = (std::pow(self_coins, 2) + std::pow(other_coins, 2) - + std::pow(bad_coins, 2)); + } + return rewards; +} + +std::unique_ptr CoinState::Clone() const { + return std::unique_ptr(new CoinState(*this)); +} + +int CoinState::CurrentPlayer() const { + return IsTerminal() ? 
kTerminalPlayerId : cur_player_; +} + +double CoinGame::MaxUtility() const { return std::pow(TotalCoins(), 2); } + +double CoinGame::MinUtility() const { return -MaxUtility(); } + +CoinGame::CoinGame(const GameParameters& params) + : Game(kGameType, params), + num_players_(ParameterValue("players")), + num_rows_(ParameterValue("rows")), + num_columns_(ParameterValue("columns")), + episode_length_(ParameterValue("episode_length")), + num_coin_colors_(num_players_ + + ParameterValue("num_extra_coin_colors")), + num_coins_per_color_(ParameterValue("num_coins_per_color")) { + int total_items = num_players_ + num_coin_colors_ * num_coins_per_color_; + SPIEL_CHECK_LE(total_items, num_rows_ * num_columns_); +} + +int CoinGame::MaxGameLength() const { return (episode_length_); } + +// Chance nodes must not be considered in NumDistinctActions. +int CoinGame::NumDistinctActions() const { return offsets.size(); } + +int CoinGame::MaxChanceOutcomes() const { + return std::max(num_coin_colors_, num_rows_ * num_columns_); +} + +std::unique_ptr CoinGame::NewInitialState() const { + return std::unique_ptr(new CoinState(shared_from_this())); +} + +} // namespace coin_game +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/coin_game/coin_game.h b/scenarios/bargaining/open_spiel/open_spiel/games/coin_game/coin_game.h new file mode 100644 index 0000000..ec91b0e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/coin_game/coin_game.h @@ -0,0 +1,155 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_COIN_GAME_H_ +#define OPEN_SPIEL_GAMES_COIN_GAME_H_ + +#include +#include + +#include "open_spiel/spiel.h" + +// An implementation of the 'Coin Game'. Different descriptions of this game +// exist with slightly different rules. In particular: +// a) "Modeling Others using Oneself in Multi-Agent Reinforcement Learning" +// (https://arxiv.org/abs/1802.09640) +// b) "Maintaining cooperation in complex social dilemmas using deep +// reinforcement learning" (https://arxiv.org/abs/1707.01068) +// c) "Learning with Opponent-Learning Awareness" +// (https://arxiv.org/abs/1709.04326) +// The current implementation follows the description given in a). +// +// Players live on a a grid, which also contains coins of different colors. +// Players can collect coins by moving around and walking into the coin's +// square. They can move in all directions or choose not to move at all. +// If a player would move outside of the grid or into the square of another +// player, they stay where they are. +// Each player has a preferred color. They are rewarded for collecting +// coins of their own or other players' preference, but punished for collecting +// coins that are no one's preference. Players initially only know their own +// coin preference. The initial positions of players and coins on the board is +// randomized, as well as the players' color preferences. 
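+// For example, under the scoring rule in CoinState::Returns()
+// (reward = self_coins^2 + other_coins^2 - bad_coins^2, where counts are
+// totals over all players' collections): if the two players prefer different
+// colors 'a' and 'b', and in total 3 'a' coins, 1 'b' coin and 2 coins of
+// non-preferred colors are collected, the returns are 3^2 + 1^2 - 2^2 = 6 and
+// 1^2 + 3^2 - 2^2 = 6 respectively.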
+// Players move sequentially, in fixed order, starting with player 0. + +namespace open_spiel { +namespace coin_game { + +class CoinGame; +using Location = std::pair; + +// Different phases of the game, first setup, then play. +enum struct GamePhase { + kAssignPreferences = 0, + kDeployPlayers = 1, + kDeployCoins = 2, + kPlay = 3 +}; + +// Part of CoinState related to the setup phase. +struct Setup { + Setup(int num_rows, int num_columns, int num_coin_colors); + std::set available_coin_colors_; + std::set available_positions_; + int num_players_assigned_preference = 0; + int num_players_on_field = 0; + int num_coins_on_field = 0; +}; + +class CoinState : public State { + public: + explicit CoinState(std::shared_ptr game); + CoinState(const CoinState&) = default; + + Player CurrentPlayer() const override; + std::vector LegalActions() const override; + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::unique_ptr Clone() const override; + + ActionsAndProbs ChanceOutcomes() const override; + std::string ObservationString(Player player) const override; + + protected: + void DoApplyAction(Action action) override; + + private: + GamePhase GetPhase() const; + Location LocationFromIndex(int index) const; + char GetField(Location loc) const; + void SetField(Location loc, char symbol); + bool InBounds(Location loc) const; + int GetPlayerCoinCount(Player player, int coin_color) const; + void IncPlayerCoinCount(Player player, int coin_color); + + void PrintCoinsCollected(std::ostream& out) const; + void PrintPreferences(std::ostream& out) const; + void PrintBoardDelimiterRow(std::ostream& out) const; + void PrintBoard(std::ostream& out) const; + + void ApplyDeployPlayersAction(Action index); + void ApplyDeployCoinsAction(Action index); + void ApplyAssignPreferenceAction(Action coin_color); + void ApplyPlayAction(Action move); + + const CoinGame& parent_game_; + + Setup setup_; + Player cur_player_ = + kChancePlayerId; // Chance player for setting up the game. + int total_moves_ = 0; + std::vector player_preferences_; + std::vector player_location_; + // num_rows x num_columns representation of playing field. + std::vector field_; + // num_players x num_coin_colors representation of how many coins each player + // collected. + std::vector player_coins_; +}; + +class CoinGame : public Game { + public: + explicit CoinGame(const GameParameters& params); + + int NumDistinctActions() const override; + int MaxChanceOutcomes() const override; + std::unique_ptr NewInitialState() const override; + int NumPlayers() const override { return num_players_; } + double MaxUtility() const override; + double MinUtility() const override; + int MaxGameLength() const override; + // TODO: verify whether this bound is tight and/or tighten it. 
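+  // Note: the setup phase appears to draw 2 * NumPlayers() + TotalCoins()
+  // chance outcomes (preference, player placement, and coin placement), so
+  // the MaxGameLength() bound returned below may be loose, or too small for
+  // short episode_length settings.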
+ int MaxChanceNodesInHistory() const override { return MaxGameLength(); } + + int NumRows() const { return num_rows_; } + int NumColumns() const { return num_columns_; } + int EpisodeLength() const { return episode_length_; } + int NumCoinColors() const { return num_coin_colors_; } + int NumCoinsPerColor() const { return num_coins_per_color_; } + int TotalCoins() const { return num_coin_colors_ * num_coins_per_color_; } + + private: + int num_players_; + int num_rows_; + int num_columns_; + int episode_length_; + int num_coin_colors_; + int num_coins_per_color_; +}; + +} // namespace coin_game +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_COIN_GAME_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/coin_game/coin_game_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/coin_game/coin_game_test.cc new file mode 100644 index 0000000..18c3369 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/coin_game/coin_game_test.cc @@ -0,0 +1,60 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/algorithms/get_all_states.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace coin_game { + +void BasicCoinGameTests() { + testing::LoadGameTest("coin_game"); + testing::RandomSimTest(*LoadGame("coin_game"), 10); + testing::RandomSimTest( + *LoadGame("coin_game", + { + {"players", GameParameter(3)}, + {"rows", GameParameter(7)}, + {"columns", GameParameter(10)}, + {"num_extra_coin_colors", GameParameter(2)}, + {"episode_length", GameParameter(100)}, + {"num_coins_per_color", GameParameter(2)}, + }), + 10); +} + +void GetAllStatesTest() { + // Getting all states (on a small game) can find corner case bugs. 
+ const std::shared_ptr game = + LoadGame("coin_game", {{"players", GameParameter(2)}, + {"rows", GameParameter(2)}, + {"columns", GameParameter(3)}, + {"num_extra_coin_colors", GameParameter(0)}, + {"episode_length", GameParameter(2)}, + {"num_coins_per_color", GameParameter(2)}}); + auto states = algorithms::GetAllStates(*game, + /*depth_limit=*/-1, + /*include_terminals=*/true, + /*include_chance_states=*/false); + SPIEL_CHECK_EQ(states.size(), 4296); +} +} // namespace coin_game +} // namespace open_spiel + +int main(int argc, char **argv) { + open_spiel::coin_game::BasicCoinGameTests(); + open_spiel::coin_game::GetAllStatesTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/colored_trails/boards100.txt b/scenarios/bargaining/open_spiel/open_spiel/games/colored_trails/boards100.txt new file mode 100644 index 0000000..db04252 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/colored_trails/boards100.txt @@ -0,0 +1,100 @@ +4 5 3 ECCCCBEBBCDACAEE AAACDE AACDDDE BCC 3 4 15 13 +4 5 3 BEDCDCCECADCADED AACC AABCD ABBCEEEE 5 2 11 0 +4 5 3 ABAEEEBCCABCADBA BBCDEEE BBCDDDEE AACCE 4 8 10 14 +4 5 3 EDDDEABACDEADADE EEE ABDEE BCCC 1 11 5 8 +4 5 3 BCBBCCBAEDECCEBD CCCD ACCCCCDE ABBEEE 4 11 7 12 +4 5 3 EACCEDAEEDEAEBCC AAABBD AAABCCDD BBCDDDEE 8 12 7 10 +4 5 3 ABBDACCEECBBCADD ACDDEE ADDE AAABBBEE 4 6 2 11 +4 5 3 BAADBADBDCDACEAD ABE AABCCCE CCDDEE 5 7 1 9 +4 5 3 BBBEEABDBDBDDCBC AABDEE ABBE AAAACDDE 14 4 8 7 +4 5 3 EEEBEAACACEBBBBE ABBCDDE AAABE BBBDEE 0 3 5 10 +4 5 3 DDCADEEBCDEEECDC AACCE AADE BCDE 14 0 9 3 +4 5 3 ACCCEABCDBACEBDD CCD ACDE BBBDDEE 4 8 7 9 +4 5 3 DBADDEDABBEEEBBE CCDDDE AACDDDEE BBDDDDE 10 2 4 14 +4 5 3 ABECBDBECEBBCCCD ABBC AABCCCD ACCDEEE 10 1 14 7 +4 5 3 CAAEBCEBAAAABACE AACDE ACC BBCCCEE 15 2 14 4 +4 5 3 BEABBDBAABDEBDCB BEE BCEE AABDD 9 6 12 1 +4 5 3 CDDDCBDCADBCCCAA AAACC AAD BBBBDDDE 10 7 15 5 +4 5 3 DBECCBDEAAABAEAE ABD ABBD CDDEE 8 1 9 7 +4 5 3 CBADEEABEEBCCCAC AACCCE AAEE BBCCDDD 8 5 10 1 +4 5 3 EBABDAEBCACDEEBA ABBBD ABBE ADDEE 7 8 3 0 +4 5 3 CCACDDEDDBBDDCED CCD ABBCD ABDDEE 15 5 12 14 +4 5 3 BDECBAEBBCCCBEBD ABCCC CCDE AABBDE 4 14 1 12 +4 5 3 CCDECDEDABBDBAEE BDDEE AAADEE ABCCCDD 3 14 15 4 +4 5 3 CDCCEEBADBEADDDC ACCEE AACCD BBBDDDD 1 6 11 9 +4 5 3 EEEBEBACAAACEEDB ACCD AABB BCDEE 10 15 4 13 +4 5 3 EECCEDADEDEDABBD ABBDE AABCCEE ABEEE 0 11 15 12 +4 5 3 ECCCABDAEDABEDAD BBE BBCDDDE AACDD 9 3 12 4 +4 5 3 AAAAEBECEEEDCCDE AABCDEE ABCDDE BBBCCCD 7 14 4 12 +4 5 3 ADBCACECECBCACED BCEEE CCC BCDDDE 7 9 12 1 +4 5 3 CAADECCABDBCABAB CCCDDE ABCCDE AAABDDE 0 13 15 3 +4 5 3 ABADCCBDACECEBBA BCDD AAABDEE AABCDDD 10 3 4 15 +4 5 3 ACCDDCCDAECDEECD BCDEE BCDD AAABCEE 6 2 8 0 +4 5 3 AAEECEAADBDCDEAB DEE CCDEE ABC 0 2 7 5 +4 5 3 EADEEDBABCDADCEA BBCCE ACDE AADDEEE 4 7 12 15 +4 5 3 CBCAAEBCADDAEDAB BCD ADDDEE AAAABE 7 2 0 14 +4 5 3 EDDAECEDBCAEDDCC CCC BBBDE AAADDDEE 10 9 12 2 +4 5 3 ACCBDCEACAEBDACC BCCEE CDE AABBDDD 14 6 0 9 +4 5 3 BBADCCDEAECCEABA BBCDE ABCCDDE AACDDE 10 12 0 2 +4 5 3 BBCBDDDABAAABCEC ABCDE ABC AABCE 3 6 4 11 +4 5 3 EEEDBBCBAEDAEDDC BCD ABBDE AABCCEE 9 3 14 8 +4 5 3 ACECDEAAAEADBAAE AACC BBCDE BBDEEEE 7 10 2 5 +4 5 3 DEECCBCCADBACEAB BBD BBCDE AAABCCD 10 6 0 15 +4 5 3 CCAEBECEBBDCCADE AACCD ABBBBD BCCCE 15 11 3 1 +4 5 3 CBBBAEABECBEAADA BCCDDDEE AABE AAAABBCC 15 3 13 1 +4 5 3 CDBEABAAEBAAAADC AABCC ABCDDE ABCDEE 0 9 13 3 +4 5 3 EDDEBCABDDBAAEBD ABBBCCC ABCEE ADDEE 1 6 15 13 +4 5 3 BCEAAADBDCDCAAEB AAAABBCD ABBCDD CCCCCEEE 2 12 11 14 +4 5 3 CBEDAEABCEEBDDDD AADDDDD AAABDD ACEE 14 10 7 4 +4 5 3 BDAACBCAEDDBCEDD AADD AACCDDD 
ABBDEEEE 15 4 2 13 +4 5 3 CEDBBACABBCBCCED AACCE ACD BBBDD 11 5 1 4 +4 5 3 ACBDDDDBDAEBBCBA CCCCDDEE BBCDDE AABBCCEE 2 15 0 9 +4 5 3 BAEEEDCCCCAEBABA ABDDEEE ADDEE AABBCC 5 3 2 11 +4 5 3 ECCCAEEEDEBEBCCE ABCE AABBCEE CCDDD 11 7 5 8 +4 5 3 EACCBABCEBCADCAE AABBDE ABDDD BCCCDEEE 7 5 2 13 +4 5 3 EACEAEBBADADECCE BBBCDE BBDDDE AAABBC 0 6 13 8 +4 5 3 ADADADABAEBBEADB AABBCCEE ABBEEE BCDDDE 12 10 15 3 +4 5 3 DCEACCCEADBBAEEA ABCCD AABCD ACDEE 13 14 9 7 +4 5 3 BADDCEBAABDBBBBA AADE ABCCCEE BBBCDEE 15 4 3 11 +4 5 3 CBEBDBADAEDEEDBD DDEE AADDDE BBCDDE 5 4 10 2 +4 5 3 BCBACBEDAADBCBEB AAAACDEE ABCCDEE AABBBCC 8 1 9 15 +4 5 3 DBCDABAABAEEADDD AAC AACE BBCDDD 1 15 12 5 +4 5 3 CBAEAEAADADBEABB BCDDDEE BBCDEEE AACCD 15 2 6 8 +4 5 3 DCACCDDCBCDCCAEE AAABBCCE AABCCCE ABCDDD 12 13 15 1 +4 5 3 DDDAEAADCECCEECC AABCCDD AAABCC CDDEE 6 10 11 8 +4 5 3 CCADDCADECAADBDB ABDEE AAB BCDDDEE 4 12 7 14 +4 5 3 BDEBAAEBBACBEADD BBBBDEEE BBCCCD ADE 8 2 10 11 +4 5 3 ABECCBAADBBCEECB AABCEE ABBBB ACD 13 12 6 0 +4 5 3 AECEDBCECBECCBDE AACCCDE CCCDD ABDEE 8 6 9 11 +4 5 3 EBCDECAEAEAABAAD BCCCCEEE ACDDDEE AABCDDD 2 11 7 13 +4 5 3 ABDCAACABEABCDDB ACD ACCDE ABBBC 7 11 13 1 +4 5 3 DEDEACADBCBBDAAE BBBEEE BBE BBCCDD 8 1 2 0 +4 5 3 BECCABEEABDBAEAE BCDDE AADDEE AABBBC 7 4 12 14 +4 5 3 BACDEBDBDDCEBADB ABBBC BBDDD CCDDDE 8 6 0 15 +4 5 3 AAAAADBDDCECABDE CDDDD BCDDEE AACEE 0 9 7 1 +4 5 3 EDCAADBBDDBBDDEA ADDE DDD ABBBCC 3 8 13 6 +4 5 3 AAAEEDDCECAABDDB BBCDDE CDDE AABBBDE 5 7 8 10 +4 5 3 BAAECBEEEDEDAEBE ABBBCDDD ACCDD CCCE 2 6 10 11 +4 5 3 DEADCACCADBDBECC BCD BDDDD AAABCC 4 5 15 12 +4 5 3 CCADBEEAEDDDDACD ACCD AABC ABBCDDE 14 7 8 11 +4 5 3 ECBBDECECEECBDCE ABBEEE BCDE ACCCEE 3 10 13 0 +4 5 3 EBBEABDCAAAEDABD AAABE AAB BBDDDE 6 14 7 12 +4 5 3 BEBBAADEBBCABABD AACDE ACCDE BBBDDDE 5 1 15 9 +4 5 3 BACBBEAADBDCECAE ABCCCDD BCDDEE ACCCEEE 0 7 5 13 +4 5 3 EBCCDDBAEADEEDDE CCD ABDD ACEE 5 7 0 8 +4 5 3 BCDACCACBDCBDDDB BBCCCE AAABCCEE AAADD 1 12 8 10 +4 5 3 EEEAEBDBEDCEDBCE ABCCDE DDD BEEE 8 7 10 2 +4 5 3 EBBEEBEECBECDADB BBCCDDDD AACCDD BEEE 5 14 15 11 +4 5 3 ECADBBCBBBBEEAEC AACCDEE BBD AAABBDDE 2 14 3 13 +4 5 3 DDADCEACADBCEEED DDDDDEEE AACDD AABCE 4 15 5 3 +4 5 3 ECACACCDBCABCBEB ABBBDD BCDDE BCCDEE 4 13 14 7 +4 5 3 DBAEADDCDEECDBEE ABBCE AABBCD ABDDDDE 8 14 15 2 +4 5 3 CDCEBBCEDDECBCDE ABDDDEE ABCDDD ABBCCCCE 1 7 11 12 +4 5 3 AEADEDDAECCDCCCB ABCCD CCCDEEE AABBBDDE 4 1 12 11 +4 5 3 CCAADDBDDEDBCADC BDDE ABCE AAACCDD 1 0 14 3 +4 5 3 ADAEBECCEEDCDEBD AABBBBCD AABBEE CCE 4 7 15 14 +4 5 3 DCDBCBBBADEBBDED AABBCCD BBBCEE ABDEEEE 11 0 3 12 +4 5 3 ABDADEECDCAABEAA BBBCEE ACCDDDE AAACC 3 7 5 0 +4 5 3 DBEACDDAADCCDDDC AAC ABCE ADDDEEEE 8 10 0 9 +4 5 3 ADBAAECDACDDDACE BDDDDD AABDDEE AAABBCCE 0 12 15 3 +4 5 3 AAEDCABCDAAABBEE AACCCD AAAC BBCEE 14 7 0 2 diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/colored_trails/colored_trails.cc b/scenarios/bargaining/open_spiel/open_spiel/games/colored_trails/colored_trails.cc new file mode 100644 index 0000000..593440a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/colored_trails/colored_trails.cc @@ -0,0 +1,913 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/colored_trails/colored_trails.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/abseil-cpp/absl/strings/numbers.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace colored_trails { + +namespace { + +// Facts about the game +const GameType kGameType{/*short_name=*/"colored_trails", + /*long_name=*/"Colored Trails", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/3, + /*min_num_players=*/3, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"boards_file", GameParameter("")}, + {"board_size", GameParameter(kDefaultBoardSize)}, + {"num_colors", GameParameter(kDefaultNumColors)}, + {"players", GameParameter(kDefaultNumPlayers)}}}; + +static std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new ColoredTrailsGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +bool IsLegalTrade( + const Board& board, const Trade& trade, + const std::vector& proposer_chips, + const std::vector& responder_chips) { + if (trade.giving.empty() || trade.receiving.empty()) { + // pass trade is always legal. + return true; + } + + for (int i = 0; i < board.num_colors; ++i) { + if (trade.giving[i] > proposer_chips[i]) { + return false; + } + + if (trade.receiving[i] > responder_chips[i]) { + return false; + } + } + + // Try to reduce the trade, if it's not valid or not equal to the same trade + // then not a legal trade. + Trade copy = trade; + bool valid = copy.reduce(); + return (valid && copy == trade); +} + + +std::vector GenerateLegalActionsForChips( + const ColoredTrailsGame* game, + const Board& board, + const std::vector& player_chips, + const std::vector& responder_chips) { + std::vector actions; + ChipComboIterator proposer_iter(player_chips); + while (!proposer_iter.IsFinished()) { + std::vector proposer_chips = proposer_iter.Next(); + ChipComboIterator receiver_iter(responder_chips); + while (!receiver_iter.IsFinished()) { + std::vector receiver_chips = receiver_iter.Next(); + Trade trade(proposer_chips, receiver_chips); + if (IsLegalTrade(board, trade, proposer_chips, responder_chips)) { + int trade_id = game->LookupTradeId(trade.ToString()); + actions.push_back(trade_id); + } + } + } + // Sort and remove duplicates. 
+ absl::c_sort(actions); + auto last = std::unique(actions.begin(), actions.end()); + actions.erase(last, actions.end()); + + // Add pass trade. + actions.push_back(game->PassAction()); + return actions; +} + +} // namespace + +Board::Board() + : board(size * size, -1), + num_chips(num_players, -1), + positions(num_players + 1, -1) { + init(); +} + +Board::Board(int _size, int _num_colors, int _num_players) + : size(_size), + num_colors(_num_colors), + num_players(_num_players), + board(size * size, -1), + num_chips(num_players, -1), + positions(num_players + 1, -1) { + init(); +} + +Board Board::Clone() const { + Board clone(size, num_colors, num_players); + clone.board = board; + clone.num_chips = num_chips; + clone.chips = chips; + clone.positions = positions; + return clone; +} + + +void Board::init() { + chips.reserve(num_players); + for (int p = 0; p < num_players; ++p) { + chips.push_back(std::vector(num_colors, 0)); + } +} + +bool Board::InBounds(int row, int col) const { + return (row >= 0 && row < size && col >= 0 && col < size); +} + +void Board::ApplyTrade(std::pair players, const Trade& trade) { + if (trade.giving.empty()) { + // This is a pass, so don't change the board. + return; + } + SPIEL_CHECK_EQ(trade.giving.size(), num_colors); + SPIEL_CHECK_EQ(trade.receiving.size(), num_colors); + for (int i = 0; i < num_colors; ++i) { + SPIEL_CHECK_LE(trade.giving[i], chips[players.first][i]); + SPIEL_CHECK_LE(trade.receiving[i], chips[players.second][i]); + chips[players.first][i] -= trade.giving[i]; + chips[players.second][i] += trade.giving[i]; + chips[players.first][i] += trade.receiving[i]; + chips[players.second][i] -= trade.receiving[i]; + } +} + +std::string Board::ToString() const { + std::string str = absl::StrCat(size, " ", num_colors, " ", num_players, " "); + for (int i = 0; i < board.size(); ++i) { + str.push_back(ColorToChar(board[i])); + } + absl::StrAppend(&str, " "); + for (Player p = 0; p < num_players; ++p) { + absl::StrAppend(&str, ComboToString(chips[p]), " "); + } + absl::StrAppend(&str, absl::StrJoin(positions, " ")); + return str; +} + +std::string Board::PrettyBoardString() const { + std::string str; + for (int r = 0; r < size; ++r) { + for (int c = 0; c < size; ++c) { + str.push_back(ColorToChar(board[r * size + c])); + } + str.push_back('\n'); + } + return str; +} + +void Board::ParseFromLine(const std::string& line) { + // Example: 4 5 3 AAEDCABCDAAABBEE AACCCD AAAC BBCEE 14 7 0 2 + std::vector parts = absl::StrSplit(line, ' '); + SPIEL_CHECK_EQ(parts.size(), 3 + 2 * num_players + 2); + + int _size, _colors, _players; + SPIEL_CHECK_TRUE(absl::SimpleAtoi(parts[0], &_size)); + SPIEL_CHECK_TRUE(absl::SimpleAtoi(parts[1], &_colors)); + SPIEL_CHECK_TRUE(absl::SimpleAtoi(parts[2], &_players)); + SPIEL_CHECK_EQ(_size, size); + SPIEL_CHECK_EQ(_colors, num_colors); + SPIEL_CHECK_EQ(_players, num_players); + + SPIEL_CHECK_EQ(parts[3].size(), size * size); + for (int i = 0; i < parts[3].size(); ++i) { + board[i] = CharToColor(parts[3].at(i)); + } + + for (Player p = 0; p < num_players; ++p) { + num_chips[p] = parts[4 + p].length(); + for (int i = 0; i < parts[4 + p].length(); ++i) { + int chip_color = CharToColor(parts[4 + p].at(i)); + chips[p][chip_color]++; + } + } + + for (int i = 0; i < num_players + 1; ++i) { + SPIEL_CHECK_TRUE( + absl::SimpleAtoi(parts[4 + num_players + i], &positions[i])); + } +} + +std::string Trade::ToString() const { + if (giving.empty() || receiving.empty()) { + return "Pass trade."; + } + return absl::StrCat(ComboToString(giving), " for 
", ComboToString(receiving)); +} + +int Trade::DistanceTo(const Trade& other) const { + int sum = 0; + if (other.giving.empty() || other.receiving.empty()) { + // Pass trade is the furthest possible distance. + return kDefaultTradeDistanceUpperBound + 1; + } + for (int i = 0; i < giving.size(); ++i) { + sum += std::abs(other.giving[i] - giving[i]); + sum += std::abs(other.receiving[i] - receiving[i]); + } + return sum; +} + +bool Trade::reduce() { + for (int i = 0; i < giving.size(); ++i) { + int min_val = std::min(giving[i], receiving[i]); + giving[i] -= min_val; + receiving[i] -= min_val; + } + return (std::accumulate(giving.begin(), giving.end(), 0) > 0 && + std::accumulate(receiving.begin(), receiving.end(), 0) > 0); +} + +Trade::Trade(const std::vector _giving, const std::vector _receiving) + : giving(_giving), receiving(_receiving) {} + +Trade::Trade(const Trade& other) + : giving(other.giving), receiving(other.receiving) {} + +std::string ColoredTrailsState::ActionToString(Player player, + Action move_id) const { + if (player == kChancePlayerId) { + return absl::StrCat("Chance outcome ", move_id); + } else if (player < kResponderId) { + return absl::StrCat("Proposer ", player, ": ", + parent_game_->LookupTrade(move_id).ToString()); + } else if (player == kResponderId) { + if (move_id == num_distinct_actions_ - 3) { + return "Deal: trade with proposer 0"; + } else if (move_id == num_distinct_actions_ - 2) { + return "Deal: trade with proposer 1"; + } else if (move_id == num_distinct_actions_ - 1) { + return "No Deal!"; + } else { + SpielFatalError(absl::StrCat("move_id unrecognized: ", move_id)); + } + } else { + SpielFatalError(absl::StrCat("Player and move case unrecognized: ", player, + " ", move_id)); + } +} + +bool ColoredTrailsState::IsTerminal() const { + return cur_player_ == kTerminalPlayerId; +} + +std::vector ColoredTrailsState::Returns() const { return returns_; } + +std::string ColoredTrailsState::ObservationString(Player player) const { + return InformationStateString(player); +} + +std::string ColoredTrailsState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + std::string str = + absl::StrCat(board_.PrettyBoardString(), "\n"); + absl::StrAppend(&str, "Player: ", player, "\nPos: ", + absl::StrJoin(board_.positions, " "), "\n"); + if (player < kResponderId) { + absl::StrAppend(&str, "My chips: ", ComboToString(board_.chips[player]), + "\n"); + absl::StrAppend(&str, "Responder chips: ", + ComboToString(board_.chips[kResponderId]), "\n"); + } else if (player == kResponderId) { + absl::StrAppend(&str, "P0 chips: ", ComboToString(board_.chips[0]), "\n"); + absl::StrAppend(&str, "P1 chips: ", ComboToString(board_.chips[1]), "\n"); + if (CurrentPlayer() == kResponderId) { + SPIEL_CHECK_EQ(proposals_.size(), 2); + absl::StrAppend(&str, "Proposal 0: ", proposals_[0].ToString(), "\n"); + absl::StrAppend(&str, "Proposal 1: ", proposals_[1].ToString(), "\n"); + } + } else { + SpielFatalError(absl::StrCat("Bad player id: ", player)); + } + return str; +} + +void ColoredTrailsState::ObservationTensor(Player player, + absl::Span values) const { + InformationStateTensor(player, values); +} + +std::unique_ptr ColoredTrailsState::ResampleFromInfostate( + int player_id, std::function rng) const { + std::vector> candidates; + const std::vector& all_boards = parent_game_->AllBoards(); + + for (int o = 0; o < all_boards.size(); ++o) { + if (board_.ToString() != all_boards[o].ToString()) { + continue; + } + + 
std::unique_ptr candidate_state = parent_game_->NewInitialState(); + candidate_state->ApplyAction(o); + + if (player_id == 0) { + if (candidate_state->InformationStateString(0) == + InformationStateString(0)) { + candidates.push_back(std::move(candidate_state)); + } + } else if (player_id == 1) { + // Enumerate legal moves. + for (Action action : candidate_state->LegalActions()) { + std::unique_ptr candidate_child = candidate_state->Child(action); + if (candidate_child->InformationStateString(1) == + InformationStateString(1)) { + candidates.push_back(std::move(candidate_child)); + } else { + // Player 0's move is hidden. No need to keep trying actions if P1's + // infostate doesn't match. + break; + } + } + } else { + SPIEL_CHECK_EQ(player_id, 2); + SPIEL_CHECK_EQ(History().size(), 3); + Action p0_action = History()[1]; + Action p1_action = History()[2]; + // Receiver sees everything, so replay the moves. + std::vector legal_actions = candidate_state->LegalActions(); + if (absl::c_find(legal_actions, p0_action) != legal_actions.end()) { + candidate_state->ApplyAction(p0_action); + legal_actions = candidate_state->LegalActions(); + if (absl::c_find(legal_actions, p1_action) != legal_actions.end()) { + candidate_state->ApplyAction(p1_action); + candidates.push_back(std::move(candidate_state)); + } + } + } + } + + SPIEL_CHECK_GE(candidates.size(), 1); + if (candidates.size() == 1) { + return std::move(candidates[0]); + } else { + int idx = static_cast(rng() * candidates.size()); + SPIEL_CHECK_LE(idx, candidates.size()); + return std::move(candidates[idx]); + } +} + +void ColoredTrailsState::InformationStateTensor( + Player player, absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + SPIEL_CHECK_EQ(values.size(), game_->InformationStateTensorSize()); + std::fill(values.begin(), values.end(), 0); + + if (IsChanceNode()) { + // No observations at chance nodes. + return; + } + + int offset = 0; + + // Player. + values[player] = 1; + offset += num_players_; + + // Terminal? + if (IsTerminal()) { + values[offset] = 1; + } + offset += 1; + + // The board + for (int i = 0; i < board_.board.size(); ++i) { + values[offset + board_.board[i]] = 1; + offset += board_.num_colors; + } + + // Positions + for (int i = 0; i < board_.positions.size(); ++i) { + values[offset + board_.positions[i]] = 1; + offset += board_.size * board_.size; + } + + // Chips. 
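+  // Chip counts use a thermometer encoding: each color occupies
+  // (kNumChipsUpperBound + 1) slots and slots 0..count are set to 1, so a
+  // count of 3 with kNumChipsUpperBound == 8 is encoded as 1 1 1 1 0 0 0 0 0.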
+ std::array*, 3> chips_ptrs; + std::vector zeros(board_.num_colors, 0); + if (player < kResponderId) { + chips_ptrs[0] = &board_.chips[player]; + chips_ptrs[1] = &zeros; + chips_ptrs[2] = &board_.chips[kResponderId]; + } else { + chips_ptrs[0] = &board_.chips[0]; + chips_ptrs[1] = &board_.chips[1]; + chips_ptrs[2] = &board_.chips[kResponderId]; + } + for (int c = 0; c < 3; ++c) { + for (int i = 0; i < board_.num_colors; ++i) { + for (int j = 0; j <= chips_ptrs[c]->at(i); ++j) { + values[offset + j] = 1; + } + offset += (kNumChipsUpperBound + 1); + } + } + + // Proposals + if (player == kResponderId && CurrentPlayer() == kResponderId) { + SPIEL_CHECK_EQ(proposals_.size(), 2); + for (int p : {0, 1}) { + if (IsPassTrade(proposals_[p])) { + chips_ptrs[0] = &zeros; + chips_ptrs[1] = &zeros; + } else { + chips_ptrs[0] = &(proposals_[p].giving); + chips_ptrs[1] = &(proposals_[p].receiving); + } + + for (int c = 0; c < 2; ++c) { + for (int i = 0; i < board_.num_colors; ++i) { + for (int j = 0; j <= chips_ptrs[c]->at(i); ++j) { + values[offset + j] = 1; + } + offset += (kNumChipsUpperBound + 1); + } + } + } + } else { + // Proposers have no observations of the proposals. + // Responder doesn't observe the chips until its their turn. + offset += (kNumChipsUpperBound + 1) * board_.num_colors * 2 * + (num_players_ - 1); + } + SPIEL_CHECK_EQ(offset, values.size()); +} + +ColoredTrailsState::ColoredTrailsState(std::shared_ptr game, + int board_size, int num_colors) + : State(game), + cur_player_(kChancePlayerId), + parent_game_(down_cast(game.get())), + board_(board_size, num_colors, game->NumPlayers()), + returns_(game->NumPlayers(), 0) {} + +int ColoredTrailsState::CurrentPlayer() const { + return IsTerminal() ? kTerminalPlayerId : cur_player_; +} + +void ColoredTrailsState::DoApplyAction(Action action) { + if (IsChanceNode()) { + const std::vector& all_boards = parent_game_->AllBoards(); + SPIEL_CHECK_GE(action, 0); + SPIEL_CHECK_LT(action, all_boards.size()); + board_ = all_boards[action]; + cur_player_ = 0; + } else if (cur_player_ < kResponderId) { + proposals_.push_back(parent_game_->LookupTrade(action)); + cur_player_++; + + // Special case when using SetChipsAndProposals, check the future_trade_. + // If it's now the second player, and there's a future trade queued, apply + // it. + if (cur_player_ == 1 && + (!future_trade_.giving.empty() || !future_trade_.receiving.empty())) { + proposals_.push_back(future_trade_); + cur_player_++; + } + } else { + // Base scores. + SPIEL_CHECK_EQ(cur_player_, kResponderId); + for (Player p = 0; p < board_.num_players; ++p) { + returns_[p] = Score(p, board_).first; + } + + if (action == parent_game_->ResponderTradeWithPlayerAction(0)) { + if (!IsPassTrade(proposals_[0])) { + board_.ApplyTrade({0, kResponderId}, proposals_[0]); + } + } else if (action == parent_game_->ResponderTradeWithPlayerAction(1)) { + if (!IsPassTrade(proposals_[1])) { + board_.ApplyTrade({1, kResponderId}, proposals_[1]); + } + } else if (action == parent_game_->PassAction()) { + // No trade. + } else { + std::string error = absl::StrCat("Invalid action: ", action, + parent_game_->ActionToString(kResponderId, action), "\n", + ToString()); + SpielFatalErrorWithStateInfo(error, *parent_game_, *this); + } + + // Gain is final score minus base score. 
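+    // E.g. (illustrative numbers) a player whose base score was -25 and whose
+    // score after the accepted trade is 75 ends with a return of 100.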
+ for (Player p = 0; p < board_.num_players; ++p) { + returns_[p] = Score(p, board_).first - returns_[p]; + } + + cur_player_ = kTerminalPlayerId; + } +} + +bool ColoredTrailsState::IsPassTrade(const Trade& trade) const { + return (trade.giving.empty() && trade.receiving.empty()); +} + +bool ColoredTrailsState::IsLegalTrade(Player proposer, + const Trade& trade) const { + return colored_trails::IsLegalTrade(board_, trade, board_.chips[proposer], + board_.chips[kResponderId]); +} + +std::vector ColoredTrailsState::LegalActionsForChips( + const std::vector& player_chips, + const std::vector& responder_chips) const { + // First, check the cache. + std::string key = absl::StrCat(ComboToString(player_chips), " ", + ComboToString(responder_chips)); + std::vector actions = parent_game_->LookupTradesCache(key); + if (!actions.empty()) { + return actions; + } + + actions = GenerateLegalActionsForChips(parent_game_, board_, player_chips, + responder_chips); + + // Add these to the cache. + parent_game_->AddToTradesCache(key, actions); + return actions; +} + +std::vector ColoredTrailsState::LegalActions() const { + if (IsChanceNode()) { + return LegalChanceOutcomes(); + } else if (IsTerminal()) { + return {}; + } else if (cur_player_ < kResponderId) { + return LegalActionsForChips(board_.chips[cur_player_], + board_.chips[kResponderId]); + } else { + SPIEL_CHECK_EQ(cur_player_, kResponderId); + // Last three actions correspond to "trade with 0", "trade with 1", and + // "no trade". + return {parent_game_->ResponderTradeWithPlayerAction(0), + parent_game_->ResponderTradeWithPlayerAction(1), + parent_game_->PassAction()}; + } +} + +std::vector> ColoredTrailsState::ChanceOutcomes() + const { + SPIEL_CHECK_TRUE(IsChanceNode()); + std::vector> outcomes; + const int num_boards = parent_game_->AllBoards().size(); + outcomes.reserve(num_boards); + double uniform_prob = 1.0 / num_boards; + for (int i = 0; i < num_boards; ++i) { + outcomes.push_back({i, uniform_prob}); + } + return outcomes; +} + +std::string ColoredTrailsState::ToString() const { + if (IsChanceNode()) { + return "Initial chance node"; + } + + std::string str; + if (MoveNumber() > 0) { + absl::StrAppend(&str, "Move Number: ", MoveNumber(), "\n", + board_.PrettyBoardString(), "\n"); + for (Player p = 0; p < num_players_; ++p) { + absl::StrAppend(&str, "P", p, " chips: ", ComboToString(board_.chips[p]), + "\n"); + } + } + + absl::StrAppend(&str, "Pos: ", absl::StrJoin(board_.positions, " "), "\n"); + for (int i = 0; i < proposals_.size(); ++i) { + absl::StrAppend(&str, "Proposal ", i, ": ", proposals_[i].ToString(), "\n"); + } + return str; +} + +std::unique_ptr ColoredTrailsState::Clone() const { + return std::unique_ptr(new ColoredTrailsState(*this)); +} + +void ColoredTrailsState::SetChipsAndTradeProposal( + Player player, std::vector chips, Trade trade, + std::vector& rng_rolls) { + // First, check the chips. 
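+  // Repair the chip allotment if needed: while the total is below
+  // kNumChipsLowerBound, add one chip of a randomly chosen color currently at
+  // zero; while it is above kNumChipsUpperBound, remove one chip of a
+  // randomly chosen non-empty color. rng_rolls supplies the random choices.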
+ int rng_idx = 0; + int num_chips = std::accumulate(chips.begin(), chips.end(), 0); + + while (num_chips < kNumChipsLowerBound) { + std::vector indices; + for (int i = 0; i < chips.size(); i++) { + if (chips[i] == 0) { + indices.push_back(i); + } + } + SPIEL_CHECK_LT(rng_idx, rng_rolls.size()); + int selected_idx = + indices[static_cast(rng_rolls[rng_idx] * indices.size())]; + chips[selected_idx]++; + rng_idx++; + num_chips = std::accumulate(chips.begin(), chips.end(), 0); + } + + while (num_chips > kNumChipsUpperBound) { + std::vector indices; + for (int i = 0; i < chips.size(); i++) { + if (chips[i] > 0) { + indices.push_back(i); + } + } + SPIEL_CHECK_LT(rng_idx, rng_rolls.size()); + int selected_idx = + indices[static_cast(rng_rolls[rng_idx] * indices.size())]; + chips[selected_idx]--; + rng_idx++; + num_chips = std::accumulate(chips.begin(), chips.end(), 0); + } + + board_.chips[player] = chips; + trade.reduce(); + + // Now check if the Trade is legal. If not, chose one of the closest legal + // ones in edit distance + if (!IsLegalTrade(player, trade)) { + std::vector closest_trades; + int lowest_distance = kDefaultTradeDistanceUpperBound + 100; + std::vector legal_actions = + LegalActionsForChips(chips, board_.chips[kResponderId]); + for (Action action : legal_actions) { + const Trade& legal_trade = parent_game_->LookupTrade(action); + int dist = trade.DistanceTo(legal_trade); + if (dist == lowest_distance) { + closest_trades.push_back(legal_trade); + } else if (dist < lowest_distance) { + lowest_distance = dist; + closest_trades = {legal_trade}; + } + } + + if (closest_trades.empty()) { + std::cout << ToString() << std::endl; + std::cout << "Trade: " << trade.ToString() << std::endl; + } + + SPIEL_CHECK_GT(closest_trades.size(), 0); + if (closest_trades.size() == 1) { + trade = closest_trades[0]; + } else { + trade = closest_trades[static_cast(rng_rolls[rng_idx] * + closest_trades.size())]; + rng_idx++; + } + } + + if (player == 0) { + SPIEL_CHECK_NE(cur_player_, 0); + proposals_[0] = trade; + } else if (player == 1) { + SPIEL_CHECK_NE(cur_player_, 1); + if (cur_player_ == 0) { + future_trade_ = trade; + } else { + proposals_[1] = trade; + } + } +} + +ColoredTrailsGame::ColoredTrailsGame(const GameParameters& params) + : Game(kGameType, params), + num_colors_(ParameterValue("num_colors", kDefaultNumColors)), + board_size_(ParameterValue("board_size", kDefaultBoardSize)), + num_players_(ParameterValue("players", kDefaultNumPlayers)) { + // Only support the 3-player game. + SPIEL_CHECK_EQ(num_players_, kDefaultNumPlayers); + + std::string filename = ParameterValue("boards_file", ""); + if (!filename.empty()) { + ParseBoardsFile(&all_boards_, filename, num_colors_, board_size_, + num_players_); + } else { + ParseBoardsString(&all_boards_, kDefaultBoardsString, num_colors_, + board_size_, num_players_); + } + InitTradeInfo(&trade_info_, num_colors_); +} + +int ColoredTrailsGame::NumDistinctActions() const { + return trade_info_.possible_trades.size() + 3; +} + +std::vector ColoredTrailsGame::ObservationTensorShape() const { + return InformationStateTensorShape(); +} + +std::vector ColoredTrailsGame::InformationStateTensorShape() const { + return { + num_players_ + // Who is observing + 1 + // is it terminal? 
+ board_size_ * board_size_ * num_colors_ + // board + board_size_ * board_size_ * (num_players_ + 1) + // player + flag positions + // thermometer of bits representation of the chips (proposers + receiver) + (kNumChipsUpperBound + 1) * num_colors_ * 3 + + // thermometer of bits representation of the proposals + // 0 to upperboard of chip combos for each in X for Y, and max two proposals + (kNumChipsUpperBound + 1) * num_colors_ * 2 * (num_players_ - 1) + }; +} + +std::vector ColoredTrailsGame::LookupTradesCache( + const std::string& key) const { + const auto& iter = trades_cache_.find(key); + if (iter == trades_cache_.end()) { + return {}; + } + return iter->second; +} + +void ColoredTrailsGame::AddToTradesCache(const std::string& key, + std::vector& actions) const { + trades_cache_[key] = actions; +} + +bool CheckBoard(const Board& board) { + std::vector base_scores(board.num_players); + int min_score = board.size * 100; + int max_score = board.size * -100; + + for (Player player = 0; player < board.num_players; ++player) { + std::pair score_and_solved = Score(player, board); + if (score_and_solved.second) { + // Cannot be solvable without negotiation. + return false; + } + base_scores[player] = score_and_solved.first; + min_score = std::min(min_score, base_scores[player]); + max_score = std::max(max_score, base_scores[player]); + } + + if (max_score - min_score > kBaseScoreEpsilon) { + return false; + } + + // Now check that there exist two trades: + // - one between player 0 and 2, such that both can reach the goal + // - one between player 1 and 2, such that both can reach the goal + for (int proposer : {0, 1}) { + bool found_trade = false; + ChipComboIterator iter1(board.chips[proposer]); + while (!found_trade && !iter1.IsFinished()) { + std::vector combo1 = iter1.Next(); + ChipComboIterator iter2(board.chips[2]); + while (!found_trade && !iter2.IsFinished()) { + std::vector combo2 = iter2.Next(); + // Do the trade and check if both can reach the goal. + Board board_copy = board; + Trade trade(combo1, combo2); + board_copy.ApplyTrade({proposer, 2}, trade); + std::pair prop_score_and_goal = Score(proposer, board_copy); + if (prop_score_and_goal.second) { + std::pair rec_score_and_goal = Score(2, board_copy); + if (rec_score_and_goal.second) { + found_trade = true; + } + } + } + } + if (!found_trade) { + return false; + } + } + + return true; +} + +bool CheckBoardForProposer(const Board& board, Player proposer) { + std::vector base_scores(board.num_players); + int min_score = board.size * 100; + int max_score = board.size * -100; + + std::pair score_and_solved = Score(proposer, board); + if (score_and_solved.second) { + // Cannot be solvable without negotiation. + return false; + } + base_scores[proposer] = score_and_solved.first; + min_score = std::min(min_score, base_scores[proposer]); + max_score = std::max(max_score, base_scores[proposer]); + + if (max_score - min_score > kBaseScoreEpsilon) { + return false; + } + + // Now check that there exist two trades: + bool found_trade = false; + ChipComboIterator iter1(board.chips[proposer]); + while (!found_trade && !iter1.IsFinished()) { + std::vector combo1 = iter1.Next(); + ChipComboIterator iter2(board.chips[2]); + while (!found_trade && !iter2.IsFinished()) { + std::vector combo2 = iter2.Next(); + // Do the trade and check if both can reach the goal. 
+ Board board_copy = board; + Trade trade(combo1, combo2); + board_copy.ApplyTrade({proposer, 2}, trade); + std::pair prop_score_and_goal = Score(proposer, board_copy); + if (prop_score_and_goal.second) { + std::pair rec_score_and_goal = Score(2, board_copy); + if (rec_score_and_goal.second) { + found_trade = true; + } + } + } + } + if (!found_trade) { + return false; + } + + return true; +} + + +std::pair ColoredTrailsGame::SampleRandomBoardCompletion( + int seed, const Board& board, Player player) const { + std::mt19937 rng(seed); + Board new_board = board; + const int max_tries = 1000; + int tries = 0; + + do { + tries += 1; + for (int i = 0; i < new_board.chips[player].size(); ++i) { + new_board.chips[player][i] = 0; + } + int width = kNumChipsUpperBound - kNumChipsLowerBound + 1; + new_board.num_chips[player] = + kNumChipsLowerBound + absl::Uniform(rng, 0, width); + for (int i = 0; i < new_board.num_chips[player]; ++i) { + int chip = absl::Uniform(rng, 0, new_board.num_colors); + new_board.chips[player][chip]++; + } + } while (!CheckBoardForProposer(new_board, player) && tries < max_tries); + SPIEL_CHECK_LT(tries, max_tries); + + std::string key = absl::StrCat(ComboToString(new_board.chips[player]), " ", + ComboToString(new_board.chips[kResponderId])); + std::vector actions = LookupTradesCache(key); + if (actions.empty()) { + actions = GenerateLegalActionsForChips(this, new_board, + new_board.chips[player], + new_board.chips[kResponderId]); + AddToTradesCache(key, actions); + } + + Action action = actions[absl::Uniform(rng, 0, actions.size())]; + return {new_board, action}; +} + + +} // namespace colored_trails +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/colored_trails/colored_trails.h b/scenarios/bargaining/open_spiel/open_spiel/games/colored_trails/colored_trails.h new file mode 100644 index 0000000..235d62c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/colored_trails/colored_trails.h @@ -0,0 +1,302 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_COLORED_TRAILS_H_ +#define OPEN_SPIEL_GAMES_COLORED_TRAILS_H_ + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/spiel.h" + +// A simple bargaining game [1]. +// +// This code currently implements the three-player imperfect information game +// from these papers [2,3] but we would like this to be a generic implementation +// that can handle the several variants of the classic game. +// +// [1] Ya'akov Gal, Barbara Grosz, Sarit Kraus, Avi Pfeffer, and Stuart Shieber. +// 2010. Agent decision-making in open mixed networks. Artificial +// Intelligence 174(18): 1460-1480. +// [2] de Jong et al. '11, Metastrategies in the Colored Trails Game +// https://www.ifaamas.org/Proceedings/aamas2011/papers/C4_R57.pdf +// [3] S. G. Ficici and A. Pfeffer. Modeling how Humans Reason about Others with +// Partial Information. 
In Proceedings of the Seventh International +// Conference on Autonomous Agents and Multiagent Systems (AAMAS), 2008. +// +// Parameters: +// "boards_file" string The file containing the boards (default: "") +// "board_size" int number of rows / columns (default = 4) +// "num_colors" int number of colors (default = 5) +// "players" int number of players (default = 3) + +namespace open_spiel { +namespace colored_trails { + +constexpr int kResponderId = 2; + +constexpr int kDefaultNumPlayers = 3; +constexpr int kDefaultNumColors = 5; +constexpr int kDefaultBoardSize = 4; // 4x4 + +// [3] states that each player receive between 4 and 8 chips, but [2] shows +// instances with only 3 chips. +constexpr int kNumChipsLowerBound = 3; +constexpr int kNumChipsUpperBound = 8; + +constexpr int kLeftoverChipScore = 10; +constexpr int kFlagPenaltyPerCell = -25; + +// How much distance can there be between trades? +constexpr int kDefaultTradeDistanceUpperBound = + kDefaultNumColors * kNumChipsUpperBound; + +// Minimum gain required when generating boards. +constexpr int kBaseScoreEpsilon = 20; + + + +// Default 10-board database used for tests, etc. See +// colored_trails/boards100.txt and create your own using +// colored_trails/colored_trails_board_generator. +constexpr const char* kDefaultBoardsString = + "4 5 3 DEADCACCADBDBECC BCD BDDDD AAABCC 4 5 15 12\n" + "4 5 3 CCADBEEAEDDDDACD ACCD AABC ABBCDDE 14 7 8 11\n" + "4 5 3 ECBBDECECEECBDCE ABBEEE BCDE ACCCEE 3 10 13 0\n" + "4 5 3 EBBEABDCAAAEDABD AAABE AAB BBDDDE 6 14 7 12\n" + "4 5 3 BEBBAADEBBCABABD AACDE ACCDE BBBDDDE 5 1 15 9\n" + "4 5 3 BACBBEAADBDCECAE ABCCCDD BCDDEE ACCCEEE 0 7 5 13\n" + "4 5 3 EBCCDDBAEADEEDDE CCD ABDD ACEE 5 7 0 8\n" + "4 5 3 BCDACCACBDCBDDDB BBCCCE AAABCCEE AAADD 1 12 8 10\n" + "4 5 3 EEEAEBDBEDCEDBCE ABCCDE DDD BEEE 8 7 10 2\n" + "4 5 3 EBBEEBEECBECDADB BBCCDDDD AACCDD BEEE 5 14 15 11\n"; + +class ColoredTrailsGame; // Forward definition necessary for parent pointer. 
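+// Format of each board line, inferred from kDefaultBoardsString above (see
+// Board::ParseFromLine for the authoritative parsing): board_size, num_colors,
+// num_players, then board_size*board_size cell colors written as letters
+// ('A' + color), then one chip string per player, then num_players + 1 cell
+// indices giving the player positions followed by the flag position.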
+ +struct Trade { + std::vector giving; + std::vector receiving; + Trade() {} + Trade(const std::vector _giving, const std::vector _receiving); + Trade(const Trade& other); + std::string ToString() const; + int DistanceTo(const Trade& other) const; + bool operator==(const Trade& other) const { + return (giving == other.giving && receiving == other.receiving); + } + bool reduce(); // remove redundant chip exchanges from both sides + // returns whether it's a valid trade (nonempty giving + // and receiving) +}; + +struct TradeInfo { + std::vector> chip_combinations; + std::vector> possible_trades; + absl::flat_hash_map trade_str_to_id; +}; + +struct Board { + int size = kDefaultBoardSize; + int num_colors = kDefaultNumColors; + int num_players = kDefaultNumPlayers; + std::vector board; + std::vector num_chips; + std::vector> chips; + std::vector positions; // Flag position is at positions[num_players] + + Board(); + Board(int _size, int _num_colors, int _num_players); + + Board Clone() const; + void ParseFromLine(const std::string& line); + bool InBounds(int row, int col) const; + void init(); + std::string ToString() const; + std::string PrettyBoardString() const; + void ApplyTrade(std::pair players, const Trade& trade); +}; + +class ChipComboIterator { + public: + ChipComboIterator(const std::vector& chips); + bool IsFinished() const; + std::vector Next(); + + private: + std::vector chips_; + std::vector cur_combo_; +}; + +class ColoredTrailsState : public State { + public: + ColoredTrailsState(std::shared_ptr game, int board_size, + int num_colors); + ColoredTrailsState(const ColoredTrailsState&) = default; + + Player CurrentPlayer() const override; + std::string ActionToString(Player player, Action move_id) const override; + std::vector> ChanceOutcomes() const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + void InformationStateTensor(Player player, + absl::Span values) const override; + std::string InformationStateString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::string ObservationString(Player player) const override; + + std::unique_ptr Clone() const override; + std::vector LegalActions() const override; + + std::unique_ptr ResampleFromInfostate( + int player_id, std::function rng) const override; + + // Override the current chips and trade proposal for the specified player. + // If the chips is an illegal allotment, it is randomly matched to the + // neareast legal one. If the trade is illegal as a result, it is replaced + // by one of the closes legal trades in edit distance. + // If called on Player 1's turn to set Player 2's values, then the + // future_trade_ is set and applied automatically. + // Finally, rng_rolls is several random numbers in [0,1) used for random + // decisions. 
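+  // Illustrative call sketch, assuming the elided template arguments are
+  // std::vector<int> for chips and std::vector<double> for rng_rolls:
+  //   std::vector<double> rolls = {0.42, 0.17, 0.88};
+  //   state.SetChipsAndTradeProposal(/*player=*/0,
+  //       /*chips=*/{2, 1, 0, 1, 0},   // chip counts per color
+  //       Trade(/*_giving=*/{1, 0, 0, 0, 0}, /*_receiving=*/{0, 1, 0, 0, 0}),
+  //       rolls);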
+ void SetChipsAndTradeProposal(Player player, std::vector chips, + Trade trade, std::vector& rng_rolls); + + const Board& board() { return board_; } + const std::vector& proposals() { return proposals_; } + + protected: + void DoApplyAction(Action action) override; + + private: + bool IsPassTrade(const Trade& trade) const; + bool IsLegalTrade(Player proposer, const Trade& trade) const; + std::vector LegalActionsForChips( + const std::vector& player_chips, + const std::vector& responder_chips) const; + + Player cur_player_; + const ColoredTrailsGame* parent_game_; + Board board_; + std::vector returns_; + std::vector proposals_; + + // This is only used by the SetChipsAndTradeProposals functions above. + Trade future_trade_; +}; + +class ColoredTrailsGame : public Game { + public: + explicit ColoredTrailsGame(const GameParameters& params); + + int NumDistinctActions() const override; + std::unique_ptr NewInitialState() const override { + return std::unique_ptr( + new ColoredTrailsState(shared_from_this(), board_size_, num_colors_)); + } + int MaxChanceOutcomes() const override { return all_boards_.size(); } + + int MaxGameLength() const override { return 3; } + int MaxChanceNodesInHistory() const override { return MaxGameLength(); } + + int NumPlayers() const override { return num_players_; } + double MaxUtility() const override { + // Get max chips, then do a 1-for-8 trade, and only use 1 chip. + // = 0 (for reaching goal) + (8 - 1 + 8) * leftover_chip_value + return kLeftoverChipScore * (kNumChipsUpperBound - 1 + kNumChipsUpperBound); + } + double MinUtility() const override { + // No chips left and as far away from the goal as possible. + return board_size_ * board_size_ * kFlagPenaltyPerCell; + } + std::vector ObservationTensorShape() const override; + std::vector InformationStateTensorShape() const override; + + const std::vector& AllBoards() const { return all_boards_; } + + const Trade& LookupTrade(int trade_id) const { + if (trade_id == PassAction()) { + return pass_trade_; + } else { + return *(trade_info_.possible_trades.at(trade_id)); + } + } + + Action ResponderTradeWithPlayerAction(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LE(player, 1); + return NumDistinctActions() - 3 + player; + } + + Action PassAction() const { return NumDistinctActions() - 1; } + + int LookupTradeId(const std::string& trade_str) const { + return trade_info_.trade_str_to_id.at(trade_str); + } + + std::vector LookupTradesCache(const std::string& key) const; + void AddToTradesCache(const std::string& key, + std::vector& actions) const; + + // Sample a random board according to the board generation rules, using a + // partial board which contains all the information for all the players except + // the specified player (override anything present for that player). + // Also returns a legal action for the same player. + std::pair SampleRandomBoardCompletion( + int seed, const Board& board, Player player) const; + + private: + const int num_colors_; + const int board_size_; + const int num_players_; + std::vector all_boards_; + TradeInfo trade_info_; + Trade pass_trade_; + mutable absl::flat_hash_map> trades_cache_; +}; + +// Helper functions used by the board generator and game implementation. +// Implementations contained in colored_trails_utils.cc. 
+char ColorToChar(int color); +int CharToColor(char c); +std::string ComboToString(const std::vector& combo); +std::vector ComboStringToCombo(const std::string& combo_str, + int num_colors); +void InitTradeInfo(TradeInfo* trade_info, int num_colors); + +// This is the G function described in [2]: the score if the player were to +// advance as close to the goal as possible given their current chips: +// - Subtract 25 points for every step away from the goal in Manhattan +// distance +// - Add 10 points for every chip leftover after the exchange. +std::pair Score(Player player, const Board& board); + +void ParseBoardsFile(std::vector* boards, const std::string& filename, + int num_colors, int board_size, int num_players); +void ParseBoardsString(std::vector* boards, + const std::string& boards_string, + int num_colors, int board_size, int num_players); + +// Does the board match the creation criteria? +bool CheckBoard(const Board& board); + + +} // namespace colored_trails +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_COLORED_TRAILS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/colored_trails/colored_trails_board_generator.cc b/scenarios/bargaining/open_spiel/open_spiel/games/colored_trails/colored_trails_board_generator.cc new file mode 100644 index 0000000..3d110fb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/colored_trails/colored_trails_board_generator.cc @@ -0,0 +1,116 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This generates strategically interesting instances of Colored Trails +// according to the criteria of Sec 5 of Jong et al', 2011, Metastrategies in +// the Colored Trails Game. +// https://www.ifaamas.org/Proceedings/aamas2011/papers/C4_R57.pdf + +#include +#include + +#include "open_spiel/abseil-cpp/absl/flags/flag.h" +#include "open_spiel/abseil-cpp/absl/flags/parse.h" +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/games/colored_trails/colored_trails.h" +#include "open_spiel/utils/file.h" +#include "open_spiel/utils/init.h" + +ABSL_FLAG(int, seed, 0, "Seed to use"); +ABSL_FLAG(int, num_boards, 10000, "Number of boards to generate."); +ABSL_FLAG(std::string, filename, "/tmp/boards.txt", "File to save boards to."); + +namespace open_spiel { +namespace colored_trails { +namespace { + +std::string GenerateBoard(std::mt19937* rng) { + bool valid_board = false; + std::string board_string; + + while (!valid_board) { + Board board; + // Generate the player's chips. + int width = kNumChipsUpperBound - kNumChipsLowerBound + 1; + for (int p = 0; p < board.num_players; ++p) { + // First their number of chips. + board.num_chips[p] = + kNumChipsLowerBound + absl::Uniform(*rng, 0, width); + // Then, their chips + for (int i = 0; i < board.num_chips[p]; ++i) { + int chip = absl::Uniform(*rng, 0, board.num_colors); + board.chips[p][chip]++; + } + } + + // Now, the board. 
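+    // Each of the board.size * board.size cells below gets a color drawn
+    // uniformly at random from the num_colors colors.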
+ for (int r = 0; r < board.size; ++r) { + for (int c = 0; c < board.size; ++c) { + int idx = r * board.size + c; + board.board[idx] = absl::Uniform(*rng, 0, board.num_colors); + } + } + + // Now the player positions. + // The flag position is the last one, hence positions.size() here + for (int p = 0; p < board.positions.size(); ++p) { + int candidate = -1; + while (absl::c_find(board.positions, candidate) != + board.positions.end()) { + candidate = absl::Uniform(*rng, 0, board.size * board.size); + } + board.positions[p] = candidate; + } + + // Check the board. + valid_board = CheckBoard(board); + board_string = board.ToString(); + } + + return board_string; +} + +void GenerateBoards(int num) { + std::string filename = absl::GetFlag(FLAGS_filename); + int seed = absl::GetFlag(FLAGS_seed); + std::mt19937 rng(seed); + + std::cout << "Starting." << std::endl; + TradeInfo trade_info; + InitTradeInfo(&trade_info, kDefaultNumColors); + std::cout << "Num combos: " << trade_info.chip_combinations.size() + << ", possible trades " << trade_info.possible_trades.size() + << std::endl; + + std::cout << "Opening file: " << filename << std::endl; + open_spiel::file::File outfile(filename, "w"); + for (int i = 0; i < num; ++i) { + std::cout << "Generating board " << i << std::endl; + std::string line = GenerateBoard(&rng); + line.push_back('\n'); + std::cout << line; + outfile.Write(line); + } + std::cout << "Wrote to file: " << filename << std::endl; +} + +} // namespace +} // namespace colored_trails +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::Init("", &argc, &argv, false); + absl::ParseCommandLine(argc, argv); + open_spiel::colored_trails::GenerateBoards(absl::GetFlag(FLAGS_num_boards)); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/colored_trails/colored_trails_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/colored_trails/colored_trails_test.cc new file mode 100644 index 0000000..1147757 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/colored_trails/colored_trails_test.cc @@ -0,0 +1,45 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/colored_trails/colored_trails.h" + +#include +#include +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" +#include "open_spiel/utils/init.h" + +namespace open_spiel { +namespace colored_trails { +namespace { + +namespace testing = open_spiel::testing; + +void BasicColoredTrailsTests() { + testing::LoadGameTest("colored_trails"); + + // Game creation and legal actions are fairly heavy, so only run 1 sim. 
+ testing::RandomSimTest(*LoadGame("colored_trails"), 1); +} + +} // namespace +} // namespace colored_trails +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::Init("", &argc, &argv, false); + open_spiel::colored_trails::BasicColoredTrailsTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/colored_trails/colored_trails_utils.cc b/scenarios/bargaining/open_spiel/open_spiel/games/colored_trails/colored_trails_utils.cc new file mode 100644 index 0000000..8b3742c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/colored_trails/colored_trails_utils.cc @@ -0,0 +1,200 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/games/colored_trails/colored_trails.h" +#include "open_spiel/utils/file.h" + +namespace open_spiel { +namespace colored_trails { +namespace { + +constexpr int kNumDirections = 4; +constexpr std::array kRowOffsets = {-1, 0, 1, 0}; +constexpr std::array kColumnOffsets = {0, -1, 0, 1}; + +void InitChipCombosRec(TradeInfo* trade_info, int num_colors, + std::string cur_combo_str) { + if (cur_combo_str.length() > 0 && + cur_combo_str.length() <= kNumChipsUpperBound) { + trade_info->chip_combinations.push_back( + ComboStringToCombo(cur_combo_str, num_colors)); + } else if (cur_combo_str.length() > kNumChipsUpperBound) { + return; + } + + int last_color = + (cur_combo_str.empty() ? 0 : CharToColor(cur_combo_str.back())); + for (int c = last_color; c < num_colors; ++c) { + std::string child = cur_combo_str; + child.push_back(ColorToChar(c)); + InitChipCombosRec(trade_info, num_colors, child); + } +} + +int ManhattanDistance(const Board& board, int pos1, int pos2) { + int r1 = pos1 / board.size, c1 = pos1 % board.size; + int r2 = pos2 / board.size, c2 = pos2 % board.size; + return std::abs(r2 - r1) + std::abs(c2 - c1); +} + +int CurrentScore(Player p, const Board& board) { + int score = std::accumulate(board.chips[p].begin(), board.chips[p].end(), 0) * + kLeftoverChipScore; + score += kFlagPenaltyPerCell * + ManhattanDistance(board, board.positions[p], board.positions.back()); + return score; +} + +int ScoreRec(Player player, const Board& board, bool* solved) { + int score = CurrentScore(player, board); + int row = board.positions[player] / board.size; + int col = board.positions[player] % board.size; + + if (board.positions.back() == board.positions[player]) { + // We found the goal. This has to be the maximal score: terminate recursion. + *solved = true; + return score; + } + + for (int dir = 0; dir < kNumDirections; ++dir) { + int rp = row + kRowOffsets[dir]; + int cp = col + kColumnOffsets[dir]; + if (board.InBounds(rp, cp)) { // Check this position is in bounds. 
+ int pos = rp * board.size + cp; + int color = board.board[pos]; + if (board.chips[player][color] > 0) { + // If this player has a chip to travel here, then move them and call + // score on the child board. + Board child_board = board; + child_board.chips[player][color]--; + child_board.positions[player] = pos; + int child_score = ScoreRec(player, child_board, solved); + score = std::max(score, child_score); + } + } + } + + return score; +} + +} // namespace + +ChipComboIterator::ChipComboIterator(const std::vector& chips) + : chips_(chips), cur_combo_(chips.size(), 0) { + SPIEL_CHECK_GT(std::accumulate(chips_.begin(), chips_.end(), 0), 0); +} + +bool ChipComboIterator::IsFinished() const { + // If every digit is maximized, we are done. + return cur_combo_ == chips_; +} + +std::vector ChipComboIterator::Next() { + // Try to increase the left-most non-maximized chip with non-zero chips. Then + // reset every digit to the left of it with nonzero chips. + for (int inc_idx = 0; inc_idx < chips_.size(); ++inc_idx) { + if (cur_combo_[inc_idx] < chips_[inc_idx]) { + cur_combo_[inc_idx]++; + for (int j = inc_idx - 1; j >= 0; --j) { + cur_combo_[j] = 0; + } + break; + } + } + return cur_combo_; +} + +std::vector ComboStringToCombo(const std::string& combo_str, + int num_colors) { + std::vector combo(num_colors, 0); + for (int i = 0; i < combo_str.length(); ++i) { + int color = CharToColor(combo_str[i]); + combo[color]++; + } + return combo; +} + +std::string ComboToString(const std::vector& combo) { + std::string combo_str; + for (int i = 0; i < combo.size(); ++i) { + for (int k = 0; k < combo[i]; ++k) { + combo_str.push_back(ColorToChar(i)); + } + } + return combo_str; +} + +char ColorToChar(int color) { return static_cast('A' + color); } + +int CharToColor(char c) { return static_cast(c - 'A'); } + +void InitTradeInfo(TradeInfo* trade_info, int num_colors) { + InitChipCombosRec(trade_info, num_colors, ""); + for (int i = 0; i < trade_info->chip_combinations.size(); ++i) { + for (int j = 0; j < trade_info->chip_combinations.size(); ++j) { + Trade candidate(trade_info->chip_combinations[i], + trade_info->chip_combinations[j]); + bool valid = candidate.reduce(); + if (!valid) { + continue; + } + + std::string candidate_str = candidate.ToString(); + + if (trade_info->trade_str_to_id.find(candidate_str) == + trade_info->trade_str_to_id.end()) { + // std::cout << "Valid trade: " << candidate_str << std::endl; + trade_info->possible_trades.push_back( + std::make_unique(candidate)); + trade_info->trade_str_to_id[candidate_str] = + trade_info->possible_trades.size() - 1; + } + } + } +} + +std::pair Score(Player player, const Board& board) { + bool solved = false; + int score = ScoreRec(player, board, &solved); + return std::make_pair(score, solved); +} + +void ParseBoardsString(std::vector* boards, + const std::string& boards_string, + int num_colors, int board_size, int num_players) { + std::vector lines = absl::StrSplit(boards_string, '\n'); + SPIEL_CHECK_GT(lines.size(), 1); + for (const std::string& line : lines) { + if (!line.empty()) { + Board board(board_size, num_colors, num_players); + board.ParseFromLine(line); + boards->push_back(board); + } + } +} + +void ParseBoardsFile(std::vector* boards, const std::string& filename, + int num_colors, int board_size, int num_players) { + open_spiel::file::File infile(filename, "r"); + std::string contents = infile.ReadContents(); + ParseBoardsString(boards, contents, num_colors, board_size, num_players); +} + +} // namespace colored_trails +} // namespace 
open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/connect_four/connect_four.cc b/scenarios/bargaining/open_spiel/open_spiel/games/connect_four/connect_four.cc new file mode 100644 index 0000000..28a7036 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/connect_four/connect_four.cc @@ -0,0 +1,277 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/connect_four/connect_four.h" + +#include +#include +#include + +#include "open_spiel/utils/tensor_view.h" + +namespace open_spiel { +namespace connect_four { +namespace { + +// Facts about the game +const GameType kGameType{ + /*short_name=*/"connect_four", + /*long_name=*/"Connect Four", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/{} // no parameters +}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new ConnectFourGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +CellState PlayerToState(Player player) { + switch (player) { + case 0: + return CellState::kCross; + case 1: + return CellState::kNought; + default: + SpielFatalError(absl::StrCat("Invalid player id ", player)); + } +} + +std::string StateToString(CellState state) { + switch (state) { + case CellState::kEmpty: + return "."; + case CellState::kNought: + return "o"; + case CellState::kCross: + return "x"; + default: + SpielFatalError("Unknown state."); + return "This will never return."; + } +} +} // namespace + +CellState& ConnectFourState::CellAt(int row, int col) { + return board_[row * kCols + col]; +} + +CellState ConnectFourState::CellAt(int row, int col) const { + return board_[row * kCols + col]; +} + +int ConnectFourState::CurrentPlayer() const { + if (IsTerminal()) { + return kTerminalPlayerId; + } else { + return current_player_; + } +} + +void ConnectFourState::DoApplyAction(Action move) { + SPIEL_CHECK_EQ(CellAt(kRows - 1, move), CellState::kEmpty); + int row = 0; + while (CellAt(row, move) != CellState::kEmpty) ++row; + CellAt(row, move) = PlayerToState(CurrentPlayer()); + + if (HasLine(current_player_)) { + outcome_ = static_cast(current_player_); + } else if (IsFull()) { + outcome_ = Outcome::kDraw; + } + + current_player_ = 1 - current_player_; +} + +std::vector ConnectFourState::LegalActions() const { + // Can move in any non-full column. 
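+  // A column is non-full exactly when its top cell (row kRows - 1) is still
+  // empty; DoApplyAction above then drops the piece into the lowest empty row
+  // of that column.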
+ std::vector moves; + if (IsTerminal()) return moves; + for (int col = 0; col < kCols; ++col) { + if (CellAt(kRows - 1, col) == CellState::kEmpty) moves.push_back(col); + } + return moves; +} + +std::string ConnectFourState::ActionToString(Player player, + Action action_id) const { + return absl::StrCat(StateToString(PlayerToState(player)), action_id); +} + +bool ConnectFourState::HasLineFrom(Player player, int row, int col) const { + return HasLineFromInDirection(player, row, col, 0, 1) || + HasLineFromInDirection(player, row, col, -1, -1) || + HasLineFromInDirection(player, row, col, -1, 0) || + HasLineFromInDirection(player, row, col, -1, 1); +} + +bool ConnectFourState::HasLineFromInDirection(Player player, int row, int col, + int drow, int dcol) const { + if (row + 3 * drow >= kRows || col + 3 * dcol >= kCols || + row + 3 * drow < 0 || col + 3 * dcol < 0) + return false; + CellState c = PlayerToState(player); + for (int i = 0; i < 4; ++i) { + if (CellAt(row, col) != c) return false; + row += drow; + col += dcol; + } + return true; +} + +bool ConnectFourState::HasLine(Player player) const { + CellState c = PlayerToState(player); + for (int col = 0; col < kCols; ++col) { + for (int row = 0; row < kRows; ++row) { + if (CellAt(row, col) == c && HasLineFrom(player, row, col)) return true; + } + } + return false; +} + +bool ConnectFourState::IsFull() const { + for (int col = 0; col < kCols; ++col) { + if (CellAt(kRows - 1, col) == CellState::kEmpty) return false; + } + return true; +} + +ConnectFourState::ConnectFourState(std::shared_ptr game) + : State(game) { + std::fill(begin(board_), end(board_), CellState::kEmpty); +} + +std::string ConnectFourState::ToString() const { + std::string str; + for (int row = kRows - 1; row >= 0; --row) { + for (int col = 0; col < kCols; ++col) { + str.append(StateToString(CellAt(row, col))); + } + str.append("\n"); + } + return str; +} +bool ConnectFourState::IsTerminal() const { + return outcome_ != Outcome::kUnknown; +} + +std::vector ConnectFourState::Returns() const { + if (outcome_ == Outcome::kPlayer1) return {1.0, -1.0}; + if (outcome_ == Outcome::kPlayer2) return {-1.0, 1.0}; + return {0.0, 0.0}; +} + +std::string ConnectFourState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return HistoryString(); +} + +std::string ConnectFourState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +int PlayerRelative(CellState state, Player current) { + switch (state) { + case CellState::kNought: + return current == 0 ? 0 : 1; + case CellState::kCross: + return current == 1 ? 
0 : 1; + case CellState::kEmpty: + return 2; + default: + SpielFatalError("Unknown player type."); + } +} + +void ConnectFourState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + TensorView<2> view(values, {kCellStates, kNumCells}, true); + + for (int cell = 0; cell < kNumCells; ++cell) { + view[{PlayerRelative(board_[cell], player), cell}] = 1.0; + } +} + +std::unique_ptr ConnectFourState::Clone() const { + return std::unique_ptr(new ConnectFourState(*this)); +} + +ConnectFourGame::ConnectFourGame(const GameParameters& params) + : Game(kGameType, params) {} + +ConnectFourState::ConnectFourState(std::shared_ptr game, + const std::string& str) + : State(game) { + int xs = 0; + int os = 0; + int r = 5; + int c = 0; + for (const char ch : str) { + switch (ch) { + case '.': + CellAt(r, c) = CellState::kEmpty; + break; + case 'x': + ++xs; + CellAt(r, c) = CellState::kCross; + break; + case 'o': + ++os; + CellAt(r, c) = CellState::kNought; + break; + } + if (ch == '.' || ch == 'x' || ch == 'o') { + ++c; + if (c >= kCols) { + r--; + c = 0; + } + } + } + SPIEL_CHECK_TRUE(xs == os || xs == (os + 1)); + SPIEL_CHECK_TRUE(r == -1 && ("Problem parsing state (incorrect rows).")); + SPIEL_CHECK_TRUE(c == 0 && + ("Problem parsing state (column value should be 0)")); + current_player_ = (xs == os) ? 0 : 1; + + if (HasLine(0)) { + outcome_ = Outcome::kPlayer1; + } else if (HasLine(1)) { + outcome_ = Outcome::kPlayer2; + } else if (IsFull()) { + outcome_ = Outcome::kDraw; + } +} + +} // namespace connect_four +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/connect_four/connect_four.h b/scenarios/bargaining/open_spiel/open_spiel/games/connect_four/connect_four.h new file mode 100644 index 0000000..cc2dae3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/connect_four/connect_four.h @@ -0,0 +1,138 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_CONNECT_FOUR_H_ +#define OPEN_SPIEL_GAMES_CONNECT_FOUR_H_ + +#include +#include +#include +#include +#include + +#include "open_spiel/spiel.h" + +// Simple game of Connect Four +// https://en.wikipedia.org/wiki/Connect_Four +// +// Minimax values (win/loss/draw) available for first 8 moves, here: +// https://archive.ics.uci.edu/ml/datasets/Connect-4 +// +// Parameters: none + +namespace open_spiel { +namespace connect_four { + +// Constants. +inline constexpr int kNumPlayers = 2; +inline constexpr int kRows = 6; +inline constexpr int kCols = 7; +inline constexpr int kNumCells = kRows * kCols; +inline constexpr int kCellStates = + 1 + kNumPlayers; // player 0, player 1, empty + +// Outcome of the game. +enum class Outcome { + kPlayer1 = 0, + kPlayer2 = 1, + kUnknown, + kDraw, +}; + +// State of a cell. +enum class CellState { + kEmpty, + kNought, + kCross, +}; + +// State of an in-play game. 
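+// The board is stored row-major with row 0 as the bottom row: a move drops a
+// piece into the lowest empty cell of the chosen column, and ToString prints
+// rows from kRows - 1 down to 0.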
+class ConnectFourState : public State { + public: + ConnectFourState(std::shared_ptr); + explicit ConnectFourState(std::shared_ptr game, + const std::string& str); + ConnectFourState(const ConnectFourState& other) = default; + + Player CurrentPlayer() const override; + std::vector LegalActions() const override; + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + std::vector ActionsConsistentWithInformationFrom( + Action action) const override { + return {action}; + } + std::unique_ptr ResampleFromInfostate( + int player_id, std::function rng) const override { + return Clone(); + } + + protected: + void DoApplyAction(Action move) override; + + private: + CellState& CellAt(int row, int col); + CellState CellAt(int row, int col) const; + bool HasLine(Player player) const; // Does this player have a line? + bool HasLineFrom(Player player, int row, int col) const; + bool HasLineFromInDirection(Player player, int row, int col, int drow, + int dcol) const; + bool IsFull() const; // Is the board full? + Player current_player_ = 0; // Player zero goes first + Outcome outcome_ = Outcome::kUnknown; + std::array board_; +}; + +// Game object. +class ConnectFourGame : public Game { + public: + explicit ConnectFourGame(const GameParameters& params); + int NumDistinctActions() const override { return kCols; } + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new ConnectFourState(shared_from_this())); + } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { return -1; } + absl::optional UtilitySum() const override { return 0; } + double MaxUtility() const override { return 1; } + std::vector ObservationTensorShape() const override { + return {kCellStates, kRows, kCols}; + } + int MaxGameLength() const override { return kNumCells; } +}; + +inline std::ostream& operator<<(std::ostream& stream, const CellState& state) { + switch (state) { + case CellState::kEmpty: + return stream << "Empty"; + case CellState::kNought: + return stream << "O"; + case CellState::kCross: + return stream << "X"; + default: + SpielFatalError("Unknown cell state"); + } +} + +} // namespace connect_four +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_CONNECT_FOUR_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/connect_four/connect_four_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/connect_four/connect_four_test.cc new file mode 100644 index 0000000..2d644f9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/connect_four/connect_four_test.cc @@ -0,0 +1,91 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/connect_four/connect_four.h" + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace connect_four { +namespace { + +namespace testing = open_spiel::testing; + +void BasicConnectFourTests() { + testing::LoadGameTest("connect_four"); + testing::NoChanceOutcomesTest(*LoadGame("connect_four")); + testing::RandomSimTest(*LoadGame("connect_four"), 100); +} + +void FastLoss() { + std::shared_ptr game = LoadGame("connect_four"); + auto state = game->NewInitialState(); + state->ApplyAction(3); + state->ApplyAction(3); + state->ApplyAction(4); + state->ApplyAction(4); + state->ApplyAction(2); + state->ApplyAction(2); + SPIEL_CHECK_FALSE(state->IsTerminal()); + state->ApplyAction(1); + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_EQ(state->Returns(), (std::vector{1.0, -1.0})); + SPIEL_CHECK_EQ(state->ToString(), + ".......\n" + ".......\n" + ".......\n" + ".......\n" + "..ooo..\n" + ".xxxx..\n"); +} + +void BasicSerializationTest() { + std::shared_ptr game = LoadGame("connect_four"); + std::unique_ptr state = game->NewInitialState(); + std::unique_ptr state2 = game->DeserializeState(state->Serialize()); + SPIEL_CHECK_EQ(state->ToString(), state2->ToString()); +} + +void CheckFullBoardDraw() { + std::shared_ptr game = LoadGame("connect_four"); + ConnectFourState state(game, + "ooxxxoo\n" + "xxoooxx\n" + "ooxxxoo\n" + "xxoooxx\n" + "ooxxxoo\n" + "xxoooxx\n"); + SPIEL_CHECK_EQ(state.ToString(), + "ooxxxoo\n" + "xxoooxx\n" + "ooxxxoo\n" + "xxoooxx\n" + "ooxxxoo\n" + "xxoooxx\n"); + SPIEL_CHECK_TRUE(state.IsTerminal()); + SPIEL_CHECK_EQ(state.Returns(), (std::vector{0, 0})); +} + +} // namespace +} // namespace connect_four +} // namespace open_spiel + +int main(int argc, char **argv) { + open_spiel::connect_four::BasicConnectFourTests(); + open_spiel::connect_four::FastLoss(); + open_spiel::connect_four::BasicSerializationTest(); + open_spiel::connect_four::CheckFullBoardDraw(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/coop_box_pushing/coop_box_pushing.cc b/scenarios/bargaining/open_spiel/open_spiel/games/coop_box_pushing/coop_box_pushing.cc new file mode 100644 index 0000000..3ebb069 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/coop_box_pushing/coop_box_pushing.cc @@ -0,0 +1,582 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/coop_box_pushing/coop_box_pushing.h" + +#include +#include +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/utils/tensor_view.h" + +namespace open_spiel { +namespace coop_box_pushing { +namespace { + +// Valid characters: <>^v .bB +// However first 4 characters each have a player (0 or 1) attached to them +// So, 4 + 4 + 3 = 11 +constexpr int kCellStates = 11; +constexpr char kLeft = '<'; +constexpr char kRight = '>'; +constexpr char kUp = '^'; +constexpr char kDown = 'v'; +constexpr char kField = '.'; +constexpr char kSmallBox = 'b'; +constexpr char kBigBox = 'B'; + +// Some constants for this game. +constexpr int kRows = 8; +constexpr int kCols = 8; +constexpr int kNumPlayers = 2; +constexpr int kNumDistinctActions = 4; + +// Chance outcomes. +enum ChanceOutcome { + kChanceSuccess = 0, + kChanceFail = 1, + kChanceInit1 = 2, // determines order of moves + kChanceInit2 = 3 +}; + +// Rewards. +constexpr double kBumpPenalty = -5; +constexpr double kDelayPenalty = -0.1; +constexpr double kSmallBoxReward = 10; +constexpr double kBigBoxReward = 100; + +// Default parameters. +constexpr int kDefaultHorizon = 100; +constexpr bool kDefaultFullyObservable = false; + +constexpr std::array row_offsets = {{-1, 0, 1, 0}}; +constexpr std::array col_offsets = {{0, 1, 0, -1}}; + +// Facts about the game +const GameType kGameType{ + /*short_name=*/"coop_box_pushing", + /*long_name=*/"Cooperative Box Pushing", + GameType::Dynamics::kSimultaneous, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kIdentical, + GameType::RewardModel::kRewards, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"fully_observable", GameParameter(kDefaultFullyObservable)}, + {"horizon", GameParameter(kDefaultHorizon)}}}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new CoopBoxPushingGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +ActionType ToAction(Action action) { + switch (action) { + case 0: + return ActionType::kTurnLeft; + case 1: + return ActionType::kTurnRight; + case 2: + return ActionType::kMoveForward; + case 3: + return ActionType::kStay; + } + + SpielFatalError(absl::StrCat("Invalid action: ", action)); +} + +std::string ActionToString(Action action) { + switch (action) { + case 0: + return "turn left"; + case 1: + return "turn right"; + case 2: + return "move forward"; + case 3: + return "stay"; + } + + SpielFatalError(absl::StrCat("Invalid action: ", action)); +} + +char ToCharacter(int orientation) { + switch (orientation) { + case OrientationType::kNorth: + return '^'; + case OrientationType::kEast: + return '>'; + case OrientationType::kSouth: + return 'v'; + case OrientationType::kWest: + return '<'; + } + + SpielFatalError(absl::StrCat("invalid orientation ", orientation)); +} + +OrientationType Rotate(OrientationType orientation, ActionType move) { + if (move == ActionType::kTurnLeft) { + return (orientation == 0 ? static_cast(3) + : static_cast(orientation - 1)); + } else { + return (orientation == 3 ? 
static_cast(0) + : static_cast(orientation + 1)); + } +} + +std::pair NextCoord(std::pair coord, int direction) { + return {coord.first + row_offsets[direction], + coord.second + col_offsets[direction]}; +} +} // namespace + +CoopBoxPushingState::CoopBoxPushingState(std::shared_ptr game, + int horizon, bool fully_observable) + : SimMoveState(game), + total_rewards_(0), + horizon_(horizon), + cur_player_(kSimultaneousPlayerId), + total_moves_(0), + initiative_(0), + win_(false), + fully_observable_(fully_observable), + reward_(0), + action_status_( + {ActionStatusType::kUnresolved, ActionStatusType::kUnresolved}) { + field_.resize(kRows * kCols, '.'); + + // Small boxes. + SetField({3, 1}, 'b'); + SetField({3, 6}, 'b'); + + // Big box. + SetField({3, 3}, 'B'); + SetField({3, 4}, 'B'); + + // Agents. + SetPlayer({6, 1}, 0, OrientationType::kEast); + SetPlayer({6, 6}, 1, OrientationType::kWest); +} + +std::string CoopBoxPushingState::ActionToString(Player player, + Action action) const { + return ::open_spiel::coop_box_pushing::ActionToString(action); +} + +void CoopBoxPushingState::SetField(std::pair coord, char v) { + field_[coord.first * kCols + coord.second] = v; +} + +void CoopBoxPushingState::SetPlayer(std::pair coord, Player player, + OrientationType orientation) { + SetField(coord, ToCharacter(orientation)); + player_coords_[player] = coord; + player_orient_[player] = orientation; +} + +void CoopBoxPushingState::SetPlayer(std::pair coord, Player player) { + SetPlayer(coord, player, player_orient_[player]); +} + +char CoopBoxPushingState::field(std::pair coord) const { + return field_[coord.first * kCols + coord.second]; +} + +void CoopBoxPushingState::DoApplyActions(const std::vector& actions) { + SPIEL_CHECK_EQ(actions.size(), 2); + SPIEL_CHECK_EQ(cur_player_, kSimultaneousPlayerId); + moves_[0] = ToAction(actions[0]); + moves_[1] = ToAction(actions[1]); + cur_player_ = kChancePlayerId; +} + +bool CoopBoxPushingState::InBounds(std::pair coord) const { + return (coord.first >= 0 && coord.second >= 0 && coord.first < kRows && + coord.second < kCols); +} + +void CoopBoxPushingState::MoveForward(Player player) { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LE(player, 1); + + OrientationType dir = player_orient_[player]; + auto next = NextCoord(player_coords_[player], dir); + + if (!InBounds(next)) { + // Bump.. out of bounds! + AddReward(kBumpPenalty); + } else if (field(next) == '.') { + // Uninterrupted move. + SetField(player_coords_[player], '.'); + SetPlayer(next, player); + } else if (field(next) == 'b') { + auto next_next = NextCoord(next, dir); + if (!InBounds(next_next)) { + // Bump, can't push box out of bounds! + AddReward(kBumpPenalty); + } else if (field(next_next) == '.') { + // Move the small box. + SetField(next_next, 'b'); + SetField(player_coords_[player], '.'); + SetPlayer(next, player); + + // Check for reward. + if (next_next.first == 0 && next.first != 0) { + AddReward(kSmallBoxReward); + } + } else { + // Trying to move box into something else.. bump! + AddReward(kBumpPenalty); + } + } else { + // Also bump! + AddReward(kBumpPenalty); + } +} + +void CoopBoxPushingState::AddReward(double reward) { + reward_ += reward; + total_rewards_ += reward; +} + +void CoopBoxPushingState::ResolveMoves() { + // Check for successful move of the big box. 
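+  // The joint push succeeds only if both agents chose kMoveForward, both
+  // chance rolls succeeded, and each agent faces a 'B' cell with an empty
+  // cell beyond it; the box and both agents then advance one cell, and
+  // kBigBoxReward is granted when the box reaches row 0.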
+ if (moves_[0] == ActionType::kMoveForward && + moves_[1] == ActionType::kMoveForward && + action_status_[0] == ActionStatusType::kSuccess && + action_status_[1] == ActionStatusType::kSuccess) { + std::array, 2> next_coords; + std::array, 2> next_next_coords; + + next_coords[0] = NextCoord(player_coords_[0], player_orient_[0]); + next_coords[1] = NextCoord(player_coords_[1], player_orient_[1]); + next_next_coords[0] = NextCoord(next_coords[0], player_orient_[0]); + next_next_coords[1] = NextCoord(next_coords[1], player_orient_[1]); + + if (InBounds(next_coords[0]) && InBounds(next_coords[1]) && + InBounds(next_next_coords[0]) && InBounds(next_next_coords[1]) && + field(next_coords[0]) == 'B' && field(next_coords[1]) == 'B' && + field(next_next_coords[0]) == '.' && + field(next_next_coords[1]) == '.') { + SetField(next_next_coords[0], 'B'); + SetField(next_next_coords[1], 'B'); + SetField(player_coords_[0], '.'); + SetField(player_coords_[1], '.'); + SetPlayer(next_coords[0], 0); + SetPlayer(next_coords[1], 1); + + if (next_next_coords[0].first == 0 && next_coords[0].first != 0) { + AddReward(kBigBoxReward); + win_ = true; + return; + } + } + } + + // Otherwise, just resolve them independently. + for (int i = 0; i < 2; i++) { + // Player order depends on initiative. + int p = (i + initiative_) % 2; + + SPIEL_CHECK_GE(p, 0); + SPIEL_CHECK_LT(p, 2); + SPIEL_CHECK_TRUE(action_status_[p] != ActionStatusType::kUnresolved); + + ActionType move = moves_[p]; + + // Action failed or deliberate stay => nothing happens to this agent. + if (action_status_[p] == ActionStatusType::kFail || + move == ActionType::kStay) { + continue; + } + + if (move == ActionType::kTurnLeft || move == ActionType::kTurnRight) { + SetPlayer(player_coords_[p], p, Rotate(player_orient_[p], move)); + } else if (move == ActionType::kMoveForward) { + MoveForward(p); + } + } + + // Reset the action statuses and current player. + cur_player_ = kSimultaneousPlayerId; + action_status_[0] = ActionStatusType::kUnresolved; + action_status_[1] = ActionStatusType::kUnresolved; + + AddReward(kDelayPenalty); + total_moves_++; +} + +void CoopBoxPushingState::DoApplyAction(Action action) { + reward_ = 0; + if (IsSimultaneousNode()) return ApplyFlatJointAction(action); + + if (action == kChanceSuccess) { + // Success. + if (action_status_[0] == ActionStatusType::kUnresolved) { + action_status_[0] = ActionStatusType::kSuccess; + } else if (action_status_[1] == ActionStatusType::kUnresolved) { + action_status_[1] = ActionStatusType::kSuccess; + } else { + SpielFatalError(absl::StrCat("Invalid chance move case: ", action)); + } + } else if (action == kChanceFail) { + // Fail! + if (action_status_[0] == ActionStatusType::kUnresolved) { + action_status_[0] = ActionStatusType::kFail; + } else if (action_status_[1] == ActionStatusType::kUnresolved) { + action_status_[1] = ActionStatusType::kFail; + } else { + SpielFatalError(absl::StrCat("Invalid chance move case: ", action)); + } + } else if (action == kChanceInit1) { + // Player 1 moves first. + initiative_ = 0; + ResolveMoves(); + } else { + // Player 2 moves first. 
+ initiative_ = 1; + ResolveMoves(); + } +} + +std::vector CoopBoxPushingState::LegalActions(Player player) const { + if (player == kSimultaneousPlayerId) { + return LegalFlatJointActions(); + } else if (IsTerminal()) { + return {}; + } else if (IsChanceNode()) { + if (action_status_[0] == ActionStatusType::kUnresolved || + action_status_[1] == ActionStatusType::kUnresolved) { + return {0, 1}; + } else { + return {2, 3}; + } + } + // All the actions are legal at every state. + return {0, 1, 2, 3}; +} + +ActionsAndProbs CoopBoxPushingState::ChanceOutcomes() const { + SPIEL_CHECK_TRUE(IsChanceNode()); + + if (action_status_[0] == ActionStatusType::kUnresolved || + action_status_[1] == ActionStatusType::kUnresolved) { + // Determine success (0) or failure (1) of a player's action. + return {std::pair(0, 0.9), + std::pair(1, 0.1)}; + } else { + // Determine initiative outcomes (2 and 3) + return {std::pair(2, 0.5), + std::pair(3, 0.5)}; + } +} + +std::string CoopBoxPushingState::ToString() const { + std::string result = ""; + absl::StrAppend(&result, "Total moves: ", total_moves_, "\n"); + absl::StrAppend(&result, "Most recent reward: ", reward_, "\n"); + absl::StrAppend(&result, "Total rewards: ", total_rewards_, "\n"); + + for (int r = 0; r < kRows; r++) { + for (int c = 0; c < kCols; c++) { + result += field({r, c}); + } + + absl::StrAppend(&result, "\n"); + } + + return result; +} + +ObservationType CoopBoxPushingState::PartialObservation(Player player) const { + std::pair adj_coord = { + player_coords_[player].first + row_offsets[player_orient_[player]], + player_coords_[player].second + col_offsets[player_orient_[player]]}; + + if (!InBounds(adj_coord)) { + return kWallObs; + } else { + switch (field(adj_coord)) { + case kField: + return kEmptyFieldObs; + case kLeft: + case kRight: + case kUp: + case kDown: + return kOtherAgentObs; + case kSmallBox: + return kSmallBoxObs; + case kBigBox: + return kBigBoxObs; + default: + SpielFatalError("Unrecognized field char: " + + std::to_string(field(adj_coord))); + } + } +} + +std::string CoopBoxPushingState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + if (fully_observable_) { + return ToString(); + } else { + ObservationType obs = PartialObservation(player); + switch (obs) { + case kEmptyFieldObs: + return "field"; + case kWallObs: + return "wall"; + case kOtherAgentObs: + return "other agent"; + case kSmallBoxObs: + return "small box"; + case kBigBoxObs: + return "big box"; + default: + SpielFatalError("Unrecognized observation!"); + } + } +} + +bool CoopBoxPushingState::IsTerminal() const { + return (total_moves_ >= horizon_ || win_); +} + +std::vector CoopBoxPushingState::Returns() const { + // Cooperative game: all players get same reward. + return {total_rewards_, total_rewards_}; +} + +std::vector CoopBoxPushingState::Rewards() const { + // Cooperative game: all players get same reward. + return {reward_, reward_}; +} + +bool CoopBoxPushingState::SameAsPlayer(std::pair coord, + Player player) const { + return coord == player_coords_[player]; +} + +int CoopBoxPushingState::ObservationPlane(std::pair coord, + Player player) const { + int plane = 0; + switch (field(coord)) { + case kField: + plane = 0; + break; + case kSmallBox: + plane = 1; + break; + case kBigBox: + plane = 2; + break; + case kLeft: + plane = (SameAsPlayer(coord, player)) ? 3 : 4; + break; + case kRight: + plane = (SameAsPlayer(coord, player)) ? 
5 : 6; + break; + case kUp: + plane = (SameAsPlayer(coord, player)) ? 7 : 8; + break; + case kDown: + plane = (SameAsPlayer(coord, player)) ? 9 : 10; + break; + default: + std::cerr << "Invalid character on field: " << field(coord) << std::endl; + std::cerr << ToString() << std::endl; + plane = -1; + break; + } + + return plane; +} + +void CoopBoxPushingState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + if (fully_observable_) { + TensorView<3> view(values, {kCellStates, kRows, kCols}, true); + + for (int r = 0; r < kRows; r++) { + for (int c = 0; c < kCols; c++) { + int plane = ObservationPlane({r, c}, player); + SPIEL_CHECK_TRUE(plane >= 0 && plane < kCellStates); + view[{plane, r, c}] = 1.0; + } + } + } else { + SPIEL_CHECK_EQ(values.size(), kNumObservations); + std::fill(values.begin(), values.end(), 0); + ObservationType obs = PartialObservation(player); + values[obs] = 1; + } +} + +std::unique_ptr CoopBoxPushingState::Clone() const { + return std::unique_ptr(new CoopBoxPushingState(*this)); +} + +CoopBoxPushingGame::CoopBoxPushingGame(const GameParameters& params) + : SimMoveGame(kGameType, params), + horizon_(ParameterValue("horizon")), + fully_observable_(ParameterValue("fully_observable")) {} + +std::vector CoopBoxPushingGame::ObservationTensorShape() const { + if (fully_observable_) { + return {kCellStates, kRows, kCols}; + } else { + return {kNumObservations}; + } +} + +int CoopBoxPushingGame::NumDistinctActions() const { + return kNumDistinctActions; +} + +int CoopBoxPushingGame::NumPlayers() const { return kNumPlayers; } + +std::unique_ptr CoopBoxPushingGame::NewInitialState() const { + std::unique_ptr state( + new CoopBoxPushingState(shared_from_this(), horizon_, fully_observable_)); + return state; +} + +// This is a cooperative game where rewards are summed over players. +// So multiply the lower/upper bound by number of players. Also, utility is +// handed out at the end of the episode, so multiply this lower bound by the +// episode length. +double CoopBoxPushingGame::MaxUtility() const { + return MaxGameLength() * NumPlayers() * (kBigBoxReward + kDelayPenalty); +} + +double CoopBoxPushingGame::MinUtility() const { + return MaxGameLength() * NumPlayers() * (kBumpPenalty + kDelayPenalty); +} + +} // namespace coop_box_pushing +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/coop_box_pushing/coop_box_pushing.h b/scenarios/bargaining/open_spiel/open_spiel/games/coop_box_pushing/coop_box_pushing.h new file mode 100644 index 0000000..37bf40d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/coop_box_pushing/coop_box_pushing.h @@ -0,0 +1,159 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef OPEN_SPIEL_GAMES_COOP_BOX_PUSHING_SOCCER_H_ +#define OPEN_SPIEL_GAMES_COOP_BOX_PUSHING_SOCCER_H_ + +#include +#include +#include +#include + +#include "open_spiel/simultaneous_move_game.h" +#include "open_spiel/spiel.h" + +// This is the cooperative box-pushing domain presented by Seuken & Zilberstein +// in their paper "Improved Memory-Bounded Dynamic Programming for Dec-POMDPs" +// http://rbr.cs.umass.edu/papers/SZuai07.pdf +// +// Parameters: +// "fully_observable" bool agents see everything, or only partial view as +// described in the original paper (def: false) +// "horizon" int length of horizon (default = 100) + +namespace open_spiel { +namespace coop_box_pushing { + +// To indicate the status of each agent's action. +enum class ActionStatusType { + kUnresolved, + kSuccess, + kFail, +}; + +// Direction each agent can be facing. +enum OrientationType { + kNorth = 0, + kEast = 1, + kSouth = 2, + kWest = 3, + kInvalid = 4 +}; + +// When not fully-observable, the number of observations (taken from Seuken & +// Zilberstein '12): empty field, wall, other agent, small box, large box. +enum ObservationType { + kEmptyFieldObs, + kWallObs, + kOtherAgentObs, + kSmallBoxObs, + kBigBoxObs +}; +constexpr int kNumObservations = 5; + +// Different actions used by the agent. +enum class ActionType { kTurnLeft, kTurnRight, kMoveForward, kStay }; + +class CoopBoxPushingState : public SimMoveState { + public: + CoopBoxPushingState(std::shared_ptr game, int horizon, + bool fully_observable); + + std::string ActionToString(Player player, Action action) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::vector Rewards() const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::string ObservationString(Player player) const override; + + Player CurrentPlayer() const override { + return IsTerminal() ? kTerminalPlayerId : cur_player_; + } + std::unique_ptr Clone() const override; + + ActionsAndProbs ChanceOutcomes() const override; + + void Reset(const GameParameters& params); + std::vector LegalActions(Player player) const override; + + protected: + void DoApplyAction(Action action) override; + void DoApplyActions(const std::vector& actions) override; + + private: + void SetField(std::pair coord, char v); + void SetPlayer(std::pair coord, Player player, + OrientationType orientation); + void SetPlayer(std::pair coord, Player player); + void AddReward(double reward); + char field(std::pair coord) const; + void ResolveMoves(); + void MoveForward(Player player); + bool InBounds(std::pair coord) const; + bool SameAsPlayer(std::pair coord, Player player) const; + + // Partial observation of the specific agent. + ObservationType PartialObservation(Player player) const; + + // Observation planes for the fully-observable case. + int ObservationPlane(std::pair coord, Player player) const; + + // Fields sets to bad/invalid values. Use Game::NewInitialState(). + double total_rewards_ = -1; + int horizon_ = -1; // Limit on the total number of moves. + Player cur_player_ = kSimultaneousPlayerId; + int total_moves_ = 0; + int initiative_; // player id of player to resolve actions first. + bool win_; // True if agents push the big box to the goal. + bool fully_observable_; + + // Most recent rewards. + double reward_; + // All coordinates below are (row, col). + std::array, 2> player_coords_; // Players' coordinates. + // Players' orientations. 
+ std::array player_orient_; + // Moves chosen by agents. + std::array moves_; + // The status of each of the players' moves. + std::array action_status_; + // Actual field used by the players. + std::vector field_; +}; + +class CoopBoxPushingGame : public SimMoveGame { + public: + explicit CoopBoxPushingGame(const GameParameters& params); + int NumDistinctActions() const override; + std::unique_ptr NewInitialState() const override; + int MaxChanceOutcomes() const override { return 4; } + int NumPlayers() const override; + double MinUtility() const override; + double MaxUtility() const override; + std::vector ObservationTensorShape() const override; + int MaxGameLength() const override { return horizon_; } + // TODO: verify whether this bound is tight and/or tighten it. + int MaxChanceNodesInHistory() const override { return MaxGameLength(); } + + private: + int horizon_; + bool fully_observable_; +}; + +} // namespace coop_box_pushing +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_COOP_BOX_PUSHING diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/coop_box_pushing/coop_box_pushing_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/coop_box_pushing/coop_box_pushing_test.cc new file mode 100644 index 0000000..0e189ce --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/coop_box_pushing/coop_box_pushing_test.cc @@ -0,0 +1,38 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/coop_box_pushing/coop_box_pushing.h" + +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace coop_box_pushing { +namespace { + +namespace testing = open_spiel::testing; + +void BasicCoopBoxPushingTests() { + testing::LoadGameTest("coop_box_pushing"); + testing::ChanceOutcomesTest(*LoadGame("coop_box_pushing")); + testing::RandomSimTest(*LoadGame("coop_box_pushing"), 100); +} + +} // namespace +} // namespace coop_box_pushing +} // namespace open_spiel + +int main(int argc, char **argv) { + open_spiel::coop_box_pushing::BasicCoopBoxPushingTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/coordinated_mp/coordinated_mp.cc b/scenarios/bargaining/open_spiel/open_spiel/games/coordinated_mp/coordinated_mp.cc new file mode 100644 index 0000000..be9094a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/coordinated_mp/coordinated_mp.cc @@ -0,0 +1,237 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/coordinated_mp/coordinated_mp.h" + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/observer.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace coordinated_mp { +namespace { + +// Facts about the game +const GameType kGameType{/*short_name=*/"coordinated_mp", + /*long_name=*/"Coordinated Matching Pennies", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/false, + /*parameter_specification=*/{}, + /*default_loadable*/true, + /*provides_factored_observation_string*/true}; + +std::shared_ptr Factory(const GameParameters ¶ms) { + return std::shared_ptr(new PenniesGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); +} // namespace + +class PenniesObserver : public Observer { + public: + PenniesObserver(IIGObservationType iig_obs_type) + : Observer(/*has_string=*/true, /*has_tensor=*/false), + iig_obs_type_(iig_obs_type) {} + + void WriteTensor(const State &observed_state, int player, + Allocator *allocator) const override { + SpielFatalError("Unimplemented"); + } + + std::string StringFrom(const State &observed_state, int player) const { + const PenniesState &state = + open_spiel::down_cast(observed_state); + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, state.num_players_); + + std::string str; + if (iig_obs_type_.perfect_recall) { + absl::StrAppend(&str, state.MoveNumber()); + } + + if (iig_obs_type_.perfect_recall && + (iig_obs_type_.private_info == PrivateInfoType::kAllPlayers || + (player == 0 && + iig_obs_type_.private_info == PrivateInfoType::kSinglePlayer))) { + if (state.actionA_ == kHeads) str.push_back('H'); + if (state.actionA_ == kTails) str.push_back('T'); + } + + if (iig_obs_type_.private_info != PrivateInfoType::kNone) { + // TODO(author13) + // This information appears to be private information for both players, + // but not public information. Is this a bug? + if (state.infoset_ == kTop) str.push_back('T'); + if (state.infoset_ == kBottom) str.push_back('B'); + } + + if (iig_obs_type_.perfect_recall && + (iig_obs_type_.private_info == PrivateInfoType::kAllPlayers || + (player == 1 && + iig_obs_type_.private_info == PrivateInfoType::kSinglePlayer))) { + if (state.actionB_ == kHeads) str.push_back('H'); + if (state.actionB_ == kTails) str.push_back('T'); + } + + if (iig_obs_type_.public_info && + iig_obs_type_.private_info == PrivateInfoType::kNone) { + if (state.IsInitialState()) + absl::StrAppend(&str, "start game"); + else + absl::StrAppend(&str, "clock tick"); + } + + return str; + } + + private: + IIGObservationType iig_obs_type_; +}; + +PenniesState::PenniesState(std::shared_ptr game) : State(game) {} + +int PenniesState::CurrentPlayer() const { + if (actionA_ == kNoAction) { + // When first player acts, these should not be set yet. 
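+    // (Move order is player 0, then the chance node picks Top/Bottom, then
+    // player 1.)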
+ SPIEL_CHECK_EQ(infoset_, kNoInfoset); + SPIEL_CHECK_EQ(actionB_, kNoAction); + return Player(0); + } + if (infoset_ == kNoInfoset) { + // When chance player acts, second player shoud have no action. + SPIEL_CHECK_EQ(actionB_, kNoAction); + return kChancePlayerId; + } + if (actionB_ == kNoAction) { + return Player(1); + } + + SPIEL_CHECK_TRUE(IsTerminal()); + return kTerminalPlayerId; +} + +void PenniesState::DoApplyAction(Action move) { + switch (CurrentPlayer()) { + case Player(0): + actionA_ = static_cast(move); + break; + case Player(1): + actionB_ = static_cast(move); + break; + case kChancePlayerId: + infoset_ = static_cast(move); + break; + default: + SpielFatalError("Should not match"); + } +} + +std::vector PenniesState::LegalActions() const { + if (IsTerminal()) return {}; + if (IsChanceNode()) return {InfosetPosition::kTop, InfosetPosition::kBottom}; + return {ActionType::kHeads, ActionType::kTails}; +} + +std::string PenniesState::ActionToString(Player player, Action move) const { + if (IsChanceNode()) { + if (move == kTop) return "Top"; + if (move == kBottom) return "Bottom"; + SpielFatalError("Should not match"); + } + if (move == kHeads) return "Heads"; + if (move == kTails) return "Tails"; + + SpielFatalError("Should not match"); + return "Does not return"; +} + +std::string PenniesState::ToString() const { + std::string str; + if (actionA_ == kHeads) absl::StrAppend(&str, "H"); + if (actionA_ == kTails) absl::StrAppend(&str, "T"); + if (infoset_ == kTop) absl::StrAppend(&str, "T"); + if (infoset_ == kBottom) absl::StrAppend(&str, "B"); + if (actionB_ == kHeads) absl::StrAppend(&str, "H"); + if (actionB_ == kTails) absl::StrAppend(&str, "T"); + return str; +} + +bool PenniesState::IsTerminal() const { + return actionA_ != kNoAction && actionB_ != kNoAction && + infoset_ != kNoInfoset; +} + +std::vector PenniesState::Returns() const { + if (!IsTerminal()) return {0., 0.}; + const double matching = actionA_ == actionB_ ? 1. 
: -1.; + return {matching * 1., matching * -1.}; +} + +std::string PenniesState::InformationStateString(Player player) const { + const PenniesGame &game = open_spiel::down_cast(*game_); + return game.info_state_observer_->StringFrom(*this, player); +} + +std::string PenniesState::ObservationString(Player player) const { + const PenniesGame &game = open_spiel::down_cast(*game_); + return game.default_observer_->StringFrom(*this, player); +} + +std::unique_ptr PenniesState::Clone() const { + return absl::make_unique(*this); +} + +std::vector> PenniesState::ChanceOutcomes() const { + SPIEL_CHECK_TRUE(IsChanceNode()); + return {{kTop, 0.5}, {kBottom, 0.5}}; +} + +PenniesGame::PenniesGame(const GameParameters ¶ms) + : Game(kGameType, params) { + default_observer_ = std::make_shared(kDefaultObsType); + info_state_observer_ = std::make_shared(kInfoStateObsType); +} + +std::unique_ptr PenniesGame::NewInitialState() const { + return absl::make_unique(shared_from_this()); +} + +std::shared_ptr PenniesGame::MakeObserver( + absl::optional iig_obs_type, + const GameParameters ¶ms) const { + SPIEL_CHECK_TRUE(params.empty()); + return std::make_shared( + iig_obs_type.value_or(kDefaultObsType)); +} + +} // namespace coordinated_mp +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/coordinated_mp/coordinated_mp.h b/scenarios/bargaining/open_spiel/open_spiel/games/coordinated_mp/coordinated_mp.h new file mode 100644 index 0000000..57a38a7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/coordinated_mp/coordinated_mp.h @@ -0,0 +1,97 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_COORDINATED_MP_H_ +#define OPEN_SPIEL_GAMES_COORDINATED_MP_H_ + +#include +#include +#include +#include + +#include "open_spiel/spiel.h" + +// A simple game of Coordinate Matching Pennies, a modification of original MP +// that has multiple Nash equilibria lying on a line parametrized with one +// variable for the second player. He must coordinate his actions in the two +// infosets that he has, in such a way that p+q=1 for NE, where p and q are +// probabilities of playing Heads in top and bottom infosets respectively. +// +// For more information on this game (e.g. equilibrium sets, etc.) 
see +// todo: arxiv link +// + +namespace open_spiel { +namespace coordinated_mp { + +enum ActionType { kNoAction = -1, kHeads = 0, kTails = 1 }; +enum InfosetPosition { kNoInfoset = -1, kTop = 0, kBottom = 1 }; + +class PenniesObserver; + +class PenniesState : public State { + public: + explicit PenniesState(std::shared_ptr game); + PenniesState(const PenniesState&) = default; + + Player CurrentPlayer() const override; + + std::string ActionToString(Player player, Action move) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + std::unique_ptr Clone() const override; + std::vector> ChanceOutcomes() const override; + std::vector LegalActions() const override; + + protected: + void DoApplyAction(Action move) override; + + private: + friend class PenniesObserver; + + ActionType actionA_ = kNoAction; // Action of the first player. + ActionType actionB_ = kNoAction; // Action of the second player. + InfosetPosition infoset_ = kNoInfoset; // The infoset position in the game. +}; + +class PenniesGame : public Game { + public: + explicit PenniesGame(const GameParameters& params); + int NumDistinctActions() const override { return 2; } + std::unique_ptr NewInitialState() const override; + int MaxChanceOutcomes() const override { return 2; } + int NumPlayers() const override { return 2; } + double MinUtility() const override { return -1; }; + double MaxUtility() const override { return 1; }; + absl::optional UtilitySum() const override { return 0; } + int MaxGameLength() const override { return 2; } + int MaxChanceNodesInHistory() const override { return 1; } + + // New Observation API + std::shared_ptr MakeObserver( + absl::optional iig_obs_type, + const GameParameters& params) const override; + + // Used to implement the old observation API. + std::shared_ptr default_observer_; + std::shared_ptr info_state_observer_; +}; + +} // namespace coordinated_mp +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_COORDINATED_MP_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/coordinated_mp/coordinated_mp_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/coordinated_mp/coordinated_mp_test.cc new file mode 100644 index 0000000..777ce55 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/coordinated_mp/coordinated_mp_test.cc @@ -0,0 +1,53 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
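+
+// Basic load / chance-outcome / random-simulation tests, plus a check that
+// the full game tree has exactly 15 states: the root, 2 states after player
+// 0's move, 4 after the chance node, and 8 terminal states.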
+ +#include "open_spiel/games/coordinated_mp/coordinated_mp.h" + +#include "open_spiel/algorithms/get_all_states.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace coordinated_mp { +namespace { + +namespace testing = open_spiel::testing; + +void BasicCoordinatedMPTests() { + testing::LoadGameTest("coordinated_mp"); + testing::ChanceOutcomesTest(*LoadGame("coordinated_mp")); + testing::RandomSimTest(*LoadGame("coordinated_mp"), 100); +} + +void CountStates() { + std::shared_ptr game = LoadGame("coordinated_mp"); + auto states = algorithms::GetAllStates(*game, + /*depth_limit=*/-1, + /*include_terminals=*/true, + /*include_chance_states=*/true); + SPIEL_CHECK_EQ(states.size(), 15); +} + +} // namespace +} // namespace coordinated_mp +} // namespace open_spiel + +int main(int argc, char **argv) { + open_spiel::coordinated_mp::BasicCoordinatedMPTests(); + open_spiel::coordinated_mp::CountStates(); + open_spiel::testing::CheckChanceOutcomes( + *open_spiel::LoadGame("coordinated_mp")); + open_spiel::testing::RandomSimTest(*open_spiel::LoadGame("coordinated_mp"), + /*num_sims=*/10); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/crazy_eights/crazy_eights.cc b/scenarios/bargaining/open_spiel/open_spiel/games/crazy_eights/crazy_eights.cc new file mode 100644 index 0000000..af869fd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/crazy_eights/crazy_eights.cc @@ -0,0 +1,712 @@ +// Copyright 2023 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/crazy_eights/crazy_eights.h" + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" + +namespace open_spiel { +namespace crazy_eights { + +namespace { + +constexpr char kRankChar[] = "23456789TJQKA"; +constexpr char kSuitChar[] = "CDHS"; + +constexpr int kDefaultPlayers = 5; +constexpr int kDefaultMaxDrawCards = 5; +constexpr int kNumInitialCardsForTwoPlayers = 7; +constexpr int kNumInitialCards = 5; + +constexpr int kEightRank = 6; // 8 +constexpr int kSkipRank = 10; // Q +constexpr int kReverseRank = 12; // A +constexpr int kDrawTwoRank = 0; // 2 + +const GameType kGameType{ + /*short_name=*/"crazy_eights", + /*long_name=*/"Crazy Eights", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/15, + /*min_num_players=*/2, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"players", GameParameter(kDefaultPlayers)}, + {"max_draw_cards", GameParameter(kDefaultMaxDrawCards)}, + {"use_special_cards", GameParameter(false)}, + {"reshuffle", GameParameter(false)}}, + /*default_loadable=*/true, +}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new CrazyEightsGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +Suit GetSuit(int action) { + SPIEL_CHECK_GE(action, 0); + SPIEL_CHECK_LT(action, kNumCards); + + return static_cast(action % kNumSuits); +} + +int GetRank(int action) { + SPIEL_CHECK_GE(action, 0); + SPIEL_CHECK_LT(action, kNumCards); + + return action / kNumSuits; +} + +int GetAction(Suit suit, int rank) { + SPIEL_CHECK_LE(rank, kNumRanks); + return rank * kNumSuits + static_cast(suit); +} + +std::string GetCardStr(int action) { + SPIEL_CHECK_GE(action, 0); + SPIEL_CHECK_LT(action, kNumCards); + int rank = GetRank(action); + int suit = static_cast(GetSuit(action)); + return {kSuitChar[suit], kRankChar[rank]}; +} + +} // namespace + +CrazyEightsGame::CrazyEightsGame(const GameParameters& params) + : Game(kGameType, params), + num_players_(ParameterValue("players")), + max_draw_cards_(ParameterValue("max_draw_cards")), + use_special_cards_(ParameterValue("use_special_cards")), + reshuffle_(ParameterValue("reshuffle")) {} + +CrazyEightsState::CrazyEightsState(std::shared_ptr game, + int num_players, int max_draw_cards, + bool use_special_cards, bool reshuffle) + : State(game), + reshuffle_(reshuffle), + num_players_(num_players), + max_draw_cards_(max_draw_cards), + use_special_cards_(use_special_cards) { + num_initial_cards_ = + num_players == 2 ? kNumInitialCardsForTwoPlayers : kNumInitialCards; + num_decks_ = num_players > 5 ? 
2 : 1; + num_cards_left_ = num_decks_ * kNumCards; + absl::c_fill(dealer_deck_, num_decks_); + for (int i = 0; i < num_players; ++i) { + hands_.push_back(std::vector(kNumCards, 0)); + returns_.push_back(0); + } +} + +std::string CrazyEightsState::ActionToString(Player player, + Action action) const { + if (player == kChancePlayerId) { + if (action < kDraw) { + return absl::StrFormat("Deal %s", GetCardStr(action)); + } else if (action < kDecideDealerActionBase + num_players_) { + return absl::StrFormat("Decide Player %d to be the dealer", + action - kDecideDealerActionBase); + } else { + SpielFatalError( + absl::StrFormat("Non action valid Id %d for chance player", action)); + } + } + + if (action < kDraw) { + return absl::StrFormat("Play %s", GetCardStr(action)); + } else if (action == kDraw) { + return "Draw"; + } else if (action == kPass) { + return "Pass"; + } else if (action < kNominateSuitActionBase + kNumSuits) { + return absl::StrFormat("Nominate suit %c", + kSuitChar[action - kNominateSuitActionBase]); + } else { + SpielFatalError( + absl::StrFormat("Non valid Id %d for player: %d", action, player)); + } +} + +std::vector CrazyEightsState::FormatHand(Player player) const { + std::vector hand_str(kNumSuits, + std::string(num_decks_ * kNumRanks, ' ')); + for (int suit = 0; suit < kNumSuits; ++suit) { + for (int rank = 0; rank < kNumRanks; ++rank) { + int card = GetAction(static_cast(suit), rank); + for (int i = 0; i < hands_[player][card]; ++i) { + hand_str[suit][rank * num_decks_ + i] = kRankChar[rank]; + } + } + } + return hand_str; +} + +std::string CrazyEightsState::FormatAllHands() const { + std::string hands_str; + std::vector> all_hands; + all_hands.reserve(num_players_); + for (int player = 0; player < num_players_; ++player) { + all_hands.push_back(FormatHand(player)); + } + constexpr int kLongWidth = 40; + + for (int player = 0; player < num_players_; ++player) { + std::string player_str = absl::StrFormat("Player %d:", player); + if (player != num_players_ - 1) { + absl::StrAppend(&player_str, + std::string(kLongWidth - player_str.length(), ' ')); + } else { + absl::StrAppend(&player_str, "\n"); + } + absl::StrAppend(&hands_str, player_str); + } + + for (int suit = 0; suit < kNumSuits; ++suit) { + std::string suit_row; + for (int player = 0; player < num_players_; ++player) { + std::string player_row; + absl::StrAppend(&player_row, + absl::StrFormat("Suit %c: %s", kSuitChar[suit], + all_hands[player][suit])); + SPIEL_CHECK_GE(kLongWidth, player_row.length()); + if (player != num_players_ - 1) { + absl::StrAppend(&player_row, + std::string(kLongWidth - player_row.length(), ' ')); + } else { + absl::StrAppend(&player_row, "\n"); + } + absl::StrAppend(&suit_row, player_row); + } + absl::StrAppend(&hands_str, suit_row); + } + return hands_str; +} + +std::string CrazyEightsState::ToString() const { + std::string str; + int playing_player = dealer_; + for (int i = 0; i < history_.size(); ++i) { + if (i == 0) { + absl::StrAppend( + &str, absl::StrFormat("Player %d becomes the dealer\n", dealer_)); + } else if (i <= num_players_ * num_initial_cards_) { + int player = (dealer_ + i) % num_players_; + absl::StrAppend(&str, absl::StrFormat("Player %d is dealt %s\n", player, + GetCardStr(history_[i].action))); + } else { + if (history_[i].player == kChancePlayerId) { + absl::StrAppend(&str, + absl::StrFormat("Player %d draws %s\n", playing_player, + GetCardStr(history_[i].action))); + } else if (history_[i].player != kTerminalPlayerId) { + playing_player = history_[i].player; + if 
(history_[i].action == kDraw) { + absl::StrAppend(&str, absl::StrFormat("Player %d starts drawing\n", + playing_player)); + } else if (history_[i].action == kPass) { + absl::StrAppend( + &str, absl::StrFormat("Player %d passes\n", playing_player)); + } else if (history_[i].action >= kNominateSuitActionBase && + history_[i].action < kNominateSuitActionBase + kNumSuits) { + int suit = history_[i].action - kNominateSuitActionBase; + absl::StrAppend(&str, + absl::StrFormat("Player %d nominates suit %c\n", + playing_player, kSuitChar[suit])); + } else { + SPIEL_CHECK_GE(history_[i].action, 0); + SPIEL_CHECK_LT(history_[i].action, kNumCards); + absl::StrAppend( + &str, absl::StrFormat("Player %d plays %s\n", playing_player, + GetCardStr(history_[i].action))); + } + } else { + absl::StrAppend(&str, "Final scores\n"); + for (int player = 0; player < num_players_; ++player) { + absl::StrAppend(&str, absl::StrFormat("Player %d gets score %f\n", + player, returns_[player])); + } + } + } + } + if (last_card_ != kInvalidAction) { + absl::StrAppend(&str, + absl::StrFormat("Last card: %s\n", GetCardStr(last_card_))); + absl::StrAppend(&str, + absl::StrFormat("Last suit: %c\n", kSuitChar[last_suit_])); + } + absl::StrAppend(&str, absl::StrFormat("Number of cards left in deck: %d\n", + num_cards_left_)); + absl::StrAppend(&str, FormatAllHands()); + return str; +} + +std::string CrazyEightsState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + std::string str; + if (phase_ == Phase::kDeal) return str; + absl::StrAppend(&str, "Currently I have: \n"); + std::vector hands = FormatHand(player); + for (int suit = 0; suit < kNumSuits; ++suit) { + absl::StrAppend( + &str, absl::StrFormat("Suit %c: %s\n", kSuitChar[suit], hands[suit])); + } + absl::StrAppend( + &str, absl::StrFormat("Previous card: %s\n", GetCardStr(last_card_))); + absl::StrAppend( + &str, absl::StrFormat("Previous suit: %c\n", kSuitChar[last_suit_])); + absl::StrAppend(&str, "Starting counterclockwise, other players have: "); + for (int i = 0; i <= num_players_ - 1; ++i) { + int player_idx = (player + i) % num_players_; + int player_num_cards = 0; + for (int card = 0; card < kNumCards; ++card) { + player_num_cards += hands_[player_idx][card]; + } + if (i != num_players_ - 1) { + absl::StrAppend(&str, absl::StrFormat("%d, ", player_num_cards)); + } else { + absl::StrAppend(&str, absl::StrFormat("%d cards.\n", player_num_cards)); + } + } + if (use_special_cards_) { + absl::StrAppend(&str, absl::StrFormat("The direction is %s\n", + direction_ == 1 ? 
"counterclockwise" + : "clockwise")); + } + return str; +} + +void CrazyEightsState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_EQ(values.size(), game_->ObservationTensorSize()); + WriteObservationTensor(player, values); +} + +void CrazyEightsState::WriteObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + absl::c_fill(values, 0.); + if (phase_ == Phase::kDeal) return; + + for (int card = 0; card < kNumCards; ++card) { + values[card * (num_decks_ + 1) + hands_[player][card]] = 1; + } + values[(num_decks_ + 1) * kNumCards + last_card_] = 1; + values[(num_decks_ + 1) * kNumCards + kNumCards + last_suit_] = 1; + int tmp_base = (num_decks_ + 1) * kNumCards + kNumCards + kNumSuits; + for (int i = 1; i <= num_players_ - 1; ++i) { + int num_cards = 0; + for (int card = 0; card < kNumCards; ++card) { + num_cards += hands_[(player + i) % num_players_][card]; + } + values[tmp_base + (i - 1) * (num_decks_ * kNumCards + 1) + num_cards] = 1; + } + + if (use_special_cards_) { + tmp_base += (num_decks_ * kNumCards + 1) * (num_players_ - 1); + values[tmp_base] = (direction_ + 1) / 2; + } +} + +std::vector CrazyEightsState::LegalActions() const { + switch (phase_) { + case Phase::kDeal: + return DealLegalActions(); + case Phase::kPlay: + return PlayLegalActions(); + default: + return {}; + } +} + +std::vector> CrazyEightsState::ChanceOutcomes() + const { + std::vector> outcomes; + if (history_.empty()) { + for (int player = 0; player < num_players_; ++player) { + outcomes.emplace_back(player + kDecideDealerActionBase, + 1.0 / num_players_); + } + } else { + int num_cards_remaining = 0; + for (int card = 0; card < kNumCards; ++card) { + SPIEL_CHECK_GE(dealer_deck_[card], 0); + SPIEL_CHECK_LE(dealer_deck_[card], num_decks_); + num_cards_remaining += dealer_deck_[card]; + } + outcomes.reserve(num_cards_remaining); + for (int card = 0; card < kNumCards; ++card) { + if (dealer_deck_[card]) { + outcomes.emplace_back(card, static_cast(dealer_deck_[card]) / + num_cards_remaining); + } + } + } + return outcomes; +} + +void CrazyEightsState::DoApplyAction(Action action) { + switch (phase_) { + case Phase::kDeal: + return ApplyDealAction(action); + case Phase::kPlay: + return ApplyPlayAction(action); + case Phase::kGameOver: + SpielFatalError("Cannot act in terminal states"); + default: + SpielFatalError("Invalid Phase!"); + } +} + +std::vector CrazyEightsState::DealLegalActions() const { + std::vector legal_actions; + if (history_.empty()) { + for (int player = 0; player < num_players_; ++player) { + legal_actions.push_back(kDecideDealerActionBase + player); + } + } else { + for (int card = 0; card < kNumCards; ++card) { + if (dealer_deck_[card]) { + legal_actions.push_back(card); + } + } + } + return legal_actions; +} + +void CrazyEightsState::Reshuffle() { + SPIEL_CHECK_NE(last_card_, kInvalidAction); + for (int card = 0; card < kNumCards; ++card) { + dealer_deck_[card] = num_decks_; + for (int player = 0; player < num_players_; ++player) { + dealer_deck_[card] -= hands_[player][card]; + } + if (card == last_card_) dealer_deck_[card]--; + SPIEL_CHECK_GE(dealer_deck_[card], 0); + SPIEL_CHECK_LE(dealer_deck_[card], num_decks_); + num_cards_left_ += dealer_deck_[card]; + } +} + +void CrazyEightsState::ApplyDealAction(int action) { + // determine the dealer + if (history_.empty()) { + dealer_ = action - kDecideDealerActionBase; + current_player_ = (dealer_ + 1) % num_players_; + return; + } + + 
SPIEL_CHECK_GE(action, 0); + SPIEL_CHECK_LT(action, kDraw); + + num_cards_left_--; + dealer_deck_[action]--; + hands_[current_player_][action]++; + + SPIEL_CHECK_GE(dealer_deck_[action], 0); + SPIEL_CHECK_LE(dealer_deck_[action], num_decks_); + + // reshuffle the discarded cards + if (!num_cards_left_ && reshuffle_) { + Reshuffle(); + } + + // redraw=true if we are examining the first card turned face up after the + // initial dealing round, which cannot be Eights + if (redraw_) { + SPIEL_CHECK_EQ(current_player_, dealer_); + int rank = GetRank(action); + if (rank != kEightRank) { + phase_ = Phase::kPlay; + redraw_ = false; + last_card_ = action; + last_suit_ = GetSuit(action); + // if it is special card, act as if the dealer played this card + if (use_special_cards_) { + if (rank == kSkipRank) { + current_player_ = (current_player_ + 2) % num_players_; + return; + } else if (rank == kReverseRank) { + current_player_ = (current_player_ - 1 + num_players_) % + num_players_; + direction_ *= -1; + return; + } else if (rank == kDrawTwoRank) { + num_draws_from_twos_left_ += 2; + current_player_ = (current_player_ + 1) % num_players_; + return; + } + } + current_player_ = (current_player_ + 1) % num_players_; + return; + } else { + // put back + dealer_deck_[action]++; + num_cards_left_++; + hands_[current_player_][action]--; + return; + } + } + + SPIEL_CHECK_FALSE(redraw_); + + if (history_.size() < num_players_ * num_initial_cards_) { + current_player_ = (current_player_ + 1) % num_players_; + return; + } + + if (history_.size() == num_players_ * num_initial_cards_) { + SPIEL_CHECK_EQ(current_player_, dealer_); + redraw_ = true; + return; + } + + if (!num_cards_left_) can_pass_action_ = true; + + // if has accumlated 2s and has decided to draw these 2s from previous plays + if (start_draw_twos_) { + SPIEL_CHECK_TRUE(use_special_cards_); + num_draws_from_twos_left_--; + // assume if there is no card in the pile then the liability is cleared + if (!num_cards_left_) { + // if it is due to that the pile is exhausted during drawing +2s, + // counted as a pass + if (!num_draws_from_twos_left_) num_passes_++; + num_draws_from_twos_left_ = 0; + } + if (!num_draws_from_twos_left_) { + start_draw_twos_ = false; + phase_ = Phase::kPlay; + current_player_ = (current_player_ + direction_ + + num_players_) % num_players_; + } + return; + } + + // lastly, consider when the player draws card without having a previous +2 + // card + num_draws_before_play_++; + phase_ = Phase::kPlay; + + if (!num_cards_left_) num_draws_before_play_ = max_draw_cards_; + if (num_draws_before_play_ == max_draw_cards_) { + can_pass_action_ = true; + } +} + +void SearchLegalCards(std::vector* legal_actions, + const std::vector& hand, int last_rank, + int last_suit) { + for (int card = 0; card < kNumCards; ++card) { + if (hand[card] == 0) continue; + Suit suit = GetSuit(card); + int rank = GetRank(card); + if (rank == kEightRank) { + legal_actions->push_back(card); + } else if (last_suit == suit || last_rank == rank) { + legal_actions->push_back(card); + } + } +} + +std::vector CrazyEightsState::PlayLegalActions() const { + std::vector legal_actions; + if (nominate_suits_) { + for (int suit = kClubs; suit <= kSpades; ++suit) { + legal_actions.push_back(suit + kNominateSuitActionBase); + } + return legal_actions; + } + + if (can_pass_action_ || !num_cards_left_) { + SPIEL_CHECK_TRUE(!start_draw_twos_); + legal_actions.push_back(kPass); + } + + if (num_draws_from_twos_left_) { + SPIEL_CHECK_GT(num_cards_left_, 0); + + 
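+    // With a pending +2 liability the player may draw, or, if it has not
+    // started drawing yet, stack another 2 of any suit.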
legal_actions.push_back(kDraw); + // since we are able to draw + SPIEL_CHECK_FALSE(can_pass_action_); + SPIEL_CHECK_TRUE(use_special_cards_); + + if (!start_draw_twos_) { + for (int suit = kClubs; suit <= kSpades; ++suit) { + int duo_card = GetAction(static_cast(suit), kDrawTwoRank); + if (hands_[current_player_][duo_card]) + legal_actions.push_back(duo_card); + } + } + } else { + for (int card = 0; card < kNumCards; ++card) { + if (hands_[current_player_][card] == 0) continue; + Suit suit = GetSuit(card); + int rank = GetRank(card); + if (rank == kEightRank) { + legal_actions.push_back(card); + } else if (last_suit_ == suit || GetRank(last_card_) == rank) { + legal_actions.push_back(card); + } + } + if (num_cards_left_ && num_draws_before_play_ != max_draw_cards_) { + SPIEL_CHECK_FALSE(can_pass_action_); + legal_actions.push_back(kDraw); + } + } + absl::c_sort(legal_actions); + return legal_actions; +} + +bool CrazyEightsState::CheckAllCardsPlayed(int action) { + SPIEL_CHECK_GT(hands_[current_player_][action], 0); + hands_[current_player_][action]--; + bool all_played = true; + for (int card = 0; card < kNumCards; ++card) { + all_played &= !hands_[current_player_][card]; + } + return all_played; +} + +void CrazyEightsState::ApplyPlayAction(int action) { + if (action == kPass) { + if (!num_cards_left_) { + num_passes_++; + } else { + num_passes_ = 0; + } + + if (num_passes_ == num_players_ + 1) { + phase_ = kGameOver; + ScoreUp(); + return; + } + + if (max_draw_cards_ == num_draws_before_play_) { + num_draws_before_play_ = 0; + } + current_player_ = + (current_player_ + direction_ + num_players_) % num_players_; + if (num_cards_left_) can_pass_action_ = false; + return; + } else { + num_passes_ = 0; + } + + if (action == kDraw) { + SPIEL_CHECK_FALSE(can_pass_action_); + phase_ = kDeal; + if (num_draws_from_twos_left_) { start_draw_twos_ = true; } + return; + } else if (nominate_suits_) { + SPIEL_CHECK_LT(action, kNominateSuitActionBase + kNumSuits); + SPIEL_CHECK_GE(action, kNominateSuitActionBase); + last_suit_ = action - kNominateSuitActionBase; + current_player_ = + (current_player_ + direction_ + num_players_) % num_players_; + nominate_suits_ = false; + return; + } else { + num_plays++; + can_pass_action_ = false; + num_draws_before_play_ = 0; + bool all_played = CheckAllCardsPlayed(action); + if (all_played || num_plays >= kMaxTurnLimit) { + phase_ = kGameOver; + ScoreUp(); + } + + last_card_ = action; + last_suit_ = GetSuit(action); + + if (!num_cards_left_ && reshuffle_) { + Reshuffle(); + } + + int rank = GetRank(action); + + if (rank == kEightRank) { + nominate_suits_ = true; + return; + } + if (use_special_cards_) { + if (rank == kSkipRank) { + current_player_ = + (current_player_ + 2 * direction_ + num_players_) % num_players_; + return; + } + if (rank == kReverseRank) { + direction_ *= -1; + current_player_ = + (current_player_ + direction_ + num_players_) % num_players_; + return; + } + if (rank == kDrawTwoRank) { + // if there is no card currently available in the pile, assume + // the next player doesn't have to draw cards in the next round, + // and just view it played a normal card + if (num_cards_left_) num_draws_from_twos_left_ += 2; + current_player_ = + (current_player_ + direction_ + num_players_) % num_players_; + return; + } + } + current_player_ = + (current_player_ + direction_ + num_players_) % num_players_; + return; + } +} + +Player CrazyEightsState::CurrentPlayer() const { + if (phase_ == Phase::kDeal) { + return kChancePlayerId; + } else if (phase_ == 
Phase::kGameOver) { + return kTerminalPlayerId; + } else { + return current_player_; + } +} + +void CrazyEightsState::ScoreUp() { + for (int player = 0; player < num_players_; ++player) { + for (int card = 0; card < kNumCards; ++card) { + if (!hands_[player][card]) continue; + int rank = GetRank(card); + if (rank == kEightRank) { + returns_[player] -= 50 * hands_[player][card]; + } else if (rank >= 9) { + returns_[player] -= 10 * hands_[player][card]; + } else { + returns_[player] -= (card + 2) * hands_[player][card]; + } + } + } +} + +} // namespace crazy_eights +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/crazy_eights/crazy_eights.h b/scenarios/bargaining/open_spiel/open_spiel/games/crazy_eights/crazy_eights.h new file mode 100644 index 0000000..7cbdc60 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/crazy_eights/crazy_eights.h @@ -0,0 +1,226 @@ +// Copyright 2023 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_CRAZY_EIGHTS_H_ +#define OPEN_SPIEL_GAMES_CRAZY_EIGHTS_H_ + +// The game of crazy eights. +// See https://en.wikipedia.org/wiki/Crazy_Eights +// For 2~5 players, the game uses a standard 52-card deck. +// For >5 players, it uses 2 decks. +// Initially a player is randomly selected as the dealer. +// Then each player is dealt 5 cards (7 cards if there are 2 players). +// Then the dealer draws one card from the deck and turns it face up. +// Then started with the player on the dealer's right, +// the game goes counterclockwise +// by default (with an exception, details later). +// In each player's turn, it needs to play a card that either match the suit +// or the rank of the card on the top of the discard pile. +// And then place this card on the discard pile top for the next player to +// match. A player can play an 8 as a wild card, however, at anytime. If it does +// so then a color needs to be nominated for the next player to match. A player +// can also decide to draw cards from the dealer deck. Notice that it is the +// only action available if it does not have a available card to play at its +// turn. But it doesn't prevent the player to draw cards even if it has playable +// cards. However, the maximum number of cards a player can draw at its turn is +// bounded. If a player plays a card, it cannot draw at the current turn +// anymore. The game ends if a player has played all of its card. The other +// players are penalized according to the cards on their hand. That is, -50 for +// each 8, -10 for each court card, and -{face value} for others. +// +// +// The game can also incorporate other "special cards". +// These including: +// Skip: if a player plays Q, then the next player is skipped +// Reverse: if a player plays A, then the direction of play is reversed. +// Draw 2: if a player plays 2, then the next player should draw 2 cards. +// However, it admits stacking. That is, if the next player has 2, it can play +// it. 
And then the next player after it should draw 4 cards unless it plays +// draw 2 as well, etc. If a player starts to draw in this case, it must draw +// all the cards and then passes. I.e., if it draws a draw 2 card during the +// drawing, it is not allowed to play it. +// +// If the first card turned face up by the dealer is a special card, +// then it acts as if the dealer plays the card. +// +// If reshuffle = true, then the discard pile got reshuffle and become the new +// dealer card once exhausted. +// +// The action space of this game is as follows. +// action id 0, 1,..., 51: play/deal a card from the standard 52-card deck. +// action id 52: a player draw a card from the dealer's deck. +// action id 53: a player passes if it had already drawn max_draw_cards. +// action id 54, 55, 56, 57: a player nominate one of the four suit. +// (for chance) action id 0, 1,...., 51 are cards to be drawn +// action id 52, 53, ...., 52 + num_player-1: decide the dealer. +// +// An observation contains: +// (1) the current hand I have +// (2) the previous card and previous suit +// (3) starting from (my_idx + 1), the numbers of cards others have +// (4) whether currently it goes counterclockwise or not + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace crazy_eights { + +constexpr int kNumCards = 52; +constexpr int kNumRanks = 13; +constexpr int kNumSuits = 4; +constexpr int kDraw = kNumCards; +constexpr int kPass = kDraw + 1; +constexpr int kNominateSuitActionBase = kPass + 1; +constexpr int kDecideDealerActionBase = kNumCards; +// 50 for each 8, 10 for each face card, and face values +// for others. then it is totally 4 * (2+3+..7+50+9+10+4*10) +constexpr double kMaxPenality = 544; +constexpr int kMaxTurnLimit = 10000; + +enum Phase { kDeal = 0, kPlay, kGameOver }; +enum Suit { kClubs = 0, kDiamonds, kHearts, kSpades }; + +class CrazyEightsState : public State { + public: + CrazyEightsState(std::shared_ptr game, int num_players, + int max_draw_cards, bool use_special_cards, bool reshuffle); + Player CurrentPlayer() const override; + std::string ActionToString(Player player, Action action) const override; + std::string ToString() const override; + bool IsTerminal() const override { return phase_ == Phase::kGameOver; } + std::vector Returns() const override { return returns_; } + std::string ObservationString(Player player) const override; + void WriteObservationTensor(Player player, absl::Span values) const; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override { + return absl::make_unique(*this); + } + std::vector LegalActions() const override; + std::vector> ChanceOutcomes() const override; + + protected: + void DoApplyAction(Action action) override; + + private: + std::vector DealLegalActions() const; + std::vector PlayLegalActions() const; + void ApplyDealAction(int action); + void ApplyPlayAction(int action); + bool CheckAllCardsPlayed(int action); + void ScoreUp(); + + void Reshuffle(); + + std::vector FormatHand(Player player) const; + + std::string FormatAllHands() const; + + Phase phase_ = Phase::kDeal; + int current_player_ = kInvalidPlayer; + int dealer_ = kInvalidPlayer; + + // for the first card turned up, keep drawing if it is an eight + bool redraw_ = false; + + // whether a player can pass + // it is true when (1) a player had already drawn max_draw_cards + // or (2) there is no card in the discard pile + bool can_pass_action_ = false; + + // 
whether a player had already started to draw +2 cards + bool start_draw_twos_ = false; + + // consecutive passes during a play + // if num_passes = num_players_ + 1, then the game ends + int num_passes_ = 0; + + // the current accmulated +2 cards to be drawn + int num_draws_from_twos_left_ = 0; + + // the number of consecutive draws for current_player_ so far + // this is not used for +2 cases + int num_draws_before_play_ = 0; + + // the number of cards player can draw + int num_cards_left_; + + int num_plays = 0; + + int last_card_ = kInvalidAction; + int last_suit_ = -1; + + bool nominate_suits_ = false; + + int direction_ = 1; + + bool reshuffle_; + int num_players_; + int max_draw_cards_; + int num_initial_cards_; + int num_decks_; + bool use_special_cards_; + + std::vector returns_; + std::array dealer_deck_{}; + std::vector> hands_; +}; + +class CrazyEightsGame : public Game { + public: + explicit CrazyEightsGame(const GameParameters& params); + int NumDistinctActions() const override { + return kNominateSuitActionBase + kNumSuits; + } + int MaxChanceOutcomes() const override { + return kDecideDealerActionBase + num_players_; + } + std::unique_ptr NewInitialState() const override { + return absl::make_unique(shared_from_this(), num_players_, + max_draw_cards_, + use_special_cards_, reshuffle_); + } + int NumPlayers() const override { return num_players_; } + double MinUtility() const override { + return -kMaxPenality * (num_players_ > 5 ? 2 : 1); + } + double MaxUtility() const override { return 0.0; } + std::vector ObservationTensorShape() const override { + int num_decks = num_players_ > 5 ? 2 : 1; + int base_observation_size = + (num_decks + 1) * kNumCards + kNumCards + kNumSuits + + (num_decks * kNumCards + 1) * (num_players_ - 1); + if (!use_special_cards_) { + return {base_observation_size}; + } else { + return {base_observation_size + 1}; + } + } + // In principle, the game can run indefinitely + int MaxGameLength() const override { return kMaxTurnLimit; } + int GetMaxDrawCards() const { return max_draw_cards_; } + + private: + int num_players_; + int max_draw_cards_; + bool use_special_cards_; + bool reshuffle_; +}; + +} // namespace crazy_eights + +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_CRAZY_EIGHTS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/crazy_eights/crazy_eights_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/crazy_eights/crazy_eights_test.cc new file mode 100644 index 0000000..b8494dc --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/crazy_eights/crazy_eights_test.cc @@ -0,0 +1,133 @@ +// Copyright 2023 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
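+
+// Raw action ids used below follow the deck encoding in crazy_eights.cc:
+// id = rank_index * 4 + suit, with ranks ordered 23456789TJQKA and suits
+// ordered CDHS (e.g. id 0 is C2, id 1 is D2, id 24 is C8, id 50 is HA).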
+ +#include "open_spiel/games/crazy_eights/crazy_eights.h" + +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace crazy_eights { +namespace { + +void BasicGameTests() { + testing::LoadGameTest("crazy_eights"); + for (int players = 2; players <= 6; ++players) { + for (bool b : {false, true}) { + testing::RandomSimTest( + *LoadGame("crazy_eights", {{"players", GameParameter(players)}, + {"use_special_cards", GameParameter(b)}}), + 5); + } + } +} + +void SpecialCardTests() { + std::shared_ptr game = + LoadGame("crazy_eights", {{"players", GameParameter(4)}, + {"use_special_cards", GameParameter(true)}}); + + std::unique_ptr state = game->NewInitialState(); + // 0 is the dealer + state->ApplyAction(kDecideDealerActionBase); + // Player0 has (S2)(H8)(DQ)(SK)(SA) + // Player1 has (C2)(C3)(S8)(HQ)(CA) + // Player2 has (D2)(C8)(C9)(SQ)(DA) + // Player3 has (H2)(D8)(CQ)(CK)(HA) + std::vector dealt_cards = {0, 1, 2, 3, 4, 24, 25, 26, 27, 28, + 40, 41, 42, 43, 44, 47, 48, 49, 50, 51}; + + for (auto card : dealt_cards) state->ApplyAction(card); + + // The first card is D3 + state->ApplyAction(5); + + // Player 1 plays C3 + state->ApplyAction(4); + + // Player 2 plays C8 + state->ApplyAction(24); + + // Check the current actions are color nomination + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + std::vector legal_actions = state->LegalActions(); + SPIEL_CHECK_EQ(static_cast(legal_actions.size()), kNumSuits); + + for (int i = 0; i < kNumSuits; ++i) { + SPIEL_CHECK_GE(legal_actions[i], kNominateSuitActionBase); + SPIEL_CHECK_LT(legal_actions[i], kNominateSuitActionBase + kNumSuits); + } + + // The next suit is H + state->ApplyAction(kNominateSuitActionBase + 2); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 3); + // Player 3 plays HA + state->ApplyAction(50); + // Reverse direction to player 2 + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + // Player 2 plays DA + state->ApplyAction(49); + SPIEL_CHECK_EQ(state->CurrentPlayer(), 3); + // Reverse direction to player 3 + // Player 3 plays D8 + state->ApplyAction(25); + // Player 3 nominates D + state->ApplyAction(kNominateSuitActionBase + 1); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + // Player 0 plays DQ + state->ApplyAction(41); + + // Player 1 is skipped, next is player 2 + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + + // Player 2 plays D2! + state->ApplyAction(1); + // Player 3 only has two actions: H2 or start drawing + legal_actions = state->LegalActions(); + SPIEL_CHECK_EQ(static_cast(legal_actions.size()), 2); + SPIEL_CHECK_EQ(legal_actions[0], 2); + SPIEL_CHECK_EQ(legal_actions[1], kDraw); + // Let's stack the twos! 
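+  // Player 3 stacks H2 (action id 2), raising the pending draw count to 4.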
+ state->ApplyAction(2); + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + + // Keep stacking + state->ApplyAction(3); + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + + // Keep stacking + state->ApplyAction(0); + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + legal_actions = state->LegalActions(); + SPIEL_CHECK_EQ(static_cast(legal_actions.size()), 1); + // Player 2 has to draw 8 cards + + state->ApplyAction(kDraw); + std::vector draw_cards = {6, 7, 8, 9, 10, 11, 12, 13}; + for (auto card : draw_cards) state->ApplyAction(card); + // Then it is player 3's turn + SPIEL_CHECK_EQ(state->CurrentPlayer(), 3); +} + +} // namespace +} // namespace crazy_eights +} // namespace open_spiel + +int main() { + open_spiel::crazy_eights::BasicGameTests(); + open_spiel::crazy_eights::SpecialCardTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/cribbage/cribbage.cc b/scenarios/bargaining/open_spiel/open_spiel/games/cribbage/cribbage.cc new file mode 100644 index 0000000..58e2b4b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/cribbage/cribbage.cc @@ -0,0 +1,911 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/cribbage/cribbage.h" + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/observer.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace cribbage { + +constexpr int kDefaultNumPlayers = 2; +constexpr int kWinScore = 121; +constexpr double kDefaultWinnerBonus = 1000; + + +constexpr const std::array kAllCards = { + // Clubs + Card{0, 0, 0}, + Card{1, 1, 0}, + Card{2, 2, 0}, + Card{3, 3, 0}, + Card{4, 4, 0}, + Card{5, 5, 0}, + Card{6, 6, 0}, + Card{7, 7, 0}, + Card{8, 8, 0}, + Card{9, 9, 0}, + Card{10, 10, 0}, + Card{11, 11, 0}, + Card{12, 12, 0}, + // Diamonds + Card{13, 0, 1}, + Card{14, 1, 1}, + Card{15, 2, 1}, + Card{16, 3, 1}, + Card{17, 4, 1}, + Card{18, 5, 1}, + Card{19, 6, 1}, + Card{20, 7, 1}, + Card{21, 8, 1}, + Card{22, 9, 1}, + Card{23, 10, 1}, + Card{24, 11, 1}, + Card{25, 12, 1}, + // Hearts + Card{26, 0, 2}, + Card{27, 1, 2}, + Card{28, 2, 2}, + Card{29, 3, 2}, + Card{30, 4, 2}, + Card{31, 5, 2}, + Card{32, 6, 2}, + Card{33, 7, 2}, + Card{34, 8, 2}, + Card{35, 9, 2}, + Card{36, 10, 2}, + Card{37, 11, 2}, + Card{38, 12, 2}, + // Spades + Card{39, 0, 3}, + Card{40, 1, 3}, + Card{41, 2, 3}, + Card{42, 3, 3}, + Card{43, 4, 3}, + Card{44, 5, 3}, + Card{45, 6, 3}, + Card{46, 7, 3}, + Card{47, 8, 3}, + Card{48, 9, 3}, + Card{49, 10, 3}, + Card{50, 11, 3}, + Card{51, 12, 3}, +}; + +// Scoring. +constexpr int kNum5Combos = 1; +constexpr int kNum4Combos = 5; +constexpr int kNum3Combos = 10; +constexpr int kNum2Combos = 10; + +// Bitmasks used to choose card combinations. 
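+// Bit i of a mask selects the card at position i of the 5-card hand, so 31
+// (0b11111) picks all five cards, each 4-card mask clears exactly one bit,
+// and so on for the 3-card and 2-card subsets.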
+constexpr const std::array k5CardMasks = {31}; +constexpr const std::array k4CardMasks = {30, 29, 27, 23, 15}; +constexpr const std::array k3CardMasks = {7, 11, 13, 14, 19, + 21, 22, 25, 26, 28}; +constexpr const std::array k2CardMasks = {3, 5, 6, 9, 10, + 12, 17, 18, 20, 24}; + +namespace { + +// Facts about the game +const GameType kGameType{/*short_name=*/"cribbage", + /*long_name=*/"Cribbage", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kRewards, + /*max_num_players=*/4, + /*min_num_players=*/2, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/false, + /*provides_observation_tensor=*/false, + /*parameter_specification=*/ + {{"players", GameParameter(kDefaultNumPlayers)}, + {"winner_bonus_reward", + GameParameter(kDefaultWinnerBonus)}}}; + +static std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new CribbageGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); +} // namespace + +bool operator==(const Card& lhs, const Card& rhs) { return lhs.id == rhs.id; } + +// Sort by rank first. This is needed for the proper scoring of runs. +bool operator<(const Card& lhs, const Card& rhs) { + return (lhs.rank < rhs.rank || (lhs.rank == rhs.rank && lhs.suit < rhs.suit)); +} + +int CardsPerPlayer(int num_players) { + switch (num_players) { + case 2: + return 6; + case 3: + return 5; + case 4: + return 5; + default: + SpielFatalError(absl::StrCat("Unknown number of players: ", num_players)); + } +} + +int CardsToCrib(int num_players) { + switch (num_players) { + case 2: + return 0; + case 3: + return 1; + case 4: + return 0; + default: + SpielFatalError(absl::StrCat("Unknown number of players: ", num_players)); + } +} + +Card GetCard(int id) { + SPIEL_CHECK_GE(id, 0); + SPIEL_CHECK_LT(id, kDeckSize); + return kAllCards[id]; +} + +Card GetCardByString(const std::string& str) { + for (int i = 0; i < kDeckSize; ++i) { + if (kAllCards[i].to_string() == str) { + return kAllCards[i]; + } + } + SpielFatalError(absl::StrCat("Unknown card: ", str)); +} + +std::vector GetHandFromStrings( + const std::vector& card_strings) { + std::vector hand; + hand.reserve(card_strings.size()); + for (const std::string& cstr : card_strings) { + hand.push_back(GetCardByString(cstr)); + } + return hand; +} + +int Card::value() const { + if (rank >= kTen) { + return 10; + } else { + return rank + 1; + } +} + +std::string Card::to_string() const { + std::string str("XX"); + str[0] = kRanks[rank]; + str[1] = kSuitNames[suit]; + return str; +} + +Action ToAction(const Card& c1, const Card& c2) { + return kDeckSize + (kDeckSize * c1.id + c2.id); +} + +std::pair FromAction(Action action) { + action -= kDeckSize; + return {action / kDeckSize, action % kDeckSize}; +} + +int CardsSum(const std::vector& hand, int combo_mask) { + int sum = 0; + int bit = 1; + for (int pos = 0; pos < hand.size(); ++pos) { + if ((combo_mask & bit) > 0) { + sum += hand[pos].value(); + } + bit <<= 1; + } + return sum; +} + +bool IsPair(const std::vector& hand, int combo_mask) { + int bit = 1; + int rank = -1; + + for (int pos = 0; pos < hand.size(); ++pos) { + if ((combo_mask & bit) > 0) { + if (rank == -1) { + rank = hand[pos].rank; + } else { + return (rank == hand[pos].rank); + } + } + bit <<= 1; + } + return false; +} + +int 
ScoreHand15(const std::vector& hand) { + int score = 0; + for (int mask : k5CardMasks) { + if (CardsSum(hand, mask) == 15) { + score += 2; + } + } + for (int mask : k4CardMasks) { + if (CardsSum(hand, mask) == 15) { + score += 2; + } + } + for (int mask : k3CardMasks) { + if (CardsSum(hand, mask) == 15) { + score += 2; + } + } + for (int mask : k2CardMasks) { + if (CardsSum(hand, mask) == 15) { + score += 2; + } + } + return score; +} + +int ScoreHandPairs(const std::vector& hand) { + int score = 0; + for (int mask : k2CardMasks) { + if (IsPair(hand, mask)) { + score += 2; + } + } + return score; +} + +int ScoreHandFlush(const std::vector& hand) { + SPIEL_CHECK_TRUE(hand.size() == 4 || hand.size() == 5); + int suit = hand[0].suit; + for (int i = 1; i < hand.size(); ++i) { + if (hand[i].suit != suit) { + return 0; + } + } + return hand.size(); +} + +int ScoreHandRun(const std::vector& hand, int combo_mask) { + int rank = -1; + int bit = 1; + int length = 0; + + for (int pos = 0; pos < hand.size(); ++pos) { + if ((combo_mask & bit) > 0) { + if (rank == -1) { + // First rank in the run. + rank = hand[pos].rank; + } else { + // Check that the next rank is one up, then move the rank up. + if (hand[pos].rank != (rank + 1)) { + return 0; + } else { + rank++; + } + } + length++; + } + bit <<= 1; + } + SPIEL_CHECK_GE(length, 3); + return length; +} + +bool IsSubsetMask(const std::vector& masks, int test_mask) { + if (masks.empty()) { + return false; + } + for (int mask : masks) { + if ((mask & test_mask) == test_mask) { + return true; + } + } + return false; +} + +int ScoreHand(const std::vector& hand) { + SPIEL_CHECK_EQ(hand.size(), 5); + int score = 0; + + // 15s. + score += ScoreHand15(hand); + + // Pairs (and 3-of-a-kind and 4-of-a-kind). + score += ScoreHandPairs(hand); + + // Score the runs. When doing subsets of size 3, must check that the subset + // is not a smaller proper subset of a combination that has already been + // counted. So we keep a set of all the 4-card subsets that were counted + // for this purpose. + int score_run_5 = ScoreHandRun(hand, 31); + if (score_run_5 > 0) { + return score + score_run_5; + } + + std::vector combo_masks_scored; + for (int mask : k4CardMasks) { + int score_run_4 = ScoreHandRun(hand, mask); + if (score_run_4 > 0) { + score += score_run_4; + combo_masks_scored.push_back(mask); + } + } + + for (int mask : k3CardMasks) { + if (!IsSubsetMask(combo_masks_scored, mask)) { + score += ScoreHandRun(hand, mask); + } + } + + return score; +} + +int ScoreHand(const std::vector& hand, const Card& starter) { + SPIEL_CHECK_EQ(hand.size(), 4); + + int score = 0; + // Check for jack of the same suit as the starter + for (int i = 0; i < hand.size(); ++i) { + if (hand[i].rank == Rank::kJack && hand[i].suit == starter.suit) { + score += 1; + break; + } + } + + // Make the 5-card hand which includes the starter. + std::vector five_card_hand = hand; + five_card_hand.push_back(starter); + std::sort(five_card_hand.begin(), five_card_hand.end()); + + // Check for flush. 
+ int flush5 = ScoreHandFlush(five_card_hand); + if (flush5 != 0) { + score += flush5; + } else { + score += ScoreHandFlush(hand); + } + + return score + ScoreHand(five_card_hand); +} + +std::string CribbageState::ActionToString(Player player, Action move_id) const { + if (player == kChancePlayerId) { + return absl::StrCat("Deal ", kAllCards[move_id].to_string()); + } else { + if (move_id < kDeckSize) { + return absl::StrCat("Choose ", kAllCards[move_id].to_string()); + } else if (move_id == kPassAction) { + return "Pass"; + } else { + std::pair card_ids = FromAction(move_id); + return absl::StrCat("Choose ", kAllCards[card_ids.first].to_string(), " ", + kAllCards[card_ids.second].to_string()); + } + } +} + +bool CribbageState::IsTerminal() const { + return (round_ >= kMaxNumRounds || + *std::max_element(scores_.begin(), scores_.end()) >= kWinScore); +} + +int CribbageState::DetermineWinner() const { + for (int p = 0; p < num_players_; ++p) { + if (scores_[p] >= kWinScore) { + return p; + } + } + return kInvalidPlayer; +} + +void AddWinnerBonusLoserPenalty(std::vector* values, int winner, + int num_players, double winner_bonus) { + if (winner == kInvalidPlayer) { + return; + } + + // For 2 and 3 player games, the loss penalty is -win_bonus / (n-1) and + // win_bonus is given only to one player. For a 4-player game, it's a team + // game so both the win bonus and loss penalty is shared across losers. + double win_bonus_per_player = + num_players <= 3 ? winner_bonus : (winner_bonus / 2.0); + + double loss_penalty_per_player = + num_players <= 3 + ? (-winner_bonus / (static_cast(values->size()) - 1.0)) + : (-winner_bonus / 2.0); + + for (Player p = 0; p < values->size(); ++p) { + // In the 4-player games, the score is identical for players {0,2} and {1,3} + if (p == winner || (num_players == 4 && p == (winner + 2))) { + (*values)[p] += win_bonus_per_player; + } else { + (*values)[p] += loss_penalty_per_player; + } + } +} + +std::vector CribbageState::Rewards() const { + int winner = DetermineWinner(); + std::vector ret = rewards_; + SPIEL_CHECK_EQ(ret.size(), num_players_); + AddWinnerBonusLoserPenalty(&ret, winner, num_players_, + parent_game_.winner_bonus_reward()); + return ret; +} + +std::vector CribbageState::Returns() const { + int winner = DetermineWinner(); + std::vector ret = scores_; + SPIEL_CHECK_EQ(ret.size(), num_players_); + AddWinnerBonusLoserPenalty(&ret, winner, num_players_, + parent_game_.winner_bonus_reward()); + return ret; +} + +std::string CribbageState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, game_->NumPlayers()); + return ""; +} + +bool CribbageState::AllHandsAreEmpty() const { + for (Player p = 0; p < num_players_; ++p) { + if (!hands_[p].empty()) { + return false; + } + } + return true; +} + +bool CribbageState::AllPlayersHavePassed() const { + for (Player p = 0; p < num_players_; ++p) { + if (!passed_[p]) { + return false; + } + } + return true; +} + +void CribbageState::NextRound() { + round_++; + dealer_ = NextPlayerRoundRobin(dealer_, num_players_); + start_player_ = NextPlayerRoundRobin(start_player_, num_players_); + cur_player_ = kChancePlayerId; + + deck_.clear(); + deck_.resize(kDeckSize); + for (int i = 0; i < kDeckSize; ++i) { + deck_[i] = kAllCards[i]; + } + + for (int p = 0; p < num_players_; ++p) { + hands_[p].clear(); + discards_[p].clear(); + } + std::fill(passed_.begin(), passed_.end(), false); + crib_.clear(); + played_cards_.clear(); + + phase_ = Phase::kCardPhase; + starter_ = std::nullopt; + 
last_played_player_ = -1; + current_sum_ = 0; +} + +void CribbageState::ObservationTensor(Player player, + absl::Span values) const {} + +CribbageState::CribbageState(std::shared_ptr game) + : State(game), + parent_game_(static_cast(*game)), + phase_(kCardPhase), + rewards_(num_players_, 0), + scores_(num_players_, 0), + starter_(std::nullopt), + hands_(num_players_), + discards_(num_players_), + passed_(num_players_) { + NextRound(); +} + +int CribbageState::CurrentPlayer() const { + if (IsTerminal()) { + return kTerminalPlayerId; + } else { + return cur_player_; + } +} + +bool SameRank(const std::vector& played_cards, int start_index) { + int rank = played_cards[start_index].rank; + for (int i = start_index + 1; i < played_cards.size(); ++i) { + if (played_cards[i].rank != rank) { + return false; + } + } + return true; +} + +bool IsUnsortedRun(const std::vector& played_cards, int start_index) { + std::vector played_cards_copy = played_cards; + std::sort(played_cards_copy.begin() + start_index, played_cards_copy.end()); + for (int i = start_index + 1; i < played_cards_copy.size(); ++i) { + if (played_cards_copy[i].rank != (played_cards_copy[i - 1].rank + 1)) { + return false; + } + } + return true; +} + +void CribbageState::CheckAndApplyPlayScoring() { + if (current_sum_ == 15) { + Score(cur_player_, 2); + } + + // Check 4ofk, 3ofk, pair. + if (played_cards_.size() >= 4 && + SameRank(played_cards_, played_cards_.size() - 4)) { + Score(cur_player_, 12); + } else if (played_cards_.size() >= 3 && + SameRank(played_cards_, played_cards_.size() - 3)) { + Score(cur_player_, 6); + } else if (played_cards_.size() >= 2 && + SameRank(played_cards_, played_cards_.size() - 2)) { + Score(cur_player_, 2); + } + + for (int num_cards = std::min(played_cards_.size(), 7); num_cards >= 3; + --num_cards) { + if (IsUnsortedRun(played_cards_, played_cards_.size() - num_cards)) { + Score(cur_player_, num_cards); + break; + } + } +} + +void CribbageState::DoEndOfPlayRound() { + // Apply end-of-play round scoring. + int end_of_round_points = current_sum_ == 31 ? 2 : 1; + Score(last_played_player_, end_of_round_points); + + played_cards_.clear(); + current_sum_ = 0; + std::fill(passed_.begin(), passed_.end(), false); + SPIEL_CHECK_GE(last_played_player_, 0); + SPIEL_CHECK_LT(last_played_player_, num_players_); + cur_player_ = NextPlayerRoundRobin(last_played_player_, num_players_); + + // Check for end of play phase. + if (AllHandsAreEmpty()) { + // First, reset the hands to be the discards. + for (Player p = 0; p < num_players_; ++p) { + hands_[p] = discards_[p]; + SPIEL_CHECK_EQ(hands_[p].size(), 4); + } + ScoreHands(); + ScoreCrib(); + NextRound(); + } +} + +void CribbageState::DoApplyAction(Action move) { + SPIEL_CHECK_EQ(IsTerminal(), false); + + if (IsChanceNode()) { + SPIEL_CHECK_GE(move, 0); + SPIEL_CHECK_LT(move, kDeckSize); + if (phase_ == Phase::kCardPhase) { + // In the card phase, the chance nodes correspond to the card deals to + // each player and to the crib. 
+ auto iter = std::find(deck_.begin(), deck_.end(), kAllCards[move]); + SPIEL_CHECK_TRUE(iter != deck_.end()); + Card card = *iter; + deck_.erase(iter); + bool card_dealt = false; + bool crib_dealt = false; + + // Deal to players first + int p = 0; + for (p = 0; p < num_players_; ++p) { + if (hands_[p].size() < parent_game_.cards_per_player()) { + hands_[p].push_back(card); + card_dealt = true; + break; + } + } + + // Deal to crib if necessary + if (!card_dealt && crib_.size() < parent_game_.cards_to_crib()) { + crib_.push_back(card); + crib_dealt = true; + } + + // Check if we're ready to start choosing cards. + if (crib_dealt || (p == num_players_ - 1 && + hands_[p].size() == parent_game_.cards_per_player() && + crib_.size() == parent_game_.cards_to_crib())) { + SortHands(); + cur_player_ = 0; + } else { + cur_player_ = kChancePlayerId; + } + } else { + // A chance node in the play phase corresponds to choosing the starter. + SPIEL_CHECK_FALSE(starter_.has_value()); + auto iter = std::find(deck_.begin(), deck_.end(), kAllCards[move]); + SPIEL_CHECK_FALSE(iter == deck_.end()); + starter_ = *iter; + deck_.erase(iter); + if ((*starter_).rank == Rank::kJack) { + // His Nobs + Score(dealer_, 2); + } + // Player left of the dealer starts. + cur_player_ = NextPlayerRoundRobin(dealer_, num_players_); + } + } else { + // Decision node. + SPIEL_CHECK_GE(cur_player_, 0); + SPIEL_CHECK_LT(cur_player_, num_players_); + // Applying action at decision node: First, clear the intermediate rewards. + std::fill(rewards_.begin(), rewards_.end(), 0); + if (phase_ == Phase::kCardPhase) { + // Move the chose card(s) into the crib. + if (num_players_ == 3 || num_players_ == 4) { + SPIEL_CHECK_GE(move, 0); + SPIEL_CHECK_LT(move, kDeckSize); + MoveCardToCrib(cur_player_, kAllCards[move]); + } else { + std::pair card_ids = FromAction(move); + for (int card_id : {card_ids.first, card_ids.second}) { + SPIEL_CHECK_GE(card_id, 0); + SPIEL_CHECK_LT(card_id, kDeckSize); + MoveCardToCrib(cur_player_, kAllCards[card_id]); + } + } + + cur_player_ += 1; + if (cur_player_ >= num_players_) { + SortCrib(); + phase_ = Phase::kPlayPhase; + cur_player_ = kChancePlayerId; // starter + } + } else { + if (move == kPassAction) { + passed_[cur_player_] = true; + // Check for end of current play sequence (or round). + if (AllPlayersHavePassed()) { + DoEndOfPlayRound(); + } else { + cur_player_ = NextPlayerRoundRobin(cur_player_, num_players_); + } + } else { + // Play the chosen card. + auto iter = std::find(hands_[cur_player_].begin(), + hands_[cur_player_].end(), kAllCards[move]); + SPIEL_CHECK_TRUE(iter != hands_[cur_player_].end()); + Card card = *iter; + current_sum_ += card.value(); + hands_[cur_player_].erase(iter); + played_cards_.push_back(card); + discards_[cur_player_].push_back(card); + last_played_player_ = cur_player_; + CheckAndApplyPlayScoring(); + // If the sum is 31 then no need for the passes, we can end the round + // round right away. + if (current_sum_ == 31) { + DoEndOfPlayRound(); + } else { + cur_player_ = NextPlayerRoundRobin(cur_player_, num_players_); + } + } + } + } +} + +void CribbageState::Score(Player player, int points) { + rewards_[player] += points; + scores_[player] += points; + + // 4-player is a team game. Any scoring for p also counts for either (p+2) + // or (p-2). 
+ if (num_players_ == 4) { + Player teammate = (player + 2) % num_players_; + SPIEL_CHECK_GE(teammate, 0); + SPIEL_CHECK_LT(teammate, num_players_); + rewards_[teammate] += points; + scores_[teammate] += points; + } +} + +void CribbageState::ScoreHands() { + for (Player p = 0; p < num_players_; ++p) { + int points = ScoreHand(hands_[p], *starter_); + Score(p, points); + } +} + +void CribbageState::ScoreCrib() { + int points = ScoreHand(crib_, *starter_); + Score(dealer_, points); +} + +void CribbageState::MoveCardToCrib(Player player, const Card& card) { + auto iter = std::find(hands_[player].begin(), hands_[player].end(), card); + SPIEL_CHECK_TRUE(iter != hands_[player].end()); + Card found_card = *iter; + hands_[player].erase(iter); + crib_.push_back(found_card); +} + +void CribbageState::SortHands() { + for (int p = 0; p < num_players_; ++p) { + std::sort(hands_[p].begin(), hands_[p].end()); + } +} + +void CribbageState::SortCrib() { std::sort(crib_.begin(), crib_.end()); } + +std::vector CribbageState::LegalActions() const { + if (IsChanceNode()) { + return LegalChanceOutcomes(); + } else if (IsTerminal()) { + return {}; + } else { + if (phase_ == Phase::kCardPhase) { + switch (num_players_) { + case 2: + return LegalTwoCardCribActions(); + case 3: + case 4: + return LegalOneCardCribActions(); + default: + SpielFatalError("Unknown number of players"); + } + } else if (phase_ == Phase::kPlayPhase) { + // The current player can play anything in their hand that does not bring + // the current sum over 31, or pass if they have no legal actions. + SPIEL_CHECK_GE(cur_player_, 0); + SPIEL_CHECK_LT(cur_player_, num_players_); + std::vector legal_actions; + for (const Card& card : hands_[cur_player_]) { + if ((current_sum_ + card.value()) <= 31) { + legal_actions.push_back(card.id); + } + } + if (legal_actions.empty()) { + legal_actions = {kPassAction}; + } + std::sort(legal_actions.begin(), legal_actions.end()); + return legal_actions; + } else { + SpielFatalError("Unknown phase in LegalActions()"); + } + } +} + +std::vector CribbageState::LegalOneCardCribActions() const { + std::vector legal_actions; + legal_actions.reserve(hands_[cur_player_].size()); + for (int i = 0; i < hands_[cur_player_].size(); ++i) { + legal_actions.push_back(hands_[cur_player_][i].id); + } + std::sort(legal_actions.begin(), legal_actions.end()); + return legal_actions; +} + +std::vector CribbageState::LegalTwoCardCribActions() const { + std::vector legal_actions; + for (int i = 0; i < hands_[cur_player_].size(); ++i) { + for (int j = i + 1; j < hands_[cur_player_].size(); ++j) { + Action action = ToAction(hands_[cur_player_][i], hands_[cur_player_][j]); + legal_actions.push_back(action); + } + } + std::sort(legal_actions.begin(), legal_actions.end()); + return legal_actions; +} + +ActionsAndProbs CribbageState::ChanceOutcomes() const { + SPIEL_CHECK_TRUE(IsChanceNode()); + ActionsAndProbs outcomes; + outcomes.reserve(deck_.size()); + double prob = 1.0 / deck_.size(); + for (int o = 0; o < deck_.size(); ++o) { + outcomes.push_back({deck_[o].id, prob}); + } + return outcomes; +} + +std::string CribbageState::ToString() const { + std::string str; + absl::StrAppend(&str, "---------------------------------\n"); + absl::StrAppend(&str, "Num players: ", num_players_, "\n"); + absl::StrAppend(&str, "Round: ", round_, "\n"); + absl::StrAppend( + &str, "Phase: ", phase_ == Phase::kCardPhase ? 
"Card" : "Play", "\n"); + absl::StrAppend(&str, "Dealer: ", dealer_, "\n"); + absl::StrAppend(&str, "Cur player: ", cur_player_, "\n"); + absl::StrAppend(&str, "Scores:"); + for (int p = 0; p < num_players_; ++p) { + absl::StrAppend(&str, " ", scores_[p]); + } + absl::StrAppend(&str, "\n"); + absl::StrAppend(&str, "---------------------------------\n"); + absl::StrAppend(&str, "Crib:"); + for (int i = 0; i < crib_.size(); ++i) { + absl::StrAppend(&str, " ", crib_[i].to_string()); + } + absl::StrAppend(&str, "\n"); + if (starter_.has_value()) { + absl::StrAppend(&str, "Starter: ", (*starter_).to_string(), "\n"); + } + for (int p = 0; p < num_players_; ++p) { + absl::StrAppend(&str, "P", p, " Hand:"); + for (int i = 0; i < hands_[p].size(); ++i) { + absl::StrAppend(&str, " ", hands_[p][i].to_string()); + } + absl::StrAppend(&str, "\n"); + } + absl::StrAppend(&str, "---------------------------------\n"); + absl::StrAppend(&str, "Running total: ", current_sum_, "\n"); + absl::StrAppend(&str, "Played cards: "); + for (int i = 0; i < played_cards_.size(); ++i) { + absl::StrAppend(&str, " ", played_cards_[i].to_string()); + } + absl::StrAppend(&str, "\n"); + absl::StrAppend(&str, "---------------------------------\n"); + + return str; +} + +std::unique_ptr CribbageState::Clone() const { + return std::unique_ptr(new CribbageState(*this)); +} + +CribbageGame::CribbageGame(const GameParameters& params) + : Game(kGameType, params), + num_players_(ParameterValue("players", kDefaultNumPlayers)), + cards_per_player_(CardsPerPlayer(num_players_)), + cards_to_crib_(CardsToCrib(num_players_)), + winner_bonus_reward_( + ParameterValue("winner_bonus_reward", kDefaultWinnerBonus)) {} + +} // namespace cribbage +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/cribbage/cribbage.h b/scenarios/bargaining/open_spiel/open_spiel/games/cribbage/cribbage.h new file mode 100644 index 0000000..47299ca --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/cribbage/cribbage.h @@ -0,0 +1,204 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_CRIBBAGE_H_ +#define OPEN_SPIEL_GAMES_CRIBBAGE_H_ + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// An implementation of Cribbage: +// https://en.wikipedia.org/wiki/Cribbage +// +// Parameters: +// players (int): Number of players. Default: 2. +// winner_bonus_reward (double): Bonus reward given to the winner(s), the +// i.e. player(s) who score at least 121 points. If there are multiple +// winners, the bonus is split equally among them. Also a negative of this +// amount is given to the loser(s) (also split equally). Default: 1000. 
+ +namespace open_spiel { +namespace cribbage { + +constexpr int kNumSuits = 4; +constexpr int kCardsPerSuit = 13; +constexpr int kDeckSize = kCardsPerSuit * kNumSuits; +constexpr int kMaxNumRounds = 100; + +// In a 4-player game, if all players have 10s (16 of them), then each round +// will take 3 actions + 4 passes = 7. There will be 5 of these giving 35 +// actions and the last round will have 5 = 40. Then add 4 for the crib card +// selection. +constexpr int kMaxNumActionsPerRound = 44; + +// First 52 represents single-card actions. +// Next 52*52 represents two-card actions. +// 1 for the pass action. +constexpr int kNumDistinctActions = 2757; +constexpr int kPassAction = 2756; + +enum Suit { kClubs = 0, kDiamonds = 1, kHearts = 2, kSpades = 3 }; + +enum Rank { + kAce = 0, + kTwo = 1, + kThree = 2, + kFour = 3, + kFive = 4, + kSix = 5, + kSeven = 6, + kEight = 7, + kNine = 8, + kTen = 9, + kJack = 10, + kQueen = 11, + kKing = 12, +}; + +const char kSuitNames[kNumSuits + 1] = "CDHS"; +const char kRanks[kCardsPerSuit + 1] = "A23456789TJQK"; + +struct Card { + int id; + int rank; + int suit; + int value() const; + std::string to_string() const; +}; + +bool operator==(const Card& lhs, const Card& rhs); +bool operator<(const Card& lhs, const Card& rhs); + +enum Phase { kCardPhase = 0, kPlayPhase = 1 }; + +class CribbageGame; + +class CribbageState : public State { + public: + CribbageState(const CribbageState&) = default; + CribbageState(std::shared_ptr game); + + Player CurrentPlayer() const override; + std::string ActionToString(Player player, Action move_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::vector Rewards() const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + ActionsAndProbs ChanceOutcomes() const override; + + std::unique_ptr Clone() const override; + + std::vector LegalActions() const override; + + int round() const { return round_; } + std::vector scores() const { return scores_; } + + protected: + void DoApplyAction(Action move_id) override; + + private: + std::vector LegalOneCardCribActions() const; + std::vector LegalTwoCardCribActions() const; + void SortHands(); + void SortCrib(); + void MoveCardToCrib(Player player, const Card& card); + void Score(Player player, int points); + bool AllHandsAreEmpty() const; + bool AllPlayersHavePassed() const; + void ScoreHands(); + void ScoreCrib(); + int DetermineWinner() const; + void DoEndOfPlayRound(); + void CheckAndApplyPlayScoring(); + + const CribbageGame& parent_game_; + int round_ = -1; + int dealer_ = -1; // Who is the dealer? + int start_player_ = -1; // Who is starting this round. + Phase phase_; // Choosing cards or play phase? + Player cur_player_ = -1; // Player to play. + std::vector rewards_; // Intermediate rewards + std::vector scores_; // Current points for each player. + + std::optional starter_; + std::vector deck_; + std::vector> hands_; + std::vector> discards_; + std::vector crib_; + std::vector played_cards_; + std::vector passed_; + Player last_played_player_; // Last player to have played a card. 
+ int current_sum_ = -1; + + void NextRound(); +}; + +class CribbageGame : public Game { + public: + explicit CribbageGame(const GameParameters& params); + + int NumDistinctActions() const override { return kNumDistinctActions; } + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new CribbageState(shared_from_this())); + } + int MaxChanceOutcomes() const override { return kDeckSize; } + int MaxGameLength() const { return kMaxNumActionsPerRound * kMaxNumRounds; } + + int NumPlayers() const override { return num_players_; } + // Win score + max points (getting 29 when you start at 120). + double MinUtility() const override { return -1149; } + double MaxUtility() const override { return +1149; } + std::vector ObservationTensorShape() const override { return {}; } + + int cards_per_player() const { return cards_per_player_; } + int cards_to_crib() const { return cards_to_crib_; } + double winner_bonus_reward() const { return winner_bonus_reward_; } + + private: + const int num_players_; + const int cards_per_player_; + const int cards_to_crib_; + const double winner_bonus_reward_; +}; + +Action ToAction(const Card& c1, const Card& c2); +Card GetCard(int id); +Card GetCardByString(const std::string& str); +std::vector GetHandFromStrings( + const std::vector& card_strings); + +// Score a 5-card hand (i.e. including the starter). Assumes cards are +// pre-sorted. +// Does not include: +// - checking the jack having the same suit as the starter. +// - checking for flushes (both 4 and 5 card) +int ScoreHand(const std::vector& hand); + +// Score a 4-card hand + starter. No sorting assumed. Includes all scoring. +int ScoreHand(const std::vector& hand, const Card& starter); + +} // namespace cribbage +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_CRIBBAGE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/cribbage/cribbage_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/cribbage/cribbage_test.cc new file mode 100644 index 0000000..8ecc367 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/cribbage/cribbage_test.cc @@ -0,0 +1,357 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
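+//
+// Note on the action encoding exercised by the tests below (added commentary,
+// not part of the upstream sources): the two-card crib action used in the
+// two-player tests is ToAction(c1, c2) = kDeckSize + kDeckSize * c1.id + c2.id,
+// i.e. 52 + 52 * c1.id + c2.id. For example, choosing AC (id 0) together with
+// 2C (id 1) encodes to 52 + 0 + 1 = 53, and the largest encodable pair value
+// is 52 + 52 * 51 + 51 = 2755. Together with the 52 single-card actions and
+// the single pass action this gives kNumDistinctActions = 2757, with
+// kPassAction = 2756, as declared in cribbage.h.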
+ +#include "open_spiel/games/cribbage/cribbage.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/random/random.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +constexpr int kSeed = 2871611; + +namespace open_spiel { +namespace cribbage { +namespace { + +namespace testing = open_spiel::testing; + +void CardToStringTest() { + std::cout << "CardToStringTest" << std::endl; + std::vector card_strings; + card_strings.reserve(52); + std::string suit_names(kSuitNames); + std::string ranks(kRanks); + + for (int i = 0; i < 52; ++i) { + Card card = GetCard(i); + std::string card_string = card.to_string(); + size_t rank_pos = ranks.find(card_string[0]); + SPIEL_CHECK_TRUE(rank_pos != std::string::npos); + size_t suit_pos = suit_names.find(card_string[1]); + SPIEL_CHECK_TRUE(suit_pos != std::string::npos); + auto iter = + std::find(card_strings.begin(), card_strings.end(), card_string); + SPIEL_CHECK_TRUE(iter == card_strings.end()); + card_strings.push_back(card_string); + } +} + +void BasicLoadTest() { + std::cout << "BasicLoadTest" << std::endl; + std::shared_ptr game = LoadGame("cribbage"); + std::unique_ptr state = game->NewInitialState(); + std::cout << state->ToString() << std::endl; + SPIEL_CHECK_EQ(game->NumPlayers(), 2); + + game = LoadGame("cribbage(players=3)"); + state = game->NewInitialState(); + std::cout << state->ToString() << std::endl; + SPIEL_CHECK_EQ(game->NumPlayers(), 3); + + game = LoadGame("cribbage(players=4)"); + state = game->NewInitialState(); + std::cout << state->ToString() << std::endl; + SPIEL_CHECK_EQ(game->NumPlayers(), 4); +} + +void BasicOneTurnPlaythrough() { + std::cout << "BasicOneTurnPlaythroughTest" << std::endl; + std::mt19937 rng(kSeed); + std::shared_ptr game = LoadGame("cribbage"); + std::unique_ptr state = game->NewInitialState(); + CribbageState* crib_state = static_cast(state.get()); + + // Deal. + while (state->IsChanceNode()) { + std::cout << state->ToString() << std::endl; + double z = absl::Uniform(rng, 0.0, 1.0); + Action outcome = SampleAction(state->ChanceOutcomes(), z).first; + std::cout << "Sampled outcome: " + << state->ActionToString(kChancePlayerId, outcome) << std::endl; + state->ApplyAction(outcome); + } + + // Card choices. + for (int p = 0; p < game->NumPlayers(); ++p) { + std::cout << state->ToString() << std::endl; + std::vector legal_actions = state->LegalActions(); + int idx = absl::Uniform(rng, 0, legal_actions.size()); + Action action = legal_actions[idx]; + std::cout << "Sampled action: " + << state->ActionToString(state->CurrentPlayer(), action) + << std::endl; + state->ApplyAction(action); + } + + // Starter. + std::cout << state->ToString() << std::endl; + double z = absl::Uniform(rng, 0.0, 1.0); + Action outcome = SampleAction(state->ChanceOutcomes(), z).first; + std::cout << "Sampled outcome: " + << state->ActionToString(kChancePlayerId, outcome) << std::endl; + state->ApplyAction(outcome); + SPIEL_CHECK_FALSE(state->IsChanceNode()); + + // Play phase. 
+ while (crib_state->round() < 1) { + std::cout << state->ToString() << std::endl; + std::vector legal_actions = state->LegalActions(); + int idx = absl::Uniform(rng, 0, legal_actions.size()); + Action action = legal_actions[idx]; + std::cout << "Sampled action: " + << state->ActionToString(state->CurrentPlayer(), action) + << std::endl; + state->ApplyAction(action); + } + + std::cout << state->ToString() << std::endl; +} + +void AssertApproxEqual(const std::vector& values1, + const std::vector& values2) { + for (int i = 0; i < values1.size(); ++i) { + SPIEL_CHECK_TRUE(Near(values1[i], values2[i])); + } +} + +void WikipediaExampleTwoPlayers() { + // https://en.wikipedia.org/wiki/Rules_of_cribbage + std::shared_ptr game = LoadGame("cribbage"); + std::unique_ptr state = game->NewInitialState(); + CribbageState* crib_state = static_cast(state.get()); + + // Deal. + // Player 0 (dealer) Alice: 5S 4S 2S 6H | 7H 8H + // Player 1 Bob: 6D JH 4H 7C | 2D 8D + // Starter: 3C + const std::vector cards = {"5S", "4S", "2S", "6H", "7H", "8H", + "6D", "JH", "4H", "7C", "2D", "8D"}; + for (const std::string& cstr : cards) { + Card card = GetCardByString(cstr); + state->ApplyAction(card.id); + } + + std::cout << state->ToString() << std::endl; + + // Card choices. Alice, then Bob. + state->ApplyAction(ToAction(GetCardByString("7H"), GetCardByString("8H"))); + state->ApplyAction(ToAction(GetCardByString("2D"), GetCardByString("8D"))); + + // Starter. + state->ApplyAction(GetCardByString("3D").id); + + // Play phase. + std::cout << state->ToString() << std::endl; + + // Bob plays JH + state->ApplyAction(GetCardByString("JH").id); + AssertApproxEqual(crib_state->scores(), {0.0, 0.0}); + // Alice plays 5S + state->ApplyAction(GetCardByString("5S").id); + AssertApproxEqual(crib_state->scores(), {2.0, 0.0}); + // Bob plays 7C + state->ApplyAction(GetCardByString("7C").id); + AssertApproxEqual(crib_state->scores(), {2.0, 0.0}); + // Alice plays 6H + state->ApplyAction(GetCardByString("6H").id); + AssertApproxEqual(crib_state->scores(), {5.0, 0.0}); + // Bob passes + state->ApplyAction(kPassAction); + // Alice plays 2S + state->ApplyAction(GetCardByString("2S").id); + AssertApproxEqual(crib_state->scores(), {5.0, 0.0}); + // Bob passes, Alice passes. + state->ApplyAction(kPassAction); + state->ApplyAction(kPassAction); + AssertApproxEqual(crib_state->scores(), {6.0, 0.0}); + + // New play round. Bob starts with 6D. + state->ApplyAction(GetCardByString("6D").id); + AssertApproxEqual(crib_state->scores(), {6.0, 0.0}); + // Alice plays 4S. + state->ApplyAction(GetCardByString("4S").id); + AssertApproxEqual(crib_state->scores(), {6.0, 0.0}); + // Bob plays 4H. + state->ApplyAction(GetCardByString("4H").id); + AssertApproxEqual(crib_state->scores(), {6.0, 2.0}); + // Alice passes, Bob passes. + state->ApplyAction(kPassAction); + state->ApplyAction(kPassAction); + // Points are now {6, 3}. + // Alice counts her hand: 15 two, 15 four (using the starter) and a run of + // 5 = 9. + // Bob counts his hand: nothing. + // Alice counts the crib: 15 two, 15 four, and a pair = 6. + std::cout << state->ToString() << std::endl; + + AssertApproxEqual(crib_state->scores(), {21.0, 3.0}); +} + +void WikipediaExampleThreePlayers() { + // https://en.wikipedia.org/wiki/Rules_of_cribbage + std::shared_ptr game = LoadGame("cribbage(players=3)"); + std::unique_ptr state = game->NewInitialState(); + CribbageState* crib_state = static_cast(state.get()); + + // Deal. 
+ // Player 0 (dealer) Claire: 7S KD 9D 8H | 7H + // Player 1 David: TS 5S 4S 7C | 2D + // Player 2 Eve: 7D 3D TH 5C | 3S + // Crib: TC + // Starter: 3C + const std::vector cards = { + "7S", "KD", "9D", "8H", "7H", "TS", "5S", "4S", + "7C", "2D", "7D", "3D", "TH", "5C", "3S", "TC", + }; + for (const std::string& cstr : cards) { + Card card = GetCardByString(cstr); + state->ApplyAction(card.id); + } + + std::cout << state->ToString() << std::endl; + SPIEL_CHECK_FALSE(state->IsChanceNode()); + + // Card choices. + state->ApplyAction(GetCardByString("7H").id); + state->ApplyAction(GetCardByString("2D").id); + state->ApplyAction(GetCardByString("3S").id); + + // Starter. + std::cout << state->ToString() << std::endl; + state->ApplyAction(GetCardByString("3C").id); + + // David plays 7C. + state->ApplyAction(GetCardByString("7C").id); + AssertApproxEqual(crib_state->scores(), {0.0, 0.0, 0.0}); + // Eve plays 7D. + state->ApplyAction(GetCardByString("7D").id); + AssertApproxEqual(crib_state->scores(), {0.0, 0.0, 2.0}); + // Claire plays 7S. + state->ApplyAction(GetCardByString("7S").id); + AssertApproxEqual(crib_state->scores(), {6.0, 0.0, 2.0}); + // David plays 5S. + state->ApplyAction(GetCardByString("5S").id); + AssertApproxEqual(crib_state->scores(), {6.0, 0.0, 2.0}); + // Eve plays 31. + state->ApplyAction(GetCardByString("5C").id); + AssertApproxEqual(crib_state->scores(), {6.0, 0.0, 6.0}); + + // Claire plays 8H. + state->ApplyAction(GetCardByString("8H").id); + AssertApproxEqual(crib_state->scores(), {6.0, 0.0, 6.0}); + // David plays TS. + state->ApplyAction(GetCardByString("TS").id); + AssertApproxEqual(crib_state->scores(), {6.0, 0.0, 6.0}); + // Eve plays TH. + state->ApplyAction(GetCardByString("TH").id); + AssertApproxEqual(crib_state->scores(), {6.0, 0.0, 8.0}); + // Claire passes, David passess, Eve plays 3D + state->ApplyAction(kPassAction); + state->ApplyAction(kPassAction); + state->ApplyAction(GetCardByString("3D").id); + AssertApproxEqual(crib_state->scores(), {6.0, 0.0, 10.0}); + + // Claire plays KD + state->ApplyAction(GetCardByString("KD").id); + AssertApproxEqual(crib_state->scores(), {6.0, 0.0, 10.0}); + // David plays 4S + state->ApplyAction(GetCardByString("4S").id); + AssertApproxEqual(crib_state->scores(), {6.0, 0.0, 10.0}); + // Eve passes + state->ApplyAction(kPassAction); + // Claire plays 9D + state->ApplyAction(GetCardByString("9D").id); + AssertApproxEqual(crib_state->scores(), {6.0, 0.0, 10.0}); + // David passes, Eve passes again. Claire passes. + // Claire gets 1 point, then hands scored. + state->ApplyAction(kPassAction); + state->ApplyAction(kPassAction); + state->ApplyAction(kPassAction); + + // Claire scores 15 two and 7-8-9 for four = 5. + // Claire's crib scores 15 two, 15 four, 15 six, and pair = 8. + // David scores 15 two, 15 four, and 3-4-5 for three is 7. + // Eve scores 15 two, 15 four, 15 six, and a pair for 8. 
+ AssertApproxEqual(crib_state->scores(), {20.0, 7.0, 18.0}); + std::cout << state->ToString() << std::endl; +} + +void HandScoringTests() { + // Suit order: CDHS + std::vector hand; + hand = GetHandFromStrings({"QC", "TD", "7H", "9H", "5S"}); + SPIEL_CHECK_EQ(ScoreHand(hand), 4); + hand = GetHandFromStrings({"QC", "QD", "7H", "9H", "5S"}); + SPIEL_CHECK_EQ(ScoreHand(hand), 6); + hand = GetHandFromStrings({"QC", "QD", "QH", "9H", "5S"}); + SPIEL_CHECK_EQ(ScoreHand(hand), 12); + hand = GetHandFromStrings({"QC", "QD", "QH", "5S", "QS"}); + SPIEL_CHECK_EQ(ScoreHand(hand), 20); + hand = GetHandFromStrings({"5C", "QC", "5D", "5H", "5S"}); + SPIEL_CHECK_EQ(ScoreHand(hand), 28); // 8 for 15s w/ Q, 12 4-of-K, 8 more 15s + hand = GetHandFromStrings({"QC", "JD", "7H", "9H"}); + SPIEL_CHECK_EQ(ScoreHand(hand, GetCardByString("5D")), 5); // 4 15s + jack + hand = GetHandFromStrings({"QC", "JD", "7H", "9H"}); + SPIEL_CHECK_EQ(ScoreHand(hand, GetCardByString("5S")), 4); // 4 15s + // Flushes. 5-card flush, then a 4-card flush, then no flush. + hand = GetHandFromStrings({"QC", "TC", "8C", "4C"}); + SPIEL_CHECK_EQ(ScoreHand(hand, GetCardByString("2C")), 5); + hand = GetHandFromStrings({"QC", "TC", "8C", "4C"}); + SPIEL_CHECK_EQ(ScoreHand(hand, GetCardByString("2D")), 4); + hand = GetHandFromStrings({"QD", "TC", "8C", "4C"}); + SPIEL_CHECK_EQ(ScoreHand(hand, GetCardByString("2C")), 0); + // 5-card flush and run of 5 + nobs = 11. + hand = GetHandFromStrings({"9C", "TC", "JC", "QC"}); + SPIEL_CHECK_EQ(ScoreHand(hand, GetCardByString("KC")), 11); + // Examples of runs from the rule book. + hand = GetHandFromStrings({"5C", "6C", "7C", "8D"}); + SPIEL_CHECK_EQ(ScoreHand(hand, GetCardByString("8S")), 14); + // 3 runs of 3 (9) + 3-of-a-kind (6) + three 15s (8) = 21. + hand = GetHandFromStrings({"4C", "4D", "4S", "5D"}); + SPIEL_CHECK_EQ(ScoreHand(hand, GetCardByString("6S")), 21); + // 4 runs of 3 (12) + 2 pairs (4) + 2 15s (4) = 20. + hand = GetHandFromStrings({"6C", "6D", "7S", "7D"}); + SPIEL_CHECK_EQ(ScoreHand(hand, GetCardByString("8S")), 20); +} + +void BasicCribbageTests() { + testing::RandomSimTest(*LoadGame("cribbage"), 10); + testing::RandomSimTest(*LoadGame("cribbage(players=3)"), 10); + testing::RandomSimTest(*LoadGame("cribbage(players=4)"), 10); +} + +} // namespace +} // namespace cribbage +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::cribbage::CardToStringTest(); + open_spiel::cribbage::BasicLoadTest(); + open_spiel::cribbage::BasicCribbageTests(); + open_spiel::cribbage::BasicOneTurnPlaythrough(); + open_spiel::cribbage::HandScoringTests(); + open_spiel::cribbage::WikipediaExampleTwoPlayers(); + open_spiel::cribbage::WikipediaExampleThreePlayers(); + open_spiel::cribbage::BasicCribbageTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/cursor_go/cursor_go.cc b/scenarios/bargaining/open_spiel/open_spiel/games/cursor_go/cursor_go.cc new file mode 100644 index 0000000..5d8adc3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/cursor_go/cursor_go.cc @@ -0,0 +1,287 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/cursor_go/cursor_go.h" + +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/games/go/go_board.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace cursor_go { +namespace { + +using go::BoardPoints; +using go::MakePoint; +using go::VirtualPoint; +using go::VirtualPointFrom2DPoint; +using go::VirtualPointToString; + +// Facts about the game +const GameType kGameType{ + /*short_name=*/"cursor_go", + /*long_name=*/"Cursor Go", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + { + {"komi", GameParameter(7.5)}, + {"board_size", GameParameter(19)}, + {"handicap", GameParameter(0)}, + {"max_cursor_moves", GameParameter(100)}, + }, +}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new CursorGoGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +std::vector HandicapStones(int num_handicap) { + if (num_handicap < 2 || num_handicap > 9) return {}; + + static std::array placement = { + {MakePoint("d4"), MakePoint("q16"), MakePoint("d16"), MakePoint("q4"), + MakePoint("d10"), MakePoint("q10"), MakePoint("k4"), MakePoint("k16"), + MakePoint("k10")}}; + static VirtualPoint center = MakePoint("k10"); + + std::vector points; + points.reserve(num_handicap); + for (int i = 0; i < num_handicap; ++i) { + points.push_back(placement[i]); + } + + if (num_handicap >= 5 && num_handicap % 2 == 1) { + points[num_handicap - 1] = center; + } + + return points; +} + +} // namespace + +CursorGoState::CursorGoState(std::shared_ptr game, int board_size, + float komi, int handicap, int max_cursor_moves) + : State(game), + board_(board_size), + komi_(komi), + handicap_(handicap), + max_cursor_moves_(max_cursor_moves), + to_play_(GoColor::kBlack) { + ResetBoard(); +} + +std::string CursorGoState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return HistoryString(); +} + +std::string CursorGoState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void CursorGoState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + int num_cells = board_.board_size() * board_.board_size(); + SPIEL_CHECK_EQ(values.size(), num_cells * (kCellStates + 3)); + std::fill(values.begin(), values.end(), 0.); + + // Add planes: black, white, empty. 
+ int cell = 0; + for (VirtualPoint p : BoardPoints(board_.board_size())) { + int color_val = static_cast(board_.PointColor(p)); + values[num_cells * color_val + cell] = 1.0; + ++cell; + } + SPIEL_CHECK_EQ(cell, num_cells); + + // Fourth plane for cursor position. + const auto [row, col] = cursor_[ColorToPlayer(to_play_)]; + const int cursor_cell = row * board_.board_size() + col; + values[num_cells * kCellStates + cursor_cell] = 1.0; + + // Add a fifth binary plane for komi (whether white is to play). + std::fill(values.begin() + ((1 + kCellStates) * num_cells), + values.begin() + ((2 + kCellStates) * num_cells), + (to_play_ == GoColor::kWhite ? 1.0 : 0.0)); + + // Add a sixth binary plane for the number of cursor moves. + std::fill(values.begin() + ((2 + kCellStates) * num_cells), values.end(), + static_cast(cursor_moves_count_) / max_cursor_moves_); +} + +std::vector CursorGoState::LegalActions() const { + std::vector actions{}; + if (is_terminal_) return actions; + const auto cursor = cursor_[ColorToPlayer(to_play_)]; + if (cursor_moves_count_ < max_cursor_moves_) { + const auto [row, col] = cursor; + if (row < board_.board_size() - 1) actions.push_back(kActionUp); + if (row > 0) actions.push_back(kActionDown); + if (col > 0) actions.push_back(kActionLeft); + if (col < board_.board_size() - 1) actions.push_back(kActionRight); + } + if (board_.IsLegalMove(VirtualPointFrom2DPoint(cursor), to_play_)) + actions.push_back(kActionPlaceStone); + actions.push_back(kActionPass); + return actions; +} + +std::string CursorGoState::ActionToString(Player player, Action action) const { + static constexpr std::array + kActionNames{"Up", "Down", "Left", "Right", "Place Stone", "Pass"}; + if (action < 0 || action >= kActionNames.size()) { + return absl::StrFormat("invalid action %d", action); + } + return std::string(kActionNames[action]); +} + +std::string CursorGoState::ToString() const { + std::stringstream ss; + ss << "CursorGoState(komi=" << komi_; + if (!is_terminal_) ss << ", to_play=" << GoColorToString(to_play_); + ss << ", history.size()=" << history_.size(); + if (!is_terminal_) ss << ", cursor_moves_count=" << cursor_moves_count_; + ss << ")\n" << board_; + if (!is_terminal_) + ss << "\nCursor: " + << VirtualPointToString( + VirtualPointFrom2DPoint(cursor_[ColorToPlayer(to_play_)])); + return ss.str(); +} + +std::vector CursorGoState::Returns() const { + if (!is_terminal_) return {0.0, 0.0}; + + if (superko_) { + // Superko rules (https://senseis.xmp.net/?Superko) are complex and vary + // between rulesets. + // For simplicity and because superkos are very rare, we just treat them as + // a draw. + return {kDrawUtility, kDrawUtility}; + } + + // Score with Tromp-Taylor. + float black_score = TrompTaylorScore(board_, komi_, handicap_); + + std::vector returns(kNumPlayers); + if (black_score > 0) { + returns[ColorToPlayer(GoColor::kBlack)] = kWinUtility; + returns[ColorToPlayer(GoColor::kWhite)] = kLossUtility; + } else if (black_score < 0) { + returns[ColorToPlayer(GoColor::kBlack)] = kLossUtility; + returns[ColorToPlayer(GoColor::kWhite)] = kWinUtility; + } else { + returns[ColorToPlayer(GoColor::kBlack)] = kDrawUtility; + returns[ColorToPlayer(GoColor::kWhite)] = kDrawUtility; + } + return returns; +} + +std::unique_ptr CursorGoState::Clone() const { + return std::unique_ptr(new CursorGoState(*this)); +} + +void CursorGoState::DoApplyAction(Action action) { + if (action == kActionPlaceStone || action == kActionPass) { + VirtualPoint point = + (action == kActionPass) + ? 
go::kVirtualPass + : VirtualPointFrom2DPoint(cursor_[ColorToPlayer(to_play_)]); + SPIEL_CHECK_TRUE(board_.PlayMove(point, to_play_)); + is_terminal_ = last_move_was_pass_ && (action == kActionPass); + last_move_was_pass_ = (action == kActionPass); + to_play_ = OppColor(to_play_); + cursor_moves_count_ = 0; + + bool was_inserted = repetitions_.insert(board_.HashValue()).second; + if (!was_inserted && action == kActionPlaceStone) { + // We have encountered this position before. + superko_ = true; + } + } else { + switch (action) { + case kActionUp: + cursor_[ColorToPlayer(to_play_)].first++; + break; + case kActionDown: + cursor_[ColorToPlayer(to_play_)].first--; + break; + case kActionLeft: + cursor_[ColorToPlayer(to_play_)].second--; + break; + case kActionRight: + cursor_[ColorToPlayer(to_play_)].second++; + break; + default: + SpielFatalError(absl::StrCat("Invalid action ", action)); + } + ++cursor_moves_count_; + } +} + +void CursorGoState::ResetBoard() { + board_.Clear(); + const int middle = board_.board_size() / 2; + cursor_[0] = {middle, middle}; + cursor_[1] = {middle, middle}; + cursor_moves_count_ = 0; + if (handicap_ < 2) { + to_play_ = GoColor::kBlack; + } else { + for (VirtualPoint p : HandicapStones(handicap_)) { + board_.PlayMove(p, GoColor::kBlack); + } + to_play_ = GoColor::kWhite; + } + + repetitions_.clear(); + repetitions_.insert(board_.HashValue()); + superko_ = false; + is_terminal_ = false; + last_move_was_pass_ = false; +} + +CursorGoGame::CursorGoGame(const GameParameters& params) + : Game(kGameType, params), + komi_(ParameterValue("komi")), + board_size_(ParameterValue("board_size")), + handicap_(ParameterValue("handicap")), + max_cursor_moves_(ParameterValue("max_cursor_moves")) {} + +} // namespace cursor_go +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/cursor_go/cursor_go.h b/scenarios/bargaining/open_spiel/open_spiel/games/cursor_go/cursor_go.h new file mode 100644 index 0000000..03f9440 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/cursor_go/cursor_go.h @@ -0,0 +1,182 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_CURSOR_GO_H_ +#define OPEN_SPIEL_GAMES_CURSOR_GO_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/games/go/go_board.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// Game of Go, with a cursor interface. Actions are to move the cursor up, down, +// left, or right. Or to pass or place a stone in the current cursor position. +// https://en.wikipedia.org/wiki/Go_(game) +// +// Parameters: +// "komi" float (default 7.5) compensation for white +// "board_size" int (default 19) rows of the board +// "handicap" int (default 0) number of handicap stones for black +// "max_cursor_moves" int (default 100) maximum number of cursor moves +// before a player must pass or play. +// +// Handicap stones assume a 19x19 board. 
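+//
+// Example usage (an illustrative sketch added for clarity; not part of the
+// upstream sources). The board size and cursor-move limit below are arbitrary
+// example values; cursor_go_test.cc loads the game the same way:
+//
+//   std::shared_ptr<const Game> game =
+//       LoadGame("cursor_go(board_size=5,max_cursor_moves=10)");
+//   std::unique_ptr<State> state = game->NewInitialState();
+//   // Black's cursor starts in the center; move it up twice, then play there.
+//   state->ApplyAction(cursor_go::kActionUp);
+//   state->ApplyAction(cursor_go::kActionUp);
+//   state->ApplyAction(cursor_go::kActionPlaceStone);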
+ +namespace open_spiel { +namespace cursor_go { + +using go::GoBoard; +using go::GoColor; + +// Actions +enum CursorGoAction : Action { + kActionUp, + kActionDown, + kActionLeft, + kActionRight, + kActionPlaceStone, + kActionPass +}; + +// Constants. +inline constexpr int kNumPlayers = 2; +inline constexpr double kLossUtility = -1; +inline constexpr double kWinUtility = 1; +inline constexpr int kCellStates = 3; + +// Go can only end in a draw when using a round komi. +// We also treat superko as a draw. +inline constexpr double kDrawUtility = 0; + +// All actions must be in [0; NumDistinctActions). +inline constexpr int kNumDistinctActions = kActionPass + 1; + +// In theory Go games have no length limit, but we limit them to twice the +// number of points on the board for practicality - only random games last +// this long. +// The maximum number of cursor go moves is greater by a factor of +// (1+max_cursor_moves). +inline int MaxGameLength(int board_size) { return board_size * board_size * 2; } + +inline int ColorToPlayer(GoColor c) { return static_cast(c); } + +// State of an in-play game. +class CursorGoState : public State { + public: + // Constructs a Go state for the empty board. + CursorGoState(std::shared_ptr game, int board_size, float komi, + int handicap, int max_cursor_moves); + + Player CurrentPlayer() const override { + return is_terminal_ ? kTerminalPlayerId : ColorToPlayer(to_play_); + } + std::vector LegalActions() const override; + std::string ActionToString(Player player, Action action) const override; + std::string ToString() const override; + + bool IsTerminal() const override { return is_terminal_; } + + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + + // Five planes: black, white, empty, cursor position, and a bias plane of bits + // indicating komi (whether white is to play). + void ObservationTensor(Player player, + absl::Span values) const override; + + std::vector Returns() const override; + + std::unique_ptr Clone() const override; + + const GoBoard& board() const { return board_; } + + protected: + void DoApplyAction(Action action) override; + + private: + void ResetBoard(); + + GoBoard board_; + + // RepetitionTable records which positions we have already encountered. + // We are already indexing by board hash, so there is no need to hash that + // hash again, so we use a custom passthrough hasher. + class PassthroughHash { + public: + std::size_t operator()(uint64_t x) const { + return static_cast(x); + } + }; + using RepetitionTable = std::unordered_set; + RepetitionTable repetitions_; + + const float komi_; + const int handicap_; + const int max_cursor_moves_; + GoColor to_play_; + int cursor_moves_count_; + bool superko_; + bool last_move_was_pass_; + bool is_terminal_; + std::array, kNumPlayers> cursor_; +}; + +// Game object. +class CursorGoGame : public Game { + public: + explicit CursorGoGame(const GameParameters& params); + + int NumDistinctActions() const override { return kNumDistinctActions; } + + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new CursorGoState( + shared_from_this(), board_size_, komi_, handicap_, max_cursor_moves_)); + } + + std::vector ObservationTensorShape() const override { + // Planes: black, white, empty, cursor position, and bias planes indicating + // komi (whether white is to play) and the number of cursor moves made. 
+ return {kCellStates + 3, board_size_, board_size_}; + } + + int NumPlayers() const override { return kNumPlayers; } + + double MinUtility() const override { return kLossUtility; } + absl::optional UtilitySum() const override { + return kLossUtility + kWinUtility; + } + double MaxUtility() const override { return kWinUtility; } + + int MaxGameLength() const override { + return cursor_go::MaxGameLength(board_size_) * (1 + max_cursor_moves_); + } + + private: + const float komi_; + const int board_size_; + const int handicap_; + const int max_cursor_moves_; +}; + +} // namespace cursor_go +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_GO_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/cursor_go/cursor_go_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/cursor_go/cursor_go_test.cc new file mode 100644 index 0000000..0818b43 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/cursor_go/cursor_go_test.cc @@ -0,0 +1,41 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/cursor_go/cursor_go.h" + +#include "open_spiel/games/go/go_board.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace cursor_go { +namespace { + +namespace testing = open_spiel::testing; + +constexpr int kBoardSize = 19; +constexpr float kKomi = 7.5; + +void BasicGoTests() { + testing::LoadGameTest("cursor_go"); + testing::NoChanceOutcomesTest(*LoadGame("cursor_go(board_size=5)")); + testing::RandomSimTest(*LoadGame("cursor_go(board_size=3)"), 100); +} + +} // namespace +} // namespace cursor_go +} // namespace open_spiel + +int main(int argc, char** argv) { open_spiel::cursor_go::BasicGoTests(); } diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/dark_chess/dark_chess.cc b/scenarios/bargaining/open_spiel/open_spiel/games/dark_chess/dark_chess.cc new file mode 100644 index 0000000..a0af763 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/dark_chess/dark_chess.cc @@ -0,0 +1,599 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/dark_chess/dark_chess.h" + +#include +#include +#include +#include + +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace dark_chess { +namespace { + +constexpr int kNumReversibleMovesToDraw = 100; +constexpr int kNumRepetitionsToDraw = 3; + +// Facts about the game. +const GameType kGameType{ + /*short_name=*/"dark_chess", + /*long_name=*/"Dark Chess", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kImperfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"board_size", GameParameter(8)}, + {"fen", GameParameter(GameParameter::Type::kString, false)}}}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new DarkChessGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory) + +chess::ObservationTable ComputePrivateInfoTable( + const chess::ChessBoard& board, chess::Color color, + const chess::ObservationTable& public_info_table) { + const int board_size = board.BoardSize(); + chess::ObservationTable observability_table{false}; + board.GenerateLegalMoves( + [&](const chess::Move& move) -> bool { + size_t to_index = chess::SquareToIndex(move.to, board_size); + if (!public_info_table[to_index]) observability_table[to_index] = true; + + if (move.to == board.EpSquare() && + move.piece.type == chess::PieceType::kPawn) { + int8_t reversed_y_direction = color == chess::Color::kWhite ? -1 : 1; + chess::Square en_passant_capture = + move.to + chess::Offset{0, reversed_y_direction}; + size_t index = chess::SquareToIndex(en_passant_capture, board_size); + if (!public_info_table[index]) observability_table[index] = true; + } + return true; + }, + color); + + for (int8_t y = 0; y < board_size; ++y) { + for (int8_t x = 0; x < board_size; ++x) { + chess::Square sq{x, y}; + const auto& piece = board.at(sq); + if (piece.color == color) { + size_t index = chess::SquareToIndex(sq, board_size); + if (!public_info_table[index]) observability_table[index] = true; + } + } + } + return observability_table; +} + +// Checks whether the defender is under attack from the attacker, +// for the special case when we already know that attacker is under attack +// from the defender. +// I.e. D -> A, but D <-? A (where arrow is the "under attack relation") +// This is used for computation of the public info table. +bool IsUnderAttack(const chess::Square defender_sq, + const chess::Piece defender_piece, + const chess::Square attacker_sq, + const chess::Piece attacker_piece) { + // Identity: i.e. we only check distinct piece types from now on. + if (defender_piece.type == attacker_piece.type) { + return true; + } + // No need to check empty attackers from now on. + if (attacker_piece.type == chess::PieceType::kEmpty) { + return false; + } + + const auto pawn_attack = [&]() { + int8_t y_dir = attacker_piece.color == chess::Color::kWhite ? 
1 : -1; + return defender_sq == attacker_sq + chess::Offset{1, y_dir} || + defender_sq == attacker_sq + chess::Offset{-1, y_dir}; + }; + const auto king_attack = [&]() { + return abs(attacker_sq.x - defender_sq.x) <= 1 && + abs(attacker_sq.y - defender_sq.y) <= 1; + }; + const auto rook_attack = [&]() { + return abs(attacker_sq.x - defender_sq.x) == 0 || + abs(attacker_sq.y - defender_sq.y) == 0; + }; + const auto bishop_attack = [&]() { + return abs(attacker_sq.x - defender_sq.x) >= 1 && + abs(attacker_sq.y - defender_sq.y) >= 1; + }; + + switch (defender_piece.type) { + case chess::PieceType::kEmpty: + SpielFatalError("Empty squares cannot be already attacking."); + + case chess::PieceType::kKing: + switch (attacker_piece.type) { + case chess::PieceType::kQueen: + return true; + case chess::PieceType::kRook: + return rook_attack(); + case chess::PieceType::kBishop: + return bishop_attack(); + case chess::PieceType::kKnight: + return false; + case chess::PieceType::kPawn: + return pawn_attack(); + default: + SpielFatalError("Exhausted match"); + } + + case chess::PieceType::kQueen: + switch (attacker_piece.type) { + case chess::PieceType::kKing: + return king_attack(); + case chess::PieceType::kRook: + return rook_attack(); + case chess::PieceType::kBishop: + return bishop_attack(); + case chess::PieceType::kKnight: + return false; + case chess::PieceType::kPawn: + return pawn_attack(); + default: + SpielFatalError("Exhausted match"); + } + + case chess::PieceType::kRook: + switch (attacker_piece.type) { + case chess::PieceType::kKing: + return king_attack(); + case chess::PieceType::kQueen: + return true; + default: + return false; + } + + case chess::PieceType::kBishop: + switch (attacker_piece.type) { + case chess::PieceType::kKing: + return king_attack(); + case chess::PieceType::kQueen: + return true; + case chess::PieceType::kPawn: + return pawn_attack(); + default: + return false; + } + + case chess::PieceType::kKnight: + return false; + + case chess::PieceType::kPawn: + return attacker_piece.type == chess::PieceType::kKing || + attacker_piece.type == chess::PieceType::kQueen || + attacker_piece.type == chess::PieceType::kBishop; + + default: + // This should not happen, we cover all the possibilities. + SpielFatalError("Exhausted pattern match in dark_chess::IsUnderAttack()"); + } +} + +// Computes which squares are public information. It does not recognize all of +// them. Only squares of two opponent pieces of the same type attacking each +// other. +chess::ObservationTable ComputePublicInfoTable(const chess::ChessBoard& board) { + const int board_size = board.BoardSize(); + std::array observability_table{false}; + board.GenerateLegalMoves( + [&](const chess::Move& move) -> bool { + const chess::Piece& from_piece = board.at(move.from); + const chess::Piece& to_piece = board.at(move.to); + + if (IsUnderAttack(move.from, from_piece, move.to, to_piece)) { + size_t from_index = chess::SquareToIndex(move.from, board_size); + observability_table[from_index] = true; + + size_t to_index = chess::SquareToIndex(move.to, board_size); + observability_table[to_index] = true; + + // Fill the table also between the indices. 
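+ // For sliding pieces, every square strictly between the two mutually
+ // attacking pieces is marked public as well; knights jump over squares,
+ // so no in-between fill is needed for them.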
+ if (from_piece.type != chess::PieceType::kKnight) { + int offset_x = 0; + int offset_y = 0; + + int diff_x = move.to.x - move.from.x; + if (diff_x > 0) + offset_x = 1; + else if (diff_x < 0) + offset_x = -1; + + int diff_y = move.to.y - move.from.y; + if (diff_y > 0) + offset_y = 1; + else if (diff_y < 0) + offset_y = -1; + chess::Offset offset_step = { + static_cast(offset_x), + static_cast(offset_y) + }; + + for (chess::Square dest = move.from + offset_step; dest != move.to; + dest += offset_step) { + size_t dest_index = chess::SquareToIndex(dest, board_size); + observability_table[dest_index] = true; + } + } + } + return true; + }, + chess::Color::kWhite); + + return observability_table; +} + +bool ObserverHasString(IIGObservationType iig_obs_type) { + return iig_obs_type.public_info && + iig_obs_type.private_info == PrivateInfoType::kSinglePlayer && + !iig_obs_type.perfect_recall; +} +bool ObserverHasTensor(IIGObservationType iig_obs_type) { + return !iig_obs_type.perfect_recall; +} + +} // namespace + +class DarkChessObserver : public Observer { + public: + explicit DarkChessObserver(IIGObservationType iig_obs_type) + : Observer(/*has_string=*/ObserverHasString(iig_obs_type), + /*has_tensor=*/ObserverHasTensor(iig_obs_type)), + iig_obs_type_(iig_obs_type) {} + + void WriteTensor(const State& observed_state, int player, + Allocator* allocator) const override { + auto& state = open_spiel::down_cast(observed_state); + auto& game = open_spiel::down_cast(*state.GetGame()); + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, game.NumPlayers()); + + if (iig_obs_type_.perfect_recall) { + SpielFatalError( + "DarkChessObserver: tensor with perfect recall not implemented."); + } + + const auto public_info_table = ComputePublicInfoTable(state.Board()); + + if (iig_obs_type_.public_info) { + WritePublicInfoTensor(state, public_info_table, allocator); + } + if (iig_obs_type_.private_info == PrivateInfoType::kSinglePlayer) { + std::string prefix = "private"; + WritePrivateInfoTensor(state, public_info_table, player, prefix, + allocator); + } else if (iig_obs_type_.private_info == PrivateInfoType::kAllPlayers) { + for (int i = 0; i < chess::NumPlayers(); ++i) { + chess::Color color = chess::PlayerToColor(player); + std::string prefix = chess::ColorToString(color); + WritePrivateInfoTensor(state, public_info_table, i, prefix, allocator); + } + } + } + + std::string StringFrom(const State& observed_state, + int player) const override { + auto& state = open_spiel::down_cast(observed_state); + auto& game = open_spiel::down_cast(*state.GetGame()); + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, game.NumPlayers()); + + if (iig_obs_type_.perfect_recall) { + SpielFatalError( + "DarkChessObserver: string with perfect recall is not supported"); + } + + if (iig_obs_type_.public_info && + iig_obs_type_.private_info == PrivateInfoType::kSinglePlayer) { + chess::Color color = chess::PlayerToColor(player); + chess::ObservationTable empty_public_info_table{}; + auto obs_table = ComputePrivateInfoTable(state.Board(), color, + empty_public_info_table); + return state.Board().ToDarkFEN(obs_table, color); + } else { + SpielFatalError( + "DarkChessObserver: string with imperfect recall is implemented only" + " for the (default) observation type."); + } + } + + private: + void WritePieces(chess::Color color, chess::PieceType piece_type, + const chess::ChessBoard& board, + const chess::ObservationTable& observability_table, + const std::string& prefix, Allocator* allocator) const { + const std::string type_string 
= + color == chess::Color::kEmpty + ? "empty" + : chess::PieceTypeToString( + piece_type, + /*uppercase=*/color == chess::Color::kWhite); + const int board_size = board.BoardSize(); + + auto out = allocator->Get(prefix + "_" + type_string + "_pieces", + {board_size, board_size}); + for (int8_t y = 0; y < board_size; ++y) { + for (int8_t x = 0; x < board_size; ++x) { + const chess::Square square{x, y}; + const chess::Piece& piece_on_board = board.at(square); + const bool write_square = + piece_on_board.color == color && + piece_on_board.type == piece_type && + observability_table[chess::SquareToIndex(square, board_size)]; + out.at(x, y) = write_square ? 1.0f : 0.0f; + } + } + } + + void WriteUnknownSquares(const chess::ChessBoard& board, + chess::ObservationTable& observability_table, + const std::string& prefix, + Allocator* allocator) const { + const int board_size = board.BoardSize(); + auto out = allocator->Get(prefix + "_unknown_squares", + {board.BoardSize(), board.BoardSize()}); + for (int8_t y = 0; y < board_size; ++y) { + for (int8_t x = 0; x < board_size; ++x) { + const chess::Square square{x, y}; + const bool write_square = + observability_table[chess::SquareToIndex(square, board_size)]; + out.at(x, y) = write_square ? 0.0f : 1.0f; + } + } + } + + void WriteScalar(int val, int min, int max, const std::string& field_name, + Allocator* allocator) const { + SPIEL_DCHECK_LT(min, max); + SPIEL_DCHECK_GE(val, min); + SPIEL_DCHECK_LE(val, max); + auto out = allocator->Get(field_name, {max - min + 1}); + out.at(val - min) = 1; + } + + // Adds a binary scalar plane. + void WriteBinary(bool val, const std::string& field_name, + Allocator* allocator) const { + WriteScalar(val ? 1 : 0, 0, 1, field_name, allocator); + } + + void WritePrivateInfoTensor(const DarkChessState& state, + const chess::ObservationTable& public_info_table, + int player, const std::string& prefix, + Allocator* allocator) const { + chess::Color color = chess::PlayerToColor(player); + chess::ObservationTable private_info_table = + ComputePrivateInfoTable(state.Board(), color, public_info_table); + + // Piece configuration. + for (const chess::PieceType& piece_type : chess::kPieceTypes) { + WritePieces(chess::Color::kWhite, piece_type, state.Board(), + private_info_table, prefix, allocator); + WritePieces(chess::Color::kBlack, piece_type, state.Board(), + private_info_table, prefix, allocator); + } + WritePieces(chess::Color::kEmpty, chess::PieceType::kEmpty, state.Board(), + private_info_table, prefix, allocator); + WriteUnknownSquares(state.Board(), private_info_table, prefix, allocator); + + // Castling rights. + WriteBinary( + state.Board().CastlingRight(color, chess::CastlingDirection::kLeft), + prefix + "_left_castling", allocator); + WriteBinary( + state.Board().CastlingRight(color, chess::CastlingDirection::kRight), + prefix + "_right_castling", allocator); + } + + void WritePublicInfoTensor(const DarkChessState& state, + const chess::ObservationTable& public_info_table, + Allocator* allocator) const { + const auto entry = state.repetitions_.find(state.Board().HashValue()); + SPIEL_CHECK_FALSE(entry == state.repetitions_.end()); + int repetitions = entry->second; + + // Piece configuration. 
+ std::string prefix = "public"; + for (const chess::PieceType& piece_type : chess::kPieceTypes) { + WritePieces(chess::Color::kWhite, piece_type, state.Board(), + public_info_table, prefix, allocator); + WritePieces(chess::Color::kBlack, piece_type, state.Board(), + public_info_table, prefix, allocator); + } + WritePieces(chess::Color::kEmpty, chess::PieceType::kEmpty, state.Board(), + public_info_table, prefix, allocator); + + // Num repetitions for the current board. + WriteScalar(/*val=*/repetitions, /*min=*/1, /*max=*/3, "repetitions", + allocator); + + // Side to play. + WriteScalar(/*val=*/ColorToPlayer(state.Board().ToPlay()), + /*min=*/0, /*max=*/1, "side_to_play", allocator); + + // Irreversible move counter. + auto out = allocator->Get("irreversible_move_counter", {1}); + out.at(0) = state.Board().IrreversibleMoveCounter() / 100.; + } + + IIGObservationType iig_obs_type_; +}; + +DarkChessState::DarkChessState(std::shared_ptr game, int board_size, + const std::string& fen) + : State(game), + start_board_(*chess::ChessBoard::BoardFromFEN(fen, board_size, true)), + current_board_(start_board_) { + SPIEL_CHECK_TRUE(¤t_board_); + repetitions_[current_board_.HashValue()] = 1; +} + +void DarkChessState::DoApplyAction(Action action) { + chess::Move move = ActionToMove(action, Board()); + moves_history_.push_back(move); + Board().ApplyMove(move); + ++repetitions_[current_board_.HashValue()]; + cached_legal_actions_.reset(); +} + +void DarkChessState::MaybeGenerateLegalActions() const { + if (!cached_legal_actions_) { + cached_legal_actions_ = std::vector(); + Board().GenerateLegalMoves([this](const chess::Move& move) -> bool { + cached_legal_actions_->push_back(MoveToAction(move, BoardSize())); + return true; + }); + absl::c_sort(*cached_legal_actions_); + } +} + +std::vector DarkChessState::LegalActions() const { + MaybeGenerateLegalActions(); + if (IsTerminal()) return {}; + return *cached_legal_actions_; +} + +std::string DarkChessState::ActionToString(Player player, Action action) const { + chess::Move move = ActionToMove(action, Board()); + return move.ToSAN(Board()); +} + +std::string DarkChessState::ToString() const { return Board().ToFEN(); } + +std::vector DarkChessState::Returns() const { + auto maybe_final_returns = MaybeFinalReturns(); + if (maybe_final_returns) { + return *maybe_final_returns; + } else { + return {0.0, 0.0}; + } +} + +std::string DarkChessState::ObservationString(Player player) const { + const auto& game = open_spiel::down_cast(*game_); + return game.default_observer_->StringFrom(*this, player); +} + +void DarkChessState::ObservationTensor(Player player, + absl::Span values) const { + ContiguousAllocator allocator(values); + const auto& game = open_spiel::down_cast(*game_); + game.default_observer_->WriteTensor(*this, player, &allocator); +} + +std::unique_ptr DarkChessState::Clone() const { + return std::make_unique(*this); +} + +void DarkChessState::UndoAction(Player player, Action action) { + // TODO: Make this fast by storing undo info in another stack. 
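+ // For now, undo rewinds the repetition count and history bookkeeping by
+ // one step, then rebuilds the position by replaying the remaining move
+ // history onto a copy of the start board.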
+ SPIEL_CHECK_GE(moves_history_.size(), 1); + --repetitions_[current_board_.HashValue()]; + moves_history_.pop_back(); + history_.pop_back(); + --move_number_; + current_board_ = start_board_; + for (const chess::Move& move : moves_history_) { + current_board_.ApplyMove(move); + } +} + +bool DarkChessState::IsRepetitionDraw() const { + const auto entry = repetitions_.find(Board().HashValue()); + SPIEL_CHECK_FALSE(entry == repetitions_.end()); + return entry->second >= kNumRepetitionsToDraw; +} + +absl::optional> DarkChessState::MaybeFinalReturns() const { + const auto to_play_color = Board().ToPlay(); + const auto opp_color = chess::OppColor(to_play_color); + + const auto to_play_king = + chess::Piece{to_play_color, chess::PieceType::kKing}; + const auto opp_king = chess::Piece{opp_color, chess::PieceType::kKing}; + + if (Board().find(to_play_king) == chess::kInvalidSquare) { + std::vector returns(NumPlayers()); + returns[chess::ColorToPlayer(to_play_color)] = LossUtility(); + returns[chess::ColorToPlayer(opp_color)] = WinUtility(); + return returns; + + } else if (Board().find(opp_king) == chess::kInvalidSquare) { + std::vector returns(NumPlayers()); + returns[chess::ColorToPlayer(to_play_color)] = WinUtility(); + returns[chess::ColorToPlayer(opp_color)] = LossUtility(); + return returns; + } + + if (!Board().HasSufficientMaterial()) { + return std::vector{DrawUtility(), DrawUtility()}; + } + + if (IsRepetitionDraw()) { + return std::vector{DrawUtility(), DrawUtility()}; + } + // Compute and cache the legal actions. + MaybeGenerateLegalActions(); + SPIEL_CHECK_TRUE(cached_legal_actions_); + const bool have_legal_moves = !cached_legal_actions_->empty(); + + // If we don't have legal moves we are stalemated + if (!have_legal_moves) { + return std::vector{DrawUtility(), DrawUtility()}; + } + + if (Board().IrreversibleMoveCounter() >= kNumReversibleMovesToDraw) { + // This is theoretically a draw that needs to be claimed, but we implement + // it as a forced draw for now. + return std::vector{DrawUtility(), DrawUtility()}; + } + + return absl::nullopt; +} + +DarkChessGame::DarkChessGame(const GameParameters& params) + : Game(kGameType, params), + board_size_(ParameterValue("board_size")), + fen_(ParameterValue("fen", chess::DefaultFen(board_size_))) { + default_observer_ = std::make_shared(kDefaultObsType); +} + +std::shared_ptr DarkChessGame::MakeObserver( + absl::optional iig_obs_type, + const GameParameters& params) const { + if (!params.empty()) SpielFatalError("Observation params not supported"); + IIGObservationType obs_type = iig_obs_type.value_or(kDefaultObsType); + if (ObserverHasString(obs_type) || ObserverHasTensor(obs_type)) { + return std::make_shared(obs_type); + } + return nullptr; +} + +} // namespace dark_chess +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/dark_chess/dark_chess.h b/scenarios/bargaining/open_spiel/open_spiel/games/dark_chess/dark_chess.h new file mode 100644 index 0000000..7e485ab --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/dark_chess/dark_chess.h @@ -0,0 +1,187 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_DARK_CHESS_H_ +#define OPEN_SPIEL_GAMES_DARK_CHESS_H_ + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/games/chess/chess.h" +#include "open_spiel/games/chess/chess_board.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// Dark chess - imperfect information version of chess: +// https://en.wikipedia.org/wiki/Dark_chess +// +// Parameters: +// "board_size" int Number of squares in each row and column (default: 8) +// "fen" string String describing the chess board position in +// Forsyth-Edwards Notation. The FEN has to match +// the board size. Default values are available for +// board sizes 4 and 8. + +namespace open_spiel { +namespace dark_chess { + +// Constants. +inline constexpr int NumPlayers() { return 2; } +inline constexpr double LossUtility() { return -1; } +inline constexpr double DrawUtility() { return 0; } +inline constexpr double WinUtility() { return 1; } + +// See action encoding below. +inline constexpr int NumDistinctActions() { return 4672; } + +// https://math.stackexchange.com/questions/194008/how-many-turns-can-a-chess-game-take-at-maximum +inline constexpr int MaxGameLength() { return 17695; } + +class DarkChessGame; +class DarkChessObserver; + +// State of an in-play game. +class DarkChessState : public State { + public: + // Constructs a chess state at the given position in Forsyth-Edwards Notation. + // https://en.wikipedia.org/wiki/Forsyth%E2%80%93Edwards_Notation + DarkChessState(std::shared_ptr game, int board_size, + const std::string& fen); + DarkChessState(const DarkChessState&) = default; + + DarkChessState& operator=(const DarkChessState&) = default; + + Player CurrentPlayer() const override { + return IsTerminal() ? kTerminalPlayerId : ColorToPlayer(Board().ToPlay()); + } + std::vector LegalActions() const override; + std::string ActionToString(Player player, Action action) const override; + std::string ToString() const override; + + bool IsTerminal() const override { + return static_cast(MaybeFinalReturns()); + } + + std::vector Returns() const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + void UndoAction(Player player, Action action) override; + + // Current board. + chess::ChessBoard& Board() { return current_board_; } + const chess::ChessBoard& Board() const { return current_board_; } + int BoardSize() const { return current_board_.BoardSize(); } + + // Starting board. 
+ chess::ChessBoard& StartBoard() { return start_board_; } + const chess::ChessBoard& StartBoard() const { return start_board_; } + + std::vector& MovesHistory() { return moves_history_; } + const std::vector& MovesHistory() const { + return moves_history_; + } + + protected: + void DoApplyAction(Action action) override; + + private: + friend class DarkChessObserver; + + // Draw can be claimed under the FIDE 3-fold repetition rule (the current + // board position has already appeared twice in the history). + bool IsRepetitionDraw() const; + + // Calculates legal actions and caches them. This is separate from + // LegalActions() as there are a number of other methods that need the value + // of LegalActions. This is a separate method as it's called from + // IsTerminal(), which is also called by LegalActions(). + void MaybeGenerateLegalActions() const; + + absl::optional> MaybeFinalReturns() const; + + // We have to store every move made to check for repetitions and to implement + // undo. We store the current board position as an optimization. + std::vector moves_history_; + // We store the start board for history to support games not starting + // from the start position. + chess::ChessBoard start_board_; + // We store the current board position as an optimization. + chess::ChessBoard current_board_; + + // RepetitionTable records how many times the given hash exists in the history + // stack (including the current board). + // We are already indexing by board hash, so there is no need to hash that + // hash again, so we use a custom passthrough hasher. + class PassthroughHash { + public: + std::size_t operator()(uint64_t x) const { + return static_cast(x); + } + }; + using RepetitionTable = absl::flat_hash_map; + RepetitionTable repetitions_; + mutable absl::optional> cached_legal_actions_; +}; + +// Game object. +class DarkChessGame : public Game { + public: + explicit DarkChessGame(const GameParameters& params); + int NumDistinctActions() const override { + return chess::NumDistinctActions(); + } + std::unique_ptr NewInitialState() const override { + return absl::make_unique(shared_from_this(), board_size_, + fen_); + } + int NumPlayers() const override { return chess::NumPlayers(); } + double MinUtility() const override { return LossUtility(); } + absl::optional UtilitySum() const override { return DrawUtility(); } + double MaxUtility() const override { return WinUtility(); } + std::vector ObservationTensorShape() const override { + std::vector shape{ + (13 + // public boards: piece types * colours + empty + 14) // private boards: piece types * colours + empty + unknown + * board_size_ * board_size_ + + 3 + // public: repetitions count, one-hot encoding + 2 + // public: side to play + 1 + // public: irreversible move counter -- a fraction of $n over 100 + 2 * 2 // private: left/right castling rights, one-hot encoded. 
+ }; + return shape; + } + int MaxGameLength() const override { return chess::MaxGameLength(); } + std::shared_ptr MakeObserver( + absl::optional iig_obs_type, + const GameParameters& params) const; + + std::shared_ptr default_observer_; + + private: + const int board_size_; + const std::string fen_; +}; + +} // namespace dark_chess +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_DARK_CHESS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/dark_chess/dark_chess_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/dark_chess/dark_chess_test.cc new file mode 100644 index 0000000..26a69d4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/dark_chess/dark_chess_test.cc @@ -0,0 +1,68 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/chess/chess_board.h" +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace dark_chess { +namespace { + +namespace testing = open_spiel::testing; + +void BasicDarkChessTests(int board_size) { + GameParameters params; + params["board_size"] = GameParameter(board_size); + + testing::LoadGameTest("dark_chess"); + testing::NoChanceOutcomesTest(*LoadGame("dark_chess", params)); + testing::RandomSimTest(*LoadGame("dark_chess", params), 100); + testing::RandomSimTestWithUndo(*LoadGame("dark_chess", params), 1); +} + +void ChessBoardFlagPropagationTest(bool flag_king_in_check_allowed) { + auto tested_move = + chess::Move(/*from=*/{3, 0}, /*to=*/{2, 0}, + {chess::Color::kWhite, chess::PieceType::kKing}); + + auto board = chess::ChessBoard::BoardFromFEN("1kr1/4/4/3K w - - 0 1", + /*board_size=*/4, + flag_king_in_check_allowed); + bool move_allowed = false; + board->GenerateLegalMoves( + [&move_allowed, tested_move](const chess::Move& found_move) { + if (found_move == tested_move) { + move_allowed = true; + return false; + } + return true; + }); + + SPIEL_CHECK_EQ(move_allowed, flag_king_in_check_allowed); +} + +} // namespace +} // namespace dark_chess +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::dark_chess::BasicDarkChessTests(/*board_size=*/4); + open_spiel::dark_chess::BasicDarkChessTests(/*board_size=*/8); + + open_spiel::dark_chess::ChessBoardFlagPropagationTest( + /*flag_king_in_check_allowed=*/true); + open_spiel::dark_chess::ChessBoardFlagPropagationTest( + /*flag_king_in_check_allowed=*/false); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/dark_hex/dark_hex.cc b/scenarios/bargaining/open_spiel/open_spiel/games/dark_hex/dark_hex.cc new file mode 100644 index 0000000..cd2366b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/dark_hex/dark_hex.cc @@ -0,0 +1,354 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/dark_hex/dark_hex.h" + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/games/hex/hex.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/observer.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace dark_hex { +namespace { + +using hex::kCellStates; + +using hex::CellState; +using hex::kMinValueCellState; + +using hex::StateToString; + +// Game Facts +const GameType kGameType{/*short_name=*/"dark_hex", + /*long_name=*/"Dark Hex", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kImperfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"obstype", GameParameter(kDefaultObsType)}, + {"gameversion", GameParameter(kDefaultGameVersion)}, + {"board_size", GameParameter(kDefaultBoardSize)}, + {"num_cols", GameParameter(kDefaultNumCols)}, + {"num_rows", GameParameter(kDefaultNumRows)}}}; + +const GameType kImperfectRecallGameType{ + /*short_name=*/"dark_hex_ir", + /*long_name=*/"Dark Hex with Imperfect Recall", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kImperfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"obstype", GameParameter(kDefaultObsType)}, + {"gameversion", GameParameter(kDefaultGameVersion)}, + {"board_size", GameParameter(kDefaultBoardSize)}, + {"num_cols", GameParameter(kDefaultNumCols)}, + {"num_rows", GameParameter(kDefaultNumRows)}}}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new DarkHexGame(params, kGameType)); +} + +std::shared_ptr ImperfectRecallFactory( + const GameParameters& params) { + return std::shared_ptr(new ImperfectRecallDarkHexGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); +RegisterSingleTensorObserver single_tensor1(kGameType.short_name); + +REGISTER_SPIEL_GAME(kImperfectRecallGameType, ImperfectRecallFactory); +RegisterSingleTensorObserver single_tensor_imperfect_recall( + kImperfectRecallGameType.short_name); + +} // namespace + +ImperfectRecallDarkHexGame::ImperfectRecallDarkHexGame( + const GameParameters& params) + : DarkHexGame(params, kImperfectRecallGameType) {} + +DarkHexState::DarkHexState(std::shared_ptr game, int num_cols, + int num_rows, GameVersion game_version, + ObservationType obs_type) + : State(game), + state_(game, num_cols, num_rows, hex::StringRep::kStandard), + 
obs_type_(obs_type), + game_version_(game_version), + num_cols_(num_cols), + num_rows_(num_rows), + num_cells_(num_cols * num_rows) { + black_view_.resize(num_cols * num_rows, CellState::kEmpty); + white_view_.resize(num_cols * num_rows, CellState::kEmpty); + if (obs_type == ObservationType::kRevealNothing) { + bits_per_action_ = num_cells_; + longest_sequence_ = num_cells_; + } else { + SPIEL_CHECK_EQ(obs_type_, ObservationType::kRevealNumTurns); + // Reserve 0 for the player and 10 as "I don't know." + bits_per_action_ = num_cells_ + 2; + longest_sequence_ = num_cells_ * 2 - 1; + } +} + +void DarkHexState::DoApplyAction(Action move) { + Player cur_player = CurrentPlayer(); // current player + auto& cur_view = (cur_player == 0 ? black_view_ : white_view_); + + // Either occupied or not + if (game_version_ == GameVersion::kClassicalDarkHex) { + if (state_.BoardAt(move) == CellState::kEmpty) { + state_.ApplyAction(move); + } + } else { + SPIEL_CHECK_EQ(game_version_, GameVersion::kAbruptDarkHex); + if (state_.BoardAt(move) == CellState::kEmpty) { + state_.ApplyAction(move); + } else { + // switch the current player + state_.ChangePlayer(); + } + } + + SPIEL_CHECK_TRUE(cur_view[move] == CellState::kEmpty); + // Update the view - only using CellState::kBlack and CellState::kWhite + if (state_.BoardAt(move) == CellState::kBlack || + state_.BoardAt(move) == CellState::kBlackNorth || + state_.BoardAt(move) == CellState::kBlackSouth) { + cur_view[move] = CellState::kBlack; + } else if (state_.BoardAt(move) == CellState::kWhite || + state_.BoardAt(move) == CellState::kWhiteEast || + state_.BoardAt(move) == CellState::kWhiteWest) { + cur_view[move] = CellState::kWhite; + } else if (state_.BoardAt(move) == CellState::kBlackWin || + state_.BoardAt(move) == CellState::kWhiteWin) { + cur_view[move] = state_.BoardAt(move); + } else { + SPIEL_CHECK_TRUE(false); + } + action_sequence_.push_back(std::pair(cur_player, move)); +} + +std::vector DarkHexState::LegalActions() const { + if (IsTerminal()) return {}; + std::vector moves; + const Player player = CurrentPlayer(); + const auto& cur_view = (player == 0 ? black_view_ : white_view_); + + for (Action move = 0; move < num_cells_; ++move) { + if (cur_view[move] == CellState::kEmpty) { + moves.push_back(move); + } + } + + return moves; +} + +std::string DarkHexState::ViewToString(Player player) const { + const auto& cur_view = (player == 0 ? black_view_ : white_view_); + std::string str; + + for (int r = 0; r < num_rows_; ++r) { + for (int c = 0; c < num_cols_; ++c) { + absl::StrAppend(&str, StateToString(cur_view[r * num_cols_ + c], + state_.string_rep())); + } + if (r < (num_rows_ - 1)) { + absl::StrAppend(&str, "\n"); + } + } + return str; +} + +std::string DarkHexState::ActionSequenceToString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + std::string str; + for (const auto& player_with_action : action_sequence_) { + if (player_with_action.first == player) { + absl::StrAppend(&str, player_with_action.first, ","); + absl::StrAppend(&str, player_with_action.second, " "); + } else if (obs_type_ == ObservationType::kRevealNumTurns) { + absl::StrAppend(&str, player_with_action.first, ",? 
"); + } else { + SPIEL_CHECK_EQ(obs_type_, ObservationType::kRevealNothing); + } + } + return str; +} + +std::string DarkHexState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + std::string str; + absl::StrAppend(&str, ViewToString(player), "\n"); + absl::StrAppend(&str, history_.size(), "\n"); + absl::StrAppend(&str, ActionSequenceToString(player)); + return str; +} + +void DarkHexState::InformationStateTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + const auto& player_view = (player == 0 ? black_view_ : white_view_); + + SPIEL_CHECK_EQ(values.size(), num_cells_ * kCellStates + + longest_sequence_ * bits_per_action_); + std::fill(values.begin(), values.end(), 0.); + for (int cell = 0; cell < num_cells_; ++cell) { + values[cell * kCellStates + + (static_cast(player_view[cell]) - kMinValueCellState)] = 1.0; + } + + // Encoding the sequence + int offset = num_cells_ * kCellStates; + for (const auto& player_with_action : action_sequence_) { + if (player_with_action.first == player) { + // Always include the observing player's actions. + if (obs_type_ == ObservationType::kRevealNumTurns) { + values[offset] = player_with_action.first; // Player 0 or 1 + values[offset + 1 + player_with_action.second] = 1.0; + } else { + // Here we don't need to encode the player since we won't see opponent + // moves. + SPIEL_CHECK_EQ(obs_type_, ObservationType::kRevealNothing); + values[offset + player_with_action.second] = 1.0; + } + offset += bits_per_action_; + } else if (obs_type_ == ObservationType::kRevealNumTurns) { + // If the number of turns are revealed, then each of the other player's + // actions will show up as unknowns. Here, num_cells_ is used to + // encode "unknown". + values[offset] = player_with_action.first; + values[offset + 1 + num_cells_] = 1.0; + offset += bits_per_action_; + } else { + SPIEL_CHECK_EQ(obs_type_, ObservationType::kRevealNothing); + } + } +} + +std::string DarkHexState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + std::string observation = ViewToString(player); + if (obs_type_ == ObservationType::kRevealNumTurns) { + absl::StrAppend(&observation, "\nTotal turns: ", action_sequence_.size()); + } + return observation; +} + +void DarkHexState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + SPIEL_CHECK_EQ(values.size(), game_->ObservationTensorSize()); + std::fill(values.begin(), values.end(), 0.); + + const auto& player_view = (player == 0 ? 
black_view_ : white_view_); + for (int cell = 0; cell < num_cells_; ++cell) { + values[cell * kCellStates + + (static_cast(player_view[cell]) - kMinValueCellState)] = 1.0; + } + + if (obs_type_ == ObservationType::kRevealNumTurns) { + values[num_cells_ * kCellStates + action_sequence_.size()] = 1.0; + } +} + +std::unique_ptr DarkHexState::Clone() const { + return std::unique_ptr(new DarkHexState(*this)); +} + +DarkHexGame::DarkHexGame(const GameParameters& params, GameType game_type) + : Game(game_type, params), + game_(std::static_pointer_cast(LoadGame( + "hex", + {{"num_cols", GameParameter(ParameterValue( + "num_cols", ParameterValue("board_size")))}, + {"num_rows", + GameParameter(ParameterValue( + "num_rows", ParameterValue("board_size")))}}))), + num_cols_( + ParameterValue("num_cols", ParameterValue("board_size"))), + num_rows_( + ParameterValue("num_rows", ParameterValue("board_size"))), + num_cells_(num_cols_ * num_rows_) { + std::string obs_type = ParameterValue("obstype"); + if (obs_type == "reveal-nothing") { + obs_type_ = ObservationType::kRevealNothing; + bits_per_action_ = num_cells_; + longest_sequence_ = num_cells_; + } else if (obs_type == "reveal-numturns") { + obs_type_ = ObservationType::kRevealNumTurns; + // Reserve 0 for the player and 10 as "I don't know." + bits_per_action_ = num_cells_ + 2; + longest_sequence_ = num_cells_ * 2 - 1; + } else { + SpielFatalError(absl::StrCat("Unrecognized observation type: ", obs_type)); + } + + std::string game_version = ParameterValue("gameversion"); + if (game_version == "cdh") { + game_version_ = GameVersion::kClassicalDarkHex; + } else if (game_version == "adh") { + game_version_ = GameVersion::kAbruptDarkHex; + } else { + SpielFatalError(absl::StrCat("Unrecognized game version: ", game_version)); + } +} + +std::vector DarkHexGame::InformationStateTensorShape() const { + return {num_cells_ * kCellStates + longest_sequence_ * bits_per_action_}; +} + +std::vector DarkHexGame::ObservationTensorShape() const { + if (obs_type_ == ObservationType::kRevealNothing) { + return {num_cells_ * kCellStates}; + } else if (obs_type_ == ObservationType::kRevealNumTurns) { + return {num_cells_ * kCellStates + longest_sequence_ + 1}; + } else { + SpielFatalError("Uknown observation type"); + } +} + +} // namespace dark_hex +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/dark_hex/dark_hex.h b/scenarios/bargaining/open_spiel/open_spiel/games/dark_hex/dark_hex.h new file mode 100644 index 0000000..4fcc3b0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/dark_hex/dark_hex.h @@ -0,0 +1,226 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
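The DarkHexGame constructor above reads the board shape and variant through ParameterValue ("num_cols", "num_rows", "board_size", "obstype", "gameversion"). A short sketch of the two equivalent ways callers can pass these values at load time, matching the style used in dark_hex_test.cc further down; the 4x3 board and reveal-numturns observation type are illustrative values only, and the helper names are hypothetical:

#include <memory>

#include "open_spiel/game_parameters.h"
#include "open_spiel/spiel.h"

// Parameter-map form.
std::shared_ptr<const open_spiel::Game> LoadDarkHexWithParams() {
  open_spiel::GameParameters params;
  params["num_cols"] = open_spiel::GameParameter(4);
  params["num_rows"] = open_spiel::GameParameter(3);
  params["obstype"] = open_spiel::GameParameter("reveal-numturns");
  return open_spiel::LoadGame("dark_hex", params);
}

// Equivalent inline game-string form.
std::shared_ptr<const open_spiel::Game> LoadDarkHexWithString() {
  return open_spiel::LoadGame(
      "dark_hex(num_cols=4,num_rows=3,obstype=reveal-numturns)");
}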
+ +#ifndef OPEN_SPIEL_GAMES_DARK_HEX_H_ +#define OPEN_SPIEL_GAMES_DARK_HEX_H_ + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/games/hex/hex.h" +#include "open_spiel/spiel.h" + +// Dark Hex (Some versions also called Phantom Hex or Kriegspiel Hex) is an +// imperfect information version of the classic game of Hex. Players are not +// exposed to oppsite sides piece information. Only a refree has the full +// information of the board. When a move fails due to collision/rejection the +// player gets some information of the cell (i.e. stone exists), and is allowed +// to make another move until success. +// +// There are slightly different versions of the game exists depending on the +// level of information being exposed to the opponent and what happens in the +// event of an attempt to move to an occupied cell. We have two different +// versions of Dark Hex (Phantom Hex) implemented: +// - Classical Dark Hex (cdh) +// Player: +// -> Replays after the attempt to move to an occupied cell +// (Rejection) +// - Abrupt Dark Hex (adh) +// Player: +// -> No replay after the attempt to move to an occupied cell +// (Collision) +// +// For classical dark hex we do allow specifying 'obstype'. It specifies if the +// player is exposed to number of turns that has passed or not. +// +// Common phantom games include Kriegspiel (Phantom chess), e.g. see +// https://en.wikipedia.org/wiki/Kriegspiel_(chess), and Phantom Go. +// See also http://mlanctot.info/files/papers/PhD_Thesis_MarcLanctot.pdf, Ch 3. +// +// Parameters: +// "gameversion" string Which version of the game to activate +// (default "cdh") ['cdh', 'adh'] +// "obstype" string If the player is informed of the +// number of moves attempted +// (default "reveal-nothing") ['reveal-nothing', 'reveal-numturns'] +// +// "num_cols" int Number of columns on the hex board +// (default 3) +// "num_rows" int Number of the rows on the hex board +// (default 3) + +namespace open_spiel { +namespace dark_hex { + +inline constexpr const char* kDefaultObsType = "reveal-nothing"; +inline constexpr const char* kDefaultGameVersion = "cdh"; +inline constexpr int kDefaultNumRows = 3; +inline constexpr int kDefaultNumCols = 3; +inline constexpr int kDefaultBoardSize = 3; + +// black - white - empty +inline constexpr int kPosStates = hex::kNumPlayers + 1; + +// Add here if anything else is needed to be revealed +enum class ObservationType { + kRevealNothing, + kRevealNumTurns, +}; + +enum class GameVersion { + kAbruptDarkHex, + kClassicalDarkHex, +}; + +class DarkHexState : public State { + public: + DarkHexState(std::shared_ptr game, int num_cols, int num_rows, + GameVersion game_version, ObservationType obs_type); + + Player CurrentPlayer() const override { return state_.CurrentPlayer(); } + + std::string ActionToString(Player player, Action action_id) const override { + return state_.ActionToString(player, action_id); + } + std::string ToString() const override { return state_.ToString(); } + bool IsTerminal() const override { return state_.IsTerminal(); } + std::vector Returns() const override { return state_.Returns(); } + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + + // Dark games funcs. 
+ std::string InformationStateString(Player player) const override; + void InformationStateTensor(Player player, + absl::Span values) const override; + + std::unique_ptr Clone() const override; + std::vector LegalActions() const override; + + protected: + void DoApplyAction(Action move) override; + std::string ViewToString(Player player) const; + + private: + std::string ActionSequenceToString(Player player) const; + + hex::HexState state_; + ObservationType obs_type_; + GameVersion game_version_; + const int num_cols_; // x + const int num_rows_; // y + const int num_cells_; + int bits_per_action_; + int longest_sequence_; + + // Change this to _history on base class + std::vector> action_sequence_; + std::vector black_view_; + std::vector white_view_; +}; + +class DarkHexGame : public Game { + public: + DarkHexGame(const GameParameters& params, GameType game_type); + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new DarkHexState( + shared_from_this(), num_cols_, num_rows_, game_version_, obs_type_)); + } + int NumDistinctActions() const override { + return game_->NumDistinctActions(); + } + int NumPlayers() const override { return game_->NumPlayers(); } + double MinUtility() const override { return game_->MinUtility(); } + absl::optional UtilitySum() const override { + return game_->UtilitySum(); + } + double MaxUtility() const override { return game_->MaxUtility(); } + + std::vector InformationStateTensorShape() const override; + std::vector ObservationTensorShape() const override; + int MaxGameLength() const override { return num_cols_ * num_rows_ * 2 - 1; } + ObservationType obs_type() const { return obs_type_; } + GameVersion game_version() const { return game_version_; } + int num_cols() const { return num_cols_; } + int num_rows() const { return num_rows_; } + + private: + std::shared_ptr game_; + ObservationType obs_type_; + GameVersion game_version_; + const int num_cols_; + const int num_rows_; + const int num_cells_; + int bits_per_action_; + int longest_sequence_; +}; + +class ImperfectRecallDarkHexState : public DarkHexState { + public: + ImperfectRecallDarkHexState(std::shared_ptr game, int num_rows_, + int num_cols_, GameVersion game_version, + ObservationType obs_type) + : DarkHexState(game, num_rows_, num_cols_, game_version, obs_type) {} + std::string InformationStateString(Player player) const override { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return absl::StrCat("P", player, " ", ViewToString(player)); + } + std::unique_ptr Clone() const override { + return std::unique_ptr(new ImperfectRecallDarkHexState(*this)); + } +}; + +class ImperfectRecallDarkHexGame : public DarkHexGame { + public: + explicit ImperfectRecallDarkHexGame(const GameParameters& params); + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new ImperfectRecallDarkHexState( + shared_from_this(), num_cols(), num_rows(), game_version(), + obs_type())); + } +}; + +inline std::ostream& operator<<(std::ostream& stream, + const ObservationType& obs_type) { + switch (obs_type) { + case ObservationType::kRevealNothing: + return stream << "Reveal Nothing"; + case ObservationType::kRevealNumTurns: + return stream << "Reveal Num Turns"; + default: + SpielFatalError("Unknown observation type"); + } +} + +inline std::ostream& operator<<(std::ostream& stream, + const GameVersion& game_version) { + switch (game_version) { + case GameVersion::kClassicalDarkHex: + return stream << "Classical Dark Hex"; + case 
GameVersion::kAbruptDarkHex: + return stream << "Abrupt Dark Hex"; + default: + SpielFatalError("Unknown game version"); + } +} + +} // namespace dark_hex +} // namespace open_spiel + +#endif diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/dark_hex/dark_hex_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/dark_hex/dark_hex_test.cc new file mode 100644 index 0000000..89549dc --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/dark_hex/dark_hex_test.cc @@ -0,0 +1,177 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace dark_hex { +namespace { + +namespace testing = open_spiel::testing; + +void GameBlackWinWithCollisionAndObs() { + std::shared_ptr game = + LoadGame("dark_hex", {{"num_cols", GameParameter(3)}, + {"num_rows", GameParameter(3)}, + {"obstype", GameParameter("reveal-numturns")}}); + std::unique_ptr state = game->NewInitialState(); + std::vector lm = state->LegalActions(); // initial legal moves + // . . . + // . . . + // . . . + state->ApplyAction(4); + // . . . + // . B . B represents black-stone + // . . . + // Check White's possible moves before rejection + SPIEL_CHECK_EQ(state->LegalActions(), lm); + state->ApplyAction(4); + // . . . + // . R . R represents rejection + // . . . + // Check White's possible moves after rejection + lm.erase(std::remove(lm.begin(), lm.end(), 4), lm.end()); + SPIEL_CHECK_EQ(state->LegalActions(), lm); + // . . . + // . B . W represents white-stone + // . W . + state->ApplyAction(7); + // . . . + // . B . Black now knows the whites move + // . R . + state->ApplyAction(7); + // Check blacks info on number of turns + std::string str = state->ObservationString(state->CurrentPlayer()); + SPIEL_CHECK_EQ(str.back(), '4'); + // . . . + // . B . + // B W . + state->ApplyAction(6); + // . . W + // . B . + // B W . + state->ApplyAction(2); + // . B W + // . B . + // B W . + state->ApplyAction(1); + // Black wins + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_EQ(state->PlayerReturn(0), 1.0); + SPIEL_CHECK_EQ(state->PlayerReturn(1), -1.0); +} + +void GameBlackWinsMaximumCollisions() { + // White follows exact moves black is playing, black does the same + // B . . R . . B W . B R . B W B + // . . . . . . . . . . . . . . . ... + // . . . . . . . . . . . . . . . 
+ std::shared_ptr game = LoadGame( + "dark_hex", + {{"num_cols", GameParameter(3)}, {"num_rows", GameParameter(3)}}); + std::unique_ptr state = game->NewInitialState(); + std::array play_seq = {0, 1, 4, 2, 7, 5, 8, 6}; // 3 is the terminal move + for (int i = 0; i < play_seq.size(); ++i) { + state->ApplyAction(play_seq[i]); // player moves + state->ApplyAction(play_seq[i]); // other player tries to move + } + state->ApplyAction(3); + // Black wins + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_EQ(state->PlayerReturn(0), 1.0); + SPIEL_CHECK_EQ(state->PlayerReturn(1), -1.0); +} + +void GameUnevenBoardBlackWin() { + std::shared_ptr game = LoadGame( + "dark_hex", + {{"num_cols", GameParameter(4)}, {"num_rows", GameParameter(3)}}); + std::unique_ptr state = game->NewInitialState(); + state->ApplyAction(8); + state->ApplyAction(5); + state->ApplyAction(4); + state->ApplyAction(1); + state->ApplyAction(0); + std::cout << state->ObservationString(0) << std::endl; + std::cout << state->ObservationString(1) << std::endl; + // Black wins + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_EQ(state->PlayerReturn(0), 1.0); + SPIEL_CHECK_EQ(state->PlayerReturn(1), -1.0); +} + +void GameUnevenBoardWhiteWin() { + std::shared_ptr game = LoadGame( + "dark_hex", + {{"num_cols", GameParameter(4)}, {"num_rows", GameParameter(3)}}); + std::unique_ptr state = game->NewInitialState(); + state->ApplyAction(8); + state->ApplyAction(5); + state->ApplyAction(9); + state->ApplyAction(4); + state->ApplyAction(10); + state->ApplyAction(6); + state->ApplyAction(11); + state->ApplyAction(7); + std::cout << state->ObservationString(0) << std::endl; + std::cout << state->ObservationString(1) << std::endl; + // White wins + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_EQ(state->PlayerReturn(0), -1.0); + SPIEL_CHECK_EQ(state->PlayerReturn(1), 1.0); +} + +void ClassicalDarkHexTests() { + testing::LoadGameTest("dark_hex"); + testing::NoChanceOutcomesTest(*LoadGame("dark_hex")); + testing::RandomSimTest(*LoadGame("dark_hex(num_cols=5,num_rows=5)"), 10); + testing::LoadGameTest("dark_hex(obstype=reveal-numturns)"); + GameBlackWinWithCollisionAndObs(); + GameBlackWinsMaximumCollisions(); + GameUnevenBoardBlackWin(); + GameUnevenBoardWhiteWin(); +} + +void AbruptDHCustomTest() { + std::shared_ptr game = + LoadGame("dark_hex", {{"num_cols", GameParameter(2)}, + {"num_rows", GameParameter(2)}, + {"gameversion", GameParameter("adh")}}); + std::unique_ptr state = game->NewInitialState(); + state->ApplyAction(0); + state->ApplyAction(0); + state->ApplyAction(2); + // Black wins + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_EQ(state->PlayerReturn(0), 1.0); + SPIEL_CHECK_EQ(state->PlayerReturn(1), -1.0); +} + +void AbruptDarkHexTests() { + testing::LoadGameTest("dark_hex(gameversion=adh)"); + testing::NoChanceOutcomesTest(*LoadGame("dark_hex(gameversion=adh)")); + testing::RandomSimTest( + *LoadGame("dark_hex(num_cols=3,num_rows=3,gameversion=adh)"), 3); + AbruptDHCustomTest(); +} + +} // namespace +} // namespace dark_hex +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::dark_hex::ClassicalDarkHexTests(); + open_spiel::dark_hex::AbruptDarkHexTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/deep_sea/deep_sea.cc b/scenarios/bargaining/open_spiel/open_spiel/games/deep_sea/deep_sea.cc new file mode 100644 index 0000000..6aac480 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/deep_sea/deep_sea.cc @@ -0,0 +1,203 @@ +// Copyright 2019 DeepMind 
Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/deep_sea/deep_sea.h" + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace deep_sea { +namespace { + +// Facts about the game. +const GameType kGameType{ + /*short_name=*/"deep_sea", + /*long_name=*/"DeepSea", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kImperfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kRewards, + /*max_num_players=*/1, + /*min_num_players=*/1, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + { + {"size", GameParameter(kDefaultSize)}, + {"seed", GameParameter(kDefaultSeed)}, + {"unscaled_move_cost", GameParameter(kDefaultUnscaledMoveCost)}, + {"randomize_actions", GameParameter(kDefaultRandomizeActions)}, + }}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new DeepSeaGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +} // namespace + +DeepSeaState::DeepSeaState(std::shared_ptr game) : State(game) { + SPIEL_CHECK_TRUE(game); + const DeepSeaGame& parent_game = static_cast(*game); + size_ = parent_game.MaxGameLength(); + move_cost_ = -parent_game.UnscaledMoveCost() / size_; + action_mapping_ = parent_game.ActionMapping(); +} + +int DeepSeaState::CurrentPlayer() const { + if (IsTerminal()) return kTerminalPlayerId; + return 0; +} + +std::vector DeepSeaState::LegalActions() const { + if (IsTerminal()) return {}; + return {0, 1}; +} + +std::string DeepSeaState::ActionToString(Player player, + Action action_id) const { + SPIEL_CHECK_EQ(player, 0); + return action_id ? "RIGHT" : "LEFT"; +} + +std::string DeepSeaState::ToString() const { + std::string str; + str.reserve((size_ + 1) * (size_ + 2)); + for (int r = 0; r < size_ + 1; ++r) { + for (int c = 0; c < size_ + 1; ++c) { + if (player_row_ == r && player_col_ == c) { + str += "x"; + } else if (r < size_ && c <= r) { + str += action_mapping_[r * size_ + c] ? 
'R' : 'L'; + } else { + str += "."; + } + } + str += "\n"; + } + return str; +} + +bool DeepSeaState::IsTerminal() const { return player_row_ == size_; } + +std::vector DeepSeaState::Rewards() const { + double reward = 0; + if (!direction_history_.empty() && direction_history_.back()) { + reward += move_cost_; + } + if (IsTerminal() && player_col_ == size_) { + reward += 1; + } + return {reward}; +} + +std::vector DeepSeaState::Returns() const { + double reward_sum = 0; + for (bool direction : direction_history_) + if (direction) reward_sum += move_cost_; + if (IsTerminal() && player_col_ == size_) { + reward_sum += 1; + } + return {reward_sum}; +} + +std::string DeepSeaState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + // We need to account for the possibility that `player_row == size_` at + // terminal states, so that's why we add the +1. + std::string str((size_ + 1) * size_, '.'); + str[player_row_ * size_ + player_col_] = 'x'; + return str; +} + +void DeepSeaState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + std::fill(values.begin(), values.end(), 0.); + SPIEL_CHECK_EQ(values.size(), size_ * size_); + if (player_row_ < size_ && player_col_ < size_) + values[player_row_ * size_ + player_col_] = 1.0; +} + +std::unique_ptr DeepSeaState::Clone() const { + return std::unique_ptr(new DeepSeaState(*this)); +} + +void DeepSeaState::UndoAction(Player player, Action move) { + // Can only reliably undo by replaying the actions. This is because moving + // left from column 0 is a no-op, so we can't deduce the previous column if + // we're now at column 0. + direction_history_.pop_back(); + history_.pop_back(); + --move_number_; + player_row_ = 0; + player_col_ = 0; + for (auto action_right : direction_history_) { + if (action_right) { + ++player_col_; + } else if (player_col_ > 0) { + --player_col_; + } + ++player_row_; + } +} + +void DeepSeaState::DoApplyAction(Action move) { + const int i = player_row_ * size_ + player_col_; + const bool action_right = move == action_mapping_[i]; + if (action_right) { + ++player_col_; + } else if (player_col_ > 0) { + --player_col_; + } + ++player_row_; + direction_history_.push_back(action_right); +} + +DeepSeaGame::DeepSeaGame(const GameParameters& params) + : Game(kGameType, params), + size_(ParameterValue("size")), + unscaled_move_cost_(ParameterValue("unscaled_move_cost")) { + action_mapping_.resize(size_ * size_); + if (ParameterValue("randomize_actions")) { + std::seed_seq seq{ParameterValue("seed")}; + std::mt19937 rng(seq); + for (int i = 0; i < action_mapping_.size(); ++i) { + action_mapping_[i] = absl::Uniform(rng, 0, 2); + } + } else { + std::fill(action_mapping_.begin(), action_mapping_.end(), true); + } +} + +} // namespace deep_sea +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/deep_sea/deep_sea.h b/scenarios/bargaining/open_spiel/open_spiel/games/deep_sea/deep_sea.h new file mode 100644 index 0000000..3138735 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/deep_sea/deep_sea.h @@ -0,0 +1,124 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_DEEP_SEA_H_ +#define OPEN_SPIEL_GAMES_DEEP_SEA_H_ + +#include +#include +#include + +#include "open_spiel/spiel.h" + +// Implementation of 'Deep Sea' exploration environment. +// +// This environment is designed as a stylized version of the 'exploration +// chain', such as the classical river swim environment (Strehl & Littman '04, +// https://ieeexplore.ieee.org/document/1374179). The observation is an N x N +// grid, with a falling block starting in top left. Each timestep the agent can +// move 'left' or 'right', which are mapped to discrete actions 0 and 1 on a +// state-dependent level. There is a large reward of +1 in the bottom right +// state, but this can be hard for many exploration algorithms to find. +// +// For more information, see papers: +// [1] https://arxiv.org/abs/1703.07608 +// [2] https://arxiv.org/abs/1806.03335 +// +// Parameters: +// "size" int rows and columns (default = 5) +// "seed" int seed for randomizing actions (default = 42) +// "unscaled_move_cost" double move cost (default = 0.01) +// "randomize_actions" bool state dependent actions (default = true) + +namespace open_spiel { +namespace deep_sea { + +// Constants. +constexpr int kNumPlayers = 1; +constexpr int kNumActions = 2; + +constexpr int kDefaultSize = 5; +constexpr int kDefaultSeed = 42; +constexpr double kDefaultUnscaledMoveCost = 0.01; +constexpr bool kDefaultRandomizeActions = true; + +// State of an in-play game. +class DeepSeaState : public State { + public: + DeepSeaState(const std::shared_ptr game); + DeepSeaState(const DeepSeaState&) = default; + + Player CurrentPlayer() const override; + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Rewards() const override; + std::vector Returns() const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + void UndoAction(Player player, Action move) override; + std::vector LegalActions() const override; + + protected: + void DoApplyAction(Action move) override; + + private: + // Copied from game. + int size_; + double move_cost_; // Cost of moving right. + std::vector action_mapping_; + + // Position of the player. + int player_row_ = 0; + int player_col_ = 0; + + // History of actual moves. `true` means RIGHT, otherwise LEFT. + std::vector direction_history_; +}; + +// Game object. 
+class DeepSeaGame : public Game {
+ public:
+  explicit DeepSeaGame(const GameParameters& params);
+  std::unique_ptr<State> NewInitialState() const override {
+    return std::unique_ptr<State>(new DeepSeaState(shared_from_this()));
+  }
+  std::vector<int> ObservationTensorShape() const override {
+    return {size_, size_};
+  }
+
+  int NumDistinctActions() const override { return kNumActions; }
+  int MaxChanceOutcomes() const override { return kNumActions; }
+  int NumPlayers() const override { return kNumPlayers; }
+  double MaxUtility() const override { return 1 - unscaled_move_cost_; }
+  double MinUtility() const override { return -unscaled_move_cost_; }
+  int MaxGameLength() const override { return size_; }
+
+  double UnscaledMoveCost() const { return unscaled_move_cost_; }
+
+  // Whether the action will be reversed (false) or upheld (true).
+  std::vector<bool> ActionMapping() const { return action_mapping_; }
+
+ private:
+  const int size_;
+  const double unscaled_move_cost_;
+  std::vector<bool> action_mapping_;
+};
+
+}  // namespace deep_sea
+}  // namespace open_spiel
+
+#endif  // OPEN_SPIEL_GAMES_DEEP_SEA_H_
diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/deep_sea/deep_sea_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/deep_sea/deep_sea_test.cc
new file mode 100644
index 0000000..bf5748c
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/games/deep_sea/deep_sea_test.cc
@@ -0,0 +1,39 @@
+// Copyright 2019 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "open_spiel/games/deep_sea/deep_sea.h"
+
+#include "open_spiel/algorithms/get_all_states.h"
+#include "open_spiel/spiel.h"
+#include "open_spiel/spiel_utils.h"
+#include "open_spiel/tests/basic_tests.h"
+
+namespace open_spiel {
+namespace deep_sea {
+namespace {
+
+namespace testing = open_spiel::testing;
+
+void BasicDeepSeaTests() {
+  testing::LoadGameTest("deep_sea(size=5)");
+  testing::ChanceOutcomesTest(*LoadGame("deep_sea(size=5)"));
+  testing::RandomSimTest(*LoadGame("deep_sea(size=5)"), 100);
+  testing::RandomSimTestWithUndo(*LoadGame("deep_sea(size=5)"), 1);
+}
+
+}  // namespace
+}  // namespace deep_sea
+}  // namespace open_spiel
+
+int main(int argc, char** argv) { open_spiel::deep_sea::BasicDeepSeaTests(); }
diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/dots_and_boxes/dots_and_boxes.cc b/scenarios/bargaining/open_spiel/open_spiel/games/dots_and_boxes/dots_and_boxes.cc
new file mode 100644
index 0000000..5873bbb
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/games/dots_and_boxes/dots_and_boxes.cc
@@ -0,0 +1,675 @@
+// Copyright 2019 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Contributed by Wannes Meert, Giuseppe Marra, and Pieter Robberechts +// for the KU Leuven course Machine Learning: Project. + +#include "open_spiel/games/dots_and_boxes/dots_and_boxes.h" + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/tensor_view.h" + +namespace open_spiel { +namespace dots_and_boxes { +namespace { + +// Facts about the game. +const GameType kGameType{ + /*short_name=*/"dots_and_boxes", + /*long_name=*/"Dots and Boxes", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"num_rows", GameParameter(kDefaultNumRows)}, + {"num_cols", GameParameter(kDefaultNumCols)}, + {"utility_margin", GameParameter(kDefaultUtilityMargin)}}}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new DotsAndBoxesGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +} // namespace + +CellState PlayerToState(Player player) { + switch (player) { + case 0: + return CellState::kPlayer1; + case 1: + return CellState::kPlayer2; + default: + SpielFatalError(absl::StrCat("Invalid player id ", player)); + return CellState::kEmpty; + } +} + +std::string StateToString(CellState state) { + switch (state) { + case CellState::kEmpty: + return "."; + case CellState::kPlayer1: + return "1"; + case CellState::kPlayer2: + return "2"; + default: + SpielFatalError("Unknown state."); + } +} + +std::string OrientationToString(CellOrientation orientation) { + switch (orientation) { + case CellOrientation::kHorizontal: + return "h"; + case CellOrientation::kVertical: + return "v"; + default: + SpielFatalError("Unknown orientation."); + } +} + +// Move Methods ================================================================ + +Move::Move(int row, int col, CellOrientation orientation, int rows, int cols) { + row_ = row; + col_ = col; + orientation_ = orientation; + num_rows_ = rows; + num_cols_ = cols; +} + +Move::Move() { + row_ = 0; + col_ = 0; + orientation_ = CellOrientation::kVertical; + num_rows_ = 0; + num_cols_ = 0; +} + +Move::Move(Action action, int rows, int cols) { + num_rows_ = rows; + num_cols_ = cols; + int maxh = (num_rows_ + 1) * num_cols_; + if (action < maxh) { + // Horizontal + orientation_ = CellOrientation::kHorizontal; + row_ = action / num_cols_; + col_ = action % num_cols_; + } else { + // Vertical + action -= maxh; + orientation_ = CellOrientation::kVertical; + row_ = action / (num_cols_ + 1); + col_ = action % 
(num_cols_ + 1); + } + SPIEL_CHECK_LT(row_, num_rows_ + 1); + SPIEL_CHECK_LT(col_, num_cols_ + 1); +} + +void Move::Set(int row, int col, CellOrientation orientation) { + row_ = row; + col_ = col; + SPIEL_CHECK_LT(row_, num_rows_ + 1); + SPIEL_CHECK_LT(col_, num_cols_ + 1); + orientation_ = orientation; +} + +int Move::GetRow() const { return row_; } +int Move::GetCol() const { return col_; } +CellOrientation Move::GetOrientation() const { return orientation_; } + +Action Move::ActionId() { + // First bit is horizontal (0) or vertical (1) + Action action = 0; + int maxh = (num_rows_ + 1) * num_cols_; + if (orientation_ == CellOrientation::kHorizontal) { + action = row_ * num_cols_ + col_; + } else { + action = maxh + row_ * (num_cols_ + 1) + col_; + } + return action; +} + +int Move::GetCell() { return row_ * (num_cols_ + 1) + col_; } + +int Move::GetCellLeft() { + if (col_ == 0) { + return -1; + } + return row_ * (num_cols_ + 1) + (col_ - 1); +} + +int Move::GetCellRight() { + if (col_ == num_cols_) { + return -1; + } + return row_ * (num_cols_ + 1) + (col_ + 1); +} + +int Move::GetCellAbove() { + if (row_ == 0) { + return -1; + } + return (row_ - 1) * (num_cols_ + 1) + col_; +} + +int Move::GetCellBelow() { + if (row_ == num_rows_) { + return -1; + } + return (row_ + 1) * (num_cols_ + 1) + col_; +} + +int Move::GetCellAboveLeft() { + if (row_ == 0 || col_ == 0) { + return -1; + } + return (row_ - 1) * (num_cols_ + 1) + (col_ - 1); +} + +int Move::GetCellAboveRight() { + if (row_ == 0 || col_ == num_cols_) { + return -1; + } + return (row_ - 1) * (num_cols_ + 1) + (col_ + 1); +} + +int Move::GetCellBelowLeft() { + if (row_ == num_rows_ || col_ == 0) { + return -1; + } + return (row_ + 1) * (num_cols_ + 1) + (col_ - 1); +} + +int Move::GetCellBelowRight() { + if (row_ == num_rows_ || col_ == num_cols_) { + return -1; + } + return (row_ + 1) * (num_cols_ + 1) + (col_ + 1); +} + +// DotsAndBoxesState Methods =================================================== + +void DotsAndBoxesState::DoApplyAction(Action action) { + Move move = Move(action, num_rows_, num_cols_); + int cell = move.GetCell(); + bool won_cell = false; + if (move.GetOrientation() == CellOrientation::kVertical) { + SPIEL_CHECK_EQ(v_[cell], CellState::kEmpty); + v_[cell] = PlayerToState(CurrentPlayer()); + + // Left + if (move.GetCol() > 0) { + if (v_[move.GetCellLeft()] != CellState::kEmpty && + h_[move.GetCellLeft()] != CellState::kEmpty && + h_[move.GetCellBelowLeft()] != CellState::kEmpty) { + won_cell = true; + p_[move.GetCellLeft()] = PlayerToState(CurrentPlayer()); + points_[current_player_]++; + } + } + + // Right + if (move.GetCol() < num_cols_) { + if (v_[move.GetCellRight()] != CellState::kEmpty && + h_[move.GetCellBelow()] != CellState::kEmpty && + h_[cell] != CellState::kEmpty) { + won_cell = true; + p_[cell] = PlayerToState(CurrentPlayer()); + points_[current_player_]++; + } + } + + } else { // move.GetOrientation() == kHorizontal + SPIEL_CHECK_EQ(h_[cell], CellState::kEmpty); + h_[cell] = PlayerToState(CurrentPlayer()); + + // Above + if (move.GetRow() > 0) { + if (v_[move.GetCellAbove()] != CellState::kEmpty && + v_[move.GetCellAboveRight()] != CellState::kEmpty && + h_[move.GetCellAbove()] != CellState::kEmpty) { + won_cell = true; + p_[move.GetCellAbove()] = PlayerToState(CurrentPlayer()); + points_[current_player_]++; + } + } + // Below + if (move.GetRow() < num_rows_) { + if (v_[cell] != CellState::kEmpty && + v_[move.GetCellRight()] != CellState::kEmpty && + h_[move.GetCellBelow()] != CellState::kEmpty) { + 
won_cell = true; + p_[cell] = PlayerToState(CurrentPlayer()); + points_[current_player_]++; + } + } + } + + if (Wins(current_player_)) { + outcome_ = current_player_; + } + if (!won_cell) { + // If box is scored, current player keeps the turn + current_player_ = 1 - current_player_; + } + num_moves_ += 1; +} + +std::vector DotsAndBoxesState::LegalActions() const { + if (IsTerminal()) return {}; + std::vector actions; + int action = 0; + Move move; + move.SetRowsCols(num_rows_, num_cols_); + int maxh = (num_rows_ + 1) * num_cols_; + int maxv = num_rows_ * (num_cols_ + 1); + // Horizontal lines + for (int row = 0; row <= num_rows_; ++row) { + for (int col = 0; col < num_cols_; ++col) { + move.Set(row, col, CellOrientation::kHorizontal); + if (h_[move.GetCell()] == CellState::kEmpty) { + actions.push_back(action); + } else { + } + action++; + } + } + SPIEL_CHECK_EQ(action, maxh); + // Vertical lines + for (int row = 0; row < num_rows_; ++row) { + for (int col = 0; col <= num_cols_; ++col) { + move.Set(row, col, CellOrientation::kVertical); + if (v_[move.GetCell()] == CellState::kEmpty) { + actions.push_back(action); + } else { + } + // std::cout << action << std::endl; + action++; + } + } + SPIEL_CHECK_EQ(action, maxh + maxv); + return actions; +} + +std::string DotsAndBoxesState::DbnString() const { + // A string representing which lines have been set. + // This corresponds to an unscored state representation + // (Barker and Korf 2012). + // For a scored state, use the ObservationTensor function. + std::string str; + int cell = 0; + int idx = 0; + for (int row = 0; row < num_rows_ + 1; ++row) { + for (int col = 0; col < num_cols_; ++col) { + if (h_[cell] != CellState::kEmpty) { + absl::StrAppend(&str, "1"); + } else { + absl::StrAppend(&str, "0"); + } + idx++; + cell++; + } + cell++; + } + cell = 0; + for (int row = 0; row < num_rows_; ++row) { + for (int col = 0; col < num_cols_ + 1; ++col) { + if (v_[cell] != CellState::kEmpty) { + absl::StrAppend(&str, "1"); + } else { + absl::StrAppend(&str, "0"); + } + idx++; + cell++; + } + } + return str; +} + +std::string DotsAndBoxesState::ActionToString(Player player, + Action action_id) const { + Move move(action_id, num_rows_, num_cols_); + return absl::StrCat("P", StateToString(PlayerToState(player)), "(", + OrientationToString(move.GetOrientation()), ",", + move.GetRow(), ",", move.GetCol(), ")"); +} + +bool DotsAndBoxesState::Wins(Player player) const { + if (IsFull()) { + // Game over + if (PlayerToState(player) == CellState::kPlayer1) { + return points_[0] > points_[1]; + } else { + return points_[0] < points_[1]; + } + } + return false; +} + +bool DotsAndBoxesState::IsFull() const { + return num_moves_ == + (num_rows_ + 1) * num_cols_ + num_rows_ * (num_cols_ + 1); +} + +DotsAndBoxesState::DotsAndBoxesState(std::shared_ptr game, + int num_rows, int num_cols, + bool utility_margin) + : State(game), + num_rows_(num_rows), + num_cols_(num_cols), + num_cells_((1 + num_rows) * (1 + num_cols)), + utility_margin_(utility_margin) { + SPIEL_CHECK_GE(num_rows_, 1); + SPIEL_CHECK_GE(num_cols_, 1); + h_.resize(num_cells_); + v_.resize(num_cells_); + p_.resize(num_cells_); + std::fill(begin(h_), end(h_), CellState::kEmpty); + std::fill(begin(v_), end(v_), CellState::kEmpty); + std::fill(begin(p_), end(p_), CellState::kEmpty); + std::fill(begin(points_), end(points_), 0); +} + +// Create initial board from the Dots-and-Boxes Notation. 
+// A vector with: +// [b | for r in [0,num_rows+1], for c in [0,num_cols]: +// b=1 if horizontal line[r,c] set else 0] + +// [b | for r in [0,num_rows_], for c in [0,num_cols+1]: +// b=1 if vertical line[r,c] set else 0] +DotsAndBoxesState::DotsAndBoxesState(std::shared_ptr game, + int num_rows, int num_cols, + bool utility_margin, + const std::string& dbn) + : State(game), + num_rows_(num_rows), + num_cols_(num_cols), + num_cells_((1 + num_rows) * (1 + num_cols)), + utility_margin_(utility_margin) { + /* std::cout << "Init dots and boxes state with dbn\n"; */ + SPIEL_CHECK_GE(num_rows_, 1); + /* SPIEL_CHECK_LE(num_rows_, 1000); */ + SPIEL_CHECK_GE(num_cols_, 1); + /* SPIEL_CHECK_LE(num_cols_, 1000); */ + h_.resize(num_cells_); + v_.resize(num_cells_); + p_.resize(num_cells_); + std::fill(begin(h_), end(h_), CellState::kEmpty); + std::fill(begin(v_), end(v_), CellState::kEmpty); + std::fill(begin(p_), end(p_), CellState::kEmpty); + std::fill(begin(points_), end(points_), 0); + int cell = 0; + int idx = 0; + for (int row = 0; row < num_rows_ + 1; ++row) { + for (int col = 0; col < num_cols_; ++col) { + if (dbn[idx] == '1') { + h_[cell] = CellState::kSet; + num_moves_++; + } + idx++; + cell++; + } + cell++; + } + cell = 0; + for (int row = 0; row < num_rows_; ++row) { + for (int col = 0; col < num_cols_ + 1; ++col) { + if (dbn[idx] == '1') { + v_[cell] = CellState::kSet; + num_moves_++; + } + idx++; + cell++; + } + } + int max_moves = (num_rows_ + 1) * num_cols_ + num_rows_ * (num_cols_ + 1); + SPIEL_CHECK_LE(num_moves_, max_moves); +} + +std::string DotsAndBoxesState::ToString() const { + std::string str; + int cell = 0; + int cell_start = 0; + for (int r = 0; r < num_rows_; ++r) { + cell_start = cell; + for (int c = 0; c <= num_cols_; ++c) { + absl::StrAppend(&str, StateToStringH(h_[cell], r, c)); + cell++; + } + absl::StrAppend(&str, "\n"); + cell = cell_start; + for (int c = 0; c < num_cols_; ++c) { + absl::StrAppend(&str, StateToStringV(v_[cell], r, c)); + absl::StrAppend(&str, StateToStringP(p_[cell], r, c)); + cell++; + } + absl::StrAppend(&str, StateToStringV(v_[cell], r, num_cols_)); + cell++; + absl::StrAppend(&str, "\n"); + } + for (int c = 0; c <= num_cols_; ++c) { + absl::StrAppend(&str, StateToStringH(h_[cell], num_rows_, c)); + cell++; + } + absl::StrAppend(&str, "\n"); + return str; +} + +bool DotsAndBoxesState::IsTerminal() const { + return outcome_ != kInvalidPlayer || IsFull(); +} + +std::vector DotsAndBoxesState::Returns() const { + if (utility_margin_) { + if (IsTerminal()) { + double margin = (double)(points_[0] - points_[1]); + return {margin, -margin}; + } else { + return {0.0, 0.0}; + } + } else { + if (Wins(Player{0})) { + return {1.0, -1.0}; + } else if (Wins(Player{1})) { + return {-1.0, 1.0}; + } else { + // Game is not finished + return {0.0, 0.0}; + } + } +} + +std::string DotsAndBoxesState::InformationStateString(Player player) const { + // Cannot be used when starting from a non-empty initial state. + // If the game is started from a non-empty initial state + // there are no previous moves and thus the history is empty. + // And moves cannot be inferred as different orderings can lead + // to different scores for the players. 
+ SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return HistoryString(); +} + +std::string DotsAndBoxesState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void DotsAndBoxesState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + // Treat `values` as a 3-d tensor. + TensorView<3> view(values, + {/*cellstates=*/3, num_cells_, + /*part of cell (h, v, p)=*/3}, + true); + for (int cell = 0; cell < num_cells_; ++cell) { + view[{static_cast(h_[cell]), cell, 0}] = 1.0; + view[{static_cast(v_[cell]), cell, 1}] = 1.0; + view[{static_cast(p_[cell]), cell, 2}] = 1.0; + } +} + +void DotsAndBoxesState::UndoAction(Player player, Action action) { + Move move(action, num_rows_, num_cols_); + int cell = move.GetCell(); + if (p_[cell] != CellState::kEmpty) { + points_[current_player_]--; + } + h_[cell] = CellState::kEmpty; + v_[cell] = CellState::kEmpty; + p_[cell] = CellState::kEmpty; + current_player_ = player; + outcome_ = kInvalidPlayer; + num_moves_ -= 1; + history_.pop_back(); + --move_number_; +} + +std::unique_ptr DotsAndBoxesState::Clone() const { + return std::unique_ptr(new DotsAndBoxesState(*this)); +} + +std::string DotsAndBoxesState::StateToStringH(CellState state, int row, + int col) const { + if (row == 0 && col == 0) { + if (state == CellState::kEmpty) { + return "┌╴ ╶"; + } else { + return "┌───"; + } + } + if (row == num_rows_ && col == 0) { + if (state == CellState::kEmpty) { + return "└╴ ╶"; + } else { + return "└───"; + } + } + if (row == 0 && col == num_cols_) { + return "┐"; + } + if (row == num_rows_ && col == num_cols_) { + return "┘"; + } + if (col == num_cols_) { + return "┤"; + } + if (col == 0) { + if (state == CellState::kEmpty) { + return "├╴ ╶"; + } else { + return "├───"; + } + } + if (row == 0) { + if (state == CellState::kEmpty) { + return "┬╴ ╶"; + } else { + return "┬───"; + } + } + if (row == num_rows_) { + if (state == CellState::kEmpty) { + return "┴╴ ╶"; + } else { + return "┴───"; + } + } + if (state == CellState::kEmpty) { + return "┼╴ ╶"; + } else { + return "┼───"; + } +} + +std::string DotsAndBoxesState::StateToStringV(CellState state, int row, + int col) const { + if (state == CellState::kEmpty) { + return " "; // "┊"; + } else { + return "│"; + } +} + +std::string DotsAndBoxesState::StateToStringP(CellState state, int row, + int col) const { + if (state == CellState::kEmpty) { + return " "; + } + if (state == CellState::kPlayer1) { + return " 1 "; + } + if (state == CellState::kPlayer2) { + return " 2 "; + } + return " x "; +} + +DotsAndBoxesGame::DotsAndBoxesGame(const GameParameters& params) + : Game(kGameType, params), + num_rows_(ParameterValue("num_rows", kDefaultNumRows)), + num_cols_(ParameterValue("num_cols", kDefaultNumCols)), + num_cells_((1 + ParameterValue("num_rows", kDefaultNumRows)) * + (1 + ParameterValue("num_cols", kDefaultNumCols))), + utility_margin_( + ParameterValue("utility_margin", kDefaultUtilityMargin)) { +} + +double DotsAndBoxesGame::MinUtility() const { + // If win/lose is the utility, this is -1. 
+ if (utility_margin_) { + return -num_rows_ * num_cols_; + } else { + return -1; + } +} + +absl::optional DotsAndBoxesGame::UtilitySum() const { + return 0; +} + +double DotsAndBoxesGame::MaxUtility() const { + if (utility_margin_) { + return num_rows_ * num_cols_; + } else { + return 1; + } +} + +} // namespace dots_and_boxes +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/dots_and_boxes/dots_and_boxes.h b/scenarios/bargaining/open_spiel/open_spiel/games/dots_and_boxes/dots_and_boxes.h new file mode 100644 index 0000000..e920328 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/dots_and_boxes/dots_and_boxes.h @@ -0,0 +1,195 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Contributed by Wannes Meert, Giuseppe Marra, and Pieter Robberechts +// for the KU Leuven course Machine Learning: Project. + +#ifndef OPEN_SPIEL_GAMES_DOTS_AND_BOXES_H_ +#define OPEN_SPIEL_GAMES_DOTS_AND_BOXES_H_ + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +// Dots and Boxes: +// https://en.wikipedia.org/wiki/Dots_and_Boxes +// +// Parameters: +// - num_rows: Number of rows on the board +// - num_cols: Number of columns on the board +// - utility_margin: Return as payoff the margin achieved (if true) or +// return -1/0/1 to indicate win/tie/loss. + +namespace open_spiel { +namespace dots_and_boxes { + +// Constants. +inline constexpr int kNumPlayers = 2; +inline constexpr int kDefaultNumRows = 2; +inline constexpr int kDefaultNumCols = 2; +inline constexpr int kMaskSize = 10; +inline constexpr int kMask = (1 << kMaskSize) - 1; +inline constexpr bool kDefaultUtilityMargin = false; + +// State of a cell. +enum class CellState { + kEmpty, // Not set + kPlayer1, // Set by player 1 + kPlayer2, // Set by player 2 + kSet // Set by default start state +}; + +enum class CellOrientation { + kHorizontal, // = 0 + kVertical, // = 1 +}; + +class Move { + public: + Move(void); + Move(int row, int col, CellOrientation orientation, int rows, int cols); + explicit Move(Action action, int rows, int cols); + + void SetRowsCols(int rows, int cols) { + num_rows_ = rows; + num_cols_ = cols; + } + void Set(int row, int col, CellOrientation orientation); + int GetRow() const; + int GetCol() const; + CellOrientation GetOrientation() const; + + Action ActionId(); + int GetCell(); + int GetCellLeft(); + int GetCellRight(); + int GetCellAbove(); + int GetCellBelow(); + int GetCellAboveLeft(); + int GetCellAboveRight(); + int GetCellBelowLeft(); + int GetCellBelowRight(); + + protected: + int row_; + int col_; + CellOrientation orientation_; + int num_rows_; + int num_cols_; +}; + +// State of an in-play game. 
+class DotsAndBoxesState : public State { + public: + DotsAndBoxesState(std::shared_ptr game, int num_rows, + int num_cols, bool utility_margin); + DotsAndBoxesState(std::shared_ptr game, int num_rows, + int num_cols, bool utility_margin, const std::string& dbn); + DotsAndBoxesState(const DotsAndBoxesState&) = default; + DotsAndBoxesState& operator=(const DotsAndBoxesState&) = default; + + Player CurrentPlayer() const override { + return IsTerminal() ? kTerminalPlayerId : current_player_; + } + std::string DbnString() const; + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + void UndoAction(Player player, Action action) override; + std::vector LegalActions() const override; + Player outcome() const { return outcome_; } + + std::string StateToStringV(CellState state, int row, int col) const; + std::string StateToStringH(CellState state, int row, int col) const; + std::string StateToStringP(CellState state, int row, int col) const; + + void SetCurrentPlayer(Player player) { current_player_ = player; } + + protected: + std::vector v_; // Who set the vertical line + std::vector h_; // Who set the horizontal line + std::vector p_; // Who won the cell + void DoApplyAction(Action action) override; + + private: + bool Wins(Player player) const; + bool IsFull() const; + Player current_player_ = 0; // Player zero goes first + Player outcome_ = kInvalidPlayer; + int num_moves_ = 0; + const int num_rows_; + const int num_cols_; + const int num_cells_; + std::array points_; + const bool utility_margin_; +}; + +// Game object. 
+class DotsAndBoxesGame : public Game { + public: + explicit DotsAndBoxesGame(const GameParameters& params); + int NumDistinctActions() const override { + return (num_rows_ + 1) * num_cols_ + num_rows_ * (num_cols_ + 1); + } + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new DotsAndBoxesState( + shared_from_this(), num_rows_, num_cols_, utility_margin_)); + } + std::unique_ptr NewInitialState( + const std::string& str) const override { + return std::make_unique(shared_from_this(), num_rows_, + num_cols_, utility_margin_, str); + } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override; + absl::optional UtilitySum() const override; + double MaxUtility() const override; + std::vector ObservationTensorShape() const override { + return {3, num_cells_, 3}; + } + int MaxGameLength() const override { + return (num_rows_ + 1) * num_cols_ + num_cols_ * (num_rows_ + 1); + } + + private: + const int num_rows_; + const int num_cols_; + const int num_cells_; + const bool utility_margin_; +}; + +// CellState PlayerToState(Player player); +std::string StateToString(CellState state); +std::string OrientationToString(CellOrientation orientation); + +inline std::ostream& operator<<(std::ostream& stream, const CellState& state) { + return stream << StateToString(state); +} + +} // namespace dots_and_boxes +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_DOTS_AND_BOXES_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/dots_and_boxes/dots_and_boxes_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/dots_and_boxes/dots_and_boxes_test.cc new file mode 100644 index 0000000..2accffa --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/dots_and_boxes/dots_and_boxes_test.cc @@ -0,0 +1,42 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Contributed by Wannes Meert, Giuseppe Marra, and Pieter Robberechts +// for the KU Leuven course Machine Learning: Project. 
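The action-space arithmetic and the DBN-string constructor in `DotsAndBoxesGame` above are easiest to see with a concrete board size. The sketch below is an editorial illustration, not part of the vendored OpenSpiel sources; it assumes only the generic `open_spiel::LoadGame` / `Game::NewInitialState` entry points, and the 2x2 board and DBN string are made-up example values.

```cpp
#include <iostream>
#include <memory>

#include "open_spiel/spiel.h"

int main() {
  // A 2x2-box board has (2+1)*2 horizontal + 2*(2+1) vertical edges,
  // i.e. 12 distinct actions.
  std::shared_ptr<const open_spiel::Game> game =
      open_spiel::LoadGame("dots_and_boxes(num_rows=2,num_cols=2)");
  std::cout << game->NumDistinctActions() << std::endl;  // 12

  // A DBN string lists horizontal edges row by row, then vertical edges.
  // "100000000000" marks only the top-left horizontal edge as already drawn.
  std::unique_ptr<open_spiel::State> state =
      game->NewInitialState("100000000000");
  std::cout << state->ToString() << std::endl;
  return 0;
}
```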
+ +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace dots_and_boxes { +namespace { + +namespace testing = open_spiel::testing; + +void BasicDotsAndBoxesTests() { + std::cout << "Test dots and boxes\n"; + testing::LoadGameTest("dots_and_boxes"); + testing::NoChanceOutcomesTest(*LoadGame("dots_and_boxes")); + testing::RandomSimTest(*LoadGame("dots_and_boxes"), 100); +} + +} // namespace +} // namespace dots_and_boxes +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::dots_and_boxes::BasicDotsAndBoxesTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/dou_dizhu/dou_dizhu.cc b/scenarios/bargaining/open_spiel/open_spiel/games/dou_dizhu/dou_dizhu.cc new file mode 100644 index 0000000..03ebb0f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/dou_dizhu/dou_dizhu.cc @@ -0,0 +1,471 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/dou_dizhu/dou_dizhu.h" + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/games/dou_dizhu/dou_dizhu_utils.h" + +namespace open_spiel { +namespace dou_dizhu { +namespace { + +const GameType kGameType{/*short_name=*/"dou_dizhu", + /*long_name=*/"Dou Dizhu", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/kNumPlayers, + /*min_num_players=*/kNumPlayers, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new DouDizhuGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +} // namespace + +DouDizhuGame::DouDizhuGame(const GameParameters& params) + : Game(kGameType, params) {} + +DouDizhuState::DouDizhuState(std::shared_ptr game) : State(game) { + absl::c_fill(dealer_deck_, 1); +} + +std::string DouDizhuState::ActionToString(Player player, Action action) const { + if (player == kChancePlayerId) { + if (action < kDealingActionBase) { + return absl::StrCat("Decide first card up position ", action); + } else if (action < kDealingActionBase + kNumCards) { + return absl::StrCat("Deal ", CardString(action - kDealingActionBase)); + } else { + SpielFatalError( + absl::StrFormat("Non valid ID %d for chance player", action)); + } + } + + if (action == kPass) { + return "Pass"; + } else if (action > kPass && action < kPlayActionBase) { + return absl::StrCat("Bid ", action - kBiddingActionBase); + } else if (action >= kPlayActionBase && action <= kRocketActionBase) { + // 
For aiplane combinations, need special treatment to resolve ambiguity + if (action >= kAirplaneWithSoloActionBase && action < kBombActionBase) { + return FormatAirplaneCombHand(action); + } + return FormatSingleHand(ActionToHand(action)); + } else { + SpielFatalError("Non valid action ID!"); + } +} + +std::string DouDizhuState::ToString() const { + std::string rv = FormatDeal(); + + if (history_.size() > kNumCards - kNumCardsLeftOver + 1) + absl::StrAppend(&rv, FormatAuction()); + + if (num_played_ > 0) absl::StrAppend(&rv, FormatPlay()); + if (IsTerminal()) absl::StrAppend(&rv, FormatResult()); + + return rv; +} + +std::string DouDizhuState::FormatAuction() const { + SPIEL_CHECK_GT(history_.size(), kNumCards - kNumCardsLeftOver + 1); + std::string rv = "Bidding phase begin\n"; + for (int i = kNumCards - kNumCardsLeftOver + 1; + i < history_.size() - num_played_; ++i) { + absl::StrAppend( + &rv, absl::StrFormat( + "Player %d played %s\n", history_[i].player, + ActionToString(history_[i].player, history_[i].action))); + } + return rv; +} + +std::string DouDizhuState::FormatPlay() const { + SPIEL_CHECK_GT(num_played_, 0); + std::string rv = "Playing phase begin \n"; + for (int i = history_.size() - num_played_; i < history_.size(); ++i) { + absl::StrAppend( + &rv, absl::StrFormat( + "Player %d played %s\n", history_[i].player, + ActionToString(history_[i].player, history_[i].action))); + } + return rv; +} + +std::string DouDizhuState::FormatResult() const { + std::string rv = "The results are: \n"; + for (int player = 0; player < kNumPlayers; ++player) { + absl::StrAppend( + &rv, absl::StrFormat("Player %d got %f\n", player, returns_[player])); + } + return rv; +} + +std::array FormatHand( + int player, bool mark_voids, + const std::array, kNumPlayers>& deal) { + std::array cards{}; + for (int rank = 0; rank < kNumRanks - 2; ++rank) { + bool is_void = true; + for (int i = 0; i < deal[player][rank]; ++i) { + cards[rank].push_back(kRankChar[rank]); + is_void = false; + } + if (is_void && mark_voids) absl::StrAppend(&cards[rank], "none"); + } + if (deal[player][kNumRanks - 2]) + absl::StrAppend(&cards[kNumRanks - 2], "(BWJ)"); + else if (mark_voids) + absl::StrAppend(&cards[kNumRanks - 2], "none"); + + if (deal[player][kNumRanks - 1]) + absl::StrAppend(&cards[kNumRanks - 1], "(CJ)"); + else if (mark_voids) + absl::StrAppend(&cards[kNumRanks - 1], "none"); + + return cards; +} + +std::array, kNumPlayers> +DouDizhuState::OriginalDeal() const { + SPIEL_CHECK_GE(history_.size(), kNumCards + 1); + std::array, kNumPlayers> deal{}; + for (int i = 1; i < kNumCards - kNumCardsLeftOver + 1; ++i) + deal[((i - 1 + first_player_) % kNumPlayers)] + [CardToRank(history_[i].action - kDealingActionBase)]++; + + for (int i = 0; i < kNumCardsLeftOver; ++i) + deal[dizhu_][cards_left_over_[i]]++; + return deal; +} + +std::string DouDizhuState::FormatDeal() const { + std::array, kNumPlayers> cards{}; + if (IsTerminal()) { + // Include all cards in the terminal state to make reviewing the deal easier + auto deal = OriginalDeal(); + for (int player = 0; player < kNumPlayers; ++player) { + cards[player] = FormatHand(player, /*mark_voids=*/false, deal); + } + } else { + for (int player = 0; player < kNumPlayers; ++player) { + cards[player] = FormatHand(player, /*mark_voids=*/false, holds_); + } + } + constexpr int kColumnWidth = 8; + std::string padding(kColumnWidth, ' '); + std::string rv; + for (int rank = 0; rank < kNumRanks; ++rank) + absl::StrAppend(&rv, absl::StrFormat("%-8s", cards[1][rank]), padding, + 
cards[2][rank], "\n"); + for (int rank = 0; rank < kNumRanks; ++rank) + absl::StrAppend(&rv, padding, cards[0][rank], "\n"); + return rv; +} + +std::string DouDizhuState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + std::string rv = + absl::StrFormat("My hand %s\n", FormatSingleHand(holds_[player])); + absl::StrAppend(&rv, absl::StrFormat("Played cards %s\n", + FormatSingleHand(played_deck_))); + absl::StrAppend(&rv, + absl::StrFormat("face up card rank: %d", card_rank_face_up_)); + absl::StrAppend(&rv, absl::StrFormat("start player: %d", first_player_)); + absl::StrAppend( + &rv, absl::StrFormat("My position from Dizhu: %d", + (player - dizhu_ + kNumPlayers) % kNumPlayers)); + return rv; +} + +void DouDizhuState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_EQ(values.size(), game_->ObservationTensorSize()); + WriteObservationTensor(player, values); +} + +void DouDizhuState::WriteObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + absl::c_fill(values, 0.); + if (phase_ == Phase::kDeal) return; + auto values_iterator = values.begin(); + const int played_deck_base = (kNumRanks - 2) * (kNumSuits + 1) + 2 * 2; + for (int i = 0; i < kNumRanks; ++i) { + values_iterator[i * (kNumSuits + 1) + holds_[player][i]] = 1; + values_iterator[played_deck_base + i * (kNumSuits + 1) + played_deck_[i]] = + 1; + } + + if (dizhu_ != kInvalidPlayer) { + const int from_dizhu_base = 2 * played_deck_base; + const int from_dizhu = (player - dizhu_ + kNumPlayers) % kNumPlayers; + values_iterator[from_dizhu_base + from_dizhu] = 1; + } + + if (first_player_ != kInvalidPlayer) { + const int start_player_base = 2 * played_deck_base + kNumPlayers; + values_iterator[start_player_base + first_player_] = 1; + values_iterator[start_player_base + kNumPlayers + card_rank_face_up_] = 1; + } +} + +std::vector DouDizhuState::LegalActions() const { + switch (phase_) { + case Phase::kDeal: + return DealLegalActions(); + case Phase::kAuction: + return BiddingLegalActions(); + case Phase::kPlay: + return PlayLegalActions(); + default: + return {}; + } +} + +std::vector DouDizhuState::DealLegalActions() const { + std::vector legal_actions; + legal_actions.reserve(kNumCards - history_.size() + 1); + + if (card_face_up_position_ == -1) { + for (int i = 0; i < kDealingActionBase; ++i) legal_actions.push_back(i); + } else { + for (int i = 0; i < kNumCards; ++i) { + if (dealer_deck_[i]) legal_actions.push_back(i + kDealingActionBase); + } + } + + return legal_actions; +} + +std::vector DouDizhuState::BiddingLegalActions() const { + std::vector legal_actions = {kPass}; + legal_actions.reserve(kNumBids + 1); + + for (int bid = winning_bid_ + 1; bid <= kNumBids; ++bid) { + legal_actions.push_back(kBiddingActionBase + bid); + } + return legal_actions; +} + +std::vector DouDizhuState::PlayLegalActions() const { + std::vector legal_actions; + // the leader of a trick must play./ an action and cannot pass + if (!new_trick_begin_) legal_actions.push_back(kPass); + + std::array hand = holds_[current_player_]; + const int prev_action = CurrentTrick().WinningAction(); + SearchForLegalActions(&legal_actions, hand, prev_action); + + absl::c_sort(legal_actions); + return legal_actions; +} + +std::vector> DouDizhuState::ChanceOutcomes() const { + std::vector> outcomes; + int num_cards_remaining = 0; + for (int i = 0; i < kNumCards; ++i) num_cards_remaining += dealer_deck_[i]; + 
outcomes.reserve(num_cards_remaining); + + if (card_face_up_position_ == -1) { + for (int i = 0; i < kDealingActionBase; ++i) + outcomes.emplace_back(i, 1.0 / static_cast(kDealingActionBase)); + } else { + for (int card = 0; card < kNumCards; ++card) + if (dealer_deck_[card]) + outcomes.emplace_back(card + kDealingActionBase, + 1.0 / static_cast(num_cards_remaining)); + } + + return outcomes; +} + +void DouDizhuState::DoApplyAction(Action action) { + switch (phase_) { + case Phase::kDeal: + return ApplyDealAction(action); + case Phase::kAuction: + return ApplyBiddingAction(action); + case Phase::kPlay: + return ApplyPlayAction(action); + case Phase::kGameOver: + SpielFatalError("Cannot act in terminal states"); + } +} + +void DouDizhuState::ApplyDealAction(int action) { + // First decide the face up card + if (card_face_up_position_ == -1) { + card_face_up_position_ = action; + return; + } + + const int dealing_round = static_cast(history_.size()) - 1; + // if the current player is dealt the face up card, make it the first one to + // bid + if (dealing_round == history_[0].action) { + first_player_ = dealing_round % kNumPlayers; + card_rank_face_up_ = CardToRank(action - kDealingActionBase); + } + const int dealt_player_idx = ((history_.size() - 1) % kNumPlayers); + const int dealt_rank = CardToRank(action - kDealingActionBase); + holds_[dealt_player_idx][dealt_rank]++; + dealer_deck_[action - kDealingActionBase]--; + if (history_.size() == kNumCards - kNumCardsLeftOver) { + phase_ = Phase::kAuction; + current_player_ = first_player_; + SPIEL_CHECK_GE(current_player_, 0); + SPIEL_CHECK_LE(current_player_, num_players_); + for (int card = 0; card < kNumCards; ++card) + if (dealer_deck_[card]) { + cards_left_over_.push_back(CardToRank(card)); + } + } +} + +void DouDizhuState::ApplyBiddingAction(int action) { + // Track the number of consecutive passes since the last bid (if any). 
+ if (action == kPass) { + ++num_passes_; + } else { + num_passes_ = 0; + } + + bool has_winner = false; + + if (action == kPass) { + if (num_passes_ == kNumPlayers) + phase_ = Phase::kGameOver; + else if (num_passes_ == kNumPlayers - 1 && winning_bid_ > 0) + has_winner = true; + } else { + dizhu_ = current_player_; + winning_bid_ = action - kBiddingActionBase; + if (winning_bid_ == kNumBids) has_winner = true; + } + if (has_winner) { + for (int i = 0; i < kNumCardsLeftOver; ++i) + holds_[dizhu_][cards_left_over_[i]]++; + phase_ = Phase::kPlay; + current_player_ = dizhu_; + new_trick_begin_ = true; + tricks_.push_back(Trick(dizhu_, kInvalidAction)); + num_passes_ = 0; + } else { + current_player_ = (current_player_ + 1) % kNumPlayers; + } +} + +bool DouDizhuState::AfterPlayHand(int player, int action) { + std::array used_hand = ActionToHand(action); + bool flag = true; + for (int rank = 0; rank < kNumRanks; ++rank) { + SPIEL_CHECK_GE(holds_[player][rank], used_hand[rank]); + holds_[player][rank] -= used_hand[rank]; + flag &= !holds_[player][rank]; + played_deck_[rank] += used_hand[rank]; + } + return flag; +} + +void DouDizhuState::ApplyPlayAction(int action) { + num_played_++; + + if (action == kPass) { + ++num_passes_; + } else { + num_passes_ = 0; + } + + if (action == kPass) { + if (num_passes_ == kNumPlayers - 1) { + current_player_ = CurrentTrick().Winner(); + trick_played_++; + num_passes_ = 0; + tricks_.push_back(Trick()); + new_trick_begin_ = true; + return; + } + } else { + if (action >= kBombActionBase) bombs_played_++; + players_hands_played[current_player_]++; + + if (new_trick_begin_) new_trick_begin_ = false; + + CurrentTrick().Play(current_player_, action); + + bool all_played = AfterPlayHand(current_player_, action); + if (all_played) { + final_winner_ = current_player_; + ScoreUp(); + phase_ = Phase::kGameOver; + return; + } + } + current_player_ = (current_player_ + 1) % kNumPlayers; +} + +Player DouDizhuState::CurrentPlayer() const { + if (phase_ == Phase::kDeal) { + return kChancePlayerId; + } else if (phase_ == Phase::kGameOver) { + return kTerminalPlayerId; + } else { + return current_player_; + } +} + +void DouDizhuState::ScoreUp() { + // If no one bids, 0 for everyone + if (dizhu_ == kInvalidPlayer) return; + + // if none of the farmers played, or the dizhu only played once + // then it is spring! + bool is_spring = false; + is_spring |= (players_hands_played[dizhu_] == 1); + is_spring |= ((!players_hands_played[(dizhu_ + 1) % 3]) && + (!players_hands_played[(dizhu_ + 2) % 3])); + + int paying = winning_bid_; + for (int i = 0; i < is_spring + bombs_played_; ++i) paying *= 2; + const int dizhu_sign = (final_winner_ == dizhu_) ? 1 : -1; + + returns_[dizhu_] = dizhu_sign * 2 * paying; + returns_[(dizhu_ + 1) % 3] = -dizhu_sign * paying; + returns_[(dizhu_ + 2) % 3] = -dizhu_sign * paying; +} + +Trick::Trick(Player leader, int action) + : winning_action_(action), leader_(leader), winning_player_(leader) {} + +} // namespace dou_dizhu +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/dou_dizhu/dou_dizhu.h b/scenarios/bargaining/open_spiel/open_spiel/games/dou_dizhu/dou_dizhu.h new file mode 100644 index 0000000..17ad959 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/dou_dizhu/dou_dizhu.h @@ -0,0 +1,187 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef OPEN_SPIEL_GAMES_DOU_DIZHU_H_
+#define OPEN_SPIEL_GAMES_DOU_DIZHU_H_
+
+// The game of dou dizhu (the three-player version).
+// For a general description of the rules, see
+// https://en.wikipedia.org/wiki/Dou_dizhu
+// The game uses a standard 54-card deck (including two Jokers). It starts by
+// randomly picking one card face up, which is then inserted into the shuffled
+// deck. Each player is dealt 17 cards, and then the bidding phase starts. The
+// player who was dealt the face-up card is the first to bid. The bidding round
+// ends if (1) no one bids, (2) two consecutive passes follow a bid, or (3) the
+// maximum bid of 3 is made. The winner of the bidding phase becomes the dizhu
+// (landlord) and receives the remaining 3 cards. The other players are called
+// peasants. Starting with the dizhu, the playing phase consists of multiple
+// tricks. The leader of a trick can play several allowable categories of
+// hands. The other players in a trick can only pass or play a hand of the same
+// pattern with a higher rank. A player wins the trick if the other two players
+// pass, and then leads the next trick. In this game, suits DO NOT MATTER.
+//
+// The allowable categories of hands:
+// Solo: a single card
+// SoloChain: >=5 consecutive cards in rank, e.g., 34567
+// Pair: a pair of cards with the same rank
+// PairChain: >=3 consecutive pairs, e.g., 334455
+// Trio: three of a rank, e.g., 444
+// TrioWithSolo: a trio + a solo, e.g., 3334
+// TrioWithPair: a trio + a pair, e.g., 33344
+// Airplane (TrioChain): >=2 consecutive trios, e.g., 333-444
+// Airplane+solo: an airplane where each trio carries a solo, e.g., 333-444-5-6
+// Airplane+pair: an airplane where each trio carries a pair, e.g., 333-444-55-66
+// Bomb: four of a rank, e.g., 4444
+// Rocket: the two Jokers
+//
+// Some other rules:
+// The order for solo cards is: ColoredJoker>BlackWhiteJoker>2>A>K>Q>...>4>3
+// For combination hands, the primal part determines the order,
+// e.g., the primal part of 333-444-5-6 is 333-444.
+// 2s and Jokers cannot be part of a chain.
+// A rocket dominates all other hands.
+// A bomb dominates all other hands except a rocket or a bomb of higher rank.
+// A bomb/rocket cannot appear in an airplane combination,
+// e.g., 333-444-555-666-7777 is prohibited.
+// But in this implementation any pair and any trio can be kickers.
+// For more, see https://rezunli96.github.io/blog/doudizhu_count.html
+//
+// A game ends when a player has played all of their cards.
+// The winning bid determines the initial stake.
+// Each bomb played doubles the stake.
+// If (1) neither peasant plays any cards, or (2) the dizhu plays no cards
+// after its first hand, the game is called a spring and the stake is doubled
+// again.
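The stake rule summarized at the end of the comment above (the winning bid is the base stake, doubled once for every bomb played and once more for a spring, with the dizhu winning or losing twice the stake and each peasant half of that) can be written out as a small standalone helper. This is an editorial sketch, not part of the vendored sources; `FinalStake` is a hypothetical name that simply mirrors the arithmetic of `DouDizhuState::ScoreUp()` in dou_dizhu.cc.

```cpp
// Hypothetical helper illustrating the stake computation described above.
inline int FinalStake(int winning_bid, int bombs_played, bool is_spring) {
  int stake = winning_bid;
  // Each bomb doubles the stake, and a spring doubles it once more.
  for (int i = 0; i < bombs_played + (is_spring ? 1 : 0); ++i) stake *= 2;
  return stake;
}

// Example: winning bid 3, two bombs, no spring -> stake 12.
// The dizhu's return is +/-2 * 12 = +/-24 and each peasant's is -/+12,
// so the three returns sum to zero, matching UtilitySum() == 0.
```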
+ +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/games/dou_dizhu/dou_dizhu_utils.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace dou_dizhu { + +class Trick { + public: + Trick() : Trick(kInvalidPlayer, kInvalidAction) {} + Trick(Player leader, int action); + // winning_player_ is the current winner of the trick + void Play(Player player, int action) { + winning_player_ = player; + winning_action_ = action; + } + int WinningAction() const { return winning_action_; } + Player Winner() const { return winning_player_; } + Player Leader() const { return leader_; } + + private: + int winning_action_; + const Player leader_; + Player winning_player_; +}; + +class DouDizhuState : public State { + public: + DouDizhuState(std::shared_ptr game); + Player CurrentPlayer() const override; + std::string ActionToString(Player player, Action action) const override; + std::string ToString() const override; + bool IsTerminal() const override { return phase_ == Phase::kGameOver; } + std::vector Returns() const override { return returns_; } + std::string ObservationString(Player player) const override; + void WriteObservationTensor(Player player, absl::Span values) const; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override { + return absl::make_unique(*this); + } + std::vector LegalActions() const override; + std::vector> ChanceOutcomes() const override; + // Current phase. + int CurrentPhase() const { return static_cast(phase_); } + + protected: + void DoApplyAction(Action action) override; + + private: + std::vector DealLegalActions() const; + std::vector BiddingLegalActions() const; + std::vector PlayLegalActions() const; + void ApplyDealAction(int action); + void ApplyBiddingAction(int action); + void ApplyPlayAction(int action); + void ScoreUp(); + + bool AfterPlayHand(int player, int action); + Trick& CurrentTrick() { return tricks_[trick_played_]; } + const Trick& CurrentTrick() const { return tricks_[trick_played_]; } + // Recording each player got how many cards for each rank + std::array, kNumPlayers> OriginalDeal() const; + + std::string FormatDeal() const; + std::string FormatAuction() const; + std::string FormatPlay() const; + std::string FormatResult() const; + // the ranks of the cards left over after dealing phase + std::vector cards_left_over_; + + int num_passes_ = 0; // Number of consecutive passes since the last non-pass. 
+ int winning_bid_ = 0; + int trick_played_ = 0; + int num_played_ = 0; // number of plays during playing phase + int card_face_up_position_ = -1; + int card_rank_face_up_ = kInvalidAction; + bool new_trick_begin_ = false; + Player current_player_ = kInvalidPlayer; + Player first_player_ = kInvalidPlayer; + Player dizhu_ = kInvalidPlayer; + Player final_winner_ = kInvalidPlayer; + Phase phase_ = Phase::kDeal; + + std::array dealer_deck_{}; + std::array played_deck_{}; + std::vector tricks_{}; + // for score computation + int bombs_played_ = 0; + std::array players_hands_played{}; + + std::vector returns_ = std::vector(kNumPlayers); + // recording the current hands of players + std::array, kNumPlayers> holds_{}; +}; + +class DouDizhuGame : public Game { + public: + explicit DouDizhuGame(const GameParameters& params); + int NumDistinctActions() const override { return kRocketActionBase + 1; } + int MaxChanceOutcomes() const override { + return kDealingActionBase + kNumCards; + } + std::unique_ptr NewInitialState() const override { + return absl::make_unique(shared_from_this()); + } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { return kMinUtility; } + double MaxUtility() const override { return kMaxUtility; } + absl::optional UtilitySum() const override { return 0; } + std::vector ObservationTensorShape() const override { + return {kObservationTensorSize}; + } + int MaxGameLength() const override { + return kMaxAuctionLength + kNumCards * kNumPlayers; + } +}; +} // namespace dou_dizhu +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_DOU_DIZHU_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/dou_dizhu/dou_dizhu_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/dou_dizhu/dou_dizhu_test.cc new file mode 100644 index 0000000..a8b9332 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/dou_dizhu/dou_dizhu_test.cc @@ -0,0 +1,35 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/dou_dizhu/dou_dizhu.h" + +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace dou_dizhu { +namespace { + +void BasicGameTests() { + testing::LoadGameTest("dou_dizhu"); + testing::RandomSimTest(*LoadGame("dou_dizhu"), 20); +} + +} // namespace +} // namespace dou_dizhu +} // namespace open_spiel + +int main() { + open_spiel::dou_dizhu::BasicGameTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/dou_dizhu/dou_dizhu_utils.cc b/scenarios/bargaining/open_spiel/open_spiel/games/dou_dizhu/dou_dizhu_utils.cc new file mode 100644 index 0000000..0f07302 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/dou_dizhu/dou_dizhu_utils.cc @@ -0,0 +1,928 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/dou_dizhu/dou_dizhu_utils.h" + +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" + +namespace open_spiel { +namespace dou_dizhu { + +// dropping suit information +int CardToRank(int card) { + if (card == kNumCards - 2 || card == kNumCards - 1) { + return card - kNumCards + kNumRanks; + } + return card % (kNumRanks - 2); +} + +int CardToSuit(int card) { + if (card == kNumCards - 2 || card == kNumCards - 1) { + SpielFatalError("No Suit defined for Jokers"); + } + return card / (kNumRanks - 2); +} + +std::string RankString(int rank) { + if (rank < kNumRanks - 2) + return std::string(1, kRankChar[rank]); + else if (rank == kNumRanks - 2) + return "(BWJ)"; + else if (rank == kNumRanks - 1) + return "(CJ)"; + else + SpielFatalError("Non valid rank"); +} + +std::string CardString(int card) { + int rank = CardToRank(card); + if (rank >= kNumRanks - 2) { + return RankString(rank); + } else { + int suit = CardToSuit(card); + SPIEL_CHECK_GE(suit, 0); + SPIEL_CHECK_LT(suit, kNumSuits); + return absl::StrFormat("%c%c", kSuitChar[suit], kRankChar[rank]); + } +} + +std::string FormatSingleHand(absl::Span hand) { + std::string hand_format; + for (int rank = 0; rank < kNumRanks; ++rank) { + for (int i = 0; i < hand[rank]; ++i) + absl::StrAppend(&hand_format, RankString(rank)); + } + return hand_format; +} + +// resolve ambiguity for cases like 333444555666 +std::string FormatAirplaneCombHand(int action) { + TrioCombParams params = GetAirplaneCombParams(action); + std::array hand = ActionToHand(action); + std::string airplane_comb_str; + // specify which is chain + for (int rank = params.chain_head; + rank < params.chain_head + params.chain_length; ++rank) { + for (int i = 0; i < 3; ++i) + absl::StrAppend(&airplane_comb_str, RankString(rank)); + } + absl::StrAppend(&airplane_comb_str, "-"); + // kickers + for (int rank = 0; rank < kNumRanks; ++rank) { + if (rank >= params.chain_head && + rank < params.chain_head + params.chain_length) + continue; + if (!hand[rank]) continue; + for (int i = 0; i < hand[rank]; ++i) + absl::StrAppend(&airplane_comb_str, RankString(rank)); + } + return airplane_comb_str; +} + +// Shared by single-rank and chain-only hands +int GetNumCardsPerRank(int action) { + int num_cards; + if (action >= kPlayActionBase && action < kPairActionBase) { + num_cards = 1; + } else if (action >= kPairActionBase && action < kTrioActionBase) { + num_cards = 2; + } else if ((action >= kTrioActionBase && action < kTrioWithSoloActionBase) || + (action >= kAirplaneActionBase && + action < kAirplaneWithSoloActionBase)) { + num_cards = 3; + } else if (action >= kBombActionBase && action < kRocketActionBase) { + num_cards = 4; + } else { + SpielFatalError("Invalid action ID"); + } + + return num_cards; +} + +int GetSingleRankActionBase(int num_cards_same_rank = 1) { + int action_base; + switch (num_cards_same_rank) { + case 1: + action_base = kPlayActionBase; + break; + case 2: + action_base = kPairActionBase; + break; + case 3: + action_base = kTrioActionBase; + break; + case 4: + action_base = kBombActionBase; + break; + default: + 
SpielFatalError( + "The number of cards of the same rank is wrong (single rank)."); + } + return action_base; +} + +SingleRankHandParams GetSingleRankHandParams(int action) { + const int num_cards = GetNumCardsPerRank(action); + const int action_base = GetSingleRankActionBase(num_cards); + SPIEL_CHECK_GE(action, action_base); + return SingleRankHandParams(action - action_base, num_cards); +} + +std::array SingleRankHand(int action) { + std::array hand{}; + SingleRankHandParams params = GetSingleRankHandParams(action); + hand[params.rank] = params.num_cards; + return hand; +} + +// given a single-rank hand, map it to action id +int SingleRankHandToActionId(absl::Span hand) { + int the_rank; + int counter = 0; + + for (int rank = 0; rank < kNumRanks; ++rank) { + if (hand[rank] != 0) { + the_rank = rank; + counter++; + } + } + SPIEL_CHECK_EQ(counter, 1); + const int num_cards_same_rank = hand[the_rank]; + int action = GetSingleRankActionBase(num_cards_same_rank); + action += the_rank; + return action; +} + +// given an arbitrary hand, search for possible single-rank hands +// if prev_action = kInvalidAction, search for all possible such hands +// otherwise, only search for those that are ranked higher than prev_action +void SearchSingleRankActions(std::vector* actions, + absl::Span hand, + int prev_action = kInvalidAction) { + std::array used_hands{}; + SingleRankHandParams prev_action_params; + int start_rank; + if (prev_action == kInvalidAction) { + start_rank = 0; + } else { + prev_action_params = GetSingleRankHandParams(prev_action); + start_rank = prev_action_params.rank + 1; + } + for (int rank = start_rank; rank < kNumRanks; ++rank) { + SPIEL_CHECK_LE(hand[rank], kNumSuits); + SPIEL_CHECK_GE(hand[rank], 0); + if (rank == kNumRanks - 2 || rank == kNumRanks - 1) + SPIEL_CHECK_LE(hand[rank], 1); + if (prev_action == kInvalidAction) { + for (int i = 0; i < hand[rank]; ++i) { + used_hands[rank]++; + actions->push_back(SingleRankHandToActionId(used_hands)); + } + } else if (hand[rank] >= prev_action_params.num_cards) { + used_hands[rank] = prev_action_params.num_cards; + actions->push_back(SingleRankHandToActionId(used_hands)); + } + used_hands[rank] = 0; + } +} + +int GetChainOnlyActionBase(int num_cards_same_rank = 1) { + int action_base; + switch (num_cards_same_rank) { + case 1: + action_base = kSoloChainActionBase; + break; + case 2: + action_base = kPairChainActionBase; + break; + case 3: + action_base = kAirplaneActionBase; + break; + default: + SpielFatalError("The number of cards of the same rank is wrong (chain)."); + } + return action_base; +} + +int GetChainOnlyMinLength(int num_cards_same_rank = 1) { + int chain_length; + switch (num_cards_same_rank) { + case 1: + chain_length = kSoloChainMinLength; + break; + case 2: + chain_length = kPairChainMinLength; + break; + case 3: + chain_length = kAirplaneMinLength; + break; + default: + SpielFatalError("The number of cards of the same rank is wrong (chain)."); + } + return chain_length; +} + +ChainOnlyHandParams GetChainOnlyHandParams(int action) { + const int num_cards_same_rank = GetNumCardsPerRank(action); + const int action_base = GetChainOnlyActionBase(num_cards_same_rank); + const int min_length = GetChainOnlyMinLength(num_cards_same_rank); + SPIEL_CHECK_GE(action, action_base); + const int hand_id = action - action_base; + int chain_length = min_length; + int base = 0; + // we label the action Ids by increasing length of the chain + for (chain_length = min_length; chain_length <= kNumRanks; ++chain_length) { + int num_chains = 
kNumRanks - chain_length - 2; + if (base <= hand_id && hand_id < base + num_chains) break; + base += num_chains; + } + const int chain_head = hand_id - base; + return ChainOnlyHandParams(chain_head, num_cards_same_rank, chain_length); +} + +std::array ChainOnlyHand(int action) { + std::array hand{}; + ChainOnlyHandParams params = GetChainOnlyHandParams(action); + for (int i = 0; i < params.chain_length; ++i) { + hand[params.chain_head + i] = params.num_cards_per_rank; + } + return hand; +} + +int ChainOnlyHandToActionId(absl::Span hand) { + int chain_head = -1; + int chain_length = 0; + int chain_counter = 0; + int num_cards_same_rank = 0; + bool chain_stopped = true; + + if (hand[kNumRanks - 3] || hand[kNumRanks - 2] || hand[kNumRanks - 1]) + SpielFatalError("2s and Jokers cannot be in a chain"); + + for (int rank = 0; rank < kNumRanks - 3; ++rank) { + if (hand[rank] == 0) { + chain_stopped = true; + } else { + if (chain_stopped) { + chain_head = rank; + num_cards_same_rank = hand[rank]; + chain_length = 1; + chain_counter++; + chain_stopped = false; + } else if (hand[rank] != num_cards_same_rank) { + SpielFatalError("Invalid pattern"); + } else { + chain_length++; + } + } + } + + SPIEL_CHECK_EQ(chain_counter, 1); + const int min_length = GetChainOnlyMinLength(num_cards_same_rank); + const int action_base = GetChainOnlyActionBase(num_cards_same_rank); + + if (chain_length < min_length) + SpielFatalError(absl::StrFormat("The length of chain should be at least %d", + min_length)); + int action = action_base; + for (int length = min_length; length < chain_length; ++length) + action += kNumRanks - length - 2; + action += chain_head; + return action; +} + +void SearchChainOnlyActions(std::vector* actions, + absl::Span hand, + int prev_action = kInvalidAction) { + ChainOnlyHandParams prev_action_params; + + int start_rank; + if (prev_action == kInvalidAction) { + start_rank = 0; + } else { + prev_action_params = GetChainOnlyHandParams(prev_action); + start_rank = prev_action_params.chain_head + 1; + } + + for (int chain_head = start_rank; chain_head < kNumRanks - 4; ++chain_head) { + if (!hand[chain_head] || hand[chain_head] == kNumSuits) continue; + int num_cards = hand[chain_head]; + // 2-s and Jokers cannot be in chain + for (int chain_length = 2; chain_head + chain_length - 1 < kNumRanks - 3; + ++chain_length) { + int chain_tail = chain_head + chain_length - 1; + num_cards = std::min(num_cards, hand[chain_tail]); + if (!num_cards) break; + std::vector all_nums; + if (prev_action != kInvalidAction) { + if (num_cards < prev_action_params.num_cards_per_rank) break; + if (chain_length > prev_action_params.chain_length) break; + if (chain_length == prev_action_params.chain_length) { + all_nums.push_back(prev_action_params.num_cards_per_rank); + } + } else { + for (int n = 1; n <= num_cards; ++n) { + all_nums.push_back(n); + } + } + + for (auto n : all_nums) { + const int min_length = GetChainOnlyMinLength(n); + if (chain_length >= min_length) { + std::array used_rank{}; + for (int i = 0; i < chain_length; ++i) used_rank[chain_head + i] = n; + actions->push_back(ChainOnlyHandToActionId(used_rank)); + } + } + } + } +} + +int GetTrioCombActionBase(int action) { + int action_base; + if (kTrioWithSoloActionBase <= action && action < kTrioWithPairActionBase) { + action_base = kTrioWithSoloActionBase; + } else if (kTrioWithPairActionBase <= action && + action < kAirplaneActionBase) { + action_base = kTrioWithPairActionBase; + } else if (kAirplaneWithSoloActionBase <= action && + action < 
kAirplaneWithPairActionBase) { + action_base = kAirplaneWithSoloActionBase; + } else if (kAirplaneWithPairActionBase <= action && + action < kBombActionBase) { + action_base = kAirplaneWithPairActionBase; + } else { + SpielFatalError("Invalid action Ids"); + } + return action_base; +} + +KickerType GetTrioCombKickerType(int action) { + KickerType kicker_type; + if (kTrioWithSoloActionBase <= action && action < kTrioWithPairActionBase) { + kicker_type = kSolo; + } else if (kTrioWithPairActionBase <= action && + action < kAirplaneActionBase) { + kicker_type = kPair; + } else if (kAirplaneWithSoloActionBase <= action && + action < kAirplaneWithPairActionBase) { + kicker_type = kSolo; + } else if (kAirplaneWithPairActionBase <= action && + action < kBombActionBase) { + kicker_type = kPair; + } else { + SpielFatalError("Invalid action Ids"); + } + return kicker_type; +} + +// single trio comb includes trio+solo and trio+pair (excluding airplanes) +TrioCombParams GetSingleTrioCombParams(int action) { + if (action < kTrioWithSoloActionBase || action >= kAirplaneActionBase) + SpielFatalError("Must be single trio pattern"); + + const int action_base = GetTrioCombActionBase(action); + const KickerType kicker_type = GetTrioCombKickerType(action); + const int hand_id = (action - action_base); + const int num_kickers = kicker_type == kSolo ? kNumRanks - 1 : kNumRanks - 3; + const int head = hand_id / num_kickers; + const int kicker_steps = hand_id % num_kickers; + + return TrioCombParams(head, 1, kicker_type, kicker_steps); +} + +int GetNumKickersAirplaneSoloComb(int chain_length) { + int num_comb; + switch (chain_length) { + case 2: + num_comb = kNumKickersAirplaneSoloCombChainOfLengthTwo; + break; + + case 3: + num_comb = kNumKickersAirplaneSoloCombChainOfLengthThree; + break; + + case 4: + num_comb = kNumKickersAirplaneSoloCombChainOfLengthFour; + break; + + case 5: + num_comb = kNumKickersAirplaneSoloCombChainOfLengthFive; + break; + + default: + SpielFatalError("The chain length for aiplane+solo must be within 2-5"); + break; + } + return num_comb; +} + +int GetAirplaneSoloActionBase(int chain_length) { + int action_base; + switch (chain_length) { + case 2: + action_base = kAirplaneWithSoloActionBase; + break; + + case 3: + action_base = kAirplaneWithSoloActionBase + 968; + break; + + case 4: + action_base = kAirplaneWithSoloActionBase + 4268; + break; + + case 5: + action_base = kAirplaneWithSoloActionBase + 11612; + break; + + default: + SpielFatalError("The chain length for aiplane+solo must be within 2-5"); + break; + } + return action_base; +} + +int GetNumKickersAirplanePairComb(int chain_length) { + int num_comb; + switch (chain_length) { + case 2: + num_comb = kNumKickersAirplanePairCombChainOfLengthTwo; + break; + + case 3: + num_comb = kNumKickersAirplanePairCombChainOfLengthThree; + break; + + case 4: + num_comb = kNumKickersAirplanePairCombChainOfLengthFour; + break; + + default: + SpielFatalError("The chain length for aiplane+Pair must be within 2-4"); + break; + } + return num_comb; +} + +int GetAirplanePairActionBase(int chain_length) { + int action_base; + switch (chain_length) { + case 2: + action_base = kAirplaneWithPairActionBase; + break; + + case 3: + action_base = kAirplaneWithPairActionBase + 605; + break; + + case 4: + action_base = kAirplaneWithPairActionBase + 1805; + break; + default: + SpielFatalError("The chain length for aiplane+Pair must be within 2-4"); + break; + } + return action_base; +} + +TrioCombParams GetAirplaneCombParams(int action) { + if (action < 
kAirplaneWithSoloActionBase || action >= kBombActionBase) + SpielFatalError("Must be airplane pattern"); + + int action_base = kInvalidAction; + KickerType kicker_type; + + SPIEL_CHECK_GE(action, kAirplaneWithSoloActionBase); + SPIEL_CHECK_LT(action, kBombActionBase); + int start_length = 2, end_length, end_base; + + int (*GetActionBaseFunc)(int), (*GetKickersNumFunc)(int); + if (kAirplaneWithSoloActionBase <= action && + action < kAirplaneWithPairActionBase) { + kicker_type = kSolo; + GetActionBaseFunc = &GetAirplaneSoloActionBase; + GetKickersNumFunc = &GetNumKickersAirplaneSoloComb; + end_length = 5; + end_base = kAirplaneWithPairActionBase; + } else { + kicker_type = kPair; + GetActionBaseFunc = &GetAirplanePairActionBase; + GetKickersNumFunc = &GetNumKickersAirplanePairComb; + end_length = 4; + end_base = kBombActionBase; + } + int chain_length; + // label the action Ids in increasing length of chain + for (chain_length = start_length; chain_length <= end_length; + ++chain_length) { + int start_base = GetActionBaseFunc(chain_length); + int next_base = chain_length == end_length + ? end_base + : GetActionBaseFunc(chain_length + 1); + if (start_base <= action && action < next_base) { + action_base = start_base; + break; + } + } + const int hand_id = (action - action_base); + const int num_kickers = GetKickersNumFunc(chain_length); + const int chain_head = hand_id / num_kickers; + const int kicker_steps = hand_id % num_kickers; + SPIEL_CHECK_FALSE(action_base == kInvalidAction); + return TrioCombParams(chain_head, chain_length, kicker_type, kicker_steps); +} + +std::array SingleTrioCombHand(int action) { + std::array hand{}; + + TrioCombParams params = GetSingleTrioCombParams(action); + + hand[params.chain_head] = 3; + const int kicker_steps = params.kicker_id; + int kicker_rank, counter = 0; + + for (kicker_rank = 0; kicker_rank < kNumRanks; ++kicker_rank) { + // kicker cannot be the same rank as trio + if (kicker_rank == params.chain_head) continue; + if (counter++ == kicker_steps) break; + } + + hand[kicker_rank] = (params.kicker_type == kSolo ? 
1 : 2); + return hand; +} + +int SingleTrioCombHandToActionId(absl::Span hand) { + int trio_rank, kicker_rank; + int trio_counter = 0, kicker_counter = 0; + for (int rank = 0; rank < kNumRanks; ++rank) { + if (hand[rank] == 3) { + trio_counter++; + trio_rank = rank; + } else if (hand[rank] == 1 || hand[rank] == 2) { + kicker_counter++; + kicker_rank = rank; + } else if (hand[rank] == 4) { + SpielFatalError("There cannot be a bomb"); + } + } + SPIEL_CHECK_EQ(trio_counter, 1); + SPIEL_CHECK_EQ(kicker_counter, 1); + + int action; + if (hand[kicker_rank] == 1) + action = kTrioWithSoloActionBase; + else + action = kTrioWithPairActionBase; + // one of the rank had already been taken by the trio + if (hand[kicker_rank] == 1) + action += trio_rank * (kNumRanks - 1); + else + action += trio_rank * (kNumRanks - 3); // the jokers cannot be the pair + int kicker_steps = 0; + for (int rank = 0; rank < kNumRanks; ++rank) { + if (rank == trio_rank) continue; + if (rank == kicker_rank) break; + kicker_steps++; + } + action += kicker_steps; + return action; +} + +void SearchSingleTrioCombActions(std::vector* actions, + absl::Span hand, + int prev_action = kInvalidAction) { + TrioCombParams prev_action_params; + int start_rank; + if (prev_action == kInvalidAction) { + start_rank = 0; + } else { + prev_action_params = GetSingleTrioCombParams(prev_action); + start_rank = prev_action_params.chain_head + 1; + } + // enumerate possible trio + for (int rank = start_rank; rank < kNumRanks - 2; ++rank) { + if (hand[rank] < 3) continue; + for (int kicker = 0; kicker < kNumRanks; ++kicker) { + if (!hand[kicker] || kicker == rank) continue; + std::vector all_kicker_types; + if (prev_action != kInvalidAction) { + if (hand[kicker] >= prev_action_params.kicker_type) + all_kicker_types.push_back(prev_action_params.kicker_type); + } else { + for (int i = 1; i <= std::min(hand[kicker], 2); ++i) + all_kicker_types.push_back(static_cast(i)); + } + for (auto n : all_kicker_types) { + std::array used_hand{}; + used_hand[rank] = 3; + used_hand[kicker] = static_cast(n); + actions->push_back(SingleTrioCombHandToActionId(used_hand)); + } + } + } +} + +// a dfs backtrack algorithm to compute action ids / hands for airplane +// combinations if target_count = -1, then the goal of this algorithm is to find +// the kicker_id of ans_hand, stored in count reference otherwise, the goal is +// to find a hand whose kicker_id is target_count and the result hand is stored +// in ans_hand +bool dfs_airplane_kicker(int chain_length, int depth, int target_count, + int& count, int max_search_rank, + absl::Span used_rank, absl::Span ans_hand, + KickerType kicker_type) { + if (chain_length == depth) { + if (target_count == -1) { + bool found = true; + for (int rank = 0; rank < kNumRanks; ++rank) + found = found & (used_rank[rank] == ans_hand[rank]); + if (found) return true; + } else if (target_count == count) { + for (int rank = 0; rank < kNumRanks; ++rank) + ans_hand[rank] = used_rank[rank]; + return true; + } + count++; + } else { + for (int rank = 0; rank <= max_search_rank; ++rank) { + SPIEL_CHECK_NE(used_rank[rank], kNumSuits); + if (used_rank[rank] == 3) continue; + if (kicker_type == kPair) { + SPIEL_CHECK_NE(used_rank[rank], 1); + if (used_rank[rank] == 2) continue; + } + if (rank == kNumRanks - 1 || rank == kNumRanks - 2) { + if (kicker_type == kPair) continue; + if (used_rank[rank]) continue; + // Rocket cannot be kickers + if (used_rank[2 * kNumRanks - 3 - rank]) continue; + } + used_rank[rank] += kicker_type == kSolo ? 
1 : 2; + if (dfs_airplane_kicker(chain_length, depth + 1, target_count, count, + rank, used_rank, ans_hand, kicker_type)) + return true; + used_rank[rank] -= kicker_type == kSolo ? 1 : 2; + } + } + return false; +} + +std::array AirplaneCombHand(int action) { + std::array hand{}; + std::array used_rank{}; + SPIEL_CHECK_GE(action, kAirplaneWithSoloActionBase); + SPIEL_CHECK_LT(action, kBombActionBase); + TrioCombParams params = GetAirplaneCombParams(action); + for (int i = 0; i < params.chain_length; ++i) { + hand[params.chain_head + i] = used_rank[params.chain_head + i] = 3; + } + const int kicker_steps = params.kicker_id; + int count = 0; + bool found = dfs_airplane_kicker(params.chain_length, 0, kicker_steps, count, + kNumRanks - 1, absl::MakeSpan(used_rank), + absl::MakeSpan(hand), params.kicker_type); + SPIEL_CHECK_TRUE(found); + return hand; +} + +// for aiplane combination, we have to specify the chain head +// to resolve ambiguity such as 333444555666 +int AirplaneCombHandToActionId(absl::Span hand, int chain_head, + KickerType kicker_type) { + int chain_length = 0; + bool chain_begun = false; + std::vector kickers; + for (int rank = 0; rank < kNumRanks; ++rank) { + SPIEL_CHECK_LT(hand[rank], kNumSuits); + if (!hand[rank]) continue; + if (!chain_begun && rank != chain_head) { + if (kicker_type == kSolo) { + for (int i = 0; i < hand[rank]; ++i) { + kickers.push_back(rank); + } + } else { + SPIEL_CHECK_EQ(hand[rank], 2); + kickers.push_back(rank); + } + } else if (rank == chain_head) { + SPIEL_CHECK_EQ(hand[rank], 3); + chain_begun = true; + chain_length++; + } else if (chain_begun && hand[rank] == 3) { + chain_length++; + } else if (chain_begun && hand[rank] != 3) { + chain_begun = false; + if (kicker_type == kSolo) { + for (int i = 0; i < hand[rank]; ++i) kickers.push_back(rank); + } else { + SPIEL_CHECK_EQ(hand[rank], 2); + kickers.push_back(rank); + } + } + } + + // handle case where 333444555666 and chain_head=3 + // in this case, the above linear scan algorithm will view 3-4-5-6 as the + // chain where 6s should be the kickers + if (chain_length - 1 == static_cast(kickers.size()) + 3) { + chain_length--; + for (int i = 0; i < 3; ++i) kickers.push_back(chain_head + chain_length); + } + SPIEL_CHECK_EQ(chain_length, static_cast(kickers.size())); + + if (chain_head + chain_length - 1 >= kNumRanks - 3) + SpielFatalError("2s, Joker cannot be in a chain"); + int action_base; + if (kicker_type == kSolo) + action_base = GetAirplaneSoloActionBase(chain_length) + + chain_head * GetNumKickersAirplaneSoloComb(chain_length); + else + action_base = GetAirplanePairActionBase(chain_length) + + chain_head * GetNumKickersAirplanePairComb(chain_length); + + int count = 0; + std::array used_rank{}; + for (int i = 0; i < chain_length; ++i) used_rank[chain_head + i] = 3; + + std::array hand_copy{}; + for (int i = 0; i < kNumRanks; ++i) hand_copy[i] = hand[i]; + bool found = dfs_airplane_kicker(chain_length, 0, -1, count, kNumRanks - 1, + absl::MakeSpan(used_rank), + absl::MakeSpan(hand_copy), kicker_type); + SPIEL_CHECK_TRUE(found); + + return action_base + count; +} + +// a dfs backtrack algorithm that found the action ids of all possible airplane +// combination the action ids are stored in action_ids +void dfs_add_all_airplane_kickers(int chain_head, int chain_length, int depth, + int max_search_rank, + absl::Span used_rank, + absl::Span ans_hand, + std::vector* action_ids, + KickerType kicker_type) { + if (chain_length == depth) { + std::array final_hand{}; + for (int i = 0; i < kNumRanks; ++i) 
final_hand[i] = used_rank[i]; + action_ids->push_back(static_cast( + AirplaneCombHandToActionId(final_hand, chain_head, kicker_type))); + } else { + for (int rank = 0; rank <= max_search_rank; ++rank) { + if (rank >= chain_head && rank <= chain_head + chain_length - 1) continue; + SPIEL_CHECK_NE(used_rank[rank], kNumSuits); + if (used_rank[rank] == 3) continue; + if (kicker_type == kPair) { + SPIEL_CHECK_NE(used_rank[rank], 1); + if (used_rank[rank] == 2) continue; + } + if (rank == kNumRanks - 1 || rank == kNumRanks - 2) { + if (kicker_type == kPair) continue; + if (used_rank[rank]) continue; + if (used_rank[2 * kNumRanks - 3 - rank]) continue; + } + int num_use_cards = kicker_type == kSolo ? 1 : 2; + if (ans_hand[rank] < num_use_cards + used_rank[rank]) continue; + used_rank[rank] += num_use_cards; + dfs_add_all_airplane_kickers(chain_head, chain_length, depth + 1, rank, + used_rank, ans_hand, action_ids, + kicker_type); + used_rank[rank] -= num_use_cards; + } + } +} + +void SearchAirplaneCombActions(std::vector* actions, + absl::Span hand, + int prev_action = kInvalidAction) { + TrioCombParams prev_action_params; + int start_rank; + if (prev_action == kInvalidAction) { + start_rank = 0; + } else { + prev_action_params = GetAirplaneCombParams(prev_action); + start_rank = prev_action_params.chain_head + 1; + } + for (int chain_head = start_rank; chain_head < kNumRanks - 4; ++chain_head) { + if (hand[chain_head] < 3) continue; + int num_cards = hand[chain_head]; + for (int chain_length = 2; chain_head + chain_length - 1 < kNumRanks - 3; + ++chain_length) { + int chain_tail = chain_head + chain_length - 1; + num_cards = std::min(num_cards, hand[chain_tail]); + if (num_cards < 3) break; + std::vector all_kicker_types; + if (prev_action != kInvalidAction) { + if (chain_length > prev_action_params.chain_length) break; + if (chain_length == prev_action_params.chain_length) { + all_kicker_types.push_back(prev_action_params.kicker_type); + } + } else { + all_kicker_types.push_back(kSolo); + all_kicker_types.push_back(kPair); + } + for (auto kicker_type : all_kicker_types) { + std::array used_hand{}; + for (int i = 0; i < chain_length; ++i) used_hand[chain_head + i] = 3; + dfs_add_all_airplane_kickers(chain_head, chain_length, 0, kNumRanks - 1, + absl::MakeSpan(used_hand), + absl::MakeSpan(hand), actions, + kicker_type); + } + } + } +} + +std::array ActionToHand(int action) { + std::array hand{}; + if ((action >= kPlayActionBase && action < kSoloChainActionBase) || + (action >= kPairActionBase && action < kPairChainActionBase) || + (action >= kTrioActionBase && action < kTrioWithSoloActionBase) || + (action >= kBombActionBase && action < kRocketActionBase)) { + hand = SingleRankHand(action); + } else if ((action >= kSoloChainActionBase && action < kPairActionBase) || + (action >= kPairChainActionBase && action < kTrioActionBase) || + (action >= kAirplaneActionBase && + action < kAirplaneWithSoloActionBase)) { + hand = ChainOnlyHand(action); + } else if (action >= kTrioWithSoloActionBase && + action < kAirplaneActionBase) { + hand = SingleTrioCombHand(action); + } else if (action >= kAirplaneWithSoloActionBase && + action < kBombActionBase) { + hand = AirplaneCombHand(action); + } else if (action == kRocketActionBase) { + hand[kNumRanks - 1] = hand[kNumRanks - 2] = 1; + } else { + SpielFatalError("Non valid Action Ids"); + } + return hand; +} + +void SearchForLegalActions(std::vector* legal_actions, + absl::Span hand, int prev_action) { + if (hand[kNumRanks - 2] && hand[kNumRanks - 1]) + 
legal_actions->push_back(kRocketActionBase); + if (prev_action == kInvalidAction) { + // search for all possible actions + SearchSingleRankActions(legal_actions, hand, prev_action); + SearchChainOnlyActions(legal_actions, hand, prev_action); + SearchSingleTrioCombActions(legal_actions, hand, prev_action); + SearchAirplaneCombActions(legal_actions, hand, prev_action); + } else if (prev_action >= kBombActionBase && + prev_action < kRocketActionBase) { + // if previous action is a bomb, then only higher bomb or rocket can be + // played + SearchSingleRankActions(legal_actions, hand, prev_action); + } else { + // check for bombs + for (int rank = 0; rank < kNumRanks - 2; ++rank) { + if (hand[rank] == kNumSuits) { + std::array used_rank{}; + used_rank[rank] = kNumSuits; + legal_actions->push_back(SingleRankHandToActionId(used_rank)); + } + } + + // then search within each category + if ((prev_action >= kPlayActionBase && + prev_action < kSoloChainActionBase) || + (prev_action >= kPairActionBase && + prev_action < kPairChainActionBase) || + (prev_action >= kTrioActionBase && + prev_action < kTrioWithSoloActionBase)) { + SearchSingleRankActions(legal_actions, hand, prev_action); + } else if ((prev_action >= kSoloChainActionBase && + prev_action < kPairActionBase) || + (prev_action >= kPairChainActionBase && + prev_action < kTrioActionBase) || + (prev_action >= kAirplaneActionBase && + prev_action < kAirplaneWithSoloActionBase)) { + SearchChainOnlyActions(legal_actions, hand, prev_action); + } else if (prev_action >= kTrioWithSoloActionBase && + prev_action < kAirplaneActionBase) { + SearchSingleTrioCombActions(legal_actions, hand, prev_action); + } else if (prev_action >= kAirplaneWithSoloActionBase && + prev_action < kBombActionBase) { + SearchAirplaneCombActions(legal_actions, hand, prev_action); + } else if (prev_action == kRocketActionBase) { + } else { + SpielFatalError("Previous actions invalid"); + } + } +} + +} // namespace dou_dizhu +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/dou_dizhu/dou_dizhu_utils.h b/scenarios/bargaining/open_spiel/open_spiel/games/dou_dizhu/dou_dizhu_utils.h new file mode 100644 index 0000000..702f09e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/dou_dizhu/dou_dizhu_utils.h @@ -0,0 +1,178 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
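SearchForLegalActions above is the entry point that ties the category-specific searches together: it pushes the rocket whenever both jokers are held, then dispatches on the range that prev_action falls into. A minimal usage sketch (a sketch under assumptions, not part of the upstream OpenSpiel sources; it assumes the template parameters that this rendering of the diff strips, i.e. per-rank hands in std::array<int, kNumRanks> and actions collected into std::vector<open_spiel::Action>):

```cpp
// Illustrative sketch only; assumes the stripped template parameters noted above.
#include <array>
#include <iostream>
#include <vector>

#include "open_spiel/games/dou_dizhu/dou_dizhu_utils.h"

int main() {
  using namespace open_spiel::dou_dizhu;
  // 558999TJJJJKKK, written as per-rank counts (index 0 = '3', ..., 12 = '2').
  std::array<int, kNumRanks> hand = {0, 0, 2, 0, 0, 1, 3, 1, 4, 0, 3};
  std::vector<open_spiel::Action> legal;
  // kInvalidAction means there is nothing to beat, so every category is searched.
  SearchForLegalActions(&legal, hand, open_spiel::kInvalidAction);
  for (open_spiel::Action a : legal) {
    std::cout << a << ": " << FormatSingleHand(ActionToHand(a)) << "\n";
  }
  return 0;
}
```

When prev_action is a concrete id from a normal category, only bombs (plus the rocket, if held) and higher hands of the same category come back, matching the dispatch above.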
+ +#ifndef OPEN_SPIEL_GAMES_DOU_DIZHU_DOU_DIZHU_UTILS_H_ +#define OPEN_SPIEL_GAMES_DOU_DIZHU_DOU_DIZHU_UTILS_H_ + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace dou_dizhu { + +enum class Phase { kDeal, kAuction, kPlay, kGameOver }; + +inline constexpr int kNumPlayers = 3; +inline constexpr int kNumCards = 54; + +inline constexpr int kNumBids = 3; +inline constexpr int kNumCardsPerSuit = 13; + +// player 0, 1 passes, 2 bids 1, then 0 passes, 1 bids 2, 2 passes, 0 bids 3, 1 +// & 2 passes +inline constexpr int kMaxAuctionLength = 9; + +// the maximum/minimum utility is achieved if the players play all 13 bombs +// alternatively and dizhu bid maximum bids +inline constexpr int kMaxUtility = kNumBids * 16384; +inline constexpr int kMinUtility = -kNumBids * 8192; + +// 13 normal cards + 2 jokers +inline constexpr int kNumRanks = kNumCardsPerSuit + 2; + +inline constexpr int kNumCardsLeftOver = 3; + +inline constexpr int kNumSuits = 4; + +// Observations are: the number of cards of each rank I current have +// Plus the number of cards of each rank that had been played by all players +// Plus the start player +// Plus the face up card +inline constexpr int kObservationTensorSize = + 2 * ((kNumRanks - 2) * (kNumSuits + 1) + 2 * 2) + kNumPlayers + + kNumPlayers + kNumRanks; + +inline constexpr int kDealingActionBase = kNumCards - kNumCardsLeftOver; + +inline constexpr int kBiddingActionBase = 0; + +inline constexpr int kPass = kBiddingActionBase; + +inline constexpr int kPlayActionBase = kBiddingActionBase + 1 + kNumBids; + +inline constexpr int kSoloChainMinLength = 5; +inline constexpr int kSoloChainActionBase = kPlayActionBase + 15; + +inline constexpr int kPairActionBase = kSoloChainActionBase + 36; + +inline constexpr int kPairChainMinLength = 3; +inline constexpr int kPairChainActionBase = kPairActionBase + 13; + +inline constexpr int kTrioActionBase = kPairChainActionBase + 52; + +inline constexpr int kTrioWithSoloActionBase = kTrioActionBase + 13; + +inline constexpr int kTrioWithPairActionBase = kTrioWithSoloActionBase + 182; + +inline constexpr int kAirplaneMinLength = 2; +inline constexpr int kAirplaneActionBase = kTrioWithPairActionBase + 156; + +inline constexpr int kAirplaneWithSoloMinLength = 2; +inline constexpr int kAirplaneWithSoloActionBase = kAirplaneActionBase + 45; + +inline constexpr int kAirplaneWithPairMinLength = 2; +inline constexpr int kAirplaneWithPairActionBase = + kAirplaneWithSoloActionBase + 22588; + +inline constexpr int kBombActionBase = kAirplaneWithPairActionBase + 2939; +inline constexpr int kRocketActionBase = kBombActionBase + 13; + +inline constexpr int kNumKickersAirplaneSoloCombChainOfLengthTwo = 88; +inline constexpr int kNumKickersAirplaneSoloCombChainOfLengthThree = 330; +inline constexpr int kNumKickersAirplaneSoloCombChainOfLengthFour = 816; +inline constexpr int kNumKickersAirplaneSoloCombChainOfLengthFive = 1372; + +inline constexpr int kNumKickersAirplanePairCombChainOfLengthTwo = 55; +inline constexpr int kNumKickersAirplanePairCombChainOfLengthThree = 120; +inline constexpr int kNumKickersAirplanePairCombChainOfLengthFour = 126; + +constexpr char kRankChar[] = "3456789TJQKA2"; +// only for dealing phase usages +constexpr char kSuitChar[] = "CDHS"; + +enum KickerType { kSolo = 1, kPair }; + +// single rank hand means hands consisting of only a single rank +// includes solo, pair, trio, bombs +struct SingleRankHandParams { + int rank; + int num_cards; + 
SingleRankHandParams(int r, int n) : rank(r), num_cards(n) {} + SingleRankHandParams() {} +}; + +// chain only hand means hands consisting of only consecutive ranks +// includes solo chain, pair chain and airplane +struct ChainOnlyHandParams { + int chain_head; + int num_cards_per_rank; + int chain_length; + ChainOnlyHandParams(int h, int n, int l) + : chain_head(h), num_cards_per_rank(n), chain_length(l) {} + ChainOnlyHandParams() {} +}; + +// shared by trio+solo, trio+pair, airplane+solo, airplane+pair +struct TrioCombParams { + int chain_head; + int chain_length; + KickerType kicker_type; + int kicker_id; + TrioCombParams(int head, int length, KickerType k, int k_id) + : chain_head(head), + chain_length(length), + kicker_type(k), + kicker_id(k_id) {} + TrioCombParams() {} +}; + +int CardToRank(int card); +std::string RankString(int rank); +std::string CardString(int card); +std::string FormatSingleHand(absl::Span hand); +std::string FormatAirplaneCombHand(int action); + +SingleRankHandParams GetSingleRankHandParams(int action); +std::array SingleRankHand(int action); +int SingleRankHandToActionId(absl::Span hand); +void SearchSingleRankActions(std::vector* actions, + absl::Span hand, int prev_action); + +ChainOnlyHandParams GetChainOnlyHandParams(int action); +std::array ChainOnlyHand(int action); +int ChainOnlyHandToActionId(absl::Span hand); +void SearchChainOnlyActions(std::vector* actions, + absl::Span hand, int prev_action); + +TrioCombParams GetSingleTrioCombParams(int action); +std::array SingleTrioCombHand(int action); +int SingleTrioCombHandToActionId(absl::Span hand); +void SearchSingleTrioCombActions(std::vector* actions, + absl::Span hand, int prev_action); + +TrioCombParams GetAirplaneCombParams(int action); +std::array AirplaneCombHand(int action); +int AirplaneCombHandToActionId(absl::Span hand, int chain_head, + KickerType kicker_type); +void SearchAirplaneCombActions(std::vector* actions, + absl::Span hand, int prev_action); + +std::array ActionToHand(int action); +void SearchForLegalActions(std::vector* legal_actions, + absl::Span hand, int prev_action); + +} // namespace dou_dizhu +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_DOU_DIZHU_DOU_DIZHU_UTILS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/dou_dizhu/dou_dizhu_utils_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/dou_dizhu/dou_dizhu_utils_test.cc new file mode 100644 index 0000000..11535a9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/dou_dizhu/dou_dizhu_utils_test.cc @@ -0,0 +1,173 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
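The action bases declared in dou_dizhu_utils.h above partition one flat integer range, so individual ids can be checked by hand. A small sketch of that arithmetic for a pair of kings (illustrative only; it relies on the constants and the kRankChar ordering shown above):

```cpp
// Worked check of the single-rank action-id layout (illustrative sketch).
//   kPlayActionBase      = kBiddingActionBase + 1 + kNumBids = 0 + 1 + 3 = 4
//   kSoloChainActionBase = kPlayActionBase + 15              = 19
//   kPairActionBase      = kSoloChainActionBase + 36         = 55
// 'K' is index 10 in kRankChar ("3456789TJQKA2"), so a pair of kings should
// map to kPairActionBase + 10 = 65.
#include <array>

#include "open_spiel/games/dou_dizhu/dou_dizhu_utils.h"
#include "open_spiel/spiel_utils.h"

int main() {
  using namespace open_spiel::dou_dizhu;
  std::array<int, kNumRanks> hand{};
  hand[10] = 2;  // KK
  SPIEL_CHECK_EQ(SingleRankHandToActionId(hand), kPairActionBase + 10);  // 65
  return 0;
}
```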
+ +#include "open_spiel/games/dou_dizhu/dou_dizhu_utils.h" + +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace dou_dizhu { + +void SingleRankHandTest() { + std::array hand1{}, hand2{}; + hand1[6] = 3; + int action_id1 = SingleRankHandToActionId(hand1); + SPIEL_CHECK_EQ(FormatSingleHand(SingleRankHand(action_id1)), "999"); + + hand2[13] = 1; + int action_id2 = SingleRankHandToActionId(hand2); + SPIEL_CHECK_EQ(FormatSingleHand(SingleRankHand(action_id2)), "(BWJ)"); + + // 558999TJJJJKKK + std::array current_hand = {0, 0, 2, 0, 0, 1, 3, 1, 4, 0, 3}; + + std::vector actions1, actions2, actions3; + + // The only hands that are greater than 999 are JJJ and KKK + SearchSingleRankActions(&actions1, current_hand, /*prev_action=*/action_id1); + SPIEL_CHECK_EQ(static_cast(actions1.size()), 2); + + // No hands greater than BWJ + SearchSingleRankActions(&actions2, current_hand, /*prev_action=*/action_id2); + SPIEL_CHECK_EQ(static_cast(actions2.size()), 0); + + // 6 solos + 4 pairs + 3 trios + 1 bomb = 14 + SearchSingleRankActions(&actions3, current_hand, + /*prev_action=*/kInvalidAction); + SPIEL_CHECK_EQ(static_cast(actions3.size()), 14); +} + +void ChainOnlyHandTest() { + std::array hand1 = {0, 0, 0, 3, 3, 3}; + int action_id1 = ChainOnlyHandToActionId(hand1); + + SPIEL_CHECK_EQ(FormatSingleHand(ChainOnlyHand(action_id1)), "666777888"); + + std::array hand2 = {2, 2, 2, 2, 2, 2, 2, 2, 2}; + + int action_id2 = ChainOnlyHandToActionId(hand2); + SPIEL_CHECK_EQ(FormatSingleHand(ChainOnlyHand(action_id2)), + "33445566778899TTJJ"); + + // 5566777888999TTTJJQQKKAA22(BWJ)(CJ) + std::array current_hand = {0, 0, 2, 2, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 1, 1}; + + std::vector actions1, actions2, actions3; + SearchChainOnlyActions(&actions1, current_hand, /*prev_action=*/action_id1); + + // The only hands greater than 666777888 are 777888999 and 888999TTT + SPIEL_CHECK_EQ(static_cast(actions1.size()), 2); + + SearchChainOnlyActions(&actions2, current_hand, /*prev_action=*/action_id2); + + // The only hands greater than 334455....TTJJ are 5566....QQKK and + // 6677.....KKAA + SPIEL_CHECK_EQ(static_cast(actions2.size()), 2); + + SearchChainOnlyActions(&actions3, current_hand, + /*prev_action=*/kInvalidAction); + SPIEL_CHECK_EQ(static_cast(actions3.size()), 63); +} + +void SingleTrioCombHandTest() { + std::array hand1{}, hand2{}; + + // 999-(CJ) + hand1[6] = 3; + hand1[14] = 1; + int action_id1 = SingleTrioCombHandToActionId(hand1); + SPIEL_CHECK_EQ(FormatSingleHand(SingleTrioCombHand(action_id1)), "999(CJ)"); + + // 333-22 + hand2[12] = 2; + hand2[0] = 3; + + int action_id2 = SingleTrioCombHandToActionId(hand2); + SPIEL_CHECK_EQ(FormatSingleHand(SingleTrioCombHand(action_id2)), "33322"); + + // 666777TTTQQQ222(BWJ)(CJ) + std::array current_hand = {0, 0, 0, 3, 3, 0, 0, 3, + 0, 3, 0, 0, 3, 1, 1}; + + std::vector actions1, actions2, actions3; + + // The hands that are greater than 333222 uses trios 666, 777, TTT, QQQ, 222 + // And we just enuemerate all possible pairs + SearchSingleTrioCombActions(&actions1, current_hand, + /*prev_action=*/action_id1); + SPIEL_CHECK_EQ(static_cast(actions1.size()), 18); + + SearchSingleTrioCombActions(&actions2, current_hand, + /*prev_action=*/action_id2); + SPIEL_CHECK_EQ(static_cast(actions2.size()), 20); + + SearchSingleTrioCombActions(&actions3, current_hand, 
kInvalidAction); + SPIEL_CHECK_EQ(static_cast(actions3.size()), 50); +} + +void AirplaneCombHandTest() { + // 888999TTTJJJQQQ-7772(CJ) + std::array hand1 = {0, 0, 0, 0, 3, 3, 3, 3, + 3, 3, 0, 0, 1, 0, 1}; + + int action_id1 = AirplaneCombHandToActionId(hand1, /*chain_head=*/5, + /*kicker_type=*/kSolo); + SPIEL_CHECK_EQ(FormatSingleHand(AirplaneCombHand(action_id1)), + "777888999TTTJJJQQQ2(CJ)"); + + // TTTJJJQQQKKK-33445522 + std::array hand2 = {2, 2, 2, 0, 0, 0, 0, 3, + 3, 3, 3, 0, 2, 0, 0}; + int action_id2 = AirplaneCombHandToActionId(hand2, /*chain_head=*/7, + /*kicker_type=*/kPair); + SPIEL_CHECK_EQ(FormatSingleHand(AirplaneCombHand(action_id2)), + "334455TTTJJJQQQKKK22"); + + // 667899TTTJJJJQQQKKKAAA222(BWJ)(CJ) + std::array current_hand = {0, 0, 0, 2, 1, 1, 2, 3, + 4, 3, 3, 3, 3, 1, 1}; + std::vector actions1, actions2, actions3; + SearchAirplaneCombActions(&actions1, current_hand, + /*prev_action=*/action_id1); + // C(7, 5) - C(5, 3) + 3*(C(6, 3) - C(4, 1)) + C(3, 2) * 5 + 2 + C(6, 2) - 1 = + // 90 + SPIEL_CHECK_EQ(static_cast(actions1.size()), 90); + + // The only hand that is greater than TTTJJJQQQKKK-33445522 is + // JJJQQQKKKAAA-6699TT22 + SearchAirplaneCombActions(&actions2, current_hand, + /*prev_action=*/action_id2); + SPIEL_CHECK_EQ(static_cast(actions2.size()), 1); + + SearchAirplaneCombActions(&actions3, current_hand, + /*prev_action=*/kInvalidAction); + SPIEL_CHECK_EQ(static_cast(actions3.size()), 1052); +} + +} // namespace dou_dizhu +} // namespace open_spiel + +int main() { + open_spiel::dou_dizhu::SingleRankHandTest(); + open_spiel::dou_dizhu::ChainOnlyHandTest(); + open_spiel::dou_dizhu::SingleTrioCombHandTest(); + open_spiel::dou_dizhu::AirplaneCombHandTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/dynamic_routing/dynamic_routing_data.cc b/scenarios/bargaining/open_spiel/open_spiel/games/dynamic_routing/dynamic_routing_data.cc new file mode 100644 index 0000000..8a804c1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/dynamic_routing/dynamic_routing_data.cc @@ -0,0 +1,84 @@ +// Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
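The per-rank count arrays used throughout the Dou Dizhu tests above decode against kRankChar ("3456789TJQKA2"), with indices 13 and 14 reserved for the two jokers. A short decoding of the hand from SingleRankHandTest, as a hedged sketch (FormatSingleHand performs the same expansion):

```cpp
// Illustrative sketch: index i counts cards of rank kRankChar[i]; indices 13/14
// are the black-and-white and colored jokers.
#include <array>
#include <iostream>

#include "open_spiel/games/dou_dizhu/dou_dizhu_utils.h"

int main() {
  using namespace open_spiel::dou_dizhu;
  //                               3  4  5  6  7  8  9  T  J  Q  K
  std::array<int, kNumRanks> hand{0, 0, 2, 0, 0, 1, 3, 1, 4, 0, 3};
  // "55" + "8" + "999" + "T" + "JJJJ" + "KKK"
  std::cout << FormatSingleHand(hand) << "\n";  // 558999TJJJJKKK
  return 0;
}
```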
+ +#include "open_spiel/games/dynamic_routing/dynamic_routing_data.h" + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/memory/memory.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/games/dynamic_routing/dynamic_routing_utils.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel::dynamic_routing { + +std::unique_ptr DynamicRoutingData::Create( + DynamicRoutingDataName name) { + std::unique_ptr data = + absl::make_unique(); + switch (name) { + case DynamicRoutingDataName::kLine: { + absl::flat_hash_map> + adjacency_list = {{"bef_O", {"O"}}, + {"O", {"A"}}, + {"A", {"D"}}, + {"D", {"aft_D"}}, + {"aft_D", {}}}; + data->network_ = Network::Create(adjacency_list); + data->od_demand_ = + absl::make_unique>(std::vector{ + OriginDestinationDemand("bef_O->O", "D->aft_D", 0, 100)}); + return data; + } + case DynamicRoutingDataName::kBraess: { + const int kBraessNumPlayer = 5; + absl::flat_hash_map> + adjacency_list = {{"O", {"A"}}, {"A", {"B", "C"}}, {"B", {"C", "D"}}, + {"C", {"D"}}, {"D", {"E"}}, {"E", {}}}; + absl::flat_hash_map> node_position = + {{"O", {0, 0}}, {"A", {1, 0}}, {"B", {2, 1}}, + {"C", {2, -1}}, {"D", {3, 0}}, {"E", {4, 0}}}; + absl::flat_hash_map bpr_a_coefficient = { + {"O->A", 0}, {"A->B", 1.0}, {"A->C", 0}, {"B->C", 0}, + {"B->D", 0}, {"C->D", 1.0}, {"D->E", 0}}; + absl::flat_hash_map bpr_b_coefficient = { + {"O->A", 1.0}, {"A->B", 1.0}, {"A->C", 1.0}, {"B->C", 1.0}, + {"B->D", 1.0}, {"C->D", 1.0}, {"D->E", 1.0}}; + absl::flat_hash_map capacity = { + {"O->A", kBraessNumPlayer}, {"A->B", kBraessNumPlayer}, + {"A->C", kBraessNumPlayer}, {"B->C", kBraessNumPlayer}, + {"B->D", kBraessNumPlayer}, {"C->D", kBraessNumPlayer}, + {"D->E", kBraessNumPlayer}}; + absl::flat_hash_map free_flow_travel_time = { + {"O->A", 0}, {"A->B", 1.0}, {"A->C", 2.0}, {"B->C", 0.25}, + {"B->D", 2.0}, {"C->D", 1.0}, {"D->E", 0}}; + data->network_ = + Network::Create(adjacency_list, node_position, bpr_a_coefficient, + bpr_b_coefficient, capacity, free_flow_travel_time); + data->od_demand_ = + absl::make_unique>(std::vector{ + OriginDestinationDemand("O->A", "D->E", 0, kBraessNumPlayer)}); + return data; + } + default: + open_spiel::SpielFatalError( + absl::StrCat("Unknown Dynamic Routing Data Name: ", name)); + } + return data; +} + +} // namespace open_spiel::dynamic_routing diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/dynamic_routing/dynamic_routing_data.h b/scenarios/bargaining/open_spiel/open_spiel/games/dynamic_routing/dynamic_routing_data.h new file mode 100644 index 0000000..73c44cf --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/dynamic_routing/dynamic_routing_data.h @@ -0,0 +1,42 @@ +// Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#ifndef OPEN_SPIEL_GAMES_DYNAMIC_ROUTING_DYNAMIC_ROUTING_DATA_H_
+#define OPEN_SPIEL_GAMES_DYNAMIC_ROUTING_DYNAMIC_ROUTING_DATA_H_
+
+#include <memory>
+
+#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h"
+#include "open_spiel/abseil-cpp/absl/strings/string_view.h"
+#include "open_spiel/games/dynamic_routing/dynamic_routing_utils.h"
+
+namespace open_spiel::dynamic_routing {
+
+// The enum for supported Dynamic Routing Data.
+enum class DynamicRoutingDataName { kLine, kBraess };
+
+// Data of the Dynamic Routing Game
+class DynamicRoutingData {
+ public:
+  // Creates data for the specific dynamic routing game.
+  static std::unique_ptr<DynamicRoutingData> Create(
+      DynamicRoutingDataName name);
+
+  std::unique_ptr<Network> network_;
+  std::unique_ptr<std::vector<OriginDestinationDemand>> od_demand_;
+};
+
+}  // namespace open_spiel::dynamic_routing
+
+#endif  // OPEN_SPIEL_GAMES_DYNAMIC_ROUTING_DYNAMIC_ROUTING_DATA_H_
diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/dynamic_routing/dynamic_routing_data_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/dynamic_routing/dynamic_routing_data_test.cc
new file mode 100644
index 0000000..963c6b6
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/games/dynamic_routing/dynamic_routing_data_test.cc
@@ -0,0 +1,92 @@
+// Copyright 2019 DeepMind Technologies Ltd. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
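The test below checks Network::GetTravelTime against the Bureau of Public Roads volume-delay function t(v) = fftt * (1 + a * (v / capacity)^b). A standalone sketch of that arithmetic, using the Braess link parameters defined in dynamic_routing_data.cc above (illustrative only; BprTravelTime is a local helper, not an OpenSpiel symbol):

```cpp
// Illustrative sketch of the BPR volume-delay function, evaluated on the Braess
// links "A->B" (fftt=1.0, a=1.0, b=1.0, capacity=5) and "A->C" (fftt=2.0, a=0).
#include <cmath>
#include <iostream>

float BprTravelTime(float fftt, float a, float b, float capacity, float volume) {
  return fftt * (1.0f + a * std::pow(volume / capacity, b));
}

int main() {
  std::cout << BprTravelTime(1.0f, 1.0f, 1.0f, 5.0f, 1.0f) << "\n";  // 1.2
  std::cout << BprTravelTime(1.0f, 1.0f, 1.0f, 5.0f, 5.0f) << "\n";  // 2.0
  std::cout << BprTravelTime(2.0f, 0.0f, 1.0f, 5.0f, 5.0f) << "\n";  // 2.0 (congestion-free)
  return 0;
}
```

At full demand (volume 5) the congestible links cost 2.0, matching the constant links.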
+ +#include "open_spiel/games/dynamic_routing/dynamic_routing_data.h" + +#include "open_spiel/games/dynamic_routing/dynamic_routing_utils.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel::dynamic_routing { + +namespace { +float GetTravelTime(float free_flow_travel_time, float a, float b, + float capacity, float volume) { + return free_flow_travel_time * (1.0 + a * pow(volume / capacity, b)); +} +void TestGetDynamicRoutingDataLine() { + std::unique_ptr data = + DynamicRoutingData::Create(DynamicRoutingDataName::kLine); + Network* network = data->network_.get(); + OriginDestinationDemand od_demand = data->od_demand_->at(0); + SPIEL_CHECK_EQ(network->num_links(), 4); + SPIEL_CHECK_EQ(network->GetSuccessors("bef_O"), + std::vector{"O"}); + SPIEL_CHECK_EQ(network->GetSuccessors("O"), std::vector{"A"}); + SPIEL_CHECK_EQ(network->GetSuccessors("A"), std::vector{"D"}); + SPIEL_CHECK_EQ(network->GetSuccessors("D"), + std::vector{"aft_D"}); + SPIEL_CHECK_EQ(network->GetSuccessors("aft_D"), std::vector{}); + SPIEL_CHECK_FALSE(network->IsLocationASinkNode("bef_O->O")); + SPIEL_CHECK_FALSE(network->IsLocationASinkNode("O->A")); + SPIEL_CHECK_FALSE(network->IsLocationASinkNode("A->D")); + SPIEL_CHECK_TRUE(network->IsLocationASinkNode("D->aft_D")); + SPIEL_CHECK_EQ(od_demand.vehicle.origin, "bef_O->O"); + SPIEL_CHECK_EQ(od_demand.vehicle.destination, "D->aft_D"); + SPIEL_CHECK_EQ(od_demand.vehicle.departure_time, 0); + SPIEL_CHECK_EQ(od_demand.counts, 100); +} + +void TestGetDynamicRoutingDataBraess() { + std::unique_ptr data = + DynamicRoutingData::Create(DynamicRoutingDataName::kBraess); + Network* network = data->network_.get(); + OriginDestinationDemand od_demand = data->od_demand_->at(0); + SPIEL_CHECK_EQ(network->num_links(), 7); + SPIEL_CHECK_EQ(network->GetSuccessors("O"), (std::vector{"A"})); + SPIEL_CHECK_EQ(network->GetSuccessors("A"), + (std::vector{"B", "C"})); + SPIEL_CHECK_EQ(network->GetSuccessors("B"), + (std::vector{"C", "D"})); + SPIEL_CHECK_EQ(network->GetSuccessors("C"), (std::vector{"D"})); + SPIEL_CHECK_EQ(network->GetSuccessors("D"), (std::vector{"E"})); + SPIEL_CHECK_EQ(network->GetSuccessors("E"), (std::vector{})); + SPIEL_CHECK_FALSE(network->IsLocationASinkNode("A->B")); + SPIEL_CHECK_FALSE(network->IsLocationASinkNode("B->C")); + SPIEL_CHECK_FALSE(network->IsLocationASinkNode("C->D")); + SPIEL_CHECK_TRUE(network->IsLocationASinkNode("D->E")); + SPIEL_CHECK_EQ(od_demand.vehicle.origin, "O->A"); + SPIEL_CHECK_EQ(od_demand.vehicle.destination, "D->E"); + SPIEL_CHECK_EQ(od_demand.vehicle.departure_time, 0); + SPIEL_CHECK_EQ(od_demand.counts, 5); + SPIEL_CHECK_EQ(network->GetTravelTime("O->A", 1.0), 0); + SPIEL_CHECK_EQ(network->GetTravelTime("A->B", 1.0), + GetTravelTime(1.0, 1.0, 1.0, 5.0, 1.0)); + SPIEL_CHECK_EQ(network->GetTravelTime("A->C", 1.0), + GetTravelTime(2.0, 0, 1.0, 5.0, 1.0)); + SPIEL_CHECK_EQ(network->GetTravelTime("B->C", 1.0), + GetTravelTime(0.25, 0, 1.0, 5.0, 1.0)); + SPIEL_CHECK_EQ(network->GetTravelTime("B->D", 1.0), + GetTravelTime(2.0, 0, 1.0, 5.0, 1.0)); + SPIEL_CHECK_EQ(network->GetTravelTime("C->D", 1.0), + GetTravelTime(1.0, 1.0, 1.0, 5.0, 1.0)); + SPIEL_CHECK_EQ(network->GetTravelTime("D->E", 1.0), 0); +} + +} // namespace +} // namespace open_spiel::dynamic_routing + +int main(int argc, char** argv) { + open_spiel::dynamic_routing::TestGetDynamicRoutingDataLine(); + open_spiel::dynamic_routing::TestGetDynamicRoutingDataBraess(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/dynamic_routing/dynamic_routing_utils.cc 
b/scenarios/bargaining/open_spiel/open_spiel/games/dynamic_routing/dynamic_routing_utils.cc new file mode 100644 index 0000000..8771435 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/dynamic_routing/dynamic_routing_utils.cc @@ -0,0 +1,201 @@ +// Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/dynamic_routing/dynamic_routing_utils.h" + +#include + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/btree_map.h" +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/abseil-cpp/absl/container/flat_hash_set.h" +#include "open_spiel/abseil-cpp/absl/memory/memory.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel::dynamic_routing { +namespace { + +template +absl::flat_hash_set GetKeySet( + const absl::flat_hash_map& m) { + absl::flat_hash_set keys; + for (const auto& pair : m) { + keys.emplace(pair.first); + } + return keys; +} + +absl::flat_hash_map AssignExistingOrDefaultValues( + absl::flat_hash_map dict_object, + absl::flat_hash_set road_sections, float default_value) { + if (!dict_object.empty()) { + SPIEL_CHECK_TRUE((GetKeySet(dict_object)) == + road_sections); + return dict_object; + } + absl::flat_hash_map dict_object_returned; + for (const auto& key : road_sections) { + dict_object_returned.emplace(key, default_value); + } + return dict_object_returned; +} +} // namespace + +std::string RoadSectionFromNodes(absl::string_view origin, + absl::string_view destination) { + return absl::StrCat(origin, "->", destination); +} + +std::vector NodesFromRoadSection(std::string road_section) { + return absl::StrSplit(road_section, "->"); +} + +std::unique_ptr Network::Create( + const absl::flat_hash_map>& + adjacency_list, + const absl::flat_hash_map>& + node_position, + const absl::flat_hash_map& bpr_a_coefficient, + const absl::flat_hash_map& bpr_b_coefficient, + const absl::flat_hash_map& capacity, + const absl::flat_hash_map& free_flow_travel_time) { + return absl::WrapUnique(new Network(adjacency_list, node_position, + bpr_a_coefficient, bpr_b_coefficient, + capacity, free_flow_travel_time)); +} + +Network::Network( + absl::flat_hash_map> adjacency_list, + absl::flat_hash_map> node_position, + absl::flat_hash_map bpr_a_coefficient, + absl::flat_hash_map bpr_b_coefficient, + absl::flat_hash_map capacity, + absl::flat_hash_map free_flow_travel_time) { + adjacency_list_ = adjacency_list; + // Sort the adjacency list to make the action id unique. + absl::btree_map> sorted_adjacency_list; + sorted_adjacency_list.insert(adjacency_list.begin(), adjacency_list.end()); + action_by_road_section_.clear(); + road_section_by_action.clear(); + road_section_by_action.emplace_back(""); // Dummy road section at index 0. 
+ int action_number = kNoPossibleAction + 1; + for (auto& [origin, successors] : sorted_adjacency_list) { + std::sort(successors.begin(), successors.end()); + for (const auto& destination : successors) { + std::string road_section = RoadSectionFromNodes(origin, destination); + SPIEL_CHECK_FALSE(action_by_road_section_.contains(road_section)); + action_by_road_section_.emplace(road_section, action_number); + road_section_by_action.emplace_back(road_section); + // Adds road_section with no successors to sink_road_sections_; + if (sorted_adjacency_list.at(destination).empty()) { + sink_road_sections_.emplace(road_section); + } + action_number++; + } + } + node_position_ = node_position; + absl::flat_hash_set road_sections = + GetKeySet(action_by_road_section_); + bpr_a_coefficient_ = + AssignExistingOrDefaultValues(bpr_a_coefficient, road_sections, 0); + bpr_b_coefficient_ = + AssignExistingOrDefaultValues(bpr_b_coefficient, road_sections, 1); + capacity_ = AssignExistingOrDefaultValues(capacity, road_sections, 1); + free_flow_travel_time_ = + AssignExistingOrDefaultValues(free_flow_travel_time, road_sections, 1); +} + +float Network::GetTravelTime(absl::string_view road_section, + float volume) const { + SPIEL_CHECK_TRUE(free_flow_travel_time_.contains(road_section)); + SPIEL_CHECK_TRUE(bpr_a_coefficient_.contains(road_section)); + SPIEL_CHECK_TRUE(bpr_b_coefficient_.contains(road_section)); + SPIEL_CHECK_TRUE(capacity_.contains(road_section)); + + float free_flow_travel_time = free_flow_travel_time_.at(road_section); + float a = bpr_a_coefficient_.at(road_section); + float b = bpr_b_coefficient_.at(road_section); + float capacity = capacity_.at(road_section); + return free_flow_travel_time * (1.0 + a * pow(volume / capacity, b)); +} + +bool Network::IsLocationASinkNode(absl::string_view road_section) const { + return sink_road_sections_.contains(road_section); +} + +int Network::GetActionIdFromMovement(absl::string_view origin, + absl::string_view destination) const { + std::string section = RoadSectionFromNodes(origin, destination); + SPIEL_CHECK_TRUE(action_by_road_section_.contains(section)); + return action_by_road_section_.at(section); +} + +int Network::num_links() const { return this->action_by_road_section_.size(); } + +int Network::num_actions() const { return 1 + this->num_links(); } + +std::vector Network::GetSuccessors(absl::string_view node) const { + SPIEL_CHECK_TRUE(adjacency_list_.contains(node)); + return adjacency_list_.at(node); +} + +std::string Network::GetRoadSectionFromActionId(int action) const { + return road_section_by_action.at(action); +} + +int Network::GetRoadSectionAsInt(std::string section) const { + if (section.empty()) { + return 0; + } + std::vector nodes = NodesFromRoadSection(section); + std::string start_node = nodes[0]; + std::string end_node = nodes[1]; + return GetActionIdFromMovement(start_node, end_node); +} + +void Network::AssertValidAction(int action, std::string road_section) const { + SPIEL_CHECK_GE(action, 1); + SPIEL_CHECK_LT(action, num_actions()); + if (!road_section.empty()) { + std::string new_road_section = GetRoadSectionFromActionId(action); + std::vector nodes = NodesFromRoadSection(new_road_section); + std::string origin_new_section = nodes[0]; + std::string end_new_section = nodes[1]; + std::string end_section_node = NodesFromRoadSection(road_section)[1]; + SPIEL_CHECK_EQ(end_section_node, origin_new_section); + std::vector successors = GetSuccessors(origin_new_section); + SPIEL_CHECK_TRUE(std::find(successors.begin(), 
successors.end(), + end_new_section) != successors.end()); + } +} + +void Network::CheckListOfOdDemandIsCorrect( + std::vector* od_demands) { + for (const OriginDestinationDemand& od_demand : *od_demands) { + SPIEL_CHECK_TRUE( + action_by_road_section_.contains(od_demand.vehicle.origin)); + SPIEL_CHECK_TRUE( + action_by_road_section_.contains(od_demand.vehicle.destination)); + } +} + +} // namespace open_spiel::dynamic_routing diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/dynamic_routing/dynamic_routing_utils.h b/scenarios/bargaining/open_spiel/open_spiel/games/dynamic_routing/dynamic_routing_utils.h new file mode 100644 index 0000000..8ba521c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/dynamic_routing/dynamic_routing_utils.h @@ -0,0 +1,182 @@ +// Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Utils for dynamic routing game and mean field routing game. +// This module has three main classes: +// - Network +// - Vehicle +// - OriginDestinationDemand + +#ifndef OPEN_SPIEL_GAMES_DYNAMIC_ROUTING_DYNAMIC_ROUTING_UTILS_H_ +#define OPEN_SPIEL_GAMES_DYNAMIC_ROUTING_DYNAMIC_ROUTING_UTILS_H_ + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/abseil-cpp/absl/container/flat_hash_set.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" + +namespace open_spiel::dynamic_routing { + +// In case one vehicle has reached a end node, then it cannot do anything. In +// this case its action is 0. Action 0 is reserved to encode no possible action +// as requested by Open Spiel. +inline constexpr int kNoPossibleAction = 0; + +// Creates a road section "A->B" from two nodes "A" and "B". +std::string RoadSectionFromNodes(absl::string_view origin, + absl::string_view destination); + +// Creates a vector of two nodes {"A", "B"} from a road section "A->B". +std::vector NodesFromRoadSection(std::string road_section); + +// A Vehicle is one origin and one destination. +// +// Both the origin and the destination of the vehicle are road section, +// therefore they are string formatted as "{str}->{str}". +// Attributes: +// origin: origin of the vehicle. +// destination: destination of the vehicle. +// departure_time: departure time of the vehicle. +struct Vehicle { + Vehicle(absl::string_view origin, absl::string_view destination, + float departure_time = 0) + : origin(origin), + destination(destination), + departure_time(departure_time) {} + + const std::string origin; + const std::string destination; + const float departure_time; +}; + +// Number of trips from origin to destination for a specific departure time. +// Both the origin and the destination of the vehicle are road section, +// therefore they are string formatted as "{str}->{str}". 
+struct OriginDestinationDemand { + explicit OriginDestinationDemand(absl::string_view origin, + absl::string_view destination, + float departure_time, float counts) + : vehicle{origin, destination, departure_time}, counts(counts) {} + + // The vehicles in the origin destination demand with the same origin, + // destination and departure time. + Vehicle vehicle; + // The number of vehicles with the origin, destination and departure time. + const float counts; +}; + +// Network implementation. +// +// A network is a directed graph with a volume delay function on each +// of its edges. Each vertex is referred to as a string (for example "A") and +// each edge as a string f"{node1}->{node2}" (for example "A->B"). The network +// is created from an adjacency list. Each road section is mapped to an action +// index (positive integer) in road_section_to_action_, and vice versa in +// action_to_road_section_. The volume delay function on each road section rs +// is given by free_flow_travel_time_[rs]*(1+ a_[rs]*(v/capacity_[rs])**b_[rs]) +// where v is the volume on the road section rs, according to the U.S. Bureau +// of Public Road (BPR). Such functions are called fundamental diagram of +// traffic flow. +class Network { + public: + // The factory function to create an instance of the Network class. + static std::unique_ptr Create( + const absl::flat_hash_map>& + adjacency_list, + const absl::flat_hash_map>& + node_position = {}, + const absl::flat_hash_map& bpr_a_coefficient = {}, + const absl::flat_hash_map& bpr_b_coefficient = {}, + const absl::flat_hash_map& capacity = {}, + const absl::flat_hash_map& free_flow_travel_time = + {}); + + // Returns True if the road section has no successors. + bool IsLocationASinkNode(absl::string_view road_section) const; + + // Returns travel time on the road section given the volume on it. + // Volume unit should be the same as the capacity unit. + // Travel time unit is the free flow travel time unit. + // Args: + // road_section: the road section. + // volume: the volume on the road section. + float GetTravelTime(absl::string_view road_section, float volume) const; + + // Maps two connected nodes to an action. + int GetActionIdFromMovement(absl::string_view origin, + absl::string_view destination) const; + + // Returns the number of road sections. + int num_links() const; + + // Returns the number of possible actions. + int num_actions() const; + + // Returns the successor nodes of the node. + std::vector GetSuccessors(absl::string_view node) const; + + // Maps a action to the corresponding road section. + std::string GetRoadSectionFromActionId(int action) const; + + // Returns the integer representation of the road section. + int GetRoadSectionAsInt(std::string section) const; + + // Assert that an action as a int is valid. + // The action should be a int between 1 and num_actions. In case road_section + // is not null then it is test if the action correspond to going on a road + // section which is a successor of road_section. + void AssertValidAction(int action, std::string road_section = "") const; + + // Assert that OD demands have valid origin and destination. 
+ void CheckListOfOdDemandIsCorrect( + std::vector* od_demands); + + private: + explicit Network( + absl::flat_hash_map> adjacency_list, + absl::flat_hash_map> node_position, + absl::flat_hash_map bpr_a_coefficient, + absl::flat_hash_map bpr_b_coefficient, + absl::flat_hash_map capacity, + absl::flat_hash_map free_flow_travel_time); + + // flat_hash_map that maps road section string representation to its a. + absl::flat_hash_map bpr_a_coefficient_; + // flat_hash_map that maps road section string representation to its b. + absl::flat_hash_map bpr_b_coefficient_; + // flat_hash_map that maps road section string representation to its adjacency + // list. + absl::flat_hash_map> adjacency_list_; + // flat_hash_map that maps road section string representation to its capacity. + absl::flat_hash_map capacity_; + // flat_hash_map that maps road section string representation to its free flow + // travel time. + absl::flat_hash_map free_flow_travel_time_; + // flat_hash_map that maps road section string representation to couple of + // float encoding x and y position of the node. None by default. + absl::flat_hash_map> node_position_; + // flat_hash_map that maps road section string representation to action. + absl::flat_hash_map action_by_road_section_; + // vector that maps action to road section string representation. + std::vector road_section_by_action; + // flat_hash_set that contains sink locations. + absl::flat_hash_set sink_road_sections_; +}; +} // namespace open_spiel::dynamic_routing + +#endif // OPEN_SPIEL_GAMES_DYNAMIC_ROUTING_DYNAMIC_ROUTING_UTILS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/dynamic_routing/dynamic_routing_utils_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/dynamic_routing/dynamic_routing_utils_test.cc new file mode 100644 index 0000000..6624e92 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/dynamic_routing/dynamic_routing_utils_test.cc @@ -0,0 +1,120 @@ +// Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/dynamic_routing/dynamic_routing_utils.h" + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/abseil-cpp/absl/memory/memory.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel::dynamic_routing { + +namespace { + +using ::open_spiel::dynamic_routing::RoadSectionFromNodes; +using ::open_spiel::dynamic_routing::NodesFromRoadSection; + +void TestRoadSectionFromNodes() { + std::string road_section = RoadSectionFromNodes("A", "B"); + SPIEL_CHECK_TRUE(road_section == "A->B"); +} + +void TestNodesFromRoadSection() { + std::string road_section = "A->B"; + std::vector nodes = NodesFromRoadSection(road_section); + std::vector expected{"A", "B"}; + SPIEL_CHECK_TRUE(nodes == expected); +} + +void TestVehicleInstanciation1() { + auto vehicle = absl::make_unique("O->A", "B->D"); + SPIEL_CHECK_EQ(vehicle->origin, "O->A"); + SPIEL_CHECK_EQ(vehicle->destination, "B->D"); + SPIEL_CHECK_FLOAT_EQ(vehicle->departure_time, 0); +} + +void TestVehicleInstanciation2() { + auto vehicle = absl::make_unique("O->A", "B->D", 10.5); + SPIEL_CHECK_EQ(vehicle->origin, "O->A"); + SPIEL_CHECK_EQ(vehicle->destination, "B->D"); + SPIEL_CHECK_FLOAT_EQ(vehicle->departure_time, 10.5); +} + +void TestOdDemandInstanciation1() { + auto od_demand = + absl::make_unique("O->A", "B->D", 0, 30); + SPIEL_CHECK_EQ(od_demand->vehicle.origin, "O->A"); + SPIEL_CHECK_EQ(od_demand->vehicle.destination, "B->D"); + SPIEL_CHECK_FLOAT_EQ(od_demand->vehicle.departure_time, 0); + SPIEL_CHECK_FLOAT_EQ(od_demand->counts, 30); +} + +void TestOdDemandInstanciation2() { + auto od_demand = + absl::make_unique("O->A", "B->D", 10.5, 43.2); + SPIEL_CHECK_EQ(od_demand->vehicle.origin, "O->A"); + SPIEL_CHECK_EQ(od_demand->vehicle.destination, "B->D"); + SPIEL_CHECK_FLOAT_EQ(od_demand->vehicle.departure_time, 10.5); + SPIEL_CHECK_FLOAT_EQ(od_demand->counts, 43.2); +} + +void TestNetworkInitWithEmpty() { + absl::flat_hash_map> adjacency_list = + {}; + auto network = Network::Create(adjacency_list); +} + +std::unique_ptr InitNetwork() { + absl::flat_hash_map> adjacency_list; + adjacency_list["O"] = std::vector{"A"}; + adjacency_list["A"] = std::vector{"D"}; + adjacency_list["D"] = std::vector{}; + return Network::Create(adjacency_list); +} + +void TestNetworkAdjacencyListInit() { + auto network = InitNetwork(); + SPIEL_CHECK_EQ(network->GetActionIdFromMovement("O", "A"), 2); + SPIEL_CHECK_EQ(network->GetActionIdFromMovement("A", "D"), 1); + SPIEL_CHECK_EQ(network->num_links(), 2); + SPIEL_CHECK_EQ(network->GetSuccessors("O"), std::vector{"A"}); + SPIEL_CHECK_EQ(network->GetSuccessors("A"), std::vector{"D"}); + SPIEL_CHECK_EQ(network->GetSuccessors("D"), std::vector{}); + SPIEL_CHECK_TRUE(network->IsLocationASinkNode("A->D")); + SPIEL_CHECK_FALSE(network->IsLocationASinkNode("O->A")); + SPIEL_CHECK_EQ(network->GetRoadSectionFromActionId(2), "O->A"); + SPIEL_CHECK_EQ(network->GetRoadSectionFromActionId(1), "A->D"); +} + +// Exceptions are checked in the code with SPIEL_CHECK_TRUE. 
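+// Illustrative sketch added for this write-up (not part of the upstream
+// OpenSpiel sources, and intentionally not wired into main() below): once
+// explicit coefficients are supplied to Network::Create, GetTravelTime
+// evaluates the BPR volume-delay function t0 * (1 + a * (v / capacity)^b).
+// The coefficient values below are arbitrary examples.
+void ExampleBprTravelTime() {
+  absl::flat_hash_map<std::string, std::vector<std::string>> adjacency_list;
+  adjacency_list["O"] = std::vector<std::string>{"A"};
+  adjacency_list["A"] = std::vector<std::string>{"D"};
+  adjacency_list["D"] = std::vector<std::string>{};
+  auto network = Network::Create(
+      adjacency_list, /*node_position=*/{},
+      /*bpr_a_coefficient=*/{{"A->D", 0.5f}},
+      /*bpr_b_coefficient=*/{{"A->D", 2.0f}},
+      /*capacity=*/{{"A->D", 10.0f}},
+      /*free_flow_travel_time=*/{{"A->D", 3.0f}});
+  // 3.0 * (1 + 0.5 * (5 / 10)^2) = 3.375.
+  SPIEL_CHECK_FLOAT_EQ(network->GetTravelTime("A->D", 5.0f), 3.375);
+  // Unspecified links fall back to a = 0, b = 1, capacity = 1, t0 = 1,
+  // so their travel time is constant at 1.
+  SPIEL_CHECK_FLOAT_EQ(network->GetTravelTime("O->A", 5.0f), 1.0);
+}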
+ +} // namespace +} // namespace open_spiel::dynamic_routing + +int main(int argc, char** argv) { + open_spiel::dynamic_routing::TestRoadSectionFromNodes(); + open_spiel::dynamic_routing::TestNodesFromRoadSection(); + open_spiel::dynamic_routing::TestVehicleInstanciation1(); + open_spiel::dynamic_routing::TestVehicleInstanciation2(); + open_spiel::dynamic_routing::TestOdDemandInstanciation1(); + open_spiel::dynamic_routing::TestOdDemandInstanciation2(); + open_spiel::dynamic_routing::TestNetworkInitWithEmpty(); + open_spiel::dynamic_routing::TestNetworkAdjacencyListInit(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/efg_game.cc b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/efg_game.cc new file mode 100644 index 0000000..12f2dba --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/efg_game.cc @@ -0,0 +1,885 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/efg_game/efg_game.h" + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/abseil-cpp/absl/strings/match.h" +#include "open_spiel/abseil-cpp/absl/strings/numbers.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/observer.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/file.h" + +namespace open_spiel { +namespace efg_game { +namespace { + +// Facts about the game. These are defaults that will differ depending on the +// game's descriptions. Using dummy defaults just to register the game. 
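+// The actual values (chance mode, information structure, utility type and the
+// player counts) are filled in by ParseGame() once the .efg data has been
+// parsed.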
+const GameType kGameType{/*short_name=*/"efg_game", + /*long_name=*/"efg_game", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/false, + /*parameter_specification=*/ + {{"filename", GameParameter(std::string(""))}}, + /*default_loadable=*/false, + /*provides_factored_observation_string=*/false, + /*is_concrete=*/false}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new EFGGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +std::string NodeToString(const Node* node) { + std::string str = ""; + if (node->type == NodeType::kTerminal) { + absl::StrAppend(&str, "Terminal: ", node->name, " ", node->outcome_name); + for (double payoff : node->payoffs) { + absl::StrAppend(&str, " ", payoff); + } + absl::StrAppend(&str, "\n"); + } else if (node->type == NodeType::kChance) { + absl::StrAppend(&str, "Chance: ", node->name, " ", node->infoset_number, + " ", node->infoset_name); + for (int i = 0; i < node->children.size(); ++i) { + absl::StrAppend(&str, " ", node->actions[i], " ", node->probs[i]); + } + absl::StrAppend(&str, "\n"); + } else if (node->type == NodeType::kPlayer) { + absl::StrAppend(&str, "Player: ", node->name, " ", node->player_number, " ", + node->infoset_number, " ", node->infoset_name); + for (int i = 0; i < node->children.size(); ++i) { + absl::StrAppend(&str, " ", node->actions[i]); + } + absl::StrAppend(&str, "\n"); + } + return str; +} + +std::string EFGInformationStateString(Player owner, Player observer, int number, + const std::string& name) { + return absl::StrCat(owner, "-", observer, "-", number, "-", name); +} +} // namespace + +EFGState::EFGState(std::shared_ptr game, const Node* root) + : State(game), cur_node_(root) {} + +Player EFGState::CurrentPlayer() const { + if (cur_node_->type == NodeType::kChance) { + return kChancePlayerId; + } else if (cur_node_->type == NodeType::kTerminal) { + return kTerminalPlayerId; + } else { + // Gambit player numbers are between 1 and num_players + SPIEL_CHECK_GE(cur_node_->player_number, 1); + SPIEL_CHECK_LE(cur_node_->player_number, num_players_); + return cur_node_->player_number - 1; + } +} + +std::string EFGState::ActionToString(Player player, Action action) const { + int action_idx = ActionIdx(action); + SPIEL_CHECK_GE(action_idx, 0); + SPIEL_CHECK_LT(action_idx, cur_node_->actions.size()); + return cur_node_->actions[action_idx]; +} + +std::string EFGState::ToString() const { + return absl::StrCat(cur_node_->id, ": ", NodeToString(cur_node_)); +} + +bool EFGState::IsTerminal() const { + return cur_node_->type == NodeType::kTerminal; +} + +std::vector EFGState::Returns() const { + if (cur_node_->type == NodeType::kTerminal) { + SPIEL_CHECK_EQ(cur_node_->payoffs.size(), num_players_); + return cur_node_->payoffs; + } else { + return std::vector(num_players_, 0); + } +} + +std::string EFGState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + // The information set number has to uniquely identify the infoset, whereas + // the names are optional. 
But the numbers are unique per player, so must + // add the player number. + return EFGInformationStateString(cur_node_->player_number - 1, player, + cur_node_->infoset_number, + cur_node_->infoset_name); +} + +void EFGState::InformationStateTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + std::fill(values.begin(), values.end(), 0.0); + int offset = 0; + int index = 0; + + // Current player, or terminal. + if (cur_node_->type == NodeType::kTerminal) { + index = offset + num_players_; + } else { + index = offset + cur_node_->player_number - 1; + } + SPIEL_CHECK_GE(index, 0); + SPIEL_CHECK_LT(index, values.size()); + values[index] = 1.0; + offset += num_players_ + 1; + + // Observing player. + index = offset + player; + SPIEL_CHECK_GE(index, 0); + SPIEL_CHECK_LT(index, values.size()); + values[index] = 1.0; + offset += num_players_; + + // Infostate number. + index = offset + cur_node_->infoset_number - 1; + SPIEL_CHECK_GE(index, 0); + SPIEL_CHECK_LT(index, values.size()); + values[index] = 1.0; + + offset += static_cast(game_.get())->NumInfoStates(player); + SPIEL_CHECK_LE(offset, values.size()); +} + +std::string EFGState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return absl::StrCat(cur_node_->player_number - 1, "-", player, "-", + cur_node_->infoset_number, "-", cur_node_->infoset_name); +} + +std::unique_ptr EFGState::Clone() const { + return std::unique_ptr(new EFGState(*this)); +} + +void EFGState::UndoAction(Player player, Action action) { + SPIEL_CHECK_TRUE(cur_node_->parent != nullptr); + cur_node_ = cur_node_->parent; +} + +int EFGState::ActionIdx(Action action) const { + int action_idx = -1; + for (int i = 0; i < cur_node_->action_ids.size(); ++i) { + if (action == cur_node_->action_ids[i]) { + action_idx = i; + break; + } + } + return action_idx; +} + +void EFGState::DoApplyAction(Action action) { + // Actions in these games are just indices into the legal actions. 
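+  // More precisely, the incoming Action is the global id assigned by
+  // AddOrGetAction() / AddOrGetChanceOutcome(); ActionIdx() maps it back to
+  // the index of the matching child at the current node before descending.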
+ SPIEL_CHECK_FALSE(cur_node_->type == NodeType::kTerminal); + SPIEL_CHECK_GE(action, 0); + if (IsChanceNode()) { + SPIEL_CHECK_LT(action, game_->MaxChanceOutcomes()); + } else { + SPIEL_CHECK_LT(action, game_->NumDistinctActions()); + } + int action_idx = ActionIdx(action); + SPIEL_CHECK_NE(action_idx, -1); + SPIEL_CHECK_FALSE(cur_node_->children[action_idx] == nullptr); + cur_node_ = cur_node_->children[action_idx]; +} + +std::vector EFGState::LegalActions() const { + return cur_node_->action_ids; +} + +std::vector> EFGState::ChanceOutcomes() const { + SPIEL_CHECK_TRUE(IsChanceNode()); + SPIEL_CHECK_TRUE(cur_node_->type == NodeType::kChance); + std::vector> outcomes(cur_node_->children.size()); + for (int i = 0; i < cur_node_->children.size(); ++i) { + outcomes[i].first = cur_node_->action_ids[i]; + outcomes[i].second = cur_node_->probs[i]; + } + return outcomes; +} + +int EFGGame::MaxChanceOutcomes() const { return chance_action_ids_.size(); } + +int EFGGame::NumDistinctActions() const { return action_ids_.size(); } + +int EFGGame::NumPlayers() const { return num_players_; } + +double EFGGame::MinUtility() const { return min_util_.value(); } + +absl::optional EFGGame::UtilitySum() const { + if (constant_sum_) + return util_sum_; + else + return absl::nullopt; +} + +double EFGGame::MaxUtility() const { return max_util_.value(); } + +int EFGGame::MaxGameLength() const { return max_depth_; } + +int EFGGame::MaxChanceNodesInHistory() const { return num_chance_nodes_; } + +int EFGGame::MaxMoveNumber() const { return max_depth_; } + +int EFGGame::MaxHistoryLength() const { return max_depth_; } + +std::vector EFGGame::InformationStateTensorShape() const { + int max_player_infosets = 0; + for (Player p = 0; p < num_players_; ++p) { + max_player_infosets = std::max(max_player_infosets, + infoset_num_to_states_count_[p].size()); + } + + return { + num_players_ + 1 + // Current player (plus special for terminal). + num_players_ + // Current observing player. + max_player_infosets // Information set number (for the current player). + }; +} + +EFGGame::EFGGame(const GameParameters& params) + : Game(kGameType, params), + filename_(ParameterValue("filename")), + string_data_(file::ReadContentsFromFile(filename_, "r")), + pos_(0), + num_chance_nodes_(0), + max_actions_(0), + max_depth_(0), + constant_sum_(true), + identical_payoffs_(true), + general_sum_(true), + perfect_information_(true) { + SPIEL_CHECK_GT(string_data_.size(), 0); + + // Now parse the string data into a data structure. + ParseGame(); +} + +EFGGame::EFGGame(const std::string& data) + : Game(kGameType, {}), + string_data_(data), + pos_(0), + num_chance_nodes_(0), + max_actions_(0), + max_depth_(0), + constant_sum_(true), + identical_payoffs_(true), + general_sum_(true), + perfect_information_(true) { + ParseGame(); +} + +std::shared_ptr LoadEFGGame(const std::string& data) { + return std::shared_ptr(new EFGGame(data)); +} + +bool EFGGame::IsWhiteSpace(char c) const { + return (c == ' ' || c == '\r' || c == '\n'); +} + +bool EFGGame::IsNodeToken(char c) const { + return (c == 'c' || c == 'p' || c == 't'); +} + +std::unique_ptr EFGGame::NewNode() const { + std::unique_ptr new_node = std::make_unique(); + new_node->id = nodes_.size(); + return new_node; +} + +// Let's use custom parser macros, so that we can print the line +// and an error about what happened while parsing the gambit file. 
+ +#define SPIEL_EFG_PARSE_CHECK_OP(x_exp, op, y_exp) \ + do { \ + auto x = x_exp; \ + auto y = y_exp; \ + if (!((x)op(y))) \ + open_spiel::SpielFatalError(open_spiel::internal::SpielStrCat( \ + __FILE__, ":", __LINE__, " ", #x_exp " " #op " " #y_exp, \ + "\n" #x_exp, " = ", x, ", " #y_exp " = ", y, "\n", \ + " while parsing line #", line_, ":\n", GetLine(line_))); \ + } while (false) + +#define SPIEL_EFG_PARSE_CHECK_GE(x, y) SPIEL_EFG_PARSE_CHECK_OP(x, >=, y) +#define SPIEL_EFG_PARSE_CHECK_GT(x, y) SPIEL_EFG_PARSE_CHECK_OP(x, >, y) +#define SPIEL_EFG_PARSE_CHECK_LE(x, y) SPIEL_EFG_PARSE_CHECK_OP(x, <=, y) +#define SPIEL_EFG_PARSE_CHECK_LT(x, y) SPIEL_EFG_PARSE_CHECK_OP(x, <, y) +#define SPIEL_EFG_PARSE_CHECK_EQ(x, y) SPIEL_EFG_PARSE_CHECK_OP(x, ==, y) +#define SPIEL_EFG_PARSE_CHECK_NE(x, y) SPIEL_EFG_PARSE_CHECK_OP(x, !=, y) + +#define SPIEL_EFG_PARSE_CHECK_TRUE(x) \ + while (!(x)) \ + open_spiel::SpielFatalError(open_spiel::internal::SpielStrCat( \ + __FILE__, ":", __LINE__, " CHECK_TRUE(", #x, ")\n", \ + " while parsing line #", line_, ":\n", GetLine(line_))) + +#define SPIEL_EFG_PARSE_CHECK_FALSE(x) \ + while (x) \ + open_spiel::SpielFatalError(open_spiel::internal::SpielStrCat( \ + __FILE__, ":", __LINE__, " CHECK_FALSE(", #x, ")\n", \ + " while parsing line #", line_, ":\n", GetLine(line_))) + +bool EFGGame::ParseDoubleValue(const std::string& str, double* value) const { + if (absl::StrContains(str, '/')) { + // Check for rational number of the form X/Y + std::vector parts = absl::StrSplit(str, '/'); + SPIEL_EFG_PARSE_CHECK_EQ(parts.size(), 2); + int numerator = 0, denominator = 0; + bool success = absl::SimpleAtoi(parts[0], &numerator); + if (!success) { + return false; + } + success = absl::SimpleAtoi(parts[1], &denominator); + if (!success) { + return false; + } + SPIEL_EFG_PARSE_CHECK_FALSE(denominator == 0); + *value = static_cast(numerator) / denominator; + return true; + } else { + // Otherwise, parse as a double. + return absl::SimpleAtod(str, value); + } +} + + +std::string EFGGame::NextPayoffToken() { + std::string str = ""; + bool seen_comma = false; + + while (true) { + // Check stopping condition: + if (pos_ >= string_data_.length() || + string_data_.at(pos_) == ',' || + IsWhiteSpace(string_data_.at(pos_))) { + break; + } + + str.push_back(string_data_.at(pos_)); + AdvancePosition(); + } + + // Advance the position to the next token. + while (pos_ < string_data_.length()) { + if (!seen_comma && string_data_.at(pos_) == ',') { + seen_comma = true; + AdvancePosition(); + continue; + } + if (!IsWhiteSpace(string_data_.at(pos_))) { + break; + } + AdvancePosition(); + } + + return str; +} + +std::string EFGGame::NextToken() { + std::string str = ""; + bool reading_quoted_string = false; + + if (string_data_.at(pos_) == '"') { + reading_quoted_string = true; + AdvancePosition(); + } + + while (true) { + // Check stopping condition: + if (pos_ >= string_data_.length() || + (reading_quoted_string && string_data_.at(pos_) == '"') || + (!reading_quoted_string && IsWhiteSpace(string_data_.at(pos_)))) { + break; + } + + str.push_back(string_data_.at(pos_)); + AdvancePosition(); + } + + if (reading_quoted_string) { + SPIEL_EFG_PARSE_CHECK_EQ(string_data_.at(pos_), '"'); + } + AdvancePosition(); + + // Advance the position to the next token. 
+ while (pos_ < string_data_.length() && IsWhiteSpace(string_data_.at(pos_))) { + AdvancePosition(); + } + + return str; +} + +void EFGGame::AdvancePosition() { + pos_++; + if (string_data_[pos_] == '\n') line_++; +} + +std::string EFGGame::GetLine(int line) const { + SPIEL_CHECK_GE(line, 1); + + int cur_line = 1; + int pos = 0; + int len = string_data_.size(); + std::string buf; + do { + if (cur_line == line) buf.push_back(string_data_[pos]); + if (string_data_[pos] == '\n') cur_line++; + pos++; + } while (cur_line != line + 1 && pos < len); + + return buf; +} + +/* +EFG 2 R "General Bayes game, one stage" { "Player 1" "Player 2" } +c "ROOT" 1 "(0,1)" { "1G" 0.500000 "1B" 0.500000 } 0 +c "" 2 "(0,2)" { "2g" 0.500000 "2b" 0.500000 } 0 +p "" 1 1 "(1,1)" { "H" "L" } 0 +p "" 2 1 "(2,1)" { "h" "l" } 0 +t "" 1 "Outcome 1" { 10.000000 2.000000 } +t "" 2 "Outcome 2" { 0.000000 10.000000 } +p "" 2 1 "(2,1)" { "h" "l" } 0 +t "" 3 "Outcome 3" { 2.000000 4.000000 } +t "" 4 "Outcome 4" { 4.000000 0.000000 } +p "" 1 1 "(1,1)" { "H" "L" } 0 +p "" 2 2 "(2,2)" { "h" "l" } 0 +t "" 5 "Outcome 5" { 10.000000 2.000000 } +t "" 6 "Outcome 6" { 0.000000 10.000000 } +p "" 2 2 "(2,2)" { "h" "l" } 0 +t "" 7 "Outcome 7" { 2.000000 4.000000 } +t "" 8 "Outcome 8" { 4.000000 0.000000 } +c "" 3 "(0,3)" { "2g" 0.500000 "2b" 0.500000 } 0 +p "" 1 2 "(1,2)" { "H" "L" } 0 +p "" 2 1 "(2,1)" { "h" "l" } 0 +t "" 9 "Outcome 9" { 4.000000 2.000000 } +t "" 10 "Outcome 10" { 2.000000 10.000000 } +p "" 2 1 "(2,1)" { "h" "l" } 0 +t "" 11 "Outcome 11" { 0.000000 4.000000 } +t "" 12 "Outcome 12" { 10.000000 2.000000 } +p "" 1 2 "(1,2)" { "H" "L" } 0 +p "" 2 2 "(2,2)" { "h" "l" } 0 +t "" 13 "Outcome 13" { 4.000000 2.000000 } +t "" 14 "Outcome 14" { 2.000000 10.000000 } +p "" 2 2 "(2,2)" { "h" "l" } 0 +t "" 15 "Outcome 15" { 0.000000 4.000000 } +t "" 16 "Outcome 16" { 10.000000 0.000000 } +*/ +void EFGGame::ParsePrologue() { + // Parse the first part of the header "EFG 2 R " + SPIEL_EFG_PARSE_CHECK_TRUE(NextToken() == "EFG"); + SPIEL_EFG_PARSE_CHECK_LT(pos_, string_data_.length()); + SPIEL_EFG_PARSE_CHECK_TRUE(NextToken() == "2"); + SPIEL_EFG_PARSE_CHECK_LT(pos_, string_data_.length()); + SPIEL_EFG_PARSE_CHECK_TRUE(NextToken() == "R"); + SPIEL_EFG_PARSE_CHECK_LT(pos_, string_data_.length()); + SPIEL_EFG_PARSE_CHECK_EQ(string_data_.at(pos_), '"'); + name_ = NextToken(); + std::string token = NextToken(); + SPIEL_EFG_PARSE_CHECK_TRUE(token == "{"); + SPIEL_EFG_PARSE_CHECK_EQ(string_data_.at(pos_), '"'); + token = NextToken(); + while (token != "}") { + player_names_.push_back(token); + token = NextToken(); + } + num_players_ = player_names_.size(); + infoset_num_to_states_count_.resize(num_players_, {}); + if (string_data_.at(pos_) == '"') { + description_ = NextToken(); + } + SPIEL_EFG_PARSE_CHECK_LT(pos_, string_data_.length()); + SPIEL_EFG_PARSE_CHECK_TRUE(IsNodeToken(string_data_.at(pos_))); +} + +void EFGGame::ParseChanceNode(Node* parent, Node* child, int depth) { + // a text string, giving the name of the node + // a positive integer specifying the information set number + // (optional) the name of the information set + // (optional) a list of actions at the information set with their + // corresponding probabilities + // a nonnegative integer specifying the outcome + // (optional)the payoffs to each player for the outcome + // + // c "ROOT" 1 "(0,1)" { "1G" 0.500000 "1B" 0.500000 } 0 + SPIEL_EFG_PARSE_CHECK_TRUE(NextToken() == "c"); + num_chance_nodes_++; + max_depth_ = std::max(max_depth_, depth); + child->type = 
NodeType::kChance; + child->parent = parent; + SPIEL_EFG_PARSE_CHECK_EQ(string_data_.at(pos_), '"'); + child->name = NextToken(); + SPIEL_EFG_PARSE_CHECK_FALSE(string_data_.at(pos_) == '"'); + SPIEL_EFG_PARSE_CHECK_TRUE( + absl::SimpleAtoi(NextToken(), &child->infoset_number)); + if (string_data_.at(pos_) == '"') { + child->infoset_name = NextToken(); + } + // I do not understand how the list of children can be optional. + SPIEL_EFG_PARSE_CHECK_TRUE(NextToken() == "{"); + int chance_outcomes = 0; + double prob_sum = 0.0; + while (string_data_.at(pos_) == '"') { + std::string action_str = NextToken(); + child->actions.push_back(action_str); + Action action = AddOrGetChanceOutcome(action_str); + child->action_ids.push_back(action); + double prob = -1; + SPIEL_EFG_PARSE_CHECK_TRUE(ParseDoubleValue(NextToken(), &prob)); + SPIEL_EFG_PARSE_CHECK_GE(prob, 0.0); + SPIEL_EFG_PARSE_CHECK_LE(prob, 1.0); + prob_sum += prob; + child->probs.push_back(prob); + nodes_.push_back(NewNode()); + child->children.push_back(nodes_.back().get()); + chance_outcomes++; + } + SPIEL_EFG_PARSE_CHECK_GT(child->actions.size(), 0); + absl::c_sort(child->action_ids); + SPIEL_EFG_PARSE_CHECK_TRUE(Near(prob_sum, 1.0)); + SPIEL_EFG_PARSE_CHECK_TRUE(NextToken() == "}"); + SPIEL_EFG_PARSE_CHECK_TRUE( + absl::SimpleAtoi(NextToken(), &child->outcome_number)); + // Do not support optional payoffs here for now. + + // Now, recurse: + for (Node* grand_child : child->children) { + RecParseSubtree(child, grand_child, depth + 1); + } +} + +void EFGGame::UpdateAndCheckInfosetMaps(const Node* node) { + // If the infoset name is not empty: + // 1. ensure mapping from infoset (player,num) -> name is consistent, adding + // it if it doesn't exist. + // 2. Add also name -> (player, num) to a hash map + Player player = node->player_number - 1; + if (!node->infoset_name.empty()) { + std::pair key = {player, node->infoset_number}; + const auto& iter1 = infoset_player_num_to_name_.find(key); + if (iter1 != infoset_player_num_to_name_.end()) { + if (iter1->second != node->infoset_name) { + SpielFatalError(absl::StrCat( + "Inconsistent infoset (player, num) -> name: ", + static_cast(player), ",", node->infoset_number, " ", + node->infoset_name, " ", iter1->second, "\nfilename: ", filename_, + "\nstring data:\n", string_data_)); + } + } else { + std::pair key = {player, node->infoset_number}; + infoset_player_num_to_name_[key] = node->infoset_name; + } + + // Name -> infoset number is not required to be unique in .efg so we don't + // check it. So these may overlap unless the mapping is unique in the file. 
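+    // If the same name appears again, the most recently parsed
+    // (player, number) pair silently wins.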
+ infoset_name_to_player_num_[node->infoset_name] = {player, + node->infoset_number}; + } +} + +void EFGGame::ParsePlayerNode(Node* parent, Node* child, int depth) { + // a text string, giving the name of the node + // a positive integer specifying the player who owns the node + // a positive integer specifying the information set + // (optional) the name of the information set + // (optional) a list of action names for the information set + // a nonnegative integer specifying the outcome + // (optional) the name of the outcome + // the payoffs to each player for the outcome + // + // p "" 1 1 "(1,1)" { "H" "L" } 0 + SPIEL_EFG_PARSE_CHECK_TRUE(NextToken() == "p"); + max_depth_ = std::max(max_depth_, depth); + child->type = NodeType::kPlayer; + child->parent = parent; + SPIEL_EFG_PARSE_CHECK_EQ(string_data_.at(pos_), '"'); + child->name = NextToken(); + SPIEL_EFG_PARSE_CHECK_FALSE(string_data_.at(pos_) == '"'); + SPIEL_EFG_PARSE_CHECK_TRUE( + absl::SimpleAtoi(NextToken(), &child->player_number)); + SPIEL_EFG_PARSE_CHECK_TRUE( + absl::SimpleAtoi(NextToken(), &child->infoset_number)); + infoset_num_to_states_count_[child->player_number - 1] + [child->infoset_number]++; + if (infoset_num_to_states_count_[child->player_number - 1] + [child->infoset_number] > 1) { + perfect_information_ = false; + } + child->infoset_name = ""; + if (string_data_.at(pos_) == '"') { + child->infoset_name = NextToken(); + } + UpdateAndCheckInfosetMaps(child); + // Do not understand how the list of actions can be optional. + SPIEL_EFG_PARSE_CHECK_TRUE(NextToken() == "{"); + int actions = 0; + while (string_data_.at(pos_) == '"') { + std::string action_str = NextToken(); + child->actions.push_back(action_str); + Action action = AddOrGetAction(action_str); + child->action_ids.push_back(action); + nodes_.push_back(NewNode()); + child->children.push_back(nodes_.back().get()); + actions++; + } + SPIEL_EFG_PARSE_CHECK_GT(child->actions.size(), 0); + absl::c_sort(child->action_ids); + max_actions_ = std::max(max_actions_, actions); + SPIEL_EFG_PARSE_CHECK_TRUE(NextToken() == "}"); + SPIEL_EFG_PARSE_CHECK_TRUE( + absl::SimpleAtoi(NextToken(), &child->outcome_number)); + // Do not support optional payoffs here for now. 
+ + // Now, recurse: + for (Node* grand_child : child->children) { + RecParseSubtree(child, grand_child, depth + 1); + } +} + +void EFGGame::ParseTerminalNode(Node* parent, Node* child, int depth) { + // a text string, giving the name of the node + // a nonnegative integer specifying the outcome + // (optional) the name of the outcome + // the payoffs to each player for the outcome + // + // t "" 1 "Outcome 1" { 10.000000 2.000000 } + SPIEL_EFG_PARSE_CHECK_TRUE(NextToken() == "t"); + max_depth_ = std::max(max_depth_, depth); + child->type = NodeType::kTerminal; + child->parent = parent; + SPIEL_EFG_PARSE_CHECK_EQ(string_data_.at(pos_), '"'); + child->name = NextToken(); + SPIEL_EFG_PARSE_CHECK_TRUE( + absl::SimpleAtoi(NextToken(), &child->outcome_number)); + if (string_data_.at(pos_) == '"') { + child->outcome_name = NextToken(); + } + SPIEL_EFG_PARSE_CHECK_TRUE(NextToken() == "{"); + + int idx = 0; + double util_sum = 0; + bool identical = true; + while (string_data_.at(pos_) != '}') { + double utility = 0; + SPIEL_EFG_PARSE_CHECK_TRUE(ParseDoubleValue(NextPayoffToken(), &utility)); + child->payoffs.push_back(utility); + util_sum += utility; + if (!min_util_.has_value()) { + min_util_ = utility; + } + if (!max_util_.has_value()) { + max_util_ = utility; + } + min_util_ = std::min(min_util_.value(), utility); + max_util_ = std::max(max_util_.value(), utility); + + if (identical && idx >= 1 && + Near(child->payoffs[idx - 1], child->payoffs[idx])) { + identical = true; + } else { + identical = false; + } + + idx++; + } + SPIEL_EFG_PARSE_CHECK_EQ(child->payoffs.size(), num_players_); + SPIEL_EFG_PARSE_CHECK_TRUE(NextToken() == "}"); + + // Inspect the utilities to classify the utility type for this game. + if (!util_sum_.has_value()) { + util_sum_ = util_sum; + } + + if (constant_sum_ && Near(util_sum_.value(), util_sum)) { + constant_sum_ = true; + } else { + constant_sum_ = false; + } + + if (identical_payoffs_ && identical) { + identical_payoffs_ = true; + } else { + identical_payoffs_ = false; + } +} + +void EFGGame::RecParseSubtree(Node* parent, Node* child, int depth) { + switch (string_data_.at(pos_)) { + case 'c': + ParseChanceNode(parent, child, depth); + break; + case 'p': + ParsePlayerNode(parent, child, depth); + break; + case 't': + ParseTerminalNode(parent, child, depth); + break; + default: + SpielFatalError(absl::StrCat("Unexpected character at pos ", pos_, ": ", + string_data_.substr(pos_, 1))); + } +} + +std::string EFGGame::PrettyTree(const Node* node, + const std::string& indent) const { + std::string str = indent + NodeToString(node); + for (Node* child : node->children) { + str += PrettyTree(child, indent + " "); + } + return str; +} + +std::string EFGGame::GetInformationStateStringByName( + Player player, const std::string& name) const { + const auto& iter = infoset_name_to_player_num_.find(name); + if (iter == infoset_name_to_player_num_.end()) { + SpielFatalError(absl::StrCat("Information state not found: ", name)); + } + if (iter->second.first != player) { + SpielFatalError(absl::StrCat("Player mismatch in lookup by name: ", name, + " ", player, " ", iter->second.first)); + } + return EFGInformationStateString(player, player, iter->second.second, name); +} + +std::string EFGGame::GetInformationStateStringByNumber(Player player, + int number) const { + const auto& iter = infoset_player_num_to_name_.find({player, number}); + if (iter == infoset_player_num_to_name_.end()) { + SpielFatalError( + absl::StrCat("Information state not found: ", player, ",", number)); + } + 
return EFGInformationStateString(player, player, number, iter->second); +} + +void EFGGame::ParseGame() { + // Skip any initial whitespace. + while (pos_ < string_data_.length() && IsWhiteSpace(string_data_.at(pos_))) { + AdvancePosition(); + } + SPIEL_EFG_PARSE_CHECK_LT(pos_, string_data_.length()); + + ParsePrologue(); + nodes_.push_back(NewNode()); + RecParseSubtree(nullptr, nodes_[0].get(), 0); + SPIEL_EFG_PARSE_CHECK_GE(pos_, string_data_.length()); + + // Modify the game type. + if (num_chance_nodes_ > 0) { + game_type_.chance_mode = GameType::ChanceMode::kExplicitStochastic; + } + + if (perfect_information_) { + game_type_.information = GameType::Information::kPerfectInformation; + } else { + game_type_.information = GameType::Information::kImperfectInformation; + } + + if (constant_sum_ && Near(util_sum_.value(), 0.0)) { + game_type_.utility = GameType::Utility::kZeroSum; + } else if (constant_sum_) { + game_type_.utility = GameType::Utility::kConstantSum; + } else if (identical_payoffs_) { + game_type_.utility = GameType::Utility::kIdentical; + } else { + game_type_.utility = GameType::Utility::kGeneralSum; + } + + game_type_.max_num_players = num_players_; + game_type_.min_num_players = num_players_; + + // Check infoset number consistency. Currently they must start at 1 and end + // at n_i for each player i. The InformationStateTensor currently requires + // this to implement a proper info state tensor. + for (Player p = 0; p < num_players_; ++p) { + int max_value = 0; + for (const auto& number_and_count : infoset_num_to_states_count_[p]) { + SPIEL_CHECK_GE(number_and_count.first, 1); + SPIEL_CHECK_LE(number_and_count.first, + infoset_num_to_states_count_[p].size()); + max_value = std::max(max_value, number_and_count.first); + } + SPIEL_CHECK_EQ(max_value, infoset_num_to_states_count_[p].size()); + } +} + +TabularPolicy EFGGameTabularPolicy( + std::shared_ptr game, + const absl::flat_hash_map, + std::vector>>& + policy_map) { + const EFGGame* efg_game = dynamic_cast(game.get()); + SPIEL_CHECK_TRUE(efg_game != nullptr); + + TabularPolicy policy; + for (const auto& outer_iter : policy_map) { + Player player = outer_iter.first.first; + std::string infoset_label = outer_iter.first.second; + std::string infoset_str = + efg_game->GetInformationStateStringByName(player, infoset_label); + + ActionsAndProbs state_policy; + state_policy.reserve(outer_iter.second.size()); + for (const auto& inner_iter : outer_iter.second) { + std::string action_label = inner_iter.first; + double prob = inner_iter.second; + Action action = efg_game->GetAction(action_label); + state_policy.push_back({action, prob}); + } + + policy.SetStatePolicy(infoset_str, state_policy); + } + + return policy; +} + + +} // namespace efg_game +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/efg_game.h b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/efg_game.h new file mode 100644 index 0000000..147e47f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/efg_game.h @@ -0,0 +1,239 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_EFG_GAME_H_ +#define OPEN_SPIEL_GAMES_EFG_GAME_H_ + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// A parser for the .efg format used by Gambit: +// http://www.gambit-project.org/gambit14/formats.html +// +// Parameters: +// "filename" string name of a file containing the data +// +// Note: not the full EFG is supported as stated on that page. In particular: +// - Payoffs / outcomes at non-terminal nodes are not supported +// - Player nodes and chance nodes must each have one child +// + +namespace open_spiel { +namespace efg_game { + +enum class NodeType { + kChance, + kPlayer, + kTerminal, +}; + +// A node object that represent a subtree of the game. +struct Node { + Node* parent; + NodeType type; + int id; + std::string name; + int infoset_number; // Must starting at 1 for each player. + int player_number; // The EFG player numbers (starting at 1 rather than 0). + std::string infoset_name; + std::string outcome_name; + int outcome_number; + std::vector actions; + std::vector action_ids; + std::vector children; + std::vector probs; + std::vector payoffs; +}; + +// A function to load an EFG directly from string data. Note: games loaded +// using this function will not be serializable (nor will their states). Use +// the general LoadGame with the filename argument if serialization is required. +std::shared_ptr LoadEFGGame(const std::string& data); + +// Helper function to construct a tabular policy explicitly. The game must be +// an EFG game. 
The map uses is +// (player, information ste strings) -> vector of (action string, prob), e.g.: +// { {{0, "infoset1"}, {{"actionA, prob1"}, {"actionB", prob2}}}, +// {{1, "infoset2"}, {{"actionC, prob1"}, {"actionD", prob2}}} } +TabularPolicy EFGGameTabularPolicy( + std::shared_ptr game, + const absl::flat_hash_map, + std::vector>>& + policy_map); + +class EFGState : public State { + public: + explicit EFGState(std::shared_ptr game, const Node* root); + Player CurrentPlayer() const override; + std::string ActionToString(Player player, Action action) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + void InformationStateTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + void UndoAction(Player player, Action action) override; + std::vector LegalActions() const override; + std::vector> ChanceOutcomes() const override; + + protected: + void DoApplyAction(Action action) override; + + private: + int ActionIdx(Action action) const; + const Node* cur_node_; +}; + +class EFGGame : public Game { + public: + explicit EFGGame(const GameParameters& params); + explicit EFGGame(const std::string& data); + std::unique_ptr NewInitialState() const override { + return std::unique_ptr( + new EFGState(shared_from_this(), nodes_[0].get())); + } + + int MaxChanceOutcomes() const override; + int NumDistinctActions() const override; + int NumPlayers() const override; + double MinUtility() const override; + absl::optional UtilitySum() const override; + double MaxUtility() const override; + int MaxGameLength() const override; + int MaxChanceNodesInHistory() const override; + int MaxMoveNumber() const override; + int MaxHistoryLength() const override; + std::vector InformationStateTensorShape() const override; + + // Gets the player / decision node action associated to this label. + Action GetAction(const std::string& label) const { + auto iter = action_ids_.find(label); + SPIEL_CHECK_TRUE(iter != action_ids_.end()); + return iter->second; + } + + // Gets the chance node action associated to this label. + Action GetChanceAction(const std::string& label) const { + auto iter = chance_action_ids_.find(label); + SPIEL_CHECK_TRUE(iter != chance_action_ids_.end()); + return iter->second; + } + + Action AddOrGetAction(const std::string& label) { + auto iter = action_ids_.find(label); + if (iter != action_ids_.end()) { + return iter->second; + } + Action new_action = action_ids_.size(); + action_ids_[label] = new_action; + return new_action; + } + + Action AddOrGetChanceOutcome(const std::string& label) { + auto iter = chance_action_ids_.find(label); + if (iter != chance_action_ids_.end()) { + return iter->second; + } + Action new_action = chance_action_ids_.size(); + chance_action_ids_[label] = new_action; + return new_action; + } + + // Get the information state strings by names or numbers. + // Note: since the names of the information sets are not required to be + // unique, if the same name is used for different infoset numbers in the file + // then the information set number may not be the correct one. Only use + // GetInformationStateStringByName if the names are unique and there is a + // one-to-one correspondence with infoset numbers! 
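+  // Lookup by number (GetInformationStateStringByNumber) is always
+  // unambiguous, because a (player, infoset number) pair identifies exactly
+  // one information set.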
+ std::string GetInformationStateStringByName(Player player, + const std::string& name) const; + std::string GetInformationStateStringByNumber(Player player, + int number) const; + + // Return the number of information states for the specified player. + int NumInfoStates(Player player) const { + return infoset_num_to_states_count_[player].size(); + } + + private: + std::unique_ptr NewNode() const; + void ParseGame(); + void ParsePrologue(); + std::string NextPayoffToken(); + std::string NextToken(); + void AdvancePosition(); + std::string GetLine(int line) const; + bool ParseDoubleValue(const std::string& str, double* value) const; + bool IsWhiteSpace(char c) const; + bool IsNodeToken(char c) const; + void UpdateAndCheckInfosetMaps(const Node* node); + void ParseChanceNode(Node* parent, Node* child, int depth); + void ParsePlayerNode(Node* parent, Node* child, int depth); + void ParseTerminalNode(Node* parent, Node* child, int depth); + void RecParseSubtree(Node* parent, Node* child, int depth); + std::string PrettyTree(const Node* node, const std::string& indent) const; + + std::string filename_; + std::string string_data_; + int pos_; + int line_ = 1; + std::vector> nodes_; + std::string name_; + std::string description_; + std::vector player_names_; + int num_chance_nodes_; + int num_players_; + int max_actions_; + int max_depth_; + absl::optional util_sum_; + absl::optional max_util_; + absl::optional min_util_; + bool constant_sum_; + bool identical_payoffs_; + bool general_sum_; + bool perfect_information_; + + // Maintains a map of infoset number -> count of states in the infoset + // (one for each player). + std::vector> infoset_num_to_states_count_; + + // Maintains a (player, infoset number) -> infoset name mapping and vice + // versa, for retrieval of information set strings externally + // (GetInformationStateStringByName and GetInformationStateStringByNumber). + absl::flat_hash_map, std::string> + infoset_player_num_to_name_; + absl::flat_hash_map> + infoset_name_to_player_num_; + + // Action label -> action id mapping. Note that chance actions are excluded. + absl::flat_hash_map action_ids_; + + // Outcome label -> action id mapping for chance nodes. + absl::flat_hash_map chance_action_ids_; +}; + +} // namespace efg_game +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_EFG_GAME_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/efg_game_data.cc b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/efg_game_data.cc new file mode 100644 index 0000000..44c78ed --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/efg_game_data.cc @@ -0,0 +1,155 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/efg_game/efg_game_data.h" + +namespace open_spiel { +namespace efg_game { + +// A copy of games/efg/kuhn_poker.efg useful to use for tests. 
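+// It can be loaded directly with LoadEFGGame(GetKuhnPokerEFGData()).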
+const char* kKuhnEFGData = R"###( +EFG 2 R "Kuhn poker" { "Player 1" "Player 2" } "A simplified poker game: https://en.wikipedia.org/wiki/Kuhn_poker" + +c "ROOT" 1 "c1" { "1" 1/3 "0" 1/3 "2" 1/3 } 0 + c "c2" 2 "c2" { "2" 1/2 "0" 1/2 } 0 + p "" 1 1 "1" { "p" "b" } 0 + p "" 2 2 "2p" { "p" "b" } 0 + t "" 3 "Outcome 12pp" { -1.0 1.0 } + p "" 1 2 "1pb" { "p" "b" } 0 + t "" 4 "Outcome 12pbp" { -1.0 1.0 } + t "" 5 "Outcome 12pbb" { -2.0 2.0 } + p "" 2 1 "2b" { "p" "b" } 0 + t "" 1 "Outcome 12bp" { 1.0 -1.0 } + t "" 2 "Outcome 12bb" { -2.0 2.0 } + p "" 1 1 "1" { "p" "b" } 0 + p "" 2 3 "0p" { "p" "b" } 0 + t "" 8 "Outcome 10pp" { 1.0 -1.0 } + p "" 1 2 "1pb" { "p" "b" } 0 + t "" 6 "Outcome 10pbp" { -1.0 1.0 } + t "" 7 "Outcome 10pbb" { 2.0 -2.0 } + p "" 2 4 "0b" { "p" "b" } 0 + t "" 9 "Outcome 10bp" { 1.0 -1.0 } + t "" 10 "Outcome 10bb" { 2.0 -2.0 } + c "c3" 3 "c3" { "2" 1/2 "1" 1/2 } 0 + p "" 1 3 "0" { "p" "b" } 0 + p "" 2 2 "2p" { "p" "b" } 0 + t "" 13 "Outcome 02pp" { -1.0 1.0 } + p "" 1 4 "0pb" { "p" "b" } 0 + t "" 14 "Outcome 02pbp" { -1.0 1.0 } + t "" 15 "Outcome 02pbb" { -2.0 2.0 } + p "" 2 1 "2b" { "p" "b" } 0 + t "" 11 "Outcome 02bp" { 1.0 -1.0 } + t "" 12 "Outcome 02bb" { -2.0 2.0 } + p "" 1 3 "0" { "p" "b" } 0 + p "" 2 5 "1p" { "p" "b" } 0 + t "" 18 "Outcome 01pp" { -1.0 1.0 } + p "" 1 4 "0pb" { "p" "b" } 0 + t "" 16 "Outcome 01pbp" { -1.0 1.0 } + t "" 17 "Outcome 01pbb" { -2.0 2.0 } + p "" 2 6 "1b" { "p" "b" } 0 + t "" 19 "Outcome 01bp" { 1.0 -1.0 } + t "" 20 "Outcome 01bb" { -2.0 2.0 } + c "c4" 4 "c4" { "0" 1/2 "1" 1/2 } 0 + p "" 1 5 "2" { "p" "b" } 0 + p "" 2 3 "0p" { "p" "b" } 0 + t "" 21 "Outcome 20pp" { 1.0 -1.0 } + p "" 1 6 "2pb" { "p" "b" } 0 + t "" 22 "Outcome 20pbp" { -1.0 1.0 } + t "" 23 "Outcome 20pbb" { 2.0 -2.0 } + p "" 2 4 "0b" { "p" "b" } 0 + t "" 24 "Outcome 20bp" { 1.0 -1.0 } + t "" 25 "Outcome 20bb" { 2.0 -2.0 } + p "" 1 5 "2" { "p" "b" } 0 + p "" 2 5 "1p" { "p" "b" } 0 + t "" 28 "Outcome 21pp" { 1.0 -1.0 } + p "" 1 6 "2pb" { "p" "b" } 0 + t "" 26 "Outcome 21pbp" { -1.0 1.0 } + t "" 27 "Outcome 21pbb" { 2.0 -2.0 } + p "" 2 6 "1b" { "p" "b" } 0 + t "" 29 "Outcome 21bp" { 1.0 -1.0 } + t "" 30 "Outcome 21bb" { 2.0 -2.0 } +)###"; + +// A copy of games/efg/sample.efg useful to use within tests. 
+const char* kSampleEFGData = R"###( +EFG 2 R "General Bayes game, one stage" { "Player 1" "Player 2" } +c "ROOT" 1 "(0,1)" { "1G" 0.500000 "1B" 0.500000 } 0 +c "" 2 "(0,2)" { "2g" 0.500000 "2b" 0.500000 } 0 +p "" 1 1 "(1,1)" { "H" "L" } 0 +p "" 2 1 "(2,1)" { "h" "l" } 0 +t "" 1 "Outcome 1" { 10.000000 2.000000 } +t "" 2 "Outcome 2" { 0.000000 10.000000 } +p "" 2 1 "(2,1)" { "h" "l" } 0 +t "" 3 "Outcome 3" { 2.000000 4.000000 } +t "" 4 "Outcome 4" { 4.000000 0.000000 } +p "" 1 1 "(1,1)" { "H" "L" } 0 +p "" 2 2 "(2,2)" { "h" "l" } 0 +t "" 5 "Outcome 5" { 10.000000 2.000000 } +t "" 6 "Outcome 6" { 0.000000 10.000000 } +p "" 2 2 "(2,2)" { "h" "l" } 0 +t "" 7 "Outcome 7" { 2.000000 4.000000 } +t "" 8 "Outcome 8" { 4.000000 0.000000 } +c "" 3 "(0,3)" { "2g" 0.500000 "2b" 0.500000 } 0 +p "" 1 2 "(1,2)" { "H" "L" } 0 +p "" 2 1 "(2,1)" { "h" "l" } 0 +t "" 9 "Outcome 9" { 4.000000 2.000000 } +t "" 10 "Outcome 10" { 2.000000 10.000000 } +p "" 2 1 "(2,1)" { "h" "l" } 0 +t "" 11 "Outcome 11" { 0.000000 4.000000 } +t "" 12 "Outcome 12" { 10.000000 2.000000 } +p "" 1 2 "(1,2)" { "H" "L" } 0 +p "" 2 2 "(2,2)" { "h" "l" } 0 +t "" 13 "Outcome 13" { 4.000000 2.000000 } +t "" 14 "Outcome 14" { 2.000000 10.000000 } +p "" 2 2 "(2,2)" { "h" "l" } 0 +t "" 15 "Outcome 15" { 0.000000 4.000000 } +t "" 16 "Outcome 16" { 10.000000 0.000000 } +)###"; + +const char* kSignalingEFGData = R"###( +EFG 2 R "Signaling game from Fig 1 of von Stengel and Forges 2008" { "Player 1" "Player 2" } "See Fig 1 of Extensive-Form Correlated Equilibrium: +Definition and Computational Complexity" + +c "ROOT" 1 "c1" { "g" 1/2 "b" 1/2 } 0 + p "G" 1 1 "G" { "X_G" "Y_G" } 0 + p "G X_G" 2 1 "X" { "l_X" "r_X" } 0 + t "G X_G l_X" 1 "Outcome G X_G l_X" { 4.0 10.0 } + t "G X_G r_X" 2 "Outcome G X_G r_X" { 0.0 6.0 } + p "G Y_G" 2 2 "Y" { "l_Y" "r_Y" } 0 + t "G Y_G l_Y" 3 "Outcome G Y_G l_Y" { 4.0 10.0 } + t "G Y_G r_Y" 4 "Outcome G Y_G r_Y" { 0.0 6.0 } + p "B" 1 2 "B" { "X_B" "Y_B" } 0 + p "B X_B" 2 1 "X" { "l_X" "r_X" } 0 + t "B X_B l_X" 5 "Outcome B X_B l_X" { 6.0 0.0 } + t "B X_B r_X" 6 "Outcome B X_B r_X" { 0.0 6.0 } + p "B Y_B" 2 2 "Y" { "l_Y" "r_Y" } 0 + t "B Y_B l_Y" 7 "Outcome B Y_B l_Y" { 6.0 0.0 } + t "B Y_B r_Y" 8 "Outcome B Y_B r_Y" { 0.0 6.0 } +)###"; + +const char* kSimpleForkEFGData = R"###( +EFG 2 R "Simple single-agent problem" { "Player 1" } "" + +p "ROOT" 1 1 "ROOT" { "L" "R" } 0 + t "L" 1 "Outcome L" { -1.0 } + t "R" 2 "Outcome R" { 1.0 } +)###"; + +std::string GetSampleEFGData() { return std::string(kSampleEFGData); } +std::string GetKuhnPokerEFGData() { return std::string(kKuhnEFGData); } +std::string GetSignalingEFGData() { return std::string(kSignalingEFGData); } +std::string GetSimpleForkEFGData() { return std::string(kSimpleForkEFGData); } + +} // namespace efg_game +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/efg_game_data.h b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/efg_game_data.h new file mode 100644 index 0000000..81b3623 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/efg_game_data.h @@ -0,0 +1,37 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_EFG_GAME_DATA_H_ +#define OPEN_SPIEL_GAMES_EFG_GAME_DATA_H_ + +#include + +#include "open_spiel/games/efg_game/efg_game.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace efg_game { + +// A few example games used in the tests. These are identical to the contents +// of the files in efg/ but do not need to be loadable from a specific path +// when running tests. +std::string GetSampleEFGData(); +std::string GetKuhnPokerEFGData(); +std::string GetSignalingEFGData(); +std::string GetSimpleForkEFGData(); + +} // namespace efg_game +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_EFG_GAME_DATA_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/efg_game_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/efg_game_test.cc new file mode 100644 index 0000000..6951e26 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/efg_game_test.cc @@ -0,0 +1,220 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/efg_game/efg_game.h" + +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/games/efg_game/efg_game_data.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" +#include "open_spiel/utils/init.h" + +namespace open_spiel { +namespace efg_game { +namespace { + +namespace testing = open_spiel::testing; + +#define EFG_PATH_PREFIX "open_spiel/games/efg_game/games/" +// Sample game from Gambit +const char* kCommasFilename = EFG_PATH_PREFIX "commas.efg"; +const char* kSampleFilename = EFG_PATH_PREFIX "sample.efg"; +const char* kKuhnFilename = EFG_PATH_PREFIX "kuhn_poker.efg"; +const char* kLeducFilename = EFG_PATH_PREFIX "leduc_poker.efg"; +const char* kSignalingFilename = EFG_PATH_PREFIX + "signaling_vonstengel_forges_2008.efg"; + +// Example games from Morrill et al. +// "Hindsight and Sequential Rationality of Correlated Play" +const char* kExtendedBosFilename = EFG_PATH_PREFIX "extended_bos.efg"; +const char* kExtendedMPFilename = EFG_PATH_PREFIX "extended_mp.efg"; +const char* kExtendedShapleysFilename = EFG_PATH_PREFIX "extended_shapleys.efg"; + +void EFGGameSimTestsSampleFromData() { + std::shared_ptr game = LoadEFGGame(GetSampleEFGData()); + SPIEL_CHECK_TRUE(game != nullptr); + + // EFG games loaded directly via string cannot be properly deserialized + // because there is no way to pass the data back vai the game string. 
+  testing::RandomSimTestNoSerialize(*game, 100);
+}
+
+void EFGGameSimTestsKuhnFromData() {
+  std::shared_ptr<const Game> game = LoadEFGGame(GetKuhnPokerEFGData());
+  SPIEL_CHECK_TRUE(game != nullptr);
+  GameType type = game->GetType();
+  SPIEL_CHECK_EQ(type.dynamics, GameType::Dynamics::kSequential);
+  SPIEL_CHECK_EQ(type.information,
+                 GameType::Information::kImperfectInformation);
+  SPIEL_CHECK_EQ(type.utility, GameType::Utility::kZeroSum);
+  SPIEL_CHECK_EQ(type.chance_mode, GameType::ChanceMode::kExplicitStochastic);
+  SPIEL_CHECK_EQ(game->NumDistinctActions(), 2);
+  SPIEL_CHECK_EQ(game->MaxChanceOutcomes(), 3);
+
+  // EFG games loaded directly via string cannot be properly deserialized
+  // because there is no way to pass the data back via the game string.
+  testing::RandomSimTestNoSerialize(*game, 100);
+}
+
+void EFGGameSimTestsSignalingFromData() {
+  std::shared_ptr<const Game> game = LoadEFGGame(GetSignalingEFGData());
+  SPIEL_CHECK_TRUE(game != nullptr);
+  GameType type = game->GetType();
+  SPIEL_CHECK_EQ(type.dynamics, GameType::Dynamics::kSequential);
+  SPIEL_CHECK_EQ(type.information,
+                 GameType::Information::kImperfectInformation);
+  SPIEL_CHECK_EQ(type.utility, GameType::Utility::kGeneralSum);
+  SPIEL_CHECK_EQ(type.chance_mode, GameType::ChanceMode::kExplicitStochastic);
+  SPIEL_CHECK_EQ(game->NumDistinctActions(), 8);
+  SPIEL_CHECK_EQ(game->MaxChanceOutcomes(), 2);
+  testing::RandomSimTestNoSerialize(*game, 100);
+}
+
+void EFGGameSimpleForkFromData() {
+  std::shared_ptr<const Game> game = LoadEFGGame(GetSimpleForkEFGData());
+  SPIEL_CHECK_TRUE(game != nullptr);
+
+  // EFG games loaded directly via string cannot be properly deserialized
+  // because there is no way to pass the data back via the game string.
+  testing::RandomSimTestNoSerialize(*game, 100);
+}
+
+void EFGGameCommasFromFile() {
+  absl::optional<std::string> file = FindFile(kCommasFilename, 2);
+  if (file != absl::nullopt) {
+    std::cout << "Found file: " << file.value() << "; running sim test.";
+    std::shared_ptr<const Game> game =
+        LoadGame("efg_game", {{"filename", GameParameter(file.value())}});
+    SPIEL_CHECK_TRUE(game != nullptr);
+    GameType type = game->GetType();
+    SPIEL_CHECK_EQ(type.dynamics, GameType::Dynamics::kSequential);
+    SPIEL_CHECK_EQ(type.information,
+                   GameType::Information::kImperfectInformation);
+    SPIEL_CHECK_EQ(type.utility, GameType::Utility::kGeneralSum);
+    SPIEL_CHECK_EQ(type.chance_mode, GameType::ChanceMode::kExplicitStochastic);
+    SPIEL_CHECK_EQ(game->NumDistinctActions(), 4);
+    SPIEL_CHECK_EQ(game->NumPlayers(), 2);
+  }
+}
+
+void EFGGameSimTestsSampleFromFile() {
+  absl::optional<std::string> file = FindFile(kSampleFilename, 2);
+  if (file != absl::nullopt) {
+    std::cout << "Found file: " << file.value() << "; running sim test.";
+    std::shared_ptr<const Game> game =
+        LoadGame("efg_game", {{"filename", GameParameter(file.value())}});
+    SPIEL_CHECK_TRUE(game != nullptr);
+    testing::RandomSimTest(*game, 100);
+  }
+}
+
+void EFGGameSimTestsKuhnFromFile() {
+  absl::optional<std::string> file = FindFile(kKuhnFilename, 2);
+  if (file != absl::nullopt) {
+    std::cout << "Found file: " << file.value() << "; running sim test.";
+    std::shared_ptr<const Game> game =
+        LoadGame("efg_game", {{"filename", GameParameter(file.value())}});
+    SPIEL_CHECK_TRUE(game != nullptr);
+    GameType type = game->GetType();
+    SPIEL_CHECK_EQ(type.dynamics, GameType::Dynamics::kSequential);
+    SPIEL_CHECK_EQ(type.information,
+                   GameType::Information::kImperfectInformation);
+    SPIEL_CHECK_EQ(type.utility, GameType::Utility::kZeroSum);
+    SPIEL_CHECK_EQ(type.chance_mode, GameType::ChanceMode::kExplicitStochastic);
+    SPIEL_CHECK_EQ(game->NumDistinctActions(), 2);
+    SPIEL_CHECK_EQ(game->MaxChanceOutcomes(), 3);
+    testing::RandomSimTest(*game, 100);
+  }
+}
+
+void EFGGameSimTestsLeducFromFile() {
+  absl::optional<std::string> file = FindFile(kLeducFilename, 2);
+  if (file != absl::nullopt) {
+    std::cout << "Found file: " << file.value() << "; running sim test.";
+    std::shared_ptr<const Game> game =
+        LoadGame("efg_game", {{"filename", GameParameter(file.value())}});
+    SPIEL_CHECK_TRUE(game != nullptr);
+    GameType type = game->GetType();
+    SPIEL_CHECK_EQ(type.dynamics, GameType::Dynamics::kSequential);
+    SPIEL_CHECK_EQ(type.information,
+                   GameType::Information::kImperfectInformation);
+    SPIEL_CHECK_EQ(type.utility, GameType::Utility::kZeroSum);
+    SPIEL_CHECK_EQ(type.chance_mode, GameType::ChanceMode::kExplicitStochastic);
+    SPIEL_CHECK_EQ(game->NumDistinctActions(), 3);
+    SPIEL_CHECK_EQ(game->MaxChanceOutcomes(), 24);
+    testing::RandomSimTest(*game, 100);
+  }
+}
+
+void EFGGameSimTestsSignalingFromFile() {
+  absl::optional<std::string> file = FindFile(kSignalingFilename, 2);
+  if (file != absl::nullopt) {
+    std::cout << "Found file: " << file.value() << "; running sim test.";
+    std::shared_ptr<const Game> game =
+        LoadGame("efg_game", {{"filename", GameParameter(file.value())}});
+    SPIEL_CHECK_TRUE(game != nullptr);
+    GameType type = game->GetType();
+    SPIEL_CHECK_EQ(type.dynamics, GameType::Dynamics::kSequential);
+    SPIEL_CHECK_EQ(type.information,
+                   GameType::Information::kImperfectInformation);
+    SPIEL_CHECK_EQ(type.utility, GameType::Utility::kGeneralSum);
+    SPIEL_CHECK_EQ(type.chance_mode, GameType::ChanceMode::kExplicitStochastic);
+    SPIEL_CHECK_EQ(game->NumDistinctActions(), 8);
+    SPIEL_CHECK_EQ(game->MaxChanceOutcomes(), 2);
+    testing::RandomSimTest(*game, 100);
+  }
+}
+
+void EFGGameSimTestsExtendedFromFile() {
+  for (const char* filename : { kExtendedBosFilename,
+                                kExtendedMPFilename,
+                                kExtendedShapleysFilename}) {
+    absl::optional<std::string> file = FindFile(filename, 2);
+    if (file != absl::nullopt) {
+      std::cout << "Found file: " << file.value() << "; running sim test.";
+      std::shared_ptr<const Game> game =
+          LoadGame("efg_game", {{"filename", GameParameter(file.value())}});
+      SPIEL_CHECK_TRUE(game != nullptr);
+      GameType type = game->GetType();
+      SPIEL_CHECK_EQ(type.dynamics, GameType::Dynamics::kSequential);
+      SPIEL_CHECK_EQ(type.information,
+                     GameType::Information::kImperfectInformation);
+      SPIEL_CHECK_EQ(type.chance_mode,
+                     GameType::ChanceMode::kDeterministic);
+      testing::RandomSimTest(*game, 1);
+    }
+  }
+}
+
+}  // namespace
+}  // namespace efg_game
+}  // namespace open_spiel
+
+int main(int argc, char** argv) {
+  open_spiel::Init("", &argc, &argv, true);
+  open_spiel::efg_game::EFGGameSimTestsSampleFromData();
+  open_spiel::efg_game::EFGGameSimTestsKuhnFromData();
+  open_spiel::efg_game::EFGGameCommasFromFile();
+  open_spiel::efg_game::EFGGameSimTestsSampleFromFile();
+  open_spiel::efg_game::EFGGameSimTestsKuhnFromFile();
+  open_spiel::efg_game::EFGGameSimTestsLeducFromFile();
+  open_spiel::efg_game::EFGGameSimTestsSignalingFromData();
+  open_spiel::efg_game::EFGGameSimTestsSignalingFromFile();
+  open_spiel::efg_game::EFGGameSimTestsExtendedFromFile();
+  open_spiel::efg_game::EFGGameSimpleForkFromData();
+}
diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/README.md b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/README.md
new file mode 100644
index 0000000..4aec882
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/README.md
@@ -0,0 +1,9 @@
+This directory contains files that
are in the +[Gambit](http://www.gambit-project.org/) +[extensive-form game (.efg) format](http://www.gambit-project.org/gambit14/formats.html). + +To load them, use game string `efg_game(filename=)`. +The parser is found in [efg_game.h](https://github.com/deepmind/open_spiel/blob/master/open_spiel/games/efg_game.h). + +To export existing games in the library into gambit format, you can use python +script `python/examples/gambit_example.py` diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/commas.efg b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/commas.efg new file mode 100644 index 0000000..01e665e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/commas.efg @@ -0,0 +1,18 @@ +EFG 2 R "commas" { "p1" "p2" } +"test different allowed commas in payoffs" + +p "" 1 1 "" { "A" "B" } 0 +c "" 1 "" { "s" 99/100 "c" 1/100 } 0 +p "" 2 1 "" { "S" "C" } 0 +t "" 1 "SS" { 5, 2, } +t "" 2 "SC" { 3 1 } +p "" 2 2 "" { "S" "C" } 0 +t "" 1 "SS" { 5 2, } +t "" 2 "SC" { 3, 1 } +c "" 2 "" { "s" 1/100 "c" 99/100 } 0 +p "" 2 1 "" { "S" "C" } 0 +t "" 3 "CS" { 6, 3, } +t "" 4 "CC" { 4, 4 } +p "" 2 2 "" { "S" "C" } 0 +t "" 3 "CS" { 6, 3 } +t "" 4 "CC" { 4, 4 } diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/extended_bos.efg b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/extended_bos.efg new file mode 100644 index 0000000..c6ba41e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/extended_bos.efg @@ -0,0 +1,17 @@ +EFG 2 R "Modified Battle-of-the-Sexes Game in Morrill et al., Hindsight and Sequential Rationality of Correlated Play" { "Player 1" "Player 2" } "" + +p "ROOT" 1 1 "Root infoset" { "U" "N" } 0 + p "U" 1 2 "P1 U" { "X" "Y" } 0 + p "UX" 2 1 "P2 Infoset" { "X" "Y" } 0 + t "UXX" 1 "Outcome UXX" { 2.0 3.0 } + t "UXY" 2 "Outcome UXY" { 0.0 0.0 } + p "UY" 2 1 "P2 Infoset" { "X" "Y" } 0 + t "UYX" 3 "Outcome UYX" { 0.0 0.0 } + t "UYY" 4 "Outcome UYY" { 3.0 2.0 } + p "N" 1 3 "P1 N" { "X" "Y" } 0 + p "NX" 2 1 "P2 Infoset" { "X" "Y" } 0 + t "NXX" 5 "Outcome NXX" { 1.0 2.0 } + t "NXY" 6 "Outcome NXY" { 0.0 0.0 } + p "NY" 2 1 "P2 Infoset" { "X" "Y" } 0 + t "NYX" 7 "Outcome NYX" { 0.0 0.0 } + t "NYY" 8 "Outcome NYY" { 2.0 1.0 } diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/extended_mp.efg b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/extended_mp.efg new file mode 100644 index 0000000..9802fb9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/extended_mp.efg @@ -0,0 +1,17 @@ +EFG 2 R "Modified Matching Pennies Game in Morrill et al., Hindsight and Sequential Rationality of Correlated Play" { "Player 1" "Player 2" } "" + +p "ROOT" 1 1 "Root infoset" { "M" "N" } 0 + p "M" 1 2 "P1 M" { "H" "T" } 0 + p "MH" 2 1 "P2 Infoset" { "H" "T" } 0 + t "MHH" 1 "Outcome MHH" { 1.0 -1.0 } + t "MHT" 2 "Outcome UXY" { -1.0 1.0 } + p "MT" 2 1 "P2 Infoset" { "H" "T" } 0 + t "MTH" 3 "Outcome MTH" { -1.0 1.0 } + t "MTT" 4 "Outcome MTT" { 1.0 -1.0 } + p "N" 1 3 "P1 N" { "H" "T" } 0 + p "NH" 2 1 "P2 Infoset" { "H" "T" } 0 + t "NHH" 5 "Outcome NHH" { -1.0 1.0 } + t "NHT" 6 "Outcome NHT" { 1.0 -1.0 } + p "NT" 2 1 "P2 Infoset" { "H" "T" } 0 + t "NTH" 7 "Outcome NTH" { 1.0 -1.0 } + t "NTT" 8 "Outcome NTT" { -1.0 1.0 } diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/extended_shapleys.efg b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/extended_shapleys.efg new file 
mode 100644 index 0000000..4243f31 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/extended_shapleys.efg @@ -0,0 +1,30 @@ +EFG 2 R "Modified Shapley's Game in Morrill et al., Hindsight and Sequential Rationality of Correlated Play" { "Player 1" "Player 2" } "" + +p "ROOT" 1 1 "Root infoset" { "R" "P" "S" } 0 + p "R" 1 2 "P1 R" { "PR" "PNR" } 0 + p "R PR" 2 1 "P2 Infoset" { "r" "p" "s" } 0 + t "R PR r" 1 "Outcome R PR r" { 0.003 0.0 } + t "R PR p" 2 "Outcome R PR p" { 0.0 1.0 } + t "R PR s" 3 "Outcome R PR s" { 1.0 0.0 } + p "R PNR" 2 1 "P2 Infoset" { "r" "p" "s" } 0 + t "R PNR r" 4 "Outcome R PNR r" { 0.0 0.0 } + t "R PNR p" 5 "Outcome R PNR p" { 0.001 1.0 } + t "R PNR s" 6 "Outcome R PNR s" { 1.001 0.0 } + p "P" 1 3 "P1 P" { "PR" "PNR" } 0 + p "P PR" 2 1 "P2 Infoset" { "r" "p" "s" } 0 + t "P PR r" 7 "Outcome P PR r" { 1.003 0.0 } + t "P PR p" 8 "Outcome P PR p" { 0.0 0.0 } + t "P PR s" 9 "Outcome P PR s" { 0.0 1.0 } + p "P PNR" 2 1 "P2 Infoset" { "r" "p" "s" } 0 + t "P PNR r" 10 "Outcome P PNR r" { 1.0 0.0 } + t "P PNR p" 11 "Outcome P PNR p" { 0.001 0.0 } + t "P PNR s" 12 "Outcome P PNR s" { 0.001 1.0 } + p "S" 1 4 "P1 S" { "PR" "PNR" } 0 + p "S PR" 2 1 "P2 Infoset" { "r" "p" "s" } 0 + t "S PR r" 13 "Outcome S PR r" { 0.003 1.0 } + t "S PR p" 14 "Outcome S PR p" { 1.0 0.0 } + t "S PR s" 15 "Outcome S PR s" { 0.0 0.0 } + p "S PNR" 2 1 "P2 Infoset" { "r" "p" "s" } 0 + t "S PNR r" 16 "Outcome S PNR r" { 0.0 1.0 } + t "S PNR p" 17 "Outcome S PNR p" { 1.001 0.0 } + t "S PNR s" 18 "Outcome S PNR s" { 0.001 0.0 } diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/greenwald_sarfati_example1.efg b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/greenwald_sarfati_example1.efg new file mode 100644 index 0000000..03a7ad0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/greenwald_sarfati_example1.efg @@ -0,0 +1,17 @@ +EFG 2 R "Greenwald & Sarfati Example 1" { "Player 1" "Player 2" } "Example AFCE / AFCCE that is not an EFCE nor EFCCE" + +p "ROOT" 1 1 "Root infoset" { "L" "R" } 0 + p "L" 2 1 "P2 infoset" { "A" "B" } 0 + p "LA" 1 2 "Left P1 infoset" { "l1" "r1" } 0 + t "LAl" 1 "Outcome LAl" { 2.0 2.0 } + t "LAr" 2 "Outcome LAr" { 3.0 1.0 } + p "LB" 1 2 "Left P1 infoset" { "l1" "r1" } 0 + t "LBl" 3 "Outcome LBl" { 2.0 2.0 } + t "LBr" 4 "Outcome LBr" { 0.0 2.0 } + p "R" 2 1 "P2 infoset" { "A" "B" } 0 + p "RA" 1 3 "Right P1 infoset" { "l2" "r2" } 0 + t "RAl" 5 "Outcome RAl" { 0.0 0.0 } + t "RAr" 6 "Outcome RAr" { 3.0 0.0 } + p "RB" 1 3 "Right P1 infoset" { "l2" "r2" } 0 + t "RBl" 7 "Outcome RBl" { 0.0 0.0 } + t "RBr" 8 "Outcome RBr" { 3.0 0.0 } diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/greenwald_sarfati_example2.efg b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/greenwald_sarfati_example2.efg new file mode 100644 index 0000000..926d359 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/greenwald_sarfati_example2.efg @@ -0,0 +1,17 @@ +EFG 2 R "Greenwald & Sarfati Example 1" { "Player 1" "Player 2" } "Example EFCE but not CE" + +p "ROOT" 1 1 "Root infoset" { "L" "R" } 0 + p "L" 2 1 "P2 infoset" { "A" "B" } 0 + p "LA" 1 2 "Left P1 infoset" { "l1" "r1" } 0 + t "LAl" 1 "Outcome LAl" { 2.0 2.0 } + t "LAr" 2 "Outcome LAr" { 0.0 2.0 } + p "LB" 1 2 "Left P1 infoset" { "l1" "r1" } 0 + t "LBl" 3 "Outcome LBl" { 2.0 2.0 } + t "LBr" 4 "Outcome LBr" { 2.0 2.0 } + p "R" 2 1 "P2 infoset" { "A" "B" } 0 + p "RA" 1 3 "Right P1 
infoset" { "l2" "r2" } 0 + t "RAl" 5 "Outcome RAl" { 0.0 0.0 } + t "RAr" 6 "Outcome RAr" { 0.0 0.0 } + p "RB" 1 3 "Right P1 infoset" { "l2" "r2" } 0 + t "RBl" 7 "Outcome RBl" { 0.0 0.0 } + t "RBr" 8 "Outcome RBr" { 3.0 0.0 } diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/kuhn_poker.efg b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/kuhn_poker.efg new file mode 100644 index 0000000..79bfd19 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/kuhn_poker.efg @@ -0,0 +1,60 @@ +EFG 2 R "Kuhn poker" { "Player 1" "Player 2" } "A simplified poker game: https://en.wikipedia.org/wiki/Kuhn_poker" + +c "ROOT" 1 "c1" { "1" 1/3 "0" 1/3 "2" 1/3 } 0 + c "c2" 2 "c2" { "2" 1/2 "0" 1/2 } 0 + p "" 1 1 "1" { "p" "b" } 0 + p "" 2 2 "2p" { "p" "b" } 0 + t "" 3 "Outcome 12pp" { -1.0 1.0 } + p "" 1 2 "1pb" { "p" "b" } 0 + t "" 4 "Outcome 12pbp" { -1.0 1.0 } + t "" 5 "Outcome 12pbb" { -2.0 2.0 } + p "" 2 1 "2b" { "p" "b" } 0 + t "" 1 "Outcome 12bp" { 1.0 -1.0 } + t "" 2 "Outcome 12bb" { -2.0 2.0 } + p "" 1 1 "1" { "p" "b" } 0 + p "" 2 3 "0p" { "p" "b" } 0 + t "" 8 "Outcome 10pp" { 1.0 -1.0 } + p "" 1 2 "1pb" { "p" "b" } 0 + t "" 6 "Outcome 10pbp" { -1.0 1.0 } + t "" 7 "Outcome 10pbb" { 2.0 -2.0 } + p "" 2 4 "0b" { "p" "b" } 0 + t "" 9 "Outcome 10bp" { 1.0 -1.0 } + t "" 10 "Outcome 10bb" { 2.0 -2.0 } + c "c3" 3 "c3" { "2" 1/2 "1" 1/2 } 0 + p "" 1 3 "0" { "p" "b" } 0 + p "" 2 2 "2p" { "p" "b" } 0 + t "" 13 "Outcome 02pp" { -1.0 1.0 } + p "" 1 4 "0pb" { "p" "b" } 0 + t "" 14 "Outcome 02pbp" { -1.0 1.0 } + t "" 15 "Outcome 02pbb" { -2.0 2.0 } + p "" 2 1 "2b" { "p" "b" } 0 + t "" 11 "Outcome 02bp" { 1.0 -1.0 } + t "" 12 "Outcome 02bb" { -2.0 2.0 } + p "" 1 3 "0" { "p" "b" } 0 + p "" 2 5 "1p" { "p" "b" } 0 + t "" 18 "Outcome 01pp" { -1.0 1.0 } + p "" 1 4 "0pb" { "p" "b" } 0 + t "" 16 "Outcome 01pbp" { -1.0 1.0 } + t "" 17 "Outcome 01pbb" { -2.0 2.0 } + p "" 2 6 "1b" { "p" "b" } 0 + t "" 19 "Outcome 01bp" { 1.0 -1.0 } + t "" 20 "Outcome 01bb" { -2.0 2.0 } + c "c4" 4 "c4" { "0" 1/2 "1" 1/2 } 0 + p "" 1 5 "2" { "p" "b" } 0 + p "" 2 3 "0p" { "p" "b" } 0 + t "" 21 "Outcome 20pp" { 1.0 -1.0 } + p "" 1 6 "2pb" { "p" "b" } 0 + t "" 22 "Outcome 20pbp" { -1.0 1.0 } + t "" 23 "Outcome 20pbb" { 2.0 -2.0 } + p "" 2 4 "0b" { "p" "b" } 0 + t "" 24 "Outcome 20bp" { 1.0 -1.0 } + t "" 25 "Outcome 20bb" { 2.0 -2.0 } + p "" 1 5 "2" { "p" "b" } 0 + p "" 2 5 "1p" { "p" "b" } 0 + t "" 28 "Outcome 21pp" { 1.0 -1.0 } + p "" 1 6 "2pb" { "p" "b" } 0 + t "" 26 "Outcome 21pbp" { -1.0 1.0 } + t "" 27 "Outcome 21pbb" { 2.0 -2.0 } + p "" 2 6 "1b" { "p" "b" } 0 + t "" 29 "Outcome 21bp" { 1.0 -1.0 } + t "" 30 "Outcome 21bb" { 2.0 -2.0 } diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/leduc_poker.efg b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/leduc_poker.efg new file mode 100644 index 0000000..fb1c1d6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/leduc_poker.efg @@ -0,0 +1,2043 @@ +EFG 2 R "Leduc Poker" { "Player 1" "Player 2" } "Leduc poker. Original author Jens Weigel. 
Adapted from file found here: https://sourceforge.net/p/gambit/mailman/gambit-users/?page=1" + +c "" 1 "" { "KKQ" 1/30 "KKJ" 1/30 "KQK" 1/30 "KQQ" 1/30 "KQJ" 2/30 "KJK" 1/30 "KJQ" 2/30 "KJJ" 1/30 "QKK" 1/30 "QKQ" 1/30 "QKJ" 2/30 "QQK" 1/30 "QQJ" 1/30 "QJK" 2/30 "QJQ" 1/30 "QJJ" 1/30 "JKK" 1/30 "JKQ" 2/30 "JKJ" 1/30 "JQK" 2/30 "JQQ" 1/30 "JQJ" 1/30 "JJK" 1/30 "JJQ" 1/30 } 0 +p "" 1 1 "" { "R" "C" } 0 +p "" 2 1 "p2_R_K" { "R" "C" "F" } 0 +p "" 1 2 "p1_K_RR" { "C" "F" } 0 +p "" 1 3 "p1_RRQ_K" { "R" "C" } 0 +p "" 2 2 "p2_RR_Q_R_K" { "R" "C" "F" } 0 +p "" 1 4 "" { "C" "F" } 0 +t "" 130 "" { 0 0 } +t "" 1 "" { -9 9 } +t "" 131 "" { 0 0 } +t "" 2 "" { 5 -5 } +p "" 2 3 "p2_RR_Q_C_K" { "R" "C" } 0 +p "" 1 5 "" { "R" "C" "F" } 0 +p "" 2 4 "" { "C" "F" } 0 +t "" 132 "" { 0 0 } +t "" 3 "" { 9 -9 } +t "" 133 "" { 0 0 } +t "" 4 "" { -5 5 } +t "" 134 "" { 0 0 } +t "" 5 "" { -3 3 } +p "" 1 6 "p1_RCQ_K" { "R" "C" } 0 +p "" 2 5 "p2_R_Q_R_K" { "R" "C" "F" } 0 +p "" 1 7 "" { "C" "F" } 0 +t "" 135 "" { 0 0 } +t "" 6 "" { -7 7 } +t "" 136 "" { 0 0 } +t "" 7 "" { 3 -3 } +p "" 2 6 "p2_CRR_Q_C_K" { "R" "C" } 0 +p "" 1 8 "" { "R" "C" "F" } 0 +p "" 2 7 "" { "C" "F" } 0 +t "" 137 "" { 0 0 } +t "" 8 "" { 7 -7 } +t "" 138 "" { 0 0 } +t "" 9 "" { -3 3 } +t "" 139 "" { 0 0 } +t "" 10 "" { 1 -1 } +p "" 2 8 "p2_C_K" { "R" "C" } 0 +p "" 1 9 "p1_CR_K" { "R" "C" "F" } 0 +p "" 2 9 "" { "C" "F" } 0 +p "" 1 10 "" { "R" "C" } 0 +p "" 2 10 "" { "R" "C" "F" } 0 +p "" 1 11 "" { "C" "F" } 0 +t "" 145 "" { 0 0 } +t "" 16 "" { -9 9 } +t "" 146 "" { 0 0 } +t "" 17 "" { 5 -5 } +p "" 2 11 "" { "R" "C" } 0 +p "" 1 12 "" { "R" "C" "F" } 0 +p "" 2 12 "" { "C" "F" } 0 +t "" 147 "" { 0 0 } +t "" 18 "" { 9 -9 } +t "" 148 "" { 0 0 } +t "" 19 "" { -5 5 } +t "" 149 "" { 0 0 } +t "" 20 "" { 3 -3 } +p "" 1 13 "p1_CRQ_K" { "R" "C" } 0 +p "" 2 13 "p2_CRR_Q_R_K" { "R" "C" "F" } 0 +p "" 1 14 "" { "C" "F" } 0 +t "" 140 "" { 0 0 } +t "" 11 "" { -7 7 } +t "" 141 "" { 0 0 } +t "" 12 "" { 3 -3 } +p "" 2 14 "p2_CRR_Q_C_K" { "R" "C" } 0 +p "" 1 15 "" { "R" "C" "F" } 0 +p "" 2 15 "" { "C" "F" } 0 +t "" 142 "" { 0 0 } +t "" 13 "" { 7 -7 } +t "" 143 "" { 0 0 } +t "" 14 "" { -3 3 } +t "" 144 "" { 0 0 } +t "" 292 "" { -1 1 } +p "" 1 16 "p1_CCQ_K" { "R" "C" } 0 +p "" 2 16 "p2_CC_Q_R_K" { "R" "C" "F" } 0 +p "" 1 17 "" { "C" "F" } 0 +t "" 150 "" { 0 0 } +t "" 21 "" { -5 5 } +t "" 151 "" { 0 0 } +t "" 22 "" { 1 -1 } +p "" 2 17 "p2_CC_Q_C_K" { "R" "C" } 0 +p "" 1 18 "" { "R" "C" "F" } 0 +p "" 2 18 "" { "C" "F" } 0 +t "" 152 "" { 0 0 } +t "" 23 "" { 5 -5 } +t "" 153 "" { 0 0 } +t "" 24 "" { -1 1 } +t "" 159 "" { 0 0 } +p "" 1 1 "" { "R" "C" } 0 +p "" 2 1 "p2_R_K" { "R" "C" "F" } 0 +p "" 1 2 "p1_K_RR" { "C" "F" } 0 +p "" 1 19 "p1_RRJ_K" { "R" "C" } 0 +p "" 2 19 "p2_RR_J_R_K" { "R" "C" "F" } 0 +p "" 1 20 "" { "C" "F" } 0 +t "" 154 "" { 0 0 } +t "" 1 "" { -9 9 } +t "" 155 "" { 0 0 } +t "" 2 "" { 5 -5 } +p "" 2 20 "p2_CRR_Q_C_K" { "R" "C" } 0 +p "" 1 21 "" { "R" "C" "F" } 0 +p "" 2 21 "" { "C" "F" } 0 +t "" 156 "" { 0 0 } +t "" 3 "" { 9 -9 } +t "" 157 "" { 0 0 } +t "" 4 "" { -5 5 } +t "" 158 "" { 0 0 } +t "" 5 "" { -3 3 } +p "" 1 22 "p1_RCJ_K" { "R" "C" } 0 +p "" 2 22 "" { "R" "C" "F" } 0 +p "" 1 23 "" { "C" "F" } 0 +t "" 160 "" { 0 0 } +t "" 6 "" { -7 7 } +t "" 161 "" { 0 0 } +t "" 7 "" { 3 -3 } +p "" 2 23 "" { "R" "C" } 0 +p "" 1 24 "" { "R" "C" "F" } 0 +p "" 2 24 "" { "C" "F" } 0 +t "" 162 "" { 0 0 } +t "" 8 "" { 7 -7 } +t "" 163 "" { 0 0 } +t "" 9 "" { -3 3 } +t "" 164 "" { 0 0 } +t "" 10 "" { 1 -1 } +p "" 2 8 "p2_C_K" { "R" "C" } 0 +p "" 1 9 "p1_CR_K" { "R" "C" "F" } 0 
+p "" 2 9 "" { "C" "F" } 0 +p "" 1 25 "" { "R" "C" } 0 +p "" 2 25 "" { "R" "C" "F" } 0 +p "" 1 26 "" { "C" "F" } 0 +t "" 170 "" { 0 0 } +t "" 16 "" { -9 9 } +t "" 171 "" { 0 0 } +t "" 17 "" { 5 -5 } +p "" 2 26 "" { "R" "C" } 0 +p "" 1 27 "" { "R" "C" "F" } 0 +p "" 2 27 "" { "C" "F" } 0 +t "" 172 "" { 0 0 } +t "" 18 "" { 9 -9 } +t "" 173 "" { 0 0 } +t "" 19 "" { -5 5 } +t "" 174 "" { 0 0 } +t "" 20 "" { 3 -3 } +p "" 1 28 "p1_CRJ_K" { "R" "C" } 0 +p "" 2 28 "" { "R" "C" "F" } 0 +p "" 1 29 "" { "C" "F" } 0 +t "" 165 "" { 0 0 } +t "" 11 "" { -7 7 } +t "" 166 "" { 0 0 } +t "" 12 "" { 3 -3 } +p "" 2 29 "" { "R" "C" } 0 +p "" 1 30 "" { "R" "C" "F" } 0 +p "" 2 30 "" { "C" "F" } 0 +t "" 167 "" { 0 0 } +t "" 13 "" { 7 -7 } +t "" 168 "" { 0 0 } +t "" 14 "" { -3 3 } +t "" 169 "" { 0 0 } +t "" 293 "" { -1 1 } +p "" 1 31 "p1_CCJ_K" { "R" "C" } 0 +p "" 2 31 "" { "R" "C" "F" } 0 +p "" 1 32 "" { "C" "F" } 0 +t "" 175 "" { 0 0 } +t "" 21 "" { -5 5 } +t "" 176 "" { 0 0 } +t "" 22 "" { 1 -1 } +p "" 2 32 "" { "R" "C" } 0 +p "" 1 33 "" { "R" "C" "F" } 0 +p "" 2 33 "" { "C" "F" } 0 +t "" 177 "" { 0 0 } +t "" 23 "" { 5 -5 } +t "" 178 "" { 0 0 } +t "" 24 "" { -1 1 } +t "" 179 "" { 0 0 } +p "" 1 1 "" { "R" "C" } 0 +p "" 2 34 "p2_R_Q" { "R" "C" "F" } 0 +p "" 1 2 "p1_K_RR" { "C" "F" } 0 +p "" 1 34 "p1_RRK_K" { "R" "C" } 0 +p "" 2 35 "" { "R" "C" "F" } 0 +p "" 1 35 "" { "C" "F" } 0 +t "" 25 "" { 13 -13 } +t "" 26 "" { -9 9 } +t "" 27 "" { 9 -9 } +t "" 2 "" { 5 -5 } +p "" 2 36 "" { "R" "C" } 0 +p "" 1 36 "" { "R" "C" "F" } 0 +p "" 2 37 "" { "C" "F" } 0 +t "" 28 "" { 13 -13 } +t "" 29 "" { 9 -9 } +t "" 30 "" { 9 -9 } +t "" 31 "" { -5 5 } +t "" 32 "" { 5 -5 } +t "" 5 "" { -3 3 } +p "" 1 37 "p1_RCK_K" { "R" "C" } 0 +p "" 2 38 "" { "R" "C" "F" } 0 +p "" 1 38 "" { "C" "F" } 0 +t "" 33 "" { 11 -11 } +t "" 6 "" { -7 7 } +t "" 34 "" { 7 -7 } +t "" 7 "" { 3 -3 } +p "" 2 39 "" { "R" "C" } 0 +p "" 1 39 "" { "R" "C" "F" } 0 +p "" 2 40 "" { "C" "F" } 0 +t "" 35 "" { 11 -11 } +t "" 36 "" { 7 -7 } +t "" 37 "" { 7 -7 } +t "" 9 "" { -3 3 } +t "" 38 "" { 3 -3 } +t "" 10 "" { 1 -1 } +p "" 2 41 "p2_C_Q" { "R" "C" } 0 +p "" 1 9 "p1_CR_K" { "R" "C" "F" } 0 +p "" 2 42 "" { "C" "F" } 0 +p "" 1 40 "" { "R" "C" } 0 +p "" 2 43 "" { "R" "C" "F" } 0 +p "" 1 41 "" { "C" "F" } 0 +t "" 47 "" { 13 -13 } +t "" 48 "" { -9 9 } +t "" 49 "" { 9 -9 } +t "" 50 "" { 5 -5 } +p "" 2 44 "" { "R" "C" } 0 +p "" 1 42 "" { "R" "C" "F" } 0 +p "" 2 45 "" { "C" "F" } 0 +t "" 51 "" { 13 -13 } +t "" 52 "" { 9 -9 } +t "" 53 "" { 9 -9 } +t "" 54 "" { -5 5 } +t "" 55 "" { 5 -5 } +t "" 20 "" { 3 -3 } +p "" 1 43 "p1_CRK_K" { "R" "C" } 0 +p "" 2 46 "" { "R" "C" "F" } 0 +p "" 1 44 "" { "C" "F" } 0 +t "" 39 "" { 11 -11 } +t "" 40 "" { -7 7 } +t "" 41 "" { 7 -7 } +t "" 12 "" { 3 -3 } +p "" 2 47 "" { "R" "C" } 0 +p "" 1 45 "" { "R" "C" "F" } 0 +p "" 2 48 "" { "C" "F" } 0 +t "" 42 "" { 11 -11 } +t "" 43 "" { 7 -7 } +t "" 44 "" { 7 -7 } +t "" 45 "" { -3 3 } +t "" 46 "" { 3 -3 } +t "" 294 "" { -1 1 } +p "" 1 46 "p1_CCK_K" { "R" "C" } 0 +p "" 2 49 "" { "R" "C" "F" } 0 +p "" 1 47 "" { "C" "F" } 0 +t "" 56 "" { 9 -9 } +t "" 21 "" { -5 5 } +t "" 57 "" { 5 -5 } +t "" 58 "" { 1 -1 } +p "" 2 50 "" { "R" "C" } 0 +p "" 1 48 "" { "R" "C" "F" } 0 +p "" 2 51 "" { "C" "F" } 0 +t "" 59 "" { 9 -9 } +t "" 60 "" { 5 -5 } +t "" 61 "" { 5 -5 } +t "" 24 "" { -1 1 } +t "" 62 "" { 1 -1 } +p "" 1 1 "" { "R" "C" } 0 +p "" 2 34 "p2_R_Q" { "R" "C" "F" } 0 +p "" 1 2 "p1_K_RR" { "C" "F" } 0 +p "" 1 3 "p1_RRQ_K" { "R" "C" } 0 +p "" 2 52 "" { "R" "C" "F" } 0 +p "" 1 4 "" { "C" "F" } 0 +t "" 63 "" { -13 13 } +t "" 
64 "" { -9 9 } +t "" 65 "" { -9 9 } +t "" 2 "" { 5 -5 } +p "" 2 53 "" { "R" "C" } 0 +p "" 1 5 "" { "R" "C" "F" } 0 +p "" 2 54 "" { "C" "F" } 0 +t "" 66 "" { -13 13 } +t "" 67 "" { 9 -9 } +t "" 68 "" { -9 9 } +t "" 69 "" { -5 5 } +t "" 70 "" { -5 5 } +t "" 5 "" { -3 3 } +p "" 1 6 "p1_RCQ_K" { "R" "C" } 0 +p "" 2 55 "" { "R" "C" "F" } 0 +p "" 1 7 "" { "C" "F" } 0 +t "" 71 "" { -11 11 } +t "" 6 "" { -7 7 } +t "" 72 "" { -7 7 } +t "" 7 "" { 3 -3 } +p "" 2 56 "" { "R" "C" } 0 +p "" 1 8 "" { "R" "C" "F" } 0 +p "" 2 57 "" { "C" "F" } 0 +t "" 73 "" { -11 11 } +t "" 285 "" { 7 -7 } +t "" 75 "" { -7 7 } +t "" 9 "" { -3 3 } +t "" 76 "" { -3 3 } +t "" 10 "" { 1 -1 } +p "" 2 41 "p2_C_Q" { "R" "C" } 0 +p "" 1 9 "p1_CR_K" { "R" "C" "F" } 0 +p "" 2 42 "" { "C" "F" } 0 +p "" 1 10 "" { "R" "C" } 0 +p "" 2 58 "" { "R" "C" "F" } 0 +p "" 1 11 "" { "C" "F" } 0 +t "" 85 "" { -13 13 } +t "" 86 "" { -9 9 } +t "" 87 "" { -9 9 } +t "" 88 "" { 5 -5 } +p "" 2 59 "" { "R" "C" } 0 +p "" 1 12 "" { "R" "C" "F" } 0 +p "" 2 60 "" { "C" "F" } 0 +t "" 89 "" { -13 13 } +t "" 90 "" { 9 -9 } +t "" 91 "" { -9 9 } +t "" 92 "" { -5 5 } +t "" 93 "" { -5 5 } +t "" 20 "" { 3 -3 } +p "" 1 13 "p1_CRQ_K" { "R" "C" } 0 +p "" 2 61 "" { "R" "C" "F" } 0 +p "" 1 14 "" { "C" "F" } 0 +t "" 77 "" { -11 11 } +t "" 78 "" { -7 7 } +t "" 79 "" { -7 7 } +t "" 12 "" { 3 -3 } +p "" 2 62 "" { "R" "C" } 0 +p "" 1 15 "" { "R" "C" "F" } 0 +p "" 2 63 "" { "C" "F" } 0 +t "" 80 "" { -11 11 } +t "" 81 "" { 7 -7 } +t "" 82 "" { -7 7 } +t "" 83 "" { -3 3 } +t "" 84 "" { -3 3 } +t "" 295 "" { -1 1 } +p "" 1 16 "p1_CCQ_K" { "R" "C" } 0 +p "" 2 64 "" { "R" "C" "F" } 0 +p "" 1 17 "" { "C" "F" } 0 +t "" 94 "" { -9 9 } +t "" 21 "" { -5 5 } +t "" 95 "" { -5 5 } +t "" 96 "" { 1 -1 } +p "" 2 65 "" { "R" "C" } 0 +p "" 1 18 "" { "R" "C" "F" } 0 +p "" 2 66 "" { "C" "F" } 0 +t "" 97 "" { -9 9 } +t "" 98 "" { 5 -5 } +t "" 99 "" { -5 5 } +t "" 24 "" { -1 1 } +t "" 100 "" { -1 1 } +p "" 1 1 "" { "R" "C" } 0 +p "" 2 34 "p2_R_Q" { "R" "C" "F" } 0 +p "" 1 2 "p1_K_RR" { "C" "F" } 0 +p "" 1 19 "p1_RRJ_K" { "R" "C" } 0 +p "" 2 67 "" { "R" "C" "F" } 0 +p "" 1 20 "" { "C" "F" } 0 +t "" 25 "" { 13 -13 } +t "" 26 "" { -9 9 } +t "" 27 "" { 9 -9 } +t "" 2 "" { 5 -5 } +p "" 2 68 "" { "R" "C" } 0 +p "" 1 21 "" { "R" "C" "F" } 0 +p "" 2 69 "" { "C" "F" } 0 +t "" 28 "" { 13 -13 } +t "" 29 "" { 9 -9 } +t "" 30 "" { 9 -9 } +t "" 31 "" { -5 5 } +t "" 32 "" { 5 -5 } +t "" 5 "" { -3 3 } +p "" 1 22 "p1_RCJ_K" { "R" "C" } 0 +p "" 2 70 "" { "R" "C" "F" } 0 +p "" 1 23 "" { "C" "F" } 0 +t "" 33 "" { 11 -11 } +t "" 6 "" { -7 7 } +t "" 101 "" { 7 -7 } +t "" 7 "" { 3 -3 } +p "" 2 71 "" { "R" "C" } 0 +p "" 1 24 "" { "R" "C" "F" } 0 +p "" 2 72 "" { "C" "F" } 0 +t "" 35 "" { 11 -11 } +t "" 36 "" { 7 -7 } +t "" 37 "" { 7 -7 } +t "" 9 "" { -3 3 } +t "" 38 "" { 3 -3 } +t "" 10 "" { 1 -1 } +p "" 2 41 "p2_C_Q" { "R" "C" } 0 +p "" 1 9 "p1_CR_K" { "R" "C" "F" } 0 +p "" 2 42 "" { "C" "F" } 0 +p "" 1 25 "" { "R" "C" } 0 +p "" 2 73 "" { "R" "C" "F" } 0 +p "" 1 26 "" { "C" "F" } 0 +t "" 47 "" { 13 -13 } +t "" 48 "" { -9 9 } +t "" 49 "" { 9 -9 } +t "" 50 "" { 5 -5 } +p "" 2 74 "" { "R" "C" } 0 +p "" 1 27 "" { "R" "C" "F" } 0 +p "" 2 75 "" { "C" "F" } 0 +t "" 51 "" { 13 -13 } +t "" 52 "" { 9 -9 } +t "" 53 "" { 9 -9 } +t "" 54 "" { -5 5 } +t "" 55 "" { 5 -5 } +t "" 20 "" { 3 -3 } +p "" 1 28 "p1_CRJ_K" { "R" "C" } 0 +p "" 2 76 "" { "R" "C" "F" } 0 +p "" 1 29 "" { "C" "F" } 0 +t "" 39 "" { 11 -11 } +t "" 40 "" { -7 7 } +t "" 41 "" { 7 -7 } +t "" 12 "" { 3 -3 } +p "" 2 77 "" { "R" "C" } 0 +p "" 1 30 "" { "R" "C" "F" } 0 +p "" 
2 78 "" { "C" "F" } 0 +t "" 42 "" { 11 -11 } +t "" 43 "" { 7 -7 } +t "" 44 "" { 7 -7 } +t "" 45 "" { -3 3 } +t "" 46 "" { 3 -3 } +t "" 296 "" { -1 1 } +p "" 1 31 "p1_CCJ_K" { "R" "C" } 0 +p "" 2 79 "" { "R" "C" "F" } 0 +p "" 1 32 "" { "C" "F" } 0 +t "" 56 "" { 9 -9 } +t "" 21 "" { -5 5 } +t "" 57 "" { 5 -5 } +t "" 22 "" { 1 -1 } +p "" 2 80 "" { "R" "C" } 0 +p "" 1 33 "" { "R" "C" "F" } 0 +p "" 2 81 "" { "C" "F" } 0 +t "" 59 "" { 9 -9 } +t "" 60 "" { 5 -5 } +t "" 57 "" { 5 -5 } +t "" 24 "" { -1 1 } +t "" 62 "" { 1 -1 } +p "" 1 1 "" { "R" "C" } 0 +p "" 2 82 "p2_R_J" { "R" "C" "F" } 0 +p "" 1 2 "p1_K_RR" { "C" "F" } 0 +p "" 1 34 "p1_RRK_K" { "R" "C" } 0 +p "" 2 83 "" { "R" "C" "F" } 0 +p "" 1 35 "" { "C" "F" } 0 +t "" 25 "" { 13 -13 } +t "" 26 "" { -9 9 } +t "" 27 "" { 9 -9 } +t "" 2 "" { 5 -5 } +p "" 2 84 "" { "R" "C" } 0 +p "" 1 36 "" { "R" "C" "F" } 0 +p "" 2 85 "" { "C" "F" } 0 +t "" 28 "" { 13 -13 } +t "" 29 "" { 9 -9 } +t "" 30 "" { 9 -9 } +t "" 31 "" { -5 5 } +t "" 32 "" { 5 -5 } +t "" 5 "" { -3 3 } +p "" 1 37 "p1_RCK_K" { "R" "C" } 0 +p "" 2 86 "" { "R" "C" "F" } 0 +p "" 1 38 "" { "C" "F" } 0 +t "" 33 "" { 11 -11 } +t "" 6 "" { -7 7 } +t "" 101 "" { 7 -7 } +t "" 7 "" { 3 -3 } +p "" 2 87 "" { "R" "C" } 0 +p "" 1 39 "" { "R" "C" "F" } 0 +p "" 2 88 "" { "C" "F" } 0 +t "" 35 "" { 11 -11 } +t "" 36 "" { 7 -7 } +t "" 37 "" { 7 -7 } +t "" 9 "" { -3 3 } +t "" 38 "" { 3 -3 } +t "" 10 "" { 1 -1 } +p "" 2 89 "p2_C_J" { "R" "C" } 0 +p "" 1 9 "p1_CR_K" { "R" "C" "F" } 0 +p "" 2 90 "" { "C" "F" } 0 +p "" 1 40 "" { "R" "C" } 0 +p "" 2 91 "" { "R" "C" "F" } 0 +p "" 1 41 "" { "C" "F" } 0 +t "" 47 "" { 13 -13 } +t "" 48 "" { -9 9 } +t "" 49 "" { 9 -9 } +t "" 50 "" { 5 -5 } +p "" 2 92 "" { "R" "C" } 0 +p "" 1 42 "" { "R" "C" "F" } 0 +p "" 2 93 "" { "C" "F" } 0 +t "" 51 "" { 13 -13 } +t "" 52 "" { 9 -9 } +t "" 53 "" { 9 -9 } +t "" 54 "" { -5 5 } +t "" 55 "" { 5 -5 } +t "" 20 "" { 3 -3 } +p "" 1 43 "p1_CRK_K" { "R" "C" } 0 +p "" 2 94 "" { "R" "C" "F" } 0 +p "" 1 44 "" { "C" "F" } 0 +t "" 39 "" { 11 -11 } +t "" 40 "" { -7 7 } +t "" 41 "" { 7 -7 } +t "" 12 "" { 3 -3 } +p "" 2 95 "" { "R" "C" } 0 +p "" 1 45 "" { "R" "C" "F" } 0 +p "" 2 96 "" { "C" "F" } 0 +t "" 42 "" { 11 -11 } +t "" 43 "" { 7 -7 } +t "" 44 "" { 7 -7 } +t "" 45 "" { -3 3 } +t "" 46 "" { 3 -3 } +t "" 297 "" { -1 1 } +p "" 1 46 "p1_CCK_K" { "R" "C" } 0 +p "" 2 97 "" { "R" "C" "F" } 0 +p "" 1 47 "" { "C" "F" } 0 +t "" 56 "" { 9 -9 } +t "" 21 "" { -5 5 } +t "" 57 "" { 5 -5 } +t "" 22 "" { 1 -1 } +p "" 2 98 "" { "R" "C" } 0 +p "" 1 48 "" { "R" "C" "F" } 0 +p "" 2 99 "" { "C" "F" } 0 +t "" 59 "" { 9 -9 } +t "" 60 "" { 5 -5 } +t "" 57 "" { 5 -5 } +t "" 24 "" { -1 1 } +t "" 62 "" { 1 -1 } +p "" 1 1 "" { "R" "C" } 0 +p "" 2 82 "p2_R_J" { "R" "C" "F" } 0 +p "" 1 2 "p1_K_RR" { "C" "F" } 0 +p "" 1 3 "p1_RRQ_K" { "R" "C" } 0 +p "" 2 100 "" { "R" "C" "F" } 0 +p "" 1 4 "" { "C" "F" } 0 +t "" 25 "" { 13 -13 } +t "" 26 "" { -9 9 } +t "" 27 "" { 9 -9 } +t "" 2 "" { 5 -5 } +p "" 2 101 "" { "R" "C" } 0 +p "" 1 5 "" { "R" "C" "F" } 0 +p "" 2 102 "" { "C" "F" } 0 +t "" 28 "" { 13 -13 } +t "" 29 "" { 9 -9 } +t "" 30 "" { 9 -9 } +t "" 31 "" { -5 5 } +t "" 32 "" { 5 -5 } +t "" 5 "" { -3 3 } +p "" 1 6 "p1_RCQ_K" { "R" "C" } 0 +p "" 2 103 "" { "R" "C" "F" } 0 +p "" 1 7 "" { "C" "F" } 0 +t "" 33 "" { 11 -11 } +t "" 6 "" { -7 7 } +t "" 101 "" { 7 -7 } +t "" 7 "" { 3 -3 } +p "" 2 104 "" { "R" "C" } 0 +p "" 1 8 "" { "R" "C" "F" } 0 +p "" 2 105 "" { "C" "F" } 0 +t "" 35 "" { 11 -11 } +t "" 36 "" { 7 -7 } +t "" 37 "" { 7 -7 } +t "" 9 "" { -3 3 } +t "" 38 "" { 3 -3 } +t "" 
10 "" { 1 -1 } +p "" 2 89 "p2_C_J" { "R" "C" } 0 +p "" 1 9 "p1_CR_K" { "R" "C" "F" } 0 +p "" 2 90 "" { "C" "F" } 0 +p "" 1 10 "" { "R" "C" } 0 +p "" 2 106 "" { "R" "C" "F" } 0 +p "" 1 11 "" { "C" "F" } 0 +t "" 47 "" { 13 -13 } +t "" 48 "" { -9 9 } +t "" 49 "" { 9 -9 } +t "" 50 "" { 5 -5 } +p "" 2 107 "" { "R" "C" } 0 +p "" 1 12 "" { "R" "C" "F" } 0 +p "" 2 108 "" { "C" "F" } 0 +t "" 51 "" { 13 -13 } +t "" 52 "" { 9 -9 } +t "" 53 "" { 9 -9 } +t "" 54 "" { -5 5 } +t "" 55 "" { 5 -5 } +t "" 20 "" { 3 -3 } +p "" 1 13 "p1_CRQ_K" { "R" "C" } 0 +p "" 2 109 "" { "R" "C" "F" } 0 +p "" 1 14 "" { "C" "F" } 0 +t "" 39 "" { 11 -11 } +t "" 74 "" { -7 7 } +t "" 41 "" { 7 -7 } +t "" 12 "" { 3 -3 } +p "" 2 110 "" { "R" "C" } 0 +p "" 1 15 "" { "R" "C" "F" } 0 +p "" 2 111 "" { "C" "F" } 0 +t "" 42 "" { 11 -11 } +t "" 43 "" { 7 -7 } +t "" 44 "" { 7 -7 } +t "" 45 "" { -3 3 } +t "" 46 "" { 3 -3 } +t "" 298 "" { -1 1 } +p "" 1 16 "p1_CCQ_K" { "R" "C" } 0 +p "" 2 112 "" { "R" "C" "F" } 0 +p "" 1 17 "" { "C" "F" } 0 +t "" 56 "" { 9 -9 } +t "" 21 "" { -5 5 } +t "" 57 "" { 5 -5 } +t "" 22 "" { 1 -1 } +p "" 2 113 "" { "R" "C" } 0 +p "" 1 18 "" { "R" "C" "F" } 0 +p "" 2 114 "" { "C" "F" } 0 +t "" 59 "" { 9 -9 } +t "" 60 "" { 5 -5 } +t "" 57 "" { 5 -5 } +t "" 24 "" { -1 1 } +t "" 62 "" { 1 -1 } +p "" 1 1 "" { "R" "C" } 0 +p "" 2 82 "p2_R_J" { "R" "C" "F" } 0 +p "" 1 2 "p1_K_RR" { "C" "F" } 0 +p "" 1 19 "p1_RRJ_K" { "R" "C" } 0 +p "" 2 115 "" { "R" "C" "F" } 0 +p "" 1 20 "" { "C" "F" } 0 +t "" 63 "" { -13 13 } +t "" 64 "" { -9 9 } +t "" 65 "" { -9 9 } +t "" 2 "" { 5 -5 } +p "" 2 116 "" { "R" "C" } 0 +p "" 1 21 "" { "R" "C" "F" } 0 +p "" 2 117 "" { "C" "F" } 0 +t "" 66 "" { -13 13 } +t "" 67 "" { 9 -9 } +t "" 68 "" { -9 9 } +t "" 69 "" { -5 5 } +t "" 70 "" { -5 5 } +t "" 5 "" { -3 3 } +p "" 1 22 "p1_RCJ_K" { "R" "C" } 0 +p "" 2 118 "" { "R" "C" "F" } 0 +p "" 1 23 "" { "C" "F" } 0 +t "" 71 "" { -11 11 } +t "" 6 "" { -7 7 } +t "" 72 "" { -7 7 } +t "" 7 "" { 3 -3 } +p "" 2 119 "" { "R" "C" } 0 +p "" 1 24 "" { "R" "C" "F" } 0 +p "" 2 120 "" { "C" "F" } 0 +t "" 73 "" { -11 11 } +t "" 286 "" { 7 -7 } +t "" 75 "" { -7 7 } +t "" 9 "" { -3 3 } +t "" 76 "" { -3 3 } +t "" 10 "" { 1 -1 } +p "" 2 89 "p2_C_J" { "R" "C" } 0 +p "" 1 9 "p1_CR_K" { "R" "C" "F" } 0 +p "" 2 90 "" { "C" "F" } 0 +p "" 1 25 "" { "R" "C" } 0 +p "" 2 121 "" { "R" "C" "F" } 0 +p "" 1 26 "" { "C" "F" } 0 +t "" 85 "" { -13 13 } +t "" 86 "" { -9 9 } +t "" 87 "" { -9 9 } +t "" 88 "" { 5 -5 } +p "" 2 122 "" { "R" "C" } 0 +p "" 1 27 "" { "R" "C" "F" } 0 +p "" 2 123 "" { "C" "F" } 0 +t "" 89 "" { -13 13 } +t "" 90 "" { 9 -9 } +t "" 91 "" { -9 9 } +t "" 92 "" { -5 5 } +t "" 93 "" { -5 5 } +t "" 20 "" { 3 -3 } +p "" 1 28 "p1_CRJ_K" { "R" "C" } 0 +p "" 2 124 "" { "R" "C" "F" } 0 +p "" 1 29 "" { "C" "F" } 0 +t "" 77 "" { -11 11 } +t "" 78 "" { -7 7 } +t "" 79 "" { -7 7 } +t "" 12 "" { 3 -3 } +p "" 2 125 "" { "R" "C" } 0 +p "" 1 30 "" { "R" "C" "F" } 0 +p "" 2 126 "" { "C" "F" } 0 +t "" 80 "" { -11 11 } +t "" 81 "" { 7 -7 } +t "" 82 "" { -7 7 } +t "" 83 "" { -3 3 } +t "" 84 "" { -3 3 } +t "" 290 "" { -1 1 } +p "" 1 31 "p1_CCJ_K" { "R" "C" } 0 +p "" 2 127 "" { "R" "C" "F" } 0 +p "" 1 32 "" { "C" "F" } 0 +t "" 94 "" { -9 9 } +t "" 21 "" { -5 5 } +t "" 95 "" { -5 5 } +t "" 22 "" { 1 -1 } +p "" 2 128 "" { "R" "C" } 0 +p "" 1 33 "" { "R" "C" "F" } 0 +p "" 2 129 "" { "C" "F" } 0 +t "" 97 "" { -9 9 } +t "" 98 "" { 5 -5 } +t "" 99 "" { -5 5 } +t "" 24 "" { -1 1 } +t "" 100 "" { -1 1 } +p "" 1 49 "" { "R" "C" } 0 +p "" 2 1 "p2_R_K" { "R" "C" "F" } 0 +p "" 1 50 "p1_RR_Q" { "C" "F" } 0 
+p "" 1 51 "p1_RRK_Q" { "R" "C" } 0 +p "" 2 130 "" { "R" "C" "F" } 0 +p "" 1 52 "" { "C" "F" } 0 +t "" 63 "" { -13 13 } +t "" 64 "" { -9 9 } +t "" 65 "" { -9 9 } +t "" 2 "" { 5 -5 } +p "" 2 131 "" { "R" "C" } 0 +p "" 1 53 "" { "R" "C" "F" } 0 +p "" 2 132 "" { "C" "F" } 0 +t "" 66 "" { -13 13 } +t "" 67 "" { 9 -9 } +t "" 68 "" { -9 9 } +t "" 69 "" { -5 5 } +t "" 70 "" { -5 5 } +t "" 5 "" { -3 3 } +p "" 1 54 "p1_RCK_Q" { "R" "C" } 0 +p "" 2 133 "" { "R" "C" "F" } 0 +p "" 1 55 "" { "C" "F" } 0 +t "" 71 "" { -11 11 } +t "" 6 "" { -7 7 } +t "" 72 "" { -7 7 } +t "" 7 "" { 3 -3 } +p "" 2 134 "" { "R" "C" } 0 +p "" 1 56 "" { "R" "C" "F" } 0 +p "" 2 135 "" { "C" "F" } 0 +t "" 73 "" { -11 11 } +t "" 287 "" { 7 -7 } +t "" 75 "" { -7 7 } +t "" 9 "" { -3 3 } +t "" 76 "" { -3 3 } +t "" 10 "" { 1 -1 } +p "" 2 8 "p2_C_K" { "R" "C" } 0 +p "" 1 57 "p1_CR_Q" { "R" "C" "F" } 0 +p "" 2 9 "" { "C" "F" } 0 +p "" 1 58 "" { "R" "C" } 0 +p "" 2 136 "" { "R" "C" "F" } 0 +p "" 1 59 "" { "C" "F" } 0 +t "" 85 "" { -13 13 } +t "" 86 "" { -9 9 } +t "" 87 "" { -9 9 } +t "" 88 "" { 5 -5 } +p "" 2 137 "" { "R" "C" } 0 +p "" 1 60 "" { "R" "C" "F" } 0 +p "" 2 138 "" { "C" "F" } 0 +t "" 89 "" { -13 13 } +t "" 90 "" { 9 -9 } +t "" 91 "" { -9 9 } +t "" 92 "" { -5 5 } +t "" 93 "" { -5 5 } +t "" 20 "" { 3 -3 } +p "" 1 61 "p1_CRK_Q" { "R" "C" } 0 +p "" 2 139 "" { "R" "C" "F" } 0 +p "" 1 62 "" { "C" "F" } 0 +t "" 77 "" { -11 11 } +t "" 78 "" { -7 7 } +t "" 79 "" { -7 7 } +t "" 12 "" { 3 -3 } +p "" 2 140 "" { "R" "C" } 0 +p "" 1 63 "" { "R" "C" "F" } 0 +p "" 2 141 "" { "C" "F" } 0 +t "" 80 "" { -11 11 } +t "" 81 "" { 7 -7 } +t "" 82 "" { -7 7 } +t "" 83 "" { -3 3 } +t "" 84 "" { -3 3 } +t "" 291 "" { -1 1 } +p "" 1 64 "p1_CCK_Q" { "R" "C" } 0 +p "" 2 142 "" { "R" "C" "F" } 0 +p "" 1 65 "" { "C" "F" } 0 +t "" 94 "" { -9 9 } +t "" 21 "" { -5 5 } +t "" 95 "" { -5 5 } +t "" 22 "" { 1 -1 } +p "" 2 143 "" { "R" "C" } 0 +p "" 1 66 "" { "R" "C" "F" } 0 +p "" 2 144 "" { "C" "F" } 0 +t "" 97 "" { -9 9 } +t "" 98 "" { 5 -5 } +t "" 99 "" { -5 5 } +t "" 24 "" { -1 1 } +t "" 100 "" { -1 1 } +p "" 1 49 "" { "R" "C" } 0 +p "" 2 1 "p2_R_K" { "R" "C" "F" } 0 +p "" 1 50 "p1_RR_Q" { "C" "F" } 0 +p "" 1 67 "p1_RRQ_Q" { "R" "C" } 0 +p "" 2 2 "p2_RR_Q_R_K" { "R" "C" "F" } 0 +p "" 1 68 "" { "C" "F" } 0 +t "" 25 "" { 13 -13 } +t "" 26 "" { -9 9 } +t "" 27 "" { 9 -9 } +t "" 2 "" { 5 -5 } +p "" 2 3 "p2_RR_Q_C_K" { "R" "C" } 0 +p "" 1 69 "" { "R" "C" "F" } 0 +p "" 2 4 "" { "C" "F" } 0 +t "" 28 "" { 13 -13 } +t "" 29 "" { 9 -9 } +t "" 30 "" { 9 -9 } +t "" 31 "" { -5 5 } +t "" 32 "" { 5 -5 } +t "" 5 "" { -3 3 } +p "" 1 70 "p1_RCQ_Q" { "R" "C" } 0 +p "" 2 5 "p2_R_Q_R_K" { "R" "C" "F" } 0 +p "" 1 71 "" { "C" "F" } 0 +t "" 33 "" { 11 -11 } +t "" 6 "" { -7 7 } +t "" 101 "" { 7 -7 } +t "" 7 "" { 3 -3 } +p "" 2 6 "p2_CRR_Q_C_K" { "R" "C" } 0 +p "" 1 72 "" { "R" "C" "F" } 0 +p "" 2 7 "" { "C" "F" } 0 +t "" 35 "" { 11 -11 } +t "" 36 "" { 7 -7 } +t "" 44 "" { 7 -7 } +t "" 9 "" { -3 3 } +t "" 46 "" { 3 -3 } +t "" 10 "" { 1 -1 } +p "" 2 8 "p2_C_K" { "R" "C" } 0 +p "" 1 57 "p1_CR_Q" { "R" "C" "F" } 0 +p "" 2 9 "" { "C" "F" } 0 +p "" 1 73 "" { "R" "C" } 0 +p "" 2 10 "" { "R" "C" "F" } 0 +p "" 1 74 "" { "C" "F" } 0 +t "" 47 "" { 13 -13 } +t "" 48 "" { -9 9 } +t "" 49 "" { 9 -9 } +t "" 50 "" { 5 -5 } +p "" 2 11 "" { "R" "C" } 0 +p "" 1 75 "" { "R" "C" "F" } 0 +p "" 2 12 "" { "C" "F" } 0 +t "" 51 "" { 13 -13 } +t "" 52 "" { 9 -9 } +t "" 53 "" { 9 -9 } +t "" 54 "" { -5 5 } +t "" 55 "" { 5 -5 } +t "" 20 "" { 3 -3 } +p "" 1 76 "p1_CRQ_Q" { "R" "C" } 0 +p "" 2 13 "p2_CRR_Q_R_K" { 
"R" "C" "F" } 0 +p "" 1 77 "" { "C" "F" } 0 +t "" 39 "" { 11 -11 } +t "" 74 "" { -7 7 } +t "" 41 "" { 7 -7 } +t "" 12 "" { 3 -3 } +p "" 2 14 "p2_CRR_Q_C_K" { "R" "C" } 0 +p "" 1 78 "" { "R" "C" "F" } 0 +p "" 2 15 "" { "C" "F" } 0 +t "" 42 "" { 11 -11 } +t "" 43 "" { 7 -7 } +t "" 44 "" { 7 -7 } +t "" 45 "" { -3 3 } +t "" 46 "" { 3 -3 } +t "" 299 "" { -1 1 } +p "" 1 79 "p1_CCQ_Q" { "R" "C" } 0 +p "" 2 16 "p2_CC_Q_R_K" { "R" "C" "F" } 0 +p "" 1 80 "" { "C" "F" } 0 +t "" 56 "" { 9 -9 } +t "" 21 "" { -5 5 } +t "" 57 "" { 5 -5 } +t "" 22 "" { 1 -1 } +p "" 2 17 "p2_CC_Q_C_K" { "R" "C" } 0 +p "" 1 81 "" { "R" "C" "F" } 0 +p "" 2 18 "" { "C" "F" } 0 +t "" 59 "" { 9 -9 } +t "" 60 "" { 5 -5 } +t "" 57 "" { 5 -5 } +t "" 24 "" { -1 1 } +t "" 62 "" { 1 -1 } +p "" 1 49 "" { "R" "C" } 0 +p "" 2 1 "p2_R_K" { "R" "C" "F" } 0 +p "" 1 50 "p1_RR_Q" { "C" "F" } 0 +p "" 1 82 "p1_RRJ_Q" { "R" "C" } 0 +p "" 2 19 "p2_RR_J_R_K" { "R" "C" "F" } 0 +p "" 1 83 "" { "C" "F" } 0 +t "" 63 "" { -13 13 } +t "" 64 "" { -9 9 } +t "" 65 "" { -9 9 } +t "" 2 "" { 5 -5 } +p "" 2 20 "p2_CRR_Q_C_K" { "R" "C" } 0 +p "" 1 84 "" { "R" "C" "F" } 0 +p "" 2 21 "" { "C" "F" } 0 +t "" 66 "" { -13 13 } +t "" 67 "" { 9 -9 } +t "" 68 "" { -9 9 } +t "" 69 "" { -5 5 } +t "" 70 "" { -5 5 } +t "" 5 "" { -3 3 } +p "" 1 85 "p1_RCJ_Q" { "R" "C" } 0 +p "" 2 22 "" { "R" "C" "F" } 0 +p "" 1 86 "" { "C" "F" } 0 +t "" 71 "" { -11 11 } +t "" 6 "" { -7 7 } +t "" 72 "" { -7 7 } +t "" 7 "" { 3 -3 } +p "" 2 23 "" { "R" "C" } 0 +p "" 1 87 "" { "R" "C" "F" } 0 +p "" 2 24 "" { "C" "F" } 0 +t "" 73 "" { -11 11 } +t "" 288 "" { 7 -7 } +t "" 75 "" { -7 7 } +t "" 9 "" { -3 3 } +t "" 76 "" { -3 3 } +t "" 10 "" { 1 -1 } +p "" 2 8 "p2_C_K" { "R" "C" } 0 +p "" 1 57 "p1_CR_Q" { "R" "C" "F" } 0 +p "" 2 9 "" { "C" "F" } 0 +p "" 1 88 "" { "R" "C" } 0 +p "" 2 25 "" { "R" "C" "F" } 0 +p "" 1 89 "" { "C" "F" } 0 +t "" 85 "" { -13 13 } +t "" 86 "" { -9 9 } +t "" 87 "" { -9 9 } +t "" 88 "" { 5 -5 } +p "" 2 26 "" { "R" "C" } 0 +p "" 1 90 "" { "R" "C" "F" } 0 +p "" 2 27 "" { "C" "F" } 0 +t "" 89 "" { -13 13 } +t "" 90 "" { 9 -9 } +t "" 91 "" { -9 9 } +t "" 92 "" { -5 5 } +t "" 93 "" { -5 5 } +t "" 20 "" { 3 -3 } +p "" 1 91 "p1_CRJ_Q" { "R" "C" } 0 +p "" 2 28 "" { "R" "C" "F" } 0 +p "" 1 92 "" { "C" "F" } 0 +t "" 77 "" { -11 11 } +t "" 78 "" { -7 7 } +t "" 79 "" { -7 7 } +t "" 12 "" { 3 -3 } +p "" 2 29 "" { "R" "C" } 0 +p "" 1 93 "" { "R" "C" "F" } 0 +p "" 2 30 "" { "C" "F" } 0 +t "" 80 "" { -11 11 } +t "" 81 "" { 7 -7 } +t "" 82 "" { -7 7 } +t "" 83 "" { -3 3 } +t "" 84 "" { -3 3 } +t "" 300 "" { -1 1 } +p "" 1 94 "p1_CCJ_Q" { "R" "C" } 0 +p "" 2 31 "" { "R" "C" "F" } 0 +p "" 1 95 "" { "C" "F" } 0 +t "" 94 "" { -9 9 } +t "" 21 "" { -5 5 } +t "" 95 "" { -5 5 } +t "" 22 "" { 1 -1 } +p "" 2 32 "" { "R" "C" } 0 +p "" 1 96 "" { "R" "C" "F" } 0 +p "" 2 33 "" { "C" "F" } 0 +t "" 97 "" { -9 9 } +t "" 98 "" { 5 -5 } +t "" 99 "" { -5 5 } +t "" 24 "" { -1 1 } +t "" 100 "" { -1 1 } +p "" 1 49 "" { "R" "C" } 0 +p "" 2 34 "p2_R_Q" { "R" "C" "F" } 0 +p "" 1 50 "p1_RR_Q" { "C" "F" } 0 +p "" 1 51 "p1_RRK_Q" { "R" "C" } 0 +p "" 2 35 "" { "R" "C" "F" } 0 +p "" 1 52 "" { "C" "F" } 0 +t "" 180 "" { 0 0 } +t "" 102 "" { -9 9 } +t "" 181 "" { 0 0 } +t "" 2 "" { 5 -5 } +p "" 2 36 "" { "R" "C" } 0 +p "" 1 53 "" { "R" "C" "F" } 0 +p "" 2 37 "" { "C" "F" } 0 +t "" 182 "" { 0 0 } +t "" 103 "" { 9 -9 } +t "" 183 "" { 0 0 } +t "" 104 "" { -5 5 } +t "" 184 "" { 0 0 } +t "" 5 "" { -3 3 } +p "" 1 54 "p1_RCK_Q" { "R" "C" } 0 +p "" 2 38 "" { "R" "C" "F" } 0 +p "" 1 55 "" { "C" "F" } 0 +t "" 185 "" { 0 0 } +t "" 6 "" 
{ -7 7 } +t "" 186 "" { 0 0 } +t "" 7 "" { 3 -3 } +p "" 2 39 "" { "R" "C" } 0 +p "" 1 56 "" { "R" "C" "F" } 0 +p "" 2 40 "" { "C" "F" } 0 +t "" 187 "" { 0 0 } +t "" 105 "" { 7 -7 } +t "" 188 "" { 0 0 } +t "" 9 "" { -3 3 } +t "" 189 "" { 0 0 } +t "" 10 "" { 1 -1 } +p "" 2 41 "p2_C_Q" { "R" "C" } 0 +p "" 1 57 "p1_CR_Q" { "R" "C" "F" } 0 +p "" 2 42 "" { "C" "F" } 0 +p "" 1 58 "" { "R" "C" } 0 +p "" 2 43 "" { "R" "C" "F" } 0 +p "" 1 59 "" { "C" "F" } 0 +t "" 195 "" { 0 0 } +t "" 109 "" { -9 9 } +t "" 196 "" { 0 0 } +t "" 110 "" { 5 -5 } +p "" 2 44 "" { "R" "C" } 0 +p "" 1 60 "" { "R" "C" "F" } 0 +p "" 2 45 "" { "C" "F" } 0 +t "" 197 "" { 0 0 } +t "" 111 "" { 9 -9 } +t "" 198 "" { 0 0 } +t "" 112 "" { -5 5 } +t "" 199 "" { 0 0 } +t "" 20 "" { 3 -3 } +p "" 1 61 "p1_CRK_Q" { "R" "C" } 0 +p "" 2 46 "" { "R" "C" "F" } 0 +p "" 1 62 "" { "C" "F" } 0 +t "" 190 "" { 0 0 } +t "" 106 "" { -7 7 } +t "" 191 "" { 0 0 } +t "" 12 "" { 3 -3 } +p "" 2 47 "" { "R" "C" } 0 +p "" 1 63 "" { "R" "C" "F" } 0 +p "" 2 48 "" { "C" "F" } 0 +t "" 192 "" { 0 0 } +t "" 107 "" { 7 -7 } +t "" 193 "" { 0 0 } +t "" 108 "" { -3 3 } +t "" 194 "" { 0 0 } +t "" 301 "" { -1 1 } +p "" 1 64 "p1_CCK_Q" { "R" "C" } 0 +p "" 2 49 "" { "R" "C" "F" } 0 +p "" 1 65 "" { "C" "F" } 0 +t "" 200 "" { 0 0 } +t "" 21 "" { -5 5 } +t "" 201 "" { 0 0 } +t "" 22 "" { 1 -1 } +p "" 2 50 "" { "R" "C" } 0 +p "" 1 66 "" { "R" "C" "F" } 0 +p "" 2 51 "" { "C" "F" } 0 +t "" 202 "" { 0 0 } +t "" 113 "" { 5 -5 } +t "" 203 "" { 0 0 } +t "" 24 "" { -1 1 } +t "" 204 "" { 0 0 } +p "" 1 49 "" { "R" "C" } 0 +p "" 2 34 "p2_R_Q" { "R" "C" "F" } 0 +p "" 1 50 "p1_RR_Q" { "C" "F" } 0 +p "" 1 82 "p1_RRJ_Q" { "R" "C" } 0 +p "" 2 67 "" { "R" "C" "F" } 0 +p "" 1 83 "" { "C" "F" } 0 +t "" 205 "" { 0 0 } +t "" 102 "" { -9 9 } +t "" 206 "" { 0 0 } +t "" 2 "" { 5 -5 } +p "" 2 68 "" { "R" "C" } 0 +p "" 1 84 "" { "R" "C" "F" } 0 +p "" 2 69 "" { "C" "F" } 0 +t "" 207 "" { 0 0 } +t "" 103 "" { 9 -9 } +t "" 208 "" { 0 0 } +t "" 104 "" { -5 5 } +t "" 209 "" { 0 0 } +t "" 5 "" { -3 3 } +p "" 1 85 "p1_RCJ_Q" { "R" "C" } 0 +p "" 2 70 "" { "R" "C" "F" } 0 +p "" 1 86 "" { "C" "F" } 0 +t "" 210 "" { 0 0 } +t "" 6 "" { -7 7 } +t "" 211 "" { 0 0 } +t "" 7 "" { 3 -3 } +p "" 2 71 "" { "R" "C" } 0 +p "" 1 87 "" { "R" "C" "F" } 0 +p "" 2 72 "" { "C" "F" } 0 +t "" 212 "" { 0 0 } +t "" 105 "" { 7 -7 } +t "" 213 "" { 0 0 } +t "" 9 "" { -3 3 } +t "" 214 "" { 0 0 } +t "" 10 "" { 1 -1 } +p "" 2 41 "p2_C_Q" { "R" "C" } 0 +p "" 1 57 "p1_CR_Q" { "R" "C" "F" } 0 +p "" 2 42 "" { "C" "F" } 0 +p "" 1 88 "" { "R" "C" } 0 +p "" 2 73 "" { "R" "C" "F" } 0 +p "" 1 89 "" { "C" "F" } 0 +t "" 220 "" { 0 0 } +t "" 109 "" { -9 9 } +t "" 221 "" { 0 0 } +t "" 110 "" { 5 -5 } +p "" 2 74 "" { "R" "C" } 0 +p "" 1 90 "" { "R" "C" "F" } 0 +p "" 2 75 "" { "C" "F" } 0 +t "" 222 "" { 0 0 } +t "" 111 "" { 9 -9 } +t "" 223 "" { 0 0 } +t "" 112 "" { -5 5 } +t "" 224 "" { 0 0 } +t "" 20 "" { 3 -3 } +p "" 1 91 "p1_CRJ_Q" { "R" "C" } 0 +p "" 2 76 "" { "R" "C" "F" } 0 +p "" 1 92 "" { "C" "F" } 0 +t "" 215 "" { 0 0 } +t "" 106 "" { -7 7 } +t "" 216 "" { 0 0 } +t "" 12 "" { 3 -3 } +p "" 2 77 "" { "R" "C" } 0 +p "" 1 93 "" { "R" "C" "F" } 0 +p "" 2 78 "" { "C" "F" } 0 +t "" 217 "" { 0 0 } +t "" 107 "" { 7 -7 } +t "" 218 "" { 0 0 } +t "" 108 "" { -3 3 } +t "" 219 "" { 0 0 } +t "" 302 "" { -1 1 } +p "" 1 94 "p1_CCJ_Q" { "R" "C" } 0 +p "" 2 79 "" { "R" "C" "F" } 0 +p "" 1 95 "" { "C" "F" } 0 +t "" 225 "" { 0 0 } +t "" 21 "" { -5 5 } +t "" 226 "" { 0 0 } +t "" 22 "" { 1 -1 } +p "" 2 80 "" { "R" "C" } 0 +p "" 1 96 "" { "R" "C" "F" } 0 +p "" 2 81 
"" { "C" "F" } 0 +t "" 227 "" { 0 0 } +t "" 113 "" { 5 -5 } +t "" 228 "" { 0 0 } +t "" 24 "" { -1 1 } +t "" 289 "" { 0 0 } +p "" 1 49 "" { "R" "C" } 0 +p "" 2 82 "p2_R_J" { "R" "C" "F" } 0 +p "" 1 50 "p1_RR_Q" { "C" "F" } 0 +p "" 1 51 "p1_RRK_Q" { "R" "C" } 0 +p "" 2 83 "" { "R" "C" "F" } 0 +p "" 1 52 "" { "C" "F" } 0 +t "" 25 "" { 13 -13 } +t "" 26 "" { -9 9 } +t "" 27 "" { 9 -9 } +t "" 2 "" { 5 -5 } +p "" 2 84 "" { "R" "C" } 0 +p "" 1 53 "" { "R" "C" "F" } 0 +p "" 2 85 "" { "C" "F" } 0 +t "" 28 "" { 13 -13 } +t "" 29 "" { 9 -9 } +t "" 30 "" { 9 -9 } +t "" 31 "" { -5 5 } +t "" 32 "" { 5 -5 } +t "" 5 "" { -3 3 } +p "" 1 54 "p1_RCK_Q" { "R" "C" } 0 +p "" 2 86 "" { "R" "C" "F" } 0 +p "" 1 55 "" { "C" "F" } 0 +t "" 33 "" { 11 -11 } +t "" 6 "" { -7 7 } +t "" 101 "" { 7 -7 } +t "" 7 "" { 3 -3 } +p "" 2 87 "" { "R" "C" } 0 +p "" 1 56 "" { "R" "C" "F" } 0 +p "" 2 88 "" { "C" "F" } 0 +t "" 35 "" { 11 -11 } +t "" 36 "" { 7 -7 } +t "" 44 "" { 7 -7 } +t "" 9 "" { -3 3 } +t "" 46 "" { 3 -3 } +t "" 10 "" { 1 -1 } +p "" 2 89 "p2_C_J" { "R" "C" } 0 +p "" 1 57 "p1_CR_Q" { "R" "C" "F" } 0 +p "" 2 90 "" { "C" "F" } 0 +p "" 1 58 "" { "R" "C" } 0 +p "" 2 91 "" { "R" "C" "F" } 0 +p "" 1 59 "" { "C" "F" } 0 +t "" 47 "" { 13 -13 } +t "" 48 "" { -9 9 } +t "" 49 "" { 9 -9 } +t "" 50 "" { 5 -5 } +p "" 2 92 "" { "R" "C" } 0 +p "" 1 60 "" { "R" "C" "F" } 0 +p "" 2 93 "" { "C" "F" } 0 +t "" 51 "" { 13 -13 } +t "" 52 "" { 9 -9 } +t "" 53 "" { 9 -9 } +t "" 54 "" { -5 5 } +t "" 55 "" { 5 -5 } +t "" 20 "" { 3 -3 } +p "" 1 61 "p1_CRK_Q" { "R" "C" } 0 +p "" 2 94 "" { "R" "C" "F" } 0 +p "" 1 62 "" { "C" "F" } 0 +t "" 39 "" { 11 -11 } +t "" 74 "" { -7 7 } +t "" 41 "" { 7 -7 } +t "" 12 "" { 3 -3 } +p "" 2 95 "" { "R" "C" } 0 +p "" 1 63 "" { "R" "C" "F" } 0 +p "" 2 96 "" { "C" "F" } 0 +t "" 42 "" { 11 -11 } +t "" 43 "" { 7 -7 } +t "" 44 "" { 7 -7 } +t "" 45 "" { -3 3 } +t "" 46 "" { 3 -3 } +t "" 303 "" { -1 1 } +p "" 1 64 "p1_CCK_Q" { "R" "C" } 0 +p "" 2 97 "" { "R" "C" "F" } 0 +p "" 1 65 "" { "C" "F" } 0 +t "" 56 "" { 9 -9 } +t "" 21 "" { -5 5 } +t "" 57 "" { 5 -5 } +t "" 22 "" { 1 -1 } +p "" 2 98 "" { "R" "C" } 0 +p "" 1 66 "" { "R" "C" "F" } 0 +p "" 2 99 "" { "C" "F" } 0 +t "" 59 "" { 9 -9 } +t "" 60 "" { 5 -5 } +t "" 57 "" { 5 -5 } +t "" 24 "" { -1 1 } +t "" 62 "" { 1 -1 } +p "" 1 49 "" { "R" "C" } 0 +p "" 2 82 "p2_R_J" { "R" "C" "F" } 0 +p "" 1 50 "p1_RR_Q" { "C" "F" } 0 +p "" 1 67 "p1_RRQ_Q" { "R" "C" } 0 +p "" 2 100 "" { "R" "C" "F" } 0 +p "" 1 68 "" { "C" "F" } 0 +t "" 25 "" { 13 -13 } +t "" 26 "" { -9 9 } +t "" 27 "" { 9 -9 } +t "" 2 "" { 5 -5 } +p "" 2 101 "" { "R" "C" } 0 +p "" 1 69 "" { "R" "C" "F" } 0 +p "" 2 102 "" { "C" "F" } 0 +t "" 28 "" { 13 -13 } +t "" 29 "" { 9 -9 } +t "" 30 "" { 9 -9 } +t "" 31 "" { -5 5 } +t "" 32 "" { 5 -5 } +t "" 5 "" { -3 3 } +p "" 1 70 "p1_RCQ_Q" { "R" "C" } 0 +p "" 2 103 "" { "R" "C" "F" } 0 +p "" 1 71 "" { "C" "F" } 0 +t "" 33 "" { 11 -11 } +t "" 6 "" { -7 7 } +t "" 101 "" { 7 -7 } +t "" 7 "" { 3 -3 } +p "" 2 104 "" { "R" "C" } 0 +p "" 1 72 "" { "R" "C" "F" } 0 +p "" 2 105 "" { "C" "F" } 0 +t "" 35 "" { 11 -11 } +t "" 36 "" { 7 -7 } +t "" 44 "" { 7 -7 } +t "" 9 "" { -3 3 } +t "" 46 "" { 3 -3 } +t "" 10 "" { 1 -1 } +p "" 2 89 "p2_C_J" { "R" "C" } 0 +p "" 1 57 "p1_CR_Q" { "R" "C" "F" } 0 +p "" 2 90 "" { "C" "F" } 0 +p "" 1 73 "" { "R" "C" } 0 +p "" 2 106 "" { "R" "C" "F" } 0 +p "" 1 74 "" { "C" "F" } 0 +t "" 47 "" { 13 -13 } +t "" 48 "" { -9 9 } +t "" 49 "" { 9 -9 } +t "" 50 "" { 5 -5 } +p "" 2 107 "" { "R" "C" } 0 +p "" 1 75 "" { "R" "C" "F" } 0 +p "" 2 108 "" { "C" "F" } 0 +t "" 51 
"" { 13 -13 } +t "" 52 "" { 9 -9 } +t "" 53 "" { 9 -9 } +t "" 54 "" { -5 5 } +t "" 229 "" { 5 -5 } +t "" 20 "" { 3 -3 } +p "" 1 76 "p1_CRQ_Q" { "R" "C" } 0 +p "" 2 109 "" { "R" "C" "F" } 0 +p "" 1 77 "" { "C" "F" } 0 +t "" 39 "" { 11 -11 } +t "" 74 "" { -7 7 } +t "" 41 "" { 7 -7 } +t "" 12 "" { 3 -3 } +p "" 2 110 "" { "R" "C" } 0 +p "" 1 78 "" { "R" "C" "F" } 0 +p "" 2 111 "" { "C" "F" } 0 +t "" 42 "" { 11 -11 } +t "" 43 "" { 7 -7 } +t "" 44 "" { 7 -7 } +t "" 45 "" { -3 3 } +t "" 46 "" { 3 -3 } +t "" 304 "" { -1 1 } +p "" 1 79 "p1_CCQ_Q" { "R" "C" } 0 +p "" 2 112 "" { "R" "C" "F" } 0 +p "" 1 80 "" { "C" "F" } 0 +t "" 56 "" { 9 -9 } +t "" 21 "" { -5 5 } +t "" 57 "" { 5 -5 } +t "" 22 "" { 1 -1 } +p "" 2 113 "" { "R" "C" } 0 +p "" 1 81 "" { "R" "C" "F" } 0 +p "" 2 114 "" { "C" "F" } 0 +t "" 59 "" { 9 -9 } +t "" 60 "" { 5 -5 } +t "" 57 "" { 5 -5 } +t "" 24 "" { -1 1 } +t "" 62 "" { 1 -1 } +p "" 1 49 "" { "R" "C" } 0 +p "" 2 82 "p2_R_J" { "R" "C" "F" } 0 +p "" 1 50 "p1_RR_Q" { "C" "F" } 0 +p "" 1 82 "p1_RRJ_Q" { "R" "C" } 0 +p "" 2 115 "" { "R" "C" "F" } 0 +p "" 1 83 "" { "C" "F" } 0 +t "" 63 "" { -13 13 } +t "" 64 "" { -9 9 } +t "" 65 "" { -9 9 } +t "" 2 "" { 5 -5 } +p "" 2 116 "" { "R" "C" } 0 +p "" 1 84 "" { "R" "C" "F" } 0 +p "" 2 117 "" { "C" "F" } 0 +t "" 66 "" { -13 13 } +t "" 67 "" { 9 -9 } +t "" 68 "" { -9 9 } +t "" 69 "" { -5 5 } +t "" 70 "" { -5 5 } +t "" 5 "" { -3 3 } +p "" 1 85 "p1_RCJ_Q" { "R" "C" } 0 +p "" 2 118 "" { "R" "C" "F" } 0 +p "" 1 86 "" { "C" "F" } 0 +t "" 71 "" { -11 11 } +t "" 6 "" { -7 7 } +t "" 72 "" { -7 7 } +t "" 7 "" { 3 -3 } +p "" 2 119 "" { "R" "C" } 0 +p "" 1 87 "" { "R" "C" "F" } 0 +p "" 2 120 "" { "C" "F" } 0 +t "" 73 "" { -11 11 } +t "" 230 "" { 7 -7 } +t "" 75 "" { -7 7 } +t "" 9 "" { -3 3 } +t "" 76 "" { -3 3 } +t "" 10 "" { 1 -1 } +p "" 2 89 "p2_C_J" { "R" "C" } 0 +p "" 1 57 "p1_CR_Q" { "R" "C" "F" } 0 +p "" 2 90 "" { "C" "F" } 0 +p "" 1 88 "" { "R" "C" } 0 +p "" 2 121 "" { "R" "C" "F" } 0 +p "" 1 89 "" { "C" "F" } 0 +t "" 85 "" { -13 13 } +t "" 86 "" { -9 9 } +t "" 87 "" { -9 9 } +t "" 88 "" { 5 -5 } +p "" 2 122 "" { "R" "C" } 0 +p "" 1 90 "" { "R" "C" "F" } 0 +p "" 2 123 "" { "C" "F" } 0 +t "" 89 "" { -13 13 } +t "" 90 "" { 9 -9 } +t "" 91 "" { -9 9 } +t "" 92 "" { -5 5 } +t "" 93 "" { -5 5 } +t "" 20 "" { 3 -3 } +p "" 1 91 "p1_CRJ_Q" { "R" "C" } 0 +p "" 2 124 "" { "R" "C" "F" } 0 +p "" 1 92 "" { "C" "F" } 0 +t "" 77 "" { -11 11 } +t "" 78 "" { -7 7 } +t "" 79 "" { -7 7 } +t "" 12 "" { 3 -3 } +p "" 2 125 "" { "R" "C" } 0 +p "" 1 93 "" { "R" "C" "F" } 0 +p "" 2 126 "" { "C" "F" } 0 +t "" 80 "" { -11 11 } +t "" 81 "" { 7 -7 } +t "" 82 "" { -7 7 } +t "" 83 "" { -3 3 } +t "" 84 "" { -3 3 } +t "" 305 "" { -1 1 } +p "" 1 94 "p1_CCJ_Q" { "R" "C" } 0 +p "" 2 127 "" { "R" "C" "F" } 0 +p "" 1 95 "" { "C" "F" } 0 +t "" 94 "" { -9 9 } +t "" 21 "" { -5 5 } +t "" 95 "" { -5 5 } +t "" 22 "" { 1 -1 } +p "" 2 128 "" { "R" "C" } 0 +p "" 1 96 "" { "R" "C" "F" } 0 +p "" 2 129 "" { "C" "F" } 0 +t "" 97 "" { -9 9 } +t "" 98 "" { 5 -5 } +t "" 99 "" { -5 5 } +t "" 24 "" { -1 1 } +t "" 100 "" { -1 1 } +p "" 1 97 "" { "R" "C" } 0 +p "" 2 1 "p2_R_K" { "R" "C" "F" } 0 +p "" 1 98 "p1_RR_J" { "C" "F" } 0 +p "" 1 99 "p1_RRK_J" { "R" "C" } 0 +p "" 2 130 "" { "R" "C" "F" } 0 +p "" 1 100 "" { "C" "F" } 0 +t "" 63 "" { -13 13 } +t "" 64 "" { -9 9 } +t "" 65 "" { -9 9 } +t "" 2 "" { 5 -5 } +p "" 2 131 "" { "R" "C" } 0 +p "" 1 101 "" { "R" "C" "F" } 0 +p "" 2 132 "" { "C" "F" } 0 +t "" 66 "" { -13 13 } +t "" 67 "" { 9 -9 } +t "" 68 "" { -9 9 } +t "" 69 "" { -5 5 } +t "" 70 "" { -5 5 } +t 
"" 5 "" { -3 3 } +p "" 1 102 "p1_RCK_J" { "R" "C" } 0 +p "" 2 133 "" { "R" "C" "F" } 0 +p "" 1 103 "" { "C" "F" } 0 +t "" 71 "" { -11 11 } +t "" 6 "" { -7 7 } +t "" 72 "" { -7 7 } +t "" 7 "" { 3 -3 } +p "" 2 134 "" { "R" "C" } 0 +p "" 1 104 "" { "R" "C" "F" } 0 +p "" 2 135 "" { "C" "F" } 0 +t "" 73 "" { -11 11 } +t "" 231 "" { 7 -7 } +t "" 75 "" { -7 7 } +t "" 9 "" { -3 3 } +t "" 76 "" { -3 3 } +t "" 10 "" { 1 -1 } +p "" 2 8 "p2_C_K" { "R" "C" } 0 +p "" 1 105 "p1_CR_J" { "R" "C" "F" } 0 +p "" 2 9 "" { "C" "F" } 0 +p "" 1 106 "" { "R" "C" } 0 +p "" 2 136 "" { "R" "C" "F" } 0 +p "" 1 107 "" { "C" "F" } 0 +t "" 85 "" { -13 13 } +t "" 86 "" { -9 9 } +t "" 87 "" { -9 9 } +t "" 88 "" { 5 -5 } +p "" 2 137 "" { "R" "C" } 0 +p "" 1 108 "" { "R" "C" "F" } 0 +p "" 2 138 "" { "C" "F" } 0 +t "" 89 "" { -13 13 } +t "" 90 "" { 9 -9 } +t "" 91 "" { -9 9 } +t "" 92 "" { -5 5 } +t "" 93 "" { -5 5 } +t "" 20 "" { 3 -3 } +p "" 1 109 "p1_CRK_J" { "R" "C" } 0 +p "" 2 139 "" { "R" "C" "F" } 0 +p "" 1 110 "" { "C" "F" } 0 +t "" 77 "" { -11 11 } +t "" 78 "" { -7 7 } +t "" 79 "" { -7 7 } +t "" 12 "" { 3 -3 } +p "" 2 140 "" { "R" "C" } 0 +p "" 1 111 "" { "R" "C" "F" } 0 +p "" 2 141 "" { "C" "F" } 0 +t "" 80 "" { -11 11 } +t "" 81 "" { 7 -7 } +t "" 82 "" { -7 7 } +t "" 83 "" { -3 3 } +t "" 84 "" { -3 3 } +t "" 306 "" { -1 1 } +p "" 1 112 "p1_CCK_J" { "R" "C" } 0 +p "" 2 142 "" { "R" "C" "F" } 0 +p "" 1 113 "" { "C" "F" } 0 +t "" 94 "" { -9 9 } +t "" 21 "" { -5 5 } +t "" 95 "" { -5 5 } +t "" 22 "" { 1 -1 } +p "" 2 143 "" { "R" "C" } 0 +p "" 1 114 "" { "R" "C" "F" } 0 +p "" 2 144 "" { "C" "F" } 0 +t "" 97 "" { -9 9 } +t "" 98 "" { 5 -5 } +t "" 99 "" { -5 5 } +t "" 24 "" { -1 1 } +t "" 100 "" { -1 1 } +p "" 1 97 "" { "R" "C" } 0 +p "" 2 1 "p2_R_K" { "R" "C" "F" } 0 +p "" 1 98 "p1_RR_J" { "C" "F" } 0 +p "" 1 115 "p1_RRQ_J" { "R" "C" } 0 +p "" 2 2 "p2_RR_Q_R_K" { "R" "C" "F" } 0 +p "" 1 116 "" { "C" "F" } 0 +t "" 63 "" { -13 13 } +t "" 64 "" { -9 9 } +t "" 65 "" { -9 9 } +t "" 2 "" { 5 -5 } +p "" 2 3 "p2_RR_Q_C_K" { "R" "C" } 0 +p "" 1 117 "" { "R" "C" "F" } 0 +p "" 2 4 "" { "C" "F" } 0 +t "" 66 "" { -13 13 } +t "" 67 "" { 9 -9 } +t "" 68 "" { -9 9 } +t "" 69 "" { -5 5 } +t "" 70 "" { -5 5 } +t "" 5 "" { -3 3 } +p "" 1 118 "p1_RCQ_J" { "R" "C" } 0 +p "" 2 5 "p2_R_Q_R_K" { "R" "C" "F" } 0 +p "" 1 119 "" { "C" "F" } 0 +t "" 71 "" { -11 11 } +t "" 6 "" { -7 7 } +t "" 72 "" { -7 7 } +t "" 7 "" { 3 -3 } +p "" 2 6 "p2_CRR_Q_C_K" { "R" "C" } 0 +p "" 1 120 "" { "R" "C" "F" } 0 +p "" 2 7 "" { "C" "F" } 0 +t "" 73 "" { -11 11 } +t "" 232 "" { 7 -7 } +t "" 75 "" { -7 7 } +t "" 9 "" { -3 3 } +t "" 76 "" { -3 3 } +t "" 10 "" { 1 -1 } +p "" 2 8 "p2_C_K" { "R" "C" } 0 +p "" 1 105 "p1_CR_J" { "R" "C" "F" } 0 +p "" 2 9 "" { "C" "F" } 0 +p "" 1 121 "" { "R" "C" } 0 +p "" 2 10 "" { "R" "C" "F" } 0 +p "" 1 122 "" { "C" "F" } 0 +t "" 85 "" { -13 13 } +t "" 86 "" { -9 9 } +t "" 87 "" { -9 9 } +t "" 88 "" { 5 -5 } +p "" 2 11 "" { "R" "C" } 0 +p "" 1 123 "" { "R" "C" "F" } 0 +p "" 2 12 "" { "C" "F" } 0 +t "" 89 "" { -13 13 } +t "" 90 "" { 9 -9 } +t "" 91 "" { -9 9 } +t "" 92 "" { -5 5 } +t "" 93 "" { -5 5 } +t "" 20 "" { 3 -3 } +p "" 1 124 "p1_CRQ_J" { "R" "C" } 0 +p "" 2 13 "p2_CRR_Q_R_K" { "R" "C" "F" } 0 +p "" 1 125 "" { "C" "F" } 0 +t "" 77 "" { -11 11 } +t "" 78 "" { -7 7 } +t "" 79 "" { -7 7 } +t "" 12 "" { 3 -3 } +p "" 2 14 "p2_CRR_Q_C_K" { "R" "C" } 0 +p "" 1 126 "" { "R" "C" "F" } 0 +p "" 2 15 "" { "C" "F" } 0 +t "" 80 "" { -11 11 } +t "" 81 "" { 7 -7 } +t "" 82 "" { -7 7 } +t "" 83 "" { -3 3 } +t "" 84 "" { -3 3 } +t "" 307 "" { -1 1 } 
+p "" 1 127 "p1_CCQ_J" { "R" "C" } 0 +p "" 2 16 "p2_CC_Q_R_K" { "R" "C" "F" } 0 +p "" 1 128 "" { "C" "F" } 0 +t "" 94 "" { -9 9 } +t "" 21 "" { -5 5 } +t "" 95 "" { -5 5 } +t "" 22 "" { 1 -1 } +p "" 2 17 "p2_CC_Q_C_K" { "R" "C" } 0 +p "" 1 129 "" { "R" "C" "F" } 0 +p "" 2 18 "" { "C" "F" } 0 +t "" 97 "" { -9 9 } +t "" 98 "" { 5 -5 } +t "" 99 "" { -5 5 } +t "" 24 "" { -1 1 } +t "" 100 "" { -1 1 } +p "" 1 97 "" { "R" "C" } 0 +p "" 2 1 "p2_R_K" { "R" "C" "F" } 0 +p "" 1 98 "p1_RR_J" { "C" "F" } 0 +p "" 1 130 "p1_RRJ_J" { "R" "C" } 0 +p "" 2 19 "p2_RR_J_R_K" { "R" "C" "F" } 0 +p "" 1 131 "" { "C" "F" } 0 +t "" 25 "" { 13 -13 } +t "" 114 "" { -9 9 } +t "" 27 "" { 9 -9 } +t "" 2 "" { 5 -5 } +p "" 2 20 "p2_CRR_Q_C_K" { "R" "C" } 0 +p "" 1 132 "" { "R" "C" "F" } 0 +p "" 2 21 "" { "C" "F" } 0 +t "" 28 "" { 13 -13 } +t "" 29 "" { 9 -9 } +t "" 30 "" { 9 -9 } +t "" 31 "" { -5 5 } +t "" 32 "" { 5 -5 } +t "" 5 "" { -3 3 } +p "" 1 133 "p1_RCJ_J" { "R" "C" } 0 +p "" 2 22 "" { "R" "C" "F" } 0 +p "" 1 134 "" { "C" "F" } 0 +t "" 33 "" { 11 -11 } +t "" 6 "" { -7 7 } +t "" 101 "" { 7 -7 } +t "" 7 "" { 3 -3 } +p "" 2 23 "" { "R" "C" } 0 +p "" 1 135 "" { "R" "C" "F" } 0 +p "" 2 24 "" { "C" "F" } 0 +t "" 35 "" { 11 -11 } +t "" 36 "" { 7 -7 } +t "" 44 "" { 7 -7 } +t "" 9 "" { -3 3 } +t "" 46 "" { 3 -3 } +t "" 10 "" { 1 -1 } +p "" 2 8 "p2_C_K" { "R" "C" } 0 +p "" 1 105 "p1_CR_J" { "R" "C" "F" } 0 +p "" 2 9 "" { "C" "F" } 0 +p "" 1 136 "" { "R" "C" } 0 +p "" 2 25 "" { "R" "C" "F" } 0 +p "" 1 137 "" { "C" "F" } 0 +t "" 47 "" { 13 -13 } +t "" 48 "" { -9 9 } +t "" 49 "" { 9 -9 } +t "" 50 "" { 5 -5 } +p "" 2 26 "" { "R" "C" } 0 +p "" 1 138 "" { "R" "C" "F" } 0 +p "" 2 27 "" { "C" "F" } 0 +t "" 51 "" { 13 -13 } +t "" 52 "" { 9 -9 } +t "" 53 "" { 9 -9 } +t "" 54 "" { -5 5 } +t "" 233 "" { 5 -5 } +t "" 20 "" { 3 -3 } +p "" 1 139 "p1_CRJ_J" { "R" "C" } 0 +p "" 2 28 "" { "R" "C" "F" } 0 +p "" 1 140 "" { "C" "F" } 0 +t "" 39 "" { 11 -11 } +t "" 74 "" { -7 7 } +t "" 41 "" { 7 -7 } +t "" 12 "" { 3 -3 } +p "" 2 29 "" { "R" "C" } 0 +p "" 1 141 "" { "R" "C" "F" } 0 +p "" 2 30 "" { "C" "F" } 0 +t "" 42 "" { 11 -11 } +t "" 43 "" { 7 -7 } +t "" 44 "" { 7 -7 } +t "" 45 "" { -3 3 } +t "" 46 "" { 3 -3 } +t "" 308 "" { -1 1 } +p "" 1 142 "p1_CCJ_J" { "R" "C" } 0 +p "" 2 31 "" { "R" "C" "F" } 0 +p "" 1 143 "" { "C" "F" } 0 +t "" 56 "" { 9 -9 } +t "" 21 "" { -5 5 } +t "" 57 "" { 5 -5 } +t "" 22 "" { 1 -1 } +p "" 2 32 "" { "R" "C" } 0 +p "" 1 144 "" { "R" "C" "F" } 0 +p "" 2 33 "" { "C" "F" } 0 +t "" 59 "" { 9 -9 } +t "" 60 "" { 5 -5 } +t "" 57 "" { 5 -5 } +t "" 24 "" { -1 1 } +t "" 62 "" { 1 -1 } +p "" 1 97 "" { "R" "C" } 0 +p "" 2 34 "p2_R_Q" { "R" "C" "F" } 0 +p "" 1 98 "p1_RR_J" { "C" "F" } 0 +p "" 1 99 "p1_RRK_J" { "R" "C" } 0 +p "" 2 35 "" { "R" "C" "F" } 0 +p "" 1 100 "" { "C" "F" } 0 +t "" 63 "" { -13 13 } +t "" 64 "" { -9 9 } +t "" 65 "" { -9 9 } +t "" 2 "" { 5 -5 } +p "" 2 36 "" { "R" "C" } 0 +p "" 1 101 "" { "R" "C" "F" } 0 +p "" 2 37 "" { "C" "F" } 0 +t "" 66 "" { -13 13 } +t "" 67 "" { 9 -9 } +t "" 68 "" { -9 9 } +t "" 69 "" { -5 5 } +t "" 70 "" { -5 5 } +t "" 5 "" { -3 3 } +p "" 1 102 "p1_RCK_J" { "R" "C" } 0 +p "" 2 38 "" { "R" "C" "F" } 0 +p "" 1 103 "" { "C" "F" } 0 +t "" 71 "" { -11 11 } +t "" 6 "" { -7 7 } +t "" 72 "" { -7 7 } +t "" 7 "" { 3 -3 } +p "" 2 39 "" { "R" "C" } 0 +p "" 1 104 "" { "R" "C" "F" } 0 +p "" 2 40 "" { "C" "F" } 0 +t "" 73 "" { -11 11 } +t "" 115 "" { 7 -7 } +t "" 75 "" { -7 7 } +t "" 9 "" { -3 3 } +t "" 76 "" { -3 3 } +t "" 10 "" { 1 -1 } +p "" 2 41 "p2_C_Q" { "R" "C" } 0 +p "" 1 105 "p1_CR_J" { "R" 
"C" "F" } 0 +p "" 2 42 "" { "C" "F" } 0 +p "" 1 106 "" { "R" "C" } 0 +p "" 2 43 "" { "R" "C" "F" } 0 +p "" 1 107 "" { "C" "F" } 0 +t "" 85 "" { -13 13 } +t "" 86 "" { -9 9 } +t "" 87 "" { -9 9 } +t "" 88 "" { 5 -5 } +p "" 2 44 "" { "R" "C" } 0 +p "" 1 108 "" { "R" "C" "F" } 0 +p "" 2 45 "" { "C" "F" } 0 +t "" 89 "" { -13 13 } +t "" 90 "" { 9 -9 } +t "" 91 "" { -9 9 } +t "" 92 "" { -5 5 } +t "" 93 "" { -5 5 } +t "" 20 "" { 3 -3 } +p "" 1 109 "p1_CRK_J" { "R" "C" } 0 +p "" 2 46 "" { "R" "C" "F" } 0 +p "" 1 110 "" { "C" "F" } 0 +t "" 77 "" { -11 11 } +t "" 78 "" { -7 7 } +t "" 79 "" { -7 7 } +t "" 12 "" { 3 -3 } +p "" 2 47 "" { "R" "C" } 0 +p "" 1 111 "" { "R" "C" "F" } 0 +p "" 2 48 "" { "C" "F" } 0 +t "" 80 "" { -11 11 } +t "" 81 "" { 7 -7 } +t "" 82 "" { -7 7 } +t "" 83 "" { -3 3 } +t "" 84 "" { -3 3 } +t "" 309 "" { -1 1 } +p "" 1 112 "p1_CCK_J" { "R" "C" } 0 +p "" 2 49 "" { "R" "C" "F" } 0 +p "" 1 113 "" { "C" "F" } 0 +t "" 94 "" { -9 9 } +t "" 21 "" { -5 5 } +t "" 95 "" { -5 5 } +t "" 22 "" { 1 -1 } +p "" 2 50 "" { "R" "C" } 0 +p "" 1 114 "" { "R" "C" "F" } 0 +p "" 2 51 "" { "C" "F" } 0 +t "" 97 "" { -9 9 } +t "" 98 "" { 5 -5 } +t "" 99 "" { -5 5 } +t "" 24 "" { -1 1 } +t "" 100 "" { -1 1 } +p "" 1 97 "" { "R" "C" } 0 +p "" 2 34 "p2_R_Q" { "R" "C" "F" } 0 +p "" 1 98 "p1_RR_J" { "C" "F" } 0 +p "" 1 115 "p1_RRQ_J" { "R" "C" } 0 +p "" 2 52 "" { "R" "C" "F" } 0 +p "" 1 116 "" { "C" "F" } 0 +t "" 63 "" { -13 13 } +t "" 64 "" { -9 9 } +t "" 65 "" { -9 9 } +t "" 2 "" { 5 -5 } +p "" 2 53 "" { "R" "C" } 0 +p "" 1 117 "" { "R" "C" "F" } 0 +p "" 2 54 "" { "C" "F" } 0 +t "" 66 "" { -13 13 } +t "" 67 "" { 9 -9 } +t "" 68 "" { -9 9 } +t "" 69 "" { -5 5 } +t "" 70 "" { -5 5 } +t "" 5 "" { -3 3 } +p "" 1 118 "p1_RCQ_J" { "R" "C" } 0 +p "" 2 55 "" { "R" "C" "F" } 0 +p "" 1 119 "" { "C" "F" } 0 +t "" 71 "" { -11 11 } +t "" 6 "" { -7 7 } +t "" 72 "" { -7 7 } +t "" 7 "" { 3 -3 } +p "" 2 56 "" { "R" "C" } 0 +p "" 1 120 "" { "R" "C" "F" } 0 +p "" 2 57 "" { "C" "F" } 0 +t "" 73 "" { -11 11 } +t "" 81 "" { 7 -7 } +t "" 75 "" { -7 7 } +t "" 9 "" { -3 3 } +t "" 76 "" { -3 3 } +t "" 10 "" { 1 -1 } +p "" 2 41 "p2_C_Q" { "R" "C" } 0 +p "" 1 105 "p1_CR_J" { "R" "C" "F" } 0 +p "" 2 42 "" { "C" "F" } 0 +p "" 1 121 "" { "R" "C" } 0 +p "" 2 58 "" { "R" "C" "F" } 0 +p "" 1 122 "" { "C" "F" } 0 +t "" 85 "" { -13 13 } +t "" 86 "" { -9 9 } +t "" 87 "" { -9 9 } +t "" 88 "" { 5 -5 } +p "" 2 59 "" { "R" "C" } 0 +p "" 1 123 "" { "R" "C" "F" } 0 +p "" 2 60 "" { "C" "F" } 0 +t "" 89 "" { -13 13 } +t "" 90 "" { 9 -9 } +t "" 91 "" { -9 9 } +t "" 92 "" { -5 5 } +t "" 93 "" { -5 5 } +t "" 20 "" { 3 -3 } +p "" 1 124 "p1_CRQ_J" { "R" "C" } 0 +p "" 2 61 "" { "R" "C" "F" } 0 +p "" 1 125 "" { "C" "F" } 0 +t "" 77 "" { -11 11 } +t "" 78 "" { -7 7 } +t "" 79 "" { -7 7 } +t "" 12 "" { 3 -3 } +p "" 2 62 "" { "R" "C" } 0 +p "" 1 126 "" { "R" "C" "F" } 0 +p "" 2 63 "" { "C" "F" } 0 +t "" 80 "" { -11 11 } +t "" 81 "" { 7 -7 } +t "" 82 "" { -7 7 } +t "" 83 "" { -3 3 } +t "" 84 "" { -3 3 } +t "" 310 "" { -1 1 } +p "" 1 127 "p1_CCQ_J" { "R" "C" } 0 +p "" 2 64 "" { "R" "C" "F" } 0 +p "" 1 128 "" { "C" "F" } 0 +t "" 94 "" { -9 9 } +t "" 21 "" { -5 5 } +t "" 95 "" { -5 5 } +t "" 22 "" { 1 -1 } +p "" 2 65 "" { "R" "C" } 0 +p "" 1 129 "" { "R" "C" "F" } 0 +p "" 2 66 "" { "C" "F" } 0 +t "" 97 "" { -9 9 } +t "" 98 "" { 5 -5 } +t "" 99 "" { -5 5 } +t "" 24 "" { -1 1 } +t "" 100 "" { -1 1 } +p "" 1 97 "" { "R" "C" } 0 +p "" 2 34 "p2_R_Q" { "R" "C" "F" } 0 +p "" 1 98 "p1_RR_J" { "C" "F" } 0 +p "" 1 130 "p1_RRJ_J" { "R" "C" } 0 +p "" 2 67 "" { "R" "C" "F" } 0 
+p "" 1 131 "" { "C" "F" } 0 +t "" 25 "" { 13 -13 } +t "" 116 "" { -9 9 } +t "" 27 "" { 9 -9 } +t "" 2 "" { 5 -5 } +p "" 2 68 "" { "R" "C" } 0 +p "" 1 132 "" { "R" "C" "F" } 0 +p "" 2 69 "" { "C" "F" } 0 +t "" 28 "" { 13 -13 } +t "" 29 "" { 9 -9 } +t "" 30 "" { 9 -9 } +t "" 31 "" { -5 5 } +t "" 117 "" { 5 -5 } +t "" 5 "" { -3 3 } +p "" 1 133 "p1_RCJ_J" { "R" "C" } 0 +p "" 2 70 "" { "R" "C" "F" } 0 +p "" 1 134 "" { "C" "F" } 0 +t "" 33 "" { 11 -11 } +t "" 6 "" { -7 7 } +t "" 101 "" { 7 -7 } +t "" 7 "" { 3 -3 } +p "" 2 71 "" { "R" "C" } 0 +p "" 1 135 "" { "R" "C" "F" } 0 +p "" 2 72 "" { "C" "F" } 0 +t "" 35 "" { 11 -11 } +t "" 36 "" { 7 -7 } +t "" 44 "" { 7 -7 } +t "" 9 "" { -3 3 } +t "" 46 "" { 3 -3 } +t "" 10 "" { 1 -1 } +p "" 2 41 "p2_C_Q" { "R" "C" } 0 +p "" 1 105 "p1_CR_J" { "R" "C" "F" } 0 +p "" 2 42 "" { "C" "F" } 0 +p "" 1 136 "" { "R" "C" } 0 +p "" 2 73 "" { "R" "C" "F" } 0 +p "" 1 137 "" { "C" "F" } 0 +t "" 47 "" { 13 -13 } +t "" 48 "" { -9 9 } +t "" 49 "" { 9 -9 } +t "" 50 "" { 5 -5 } +p "" 2 74 "" { "R" "C" } 0 +p "" 1 138 "" { "R" "C" "F" } 0 +p "" 2 75 "" { "C" "F" } 0 +t "" 51 "" { 13 -13 } +t "" 52 "" { 9 -9 } +t "" 53 "" { 9 -9 } +t "" 54 "" { -5 5 } +t "" 234 "" { 5 -5 } +t "" 20 "" { 3 -3 } +p "" 1 139 "p1_CRJ_J" { "R" "C" } 0 +p "" 2 76 "" { "R" "C" "F" } 0 +p "" 1 140 "" { "C" "F" } 0 +t "" 39 "" { 11 -11 } +t "" 74 "" { -7 7 } +t "" 41 "" { 7 -7 } +t "" 12 "" { 3 -3 } +p "" 2 77 "" { "R" "C" } 0 +p "" 1 141 "" { "R" "C" "F" } 0 +p "" 2 78 "" { "C" "F" } 0 +t "" 42 "" { 11 -11 } +t "" 43 "" { 7 -7 } +t "" 44 "" { 7 -7 } +t "" 45 "" { -3 3 } +t "" 46 "" { 3 -3 } +t "" 311 "" { -1 1 } +p "" 1 142 "p1_CCJ_J" { "R" "C" } 0 +p "" 2 79 "" { "R" "C" "F" } 0 +p "" 1 143 "" { "C" "F" } 0 +t "" 56 "" { 9 -9 } +t "" 21 "" { -5 5 } +t "" 57 "" { 5 -5 } +t "" 22 "" { 1 -1 } +p "" 2 80 "" { "R" "C" } 0 +p "" 1 144 "" { "R" "C" "F" } 0 +p "" 2 81 "" { "C" "F" } 0 +t "" 59 "" { 9 -9 } +t "" 60 "" { 5 -5 } +t "" 57 "" { 5 -5 } +t "" 24 "" { -1 1 } +t "" 62 "" { 1 -1 } +p "" 1 97 "" { "R" "C" } 0 +p "" 2 82 "p2_R_J" { "R" "C" "F" } 0 +p "" 1 98 "p1_RR_J" { "C" "F" } 0 +p "" 1 99 "p1_RRK_J" { "R" "C" } 0 +p "" 2 83 "" { "R" "C" "F" } 0 +p "" 1 100 "" { "C" "F" } 0 +t "" 235 "" { 0 0 } +t "" 118 "" { -9 9 } +t "" 236 "" { 0 0 } +t "" 2 "" { 5 -5 } +p "" 2 84 "" { "R" "C" } 0 +p "" 1 101 "" { "R" "C" "F" } 0 +p "" 2 85 "" { "C" "F" } 0 +t "" 237 "" { 0 0 } +t "" 119 "" { 9 -9 } +t "" 238 "" { 0 0 } +t "" 120 "" { -5 5 } +t "" 239 "" { 0 0 } +t "" 5 "" { -3 3 } +p "" 1 102 "p1_RCK_J" { "R" "C" } 0 +p "" 2 86 "" { "R" "C" "F" } 0 +p "" 1 103 "" { "C" "F" } 0 +t "" 240 "" { 0 0 } +t "" 6 "" { -7 7 } +t "" 241 "" { 0 0 } +t "" 7 "" { 3 -3 } +p "" 2 87 "" { "R" "C" } 0 +p "" 1 104 "" { "R" "C" "F" } 0 +p "" 2 88 "" { "C" "F" } 0 +t "" 242 "" { 0 0 } +t "" 121 "" { 7 -7 } +t "" 243 "" { 0 0 } +t "" 9 "" { -3 3 } +t "" 244 "" { 0 0 } +t "" 10 "" { 1 -1 } +p "" 2 89 "p2_C_J" { "R" "C" } 0 +p "" 1 105 "p1_CR_J" { "R" "C" "F" } 0 +p "" 2 90 "" { "C" "F" } 0 +p "" 1 106 "" { "R" "C" } 0 +p "" 2 91 "" { "R" "C" "F" } 0 +p "" 1 107 "" { "C" "F" } 0 +t "" 250 "" { 0 0 } +t "" 125 "" { -9 9 } +t "" 251 "" { 0 0 } +t "" 126 "" { 5 -5 } +p "" 2 92 "" { "R" "C" } 0 +p "" 1 108 "" { "R" "C" "F" } 0 +p "" 2 93 "" { "C" "F" } 0 +t "" 252 "" { 0 0 } +t "" 127 "" { 9 -9 } +t "" 253 "" { 0 0 } +t "" 128 "" { -5 5 } +t "" 254 "" { 0 0 } +t "" 20 "" { 3 -3 } +p "" 1 109 "p1_CRK_J" { "R" "C" } 0 +p "" 2 94 "" { "R" "C" "F" } 0 +p "" 1 110 "" { "C" "F" } 0 +t "" 245 "" { 0 0 } +t "" 122 "" { -7 7 } +t "" 246 "" { 0 0 } 
+t "" 12 "" { 3 -3 } +p "" 2 95 "" { "R" "C" } 0 +p "" 1 111 "" { "R" "C" "F" } 0 +p "" 2 96 "" { "C" "F" } 0 +t "" 247 "" { 0 0 } +t "" 123 "" { 7 -7 } +t "" 248 "" { 0 0 } +t "" 124 "" { -3 3 } +t "" 249 "" { 0 0 } +t "" 312 "" { -1 1 } +p "" 1 112 "p1_CCK_J" { "R" "C" } 0 +p "" 2 97 "" { "R" "C" "F" } 0 +p "" 1 113 "" { "C" "F" } 0 +t "" 255 "" { 0 0 } +t "" 21 "" { -5 5 } +t "" 256 "" { 0 0 } +t "" 22 "" { 1 -1 } +p "" 2 98 "" { "R" "C" } 0 +p "" 1 114 "" { "R" "C" "F" } 0 +p "" 2 99 "" { "C" "F" } 0 +t "" 257 "" { 0 0 } +t "" 129 "" { 5 -5 } +t "" 258 "" { 0 0 } +t "" 24 "" { -1 1 } +t "" 259 "" { 0 0 } +p "" 1 97 "" { "R" "C" } 0 +p "" 2 82 "p2_R_J" { "R" "C" "F" } 0 +p "" 1 98 "p1_RR_J" { "C" "F" } 0 +p "" 1 115 "p1_RRQ_J" { "R" "C" } 0 +p "" 2 100 "" { "R" "C" "F" } 0 +p "" 1 116 "" { "C" "F" } 0 +t "" 260 "" { 0 0 } +t "" 118 "" { -9 9 } +t "" 261 "" { 0 0 } +t "" 2 "" { 5 -5 } +p "" 2 101 "" { "R" "C" } 0 +p "" 1 117 "" { "R" "C" "F" } 0 +p "" 2 102 "" { "C" "F" } 0 +t "" 262 "" { 0 0 } +t "" 119 "" { 9 -9 } +t "" 263 "" { 0 0 } +t "" 120 "" { -5 5 } +t "" 264 "" { 0 0 } +t "" 5 "" { -3 3 } +p "" 1 118 "p1_RCQ_J" { "R" "C" } 0 +p "" 2 103 "" { "R" "C" "F" } 0 +p "" 1 119 "" { "C" "F" } 0 +t "" 265 "" { 0 0 } +t "" 6 "" { -7 7 } +t "" 266 "" { 0 0 } +t "" 7 "" { 3 -3 } +p "" 2 104 "" { "R" "C" } 0 +p "" 1 120 "" { "R" "C" "F" } 0 +p "" 2 105 "" { "C" "F" } 0 +t "" 267 "" { 0 0 } +t "" 121 "" { 7 -7 } +t "" 268 "" { 0 0 } +t "" 9 "" { -3 3 } +t "" 269 "" { 0 0 } +t "" 10 "" { 1 -1 } +p "" 2 89 "p2_C_J" { "R" "C" } 0 +p "" 1 105 "p1_CR_J" { "R" "C" "F" } 0 +p "" 2 90 "" { "C" "F" } 0 +p "" 1 121 "" { "R" "C" } 0 +p "" 2 106 "" { "R" "C" "F" } 0 +p "" 1 122 "" { "C" "F" } 0 +t "" 274 "" { 0 0 } +t "" 125 "" { -9 9 } +t "" 276 "" { 0 0 } +t "" 126 "" { 5 -5 } +p "" 2 107 "" { "R" "C" } 0 +p "" 1 123 "" { "R" "C" "F" } 0 +p "" 2 108 "" { "C" "F" } 0 +t "" 277 "" { 0 0 } +t "" 127 "" { 9 -9 } +t "" 278 "" { 0 0 } +t "" 128 "" { -5 5 } +t "" 281 "" { 0 0 } +t "" 20 "" { 3 -3 } +p "" 1 124 "p1_CRQ_J" { "R" "C" } 0 +p "" 2 109 "" { "R" "C" "F" } 0 +p "" 1 125 "" { "C" "F" } 0 +t "" 270 "" { 0 0 } +t "" 122 "" { -7 7 } +t "" 271 "" { 0 0 } +t "" 12 "" { 3 -3 } +p "" 2 110 "" { "R" "C" } 0 +p "" 1 126 "" { "R" "C" "F" } 0 +p "" 2 111 "" { "C" "F" } 0 +t "" 272 "" { 0 0 } +t "" 123 "" { 7 -7 } +t "" 273 "" { 0 0 } +t "" 124 "" { -3 3 } +t "" 275 "" { 0 0 } +t "" 313 "" { -1 1 } +p "" 1 127 "p1_CCQ_J" { "R" "C" } 0 +p "" 2 112 "" { "R" "C" "F" } 0 +p "" 1 128 "" { "C" "F" } 0 +t "" 279 "" { 0 0 } +t "" 21 "" { -5 5 } +t "" 280 "" { 0 0 } +t "" 22 "" { 1 -1 } +p "" 2 113 "" { "R" "C" } 0 +p "" 1 129 "" { "R" "C" "F" } 0 +p "" 2 114 "" { "C" "F" } 0 +t "" 282 "" { 0 0 } +t "" 129 "" { 5 -5 } +t "" 283 "" { 0 0 } +t "" 24 "" { -1 1 } +t "" 284 "" { 0 0 } diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/sample.efg b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/sample.efg new file mode 100644 index 0000000..0c01fe2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/sample.efg @@ -0,0 +1,32 @@ +EFG 2 R "General Bayes game, one stage" { "Player 1" "Player 2" } +c "ROOT" 1 "(0,1)" { "1G" 0.500000 "1B" 0.500000 } 0 +c "" 2 "(0,2)" { "2g" 0.500000 "2b" 0.500000 } 0 +p "" 1 1 "(1,1)" { "H" "L" } 0 +p "" 2 1 "(2,1)" { "h" "l" } 0 +t "" 1 "Outcome 1" { 10.000000 2.000000 } +t "" 2 "Outcome 2" { 0.000000 10.000000 } +p "" 2 1 "(2,1)" { "h" "l" } 0 +t "" 3 "Outcome 3" { 2.000000 4.000000 } +t "" 4 "Outcome 4" { 4.000000 0.000000 
} +p "" 1 1 "(1,1)" { "H" "L" } 0 +p "" 2 2 "(2,2)" { "h" "l" } 0 +t "" 5 "Outcome 5" { 10.000000 2.000000 } +t "" 6 "Outcome 6" { 0.000000 10.000000 } +p "" 2 2 "(2,2)" { "h" "l" } 0 +t "" 7 "Outcome 7" { 2.000000 4.000000 } +t "" 8 "Outcome 8" { 4.000000 0.000000 } +c "" 3 "(0,3)" { "2g" 0.500000 "2b" 0.500000 } 0 +p "" 1 2 "(1,2)" { "H" "L" } 0 +p "" 2 1 "(2,1)" { "h" "l" } 0 +t "" 9 "Outcome 9" { 4.000000 2.000000 } +t "" 10 "Outcome 10" { 2.000000 10.000000 } +p "" 2 1 "(2,1)" { "h" "l" } 0 +t "" 11 "Outcome 11" { 0.000000 4.000000 } +t "" 12 "Outcome 12" { 10.000000 2.000000 } +p "" 1 2 "(1,2)" { "H" "L" } 0 +p "" 2 2 "(2,2)" { "h" "l" } 0 +t "" 13 "Outcome 13" { 4.000000 2.000000 } +t "" 14 "Outcome 14" { 2.000000 10.000000 } +p "" 2 2 "(2,2)" { "h" "l" } 0 +t "" 15 "Outcome 15" { 0.000000 4.000000 } +t "" 16 "Outcome 16" { 10.000000 0.000000 } diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/signaling_vonstengel_forges_2008.efg b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/signaling_vonstengel_forges_2008.efg new file mode 100644 index 0000000..6c5bd4b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/efg_game/games/signaling_vonstengel_forges_2008.efg @@ -0,0 +1,18 @@ +EFG 2 R "Signaling game from Fig 1 of von Stengel and Forges 2008" { "Player 1" "Player 2" } "See Fig 1 of Extensive-Form Correlated Equilibrium: +Definition and Computational Complexity" + +c "ROOT" 1 "c1" { "g" 1/2 "b" 1/2 } 0 + p "G" 1 1 "G" { "X_G" "Y_G" } 0 + p "G X_G" 2 1 "X" { "l_X" "r_X" } 0 + t "G X_G l_X" 1 "Outcome G X_G l_X" { 4.0 10.0 } + t "G X_G r_X" 2 "Outcome G X_G r_X" { 0.0 6.0 } + p "G Y_G" 2 2 "Y" { "l_Y" "r_Y" } 0 + t "G Y_G l_Y" 3 "Outcome G Y_G l_Y" { 4.0 10.0 } + t "G Y_G r_Y" 4 "Outcome G Y_G r_Y" { 0.0 6.0 } + p "B" 1 2 "B" { "X_B" "Y_B" } 0 + p "B X_B" 2 1 "X" { "l_X" "r_X" } 0 + t "B X_B l_X" 5 "Outcome B X_B l_X" { 6.0 0.0 } + t "B X_B r_X" 6 "Outcome B X_B r_X" { 0.0 6.0 } + p "B Y_B" 2 2 "Y" { "l_Y" "r_Y" } 0 + t "B Y_B l_Y" 7 "Outcome B Y_B l_Y" { 6.0 0.0 } + t "B Y_B r_Y" 8 "Outcome B Y_B r_Y" { 0.0 6.0 } diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/einstein_wurfelt_nicht/einstein_wurfelt_nicht.cc b/scenarios/bargaining/open_spiel/open_spiel/games/einstein_wurfelt_nicht/einstein_wurfelt_nicht.cc new file mode 100644 index 0000000..777e953 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/einstein_wurfelt_nicht/einstein_wurfelt_nicht.cc @@ -0,0 +1,544 @@ +// Copyright 2024 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/einstein_wurfelt_nicht/einstein_wurfelt_nicht.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/observer.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/combinatorics.h" + +namespace open_spiel { +namespace einstein_wurfelt_nicht { +namespace { + +const std::vector> kChanceOutcomes = { + std::pair(0, 1.0 / 6), + std::pair(1, 1.0 / 6), + std::pair(2, 1.0 / 6), + std::pair(3, 1.0 / 6), + std::pair(4, 1.0 / 6), + std::pair(5, 1.0 / 6)}; + +// Number of unique directions each cube can take. +constexpr int kNumDirections = 6; + +// Direction offsets for black, then white. +constexpr std::array kDirRowOffsets = { + {1, 1, 0, -1, -1, 0}}; + +constexpr std::array kDirColOffsets = { + {1, 0, 1, 0, -1, -1}}; + +// Facts about the game +const GameType kGameType{ + /*short_name=*/"einstein_wurfelt_nicht", + /*long_name=*/"einstein_wurfelt_nicht", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/{} // no parameters +}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new EinsteinWurfeltNichtGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +Color PlayerToColor(Player player) { + SPIEL_CHECK_NE(player, kInvalidPlayer); + return static_cast(player); +} + +Player ColorToPlayer(Color color) { + switch (color) { + case Color::kBlack: + return kBlackPlayerId; + case Color::kWhite: + return kWhitePlayerId; + default: + SpielFatalError("No player for this color"); + } +} + +Color OpponentColor(Player player) { // NOLINT + Color player_color = PlayerToColor(player); + if (player_color == Color::kBlack) { + return Color::kWhite; + } else if (player_color == Color::kWhite) { + return Color::kBlack; + } else { + SpielFatalError("Player should be either black or white"); + } +} + +std::string CoordinatesToDirection(int row, int col) { + std::string direction; + if (row == col) { + direction = "diag"; + } else if (row == -1) { + direction = "up"; + } else if (row == 1) { + direction = "down"; + } else if (col == 1) { + direction = "right"; + } else if (col == -1) { + direction = "left"; + } else { + std::cout << "r2: " << row << "c2: " << col << std::endl; + SpielFatalError("Unrecognized cube's movement"); + } + return direction; +} + +} // namespace + +EinsteinWurfeltNichtState::EinsteinWurfeltNichtState( + std::shared_ptr game, int rows, int cols) + : State(game), + cur_player_(kChancePlayerId), + prev_player_(kBlackPlayerId), + turns_(-1), + rows_(rows), + cols_(cols) { + SPIEL_CHECK_GT(rows_, 1); + SPIEL_CHECK_GT(cols_, 1); + board_.fill(Cube{Color::kEmpty, -1}); + + winner_ = kInvalidPlayer; + cubes_[0] = cubes_[1] = kNumPlayerCubes; +} + +void EinsteinWurfeltNichtState::SetupInitialBoard(Player player, + Action action) { + std::vector indices(kNumPlayerCubes); + 
std::iota(indices.begin(), indices.end(), 1); + std::vector cubes_position_order = UnrankPermutation(indices, action); + int perm_idx = 0; + + // Values in the upper-left corner (black cubes) have a position identified + // as rows+cols <= 2. Values in the lower-right corner (white cubes) have a + // position identified as rows+cols >= 6. The rest of the board is empty. + for (int r = 0; r < kDefaultRows; r++) { + for (int c = 0; c < kDefaultColumns; c++) { + if (r + c <= 2 && player == kBlackPlayerId) { + board_[r * kDefaultColumns + c] = + Cube{Color::kBlack, cubes_position_order[perm_idx]}; + perm_idx++; + } else if (r + c >= 6 && player == kWhitePlayerId) { + board_[r * kDefaultColumns + c] = + Cube{Color::kWhite, cubes_position_order[perm_idx]}; + perm_idx++; + } + } + } +} + +int EinsteinWurfeltNichtState::CurrentPlayer() const { + if (IsTerminal()) { + return kTerminalPlayerId; + } else { + return cur_player_; + } +} + +int EinsteinWurfeltNichtState::Opponent(int player) const { return 1 - player; } + +std::vector> EinsteinWurfeltNichtState::AvailableCubesPosition( + Color player_color) const { + std::vector> player_cubes; + for (int r = 0; r < rows_; r++) { + for (int c = 0; c < cols_; c++) { + if (board(r, c).color == player_color) { + if (board(r, c).value == die_roll_) { + // If there is a cube with the same value as the die, + // return only this one + std::vector> player_cube; + player_cube.push_back({board(r, c).value, r, c}); + return player_cube; + } else { + player_cubes.push_back({r, c}); + } + } + } + } + + // Initialise lowest/highest cube values to out-of-bound cube's values + std::vector lowest_cube = {0, 0, 0}; // cube value, r, c + std::vector highest_cube = {7, 0, 0}; // cube value, r, c + for (int i = 0; i < player_cubes.size(); ++i) { + int r = player_cubes[i].first; + int c = player_cubes[i].second; + if (board(r, c).value > lowest_cube[0] && board(r, c).value < die_roll_) { + lowest_cube[0] = board(r, c).value; + lowest_cube[1] = r; + lowest_cube[2] = c; + } else if (board(r, c).value < highest_cube[0] && + board(r, c).value > die_roll_) { + highest_cube[0] = board(r, c).value; + highest_cube[1] = r; + highest_cube[2] = c; + } + } + + std::vector> selected_cubes; + if (lowest_cube[0] > 0) { + selected_cubes.push_back(lowest_cube); + } + if (highest_cube[0] < 7) { + selected_cubes.push_back(highest_cube); + } + + // Legal actions have to be sorted. 
Sort by row first, then by column + std::sort(selected_cubes.begin(), selected_cubes.end(), + [](const std::vector& a, const std::vector& b) { + if (a[1] != b[1]) return a[1] < b[1]; + return a[2] < b[2]; + }); + + return selected_cubes; +} + +void EinsteinWurfeltNichtState::DoApplyAction(Action action) { + if (IsChanceNode()) { + SPIEL_CHECK_GE(action, 0); + SPIEL_CHECK_LE(action, kNumCubesPermutations - 1); + turn_history_info_.push_back(TurnHistoryInfo(kChancePlayerId, prev_player_, + die_roll_, action, + Cube{Color::kEmpty, -1})); + if (turns_ == -1) { + SetupInitialBoard(kBlackPlayerId, action); + turns_ = 0; + return; + } else if (turns_ == 0) { + SetupInitialBoard(kWhitePlayerId, action); + turns_++; + return; + } else { + cur_player_ = Opponent(prev_player_); + prev_player_ = cur_player_; + die_roll_ = action + 1; + turns_++; + return; + } + } + + // The die should have been rolled at least once at this point + SPIEL_CHECK_GE(die_roll_, 1); + SPIEL_CHECK_LE(die_roll_, 6); + + std::vector values = + UnrankActionMixedBase(action, {rows_, cols_, kNumDirections, 2}); + int r1 = values[0]; + int c1 = values[1]; + int dir = values[2]; + bool capture = values[3] == 1; + int r2 = r1 + kDirRowOffsets[dir]; + int c2 = c1 + kDirColOffsets[dir]; + + SPIEL_CHECK_TRUE(InBounds(r1, c1)); + SPIEL_CHECK_TRUE(InBounds(r2, c2)); + + // Remove cubes if captured. + if (board(r2, c2).color == Color::kBlack) { + cubes_[ColorToPlayer(Color::kBlack)]--; + } else if (board(r2, c2).color == Color::kWhite) { + cubes_[ColorToPlayer(Color::kWhite)]--; + } + + Cube captured_cube = (capture) ? board(r2, c2) : Cube{Color::kEmpty, -1}; + turn_history_info_.push_back(TurnHistoryInfo( + cur_player_, prev_player_, die_roll_, action, captured_cube)); + + SetBoard(r2, c2, board(r1, c1)); + SetBoard(r1, c1, Cube{Color::kEmpty, -1}); + + // Check for winner. + if ((cur_player_ == 0 && r2 == (rows_ - 1) && c2 == (cols_ - 1)) || + (cubes_[ColorToPlayer(Color::kWhite)] == 0)) { + winner_ = 0; + } else if ((cur_player_ == 1 && r2 == 0 && c2 == 0) || + (cubes_[ColorToPlayer(Color::kBlack)] == 0)) { + winner_ = 1; + } + + cur_player_ = NextPlayerRoundRobin(cur_player_, kNumPlayers); + cur_player_ = kChancePlayerId; + turns_++; +} + +std::string EinsteinWurfeltNichtState::ActionToString(Player player, + Action action) const { + std::string action_string = ""; + + if (IsChanceNode()) { + if (turns_ == -1) { + absl::StrAppend(&action_string, + "Placing black cubes on the board - action ", action); + return action_string; + } else if (turns_ == 0) { + absl::StrAppend(&action_string, + "Placing white cubes on the board - action ", action); + return action_string; + } else if (turns_ >= 0) { + absl::StrAppend(&action_string, "roll ", action + 1); + return action_string; + } + } + + std::vector values = + UnrankActionMixedBase(action, {rows_, cols_, kNumDirections, 2}); + int r1 = values[0]; + int c1 = values[1]; + int dir = values[2]; + bool capture = values[3] == 1; + int r2 = kDirRowOffsets[dir]; + int c2 = kDirColOffsets[dir]; + + Cube cube = board(r1, c1); + std::string color = (cube.color == Color::kBlack) ? 
"B" : "W"; + + std::string direction = CoordinatesToDirection(r2, c2); + absl::StrAppend(&action_string, color); + absl::StrAppend(&action_string, cube.value); + absl::StrAppend(&action_string, "-"); + absl::StrAppend(&action_string, direction); + if (capture) { + absl::StrAppend(&action_string, "*"); + } + return action_string; +} + +std::vector EinsteinWurfeltNichtState::LegalActions() const { + if (IsChanceNode()) return LegalChanceOutcomes(); + if (IsTerminal()) return {}; + + std::vector movelist; + if (IsTerminal()) return movelist; + const Player player = CurrentPlayer(); + Color player_color = PlayerToColor(player); + std::vector action_bases = {rows_, cols_, kNumDirections, 2}; + std::vector action_values = {0, 0, 0, 0}; + + std::vector> available_cubes; + available_cubes = AvailableCubesPosition(player_color); + + for (int i = 0; i < available_cubes.size(); ++i) { + int r = available_cubes[i][1]; + int c = available_cubes[i][2]; + for (int o = 0; o < kNumDirections / 2; o++) { + int dir = player * kNumDirections / 2 + o; + int rp = r + kDirRowOffsets[dir]; + int cp = c + kDirColOffsets[dir]; + if (InBounds(rp, cp)) { + action_values[0] = r; + action_values[1] = c; + action_values[2] = dir; + if (board(rp, cp).color == Color::kEmpty) { + action_values[3] = 0; // no capture + movelist.push_back(RankActionMixedBase(action_bases, action_values)); + } else { + action_values[3] = 1; // capture + movelist.push_back(RankActionMixedBase(action_bases, action_values)); + } + } + } + } + return movelist; +} + +std::vector> +EinsteinWurfeltNichtState::ChanceOutcomes() const { + SPIEL_CHECK_TRUE(IsChanceNode()); + if (turns_ <= 0) { + // First 2 moves corresponds to the initial board setup. + // There are 6! = 720 possible permutations of the cubes. + std::vector> chance_outcomes; + double action_prob = 1.0 / kNumCubesPermutations; + chance_outcomes.reserve(kNumCubesPermutations); + + for (Action i = 0; i < kNumCubesPermutations; ++i) { + chance_outcomes.emplace_back(i, action_prob); + } + return chance_outcomes; + } else { + return kChanceOutcomes; + } +} + +bool EinsteinWurfeltNichtState::InBounds(int r, int c) const { + return (r >= 0 && r < rows_ && c >= 0 && c < cols_); +} + +std::string EinsteinWurfeltNichtState::ToString() const { + std::string W_result = ""; + + for (int r = 0; r < kDefaultRows; r++) { + for (int c = 0; c < kDefaultColumns; c++) { + if (board_[r * kDefaultColumns + c].color == Color::kBlack) { + absl::StrAppend(&W_result, "|b"); + absl::StrAppend(&W_result, board_[r * kDefaultColumns + c].value); + absl::StrAppend(&W_result, "|"); + } else if (board_[r * kDefaultColumns + c].color == Color::kWhite) { + absl::StrAppend(&W_result, "|w"); + absl::StrAppend(&W_result, board_[r * kDefaultColumns + c].value); + absl::StrAppend(&W_result, "|"); + } else { + absl::StrAppend(&W_result, "|__|"); + } + } + W_result.append("\n"); + } + return W_result; +} + +bool EinsteinWurfeltNichtState::IsTerminal() const { + return (winner_ >= 0 || (cubes_[0] == 0 || cubes_[1] == 0)); +} + +std::vector EinsteinWurfeltNichtState::Returns() const { + if (winner_ == 0 || cubes_[1] == 0) { + return {1.0, -1.0}; + } else if (winner_ == 1 || cubes_[0] == 0) { + return {-1.0, 1.0}; + } else { + return {0.0, 0.0}; + } +} + +std::string EinsteinWurfeltNichtState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void EinsteinWurfeltNichtState::ObservationTensor( + Player player, absl::Span values) const { + 
SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + auto value_it = values.begin(); + + for (int cube_num = 1; cube_num < kNumPlayerCubes + 1; ++cube_num) { + for (int player_idx = 0; player_idx < kNumPlayers; ++player_idx) { + for (int8_t y = 0; y < kDefaultRows; ++y) { + for (int8_t x = 0; x < kDefaultColumns; ++x) { + *value_it++ = (board(x, y).value == cube_num && + board(x, y).color == PlayerToColor(player_idx) + ? 1.0 + : 0.0); + } + } + } + } +} + +void EinsteinWurfeltNichtState::UndoAction(Player player, Action action) { + const TurnHistoryInfo& thi = turn_history_info_.back(); + SPIEL_CHECK_EQ(thi.player, player); + SPIEL_CHECK_EQ(action, thi.action); + + if (player != kChancePlayerId) { + std::vector values = + UnrankActionMixedBase(action, {rows_, cols_, kNumDirections, 2}); + int r1 = values[0]; + int c1 = values[1]; + int dir = values[2]; + int r2 = r1 + kDirRowOffsets[dir]; + int c2 = c1 + kDirColOffsets[dir]; + Cube captured_cube = thi.captured_cube; + + SetBoard(r1, c1, board(r2, c2)); + if (captured_cube.value != -1) { + SetBoard(r2, c2, captured_cube); + if (captured_cube.color == Color::kBlack) { + cubes_[ColorToPlayer(Color::kBlack)]++; + } else if (captured_cube.color == Color::kWhite) { + cubes_[ColorToPlayer(Color::kWhite)]++; + } + } else { + SetBoard(r2, c2, Cube{Color::kEmpty, -1}); + } + } else { + for (int r = 0; r < kDefaultRows; r++) { + for (int c = 0; c < kDefaultColumns; c++) { + if (turns_ == 1 && board(r, c).color == Color::kWhite) { + board_[r * kDefaultColumns + c] = Cube{Color::kEmpty, -1}; + } else if (turns_ == 0 && board(r, c).color == Color::kBlack) { + board_[r * kDefaultColumns + c] = Cube{Color::kEmpty, -1}; + } + } + } + } + + // Undo win status. + winner_ = kInvalidPlayer; + + turn_history_info_.pop_back(); + history_.pop_back(); + --turns_; + --move_number_; +} + +std::unique_ptr EinsteinWurfeltNichtState::Clone() const { + return std::unique_ptr(new EinsteinWurfeltNichtState(*this)); +} + +// Setter function used for debugging and tests. Note: this does not set the +// historical information properly, so Undo likely will not work on states +// set this way! +void EinsteinWurfeltNichtState::SetState( + int cur_player, int die_roll, const std::array board, + int cubes_black, int cubes_white) { + cur_player_ = cur_player; + die_roll_ = die_roll; + board_ = board; + cubes_[ColorToPlayer(Color::kBlack)] = cubes_black; + cubes_[ColorToPlayer(Color::kWhite)] = cubes_white; +} + +EinsteinWurfeltNichtGame::EinsteinWurfeltNichtGame(const GameParameters& params) + : Game(kGameType, params), rows_(kDefaultRows), cols_(kDefaultColumns) {} + +int EinsteinWurfeltNichtGame::NumDistinctActions() const { + return rows_ * cols_ * kNumDirections * 2; +} + +} // namespace einstein_wurfelt_nicht +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/einstein_wurfelt_nicht/einstein_wurfelt_nicht.h b/scenarios/bargaining/open_spiel/open_spiel/games/einstein_wurfelt_nicht/einstein_wurfelt_nicht.h new file mode 100644 index 0000000..a8b7829 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/einstein_wurfelt_nicht/einstein_wurfelt_nicht.h @@ -0,0 +1,159 @@ +// Copyright 2024 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_EINSTEIN_WURFELT_NICHT_H_ +#define OPEN_SPIEL_GAMES_EINSTEIN_WURFELT_NICHT_H_ + +#include + +#include +#include +#include +#include +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// An implementation of the game EinStein würfelt nicht! +// This is the implementation of the basic game with a 5x5 board and 6 cubes +// per player. +// https://en.wikipedia.org/wiki/EinStein_w%C3%BCrfelt_nicht! + +namespace open_spiel { +namespace einstein_wurfelt_nicht { + +enum class Color : int8_t { kBlack = 0, kWhite = 1, kEmpty = 2 }; + +struct Cube { + Color color; + int value; // player's die value +}; + +inline constexpr int kNumPlayers = 2; +inline constexpr int kBlackPlayerId = 0; +inline constexpr int kWhitePlayerId = 1; +inline constexpr int kNumPlayerCubes = 6; +// 720 possible permutations of 6 cubes on the board +inline constexpr int kNumCubesPermutations = 720; +inline constexpr int kDefaultRows = 5; +inline constexpr int kDefaultColumns = 5; +inline constexpr int k2dMaxBoardSize = kDefaultRows * kDefaultColumns; +inline constexpr const int kStateEncodingSize = + kNumPlayers * kNumPlayerCubes * kDefaultRows * kDefaultColumns; + +// This is a small helper to track historical turn info not stored in the moves. +// It is only needed for proper implementation of Undo. +struct TurnHistoryInfo { + int player; + int prev_player; + int die_roll_; + Action action; + Cube captured_cube; + TurnHistoryInfo(int _player, int _prev_player, int _die_roll, int _action, + Cube _captured_cube) + : player(_player), + prev_player(_prev_player), + die_roll_(_die_roll), + action(_action), + captured_cube(_captured_cube) {} +}; + +class EinsteinWurfeltNichtState : public State { + public: + explicit EinsteinWurfeltNichtState(std::shared_ptr game, int rows, + int cols); + Player CurrentPlayer() const override; + // Returns the opponent of the specified player. 
+ int Opponent(int player) const; + std::vector> AvailableCubesPosition(Color color) const; + std::string ActionToString(Player player, Action action) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + void UndoAction(Player player, Action action) override; + + bool InBounds(int r, int c) const; + void SetBoard(int r, int c, Cube cube) { board_[r * cols_ + c] = cube; } + Cube board(int row, int col) const { return board_[row * cols_ + col]; } + std::vector LegalActions() const override; + std::vector> ChanceOutcomes() const override; + void SetState(int cur_player, int die_roll, + const std::array board, int cubes_black, + int cubes_white); + + protected: + void DoApplyAction(Action action) override; + + private: + void SetupInitialBoard(Player player, Action action); + + Player cur_player_ = kInvalidPlayer; + Player prev_player_ = kInvalidPlayer; + int winner_ = kInvalidPlayer; + int total_moves_ = -1; + int turns_ = -1; + std::array cubes_; + int rows_ = -1; + int cols_ = -1; + int die_roll_ = 0; + std::array + board_; // for (row,col) we use row*cols_+col + std::vector turn_history_info_; +}; + +class EinsteinWurfeltNichtGame : public Game { + public: + explicit EinsteinWurfeltNichtGame(const GameParameters& params); + int NumDistinctActions() const override; + std::unique_ptr NewInitialState() const override { + return std::unique_ptr( + new EinsteinWurfeltNichtState(shared_from_this(), rows_, cols_)); + } + + int MaxChanceOutcomes() const override { return kNumCubesPermutations; } + + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { return -1; } + absl::optional UtilitySum() const override { return 0; } + double MaxUtility() const override { return 1; } + std::vector ObservationTensorShape() const override { + return {kStateEncodingSize}; + } + + // Assuming that each cube is moved first along the horizontal axis and then + // along the vertical axis, which is the maximum number of moves for a cube + // (only the cubes in the corners). This accounts for (row-1) * (cols-1) + // moves. If we assume that each player makes all these moves we get + // (row-1) * (cols-1) * num_players. If we consider the chance player as + // the third player which makes the same number of moves, the upper bound + // for the number of moves is (row-1) * (cols-1) * (num_players + 1). + int MaxGameLength() const override { + return (kDefaultRows - 1) * (kDefaultColumns - 1) * (kNumPlayerCubes + 1); + } + + private: + int rows_ = -1; + int cols_ = -1; +}; + +} // namespace einstein_wurfelt_nicht +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_EINSTEIN_WURFELT_NICHT_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/einstein_wurfelt_nicht/einstein_wurfelt_nicht_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/einstein_wurfelt_nicht/einstein_wurfelt_nicht_test.cc new file mode 100644 index 0000000..e698ccc --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/einstein_wurfelt_nicht/einstein_wurfelt_nicht_test.cc @@ -0,0 +1,305 @@ +// Copyright 2024 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/einstein_wurfelt_nicht/einstein_wurfelt_nicht.h" + +#include +#include +#include +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace einstein_wurfelt_nicht { +namespace { + +namespace testing = open_spiel::testing; + +void BasicEinsteinWurfeltNitchTests() { + std::shared_ptr game = + open_spiel::LoadGame("einstein_wurfelt_nicht"); + testing::RandomSimTest(*game, 100, true, true); + testing::RandomSimTestWithUndo(*game, 1); +} + +void BlackPlayerSimpleWinTest() { + std::shared_ptr game = + open_spiel::LoadGame("einstein_wurfelt_nicht"); + std::unique_ptr state = game->NewInitialState(); + EinsteinWurfeltNichtState* bstate = + static_cast(state.get()); + + int values[] = {-1, 2, -1, -1, -1, -1, -1, -1, 5, -1, 6, -1, -1, + -1, -1, -1, 3, -1, -1, 3, -1, -1, -1, -1, -1}; + Color colors[] = {Color::kEmpty, Color::kWhite, Color::kEmpty, Color::kEmpty, + Color::kEmpty, Color::kEmpty, Color::kEmpty, Color::kEmpty, + Color::kBlack, Color::kEmpty, Color::kBlack, Color::kEmpty, + Color::kEmpty, Color::kEmpty, Color::kEmpty, Color::kEmpty, + Color::kWhite, Color::kEmpty, Color::kEmpty, Color::kBlack, + Color::kEmpty, Color::kEmpty, Color::kEmpty, Color::kEmpty, + Color::kEmpty}; + std::array board; + for (int i = 0; i < k2dMaxBoardSize; i++) { + board[i] = {colors[i], values[i]}; + } + + bstate->SetState(kBlackPlayerId, 2, board, 3, 2); + + std::string expected_state = + "|__||w2||__||__||__|\n" + "|__||__||__||b5||__|\n" + "|b6||__||__||__||__|\n" + "|__||w3||__||__||b3|\n" + "|__||__||__||__||__|\n"; + SPIEL_CHECK_EQ(bstate->ToString(), expected_state); + SPIEL_CHECK_EQ(bstate->CurrentPlayer(), kBlackPlayerId); + SPIEL_CHECK_FALSE(bstate->IsTerminal()); + SPIEL_CHECK_EQ(bstate->LegalActions().size(), 1); + Action action = 230; // Move B3 down + SPIEL_CHECK_EQ(bstate->LegalActions()[0], action); + SPIEL_CHECK_EQ(bstate->ActionToString(kBlackPlayerId, action), "B3-down"); + + bstate->ApplyAction(230); + std::string expected_state_final = + "|__||w2||__||__||__|\n" + "|__||__||__||b5||__|\n" + "|b6||__||__||__||__|\n" + "|__||w3||__||__||__|\n" + "|__||__||__||__||b3|\n"; + SPIEL_CHECK_EQ(bstate->ToString(), expected_state_final); + std::vector returns = bstate->Returns(); + SPIEL_CHECK_TRUE(bstate->IsTerminal()); + SPIEL_CHECK_EQ(returns.size(), 2); + SPIEL_CHECK_EQ(returns[0], 1); + SPIEL_CHECK_EQ(returns[1], -1); +} + +void WhitePlayerSimpleWinTest() { + std::shared_ptr game = + open_spiel::LoadGame("einstein_wurfelt_nicht"); + std::unique_ptr state = game->NewInitialState(); + EinsteinWurfeltNichtState* bstate = + static_cast(state.get()); + + int values[] = {-1, 2, -1, -1, -1, -1, -1, -1, 5, -1, 6, -1, -1, + -1, -1, -1, 3, -1, -1, 3, -1, -1, -1, -1, -1}; + Color colors[] = {Color::kEmpty, Color::kWhite, Color::kEmpty, Color::kEmpty, + Color::kEmpty, Color::kEmpty, Color::kEmpty, Color::kEmpty, + Color::kBlack, Color::kEmpty, Color::kBlack, Color::kEmpty, + Color::kEmpty, Color::kEmpty, Color::kEmpty, 
Color::kEmpty, + Color::kWhite, Color::kEmpty, Color::kEmpty, Color::kBlack, + Color::kEmpty, Color::kEmpty, Color::kEmpty, Color::kEmpty, + Color::kEmpty}; + std::array board; + for (int i = 0; i < k2dMaxBoardSize; i++) { + board[i] = {colors[i], values[i]}; + } + bstate->SetState(kWhitePlayerId, 2, board, 3, 2); + + std::string expected_state = + "|__||w2||__||__||__|\n" + "|__||__||__||b5||__|\n" + "|b6||__||__||__||__|\n" + "|__||w3||__||__||b3|\n" + "|__||__||__||__||__|\n"; + SPIEL_CHECK_EQ(bstate->ToString(), expected_state); + SPIEL_CHECK_EQ(bstate->CurrentPlayer(), kWhitePlayerId); + SPIEL_CHECK_FALSE(bstate->IsTerminal()); + SPIEL_CHECK_EQ(bstate->LegalActions().size(), 1); + Action action = 22; // Move W2 to the left + SPIEL_CHECK_EQ(bstate->LegalActions()[0], action); + SPIEL_CHECK_EQ(bstate->ActionToString(kWhitePlayerId, action), "W2-left"); + + bstate->ApplyAction(action); + std::string expected_state_final = + "|w2||__||__||__||__|\n" + "|__||__||__||b5||__|\n" + "|b6||__||__||__||__|\n" + "|__||w3||__||__||b3|\n" + "|__||__||__||__||__|\n"; + SPIEL_CHECK_EQ(bstate->ToString(), expected_state_final); + std::vector returns = bstate->Returns(); + SPIEL_CHECK_TRUE(bstate->IsTerminal()); + SPIEL_CHECK_EQ(returns.size(), 2); + SPIEL_CHECK_EQ(returns[0], -1); + SPIEL_CHECK_EQ(returns[1], 1); +} + +void WinByCapturingAllOpponentCubesTest() { + std::shared_ptr game = + open_spiel::LoadGame("einstein_wurfelt_nicht"); + std::unique_ptr state = game->NewInitialState(); + EinsteinWurfeltNichtState* bstate = + static_cast(state.get()); + + int values[] = {-1, -1, -1, -1, -1, -1, -1, -1, 5, -1, 6, -1, -1, + -1, -1, -1, 3, -1, -1, 3, -1, -1, -1, -1, -1}; + Color colors[] = {Color::kEmpty, Color::kEmpty, Color::kEmpty, Color::kEmpty, + Color::kEmpty, Color::kEmpty, Color::kEmpty, Color::kEmpty, + Color::kBlack, Color::kEmpty, Color::kBlack, Color::kEmpty, + Color::kEmpty, Color::kEmpty, Color::kEmpty, Color::kEmpty, + Color::kWhite, Color::kEmpty, Color::kEmpty, Color::kBlack, + Color::kEmpty, Color::kEmpty, Color::kEmpty, Color::kEmpty, + Color::kEmpty}; + std::array board; + for (int i = 0; i < k2dMaxBoardSize; i++) { + board[i] = {colors[i], values[i]}; + } + bstate->SetState(kBlackPlayerId, 6, board, 3, 1); + + std::string expected_state = + "|__||__||__||__||__|\n" + "|__||__||__||b5||__|\n" + "|b6||__||__||__||__|\n" + "|__||w3||__||__||b3|\n" + "|__||__||__||__||__|\n"; + SPIEL_CHECK_EQ(bstate->ToString(), expected_state); + SPIEL_CHECK_EQ(bstate->CurrentPlayer(), kBlackPlayerId); + SPIEL_CHECK_FALSE(bstate->IsTerminal()); + SPIEL_CHECK_EQ(bstate->LegalActions().size(), 3); + Action action = 121; // Move B6 diagonally down-right + SPIEL_CHECK_EQ(bstate->LegalActions()[0], action); + SPIEL_CHECK_EQ(bstate->ActionToString(kBlackPlayerId, action), "B6-diag*"); + + bstate->ApplyAction(action); + std::string expected_state_final = + "|__||__||__||__||__|\n" + "|__||__||__||b5||__|\n" + "|__||__||__||__||__|\n" + "|__||b6||__||__||b3|\n" + "|__||__||__||__||__|\n"; + SPIEL_CHECK_EQ(bstate->ToString(), expected_state_final); + std::vector returns = bstate->Returns(); + SPIEL_CHECK_TRUE(bstate->IsTerminal()); + SPIEL_CHECK_EQ(returns.size(), 2); + SPIEL_CHECK_EQ(returns[0], 1); + SPIEL_CHECK_EQ(returns[1], -1); +} + +void CheckAlternateChancePlayerAndNormalPlayerTest() { + std::shared_ptr game = + open_spiel::LoadGame("einstein_wurfelt_nicht"); + std::unique_ptr state = game->NewInitialState(); + + int previous_player = state->CurrentPlayer(); + + while (!state->IsTerminal()) { + if 
(state->CurrentPlayer() == open_spiel::kChancePlayerId) { + state->ApplyAction(state->LegalActions()[0]); + } else { + std::vector legal_actions = state->LegalActions(); + state->ApplyAction(legal_actions[0]); + } + int current_player = state->CurrentPlayer(); + if (current_player != open_spiel::kChancePlayerId) { + SPIEL_CHECK_NE(current_player, previous_player); + } + previous_player = current_player; + } +} + +void InitialStateTest() { + std::shared_ptr game = + open_spiel::LoadGame("einstein_wurfelt_nicht"); + std::unique_ptr state = game->NewInitialState(); + SPIEL_CHECK_EQ(state->CurrentPlayer(), open_spiel::kChancePlayerId); + SPIEL_CHECK_FALSE(state->IsTerminal()); +} + +void LegalActionsTest() { + std::shared_ptr game = + open_spiel::LoadGame("einstein_wurfelt_nicht"); + std::unique_ptr state = game->NewInitialState(); + + while (!state->IsTerminal()) { + std::vector legal_actions = state->LegalActions(); + SPIEL_CHECK_FALSE(legal_actions.empty()); + state->ApplyAction(legal_actions[0]); + } + + std::vector returns = state->Returns(); + SPIEL_CHECK_EQ(returns.size(), 2); + SPIEL_CHECK_TRUE(returns[0] == 1.0 || returns[1] == 1.0); +} + +void InitialBoardSetupTest() { + // Test the initial setup with empty board + std::string empty_board_state = + "|__||__||__||__||__|\n" + "|__||__||__||__||__|\n" + "|__||__||__||__||__|\n" + "|__||__||__||__||__|\n" + "|__||__||__||__||__|\n"; + std::shared_ptr game = + open_spiel::LoadGame("einstein_wurfelt_nicht"); + std::unique_ptr state = game->NewInitialState(); + SPIEL_CHECK_EQ(state->ToString(), empty_board_state); + SPIEL_CHECK_EQ(state->CurrentPlayer(), kChancePlayerId); + SPIEL_CHECK_EQ(state->ChanceOutcomes().size(), kNumCubesPermutations); + + // Test allocation of black cubes on the board + state->ApplyAction(0); + std::string black_board_state = + "|b1||b2||b3||__||__|\n" + "|b4||b5||__||__||__|\n" + "|b6||__||__||__||__|\n" + "|__||__||__||__||__|\n" + "|__||__||__||__||__|\n"; + SPIEL_CHECK_EQ(state->ToString(), black_board_state); + SPIEL_CHECK_EQ(state->CurrentPlayer(), kChancePlayerId); + SPIEL_CHECK_EQ(state->ChanceOutcomes().size(), kNumCubesPermutations); + + // Allocation of cubes on the board changes if a different action is applied + std::shared_ptr game2 = + open_spiel::LoadGame("einstein_wurfelt_nicht"); + std::unique_ptr state2 = game->NewInitialState(); + SPIEL_CHECK_EQ(state2->ToString(), empty_board_state); + state2->ApplyAction(1); + SPIEL_CHECK_NE(state2->ToString(), empty_board_state); + SPIEL_CHECK_NE(state->ToString(), state2->ToString()); + + // Test allocation of white cubes on the board + state->ApplyAction(0); + std::string white_board_state = + "|b1||b2||b3||__||__|\n" + "|b4||b5||__||__||__|\n" + "|b6||__||__||__||w1|\n" + "|__||__||__||w2||w3|\n" + "|__||__||w4||w5||w6|\n"; + SPIEL_CHECK_EQ(state->ToString(), white_board_state); + SPIEL_CHECK_EQ(state->CurrentPlayer(), kChancePlayerId); + SPIEL_CHECK_EQ(state->ChanceOutcomes().size(), kNumPlayerCubes); +} + +} // namespace +} // namespace einstein_wurfelt_nicht +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::testing::LoadGameTest("einstein_wurfelt_nicht"); + open_spiel::einstein_wurfelt_nicht::BasicEinsteinWurfeltNitchTests(); + open_spiel::einstein_wurfelt_nicht::WinByCapturingAllOpponentCubesTest(); + open_spiel::einstein_wurfelt_nicht:: + CheckAlternateChancePlayerAndNormalPlayerTest(); + open_spiel::einstein_wurfelt_nicht::InitialStateTest(); + open_spiel::einstein_wurfelt_nicht::LegalActionsTest(); + 
open_spiel::einstein_wurfelt_nicht::BlackPlayerSimpleWinTest(); + open_spiel::einstein_wurfelt_nicht::WhitePlayerSimpleWinTest(); + open_spiel::einstein_wurfelt_nicht::WinByCapturingAllOpponentCubesTest(); + open_spiel::einstein_wurfelt_nicht::InitialBoardSetupTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/euchre/euchre.cc b/scenarios/bargaining/open_spiel/open_spiel/games/euchre/euchre.cc new file mode 100644 index 0000000..c7562b8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/euchre/euchre.cc @@ -0,0 +1,721 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/euchre/euchre.h" + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace euchre { +namespace { + +const GameType kGameType{ + /*short_name=*/"euchre", + /*long_name=*/"Euchre", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/kNumPlayers, + /*min_num_players=*/kNumPlayers, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/false, + /*provides_observation_tensor=*/false, + /*parameter_specification=*/ + { + {"allow_lone_defender", GameParameter(false)}, + {"stick_the_dealer", GameParameter(true)}, + }}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new EuchreGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +open_spiel::RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +std::map same_color_suit { + {Suit::kClubs, Suit::kSpades}, {Suit::kSpades, Suit::kClubs}, + {Suit::kDiamonds, Suit::kHearts}, {Suit::kHearts, Suit::kDiamonds}}; + +} // namespace + +Suit CardSuit(int card, Suit trump_suit) { + Suit suit = CardSuit(card); + if (CardRank(card) == kJackRank && same_color_suit[suit] == trump_suit) + suit = trump_suit; + return suit; +} + +// Highest rank belongs to right bower, then left bower, then usual ranking. 
+int CardRank(int card, Suit trump_suit) { + int rank = CardRank(card); + if (CardSuit(card) == trump_suit && rank == kJackRank) { + rank = 100; // Right bower (arbitrary value) + } else if (CardSuit(card, trump_suit) == trump_suit && rank == kJackRank) { + rank = 99; // Left bower (arbitrary value) + } + return rank; +} + +EuchreGame::EuchreGame(const GameParameters& params) + : Game(kGameType, params), + allow_lone_defender_(ParameterValue("allow_lone_defender")), + stick_the_dealer_(ParameterValue("stick_the_dealer")) {} + +EuchreState::EuchreState(std::shared_ptr game, + bool allow_lone_defender, bool stick_the_dealer) + : State(game), + allow_lone_defender_(allow_lone_defender), + stick_the_dealer_(stick_the_dealer) {} + +std::string EuchreState::ActionToString(Player player, Action action) const { + if (history_.empty()) return DirString(action); + if (action == kPassAction) return "Pass"; + if (action == kClubsTrumpAction) return "Clubs"; + if (action == kDiamondsTrumpAction) return "Diamonds"; + if (action == kHeartsTrumpAction) return "Hearts"; + if (action == kSpadesTrumpAction) return "Spades"; + if (action == kGoAloneAction) return "Alone"; + if (action == kPlayWithPartnerAction) return "Partner"; + return CardString(action); +} + +std::string EuchreState::ToString() const { + std::string rv = "Dealer: "; + absl::StrAppend(&rv, DirString(dealer_), "\n\n"); + absl::StrAppend(&rv, FormatDeal()); + if (upcard_ != kInvalidAction) + absl::StrAppend(&rv, "\nUpcard: ", ActionToString(kInvalidPlayer, upcard_)); + if (history_.size() > kFirstBiddingActionInHistory) + absl::StrAppend(&rv, FormatBidding()); + if (discard_ != kInvalidAction) { + absl::StrAppend(&rv, "\nDealer discard: ", + ActionToString(kInvalidPlayer, discard_), "\n"); + } + if (declarer_go_alone_.has_value()) { + absl::StrAppend(&rv, "\nDeclarer go alone: "); + if (declarer_go_alone_.value()) + absl::StrAppend(&rv, "true\n"); + else + absl::StrAppend(&rv, "false\n"); + if (allow_lone_defender_) { + absl::StrAppend(&rv, "\nDefender go alone: "); + if (lone_defender_ != kInvalidPlayer) + absl::StrAppend(&rv, "true\n"); + else + absl::StrAppend(&rv, "false\n"); + } + } + if (num_cards_played_ > 0) absl::StrAppend(&rv, FormatPlay(), FormatPoints()); + return rv; +} + +std::array EuchreState::FormatHand( + int player, bool mark_voids) const { + // Current hand, except in the terminal state when we use the original hand + // to enable an easy review of the whole deal. + auto deal = IsTerminal() ? 
initial_deal_ : holder_; + std::array cards; + for (int suit = 0; suit < kNumSuits; ++suit) { + cards[suit].push_back(kSuitChar[suit]); + cards[suit].push_back(' '); + bool is_void = true; + for (int rank = kNumCardsPerSuit - 1; rank >= 0; --rank) { + if (player == deal[Card(Suit(suit), rank)]) { + cards[suit].push_back(kRankChar[rank]); + is_void = false; + } + } + if (is_void && mark_voids) absl::StrAppend(&cards[suit], "none"); + } + return cards; +} + +std::string EuchreState::FormatDeal() const { + std::string rv; + std::array, kNumPlayers> cards; + for (auto player : {kNorth, kEast, kSouth, kWest}) + cards[player] = FormatHand(player, /*mark_voids=*/false); + constexpr int kColumnWidth = 8; + std::string padding(kColumnWidth, ' '); + for (int suit = kNumSuits - 1; suit >= 0; --suit) + absl::StrAppend(&rv, padding, cards[kNorth][suit], "\n"); + for (int suit = kNumSuits - 1; suit >= 0; --suit) + absl::StrAppend(&rv, absl::StrFormat("%-8s", cards[kWest][suit]), padding, + cards[kEast][suit], "\n"); + for (int suit = kNumSuits - 1; suit >= 0; --suit) + absl::StrAppend(&rv, padding, cards[kSouth][suit], "\n"); + return rv; +} + +std::string EuchreState::FormatBidding() const { + SPIEL_CHECK_GE(history_.size(), kFirstBiddingActionInHistory); + std::string rv; + absl::StrAppend(&rv, "\nBidding:"); + absl::StrAppend(&rv, "\nNorth East South West\n"); + if (dealer_ == 0) absl::StrAppend(&rv, absl::StrFormat("%-9s", "")); + if (dealer_ == 1) absl::StrAppend(&rv, absl::StrFormat("%-18s", "")); + if (dealer_ == 2) absl::StrAppend(&rv, absl::StrFormat("%-27s", "")); + + for (int i = kFirstBiddingActionInHistory; i < history_.size(); ++i) { + if (i < kFirstBiddingActionInHistory + kNumPlayers - 1) { + // Players can pass or "order up" the upcard to the dealer. + if (history_[i].action == kPassAction) + absl::StrAppend(&rv, absl::StrFormat("%-9s", "Pass")); + else + absl::StrAppend(&rv, absl::StrFormat("%-9s", "Order up!")); + } else if (i == kFirstBiddingActionInHistory + kNumPlayers) { + // Dealer can pass or "pick up" the upcard. 
+ if (history_[i].action == kPassAction) + absl::StrAppend(&rv, absl::StrFormat("%-9s", "Pass")); + else + absl::StrAppend(&rv, absl::StrFormat("%-9s", "Pick up!")); + } else { + absl::StrAppend( + &rv, absl::StrFormat( + "%-9s", ActionToString(kInvalidPlayer, history_[i].action))); + } + if (history_[i].player == kNumPlayers - 1) rv.push_back('\n'); + if (history_[i].action > kPassAction) break; + } + + absl::StrAppend(&rv, "\n"); + return rv; +} + +std::string EuchreState::FormatPlay() const { + SPIEL_CHECK_GT(num_cards_played_, 0); + std::string rv = "\nTricks:"; + absl::StrAppend(&rv, "\nN E S W N E S"); + for (int i = 0; i <= (num_cards_played_ - 1) / num_active_players_; ++i) { + Player player_id = tricks_[i].Leader(); + absl::StrAppend(&rv, "\n", std::string(3 * player_id, ' ')); + for (auto card : tricks_[i].Cards()) { + absl::StrAppend(&rv, CardString(card), " "); + player_id = (player_id + 1) % kNumPlayers; + while (!active_players_[player_id]) { + absl::StrAppend(&rv, " "); + player_id = (player_id + 1) % kNumPlayers; + } + } + } + return rv; +} + +std::string EuchreState::FormatPoints() const { + std::string rv; + absl::StrAppend(&rv, "\n\nPoints:"); + for (int i = 0; i < kNumPlayers; ++i) + absl::StrAppend(&rv, "\n", DirString(i), ": ", points_[i]); + return rv; +} + +void EuchreState::InformationStateTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + std::fill(values.begin(), values.end(), 0.0); + SPIEL_CHECK_EQ(values.size(), kInformationStateTensorSize); + if (upcard_ == kInvalidAction) return; + auto ptr = values.begin(); + // Dealer position + ptr[static_cast(dealer_)] = 1; + ptr += kNumPlayers; + // Upcard + ptr[upcard_] = 1; + ptr += kNumCards; + // Bidding [Clubs, Diamonds, Hearts, Spades, Pass] + for (int i = 0; i < num_passes_; ++i) { + ptr[kNumSuits + 1] = 1; + ptr += (kNumSuits + 1); + } + if (num_passes_ == 2 * kNumPlayers) return; + if (trump_suit_ != Suit::kInvalidSuit) { + ptr[static_cast(trump_suit_)] = 1; + } + ptr += (kNumSuits + 1); + for (int i = 0; i < 2 * kNumPlayers - num_passes_ - 1; ++i) + ptr += (kNumSuits + 1); + // Go alone + if (declarer_go_alone_) ptr[0] = 1; + if (lone_defender_ == first_defender_) ptr[1] = 1; + if (lone_defender_ == second_defender_) ptr[2] = 1; + ptr += 3; + // Current hand + for (int i = 0; i < kNumCards; ++i) + if (holder_[i] == player) ptr[i] = 1; + ptr += kNumCards; + // History of tricks, presented in the format: N E S W N E S + int current_trick = std::min(num_cards_played_ / num_active_players_, + static_cast(tricks_.size() - 1)); + for (int i = 0; i < current_trick; ++i) { + Player leader = tricks_[i].Leader(); + ptr += leader * kNumCards; + int offset = 0; + for (auto card : tricks_[i].Cards()) { + ptr[card] = 1; + ptr += kNumCards; + ++offset; + while (!active_players_[(leader + offset) % kNumPlayers]) { + ptr += kNumCards; + ++offset; + } + } + SPIEL_CHECK_EQ(offset, kNumPlayers); + ptr += (kNumPlayers - leader - 1) * kNumCards; + } + Player leader = tricks_[current_trick].Leader(); + int offset = 0; + if (leader != kInvalidPlayer) { + auto cards = tricks_[current_trick].Cards(); + ptr += leader * kNumCards; + for (auto card : cards) { + ptr[card] = 1; + ptr += kNumCards; + ++offset; + while (!active_players_[(leader + offset) % kNumPlayers]) { + ptr += kNumCards; + ++offset; + } + } + } + // Current trick may contain less than four cards. 
+ if (offset < kNumPlayers) { + ptr += (kNumPlayers - offset) * kNumCards; + } + // Move to the end of current trick. + ptr += (kNumPlayers - std::max(leader, 0) - 1) * kNumCards; + // Skip over unplayed tricks. + ptr += (kNumTricks - current_trick - 1) * kTrickTensorSize; + SPIEL_CHECK_EQ(ptr, values.end()); +} + +std::vector EuchreState::LegalActions() const { + switch (phase_) { + case Phase::kDealerSelection: + return DealerSelectionLegalActions(); + case Phase::kDeal: + return DealLegalActions(); + case Phase::kBidding: + return BiddingLegalActions(); + case Phase::kDiscard: + return DiscardLegalActions(); + case Phase::kGoAlone: + return GoAloneLegalActions(); + case Phase::kPlay: + return PlayLegalActions(); + default: + return {}; + } +} + +std::vector EuchreState::DealerSelectionLegalActions() const { + SPIEL_CHECK_EQ(history_.size(), 0); + std::vector legal_actions; + legal_actions.reserve(kNumPlayers); + for (int i = 0; i < kNumPlayers; ++i) legal_actions.push_back(i); + return legal_actions; +} + +std::vector EuchreState::DealLegalActions() const { + std::vector legal_actions; + legal_actions.reserve(kNumCards - num_cards_dealt_); + for (int i = 0; i < kNumCards; ++i) { + if (!holder_[i].has_value()) legal_actions.push_back(i); + } + SPIEL_CHECK_GT(legal_actions.size(), 0); + return legal_actions; +} + +std::vector EuchreState::BiddingLegalActions() const { + std::vector legal_actions; + legal_actions.push_back(kPassAction); + if (stick_the_dealer_ && num_passes_ == 2 * kNumPlayers - 1) + legal_actions.pop_back(); + Suit suit = CardSuit(upcard_); + if (num_passes_ < kNumPlayers) { + switch (suit) { + case Suit::kClubs: + legal_actions.push_back(kClubsTrumpAction); + break; + case Suit::kDiamonds: + legal_actions.push_back(kDiamondsTrumpAction); + break; + case Suit::kHearts: + legal_actions.push_back(kHeartsTrumpAction); + break; + case Suit::kSpades: + legal_actions.push_back(kSpadesTrumpAction); + break; + case Suit::kInvalidSuit: + SpielFatalError("Suit of upcard is invalid."); + } + } else { + switch (suit) { + case Suit::kClubs: + legal_actions.push_back(kDiamondsTrumpAction); + legal_actions.push_back(kHeartsTrumpAction); + legal_actions.push_back(kSpadesTrumpAction); + break; + case Suit::kDiamonds: + legal_actions.push_back(kClubsTrumpAction); + legal_actions.push_back(kHeartsTrumpAction); + legal_actions.push_back(kSpadesTrumpAction); + break; + case Suit::kHearts: + legal_actions.push_back(kClubsTrumpAction); + legal_actions.push_back(kDiamondsTrumpAction); + legal_actions.push_back(kSpadesTrumpAction); + break; + case Suit::kSpades: + legal_actions.push_back(kClubsTrumpAction); + legal_actions.push_back(kDiamondsTrumpAction); + legal_actions.push_back(kHeartsTrumpAction); + break; + case Suit::kInvalidSuit: + SpielFatalError("Suit of upcard is invalid."); + } + } + return legal_actions; +} + +std::vector EuchreState::DiscardLegalActions() const { + std::vector legal_actions; + for (int card = 0; card < kNumCards; ++card) { + if (holder_[card] == current_player_ && card != upcard_) { + legal_actions.push_back(card); + } + } + SPIEL_CHECK_EQ(legal_actions.size(), kNumTricks); + return legal_actions; +} + +std::vector EuchreState::GoAloneLegalActions() const { + std::vector legal_actions; + legal_actions.push_back(kGoAloneAction); + legal_actions.push_back(kPlayWithPartnerAction); + return legal_actions; +} + +std::vector EuchreState::PlayLegalActions() const { + std::vector legal_actions; + // Check if we can follow suit. 
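+  // Note: the left bower (the jack of the suit sharing the trump colour)
+  // counts as a trump card, so it is added to the legal set when trump is
+  // led and excluded when its printed suit is led.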
+ if (num_cards_played_ % num_active_players_ != 0) { + Suit led_suit = CurrentTrick().LedSuit(); + if (led_suit == trump_suit_) { + for (int rank = 0; rank < kNumCardsPerSuit; ++rank) { + if (holder_[Card(led_suit, rank)] == current_player_) { + legal_actions.push_back(Card(led_suit, rank)); + } + } + if (holder_[left_bower_] == current_player_) { + // Left bower belongs to trump suit. + legal_actions.push_back(left_bower_); + } + } else { + for (int rank = 0; rank < kNumCardsPerSuit; ++rank) { + if (holder_[Card(led_suit, rank)] == current_player_ && + Card(led_suit, rank) != left_bower_) { + legal_actions.push_back(Card(led_suit, rank)); + } + } + } + } + if (!legal_actions.empty()) { + absl::c_sort(legal_actions); // Sort required because of left bower. + return legal_actions; + } + // Can't follow suit, so we can play any of the cards in our hand. + for (int card = 0; card < kNumCards; ++card) { + if (holder_[card] == current_player_) legal_actions.push_back(card); + } + return legal_actions; +} + +std::vector> EuchreState::ChanceOutcomes() const { + std::vector> outcomes; + if (history_.empty()) { + outcomes.reserve(kNumPlayers); + const double p = 1.0 / kNumPlayers; + for (int dir = 0; dir < kNumPlayers; ++dir) { + outcomes.emplace_back(dir, p); + } + return outcomes; + } + int num_cards_remaining = kNumCards - num_cards_dealt_; + outcomes.reserve(num_cards_remaining); + const double p = 1.0 / num_cards_remaining; + for (int card = 0; card < kNumCards; ++card) { + if (!holder_[card].has_value()) outcomes.emplace_back(card, p); + } + return outcomes; +} + +void EuchreState::DoApplyAction(Action action) { + switch (phase_) { + case Phase::kDealerSelection: + return ApplyDealerSelectionAction(action); + case Phase::kDeal: + return ApplyDealAction(action); + case Phase::kBidding: + return ApplyBiddingAction(action); + case Phase::kDiscard: + return ApplyDiscardAction(action); + case Phase::kGoAlone: + return ApplyGoAloneAction(action); + case Phase::kPlay: + return ApplyPlayAction(action); + case Phase::kGameOver: + SpielFatalError("Cannot act in terminal states"); + } +} + +void EuchreState::ApplyDealerSelectionAction(int selected_dealer) { + SPIEL_CHECK_EQ(history_.size(), 0); + dealer_ = selected_dealer; + phase_ = Phase::kDeal; +} + +void EuchreState::ApplyDealAction(int card) { + if (num_cards_dealt_ == kNumPlayers * kNumTricks) { + initial_deal_ = holder_; // Preserve the initial deal for easy retrieval. + upcard_ = card; + ++num_cards_dealt_; + phase_ = Phase::kBidding; + current_player_ = (dealer_ + 1) % kNumPlayers; + } else { + holder_[card] = (dealer_ + num_cards_dealt_) % kNumPlayers; + ++num_cards_dealt_; + } +} + +void EuchreState::ApplyBiddingAction(int action) { + if (action == kPassAction) { + ++num_passes_; + if (num_passes_ == kNumPlayers * 2) { + phase_ = Phase::kGameOver; + current_player_ = kTerminalPlayerId; + } else { + current_player_ = (current_player_ + 1) % kNumPlayers; + } + } else { + // Trump suit selected. 
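+      // The makers are recorded below and the bid is mapped to a trump suit.
+      // For example, if hearts are named trump, right_bower_ becomes the
+      // jack of hearts and left_bower_ the jack of diamonds (the suit of the
+      // same colour).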
+ declarer_ = current_player_; + first_defender_ = (declarer_ + 1) % kNumPlayers; + declarer_partner_ = (declarer_ + 2) % kNumPlayers; + second_defender_ = (declarer_ + 3) % kNumPlayers; + switch (action) { + case kClubsTrumpAction: + trump_suit_ = Suit::kClubs; + break; + case kDiamondsTrumpAction: + trump_suit_ = Suit::kDiamonds; + break; + case kHeartsTrumpAction: + trump_suit_ = Suit::kHearts; + break; + case kSpadesTrumpAction: + trump_suit_ = Suit::kSpades; + break; + default: + SpielFatalError("Invalid bidding action."); + } + right_bower_ = Card(trump_suit_, kJackRank); + left_bower_ = Card(same_color_suit[trump_suit_], kJackRank); + if (num_passes_ < kNumPlayers) { + // Top card was ordered up to dealer in first round of bidding. + holder_[upcard_] = dealer_; + phase_ = Phase::kDiscard; + current_player_ = dealer_; + } else { + // Trump suit selected in second round of bidding. + phase_ = Phase::kGoAlone; + } + } +} + +void EuchreState::ApplyDiscardAction(int card) { + SPIEL_CHECK_TRUE(holder_[card] == current_player_); + discard_ = card; + holder_[card] = absl::nullopt; + phase_ = Phase::kGoAlone; + current_player_ = declarer_; +} + +void EuchreState::ApplyGoAloneAction(int action) { + if (declarer_go_alone_.has_value() && allow_lone_defender_) { + if (action == kGoAloneAction) { + lone_defender_ = current_player_; + active_players_[(lone_defender_ + 2) % kNumPlayers] = false; + --num_active_players_; + phase_ = Phase::kPlay; + current_player_ = (dealer_ + 1) % kNumPlayers; + while (!active_players_[current_player_]) { + current_player_ = (current_player_ + 1) % kNumPlayers; + } + } else if (action == kPlayWithPartnerAction) { + if (current_player_ == (dealer_ + 1) % kNumPlayers || + current_player_ == (dealer_ + 2) % kNumPlayers) { + current_player_ = (current_player_ + 2) % kNumPlayers; + } else { + phase_ = Phase::kPlay; + current_player_ = (dealer_ + 1) % kNumPlayers; + while (!active_players_[current_player_]) { + current_player_ = (current_player_ + 1) % kNumPlayers; + } + } + } else { + SpielFatalError("Invalid GoAlone action."); + } + } else { + if (action == kGoAloneAction) { + declarer_go_alone_ = true; + active_players_[declarer_partner_] = false; + --num_active_players_; + } else if (action == kPlayWithPartnerAction) { + declarer_go_alone_ = false; + } else { + SpielFatalError("Invalid GoAlone action."); + } + if (allow_lone_defender_) { + current_player_ = (dealer_ + 1) % kNumPlayers; + if (current_player_ == declarer_ || current_player_ == declarer_partner_) + current_player_ = (current_player_ + 1) % kNumPlayers; + } else { + phase_ = Phase::kPlay; + current_player_ = (dealer_ + 1) % kNumPlayers; + if (declarer_go_alone_.value() && current_player_ == declarer_partner_) { + current_player_ = (current_player_ + 1) % kNumPlayers; + } + } + } +} + +void EuchreState::ApplyPlayAction(int card) { + SPIEL_CHECK_TRUE(holder_[card] == current_player_); + holder_[card] = absl::nullopt; + if (num_cards_played_ % num_active_players_ == 0) { + CurrentTrick() = Trick(current_player_, trump_suit_, card); + } else { + CurrentTrick().Play(current_player_, card); + } + // Update player and point totals. 
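+  // The winner of a completed trick leads the next one; once every active
+  // player has played kNumTricks cards the hand ends and ComputeScore()
+  // converts tricks won into points_.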
+ Trick current_trick = CurrentTrick(); + ++num_cards_played_; + if (num_cards_played_ % num_active_players_ == 0) { + current_player_ = current_trick.Winner(); + } else { + current_player_ = (current_player_ + 1) % kNumPlayers; + while (!active_players_[current_player_]) { + current_player_ = (current_player_ + 1) % kNumPlayers; + } + } + if (num_cards_played_ == num_active_players_ * kNumTricks) { + phase_ = Phase::kGameOver; + current_player_ = kTerminalPlayerId; + ComputeScore(); + } +} + +void EuchreState::ComputeScore() { + SPIEL_CHECK_TRUE(IsTerminal()); + std::vector tricks_won(kNumPlayers, 0); + for (int i = 0; i < kNumTricks; ++i) { + tricks_won[tricks_[i].Winner()] += 1; + } + int makers_tricks_won = tricks_won[declarer_] + tricks_won[declarer_partner_]; + int makers_score; + if (makers_tricks_won >= 0 && makers_tricks_won <= 2) { + if (lone_defender_ >= 0) + makers_score = -4; + else + makers_score = -2; + } else if (makers_tricks_won >= 3 && makers_tricks_won <= 4) { + makers_score = 1; + } else if (makers_tricks_won == 5) { + if (declarer_go_alone_.value()) + makers_score = 4; + else + makers_score = 2; + } else { + SpielFatalError("Invalid number of tricks won by makers."); + } + for (Player i = 0; i < kNumPlayers; ++i) { + if (i == declarer_ || i == declarer_partner_) + points_[i] = makers_score; + else + points_[i] = -makers_score; + } +} + +std::vector EuchreState::Tricks() const { + return std::vector(tricks_.begin(), tricks_.end()); +} + +Trick::Trick(Player leader, Suit trump_suit, int card) + : winning_card_(card), + led_suit_(CardSuit(card, trump_suit)), + trump_suit_(trump_suit), + trump_played_(trump_suit != Suit::kInvalidSuit && + trump_suit == led_suit_), + leader_(leader), + winning_player_(leader), + cards_{card} {} + +// TODO(jhtschultz) Find a simpler way of computing this. +void Trick::Play(Player player, int card) { + cards_.push_back(card); + bool new_winner = false; + if (winning_player_ == kInvalidPlayer) new_winner = true; + if (CardSuit(card, trump_suit_) == trump_suit_) { + trump_played_ = true; + if (CardSuit(winning_card_, trump_suit_) == trump_suit_) { + if (CardRank(card, trump_suit_) > CardRank(winning_card_, trump_suit_)) { + new_winner = true; + } + } else { + new_winner = true; + } + } else { + if (CardSuit(winning_card_, trump_suit_) != trump_suit_ && + CardSuit(winning_card_, trump_suit_) == CardSuit(card, trump_suit_) && + CardRank(card, trump_suit_) > CardRank(winning_card_, trump_suit_)) { + new_winner = true; + } + } + if (new_winner) { + winning_card_ = card; + winning_player_ = player; + } +} + +} // namespace euchre +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/euchre/euchre.h b/scenarios/bargaining/open_spiel/open_spiel/games/euchre/euchre.h new file mode 100644 index 0000000..1572294 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/euchre/euchre.h @@ -0,0 +1,265 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef OPEN_SPIEL_GAMES_EUCHRE_H_ +#define OPEN_SPIEL_GAMES_EUCHRE_H_ + +// Full implementation of the classic trick taking game Euchre. +// +// https://en.wikipedia.org/wiki/Euchre +// https://www.pagat.com/euchre/euchre.html +// +// This implementation uses standard North American rules with "super-Euchres", +// i.e. the makers lose 4 points if they fail to win a single trick. By default, +// only the declarer has the option of playing alone, but optionally the +// defenders can go alone as well. The popular variation "stick the dealer" is +// enabled by default as it has interesting strategic implications and increases +// playability by avoiding drawn hands. + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" + +namespace open_spiel { +namespace euchre { + +inline constexpr int kNumPlayers = 4; +inline constexpr int kJackRank = 2; +inline constexpr int kNumSuits = 4; +inline constexpr int kNumCardsPerSuit = 6; +inline constexpr int kNumCards = 24; + +inline constexpr int kPassAction = 24; +inline constexpr int kClubsTrumpAction = 25; +inline constexpr int kDiamondsTrumpAction = 26; +inline constexpr int kHeartsTrumpAction = 27; +inline constexpr int kSpadesTrumpAction = 28; +inline constexpr int kGoAloneAction = 29; +inline constexpr int kPlayWithPartnerAction = 30; +inline constexpr int kNumDistinctActions = 31; +// Dealer selection + deal + upcard +inline constexpr int kFirstBiddingActionInHistory = 22; + +inline constexpr int kMaxBids = 8; +inline constexpr int kNumTricks = 5; +inline constexpr int kFullHandSize = 5; +inline constexpr int kMaxScore = 4; +inline constexpr int kMinScore = -4; +inline constexpr int kTrickTensorSize = kNumCards * 7; // N E S W N E S +inline constexpr int kInformationStateTensorSize = + kNumPlayers // Dealer + + kNumCards // Upcard + + (kNumSuits + 1) * kMaxBids // Bidding + + 3 // Go alone (declarer, defender 1 & 2) + + kNumCards // Current hand + + kNumTricks * kTrickTensorSize; // History of tricks + +enum class Phase { kDealerSelection, kDeal, kBidding, kDiscard, kGoAlone, kPlay, + kGameOver }; +enum class Suit { kInvalidSuit = -1, kClubs = 0, kDiamonds = 1, + kHearts = 2, kSpades = 3 }; +enum Seat { kNorth, kEast, kSouth, kWest }; +// Cards are represented as rank * kNumSuits + suit. +inline Suit CardSuit(int card) { return Suit(card % kNumSuits); } +Suit CardSuit(int card, Suit trump_suit); +inline int CardRank(int card) { return card / kNumSuits; } +int CardRank(int card, Suit trump_suit); +inline int Card(Suit suit, int rank) { + return rank * kNumSuits + static_cast(suit); +} +constexpr char kRankChar[] = "9TJQKA"; +constexpr char kSuitChar[] = "CDHS"; +constexpr char kDirChar[] = "NESW"; +inline std::string DirString(int dir) { + if (dir < 0) + return ""; + else + return {kDirChar[dir]}; +} +inline std::string CardString(int card) { + return {kSuitChar[static_cast(CardSuit(card))], + kRankChar[CardRank(card)]}; +} + + +// State of a single trick. 
+class Trick { + public: + Trick() : Trick{kInvalidPlayer, Suit::kInvalidSuit, kInvalidAction} {} + Trick(Player leader, Suit trump_suit, int card); + void Play(Player player, int card); + int WinningCard() const { return winning_card_; } + Suit LedSuit() const { return led_suit_; } + Suit TrumpSuit() const { return trump_suit_; } + bool TrumpPlayed() const { return trump_played_; } + Player Leader() const { return leader_; } + Player Winner() const { return winning_player_; } + std::vector Cards() const { return cards_; } + + private: + int winning_card_; + Suit led_suit_; + Suit trump_suit_; + bool trump_played_; + Player leader_; // First player to throw. + Player winning_player_; + std::vector cards_; +}; + +class EuchreState : public State { + public: + EuchreState(std::shared_ptr game, bool allow_lone_defender, + bool stick_the_dealer); + Player CurrentPlayer() const override { return current_player_; } + std::string ActionToString(Player player, Action action) const override; + std::string ToString() const override; + bool IsTerminal() const override { return phase_ == Phase::kGameOver; } + std::vector Returns() const override { return points_; } + void InformationStateTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override { + return std::unique_ptr(new EuchreState(*this)); + } + std::vector LegalActions() const override; + std::vector> ChanceOutcomes() const override; + + int NumCardsDealt() const { return num_cards_dealt_; } + int NumCardsPlayed() const { return num_cards_played_; } + int NumPasses() const { return num_passes_; } + int Upcard() const { return upcard_; } + int Discard() const { return discard_; } + int TrumpSuit() const { return static_cast(trump_suit_); } + int LeftBower() const { return left_bower_; } + int RightBower() const { return right_bower_; } + int Declarer() const { return declarer_; } + int FirstDefender() const { return first_defender_; } + int DeclarerPartner() const { return declarer_partner_; } + int SecondDefender() const { return second_defender_; } + absl::optional DeclarerGoAlone() const { return declarer_go_alone_; } + Player LoneDefender() const { return lone_defender_; } + std::vector ActivePlayers() const { return active_players_; } + Player Dealer() const { return dealer_; } + + Phase CurrentPhase() const { return phase_; } + + int CurrentTrickIndex() const { + return std::min(num_cards_played_ / num_active_players_, + static_cast(tricks_.size())); + } + Trick& CurrentTrick() { return tricks_[CurrentTrickIndex()]; } + const Trick& CurrentTrick() const { return tricks_[CurrentTrickIndex()]; } + + std::array, kNumCards> CardHolder() const { + return holder_; + } + std::vector Tricks() const; + + protected: + void DoApplyAction(Action action) override; + + private: + std::vector DealerSelectionLegalActions() const; + std::vector DealLegalActions() const; + std::vector BiddingLegalActions() const; + std::vector DiscardLegalActions() const; + std::vector GoAloneLegalActions() const; + std::vector PlayLegalActions() const; + void ApplyDealerSelectionAction(int selected_dealer); + void ApplyDealAction(int card); + void ApplyBiddingAction(int action); + void ApplyDiscardAction(int card); + void ApplyGoAloneAction(int action); + void ApplyPlayAction(int card); + + void ComputeScore(); + + std::array FormatHand(int player, + bool mark_voids) const; + std::string FormatBidding() const; + std::string FormatDeal() const; + std::string FormatPlay() const; + std::string FormatPoints() const; + + const bool 
allow_lone_defender_; + const bool stick_the_dealer_; + + int num_cards_dealt_ = 0; + int num_cards_played_ = 0; + int num_passes_ = 0; + int upcard_ = kInvalidAction; + int discard_ = kInvalidAction; + Suit trump_suit_ = Suit::kInvalidSuit; + int left_bower_ = kInvalidAction; + int right_bower_ = kInvalidAction; + Player declarer_ = kInvalidPlayer; + Player declarer_partner_ = kInvalidPlayer; + Player first_defender_ = kInvalidPlayer; + Player second_defender_ = kInvalidPlayer; + absl::optional declarer_go_alone_; + Player lone_defender_ = kInvalidPlayer; + std::vector active_players_ = std::vector(kNumPlayers, true); + int num_active_players_ = kNumPlayers; + Player current_player_ = kChancePlayerId; + Player dealer_ = kChancePlayerId; + Phase phase_ = Phase::kDealerSelection; + std::array tricks_{}; + std::array, kNumCards> holder_{}; + std::array, kNumCards> initial_deal_{}; + std::vector points_ = std::vector(kNumPlayers, 0); +}; + +class EuchreGame : public Game { + public: + explicit EuchreGame(const GameParameters& params); + int NumDistinctActions() const override { return kNumDistinctActions; } + int MaxChanceOutcomes() const override { return kNumCards; } + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new EuchreState(shared_from_this(), + /*allow_lone_defender=*/allow_lone_defender_, + /*stick_the_dealer=*/stick_the_dealer_)); + } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { return kMinScore; } + double MaxUtility() const override { return kMaxScore; } + absl::optional UtilitySum() const override { return 0; } + std::vector InformationStateTensorShape() const override { + return {kInformationStateTensorSize}; + } + int MaxGameLength() const override { + return (2 * kNumPlayers) + // Max 2 rounds of bidding + 1 + // Declarer go alone? + (2 * allow_lone_defender_) + // Defenders go alone? (optional) + (kNumPlayers * kNumTricks); // Play of hand + } + int MaxChanceNodesInHistory() const override { + return 1 + // Dealer selection + (kNumPlayers * kNumTricks) + // Deal hands + 1; // Upcard + } + + private: + const bool allow_lone_defender_; + const bool stick_the_dealer_; +}; + +} // namespace euchre +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_EUCHRE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/euchre/euchre_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/euchre/euchre_test.cc new file mode 100644 index 0000000..ba5959d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/euchre/euchre_test.cc @@ -0,0 +1,41 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
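+// Illustrative note on the card encoding defined in euchre.h: a card index
+// is rank * kNumSuits + suit over the 24-card deck (ranks 9,T,J,Q,K,A and
+// suits C,D,H,S), so for instance
+//   Card(Suit::kHearts, kJackRank)   // == 2 * 4 + 2 == 10
+//   CardString(10)                   // == "HJ"
+// which is the index space used throughout euchre.cc.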
+ +#include "open_spiel/observer.h" +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace euchre { +namespace { + +void BasicGameTests() { + testing::LoadGameTest("euchre"); + testing::ChanceOutcomesTest(*LoadGame("euchre")); + testing::RandomSimTest(*LoadGame("euchre"), 10); + + auto observer = LoadGame("euchre") + ->MakeObserver(kInfoStateObsType, + GameParametersFromString("single_tensor")); + testing::RandomSimTestCustomObserver(*LoadGame("euchre"), observer); +} + + +} // namespace +} // namespace euchre +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::euchre::BasicGameTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/first_sealed_auction/first_sealed_auction.cc b/scenarios/bargaining/open_spiel/open_spiel/games/first_sealed_auction/first_sealed_auction.cc new file mode 100644 index 0000000..4322124 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/first_sealed_auction/first_sealed_auction.cc @@ -0,0 +1,209 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/first_sealed_auction/first_sealed_auction.h" + +#include +#include +#include + +namespace open_spiel { +namespace first_sealed_auction { +namespace { + +// Facts about the game +const GameType kGameType{/*short_name=*/"first_sealed_auction", + /*long_name=*/"First-Price Sealed-Bid Auction", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/10, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + { + {"players", GameParameter(kDefaultPlayers)}, + {"max_value", GameParameter(kDefaultMaxValue)}, + }}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new FPSBAGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); +} // namespace + +FPSBAGame::FPSBAGame(const GameParameters& params) + : Game(kGameType, params), + num_players_(ParameterValue("players")), + max_value_(ParameterValue("max_value")) {} + +FPSBAState::FPSBAState(std::shared_ptr game) + : State(game), max_value_(game->NumDistinctActions()) {} + +int FPSBAState::CurrentPlayer() const { + if (valuations_.size() < num_players_) return kChancePlayerId; + if (bids_.size() < num_players_) return bids_.size(); + if (winner_ == kInvalidPlayer) return kChancePlayerId; + return kTerminalPlayerId; +} + +std::vector FPSBAState::EligibleWinners() const { + int max_bid = *std::max_element(bids_.begin(), bids_.end()); + std::vector eligibles; + for (auto player = Player{0}; player < num_players_; player++) { + if (bids_[player] == 
max_bid) { + eligibles.push_back(player); + } + } + return eligibles; +} + +std::vector FPSBAState::LegalActions() const { + if (valuations_.size() < num_players_) { + std::vector values(max_value_); + std::iota(values.begin(), values.end(), 1); + return values; + } + if (bids_.size() < num_players_) { + std::vector bids(valuations_[bids_.size()]); + std::iota(bids.begin(), bids.end(), 0); + return bids; + } + if (winner_ == kInvalidPlayer) { + return EligibleWinners(); + } + return {}; +} + +std::string FPSBAState::ActionToString(Player player, Action action_id) const { + if (player != kChancePlayerId) { + return absl::StrCat("Player ", player, " bid: ", action_id); + } else if (valuations_.size() < num_players_) { + return absl::StrCat("Player ", valuations_.size(), " value: ", action_id); + } else { + return absl::StrCat("Chose winner ", action_id); + } +} + +std::string FPSBAState::ToString() const { + return absl::StrCat( + absl::StrJoin(valuations_, ","), ";", absl::StrJoin(bids_, ","), + winner_ == kInvalidPlayer ? "" : absl::StrCat(";", winner_)); +} + +bool FPSBAState::IsTerminal() const { return winner_ != kInvalidPlayer; } + +std::vector FPSBAState::Returns() const { + std::vector returns(num_players_); + if (winner_ != kInvalidPlayer) { + returns[winner_] = valuations_[winner_] - bids_[winner_]; + } + return returns; +} + +std::unique_ptr FPSBAState::Clone() const { + return std::unique_ptr(new FPSBAState(*this)); +} + +void FPSBAState::DoApplyAction(Action action_id) { + if (valuations_.size() < num_players_) { + valuations_.push_back(action_id); + } else if (bids_.size() < num_players_) { + bids_.push_back(action_id); + } else if (winner_ == kInvalidPlayer) { + winner_ = action_id; + } else { + SpielFatalError( + absl::StrCat("Can't apply action in terminal state: ", action_id)); + } +} + +std::string FPSBAState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + if (valuations_.size() <= player) return absl::StrCat("p", player); + if (bids_.size() <= player) + return absl::StrCat("p", player, " val ", valuations_[player]); + return absl::StrCat("p", player, " val ", valuations_[player], " bid ", + bids_[player]); +} + +void FPSBAState::InformationStateTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + SPIEL_CHECK_EQ(values.size(), 2 * max_value_ + num_players_); + std::fill(values.begin(), values.end(), 0); + auto cursor = values.begin(); + cursor[player] = 1; + cursor += num_players_; + if (valuations_.size() > player) { + cursor[valuations_[player] - 1] = 1; + } + cursor += max_value_; + if (bids_.size() > player) { + cursor[bids_[player]] = 1; + } + cursor += max_value_; + SPIEL_CHECK_EQ(cursor - values.begin(), values.size()); +} + +std::string FPSBAState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + if (valuations_.size() <= player) return ""; + return absl::StrCat(valuations_[player]); +} + +void FPSBAState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + SPIEL_CHECK_EQ(values.size(), max_value_); + std::fill(values.begin(), values.end(), 0); + if (valuations_.size() > player) { + values[valuations_[player] - 1] = 1; + } +} + +ActionsAndProbs FPSBAState::ChanceOutcomes() const { + ActionsAndProbs valuesAndProbs; + if (valuations_.size() < num_players_) { + for (int i = 1; i <= 
max_value_; i++) { + valuesAndProbs.push_back(std::make_pair(i, 1. / max_value_)); + } + } else if (bids_.size() == num_players_ && winner_ == kInvalidPlayer) { + int max_bid = *std::max_element(bids_.begin(), bids_.end()); + int num_tie = std::count(bids_.begin(), bids_.end(), max_bid); + for (auto player = Player{0}; player < num_players_; player++) { + if (bids_[player] == max_bid) { + valuesAndProbs.push_back(std::make_pair(player, 1. / num_tie)); + } + } + } else { + SpielFatalError("This isn't a chance node"); + } + return valuesAndProbs; +} + +} // namespace first_sealed_auction +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/first_sealed_auction/first_sealed_auction.h b/scenarios/bargaining/open_spiel/open_spiel/games/first_sealed_auction/first_sealed_auction.h new file mode 100644 index 0000000..737c177 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/first_sealed_auction/first_sealed_auction.h @@ -0,0 +1,112 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_FPSBA_H_ +#define OPEN_SPIEL_GAMES_FPSBA_H_ + +#include +#include +#include +#include +#include + +#include "open_spiel/spiel.h" + +// First-Price Sealed-Bid Auction: +// https://en.wikipedia.org/wiki/First-price_sealed-bid_auction +// +// Each player has a valuation of the target object from 1 to K, according to a +// uniform distribution, and places bids from 0 to (valuation - 1). The highest +// bidder gets reward (valuation - bid); the others get 0. In the case of a +// tie, the winner is randomly determined amongst the highest bidders. +// +// Parameters: +// "max_value" int maximum valuation (default = 10) +// "players" int number of players (default = 2) + +namespace open_spiel { +namespace first_sealed_auction { + +// Constants. +inline constexpr int kDefaultPlayers = 2; +inline constexpr int kDefaultMaxValue = 10; + +// State of an in-play game. +class FPSBAState : public State { + public: + FPSBAState(std::shared_ptr game); + FPSBAState(const FPSBAState& other) = default; + + Player CurrentPlayer() const override; + std::vector LegalActions() const override; + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::unique_ptr Clone() const override; + std::string InformationStateString(Player player) const override; + void InformationStateTensor(Player player, + absl::Span values) const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + ActionsAndProbs ChanceOutcomes() const override; + + protected: + void DoApplyAction(Action action_id) override; + + private: + const int max_value_; + std::vector bids_; + std::vector valuations_; + int winner_ = kInvalidPlayer; + std::vector EligibleWinners() const; +}; + +// Game object. 
+class FPSBAGame : public Game { + public: + explicit FPSBAGame(const GameParameters& params); + int NumDistinctActions() const override { return max_value_; } + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new FPSBAState(shared_from_this())); + } + int MaxChanceOutcomes() const override { + return std::max(max_value_ + 1, num_players_); + } + int NumPlayers() const override { return num_players_; } + double MinUtility() const override { return 0; } + double MaxUtility() const override { return max_value_; } + int MaxGameLength() const override { return num_players_; } + // There is an additional chance node after all the bids to determine a winner + // in the case of a tie. + int MaxChanceNodesInHistory() const override { return num_players_ + 1; } + std::vector InformationStateTensorShape() const override { + return {max_value_ * 2 + num_players_}; + }; + std::vector ObservationTensorShape() const override { + return {max_value_}; + }; + + private: + // Number of players. + const int num_players_; + // Maximum valuation, which is one more than maximum bid. + const int max_value_; +}; + +} // namespace first_sealed_auction +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_FPSBA_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/first_sealed_auction/first_sealed_auction_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/first_sealed_auction/first_sealed_auction_test.cc new file mode 100644 index 0000000..dd81981 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/first_sealed_auction/first_sealed_auction_test.cc @@ -0,0 +1,73 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
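+// Worked example of the payoff rule described in first_sealed_auction.h:
+// with max_value = 10, a bidder who draws valuation 7 may bid 0..6; if a
+// bid of 3 ends up winning (possibly via the tie-breaking chance node),
+// that bidder's return is 7 - 3 = 4 and every other player's return is 0.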
+ +#include "open_spiel/games/first_sealed_auction/first_sealed_auction.h" + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace first_sealed_auction { +namespace { + +namespace testing = open_spiel::testing; + +void BasicFPSBATests(const GameParameters& params) { + testing::LoadGameTest("first_sealed_auction"); + testing::ChanceOutcomesTest(*LoadGame("first_sealed_auction", params)); + testing::RandomSimTest(*LoadGame("first_sealed_auction", params), 100); + testing::CheckChanceOutcomes(*LoadGame("first_sealed_auction", params)); +} + +void TieBreak() { + std::shared_ptr game = LoadGame( + "first_sealed_auction", {{"players", open_spiel::GameParameter(3)}, + {"max_value", open_spiel::GameParameter(5)}}); + std::vector action({1, 2, 3, 4, 5}); + auto state = game->NewInitialState(); + SPIEL_CHECK_EQ(state->LegalActions(), std::vector({1, 2, 3, 4, 5})); + state->ApplyAction(5); + SPIEL_CHECK_EQ(state->LegalActions(), std::vector({1, 2, 3, 4, 5})); + state->ApplyAction(2); + SPIEL_CHECK_EQ(state->LegalActions(), std::vector({1, 2, 3, 4, 5})); + state->ApplyAction(4); + SPIEL_CHECK_EQ(state->LegalActions(), std::vector({0, 1, 2, 3, 4})); + state->ApplyAction(2); + SPIEL_CHECK_EQ(state->LegalActions(), std::vector({0, 1})); + state->ApplyAction(1); + SPIEL_CHECK_EQ(state->LegalActions(), std::vector({0, 1, 2, 3})); + state->ApplyAction(2); + SPIEL_CHECK_EQ(state->LegalActions(), std::vector({0, 2})); + state->ApplyAction(2); + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_EQ(state->Returns(), std::vector({0, 0, 2})); +} +} // namespace +} // namespace first_sealed_auction +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::first_sealed_auction::BasicFPSBATests({}); + open_spiel::first_sealed_auction::BasicFPSBATests( + {{"players", open_spiel::GameParameter(1)}, + {"max_value", open_spiel::GameParameter(1)}}); + open_spiel::first_sealed_auction::BasicFPSBATests( + {{"players", open_spiel::GameParameter(10)}, + {"max_value", open_spiel::GameParameter(2)}}); + open_spiel::first_sealed_auction::BasicFPSBATests( + {{"players", open_spiel::GameParameter(2)}, + {"max_value", open_spiel::GameParameter(40)}}); + open_spiel::first_sealed_auction::TieBreak(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/gamut/CMakeLists.txt b/scenarios/bargaining/open_spiel/open_spiel/games/gamut/CMakeLists.txt new file mode 100644 index 0000000..f812875 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/gamut/CMakeLists.txt @@ -0,0 +1,11 @@ +# The interface to GAMUT is optional. To enable it, set the flag in +# scripts/global_variables.sh. + +add_library(gamut OBJECT + gamut.cc + gamut.h +) + +add_executable (gamut_test gamut_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(gamut_test gamut_test) diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/gamut/README.md b/scenarios/bargaining/open_spiel/open_spiel/games/gamut/README.md new file mode 100644 index 0000000..7c1e9db --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/gamut/README.md @@ -0,0 +1,13 @@ +# GAMUT games + +This is an interface to load normal-form games from the +[GAMUT](http://gamut.stanford.edu/) games generator. This interface is not +compiled with OpenSpiel by default and must be enabled via the +`OPEN_SPIEL_BUILD_WITH_GAMUT` environment variable (see the Developer Guide) +when OpenSpiel is built. 
+ +It requires a working JVM (`java` binary) and the `gamut.jar` from the GAMUT +project. + +Note that this interface is not regularly tested, so it may break at any time. +Please open an issue to report any problem when using it. diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/gamut/gamut.cc b/scenarios/bargaining/open_spiel/open_spiel/games/gamut/gamut.cc new file mode 100644 index 0000000..01e9181 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/gamut/gamut.cc @@ -0,0 +1,119 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/gamut/gamut.h" + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/file.h" + +namespace open_spiel { +namespace gamut { +namespace { +constexpr const char* kDefaultJavaPath = "java"; +constexpr const int kNumTmpfileRetries = 16; +constexpr const int kNumRandomChars = 32; +constexpr const char* kAlphaChars = + "abcdefghijklmnopqrstuvwxyxABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; +} // namespace + +GamutGenerator::GamutGenerator(const std::string& jar_path, int tmpfile_seed) + : GamutGenerator(kDefaultJavaPath, jar_path, tmpfile_seed) {} + +GamutGenerator::GamutGenerator(const std::string& java_path, + const std::string& jar_path, int tmpfile_seed) + : java_path_(java_path), + jar_path_(jar_path), + rng_(tmpfile_seed == 0 ? std::random_device{}() : tmpfile_seed), + rand_string_(kAlphaChars) {} + +std::shared_ptr GamutGenerator::GenerateGame( + const std::string& cmdline_args) { + return GenerateGame(absl::StrSplit(cmdline_args, ' ')); +} + +std::string GamutGenerator::TmpFile() { + for (int retries = 0; retries < kNumTmpfileRetries; ++retries) { + // Try random files until we find one that does not exist. + absl::c_shuffle(rand_string_, rng_); + std::string candidate = + absl::StrCat(file::GetTmpDir(), "/gamut_tmpgame_", + rand_string_.substr(0, kNumRandomChars)); + if (!file::Exists(candidate)) { + return candidate; + } + } + + SpielFatalError(absl::StrCat("Could not get a temporary file after ", + kNumTmpfileRetries, " tries.")); +} + +std::shared_ptr GamutGenerator::GenerateGame( + const std::vector& cmdline_args) { + // Check that there's no -f and no -output in the command-line args. The get + // added by this generator. + for (const auto& arg : cmdline_args) { + if (arg == "-f") { + SpielFatalError("Do not use -f in the command-line arguments."); + } else if (arg == "-output") { + SpielFatalError("Do not use -output in the command-line arguments."); + } + } + + std::vector arguments = cmdline_args; + arguments.push_back("-output"); + arguments.push_back("GambitOutput"); + + // Lock here to prevent concurrently writing / removal. 
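+  // GAMUT writes a Gambit-format payoff file to a temporary path; it is
+  // loaded back through the nfg_game loader and removed again before the
+  // lock is released, so concurrent calls cannot collide on a file name.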
+ std::shared_ptr game; + { + absl::MutexLock lock(&generation_mutex_); + // Get a temporary file and add it to the arguments. + std::string tmp_filename = TmpFile(); + arguments.push_back("-f"); + arguments.push_back(tmp_filename); + std::string full_cmd = absl::StrCat(java_path_, " -jar ", jar_path_, " ", + absl::StrJoin(arguments, " ")); + int ret_code = system(full_cmd.c_str()); + SPIEL_CHECK_EQ(ret_code, 0); + SPIEL_CHECK_TRUE(file::Exists(tmp_filename)); + game = LoadGame("nfg_game", {{"filename", GameParameter(tmp_filename)}}); + file::Remove(tmp_filename); + } + return game; +} + +std::shared_ptr +GamutGenerator::GenerateMatrixGame( + const std::vector& cmdline_args) { + return std::dynamic_pointer_cast( + GenerateGame(cmdline_args)); +} + +std::shared_ptr +GamutGenerator::GenerateTensorGame( + const std::vector& cmdline_args) { + return std::dynamic_pointer_cast( + GenerateGame(cmdline_args)); +} + +} // namespace gamut +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/gamut/gamut.h b/scenarios/bargaining/open_spiel/open_spiel/games/gamut/gamut.h new file mode 100644 index 0000000..9abf724 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/gamut/gamut.h @@ -0,0 +1,75 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_GAMUT_GAMUT_H_ +#define OPEN_SPIEL_GAMES_GAMUT_GAMUT_H_ + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/synchronization/mutex.h" +#include "open_spiel/matrix_game.h" +#include "open_spiel/spiel.h" +#include "open_spiel/tensor_game.h" + +namespace open_spiel { +namespace gamut { + +// A wrapper class to the GAMUT (http://gamut.stanford.edu) game generator. +// See also, "Run the GAMUT: A Comprehensive Approach to Evaluating +// Game-Theoretic Algorithms." Eugene Nudelman, Jennifer Wortman, +// Kevin Leyton-Brown, Yoav Shoham. AAMAS-2004. +class GamutGenerator { + public: + // Create a game generator with the specified java executable and GAMUT jar + // file. The seed is used for random file names (if 0, uses the current time). + GamutGenerator(const std::string& java_path, const std::string& jar_path, + int tmpfile_seed = 0); + + // Create a game generator using the default path to java executable, defined + // in gamut.cc. The seed is used for random file names (if 0, uses the + // current time). + GamutGenerator(const std::string& jar_path, int tmpfile_seed = 0); + + // Generate a game using GAMUT command-line arguments. Do not use -f nor + // -output, as they are added to the command-line arguments inside this + // function. + std::shared_ptr GenerateGame(const std::string& cmdline_args); + std::shared_ptr GenerateGame( + const std::vector& cmdline_args); + + // Same as above; returns a MatrixGame subtype for two-player games. + std::shared_ptr GenerateMatrixGame( + const std::vector& cmdline_args); + + // Same as above; returns a MatrixGame subtype for games with >= 2 players. 
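+  // Note: this overload returns a TensorGame rather than a MatrixGame,
+  // covering games with two or more players.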
+ std::shared_ptr GenerateTensorGame( + const std::vector& cmdline_args); + + private: + std::string TmpFile(); + + std::string java_path_; + std::string jar_path_; + absl::Mutex generation_mutex_; + std::mt19937 rng_; + std::string rand_string_; // Random string used for temp file names. +}; + +} // namespace gamut +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_GAMUT_GAMUT_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/gamut/gamut_pybind11.cc b/scenarios/bargaining/open_spiel/open_spiel/games/gamut/gamut_pybind11.cc new file mode 100644 index 0000000..a0ab0db --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/gamut/gamut_pybind11.cc @@ -0,0 +1,34 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/gamut/gamut.h" +#include "open_spiel/python/pybind11/pybind11.h" + +namespace open_spiel { + +namespace py = ::pybind11; + +void init_pyspiel_gamut(::pybind11::module& m) { + py::class_ gamut_generator(m, "GamutGenerator"); + gamut_generator.def(py::init()) + .def(py::init()) + .def("generate_game", py::overload_cast( + &gamut::GamutGenerator::GenerateGame)) + .def("generate_game", py::overload_cast&>( + &gamut::GamutGenerator::GenerateGame)) + .def("generate_matrix_game", &gamut::GamutGenerator::GenerateMatrixGame) + .def("generate_tensor_game", &gamut::GamutGenerator::GenerateTensorGame); +} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/gamut/gamut_pybind11.h b/scenarios/bargaining/open_spiel/open_spiel/games/gamut/gamut_pybind11.h new file mode 100644 index 0000000..737cd06 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/gamut/gamut_pybind11.h @@ -0,0 +1,24 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef OPEN_SPIEL_GAMES_GAMUT_GAMUT_PYBIND11_H_ +#define OPEN_SPIEL_GAMES_GAMUT_GAMUT_PYBIND11_H_ + +#include "open_spiel/python/pybind11/pybind11.h" + +namespace open_spiel { +void init_pyspiel_gamut(::pybind11::module& m); +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_GAMUT_GAMUT_PYBIND11_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/gamut/gamut_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/gamut/gamut_test.cc new file mode 100644 index 0000000..1e95c7f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/gamut/gamut_test.cc @@ -0,0 +1,65 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/gamut/gamut.h" + +#include "open_spiel/matrix_game.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tensor_game.h" +#include "open_spiel/utils/init.h" + +namespace open_spiel { +namespace gamut { +namespace { + +void BasicLoadGamutTest() { + GamutGenerator generator("gamut.jar"); + + // See the documentation at http://gamut.stanford.edu/ for the commands needed + // to generate the various different games. + + // Using a string of arguments. + std::shared_ptr game1 = generator.GenerateGame( + "-g RandomGame -players 4 -normalize -min_payoff 0 " + "-max_payoff 150 -actions 2 4 5 7"); + SPIEL_CHECK_TRUE(game1 != nullptr); + + // Using a vector of arguments. + std::shared_ptr game2 = generator.GenerateGame( + {"-g", "RandomGame", "-players", "4", "-normalize", "-min_payoff", "0", + "-max_payoff", "150", "-actions", "2", "4", "5", "7"}); + SPIEL_CHECK_TRUE(game2 != nullptr); + + // As a matrix game. + std::shared_ptr game3 = + generator.GenerateMatrixGame( + {"-g", "RandomGame", "-players", "2", "-normalize", "-min_payoff", + "0", "-max_payoff", "150", "-actions", "10", "15"}); + SPIEL_CHECK_TRUE(game3 != nullptr); + + std::shared_ptr game4 = + generator.GenerateTensorGame( + {"-g", "RandomGame", "-players", "4", "-normalize", "-min_payoff", + "0", "-max_payoff", "150", "-actions", "2", "4", "5", "7"}); + SPIEL_CHECK_TRUE(game4 != nullptr); +} + +} // namespace +} // namespace gamut +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::Init("", &argc, &argv, true); + open_spiel::gamut::BasicLoadGamutTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/gamut/gamut_test.py b/scenarios/bargaining/open_spiel/open_spiel/games/gamut/gamut_test.py new file mode 100644 index 0000000..caa4a21 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/gamut/gamut_test.py @@ -0,0 +1,133 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Unit test for GamutGenerator.""" + +from absl import app +from absl.testing import absltest +from absl.testing import parameterized +from open_spiel.python.egt.utils import game_payoffs_array +import pyspiel + + +class GamutGeneratorTest(parameterized.TestCase): + + def _gamut_generator(self): + return pyspiel.GamutGenerator( + "gamut.jar" + ) + + @parameterized.parameters( + "-g BertrandOligopoly -players 2 -actions 4 -random_params", + "-g UniformLEG-CG -players 2 -actions 4 -random_params", + "-g PolymatrixGame-SW -players 2 -actions 4 -random_params", + "-g GraphicalGame-SW -players 2 -actions 4 -random_params", + "-g BidirectionalLEG-CG -players 2 -actions 4 -random_params", + "-g CovariantGame -players 2 -actions 4 -random_params", + "-g DispersionGame -players 2 -actions 4 -random_params", + "-g MinimumEffortGame -players 2 -actions 4 -random_params", + "-g RandomGame -players 2 -actions 4 -random_params", + "-g TravelersDilemma -players 2 -actions 4 -random_params", + ) + def test_generate_game(self, game_str): + generator = self._gamut_generator() + # Using a string of arguments. + game = generator.generate_game(game_str) + self.assertIsNotNone(game) + + payoff_tensor = game_payoffs_array(game) + self.assertEqual(payoff_tensor.shape, (2, 4, 4)) + + def test_gamut_api(self): + generator = self._gamut_generator() + + # See the documentation at http://gamut.stanford.edu/ for the commands + # needed to generate the various different games. + + # Using a string of arguments. + game = generator.generate_game( + "-g RandomGame -players 4 -normalize -min_payoff 0 " + + "-max_payoff 150 -actions 2 4 5 7" + ) + self.assertIsNotNone(game) + + # Using a list of arguments. + game = generator.generate_game([ + "-g", + "RandomGame", + "-players", + "4", + "-normalize", + "-min_payoff", + "0", + "-max_payoff", + "150", + "-actions", + "2", + "4", + "5", + "7", + ]) + self.assertIsNotNone(game) + + # Using a list of arguments. + matrix_game = generator.generate_matrix_game([ + "-g", + "RandomGame", + "-players", + "2", + "-normalize", + "-min_payoff", + "0", + "-max_payoff", + "150", + "-actions", + "10", + "15", + ]) + self.assertIsNotNone(matrix_game) + print(matrix_game.new_initial_state()) + payoff_matrix = game_payoffs_array(matrix_game) + print(payoff_matrix.shape) + print(payoff_matrix) + + # Using a list of arguments. + tensor_game = generator.generate_game([ + "-g", + "RandomGame", + "-players", + "4", + "-normalize", + "-min_payoff", + "0", + "-max_payoff", + "150", + "-actions", + "2", + "4", + "5", + "7", + ]) + self.assertIsNotNone(tensor_game) + payoff_tensor = game_payoffs_array(tensor_game) + print(payoff_tensor.shape) + + +def main(_): + absltest.main() + + +if __name__ == "__main__": + # Calling main via app.run here is necessary for internal uses. 
+ app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/gin_rummy/gin_rummy.cc b/scenarios/bargaining/open_spiel/open_spiel/games/gin_rummy/gin_rummy.cc new file mode 100644 index 0000000..c5a5dcb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/gin_rummy/gin_rummy.cc @@ -0,0 +1,869 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/gin_rummy/gin_rummy.h" + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/algorithms/observation_history.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/games/gin_rummy/gin_rummy_utils.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace gin_rummy { +namespace { + +const GameType kGameType{ + /*short_name=*/"gin_rummy", + /*long_name=*/"Gin Rummy", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + { + {"oklahoma", GameParameter(false)}, + {"knock_card", GameParameter(kDefaultKnockCard)}, + {"gin_bonus", GameParameter(kDefaultGinBonus)}, + {"undercut_bonus", GameParameter(kDefaultUndercutBonus)}, + {"num_ranks", GameParameter(kDefaultNumRanks)}, + {"num_suits", GameParameter(kDefaultNumSuits)}, + {"hand_size", GameParameter(kDefaultHandSize)}, + }}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new GinRummyGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +bool ObserverHasString(IIGObservationType iig_obs_type) { + return !iig_obs_type.perfect_recall || + (iig_obs_type.public_info && + iig_obs_type.private_info == PrivateInfoType::kSinglePlayer); +} + +bool ObserverHasTensor(IIGObservationType iig_obs_type) { + return !iig_obs_type.perfect_recall; +} + +} // namespace + + +class GinRummyObserver : public Observer { + public: + explicit GinRummyObserver(IIGObservationType iig_obs_type) + : Observer(/*has_string=*/ObserverHasString(iig_obs_type), + /*has_tensor=*/ObserverHasTensor(iig_obs_type)), + iig_obs_type_(iig_obs_type) {} + + + void WriteTensor(const State& observed_state, int player, + Allocator* allocator) const override { + auto& state = open_spiel::down_cast(observed_state); + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, state.num_players_); + + if (iig_obs_type_.perfect_recall) { + SpielFatalError( + "GinRummyObserver: tensor with perfect recall not implemented."); + } + + // 
Observing player. + WriteObservingPlayer(state, player, allocator); + + // Private hand(s). + if (iig_obs_type_.private_info == PrivateInfoType::kSinglePlayer) { + WriteSinglePlayerHand(state, player, allocator); + } else if (iig_obs_type_.private_info == PrivateInfoType::kAllPlayers) { + WriteAllPlayerHands(state, allocator); + } + + // Public information. + if (iig_obs_type_.public_info) { + WriteCurrentPlayer(state, allocator); + WriteKnockCard(state, allocator); + WriteUpcard(state, allocator); + WriteDiscardPile(state, allocator); + WriteStockSize(state, allocator); + WriteLayedMelds(state, allocator); + } + } + + std::string StringFrom(const State& observed_state, + int player) const override { + auto& state = open_spiel::down_cast(observed_state); + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, state.num_players_); + + if (iig_obs_type_.perfect_recall) { + if (iig_obs_type_.public_info && + iig_obs_type_.private_info == PrivateInfoType::kSinglePlayer) { + return state.aohs_[player].ToString(); + } else { + SpielFatalError( + "GinRummyObserver: string with perfect recall is implemented only" + " for the (default) info state observation type."); + } + } + + std::string rv; + absl::StrAppend(&rv, "\nKnock card: ", state.knock_card_); + absl::StrAppend(&rv, "\nPrev upcard: ", + state.utils_.CardString(state.prev_upcard_)); + absl::StrAppend(&rv, "\nRepeated move: ", state.repeated_move_); + absl::StrAppend(&rv, "\nCurrent player: ", state.cur_player_); + absl::StrAppend(&rv, "\nPhase: ", + GinRummyState::kPhaseString[static_cast(state.phase_)], + "\n"); + if (iig_obs_type_.private_info == PrivateInfoType::kAllPlayers || + (iig_obs_type_.private_info == PrivateInfoType::kSinglePlayer && + player == 0)) { + absl::StrAppend(&rv, "\nPlayer0: Deadwood=", state.deadwood_[0]); + } else { + absl::StrAppend(&rv, "\nPlayer0:"); + } + if (state.knocked_[1] && !state.layoffs_.empty()) { + absl::StrAppend(&rv, "\nLayoffs: "); + for (int card : state.layoffs_) { + absl::StrAppend(&rv, state.utils_.CardString(card)); + } + } + if (!state.layed_melds_[0].empty()) { + absl::StrAppend(&rv, "\nLayed melds:"); + for (int meld_id : state.layed_melds_[0]) { + absl::StrAppend(&rv, " "); + std::vector meld = state.utils_.int_to_meld.at(meld_id); + for (int card : meld) { + absl::StrAppend(&rv, state.utils_.CardString(card)); + } + } + } + if (iig_obs_type_.private_info == PrivateInfoType::kAllPlayers || + (iig_obs_type_.private_info == PrivateInfoType::kSinglePlayer && + player == 0)) { + absl::StrAppend(&rv, "\n", state.utils_.HandToString(state.hands_[0])); + } else { + absl::StrAppend(&rv, "\n", state.utils_.HandToString(std::vector())); + } + + absl::StrAppend(&rv, "\nStock size: ", state.stock_size_); + absl::StrAppend(&rv, " Upcard: ", state.utils_.CardString(state.upcard_)); + absl::StrAppend(&rv, "\nDiscard pile: "); + for (int card : state.discard_pile_) { + absl::StrAppend(&rv, state.utils_.CardString(card)); + } + if (iig_obs_type_.private_info == PrivateInfoType::kAllPlayers || + (iig_obs_type_.private_info == PrivateInfoType::kSinglePlayer && + player == 1)) { + absl::StrAppend(&rv, "\n\nPlayer1: Deadwood=", state.deadwood_[1]); + } else { + absl::StrAppend(&rv, "\n\nPlayer1:"); + } + if (state.knocked_[0] && !state.layoffs_.empty()) { + absl::StrAppend(&rv, "\nLayoffs: "); + for (int card : state.layoffs_) { + absl::StrAppend(&rv, state.utils_.CardString(card)); + } + } + if (!state.layed_melds_[1].empty()) { + absl::StrAppend(&rv, "\nLayed melds:"); + for (int meld_id : 
state.layed_melds_[1]) { + absl::StrAppend(&rv, " "); + std::vector meld = state.utils_.int_to_meld.at(meld_id); + for (int card : meld) { + absl::StrAppend(&rv, state.utils_.CardString(card)); + } + } + } + + if (iig_obs_type_.private_info == PrivateInfoType::kAllPlayers || + (iig_obs_type_.private_info == PrivateInfoType::kSinglePlayer && + player == 1)) { + absl::StrAppend(&rv, "\n", state.utils_.HandToString(state.hands_[1])); + } else { + absl::StrAppend(&rv, "\n", state.utils_.HandToString(std::vector())); + } + return rv; + } + + private: + static void WriteObservingPlayer(const GinRummyState& state, int player, + Allocator* allocator) { + auto out = allocator->Get("player", {kNumPlayers}); + out.at(player) = 1; + } + + static void WriteSinglePlayerHand(const GinRummyState& state, int player, + Allocator* allocator) { + auto out = allocator->Get("private_hand", {kNumPlayers, kDefaultNumCards}); + for (auto card : state.hands_[player]) out.at(player, card) = 1; + } + + static void WriteAllPlayerHands(const GinRummyState& state, + Allocator* allocator) { + auto out = allocator->Get("private_hands", {kNumPlayers, kDefaultNumCards}); + for (Player p = 0; p < kNumPlayers; ++p) { + for (auto card : state.hands_[p]) out.at(p, card) = 1; + } + } + + static void WriteCurrentPlayer(const GinRummyState& state, + Allocator* allocator) { + auto out = allocator->Get("current_player", {kNumPlayers}); + if (state.cur_player_ >= 0) out.at(state.cur_player_) = 1; + } + + static void WriteKnockCard(const GinRummyState& state, + Allocator* allocator) { + auto out = allocator->Get("knock_card", {kDefaultKnockCard}); + for (int i = 0; i < state.knock_card_; ++i) out.at(i) = 1; + } + + static void WriteUpcard(const GinRummyState& state, Allocator* allocator) { + auto out = allocator->Get("upcard", {kDefaultNumCards}); + if (state.upcard_.has_value()) out.at(state.upcard_.value()) = 1; + } + + static void WriteDiscardPile(const GinRummyState& state, + Allocator* allocator) { + auto out = allocator->Get("discard_pile", {kDefaultNumCards}); + for (auto card : state.discard_pile_) out.at(card) = 1; + } + + static void WriteStockSize(const GinRummyState& state, + Allocator* allocator) { + auto out = allocator->Get("stock_size", {kDefaultNumCards}); + for (int i = 0; i < state.stock_size_; ++i) out.at(i) = 1; + } + + static void WriteLayedMelds(const GinRummyState& state, + Allocator* allocator) { + auto out = allocator->Get("layed_melds", {kNumPlayers, kNumMeldActions}); + for (Player p = 0; p < kNumPlayers; ++p) { + for (auto meld : state.layed_melds_[p]) out.at(p, meld) = 1; + } + } + + IIGObservationType iig_obs_type_; +}; + +GinRummyState::GinRummyState(std::shared_ptr game, bool oklahoma, + int knock_card, int gin_bonus, int undercut_bonus, + int num_ranks, int num_suits, int hand_size) + : State(std::move(game)), + oklahoma_(oklahoma), + knock_card_(knock_card), + gin_bonus_(gin_bonus), + undercut_bonus_(undercut_bonus), + num_ranks_(num_ranks), + num_suits_(num_suits), + num_cards_(num_ranks * num_suits), + hand_size_(hand_size), + utils_(GinRummyUtils(num_ranks, num_suits, hand_size)), + stock_size_(num_ranks * num_suits), + deck_(num_ranks * num_suits, true) { + aohs_.reserve(kNumPlayers); + for (Player player = 0; player < kNumPlayers; ++player) { + std::vector, std::string>> aoh; + aoh.push_back({{}, ObservationString(player)}); + aohs_.push_back(open_spiel::ActionObservationHistory(player, aoh)); + } +} + +int GinRummyState::CurrentPlayer() const { + if (IsTerminal()) { + return kTerminalPlayerId; 
+ } else { + return cur_player_; + } +} + +void GinRummyState::DoApplyAction(Action action) { + Player current_player = CurrentPlayer(); + switch (phase_) { + case Phase::kDeal: + ApplyDealAction(action); + break; + case Phase::kFirstUpcard: + ApplyFirstUpcardAction(action); + break; + case Phase::kDraw: + ApplyDrawAction(action); + break; + case Phase::kDiscard: + ApplyDiscardAction(action); + break; + case Phase::kKnock: + ApplyKnockAction(action); + break; + case Phase::kLayoff: + ApplyLayoffAction(action); + break; + case Phase::kWall: + ApplyWallAction(action); + break; + case Phase::kGameOver: + SpielFatalError("Cannot act in terminal states."); + break; + default: + SpielFatalError("Invalid game phase."); + } + for (Player player = 0; player < NumPlayers(); ++player) { + absl::optional a = {}; + if (current_player == player) a = action; + aohs_[player].Extend(a, ObservationString(player)); + } +} + +void GinRummyState::ApplyDealAction(Action action) { + SPIEL_CHECK_TRUE(IsChanceNode()); + SPIEL_CHECK_GE(action, 0); + SPIEL_CHECK_LT(action, num_cards_); + // Deal cards to player 0. + if (stock_size_ > num_cards_ - hand_size_) { + StockToHand(0, action); + } else if (stock_size_ > num_cards_ - 2 * hand_size_) { + // Next deal to player 1. + StockToHand(1, action); + } else if (stock_size_ == num_cards_ - 2 * hand_size_) { + // Set upcard. + StockToUpcard(action); + for (int i = 0; i < kNumPlayers; ++i) { + deadwood_[i] = utils_.MinDeadwood(hands_[i]); + } + // Initial upcard determines the knock card if playing Oklahoma. + if (oklahoma_) { + knock_card_ = utils_.CardValue(action); + // Ace upcard means we must play for gin! + if (knock_card_ == 1) knock_card_ = 0; + } + prev_player_ = kChancePlayerId; + // This implementation always starts the action with player 0. + cur_player_ = 0; + phase_ = Phase::kFirstUpcard; + } else { + // Previous player drew from stock, let's deal them a card. + StockToHand(prev_player_, action); + // Update deadwood total, used to see if knock is legal. + deadwood_[prev_player_] = utils_.MinDeadwood(hands_[prev_player_]); + cur_player_ = prev_player_; + prev_player_ = kChancePlayerId; + phase_ = Phase::kDiscard; + } +} + +// Unique rules apply to the first upcard. If the first player to act does not +// draw the upcard, the second player then has the option to pick it up. If +// both players pass, the first player draws from the stock. 
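+// Illustrative example (not normative): with the action encoding from
+// gin_rummy.h, a hand in which both players decline the first upcard begins
+// with the action sequence {kPassAction, kPassAction, kDrawStockAction};
+// the third action is forced, because once both players have passed,
+// FirstUpcardLegalActions() only offers kDrawStockAction.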
+void GinRummyState::ApplyFirstUpcardAction(Action action) { + if (action == kDrawUpcardAction) { + SPIEL_CHECK_TRUE(upcard_.has_value()); + prev_upcard_ = upcard_; + UpcardToHand(cur_player_); + deadwood_[cur_player_] = utils_.MinDeadwood(hands_[cur_player_]); + prev_player_ = cur_player_; + phase_ = Phase::kDiscard; + } else if (action == kDrawStockAction) { + SPIEL_CHECK_TRUE(pass_on_first_upcard_[0] && pass_on_first_upcard_[1]); + prev_upcard_ = upcard_; + discard_pile_.push_back(upcard_.value()); + upcard_ = absl::nullopt; + prev_player_ = cur_player_; + cur_player_ = kChancePlayerId; + phase_ = Phase::kDeal; + } else if (action == kPassAction) { + SPIEL_CHECK_FALSE(pass_on_first_upcard_[0] && pass_on_first_upcard_[1]); + pass_on_first_upcard_[cur_player_] = true; + prev_player_ = cur_player_; + cur_player_ = Opponent(prev_player_); + phase_ = Phase::kFirstUpcard; + } else { + SpielFatalError("Invalid Action"); + } +} + +void GinRummyState::ApplyDrawAction(Action action) { + if (action == kDrawUpcardAction) { + SPIEL_CHECK_TRUE(upcard_.has_value()); + if (++num_draw_upcard_actions_ == kMaxNumDrawUpcardActions) { + phase_ = Phase::kGameOver; + return; + } + prev_upcard_ = upcard_; + UpcardToHand(cur_player_); + deadwood_[cur_player_] = utils_.MinDeadwood(hands_[cur_player_]); + prev_player_ = cur_player_; + phase_ = Phase::kDiscard; + } else if (action == kDrawStockAction) { + // When a player chooses to draw from stock the upcard is no + // longer in play and goes to the top of the discard pile. + prev_upcard_ = upcard_; + if (upcard_.has_value()) discard_pile_.push_back(upcard_.value()); + upcard_ = absl::nullopt; + prev_player_ = cur_player_; + cur_player_ = kChancePlayerId; + phase_ = Phase::kDeal; + } else { + SpielFatalError("Invalid DrawAction"); + } +} + +void GinRummyState::ApplyDiscardAction(Action action) { + if (action == kKnockAction) { + SPIEL_CHECK_LE(deadwood_[cur_player_], knock_card_); + // The hand has been knocked, so now deadwood tracks the total card value. + for (int i = 0; i < kNumPlayers; ++i) + deadwood_[i] = utils_.TotalCardValue(hands_[i]); + knocked_[cur_player_] = true; + prev_player_ = cur_player_; + phase_ = Phase::kKnock; + } else { + SPIEL_CHECK_TRUE(absl::c_linear_search(hands_[cur_player_], action)); + RemoveFromHand(cur_player_, action); + deadwood_[cur_player_] = utils_.MinDeadwood(hands_[cur_player_]); + upcard_ = action; + prev_player_ = cur_player_; + cur_player_ = Opponent(prev_player_); + if (upcard_ == prev_upcard_) { + if (repeated_move_) { + phase_ = Phase::kGameOver; + return; + } else { + repeated_move_ = true; + } + } else { + repeated_move_ = false; + } + if (stock_size_ == kWallStockSize) { + phase_ = Phase::kWall; + } else { + phase_ = Phase::kDraw; + } + } +} + +void GinRummyState::ApplyKnockAction(Action action) { + // First the knocking player must discard. + if (hands_[cur_player_].size() == hand_size_ + 1) { + SPIEL_CHECK_TRUE(absl::c_linear_search(hands_[cur_player_], action)); + RemoveFromHand(cur_player_, action); + discard_pile_.push_back(action); + deadwood_[cur_player_] = utils_.TotalCardValue(hands_[cur_player_]); + phase_ = Phase::kKnock; + } else if (action == kPassAction) { + // Here the pass action indicates knocking player is finished laying the + // hand. The player's deadwood is now final, and any cards not layed in a + // meld are counted towards the deadwood total. + deadwood_[cur_player_] = utils_.TotalCardValue(hands_[cur_player_]); + // Make sure the knocking player has completed a legal knock. 
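+    // Worked example: with the default knock card of 10, a knocker whose
+    // unmelded cards after laying melds are 4s and 3c has deadwood 7 and
+    // passes this check, whereas unmelded 8d and 5h (deadwood 13) would not.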
+ SPIEL_CHECK_LE(deadwood_[cur_player_], knock_card_); + // If deadwood equals 0 then the player has gin. The opponent is not + // allowed to lay off on gin. + if (deadwood_[cur_player_] == 0) finished_layoffs_ = true; + cur_player_ = Opponent(prev_player_); + phase_ = Phase::kLayoff; + } else { + // Knocking player must declare or "lay" melds, one action at a time. + SPIEL_CHECK_LE(action - kMeldActionBase, kNumMeldActions); + SPIEL_CHECK_GE(action - kMeldActionBase, 0); + layed_melds_[cur_player_].push_back(action - kMeldActionBase); + // Upon laying a meld the cards are removed from the player's hand. + for (int card : utils_.int_to_meld.at(action - kMeldActionBase)) { + RemoveFromHand(cur_player_, card); + } + deadwood_[cur_player_] = utils_.TotalCardValue(hands_[cur_player_]); + phase_ = Phase::kKnock; + } +} + +void GinRummyState::ApplyLayoffAction(Action action) { + if (!finished_layoffs_) { + if (action == kPassAction) { + finished_layoffs_ = true; + phase_ = Phase::kLayoff; + } else { + SPIEL_CHECK_TRUE(absl::c_linear_search(hands_[cur_player_], action)); + layoffs_.push_back(action); + RemoveFromHand(cur_player_, action); + deadwood_[cur_player_] = utils_.TotalCardValue(hands_[cur_player_]); + phase_ = Phase::kLayoff; + } + } else { + // Finished laying off individual cards, now lay melds. + if (action == kPassAction) { + deadwood_[cur_player_] = utils_.TotalCardValue(hands_[cur_player_]); + phase_ = Phase::kGameOver; + } else { + // Lay melds one action at a time. + SPIEL_CHECK_LE(action - kMeldActionBase, kNumMeldActions); + SPIEL_CHECK_GE(action - kMeldActionBase, 0); + layed_melds_[cur_player_].push_back(action - kMeldActionBase); + // Upon laying a meld the cards are removed from the player's hand. + for (int card : utils_.int_to_meld.at(action - kMeldActionBase)) + RemoveFromHand(cur_player_, card); + deadwood_[cur_player_] = utils_.TotalCardValue(hands_[cur_player_]); + phase_ = Phase::kLayoff; + } + } +} + +void GinRummyState::ApplyWallAction(Action action) { + if (action == kKnockAction) { + // When we've reached the wall, a knock automatically includes upcard. + UpcardToHand(cur_player_); + deadwood_[cur_player_] = utils_.MinDeadwood(hands_[cur_player_]); + // Make sure knock is legal. + SPIEL_CHECK_LE(deadwood_[cur_player_], knock_card_); + knocked_[cur_player_] = true; + prev_player_ = cur_player_; + phase_ = Phase::kKnock; + } else if (action == kPassAction) { + phase_ = Phase::kGameOver; + } else { + SpielFatalError("Invalid WallAction"); + } +} + +std::vector GinRummyState::LegalActions() const { + switch (phase_) { + case Phase::kDeal: + return DealLegalActions(); + case Phase::kFirstUpcard: + return FirstUpcardLegalActions(); + case Phase::kDraw: + return DrawLegalActions(); + case Phase::kDiscard: + return DiscardLegalActions(); + case Phase::kKnock: + return KnockLegalActions(); + case Phase::kLayoff: + return LayoffLegalActions(); + case Phase::kWall: + return WallLegalActions(); + default: + return {}; + } +} + +std::vector GinRummyState::DealLegalActions() const { + std::vector legal_actions; + for (int card = 0; card < num_cards_; ++card) { + if (deck_[card]) legal_actions.push_back(card); + } + return legal_actions; +} + +std::vector GinRummyState::FirstUpcardLegalActions() const { + std::vector legal_actions; + // If both players have passed then must draw from stock. + if (pass_on_first_upcard_[0] && pass_on_first_upcard_[1]) { + legal_actions.push_back(kDrawStockAction); + } else { + // Options are to draw upcard or pass to opponent. 
+ legal_actions.push_back(kDrawUpcardAction); + legal_actions.push_back(kPassAction); + } + return legal_actions; +} + +std::vector GinRummyState::DrawLegalActions() const { + std::vector legal_actions; + legal_actions.push_back(kDrawUpcardAction); + legal_actions.push_back(kDrawStockAction); + return legal_actions; +} + +std::vector GinRummyState::DiscardLegalActions() const { + // All cards in hand are legal discards. + std::vector legal_actions(hands_[cur_player_].begin(), + hands_[cur_player_].end()); + if (deadwood_[cur_player_] <= knock_card_) { + legal_actions.push_back(kKnockAction); + } + std::sort(legal_actions.begin(), legal_actions.end()); + return legal_actions; +} + +std::vector GinRummyState::KnockLegalActions() const { + std::vector legal_actions; + // After knocking, the player discards. This discard must not prevent + // the player from arranging the hand in such a way that the deadwood + // total is less than the knock card. + if (hands_[cur_player_].size() == hand_size_ + 1) { + for (int card : utils_.LegalDiscards(hands_[cur_player_], knock_card_)) { + legal_actions.push_back(card); + } + } else { + for (int meld_id : utils_.LegalMelds(hands_[cur_player_], knock_card_)) { + legal_actions.push_back(meld_id + kMeldActionBase); + } + // Must keep laying melds until remaining deadwood is less than knock card. + // Once that has been accomplished, the knocking player can pass. + if (utils_.TotalCardValue(hands_[cur_player_]) <= knock_card_) { + legal_actions.push_back(kPassAction); + } + } + std::sort(legal_actions.begin(), legal_actions.end()); + return legal_actions; +} + +std::vector GinRummyState::LayoffLegalActions() const { + std::vector legal_actions; + if (!finished_layoffs_) { + // Always have the option not to lay off any cards. + legal_actions.push_back(kPassAction); + std::vector all_possible_layoffs = + utils_.AllLayoffs(layed_melds_[prev_player_], layoffs_); + for (int card : all_possible_layoffs) { + if (absl::c_linear_search(hands_[cur_player_], card)) { + legal_actions.push_back(card); + } + } + } else { + // After laying off individual cards, now the player lays melds. + // Always have the option not to declare any melds. + legal_actions.push_back(kPassAction); + // The non-knocking player does not have to arrange melds in a particular + // way to get under the knock card. Therefore we use kMaxPossibleDeadwood + // to ensure that all melds are legal. + for (int meld_id : utils_.LegalMelds(hands_[cur_player_], + kMaxPossibleDeadwood)) { + legal_actions.push_back(meld_id + kMeldActionBase); + } + } + std::sort(legal_actions.begin(), legal_actions.end()); + return legal_actions; +} + +std::vector GinRummyState::WallLegalActions() const { + std::vector legal_actions; + legal_actions.push_back(kPassAction); + // Player can either pass or knock (if legal). + int deadwood = utils_.MinDeadwood(hands_[cur_player_], upcard_); + if (deadwood <= knock_card_) { + legal_actions.push_back(kKnockAction); + } + return legal_actions; +} + +std::vector> GinRummyState::ChanceOutcomes() const { + SPIEL_CHECK_TRUE(IsChanceNode()); + std::vector> outcomes; + outcomes.reserve(stock_size_); + const double p = 1.0 / stock_size_; + for (int card = 0; card < num_cards_; ++card) { + // This card is still in the deck, prob is 1/stock_size_. 
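+    // For example, immediately after the deal in the default game the stock
+    // holds 31 cards (52 minus two 10-card hands minus the upcard), so each
+    // undealt card is drawn with probability 1/31.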
+ if (deck_[card]) outcomes.push_back({card, p}); + } + return outcomes; +} + +std::string GinRummyState::ActionToString(Player player, Action action) const { + if (player == kChancePlayerId) { + return absl::StrCat("Chance outcome: ", utils_.CardString(action)); + } else { + std::string action_str; + if (action < num_cards_) { + action_str = utils_.CardString(action); + } else if (action == kDrawUpcardAction) { + action_str = "Draw upcard"; + } else if (action == kDrawStockAction) { + action_str = "Draw stock"; + } else if (action == kPassAction) { + action_str = "Pass"; + } else if (action == kKnockAction) { + action_str = "Knock"; + } else if (action < kMeldActionBase + kNumMeldActions) { + std::vector meld = utils_.int_to_meld.at(action - kMeldActionBase); + std::vector meld_str = utils_.CardIntsToCardStrings(meld); + action_str = absl::StrJoin(meld_str, ""); + } else { + SpielFatalError( + absl::StrCat("Error in GinRummyState::ActionToString().")); + } + return absl::StrCat("Player: ", player, " Action: ", action_str); + } +} + +std::string GinRummyState::ToString() const { + std::string rv; + absl::StrAppend(&rv, "\nKnock card: ", knock_card_); + absl::StrAppend(&rv, "\nPrev upcard: ", utils_.CardString(prev_upcard_)); + absl::StrAppend(&rv, "\nRepeated move: ", repeated_move_); + absl::StrAppend(&rv, "\nCurrent player: ", cur_player_); + absl::StrAppend(&rv, "\nPhase: ", kPhaseString[static_cast(phase_)], + "\n"); + absl::StrAppend(&rv, "\nPlayer0: Deadwood=", deadwood_[0]); + if (knocked_[1] && !layoffs_.empty()) { + absl::StrAppend(&rv, "\nLayoffs: "); + for (int card : layoffs_) absl::StrAppend(&rv, utils_.CardString(card)); + } + if (!layed_melds_[0].empty()) { + absl::StrAppend(&rv, "\nLayed melds:"); + for (int meld_id : layed_melds_[0]) { + absl::StrAppend(&rv, " "); + std::vector meld = utils_.int_to_meld.at(meld_id); + for (int card : meld) absl::StrAppend(&rv, utils_.CardString(card)); + } + } + absl::StrAppend(&rv, "\n", utils_.HandToString(hands_[0])); + absl::StrAppend(&rv, "\nStock size: ", stock_size_); + absl::StrAppend(&rv, " Upcard: ", utils_.CardString(upcard_)); + absl::StrAppend(&rv, "\nDiscard pile: "); + for (int card : discard_pile_) absl::StrAppend(&rv, utils_.CardString(card)); + absl::StrAppend(&rv, "\n\nPlayer1: Deadwood=", deadwood_[1]); + if (knocked_[0] && !layoffs_.empty()) { + absl::StrAppend(&rv, "\nLayoffs: "); + for (int card : layoffs_) absl::StrAppend(&rv, utils_.CardString(card)); + } + if (!layed_melds_[1].empty()) { + absl::StrAppend(&rv, "\nLayed melds:"); + for (int meld_id : layed_melds_[1]) { + absl::StrAppend(&rv, " "); + std::vector meld = utils_.int_to_meld.at(meld_id); + for (int card : meld) absl::StrAppend(&rv, utils_.CardString(card)); + } + } + absl::StrAppend(&rv, "\n", utils_.HandToString(hands_[1])); + return rv; +} + +std::vector GinRummyState::Returns() const { + if (!IsTerminal()) { + return std::vector(kNumPlayers, 0.0); + } + std::vector returns(kNumPlayers, 0.0); + // player 0 knocked + if (knocked_[0]) { + returns[0] = deadwood_[1] - deadwood_[0]; + if (deadwood_[0] == 0) { + returns[0] += gin_bonus_; + } + if (returns[0] < 0) { + returns[0] -= undercut_bonus_; + } + returns[1] = -returns[0]; + } else if (knocked_[1]) { + // player 1 knocked + returns[1] = deadwood_[0] - deadwood_[1]; + if (deadwood_[1] == 0) { + returns[1] += gin_bonus_; + } + if (returns[1] < 0) { + returns[1] -= undercut_bonus_; + } + returns[0] = -returns[1]; + } + // If neither player knocked both players get 0. 
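+  // Worked example: if player 0 knocks with 7 deadwood and player 1 ends with
+  // 12, the returns are {+5, -5}. If player 1 instead gets down to 5, player
+  // 0's raw score is -2 and the undercut bonus (default 25) makes it
+  // {-27, +27}, the pattern exercised by GameplayTest3. A gin (0 deadwood)
+  // adds gin_bonus_ on top of the deadwood difference.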
+ return returns; +} + +void GinRummyState::StockToHand(Player player, Action card) { + hands_[player].push_back(card); + deck_[card] = false; + --stock_size_; +} + +void GinRummyState::StockToUpcard(Action card) { + upcard_ = card; + deck_[card] = false; + --stock_size_; +} + +void GinRummyState::UpcardToHand(Player player) { + SPIEL_CHECK_TRUE(upcard_.has_value()); + hands_[player].push_back(upcard_.value()); + upcard_ = absl::nullopt; +} + +void GinRummyState::RemoveFromHand(Player player, Action card) { + hands_[player].erase( + std::remove(hands_[player].begin(), hands_[player].end(), card), + hands_[player].end()); +} + +std::unique_ptr GinRummyState::Clone() const { + return std::unique_ptr(new GinRummyState(*this)); +} + +std::string GinRummyState::InformationStateString(Player player) const { + const auto& game = open_spiel::down_cast(*game_); + return game.info_state_observer_->StringFrom(*this, player); +} + +std::string GinRummyState::ObservationString(Player player) const { + const GinRummyGame& game = open_spiel::down_cast(*game_); + return game.default_observer_->StringFrom(*this, player); +} + +void GinRummyState::ObservationTensor(Player player, + absl::Span values) const { + ContiguousAllocator allocator(values); + const GinRummyGame& game = open_spiel::down_cast(*game_); + game.default_observer_->WriteTensor(*this, player, &allocator); +} + +GinRummyGame::GinRummyGame(const GameParameters& params) + : Game(kGameType, params), + oklahoma_(ParameterValue("oklahoma")), + knock_card_(ParameterValue("knock_card")), + gin_bonus_(ParameterValue("gin_bonus")), + undercut_bonus_(ParameterValue("undercut_bonus")), + num_ranks_(ParameterValue("num_ranks")), + num_suits_(ParameterValue("num_suits")), + hand_size_(ParameterValue("hand_size")) { + SPIEL_CHECK_GE(knock_card_, 0); + SPIEL_CHECK_LE(knock_card_, kDefaultKnockCard); + // Check that the game size makes sense. + SPIEL_CHECK_GE(num_ranks_, 1); + SPIEL_CHECK_LE(num_ranks_, kDefaultNumRanks); + SPIEL_CHECK_GE(num_suits_, 1); + SPIEL_CHECK_LE(num_suits_, kDefaultNumSuits); + SPIEL_CHECK_GE(hand_size_, 1); + SPIEL_CHECK_LE(hand_size_, kDefaultHandSize); + // Must be able to deal both hands and have enough cards left over for one + // round of play. + SPIEL_CHECK_GE(num_ranks_ * num_suits_, + kNumPlayers * hand_size_ + kWallStockSize + 1); + + default_observer_ = std::make_shared(kDefaultObsType); + info_state_observer_ = std::make_shared(kInfoStateObsType); +} + +std::shared_ptr GinRummyGame::MakeObserver( + absl::optional iig_obs_type, + const GameParameters& params) const { + if (!params.empty()) SpielFatalError("Observation params not supported"); + return std::make_shared( + iig_obs_type.value_or(kDefaultObsType)); +} + +} // namespace gin_rummy +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/gin_rummy/gin_rummy.h b/scenarios/bargaining/open_spiel/open_spiel/games/gin_rummy/gin_rummy.h new file mode 100644 index 0000000..ba8789c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/gin_rummy/gin_rummy.h @@ -0,0 +1,276 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_GIN_RUMMY_H_ +#define OPEN_SPIEL_GAMES_GIN_RUMMY_H_ + +// Implementation of the classic card game: +// https://en.wikipedia.org/wiki/Gin_rummy +// +// Gin rummy is played with many variations. Here we closely follow +// the rules described in http://ginrummytournaments.com/pdfs/GRA_Rules.pdf +// +// A game consists of a single hand of gin (i.e. this implementation does not +// support a full game, consisting of multiple hands, played to some given +// point total, usually in the 100-300 point range). +// +// Gin is a large game with over 10^85 information states and a large number +// of states per information state. Off the deal there are 41 choose 10 = +// 1,121,099,408 possible opponent hands, compared to 50 choose 2 = 1,225 in +// heads up Texas hold 'em poker. +// +// Parameters: +// "oklahoma" bool use oklahoma variation? (default = false) +// "knock_card" int set a specific knock card (default = 10) +// "gin_bonus" int bonus for getting gin (default = 25) +// "undercut_bonus" int bonus for an undercut (default = 25) +// "num_ranks" int num ranks in deck (default = 13) +// "num_suits" int num suits in deck (default = 4) +// "hand_size" int num cards in player hand (default = 10) + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/algorithms/observation_history.h" +#include "open_spiel/games/gin_rummy/gin_rummy_utils.h" +#include "open_spiel/observer.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace gin_rummy { + +// Game constants +inline constexpr int kDefaultNumRanks = 13; +inline constexpr int kDefaultNumSuits = 4; +inline constexpr int kDefaultNumCards = 52; +inline constexpr int kNumPlayers = 2; +inline constexpr int kMaxPossibleDeadwood = 98; // E.g. KsKcQdQhJsJcTdTh9s9c +inline constexpr int kMaxNumDrawUpcardActions = 50; +inline constexpr int kDefaultHandSize = 10; +inline constexpr int kWallStockSize = 2; +inline constexpr int kDefaultKnockCard = 10; +inline constexpr int kDefaultGinBonus = 25; +inline constexpr int kDefaultUndercutBonus = 25; +// Action constants +// {0, ..., 51} are reserved for card-specific deal and discard actions +// corresponding to a standard deck size. 
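+// For reference, the resulting layout of the 241 distinct actions is:
+//   0-51   deal/discard a specific card
+//   52     draw upcard, 53 draw stock, 54 pass, 55 knock
+//   56-240 lay one of the 185 possible melds (kMeldActionBase + meld id)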
+inline constexpr int kDrawUpcardAction = 52; +inline constexpr int kDrawStockAction = 53; +inline constexpr int kPassAction = 54; +inline constexpr int kKnockAction = 55; +inline constexpr int kMeldActionBase = 56; // First lay meld action +inline constexpr int kNumMeldActions = 185; +inline constexpr int kNumDistinctActions = kMeldActionBase + kNumMeldActions; +// Observer constants +inline constexpr int kObservationTensorSize = + kNumPlayers // Observing player + + kDefaultNumCards * 2 // Player hands + + kNumPlayers // Player turn + + kDefaultKnockCard // Knock card + + kDefaultNumCards // Upcard + + kDefaultNumCards // Discard pile + + kDefaultNumCards // Stock size + + kNumMeldActions * 2; // Layed melds of both players + +enum class Phase { + kDeal, + kFirstUpcard, + kDraw, + kDiscard, + kKnock, + kLayoff, + kWall, + kGameOver +}; + +class GinRummyGame; +class GinRummyObserver; + +class GinRummyState : public State { + public: + explicit GinRummyState(std::shared_ptr game, bool oklahoma, + int knock_card, int gin_bonus, int undercut_bonus, + int num_ranks, int num_suits, int hand_size); + Player CurrentPlayer() const override; + std::string ActionToString(Player player, Action action) const override; + std::string ToString() const override; + bool IsTerminal() const override { return phase_ == Phase::kGameOver; } + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + std::vector LegalActions() const override; + std::vector> ChanceOutcomes() const override; + + // Used for Python bindings. + Phase CurrentPhase() const { return phase_; } + bool FinishedLayoffs() const { return finished_layoffs_ ; } + absl::optional Upcard() const { return upcard_; } + int StockSize() const { return stock_size_; } + std::vector> Hands() const { return hands_; } + std::vector DiscardPile() const { return discard_pile_; } + std::vector Deadwood() const { return deadwood_; } + std::vector Knocked() const { return knocked_; } + std::vector PassOnFirstUpcard() const { return pass_on_first_upcard_; } + std::vector> LayedMelds() const { return layed_melds_; } + std::vector Layoffs() const { return layoffs_; } + + protected: + void DoApplyAction(Action action) override; + + private: + friend class GinRummyObserver; + + inline static constexpr std::array kPhaseString = { + "Deal", "FirstUpcard", "Draw", "Discard", + "Knock", "Layoff", "Wall", "GameOver"}; + + std::vector DealLegalActions() const; + std::vector FirstUpcardLegalActions() const; + std::vector DrawLegalActions() const; + std::vector DiscardLegalActions() const; + std::vector KnockLegalActions() const; + std::vector LayoffLegalActions() const; + std::vector WallLegalActions() const; + + void ApplyDealAction(Action action); + void ApplyFirstUpcardAction(Action action); + void ApplyDrawAction(Action action); + void ApplyDiscardAction(Action action); + void ApplyKnockAction(Action action); + void ApplyLayoffAction(Action action); + void ApplyWallAction(Action action); + + void StockToHand(Player player, Action card); + void StockToUpcard(Action card); + void UpcardToHand(Player player); + void HandToUpcard(Player player, Action card); + void RemoveFromHand(Player player, Action card); + + int Opponent(int player) const { return 1 - player; } + + // Game params + const bool oklahoma_; // If true, will override the knock 
card value. + int knock_card_; // The maximum deadwood total for a legal knock. + const int gin_bonus_; + const int undercut_bonus_; + const int num_ranks_; + const int num_suits_; + const int num_cards_; + const int hand_size_; + + const GinRummyUtils utils_; + + Phase phase_ = Phase::kDeal; + Player cur_player_ = kChancePlayerId; + Player prev_player_ = kChancePlayerId; + bool finished_layoffs_ = false; + absl::optional upcard_; + absl::optional prev_upcard_; // Used to track repeated moves. + int stock_size_; // Number of cards remaining in stock. + // True if the prev player drew the upcard only to immediately discard it. + // If both players do this in succession the game is declared a draw. + bool repeated_move_ = false; + // Incremented every time a player draws the upcard. Used to ensure the game + // is finite. See gin_rummy_test for an example of why this is needed. + int num_draw_upcard_actions_ = 0; + + // Each player's hand. Indexed by pid. + std::vector> hands_ = + std::vector>(kNumPlayers, std::vector()); + // True if the card is still in the deck. Cards from 0-51 using the suit order + // "scdh". + std::vector deck_{}; + // Discard pile consists of cards that are out of play. + std::vector discard_pile_{}; + // Prior to a knock, deadwood tracks the minimum possible deadwood count + // over all meld arrangements, indexed by pid. When player has 11 cards, it + // counts the best 10 of 11 (the player can discard). + // After a knock, deadwood counts the total card value of all cards in the + // player's hand. Points are deducted as the player lays the hand into melds + // or lays off cards onto opponent melds. + std::vector deadwood_ = std::vector(kNumPlayers, 0); + // Flag for whether the player has knocked. Indexed by pid. + std::vector knocked_ = std::vector(kNumPlayers, false); + // Flag for whether the player has passed on first upcard. Indexed by pid. + std::vector pass_on_first_upcard_ = + std::vector(kNumPlayers, false); + // Each player's layed melds during the knock phase. Indexed by pid. + std::vector> layed_melds_ = + std::vector>(kNumPlayers, std::vector()); + // Cards that have been layed off onto knocking player's layed melds. + std::vector layoffs_{}; + // cached ActionObservationHistory for each player + std::vector aohs_; +}; + +class GinRummyGame : public Game { + public: + explicit GinRummyGame(const GameParameters& params); + + int NumDistinctActions() const override { return kNumDistinctActions; } + int MaxChanceOutcomes() const override { return num_ranks_ * num_suits_; } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { + return -(kMaxPossibleDeadwood + gin_bonus_); + } + double MaxUtility() const override { + return kMaxPossibleDeadwood + gin_bonus_; + } + absl::optional UtilitySum() const override { return 0; } + std::unique_ptr NewInitialState() const override { + return std::unique_ptr( + new GinRummyState(shared_from_this(), oklahoma_, knock_card_, + gin_bonus_, undercut_bonus_, num_ranks_, num_suits_, + hand_size_)); + } + std::vector ObservationTensorShape() const override { + return {kObservationTensorSize}; + } + // All games should terminate before reaching this upper bound. + int MaxGameLength() const override { return 300; } + // Chance nodes occur on the deal and when drawing from the stock. 
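+  // With the default 52-card deck this bound is 50: every card except the
+  // final kWallStockSize (2) cards left in the stock can be dealt at a
+  // chance node.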
+ int MaxChanceNodesInHistory() const override { + return num_ranks_ * num_suits_ - kWallStockSize; + } + std::shared_ptr MakeObserver( + absl::optional iig_obs_type, + const GameParameters& params) const override; + + std::shared_ptr default_observer_; + std::shared_ptr info_state_observer_; + + // Used for Python bindings. + bool Oklahoma() const { return oklahoma_; } + int KnockCard() const { return knock_card_; } + + private: + const bool oklahoma_; + const int knock_card_; + const int gin_bonus_; + const int undercut_bonus_; + const int num_ranks_; + const int num_suits_; + const int hand_size_; +}; + +} // namespace gin_rummy +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_GIN_RUMMY_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/gin_rummy/gin_rummy_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/gin_rummy/gin_rummy_test.cc new file mode 100644 index 0000000..ee70230 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/gin_rummy/gin_rummy_test.cc @@ -0,0 +1,609 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/gin_rummy/gin_rummy.h" + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/games/gin_rummy/gin_rummy_utils.h" +#include "open_spiel/observer.h" +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace gin_rummy { +namespace { + +namespace testing = open_spiel::testing; + +void BasicGameTests() { + testing::LoadGameTest("gin_rummy"); + testing::RandomSimTest(*LoadGame("gin_rummy"), 10); +} + +void MeldTests() { + GinRummyUtils utils = GinRummyUtils(kDefaultNumRanks, kDefaultNumSuits, + kDefaultHandSize); + // There are 185 melds of length between 3 and 5 cards. All melds of + // length greater than 5 can be expressed as two or more smaller melds. + std::vector full_deck; + for (int i = 0; i < utils.num_cards; ++i) full_deck.push_back(i); + std::vector> all_melds = utils.AllMelds(full_deck); + SPIEL_CHECK_EQ(all_melds.size(), kNumMeldActions); + + // Some simple meld tests + std::vector cards; + cards = {"As", "2s", "3s"}; + SPIEL_CHECK_TRUE(utils.IsSuitMeld(utils.CardStringsToCardInts(cards))); + SPIEL_CHECK_FALSE(utils.IsRankMeld(utils.CardStringsToCardInts(cards))); + cards = {"As", "Ac", "Ad"}; + SPIEL_CHECK_TRUE(utils.IsRankMeld(utils.CardStringsToCardInts(cards))); + SPIEL_CHECK_FALSE(utils.IsSuitMeld(utils.CardStringsToCardInts(cards))); + cards = {"As", "Ac", "Ad", "2s"}; + SPIEL_CHECK_FALSE(utils.IsRankMeld(utils.CardStringsToCardInts(cards))); + SPIEL_CHECK_FALSE(utils.IsSuitMeld(utils.CardStringsToCardInts(cards))); + + // No "around the corner" melds + cards = {"As", "2s", "3s", "Ks"}; + SPIEL_CHECK_FALSE(utils.IsRankMeld(utils.CardStringsToCardInts(cards))); + SPIEL_CHECK_FALSE(utils.IsSuitMeld(utils.CardStringsToCardInts(cards))); + + // These cards are represented internally as consecutive ints + // but are not a meld. 
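+  // (With the "scdh" suit order, Js Qs Ks map to card ints 10, 11, 12 and
+  // Ac maps to 13, so the four ints are consecutive.)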
+ cards = {"Js", "Qs", "Ks", "Ac"}; + SPIEL_CHECK_FALSE(utils.IsRankMeld(utils.CardStringsToCardInts(cards))); + SPIEL_CHECK_FALSE(utils.IsSuitMeld(utils.CardStringsToCardInts(cards))); + + // Check that the meld_to_int and int_to_meld maps work correctly. + int meld_id; + cards = {"Ks", "Kc", "Kd", "Kh"}; + meld_id = utils.meld_to_int.at(utils.CardStringsToCardInts(cards)); + SPIEL_CHECK_EQ(meld_id, 64); + SPIEL_CHECK_EQ(utils.meld_to_int.at(utils.int_to_meld.at(64)), 64); + cards = {"As", "2s", "3s"}; + meld_id = utils.meld_to_int.at(utils.CardStringsToCardInts(cards)); + SPIEL_CHECK_EQ(meld_id, 65); + SPIEL_CHECK_EQ(utils.meld_to_int.at(utils.int_to_meld.at(65)), 65); + cards = {"As", "2s", "3s", "4s"}; + meld_id = utils.meld_to_int.at(utils.CardStringsToCardInts(cards)); + SPIEL_CHECK_EQ(meld_id, 109); + SPIEL_CHECK_EQ(utils.meld_to_int.at(utils.int_to_meld.at(109)), 109); + cards = {"As", "2s", "3s", "4s", "5s"}; + meld_id = utils.meld_to_int.at(utils.CardStringsToCardInts(cards)); + SPIEL_CHECK_EQ(meld_id, 149); + SPIEL_CHECK_EQ(utils.meld_to_int.at(utils.int_to_meld.at(149)), 149); + cards = {"9h", "Th", "Jh", "Qh", "Kh"}; + meld_id = utils.meld_to_int.at(utils.CardStringsToCardInts(cards)); + SPIEL_CHECK_EQ(meld_id, 184); + SPIEL_CHECK_EQ(utils.meld_to_int.at(utils.int_to_meld.at(184)), 184); + + // Should find five rank melds and one suit meld. + // +--------------------------+ + // |As2s3s | + // |Ac | + // |Ad | + // |Ah | + // +--------------------------+ + cards = {"As", "Ac", "Ad", "Ah", "2s", "3s"}; + std::vector card_ints = utils.CardStringsToCardInts(cards); + all_melds = utils.AllMelds(card_ints); + SPIEL_CHECK_EQ(all_melds.size(), 6); + + // More complicated example with 14 possible melds. + // +--------------------------+ + // | 4s5s6s | + // | 4c5c6c | + // | 4d5d6d | + // | 4h5h | + // +--------------------------+ + cards = {"4s", "4c", "4d", "4h", "5s", "5c", "5d", "5h", "6s", "6c", "6d"}; + card_ints = utils.CardStringsToCardInts(cards); + all_melds = utils.AllMelds(card_ints); + SPIEL_CHECK_EQ(all_melds.size(), 14); + + // +--------------------------+ + // | 3s4s5s6s | + // | 2c3c4c5c | + // | 4d5d | + // | 4h | + // +--------------------------+ + // Should find the best meld group 4s4d4h, 5s5c5d, 2c3c4c with 3 deadwood. + cards = {"4s", "4c", "4d", "4h", "5s", "5c", "5d", "6s", "2c", "3s", "3c"}; + card_ints = utils.CardStringsToCardInts(cards); + std::vector> meld_group = utils.BestMeldGroup(card_ints); + std::cout << meld_group << std::endl; + for (auto meld : meld_group) + std::cout << utils.CardIntsToCardStrings(meld) << std::endl; + int deadwood = utils.MinDeadwood(card_ints); + SPIEL_CHECK_EQ(deadwood, 3); +} + +// An extremely rare situation, but one that does arise in actual gameplay. +// Tests both layoff and undercut functionality. +void GameplayTest1() { + GameParameters params; + // Modify undercut bonus game parameter as an additional test. 
+ params["undercut_bonus"] = GameParameter(20); + std::shared_ptr game = + open_spiel::LoadGame("gin_rummy", params); + std::unique_ptr state = game->NewInitialState(); + std::vector initial_actions; + initial_actions = {11, 4, 5, 6, 21, 22, 23, 12, 25, 38, 1, 14, + 27, 40, 7, 20, 33, 8, 19, 13, 36, 52, 55, 11}; + for (auto action : initial_actions) state->ApplyAction(action); + std::cout << state->ToString() << std::endl; + // Player turn: 0 + // Phase: Knock + // + // Player1: Deadwood=49 + // +--------------------------+ + // | 2s 8s9s | + // |Ac2c 7c8c | + // | 2d 8d | + // | 2h | + // +--------------------------+ + // + // Stock size: 31 Upcard: XX + // Discard pile: Qs + // + // Player0: Deadwood=87 + // +--------------------------+ + // | 5s6s7s Ks| + // | 9cTcJc Kc| + // | Jd Kd| + // | | + // +--------------------------+ + // + // Player0 has knocked, and after laying melds will have the Jd left for + // 10 points. Player 1 has two melds (2's and 8's) with 17 points remaining. + // Laying the hand this way gives Player0 a win of 7 points. But there's a + // better play! Player1 is not compelled to lay his 8's as a meld. Instead, + // Player1 can lay off the 8s9s on the 5s6s7s, and the 7c8c on the 9cTcJc. + // This leaves Player1 with only the 8d and Ac as deadwood, for a total of + // 9 points, less than the 10 points Player0 knocked with. By breaking the + // meld of 8's Player1 wins an undercut! + + // Player0 lays melds. + state->ApplyAction(119); // KsKcKd + state->ApplyAction(125); // 5s6s7s + state->ApplyAction(140); // 9cTcJc + state->ApplyAction(54); + // Player1 layoffs. + std::vector legal_actions = state->LegalActions(); + SPIEL_CHECK_TRUE(absl::c_linear_search(legal_actions, 7)); + state->ApplyAction(7); // Lay off 8s + legal_actions = state->LegalActions(); + SPIEL_CHECK_TRUE(absl::c_linear_search(legal_actions, 8)); + state->ApplyAction(8); // Lay off 9s + legal_actions = state->LegalActions(); + SPIEL_CHECK_TRUE(absl::c_linear_search(legal_actions, 20)); + state->ApplyAction(20); // Lay off 8c + legal_actions = state->LegalActions(); + SPIEL_CHECK_TRUE(absl::c_linear_search(legal_actions, 19)); + state->ApplyAction(19); // Lay off 7c + state->ApplyAction(54); // Finished layoffs + state->ApplyAction(65); // Lay meld of 2's + state->ApplyAction(54); + // Player1 wins the difference in deadwood (10 - 9 = 1) and the undercut + // bonus which was set to 20, for a total of 21 points. + std::vector returns = state->Returns(); + SPIEL_CHECK_EQ(returns[0], -21); + SPIEL_CHECK_EQ(returns[1], 21); +} + +void GameplayTest2() { + GameParameters params; + std::shared_ptr game = + open_spiel::LoadGame("gin_rummy", params); + std::unique_ptr state = game->NewInitialState(); + std::vector initial_actions; + initial_actions = {1, 4, 5, 6, 17, 18, 19, 30, 31, 32, 2, 3, + 16, 29, 43, 44, 45, 7, 20, 33, 0, 52, 55, 1}; + for (auto action : initial_actions) state->ApplyAction(action); + std::cout << state->ToString() << std::endl; + // Player turn: 0 + // Phase: Knock + // + // Player1: Deadwood=57 + // +--------------------------+ + // | 3s4s 8s | + // | 4c 8c | + // | 4d 8d | + // | 5h6h7h | + // +--------------------------+ + // + // Stock size: 31 Upcard: XX + // Discard pile: + // + // Player0: Deadwood=57 + // +--------------------------+ + // |As 5s6s7s | + // | 5c6c7c | + // | 5d6d7d | + // | | + // +--------------------------+ + // + // Player0 has knocked. There are 6 different melds in Player0's hand. 
+ // Because the melds overlap, the first meld layed dictates the remaining + // melds than can be layed. + // In situations where there is a choice between laying rank melds or suit + // melds, it is often advantageous to lay the hand as rank melds, which offer + // fewer layoffs. Indeed, here Player0 must lay the hand as three rank melds + // to avoid the undercut. + std::vector legal_actions = state->LegalActions(); + SPIEL_CHECK_EQ(legal_actions.size(), 6); + state->ApplyAction(79); // Lay the 5s5c5d + legal_actions = state->LegalActions(); + SPIEL_CHECK_EQ(legal_actions.size(), 2); + state->ApplyAction(84); // Lay the 6s6c6d + legal_actions = state->LegalActions(); + SPIEL_CHECK_EQ(legal_actions.size(), 1); + state->ApplyAction(89); // Lay the 7s7c7d + state->ApplyAction(54); + // Player1 can lay off the 5h6h7h, but there's no need, as it's already + // a meld. + legal_actions = state->LegalActions(); + SPIEL_CHECK_EQ(legal_actions.size(), 4); // 3 layoffs & kPassAction + state->ApplyAction(54); + state->ApplyAction(74); + state->ApplyAction(94); + state->ApplyAction(158); + state->ApplyAction(54); + // Player0 has 1 deadwood and Player1 has 3, so Player0 scores 2 points. + std::vector returns = state->Returns(); + SPIEL_CHECK_EQ(returns[0], 2); + SPIEL_CHECK_EQ(returns[1], -2); +} + +// Potentially tricky corner case. +void GameplayTest3() { + GameParameters params; + std::shared_ptr game = + open_spiel::LoadGame("gin_rummy", params); + std::unique_ptr state = game->NewInitialState(); + std::vector initial_actions; + initial_actions = {10, 11, 12, 22, 35, 48, 13, 26, 1, 40, 9, 8, + 3, 16, 29, 42, 4, 17, 30, 43, 0, 52, 55, 1}; + for (auto action : initial_actions) state->ApplyAction(action); + std::cout << state->ToString() << std::endl; + // Player turn: 0 + // Phase: Knock + // + // Player1: Deadwood=55 + // +--------------------------+ + // | 4s5s 9sTs | + // | 4c5c | + // | 4d5d | + // | 4h5h | + // +--------------------------+ + // + // Stock size: 31 Upcard: XX + // Discard pile: 2s + // + // Player0: Deadwood=65 + // +--------------------------+ + // |As JsQsKs| + // |Ac Tc | + // |Ad Td | + // | 2h Th | + // +--------------------------+ + // + // Player0 has knocked. Player1 will have the opportunity to lay off the Ts. + // We want to make sure that after laying off the Ts, Player1 will then + // be able to lay off the 9s as well. If the Ts only gets counted as + // a layoff on the rank meld of three tens, then the 9s would not lay off. + + // Player0 lays melds + state->ApplyAction(59); + state->ApplyAction(101); + state->ApplyAction(131); + state->ApplyAction(54); + // Player1 lays off the Ts + std::vector legal_actions = state->LegalActions(); + SPIEL_CHECK_TRUE(absl::c_linear_search(legal_actions, 9)); + state->ApplyAction(9); + // Assert Player1 can lay off the 9s + legal_actions = state->LegalActions(); + SPIEL_CHECK_TRUE(absl::c_linear_search(legal_actions, 8)); + state->ApplyAction(8); + // Player1 completes the hand and wins an undercut + state->ApplyAction(54); + state->ApplyAction(75); + state->ApplyAction(80); + state->ApplyAction(54); + std::vector returns = state->Returns(); + SPIEL_CHECK_EQ(returns[0], -27); + SPIEL_CHECK_EQ(returns[1], 27); +} + +// Tests action on the 50th card, and tests that layoffs are not allowed when +// the knocking player has gin. 
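+// Illustrative sketch (not one of the test cases above or below): loading
+// gin_rummy with an explicit knock card, resolving the deal by always taking
+// the first chance outcome, and printing the legal actions at the first
+// decision node. The function name is made up for illustration only.
+void ExampleInspectFirstDecision() {
+  GameParameters params;
+  params["knock_card"] = GameParameter(8);
+  std::shared_ptr<const Game> game = open_spiel::LoadGame("gin_rummy", params);
+  std::unique_ptr<State> state = game->NewInitialState();
+  while (state->IsChanceNode()) {
+    // Deterministically pick the first undealt card at every chance node.
+    state->ApplyAction(state->ChanceOutcomes().front().first);
+  }
+  for (Action action : state->LegalActions()) {
+    std::cout << "Legal: "
+              << state->ActionToString(state->CurrentPlayer(), action)
+              << std::endl;
+  }
+}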
+void WallTest() { + GinRummyUtils utils = GinRummyUtils(kDefaultNumRanks, kDefaultNumSuits, + kDefaultHandSize); + GameParameters params; + std::shared_ptr game = + open_spiel::LoadGame("gin_rummy", params); + std::unique_ptr state = game->NewInitialState(); + std::vector legal_actions; + std::vector initial_actions; + initial_actions = {8, 9, 10, 11, 12, 13, 14, 15, 48, 49, 0, 1, 2, 3, + 4, 5, 6, 7, 50, 51, 16, 54, 54, 53, 17, 17, 53, 18, + 18, 53, 19, 19, 53, 20, 20, 53, 21, 21, 53, 22, 22, 53, + 23, 23, 53, 24, 24, 53, 25, 25, 53, 26, 26, 53, 27, 27, + 53, 28, 28, 53, 29, 29, 53, 30, 30, 53, 31, 31, 53, 32, + 32, 53, 33, 33, 53, 34, 34, 53, 35, 35, 53, 36, 36, 53, + 37, 37, 53, 38, 38, 53, 39, 39, 53, 40, 40, 53, 41, 41, + 53, 42, 42, 53, 43, 43, 53, 44, 44, 53, 46, 49}; + for (auto action : initial_actions) state->ApplyAction(action); + std::cout << state->ToString() << std::endl; + // Player turn: 1 + // Phase: Wall + // + // Player1: Deadwood=20 + // +--------------------------+ + // |As2s3s4s5s6s7s8s | + // | | + // | | + // | QhKh| + // +--------------------------+ + // + // Stock size: 2 Upcard: Jh + // Discard pile: 4c5c6c7c8c9cTcJcQcKcAd2d3d4d5d6d7d8d9dTdJdQdKdAh2h3h4h5h6h + // + // Player0: Deadwood=18 + // +--------------------------+ + // | 9sTsJsQsKs| + // |Ac2c3c | + // | | + // | 8h Th | + // +--------------------------+ + // + // We've reached the wall (i.e. only two cards are left in the stock). + // Player1 is not allowed to draw from the stock, and instead must either pass + // (which ends the game) or knock (if legal). In this case, Player1 can gin. + // First let's make sure the game ends if Player1 passes. + state->ApplyAction(54); + SPIEL_CHECK_TRUE(state->IsTerminal()); + // Now let's reset the state as depicted above and knock instead. + state = game->NewInitialState(); + for (auto action : initial_actions) state->ApplyAction(action); + legal_actions = state->LegalActions(); + SPIEL_CHECK_TRUE(absl::c_linear_search(legal_actions, kKnockAction)); + // Player1 knocks and lays melds. + state->ApplyAction(55); + state->ApplyAction(0); + state->ApplyAction(126); + state->ApplyAction(164); + state->ApplyAction(166); + state->ApplyAction(54); + // Player1 made gin, so Player0 cannot layoff the Th on JhQhKh + legal_actions = state->LegalActions(); + SPIEL_CHECK_FALSE(absl::c_linear_search(legal_actions, utils.CardInt("Th"))); + // Player0 lays melds. + state->ApplyAction(213); + state->ApplyAction(132); + state->ApplyAction(54); + legal_actions = state->LegalActions(); + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_EQ(legal_actions.size(), 0); + std::vector returns = state->Returns(); + SPIEL_CHECK_EQ(returns[1], 43); // 25 point gin bonus + 18 deadwood + SPIEL_CHECK_EQ(returns[0], -43); +} + +// The rules of gin rummy do not explicitly prevent infinite action sequences. +// Both players can keep drawing the upcard and discarding indefinitely. This +// is poor strategy and never occurs in actual play, but we need a way of +// ensuring the game is finite. In doing so, we don't want to prematurely +// declare a draw and prevent legitimate lines of play. Our solution is to cap +// the number of times the upcard can be drawn at 50. This is well above +// anything observed in actual play, and corresponds nicely to the 50 cards in +// play each hand. 
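+// The cap is kMaxNumDrawUpcardActions in gin_rummy.h; when ApplyDrawAction()
+// hits it the phase is set to kGameOver and Returns() gives both players 0,
+// which is what the test below checks.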
+void MaxGameLengthTest() { + GameParameters params; + std::shared_ptr game = + open_spiel::LoadGame("gin_rummy", params); + std::unique_ptr state = game->NewInitialState(); + std::vector initial_actions; + // Deal hands + initial_actions = {0, 1, 2, 13, 14, 15, 26, 27, 28, 39, 9, + 10, 11, 12, 23, 24, 25, 36, 37, 38, 40}; + // Loop of drawing and discarding. + for (int i = 0; i < 16; ++i) { + initial_actions.push_back(52); + initial_actions.push_back(0); + initial_actions.push_back(52); + initial_actions.push_back(12); + initial_actions.push_back(52); + initial_actions.push_back(1); + } + initial_actions.push_back(52); + initial_actions.push_back(0); + initial_actions.push_back(52); + initial_actions.push_back(12); + // 51st time an upcard is drawn ends the game in a draw. + initial_actions.push_back(52); + for (auto action : initial_actions) { + state->ApplyAction(action); + } + SPIEL_CHECK_TRUE(state->IsTerminal()); + std::vector returns = state->Returns(); + SPIEL_CHECK_EQ(returns[0], 0); + SPIEL_CHECK_EQ(returns[1], 0); +} + +// Tests Oklahoma variation, where the value of the initial upcard determines +// the knock card. Oklahoma is standard in competitive play. It increases the +// skill level as correct strategy changes in response to the knock card. +void OklahomaTest() { + GameParameters params; + params["oklahoma"] = GameParameter(true); + std::shared_ptr game = + open_spiel::LoadGame("gin_rummy", params); + std::unique_ptr state = game->NewInitialState(); + std::vector initial_actions; + initial_actions = {35, 37, 10, 11, 41, 14, 15, 16, 48, 49, 0, 1, + 2, 3, 4, 5, 6, 7, 8, 51, 13, 54, 52}; + for (auto action : initial_actions) { + state->ApplyAction(action); + } + std::cout << state->ToString() << std::endl; + // Player turn: 1 + // Phase: Discard + // + // Player1: Deadwood=3 + // +--------------------------+ + // |As2s3s4s5s6s7s8s | + // |Ac | + // | | + // | 2h Kh| + // +--------------------------+ + // + // Stock size: 31 Upcard: XX + // Discard pile: + // + // Player0: Deadwood=20 + // +--------------------------+ + // | 9sTsJsQsKs| + // | 2c3c4c | + // | | + // | ThJh | + // +--------------------------+ + // + // The initial upcard was the Ac, which was passed by Player0 and taken by + // Player1. Player1 has 1 deadwood, but since we're playing Oklahoma that's + // not low enough for a knock. In this case, the upcard was an ace, so both + // players must play for gin. + + // Assert Player1 cannot knock. + std::vector legal_actions = state->LegalActions(); + SPIEL_CHECK_FALSE(absl::c_linear_search(legal_actions, kKnockAction)); + // Play continues. + state->ApplyAction(51); + state->ApplyAction(53); + state->ApplyAction(26); + state->ApplyAction(26); + state->ApplyAction(52); + std::cout << state->ToString() << std::endl; + // Player turn: 1 + // Phase: Discard + // + // Player1: Deadwood=0 + // +--------------------------+ + // |As2s3s4s5s6s7s8s9s | + // |Ac | + // |Ad | + // | | + // +--------------------------+ + // + // Stock size: 30 Upcard: XX + // Discard pile: Kh + // + // Player0: Deadwood=63 + // +--------------------------+ + // | JsQs | + // | 2c3c4c | + // | Td Qd | + // | 3h ThJh | + // +--------------------------+ + // + // Player1 can now knock with gin. 
+ legal_actions = state->LegalActions(); + SPIEL_CHECK_TRUE(absl::c_linear_search(legal_actions, kKnockAction)); + state->ApplyAction(55); + state->ApplyAction(8); + state->ApplyAction(59); + state->ApplyAction(122); + state->ApplyAction(169); + state->ApplyAction(54); + state->ApplyAction(133); + state->ApplyAction(54); + SPIEL_CHECK_TRUE(state->IsTerminal()); + legal_actions = state->LegalActions(); + SPIEL_CHECK_EQ(legal_actions.size(), 0); + std::vector returns = state->Returns(); + SPIEL_CHECK_EQ(returns[1], 88); // 25 point gin bonus + 63 deadwood + SPIEL_CHECK_EQ(returns[0], -88); +} + +// Basic Observer functionality test. +void ObserverTest() { + GameParameters params; + std::shared_ptr game = + open_spiel::LoadGame("gin_rummy", params); + + std::shared_ptr observer = game->MakeObserver(kDefaultObsType, + params); + Observation observation = Observation(*game, observer); + + std::unique_ptr state = game->NewInitialState(); + std::vector initial_actions; + initial_actions = {1, 4, 5, 6, 17, 18, 19, 30, 31, 32, 2, 3, + 16, 29, 43, 44, 45, 7, 20, 33, 0, 52, 55, 1}; + for (auto action : initial_actions) state->ApplyAction(action); + std::cout << state->ToString() << std::endl; + + for (Player player = 0; player < kNumPlayers; ++player) { + std::cout << observation.StringFrom(*state, player) << std::endl; + observation.SetFrom(*state, player); + std::cout << observation.Tensor() << std::endl; + SPIEL_CHECK_EQ(observation.Tensor(), state->ObservationTensor(player)); + std::cout << state->InformationStateString(player) << std::endl; + } +} + +// TODO(jhtschultz) Add more extensive testing of parameterized deck size. +void DeckSizeTests() { + const int kNumRanks = 10; + const int kNumSuits = 3; + const int kHandSize = 7; + GinRummyUtils utils = GinRummyUtils(kNumRanks, kNumSuits, kHandSize); + std::vector full_deck; + for (int i = 0; i < 30; ++i) full_deck.push_back(i); + std::vector> all_melds = utils.AllMelds(full_deck); + SPIEL_CHECK_EQ(all_melds.size(), 73); // 73 melds in a 10x3 deck. + // Check string representation of hand. + SPIEL_CHECK_EQ(utils.HandToString(full_deck), + "+--------------------+\n" + "|As2s3s4s5s6s7s8s9sTs|\n" + "|Ac2c3c4c5c6c7c8c9cTc|\n" + "|Ad2d3d4d5d6d7d8d9dTd|\n" + "+--------------------+\n"); + // Random sims with 10x3 deck size. + GameParameters params; + params["num_ranks"] = GameParameter(10); + params["num_suits"] = GameParameter(3); + params["hand_size"] = GameParameter(7); + std::shared_ptr game = + open_spiel::LoadGame("gin_rummy", params); + testing::RandomSimTest(*game, 10); +} + +} // namespace +} // namespace gin_rummy +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::gin_rummy::BasicGameTests(); + open_spiel::gin_rummy::MeldTests(); + open_spiel::gin_rummy::GameplayTest1(); + open_spiel::gin_rummy::GameplayTest2(); + open_spiel::gin_rummy::GameplayTest3(); + open_spiel::gin_rummy::MaxGameLengthTest(); + open_spiel::gin_rummy::WallTest(); + open_spiel::gin_rummy::OklahomaTest(); + open_spiel::gin_rummy::ObserverTest(); + open_spiel::gin_rummy::DeckSizeTests(); + std::cout << "Gin rummy tests passed!" 
<< std::endl; +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/gin_rummy/gin_rummy_utils.cc b/scenarios/bargaining/open_spiel/open_spiel/games/gin_rummy/gin_rummy_utils.cc new file mode 100644 index 0000000..5a1685d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/gin_rummy/gin_rummy_utils.cc @@ -0,0 +1,514 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/gin_rummy/gin_rummy_utils.h" + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace gin_rummy { + +GinRummyUtils::GinRummyUtils(int num_ranks, int num_suits, int hand_size) : + num_ranks(num_ranks), + num_suits(num_suits), + num_cards(num_ranks * num_suits), + hand_size(hand_size), + suit_comp(SuitComparator(num_ranks)), + rank_comp(RankComparator(num_ranks)), + int_to_meld(BuildIntToMeldMap()), + meld_to_int(BuildMeldToIntMap()) { +} + +int GinRummyUtils::CardSuit(int card) const { return card / num_ranks; } +int GinRummyUtils::CardRank(int card) const { return card % num_ranks; } + +// All suits are of equal value and suit ordering never factors into gameplay. +constexpr char kRankChar[] = "A23456789TJQK"; +constexpr char kSuitChar[] = "scdh"; + +std::string GinRummyUtils::CardString(absl::optional card) const { + if (!card.has_value()) return "XX"; + SPIEL_CHECK_GE(card.value(), 0); + SPIEL_CHECK_LT(card.value(), num_cards); + return {kRankChar[CardRank(card.value())], kSuitChar[CardSuit(card.value())]}; +} + +int GinRummyUtils::CardInt(std::string card) const { + SPIEL_CHECK_EQ(card.length(), 2); + int rank = strchr(kRankChar, card[0]) - kRankChar; + int suit = strchr(kSuitChar, card[1]) - kSuitChar; + return suit * num_ranks + rank; +} + +std::vector GinRummyUtils::CardIntsToCardStrings( + const VecInt &cards) const { + std::vector rv; + for (int card : cards) { + rv.push_back(CardString(card)); + } + return rv; +} + +VecInt GinRummyUtils::CardStringsToCardInts( + const std::vector &cards) const { + VecInt rv; + for (const std::string &card : cards) { + rv.push_back(CardInt(card)); + } + return rv; +} + +// TODO(jhtschultz) should kHandStringSize depend on deck size? 
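+// For the default 13x4 deck the rendered hand is (num_suits + 2) rows of
+// (2 * num_ranks + 3) characters, i.e. 6 * 29 = 174 chars, which is the
+// value reserved below.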
+std::string GinRummyUtils::HandToString(const VecInt &cards) const { + std::string rv; + constexpr int kHandStringSize = 174; + rv.reserve(kHandStringSize); + // Top border + absl::StrAppend(&rv, "+"); + for (int i = 0; i < num_ranks; ++i) + absl::StrAppend(&rv, "--"); + absl::StrAppend(&rv, "+\n"); + // One row for each suit + for (int i = 0; i < num_suits; ++i) { + absl::StrAppend(&rv, "|"); + for (int j = 0; j < num_ranks; ++j) { + if (absl::c_linear_search(cards, (i * num_ranks) + j)) { + absl::StrAppend(&rv, CardString((i * num_ranks) + j)); + } else { + absl::StrAppend(&rv, " "); + } + } + absl::StrAppend(&rv, "|\n"); + } + // Bottom border + absl::StrAppend(&rv, "+"); + for (int i = 0; i < num_ranks; ++i) + absl::StrAppend(&rv, "--"); + absl::StrAppend(&rv, "+\n"); + return rv; +} + +// Ace = 1, deuce = 2, ... , face cards = 10. +int GinRummyUtils::CardValue(int card_index) const { + int value = CardRank(card_index) + 1; + return std::min(10, value); +} + +// Sums point total over all cards. +int GinRummyUtils::TotalCardValue(const VecInt &cards) const { + int total_value = 0; + for (int card : cards) { + total_value += CardValue(card); + } + return total_value; +} + +// Sums point total over all cards. +int GinRummyUtils::TotalCardValue(const VecVecInt &meld_group) const { + int total_value = 0; + for (const auto &meld : meld_group) { + for (auto card : meld) { + total_value += CardValue(card); + } + } + return total_value; +} + +bool GinRummyUtils::IsConsecutive(const VecInt &v) const { + for (int i = 1; i < v.size(); ++i) { + if (v[i] != v[i - 1] + 1) return false; + } + return true; +} + +bool GinRummyUtils::IsRankMeld(const VecInt &cards) const { + if (cards.size() != 3 && cards.size() != 4) { + return false; + } + for (int i = 1; i < cards.size(); ++i) { + if (CardRank(cards[0]) != CardRank(cards[i])) { + return false; + } + } + return true; +} + +bool GinRummyUtils::IsSuitMeld(const VecInt &cards) const { + if (cards.size() < 3) { + return false; + } + // Check all of the same suit. + for (int i = 1; i < cards.size(); ++i) { + if (CardSuit(cards[0]) != CardSuit(cards[i])) { + return false; + } + } + // Check ranks are consecutive. + VecInt ranks; + for (int i = 0; i < cards.size(); ++i) { + ranks.push_back(CardRank(cards[i])); + } + absl::c_sort(ranks); + return IsConsecutive(ranks); +} + +// Returns all possible rank melds that can be formed from cards. +VecVecInt GinRummyUtils::RankMelds(VecInt cards) const { + VecVecInt melds; + if (cards.size() < 3) { + return melds; + } + absl::c_sort(cards, rank_comp); + // First do a sweep for 4 card melds. + for (int i = 0; i < cards.size() - 3; ++i) { + // Found 4 card meld - implies there are four 3 card melds as well. + // We only add two of the 3 card melds here, the other two get added + // during the 3 card meld sweep. + if (CardRank(cards[i]) == CardRank(cards[i + 3])) { + melds.emplace_back(VecInt(cards.begin() + i, cards.begin() + i + 4)); + melds.emplace_back(VecInt{cards[i], cards[i + 1], cards[i + 3]}); + melds.emplace_back(VecInt{cards[i], cards[i + 2], cards[i + 3]}); + } + } + // Sweep for 3 card melds. + for (int i = 0; i < cards.size() - 2; ++i) { + if (CardRank(cards[i]) == CardRank(cards[i + 2])) { + melds.emplace_back(VecInt(cards.begin() + i, cards.begin() + i + 3)); + } + } + return melds; +} + +// Returns all possible suit melds that can be formed from cards. 
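+// For example, {As, 2s, 3s, 4s} yields three suit melds: As2s3s4s, As2s3s
+// and 2s3s4s.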
+VecVecInt GinRummyUtils::SuitMelds(VecInt cards) const { + VecVecInt melds; + if (cards.size() < 3) { + return melds; + } + absl::c_sort(cards, suit_comp); + // Find all suit melds of length 5. + if (cards.size() >= 5) { + for (int i = 0; i < cards.size() - 4; ++i) { + if (cards[i] == cards[i + 4] - 4 && + CardSuit(cards[i]) == CardSuit(cards[i + 4])) { + melds.emplace_back(VecInt(cards.begin() + i, cards.begin() + i + 5)); + } + } + } + // Find all suit melds of length 4. + if (cards.size() >= 4) { + for (int i = 0; i < cards.size() - 3; ++i) { + if (cards[i] == cards[i + 3] - 3 && + CardSuit(cards[i]) == CardSuit(cards[i + 3])) { + melds.emplace_back(VecInt(cards.begin() + i, cards.begin() + i + 4)); + } + } + } + // Find all suit melds of length 3. + for (int i = 0; i < cards.size() - 2; ++i) { + if (cards[i] == cards[i + 2] - 2 && + CardSuit(cards[i]) == CardSuit(cards[i + 2])) { + melds.emplace_back(VecInt(cards.begin() + i, cards.begin() + i + 3)); + } + } + return melds; +} + +// Returns all melds of length 5 or less. Any meld of length 6 or more can +// be expressed as two or more melds of shorter length. +VecVecInt GinRummyUtils::AllMelds(const VecInt &cards) const { + VecVecInt rank_melds = RankMelds(cards); + VecVecInt suit_melds = SuitMelds(cards); + rank_melds.insert(rank_melds.end(), suit_melds.begin(), suit_melds.end()); + return rank_melds; +} + +bool GinRummyUtils::VectorsIntersect(VecInt *v1, VecInt *v2) const { + absl::c_sort(*v1); + absl::c_sort(*v2); + VecInt::iterator first1 = v1->begin(); + VecInt::iterator last1 = v1->end(); + VecInt::iterator first2 = v2->begin(); + VecInt::iterator last2 = v2->end(); + + while (first1 != last1 && first2 != last2) { + if (*first1 < *first2) { + ++first1; + } else if (*first2 < *first1) { + ++first2; + } else { + return true; + } + } + return false; +} + +// Returns melds which do not share any common cards with given meld. +VecVecInt GinRummyUtils::NonOverlappingMelds(VecInt *meld, + VecVecInt *melds) const { + VecVecInt rv; + for (int i = 0; i < melds->size(); ++i) { + if (!VectorsIntersect(meld, &(*melds)[i])) { + rv.push_back((*melds)[i]); + } + } + return rv; +} + +// Depth first search used by AllMeldGroups. +void GinRummyUtils::AllPaths(VecInt *meld, VecVecInt *all_melds, + VecVecInt *path, VecVecVecInt *all_paths) const { + path->push_back(*meld); + VecVecInt child_melds = NonOverlappingMelds(meld, all_melds); + if (child_melds.empty()) { + all_paths->push_back(*path); + } else { + for (auto child_meld : child_melds) { + AllPaths(&child_meld, &child_melds, path, all_paths); + } + } + path->pop_back(); +} + +// A meld group is an arrangement of cards into distinct melds. +// Accordingly, no two melds in a meld group can share the same card. +VecVecVecInt GinRummyUtils::AllMeldGroups(const VecInt &cards) const { + VecVecInt all_melds = AllMelds(cards); + VecVecVecInt all_meld_groups; + for (VecInt meld : all_melds) { + VecVecInt path; + AllPaths(&meld, &all_melds, &path, &all_meld_groups); + } + return all_meld_groups; +} + +// "Best" means any meld group that achieves the lowest possible deadwood +// count for the given cards. In general this is non-unique. 
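+// Because deadwood equals the total hand value minus the value of the melded
+// cards, the meld group with the maximum melded value is also the one with
+// the minimum deadwood, which is what the loop below searches for.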
+VecVecInt GinRummyUtils::BestMeldGroup(const VecInt &cards) const { + int best_meld_group_total_value = 0; + VecVecInt best_meld_group; + VecVecVecInt all_meld_groups = AllMeldGroups(cards); + for (const auto &meld_group : all_meld_groups) { + int meld_group_total_value = TotalCardValue(meld_group); + if (meld_group_total_value > best_meld_group_total_value) { + best_meld_group_total_value = meld_group_total_value; + best_meld_group = meld_group; + } + } + return best_meld_group; +} + +// Minimum deadwood count over all meld groups. +int GinRummyUtils::MinDeadwood(VecInt hand, absl::optional card) const { + if (card.has_value()) hand.push_back(card.value()); + return MinDeadwood(hand); +} + +// Minimum deadwood count over all meld groups. +int GinRummyUtils::MinDeadwood(const VecInt &hand) const { + VecInt deadwood = hand; + VecVecInt best_melds = BestMeldGroup(hand); + + for (const auto &meld : best_melds) { + for (auto card : meld) { + deadwood.erase(std::remove(deadwood.begin(), deadwood.end(), card), + deadwood.end()); + } + } + // If we have just drawn a card, we can discard the one worth the most points. + if (hand.size() == hand_size + 1 && !deadwood.empty()) { + absl::c_sort(deadwood, rank_comp); + deadwood.pop_back(); + } + int deadwood_total = 0; + for (int card : deadwood) deadwood_total += CardValue(card); + return deadwood_total; +} + +// Returns the unique card that can be layed off on a given 3-card rank meld. +int GinRummyUtils::RankMeldLayoff(const VecInt &meld) const { + SPIEL_CHECK_EQ(meld.size(), 3); + SPIEL_CHECK_TRUE(IsRankMeld(meld)); + VecInt suits = {0, 1, 2, 3}; + for (int card : meld) { + suits.erase(std::remove(suits.begin(), suits.end(), CardSuit(card)), + suits.end()); + } + return CardRank(meld[0]) + suits[0] * num_ranks; +} + +// Suit melds have two layoffs, except if the meld ends in an ace or king. +VecInt GinRummyUtils::SuitMeldLayoffs(const VecInt &meld) const { + VecInt layoffs; + int min_card_index = *std::min_element(meld.begin(), meld.end()); + if (CardRank(min_card_index) > 0) { + layoffs.push_back(min_card_index - 1); + } + int max_card_index = *std::max_element(meld.begin(), meld.end()); + if (CardRank(max_card_index) < num_ranks - 1) { + layoffs.push_back(max_card_index + 1); + } + return layoffs; +} + +// Finds melds which can be layed legally given a knock card. +// Consider 6s7s8s, 6c7c8c, 8s8c8d. Laying 8s8c8d prevents us from using +// the 6's and 7's in melds, leaving us with 26 points. Laying the two suit +// melds leaves only the 8d for 8 points. +// Returns vector of meld_ids (see MeldToInt). +VecInt GinRummyUtils::LegalMelds(const VecInt &hand, int knock_card) const { + int total_hand_value = TotalCardValue(hand); + std::set meld_set; + VecInt hand_(hand); + VecVecVecInt all_meld_groups = AllMeldGroups(hand_); + for (const auto &meld_group : all_meld_groups) { + int meld_group_total_value = TotalCardValue(meld_group); + if (total_hand_value - meld_group_total_value <= knock_card) { + for (const auto &meld : meld_group) { + meld_set.insert(meld_to_int.at(meld)); + } + } + } + return VecInt(meld_set.begin(), meld_set.end()); +} + +// Returns the legal discards when a player has knocked. Normally a player can +// discard any card in their hand. When a player knocks, however, they must +// discard a card that preseves the ability to arrange the hand so that the +// total deadwood is less than the knock card. 
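+// For instance, with a knock card of 10, discarding a card is legal only if
+// the ten remaining cards can still be arranged with at most 10 deadwood.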
+VecInt GinRummyUtils::LegalDiscards(const VecInt &hand, int knock_card) const { + std::set legal_discards; + for (int i = 0; i < hand.size(); ++i) { + VecInt hand_(hand); + hand_.erase(hand_.begin() + i); + int deadwood = MinDeadwood(hand_); + if (deadwood <= knock_card) { + legal_discards.insert(hand[i]); + } + } + return VecInt(legal_discards.begin(), legal_discards.end()); +} + +VecInt GinRummyUtils::AllLayoffs(const VecInt &layed_melds, + const VecInt &previous_layoffs) const { + std::set layoffs; + for (int meld_id : layed_melds) { + VecInt meld = int_to_meld.at(meld_id); + if (IsRankMeld(meld) && meld.size() == 3) { + layoffs.insert(RankMeldLayoff(meld)); + } else if (IsSuitMeld(meld)) { + VecInt suit_layoffs = SuitMeldLayoffs(meld); + for (int card : previous_layoffs) { + if (absl::c_linear_search(suit_layoffs, card)) { + meld.push_back(card); + } + } + suit_layoffs = SuitMeldLayoffs(meld); + for (int card : suit_layoffs) { + layoffs.insert(card); + } + } + } + return VecInt(layoffs.begin(), layoffs.end()); +} + +// This mapping should not depend on the order of melds returned by +// AllMelds, which is subject to change. +// See MeldToInt for a description of the mapping. +std::map GinRummyUtils::BuildMeldToIntMap() const { + std::map rv; + VecInt full_deck; + for (int i = 0; i < num_cards; ++i) full_deck.push_back(i); + VecVecInt all_melds = AllMelds(full_deck); + for (int i = 0; i < all_melds.size(); ++i) { + int meld_id = MeldToInt(all_melds[i]); + rv.insert(std::pair(all_melds[i], meld_id)); + } + return rv; +} + +// Builds the reverse map [0, 185] -> meld. +// May not be fast but only gets run once. +std::map GinRummyUtils::BuildIntToMeldMap() const { + const int kNumCards = 52; + std::map rv; + VecInt full_deck; + for (int i = 0; i < kNumCards; ++i) full_deck.push_back(i); + VecVecInt all_melds = AllMelds(full_deck); + for (int i = 0; i < all_melds.size(); ++i) { + for (const auto &meld : all_melds) { + if (MeldToInt(meld) == i) { + rv.insert(std::pair(i, meld)); + break; + } + } + } + return rv; +} + +// Defines a mapping from melds to ints. +// There are 185 distinct melds in total, 65 rank melds and 120 suit melds. +// Rank melds are ordered by ascending rank. For each rank, there are 5 melds. +// The four melds of size 3 are ordered by the suit of the card missing from +// the meld (i.e. 2c2d2h precedes 2s2h2d because the 2s, missing from the first +// meld, precedes the 2c, missing from the second). +// The fifth rank meld is the unique meld containing all four cards of a +// given rank. +// Suit melds are ordered first by size, then by suit (scdh), then by rank. 
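+// Examples (standard 13x4 deck): AcAdAh -> 0 (the missing card is the As),
+// AsAcAdAh -> 4, 2c2d2h -> 5, As2s3s -> 65, JhQhKh -> 108 (the last suit
+// meld of size three), As2s3s4s -> 109. In total: 65 + 44 + 40 + 36 = 185.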
+int GinRummyUtils::MeldToInt(VecInt meld) const { + const int kNumRanks = 13; + const int kNumSuits = 4; + if (IsRankMeld(meld)) { + if (meld.size() == 3) { + VecInt suits; + for (int i = 0; i < kNumSuits; ++i) suits.push_back(i); + for (int card : meld) { + suits.erase(std::remove(suits.begin(), suits.end(), CardSuit(card)), + suits.end()); + } + return (CardRank(meld[0]) * 5) + suits[0]; + } else if (meld.size() == 4) { + return (CardRank(meld[0]) * 5) + 4; + } + SpielFatalError("Impossible meld size"); + } else if (IsSuitMeld(meld)) { + absl::c_sort(meld, rank_comp); + int offset = 65; // 65 rank melds + if (meld.size() == 3) { + return offset + (CardSuit(meld[0]) * (kNumRanks - 2)) + + CardRank(meld[0]); + } + offset += 44; // 44 suit melds of size three + if (meld.size() == 4) { + return offset + (CardSuit(meld[0]) * (kNumRanks - 3)) + + CardRank(meld[0]); + } + offset += 40; // 40 suit melds of size four + if (meld.size() == 5) { + return offset + (CardSuit(meld[0]) * (kNumRanks - 4)) + + CardRank(meld[0]); + } + SpielFatalError("Impossible meld size"); + } else { + SpielFatalError("Not a meld"); + } +} + +} // namespace gin_rummy +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/gin_rummy/gin_rummy_utils.h b/scenarios/bargaining/open_spiel/open_spiel/games/gin_rummy/gin_rummy_utils.h new file mode 100644 index 0000000..af3e070 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/gin_rummy/gin_rummy_utils.h @@ -0,0 +1,125 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_GIN_RUMMY_UTILS_H_ +#define OPEN_SPIEL_GAMES_GIN_RUMMY_UTILS_H_ + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" + +namespace open_spiel { +namespace gin_rummy { + +using VecInt = std::vector; +using VecVecInt = std::vector>; +using VecVecVecInt = std::vector>>; + +struct SuitComparator { + explicit SuitComparator(const int num_ranks) : num_ranks(num_ranks) {} + int CardSuit(int card) { return card / num_ranks; } + bool operator()(int card_1, int card_2) { + if (CardSuit(card_1) == CardSuit(card_2)) { + return card_1 < card_2; + } + return CardSuit(card_1) < CardSuit(card_2); + } + const int num_ranks; +}; + +struct RankComparator { + explicit RankComparator(const int num_ranks) : num_ranks(num_ranks) {} + int CardRank(int card) { return card % num_ranks; } + bool operator()(int card_1, int card_2) { + if (CardRank(card_1) == CardRank(card_2)) { + return card_1 < card_2; + } + return CardRank(card_1) < CardRank(card_2); + } + const int num_ranks; +}; + + +struct GinRummyUtils { + GinRummyUtils(int num_ranks, int num_suits, int hand_size); + + const int num_ranks; + const int num_suits; + const int num_cards; + const int hand_size; + + const SuitComparator suit_comp; + const RankComparator rank_comp; + + // This mapping is independent of changes to num_ranks and num_suits. 
+ const std::map int_to_meld; + const std::map meld_to_int; + + std::string CardString(absl::optional card) const; + std::string HandToString(const VecInt &cards) const; + + int CardInt(std::string card) const; + + std::vector CardIntsToCardStrings(const VecInt &cards) const; + VecInt CardStringsToCardInts(const std::vector &cards) const; + + int CardValue(int card_index) const; + int TotalCardValue(const VecInt &cards) const; + int TotalCardValue(const VecVecInt &meld_group) const; + int CardRank(const int card_index) const; + int CardSuit(const int card_index) const; + + bool IsConsecutive(const VecInt &v) const; + bool IsRankMeld(const VecInt &cards) const; + bool IsSuitMeld(const VecInt &cards) const; + + VecVecInt RankMelds(VecInt cards) const; + VecVecInt SuitMelds(VecInt cards) const; + VecVecInt AllMelds(const VecInt &cards) const; + + bool VectorsIntersect(VecInt *v1, VecInt *v2) const; + + VecVecInt NonOverlappingMelds(VecInt *meld, VecVecInt *melds) const; + + void AllPaths(VecInt *meld, VecVecInt *all_melds, VecVecInt *path, + VecVecVecInt *all_paths) const; + + VecVecVecInt AllMeldGroups(const VecInt &cards) const; + + VecVecInt BestMeldGroup(const VecInt &cards) const; + + int MinDeadwood(VecInt hand, absl::optional card) const; + int MinDeadwood(const VecInt &hand) const; + + int RankMeldLayoff(const VecInt &meld) const; + VecInt SuitMeldLayoffs(const VecInt &meld) const; + + VecInt LegalMelds(const VecInt &hand, int knock_card) const; + VecInt LegalDiscards(const VecInt &hand, int knock_card) const; + + VecInt AllLayoffs(const VecInt &layed_melds, + const VecInt &previous_layoffs) const; + + int MeldToInt(VecInt meld) const; + + std::map BuildMeldToIntMap() const; + std::map BuildIntToMeldMap() const; +}; + +} // namespace gin_rummy +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_GIN_RUMMY_UTILS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/go/go.cc b/scenarios/bargaining/open_spiel/open_spiel/games/go/go.cc new file mode 100644 index 0000000..becbbe4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/go/go.cc @@ -0,0 +1,244 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/go/go.h" + +#include + +#include "open_spiel/game_parameters.h" +#include "open_spiel/games/go/go_board.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace go { +namespace { + +// Facts about the game +const GameType kGameType{ + /*short_name=*/"go", + /*long_name=*/"Go", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"komi", GameParameter(7.5)}, + {"board_size", GameParameter(19)}, + {"handicap", GameParameter(0)}, + // After the maximum game length, the game will end arbitrarily and the + // score is computed as usual (i.e. number of stones + komi). + // It's advised to only use shorter games to compute win-rates. + // When not provided, it defaults to DefaultMaxGameLength(board_size) + {"max_game_length", + GameParameter(GameParameter::Type::kInt, /*is_mandatory=*/false)}}, +}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new GoGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +std::vector HandicapStones(int num_handicap) { + if (num_handicap < 2 || num_handicap > 9) return {}; + + static std::array placement = { + {MakePoint("d4"), MakePoint("q16"), MakePoint("d16"), MakePoint("q4"), + MakePoint("d10"), MakePoint("q10"), MakePoint("k4"), MakePoint("k16"), + MakePoint("k10")}}; + static VirtualPoint center = MakePoint("k10"); + + std::vector points; + points.reserve(num_handicap); + for (int i = 0; i < num_handicap; ++i) { + points.push_back(placement[i]); + } + + if (num_handicap >= 5 && num_handicap % 2 == 1) { + points[num_handicap - 1] = center; + } + + return points; +} + +} // namespace + +GoState::GoState(std::shared_ptr game, int board_size, float komi, + int handicap) + : State(std::move(game)), + board_(board_size), + komi_(komi), + handicap_(handicap), + max_game_length_(game_->MaxGameLength()), + to_play_(GoColor::kBlack) { + ResetBoard(); +} + +std::string GoState::InformationStateString(int player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return HistoryString(); +} + +std::string GoState::ObservationString(int player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void GoState::ObservationTensor(int player, absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + int num_cells = board_.board_size() * board_.board_size(); + SPIEL_CHECK_EQ(values.size(), num_cells * (CellStates() + 1)); + std::fill(values.begin(), values.end(), 0.); + + // Add planes: black, white, empty. + int cell = 0; + for (VirtualPoint p : BoardPoints(board_.board_size())) { + int color_val = static_cast(board_.PointColor(p)); + values[num_cells * color_val + cell] = 1.0; + ++cell; + } + SPIEL_CHECK_EQ(cell, num_cells); + + // Add a fourth binary plane for komi (whether white is to play). + std::fill(values.begin() + (CellStates() * num_cells), values.end(), + (to_play_ == GoColor::kWhite ? 
1.0 : 0.0)); +} + +std::vector GoState::LegalActions() const { + std::vector actions{}; + if (IsTerminal()) return actions; + for (VirtualPoint p : BoardPoints(board_.board_size())) { + if (board_.IsLegalMove(p, to_play_)) { + actions.push_back(board_.VirtualActionToAction(p)); + } + } + actions.push_back(board_.pass_action()); + return actions; +} + +std::string GoState::ActionToString(Player player, Action action) const { + return absl::StrCat( + GoColorToString(static_cast(player)), " ", + VirtualPointToString(board_.ActionToVirtualAction(action))); +} + +std::string GoState::ToString() const { + std::stringstream ss; + ss << "GoState(komi=" << komi_ << ", to_play=" << GoColorToString(to_play_) + << ", history.size()=" << history_.size() << ")\n"; + ss << board_; + return ss.str(); +} + +bool GoState::IsTerminal() const { + if (history_.size() < 2) return false; + return (history_.size() >= max_game_length_) || superko_ || + (history_[history_.size() - 1].action == board_.pass_action() && + history_[history_.size() - 2].action == board_.pass_action()); +} + +std::vector GoState::Returns() const { + if (!IsTerminal()) return {0.0, 0.0}; + + if (superko_) { + // Superko rules (https://senseis.xmp.net/?Superko) are complex and vary + // between rulesets. + // For simplicity and because superkos are very rare, we just treat them as + // a draw. + return {DrawUtility(), DrawUtility()}; + } + + // Score with Tromp-Taylor. + float black_score = TrompTaylorScore(board_, komi_, handicap_); + + std::vector returns(go::NumPlayers()); + if (black_score > 0) { + returns[ColorToPlayer(GoColor::kBlack)] = WinUtility(); + returns[ColorToPlayer(GoColor::kWhite)] = LossUtility(); + } else if (black_score < 0) { + returns[ColorToPlayer(GoColor::kBlack)] = LossUtility(); + returns[ColorToPlayer(GoColor::kWhite)] = WinUtility(); + } else { + returns[ColorToPlayer(GoColor::kBlack)] = DrawUtility(); + returns[ColorToPlayer(GoColor::kWhite)] = DrawUtility(); + } + return returns; +} + +std::unique_ptr GoState::Clone() const { + return std::unique_ptr(new GoState(*this)); +} + +void GoState::UndoAction(Player player, Action action) { + // We don't have direct undo functionality, but copying the board and + // replaying all actions is still pretty fast (> 1 million undos/second). + history_.pop_back(); + --move_number_; + ResetBoard(); + for (auto [_, action] : history_) { + DoApplyAction(action); + } +} + +void GoState::DoApplyAction(Action action) { + SPIEL_CHECK_TRUE( + board_.PlayMove(board_.ActionToVirtualAction(action), to_play_)); + to_play_ = OppColor(to_play_); + + bool was_inserted = repetitions_.insert(board_.HashValue()).second; + if (!was_inserted && action != board_.pass_action()) { + // We have encountered this position before. 
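+    // A repetition that is not a pass counts as a superko violation;
+    // Returns() scores such games as a draw.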
+ superko_ = true; + } +} + +void GoState::ResetBoard() { + board_.Clear(); + if (handicap_ < 2) { + to_play_ = GoColor::kBlack; + } else { + for (VirtualPoint p : HandicapStones(handicap_)) { + board_.PlayMove(p, GoColor::kBlack); + } + to_play_ = GoColor::kWhite; + } + + repetitions_.clear(); + repetitions_.insert(board_.HashValue()); + superko_ = false; +} + +GoGame::GoGame(const GameParameters& params) + : Game(kGameType, params), + komi_(ParameterValue("komi")), + board_size_(ParameterValue("board_size")), + handicap_(ParameterValue("handicap")), + max_game_length_(ParameterValue( + "max_game_length", DefaultMaxGameLength(board_size_))) {} + +} // namespace go +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/go/go.h b/scenarios/bargaining/open_spiel/open_spiel/games/go/go.h new file mode 100644 index 0000000..b58b855 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/go/go.h @@ -0,0 +1,170 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_GO_H_ +#define OPEN_SPIEL_GAMES_GO_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/games/go/go_board.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// Game of Go: +// https://en.wikipedia.org/wiki/Go_(game) +// +// Parameters: +// "komi" float compensation for white (default = 7.5) +// "board_size" int rows of the board, usually 9, 13 or 19 (default = 19) +// "handicap" int number of handicap stones for black (default = 0) + +namespace open_spiel { +namespace go { + +// Constants. +inline constexpr int NumPlayers() { return 2; } +inline constexpr double LossUtility() { return -1; } +inline constexpr double WinUtility() { return 1; } +inline constexpr int CellStates() { return 3; } // Black, white, empty. + +// Go can only end in a draw when using a round komi. +// We also treat superko as a draw. +inline constexpr double DrawUtility() { return 0; } + +// All actions must be in [0; NumDistinctActions). +inline int NumDistinctActions(int board_size) { + return board_size * board_size + 1; +} + +// In theory Go games have no length limit, but we limit them to twice the +// number of points on the board for practicality - only random games last +// this long. This value can also be overriden when creating the game. +inline int DefaultMaxGameLength(int board_size) { + return board_size * board_size * 2; +} + +inline int ColorToPlayer(GoColor c) { return static_cast(c); } +inline GoColor PlayerToColor(Player p) { return static_cast(p); } + +// State of an in-play game. +// Actions are contiguous from 0 to board_size * board_size - 1, row-major, i.e. +// the (row, col) action is encoded as row * board_size + col. +// The pass action is board_size * board_size. +class GoState : public State { + public: + // Constructs a Go state for the empty board. 
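+  // States are normally created via GoGame::NewInitialState() rather than
+  // constructed directly.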
+ GoState(std::shared_ptr game, int board_size, float komi, + int handicap); + + Player CurrentPlayer() const override { + return IsTerminal() ? kTerminalPlayerId : ColorToPlayer(to_play_); + } + std::vector LegalActions() const override; + std::string ActionToString(Player player, Action action) const override; + std::string ToString() const override; + + bool IsTerminal() const override; + + std::string InformationStateString(int player) const override; + std::string ObservationString(int player) const override; + + // Four planes: black, white, empty, and a bias plane of bits indicating komi + // (whether white is to play). + void ObservationTensor(int player, absl::Span values) const override; + + std::vector Returns() const override; + + std::unique_ptr Clone() const override; + void UndoAction(Player player, Action action) override; + + const GoBoard& board() const { return board_; } + + protected: + void DoApplyAction(Action action) override; + + private: + void ResetBoard(); + + GoBoard board_; + + // RepetitionTable records which positions we have already encountered. + // We are already indexing by board hash, so there is no need to hash that + // hash again, so we use a custom passthrough hasher. + class PassthroughHash { + public: + std::size_t operator()(uint64_t x) const { + return static_cast(x); + } + }; + using RepetitionTable = std::unordered_set; + RepetitionTable repetitions_; + + const float komi_; + const int handicap_; + const int max_game_length_; + GoColor to_play_; + bool superko_; +}; + +// Game object. +class GoGame : public Game { + public: + explicit GoGame(const GameParameters& params); + + int NumDistinctActions() const override { + return go::NumDistinctActions(board_size_); + } + + std::unique_ptr NewInitialState() const override { + return std::unique_ptr( + new GoState(shared_from_this(), board_size_, komi_, handicap_)); + } + + std::vector ObservationTensorShape() const override { + // Planes: black, white, empty, and a bias plane indicating komi (whether + // white is to play). + return {CellStates() + 1, board_size_, board_size_}; + } + + TensorLayout ObservationTensorLayout() const override { + return TensorLayout::kCHW; + } + + int NumPlayers() const override { return go::NumPlayers(); } + + double MinUtility() const override { return LossUtility(); } + absl::optional UtilitySum() const override { + return LossUtility() + WinUtility(); + } + double MaxUtility() const override { return WinUtility(); } + + int MaxGameLength() const override { return max_game_length_; } + + private: + const float komi_; + const int board_size_; + const int handicap_; + const int max_game_length_; +}; + +} // namespace go +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_GO_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/go/go_board.cc b/scenarios/bargaining/open_spiel/open_spiel/games/go/go_board.cc new file mode 100644 index 0000000..24d2530 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/go/go_board.cc @@ -0,0 +1,710 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/go/go_board.h" + +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/games/chess/chess_common.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace go { + +namespace { + +// 8 adjacent directions. +// +// 405 +// 1 2 +// 637 +// +// The order is important because it is used to index 3x3 patterns! +// +inline constexpr std::array Dir8 = {{ + kVirtualBoardSize, // new line + -1, // new line + +1, // new line + -static_cast(kVirtualBoardSize), + +static_cast(kVirtualBoardSize) - 1, + +static_cast(kVirtualBoardSize) + 1, + -static_cast(kVirtualBoardSize) - 1, + -static_cast(kVirtualBoardSize) + 1, + 0 // Dummy element. +}}; + +// Calls f for all 4 direct neighbours of p. +// f should have type void f(VirtualPoint n), but is passed as a template so we +// can elide the function call overhead. +template +void Neighbours(VirtualPoint p, const F& f) { + f(p + kVirtualBoardSize); + f(p + 1); + f(p - 1); + f(p - kVirtualBoardSize); +} + +std::vector MakeBoardPoints(int board_size) { + std::vector points; + points.reserve(board_size * board_size); + for (int row = 0; row < board_size; ++row) { + for (int col = 0; col < board_size; ++col) { + points.push_back(VirtualPointFrom2DPoint({row, col})); + } + } + return points; +} + +template +const std::vector& GetBoardPoints() { + static std::vector points = MakeBoardPoints(board_size); + return points; +} + +char GoColorToChar(GoColor c) { + switch (c) { + case GoColor::kBlack: + return 'X'; + case GoColor::kWhite: + return 'O'; + case GoColor::kEmpty: + return '+'; + case GoColor::kGuard: + return '#'; + default: + SpielFatalError(absl::StrCat("Unknown color ", c, " in GoColorToChar.")); + return '!'; + } +} + +std::string MoveAsAscii(VirtualPoint p, GoColor c) { + static std::string code = "0123456789abcdefghijklmnopqrstuvwxyz"; + static int mask = 31; + // 1 bit for color, 9 bits for the point. + uint16_t value = static_cast(c) | (p << 1); + // Encode in 2 characters of 5 bit each. + std::string encoded; + encoded.push_back(code[(value >> 5) & mask]); + encoded.push_back(code[value & mask]); + return encoded; +} + +} // namespace + +Neighbours4::Neighbours4(const VirtualPoint p) + : dir_(static_cast(0)), p_(p) {} + +Neighbours4& Neighbours4::operator++() { + ++dir_; + return *this; +} + +const VirtualPoint Neighbours4::operator*() const { return p_ + Dir8[dir_]; } + +Neighbours4::operator bool() const { return dir_ < 4; } + +std::pair VirtualPointTo2DPoint(VirtualPoint p) { + if (p == kInvalidPoint || p == kVirtualPass) return std::make_pair(-1, -1); + + const int row = static_cast(p) / kVirtualBoardSize; + const int col = static_cast(p) % kVirtualBoardSize; + return std::make_pair(row - 1, col - 1); +} + +VirtualPoint VirtualPointFrom2DPoint(std::pair row_col) { + return static_cast((row_col.first + 1) * kVirtualBoardSize + + row_col.second + 1); +} + +// Internally, the board is *always* 21*21 with a border of guard stones around +// all sides of the board. Thus we need to map a coordinate in that space +// to a coordinate in the normal board. 
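+// For example, on a 19x19 board the corner point (row 0, col 0) is virtual
+// point 1 * 21 + 1 = 22 and maps to action 0, while the pass action maps to
+// 19 * 19 = 361.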
+Action VirtualActionToAction(int virtual_action, int board_size) { + if (virtual_action == kVirtualPass) return board_size * board_size; + const int virtual_row = static_cast(virtual_action) / kVirtualBoardSize; + const int virtual_col = static_cast(virtual_action) % kVirtualBoardSize; + return board_size * (virtual_row - 1) + (virtual_col - 1); +} + +int ActionToVirtualAction(Action action, int board_size) { + if (action == board_size * board_size) return kVirtualPass; + int row = action / board_size; + int column = action % board_size; + return (row + 1) * kVirtualBoardSize + (column + 1); +} + +const std::vector& BoardPoints(int board_size) { +#define CASE_GET_POINTS(n) \ + case n: \ + return GetBoardPoints() + + switch (board_size) { + CASE_GET_POINTS(2); + CASE_GET_POINTS(3); + CASE_GET_POINTS(4); + CASE_GET_POINTS(5); + CASE_GET_POINTS(6); + CASE_GET_POINTS(7); + CASE_GET_POINTS(8); + CASE_GET_POINTS(9); + CASE_GET_POINTS(10); + CASE_GET_POINTS(11); + CASE_GET_POINTS(12); + CASE_GET_POINTS(13); + CASE_GET_POINTS(14); + CASE_GET_POINTS(15); + CASE_GET_POINTS(16); + CASE_GET_POINTS(17); + CASE_GET_POINTS(18); + CASE_GET_POINTS(19); + default: + SpielFatalError("unsupported board size"); + } + +#undef CASE_GET_POINTS +} + +GoColor OppColor(GoColor c) { + switch (c) { + case GoColor::kBlack: + return GoColor::kWhite; + case GoColor::kWhite: + return GoColor::kBlack; + case GoColor::kEmpty: + case GoColor::kGuard: + return c; + default: + SpielFatalError(absl::StrCat("Unknown color ", c, " in OppColor.")); + return c; + } +} + +std::ostream& operator<<(std::ostream& os, GoColor c) { + return os << GoColorToString(c); +} + +std::string GoColorToString(GoColor c) { + switch (c) { + case GoColor::kBlack: + return "B"; + case GoColor::kWhite: + return "W"; + case GoColor::kEmpty: + return "EMPTY"; + case GoColor::kGuard: + return "GUARD"; + default: + SpielFatalError( + absl::StrCat("Unknown color ", c, " in GoColorToString.")); + return "This will never return."; + } +} + +std::ostream& operator<<(std::ostream& os, VirtualPoint p) { + return os << VirtualPointToString(p); +} + +std::string VirtualPointToString(VirtualPoint p) { + switch (p) { + case kInvalidPoint: + return "INVALID_POINT"; + case kVirtualPass: + return "PASS"; + default: { + auto row_col = VirtualPointTo2DPoint(p); + char col = 'a' + row_col.second; + if (col >= 'i') ++col; // Go / SGF labeling skips 'i'. + return absl::StrCat(std::string(1, col), row_col.first + 1); + } + } +} + +VirtualPoint MakePoint(std::string s) { + std::transform(s.begin(), s.end(), s.begin(), ::tolower); + + if (s == "pass") return kVirtualPass; + if (s.size() < 2 || s.size() > 3) return kInvalidPoint; + + int col = s[0] < 'i' ? s[0] - 'a' : s[0] - 'a' - 1; + int row = s[1] - '0'; + if (s.size() == 3) { + row *= 10; + row += s[2] - '0'; + } + return VirtualPointFrom2DPoint({row - 1, col}); +} + +GoBoard::GoBoard(int board_size) + : board_size_(board_size), pass_action_(board_size * board_size) { + if (board_size_ > 19) { + SpielFatalError( + absl::StrCat("The current Go implementation supports board size up to " + "19. 
Provided: ", + board_size)); + } + Clear(); +} + +void GoBoard::Clear() { + zobrist_hash_ = 0; + + for (int i = 0; i < board_.size(); ++i) { + Vertex& v = board_[i]; + v.color = GoColor::kGuard; + v.chain_head = static_cast(i); + v.chain_next = static_cast(i); + chains_[i].reset_border(); + } + + for (VirtualPoint p : BoardPoints(board_size_)) { + board_[p].color = GoColor::kEmpty; + chains_[p].reset(); + } + + for (VirtualPoint p : BoardPoints(board_size_)) { + Neighbours(p, [this, p](VirtualPoint n) { + if (IsEmpty(n)) chain(p).add_liberty(n); + }); + } + + for (int i = 0; i < last_captures_.size(); ++i) { + last_captures_[i] = kInvalidPoint; + } + + last_ko_point_ = kInvalidPoint; +} + +bool GoBoard::PlayMove(VirtualPoint p, GoColor c) { + if (p == kVirtualPass) { + last_ko_point_ = kInvalidPoint; + return true; + } + + if (board_[p].color != GoColor::kEmpty) { + SpielFatalError(absl::StrCat("Trying to play the move ", GoColorToString(c), + ": ", VirtualPointToString(p), " (", p, + ") but the cell is already filled with ", + GoColorToString(board_[p].color))); + } + SPIEL_CHECK_EQ(GoColor::kEmpty, board_[p].color); + + // Preparation for ko checking. + bool played_in_enemy_eye = true; + Neighbours(p, [this, c, &played_in_enemy_eye](VirtualPoint n) { + GoColor s = PointColor(n); + if (s == c || s == GoColor::kEmpty) { + played_in_enemy_eye = false; + } + }); + + JoinChainsAround(p, c); + SetStone(p, c); + RemoveLibertyFromNeighbouringChains(p); + int stones_captured = CaptureDeadChains(p, c); + + if (played_in_enemy_eye && stones_captured == 1) { + last_ko_point_ = last_captures_[0]; + } else { + last_ko_point_ = kInvalidPoint; + } + + SPIEL_CHECK_GT(chain(p).num_pseudo_liberties, 0); + + return true; +} + +VirtualPoint GoBoard::SingleLiberty(VirtualPoint p) const { + VirtualPoint head = ChainHead(p); + VirtualPoint liberty = chain(p).single_liberty(); + + // Check it is really a liberty. + SPIEL_CHECK_TRUE(IsInBoardArea(liberty)); + SPIEL_CHECK_TRUE(IsEmpty(liberty)); + + // Make sure the liberty actually borders the group. + for (auto n = Neighbours4(liberty); n; ++n) { + if (ChainHead(*n) == head) return liberty; + } + + SpielFatalError( + absl::StrCat("liberty", liberty, " does not actually border group ", p)); +} + +void GoBoard::SetStone(VirtualPoint p, GoColor c) { + static const chess_common::ZobristTable + zobrist_values( + /*seed=*/2765481); + + zobrist_hash_ ^= zobrist_values[p][static_cast( + c == GoColor::kEmpty ? PointColor(p) : c)]; + + board_[p].color = c; +} + +// Combines the groups around the newly placed stone at vertex. If no groups +// are available for joining, the new stone is placed as a new group. +void GoBoard::JoinChainsAround(VirtualPoint p, GoColor c) { + VirtualPoint largest_chain_head = kInvalidPoint; + int largest_chain_size = 0; + Neighbours( + p, [this, c, &largest_chain_head, &largest_chain_size](VirtualPoint n) { + if (PointColor(n) == c) { + Chain& c = chain(n); + if (c.num_stones > largest_chain_size) { + largest_chain_size = c.num_stones; + largest_chain_head = ChainHead(n); + } + } + }); + if (largest_chain_size == 0) { + InitNewChain(p); + return; + } + + Neighbours(p, [this, c, &largest_chain_head](VirtualPoint n) { + if (PointColor(n) == c) { + VirtualPoint chain_head = ChainHead(n); + if (chain_head != largest_chain_head) { + chain(largest_chain_head).merge(chain(n)); + + // Set all stones in the smaller string to be part of the larger + // chain. 
+ VirtualPoint cur = n; + do { + board_[cur].chain_head = largest_chain_head; + cur = board_[cur].chain_next; + } while (cur != n); + + // Connect the 2 linked lists representing the stones in the two + // chains. + std::swap(board_[largest_chain_head].chain_next, board_[n].chain_next); + } + } + }); + + board_[p].chain_next = board_[largest_chain_head].chain_next; + board_[largest_chain_head].chain_next = p; + board_[p].chain_head = largest_chain_head; + chain(largest_chain_head).num_stones += 1; + + Neighbours(p, [this, largest_chain_head](VirtualPoint n) { + if (IsEmpty(n)) { + chain(largest_chain_head).add_liberty(n); + } + }); +} + +void GoBoard::RemoveLibertyFromNeighbouringChains(VirtualPoint p) { + Neighbours(p, [this, p](VirtualPoint n) { chain(n).remove_liberty(p); }); +} + +int GoBoard::CaptureDeadChains(VirtualPoint p, GoColor c) { + int stones_captured = 0; + int capture_index = 0; + Neighbours(p, [this, c, &capture_index, &stones_captured](VirtualPoint n) { + if (PointColor(n) == OppColor(c) && chain(n).num_pseudo_liberties == 0) { + last_captures_[capture_index++] = ChainHead(n); + stones_captured += chain(n).num_stones; + RemoveChain(n); + } + }); + + for (; capture_index < last_captures_.size(); ++capture_index) { + last_captures_[capture_index] = kInvalidPoint; + } + + return stones_captured; +} + +void GoBoard::RemoveChain(VirtualPoint p) { + VirtualPoint this_chain_head = ChainHead(p); + VirtualPoint cur = p; + do { + VirtualPoint next = board_[cur].chain_next; + + SetStone(cur, GoColor::kEmpty); + InitNewChain(cur); + + Neighbours(cur, [this, this_chain_head, cur](VirtualPoint n) { + if (ChainHead(n) != this_chain_head || IsEmpty(n)) { + chain(n).add_liberty(cur); + } + }); + + cur = next; + } while (cur != p); +} + +void GoBoard::InitNewChain(VirtualPoint p) { + board_[p].chain_head = p; + board_[p].chain_next = p; + + Chain& c = chain(p); + c.reset(); + c.num_stones += 1; + + Neighbours(p, [this, &c](VirtualPoint n) { + if (IsEmpty(n)) { + c.add_liberty(n); + } + }); +} + +bool GoBoard::IsInBoardArea(VirtualPoint p) const { + auto rc = VirtualPointTo2DPoint(p); + return rc.first >= 0 && rc.first < board_size() && rc.second >= 0 && + rc.second < board_size(); +} + +bool GoBoard::IsLegalMove(VirtualPoint p, GoColor c) const { + if (p == kVirtualPass) return true; + if (!IsInBoardArea(p)) return false; + if (!IsEmpty(p) || p == LastKoPoint()) return false; + if (chain(p).num_pseudo_liberties > 0) return true; + + // For all checks below, the newly placed stone is completely surrounded by + // enemy and friendly stones. + + // Allow to play if the placed stones connects to a group that still has at + // least one other liberty after connecting. + bool has_liberty = false; + Neighbours(p, [this, c, &has_liberty](VirtualPoint n) { + has_liberty |= (PointColor(n) == c && !chain(n).in_atari()); + }); + if (has_liberty) return true; + + // Allow to play if the placed stone will kill at least one group. + bool kills_group = false; + Neighbours(p, [this, c, &kills_group](VirtualPoint n) { + kills_group |= (PointColor(n) == OppColor(c) && chain(n).in_atari()); + }); + if (kills_group) return true; + + return false; +} + +void GoBoard::Chain::reset_border() { + num_stones = 0; + // Need to have values big enough that they can never go below 0 even if + // all liberties are removed. 
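+  // Border guard points form singleton chains and receive liberty updates
+  // when stones are played on or captured from edge points (Neighbours()
+  // does not bounds-check), so these counters start large enough that they
+  // can never reach zero.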
+ num_pseudo_liberties = 4; + liberty_vertex_sum = 32768; + liberty_vertex_sum_squared = 2147483648; +} + +void GoBoard::Chain::reset() { + num_stones = 0; + num_pseudo_liberties = 0; + liberty_vertex_sum = 0; + liberty_vertex_sum_squared = 0; +} + +void GoBoard::Chain::merge(const Chain& other) { + num_stones += other.num_stones; + num_pseudo_liberties += other.num_pseudo_liberties; + liberty_vertex_sum += other.liberty_vertex_sum; + liberty_vertex_sum_squared += other.liberty_vertex_sum_squared; +} + +void GoBoard::Chain::add_liberty(VirtualPoint p) { + num_pseudo_liberties += 1; + liberty_vertex_sum += p; + liberty_vertex_sum_squared += + static_cast(p) * static_cast(p); +} + +void GoBoard::Chain::remove_liberty(VirtualPoint p) { + num_pseudo_liberties -= 1; + liberty_vertex_sum -= p; + liberty_vertex_sum_squared -= + static_cast(p) * static_cast(p); +} + +VirtualPoint GoBoard::Chain::single_liberty() const { + SPIEL_CHECK_TRUE(in_atari()); + // A point is in Atari if it has only a single liberty, i.e. all pseudo + // liberties are for the same point. + // This is true exactly when + // liberty_vertex_sum**2 == liberty_vertex_sum_squared * num_pseudo_liberties + // Since all pseudo liberties are for the same point, this is equivalent to + // (taking n = num_pseudo_liberties): + // (n * p)**2 = (n * p**2) * n + // Thus to obtain p, we simple need to divide out the number of pseudo + // liberties. + SPIEL_CHECK_EQ(liberty_vertex_sum % num_pseudo_liberties, 0); + return static_cast(liberty_vertex_sum / num_pseudo_liberties); +} + +std::string GoBoard::ToString() { + std::ostringstream stream; + stream << *this; + return stream.str(); +} + +std::ostream& operator<<(std::ostream& os, const GoBoard& board) { + os << "\n"; + for (int row = board.board_size() - 1; row >= 0; --row) { + os << std::setw(2) << std::setfill(' ') << (row + 1) << " "; + for (int col = 0; col < board.board_size(); ++col) { + os << GoColorToChar( + board.PointColor(VirtualPointFrom2DPoint({row, col}))); + } + os << std::endl; + } + + std::string columns = "ABCDEFGHJKLMNOPQRST"; + os << " " << columns.substr(0, board.board_size()) << std::endl; + + // Encode the stones and print a URL that can be used to view the board. + std::string encoded; + for (VirtualPoint p : BoardPoints(board.board_size())) { + if (!board.IsEmpty(p)) { + encoded += MoveAsAscii(p, board.PointColor(p)); + } + } + + // TODO(author9): Make this a public URL. + // os << "http://jumper/goboard/" << encoded << "&size=" << board.board_size() + // << std::endl; + + return os; +} + +void GoBoard::GroupIter::step() { + --lib_i_; + while (lib_i_ < 0 && !marked_[chain_cur_]) { + Neighbours(chain_cur_, [this](VirtualPoint n) { + VirtualPoint head = board_->ChainHead(n); + if (board_->PointColor(head) == group_color_ && !marked_[head]) { + cur_libs_[++lib_i_] = head; + marked_[head] = true; + } + }); + marked_[chain_cur_] = true; + chain_cur_ = board_->board_[chain_cur_].chain_next; + } +} + +// Returns the number of points surrounded entirely by one color. +// Aborts early and returns 0 if the area borders both black and white stones. 
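+// The marked array lets the caller avoid recounting an empty region, and
+// reached_black / reached_white report which colors border it.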
+int NumSurroundedPoints(const GoBoard& board, const VirtualPoint p, + std::array* marked, + bool* reached_black, bool* reached_white) { + if ((*marked)[p]) return 0; + (*marked)[p] = true; + + int num_points = 1; + Neighbours(p, [&board, &num_points, marked, reached_black, + reached_white](VirtualPoint n) { + switch (board.PointColor(n)) { + case GoColor::kBlack: + *reached_black = true; + break; + case GoColor::kWhite: + *reached_white = true; + break; + case GoColor::kEmpty: + num_points += + NumSurroundedPoints(board, n, marked, reached_black, reached_white); + break; + case GoColor::kGuard: + // Ignore the border. + break; + } + }); + + return num_points; +} + +float TrompTaylorScore(const GoBoard& board, float komi, int handicap) { + // The delta of how many points on the board black and white have occupied, + // from black's point of view, i.e. Black points - White points. + int occupied_delta = 0; + + // We need to keep track of which empty points we've already counted as part + // of a larger territory. + std::array marked; + marked.fill(false); + + for (VirtualPoint p : BoardPoints(board.board_size())) { + switch (board.PointColor(p)) { + case GoColor::kBlack: + ++occupied_delta; + break; + case GoColor::kWhite: + --occupied_delta; + break; + case GoColor::kEmpty: { + if (marked[p]) continue; + // If some empty points are surrounded entirely by one player, they + // count as that player's territory. + bool reached_black = false, reached_white = false; + int n = NumSurroundedPoints(board, p, &marked, &reached_black, + &reached_white); + if (reached_black && !reached_white) { + occupied_delta += n; + } else if (!reached_black && reached_white) { + occupied_delta -= n; + } + break; + } + case GoColor::kGuard: + SpielFatalError("unexpected color"); + } + } + + float score = occupied_delta - komi; + if (handicap >= 2) { + score -= handicap; + } + return score; +} + +GoBoard CreateBoard(const std::string& initial_stones) { + GoBoard board(19); + + int row = 0; + for (const auto& line : absl::StrSplit(initial_stones, '\n')) { + int col = 0; + bool stones_started = false; + for (const auto& c : line) { + if (c == ' ') { + if (stones_started) { + SpielFatalError( + "Whitespace is only allowed at the start of " + "the line. To represent empty intersections, " + "use +"); + } + continue; + } else if (c == 'X') { + stones_started = true; + SPIEL_CHECK_TRUE(board.PlayMove(VirtualPointFrom2DPoint({row, col}), + GoColor::kBlack)); + } else if (c == 'O') { + stones_started = true; + SPIEL_CHECK_TRUE(board.PlayMove(VirtualPointFrom2DPoint({row, col}), + GoColor::kWhite)); + } + col++; + } + row++; + } + + return board; +} + +} // namespace go +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/go/go_board.h b/scenarios/bargaining/open_spiel/open_spiel/games/go/go_board.h new file mode 100644 index 0000000..bf4df34 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/go/go_board.h @@ -0,0 +1,291 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_GO_GO_BOARD_H_ +#define OPEN_SPIEL_GAMES_GO_GO_BOARD_H_ + +#include +#include +#include +#include + +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace go { + +enum class GoColor : uint8_t { kBlack = 0, kWhite = 1, kEmpty = 2, kGuard = 3 }; + +std::string GoColorToString(GoColor c); + +std::ostream &operator<<(std::ostream &os, GoColor c); + +GoColor OppColor(GoColor c); + +// For simplicity and speed, we store the board in terms of a "virtual board", +// with a border of guard stones around all sides of the board. +// This allows us to skip bounds checking. +// In Virtual mode, an action (row, col) is row * 21 + col, and pass is 21*21+1. +// All functions in this file (except stated otherwise) use these virtual +// coordinates. +// +// However, in the OpenSpiel API (in go.{h, cc}), the actions are still exposed +// as actions within 0, board_size*boardsize) (with pass = board_size **2. +// +// We support boards up to size 19. +inline constexpr int kMaxBoardSize = 19; +inline constexpr int kVirtualBoardSize = kMaxBoardSize + 2; +inline constexpr int kVirtualBoardPoints = + kVirtualBoardSize * kVirtualBoardSize; + +using VirtualPoint = uint16_t; + +inline constexpr VirtualPoint kInvalidPoint = 0; +inline constexpr VirtualPoint kVirtualPass = kVirtualBoardPoints + 1; + +// Returns the VirtualPoint corresponding to the provided coordinates, e.g. "d4" +// or "f10". +VirtualPoint MakePoint(std::string s); + +// Converts a VirtualPoint to a string representation. +std::string VirtualPointToString(VirtualPoint p); + +std::ostream &operator<<(std::ostream &os, VirtualPoint p); + +// Conversion functions between VirtualPoint and row/column representation. +std::pair VirtualPointTo2DPoint(VirtualPoint p); +// Returns the point identifier in the Virtual 21*21 board from the (row, col) +// 0-index coordinate in the concrete board. +VirtualPoint VirtualPointFrom2DPoint(std::pair row_col); + +// Converts an OpenSpiel action in range [0, board_size **2] to the +// Virtual board range [0, kVirtualPass], and vice-versa. +Action VirtualActionToAction(int virtual_action, int board_size); +int ActionToVirtualAction(Action action, int board_size); + +inline std::string GoActionToString(Action action, int board_size) { + return VirtualPointToString(ActionToVirtualAction(action, board_size)); +} + +// Returns a reference to a vector that contains all points that are on a board +// of the specified size. +const std::vector &BoardPoints(int board_size); + +// To iterate over 4 neighbouring points, do +// +// VirtualPoint point; +// for (auto p = Neighbours4(point); p; ++p) { +// // Do something on p.. +// } +// +class Neighbours4 { + public: + explicit Neighbours4(const VirtualPoint p); + + Neighbours4 &operator++(); + const VirtualPoint operator*() const; + explicit operator bool() const; + + private: + VirtualPoint dir_; + const VirtualPoint p_; +}; + +// Simple Go board that is optimized for speed. +// It only implements the minimum of functionality necessary to support the +// search and is optimized for speed and size. Importantly, it fits on the +// stack. For detailed numbers, run the benchmarks in go_board_test. +class GoBoard { + public: + explicit GoBoard(int board_size); + + void Clear(); + + inline int board_size() const { return board_size_; } + // Returns the concrete pass action. 
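+  // This is always board_size * board_size, one past the largest point
+  // action.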
+ inline int pass_action() const { return pass_action_; } + inline Action VirtualActionToAction(int virtual_action) const { + return go::VirtualActionToAction(virtual_action, board_size_); + } + inline int ActionToVirtualAction(Action action) const { + return go::ActionToVirtualAction(action, board_size_); + } + + inline GoColor PointColor(VirtualPoint p) const { return board_[p].color; } + + inline bool IsEmpty(VirtualPoint p) const { + return PointColor(p) == GoColor::kEmpty; + } + + bool IsInBoardArea(VirtualPoint p) const; + + bool IsLegalMove(VirtualPoint p, GoColor c) const; + + bool PlayMove(VirtualPoint p, GoColor c); + + // kInvalidPoint if there is no ko, otherwise the point of the ko. + inline VirtualPoint LastKoPoint() const { return last_ko_point_; } + + // Count of pseudo-liberties, i.e. each liberty is counted between 1 and 4 + // times, once for each stone of the group that borders it. + // This is much faster than realLiberty(), so prefer it if possible. + inline int PseudoLiberty(VirtualPoint p) const { + return chain(p).num_pseudo_liberties == 0 + ? 0 + : (chain(p).in_atari() ? 1 : chain(p).num_pseudo_liberties); + } + + inline bool InAtari(VirtualPoint p) const { return chain(p).in_atari(); } + + // If a chain has a single liberty (it is in Atari), return that liberty. + VirtualPoint SingleLiberty(VirtualPoint p) const; + + // Actual liberty count, i.e. each liberty is counted exactly once. + // This is computed on the fly by actually walking the group and checking the + // neighbouring stones. + inline int RealLiberty(VirtualPoint p) const { + int num_lib = 0; + for (auto it = LibIter(p); it; ++it) { + ++num_lib; + } + return num_lib; + } + + inline uint64_t HashValue() const { return zobrist_hash_; } + + // Head of a chain; each chain has exactly one head that can be used to + // uniquely identify it. Chain heads may change over successive PlayMove()s. + inline VirtualPoint ChainHead(VirtualPoint p) const { + return board_[p].chain_head; + } + + // Number of stones in a chain. 
+ inline int ChainSize(VirtualPoint p) const { return chain(p).num_stones; } + + std::string ToString(); + + class GroupIter { + public: + GroupIter(const GoBoard *board, VirtualPoint p, GoColor group_color) + : board_(board), lib_i_(0), group_color_(group_color) { + marked_.fill(false); + chain_head_ = board->ChainHead(p); + chain_cur_ = chain_head_; + step(); + } + + inline explicit operator bool() const { return lib_i_ >= 0; } + + inline VirtualPoint operator*() const { return cur_libs_[lib_i_]; } + + GroupIter &operator++() { + step(); + return *this; + } + + private: + void step(); + + const GoBoard *board_; + + std::array marked_; + std::array cur_libs_; + int lib_i_; + VirtualPoint chain_head_; + VirtualPoint chain_cur_; + GoColor group_color_; + }; + + GroupIter LibIter(VirtualPoint p) const { + return GroupIter(this, p, GoColor::kEmpty); + } + GroupIter OppIter(VirtualPoint p) const { + return GroupIter(this, p, OppColor(PointColor(p))); + } + + private: + void JoinChainsAround(VirtualPoint p, GoColor c); + void SetStone(VirtualPoint p, GoColor c); + void RemoveLibertyFromNeighbouringChains(VirtualPoint p); + int CaptureDeadChains(VirtualPoint p, GoColor c); + void RemoveChain(VirtualPoint p); + void InitNewChain(VirtualPoint p); + + struct Vertex { + VirtualPoint chain_head; + VirtualPoint chain_next; + GoColor color; + }; + + struct Chain { + uint32_t liberty_vertex_sum_squared; + uint16_t liberty_vertex_sum; + uint16_t num_stones; + uint16_t num_pseudo_liberties; + + void reset(); + void reset_border(); + void merge(const Chain &other); + + inline bool in_atari() const { + return static_cast(num_pseudo_liberties) * + liberty_vertex_sum_squared == + static_cast(liberty_vertex_sum) * + static_cast(liberty_vertex_sum); + } + void add_liberty(VirtualPoint p); + void remove_liberty(VirtualPoint p); + VirtualPoint single_liberty() const; + }; + + Chain &chain(VirtualPoint p) { return chains_[ChainHead(p)]; } + const Chain &chain(VirtualPoint p) const { return chains_[ChainHead(p)]; } + + std::array board_; + std::array chains_; + + uint64_t zobrist_hash_; + + // Chains captured in the last move, kInvalidPoint otherwise. + std::array last_captures_; + + int board_size_; + int pass_action_; + + VirtualPoint last_ko_point_; +}; + +std::ostream &operator<<(std::ostream &os, const GoBoard &board); + +// Score according to https://senseis.xmp.net/?TrompTaylorRules. +float TrompTaylorScore(const GoBoard &board, float komi, int handicap = 0); + +// Generates a go board from the given string, setting X to black stones and O +// to white stones. The first character of the first line is mapped to A1, the +// second character to B1, etc, as below: +// ABCDEFGH +// 1 ++++XO++ +// 2 XXXXXO++ +// 3 OOOOOO++ +// 4 ++++++++ +// The board will always be 19x19. +// This exists mostly for test purposes. +// WARNING: This coordinate system is different from the representation in +// GoBoard in which A1 is at the bottom left. 
+GoBoard CreateBoard(const std::string &initial_stones); + +} // namespace go +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_GO_GO_BOARD_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/go/go_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/go/go_test.cc new file mode 100644 index 0000000..fc4529a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/go/go_test.cc @@ -0,0 +1,74 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/go/go.h" + +#include "open_spiel/games/go/go_board.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace go { +namespace { + +namespace testing = open_spiel::testing; + +constexpr int kBoardSize = 19; +constexpr float kKomi = 7.5; + +void BasicGoTests() { + GameParameters params; + params["board_size"] = GameParameter(13); + + testing::LoadGameTest("go"); + testing::NoChanceOutcomesTest(*LoadGame("go")); + testing::RandomSimTest(*LoadGame("go", params), 3); + testing::RandomSimTestWithUndo(*LoadGame("go", params), 3); +} + +void HandicapTest() { + std::shared_ptr game = + LoadGame("go", {{"board_size", open_spiel::GameParameter(kBoardSize)}, + {"komi", open_spiel::GameParameter(kKomi)}, + {"handicap", open_spiel::GameParameter(2)}}); + GoState state(game, kBoardSize, kKomi, 2); + SPIEL_CHECK_EQ(state.CurrentPlayer(), ColorToPlayer(GoColor::kWhite)); + SPIEL_CHECK_EQ(state.board().PointColor(MakePoint("d4")), GoColor::kBlack); + SPIEL_CHECK_EQ(state.board().PointColor(MakePoint("q16")), GoColor::kBlack); +} + +void ConcreteActionsAreUsedInTheAPI() { + int board_size = 13; + std::shared_ptr game = + LoadGame("go", {{"board_size", open_spiel::GameParameter(board_size)}}); + std::unique_ptr state = game->NewInitialState(); + + SPIEL_CHECK_EQ(state->NumDistinctActions(), board_size * board_size + 1); + SPIEL_CHECK_EQ(state->LegalActions().size(), state->NumDistinctActions()); + for (Action action : state->LegalActions()) { + SPIEL_CHECK_GE(action, 0); + SPIEL_CHECK_LE(action, board_size * board_size); + } +} + +} // namespace +} // namespace go +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::go::BasicGoTests(); + open_spiel::go::HandicapTest(); + open_spiel::go::ConcreteActionsAreUsedInTheAPI(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/goofspiel/goofspiel.cc b/scenarios/bargaining/open_spiel/open_spiel/games/goofspiel/goofspiel.cc new file mode 100644 index 0000000..87e6e81 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/goofspiel/goofspiel.cc @@ -0,0 +1,852 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/goofspiel/goofspiel.h" + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace goofspiel { +namespace { + +const GameType kGameType{ + /*short_name=*/"goofspiel", + /*long_name=*/"Goofspiel", + GameType::Dynamics::kSimultaneous, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/10, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + { + {"imp_info", GameParameter(kDefaultImpInfo)}, + {"egocentric", GameParameter(kDefaultEgocentric)}, + {"num_cards", GameParameter(kDefaultNumCards)}, + {"num_turns", GameParameter(kDefaultNumTurns)}, + {"players", GameParameter(kDefaultNumPlayers)}, + {"points_order", + GameParameter(static_cast(kDefaultPointsOrder))}, + {"returns_type", + GameParameter(static_cast(kDefaultReturnsType))}, + }, + /*default_loadable=*/true, + /*provides_factored_observation_string=*/true}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new GoofspielGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +PointsOrder ParsePointsOrder(const std::string& po_str) { + if (po_str == "random") { + return PointsOrder::kRandom; + } else if (po_str == "descending") { + return PointsOrder::kDescending; + } else if (po_str == "ascending") { + return PointsOrder::kAscending; + } else { + SpielFatalError( + absl::StrCat("Unrecognized points_order parameter: ", po_str)); + } +} + +ReturnsType ParseReturnsType(const std::string& returns_type_str) { + if (returns_type_str == "win_loss") { + return ReturnsType::kWinLoss; + } else if (returns_type_str == "point_difference") { + return ReturnsType::kPointDifference; + } else if (returns_type_str == "total_points") { + return ReturnsType::kTotalPoints; + } else { + SpielFatalError(absl::StrCat("Unrecognized returns_type parameter: ", + returns_type_str)); + } +} + +} // namespace + +class GoofspielObserver : public Observer { + public: + explicit GoofspielObserver(IIGObservationType iig_obs_type, bool egocentric) + : Observer(/*has_string=*/true, /*has_tensor=*/true), + iig_obs_type_(iig_obs_type), + egocentric_(egocentric) {} + + void WriteTensor(const State& observed_state, int player, + Allocator* allocator) const override { + const GoofspielState& state = + open_spiel::down_cast(observed_state); + const GoofspielGame& game = + open_spiel::down_cast(*state.GetGame()); + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, game.NumPlayers()); + + // List all predicates. 
+ const bool imp_info = game.IsImpInfo(); + const bool pub_info = iig_obs_type_.public_info; + const bool perf_rec = iig_obs_type_.perfect_recall; + const bool priv_one = + iig_obs_type_.private_info == PrivateInfoType::kSinglePlayer; + + // Conditionally write each field. + if (pub_info && !perf_rec) { + WriteCurrentPointCard(game, state, allocator); + WriteRemainingPointCards(game, state, allocator); + } + if (pub_info) WritePointsTotal(game, state, player, allocator); + if (imp_info && priv_one) WritePlayerHand(game, state, player, allocator); + if (imp_info && pub_info) WriteWinSequence(game, state, player, allocator); + if (pub_info && perf_rec) WritePointCardSequence(game, state, allocator); + if (imp_info && perf_rec && priv_one) + WritePlayerActionSequence(game, state, player, allocator); + if (!imp_info && pub_info) + WriteAllPlayersHands(game, state, player, allocator); + } + + std::string StringFrom(const State& observed_state, + int player) const override { + const GoofspielState& state = + open_spiel::down_cast(observed_state); + const GoofspielGame& game = + open_spiel::down_cast(*state.GetGame()); + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, game.NumPlayers()); + std::string result; + + // List all predicates. + const bool imp_info = game.IsImpInfo(); + const bool pub_info = iig_obs_type_.public_info; + const bool perf_rec = iig_obs_type_.perfect_recall; + const bool priv_one = + iig_obs_type_.private_info == PrivateInfoType::kSinglePlayer; + + // Conditionally write each field. + // This is done in a backwards-compatible way. + if (imp_info && priv_one && perf_rec) { // InformationState + StringPlayerHand(game, state, player, &result); + StringActionSequence(game, state, player, &result); + StringPointCardSequence(state, &result); + StringWinSequence(state, &result); + StringPoints(game, state, &result); + StringIsTerminal(state, &result); + return result; + } + if (imp_info && priv_one && !perf_rec) { // Observation + StringCurrentPointCard(state, &result); + StringRemainingPointCards(state, &result); + StringPoints(game, state, &result); + StringPlayerHand(game, state, player, &result); + StringWinSequence(state, &result); + return result; + } + + // Remaining public observation requests. + if (pub_info && perf_rec) StringPointCardSequence(state, &result); + if (pub_info && !perf_rec) { + StringCurrentPointCard(state, &result); + StringRemainingPointCards(state, &result); + } + if (pub_info && !imp_info) StringPlayersHands(game, state, &result); + if (pub_info) { + StringWinSequence(state, &result); + StringPoints(game, state, &result); + } + return result; + } + + private: + // Point totals: one-hot vector encoding points, per player. + // Writes this public information from the perspective + // of the requesting player. + void WritePointsTotal(const GoofspielGame& game, const GoofspielState& state, + int player, Allocator* allocator) const { + auto out = allocator->Get("point_totals", + {game.NumPlayers(), game.MaxPointSlots()}); + Player p = player; + for (int n = 0; n < game.NumPlayers(); state.NextPlayer(&n, &p)) { + out.at(n, state.points_[p]) = 1.0; + } + } + + // Bit vectors encoding all players' hands. + // Writes this public information from the perspective + // of the requesting player. 
+ void WriteAllPlayersHands(const GoofspielGame& game, + const GoofspielState& state, int player, + Allocator* allocator) const { + auto out = + allocator->Get("player_hands", {game.NumPlayers(), game.NumCards()}); + Player p = player; + for (int n = 0; n < game.NumPlayers(); state.NextPlayer(&n, &p)) { + for (int c = 0; c < game.NumCards(); ++c) { + out.at(n, c) = state.player_hands_[p][c]; + } + } + } + + // Sequence of who won each trick. + void WriteWinSequence(const GoofspielGame& game, const GoofspielState& state, + int player, Allocator* allocator) const { + auto out = + allocator->Get("win_sequence", {game.NumRounds(), game.NumPlayers()}); + for (int i = 0; i < state.win_sequence_.size(); ++i) { + if (state.win_sequence_[i] != kInvalidPlayer) { + int one_hot = state.win_sequence_[i]; + if (egocentric_) { + // Positive, relative distance to the winner. + one_hot = ((game.NumPlayers() + state.win_sequence_[i] - player) % + game.NumPlayers()); + } + out.at(i, one_hot) = 1.0; + } + } + } + + void WriteRemainingPointCards(const GoofspielGame& game, + const GoofspielState& state, + Allocator* allocator) const { + auto out = allocator->Get("remaining_point_cards", {game.NumCards()}); + std::set played(state.point_card_sequence_.begin(), + state.point_card_sequence_.end()); + for (int i = 0; i < state.num_cards_; ++i) { + if (played.count(i) == 0) out.at(i) = 1.0; + } + } + + void WritePointCardSequence(const GoofspielGame& game, + const GoofspielState& state, + Allocator* allocator) const { + auto out = allocator->Get("point_card_sequence", + {game.NumRounds(), game.NumCards()}); + for (int round = 0; round < state.point_card_sequence_.size(); ++round) { + out.at(round, state.point_card_sequence_[round]) = 1.0; + } + } + + void WriteCurrentPointCard(const GoofspielGame& game, + const GoofspielState& state, + Allocator* allocator) const { + auto out = allocator->Get("current_point_card", {game.NumCards()}); + if (!state.point_card_sequence_.empty()) + out.at(state.point_card_sequence_.back()) = 1.0; + } + + // Bit vector of observing player's hand. + void WritePlayerHand(const GoofspielGame& game, const GoofspielState& state, + int player, Allocator* allocator) const { + auto out = allocator->Get("player_hand", {game.NumCards()}); + for (int c = 0; c < game.NumCards(); ++c) { + out.at(c) = state.player_hands_[player][c]; + } + } + + // The observing player's action sequence. + void WritePlayerActionSequence(const GoofspielGame& game, + const GoofspielState& state, int player, + Allocator* allocator) const { + auto out = allocator->Get("player_action_sequence", + {game.NumRounds(), game.NumCards()}); + for (int round = 0; round < state.actions_history_.size(); ++round) { + out.at(round, state.actions_history_[round][player]) = 1.0; + } + } + + void StringPlayerHand(const GoofspielGame& game, const GoofspielState& state, + int player, std::string* result) const { + // Only my hand + absl::StrAppend(result, "P", player, " hand: "); + for (int c = 0; c < game.NumCards(); ++c) { + if (state.player_hands_[player][c]) absl::StrAppend(result, c + 1, " "); + } + absl::StrAppend(result, "\n"); + } + + void StringActionSequence(const GoofspielGame& game, + const GoofspielState& state, int player, + std::string* result) const { + // Also show the player's sequence. We need this to ensure perfect + // recall because two betting sequences can lead to the same hand and + // outcomes if the opponent chooses differently. 
+ absl::StrAppend(result, "P", player, " action sequence: "); + for (int i = 0; i < state.actions_history_.size(); ++i) { + absl::StrAppend(result, state.actions_history_[i][player], " "); + } + absl::StrAppend(result, "\n"); + } + void StringPointCardSequence(const GoofspielState& state, + std::string* result) const { + absl::StrAppend(result, "Point card sequence: "); + for (int i = 0; i < state.point_card_sequence_.size(); ++i) { + absl::StrAppend(result, 1 + state.point_card_sequence_[i], " "); + } + absl::StrAppend(result, "\n"); + } + void StringRemainingPointCards(const GoofspielState& state, + std::string* result) const { + std::set played(state.point_card_sequence_.begin(), + state.point_card_sequence_.end()); + absl::StrAppend(result, "Remaining Point Cards: "); + for (int i = 0; i < state.num_cards_; ++i) { + if (played.count(i) == 0) absl::StrAppend(result, 1 + i); + } + absl::StrAppend(result, "\n"); + } + void StringCurrentPointCard(const GoofspielState& state, + std::string* result) const { + absl::StrAppend(result, "Current point card: ", state.CurrentPointValue(), + "\n"); + } + void StringPlayersHands(const GoofspielGame& game, + const GoofspielState& state, + std::string* result) const { + // Show the hands in the perfect info case. + for (auto p = Player{0}; p < game.NumPlayers(); ++p) { + absl::StrAppend(result, "P", p, " hand: "); + for (int c = 0; c < game.NumCards(); ++c) { + if (state.player_hands_[p][c]) absl::StrAppend(result, c + 1, " "); + } + absl::StrAppend(result, "\n"); + } + } + void StringWinSequence(const GoofspielState& state, + std::string* result) const { + absl::StrAppend(result, "Win sequence: "); + for (int i = 0; i < state.win_sequence_.size(); ++i) { + absl::StrAppend(result, state.win_sequence_[i], " "); + } + absl::StrAppend(result, "\n"); + } + void StringPoints(const GoofspielGame& game, const GoofspielState& state, + std::string* result) const { + absl::StrAppend(result, "Points: "); + for (auto p = Player{0}; p < game.NumPlayers(); ++p) { + absl::StrAppend(result, state.points_[p], " "); + } + absl::StrAppend(result, "\n"); + } + void StringIsTerminal(const GoofspielState& state, + std::string* result) const { + absl::StrAppend(result, "Terminal?: ", state.IsTerminal(), "\n"); + } + + IIGObservationType iig_obs_type_; + const bool egocentric_; +}; + +GoofspielState::GoofspielState(std::shared_ptr game, int num_cards, + int num_turns, PointsOrder points_order, + bool impinfo, bool egocentric, + ReturnsType returns_type) + : SimMoveState(game), + num_cards_(num_cards), + num_turns_(num_turns), + points_order_(points_order), + returns_type_(returns_type), + impinfo_(impinfo), + egocentric_(egocentric), + current_player_(kInvalidPlayer), + winners_({}), + current_turn_(0), + point_card_(-1), + point_card_sequence_({}), + win_sequence_({}), + actions_history_({}) { + SPIEL_CHECK_LE(num_turns_, num_cards_); + + // Points and point-card deck. + points_.resize(num_players_); + std::fill(points_.begin(), points_.end(), 0); + + // Player hands. + player_hands_.clear(); + for (auto p = Player{0}; p < num_players_; ++p) { + std::vector hand(num_cards_, true); + player_hands_.push_back(hand); + } + + // Set the points card index. 
+ if (points_order_ == PointsOrder::kRandom) { + point_card_ = -1; + current_player_ = kChancePlayerId; + } else if (points_order_ == PointsOrder::kAscending) { + DealPointCard(0); + current_player_ = kSimultaneousPlayerId; + } else if (points_order_ == PointsOrder::kDescending) { + DealPointCard(num_cards - 1); + current_player_ = kSimultaneousPlayerId; + } +} + +int GoofspielState::CurrentPlayer() const { return current_player_; } + +void GoofspielState::DealPointCard(int point_card) { + SPIEL_CHECK_GE(point_card, 0); + SPIEL_CHECK_LT(point_card, num_cards_); + point_card_ = point_card; + point_card_sequence_.push_back(point_card); +} + +void GoofspielState::DoApplyAction(Action action_id) { + if (IsSimultaneousNode()) { + ApplyFlatJointAction(action_id); + return; + } + SPIEL_CHECK_TRUE(IsChanceNode()); + DealPointCard(action_id); + current_player_ = kSimultaneousPlayerId; +} + +void GoofspielState::DoApplyActions(const std::vector& actions) { + // Check the actions are valid. + SPIEL_CHECK_EQ(actions.size(), num_players_); + for (auto p = Player{0}; p < num_players_; ++p) { + const int action = actions[p]; + SPIEL_CHECK_GE(action, 0); + SPIEL_CHECK_LT(action, num_cards_); + SPIEL_CHECK_TRUE(player_hands_[p][action]); + } + + // Find the highest bid + int max_bid = -1; + int num_max_bids = 0; + int max_bidder = -1; + + for (int p = 0; p < actions.size(); ++p) { + if (actions[p] > max_bid) { + max_bid = actions[p]; + num_max_bids = 1; + max_bidder = p; + } else if (actions[p] == max_bid) { + num_max_bids++; + } + } + + if (num_max_bids == 1) { + // Winner takes the point card. + points_[max_bidder] += CurrentPointValue(); + win_sequence_.push_back(max_bidder); + } else { + // Tied among several players: discarded. + win_sequence_.push_back(kInvalidPlayer); + } + + // Add these actions to the history. + actions_history_.push_back(actions); + + // Remove the cards from the player's hands. + for (auto p = Player{0}; p < num_players_; ++p) { + player_hands_[p][actions[p]] = false; + } + + // Next player's turn. + current_turn_++; + + // Deal the next point card. + if (current_turn_ < num_turns_) { + if (points_order_ == PointsOrder::kRandom) { + current_player_ = kChancePlayerId; + point_card_ = -1; + } else if (points_order_ == PointsOrder::kAscending) { + if (point_card_ < num_cards_ - 1) DealPointCard(point_card_ + 1); + } else if (points_order_ == PointsOrder::kDescending) { + if (point_card_ > 0) DealPointCard(point_card_ - 1); + } + } + + // No choice at the last turn, so we can play it now + // We use DoApplyAction(s) not to modify the history, as these actions are + // not available in the tree. + if (current_turn_ == num_cards_ - 1) { + // There might be a chance event + if (IsChanceNode()) { + auto legal_actions = LegalChanceOutcomes(); + SPIEL_CHECK_EQ(legal_actions.size(), 1); + DoApplyAction(legal_actions.front()); + } + + // Each player plays their last card + std::vector actions(num_players_); + for (auto p = Player{0}; p < num_players_; ++p) { + auto legal_actions = LegalActions(p); + SPIEL_CHECK_EQ(legal_actions.size(), 1); + actions[p] = legal_actions[0]; + } + DoApplyActions(actions); + } else if (current_turn_ == num_turns_) { + // Game over - determine winner. 
+ int max_points = -1; + for (auto p = Player{0}; p < num_players_; ++p) { + if (points_[p] > max_points) { + winners_.clear(); + max_points = points_[p]; + winners_.insert(p); + } else if (points_[p] == max_points) { + winners_.insert(p); + } + } + current_player_ = kTerminalPlayerId; + } +} + +std::vector> GoofspielState::ChanceOutcomes() const { + SPIEL_CHECK_TRUE(IsChanceNode()); + std::set played(point_card_sequence_.begin(), + point_card_sequence_.end()); + std::vector> outcomes; + const int n = num_cards_ - played.size(); + const double p = 1.0 / n; + outcomes.reserve(n); + for (int i = 0; i < num_cards_; ++i) { + if (played.count(i) == 0) outcomes.emplace_back(i, p); + } + SPIEL_CHECK_EQ(outcomes.size(), n); + return outcomes; +} + +std::vector GoofspielState::LegalActions(Player player) const { + if (CurrentPlayer() == kTerminalPlayerId) return std::vector(); + if (player == kSimultaneousPlayerId) return LegalFlatJointActions(); + if (player == kChancePlayerId) return LegalChanceOutcomes(); + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + std::vector movelist; + for (int bid = 0; bid < player_hands_[player].size(); ++bid) { + if (player_hands_[player][bid]) { + movelist.push_back(bid); + } + } + return movelist; +} + +std::string GoofspielState::ActionToString(Player player, + Action action_id) const { + if (player == kSimultaneousPlayerId) + return FlatJointActionToString(action_id); + SPIEL_CHECK_GE(action_id, 0); + SPIEL_CHECK_LT(action_id, num_cards_); + if (player == kChancePlayerId) { + return absl::StrCat("Deal ", action_id + 1); + } else { + return absl::StrCat("[P", player, "]Bid: ", (action_id + 1)); + } +} + +std::string GoofspielState::ToString() const { + std::string points_line = "Points: "; + std::string result = ""; + + for (auto p = Player{0}; p < num_players_; ++p) { + absl::StrAppend(&points_line, points_[p]); + absl::StrAppend(&points_line, " "); + absl::StrAppend(&result, "P"); + absl::StrAppend(&result, p); + absl::StrAppend(&result, " hand: "); + for (int c = 0; c < num_cards_; ++c) { + if (player_hands_[p][c]) { + absl::StrAppend(&result, c + 1); + absl::StrAppend(&result, " "); + } + } + absl::StrAppend(&result, "\n"); + } + + // In imperfect information, the full state depends on both betting sequences + if (impinfo_) { + for (auto p = Player{0}; p < num_players_; ++p) { + absl::StrAppend(&result, "P", p, " actions: "); + for (int i = 0; i < actions_history_.size(); ++i) { + absl::StrAppend(&result, actions_history_[i][p]); + absl::StrAppend(&result, " "); + } + absl::StrAppend(&result, "\n"); + } + } + + absl::StrAppend(&result, "Point card sequence: "); + for (int i = 0; i < point_card_sequence_.size(); ++i) { + absl::StrAppend(&result, 1 + point_card_sequence_[i], " "); + } + absl::StrAppend(&result, "\n"); + + return result + points_line + "\n"; +} + +bool GoofspielState::IsTerminal() const { + return current_player_ == kTerminalPlayerId; +} + +std::vector GoofspielState::Returns() const { + if (!IsTerminal()) { + return std::vector(num_players_, 0.0); + } + + if (returns_type_ == ReturnsType::kWinLoss) { + if (winners_.size() == num_players_) { + // All players have same number of points? This is a draw. 
+ return std::vector(num_players_, 0.0); + } else { + int num_winners = winners_.size(); + int num_losers = num_players_ - num_winners; + std::vector returns(num_players_, (-1.0 / num_losers)); + for (const auto& winner : winners_) { + returns[winner] = 1.0 / num_winners; + } + return returns; + } + } else if (returns_type_ == ReturnsType::kPointDifference) { + std::vector returns(num_players_, 0); + double sum = 0; + for (Player p = 0; p < num_players_; ++p) { + returns[p] = points_[p]; + sum += points_[p]; + } + for (Player p = 0; p < num_players_; ++p) { + returns[p] -= sum / num_players_; + } + return returns; + } else if (returns_type_ == ReturnsType::kTotalPoints) { + std::vector returns(num_players_, 0); + for (Player p = 0; p < num_players_; ++p) { + returns[p] = points_[p]; + } + return returns; + } else { + SpielFatalError(absl::StrCat("Unrecognized returns type: ", returns_type_)); + } +} + +std::string GoofspielState::InformationStateString(Player player) const { + const GoofspielGame& game = + open_spiel::down_cast(*game_); + return game.info_state_observer_->StringFrom(*this, player); +} + +std::string GoofspielState::ObservationString(Player player) const { + const GoofspielGame& game = + open_spiel::down_cast(*game_); + return game.default_observer_->StringFrom(*this, player); +} + +void GoofspielState::NextPlayer(int* count, Player* player) const { + *count += 1; + *player = (*player + 1) % num_players_; +} + +void GoofspielState::InformationStateTensor(Player player, + absl::Span values) const { + ContiguousAllocator allocator(values); + const GoofspielGame& game = + open_spiel::down_cast(*game_); + game.info_state_observer_->WriteTensor(*this, player, &allocator); +} + +void GoofspielState::ObservationTensor(Player player, + absl::Span values) const { + ContiguousAllocator allocator(values); + const GoofspielGame& game = + open_spiel::down_cast(*game_); + game.default_observer_->WriteTensor(*this, player, &allocator); +} + +std::unique_ptr GoofspielState::Clone() const { + return std::unique_ptr(new GoofspielState(*this)); +} + +GoofspielGame::GoofspielGame(const GameParameters& params) + : Game(kGameType, params), + num_cards_(ParameterValue("num_cards")), + num_turns_(ParameterValue("num_turns")), + num_players_(ParameterValue("players")), + points_order_( + ParsePointsOrder(ParameterValue("points_order"))), + returns_type_( + ParseReturnsType(ParameterValue("returns_type"))), + impinfo_(ParameterValue("imp_info")), + egocentric_(ParameterValue("egocentric")) { + // Override the zero-sum utility in the game type if total point scoring. + if (returns_type_ == ReturnsType::kTotalPoints) { + game_type_.utility = GameType::Utility::kGeneralSum; + } + // Maybe override the perfect information in the game type. + if (impinfo_) { + game_type_.information = GameType::Information::kImperfectInformation; + } + // Deduce number of turns automatically if requested. 
+ if (num_turns_ == kNumTurnsSameAsCards) num_turns_ = num_cards_; + + const GameParameters obs_params = { + {"egocentric", GameParameter(egocentric_)}}; + default_observer_ = MakeObserver(kDefaultObsType, obs_params); + info_state_observer_ = MakeObserver(kInfoStateObsType, obs_params); + private_observer_ = MakeObserver( + IIGObservationType{/*public_info*/false, + /*perfect_recall*/false, + /*private_info*/PrivateInfoType::kSinglePlayer}, + obs_params); + public_observer_ = + MakeObserver(IIGObservationType{/*public_info*/true, + /*perfect_recall*/false, + /*private_info*/PrivateInfoType::kNone}, + obs_params); +} + +std::unique_ptr GoofspielGame::NewInitialState() const { + return std::make_unique(shared_from_this(), num_cards_, + num_turns_, points_order_, impinfo_, + egocentric_, returns_type_); +} + +int GoofspielGame::MaxChanceOutcomes() const { + if (points_order_ == PointsOrder::kRandom) { + return num_cards_; + } else { + return 0; + } +} + +std::vector GoofspielGame::InformationStateTensorShape() const { + if (impinfo_) { + return {// 1-hot bit vector for point total per player; upper bound is 1 + + // 2 + ... + N = N*(N+1) / 2, but must add one to include 0 points. + num_players_ * ((num_cards_ * (num_cards_ + 1)) / 2 + 1) + + // Bit vector for my remaining cards: + num_cards_ + + // If `egocentric = true`, returns a sequence of one-hot relative + // distances to the winner of a turn. + // If `egocentric = false`, returns a sequence of one-hot player id + // of the winner of a turn. + num_turns_ * num_players_ + + // A sequence of 1-hot bit vectors encoding the point card sequence. + num_turns_ * num_cards_ + + // The observing player's own action sequence. + num_turns_ * num_cards_}; + } else { + return {// 1-hot bit vector for point total per player; upper bound is 1 + + // 2 + ... + N = N*(N+1) / 2, but must add one to include 0 points. + num_players_ * ((num_cards_ * (num_cards_ + 1)) / 2 + 1) + + // A sequence of 1-hot bit vectors encoding the point card sequence. + num_turns_ * num_cards_ + + // Bit vector for each card per player. + num_players_ * num_cards_}; + } +} + +std::vector GoofspielGame::ObservationTensorShape() const { + // Perfect info case, show: + // - current point card showing + // - everyone's current points + // - everyone's current hands + // Imperfect info case, show: + // - current point card showing + // - everyone's current points + // - my current hand + // - current win sequence + if (impinfo_) { + return {// 1-hot bit to encode the current point card + num_cards_ + + // many-hot bit sequence to encode the remaining point cards + num_cards_ + + // 1-hot bit vector for point total per player; upper bound is 1 + + // 2 + ... + N = N*(N+1) / 2, but must add one to include 0 points. + num_players_ * ((num_cards_ * (num_cards_ + 1)) / 2 + 1) + + // Bit vector for my remaining cards: + num_cards_ + + // If `egocentric = true`, returns a sequence of one-hot relative + // distances to the winner of a turn. + // If `egocentric = false`, returns a sequence of one-hot player id + // of the winner of a turn. + num_turns_ * num_players_}; + } else { + return {// 1-hot bit to encode the current point card + num_cards_ + + // many-hot bit sequence to encode the remaining point cards + num_cards_ + + // 1-hot bit vector for point total per player; upper bound is 1 + + // 2 + ... + N = N*(N+1) / 2, but must add one to include 0 points. 
+ num_players_ * ((num_cards_ * (num_cards_ + 1)) / 2 + 1) + + // Bit vector for each card per player + num_players_ * num_cards_}; + } +} + +double GoofspielGame::MinUtility() const { + if (returns_type_ == ReturnsType::kWinLoss) { + return -1; + } else if (returns_type_ == ReturnsType::kPointDifference) { + // 0 - (1 + 2 + ... + N) / n + return -(num_cards_ * (num_cards_ + 1) / 2) / num_players_; + } else if (returns_type_ == ReturnsType::kTotalPoints) { + return 0; + } else { + SpielFatalError("Unrecognized returns type."); + } +} + +double GoofspielGame::MaxUtility() const { + if (returns_type_ == ReturnsType::kWinLoss) { + return 1; + } else if (returns_type_ == ReturnsType::kPointDifference) { + // (1 + 2 + ... + N) - (1 + 2 + ... + N) / n + // = (n-1) (1 + 2 + ... + N) / n + double sum = num_cards_ * (num_cards_ + 1) / 2; + return (num_players_ - 1) * sum / num_players_; + } else if (returns_type_ == ReturnsType::kTotalPoints) { + // 1 + 2 + ... + N. + return num_cards_ * (num_cards_ + 1) / 2; + } else { + SpielFatalError("Unrecognized returns type."); + } +} + +absl::optional GoofspielGame::UtilitySum() const { + if (returns_type_ == ReturnsType::kTotalPoints) + return absl::nullopt; + else + return 0; +} + +std::shared_ptr GoofspielGame::MakeObserver( + absl::optional iig_obs_type, + const GameParameters& params) const { + // Allows for `egocentric` overrides if observer variant is needed. + bool egocentric = egocentric_; + const auto& it = params.find("egocentric"); + if (it != params.end()) { + egocentric = it->second.value(); + } + return std::make_shared( + iig_obs_type.value_or(kDefaultObsType), egocentric); +} + +} // namespace goofspiel +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/goofspiel/goofspiel.h b/scenarios/bargaining/open_spiel/open_spiel/games/goofspiel/goofspiel.h new file mode 100644 index 0000000..9adc18b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/goofspiel/goofspiel.h @@ -0,0 +1,189 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_GOOFSPIEL_H_ +#define OPEN_SPIEL_GAMES_GOOFSPIEL_H_ + +#include +#include +#include +#include + +#include "open_spiel/simultaneous_move_game.h" +#include "open_spiel/spiel.h" + +// Goofspiel, or the Game of Pure Strategy, is a bidding card game where players +// are trying to obtain the most points. In, Goofspiel(N,K), each player has bid +// cards numbered 1..N and a point card deck containing cards numbered 1..N is +// shuffled and set face-down. There are K turns. Each turn, the top point card +// is revealed, and players simultaneously play a bid card; the point card is +// given to the highest bidder or discarded if the bids are equal. For more +// detail, see: https://en.wikipedia.org/wiki/Goofspiel +// +// This implementation of Goofspiel is slightly more general than the standard +// game. First, more than 2 players can play it. 
Second, the deck can take on +// pre-determined orders rather than randomly determined. Third, there is an +// option to enable the imperfect information variant described in Sec 3.1.4 +// of http://mlanctot.info/files/papers/PhD_Thesis_MarcLanctot.pdf, where only +// the sequences of wins / losses is revealed (not the players' hands). Fourth, +// players can play for only K turns (if not specified, K=N by default). +// +// The returns_type parameter determines how returns (utilities) are defined: +// - win_loss distributed 1 point divided by number of winners (i.e. players +// with highest points), and similarly to -1 among losers +// - point_difference means each player gets utility as number of points +// collected minus the average over players. +// - total_points means each player's return is equal to the number of points +// they collected. +// +// Parameters: +// "imp_info" bool Enable the imperfect info variant (default: false) +// "egocentric" bool Enable the egocentric info variant (default: false) +// "num_cards" int The highest bid card, and point card (default: 13) +// "num_turns" int The number of turns to play (default: -1, play +// for the same number of rounds as there are cards) +// "players" int number of players (default: 2) +// "points_order" string "random" (default), "descending", or "ascending" +// "returns_type" string "win_loss" (default), "point_difference", or +// "total_points". + +namespace open_spiel { +namespace goofspiel { + +inline constexpr int kNumTurnsSameAsCards = -1; + +inline constexpr int kDefaultNumPlayers = 2; +inline constexpr int kDefaultNumCards = 13; +inline constexpr int kDefaultNumTurns = kNumTurnsSameAsCards; +inline constexpr const char* kDefaultPointsOrder = "random"; +inline constexpr const char* kDefaultReturnsType = "win_loss"; +inline constexpr const bool kDefaultImpInfo = false; +inline constexpr const bool kDefaultEgocentric = false; + +enum class PointsOrder { + kRandom, + kDescending, + kAscending, +}; + +enum class ReturnsType { + kWinLoss, + kPointDifference, + kTotalPoints, +}; + +inline constexpr const int kInvalidCard = -1; + +class GoofspielObserver; + +class GoofspielState : public SimMoveState { + public: + explicit GoofspielState(std::shared_ptr game, int num_cards, + int num_turns, PointsOrder points_order, bool impinfo, + bool egocentric, ReturnsType returns_type); + + Player CurrentPlayer() const override; + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + + void InformationStateTensor(Player player, + absl::Span values) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + std::vector> ChanceOutcomes() const override; + + std::vector LegalActions(Player player) const override; + + protected: + void DoApplyAction(Action action_id) override; + void DoApplyActions(const std::vector& actions) override; + + private: + friend class GoofspielObserver; + // Increments the count and increments the player mod num_players_. 
+ void NextPlayer(int* count, Player* player) const; + void DealPointCard(int point_card); + int CurrentPointValue() const { return 1 + point_card_; } + + int num_cards_; + int num_turns_; + PointsOrder points_order_; + ReturnsType returns_type_; + bool impinfo_; + bool egocentric_; + + Player current_player_; + std::set winners_; + int current_turn_; + int point_card_; + std::vector points_; + std::vector> player_hands_; // true if card is in hand. + std::vector point_card_sequence_; + std::vector win_sequence_; // Which player won, kInvalidPlayer if tie + std::vector> actions_history_; +}; + +class GoofspielGame : public Game { + public: + explicit GoofspielGame(const GameParameters& params); + + int NumDistinctActions() const override { return num_cards_; } + std::unique_ptr NewInitialState() const override; + int MaxChanceOutcomes() const override; + int NumPlayers() const override { return num_players_; } + double MinUtility() const override; + double MaxUtility() const override; + absl::optional UtilitySum() const override; + std::vector InformationStateTensorShape() const override; + std::vector ObservationTensorShape() const override; + int MaxGameLength() const override { return num_cards_; } + std::shared_ptr MakeObserver( + absl::optional iig_obs_type, + const GameParameters& params) const override; + + int NumCards() const { return num_cards_; } + int NumRounds() const { return num_turns_; } + int NumTurns() const { return num_turns_; } + PointsOrder GetPointsOrder() const { return points_order_; } + ReturnsType GetReturnsType() const { return returns_type_; } + bool IsImpInfo() const { return impinfo_; } + int MaxPointSlots() const { return (NumCards() * (NumCards() + 1)) / 2 + 1; } + + // Used to implement the old observation API. + std::shared_ptr default_observer_; + std::shared_ptr info_state_observer_; + std::shared_ptr public_observer_; + std::shared_ptr private_observer_; + // TODO: verify whether this bound is tight and/or tighten it. + int MaxChanceNodesInHistory() const override { return MaxGameLength(); } + + private: + int num_cards_; // The N in Goofspiel(N,K) + int num_turns_; // The K in Goofspiel(N,K) + int num_players_; // Number of players + PointsOrder points_order_; + ReturnsType returns_type_; + bool impinfo_; + bool egocentric_; +}; + +} // namespace goofspiel +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_GOOFSPIEL_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/goofspiel/goofspiel_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/goofspiel/goofspiel_test.cc new file mode 100644 index 0000000..bf37269 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/goofspiel/goofspiel_test.cc @@ -0,0 +1,111 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "open_spiel/games/goofspiel/goofspiel.h"
+
+#include "open_spiel/game_parameters.h"
+#include "open_spiel/game_transforms/turn_based_simultaneous_game.h"
+#include "open_spiel/spiel_utils.h"
+#include "open_spiel/tests/basic_tests.h"
+
+namespace open_spiel {
+namespace goofspiel {
+namespace {
+
+namespace testing = open_spiel::testing;
+
+void BasicGoofspielTests() {
+  testing::LoadGameTest("goofspiel");
+  testing::ChanceOutcomesTest(*LoadGame("goofspiel"));
+  testing::RandomSimTest(*LoadGame("goofspiel"), 100);
+  for (Player players = 3; players <= 5; players++) {
+    for (const std::string& returns_type :
+         {"win_loss", "point_difference", "total_points"}) {
+      testing::RandomSimTest(
+          *LoadGame("goofspiel",
+                    {{"players", GameParameter(players)},
+                     {"returns_type", GameParameter(returns_type)}}),
+          10);
+    }
+  }
+}
+
+void LegalActionsValidAtEveryState() {
+  GameParameters params;
+  params["imp_info"] = GameParameter(true);
+  params["num_cards"] = GameParameter(4);
+  params["points_order"] = GameParameter(std::string("descending"));
+  std::shared_ptr<const Game> game = LoadGameAsTurnBased("goofspiel", params);
+  testing::RandomSimTest(*game, /*num_sims=*/10);
+}
+
+void GoofspielWithLimitedTurns() {
+  GameParameters params;
+  params["imp_info"] = GameParameter(true);
+  params["num_cards"] = GameParameter(13);
+  params["num_turns"] = GameParameter(3);
+  params["points_order"] = GameParameter(std::string("descending"));
+  testing::RandomSimTest(*LoadGame("goofspiel", params), 10);
+}
+
+void EgocentricViewOfSymmetricActions() {
+  GameParameters params;
+  params["imp_info"] = GameParameter(true);
+  params["egocentric"] = GameParameter(true);
+  params["num_cards"] = GameParameter(4);
+  params["players"] = GameParameter(3);
+  params["points_order"] = GameParameter(std::string("descending"));
+  std::shared_ptr<const Game> game = LoadGame("goofspiel", params);
+
+  std::unique_ptr<State> state = game->NewInitialState();
+
+  // Three action sequences each played by one player.
+  std::vector<Action> seq1{3, 2, 0 /*, 1 */};
+  std::vector<Action> seq2{0, 1, 2 /*, 3 */};
+  std::vector<Action> seq3{2, 3, 1 /*, 0 */};
+
+  // Accumulate info state histories from the perspective of `seq1` when playing
+  // as one of the three players.
+  std::vector<std::vector<std::vector<float>>> info_state_histories(
+      game->NumPlayers());
+  for (int as_player = 0; as_player < game->NumPlayers(); as_player++) {
+    for (int t = 0; t < game->MaxGameLength() - 1; t++) {
+      std::vector<Action> joint_actions(game->NumPlayers(), -1);
+      joint_actions[as_player] = seq1[t];
+      joint_actions[(as_player + 1) % game->NumPlayers()] = seq2[t];
+      joint_actions[(as_player + 2) % game->NumPlayers()] = seq3[t];
+      state->ApplyActions(std::move(joint_actions));
+      auto info_state = state->InformationStateTensor(as_player);
+      info_state_histories[as_player].push_back(std::move(info_state));
+    }
+    state = game->NewInitialState();
+  }
+
+  // Verify that the observations remain identical regardless of which player
+  // `seq1` was executed for.
+  SPIEL_CHECK_EQ(info_state_histories.size(), game->NumPlayers());
+  SPIEL_CHECK_EQ(info_state_histories[0], info_state_histories[1]);
+  SPIEL_CHECK_EQ(info_state_histories[1], info_state_histories[2]);
+}
+
+}  // namespace
+}  // namespace goofspiel
+}  // namespace open_spiel
+
+int main(int argc, char **argv) {
+  open_spiel::goofspiel::BasicGoofspielTests();
+  open_spiel::goofspiel::LegalActionsValidAtEveryState();
+  open_spiel::goofspiel::GoofspielWithLimitedTurns();
+  open_spiel::goofspiel::EgocentricViewOfSymmetricActions();
+}
diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/hanabi/CMakeLists.txt b/scenarios/bargaining/open_spiel/open_spiel/games/hanabi/CMakeLists.txt
new file mode 100644
index 0000000..ef6562a
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/games/hanabi/CMakeLists.txt
@@ -0,0 +1,29 @@
+add_library(hanabi_learning_environment OBJECT
+  hanabi-learning-environment/hanabi_learning_environment/hanabi_lib/canonical_encoders.cc
+  hanabi-learning-environment/hanabi_learning_environment/hanabi_lib/canonical_encoders.h
+  hanabi-learning-environment/hanabi_learning_environment/hanabi_lib/hanabi_card.cc
+  hanabi-learning-environment/hanabi_learning_environment/hanabi_lib/hanabi_card.h
+  hanabi-learning-environment/hanabi_learning_environment/hanabi_lib/hanabi_game.cc
+  hanabi-learning-environment/hanabi_learning_environment/hanabi_lib/hanabi_game.h
+  hanabi-learning-environment/hanabi_learning_environment/hanabi_lib/hanabi_hand.cc
+  hanabi-learning-environment/hanabi_learning_environment/hanabi_lib/hanabi_hand.h
+  hanabi-learning-environment/hanabi_learning_environment/hanabi_lib/hanabi_history_item.cc
+  hanabi-learning-environment/hanabi_learning_environment/hanabi_lib/hanabi_history_item.h
+  hanabi-learning-environment/hanabi_learning_environment/hanabi_lib/hanabi_move.cc
+  hanabi-learning-environment/hanabi_learning_environment/hanabi_lib/hanabi_move.h
+  hanabi-learning-environment/hanabi_learning_environment/hanabi_lib/hanabi_observation.cc
+  hanabi-learning-environment/hanabi_learning_environment/hanabi_lib/hanabi_observation.h
+  hanabi-learning-environment/hanabi_learning_environment/hanabi_lib/hanabi_state.cc
+  hanabi-learning-environment/hanabi_learning_environment/hanabi_lib/hanabi_state.h
+  hanabi-learning-environment/hanabi_learning_environment/hanabi_lib/observation_encoder.h
+  hanabi-learning-environment/hanabi_learning_environment/hanabi_lib/util.cc
+  hanabi-learning-environment/hanabi_learning_environment/hanabi_lib/util.h
+)
+
+target_include_directories (hanabi_learning_environment PUBLIC hanabi-learning-environment/hanabi_learning_environment)
+target_include_directories (games PUBLIC hanabi-learning-environment/hanabi_learning_environment)
+
+add_executable(hanabi_test hanabi_test.cc ${OPEN_SPIEL_OBJECTS}
+               $<TARGET_OBJECTS:hanabi_learning_environment>)
+add_test(hanabi_test hanabi_test)
+target_include_directories (hanabi_test PUBLIC hanabi-learning-environment/hanabi_learning_environment)
diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/hanabi/README.md b/scenarios/bargaining/open_spiel/open_spiel/games/hanabi/README.md
new file mode 100644
index 0000000..b168492
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/games/hanabi/README.md
@@ -0,0 +1,5 @@
+# Hanabi
+
+OpenSpiel can support Hanabi, using the implementation in
+https://github.com/deepmind/hanabi-learning-environment. To enable this option,
+see `open_spiel/scripts/global_variables.sh`.
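All of the games vendored here are driven through the same OpenSpiel C++ surface that the bundled `*_test.cc` files exercise: `LoadGame`, `NewInitialState`, `LegalActions`, `ApplyAction`, and `Returns`. The following is a minimal sketch, not part of the vendored sources, of a random playout against the Goofspiel game registered above; it assumes the vendored library has been built and is on the include and link path, and the `num_cards` value is just an illustrative choice of the parameter documented in `goofspiel.h`:

```cpp
// Illustrative sketch only: random playout against the vendored OpenSpiel API.
#include <iostream>
#include <memory>
#include <random>
#include <vector>

#include "open_spiel/spiel.h"

int main() {
  std::mt19937 rng(1234);
  std::shared_ptr<const open_spiel::Game> game = open_spiel::LoadGame(
      "goofspiel", {{"num_cards", open_spiel::GameParameter(4)}});
  std::unique_ptr<open_spiel::State> state = game->NewInitialState();
  while (!state->IsTerminal()) {
    // LegalActions() yields chance outcomes at chance nodes and flat joint
    // actions at simultaneous nodes, so one loop handles every node type.
    std::vector<open_spiel::Action> actions = state->LegalActions();
    std::uniform_int_distribution<int> dist(
        0, static_cast<int>(actions.size()) - 1);
    state->ApplyAction(actions[dist(rng)]);
  }
  for (double r : state->Returns()) std::cout << r << " ";
  std::cout << std::endl;
}
```

Because `GoofspielState::DoApplyAction` forwards flat joint actions at simultaneous nodes and deals point cards at chance nodes, the single `ApplyAction` loop above covers chance, simultaneous, and terminal handling alike; the same pattern applies to the `go` and, when enabled, `hanabi` wrappers below.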
diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/hanabi/hanabi.cc b/scenarios/bargaining/open_spiel/open_spiel/games/hanabi/hanabi.cc new file mode 100644 index 0000000..af4687a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/hanabi/hanabi.cc @@ -0,0 +1,245 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/hanabi/hanabi.h" + +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace hanabi { + +namespace { + +const GameType kGameType{ + /*short_name=*/"hanabi", + /*long_name=*/"Hanabi", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kIdentical, + GameType::RewardModel::kRewards, + /*max_num_players=*/5, + /*min_num_players=*/2, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + { + {"players", GameParameter(GameParameter::Type::kInt)}, + {"colors", GameParameter(GameParameter::Type::kInt)}, + {"ranks", GameParameter(GameParameter::Type::kInt)}, + {"hand_size", GameParameter(GameParameter::Type::kInt)}, + {"max_information_tokens", GameParameter(GameParameter::Type::kInt)}, + {"max_life_tokens", GameParameter(GameParameter::Type::kInt)}, + {"seed", GameParameter(GameParameter::Type::kInt)}, + {"random_start_player", GameParameter(GameParameter::Type::kBool)}, + {"observation_type", GameParameter(GameParameter::Type::kString)}, + }}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new OpenSpielHanabiGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +open_spiel::RegisterSingleTensorObserver single_tensor(kGameType.short_name); + + +} // namespace + +std::unordered_map OpenSpielHanabiGame::MapParams() + const { + std::unordered_map hanabi_params; + if (IsParameterSpecified(game_parameters_, "players")) + hanabi_params["players"] = absl::StrCat(ParameterValue("players")); + + if (IsParameterSpecified(game_parameters_, "colors")) + hanabi_params["colors"] = absl::StrCat(ParameterValue("colors")); + + if (IsParameterSpecified(game_parameters_, "ranks")) + hanabi_params["ranks"] = absl::StrCat(ParameterValue("ranks")); + + if (IsParameterSpecified(game_parameters_, "hand_size")) + hanabi_params["hand_size"] = absl::StrCat(ParameterValue("hand_size")); + + if (IsParameterSpecified(game_parameters_, "max_information_tokens")) + hanabi_params["max_information_tokens"] = + absl::StrCat(ParameterValue("max_information_tokens")); + + if (IsParameterSpecified(game_parameters_, "max_life_tokens")) + hanabi_params["max_life_tokens"] = + absl::StrCat(ParameterValue("max_life_tokens")); + + if (IsParameterSpecified(game_parameters_, "seed")) + hanabi_params["seed"] = 
absl::StrCat(ParameterValue("seed")); + + if (IsParameterSpecified(game_parameters_, "random_start_player")) + hanabi_params["random_start_player"] = + absl::StrCat(ParameterValue("random_start_player")); + + if (IsParameterSpecified(game_parameters_, "observation_type")) { + auto observation_type = ParameterValue("observation_type"); + if (observation_type == "minimal") + hanabi_params["observation_type"] = absl::StrCat( + hanabi_learning_env::HanabiGame::AgentObservationType::kMinimal); + else if (observation_type == "card_knowledge") + hanabi_params["observation_type"] = + absl::StrCat(hanabi_learning_env::HanabiGame::AgentObservationType:: + kCardKnowledge); + else if (observation_type == "seer") + hanabi_params["observation_type"] = absl::StrCat( + hanabi_learning_env::HanabiGame::AgentObservationType::kSeer); + else + SpielFatalError( + absl::StrCat("Invalid observation_type ", observation_type)); + } + return hanabi_params; +} + +OpenSpielHanabiGame::OpenSpielHanabiGame(const GameParameters& params) + : Game(kGameType, params), game_(MapParams()), encoder_(&game_) {} + +int OpenSpielHanabiGame::NumDistinctActions() const { return game_.MaxMoves(); } + +std::unique_ptr OpenSpielHanabiGame::NewInitialState() const { + return std::unique_ptr(new OpenSpielHanabiState(shared_from_this())); +} + +int OpenSpielHanabiGame::MaxChanceOutcomes() const { + return game_.MaxChanceOutcomes(); +} + +int OpenSpielHanabiGame::NumPlayers() const { return game_.NumPlayers(); } + +double OpenSpielHanabiGame::MinUtility() const { return 0; } + +double OpenSpielHanabiGame::MaxUtility() const { + return game_.NumColors() * game_.NumRanks(); +} + +std::vector OpenSpielHanabiGame::ObservationTensorShape() const { + return encoder_.Shape(); +} + +int OpenSpielHanabiGame::MaxGameLength() const { + // This is an overestimate. + return game_.NumPlayers() * game_.HandSize() // Initial deal + + game_.MaxDeckSize() // Cards played + + game_.MaxDeckSize() + game_.MaxInformationTokens(); // Hints given +} + +Player OpenSpielHanabiState::CurrentPlayer() const { + return state_.IsTerminal() ? kTerminalPlayerId : state_.CurPlayer(); +} + +std::vector OpenSpielHanabiState::LegalActions() const { + if (IsTerminal()) { + return {}; + } else if (IsChanceNode()) { + auto outcomes_and_probs = state_.ChanceOutcomes(); + const int n = outcomes_and_probs.first.size(); + std::vector chance_outcomes; + chance_outcomes.reserve(n); + for (int i = 0; i < n; ++i) { + chance_outcomes.emplace_back( + game_->HanabiGame().GetChanceOutcomeUid(outcomes_and_probs.first[i])); + } + return chance_outcomes; + } else { + auto moves = state_.LegalMoves(CurrentPlayer()); + std::vector actions; + actions.reserve(moves.size()); + for (auto m : moves) actions.push_back(game_->HanabiGame().GetMoveUid(m)); + return actions; + } +} + +std::string OpenSpielHanabiState::ActionToString(Player player, + Action action_id) const { + if (player == kChancePlayerId) + return game_->HanabiGame().GetChanceOutcome(action_id).ToString(); + else + return game_->HanabiGame().GetMove(action_id).ToString(); +} + +std::vector OpenSpielHanabiState::Rewards() const { + return std::vector(NumPlayers(), state_.Score() - prev_state_score_); +} + +std::vector OpenSpielHanabiState::Returns() const { + return std::vector(NumPlayers(), state_.Score()); +} + +void OpenSpielHanabiState::DoApplyAction(Action action) { + auto move = IsChanceNode() ? 
game_->HanabiGame().GetChanceOutcome(action) + : game_->HanabiGame().GetMove(action); + if (state_.MoveIsLegal(move)) { + if (!IsChanceNode()) prev_state_score_ = state_.Score(); + state_.ApplyMove(move); + } else { + SpielFatalError(absl::StrCat("Invalid move ", move.ToString())); + } +} + +std::string OpenSpielHanabiState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + return hanabi_learning_env::HanabiObservation(state_, player).ToString(); +} + +void OpenSpielHanabiState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + auto obs = game_->Encoder().Encode( + hanabi_learning_env::HanabiObservation(state_, player)); + SPIEL_CHECK_EQ(values.size(), obs.size()); + for (int i = 0; i < obs.size(); ++i) values.at(i) = obs[i]; +} + +std::unique_ptr OpenSpielHanabiState::Clone() const { + return std::unique_ptr(new OpenSpielHanabiState(*this)); +} + +ActionsAndProbs OpenSpielHanabiState::ChanceOutcomes() const { + auto outcomes_and_probs = state_.ChanceOutcomes(); + const int n = outcomes_and_probs.first.size(); + ActionsAndProbs chance_outcomes; + chance_outcomes.reserve(n); + for (int i = 0; i < n; ++i) { + chance_outcomes.emplace_back( + game_->HanabiGame().GetChanceOutcomeUid(outcomes_and_probs.first[i]), + outcomes_and_probs.second[i]); + } + return chance_outcomes; +} + +std::string OpenSpielHanabiState::ToString() const { return state_.ToString(); } + +bool OpenSpielHanabiState::IsTerminal() const { return state_.IsTerminal(); } + +OpenSpielHanabiState::OpenSpielHanabiState(std::shared_ptr game) + : State(game), + state_(const_cast( + &(static_cast(*game).HanabiGame()))), + game_(static_cast(game.get())), + prev_state_score_(0.) {} + +} // namespace hanabi +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/hanabi/hanabi.h b/scenarios/bargaining/open_spiel/open_spiel/games/hanabi/hanabi.h new file mode 100644 index 0000000..84f71ae --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/hanabi/hanabi.h @@ -0,0 +1,102 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_HANABI_H_ +#define OPEN_SPIEL_GAMES_HANABI_H_ + +// Hanabi is a cooperative card game, described here: +// https://en.wikipedia.org/wiki/Hanabi_(card_game) +// +// See https://arxiv.org/abs/1902.00506 for a motivation of Hanabi as an AI +// challenge and some initial results. Please cite this paper if you use this +// Hanabi wrapper for any research results. +// +// This implementation is a wrapper for the Hanabi Learning Environment, which +// can be found here: https://github.com/deepmind/hanabi-learning-environment +// +// Since Hanabi relies on an (optional) external dependency, it is not included +// in the list of compiled games by default. 
To enable it, read `install.md` +// (TLDR: Set the environment variable OPEN_SPIEL_BUILD_WITH_HANABI to ON). + +#include + +#include "open_spiel/spiel.h" +#include "hanabi_lib/canonical_encoders.h" +#include "hanabi_lib/hanabi_game.h" +#include "hanabi_lib/hanabi_state.h" + +namespace open_spiel { +namespace hanabi { + +class OpenSpielHanabiGame : public Game { + public: + explicit OpenSpielHanabiGame(const GameParameters& params); + int NumDistinctActions() const override; + std::unique_ptr NewInitialState() const override; + int MaxChanceOutcomes() const override; + int NumPlayers() const override; + double MinUtility() const override; + double MaxUtility() const override; + std::vector ObservationTensorShape() const override; + int MaxGameLength() const override; + // TODO: verify whether this bound is tight and/or tighten it. + int MaxChanceNodesInHistory() const override { return MaxGameLength(); } + + const hanabi_learning_env::ObservationEncoder& Encoder() const { + return encoder_; + } + + const hanabi_learning_env::HanabiGame& HanabiGame() const { return game_; } + + private: + std::unordered_map MapParams() const; + hanabi_learning_env::HanabiGame game_; + hanabi_learning_env::CanonicalObservationEncoder encoder_; +}; + +class OpenSpielHanabiState : public State { + public: + explicit OpenSpielHanabiState(std::shared_ptr game); + Player CurrentPlayer() const override; + std::vector LegalActions() const override; + std::string ActionToString(Player player, Action action_id) const override; + std::vector Rewards() const override; + std::vector Returns() const override; + + // We support observations only, not information states. The information + // state would have to include the entire history of the game, and is + // impractically large. + // The observation by default includes knowledge inferred from past hints. + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + + std::unique_ptr Clone() const override; + ActionsAndProbs ChanceOutcomes() const override; + std::string ToString() const override; + bool IsTerminal() const override; + + protected: + void DoApplyAction(Action action) override; + + private: + hanabi_learning_env::HanabiState state_; + const OpenSpielHanabiGame* game_; + double prev_state_score_; +}; + +} // namespace hanabi +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_HANABI_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/hanabi/hanabi_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/hanabi/hanabi_test.cc new file mode 100644 index 0000000..043a0ba --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/hanabi/hanabi_test.cc @@ -0,0 +1,47 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/hanabi/hanabi.h" + +#include "open_spiel/game_parameters.h" +#include "open_spiel/observer.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace hanabi { +namespace { + +namespace testing = open_spiel::testing; + +void BasicHanabiTests() { + testing::LoadGameTest("hanabi"); + testing::ChanceOutcomesTest(*LoadGame("hanabi")); + testing::RandomSimTest(*LoadGame("hanabi"), 100); + for (int players = 3; players <= 5; players++) { + testing::RandomSimTest( + *LoadGame("hanabi", {{"players", GameParameter(players)}}), 100); + } + + auto observer = LoadGame("hanabi") + ->MakeObserver(kDefaultObsType, + GameParametersFromString("single_tensor")); + testing::RandomSimTestCustomObserver(*LoadGame("hanabi"), observer); +} + +} // namespace +} // namespace hanabi +} // namespace open_spiel + +int main(int argc, char **argv) { open_spiel::hanabi::BasicHanabiTests(); } diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/havannah/havannah.cc b/scenarios/bargaining/open_spiel/open_spiel/games/havannah/havannah.cc new file mode 100644 index 0000000..d43736f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/havannah/havannah.cc @@ -0,0 +1,431 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/havannah/havannah.h" + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/tensor_view.h" + +namespace open_spiel { +namespace havannah { +namespace { + +// Facts about the game. +const GameType kGameType{/*short_name=*/"havannah", + /*long_name=*/"Havannah", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + { + {"board_size", GameParameter(kDefaultBoardSize)}, + {"swap", GameParameter(false)}, + {"ansi_color_output", GameParameter(false)}, + }}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new HavannahGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +// The board is represented as a flattened 2d array of the form: +// 1 2 3 +// a 0 1 2 0 1 0 1 +// b 3 4 5 => 3 4 5 => 3 4 5 +// c 6 7 8 7 8 7 8 +// +// Neighbors are laid out in this pattern: +// 0 1 +// 5 X 2 +// 4 3 + +// Direct neighbors of a cell, clockwise. 
+constexpr std::array neighbor_offsets = { + Move(-1, -1, kMoveOffset), Move(0, -1, kMoveOffset), + Move(1, 0, kMoveOffset), Move(1, 1, kMoveOffset), + Move(0, 1, kMoveOffset), Move(-1, 0, kMoveOffset), +}; + +// Precomputed list of neighbors per board_size: [board_size][cell][direction] +std::vector neighbor_list; + +NeighborList gen_neighbors(int board_size) { + int diameter = board_size * 2 - 1; + NeighborList out; + out.resize(diameter * diameter); + for (int y = 0; y < diameter; y++) { + for (int x = 0; x < diameter; x++) { + int xy = x + y * diameter; // Don't use Move.xy so it works off-board. + for (int dir = 0; dir < neighbor_offsets.size(); dir++) { + Move offset = neighbor_offsets[dir]; + out[xy][dir] = Move(x + offset.x, y + offset.y, board_size); + } + } + } + return out; +} + +const NeighborList& get_neighbors(int board_size) { + if (board_size >= neighbor_list.size()) { + neighbor_list.resize(board_size + 1); + } + if (neighbor_list[board_size].empty()) { + neighbor_list[board_size] = gen_neighbors(board_size); + } + return neighbor_list[board_size]; +} + +// Number of set bits in each 6-bit integer. +// Python code to compute these values: [bin(i).count("1") for i in range(64)] +constexpr int kBitsSetTable64[] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, + 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, + 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, +}; + +} // namespace + +int Move::Corner(int board_size) const { + if (!OnBoard()) return 0; + + int m = board_size - 1; + int e = m * 2; + + if (x == 0 && y == 0) return 1 << 0; + if (x == m && y == 0) return 1 << 1; + if (x == e && y == m) return 1 << 2; + if (x == e && y == e) return 1 << 3; + if (x == m && y == e) return 1 << 4; + if (x == 0 && y == m) return 1 << 5; + + return 0; +} + +int Move::Edge(int board_size) const { + if (!OnBoard()) return 0; + + int m = board_size - 1; + int e = m * 2; + + if (y == 0 && x != 0 && x != m) return 1 << 0; + if (x - y == m && x != m && x != e) return 1 << 1; + if (x == e && y != m && y != e) return 1 << 2; + if (y == e && x != e && x != m) return 1 << 3; + if (y - x == m && x != m && x != 0) return 1 << 4; + if (x == 0 && y != m && y != 0) return 1 << 5; + + return 0; +} + +std::string Move::ToString() const { + if (xy == kMoveUnknown) return "unknown"; + if (xy == kMoveNone) return "none"; + return absl::StrCat(std::string(1, static_cast('a' + x)), y + 1); +} + +int HavannahState::Cell::NumCorners() const { return kBitsSetTable64[corner]; } +int HavannahState::Cell::NumEdges() const { return kBitsSetTable64[edge]; } + +HavannahState::HavannahState(std::shared_ptr game, int board_size, + bool ansi_color_output, bool allow_swap) + : State(game), + board_size_(board_size), + board_diameter_(board_size * 2 - 1), + valid_cells_((board_size * 2 - 1) * (board_size * 2 - 1) - + board_size * (board_size - 1)), // diameter^2 - corners + neighbors_(get_neighbors(board_size)), + ansi_color_output_(ansi_color_output), + allow_swap_(allow_swap) { + board_.resize(board_diameter_ * board_diameter_); + for (int i = 0; i < board_.size(); i++) { + Move m = ActionToMove(i); + board_[i] = Cell((m.OnBoard() ? kPlayerNone : kPlayerInvalid), i, + m.Corner(board_size), m.Edge(board_size)); + } +} + +Move HavannahState::ActionToMove(Action action_id) const { + return Move(action_id % board_diameter_, action_id / board_diameter_, + board_size_); +} + +std::vector HavannahState::LegalActions() const { + // Can move in any empty cell. 
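+  // In addition, under the swap (pie) rule the second player's first move may
+  // take over the stone just placed by the first player (handled below via
+  // AllowSwap()).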
+ std::vector moves; + if (IsTerminal()) return {}; + moves.reserve(board_.size() - moves_made_); + for (int cell = 0; cell < board_.size(); ++cell) { + if (board_[cell].player == kPlayerNone) { + moves.push_back(cell); + } + } + if (AllowSwap()) { // The second move is allowed to replace the first one. + moves.push_back(last_move_.xy); + absl::c_sort(moves); + } + return moves; +} + +std::string HavannahState::ActionToString(Player player, + Action action_id) const { + return ActionToMove(action_id).ToString(); +} + +bool HavannahState::AllowSwap() const { + return allow_swap_ && moves_made_ == 1 && current_player_ == kPlayer2; +} + +std::string HavannahState::ToString() const { + // Generates something like: + // a b c d e + // 1 @ O . @ O f + // 2 . O O @ O @ g + // 3 . @ @ . . @ O h + // 4 . @ @ . . . O O i + // 5 @ . . O . @ @ O . + // 6 @ O . O O @ @[O] + // 7 . @ O . O O O + // 8 @ O @ O O O + // 9 @ O @ @ @ + + std::string white = "O"; + std::string black = "@"; + std::string empty = "."; + std::string coord = ""; + std::string reset = ""; + if (ansi_color_output_) { + std::string esc = "\033"; + reset = esc + "[0m"; + coord = esc + "[1;37m"; // bright white + empty = reset + "."; + white = esc + "[1;33m" + "@"; // bright yellow + black = esc + "[1;34m" + "@"; // bright blue + } + + std::ostringstream out; + + // Top x coords. + out << std::string(board_size_ + 3, ' '); + for (int x = 0; x < board_size_; x++) { + out << ' ' << coord << static_cast('a' + x); + } + out << '\n'; + + for (int y = 0; y < board_diameter_; y++) { + out << std::string(abs(board_size_ - 1 - y) + 1 + ((y + 1) < 10), ' '); + out << coord << (y + 1); // Leading y coord. + + bool found_last = false; + int start_x = (y < board_size_ ? 0 : y - board_size_ + 1); + int end_x = (y < board_size_ ? board_size_ + y : board_diameter_); + for (int x = start_x; x < end_x; x++) { + Move pos(x, y, board_size_); + + // Spacing and last-move highlight. + if (found_last) { + out << coord << ']'; + found_last = false; + } else if (last_move_ == pos) { + out << coord << '['; + found_last = true; + } else { + out << ' '; + } + + // Actual piece. + Player p = board_[pos.xy].player; + if (p == kPlayerNone) out << empty; + if (p == kPlayer1) out << white; + if (p == kPlayer2) out << black; + } + if (found_last) { + out << coord << ']'; + } + if (y < board_size_ - 1) { // Trailing x coord. + out << ' ' << coord << static_cast('a' + board_size_ + y); + } + out << '\n'; + } + out << reset; + return out.str(); +} + +std::vector HavannahState::Returns() const { + if (outcome_ == kPlayer1) return {1, -1}; + if (outcome_ == kPlayer2) return {-1, 1}; + if (outcome_ == kPlayerDraw) return {0, 0}; + return {0, 0}; // Unfinished +} + +std::string HavannahState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return HistoryString(); +} + +std::string HavannahState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +int PlayerRelative(HavannahPlayer state, Player current) { + switch (state) { + case kPlayer1: + return current == 0 ? 0 : 1; + case kPlayer2: + return current == 1 ? 
0 : 1; + case kPlayerNone: + return 2; + default: + SpielFatalError("Unknown player type."); + } +} + +void HavannahState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + TensorView<2> view(values, {kCellStates, static_cast(board_.size())}, + true); + for (int i = 0; i < board_.size(); ++i) { + if (board_[i].player < kCellStates) { + view[{PlayerRelative(board_[i].player, player), i}] = 1.0; + } + } +} + +void HavannahState::DoApplyAction(Action action) { + SPIEL_CHECK_EQ(outcome_, kPlayerNone); + + Move move = ActionToMove(action); + SPIEL_CHECK_TRUE(move.OnBoard()); + + if (last_move_ == move) { + SPIEL_CHECK_TRUE(AllowSwap()); + } else { + SPIEL_CHECK_EQ(board_[move.xy].player, kPlayerNone); + moves_made_++; + last_move_ = move; + } + board_[move.xy].player = current_player_; + + bool alreadyjoined = false; // Useful for finding rings. + bool skip = false; + for (const Move& m : neighbors_[move.xy]) { + if (skip) { + skip = false; + } else if (m.OnBoard()) { + if (current_player_ == board_[m.xy].player) { + alreadyjoined |= JoinGroups(move.xy, m.xy); + + // Skip the next one. If it is the same group, it is already connected + // and forms a sharp corner, which we can ignore. + skip = true; + } + } + } + + const Cell& group = board_[FindGroupLeader(move.xy)]; + if (group.NumEdges() >= 3 || group.NumCorners() >= 2 || + (alreadyjoined && CheckRingDFS(move, 0, 3))) { + outcome_ = current_player_; + } else if (moves_made_ == valid_cells_) { + outcome_ = kPlayerDraw; + } + + current_player_ = (current_player_ == kPlayer1 ? kPlayer2 : kPlayer1); +} + +int HavannahState::FindGroupLeader(int cell) { + int parent = board_[cell].parent; + if (parent != cell) { + do { // Follow the parent chain up to the group leader. + parent = board_[parent].parent; + } while (parent != board_[parent].parent); + // Do path compression, but only the current one to avoid recursion. + board_[cell].parent = parent; + } + return parent; +} + +bool HavannahState::JoinGroups(int cell_a, int cell_b) { + int leader_a = FindGroupLeader(cell_a); + int leader_b = FindGroupLeader(cell_b); + + if (leader_a == leader_b) // Already the same group. + return true; + + if (board_[leader_a].size < board_[leader_b].size) { + // Force group a's subtree to be bigger. + std::swap(leader_a, leader_b); + } + + // Group b joins group a. + board_[leader_b].parent = leader_a; + board_[leader_a].size += board_[leader_b].size; + board_[leader_a].corner |= board_[leader_b].corner; + board_[leader_a].edge |= board_[leader_b].edge; + + return false; +} + +bool HavannahState::CheckRingDFS(const Move& move, int left, int right) { + if (!move.OnBoard()) return false; + + Cell& c = board_[move.xy]; + if (current_player_ != c.player) return false; + if (c.mark) return true; // Found a ring! + + c.mark = true; + bool success = false; + for (int i = left; !success && i <= right; i++) { + int dir = (i + 6) % 6; // Normalize. 
+ success = CheckRingDFS(neighbors_[move.xy][dir], dir - 1, dir + 1); + } + c.mark = false; + return success; +} + +std::unique_ptr HavannahState::Clone() const { + return std::unique_ptr(new HavannahState(*this)); +} + +HavannahGame::HavannahGame(const GameParameters& params) + : Game(kGameType, params), + board_size_(ParameterValue("board_size")), + ansi_color_output_(ParameterValue("ansi_color_output")), + allow_swap_(ParameterValue("swap")) {} + +} // namespace havannah +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/havannah/havannah.h b/scenarios/bargaining/open_spiel/open_spiel/games/havannah/havannah.h new file mode 100644 index 0000000..34ead1a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/havannah/havannah.h @@ -0,0 +1,236 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_HAVANNAH_H_ +#define OPEN_SPIEL_GAMES_HAVANNAH_H_ + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/spiel.h" + +// https://en.wikipedia.org/wiki/Havannah +// +// Parameters: +// "board_size" int radius of the board (default = 8) +// "swap" bool Whether to allow the swap rule. +// "ansi_color_output" bool Whether to color the output for a terminal. + +namespace open_spiel { +namespace havannah { + +inline constexpr int kNumPlayers = 2; +inline constexpr int kDefaultBoardSize = 8; +inline constexpr int kMaxNeighbors = + 6; // Maximum number of neighbors for a cell +inline constexpr int kCellStates = 1 + kNumPlayers; + +enum HavannahPlayer : uint8_t { + kPlayer1, + kPlayer2, + kPlayerNone, + kPlayerDraw, + kPlayerInvalid, +}; + +enum MoveSpecial { + kMoveNone = -1, + kMoveUnknown = -2, + kMoveOffset = -3, +}; + +inline int CalcXY(int x, int y, int board_size) { + int diameter = board_size * 2 - 1; + if (x >= 0 && y >= 0 && x < diameter && y < diameter && + (y - x < board_size) && (x - y < board_size)) { + return x + y * diameter; + } else { + return kMoveUnknown; + } +} + +struct Move { + int8_t x, y; // The x,y coordinates + int16_t xy; // precomputed x + y * board_diameter as an index into the array. + + inline constexpr Move(MoveSpecial m = kMoveUnknown) : x(-1), y(-1), xy(m) {} + inline constexpr Move(int x_, int y_, MoveSpecial m) : x(x_), y(y_), xy(m) {} + Move(int x_, int y_, int board_size) + : x(x_), y(y_), xy(CalcXY(x_, y_, board_size)) {} + + std::string ToString() const; + + bool operator==(const Move& b) const { return xy == b.xy; } + bool operator!=(const Move& b) const { return xy != b.xy; } + bool operator==(const MoveSpecial& b) const { return xy == b; } + bool operator!=(const MoveSpecial& b) const { return xy != b; } + + // Whether the move is valid and on the board. May be invalid because it is + // a MoveSpecial, in the cut-off corners, or otherwise off the board. + bool OnBoard() const { return xy >= 0; } + + // Flags for which edge/corner this move is part of. 
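+  // Each corner/edge maps to one bit (1 << 0 .. 1 << 5), so the result is
+  // either 0 or a single set bit. Groups OR these flags together; e.g. a
+  // group touching corners 0 and 3 stores corner == 0b001001, and
+  // Cell::NumCorners() then reports 2, which is a winning bridge.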
+ int Edge(int board_size) const; + int Corner(int board_size) const; +}; + +// List of neighbors of a cell: [cell][direction] +typedef std::vector> NeighborList; + +// State of an in-play game. +class HavannahState : public State { + // Represents a single cell on the board, as well as the structures needed for + // groups of cells. Groups of cells are defined by a union-find structure + // embedded in the array of cells. Following the `parent` indices will lead to + // the group leader which has the up to date size, corner and edge + // connectivity of that group. Size, corner and edge are not valid for any + // cell that is not a group leader. + struct Cell { + // Who controls this cell. + HavannahPlayer player; + + // Whether this cell is marked/visited in a ring search. Should always be + // false except while running CheckRingDFS. + bool mark; + + // A parent index to allow finding the group leader. It is the leader of the + // group if it points to itself. Allows path compression to shorten the path + // from a direct parent to the leader. + uint16_t parent; + + // These three are only defined for the group leader's cell. + uint16_t size; // Size of this group of cells. + uint8_t corner; // A bitset of which corners this group is connected to. + uint8_t edge; // A bitset of which edges this group is connected to. + + Cell() {} + Cell(HavannahPlayer player_, int parent_, int corner_, int edge_) + : player(player_), + mark(false), + parent(parent_), + size(1), + corner(corner_), + edge(edge_) {} + + // How many corner or edges this group of cell is connected to. Only defined + // if called on the group leader. + int NumCorners() const; + int NumEdges() const; + }; + + public: + HavannahState(std::shared_ptr game, int board_size, + bool ansi_color_output = false, bool allow_swap = false); + + HavannahState(const HavannahState&) = default; + + Player CurrentPlayer() const override { + return IsTerminal() ? kTerminalPlayerId : static_cast(current_player_); + } + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override { return outcome_ != kPlayerNone; } + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + + // A 3d tensor, 3 player-relative one-hot 2d planes. The layers are: the + // specified player, the other player, and empty. + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + std::vector LegalActions() const override; + + protected: + void DoApplyAction(Action action) override; + + // Find the leader of the group. Not const due to union-find path compression. + int FindGroupLeader(int cell); + + // Join the groups of two positions, propagating group size, and edge/corner + // connections. Returns true if they were already the same group. + bool JoinGroups(int cell_a, int cell_b); + + // Do a depth first search for a ring starting at `move`. + // `left` and `right give the direction bounds for the search. A valid ring + // won't take any sharp turns, only going in one of the 3 forward directions. + // The only exception is the very beginning where we don't know the direction + // and it's valid to search in all 6 directions. 
4 is enough though, since any + // valid ring can't start and end in the 2 next to each other while still + // going through `move.` + bool CheckRingDFS(const Move& move, int left, int right); + + // Turn an action id into a `Move` with an x,y. + Move ActionToMove(Action action_id) const; + + bool AllowSwap() const; + + private: + std::vector board_; + HavannahPlayer current_player_ = kPlayer1; + HavannahPlayer outcome_ = kPlayerNone; + const int board_size_; + const int board_diameter_; + const int valid_cells_; + int moves_made_ = 0; + Move last_move_ = kMoveNone; + const NeighborList& neighbors_; + const bool ansi_color_output_; + const bool allow_swap_; +}; + +// Game object. +class HavannahGame : public Game { + public: + explicit HavannahGame(const GameParameters& params); + + int NumDistinctActions() const override { + // Really diameter^2 - size*(size-1), but that's harder to represent, so + // the extra actions in the corners are never legal. + return Diameter() * Diameter(); + } + std::unique_ptr NewInitialState() const override { + return std::unique_ptr( + new HavannahState(shared_from_this(), board_size_, ansi_color_output_, + allow_swap_)); + } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { return -1; } + absl::optional UtilitySum() const override { return 0; } + double MaxUtility() const override { return 1; } + std::vector ObservationTensorShape() const override { + return {kCellStates, Diameter(), Diameter()}; + } + int MaxGameLength() const override { + // The true number of playable cells on the board. + // No stones are removed, and it is possible to draw by filling the board. + return Diameter() * Diameter() - board_size_ * (board_size_ - 1) + + allow_swap_; + } + + private: + int Diameter() const { return board_size_ * 2 - 1; } + const int board_size_; + const bool ansi_color_output_ = false; + const bool allow_swap_ = false; +}; + +} // namespace havannah +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_HAVANNAH_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/havannah/havannah_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/havannah/havannah_test.cc new file mode 100644 index 0000000..d90c696 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/havannah/havannah_test.cc @@ -0,0 +1,52 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace havannah { +namespace { + +namespace testing = open_spiel::testing; + +void BasicHavannahTests() { + testing::LoadGameTest("havannah(board_size=4)"); + testing::NoChanceOutcomesTest(*LoadGame("havannah(board_size=4)")); + testing::RandomSimTest(*LoadGame("havannah"), 10); + + // All the sizes we care about. 
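+  // board_size is the board radius, so size 13 uses a 25x25 array with the
+  // two corner triangles invalid: 625 - 13 * 12 = 469 playable cells.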
+ for (int i = 3; i <= 13; i++) { + testing::RandomSimTest( + *LoadGame(absl::StrCat("havannah(board_size=", i, ")")), 10); + } + + // Run many tests hoping swap happens at least once. + testing::RandomSimTest(*LoadGame("havannah(board_size=3,swap=True)"), 20); + + // Ansi colors! + testing::RandomSimTest( + *LoadGame("havannah", {{"board_size", GameParameter(6)}, + {"ansi_color_output", GameParameter(true)}}), + 3); + testing::RandomSimTest( + *LoadGame("havannah(board_size=5,ansi_color_output=True)"), 3); +} + +} // namespace +} // namespace havannah +} // namespace open_spiel + +int main(int argc, char** argv) { open_spiel::havannah::BasicHavannahTests(); } diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/hearts/hearts.cc b/scenarios/bargaining/open_spiel/open_spiel/games/hearts/hearts.cc new file mode 100644 index 0000000..c4b2a96 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/hearts/hearts.cc @@ -0,0 +1,771 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/hearts/hearts.h" + +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace hearts { +namespace { + +const GameType kGameType{ + /*short_name=*/"hearts", + /*long_name=*/"Hearts", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/kNumPlayers, + /*min_num_players=*/kNumPlayers, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/false, + /*provides_observation_tensor=*/false, + /*parameter_specification=*/ + { + // Pass cards at the beginning of the hand. + {"pass_cards", GameParameter(true)}, + // Cannot play hearts or QS on first trick. + {"no_pts_on_first_trick", GameParameter(true)}, + // First player to play can lead any club. + {"can_lead_any_club", GameParameter(false)}, + // -10 for taking JD. + {"jd_bonus", GameParameter(false)}, + // -5 for taking no tricks. + {"avoid_all_tricks_bonus", GameParameter(false)}, + // Must break hearts. + {"must_break_hearts", GameParameter(true)}, + // QS breaks hearts. + {"qs_breaks_hearts", GameParameter(true)}, + // If aside from QS only hearts remain, player is + // permitted to lead hearts even if hearts are + // not broken. 
+ {"can_lead_hearts_instead_of_qs", GameParameter(false)}, + }}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new HeartsGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +open_spiel::RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +} // namespace + +HeartsGame::HeartsGame(const GameParameters& params) + : Game(kGameType, params), + pass_cards_(ParameterValue("pass_cards")), + no_pts_on_first_trick_(ParameterValue("no_pts_on_first_trick")), + can_lead_any_club_(ParameterValue("can_lead_any_club")), + jd_bonus_(ParameterValue("jd_bonus")), + avoid_all_tricks_bonus_(ParameterValue("avoid_all_tricks_bonus")), + qs_breaks_hearts_(ParameterValue("qs_breaks_hearts")), + must_break_hearts_(ParameterValue("must_break_hearts")), + can_lead_hearts_instead_of_qs_( + ParameterValue("can_lead_hearts_instead_of_qs")) {} + +HeartsState::HeartsState(std::shared_ptr game, bool pass_cards, + bool no_pts_on_first_trick, bool can_lead_any_club, + bool jd_bonus, bool avoid_all_tricks_bonus, + bool must_break_hearts, bool qs_breaks_hearts, + bool can_lead_hearts_instead_of_qs) + : State(game), + pass_cards_(pass_cards), + no_pts_on_first_trick_(no_pts_on_first_trick), + can_lead_any_club_(can_lead_any_club), + jd_bonus_(jd_bonus), + avoid_all_tricks_bonus_(avoid_all_tricks_bonus), + qs_breaks_hearts_(qs_breaks_hearts), + must_break_hearts_(must_break_hearts), + can_lead_hearts_instead_of_qs_(can_lead_hearts_instead_of_qs), + hearts_broken_(!must_break_hearts) {} + +std::string HeartsState::ActionToString(Player player, Action action) const { + if (history_.empty()) return pass_dir_str[action]; + return CardString(action); +} + +std::string HeartsState::ToString() const { + std::string rv = "Pass Direction: "; + absl::StrAppend(&rv, pass_dir_str[static_cast(pass_dir_)], "\n\n"); + absl::StrAppend(&rv, FormatDeal()); + if (!passed_cards_[0].empty()) absl::StrAppend(&rv, FormatPass()); + if (num_cards_played_ > 0) absl::StrAppend(&rv, FormatPlay(), FormatPoints()); + return rv; +} + +std::string HeartsState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + if (IsTerminal()) return ToString(); + std::string rv = "Pass Direction: "; + absl::StrAppend(&rv, pass_dir_str[static_cast(pass_dir_)], "\n\n"); + absl::StrAppend(&rv, "Hand: \n"); + auto cards = FormatHand(player, /*mark_voids=*/true); + for (int suit = kNumSuits - 1; suit >= 0; --suit) + absl::StrAppend(&rv, cards[suit], "\n"); + if (!passed_cards_[player].empty()) absl::StrAppend(&rv, FormatPass(player)); + if (num_cards_played_ > 0) absl::StrAppend(&rv, FormatPlay(), FormatPoints()); + return rv; +} + +std::array HeartsState::FormatHand( + int player, bool mark_voids) const { + // Current hand, except in the terminal state when we use the original hand + // to enable an easy review of the whole deal. + auto deal = IsTerminal() ? 
initial_deal_ : holder_; + std::array cards; + for (int suit = 0; suit < kNumSuits; ++suit) { + cards[suit].push_back(kSuitChar[suit]); + cards[suit].push_back(' '); + bool is_void = true; + for (int rank = kNumCardsPerSuit - 1; rank >= 0; --rank) { + if (player == deal[Card(Suit(suit), rank)]) { + cards[suit].push_back(kRankChar[rank]); + is_void = false; + } + } + if (is_void && mark_voids) absl::StrAppend(&cards[suit], "none"); + } + return cards; +} + +std::string HeartsState::FormatDeal() const { + std::string rv; + std::array, kNumPlayers> cards; + for (auto player : {kNorth, kEast, kSouth, kWest}) + cards[player] = FormatHand(player, /*mark_voids=*/false); + constexpr int kColumnWidth = 8; + std::string padding(kColumnWidth, ' '); + for (int suit = kNumSuits - 1; suit >= 0; --suit) + absl::StrAppend(&rv, padding, cards[kNorth][suit], "\n"); + for (int suit = kNumSuits - 1; suit >= 0; --suit) + absl::StrAppend(&rv, absl::StrFormat("%-8s", cards[kWest][suit]), padding, + cards[kEast][suit], "\n"); + for (int suit = kNumSuits - 1; suit >= 0; --suit) + absl::StrAppend(&rv, padding, cards[kSouth][suit], "\n"); + return rv; +} + +std::string HeartsState::FormatPass() const { + std::string rv = "\n\nPassed Cards:"; + for (int player = 0; player < kNumPlayers; ++player) { + absl::StrAppend(&rv, "\n", DirString(player), ": "); + for (int card : passed_cards_[player]) { + absl::StrAppend(&rv, CardString(card), " "); + } + } + // Cards are not received until all players have completed passing. + // West is the last player to pass. + if (passed_cards_[static_cast(kWest)].size() == kNumCardsInPass) { + absl::StrAppend(&rv, "\n\nReceived Cards:"); + for (int receiver = 0; receiver < kNumPlayers; ++receiver) { + absl::StrAppend(&rv, "\n", DirString(receiver), ": "); + int passer = + (receiver + kNumPlayers - static_cast(pass_dir_)) % kNumPlayers; + for (int card : passed_cards_[passer]) { + absl::StrAppend(&rv, CardString(card), " "); + } + } + } + absl::StrAppend(&rv, "\n"); + return rv; +} + +std::string HeartsState::FormatPass(Player player) const { + std::string rv = "\nPassed Cards: "; + std::vector passed_cards = passed_cards_[player]; + // Sort cards because players don't have access to the order in which the + // cards were selected to be passed. Knowing the order could allow for + // information leakage. + absl::c_sort(passed_cards); + for (int card : passed_cards) absl::StrAppend(&rv, CardString(card), " "); + // Cards are not received until all players have completed passing. + // West is the last player to pass. 
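+  // For example, with pass_dir_ == kLeft (1), North (0) passes to East (1),
+  // so the player who passed to East is (1 + 4 - 1) % 4 == 0 == North.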
+ if (passed_cards_[static_cast(kWest)].size() == kNumCardsInPass) { + absl::StrAppend(&rv, "\n\nReceived Cards: "); + int passing_player = + (player + kNumPlayers - static_cast(pass_dir_)) % kNumPlayers; + std::vector received_cards = passed_cards_[passing_player]; + absl::c_sort(received_cards); + for (int card : received_cards) absl::StrAppend(&rv, CardString(card), " "); + } + absl::StrAppend(&rv, "\n"); + return rv; +} + +std::string HeartsState::FormatPlay() const { + SPIEL_CHECK_GT(num_cards_played_, 0); + std::string rv = "\nTricks:"; + absl::StrAppend(&rv, "\nN E S W N E S"); + for (int i = 0; i <= (num_cards_played_ - 1) / kNumPlayers; ++i) { + absl::StrAppend(&rv, "\n", std::string(3 * tricks_[i].Leader(), ' ')); + for (auto card : tricks_[i].Cards()) { + absl::StrAppend(&rv, CardString(card), " "); + } + } + return rv; +} + +std::string HeartsState::FormatPoints() const { + std::string rv; + absl::StrAppend(&rv, "\n\nPoints:"); + for (int i = 0; i < kNumPlayers; ++i) + absl::StrAppend(&rv, "\n", DirString(i), ": ", points_[i]); + return rv; +} + +void HeartsState::InformationStateTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + std::fill(values.begin(), values.end(), 0.0); + SPIEL_CHECK_EQ(values.size(), kInformationStateTensorSize); + if (phase_ == Phase::kPassDir || phase_ == Phase::kDeal) return; + auto ptr = values.begin(); + // Pass direction + ptr[static_cast(pass_dir_)] = 1; + ptr += kNumPlayers; + // Dealt hand + for (int i = 0; i < kNumCards; ++i) + if (initial_deal_[i] == player) ptr[i] = 1; + ptr += kNumCards; + // Passed cards + for (int card : passed_cards_[player]) ptr[card] = 1; + ptr += kNumCards; + // Received cards + // Cards are not received until all players have completed passing. + // West is the last player to pass. + if (passed_cards_[static_cast(kWest)].size() == kNumCardsInPass) { + int passer = + (player - static_cast(pass_dir_) + kNumPlayers) % kNumPlayers; + for (int card : passed_cards_[passer]) ptr[card] = 1; + } + ptr += kNumCards; + // Current hand + for (int i = 0; i < kNumCards; ++i) + if (holder_[i] == player) ptr[i] = 1; + ptr += kNumCards; + // Point totals + for (int i = 0; i < kNumPlayers; ++i) { + // Use thermometer representation instead of one-hot for point totals. + // Players can have negative points so we need to offset + for (int j = 0; j < points_[i] + std::abs(kPointsForJD); ++j) ptr[j] = 1; + ptr += kMaxScore; + } + // History of tricks, presented in the format: N E S W N E S + int current_trick = std::min(num_cards_played_ / kNumPlayers, + static_cast(tricks_.size() - 1)); + for (int i = 0; i < current_trick; ++i) { + Player leader = tricks_[i].Leader(); + ptr += leader * kNumCards; + for (auto card : tricks_[i].Cards()) { + ptr[card] = 1; + ptr += kNumCards; + } + ptr += (kNumPlayers - leader - 1) * kNumCards; + } + Player leader = tricks_[current_trick].Leader(); + if (leader != kInvalidPlayer) { + auto cards = tricks_[current_trick].Cards(); + ptr += leader * kNumCards; + for (auto card : cards) { + ptr[card] = 1; + ptr += kNumCards; + } + } + // Current trick may contain less than four cards. + if (num_cards_played_ < kNumCards) { + ptr += (kNumPlayers - (num_cards_played_ % kNumPlayers)) * kNumCards; + } + // Move to the end of current trick. + ptr += (kNumPlayers - std::max(leader, 0) - 1) * kNumCards; + // Skip over unplayed tricks. 
+ ptr += (kNumTricks - current_trick - 1) * kTrickTensorSize; + SPIEL_CHECK_EQ(ptr, values.end()); +} + +std::vector HeartsState::LegalActions() const { + switch (phase_) { + case Phase::kPassDir: + return PassDirLegalActions(); + case Phase::kDeal: + return DealLegalActions(); + case Phase::kPass: + return PassLegalActions(); + case Phase::kPlay: + return PlayLegalActions(); + default: + return {}; + } +} + +std::vector HeartsState::PassDirLegalActions() const { + SPIEL_CHECK_EQ(history_.size(), 0); + std::vector legal_actions; + if (!pass_cards_) { + legal_actions.push_back(static_cast(PassDir::kNoPass)); + } else { + legal_actions.reserve(kNumPlayers); + for (int i = 0; i < kNumPlayers; ++i) legal_actions.push_back(i); + } + return legal_actions; +} + +std::vector HeartsState::DealLegalActions() const { + std::vector legal_actions; + legal_actions.reserve(kNumCards - num_cards_dealt_); + for (int i = 0; i < kNumCards; ++i) { + if (!holder_[i].has_value()) legal_actions.push_back(i); + } + SPIEL_CHECK_GT(legal_actions.size(), 0); + return legal_actions; +} + +std::vector HeartsState::PassLegalActions() const { + std::vector legal_actions; + legal_actions.reserve(kNumCards / kNumPlayers); + for (int card = 0; card < kNumCards; ++card) { + if (holder_[card] == current_player_) legal_actions.push_back(card); + } + return legal_actions; +} + +std::vector HeartsState::PlayLegalActions() const { + std::vector legal_actions; + legal_actions.reserve(kNumTricks - num_cards_played_ / kNumPlayers); + + // Check if we can follow suit. + if (num_cards_played_ % kNumPlayers != 0) { + auto suit = CurrentTrick().LedSuit(); + for (int rank = 0; rank < kNumCardsPerSuit; ++rank) { + if (holder_[Card(suit, rank)] == current_player_) { + legal_actions.push_back(Card(suit, rank)); + } + } + } + if (!legal_actions.empty()) return legal_actions; + + // Special rules apply to the first card played. + // Must play 2C unless option to play any club is enabled. + if (num_cards_played_ == 0) { + SPIEL_CHECK_TRUE(holder_[Card(Suit::kClubs, 0)] == current_player_); + legal_actions.push_back(Card(Suit::kClubs, 0)); + if (can_lead_any_club_) { + for (int rank = 1; rank < kNumCardsPerSuit; ++rank) { + if (holder_[Card(Suit::kClubs, rank)] == current_player_) { + legal_actions.push_back(Card(Suit::kClubs, rank)); + } + } + } + return legal_actions; + } + + // Special rules apply to the first trick. + // By default, cannot play hearts or QS on first trick. + if (no_pts_on_first_trick_ && num_cards_played_ < kNumPlayers) { + for (int card = 0; card < kNumCards; ++card) { + if (holder_[card] == current_player_ && card != Card(Suit::kSpades, 10) && + CardSuit(card) != Suit::kHearts) { + legal_actions.push_back(card); + } + } + } + if (!legal_actions.empty()) return legal_actions; + + // Player must lead. By default, cannot lead hearts until broken. + if (num_cards_played_ % kNumPlayers == 0) { + for (int card = 0; card < kNumCards; ++card) { + if (holder_[card] == current_player_) { + if (CardSuit(card) != Suit::kHearts || hearts_broken_) { + legal_actions.push_back(card); + } + } + } + // Don't force player into leading the QS when hearts have not + // been broken. + if (can_lead_hearts_instead_of_qs_ && legal_actions.size() == 1 && + legal_actions[0] == Card(Suit::kSpades, 10)) { + legal_actions.pop_back(); + } + } + if (!legal_actions.empty()) return legal_actions; + + // Otherwise, we can play any of our cards. 
+ for (int card = 0; card < kNumCards; ++card) { + if (holder_[card] == current_player_) legal_actions.push_back(card); + } + return legal_actions; +} + +std::vector> HeartsState::ChanceOutcomes() const { + std::vector> outcomes; + if (history_.empty()) { + outcomes.reserve(kNumPlayers); + const double p = 1.0 / kNumPlayers; + for (int dir = 0; dir < kNumPlayers; ++dir) { + outcomes.emplace_back(dir, p); + } + return outcomes; + } + int num_cards_remaining = kNumCards - num_cards_dealt_; + outcomes.reserve(num_cards_remaining); + const double p = 1.0 / num_cards_remaining; + for (int card = 0; card < kNumCards; ++card) { + if (!holder_[card].has_value()) outcomes.emplace_back(card, p); + } + return outcomes; +} + +void HeartsState::DoApplyAction(Action action) { + switch (phase_) { + case Phase::kPassDir: + return ApplyPassDirAction(action); + case Phase::kDeal: + return ApplyDealAction(action); + case Phase::kPass: + return ApplyPassAction(action); + case Phase::kPlay: + return ApplyPlayAction(action); + case Phase::kGameOver: + SpielFatalError("Cannot act in terminal states"); + } +} + +// See overview in hearts.h for more information on setting the pass direction. +void HeartsState::ApplyPassDirAction(int pass_dir) { + SPIEL_CHECK_EQ(history_.size(), 0); + pass_dir_ = static_cast(pass_dir); + phase_ = Phase::kDeal; +} + +void HeartsState::ApplyDealAction(int card) { + holder_[card] = num_cards_dealt_ % kNumPlayers; + ++num_cards_dealt_; + if (num_cards_dealt_ == kNumCards) { + // Preserve the initial deal for easy retrieval + initial_deal_ = holder_; + if (pass_dir_ == PassDir::kNoPass) { + phase_ = Phase::kPlay; + // Play starts with the holder of the 2C + current_player_ = holder_[Card(Suit::kClubs, 0)].value(); + } else { + phase_ = Phase::kPass; + current_player_ = 0; + } + } +} + +void HeartsState::ApplyPassAction(int card) { + passed_cards_[current_player_].push_back(card); + holder_[card] = absl::nullopt; + if (passed_cards_[current_player_].size() % kNumCardsInPass == 0) + ++current_player_; + if (current_player_ == kNumPlayers) { + // Players have completed passing. Now let's distribute the passed cards. + for (int player = 0; player < kNumPlayers; ++player) { + for (int card : passed_cards_[player]) { + holder_[card] = (player + static_cast(pass_dir_)) % kNumPlayers; + } + } + phase_ = Phase::kPlay; + // Play starts with the holder of the 2C + current_player_ = holder_[Card(Suit::kClubs, 0)].value(); + } +} + +void HeartsState::ApplyPlayAction(int card) { + SPIEL_CHECK_TRUE(holder_[card] == current_player_); + holder_[card] = absl::nullopt; + if (num_cards_played_ % kNumPlayers == 0) { + CurrentTrick() = Trick(current_player_, card, jd_bonus_); + } else { + CurrentTrick().Play(current_player_, card); + } + // Check if action breaks hearts. + if (CardSuit(card) == Suit::kHearts) hearts_broken_ = true; + if (qs_breaks_hearts_ && card == Card(Suit::kSpades, 10)) + hearts_broken_ = true; + // Update player and point totals. 
+ Trick current_trick = CurrentTrick(); + ++num_cards_played_; + if (num_cards_played_ % kNumPlayers == 0) { + current_player_ = current_trick.Winner(); + points_[current_player_] += current_trick.Points(); + } else { + current_player_ = (current_player_ + 1) % kNumPlayers; + } + if (num_cards_played_ == kNumCards) { + phase_ = Phase::kGameOver; + current_player_ = kTerminalPlayerId; + ComputeScore(); + } +} + +Player HeartsState::CurrentPlayer() const { + if (phase_ == Phase::kDeal) return kChancePlayerId; + return current_player_; +} + +void HeartsState::ComputeScore() { + SPIEL_CHECK_TRUE(IsTerminal()); + // Did anyone shoot the moon? + Player potential_shooter = kInvalidPlayer; + bool moon_shot = true; + for (int i = 0; i < kNumTricks; ++i) { + int points = tricks_[i].Points(); + // JD not required to shoot the moon. + if (points != 0 && points != kPointsForJD) { + // This trick must be taken by the shooter. + if (potential_shooter == kInvalidPlayer) { + potential_shooter = tricks_[i].Winner(); + } else if (potential_shooter != tricks_[i].Winner()) { + moon_shot = false; + break; + } + } + } + // Shooting the moon sets the shooter's points to 0, and adds 26 pts to each + // opponent's score. + if (moon_shot) { + for (int i = 0; i < kNumPlayers; ++i) { + points_[i] += (i == potential_shooter) ? -kTotalPositivePoints + : kTotalPositivePoints; + } + } + // Did anyone avoid taking any tricks? + if (avoid_all_tricks_bonus_ && !moon_shot) { + std::vector tricks_taken(kNumPlayers, 0); + for (int i = 0; i < kNumTricks; ++i) { + tricks_taken[tricks_[i].Winner()] += 1; + } + for (int i = 0; i < kNumPlayers; ++i) { + if (tricks_taken[i] == 0) points_[i] += kAvoidAllTricksBonus; + } + } +} + +// Hearts is a trick-avoidance game in which the goal is to accumulate the +// fewest number of points. Because RL algorithms are designed to maximize +// reward, returns are calculated by subtracting the in-game points from an +// upper bound. 
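+// For example, a player who took five hearts and the queen of spades has
+// 5 + 13 = 18 points and receives a return of 26 - 18 = 8; with jd_bonus
+// enabled, capturing the jack of diamonds subtracts 10 points and can push a
+// return above 26.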
+std::vector HeartsState::Returns() const { + if (!IsTerminal()) { + return std::vector(kNumPlayers, 0.0); + } + std::vector returns = points_; + for (int i = 0; i < returns.size(); ++i) + returns[i] = kTotalPositivePoints - returns[i]; + return returns; +} + +absl::optional HeartsState::Played(int card) const { + if (phase_ == Phase::kPlay && !holder_[card].has_value()) { + Player p = *(initial_deal_[card]); + // check if they kept the card or not + auto it = std::find(passed_cards_[p].begin(), passed_cards_[p].end(), card); + if (it != passed_cards_[p].end()) { + p = (p + static_cast(pass_dir_)) % kNumPlayers; + } + return p; + } + + return absl::nullopt; +} + +bool HeartsState::KnowsLocation(Player player, int card) const { + bool dealt, received, played, two_clubs; + dealt = initial_deal_[card] == player; + int pass_dir = static_cast(pass_dir_); + Player recv_from = (player + kNumPlayers - pass_dir) % kNumPlayers; + auto it = std::find(passed_cards_[recv_from].begin(), + passed_cards_[recv_from].end(), card); + received = it != passed_cards_[recv_from].end() && phase_ == Phase::kPlay; + played = !holder_[card].has_value() && phase_ == Phase::kPlay; + two_clubs = card == Card(Suit::kClubs, 0) && phase_ == Phase::kPlay; + return dealt || received || played || two_clubs; +} + +// Does not account for void suit information exposed by other players during +// the play phase +std::unique_ptr HeartsState::ResampleFromInfostate( + int player_id, std::function rng) const { + std::unique_ptr clone = game_->NewInitialState(); + Action pass_dir = static_cast(pass_dir_); + clone->ApplyAction(pass_dir); + + // start by gathering all public and private info known to player_id to + // simplify the logic for applying deal / pass actions + // first thing we know is the player's entire hand + std::vector initial_hand; + for (int card = 0; card < kNumCards; card++) { + if (initial_deal_[card] == player_id) initial_hand.push_back(card); + } + + // collect cards that have been revealed through the play phase + std::vector> play_known(kNumPlayers); + if (phase_ == Phase::kPlay) { + for (int card = 0; card < kNumCards; card++) { + absl::optional p = Played(card); + if (p && *p != player_id) { + play_known[*p].push_back(card); + } + } + } + + // the two of clubs is also known once a player has played first + absl::optional two_clubs_holder = holder_[Card(Suit::kClubs, 0)]; + if (phase_ == Phase::kPlay && two_clubs_holder) { + play_known[*two_clubs_holder].push_back(Card(Suit::kClubs, 0)); + } + + // set up pass cards greedily using known cards so that passes are + // consistent + // this shouldn't affect the distribution of resampled states much because + // we have no way to model unobserved opponent pass actions anyway + std::vector> pass_actions(kNumPlayers); + for (Player p = 0; p < kNumPlayers; p++) { + for (int pass_num = 0; pass_num < passed_cards_[p].size(); pass_num++) { + if (p == player_id) { + pass_actions[p].push_back(passed_cards_[p][pass_num]); + } else { + Player pass_to = (p + pass_dir) % kNumPlayers; + // once the play phase has started, player_id knows the cards that were + // passed to them + if (phase_ == Phase::kPlay && pass_to == player_id) { + pass_actions[p].push_back(passed_cards_[p][pass_num]); + } else if (pass_num < play_known[pass_to].size()) { + pass_actions[p].push_back(play_known[pass_to][pass_num]); + } + } + } + } + + // at this point we have all the information we need about which card + // locations are known to player_id, so we can start applying deal actions + Player 
deal_to, pass_to, recv_from; + int card_num; + std::vector dealt(kNumCards, false); + std::vector known_dealt_counter(kNumPlayers, 0); + for (int num_dealt = 0; num_dealt < kNumCards; num_dealt++) { + card_num = num_dealt / kNumPlayers; + deal_to = num_dealt % kNumPlayers; + pass_to = (deal_to + pass_dir) % kNumPlayers; + recv_from = (deal_to + kNumPlayers - pass_dir) % kNumPlayers; + Action action = kInvalidAction; + // deal out the pass moves first so those constraints are satisfied + if (card_num < pass_actions[deal_to].size()) { + action = pass_actions[deal_to][card_num]; + } else { + // now try to find any cards that deal_to has shown they have that + // haven't already been allocated as a pass action for recv_from and have + // not already been dealt + auto& known = (deal_to == player_id) ? initial_hand : play_known[deal_to]; + while ((action == kInvalidAction || dealt[action]) && + known_dealt_counter[deal_to] < known.size()) { + action = known[known_dealt_counter[deal_to]]; + auto it = std::find(pass_actions[recv_from].begin(), + pass_actions[recv_from].end(), action); + if (it != pass_actions[recv_from].end()) action = kInvalidAction; + known_dealt_counter[deal_to]++; + } + } + + // all known card constraints for to_deal have been satisfied, so we can + // deal them a random card that does not violate other player constraints + while (action == kInvalidAction) { + Action candidate = SampleAction(clone->ChanceOutcomes(), rng()).first; + if (!KnowsLocation(player_id, candidate)) { + action = candidate; + // we can also use this card as a pass action later because its + // location is unknown + if (pass_actions[deal_to].size() < passed_cards_[deal_to].size()) { + pass_actions[deal_to].push_back(action); + } + } + } + + clone->ApplyAction(action); + dealt[action] = true; + } + + // now handle the pass phase + if (pass_dir_ != PassDir::kNoPass) { + for (Player to_move = 0; to_move < kNumPlayers; to_move++) { + SPIEL_CHECK_EQ(pass_actions[to_move].size(), + passed_cards_[to_move].size()); + pass_to = (to_move + pass_dir) % kNumPlayers; + for (int passes = 0; passes < passed_cards_[to_move].size(); passes++) { + Action action = kInvalidAction; + if (to_move == player_id || pass_to == player_id) { + // player_id knows exactly which cards were passed by player_id and + // the player who passed cards to player_id + action = passed_cards_[to_move][passes]; + } else { + action = pass_actions[to_move][passes]; + } + clone->ApplyAction(action); + } + } + } + + // given that we should now have a state consistent with the public actions + // and player_id's private cards, we can just copy the action sequence in + // the play phase + int play_start_index = kNumCards + 1; + if (pass_dir_ != PassDir::kNoPass) + play_start_index += kNumPlayers * kNumCardsInPass; + for (size_t i = play_start_index; i < history_.size(); i++) { + clone->ApplyAction(history_.at(i).action); + } + + SPIEL_CHECK_EQ(FullHistory().size(), clone->FullHistory().size()); + SPIEL_CHECK_EQ(InformationStateString(player_id), + clone->InformationStateString(player_id)); + return clone; +} + +Trick::Trick(Player leader, int card, bool jd_bonus) + : jd_bonus_(jd_bonus), + winning_rank_(CardRank(card)), + points_(CardPoints(card, jd_bonus)), + led_suit_(CardSuit(card)), + leader_(leader), + winning_player_(leader), + cards_{card} {} + +void Trick::Play(Player player, int card) { + cards_.push_back(card); + points_ += CardPoints(card, jd_bonus_); + if (CardSuit(card) == led_suit_ && CardRank(card) > winning_rank_) { + winning_rank_ = 
CardRank(card); + winning_player_ = player; + } +} + +} // namespace hearts +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/hearts/hearts.h b/scenarios/bargaining/open_spiel/open_spiel/games/hearts/hearts.h new file mode 100644 index 0000000..62e3171 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/hearts/hearts.h @@ -0,0 +1,248 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_HEARTS_H_ +#define OPEN_SPIEL_GAMES_HEARTS_H_ + +// Full implementation of the classic trick taking game Hearts. +// https://www.pagat.com/reverse/hearts.html +// +// Some notes on this implementation: +// +// - Pass Direction +// The direction of the pass is decided by the first chance action. If the +// "pass_cards" game parameter is set to false, the "No Pass" action will be +// the only legal action at the first chance node. +// +// In standard play, the direction of the pass alternates in +// a fixed pattern. Here, however, state is not preserved between hands, so +// the game itself cannot enforce that pattern. By using the first chance +// action to set the pass direction, the game can be dropped in to pre-existing +// algorithms without requiring modifications to coordinate training. +// +// - Returns +// Hearts is a trick-avoidance game in which the goal is to accumulate the +// fewest number of points. Because RL algorithms are designed to maximize +// reward, returns are calculated by subtracting the in-game points from an +// upper bound. + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace hearts { + +inline constexpr int kNumPlayers = 4; +inline constexpr int kNumSuits = 4; +inline constexpr int kNumCardsPerSuit = 13; +inline constexpr int kNumCards = 52; +inline constexpr int kNumCardsInPass = 3; +inline constexpr int kNumTricks = kNumCards / kNumPlayers; +inline constexpr int kPointsForHeart = 1; +inline constexpr int kPointsForQS = 13; +inline constexpr int kPointsForJD = -10; +inline constexpr int kTotalPositivePoints = 26; // All hearts + QS +inline constexpr int kMinScore = 0; +inline constexpr int kMaxScore = kTotalPositivePoints - kPointsForJD; +inline constexpr int kAvoidAllTricksBonus = -5; +inline constexpr int kTrickTensorSize = kNumCards * 7; // N E S W N E S +inline constexpr int kInformationStateTensorSize = + kNumPlayers // Pass direction + + kNumCards // Dealt hand + + kNumCards // 3 passed cards + + kNumCards // 3 received cards + + kNumCards // Current hand + + kMaxScore * kNumPlayers // Current point totals + + kNumTricks * kTrickTensorSize; // History of tricks + +enum class Suit { kClubs = 0, kDiamonds = 1, kHearts = 2, kSpades = 3 }; +enum class PassDir { kNoPass = 0, kLeft = 1, kAcross = 2, kRight = 3 }; +enum Seat { kNorth, kEast, kSouth, kWest }; +// Cards are represented as rank * kNumSuits + suit. 
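+// Worked example (reviewer's note, not part of the upstream OpenSpiel
+// header): using the helpers defined just below, the queen of spades has
+// rank 10 ('Q' in kRankChar) and suit kSpades (3), so
+// Card(Suit::kSpades, 10) == 10 * 4 + 3 == 43, CardRank(43) == 10,
+// CardSuit(43) == Suit::kSpades, CardString(43) == "QS", and
+// CardPoints(43, /*jd_bonus=*/false) == kPointsForQS == 13.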
+inline Suit CardSuit(int card) { return Suit(card % kNumSuits); } +inline int CardRank(int card) { return card / kNumSuits; } +inline int Card(Suit suit, int rank) { + return rank * kNumSuits + static_cast(suit); +} +inline int CardPoints(int card, bool jd_bonus) { + if (CardSuit(card) == Suit::kHearts) { + return kPointsForHeart; + } else if (card == Card(Suit::kSpades, 10)) { + return kPointsForQS; + } else if (card == Card(Suit::kDiamonds, 9) && jd_bonus) { + return kPointsForJD; + } else { + return 0; + } +} +constexpr char kRankChar[] = "23456789TJQKA"; +constexpr char kSuitChar[] = "CDHS"; +constexpr char kDirChar[] = "NESW"; +inline std::string DirString(int dir) { return {kDirChar[dir]}; } +inline std::string CardString(int card) { + return {kRankChar[CardRank(card)], + kSuitChar[static_cast(CardSuit(card))]}; +} +inline std::map pass_dir_str = { + {0, "No Pass"}, {1, "Left"}, {2, "Across"}, {3, "Right"}}; + +// State of a single trick. +class Trick { + public: + Trick() : Trick{kInvalidPlayer, 0, false} {} + Trick(Player leader, int card, bool jd_bonus); + void Play(Player player, int card); + Suit LedSuit() const { return led_suit_; } + Player Winner() const { return winning_player_; } + Player Leader() const { return leader_; } + int Points() const { return points_; } + std::vector Cards() const { return cards_; } + + private: + bool jd_bonus_; + int winning_rank_; + int points_; + Suit led_suit_; + Player leader_; + Player winning_player_; + std::vector cards_; +}; + +class HeartsState : public State { + public: + HeartsState(std::shared_ptr game, bool pass_cards, + bool no_pts_on_first_trick, bool can_lead_any_club, bool jd_bonus, + bool avoid_all_tricks_bonus, bool must_break_hearts, + bool qs_breaks_hearts, bool can_lead_hearts_instead_of_qs); + Player CurrentPlayer() const override; + std::string ActionToString(Player player, Action action) const override; + std::string ToString() const override; + bool IsTerminal() const override { return phase_ == Phase::kGameOver; } + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + void InformationStateTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override { + return std::unique_ptr(new HeartsState(*this)); + } + std::vector LegalActions() const override; + std::vector> ChanceOutcomes() const override; + std::unique_ptr ResampleFromInfostate( + int player_id, std::function rng) const override; + + int Points(Player player) const { return points_[player]; } + + protected: + void DoApplyAction(Action action) override; + + private: + enum class Phase { kPassDir, kDeal, kPass, kPlay, kGameOver }; + + std::vector PassDirLegalActions() const; + std::vector DealLegalActions() const; + std::vector PassLegalActions() const; + std::vector PlayLegalActions() const; + void ApplyPassDirAction(int pass_dir); + void ApplyDealAction(int card); + void ApplyPassAction(int card); + void ApplyPlayAction(int card); + + void ComputeScore(); + int CurrentTrickIndex() const { + return std::min(num_cards_played_ / kNumPlayers, + static_cast(tricks_.size())); + } + Trick& CurrentTrick() { return tricks_[CurrentTrickIndex()]; } + const Trick& CurrentTrick() const { return tricks_[CurrentTrickIndex()]; } + std::array FormatHand(int player, + bool mark_voids) const; + std::string FormatPlay() const; + std::string FormatPass() const; + std::string FormatPass(Player player) const; + std::string FormatDeal() const; + std::string FormatPoints() const; + + 
absl::optional Played(int card) const; + bool KnowsLocation(Player player, int card) const; + + const bool pass_cards_; + const bool no_pts_on_first_trick_; + const bool can_lead_any_club_; + const bool jd_bonus_; + const bool avoid_all_tricks_bonus_; + const bool qs_breaks_hearts_; + const bool must_break_hearts_; + const bool can_lead_hearts_instead_of_qs_; + + int num_cards_dealt_ = 0; + int num_cards_played_ = 0; + bool hearts_broken_ = false; + Player current_player_ = kChancePlayerId; + Phase phase_ = Phase::kPassDir; + PassDir pass_dir_ = PassDir::kNoPass; + std::array tricks_{}; + std::array, kNumCards> holder_{}; + std::array, kNumCards> initial_deal_{}; + std::vector> passed_cards_{kNumPlayers}; + std::vector points_ = std::vector(kNumPlayers, 0); +}; + +class HeartsGame : public Game { + public: + explicit HeartsGame(const GameParameters& params); + int NumDistinctActions() const override { return kNumCards; } + int MaxChanceOutcomes() const override { return kNumCards; } + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new HeartsState( + shared_from_this(), /*pass_cards=*/pass_cards_, + /*no_pts_on_first_trick=*/no_pts_on_first_trick_, + /*can_lead_any_club=*/can_lead_any_club_, + /*jd_bonus=*/jd_bonus_, + /*avoid_all_tricks_bonus=*/avoid_all_tricks_bonus_, + /*must_break_hearts=*/must_break_hearts_, + /*qs_breaks_hearts=*/qs_breaks_hearts_, + /*can_lead_hearts_instead_of_qs=*/can_lead_hearts_instead_of_qs_)); + } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { return kMinScore; } + double MaxUtility() const override { return kMaxScore; } + std::vector InformationStateTensorShape() const override { + return {kInformationStateTensorSize}; + } + int MaxGameLength() const override { + return (kNumCardsInPass * kNumPlayers) + kNumCards; + } + // TODO: verify whether this bound is tight and/or tighten it. + int MaxChanceNodesInHistory() const override { return MaxGameLength(); } + + private: + const bool pass_cards_; + const bool no_pts_on_first_trick_; + const bool can_lead_any_club_; + const bool jd_bonus_; + const bool avoid_all_tricks_bonus_; + const bool qs_breaks_hearts_; + const bool must_break_hearts_; + const bool can_lead_hearts_instead_of_qs_; +}; + +} // namespace hearts +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_HEARTS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/hearts/hearts_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/hearts/hearts_test.cc new file mode 100644 index 0000000..759bae2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/hearts/hearts_test.cc @@ -0,0 +1,306 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/hearts/hearts.h" + +#include +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace hearts { +namespace { + +std::map BuildCardIntMap() { + std::map rv; + for (int i = 0; i < kNumCards; ++i) rv[CardString(i)] = i; + return rv; +} +std::map card_int = BuildCardIntMap(); + +void BasicGameTests() { + testing::LoadGameTest("hearts"); + testing::ChanceOutcomesTest(*LoadGame("hearts")); + testing::RandomSimTest(*LoadGame("hearts"), 10); + testing::ResampleInfostateTest(*LoadGame("hearts"), /*num_sims=*/10); + + auto observer = LoadGame("hearts") + ->MakeObserver(kInfoStateObsType, + GameParametersFromString("single_tensor")); + testing::RandomSimTestCustomObserver(*LoadGame("hearts"), observer); +} + +void ShootTheMoonTest() { + GameParameters params; + std::shared_ptr game = + open_spiel::LoadGame("hearts", params); + std::unique_ptr state = game->NewInitialState(); + std::vector actions; + actions = {static_cast(PassDir::kNoPass), + card_int["AC"], + card_int["AD"], + card_int["AH"], + card_int["AS"], + card_int["KC"], + card_int["KD"], + card_int["KH"], + card_int["KS"], + card_int["QC"], + card_int["QD"], + card_int["QH"], + card_int["QS"], + card_int["JC"], + card_int["JD"], + card_int["JH"], + card_int["JS"], + card_int["TC"], + card_int["TD"], + card_int["TH"], + card_int["TS"], + card_int["9C"], + card_int["9D"], + card_int["9H"], + card_int["9S"], + card_int["8C"], + card_int["8D"], + card_int["8H"], + card_int["8S"], + card_int["7C"], + card_int["7D"], + card_int["7H"], + card_int["7S"], + card_int["6C"], + card_int["6D"], + card_int["6H"], + card_int["6S"], + card_int["5C"], + card_int["5D"], + card_int["5H"], + card_int["5S"], + card_int["4C"], + card_int["4D"], + card_int["4H"], + card_int["4S"], + card_int["3C"], + card_int["3D"], + card_int["3H"], + card_int["3S"], + card_int["2C"], + card_int["2D"], + card_int["2H"], + card_int["2S"]}; + for (auto action : actions) state->ApplyAction(action); + state->ApplyAction(card_int["2C"]); + state->ApplyAction(card_int["AD"]); + // Check that we can play a heart even though it's the first trick because + // we only have hearts. 
+ SPIEL_CHECK_EQ(state->LegalActions().size(), kNumCards / kNumPlayers); + state->ApplyAction(card_int["AH"]); + state->ApplyAction(card_int["AS"]); + actions = {card_int["AC"], card_int["2D"], card_int["2H"], card_int["2S"], + card_int["KC"], card_int["KD"], card_int["KH"], card_int["KS"], + card_int["QC"], card_int["QD"], card_int["QH"], card_int["QS"], + card_int["JC"], card_int["JD"], card_int["JH"], card_int["JS"], + card_int["TC"], card_int["TD"], card_int["TH"], card_int["TS"], + card_int["9C"], card_int["9D"], card_int["9H"], card_int["9S"], + card_int["8C"], card_int["8D"], card_int["8H"], card_int["8S"], + card_int["7C"], card_int["7D"], card_int["7H"], card_int["7S"], + card_int["6C"], card_int["6D"], card_int["6H"], card_int["6S"], + card_int["5C"], card_int["5D"], card_int["5H"], card_int["5S"], + card_int["4C"], card_int["4D"], card_int["4H"], card_int["4S"], + card_int["3C"], card_int["3D"], card_int["3H"], card_int["3S"]}; + for (auto action : actions) state->ApplyAction(action); + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_EQ(state->PlayerReturn(0), kTotalPositivePoints); + SPIEL_CHECK_EQ(state->PlayerReturn(1), 0); + SPIEL_CHECK_EQ(state->PlayerReturn(2), 0); + SPIEL_CHECK_EQ(state->PlayerReturn(3), 0); +} + +std::string InformationStateTensorToString(Player player, + const std::vector& tensor) { + PassDir pass_dir; + std::array, kNumCards> dealt_hand; + std::array, kNumCards> current_hand; + std::vector passed_cards; + std::vector received_cards; + std::vector points; + std::array tricks; + + auto ptr = tensor.begin(); + // Pass dir + for (int i = 0; i < kNumPlayers; ++i) { + if (ptr[i] == 1) { + pass_dir = static_cast(i); + break; + } + } + ptr += kNumPlayers; + // Dealt hand + for (int i = 0; i < kNumCards; ++i) { + if (ptr[i] == 1) dealt_hand[i] = player; + } + ptr += kNumCards; + // Passed cards + for (int i = 0; i < kNumCards; ++i) { + if (ptr[i] == 1) passed_cards.push_back(i); + } + ptr += kNumCards; + // Received cards + for (int i = 0; i < kNumCards; ++i) { + if (ptr[i] == 1) received_cards.push_back(i); + } + ptr += kNumCards; + // Current hand + for (int i = 0; i < kNumCards; ++i) { + if (ptr[i] == 1) current_hand[i] = player; + } + ptr += kNumCards; + // Points + for (int i = 0; i < kNumPlayers; ++i) { + int player_score = kPointsForJD; + for (int j = 0; j < kMaxScore; ++j) { + if (ptr[j] == 1) ++player_score; + } + points.push_back(player_score); + ptr += kMaxScore; + } + // Trick history + Player leader; + int num_cards_played = 0; + for (int trick = 0; trick < kNumTricks; ++trick) { + leader = kInvalidPlayer; + for (int i = 0; i < kNumPlayers * kNumCards; ++i) { + if (ptr[i] == 1) { + leader = i / kNumCards; + // jd_bonus is not relevant for our purposes, set to false. + tricks[trick] = Trick(leader, i % kNumCards, false); + ++num_cards_played; + break; + } + } + if (leader != kInvalidPlayer) { + ptr += (leader + 1) * kNumCards; + for (int i = 0; i < kNumPlayers - 1; ++i) { + for (int j = 0; j < kNumCards; ++j) { + if (ptr[j] == 1) { + tricks[trick].Play((leader + i + 1) % kNumPlayers, j); + ++num_cards_played; + } + } + ptr += kNumCards; + } + ptr += (kNumPlayers - std::max(leader, 0) - 1) * kNumCards; + } else { + ptr += kTrickTensorSize; + break; + } + } + // Now build InformationStateString. 
+ std::string rv = "Pass Direction: "; + absl::StrAppend(&rv, pass_dir_str[static_cast(pass_dir)], "\n\n"); + absl::StrAppend(&rv, "Hand: \n"); + std::array cards; + for (int suit = 0; suit < kNumSuits; ++suit) { + cards[suit].push_back(kSuitChar[suit]); + cards[suit].push_back(' '); + bool is_void = true; + for (int rank = kNumCardsPerSuit - 1; rank >= 0; --rank) { + if (player == current_hand[Card(Suit(suit), rank)]) { + cards[suit].push_back(kRankChar[rank]); + is_void = false; + } + } + if (is_void) absl::StrAppend(&cards[suit], "none"); + } + for (int suit = kNumSuits - 1; suit >= 0; --suit) + absl::StrAppend(&rv, cards[suit], "\n"); + + if (!passed_cards.empty()) { + absl::StrAppend(&rv, "\nPassed Cards: "); + for (int card : passed_cards) { + absl::StrAppend(&rv, CardString(card), " "); + } + absl::StrAppend(&rv, "\n"); + } + if (!received_cards.empty()) { + absl::StrAppend(&rv, "\nReceived Cards: "); + for (int card : received_cards) { + absl::StrAppend(&rv, CardString(card), " "); + } + absl::StrAppend(&rv, "\n"); + } + if (num_cards_played > 0) { + absl::StrAppend(&rv, "\nTricks:"); + absl::StrAppend(&rv, "\nN E S W N E S"); + for (int i = 0; i <= (num_cards_played - 1) / kNumPlayers; ++i) { + absl::StrAppend(&rv, "\n", std::string(3 * tricks[i].Leader(), ' ')); + for (auto card : tricks[i].Cards()) { + absl::StrAppend(&rv, CardString(card), " "); + } + } + absl::StrAppend(&rv, "\n\nPoints:"); + for (int i = 0; i < kNumPlayers; ++i) { + absl::StrAppend(&rv, "\n", DirString(i), ": ", points[i]); + } + } + return rv; +} + +// Build InformationStateString from InformationStateTensor and check that it +// is equal to state->InformationStateString(player). +void InformationStateTensorTest(int num_games = 100) { + std::mt19937 rng(time(0)); + GameParameters params; + params["jd_bonus"] = GameParameter(true); + std::shared_ptr game = + open_spiel::LoadGame("hearts", params); + for (int i = 0; i < num_games; ++i) { + std::unique_ptr state = game->NewInitialState(); + while (!state->IsTerminal()) { + if (state->IsChanceNode()) { + std::vector> outcomes = + state->ChanceOutcomes(); + open_spiel::Action action = + open_spiel::SampleAction(outcomes, rng).first; + state->ApplyAction(action); + } else { + auto player = state->CurrentPlayer(); + auto infostate = state->InformationStateTensor(player); + + std::string infostate_string = state->InformationStateString(player); + std::string rebuilt_infostate_string = + InformationStateTensorToString(player, infostate); + SPIEL_CHECK_EQ(infostate_string, rebuilt_infostate_string); + + std::vector actions = state->LegalActions(); + std::uniform_int_distribution<> dis(0, actions.size() - 1); + auto action = actions[dis(rng)]; + state->ApplyAction(action); + } + } + } +} + +} // namespace +} // namespace hearts +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::hearts::BasicGameTests(); + open_spiel::hearts::ShootTheMoonTest(); + open_spiel::hearts::InformationStateTensorTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/hex/hex.cc b/scenarios/bargaining/open_spiel/open_spiel/games/hex/hex.cc new file mode 100644 index 0000000..41630f1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/hex/hex.cc @@ -0,0 +1,363 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/hex/hex.h" + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/observer.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/tensor_view.h" + +namespace open_spiel { +namespace hex { +namespace { + +// Facts about the game. +const GameType kGameType{/*short_name=*/"hex", + /*long_name=*/"Hex", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + { + {"board_size", GameParameter(kDefaultBoardSize)}, + {"num_cols", GameParameter(kDefaultBoardSize)}, + {"num_rows", GameParameter(kDefaultBoardSize)}, + {"string_rep", GameParameter(kDefaultStringRep)}, + }}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new HexGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +StringRep StringRepStrToEnum(const std::string& string_rep) { + if (string_rep == "standard") { + return StringRep::kStandard; + } else if (string_rep == "explicit") { + return StringRep::kExplicit; + } else { + SpielFatalError(absl::StrCat("Invalid string_rep ", string_rep)); + } +} + +} // namespace + +CellState PlayerToState(Player player) { + switch (player) { + case 0: + return CellState::kBlack; + case 1: + return CellState::kWhite; + default: + SpielFatalError(absl::StrCat("Invalid player id ", player)); + return CellState::kEmpty; + } +} + +CellState HexState::PlayerAndActionToState(Player player, Action move) const { + // This function returns the CellState resulting from the given move. + // The cell state tells us: + // - The colour of the stone. + // - If the stone results in a winning connection. + // - If the stone connects to one of the edges needed for that colour's + // winning connection. + // + // We know the colour from the argument player + // For connectedness to the edges, we check if the move is in first/last + // row/column, or if any of the neighbours are the same colour and connected. 
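+  // Concrete illustration (reviewer's note, not upstream commentary): on a
+  // board with num_cols_ = 3, black playing cell 1 lands in the first row,
+  // so the new stone is at least kBlackNorth; if a neighbour such as cell 4
+  // (one row below) already holds kBlackSouth, both edges are joined and
+  // the function returns kBlackWin.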
+ if (player == 0) { + bool north_connected = false; + bool south_connected = false; + if (move < num_cols_) { // First row + north_connected = true; + } else if (move >= (board_.size() - num_cols_)) { // Last row + south_connected = true; + } + for (int neighbour : AdjacentCells(move)) { + if (board_[neighbour] == CellState::kBlackNorth) { + north_connected = true; + } else if (board_[neighbour] == CellState::kBlackSouth) { + south_connected = true; + } + } + if (north_connected && south_connected) { + return CellState::kBlackWin; + } else if (north_connected) { + return CellState::kBlackNorth; + } else if (south_connected) { + return CellState::kBlackSouth; + } else { + return CellState::kBlack; + } + } else if (player == 1) { + bool west_connected = false; + bool east_connected = false; + if (move % num_cols_ == 0) { // First column + west_connected = true; + } else if (move % num_cols_ == num_cols_ - 1) { // Last column + east_connected = true; + } + for (int neighbour : AdjacentCells(move)) { + if (board_[neighbour] == CellState::kWhiteWest) { + west_connected = true; + } else if (board_[neighbour] == CellState::kWhiteEast) { + east_connected = true; + } + } + if (west_connected && east_connected) { + return CellState::kWhiteWin; + } else if (west_connected) { + return CellState::kWhiteWest; + } else if (east_connected) { + return CellState::kWhiteEast; + } else { + return CellState::kWhite; + } + } else { + SpielFatalError(absl::StrCat("Invalid player id ", player)); + return CellState::kEmpty; + } +} + +std::string StateToStringStandard(CellState state) { + switch (state) { + case CellState::kEmpty: + return "."; + case CellState::kWhite: + case CellState::kWhiteWin: + case CellState::kWhiteWest: + case CellState::kWhiteEast: + return "o"; + case CellState::kBlack: + case CellState::kBlackWin: + case CellState::kBlackNorth: + case CellState::kBlackSouth: + return "x"; + default: + SpielFatalError("Unknown state."); + return "This will never return."; + } +} + +std::string StateToStringExplicit(CellState state) { + switch (state) { + case CellState::kEmpty: + return "."; + case CellState::kWhite: + return "o"; + case CellState::kWhiteWin: + return "O"; + case CellState::kWhiteWest: + return "p"; + case CellState::kWhiteEast: + return "q"; + case CellState::kBlack: + return "x"; + case CellState::kBlackWin: + return "X"; + case CellState::kBlackNorth: + return "y"; + case CellState::kBlackSouth: + return "z"; + default: + SpielFatalError("Unknown state."); + return "This will never return."; + } +} + +std::string StateToString(CellState state, StringRep string_rep) { + if (string_rep == StringRep::kExplicit) { + return StateToStringExplicit(state); + } else if (string_rep == StringRep::kStandard) { + return StateToStringStandard(state); + } else { + SpielFatalError("Unknown string_rep."); + } +} + +void HexState::DoApplyAction(Action move) { + SPIEL_CHECK_TRUE(board_[move] == CellState::kEmpty); + CellState move_cell_state = PlayerAndActionToState(CurrentPlayer(), move); + board_[move] = move_cell_state; + if (move_cell_state == CellState::kBlackWin) { + result_black_perspective_ = 1; + } else if (move_cell_state == CellState::kWhiteWin) { + result_black_perspective_ = -1; + } else if (move_cell_state != CellState::kBlack && + move_cell_state != CellState::kWhite) { + // Move is connected to an edge but not winning. + // Update edge-connected groups with a flood-fill, to maintain that all edge + // connected nodes are known about. 
+ // We don't do flood fill if a player has won, so it's impossible for a cell + // connected to an edge to be changed by the flood-fill. + CellState cell_state_to_change = + (current_player_ == 0 ? CellState::kBlack : CellState::kWhite); + // We assume that move can safely be cast to int + std::vector flood_stack = {static_cast(move)}; + int latest_cell; + while (!flood_stack.empty()) { + latest_cell = flood_stack.back(); + flood_stack.pop_back(); + for (int neighbour : AdjacentCells(latest_cell)) { + if (board_[neighbour] == cell_state_to_change) { + // We make the change before putting the cell on the queue to avoid + // putting the same cell on the queue multiple times + board_[neighbour] = move_cell_state; + flood_stack.push_back(neighbour); + } + } + } + } + current_player_ = 1 - current_player_; +} + +std::vector HexState::LegalActions() const { + // Can move in any empty cell. + std::vector moves; + if (IsTerminal()) return moves; + for (int cell = 0; cell < board_.size(); ++cell) { + if (board_[cell] == CellState::kEmpty) { + moves.push_back(cell); + } + } + return moves; +} + +std::string HexState::ActionToString(Player player, Action action_id) const { + int row = action_id % num_cols_; + int col = action_id / num_cols_; + if (StringRep() == StringRep::kStandard) { + char row_char = static_cast(static_cast('a') + row); + std::string row_str; + row_str += row_char; + std::string ret = absl::StrCat(row_str, col + 1); + return ret; + } else if (StringRep() == StringRep::kExplicit) { + return absl::StrCat( + StateToString(PlayerAndActionToState(player, action_id), StringRep()), + "(", row, ",", col, ")"); + } else { + SpielFatalError("Unknown string_rep."); + } +} + +std::vector HexState::AdjacentCells(int cell) const { + std::vector neighbours = {}; + bool north_edge = (cell < num_cols_); + bool south_edge = (cell >= (board_.size() - num_cols_)); + bool west_edge = (cell % num_cols_ == 0); + bool east_edge = (cell % num_cols_ == num_cols_ - 1); + if (!north_edge) { neighbours.push_back(cell - num_cols_); } + if (!north_edge && !east_edge) { neighbours.push_back(cell - num_cols_ + 1); } + if (!east_edge) { neighbours.push_back(cell + 1); } + if (!south_edge) { neighbours.push_back(cell + num_cols_); } + if (!south_edge && !west_edge) { neighbours.push_back(cell + num_cols_ - 1); } + if (!west_edge) { neighbours.push_back(cell - 1); } + return neighbours; +} + +HexState::HexState(std::shared_ptr game, int num_cols, int num_rows, + enum StringRep string_rep) + : State(game), + num_cols_(num_cols), + num_rows_(num_rows), + string_rep_(string_rep) { + // for all num_colss & num_rowss -> num_colss_ >= num_rowss_ + board_.resize(num_cols * num_rows, CellState::kEmpty); +} + +std::string HexState::ToString() const { + std::string str; + // Each cell has the cell plus a space + // nth line has n spaces, and 1 "\n", except last line has no "\n" + str.reserve(num_cols_ * num_rows_ * 2 + num_rows_ * (num_rows_ + 1) / 2 - 1); + int line_num = 0; + for (int cell = 0; cell < board_.size(); ++cell) { + // if it's the first cell in a new row + if (cell && cell % num_cols_ == 0) { + absl::StrAppend(&str, "\n"); + line_num++; + absl::StrAppend(&str, std::string(line_num, ' ')); + } + absl::StrAppend(&str, StateToString(board_[cell], string_rep_)); + absl::StrAppend(&str, " "); + } + return str; +} + +bool HexState::IsTerminal() const { return result_black_perspective_ != 0; } + +std::vector HexState::Returns() const { + return {result_black_perspective_, -result_black_perspective_}; +} + +std::string 
HexState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return HistoryString(); +} + +std::string HexState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void HexState::ObservationTensor(Player player, + absl::Span values) const { + // TODO(author8): Make an option to not expose connection info + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + TensorView<2> view(values, {kCellStates, static_cast(board_.size())}, + true); + for (int cell = 0; cell < board_.size(); ++cell) { + view[{static_cast(board_[cell]) - kMinValueCellState, cell}] = 1.0; + } +} + +std::unique_ptr HexState::Clone() const { + return std::unique_ptr(new HexState(*this)); +} + +HexGame::HexGame(const GameParameters& params) + : Game(kGameType, params), + // Use board_size as the default value of num_cols and num_rows + num_cols_( + ParameterValue("num_cols", ParameterValue("board_size"))), + num_rows_( + ParameterValue("num_rows", ParameterValue("board_size"))), + string_rep_(StringRepStrToEnum( + ParameterValue("string_rep", kDefaultStringRep))) {} + +} // namespace hex +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/hex/hex.h b/scenarios/bargaining/open_spiel/open_spiel/games/hex/hex.h new file mode 100644 index 0000000..f9172a9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/hex/hex.h @@ -0,0 +1,154 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_HEX_H_ +#define OPEN_SPIEL_GAMES_HEX_H_ + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +// The classic game of Hex: https://en.wikipedia.org/wiki/Hex_(board_game) +// Does not implement pie rule to balance the game +// +// Parameters: +// "board_size" int size of the board (default = 11) +// "num_cols" int number of columns (optional) +// "num_rows" int number of rows (optional) +// "string_rep" string representation of the action and board strings +// ("standard" (default) | "explicit"). See below +// for details. + +namespace open_spiel { +namespace hex { + +// Constants. +inline constexpr int kNumPlayers = 2; +inline constexpr int kDefaultBoardSize = 11; +inline constexpr int kMaxNeighbours = + 6; // Maximum number of neighbours for a cell +inline constexpr int kCellStates = 1 + 4 * kNumPlayers; +inline constexpr int kMinValueCellState = -4; +inline constexpr const char* kDefaultStringRep = "standard"; + +// State of a cell. 
+// Describes if a cell is +// - empty, black or white +// - connected to N/S edges if black, or was a winning move +// - connected to E/W edges if white, or was a winning move +// These are used in calculation of winning connections, and may be useful +// features for learning agents +// +// Convention is that black plays first (and is player 0) +enum class CellState { + kEmpty = 0, + kWhiteWest = -3, + kWhiteEast = -2, + kWhiteWin = -4, + kWhite = -1, // White and not edge connected + kBlackNorth = 3, + kBlackSouth = 2, + kBlackWin = 4, + kBlack = 1, // Black and not edge connected +}; + +// The string representations of the game. Standard uses normal stones and +// chess-like action coordinates ('a1'). Explicit uses different stones +// depending on the state of each stone and uses the full cell coordinates. +enum class StringRep { + kStandard = 0, + kExplicit = 1, +}; + +// State of an in-play game. +class HexState : public State { + public: + HexState(std::shared_ptr game, int num_cols, int num_rows, + StringRep string_rep); + + HexState(const HexState&) = default; + + Player CurrentPlayer() const override { + return IsTerminal() ? kTerminalPlayerId : current_player_; + } + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + std::vector LegalActions() const override; + + CellState BoardAt(int cell) const { return board_[cell]; } + void ChangePlayer() { current_player_ = current_player_ == 0 ? 1 : 0; } + StringRep string_rep() const { return string_rep_; } + + protected: + std::vector board_; + void DoApplyAction(Action move) override; + + private: + CellState PlayerAndActionToState(Player player, Action move) const; + + Player current_player_ = 0; // Player zero goes first + double result_black_perspective_ = 0; // 1 if Black (player 0) wins + std::vector AdjacentCells(int cell) const; // Cells adjacent to cell + + const int num_cols_; // x + const int num_rows_; // y + const enum StringRep string_rep_; +}; + +// Game object. 
+class HexGame : public Game { + public: + explicit HexGame(const GameParameters& params); + int NumDistinctActions() const override { return num_cols_ * num_rows_; } + std::unique_ptr NewInitialState() const override { + return std::unique_ptr( + new HexState(shared_from_this(), num_cols_, num_rows_, string_rep_)); + } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { return -1; } + absl::optional UtilitySum() const override { return 0; } + double MaxUtility() const override { return 1; } + std::vector ObservationTensorShape() const override { + return {kCellStates, num_cols_, num_rows_}; + } + int MaxGameLength() const override { return num_cols_ * num_rows_; } + StringRep string_rep() const { return string_rep_; } + + private: + const int num_cols_; + const int num_rows_; + const enum StringRep string_rep_; +}; + +CellState PlayerToState(Player player); +std::string StateToString(CellState state, StringRep string_rep); + +} // namespace hex +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_HEX_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/hex/hex_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/hex/hex_test.cc new file mode 100644 index 0000000..3de26ac --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/hex/hex_test.cc @@ -0,0 +1,63 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include + +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace hex { +namespace { + +namespace testing = open_spiel::testing; + +void TestBoardOrientation() { + std::shared_ptr game = LoadGame( + "hex", {{"num_cols", GameParameter(3)}, {"num_rows", GameParameter(4)}}); + std::unique_ptr state = game->NewInitialState(); + state->ApplyAction(1); + state->ApplyAction(2); + state->ApplyAction(4); + state->ApplyAction(5); + state->ApplyAction(7); + state->ApplyAction(8); + state->ApplyAction(10); + // Black wins + std::cout << state << std::endl; + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_EQ(state->PlayerReturn(0), 1.0); + SPIEL_CHECK_EQ(state->PlayerReturn(1), -1.0); +} + +void BasicHexTests() { + testing::LoadGameTest("hex(num_cols=5,num_rows=5)"); + testing::NoChanceOutcomesTest(*LoadGame("hex(num_cols=5,num_rows=5)")); + testing::RandomSimTest(*LoadGame("hex(num_cols=5,num_rows=5)"), 100); + testing::RandomSimTest(*LoadGame("hex"), 5); + testing::RandomSimTest(*LoadGame("hex(num_cols=2,num_rows=3)"), 10); + testing::RandomSimTest(*LoadGame("hex(num_cols=2,num_rows=2)"), 10); +} + +} // namespace +} // namespace hex +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::hex::BasicHexTests(); + open_spiel::hex::TestBoardOrientation(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/hive/README.md b/scenarios/bargaining/open_spiel/open_spiel/games/hive/README.md new file mode 100644 index 0000000..f85ad3d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/hive/README.md @@ -0,0 +1,128 @@ +# Hive + +Implements the base game of [Hive](https://www.gen42.com/games/hive) and its +three expansion pieces: Mosquito, Ladybug, and Pillbug. + +![Picture of playing on the console:](https://imgur.com/mkEObfx.png) + +*
Example game state viewed on the command-line (left) and with an +external viewer "Mzinga" (right)
* + +This implementation follows the rules outlined by the Universal Hive Protocol +([UHP](https://github.com/jonthysell/Mzinga/wiki/UniversalHiveProtocol)), which +means states can be serialized to and deserialized from valid UHP game strings. +With a bit of I/O handling, this can also be used as a UHP-compliant Hive +Engine, making interactions with other engines straightforward. + +## State + +### Observation Tensor + +First, the hexagonal grid needs to be represented as a rectangular one for 2D +convolution: + +![Storing a hexagonal grid in a 2D array](https://imgur.com/CIy5ctM.png) + +*
Example transformation - taken from +[RedBlobGames](https://www.redblobgames.com/grids/hexagons/#map-storage)
* The observation tensor then takes the form of multiple 2D feature planes +describing the board and turn state, similar to what was done for AlphaZero +chess. + +However, since Hive's "board" is a repeating hexagonal tiling, the size is +bounded only by the maximum number of tiles that can be laid in a straight line +(28 total tiles for all expansions). Yet, a grid of size 28x28 is far too large +to be computationally practical. + +To help offset the complications this would bring for training in AlphaZero, the +board can be parameterized with `board_size` to reduce the tensor's overall +sparsity. Using a `board_size` smaller than `kMaxBoardSize` means that some +outlier games cannot be perfectly represented and are instead forced to a Draw. +In practice, games that would approach that board length are extremely rare, so +the trade-off feels acceptable. + +The 2D feature planes are one-hot encodings that indicate: + +- the presence of a particular bug type, for each player +- which bugs are pinned +- which bugs are covered +- the available positions that each player can place a new bug tile +- all 1's or all 0's to distinguish the current player's turn + +### Action Space + +**An action in Hive is described as:** 1) choosing which tile to move 2) +choosing which tile it moves next to (or on top of) 3) the relative direction of +the tile it moves next to. + +*e.g.* "wA2 bL/" - *White moves their 2nd Ant to the top right edge of Black's +Ladybug* + +With there being 28 unique tiles and 7 directions (the 6 hexagonal edges and +"above"), the action space can be thought of as entries into a 3D matrix with +dimensions **7 x 28 x 28** = **5488** total actions. + +This is not a *perfect* action space representation as there are a handful of +unused actions (e.g. moving a tile next to itself), but it does capture every +legal move. Unfortunately, with the introduction of the Pillbug, each player is +able to move their own piece *or* the enemy's, meaning we can't implicitly +expect the tile being moved to be the colour of the current player. This ends up +doubling the action space size from 7x14x28 to 7x28x28. + +## To-do + +Below are some concrete features and fixes I intend to implement to either help +speed up training or improve the interoperability with other Hive software +(e.g. displaying games directly to +[MzingaViewer](https://github.com/jonthysell/Mzinga)): + +- [ ] Address the efficiency of code that uses the most compute time + (`HiveState::GenerateValidSlides()` and `HiveState::IsGated()` from recent + perf tests) +- [ ] Implement Zobrist hashing to handle a "3-repeated moves" forced draw + (unofficial community rule) +- [ ] Undo() +- [ ] Perft() +- [ ] Make it possible to load many UHP gamestrings from a file for training, + or to collect interesting game statistics +- [ ] Create a separate binary that handles I/O and behaves as a proper + UHP-compliant engine +- [ ] Provide a simplified action space for games that do not use expansion + pieces + +## Future Improvements / Thoughts + +While developing this engine, I came across many interesting ideas that have the +potential for serious progress towards a viable expert-level AZ-bot for Hive. +And as of this submission, no such Hive AI exists, making the prospect of any +improvements much more appealing. + +Below is a record of those miscellaneous thoughts, in approximate order of the +potential I think they have: + +- **Design a more exact action space**.
There are a handful of other suggested + notations from the Hive community, each with their own advantages and + drawbacks, that may be useful to look into for an alternative action space. + One that looks very promising is + [Direction-Based Notation](https://psoukenik.medium.com/direction-based-notation-for-hive-dd7fd234d4d), + as it implicitly covers all rotations and reflections by design. + +- **Use a Hexagonal CNN model or filter**. One problem that has been + conveniently unaddressed is the fact that 2D convolution is performed on + hexagonal data that has been refitted onto a square. The typical 3x3 filter + then doesn't accurately describe the 6 neighbours of a hex, as 2 extra + values are contained in the filter. One option would be to use a custom 3x3 + filter that zeroes out the two values along the diagonal, or to attempt + using a more advanced implementation like + [HexCNN](https://arxiv.org/pdf/2101.10897) or + [Rotational-Invariant CNN](https://www.jstage.jst.go.jp/article/transinf/E107.D/2/E107.D_2023EDP7023/_pdf/-char/en). + The first option would be much easier to implement into the existing + AlphaZero framework. + +- **Attempt a graph/node-based representation**. With how a game of Hive is + structured like a graph itself, I think there is potential in using Graph + Neural Networks (GNNs) for learning. Some recent research has been done by + applying + [GNNs to AlphaZero for board game AI](https://arxiv.org/pdf/2107.08387), + which indicates there is at least some proven success already. diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/hive/hive.cc b/scenarios/bargaining/open_spiel/open_spiel/games/hive/hive.cc new file mode 100644 index 0000000..7a8d15c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/hive/hive.cc @@ -0,0 +1,591 @@ +// Copyright 2025 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/hive/hive.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/match.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/games/hive/hive_board.h" +#include "open_spiel/observer.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/tensor_view.h" + +namespace open_spiel { +namespace hive { +namespace { + +// Facts about the game.
+const GameType kGameType{/*short_name=*/"hive", + /*long_name=*/"Hive", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + { + // the radius of the underlying hexagonal grid. + // customisable to reduce computational complexity + // where needed. Max size of 14. + {"board_size", GameParameter(kDefaultBoardRadius)}, + // expansion pieces, included by default + {"uses_mosquito", GameParameter(true)}, + {"uses_ladybug", GameParameter(true)}, + {"uses_pillbug", GameParameter(true)}, + {"ansi_color_output", GameParameter(false)}, + }}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new HiveGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +} // namespace + +HiveState::HiveState(std::shared_ptr game, int board_size, + ExpansionInfo expansions, int num_bug_types, + bool ansi_color_output) + : State(game), + board_(std::min(board_size, kMaxBoardRadius), expansions), + expansions_(expansions), + num_bug_types_(num_bug_types), + ansi_color_output_(ansi_color_output), + force_terminal_(false) {} + +std::string HiveState::ActionToString(Player player, Action action_id) const { + return ActionToMove(action_id).ToUHP(); +} + +std::string HiveState::ToString() const { + if (!ansi_color_output_) { + return Serialize(); + } + + static std::string white = "\033[38;5;223m"; // white-beige-ish + static std::string black = "\033[1;31m"; // using red to represent black + static std::string reset = "\033[1;39m"; + static float indent_size = 2.5f; + + std::string string = "\n"; + string.reserve(Board().SquareDimensions() * Board().SquareDimensions() * 5); + std::vector top_tiles; + + // loop over valid Q, R, to generate a hexagon + int radius = Board().Radius(); + for (int r = -radius; r <= radius; ++r) { + // indent based on which row we are on (r). 
Intentionally taking the floor + // to offset odd numbered rows + int num_spaces = std::abs(r) * indent_size; + for (int i = 0; i < num_spaces; ++i) { + absl::StrAppend(&string, " "); + } + + // print each tile on row r by iterating valid q indices + for (int q = std::max(-radius, -r - radius); + q <= std::min(radius, -r + radius); ++q) { + HiveTile tile = Board().GetTopTileAt( + {static_cast(q), static_cast(r)}); + + std::ostringstream oss; + if (tile.HasValue()) { + if (tile.GetColour() == Colour::kWhite) { + oss << white; + } else { + oss << black; + } + + std::string uhp = tile.ToUHP(); + if (Board().GetPositionOf(tile).H() > 0) { + uhp = absl::StrCat("^", uhp); + top_tiles.push_back(tile); + } + + // print the tile's UHP representation, or "-" otherwise, centered + // around a padded 5 char long string + int left_padding = (5 - uhp.size()) / 2; + int right_padding = (5 - uhp.size()) - left_padding; + for (int i = 0; i < left_padding; ++i) { + oss << ' '; + } + + oss << uhp; + + // use an asterisk to indicate this bug was most recently moved + if (tile == Board().LastMovedTile()) { + oss << "*"; + --right_padding; + } + + for (int i = 0; i < right_padding; ++i) { + oss << ' '; + } + + oss << reset; + } else { + // use an asterisk to indicate the location of the last moved tile + if (Board().LastMovedTile().HasValue() && + Board().LastMovedFrom() == HivePosition(q, r)) { + if (Board().LastMovedTile().GetColour() == Colour::kWhite) { + oss << white; + } else { + oss << black; + } + + oss << " * " << reset; + } else { + oss << " - " << reset; + } + } + absl::StrAppend(&string, oss.str()); + } + absl::StrAppend(&string, "\n\n"); + } + + // print bug stacks + for (auto tile : top_tiles) { + HivePosition pos = Board().GetPositionOf(tile); + absl::StrAppend(&string, tile.ToUHP()); + + HiveTile below = Board().GetTileBelow(pos); + while (below.HasValue()) { + absl::StrAppend(&string, " > ", below.ToUHP()); + + pos += {0, 0, -1}; + if (pos.H() <= 0) { + break; + } + + below = Board().GetTileBelow(pos); + } + + absl::StrAppend(&string, "\n"); + } + + return string; +} + +// e.g. the string "wA2 /bQ" translates to: "Move White's 2nd Ant to the +// south-west of Black's Queen" +Action HiveState::StringToAction(Player player, + const std::string& move_str) const { + // pass move? 
+ if (move_str == "pass") { + return PassAction(); + } + + Move move; + move.direction = Direction::kNumAllDirections; + std::vector bugs = absl::StrSplit(move_str, ' '); + SPIEL_DCHECK_GT(bugs.size(), 0); + SPIEL_DCHECK_LE(bugs.size(), 2); + + // first bug should always be valid + move.from = HiveTile::UHPToTile(bugs[0]); + if (!move.from.HasValue()) { + SpielFatalError("HiveState::StringToAction() - invalid move string: " + + move_str); + } + + // special case: if only one bug is provided, it is a 1st turn move + if (bugs.size() == 1) { + return MoveToAction(move); + } + + // get second bug and its relative direction + char c = bugs[1].front(); + if (c == '\\') { + move.direction = Direction::kNW; + } else if (c == '-') { + move.direction = Direction::kW; + } else if (c == '/') { + move.direction = Direction::kSW; + } + + // check last char if we haven't found a direction + if (move.direction == Direction::kNumAllDirections) { + c = bugs[1].back(); + if (c == '\\') { + move.direction = Direction::kSE; + } else if (c == '-') { + move.direction = Direction::kE; + } else if (c == '/') { + move.direction = Direction::kNE; + } + } + + // if still no direction, it must be above + if (move.direction == Direction::kNumAllDirections) { + move.direction = Direction::kAbove; + } + + // now extract just the bug + ordinal from string + size_t start_index = bugs[1].find_first_not_of("\\-/"); + size_t end_index = bugs[1].find_last_not_of("\\-/"); + move.to = HiveTile::UHPToTile( + bugs[1].substr(start_index, end_index - start_index + 1)); + + return MoveToAction(move); +} + +std::vector HiveState::Returns() const { + bool white_winner = WinConditionMet(kPlayerWhite); + bool black_winner = WinConditionMet(kPlayerBlack); + + if (white_winner ^ black_winner) { + return {white_winner ? 1.f : -1.f, black_winner ? 1.f : -1.f}; + } + + return {0, 0}; +} + +std::string HiveState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return HistoryString(); +} + +std::string HiveState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void HiveState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + // starting indices for each 2D feature plane, variable based on expansions + int articulation_idx = num_bug_types_ * num_players_; + int placeable_idx = articulation_idx + 2; + int covered_idx = placeable_idx + 2; + int turn_idx = covered_idx + 2; + + // Treat values as a 3d-tensor, where each feature plane has square dimensions + // (radius * 2 + 1) x (radius * 2 + 1), and contains one-hot encodings of the + // current board state + TensorView<3> view(values, + {game_->ObservationTensorShape()[0], + Board().SquareDimensions(), Board().SquareDimensions()}, + true); + + int plane_idx = 0; + Colour my_colour = PlayerToColour(player); + Colour opposing_colour = OtherColour(my_colour); + + // populate all planes that reference a tile in play + for (auto tile : Board().GetPlayedTiles()) { + HivePosition pos = Board().GetPositionOf(tile); + std::array indices = AxialToTensorIndex(pos); + bool is_opposing = tile.GetColour() == opposing_colour; + + // bug type planes + plane_idx = BugTypeToTensorIndex(tile.GetBugType()) + + (is_opposing ? num_bug_types_ : 0); + view[{plane_idx, indices[0], indices[1]}] = 1.0f; + + // pinned plane + plane_idx = articulation_idx + (is_opposing ? 
1 : 0); + if (Board().IsPinned(pos)) { + view[{plane_idx, indices[0], indices[1]}] = 1.0f; + } + + // covered plane + plane_idx = covered_idx + (is_opposing ? 1 : 0); + if (Board().IsCovered(tile)) { + view[{plane_idx, indices[0], indices[1]}] = 1.0f; + } + } + + // populate all planes that reference a specific position + int radius = Board().Radius(); + for (int r = -radius; r <= radius; ++r) { + for (int q = -radius; q <= radius; ++q) { + HivePosition pos = {static_cast(q), static_cast(r), 0}; + std::array indices = AxialToTensorIndex(pos); + + // current player's turn + view[{turn_idx, indices[0], indices[1]}] = + static_cast(current_player_); + + // player and opponent's placeable positions + if (Board().IsPlaceable(my_colour, pos)) { + view[{placeable_idx, indices[0], indices[1]}] = 1.0f; + } else if (Board().IsPlaceable(opposing_colour, pos)) { + view[{placeable_idx + 1, indices[0], indices[1]}] = 1.0f; + } + } + } +} + +std::unique_ptr HiveState::Clone() const { + return std::unique_ptr(new HiveState(*this)); +} + +std::vector HiveState::LegalActions() const { + if (IsTerminal()) { + return {}; + } + + std::vector moves; + std::set unique_actions; + + Board().GenerateAllMoves(&moves, PlayerToColour(current_player_), + move_number_); + std::transform(moves.begin(), moves.end(), + std::inserter(unique_actions, unique_actions.end()), + [this](Move& m) { return MoveToAction(m); }); + + std::vector actions(unique_actions.begin(), unique_actions.end()); + + // if a player has no legal actions, then they must pass + if (actions.empty()) { + actions.push_back(PassAction()); + } + + return actions; +} + +std::string HiveState::Serialize() const { + return absl::StrJoin( + {UHPGameString(), UHPProgressString(), UHPTurnString(), UHPMovesString()}, + ";", [](std::string* out, const absl::string_view& t) { + if (!t.empty()) { + absl::StrAppend(out, t); + } + }); +} + +Move HiveState::ActionToMove(Action action) const { + // pass action + if (action == PassAction()) { + return Move{HiveTile::kNoneTile, HiveTile::kNoneTile, + Direction::kNumAllDirections}; + } + + int64_t direction = action % Direction::kNumAllDirections; + int64_t to = (action / Direction::kNumAllDirections) % kMaxTileCount; + int64_t from = action / (kMaxTileCount * Direction::kNumAllDirections); + + // special case: for the first turn actions, they are encoded as playing a + // tile on top of itself. In this case, we want "to" to be kNoneTile + if (from == to && direction == Direction::kAbove) { + to = HiveTile::kNoneTile; + } + + return Move{HiveTile(from), HiveTile(to), static_cast(direction)}; +} + +Action HiveState::MoveToAction(Move move) const { + // pass move encoded as "moving no tile" + if (move.IsPass()) { + return PassAction(); + } + + // if there is no second bug "to", then we have a special case for first turn + if (!move.to.HasValue()) { + return (move.from * (kMaxTileCount * Direction::kNumAllDirections)) + + (move.from * Direction::kNumAllDirections) + Direction::kAbove; + } + + // as if indexing into a 3d array with indices [from][to][direction] + return (move.from * (kMaxTileCount * Direction::kNumAllDirections)) + + (move.to * Direction::kNumAllDirections) + move.direction; +} + +std::string HiveState::UHPGameString() const { + return absl::StrFormat("Base%s%s%s%s", expansions_.HasAny() ? "+" : "", + expansions_.uses_mosquito ? "M" : "", + expansions_.uses_ladybug ? "L" : "", + expansions_.uses_pillbug ? 
"P" : ""); +} + +std::string HiveState::UHPProgressString() const { + if (move_number_ == 0) { + return kUHPNotStarted; + } + + if (move_number_ > game_->MaxGameLength()) { + return kUHPDraw; + } + + if (IsTerminal()) { + auto returns = Returns(); + if (returns[kPlayerWhite] > returns[kPlayerBlack]) { + return kUHPWhiteWins; + } else if (returns[kPlayerWhite] < returns[kPlayerBlack]) { + return kUHPBlackWins; + } else { + return kUHPDraw; + } + } + + return kUHPInProgress; +} + +std::string HiveState::UHPTurnString() const { + return absl::StrFormat("%s[%d]", + current_player_ == kPlayerWhite ? "White" : "Black", + (move_number_ + 2) / 2); +} + +std::string HiveState::UHPMovesString() const { + return absl::StrJoin(ActionsToStrings(*this, History()), ";"); +} + +size_t HiveState::BugTypeToTensorIndex(BugType type) const { + size_t index = 0; + for (uint8_t i = 0; i < static_cast(BugType::kNumBugTypes); ++i) { + if (expansions_.IsBugTypeEnabled(static_cast(i))) { + if (type == static_cast(i)) { + return index; + } + + ++index; + } + } + + return -1; +} + +// we assume the move is valid at this point and simply apply it +void HiveState::DoApplyAction(Action action) { + if (action == PassAction()) { + Board().Pass(); + } else { + bool success = Board().MoveTile(ActionToMove(action)); + + // if something has gone wrong, force end the game as a draw + // (should only happen with with reduced board_sizes that go out of bounds) + if (!success) { + force_terminal_ = true; + } + } + + current_player_ = (++current_player_) % kNumPlayers; +} + +HiveGame::HiveGame(const GameParameters& params) + : Game(kGameType, params), + board_radius_(ParameterValue("board_size")), + num_bug_types_(kNumBaseBugTypes), + ansi_color_output_(ParameterValue("ansi_color_output")), + expansions_({ParameterValue("uses_mosquito"), + ParameterValue("uses_ladybug"), + ParameterValue("uses_pillbug")}) { + if (expansions_.uses_mosquito) { + ++num_bug_types_; + } + + if (expansions_.uses_ladybug) { + ++num_bug_types_; + } + + if (expansions_.uses_pillbug) { + ++num_bug_types_; + } +} + +std::unique_ptr HiveGame::DeserializeState( + const std::string& uhp_string) const { + std::vector tokens = absl::StrSplit(uhp_string, ';'); + SPIEL_DCHECK_GE(tokens.size(), 3); + + // first substring is the game string (e.g. "Base+MLP" for all expansions). 
+ // since we are already inside a const game object, verify that the UHP game + // string matches what we expect it to be at this point + SPIEL_DCHECK_TRUE(absl::StrContains(tokens[0], "Base")); + if (expansions_.uses_mosquito) { + SPIEL_DCHECK_TRUE(absl::StrContains(tokens[0], "M")); + } + if (expansions_.uses_ladybug) { + SPIEL_DCHECK_TRUE(absl::StrContains(tokens[0], "L")); + } + if (expansions_.uses_pillbug) { + SPIEL_DCHECK_TRUE(absl::StrContains(tokens[0], "P")); + } + + std::unique_ptr state = NewInitialState(); + if (tokens[1] == kUHPNotStarted) { + return state; + } + + // skip tokens[2] (turn string) as it is implicitly derived from the actions + for (int i = 3; i < tokens.size(); ++i) { + state->ApplyAction(state->StringToAction(std::string(tokens[i]))); + } + + // now verify state string (tokens[1]) + if (state->IsTerminal()) { + if (state->Returns()[kPlayerWhite] > 0) { + SPIEL_DCHECK_TRUE(tokens[1] == kUHPWhiteWins); + } else if (state->Returns()[kPlayerBlack] > 0) { + SPIEL_DCHECK_TRUE(tokens[1] == kUHPBlackWins); + } else { + SPIEL_DCHECK_TRUE(tokens[1] == kUHPDraw); + } + } else { + SPIEL_DCHECK_TRUE(tokens[1] == kUHPInProgress); + } + + return state; +} + +std::pair, std::unique_ptr> +DeserializeUHPGameAndState(const std::string& uhp_string) { + auto pos = uhp_string.find(';'); + auto game_str = uhp_string.substr(0, pos); + SPIEL_DCHECK_TRUE(absl::StrContains(game_str, "Base")); + + GameParameters params{}; + params["name"] = GameParameter(kGameType.short_name); + params["uses_mosquito"] = GameParameter(absl::StrContains(game_str, "M")); + params["uses_ladybug"] = GameParameter(absl::StrContains(game_str, "L")); + params["uses_pillbug"] = GameParameter(absl::StrContains(game_str, "P")); + + auto game = LoadGame(params); + return {game, game->DeserializeState(uhp_string)}; +} + +} // namespace hive +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/hive/hive.h b/scenarios/bargaining/open_spiel/open_spiel/games/hive/hive.h new file mode 100644 index 0000000..cda95d8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/hive/hive.h @@ -0,0 +1,234 @@ +// Copyright 2025 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_HIVE_H_ +#define OPEN_SPIEL_GAMES_HIVE_H_ + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/games/hive/hive_board.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +// from https://en.wikipedia.org/wiki/Hive_(game): +// +// +// "The object of Hive is to capture the opponent's queen bee by allowing it +// to become completely surrounded by other pieces (belonging to either +// player), while avoiding the capture of one's own queen. 
Hive differs from +// other tile-based games in that the tiles, once placed, can then be moved to +// other positions according to various rules, much like chess pieces." +// +// +// The intent of this Hive implementation is to provide a representation of the +// board state and action space that is compatible for use in the Alpha Zero +// algorithm (or similar). +// +// This becomes particularly tricky as one of the most notable design choices +// in Hive is that it is played on an unbounded surface, with no concept of a +// grid shape or size outside of the total number of tiles present. With the +// tiles being hexagonal in shape, a classic 2D grid representation used in +// 2D convolution complicates things even further. +// +// This implementation aims to minimize the effects of such problems by +// providing bounded grid sizes to reduce computational complexity (most games +// stay within ~6 units of the initial tile in practice). More information can +// be found under the HiveBoard class. +// +// Another important feature is the support of the Universal Hive Protocol (UHP) +// (https://github.com/jonthysell/Mzinga/wiki/UniversalHiveProtocol) +// +// While not a fully compliant UHP-engine implementation (mainly missing the +// required I/O and commands), the board game and state can be serialized to and +// de-serialized from a valid UHP gamestring. This allows the ever-growing +// archive of Hive replay data to be used for model training. +// +// +// Parameters: +// "board_size" int radius of the underlying hexagonal board +// (default = 8) +// "uses_mosquito" bool Whether to use the Mosquito expansion tile. +// (default = true) +// "uses_ladybug" bool Whether to use the Ladybug expansion tile. +// (default = true) +// "uses_pillbug" bool Whether to use the Pillbug expansion tile. +// (default = true) +// "ansi_color_output" bool Whether to color the output for a terminal. +// (default = false) + +namespace open_spiel { +namespace hive { + +// There are 28 unique tiles and 7 directions a tile can be placed beside (the 6 +// hexagonal edges and "above"). So the total action space is 28 * 28 * 7 = 5488 +inline constexpr int kNumDistinctActions = 5488 + 1; // +1 for pass +inline constexpr int kNumPlayers = 2; +inline constexpr int kNumBaseBugTypes = 5; +inline constexpr int kMaxGameLength = 500; +inline constexpr const char* kUHPNotStarted = "NotStarted"; +inline constexpr const char* kUHPInProgress = "InProgress"; +inline constexpr const char* kUHPWhiteWins = "WhiteWins"; +inline constexpr const char* kUHPBlackWins = "BlackWins"; +inline constexpr const char* kUHPDraw = "Draw"; + +// State of an in-play game. +class HiveState : public State { + public: + explicit HiveState(std::shared_ptr game, + int board_size = kDefaultBoardRadius, + ExpansionInfo expansions = {}, + int num_bug_types = kNumBaseBugTypes, + bool ansi_color_output = false); + + HiveState(const HiveState&) = default; + HiveState& operator=(const HiveState&) = default; + + Player CurrentPlayer() const override { + return IsTerminal() ? 
kTerminalPlayerId : current_player_; + } + + // pretty prints the board state when using ansi_color_output_, and + // prints the UHP string representation otherwise + std::string ToString() const override; + + std::string ActionToString(Player player, Action action_id) const override; + Action StringToAction(Player player, + const std::string& move_str) const override; + + bool IsTerminal() const override { + return WinConditionMet(kPlayerWhite) || WinConditionMet(kPlayerBlack) || + MoveNumber() >= game_->MaxGameLength() || force_terminal_; + } + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + + // A 3d-tensor where each binary 2d-plane represents the features below. + // The # of feature planes varies based on the # of expansion tiles in use. + // Example feature plane indices with all expansion tiles: + // + // (0-7): current player's bugs in play for each of the 8 bug types + // (8-15): opposing player's bugs in play for each of the 8 bug types + // (16): current player's "pinned" bugs + // (17): opposing player's "pinned" bugs + // (18): current player's valid placement positions + // (19): opposing player's valid placement positions + // (20): current player's "covered" bugs + // (21): opposing player's "covered" bugs + // (22): all 0's if it's White's turn, and all 1's if it's Black's turn + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + std::vector LegalActions() const override; + + // serializes state into a valid Universal Hive Protocol (UHP) game string. + // UHP provides an interface between other Hive-playing software. Inspired + // by the Universal Chess Interface protocol used for Chess software. + // https://github.com/jonthysell/Mzinga/wiki/UniversalHiveProtocol + // e.g. 
GameTypeString;GameStateString;TurnString;MoveString1;...;MoveStringN + std::string Serialize() const override; + + // see above + std::string UHPGameString() const; + std::string UHPProgressString() const; + std::string UHPTurnString() const; + std::string UHPMovesString() const; + + HiveBoard& Board() { return board_; } + const HiveBoard& Board() const { return board_; } + + Move ActionToMove(Action action) const; + Action MoveToAction(Move move) const; + Action PassAction() const { return NumDistinctActions() - 1; } + + inline bool WinConditionMet(Player player) const { + return Board().IsQueenSurrounded(OtherColour(PlayerToColour(player))); + } + + protected: + void DoApplyAction(Action action) override; + + private: + // allows any combination of expansion pieces to be used for the observation + size_t BugTypeToTensorIndex(BugType type) const; + + // an axial coordinate at position (q, r) is stored at index [r][q] after + // translating the axial coordinate by the length of the radius + inline std::array AxialToTensorIndex(HivePosition pos) const { + return {pos.R() + Board().Radius(), pos.Q() + Board().Radius()}; + } + + Player current_player_ = kPlayerWhite; + HiveBoard board_; + ExpansionInfo expansions_; + int num_bug_types_; + bool ansi_color_output_; + bool force_terminal_; +}; + +class HiveGame : public Game { + public: + explicit HiveGame(const GameParameters& params); + + std::array ActionsShape() const { return {7, 28, 28}; } + int NumDistinctActions() const override { return kNumDistinctActions; } + inline std::unique_ptr NewInitialState() const override { + return std::make_unique(shared_from_this(), board_radius_, + expansions_, num_bug_types_, + ansi_color_output_); + } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { return -1; } + double MaxUtility() const override { return 1; } + absl::optional UtilitySum() const override { return 0; } + + std::vector ObservationTensorShape() const override { + return {num_bug_types_ * kNumPlayers // 2 * the # of bug types in play + + 2 // articulation point planes + + 2 // placeability planes + + 2 // covered planes + + 1, // player turn plane + 2 * board_radius_ + 1, // dimensions of a sq board from hex board + 2 * board_radius_ + 1}; + } + + int MaxGameLength() const override { return kMaxGameLength; } + std::unique_ptr DeserializeState( + const std::string& uhp_string) const override; + + ExpansionInfo GetExpansionInfo() const { return expansions_; } + + private: + int board_radius_; + int num_bug_types_; + bool ansi_color_output_; + ExpansionInfo expansions_; +}; + +// helper to construct a game and state from a properly formed UHP string +std::pair, std::unique_ptr> +DeserializeUHPGameAndState(const std::string& uhp_string); + +} // namespace hive +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_HIVE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/hive/hive_board.cc b/scenarios/bargaining/open_spiel/open_spiel/games/hive/hive_board.cc new file mode 100644 index 0000000..d8a5b3b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/hive/hive_board.cc @@ -0,0 +1,926 @@ +// Copyright 2025 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/hive/hive_board.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/abseil-cpp/absl/container/flat_hash_set.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace hive { + +HiveBoard::HiveBoard(int board_radius, ExpansionInfo expansions) + : hex_radius_(std::min(board_radius, kMaxBoardRadius)), + expansions_(expansions), + tile_grid_(SquareDimensions() * SquareDimensions()) {} + +void HiveBoard::GenerateAllMoves(std::vector* out_vec, Colour to_move, + int move_number) const { + GeneratePlacementMoves(out_vec, to_move, move_number); + + // generate legal moves for tiles in play (Queen must also be in play) + if (IsInPlay(to_move, BugType::kQueen)) { + for (auto tile : played_tiles_) { + if (tile.GetColour() == to_move && tile != last_moved_) { + GenerateMovesFor(out_vec, tile, tile.GetBugType(), to_move); + } + } + } +} + +void HiveBoard::GeneratePlacementMoves(std::vector* out, Colour to_move, + int move_number) const { + // first two moves in a game have special placement rules + // move 0: white must play a (non-queen) tile at the origin + if (move_number == 0) { + for (auto tile : HiveTile::GetTilesForColour(to_move)) { + if (tile.GetBugType() == BugType::kQueen) { + continue; + } + + if (!expansions_.IsBugTypeEnabled(tile.GetBugType())) { + continue; + } + + // playing the first tile at the origin is encoded as a move where + // a tile is placed "on top of nothing", i.e. from=tile, to=nullptr + out->emplace_back(Move{tile, HiveTile::kNoneTile, Direction::kAbove}); + } + + // move 1: black must play a (non-queen) tile next to white's first tile. + // this is the only time placing a tile next to an opponent's is allowed + } else if (move_number == 1) { + for (auto tile : HiveTile::GetTilesForColour(to_move)) { + if (tile.GetBugType() == BugType::kQueen) { + continue; + } + + if (!expansions_.IsBugTypeEnabled(tile.GetBugType())) { + continue; + } + + for (int i = 0; i < Direction::kNumCardinalDirections; ++i) { + out->emplace_back( + Move{tile, played_tiles_.front(), static_cast(i)}); + } + } + } else { + // Queen *must* be played by each player's 4th turn (8 total moves). For + // all other turns, find valid placement locations by computing a + // set difference of the player's influence positions + bool queen_placed = + move_number >= 8 || + IsInPlay(to_move == Colour::kWhite ? 
HiveTile::wQ : HiveTile::bQ); + for (auto tile : HiveTile::GetTilesForColour(to_move)) { + if (!expansions_.IsBugTypeEnabled(tile.GetBugType())) { + continue; + } + + if (IsInPlay(tile)) { + continue; + } + + // Queen *must* be played by each player's 4th turn + if ((move_number == 6 || move_number == 7) && !queen_placed && + tile.GetBugType() != BugType::kQueen) { + continue; + } + + // check all influence positions for validity + for (auto pos : colour_influence_[static_cast(to_move)]) { + if (GetTopTileAt(pos).HasValue()) { + continue; + } + + // skip - other player's tile is next to this spot + if (colour_influence_[static_cast(OtherColour(to_move))].contains( + pos)) { + continue; + } + + // for completeness, any neighbouring tile can be used as the reference. + // would be nice to have an alternative action space to limit this down + for (uint8_t i = 0; i < Direction::kNumCardinalDirections; ++i) { + HivePosition to_pos = pos + kNeighbourOffsets[i]; + HiveTile neighbour = GetTopTileAt(to_pos); + if (neighbour.HasValue()) { + out->emplace_back(Move{tile, neighbour, OppositeDirection(i)}); + } + } + } + } + } +} + +void HiveBoard::GenerateMovesFor(std::vector* out, HiveTile tile, + BugType acting_type, Colour to_move) const { + SPIEL_DCHECK_TRUE(expansions_.IsBugTypeEnabled(acting_type)); + + HivePosition start_pos = tile_positions_[tile]; + absl::flat_hash_set positions; + + // using an explicitly provided acting BugType to account for the Mosquito + switch (acting_type) { + case BugType::kQueen: + GenerateValidSlides(&positions, tile, start_pos, 1); + break; + + case BugType::kAnt: + GenerateValidSlides(&positions, tile, start_pos, -1); + break; + + case BugType::kGrasshopper: + GenerateValidGrasshopperPositions(&positions, tile, start_pos); + break; + + case BugType::kSpider: + GenerateValidSlides(&positions, tile, start_pos, 3); + break; + + case BugType::kBeetle: + GenerateValidClimbs(&positions, tile, start_pos); + if (start_pos.H() == 0) { + GenerateValidSlides(&positions, tile, start_pos, 1); + } + break; + + case BugType::kMosquito: + GenerateValidMosquitoPositions(out, tile, start_pos, to_move); + break; + + case BugType::kLadybug: + GenerateValidLadybugPositions(&positions, tile, start_pos); + break; + + case BugType::kPillbug: + GenerateValidSlides(&positions, tile, start_pos, 1); + GenerateValidPillbugSpecials(out, tile, start_pos); + break; + + default: + SpielFatalError("Unrecognized BugType"); + } + + // turn each position into moves by finding neighbouring tiles as reference + for (auto to_pos : positions) { + if (to_pos.H() > 0) { + // only generate kAbove moves when on top the hive + out->emplace_back(Move{tile, GetTopTileAt(to_pos), Direction::kAbove}); + } else { + // check for a valid reference tile in each cardinal direction + for (uint8_t dir = 0; dir < Direction::kNumCardinalDirections; ++dir) { + HiveTile neighbour = GetTopTileAt(to_pos + kNeighbourOffsets[dir]); + if (neighbour.HasValue()) { + if (start_pos.H() > 0 && neighbour == tile) { + // special case where the only neighbouring reference tile is itself + // on top of the stack, so use the tile directly below it + out->emplace_back( + Move{tile, GetTileBelow(start_pos), OppositeDirection(dir)}); + } else if (neighbour != tile) { + out->emplace_back(Move{tile, neighbour, OppositeDirection(dir)}); + } + } + } + } + } +} + +void HiveBoard::GenerateValidSlides(absl::flat_hash_set* out, + HiveTile tile, HivePosition start_pos, + int distance) const { + if (IsPinned(tile) || IsCovered(tile)) { + return; + } + 
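+  // "distance" selects the bug's slide range (see the call sites in
+  // GenerateMovesFor() above): a negative value means an unlimited number of
+  // steps (Ant), 1 means a single step (Queen, Beetle, Pillbug), and 3 means
+  // exactly three steps (Spider).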
+ const bool unlimited_distance = distance < 0; + absl::flat_hash_set visited; + + auto dfs = [&](auto& dfs, HivePosition pos, Direction from, + int depth) -> void { + if (visited.contains(pos) || (!unlimited_distance && depth > distance)) { + return; + } + + // validate positions breadth-first + for (uint8_t dir = 0; dir < Direction::kNumCardinalDirections; ++dir) { + HivePosition to_test = pos + kNeighbourOffsets[dir]; + HiveTile test_tile = GetTopTileAt(to_test); + + if (dir == from) { + continue; + } + + if (visited.contains(to_test)) { + continue; + } + + // all must be false to be a valid slide direction + if (test_tile.HasValue() || + IsGated(pos, static_cast(dir), start_pos) || + !IsConnected(to_test, start_pos)) { + continue; + } + + if (depth == distance || unlimited_distance) { + out->insert(to_test); + } + } + + if (depth == distance) { + return; + } + + visited.insert(pos); + + // traverse depth-first + for (uint8_t dir = 0; dir < Direction::kNumCardinalDirections; ++dir) { + HivePosition to_test = pos + kNeighbourOffsets[dir]; + HiveTile test_tile = GetTopTileAt(to_test); + + if (dir == from) { + continue; + } + + if (visited.contains(to_test)) { + continue; + } + + // all must be false to be a valid slide direction + if (test_tile.HasValue() || + IsGated(pos, static_cast(dir), start_pos) || + !IsConnected(to_test, start_pos)) { + continue; + } + + if (depth == distance || unlimited_distance) { + out->insert(to_test); + } + + dfs(dfs, to_test, OppositeDirection(dir), depth + 1); + + if (!unlimited_distance) { + visited.erase(to_test); + } + } + }; + + dfs(dfs, start_pos, Direction::kNumAllDirections, 1); +} + +void HiveBoard::GenerateValidClimbs(absl::flat_hash_set* out, + HiveTile tile, + HivePosition start_pos) const { + if (IsPinned(tile) || IsCovered(tile)) { + return; + } + + HivePosition ground_pos = start_pos.Grounded(); + + // find the top tile, or an empty position, in each adjacent position + for (uint8_t d = 0; d < Direction::kNumCardinalDirections; ++d) { + HiveTile neighbour = GetTopTileAt(ground_pos + kNeighbourOffsets[d]); + if (neighbour.HasValue()) { + HivePosition to_pos = + tile_positions_[neighbour].NeighbourAt(Direction::kAbove); + // climbing UP: check for a gate at the *target*'s height + if (to_pos.H() > start_pos.H() && + !IsGated({start_pos.Q(), start_pos.R(), to_pos.H()}, + static_cast(d))) { + out->insert(to_pos); + // climbing DOWN or across: check for gate at *this* tile's height + } else if (to_pos.H() <= start_pos.H() && + !IsGated(start_pos, + static_cast( + d)) /*&& !position_cache_.contains(to_pos)*/) { + out->insert(to_pos); + } + } else { + HivePosition to_pos = ground_pos + kNeighbourOffsets[d]; + // climbing DOWN to empty space: check for a gate at *this* tile's height + if (to_pos.H() < start_pos.H() && + !IsGated(start_pos, + static_cast( + d)) /*&& !position_cache_.contains(to_pos)*/) { + out->insert(to_pos); + } + } + } +} + +void HiveBoard::GenerateValidGrasshopperPositions( + absl::flat_hash_set* out, HiveTile tile, + HivePosition start_pos) const { + if (IsPinned(tile) || IsCovered(tile)) { + return; + } + + // in each cardinal direction that contains a tile, jump over all tiles in + // that direction until reaching an empty space to land + for (uint8_t d = 0; d < Direction::kNumCardinalDirections; ++d) { + bool found = false; + HivePosition to_test = start_pos + kNeighbourOffsets[d]; + while (GetTopTileAt(to_test).HasValue()) { + to_test += kNeighbourOffsets[d]; + found = true; + } + + if (found) { + out->insert(to_test); + } + } 
+} + +void HiveBoard::GenerateValidLadybugPositions( + absl::flat_hash_set* out, HiveTile tile, + HivePosition start_pos) const { + if (IsPinned(tile) || IsCovered(tile)) { + return; + } + + // A lady bug moves in *exactly* 3 distinct steps: a climb onto the hive, + // a slide/climb across the hive, and a climb down from the hive + absl::flat_hash_set intermediates1; + absl::flat_hash_set intermediates2; + absl::flat_hash_set intermediates3; + + // step 1 + GenerateValidClimbs(&intermediates1, tile, start_pos); + + // step 2 + for (auto pos : intermediates1) { + GenerateValidClimbs(&intermediates2, tile, pos); + } + + // step 3 + for (auto pos : intermediates2) { + // ensure destination is above the hive but not "above itself" + if (pos.H() == 0 || + pos == start_pos + kNeighbourOffsets[Direction::kAbove]) { + continue; + } + + GenerateValidClimbs(&intermediates3, tile, pos); + } + + // only consider moves that finish on ground level + for (auto pos : intermediates3) { + if (pos.H() == 0) { + out->insert(pos); + } + } +} + +void HiveBoard::GenerateValidMosquitoPositions(std::vector* out, + HiveTile tile, + HivePosition start_pos, + Colour to_move) const { + // we defer IsPinned() check as the Mosquito could still use a Pillbug special + if (IsCovered(tile)) { + return; + } + + // when on top of the hive, a Mosquito can only act as a Beetle + if (start_pos.H() > 0) { + GenerateMovesFor(out, tile, BugType::kBeetle, to_move); + return; + } + + // Mosquitos copy the movement capabilities of any adjacent bug type + std::array(BugType::kNumBugTypes)> types_seen{}; + for (auto neighbour : NeighboursOf(start_pos)) { + BugType type = neighbour.GetBugType(); + + if (!types_seen[static_cast(type)]) { + types_seen[static_cast(type)] = true; + + if (type == BugType::kMosquito) { + continue; + } + + // Queen and Spider moves are strict subsets of an Ant's moves + if ((type == BugType::kQueen || type == BugType::kSpider) && + types_seen[static_cast(BugType::kAnt)]) { + continue; + } + + GenerateMovesFor(out, tile, type, to_move); + } + } +} + +void HiveBoard::GenerateValidPillbugSpecials(std::vector* out, + HiveTile tile, + HivePosition start_pos) const { + // Pillbug can still perform its special when Pinned + if (IsCovered(tile)) { + return; + } + + std::vector valid_targets; + std::vector valid_positions; + + for (uint8_t dir = 0; dir < Direction::kNumCardinalDirections; ++dir) { + // ensure there is no "gate" blocking above for this direction + if (IsGated(start_pos + kNeighbourOffsets[Direction::kAbove], + static_cast(dir))) { + continue; + } + + HivePosition test_pos = start_pos + kNeighbourOffsets[dir]; + HiveTile test_tile = GetTopTileAt(test_pos); + if (test_tile.HasValue()) { + // valid IFF the target tile is not: Pinned, Covered, the LastMovedTile, + // or above the hive + if (!IsPinned(test_tile) && !IsCovered(test_tile) && + test_tile != LastMovedTile() && GetPositionOf(test_tile).H() == 0) { + valid_targets.push_back(test_tile); + } + } else { + valid_positions.push_back(test_pos); + } + } + + // for every target_tile, add a move to every valid position by checking + // that position for its neighbours + for (auto target_tile : valid_targets) { + for (auto target_pos : valid_positions) { + for (uint8_t dir = 0; dir < Direction::kNumCardinalDirections; ++dir) { + HiveTile ref_tile = GetTopTileAt(target_pos + kNeighbourOffsets[dir]); + if (ref_tile.HasValue() && ref_tile != target_tile) { + out->emplace_back( + Move{target_tile, ref_tile, + OppositeDirection(static_cast(dir))}); + } + } + } + 
} +} + +std::vector HiveBoard::NeighboursOf(HivePosition pos, + HivePosition to_ignore) const { + std::vector neighbours; + for (auto neighbour : pos.Neighbours()) { + auto tile = GetTopTileAt(neighbour); + if (tile.HasValue()) { + neighbours.push_back(tile); + } + } + + return neighbours; +} + +bool HiveBoard::MoveTile(Move move) { + SPIEL_DCHECK_TRUE(move.from.HasValue()); + + // compute the final position from the reference tile + direction + HivePosition new_pos; + if (move.to.HasValue()) { + new_pos = tile_positions_[move.to] + kNeighbourOffsets[move.direction]; + + if (IsOutOfBounds(new_pos)) { + if (RecenterBoard(new_pos)) { + new_pos = tile_positions_[move.to] + kNeighbourOffsets[move.direction]; + } else { + // if the new position is still out of bounds, force terminate the game + // as a draw (possible with board_sizes smaller than kMaxBoardRadius) + return false; + } + } + + // if the reference tile was higher on the hive, the new_pos may need to + // "fall down" until it hits either the ground or another tile + if (new_pos.H() > 0) { + HiveTile top_tile = GetTopTileAt(new_pos); + if (top_tile.HasValue()) { + new_pos.SetH(tile_positions_[top_tile].H() + 1); + } else { + new_pos.SetH(0); + } + } + + } else { + // having no "to" tile encodes the opening move at the origin + new_pos = kOriginPosition; + } + + HivePosition old_pos = tile_positions_[move.from]; + if (old_pos == kNullPosition) { + played_tiles_.push_back(move.from); + } + + if (new_pos != old_pos) { + last_moved_from_ = old_pos; + } + + size_t old_idx = AxialToIndex(old_pos); + size_t new_idx = AxialToIndex(new_pos); + + // if a tile already exists at the new position, it's now condsidered covered + if (tile_grid_[new_idx].HasValue()) { + for (int i = 0; i < covered_tiles_.size(); ++i) { + if (!covered_tiles_[i].HasValue()) { + covered_tiles_[i] = tile_grid_[new_idx]; + break; + } + } + } + + // perform the move + tile_grid_[new_idx] = move.from; + tile_positions_[move.from] = new_pos; + last_moved_ = move.from; + + // potentially reinstate a covered tile at the old position + if (old_pos.H() > 0) { + // reverse iterating guarantees the first tile found has the next highest H + for (int i = covered_tiles_.size() - 1; i >= 0; --i) { + if (covered_tiles_[i] == HiveTile::kNoneTile) { + continue; + } + + if (old_pos.Grounded() == GetPositionOf(covered_tiles_[i]).Grounded()) { + tile_grid_[old_idx] = covered_tiles_[i]; + covered_tiles_[i] = HiveTile::kNoneTile; + + // left-rotate the kNoneTile to the end of the covered_tiles_ array + // to maintain height order + std::rotate(covered_tiles_.begin() + i, covered_tiles_.begin() + i + 1, + covered_tiles_.end()); + break; + } + } + } else if (old_pos != kNullPosition) { + tile_grid_[old_idx] = HiveTile::kNoneTile; + } + + // update influence of the moved tile's colour. 
Potentially have to update + // both influences if the moved tile was part of a stack + UpdateInfluence(move.from.GetColour()); + if (old_pos.H() > 0 || new_pos.H() > 0) { + UpdateInfluence(OtherColour(move.from.GetColour())); + } + + UpdateArticulationPoints(); + + return true; +} + +bool HiveBoard::IsOutOfBounds(HivePosition pos) const { + return pos.DistanceTo(kOriginPosition) > hex_radius_; +} + +bool HiveBoard::RecenterBoard(HivePosition new_pos) { + int8_t max_Q = 0; + int8_t min_Q = 0; + int8_t max_R = 0; + int8_t min_R = 0; + int8_t max_S = 0; + int8_t min_S = 0; + + for (auto tile : played_tiles_) { + HivePosition pos = GetPositionOf(tile); + max_Q = std::max(max_Q, pos.Q()); + min_Q = std::min(min_Q, pos.Q()); + max_R = std::max(max_R, pos.R()); + min_R = std::min(min_R, pos.R()); + max_S = std::max(max_S, pos.S()); + min_S = std::min(min_S, pos.S()); + } + + // determine the new "center" by averaging each axis and round + // to the nearest integer hex coordinate + double avg_Q = (max_Q + min_Q) / 2.0; + double avg_R = (max_R + min_R) / 2.0; + double avg_S = (max_S + min_S) / 2.0; + + int8_t round_Q = std::round(avg_Q); + int8_t round_R = std::round(avg_R); + int8_t round_S = std::round(avg_S); + + double diff_Q = std::abs(round_Q - avg_Q); + double diff_R = std::abs(round_R - avg_R); + double diff_S = std::abs(round_S - avg_S); + + if (diff_Q > diff_R && diff_Q > diff_S) { + round_Q = -round_R - round_S; + } else if (diff_R > diff_S) { + round_R = -round_Q - round_S; + } + + HivePosition offset = HivePosition(-round_Q, -round_R); + + // there are no valid directions to reposition the board without going OOB + if (offset == kOriginPosition || IsOutOfBounds(new_pos + offset)) { + return false; + } + + // apply this offset to each valid position + bool oob = false; + std::for_each(played_tiles_.begin(), played_tiles_.end(), + [this, offset, &oob](HiveTile tile) { + tile_positions_[tile] += offset; + + // this usually occurs when tiles exist at each axes' extremes + if (IsOutOfBounds(tile_positions_[tile])) { + oob = true; + } + }); + + if (oob) { + return false; + } + + // recalculate grid indices + std::fill(tile_grid_.begin(), tile_grid_.end(), HiveTile::kNoneTile); + for (uint8_t i = HiveTile::wQ; i < HiveTile::kNumTiles; ++i) { + if (IsInPlay(i) && !IsCovered(i)) { + tile_grid_[AxialToIndex(GetPositionOf(i))] = i; + } + } + + UpdateInfluence(Colour::kWhite); + UpdateInfluence(Colour::kBlack); + UpdateArticulationPoints(); + + return true; +} + +void HiveBoard::Pass() { + last_moved_ = HiveTile::kNoneTile; + last_moved_from_ = kNullPosition; +} + +bool HiveBoard::IsQueenSurrounded(Colour c) const { + HiveTile queen = c == Colour::kWhite ? 
HiveTile::wQ : HiveTile::bQ; + if (!IsInPlay(c, BugType::kQueen)) { + return false; + } + + for (auto neighbour_pos : tile_positions_[queen].Neighbours()) { + if (GetTopTileAt(neighbour_pos) == HiveTile::kNoneTile) { + return false; + } + } + + return true; +} + +// tile accessor with bounds checking +HiveTile HiveBoard::GetTopTileAt(HivePosition pos) const { + if (pos.DistanceTo(kOriginPosition) > Radius()) { + return HiveTile::kNoneTile; + } + + SPIEL_DCHECK_GE(AxialToIndex(pos), 0); + SPIEL_DCHECK_LT(AxialToIndex(pos), tile_grid_.size()); + return tile_grid_[AxialToIndex(pos)]; +} + +HiveTile HiveBoard::GetTileBelow(HivePosition pos) const { + SPIEL_DCHECK_TRUE(pos.H() > 0); + + HivePosition below = pos - kNeighbourOffsets[Direction::kAbove]; + // first check the top tile at this axial position + if (GetPositionOf(GetTopTileAt(below)) == below) { + return GetTopTileAt(below); + } + + // otherwise, check the covered_tiles_ list + for (auto tile : covered_tiles_) { + if (tile.HasValue() && tile_positions_[tile] == below) { + return tile; + } + } + + return HiveTile::kNoneTile; +} + +// IsGated verifies requirement (3) in GenerateValidSlides() +bool HiveBoard::IsGated(HivePosition pos, Direction d, + HivePosition to_ignore) const { + HivePosition cw = pos + kNeighbourOffsets[ClockwiseDirection(d)]; + HivePosition ccw = pos + kNeighbourOffsets[CounterClockwiseDirection(d)]; + + bool cw_exists = + cw != to_ignore && GetPositionOf(GetTopTileAt(cw)).H() >= pos.H(); + bool ccw_exists = + ccw != to_ignore && GetPositionOf(GetTopTileAt(ccw)).H() >= pos.H(); + return pos.H() == 0 ? cw_exists == ccw_exists : cw_exists && ccw_exists; +} + +bool HiveBoard::IsConnected(HivePosition pos, HivePosition to_ignore) const { + return !NeighboursOf(pos, to_ignore).empty(); +} + +bool HiveBoard::IsCovered(HivePosition pos) const { + return std::any_of( + covered_tiles_.begin(), covered_tiles_.end(), + [this, pos](HiveTile tile) { return GetPositionOf(tile) == pos; }); +} + +bool HiveBoard::IsCovered(HiveTile tile) const { + return tile.HasValue() && + std::find(covered_tiles_.begin(), covered_tiles_.end(), tile) != + covered_tiles_.end(); +} + +bool HiveBoard::IsPinned(HivePosition pos) const { + return articulation_points_.contains(pos); +} + +bool HiveBoard::IsPinned(HiveTile tile) const { + return tile.HasValue() && IsPinned(tile_positions_[tile]); +} + +bool HiveBoard::IsPlaceable(Colour c, HivePosition pos) const { + return colour_influence_[static_cast(c)].contains(pos) && + !colour_influence_[static_cast(OtherColour(c))].contains(pos) && + !IsInPlay(GetTopTileAt(pos)); +} + +// clear and recalculate this tile's player's influence range +void HiveBoard::UpdateInfluence(Colour c) { + colour_influence_[static_cast(c)].clear(); + for (auto tile : played_tiles_) { + if (tile.GetColour() != c) { + continue; + } + + // if a tile is covered, it has no influence + if (IsCovered(tile)) { + continue; + } + + // exert influence on all neighbouring positions + for (auto pos : tile_positions_[tile].Neighbours()) { + // 0 out the height, so that stacked tiles influence the ground tiles + // around them, not tiles floating in air + colour_influence_[static_cast(c)].insert(pos.Grounded()); + } + } +} + +void HiveBoard::UpdateArticulationPoints() { + articulation_points_.clear(); + + int visit_order = 0; + absl::flat_hash_set visited; + absl::flat_hash_map entry_point; + absl::flat_hash_map low_point; + + auto dfs = [&](auto& dfs, HivePosition vertex, HivePosition parent, + bool is_root) -> void { + 
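+    // Standard cut-vertex DFS (see the cp-algorithms link in hive_board.h):
+    // entry_point[v] is v's DFS discovery time, and low_point[v] is the
+    // earliest discovery time reachable from v's subtree using at most one
+    // back edge. A non-root vertex v is an articulation point when one of its
+    // children c has low_point[c] >= entry_point[v]; the root is one when it
+    // has more than one DFS child.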
visited.insert(vertex); + entry_point[vertex] = low_point[vertex] = visit_order; + ++visit_order; + + int children = 0; + for (uint8_t dir = 0; dir < Direction::kNumCardinalDirections; ++dir) { + HivePosition to_vertex = vertex + kNeighbourOffsets[dir]; + if (!GetTopTileAt(to_vertex).HasValue()) { + continue; + } + + if (to_vertex == parent) { + continue; + } + + if (visited.contains(to_vertex)) { + low_point[vertex] = std::min(low_point[vertex], entry_point[to_vertex]); + } else { + dfs(dfs, to_vertex, vertex, false); + ++children; + low_point[vertex] = std::min(low_point[vertex], low_point[to_vertex]); + if (low_point[to_vertex] >= entry_point[vertex] && !is_root) { + articulation_points_.insert(vertex); + } + } + } + + if (is_root && children > 1) { + articulation_points_.insert(vertex); + } + }; + + // any arbitrary starting point would do, but the Queen is guaranteed to be + // in play when generating moves + dfs(dfs, tile_positions_[HiveTile::wQ], kNullPosition, true); +} + +std::string HiveTile::ToUHP() const { + SPIEL_DCHECK_TRUE(HasValue()); + std::string uhp = ""; + + // colour + GetColour() == Colour::kWhite ? absl::StrAppend(&uhp, "w") + : absl::StrAppend(&uhp, "b"); + + // bug type + BugType type = GetBugType(); + switch (type) { + case BugType::kQueen: + absl::StrAppend(&uhp, "Q"); + break; + case BugType::kAnt: + absl::StrAppend(&uhp, "A"); + break; + case BugType::kGrasshopper: + absl::StrAppend(&uhp, "G"); + break; + case BugType::kSpider: + absl::StrAppend(&uhp, "S"); + break; + case BugType::kBeetle: + absl::StrAppend(&uhp, "B"); + break; + case BugType::kLadybug: + absl::StrAppend(&uhp, "L"); + break; + case BugType::kMosquito: + absl::StrAppend(&uhp, "M"); + break; + case BugType::kPillbug: + absl::StrAppend(&uhp, "P"); + break; + default: + SpielFatalError("HiveTile::ToUHP() - HiveTile has an invalid bug type!"); + } + + // bug type ordinal (for bugs where there can be more than 1) + if (type == BugType::kAnt || type == BugType::kGrasshopper || + type == BugType::kSpider || type == BugType::kBeetle) { + absl::StrAppend(&uhp, GetOrdinal()); + } + + return uhp; +} + +// UHP string representation of a move +std::string Move::ToUHP() { + // special case: pass for when a player has no possible legal moves + if (IsPass()) { + return "pass"; + } + + // special case: for the first turn, there is no reference tile 'to' + if (!to.HasValue()) { + return from.ToUHP(); + } + + std::string reference_tile_uhp = to.ToUHP(); + std::string offset_formatted = ""; + + // add a prefix or suffix depending on the relative position + switch (direction) { + case Direction::kNE: + offset_formatted = reference_tile_uhp + "/"; + break; + case Direction::kE: + offset_formatted = reference_tile_uhp + "-"; + break; + case Direction::kSE: + offset_formatted = reference_tile_uhp + "\\"; + break; + case Direction::kSW: + offset_formatted = "/" + reference_tile_uhp; + break; + case Direction::kW: + offset_formatted = "-" + reference_tile_uhp; + break; + case Direction::kNW: + offset_formatted = "\\" + reference_tile_uhp; + break; + case Direction::kAbove: + offset_formatted = reference_tile_uhp; + break; + default: + SpielFatalError("Move::ToUHP() - Move has an invalid direction!"); + } + + return absl::StrCat(from.ToUHP(), " ", offset_formatted); +} + +} // namespace hive +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/hive/hive_board.h b/scenarios/bargaining/open_spiel/open_spiel/games/hive/hive_board.h new file mode 100644 index 0000000..8a53237 --- /dev/null 
+++ b/scenarios/bargaining/open_spiel/open_spiel/games/hive/hive_board.h @@ -0,0 +1,682 @@ +// Copyright 2025 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_HIVE_BOARD_H_ +#define OPEN_SPIEL_GAMES_HIVE_BOARD_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/base/attributes.h" +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/abseil-cpp/absl/container/flat_hash_set.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace hive { + +enum class BugType : uint8_t { + kQueen = 0, + kAnt, + kGrasshopper, + kSpider, + kBeetle, + kMosquito, + kLadybug, + kPillbug, + kNumBugTypes, + kNone, +}; + +enum Direction : uint8_t { + kNE = 0, + kE, + kSE, + kSW, + kW, + kNW, + kAbove, + kNumCardinalDirections = kAbove, // syntactic sugar for iterating + kNumAllDirections +}; + +enum class Colour { kWhite, kBlack }; + +struct ExpansionInfo { + bool uses_mosquito; + bool uses_ladybug; + bool uses_pillbug; + + bool HasAny() const { return uses_mosquito || uses_ladybug || uses_pillbug; } + bool IsBugTypeEnabled(BugType type) const { + switch (type) { + case BugType::kQueen: + case BugType::kAnt: + case BugType::kGrasshopper: + case BugType::kSpider: + case BugType::kBeetle: + return true; + case BugType::kMosquito: + return uses_mosquito; + case BugType::kLadybug: + return uses_ladybug; + case BugType::kPillbug: + return uses_pillbug; + default: + return false; + } + } +}; + +// HivePosition +// +// Describes as position using the Axial coordinate system (q,r) as well as +// a height to account for beetles/mosquitos on top of the hive +// https://www.redblobgames.com/grids/hexagons/#coordinates-axial +class HivePosition { + public: + // default initialization to kNullPosition + constexpr HivePosition() : q_(0), r_(0), h_(-1) {} + constexpr HivePosition(int8_t q, int8_t r, int8_t h = 0) + : q_(q), r_(r), h_(h) {} + + int8_t Q() const { return q_; } + int8_t R() const { return r_; } + + // height above the hive, where 0 == "ground" + int8_t H() const { return h_; } + + // implicit 3rd axial coordinate S to maintain constraint: q + r + s = 0 + int8_t S() const { return -q_ - r_; } + + int DistanceTo(HivePosition other) const { + return (std::abs(q_ - other.q_) + + std::abs((q_ - other.q_) + (r_ - other.r_)) + + std::abs(r_ - other.r_)) / + 2; + } + + bool operator==(HivePosition other) const { + return q_ == other.q_ && r_ == other.r_ && h_ == other.h_; + } + + bool operator!=(HivePosition other) const { return !operator==(other); } + + HivePosition operator+(HivePosition other) const { + return HivePosition(q_ + other.q_, r_ + other.r_, h_ + other.h_); + } + + HivePosition operator-(HivePosition other) const { + return HivePosition(q_ - other.q_, r_ - other.r_, h_ - other.h_); + } + + HivePosition& operator+=(HivePosition other) { + 
q_ += other.q_; + r_ += other.r_; + h_ += other.h_; + + return *this; + } + + std::string ToString() const { + return absl::StrCat("(", std::to_string(q_), ", ", std::to_string(r_), ", ", + std::to_string(h_), ")"); + } + + std::array Neighbours() + const { + return {{{static_cast(q_ + 1), static_cast(r_ - 1)}, + {static_cast(q_ + 1), static_cast(r_)}, + {static_cast(q_), static_cast(r_ + 1)}, + {static_cast(q_ - 1), static_cast(r_ + 1)}, + {static_cast(q_ - 1), static_cast(r_)}, + {static_cast(q_), static_cast(r_ - 1)}}}; + } + + HivePosition NeighbourAt(Direction dir) const; + HivePosition Grounded() const { return {q_, r_, 0}; } + + void SetQ(int8_t q) { q_ = q; } + void SetR(int8_t r) { r_ = r; } + void SetH(int8_t h) { h_ = h; } + + private: + int8_t q_; + int8_t r_; + int8_t h_; +}; + +inline constexpr int kMaxTileCount = 28; +inline constexpr int kMaxBoardRadius = 14; +inline constexpr int kDefaultBoardRadius = 8; +inline constexpr std::array(BugType::kNumBugTypes)> + kBugCounts = {{1, 3, 3, 2, 2, 1, 1, 1}}; +inline constexpr Player kPlayerWhite = 0; +inline constexpr Player kPlayerBlack = 1; +inline constexpr HivePosition kOriginPosition{0, 0, 0}; +inline constexpr HivePosition kNullPosition{0, 0, -1}; + +// support hashing for HivePosition +template +H AbslHashValue(H state, HivePosition pos) { + return H::combine(std::move(state), pos.Q(), pos.R(), pos.H()); +} + +// All offsets starting at top-right neighbour, and then rotating clockwise, +// plus above for beetles/mosquitos +// 5 0 +// 4 1 +// 3 2 +constexpr std::array + kNeighbourOffsets = { + // NE E SE SW W NW Above + {{1, -1}, {1, 0}, {0, 1}, {-1, 1}, {-1, 0}, {0, -1}, {0, 0, 1}}}; + +inline HivePosition HivePosition::NeighbourAt(Direction dir) const { + return operator+(kNeighbourOffsets[dir]); +} + +inline std::ostream& operator<<(std::ostream& stream, HivePosition pos) { + return stream << pos.ToString(); +} + +inline Player OtherPlayer(Player p) { + SPIEL_DCHECK_TRUE(p != kInvalidPlayer); + return p == kPlayerWhite ? kPlayerBlack : kPlayerWhite; +} + +inline Colour OtherColour(Colour c) { + return c == Colour::kWhite ? Colour::kBlack : Colour::kWhite; +} + +inline Colour PlayerToColour(Player p) { + SPIEL_DCHECK_TRUE(p != kInvalidPlayer); + return p == kPlayerWhite ? Colour::kWhite : Colour::kBlack; +} + +inline Direction OppositeDirection(uint8_t in) { + SPIEL_DCHECK_TRUE(in < Direction::kNumCardinalDirections); + return static_cast((in + 3) % Direction::kNumCardinalDirections); +} + +inline Direction ClockwiseDirection(uint8_t in) { + SPIEL_DCHECK_TRUE(in < Direction::kNumCardinalDirections); + return static_cast((in + 1) % Direction::kNumCardinalDirections); +} + +inline Direction CounterClockwiseDirection(uint8_t in) { + SPIEL_DCHECK_TRUE(in < Direction::kNumCardinalDirections); + return static_cast((in + 5) % Direction::kNumCardinalDirections); +} + +// Wrapper class that uses an enum to represent each unique physical tile. 
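+// (e.g. HiveTile::wA2 is White's second Ant: GetColour() == Colour::kWhite,
+// GetBugType() == BugType::kAnt, GetOrdinal() == 2, and ToUHP() == "wA2")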
+// This would be similar to using a uint8_t with bit fields to encode +// colour/type/ordinal, but instead with the convenient features of a class +class HiveTile { + public: + // the Value enum is a ubiquitous list of physical tiles found in the game + // using their corresponding UHP names + enum Value : uint8_t { + // white tiles + wQ = 0, + wA1, + wA2, + wA3, + wG1, + wG2, + wG3, + wS1, + wS2, + wB1, + wB2, + wM, + wL, + wP, + // black tiles + bQ, + bA1, + bA2, + bA3, + bG1, + bG2, + bG3, + bS1, + bS2, + bB1, + bB2, + bM, + bL, + bP, + // constants + kNumTiles, + kNoneTile = kNumTiles + }; + + constexpr HiveTile() : tile_name_(kNoneTile) {} + constexpr HiveTile(Value val) : tile_name_(val) {} + constexpr HiveTile(uint8_t val) : tile_name_(static_cast(val)) {} + + // evaluates to the Value enum when used in expressions + constexpr operator Value() const { return tile_name_; } + + constexpr bool HasValue() const { return tile_name_ < kNoneTile; } + + static constexpr std::array GetTilesForColour(Colour c) { + switch (c) { + case Colour::kWhite: + return {wQ, wA1, wA2, wA3, wG1, wG2, wG3, + wS1, wS2, wB1, wB2, wM, wL, wP}; + case Colour::kBlack: + return {bQ, bA1, bA2, bA3, bG1, bG2, bG3, + bS1, bS2, bB1, bB2, bM, bL, bP}; + } + } + + static constexpr Value GetTileFrom(Colour c, BugType type, + uint8_t ordinal = 1) { + uint8_t retval = c == Colour::kWhite ? wQ : bQ; + + // sort of like reverse-iterating through an enum to determine its index + switch (type) { + case BugType::kPillbug: + retval += kBugCounts[static_cast(BugType::kLadybug)]; + ABSL_FALLTHROUGH_INTENDED; + case BugType::kLadybug: + retval += kBugCounts[static_cast(BugType::kMosquito)]; + ABSL_FALLTHROUGH_INTENDED; + case BugType::kMosquito: + retval += kBugCounts[static_cast(BugType::kBeetle)]; + ABSL_FALLTHROUGH_INTENDED; + case BugType::kBeetle: + retval += kBugCounts[static_cast(BugType::kSpider)]; + ABSL_FALLTHROUGH_INTENDED; + case BugType::kSpider: + retval += kBugCounts[static_cast(BugType::kGrasshopper)]; + ABSL_FALLTHROUGH_INTENDED; + case BugType::kGrasshopper: + retval += kBugCounts[static_cast(BugType::kAnt)]; + ABSL_FALLTHROUGH_INTENDED; + case BugType::kAnt: + retval += kBugCounts[static_cast(BugType::kQueen)]; + ABSL_FALLTHROUGH_INTENDED; + default: + // no-op + break; + } + + return static_cast(retval + ordinal - 1); + } + + static Value UHPToTile(const std::string& uhp) { + static absl::flat_hash_map string_mapping = { + {"wQ", wQ}, + {"wA1", wA1}, + {"wA2", wA2}, + {"wA3", wA3}, + {"wG1", wG1}, + {"wG2", wG2}, + {"wG3", wG3}, + {"wS1", wS1}, + {"wS2", wS2}, + {"wB1", wB1}, + {"wB2", wB2}, + {"wM", wM}, + {"wL", wL}, + {"wP", wP}, + // + {"bQ", bQ}, + {"bA1", bA1}, + {"bA2", bA2}, + {"bA3", bA3}, + {"bG1", bG1}, + {"bG2", bG2}, + {"bG3", bG3}, + {"bS1", bS1}, + {"bS2", bS2}, + {"bB1", bB1}, + {"bB2", bB2}, + {"bM", bM}, + {"bL", bL}, + {"bP", bP}}; + + auto it = string_mapping.find(uhp); + SPIEL_CHECK_TRUE(it != string_mapping.end()); + return it->second; + } + + static std::string TileToUHP(HiveTile tile) { + static absl::flat_hash_map enum_mapping = {{wQ, "wQ"}, + {wA1, "wA1"}, + {wA2, "wA2"}, + {wA3, "wA3"}, + {wG1, "wG1"}, + {wG2, "wG2"}, + {wG3, "wG3"}, + {wS1, "wS1"}, + {wS2, "wS2"}, + {wB1, "wB1"}, + {wB2, "wB2"}, + {wM, "wM"}, + {wL, "wL"}, + {wP, "wP"}, + // + {bQ, "bQ"}, + {bA1, "bA1"}, + {bA2, "bA2"}, + {bA3, "bA3"}, + {bG1, "bG1"}, + {bG2, "bG2"}, + {bG3, "bG3"}, + {bS1, "bS1"}, + {bS2, "bS2"}, + {bB1, "bB1"}, + {bB2, "bB2"}, + {bM, "bM"}, + {bL, "bL"}, + {bP, "bP"}}; + + auto it = 
enum_mapping.find(tile); + SPIEL_CHECK_TRUE(it != enum_mapping.end()); + return it->second; + } + + constexpr BugType GetBugType() const { + switch (tile_name_) { + case wQ: + case bQ: + return BugType::kQueen; + case wA1: + case wA2: + case wA3: + case bA1: + case bA2: + case bA3: + return BugType::kAnt; + case wG1: + case wG2: + case wG3: + case bG1: + case bG2: + case bG3: + return BugType::kGrasshopper; + case wS1: + case wS2: + case bS1: + case bS2: + return BugType::kSpider; + case wB1: + case wB2: + case bB1: + case bB2: + return BugType::kBeetle; + case wM: + case bM: + return BugType::kMosquito; + case wL: + case bL: + return BugType::kLadybug; + case wP: + case bP: + return BugType::kPillbug; + default: + return BugType::kNone; + } + } + + constexpr Colour GetColour() const { + switch (tile_name_) { + case wQ: + case wA1: + case wA2: + case wA3: + case wG1: + case wG2: + case wG3: + case wS1: + case wS2: + case wB1: + case wB2: + case wM: + case wL: + case wP: + return Colour::kWhite; + case bQ: + case bA1: + case bA2: + case bA3: + case bG1: + case bG2: + case bG3: + case bS1: + case bS2: + case bB1: + case bB2: + case bM: + case bL: + case bP: + return Colour::kBlack; + default: + SpielFatalError("GetColour() - invalid enum value"); + } + } + + constexpr uint8_t GetOrdinal() const { + switch (tile_name_) { + case kNoneTile: + return 0; + case wA2: + case wG2: + case wS2: + case wB2: + case bA2: + case bG2: + case bS2: + case bB2: + return 2; + case wA3: + case wG3: + case bA3: + case bG3: + return 3; + default: + return 1; + } + } + + std::string ToUHP() const; + + private: + Value tile_name_; +}; + +// The in-game representation of an Action +struct Move { + HiveTile from; // the tile that's being moved + HiveTile to; // the reference tile + Direction direction; // offset applied to the reference tile + + std::string ToUHP(); + bool IsPass() const { return !from.HasValue(); } +}; + +// HiveBoard +// +// One of the most apparent problems to solve for Hive is how to represent an +// infinitely-sized board in a fixed-sized manner? This is especially the case +// when also needing an accurate 2D representation of the board state for use +// as an ObservationTensor. +// +// While the game logic could be implemented with a wrap-around grid big enough +// to account for all tiles (a 29x29 grid for all expansion pieces), the +// resulting ObservationTensor would be: +// 1) massively large in size (compared to the typical size of a Hive game) +// 2) be extremely sparse, which could negatively affect learning, and +// 3) unsuitable for 2D convolution in AlphaZero with no way to account for +// hexagonal wrapping of the tensor (that I know of). And even if there +// was a potential solution, a vast majority of playthroughs would be +// unlikely to ever reach a state where wrapping is necessary +// +// With all of that in mind, I have chosen the following board design: +// - the board will be stored as a fixed-sized and flattened 2d array where +// each index contains an enum describing either the existance of a +// specific tile, or an empty space on the grid +// - each tile enum can be used to index into fixed-sized arrays that store +// information about that specific tile. e.g. tile_positions_[::wA2] stores +// the HivePosition of white's 2nd Ant tile +// - most of the game logic is computed using the Axial coordinate system +// (described above under HivePosition), then later translated to an index +// when needed for the grid. 
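+//     (for instance, with Radius() == 2 the axial position (1, -2) flattens
+//     to index (1 + 2) + ((-2 + 2) * 5) == 3, which is the bQ square in the
+//     diagram below)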
This helps with the maths and allows for +// quick computation of rotational and reflectional symmetry +// +// Example board state with radius == 2 to illustrate (X means empty): +// +// ___0____1____2____3____4__ +// X bQ X 0 | | | | bQ | | +// |____|____|____|____|____| +// X X bA1 X 1 | | | | bA1| | +// AxialToIndex() |____|____|____|____|____| +// X wQ wL X X -------------> 2 | | wQ | wL | | | +// |____|____|____|____|____| +// X wG1 X X 3 | | wG1| | | | +// |____|____|____|____|____| +// X X X 4 | | | | | | +// |____|____|____|____|____| +// +class HiveBoard { + public: + // Creates a regular hexagonal board with given radius from the center + HiveBoard(int board_radius, ExpansionInfo expansions); + + int Radius() const { return hex_radius_; } + int SquareDimensions() const { return Radius() * 2 + 1; } + + // Axial position (Q,R) is stored at the 2d-index: + // grid_[R + Radius()][Q + Radius()] + // which translates to the flattened index: + // grid_[Q + Radius() + ((R + Radius()) * SqDims)] + size_t AxialToIndex(HivePosition pos) const { + return pos.Q() + Radius() + ((pos.R() + Radius()) * SquareDimensions()); + } + + HiveTile GetTopTileAt(HivePosition pos) const; + HiveTile GetTileBelow(HivePosition pos) const; + const std::vector& GetPlayedTiles() const { return played_tiles_; } + std::vector NeighboursOf( + HivePosition pos, HivePosition to_ignore = kNullPosition) const; + HivePosition GetPositionOf(HiveTile tile) const { + return tile.HasValue() ? tile_positions_[tile] : kNullPosition; + } + + HivePosition LastMovedFrom() const { return last_moved_from_; } + HiveTile LastMovedTile() const { return last_moved_; } + + // returns false if the move was unsuccessful + bool MoveTile(Move move); + void Pass(); + + bool IsQueenSurrounded(Colour c) const; + bool IsGated(HivePosition pos, Direction d, + HivePosition to_ignore = kNullPosition) const; + bool IsConnected(HivePosition pos, HivePosition to_ignore) const; + bool IsCovered(HivePosition pos) const; + bool IsCovered(HiveTile tile) const; + bool IsOutOfBounds(HivePosition pos) const; + bool IsPinned(HivePosition pos) const; + bool IsPinned(HiveTile tile) const; + bool IsPlaceable(Colour c, HivePosition pos) const; + bool IsInPlay(HiveTile tile) const { + return tile.HasValue() && tile_positions_[tile] != kNullPosition; + } + bool IsInPlay(Colour c, BugType type, int ordinal = 1) const { + return IsInPlay(HiveTile::GetTileFrom(c, type, ordinal)); + } + + void GenerateAllMoves(std::vector* out, Colour to_move, + int move_number) const; + void GenerateMovesFor(std::vector* out, HiveTile tile, + BugType acting_type, Colour to_move) const; + + private: + // moves all tiles closer to the center relative to the distance of each axis + bool RecenterBoard(HivePosition new_pos); + + // creates moves where a player can place an unplayed-tile from hand + void GeneratePlacementMoves(std::vector* out, Colour to_move, + int move_number) const; + + // In order for a tile to slide in direction D, the following must hold true: + // 1) The tile must not be "pinned" (i.e. at an articulation point) + // 2) The tile must not be covered by another tile + // 3) The tile must be able to physically slide into the position without + // displacing other tiles. 
That is, when sliding in direction D, exactly + // one of the two adjacent positions (D-1) (D+1) must be empty to + // physically move in, and the other position must be occupied in order + // to remain attached to the hive at all times (One-Hive rule) + void GenerateValidSlides(absl::flat_hash_set* out, + HiveTile tile, HivePosition pos, int distance) const; + + // A climb consists of a slide on top the hive laterally, with an optional + // vertical movement, in any non-gated direction. This slide is less + // restrictive than a ground-level slide as you do not require neighbours + // to remain connected to the hive + void GenerateValidClimbs(absl::flat_hash_set* out, + HiveTile tile, HivePosition pos) const; + + void GenerateValidGrasshopperPositions(absl::flat_hash_set* out, + HiveTile tile, HivePosition pos) const; + void GenerateValidLadybugPositions(absl::flat_hash_set* out, + HiveTile tile, HivePosition pos) const; + void GenerateValidMosquitoPositions(std::vector* out, HiveTile tile, + HivePosition pos, Colour to_move) const; + void GenerateValidPillbugSpecials(std::vector* out, HiveTile tile, + HivePosition pos) const; + + // Articulation points in a connected graph are vertices where, when removed, + // separate the graph into multiple components that are no longer connected. + // Tiles at an articulation point are considered "pinned" (and thus, can't be + // moved) as it would split the hive in two and invalidate the "One-Hive" rule + // https://en.wikipedia.org/wiki/Biconnected_component + // https://cp-algorithms.com/graph/cutpoints.html + void UpdateArticulationPoints(); + void UpdateInfluence(Colour col); + + int hex_radius_; + ExpansionInfo expansions_; + + std::vector tile_grid_; + std::vector played_tiles_; + std::array tile_positions_; + + // there are max 6 tiles that can climb on the hive to cover a tile + std::array covered_tiles_; + absl::flat_hash_set articulation_points_; + + // contains the positions surrounding played tiles. Used for placement rules + std::array, 2> colour_influence_; + + HiveTile last_moved_; + HivePosition last_moved_from_; +}; + +} // namespace hive +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_HIVE_BOARD_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/hive/hive_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/hive/hive_test.cc new file mode 100644 index 0000000..38320c0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/hive/hive_test.cc @@ -0,0 +1,248 @@ +// Copyright 2025 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
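+// The positions below are written as UHP (Universal Hive Protocol) game
+// strings of the form:
+//   GameTypeString;GameStateString;TurnString;MoveString1;MoveString2;...
+// e.g. "Base+MLP;InProgress;White[9];wS1;bS1 wS1-;..." is a Base game with
+// the Mosquito, Ladybug and Pillbug expansions, still in progress, with White
+// to play their 9th turn after the listed moves. Each move names the tile
+// being placed or moved and a reference tile, with -, / and \ marking which
+// side of the reference tile it lands on (no direction means "on top of").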
+ +#include "open_spiel/games/hive/hive.h" + +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/games/hive/hive_board.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace hive { +namespace { + +namespace testing = open_spiel::testing; + +// Move generation tests for each bug type. Modified from the original set +// to account for invalid move test cases. +// https://github.com/edre/nokamute/blob/master/data/uhp_tests.txt +constexpr const char* queen_test_game = + "Base+MLP;InProgress;White[9];wS1;bS1 wS1-;wQ -wS1;bQ bS1-;wQ \\wS1;bQ " + "bS1/;wG1 /wQ;bG1 bQ\\;wB1 \\wQ;bB1 bQ/;wG2 wB1/;bG2 \\bB1;wA1 /wB1;bA1 " + "bB1\\;wQ wG2\\;bA2 bA1/"; +constexpr const char* queen_valid_moves = "wQ wG2-;wQ -bG2;wQ wG1/"; +constexpr const char* queen_invalid_moves = "wQ -bB1;wQ -bQ;wQ \\bS1"; + +constexpr const char* ant_test_game = + "Base+MLP;InProgress;White[13];wS1;bB1 wS1-;wQ -wS1;bQ bB1-;wB1 \\wQ;bG1 " + "bQ/;wB2 \\wB1;bG2 bG1/;wS2 \\wB2;bS1 bG2/;wA1 \\wS1;bB2 bS1/;wA2 " + "\\wS2;bG3 \\bB2;wA1 -bG1;bA1 \\bG3;wG1 wA2/;bS2 -bA1;wG2 wG1/;bA2 " + "-bS2;wA3 wG2-;bA3 bS2\\;wG3 wA3\\;bA3 wG3\\"; +constexpr const char* ant_valid_moves = + "wA1 -bG2;wA1 -bS1;wA1 /bG3;wA1 bS2\\;wA1 bA2\\;wA1 /bA2;wA1 bA3-;wA1 " + "bA3\\;wA1 /bA3;wA1 /wG3;wA1 wG2\\;wA1 wG1\\;wA1 wB2/;wA1 wB1/;wA1 " + "\\wS1;wA1 \\bB1"; +constexpr const char* ant_invalid_moves = "wA1 -bA2;wA1 wA3-"; + +constexpr const char* grasshopper_test_game = + "Base+MLP;InProgress;White[11];wG1;bG1 wG1-;wQ /wG1;bQ bG1-;wS1 wQ\\;bA1 " + "bQ-;wB1 /wS1;bA1 -wQ;wB1 wS1\\;bA2 bQ-;wB1 /wS1;bA2 wG1\\;wB1 wS1\\;bA3 " + "bQ-;wB1 /wS1;bS1 bQ\\;wB1 wS1;bS1 wB1\\;wB1 /wB1;bA3 -wB1"; +constexpr const char* grasshopper_valid_moves = + "wG1 /wQ;wG1 bA2\\;wG1 bQ-;wG1 \\wB1"; +constexpr const char* grasshopper_invalid_moves = "wG1 \\bG1;wG1 bA1/"; + +constexpr const char* spider_test_game = + "Base+MLP;InProgress;White[12];wG1;bA1 wG1-;wS1 \\wG1;bQ bA1-;wQ /wG1;bG1 " + "bQ\\;wG2 wQ\\;bB1 /bG1;wB1 /wG2;bG2 bG1\\;wG3 /wB1;bG2 -bB1;wB2 wG3\\;bA1 " + "bG1\\;wA1 wB2-;bA1 bB1\\;wA2 wA1/;bA1 bG1-;wS2 wA2-;bA1 bG1\\;wA3 " + "wS2\\;bA1 wA3-"; +constexpr const char* spider_valid_moves = "wS1 \\bQ;wS1 /bQ;wS1 wG1\\;wS1 /wQ"; +constexpr const char* spider_invalid_moves = "wS1 -bQ;wS1 wG1/"; + +constexpr const char* beetle_test_game = + "Base+MLP;InProgress;White[16];wB1;bB1 wB1-;wQ \\wB1;bQ bB1/;wG1 /wB1;bB2 " + "bB1\\;wA1 /wG1;bA1 bQ\\;wG2 -wA1;bQ \\bB1;wB2 /wG2;bA2 \\bA1;wG3 " + "wB2\\;bA2 \\wQ;wA2 wG3-;bB2 wB1\\;wS1 wA2\\;bA1 bB1\\;wS2 wS1-;bA1 " + "bB1-;wA3 wS2/;bA1 \\wA3;wL -wG1;bM bB1\\;wA1 wG2\\;bM bB2;wA1 wL\\;bB1 " + "bQ;wL bB1\\;bA1 -wG1"; +constexpr const char* beetle_valid_moves = "wB1 wQ;wB1 bB1;wB1 wG1;wB1 bM"; +constexpr const char* beetle_invalid_moves = "wB1 bQ;wB1 bB2;wB1 wL;wB1 /wQ"; + +constexpr const char* mosquito_test_game = + "Base+MLP;InProgress;White[13];wM;bG1 wM-;wS1 /wM;bQ bG1-;wQ /wS1;bB1 " + "bG1\\;wB1 /wQ;bB1 wM\\;wS2 /wB1;bA1 bQ-;wB2 wS2\\;bA1 bQ\\;wG1 wB2-;bA1 " + "bQ-;wG2 wG1/;bA1 bQ\\;wG3 wG2/;bA1 bQ-;wA1 wG3-;bA1 bQ/;wA2 wA1-;bA1 " + "bQ-;wA3 wA2\\;bA1 /wA3"; +constexpr const char* mosquito_valid_moves = + "wM bQ-;wM bB1\\;wM /wS2;wM \\bG1;wM bG1;wM bB1;wM wS1;wM \\wS1;wM bQ/;wM " + "-wQ"; +constexpr const char* mosquito_invalid_moves = "wM /wA2;wM \\bQ"; + +constexpr const char* ladybug_test_game = + "Base+MLP;InProgress;White[15];wL;bL 
wL/;wQ -wL;bQ bL/;wQ -bL;bA1 bQ/;wB1 " + "\\wQ;bA1 bQ-;wS1 \\wB1;bA1 bQ/;wB2 \\wS1;bA1 bQ-;wS2 wB2/;bA1 bQ/;wA1 " + "wS2-;bA1 bQ-;wG1 wA1/;bA1 bQ/;wG2 wG1-;bA1 bQ-;wA2 wG2\\;bA1 bQ/;wA3 " + "wA2-;bA1 bQ-;wG3 wA3/;bA1 \\wG3;wL bL\\;bQ \\bL"; +constexpr const char* ladybug_valid_moves = + "wL wB1/;wL /wB1;wL wS1-;wL \\bQ;wL bQ/;wL bQ-;wL /wQ;wL wQ\\"; +constexpr const char* ladybug_invalid_moves = "wL /wS1;wL bL-"; + +constexpr const char* pillbug_test_game = + "Base+MLP;InProgress;White[6];wP;bM wP-;wQ \\wP;bL bM\\;wA1 /wQ;bQ bL/;wA2 " + "-wQ;bA1 /bL;wA2 wP\\;bM wP/"; +constexpr const char* pillbug_valid_moves = "wQ -wA2;wQ -bQ;wA1 bM\\"; +constexpr const char* pillbug_invalid_moves = "bM wA1\\;wP -bQ;wQ bM/"; + +// game states to test basic functionality +constexpr const char* white_wins_game = + "Base;WhiteWins;Black[8];wS1;bS1 wS1-;wQ -wS1;bQ bS1/;wG1 -wQ;bG1 \\bQ;wG1 " + "bQ\\;bG2 bQ/;wA1 wQ\\;bA1 bG2/;wA1 bG2\\;bA1 \\bG2;wQ \\wS1;bA1 bG2/;wQ " + "/bG1"; +constexpr const char* white_wins_on_black_turn_game = + "Base;WhiteWins;White[7];wS1;bS1 wS1-;wQ -wS1;bQ bS1/;wG1 -wQ;bG1 \\bQ;wG1 " + "bQ\\;bG2 bQ/;wA1 wQ\\;bA1 bG2/;wA1 bG2\\;bA1 /bG1"; +constexpr const char* draw_game = + "Base;Draw;White[11];wS1;bS1 wS1-;wQ -wS1;bQ bS1/;wG1 -wQ;bG1 \\bQ;wG1 " + "bQ\\;bG2 bQ/;wA1 wQ\\;bA1 bG2/;wA1 bG2\\;bA1 \\bG2;wQ \\wS1;bG1 wA1/;wQ " + "-bQ;bA1 \\wQ;wB1 -wS1;bG3 bG1-;wB1 /bA1;bG3 -bG2"; +constexpr const char* force_pass_game = + "Base;InProgress;White[7];wA1;bS1 wA1-;wQ -wA1;bQ bS1/;wQ \\wA1;bA1 " + "bS1\\;wQ -wA1;bA2 bQ\\;wQ \\wA1;bA1 \\wQ;wG1 /wQ;bA2 /wG1"; + +// uncommonly encountered corner-cases +constexpr const char* beetle_gate_game = + "Base;InProgress;White[12];wB1;bS1 wB1-;wQ \\wB1;bQ bS1/;wB2 -wQ;bB1 " + "bQ\\;wS1 /wB2;bB1 bS1;wG1 /wS1;bQ \\bB1;wG2 wG1\\;bB2 bQ/;wG3 wG2\\;bB2 " + "\\bQ;wA1 wG3-;bB2 wQ;wA2 wA1-;bA1 bQ/;wS2 wA2-;bA1 bB1/;wA3 wS2/;bA1 wA3/"; +constexpr const char* beetle_gate_valid_moves = + "wB1 bB2;wB1 bB1;wB1 /bB1;wB1 wB2\\"; + +constexpr const char* ladybug_gate_game = + "Base+L;InProgress;White[14];wL;bG1 wL/;wQ -wL;bQ bG1/;wQ -bG1;bG2 bQ-;wB1 " + "\\wQ;bB1 bG2-;wS1 \\wB1;bB1 bG2;wS2 \\wS1;bG3 \\bQ;wG1 wS2/;bB2 bG3/;wB2 " + "wG1/;bB2 bG3;wA1 wB2-;bA1 bB1-;wA2 wA1-;bA1 bB1\\;wG2 wA2-;bA1 bB1-;wG3 " + "wG2\\;bA1 bB1\\;wA3 wG3\\;bA1 wA3\\"; +constexpr const char* ladybug_gate_valid_moves = + "wL -bB2;wL /bB2;wL /wB1;wL /wS1;wL bQ\\;wL bG1\\;wL /wQ"; + +constexpr const char* pillbug_gate_game = + "Base+P;InProgress;White[9];wP;bB1 wP-;wQ /wP;bQ bB1/;wQ wP\\;bQ \\bB1;wQ " + "/wP;bA1 bQ/;wQ wP\\;bA1 -bQ;wQ /wP;bB2 \\bQ;wQ wP\\;bB2 bQ;bA1 -wP;bB1 wQ"; +constexpr const char* pillbug_gate_valid_moves = "bA1 -bB2;bA1 /wP"; + +void BasicHiveTests() { + testing::LoadGameTest("hive"); + std::shared_ptr game_mlp = + open_spiel::LoadGame("hive"); + testing::NoChanceOutcomesTest(*game_mlp); + testing::RandomSimTest(*game_mlp, 5); + + // test all win conditions + auto state = DeserializeUHPGameAndState(white_wins_game).second; + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_FLOAT_EQ(state->PlayerReturn(kPlayerWhite), 1.0); + SPIEL_CHECK_FLOAT_EQ(state->PlayerReturn(kPlayerBlack), -1.0); + + state = DeserializeUHPGameAndState(white_wins_on_black_turn_game).second; + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_FLOAT_EQ(state->PlayerReturn(kPlayerWhite), 1.0); + SPIEL_CHECK_FLOAT_EQ(state->PlayerReturn(kPlayerBlack), -1.0); + + state = DeserializeUHPGameAndState(draw_game).second; + SPIEL_CHECK_TRUE(state->IsTerminal()); + 
SPIEL_CHECK_FLOAT_EQ(state->PlayerReturn(kPlayerWhite), 0.0); + SPIEL_CHECK_FLOAT_EQ(state->PlayerReturn(kPlayerBlack), 0.0); + + // pass action + state = DeserializeUHPGameAndState(force_pass_game).second; + SPIEL_CHECK_TRUE(state->LegalActions().size() == 1); + SPIEL_CHECK_TRUE(state->LegalActions().at(0) == + state->StringToAction("pass")); + + // test all expansion variations + testing::RandomSimTest(*LoadGame("hive(uses_mosquito=false)"), 1); + testing::RandomSimTest( + *LoadGame("hive(uses_mosquito=false,uses_ladybug=false)"), 1); + testing::RandomSimTest( + *LoadGame("hive(uses_mosquito=false,uses_pillbug=false)"), 1); + testing::RandomSimTest( + *LoadGame("hive(uses_ladybug=false,uses_pillbug=false)"), 1); + testing::RandomSimTest( + *LoadGame( + "hive(uses_mosquito=false,uses_ladybug=false,uses_pillbug=false)"), + 1); + + // test with maximum board size + testing::RandomSimTest( + *LoadGame(absl::StrFormat("hive(board_size=%d)", kMaxBoardRadius)), 1); + + // test prettyprint with ansi colours + testing::RandomSimTest(*LoadGame("hive(ansi_color_output=true)"), 1); +} + +void TestMoves(std::unique_ptr&& state, const char* valid_moves, + const char* invalid_moves) { + std::vector legal_action_mask = state->LegalActionsMask(); + std::vector valid_move_list = + absl::StrSplit(valid_moves, ';', absl::SkipEmpty()); + std::vector invalid_move_list = + absl::StrSplit(invalid_moves, ';', absl::SkipEmpty()); + + for (auto& move : valid_move_list) { + SPIEL_CHECK_TRUE(legal_action_mask[state->StringToAction(move)] == 1); + } + + for (auto& move : invalid_move_list) { + SPIEL_CHECK_TRUE(legal_action_mask[state->StringToAction(move)] == 0); + } +} + +void TestBugMoves() { + std::shared_ptr game = open_spiel::LoadGame("hive"); + + // Base Bugs + TestMoves(DeserializeUHPGameAndState(queen_test_game).second, + queen_valid_moves, queen_invalid_moves); + TestMoves(DeserializeUHPGameAndState(ant_test_game).second, ant_valid_moves, + ant_invalid_moves); + TestMoves(DeserializeUHPGameAndState(grasshopper_test_game).second, + grasshopper_valid_moves, grasshopper_invalid_moves); + TestMoves(DeserializeUHPGameAndState(spider_test_game).second, + spider_valid_moves, spider_invalid_moves); + TestMoves(DeserializeUHPGameAndState(beetle_test_game).second, + beetle_valid_moves, beetle_invalid_moves); + + // Expansion Bugs + TestMoves(DeserializeUHPGameAndState(mosquito_test_game).second, + mosquito_valid_moves, mosquito_invalid_moves); + TestMoves(DeserializeUHPGameAndState(ladybug_test_game).second, + ladybug_valid_moves, ladybug_invalid_moves); + TestMoves(DeserializeUHPGameAndState(pillbug_test_game).second, + pillbug_valid_moves, pillbug_invalid_moves); + + // Special Cases + TestMoves(DeserializeUHPGameAndState(beetle_gate_game).second, + beetle_gate_valid_moves, ""); + TestMoves(DeserializeUHPGameAndState(ladybug_gate_game).second, + ladybug_gate_valid_moves, ""); + TestMoves(DeserializeUHPGameAndState(pillbug_gate_game).second, + pillbug_gate_valid_moves, ""); +} + +} // namespace +} // namespace hive +} // namespace open_spiel + +int main(int argc, char** argv) { + // TODO: perft() + open_spiel::hive::BasicHiveTests(); + open_spiel::hive::TestBugMoves(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/kriegspiel/kriegspiel.cc b/scenarios/bargaining/open_spiel/open_spiel/games/kriegspiel/kriegspiel.cc new file mode 100644 index 0000000..03c980d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/kriegspiel/kriegspiel.cc @@ -0,0 +1,700 @@ +// Copyright 2019 DeepMind 
Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/kriegspiel/kriegspiel.h" + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/games/chess/chess.h" +#include "open_spiel/games/chess/chess_board.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/observer.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace kriegspiel { +namespace { + +// Facts about the game. +const GameType kGameType{ + /*short_name=*/"kriegspiel", + /*long_name=*/"Kriegspiel", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kImperfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"board_size", GameParameter(8)}, + {"fen", GameParameter(GameParameter::Type::kString, false)}, + {"threefold_repetition", GameParameter(true)}, + {"50_move_rule", GameParameter(true)}}}; + +std::shared_ptr Factory(const GameParameters ¶ms) { + return std::shared_ptr(new KriegspielGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory) + +chess::ObservationTable ComputePrivateInfoTable(const chess::ChessBoard &board, + chess::Color color) { + const int board_size = board.BoardSize(); + chess::ObservationTable observability_table{false}; + + for (int8_t y = 0; y < board_size; ++y) { + for (int8_t x = 0; x < board_size; ++x) { + chess::Square sq{x, y}; + if (board.IsFriendly({x, y}, color)) { + size_t index = chess::SquareToIndex(sq, board_size); + observability_table[index] = true; + } + } + } + return observability_table; +} + +bool ObserverHasString(IIGObservationType iig_obs_type) { + return iig_obs_type.public_info && + iig_obs_type.private_info == PrivateInfoType::kSinglePlayer && + !iig_obs_type.perfect_recall; +} +bool ObserverHasTensor(IIGObservationType iig_obs_type) { + return !iig_obs_type.perfect_recall; +} + +bool IsValid(chess::Square square) { return square.x >= 0 && square.y >= 0; } + +} // namespace + +class KriegspielObserver : public Observer { + public: + explicit KriegspielObserver(IIGObservationType iig_obs_type) + : Observer(/*has_string=*/ObserverHasString(iig_obs_type), + /*has_tensor=*/ObserverHasTensor(iig_obs_type)), + iig_obs_type_(iig_obs_type) {} + + void WriteTensor(const State &observed_state, int player, + Allocator *allocator) const override { + auto &state = + open_spiel::down_cast(observed_state); + auto &game = + open_spiel::down_cast(*state.GetGame()); + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, game.NumPlayers()); + chess::Color 
color = chess::PlayerToColor(player); + + if (iig_obs_type_.perfect_recall) { + SpielFatalError( + "KriegspielObserver: tensor with perfect recall not implemented."); + } + + if (iig_obs_type_.public_info) { + WritePublicInfoTensor(state, "public", allocator); + } + if (iig_obs_type_.private_info == PrivateInfoType::kSinglePlayer) { + std::string prefix = "private"; + WritePrivateInfoTensor(state, player, prefix, allocator); + } else if (iig_obs_type_.private_info == PrivateInfoType::kAllPlayers) { + for (int i = 0; i < chess::NumPlayers(); ++i) { + std::string prefix = chess::ColorToString(color); + WritePrivateInfoTensor(state, i, prefix, allocator); + } + } + } + + std::string StringFrom(const State &observed_state, + int player) const override { + auto &state = + open_spiel::down_cast(observed_state); + auto &game = + open_spiel::down_cast(*state.GetGame()); + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, game.NumPlayers()); + + if (iig_obs_type_.perfect_recall) { + SpielFatalError( + "KriegspielObserver: string with perfect recall is unimplemented"); + } + + if (iig_obs_type_.public_info && + iig_obs_type_.private_info == PrivateInfoType::kSinglePlayer) { + // No observation before the first move + if (state.MoveMsgHistory().empty()) { + return std::string(); + } + + // Write last umpire message + return state.last_umpire_msg_->ToString(); + } else { + SpielFatalError( + "KriegspielObserver: string with imperfect recall is implemented only" + " for the (default) observation type."); + } + } + + private: + void WritePieces(chess::Color color, chess::PieceType piece_type, + const chess::ChessBoard &board, + const chess::ObservationTable &observability_table, + const std::string &prefix, Allocator *allocator) const { + const std::string type_string = + color == chess::Color::kEmpty + ? "empty" + : chess::PieceTypeToString( + piece_type, + /*uppercase=*/color == chess::Color::kWhite); + const int board_size = board.BoardSize(); + + auto out = allocator->Get(prefix + "_" + type_string + "_pieces", + {board_size, board_size}); + for (int8_t y = 0; y < board_size; ++y) { + for (int8_t x = 0; x < board_size; ++x) { + const chess::Square square{x, y}; + const chess::Piece &piece_on_board = board.at(square); + const bool write_square = + piece_on_board.color == color && + piece_on_board.type == piece_type && + observability_table[chess::SquareToIndex(square, board_size)]; + out.at(x, y) = write_square ? 1.0f : 0.0f; + } + } + } + + void WriteUnknownSquares(const chess::ChessBoard &board, + chess::ObservationTable &observability_table, + const std::string &prefix, + Allocator *allocator) const { + const int board_size = board.BoardSize(); + auto out = + allocator->Get(prefix + "_unknown_squares", {board_size, board_size}); + for (int8_t y = 0; y < board_size; ++y) { + for (int8_t x = 0; x < board_size; ++x) { + const chess::Square square{x, y}; + const bool write_square = + observability_table[chess::SquareToIndex(square, board_size)]; + out.at(x, y) = write_square ? 
0.0f : 1.0f; + } + } + } + + void WriteMove(const chess::Move &move, const chess::ChessBoard &board, + const std::string &prefix, Allocator *allocator) const { + const int board_size = board.BoardSize(); + auto from_out = allocator->Get(prefix + "_from", {board_size, board_size}); + if (IsValid(move.from)) from_out.at(move.from.x, move.from.y) = 1; + auto to_out = allocator->Get(prefix + "_to", {board_size, board_size}); + if (IsValid(move.to)) to_out.at(move.to.x, move.to.y) = 1; + // 5 is maximum because we can't promote to a pawn. + WriteScalar(static_cast(move.promotion_type), 0, 5, + prefix + "_promotion", allocator); + WriteScalar(static_cast(move.castle_dir), 0, 2, + prefix + "_castle_dir", allocator); + } + + void WriteUmpireMessage(const KriegspielUmpireMessage &msg, + const chess::ChessBoard &board, + const std::string &prefix, + Allocator *allocator) const { + int board_size = board.BoardSize(); + WriteBinary(msg.illegal, prefix + "_illegal", allocator); + WriteScalar(static_cast(msg.capture_type), 0, 2, + prefix + "_capture_type", allocator); + auto square_out = + allocator->Get(prefix + "_captured_square", {board_size, board_size}); + if (IsValid(msg.square)) square_out.at(msg.square.x, msg.square.y) = 1; + WriteScalar(static_cast(msg.check_types.first), 0, 5, + prefix + "_check_one", allocator); + WriteScalar(static_cast(msg.check_types.second), 0, 5, + prefix + "_check_two", allocator); + WriteScalar(static_cast(msg.to_move), 0, 2, prefix + "_to_move", + allocator); + WriteScalar(msg.pawn_tries, 0, 16, prefix + "_pawn_tries", allocator); + } + + void WriteScalar(int val, int min, int max, const std::string &field_name, + Allocator *allocator) const { + SPIEL_DCHECK_LT(min, max); + SPIEL_DCHECK_GE(val, min); + SPIEL_DCHECK_LE(val, max); + auto out = allocator->Get(field_name, {max - min + 1}); + out.at(val - min) = 1; + } + + // Adds a binary scalar. + void WriteBinary(bool val, const std::string &field_name, + Allocator *allocator) const { + WriteScalar(val ? 1 : 0, 0, 1, field_name, allocator); + } + + void WritePrivateInfoTensor(const KriegspielState &state, int player, + const std::string &prefix, + Allocator *allocator) const { + chess::Color color = chess::PlayerToColor(player); + chess::ObservationTable private_info_table = + ComputePrivateInfoTable(state.Board(), color); + + // Piece configuration. + for (const chess::PieceType &piece_type : chess::kPieceTypes) { + WritePieces(chess::Color::kWhite, piece_type, state.Board(), + private_info_table, prefix, allocator); + WritePieces(chess::Color::kBlack, piece_type, state.Board(), + private_info_table, prefix, allocator); + } + WritePieces(chess::Color::kEmpty, chess::PieceType::kEmpty, state.Board(), + private_info_table, prefix, allocator); + WriteUnknownSquares(state.Board(), private_info_table, prefix, allocator); + + // Castling rights. 
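+    // One binary feature per castling direction (left/right) that is still
+    // available to the observing colour.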
+ WriteBinary( + state.Board().CastlingRight(color, chess::CastlingDirection::kLeft), + prefix + "_left_castling", allocator); + WriteBinary( + state.Board().CastlingRight(color, chess::CastlingDirection::kRight), + prefix + "_right_castling", allocator); + + // Write observer's last move + chess::Move last_move = {chess::kInvalidSquare, chess::kInvalidSquare, + chess::kEmptyPiece, + chess::PieceType::kEmpty, + chess::CastlingDirection::kNone}; + + for (auto move_msg = state.MoveMsgHistory().rbegin(); + move_msg != state.MoveMsgHistory().rend(); ++move_msg) { + if (move_msg->first.piece.color == color) { + last_move = move_msg->first; + break; + } + } + WriteMove(last_move, state.Board(), prefix + "_last_move", allocator); + } + + void WritePublicInfoTensor(const KriegspielState &state, + const std::string &prefix, + Allocator *allocator) const { + const auto entry = state.repetitions_.find(state.Board().HashValue()); + SPIEL_CHECK_FALSE(entry == state.repetitions_.end()); + int repetitions = entry->second; + + // Num repetitions for the current board. + WriteScalar(/*val=*/repetitions, /*min=*/1, /*max=*/3, + prefix + "_repetitions", allocator); + + // Side to play. + WriteScalar(/*val=*/ColorToPlayer(state.Board().ToPlay()), + /*min=*/0, /*max=*/1, prefix + "_side_to_play", allocator); + + // Irreversible move counter. + auto out = allocator->Get(prefix + "_irreversible_move_counter", {1}); + out.at(0) = state.Board().IrreversibleMoveCounter() / 100.f; + + // Write last umpire message + if (state.last_umpire_msg_) { + WriteUmpireMessage(*state.last_umpire_msg_, state.Board(), prefix, + allocator); + } else { + WriteUmpireMessage(KriegspielUmpireMessage(), state.Board(), prefix, + allocator); + } + } + + IIGObservationType iig_obs_type_; +}; + +std::string CaptureTypeToString(KriegspielCaptureType capture_type) { + if (capture_type == KriegspielCaptureType::kNoCapture) { + return "No Piece"; + } + if (capture_type == KriegspielCaptureType::kPawn) { + return "Pawn"; + } + return "Piece"; +} + +std::string CheckTypeToString(KriegspielCheckType check_type) { + switch (check_type) { + case KriegspielCheckType::kFile: + return "File"; + case KriegspielCheckType::kRank: + return "Rank"; + case KriegspielCheckType::kLongDiagonal: + return "Long-diagonal"; + case KriegspielCheckType::kShortDiagonal: + return "Short-diagonal"; + case KriegspielCheckType::kKnight: + return "Knight"; + default: + SpielFatalError("kNoCheck does not have a string representation"); + } +} + +std::pair GetCheckType( + const chess::ChessBoard &board) { + chess::Square king_sq = + board.find(chess::Piece{board.ToPlay(), chess::PieceType::kKing}); + + std::pair check_type_pair = { + KriegspielCheckType::kNoCheck, KriegspielCheckType::kNoCheck}; + + board.GeneratePseudoLegalMoves( + [&king_sq, &check_type_pair, &board](const chess::Move &move) { + if (move.to != king_sq) { + return true; + } + KriegspielCheckType check_type; + if (move.piece.type == chess::PieceType::kKnight) + check_type = KriegspielCheckType::kKnight; + else if (move.from.x == move.to.x) + check_type = KriegspielCheckType::kFile; + else if (move.from.y == move.to.y) + check_type = KriegspielCheckType::kRank; + else if (chess::IsLongDiagonal(move.from, move.to, board.BoardSize())) + check_type = KriegspielCheckType::kLongDiagonal; + else + check_type = KriegspielCheckType::kShortDiagonal; + + if (check_type_pair.first != KriegspielCheckType::kNoCheck) { + // There cannot be more than two checks at the same time + check_type_pair.second = check_type; + 
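+          // A second check has been recorded, so stop the move generation
+          // callback early.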
return false; + } else { + check_type_pair.first = check_type; + } + return true; + }, + board.ToPlay(), chess::PseudoLegalMoveSettings::kAcknowledgeEnemyPieces); + + return check_type_pair; +} + +std::string KriegspielUmpireMessage::ToString() const { + if (illegal) { + return "Illegal move."; + } + + std::string msg; + bool put_comma = false; + + if (capture_type != KriegspielCaptureType::kNoCapture) { + msg += CaptureTypeToString(capture_type) + " at " + + chess::SquareToString(square) + " captured"; + put_comma = true; + } + if (check_types.first != KriegspielCheckType::kNoCheck) { + if (put_comma) msg += ", "; + msg += CheckTypeToString(check_types.first) + " check"; + put_comma = true; + } + if (check_types.second != KriegspielCheckType::kNoCheck) { + if (put_comma) msg += ", "; + msg += CheckTypeToString(check_types.second) + " check"; + put_comma = true; + } + if (put_comma) msg += ", "; + + msg += chess::ColorToString(to_move) + "'s move"; + if (pawn_tries > 0) { + msg += ", "; + msg += pawn_tries == 1 ? "1 pawn try" + : std::to_string(pawn_tries) + " pawn tries"; + } + msg += "."; + return msg; +} + +KriegspielUmpireMessage GetUmpireMessage(const chess::ChessBoard &chess_board, + const chess::Move &move) { + KriegspielUmpireMessage msg{}; + if (!chess_board.IsMoveLegal(move)) { + // If the move is illegal, the player is notified about it and can play + // again + msg.illegal = true; + msg.to_move = chess_board.ToPlay(); + return msg; + } + msg.illegal = false; + + chess::PieceType capture_type = chess_board.at(move.to).type; + switch (capture_type) { + case chess::PieceType::kEmpty: + msg.capture_type = KriegspielCaptureType::kNoCapture; + msg.square = chess::kInvalidSquare; + break; + case chess::PieceType::kPawn: + msg.capture_type = KriegspielCaptureType::kPawn; + msg.square = move.to; + break; + default: + msg.capture_type = KriegspielCaptureType::kPiece; + msg.square = move.to; + } + + // todo optimze when undo is optimized + chess::ChessBoard board_copy = chess_board; + board_copy.ApplyMove(move); + + msg.check_types = GetCheckType(board_copy); + + int pawnTries = 0; + board_copy.GenerateLegalPawnCaptures( + [&pawnTries](const chess::Move &move) { + pawnTries++; + return true; + }, + board_copy.ToPlay()); + msg.pawn_tries = pawnTries; + msg.to_move = board_copy.ToPlay(); + + return msg; +} + +bool GeneratesUmpireMessage(const chess::ChessBoard &chess_board, + const chess::Move &move, + const KriegspielUmpireMessage &orig_msg) { + if (!chess_board.IsMoveLegal(move)) { + // If the move is illegal, the player is notified about it and can play + // again + return orig_msg.illegal; + } + + chess::PieceType capture_type = chess_board.at(move.to).type; + switch (capture_type) { + case chess::PieceType::kEmpty: + if (orig_msg.capture_type != KriegspielCaptureType::kNoCapture) { + return false; + } + break; + case chess::PieceType::kPawn: + if (orig_msg.capture_type != KriegspielCaptureType::kPawn) { + return false; + } + break; + default: + if (orig_msg.capture_type != KriegspielCaptureType::kPiece) { + return false; + } + } + + // todo optimize when undo is optimized + chess::ChessBoard board_copy = chess_board; + board_copy.ApplyMove(move); + + if (orig_msg.check_types != GetCheckType(board_copy)) { + return false; + } + + int pawnTries = 0; + board_copy.GenerateLegalPawnCaptures( + [&pawnTries](const chess::Move &move) { + pawnTries++; + return true; + }, + board_copy.ToPlay()); + if (orig_msg.pawn_tries != pawnTries) { + return false; + } + if (orig_msg.to_move != 
board_copy.ToPlay()) { + return false; + } + + return true; +} + +KriegspielState::KriegspielState(std::shared_ptr game, + int board_size, const std::string &fen, + bool threefold_repetition, bool rule_50_move) + : State(game), + start_board_(*chess::ChessBoard::BoardFromFEN(fen, board_size, false)), + current_board_(start_board_), + threefold_repetition_(threefold_repetition), + rule_50_move_(rule_50_move) { + SPIEL_CHECK_TRUE(¤t_board_); + repetitions_[current_board_.HashValue()] = 1; +} + +void KriegspielState::DoApplyAction(Action action) { + cached_legal_actions_.reset(); + + chess::Move move = ActionToMove(action, Board()); + + KriegspielUmpireMessage msg = GetUmpireMessage(Board(), move); + + move_msg_history_.emplace_back(move, msg); + last_umpire_msg_ = msg; + + if (msg.illegal) { + // If the move is illegal, the player is notified about it and can play + // again + illegal_tried_moves_.emplace_back(move); + cached_legal_actions_.reset(); + return; + } + + Board().ApplyMove(move); + illegal_tried_moves_.clear(); + ++repetitions_[current_board_.HashValue()]; +} + +void KriegspielState::MaybeGenerateLegalActions() const { + if (!cached_legal_actions_) { + cached_legal_actions_ = std::vector(); + Board().GeneratePseudoLegalMoves( + [this](const chess::Move &move) -> bool { + bool is_illegal_tried = false; + for (const chess::Move &illegal_tried_move : illegal_tried_moves_) { + if (illegal_tried_move == move) { + is_illegal_tried = true; + break; + } + } + if (!is_illegal_tried) { + cached_legal_actions_->push_back(MoveToAction(move, BoardSize())); + } + return true; + }, + Board().ToPlay(), chess::PseudoLegalMoveSettings::kBreachEnemyPieces); + absl::c_sort(*cached_legal_actions_); + } +} + +std::vector KriegspielState::LegalActions() const { + MaybeGenerateLegalActions(); + if (IsTerminal()) return {}; + return *cached_legal_actions_; +} + +std::string KriegspielState::ActionToString(Player player, + Action action) const { + chess::Move move = ActionToMove(action, Board()); + return move.ToLAN(); +} + +std::string KriegspielState::ToString() const { return Board().ToFEN(); } + +std::vector KriegspielState::Returns() const { + return MaybeFinalReturns().value_or(std::vector{0., 0.}); +} + +std::string KriegspielState::ObservationString(Player player) const { + const auto &game = open_spiel::down_cast(*game_); + return game.default_observer_->StringFrom(*this, player); +} + +void KriegspielState::ObservationTensor(Player player, + absl::Span values) const { + ContiguousAllocator allocator(values); + const auto &game = open_spiel::down_cast(*game_); + game.default_observer_->WriteTensor(*this, player, &allocator); +} + +std::unique_ptr KriegspielState::Clone() const { + return std::make_unique(*this); +} + +void KriegspielState::UndoAction(Player player, Action action) { + // TODO: Make this fast by storing undo info in another stack. 
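+  // Undo works by resetting to the start board and replaying every remaining
+  // entry of move_msg_history_, which is simple but O(moves played) per call.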
+ SPIEL_CHECK_GE(move_msg_history_.size(), 1); + --repetitions_[current_board_.HashValue()]; + move_msg_history_.pop_back(); + history_.pop_back(); + current_board_ = start_board_; + for (const std::pair &move_msg_pair : + move_msg_history_) { + current_board_.ApplyMove(move_msg_pair.first); + } +} + +bool KriegspielState::IsThreefoldRepetitionDraw() const { + const auto entry = repetitions_.find(Board().HashValue()); + SPIEL_CHECK_FALSE(entry == repetitions_.end()); + return entry->second >= 3; +} + +absl::optional> KriegspielState::MaybeFinalReturns() const { + if (!Board().HasSufficientMaterial()) { + return std::vector{kDrawUtility, kDrawUtility}; + } + + if (threefold_repetition_ && IsThreefoldRepetitionDraw()) { + return std::vector{kDrawUtility, kDrawUtility}; + } + + // Compute and cache the legal actions. + MaybeGenerateLegalActions(); + SPIEL_CHECK_TRUE(cached_legal_actions_); + const bool have_legal_moves = !cached_legal_actions_->empty(); + + // If we don't have legal moves we are stalemated or mated + if (!have_legal_moves) { + if (!Board().InCheck()) { + return std::vector{kDrawUtility, kDrawUtility}; + } else { + std::vector returns(NumPlayers()); + auto next_to_play = ColorToPlayer(Board().ToPlay()); + returns[next_to_play] = kLossUtility; + returns[chess::OtherPlayer(next_to_play)] = kWinUtility; + return returns; + } + } + + if (rule_50_move_ && Board().IrreversibleMoveCounter() >= 50) { + return std::vector{kDrawUtility, kDrawUtility}; + } + + return absl::nullopt; +} + +KriegspielGame::KriegspielGame(const GameParameters ¶ms) + : Game(kGameType, params), + board_size_(ParameterValue("board_size")), + fen_(ParameterValue("fen", chess::DefaultFen(board_size_))), + threefold_repetition_(ParameterValue("threefold_repetition")), + rule_50_move_(ParameterValue("50_move_rule")) { + default_observer_ = std::make_shared(kDefaultObsType); +} + +std::vector KriegspielGame::ObservationTensorShape() const { + if (observation_tensor_shape_.empty()) + observation_tensor_shape_ = + ObserverTensorShape(*NewInitialState(), *default_observer_); + return observation_tensor_shape_; +} + +std::shared_ptr KriegspielGame::MakeObserver( + absl::optional iig_obs_type, + const GameParameters ¶ms) const { + if (!params.empty()) SpielFatalError("Observation params not supported"); + IIGObservationType obs_type = iig_obs_type.value_or(kDefaultObsType); + if (ObserverHasString(obs_type) || ObserverHasTensor(obs_type)) { + return std::make_shared(obs_type); + } + return nullptr; +} + +} // namespace kriegspiel +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/kriegspiel/kriegspiel.h b/scenarios/bargaining/open_spiel/open_spiel/games/kriegspiel/kriegspiel.h new file mode 100644 index 0000000..d72ac82 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/kriegspiel/kriegspiel.h @@ -0,0 +1,248 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
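The umpire feedback built by `KriegspielUmpireMessage::ToString()` in kriegspiel.cc above is what `ObservationString()` returns to both players. A minimal sketch of how this surfaces through the generic OpenSpiel API (assuming a built OpenSpiel tree; `LoadGame`, `NewInitialState`, `ApplyAction` and `ObservationString` are the standard spiel.h entry points, and nothing below is part of the patched sources):

```cpp
#include <iostream>
#include <memory>

#include "open_spiel/spiel.h"

int main() {
  // Load the game registered above under the short name "kriegspiel".
  std::shared_ptr<const open_spiel::Game> game =
      open_spiel::LoadGame("kriegspiel");
  std::unique_ptr<open_spiel::State> state = game->NewInitialState();

  // Play a handful of pseudo-legal actions. Illegal attempts are not applied;
  // the umpire reports "Illegal move." and the same player tries again.
  for (int i = 0; i < 6 && !state->IsTerminal(); ++i) {
    state->ApplyAction(state->LegalActions().front());
    std::cout << "White sees: " << state->ObservationString(0) << "\n"
              << "Black sees: " << state->ObservationString(1) << "\n";
  }
  return 0;
}
```

Because an illegal attempt counts as a move, the same player can appear to act several times in a row until a legal action lands.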
+ +#ifndef OPEN_SPIEL_GAMES_KRIEGSPIEL_H_ +#define OPEN_SPIEL_GAMES_KRIEGSPIEL_H_ + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/games/chess/chess.h" +#include "open_spiel/games/chess/chess_board.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// Kriegspiel - imperfect information version of chess: +// https://en.wikipedia.org/wiki/Kriegspiel +// The implementation follows ICC rules (with a few exceptions): +// https://www.chessclub.com/help/Kriegspiel +// One of the exceptions is that ICC does not notify opponent about player's +// illegal move. Here he has to be notified about it because tests don't allow +// player to not recognise the difference between states with different move +// number. And Illegal attempt is considered a move. +// Other exceptions are 50-move rule and threefold repetition, which under ICC +// rules are not automatically enforced, but can be claimed by the player. This +// implementation does not support claiming or offering draws so these rules' +// automatic enforcement can be turned on and off +// +// Parameters: +// "board_size" int Number of squares in each row and column +// (default: 8) +// "fen" string String describing the chess board position +// in Forsyth-Edwards Notation. The FEN has to +// match the board size. Default values are +// available for board sizes 4 and 8. +// "threefold_repetition" bool Whether threefold repetition rule should be +// automatically enforced (default: true) +// "50_move_rule" bool Whether 50 move rule should be automatically +// enforced (default: true) + +namespace open_spiel { +namespace kriegspiel { + +// Constants. +inline constexpr int kNumPlayers = 2; +inline constexpr double kLossUtility = -1; +inline constexpr double kDrawUtility = 0; +inline constexpr double kWinUtility = 1; + +// See action encoding below. +inline constexpr int kNumDistinctActions = 4674; + +// This is max length of a FIDE chess game. Kriegspiel can be longer. It can +// last forever when the three fold repetition and 50-move rule are turned off. 
+// https://math.stackexchange.com/questions/194008/how-many-turns-can-a-chess-game-take-at-maximum +inline constexpr int kMaxGameLength = 17695; + +enum KriegspielCaptureType { kNoCapture = 0, kPawn = 1, kPiece = 2 }; + +std::string CaptureTypeToString(KriegspielCaptureType capture_type); + +enum KriegspielCheckType { + kNoCheck = 0, + kFile = 1, + kRank = 2, + kLongDiagonal = 3, + kShortDiagonal = 4, + kKnight = 5 +}; + +std::string CheckTypeToString(KriegspielCheckType check_type); + +std::pair GetCheckType( + const chess::ChessBoard& board); + +struct KriegspielUmpireMessage { + bool illegal = false; + KriegspielCaptureType capture_type = KriegspielCaptureType::kNoCapture; + chess::Square square = chess::kInvalidSquare; + // there can be max two checks at a time so a pair is enough + std::pair check_types = { + KriegspielCheckType::kNoCheck, KriegspielCheckType::kNoCheck}; + chess::Color to_move = chess::Color::kEmpty; + int pawn_tries = 0; + + std::string ToString() const; + + bool operator==(KriegspielUmpireMessage& other) const { + return illegal == other.illegal && capture_type == other.capture_type && + square == other.square && check_types == other.check_types && + to_move == other.to_move && pawn_tries == other.pawn_tries; + } +}; + +KriegspielUmpireMessage GetUmpireMessage(const chess::ChessBoard& chess_board, + const chess::Move& move); + +bool GeneratesUmpireMessage(const chess::ChessBoard& chess_board, + const chess::Move& move, + const KriegspielUmpireMessage& orig_msg); + +class KriegspielGame; +class KriegspielObserver; + +// State of an in-play game. +class KriegspielState : public State { + public: + // Constructs a chess state at the given position in Forsyth-Edwards Notation. + // https://en.wikipedia.org/wiki/Forsyth%E2%80%93Edwards_Notation + KriegspielState(std::shared_ptr game, int board_size, + const std::string& fen, bool threefold_repetition, + bool rule_50_move); + KriegspielState(const KriegspielState&) = default; + + KriegspielState& operator=(const KriegspielState&) = default; + + Player CurrentPlayer() const override { + return IsTerminal() ? kTerminalPlayerId : ColorToPlayer(Board().ToPlay()); + } + std::vector LegalActions() const override; + std::string ActionToString(Player player, Action action) const override; + std::string ToString() const override; + + bool IsTerminal() const override { return MaybeFinalReturns().has_value(); } + + std::vector Returns() const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + void UndoAction(Player player, Action action) override; + + protected: + void DoApplyAction(Action action) override; + + private: + friend class KriegspielObserver; + + // Current board. + chess::ChessBoard& Board() { return current_board_; } + const chess::ChessBoard& Board() const { return current_board_; } + int BoardSize() const { return current_board_.BoardSize(); } + + // Starting board. + const chess::ChessBoard& StartBoard() const { return start_board_; } + + // History of moves and umpire messages. + const std::vector>& + MoveMsgHistory() const { + return move_msg_history_; + } + + // Draw can be claimed under the FIDE threefold repetition rule (the current + // board position has already appeared twice in the history). + bool IsThreefoldRepetitionDraw() const; + + // Calculates legal actions and caches them. 
This is separate from + // LegalActions() as there are a number of other methods that need the value + // of LegalActions. This is a separate method as it's called from + // IsTerminal(), which is also called by LegalActions(). + void MaybeGenerateLegalActions() const; + + absl::optional> MaybeFinalReturns() const; + + // We have to store every move made to check for repetitions and to implement + // undo. We store the current board position as an optimization. + std::vector> + move_msg_history_; + // We store this info as an optimisation so that we don't have to compute it + // from move_msg_history for observations + absl::optional last_umpire_msg_{}; + // Moves that the player tried and were illegal. We don't let player try them + // again on the same board because they are clearly still illegal; + std::vector illegal_tried_moves_; + // We store the start board for history to support games not starting + // from the start position. + chess::ChessBoard start_board_; + // We store the current board position as an optimization. + chess::ChessBoard current_board_; + + bool threefold_repetition_; + bool rule_50_move_; + + // RepetitionTable records how many times the given hash exists in the history + // stack (including the current board). + // We are already indexing by board hash, so there is no need to hash that + // hash again, so we use a custom passthrough hasher. + class PassthroughHash { + public: + std::size_t operator()(uint64_t x) const { + return static_cast(x); + } + }; + using RepetitionTable = absl::flat_hash_map; + RepetitionTable repetitions_; + mutable absl::optional> cached_legal_actions_; +}; + +// Game object. +class KriegspielGame : public Game { + public: + explicit KriegspielGame(const GameParameters& params); + int NumDistinctActions() const override { return kNumDistinctActions; } + std::unique_ptr NewInitialState() const override { + return absl::make_unique(shared_from_this(), board_size_, + fen_, threefold_repetition_, + rule_50_move_); + } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { return kLossUtility; } + absl::optional UtilitySum() const override { return kDrawUtility; } + double MaxUtility() const override { return kWinUtility; } + std::vector ObservationTensorShape() const override; + int MaxGameLength() const override { return kMaxGameLength; } + std::shared_ptr MakeObserver( + absl::optional iig_obs_type, + const GameParameters& params) const; + + std::shared_ptr default_observer_; + + private: + mutable std::vector observation_tensor_shape_; + const int board_size_; + const std::string fen_; + const bool threefold_repetition_; + const bool rule_50_move_; +}; + +} // namespace kriegspiel +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_KRIEGSPIEL_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/kriegspiel/kriegspiel_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/kriegspiel/kriegspiel_test.cc new file mode 100644 index 0000000..051a412 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/kriegspiel/kriegspiel_test.cc @@ -0,0 +1,40 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace kriegspiel { +namespace { + +namespace testing = open_spiel::testing; + +void BasicKriegspielTests(int board_size) { + GameParameters params; + params["board_size"] = GameParameter(board_size); + + testing::LoadGameTest("kriegspiel"); + testing::NoChanceOutcomesTest(*LoadGame("kriegspiel", params)); + testing::RandomSimTest(*LoadGame("kriegspiel", params), 20); +} + +} // namespace +} // namespace kriegspiel +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::kriegspiel::BasicKriegspielTests(/*board_size=*/4); + open_spiel::kriegspiel::BasicKriegspielTests(/*board_size=*/8); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/kuhn_poker/kuhn_poker.cc b/scenarios/bargaining/open_spiel/open_spiel/games/kuhn_poker/kuhn_poker.cc new file mode 100644 index 0000000..566268d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/kuhn_poker/kuhn_poker.cc @@ -0,0 +1,477 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/kuhn_poker/kuhn_poker.h" + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/observer.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace kuhn_poker { +namespace { + +// Default parameters. 
+constexpr int kDefaultPlayers = 2; +constexpr double kAnte = 1; + +// Facts about the game +const GameType kGameType{/*short_name=*/"kuhn_poker", + /*long_name=*/"Kuhn Poker", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/10, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"players", GameParameter(kDefaultPlayers)}}, + /*default_loadable=*/true, + /*provides_factored_observation_string=*/true, + }; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new KuhnGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +open_spiel::RegisterSingleTensorObserver single_tensor(kGameType.short_name); +} // namespace + +class KuhnObserver : public Observer { + public: + KuhnObserver(IIGObservationType iig_obs_type) + : Observer(/*has_string=*/true, /*has_tensor=*/true), + iig_obs_type_(iig_obs_type) {} + + void WriteTensor(const State& observed_state, int player, + Allocator* allocator) const override { + const KuhnState& state = + open_spiel::down_cast(observed_state); + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, state.num_players_); + const int num_players = state.num_players_; + const int num_cards = num_players + 1; + + if (iig_obs_type_.private_info == PrivateInfoType::kSinglePlayer) { + { // Observing player. + auto out = allocator->Get("player", {num_players}); + out.at(player) = 1; + } + { // The player's card, if one has been dealt. + auto out = allocator->Get("private_card", {num_cards}); + if (state.history_.size() > player) + out.at(state.history_[player].action) = 1; + } + } + + // Betting sequence. + if (iig_obs_type_.public_info) { + if (iig_obs_type_.perfect_recall) { + auto out = allocator->Get("betting", {2 * num_players - 1, 2}); + for (int i = num_players; i < state.history_.size(); ++i) { + out.at(i - num_players, state.history_[i].action) = 1; + } + } else { + auto out = allocator->Get("pot_contribution", {num_players}); + for (auto p = Player{0}; p < state.num_players_; p++) { + out.at(p) = state.ante_[p]; + } + } + } + } + + std::string StringFrom(const State& observed_state, + int player) const override { + const KuhnState& state = + open_spiel::down_cast(observed_state); + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, state.num_players_); + std::string result; + + // Private card + if (iig_obs_type_.private_info == PrivateInfoType::kSinglePlayer) { + if (iig_obs_type_.perfect_recall || iig_obs_type_.public_info) { + if (state.history_.size() > player) { + absl::StrAppend(&result, state.history_[player].action); + } + } else { + if (state.history_.size() == 1 + player) { + absl::StrAppend(&result, "Received card ", + state.history_[player].action); + } + } + } + + // Betting. + // TODO(author11) Make this more self-consistent. + if (iig_obs_type_.public_info) { + if (iig_obs_type_.perfect_recall) { + // Perfect recall public info. + for (int i = state.num_players_; i < state.history_.size(); ++i) + result.push_back(state.history_[i].action ? 'b' : 'p'); + } else { + // Imperfect recall public info - two different formats. 
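+        // (1) With no private info at all, report only the most recent public
+        //     event: "start game" before any action, otherwise "Bet"/"Pass".
+        // (2) With the observer's private info included, report each player's
+        //     current contribution to the pot instead.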
+ if (iig_obs_type_.private_info == PrivateInfoType::kNone) { + if (state.history_.empty()) { + absl::StrAppend(&result, "start game"); + } else if (state.history_.size() > state.num_players_) { + absl::StrAppend(&result, + state.history_.back().action ? "Bet" : "Pass"); + } + } else { + if (state.history_.size() > player) { + for (auto p = Player{0}; p < state.num_players_; p++) { + absl::StrAppend(&result, state.ante_[p]); + } + } + } + } + } + + // Fact that we're dealing a card. + if (iig_obs_type_.public_info && + iig_obs_type_.private_info == PrivateInfoType::kNone && + !state.history_.empty() && + state.history_.size() <= state.num_players_) { + int currently_dealing_to_player = state.history_.size() - 1; + absl::StrAppend(&result, "Deal to player ", currently_dealing_to_player); + } + return result; + } + + private: + IIGObservationType iig_obs_type_; +}; + +KuhnState::KuhnState(std::shared_ptr game) + : State(game), + first_bettor_(kInvalidPlayer), + card_dealt_(game->NumPlayers() + 1, kInvalidPlayer), + winner_(kInvalidPlayer), + pot_(kAnte * game->NumPlayers()), + // How much each player has contributed to the pot, indexed by pid. + ante_(game->NumPlayers(), kAnte) {} + +int KuhnState::CurrentPlayer() const { + if (IsTerminal()) { + return kTerminalPlayerId; + } else { + return (history_.size() < num_players_) ? kChancePlayerId + : history_.size() % num_players_; + } +} + +void KuhnState::DoApplyAction(Action move) { + // Additional book-keeping + if (history_.size() < num_players_) { + // Give card `move` to player `history_.size()` (CurrentPlayer will return + // kChancePlayerId, so we use that instead). + card_dealt_[move] = history_.size(); + } else if (move == ActionType::kBet) { + if (first_bettor_ == kInvalidPlayer) first_bettor_ = CurrentPlayer(); + pot_ += 1; + ante_[CurrentPlayer()] += kAnte; + } + + // We undo that before exiting the method. + // This is used in `DidBet`. + history_.push_back({CurrentPlayer(), move}); + + // Check for the game being over. + const int num_actions = history_.size() - num_players_; + if (first_bettor_ == kInvalidPlayer && num_actions == num_players_) { + // Nobody bet; the winner is the person with the highest card dealt, + // which is either the highest or the next-highest card. + // Losers lose 1, winner wins 1 * (num_players - 1) + winner_ = card_dealt_[num_players_]; + if (winner_ == kInvalidPlayer) winner_ = card_dealt_[num_players_ - 1]; + } else if (first_bettor_ != kInvalidPlayer && + num_actions == num_players_ + first_bettor_) { + // There was betting; so the winner is the person with the highest card + // who stayed in the hand. + // Check players in turn starting with the highest card. 
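+    // Cards are numbered 0..num_players_, so scan from the top card down and
+    // stop at the first one whose holder bet (or called).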
+ for (int card = num_players_; card >= 0; --card) { + const Player player = card_dealt_[card]; + if (player != kInvalidPlayer && DidBet(player)) { + winner_ = player; + break; + } + } + SPIEL_CHECK_NE(winner_, kInvalidPlayer); + } + history_.pop_back(); +} + +std::vector KuhnState::LegalActions() const { + if (IsTerminal()) return {}; + if (IsChanceNode()) { + std::vector actions; + for (int card = 0; card < card_dealt_.size(); ++card) { + if (card_dealt_[card] == kInvalidPlayer) actions.push_back(card); + } + return actions; + } else { + return {ActionType::kPass, ActionType::kBet}; + } +} + +std::string KuhnState::ActionToString(Player player, Action move) const { + if (player == kChancePlayerId) + return absl::StrCat("Deal:", move); + else if (move == ActionType::kPass) + return "Pass"; + else + return "Bet"; +} + +std::string KuhnState::ToString() const { + // The deal: space separated card per player + std::string str; + for (int i = 0; i < history_.size() && i < num_players_; ++i) { + if (!str.empty()) str.push_back(' '); + absl::StrAppend(&str, history_[i].action); + } + + // The betting history: p for Pass, b for Bet + if (history_.size() > num_players_) str.push_back(' '); + for (int i = num_players_; i < history_.size(); ++i) { + str.push_back(history_[i].action ? 'b' : 'p'); + } + + return str; +} + +bool KuhnState::IsTerminal() const { return winner_ != kInvalidPlayer; } + +std::vector KuhnState::Returns() const { + if (!IsTerminal()) { + return std::vector(num_players_, 0.0); + } + + std::vector returns(num_players_); + for (auto player = Player{0}; player < num_players_; ++player) { + const int bet = DidBet(player) ? 2 : 1; + returns[player] = (player == winner_) ? (pot_ - bet) : -bet; + } + return returns; +} + +std::string KuhnState::InformationStateString(Player player) const { + const KuhnGame& game = open_spiel::down_cast(*game_); + return game.info_state_observer_->StringFrom(*this, player); +} + +std::string KuhnState::ObservationString(Player player) const { + const KuhnGame& game = open_spiel::down_cast(*game_); + return game.default_observer_->StringFrom(*this, player); +} + +void KuhnState::InformationStateTensor(Player player, + absl::Span values) const { + ContiguousAllocator allocator(values); + const KuhnGame& game = open_spiel::down_cast(*game_); + game.info_state_observer_->WriteTensor(*this, player, &allocator); +} + +void KuhnState::ObservationTensor(Player player, + absl::Span values) const { + ContiguousAllocator allocator(values); + const KuhnGame& game = open_spiel::down_cast(*game_); + game.default_observer_->WriteTensor(*this, player, &allocator); +} + +std::unique_ptr KuhnState::Clone() const { + return std::unique_ptr(new KuhnState(*this)); +} + +void KuhnState::UndoAction(Player player, Action move) { + if (history_.size() <= num_players_) { + // Undoing a deal move. + card_dealt_[move] = kInvalidPlayer; + } else { + // Undoing a bet / pass. 
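+    // Undoing a bet removes its chip from the pot and, if it was the opening
+    // bet, clears first_bettor_; winner_ is reset because the state can no
+    // longer be terminal after an undo.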
+ if (move == ActionType::kBet) { + pot_ -= 1; + if (player == first_bettor_) first_bettor_ = kInvalidPlayer; + } + winner_ = kInvalidPlayer; + } + history_.pop_back(); + --move_number_; +} + +std::vector> KuhnState::ChanceOutcomes() const { + SPIEL_CHECK_TRUE(IsChanceNode()); + std::vector> outcomes; + const double p = 1.0 / (num_players_ + 1 - history_.size()); + for (int card = 0; card < card_dealt_.size(); ++card) { + if (card_dealt_[card] == kInvalidPlayer) outcomes.push_back({card, p}); + } + return outcomes; +} + +bool KuhnState::DidBet(Player player) const { + if (first_bettor_ == kInvalidPlayer) { + return false; + } else if (player == first_bettor_) { + return true; + } else if (player > first_bettor_) { + return history_[num_players_ + player].action == ActionType::kBet; + } else { + return history_[num_players_ * 2 + player].action == ActionType::kBet; + } +} + +std::unique_ptr KuhnState::ResampleFromInfostate( + int player_id, std::function rng) const { + std::unique_ptr state = game_->NewInitialState(); + Action player_chance = history_.at(player_id).action; + for (int p = 0; p < game_->NumPlayers(); ++p) { + if (p == history_.size()) return state; + if (p == player_id) { + state->ApplyAction(player_chance); + } else { + Action other_chance = player_chance; + while (other_chance == player_chance) { + other_chance = SampleAction(state->ChanceOutcomes(), rng()).first; + } + state->ApplyAction(other_chance); + } + } + SPIEL_CHECK_GE(state->CurrentPlayer(), 0); + if (game_->NumPlayers() == history_.size()) return state; + for (int i = game_->NumPlayers(); i < history_.size(); ++i) { + state->ApplyAction(history_.at(i).action); + } + return state; +} + +KuhnGame::KuhnGame(const GameParameters& params) + : Game(kGameType, params), num_players_(ParameterValue("players")) { + SPIEL_CHECK_GE(num_players_, kGameType.min_num_players); + SPIEL_CHECK_LE(num_players_, kGameType.max_num_players); + default_observer_ = std::make_shared(kDefaultObsType); + info_state_observer_ = std::make_shared(kInfoStateObsType); + private_observer_ = std::make_shared( + IIGObservationType{/*public_info*/false, + /*perfect_recall*/false, + /*private_info*/PrivateInfoType::kSinglePlayer}); + public_observer_ = std::make_shared( + IIGObservationType{/*public_info*/true, + /*perfect_recall*/false, + /*private_info*/PrivateInfoType::kNone}); +} + +std::unique_ptr KuhnGame::NewInitialState() const { + return std::unique_ptr(new KuhnState(shared_from_this())); +} + +std::vector KuhnGame::InformationStateTensorShape() const { + // One-hot for whose turn it is. + // One-hot encoding for the single private card. (n+1 cards = n+1 bits) + // Followed by 2 (n - 1 + n) bits for betting sequence (longest sequence: + // everyone except one player can pass and then everyone can bet/pass). + // n + n + 1 + 2 (n-1 + n) = 6n - 1. + return {6 * num_players_ - 1}; +} + +std::vector KuhnGame::ObservationTensorShape() const { + // One-hot for whose turn it is. + // One-hot encoding for the single private card. (n+1 cards = n+1 bits) + // Followed by the contribution of each player to the pot (n). + // n + n + 1 + n = 3n + 1. + return {3 * num_players_ + 1}; +} + +double KuhnGame::MaxUtility() const { + // In poker, the utility is defined as the money a player has at the end + // of the game minus then money the player had before starting the game. + // Everyone puts a chip in at the start, and then they each have one more + // chip. Most that a player can gain is (#opponents)*2. 
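+  // For example, with 3 players who all ante and all bet, the pot holds
+  // 3 + 3 = 6 chips, of which the winner contributed 2, so the winner's gain
+  // is 6 - 2 = 4 = (3 - 1) * 2, matching the value returned below.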
+ return (num_players_ - 1) * 2; +} + +double KuhnGame::MinUtility() const { + // In poker, the utility is defined as the money a player has at the end + // of the game minus then money the player had before starting the game. + // In Kuhn, the most any one player can lose is the single chip they paid + // to play and the single chip they paid to raise/call. + return -2; +} + +std::shared_ptr KuhnGame::MakeObserver( + absl::optional iig_obs_type, + const GameParameters& params) const { + if (params.empty()) { + return std::make_shared( + iig_obs_type.value_or(kDefaultObsType)); + } else { + return MakeRegisteredObserver(iig_obs_type, params); + } +} + +TabularPolicy GetAlwaysPassPolicy(const Game& game) { + SPIEL_CHECK_TRUE( + dynamic_cast(const_cast(&game)) != nullptr); + return GetPrefActionPolicy(game, {ActionType::kPass}); +} + +TabularPolicy GetAlwaysBetPolicy(const Game& game) { + SPIEL_CHECK_TRUE( + dynamic_cast(const_cast(&game)) != nullptr); + return GetPrefActionPolicy(game, {ActionType::kBet}); +} + +TabularPolicy GetOptimalPolicy(double alpha) { + SPIEL_CHECK_GE(alpha, 0.); + SPIEL_CHECK_LE(alpha, 1. / 3); + const double three_alpha = 3 * alpha; + std::unordered_map policy; + + // All infostates have two actions: Pass (0) and Bet (1). + // Player 0 + policy["0"] = {{0, 1 - alpha}, {1, alpha}}; + policy["0pb"] = {{0, 1}, {1, 0}}; + policy["1"] = {{0, 1}, {1, 0}}; + policy["1pb"] = {{0, 2. / 3. - alpha}, {1, 1. / 3. + alpha}}; + policy["2"] = {{0, 1 - three_alpha}, {1, three_alpha}}; + policy["2pb"] = {{0, 0}, {1, 1}}; + + // Player 1 + policy["0p"] = {{0, 2. / 3.}, {1, 1. / 3.}}; + policy["0b"] = {{0, 1}, {1, 0}}; + policy["1p"] = {{0, 1}, {1, 0}}; + policy["1b"] = {{0, 2. / 3.}, {1, 1. / 3.}}; + policy["2p"] = {{0, 0}, {1, 1}}; + policy["2b"] = {{0, 0}, {1, 1}}; + return TabularPolicy(policy); +} + +} // namespace kuhn_poker +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/kuhn_poker/kuhn_poker.h b/scenarios/bargaining/open_spiel/open_spiel/games/kuhn_poker/kuhn_poker.h new file mode 100644 index 0000000..7843efc --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/kuhn_poker/kuhn_poker.h @@ -0,0 +1,142 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_KUHN_POKER_H_ +#define OPEN_SPIEL_GAMES_KUHN_POKER_H_ + +#include +#include +#include +#include + +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// A simple game that includes chance and imperfect information +// http://en.wikipedia.org/wiki/Kuhn_poker +// +// For more information on this game (e.g. equilibrium sets, etc.) 
see +// http://poker.cs.ualberta.ca/publications/AAAI05.pdf +// +// The multiplayer (n>2) version is the one described in +// http://mlanctot.info/files/papers/aamas14sfrd-cfr-kuhn.pdf +// +// Parameters: +// "players" int number of players (default = 2) + +namespace open_spiel { +namespace kuhn_poker { + +inline constexpr const int kNumInfoStatesP0 = 6; +inline constexpr const int kNumInfoStatesP1 = 6; + +enum ActionType { kPass = 0, kBet = 1 }; + +class KuhnGame; +class KuhnObserver; + +class KuhnState : public State { + public: + explicit KuhnState(std::shared_ptr game); + KuhnState(const KuhnState&) = default; + + Player CurrentPlayer() const override; + + std::string ActionToString(Player player, Action move) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + void InformationStateTensor(Player player, + absl::Span values) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + void UndoAction(Player player, Action move) override; + std::vector> ChanceOutcomes() const override; + std::vector LegalActions() const override; + std::vector hand() const { return {card_dealt_[CurrentPlayer()]}; } + std::unique_ptr ResampleFromInfostate( + int player_id, std::function rng) const override; + + const std::vector& CardDealt() const { return card_dealt_; } + + protected: + void DoApplyAction(Action move) override; + + private: + friend class KuhnObserver; + + // Whether the specified player made a bet + bool DidBet(Player player) const; + + // The move history and number of players are sufficient information to + // specify the state of the game. We keep track of more information to make + // extracting legal actions and utilities easier. + // The cost of the additional book-keeping is more complex ApplyAction() and + // UndoAction() functions. + int first_bettor_; // the player (if any) who was first to bet + std::vector card_dealt_; // the player (if any) who has each card + int winner_; // winning player, or kInvalidPlayer if the + // game isn't over yet. + int pot_; // the size of the pot + // How much each player has contributed to the pot, indexed by pid. + std::vector ante_; +}; + +class KuhnGame : public Game { + public: + explicit KuhnGame(const GameParameters& params); + int NumDistinctActions() const override { return 2; } + std::unique_ptr NewInitialState() const override; + int MaxChanceOutcomes() const override { return num_players_ + 1; } + int NumPlayers() const override { return num_players_; } + double MinUtility() const override; + double MaxUtility() const override; + absl::optional UtilitySum() const override { return 0; } + std::vector InformationStateTensorShape() const override; + std::vector ObservationTensorShape() const override; + int MaxGameLength() const override { return num_players_ * 2 - 1; } + int MaxChanceNodesInHistory() const override { return num_players_; } + std::shared_ptr MakeObserver( + absl::optional iig_obs_type, + const GameParameters& params) const override; + + // Used to implement the old observation API. + std::shared_ptr default_observer_; + std::shared_ptr info_state_observer_; + std::shared_ptr public_observer_; + std::shared_ptr private_observer_; + + private: + // Number of players. + int num_players_; +}; + +// Returns policy that always passes. 
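+// For example, GetAlwaysPassPolicy(*LoadGame("kuhn_poker")) assigns
+// probability 1 to kPass at every information state of the game.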
+TabularPolicy GetAlwaysPassPolicy(const Game& game); + +// Returns policy that always bets. +TabularPolicy GetAlwaysBetPolicy(const Game& game); + +// The optimal Kuhn policy as stated at https://en.wikipedia.org/wiki/Kuhn_poker +// The Nash equilibrium is parametrized by alpha \in [0, 1/3]. +TabularPolicy GetOptimalPolicy(double alpha); + +} // namespace kuhn_poker +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_KUHN_POKER_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/kuhn_poker/kuhn_poker_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/kuhn_poker/kuhn_poker_test.cc new file mode 100644 index 0000000..7a36659 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/kuhn_poker/kuhn_poker_test.cc @@ -0,0 +1,81 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/kuhn_poker/kuhn_poker.h" + +#include "open_spiel/algorithms/get_all_states.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace kuhn_poker { +namespace { + +namespace testing = open_spiel::testing; + +void BasicKuhnTests() { + testing::LoadGameTest("kuhn_poker"); + testing::ChanceOutcomesTest(*LoadGame("kuhn_poker")); + testing::RandomSimTest(*LoadGame("kuhn_poker"), 100); + testing::RandomSimTestWithUndo(*LoadGame("kuhn_poker"), 1); + for (Player players = 3; players <= 5; players++) { + testing::RandomSimTest( + *LoadGame("kuhn_poker", {{"players", GameParameter(players)}}), 100); + } + auto observer = LoadGame("kuhn_poker") + ->MakeObserver(kDefaultObsType, + GameParametersFromString("single_tensor")); + testing::RandomSimTestCustomObserver(*LoadGame("kuhn_poker"), observer); +} + +void CountStates() { + std::shared_ptr game = LoadGame("kuhn_poker"); + auto states = algorithms::GetAllStates(*game, /*depth_limit=*/-1, + /*include_terminals=*/true, + /*include_chance_states=*/false); + // 6 deals * 9 betting sequences (-, p, b, pp, pb, bp, bb, pbp, pbb) = 54 + SPIEL_CHECK_EQ(states.size(), 54); +} + +void PolicyTest() { + using PolicyGenerator = std::function; + std::vector policy_generators = { + GetAlwaysPassPolicy, + GetAlwaysBetPolicy, + }; + + std::shared_ptr game = LoadGame("kuhn_poker"); + for (const auto& policy_generator : policy_generators) { + testing::TestEveryInfostateInPolicy(policy_generator, *game); + testing::TestPoliciesCanPlay(policy_generator, *game); + } +} + +} // namespace +} // namespace kuhn_poker +} // namespace open_spiel + +int main(int argc, char **argv) { + open_spiel::kuhn_poker::BasicKuhnTests(); + open_spiel::kuhn_poker::CountStates(); + open_spiel::kuhn_poker::PolicyTest(); + open_spiel::testing::CheckChanceOutcomes(*open_spiel::LoadGame( + "kuhn_poker", {{"players", open_spiel::GameParameter(3)}})); + open_spiel::testing::RandomSimTest(*open_spiel::LoadGame("kuhn_poker"), + /*num_sims=*/10); + open_spiel::testing::ResampleInfostateTest( + 
*open_spiel::LoadGame("kuhn_poker"), + /*num_sims=*/10); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/laser_tag/laser_tag.cc b/scenarios/bargaining/open_spiel/open_spiel/games/laser_tag/laser_tag.cc new file mode 100644 index 0000000..357854f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/laser_tag/laser_tag.cc @@ -0,0 +1,744 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/laser_tag/laser_tag.h" + +#include +#include +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/tensor_view.h" + +namespace open_spiel { +namespace laser_tag { +namespace { + +// Default parameters. +constexpr int kDefaultHorizon = 1000; +constexpr bool kDefaultZeroSum = false; +constexpr bool kDefaultFullyObs = true; +constexpr int kDefaultObsFront = 17; +constexpr int kDefaultObsBack = 2; +constexpr int kDefaultObsSide = 10; + +// Register with general sum, since the game is not guaranteed to be zero sum. +// If we create a zero sum instance, the type on the created game will show it. +const GameType kGameTypeGeneralSum{ + /*short_name=*/"laser_tag", + /*long_name=*/"Laser Tag", + GameType::Dynamics::kSimultaneous, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kPerfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kRewards, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"horizon", GameParameter(kDefaultHorizon)}, + {"zero_sum", GameParameter(kDefaultZeroSum)}, + {"grid", GameParameter(std::string(kDefaultGrid))}, + {"fully_obs", GameParameter(kDefaultFullyObs)}, + {"obs_front", GameParameter(kDefaultObsFront)}, + {"obs_back", GameParameter(kDefaultObsBack)}, + {"obs_side", GameParameter(kDefaultObsSide)}}}; + +GameType GameTypeForParams(const GameParameters& params) { + auto game_type = kGameTypeGeneralSum; + bool is_zero_sum = kDefaultZeroSum; + auto it = params.find("zero_sum"); + if (it != params.end()) is_zero_sum = it->second.bool_value(); + if (is_zero_sum) game_type.utility = GameType::Utility::kZeroSum; + + bool is_perfect_info = kDefaultFullyObs; + it = params.find("fully_obs"); + if (it != params.end()) is_perfect_info = it->second.bool_value(); + if (!is_perfect_info) { + game_type.information = GameType::Information::kImperfectInformation; + } + return game_type; +} + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new LaserTagGame(params)); +} + +Action ToAction(ChanceOutcome outcome) { + if (outcome == ChanceOutcome::kChanceInit0) { + return kChanceInit0Action; + } else if (outcome == ChanceOutcome::kChanceInit1) { + return kChanceInit1Action; + } else { + SpielFatalError("Unrecognized outcome"); + } +} + 
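+// Once registered below, the game can be built by name. A minimal usage
+// sketch (the horizon/zero_sum values here are illustrative; see
+// laser_tag_test.cc for the parameters actually exercised):
+//
+//   std::shared_ptr<const Game> game =
+//       LoadGame("laser_tag", {{"horizon", GameParameter(100)},
+//                              {"zero_sum", GameParameter(true)}});
+//   std::unique_ptr<State> state = game->NewInitialState();
+//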
+REGISTER_SPIEL_GAME(kGameTypeGeneralSum, Factory); + +// Valid characters: AB*. +constexpr int kCellStates = 4; + +// Movement. +enum MovementType { + kLeftTurn = 0, + kRightTurn = 1, + kForwardMove = 2, + kBackwardMove = 3, + kStepLeft = 4, + kStepRight = 5, + kStand = 6, + kForwardLeft = 7, + kForwardRight = 8, + kFire = 9 +}; + +constexpr int kNumMovementActions = 10; + +// Orientation +enum Orientation { kNorth = 0, kSouth = 1, kEast = 2, kWest = 3 }; + +// mapping of start and end orientations for left and right turn +std::map leftMapping = {{0, 3}, {1, 2}, {2, 0}, {3, 1}}; +std::map rightMapping = {{0, 2}, {1, 3}, {2, 1}, {3, 0}}; + +// four directions: N,S,E,W +constexpr std::array, 4> row_offsets = { + {{0, 0, -1, 1, 0, 0, 0, -1, -1, 0}, + {0, 0, 1, -1, 0, 0, 0, 1, 1, 0}, + {0, 0, 0, 0, -1, 1, 0, 0, 0, 0}, + {0, 0, 0, 0, 1, -1, 0, 0, 0, 0}}}; +constexpr std::array, 4> col_offsets = { + {{0, 0, 0, 0, -1, 1, 0, 0, 0, 0}, + {0, 0, 0, 0, 1, -1, 0, 0, 0, 0}, + {0, 0, 1, -1, 0, 0, 0, 1, 1, 0}, + {0, 0, -1, 1, 0, 0, 0, -1, -1, 0}}}; +} // namespace + +LaserTagState::LaserTagState(std::shared_ptr game, const Grid& grid) + : SimMoveState(game), grid_(grid) { + GameParameters params = game_->GetParameters(); + auto it = params.find("fully_obs"); + if (it != params.end()) fully_obs_ = it->second.bool_value(); + it = params.find("obs_front"); + if (it != params.end()) obs_front_ = it->second.int_value(); + it = params.find("obs_back"); + if (it != params.end()) obs_back_ = it->second.int_value(); + it = params.find("obs_side"); + if (it != params.end()) obs_side_ = it->second.int_value(); +} + +std::string LaserTagState::ActionToString(int player, Action action_id) const { + if (player == kSimultaneousPlayerId) + return FlatJointActionToString(action_id); + SPIEL_CHECK_GE(action_id, 0); + + std::string result = ""; + if (player == kChancePlayerId) { + SPIEL_CHECK_LT(action_id, game_->MaxChanceOutcomes()); + + // Chance moves. + if (action_id == kChanceInit0Action) { + result = "(A's action first)"; + } else if (action_id == kChanceInit1Action) { + result = "(B's action first)"; + } else { + return absl::StrCat("(spawned at location #", + action_id - kNumInitiativeChanceOutcomes, ")"); + } + } else { + SPIEL_CHECK_LT(action_id, game_->NumDistinctActions()); + + // Regular move actions. 
+ if (action_id == kLeftTurn) { + result = "left turn"; + } else if (action_id == kRightTurn) { + result = "right turn"; + } else if (action_id == kForwardMove) { + result = "move forward"; + } else if (action_id == kBackwardMove) { + result = "move backward"; + } else if (action_id == kStepLeft) { + result = "step left"; + } else if (action_id == kStepRight) { + result = "step right"; + } else if (action_id == kStand) { + result = "stand"; + } else if (action_id == kForwardLeft) { + result = "step forward and left turn"; + } else if (action_id == kForwardRight) { + result = "step forward and right turn"; + } else if (action_id == kFire) { + result = "fire"; + } + } + return result; +} + +void LaserTagState::SetField(int r, int c, char v) { + field_[r * grid_.num_cols + c] = v; + + if (v == 'A') { + player_row_[0] = r; + player_col_[0] = c; + } else if (v == 'B') { + player_row_[1] = r; + player_col_[1] = c; + } +} + +char LaserTagState::field(int r, int c) const { + return field_[r * grid_.num_cols + c]; +} + +void LaserTagState::Reset(int horizon, bool zero_sum) { + num_tags_ = 0; + horizon_ = horizon; + zero_sum_rewards_ = zero_sum; + field_.resize(grid_.num_rows * grid_.num_cols, '.'); + + for (auto i : grid_.obstacles) { + SetField(i.first, i.second, '*'); + } + + cur_player_ = kChancePlayerId; + total_moves_ = 0; + needs_respawn_ = {0, 1}; + rewards_ = {0, 0}; + returns_ = {0, 0}; + player_facing_ = {{kSouth, kSouth}}; +} + +void LaserTagState::DoApplyActions(const std::vector& moves) { + SPIEL_CHECK_EQ(moves.size(), 2); + SPIEL_CHECK_EQ(cur_player_, kSimultaneousPlayerId); + moves_[0] = moves[0]; + moves_[1] = moves[1]; + cur_player_ = kChancePlayerId; +} + +bool LaserTagState::InBounds(int r, int c) const { + return (r >= 0 && c >= 0 && r < grid_.num_rows && c < grid_.num_cols); +} + +bool LaserTagState::ResolveMove(int player, int move) { + int old_row = player_row_[player]; + int old_col = player_col_[player]; + + int current_orientation = player_facing_[player]; + + // move depends on player's current orientation + int new_row = old_row + row_offsets[current_orientation][move]; + int new_col = old_col + col_offsets[current_orientation][move]; + + if (!InBounds(new_row, new_col)) { // move is out of bounds so do nothing + return false; + } + + char from_piece = field(old_row, old_col); + + if (move == kLeftTurn) { // turn left + player_facing_[player] = leftMapping.find(current_orientation)->second; + return false; + } else if (move == kRightTurn) { // turn right + player_facing_[player] = rightMapping.find(current_orientation)->second; + return false; + } else if (move == kForwardMove || move == kBackwardMove || + move == kStepLeft || move == kStepRight || move == kForwardLeft || + move == kForwardRight) { // move left or right or forward or + // backward if able + + if (field(new_row, new_col) == '.') { + SetField(old_row, old_col, '.'); + SetField(new_row, new_col, from_piece); + + // move and also turn + if (move == kForwardLeft) { + player_facing_[player] = leftMapping.find(current_orientation)->second; + } else if (move == kForwardRight) { + player_facing_[player] = rightMapping.find(current_orientation)->second; + } + } + + return false; + } else if (move == kFire) { // fire! 
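+    // The beam starts on the shooter's square and advances one cell at a time
+    // in the facing direction, stopping at the first player hit, the first
+    // obstacle, or the edge of the grid.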
+ int cur_row = old_row; + int cur_col = new_col; + int tagger = kInvalidPlayer; + int got_tagged = kInvalidPlayer; + + // laser goes in direction agent is facing + if (current_orientation == kNorth) { + cur_row--; + } else if (current_orientation == kSouth) { + cur_row++; + } else if (current_orientation == kEast) { + cur_col++; + } else if (current_orientation == kWest) { + cur_col--; + } + + while (InBounds(cur_row, + cur_col)) { // shoot and track laser while it is in bounds + char fired_upon = field(cur_row, cur_col); + + if (fired_upon == 'A') { // A was hit! + tagger = 1; + got_tagged = 0; + break; + } else if (fired_upon == 'B') { // B was hit! + tagger = 0; + got_tagged = 1; + break; + } else if (fired_upon == '*') { // obstacle was hit so do nothing + return false; + } + + // laser goes in direction agent is facing + if (current_orientation == kNorth) { + cur_row--; + } else if (current_orientation == kSouth) { + cur_row++; + } else if (current_orientation == kEast) { + cur_col++; + } else if (current_orientation == kWest) { + cur_col--; + } + } + + // If there was a tag, set the rewards appropriately. + if (tagger != kInvalidPlayer) { + num_tags_++; + needs_respawn_ = {got_tagged}; + SetField(player_row_[got_tagged], player_col_[got_tagged], '.'); + player_row_[got_tagged] = -1; + player_col_[got_tagged] = -1; + } + + if (tagger == 0 && zero_sum_rewards_) { + rewards_[0] += 1; + rewards_[1] -= 1; + } else if (tagger == 0 && !zero_sum_rewards_) { + rewards_[0] += 1; + } else if (tagger == 1 && zero_sum_rewards_) { + rewards_[0] -= 1; + rewards_[1] += 1; + } else if (tagger == 1 && !zero_sum_rewards_) { + rewards_[1] += 1; + } + + return tagger != kInvalidPlayer; + } + + return false; +} + +void LaserTagState::DoApplyAction(Action action_id) { + if (IsSimultaneousNode()) { + ApplyFlatJointAction(action_id); + return; + } + SPIEL_CHECK_TRUE(IsChanceNode()); + SPIEL_CHECK_GE(action_id, 0); + SPIEL_CHECK_LT(action_id, game_->MaxChanceOutcomes()); + + if (action_id == kChanceInit0Action) { + rewards_ = {0, 0}; + bool tagged = ResolveMove(0, moves_[0]); + if (!tagged) { + ResolveMove(1, moves_[1]); + } + returns_[0] += rewards_[0]; + returns_[1] += rewards_[1]; + total_moves_++; + } else if (action_id == kChanceInit1Action) { + rewards_ = {0, 0}; + bool tagged = ResolveMove(1, moves_[1]); + if (!tagged) { + ResolveMove(0, moves_[0]); + } + returns_[0] += rewards_[0]; + returns_[1] += rewards_[1]; + total_moves_++; + } else { + char spawning_player_char = ' '; + int spawn_loc = action_id - kNumInitiativeChanceOutcomes; + SPIEL_CHECK_GE(spawn_loc, 0); + SPIEL_CHECK_LT(spawn_loc, grid_.spawn_points.size()); + + // spawn locations and move resolve order + if (!needs_respawn_.empty()) { + int spawning_player = needs_respawn_.back(); + spawning_player_char = spawning_player == 0 ? 
'A' : 'B'; + } + + SPIEL_CHECK_NE(spawning_player_char, ' '); + SPIEL_CHECK_EQ( + field(grid_.spawn_points[spawn_loc].first, + grid_.spawn_points[spawn_loc].second), '.'); + + SetField(grid_.spawn_points[spawn_loc].first, + grid_.spawn_points[spawn_loc].second, spawning_player_char); + needs_respawn_.pop_back(); + } + + if (needs_respawn_.empty()) { + cur_player_ = kSimultaneousPlayerId; + } else { + cur_player_ = kChancePlayerId; + } +} + +std::vector LaserTagState::LegalActions(int player) const { + if (IsTerminal()) return {}; + if (IsChanceNode()) { + if (!needs_respawn_.empty()) { + std::vector outcomes; + for (int i = 0; i < grid_.spawn_points.size(); ++i) { + if (field(grid_.spawn_points[i].first, + grid_.spawn_points[i].second) == '.') { + outcomes.push_back(kNumInitiativeChanceOutcomes + i); + } + } + return outcomes; + } else { + return {ToAction(ChanceOutcome::kChanceInit0), + ToAction(ChanceOutcome::kChanceInit1)}; + } + } else { + return {kLeftTurn, kRightTurn, kForwardMove, kBackwardMove, kStepLeft, + kStepRight, kStand, kForwardLeft, kForwardRight, kFire}; + } +} + +std::vector> LaserTagState::ChanceOutcomes() const { + SPIEL_CHECK_TRUE(IsChanceNode()); + if (!needs_respawn_.empty()) { + int num_legal_actions = 0; + for (int i = 0; i < grid_.spawn_points.size(); ++i) { + if (field(grid_.spawn_points[i].first, + grid_.spawn_points[i].second) == '.') { + num_legal_actions += 1; + } + } + std::vector> outcomes; + const double unif_prob = 1.0 / num_legal_actions; + for (int i = 0; i < grid_.spawn_points.size(); ++i) { + if (field(grid_.spawn_points[i].first, + grid_.spawn_points[i].second) == '.') { + outcomes.push_back({kNumInitiativeChanceOutcomes + i, unif_prob}); + } + } + return outcomes; + } else { + return {{ToAction(ChanceOutcome::kChanceInit0), 0.5}, + {ToAction(ChanceOutcome::kChanceInit1), 0.5}}; + } +} + +std::string LaserTagState::ToString() const { + std::string result = ""; + + for (int r = 0; r < grid_.num_rows; r++) { + for (int c = 0; c < grid_.num_cols; c++) { + result += field(r, c); + } + + absl::StrAppend(&result, "\n"); + } + + absl::StrAppend(&result, "Orientations: ", player_facing_[0], " ", + player_facing_[1], "\n"); + if (IsChanceNode()) absl::StrAppend(&result, "Chance Node"); + return result; +} + +std::string LaserTagState::ObservationString(int player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + if (fully_obs_) { + return ToString(); + } else { + return PartialObservationString(player); + } +} + +std::string LaserTagState::PartialObservationString(int player) const { + std::string result = ""; + + std::vector grid_position = {-1, -1}; + std::vector player_visible = {false, false}; + char value = ' '; + for (int r = 0; r < obs_front_ + obs_back_ + 1; r++) { + for (int c = 0; c < obs_side_ * 2 + 1; c++) { + grid_position = map_observation_to_grid(player, r, c); + + if (grid_position[0] < 0) { + // observed cell out-of-bounds of game grid + result += "*"; + } else { + value = field(grid_position[0], grid_position[1]); + result += value; + if (value == 'A') { + player_visible[0] = true; + } else if (value == 'B') { + player_visible[1] = true; + } + } + } + + absl::StrAppend(&result, "\n"); + } + + absl::StrAppend(&result, "Orientations:"); + for (int p = 0; p < num_players_; p++) { + if (player_visible[p]) { + absl::StrAppend(&result, " ", player_facing_[p]); + } else { + absl::StrAppend(&result, " -1"); + } + } + + absl::StrAppend(&result, "\n"); + + if (IsChanceNode()) absl::StrAppend(&result, "Chance Node"); + 
return result; +} + +bool LaserTagState::IsTerminal() const { + return ((horizon_ >= 0 && total_moves_ >= horizon_) || + (horizon_ < 0 && num_tags_ > 0)); +} + +std::vector LaserTagState::Rewards() const { return rewards_; } + +std::vector LaserTagState::Returns() const { return returns_; } + +int LaserTagState::observation_plane(int r, int c) const { + int plane = -1; + switch (field(r, c)) { + case 'A': + plane = 0; + break; + case 'B': + plane = 1; + break; + case '.': + plane = 2; + break; + case '*': + plane = 3; + break; + default: + std::cerr << "Invalid character on field: " << field(r, c) << std::endl; + plane = -1; + break; + } + + return plane; +} + +std::vector LaserTagState::map_observation_to_grid(int player, int r, + int c) const { + // Maps from observation tensor position to game grid position + // Returns [-1, -1] if the result if outside of game grid bounds + int grid_row = -1; + int grid_col = -1; + switch (player_facing_[player]) { + case kNorth: + grid_row = player_row_[player] + r - obs_front_; + grid_col = player_col_[player] + c - obs_side_; + break; + case kSouth: + grid_row = player_row_[player] + obs_front_ - r; + grid_col = player_col_[player] + obs_side_ - c; + break; + case kEast: + grid_row = player_row_[player] + c - obs_side_; + grid_col = player_col_[player] + obs_front_ - r; + break; + case kWest: + grid_row = player_row_[player] + obs_side_ - c; + grid_col = player_col_[player] + r - obs_front_; + break; + } + + if (0 <= grid_row && grid_row < grid_.num_rows && 0 <= grid_col && + grid_col < grid_.num_cols) { + return {grid_row, grid_col}; + } else { + // observed cell out-of-bounds of game grid + return {-1, -1}; + } +} + +void LaserTagState::ObservationTensor(int player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + if (fully_obs_) { + FullObservationTensor(values); + } else { + PartialObservationTensor(player, values); + } +} + +void LaserTagState::FullObservationTensor(absl::Span values) const { + TensorView<3> view(values, {kCellStates, grid_.num_rows, grid_.num_cols}, + true); + + for (int r = 0; r < grid_.num_rows; r++) { + for (int c = 0; c < grid_.num_cols; c++) { + int plane = observation_plane(r, c); + SPIEL_CHECK_TRUE(plane >= 0 && plane < kCellStates); + view[{plane, r, c}] = 1.0; + } + } +} + +void LaserTagState::PartialObservationTensor(int player, + absl::Span values) const { + // Get observation tensor for player with partial observability. + // + // Properties of the observation grid + // 1. Player is always located in center row obs_back_ rows from the bottom + // row. + // 2. If any cell of the players field of vision is outside the grid, then + // these cells are treated as obstacles. 
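+  // With the default parameters (obs_front=17, obs_back=2, obs_side=10) this
+  // gives a 20-row by 21-column window, i.e. a {4, 20, 21} one-hot tensor,
+  // consistent with ObservationTensorShape() and the header documentation.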
+ int num_obs_rows = obs_front_ + obs_back_ + 1; + int num_obs_cols = obs_side_ * 2 + 1; + TensorView<3> view(values, {kCellStates, num_obs_rows, num_obs_cols}, true); + + std::vector grid_position = {-1, -1}; + int plane = -1; + for (int r = 0; r < num_obs_rows; r++) { + for (int c = 0; c < num_obs_cols; c++) { + grid_position = map_observation_to_grid(player, r, c); + + if (grid_position[0] < 0) { + // observed cell out-of-bounds of game grid + plane = 3; // '*' + } else { + plane = observation_plane(grid_position[0], grid_position[1]); + } + + SPIEL_CHECK_TRUE(plane >= 0 && plane < kCellStates); + view[{plane, r, c}] = 1.0; + } + } +} + +std::unique_ptr LaserTagState::Clone() const { + return std::unique_ptr(new LaserTagState(*this)); +} + +std::unique_ptr LaserTagGame::NewInitialState() const { + std::unique_ptr state( + new LaserTagState(shared_from_this(), grid_)); + state->Reset(horizon_, zero_sum_); + return state; +} + +int LaserTagGame::NumDistinctActions() const { return kNumMovementActions; } + +int LaserTagGame::MaxChanceOutcomes() const { + // First two for determining initiative, next n for spawn point locations, + // where n is equal to the number of spawn points. + return kNumInitiativeChanceOutcomes + grid_.spawn_points.size(); +} + +double LaserTagGame::MinUtility() const { + if (horizon_ < 0) { + return -1; + } else { + return -horizon_; + } +} + +double LaserTagGame::MaxUtility() const { + if (horizon_ < 0) { + return 1; + } else { + return horizon_; + } +} + +absl::optional LaserTagGame::UtilitySum() const { + if (zero_sum_) + return 0; + else + return absl::nullopt; +} + +std::vector LaserTagGame::ObservationTensorShape() const { + if (fully_obs_) { + return {kCellStates, grid_.num_rows, grid_.num_cols}; + } else { + return {kCellStates, obs_front_ + obs_back_ + 1, obs_side_ * 2 + 1}; + } +} + +namespace { +Grid ParseGrid(const std::string& grid_string) { + Grid grid{/*num_rows=*/0, /*num_cols=*/0}; + int row = 0; + int col = 0; + int count_empty_cells = 0; + for (auto c : grid_string) { + if (c == '\n') { + row += 1; + col = 0; + } else { + if (row >= grid.num_rows) grid.num_rows = row + 1; + if (col >= grid.num_cols) grid.num_cols = col + 1; + if (c == '*') { + grid.obstacles.emplace_back(row, col); + } else if (c == 'S') { + grid.spawn_points.emplace_back(row, col); + } else if (c == '.') { + ++count_empty_cells; + } else { + SpielFatalError(absl::StrCat("Invalid char '", std::string(1, c), + "' at grid (", row, ",", col, ")")); + } + col += 1; + } + } + // Must have at least one spawn point. 
+ SPIEL_CHECK_GE(grid.spawn_points.size(), 0); + SPIEL_CHECK_EQ( + grid.num_rows * grid.num_cols, + count_empty_cells + grid.spawn_points.size() + grid.obstacles.size()); + return grid; +} +} // namespace + +LaserTagGame::LaserTagGame(const GameParameters& params) + : SimMoveGame(GameTypeForParams(params), params), + grid_(ParseGrid(ParameterValue("grid"))), + horizon_(ParameterValue("horizon")), + zero_sum_(ParameterValue("zero_sum")), + fully_obs_(ParameterValue("fully_obs")), + obs_front_(ParameterValue("obs_front")), + obs_back_(ParameterValue("obs_back")), + obs_side_(ParameterValue("obs_side")) {} + +} // namespace laser_tag +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/laser_tag/laser_tag.h b/scenarios/bargaining/open_spiel/open_spiel/games/laser_tag/laser_tag.h new file mode 100644 index 0000000..0e7c576 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/laser_tag/laser_tag.h @@ -0,0 +1,185 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_LASER_TAG_H_ +#define OPEN_SPIEL_GAMES_LASER_TAG_H_ + +#include +#include +#include +#include +#include + +#include "open_spiel/simultaneous_move_game.h" +#include "open_spiel/spiel.h" + +// A fully observable version of the first-person gridworld laser tag game from +// [1,2] with identical mechanics. This implementation includes support for +// both fully observable (the default) and first-person partially observable +// modes. The current grid is "small2" from [2]. +// +// When run in partially observable mode each agent has a local field-of-view, +// and sees (by default) 17 spaces in front, 10 to either side, and 2 spaces +// behind (per [2]). Each agent's observation is encoded as a 4x20x21 tensor, +// where each plane encodes the presence of player A, player B, empty, obstacle, +// respectively (same as fully observable tensor observations). The dimensions +// - front, side, and back - of the field of vision can be changed (see +// parameters below). +// +// [1] Leibo et al. Multi-agent Reinforcement Learning in Sequential Social +// Dilemmas. https://arxiv.org/abs/1702.03037 +// [2] Lanctot et al. A Unified Game-Theoretic Approach to Multiagent +// Reinforcement Learning", https://arxiv.org/abs/1711.00832 +// +// Parameters: +// "horizon" int Number of steps per episode. If this is < 0, then +// the episode ends after the first tag. +// (default = 1000). +// "zero_sum" bool If set, rewards are +1 for a tag and -1 for being +// tagged. Otherwise, there there is only positive +// reward of +1 per tag. (default = false). +// "grid" string String representation of grid. +// Empty spaces are '.', obstacles are '*', spawn +// points are 'S' (there must be four of these). +// "fully_obs" bool If set, the environment is full observable. 
+// Otherwise, the environment is partially +// observable (default = true) +// "obs_front" int Number of squares each agent sees in front of +// themself (only used if fully_obs=false) +// (default=17) +// "obs_back" int Number of squares each agent sees behind themself +// (only used if fully_obs=false) (default=2) +// "obs_side" int Number of squares each agent sees to either side +// of themself (only used if fully_obs=false) +// (default=10) + +namespace open_spiel { +namespace laser_tag { + +inline constexpr char kDefaultGrid[] = + "S.....S\n" + ".......\n" + "..*.*..\n" + ".**.**.\n" + "..*.*..\n" + ".......\n" + "S.....S"; + +struct Grid { + int num_rows; + int num_cols; + std::vector> obstacles; + std::vector> spawn_points; +}; + +// Number of chance outcomes reserved for "initiative" (learning which player's +// action gets resolved first). +inline constexpr int kNumInitiativeChanceOutcomes = 2; + +// Reserved chance outcomes for initiative. The ones following these are to +// determine spawn point locations. +inline constexpr Action kChanceInit0Action = 0; +inline constexpr Action kChanceInit1Action = 1; +enum class ChanceOutcome { kChanceInit0, kChanceInit1 }; + +class LaserTagState : public SimMoveState { + public: + explicit LaserTagState(std::shared_ptr game, const Grid& grid); + LaserTagState(const LaserTagState&) = default; + + std::string ActionToString(int player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Rewards() const override; + std::vector Returns() const override; + std::string ObservationString(int player) const override; + void ObservationTensor(int player, absl::Span values) const override; + int CurrentPlayer() const override { + return IsTerminal() ? kTerminalPlayerId : cur_player_; + } + std::unique_ptr Clone() const override; + + ActionsAndProbs ChanceOutcomes() const override; + + void Reset(int horizon, bool zero_sum); + std::vector LegalActions(int player) const override; + + protected: + void DoApplyAction(Action action_id) override; + void DoApplyActions(const std::vector& moves) override; + + private: + void SetField(int r, int c, char v); + char field(int r, int c) const; + bool ResolveMove(int player, int move); // Return true if there was a tag + bool InBounds(int r, int c) const; + int observation_plane(int r, int c) const; + std::vector map_observation_to_grid(int player, int r, int c) const; + std::string PartialObservationString(int player) const; + void FullObservationTensor(absl::Span values) const; + void PartialObservationTensor(int player, absl::Span values) const; + + const Grid& grid_; + + // Fields set to bad values. Use Game::NewInitialState(). + int num_tags_ = 0; + int cur_player_ = -1; // Could be chance's turn. + int total_moves_ = -1; + int horizon_ = -1; + bool zero_sum_rewards_ = false; + bool fully_obs_ = false; + int obs_front_ = -1; + int obs_back_ = -1; + int obs_side_ = -1; + std::vector needs_respawn_ = {0, 1}; + std::array player_row_ = {{-1, -1}}; // Players' rows. + std::array player_col_ = {{-1, -1}}; // Players' cols. + std::array player_facing_ = {{1, 1}}; // Player facing direction. + std::vector rewards_ = {0, 0}; + std::vector returns_ = {0, 0}; + int ball_row_ = -1; + int ball_col_ = -1; + std::array moves_ = {{-1, -1}}; // Moves taken. 
+ std::vector field_; +}; + +class LaserTagGame : public SimMoveGame { + public: + explicit LaserTagGame(const GameParameters& params); + int NumDistinctActions() const override; + std::unique_ptr NewInitialState() const override; + int MaxChanceOutcomes() const override; + int NumPlayers() const override { return 2; } + double MinUtility() const override; + double MaxUtility() const override; + absl::optional UtilitySum() const override; + std::vector ObservationTensorShape() const override; + int MaxGameLength() const override { return horizon_; } + // TODO: verify whether this bound is tight and/or tighten it. + int MaxChanceNodesInHistory() const override { return MaxGameLength(); } + + private: + Grid grid_; + int horizon_; + bool zero_sum_; + bool fully_obs_; + int obs_front_; + int obs_back_; + int obs_side_; +}; + +} // namespace laser_tag +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_LASER_TAG_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/laser_tag/laser_tag_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/laser_tag/laser_tag_test.cc new file mode 100644 index 0000000..58498c9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/laser_tag/laser_tag_test.cc @@ -0,0 +1,467 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/laser_tag/laser_tag.h" + +#include +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace laser_tag { +namespace { + +namespace testing = open_spiel::testing; + +// Spawn location values for the default map only. +constexpr int kTopLeftSpawnOutcome = kNumInitiativeChanceOutcomes; +constexpr int kTopRightSpawnOutcome = kNumInitiativeChanceOutcomes + 1; + +void BasicLaserTagTests() { + testing::LoadGameTest("laser_tag"); + testing::ChanceOutcomesTest(*LoadGame("laser_tag")); + testing::RandomSimTest(*LoadGame("laser_tag"), 100); +} + +void SimpleTagTests(int horizon, bool zero_sum, std::string grid) { + std::shared_ptr game = + LoadGame("laser_tag", {{"horizon", GameParameter(horizon)}, + {"zero_sum", GameParameter(zero_sum)}, + {"grid", GameParameter(grid)}}); + std::unique_ptr state = game->NewInitialState(); + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(kTopRightSpawnOutcome); // Spawn B top-right + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(kTopLeftSpawnOutcome); // Spawn A top-left + + // Both facing south + SPIEL_CHECK_FALSE(state->IsChanceNode()); + state->ApplyActions({0, 1}); // A: Turn left, B: Turn right. + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(kChanceInit0Action); // chance node: player 0 first + + SPIEL_CHECK_FALSE(state->IsChanceNode()); + state->ApplyActions({6, 1}); // A: Stand, B: Turn right. 
+ SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(kChanceInit0Action); // chance node: player 0 first + + SPIEL_CHECK_FALSE(state->IsChanceNode()); + state->ApplyActions({6, 2}); // A: Stand, B: Move forward. + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(kChanceInit0Action); // chance node: player 0 first + + SPIEL_CHECK_FALSE(state->IsChanceNode()); + state->ApplyActions({6, 0}); // A: Stand, B: Turn left. + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(kChanceInit0Action); // chance node: player 0 first + + SPIEL_CHECK_FALSE(state->IsChanceNode()); + state->ApplyActions({9, 9}); // stand-off! + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(kChanceInit1Action); // chance node: player 1 first + + std::cout << state->ToString() << std::endl; + + if (horizon == -1) { + // End of episode (since horizon = -1) + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_EQ(state->PlayerReward(0), zero_sum ? -1 : 0); + SPIEL_CHECK_EQ(state->PlayerReward(1), 1); + SPIEL_CHECK_EQ(state->PlayerReturn(0), zero_sum ? -1 : 0); + SPIEL_CHECK_EQ(state->PlayerReturn(1), 1); + return; + } else { + SPIEL_CHECK_FALSE(state->IsTerminal()); + SPIEL_CHECK_EQ(state->PlayerReward(0), zero_sum ? -1 : 0); + SPIEL_CHECK_EQ(state->PlayerReward(1), 1); + SPIEL_CHECK_EQ(state->PlayerReturn(0), zero_sum ? -1 : 0); + SPIEL_CHECK_EQ(state->PlayerReturn(1), 1); + } + + std::cout << state->ToString() << std::endl; + + // horizon > 0, continue... do it again! + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(kTopLeftSpawnOutcome); // Spawn A at top-left again + SPIEL_CHECK_FALSE(state->IsChanceNode()); + state->ApplyActions({9, 9}); // stand-off! + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(kChanceInit0Action); // chance node: player 0 first + SPIEL_CHECK_FALSE(state->IsTerminal()); + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(kTopRightSpawnOutcome); // Spawn B at top-right again + SPIEL_CHECK_FALSE(state->IsChanceNode()); + + // Immediate tag reward goes to player 0. + SPIEL_CHECK_EQ(state->PlayerReward(0), 1); + SPIEL_CHECK_EQ(state->PlayerReward(1), zero_sum ? -1 : 0); + + // Now they have a tag each. In a zero-sum game, their returns are both 0. + // Otherwise, they each have 1. + SPIEL_CHECK_EQ(state->PlayerReturn(0), zero_sum ? 0 : 1); + SPIEL_CHECK_EQ(state->PlayerReturn(1), zero_sum ? 
0 : 1); +} + +void BasicLaserTagTestsBigGrid() { + constexpr const char big_grid[] = + ".....S................\n" + "S..***....*.....S**...\n" + "...*S..****...*......*\n" + ".......*S.**..*...****\n" + "..**...*......*......*\n" + "..S....*......**....**\n" + "**....***.....*S....**\n" + "S......*.....**......S\n" + "*...*........S**......\n" + "**..**....**........**\n" + "*....................S\n"; + testing::ChanceOutcomesTest( + *LoadGame("laser_tag", {{"grid", GameParameter(std::string(big_grid))}})); + testing::RandomSimTest( + *LoadGame("laser_tag", {{"grid", GameParameter(std::string(big_grid))}}), + 10); +} + +void BasicPartiallyObservableLaserTagTests() { + testing::ChanceOutcomesTest( + *LoadGame("laser_tag", {{"fully_obs", GameParameter(false)}})); + + testing::RandomSimTest( + *LoadGame("laser_tag", {{"fully_obs", GameParameter(false)}}), 100); +} + +std::vector get_obs_tensor_from_string(const std::string& obs_string, + int obs_grid_size) { + std::vector tensor(4 * obs_grid_size, 0.0); + + int num_newlines = 0; + for (int i = 0; i < obs_string.length(); i++) { + switch (obs_string[i]) { + case 'A': + tensor[i - num_newlines] = 1.0; + break; + case 'B': + tensor[obs_grid_size + i - num_newlines] = 1.0; + break; + case '.': + tensor[2 * obs_grid_size + i - num_newlines] = 1.0; + break; + case '*': + tensor[3 * obs_grid_size + i - num_newlines] = 1.0; + break; + case '\n': + num_newlines += 1; + break; + default: + // Reached 'O' in "Orientations" + SPIEL_CHECK_EQ(obs_string[i], 'O'); + return tensor; + } + } + return tensor; +} + +void PartiallyObservableLaserTagDefaultObsTests() { + float tolerence = 0.0001; + std::shared_ptr game = + LoadGame("laser_tag", {{"fully_obs", GameParameter(false)}, + {"obs_front", GameParameter(17)}, + {"obs_back", GameParameter(2)}, + {"obs_side", GameParameter(10)}, + {"grid", GameParameter(laser_tag::kDefaultGrid)}}); + std::unique_ptr state = game->NewInitialState(); + state->ApplyAction(kTopRightSpawnOutcome); // Spawn B top-right + state->ApplyAction(kTopLeftSpawnOutcome); // Spawn A top-left + + // A.....B + // ....... + // ..*.*.. + // .**.**. + // ..*.*.. + // ....... + // ....... 
+ // + // Both A and B facing south + + int obs_grid_size = (17 + 2 + 1) * (2 * 10 + 1); + std::string expected_obs_string_A = + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "****.......**********\n" + "****.......**********\n" + "****..*.*..**********\n" + "****.**.**.**********\n" + "****..*.*..**********\n" + "****.......**********\n" + "****B.....A**********\n" + "*********************\n" + "*********************\n" + "Orientations: 1 1\n"; + std::vector expected_obs_tensor_A = + get_obs_tensor_from_string(expected_obs_string_A, obs_grid_size); + + std::string expected_obs_string_B = + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "**********.......****\n" + "**********.......****\n" + "**********..*.*..****\n" + "**********.**.**.****\n" + "**********..*.*..****\n" + "**********.......****\n" + "**********B.....A****\n" + "*********************\n" + "*********************\n" + "Orientations: 1 1\n"; + std::vector expected_obs_tensor_B = + get_obs_tensor_from_string(expected_obs_string_B, obs_grid_size); + + SPIEL_CHECK_EQ(expected_obs_string_A, state->ObservationString(0)); + SPIEL_CHECK_EQ(expected_obs_string_B, state->ObservationString(1)); + SPIEL_CHECK_TRUE( + AllNear(expected_obs_tensor_A, state->ObservationTensor(0), tolerence)); + SPIEL_CHECK_TRUE( + AllNear(expected_obs_tensor_B, state->ObservationTensor(1), tolerence)); + + state->ApplyActions({2, 2}); // A: Move forward, B: Move forward. + state->ApplyAction(kChanceInit0Action); // chance node: player 0 first + + state->ApplyActions({0, 1}); // A: Turn left, B: Turn right. + state->ApplyAction(kChanceInit0Action); // chance node: player 0 first + + state->ApplyActions({2, 2}); // A: Move forward, B: Move forward. + state->ApplyAction(kChanceInit0Action); // chance node: player 0 first + + state->ApplyActions({2, 2}); // A: Move forward, B: Move forward. + state->ApplyAction(kChanceInit0Action); // chance node: player 0 first + + // ....... + // ..A.B.. + // ..*.*.. + // .**.**. + // ..*.*.. + // ....... + // ....... 
+ // + // A facing east, B facing west + + expected_obs_string_A = + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********.......*****\n" + "*********...*...*****\n" + "*********.B***..*****\n" + "*********.......*****\n" + "*********.A***..*****\n" + "*********...*...*****\n" + "*********.......*****\n" + "Orientations: 2 3\n"; + expected_obs_tensor_A = + get_obs_tensor_from_string(expected_obs_string_A, obs_grid_size); + + expected_obs_string_B = + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*********************\n" + "*****.......*********\n" + "*****...*...*********\n" + "*****..***A.*********\n" + "*****.......*********\n" + "*****..***B.*********\n" + "*****...*...*********\n" + "*****.......*********\n" + "Orientations: 2 3\n"; + expected_obs_tensor_B = + get_obs_tensor_from_string(expected_obs_string_B, obs_grid_size); + + SPIEL_CHECK_EQ(expected_obs_string_A, state->ObservationString(0)); + SPIEL_CHECK_EQ(expected_obs_string_B, state->ObservationString(1)); + SPIEL_CHECK_TRUE( + AllNear(expected_obs_tensor_A, state->ObservationTensor(0), tolerence)); + SPIEL_CHECK_TRUE( + AllNear(expected_obs_tensor_B, state->ObservationTensor(1), tolerence)); +} + +void PartiallyObservableLaserTagSmallObsTests() { + float tolerence = 0.0001; + std::shared_ptr game = + LoadGame("laser_tag", {{"fully_obs", GameParameter(false)}, + {"obs_front", GameParameter(2)}, + {"obs_back", GameParameter(1)}, + {"obs_side", GameParameter(1)}, + {"grid", GameParameter(laser_tag::kDefaultGrid)}}); + std::unique_ptr state = game->NewInitialState(); + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(kTopRightSpawnOutcome); // Spawn B top-right + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(kTopLeftSpawnOutcome); // Spawn A top-left + + // A.....B + // ....... + // ..*.*.. + // .**.**. + // ..*.*.. + // ....... + // ....... + // + // Both A and B facing south + + std::string expected_obs_string_A = + "..*\n" + "..*\n" + ".A*\n" + "***\n" + "Orientations: 1 -1\n"; + std::vector expected_obs_tensor_A = { + 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, // Plane 0: A + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Plane 1: B + 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, // Plane 2: . + 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1 // Plane 3: * + }; + + std::string expected_obs_string_B = + "*..\n" + "*..\n" + "*B.\n" + "***\n" + "Orientations: -1 1\n"; + std::vector expected_obs_tensor_B = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Plane 0: A + 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, // Plane 1: B + 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, // Plane 2: . 
+ 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1 // Plane 3: * + }; + + SPIEL_CHECK_EQ(expected_obs_string_A, state->ObservationString(0)); + SPIEL_CHECK_EQ(expected_obs_string_B, state->ObservationString(1)); + SPIEL_CHECK_TRUE( + AllNear(expected_obs_tensor_A, state->ObservationTensor(0), tolerence)); + SPIEL_CHECK_TRUE( + AllNear(expected_obs_tensor_B, state->ObservationTensor(1), tolerence)); + + state->ApplyActions({2, 2}); // A: Move forward, B: Move forward. + state->ApplyAction(kChanceInit0Action); // chance node: player 0 first + + state->ApplyActions({0, 1}); // A: Turn left, B: Turn right. + state->ApplyAction(kChanceInit0Action); // chance node: player 0 first + + state->ApplyActions({2, 2}); // A: Move forward, B: Move forward. + state->ApplyAction(kChanceInit0Action); // chance node: player 0 first + + state->ApplyActions({2, 2}); // A: Move forward, B: Move forward. + state->ApplyAction(kChanceInit0Action); // chance node: player 0 first + + // ....... + // ..A.B.. + // ..*.*.. + // .**.**. + // ..*.*.. + // ....... + // ....... + // + // A facing east, B facing west + + expected_obs_string_A = + ".B*\n" + "...\n" + ".A*\n" + "...\n" + "Orientations: 2 3\n"; + expected_obs_tensor_A = { + 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, // Plane 0: A + 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Plane 1: B + 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, // Plane 2: . + 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0 // Plane 3: * + }; + + expected_obs_string_B = + "*A.\n" + "...\n" + "*B.\n" + "...\n" + "Orientations: 2 3\n"; + expected_obs_tensor_B = { + 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Plane 0: A + 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, // Plane 1: B + 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, // Plane 2: . + 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0 // Plane 3: * + }; + + SPIEL_CHECK_EQ(expected_obs_string_A, state->ObservationString(0)); + SPIEL_CHECK_EQ(expected_obs_string_B, state->ObservationString(1)); + SPIEL_CHECK_TRUE( + AllNear(expected_obs_tensor_A, state->ObservationTensor(0), tolerence)); + SPIEL_CHECK_TRUE( + AllNear(expected_obs_tensor_B, state->ObservationTensor(1), tolerence)); +} + +} // namespace +} // namespace laser_tag +} // namespace open_spiel + +namespace laser_tag = open_spiel::laser_tag; + +int main(int argc, char **argv) { + laser_tag::SimpleTagTests(-1, true, laser_tag::kDefaultGrid); + laser_tag::SimpleTagTests(-1, false, laser_tag::kDefaultGrid); + laser_tag::SimpleTagTests(1000, true, laser_tag::kDefaultGrid); + laser_tag::SimpleTagTests(1000, false, laser_tag::kDefaultGrid); + laser_tag::BasicLaserTagTests(); + laser_tag::BasicLaserTagTestsBigGrid(); + laser_tag::BasicPartiallyObservableLaserTagTests(); + laser_tag::PartiallyObservableLaserTagSmallObsTests(); + laser_tag::PartiallyObservableLaserTagDefaultObsTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/leduc_poker/leduc_poker.cc b/scenarios/bargaining/open_spiel/open_spiel/games/leduc_poker/leduc_poker.cc new file mode 100644 index 0000000..01e04e2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/leduc_poker/leduc_poker.cc @@ -0,0 +1,881 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/leduc_poker/leduc_poker.h" + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/memory/memory.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/observer.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace leduc_poker { +namespace { + +constexpr double kAnte = 1; + +const GameType kGameType{/*short_name=*/"leduc_poker", + /*long_name=*/"Leduc Poker", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/10, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"players", GameParameter(kDefaultPlayers)}, + {"action_mapping", GameParameter(false)}, + {"suit_isomorphism", GameParameter(false)}}}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new LeducGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +std::string StatelessActionToString(Action action) { + if (action == ActionType::kFold) { + return "Fold"; + } else if (action == ActionType::kCall) { + return "Call"; + } else if (action == ActionType::kRaise) { + return "Raise"; + } else { + SpielFatalError(absl::StrCat("Unknown action: ", action)); + return "Will not return."; + } +} + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); +} // namespace + +// The Observer class is responsible for creating representations of the game +// state for use in learning algorithms. It handles both string and tensor +// representations, and any combination of public information and private +// information (none, observing player only, or all players). +// +// If a perfect recall observation is requested, it must be possible to deduce +// all previous observations for the same information type from the current +// observation. + +class LeducObserver : public Observer { + public: + LeducObserver(IIGObservationType iig_obs_type) + : Observer(/*has_string=*/true, /*has_tensor=*/true), + iig_obs_type_(iig_obs_type) {} + + // + // These helper methods each write a piece of the tensor observation. + // + + // Identity of the observing player. One-hot vector of size num_players. + static void WriteObservingPlayer(const LeducState& state, int player, + Allocator* allocator) { + auto out = allocator->Get("player", {state.num_players_}); + out.at(player) = 1; + } + + // Private card of the observing player. One-hot vector of size num_cards. 
+ static void WriteSinglePlayerCard(const LeducState& state, int player, + Allocator* allocator) { + auto out = allocator->Get("private_card", {state.NumObservableCards()}); + int card = state.private_cards_[player]; + if (card != kInvalidCard) out.at(card) = 1; + } + + // Private cards of all players. Tensor of shape [num_players, num_cards]. + static void WriteAllPlayerCards(const LeducState& state, + Allocator* allocator) { + auto out = allocator->Get("private_cards", + {state.num_players_, state.NumObservableCards()}); + for (int p = 0; p < state.num_players_; ++p) { + int card = state.private_cards_[p]; + if (card != kInvalidCard) out.at(p, state.private_cards_[p]) = 1; + } + } + + // Community card (if any). One-hot vector of size num_cards. + static void WriteCommunityCard(const LeducState& state, + Allocator* allocator) { + auto out = allocator->Get("community_card", {state.NumObservableCards()}); + if (state.public_card_ != kInvalidCard) { + out.at(state.public_card_) = 1; + } + } + + // Betting sequence; shape [num_rounds, bets_per_round, num_actions]. + static void WriteBettingSequence(const LeducState& state, + Allocator* allocator) { + const int kNumRounds = 2; + const int kBitsPerAction = 2; + const int max_bets_per_round = state.MaxBetsPerRound(); + auto out = allocator->Get("betting", + {kNumRounds, max_bets_per_round, kBitsPerAction}); + for (int round : {0, 1}) { + const auto& bets = + (round == 0) ? state.round1_sequence_ : state.round2_sequence_; + for (int i = 0; i < bets.size(); ++i) { + if (bets[i] == ActionType::kCall) { + out.at(round, i, 0) = 1; // Encode call as 10. + } else if (bets[i] == ActionType::kRaise) { + out.at(round, i, 1) = 1; // Encode raise as 01. + } + } + } + } + + // Pot contribution per player (integer per player). + static void WritePotContribution(const LeducState& state, + Allocator* allocator) { + auto out = allocator->Get("pot_contribution", {state.num_players_}); + for (auto p = Player{0}; p < state.num_players_; p++) { + out.at(p) = state.ante_[p]; + } + } + + // Writes the complete observation in tensor form. + // The supplied allocator is responsible for providing memory to write the + // observation into. + void WriteTensor(const State& observed_state, int player, + Allocator* allocator) const override { + auto& state = open_spiel::down_cast(observed_state); + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, state.num_players_); + + // Observing player. + WriteObservingPlayer(state, player, allocator); + + // Private card(s). + if (iig_obs_type_.private_info == PrivateInfoType::kSinglePlayer) { + WriteSinglePlayerCard(state, player, allocator); + } else if (iig_obs_type_.private_info == PrivateInfoType::kAllPlayers) { + WriteAllPlayerCards(state, allocator); + } + + // Public information. + if (iig_obs_type_.public_info) { + WriteCommunityCard(state, allocator); + iig_obs_type_.perfect_recall ? WriteBettingSequence(state, allocator) + : WritePotContribution(state, allocator); + } + } + + // Writes an observation in string form. It would be possible just to + // turn the tensor observation into a string, but we prefer something + // somewhat human-readable. + + std::string StringFrom(const State& observed_state, + int player) const override { + auto& state = open_spiel::down_cast(observed_state); + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, state.num_players_); + std::string result; + + // Private card(s). 
+ if (iig_obs_type_.private_info == PrivateInfoType::kSinglePlayer) { + absl::StrAppend(&result, "[Observer: ", player, "]"); + absl::StrAppend(&result, "[Private: ", state.private_cards_[player], "]"); + } else if (iig_obs_type_.private_info == PrivateInfoType::kAllPlayers) { + absl::StrAppend( + &result, "[Privates: ", absl::StrJoin(state.private_cards_, ""), "]"); + } + + // Public info. Not all of this is strictly necessary, but it makes the + // string easier to understand. + if (iig_obs_type_.public_info) { + absl::StrAppend(&result, "[Round ", state.round_, "]"); + absl::StrAppend(&result, "[Player: ", state.cur_player_, "]"); + absl::StrAppend(&result, "[Pot: ", state.pot_, "]"); + absl::StrAppend(&result, "[Money: ", absl::StrJoin(state.money_, " "), + "]"); + if (state.public_card_ != kInvalidCard) { + absl::StrAppend(&result, "[Public: ", state.public_card_, "]"); + } + if (iig_obs_type_.perfect_recall) { + // Betting Sequence (for the perfect recall case) + absl::StrAppend( + &result, "[Round1: ", absl::StrJoin(state.round1_sequence_, " "), + "][Round2: ", absl::StrJoin(state.round2_sequence_, " "), "]"); + } else { + // Pot contributions (imperfect recall) + absl::StrAppend(&result, "[Ante: ", absl::StrJoin(state.ante_, " "), + "]"); + } + } + + // Done. + return result; + } + + private: + IIGObservationType iig_obs_type_; +}; + +LeducState::LeducState(std::shared_ptr game, bool action_mapping, + bool suit_isomorphism) + : State(game), + cur_player_(kChancePlayerId), + num_calls_(0), + num_raises_(0), + round_(1), // Round number (1 or 2). + stakes_(1), // The current 'level' of the bet. + num_winners_(-1), + pot_(kAnte * game->NumPlayers()), // Number of chips in the pot. + public_card_(kInvalidCard), + // Number of cards remaining; not equal deck_.size()! + deck_size_((game->NumPlayers() + 1) * kNumSuits), + private_cards_dealt_(0), + remaining_players_(game->NumPlayers()), + // Is this player a winner? Indexed by pid. + winner_(game->NumPlayers(), false), + // Each player's single private card. Indexed by pid. + private_cards_(game->NumPlayers(), kInvalidCard), + // How much money each player has, indexed by pid. + money_(game->NumPlayers(), kStartingMoney - kAnte), + // How much each player has contributed to the pot, indexed by pid. + ante_(game->NumPlayers(), kAnte), + // Flag for whether the player has folded, indexed by pid. + folded_(game->NumPlayers(), false), + // Sequence of actions for each round. Needed to report information + // state. + round1_sequence_(), + round2_sequence_(), + // Always regard all actions as legal, and internally map otherwise + // illegal actions to check/call. + action_mapping_(action_mapping), + // Players cannot distinguish between cards of different suits with the + // same rank. + suit_isomorphism_(suit_isomorphism) { + // Cards by value (0-6 for standard 2-player game, kInvalidCard if no longer + // in the deck.) + deck_.resize(deck_size_); + std::iota(deck_.begin(), deck_.end(), 0); +} + +int LeducState::CurrentPlayer() const { + if (IsTerminal()) { + return kTerminalPlayerId; + } else { + return cur_player_; + } +} + +// In a chance node, `move` should be the card to deal to the current +// underlying player. +// On a player node, it should be ActionType::{kFold, kCall, kRaise} +void LeducState::DoApplyAction(Action move) { + if (IsChanceNode()) { + SPIEL_CHECK_GE(move, 0); + SPIEL_CHECK_LT(move, deck_.size()); + if (suit_isomorphism_) { + // One of the two identical cards must be left in the deck. 
+ SPIEL_CHECK_TRUE(deck_[move * 2] != kInvalidCard || + deck_[move * 2 + 1] != kInvalidCard); + } else { + SPIEL_CHECK_NE(deck_[move], kInvalidCard); + } + + if (private_cards_dealt_ < num_players_) { + SetPrivate(private_cards_dealt_, move); + } else { + // Round 2: A single public card. + if (suit_isomorphism_) { + public_card_ = move; + if (deck_[move * 2] != kInvalidCard) { + deck_[move * 2] = kInvalidCard; + } else if (deck_[move * 2 + 1] != kInvalidCard) { + deck_[move * 2 + 1] = kInvalidCard; + } else { + SpielFatalError("Suit isomorphism error."); + } + deck_size_--; + } else { + public_card_ = deck_[move]; + deck_[move] = kInvalidCard; + deck_size_--; + } + + // We have finished the public card, let's bet! + cur_player_ = NextPlayer(); + } + } else { + // Player node. + if (action_mapping_) { + // Map otherwise illegal actions to kCall. + if (move == ActionType::kFold) { + if (stakes_ <= ante_[cur_player_]) { + move = ActionType::kCall; + } + } else if (move == ActionType::kRaise) { + if (num_raises_ >= 2) { + move = ActionType::kCall; + } + } + } + + if (move == ActionType::kFold) { + SPIEL_CHECK_NE(cur_player_, kChancePlayerId); + SequenceAppendMove(ActionType::kFold); + + // Player is now out. + folded_[cur_player_] = true; + remaining_players_--; + + if (IsTerminal()) { + ResolveWinner(); + } else if (ReadyForNextRound()) { + NewRound(); + } else { + cur_player_ = NextPlayer(); + } + } else if (move == ActionType::kCall) { + SPIEL_CHECK_NE(cur_player_, kChancePlayerId); + + // Current player puts in an amount of money equal to the current level + // (stakes) minus what they have contributed to level their contribution + // off. Note: this action also acts as a 'check' where the stakes are + // equal to each player's ante. + SPIEL_CHECK_GE(stakes_, ante_[cur_player_]); + int amount = stakes_ - ante_[cur_player_]; + Ante(cur_player_, amount); + num_calls_++; + SequenceAppendMove(ActionType::kCall); + + if (IsTerminal()) { + ResolveWinner(); + } else if (ReadyForNextRound()) { + NewRound(); + } else { + cur_player_ = NextPlayer(); + } + } else if (move == ActionType::kRaise) { + SPIEL_CHECK_NE(cur_player_, kChancePlayerId); + + // This player matches the current stakes and then brings the stakes up. + SPIEL_CHECK_LT(num_raises_, kMaxRaises); + int call_amount = stakes_ - ante_[cur_player_]; + + // First, match the current stakes if necessary + SPIEL_CHECK_GE(call_amount, 0); + if (call_amount > 0) { + Ante(cur_player_, call_amount); + } + + // Now, raise the stakes. + int raise_amount = (round_ == 1 ? kFirstRaiseAmount : kSecondRaiseAmount); + stakes_ += raise_amount; + Ante(cur_player_, raise_amount); + num_raises_++; + num_calls_ = 0; + SequenceAppendMove(ActionType::kRaise); + + if (IsTerminal()) { + ResolveWinner(); + } else { + cur_player_ = NextPlayer(); + } + } else { + SpielFatalError(absl::StrCat("Move ", move, " is invalid. ChanceNode?", + IsChanceNode())); + } + } +} + +std::vector LeducState::LegalActions() const { + if (IsTerminal()) return {}; + std::vector movelist; + if (IsChanceNode()) { + if (suit_isomorphism_) { + // Consecutive cards are identical under suit isomorphism. 
+ for (int card = 0; card < deck_.size() / 2; card++) { + if (deck_[card * 2] != kInvalidCard || + deck_[card * 2 + 1] != kInvalidCard) { + movelist.push_back(card); + } + } + } else { + for (int card = 0; card < deck_.size(); card++) { + if (deck_[card] != kInvalidCard) movelist.push_back(card); + } + } + return movelist; + } + + if (action_mapping_) { + // All actions are regarded as legal + movelist.push_back(ActionType::kFold); + movelist.push_back(ActionType::kCall); + movelist.push_back(ActionType::kRaise); + return movelist; + } + + // Can't just randomly fold; only allow fold when under pressure. + if (stakes_ > ante_[cur_player_]) { + movelist.push_back(ActionType::kFold); + } + + // Can always call/check + movelist.push_back(ActionType::kCall); + + if (num_raises_ < 2) { + movelist.push_back(ActionType::kRaise); + } + + return movelist; +} + +std::string LeducState::ActionToString(Player player, Action move) const { + return GetGame()->ActionToString(player, move); +} + +std::string LeducState::ToString() const { + std::string result; + + absl::StrAppend(&result, "Round: ", round_, "\nPlayer: ", cur_player_, + "\nPot: ", pot_, "\nMoney (p1 p2 ...):"); + for (auto p = Player{0}; p < num_players_; p++) { + absl::StrAppend(&result, " ", money_[p]); + } + absl::StrAppend(&result, "\nCards (public p1 p2 ...): ", public_card_, " "); + for (Player player_index = 0; player_index < num_players_; player_index++) { + absl::StrAppend(&result, private_cards_[player_index], " "); + } + + absl::StrAppend(&result, "\nRound 1 sequence: "); + for (int i = 0; i < round1_sequence_.size(); ++i) { + Action action = round1_sequence_[i]; + if (i > 0) absl::StrAppend(&result, ", "); + absl::StrAppend(&result, StatelessActionToString(action)); + } + absl::StrAppend(&result, "\nRound 2 sequence: "); + for (int i = 0; i < round2_sequence_.size(); ++i) { + Action action = round2_sequence_[i]; + if (i > 0) absl::StrAppend(&result, ", "); + absl::StrAppend(&result, StatelessActionToString(action)); + } + absl::StrAppend(&result, "\n"); + + return result; +} + +bool LeducState::IsTerminal() const { + return remaining_players_ == 1 || (round_ == 2 && ReadyForNextRound()); +} + +std::vector LeducState::Returns() const { + if (!IsTerminal()) { + return std::vector(num_players_, 0.0); + } + + std::vector returns(num_players_); + for (auto player = Player{0}; player < num_players_; ++player) { + // Money vs money at start. + returns[player] = money_[player] - kStartingMoney; + } + + return returns; +} + +// Information state is card then bets. +std::string LeducState::InformationStateString(Player player) const { + const LeducGame& game = open_spiel::down_cast(*game_); + return game.info_state_observer_->StringFrom(*this, player); +} + +// Observation is card then contribution of each players to the pot. 
+std::string LeducState::ObservationString(Player player) const { + const LeducGame& game = open_spiel::down_cast(*game_); + return game.default_observer_->StringFrom(*this, player); +} + +void LeducState::InformationStateTensor(Player player, + absl::Span values) const { + ContiguousAllocator allocator(values); + const LeducGame& game = open_spiel::down_cast(*game_); + game.info_state_observer_->WriteTensor(*this, player, &allocator); +} + +void LeducState::ObservationTensor(Player player, + absl::Span values) const { + ContiguousAllocator allocator(values); + const LeducGame& game = open_spiel::down_cast(*game_); + game.default_observer_->WriteTensor(*this, player, &allocator); +} + +std::unique_ptr LeducState::Clone() const { + return std::unique_ptr(new LeducState(*this)); +} + +std::vector> LeducState::ChanceOutcomes() const { + SPIEL_CHECK_TRUE(IsChanceNode()); + std::vector> outcomes; + + if (suit_isomorphism_) { + const double p = 1.0 / deck_size_; + // Consecutive cards in deck are viewed identically. + for (int card = 0; card < deck_.size() / 2; card++) { + if (deck_[card * 2] != kInvalidCard && + deck_[card * 2 + 1] != kInvalidCard) { + outcomes.push_back({card, p * 2}); + } else if (deck_[card * 2] != kInvalidCard || + deck_[card * 2 + 1] != kInvalidCard) { + outcomes.push_back({card, p}); + } + } + return outcomes; + } + + const double p = 1.0 / deck_size_; + for (int card = 0; card < deck_.size(); card++) { + // This card is still in the deck, prob is 1/decksize. + if (deck_[card] != kInvalidCard) outcomes.push_back({card, p}); + } + return outcomes; +} + +int LeducState::NextPlayer() const { + // If we are on a chance node, it is the first player to play + int current_real_player; + if (cur_player_ == kChancePlayerId) { + current_real_player = -1; + } else { + current_real_player = cur_player_; + } + // Go to the next player who's still in. + for (int i = 1; i < num_players_; ++i) { + Player player = (current_real_player + i) % num_players_; + + SPIEL_CHECK_TRUE(player >= 0); + SPIEL_CHECK_TRUE(player < num_players_); + if (!folded_[player]) { + return player; + } + } + + SpielFatalError("Error in LeducState::NextPlayer(), should not get here."); +} + +int LeducState::RankHand(Player player) const { + int hand[] = {public_card_, private_cards_[player]}; + // Put the lower card in slot 0, the higher in slot 1. + if (hand[0] > hand[1]) { + std::swap(hand[0], hand[1]); + } + + if (suit_isomorphism_) { + int num_cards = deck_.size() / 2; + if (hand[0] == hand[1]) { + // Pair! Offset by deck_size_^2 to put higher than every singles combo. + return (num_cards * num_cards + hand[0]); + } else { + // Otherwise card value dominates. Suit isomorphism has already removed + // the distinction between suits, so we can compare the ranks directly. + // This could lead to ties/draws and/or multiple winners. + return hand[1] * num_cards + hand[0]; + } + } + + // E.g. rank for two players: + // 0 J1, 1 J2, 2 Q1, 3 Q2, 4 K1, 5 K2. + int num_cards = deck_.size(); + + if (hand[0] % 2 == 0 && hand[1] == hand[0] + 1) { + // Pair! Offset by deck_size_^2 to put higher than every singles combo. + return (num_cards * num_cards + hand[0]); + } else { + // Otherwise card value dominates. No high/low suit: only two suits, and + // given ordering above, dividing by gets the value (integer division + // intended.) This could lead to ties/draws and/or multiple winners. 
+ return (hand[1] / 2) * num_cards + (hand[0] / 2); + } +} + +void LeducState::ResolveWinner() { + num_winners_ = kInvalidPlayer; + + if (remaining_players_ == 1) { + // Only one left in? They get the pot! + for (Player player_index = 0; player_index < num_players_; player_index++) { + if (!folded_[player_index]) { + num_winners_ = 1; + winner_[player_index] = true; + money_[player_index] += pot_; + pot_ = 0; + return; + } + } + + } else { + // Otherwise, showdown! + // Find the best hand among those still in. + SPIEL_CHECK_NE(public_card_, kInvalidCard); + int best_hand_rank = -1; + num_winners_ = 0; + std::fill(winner_.begin(), winner_.end(), false); + + for (Player player_index = 0; player_index < num_players_; player_index++) { + if (!folded_[player_index]) { + int rank = RankHand(player_index); + if (rank > best_hand_rank) { + // Beat the current best hand! Clear the winners list, then add. + best_hand_rank = rank; + std::fill(winner_.begin(), winner_.end(), false); + winner_[player_index] = true; + num_winners_ = 1; + } else if (rank == best_hand_rank) { + // Tied with best hand rank, so this player is a winner as well. + winner_[player_index] = true; + num_winners_++; + } + } + } + + // Split the pot among the winners (possibly only one). + SPIEL_CHECK_TRUE(1 <= num_winners_ && num_winners_ <= num_players_); + for (Player player_index = 0; player_index < num_players_; player_index++) { + if (winner_[player_index]) { + // Give this player their share. + money_[player_index] += static_cast(pot_) / num_winners_; + } + } + pot_ = 0; + } +} + +bool LeducState::ReadyForNextRound() const { + return ((num_raises_ == 0 && num_calls_ == remaining_players_) || + (num_raises_ > 0 && num_calls_ == (remaining_players_ - 1))); +} + +void LeducState::NewRound() { + SPIEL_CHECK_EQ(round_, 1); + round_++; + num_raises_ = 0; + num_calls_ = 0; + cur_player_ = kChancePlayerId; // Public card. +} + +void LeducState::SequenceAppendMove(int move) { + if (round_ == 1) { + round1_sequence_.push_back(move); + } else { + SPIEL_CHECK_EQ(round_, 2); + round2_sequence_.push_back(move); + } +} + +void LeducState::Ante(Player player, int amount) { + pot_ += amount; + ante_[player] += amount; + money_[player] -= amount; +} + +std::vector LeducState::padded_betting_sequence() const { + std::vector history = round1_sequence_; + + // We pad the history to the end of the first round with kPaddingAction. + history.resize(game_->MaxGameLength() / 2, kInvalidAction); + + // We insert the actions that happened in the second round, and fill to + // MaxGameLength. + history.insert(history.end(), round2_sequence_.begin(), + round2_sequence_.end()); + history.resize(game_->MaxGameLength(), kInvalidAction); + return history; +} + +void LeducState::SetPrivate(Player player, Action move) { + // Round 1. `move` refers to the card value to deal to the current + // underlying player (given by `private_cards_dealt_`). + if (suit_isomorphism_) { + // Consecutive cards are identical under suit isomorphism. + private_cards_[player] = move; + if (deck_[move * 2] != kInvalidCard) { + deck_[move * 2] = kInvalidCard; + } else if (deck_[move * 2 + 1] != kInvalidCard) { + deck_[move * 2 + 1] = kInvalidCard; + } else { + SpielFatalError("Suit isomorphism error."); + } + } else { + private_cards_[player] = deck_[move]; + deck_[move] = kInvalidCard; + } + --deck_size_; + ++private_cards_dealt_; + + // When all private cards are dealt, move to player 0. 
+ if (private_cards_dealt_ == num_players_) cur_player_ = 0; +} + +std::unique_ptr LeducState::ResampleFromInfostate( + int player_id, std::function rng) const { + std::unique_ptr clone = game_->NewInitialState(); + + // First, deal out cards: + Action player_chance = history_.at(player_id).action; + for (int p = 0; p < GetGame()->NumPlayers(); ++p) { + if (p == player_id) { + clone->ApplyAction(history_.at(p).action); + } else { + Action chosen_action = player_chance; + while (chosen_action == player_chance || chosen_action == public_card_) { + chosen_action = SampleAction(clone->ChanceOutcomes(), rng()).first; + } + clone->ApplyAction(chosen_action); + } + } + for (int action : round1_sequence_) clone->ApplyAction(action); + if (public_card_ != kInvalidCard) { + clone->ApplyAction(public_card_); + for (int action : round2_sequence_) clone->ApplyAction(action); + } + return clone; +} + +int LeducState::NumObservableCards() const { + return suit_isomorphism_ ? deck_.size() / 2 : deck_.size(); +} + +int LeducState::MaxBetsPerRound() const { return 3 * num_players_ - 2; } + +void LeducState::SetPrivateCards(const std::vector& new_private_cards) { + SPIEL_CHECK_EQ(new_private_cards.size(), NumPlayers()); + private_cards_ = new_private_cards; +} + +LeducGame::LeducGame(const GameParameters& params) + : Game(kGameType, params), + num_players_(ParameterValue("players")), + total_cards_((num_players_ + 1) * kNumSuits), + action_mapping_(ParameterValue("action_mapping")), + suit_isomorphism_(ParameterValue("suit_isomorphism")) { + SPIEL_CHECK_GE(num_players_, kGameType.min_num_players); + SPIEL_CHECK_LE(num_players_, kGameType.max_num_players); + default_observer_ = std::make_shared(kDefaultObsType); + info_state_observer_ = std::make_shared(kInfoStateObsType); +} + +std::unique_ptr LeducGame::NewInitialState() const { + return absl::make_unique(shared_from_this(), + /*action_mapping=*/action_mapping_, + /*suit_isomorphism=*/suit_isomorphism_); +} + +int LeducGame::MaxChanceOutcomes() const { + if (suit_isomorphism_) { + return total_cards_ / 2; + } else { + return total_cards_; + } +} + +std::vector LeducGame::InformationStateTensorShape() const { + // One-hot encoding for player number (who is to play). + // 2 slots of cards (total_cards_ bits each): private card, public card + // Followed by maximum game length * 2 bits each (call / raise) + if (suit_isomorphism_) { + return {(num_players_) + (total_cards_) + (MaxGameLength() * 2)}; + } else { + return {(num_players_) + (total_cards_ * 2) + (MaxGameLength() * 2)}; + } +} + +std::vector LeducGame::ObservationTensorShape() const { + // One-hot encoding for player number (who is to play). + // 2 slots of cards (total_cards_ bits each): private card, public card + // Followed by the contribution of each player to the pot + if (suit_isomorphism_) { + return {(num_players_) + (total_cards_) + (num_players_)}; + } else { + return {(num_players_) + (total_cards_ * 2) + (num_players_)}; + } +} + +double LeducGame::MaxUtility() const { + // In poker, the utility is defined as the money a player has at the end of + // the game minus then money the player had before starting the game. + // The most a player can win *per opponent* is the most each player can put + // into the pot, which is the raise amounts on each round times the maximum + // number raises, plus the original chip they put in to play. 
+ return (num_players_ - 1) * (kTotalRaisesPerRound * kFirstRaiseAmount + + kTotalRaisesPerRound * kSecondRaiseAmount + 1); +} + +double LeducGame::MinUtility() const { + // In poker, the utility is defined as the money a player has at the end of + // the game minus then money the player had before starting the game. + // The most any single player can lose is the maximum number of raises per + // round times the amounts of each of the raises, plus the original chip + // they put in to play. + return -1 * (kTotalRaisesPerRound * kFirstRaiseAmount + + kTotalRaisesPerRound * kSecondRaiseAmount + 1); +} + +std::shared_ptr LeducGame::MakeObserver( + absl::optional iig_obs_type, + const GameParameters& params) const { + if (params.empty()) { + return std::make_shared( + iig_obs_type.value_or(kDefaultObsType)); + } else { + return MakeRegisteredObserver(iig_obs_type, params); + } +} + +std::string LeducGame::ActionToString(Player player, Action action) const { + if (player == kChancePlayerId) { + return absl::StrCat("Chance outcome:", action); + } else { + return StatelessActionToString(action); + } +} + +TabularPolicy GetAlwaysFoldPolicy(const Game& game) { + SPIEL_CHECK_TRUE( + dynamic_cast(const_cast(&game)) != nullptr); + return GetPrefActionPolicy(game, {ActionType::kFold, ActionType::kCall}); +} + +TabularPolicy GetAlwaysCallPolicy(const Game& game) { + SPIEL_CHECK_TRUE( + dynamic_cast(const_cast(&game)) != nullptr); + return GetPrefActionPolicy(game, {ActionType::kCall}); +} + +TabularPolicy GetAlwaysRaisePolicy(const Game& game) { + SPIEL_CHECK_TRUE( + dynamic_cast(const_cast(&game)) != nullptr); + return GetPrefActionPolicy(game, {ActionType::kRaise, ActionType::kCall}); +} + +} // namespace leduc_poker +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/leduc_poker/leduc_poker.h b/scenarios/bargaining/open_spiel/open_spiel/games/leduc_poker/leduc_poker.h new file mode 100644 index 0000000..5ac1ad6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/leduc_poker/leduc_poker.h @@ -0,0 +1,264 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// A generalized version of a Leduc poker, a simple but non-trivial poker game +// described in http://poker.cs.ualberta.ca/publications/UAI05.pdf . +// +// Taken verbatim from the linked paper above: "In Leduc hold'em, the deck +// consists of two suits with three cards in each suit. There are two rounds. +// In the first round a single private card is dealt to each player. In the +// second round a single board card is revealed. There is a two-bet maximum, +// with raise amounts of 2 and 4 in the first and second round, respectively. +// Both players start the first round with 1 already in the pot. 
+// +// So the maximin sequence is of the form: +// private card player 0, private card player 1, [bets], public card, [bets] +// +// Parameters: +// "players" int number of players (default = 2) +// "action_mapping" bool regard all actions as legal and internally +// map otherwise illegal actions to check/call +// (default = false) +// "suit_isomorphism" bool player observations do not distinguish +// between cards of different suits with +// the same rank (default = false) + +#ifndef OPEN_SPIEL_GAMES_LEDUC_POKER_H_ +#define OPEN_SPIEL_GAMES_LEDUC_POKER_H_ + +#include +#include +#include +#include +#include + +#include "open_spiel/observer.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace leduc_poker { + +// Default parameters. + +inline constexpr int kInvalidCard = -10000; +inline constexpr int kDefaultPlayers = 2; +inline constexpr int kNumSuits = 2; +inline constexpr int kFirstRaiseAmount = 2; +inline constexpr int kSecondRaiseAmount = 4; +inline constexpr int kTotalRaisesPerRound = 2; +inline constexpr int kMaxRaises = 2; +inline constexpr int kStartingMoney = 100; + +// Number of info states in the 2P game with default params. +inline constexpr int kNumInfoStates = 936; + +class LeducGame; +class LeducObserver; + +enum ActionType { kFold = 0, kCall = 1, kRaise = 2 }; + +class LeducState : public State { + public: + explicit LeducState(std::shared_ptr game, + bool action_mapping, bool suit_isomorphism); + + Player CurrentPlayer() const override; + std::string ActionToString(Player player, Action move) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + void InformationStateTensor(Player player, + absl::Span values) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + // The probability of taking each possible action in a particular info state. + std::vector> ChanceOutcomes() const override; + + // Additional methods + int round() const { return round_; } + int deck_size() const { return deck_size_; } + int public_card() const { return public_card_; } + int raises() const { return num_raises_; } + int private_card(Player player) const { return private_cards_[player]; } + std::vector LegalActions() const override; + + // Gets the private cards. + std::vector GetPrivateCards() const { return private_cards_; } + + // Gets the public card. + int GetPublicCard() const { return public_card_; } + + // Gets number of chips in pot. + int GetPot() const { return pot_; } + + // Gets how much money each player has. + std::vector GetMoney() const { return money_; } + + // Gets the action sequence of rounds 1 & 2. + std::vector GetRound1() const { return round1_sequence_; } + std::vector GetRound2() const { return round2_sequence_; } + + // Sets the private cards to specific ones. Note that this function does not + // change the history, so any functions relying on the history will not longer + // work properly. + void SetPrivateCards(const std::vector& new_private_cards); + + // Returns a vector of MaxGameLength containing all of the betting actions + // taken so far. If the round has ended, the actions are kInvalidAction. 
+ std::vector padded_betting_sequence() const; + std::unique_ptr ResampleFromInfostate( + int player_id, std::function rng) const override; + + std::vector ActionsConsistentWithInformationFrom( + Action action) const override { + return {action}; + } + + protected: + // The meaning of `action_id` varies: + // - At decision nodes, one of ActionType::{kFold, kCall, kRaise}. + // - At a chance node, indicates the card to be dealt to the player or + // revealed publicly. The interpretation of each chance outcome depends on + // the number of players, but always follows: + // lowest value of first suit, + // lowest value of second suit, + // next lowest value of first suit, + // next lowest value of second suit, + // . + // . + // . + // highest value of first suit, + // highest value of second suit. + // So, e.g. in the two player case (6 cards): 0 = Jack1, 1 = Jack2, + // 2 = Queen1, ... , 5 = King2. + void DoApplyAction(Action move) override; + + private: + friend class LeducObserver; + + int NextPlayer() const; + void ResolveWinner(); + bool ReadyForNextRound() const; + void NewRound(); + int RankHand(Player player) const; + void SequenceAppendMove(int move); + void Ante(Player player, int amount); + void SetPrivate(Player player, Action move); + int NumObservableCards() const; + int MaxBetsPerRound() const; + + // Fields sets to bad/invalid values. Use Game::NewInitialState(). + Player cur_player_; + + int num_calls_; // Number of calls this round (total, not per player). + int num_raises_; // Number of raises made in the round (not per player). + int round_; // Round number (1 or 2). + int stakes_; // The current 'level' of the bet. + int num_winners_; // Number of winning players. + int pot_; // Number of chips in the pot. + int public_card_; // The public card revealed after round 1. + int deck_size_; // Number of cards remaining; not equal deck_.size() + int private_cards_dealt_; // How many private cards currently dealt. + int remaining_players_; // Num. players still in (not folded). + + // Is this player a winner? Indexed by pid. + std::vector winner_; + // Each player's single private card. Indexed by pid. + std::vector private_cards_; + // Cards by value (0-6 for standard 2-player game, -1 if no longer in the + // deck.) + std::vector deck_; + // How much money each player has, indexed by pid. + std::vector money_; + // How much each player has contributed to the pot, indexed by pid. + std::vector ante_; + // Flag for whether the player has folded, indexed by pid. + std::vector folded_; + // Sequence of actions for each round. Needed to report information state. + std::vector round1_sequence_; + std::vector round2_sequence_; + // Always regard all actions as legal, and internally map otherwise illegal + // actions to check/call. + bool action_mapping_; + // Players cannot distinguish between cards of different suits with the same + // rank. + bool suit_isomorphism_; +}; + +class LeducGame : public Game { + public: + explicit LeducGame(const GameParameters& params); + + int NumDistinctActions() const override { return 3; } + std::unique_ptr NewInitialState() const override; + int MaxChanceOutcomes() const override; + int NumPlayers() const override { return num_players_; } + double MinUtility() const override; + double MaxUtility() const override; + absl::optional UtilitySum() const override { return 0; } + std::vector InformationStateTensorShape() const override; + std::vector ObservationTensorShape() const override; + constexpr int MaxBetsPerRound() const { + // E.g. 
longest round for 4-player is 10 bets: + // check, check, check, bet, call, call, raise, call, call, call + // = 1 bet + 1 raise + (num_players_-1)*2 calls + (num_players_-2) calls + return 3 * num_players_ - 2; + } + int MaxGameLength() const override { + // 2 rounds. + return 2 * MaxBetsPerRound(); + } + int MaxChanceNodesInHistory() const override { return 3; } + int NumObservableCards() const { + return suit_isomorphism_ ? total_cards_ / 2 : total_cards_; + } + + std::string ActionToString(Player player, Action action) const override; + // New Observation API + std::shared_ptr MakeObserver( + absl::optional iig_obs_type, + const GameParameters& params) const override; + + // Used to implement the old observation API. + std::shared_ptr default_observer_; + std::shared_ptr info_state_observer_; + + private: + int num_players_; // Number of players. + int total_cards_; // Number of cards total cards in the game. + // Always regard all actions as legal, and internally map otherwise illegal + // actions to check/call. + bool action_mapping_; + // Players cannot distinguish between cards of different suits with the same + // rank. + bool suit_isomorphism_; +}; + +// Returns policy that always folds. +TabularPolicy GetAlwaysFoldPolicy(const Game& game); + +// Returns policy that always calls. +TabularPolicy GetAlwaysCallPolicy(const Game& game); + +// Returns policy that always raises. +TabularPolicy GetAlwaysRaisePolicy(const Game& game); + +} // namespace leduc_poker +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_LEDUC_POKER_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/leduc_poker/leduc_poker_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/leduc_poker/leduc_poker_test.cc new file mode 100644 index 0000000..7801a18 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/leduc_poker/leduc_poker_test.cc @@ -0,0 +1,67 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
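+
+// Editor's note (illustrative sketch only, not part of the upstream OpenSpiel
+// sources): the tests below drive the game through the generic State API.
+// Assuming the default 2-player parameters, one hand can be played out by hand
+// like this, using the chance encoding documented in leduc_poker.h
+// (0 = Jack1, 1 = Jack2, ..., 5 = King2) and ActionType{kFold, kCall, kRaise}:
+//
+//   std::shared_ptr<const Game> game = LoadGame("leduc_poker");
+//   std::unique_ptr<State> state = game->NewInitialState();
+//   state->ApplyAction(0);                   // chance: deal Jack1 to player 0
+//   state->ApplyAction(2);                   // chance: deal Queen1 to player 1
+//   state->ApplyAction(ActionType::kRaise);  // player 0 raises by 2
+//   state->ApplyAction(ActionType::kCall);   // player 1 calls -> round 2
+//   state->ApplyAction(4);                   // chance: public card King1
+//   state->ApplyAction(ActionType::kCall);   // player 0 checks
+//   state->ApplyAction(ActionType::kCall);   // player 1 checks -> showdown
+//   std::vector<double> returns = state->Returns();  // here: {-3, +3}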
+
+#include "open_spiel/games/leduc_poker/leduc_poker.h"
+#include "open_spiel/policy.h"
+#include "open_spiel/spiel.h"
+#include "open_spiel/tests/basic_tests.h"
+
+namespace open_spiel {
+namespace leduc_poker {
+namespace {
+
+namespace testing = open_spiel::testing;
+
+void BasicLeducTests() {
+  testing::LoadGameTest("leduc_poker");
+  testing::ChanceOutcomesTest(*LoadGame("leduc_poker"));
+  testing::RandomSimTest(*LoadGame("leduc_poker"), 100);
+  testing::RandomSimTest(*LoadGame("leduc_poker",
+                         {{"action_mapping", GameParameter(true)}}), 100);
+  testing::RandomSimTest(*LoadGame("leduc_poker",
+                         {{"suit_isomorphism", GameParameter(true)}}), 100);
+  for (Player players = 3; players <= 5; players++) {
+    testing::RandomSimTest(
+        *LoadGame("leduc_poker", {{"players", GameParameter(players)}}), 100);
+  }
+  testing::ResampleInfostateTest(*LoadGame("leduc_poker"), /*num_sims=*/100);
+  auto observer = LoadGame("leduc_poker")
+                      ->MakeObserver(kDefaultObsType,
+                                     GameParametersFromString("single_tensor"));
+  testing::RandomSimTestCustomObserver(*LoadGame("leduc_poker"), observer);
+}
+
+void PolicyTest() {
+  using PolicyGenerator = std::function<TabularPolicy(const Game& game)>;
+  std::vector<PolicyGenerator> policy_generators = {
+      GetAlwaysFoldPolicy,
+      GetAlwaysCallPolicy,
+      GetAlwaysRaisePolicy
+  };
+
+  std::shared_ptr<const Game> game = LoadGame("leduc_poker");
+  for (const auto& policy_generator : policy_generators) {
+    testing::TestEveryInfostateInPolicy(policy_generator, *game);
+    testing::TestPoliciesCanPlay(policy_generator, *game);
+  }
+}
+
+}  // namespace
+}  // namespace leduc_poker
+}  // namespace open_spiel
+
+int main(int argc, char** argv) {
+  open_spiel::leduc_poker::BasicLeducTests();
+  open_spiel::leduc_poker::PolicyTest();
+}
diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/lewis_signaling/lewis_signaling.cc b/scenarios/bargaining/open_spiel/open_spiel/games/lewis_signaling/lewis_signaling.cc
new file mode 100644
index 0000000..72a76c0
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/games/lewis_signaling/lewis_signaling.cc
@@ -0,0 +1,282 @@
+// Copyright 2019 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
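+
+// Editor's note (illustrative sketch only, not part of the upstream OpenSpiel
+// sources): an episode of this game is exactly three moves: chance draws the
+// state, the sender signals, the receiver acts. With the default 3x3 identity
+// payoff matrix, a matching guess pays both agents 1:
+//
+//   std::shared_ptr<const Game> game = LoadGame("lewis_signaling");
+//   std::unique_ptr<State> state = game->NewInitialState();
+//   state->ApplyAction(1);  // chance: sender privately observes state 1
+//   state->ApplyAction(2);  // sender: transmit message 2
+//   state->ApplyAction(1);  // receiver: choose action 1
+//   // Returns() == {1.0, 1.0}: both share payoffs_[num_states_ * state + action].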
+ +#include "open_spiel/games/lewis_signaling/lewis_signaling.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace lewis_signaling { + +namespace { + +// Facts about the game +const GameType kGameType{ + /*short_name=*/"lewis_signaling", + /*long_name=*/"Lewis Signaling Game", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"num_states", GameParameter(kDefaultNumStates)}, + {"num_messages", GameParameter(kDefaultNumMessages)}, + {"payoffs", GameParameter(std::string(kDefaultPayoffs))}}}; + +static std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new LewisSignalingGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); +} // namespace + +std::string LewisSignalingState::ActionToString(Player player, + Action move_id) const { + if (player == kChancePlayerId) { + return absl::StrCat("State ", move_id); + } else if (static_cast(player) == Players::kSender) { + return absl::StrCat("Message ", move_id); + } else if (static_cast(player) == Players::kReceiver) { + return absl::StrCat("Action ", move_id); + } else { + SpielFatalError("Invalid player"); + } +} + +bool LewisSignalingState::IsTerminal() const { + // Game ends after chance, sender, and receiver act + return (history_.size() == 3); +} + +std::vector LewisSignalingState::Returns() const { + if (!IsTerminal()) { + return {0.0, 0.0}; + } else { + // Find payoff from the payoff matrix based on state, action + int payoff_idx = num_states_ * state_ + action_; + return {payoffs_[payoff_idx], payoffs_[payoff_idx]}; + } +} + +std::string LewisSignalingState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + if (IsChanceNode()) { + return "ChanceNode -- no observation"; + } + + std::string str = ""; + + // Whose turn is it? + absl::StrAppend(&str, "Current turn: ", cur_player_, "\n"); + + // Show state to the sender, message to the receiver + if (static_cast(player) == Players::kSender) { + absl::StrAppend(&str, "State: ", state_, "\n"); + } else if (static_cast(player) == Players::kReceiver) { + absl::StrAppend(&str, "Message: ", message_, "\n"); + } else { + SpielFatalError("Invalid player"); + } + + return str; +} + +void LewisSignalingState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + SPIEL_CHECK_EQ(values.size(), game_->ObservationTensorSize()); + std::fill(values.begin(), values.end(), 0); + + if (IsChanceNode()) { + // No observations at chance nodes. + return; + } + + // 2 bits to indicate whose turn it is. + int offset = 0; + values[cur_player_] = 1; + offset += 2; + + // 1 bit to indicate whether it's terminal + values[offset] = IsTerminal() ? 
1 : 0; + offset += 1; + + // one-hot vector for the state/message + if (static_cast(player) == Players::kSender) { + if (state_ != kUnassignedValue) { + values[offset + state_] = 1; + offset += num_states_; + } + } else if (static_cast(player) == Players::kReceiver) { + if (message_ != kUnassignedValue) { + values[offset + message_] = 1; + offset += num_messages_; + } + } else { + SpielFatalError("Invalid player"); + } +} + +LewisSignalingState::LewisSignalingState(std::shared_ptr game, + int num_states, int num_messages, + const std::vector& payoffs) + : State(game), + num_states_(num_states), + num_messages_(num_messages), + payoffs_(payoffs), + cur_player_(kChancePlayerId), + state_(kUnassignedValue), + message_(kUnassignedValue), + action_(kUnassignedValue) {} + +int LewisSignalingState::CurrentPlayer() const { + return IsTerminal() ? kTerminalPlayerId : cur_player_; +} + +void LewisSignalingState::DoApplyAction(Action action) { + if (IsChanceNode()) { + SPIEL_CHECK_LT(action, num_states_); + state_ = action; + cur_player_ = static_cast(Players::kSender); + } else { + if (static_cast(cur_player_) == Players::kSender) { + SPIEL_CHECK_LT(action, num_messages_); + message_ = action; + cur_player_ = static_cast(Players::kReceiver); + } else if (static_cast(cur_player_) == Players::kReceiver) { + action_ = action; + } else { + SpielFatalError("Invalid player"); + } + } +} + +std::vector LewisSignalingState::LegalActions() const { + if (IsChanceNode()) { + return LegalChanceOutcomes(); + } else if (IsTerminal()) { + return {}; + } else if (static_cast(cur_player_) == Players::kSender) { + // Choose one of the messages if player is the sender + std::vector legal_actions; + legal_actions.reserve(num_messages_); + for (int i = 0; i < num_messages_; ++i) { + legal_actions.push_back(i); + } + return legal_actions; + } else if (static_cast(cur_player_) == Players::kReceiver) { + // Choose one of the actions if player is the receiver + std::vector legal_actions; + legal_actions.reserve(num_states_); + for (int i = 0; i < num_states_; ++i) { + legal_actions.push_back(i); + } + return legal_actions; + } else { + SpielFatalError("Invalid node"); + } +} + +std::vector> LewisSignalingState::ChanceOutcomes() + const { + SPIEL_CHECK_TRUE(IsChanceNode()); + std::vector> outcomes; + outcomes.reserve(num_states_); + for (int i = 0; i < num_states_; ++i) { + outcomes.push_back({i, 1.0 / num_states_}); + } + return outcomes; +} + +std::string LewisSignalingState::ToString() const { + switch (history_.size()) { + case 0: // Before allocating state + return "Initial chance node"; + break; + + case 1: // After allocating state + return absl::StrCat("State ", state_); + break; + + case 2: // After sending a message + return absl::StrCat("State ", state_, ", Message ", message_); + break; + + case 3: // After taking an action + return absl::StrCat("State ", state_, ", Message ", message_, ", Action ", + action_); + break; + + default: + SpielFatalError("Invalid state"); + } +} + +std::unique_ptr LewisSignalingState::Clone() const { + return std::unique_ptr(new LewisSignalingState(*this)); +} + +LewisSignalingGame::LewisSignalingGame(const GameParameters& params) + : Game(kGameType, params), + num_states_(ParameterValue("num_states", kDefaultNumStates)), + num_messages_(ParameterValue("num_messages", kDefaultNumMessages)) { + std::string payoffs_string = + ParameterValue("payoffs", kDefaultPayoffs); + std::vector parts = absl::StrSplit(payoffs_string, ','); + SPIEL_CHECK_EQ(parts.size(), num_states_ * num_states_); 
+ payoffs_.resize(parts.size()); + for (int i = 0; i < parts.size(); ++i) { + bool success = absl::SimpleAtod(parts[i], &payoffs_[i]); + SPIEL_CHECK_TRUE(success); + } + SPIEL_CHECK_LE(num_messages_, num_states_); +} + +int LewisSignalingGame::NumDistinctActions() const { return num_states_; } + +std::vector LewisSignalingGame::ObservationTensorShape() const { + return { + 2 + // one hot vector for whose turn it is + 1 + // one bit to indicate whether the state is terminal + num_states_ // one-hot vector for the state/message depending on the + // player + }; +} + +} // namespace lewis_signaling +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/lewis_signaling/lewis_signaling.h b/scenarios/bargaining/open_spiel/open_spiel/games/lewis_signaling/lewis_signaling.h new file mode 100644 index 0000000..2b0cc2d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/lewis_signaling/lewis_signaling.h @@ -0,0 +1,132 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_LEWIS_SIGNALING_H_ +#define OPEN_SPIEL_GAMES_LEWIS_SIGNALING_H_ + +#include +#include +#include +#include + +#include "open_spiel/spiel.h" + +// Lewis Signaling Game: https://en.wikipedia.org/wiki/Lewis_signaling_game +// +// First agent (sender) receives a random private state from a set of N states. +// It then sends a message from a set of M messages to the second agent +// (receiver). Finally, the receiver takes an action after observing the +// message. An N*N payoff matrix is used to calculate the reward based on the +// state received by the sender and the action taken by the receiver. Both +// agents receive the same reward. 
+// +// Parameters: +// "num_states" int number of distinct states (N) (default = 3) +// "num_messages" int number of distinct messages (M) (default = 3) +// "payoffs" string string with comma separated payoff values +// (N*N elements required) +// (default = flattened identity matrix) + +namespace open_spiel { +namespace lewis_signaling { + +constexpr int kDefaultNumStates = 3; +constexpr int kDefaultNumMessages = 3; +constexpr int kDefaultNumPlayers = 2; +constexpr const char* kDefaultPayoffs = "1, 0, 0, 0, 1, 0, 0, 0, 1"; +constexpr int kUnassignedValue = -1; + +enum class Players { + kSender, + kReceiver, +}; + +class LewisSignalingGame; + +class LewisSignalingState : public State { + public: + LewisSignalingState(std::shared_ptr game, int num_states, + int num_messages, const std::vector& payoffs); + LewisSignalingState(const LewisSignalingState&) = default; + + Player CurrentPlayer() const override; + std::string ActionToString(Player player, Action move_id) const override; + std::vector> ChanceOutcomes() const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::string InformationStateString(Player player) const override { + return ObservationString(player); + } + void InformationStateTensor(Player player, + absl::Span values) const override { + return ObservationTensor(player, values); + } + + std::unique_ptr Clone() const override; + std::vector LegalActions() const override; + + protected: + void DoApplyAction(Action action) override; + + private: + const int num_states_; + const int num_messages_; + const std::vector payoffs_; + int cur_player_; + int state_; + int message_; + int action_; +}; + +class LewisSignalingGame : public Game { + public: + explicit LewisSignalingGame(const GameParameters& params); + + int NumDistinctActions() const override; + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new LewisSignalingState( + shared_from_this(), num_states_, num_messages_, payoffs_)); + } + int MaxChanceOutcomes() const override { return num_states_; } + + int MaxGameLength() const override { return 2; } + // TODO: verify whether this bound is tight and/or tighten it. 
+ int MaxChanceNodesInHistory() const override { return MaxGameLength(); } + + int NumPlayers() const override { return kDefaultNumPlayers; } + double MaxUtility() const override { + return *std::max_element(payoffs_.begin(), payoffs_.end()); + } + double MinUtility() const override { + return *std::min_element(payoffs_.begin(), payoffs_.end()); + } + std::vector ObservationTensorShape() const override; + std::vector InformationStateTensorShape() const override { + return ObservationTensorShape(); + } + + private: + const int num_states_; + const int num_messages_; + std::vector payoffs_; +}; + +} // namespace lewis_signaling +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_LEWIS_SIGNALING_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/lewis_signaling/lewis_signaling_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/lewis_signaling/lewis_signaling_test.cc new file mode 100644 index 0000000..228f71e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/lewis_signaling/lewis_signaling_test.cc @@ -0,0 +1,92 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/lewis_signaling/lewis_signaling.h" + +#include +#include +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace lewis_signaling { +namespace { + +namespace testing = open_spiel::testing; + +void BasicLewisSignalingTests() { + testing::RandomSimTest(*LoadGame("lewis_signaling"), 100); +} + +void DefaultParamsTest() { + std::vector def_pay = {1, 0, 0, 0, 1, 0, 0, 0, 1}; + for (int i = 0; i < kDefaultNumStates; ++i) { + for (int j = 0; j < kDefaultNumStates; ++j) { + std::shared_ptr game = LoadGame("lewis_signaling"); + std::unique_ptr state = game->NewInitialState(); + + state->ApplyAction(i); // set state to i + SPIEL_CHECK_TRUE(state->CurrentPlayer() == + static_cast(Players::kSender)); + state->ApplyAction(0); // message 0 + SPIEL_CHECK_TRUE(state->CurrentPlayer() == + static_cast(Players::kReceiver)); + state->ApplyAction(j); // action j + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_EQ(state->PlayerReturn(0), + def_pay[i * kDefaultNumStates + j]); + SPIEL_CHECK_EQ(state->PlayerReturn(1), + def_pay[i * kDefaultNumStates + j]); + std::cout << state->ToString() << std::endl; + } + } +} + +void LargePayoffMatrixTest() { + std::vector pay = {1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1}; + std::string pay_str = "1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1"; + int num_states = 4; + GameParameters params = {{"num_states", GameParameter(num_states)}, + {"payoffs", GameParameter(pay_str)}}; + for (int i = 0; i < num_states; ++i) { + for (int j = 0; j < num_states; ++j) { + std::shared_ptr game = LoadGame("lewis_signaling", params); + std::unique_ptr state = game->NewInitialState(); + + state->ApplyAction(i); // set state to i + SPIEL_CHECK_TRUE(state->CurrentPlayer() == + static_cast(Players::kSender)); + state->ApplyAction(0); // 
message 0 + SPIEL_CHECK_TRUE(state->CurrentPlayer() == + static_cast(Players::kReceiver)); + state->ApplyAction(j); // action j + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_EQ(state->PlayerReturn(0), pay[i * num_states + j]); + SPIEL_CHECK_EQ(state->PlayerReturn(1), pay[i * num_states + j]); + std::cout << state->ToString() << std::endl; + } + } +} + +} // namespace +} // namespace lewis_signaling +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::lewis_signaling::BasicLewisSignalingTests(); + open_spiel::lewis_signaling::DefaultParamsTest(); + open_spiel::lewis_signaling::LargePayoffMatrixTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/liars_dice/liars_dice.cc b/scenarios/bargaining/open_spiel/open_spiel/games/liars_dice/liars_dice.cc new file mode 100644 index 0000000..0818f18 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/liars_dice/liars_dice.cc @@ -0,0 +1,610 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/liars_dice/liars_dice.h" + +#include +#include +#include + +#include "open_spiel/game_parameters.h" + +namespace open_spiel { +namespace liars_dice { + +namespace { +// Default Parameters. +constexpr int kDefaultPlayers = 2; +constexpr int kDefaultNumDice = 1; +constexpr int kDefaultDiceSides = 6; // Number of sides on the dice. +constexpr const char* kDefaultBiddingRule = "reset-face"; +constexpr int kInvalidOutcome = -1; +constexpr int kInvalidBid = -1; + +// Only relevant for the imperfect recall version. 
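+// (Used by the "liars_dice_ir" variant below, whose information state keeps
+// only the last recall_length bids; see
+// ImperfectRecallLiarsDiceState::InformationStateString.)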
+constexpr int kDefaultRecallLength = 4; + +// Facts about the game +const GameType kGameType{ + /*short_name=*/"liars_dice", + /*long_name=*/"Liars Dice", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/kDefaultPlayers, + /*min_num_players=*/kDefaultPlayers, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/false, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"players", GameParameter(kDefaultPlayers)}, + {"numdice", GameParameter(kDefaultNumDice)}, + {"dice_sides", GameParameter(kDefaultDiceSides)}, + {"bidding_rule", GameParameter(kDefaultBiddingRule)}}}; + +const GameType kImperfectRecallGameType{ + /*short_name=*/"liars_dice_ir", + /*long_name=*/"Liars Dice with Imperfect Recall", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/kDefaultPlayers, + /*min_num_players=*/kDefaultPlayers, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/false, + /*provides_observation_tensor=*/false, + /*parameter_specification=*/ + {{"players", GameParameter(kDefaultPlayers)}, + {"numdice", GameParameter(kDefaultNumDice)}, + {"dice_sides", GameParameter(kDefaultDiceSides)}, + {"bidding_rule", GameParameter(kDefaultBiddingRule)}, + {"recall_length", GameParameter(kDefaultRecallLength)}}}; + + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new LiarsDiceGame(params, kGameType)); +} + +std::shared_ptr ImperfectRecallFactory( + const GameParameters& params) { + return std::shared_ptr(new ImperfectRecallLiarsDiceGame(params)); +} + +const BiddingRule ParseBiddingRule(const std::string& bidding_rule_str) { + SPIEL_CHECK_TRUE(bidding_rule_str == "reset-face" || + bidding_rule_str == "reset-quantity"); + if (bidding_rule_str == "reset-face") { + return BiddingRule::kResetFace; + } else { + return BiddingRule::kResetQuantity; + } +} + +const LiarsDiceGame* UnwrapGame(const Game* game) { + return down_cast(game); +} +} // namespace + +REGISTER_SPIEL_GAME(kGameType, Factory); +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +REGISTER_SPIEL_GAME(kImperfectRecallGameType, ImperfectRecallFactory); +RegisterSingleTensorObserver single_tensor_imperfect_recall( + kImperfectRecallGameType.short_name); + +LiarsDiceState::LiarsDiceState(std::shared_ptr game, + int total_num_dice, int max_dice_per_player, + const std::vector& num_dice) + : State(game), + dice_outcomes_(), + bidseq_(), + cur_player_(kChancePlayerId), // chance starts + cur_roller_(0), // first player starts rolling + winner_(kInvalidPlayer), + loser_(kInvalidPlayer), + current_bid_(kInvalidBid), + total_num_dice_(total_num_dice), + total_moves_(0), + calling_player_(0), + bidding_player_(0), + max_dice_per_player_(max_dice_per_player), + num_dice_(num_dice), + num_dice_rolled_(game->NumPlayers(), 0), + bidseq_str_() { + for (int const& num_dices : num_dice_) { + std::vector initial_outcomes(num_dices, kInvalidOutcome); + dice_outcomes_.push_back(initial_outcomes); + } +} + +std::string LiarsDiceState::ActionToString(Player player, + Action action_id) const { + if (player != kChancePlayerId) { + if 
(action_id == total_num_dice_ * dice_sides()) { + return "Liar"; + } else { + const std::pair bid = UnrankBid(action_id); + return absl::StrCat(bid.first, "-", bid.second); + } + } + return absl::StrCat("Roll ", action_id + 1); +} + +int LiarsDiceState::CurrentPlayer() const { + if (IsTerminal()) { + return kTerminalPlayerId; + } else { + return cur_player_; + } +} + +void LiarsDiceState::ResolveWinner() { + const std::pair bid = UnrankBid(current_bid_); + int quantity = bid.first, face = bid.second; + int matches = 0; + + // Count all the matches among all dice from all the players + // dice_sides_ (e.g. 6) is wild, so it always matches. + for (auto p = Player{0}; p < num_players_; p++) { + for (int d = 0; d < num_dice_[p]; d++) { + if (dice_outcomes_[p][d] == face || + dice_outcomes_[p][d] == dice_sides()) { + matches++; + } + } + } + + // If the number of matches are at least the quantity bid, then the bidder + // wins. Otherwise, the caller wins. + if (matches >= quantity) { + winner_ = bidding_player_; + loser_ = calling_player_; + } else { + winner_ = calling_player_; + loser_ = bidding_player_; + } +} + +const int LiarsDiceState::dice_sides() const { + return UnwrapGame(game_.get())->dice_sides(); +} + +const BiddingRule LiarsDiceState::bidding_rule() const { + return UnwrapGame(game_.get())->bidding_rule(); +} + +void LiarsDiceState::DoApplyAction(Action action) { + if (IsChanceNode()) { + // Fill the next die roll for the current roller. + SPIEL_CHECK_GE(cur_roller_, 0); + SPIEL_CHECK_LT(cur_roller_, num_players_); + + SPIEL_CHECK_LT(num_dice_rolled_[cur_roller_], num_dice_[cur_roller_]); + int slot = num_dice_rolled_[cur_roller_]; + + // Assign the roll. + dice_outcomes_[cur_roller_][slot] = action + 1; + num_dice_rolled_[cur_roller_]++; + + // Check to see if we must change the roller. + if (num_dice_rolled_[cur_roller_] == num_dice_[cur_roller_]) { + cur_roller_++; + if (cur_roller_ >= num_players_) { + // Time to start playing! + cur_player_ = 0; + // Sort all players' rolls + for (auto p = Player{0}; p < num_players_; p++) { + std::sort(dice_outcomes_[p].begin(), dice_outcomes_[p].end()); + } + } + } + } else { + // Check for legal actions. + if (!bidseq_.empty() && action <= bidseq_.back()) { + SpielFatalError(absl::StrCat("Illegal action. ", action, + " should be strictly higher than ", + bidseq_.back())); + } + if (action == total_num_dice_ * dice_sides()) { + // This was the calling bid, game is over. + bidseq_.push_back(action); + calling_player_ = cur_player_; + ResolveWinner(); + } else { + // Up the bid and move to the next player. + bidseq_.push_back(action); + current_bid_ = action; + bidding_player_ = cur_player_; + cur_player_ = NextPlayerRoundRobin(cur_player_, num_players_); + } + + total_moves_++; + } +} + +std::vector LiarsDiceState::LegalActions() const { + if (IsTerminal()) return {}; + // A chance node is a single die roll. + if (IsChanceNode()) { + return LegalChanceOutcomes(); + } + + std::vector actions; + + // Any move higher than the current bid is allowed. (Bids start at 0) + for (int b = current_bid_ + 1; b < total_num_dice_ * dice_sides(); b++) { + actions.push_back(b); + } + + // Calling Liar is only available if at least one move has been made. + if (total_moves_ > 0) { + actions.push_back(total_num_dice_ * dice_sides()); + } + + return actions; +} + +std::vector> LiarsDiceState::ChanceOutcomes() const { + SPIEL_CHECK_TRUE(IsChanceNode()); + + std::vector> outcomes; + + // A chance node is a single die roll. 
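+  // With the default dice_sides() == 6 this is the uniform distribution
+  // {(0, 1/6), (1, 1/6), ..., (5, 1/6)}; outcome i is mapped to the face
+  // value i + 1 in DoApplyAction().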
+ outcomes.reserve(dice_sides()); + for (int i = 0; i < dice_sides(); i++) { + outcomes.emplace_back(i, 1.0 / dice_sides()); + } + + return outcomes; +} + +std::string LiarsDiceState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + std::string result = absl::StrJoin(dice_outcomes_[player], ""); + for (int b = 0; b < bidseq_.size(); b++) { + if (bidseq_[b] == total_num_dice_ * dice_sides()) { + absl::StrAppend(&result, " Liar"); + } else { + const std::pair bid = UnrankBid(bidseq_[b]); + absl::StrAppend(&result, " ", bid.first, "-", bid.second); + } + } + return result; +} + +std::string LiarsDiceState::ToString() const { + std::string result = ""; + + for (auto p = Player{0}; p < num_players_; p++) { + if (p != 0) absl::StrAppend(&result, " "); + for (int d = 0; d < num_dice_[p]; d++) { + absl::StrAppend(&result, dice_outcomes_[p][d]); + } + } + + if (IsChanceNode()) { + return absl::StrCat(result, " - chance node, current roller is player ", + cur_roller_); + } + + for (int b = 0; b < bidseq_.size(); b++) { + if (bidseq_[b] == total_num_dice_ * dice_sides()) { + absl::StrAppend(&result, " Liar"); + } else { + const std::pair bid = UnrankBid(bidseq_[b]); + absl::StrAppend(&result, " ", bid.first, "-", bid.second); + } + } + return result; +} + +bool LiarsDiceState::IsTerminal() const { return winner_ != kInvalidPlayer; } + +std::vector LiarsDiceState::Returns() const { + std::vector returns(num_players_, 0.0); + + if (winner_ != kInvalidPlayer) { + returns[winner_] = 1.0; + } + + if (loser_ != kInvalidPlayer) { + returns[loser_] = -1.0; + } + + return returns; +} + +void LiarsDiceState::InformationStateTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + // One-hot encoding for player number. + // One-hot encoding for each die (max_dice_per_player_ * sides). + // One slot(bit) for each legal bid. + // One slot(bit) for calling liar. (Necessary because observations and + // information states need to be defined at terminals) + int offset = 0; + std::fill(values.begin(), values.end(), 0.); + SPIEL_CHECK_EQ(values.size(), num_players_ + + (max_dice_per_player_ * dice_sides()) + + (total_num_dice_ * dice_sides()) + 1); + values[player] = 1; + offset += num_players_; + + int my_num_dice = num_dice_[player]; + + for (int d = 0; d < my_num_dice; d++) { + int outcome = dice_outcomes_[player][d]; + if (outcome != kInvalidOutcome) { + SPIEL_CHECK_GE(outcome, 1); + SPIEL_CHECK_LE(outcome, dice_sides()); + values[offset + (outcome - 1)] = 1; + } + offset += dice_sides(); + } + + // Skip to bidding part. If current player has fewer dice than the other + // players, all the remaining entries are 0 for those dice. + offset = num_players_ + max_dice_per_player_ * dice_sides(); + + for (int b = 0; b < bidseq_.size(); b++) { + SPIEL_CHECK_GE(bidseq_[b], 0); + SPIEL_CHECK_LE(bidseq_[b], total_num_dice_ * dice_sides()); + values[offset + bidseq_[b]] = 1; + } +} + +void LiarsDiceState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + // One-hot encoding for player number. + // One-hot encoding for each die (max_dice_per_player_ * sides). + // One slot(bit) for the two last legal bid. + // One slot(bit) for calling liar. 
(Necessary because observations and + // information states need to be defined at terminals) + int offset = 0; + std::fill(values.begin(), values.end(), 0.); + SPIEL_CHECK_EQ(values.size(), num_players_ + + (max_dice_per_player_ * dice_sides()) + + (total_num_dice_ * dice_sides()) + 1); + values[player] = 1; + offset += num_players_; + + int my_num_dice = num_dice_[player]; + + for (int d = 0; d < my_num_dice; d++) { + int outcome = dice_outcomes_[player][d]; + if (outcome != kInvalidOutcome) { + SPIEL_CHECK_GE(outcome, 1); + SPIEL_CHECK_LE(outcome, dice_sides()); + values[offset + (outcome - 1)] = 1; + } + offset += dice_sides(); + } + + // Skip to bidding part. If current player has fewer dice than the other + // players, all the remaining entries are 0 for those dice. + offset = num_players_ + max_dice_per_player_ * dice_sides(); + + // We only show the num_players_ last bids + int size_bid = bidseq_.size(); + int bid_offset = std::max(0, size_bid - num_players_); + for (int b = bid_offset; b < size_bid; b++) { + SPIEL_CHECK_GE(bidseq_[b], 0); + SPIEL_CHECK_LE(bidseq_[b], total_num_dice_ * dice_sides()); + values[offset + bidseq_[b]] = 1; + } +} + +std::unique_ptr LiarsDiceState::Clone() const { + return std::unique_ptr(new LiarsDiceState(*this)); +} + +std::pair LiarsDiceState::UnrankBid(int bidnum) const { + std::pair bid; + SPIEL_CHECK_NE(bidnum, kInvalidBid); + SPIEL_CHECK_GE(bidnum, 0); + SPIEL_CHECK_LT(bidnum, dice_sides() * total_num_dice_); + + if (bidding_rule() == BiddingRule::kResetFace) { + // Bids have the form - + // + // So, in a two-player game where each die has 6 faces, we have + // + // Bid ID Quantity Face + // 0 1 1 + // 1 1 2 + // ... + // 5 1 6 + // 6 2 1 + // ... + // 11 2 6 + // + // Bid ID #dice * #num faces encodes the special "liar" action. + + // The quantity occupies the higher bits, so it can be extracted using an + // integer division operation. + bid.first = bidnum / dice_sides() + 1; + // The face occupies the lower bits, so it can be extraced using a modulo + // operation. + bid.second = 1 + (bidnum % dice_sides()); + } else { + SPIEL_CHECK_EQ(bidding_rule(), BiddingRule::kResetQuantity); + // Bids have the form - + // + // So, in a two-player game where each die has 6 faces, we have + // + // Bid ID Quantity Face + // 0 1 1 + // 1 2 1 + // 2 1 2 + // 3 2 2 + // ... + // 9 2 5 + // 10 1 6 + // 11 2 6 + // + // Bid ID #dice * #num faces encodes the special "liar" action. + // + // This particular encoding scheme allows for very cheap comparison of bids: + // a bid is stronger if it is encoded to a higher ID. + + // The quantity occupies the lower bits, so it can be extracted using a + // modulo operation. + bid.first = 1 + (bidnum % total_num_dice_); + // The face occupies the higher bits, so it can be extracted using an + // integer division. + bid.second = bidnum / total_num_dice_ + 1; + } + + SPIEL_CHECK_GE(bid.first, 1); + // It doesn't make sense to bid more dice than the number of dice in the game. + SPIEL_CHECK_LE(bid.first, total_num_dice_); + + SPIEL_CHECK_GE(bid.second, 1); + // It doesn't make sense to bid a face that does not exist. 
+ SPIEL_CHECK_LE(bid.second, dice_sides()); + + return bid; +} + +LiarsDiceGame::LiarsDiceGame(const GameParameters& params, GameType game_type) + : Game(game_type, params), + num_players_(ParameterValue("players")), + dice_sides_(ParameterValue("dice_sides")), + bidding_rule_(ParseBiddingRule( + ParameterValue("bidding_rule"))) { + SPIEL_CHECK_GE(num_players_, kGameType.min_num_players); + SPIEL_CHECK_LE(num_players_, kGameType.max_num_players); + SPIEL_CHECK_GE(dice_sides_, 1); + + int def_num_dice = ParameterValue("numdice"); + + // Compute the number of dice for each player based on parameters, + // and set default outcomes of unknown face values (-1). + total_num_dice_ = 0; + num_dice_.resize(num_players_, 0); + + for (auto p = Player{0}; p < num_players_; p++) { + std::string key = absl::StrCat("numdice", p); + + int my_num_dice = def_num_dice; + if (IsParameterSpecified(game_parameters_, key)) { + my_num_dice = ParameterValue(key); + } + + num_dice_[p] = my_num_dice; + total_num_dice_ += my_num_dice; + } + + // Compute max dice per player (used for observations.) + max_dice_per_player_ = -1; + for (int nd : num_dice_) { + if (nd > max_dice_per_player_) { + max_dice_per_player_ = nd; + } + } +} + +int LiarsDiceGame::NumDistinctActions() const { + return total_num_dice_ * dice_sides_ + 1; +} + +std::unique_ptr LiarsDiceGame::NewInitialState() const { + std::unique_ptr state( + new LiarsDiceState(shared_from_this(), + /*total_num_dice=*/total_num_dice_, + /*max_dice_per_player=*/max_dice_per_player_, + /*num_dice=*/num_dice_)); + return state; +} + +int LiarsDiceGame::MaxChanceOutcomes() const { return dice_sides_; } + +int LiarsDiceGame::MaxGameLength() const { + // A bet for each side and number of total dice, plus "liar" action. + return total_num_dice_ * dice_sides_ + 1; +} +int LiarsDiceGame::MaxChanceNodesInHistory() const { return total_num_dice_; } + +std::vector LiarsDiceGame::InformationStateTensorShape() const { + // One-hot encoding for the player number. + // One-hot encoding for each die (max_dice_per_player_ * sides). + // One slot(bit) for each legal bid. + // One slot(bit) for calling liar. (Necessary because observations and + // information states need to be defined at terminals) + return {num_players_ + (max_dice_per_player_ * dice_sides_) + + (total_num_dice_ * dice_sides_) + 1}; +} + +std::vector LiarsDiceGame::ObservationTensorShape() const { + // One-hot encoding for the player number. + // One-hot encoding for each die (max_dice_per_player_ * sides). + // One slot(bit) for the num_players_ last legal bid. + // One slot(bit) for calling liar. 
(Necessary because observations and + // information states need to be defined at terminals) + return {num_players_ + (max_dice_per_player_ * dice_sides_) + + (total_num_dice_ * dice_sides_) + 1}; +} + +ImperfectRecallLiarsDiceGame::ImperfectRecallLiarsDiceGame( + const GameParameters& params) + : LiarsDiceGame(params, kImperfectRecallGameType), + recall_length_( + ParameterValue("rollout_length", kDefaultRecallLength)) {} + +std::unique_ptr ImperfectRecallLiarsDiceGame::NewInitialState() const { + return absl::make_unique(shared_from_this(), + /*total_num_dice=*/total_num_dice(), + /*max_dice_per_player=*/max_dice_per_player(), + /*num_dice=*/num_dice()); +} + +std::string ImperfectRecallLiarsDiceState::InformationStateString( + Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + const auto* parent_game = down_cast( + game_.get()); + + std::string result = + absl::StrCat("P", player, " ", absl::StrJoin(dice_outcomes_[player], "")); + + // Imperfect recall. Show only the last recall_length bids. + int start_index = std::max(bidseq_.size() - parent_game->recall_length(), + 0); + for (int b = start_index; b < bidseq_.size(); b++) { + if (bidseq_[b] == parent_game->total_num_dice() * dice_sides()) { + absl::StrAppend(&result, " Liar"); + } else { + const std::pair bid = UnrankBid(bidseq_[b]); + absl::StrAppend(&result, " ", bid.first, "-", bid.second); + } + } + return result; +} + +} // namespace liars_dice +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/liars_dice/liars_dice.h b/scenarios/bargaining/open_spiel/open_spiel/games/liars_dice/liars_dice.h new file mode 100644 index 0000000..affc786 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/liars_dice/liars_dice.h @@ -0,0 +1,210 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_LIARS_DICE_H_ +#define OPEN_SPIEL_GAMES_LIARS_DICE_H_ + +#include +#include +#include +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// A simple game that includes chance and imperfect information +// https://en.wikipedia.org/wiki/Liar%27s_dice +// +// Currently only supports a single round and two players. +// The highest face (`dice_sides`) is wild. +// +// Parameters: +// "bidding_rule" string bidding variants ("reset-face" or +// ("reset-quantity") (def. "reset-face") +// "dice_sides" int number of sides on each die (def. = 6) +// "numdice" int number of dice per player (def. = 1) +// "numdiceX" int overridden number of dice for player X (def. = 1) +// "players" int number of players (def. = 2) + +namespace open_spiel { +namespace liars_dice { + +enum BiddingRule { + // The player may bid a higher quantity of any particular face, or the same + // quantity of a higher face (allowing a player to "re-assert" a face value + // they believe prevalent if another player increased the face value on their + // bid). 
+ kResetFace = 1, + + // The player may bid a higher quantity of the same face, or any particular + // quantity of a higher face (allowing a player to "reset" the quantity). + kResetQuantity = 2 +}; + +class LiarsDiceGame; + +class LiarsDiceState : public State { + public: + explicit LiarsDiceState(std::shared_ptr game, int total_num_dice, + int max_dice_per_player, + const std::vector& num_dice); + LiarsDiceState(const LiarsDiceState&) = default; + + void Reset(const GameParameters& params); + Player CurrentPlayer() const override; + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + void InformationStateTensor(Player player, + absl::Span values) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + std::vector> ChanceOutcomes() const override; + std::vector LegalActions() const override; + + // Return number of sides on the dice. + const int dice_sides() const; + + Player calling_player() const { return calling_player_; } + int dice_outcome(Player player, int index) const { + return dice_outcomes_[player][index]; + } + int last_bid() const { + if (bidseq_.back() == total_num_dice_ * dice_sides()) { + return bidseq_[bidseq_.size() - 2]; + } else { + return bidseq_.back(); + } + } + + protected: + void DoApplyAction(Action action_id) override; + + // Get the quantity and face of the bid from an integer. The format of the + // return depends on the bidding rule. + // The bids starts at 0 and go to total_dice*dice_sides-1 (inclusive). + std::pair UnrankBid(int bid) const; + + // Dice outcomes: first indexed by player, then by dice number + std::vector> dice_outcomes_; + + // The bid sequence. + std::vector bidseq_; + + private: + void ResolveWinner(); + + // Return the bidding rule used by the game. + const BiddingRule bidding_rule() const; + + // Initialized to invalid values. Use Game::NewInitialState(). + Player cur_player_; // Player whose turn it is. + int cur_roller_; // Player currently rolling dice. + int winner_; + int loser_; + int current_bid_; + int total_num_dice_; + int total_moves_; + int calling_player_; // Player who calls Liar. + int bidding_player_; // Player who cast the last bid. + int max_dice_per_player_; + + std::vector num_dice_; // How many dice each player has. + std::vector num_dice_rolled_; // Number of dice currently rolled. + + // Used to encode the information state. + std::string bidseq_str_; +}; + +class LiarsDiceGame : public Game { + public: + explicit LiarsDiceGame(const GameParameters& params, GameType game_type); + int NumDistinctActions() const override; + std::unique_ptr NewInitialState() const override; + int MaxChanceOutcomes() const override; + int NumPlayers() const override { return num_players_; } + double MinUtility() const override { return -1; } + double MaxUtility() const override { return 1; } + absl::optional UtilitySum() const override { return 0; } + std::vector InformationStateTensorShape() const override; + std::vector ObservationTensorShape() const override; + int MaxGameLength() const override; + int MaxChanceNodesInHistory() const override; + + // Returns the maximum among how many dice each player has. For example, + // if player 1 has 3 dice and player 2 has 2 dice, returns 3. 
+ int max_dice_per_player() const { return max_dice_per_player_; } + + // Return the total number of dice in the game. + int total_num_dice() const { return total_num_dice_; } + + // Return the number of dice each player has. + std::vector num_dice() const { return num_dice_; } + + const int dice_sides() const { return dice_sides_; } + const BiddingRule bidding_rule() const { return bidding_rule_; } + + private: + // Number of players. + int num_players_; + + // Total dice in the game, determines the legal bids. + int total_num_dice_; + + std::vector num_dice_; // How many dice each player has. + int max_dice_per_player_; // Maximum value in num_dice_ vector. + const int dice_sides_; // Number of faces on each die. + const BiddingRule bidding_rule_; +}; + +// Implements the action abstraction from Lanctot et al. '12 +// http://mlanctot.info/files/papers/12icml-ir.pdf. See also Neller & Hnath, +// Approximating Optimal Dudo Play with Fixed-Strategy Iteration Counterfactual +// Regret Minimization: https://core.ac.uk/download/pdf/205864381.pdf +// +// This game has an extra parameter: +// "recall_length" int number of bids to remember (def. = 4) + +class ImperfectRecallLiarsDiceState : public LiarsDiceState { + public: + ImperfectRecallLiarsDiceState(std::shared_ptr game, + int total_num_dice, + int max_dice_per_player, + const std::vector& num_dice) + : LiarsDiceState(game, total_num_dice, max_dice_per_player, num_dice) {} + std::string InformationStateString(Player player) const override; + std::unique_ptr Clone() const override { + return std::unique_ptr(new ImperfectRecallLiarsDiceState(*this)); + } +}; + +class ImperfectRecallLiarsDiceGame : public LiarsDiceGame { + public: + explicit ImperfectRecallLiarsDiceGame(const GameParameters& params); + std::unique_ptr NewInitialState() const override; + + int recall_length() const { return recall_length_; } + + private: + int recall_length_; +}; + + +} // namespace liars_dice +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_LIARS_DICE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/liars_dice/liars_dice_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/liars_dice/liars_dice_test.cc new file mode 100644 index 0000000..9745ae9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/liars_dice/liars_dice_test.cc @@ -0,0 +1,43 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
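+
+// Illustrative usage (assumes the bid encoding described in liars_dice.cc):
+//
+//   std::shared_ptr<const Game> game = LoadGame(
+//       "liars_dice", {{"players", GameParameter(2)},
+//                      {"dice_sides", GameParameter(4)}});
+//
+// With two players holding one 4-sided die each there are 2 * 4 + 1 = 9
+// distinct actions: bid ids 0..7 plus the "Liar" call (id 8). Under the
+// default "reset-face" rule, bid id 5 decodes to quantity 5 / 4 + 1 = 2 and
+// face 5 % 4 + 1 = 2, i.e. the bid "2-2".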
+ +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace liars_dice { +namespace { + +namespace testing = open_spiel::testing; + +void BasicLiarsDiceTests() { + testing::LoadGameTest("liars_dice"); + testing::ChanceOutcomesTest(*LoadGame("liars_dice")); + testing::RandomSimTest(*LoadGame("liars_dice"), 50); +} + +void ImperfectRecallLiarsDiceTests() { + testing::LoadGameTest("liars_dice_ir"); + testing::ChanceOutcomesTest(*LoadGame("liars_dice_ir")); + testing::RandomSimTest(*LoadGame("liars_dice_ir"), 50); +} + +} // namespace +} // namespace liars_dice +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::liars_dice::BasicLiarsDiceTests(); + open_spiel::liars_dice::ImperfectRecallLiarsDiceTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/maedn/maedn.cc b/scenarios/bargaining/open_spiel/open_spiel/games/maedn/maedn.cc new file mode 100644 index 0000000..65a71f6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/maedn/maedn.cc @@ -0,0 +1,570 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/maedn/maedn.h" + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace maedn { +namespace { + +const std::vector> kChanceOutcomes = { + std::pair(0, 1.0 / 6), + std::pair(1, 1.0 / 6), + std::pair(2, 1.0 / 6), + std::pair(3, 1.0 / 6), + std::pair(4, 1.0 / 6), + std::pair(5, 1.0 / 6), +}; + +const std::vector kChanceOutcomeValues = {1, 2, 3, 4, 5, 6}; + +// Facts about the game +const GameType kGameType{/*short_name=*/"maedn", + /*long_name=*/"Mensch-Aergere-Dich-Nicht", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/4, + /*min_num_players=*/2, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + { + {"players", GameParameter(2)}, + {"twoPlayersOpposite", GameParameter(true)}, + }}; + +static std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new MaednGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); +} // namespace + +std::string CurPlayerToString(Player cur_player) { + switch (cur_player) { + case kRedPlayerId: + return "1"; + case kBluePlayerId: + return "2"; + case kGreenPlayerId: + return "3"; + case kYellowPlayerId: + return "4"; + case kChancePlayerId: + return "*"; + case kTerminalPlayerId: + return "T"; + default: + SpielFatalError(absl::StrCat("Unrecognized player id: 
", cur_player)); + } +} + +std::string MaednState::ActionToString(Player player, Action move_id) const { + if (player == kChancePlayerId) { + // Normal chance roll. + return absl::StrCat("chance outcome ", move_id, + " (roll: ", kChanceOutcomeValues[move_id], ")"); + } else { + // Assemble a human-readable string representation of the move. + if (move_id == kBringInAction) { + return absl::StrCat(move_id, " - brings in new piece"); + } else if (move_id == kPassAction) { + return absl::StrCat(move_id, " - passes"); + } else { + return absl::StrCat(move_id, " - moves piece on field ", + move_id - kFieldActionsOffset); + } + } +} + +std::string MaednState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void MaednState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + SPIEL_CHECK_EQ(values.size(), kStateEncodingSize); + auto value_it = values.begin(); + + // Tensor should contain state from the player's PoV, so relative + // positions are used and converted to absolute positions. + int position = PlayerToPosition(player); + for (int i = 0; i < kNumCommonFields; i++) { + int abs_pos = RelPosToAbsPos(i, position); + int piece = board_[abs_pos]; + *value_it++ = ((piece == 1) ? 1 : 0); + *value_it++ = ((piece == 2) ? 1 : 0); + *value_it++ = ((piece == 3) ? 1 : 0); + *value_it++ = ((piece == 4) ? 1 : 0); + } + + // Rotated goal fields to one hot encoded tensor. + for (int p = 0; p < kMaxNumPlayers; p++) { + int ply_position = PlayerToPosition((player + p) % kMaxNumPlayers); + for (int i = 0; i < kNumGoalFieldsPerPlayer; i++) { + int abs_pos = RelPosToAbsPos(kNumCommonFields + i, ply_position); + int piece = board_[abs_pos]; + *value_it++ = ((piece == 1) ? 1 : 0); + *value_it++ = ((piece == 2) ? 1 : 0); + *value_it++ = ((piece == 3) ? 1 : 0); + *value_it++ = ((piece == 4) ? 1 : 0); + } + } + + // Rotated number of pieces outside of field per player. + for (int p = 0; p < kMaxNumPlayers; p++) { + *value_it++ = (out_[(player + p) % kMaxNumPlayers]); + } + + if (cur_player_ == kChancePlayerId) { + // Encode chance player with all zeros. + for (int i = 0; i < kMaxNumPlayers; i++) { + *value_it++ = 0; + } + } else { + int rotated_current_player = + (num_players_ + cur_player_ - player) % num_players_; + // Rotated current player id to one hot encoded tensor. + for (int i = 0; i < kMaxNumPlayers; i++) { + *value_it++ = (rotated_current_player == i) ? 1 : 0; + } + } + + *value_it++ = ((dice_ == 1) ? 1 : 0); + *value_it++ = ((dice_ == 2) ? 1 : 0); + *value_it++ = ((dice_ == 3) ? 1 : 0); + *value_it++ = ((dice_ == 4) ? 1 : 0); + *value_it++ = ((dice_ == 5) ? 1 : 0); + *value_it++ = ((dice_ == 6) ? 1 : 0); + + SPIEL_CHECK_EQ(value_it, values.end()); +} + +void MaednState::FromObservationTensor(Player player, absl::Span values, + Player prev_player, int prev_dice) { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + SPIEL_CHECK_EQ(values.size(), kStateEncodingSize); + + prev_player_ = prev_player; + prev_dice_ = prev_dice; + + auto value_it = values.begin(); + + // Tensor should contain state from the player's PoV, so relative + // positions are used and converted to absolute positions. 
+ int position = PlayerToPosition(player); + for (int i = 0; i < kNumCommonFields; i++) { + int abs_pos = RelPosToAbsPos(i, position); + int one = *value_it++; + int two = *value_it++; + int three = *value_it++; + int four = *value_it++; + int piece = one ? 1 : (two ? 2 : (three ? 3 : (four ? 4 : 0))); + board_[abs_pos] = piece; + } + + // rotated goal fields to one hot encoded tensor + for (int p = 0; p < kMaxNumPlayers; p++) { + int ply_position = PlayerToPosition((player + p) % kMaxNumPlayers); + for (int i = 0; i < kNumGoalFieldsPerPlayer; i++) { + int abs_pos = RelPosToAbsPos(kNumCommonFields + i, ply_position); + int one = *value_it++; + int two = *value_it++; + int three = *value_it++; + int four = *value_it++; + int piece = one ? 1 : (two ? 2 : (three ? 3 : (four ? 4 : 0))); + board_[abs_pos] = piece; + } + } + + // rotated number of pieces outside of field per player + for (int p = 0; p < kMaxNumPlayers; p++) { + out_[(player + p) % kMaxNumPlayers] = *value_it++; + } + + int zero = *value_it++; + int one = *value_it++; + int two = *value_it++; + int three = *value_it++; + + if (zero + one + two + three == 0) { + cur_player_ = kChancePlayerId; + } else { + int rotated_current_player = zero ? 0 : (one ? 1 : (two ? 2 : 3)); + + cur_player_ = (rotated_current_player + player) % num_players_; + } + + int dice_1 = *value_it++; + int dice_2 = *value_it++; + int dice_3 = *value_it++; + int dice_4 = *value_it++; + int dice_5 = *value_it++; + int dice_6 = *value_it++; + + dice_ = dice_1 ? 1 + : (dice_2 ? 2 + : (dice_3 ? 3 + : dice_4 ? 4 + : (dice_5 ? 5 : (dice_6 ? 6 : 0)))); + + SPIEL_CHECK_EQ(value_it, values.end()); +} + +MaednState::MaednState(std::shared_ptr game, + bool two_players_opposite) + : State(game), + cur_player_(kChancePlayerId), + prev_player_(game->NumPlayers() - 1), + two_players_opposite_(two_players_opposite), + turns_(0), + dice_(0), + prev_dice_(0), + board_(std::vector(kNumFields, 0)), + turn_history_info_({}) { + int i = 0; + for (; i < num_players_; i++) { + out_.push_back(4); + } + for (; i < kMaxNumPlayers; i++) { + out_.push_back(0); + } +} + +Player MaednState::CurrentPlayer() const { + return IsTerminal() ? kTerminalPlayerId : Player{cur_player_}; +} + +void MaednState::DoApplyAction(Action move) { + if (IsChanceNode()) { + // Chance action. + turn_history_info_.push_back(TurnHistoryInfo(kChancePlayerId, prev_player_, + dice_, prev_dice_, move, 0)); + + SPIEL_CHECK_TRUE(dice_ == 0); + dice_ = kChanceOutcomeValues[move]; + if (prev_dice_ == 6) { + // if last dice roll was a 6, same player moves again + cur_player_ = prev_player_; + } else { + // next player + cur_player_ = (prev_player_ + 1) % num_players_; + turns_++; + } + return; + } + + // Normal move action. + int thrown_out_player = -1; + + if (move != kPassAction) { + if (move == kBringInAction) { + // Bring in new piece. + int players_first_field = GetPlayersFirstField(cur_player_); + + thrown_out_player = board_[players_first_field] - 1; + board_[players_first_field] = cur_player_ + 1; + out_[cur_player_]--; + } else { + // Normal piece move. 
+ std::pair fields = + GetFieldsFromAction(move, cur_player_, dice_); + + board_[fields.first] = 0; + thrown_out_player = board_[fields.second] - 1; + board_[fields.second] = cur_player_ + 1; + } + + if (thrown_out_player >= 0) { + out_[thrown_out_player]++; + } + } + + turn_history_info_.push_back(TurnHistoryInfo( + cur_player_, prev_player_, dice_, prev_dice_, move, thrown_out_player)); + + prev_player_ = cur_player_; + prev_dice_ = dice_; + + cur_player_ = kChancePlayerId; + dice_ = 0; +} + +void MaednState::UndoAction(Player player, Action action) { + { + const TurnHistoryInfo& thi = turn_history_info_.back(); + SPIEL_CHECK_EQ(thi.player, player); + SPIEL_CHECK_EQ(action, thi.action); + cur_player_ = thi.player; + prev_player_ = thi.prev_player; + dice_ = thi.dice; + prev_dice_ = thi.prev_dice; + if (player != kChancePlayerId && action != kPassAction) { + // Undo move. + // Code basically is the inverse of DoApplyAction(Action move). + if (action == kBringInAction) { + // Un-bring in new piece. + int players_first_field = GetPlayersFirstField(cur_player_); + + board_[players_first_field] = thi.thrown_out_player + 1; + out_[cur_player_]++; + } else { + // Normal piece move. + std::pair fields = + GetFieldsFromAction(action, cur_player_, dice_); + + board_[fields.first] = cur_player_ + 1; + board_[fields.second] = thi.thrown_out_player + 1; + } + + if (thi.thrown_out_player >= 0) { + out_[thi.thrown_out_player]--; + } + } + } + turn_history_info_.pop_back(); + history_.pop_back(); + --move_number_; +} + +std::pair MaednState::GetFieldsFromAction(Action action, + Player player, + int dice) const { + int position = PlayerToPosition(player); + int relative_source_field = action - kFieldActionsOffset; + int relative_target_field = relative_source_field + dice; + + return {RelPosToAbsPos(relative_source_field, position), + RelPosToAbsPos(relative_target_field, position)}; +} + +int MaednState::RelPosToAbsPos(int relative_position, int position) const { + if (relative_position < kNumCommonFields) { + int players_first_field = (kNumCommonFields / kMaxNumPlayers) * position; + return (relative_position + players_first_field) % kNumCommonFields; + } else { + return kNumGoalFieldsPerPlayer * position + relative_position; + } +} + +int MaednState::AbsPosToRelPos(int absolute_position, int position) const { + if (absolute_position < kNumCommonFields) { + int playersFirstField = (kNumCommonFields / kMaxNumPlayers) * position; + return (kNumCommonFields + absolute_position - playersFirstField) % + kNumCommonFields; + } else { + return absolute_position - kNumGoalFieldsPerPlayer * position; + } +} + +int MaednState::GetPlayersFirstField(Player player) const { + int position = PlayerToPosition(player); + return (kNumCommonFields / kMaxNumPlayers) * position; +} + +std::vector> MaednState::ChanceOutcomes() const { + SPIEL_CHECK_TRUE(IsChanceNode()); + return kChanceOutcomes; +} + +std::vector MaednState::LegalActions() const { + if (IsChanceNode()) return LegalChanceOutcomes(); + if (IsTerminal()) return {}; + + std::vector legal_actions; + + // Follows these rules in this exact order: + // - If a player's own piece is standing on the start field + // and player has at least one piece off the board, player + // MUST move the piece on the start field away unless it is + // blocked by another own piece. If that is the case, + // player is free to move any own piece. + // - If player rolls a 6 and has at least one piece off the + // board, player MUST bring in a new piece. 
+ // - If player has no (moveable) piece on the board, player + // must pass. + // - In any other case, player is free to move any own piece + // on the board. + int players_first_field = GetPlayersFirstField(cur_player_); + if (out_[cur_player_] > 0) { + if (board_[players_first_field] == cur_player_ + 1) { + // Is piece on start field moveable by dice roll? + // (playersFirstField + dice) cannot overflow, simple + // addition is suitable. + if (board_[players_first_field + dice_] != cur_player_ + 1) { + legal_actions.push_back(kFieldActionsOffset); + return legal_actions; + } + } + + if (dice_ == 6) { + // Player MUST bring in a new piece if possible. + // Check whether start field is bloked. + if (board_[players_first_field] != cur_player_ + 1) { + legal_actions.push_back(kBringInAction); + return legal_actions; + } + // Start field is blocked and this piece itself is + // blocked due (has already been checked). + } + } + + // Look for pieces of current player on board if there is at least one: + if (out_[cur_player_] < 4) { + int position = PlayerToPosition(cur_player_); + const int max_field = kNumCommonFields + kNumGoalFieldsPerPlayer - dice_; + for (int relative_source_field = 0; relative_source_field < max_field; + relative_source_field++) { + int relative_target_field = relative_source_field + dice_; + + int absolute_source_field = + RelPosToAbsPos(relative_source_field, position); + int absolute_target_field = + RelPosToAbsPos(relative_target_field, position); + + if (board_[absolute_source_field] == cur_player_ + 1) { + if (board_[absolute_target_field] != cur_player_ + 1) { + legal_actions.push_back(relative_source_field + kFieldActionsOffset); + } + } + } + } + + // If nothing is possible, player must pass. + if (legal_actions.empty()) { + legal_actions.push_back(kPassAction); + } + + return legal_actions; +} + +std::string MaednState::ToString() const { + std::vector board_array = { + ". . o-o-S . .", ". . o . o . .", " o . o ", + " o . o ", "S-o-o-o-o . o-o-o-o-o", "o . . . . . . . . o", + "o-o-o-o-o . o-o-o-o-S", " o . o ", " o . o ", + ". . o . o . .", ". . S-o-o . .", + }; + + // Fill the board. + for (int pos = 0; pos < kNumFields; pos++) { + if (board_[pos] > 0) { + Coords coords = kFieldToBoardString[pos]; + board_array[coords.y][coords.x] = 48 + board_[pos]; + } + } + // Pieces off the board. + for (int ply = 0; ply < kMaxNumPlayers; ply++) { + int out = out_[ply]; + int position = PlayerToPosition(ply); + int offset = kNumFields + kNumGoalFieldsPerPlayer * position; + for (int i = 0; i < out; i++) { + Coords coords = kFieldToBoardString[offset + i]; + board_array[coords.y][coords.x] = 49 + ply; + } + } + + std::string board_str = absl::StrJoin(board_array, "\n") + "\n"; + + // Extra info like whose turn it is etc. + absl::StrAppend(&board_str, "Turn: "); + absl::StrAppend(&board_str, CurPlayerToString(cur_player_)); + absl::StrAppend(&board_str, "\n"); + absl::StrAppend(&board_str, "Dice: "); + absl::StrAppend(&board_str, dice_ != 0 ? 
std::to_string(dice_) : ""); + absl::StrAppend(&board_str, "\n"); + + return board_str; +} + +bool MaednState::AllInGoal(Player player) const { + int position = PlayerToPosition(player); + int offset = kNumCommonFields + position * kNumGoalFieldsPerPlayer; + return board_[offset] != 0 && board_[offset + 1] != 0 && + board_[offset + 2] != 0 && board_[offset + 3] != 0; +} + +bool MaednState::IsTerminal() const { + for (int ply = 0; ply < num_players_; ply++) { + if (AllInGoal(ply)) { + return true; + } + } + return false; +} + +std::vector MaednState::Returns() const { + std::vector returns; + + if (IsTerminal()) { + for (int ply = 0; ply < num_players_; ply++) { + returns.push_back(AllInGoal(ply) ? num_players_ - 1.0 : -1.0); + } + } else { + for (int ply = 0; ply < num_players_; ply++) { + returns.push_back(0.0); + } + } + + return returns; +} + +std::unique_ptr MaednState::Clone() const { + return std::unique_ptr(new MaednState(*this)); +} + +void MaednState::SetState(int cur_player, int dice, int prev_player, + int prev_dice, const std::vector& board, + const std::vector& out) { + cur_player_ = cur_player; + prev_player_ = prev_player; + dice_ = dice; + prev_dice_ = prev_dice; + board_ = board; + out_ = out; +} + +MaednGame::MaednGame(const GameParameters& params) + : Game(kGameType, params), + two_player_opposite_(ParameterValue("twoPlayersOpposite")), + num_players_(ParameterValue("players")) { + SPIEL_CHECK_GE(num_players_, kGameType.min_num_players); + SPIEL_CHECK_LE(num_players_, kGameType.max_num_players); +} + +} // namespace maedn +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/maedn/maedn.h b/scenarios/bargaining/open_spiel/open_spiel/games/maedn/maedn.h new file mode 100644 index 0000000..33c1c9a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/maedn/maedn.h @@ -0,0 +1,333 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_MAEDN_H_ +#define OPEN_SPIEL_GAMES_MAEDN_H_ + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/spiel.h" + +// An implementation of Mensch-Aergere-Dich-Nicht (see +// https://en.wikipedia.org/wiki/Mensch_%C3%A4rgere_Dich_nicht) +// +// Rules used: +// - start field must be cleared as soon as possible +// - throwing out own pieces is not possible +// - only one dice roll even if no move is possible except if dice roll was +// a six, in this case, same player may roll again +// - pieces may jump over each other on four final fields +// +// Parameters: +// - players: Number of Players (2 to 4) +// - twoPlayersOpposite: +// If two players play, two different settings are possible: +// Either players can play side by side or they can play on opposite sides. +// Since opposite sides are more fair, default value is true. 
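+//
+// Example (illustrative): a three-player game is created with
+//   LoadGame("maedn", {{"players", GameParameter(3)},
+//                      {"twoPlayersOpposite", GameParameter(false)}});
+// twoPlayersOpposite only has an effect when players == 2.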
+ +namespace open_spiel { +namespace maedn { + +inline constexpr const int kMaxNumPlayers = 4; +inline constexpr const int kNumChanceOutcomes = 6; +inline constexpr const int kRedPlayerId = 0; +inline constexpr const int kBluePlayerId = 1; +inline constexpr const int kGreenPlayerId = 2; +inline constexpr const int kYellowPlayerId = 3; +// Board consists of 40 common fields for all +// players and 4 separate goal fields for each player. +inline constexpr const int kNumCommonFields = 40; +inline constexpr const int kNumGoalFields = 16; +inline constexpr const int kNumGoalFieldsPerPlayer = 4; +inline constexpr const int kNumFields = kNumCommonFields + kNumGoalFields; + +// Number of pieces per player in the standard game. +inline constexpr const int kNumPiecesPerPlayer = 4; + +// position of pieces not yet in game +inline constexpr const int kOutPos = -1; + +// Action modelling (with ideas from Marc Lancot): +// The first action [0] is to pass (necessary if player cannot move any +// piece). The second action is to bring in a new piece. Once a piece is +// on the field, there are 43 fields a piece can stand on and be moved away +// from that field. Actions are coded as the field a move starts from, from +// each player's own PoV. That means that action 2 means to move a piece on +// field 0 for player 0 but a piece on field 10 for player 1 and so on. So +// there are 43 actions for moves, one action to bring in a new piece and +// one action to pass. Total number of possible actions is 45 +// ({ 0, 1, 2, ..., 44 }). +inline constexpr const int kNumDistinctActions = 45; + +inline constexpr const Action kPassAction = 0; +inline constexpr const Action kBringInAction = 1; +inline constexpr const Action kFieldActionsOffset = 2; + +// See ObservationTensorShape for details. +inline constexpr const int kBoardEncodingSize = 4 * kNumFields; +inline constexpr const int kStateEncodingSize = + kMaxNumPlayers + kBoardEncodingSize + kMaxNumPlayers + kNumChanceOutcomes; + +struct Coords { + int x; + int y; +}; + +const Coords kFieldToBoardString[]{ + // Common fields. + {0, 4}, + {2, 4}, + {4, 4}, + {6, 4}, + {8, 4}, + {8, 3}, + {8, 2}, + {8, 1}, + {8, 0}, + {10, 0}, + {12, 0}, + {12, 1}, + {12, 2}, + {12, 3}, + {12, 4}, + {14, 4}, + {16, 4}, + {18, 4}, + {20, 4}, + {20, 5}, + {20, 6}, + {18, 6}, + {16, 6}, + {14, 6}, + {12, 6}, + {12, 7}, + {12, 8}, + {12, 9}, + {12, 10}, + {10, 10}, + {8, 10}, + {8, 9}, + {8, 8}, + {8, 7}, + {8, 6}, + {6, 6}, + {4, 6}, + {2, 6}, + {0, 6}, + {0, 5}, + // Goal fields. + {2, 5}, + {4, 5}, + {6, 5}, + {8, 5}, + {10, 1}, + {10, 2}, + {10, 3}, + {10, 4}, + {18, 5}, + {16, 5}, + {14, 5}, + {12, 5}, + {10, 9}, + {10, 8}, + {10, 7}, + {10, 6}, + // Off the board fields. + {0, 0}, + {2, 0}, + {2, 1}, + {0, 1}, + {18, 0}, + {20, 0}, + {20, 1}, + {18, 1}, + {18, 10}, + {20, 10}, + {20, 9}, + {18, 9}, + {0, 10}, + {2, 10}, + {2, 9}, + {0, 9}, +}; + +// This is a small helper to track historical turn info not stored in the moves. +// It is only needed for proper implementation of Undo. 
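+// Every applied action (chance rolls included) pushes one TurnHistoryInfo;
+// UndoAction() pops it and restores the recorded player, dice and board
+// changes.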
+struct TurnHistoryInfo { + int player; + int prev_player; + int dice; + int prev_dice; + Action action; + int thrown_out_player; + TurnHistoryInfo(int _player, int _prev_player, int _dice, int _prev_dice, + int _action, int _thrown_out_player) + : player(_player), + prev_player(_prev_player), + dice(_dice), + prev_dice(_prev_dice), + action(_action), + thrown_out_player(_thrown_out_player) {} +}; + +class MaednGame; + +class MaednState : public State { + public: + MaednState(const MaednState&) = default; + MaednState(std::shared_ptr, bool two_players_opposite); + + Player CurrentPlayer() const override; + void UndoAction(Player player, Action action) override; + std::vector LegalActions() const override; + std::string ActionToString(Player player, Action move_id) const override; + std::vector> ChanceOutcomes() const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + + // Setter function used for debugging and tests. + // History is not set by this method, so calls to UndoAction will cause + // undefined behaviour! + void SetState(int cur_player, int dice, int prev_player, int prev_dice, + const std::vector& board, + const std::vector& out); + // Some values are not part of ObservationTensor (like prev_player_ and + // prev_dice_) and so have to be given from outside. History is not part + // of ObservationTensor either, so calls to UndoAction will cause undefined + // behaviour! + void FromObservationTensor(Player player, absl::Span values, + Player prev_player, int prev_dice); + + protected: + void DoApplyAction(Action move_id) override; + + private: + void SetupInitialBoard(); + void RollDice(int outcome); + std::pair GetFieldsFromAction(Action action, Player player, + int dice) const; + int RelPosToAbsPos(int relative_position, int position) const; + int AbsPosToRelPos(int absolute_position, int position) const; + int GetPlayersFirstField(Player player) const; + + int PlayerToPosition(Player player) const { + // Position is equal to player except if two players play on opposite + // sides, in this case position of player 1 is 2. For completeness, + // in this case position of player 2 is 1, so that even for iterations + // over 4 players no position is used twice. + return num_players_ == 2 && two_players_opposite_ && + (player == 1 || player == 2) + ? 3 - player + : player; + } + + bool AllInGoal(Player player) const; + Player cur_player_; + Player prev_player_; + const bool two_players_opposite_; + int turns_; + int dice_; // Current dice roll. + int prev_dice_; // Last dice roll. + std::vector out_; // Number of pieces of each player outside of field. + + // Board consists of 40 common fields, starting with the set-in field of + // player 0. After that, four goal fields of each player follow, beginning + // with player 0 again. + // Player 0 starts on field 0, goes up to field 39 and continues into + // goal fields 40-43. + // Player 1 starts on field 10, goes up to field 39, continues from 0 to 9 + // and jumps from 9 to 44-47. + // Player 2 starts on field 20, goes up to field 39, continues from 0 to 19 + // and jumps from 19 to 48-51. + // Player 3 starts on field 30, goes up to field 39, continues from 0 to 29 + // and jumps from 29 to 52-55. + std::vector board_; + std::vector turn_history_info_; // Info needed for Undo. 
+}; + +class MaednGame : public Game { + public: + explicit MaednGame(const GameParameters& params); + + int NumDistinctActions() const override { return kNumDistinctActions; } + + std::unique_ptr NewInitialState() const override { + return std::unique_ptr( + new MaednState(shared_from_this(), two_player_opposite_)); + } + + // Classic six sided dice. + int MaxChanceOutcomes() const override { return kNumChanceOutcomes; } + + // Arbitrarily chosen number to ensure the game is finite. + int MaxGameLength() const override { return 1000; } + + // Upper bound: chance node per move, with an initial chance node for + // determining starting player. + int MaxChanceNodesInHistory() const override { return MaxGameLength() + 1; } + + int NumPlayers() const override { return num_players_; } + double MinUtility() const override { return -MaxUtility(); } + absl::optional UtilitySum() const override { return 0; } + double MaxUtility() const override { return 3; } + + std::vector ObservationTensorShape() const override { + // Encode each field on the board as four doubles: + // - One double for whether there is a piece of player 1 (1 or 0). + // - One double for whether there is a piece of player 2 (1 or 0). + // - One double for whether there is a piece of player 3 (1 or 0). + // - One double for whether there is a piece of player 4 (1 or 0). + // (effectively that is one-hot encoded player number) + // + // Return a vector encoding: + // - Every field. + // - One double for the number of pieces outside the board for player 1. + // - One double for the number of pieces outside the board for player 2. + // - One double for the number of pieces outside the board for player 3. + // - One double for the number of pieces outside the board for player 4. + // - One double for whether it's player 1's turn (1 or 0). + // - One double for whether it's player 2's turn (1 or 0). + // - One double for whether it's player 3's turn (1 or 0). + // - One double for whether it's player 4's turn (1 or 0). + // (If it's chance player's turn, all four doubles are 0.) + // - One double for whether dice roll is a 1 (1 or 0). + // - One double for whether dice roll is a 2 (1 or 0). + // - One double for whether dice roll is a 3 (1 or 0). + // - One double for whether dice roll is a 4 (1 or 0). + // - One double for whether dice roll is a 5 (1 or 0). + // - One double for whether dice roll is a 6 (1 or 0). + // (If it's chance player's turn, all six doubles are 0.) + + return {kStateEncodingSize}; + } + + private: + bool two_player_opposite_; + int num_players_; +}; + +} // namespace maedn +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_MAEDN_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/maedn/maedn_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/maedn/maedn_test.cc new file mode 100644 index 0000000..f73466f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/maedn/maedn_test.cc @@ -0,0 +1,361 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/maedn/maedn.h" + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace maedn { +namespace { + +namespace testing = open_spiel::testing; + +void BasicMaednTests() { + testing::LoadGameTest("maedn"); + + std::shared_ptr game = + LoadGame("maedn", {{"players", GameParameter(2)}, + {"twoPlayersOpposite", GameParameter(true)}}); + + testing::RandomSimTest(*game, 100); + testing::RandomSimTestWithUndo(*game, 100); + + for (int players = 2; players <= 4; players++) { + game = LoadGame("maedn", {{"players", GameParameter(players)}, + {"twoPlayersOpposite", GameParameter(false)}}); + + testing::RandomSimTest(*game, 100); + testing::RandomSimTestWithUndo(*game, 100); + } +} + +const char* MINIMAL_WINS_EXPECTED_TERMINAL_STATES[] = { + // 2 players side-by-side, player 1 wins + ". . o-o-S 2 2\n" + ". . o . o 2 2\n" + " o . o \n" + " o . o \n" + "S-o-o-o-o . o-o-o-o-o\n" + "o 1 1 1 1 . . . . o\n" + "o-o-o-o-o . o-o-o-o-S\n" + " o . o \n" + " o . o \n" + ". . o . o . .\n" + ". . S-o-o . .\n" + "Turn: *\n" + "Dice: \n", + // 2 players side-by-side, player 2 wins + "1 1 o-o-S . .\n" + "1 1 o 2 o . .\n" + " o 2 o \n" + " o 2 o \n" + "S-o-o-o-o 2 o-o-o-o-o\n" + "o . . . . . . . . o\n" + "o-o-o-o-o . o-o-o-o-S\n" + " o . o \n" + " o . o \n" + ". . o . o . .\n" + ". . S-o-o . .\n" + "Turn: *\n" + "Dice: \n", + // 2 players opposite sides, player 1 wins + ". . o-o-S . .\n" + ". . o . o . .\n" + " o . o \n" + " o . o \n" + "S-o-o-o-o . o-o-o-o-o\n" + "o 1 1 1 1 . . . . o\n" + "o-o-o-o-o . o-o-o-o-S\n" + " o . o \n" + " o . o \n" + ". . o . o 2 2\n" + ". . S-o-o 2 2\n" + "Turn: *\n" + "Dice: \n", + // 2 players opposite sides, player 2 wins + "1 1 o-o-S . .\n" + "1 1 o . o . .\n" + " o . o \n" + " o . o \n" + "S-o-o-o-o . o-o-o-o-o\n" + "o . . . . 2 2 2 2 o\n" + "o-o-o-o-o . o-o-o-o-S\n" + " o . o \n" + " o . o \n" + ". . o . o . .\n" + ". . S-o-o . .\n" + "Turn: *\n" + "Dice: \n", + // 3 players, player 1 wins + ". . o-o-S 2 2\n" + ". . o . o 2 2\n" + " o . o \n" + " o . o \n" + "S-o-o-o-o . o-o-o-o-o\n" + "o 1 1 1 1 . . . . o\n" + "o-o-o-o-o . o-o-o-o-S\n" + " o . o \n" + " o . o \n" + ". . o . o 3 3\n" + ". . S-o-o 3 3\n" + "Turn: *\n" + "Dice: \n", + // 3 players, player 2 wins + "1 1 o-o-S . .\n" + "1 1 o 2 o . .\n" + " o 2 o \n" + " o 2 o \n" + "S-o-o-o-o 2 o-o-o-o-o\n" + "o . . . . . . . . o\n" + "o-o-o-o-o . o-o-o-o-S\n" + " o . o \n" + " o . o \n" + ". . o . o 3 3\n" + ". . S-o-o 3 3\n" + "Turn: *\n" + "Dice: \n", + // 3 players, player 3 wins + "1 1 o-o-S 2 2\n" + "1 1 o . o 2 2\n" + " o . o \n" + " o . o \n" + "S-o-o-o-o . o-o-o-o-o\n" + "o . . . . 3 3 3 3 o\n" + "o-o-o-o-o . o-o-o-o-S\n" + " o . o \n" + " o . o \n" + ". . o . o . .\n" + ". . S-o-o . .\n" + "Turn: *\n" + "Dice: \n", + // 4 players, player 1 wins + ". . o-o-S 2 2\n" + ". . o . o 2 2\n" + " o . o \n" + " o . o \n" + "S-o-o-o-o . o-o-o-o-o\n" + "o 1 1 1 1 . . . . o\n" + "o-o-o-o-o . o-o-o-o-S\n" + " o . o \n" + " o . o \n" + "4 4 o . o 3 3\n" + "4 4 S-o-o 3 3\n" + "Turn: *\n" + "Dice: \n", + // 4 players, player 2 wins + "1 1 o-o-S . .\n" + "1 1 o 2 o . .\n" + " o 2 o \n" + " o 2 o \n" + "S-o-o-o-o 2 o-o-o-o-o\n" + "o . . . . . . . . o\n" + "o-o-o-o-o . o-o-o-o-S\n" + " o . o \n" + " o . o \n" + "4 4 o . 
o 3 3\n" + "4 4 S-o-o 3 3\n" + "Turn: *\n" + "Dice: \n", + // 4 players, player 3 wins + "1 1 o-o-S 2 2\n" + "1 1 o . o 2 2\n" + " o . o \n" + " o . o \n" + "S-o-o-o-o . o-o-o-o-o\n" + "o . . . . 3 3 3 3 o\n" + "o-o-o-o-o . o-o-o-o-S\n" + " o . o \n" + " o . o \n" + "4 4 o . o . .\n" + "4 4 S-o-o . .\n" + "Turn: *\n" + "Dice: \n", + // 4 players, player 4 wins + "1 1 o-o-S 2 2\n" + "1 1 o . o 2 2\n" + " o . o \n" + " o . o \n" + "S-o-o-o-o . o-o-o-o-o\n" + "o . . . . . . . . o\n" + "o-o-o-o-o 4 o-o-o-o-S\n" + " o 4 o \n" + " o 4 o \n" + ". . o 4 o 3 3\n" + ". . S-o-o 3 3\n" + "Turn: *\n" + "Dice: \n", +}; + +void PlayMinimalGameToWin(int players, bool twoPlayersOpposite, int ply, + int terminalStateScenarioNumber) { + std::shared_ptr game = LoadGame( + "maedn", {{"players", GameParameter(players)}, + {"twoPlayersOpposite", GameParameter(twoPlayersOpposite)}}); + + auto state = game->NewInitialState(); + + // other players do nothing + for (int i = 0; i < ply; i++) { + state->ApplyAction(0); // dice 1 for other player + state->ApplyAction(0); // player passes + } + + for (int i = 0; i < 4; i++) { + state->ApplyAction(5); // dice 6 + state->ApplyAction(1); // bring in piece + state->ApplyAction(5); // dice 6 + state->ApplyAction(2); + state->ApplyAction(5); // dice 6 + state->ApplyAction(8); + state->ApplyAction(5); // dice 6 + state->ApplyAction(14); + state->ApplyAction(5); // dice 6 + state->ApplyAction(20); + state->ApplyAction(5); // dice 6 + state->ApplyAction(26); + state->ApplyAction(5); // dice 6 + state->ApplyAction(32); + if (i == 0 || i == 1) { + state->ApplyAction(5); // dice 6 + state->ApplyAction(38); + } + if (i == 0) { + state->ApplyAction(0); // dice 1 + state->ApplyAction(44); + + // other players do nothing + for (int i = 0; i < players - 1; i++) { + state->ApplyAction(0); // dice 1 for other player + state->ApplyAction(0); // player passes + } + } else if (i == 2) { + state->ApplyAction(4); // dice 5 + state->ApplyAction(38); + + // other players do nothing + for (int i = 0; i < players - 1; i++) { + state->ApplyAction(0); // dice 1 for other player + state->ApplyAction(0); // player passes + } + } + } + + SPIEL_CHECK_FALSE(state->IsTerminal()); + state->ApplyAction(3); // dice 4 + state->ApplyAction(38); + + std::cout << "Testing minimal win for " << players << "players, player " + << ply << "wins" << std::endl + << "Terminal state:" << std::endl + << state->ToString() << std::endl; + + SPIEL_CHECK_TRUE(state->IsTerminal()); + + std::vector returns = state->Returns(); + for (int i = 0; i < players; i++) { + double expected = i == ply ? players - 1.0 : -1.0; + + SPIEL_CHECK_EQ(returns[i], expected); + } + + SPIEL_CHECK_EQ( + state->ToString(), + MINIMAL_WINS_EXPECTED_TERMINAL_STATES[terminalStateScenarioNumber]); +} + +void MinimalGameToWin() { + // Test for all constellations whether for any player the + // minimal winning scenario works as expected. + // Scenarios: 2p side-by-side, 2p opposite sides, 3p, 4p, + // for each participating player. 
+ + int terminal_state_scenario_number = 0; + for (int scenario = 0; scenario < 4; scenario++) { + int players; + bool two_players_opposite = false; + if (scenario == 0) { + players = 2; + two_players_opposite = false; + } else if (scenario == 1) { + players = 2; + two_players_opposite = true; + } else { + players = scenario + 1; + } + + for (int ply = 0; ply < players; ply++) { + PlayMinimalGameToWin(players, two_players_opposite, ply, + terminal_state_scenario_number++); + } + } +} + +void ObservationTensorTest(const State &state) { + std::shared_ptr game = state.GetGame(); + + int players = state.NumPlayers(); + for (int ply = 0; ply < players; ply++) { + std::vector tensor = state.ObservationTensor(ply); + + std::unique_ptr state2_tmp = game->NewInitialState(); + std::unique_ptr state2( + static_cast(state2_tmp.release())); + + state2->FromObservationTensor(ply, absl::MakeSpan(tensor), 0, 0); + + // std::cout << "Player: " << ply << std::endl; + // std::cout << "State:" << std::endl << state.ToString() << std::endl; + // std::cout << "State2:" << std::endl << state2->ToString() << std::endl; + // std::cout << "Tensor:" << std::endl << tensor << std::endl; + SPIEL_CHECK_EQ(state.ToString(), state2->ToString()); + } +} + +void CheckObservationTensor() { + std::shared_ptr game = + LoadGame("maedn", {{"players", GameParameter(2)}, + {"twoPlayersOpposite", GameParameter(true)}}); + + testing::RandomSimTest(*game, 100, true, false, true, &ObservationTensorTest); + + for (int players = 2; players <= 4; players++) { + std::shared_ptr game = + LoadGame("maedn", {{"players", GameParameter(players)}, + {"twoPlayersOpposite", GameParameter(false)}}); + + testing::RandomSimTest(*game, 100, true, false, true, + &ObservationTensorTest); + } +} + +void BasicSerializationTest() { + std::shared_ptr game = LoadGame("maedn"); + std::unique_ptr state = game->NewInitialState(); + std::unique_ptr state2 = game->DeserializeState(state->Serialize()); + SPIEL_CHECK_EQ(state->ToString(), state2->ToString()); +} + +} // namespace +} // namespace maedn +} // namespace open_spiel + +int main(int argc, char **argv) { + open_spiel::maedn::BasicMaednTests(); + open_spiel::maedn::MinimalGameToWin(); + open_spiel::maedn::BasicSerializationTest(); + open_spiel::maedn::CheckObservationTensor(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/mancala/mancala.cc b/scenarios/bargaining/open_spiel/open_spiel/games/mancala/mancala.cc new file mode 100644 index 0000000..55399bb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/mancala/mancala.cc @@ -0,0 +1,235 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/mancala/mancala.h" + +#include +#include +#include +#include + +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/tensor_view.h" + +namespace open_spiel { +namespace mancala { +namespace { + +// Facts about the game. 
+const GameType kGameType{ + /*short_name=*/"mancala", + /*long_name=*/"Mancala", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/{} // no parameters +}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new MancalaGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +int GetPlayerHomePit(Player player) { + if (player == 0) { + return kTotalPits / 2; + } + return 0; +} + +bool IsPlayerPit(Player player, int pit) { + if (player == 0) { + if (pit < kTotalPits / 2 && pit > 0) return true; + return false; + } + if (pit > kTotalPits / 2) return true; + return false; +} + +int GetOppositePit(int pit) { return kTotalPits - pit; } + +int GetNextPit(Player player, int pit) { + int next_pit = (pit + 1) % kTotalPits; + if (next_pit == GetPlayerHomePit(1 - player)) next_pit++; + return next_pit; +} +} // namespace + +void MancalaState::DoApplyAction(Action move) { + SPIEL_CHECK_GT(board_[move], 0); + int num_beans = board_[move]; + board_[move] = 0; + int current_pit = move; + for (int i = 0; i < num_beans; ++i) { + current_pit = GetNextPit(current_player_, current_pit); + board_[current_pit]++; + } + + // capturing logic + if (board_[current_pit] == 1 && IsPlayerPit(current_player_, current_pit) && + board_[GetOppositePit(current_pit)] > 0) { + board_[GetPlayerHomePit(current_player_)] += + (1 + board_[GetOppositePit(current_pit)]); + board_[current_pit] = 0; + board_[GetOppositePit(current_pit)] = 0; + } + + if (current_pit != GetPlayerHomePit(current_player_)) + current_player_ = 1 - current_player_; +} + +std::vector MancalaState::LegalActions() const { + if (IsTerminal()) return {}; + std::vector moves; + if (current_player_ == 0) { + for (int i = 0; i < kNumPits; ++i) { + if (board_[i + 1] > 0) { + moves.push_back(i + 1); + } + } + } else { + for (int i = 0; i < kNumPits; ++i) { + if (board_[board_.size() - 1 - i] > 0) { + moves.push_back(board_.size() - 1 - i); + } + } + } + std::sort(moves.begin(), moves.end()); + return moves; +} + +std::string MancalaState::ActionToString(Player player, + Action action_id) const { + return absl::StrCat(action_id); +} + +void MancalaState::InitBoard() { + std::fill(begin(board_), end(board_), 4); + board_[0] = 0; + board_[board_.size() / 2] = 0; +} + +void MancalaState::SetBoard(const std::array& board) { + board_ = board; +} + +MancalaState::MancalaState(std::shared_ptr game) : State(game) { + InitBoard(); +} + +std::string MancalaState::ToString() const { + std::string str; + std::string separator = "-"; + absl::StrAppend(&str, separator); + for (int i = 0; i < kNumPits; ++i) { + absl::StrAppend(&str, board_[board_.size() - 1 - i]); + absl::StrAppend(&str, separator); + } + absl::StrAppend(&str, "\n"); + + absl::StrAppend(&str, board_[0]); + for (int i = 0; i < kNumPits * 2 - 1; ++i) { + absl::StrAppend(&str, separator); + } + absl::StrAppend(&str, board_[board_.size() / 2]); + absl::StrAppend(&str, "\n"); + + absl::StrAppend(&str, separator); + for (int i = 0; i < kNumPits; ++i) { + absl::StrAppend(&str, board_[i + 1]); + absl::StrAppend(&str, separator); + 
} + return str; +} + +bool MancalaState::IsTerminal() const { + if (move_number_ > game_->MaxGameLength()) { + return true; + } + + bool player_0_has_moves = false; + bool player_1_has_moves = false; + for (int i = 0; i < kNumPits; ++i) { + if (board_[board_.size() - 1 - i] > 0) { + player_1_has_moves = true; + break; + } + } + for (int i = 0; i < kNumPits; ++i) { + if (board_[i + 1] > 0) { + player_0_has_moves = true; + break; + } + } + return !player_0_has_moves || !player_1_has_moves; +} + +std::vector MancalaState::Returns() const { + if (IsTerminal()) { + int player_0_bean_sum = std::accumulate( + board_.begin() + 1, board_.begin() + kTotalPits / 2 + 1, 0); + int player_1_bean_sum = + std::accumulate(board_.begin() + kTotalPits / 2 + 1, board_.end(), 0) + + board_[0]; + if (player_0_bean_sum > player_1_bean_sum) { + return {1.0, -1.0}; + } else if (player_0_bean_sum < player_1_bean_sum) { + return {-1.0, 1.0}; + } else { + // Reaches max game length or they have the same bean sum, it is a draw. + return {0.0, 0.0}; + } + } + return {0.0, 0.0}; +} + +std::string MancalaState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void MancalaState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + SPIEL_CHECK_EQ(values.size(), kTotalPits); + auto value_it = values.begin(); + for (int count : board_) { + *value_it++ = count; + } + SPIEL_CHECK_EQ(value_it, values.end()); +} + +std::unique_ptr MancalaState::Clone() const { + return std::unique_ptr(new MancalaState(*this)); +} + +MancalaGame::MancalaGame(const GameParameters& params) + : Game(kGameType, params) {} + +} // namespace mancala +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/mancala/mancala.h b/scenarios/bargaining/open_spiel/open_spiel/games/mancala/mancala.h new file mode 100644 index 0000000..d8ea7cd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/mancala/mancala.h @@ -0,0 +1,97 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_MANCALA_H_ +#define OPEN_SPIEL_GAMES_MANCALA_H_ + +#include +#include +#include +#include +#include + +#include "open_spiel/spiel.h" + +// Mancala +// https://en.wikipedia.org/wiki/Mancala. +// +// Note that this implements the Kalah rule set, see +// https://en.wikipedia.org/wiki/Kalah. Oware is another game from the Mancala +// family of games implemented in oware.{h,cc}. +// +// Parameters: none + +namespace open_spiel { +namespace mancala { + +// Constants. +inline constexpr int kNumPlayers = 2; +inline constexpr int kNumPits = 6; +inline constexpr int kTotalPits = (kNumPits + 1) * 2; + +// State of an in-play game. 
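+// Board layout, as implied by the helpers in mancala.cc: index 0 is player
+// 1's store, indices 1-6 are player 0's pits, index 7 is player 0's store,
+// and indices 8-13 are player 1's pits. Sowing places beans in increasing
+// index order (wrapping after 13) and skips the opponent's store.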
+class MancalaState : public State { + public: + MancalaState(std::shared_ptr game); + + MancalaState(const MancalaState&) = default; + MancalaState& operator=(const MancalaState&) = default; + + void SetBoard(const std::array& board); + int BoardAt(int position) const { return board_[position]; } + Player CurrentPlayer() const override { + return IsTerminal() ? kTerminalPlayerId : current_player_; + } + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + std::vector LegalActions() const override; + + protected: + std::array board_; + void DoApplyAction(Action move) override; + + private: + void InitBoard(); + Player current_player_ = 0; // Player zero goes first +}; + +// Game object. +class MancalaGame : public Game { + public: + explicit MancalaGame(const GameParameters& params); + int NumDistinctActions() const override { return kTotalPits; } + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new MancalaState(shared_from_this())); + } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { return -1; } + absl::optional UtilitySum() const override { return 0; } + double MaxUtility() const override { return 1; } + std::vector ObservationTensorShape() const override { + return {kTotalPits}; + } + // There is arbitrarily chosen number to ensure the game is finite. + int MaxGameLength() const override { return 1000; } +}; + +} // namespace mancala +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_MANCALA_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/mancala/mancala_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/mancala/mancala_test.cc new file mode 100644 index 0000000..2587a2c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/mancala/mancala_test.cc @@ -0,0 +1,125 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/mancala/mancala.h" + +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace mancala { +namespace { + +namespace testing = open_spiel::testing; + +void BasicSerializationTest() { + std::shared_ptr game = LoadGame("mancala"); + std::unique_ptr state = game->NewInitialState(); + std::unique_ptr state2 = game->DeserializeState(state->Serialize()); + SPIEL_CHECK_EQ(state->ToString(), state2->ToString()); +} + +void BasicMancalaTests() { + testing::LoadGameTest("mancala"); + testing::NoChanceOutcomesTest(*LoadGame("mancala")); + testing::RandomSimTest(*LoadGame("mancala"), 100); +} + +// Board: +// -0-0-0-4-0-0- +// 0-----------0 +// -0-0-1-0-0-0- +// Player 0 taking action 3 should capture the opponents 4 beans +void CaptureWhenOppositePitNotEmptyTest() { + std::shared_ptr game = LoadGame("mancala"); + std::unique_ptr state = game->NewInitialState(); + MancalaState* mstate = static_cast(state.get()); + mstate->SetBoard({0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0}); + + // Check for exactly one legal move. + std::vector legal_actions = mstate->LegalActions(); + SPIEL_CHECK_EQ(legal_actions.size(), 1); + + // Check that it's 3 + SPIEL_CHECK_EQ(legal_actions[0], 3); + + mstate->ApplyAction(legal_actions[0]); + // Check if Player 0 home pit has 5 beans + SPIEL_CHECK_EQ(mstate->BoardAt(7), 5); +} + +// Board: +// -0-0-0-0-4-0- +// 0-----------0 +// -0-0-1-0-0-0- +// Player 0 taking action 3 should not result in any captures +void DoNotCaptureWhenOppositePitIsEmptyTest() { + std::shared_ptr game = LoadGame("mancala"); + std::unique_ptr state = game->NewInitialState(); + MancalaState* mstate = static_cast(state.get()); + mstate->SetBoard({0, 0, 0, 1, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0}); + + // Check for exactly one legal move. + std::vector legal_actions = mstate->LegalActions(); + SPIEL_CHECK_EQ(legal_actions.size(), 1); + + // Check that it's 3 + SPIEL_CHECK_EQ(legal_actions[0], 3); + + mstate->ApplyAction(legal_actions[0]); + // Check if no capture has taken place + SPIEL_CHECK_EQ(mstate->BoardAt(7), 0); + SPIEL_CHECK_EQ(mstate->BoardAt(3), 0); + SPIEL_CHECK_EQ(mstate->BoardAt(4), 1); + SPIEL_CHECK_EQ(mstate->BoardAt(9), 4); +} + +// Board: +// -0-0-0-0-0-1- +// 0-----------0 +// -1-0-0-0-0-8- +// Player 0 taking action 6 should not put beans in opponents home pit +void DoNotAddBeanToOpponentsHomePitTest() { + std::shared_ptr game = LoadGame("mancala"); + std::unique_ptr state = game->NewInitialState(); + MancalaState* mstate = static_cast(state.get()); + mstate->SetBoard({0, 1, 0, 0, 0, 0, 8, 0, 1, 0, 0, 0, 0, 0}); + + // Check for exactly two legal move. 
+ std::vector legal_actions = mstate->LegalActions(); + SPIEL_CHECK_EQ(legal_actions.size(), 2); + + // Check that it's 1 & 6 + SPIEL_CHECK_EQ(legal_actions[0], 1); + SPIEL_CHECK_EQ(legal_actions[1], 6); + + mstate->ApplyAction(legal_actions[1]); + // Check if no bean is put into opponents home pit + SPIEL_CHECK_EQ(mstate->BoardAt(0), 0); + SPIEL_CHECK_EQ(mstate->BoardAt(7), 1); + SPIEL_CHECK_EQ(mstate->BoardAt(8), 2); + SPIEL_CHECK_EQ(mstate->BoardAt(1), 2); +} + +} // namespace +} // namespace mancala +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::mancala::BasicSerializationTest(); + open_spiel::mancala::BasicMancalaTests(); + open_spiel::mancala::CaptureWhenOppositePitNotEmptyTest(); + open_spiel::mancala::DoNotCaptureWhenOppositePitIsEmptyTest(); + open_spiel::mancala::DoNotAddBeanToOpponentsHomePitTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/markov_soccer/markov_soccer.cc b/scenarios/bargaining/open_spiel/open_spiel/games/markov_soccer/markov_soccer.cc new file mode 100644 index 0000000..9711efc --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/markov_soccer/markov_soccer.cc @@ -0,0 +1,453 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/markov_soccer/markov_soccer.h" + +#include +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/utils/tensor_view.h" + +namespace open_spiel { +namespace markov_soccer { + +namespace { + +// Default parameters. +constexpr int kDefaultHorizon = 1000; + +// A valid state looks like: +// +// ..... +// ..Ab. +// ..... +// ..... +// +// In this case, the first player has the ball ('A') and the second player does +// not ('b'). Upper case means that player has posession. When the ball is in +// the field and neither player has posession, it is represented as an 'O' and +// both players are lower-case. 
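+// Chance actions: action 0 resolves player A's move first and action 1
+// resolves player B's move first; actions 2, 3, ... (used only at the very
+// first chance node) place the ball at the corresponding entry of
+// Grid::ball_start_points.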
+ +// Facts about the game +const GameType kGameType{/*short_name=*/"markov_soccer", + /*long_name=*/"Markov Soccer", + GameType::Dynamics::kSimultaneous, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"horizon", GameParameter(kDefaultHorizon)}, + {"grid", GameParameter(std::string(kDefaultGrid))}}}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new MarkovSoccerGame(params)); +} + +Action ToAction(ChanceOutcome outcome) { + if (outcome == ChanceOutcome::kChanceInit0) { + return kChanceInit0Action; + } else if (outcome == ChanceOutcome::kChanceInit1) { + return kChanceInit1Action; + } else { + SpielFatalError("Unrecognized outcome"); + } +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +// Valid characters: AaBbO. , so 6 characters per cell. +constexpr int kCellStates = 6; + +// Movement. +enum MovementType { kUp = 0, kDown = 1, kLeft = 2, kRight = 3, kStand = 4 }; + +constexpr int kNumMovementActions = 5; + +constexpr std::array row_offsets = {{-1, 1, 0, 0, 0}}; +constexpr std::array col_offsets = {{0, 0, -1, 1, 0}}; +} // namespace + +MarkovSoccerState::MarkovSoccerState(std::shared_ptr game, + const Grid& grid) + : SimMoveState(game), grid_(grid) {} + +std::string MarkovSoccerState::ActionToString(Player player, + Action action_id) const { + if (player == kSimultaneousPlayerId) + return FlatJointActionToString(action_id); + SPIEL_CHECK_GE(action_id, 0); + + std::string result = ""; + if (player == kChancePlayerId) { + SPIEL_CHECK_LT(action_id, game_->MaxChanceOutcomes()); + + // Chance moves. + if (action_id == kChanceInit0Action) { + result = "(A's action first)"; + } else if (action_id == kChanceInit1Action) { + result = "(B's action first)"; + } else { + int ball_loc = action_id - kNumInitiativeChanceOutcomes; + return absl::StrCat("(ball at ", grid_.ball_start_points[ball_loc].first, + ",", grid_.ball_start_points[ball_loc].second, ")"); + } + } else { + SPIEL_CHECK_LT(action_id, game_->NumDistinctActions()); + + // Regular move actions. 
+ if (action_id == kUp) { + result = "up"; + } else if (action_id == kDown) { + result = "down"; + } else if (action_id == kLeft) { + result = "left"; + } else if (action_id == kRight) { + result = "right"; + } else if (action_id == kStand) { + result = "stand"; + } + } + return result; +} + +void MarkovSoccerState::SetField(int r, int c, char v) { + field_[r * grid_.num_cols + c] = v; + + if (v == 'a' || v == 'A') { + player_row_[0] = r; + player_col_[0] = c; + } else if (v == 'b' || v == 'B') { + player_row_[1] = r; + player_col_[1] = c; + } + + if (v == 'O' || v == 'A' || v == 'B') { + ball_row_ = r; + ball_col_ = c; + } +} + +char MarkovSoccerState::field(int r, int c) const { + return field_[r * grid_.num_cols + c]; +} + +void MarkovSoccerState::Reset(int horizon) { + horizon_ = horizon; + field_.resize(grid_.num_rows * grid_.num_cols, '.'); + + SetField(grid_.a_start.first, grid_.a_start.second, 'a'); + SetField(grid_.b_start.first, grid_.b_start.second, 'b'); + + cur_player_ = kChancePlayerId; + winner_ = kInvalidPlayer; + total_moves_ = 0; +} + +void MarkovSoccerState::DoApplyActions(const std::vector& moves) { + SPIEL_CHECK_EQ(moves.size(), 2); + SPIEL_CHECK_EQ(cur_player_, kSimultaneousPlayerId); + + moves_[0] = moves[0]; + moves_[1] = moves[1]; + + cur_player_ = kChancePlayerId; +} + +bool MarkovSoccerState::InBounds(int r, int c) const { + return (r >= 0 && c >= 0 && r < grid_.num_rows && c < grid_.num_cols); +} + +void MarkovSoccerState::ResolveMove(Player player, int move) { + int old_row = player_row_[player - 1]; + int old_col = player_col_[player - 1]; + int new_row = old_row + row_offsets[move]; + int new_col = old_col + col_offsets[move]; + + char from_piece = field(old_row, old_col); + + if (!InBounds(new_row, new_col)) { + // Check, this is a goal? If so, set the winner. + if (from_piece == 'A' && (new_row == 1 || new_row == 2) && + (new_col == grid_.num_cols)) { + SetField(old_row, old_col, '.'); + winner_ = 0; + } else if (from_piece == 'B' && (new_row == 1 || new_row == 2) && + (new_col == -1)) { + SetField(old_row, old_col, '.'); + winner_ = 1; + } + + // Otherwise, nothing happens. + return; + } + + // The move was in bounds! + char to_piece = field(new_row, new_col); + + // Stand? + if (old_row == new_row && old_col == new_col) { + return; + } + + // Otherwise: something interesting. + if (to_piece == '.') { + // open field, move'em! + SetField(new_row, new_col, field(old_row, old_col)); + SetField(old_row, old_col, '.'); + } else if (to_piece == 'O') { + // Nice! .. got the ball, way to go; a -> A or b -> B. + SPIEL_CHECK_TRUE(from_piece == 'a' || from_piece == 'b'); + + if (from_piece == 'a') { + SetField(old_row, old_col, '.'); + SetField(new_row, new_col, 'A'); + } else if (from_piece == 'b') { + SetField(old_row, old_col, '.'); + SetField(new_row, new_col, 'B'); + } + } else if (from_piece == 'A' && to_piece == 'b') { + // Lost of possession to defender. + SetField(old_row, old_col, 'a'); + SetField(new_row, new_col, 'B'); + } else if (from_piece == 'B' && to_piece == 'a') { + // Lost of possession to defender. 
+ SetField(old_row, old_col, 'b'); + SetField(new_row, new_col, 'A'); + } +} + +void MarkovSoccerState::DoApplyAction(Action action_id) { + if (IsSimultaneousNode()) { + ApplyFlatJointAction(action_id); + return; + } + SPIEL_CHECK_TRUE(IsChanceNode()); + SPIEL_CHECK_GE(action_id, 0); + SPIEL_CHECK_LT(action_id, game_->MaxChanceOutcomes()); + + if (action_id == kChanceInit0Action) { + ResolveMove(1, moves_[0]); + ResolveMove(2, moves_[1]); + } else if (action_id == kChanceInit1Action) { + ResolveMove(2, moves_[1]); + ResolveMove(1, moves_[0]); + } else { + int ball_loc = action_id - kNumInitiativeChanceOutcomes; + SetField(grid_.ball_start_points[ball_loc].first, + grid_.ball_start_points[ball_loc].second, 'O'); + } + + cur_player_ = kSimultaneousPlayerId; + total_moves_++; +} + +std::vector MarkovSoccerState::LegalActions(Player player) const { + if (IsTerminal()) return {}; + if (IsChanceNode()) { + if (total_moves_ == 0) { + std::vector outcomes(grid_.ball_start_points.size(), + kInvalidAction); + for (int i = 0; i < grid_.ball_start_points.size(); ++i) { + outcomes[i] = kNumInitiativeChanceOutcomes + i; + } + return outcomes; + } else { + return {ToAction(ChanceOutcome::kChanceInit0), + ToAction(ChanceOutcome::kChanceInit1)}; + } + } else { + return {kUp, kDown, kLeft, kRight, kStand}; + } +} + +std::vector> MarkovSoccerState::ChanceOutcomes() + const { + SPIEL_CHECK_TRUE(IsChanceNode()); + if (total_moves_ == 0) { + std::vector> outcomes( + grid_.ball_start_points.size(), {kInvalidAction, -1.0}); + const double unif_prob = 1.0 / outcomes.size(); + for (int i = 0; i < grid_.ball_start_points.size(); ++i) { + outcomes[i] = {kNumInitiativeChanceOutcomes + i, unif_prob}; + } + return outcomes; + } else { + return { + std::pair(ToAction(ChanceOutcome::kChanceInit0), 0.5), + std::pair(ToAction(ChanceOutcome::kChanceInit1), 0.5)}; + } +} + +std::string MarkovSoccerState::ToString() const { + std::string result = ""; + + for (int r = 0; r < grid_.num_rows; r++) { + for (int c = 0; c < grid_.num_cols; c++) { + result += field(r, c); + } + + absl::StrAppend(&result, "\n"); + } + if (IsChanceNode()) absl::StrAppend(&result, "Chance Node"); + return result; +} + +bool MarkovSoccerState::IsTerminal() const { + return (total_moves_ >= horizon_ || winner_ != kInvalidPlayer); +} + +std::vector MarkovSoccerState::Returns() const { + if (!IsTerminal()) { + return {0.0, 0.0}; + } + + if (total_moves_ >= horizon_) { + return {0.0, 0.0}; + } else { + return (winner_ == 0) ? 
std::vector{1.0, -1.0} + : std::vector{-1.0, 1.0}; + } +} + +int MarkovSoccerState::observation_plane(int r, int c) const { + int plane = -1; + switch (field(r, c)) { + case 'a': + plane = 0; + break; + case 'A': + plane = 1; + break; + case 'b': + plane = 2; + break; + case 'B': + plane = 3; + break; + case 'O': + plane = 4; + break; + case '.': + plane = 5; + break; + default: + std::cerr << "Invalid character on field: " << field(r, c) << std::endl; + plane = -1; + break; + } + + return plane; +} + +void MarkovSoccerState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + TensorView<3> view(values, {kCellStates, grid_.num_rows, grid_.num_cols}, + true); + + for (int r = 0; r < grid_.num_rows; r++) { + for (int c = 0; c < grid_.num_cols; c++) { + int plane = observation_plane(r, c); + SPIEL_CHECK_TRUE(plane >= 0 && plane < kCellStates); + view[{plane, r, c}] = 1.0; + } + } +} + +std::unique_ptr MarkovSoccerState::Clone() const { + return std::unique_ptr(new MarkovSoccerState(*this)); +} + +std::unique_ptr MarkovSoccerGame::NewInitialState() const { + std::unique_ptr state( + new MarkovSoccerState(shared_from_this(), grid_)); + state->Reset(ParameterValue("horizon")); + return state; +} + +int MarkovSoccerGame::NumDistinctActions() const { return kNumMovementActions; } + +int MarkovSoccerGame::MaxChanceOutcomes() const { + // First two for determining initiative, next n for spawn point locations, + // where n is equal to the number of spawn points. + return kNumInitiativeChanceOutcomes + grid_.ball_start_points.size(); +} + +std::vector MarkovSoccerGame::ObservationTensorShape() const { + return {kCellStates, grid_.num_rows, grid_.num_cols}; +} + +namespace { +Grid ParseGrid(const std::string& grid_string) { + Grid grid{/*num_rows=*/0, /*num_cols=*/0}; + int row = 0; + int col = 0; + int count_empty_cells = 0; + bool a_set = false; + bool b_set = false; + for (auto c : grid_string) { + if (c == '\n') { + row += 1; + col = 0; + } else { + if (row >= grid.num_rows) grid.num_rows = row + 1; + if (col >= grid.num_cols) grid.num_cols = col + 1; + if (c == 'O') { + grid.ball_start_points.emplace_back(row, col); + } else if (c == 'A') { + if (a_set == true) { + SpielFatalError("Can only have one A in grid."); + } + grid.a_start = {row, col}; + a_set = true; + } else if (c == 'B') { + if (b_set == true) { + SpielFatalError("Can only have one B in grid."); + } + grid.b_start = {row, col}; + b_set = true; + } else if (c == '.') { + ++count_empty_cells; + } else { + SpielFatalError(absl::StrCat("Invalid char '", std::string(1, c), + "' at grid (", row, ",", col, ")")); + } + col += 1; + } + } + // Must have at least one ball starting location. 
+ SPIEL_CHECK_GE(grid.ball_start_points.size(), 0); + SPIEL_CHECK_EQ(grid.num_rows * grid.num_cols, + count_empty_cells + grid.ball_start_points.size() + 2); + return grid; +} +} // namespace + +MarkovSoccerGame::MarkovSoccerGame(const GameParameters& params) + : SimMoveGame(kGameType, params), + grid_(ParseGrid(ParameterValue("grid"))), + horizon_(ParameterValue("horizon")) {} + +} // namespace markov_soccer +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/markov_soccer/markov_soccer.h b/scenarios/bargaining/open_spiel/open_spiel/games/markov_soccer/markov_soccer.h new file mode 100644 index 0000000..600f4fe --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/markov_soccer/markov_soccer.h @@ -0,0 +1,142 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_MARKOV_SOCCER_H_ +#define OPEN_SPIEL_GAMES_MARKOV_SOCCER_H_ + +#include +#include +#include +#include + +#include "open_spiel/simultaneous_move_game.h" +#include "open_spiel/spiel.h" + +// This is the soccer game from the MinimaxQ paper. See +// "Markov Games as a Framework for Reinforcement Learning", Littman '94. +// http://www.cs.duke.edu/courses/spring07/cps296.3/littman94markov.pdf +// +// Parameters: +// "horizon" int max number of moves before draw (default = 1000) +// "grid" string String representation of grid. +// Empty spaces are '.', possible ball starting +// locations are 'O' and player A and B starting +// points are 'A' and 'B' respectively. + +namespace open_spiel { +namespace markov_soccer { + +inline constexpr char kDefaultGrid[] = + ".....\n" + "..OB.\n" + ".AO..\n" + "....."; + +struct Grid { + int num_rows; + int num_cols; + std::pair a_start; + std::pair b_start; + std::vector> ball_start_points; +}; + +// Number of chance outcomes reserved for "initiative" (learning which player's +// action gets resolved first). +inline constexpr int kNumInitiativeChanceOutcomes = 2; + +// Reserved chance outcomes for initiative. The ones following these are to +// determine spawn point locations. +inline constexpr Action kChanceInit0Action = 0; +inline constexpr Action kChanceInit1Action = 1; +enum class ChanceOutcome { kChanceInit0, kChanceInit1 }; + +class MarkovSoccerGame; + +class MarkovSoccerState : public SimMoveState { + public: + explicit MarkovSoccerState(std::shared_ptr game, + const Grid& grid); + MarkovSoccerState(const MarkovSoccerState&) = default; + + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string ObservationString(Player player) const override { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); + } + void ObservationTensor(Player player, + absl::Span values) const override; + Player CurrentPlayer() const override { + return IsTerminal() ? 
kTerminalPlayerId : cur_player_; + } + std::unique_ptr Clone() const override; + + ActionsAndProbs ChanceOutcomes() const override; + + void Reset(int horizon); + std::vector LegalActions(Player player) const override; + + protected: + void DoApplyAction(Action action_id) override; + void DoApplyActions(const std::vector& moves) override; + + private: + void SetField(int r, int c, char v); + char field(int r, int c) const; + void ResolveMove(Player player, int move); + bool InBounds(int r, int c) const; + int observation_plane(int r, int c) const; + + const Grid& grid_; + + // Fields set to bad values. Use Game::NewInitialState(). + int winner_ = -1; + Player cur_player_ = -1; // Could be chance's turn. + int total_moves_ = -1; + int horizon_ = -1; + std::array player_row_ = {{-1, -1}}; // Players' rows. + std::array player_col_ = {{-1, -1}}; // Players' cols. + int ball_row_ = -1; + int ball_col_ = -1; + std::array moves_ = {{-1, -1}}; // Moves taken. + std::vector field_; +}; + +class MarkovSoccerGame : public SimMoveGame { + public: + explicit MarkovSoccerGame(const GameParameters& params); + int NumDistinctActions() const override; + std::unique_ptr NewInitialState() const override; + int MaxChanceOutcomes() const override; + int NumPlayers() const override { return 2; } + double MinUtility() const override { return -1; } + double MaxUtility() const override { return 1; } + absl::optional UtilitySum() const override { return 0; } + std::vector ObservationTensorShape() const override; + int MaxGameLength() const override { return horizon_; } + // TODO: verify whether this bound is tight and/or tighten it. + int MaxChanceNodesInHistory() const override { return MaxGameLength(); } + + private: + Grid grid_; + int horizon_; +}; + +} // namespace markov_soccer +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_MARKOV_SOCCER_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/markov_soccer/markov_soccer_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/markov_soccer/markov_soccer_test.cc new file mode 100644 index 0000000..1f4fdba --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/markov_soccer/markov_soccer_test.cc @@ -0,0 +1,56 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/markov_soccer/markov_soccer.h" + +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace markov_soccer { +namespace { + +namespace testing = open_spiel::testing; + +void BasicMarkovSoccerTests() { + testing::LoadGameTest("markov_soccer"); + testing::ChanceOutcomesTest(*LoadGame("markov_soccer")); + + constexpr const char big_grid[] = + "......................\n" + ".A....................\n" + "......................\n" + "......................\n" + "...........O..........\n" + "...........O..........\n" + "...........O..........\n" + "......................\n" + "......................\n" + "....................B.\n" + "......................\n"; + + testing::RandomSimTest( + *LoadGame("markov_soccer", + {{"horizon", GameParameter(100)}, + {"grid", GameParameter(std::string(big_grid))}}), + 100); +} + +} // namespace +} // namespace markov_soccer +} // namespace open_spiel + +int main(int argc, char **argv) { + open_spiel::markov_soccer::BasicMarkovSoccerTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/matching_pennies_3p/matching_pennies_3p.cc b/scenarios/bargaining/open_spiel/open_spiel/games/matching_pennies_3p/matching_pennies_3p.cc new file mode 100644 index 0000000..e2ca6f2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/matching_pennies_3p/matching_pennies_3p.cc @@ -0,0 +1,104 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/matching_pennies_3p/matching_pennies_3p.h" + +#include + +#include "open_spiel/normal_form_game.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace matching_pennies_3p { + +constexpr const Action kHeadsActionId = 0; +constexpr const Action kTailsActionId = 1; + +namespace { +const GameType kGameType{/*short_name=*/"matching_pennies_3p", + /*long_name=*/"Three-Player Matching Pennies", + GameType::Dynamics::kSimultaneous, + GameType::ChanceMode::kDeterministic, + GameType::Information::kOneShot, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/3, + /*min_num_players=*/3, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/{}}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new MatchingPennies3pGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); +} // namespace + +MatchingPennies3pState::MatchingPennies3pState(std::shared_ptr game) + : NFGState(game), terminal_(false), returns_({0, 0, 0}) {} + +std::vector MatchingPennies3pState::LegalActions(Player player) const { + if (terminal_) + return {}; + else + return {kHeadsActionId, kTailsActionId}; +} + +std::string MatchingPennies3pState::ActionToString(Player player, + Action move_id) const { + switch (move_id) { + case kHeadsActionId: + return "Heads"; + case kTailsActionId: + return "Tails"; + default: + SpielFatalError("Unrecognized move id"); + } +} + +bool MatchingPennies3pState::IsTerminal() const { return terminal_; } + +std::vector MatchingPennies3pState::Returns() const { return returns_; } + +std::unique_ptr MatchingPennies3pState::Clone() const { + return std::unique_ptr(new MatchingPennies3pState(*this)); +} + +void MatchingPennies3pState::DoApplyActions( + const std::vector& actions) { + SPIEL_CHECK_EQ(actions.size(), NumPlayers()); + + // Player 1 gets a +1 if they match P2, -1 otherwise + returns_[0] = (actions[0] == actions[1] ? 1.0 : -1.0); + + // Player 2 gets a +1 if they match P3, -1 otherwise + returns_[1] = (actions[1] == actions[2] ? 1.0 : -1.0); + + // Player 3 gets a +1 if they don't match P1, -1 otherwise + returns_[2] = (actions[2] != actions[0] ? 1.0 : -1.0); + + terminal_ = true; +} + +MatchingPennies3pGame::MatchingPennies3pGame(const GameParameters& params) + : NormalFormGame(kGameType, params) {} + +} // namespace matching_pennies_3p +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/matching_pennies_3p/matching_pennies_3p.h b/scenarios/bargaining/open_spiel/open_spiel/games/matching_pennies_3p/matching_pennies_3p.h new file mode 100644 index 0000000..5bb9c49 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/matching_pennies_3p/matching_pennies_3p.h @@ -0,0 +1,81 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_MATCHING_PENNIES_3P_H_ +#define OPEN_SPIEL_GAMES_MATCHING_PENNIES_3P_H_ + +#include +#include +#include +#include + +#include "open_spiel/normal_form_game.h" + +// A three-player matching pennies, described in (J. S. Jordan. Three problems +// in learning mixed-strategy Nash equilibria. Games and Economic Behavior, +// 5:368–386, 1993. Also described in section 1.3 of these notes: +// http://web.stanford.edu/~rjohari/teaching/notes/336_lecture7_2007.pdf +// +// From the notes: "Each player has two actions, H or T. Player 1 wants to match +// the action of player 2; player 2 wants to match the action of player 3; and +// player 3 wants to match the opposite of the action of player 1. Each player +// receives a payoff of 1 if they match as desired, and −1 otherwise. It is +// straightforward to check that this game has a unique Nash equilibrium, where +// all players uniformly randomize. Jordan shows that this Nash equilibrium is +// locally unstable in a strong sense: for any epsilon > 0, and for almost all +// initial empirical distributions that are within (Euclidean) distance epsilon +// of the unique Nash equilibrium, discrete-time fictitious play does not +// converge to the NE; instead, it enters a limit cycle asymptotically as t -> +// infinity". + +namespace open_spiel { +namespace matching_pennies_3p { + +class MatchingPennies3pState : public NFGState { + public: + MatchingPennies3pState(std::shared_ptr game); + + std::vector LegalActions(Player player) const override; + std::string ActionToString(Player player, Action move_id) const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::unique_ptr Clone() const override; + + protected: + void DoApplyActions(const std::vector& actions) override; + + private: + bool terminal_; + std::vector returns_; +}; + +class MatchingPennies3pGame : public NormalFormGame { + public: + explicit MatchingPennies3pGame(const GameParameters& params); + + int NumDistinctActions() const override { return 2; } + std::unique_ptr NewInitialState() const override { + return std::unique_ptr( + new MatchingPennies3pState(shared_from_this())); + } + + int NumPlayers() const override { return 3; } + double MinUtility() const override { return -1; } + double MaxUtility() const override { return +1; } +}; + +} // namespace matching_pennies_3p +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_MatchingPennies3p_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/matching_pennies_3p/matching_pennies_3p_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/matching_pennies_3p/matching_pennies_3p_test.cc new file mode 100644 index 0000000..90eceb6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/matching_pennies_3p/matching_pennies_3p_test.cc @@ -0,0 +1,36 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace matching_pennies_3p { +namespace { + +namespace testing = open_spiel::testing; + +void BasicMatchingPennies3pTests() { + testing::LoadGameTest("matching_pennies_3p"); + testing::NoChanceOutcomesTest(*LoadGame("matching_pennies_3p")); + testing::RandomSimTest(*LoadGame("matching_pennies_3p"), 100); +} + +} // namespace +} // namespace matching_pennies_3p +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::matching_pennies_3p::BasicMatchingPennies3pTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/matrix_games/matrix_games.cc b/scenarios/bargaining/open_spiel/open_spiel/games/matrix_games/matrix_games.cc new file mode 100644 index 0000000..1acd98f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/matrix_games/matrix_games.cc @@ -0,0 +1,344 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/matrix_game.h" + +// Some sample matrix games. + +namespace open_spiel { + +using matrix_game::MatrixGame; + +// Matching Pennies. +namespace matching_pennies { +// Facts about the game +const GameType kGameType{ + /*short_name=*/"matrix_mp", + /*long_name=*/"Matching Pennies", + GameType::Dynamics::kSimultaneous, + GameType::ChanceMode::kDeterministic, + GameType::Information::kOneShot, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/{} // no parameters +}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr( + new MatrixGame(kGameType, params, {"Heads", "Tails"}, {"Heads", "Tails"}, + {1, -1, -1, 1}, {-1, 1, 1, -1})); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); +} // namespace matching_pennies + +// Rock, Paper, Scissors. 
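+// (For the games in this file, each utility vector is that player's payoff
+// matrix flattened row-major: entry r * num_cols + c is the payoff when the
+// row player picks action r and the column player picks action c.)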
+namespace rock_paper_scissors { +// Facts about the game +const GameType kGameType{ + /*short_name=*/"matrix_rps", + /*long_name=*/"Rock, Paper, Scissors", + GameType::Dynamics::kSimultaneous, + GameType::ChanceMode::kDeterministic, + GameType::Information::kOneShot, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/{} // no parameters +}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new MatrixGame( + kGameType, params, {"Rock", "Paper", "Scissors"}, + {"Rock", "Paper", "Scissors"}, {0, -1, 1, 1, 0, -1, -1, 1, 0}, + {0, 1, -1, -1, 0, 1, 1, -1, 0})); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); +} // namespace rock_paper_scissors + +// Rock, Paper, Scissors. +namespace biased_rock_paper_scissors { +// Game from Figure 7 of Branislav Bošanský, Viliam Lisý, Marc Lanctot, Jirí +// Cermák, and Mark H.M. Winands. Algorithms for computing strategies in +// two-player simultaneous move games. Artificial Intelligence, 237:1-40, 2016. +// Equilibrium is 1/16, 10/16, 5/16. +const GameType kGameType{ + /*short_name=*/"matrix_brps", + /*long_name=*/"Biased Rock, Paper, Scissors", + GameType::Dynamics::kSimultaneous, + GameType::ChanceMode::kDeterministic, + GameType::Information::kOneShot, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/{} // no parameters +}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new MatrixGame( + kGameType, params, {"Rock", "Paper", "Scissors"}, + {"Rock", "Paper", "Scissors"}, {0, -25, 50, 25, 0, -5, -50, 5, 0}, + {0, 25, -50, -25, 0, 5, 50, -5, 0})); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); +} // namespace biased_rock_paper_scissors + +// Rock, Paper, Scissors, Water: a variant of RPS by Martin Schmid which adds +// an action to both players that always gives, adding a pure equilibrium to the +// game. 
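+// (With the payoffs below, Water draws against every action, so
+// (Water, Water) is the pure equilibrium referred to above.)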
+namespace rock_paper_scissors_water { +// Facts about the game +const GameType kGameType{ + /*short_name=*/"matrix_rpsw", + /*long_name=*/"Rock, Paper, Scissors, Water", + GameType::Dynamics::kSimultaneous, + GameType::ChanceMode::kDeterministic, + GameType::Information::kOneShot, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/{} // no parameters +}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr( + new MatrixGame(kGameType, params, {"Rock", "Paper", "Scissors", "Water"}, + {"Rock", "Paper", "Scissors", "Water"}, + {0, -1, 1, 0, 1, 0, -1, 0, -1, 1, 0, 0, 0, 0, 0, 0}, + {0, 1, -1, 0, -1, 0, 1, 0, 1, -1, 0, 0, 0, 0, 0, 0})); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); +} // namespace rock_paper_scissors_water + +// A general-sum variant of Rock, Paper, Scissors. Often used as a +// counter-example for certain learning dynamics, such as ficitions play. +// See Chapter 7 of (Shoham and Leyton-Brown, Multiagent Systems Algorithmic, +// Game-Theoretic, and Logical Foundations, 2009) for detailed examples. +namespace shapleys_game { +// Facts about the game +const GameType kGameType{ + /*short_name=*/"matrix_shapleys_game", + /*long_name=*/"Shapley's Game", + GameType::Dynamics::kSimultaneous, + GameType::ChanceMode::kDeterministic, + GameType::Information::kOneShot, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/{} // no parameters +}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr( + new MatrixGame(kGameType, params, {"Rock", "Paper", "Scissors"}, + {"Rock", "Paper", "Scissors"}, {0, 0, 1, 1, 0, 0, 0, 1, 0}, + {0, 1, 0, 0, 0, 1, 1, 0, 0})); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); +} // namespace shapleys_game + +// Prisoner's Dilemma. +namespace prisoners_dilemma { +const GameType kGameType{ + /*short_name=*/"matrix_pd", + /*long_name=*/"Prisoner's Dilemma", + GameType::Dynamics::kSimultaneous, + GameType::ChanceMode::kDeterministic, + GameType::Information::kOneShot, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/{} // no parameters +}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr( + new MatrixGame(kGameType, params, {"Cooperate", "Defect"}, + {"Cooperate", "Defect"}, {5, 0, 10, 1}, {5, 10, 0, 1})); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); +} // namespace prisoners_dilemma + +// Stag Hunt. 
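+// With the payoffs below, this has the two classic pure equilibria:
+// (Stag, Stag) with payoffs (2, 2) and (Hare, Hare) with payoffs (1, 1).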
+namespace stag_hunt { +const GameType kGameType{ + /*short_name=*/"matrix_sh", + /*long_name=*/"Stag Hunt", + GameType::Dynamics::kSimultaneous, + GameType::ChanceMode::kDeterministic, + GameType::Information::kOneShot, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/{} // no parameters +}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr( + new MatrixGame(kGameType, params, {"Stag", "Hare"}, {"Stag", "Hare"}, + {2, 0, 1, 1}, {2, 1, 0, 1})); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); +} // namespace stag_hunt + +// Coordination. +namespace coordination { +const GameType kGameType{ + /*short_name=*/"matrix_coordination", + /*long_name=*/"Coordination", + GameType::Dynamics::kSimultaneous, + GameType::ChanceMode::kDeterministic, + GameType::Information::kOneShot, + GameType::Utility::kIdentical, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/{} // no parameters +}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr( + new MatrixGame(kGameType, params, {"Left", "Right"}, {"Left", "Right"}, + {1, 0, 0, 1}, {1, 0, 0, 1})); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); +} // namespace coordination + +// Chicken-Dare game. +// As described in https://web.stanford.edu/~saberi/lecture6.pdf +namespace chicken_dare { +const GameType kGameType{ + /*short_name=*/"matrix_cd", + /*long_name=*/"Chicken-Dare", + GameType::Dynamics::kSimultaneous, + GameType::ChanceMode::kDeterministic, + GameType::Information::kOneShot, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/{} // no parameters +}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr( + new MatrixGame(kGameType, params, {"Dare", "Chicken"}, + {"Dare", "Chicken"}, {0, 4, 1, 3}, {0, 1, 4, 3})); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); +} // namespace chicken_dare + +// Bach or Stravinksy game. 
+// https://en.wikipedia.org/wiki/Battle_of_the_sexes_(game_theory) +namespace bach_or_stravinsky { +const GameType kGameType{ + /*short_name=*/"matrix_bos", + /*long_name=*/"Bach or Stravinsky", + GameType::Dynamics::kSimultaneous, + GameType::ChanceMode::kDeterministic, + GameType::Information::kOneShot, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/{} // no parameters +}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr( + new MatrixGame(kGameType, params, {"Bach", "Stravinsky"}, + {"Bach", "Stravinsky"}, {3, 0, 0, 2}, {2, 0, 0, 3})); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); +} // namespace bach_or_stravinsky + + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/matrix_games/matrix_games_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/matrix_games/matrix_games_test.cc new file mode 100644 index 0000000..1609b3d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/matrix_games/matrix_games_test.cc @@ -0,0 +1,36 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/blotto/blotto.h" +#include "open_spiel/matrix_game.h" +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace { + +namespace testing = open_spiel::testing; + +void BasicMatrixGamesTests() { + for (const std::string& name : {"matrix_mp", "matrix_pd", "matrix_rps", + "matrix_sh", "matrix_coordination"}) { + testing::LoadGameTest(name); + testing::RandomSimTest(*LoadGame(name), 100); + } +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char** argv) { open_spiel::BasicMatrixGamesTests(); } diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/mfg/README.md b/scenarios/bargaining/open_spiel/open_spiel/games/mfg/README.md new file mode 100644 index 0000000..585b257 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/mfg/README.md @@ -0,0 +1,50 @@ +# Mean field games + +This directory contains mean field games implemented in C++. For now, only +discrete-action, finite-state mean field games are supported. Single and +multi-population games are supported. + +For reference on mean field games as well as common environment, please refer to: + +* [Fictitious play for mean field games: Continuous time analysis and +applications", Perrin & al. 2019](https://arxiv.org/abs/2007.03458). + +* [Scaling up Mean Field Games with Online Mirror Descent, Perolat & al. 2021](https://arxiv.org/pdf/2103.00623). 
+ + +Typically, external logic will maintain: + +* A probability distribution over game states representing an infinite number of + identical and anonymous players. + +* A finite collection of game state instances on the support of that + distribution, implementing game dynamics and rewards. + +Each game instance behaves similarly to a general-sum, perfect-information, +explicit-stochastic game, with the important difference that rewards can depend +on the whole state distribution. + +Game states go through the following stages: + +* The first game state is a chance node allowing sampling from the initial game + state distribution. + +Then game states cycle over: + +1. Decision node with normal in-game actions (e.g. {left, neutral, right}). + +2. Chance node, where one of the normal in-game action (e.g. {left, neutral, + right}) can be randomly selected. + +3. Mean field node, where we expect that external logic will have update the + state distribution and call DistributionSupport() and UpdateDistribution(). + +Multi-population mean field games are modeled as N-player games: the concept of +a `player` is conflated with the concept of `population`. Game states depend on +the population they belong to, and are instantiated with +`NewInitialStateForPopulation()`, which must be implemented in game states. +Initial distributions, game dynamics and rewards can therefore be +population-specific. `DistributionSupport()` will typically include the string +representation of states in all populations, which allows dynamics and rewards +depend on the distribution of all populations. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/mfg/crowd_modelling.cc b/scenarios/bargaining/open_spiel/open_spiel/games/mfg/crowd_modelling.cc new file mode 100644 index 0000000..bccb384 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/mfg/crowd_modelling.cc @@ -0,0 +1,282 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/mfg/crowd_modelling.h" + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/numbers.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/abseil-cpp/absl/strings/substitute.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace crowd_modelling { +namespace { +inline constexpr float kEpsilon = 1e-25; + +// Facts about the game. 
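+// Minimal sketch of the external loop that drives the mean field node
+// protocol described in games/mfg/README.md. Illustrative only:
+// EvaluatePolicyDistribution and SampleChance are hypothetical helpers that a
+// solver (e.g. fictitious play) would supply; they are not part of this file.
+//
+//   std::shared_ptr<const Game> game = LoadGame("mfg_crowd_modelling");
+//   std::unique_ptr<State> state = game->NewInitialState();
+//   while (!state->IsTerminal()) {
+//     if (state->CurrentPlayer() == kMeanFieldPlayerId) {
+//       // The state names the points whose mass it needs; the solver pushes
+//       // the matching probabilities back before play continues.
+//       std::vector<std::string> support = state->DistributionSupport();
+//       state->UpdateDistribution(EvaluatePolicyDistribution(support));
+//     } else if (state->IsChanceNode()) {
+//       state->ApplyAction(SampleChance(state->ChanceOutcomes()));
+//     } else {
+//       state->ApplyAction(policy.Step(*state));  // hypothetical policy object
+//     }
+//   }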
+const GameType kGameType{/*short_name=*/"mfg_crowd_modelling", + /*long_name=*/"Mean Field Crowd Modelling", + GameType::Dynamics::kMeanField, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kPerfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kRewards, + /*max_num_players=*/kNumPlayers, + /*min_num_players=*/kNumPlayers, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"size", GameParameter(kDefaultSize)}, + {"horizon", GameParameter(kDefaultHorizon)}}, + /*default_loadable*/true, + /*provides_factored_observation_string*/false}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new CrowdModellingGame(params)); +} + +std::string StateToString(int x, int t, Player player_id, bool is_chance_init) { + if (is_chance_init) { + return "initial"; + } + if (player_id == 0) { + return absl::Substitute("($0, $1)", x, t); + } + if (player_id == kMeanFieldPlayerId) { + return absl::Substitute("($0, $1)_a", x, t); + } + if (player_id == kChancePlayerId) { + return absl::Substitute("($0, $1)_a_mu", x, t); + } + SpielFatalError(absl::Substitute( + "Unexpected state (player_id: $0, is_chance_init: $1)", + player_id, is_chance_init)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +} // namespace + +CrowdModellingState::CrowdModellingState(std::shared_ptr game, + int size, int horizon) + : State(game), + size_(size), + horizon_(horizon), + distribution_(size_, 1. / size_) {} + +CrowdModellingState::CrowdModellingState( + std::shared_ptr game, int size, int horizon, + Player current_player, bool is_chance_init, int x, int t, int last_action, + double return_value, const std::vector& distribution) + : State(game), + size_(size), + horizon_(horizon), + current_player_(current_player), + is_chance_init_(is_chance_init), + x_(x), + t_(t), + last_action_(last_action), + return_value_(return_value), + distribution_(distribution) {} + +std::vector CrowdModellingState::LegalActions() const { + if (IsTerminal()) return {}; + if (IsChanceNode()) return LegalChanceOutcomes(); + if (IsMeanFieldNode()) return {}; + SPIEL_CHECK_TRUE(IsPlayerNode()); + return {0, 1, 2}; +} + +ActionsAndProbs CrowdModellingState::ChanceOutcomes() const { + if (is_chance_init_) { + ActionsAndProbs outcomes; + for (int i = 0; i < size_; ++i) { + outcomes.push_back({i, 1. / size_}); + } + return outcomes; + } + return {{0, 1. / 3}, {1, 1. / 3}, {2, 1. 
/ 3}}; +} + +void CrowdModellingState::DoApplyAction(Action action) { + SPIEL_CHECK_NE(current_player_, kMeanFieldPlayerId); + return_value_ += Rewards()[0]; + if (is_chance_init_) { + SPIEL_CHECK_GE(action, 0); + SPIEL_CHECK_LT(action, size_); + SPIEL_CHECK_EQ(current_player_, kChancePlayerId); + x_ = action; + is_chance_init_ = false; + current_player_ = 0; + } else if (current_player_ == kChancePlayerId) { + x_ = (x_ + kActionToMove.at(action) + size_) % size_; + ++t_; + current_player_ = kMeanFieldPlayerId; + } else { + SPIEL_CHECK_EQ(current_player_, 0); + x_ = (x_ + kActionToMove.at(action) + size_) % size_; + last_action_ = action; + current_player_ = kChancePlayerId; + } +} + +std::string CrowdModellingState::ActionToString(Player player, + Action action) const { + if (IsChanceNode() && is_chance_init_) { + return absl::Substitute("init_state=$0", action); + } + return std::to_string(kActionToMove.at(action)); +} + +std::vector CrowdModellingState::DistributionSupport() { + std::vector support; + support.reserve(size_); + for (int x = 0; x < size_; ++x) { + support.push_back(StateToString(x, t_, kMeanFieldPlayerId, false)); + } + return support; +} + +void CrowdModellingState::UpdateDistribution( + const std::vector& distribution) { + SPIEL_CHECK_EQ(current_player_, kMeanFieldPlayerId); + SPIEL_CHECK_EQ(distribution.size(), size_); + distribution_ = distribution; + current_player_ = kDefaultPlayerId; +} + +bool CrowdModellingState::IsTerminal() const { return t_ >= horizon_; } + +std::vector CrowdModellingState::Rewards() const { + if (current_player_ != 0) { + return {0.}; + } + double r_x = 1 - 1.0 * std::abs(x_ - size_ / 2) / (size_ / 2); + double r_a = -1.0 * std::abs(kActionToMove.at(last_action_)) / size_; + double r_mu = -std::log(distribution_[x_]+kEpsilon); + return {r_x + r_a + r_mu}; +} + +std::vector CrowdModellingState::Returns() const { + return {return_value_ + Rewards()[0]}; +} + +std::string CrowdModellingState::ToString() const { + return StateToString(x_, t_, current_player_, is_chance_init_); +} + +std::string CrowdModellingState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return HistoryString(); +} + +std::string CrowdModellingState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void CrowdModellingState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + SPIEL_CHECK_EQ(values.size(), size_ + horizon_ + 1); + SPIEL_CHECK_LT(x_, size_); + SPIEL_CHECK_GE(t_, 0); + // Allow t_ == horizon_. + SPIEL_CHECK_LE(t_, horizon_); + std::fill(values.begin(), values.end(), 0.); + if (x_ >= 0) { + values[x_] = 1.; + } + // x_ equals -1 for the initial (blank) state, don't set any + // position bit in that case. 
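+  // Layout of the observation: a one-hot position over the size_ circle cells
+  // followed by a one-hot time step over horizon_ + 1 slots, which is why
+  // ObservationTensorShape() below returns {size_ + horizon_ + 1}.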
+ values[size_ + t_] = 1.; +} + +std::unique_ptr CrowdModellingState::Clone() const { + return std::unique_ptr(new CrowdModellingState(*this)); +} + +std::string CrowdModellingState::Serialize() const { + std::string out = + absl::StrCat(current_player_, ",", is_chance_init_, ",", x_, ",", t_, ",", + last_action_, ",", return_value_, "\n"); + absl::StrAppend(&out, absl::StrJoin(distribution_, ",")); + return out; +} + +CrowdModellingGame::CrowdModellingGame(const GameParameters& params) + : Game(kGameType, params), + size_(ParameterValue("size", kDefaultSize)), + horizon_(ParameterValue("horizon", kDefaultHorizon)) {} + +std::vector CrowdModellingGame::ObservationTensorShape() const { + // +1 to allow for t_ == horizon. + return {size_ + horizon_ + 1}; +} + +std::unique_ptr CrowdModellingGame::DeserializeState( + const std::string& str) const { + std::vector lines = absl::StrSplit(str, '\n'); + if (lines.size() != 2) { + SpielFatalError(absl::StrCat("Expected 2 lines in serialized state, got: ", + lines.size())); + } + Player current_player; + int is_chance_init; + int x; + int t; + int last_action; + double return_value; + std::vector properties = absl::StrSplit(lines[0], ','); + if (properties.size() != 6) { + SpielFatalError( + absl::StrCat("Expected 6 properties for serialized state, got: ", + properties.size())); + } + SPIEL_CHECK_TRUE(absl::SimpleAtoi(properties[0], ¤t_player)); + SPIEL_CHECK_TRUE(absl::SimpleAtoi(properties[1], &is_chance_init)); + SPIEL_CHECK_TRUE(absl::SimpleAtoi(properties[2], &x)); + SPIEL_CHECK_TRUE(absl::SimpleAtoi(properties[3], &t)); + SPIEL_CHECK_TRUE(absl::SimpleAtoi(properties[4], &last_action)); + SPIEL_CHECK_TRUE(absl::SimpleAtod(properties[5], &return_value)); + std::vector serialized_distrib = absl::StrSplit(lines[1], ','); + std::vector distribution; + distribution.reserve(serialized_distrib.size()); + for (std::string& v : serialized_distrib) { + double parsed_weight; + SPIEL_CHECK_TRUE(absl::SimpleAtod(v, &parsed_weight)); + distribution.push_back(parsed_weight); + } + return absl::make_unique( + shared_from_this(), size_, horizon_, current_player, is_chance_init, x, t, + last_action, return_value, distribution); +} + +} // namespace crowd_modelling +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/mfg/crowd_modelling.h b/scenarios/bargaining/open_spiel/open_spiel/games/mfg/crowd_modelling.h new file mode 100644 index 0000000..cdd9f3b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/mfg/crowd_modelling.h @@ -0,0 +1,152 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Mean Field Crowd Modelling Game. +// +// This game corresponds to the "Beach Bar Process" defined in section 4.2 of +// "Fictitious play for mean field games: Continuous time analysis and +// applications", Perrin & al. 2019 (https://arxiv.org/abs/2007.03458). 
+// +// In a nutshell, each representative agent evolves on a circle, with {left, +// neutral, right} actions. The reward includes the proximity to an imagined bar +// placed at a fixed location in the circle, and penalties for moving and for +// being in a crowded place. + +#ifndef OPEN_SPIEL_GAMES_MFG_CROWD_MODELLING_H_ +#define OPEN_SPIEL_GAMES_MFG_CROWD_MODELLING_H_ + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/memory/memory.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace crowd_modelling { + +inline constexpr int kNumPlayers = 1; +inline constexpr int kDefaultHorizon = 10; +inline constexpr int kDefaultSize = 10; +inline constexpr int kNumActions = 3; +inline constexpr int kNumChanceActions = 3; +// Action that leads to no displacement on the circle of the game. +inline constexpr int kNeutralAction = 1; + +// Game state. +// The high-level state transitions are as follows: +// - First game state is a chance node where the initial position on the +// circle is selected. +// Then we cycle over: +// 1. Decision node with actions {left, neutral, right}, represented by integers +// 0, 1, 2. This moves the position on the circle. +// 2. Mean field node, where we expect that external logic will call +// DistributionSupport() and UpdateDistribution(). +// 3. Chance node, where one of {left, neutral, right} actions is externally +// selected. +// The game stops after a non-initial chance node when the horizon is reached. +class CrowdModellingState : public State { + public: + CrowdModellingState(std::shared_ptr game, int size, int horizon); + CrowdModellingState(std::shared_ptr game, int size, int horizon, + Player current_player, bool is_chance_init, int x, int t, + int last_action, double return_value, + const std::vector& distribution); + + CrowdModellingState(const CrowdModellingState&) = default; + CrowdModellingState& operator=(const CrowdModellingState&) = default; + + Player CurrentPlayer() const override { + return IsTerminal() ? kTerminalPlayerId : current_player_; + } + std::string ActionToString(Player player, Action action) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Rewards() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + std::vector LegalActions() const override; + ActionsAndProbs ChanceOutcomes() const override; + + std::vector DistributionSupport() override; + void UpdateDistribution(const std::vector& distribution) override; + std::vector Distribution() const { return distribution_; } + + std::string Serialize() const override; + + protected: + void DoApplyAction(Action action) override; + + private: + // Size of the circle. + const int size_ = -1; + const int horizon_ = -1; + Player current_player_ = kChancePlayerId; + bool is_chance_init_ = true; + // Position on the circle [0, size_) when valid. + int x_ = -1; + // Current time, in [0, horizon_]. + int t_ = 0; + int last_action_ = kNeutralAction; + double return_value_ = 0.; + + // kActionToMove[action] is the displacement on the circle of the game for + // 'action'. + static constexpr std::array kActionToMove = {-1, 0, 1}; + // Represents the current probability distribution over game states. 
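+  // distribution_[x] is the probability mass currently assigned to circle
+  // position x; it always holds size_ entries and is refreshed via
+  // UpdateDistribution() at every mean field node.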
+ std::vector distribution_; +}; + +class CrowdModellingGame : public Game { + public: + explicit CrowdModellingGame(const GameParameters& params); + int NumDistinctActions() const override { return kNumActions; } + std::unique_ptr NewInitialState() const override { + return absl::make_unique(shared_from_this(), size_, + horizon_); + } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { + return -std::numeric_limits::infinity(); + } + double MaxUtility() const override { + return std::numeric_limits::infinity(); + } + int MaxGameLength() const override { return horizon_; } + int MaxChanceNodesInHistory() const override { + // + 1 to account for the initial extra chance node. + return horizon_ + 1; + } + std::vector ObservationTensorShape() const override; + int MaxChanceOutcomes() const override { + return std::max(size_, kNumChanceActions); + } + std::unique_ptr DeserializeState( + const std::string& str) const override; + + private: + const int size_; + const int horizon_; +}; + +} // namespace crowd_modelling +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_MFG_CROWD_MODELLING_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/mfg/crowd_modelling_2d.cc b/scenarios/bargaining/open_spiel/open_spiel/games/mfg/crowd_modelling_2d.cc new file mode 100644 index 0000000..f157902 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/mfg/crowd_modelling_2d.cc @@ -0,0 +1,575 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/mfg/crowd_modelling_2d.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/ascii.h" +#include "open_spiel/abseil-cpp/absl/strings/numbers.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/abseil-cpp/absl/strings/strip.h" +#include "open_spiel/abseil-cpp/absl/strings/substitute.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace crowd_modelling_2d { + +std::vector ProcessStringParam( + const std::string& string_param_str, int max_size) { + // ProcessStringParam takes a parameter string and split it is a sequence of + // substring. Example: + // "" -> {} + // "[0|0;0|1]" -> {"0|0", "0|1"} + // "[0.5;0.5]" -> {"0.5", "0.5"} + absl::string_view string_param = absl::StripAsciiWhitespace(string_param_str); + SPIEL_CHECK_TRUE(absl::ConsumePrefix(&string_param, "[")); + SPIEL_CHECK_TRUE(absl::ConsumeSuffix(&string_param, "]")); + + std::vector split_string_list; + if (!string_param.empty()) { + split_string_list = absl::StrSplit(string_param, ';'); + } + SPIEL_CHECK_GE(split_string_list.size(), 0); + SPIEL_CHECK_LE(split_string_list.size(), max_size * max_size); + return split_string_list; +} + +namespace { +inline constexpr float kEpsilon = 1e-25; + +// Facts about the game. 
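+// Example of loading this game with explicit parameters, using the same
+// string formats exercised in crowd_modelling_2d_test.cc (the values here are
+// purely illustrative):
+//
+//   auto game = LoadGame(
+//       "mfg_crowd_modelling_2d(size=10,horizon=20,"
+//       "forbidden_states=[0|0;0|1],"
+//       "initial_distribution=[1|1],initial_distribution_value=[1.0],"
+//       "noise_intensity=0.5)");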
+const GameType kGameType{ + /*short_name=*/"mfg_crowd_modelling_2d", + /*long_name=*/"Mean Field Crowd Modelling 2D", + GameType::Dynamics::kMeanField, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kPerfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kRewards, + /*max_num_players=*/kNumPlayers, + /*min_num_players=*/kNumPlayers, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + { + {"size", GameParameter(kDefaultSize)}, + {"horizon", GameParameter(kDefaultHorizon)}, + {"only_distribution_reward", + GameParameter(kDefaultOnlyDistributionReward)}, + {"forbidden_states", GameParameter(kDefaultForbiddenStates)}, + {"initial_distribution", GameParameter(kDefaultInitialDistribution)}, + {"initial_distribution_value", + GameParameter(kDefaultInitialDistributionValue)}, + {"positional_reward", GameParameter(kDefaultPositionalReward)}, + {"positional_reward_value", + GameParameter(kDefaultPositionalRewardValue)}, + {"with_congestion", GameParameter(kDefaultWithCongestion)}, + {"noise_intensity", GameParameter(kDefaultNoiseIntensity)}, + {"crowd_aversion_coef", GameParameter(kDefaultCrowdAversionCoef)}, + }, + /*default_loadable*/ true, + /*provides_factored_observation_string*/ false}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new CrowdModelling2dGame(params)); +} + +std::string StateToString(int x, int y, int t, Player player_id, + bool is_chance_init) { + if (is_chance_init) { + return "initial"; + } + if (player_id == 0) { + return absl::Substitute("($0, $1, $2)", x, y, t); + } + if (player_id == kMeanFieldPlayerId) { + return absl::Substitute("($0, $1, $2)_a", x, y, t); + } + if (player_id == kChancePlayerId) { + return absl::Substitute("($0, $1, $2)_a_mu", x, y, t); + } + SpielFatalError( + absl::Substitute("Unexpected state (player_id: $0, is_chance_init: $1)", + player_id, is_chance_init)); +} + +std::vector> StringListToPairs( + std::vector strings) { + // Transforms a list of strings and returns a list of pairs + // {} -> {} + // {"0|0", "0|1"} -> {(0, 0), (0, 1)} + std::vector> pairs; + for (int i = 0; i < strings.size(); ++i) { + std::vector xy = absl::StrSplit(strings[i], '|'); + int xx; + int yy; + SPIEL_CHECK_TRUE(absl::SimpleAtoi(xy[0], &xx)); + SPIEL_CHECK_TRUE(absl::SimpleAtoi(xy[1], &yy)); + pairs.push_back({xx, yy}); + } + return pairs; +} +std::vector StringListToFloats(std::vector strings) { + // Transforms a list of strings and returns a list of float + // {} -> {} + // {"0.5","0.5"} -> {0.5, 0.5} + std::vector floats; + floats.reserve(strings.size()); + for (int i = 0; i < strings.size(); ++i) { + float ff; + SPIEL_CHECK_TRUE(absl::SimpleAtof(strings[i], &ff)); + floats.push_back(ff); + } + return floats; +} + +int GetX(int i, int size) { return i % size; } + +int GetY(int i, int size) { return i / size; } + +int MergeXY(int xx, int yy, int size) { + SPIEL_CHECK_GE(xx, 0); + SPIEL_CHECK_LE(xx, size - 1); + SPIEL_CHECK_GE(yy, 0); + SPIEL_CHECK_LE(yy, size - 1); + return yy + xx * size; +} + +bool ComparisonPair(const std::pair& a, + const std::pair& b) { + return a.first < b.first; +} + +std::vector StringListToInts(std::vector strings, + int size) { + // Transforms a list of strings and returns a list of pairs + // {} -> {} + // {"0|0", "0|1"} -> {(0, 0), (0, 1)} + std::vector ints; + for (int i = 0; i < strings.size(); ++i) { + 
std::vector xy = absl::StrSplit(strings[i], '|'); + int xx; + int yy; + SPIEL_CHECK_TRUE(absl::SimpleAtoi(xy[0], &xx)); + SPIEL_CHECK_TRUE(absl::SimpleAtoi(xy[1], &yy)); + ints.push_back(MergeXY(xx, yy, size)); + } + return ints; +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +} // namespace + +CrowdModelling2dState::CrowdModelling2dState( + std::shared_ptr game, int size, int horizon, + bool only_distribution_reward, const std::string& forbidden_states, + const std::string& initial_distribution, + const std::string& initial_distribution_value, + const std::string& positional_reward, + const std::string& positional_reward_value, bool with_congestion, + double noise_intensity, double crowd_aversion_coef) + : State(game), + size_(size), + horizon_(horizon), + only_distribution_reward_(only_distribution_reward), + with_congestion_(with_congestion), + noise_intensity_(noise_intensity), + crowd_aversion_coef_(crowd_aversion_coef), + distribution_(size_ * size_, 1. / (size_ * size_)) { + std::vector initial_distribution_list = + ProcessStringParam(initial_distribution, size_); + std::vector initial_distribution_value_list = + ProcessStringParam(initial_distribution_value, size_); + SPIEL_CHECK_EQ(initial_distribution_list.size(), + initial_distribution_value_list.size()); + + auto initial_distribution_pair = StringListToPairs(initial_distribution_list); + auto initial_distribution_value_f = + StringListToFloats(initial_distribution_value_list); + + int initial_distribution_action_prob_size = initial_distribution_list.size(); + + if (initial_distribution_action_prob_size == 0) { + for (int i = 0; i < size_ * size_; ++i) { + initial_distribution_action_prob_.push_back({i, 1. / (size_ * size_)}); + } + } else { + for (int i = 0; i < initial_distribution_action_prob_size; ++i) { + int kk = MergeXY(initial_distribution_pair[i].first, + initial_distribution_pair[i].second, size_); + initial_distribution_action_prob_.push_back( + {kk, initial_distribution_value_f[i]}); + } + } + + std::sort(initial_distribution_action_prob_.begin(), + initial_distribution_action_prob_.end(), ComparisonPair); + + std::vector forbidden_states_list = + ProcessStringParam(forbidden_states, size_); + forbidden_states_xy_ = StringListToPairs(forbidden_states_list); + for (const auto& forbidden_state_xy : forbidden_states_xy_) { + SPIEL_CHECK_GE(forbidden_state_xy.first, 0); + SPIEL_CHECK_LE(forbidden_state_xy.first, size_ - 1); + SPIEL_CHECK_GE(forbidden_state_xy.second, 0); + SPIEL_CHECK_LE(forbidden_state_xy.second, size_ - 1); + } + + std::vector positional_reward_list = + ProcessStringParam(positional_reward, size_); + std::vector positional_reward_value_list = + ProcessStringParam(positional_reward_value, size_); + positional_reward_xy_ = StringListToPairs(positional_reward_list); + positional_reward_value_ = StringListToFloats(positional_reward_value_list); + // There should be a reward for each positional reward XY pair. + SPIEL_CHECK_EQ(positional_reward_xy_.size(), positional_reward_value_.size()); + for (const auto& positional_reward_xy : positional_reward_xy_) { + SPIEL_CHECK_GE(positional_reward_xy.first, 0); + SPIEL_CHECK_LE(positional_reward_xy.first, size_ - 1); + SPIEL_CHECK_GE(positional_reward_xy.second, 0); + SPIEL_CHECK_LE(positional_reward_xy.second, size_ - 1); + } + + if (positional_reward_xy_.empty()) { + // Use the center point as the reward position. 
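+    // In other words, when no positional_reward parameter is supplied, a
+    // single reward of value 1.0 is placed at the center cell
+    // (size_ / 2, size_ / 2), mirroring the 1D game's bar position.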
+ positional_reward_xy_.push_back({size_ / 2, size_ / 2}); + positional_reward_value_.push_back(1.0); + } + + // Forbid to the initial distribution and the forbidden states to overlap. + auto forbidden_states_int = StringListToInts(forbidden_states_list, size_); + auto initial_distribution_int = + StringListToInts(initial_distribution_list, size_); + std::vector intersection; + std::sort(forbidden_states_int.begin(), forbidden_states_int.end()); + std::sort(initial_distribution_int.begin(), initial_distribution_int.end()); + + std::set_intersection( + forbidden_states_int.begin(), forbidden_states_int.end(), + initial_distribution_int.begin(), initial_distribution_int.end(), + back_inserter(intersection)); + SPIEL_CHECK_EQ(intersection.size(), 0); +} + +CrowdModelling2dState::CrowdModelling2dState( + std::shared_ptr game, int size, int horizon, + bool only_distribution_reward, const std::string& forbidden_states, + const std::string& initial_distribution, + const std::string& initial_distribution_value, + const std::string& positional_reward, + const std::string& positional_reward_value, Player current_player, + bool is_chance_init, int x, int y, int t, int last_action, + double return_value, const std::vector& distribution, + bool with_congestion, double noise_intensity, double crowd_aversion_coef) + : CrowdModelling2dState(game, size, horizon, only_distribution_reward, + forbidden_states, initial_distribution, + initial_distribution_value, positional_reward, + positional_reward_value, with_congestion, + noise_intensity, crowd_aversion_coef) { + current_player_ = current_player; + is_chance_init_ = is_chance_init; + x_ = x; + y_ = y; + t_ = t; + last_action_ = last_action; + return_value_ = return_value; +} + +std::vector CrowdModelling2dState::LegalPlayerActions() const { + std::vector legal_actions; + legal_actions.reserve(kNumActions); + for (Action action = 0; action < kNumActions; ++action) { + if (!IsForbidden(action)) { + legal_actions.push_back(action); + } + } + return legal_actions; +} + +std::vector CrowdModelling2dState::LegalActions() const { + if (IsTerminal()) return {}; + if (IsChanceNode()) return LegalChanceOutcomes(); + if (IsMeanFieldNode()) return {}; + SPIEL_CHECK_TRUE(IsPlayerNode()); + return LegalPlayerActions(); +} + +ActionsAndProbs CrowdModelling2dState::ChanceOutcomes() const { + if (is_chance_init_) { + return initial_distribution_action_prob_; + } + const std::vector legal_actions = LegalPlayerActions(); + ActionsAndProbs outcomes; + if (legal_actions.empty()) { + return outcomes; + } + // Neutral action will always be present in the legal actions. 
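+  // With noise intensity eta and k legal actions, every legal action receives
+  // probability eta / k and the neutral action additionally receives 1 - eta,
+  // so the outcomes always sum to 1 (see TestNoiseIntensity in the tests).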
+ const double prob = noise_intensity_ / legal_actions.size(); + outcomes.reserve(legal_actions.size()); + for (const Action action : legal_actions) { + if (action == kNeutralAction) { + outcomes.emplace_back(action, 1.0 - noise_intensity_ + prob); + } else { + outcomes.emplace_back(action, prob); + } + } + return outcomes; +} + +bool CrowdModelling2dState::IsForbidden(Action action) const { + int xx = (x_ + kActionToMoveX.at(action) + size_) % size_; + int yy = (y_ + kActionToMoveY.at(action) + size_) % size_; + return IsForbiddenPosition(xx, yy); +} + +bool CrowdModelling2dState::IsForbiddenPosition(int x, int y) const { + for (const auto& forbidden_xy : forbidden_states_xy_) { + if (x == forbidden_xy.first && y == forbidden_xy.second) { + return true; + } + } + return false; +} + +void CrowdModelling2dState::DoApplyAction(Action action) { + SPIEL_CHECK_NE(current_player_, kMeanFieldPlayerId); + return_value_ += Rewards()[0]; + int xx; + int yy; + // Compute the next state + if (is_chance_init_) { + SPIEL_CHECK_GE(action, 0); + SPIEL_CHECK_LT(action, (size_ * size_)); + SPIEL_CHECK_EQ(current_player_, kChancePlayerId); + xx = GetX(action, size_); + yy = GetY(action, size_); + is_chance_init_ = false; + current_player_ = 0; + } else if (current_player_ == kChancePlayerId) { + xx = (x_ + kActionToMoveX.at(action) + size_) % size_; + yy = (y_ + kActionToMoveY.at(action) + size_) % size_; + ++t_; + current_player_ = kMeanFieldPlayerId; + } else { + SPIEL_CHECK_EQ(current_player_, 0); + xx = (x_ + kActionToMoveX.at(action) + size_) % size_; + yy = (y_ + kActionToMoveY.at(action) + size_) % size_; + last_action_ = action; + current_player_ = kChancePlayerId; + } + // Assign the new (x,y) position if it isn't forbidden. + if (!IsForbiddenPosition(xx, yy) || is_chance_init_) { + x_ = xx; + y_ = yy; + } +} + +std::string CrowdModelling2dState::ActionToString(Player player, + Action action) const { + if (IsChanceNode() && is_chance_init_) { + return absl::Substitute("init_state=$0", action); + } + return absl::Substitute("($0,$1)", kActionToMoveX.at(action), + kActionToMoveY.at(action)); +} + +std::vector CrowdModelling2dState::DistributionSupport() { + std::vector support; + support.reserve(size_ * size_); + for (int x = 0; x < size_; ++x) { + for (int y = 0; y < size_; ++y) { + support.push_back(StateToString(x, y, t_, kMeanFieldPlayerId, false)); + } + } + return support; +} + +void CrowdModelling2dState::UpdateDistribution( + const std::vector& distribution) { + SPIEL_CHECK_EQ(current_player_, kMeanFieldPlayerId); + SPIEL_CHECK_EQ(distribution.size(), size_ * size_); + distribution_ = distribution; + current_player_ = kDefaultPlayerId; +} + +bool CrowdModelling2dState::IsTerminal() const { return t_ >= horizon_; } + +std::vector CrowdModelling2dState::Rewards() const { + if (current_player_ != 0) { + return {0.}; + } + // Distribution-based reward + double r_mu = -crowd_aversion_coef_ * + std::log(distribution_[MergeXY(x_, y_, size_)] + kEpsilon); + if (only_distribution_reward_) { + return {r_mu}; + } + // Positional reward + double r_x = 1; + double r_y = 1; + for (int i = 0; i < positional_reward_xy_.size(); ++i) { + double val_r = 2.0 * positional_reward_value_[i] / size_; + r_x -= val_r * std::abs(x_ - positional_reward_xy_[i].first); + r_y -= val_r * std::abs(y_ - positional_reward_xy_[i].second); + } + double r_a = -1.0 * + (std::abs(kActionToMoveX.at(last_action_)) + + std::abs(kActionToMoveY.at(last_action_))) / + size_; + if (with_congestion_) { + // Congestion effect: higher penalty 
when moving in a high-density area + r_a *= distribution_[MergeXY(x_, y_, size_)]; + } + return {r_x + r_y + r_a + r_mu}; +} + +std::vector CrowdModelling2dState::Returns() const { + return {return_value_ + Rewards()[0]}; +} + +std::string CrowdModelling2dState::ToString() const { + return StateToString(x_, y_, t_, current_player_, is_chance_init_); +} + +std::string CrowdModelling2dState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return HistoryString(); +} + +std::string CrowdModelling2dState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void CrowdModelling2dState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + SPIEL_CHECK_EQ(values.size(), 2 * size_ + horizon_ + 1); + SPIEL_CHECK_LT(x_, size_); + SPIEL_CHECK_LT(y_, size_); + SPIEL_CHECK_GE(t_, 0); + // Allow t_ == horizon_. + SPIEL_CHECK_LE(t_, horizon_); + std::fill(values.begin(), values.end(), 0.); + if (x_ >= 0 && y_ >= 0) { + values[x_] = 1.; + values[y_ + size_] = 1.; + } else { + // x_ and y_ equal -1 for the initial (blank) state, don't set any position + // bit in that case. + SPIEL_CHECK_EQ(x_, -1); + SPIEL_CHECK_EQ(y_, -1); + } + values[2 * size_ + t_] = 1.; +} + +std::unique_ptr CrowdModelling2dState::Clone() const { + return std::unique_ptr(new CrowdModelling2dState(*this)); +} + +std::string CrowdModelling2dState::Serialize() const { + std::string out = + absl::StrCat(current_player_, ",", is_chance_init_, ",", x_, ",", y_, ",", + t_, ",", last_action_, ",", return_value_, "\n"); + absl::StrAppend(&out, absl::StrJoin(distribution_, ",")); + return out; +} + +CrowdModelling2dGame::CrowdModelling2dGame(const GameParameters& params) + : Game(kGameType, params), + size_(ParameterValue("size", kDefaultSize)), + horizon_(ParameterValue("horizon", kDefaultHorizon)), + only_distribution_reward_(ParameterValue( + "only_distribution_reward", kDefaultOnlyDistributionReward)), + forbidden_states_(ParameterValue("forbidden_states", + kDefaultForbiddenStates)), + initial_distribution_(ParameterValue( + "initial_distribution", kDefaultInitialDistribution)), + initial_distribution_value_(ParameterValue( + "initial_distribution_value", kDefaultInitialDistributionValue)), + positional_reward_(ParameterValue("positional_reward", + kDefaultPositionalReward)), + positional_reward_value_(ParameterValue( + "positional_reward_value", kDefaultPositionalRewardValue)), + with_congestion_( + ParameterValue("with_congestion", kDefaultWithCongestion)), + noise_intensity_( + ParameterValue("noise_intensity", kDefaultNoiseIntensity)), + crowd_aversion_coef_(ParameterValue("crowd_aversion_coef", + kDefaultCrowdAversionCoef)) {} + +std::vector CrowdModelling2dGame::ObservationTensorShape() const { + // +1 to allow for t_ == horizon. 
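+  // Layout: one-hot x (size entries), one-hot y (size entries), then a
+  // one-hot time step (horizon + 1 entries), matching ObservationTensor().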
+ return {2 * ParameterValue("size") + ParameterValue("horizon") + 1}; +} + +std::unique_ptr CrowdModelling2dGame::DeserializeState( + const std::string& str) const { + std::vector lines = absl::StrSplit(str, '\n'); + if (lines.size() != 2) { + SpielFatalError(absl::StrCat("Expected 2 lines in serialized state, got: ", + lines.size())); + } + Player current_player; + int is_chance_init, x, y, t, last_action; + double return_value; + std::vector distribution; + + std::vector properties = absl::StrSplit(lines[0], ','); + if (properties.size() != 7) { + SpielFatalError( + absl::StrCat("Expected 7 properties for serialized state, got: ", + properties.size())); + } + SPIEL_CHECK_TRUE(absl::SimpleAtoi(properties[0], ¤t_player)); + SPIEL_CHECK_TRUE(absl::SimpleAtoi(properties[1], &is_chance_init)); + SPIEL_CHECK_TRUE(absl::SimpleAtoi(properties[2], &x)); + SPIEL_CHECK_TRUE(absl::SimpleAtoi(properties[3], &y)); + SPIEL_CHECK_TRUE(absl::SimpleAtoi(properties[4], &t)); + SPIEL_CHECK_TRUE(absl::SimpleAtoi(properties[5], &last_action)); + SPIEL_CHECK_TRUE(absl::SimpleAtod(properties[6], &return_value)); + std::vector serialized_distrib = absl::StrSplit(lines[1], ','); + distribution.reserve(serialized_distrib.size()); + for (std::string& v : serialized_distrib) { + double parsed_weight; + SPIEL_CHECK_TRUE(absl::SimpleAtod(v, &parsed_weight)); + distribution.push_back(parsed_weight); + } + return absl::make_unique( + shared_from_this(), size_, horizon_, only_distribution_reward_, + forbidden_states_, initial_distribution_, initial_distribution_value_, + positional_reward_, positional_reward_value_, current_player, + is_chance_init, x, y, t, last_action, return_value, distribution, + with_congestion_, noise_intensity_, crowd_aversion_coef_); +} + +} // namespace crowd_modelling_2d +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/mfg/crowd_modelling_2d.h b/scenarios/bargaining/open_spiel/open_spiel/games/mfg/crowd_modelling_2d.h new file mode 100644 index 0000000..d040200 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/mfg/crowd_modelling_2d.h @@ -0,0 +1,221 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Mean Field Crowd Modelling Game in 2d. +// +// This game corresponds to a 2D "Beach Bar Process" defined in section 4.2 of +// "Fictitious play for mean field games: Continuous time analysis and +// applications", Perrin & al. 2019 (https://arxiv.org/abs/2007.03458). +// +// In a nutshell, each representative agent evolves on a 2d torus, with {down, +// left, neutral, right, up} actions. The reward includes the proximity to an +// imagined bar placed at a fixed location in the torus, and penalties for +// moving and for being in a crowded place. 
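+//
+// Concretely, the per-step reward implemented in Rewards() decomposes as
+// r = r_x + r_y + r_a + r_mu, where r_x and r_y measure proximity to the
+// (possibly parameterized) reward positions, r_a penalizes displacement, and
+// r_mu = -crowd_aversion_coef * log(density(x, y) + epsilon) penalizes being
+// in a crowded cell. With only_distribution_reward=true, only r_mu is kept.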
+ +#ifndef OPEN_SPIEL_GAMES_MFG_CROWD_MODELLING_H_ +#define OPEN_SPIEL_GAMES_MFG_CROWD_MODELLING_H_ + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/memory/memory.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace crowd_modelling_2d { + +inline constexpr int kNumPlayers = 1; +inline constexpr int kDefaultHorizon = 10; +inline constexpr int kDefaultSize = 10; +inline constexpr int kNumActions = 5; +inline constexpr int kNumChanceActions = 5; +inline constexpr bool kDefaultOnlyDistributionReward = false; +inline constexpr bool kDefaultWithCongestion = false; +// Noise intensity is distributed uniformly among the legal actions in the +// chance node. Neutral action will get an additional probability of 1 - +// noise_intensity. +inline constexpr double kDefaultNoiseIntensity = 1.0; +inline constexpr double kDefaultCrowdAversionCoef = 1.0; + +// Example string format: "[0|0;0|1]" +inline constexpr const char* kDefaultForbiddenStates = "[]"; +// Example string format: "[0|2;0|3]" +inline constexpr const char* kDefaultInitialDistribution = "[]"; +// Example string format: "[0.5;0.5]" +inline constexpr const char* kDefaultInitialDistributionValue = "[]"; +// Example string format: "[0|2;0|3]" +inline constexpr const char* kDefaultPositionalReward = "[]"; +// Example string format: "[1.5;2.5]" +inline constexpr const char* kDefaultPositionalRewardValue = "[]"; + +// Action that leads to no displacement on the torus of the game. +inline constexpr int kNeutralAction = 2; + +std::vector ProcessStringParam( + const std::string& string_param_str, int max_size); + +// Game state. +// The high-level state transitions are as follows: +// - First game state is a chance node where the initial position on the +// torus is selected. +// Then we cycle over: +// 1. Decision node with actions {down, left, neutral, right, up}, represented +// by integers 0, 1, 2, 3, 4. This moves the position on the torus. +// 2. Mean field node, where we expect that external logic will call +// DistributionSupport() and UpdateDistribution(). +// 3. Chance node, where one of {down, left, neutral, right, up} actions is +// externally selected. +// The game stops after a non-initial chance node when the horizon is reached. +class CrowdModelling2dState : public State { + public: + // forbidden_states, initial_distribution and positional_reward are formated + // like '[int|int;...;int|int]'. Example : "[]" or "[0|0;0|1]". + // initial_distribution_value and positional_reward_value are formated like + // '[float;...;float]'. 
Example : "[]" or "[0.5;0.5]" + CrowdModelling2dState(std::shared_ptr game, int size, int horizon, + bool only_distribution_reward, + const std::string& forbidden_states, + const std::string& initial_distribution, + const std::string& initial_distribution_value, + const std::string& positional_reward, + const std::string& positional_reward_value, + bool with_congestion, double noise_intensity, + double crowd_aversion_coef); + CrowdModelling2dState(std::shared_ptr game, int size, int horizon, + bool only_distribution_reward, + const std::string& forbidden_states, + const std::string& initial_distribution, + const std::string& initial_distribution_value, + const std::string& positional_reward, + const std::string& positional_reward_value, + Player current_player, bool is_chance_init_, int x, + int y, int t, int last_action, double return_value, + const std::vector& distribution, + bool with_congestion, double noise_intensity, + double crowd_aversion_coef); + + CrowdModelling2dState(const CrowdModelling2dState&) = default; + CrowdModelling2dState& operator=(const CrowdModelling2dState&) = default; + + Player CurrentPlayer() const override { + return IsTerminal() ? kTerminalPlayerId : current_player_; + } + std::string ActionToString(Player player, Action action) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Rewards() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + std::vector LegalActions() const override; + ActionsAndProbs ChanceOutcomes() const override; + + std::vector DistributionSupport() override; + void UpdateDistribution(const std::vector& distribution) override; + + std::string Serialize() const override; + + protected: + void DoApplyAction(Action action) override; + // Returns true if the specified action leads to a forbidden position. + bool IsForbidden(Action action) const; + // Returns true if the specified position is forbidden. + bool IsForbiddenPosition(int x, int y) const; + // Returns the list if legal actions for the player. + std::vector LegalPlayerActions() const; + + private: + Player current_player_ = kChancePlayerId; + bool is_chance_init_ = true; + // 2D positions on the torus [0, size_) x [0, size_). + int x_ = -1; + int y_ = -1; + // Current time, in [0, horizon_]. + int t_ = 0; + // Size of the torus. + const int size_ = -1; + const int horizon_ = -1; + const bool only_distribution_reward_ = false; + ActionsAndProbs initial_distribution_action_prob_; + std::vector> forbidden_states_xy_; + std::vector> positional_reward_xy_; + std::vector positional_reward_value_; + int last_action_ = kNeutralAction; + double return_value_ = 0.; + bool with_congestion_; + double noise_intensity_; + double crowd_aversion_coef_; + + // kActionToMoveX[action] and kActionToMoveY[action] is the displacement on + // the torus of the game for 'action'. + static constexpr std::array kActionToMoveX = {0, -1, 0, 1, 0}; + static constexpr std::array kActionToMoveY = {-1, 0, 0, 0, 1}; + // Represents the current probability distribution over game states. 
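+  // distribution_[MergeXY(x, y, size_)] is the probability mass currently
+  // assigned to torus cell (x, y); it holds size_ * size_ entries and is
+  // refreshed via UpdateDistribution() at every mean field node.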
+ std::vector distribution_; +}; + +class CrowdModelling2dGame : public Game { + public: + explicit CrowdModelling2dGame(const GameParameters& params); + int NumDistinctActions() const override { return kNumActions; } + std::unique_ptr NewInitialState() const override { + return absl::make_unique( + shared_from_this(), size_, horizon_, only_distribution_reward_, + forbidden_states_, initial_distribution_, initial_distribution_value_, + positional_reward_, positional_reward_value_, with_congestion_, + noise_intensity_, crowd_aversion_coef_); + } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { + return -std::numeric_limits::infinity(); + } + double MaxUtility() const override { + return std::numeric_limits::infinity(); + } + int MaxGameLength() const override { return horizon_; } + int MaxChanceNodesInHistory() const override { + // + 1 to account for the initial extra chance node. + return horizon_ + 1; + } + std::vector ObservationTensorShape() const override; + int MaxChanceOutcomes() const override { + return std::max(size_ * size_, kNumChanceActions); + } + std::unique_ptr DeserializeState( + const std::string& str) const override; + + private: + const int size_; + const int horizon_; + const bool only_distribution_reward_; + std::string forbidden_states_; // Default "", example "[0|0;0|1]" + std::string initial_distribution_; // Default "", example "[0|2;0|3]" + std::string initial_distribution_value_; // Default "", example "[0.5;0.5]" + std::string positional_reward_; // Default "", example "[0|2;0|3]" + std::string positional_reward_value_; // Default "", example "[1.5;2.5]" + const bool with_congestion_; + const double noise_intensity_; + const double crowd_aversion_coef_; +}; + +} // namespace crowd_modelling_2d +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_MFG_CROWD_MODELLING_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/mfg/crowd_modelling_2d_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/mfg/crowd_modelling_2d_test.cc new file mode 100644 index 0000000..21510fd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/mfg/crowd_modelling_2d_test.cc @@ -0,0 +1,184 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/mfg/crowd_modelling_2d.h" + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/random/uniform_int_distribution.h" +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace crowd_modelling_2d { +namespace { + +namespace testing = open_spiel::testing; + +void TestLoad() { + testing::LoadGameTest("mfg_crowd_modelling_2d"); + auto game = LoadGame("mfg_crowd_modelling_2d"); + SPIEL_CHECK_EQ(game->GetType().dynamics, GameType::Dynamics::kMeanField); + auto state = game->NewInitialState(); + auto cloned = state->Clone(); + SPIEL_CHECK_EQ(state->ToString(), cloned->ToString()); + testing::ChanceOutcomesTest(*game); +} + +void TestLoadWithParams() { + auto game = LoadGame( + "mfg_crowd_modelling_2d(size=100,horizon=1000," + "only_distribution_reward=true)"); + auto state = game->NewInitialState(); + SPIEL_CHECK_EQ(game->ObservationTensorShape()[0], 1000 + 2 * 100 + 1); +} + +void TestLoadWithParams2() { + auto game = LoadGame( + "mfg_crowd_modelling_2d(size=100,horizon=1000,forbidden_states=[0|0;0|1]" + ",initial_distribution=[0|2;0|3],initial_distribution_value=[0.5;0.5]" + ",positional_reward=[1|3;1|4],positional_reward_value=[0.2;0.8]" + ",noise_intensity=0.5,crowd_aversion_coef=0.4,with_congestion=true" + ")"); + auto state = game->NewInitialState(); + SPIEL_CHECK_EQ(game->ObservationTensorShape()[0], 1000 + 2 * 100 + 1); +} + +void TestRandomPlay() { + testing::LoadGameTest("mfg_crowd_modelling_2d(size=10,horizon=20)"); + testing::RandomSimTest( + *LoadGame("mfg_crowd_modelling_2d(size=10,horizon=20)"), 3); +} + +void TestReward() { + auto game = LoadGame("mfg_crowd_modelling_2d(size=10,horizon=20)"); + auto state = game->NewInitialState(); + SPIEL_CHECK_EQ(state->CurrentPlayer(), kChancePlayerId); + state->ApplyAction(55); + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + // This expected reward assumes that the game is initialized with + // a uniform state distribution. 
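+  // Derivation of the 6.60517 constant: with size=10 the uniform density is
+  // 1/100, so r_mu = -log(0.01) ~= 4.60517; the default positional reward at
+  // the center (5, 5) contributes r_x = r_y = 1, and the initial neutral
+  // action adds no movement penalty, giving 1 + 1 + 0 + 4.60517 in total.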
+ SPIEL_CHECK_FLOAT_EQ(state->Rewards()[0], 6.60517); + SPIEL_CHECK_FLOAT_EQ(state->Returns()[0], 6.60517); + + state->ApplyAction(2); + SPIEL_CHECK_EQ(state->CurrentPlayer(), kChancePlayerId); + SPIEL_CHECK_FLOAT_EQ(state->Rewards()[0], 0.); + SPIEL_CHECK_FLOAT_EQ(state->Returns()[0], 6.60517); +} + +void TestDistRewardOnly() { + auto game = LoadGame( + "mfg_crowd_modelling_2d(size=10,horizon=20" + ",only_distribution_reward=true)"); + auto state = game->NewInitialState(); + SPIEL_CHECK_EQ(state->CurrentPlayer(), kChancePlayerId); + state->ApplyAction(55); + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_FLOAT_EQ(state->Rewards()[0], 4.60517); + SPIEL_CHECK_FLOAT_EQ(state->Returns()[0], 4.60517); +} + +void TestPositionalReward() { + auto game = LoadGame( + "mfg_crowd_modelling_2d(size=10,horizon=20" + ",positional_reward=[1|2;2|2],positional_reward_value=[0.5;0.5])"); + auto state = game->NewInitialState(); + SPIEL_CHECK_EQ(state->CurrentPlayer(), kChancePlayerId); + state->ApplyAction(55); + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_FLOAT_EQ(state->Rewards()[0], 5.30517); + SPIEL_CHECK_FLOAT_EQ(state->Returns()[0], 5.30517); +} + +void TestProcess() { + auto split_string_list0 = ProcessStringParam("[]", 5); + SPIEL_CHECK_EQ(split_string_list0.size(), 0); + auto split_string_list1 = ProcessStringParam("[0|0;0|1]", 5); + SPIEL_CHECK_EQ(split_string_list1.size(), 2); + auto split_string_list2 = ProcessStringParam("[0|2;0|3;0|4]", 5); + SPIEL_CHECK_EQ(split_string_list2.size(), 3); + auto split_string_list3 = ProcessStringParam("[0.5;0.5]", 5); + SPIEL_CHECK_EQ(split_string_list3.size(), 2); +} + +void TestLegalActions() { + auto game = LoadGame( + "mfg_crowd_modelling_2d(size=5,horizon=10,forbidden_states=[0|0;0|1;1|0]" + ",initial_distribution=[1|1],initial_distribution_value=[1.0])"); + auto state = game->NewInitialState(); + SPIEL_CHECK_EQ(state->CurrentPlayer(), kChancePlayerId); + // Legal action will be the state in the initial distribution. + SPIEL_CHECK_EQ(state->LegalActions(), std::vector({6})); + state->ApplyAction(6); + // Legal actions are moving right, down or no movement. + SPIEL_CHECK_EQ(state->LegalActions(), std::vector({2, 3, 4})); + // Go right. + state->ApplyAction(3); + // Chance node. No forbidden states around and all actions are legal. + SPIEL_CHECK_EQ(state->LegalActions(), std::vector({0, 1, 2, 3, 4})); + // Go left. + state->ApplyAction(1); + // Mean field node; legal actions will be empty. + SPIEL_CHECK_TRUE(state->LegalActions().empty()); + std::vector dist(25); + state->UpdateDistribution(dist); + // Back to starting point. + SPIEL_CHECK_EQ(state->LegalActions(), std::vector({2, 3, 4})); + // Stay in the same position. + state->ApplyAction(2); + // Chance node. The legal actions should be the same. + SPIEL_CHECK_EQ(state->LegalActions(), std::vector({2, 3, 4})); +} + +void TestNoiseIntensity() { + // Same as the setting in TestLegalActions() test above, except the noise + // intensity. + auto game = LoadGame( + "mfg_crowd_modelling_2d(size=5,horizon=10,forbidden_states=[0|0;0|1;1|0]" + ",initial_distribution=[1|1],initial_distribution_value=[1.0]" + ",noise_intensity=0.5)"); + auto state = game->NewInitialState(); + SPIEL_CHECK_EQ(state->CurrentPlayer(), kChancePlayerId); + SPIEL_CHECK_EQ(state->LegalActions(), std::vector({6})); + state->ApplyAction(6); + SPIEL_CHECK_EQ(state->LegalActions(), std::vector({2, 3, 4})); + state->ApplyAction(3); + // Now we are at a chance node. 
+ SPIEL_CHECK_EQ(state->LegalActions(), std::vector({0, 1, 2, 3, 4})); + // Neutral action should have a probability of 0.5+0.1 and others 0.1, i.e. + // 0.5 / 5. + ActionsAndProbs expected_outcomes( + {{0, 0.1}, {1, 0.1}, {2, 0.6}, {3, 0.1}, {4, 0.1}}); + SPIEL_CHECK_EQ(state->ChanceOutcomes(), expected_outcomes); +} + +} // namespace +} // namespace crowd_modelling_2d +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::crowd_modelling_2d::TestLegalActions(); + open_spiel::crowd_modelling_2d::TestLoad(); + open_spiel::crowd_modelling_2d::TestLoadWithParams(); + open_spiel::crowd_modelling_2d::TestLoadWithParams2(); + open_spiel::crowd_modelling_2d::TestRandomPlay(); + open_spiel::crowd_modelling_2d::TestReward(); + open_spiel::crowd_modelling_2d::TestDistRewardOnly(); + open_spiel::crowd_modelling_2d::TestPositionalReward(); + open_spiel::crowd_modelling_2d::TestProcess(); + open_spiel::crowd_modelling_2d::TestNoiseIntensity(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/mfg/crowd_modelling_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/mfg/crowd_modelling_test.cc new file mode 100644 index 0000000..903db94 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/mfg/crowd_modelling_test.cc @@ -0,0 +1,96 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/mfg/crowd_modelling.h" + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/random/uniform_int_distribution.h" +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace crowd_modelling { +namespace { + +namespace testing = open_spiel::testing; + +void TestLoad() { + testing::LoadGameTest("mfg_crowd_modelling"); + auto game = LoadGame("mfg_crowd_modelling"); + SPIEL_CHECK_EQ(game->GetType().dynamics, GameType::Dynamics::kMeanField); + auto state = game->NewInitialState(); + auto cloned = state->Clone(); + SPIEL_CHECK_EQ(state->ToString(), cloned->ToString()); + testing::ChanceOutcomesTest(*game); +} + +void TestLoadWithParams() { + auto game = LoadGame("mfg_crowd_modelling(size=100,horizon=1000)"); + auto state = game->NewInitialState(); + SPIEL_CHECK_EQ(game->ObservationTensorShape()[0], 1000 + 100 + 1); +} + +void CheckStatesEqual(const State& a, const State& b) { + const CrowdModellingState& left = + open_spiel::down_cast(a); + const CrowdModellingState& right = + open_spiel::down_cast(b); + SPIEL_CHECK_EQ(left.ToString(), right.ToString()); + SPIEL_CHECK_FLOAT_EQ(left.Rewards()[0], right.Rewards()[0]); + SPIEL_CHECK_FLOAT_EQ(left.Returns()[0], right.Returns()[0]); + SPIEL_CHECK_EQ(left.CurrentPlayer(), right.CurrentPlayer()); + auto left_distrib = left.Distribution(); + auto right_distrib = right.Distribution(); + SPIEL_CHECK_EQ(left_distrib.size(), right_distrib.size()); + for (int i = 0; i < left_distrib.size(); ++i) { + SPIEL_CHECK_FLOAT_EQ(left_distrib[i], right_distrib[i]); + } +} + +void TestRandomPlay() { + testing::LoadGameTest("mfg_crowd_modelling(size=10,horizon=20)"); + testing::RandomSimTest(*LoadGame("mfg_crowd_modelling(size=10,horizon=20)"), + 3); +} + +void TestReward() { + auto game = LoadGame("mfg_crowd_modelling(size=10,horizon=20)"); + auto state = game->NewInitialState(); + SPIEL_CHECK_EQ(state->CurrentPlayer(), kChancePlayerId); + state->ApplyAction(5); + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + // This expected reward assumes that the game is initialized with + // a uniform state distribution. + SPIEL_CHECK_FLOAT_EQ(state->Rewards()[0], 1. + std::log(10)); + SPIEL_CHECK_FLOAT_EQ(state->Returns()[0], 1. + std::log(10)); + + state->ApplyAction(1); + SPIEL_CHECK_EQ(state->CurrentPlayer(), kChancePlayerId); + SPIEL_CHECK_FLOAT_EQ(state->Rewards()[0], 0.); + SPIEL_CHECK_FLOAT_EQ(state->Returns()[0], 1. + std::log(10)); +} + +} // namespace +} // namespace crowd_modelling +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::crowd_modelling::TestLoad(); + open_spiel::crowd_modelling::TestLoadWithParams(); + open_spiel::crowd_modelling::TestRandomPlay(); + open_spiel::crowd_modelling::TestReward(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/mfg/dynamic_routing.cc b/scenarios/bargaining/open_spiel/open_spiel/games/mfg/dynamic_routing.cc new file mode 100644 index 0000000..2d943d2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/mfg/dynamic_routing.cc @@ -0,0 +1,465 @@ +// Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/mfg/dynamic_routing.h" + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/btree_set.h" +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/abseil-cpp/absl/memory/memory.h" +#include "open_spiel/abseil-cpp/absl/strings/numbers.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/games/dynamic_routing/dynamic_routing_data.h" +#include "open_spiel/games/dynamic_routing/dynamic_routing_utils.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel::dynamic_routing { + +namespace { + +inline constexpr double kEpsilon = 1e-4; + +const GameType kGameType{ + /*short_name=*/"mfg_dynamic_routing", + /*long_name=*/"Cpp Mean Field Dynamic Routing", + GameType::Dynamics::kMeanField, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kPerfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kRewards, + /*max_num_players=*/kNumPlayers, + /*min_num_players=*/kNumPlayers, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/false, + /*parameter_specification=*/ + {{"max_num_time_step", GameParameter(10)}, + {"time_step_length", GameParameter(kDefaultTimeStepLength)}, + {"players", GameParameter(-1)}, + {"network", GameParameter(kDefaultNetworkName)}, + {"perform_sanity_checks", GameParameter(true)}}, + /*default_loadable*/ true, + /*provides_factored_observation_string*/ true}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new MeanFieldRoutingGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +} // namespace + +MeanFieldRoutingGame::MeanFieldRoutingGame(const GameParameters& params) + : Game(kGameType, params) { + // Maps data name from string to the enum. 
+ const absl::flat_hash_map + data_name_string_to_enum = {{"line", DynamicRoutingDataName::kLine}, + {"braess", DynamicRoutingDataName::kBraess}}; + + int max_num_time_step = + ParameterValue("max_num_time_step", kDefaultMaxTimeStep); + SPIEL_CHECK_NE(max_num_time_step, 0); + time_step_length_ = + ParameterValue("time_step_length", kDefaultTimeStepLength); + network_name_ = ParameterValue("network", kDefaultNetworkName); + SPIEL_CHECK_NE(network_name_, ""); + perform_sanity_checks_ = ParameterValue("perform_sanity_checks", true); + std::unique_ptr data = + DynamicRoutingData::Create(data_name_string_to_enum.at(network_name_)); + network_ = std::move(data->network_); + od_demand_ = std::move(data->od_demand_); + network_->CheckListOfOdDemandIsCorrect(od_demand_.get()); + game_info_.num_distinct_actions = network_->num_actions(); + game_info_.max_chance_outcomes = static_cast(od_demand_->size()); + game_info_.num_players = kNumPlayers; + game_info_.min_utility = static_cast(-max_num_time_step - 1); + game_info_.max_utility = 0; + game_info_.max_game_length = max_num_time_step; +} + +std::unique_ptr MeanFieldRoutingGame::DeserializeState( + const std::string& str) const { + std::vector properties = absl::StrSplit(str, ','); + if (properties.size() != 10) { + SpielFatalError( + absl::StrCat("Expected 10 properties for serialized state, got: ", + properties.size())); + } + int current_time_step; + open_spiel::PlayerId player_id; + bool is_chance_init, is_terminal, vehicle_at_destination, + vehicle_without_legal_action; + int waiting_time; + double vehicle_final_travel_time; + SPIEL_CHECK_TRUE(absl::SimpleAtoi(properties[0], ¤t_time_step)); + SPIEL_CHECK_TRUE(absl::SimpleAtoi(properties[1], &player_id)); + SPIEL_CHECK_TRUE(absl::SimpleAtob(properties[2], &is_chance_init)); + SPIEL_CHECK_TRUE(absl::SimpleAtob(properties[3], &is_terminal)); + SPIEL_CHECK_TRUE(absl::SimpleAtob(properties[4], &vehicle_at_destination)); + SPIEL_CHECK_TRUE( + absl::SimpleAtob(properties[5], &vehicle_without_legal_action)); + SPIEL_CHECK_TRUE(absl::SimpleAtoi(properties[6], &waiting_time)); + SPIEL_CHECK_TRUE(absl::SimpleAtod(properties[7], &vehicle_final_travel_time)); + std::string vehicle_location(properties[8]), + vehicle_destination(properties[9]); + return MeanFieldRoutingGameState::Create( + shared_from_this(), time_step_length_, od_demand_.get(), network_.get(), + perform_sanity_checks_, current_time_step, player_id, is_chance_init, + is_terminal, vehicle_at_destination, vehicle_without_legal_action, + waiting_time, vehicle_final_travel_time, vehicle_location, + vehicle_destination); +} + +std::unique_ptr MeanFieldRoutingGameState::Create( + std::shared_ptr game, double time_step_length, + std::vector* od_demand, Network* network, + bool perform_sanity_checks, int current_time_step, + open_spiel::PlayerId player_id, bool is_chance_init, bool is_terminal, + bool vehicle_at_destination, bool vehicle_without_legal_action, + int waiting_time, double vehicle_final_travel_time, + std::string vehicle_location, std::string vehicle_destination) { + double total_num_vehicle = 0; + for (const OriginDestinationDemand& od_demand_item : *od_demand) { + total_num_vehicle += od_demand_item.counts; + } + int i = 0; + ActionsAndProbs chance_outcomes; + for (const auto& od_demand_item : *od_demand) { + chance_outcomes.emplace_back( + std::pair(i++, od_demand_item.counts / total_num_vehicle)); + } + return absl::WrapUnique( + new MeanFieldRoutingGameState( + game, time_step_length, od_demand, network, perform_sanity_checks, + 
current_time_step, player_id, is_chance_init, is_terminal, + vehicle_at_destination, vehicle_without_legal_action, waiting_time, + vehicle_final_travel_time, vehicle_location, vehicle_destination, + total_num_vehicle, chance_outcomes)); +} + +std::unique_ptr +MeanFieldRoutingGameState::CreateNewInitialState( + std::shared_ptr game, double time_step_length, + std::vector* od_demand, Network* network, + bool perform_sanity_checks) { + return MeanFieldRoutingGameState::Create( + game, time_step_length, od_demand, network, perform_sanity_checks, + /* current_time_step= */ 0, + /* player_id = */ open_spiel::PlayerId::kChancePlayerId, + /* is_chance_init = */ true, + /* is_terminal = */ false, + /* vehicle_at_destination = */ false, + /* vehicle_without_legal_action = */ false, + /* waiting_time = */ kWaitingTimeNotAssigned, + /* vehicle_final_travel_time = */ 0.0, + /* vehicle_location = */ "", + /* vehicle_destination = */ ""); +} + +MeanFieldRoutingGameState::MeanFieldRoutingGameState( + std::shared_ptr game, double time_step_length, + std::vector* od_demand, Network* network, + bool perform_sanity_checks, int current_time_step, + open_spiel::PlayerId player_id, bool is_chance_init, bool is_terminal, + bool vehicle_at_destination, bool vehicle_without_legal_action, + int waiting_time, double vehicle_final_travel_time, + std::string vehicle_location, std::string vehicle_destination, + double total_num_vehicle, const ActionsAndProbs& chance_outcomes) + : State(game), + current_time_step_(current_time_step), + current_player_id_(player_id), + is_chance_init_(is_chance_init), + is_terminal_(is_terminal), + vehicle_at_destination_(vehicle_at_destination), + vehicle_without_legal_action_(vehicle_without_legal_action), + waiting_time_(waiting_time), + vehicle_final_travel_time_(vehicle_final_travel_time), + vehicle_location_(vehicle_location), + vehicle_destination_(vehicle_destination), + time_step_length_(time_step_length), + max_travel_time_(game->MaxGameLength()), + perform_sanity_checks_(perform_sanity_checks), + od_demand_(od_demand), + network_(network), + total_num_vehicle_(total_num_vehicle), + chance_outcomes_(chance_outcomes) {} + +std::string MeanFieldRoutingGameState::StateToString( + std::string location, int time_step, int player_id, int waiting_time, + std::string destination, double ret) const { + std::string time; + if (destination.empty()) { + destination = vehicle_destination_; + } + if (is_chance_init_) { + return "initial chance node"; + } + if (player_id == PlayerId::kDefaultPlayerId || + player_id == PlayerId::kTerminalPlayerId) { + time = absl::StrCat(time_step); + } else if (player_id == PlayerId::kMeanFieldPlayerId) { + time = absl::StrFormat("%d_mean_field", time_step); + } else if (player_id == PlayerId::kChancePlayerId) { + time = absl::StrFormat("%d_chance", time_step); + } else { + SpielFatalError( + "Player id should be DEFAULT_PLAYER_ID, MEAN_FIELD or CHANCE"); + } + if (vehicle_final_travel_time_ != 0.0) { + return absl::StrFormat("Arrived at %s, with arrival time %.2f, t=%s", + location, vehicle_final_travel_time_, time); + } + return absl::StrFormat("Location=%s, waiting time=%d, t=%s, destination=%s", + location, waiting_time, time, destination); +} + +std::vector MeanFieldRoutingGameState::LegalActions() const { + if (is_terminal_) { + return {}; + } + SPIEL_CHECK_NE(CurrentPlayer(), kMeanFieldPlayerId); + if (CurrentPlayer() == kChancePlayerId) { + return LegalChanceOutcomes(); + } + if (perform_sanity_checks_) { + SPIEL_CHECK_EQ(CurrentPlayer(), 
kDefaultPlayerId); + } + if (waiting_time_ > 0) { + return {kNoPossibleAction}; + } + if (vehicle_without_legal_action_) { + return {kNoPossibleAction}; + } + std::string end_section_node = NodesFromRoadSection(vehicle_location_)[1]; + std::vector successors = + network_->GetSuccessors(end_section_node); + if (perform_sanity_checks_) { + SPIEL_CHECK_TRUE(!successors.empty()); + } + std::vector actions; + for (const auto& d : successors) { + Action action = network_->GetActionIdFromMovement(end_section_node, d); + network_->AssertValidAction(action); + actions.push_back(action); + } + std::sort(actions.begin(), actions.end()); + return actions; +} + +void MeanFieldRoutingGameState::DoApplyAction(Action action) { + if (perform_sanity_checks_) { + SPIEL_CHECK_TRUE(!IsTerminal()); + SPIEL_CHECK_NE(current_player_id_, PlayerId::kMeanFieldPlayerId); + } + switch (current_player_id_) { + case PlayerId::kChancePlayerId: { + current_player_id_ = PlayerId::kDefaultPlayerId; + SPIEL_CHECK_EQ(is_chance_init_, true); + auto od_demand = od_demand_->at(action); + vehicle_destination_ = od_demand.vehicle.destination; + vehicle_location_ = od_demand.vehicle.origin; + waiting_time_ = static_cast(od_demand.vehicle.departure_time / + time_step_length_); + is_chance_init_ = false; + break; + } + case PlayerId::kDefaultPlayerId: { + current_player_id_ = PlayerId::kMeanFieldPlayerId; + if (!vehicle_without_legal_action_) { + if (waiting_time_ > 0) { + waiting_time_ -= 1; + } else { + if (perform_sanity_checks_) { + network_->AssertValidAction(action, vehicle_location_); + } + vehicle_location_ = network_->GetRoadSectionFromActionId(action); + if (vehicle_location_ == vehicle_destination_) { + vehicle_final_travel_time_ = current_time_step_; + vehicle_at_destination_ = true; + vehicle_without_legal_action_ = true; + } else if (network_->IsLocationASinkNode(vehicle_location_)) { + vehicle_without_legal_action_ = true; + vehicle_final_travel_time_ = -1 * GetGame()->MinUtility(); + } else { + waiting_time_ = kWaitingTimeNotAssigned; + } + } + } + current_time_step_ += 1; + break; + } + default: + SpielFatalError(absl::StrCat("Unsupported Player ID in DoApplyAction(): ", + current_player_id_)); + } + + if (current_time_step_ >= GetGame()->MaxGameLength()) { + is_terminal_ = true; + current_player_id_ = PlayerId::kTerminalPlayerId; + if (!vehicle_at_destination_) { + vehicle_final_travel_time_ = -1 * GetGame()->MinUtility(); + } + } +} + +std::string MeanFieldRoutingGameState::ActionToString(Player player, + Action action) const { + SPIEL_CHECK_NE(player, PlayerId::kMeanFieldPlayerId); + if (player == PlayerId::kChancePlayerId) { + SPIEL_CHECK_TRUE(is_chance_init_); + return absl::StrFormat("Vehicle is assigned to population %d", action); + } + if (perform_sanity_checks_) { + SPIEL_CHECK_EQ(player, kDefaultPlayerId); + } + + if (action == kNoPossibleAction) { + return absl::StrFormat("Vehicle %d reach a sink node or its destination.", + player); + } + if (perform_sanity_checks_) { + network_->AssertValidAction(action); + } + return absl::StrFormat("Vehicle %d would like to move to %s.", player, + network_->GetRoadSectionFromActionId(action)); +} + +Action MeanFieldRoutingGameState::GetLocationAsActionInt() const { + return network_->GetRoadSectionAsInt(vehicle_location_); +} + +Action MeanFieldRoutingGameState::GetDestinationAsActionInt() const { + return network_->GetRoadSectionAsInt(vehicle_destination_); +} + +int MeanFieldRoutingGameState::CurrentTimeStamp() const { + return current_time_step_; +} + +int 
MeanFieldRoutingGameState::CurrentPlayer() const { + return current_player_id_; +} + +bool MeanFieldRoutingGameState::IsTerminal() const { return is_terminal_; } + +bool MeanFieldRoutingGameState::IsWaiting() const { return waiting_time_ > 0; } + +const Network* MeanFieldRoutingGameState::network() const { return network_; } + +std::vector MeanFieldRoutingGameState::Returns() const { + if (!IsTerminal()) { + return std::vector{0}; + } + double ret = -vehicle_final_travel_time_ * time_step_length_; + return std::vector{ret}; +} + +std::vector MeanFieldRoutingGameState::DistributionSupport() { + if (vehicle_without_legal_action_) { + return {}; + } + std::vector dist; + for (int waiting_time = kWaitingTimeNotAssigned; + waiting_time < max_travel_time_; waiting_time++) { + for (const auto& od : *(od_demand_)) { + std::string destination = od.vehicle.destination; + std::string value = + StateToString(vehicle_location_, current_time_step_, + PlayerId::kMeanFieldPlayerId, waiting_time, destination, + /*ret = */ 0.0); + dist.push_back(value); + } + } + absl::btree_set dist_set(dist.begin(), dist.end()); + SPIEL_CHECK_EQ(dist_set.size(), dist.size()); + return dist; +} + +void MeanFieldRoutingGameState::UpdateDistribution( + const std::vector& distribution) { + if (current_player_id_ == PlayerId::kTerminalPlayerId) { + return; + } + if (perform_sanity_checks_) { + SPIEL_CHECK_EQ(current_player_id_, PlayerId::kMeanFieldPlayerId); + } + current_player_id_ = PlayerId::kDefaultPlayerId; + + if (!vehicle_without_legal_action_) { + double normed_density_on_vehicle_link = 0; + for (const double& d : distribution) { + normed_density_on_vehicle_link += d; + } + if (perform_sanity_checks_) { + SPIEL_CHECK_GE(normed_density_on_vehicle_link, 0); + SPIEL_CHECK_LE(normed_density_on_vehicle_link, 1 + kEpsilon); + } + if (waiting_time_ == kWaitingTimeNotAssigned) { + double volume = total_num_vehicle_ * normed_density_on_vehicle_link; + waiting_time_ = + static_cast(network_->GetTravelTime(vehicle_location_, volume) / + time_step_length_) - + 1; + waiting_time_ = std::max(0, waiting_time_); + } + } +} + +ActionsAndProbs MeanFieldRoutingGameState::ChanceOutcomes() const { + SPIEL_CHECK_NE(current_player_id_, PlayerId::kMeanFieldPlayerId); + if (perform_sanity_checks_) { + SPIEL_CHECK_EQ(current_player_id_, PlayerId::kChancePlayerId); + SPIEL_CHECK_TRUE(is_chance_init_); + } + return chance_outcomes_; +} + +std::unique_ptr MeanFieldRoutingGameState::Clone() const { + return absl::make_unique(*this); +} + +std::string MeanFieldRoutingGameState::Serialize() const { + return absl::StrCat(current_time_step_, ",", current_player_id_, ",", + is_chance_init_, ",", is_terminal_, ",", + vehicle_at_destination_, ",", + vehicle_without_legal_action_, ",", waiting_time_, ",", + vehicle_final_travel_time_, ",", vehicle_location_, ",", + vehicle_destination_); +} + +std::string MeanFieldRoutingGameState::ToString() const { + if (!vehicle_location_.empty()) { + return StateToString(vehicle_location_, current_time_step_, + current_player_id_, waiting_time_, + vehicle_destination_, Returns()[0]); + } + SPIEL_CHECK_EQ(current_time_step_, 0); + return "Before initial chance node."; +} + +} // namespace open_spiel::dynamic_routing diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/mfg/dynamic_routing.h b/scenarios/bargaining/open_spiel/open_spiel/games/mfg/dynamic_routing.h new file mode 100644 index 0000000..dde5ba4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/mfg/dynamic_routing.h @@ -0,0 +1,329 @@ 
+// Copyright 2019 DeepMind Technologies Ltd. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Implementation of a mean field routing game.
+//
+// The game is derived from https://arxiv.org/abs/2110.11943.
+// This game is also implemented in Python, see
+// open_spiel/python/mfg/games/dynamic_routing.py.
+// The list of vehicles describing the N players of the dynamic routing game is
+// replaced by a list of OriginDestinationDemand. One OriginDestinationDemand
+// corresponds to one population of vehicles (with the same origin, destination
+// and departure time).
+//
+// This game is a variant of the mean field route choice game
+// (https://ieeexplore.ieee.org/abstract/document/8619448) as the vehicle
+// movement depends on the current network congestion. In the mean field route
+// choice game, the number of time steps to reach the destination is constant
+// and does not depend on the network congestion; neither does the vehicle cost
+// function. In the dynamic driving and routing game
+// (https://doi.org/10.1016/j.trc.2021.103189), the vehicle chooses its
+// speed to travel on each link in order to minimize its cost function.
+// Therefore the congestion is encoded in the cost function.
+//
+// More context can be found in the docstring of the python_dynamic_routing
+// class.
+
+#ifndef OPEN_SPIEL_GAMES_MFG_DYNAMIC_ROUTING_H_
+#define OPEN_SPIEL_GAMES_MFG_DYNAMIC_ROUTING_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "open_spiel/game_parameters.h"
+#include "open_spiel/games/dynamic_routing/dynamic_routing_utils.h"
+#include "open_spiel/spiel.h"
+#include "open_spiel/spiel_globals.h"
+#include "open_spiel/spiel_utils.h"
+
+namespace open_spiel::dynamic_routing {
+
+// This mean field game is a 1-population game, so it has only one
+// (representative) player type.
+inline constexpr int kNumPlayers = 1;
+// A player moves to a new link during a decision node, then its waiting
+// time is reassigned based on the number of players on the new link during the
+// next chance node. Therefore the waiting time is assigned to
+// `kWaitingTimeNotAssigned` between the decision node for a player that moves
+// and the following chance node.
+inline constexpr int kWaitingTimeNotAssigned = -1;
+// kDefaultTimeStepLength is used to convert travel times into a number of game
+// time steps.
+inline constexpr double kDefaultTimeStepLength = 1.0;
+// Set the default values to pass auto tests with no args.
+inline constexpr int kDefaultMaxTimeStep = 10;
+inline constexpr const char* kDefaultNetworkName = "braess";
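For orientation, the sketch below (not part of the patch) shows how a solver would typically drive this game through its chance, decision and mean field nodes, mirroring the usage in `dynamic_routing_test.cc` later in this diff; the parameter values and the point-mass distribution are illustrative assumptions only.

```cpp
#include <iostream>
#include <memory>
#include <vector>

#include "open_spiel/spiel.h"

int main() {
  std::shared_ptr<const open_spiel::Game> game = open_spiel::LoadGame(
      "mfg_dynamic_routing(max_num_time_step=10,time_step_length=0.5,"
      "network=braess)");
  std::unique_ptr<open_spiel::State> state = game->NewInitialState();

  // Initial chance node: assign the representative vehicle to an OD demand.
  state->ApplyAction(state->LegalActions()[0]);

  // Decision node: move the vehicle onto one of the successor links.
  state->ApplyAction(state->LegalActions()[0]);

  // Mean field node: the solver supplies the population distribution over the
  // states listed by DistributionSupport(); a point mass is used here.
  std::vector<double> distribution(state->DistributionSupport().size(), 0.0);
  distribution[0] = 1.0;
  state->UpdateDistribution(distribution);

  // Back at a decision node; the waiting time now reflects the congestion.
  std::cout << state->ToString() << std::endl;
  return 0;
}
```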
+// State of the MeanFieldRoutingGame.
+// One player is equal to one representative vehicle.
+// See the docstring of the MeanFieldRoutingGame class and of the file for more
+// information.
+class MeanFieldRoutingGameState : public State {
+ public:
+  static std::unique_ptr<MeanFieldRoutingGameState> CreateNewInitialState(
+      std::shared_ptr<const Game> game, double time_step_length,
+      std::vector<OriginDestinationDemand>* od_demand, Network* network,
+      bool perform_sanity_checks = true);
+
+  // Returns the vehicle location.
+  // This will be the 1-based action index of the location, or 0 when the
+  // location is empty before the initial chance node.
+  Action GetLocationAsActionInt() const;
+
+  // Returns the vehicle destination.
+  // This will be the 1-based action index of the destination, or 0 when the
+  // destination is empty before the initial chance node.
+  Action GetDestinationAsActionInt() const;
+
+  int CurrentTimeStamp() const;
+  const Network* network() const;
+  bool IsWaiting() const;
+
+  Player CurrentPlayer() const override;
+  bool IsTerminal() const override;
+  std::vector<double> Returns() const override;
+  std::string ActionToString(Player player, Action action) const override;
+  std::unique_ptr<State> Clone() const override;
+  std::string ToString() const override;
+  std::string Serialize() const override;
+
+  // Converts the representative player state to its unique string
+  // representation. The string representation is used as a hashmap key by the
+  // various algorithms that compute the state value, the expected return, a
+  // best response or the mean field Nash equilibrium. The state of the
+  // representative player is uniquely defined by the current time, the type of
+  // node (decision, mean field or chance), the vehicle location, its
+  // destination and its waiting time.
+  // Args:
+  //   `is_chance_init`: True if at chance initialization.
+  //   `location`: the location of the representative player.
+  //   `time_step`: the current time step.
+  //   `player_id`: the current node type as a player id.
+  //   `waiting_time`: the representative player waiting time.
+  //   `destination`: the destination of the representative player.
+  std::string StateToString(std::string location, int time_step,
+                            Player player_id = PlayerId::kDefaultPlayerId,
+                            int waiting_time = 0, std::string destination = "",
+                            double ret = 0) const;
+
+  // Returns the list of states over which we need to know the distribution of
+  // players in order to update the current representative player state.
+  // The distribution of the vehicles' states is used to determine the number
+  // of cars on the representative vehicle's new link, which in turn defines
+  // the waiting time of the representative vehicle when it joins this link.
+  // Therefore, if the representative vehicle does not move at this time step,
+  // then no states are useful. If the representative vehicle moves at this
+  // time step, then only the states corresponding to being on the new link of
+  // the representative vehicle are needed to compute the representative
+  // vehicle's new waiting time.
+  // Returns:
+  //   An array of the string representation of all OD_DEMANDs.
+  std::vector<std::string> DistributionSupport() override;
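As a hedged illustration of how `DistributionSupport()` and `UpdateDistribution()` (declared just below) are meant to be used together by a mean field algorithm, here is a minimal sketch; the `ApplyMeanField` helper and its `state_distribution` map are hypothetical names introduced for this example, not part of the patch.

```cpp
#include <map>
#include <string>
#include <vector>

#include "open_spiel/spiel.h"

// Hypothetical helper: `state_distribution` maps the string keys produced by
// StateToString() to the probability mass computed by the outer algorithm.
void ApplyMeanField(open_spiel::State* state,
                    const std::map<std::string, double>& state_distribution) {
  // Ask the state which population states matter for its next transition.
  const std::vector<std::string> support = state->DistributionSupport();
  std::vector<double> distribution;
  distribution.reserve(support.size());
  for (const std::string& key : support) {
    const auto it = state_distribution.find(key);
    // States missing from the map are treated as holding no probability mass.
    distribution.push_back(it == state_distribution.end() ? 0.0 : it->second);
  }
  // The routing state turns this density into the waiting time on the
  // vehicle's new link (see UpdateDistribution() in dynamic_routing.cc).
  state->UpdateDistribution(distribution);
}
```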
+  // Updates the travel time from the distribution.
+  // Using the distribution `distribution` of vehicles over the states in
+  // `DistributionSupport()`, computes the number of cars on the same link as
+  // the representative player, if it has moved during the last time step, and
+  // stores it internally to assign a new waiting time to the player. If the
+  // player has not moved during the last time step, nothing is done.
+  // Args:
+  //   `distribution`: the probability for a vehicle to be in the states in
+  //     distribution_support. The distribution is a list of probabilities.
+  void UpdateDistribution(const std::vector<double>& distribution) override;
+
+  // On the initial node, returns the initial state probability distribution.
+  // One chance outcome corresponds to each possible (origin, destination,
+  // departure time) tuple; the probability of each chance outcome is the
+  // proportion of the corresponding tuple.
+  ActionsAndProbs ChanceOutcomes() const override;
+
+  // Returns an array of legal actions.
+  // If the game is finished, or if the vehicle is at its destination, has a
+  // positive waiting time, or is on a node without successors, then an empty
+  // list is returned. Otherwise the list of successor nodes of the current
+  // vehicle location is returned.
+  std::vector<Action> LegalActions() const override;
+
+  std::string InformationStateString(Player player) const override {
+    SPIEL_CHECK_GE(player, 0);
+    SPIEL_CHECK_LT(player, num_players_);
+    return HistoryString();
+  }
+
+  std::string ObservationString(Player player) const override {
+    SPIEL_CHECK_GE(player, 0);
+    SPIEL_CHECK_LT(player, num_players_);
+    return ToString();
+  }
+
+ protected:
+  // Can be called either on a chance node or on a decision node.
+  // If called on the initial chance node, the action gives the OD demand to
+  // which the representative vehicle belongs (it puts the vehicle at this
+  // location and defines its destination).
+  // If called on a decision node, the action defines on which link the vehicle
+  // will move (if it is not stuck in traffic) and assigns a waiting time to
+  // the vehicle.
+  void DoApplyAction(Action action) override;
+
+ private:
+  static std::unique_ptr<MeanFieldRoutingGameState> Create(
+      std::shared_ptr<const Game> game, double time_step_length,
+      std::vector<OriginDestinationDemand>* od_demand, Network* network,
+      bool perform_sanity_checks, int current_time_step,
+      open_spiel::PlayerId player_id, bool is_chance_init, bool is_terminal,
+      bool vehicle_at_destination, bool vehicle_without_legal_action,
+      int waiting_time, double vehicle_final_travel_time,
+      std::string vehicle_location, std::string vehicle_destination);
+
+  explicit MeanFieldRoutingGameState(
+      std::shared_ptr<const Game> game, double time_step_length,
+      std::vector<OriginDestinationDemand>* od_demand, Network* network,
+      bool perform_sanity_checks, int current_time_step,
+      open_spiel::PlayerId player_id, bool is_chance_init, bool is_terminal,
+      bool vehicle_at_destination, bool vehicle_without_legal_action,
+      int waiting_time, double vehicle_final_travel_time,
+      std::string vehicle_location, std::string vehicle_destination,
+      double total_num_vehicle, const ActionsAndProbs& chance_outcomes);
+
+  int current_time_step_;
+  open_spiel::PlayerId current_player_id_;
+  bool is_chance_init_;
+  bool is_terminal_;
+  // Boolean that encodes whether the representative vehicle has reached its
+  // destination.
+  bool vehicle_at_destination_;
+  // Boolean that encodes whether the representative vehicle has reached a sink
+  // node, meaning that it will not be able to move anymore.
+  bool vehicle_without_legal_action_;
+  // Time that the vehicle has to wait before moving to the next link (equal to
+  // the link travel time when the vehicle just reached the link).
+  int waiting_time_;
+  // The final travel time of the representative vehicle: 0 while the vehicle
+  // is still in the network, or its arrival time once it has reached its
+  // destination.
+  double vehicle_final_travel_time_;
+  // Current location of the vehicle as a network road section.
+  std::string vehicle_location_;
+  // The destination of the representative vehicle corresponding to this state.
+ // It is associated to the representative vehicle after the initial chance + // node according to the od_demand distribution. + std::string vehicle_destination_; + + // Size of the time step, used to convert travel times into number of game + // time steps. + const double time_step_length_; + // Encodes maximum arrival time on any link in number of time steps. + // Needed to enumerate all the possible state of a vehicle being on a link to + // compute volume of cars on the link. + const int max_travel_time_; + // Whether to perform sanity checks, derived from `MeanFieldRoutingGame`. + const bool perform_sanity_checks_; + // An array of OriginDestinationDemand derived from `MeanFieldRoutingGame`, + // owned by the corresponding game. + const std::vector* od_demand_; + // Network owned by the corresponding game. + const Network* network_; + // Total number of vehicles as the sum of the od_demand. + const double total_num_vehicle_; + // Chance outcomes based on the initial probability distribution. + const ActionsAndProbs chance_outcomes_; + + friend class MeanFieldRoutingGame; +}; + +// In the implementation of the mean field routing game, the representative +// vehicle/player is represented as a tuple current location, current waiting +// time and destination. When the waiting time is negative, the vehicle chooses +// the successor link it would like to go. When arriving on the link, a +// waiting time is assigned to the player based on the distribution of players +// on the link. The vehicle arrival time is equal to the time step when they +// first reach their destination. See module docstring for more information. +class MeanFieldRoutingGame : public Game { + public: + // Constructor of the game. + // Args: + // `params`: game parameters. It should define max_num_time_step, + // time_step_length, network and perform_sanity_checks. + explicit MeanFieldRoutingGame(const GameParameters& params); + + // There is only 1 chance node (the initial node). + int MaxChanceNodesInHistory() const override { return 1; } + // Maximum number of possible actions. + // This is equal to the number of links + 1 + // (corresponding to having no possible action kNoPossibleAction). + int NumDistinctActions() const override { + return game_info_.num_distinct_actions; + } + // The number of vehicles. + // Should be 1 as this mean field game is a one population game. + int NumPlayers() const override { + SPIEL_CHECK_EQ(game_info_.num_players, 1); + return game_info_.num_players; + } + // Minimum utility is the opposite of the maximum arrival time. + // Set to - max_game_length - 1. + double MinUtility() const override { + SPIEL_CHECK_EQ(game_info_.min_utility, -1 * game_info_.max_game_length - 1); + return game_info_.min_utility; + } + // Maximum utility is the opposite of the minimum arrival time. Set to 0. + double MaxUtility() const override { return game_info_.max_utility; } + // Maximum number of time step played. Passed during construction. + int MaxGameLength() const override { return game_info_.max_game_length; } + // Maximum number of chance actions. Set to the length of + // od_demand_, i.e. the number of `OriginDestinationDemand`s. + int MaxChanceOutcomes() const override { + return game_info_.max_chance_outcomes; + } + // If true, sanity checks are done during the game, should be set to false to + // speed up the game. + bool perform_sanity_checks() const { return perform_sanity_checks_; } + + // Creates a new initial state of the MeanFieldRoutingGame. 
+ std::unique_ptr NewInitialState() const override { + return MeanFieldRoutingGameState::CreateNewInitialState( + shared_from_this(), time_step_length_, od_demand_.get(), network_.get(), + perform_sanity_checks_); + } + + // Returns the tensor shape for observation. + std::vector ObservationTensorShape() const override { + int num_locations = network_->num_actions(); + int max_num_time_step = MaxGameLength(); + return {num_locations * 2 + max_num_time_step + 1 + 1}; + } + + // Deserialize a formatted string to MeanFieldRoutingGameState. + std::unique_ptr DeserializeState( + const std::string& str) const override; + + private: + std::string network_name_; + std::unique_ptr network_; + // A list of the vehicle. Their origin and their destination should be road + // sections of the game. + std::unique_ptr> od_demand_; + // If true, sanity checks are done during the game, should be set to false to + // speed up the game. + bool perform_sanity_checks_; + // Is used to convert travel times into number of game time steps. + double time_step_length_; + GameInfo game_info_; +}; + +} // namespace open_spiel::dynamic_routing + +#endif // OPEN_SPIEL_GAMES_MFG_DYNAMIC_ROUTING_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/mfg/dynamic_routing_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/mfg/dynamic_routing_test.cc new file mode 100644 index 0000000..6ced18e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/mfg/dynamic_routing_test.cc @@ -0,0 +1,344 @@ +// Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel::dynamic_routing { +namespace { + +namespace testing = open_spiel::testing; + +void TestLoad() { + testing::LoadGameTest( + "mfg_dynamic_routing(max_num_time_step=10,time_step_length=20.0" + ",network=line)"); + auto game = LoadGame( + "mfg_dynamic_routing(max_num_time_step=10,time_step_length=20.0" + ",network=line)"); + auto state = game->NewInitialState(); + auto cloned = state->Clone(); + SPIEL_CHECK_EQ(state->ToString(), cloned->ToString()); + SPIEL_CHECK_EQ(game->GetType().dynamics, GameType::Dynamics::kMeanField); + testing::ChanceOutcomesTest(*game); +} + +void TestLoadWithParams() { + testing::LoadGameTest( + "mfg_dynamic_routing(max_num_time_step=10,time_step_length=20.0" + ",network=line)"); + auto game = LoadGame( + "mfg_dynamic_routing(max_num_time_step=10,time_step_length=20.0" + ",network=line)"); + auto state = game->NewInitialState(); + SPIEL_CHECK_EQ(game->ObservationTensorShape().size(), 1); + SPIEL_CHECK_EQ(game->ObservationTensorShape()[0], + game->NumDistinctActions() * 2 + game->MaxGameLength() + 2); +} + +void TestWholeGameWithLineNetwork() { + std::vector distribution{1}; + auto game = LoadGame( + "mfg_dynamic_routing(max_num_time_step=5,time_step_length=0.5," + "network=line)"); + auto state = game->NewInitialState(); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), kChancePlayerId); + SPIEL_CHECK_EQ(state->ToString(), "Before initial chance node."); + SPIEL_CHECK_EQ(state->LegalActions(), std::vector{0}); + SPIEL_CHECK_EQ(state->ActionToString(0), + "Vehicle is assigned to population 0"); + state->ApplyAction(0); + SPIEL_CHECK_EQ(state->CurrentPlayer(), kDefaultPlayerId); + SPIEL_CHECK_EQ( + state->ToString(), + "Location=bef_O->O, waiting time=0, t=0, destination=D->aft_D"); + + SPIEL_CHECK_EQ(state->LegalActions(), std::vector{3}); + state->ApplyAction(3); + SPIEL_CHECK_EQ( + state->ToString(), + "Location=O->A, waiting time=-1, t=1_mean_field, destination=D->aft_D"); + + state->UpdateDistribution(distribution); + SPIEL_CHECK_EQ(state->ToString(), + "Location=O->A, waiting time=1, t=1, destination=D->aft_D"); + + SPIEL_CHECK_EQ(state->LegalActions(), std::vector{0}); + state->ApplyAction(0); + SPIEL_CHECK_EQ( + state->ToString(), + "Location=O->A, waiting time=0, t=2_mean_field, destination=D->aft_D"); + + state->UpdateDistribution(distribution); + SPIEL_CHECK_EQ(state->ToString(), + "Location=O->A, waiting time=0, t=2, destination=D->aft_D"); + + SPIEL_CHECK_EQ(state->LegalActions(), std::vector{1}); + state->ApplyAction(1); + SPIEL_CHECK_EQ( + state->ToString(), + "Location=A->D, waiting time=-1, t=3_mean_field, destination=D->aft_D"); + + state->UpdateDistribution(distribution); + SPIEL_CHECK_EQ(state->ToString(), + "Location=A->D, waiting time=1, t=3, destination=D->aft_D"); + + SPIEL_CHECK_EQ(state->LegalActions(), std::vector{0}); + state->ApplyAction(0); + SPIEL_CHECK_EQ( + state->ToString(), + "Location=A->D, waiting time=0, t=4_mean_field, destination=D->aft_D"); + + state->UpdateDistribution(distribution); + SPIEL_CHECK_EQ(state->ToString(), + "Location=A->D, waiting time=0, t=4, destination=D->aft_D"); + + SPIEL_CHECK_EQ(state->LegalActions(), std::vector{2}); + state->ApplyAction(2); + SPIEL_CHECK_EQ(state->ToString(), + "Arrived at D->aft_D, with arrival time 4.00, t=5"); + + state->UpdateDistribution(distribution); + SPIEL_CHECK_EQ(state->ToString(), + 
"Arrived at D->aft_D, with arrival time 4.00, t=5"); +} + +void TestWholeGameWithBraessNetwork() { + std::vector distribution{1}; + auto game = LoadGame( + "mfg_dynamic_routing(max_num_time_step=12,time_step_length=0.5," + "network=braess)"); + auto state = game->NewInitialState(); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), kChancePlayerId); + SPIEL_CHECK_EQ(state->ToString(), "Before initial chance node."); + SPIEL_CHECK_EQ(state->LegalActions(), std::vector{0}); + SPIEL_CHECK_EQ(state->ActionToString(0), + "Vehicle is assigned to population 0"); + state->ApplyAction(0); + SPIEL_CHECK_EQ(state->CurrentPlayer(), kDefaultPlayerId); + SPIEL_CHECK_EQ(state->ToString(), + "Location=O->A, waiting time=0, t=0, destination=D->E"); + + SPIEL_CHECK_EQ(state->LegalActions(), (std::vector{1, 2})); + state->ApplyAction(1); + SPIEL_CHECK_EQ( + state->ToString(), + "Location=A->B, waiting time=-1, t=1_mean_field, destination=D->E"); + + state->UpdateDistribution(distribution); + SPIEL_CHECK_EQ(state->ToString(), + "Location=A->B, waiting time=3, t=1, destination=D->E"); + + SPIEL_CHECK_EQ(state->LegalActions(), (std::vector{0})); + state->ApplyAction(0); + SPIEL_CHECK_EQ( + state->ToString(), + "Location=A->B, waiting time=2, t=2_mean_field, destination=D->E"); + + state->UpdateDistribution(distribution); + SPIEL_CHECK_EQ(state->ToString(), + "Location=A->B, waiting time=2, t=2, destination=D->E"); + + SPIEL_CHECK_EQ(state->LegalActions(), (std::vector{0})); + state->ApplyAction(0); + SPIEL_CHECK_EQ( + state->ToString(), + "Location=A->B, waiting time=1, t=3_mean_field, destination=D->E"); + + state->UpdateDistribution(distribution); + SPIEL_CHECK_EQ(state->ToString(), + "Location=A->B, waiting time=1, t=3, destination=D->E"); + + SPIEL_CHECK_EQ(state->LegalActions(), (std::vector{0})); + state->ApplyAction(0); + SPIEL_CHECK_EQ( + state->ToString(), + "Location=A->B, waiting time=0, t=4_mean_field, destination=D->E"); + + state->UpdateDistribution(distribution); + SPIEL_CHECK_EQ(state->ToString(), + "Location=A->B, waiting time=0, t=4, destination=D->E"); + + SPIEL_CHECK_EQ(state->LegalActions(), (std::vector{3, 4})); + state->ApplyAction(3); + SPIEL_CHECK_EQ( + state->ToString(), + "Location=B->C, waiting time=-1, t=5_mean_field, destination=D->E"); + + state->UpdateDistribution(distribution); + SPIEL_CHECK_EQ(state->ToString(), + "Location=B->C, waiting time=0, t=5, destination=D->E"); + + SPIEL_CHECK_EQ(state->LegalActions(), std::vector{5}); + state->ApplyAction(5); + SPIEL_CHECK_EQ( + state->ToString(), + "Location=C->D, waiting time=-1, t=6_mean_field, destination=D->E"); + + state->UpdateDistribution(distribution); + SPIEL_CHECK_EQ(state->ToString(), + "Location=C->D, waiting time=3, t=6, destination=D->E"); + + SPIEL_CHECK_EQ(state->LegalActions(), std::vector{0}); + state->ApplyAction(0); + SPIEL_CHECK_EQ( + state->ToString(), + "Location=C->D, waiting time=2, t=7_mean_field, destination=D->E"); + + state->UpdateDistribution(distribution); + SPIEL_CHECK_EQ(state->ToString(), + "Location=C->D, waiting time=2, t=7, destination=D->E"); + + SPIEL_CHECK_EQ(state->LegalActions(), std::vector{0}); + state->ApplyAction(0); + SPIEL_CHECK_EQ( + state->ToString(), + "Location=C->D, waiting time=1, t=8_mean_field, destination=D->E"); + + state->UpdateDistribution(distribution); + SPIEL_CHECK_EQ(state->ToString(), + "Location=C->D, waiting time=1, t=8, destination=D->E"); + + SPIEL_CHECK_EQ(state->LegalActions(), std::vector{0}); + state->ApplyAction(0); + SPIEL_CHECK_EQ( + state->ToString(), + 
"Location=C->D, waiting time=0, t=9_mean_field, destination=D->E"); + + state->UpdateDistribution(distribution); + SPIEL_CHECK_EQ(state->ToString(), + "Location=C->D, waiting time=0, t=9, destination=D->E"); + + SPIEL_CHECK_EQ(state->LegalActions(), std::vector{6}); + state->ApplyAction(6); + SPIEL_CHECK_EQ(state->ToString(), + "Arrived at D->E, with arrival time 9.00, t=10_mean_field"); + + state->UpdateDistribution(distribution); + SPIEL_CHECK_EQ(state->ToString(), + "Arrived at D->E, with arrival time 9.00, t=10"); + + SPIEL_CHECK_EQ(state->LegalActions(), std::vector{0}); + state->ApplyAction(0); + SPIEL_CHECK_EQ(state->ToString(), + "Arrived at D->E, with arrival time 9.00, t=11_mean_field"); + + state->UpdateDistribution(distribution); + SPIEL_CHECK_EQ(state->ToString(), + "Arrived at D->E, with arrival time 9.00, t=11"); + + SPIEL_CHECK_EQ(state->LegalActions(), std::vector{0}); + state->ApplyAction(0); + SPIEL_CHECK_EQ(state->ToString(), + "Arrived at D->E, with arrival time 9.00, t=12"); + + state->UpdateDistribution(distribution); + SPIEL_CHECK_EQ(state->ToString(), + "Arrived at D->E, with arrival time 9.00, t=12"); + + SPIEL_CHECK_EQ(state->LegalActions(), std::vector{}); +} + +void TestPreEndedGameWithLineNetwork() { + std::vector distribution{1}; + auto game = LoadGame( + "mfg_dynamic_routing(max_num_time_step=2,time_step_length=0.5," + "network=line)"); + auto state = game->NewInitialState(); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), kChancePlayerId); + SPIEL_CHECK_EQ(state->ToString(), "Before initial chance node."); + SPIEL_CHECK_EQ(state->ActionToString(state->LegalActions()[0]), + "Vehicle is assigned to population 0"); + + state->ApplyAction(state->LegalActions()[0]); + SPIEL_CHECK_EQ(state->CurrentPlayer(), kDefaultPlayerId); + SPIEL_CHECK_EQ( + state->ToString(), + "Location=bef_O->O, waiting time=0, t=0, destination=D->aft_D"); + + state->ApplyAction(state->LegalActions()[0]); + SPIEL_CHECK_EQ( + state->ToString(), + "Location=O->A, waiting time=-1, t=1_mean_field, destination=D->aft_D"); + + state->UpdateDistribution(distribution); + SPIEL_CHECK_EQ(state->ToString(), + "Location=O->A, waiting time=1, t=1, destination=D->aft_D"); + + state->ApplyAction(state->LegalActions()[0]); + SPIEL_CHECK_EQ(state->ToString(), + "Arrived at O->A, with arrival time 3.00, t=2"); +} + +void TestRandomPlayWithLineNetwork() { + testing::RandomSimTest( + *LoadGame("mfg_dynamic_routing(max_num_time_step=10,time_step_length=0.5," + "network=line,perform_sanity_checks=true)"), + 3); +} + +void TestRandomPlayWithBraessNetwork() { + testing::RandomSimTest( + *LoadGame("mfg_dynamic_routing(max_num_time_step=10,time_step_length=0.5," + "network=braess,perform_sanity_checks=true)"), + 3); +} + +// Test travel time update based on distribution is correct. 
+void TestCorrectTravelTimeUpdate() { + auto game = LoadGame( + "mfg_dynamic_routing(max_num_time_step=100,time_step_length=0.05," + "network=braess)"); + auto state = game->NewInitialState(); + + SPIEL_CHECK_EQ(state->ToString(), "Before initial chance node."); + state->ApplyAction(0); + SPIEL_CHECK_EQ(state->ToString(), + "Location=O->A, waiting time=0, t=0, destination=D->E"); + SPIEL_CHECK_EQ(state->LegalActions(), (std::vector{1, 2})); + state->ApplyAction(1); + SPIEL_CHECK_EQ( + state->ToString(), + "Location=A->B, waiting time=-1, t=1_mean_field, destination=D->E"); + + std::vector distribution{1}; + state->UpdateDistribution({.5}); + // Waiting time (in unit of time) = 1.0 (free flow travel time on A->B) + + // .5 (% player on A->B) * 5 (num of players) / 5 (capacity on A->B) = 1.5 + // Waiting time (in time step) = 1.5 / 0.05 (time step lenght) + // - 1 (one time step for the current time running) = 29 + SPIEL_CHECK_EQ(state->ToString(), + "Location=A->B, waiting time=29, t=1, destination=D->E"); +} +} // namespace +} // namespace open_spiel::dynamic_routing + +int main(int argc, char** argv) { + open_spiel::dynamic_routing::TestLoad(); + open_spiel::dynamic_routing::TestLoadWithParams(); + open_spiel::dynamic_routing::TestWholeGameWithLineNetwork(); + open_spiel::dynamic_routing::TestWholeGameWithBraessNetwork(); + open_spiel::dynamic_routing::TestPreEndedGameWithLineNetwork(); + open_spiel::dynamic_routing::TestRandomPlayWithLineNetwork(); + open_spiel::dynamic_routing::TestRandomPlayWithBraessNetwork(); + open_spiel::dynamic_routing::TestCorrectTravelTimeUpdate(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/mfg/garnet.cc b/scenarios/bargaining/open_spiel/open_spiel/games/mfg/garnet.cc new file mode 100644 index 0000000..8b0122e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/mfg/garnet.cc @@ -0,0 +1,390 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/mfg/garnet.h" + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/random/random.h" +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/abseil-cpp/absl/strings/numbers.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/abseil-cpp/absl/strings/substitute.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace garnet { +namespace { +inline constexpr float kEpsilon = 1e-25; + +// Facts about the game. 
+const GameType kGameType{ + /*short_name=*/"mfg_garnet", + /*long_name=*/"Mean Field Garnet", + GameType::Dynamics::kMeanField, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kPerfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kRewards, + /*max_num_players=*/kNumPlayers, + /*min_num_players=*/kNumPlayers, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"size", GameParameter(kDefaultSize)}, + {"horizon", GameParameter(kDefaultHorizon)}, + {"seed", GameParameter(kDefaultSeed)}, + {"num_action", GameParameter(kDefaultNumActions)}, + {"num_chance_action", GameParameter(kDefaultNumChanceActions)}, + {"sparsity_factor", GameParameter(kDefaultSparsityFactor)}, + {"eta", GameParameter(kDefaultEta)}}, + /*default_loadable*/ true, + /*provides_factored_observation_string*/ false}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new GarnetGame(params)); +} + +std::string StateToString(int x, int t, Action last_action, Player player_id, + bool is_chance_init) { + if (is_chance_init) { + return "initial"; + } else if (player_id == 0) { + return absl::Substitute("($0, $1)", x, t); + } else if (player_id == kMeanFieldPlayerId) { + return absl::Substitute("($0, $1)_a", x, t); + } else if (player_id == kChancePlayerId) { + return absl::Substitute("($0, $1, $2)_a_mu", x, t, last_action); + } else { + SpielFatalError( + absl::Substitute("Unexpected state (player_id: $0, is_chance_init: $1)", + player_id, is_chance_init)); + } +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +} // namespace + +GarnetState::GarnetState(std::shared_ptr game, int size, + int horizon, int seed, int num_action, + int num_chance_action, double sparsity_factor, + double eta) + : State(game), + size_(size), + horizon_(horizon), + seed_(seed), + num_action_(num_action), + num_chance_action_(num_chance_action), + sparsity_factor_(sparsity_factor), + eta_(eta), + distribution_(size_, 1. 
/ size_) { + std::mt19937 rng(seed_); + double normalization; + double proba; + double cdf_proba; + for (int i = 0; i < size; ++i) { + for (int j = 0; j < num_action; ++j) { + double r_sparse = absl::Uniform(rng, 0.0, 1.0); + if (r_sparse < sparsity_factor_) { + garnet_reward_.push_back(absl::Uniform(rng, 0.0, 1.0)); + } else { + garnet_reward_.push_back(0.0); + } + + normalization = 0; + std::vector cdf; + cdf.push_back(0.0); + cdf.push_back(1.0); + for (int kk = 0; kk < num_chance_action - 1; ++kk) { + cdf_proba = absl::Uniform(rng, 0.0, 1.0); + cdf.push_back(cdf_proba); + } + std::sort(cdf.begin(), cdf.end()); + for (int k = 0; k < num_chance_action; ++k) { + proba = cdf[k+1]-cdf[k]; + normalization += proba; + garnet_transition_proba_unnormalized_.push_back(proba); + garnet_transition_.push_back(absl::Uniform(rng, 0, size)); + } + garnet_transition_proba_normalization_.push_back(normalization); + } + } +} + +GarnetState::GarnetState(std::shared_ptr game, int size, + int horizon, int seed, int num_action, + int num_chance_action, double sparsity_factor, + double eta, Player current_player, bool is_chance_init, + int x, int t, int last_action, double return_value, + const std::vector& distribution) + : State(game), + size_(size), + horizon_(horizon), + seed_(seed), + num_action_(num_action), + num_chance_action_(num_chance_action), + sparsity_factor_(sparsity_factor), + eta_(eta), + current_player_(current_player), + is_chance_init_(is_chance_init), + x_(x), + t_(t), + last_action_(last_action), + return_value_(return_value), + distribution_(distribution) { + std::mt19937 rng(seed_); + double normalization; + double proba; + for (int i = 0; i < size; ++i) { + for (int j = 0; j < num_action; ++j) { + double r_sparse = absl::Uniform(rng, 0.0, 1.0); + if (r_sparse < sparsity_factor_) { + garnet_reward_.push_back(absl::Uniform(rng, 0.0, 1.0)); + } else { + garnet_reward_.push_back(0.0); + } + normalization = 0; + for (int k = 0; k < num_chance_action; ++k) { + proba = absl::Uniform(rng, 0.0, 1.0); + normalization += proba; + garnet_transition_proba_unnormalized_.push_back(proba); + garnet_transition_.push_back(absl::Uniform(rng, 0, size)); + } + garnet_transition_proba_normalization_.push_back(normalization); + } + } +} + +double GarnetState::GetTransitionProba(int x, int action, + int chance_action) const { + return (garnet_transition_proba_unnormalized_[num_chance_action_ * + (x + size_ * action) + + chance_action] / + garnet_transition_proba_normalization_[x + size_ * action]); +} + +int GarnetState::GetTransition(int x, int action, int chance_action) const { + return garnet_transition_[num_chance_action_ * (x + size_ * action) + + chance_action]; +} + +double GarnetState::GetReward(int x, int action) const { + return garnet_reward_[x + size_ * action]; +} + +std::vector GarnetState::LegalActions() const { + if (IsTerminal()) return {}; + if (IsChanceNode()) return LegalChanceOutcomes(); + if (IsMeanFieldNode()) return {}; + SPIEL_CHECK_TRUE(IsPlayerNode()); + std::vector outcomes; + outcomes.reserve(num_action_); + for (int i = 0; i < num_action_; ++i) { + outcomes.push_back(i); + } + return outcomes; +} + +ActionsAndProbs GarnetState::ChanceOutcomes() const { + if (is_chance_init_) { + ActionsAndProbs outcomes; + for (int i = 0; i < size_; ++i) { + outcomes.push_back({i, 1. 
/ size_}); + } + return outcomes; + } + ActionsAndProbs outcomes; + double proba; + for (int i = 0; i < num_chance_action_; ++i) { + proba = GetTransitionProba(x_, last_action_, i); + outcomes.push_back({i, proba}); + } + return outcomes; +} + +void GarnetState::DoApplyAction(Action action) { + SPIEL_CHECK_NE(current_player_, kMeanFieldPlayerId); + return_value_ += Rewards()[0]; + if (is_chance_init_) { + SPIEL_CHECK_GE(action, 0); + SPIEL_CHECK_LT(action, size_); + SPIEL_CHECK_EQ(current_player_, kChancePlayerId); + x_ = action; + is_chance_init_ = false; + current_player_ = 0; + } else if (current_player_ == kChancePlayerId) { + x_ = GetTransition(x_, last_action_, action); + ++t_; + current_player_ = kMeanFieldPlayerId; + } else { + SPIEL_CHECK_EQ(current_player_, 0); + last_action_ = action; + current_player_ = kChancePlayerId; + } +} + +std::string GarnetState::ActionToString(Player player, Action action) const { + if (IsChanceNode() && is_chance_init_) { + return absl::Substitute("init_state=$0", action); + } + return std::to_string(action); +} + +std::vector GarnetState::DistributionSupport() { + std::vector support; + support.reserve(size_); + for (int x = 0; x < size_; ++x) { + support.push_back( + StateToString(x, t_, last_action_, kMeanFieldPlayerId, false)); + } + return support; +} + +void GarnetState::UpdateDistribution(const std::vector& distribution) { + SPIEL_CHECK_EQ(current_player_, kMeanFieldPlayerId); + SPIEL_CHECK_EQ(distribution.size(), size_); + distribution_ = distribution; + current_player_ = kDefaultPlayerId; +} + +bool GarnetState::IsTerminal() const { return t_ >= horizon_; } + +std::vector GarnetState::Rewards() const { + if (current_player_ != 0) { + return {0.}; + } + double r_x = GetReward(x_, last_action_); + double r_mu = -std::log(distribution_[x_] + kEpsilon); + return {r_x + eta_ * r_mu}; +} + +std::vector GarnetState::Returns() const { + return {return_value_ + Rewards()[0]}; +} + +std::string GarnetState::ToString() const { + return StateToString(x_, t_, last_action_, current_player_, is_chance_init_); +} + +std::string GarnetState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return StateToString(x_, t_, last_action_, current_player_, is_chance_init_); +} + +std::string GarnetState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void GarnetState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + SPIEL_CHECK_EQ(values.size(), size_ + horizon_ + 1); + SPIEL_CHECK_LT(x_, size_); + SPIEL_CHECK_GE(t_, 0); + // Allow t_ == horizon_. + SPIEL_CHECK_LE(t_, horizon_); + std::fill(values.begin(), values.end(), 0.); + if (x_ >= 0) { + values[x_] = 1.; + } + // x_ equals -1 for the initial (blank) state, don't set any + // position bit in that case. 
+ values[size_ + t_] = 1.; +} + +std::unique_ptr GarnetState::Clone() const { + return std::unique_ptr(new GarnetState(*this)); +} + +std::string GarnetState::Serialize() const { + std::string out = + absl::StrCat(current_player_, ",", is_chance_init_, ",", x_, ",", t_, ",", + last_action_, ",", return_value_, "\n"); + absl::StrAppend(&out, absl::StrJoin(distribution_, ",")); + return out; +} + +GarnetGame::GarnetGame(const GameParameters& params) + : Game(kGameType, params), + size_(ParameterValue("size", kDefaultSize)), + horizon_(ParameterValue("horizon", kDefaultHorizon)), + seed_(ParameterValue("seed", kDefaultSeed)), + num_action_(ParameterValue("num_action", kDefaultNumActions)), + num_chance_action_( + ParameterValue("num_chance_action", kDefaultNumChanceActions)), + sparsity_factor_( + ParameterValue("sparsity_factor", kDefaultSparsityFactor)), + eta_(ParameterValue("eta", kDefaultEta)) {} + +std::vector GarnetGame::ObservationTensorShape() const { + // +1 to allow for t_ == horizon. + return {size_ + horizon_ + 1}; +} + +std::unique_ptr GarnetGame::DeserializeState( + const std::string& str) const { + std::vector lines = absl::StrSplit(str, '\n'); + if (lines.size() != 2) { + SpielFatalError(absl::StrCat("Expected 2 lines in serialized state, got: ", + lines.size())); + } + Player current_player; + int is_chance_init; + int x; + int t; + int last_action; + double return_value; + std::vector properties = absl::StrSplit(lines[0], ','); + if (properties.size() != 6) { + SpielFatalError( + absl::StrCat("Expected 6 properties for serialized state, got: ", + properties.size())); + } + SPIEL_CHECK_TRUE(absl::SimpleAtoi(properties[0], ¤t_player)); + SPIEL_CHECK_TRUE(absl::SimpleAtoi(properties[1], &is_chance_init)); + SPIEL_CHECK_TRUE(absl::SimpleAtoi(properties[2], &x)); + SPIEL_CHECK_TRUE(absl::SimpleAtoi(properties[3], &t)); + SPIEL_CHECK_TRUE(absl::SimpleAtoi(properties[4], &last_action)); + SPIEL_CHECK_TRUE(absl::SimpleAtod(properties[5], &return_value)); + std::vector serialized_distrib = absl::StrSplit(lines[1], ','); + std::vector distribution; + distribution.reserve(serialized_distrib.size()); + for (std::string& v : serialized_distrib) { + double parsed_weight; + SPIEL_CHECK_TRUE(absl::SimpleAtod(v, &parsed_weight)); + distribution.push_back(parsed_weight); + } + return absl::make_unique( + shared_from_this(), size_, horizon_, seed_, num_action_, + num_chance_action_, sparsity_factor_, eta_, current_player, + is_chance_init, x, t, last_action, return_value, distribution); +} + +} // namespace garnet +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/mfg/garnet.h b/scenarios/bargaining/open_spiel/open_spiel/games/mfg/garnet.h new file mode 100644 index 0000000..976f5ea --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/mfg/garnet.h @@ -0,0 +1,165 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Mean Field Garnet. 
+// +// This game corresponds to a garnet defined in section 5.1 of +// "Scaling up Mean Field Games with Online Mirror Descent", Perolat & al. 2021 +// (https://arxiv.org/pdf/2103.00623.pdf) +// +// A garnet is a parametrized family of randomly generated Mean Field Game. One +// can control the number of action, the number of chance actions and the +// sparsity of the reward. +// - The transition is randomly generated as an unnormalized uniform(0,1) over +// the chance actions and the next state is selected uniformly over the state +// space. +// - The reward is parametrized by eta as r(x,a) - eta * log(mu(x)) where r(x,a) +// is sampled uniformly over [0,1] only with probability the sparsity and 0.0 +// otherwise. + +#ifndef OPEN_SPIEL_GAMES_MFG_GARNET_H_ +#define OPEN_SPIEL_GAMES_MFG_GARNET_H_ + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/memory/memory.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace garnet { + +inline constexpr int kNumPlayers = 1; +inline constexpr int kDefaultHorizon = 10; +inline constexpr int kDefaultSize = 10; +inline constexpr int kDefaultSeed = 0; +inline constexpr int kDefaultNumActions = 3; +inline constexpr int kDefaultNumChanceActions = 3; +inline constexpr double kDefaultSparsityFactor = 1.0; +inline constexpr double kDefaultEta = 1.0; +// Action that leads to no displacement on the circle of the game. +inline constexpr int kNeutralAction = 0; + +// Game state. +class GarnetState : public State { + public: + GarnetState(std::shared_ptr game, int size, int horizon, int seed, + int num_action, int num_chance_action, double sparsity_factor, + double eta); + GarnetState(std::shared_ptr game, int size, int horizon, int seed, + int num_action, int num_chance_action, double sparsity_factor, + double eta, Player current_player, bool is_chance_init, int x, + int t, int last_action, double return_value, + const std::vector& distribution); + + double GetTransitionProba(int x, int action, int chance_action) const; + int GetTransition(int x, int action, int chance_action) const; + double GetReward(int x, int action) const; + + Player CurrentPlayer() const override { + return IsTerminal() ? kTerminalPlayerId : current_player_; + } + std::string ActionToString(Player player, Action action) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Rewards() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + std::vector LegalActions() const override; + ActionsAndProbs ChanceOutcomes() const override; + + std::vector DistributionSupport() override; + void UpdateDistribution(const std::vector& distribution) override; + std::vector Distribution() const { return distribution_; } + + std::string Serialize() const override; + + protected: + void DoApplyAction(Action action) override; + + private: + // Size of the garnet. + const int size_ = -1; + const int horizon_ = -1; + const int seed_ = 0; + const int num_action_ = 0; + const int num_chance_action_ = 0; + double sparsity_factor_ = kDefaultSparsityFactor; + double eta_ = kDefaultEta; + Player current_player_ = kChancePlayerId; + bool is_chance_init_ = true; + // Position on the garnet [0, size_) when valid. + int x_ = -1; + // Current time, in [0, horizon_]. 
+ int t_ = 0; + int last_action_ = kNeutralAction; + double return_value_ = 0.; + + // Represents the current probability distribution over game states. + std::vector distribution_; + std::vector garnet_transition_; + std::vector garnet_transition_proba_unnormalized_; + std::vector garnet_transition_proba_normalization_; + std::vector garnet_reward_; +}; + +class GarnetGame : public Game { + public: + explicit GarnetGame(const GameParameters& params); + int NumDistinctActions() const override { return num_action_; } + std::unique_ptr NewInitialState() const override { + return absl::make_unique( + shared_from_this(), size_, horizon_, seed_, num_action_, + num_chance_action_, sparsity_factor_, eta_); + } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { + return -std::numeric_limits::infinity(); + } + double MaxUtility() const override { + return std::numeric_limits::infinity(); + } + int MaxGameLength() const override { return horizon_; } + int MaxChanceNodesInHistory() const override { + // + 1 to account for the initial extra chance node. + return horizon_ + 1; + } + std::vector ObservationTensorShape() const override; + int MaxChanceOutcomes() const override { + return std::max(size_, num_chance_action_); + } + std::unique_ptr DeserializeState( + const std::string& str) const override; + + private: + const int size_; + const int horizon_; + const int seed_; + const int num_action_ = 0; + const int num_chance_action_ = 0; + const double sparsity_factor_; + const double eta_; +}; + +} // namespace garnet +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_MFG_GARNET_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/mfg/garnet_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/mfg/garnet_test.cc new file mode 100644 index 0000000..f331635 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/mfg/garnet_test.cc @@ -0,0 +1,75 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
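+// A rough worked example of the reward these tests exercise (a sketch based
+// on GarnetState::Rewards() in garnet.cc, with illustrative numbers): on a
+// player node the per-step reward is r(x, a) + eta * (-log(mu(x) + kEpsilon)),
+// so with eta = 1, r(x, a) = 0.4 and a population mass mu(x) = 0.1 on the
+// current state, the reward is approximately 0.4 - log(0.1) = 0.4 + 2.303 = 2.703.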
+ +#include "open_spiel/games/mfg/garnet.h" + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/random/uniform_int_distribution.h" +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace garnet { +namespace { + +namespace testing = open_spiel::testing; + +void TestLoad() { + testing::LoadGameTest("mfg_garnet"); + auto game = LoadGame("mfg_garnet"); + SPIEL_CHECK_EQ(game->GetType().dynamics, GameType::Dynamics::kMeanField); + auto state = game->NewInitialState(); + auto cloned = state->Clone(); + SPIEL_CHECK_EQ(state->ToString(), cloned->ToString()); + testing::ChanceOutcomesTest(*game); +} + +void TestLoadWithParams() { + auto game = LoadGame("mfg_garnet(size=100,horizon=1000)"); + auto state = game->NewInitialState(); + SPIEL_CHECK_EQ(game->ObservationTensorShape()[0], 1000 + 100 + 1); +} + +void CheckStatesEqual(const State& a, const State& b) { + const GarnetState& left = open_spiel::down_cast(a); + const GarnetState& right = open_spiel::down_cast(b); + SPIEL_CHECK_EQ(left.ToString(), right.ToString()); + SPIEL_CHECK_FLOAT_EQ(left.Rewards()[0], right.Rewards()[0]); + SPIEL_CHECK_FLOAT_EQ(left.Returns()[0], right.Returns()[0]); + SPIEL_CHECK_EQ(left.CurrentPlayer(), right.CurrentPlayer()); + auto left_distrib = left.Distribution(); + auto right_distrib = right.Distribution(); + SPIEL_CHECK_EQ(left_distrib.size(), right_distrib.size()); + for (int i = 0; i < left_distrib.size(); ++i) { + SPIEL_CHECK_FLOAT_EQ(left_distrib[i], right_distrib[i]); + } +} + +void TestRandomPlay() { + testing::LoadGameTest("mfg_garnet(size=10,horizon=20)"); + testing::RandomSimTest(*LoadGame("mfg_garnet(size=10,horizon=20)"), 3); +} + +} // namespace +} // namespace garnet +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::garnet::TestLoad(); + open_spiel::garnet::TestLoadWithParams(); + open_spiel::garnet::TestRandomPlay(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/mnk/mnk.cc b/scenarios/bargaining/open_spiel/open_spiel/games/mnk/mnk.cc new file mode 100644 index 0000000..06e455d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/mnk/mnk.cc @@ -0,0 +1,254 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/mnk/mnk.h" + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/observer.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace mnk { +namespace { + +// Facts about the game. 
+const GameType kGameType{/*short_name=*/"mnk", + /*long_name=*/"m,n,k-game", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"m", GameParameter(kDefaultNumCols)}, + {"n", GameParameter(kDefaultNumRows)}, + {"k", GameParameter(kDefaultNumInARow)}}}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new MNKGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +} // namespace + +CellState PlayerToState(Player player) { + switch (player) { + case 0: + return CellState::kCross; + case 1: + return CellState::kNought; + default: + SpielFatalError(absl::StrCat("Invalid player id ", player)); + return CellState::kEmpty; + } +} + +std::string StateToString(CellState state) { + switch (state) { + case CellState::kEmpty: + return "."; + case CellState::kNought: + return "o"; + case CellState::kCross: + return "x"; + default: + SpielFatalError("Unknown state."); + } +} + +bool BoardHasLine(const std::vector>& board, + const Player player, int k, int r, int c, int dr, int dc) { + CellState state = PlayerToState(player); + int count = 0; + + for (int i = 0; + i < k && 0 <= r && r < board.size() && 0 <= c && c < board[r].size(); + ++i, r += dr, c += dc) + count += board[r][c] == state; + + return count == k; +} + +bool BoardHasLine(const std::vector>& board, + const Player player, int k) { + for (int r = 0; r < board.size(); ++r) + for (int c = 0; c < board[r].size(); ++c) + for (int dr = -1; dr <= 1; ++dr) + for (int dc = -1; dc <= 1; ++dc) + if (dr || dc) + if (BoardHasLine(board, player, k, r, c, dr, dc)) return true; + + return false; +} + +void MNKState::DoApplyAction(Action move) { + auto [row, column] = ActionToCoordinates(move); + SPIEL_CHECK_EQ(board_[row][column], CellState::kEmpty); + board_[row][column] = PlayerToState(CurrentPlayer()); + if (HasLine(current_player_)) { + outcome_ = current_player_; + } + current_player_ = 1 - current_player_; + num_moves_ += 1; +} + +std::pair MNKState::ActionToCoordinates(Action move) const { + return {move / NumCols(), move % NumCols()}; +} + +int MNKState::CoordinatesToAction(int row, int column) const { + return row * NumCols() + column; +} + +int MNKState::NumRows() const { + return std::static_pointer_cast(game_)->NumRows(); +} + +int MNKState::NumCols() const { + return std::static_pointer_cast(game_)->NumCols(); +} + +int MNKState::NumCells() const { + return std::static_pointer_cast(game_)->NumCells(); +} + +int MNKState::NumInARow() const { + return std::static_pointer_cast(game_)->NumInARow(); +} + +std::vector MNKState::LegalActions() const { + if (IsTerminal()) return {}; + + // Can move in any empty cell. 
+ std::vector moves; + + for (int r = 0; r < board_.size(); ++r) + for (int c = 0; c < board_[r].size(); ++c) + if (board_[r][c] == CellState::kEmpty) + moves.push_back(CoordinatesToAction(r, c)); + + return moves; +} + +std::string MNKState::ActionToString(Player player, Action action_id) const { + return game_->ActionToString(player, action_id); +} + +bool MNKState::HasLine(Player player) const { + return BoardHasLine(board_, player, NumInARow()); +} + +bool MNKState::IsFull() const { return num_moves_ == NumCells(); } + +MNKState::MNKState(std::shared_ptr game) : State(game) { + board_.resize(NumRows()); + + for (int r = 0; r < board_.size(); ++r) + board_[r].resize(NumCols(), CellState::kEmpty); +} + +std::string MNKState::ToString() const { + std::string str; + for (int r = 0; r < NumRows(); ++r) { + for (int c = 0; c < NumCols(); ++c) { + absl::StrAppend(&str, StateToString(BoardAt(r, c))); + } + if (r < (NumRows() - 1)) { + absl::StrAppend(&str, "\n"); + } + } + return str; +} + +bool MNKState::IsTerminal() const { + return outcome_ != kInvalidPlayer || IsFull(); +} + +std::vector MNKState::Returns() const { + if (HasLine(Player{0})) { + return {1.0, -1.0}; + } else if (HasLine(Player{1})) { + return {-1.0, 1.0}; + } else { + return {0.0, 0.0}; + } +} + +std::string MNKState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return HistoryString(); +} + +std::string MNKState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void MNKState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + for (int r = 0; r < NumRows(); ++r) { + for (int c = 0; c < NumCols(); ++c) { + int i = static_cast(board_[r][c]); + int j = CoordinatesToAction(r, c); + values[i * NumCells() + j] = 1.0; + } + } +} + +void MNKState::UndoAction(Player player, Action move) { + auto [r, c] = ActionToCoordinates(move); + board_[r][c] = CellState::kEmpty; + current_player_ = player; + outcome_ = kInvalidPlayer; + num_moves_ -= 1; + history_.pop_back(); + --move_number_; +} + +std::unique_ptr MNKState::Clone() const { + return std::unique_ptr(new MNKState(*this)); +} + +std::string MNKGame::ActionToString(Player player, Action action_id) const { + return absl::StrCat(StateToString(PlayerToState(player)), "(", + action_id / NumCols(), ",", action_id % NumCols(), ")"); +} + +MNKGame::MNKGame(const GameParameters& params) : Game(kGameType, params) {} + +} // namespace mnk +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/mnk/mnk.h b/scenarios/bargaining/open_spiel/open_spiel/games/mnk/mnk.h new file mode 100644 index 0000000..25bcd65 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/mnk/mnk.h @@ -0,0 +1,142 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
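+// Usage sketch (assuming the LoadGame parameter syntax used by the tests in
+// this tree): LoadGame("mnk") yields the default 15x15 board with 5 in a row
+// to win, while LoadGame("mnk(m=3,n=3,k=3)") reduces the game to ordinary
+// tic-tac-toe.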
+ +#ifndef OPEN_SPIEL_GAMES_MNK_MNK_H_ +#define OPEN_SPIEL_GAMES_MNK_MNK_H_ + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/spiel.h" + +// m,n,k-game, also known as k-in-a-row game on an m-by-n board: +// https://en.wikipedia.org/wiki/M,n,k-game +// +// Parameters: +// "m" int width of the board (i.e., number of columns) (default = 15) +// "n" int height of the board (i.e., number of rows) (default = 15) +// "k" int k-in-a-row win condition (default = 5) + +namespace open_spiel { +namespace mnk { + +// Constants. +inline constexpr int kNumPlayers = 2; +inline constexpr int kCellStates = 1 + kNumPlayers; // empty, 'x', and 'o'. +inline constexpr int kDefaultNumRows = 15; +inline constexpr int kDefaultNumCols = 15; +inline constexpr int kDefaultNumInARow = 5; + +// State of a cell. +enum class CellState { + kEmpty, + kNought, // O + kCross, // X +}; + +// State of an in-play game. +class MNKState : public State { + public: + MNKState(std::shared_ptr game); // NOLINT + + MNKState(const MNKState&) = default; + MNKState& operator=(const MNKState&) = default; + + Player CurrentPlayer() const override { + return IsTerminal() ? kTerminalPlayerId : current_player_; + } + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + void UndoAction(Player player, Action move) override; + std::vector LegalActions() const override; + CellState BoardAt(int cell) const { + auto [row, column] = ActionToCoordinates(cell); + return board_[row][column]; + } + CellState BoardAt(int row, int column) const { return board_[row][column]; } + Player outcome() const { return outcome_; } + std::pair ActionToCoordinates(Action move) const; + int CoordinatesToAction(int row, int column) const; + int NumRows() const; + int NumCols() const; + int NumCells() const; + int NumInARow() const; + + // Only used by Ultimate Tic-Tac-Toe. + void SetCurrentPlayer(Player player) { current_player_ = player; } + + protected: + std::vector> board_; + void DoApplyAction(Action move) override; + + private: + bool HasLine(Player player) const; // Does this player have a line? + bool IsFull() const; // Is the board full? + Player current_player_ = 0; // Player zero goes first + Player outcome_ = kInvalidPlayer; + int num_moves_ = 0; +}; + +// Game object. 
+class MNKGame : public Game { + public: + explicit MNKGame(const GameParameters& params); + int NumDistinctActions() const override { return NumCells(); } + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new MNKState(shared_from_this())); + } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { return -1; } + absl::optional UtilitySum() const override { return 0; } + double MaxUtility() const override { return 1; } + std::vector ObservationTensorShape() const override { + return {kCellStates, NumRows(), NumCols()}; + } + int MaxGameLength() const override { return NumCells(); } + std::string ActionToString(Player player, Action action_id) const override; + int NumRows() const { return ParameterValue("n"); } + int NumCols() const { return ParameterValue("m"); } + int NumCells() const { return NumRows() * NumCols(); } + int NumInARow() const { return ParameterValue("k"); } +}; + +CellState PlayerToState(Player player); +std::string StateToString(CellState state); + +// Does this player have a line? +bool BoardHasLine(const std::vector>& board, + const Player player, int k, int r, int c, int dr, int dc); + +bool BoardHasLine(const std::vector>& board, + const Player player, int k); + +inline std::ostream& operator<<(std::ostream& stream, const CellState& state) { + return stream << StateToString(state); +} + +} // namespace mnk +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_MNK_MNK_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/mnk/mnk_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/mnk/mnk_test.cc new file mode 100644 index 0000000..bc8aba0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/mnk/mnk_test.cc @@ -0,0 +1,34 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace mnk { +namespace { + +namespace testing = open_spiel::testing; + +void BasicMNKTests() { + testing::LoadGameTest("mnk"); + testing::NoChanceOutcomesTest(*LoadGame("mnk")); + testing::RandomSimTest(*LoadGame("mnk"), 100); +} + +} // namespace +} // namespace mnk +} // namespace open_spiel + +int main(int argc, char** argv) { open_spiel::mnk::BasicMNKTests(); } diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/morpion_solitaire/morpion_solitaire.cc b/scenarios/bargaining/open_spiel/open_spiel/games/morpion_solitaire/morpion_solitaire.cc new file mode 100644 index 0000000..e36b2f5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/morpion_solitaire/morpion_solitaire.cc @@ -0,0 +1,351 @@ +// Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "open_spiel/games/morpion_solitaire/morpion_solitaire.h" + +#include +#include +#include +#include + +#include "open_spiel/spiel_utils.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" + +namespace open_spiel { +namespace morpion_solitaire { +namespace { + +// Facts about the game. +const GameType kGameType{/*short_name=*/"morpion_solitaire", + /*long_name=*/"Morpion Solitaire", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kRewards, + /*max_num_players=*/1, + /*min_num_players=*/1, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/false, + /*parameter_specification=*/{}}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new MorpionGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +} // namespace + +// Line methods ============================================================= +Line::Line(Point p1, Point p2) { Init(p1, p2); } + +// Action encoding (must be changed to support larger boards): +// - 0 - 129 represents lines with direction [0, 1] +// - 130 - 259 represents lines with direction [1, 0] +// - 260 - 359 represents lines with direction [1, -1] +// - 360 - 459 represents lines with direction [1, 1] +Line::Line(Action action) { + int row; + int base; + Point point1; + Point point2; + if (action >= 0 && action <= 129) { + // [0, 1] + row = action / 10; + point1 = Point(row, action - row * 10); + point2 = Point(row, (action - row * 10) + 3); + } else if (action >= 130 && action <= 259) { + // [1, 0] + base = action - 130; + row = (base) / 13; + point1 = Point(row, base - row * 13); + point2 = Point(row + 3, (base - row * 13)); + } else if (action >= 260 && action <= 359) { + // [1, -1] + base = action - 260; + row = (base) / 10; + point1 = Point(row, base - row * 10); + point2 = Point(row + 3, (base - row * 10) + 3); + } else if (action >= 360 && action <= 459) { + // [1, 1] + base = action - 360; + row = (base) / 10; + point1 = Point(row + 3, base - row * 10); + point2 = Point(row, (base - row * 10) + 3); + } else { + SpielFatalError("action provided does not correspond with a move"); + } + Init(point1, point2); +} + +void Line::Init(Point point1, Point point2) { + if (point1 < point2) { + endpoint1_ = point1; + endpoint2_ = point2; + } else { + endpoint1_ = point2; + endpoint2_ = point1; + } + // Categorize line in one of four directions ([0, 1], [1, 1], [1, -1], [1, + // 0]). + direction_[0] = static_cast((endpoint2_.x - endpoint1_.x) / 3); + direction_[1] = static_cast((endpoint2_.y - endpoint1_.y) / 3); + // Get all points in line (beyond the two initial endpoints) and sort. 
+ for (int i = 0; i < 4; i++) { + line_points_.emplace_back(endpoint1_.x + i * direction_[0], + endpoint1_.y + i * direction_[1]); + } + std::sort(line_points_.begin(), line_points_.end()); +} + +bool Line::CheckOverlap(Line l) { + // Only check for overlapping points for lines in the same direction. + if (direction_ != l.GetDirection()) { + return false; + } + // Check if it's the same line. + if ((endpoint1_ == l.GetEndpoints()[0]) && + (endpoint2_ == l.GetEndpoints()[1])) { + return false; + } + // Check for overlapping points between the two lines. + std::vector intersect = {}; + std::vector l_points = l.GetAllPoints(); + std::set_intersection(l_points.begin(), l_points.end(), line_points_.begin(), + line_points_.end(), std::back_inserter(intersect)); + if (!intersect.empty()) { // Line is overlapping if intersection.size() >=1 + // in 4D version. + return true; + } + return false; +} + +bool Line::operator==(Line other_line) { + return (endpoint1_ == other_line.GetEndpoints()[0]) && + (endpoint2_ == other_line.GetEndpoints()[1]); +} + +// Getters +Action Line::GetAction() { + int dirCode; + if ((direction_[0] == 0) && (direction_[1] == 1)) { + dirCode = 1; + } else if ((direction_[0] == 1) && (direction_[1] == 0)) { + dirCode = 2; + } else if ((direction_[0] == 1) && (direction_[1] == 1)) { + dirCode = 3; + } else { + dirCode = 4; + } + // Get action encoding from line endpoints + switch (dirCode) { + // [0, 1] 0 ... 129 + case 1: + return endpoint1_.x * 10 + endpoint1_.y; + + // [1, 0] 130 ... 259 + case 2: + return endpoint1_.x * 13 + endpoint1_.y + 130; + + // [1, 1] 260 ... 359 + case 3: + return endpoint1_.x * 10 + endpoint1_.y + 260; + + // [1, -1] 360 ... 459 + case 4: + return (endpoint2_.x - 3) * 10 + endpoint2_.y + 360; + + default: + SpielFatalError(absl::StrCat("Unhandled case in Line::GetAction()", + ", dirCode = ", dirCode)); + } +} + +std::string Line::ToString() const { + return "(" + endpoint1_.ToString() + " " + endpoint2_.ToString() + ")"; +} + +std::vector Line::GetEndpoints() { + return std::vector{endpoint1_, endpoint2_}; +} + +std::array Line::GetDirection() { return direction_; } + +std::vector Line::GetAllPoints() { return line_points_; } + +// Morpion State methods ==================================================== +void MorpionState::DoApplyAction(Action move) { + Line newMove = *action_map_.at(move); + Point newPoint; + int pos; + for (Point p : newMove.GetAllPoints()) { + pos = p.y + (p.x * kNumRows); + if (board_[pos] == 0) { + board_[pos] = 1; + newPoint = p; + break; + } + } + move_history_.emplace_back(newMove, newPoint); + num_moves_ += 1; + current_returns_ += 1; +} + +std::vector MorpionState::LegalActions() const { + if (IsTerminal()) return {}; + std::vector moves; + for (Line move : current_valid_moves_) { + moves.push_back(move.GetAction()); + } + sort(moves.begin(), moves.end()); + return moves; +} + +std::string MorpionState::ActionToString(Player player, + Action action_id) const { + Line move = *action_map_.at(action_id); + std::string action_str; + for (Point p : move.GetAllPoints()) { + absl::StrAppend(&action_str, p.ToString(), " "); + } + return action_str; +} + +MorpionState::MorpionState(std::shared_ptr game) : State(game) { + // Initialize 4D starting points and find all possible lines on the board + for (int i = 0; i < kNumRows; i++) { + for (int j = 0; j < kNumCols; j++) { + // Initialize starting points on board + if ((i == 3 || i == 9) && j > 4 && j < 8) { + board_[j + (i * kNumRows)] = 1; + } + if ((i == 4 || i == 8) && (j == 5 
|| j == 7)) { + board_[j + (i * kNumRows)] = 1; + } + if ((i == 5 || i == 7) && ((j > 2 && j < 6) || (j > 6 && j < 10))) { + board_[j + (i * kNumRows)] = 1; + } + if (i == 6 && ((j == 3) || (j == 9))) { + board_[j + (i * kNumRows)] = 1; + } + // Get all possible lines on board (460) + if (j + 3 < kNumCols) { + all_lines_.emplace_back(Point(i, j), Point(i, j + 3)); + } + if ((j + 3 < kNumCols) && (i + 3 < kNumRows)) { + all_lines_.emplace_back(Point(i, j), Point(i + 3, j + 3)); + } + if (i + 3 < kNumRows) { + all_lines_.emplace_back(Point(i, j), Point(i + 3, j)); + } + if ((j >= 3) && (i + 3 < kNumRows)) { + all_lines_.emplace_back(Point(i, j), Point(i + 3, j - 3)); + } + } + } + // For each line, store in a map of action # -> line object. + for (Line& line : all_lines_) { + action_map_[line.GetAction()] = &line; + } +} + +// Generate all valid lines / moves in current board state. +void MorpionState::getAllValidMoves() const { + current_valid_moves_.clear(); + for (Line l : all_lines_) { + // Check that exactly one point is empty. + int count = 0; + for (Point p : l.GetAllPoints()) { + if (board_[p.y + (p.x * kNumRows)] == 1) { + count++; + } + } + if (count != 3) { + continue; + } + // Check that line does not overlap any existing moves / lines. + bool overlaps = false; + for (const std::pair& m : move_history_) { + overlaps = l.CheckOverlap(m.first); + if (overlaps) { + break; + } + } + if (overlaps) { + continue; + } + current_valid_moves_.push_back(l); + } +} + +bool MorpionState::IsTerminal() const { + getAllValidMoves(); + return current_valid_moves_.empty(); +} + +std::vector MorpionState::Rewards() const { + if (move_number_ == 0) { + return {0.0}; + } else { + return {1.0}; + } +} + +std::vector MorpionState::Returns() const { return {current_returns_}; } + +std::string MorpionState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return HistoryString(); +} + +std::string MorpionState::ToString() const { + std::string str; + for (int i = 0; i < kNumRows; i++) { + for (int j = 0; j < kNumCols; j++) { + absl::StrAppend(&str, board_[i * kNumRows + j]); + } + absl::StrAppend(&str, "\n"); + } + return str; +} + +std::string MorpionState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void MorpionState::UndoAction(Player player, Action move) { + std::pair last_move = move_history_.back(); + board_[last_move.second.x * kNumRows + last_move.second.y] = 0; + move_history_.pop_back(); + num_moves_ -= 1; + history_.pop_back(); + --move_number_; +} + +std::unique_ptr MorpionState::Clone() const { + return std::unique_ptr(new MorpionState(*this)); +} + +MorpionGame::MorpionGame(const GameParameters& params) + : Game(kGameType, params) {} + +} // namespace morpion_solitaire +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/morpion_solitaire/morpion_solitaire.h b/scenarios/bargaining/open_spiel/open_spiel/games/morpion_solitaire/morpion_solitaire.h new file mode 100644 index 0000000..b8a632e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/morpion_solitaire/morpion_solitaire.h @@ -0,0 +1,165 @@ +// Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_MORPION_SOLITAIRE_H_ +#define OPEN_SPIEL_GAMES_MORPION_SOLITAIRE_H_ + +#include +#include +#include +#include +#include + +#include "open_spiel/spiel.h" + +// Morpion Solitaire (4D) +// https://en.wikipedia.org/wiki/Join_Five +// http://www.morpionsolitaire.com/ +// Parameters: none + +namespace open_spiel { +namespace morpion_solitaire { + +// Constants. + +// There are only 4 possible states for the max move limit (35), +// and 13x13 is the minimal square grid to fit all 4 solutions. +// http://www.morpionsolitaire.com/English/RecordsGrids4T4D.htm +inline constexpr int kNumRows = 13; +inline constexpr int kNumCols = 13; +inline constexpr int kNumPoints = kNumRows * kNumCols; + +// Support Classes and Structs +// ============================================================= +struct Point { + int x{}, y{}; + Point() = default; + Point(int a, int b) { + this->x = a; + this->y = b; + } + + bool operator==(const Point& other_point) const { + return (x == other_point.x) && (y == other_point.y); + } + + bool operator<(const Point& other_point) const { + if (x < other_point.x) { + return true; + } else if (x == other_point.x) { + if (y < other_point.y) { + return true; + } + } + return false; + } + + std::string ToString() const { return absl::StrCat("[", x, ",", y, "]"); } +}; + +class Line { + public: + Line(Point p1, Point p2); + explicit Line(Action action); + + bool operator==(Line other_line); + + // Getters and setters + std::vector GetEndpoints(); + std::array GetDirection(); + std::vector GetAllPoints(); + Action GetAction(); + bool CheckOverlap(Line l); + std::string ToString() const; + + private: + void Init(Point point1, Point point2); + std::array + direction_{}; // One of 4 line directions (0,0), (1,0), (1,1), (1,-1) + Point endpoint1_; + Point endpoint2_; + std::vector line_points_; // Collection of all 4 points on a line +}; + +// State of an in-play game. +class MorpionState : public State { + public: + // Constructors + MorpionState(const MorpionState&) = default; + explicit MorpionState(std::shared_ptr game); + + MorpionState& operator=(const MorpionState&) = default; + + // Overridden Methods + Player CurrentPlayer() const override { + return IsTerminal() ? 
kTerminalPlayerId : kDefaultPlayerId; + } + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + std::string ObservationString(Player player) const override; + bool IsTerminal() const override; + std::vector Rewards() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::unique_ptr Clone() const override; + std::vector LegalActions() const override; + void UndoAction(Player player, Action move) override; + + protected: + void getAllValidMoves() const; + void DoApplyAction(Action move) override; + + private: + std::array board_{}; + std::vector all_lines_; + mutable std::vector current_valid_moves_; + int num_moves_ = 0; + double current_returns_{}; + std::vector> + move_history_; // Stores both Line and new Point created during move + std::unordered_map action_map_; // Maps action encoding to Line +}; + +// Game object. +class MorpionGame : public Game { + public: + explicit MorpionGame(const GameParameters& params); + + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new MorpionState(shared_from_this())); + } + + // Number of distinct actions equals all possible lines drawn on the board. + // Given 13x13 grid (see above), 4 points per line (4D), + // For line directions [0, 1], [1, 0]: 10 possible lines (13 - 3) per row and + // column. For line directions [1, -1], [1, 1]: + // - 10 lines (13 - 3) down center diagonal + // - 2 x (9 + 8 + .. 1) for other diagonals + // In total (10 * 13 * 2) + 2 * (10 + (2 * (9 + 8 +... 1))) = 460 + int NumDistinctActions() const override { return 460; } + + // 4D fully solved by enumeration in 2008, with max 35 moves. + // http://www.morpionsolitaire.com/English/Enumeration.htm + // http://oeis.org/A204109 + int MaxGameLength() const override { return 35; } + + int NumPlayers() const override { return 1; } + double MinUtility() const override { return 0; } + double MaxUtility() const override { return MaxGameLength(); } +}; + +} // namespace morpion_solitaire +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_MORPION_SOLITAIRE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/morpion_solitaire/morpion_solitaire_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/morpion_solitaire/morpion_solitaire_test.cc new file mode 100644 index 0000000..1db8383 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/morpion_solitaire/morpion_solitaire_test.cc @@ -0,0 +1,56 @@ +// Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
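+// A worked example of the action encoding from morpion_solitaire.cc,
+// complementing the [0, 1] case checked in MoveConversionTest below: a line
+// in direction [1, 0] from (9, 3) to (12, 3) encodes as
+// 9 * 13 + 3 + 130 = 250, and decoding 250 recovers base = 120, row = 9,
+// column = 3.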
+ +#include "open_spiel/games/morpion_solitaire/morpion_solitaire.h" + +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace morpion_solitaire { +namespace { + +namespace testing = open_spiel::testing; + +void BasicMorpionTests() { + testing::LoadGameTest("morpion_solitaire"); + testing::RandomSimTest(*LoadGame("morpion_solitaire"), 10); +} + +void MoveConversionTest() { + Line line = Line(Point(4, 5), Point(1, 8)); + SPIEL_CHECK_EQ(line.GetAction(), 375); + line = Line(Point(9, 3), Point(9, 6)); + SPIEL_CHECK_EQ(line.GetAction(), 93); +} + +void LineOverlapsTest() { + Line line = Line(Point(5, 2), Point(2, 5)); + SPIEL_CHECK_EQ(line.CheckOverlap(Line(Point(6, 1), Point(3, 4))), true); + SPIEL_CHECK_EQ(line.CheckOverlap(Line(Point(3, 4), Point(0, 7))), true); + SPIEL_CHECK_EQ(line.CheckOverlap(Line(Point(4, 3), Point(7, 3))), false); + line = Line(Point(7, 4), Point(10, 7)); + SPIEL_CHECK_EQ(line.CheckOverlap(Line(Point(7, 2), Point(7, 5))), false); + SPIEL_CHECK_EQ(line.CheckOverlap(Line(Point(5, 2), Point(8, 5))), true); +} + +} // namespace +} // namespace morpion_solitaire +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::morpion_solitaire::BasicMorpionTests(); + open_spiel::morpion_solitaire::MoveConversionTest(); + open_spiel::morpion_solitaire::LineOverlapsTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/negotiation/negotiation.cc b/scenarios/bargaining/open_spiel/open_spiel/games/negotiation/negotiation.cc new file mode 100644 index 0000000..96be037 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/negotiation/negotiation.cc @@ -0,0 +1,804 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/negotiation/negotiation.h" + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/random/poisson_distribution.h" +#include "open_spiel/abseil-cpp/absl/random/uniform_int_distribution.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace negotiation { + +namespace { + +// Facts about the game +// const GameType kGameType{ +// /*short_name=*/"negotiation", +// /*long_name=*/"Negotiation", +// GameType::Dynamics::kSequential, +// GameType::ChanceMode::kExplicitStochastic, +// GameType::Information::kImperfectInformation, +// GameType::Utility::kGeneralSum, +// GameType::RewardModel::kTerminal, +// /*max_num_players=*/2, +// /*min_num_players=*/2, +// /*provides_information_state_string=*/true, +// /*provides_information_state_tensor=*/true, +// /*provides_observation_string=*/true, +// /*provides_observation_tensor=*/true, +// /*parameter_specification=*/ +// { +// {"num_items", GameParameter(kDefaultNumItems)}, +// {"quantity_mean", GameParameter(kDefaultQuantityMean)}, +// {"quantity_stddev", GameParameter(kDefaultQuantityStddev)}, +// {"min_quantity", GameParameter(kDefaultMinQuantity)}, +// {"max_quantity", GameParameter(kDefaultMaxQuantity)}, +// {"max_rounds", GameParameter(kDefaultMaxRounds)}, +// {"seed", GameParameter(kDefaultSeed)}, +// {"item_quantities", GameParameter(kDefaultItemQuantities)}, +// }}; + +static std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new NegotiationGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +std::string TurnTypeToString(TurnType turn_type) { + if (turn_type == TurnType::kProposal) { + return "Proposal"; + } else if (turn_type == TurnType::kUtterance) { + return "Utterance"; + } else { + SpielFatalError("Unrecognized turn type"); + } +} +} // namespace + +std::string NegotiationState::ActionToString(Player player, + Action move_id) const { + if (player == kChancePlayerId) { + return absl::StrCat("chance outcome ", move_id); + } + + // Use NumDistinctProposals() as the walk away action ID + if (move_id == parent_game_.NumDistinctProposals()) { + return absl::StrCat("Walk away (get ", walk_away_values_[player], " points)"); + } + + std::string action_string = ""; + if (turn_type_ == TurnType::kProposal) { + if (move_id == parent_game_.NumDistinctProposals() - 1) { + absl::StrAppend(&action_string, "Proposal: Agreement reached!"); + } else { + std::vector proposal = DecodeProposal(move_id); + std::string prop_str = absl::StrJoin(proposal, ", "); + absl::StrAppend(&action_string, "Proposal: [", prop_str, "]"); + } + } else { + std::vector utterance = DecodeUtterance(move_id); + std::string utt_str = absl::StrJoin(utterance, ", "); + absl::StrAppend(&action_string, ", Utterance: [", utt_str, "]"); + } + return action_string; +} + +bool NegotiationState::IsTerminal() const { + // If utterances are enabled, force the agent to utter something even when + // they accept the proposal or run out of steps (i.e. on ther last turn). + bool utterance_check = + (enable_utterances_ ? 
utterances_.size() == proposals_.size() : true); + return (agreement_reached_ || proposals_.size() >= max_steps_) && + utterance_check; +} + +std::vector NegotiationState::Returns() const { + if (!IsTerminal()) { + return std::vector(num_players_, 0.0); + } + + // If no agreement was reached (game ended due to max steps), give everyone their walk away values + if (!agreement_reached_) { + std::vector returns; + returns.reserve(walk_away_values_.size()); + for (int value : walk_away_values_) { + returns.push_back(static_cast(value)); + } + // Apply discount based on complete rounds - add one extra round for no agreement + if (num_steps_ > 2) { + // Calculate complete rounds - it should be (num_steps_ - 2) / 2 integer division + int complete_rounds = (num_steps_ - 2) / 2; // Integer division, floors the result + // Add one extra round of discount for no agreement + double discount = std::pow(parent_game_.discount(), complete_rounds + 1); + for (int i = 0; i < num_players_; ++i) { + returns[i] *= discount; + } + } else { + // Even for very short games, apply one round of discount for no agreement + double discount = parent_game_.discount(); + for (int i = 0; i < num_players_; ++i) { + returns[i] *= discount; + } + } + return returns; + } + + // Check if the last action was a walk away action + if (walk_away_) { + std::vector returns; + returns.reserve(walk_away_values_.size()); + for (int value : walk_away_values_) { + returns.push_back(static_cast(value)); + } + // Apply discount based on complete rounds + if (num_steps_ > 2) { + // Calculate complete rounds - it should be (num_steps_ - 2) / 2 integer division + int complete_rounds = (num_steps_ - 2) / 2; // Integer division, floors the result + double discount = std::pow(parent_game_.discount(), complete_rounds); + for (int i = 0; i < num_players_; ++i) { + returns[i] *= discount; + } + } + return returns; + } + + // Calculate rewards from accepted proposal + int proposing_player = proposals_.size() % 2 == 1 ? 
0 : 1; + int other_player = 1 - proposing_player; + const std::vector& final_proposal = proposals_.back(); + + // Calculate utilities for both players + std::vector returns(num_players_); + for (int i = 0; i < num_items_; ++i) { + returns[proposing_player] += final_proposal[i] * agent_utils_[proposing_player][i]; + returns[other_player] += (item_pool_[i] - final_proposal[i]) * agent_utils_[other_player][i]; + } + + // Apply discount based on complete rounds + if (num_steps_ > 2) { + // Calculate complete rounds - it should be (num_steps_ - 2) / 2 integer division + int complete_rounds = (num_steps_ - 2) / 2; // Integer division, floors the result + double discount = std::pow(parent_game_.discount(), complete_rounds); + for (int i = 0; i < num_players_; ++i) { + returns[i] *= discount; + } + } + + return returns; +} + +std::string NegotiationState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + if (IsChanceNode()) { + return "ChanceNode -- no observation"; + } + + std::string str = absl::StrCat("Max steps: ", max_steps_, "\n"); + absl::StrAppend(&str, "Item pool: ", absl::StrJoin(item_pool_, " "), "\n"); + + if (!agent_utils_.empty()) { + absl::StrAppend(&str, "Agent ", player, + " util vec: ", absl::StrJoin(agent_utils_[player], " "), + "\n"); + } + + // Add walk away value to observation + absl::StrAppend(&str, "Walk away value: ", walk_away_values_[player], "\n"); + + absl::StrAppend(&str, "Current player: ", CurrentPlayer(), "\n"); + absl::StrAppend(&str, "Turn Type: ", TurnTypeToString(turn_type_), "\n"); + + if (!proposals_.empty()) { + absl::StrAppend(&str, "Most recent proposal: [", + absl::StrJoin(proposals_.back(), ", "), "]\n"); + } + + if (!utterances_.empty()) { + absl::StrAppend(&str, "Most recent utterance: [", + absl::StrJoin(utterances_.back(), ", "), "]\n"); + } + + return str; +} + +// New structure: +// [Current player (2) | Turn type (2) | Terminal status (2) | +// Round number (1) | Base discount factor (1) | Current round discount (1) | +// Item pool (num_items) | Utilities (num_items) | +// Walk away value (1) | // Only include the observing player's walk away value +// Proposal history (max_rounds * 2 * num_items)] +std::vector NegotiationGame::ObservationTensorShape() const { + // New structure: + // [Current player (2) | Turn type (2) | Terminal status (2) | + // Round number (1) | Base discount factor (1) | Current round discount (1) | + // Item pool (num_items) | Utilities (num_items) | + // Walk away value (1) | // Only include the observing player's walk away value + // Proposal history (max_proposals * num_items)] + + // Calculate max_proposals consistently with how max_steps_ is determined in the state. + // If max_rounds parameter is set (>0), use max_rounds * 2. + // Otherwise, the state samples max_steps_ (max proposals) directly between 4 and 10. + // Use the maximum possible value (10) for shape calculation. + int max_proposals_for_shape = (max_rounds_ > 0) ? 
(max_rounds_ * 2) : 10; // Max proposals + + return {kNumPlayers + 2 + 2 + + 1 + 1 + 1 + + num_items_ + num_items_ + + 1 + // Only include the observing player's walk away value + max_proposals_for_shape * num_items_}; // Space for proposal history +} + +void NegotiationState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + SPIEL_CHECK_EQ(values.size(), parent_game_.ObservationTensorSize()); + std::fill(values.begin(), values.end(), 0); + + // No observations at chance nodes. + if (IsChanceNode()) { + return; + } + + int offset = 0; + + // Current player - still using one-hot encoding (2 values) + if (!IsTerminal()) { + values[offset + CurrentPlayer()] = 1; + } + offset += kNumPlayers; + + // Current turn type - still using one-hot encoding (2 values) + if (turn_type_ == TurnType::kProposal) { + values[offset] = 1; + } else { + values[offset + 1] = 1; + } + offset += 2; + + // Terminal status - still using one-hot encoding (2 values) + values[offset] = IsTerminal() ? 1 : 0; + values[offset + 1] = agreement_reached_ ? 1 : 0; + offset += 2; + + // Current round number (1 value) + int current_round = (num_steps_ > 0) ? (num_steps_ - 1) / 2 : 0; + values[offset] = static_cast(current_round); + offset += 1; + + // Base discount factor of the game (1 value) + values[offset] = static_cast(parent_game_.discount()); + offset += 1; + + // Current round's applied discount factor (1 value) + double current_discount = 1.0; + if (num_steps_ > 2) { + int complete_rounds = (num_steps_ - 2) / 2; + current_discount = std::pow(parent_game_.discount(), complete_rounds); + } + // Ensure the discount factor is stored properly as a float + values[offset] = static_cast(current_discount); + offset += 1; + + // Item pool - direct values (num_items values) + for (int i = 0; i < num_items_; ++i) { + values[offset + i] = static_cast(item_pool_[i]); + } + offset += num_items_; + + // Player utilities - direct values (num_items values) + for (int i = 0; i < num_items_; ++i) { + values[offset + i] = static_cast(agent_utils_[player][i]); + } + offset += num_items_; + + // Walk away value for the observing player only (1 value) + values[offset] = static_cast(walk_away_values_[player]); + offset += 1; + + // Proposal history (max_steps_ * num_items values) + // Initialize all to -1 (indicating no proposal) + for (int i = 0; i < max_steps_ * num_items_; ++i) { + values[offset + i] = -1; + } + + // Fill in proposals that have been made + for (int p = 0; p < proposals_.size() && p < max_steps_; ++p) { + for (int i = 0; i < num_items_; ++i) { + values[offset + p * num_items_ + i] = static_cast(proposals_[p][i]); + } + } + + // Correctly calculate the expected size based on the maximum possible steps used for allocation. + int max_proposals_for_shape = (parent_game_.MaxRounds() > 0) ? 
(parent_game_.MaxRounds() * 2) : 10; + SPIEL_CHECK_EQ(offset + max_proposals_for_shape * num_items_, values.size()); +} + +NegotiationState::NegotiationState(std::shared_ptr game) + : State(game), + parent_game_(static_cast(*game)), + enable_proposals_(parent_game_.EnableProposals()), + enable_utterances_(parent_game_.EnableUtterances()), + num_items_(parent_game_.NumItems()), + num_symbols_(parent_game_.NumSymbols()), + utterance_dim_(parent_game_.UtteranceDim()), + num_steps_(0), + max_steps_(-1), + agreement_reached_(false), + cur_player_(kChancePlayerId), + turn_type_(TurnType::kProposal), + discount_(1.0), // Initialize discount to 1.0 + item_pool_({}), + agent_utils_({}), + proposals_({}), + utterances_({}), + walk_away_values_({}) {} + +int NegotiationState::CurrentPlayer() const { + return IsTerminal() ? kTerminalPlayerId : cur_player_; +} + +// From Sec 2.1 of the paper: "At each round (i) an item pool is sampled +// uniformly, instantiating a quantity (between 0 and 5) for each of the types +// and represented as a vector i \in {0...5}^3 and (ii) each agent j receives a +// utility function sampled uniformly, which specifies how rewarding one unit of +// each item is (with item rewards between 0 and 10, and with the constraint +// that there is at least one item with non-zero utility), represented as a +// vector u_j \in {0...10}^3". +void NegotiationState::DetermineItemPoolAndUtilities() { + // Clear existing values + item_pool_.clear(); + agent_utils_.clear(); + walk_away_values_.clear(); + + // Use the configured max_rounds if available, otherwise sample it + if (parent_game_.MaxRounds() > 0) { + // Set directly from the parameter + max_steps_ = parent_game_.MaxRounds() * 2; // Each round has 2 steps (player 0, player 1) + } else { + // Generate max number of rounds (max number of steps for the episode): we + // sample N between 4 and 10 at the start of each episode, according to a + // truncated Poissondistribution with mean 7, as done in the Cao et al. '18 + // paper. + max_steps_ = -1; + absl::poisson_distribution steps_dist(7.0); + while (!(max_steps_ >= 4 && max_steps_ <= 10)) { + max_steps_ = steps_dist(*parent_game_.RNG()); + } + } + + // Generate the pool of items + if (!parent_game_.ItemQuantities().empty()) { + // Use specified quantities + std::vector quantities = absl::StrSplit(parent_game_.ItemQuantities(), ','); + SPIEL_CHECK_EQ(quantities.size(), num_items_); + for (const std::string& quantity_str : quantities) { + int quantity; + SPIEL_CHECK_TRUE(absl::SimpleAtoi(quantity_str, &quantity)); + SPIEL_CHECK_GE(quantity, parent_game_.MinQuantity()); + SPIEL_CHECK_LE(quantity, parent_game_.MaxQuantity()); + item_pool_.push_back(quantity); + } + } else { + // Generate random quantities using Poisson distribution + absl::poisson_distribution quantity_dist(parent_game_.QuantityMean()); + for (int i = 0; i < num_items_; ++i) { + // Generate the quantity with Poisson distribution, resampling if it exceeds max_quantity. + int quantity = -1; + while (quantity < 0 || quantity > parent_game_.MaxQuantity()) { + quantity = quantity_dist(*parent_game_.RNG()); + // Ensure quantity is not negative, although Poisson should be non-negative. + if (quantity < 0) quantity = 0; + } + item_pool_.push_back(quantity); + } + } + + // Generate agent utilities. 
+ absl::uniform_int_distribution util_dist(parent_game_.MinValue(), parent_game_.MaxValue()); + for (int i = 0; i < num_players_; ++i) { + agent_utils_.push_back({}); + int sum_util = 0; + while (sum_util == 0) { + agent_utils_[i].clear(); + for (int j = 0; j < num_items_; ++j) { + agent_utils_[i].push_back(util_dist(*parent_game_.RNG())); + sum_util += agent_utils_[i].back(); + } + } + } + + // Generate walk away values + walk_away_values_.resize(num_players_); + for (int i = 0; i < num_players_; ++i) { + // Calculate maximum possible utility for this player + int max_utility = 0; + for (int j = 0; j < num_items_; ++j) { + max_utility += agent_utils_[i][j] * item_pool_[j]; + } + // Generate random walk away value between 1 and max_utility + if (max_utility > 1) { + absl::uniform_int_distribution walk_away_dist(1, max_utility); + walk_away_values_[i] = walk_away_dist(*parent_game_.RNG()); + } else { + walk_away_values_[i] = 1; // If max_utility is 1, just use 1 + } + } +} + +void NegotiationState::InitializeEpisode() { + num_steps_ = 0; + agreement_reached_ = false; + walk_away_ = false; // Reset walk away flag + cur_player_ = 0; + turn_type_ = TurnType::kProposal; + proposals_.clear(); + utterances_.clear(); + + // Generate new item pool and utilities + DetermineItemPoolAndUtilities(); +} + +void NegotiationState::DoApplyAction(Action move_id) { + if (IsChanceNode()) { + DetermineItemPoolAndUtilities(); + cur_player_ = 0; + turn_type_ = TurnType::kProposal; + } else { + // Use NumDistinctProposals() as the walk away action ID + if (move_id == parent_game_.NumDistinctProposals()) { + walk_away_ = true; + agreement_reached_ = true; + } else if (move_id == parent_game_.NumDistinctProposals() - 1) { // Agreement action ID + walk_away_ = false; // Explicitly set walk away to false for accept + agreement_reached_ = true; + } else { + if (turn_type_ == TurnType::kProposal) { + proposals_.push_back(DecodeProposal(move_id)); + } else { + utterances_.push_back(DecodeUtterance(move_id)); + } + } + + // Switch players and turn types + if (turn_type_ == TurnType::kProposal) { + if (enable_utterances_) { + turn_type_ = TurnType::kUtterance; + } else { + cur_player_ = 1 - cur_player_; + turn_type_ = TurnType::kProposal; + } + } else { + cur_player_ = 1 - cur_player_; + turn_type_ = TurnType::kProposal; + } + } + num_steps_++; // Increment step counter +} + +bool NegotiationState::NextProposal(std::vector* proposal) const { + // Starting from the right, move left trying to increase the value. When + // successful, increment the value and set all the right digits back to 0. + for (int i = num_items_ - 1; i >= 0; --i) { + if ((*proposal)[i] + 1 <= item_pool_[i]) { + // Success! 
+ (*proposal)[i]++; + for (int j = i + 1; j < num_items_; ++j) { + (*proposal)[j] = 0; + } + return true; + } + } + + return false; +} + +std::vector NegotiationState::DecodeInteger(int encoded_value, + int dimensions, + int num_digit_values) const { + std::vector decoded(dimensions, 0); + int i = dimensions - 1; + while (encoded_value > 0) { + SPIEL_CHECK_GE(i, 0); + SPIEL_CHECK_LT(i, dimensions); + decoded[i] = encoded_value % num_digit_values; + encoded_value /= num_digit_values; + i--; + } + return decoded; +} + +int NegotiationState::EncodeInteger(const std::vector& container, + int num_digit_values) const { + int encoded_value = 0; + for (int digit : container) { + encoded_value = encoded_value * num_digit_values + digit; + } + return encoded_value; +} + +Action NegotiationState::EncodeProposal( + const std::vector& proposal) const { + SPIEL_CHECK_EQ(proposal.size(), num_items_); + return EncodeInteger(proposal, parent_game_.MaxQuantity() + 1); +} + +Action NegotiationState::EncodeUtterance( + const std::vector& utterance) const { + SPIEL_CHECK_EQ(utterance.size(), utterance_dim_); + // Utterance ids are offset from zero (starting at NumDistinctProposals()). + return parent_game_.NumDistinctProposals() + + EncodeInteger(utterance, num_symbols_); +} + +std::vector NegotiationState::DecodeProposal(int encoded_proposal) const { + return DecodeInteger(encoded_proposal, num_items_, parent_game_.MaxQuantity() + 1); +} + +std::vector NegotiationState::DecodeUtterance( + int encoded_utterance) const { + // Utterance ids are offset from zero (starting at NumDistinctProposals()). + return DecodeInteger(encoded_utterance - parent_game_.NumDistinctProposals(), + utterance_dim_, num_symbols_); +} + +std::vector NegotiationState::LegalActions() const { + if (IsChanceNode()) { + return LegalChanceOutcomes(); + } else if (IsTerminal()) { + return {}; + } else if (turn_type_ == TurnType::kProposal) { + std::vector legal_actions; + + // Proposals are always enabled, so first contruct them. + std::vector proposal(num_items_, 0); + // Initial proposal [0, 0, ...] is always valid w.r.t max_quantity + legal_actions.push_back(EncodeProposal(proposal)); + + while (NextProposal(&proposal)) { + // Since DetermineItemPoolAndUtilities now ensures item_pool_[i] <= max_quantity_, + // and NextProposal respects item_pool_, any generated proposal is valid. + legal_actions.push_back(EncodeProposal(proposal)); + } + + if (!proposals_.empty()) { + // Add the agreement action only if there's been at least one proposal. + // Agreement action ID is NumDistinctProposals - 1 + legal_actions.push_back(parent_game_.NumDistinctProposals() - 1); + } + + // Add walk away action. ID is NumDistinctProposals. + legal_actions.push_back(parent_game_.NumDistinctProposals()); + + return legal_actions; + } else { + SPIEL_CHECK_TRUE(enable_utterances_); + SPIEL_CHECK_FALSE(parent_game_.LegalUtterances().empty()); + return parent_game_.LegalUtterances(); + } +} + +std::vector> NegotiationState::ChanceOutcomes() + const { + SPIEL_CHECK_TRUE(IsChanceNode()); + // The game has chance mode kSampledStochastic, so there is only a single + // outcome, and it's all randomized in the ApplyAction. 
+ std::vector> outcomes = {std::make_pair(0, 1.0)}; + return outcomes; +} + +std::string NegotiationState::ToString() const { + if (IsChanceNode()) { + return "Initial chance node"; + } + + std::string str = absl::StrCat("Max steps: ", max_steps_, "\n"); + absl::StrAppend(&str, "Item pool: ", absl::StrJoin(item_pool_, " "), "\n"); + + if (!agent_utils_.empty()) { + for (int i = 0; i < num_players_; ++i) { + absl::StrAppend(&str, "Agent ", i, + " util vec: ", absl::StrJoin(agent_utils_[i], " "), "\n"); + } + } + + absl::StrAppend(&str, "Current player: ", cur_player_, "\n"); + absl::StrAppend(&str, "Turn Type: ", TurnTypeToString(turn_type_), "\n"); + + for (int i = 0; i < proposals_.size(); ++i) { + absl::StrAppend(&str, "Player ", i % 2, " proposes: [", + absl::StrJoin(proposals_[i], ", "), "]"); + if (enable_utterances_ && i < utterances_.size()) { + absl::StrAppend(&str, " utters: [", absl::StrJoin(utterances_[i], ", "), + "]"); + } + absl::StrAppend(&str, "\n"); + } + + if (agreement_reached_) { + absl::StrAppend(&str, "Agreement reached!\n"); + } + + return str; +} + +std::unique_ptr NegotiationState::Clone() const { + return std::unique_ptr(new NegotiationState(*this)); +} + +NegotiationGame::NegotiationGame(const GameParameters& params) + : Game(kGameType, params), + enable_proposals_( + ParameterValue("enable_proposals", kDefaultEnableProposals)), + enable_utterances_( + ParameterValue("enable_utterances", kDefaultEnableUtterances)), + num_items_(ParameterValue("num_items", kDefaultNumItems)), + num_symbols_(ParameterValue("num_symbols", kDefaultNumSymbols)), + utterance_dim_( + ParameterValue("utterance_dim", kDefaultUtteranceDim)), + seed_(ParameterValue("rng_seed", kDefaultSeed)), + discount_(ParameterValue("discount", kDefaultDiscount)), + min_quantity_(ParameterValue("min_quantity", kDefaultMinQuantity)), + max_quantity_(ParameterValue("max_quantity", kDefaultMaxQuantity)), + min_value_(ParameterValue("min_value", kDefaultMinValue)), + max_value_(ParameterValue("max_value", kDefaultMaxValue)), + quantity_mean_(ParameterValue("quantity_mean", kDefaultQuantityMean)), + max_rounds_(ParameterValue("max_rounds", kDefaultMaxRounds)), + item_quantities_(ParameterValue("item_quantities", kDefaultItemQuantities)), + legal_utterances_({}) { + // Use a time-based random seed when none is provided + if (seed_ < 0) { + // Use current time as seed for true randomness when no seed is provided + rng_ = std::make_unique(std::random_device{}()); + } else { + // Use the provided seed for deterministic behavior + rng_ = std::make_unique(seed_); + } + ConstructLegalUtterances(); +} + +void NegotiationGame::ConstructLegalUtterances() { + if (enable_utterances_) { + legal_utterances_.resize(NumDistinctUtterances()); + for (int i = 0; i < NumDistinctUtterances(); ++i) { + legal_utterances_[i] = NumDistinctProposals() + i; + } + } +} + +int NegotiationGame::MaxGameLength() const { + // max_steps_ in the state represents the maximum number of proposals allowed for the episode. + // Calculate the maximum possible value for this across episodes. + int max_proposals = (max_rounds_ > 0) ? (max_rounds_ * 2) : 10; // Max proposals + + // Calculate the maximum number of player actions (proposals + potential utterances) + int max_player_actions = max_proposals; + if (enable_utterances_) { + max_player_actions += max_proposals; // Add one utterance action for each proposal action + } + + // Add 1 for the initial chance node action. 
+ return max_player_actions + 1; +} + +int NegotiationGame::NumDistinctUtterances() const { + return static_cast(std::pow(num_symbols_, utterance_dim_)); +} + +int NegotiationGame::NumDistinctProposals() const { + // Every slot can hold { 0, 1, ..., MaxQuantity }, and there is an extra + // one at the end for the special "agreement reached" action. + return static_cast(std::pow(max_quantity_ + 1, num_items_)) + 1; +} + +std::string NegotiationState::Serialize() const { + if (IsChanceNode()) { + return "chance"; + } else { + std::string state_str = ""; + absl::StrAppend(&state_str, MaxSteps(), "\n"); + absl::StrAppend(&state_str, absl::StrJoin(ItemPool(), " "), "\n"); + for (int p = 0; p < NumPlayers(); ++p) { + absl::StrAppend(&state_str, absl::StrJoin(AgentUtils()[p], " "), "\n"); + } + absl::StrAppend(&state_str, HistoryString(), "\n"); + return state_str; + } +} + +std::unique_ptr NegotiationGame::DeserializeState( + const std::string& str) const { + if (str == "chance") { + return NewInitialState(); + } else { + std::vector lines = absl::StrSplit(str, '\n'); + std::unique_ptr state = NewInitialState(); + SPIEL_CHECK_EQ(lines.size(), 5); + NegotiationState& nstate = static_cast(*state); + // Take the chance action, but then reset the quantities. Make sure game's + // RNG state is not advanced during deserialization so copy it beforehand + // in order to be able to restore after the chance action. + std::unique_ptr rng = std::make_unique(*rng_); + nstate.ApplyAction(0); + rng_ = std::move(rng); + nstate.ItemPool().clear(); + nstate.AgentUtils().clear(); + // Max steps + nstate.SetMaxSteps(std::stoi(lines[0])); + // Item pool. + std::vector parts = absl::StrSplit(lines[1], ' '); + for (const auto& part : parts) { + nstate.ItemPool().push_back(std::stoi(part)); + } + // Agent utilities. + for (Player player : {0, 1}) { + parts = absl::StrSplit(lines[2 + player], ' '); + nstate.AgentUtils().push_back({}); + for (const auto& part : parts) { + nstate.AgentUtils()[player].push_back(std::stoi(part)); + } + } + nstate.SetCurrentPlayer(0); + // Actions. + if (lines.size() == 5) { + parts = absl::StrSplit(lines[4], ' '); + // Skip the first one since it is the chance node. + for (int i = 1; i < parts.size(); ++i) { + Action action = static_cast(std::stoi(parts[i])); + nstate.ApplyAction(action); + } + } + return state; + } +} + +std::string NegotiationGame::GetRNGState() const { + std::ostringstream rng_stream; + rng_stream << *rng_; + return rng_stream.str(); +} + +void NegotiationGame::SetRNGState(const std::string& rng_state) const { + if (rng_state.empty()) return; + std::istringstream rng_stream(rng_state); + rng_stream >> *rng_; +} + +std::unique_ptr NegotiationGame::NewInitialState() const { + // If a seed was provided, reset the RNG to ensure consistent game configurations + if (seed_ >= 0) { + rng_ = std::make_unique(seed_); + } + return std::unique_ptr(new NegotiationState(shared_from_this())); +} + +} // namespace negotiation +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/negotiation/negotiation.h b/scenarios/bargaining/open_spiel/open_spiel/games/negotiation/negotiation.h new file mode 100644 index 0000000..94daa21 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/negotiation/negotiation.h @@ -0,0 +1,284 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_NEGOTIATION_H_ +#define OPEN_SPIEL_GAMES_NEGOTIATION_H_ + +#include +#include +#include +#include + +#include "open_spiel/spiel.h" + +// A simple negotiation game where agents propose splits of a group of items, +// until the maximum number of turns runs out or an offer is accepted. + +// This game is inspired by the following papers: +// - DeVault et al., Toward natural turn-taking in a virtual human negotiation +// agent, 2015. +// - Lewis et al., Deal or no deal? End-to-end learning of negotiation +// dialogues, 2017. +// - Cao et al., Emergent Communication through Negotiation, 2018. +// https://arxiv.org/abs/1804.03980 +// +// We use the specific description in Cao et al. 2018. However, we choose +// default settings that lead to a smaller game since the values used in the +// paper (utterance_dim = 6, num_symbols = 10) could lead to legal action sizes +// of 2.17 * 10^8. +// TODO(author5): fix this restriction by either (i) adding support for legal +// action iterators rather than lists, or (ii) supporting structured actions, +// (or both!). +// +// Parameters: +// "enable_proposals" bool open the proposal channel (default = true) +// "enable_utterances" bool open the linguistic channel (default = true) +// "num_items" int number of distinct items (default = 5) +// "num_symbols" int number of distinct symbols (default = 5) +// "rng_seed" int seed for the random number generator +// (default -1 = not set, seeded by clock) +// "utterance_dim" int dimensionality of the utterances, i.e. number +// of symbols per utterance (default = 3) +// "discount" double discount factor applied after turn 2 +// (default = 1.0) + +namespace open_spiel { +namespace negotiation { + +inline constexpr bool kDefaultEnableProposals = true; +inline constexpr bool kDefaultEnableUtterances = true; +inline constexpr int kDefaultNumSymbols = 5; +inline constexpr int kDefaultUtteranceDim = 3; +inline constexpr int kDefaultMinQuantity = 0; +inline constexpr int kDefaultMaxQuantity = 5; +inline constexpr int kDefaultMinValue = 0; +inline constexpr int kDefaultMaxValue = 10; +inline constexpr double kDefaultQuantityMean = 3.0; +inline constexpr int kMaxSteps = 10; +inline constexpr int kDefaultMaxRounds = 5; // Default max rounds +inline constexpr int kNumPlayers = 2; +inline constexpr int kDefaultNumItems = 3; +inline constexpr int kDefaultSeed = -1; +inline constexpr double kDefaultDiscount = 1.0; +inline constexpr const char* kDefaultItemQuantities = ""; // Default empty string means random quantities + +// The utterances and proposals are done in separate repeated turns by the same +// agent. This enum is used to keep track what the type of turn it is. 
+enum class TurnType { kUtterance, kProposal }; + +class NegotiationGame; + +class NegotiationState : public State { + public: + NegotiationState(std::shared_ptr game); + NegotiationState(const NegotiationState&) = default; + + Player CurrentPlayer() const override; + std::string ActionToString(Player player, Action move_id) const override; + std::vector> ChanceOutcomes() const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + + std::unique_ptr Clone() const override; + std::vector LegalActions() const override; + + const std::vector& ItemPool() const { return item_pool_; } + const std::vector>& AgentUtils() const { + return agent_utils_; + } + std::vector& ItemPool() { return item_pool_; } + std::vector>& AgentUtils() { return agent_utils_; } + void SetCurrentPlayer(Player p) { cur_player_ = p; } + int MaxSteps() const { return max_steps_; } + void SetMaxSteps(int max_steps) { max_steps_ = max_steps; } + std::string Serialize() const override; + int WalkAwayValue(Player player) const { return walk_away_values_[player]; } + const std::vector>& Proposals() const { return proposals_; } + std::vector DecodeProposal(int encoded_proposal) const; + + protected: + void DoApplyAction(Action move_id) override; + + private: + // Initialize pool of items and agent utilities. + void DetermineItemPoolAndUtilities(); + + // Initialize state variables to start an episode. + void InitializeEpisode(); + + // Get the next valid proposal; returns false when there are no more. + bool NextProposal(std::vector* proposal) const; + + // Action encoding and decoding helpers. Actions are encoded as follows: + // the first values { 0, 1, ... , NumDistinctProposals() - 1 } are reserved + // for proposals, encoded in the usual way (fixed base). The next + // NumDistinctUtterances() values are reserved for utterances, so these begin + // at an offset of NumDistinctProposals(). + Action EncodeProposal(const std::vector& proposal) const; + Action EncodeUtterance(const std::vector& utterance) const; + std::vector DecodeUtterance(int encoded_utterance) const; + + std::vector DecodeInteger(int encoded_value, int dimensions, + int num_digit_values) const; + int EncodeInteger(const std::vector& container, + int num_digit_values) const; + + const NegotiationGame& parent_game_; + bool enable_proposals_; + bool enable_utterances_; + int num_items_; + int num_symbols_; + int utterance_dim_; + int num_steps_; + int max_steps_; + bool agreement_reached_; + bool walk_away_; // Track if the last action was a walk away + Player cur_player_; + TurnType turn_type_; + double discount_; + + // Current quantities of items 0, 1, 2.. + std::vector item_pool_; + + // Utilities for each item of each player: agent_utils_[i][j] represents + // player i's utility for the jth item. + std::vector> agent_utils_; + + // History of proposals. + std::vector> proposals_; + + // History of utterances. 
+ std::vector> utterances_; + + // Walk away values for each player + std::vector walk_away_values_; +}; + +class NegotiationGame : public Game { + public: + explicit NegotiationGame(const GameParameters& params); + explicit NegotiationGame(const NegotiationGame& other); + + int NumDistinctActions() const override { + if (enable_utterances_) { + return NumDistinctProposals() + NumDistinctUtterances() + 1; // +1 for walk away + } else { + return NumDistinctProposals() + 1; // +1 for walk away + } + } + std::unique_ptr NewInitialState() const override; + int MaxChanceOutcomes() const override { return 1; } + + // There is arbitrarily chosen number to ensure the game is finite. + int MaxGameLength() const override; + // TODO: verify whether this bound is tight and/or tighten it. + int MaxChanceNodesInHistory() const override { return MaxGameLength(); } + + int NumPlayers() const override { return kNumPlayers; } + double MaxUtility() const override { + return max_quantity_ * max_value_ * num_items_; + } + double MinUtility() const override { return -MaxUtility(); } + std::vector ObservationTensorShape() const override; + + std::unique_ptr DeserializeState( + const std::string& str) const override; + std::string GetRNGState() const; + void SetRNGState(const std::string& rng_state) const; + + std::mt19937* RNG() const { return rng_.get(); } + bool EnableProposals() const { return enable_proposals_; } + bool EnableUtterances() const { return enable_utterances_; } + int NumItems() const { return num_items_; } + int NumSymbols() const { return num_symbols_; } + int UtteranceDim() const { return utterance_dim_; } + int MinQuantity() const { return min_quantity_; } + int MaxQuantity() const { return max_quantity_; } + int MinValue() const { return min_value_; } + int MaxValue() const { return max_value_; } + double QuantityMean() const { return quantity_mean_; } + int MaxRounds() const { return max_rounds_; } + const std::string& ItemQuantities() const { return item_quantities_; } + + int NumDistinctUtterances() const; + int NumDistinctProposals() const; + + const std::vector& LegalUtterances() const { + return legal_utterances_; + } + + double discount() const { return discount_; } + + private: + void ConstructLegalUtterances(); + + bool enable_proposals_; + bool enable_utterances_; + int num_items_; + int num_symbols_; + int utterance_dim_; + int seed_; + double discount_; + int min_quantity_; + int max_quantity_; + int min_value_; + int max_value_; + double quantity_mean_; + int max_rounds_; + std::string item_quantities_; // Comma-separated list of item quantities + std::vector legal_utterances_; + mutable std::unique_ptr rng_; +}; + +namespace { +// Facts about the game +const GameType kGameType{ + /*short_name=*/"negotiation", + /*long_name=*/"Negotiation", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kSampledStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"enable_proposals", GameParameter(kDefaultEnableProposals)}, + {"enable_utterances", GameParameter(kDefaultEnableUtterances)}, + {"num_items", GameParameter(kDefaultNumItems)}, + {"num_symbols", GameParameter(kDefaultNumSymbols)}, + {"rng_seed", GameParameter(kDefaultSeed)}, + {"utterance_dim", 
GameParameter(kDefaultUtteranceDim)}, + {"min_quantity", GameParameter(kDefaultMinQuantity)}, + {"max_quantity", GameParameter(kDefaultMaxQuantity)}, + {"min_value", GameParameter(kDefaultMinValue)}, + {"max_value", GameParameter(kDefaultMaxValue)}, + {"quantity_mean", GameParameter(kDefaultQuantityMean)}, + {"max_rounds", GameParameter(kDefaultMaxRounds)}, + {"discount", GameParameter(kDefaultDiscount)}, + {"item_quantities", GameParameter(kDefaultItemQuantities)}}}; +} + +} // namespace negotiation +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_NEGOTIATION_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/negotiation/negotiation_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/negotiation/negotiation_test.cc new file mode 100644 index 0000000..e60bcbd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/negotiation/negotiation_test.cc @@ -0,0 +1,64 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace negotiation { +namespace { + +namespace testing = open_spiel::testing; + +void BasicNegotiationTests() { + testing::LoadGameTest("negotiation"); + testing::ChanceOutcomesTest(*LoadGame("negotiation")); + + // Try with defaults (utterances and proposals). + std::cout << "\nStarting defaults test..." << std::endl; + testing::RandomSimTest(*LoadGame("negotiation"), 100); + + // Try without utterances. + std::cout << "\nStarting no utterances test..." << std::endl; + testing::RandomSimTest( + *LoadGame("negotiation", {{"enable_utterances", GameParameter(false)}}), + 100); + + // Try without utterances and without proposals + std::cout << "\nStarting no utterances and no proposals test..." << std::endl; + testing::RandomSimTest( + *LoadGame("negotiation", {{"enable_utterances", GameParameter(false)}, + {"enable_proposals", GameParameter(false)}}), + 100); + + // Try without without proposals + std::cout << "\nStarting no utterances and no proposals test..." << std::endl; + testing::RandomSimTest( + *LoadGame("negotiation", {{"enable_proposals", GameParameter(false)}}), + 100); + + // Try with discount factor + std::cout << "\nStarting discount factor test..." 
<< std::endl; + testing::RandomSimTest( + *LoadGame("negotiation", {{"discount", GameParameter(0.9)}}), + 100); +} + +} // namespace +} // namespace negotiation +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::negotiation::BasicNegotiationTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/nfg_game/games/matching_pennies_3p.nfg b/scenarios/bargaining/open_spiel/open_spiel/games/nfg_game/games/matching_pennies_3p.nfg new file mode 100644 index 0000000..9ea3bf9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/nfg_game/games/matching_pennies_3p.nfg @@ -0,0 +1,11 @@ +NFG 1 R "OpenSpiel export of matching_pennies_3p()" +{ "Player 0" "Player 1" "Player 2" } { 2 2 2 } + +1 1 -1 +-1 1 1 +-1 -1 -1 +1 -1 1 +1 -1 1 +-1 -1 -1 +-1 1 1 +1 1 -1 diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/nfg_game/games/sample.nfg b/scenarios/bargaining/open_spiel/open_spiel/games/nfg_game/games/sample.nfg new file mode 100644 index 0000000..b6a22ad --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/nfg_game/games/sample.nfg @@ -0,0 +1,4 @@ +NFG 1 R "Selten (IJGT, 75), Figure 2, normal form" +{ "Player 1" "Player 2" } { 3 2 } + +1 1 0 2 0 2 1 1 0 3 2 0 diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/nfg_game/nfg_game.cc b/scenarios/bargaining/open_spiel/open_spiel/games/nfg_game/nfg_game.cc new file mode 100644 index 0000000..0ade2a7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/nfg_game/nfg_game.cc @@ -0,0 +1,312 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/nfg_game/nfg_game.h" + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/numbers.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/matrix_game.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tensor_game.h" +#include "open_spiel/utils/file.h" + +namespace open_spiel { +namespace nfg_game { +namespace { +using std::shared_ptr; + +constexpr int kBuffSize = 1024; + +// Facts about the game. These are defaults that will differ depending on the +// game's descriptions. Using dummy defaults just to register the game. 
+const GameType kGameType{/*short_name=*/"nfg_game", + /*long_name=*/"nfg_game", + GameType::Dynamics::kSimultaneous, + GameType::ChanceMode::kDeterministic, + GameType::Information::kOneShot, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/100, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/false, + /*parameter_specification=*/ + {{"filename", GameParameter(std::string(""))}}, + /*default_loadable=*/false}; + +class NFGGameParser { + public: + explicit NFGGameParser(const std::string& data) + : string_data_(data), pos_(0) {} + + shared_ptr ParseGame() { + // Skip any initial whitespace. + while (IsWhiteSpace(string_data_.at(pos_))) { + AdvancePosition(); + } + SPIEL_CHECK_LT(pos_, string_data_.length()); + + ParsePrologue(); + InitializeMetaInformation(); + ParseUtilities(); + + if (num_players_ == 2) { + return matrix_game::CreateMatrixGame( + "matrix_nfg", name_, matrix_row_action_names_, + matrix_col_action_names_, matrix_row_utilities_, + matrix_col_utilities_); + } else { + return tensor_game::CreateTensorGame( + "tensor_nfg", name_, tensor_action_names_, tensor_utilities_); + } + } + + private: + void ParsePrologue() { + // Parse the first part of the header "NFG 1 R " + SPIEL_CHECK_TRUE(NextToken() == "NFG"); + SPIEL_CHECK_TRUE(NextToken() == "1"); + // Older versions of .nfg format use D + std::string data_type = NextToken(); + SPIEL_CHECK_TRUE(data_type == "R" || data_type == "D"); + SPIEL_CHECK_EQ(string_data_.at(pos_), '"'); + name_ = NextToken(); + // Player names + std::string token = NextToken(); + SPIEL_CHECK_TRUE(token == "{"); + SPIEL_CHECK_EQ(string_data_.at(pos_), '"'); + token = NextToken(); + while (token != "}") { + player_names_.push_back(token); + token = NextToken(); + } + num_players_ = player_names_.size(); + // Number of actions + token = NextToken(); + SPIEL_CHECK_TRUE(token == "{"); + token = NextToken(); + while (token != "}") { + int num = 0; + SPIEL_CHECK_TRUE(absl::SimpleAtoi(token, &num)); + num_actions_.push_back(num); + token = NextToken(); + } + SPIEL_CHECK_EQ(num_actions_.size(), num_players_); + } + + void InitializeMetaInformation() { + total_entries_ = std::accumulate(num_actions_.begin(), num_actions_.end(), + 1ULL, std::multiplies()); + SPIEL_CHECK_GT(total_entries_, 0); + + // Fill some of the meta information. 
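+    // Two-player games are represented as a MatrixGame (separate row and
+    // column utilities); games with more than two players become a TensorGame
+    // with one flat utility vector per player.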
+ if (num_players_ == 2) { + matrix_row_action_names_.reserve(num_actions_[0]); + matrix_col_action_names_.reserve(num_actions_[1]); + matrix_row_utilities_ = std::vector(total_entries_, 0); + matrix_col_utilities_ = std::vector(total_entries_, 0); + for (int a = 0; a < num_actions_[0]; ++a) { + matrix_row_action_names_.push_back(absl::StrCat("", a)); + } + for (int a = 0; a < num_actions_[1]; ++a) { + matrix_col_action_names_.push_back(absl::StrCat("", a)); + } + } else { + tensor_action_names_.reserve(num_players_); + tensor_utilities_.reserve(num_players_); + for (int p = 0; p < num_players_; ++p) { + tensor_utilities_.push_back(std::vector(total_entries_, 0)); + tensor_action_names_.push_back({}); + tensor_action_names_.back().reserve(num_actions_[p]); + for (int a = 0; a < num_actions_[p]; ++a) { + tensor_action_names_[p].push_back(absl::StrCat("", a)); + } + } + } + } + + int RowMajorIndex(const std::vector& num_actions, + const std::vector& actions) { + int index = 0; + int base_value = 1; + for (int p = actions.size() - 1; p >= 0; --p) { + if (p + 1 < actions.size()) { + base_value *= num_actions[p + 1]; + } + index += actions[p] * base_value; + } + return index; + } + + void ParseUtilities() { + // Parse all the utilities. + std::string token; + std::vector actions(num_players_, 0); + for (uint64_t entry = 0; entry < total_entries_; ++entry) { + double value = 0; + int row_major_index = RowMajorIndex(num_actions_, actions); + for (int p = 0; p < num_players_; ++p) { + // Check that the position has not reached the end for every value we + // read, except the very last one. + bool check_end = entry != total_entries_ - 1 && p != num_players_ - 1; + std::string token = NextToken(check_end); + ParseDoubleValue(token, &value); + + if (num_players_ == 2) { + if (p == 0) { + matrix_row_utilities_[row_major_index] = value; + } else { + matrix_col_utilities_[row_major_index] = value; + } + } else { + tensor_utilities_[p][row_major_index] = value; + } + } + + // next action indices, in column-major order. + for (int i = 0; i < actions.size(); ++i) { + if (++actions[i] < num_actions_[i]) { + break; + } else { + actions[i] = 0; + } + } + } + + // After reading all the utilities, we should reach the end of the file. + SPIEL_CHECK_EQ(pos_, string_data_.length()); + } + + bool ParseDoubleValue(const std::string& str, double* value) const { + if (str.find('/') != std::string::npos) { + // Check for rational number of the form X/Y + std::vector parts = absl::StrSplit(str, '/'); + SPIEL_CHECK_EQ(parts.size(), 2); + int numerator = 0, denominator = 0; + bool success = absl::SimpleAtoi(parts[0], &numerator); + if (!success) { + return false; + } + success = absl::SimpleAtoi(parts[1], &denominator); + if (!success) { + return false; + } + SPIEL_CHECK_FALSE(denominator == 0); + *value = static_cast(numerator) / denominator; + return true; + } else { + // Otherwise, parse as a double. + return absl::SimpleAtod(str, value); + } + } + + bool IsWhiteSpace(char c) const { + return (c == ' ' || c == '\r' || c == '\n'); + } + + void AdvancePosition() { pos_++; } + + // Get the next token, and then advance the position to the start of the next + // token. If check_not_end is true, then a check is done to ensure that the + // position has not reached the end of the string. 
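+  // Tokens are whitespace-delimited; a token beginning with '"' is read as a
+  // quoted string (which may contain spaces) and the surrounding quotes are
+  // stripped from the returned value.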
+ std::string NextToken(bool check_not_end = true) { + std::string str = ""; + bool reading_quoted_string = false; + + if (string_data_.at(pos_) == '"') { + reading_quoted_string = true; + AdvancePosition(); + } + + while (true) { + // Check stopping condition: + if (pos_ >= string_data_.length() || + (reading_quoted_string && string_data_.at(pos_) == '"') || + (!reading_quoted_string && IsWhiteSpace(string_data_.at(pos_)))) { + break; + } + + str.push_back(string_data_.at(pos_)); + AdvancePosition(); + } + + if (reading_quoted_string) { + SPIEL_CHECK_EQ(string_data_.at(pos_), '"'); + } + AdvancePosition(); + + // Advance the position to the next token. + while (pos_ < string_data_.length() && + IsWhiteSpace(string_data_.at(pos_))) { + AdvancePosition(); + } + + if (check_not_end) { + SPIEL_CHECK_LT(pos_, string_data_.length()); + } + + return str; + } + + const std::string& string_data_; + int pos_; + int num_players_; + std::string name_; + std::vector player_names_; + std::vector num_actions_; + std::vector> utilities_; + + // Information needed to construct the matrix / tensor games. + uint64_t total_entries_; + // MatrixGame case. + std::vector matrix_row_action_names_; + std::vector matrix_col_action_names_; + std::vector matrix_row_utilities_; + std::vector matrix_col_utilities_; + // TensorGame case. + std::vector> tensor_action_names_; + std::vector> tensor_utilities_; +}; + +std::shared_ptr Factory(const GameParameters& params) { + // return std::shared_ptr(new EFGGame(params)); + std::string filename = params.at("filename").string_value(); + std::string string_data = file::ReadContentsFromFile(filename, "r"); + + SPIEL_CHECK_GT(string_data.size(), 0); + NFGGameParser parser(string_data); + return parser.ParseGame(); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); +} // namespace + +std::shared_ptr LoadNFGGame(const std::string& data) { + NFGGameParser parser(data); + return parser.ParseGame(); +} + +} // namespace nfg_game +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/nfg_game/nfg_game.h b/scenarios/bargaining/open_spiel/open_spiel/games/nfg_game/nfg_game.h new file mode 100644 index 0000000..ca19b25 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/nfg_game/nfg_game.h @@ -0,0 +1,33 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_NFG_GAME_H_ +#define OPEN_SPIEL_GAMES_NFG_GAME_H_ + +#include +#include + +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace nfg_game { + +// A Gambit .NFG file reader. Currently only the payoff version is supported. +// See http://www.gambit-project.org/gambit13/formats.html for details. 
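+//
+// For example, the two-player payoff-format game used by the tests
+// (sample.nfg) looks like:
+//
+//   NFG 1 R "Selten (IJGT, 75), Figure 2, normal form"
+//   { "Player 1" "Player 2" } { 3 2 }
+//
+//   1 1 0 2 0 2 1 1 0 3 2 0
+//
+// Passing such a string to LoadNFGGame() yields the corresponding MatrixGame
+// (or a TensorGame when the file declares more than two players).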
+std::shared_ptr LoadNFGGame(const std::string& data); + +} // namespace nfg_game +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_NFG_GAME_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/nfg_game/nfg_game_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/nfg_game/nfg_game_test.cc new file mode 100644 index 0000000..aece8bb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/nfg_game/nfg_game_test.cc @@ -0,0 +1,187 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/nfg_game/nfg_game.h" + +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/algorithms/matrix_game_utils.h" +#include "open_spiel/algorithms/nfg_writer.h" +#include "open_spiel/algorithms/tensor_game_utils.h" +#include "open_spiel/matrix_game.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tensor_game.h" +#include "open_spiel/tests/basic_tests.h" +#include "open_spiel/utils/init.h" + +namespace open_spiel { +namespace nfg_game { +namespace { + +using open_spiel::matrix_game::MatrixGame; +using open_spiel::tensor_game::TensorGame; + +// namespace testing = open_spiel::testing; + +const char* kSampleNFGFile = "third_party/open_spiel/games/nfg/sample.nfg"; +const char* kMatchingPennies3pFile = + "third_party/open_spiel/games/nfg/matching_pennies_3p.nfg"; + +const char* kSampleNFGString = R"###( +NFG 1 R "Selten (IJGT, 75), Figure 2, normal form" +{ "Player 1" "Player 2" } { 3 2 } + +1 1 0 2 0 2 1 1 0 3 2 0 +)###"; + +const char* kSampleScientificNotationString = R"###( +NFG 1 R "A small game with payoffs that use scientific notation" +{ "Player 1" "Player 2" } { 3 2 } + +1e-6 1e-6 0 2e-06 0 2 1e-5 1e+10 0 0.323423423111314 -9082948.2987934e5 0 +)###"; + +void NFGLoadSampleFromString() { + std::shared_ptr sample_nfg_game = LoadNFGGame(kSampleNFGString); + const MatrixGame* matrix_game = + dynamic_cast(sample_nfg_game.get()); + SPIEL_CHECK_TRUE(matrix_game != nullptr); + SPIEL_CHECK_EQ(matrix_game->RowUtility(0, 0), 1.0); + SPIEL_CHECK_EQ(matrix_game->RowUtility(1, 0), 0.0); + SPIEL_CHECK_EQ(matrix_game->RowUtility(2, 0), 0.0); + SPIEL_CHECK_EQ(matrix_game->RowUtility(0, 1), 1.0); + SPIEL_CHECK_EQ(matrix_game->RowUtility(1, 1), 0.0); + SPIEL_CHECK_EQ(matrix_game->RowUtility(2, 1), 2.0); + SPIEL_CHECK_EQ(matrix_game->ColUtility(0, 0), 1.0); + SPIEL_CHECK_EQ(matrix_game->ColUtility(1, 0), 2.0); + SPIEL_CHECK_EQ(matrix_game->ColUtility(2, 0), 2.0); + SPIEL_CHECK_EQ(matrix_game->ColUtility(0, 1), 1.0); + SPIEL_CHECK_EQ(matrix_game->ColUtility(1, 1), 3.0); + SPIEL_CHECK_EQ(matrix_game->ColUtility(2, 1), 0.0); +} + +void NFGLoadSampleScientificNotationFromString() { + std::shared_ptr sample_nfg_game = + LoadNFGGame(kSampleScientificNotationString); + const MatrixGame* matrix_game = + dynamic_cast(sample_nfg_game.get()); + SPIEL_CHECK_TRUE(matrix_game != nullptr); + SPIEL_CHECK_EQ(matrix_game->RowUtility(0, 0), 
1e-6); + SPIEL_CHECK_EQ(matrix_game->RowUtility(1, 0), 0.0); + SPIEL_CHECK_EQ(matrix_game->RowUtility(2, 0), 0.0); + SPIEL_CHECK_EQ(matrix_game->RowUtility(0, 1), 1e-5); + SPIEL_CHECK_EQ(matrix_game->RowUtility(1, 1), 0.0); + SPIEL_CHECK_EQ(matrix_game->RowUtility(2, 1), -9082948.2987934e5); + SPIEL_CHECK_EQ(matrix_game->ColUtility(0, 0), 1e-6); + SPIEL_CHECK_EQ(matrix_game->ColUtility(1, 0), 2e-6); + SPIEL_CHECK_EQ(matrix_game->ColUtility(2, 0), 2.0); + SPIEL_CHECK_EQ(matrix_game->ColUtility(0, 1), 1e10); + SPIEL_CHECK_EQ(matrix_game->ColUtility(1, 1), 0.323423423111314); + SPIEL_CHECK_EQ(matrix_game->ColUtility(2, 1), 0.0); +} + +void NFGLoadSampleFromFile() { + absl::optional file = FindFile(kSampleNFGFile, 2); + if (file.has_value()) { + std::cout << "Found file: " << file.value() << "; running sim test."; + std::shared_ptr game = + LoadGame("nfg_game", {{"filename", GameParameter(file.value())}}); + SPIEL_CHECK_TRUE(game != nullptr); + GameType type = game->GetType(); + SPIEL_CHECK_EQ(type.dynamics, GameType::Dynamics::kSimultaneous); + SPIEL_CHECK_EQ(type.information, GameType::Information::kOneShot); + SPIEL_CHECK_EQ(type.utility, GameType::Utility::kGeneralSum); + SPIEL_CHECK_EQ(type.chance_mode, GameType::ChanceMode::kDeterministic); + SPIEL_CHECK_EQ(game->NumPlayers(), 2); + SPIEL_CHECK_EQ(game->NumDistinctActions(), 3); + SPIEL_CHECK_EQ(game->MaxChanceOutcomes(), 0); + testing::RandomSimTestNoSerialize(*game, 100); + } +} + +void NFGLoadMatchingPennies3pFromFile() { + absl::optional file = FindFile(kMatchingPennies3pFile, 2); + if (file.has_value()) { + std::cout << "Found file: " << file.value() << "; running sim test."; + std::shared_ptr game = + LoadGame("nfg_game", {{"filename", GameParameter(file.value())}}); + SPIEL_CHECK_TRUE(game != nullptr); + const TensorGame* tensor_game = dynamic_cast(game.get()); + SPIEL_CHECK_TRUE(tensor_game != nullptr); + GameType type = game->GetType(); + SPIEL_CHECK_EQ(type.dynamics, GameType::Dynamics::kSimultaneous); + SPIEL_CHECK_EQ(type.information, GameType::Information::kOneShot); + SPIEL_CHECK_EQ(type.utility, GameType::Utility::kGeneralSum); + SPIEL_CHECK_EQ(type.chance_mode, GameType::ChanceMode::kDeterministic); + SPIEL_CHECK_EQ(game->NumPlayers(), 3); + SPIEL_CHECK_EQ(game->NumDistinctActions(), 2); + SPIEL_CHECK_EQ(game->MaxChanceOutcomes(), 0); + testing::RandomSimTestNoSerialize(*game, 100); + } +} + +void NFGExportReloadTestInternalGames() { + std::vector game_strings = { + "matrix_rps", + "matrix_shapleys_game", + "matrix_pd", + "matrix_sh", + "blotto(players=2,coins=5,fields=3)", + "blotto(players=3,coins=5,fields=3)", + }; + + for (const std::string& game_string : game_strings) { + // Load a native game, write it to NFG, parse the NFG, and export again. + // Both .nfg strings should be identical. 
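+    // (The reloaded game is compared with ApproxEqual below, which tolerates
+    // tiny floating-point differences introduced by the text round-trip.)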
+ std::shared_ptr general_game = LoadGame(game_string); + std::shared_ptr game; + if (general_game->NumPlayers() == 2) { + game = algorithms::LoadMatrixGame(game_string); + } else { + game = algorithms::LoadTensorGame(game_string); + } + std::string nfg_string = open_spiel::GameToNFGString(*game); + std::shared_ptr game2 = LoadNFGGame(nfg_string); + + if (game->NumPlayers() == 2) { + const auto* matrix_game = dynamic_cast(game.get()); + const auto* matrix_game2 = dynamic_cast(game2.get()); + SPIEL_CHECK_TRUE(matrix_game != nullptr); + SPIEL_CHECK_TRUE(matrix_game2 != nullptr); + SPIEL_CHECK_TRUE(matrix_game->ApproxEqual(*matrix_game2, 1e-10)); + } else { + const auto* tensor_game = dynamic_cast(game.get()); + const auto* tensor_game2 = dynamic_cast(game2.get()); + SPIEL_CHECK_TRUE(tensor_game != nullptr); + SPIEL_CHECK_TRUE(tensor_game2 != nullptr); + SPIEL_CHECK_TRUE(tensor_game->ApproxEqual(*tensor_game2, 1e-10)); + } + } +} + +} // namespace +} // namespace nfg_game +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::Init("", &argc, &argv, true); + open_spiel::nfg_game::NFGLoadSampleFromString(); + open_spiel::nfg_game::NFGLoadSampleScientificNotationFromString(); + open_spiel::nfg_game::NFGLoadSampleFromFile(); + open_spiel::nfg_game::NFGLoadMatchingPennies3pFromFile(); + open_spiel::nfg_game::NFGExportReloadTestInternalGames(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/nim/nim.cc b/scenarios/bargaining/open_spiel/open_spiel/games/nim/nim.cc new file mode 100644 index 0000000..d115bb7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/nim/nim.cc @@ -0,0 +1,236 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/nim/nim.h" + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/numbers.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace nim { +namespace { + +constexpr char kDefaultPileSizes[] = "1;3;5;7"; + +std::vector ParsePilesString(const std::string &str) { + std::vector sizes = absl::StrSplit(str, ';'); + std::vector pile_sizes; + for (const auto &sz : sizes) { + int val; + if (!absl::SimpleAtoi(sz, &val)) { + SpielFatalError(absl::StrCat("Could not parse size '", sz, + "' of pile_sizes string '", str, + "' as an integer")); + } + pile_sizes.push_back(val); + } + return pile_sizes; +} + +// Facts about the game. 
+const GameType kGameType{ + /*short_name=*/"nim", + /*long_name=*/"Nim", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + { + {"pile_sizes", GameParameter(std::string(kDefaultPileSizes))}, + {"is_misere", GameParameter(kDefaultIsMisere)}, + }}; + +std::shared_ptr Factory(const GameParameters ¶ms) { + return std::shared_ptr(new NimGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +} // namespace + +NimGame::NimGame(const GameParameters ¶ms) + : Game(kGameType, params), + piles_(ParsePilesString(ParameterValue("pile_sizes"))), + is_misere_(ParameterValue("is_misere")) { + num_piles_ = piles_.size(); + max_num_per_pile_ = *std::max_element(piles_.begin(), piles_.end()); +} + +int NimGame::NumDistinctActions() const { + if (piles_.empty()) { + return 0; + } + // action_id = (take - 1) * num_piles_ + pile_idx < (max_take - 1) * + // num_piles_ + num_piles = max_take * num_piles_ + return num_piles_ * max_num_per_pile_ + 1; +} + +int NimGame::MaxGameLength() const { + // players can take only 1 object at every step + return std::accumulate(piles_.begin(), piles_.end(), 0); +} + +std::pair NimState::UnpackAction(Action action_id) const { + // action_id = (take - 1) * num_piles_ + pile_idx + int pile_idx = action_id % num_piles_; + int take = (action_id - pile_idx) / num_piles_ + 1; + return {pile_idx, take}; +} + +bool NimState::IsEmpty() const { + return std::accumulate(piles_.begin(), piles_.end(), 0) == 0; +} + +void NimState::DoApplyAction(Action move) { + SPIEL_CHECK_FALSE(IsTerminal()); + std::pair action = UnpackAction(move); + int pile_idx = action.first, take = action.second; + + SPIEL_CHECK_LT(pile_idx, piles_.size()); + SPIEL_CHECK_GT(take, 0); + SPIEL_CHECK_LE(take, piles_[pile_idx]); + + piles_[pile_idx] -= take; + if (IsEmpty()) { + outcome_ = is_misere_ ? 
1 - current_player_ : current_player_; + } + current_player_ = 1 - current_player_; + num_moves_ += 1; +} + +std::vector NimState::LegalActions() const { + if (IsTerminal()) return {}; + std::vector moves; + for (std::size_t pile_idx = 0; pile_idx < piles_.size(); pile_idx++) { + // the player has to take at least one object from a pile + for (int take = 1; take <= piles_[pile_idx]; take++) { + moves.push_back((take - 1) * num_piles_ + (int)pile_idx); + } + } + std::sort(moves.begin(), moves.end()); + return moves; +} + +std::string NimState::ActionToString(Player player, Action action_id) const { + std::pair action = UnpackAction(action_id); + int pile_idx = action.first, take = action.second; + return absl::StrCat("pile:", pile_idx + 1, ", take:", take, ";"); +} + +NimState::NimState(std::shared_ptr game, int num_piles, + std::vector piles, bool is_misere, + int max_num_per_pile) + : State(game), + num_piles_(num_piles), + piles_(piles), + is_misere_(is_misere), + max_num_per_pile_(max_num_per_pile) {} + +std::string NimState::ToString() const { + std::string str; + absl::StrAppend(&str, "(", current_player_, "): "); + for (std::size_t pile_idx = 0; pile_idx < piles_.size(); pile_idx++) { + absl::StrAppend(&str, piles_[pile_idx]); + if (pile_idx != piles_.size() - 1) { + absl::StrAppend(&str, " "); + } + } + return str; +} + +bool NimState::IsTerminal() const { + return outcome_ != kInvalidPlayer || IsEmpty(); +} + +std::vector NimState::Returns() const { + if (outcome_ == Player{0}) { + return {1.0, -1.0}; + } else if (outcome_ == Player{1}) { + return {-1.0, 1.0}; + } else { + return {0.0, 0.0}; + } +} + +std::string NimState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return HistoryString(); +} + +std::string NimState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void NimState::ObservationTensor(Player player, + absl::Span values) const { + // [one-hot player] + [IsTerminal()] + [binary representation of num_piles] + + // [binary representation of every pile] + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + std::fill(values.begin(), values.end(), 0); + + int offset = 0; + values[current_player_] = 1; + offset += 2; + values[offset] = IsTerminal() ? 
1 : 0; + offset += 1; + + // num_piles (which is >= 1) + values[offset + num_piles_ - 1] = 1; + offset += num_piles_; + + for (std::size_t pile_idx = 0; pile_idx < piles_.size(); pile_idx++) { + values[offset + piles_[pile_idx]] = 1; + offset += max_num_per_pile_ + 1; + } + + SPIEL_CHECK_EQ(offset, values.size()); +} + +void NimState::UndoAction(Player player, Action move) { + std::pair action = UnpackAction(move); + int pile_idx = action.first, take = action.second; + piles_[pile_idx] += take; + current_player_ = player; + outcome_ = kInvalidPlayer; + num_moves_ -= 1; + history_.pop_back(); + --move_number_; +} + +std::unique_ptr NimState::Clone() const { + return std::unique_ptr(new NimState(*this)); +} + +} // namespace nim +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/nim/nim.h b/scenarios/bargaining/open_spiel/open_spiel/games/nim/nim.h new file mode 100644 index 0000000..e163196 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/nim/nim.h @@ -0,0 +1,119 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_NIM_H_ +#define OPEN_SPIEL_GAMES_NIM_H_ + +#include +#include +#include +#include +#include + +#include "open_spiel/spiel.h" + +// Nim: +// * Two players take turns removing objects from distinct piles; +// * On each turn, a player must remove at least one object, +// and may remove any number of objects provided they all come from the +// same heap or pile; +// * Depending on the version, the goal of the game is either to avoid taking +// the last object or to take it. Please see https://en.wikipedia.org/wiki/Nim +// for more + +namespace open_spiel { +namespace nim { + +// Constants. +inline constexpr int kNumPlayers = 2; +inline constexpr int kDefaultNumPiles = 3; +inline constexpr bool kDefaultIsMisere = true; + +// State of an in-play game. +class NimState : public State { + public: + explicit NimState(std::shared_ptr game, int num_piles, + std::vector piles, bool is_misere, + int max_num_per_pile); + + NimState(const NimState &) = default; + NimState &operator=(const NimState &) = default; + + Player CurrentPlayer() const override { + return IsTerminal() ? 
kTerminalPlayerId : current_player_; + } + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + void UndoAction(Player player, Action move) override; + std::vector LegalActions() const override; + Player outcome() const { return outcome_; } + + protected: + void DoApplyAction(Action move) override; + int num_piles_ = kDefaultNumPiles; + std::vector piles_; + + private: + bool IsEmpty() const; + std::pair UnpackAction(Action action_id) const; + Player current_player_ = 0; // Player zero goes first + Player outcome_ = kInvalidPlayer; + int num_moves_ = 0; + bool is_misere_ = kDefaultIsMisere; + const int max_num_per_pile_; +}; + +// Game object. +class NimGame : public Game { + public: + explicit NimGame(const GameParameters ¶ms); + int NumDistinctActions() const override; + std::unique_ptr NewInitialState() const override { + return std::unique_ptr( + new NimState(shared_from_this(), num_piles_, piles_, is_misere_, + max_num_per_pile_)); + } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { return -1; } + absl::optional UtilitySum() const override { return 0; } + double MaxUtility() const override { return 1; } + std::vector ObservationTensorShape() const override { + return { + 2 + // Turn + 1 + // Is terminal? + num_piles_ + // One-hot bit for the number `num_piles_` + // One hot representation of the quantity in each pile. + num_piles_ * (max_num_per_pile_ + 1) + }; + }; + int MaxGameLength() const override; + + private: + std::vector piles_; + int num_piles_ = kDefaultNumPiles; + bool is_misere_ = kDefaultIsMisere; + int max_num_per_pile_; +}; + +} // namespace nim +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_NIM_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/nim/nim_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/nim/nim_test.cc new file mode 100644 index 0000000..3e54831 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/nim/nim_test.cc @@ -0,0 +1,158 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/algorithms/value_iteration.h" +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace nim { +namespace { + +namespace testing = open_spiel::testing; +namespace algorithms = open_spiel::algorithms; + +void BasicNimTests() { + testing::LoadGameTest("nim"); + testing::RandomSimTest(*LoadGame("nim"), 100); + testing::RandomSimTestWithUndo(*LoadGame("nim"), 10); + testing::RandomSimTest( + *LoadGame("nim", + { + {"pile_sizes", GameParameter("100;200;300")}, + }), + 10); + testing::RandomSimTest( + *LoadGame("nim", + { + {"pile_sizes", + GameParameter("10000;2000;3000;12414;1515;53252;1;35126")}, + }), + 10); + testing::RandomSimTest( + *LoadGame("nim", + { + {"pile_sizes", GameParameter("1;2;3;4;5;6;7;8;9;10")}, + {"is_misere", GameParameter(false)}, + }), + 10); +} + +void SinglePileNormalTest() { + std::shared_ptr game = + LoadGame("nim", { + {"pile_sizes", GameParameter("100")}, + {"is_misere", GameParameter(false)}, + }); + std::unique_ptr state = game->NewInitialState(); + std::vector actions = state->LegalActions(); + SPIEL_CHECK_EQ(actions.size(), 100); + + state->ApplyAction(actions.back()); + SPIEL_CHECK_EQ(state->IsTerminal(), 1); + SPIEL_CHECK_EQ(state->PlayerReturn(0), 1); + SPIEL_CHECK_EQ(state->PlayerReturn(1), -1); +} + +void SinglePileMisereTest() { + std::shared_ptr game = + LoadGame("nim", { + {"pile_sizes", GameParameter("100")}, + }); + std::unique_ptr state = game->NewInitialState(); + std::vector actions = state->LegalActions(); + SPIEL_CHECK_EQ(actions.size(), 100); + + state->ApplyAction(actions.back()); + SPIEL_CHECK_EQ(state->IsTerminal(), 1); + SPIEL_CHECK_EQ(state->PlayerReturn(0), -1); + SPIEL_CHECK_EQ(state->PlayerReturn(1), 1); +} + +void VISinglePileNormalTest() { + std::shared_ptr game = + LoadGame("nim", { + {"pile_sizes", GameParameter("100")}, + {"is_misere", GameParameter(false)}, + }); + auto values = algorithms::ValueIteration(*game, -1, 0.01); + SPIEL_CHECK_EQ(values["(0): 100"], 1); +} + +void VISinglePileMisereTest() { + std::shared_ptr game = + LoadGame("nim", { + {"pile_sizes", GameParameter("100")}, + }); + auto values = algorithms::ValueIteration(*game, -1, 0.01); + SPIEL_CHECK_EQ(values["(0): 100"], 1); +} + +// See "Winning positions" here +// https://en.wikipedia.org/wiki/Nim +// to understand the "pile_sizes" parameter from the tests below +void VIThreeOnesNormalTest() { + std::shared_ptr normal_game = + LoadGame("nim", { + {"pile_sizes", GameParameter("1;1;1")}, + {"is_misere", GameParameter(false)}, + }); + auto values = algorithms::ValueIteration(*normal_game, -1, 0.01); + SPIEL_CHECK_EQ(values["(0): 1 1 1"], 1); +} + +void VIThreeOnesMisereTest() { + std::shared_ptr game = + LoadGame("nim", { + {"pile_sizes", GameParameter("1;1;1")}, + }); + auto values = algorithms::ValueIteration(*game, -1, 0.01); + SPIEL_CHECK_EQ(values["(0): 1 1 1"], -1); +} + +void VIThreePilesTest() { + std::shared_ptr normal_game = + LoadGame("nim", { + {"pile_sizes", GameParameter("5;8;13")}, + {"is_misere", GameParameter(false)}, + }); + auto values = algorithms::ValueIteration(*normal_game, -1, 0.01); + SPIEL_CHECK_EQ(values["(0): 5 8 13"], -1); +} + +void VIFourPilesTest() { + std::shared_ptr normal_game = + LoadGame("nim", { + {"pile_sizes", GameParameter("2;3;8;10")}, + {"is_misere", GameParameter(false)}, + }); + auto values = algorithms::ValueIteration(*normal_game, -1, 0.01); + SPIEL_CHECK_EQ(values["(0): 2 3 8 10"], 1); +} + +} // namespace +} // namespace nim +} // namespace 
open_spiel + +int main(int argc, char **argv) { + open_spiel::nim::BasicNimTests(); + open_spiel::nim::SinglePileNormalTest(); + open_spiel::nim::SinglePileMisereTest(); + open_spiel::nim::VISinglePileNormalTest(); + open_spiel::nim::VISinglePileMisereTest(); + open_spiel::nim::VIThreeOnesNormalTest(); + open_spiel::nim::VIThreeOnesMisereTest(); + open_spiel::nim::VIThreePilesTest(); + open_spiel::nim::VIFourPilesTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/nine_mens_morris/nine_mens_morris.cc b/scenarios/bargaining/open_spiel/open_spiel/games/nine_mens_morris/nine_mens_morris.cc new file mode 100644 index 0000000..5d75191 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/nine_mens_morris/nine_mens_morris.cc @@ -0,0 +1,467 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/nine_mens_morris/nine_mens_morris.h" + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/tensor_view.h" + +namespace open_spiel { +namespace nine_mens_morris { +namespace { + +// Facts about the game. +const GameType kGameType{ + /*short_name=*/"nine_mens_morris", + /*long_name=*/"Nine men's morris", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/{} // no parameters +}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new NineMensMorrisGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +enum kDirection : int { kNorth = 0, kEast = 1, kSouth = 2, kWest = 3 }; + +// 0 7 14 +// 0: .------.------. 0, 1, 2 +// 1: | | | +// 2: | .----.----. | 3, 4, 5 +// 3: | | | | | +// 4: | | .--.--. | | 6, 7, 8 +// 5: | | | | | | +// 6: .-.-. .-.-. 9, 10, 11, 12, 13, 14 +// 7: | | | | | | +// 8: | | .--.--. | | 15, 16, 17 +// 9: | | | | | +// 10: | .----.----. | 18, 19, 20 +// 11: | | | +// 12: .------.------. 
21, 22, 23 + +constexpr std::array, kNumPoints> kPointStrCoords = { + {{0, 0}, {0, 7}, {0, 14}, {2, 2}, {2, 7}, {2, 12}, {4, 4}, {4, 7}, + {4, 10}, {6, 0}, {6, 2}, {6, 4}, {6, 10}, {6, 12}, {6, 14}, {8, 4}, + {8, 7}, {8, 10}, {10, 2}, {10, 7}, {10, 12}, {12, 0}, {12, 7}, {12, 14}}}; + +constexpr std::array, kNumPoints> kPointNeighbors = {{ + // N, E, S, W + {-1, 1, 9, -1}, // 0 + {-1, 2, 4, 0}, // 1 + {-1, -1, 14, 1}, // 2 + {-1, 4, 10, -1}, // 3 + {1, 5, 7, 3}, // 4 + {-1, -1, 13, 4}, // 5 + {-1, 7, 11, -1}, // 6 + {4, 8, -1, 6}, // 7 + {-1, -1, 12, 7}, // 8 + {0, 10, 21, -1}, // 9 + {3, 11, 18, 9}, // 10 + {6, -1, 15, 10}, // 11 + {8, 13, 17, -1}, // 12 + {5, 14, 20, 12}, // 13 + {2, -1, 23, 13}, // 14 + {11, 16, -1, -1}, // 15 + {-1, 17, 19, 15}, // 16 + {12, -1, -1, 16}, // 17 + {10, 19, -1, -1}, // 18 + {16, 20, 22, 18}, // 19 + {13, -1, -1, 19}, // 20 + {9, 22, -1, -1}, // 21 + {19, 23, -1, 21}, // 22 + {14, -1, -1, 22} // 23 +}}; + +} // namespace + +CellState PlayerToState(Player player) { + switch (player) { + case 0: + return CellState::kWhite; + case 1: + return CellState::kBlack; + default: + SpielFatalError(absl::StrCat("Invalid player id ", player)); + return CellState::kEmpty; + } +} + +const char* PlayerToStr(Player player) { + switch (player) { + case 0: + return "W"; + case 1: + return "B"; + default: + SpielFatalError(absl::StrCat("Invalid player id ", player)); + return ""; + } +} + +char StateToChar(CellState state) { + switch (state) { + case CellState::kEmpty: + return '.'; + case CellState::kWhite: + return 'W'; + case CellState::kBlack: + return 'B'; + default: + SpielFatalError("Unknown state."); + } +} + +Player StateToPlayer(CellState state) { + switch (state) { + case CellState::kEmpty: + return kInvalidPlayer; + case CellState::kWhite: + return 0; + case CellState::kBlack: + return 1; + default: + SpielFatalError("Unknown state."); + } +} + +Action ToMoveAction(int source, int dest) { + return kNumPoints + (source * kNumPoints + dest); +} + +void FromMoveAction(Action action, int* source, int* dest) { + action -= kNumPoints; + *source = action / kNumPoints; + *dest = action % kNumPoints; +} + +void NineMensMorrisState::GetCurrentLegalActions() { + cur_legal_actions_.clear(); + + if (capture_) { + Player opp = 1 - current_player_; + bool all_mills = CheckAllMills(opp); + for (int p = 0; p < kNumPoints; ++p) { + if (StateToPlayer(board_[p]) == opp) { + if (all_mills || !CheckInMill(p)) { + cur_legal_actions_.push_back(p); + } + } + } + } else { + if (men_to_deploy_[current_player_] > 0) { + // Still in phase 1. + for (int p = 0; p < kNumPoints; ++p) { + if (board_[p] == CellState::kEmpty) { + cur_legal_actions_.push_back(p); + } + } + } else if (num_men_[current_player_] > 3) { + // Phase 2. + for (int p = 0; p < kNumPoints; ++p) { + Player player = StateToPlayer(board_[p]); + if (player == current_player_) { + for (int dir = 0; dir < 4; ++dir) { + int np = kPointNeighbors[p][dir]; + if (np > 0 && board_[np] == CellState::kEmpty) { + cur_legal_actions_.push_back(ToMoveAction(p, np)); + } + } + } + } + absl::c_sort(cur_legal_actions_); + } else { + // Phase 3. 
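+      // A player reduced to three men may "fly": move a man from any point
+      // to any empty point, not just to an adjacent one.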
+ for (int p = 0; p < kNumPoints; ++p) { + Player player = StateToPlayer(board_[p]); + if (player == current_player_) { + for (int np = 0; np < kNumPoints; ++np) { + if (p == np) { + continue; + } + + if (board_[np] == CellState::kEmpty) { + cur_legal_actions_.push_back(ToMoveAction(p, np)); + } + } + } + } + absl::c_sort(cur_legal_actions_); + } + } +} + +bool NineMensMorrisState::CheckAllMills(Player player) const { + for (int p = 0; p < kNumPoints; ++p) { + if (StateToPlayer(board_[p]) == player) { + if (!CheckInMill(p)) { + return false; + } + } + } + return true; +} + +bool NineMensMorrisState::CheckInMill(int pos) const { + Player player = StateToPlayer(board_[pos]); + if (player == kInvalidPlayer) { + return false; + } + + int cp = pos; + + // Direction base: North or East. + for (int dir_base = 0; dir_base < 2; ++dir_base) { + int total_matches = 0; + + // Try North + South, then East + West + for (int dir : {dir_base, dir_base + 2}) { + cp = pos; + + for (int i = 0; i < 2; ++i) { + cp = kPointNeighbors[cp][dir]; + if (cp < 0 || StateToPlayer(board_[cp]) != player) { + break; + } else { + total_matches++; + } + } + } + + if (total_matches == 2) { + return true; + } + } + + return false; +} + +void NineMensMorrisState::DoApplyAction(Action move) { + cur_legal_actions_.clear(); + if (move < kNumPoints) { + if (capture_) { + // Capture move: choosing which piece to remove. + SPIEL_CHECK_TRUE(board_[move] != CellState::kEmpty); + Player opp = StateToPlayer(board_[move]); + SPIEL_CHECK_TRUE(opp == 1 - current_player_); + num_men_[opp]--; + board_[move] = CellState::kEmpty; + capture_ = false; + current_player_ = 1 - current_player_; + num_turns_++; + } else { + // Regular move in phase 1 (deployment) + SPIEL_CHECK_TRUE(board_[move] == CellState::kEmpty); + board_[move] = PlayerToState(current_player_); + SPIEL_CHECK_GT(men_to_deploy_[current_player_], 0); + men_to_deploy_[current_player_]--; + bool mill = CheckInMill(move); + if (mill) { + capture_ = true; + } else { + current_player_ = 1 - current_player_; + num_turns_++; + } + } + } else { + // Movement move (phase 2 or 3). + int from_pos = -1, to_pos = -1; + FromMoveAction(move, &from_pos, &to_pos); + SPIEL_CHECK_TRUE(StateToPlayer(board_[from_pos]) == current_player_); + SPIEL_CHECK_TRUE(board_[to_pos] == CellState::kEmpty); + board_[to_pos] = board_[from_pos]; + board_[from_pos] = CellState::kEmpty; + bool mill = CheckInMill(to_pos); + if (mill) { + capture_ = true; + } else { + current_player_ = 1 - current_player_; + num_turns_++; + } + } + + if (cur_legal_actions_.empty()) { + GetCurrentLegalActions(); + } +} + +std::vector NineMensMorrisState::LegalActions() const { + if (IsTerminal()) return {}; + return cur_legal_actions_; +} + +std::string NineMensMorrisState::ActionToString(Player player, + Action action_id) const { + return game_->ActionToString(player, action_id); +} + +NineMensMorrisState::NineMensMorrisState(std::shared_ptr game) + : State(game) { + std::fill(begin(board_), end(board_), CellState::kEmpty); + GetCurrentLegalActions(); +} + +std::string NineMensMorrisState::ToString() const { + std::string str = + ".------.------.\n" + "| | |\n" + "| .----.----. |\n" + "| | | | |\n" + "| | .--.--. | |\n" + "| | | | | |\n" + ".-.-. .-.-.\n" + "| | | | | |\n" + "| | .--.--. | |\n" + "| | | | |\n" + "| .----.----. 
|\n" + "| | |\n" + ".------.------.\n\n"; + absl::StrAppend(&str, "Current player: ", PlayerToStr(current_player_), "\n"); + absl::StrAppend(&str, "Turn number: ", num_turns_, "\n"); + absl::StrAppend(&str, "Men to deploy: ", men_to_deploy_[0], " ", + men_to_deploy_[1], "\n"); + absl::StrAppend(&str, "Num men: ", num_men_[0], " ", num_men_[1], "\n"); + if (capture_) { + absl::StrAppend(&str, "Last move formed a mill. Capture time!"); + } + + for (int i = 0; i < kNumPoints; ++i) { + int row = kPointStrCoords[i][0]; + int col = kPointStrCoords[i][1]; + int idx = row * 16 + col; + str[idx] = StateToChar(board_[i]); + } + return str; +} + +bool NineMensMorrisState::IsTerminal() const { + return num_turns_ >= kMaxNumTurns || num_men_[0] <= 2 || num_men_[1] <= 2 || + cur_legal_actions_.empty(); +} + +std::vector NineMensMorrisState::Returns() const { + std::vector returns = {0.0, 0.0}; + if (cur_legal_actions_.empty()) { + Player opp = 1 - current_player_; + returns[current_player_] = -1.0; + returns[opp] = 1.0; + } else if (num_men_[0] <= 2) { + returns[0] = -1.0; + returns[1] = 1.0; + } else if (num_men_[1] <= 2) { + returns[0] = 1.0; + returns[1] = -1.0; + } + + return returns; +} + +std::string NineMensMorrisState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return HistoryString(); +} + +std::string NineMensMorrisState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void NineMensMorrisState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + std::string templ = + ".--.--.\n" + "|.-.-.|\n" + "||...||\n" + "... 
...\n" + "||...||\n" + "|.-.-.|\n" + ".--.--.\n"; + int pos = 0; + TensorView<3> view( + values, {kCellStates + 2, kObservationSize, kObservationSize}, true); + for (int r = 0; r < kObservationSize; ++r) { + for (int c = 0; c < kObservationSize; ++c) { + int char_idx = r * 8 + c; + int plane = -1; + if (templ[char_idx] == '.') { + if (board_[pos] == CellState::kWhite) { + plane = 0; + } else if (board_[pos] == CellState::kBlack) { + plane = 1; + } else { + plane = 2; + } + pos++; + } else if (templ[char_idx] == '-') { + plane = 3; + } else if (templ[char_idx] == '|') { + plane = 4; + } + + if (plane >= 0) { + view[{plane, r, c}] = 1.0; + } + } + } +} + +std::unique_ptr NineMensMorrisState::Clone() const { + return std::unique_ptr(new NineMensMorrisState(*this)); +} + +std::string NineMensMorrisGame::ActionToString(Player player, + Action action_id) const { + if (action_id < kNumPoints) { + return absl::StrCat("Point ", action_id); + } else { + int from_pos = 0, to_pos = 0; + FromMoveAction(action_id, &from_pos, &to_pos); + return absl::StrCat("Move ", from_pos, " -> ", to_pos); + } +} + +int NineMensMorrisGame::NumDistinctActions() const { + return kNumPoints + kNumPoints * kNumPoints; +} + +NineMensMorrisGame::NineMensMorrisGame(const GameParameters& params) + : Game(kGameType, params) {} + +} // namespace nine_mens_morris +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/nine_mens_morris/nine_mens_morris.h b/scenarios/bargaining/open_spiel/open_spiel/games/nine_mens_morris/nine_mens_morris.h new file mode 100644 index 0000000..6391fd5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/nine_mens_morris/nine_mens_morris.h @@ -0,0 +1,124 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_NINE_MENS_MORRIS_H_ +#define OPEN_SPIEL_NINE_MENS_MORRIS_H_ + +#include +#include +#include +#include +#include + +#include "open_spiel/spiel.h" + +// Nine men's morris: +// https://en.m.wikipedia.org/wiki/Nine_men%27s_morris +// +// Parameters: none + +namespace open_spiel { +namespace nine_mens_morris { + +// Constants. +inline constexpr int kNumPlayers = 2; +inline constexpr int kNumMen = 9; +inline constexpr int kNumPoints = 24; // A point is a place on the board. +inline constexpr int kCellStates = 1 + kNumPlayers; // empty, 'x', and 'o'. +inline constexpr int kMaxNumTurns = 200; +inline constexpr int kObservationSize = 7; + +// State of a cell. +enum class CellState { + kEmpty, + kWhite, // W + kBlack, // B +}; + +using Mill = std::array; + +// State of an in-play game. +class NineMensMorrisState : public State { + public: + NineMensMorrisState(std::shared_ptr game); + + NineMensMorrisState(const NineMensMorrisState&) = default; + NineMensMorrisState& operator=(const NineMensMorrisState&) = default; + + Player CurrentPlayer() const override { + return IsTerminal() ? 
kTerminalPlayerId : current_player_; + } + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + std::vector LegalActions() const override; + + // Extra methods not part of the core API. + CellState BoardAt(int cell) const { return board_[cell]; } + Player outcome() const { return outcome_; } + + protected: + std::array board_; + void DoApplyAction(Action move) override; + + private: + Player current_player_ = 0; // Player zero goes first + Player outcome_ = kInvalidPlayer; + int num_turns_ = 0; + bool capture_ = false; + std::array men_to_deploy_ = {kNumMen, kNumMen}; + std::array num_men_ = {kNumMen, kNumMen}; + std::vector cur_legal_actions_; + + void GetCurrentLegalActions(); + bool CheckInMill(int pos) const; + bool CheckAllMills(Player player) const; +}; + +// Game object. +class NineMensMorrisGame : public Game { + public: + explicit NineMensMorrisGame(const GameParameters& params); + int NumDistinctActions() const override; + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new NineMensMorrisState(shared_from_this())); + } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { return -1; } + absl::optional UtilitySum() const override { return 0; } + double MaxUtility() const override { return 1; } + std::vector ObservationTensorShape() const override { + return {kCellStates + 2, kObservationSize, kObservationSize}; + } + int MaxGameLength() const override { return kMaxNumTurns + 2 * kNumMen - 4; } + std::string ActionToString(Player player, Action action_id) const override; +}; + +CellState PlayerToState(Player player); +char StateToChar(CellState state); +const char* PlayerToStr(Player player); +Player StateToPlayer(CellState state); +Action ToMoveAction(int source, int dest); +void FromMoveAction(Action action, int* source, int* dest); + +} // namespace nine_mens_morris +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_NINE_MENS_MORRIS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/nine_mens_morris/nine_mens_morris_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/nine_mens_morris/nine_mens_morris_test.cc new file mode 100644 index 0000000..d2c6860 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/nine_mens_morris/nine_mens_morris_test.cc @@ -0,0 +1,39 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
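+// Action encoding exercised by these tests (see ToMoveAction/FromMoveAction
+// in nine_mens_morris.cc): actions 0..23 name a point directly, used both
+// for placements in phase 1 and for choosing the opponent man to remove
+// after forming a mill; a move from `source` to `dest` in phases 2 and 3 is
+// encoded as 24 + source * 24 + dest. For example, sliding a man from point
+// 9 to its neighbour 10 is action 24 + 9 * 24 + 10 = 250, and
+// NumDistinctActions() is 24 + 24 * 24 = 600.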
+ +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" +#include "open_spiel/tests/console_play_test.h" + +namespace open_spiel { +namespace nine_mens_morris { +namespace { + +namespace testing = open_spiel::testing; + +void BasicNineMensMorrisTests() { + testing::LoadGameTest("nine_mens_morris"); + testing::NoChanceOutcomesTest(*LoadGame("nine_mens_morris")); + testing::RandomSimTest(*LoadGame("nine_mens_morris"), 100); +} + +} // namespace +} // namespace nine_mens_morris +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::nine_mens_morris::BasicNineMensMorrisTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/oh_hell/oh_hell.cc b/scenarios/bargaining/open_spiel/open_spiel/games/oh_hell/oh_hell.cc new file mode 100644 index 0000000..9fd68c1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/oh_hell/oh_hell.cc @@ -0,0 +1,630 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/oh_hell/oh_hell.h" + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace oh_hell { +namespace { + +const GameType kGameType{ + /*short_name=*/"oh_hell", + /*long_name=*/"Oh Hell!", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/kMaxNumPlayers, + /*min_num_players=*/kMinNumPlayers, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/false, + /*provides_observation_tensor=*/false, + /*parameter_specification=*/ + { + {"players", GameParameter(kMinNumPlayers)}, + {"num_suits", GameParameter(kMaxNumSuits)}, + {"num_cards_per_suit", GameParameter(kMaxNumCardsPerSuit)}, + // number of tricks in the game, must be between 1 and + // (num_suits * num_cards_per_suit - 1) / num_players, + // default is to choose randomly in the legal range every game + {"num_tricks_fixed", GameParameter(kRandomNumTricks)}, + // In case of no off-bid penalty, players receive `points_per_trick` + // per trick made, plus a bonus if their bid was correct. + // In case of an off-bid penalty, if a player missed their bid, they + // receive a penalty of `points_per_trick` times the number of tricks + // they are above or below their bid and only if the bid was correct + // they receive `points_per_trick` per trick made plus a bonus. 
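+          // Example with points_per_trick=1: a player who bid 3 and won
+          // exactly 3 tricks scores 3 + 10 = 13 in either mode; a player who
+          // bid 3 but won 5 tricks scores 5 without the penalty and -2 with
+          // it.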
+ {"off_bid_penalty", GameParameter(false)}, + {"points_per_trick", GameParameter(1)}, + }}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new OhHellGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +} // namespace + +OhHellGame::OhHellGame(const GameParameters& params) + : Game(kGameType, params), + num_players_(ParameterValue("players")), + deck_props_(ParameterValue("num_suits"), + ParameterValue("num_cards_per_suit")), + num_tricks_fixed_(ParameterValue("num_tricks_fixed")), + off_bid_penalty_(ParameterValue("off_bid_penalty")), + points_per_trick_(ParameterValue("points_per_trick")) { + SPIEL_CHECK_TRUE(num_players_ >= kMinNumPlayers && + num_players_ <= kMaxNumPlayers); + SPIEL_CHECK_TRUE(deck_props_.NumSuits() >= kMinNumSuits && + deck_props_.NumSuits() <= kMaxNumSuits); + SPIEL_CHECK_TRUE(deck_props_.NumCardsPerSuit() >= kMinNumCardsPerSuit && + deck_props_.NumCardsPerSuit() <= kMaxNumCardsPerSuit); + // need at least num_players + 1 cards + SPIEL_CHECK_TRUE(num_players_ <= deck_props_.NumCards() - kNumTrumpDeal); + SPIEL_CHECK_TRUE(num_tricks_fixed_ == kRandomNumTricks || + (num_tricks_fixed_ >= kMinNumTricks && + num_tricks_fixed_ <= MaxNumTricks())); +} + +std::vector OhHellGame::InformationStateTensorShape() const { + // initial chance actions (incl trump dealing) + int len = MaxNumTricks() + num_players_ + deck_props_.NumCards(); + // initial hand and current hand + len += 2 * deck_props_.NumCards(); + // bids, legal range is [no bid, 0, 1, ..., max legal bid] + len += num_players_ * (MaxNumTricks() + 2); + // tricks won so far + len += MaxNumTricks() * num_players_; + // tricks + len += MaxNumTricks() * (2 * num_players_ - 1) * deck_props_.NumCards(); + return {len}; +} + +OhHellState::OhHellState(std::shared_ptr game, int num_players, + DeckProperties deck_props, int num_tricks_fixed, + bool off_bid_penalty, int points_per_trick) + : State(game), + num_players_(num_players), + num_tricks_fixed_(num_tricks_fixed), + deck_props_(deck_props), + off_bid_penalty_(off_bid_penalty), + points_per_trick_(points_per_trick) { + bids_.resize(num_players_); + // need to differentiate between no bid and a bid of 0 + std::fill(bids_.begin(), bids_.end(), kInvalidBid); + num_tricks_won_.resize(num_players_); + returns_.resize(num_players_); + holder_.resize(deck_props_.NumCards()); + initial_deal_.resize(deck_props_.NumCards()); +} + +std::string OhHellState::ToString() const { + std::string rv = absl::StrCat(FormatPhase(), FormatChooseNumTricks()); + absl::StrAppend(&rv, FormatDealer()); + absl::StrAppend(&rv, FormatDeal()); + if (num_cards_dealt_ > num_players_ * num_tricks_) { + absl::StrAppend(&rv, FormatTrump()); + } + if (num_cards_played_ > 0) absl::StrAppend(&rv, FormatPlay()); + absl::StrAppend(&rv, FormatBids()); + if (IsTerminal()) absl::StrAppend(&rv, FormatResult()); + return rv; +} + +std::string OhHellState::ActionToString(Player player, Action action) const { + switch (phase_) { + case Phase::kChooseNumTricks: + case Phase::kDealer: + return absl::StrFormat("%d", action); + case Phase::kDeal: + case Phase::kPlay: + return deck_props_.CardString(action); + case Phase::kBid: + return absl::StrFormat("%d", action - deck_props_.NumCards()); + default: + return ""; + } +} + +// returns a string for each suit +std::string OhHellState::FormatHand(int player) const { + std::string rv = absl::StrFormat("Player: %d\n", player); + auto deal = IsTerminal() ? 
initial_deal_ : holder_; + for (int suit = 0; suit < deck_props_.NumSuits(); ++suit) { + absl::StrAppendFormat(&rv, " %c: ", kSuitChar[suit]); + for (int rank = deck_props_.NumCardsPerSuit() - 1; rank >= 0; --rank) { + if (player == deal[deck_props_.Card(Suit(suit), rank)]) { + absl::StrAppend(&rv, absl::string_view(&kRankChar[rank], 1)); + } + } + absl::StrAppend(&rv, "\n"); + } + return rv; +} + +std::string OhHellState::FormatPhase() const { + return absl::StrFormat("Phase: %s\n", kPhaseStr[static_cast(phase_)]); +} + +std::string OhHellState::FormatChooseNumTricks() const { + return absl::StrFormat("Num Total Tricks: %d\n", num_tricks_); +} + +std::string OhHellState::FormatDealer() const { + return absl::StrFormat("Dealer: %d\n", dealer_); +} + +std::string OhHellState::FormatNumCardsDealt() const { + return absl::StrFormat("Num Cards Dealt: %d\n", num_cards_dealt_); +} + +std::string OhHellState::FormatDeal() const { + std::string rv; + for (Player player = 0; player < num_players_; ++player) { + absl::StrAppendFormat(&rv, "%s\n", FormatHand(player)); + } + return rv; +} + +std::string OhHellState::FormatTrump() const { + return absl::StrFormat("Trump: %s\n", deck_props_.CardString(trump_)); +} + +std::string OhHellState::FormatBids() const { + std::string rv = "\n\nBids: "; + for (Player player = 0; player < num_players_; ++player) { + absl::StrAppendFormat(&rv, "%d ", bids_[player]); + } + absl::StrAppend(&rv, "\nTricks Won: "); + for (Player player = 0; player < num_players_; ++player) { + absl::StrAppendFormat(&rv, "%d ", num_tricks_won_[player]); + } + absl::StrAppend(&rv, "\n"); + return rv; +} + +std::string OhHellState::FormatPlay() const { + SPIEL_CHECK_GT(num_cards_played_, 0); + std::string rv = "\nTricks:\n"; + // wraps around to show which player started trick + for (Player player = 0; player < 2 * num_players_ - 1; ++player) { + absl::StrAppendFormat(&rv, "%d ", player % num_players_); + } + + for (const auto& trick : tricks_) { + if (trick.Leader() == kInvalidPlayer) break; + absl::StrAppend(&rv, "\n", std::string(3 * trick.Leader(), ' ')); + for (auto card : trick.Cards()) { + absl::StrAppend(&rv, deck_props_.CardString(card), " "); + } + } + return rv; +} + +std::string OhHellState::FormatResult() const { + SPIEL_CHECK_TRUE(IsTerminal()); + std::string rv = "Score: "; + for (Player player = 0; player < num_players_; ++player) { + absl::StrAppendFormat(&rv, "%.0lf ", returns_[player]); + } + absl::StrAppend(&rv, "\n"); + return rv; +} + +std::vector OhHellState::LegalActions() const { + switch (phase_) { + case Phase::kChooseNumTricks: + return ChooseNumTricksLegalActions(); + case Phase::kDealer: + return DealerLegalActions(); + case Phase::kDeal: + return DealLegalActions(); + case Phase::kBid: + return BiddingLegalActions(); + case Phase::kPlay: + return PlayLegalActions(); + default: + return {}; + } +} + +std::vector OhHellState::ChooseNumTricksLegalActions() const { + std::vector legal_actions; + if (num_tricks_fixed_ == kRandomNumTricks) { + for (int i = kMinNumTricks; i <= MaxNumTricks(); ++i) { + legal_actions.push_back(i); + } + } else { + legal_actions.push_back(num_tricks_fixed_); + } + return legal_actions; +} + +std::vector OhHellState::DealerLegalActions() const { + std::vector legal_actions; + legal_actions.reserve(num_players_); + for (int i = 0; i < num_players_; ++i) legal_actions.push_back(i); + return legal_actions; +} + +std::vector OhHellState::DealLegalActions() const { + std::vector legal_actions; + legal_actions.reserve(deck_props_.NumCards() 
- num_cards_dealt_); + for (int i = 0; i < deck_props_.NumCards(); ++i) { + if (!initial_deal_[i].has_value()) legal_actions.push_back(i); + } + return legal_actions; +} + +std::vector OhHellState::BiddingLegalActions() const { + int bid_sum = 0; + bool last_bidder = true; + for (Player player = 0; player < num_players_; ++player) { + if (player != current_player_) last_bidder &= bids_[player] != kInvalidBid; + bid_sum += std::max(0, bids_[player]); + } + std::vector legal_actions; + for (Action bid = 0; bid <= num_tricks_; ++bid) { + if (!last_bidder || bid + bid_sum != num_tricks_) { + legal_actions.push_back(bid + deck_props_.NumCards()); + } + } + return legal_actions; +} + +std::vector OhHellState::PlayLegalActions() const { + std::vector legal_actions; + legal_actions.reserve(num_tricks_ - num_cards_played_ / num_players_); + + // Check if we can follow suit. + if (num_cards_played_ % num_players_ != 0) { + auto suit = CurrentTrick().LedSuit(); + for (int rank = 0; rank < deck_props_.NumCardsPerSuit(); ++rank) { + if (holder_[deck_props_.Card(suit, rank)] == current_player_) { + legal_actions.push_back(deck_props_.Card(suit, rank)); + } + } + } + if (!legal_actions.empty()) return legal_actions; + + // Otherwise, we can play any of our cards. + for (int card = 0; card < deck_props_.NumCards(); ++card) { + if (holder_[card] == current_player_) legal_actions.push_back(card); + } + return legal_actions; +} + +std::vector> OhHellState::ChanceOutcomes() const { + std::vector> outcomes; + double p; + if (phase_ == Phase::kChooseNumTricks) { + // uniform randomly select between all legal numbers of tricks possible + // given the number of players and size of the deck + if (num_tricks_fixed_ < kMinNumTricks) { + p = 1.0 / static_cast(MaxNumTricks()); + for (int i = 0; i < MaxNumTricks(); ++i) outcomes.emplace_back(i + 1, p); + } else { + outcomes.emplace_back(num_tricks_fixed_, 1.0); + } + } else if (phase_ == Phase::kDealer) { + // uniform randomly select a player + p = 1.0 / static_cast(num_players_); + for (int i = 0; i < num_players_; ++i) outcomes.emplace_back(i, p); + } else if (num_cards_dealt_ < num_players_ * num_tricks_ + kNumTrumpDeal) { + // the only other chance nodes are when cards are dealt + int num_cards_rem = deck_props_.NumCards() - num_cards_dealt_; + outcomes.reserve(num_cards_rem); + p = 1.0 / static_cast(num_cards_rem); + for (int card = 0; card < deck_props_.NumCards(); ++card) { + if (!initial_deal_[card].has_value()) outcomes.emplace_back(card, p); + } + } + return outcomes; +} + +void OhHellState::DoApplyAction(Action action) { + switch (phase_) { + case Phase::kChooseNumTricks: + return ApplyChooseNumTricksAction(action); + case Phase::kDealer: + return ApplyDealerAction(action); + case Phase::kDeal: + return ApplyDealAction(action); + case Phase::kBid: + return ApplyBiddingAction(action - deck_props_.NumCards()); + case Phase::kPlay: + return ApplyPlayAction(action); + case Phase::kGameOver: + SpielFatalError("Cannot act in terminal states"); + } +} + +void OhHellState::ApplyChooseNumTricksAction(int num_tricks) { + num_tricks_ = num_tricks; + tricks_.resize(num_tricks_); + phase_ = Phase::kDealer; +} + +void OhHellState::ApplyDealerAction(int dealer) { + dealer_ = dealer; + phase_ = Phase::kDeal; +} + +void OhHellState::ApplyDealAction(int card) { + // dealer_ is ignored for dealing (player 0 always gets the first card) + // dealer is only used to determine who will go first during bid and play + int num_player_cards = num_players_ * num_tricks_; + if 
(num_cards_dealt_ < num_player_cards) { + holder_[card] = (num_cards_dealt_ % num_players_); + initial_deal_[card] = (num_cards_dealt_ % num_players_); + } else { + // last card dealt tells us the trump suit + trump_ = card; + phase_ = Phase::kBid; + current_player_ = (dealer_ + 1) % num_players_; + } + ++num_cards_dealt_; +} + +void OhHellState::ApplyBiddingAction(int bid) { + bids_[current_player_] = bid; + current_player_ = (current_player_ + 1) % num_players_; + if (current_player_ == (dealer_ + 1) % num_players_) phase_ = Phase::kPlay; +} + +void OhHellState::ApplyPlayAction(int card) { + SPIEL_CHECK_TRUE(holder_[card] == current_player_); + + holder_[card] = absl::nullopt; + if (num_cards_played_ % num_players_ == 0) { + CurrentTrick() = Trick(current_player_, deck_props_.CardSuit(trump_), + card, deck_props_); + } else { + CurrentTrick().Play(current_player_, card); + } + const Player winner = CurrentTrick().Winner(); + ++num_cards_played_; + if (num_cards_played_ % num_players_ == 0) { + ++num_tricks_won_[winner]; + current_player_ = winner; + } else { + current_player_ = (current_player_ + 1) % num_players_; + } + if (num_cards_played_ == num_players_ * num_tricks_) { + phase_ = Phase::kGameOver; + ComputeScore(); + } +} + +Player OhHellState::CurrentPlayer() const { + if (IsTerminal()) { + return kTerminalPlayerId; + } else if (phase_ == Phase::kBid || phase_ == Phase::kPlay) { + return current_player_; + } else { + return kChancePlayerId; + } +} + +void OhHellState::ComputeScore() { + SPIEL_CHECK_TRUE(IsTerminal()); + for (Player player = 0; player < num_players_; ++player) { + if (off_bid_penalty_) { + if (num_tricks_won_[player] == bids_[player]) { + returns_[player] = + points_per_trick_ * num_tricks_won_[player] + kMadeBidBonus; + } else { + int diff = num_tricks_won_[player] - bids_[player]; + returns_[player] = -(points_per_trick_ * abs(diff)); + } + } else { + returns_[player] = points_per_trick_ * num_tricks_won_[player]; + if (num_tricks_won_[player] == bids_[player]) { + returns_[player] += kMadeBidBonus; + } + } + } +} + +std::string OhHellState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + std::string rv = ""; + if (IsTerminal()) return ToString(); + if (phase_ == Phase::kChooseNumTricks) return rv; + absl::StrAppend(&rv, FormatChooseNumTricks()); + if (phase_ == Phase::kDealer) return rv; + absl::StrAppend(&rv, FormatDealer()); + absl::StrAppend(&rv, FormatNumCardsDealt()); + if (num_cards_dealt_ > num_players_ * num_tricks_) { + absl::StrAppend(&rv, FormatTrump()); + } + absl::StrAppend(&rv, FormatHand(player)); + if (num_cards_played_ > 0) absl::StrAppend(&rv, FormatPlay()); + absl::StrAppend(&rv, FormatBids()); + return rv; +} + +void OhHellState::InformationStateTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + std::fill(values.begin(), values.end(), 0.0); + SPIEL_CHECK_EQ(values.size(), game_->InformationStateTensorSize()); + if (phase_ != Phase::kBid && phase_ != Phase::kPlay) return; + auto ptr = values.begin(); + // total number of tricks + ptr[num_tricks_ - 1] = 1; + ptr += MaxNumTricks(); + // which player is dealer + ptr[dealer_] = 1; + ptr += num_players_; + // trump + ptr[trump_] = 1; + ptr += deck_props_.NumCards(); + // initial hand + for (int i = 0; i < deck_props_.NumCards(); ++i) + if (initial_deal_[i] == player) ptr[i] = 1; + ptr += deck_props_.NumCards(); + // Current hand + for (int i = 0; i < 
deck_props_.NumCards(); ++i) + if (holder_[i] == player) ptr[i] = 1; + ptr += deck_props_.NumCards(); + // all bids + for (Player p = 0; p < num_players_; ++p) { + ptr[bids_[p] + 1] = 1; + // need to account for bid of 0 and if player hasn't bid yet + ptr += MaxNumTricks() + 2; + } + // each player's number of tricks won so far (temperature encoding) + for (Player p = 0; p < num_players_; ++p) { + for (int i = 0; i < MaxNumTricks(); ++i) { + if (num_tricks_won_[p] > i) ptr[i] = 1; + } + ptr += MaxNumTricks(); + } + // History of tricks, each in the format: 0 1 ... n 0 1 ... n-1 + int current_trick = num_cards_played_ / num_players_; + auto play_hist = history_.begin() + NumChanceActions() + num_players_; + for (int i = 0; i <= current_trick; ++i) { + Player leader = tricks_[i].Leader(); + ptr += std::max(leader, 0) * deck_props_.NumCards(); + for (int i = 0; i < num_players_; i++) { + if (play_hist < history_.end()) { + ptr[play_hist->action] = 1; + ++play_hist; + } + ptr += deck_props_.NumCards(); + } + ptr += (num_players_ - std::max(leader, 0) - 1) * deck_props_.NumCards(); + } + // Skip over unplayed tricks. + int trick_tensor_size = (2 * num_players_ - 1) * deck_props_.NumCards(); + ptr += (MaxNumTricks() - current_trick - 1) * trick_tensor_size; + SPIEL_CHECK_EQ(ptr, values.end()); +} + +// This implementation produces samples that may be inconsistent w.r.t. voids. +// i.e. if a player has played another suit when a diamond was lead, +// this player cannot have any diamonds according to the rules of the game, but +// the generated sample could be a state that contradicts this rule. +std::unique_ptr OhHellState::ResampleFromInfostate( + int player_id, std::function rng) const { + std::unique_ptr clone = game_->NewInitialState(); + if (phase_ != Phase::kBid && phase_ != Phase::kPlay) return clone; + + // initial chance actions (choose num tricks and dealer) + clone->ApplyAction(num_tricks_); + clone->ApplyAction(dealer_); + + // deal needs to be consistent with the player's hand, and the opponent's + // played cards + std::vector> known(num_players_); + for (int card = 0; card < deck_props_.NumCards(); ++card) { + absl::optional p = initial_deal_[card]; + if (p.has_value() && (*p == player_id || !holder_[card].has_value())) { + // if player_id was initially dealt the card, or if anyone was but no + // longer holds it (because it was played), player_id knows where it was + // dealt + known[*p].push_back(card); + } + } + + // the only other known card is trump + // apply num_tricks * num_players deal actions + std::vector known_deal_counter(num_players_, 0); + for (int i = 0; i < num_players_ * num_tricks_; ++i) { + Player deal_to = i % num_players_; + if (known_deal_counter[deal_to] < known[deal_to].size()) { + clone->ApplyAction(known[deal_to][known_deal_counter[deal_to]]); + known_deal_counter[deal_to]++; + } else { + // deal randomly from the remaining unknown cards + Action candidate = kInvalidAction; + while (candidate == kInvalidAction) { + candidate = SampleAction(clone->ChanceOutcomes(), rng()).first; + absl::optional p = initial_deal_[candidate]; + if (candidate == trump_ || (p.has_value() && + (*p == player_id || !holder_[candidate].has_value()))) { + // can't use this card if player_id has it, or if it was played by + // any player + candidate = kInvalidAction; + } + } + clone->ApplyAction(candidate); + } + } + + // deal the trump card + clone->ApplyAction(trump_); + + // now apply all of the bid and play phase actions in the same order as the + // original state + int start 
= kNumPreDealChanceActions + num_players_ * num_tricks_ + 1; + for (size_t i = start; i < history_.size(); i++) { + clone->ApplyAction(history_.at(i).action); + } + + SPIEL_CHECK_EQ(History().size(), clone->History().size()); + SPIEL_CHECK_EQ(InformationStateString(player_id), + clone->InformationStateString(player_id)); + return clone; +} + +Trick::Trick() : Trick(kInvalidPlayer, Suit::kInvalidSuit, kInvalidRank, + DeckProperties()) {} + +Trick::Trick(Player leader, Suit trumps, int card, DeckProperties deck_props) + : trumps_(trumps), + led_suit_(deck_props.CardSuit(card)), + winning_suit_(deck_props.CardSuit(card)), + winning_rank_(deck_props.CardRank(card)), + leader_(leader), + winning_player_(leader), + deck_props_(deck_props) { cards_.push_back(card); } + +void Trick::Play(Player player, int card) { + Suit suit = deck_props_.CardSuit(card); + int rank = deck_props_.CardRank(card); + if (suit == winning_suit_) { + if (rank > winning_rank_) { + winning_rank_ = rank; + winning_player_ = player; + } + } else if (suit == trumps_) { + winning_suit_ = trumps_; + winning_rank_ = rank; + winning_player_ = player; + } + cards_.push_back(card); +} + +} // namespace oh_hell +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/oh_hell/oh_hell.h b/scenarios/bargaining/open_spiel/open_spiel/games/oh_hell/oh_hell.h new file mode 100644 index 0000000..2939713 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/oh_hell/oh_hell.h @@ -0,0 +1,271 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_OH_HELL_H_ +#define OPEN_SPIEL_GAMES_OH_HELL_H_ + +// The game of Oh Hell!. +// https://en.wikipedia.org/wiki/Oh_Hell + +// This is played by 3-7 players on a deck of up to 52 cards. It consists of a +// bidding phase followed by a play phase. +// +// Games start with a dealer dealing a specified number of cards to each player +// and then overturning a final card that is placed face up in the middle. The +// suit of this card becomes the 'trump' suit for the remainder of the game. +// +// In the bidding phase, players proceed clockwise, starting from the player +// to the left of the dealer, announcing how many tricks they think they can +// win. There is one catch: the total number of tricks bid by players cannot +// be equal to the actual number of tricks that will follow. For example, if +// 4 players are each dealt 5 cards, and the first 3 players bid 1, 2, 1, then +// the last player cannot bid 1. +// +// This is followed by the play phase, which proceeds as is standard in +// trick-taking games. Scoring is based on whether a player won exactly the +// number of tricks they bid. In this implementation, a player scores 1 point +// for every trick won and an additional 10 points if they won the exact number +// bid. 
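+// Cards are integers with the suit varying fastest (see DeckProperties
+// below): CardSuit(card) = card % num_suits and CardRank(card) = card /
+// num_suits, so with the full 4-suit, 13-rank deck card 0 is "C2" (the two
+// of clubs) and card 51 is "HA" (the ace of hearts). During bidding, a bid
+// of b is encoded as action b + NumCards(), so bid actions never collide
+// with card actions.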
+// + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/spiel.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" + +namespace open_spiel { +namespace oh_hell { + +// last card dealt is shown face up to all players and determines the trump suit +inline constexpr int kNumTrumpDeal = 1; +// before dealing, select the number of tricks and decide which player is dealer +inline constexpr int kNumPreDealChanceActions = 2; +inline constexpr int kMinNumPlayers = 3; +inline constexpr int kMaxNumPlayers = 7; +inline constexpr int kMinNumSuits = 1; +inline constexpr int kMaxNumSuits = 4; +inline constexpr int kMinNumCardsPerSuit = 2; +inline constexpr int kMaxNumCardsPerSuit = 13; +inline constexpr int kMinNumTricks = 1; +inline constexpr int kRandomNumTricks = -1; +inline constexpr int kInvalidRank = -1; +// Score bonus received for taking exactly as many tricks as bid +inline constexpr int kMadeBidBonus = 10; +inline constexpr int kInvalidBid = -1; + +enum class Suit { + kInvalidSuit = -1, kClubs = 0, kDiamonds = 1, kSpades = 2, kHearts = 3 +}; +constexpr char kRankChar[] = "23456789TJQKA"; +constexpr char kSuitChar[] = "CDSH"; +inline std::map kPhaseStr = { + {0, "ChooseNumTricks"}, {1, "ChooseDealer"}, {2, "Deal"}, {3, "Bid"}, + {4, "Play"}, {5, "GameOver"}}; + +// helper class to allow different numbers of cards / suits +class DeckProperties { + public: + DeckProperties() : DeckProperties(0, 0) {} + DeckProperties(int num_suits, int num_cards_per_suit) : num_suits_(num_suits), + num_cards_per_suit_(num_cards_per_suit) {} + int NumSuits() const { return num_suits_; } + int NumCardsPerSuit() const { return num_cards_per_suit_; } + int NumCards() const { return num_suits_ * num_cards_per_suit_; } + Suit CardSuit(int card) const { + if (num_suits_ <= 0) return Suit::kInvalidSuit; + return Suit(card % num_suits_); + } + int CardRank(int card) const { + if (num_suits_ <= 0) return kInvalidRank; + return card / num_suits_; + } + int Card(Suit suit, int rank) const { + return rank * num_suits_ + static_cast(suit); + } + std::string CardString(int card) const { + return {kSuitChar[static_cast(CardSuit(card))], + kRankChar[CardRank(card)]}; + } + + private: + int num_suits_; + int num_cards_per_suit_; +}; + +// State of a single trick. +class Trick { + public: + Trick(); + Trick(Player leader, Suit trumps, int card, DeckProperties deck_props); + void Play(Player player, int card); + Suit LedSuit() const { return led_suit_; } + Player Winner() const { return winning_player_; } + Player Leader() const { return leader_; } + std::vector Cards() const { return cards_; } + + private: + Suit trumps_; + Suit led_suit_; + Suit winning_suit_; + int winning_rank_; + Player leader_; + Player winning_player_; + DeckProperties deck_props_; + std::vector cards_; +}; + +// State of an in-play game. Can be any phase of the game. 
+class OhHellState : public State { + public: + OhHellState(std::shared_ptr game, int num_players, + DeckProperties deck_props, int num_tricks_fixed, + bool off_bid_penalty, int points_per_trick); + Player CurrentPlayer() const override; + std::string ActionToString(Player player, Action action) const override; + std::string ToString() const override; + bool IsTerminal() const override { return phase_ == Phase::kGameOver; } + std::vector Returns() const override { return returns_; } + std::string InformationStateString(Player player) const override; + void InformationStateTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override { + return std::unique_ptr(new OhHellState(*this)); + } + std::vector LegalActions() const override; + std::vector> ChanceOutcomes() const override; + std::unique_ptr ResampleFromInfostate( + int player_id, std::function rng) const override; + + protected: + void DoApplyAction(Action action) override; + + private: + enum class Phase { kChooseNumTricks, kDealer, kDeal, kBid, kPlay, kGameOver }; + + std::vector DealerLegalActions() const; + std::vector ChooseNumTricksLegalActions() const; + std::vector DealLegalActions() const; + std::vector BiddingLegalActions() const; + std::vector PlayLegalActions() const; + void ApplyDealerAction(int dealer); + void ApplyChooseNumTricksAction(int num_tricks); + void ApplyDealAction(int card); + void ApplyBiddingAction(int bid); + void ApplyPlayAction(int card); + void ComputeScore(); + Trick& CurrentTrick() { return tricks_[num_cards_played_ / num_players_]; } + const Trick& CurrentTrick() const { + return tricks_[num_cards_played_ / num_players_]; + } + std::string FormatHand(int player) const; + std::string FormatPhase() const; + std::string FormatChooseNumTricks() const; + std::string FormatDealer() const; + std::string FormatNumCardsDealt() const; + std::string FormatDeal() const; + std::string FormatTrump() const; + std::string FormatBids() const; + std::string FormatPlay() const; + std::string FormatResult() const; + int MaxNumTricks() const { + if (num_tricks_fixed_ > 0) return num_tricks_fixed_; + return (deck_props_.NumCards() - kNumTrumpDeal) / num_players_; + } + int NumChanceActions() const { + return kNumPreDealChanceActions + num_players_ * num_tricks_ + + kNumTrumpDeal; + } + + const int num_players_; + const int num_tricks_fixed_; + const DeckProperties deck_props_; + const bool off_bid_penalty_; + const int points_per_trick_; + + std::vector num_tricks_won_; + std::vector bids_; + int num_cards_played_ = 0; + int num_cards_dealt_ = 0; + int num_tricks_ = 0; + int trump_; + Player current_player_ = kChancePlayerId; + Player dealer_ = kInvalidPlayer; + Phase phase_ = Phase::kChooseNumTricks; + std::vector tricks_{}; + std::vector returns_; + std::vector> holder_{}; + std::vector> initial_deal_{}; +}; + +class OhHellGame : public Game { + public: + explicit OhHellGame(const GameParameters& params); + int NumDistinctActions() const override { + return deck_props_.NumCards() + MaxNumTricks() + 1; + } + int MaxChanceOutcomes() const override { return deck_props_.NumCards(); } + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new OhHellState( + shared_from_this(), + /*num_players=*/num_players_, + /*deck_props=*/deck_props_, + /*num_tricks_fixed=*/num_tricks_fixed_, + /*off_bid_penalty=*/off_bid_penalty_, + /*points_per_trick=*/points_per_trick_)); + } + int NumPlayers() const override { return num_players_; } + double MinUtility() const override { + 
if (off_bid_penalty_) return (- MaxNumTricks() * points_per_trick_); + return 0; + } + double MaxUtility() const override { + return MaxNumTricks() * points_per_trick_ + kMadeBidBonus; + } + // select dealer and number of tricks (kNumPreDealChanceActions) + // deal (MaxNumTricks() * num_players + kNumTrumpDeal) + // bidding (num_players) + // play (MaxNumTricks() * num_players) + int MaxGameLength() const override { + return 2 * MaxNumTricks() * num_players_ + num_players_ + + kNumPreDealChanceActions + kNumTrumpDeal; + } + int MaxChanceNodesInHistory() const override { + return kNumPreDealChanceActions + MaxNumTricks() * num_players_ + + kNumTrumpDeal; + } + std::vector InformationStateTensorShape() const override; + // Given deck size, We can deal at most this many cards to each player and + // have an extra card to choose trump + int MaxNumTricks() const { + if (num_tricks_fixed_ > 0) return num_tricks_fixed_; + return (deck_props_.NumCards() - kNumTrumpDeal) / num_players_; + } + + private: + const int num_players_; + const DeckProperties deck_props_; + const int num_tricks_fixed_; + const bool off_bid_penalty_; + const int points_per_trick_; +}; + +} // namespace oh_hell +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_OH_HELL_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/oh_hell/oh_hell_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/oh_hell/oh_hell_test.cc new file mode 100644 index 0000000..1536235 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/oh_hell/oh_hell_test.cc @@ -0,0 +1,254 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
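+// The decoder below mirrors the layout documented in
+// OhHellGame::InformationStateTensorShape(). As a worked example, with the
+// default parameters (3 players, 4 suits, 13 cards per suit, random number
+// of tricks) MaxNumTricks() = (52 - 1) / 3 = 17 and the tensor length is
+//     17 + 3 + 52              (num tricks, dealer, trump)
+//   + 2 * 52                   (initial hand, current hand)
+//   + 3 * (17 + 2)             (bids, including "no bid yet")
+//   + 17 * 3                   (tricks won, temperature-encoded)
+//   + 17 * (2 * 3 - 1) * 52    (trick history)
+//   = 4704.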
+ +#include + +#include "open_spiel/games/oh_hell/oh_hell.h" +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace oh_hell { +namespace { + +void GameConfigSimTest() { + // only test with 2, 7, and 12 cards per suit and 3, 5, or 7 players + // to reduce test output size for CI + for (int players = kMinNumPlayers; players <= kMaxNumPlayers; players += 2) { + for (int suits = kMinNumSuits; suits <= kMaxNumSuits; ++suits) { + for (int cps = kMinNumCardsPerSuit; cps <= kMaxNumCardsPerSuit; + cps += 5) { + if (suits * cps - 1 >= players) { + open_spiel::GameParameters params; + params["players"] = GameParameter(players); + params["num_suits"] = GameParameter(suits); + params["num_cards_per_suit"] = GameParameter(cps); + // test with a randomly selected number of tricks + testing::RandomSimTest(*LoadGame("oh_hell", params), 1); + // test with a fixed number of tricks + params["num_tricks_fixed"] = GameParameter(1); + testing::RandomSimTest(*LoadGame("oh_hell", params), 1); + } + } + } + } +} + +void BasicGameTests() { + testing::LoadGameTest("oh_hell"); + testing::ChanceOutcomesTest(*LoadGame("oh_hell")); + testing::RandomSimTest(*LoadGame("oh_hell"), 3); + testing::RandomSimTest( + *LoadGame("oh_hell(off_bid_penalty=true,points_per_trick=2)"), 1); + testing::ResampleInfostateTest(*LoadGame("oh_hell"), /*num_sims=*/10); +} + +std::string InformationStateTensorToString(Player player, + const DeckProperties& deck_props, + int num_players, + int max_num_tricks, + const std::vector& tensor) { + int num_tricks; + Player dealer; + int trump; + std::vector hand(deck_props.NumCards()); + std::vector bids(num_players); + std::vector tricks_won(num_players); + std::vector tricks(max_num_tricks); + + auto ptr = tensor.begin(); + // num tricks chance action + for (int i = 0; i < max_num_tricks; ++i) { + if (ptr[i] == 1) { + num_tricks = i + 1; + break; + } + } + ptr += max_num_tricks; + // dealer selection + for (int i = 0; i < num_players; ++i) { + if (ptr[i] == 1) { + dealer = i; + break; + } + } + ptr += num_players; + // set trump + for (int i = 0; i < deck_props.NumCards(); ++i) { + if (ptr[i] == 1) { + trump = i; + break; + } + } + ptr += deck_props.NumCards(); + // bypass dealt hand + ptr += deck_props.NumCards(); + // Current hand + for (int i = 0; i < deck_props.NumCards(); ++i) { + if (ptr[i] == 1) hand[i] = 1; + } + ptr += deck_props.NumCards(); + // bids + for (Player p = 0; p < num_players; ++p) { + for (int i = 0; i <= max_num_tricks + 1; ++i) { + if (ptr[i] == 1) { + // account for no bid yet + bids[p] = i - 1; + break; + } + } + ptr += max_num_tricks + 2; + } + // Points + for (int i = 0; i < num_players; ++i) { + int player_score = 0; + for (int j = 0; j < max_num_tricks; ++j) { + if (ptr[j] == 1) ++player_score; + } + tricks_won[i] = player_score; + ptr += max_num_tricks; + } + // Trick history + Player leader; + int num_cards_played = 0; + for (int trick = 0; trick < max_num_tricks; ++trick) { + leader = kInvalidPlayer; + for (int i = 0; i < num_players * deck_props.NumCards(); ++i) { + if (ptr[i] == 1) { + leader = i / deck_props.NumCards(); + int card = i % deck_props.NumCards(); + tricks[trick] = Trick(leader, deck_props.CardSuit(trump), card, + deck_props); + ++num_cards_played; + break; + } + } + if (leader != kInvalidPlayer) { + ptr += (leader + 1) * deck_props.NumCards(); + for (int i = 0; i < num_players - 1; ++i) { + for (int j = 0; j < deck_props.NumCards(); ++j) { + if (ptr[j] == 1) { + tricks[trick].Play((leader + i + 1) 
% num_players, j); + ++num_cards_played; + } + } + ptr += deck_props.NumCards(); + } + ptr += (num_players - std::max(leader, 0) - 1) * deck_props.NumCards(); + } else { + ptr += (2 * num_players - 1) * deck_props.NumCards(); + break; + } + } + + // Now build InformationStateString. + std::string rv = absl::StrFormat("Num Total Tricks: %d\n", num_tricks); + absl::StrAppendFormat(&rv, "Dealer: %d\n", dealer); + // guaranteed to be in kPlay or kBid phase, so all chance nodes have already + // occured + absl::StrAppendFormat(&rv, "Num Cards Dealt: %d\n", + num_tricks * num_players + 1); + absl::StrAppendFormat(&rv, "Trump: %s\n", deck_props.CardString(trump)); + absl::StrAppendFormat(&rv, "Player: %d\n", player); + for (int suit = 0; suit < deck_props.NumSuits(); ++suit) { + absl::StrAppendFormat(&rv, " %c: ", kSuitChar[suit]); + for (int rank = deck_props.NumCardsPerSuit() - 1; rank >= 0; --rank) { + if (hand[deck_props.Card(Suit(suit), rank)]) { + absl::StrAppend(&rv, absl::string_view(&kRankChar[rank], 1)); + } + } + absl::StrAppend(&rv, "\n"); + } + + if (num_cards_played > 0) { + absl::StrAppend(&rv, "\nTricks:\n"); + // wraps around to show which player started trick + for (Player p = 0; p < 2 * num_players - 1; ++p) { + absl::StrAppendFormat(&rv, "%d ", p % num_players); + } + for (int i = 0; i <= (num_cards_played - 1) / num_players; ++i) { + absl::StrAppend(&rv, "\n", std::string(3 * tricks[i].Leader(), ' ')); + for (auto card : tricks[i].Cards()) { + absl::StrAppend(&rv, deck_props.CardString(card), " "); + } + } + } + + absl::StrAppend(&rv, "\n\nBids: "); + for (Player p = 0; p < num_players; ++p) { + absl::StrAppendFormat(&rv, "%d ", bids[p]); + } + absl::StrAppend(&rv, "\nTricks Won: "); + for (Player p = 0; p < num_players; ++p) { + absl::StrAppendFormat(&rv, "%d ", tricks_won[p]); + } + absl::StrAppend(&rv, "\n"); + + return rv; +} + +// Build InformationStateString from InformationStateTensor and check that it +// is equal to state->InformationStateString(player). 
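+// As a reading aid (summarizing the decoding logic above): the tensor
+// sections appear in this order: the number-of-tricks chance outcome, the
+// dealer selection, the trump card, the (skipped) dealt hand, the current
+// hand, each player's bid, each player's tricks won, and finally the
+// trick-by-trick play history.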
+void InformationStateTensorTest(int num_games = 10) {
+  std::mt19937 rng(time(0));
+  int num_players = kMinNumPlayers;
+  int num_suits = kMaxNumSuits;
+  int num_cards_per_suit = kMaxNumCardsPerSuit;
+  open_spiel::GameParameters params;
+  params["players"] = GameParameter(num_players);
+  params["num_suits"] = GameParameter(num_suits);
+  params["num_cards_per_suit"] = GameParameter(num_cards_per_suit);
+  DeckProperties deck_props = DeckProperties(num_suits, num_cards_per_suit);
+  std::shared_ptr<const Game> game =
+      open_spiel::LoadGame("oh_hell", params);
+  std::shared_ptr<const OhHellGame> oh_hell_game =
+      std::dynamic_pointer_cast<const OhHellGame>(game);
+  int max_num_tricks = oh_hell_game->MaxNumTricks();
+  for (int i = 0; i < num_games; ++i) {
+    std::unique_ptr<State> state = game->NewInitialState();
+    while (!state->IsTerminal()) {
+      if (state->IsChanceNode()) {
+        std::vector<std::pair<Action, double>> outcomes =
+            state->ChanceOutcomes();
+        open_spiel::Action action =
+            open_spiel::SampleAction(outcomes, rng).first;
+        state->ApplyAction(action);
+      } else {
+        auto player = state->CurrentPlayer();
+        auto infostate = state->InformationStateTensor(player);
+
+        std::string infostate_string = state->InformationStateString(player);
+        std::string rebuilt_infostate_string =
+            InformationStateTensorToString(player, deck_props, num_players,
+                                           max_num_tricks, infostate);
+        SPIEL_CHECK_EQ(infostate_string, rebuilt_infostate_string);
+
+        std::vector<Action> actions = state->LegalActions();
+        std::uniform_int_distribution<> dis(0, actions.size() - 1);
+        auto action = actions[dis(rng)];
+        state->ApplyAction(action);
+      }
+    }
+  }
+}
+
+}  // namespace
+}  // namespace oh_hell
+}  // namespace open_spiel
+
+int main(int argc, char** argv) {
+  open_spiel::oh_hell::BasicGameTests();
+  open_spiel::oh_hell::GameConfigSimTest();
+  open_spiel::oh_hell::InformationStateTensorTest();
+}
diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/oshi_zumo/oshi_zumo.cc b/scenarios/bargaining/open_spiel/open_spiel/games/oshi_zumo/oshi_zumo.cc
new file mode 100644
index 0000000..778f1a0
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/games/oshi_zumo/oshi_zumo.cc
@@ -0,0 +1,259 @@
+// Copyright 2019 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "open_spiel/games/oshi_zumo/oshi_zumo.h"
+
+#include
+#include
+#include
+
+#include "open_spiel/game_parameters.h"
+#include "open_spiel/spiel.h"
+
+namespace open_spiel {
+namespace oshi_zumo {
+namespace {
+
+constexpr char kBoundaryPos = '#';
+constexpr char kOpenPos = '.';
+constexpr char kWrestler = 'W';
+
+// Default parameters.
+constexpr int kNoWinner = -1;
+constexpr int kDefaultHorizon = 1000;
+constexpr int kDefaultCoins = 50;
+constexpr int kDefaultSize = 3;
+constexpr bool kDefaultAlesia = false;
+constexpr int kDefaultMinBid = 0;
+
+const GameType kGameType{/*short_name=*/"oshi_zumo",
+                         /*long_name=*/"Oshi Zumo",
+                         GameType::Dynamics::kSimultaneous,
+                         GameType::ChanceMode::kDeterministic,
+                         GameType::Information::kPerfectInformation,
+                         GameType::Utility::kZeroSum,
+                         GameType::RewardModel::kTerminal,
+                         /*max_num_players=*/2,
+                         /*min_num_players=*/2,
+                         /*provides_information_state_string=*/true,
+                         /*provides_information_state_tensor=*/false,
+                         /*provides_observation_string=*/true,
+                         /*provides_observation_tensor=*/true,
+                         /*parameter_specification=*/
+                         {{"alesia", GameParameter(kDefaultAlesia)},
+                          {"coins", GameParameter(kDefaultCoins)},
+                          {"size", GameParameter(kDefaultSize)},
+                          {"horizon", GameParameter(kDefaultHorizon)},
+                          {"min_bid", GameParameter(kDefaultMinBid)}}};
+
+std::shared_ptr<const Game> Factory(const GameParameters& params) {
+  return std::shared_ptr<const Game>(new OshiZumoGame(params));
+}
+
+REGISTER_SPIEL_GAME(kGameType, Factory);
+
+RegisterSingleTensorObserver single_tensor(kGameType.short_name);
+}  // namespace
+
+OshiZumoState::OshiZumoState(std::shared_ptr<const Game> game)
+    : SimMoveState(game),
+      parent_game_(static_cast<const OshiZumoGame&>(*game)),
+      // Fields set to bad values. Use Game::NewInitialState().
+      winner_(kNoWinner),
+      total_moves_(0),
+      horizon_(parent_game_.horizon()),
+      starting_coins_(parent_game_.starting_coins()),
+      size_(parent_game_.size()),
+      alesia_(parent_game_.alesia()),
+      min_bid_(parent_game_.min_bid()),
+      // pos 0 and pos 2*size_+2 are "off the edge".
+      wrestler_pos_(size_ + 1),
+      coins_({{starting_coins_, starting_coins_}})
+
+{}
+
+int OshiZumoState::CurrentPlayer() const {
+  return IsTerminal() ? kTerminalPlayerId : kSimultaneousPlayerId;
+}
+
+void OshiZumoState::DoApplyActions(const std::vector<Action>& actions) {
+  SPIEL_CHECK_EQ(actions.size(), 2);
+  SPIEL_CHECK_TRUE(actions[0] >= 0);
+  SPIEL_CHECK_TRUE(actions[1] >= 0);
+  SPIEL_CHECK_TRUE(actions[0] <= coins_[0]);
+  SPIEL_CHECK_TRUE(actions[1] <= coins_[1]);
+
+  // Move the wrestler.
+  if (actions[0] > actions[1]) {
+    wrestler_pos_++;
+  } else if (actions[0] < actions[1]) {
+    wrestler_pos_--;
+  }
+
+  // Remove coins.
+  coins_[0] -= actions[0];
+  coins_[1] -= actions[1];
+
+  // Check winner.
+  if (wrestler_pos_ == 0) {
+    winner_ = 1;
+  } else if (wrestler_pos_ == (2 * size_ + 2)) {
+    winner_ = 0;
+  }
+
+  total_moves_++;
+}
+
+std::vector<Action> OshiZumoState::LegalActions(Player player) const {
+  if (IsTerminal()) return {};
+  if (player == kSimultaneousPlayerId) return LegalFlatJointActions();
+  SPIEL_CHECK_FALSE(IsChanceNode());
+  SPIEL_CHECK_TRUE(player == Player{0} || player == Player{1});
+
+  std::vector<Action> movelist;
+  for (int bet = min_bid_; bet <= coins_[player]; bet++) {
+    movelist.push_back(bet);
+  }
+
+  if (movelist.empty()) {
+    // Player does not have the minimum bid: force them to play what they have
+    // left.
+    movelist.push_back(coins_[player]);
+  }
+
+  return movelist;
+}
+
+std::string OshiZumoState::ActionToString(Player player,
+                                          Action action_id) const {
+  if (player == kSimultaneousPlayerId)
+    return FlatJointActionToString(action_id);
+  SPIEL_CHECK_GE(action_id, 0);
+  std::string result = "";
+  absl::StrAppend(&result, "[P", player, "]Bid: ", action_id);
+  return result;
+}
+
+std::string OshiZumoState::ToString() const {
+  std::string result = "Coins: ";
+
+  absl::StrAppend(&result, coins_[0]);
+  absl::StrAppend(&result, " ");
+  absl::StrAppend(&result, coins_[1]);
+  absl::StrAppend(&result, ", Field: ");
+
+  for (int p = 0; p <= 2 * size_ + 2; p++) {
+    if (p == wrestler_pos_) {
+      result += kWrestler;
+    } else if (p == 0 || p == (2 * size_ + 2)) {
+      result += kBoundaryPos;
+    } else {
+      result += kOpenPos;
+    }
+  }
+
+  absl::StrAppend(&result, "\n");
+  return result;
+}
+
+bool OshiZumoState::IsTerminal() const {
+  return (total_moves_ >= horizon_ || winner_ != kNoWinner ||
+          (coins_[0] == 0 && coins_[1] == 0));
+}
+
+std::vector<double> OshiZumoState::Returns() const {
+  if (!IsTerminal()) {
+    return {0.0, 0.0};
+  }
+
+  if (winner_ == 0) {
+    return {1.0, -1.0};
+  } else if (winner_ == 1) {
+    return {-1.0, 1.0};
+  } else {
+    // Wrestler not off the edge.
+    if (alesia_) {
+      return {0.0, 0.0};
+    } else if (wrestler_pos_ > (size_ + 1)) {
+      return {1.0, -1.0};
+    } else if (wrestler_pos_ < (size_ + 1)) {
+      return {-1.0, 1.0};
+    } else {
+      return {0.0, 0.0};
+    }
+  }
+}
+
+std::string OshiZumoState::InformationStateString(Player player) const {
+  SPIEL_CHECK_GE(player, 0);
+  SPIEL_CHECK_LT(player, num_players_);
+  return HistoryString();  // All the information is public.
+}
+
+std::string OshiZumoState::ObservationString(Player player) const {
+  SPIEL_CHECK_GE(player, 0);
+  SPIEL_CHECK_LT(player, num_players_);
+  return ToString();  // All the information is public.
+}
+
+void OshiZumoState::ObservationTensor(Player player,
+                                      absl::Span<float> values) const {
+  SPIEL_CHECK_GE(player, 0);
+  SPIEL_CHECK_LT(player, num_players_);
+
+  SPIEL_CHECK_EQ(values.size(), parent_game_.ObservationTensorShape()[0]);
+  std::fill(values.begin(), values.end(), 0.);
+
+  // 1 bit per coin value of player 1. { 0, 1, ... , starting_coins_ }
+  // 1 bit per coin value of player 2. { 0, 1, ... , starting_coins_ }
+  // 1 bit per position of the field. { 0, 1, ... , 2*size_+2 }
+
+  int offset = 0;
+  values[offset + coins_[0]] = 1;
+
+  offset += (starting_coins_ + 1);
+  values[offset + coins_[1]] = 1;
+
+  offset += (starting_coins_ + 1);
+  values[offset + wrestler_pos_] = 1;
+}
+
+std::unique_ptr<State> OshiZumoState::Clone() const {
+  return std::unique_ptr<State>(new OshiZumoState(*this));
+}
+
+OshiZumoGame::OshiZumoGame(const GameParameters& params)
+    : Game(kGameType, params),
+      horizon_(ParameterValue<int>("horizon")),
+      starting_coins_(ParameterValue<int>("coins")),
+      size_(ParameterValue<int>("size")),
+      alesia_(ParameterValue<bool>("alesia")),
+      min_bid_(ParameterValue<int>("min_bid")) {
+  SPIEL_CHECK_GE(min_bid_, 0);
+  SPIEL_CHECK_LE(min_bid_, starting_coins_);
+}
+
+std::unique_ptr<State> OshiZumoGame::NewInitialState() const {
+  return std::unique_ptr<State>(new OshiZumoState(shared_from_this()));
+}
+
+std::vector<int> OshiZumoGame::ObservationTensorShape() const {
+  // 1 bit per coin value of player 1. { 0, 1, ..., starting_coins_ }
+  // 1 bit per coin value of player 2. { 0, 1, ..., starting_coins_ }
+  // 1 bit per position of the field. { 0, 1, ...
, 2*size_+2 } + return {(2 * (starting_coins_ + 1)) + (2 * size_ + 3)}; +} + +} // namespace oshi_zumo +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/oshi_zumo/oshi_zumo.h b/scenarios/bargaining/open_spiel/open_spiel/games/oshi_zumo/oshi_zumo.h new file mode 100644 index 0000000..b3940c1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/oshi_zumo/oshi_zumo.h @@ -0,0 +1,117 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_OSHI_ZUMO_H_ +#define OPEN_SPIEL_GAMES_OSHI_ZUMO_H_ + +#include +#include +#include +#include + +#include "open_spiel/simultaneous_move_game.h" +#include "open_spiel/spiel.h" + +// Oshi-Zumo is a common benchmark simultaneous move game. Players pay coins +// each round to bid to move a wrestler, which can move ahead (into opponent's +// territory) if the bid is won, or back (into player's territory) if the bid +// is lost. The aim of the original game is to either push the wrestler off the +// edge of the opponent's side, or end with the wrestler on the opponent's side +// of the field, resulting in a win. Alesia is a variant that requires the +// wrestler to be pushed off the side for a win; everything else is a draw. +// +// See: +// - M. Buro 2003, "Solving the Oshi-Zumo game". +// - Bosansky et al 2016, "Algorithms for Computing Strategies in Two-Player +// Simultaneous Move Games". +// - Also called Alesia (slight variant) in Perolat et al. 2016, +// "Softened Approximate Policy Iteration for Markov Games". 
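+//
+// A parameterized instance can be loaded with either form of LoadGame used
+// elsewhere in this tree, e.g. LoadGame("oshi_zumo(coins=5,horizon=5)") or the
+// GameParameters map form shown in oshi_zumo_test.cc; parameters left
+// unspecified keep the defaults listed below.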
+//
+// Parameters:
+//   "alesia"    bool   draw if wrestler is not pushed off (default: false)
+//   "coins"     int    number of coins each player starts with (default: 50)
+//   "size"      int    size of the field (= 2*size + 1) (default: 3)
+//   "horizon"   int    max number of moves before draw (default: 1000)
+//   "min_bid"   int    minimum bid at each turn (default: 0)
+
+namespace open_spiel {
+namespace oshi_zumo {
+
+class OshiZumoGame;
+
+class OshiZumoState : public SimMoveState {
+ public:
+  explicit OshiZumoState(std::shared_ptr<const Game> game);
+
+  Player CurrentPlayer() const override;
+  std::string ActionToString(Player player, Action action_id) const override;
+  std::string ToString() const override;
+  bool IsTerminal() const override;
+  std::vector<double> Returns() const override;
+  std::string InformationStateString(Player player) const override;
+  std::string ObservationString(Player player) const override;
+  void ObservationTensor(Player player,
+                         absl::Span<float> values) const override;
+  std::unique_ptr<State> Clone() const override;
+  std::vector<Action> LegalActions(Player player) const override;
+
+ protected:
+  void DoApplyActions(const std::vector<Action>& actions) override;
+
+ private:
+  const OshiZumoGame& parent_game_;
+  int winner_;
+  int total_moves_;
+  int horizon_;
+  int starting_coins_;
+  int size_;
+  bool alesia_;
+  int min_bid_;
+  int wrestler_pos_;
+  std::array<int, 2> coins_;
+};
+
+class OshiZumoGame : public Game {
+ public:
+  explicit OshiZumoGame(const GameParameters& params);
+
+  int NumDistinctActions() const override { return starting_coins_ + 1; }
+  std::unique_ptr<State> NewInitialState() const override;
+  int MaxChanceOutcomes() const override { return 0; }
+  int NumPlayers() const override { return 2; }
+  double MinUtility() const override { return -1; }
+  double MaxUtility() const override { return +1; }
+  absl::optional<double> UtilitySum() const override { return 0; }
+  std::vector<int> ObservationTensorShape() const override;
+  int MaxGameLength() const override { return horizon_; }
+
+  // Access to game parameters.
+  int horizon() const { return horizon_; }
+  int starting_coins() const { return starting_coins_; }
+  int size() const { return size_; }
+  bool alesia() const { return alesia_; }
+  int min_bid() const { return min_bid_; }
+
+ private:
+  int horizon_;
+  int starting_coins_;
+  int size_;
+  bool alesia_;
+  int min_bid_;
+};
+
+}  // namespace oshi_zumo
+}  // namespace open_spiel
+
+#endif  // OPEN_SPIEL_GAMES_OSHI_ZUMO_H_
diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/oshi_zumo/oshi_zumo_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/oshi_zumo/oshi_zumo_test.cc
new file mode 100644
index 0000000..a39a927
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/games/oshi_zumo/oshi_zumo_test.cc
@@ -0,0 +1,54 @@
+// Copyright 2019 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "open_spiel/games/oshi_zumo/oshi_zumo.h" + +#include "open_spiel/algorithms/get_all_states.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace oshi_zumo { +namespace { + +namespace testing = open_spiel::testing; + +void BasicOshiZumoTests() { + testing::LoadGameTest("oshi_zumo"); + testing::NoChanceOutcomesTest(*LoadGame("oshi_zumo")); + testing::RandomSimTest(*LoadGame("oshi_zumo"), 100); +} + +void CountStates() { + std::shared_ptr game = + LoadGame("oshi_zumo", {{"horizon", open_spiel::GameParameter(5)}, + {"coins", open_spiel::GameParameter(5)}}); + auto states = algorithms::GetAllStates(*game, /*depth_limit=*/-1, + /*include_terminals=*/true, + /*include_chance_states=*/true); + std::cerr << states.size() << std::endl; + SPIEL_CHECK_EQ(states.size(), 146); +} + +} // namespace +} // namespace oshi_zumo +} // namespace open_spiel + +int main(int argc, char **argv) { + open_spiel::oshi_zumo::BasicOshiZumoTests(); + std::shared_ptr game = open_spiel::LoadGame( + "oshi_zumo", {{"horizon", open_spiel::GameParameter(5)}}); + open_spiel::oshi_zumo::CountStates(); + open_spiel::testing::RandomSimTest(*game, /*num_sims=*/10); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/othello/othello.cc b/scenarios/bargaining/open_spiel/open_spiel/games/othello/othello.cc new file mode 100644 index 0000000..b3abe19 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/othello/othello.cc @@ -0,0 +1,329 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/othello/othello.h" + +#include +#include +#include +#include +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/tensor_view.h" + +namespace open_spiel { +namespace othello { +namespace { + +// Facts about the game. 
+const GameType kGameType{ + /*short_name=*/"othello", + /*long_name=*/"Othello", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/{} // no parameters +}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new OthelloGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +CellState PlayerToState(Player player) { + switch (player) { + case 0: + return CellState::kBlack; + case 1: + return CellState::kWhite; + default: + SpielFatalError(absl::StrCat("Invalid player id ", player)); + return CellState::kEmpty; + } +} + +std::string StateToString(CellState state) { + switch (state) { + case CellState::kEmpty: + return "-"; + case CellState::kBlack: + return "x"; + case CellState::kWhite: + return "o"; + default: + SpielFatalError("Invalid cell state"); + } +} + +std::string PlayerToString(Player player) { + switch (player) { + case 0: + return "Black (x)"; + case 1: + return "White (o)"; + default: + return absl::StrCat(player); + } +} + +inline std::string RowString(int row) { return absl::StrCat(1 + row); } + +inline std::string ColumnString(int col) { + return std::string(1, "abcdefgh"[col]); +} + +} // namespace + +Move Move::Next(Direction dir) const { + switch (dir) { + case Direction::kUp: + return Move(row_ - 1, col_); + case Direction::kDown: + return Move(row_ + 1, col_); + case Direction::kLeft: + return Move(row_, col_ - 1); + case Direction::kRight: + return Move(row_, col_ + 1); + case Direction::kUpRight: + return Move(row_ - 1, col_ + 1); + case Direction::kUpLeft: + return Move(row_ - 1, col_ - 1); + case Direction::kDownRight: + return Move(row_ + 1, col_ + 1); + case Direction::kDownLeft: + return Move(row_ + 1, col_ - 1); + default: + SpielFatalError(absl::StrCat("Found unmatched case in Next.")); + } +} + +std::string Move::ToString() const { + return absl::StrCat(ColumnString(col_), RowString(row_)); +} + +inline bool Move::OnBoard() const { + return (row_ >= 0) && (row_ < kNumRows) && (col_ >= 0) && (col_ < kNumCols); +} + +int OthelloState::CountSteps(Player player, int action, Direction dir) const { + Move move = Move(action).Next(dir); + + int count = 0; + CellState cell = PlayerToState(player); + while (move.OnBoard()) { + if (BoardAt(move) == cell) { + return count; + } else if (BoardAt(move) == CellState::kEmpty) { + return 0; + } + + count++; + move = move.Next(dir); + } + + return 0; +} + +bool OthelloState::CanCapture(Player player, int move) const { + if (board_[move] != CellState::kEmpty) return false; + + for (auto direction : kDirections) { + if (CountSteps(player, move, direction) != 0) { + return true; + } + } + + return false; +} + +void OthelloState::Capture(Player player, int action, Direction dir, + int steps) { + Move move = Move(action).Next(dir); + + CellState cell = PlayerToState(player); + for (int step = 0; step < steps; step++) { + if (BoardAt(move) == CellState::kEmpty || BoardAt(move) == cell) { + SpielFatalError(absl::StrCat("Cannot capture cell ", move.ToString())); + } + + board_[move.GetAction()] = cell; + move = move.Next(dir); + } +} + +int 
OthelloState::DiskCount(Player player) const { + return absl::c_count(board_, PlayerToState(player)); +} + +bool OthelloState::NoValidActions() const { + return (LegalRegularActions(Player(0)).empty() && + LegalRegularActions(Player(1)).empty()); +} + +bool OthelloState::ValidAction(Player player, int move) const { + return (board_[move] == CellState::kEmpty && CanCapture(player, move)); +} + +void OthelloState::DoApplyAction(Action action) { + if (action == kPassMove) { // pass + current_player_ = 1 - current_player_; + return; + } + + SPIEL_CHECK_TRUE(ValidAction(current_player_, action)); + + CellState cell = PlayerToState(current_player_); + board_[action] = cell; + + for (auto direction : kDirections) { + int steps = CountSteps(current_player_, action, direction); + if (steps > 0) { + Capture(current_player_, action, direction, steps); + } + } + + if (NoValidActions()) { // check for end game state + int count_zero = DiskCount(Player(0)); + int count_one = DiskCount(Player(1)); + if (count_zero > count_one) { + outcome_ = Player(0); + } else if (count_zero < count_one) { + outcome_ = Player(1); + } else { + outcome_ = Player(kInvalidPlayer); // tie + } + current_player_ = Player(kTerminalPlayerId); + } else { + current_player_ = 1 - current_player_; + } +} + +std::vector OthelloState::LegalRegularActions(Player p) const { + std::vector moves; + for (int cell = 0; cell < kNumCells; ++cell) { + if (ValidAction(p, cell)) { + moves.push_back(cell); + } + } + return moves; +} + +std::vector OthelloState::LegalActions() const { + if (IsTerminal()) return {}; + std::vector moves = LegalRegularActions(current_player_); + if (moves.empty()) moves.push_back(kPassMove); + return moves; +} + +std::string OthelloState::ActionToString(Player player, + Action action_id) const { + if (action_id == kPassMove) { + return "pass"; + } else { + return Move(action_id).ToString(); + } +} + +OthelloState::OthelloState(std::shared_ptr game) : State(game) { + absl::c_fill(board_, CellState::kEmpty); + board_[Move(3, 3).GetAction()] = CellState::kWhite; + board_[Move(3, 4).GetAction()] = CellState::kBlack; + board_[Move(4, 3).GetAction()] = CellState::kBlack; + board_[Move(4, 4).GetAction()] = CellState::kWhite; +} + +std::string OthelloState::ToString() const { + std::string col_labels = " a b c d e f g h "; + std::string str = IsTerminal() ? 
std::string("Terminal State:\n") + : absl::StrCat(PlayerToString(CurrentPlayer()), + " to play:\n"); + absl::StrAppend(&str, col_labels, "\n"); + for (int r = 0; r < kNumRows; ++r) { + absl::StrAppend(&str, RowString(r), " "); + for (int c = 0; c < kNumCols; ++c) { + absl::StrAppend(&str, StateToString(BoardAt(r, c)), " "); + } + absl::StrAppend(&str, RowString(r), "\n"); + } + absl::StrAppend(&str, col_labels); + return str; +} + +bool OthelloState::IsTerminal() const { + return current_player_ == kTerminalPlayerId; +} + +std::vector OthelloState::Returns() const { + if (outcome_ == Player{0}) { + return {1.0, -1.0}; + } else if (outcome_ == Player{1}) { + return {-1.0, 1.0}; + } else { + return {0.0, 0.0}; + } +} + +std::string OthelloState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return HistoryString(); +} + +std::string OthelloState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void OthelloState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + // Treat `values` as a 2-d tensor. + TensorView<2> view(values, {kCellStates, kNumCells}, true); + + for (int cell = 0; cell < kNumCells; ++cell) { + if (board_[cell] == CellState::kEmpty) { + view[{0, cell}] = 1; + } else if (board_[cell] == PlayerToState(player)) { + view[{1, cell}] = 1; + } else { // Opponent's piece + view[{2, cell}] = 1; + } + } +} + +std::unique_ptr OthelloState::Clone() const { + return std::unique_ptr(new OthelloState(*this)); +} + +OthelloGame::OthelloGame(const GameParameters& params) + : Game(kGameType, params) {} + +} // namespace othello +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/othello/othello.h b/scenarios/bargaining/open_spiel/open_spiel/games/othello/othello.h new file mode 100644 index 0000000..be26419 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/othello/othello.h @@ -0,0 +1,163 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_OTHELLO_H_ +#define OPEN_SPIEL_GAMES_OTHELLO_H_ + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" // for c_fill +#include "open_spiel/spiel.h" + +// Simple game of Othello: +// https://en.wikipedia.org/wiki/Reversi +// +// Parameters: none + +namespace open_spiel { +namespace othello { + +// Constants. +inline constexpr int kNumPlayers = 2; +inline constexpr int kNumRows = 8; +inline constexpr int kNumCols = 8; +inline constexpr int kNumCells = kNumRows * kNumCols; +inline constexpr int kCellStates = 1 + kNumPlayers; // empty, 'x', and 'o'. +inline constexpr int kPassMove = kNumCells; + +// State of a cell. 
+enum class CellState { + kEmpty, + kBlack, + kWhite, +}; + +enum Direction { + kUp, + kDown, + kLeft, + kRight, + kUpLeft, + kUpRight, + kDownLeft, + kDownRight, +}; + +inline constexpr std::array kDirections = { + kUp, kDown, kLeft, kRight, kUpLeft, kUpRight, kDownLeft, kDownRight}; + +class Move { + public: + Move(int move) : row_(move / kNumCols), col_(move % kNumCols) { + SPIEL_CHECK_GE(move, 0); + SPIEL_CHECK_LT(move, kNumCells); + } + + Move(int row, int col) : row_(row), col_(col) {} + + inline int GetRow() const { return row_; } + inline int GetColumn() const { return col_; } + inline int GetAction() const { return row_ * kNumCols + col_; } + inline bool OnBoard() const; + + Move Next(Direction dir) const; + std::string ToString() const; + + private: + int row_; + int col_; +}; + +// State of an in-play game. +class OthelloState : public State { + public: + OthelloState(std::shared_ptr game); + + OthelloState(const OthelloState&) = default; + OthelloState& operator=(const OthelloState&) = default; + + Player CurrentPlayer() const override { return current_player_; } + + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + std::vector LegalActions() const override; + + private: + std::array board_; + void DoApplyAction(Action action) override; + + CellState BoardAt(int row, int col) const { return BoardAt(Move(row, col)); } + CellState BoardAt(Move move) const { return board_[move.GetAction()]; } + + // Returns a list of regular (non-pass) actions. + std::vector LegalRegularActions(Player p) const; + + // Returns true if the move would be valid for player if it were their turn. + bool ValidAction(Player player, int move) const; + + // Returns true if there are no actions available for either player. + bool NoValidActions() const; + + // Returns the number of pieces on the board for the given player. + int DiskCount(Player player) const; + + // Returns true if the specified move would result in a capture. + bool CanCapture(Player player, int move) const; + + // Returns the number of capturable disks of the opponent in the given + // direction from the given starting location. + int CountSteps(Player player, int action, Direction dir) const; + + // Updates the board to reflect a capture move. + void Capture(Player player, int action, Direction dir, int steps); + + Player current_player_ = 0; // Player zero goes first + Player outcome_ = kInvalidPlayer; +}; + +// Game object. +class OthelloGame : public Game { + public: + explicit OthelloGame(const GameParameters& params); + int NumDistinctActions() const override { return kNumCells + 1; } // can pass + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new OthelloState(shared_from_this())); + } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { return -1; } + absl::optional UtilitySum() const override { return 0; } + double MaxUtility() const override { return 1; } + std::vector ObservationTensorShape() const override { + return {kCellStates, kNumRows, kNumCols}; + } + + // Conservative upper bound due to pass moves. 
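+  // Each non-pass move fills one of the kNumCells squares, and a pass is only
+  // legal when the mover has no regular move; since the game ends once
+  // neither player can move, at most one pass separates consecutive regular
+  // moves, which gives the 2 * kNumCells bound below.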
+ int MaxGameLength() const override { return 2*kNumCells; } +}; + +} // namespace othello +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_OTHELLO_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/othello/othello_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/othello/othello_test.cc new file mode 100644 index 0000000..df621df --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/othello/othello_test.cc @@ -0,0 +1,36 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace othello { +namespace { + +namespace testing = open_spiel::testing; + +void BasicOthelloTests() { + testing::LoadGameTest("othello"); + testing::NoChanceOutcomesTest(*LoadGame("othello")); + testing::RandomSimTest(*LoadGame("othello"), 100); +} + +} // namespace +} // namespace othello +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::othello::BasicOthelloTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/oware/oware.cc b/scenarios/bargaining/open_spiel/open_spiel/games/oware/oware.cc new file mode 100644 index 0000000..0edf19f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/oware/oware.cc @@ -0,0 +1,310 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/oware/oware.h" + +#include + +#include "open_spiel/game_parameters.h" + +namespace open_spiel { +namespace oware { + +namespace { + +// Facts about the game +const GameType kGameType{ + /*short_name=*/"oware", + /*long_name=*/"Oware", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"num_houses_per_player", GameParameter(kDefaultHousesPerPlayer)}, + {"num_seeds_per_house", GameParameter(kDdefaultSeedsPerHouse)}}}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new OwareGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +} // namespace + +OwareState::OwareState(std::shared_ptr game, + int num_houses_per_player, int num_seeds_per_house) + : State(game), + num_houses_per_player_(num_houses_per_player), + total_seeds_(kNumPlayers * num_seeds_per_house * num_houses_per_player), + board_(/*num_houses_per_player=*/num_houses_per_player, + /*num_seeds_per_house=*/num_seeds_per_house) { + boards_since_last_capture_.insert(board_); +} + +OwareState::OwareState(std::shared_ptr game, + const OwareBoard& board) + : State(game), + num_houses_per_player_(board.seeds.size() / kNumPlayers), + total_seeds_(board.TotalSeeds()), + board_(board) { + SPIEL_CHECK_EQ(0, board.seeds.size() % kNumPlayers); + SPIEL_CHECK_TRUE(IsTerminal() || !LegalActions().empty()); + boards_since_last_capture_.insert(board_); +} + +std::vector OwareState::LegalActions() const { + std::vector actions; + if (IsTerminal()) return actions; + const Player lower = PlayerLowerHouse(board_.current_player); + const Player upper = PlayerUpperHouse(board_.current_player); + if (OpponentSeeds() == 0) { + // In case the opponent does not have any seeds, a player must make + // a move which gives the opponent seeds. + for (int house = lower; house <= upper; house++) { + const int first_seeds_in_own_row = upper - house; + if (board_.seeds[house] - first_seeds_in_own_row > 0) { + actions.push_back(HouseToAction(house)); + } + } + } else { + for (int house = lower; house <= upper; house++) { + if (board_.seeds[house] > 0) { + actions.push_back(HouseToAction(house)); + } + } + } + return actions; +} + +std::string OwareState::ActionToString(Player player, Action action) const { + return std::string(1, (player == Player{0} ? 'A' : 'a') + action); +} + +void OwareState::WritePlayerScore(std::ostringstream& out, + Player player) const { + out << "Player " << player << " score = " << board_.score[player]; + if (CurrentPlayer() == player) { + out << " [PLAYING]" << std::endl; + } else { + out << std::endl; + } +} + +std::string OwareState::ToString() const { + std::ostringstream out; + if (IsTerminal()) { + out << "[FINISHED]" << std::endl; + } + WritePlayerScore(out, 1); + + // Add player 1 labels. + for (int action = num_houses_per_player_ - 1; action >= 0; action--) { + out << std::setw(3) << std::right << ActionToString(1, action); + } + out << std::endl; + + // Add player 1 house seeds. 
+ for (int house = kNumPlayers * num_houses_per_player_ - 1; + house >= num_houses_per_player_; house--) { + out << std::setw(3) << std::right << board_.seeds[house]; + } + out << std::endl; + + // Add player 0 house seeds. + for (int house = 0; house < num_houses_per_player_; house++) { + out << std::setw(3) << std::right << board_.seeds[house]; + } + out << std::endl; + + // Add player 0 labels. + for (int action = 0; action < num_houses_per_player_; action++) { + out << std::setw(3) << std::right << ActionToString(0, action); + } + out << std::endl; + + WritePlayerScore(out, 0); + return out.str(); +} + +bool OwareState::IsTerminal() const { + // Terminate when one player has more than half of the seeds + // (works both for even and odd number of seeds), or when all seeds + // are equally shared. + const int limit = total_seeds_ / 2; + return board_.score[0] > limit || board_.score[1] > limit || + (board_.score[0] == limit && board_.score[1] == limit); +} + +std::vector OwareState::Returns() const { + if (IsTerminal()) { + if (board_.score[0] > board_.score[1]) { + return {1, -1}; + } else if (board_.score[0] < board_.score[1]) { + return {-1, 1}; + } else { + return {0, 0}; + } + } else { + return {0, 0}; + } +} + +std::unique_ptr OwareState::Clone() const { + return std::unique_ptr(new OwareState(*this)); +} + +int OwareState::DistributeSeeds(int house) { + int to_distribute = board_.seeds[house]; + SPIEL_CHECK_NE(to_distribute, 0); + board_.seeds[house] = 0; + int index = house; + while (to_distribute > 0) { + index = (index + 1) % NumHouses(); + // Seeds are never sown into the house they were drawn from. + if (index != house) { + board_.seeds[index]++; + to_distribute--; + } + } + return index; +} + +bool OwareState::InOpponentRow(int house) const { + return (house / num_houses_per_player_) != board_.current_player; +} + +bool OwareState::IsGrandSlam(int house) const { + // If there are seeds beyond the house in which the last seed was dropped, + // it is not a Grand Slam. + for (int index = UpperHouse(house); index > house; index--) { + if (board_.seeds[index] > 0) { + return false; + } + } + // If not all houses are captured starting from the house in which the last + // seed was dropped, it is not a Grand Slam. It means the opponent will still + // have some seeds left because none of these houses can be empty due to + // the way seeds are sown. 
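+  // For example, in NoCaptureBecauseGrandSlamTest (oware_test.cc), player 1
+  // sows 8 seeds from house 8 and ends in house 4, leaving each of player 0's
+  // non-empty houses holding 2 seeds; capturing them all would strip player 0
+  // of every seed, so the move is a Grand Slam and nothing is captured.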
+ const int lower = LowerHouse(house); + for (int index = house; index >= lower; index--) { + SPIEL_CHECK_GT(board_.seeds[index], 0); + if (!ShouldCapture(board_.seeds[index])) { + return false; + } + } + return true; +} + +int OwareState::OpponentSeeds() const { + int count = 0; + const Player opponent = 1 - board_.current_player; + const int lower = PlayerLowerHouse(opponent); + const int upper = PlayerUpperHouse(opponent); + for (int house = lower; house <= upper; house++) { + count += board_.seeds[house]; + } + return count; +} + +int OwareState::DoCaptureFrom(int house) { + const int lower = LowerHouse(house); + int captured = 0; + for (int index = house; index >= lower; index--) { + if (ShouldCapture(board_.seeds[index])) { + captured += board_.seeds[index]; + board_.seeds[index] = 0; + } else { + break; + } + } + board_.score[board_.current_player] += captured; + return captured; +} + +void OwareState::DoApplyAction(Action action) { + SPIEL_CHECK_LT(history_.size(), kMaxGameLength); + + int last_house = DistributeSeeds(ActionToHouse(CurrentPlayer(), action)); + + if (InOpponentRow(last_house) && !IsGrandSlam(last_house)) { + const int captured = DoCaptureFrom(last_house); + if (captured > 0) { + // No need to keep previous boards for checking game repetition because + // captured seeds do not re-enter the game. + boards_since_last_capture_.clear(); + } + } + board_.current_player = 1 - board_.current_player; + + if (!boards_since_last_capture_.insert(board_).second) { + // We have game repetition, the game is ended. + CollectAndTerminate(); + } + + if (LegalActions().empty()) { + CollectAndTerminate(); + } +} + +void OwareState::CollectAndTerminate() { + for (int house = 0; house < NumHouses(); house++) { + const Player player = house / num_houses_per_player_; + board_.score[player] += board_.seeds[house]; + board_.seeds[house] = 0; + } +} + +std::string OwareState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return board_.ToString(); +} + +void OwareState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + SPIEL_CHECK_EQ(values.size(), /*seeds*/ NumHouses() + /*scores*/ kNumPlayers); + for (int house = 0; house < NumHouses(); ++house) { + values[house] = ((double)board_.seeds[house]) / total_seeds_; + } + for (Player player = 0; player < kNumPlayers; ++player) { + values[NumHouses() + player] = + ((double)board_.score[player]) / total_seeds_; + } +} + +OwareGame::OwareGame(const GameParameters& params) + : Game(kGameType, params), + num_houses_per_player_(ParameterValue("num_houses_per_player")), + num_seeds_per_house_(ParameterValue("num_seeds_per_house")) {} + +std::vector OwareGame::ObservationTensorShape() const { + return {/*seeds*/ num_houses_per_player_ * kNumPlayers + + /*scores*/ kNumPlayers}; +} + +} // namespace oware +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/oware/oware.h b/scenarios/bargaining/open_spiel/open_spiel/games/oware/oware.h new file mode 100644 index 0000000..48a96da --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/oware/oware.h @@ -0,0 +1,187 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_OWARE_H_ +#define OPEN_SPIEL_GAMES_OWARE_H_ + +#include +#include + +#include "open_spiel/games/oware/oware_board.h" +#include "open_spiel/spiel.h" + +// Oware (https://en.wikipedia.org/wiki/Oware) is a strategy game within the +// family of Mancala games. Several variations of the game exist. This +// implementation uses the basic rules as described here: +// https://en.wikipedia.org/wiki/Oware or here: +// http://www.joansala.com/auale/rules/en/. +// +// In particular if the opponent has no seeds, the current player must make a +// move to give the opponent seeds. If no such move exists the game ends and the +// current player collects the seeds in his row. If at the end of an action the +// opponent would be left with no seeds because they would all be captured +// (a Grand Slam), no seeds are captured instead. +// +// When the game reaches a state which occurred before, it ends and both players +// collect the remaining seeds in their respective rows. +// +// Note: The Kalah game is also available separately in mancala.{h,cc}. + +namespace open_spiel { +namespace oware { + +inline constexpr int kMinCapture = 2; +inline constexpr int kMaxCapture = 3; + +inline constexpr int kDefaultHousesPerPlayer = 6; +inline constexpr int kDdefaultSeedsPerHouse = 4; + +// Informed guess based on +// https://mancala.fandom.com/wiki/Statistics +inline constexpr int kMaxGameLength = 1000; + +class OwareState : public State { + public: + OwareState(std::shared_ptr game, int num_houses_per_player, + int num_seeds_per_house); + + OwareState(const OwareState&) = default; + + // Custom board setup to support testing. + explicit OwareState(std::shared_ptr game, + const OwareBoard& board); + + Player CurrentPlayer() const override { + return IsTerminal() ? kTerminalPlayerId : board_.current_player; + } + + std::vector LegalActions() const override; + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::unique_ptr Clone() const override; + const OwareBoard& Board() const { return board_; } + std::string ObservationString(Player player) const override; + + // The game board is provided as a vector, encoding the players' seeds + // and their score, as a fraction of the number of total number of seeds in + // the game. This provides an interface that can be used for neural network + // training, although the given representation is not necessary the best + // for that purpose. + void ObservationTensor(Player player, + absl::Span values) const override; + + protected: + void DoApplyAction(Action action) override; + + private: + void WritePlayerScore(std::ostringstream& out, Player player) const; + + // Collects the seeds from the given house and distributes them + // counterclockwise, skipping the starting position in all cases. + // Returns the index of the last house in which a seed was dropped. 
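+  // For instance, on the default 12-house board, sowing 12 seeds from house 0
+  // drops one seed in each of houses 1..11, skips house 0, and drops the last
+  // seed in house 1, which is the value returned.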
+ int DistributeSeeds(int house); + + int OpponentSeeds() const; + + bool InOpponentRow(int house) const; + + // If the opponent would be left with no seeds after capturing starts from + // the given house, it is a Grand Slam. Such a move is allowed but no pieces + // will be captured. + bool IsGrandSlam(int house) const; + + // Collects all seeds of both players and terminates the game. + void CollectAndTerminate(); + + // Captures opponent seeds starting from given house clockwise as long as + // the number of seeds is between kMinCapture and kMaxCapture. + // Returns the number of seeds captured. + int DoCaptureFrom(int house); + + int LowerHouse(int house) const { + return (house / num_houses_per_player_) * num_houses_per_player_; + } + + int UpperHouse(int house) const { + return LowerHouse(house) + num_houses_per_player_ - 1; + } + + int PlayerLowerHouse(Player player) const { + return player * num_houses_per_player_; + } + + int PlayerUpperHouse(Player player) const { + return player * num_houses_per_player_ + num_houses_per_player_ - 1; + } + + bool ShouldCapture(int seeds) const { + return seeds >= kMinCapture && seeds <= kMaxCapture; + } + + Action HouseToAction(int house) const { + return house % num_houses_per_player_; + } + + int ActionToHouse(Player player, Action action) const { + return player * num_houses_per_player_ + action; + } + + int NumHouses() const { return kNumPlayers * num_houses_per_player_; } + + class OwareBoardHash { + public: + std::size_t operator()(const OwareBoard& board) const { + return board.HashValue(); + } + }; + + const int num_houses_per_player_; + const int total_seeds_; + + // We keep the set of visited board states to detect repetition, at which + // point the game ends and both players collect the seeds on their own row. + // Because captured seeds never enter the game again, this set is reset + // on any capture. + std::unordered_set boards_since_last_capture_; + OwareBoard board_; +}; + +// Game object. +class OwareGame : public Game { + public: + explicit OwareGame(const GameParameters& params); + int NumDistinctActions() const override { return num_houses_per_player_; } + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new OwareState( + shared_from_this(), num_houses_per_player_, num_seeds_per_house_)); + } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { return -1; } + absl::optional UtilitySum() const override { return 0; } + double MaxUtility() const override { return 1; } + + int MaxGameLength() const override { return kMaxGameLength; } + std::vector ObservationTensorShape() const override; + + private: + const int num_houses_per_player_; + const int num_seeds_per_house_; +}; + +} // namespace oware +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_OWARE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/oware/oware_board.cc b/scenarios/bargaining/open_spiel/open_spiel/games/oware/oware_board.cc new file mode 100644 index 0000000..88f9d36 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/oware/oware_board.cc @@ -0,0 +1,72 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/oware/oware_board.h" + +namespace open_spiel { +namespace oware { + +OwareBoard::OwareBoard(int num_houses_per_player, int num_seeds_per_house) + : current_player(Player{0}), + score(kNumPlayers, 0), + seeds(kNumPlayers * num_houses_per_player, num_seeds_per_house) {} + +OwareBoard::OwareBoard(Player current_player, const std::vector& score, + const std::vector& seeds) + : current_player(current_player), score(score), seeds(seeds) { + SPIEL_CHECK_EQ(score.size(), kNumPlayers); +} + +bool OwareBoard::operator==(const OwareBoard& other) const { + return current_player == other.current_player && score == other.score && + seeds == other.seeds; +} + +bool OwareBoard::operator!=(const OwareBoard& other) const { + return !(*this == other); +} + +std::string OwareBoard::ToString() const { + return absl::StrCat(current_player, " | ", absl::StrJoin(score, " "), " | ", + absl::StrJoin(seeds, " ")); +} + +size_t OwareBoard::HashValue() const { + // Hashing similar to boost::hash_combine. + size_t hash = current_player; + for (int player_score : score) { + hash ^= (size_t)player_score + 0x9e3779b9 + (hash << 6) + (hash >> 2); + } + for (int house_seeds : seeds) { + hash ^= (size_t)house_seeds + 0x9e3779b9 + (hash << 6) + (hash >> 2); + } + return hash; +} +int OwareBoard::TotalSeeds() const { + int total = 0; + for (int house_seeds : seeds) { + total += house_seeds; + } + for (int score_seeds : score) { + total += score_seeds; + } + return total; +} + +std::ostream& operator<<(std::ostream& os, const OwareBoard& board) { + return os << board.ToString(); +} + +} // namespace oware +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/oware/oware_board.h b/scenarios/bargaining/open_spiel/open_spiel/games/oware/oware_board.h new file mode 100644 index 0000000..55c83d4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/oware/oware_board.h @@ -0,0 +1,61 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_OWARE_OWARE_BOARD_H_ +#define OPEN_SPIEL_GAMES_OWARE_OWARE_BOARD_H_ + +#include +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +namespace open_spiel { +namespace oware { + +inline constexpr int kNumPlayers = 2; + +// Simple Oware board struct storing the current player, scores and seeds. +struct OwareBoard { + public: + OwareBoard(int num_houses_per_player, int num_seeds_per_house); + // Custom board setup to support testing. 
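+  // e.g. OwareBoard(/*current_player=*/0, /*score=*/{0, 0},
+  //                 /*seeds=*/{1, 0, 9, 0, 0, 1, 1, 0, 0, 0, 0, 0}),
+  // as used by the legal-action and capture tests in oware_test.cc.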
+ OwareBoard(Player current_player, const std::vector& score, + const std::vector& seeds); + OwareBoard(const OwareBoard&) = default; + OwareBoard& operator=(const OwareBoard&) = default; + bool operator==(const OwareBoard& other) const; + bool operator!=(const OwareBoard& other) const; + std::string ToString() const; + size_t HashValue() const; + + // Returns total number of seeds, both those + // captured and the ones still in play. + int TotalSeeds() const; + + Player current_player; + // The number of seeds each player has in their score house, one entry + // for each player. + std::vector score; + // The number of seeds in each house. First the (kNumHousesPerPlayer) houses + // for player 0, then for player 1, in counterclockwise order (i.e. the order + // in which seeds are sown). + std::vector seeds; +}; + +std::ostream& operator<<(std::ostream& os, const OwareBoard& board); + +} // namespace oware +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_OWARE_OWARE_BOARD_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/oware/oware_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/oware/oware_test.cc new file mode 100644 index 0000000..9bf74c4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/oware/oware_test.cc @@ -0,0 +1,139 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/oware/oware.h" + +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace oware { +namespace { + +namespace testing = open_spiel::testing; + +void BasicOwareTests() { + testing::LoadGameTest("oware"); + testing::RandomSimTest(*LoadGame("oware"), 10); + testing::NoChanceOutcomesTest(*LoadGame("oware")); + + testing::RandomSimTest( + *LoadGame("oware", {{"num_houses_per_player", GameParameter(2)}, + {"num_seeds_per_house", GameParameter(2)}}), + 10); +} + +void LegalActionsNoConstraintsTest() { + std::shared_ptr game = LoadGame("oware"); + OwareState state = OwareState( + game, OwareBoard(0, {0, 0}, {1, 0, 9, 0, 0, 1, 1, 0, 0, 0, 0, 0})); + SPIEL_CHECK_EQ(state.LegalActions(), std::vector({0, 2, 5})); +} + +void LegalActionsLeaveTheOpponentSeedsTest() { + std::shared_ptr game = LoadGame("oware"); + OwareState state = OwareState( + game, OwareBoard(0, {0, 0}, {1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0})); + // Playing action 0 would leave the opponent with no seeds, which is not + // allowed. 
+ SPIEL_CHECK_EQ(state.LegalActions(), std::vector({5})); +} + +void CaptureOpponentHousesTillBeginTest() { + std::shared_ptr game = LoadGame("oware"); + OwareState state(game, + OwareBoard(0, {0, 0}, {0, 0, 8, 0, 0, 1, 1, 1, 1, 1, 2, 3})); + state.ApplyAction(2); // Winning move + SPIEL_CHECK_EQ(state.Board(), + OwareBoard(1, {15, 3}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0})); +} + +void CaptureMostOpponentHousesTest() { + std::shared_ptr game = LoadGame("oware"); + OwareState state(game, + OwareBoard(0, {0, 0}, {0, 0, 8, 0, 0, 1, 3, 1, 1, 1, 2, 0})); + state.ApplyAction(2); // Winning move + SPIEL_CHECK_EQ(state.Board(), + OwareBoard(1, {13, 4}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0})); +} + +void NoCaptureBecauseTooFewSeedsTest() { + std::shared_ptr game = LoadGame("oware"); + OwareState state(game, + OwareBoard(1, {0, 0}, {3, 1, 1, 1, 0, 0, 0, 0, 8, 0, 0, 1})); + state.ApplyAction(2); + SPIEL_CHECK_EQ(state.Board(), + OwareBoard(0, {0, 0}, {4, 2, 2, 2, 1, 0, 0, 0, 0, 1, 1, 2})); +} + +void NoCaptureBecauseTooManySeedsTest() { + std::shared_ptr game = LoadGame("oware"); + OwareState state(game, + OwareBoard(0, {0, 0}, {0, 0, 8, 0, 0, 1, 3, 1, 1, 1, 3, 0})); + state.ApplyAction(2); + SPIEL_CHECK_EQ(state.Board(), + OwareBoard(1, {0, 0}, {0, 0, 0, 1, 1, 2, 4, 2, 2, 2, 4, 0})); +} + +void NoCaptureBecauseGrandSlamTest() { + std::shared_ptr game = LoadGame("oware"); + OwareState state(game, + OwareBoard(1, {0, 0}, {1, 1, 1, 1, 1, 0, 0, 0, 8, 0, 0, 1})); + state.ApplyAction(2); + SPIEL_CHECK_EQ(state.Board(), + OwareBoard(0, {0, 0}, {2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 1, 2})); +} + +void GameEndsByRepetitionTest() { + std::shared_ptr game = LoadGame("oware"); + OwareState state( + game, OwareBoard(0, {23, 23}, {1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0})); + + state.ApplyAction(0); // Player 0. + state.ApplyAction(0); // Player 1. + state.ApplyAction(1); // Player 0. + state.ApplyAction(1); // Player 1. + state.ApplyAction(2); // Player 0. + state.ApplyAction(2); // Player 1. + state.ApplyAction(3); // Player 0. + state.ApplyAction(3); // Player 1. + state.ApplyAction(4); // Player 0. + state.ApplyAction(4); // Player 1. + + SPIEL_CHECK_FALSE(state.IsTerminal()); + SPIEL_CHECK_EQ(state.Board(), + OwareBoard(0, {23, 23}, {0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1})); + + state.ApplyAction(5); // Player 0. + state.ApplyAction(5); // Player 1. 
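+  // The seed layout has now returned to its starting configuration, so the game
+  // ends by repetition and the two remaining seeds are added to the scores (24-24).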
+ + SPIEL_CHECK_TRUE(state.IsTerminal()); + SPIEL_CHECK_EQ(state.Board(), + OwareBoard(0, {24, 24}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0})); +} + +} // namespace +} // namespace oware +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::oware::BasicOwareTests(); + open_spiel::oware::LegalActionsNoConstraintsTest(); + open_spiel::oware::LegalActionsLeaveTheOpponentSeedsTest(); + open_spiel::oware::GameEndsByRepetitionTest(); + open_spiel::oware::CaptureOpponentHousesTillBeginTest(); + open_spiel::oware::CaptureMostOpponentHousesTest(); + open_spiel::oware::NoCaptureBecauseTooFewSeedsTest(); + open_spiel::oware::NoCaptureBecauseTooManySeedsTest(); + open_spiel::oware::NoCaptureBecauseGrandSlamTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/pathfinding/pathfinding.cc b/scenarios/bargaining/open_spiel/open_spiel/games/pathfinding/pathfinding.cc new file mode 100644 index 0000000..ae6457e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/pathfinding/pathfinding.cc @@ -0,0 +1,613 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/pathfinding/pathfinding.h" + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/combinatorics.h" +#include "open_spiel/utils/tensor_view.h" + +namespace open_spiel { +namespace pathfinding { +namespace { + +// Offsets for the actions: stay, left, up, right, down. +constexpr std::array kRowOffsets = {0, 0, -1, 0, 1}; +constexpr std::array kColOffsets = {0, -1, 0, 1, 0}; + +// Register with general sum, since the game is not guaranteed to be zero sum. +// If we create a zero sum instance, the type on the created game will show it. 
+const GameType kGameType{ + /*short_name=*/"pathfinding", + /*long_name=*/"Pathfinding", + GameType::Dynamics::kSimultaneous, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kPerfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kRewards, + /*max_num_players=*/10, + /*min_num_players=*/1, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"horizon", GameParameter(kDefaultHorizon)}, + {"grid", GameParameter(std::string(kDefaultSingleAgentGrid))}, + {"group_reward", GameParameter(kDefaultGroupReward)}, + {"players", GameParameter(kDefaultNumPlayers)}, + {"solve_reward", GameParameter(kDefaultSolveReward)}, + {"step_reward", GameParameter(kDefaultStepReward)}}}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new PathfindingGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +GridSpec ParseGrid(const std::string& grid_string, int max_num_players) { + GridSpec grid{/*num_rows=*/0, /*num_cols=*/0}; + int row = 0; + int col = 0; + int count_empty_cells = 0; + absl::flat_hash_map> starting_positions_map; + absl::flat_hash_map> destinations_map; + + for (auto c : grid_string) { + if (c == '\n') { + row += 1; + col = 0; + } else { + if (row >= grid.num_rows) grid.num_rows = row + 1; + if (col >= grid.num_cols) grid.num_cols = col + 1; + if (c == '*') { + grid.obstacles.emplace_back(row, col); + } else if (islower(c)) { + // 97 is the ASCII code for 'a'. + Player player = static_cast(c) - 97; + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, max_num_players); + starting_positions_map[player] = {row, col}; + } else if (isupper(c)) { + // 65 is the ASCII code for 'A'. + Player player = static_cast(c) - 65; + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, max_num_players); + destinations_map[player] = {row, col}; + } else if (c == '.') { + ++count_empty_cells; + } else { + SpielFatalError(absl::StrCat("Invalid char '", std::string(1, c), + "' at grid (", row, ",", col, ")")); + } + col += 1; + } + } + + grid.num_players = starting_positions_map.size(); + SPIEL_CHECK_EQ(starting_positions_map.size(), destinations_map.size()); + SPIEL_CHECK_GE(grid.num_players, 1); + SPIEL_CHECK_LE(grid.num_players, max_num_players); + + // Move map entries to vectors. + grid.starting_positions.resize(grid.num_players); + grid.destinations.resize(grid.num_players); + for (Player p = 0; p < grid.num_players; ++p) { + // Check that we found a starting position, and move it to the vector. + const auto iter1 = starting_positions_map.find(p); + SPIEL_CHECK_TRUE(iter1 != starting_positions_map.end()); + grid.starting_positions[p] = iter1->second; + // Check that we found a destination, and move it to the vector. 
+ const auto iter2 = destinations_map.find(p); + SPIEL_CHECK_TRUE(iter2 != destinations_map.end()); + grid.destinations[p] = iter2->second; + } + return grid; +} + +} // namespace + +PathfindingState::PathfindingState(std::shared_ptr game, + const GridSpec& grid_spec, int horizon) + : SimMoveState(game), + parent_game_(down_cast(*game)), + grid_spec_(grid_spec), + cur_player_(kSimultaneousPlayerId), + total_moves_(0), + horizon_(horizon), + player_positions_(num_players_), + actions_(num_players_, kInvalidAction), + rewards_(num_players_, 0.0), + returns_(num_players_, 0.0), + contested_players_(num_players_, 0), + reached_destinations_(num_players_, 0) { + grid_.reserve(grid_spec_.num_rows); + for (int r = 0; r < grid_spec_.num_rows; ++r) { + grid_.push_back(std::vector(grid_spec_.num_cols, kEmpty)); + } + + for (const std::pair& c : grid_spec_.obstacles) { + grid_[c.first][c.second] = kWall; + } + + SPIEL_CHECK_EQ(grid_spec_.starting_positions.size(), num_players_); + for (Player p = 0; p < num_players_; ++p) { + const std::pair& c = grid_spec_.starting_positions[p]; + SPIEL_CHECK_EQ(grid_[c.first][c.second], kEmpty); + grid_[c.first][c.second] = p; + player_positions_[p] = c; + } +} + +std::string PathfindingState::ActionToString(int player, + Action action_id) const { + return parent_game_.ActionToString(player, action_id); +} + +void PathfindingState::DoApplyActions(const std::vector& moves) { + SPIEL_CHECK_EQ(moves.size(), num_players_); + SPIEL_CHECK_EQ(cur_player_, kSimultaneousPlayerId); + + std::fill(rewards_.begin(), rewards_.end(), 0.0); + std::fill(contested_players_.begin(), contested_players_.end(), 0); + + actions_ = moves; + if (num_players_ == 1) { + ResolvePlayerAction(0); + } else { + ResolveActions(); + } + + if (cur_player_ == kSimultaneousPlayerId) { + // Only increment total moves if actions fully resolved. + total_moves_++; + } + + // If all players are at their destinations. + if (AllPlayersOnDestinations()) { + // Terminal state reached, all players get a bonus. + for (Player p = 0; p < num_players_; ++p) { + rewards_[p] += parent_game_.group_reward(); + returns_[p] += parent_game_.group_reward(); + } + } +} + +bool PathfindingState::InBounds(int r, int c) const { + return (r >= 0 && c >= 0 && r < grid_spec_.num_rows && + c < grid_spec_.num_cols); +} + +std::pair PathfindingState::GetNextCoord(Player p) const { + int row = player_positions_[p].first + kRowOffsets[actions_[p]]; + int col = player_positions_[p].second + kColOffsets[actions_[p]]; + if (!InBounds(row, col) || grid_[row][col] == kWall) { + // Can't run out of bounds or into a wall. + return player_positions_[p]; + } + return {row, col}; +} + +void PathfindingState::ResolvePlayerAction(Player p) { + const std::pair& cur_coord = player_positions_[p]; + std::pair next_coord = GetNextCoord(p); + + // Check if there is a player there. If so, change next_coord to cur_coord. + Player other_player = PlayerAt(next_coord); + if (other_player != kInvalidPlayer && other_player != p) { + next_coord = cur_coord; + } + + // Distribute rewards. + if (next_coord != cur_coord && reached_destinations_[p] == 0 && + next_coord == grid_spec_.destinations[p]) { + // Player is just getting to the destination for the first time! + rewards_[p] += parent_game_.solve_reward(); + returns_[p] += parent_game_.solve_reward(); + reached_destinations_[p] = 1; + } else if (next_coord == grid_spec_.destinations[p]) { + // Player getting to destination again, or staying there: no penalty. 
+ } else { + rewards_[p] += parent_game_.step_reward(); + returns_[p] += parent_game_.step_reward(); + } + + grid_[cur_coord.first][cur_coord.second] = kEmpty; + grid_[next_coord.first][next_coord.second] = p; + player_positions_[p] = next_coord; +} + +Player PathfindingState::PlayerAt(const std::pair& coord) const { + int cell_state = grid_[coord.first][coord.second]; + if (cell_state >= 0 && cell_state < num_players_) { + return cell_state; + } else { + return kInvalidPlayer; + } +} + +int PathfindingState::TryResolveContested() { + int num_resolutions = 0; + for (Player p = 0; p < num_players_; ++p) { + if (contested_players_[p] == 1) { + std::pair next_coord = GetNextCoord(p); + // A contested player can be resolved iff: + // - There is no other player on the next coord, and + // - No other (contested) player is planning to go there. + Player other_player = PlayerAt(next_coord); + if (other_player == kInvalidPlayer) { + bool conflict = false; + for (Player op = 0; op < num_players_; ++op) { + if (p == op) { + continue; + } + if (contested_players_[op] == 1) { + std::pair op_next_coord = GetNextCoord(op); + if (next_coord == op_next_coord) { + conflict = true; + break; + } + } + } + + if (!conflict) { + contested_players_[p] = 0; + num_resolutions++; + ResolvePlayerAction(p); + } + } + } + } + + return num_resolutions; +} + +void PathfindingState::ResolveActions() { + // Get the next coords, and check for potentially conflicting actions. + std::vector> next_coords; + next_coords.reserve(num_players_); + for (Player p = 0; p < num_players_; ++p) { + std::pair next_coord = GetNextCoord(p); + // If there is a different player there, mark as potentially contested. + // If another player is going there, mark both players as contested. + Player other_player = PlayerAt(next_coord); + if (other_player != kInvalidPlayer && other_player != p) { + // Different player already there. Potentially contested (other player + // may move out). + contested_players_[p] = 1; + } else if (actions_[p] == kStay) { + // Stay action is never contested. + } else { + // Check if another player planning to go there. + auto iter = std::find(next_coords.begin(), next_coords.end(), next_coord); + if (iter != next_coords.end()) { + Player other_player = iter - next_coords.begin(); + contested_players_[p] = 1; + contested_players_[other_player] = 1; + } + } + + next_coords.push_back(next_coord); + } + + // Check for head-on collisions. These should not be marked as contested, + // because they result in a no-op. + for (Player p = 0; p < num_players_; ++p) { + if (contested_players_[p] == 1) { + int op = PlayerAt(next_coords[p]); + if (op != kInvalidPlayer && p != op) { + Player opp = PlayerAt(next_coords[op]); + if (opp != kInvalidPlayer && opp == p) { + contested_players_[p] = 0; + contested_players_[op] = 0; + continue; + } + } + } + } + + // Move the uncontested, and repeatedly check the contested players to see if + // moving resolves the contestations. If so, move them and mark as + // uncontested. Stop when there is a pass with no moves. + int num_contested = 0; + for (Player p = 0; p < num_players_; ++p) { + if (contested_players_[p] == 1) { + num_contested++; + } else { + ResolvePlayerAction(p); + } + } + + int num_resolved = 0; + do { + num_resolved = TryResolveContested(); + num_contested -= num_resolved; + } while (num_resolved > 0); + + // If there remain contestations, must resolve them via a chance node, which + // will determine order of resolution. 
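+  // (Each chance outcome encodes one permutation of the contested players; see
+  // ChanceOutcomes() and DoApplyAction() below.)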
+ if (num_contested > 0) { + cur_player_ = kChancePlayerId; + } +} + +void PathfindingState::DoApplyAction(Action action_id) { + if (IsSimultaneousNode()) { + ApplyFlatJointAction(action_id); + return; + } else { + SPIEL_CHECK_TRUE(IsChanceNode()); + int num_contested_players = + std::count_if(contested_players_.begin(), contested_players_.end(), + [](int i) { return i == 1; }); + std::vector contested_player_ids; + contested_player_ids.reserve(num_contested_players); + for (Player p = 0; p < contested_players_.size(); ++p) { + if (contested_players_[p] == 1) { + contested_player_ids.push_back(p); + } + } + SPIEL_CHECK_EQ(contested_player_ids.size(), num_contested_players); + std::vector indices(num_contested_players); + std::iota(indices.begin(), indices.end(), 0); + std::vector resolution_order = UnrankPermutation(indices, action_id); + for (int idx : resolution_order) { + ResolvePlayerAction(contested_player_ids[idx]); + } + std::fill(contested_players_.begin(), contested_players_.end(), 0); + cur_player_ = kSimultaneousPlayerId; + total_moves_++; + } +} + +std::vector PathfindingState::LegalActions(int player) const { + if (IsTerminal()) return {}; + if (IsChanceNode()) { + return LegalChanceOutcomes(); + } else { + return parent_game_.legal_actions(); + } +} + +std::vector> PathfindingState::ChanceOutcomes() + const { + SPIEL_CHECK_TRUE(IsChanceNode()); + int num_contested_players = + std::count_if(contested_players_.begin(), contested_players_.end(), + [](int i) { return i == 1; }); + int num_permutations = Factorial(num_contested_players); + double prob = 1.0 / num_permutations; + ActionsAndProbs outcomes; + outcomes.reserve(num_permutations); + for (int i = 0; i < num_permutations; ++i) { + outcomes.push_back({i, prob}); + } + return outcomes; +} + +Player PathfindingState::PlayerAtPos(const std::pair& coord) const { + if (grid_[coord.first][coord.second] >= 0 && + grid_[coord.first][coord.second] < num_players_) { + return grid_[coord.first][coord.second]; + } else { + return kInvalidPlayer; + } +} + +std::string PathfindingState::ToString() const { + std::string str; + for (int r = 0; r < grid_spec_.num_rows; ++r) { + for (int c = 0; c < grid_spec_.num_cols; ++c) { + if (grid_[r][c] >= 0 && grid_[r][c] < num_players_) { + absl::StrAppend(&str, grid_[r][c]); + } else if (grid_[r][c] == kWall) { + absl::StrAppend(&str, "*"); + } else { + absl::StrAppend(&str, "."); + } + } + absl::StrAppend(&str, "\n"); + } + return str; +} + +int PathfindingState::PlayerPlaneIndex(int observing_player, + int actual_player) const { + // Need to add a num_players_ inside the brackets here because of how C++ + // handles mod of negative values. + return (actual_player - observing_player + num_players_) % num_players_; +} + +// Note: currently, the observations are current non-Markovian because the time +// step is not included and the horizon is finite. +std::string PathfindingState::ObservationString(int player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +// Note: currently, the observations are current non-Markovian because the time +// step is not included and the horizon is finite. 
+void PathfindingState::ObservationTensor(int player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + std::fill(values.begin(), values.end(), 0.0); + + TensorView<3> view(values, + {parent_game_.NumObservationPlanes(), grid_spec_.num_rows, + grid_spec_.num_cols}, + true); + + // Let n be the number of players. + // - First n planes refer to player + // - Second n planes refer to player's starting position + // - Third n planes refer to player's destination position + // - 1 plane for wall + // - 1 plane for empty + // + // The first three sets of n planes corresponding to the players are each + // ordered ego-centrically: + // - the first plane is the observing player's plane, followed by the next + // player, followed by the next etc. so in a 4-player game, if player 2 + // is the observing player, the planes would be ordered by player 2, 3, 0, + // 1. + for (int r = 0; r < grid_spec_.num_rows; ++r) { + for (int c = 0; c < grid_spec_.num_cols; ++c) { + // Player on the position. + if (grid_[r][c] >= 0 && grid_[r][c] < num_players_) { + view[{PlayerPlaneIndex(player, grid_[r][c]), r, c}] = 1.0; + } + + // Wall + if (grid_[r][c] == kWall) { + view[{3 * num_players_, r, c}] = 1.0; + } + + // Empty + if (grid_[r][c] == kEmpty) { + view[{3 * num_players_ + 1, r, c}] = 1.0; + } + } + } + + for (Player p = 0; p < num_players_; ++p) { + const std::pair& start_pos = grid_spec_.starting_positions[p]; + const std::pair& dest_pos = grid_spec_.destinations[p]; + int pidx = PlayerPlaneIndex(player, p); + view[{num_players_ + pidx, start_pos.first, start_pos.second}] = 1.0; + view[{2 * num_players_ + pidx, dest_pos.first, dest_pos.second}] = 1.0; + } +} + +bool PathfindingState::AllPlayersOnDestinations() const { + for (Player p = 0; p < num_players_; ++p) { + const std::pair& c = grid_spec_.destinations[p]; + if (grid_[c.first][c.second] != p) { + return false; + } + } + return true; +} + +bool PathfindingState::IsTerminal() const { + if (total_moves_ >= horizon_) { + return true; + } + + // Check if all players at their destinations. + return AllPlayersOnDestinations(); +} + +std::vector PathfindingState::Rewards() const { return rewards_; } + +std::vector PathfindingState::Returns() const { return returns_; } + +std::unique_ptr PathfindingState::Clone() const { + return std::unique_ptr(new PathfindingState(*this)); +} + +std::unique_ptr PathfindingGame::NewInitialState() const { + return std::unique_ptr( + new PathfindingState(shared_from_this(), grid_spec_, horizon_)); +} + +int PathfindingGame::MaxChanceOutcomes() const { + return Factorial(NumPlayers()); +} + +double PathfindingGame::MinUtility() const { + // Add a small constant here due to numeral issues. 
+ return horizon_ * step_reward_ - FloatingPointDefaultTolerance(); +} + +double PathfindingGame::MaxUtility() const { + return solve_reward_ + group_reward_; +} + +int PathfindingGame::NumObservationPlanes() const { + // Number of position planes: + // - one per player present on the pos + // - one per player (starting position) + // - one per player (destination) + // - one for empty positions + // - one for wall positions + return 3 * grid_spec_.num_players + 2; +} + +std::vector PathfindingGame::ObservationTensorShape() const { + return {NumObservationPlanes(), grid_spec_.num_rows, grid_spec_.num_cols}; +} + +std::string PathfindingGame::ActionToString(int player, + Action action_id) const { + if (player == kChancePlayerId) { + return absl::StrCat("Chance outcome ", action_id); + } + + switch (action_id) { + case kStay: + return "Stay"; + case kLeft: + return "Left"; + case kUp: + return "Up"; + case kRight: + return "Right"; + case kDown: + return "Down"; + default: + SpielFatalError(absl::StrCat("Unknown action: ", action_id)); + } +} + +int PathfindingGame::NumPlayers() const { return num_players_; } + +PathfindingGame::PathfindingGame(const GameParameters& params) + : SimMoveGame(kGameType, params), + grid_spec_(ParseGrid(ParameterValue( + "grid", std::string(kDefaultSingleAgentGrid)), + kGameType.max_num_players)), + num_players_(ParameterValue("players", kDefaultNumPlayers)), + horizon_(ParameterValue("horizon", kDefaultHorizon)), + group_reward_(ParameterValue("group_reward", + kDefaultGroupReward)), + solve_reward_( + ParameterValue("solve_reward", kDefaultSolveReward)), + step_reward_(ParameterValue("step_reward", kDefaultStepReward)), + legal_actions_({kStay, kLeft, kUp, kRight, kDown}) { + // Override the number of players from the grid specification. + // + // Currently, the game only supports specific grids, so this will always be + // overridden. This will change in a future version with random grids. + if (grid_spec_.num_players >= 1) { + num_players_ = grid_spec_.num_players; + } +} + +} // namespace pathfinding +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/pathfinding/pathfinding.h b/scenarios/bargaining/open_spiel/open_spiel/games/pathfinding/pathfinding.h new file mode 100644 index 0000000..3b5dd51 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/pathfinding/pathfinding.h @@ -0,0 +1,199 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_PATHFINDING_H_ +#define OPEN_SPIEL_GAMES_PATHFINDING_H_ + +#include +#include +#include +#include + +#include "open_spiel/simultaneous_move_game.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" + +namespace open_spiel { +namespace pathfinding { + +// A simple simultaneous-move (single- and multi-agent) grid world pathfinding +// game. 
+// +// Grids can be expressed as ASCII strings, where lower case characters refer +// to starting positions, upper case characters refer to destinations, +// '.' refers to an empty cell, '*' refers to an wall. +// +// Parameters: +// "grid" int The grid world the agents play in (default below). +// "group_reward" double Extra reward (to each agent) if all agents reach +// their desitnation (default: 100.0). +// "horizon" int Maximum number of steps in an episode (def: 1000). +// "players" int Number of players (default: 1, and overridden by +// the grid). +// "solve_reward" double Reward obtained when reaching the destination +// (default: 100.0). +// "step_reward" double The reward given to every agent on each per step +// (default: -0.01). +// +// Note: currently, the observations are current non-Markovian because the time +// step is not included and the horizon is finite. This can be easily added as +// an option if desired. + +inline constexpr char kDefaultSingleAgentGrid[] = + "A.*..**\n" + "..*....\n" + "....*a.\n"; + +inline constexpr char kExampleMultiAgentGrid[] = + "A.*Db**\n" + "..*....\n" + "..*.*a.\n" + ".B*.**.\n" + ".*..*..\n" + "......c\n" + "C..*..d"; + +// Default parameters. +constexpr int kDefaultHorizon = 1000; +constexpr int kDefaultNumPlayers = 1; +constexpr double kDefaultStepReward = -0.01; +constexpr double kDefaultSolveReward = 100.0; +constexpr double kDefaultGroupReward = 100.0; + +struct GridSpec { + int num_rows; + int num_cols; + int num_players = -1; + std::vector> obstacles; + std::vector> starting_positions; + std::vector> destinations; +}; + +// Movement. +enum MovementType { + kStay = 0, + kLeft = 1, + kUp = 2, + kRight = 3, + kDown = 4, +}; + +enum CellState { kEmpty = -1, kWall = -2 }; + +constexpr int kNumActions = 5; + +class PathfindingGame : public SimMoveGame { + public: + explicit PathfindingGame(const GameParameters& params); + int NumDistinctActions() const { return kNumActions; } + std::string ActionToString(int player, Action action_id) const override; + std::unique_ptr NewInitialState() const override; + int MaxChanceOutcomes() const override; + int NumPlayers() const override; + double MinUtility() const override; + double MaxUtility() const override; + std::vector ObservationTensorShape() const override; + int MaxGameLength() const override { return horizon_; } + int MaxChanceNodesInHistory() const override { return MaxGameLength(); } + + int NumObservationPlanes() const; + const std::vector& legal_actions() const { return legal_actions_; } + double group_reward() const { return group_reward_; } + double solve_reward() const { return solve_reward_; } + double step_reward() const { return step_reward_; } + + private: + GridSpec grid_spec_; + int num_players_; + int horizon_; + double group_reward_; + double solve_reward_; + double step_reward_; + std::vector legal_actions_; +}; + +class PathfindingState : public SimMoveState { + public: + explicit PathfindingState(std::shared_ptr game, + const GridSpec& grid_spec, int horizon); + PathfindingState(const PathfindingState&) = default; + + std::string ActionToString(int player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Rewards() const override; + std::vector Returns() const override; + std::string ObservationString(int player) const override; + void ObservationTensor(int player, absl::Span values) const override; + int CurrentPlayer() const override { + return IsTerminal() ? 
kTerminalPlayerId : cur_player_; + } + std::unique_ptr Clone() const override; + + ActionsAndProbs ChanceOutcomes() const override; + + std::vector LegalActions(int player) const override; + + std::pair PlayerPos(int player) const { + return player_positions_[player]; + } + + Player PlayerAtPos(const std::pair& coord) const; + + protected: + void DoApplyAction(Action action_id) override; + void DoApplyActions(const std::vector& moves) override; + + private: + std::pair GetNextCoord(Player p) const; + void ResolvePlayerAction(Player p); + void ResolveActions(); + bool InBounds(int r, int c) const; + Player PlayerAt(const std::pair& coord) const; + int TryResolveContested(); + bool AllPlayersOnDestinations() const; + int PlayerPlaneIndex(int observing_player, int actual_player) const; + + const PathfindingGame& parent_game_; + const GridSpec& grid_spec_; + + int cur_player_; + int total_moves_; + int horizon_; + std::vector> player_positions_; + + // The state of the board. Coordinates indices are in row-major order. + // - Values from 0 to num_players - 1 refer to the player. + // - Otherwise the value is above (kEmpty or kWall). + std::vector> grid_; + + // The player's chosen actions. + std::vector actions_; + + // Rewards this turn and cumulative rewards. + std::vector rewards_; + std::vector returns_; + + // Used when conflicting actions need to be resolved. + // 0 = uncontested, 1 = contested. + std::vector contested_players_; + + // Has the player reached the destination? (1 if yes, 0 if no). + std::vector reached_destinations_; +}; + +} // namespace pathfinding +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_PATHFINDING_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/pathfinding/pathfinding_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/pathfinding/pathfinding_test.cc new file mode 100644 index 0000000..4ee3ad8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/pathfinding/pathfinding_test.cc @@ -0,0 +1,264 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
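+
+// Tests cover lock-step chain movement, congestion, head-on and wall
+// collisions, and the chance-node ordering used to resolve contested moves.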
+ +#include "open_spiel/games/pathfinding/pathfinding.h" + +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace pathfinding { +namespace { + +namespace testing = open_spiel::testing; + +using MovementType::kDown; +using MovementType::kLeft; +using MovementType::kRight; +using MovementType::kUp; + +void BasicPathfindingTests() { + testing::LoadGameTest("pathfinding"); + testing::LoadGameTest( + absl::StrCat("pathfinding(grid=", kDefaultSingleAgentGrid, ")")); + testing::RandomSimTest(*LoadGame("pathfinding"), 1); + testing::RandomSimTest( + *LoadGame("pathfinding", + {{"grid", GameParameter(kExampleMultiAgentGrid)}}), + 1); +} + +void BasicCongestionSimulationTests() { + const char* kSmallGrid = + "AB*Db**\n" + "c*deGFE\n" + "Cf.b*ag\n"; + std::shared_ptr game = LoadGame( + "pathfinding", + {{"grid", GameParameter(kSmallGrid)}, {"horizon", GameParameter(100)}}); + testing::RandomSimTest(*game, 100); +} + +void ChainMovementTests() { + const char* kGrid = + "ABCDEF....\n" + "..........\n" + "..a.......\n" + "..bcd.....\n" + "....e.....\n" + "....f.....\n"; + std::shared_ptr game = LoadGame( + "pathfinding", + {{"grid", GameParameter(kGrid)}, {"horizon", GameParameter(100)}}); + + std::unique_ptr state = game->NewInitialState(); + auto* pf_state = static_cast(state.get()); + + // All of them should move in lock-step. No conflict. + state->ApplyActions({kRight, kUp, kLeft, kLeft, kUp, kUp}); + SPIEL_CHECK_FALSE(state->IsChanceNode()); + + // 01234 + // 0.......... + // 1.......... + // 2..10...... + // 3..234..... + // 4....5..... + // 5.......... + SPIEL_CHECK_EQ(pf_state->PlayerPos(0), std::make_pair(2, 3)); + SPIEL_CHECK_EQ(pf_state->PlayerPos(1), std::make_pair(2, 2)); + SPIEL_CHECK_EQ(pf_state->PlayerPos(2), std::make_pair(3, 2)); + SPIEL_CHECK_EQ(pf_state->PlayerPos(3), std::make_pair(3, 3)); + SPIEL_CHECK_EQ(pf_state->PlayerPos(4), std::make_pair(3, 4)); + SPIEL_CHECK_EQ(pf_state->PlayerPos(5), std::make_pair(4, 4)); +} + +void BasicHeadOnCollisionTest() { + const char* kGrid = + "ABCD......\n" + "..........\n" + "..a.....d.\n" + "..........\n" + "..b.....c.\n" + "..........\n"; + std::shared_ptr game = LoadGame( + "pathfinding", + {{"grid", GameParameter(kGrid)}, {"horizon", GameParameter(100)}}); + + std::unique_ptr state = game->NewInitialState(); + + // Collision between 0 and 1 + state->ApplyActions({kDown, kUp, kRight, kUp}); + SPIEL_CHECK_TRUE(state->IsChanceNode()); + + // Should be two possible outcomes + std::vector legal_actions = state->LegalActions(); + SPIEL_CHECK_EQ(legal_actions.size(), 2); + + // 1st possibility (child1): {0, 1}: a makes it, b stays. + // 2nd possibility (child2): {1, 0}: b makes it, a stays. + std::unique_ptr child1 = state->Child(legal_actions[0]); + std::unique_ptr child2 = state->Child(legal_actions[1]); + auto* pf_child1 = static_cast(child1.get()); + auto* pf_child2 = static_cast(child2.get()); + + // 1st + SPIEL_CHECK_EQ(pf_child1->PlayerPos(0), std::make_pair(3, 2)); + SPIEL_CHECK_EQ(pf_child1->PlayerPos(1), std::make_pair(4, 2)); + // 2nd + SPIEL_CHECK_EQ(pf_child2->PlayerPos(0), std::make_pair(2, 2)); + SPIEL_CHECK_EQ(pf_child2->PlayerPos(1), std::make_pair(3, 2)); + + // Start over. 
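+  // This time players 0/1 and 2/3 collide simultaneously on two different
+  // squares, so all four moves are contested.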
+ state = game->NewInitialState(); + state->ApplyActions({kDown, kUp, kUp, kDown}); + SPIEL_CHECK_TRUE(state->IsChanceNode()); + + // Factorial outcomes since these situtations are not factorized. + legal_actions = state->LegalActions(); + SPIEL_CHECK_EQ(legal_actions.size(), 24); +} + +void HeadOnCollision3pTest() { + const char* kGrid = + "ABC.......\n" + "..........\n" + "..a.......\n" + ".c........\n" + "..b.......\n" + "..........\n"; + std::shared_ptr game = LoadGame( + "pathfinding", + {{"grid", GameParameter(kGrid)}, {"horizon", GameParameter(100)}}); + + std::unique_ptr state = game->NewInitialState(); + + state->ApplyActions({kDown, kUp, kRight}); + SPIEL_CHECK_TRUE(state->IsChanceNode()); + + // Should be 3! = 6 possible outcomes. + std::vector legal_actions = state->LegalActions(); + SPIEL_CHECK_EQ(legal_actions.size(), 6); + + // Go through all resolutions. Make sure the agent that gets to 3,2 is equally + // distributed, and that only one of them makes it (and the other two don't + // move). + std::vector> positions = {{2, 2}, {4, 2}, {3, 1}}; + std::vector counts = {0, 0, 0}; + for (int idx = 0; idx < 6; ++idx) { + std::unique_ptr child = state->Child(legal_actions[idx]); + SPIEL_CHECK_FALSE(child->IsChanceNode()); + auto* pf_child = static_cast(child.get()); + Player player = pf_child->PlayerAtPos({3, 2}); + SPIEL_CHECK_NE(player, kInvalidPlayer); + counts[player]++; + for (Player p = 0; p < 3; ++p) { + if (p != player) { + SPIEL_CHECK_EQ(pf_child->PlayerPos(p), positions[p]); + } + } + } + + SPIEL_CHECK_EQ(counts[0], 2); + SPIEL_CHECK_EQ(counts[1], 2); + SPIEL_CHECK_EQ(counts[2], 2); +} + +void HeadOnCollision4pTest() { + const char* kGrid = + "ABCD......\n" + "..........\n" + "..a.......\n" + ".c.d......\n" + "..b.......\n" + "..........\n"; + std::shared_ptr game = LoadGame( + "pathfinding", + {{"grid", GameParameter(kGrid)}, {"horizon", GameParameter(100)}}); + + std::unique_ptr state = game->NewInitialState(); + + state->ApplyActions({kDown, kUp, kRight, kLeft}); + SPIEL_CHECK_TRUE(state->IsChanceNode()); + + // Should be 4! = 24 possible outcomes. + std::vector legal_actions = state->LegalActions(); + SPIEL_CHECK_EQ(legal_actions.size(), 24); + + // Go through all resolutions. Make sure the agent that gets to 3,2 is equally + // distributed, and that only one of them makes it (and the other two don't + // move). + std::vector> positions = {{2, 2}, {4, 2}, {3, 1}, {3, 3}}; + std::vector counts = {0, 0, 0, 0}; + for (int idx = 0; idx < 24; ++idx) { + std::unique_ptr child = state->Child(legal_actions[idx]); + SPIEL_CHECK_FALSE(child->IsChanceNode()); + auto* pf_child = static_cast(child.get()); + Player player = pf_child->PlayerAtPos({3, 2}); + SPIEL_CHECK_NE(player, kInvalidPlayer); + counts[player]++; + for (Player p = 0; p < 4; ++p) { + if (p != player) { + SPIEL_CHECK_EQ(pf_child->PlayerPos(p), positions[p]); + } + } + } + + SPIEL_CHECK_EQ(counts[0], 6); + SPIEL_CHECK_EQ(counts[1], 6); + SPIEL_CHECK_EQ(counts[2], 6); + SPIEL_CHECK_EQ(counts[3], 6); +} + +void WallCollision4pTest() { + const char* kGrid = + "ABCD......\n" + "..........\n" + "..a.......\n" + ".c*d......\n" + "..b.......\n" + "..........\n"; + std::shared_ptr game = LoadGame( + "pathfinding", + {{"grid", GameParameter(kGrid)}, {"horizon", GameParameter(100)}}); + + std::unique_ptr state = game->NewInitialState(); + std::string state_str = state->ToString(); + + // No collision, they're all running into a wall! 
+ state->ApplyActions({kDown, kUp, kRight, kLeft}); + SPIEL_CHECK_FALSE(state->IsChanceNode()); + + // State is the same as before. + SPIEL_CHECK_EQ(state->ToString(), state_str); +} + +} // namespace +} // namespace pathfinding +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::pathfinding::BasicPathfindingTests(); + open_spiel::pathfinding::BasicCongestionSimulationTests(); + open_spiel::pathfinding::ChainMovementTests(); + open_spiel::pathfinding::BasicHeadOnCollisionTest(); + open_spiel::pathfinding::HeadOnCollision3pTest(); + open_spiel::pathfinding::HeadOnCollision4pTest(); + open_spiel::pathfinding::WallCollision4pTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/pentago/pentago.cc b/scenarios/bargaining/open_spiel/open_spiel/games/pentago/pentago.cc new file mode 100644 index 0000000..5b9be5e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/pentago/pentago.cc @@ -0,0 +1,330 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/pentago/pentago.h" + +#include +#include +#include +#include + +#include "open_spiel/utils/tensor_view.h" + +namespace open_spiel { +namespace pentago { +namespace { + +// Facts about the game. +const GameType kGameType{/*short_name=*/"pentago", + /*long_name=*/"Pentago", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + { + {"ansi_color_output", GameParameter(false)}, + }}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new PentagoGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +struct Move { + int x, y, xy; // xy = x + y * kBoardSize + int r; // rotation + int dir; // which direction to rotate + int quadrant; // which quadrant to rotate + + constexpr Move(int x_, int y_, int r_) + : x(x_), + y(y_), + xy(x_ + y_ * kBoardSize), + r(r_), + dir(r_ & 1), + quadrant(r_ >> 1) {} + constexpr Move(Action a) + : Move((a / kPossibleRotations) % kBoardSize, + (a / (kPossibleRotations * kBoardSize)) % kBoardSize, + a % kPossibleRotations) {} + + Action ToAction() const { + return ((y * kBoardSize) + x) * kPossibleRotations + r; + } + + std::string ToString() const { + return absl::StrCat(std::string(1, static_cast('a' + x)), + std::string(1, static_cast('1' + y)), + std::string(1, static_cast('s' + r))); + } +}; + +// Order the bits such that quadrant rotations are easy. 
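+// Each 3x3 quadrant occupies 9 consecutive bits: the 8 outer cells sit in the
+// low 8 bits of the group in rotational order (so rotating a quadrant is a
+// 2-bit circular shift of that byte), and the centre cell is the 9th bit,
+// which never moves.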
+constexpr int xy_to_bit[kBoardPositions] = { + 0, 1, 2, 15, 16, 9, // Comment + 7, 8, 3, 14, 17, 10, // to force + 6, 5, 4, 13, 12, 11, // clang-format + 29, 30, 31, 22, 23, 24, // to keep the + 28, 35, 32, 21, 26, 25, // square spatial + 27, 34, 33, 20, 19, 18, // alignment. +}; + +// The bit mask for reading from an xy location. +constexpr uint64_t xym(int xy) { return 1ull << xy_to_bit[xy]; } +constexpr uint64_t xym(int x, int y) { return xym(x + y * kBoardSize); } +constexpr uint64_t xy_bit_mask[kBoardPositions] = { + xym(0, 0), xym(1, 0), xym(2, 0), xym(3, 0), xym(4, 0), xym(5, 0), + xym(0, 1), xym(1, 1), xym(2, 1), xym(3, 1), xym(4, 1), xym(5, 1), + xym(0, 2), xym(1, 2), xym(2, 2), xym(3, 2), xym(4, 2), xym(5, 2), + xym(0, 3), xym(1, 3), xym(2, 3), xym(3, 3), xym(4, 3), xym(5, 3), + xym(0, 4), xym(1, 4), xym(2, 4), xym(3, 4), xym(4, 4), xym(5, 4), + xym(0, 5), xym(1, 5), xym(2, 5), xym(3, 5), xym(4, 5), xym(5, 5), +}; + +// Helpers for creating the win mask. +constexpr uint64_t pattern(int x, int y, int ox, int oy) { + return (xym(x + ox * 0, y + oy * 0) | // Comment + xym(x + ox * 1, y + oy * 1) | // to force + xym(x + ox * 2, y + oy * 2) | // clang-format + xym(x + ox * 3, y + oy * 3) | // to keep + xym(x + ox * 4, y + oy * 4)); // aligntment. +} +constexpr uint64_t horizontal(int x, int y) { return pattern(x, y, 1, 0); } +constexpr uint64_t vertical(int x, int y) { return pattern(x, y, 0, 1); } +constexpr uint64_t tl_br(int x, int y) { return pattern(x, y, 1, 1); } +constexpr uint64_t bl_tr(int x, int y) { return pattern(x, y, 1, -1); } + +// The mask of 5 bits for each of the win conditions. +constexpr uint64_t win_mask[kPossibleWinConditions] = { + horizontal(0, 0), horizontal(1, 0), // Row 0 + horizontal(0, 1), horizontal(1, 1), // Row 1 + horizontal(0, 2), horizontal(1, 2), // Row 2 + horizontal(0, 3), horizontal(1, 3), // Row 3 + horizontal(0, 4), horizontal(1, 4), // Row 4 + horizontal(0, 5), horizontal(1, 5), // Row 5 + vertical(0, 0), vertical(0, 1), // Column 0 + vertical(1, 0), vertical(1, 1), // Column 1 + vertical(2, 0), vertical(2, 1), // Column 2 + vertical(3, 0), vertical(3, 1), // Column 3 + vertical(4, 0), vertical(4, 1), // Column 4 + vertical(5, 0), vertical(5, 1), // Column 5 + tl_br(0, 0), tl_br(1, 1), // Center diagonals from top-left to bottom-right + tl_br(0, 1), tl_br(1, 0), // Offset diagonals + bl_tr(0, 5), bl_tr(1, 4), // Center diagonals from bottom-left to top-right + bl_tr(0, 4), bl_tr(1, 5), // Offset diagonals +}; + +// Rotate a quadrant clockwise or counter-clockwise. +// Pulls a 8-bit segment and rotates it by 2 bits. +uint64_t rotate_quadrant_cw(uint64_t b, int quadrant) { + uint64_t m = 0xFFull << (quadrant * 9); + return (b & ~m) | (((b & m) >> 6) & m) | (((b & m) << 2) & m); +} +uint64_t rotate_quadrant_ccw(uint64_t b, int quadrant) { + uint64_t m = 0xFFull << (quadrant * 9); + return (b & ~m) | (((b & m) >> 2) & m) | (((b & m) << 6) & m); +} + +} // namespace + +PentagoState::PentagoState(std::shared_ptr game, + bool ansi_color_output) + : State(std::move(game)), ansi_color_output_(ansi_color_output) { + board_[0] = 0; + board_[1] = 0; +} + +std::vector PentagoState::LegalActions() const { + // Can move in any empty cell, and do all rotations. 
+ std::vector moves; + if (IsTerminal()) return moves; + moves.reserve((kBoardPositions - moves_made_) * kPossibleRotations); + for (int y = 0; y < kBoardSize; y++) { + for (int x = 0; x < kBoardSize; x++) { + if (get(x, y) == kPlayerNone) { + for (int r = 0; r < kPossibleRotations; r++) { + moves.push_back(Move(x, y, r).ToAction()); + } + } + } + } + return moves; +} + +std::string PentagoState::ActionToString(Player player, + Action action_id) const { + return Move(action_id).ToString(); +} + +std::string PentagoState::ToString() const { + // Generates something like: + // > t u < + // a b c d e f + // v 1 . . O @ . O v + // s 2 . . O . . @ v + // 3 . @ @ . @ O + // 4 . @ @ . O . + // z 5 @ . O @ O . w + // ^ 6 @ O @ O O O ^ + // > y x < + + std::string white = "O"; + std::string black = "@"; + std::string empty = "."; + std::string coord = ""; + std::string reset = ""; + if (ansi_color_output_) { + std::string esc = "\033"; + reset = esc + "[0m"; + coord = esc + "[1;37m"; // bright white + empty = reset + "."; + white = esc + "[1;33m" + "@"; // bright yellow + black = esc + "[1;34m" + "@"; // bright blue + } + + // Enable the arrows if/when open_spiel allows unicode in strings. + // constexpr char const* arrows[] = {"↙", "↗", "↖", "↘", "↗", "↙", "↘", "↖"}; + constexpr char const* arrows[] = {"v", ">", "<", "v", "^", "<", ">", "^"}; + constexpr char const* left[] = {arrows[0], "s", " ", " ", "z", arrows[7]}; + constexpr char const* right[] = {arrows[3], "v", " ", " ", "w", arrows[4]}; + + std::ostringstream out; + out << coord; + out << " " << arrows[1] << " t u " << arrows[2] << "\n"; + out << " a b c d e f\n"; + for (int y = 0; y < kBoardSize; y++) { + out << left[y] << " " << (y + 1) << " "; + for (int x = 0; x < kBoardSize; x++) { + Player p = get(x, y); + if (p == kPlayerNone) out << empty; + if (p == kPlayer1) out << white; + if (p == kPlayer2) out << black; + out << " "; + } + out << coord << right[y] << "\n"; + } + out << " " << arrows[6] << " y x " << arrows[5] << reset << "\n"; + return out.str(); +} + +PentagoPlayer PentagoState::get(int i) const { + return (board_[0] & xy_bit_mask[i] + ? kPlayer1 + : board_[1] & xy_bit_mask[i] ? kPlayer2 : kPlayerNone); +} + +std::vector PentagoState::Returns() const { + if (outcome_ == kPlayer1) return {1, -1}; + if (outcome_ == kPlayer2) return {-1, 1}; + if (outcome_ == kPlayerDraw) return {0, 0}; + return {0, 0}; // Unfinished +} + +std::string PentagoState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return HistoryString(); +} + +std::string PentagoState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +int PlayerRelative(PentagoPlayer state, Player current) { + switch (state) { + case kPlayer1: + return current == 0 ? 0 : 1; + case kPlayer2: + return current == 1 ? 0 : 1; + case kPlayerNone: + return 2; + default: + SpielFatalError("Unknown player type."); + } +} + +void PentagoState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + TensorView<2> view(values, {kCellStates, kBoardPositions}, true); + for (int i = 0; i < kBoardPositions; i++) { + view[{PlayerRelative(get(i), player), i}] = 1.0; + } +} + +void PentagoState::DoApplyAction(Action action) { + SPIEL_CHECK_EQ(outcome_, kPlayerNone); + + Move move(action); + SPIEL_CHECK_EQ(get(move.xy), kPlayerNone); + + // Apply the move. 
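+  // Place the stone on the mover's bitboard, then rotate the chosen quadrant
+  // of both bitboards in the requested direction.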
+ board_[current_player_] |= xy_bit_mask[move.xy]; + if (move.dir == 0) { + board_[0] = rotate_quadrant_ccw(board_[0], move.quadrant); + board_[1] = rotate_quadrant_ccw(board_[1], move.quadrant); + } else { + board_[0] = rotate_quadrant_cw(board_[0], move.quadrant); + board_[1] = rotate_quadrant_cw(board_[1], move.quadrant); + } + moves_made_++; + + // Check the win conditions. + bool p1_won = false; + bool p2_won = false; + for (int i = 0; i < kPossibleWinConditions; i++) { + uint64_t wm = win_mask[i]; + if ((board_[0] & wm) == wm) p1_won = true; + if ((board_[1] & wm) == wm) p2_won = true; + } + + // Note that you can rotate such that you cause your opponent to win. + if (p1_won && p2_won) { + outcome_ = kPlayerDraw; + } else if (p1_won) { + outcome_ = kPlayer1; + } else if (p2_won) { + outcome_ = kPlayer2; + } else if (moves_made_ == kBoardPositions) { + outcome_ = kPlayerDraw; + } + + current_player_ = (current_player_ == kPlayer1 ? kPlayer2 : kPlayer1); +} + +std::unique_ptr PentagoState::Clone() const { + return std::unique_ptr(new PentagoState(*this)); +} + +PentagoGame::PentagoGame(const GameParameters& params) + : Game(kGameType, params), + ansi_color_output_(ParameterValue("ansi_color_output")) {} + +} // namespace pentago +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/pentago/pentago.h b/scenarios/bargaining/open_spiel/open_spiel/games/pentago/pentago.h new file mode 100644 index 0000000..9f5f4be --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/pentago/pentago.h @@ -0,0 +1,120 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_PENTAGO_H_ +#define OPEN_SPIEL_GAMES_PENTAGO_H_ + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/spiel.h" + +// https://en.wikipedia.org/wiki/Pentago +// Does not implement pie rule to balance the game +// +// Parameters: +// "ansi_color_output" bool Whether to color the output for a terminal. + +namespace open_spiel { +namespace pentago { + +inline constexpr int kNumPlayers = 2; +inline constexpr int kBoardSize = 6; +inline constexpr int kBoardPositions = kBoardSize * kBoardSize; +inline constexpr int kPossibleRotations = 8; +inline constexpr int kPossibleActions = kBoardPositions * kPossibleRotations; +inline constexpr int kPossibleWinConditions = 32; +inline constexpr int kCellStates = 1 + kNumPlayers; + +enum PentagoPlayer { + kPlayer1, + kPlayer2, + kPlayerNone, + kPlayerDraw, +}; + +// State of an in-play game. +class PentagoState : public State { + public: + PentagoState(std::shared_ptr game, + bool ansi_color_output = false); + + PentagoState(const PentagoState&) = default; + + Player CurrentPlayer() const override { + return IsTerminal() ? 
kTerminalPlayerId : static_cast(current_player_); + } + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override { return outcome_ != kPlayerNone; } + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + + // A 3d tensor, 3 player-relative one-hot 2d planes. The layers are: the + // specified player, the other player, and empty. + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + std::vector LegalActions() const override; + + protected: + void DoApplyAction(Action action) override; + + PentagoPlayer get(int x, int y) const { return get(x + y * kBoardSize); } + PentagoPlayer get(int i) const; + + private: + std::array board_; + PentagoPlayer current_player_ = kPlayer1; + PentagoPlayer outcome_ = kPlayerNone; + int moves_made_ = 0; + const bool ansi_color_output_; +}; + +// Game object. +class PentagoGame : public Game { + public: + explicit PentagoGame(const GameParameters& params); + + int NumDistinctActions() const override { return kPossibleActions; } + std::unique_ptr NewInitialState() const override { + return std::unique_ptr( + new PentagoState(shared_from_this(), ansi_color_output_)); + } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { return -1; } + absl::optional UtilitySum() const override { return 0; } + double MaxUtility() const override { return 1; } + std::vector ObservationTensorShape() const override { + return {kCellStates, kBoardSize, kBoardSize}; + } + int MaxGameLength() const override { + // No stones are removed, and it is possible to draw by filling the board. + // Increase this by one if swap is ever implemented. + return kBoardPositions; + } + + private: + const bool ansi_color_output_ = false; +}; + +} // namespace pentago +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_PENTAGO_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/pentago/pentago_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/pentago/pentago_test.cc new file mode 100644 index 0000000..cfb03eb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/pentago/pentago_test.cc @@ -0,0 +1,40 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace pentago { +namespace { + +namespace testing = open_spiel::testing; + +void BasicPentagoTests() { + testing::LoadGameTest("pentago"); + testing::NoChanceOutcomesTest(*LoadGame("pentago")); + testing::RandomSimTest(*LoadGame("pentago"), 100); + + // Ansi colors! 
+ testing::RandomSimTest( + *LoadGame("pentago", {{"ansi_color_output", GameParameter(true)}}), 1); + testing::RandomSimTest(*LoadGame("pentago(ansi_color_output=True)"), 10); +} + +} // namespace +} // namespace pentago +} // namespace open_spiel + +int main(int argc, char** argv) { open_spiel::pentago::BasicPentagoTests(); } diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/phantom_go/phantom_go.cc b/scenarios/bargaining/open_spiel/open_spiel/games/phantom_go/phantom_go.cc new file mode 100644 index 0000000..1a5ead0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/phantom_go/phantom_go.cc @@ -0,0 +1,354 @@ +// Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/phantom_go/phantom_go.h" + +#include +#include + +#include "open_spiel/game_parameters.h" +#include "open_spiel/games/phantom_go/phantom_go_board.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace phantom_go { +namespace { + +// Facts about the game +const GameType kGameType{ + /*short_name=*/"phantom_go", + /*long_name=*/"Phantom Go", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kImperfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"komi", GameParameter(7.5)}, + {"board_size", GameParameter(9)}, + {"handicap", GameParameter(0)}, + // After the maximum game length, the game will end arbitrarily and the + // score is computed as usual (i.e. number of stones + komi). + // It's advised to only use shorter games to compute win-rates. 
+ // When not provided, it defaults to DefaultMaxGameLength(board_size) + {"max_game_length", + GameParameter(GameParameter::Type::kInt, /*is_mandatory=*/false)}}, +}; + +std::shared_ptr Factory(const GameParameters ¶ms) { + return std::shared_ptr(new PhantomGoGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +std::vector HandicapStones(int num_handicap) { + if (num_handicap < 2 || num_handicap > 9) return {}; + + static std::array placement = { + {MakePoint("d4"), MakePoint("q16"), MakePoint("d16"), MakePoint("q4"), + MakePoint("d10"), MakePoint("q10"), MakePoint("k4"), MakePoint("k16"), + MakePoint("k10")}}; + static VirtualPoint center = MakePoint("k10"); + + std::vector points(placement.begin(), placement.begin() + num_handicap); + + if (num_handicap >= 5 && num_handicap % 2 == 1) { + points[num_handicap - 1] = center; + } + + return points; +} + +} // namespace + +class PhantomGoObserver : public Observer { + public: + PhantomGoObserver(IIGObservationType iig_obs_type) + : Observer(/*has_string=*/true, /*has_tensor=*/true), + iig_obs_type_(iig_obs_type) {} + + void WriteTensor(const State &observed_state, int player, + Allocator *allocator) const override { + const PhantomGoState &state = + open_spiel::down_cast(observed_state); + + const int totalBoardPoints = + state.board().board_size() * state.board().board_size(); + + { + auto out = allocator->Get("stone-counts", {2}); + auto stoneCount = state.GetStoneCount(); + out.at(0) = stoneCount[0]; + out.at(1) = stoneCount[1]; + } + + if (iig_obs_type_.private_info == PrivateInfoType::kSinglePlayer) { + { + auto observation = state.board().GetObservationByID(player); + + auto out_empty = + allocator->Get("player_observation_empty", {totalBoardPoints}); + auto out_white = + allocator->Get("player_observation_white", {totalBoardPoints}); + auto out_black = + allocator->Get("player_observation_black", {totalBoardPoints}); + auto out_komi = allocator->Get("komi", {totalBoardPoints}); + + for (int i = 0; i < totalBoardPoints; i++) { + switch (observation[i]) { + case GoColor::kBlack: + out_black.at(i) = true; + out_white.at(i) = false; + out_empty.at(i) = false; + break; + + case GoColor::kWhite: + out_black.at(i) = false; + out_white.at(i) = true; + out_empty.at(i) = false; + break; + + case GoColor::kEmpty: + out_black.at(i) = false; + out_white.at(i) = false; + out_empty.at(i) = true; + break; + + default: + SpielFatalError(absl::StrCat("Unhandled case: ", observation[i])); + } + if (state.CurrentPlayer() == (uint8_t)GoColor::kWhite) { + out_komi.at(i) = 1; + } else { + out_komi.at(i) = 0; + } + } + } + } + } + + std::string StringFrom(const State &observed_state, + int player) const override { + const PhantomGoState &state = + open_spiel::down_cast(observed_state); + + return state.ObservationString(player); + } + + private: + IIGObservationType iig_obs_type_; +}; + +PhantomGoState::PhantomGoState(std::shared_ptr game, int board_size, + float komi, int handicap) + : State(std::move(game)), + board_(board_size), + komi_(komi), + handicap_(handicap), + max_game_length_(game_->MaxGameLength()), + to_play_(GoColor::kBlack) { + ResetBoard(); +} + +std::string PhantomGoState::ObservationString(int player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return absl::StrCat(board_.ObservationToString(player), + board_.LastMoveInformationToString()); +} + +void PhantomGoState::ObservationTensor(Player player, + absl::Span values) const 
{ + ContiguousAllocator allocator(values); + const PhantomGoGame &game = + open_spiel::down_cast(*game_); + game.default_observer_->WriteTensor(*this, player, &allocator); +} + +std::vector PhantomGoState::LegalActions() const { + std::vector actions{}; + if (IsTerminal()) return actions; + for (VirtualPoint p : BoardPoints(board_.board_size())) { + if (board_.IsLegalMove(p, to_play_)) { + actions.push_back(board_.VirtualActionToAction(p)); + } + } + actions.push_back(board_.pass_action()); + return actions; +} + +std::string PhantomGoState::ActionToString(Player player, Action action) const { + return absl::StrCat( + GoColorToString(static_cast(player)), " ", + VirtualPointToString(board_.ActionToVirtualAction(action))); +} + +char GoColorToChar(GoColor c) { + switch (c) { + case GoColor::kBlack: + return 'X'; + case GoColor::kWhite: + return 'O'; + case GoColor::kEmpty: + return '+'; + case GoColor::kGuard: + return '#'; + default: + SpielFatalError(absl::StrCat("Unknown color ", c, " in GoColorToChar.")); + return '!'; + } +} + +std::string PhantomGoState::ToString() const { + std::array stoneCount = board_.GetStoneCount(); + + return absl::StrCat("GoState(komi=", komi_, + ", to_play=", GoColorToString(to_play_), + ", history.size()=", history_.size(), ", ", + "stones_count: w", stoneCount[1], " b", stoneCount[0], + ")\n", board_.ToString(), board_.ObservationsToString()); +} + +bool PhantomGoState::IsTerminal() const { + if (history_.size() < 2) return false; + return (history_.size() >= max_game_length_) || superko_ || + (history_[history_.size() - 1].action == board_.pass_action() && + history_[history_.size() - 2].action == board_.pass_action()); +} + +std::vector PhantomGoState::Returns() const { + if (!IsTerminal()) return {0.0, 0.0}; + + if (superko_) { + // Superko rules (https://senseis.xmp.net/?Superko) are complex and vary + // between rulesets. + // For simplicity and because superkos are very rare, we just treat them as + // a draw. + return {DrawUtility(), DrawUtility()}; + } + + // Score with Tromp-Taylor. + float black_score = TrompTaylorScore(board_, komi_, handicap_); + + std::vector returns(phantom_go::NumPlayers()); + if (black_score > 0) { + returns[ColorToPlayer(GoColor::kBlack)] = WinUtility(); + returns[ColorToPlayer(GoColor::kWhite)] = LossUtility(); + } else if (black_score < 0) { + returns[ColorToPlayer(GoColor::kBlack)] = LossUtility(); + returns[ColorToPlayer(GoColor::kWhite)] = WinUtility(); + } else { + returns[ColorToPlayer(GoColor::kBlack)] = DrawUtility(); + returns[ColorToPlayer(GoColor::kWhite)] = DrawUtility(); + } + return returns; +} + +std::unique_ptr PhantomGoState::Clone() const { + return std::unique_ptr(new PhantomGoState(*this)); +} + +void PhantomGoState::UndoAction(Player player, Action action) { + // We don't have direct undo functionality, but copying the board and + // replaying all actions is still pretty fast (> 1 million undos/second). + history_.pop_back(); + --move_number_; + ResetBoard(); + for (auto [_, action] : history_) { + DoApplyAction(action); + } +} + +void PhantomGoState::DoApplyAction(Action action) { + if (board_.PlayMove(board_.ActionToVirtualAction(action), to_play_)) { + to_play_ = OppColor(to_play_); + bool was_inserted = repetitions_.insert(board_.HashValue()).second; + if (!was_inserted && action != board_.pass_action()) { + // We have encountered this position before. 
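+      // Mark the game as ended by positional superko; IsTerminal() then
+      // reports the state as terminal and Returns() scores it as a draw.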
+ superko_ = true; + } + } +} + +void PhantomGoState::ResetBoard() { + board_.Clear(); + if (handicap_ < 2) { + to_play_ = GoColor::kBlack; + } else { + for (VirtualPoint p : HandicapStones(handicap_)) { + board_.PlayMove(p, GoColor::kBlack); + } + to_play_ = GoColor::kWhite; + } + + repetitions_.clear(); + repetitions_.insert(board_.HashValue()); + superko_ = false; +} +std::array PhantomGoState::GetStoneCount() const { + return board_.GetStoneCount(); +} +bool PhantomGoState::equalMetaposition(const PhantomGoState &state1, + const PhantomGoState &state2, + int playerID) { + if (state1.board_.board_size() != state2.board_.board_size()) { + return false; + } + + std::array stoneCount1 = state1.board_.GetStoneCount(); + std::array stoneCount2 = state2.board_.GetStoneCount(); + + if (stoneCount1[0] != stoneCount2[0] || stoneCount1[1] != stoneCount2[1]) { + return false; + } + + int boardSize = state1.board_.board_size(); + + auto observation1 = state1.board_.GetObservationByID(playerID); + auto observation2 = state2.board_.GetObservationByID(playerID); + + for (int i = 0; i < boardSize * boardSize; i++) { + if (observation1[i] != observation2[i]) { + return false; + } + } + + if (state1.to_play_ != state2.to_play_) { + return false; + } + + return true; +} +int PhantomGoState::GetMaxGameLenght() const { return max_game_length_; } + +PhantomGoGame::PhantomGoGame(const GameParameters ¶ms) + : Game(kGameType, params), + komi_(ParameterValue("komi")), + board_size_(ParameterValue("board_size")), + handicap_(ParameterValue("handicap")), + max_game_length_(ParameterValue("max_game_length", + DefaultMaxGameLength(board_size_))) { + default_observer_ = std::make_shared(kDefaultObsType); +} + +} // namespace phantom_go +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/phantom_go/phantom_go.h b/scenarios/bargaining/open_spiel/open_spiel/games/phantom_go/phantom_go.h new file mode 100644 index 0000000..ff1cf61 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/phantom_go/phantom_go.h @@ -0,0 +1,185 @@ +// Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_PHANTOM_GO_H_ +#define OPEN_SPIEL_GAMES_PHANTOM_GO_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/games/phantom_go/phantom_go_board.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// Game of Phantom Go: +// https://www.chessprogramming.org/Phantom_Go +// +// Parameters: +// "komi" float compensation for white (default +// = 7.5) "board_size" int rows of the board, usually 9, 13 or 19 +// (default = 9) "handicap" int number of handicap stones for black +// (default = 0) "max_game_length" int maximal lenght of a game (default = +// board_size * board_size * 4) + +namespace open_spiel { +namespace phantom_go { + +class PhantomGoObserver; + +// Constants. 
+inline constexpr int NumPlayers() { return 2; }
+inline constexpr double LossUtility() { return -1; }
+inline constexpr double WinUtility() { return 1; }
+inline constexpr int CellStates() { return 3; }  // Black, white, empty.
+
+// Go can only end in a draw when using a round komi.
+// We also treat superko as a draw.
+inline constexpr double DrawUtility() { return 0; }
+
+// All actions must be in [0; NumDistinctActions).
+inline int NumDistinctActions(int board_size) {
+  return board_size * board_size + 1;
+}
+
+// This is set high mainly because moves onto enemy stones are also counted
+// towards the game length, and "clear" resampling needs room for many passes
+// and "observation moves".
+inline int DefaultMaxGameLength(int board_size) {
+  return board_size * board_size * 4;
+}
+
+inline int MaxGameLength(int board_size) { return board_size * board_size * 4; }
+
+inline int ColorToPlayer(GoColor c) { return static_cast<int>(c); }
+inline GoColor PlayerToColor(Player p) { return static_cast<GoColor>(p); }
+
+// State of an in-play game.
+// Actions are contiguous from 0 to board_size * board_size - 1, row-major, i.e.
+// the (row, col) action is encoded as row * board_size + col.
+// The pass action is board_size * board_size.
+class PhantomGoState : public State {
+ public:
+  // Constructs a Go state for the empty board.
+  PhantomGoState(std::shared_ptr<const Game> game, int board_size, float komi,
+                 int handicap);
+
+  Player CurrentPlayer() const override {
+    return IsTerminal() ? kTerminalPlayerId : ColorToPlayer(to_play_);
+  }
+  std::vector<Action> LegalActions() const override;
+
+  std::array<int, 2> GetStoneCount() const;
+
+  int GetMaxGameLenght() const;
+
+  static bool equalMetaposition(const PhantomGoState& state1,
+                                const PhantomGoState& state2, int playerID);
+
+  std::string ActionToString(Player player, Action action) const override;
+  std::string ToString() const override;
+
+  bool IsTerminal() const override;
+
+  std::string ObservationString(int player) const override;
+
+  // Four planes: black, white, empty, and a bias plane of bits indicating komi
+  // (whether white is to play).
+  void ObservationTensor(int player, absl::Span<float> values) const override;
+
+  std::vector<double> Returns() const override;
+
+  std::unique_ptr<State> Clone() const override;
+  void UndoAction(Player player, Action action) override;
+
+  const PhantomGoBoard& board() const { return board_; }
+
+ protected:
+  void DoApplyAction(Action action) override;
+
+ private:
+  void ResetBoard();
+
+  PhantomGoBoard board_;
+
+  // RepetitionTable records which positions we have already encountered.
+  // We are already indexing by board hash, so there is no need to hash that
+  // hash again, so we use a custom passthrough hasher.
+ class PassthroughHash { + public: + std::size_t operator()(uint64_t x) const { + return static_cast(x); + } + }; + using RepetitionTable = std::unordered_set; + RepetitionTable repetitions_; + + const float komi_; + const int handicap_; + const int max_game_length_; + GoColor to_play_; + bool superko_; +}; + +class PhantomGoGame : public Game { + public: + explicit PhantomGoGame(const GameParameters& params); + + std::shared_ptr default_observer_; + + int NumDistinctActions() const override { + return phantom_go::NumDistinctActions(board_size_); + } + + std::unique_ptr NewInitialState() const override { + return std::unique_ptr( + new PhantomGoState(shared_from_this(), board_size_, komi_, handicap_)); + } + + std::vector ObservationTensorShape() const override { + // Planes: black, white, empty, and a bias plane indicating komi (whether + // white is to play) + // and 2 for stone count of white and black + return {2 + board_size_ * board_size_ * (CellStates() + 1)}; + } + + TensorLayout ObservationTensorLayout() const override { + return TensorLayout::kCHW; + } + + int NumPlayers() const override { return phantom_go::NumPlayers(); } + + double MinUtility() const override { return LossUtility(); } + absl::optional UtilitySum() const override { + return LossUtility() + WinUtility(); + } + double MaxUtility() const override { return WinUtility(); } + + int MaxGameLength() const override { return max_game_length_; } + + private: + const float komi_; + const int board_size_; + const int handicap_; + const int max_game_length_; +}; + +} // namespace phantom_go +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_PHANTOM_GO_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/phantom_go/phantom_go_board.cc b/scenarios/bargaining/open_spiel/open_spiel/games/phantom_go/phantom_go_board.cc new file mode 100644 index 0000000..11a988e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/phantom_go/phantom_go_board.cc @@ -0,0 +1,839 @@ +// Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/phantom_go/phantom_go_board.h" + +#include + +#include "open_spiel/abseil-cpp/absl/random/uniform_int_distribution.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/games/chess/chess_common.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace phantom_go { + +namespace { + +// 8 adjacent directions. +// +// 405 +// 1 2 +// 637 +// +// The order is important because it is used to index 3x3 patterns! +// +inline constexpr std::array Dir8 = {{ + kVirtualBoardSize, // new line + -1, // new line + +1, // new line + -static_cast(kVirtualBoardSize), + +static_cast(kVirtualBoardSize) - 1, + +static_cast(kVirtualBoardSize) + 1, + -static_cast(kVirtualBoardSize) - 1, + -static_cast(kVirtualBoardSize) + 1, + 0 // Dummy element. +}}; + +// Calls f for all 4 direct neighbours of p. 
+// f should have type void f(VirtualPoint n), but is passed as a template so we +// can elide the function call overhead. +template +void Neighbours(VirtualPoint p, const F &f) { + f(p + kVirtualBoardSize); + f(p + 1); + f(p - 1); + f(p - kVirtualBoardSize); +} + +std::vector MakeBoardPoints(int board_size) { + std::vector points; + points.reserve(board_size * board_size); + for (int row = 0; row < board_size; ++row) { + for (int col = 0; col < board_size; ++col) { + points.push_back(VirtualPointFrom2DPoint({row, col})); + } + } + return points; +} + +template +const std::vector &GetBoardPoints() { + static std::vector points = MakeBoardPoints(board_size); + return points; +} + +char GoColorToChar(GoColor c) { + switch (c) { + case GoColor::kBlack: + return 'X'; + case GoColor::kWhite: + return 'O'; + case GoColor::kEmpty: + return '+'; + case GoColor::kGuard: + return '#'; + default: + SpielFatalError(absl::StrCat("Unknown color ", c, " in GoColorToChar.")); + return '!'; + } +} + +std::string MoveAsAscii(VirtualPoint p, GoColor c) { + static std::string code = "0123456789abcdefghijklmnopqrstuvwxyz"; + static int mask = 31; + // 1 bit for color, 9 bits for the point. + uint16_t value = static_cast(c) | (p << 1); + // Encode in 2 characters of 5 bit each. + std::string encoded; + encoded.push_back(code[(value >> 5) & mask]); + encoded.push_back(code[value & mask]); + return encoded; +} + +} // namespace + +Neighbours4::Neighbours4(const VirtualPoint p) + : dir_(static_cast(0)), p_(p) {} + +Neighbours4 &Neighbours4::operator++() { + ++dir_; + return *this; +} + +const VirtualPoint Neighbours4::operator*() const { return p_ + Dir8[dir_]; } + +Neighbours4::operator bool() const { return dir_ < 4; } + +// update 6 +int VirtualPointToBoardPoint(VirtualPoint p, int boardSize) { + std::pair pair = VirtualPointTo2DPoint(p); + return pair.first * boardSize + pair.second; +} + +VirtualPoint VirtualPointFromBoardPoint(int boardPoint, int boardSize) { + std::pair pair; + pair.second = boardPoint % boardSize; + pair.first = boardPoint / boardSize; + return VirtualPointFrom2DPoint(pair); +} + +std::pair VirtualPointTo2DPoint(VirtualPoint p) { + if (p == kInvalidPoint || p == kVirtualPass) return std::make_pair(-1, -1); + + const int row = static_cast(p) / kVirtualBoardSize; + const int col = static_cast(p) % kVirtualBoardSize; + return std::make_pair(row - 1, col - 1); +} + +VirtualPoint VirtualPointFrom2DPoint(std::pair row_col) { + return static_cast((row_col.first + 1) * kVirtualBoardSize + + row_col.second + 1); +} + +// Internally, the board is *always* 21*21 with a border of guard stones around +// all sides of the board. Thus we need to map a coordinate in that space +// to a coordinate in the normal board. 
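+// For example, on a 9x9 board the point "a1" is virtual point
+// 1 * kVirtualBoardSize + 1 = 22, which corresponds to concrete action 0; the
+// concrete pass action is 9 * 9 = 81, while kVirtualPass is its virtual
+// counterpart.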
+Action VirtualActionToAction(int virtual_action, int board_size) { + if (virtual_action == kVirtualPass) return board_size * board_size; + const int virtual_row = static_cast(virtual_action) / kVirtualBoardSize; + const int virtual_col = static_cast(virtual_action) % kVirtualBoardSize; + return board_size * (virtual_row - 1) + (virtual_col - 1); +} + +int ActionToVirtualAction(Action action, int board_size) { + if (action == board_size * board_size) return kVirtualPass; + int row = action / board_size; + int column = action % board_size; + return (row + 1) * kVirtualBoardSize + (column + 1); +} + +const std::vector &BoardPoints(int board_size) { +#define CASE_GET_POINTS(n) \ + case n: \ + return GetBoardPoints() + + switch (board_size) { + CASE_GET_POINTS(2); + CASE_GET_POINTS(3); + CASE_GET_POINTS(4); + CASE_GET_POINTS(5); + CASE_GET_POINTS(6); + CASE_GET_POINTS(7); + CASE_GET_POINTS(8); + CASE_GET_POINTS(9); + CASE_GET_POINTS(10); + CASE_GET_POINTS(11); + CASE_GET_POINTS(12); + CASE_GET_POINTS(13); + CASE_GET_POINTS(14); + CASE_GET_POINTS(15); + CASE_GET_POINTS(16); + CASE_GET_POINTS(17); + CASE_GET_POINTS(18); + CASE_GET_POINTS(19); + default: + SpielFatalError(absl::StrCat("unsupported size", + board_size)); + } + +#undef CASE_GET_POINTS +} + +GoColor OppColor(GoColor c) { + switch (c) { + case GoColor::kBlack: + return GoColor::kWhite; + case GoColor::kWhite: + return GoColor::kBlack; + case GoColor::kEmpty: + case GoColor::kGuard: + return c; + default: + SpielFatalError(absl::StrCat("Unknown color ", c, " in OppColor.")); + return c; + } +} + +std::ostream &operator<<(std::ostream &os, GoColor c) { + return os << GoColorToString(c); +} + +std::string GoColorToString(GoColor c) { + switch (c) { + case GoColor::kBlack: + return "B"; + case GoColor::kWhite: + return "W"; + case GoColor::kEmpty: + return "E"; + case GoColor::kGuard: + return "G"; + default: + SpielFatalError( + absl::StrCat("Unknown color ", c, " in GoColorToString.")); + return "This will never return."; + } +} + +std::ostream &operator<<(std::ostream &os, VirtualPoint p) { + return os << VirtualPointToString(p); +} + +std::string VirtualPointToString(VirtualPoint p) { + switch (p) { + case kInvalidPoint: + return "INVALID_POINT"; + case kVirtualPass: + return "PASS"; + default: { + auto row_col = VirtualPointTo2DPoint(p); + char col = 'a' + row_col.second; + if (col >= 'i') ++col; // Go / SGF labeling skips 'i'. + return absl::StrCat(std::string(1, col), row_col.first + 1); + } + } +} + +VirtualPoint MakePoint(std::string s) { + std::transform(s.begin(), s.end(), s.begin(), ::tolower); + + if (s == "pass") return kVirtualPass; + if (s.size() < 2 || s.size() > 3) return kInvalidPoint; + + int col = s[0] < 'i' ? s[0] - 'a' : s[0] - 'a' - 1; + int row = s[1] - '0'; + if (s.size() == 3) { + row *= 10; + row += s[2] - '0'; + } + return VirtualPointFrom2DPoint({row - 1, col}); +} + +PhantomGoBoard::PhantomGoBoard(int board_size) + : board_size_(board_size), pass_action_(board_size * board_size) { + if (board_size_ > 19) { + SpielFatalError( + absl::StrCat("The current Go implementation supports board size up to " + "19. 
Provided: ", + board_size)); + } + Clear(); +} + +void PhantomGoBoard::Clear() { + zobrist_hash_ = 0; + + for (int i = 0; i < board_size_ * board_size_; i++) { + observations_[(uint8_t)GoColor::kBlack][i] = GoColor::kEmpty; + observations_[(uint8_t)GoColor::kWhite][i] = GoColor::kEmpty; + } + + stone_count_ = {0, 0}; + + last_move_valid = true; + last_move_pass = false; + last_move_captured = 0; + + for (int i = 0; i < board_.size(); ++i) { + Vertex &v = board_[i]; + v.color = GoColor::kGuard; + v.chain_head = static_cast(i); + v.chain_next = static_cast(i); + chains_[i].reset_border(); + } + + for (VirtualPoint p : BoardPoints(board_size_)) { + board_[p].color = GoColor::kEmpty; + chains_[p].reset(); + } + + for (VirtualPoint p : BoardPoints(board_size_)) { + Neighbours(p, [this, p](VirtualPoint n) { + if (IsEmpty(n)) chain(p).add_liberty(n); + }); + } + + for (int i = 0; i < last_captures_.size(); ++i) { + last_captures_[i] = kInvalidPoint; + } + + last_ko_point_ = kInvalidPoint; +} + +bool PhantomGoBoard::PlayMove(VirtualPoint p, GoColor c) { + if (p == kVirtualPass) { + last_ko_point_ = kInvalidPoint; + last_move_captured = 0; + last_move_pass = true; + last_move_valid = true; + return true; + } else { + last_move_pass = false; + } + + observations_[(uint8_t)c][VirtualPointToBoardPoint(p, board_size_)] = + board_[p].color; + + // playing illegal moves will occur during phantom go, it is even desired + if (!IsLegalMoveObserver(p, c)) { + last_move_captured = 0; + last_move_valid = false; // was a observational move + return false; + } + + last_move_valid = true; + + stone_count_[(uint8_t)c]++; + + // Preparation for ko checking. + bool played_in_enemy_eye = true; + Neighbours(p, [this, c, &played_in_enemy_eye](VirtualPoint n) { + GoColor s = PointColor(n); + if (s == c || s == GoColor::kEmpty) { + played_in_enemy_eye = false; + } + }); + + JoinChainsAround(p, c); + SetStone(p, c); + RemoveLibertyFromNeighbouringChains(p); + int stones_captured = CaptureDeadChains(p, c); + + stone_count_[(uint8_t)OppColor(c)] -= stones_captured; + last_move_captured = stones_captured; + + observations_[(uint8_t)c][VirtualPointToBoardPoint(p, board_size_)] = c; + + if (played_in_enemy_eye && stones_captured == 1) { + last_ko_point_ = last_captures_[0]; + } else { + last_ko_point_ = kInvalidPoint; + } + + if (stones_captured != 0) { + for (int point = 0; point < board_size_ * board_size_; point++) { + VirtualPoint vpoint = VirtualPointFromBoardPoint(point, board_size_); + + if (observations_[(uint8_t)OppColor(c)][point] == OppColor(c) && + board_[vpoint].color == GoColor::kEmpty) { + observations_[(uint8_t)GoColor::kBlack][point] = GoColor::kEmpty; + observations_[(uint8_t)GoColor::kWhite][point] = GoColor::kEmpty; + } + } + } + + SPIEL_CHECK_GT(chain(p).num_pseudo_liberties, 0); + + return true; +} + +VirtualPoint PhantomGoBoard::SingleLiberty(VirtualPoint p) const { + VirtualPoint head = ChainHead(p); + VirtualPoint liberty = chain(p).single_liberty(); + + // Check it is really a liberty. + SPIEL_CHECK_TRUE(IsInBoardArea(liberty)); + SPIEL_CHECK_TRUE(IsEmpty(liberty)); + + // Make sure the liberty actually borders the group. 
+ for (auto n = Neighbours4(liberty); n; ++n) { + if (ChainHead(*n) == head) return liberty; + } + + SpielFatalError( + absl::StrCat("liberty", liberty, " does not actually border group ", p)); +} + +void PhantomGoBoard::SetStone(VirtualPoint p, GoColor c) { + static const chess_common::ZobristTable + zobrist_values( + /*seed=*/2765481); + + zobrist_hash_ ^= zobrist_values[p][static_cast( + c == GoColor::kEmpty ? PointColor(p) : c)]; + + board_[p].color = c; +} + +std::array +PhantomGoBoard::GetObservationByID(int player_id) const { + return observations_[player_id]; +} + +std::string PhantomGoBoard::ObservationsToString() const { + std::stringstream ss; + ss << "\nObservation white:\n"; + + ss << ObservationToString((uint8_t)GoColor::kWhite); + + ss << "\nObservation black:\n"; + + ss << ObservationToString((uint8_t)GoColor::kBlack); + + ss << "\n"; + + ss << LastMoveInformationToString(); + + return ss.str(); +} + +std::string PhantomGoBoard::ObservationToString(int player) const { + std::stringstream ss; + for (int x = board_size_ - 1; x >= 0; x--) { + if (board_size_ - 1 >= 10 && x < 10) { + ss << " "; + } + ss << " " << x + 1 << " "; + for (int y = 0; y < board_size_; y++) { + ss << GoColorToChar(observations_[player][x * board_size_ + y]); + } + ss << "\n"; + } + ss << " "; + + for (int i = 0; i < board_size_; i++) { + char letter = 'A' + i; + if (letter >= 'I') { + letter++; + } + ss << letter; + } + + ss << "\n"; + return ss.str(); +} + +// Combines the groups around the newly placed stone at vertex. If no groups +// are available for joining, the new stone is placed as a new group. +void PhantomGoBoard::JoinChainsAround(VirtualPoint p, GoColor c) { + VirtualPoint largest_chain_head = kInvalidPoint; + int largest_chain_size = 0; + Neighbours( + p, [this, c, &largest_chain_head, &largest_chain_size](VirtualPoint n) { + if (PointColor(n) == c) { + Chain &c = chain(n); + if (c.num_stones > largest_chain_size) { + largest_chain_size = c.num_stones; + largest_chain_head = ChainHead(n); + } + } + }); + if (largest_chain_size == 0) { + InitNewChain(p); + return; + } + + Neighbours(p, [this, c, &largest_chain_head](VirtualPoint n) { + if (PointColor(n) == c) { + VirtualPoint chain_head = ChainHead(n); + if (chain_head != largest_chain_head) { + chain(largest_chain_head).merge(chain(n)); + + // Set all stones in the smaller string to be part of the larger + // chain. + VirtualPoint cur = n; + do { + board_[cur].chain_head = largest_chain_head; + cur = board_[cur].chain_next; + } while (cur != n); + + // Connect the 2 linked lists representing the stones in the two + // chains. 
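+        // Each chain is kept as a circular singly-linked list through
+        // chain_next, so swapping one next pointer from each list splices the
+        // two cycles into a single cycle.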
+ std::swap(board_[largest_chain_head].chain_next, board_[n].chain_next); + } + } + }); + + board_[p].chain_next = board_[largest_chain_head].chain_next; + board_[largest_chain_head].chain_next = p; + board_[p].chain_head = largest_chain_head; + chain(largest_chain_head).num_stones += 1; + + Neighbours(p, [this, largest_chain_head](VirtualPoint n) { + if (IsEmpty(n)) { + chain(largest_chain_head).add_liberty(n); + } + }); +} + +void PhantomGoBoard::RemoveLibertyFromNeighbouringChains(VirtualPoint p) { + Neighbours(p, [this, p](VirtualPoint n) { chain(n).remove_liberty(p); }); +} + +int PhantomGoBoard::CaptureDeadChains(VirtualPoint p, GoColor c) { + int stones_captured = 0; + int capture_index = 0; + Neighbours(p, [this, c, &capture_index, &stones_captured](VirtualPoint n) { + if (PointColor(n) == OppColor(c) && chain(n).num_pseudo_liberties == 0) { + last_captures_[capture_index++] = ChainHead(n); + stones_captured += chain(n).num_stones; + RemoveChain(n); + } + }); + + for (; capture_index < last_captures_.size(); ++capture_index) { + last_captures_[capture_index] = kInvalidPoint; + } + + return stones_captured; +} + +void PhantomGoBoard::RemoveChain(VirtualPoint p) { + VirtualPoint this_chain_head = ChainHead(p); + VirtualPoint cur = p; + do { + VirtualPoint next = board_[cur].chain_next; + + SetStone(cur, GoColor::kEmpty); + InitNewChain(cur); + + Neighbours(cur, [this, this_chain_head, cur](VirtualPoint n) { + if (ChainHead(n) != this_chain_head || IsEmpty(n)) { + chain(n).add_liberty(cur); + } + }); + + cur = next; + } while (cur != p); +} + +void PhantomGoBoard::InitNewChain(VirtualPoint p) { + board_[p].chain_head = p; + board_[p].chain_next = p; + + Chain &c = chain(p); + c.reset(); + c.num_stones += 1; + + Neighbours(p, [this, &c](VirtualPoint n) { + if (IsEmpty(n)) { + c.add_liberty(n); + } + }); +} + +bool PhantomGoBoard::IsInBoardArea(VirtualPoint p) const { + auto rc = VirtualPointTo2DPoint(p); + return rc.first >= 0 && rc.first < board_size() && rc.second >= 0 && + rc.second < board_size(); +} + +bool PhantomGoBoard::IsLegalMoveObserver(VirtualPoint p, GoColor c) const { + if (p == kVirtualPass) return true; + if (!IsInBoardArea(p)) return false; + if (!IsEmpty(p) || p == LastKoPoint()) return false; + if (chain(p).num_pseudo_liberties > 0) return true; + + // For all checks below, the newly placed stone is completely surrounded by + // enemy and friendly stones. + + // Allow to play if the placed stones connects to a group that still has at + // least one other liberty after connecting. + bool has_liberty = false; + Neighbours(p, [this, c, &has_liberty](VirtualPoint n) { + has_liberty |= (PointColor(n) == c && !chain(n).in_atari()); + }); + if (has_liberty) return true; + + // Allow to play if the placed stone will kill at least one group. + bool kills_group = false; + Neighbours(p, [this, c, &kills_group](VirtualPoint n) { + kills_group |= (PointColor(n) == OppColor(c) && chain(n).in_atari()); + }); + if (kills_group) return true; + + return false; +} + +// returns true if is legal according to the vision of the player +bool PhantomGoBoard::IsLegalMove(VirtualPoint p, GoColor c) const { + if (observations_[(uint8_t)c][VirtualPointToBoardPoint(p, board_size_)] == + GoColor::kEmpty) { + return true; + } + return false; +} + +void PhantomGoBoard::Chain::reset_border() { + num_stones = 0; + // Need to have values big enough that they can never go below 0 even if + // all liberties are removed. 
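+  // The values below are sentinels for the guard chains around the border
+  // (32768 = 2^15, 2147483648 = 2^31), so remove_liberty() never underflows.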
+ num_pseudo_liberties = 4; + liberty_vertex_sum = 32768; + liberty_vertex_sum_squared = 2147483648; +} + +void PhantomGoBoard::Chain::reset() { + num_stones = 0; + num_pseudo_liberties = 0; + liberty_vertex_sum = 0; + liberty_vertex_sum_squared = 0; +} + +void PhantomGoBoard::Chain::merge(const Chain &other) { + num_stones += other.num_stones; + num_pseudo_liberties += other.num_pseudo_liberties; + liberty_vertex_sum += other.liberty_vertex_sum; + liberty_vertex_sum_squared += other.liberty_vertex_sum_squared; +} + +void PhantomGoBoard::Chain::add_liberty(VirtualPoint p) { + num_pseudo_liberties += 1; + liberty_vertex_sum += p; + liberty_vertex_sum_squared += + static_cast(p) * static_cast(p); +} + +void PhantomGoBoard::Chain::remove_liberty(VirtualPoint p) { + num_pseudo_liberties -= 1; + liberty_vertex_sum -= p; + liberty_vertex_sum_squared -= + static_cast(p) * static_cast(p); +} + +VirtualPoint PhantomGoBoard::Chain::single_liberty() const { + SPIEL_CHECK_TRUE(in_atari()); + // A point is in Atari if it has only a single liberty, i.e. all pseudo + // liberties are for the same point. + // This is true exactly when + // liberty_vertex_sum**2 == liberty_vertex_sum_squared * num_pseudo_liberties + // Since all pseudo liberties are for the same point, this is equivalent to + // (taking n = num_pseudo_liberties): + // (n * p)**2 = (n * p**2) * n + // Thus to obtain p, we simple need to divide out the number of pseudo + // liberties. + SPIEL_CHECK_EQ(liberty_vertex_sum % num_pseudo_liberties, 0); + return static_cast(liberty_vertex_sum / num_pseudo_liberties); +} + +std::string PhantomGoBoard::ToString() const { + std::ostringstream stream; + stream << *this; + return stream.str(); +} +std::string PhantomGoBoard::LastMoveInformationToString() const { + std::stringstream stream; + if (last_move_valid) { + stream << "Previous move was valid"; + if (last_move_pass) { + stream << " and was a pass"; + } + stream << "\n"; + } else { + stream << "Previous move was observational\n"; + } + + if (last_move_captured > 0) { + stream << "In previous move " << last_move_captured + << " stones were captured\n"; + } + return stream.str(); +} + +std::ostream &operator<<(std::ostream &os, const PhantomGoBoard &board) { + os << "\n"; + for (int row = board.board_size() - 1; row >= 0; --row) { + os << std::setw(2) << std::setfill(' ') << (row + 1) << " "; + for (int col = 0; col < board.board_size(); ++col) { + os << GoColorToChar( + board.PointColor(VirtualPointFrom2DPoint({row, col}))); + } + os << std::endl; + } + + std::string columns = "ABCDEFGHJKLMNOPQRST"; + os << " " << columns.substr(0, board.board_size()) << std::endl; + + // Encode the stones and print a URL that can be used to view the board. + std::string encoded; + for (VirtualPoint p : BoardPoints(board.board_size())) { + if (!board.IsEmpty(p)) { + encoded += MoveAsAscii(p, board.PointColor(p)); + } + } + + // TODO(author9): Make this a public URL. + // os << "http://jumper/goboard/" << encoded << "&size=" << board.board_size() + // << std::endl; + + return os; +} + +void PhantomGoBoard::GroupIter::step() { + --lib_i_; + while (lib_i_ < 0 && !marked_[chain_cur_]) { + Neighbours(chain_cur_, [this](VirtualPoint n) { + VirtualPoint head = board_->ChainHead(n); + if (board_->PointColor(head) == group_color_ && !marked_[head]) { + cur_libs_[++lib_i_] = head; + marked_[head] = true; + } + }); + marked_[chain_cur_] = true; + chain_cur_ = board_->board_[chain_cur_].chain_next; + } +} + +// Returns the number of points surrounded entirely by one color. 
+// Aborts early and returns 0 if the area borders both black and white stones. +int NumSurroundedPoints(const PhantomGoBoard &board, const VirtualPoint p, + std::array *marked, + bool *reached_black, bool *reached_white) { + if ((*marked)[p]) return 0; + (*marked)[p] = true; + + int num_points = 1; + Neighbours(p, [&board, &num_points, marked, reached_black, + reached_white](VirtualPoint n) { + switch (board.PointColor(n)) { + case GoColor::kBlack: + *reached_black = true; + break; + case GoColor::kWhite: + *reached_white = true; + break; + case GoColor::kEmpty: + num_points += + NumSurroundedPoints(board, n, marked, reached_black, reached_white); + break; + case GoColor::kGuard: + // Ignore the border. + break; + } + }); + + return num_points; +} + +float TrompTaylorScore(const PhantomGoBoard &board, float komi, int handicap) { + // The delta of how many points on the board black and white have occupied, + // from black's point of view, i.e. Black points - White points. + int occupied_delta = 0; + + // We need to keep track of which empty points we've already counted as part + // of a larger territory. + std::array marked; + marked.fill(false); + + for (VirtualPoint p : BoardPoints(board.board_size())) { + switch (board.PointColor(p)) { + case GoColor::kBlack: + ++occupied_delta; + break; + case GoColor::kWhite: + --occupied_delta; + break; + case GoColor::kEmpty: { + if (marked[p]) continue; + // If some empty points are surrounded entirely by one player, they + // count as that player's territory. + bool reached_black = false, reached_white = false; + int n = NumSurroundedPoints(board, p, &marked, &reached_black, + &reached_white); + if (reached_black && !reached_white) { + occupied_delta += n; + } else if (!reached_black && reached_white) { + occupied_delta -= n; + } + break; + } + case GoColor::kGuard: + SpielFatalError("unexpected color"); + } + } + + float score = occupied_delta - komi; + if (handicap >= 2) { + score -= handicap; + } + return score; +} + +PhantomGoBoard CreateBoard(const std::string &initial_stones) { + PhantomGoBoard board(9); + + int row = 0; + for (const auto &line : absl::StrSplit(initial_stones, '\n')) { + int col = 0; + bool stones_started = false; + for (const auto &c : line) { + if (c == ' ') { + if (stones_started) { + SpielFatalError( + "Whitespace is only allowed at the start of " + "the line. To represent empty intersections, " + "use +"); + } + continue; + } else if (c == 'X') { + stones_started = true; + SPIEL_CHECK_TRUE(board.PlayMove(VirtualPointFrom2DPoint({row, col}), + GoColor::kBlack)); + } else if (c == 'O') { + stones_started = true; + SPIEL_CHECK_TRUE(board.PlayMove(VirtualPointFrom2DPoint({row, col}), + GoColor::kWhite)); + } + col++; + } + row++; + } + + return board; +} + +} // namespace phantom_go +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/phantom_go/phantom_go_board.h b/scenarios/bargaining/open_spiel/open_spiel/games/phantom_go/phantom_go_board.h new file mode 100644 index 0000000..6496875 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/phantom_go/phantom_go_board.h @@ -0,0 +1,313 @@ +// Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_GO_PHANTOM_GO_BOARD_H_ +#define OPEN_SPIEL_GAMES_GO_PHANTOM_GO_BOARD_H_ + +#include +#include +#include +#include + +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace phantom_go { + +enum class GoColor : uint8_t { kBlack = 0, kWhite = 1, kEmpty = 2, kGuard = 3 }; + +std::string GoColorToString(GoColor c); + +std::ostream &operator<<(std::ostream &os, GoColor c); + +GoColor OppColor(GoColor c); + +// For simplicity and speed, we store the board in terms of a "virtual board", +// with a border of guard stones around all sides of the board. +// This allows us to skip bounds checking. +// In Virtual mode, an action (row, col) is row * 21 + col, and pass is 21*21+1. +// All functions in this file (except stated otherwise) use these virtual +// coordinates. +// +// However, in the OpenSpiel API (in go.{h, cc}), the actions are still exposed +// as actions within 0, board_size*boardsize) (with pass = board_size **2. +// +// Normal go is standardly played on board of size 19, for Phantom Go, standard +// is size 9 +inline constexpr int kMaxBoardSize = 19; +inline constexpr int kVirtualBoardSize = kMaxBoardSize + 2; +inline constexpr int kVirtualBoardPoints = + kVirtualBoardSize * kVirtualBoardSize; + +using VirtualPoint = uint16_t; + +VirtualPoint VirtualPointFromBoardPoint(int boardPoint, int boardSize); +int VirtualPointToBoardPoint(VirtualPoint p, int boardSize); + +inline constexpr VirtualPoint kInvalidPoint = 0; +inline constexpr VirtualPoint kVirtualPass = kVirtualBoardPoints + 1; + +// Returns the VirtualPoint corresponding to the provided coordinates, e.g. "d4" +// or "f10". +VirtualPoint MakePoint(std::string s); + +// Converts a VirtualPoint to a string representation. +std::string VirtualPointToString(VirtualPoint p); + +std::ostream &operator<<(std::ostream &os, VirtualPoint p); + +// Conversion functions between VirtualPoint and row/column representation. +std::pair VirtualPointTo2DPoint(VirtualPoint p); +// Returns the point identifier in the Virtual 21*21 board from the (row, col) +// 0-index coordinate in the concrete board. +VirtualPoint VirtualPointFrom2DPoint(std::pair row_col); + +// Converts an OpenSpiel action in range [0, board_size **2] to the +// Virtual board range [0, kVirtualPass], and vice-versa. +Action VirtualActionToAction(int virtual_action, int board_size); +int ActionToVirtualAction(Action action, int board_size); + +// Returns a reference to a vector that contains all points that are on a board +// of the specified size. +const std::vector &BoardPoints(int board_size); + +// To iterate over 4 neighbouring points, do +// +// VirtualPoint point; +// for (auto p = Neighbours4(point); p; ++p) { +// // Do something on p.. 
+// } +// +class Neighbours4 { + public: + explicit Neighbours4(const VirtualPoint p); + + Neighbours4 &operator++(); + const VirtualPoint operator*() const; + explicit operator bool() const; + + private: + VirtualPoint dir_; + const VirtualPoint p_; +}; + +class PhantomGoBoard { + public: + explicit PhantomGoBoard(int board_size); + + void Clear(); + + std::array GetStoneCount() const { return stone_count_; } + std::string ObservationsToString() const; + std::string ObservationToString(int player) const; + std::string LastMoveInformationToString() const; + bool LastMoveObservational() const { return !last_move_valid; } + bool LastMoveCapture() const { return last_move_captured > 0; } + std::array GetObservationByID( + int player_id) const; + + inline int board_size() const { return board_size_; } + + // Returns the concrete pass action. + inline int pass_action() const { return pass_action_; } + inline Action VirtualActionToAction(int virtual_action) const { + return phantom_go::VirtualActionToAction(virtual_action, board_size_); + } + inline int ActionToVirtualAction(Action action) const { + return phantom_go::ActionToVirtualAction(action, board_size_); + } + + inline GoColor PointColor(VirtualPoint p) const { return board_[p].color; } + + inline bool IsEmpty(VirtualPoint p) const { + return PointColor(p) == GoColor::kEmpty; + } + + bool IsInBoardArea(VirtualPoint p) const; + + bool IsLegalMove(VirtualPoint p, GoColor c) const; + + bool IsLegalMoveObserver(VirtualPoint p, GoColor c) const; + + bool PlayMove(VirtualPoint p, GoColor c); + + // kInvalidPoint if there is no ko, otherwise the point of the ko. + inline VirtualPoint LastKoPoint() const { return last_ko_point_; } + + // Count of pseudo-liberties, i.e. each liberty is counted between 1 and 4 + // times, once for each stone of the group that borders it. + // This is much faster than realLiberty(), so prefer it if possible. + inline int PseudoLiberty(VirtualPoint p) const { + return chain(p).num_pseudo_liberties == 0 + ? 0 + : (chain(p).in_atari() ? 1 : chain(p).num_pseudo_liberties); + } + + inline bool InAtari(VirtualPoint p) const { return chain(p).in_atari(); } + + // If a chain has a single liberty (it is in Atari), return that liberty. + VirtualPoint SingleLiberty(VirtualPoint p) const; + + // Actual liberty count, i.e. each liberty is counted exactly once. + // This is computed on the fly by actually walking the group and checking the + // neighbouring stones. + inline int RealLiberty(VirtualPoint p) const { + int num_lib = 0; + for (auto it = LibIter(p); it; ++it) { + ++num_lib; + } + return num_lib; + } + + inline uint64_t HashValue() const { return zobrist_hash_; } + + // Head of a chain; each chain has exactly one head that can be used to + // uniquely identify it. Chain heads may change over successive PlayMove()s. + inline VirtualPoint ChainHead(VirtualPoint p) const { + return board_[p].chain_head; + } + + // Number of stones in a chain. 
+ inline int ChainSize(VirtualPoint p) const { return chain(p).num_stones; } + + std::string ToString() const; + + class GroupIter { + public: + GroupIter(const PhantomGoBoard *board, VirtualPoint p, GoColor group_color) + : board_(board), lib_i_(0), group_color_(group_color) { + marked_.fill(false); + chain_head_ = board->ChainHead(p); + chain_cur_ = chain_head_; + step(); + } + + inline explicit operator bool() const { return lib_i_ >= 0; } + + inline VirtualPoint operator*() const { return cur_libs_[lib_i_]; } + + GroupIter &operator++() { + step(); + return *this; + } + + private: + void step(); + + const PhantomGoBoard *board_; + + std::array marked_; + std::array cur_libs_; + int lib_i_; + VirtualPoint chain_head_; + VirtualPoint chain_cur_; + GoColor group_color_; + }; + + GroupIter LibIter(VirtualPoint p) const { + return GroupIter(this, p, GoColor::kEmpty); + } + GroupIter OppIter(VirtualPoint p) const { + return GroupIter(this, p, OppColor(PointColor(p))); + } + + private: + void JoinChainsAround(VirtualPoint p, GoColor c); + void SetStone(VirtualPoint p, GoColor c); + void RemoveLibertyFromNeighbouringChains(VirtualPoint p); + int CaptureDeadChains(VirtualPoint p, GoColor c); + void RemoveChain(VirtualPoint p); + void InitNewChain(VirtualPoint p); + + // In this context, GoColor::kEmpty suggests, that a player does not know, + // what piece is on that exact spot + std::array, 2> + observations_; + + // On index 0 is stored count of black stones, on index 1 is stored count of + // white stones so it equals the enum of GoColor, where kBlack is 0 + std::array stone_count_; + + bool last_move_valid; + bool last_move_pass; + int last_move_captured; + + struct Vertex { + VirtualPoint chain_head; + VirtualPoint chain_next; + GoColor color; + }; + + struct Chain { + uint32_t liberty_vertex_sum_squared; + uint16_t liberty_vertex_sum; + uint16_t num_stones; + uint16_t num_pseudo_liberties; + + void reset(); + void reset_border(); + void merge(const Chain &other); + + inline bool in_atari() const { + return static_cast(num_pseudo_liberties) * + liberty_vertex_sum_squared == + static_cast(liberty_vertex_sum) * + static_cast(liberty_vertex_sum); + } + void add_liberty(VirtualPoint p); + void remove_liberty(VirtualPoint p); + VirtualPoint single_liberty() const; + }; + + Chain &chain(VirtualPoint p) { return chains_[ChainHead(p)]; } + const Chain &chain(VirtualPoint p) const { return chains_[ChainHead(p)]; } + + std::array board_; + std::array chains_; + + uint64_t zobrist_hash_; + + // Chains captured in the last move, kInvalidPoint otherwise. + std::array last_captures_; + + int board_size_; + int pass_action_; + + VirtualPoint last_ko_point_; +}; + +std::ostream &operator<<(std::ostream &os, const PhantomGoBoard &board); + +// Score according to https://senseis.xmp.net/?TrompTaylorRules. +float TrompTaylorScore(const PhantomGoBoard &board, float komi, + int handicap = 0); + +// Generates a go board from the given string, setting X to black stones and O +// to white stones. The first character of the first line is mapped to A1, the +// second character to B1, etc, as below: +// ABCDEFGH +// 1 ++++XO++ +// 2 XXXXXO++ +// 3 OOOOOO++ +// 4 ++++++++ +// The board will always be 19x19. +// This exists mostly for test purposes. +// WARNING: This coordinate system is different from the representation in +// GoBoard in which A1 is at the bottom left. 
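+// For example, CreateBoard("++X\n+O+") yields a board with a black stone on
+// c1 and a white stone on b2.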
+PhantomGoBoard CreateBoard(const std::string &initial_stones); + +} // namespace phantom_go +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_GO_PHANTOM_GO_BOARD_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/phantom_go/phantom_go_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/phantom_go/phantom_go_test.cc new file mode 100644 index 0000000..a9fc7ce --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/phantom_go/phantom_go_test.cc @@ -0,0 +1,120 @@ +// Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/phantom_go/phantom_go.h" + +#include "open_spiel/games/phantom_go/phantom_go_board.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace phantom_go { +namespace { + +namespace testing = open_spiel::testing; + +constexpr int kBoardSize = 9; +constexpr float kKomi = 7.5; + +void BasicGoTests() { + GameParameters params; + params["board_size"] = GameParameter(9); + + testing::LoadGameTest("phantom_go"); + testing::NoChanceOutcomesTest(*LoadGame("phantom_go")); + testing::RandomSimTest(*LoadGame("phantom_go", params), 1); + testing::RandomSimTestWithUndo(*LoadGame("phantom_go", params), 1); +} + +void CloneTest() { + GameParameters params; + params["board_size"] = GameParameter(kBoardSize); + std::shared_ptr game = LoadGame("phantom_go", params); + PhantomGoState state(game, kBoardSize, kKomi, 0); + state.ApplyAction(5); + + std::unique_ptr stateClone = state.Clone(); + + SPIEL_CHECK_EQ(state.ToString(), stateClone->ToString()); + SPIEL_CHECK_EQ(state.History(), stateClone->History()); + + state.ApplyAction(8); + + SPIEL_CHECK_FALSE(state.ToString() == stateClone->ToString()); + SPIEL_CHECK_FALSE(state.History() == stateClone->History()); +} + +void HandicapTest() { + std::shared_ptr game = LoadGame( + "phantom_go", {{"board_size", open_spiel::GameParameter(kBoardSize)}, + {"komi", open_spiel::GameParameter(kKomi)}, + {"handicap", open_spiel::GameParameter(1)}}); + PhantomGoState state(game, kBoardSize, kKomi, 2); + SPIEL_CHECK_EQ(state.CurrentPlayer(), ColorToPlayer(GoColor::kWhite)); + SPIEL_CHECK_EQ(state.board().PointColor(MakePoint("d4")), GoColor::kBlack); +} + +void IllegalMoveTest() { + GameParameters params; + params["board_size"] = GameParameter(kBoardSize); + std::shared_ptr game = LoadGame("phantom_go", params); + PhantomGoState state(game, kBoardSize, kKomi, 0); + SPIEL_CHECK_EQ(state.CurrentPlayer(), ColorToPlayer(GoColor::kBlack)); + state.ApplyAction(5); + SPIEL_CHECK_EQ(state.CurrentPlayer(), ColorToPlayer(GoColor::kWhite)); + state.ApplyAction(5); + SPIEL_CHECK_EQ(state.CurrentPlayer(), ColorToPlayer(GoColor::kWhite)); +} + +void StoneCountTest() { + GameParameters params; + params["board_size"] = GameParameter(kBoardSize); + std::shared_ptr game = LoadGame("phantom_go", params); + PhantomGoState state(game, kBoardSize, kKomi, 0); + 
SPIEL_CHECK_EQ(state.board().GetStoneCount()[(uint8_t)GoColor::kBlack], 0); + SPIEL_CHECK_EQ(state.board().GetStoneCount()[(uint8_t)GoColor::kWhite], 0); + state.ApplyAction(5); + SPIEL_CHECK_EQ(state.board().GetStoneCount()[(uint8_t)GoColor::kBlack], 1); + SPIEL_CHECK_EQ(state.board().GetStoneCount()[(uint8_t)GoColor::kWhite], 0); + state.ApplyAction(6); + SPIEL_CHECK_EQ(state.board().GetStoneCount()[(uint8_t)GoColor::kBlack], 1); + SPIEL_CHECK_EQ(state.board().GetStoneCount()[(uint8_t)GoColor::kWhite], 1); +} + +void ConcreteActionsAreUsedInTheAPI() { + std::shared_ptr game = LoadGame( + "phantom_go", {{"board_size", open_spiel::GameParameter(kBoardSize)}}); + std::unique_ptr state = game->NewInitialState(); + + SPIEL_CHECK_EQ(state->NumDistinctActions(), kBoardSize * kBoardSize + 1); + SPIEL_CHECK_EQ(state->LegalActions().size(), state->NumDistinctActions()); + for (Action action : state->LegalActions()) { + SPIEL_CHECK_GE(action, 0); + SPIEL_CHECK_LE(action, kBoardSize * kBoardSize); + } +} + +} // namespace +} // namespace phantom_go +} // namespace open_spiel + +int main(int argc, char **argv) { + open_spiel::phantom_go::CloneTest(); + open_spiel::phantom_go::BasicGoTests(); + open_spiel::phantom_go::HandicapTest(); + open_spiel::phantom_go::ConcreteActionsAreUsedInTheAPI(); + open_spiel::phantom_go::IllegalMoveTest(); + open_spiel::phantom_go::StoneCountTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/phantom_ttt/phantom_ttt.cc b/scenarios/bargaining/open_spiel/open_spiel/games/phantom_ttt/phantom_ttt.cc new file mode 100644 index 0000000..d4f4112 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/phantom_ttt/phantom_ttt.cc @@ -0,0 +1,363 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/phantom_ttt/phantom_ttt.h" + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/games/tic_tac_toe/tic_tac_toe.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/observer.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace phantom_ttt { +namespace { + +using tic_tac_toe::kCellStates; +using tic_tac_toe::kNumCells; +using tic_tac_toe::kNumCols; +using tic_tac_toe::kNumRows; + +using tic_tac_toe::CellState; + +using tic_tac_toe::PlayerToState; +using tic_tac_toe::StateToString; + +// Facts about the game. 
+const GameType kGameType{ + /*short_name=*/"phantom_ttt", + /*long_name=*/"Phantom Tic Tac Toe", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kImperfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"obstype", GameParameter(std::string(kDefaultObsType))}, + {"gameversion", GameParameter(std::string(kDefaultGameVersion))}}}; + +const GameType kImperfectRecallGameType{ + /*short_name=*/"phantom_ttt_ir", + /*long_name=*/"Phantom Tic Tac Toe with Imperfect Recall", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kImperfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/false, + /*provides_observation_tensor=*/false, + /*parameter_specification=*/ + {{"obstype", GameParameter(std::string(kDefaultObsType))}, + {"gameversion", GameParameter(std::string(kDefaultGameVersion))}}}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new PhantomTTTGame(params, kGameType)); +} + +std::shared_ptr ImperfectRecallFactory( + const GameParameters& params) { + return std::shared_ptr(new ImperfectRecallPTTTGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +REGISTER_SPIEL_GAME(kImperfectRecallGameType, ImperfectRecallFactory); +RegisterSingleTensorObserver single_tensor_imperfect_recall( + kImperfectRecallGameType.short_name); + +} // namespace + +ImperfectRecallPTTTGame::ImperfectRecallPTTTGame(const GameParameters& params) + : PhantomTTTGame(params, kImperfectRecallGameType) {} + +PhantomTTTState::PhantomTTTState(std::shared_ptr game, + GameVersion game_version, + ObservationType obs_type) + : State(game), + state_(game), + obs_type_(obs_type), + game_version_(game_version) { + std::fill(begin(x_view_), end(x_view_), CellState::kEmpty); + std::fill(begin(o_view_), end(o_view_), CellState::kEmpty); + if (obs_type_ == ObservationType::kRevealNumTurns) { + // Reserve 0 for the player and 10 as "I don't know." + bits_per_action_ = kNumCells + 2; + // Longest sequence is 17 moves, e.g. 0011223344556677889 + longest_sequence_ = 2 * kNumCells - 1; + } else { + SPIEL_CHECK_EQ(obs_type_, ObservationType::kRevealNothing); + bits_per_action_ = kNumCells; + longest_sequence_ = kNumCells; + } +} + +void PhantomTTTState::DoApplyAction(Action move) { + // Current player's view. + Player cur_player = CurrentPlayer(); + auto& cur_view = cur_player == 0 ? x_view_ : o_view_; + + // Either occupied or not + if (game_version_ == GameVersion::kClassicalPhantomTicTacToe) { + if (state_.BoardAt(move) == CellState::kEmpty) { + state_.ApplyAction(move); + } + } else if (game_version_ == GameVersion::kAbruptPhantomTicTacToe) { + if (state_.BoardAt(move) == CellState::kEmpty) { + state_.ApplyAction(move); + } else { + // switch the current player + state_.ChangePlayer(); + } + } else { + SpielFatalError("Unknown game version"); + } + + // Update current player's view, and action sequence. 
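+  // LegalActions() only offers cells the acting player has not tried yet, so
+  // the player's view of the chosen cell must still be empty here.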
+ SPIEL_CHECK_EQ(cur_view[move], CellState::kEmpty); + cur_view[move] = state_.BoardAt(move); + action_sequence_.push_back(std::pair(cur_player, move)); + + // Note: do not modify player's turn here, it will have been done above + // if necessary. +} + +std::vector PhantomTTTState::LegalActions() const { + if (IsTerminal()) return {}; + std::vector moves; + const Player player = CurrentPlayer(); + const auto& cur_view = player == 0 ? x_view_ : o_view_; + + for (Action move = 0; move < kNumCells; ++move) { + if (cur_view[move] == CellState::kEmpty) { + moves.push_back(move); + } + } + + return moves; +} + +std::string PhantomTTTState::ViewToString(Player player) const { + const auto& cur_view = player == 0 ? x_view_ : o_view_; + std::string str; + for (int r = 0; r < kNumRows; ++r) { + for (int c = 0; c < kNumCols; ++c) { + absl::StrAppend(&str, StateToString(cur_view[r * kNumCols + c])); + } + if (r < (kNumRows - 1)) { + absl::StrAppend(&str, "\n"); + } + } + return str; +} + +std::string PhantomTTTState::ActionSequenceToString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + std::string str; + for (const auto& player_with_action : action_sequence_) { + if (player_with_action.first == player) { + // Always include the observing player's actions. + absl::StrAppend(&str, player_with_action.first, ","); + absl::StrAppend(&str, player_with_action.second, " "); + } else if (obs_type_ == ObservationType::kRevealNumTurns) { + // If the number of turns are revealed, then each of the other player's + // actions will show up as unknowns. + absl::StrAppend(&str, player_with_action.first, ",? "); + } else { + // Do not reveal anything about the number of actions taken by opponent. + SPIEL_CHECK_EQ(obs_type_, ObservationType::kRevealNothing); + } + } + return str; +} + +std::string PhantomTTTState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + std::string str; + absl::StrAppend(&str, ViewToString(player), "\n"); + if (obs_type_ != ObservationType::kRevealNothing) { + absl::StrAppend(&str, history_.size(), "\n"); + } + absl::StrAppend(&str, ActionSequenceToString(player)); + return str; +} + +void PhantomTTTState::InformationStateTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + // First 27 bits encodes the player's view in the same way as TicTacToe. + // Then the action sequence follows (one-hot encoded, per action). + // Encoded in the same way as InformationStateAsString, so full sequences + // which may contain action value 10 to represent "I don't know." + const auto& player_view = player == 0 ? x_view_ : o_view_; + SPIEL_CHECK_EQ(values.size(), kNumCells * kCellStates + + longest_sequence_ * bits_per_action_); + std::fill(values.begin(), values.end(), 0.); + for (int cell = 0; cell < kNumCells; ++cell) { + values[kNumCells * static_cast(player_view[cell]) + cell] = 1.0; + } + + // Now encode the sequence. Each (player, action) pair uses 11 bits: + // - first bit is the player taking the action (0 or 1) + // - next 10 bits is the one-hot encoded action (10 = "I don't know") + int offset = kNumCells * kCellStates; + for (const auto& player_with_action : action_sequence_) { + if (player_with_action.first == player) { + // Always include the observing player's actions. 
+ if (obs_type_ == ObservationType::kRevealNumTurns) { + values[offset] = player_with_action.first; // Player 0 or 1 + values[offset + 1 + player_with_action.second] = 1.0; + } else { + // Here we don't need to encode the player since we won't see opponent + // moves. + SPIEL_CHECK_EQ(obs_type_, ObservationType::kRevealNothing); + values[offset + player_with_action.second] = 1.0; + } + offset += bits_per_action_; + } else if (obs_type_ == ObservationType::kRevealNumTurns) { + // If the number of turns are revealed, then each of the other player's + // actions will show up as unknowns. + values[offset] = player_with_action.first; + values[offset + 1 + kNumCells] = 1.0; // I don't know. + offset += bits_per_action_; + } else { + // Do not reveal anything about the number of actions taken by opponent. + SPIEL_CHECK_EQ(obs_type_, ObservationType::kRevealNothing); + } + } +} + +std::string PhantomTTTState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + std::string observation = ViewToString(player); + if (obs_type_ == ObservationType::kRevealNumTurns) { + absl::StrAppend(&observation, "\nTotal turns: ", action_sequence_.size()); + } + return observation; +} + +void PhantomTTTState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + SPIEL_CHECK_EQ(values.size(), game_->ObservationTensorSize()); + std::fill(values.begin(), values.end(), 0.); + + // First 27 bits encodes the player's view in the same way as TicTacToe. + const auto& player_view = player == 0 ? x_view_ : o_view_; + for (int cell = 0; cell < kNumCells; ++cell) { + values[kNumCells * static_cast(player_view[cell]) + cell] = 1.0; + } + + // Then a one-hot to represent total number of turns. + if (obs_type_ == ObservationType::kRevealNumTurns) { + values[kNumCells * kCellStates + action_sequence_.size()] = 1.0; + } +} + +std::unique_ptr PhantomTTTState::Clone() const { + return std::unique_ptr(new PhantomTTTState(*this)); +} + +void PhantomTTTState::UndoAction(Player player, Action move) { + Action last_move = action_sequence_.back().second; + SPIEL_CHECK_EQ(last_move, move); + + if (state_.BoardAt(move) == PlayerToState(player)) { + // If the board has a mark that is the undoing player, then this was + // a successful move. Undo as normal. + state_.UndoAction(player, move); + } + + // Undo the action from that player's view, and pop from the action seq + auto& player_view = player == 0 ? x_view_ : o_view_; + player_view[move] = CellState::kEmpty; + action_sequence_.pop_back(); + + history_.pop_back(); + --move_number_; + // Note, do not change the player.. this will already have been done above + // if necessary. +} + +PhantomTTTGame::PhantomTTTGame(const GameParameters& params, GameType game_type) + : Game(game_type, params), + game_(std::static_pointer_cast( + LoadGame("tic_tac_toe"))) { + std::string obs_type = ParameterValue("obstype"); + if (obs_type == "reveal-nothing") { + obs_type_ = ObservationType::kRevealNothing; + bits_per_action_ = kNumCells; + longest_sequence_ = kNumCells; + } else if (obs_type == "reveal-numturns") { + obs_type_ = ObservationType::kRevealNumTurns; + // Reserve 0 for the player and 10 as "I don't know." + bits_per_action_ = kNumCells + 2; + // Longest sequence is 17 moves, e.g. 
0011223344556677889 + longest_sequence_ = 2 * kNumCells - 1; + } else { + SpielFatalError(absl::StrCat("Unrecognized observation type: ", obs_type)); + } + + std::string game_version = ParameterValue("gameversion"); + if (game_version == "classical") { + game_version_ = GameVersion::kClassicalPhantomTicTacToe; + } else if (game_version == "abrupt") { + game_version_ = GameVersion::kAbruptPhantomTicTacToe; + } else { + SpielFatalError(absl::StrCat("Unrecognized game version: ", game_version)); + } +} + +std::vector PhantomTTTGame::InformationStateTensorShape() const { + // Enc + return {1, kNumCells * kCellStates + longest_sequence_ * bits_per_action_}; +} + +std::vector PhantomTTTGame::ObservationTensorShape() const { + if (obs_type_ == ObservationType::kRevealNothing) { + return {kNumCells * kCellStates}; + } else if (obs_type_ == ObservationType::kRevealNumTurns) { + return {kNumCells * kCellStates + longest_sequence_}; + } else { + SpielFatalError("Unknown observation type"); + } +} + +} // namespace phantom_ttt +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/phantom_ttt/phantom_ttt.h b/scenarios/bargaining/open_spiel/open_spiel/games/phantom_ttt/phantom_ttt.h new file mode 100644 index 0000000..8bf7f8a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/phantom_ttt/phantom_ttt.h @@ -0,0 +1,204 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_PHANTOM_TTT_H_ +#define OPEN_SPIEL_GAMES_PHANTOM_TTT_H_ + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/games/tic_tac_toe/tic_tac_toe.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// Phantom Tic-Tac-Toe is a phantom version of the classic game of Tic-Tac-Toe +// (Noughts and Crosses). For some perfect information game X", the game +// "phantom X" is a version of the game X where the players do not observe the +// other players' pieces. Only a referee knows the full state of the board. +// So, on a player's turn, a chosen moves may fail because it is illegal given +// the true state of the board; in this case, a player can continue to try moves +// until one succeeds. +// +// Common phantom games include Kriegspiel (Phantom chess), e.g. see +// https://en.wikipedia.org/wiki/Kriegspiel_(chess), and Phantom Go. +// See also http://mlanctot.info/files/papers/PhD_Thesis_MarcLanctot.pdf, Ch 3. +// +// In the classical version, if a move fails, the player is allowed to take +// another turn. In the abrupt version, the player loses their turn. This +// version was recently used in Rudolph et al. '25, Reevaluating Policy Gradient +// Methods for Imperfect Information Games, https://arxiv.org/abs/2502.08938. 
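The practical difference between the two game versions is easiest to see by replaying the same blocked move in both, as the ClassicalVsAbrubptTest in phantom_ttt_test.cc does. A minimal sketch using the Python bindings; it assumes the pyspiel extension built by this repository's Dockerfile exposes this game:

```python
# Hedged sketch: requires the pyspiel bindings built in this repo's image.
import pyspiel

for version in ("classical", "abrupt"):
    game = pyspiel.load_game(f"phantom_ttt(gameversion={version})")
    state = game.new_initial_state()
    state.apply_action(4)  # Player 0 takes the centre square.
    state.apply_action(4)  # Player 1 probes the same, now occupied, square.
    # classical: the probe fails but player 1 keeps the turn -> current player 1
    # abrupt:    the failed probe forfeits the turn          -> current player 0
    print(version, state.current_player())
```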
+// +// Parameters: +/// "obstype", string, "reveal-nothing" (default) or "reveal-numturns" +/// "gameversion", string, "classical" (default) or "abrupt" + +namespace open_spiel { +namespace phantom_ttt { + +inline constexpr const char* kDefaultObsType = "reveal-nothing"; +inline constexpr const char* kDefaultGameVersion = "classical"; + +enum class ObservationType { + kRevealNothing, + kRevealNumTurns, +}; + +enum class GameVersion { + kAbruptPhantomTicTacToe, + kClassicalPhantomTicTacToe, +}; + +// State of an in-play game. +class PhantomTTTState : public State { + public: + PhantomTTTState(std::shared_ptr game, GameVersion game_version, + ObservationType obs_type); + + // Forward to underlying game state + Player CurrentPlayer() const override { return state_.CurrentPlayer(); } + std::string ActionToString(Player player, Action action_id) const override { + return state_.ActionToString(player, action_id); + } + std::string ToString() const override { return state_.ToString(); } + bool IsTerminal() const override { return state_.IsTerminal(); } + std::vector Returns() const override { return state_.Returns(); } + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + + // These are implemented for phantom games + std::string InformationStateString(Player player) const override; + void InformationStateTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + void UndoAction(Player player, Action move) override; + std::vector LegalActions() const override; + + protected: + void DoApplyAction(Action move) override; + std::string ViewToString(Player player) const; + + private: + std::string ActionSequenceToString(Player player) const; + + tic_tac_toe::TicTacToeState state_; + ObservationType obs_type_; + GameVersion game_version_; + int bits_per_action_; + int longest_sequence_; + + // TODO(author2): Use the base class history_ instead. + std::vector> action_sequence_; + std::array x_view_; + std::array o_view_; +}; + +// Game object. +class PhantomTTTGame : public Game { + public: + PhantomTTTGame(const GameParameters& params, GameType game_type); + std::unique_ptr NewInitialState() const override { + return std::unique_ptr( + new PhantomTTTState(shared_from_this(), game_version_, obs_type_)); + } + int NumDistinctActions() const override { + return game_->NumDistinctActions(); + } + int NumPlayers() const override { return game_->NumPlayers(); } + double MinUtility() const override { return game_->MinUtility(); } + absl::optional UtilitySum() const override { + return game_->UtilitySum(); + } + double MaxUtility() const override { return game_->MaxUtility(); } + std::string ActionToString(Player player, Action action_id) const override { + return game_->ActionToString(player, action_id); + } + + + // These will depend on the obs_type parameter. + std::vector InformationStateTensorShape() const override; + std::vector ObservationTensorShape() const override; + int MaxGameLength() const override { return game_->MaxGameLength() * 2 - 1; } + + ObservationType obs_type() const { return obs_type_; } + GameVersion game_version() const { return game_version_; } + + private: + std::shared_ptr game_; + ObservationType obs_type_; + GameVersion game_version_; + int bits_per_action_; + int longest_sequence_; +}; + +// Implements the FOE abstraction from Lanctot et al. 
'12 +// http://mlanctot.info/files/papers/12icml-ir.pdf +class ImperfectRecallPTTTState : public PhantomTTTState { + public: + ImperfectRecallPTTTState(std::shared_ptr game, + GameVersion game_version, ObservationType obs_type) + : PhantomTTTState(game, game_version, obs_type) {} + std::string InformationStateString(Player player) const override { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return absl::StrCat("P", player, " ", ViewToString(player)); + } + std::unique_ptr Clone() const override { + return std::unique_ptr(new ImperfectRecallPTTTState(*this)); + } +}; + +class ImperfectRecallPTTTGame : public PhantomTTTGame { + public: + explicit ImperfectRecallPTTTGame(const GameParameters& params); + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new ImperfectRecallPTTTState( + shared_from_this(), game_version(), obs_type())); + } +}; + +inline std::ostream& operator<<(std::ostream& stream, + const ObservationType& obs_type) { + switch (obs_type) { + case ObservationType::kRevealNothing: + return stream << "Reveal Nothing"; + case ObservationType::kRevealNumTurns: + return stream << "Reveal Num Turns"; + default: + SpielFatalError("Unknown observation type"); + } +} + +inline std::ostream& operator<<(std::ostream& stream, + const GameVersion& game_version) { + switch (game_version) { + case GameVersion::kClassicalPhantomTicTacToe: + return stream << "Classical Phantom Tic Tac Toe"; + case GameVersion::kAbruptPhantomTicTacToe: + return stream << "Abrupt Phantom Tic Tac Toe"; + default: + SpielFatalError("Unknown game version"); + } +} + +} // namespace phantom_ttt +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_PHANTOM_TTT_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/phantom_ttt/phantom_ttt_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/phantom_ttt/phantom_ttt_test.cc new file mode 100644 index 0000000..6d0ea63 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/phantom_ttt/phantom_ttt_test.cc @@ -0,0 +1,56 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace phantom_ttt { +namespace { + +namespace testing = open_spiel::testing; + +void ClassicalVsAbrubptTest() { + std::shared_ptr classical_game = + LoadGame("phantom_ttt(gameversion=classical)"); + std::shared_ptr abrupt_game = + LoadGame("phantom_ttt(gameversion=abrupt)"); + std::unique_ptr classical_state = classical_game->NewInitialState(); + classical_state->ApplyAction(4); + classical_state->ApplyAction(4); + SPIEL_CHECK_EQ(classical_state->CurrentPlayer(), 1); + std::unique_ptr abrupt_state = abrupt_game->NewInitialState(); + abrupt_state->ApplyAction(4); + abrupt_state->ApplyAction(4); + SPIEL_CHECK_EQ(abrupt_state->CurrentPlayer(), 0); +} + +void BasicPhantomTTTTests() { + testing::LoadGameTest("phantom_ttt"); + testing::NoChanceOutcomesTest(*LoadGame("phantom_ttt")); + testing::RandomSimTest(*LoadGame("phantom_ttt"), 100); + testing::RandomSimTestWithUndo(*LoadGame("phantom_ttt"), 1); +} + +} // namespace +} // namespace phantom_ttt +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::phantom_ttt::BasicPhantomTTTTests(); + open_spiel::phantom_ttt::ClassicalVsAbrubptTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/pig/pig.cc b/scenarios/bargaining/open_spiel/open_spiel/games/pig/pig.cc new file mode 100644 index 0000000..80da426 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/pig/pig.cc @@ -0,0 +1,264 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/pig/pig.h" + +#include + +#include + +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/utils/tensor_view.h" + +namespace open_spiel { +namespace pig { + +namespace { +// Moves. +enum ActionType { kRoll = 0, kStop = 1 }; + +// Bin size for the information state vectors: how many score values to put +// into one bin. Higher means more coarser. +constexpr int kBinSize = 1; + +// Default parameters. 
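+// These mirror the parameter table documented in pig.h. kBinSize above only
+// affects the observation encoding, not the rules of the game.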
+constexpr int kDefaultDiceOutcomes = 6; +constexpr int kDefaultHorizon = 1000; +constexpr int kDefaultPlayers = 2; +constexpr int kDefaultWinScore = 100; +constexpr bool kDefaultPiglet = false; + +// Facts about the game +const GameType kGameType{ + /*short_name=*/"pig", + /*long_name=*/"Pig", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/10, + /*min_num_players=*/2, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + { + {"players", GameParameter(kDefaultPlayers)}, + {"horizon", GameParameter(kDefaultHorizon)}, + {"winscore", GameParameter(kDefaultWinScore)}, + {"diceoutcomes", GameParameter(kDefaultDiceOutcomes)}, + {"piglet", GameParameter(kDefaultPiglet)}, + }}; + +static std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new PigGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); +} // namespace + +std::string PigState::ActionToString(Player player, Action move_id) const { + if (player == kChancePlayerId) { + return absl::StrCat("Roll ", piglet_ ? move_id : 1 + move_id); + } else if (move_id == kRoll) { + return "roll"; + } else { + return "stop"; + } +} + +bool PigState::IsTerminal() const { + if (total_moves_ >= horizon_) { + return true; + } + + for (auto p = Player{0}; p < num_players_; p++) { + if (scores_[p] >= win_score_) { + return true; + } + } + return false; +} + +std::vector PigState::Returns() const { + if (!IsTerminal()) { + return std::vector(num_players_, 0.0); + } + + // For (n>2)-player games, must keep it zero-sum. + std::vector returns(num_players_, -1.0 / (num_players_ - 1)); + + for (auto player = Player{0}; player < num_players_; ++player) { + if (scores_[player] >= win_score_) { + returns[player] = 1.0; + return returns; + } + } + + // Nobody has won? (e.g. over horizon length.) Then everyone gets 0. + return std::vector(num_players_, 0.0); +} + +std::string PigState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +std::vector PigGame::ObservationTensorShape() const { + int num_bins = (win_score_ / kBinSize) + 1; + return {1 + num_players_, num_bins}; +} + +void PigState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + // One extra bin for when value is >= max. + // So for win_score_ 100, binSize = 1 -> 0, 1, ..., 99, >= 100. + int num_bins = (win_score_ / kBinSize) + 1; + + // One-hot encoding: turn total (#bin) followed by p1, p2, ... + // Treat `values` as a 2-d tensor. + TensorView<2> view(values, {1 + num_players_, num_bins}, true); + + // One-hot encoding: + // - turn total (#bins) + // - player 0 (#bins) + // - player 1 (#bins) + // . + // . + // . 
+ + // turn total + view[{0, std::min(turn_total_ / kBinSize, num_bins - 1)}] = 1; + + for (auto p = Player{0}; p < num_players_; p++) { + // score of each player + view[{1 + p, std::min(scores_[p] / kBinSize, num_bins - 1)}] = 1; + } +} + +PigState::PigState(std::shared_ptr game, int dice_outcomes, + int horizon, int win_score, bool piglet) + : State(game), + dice_outcomes_(dice_outcomes), + horizon_(horizon), + win_score_(win_score), + piglet_(piglet) { + total_moves_ = 0; + cur_player_ = 0; + turn_player_ = 0; + scores_.resize(game->NumPlayers(), 0); + turn_total_ = 0; +} + +int PigState::CurrentPlayer() const { + return IsTerminal() ? kTerminalPlayerId : cur_player_; +} + +void PigState::DoApplyAction(Action move) { + // For decision node: 0 means roll, 1 means stop. + // For chance node: outcome of the dice (x-1, piglet: [x != 1]). + if (cur_player_ >= 0 && move == kRoll) { + // Player roll -> chance node. + cur_player_ = kChancePlayerId; + total_moves_++; + } else if (cur_player_ >= 0 && move == kStop) { + // Player stops. Take turn total and pass to next player. + scores_[turn_player_] += turn_total_; + turn_total_ = 0; + turn_player_ = NextPlayerRoundRobin(turn_player_, num_players_); + cur_player_ = turn_player_; + total_moves_++; + } else if (IsChanceNode()) { + // Resolve chance node outcome. If 1, reset turn total and change players; + // else, add to total and keep going. + if (move == 0) { + // Reset turn total and loses turn! + turn_total_ = 0; + turn_player_ = NextPlayerRoundRobin(turn_player_, num_players_); + cur_player_ = turn_player_; + } else { + // Add to the turn total. + turn_total_ += (piglet_ ? 1 : move + 1); + cur_player_ = turn_player_; + } + } else { + SpielFatalError(absl::StrCat("Move ", move, " is invalid.")); + } +} + +std::vector PigState::LegalActions() const { + if (IsChanceNode()) { + return LegalChanceOutcomes(); + } else if (IsTerminal()) { + return {}; + } else { + if (scores_[cur_player_] + turn_total_ >= win_score_) { + return {kStop}; + } else { + return {kRoll, kStop}; + } + } +} + +std::vector> PigState::ChanceOutcomes() const { + SPIEL_CHECK_TRUE(IsChanceNode()); + std::vector> outcomes; + + if (piglet_) { + // Chance outcomes are labelled 0 or 1, corresponding to rolling 1 or not 1 + // respectively + outcomes.reserve(2); + outcomes.push_back(std::make_pair(0, 1.0 / dice_outcomes_)); + outcomes.push_back(std::make_pair(1, 1.0 - (1.0 / dice_outcomes_))); + } else { + // Chance outcomes are labelled 0+, corresponding to rolling 1+x. + outcomes.reserve(dice_outcomes_); + for (int i = 0; i < dice_outcomes_; i++) { + outcomes.push_back(std::make_pair(i, 1.0 / dice_outcomes_)); + } + } + + return outcomes; +} + +std::string PigState::ToString() const { + return absl::StrCat("Scores: ", absl::StrJoin(scores_, " "), + ", Turn total: ", turn_total_, + "\nCurrent player: ", turn_player_, + (cur_player_ == kChancePlayerId ? 
" (rolling)\n" : "\n")); +} + +std::unique_ptr PigState::Clone() const { + return std::unique_ptr(new PigState(*this)); +} + +PigGame::PigGame(const GameParameters& params) + : Game(kGameType, params), + dice_outcomes_(ParameterValue("diceoutcomes")), + horizon_(ParameterValue("horizon")), + num_players_(ParameterValue("players")), + win_score_(ParameterValue("winscore")), + piglet_(ParameterValue("piglet")) {} + +} // namespace pig +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/pig/pig.h b/scenarios/bargaining/open_spiel/open_spiel/games/pig/pig.h new file mode 100644 index 0000000..b9e04db --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/pig/pig.h @@ -0,0 +1,141 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_PIG_H_ +#define OPEN_SPIEL_GAMES_PIG_H_ + +#include +#include +#include + +#include "open_spiel/spiel.h" + +// A simple jeopardy dice game that includes chance nodes. +// See http://cs.gettysburg.edu/projects/pig/index.html for details. +// Also https://en.wikipedia.org/wiki/Pig_(dice_game) +// +// Piglet variant: Instead of increasing the running total by the roll results, +// it is always increased by a fixed step size of 1 upon rolling anything higher +// than a 1. [Note: Internally, this behaviour is modelled with only two chance +// outcomes, rolling a 1 or rolling anything higher than that.] +// Divide winscore by the average dice outcome != 1 (i.e. by diceoutcomes/2 + 1) +// when enabling Piglet to play a game that's roughly equivalent to the +// corresponding Pig game. The main advantage of this variant is thus a greatly +// reduced state space, making the game accessible to tabular methods. +// See also http://cs.gettysburg.edu/~tneller/papers/pig.zip. The original +// Piglet variant described there is played with a fair coin and a winscore +// of 10. This behaviour can be achieved by setting diceoutcomes = 2, winscore = +// 10, piglet = true. +// +// Parameters: +// "diceoutcomes" int number of outcomes of the dice (default = 6) +// "horizon" int max number of moves before draw (default = 1000) +// "players" int number of players (default = 2) +// "winscore" int number of points needed to win (default = 100) +// "piglet" bool is piglet variant enabled? 
(default = false) + +namespace open_spiel { +namespace pig { + +class PigGame; + +class PigState : public State { + public: + PigState(const PigState&) = default; + PigState(std::shared_ptr game, int dice_outcomes, int horizon, + int win_score, bool piglet); + + Player CurrentPlayer() const override; + std::string ActionToString(Player player, Action move_id) const override; + std::vector> ChanceOutcomes() const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + + std::unique_ptr Clone() const override; + + int score(const int player_id) const { return scores_[player_id]; } + int dice_outcomes() const { return dice_outcomes_; } + std::vector LegalActions() const override; + + protected: + void DoApplyAction(Action move_id) override; + + private: + // Initialize to bad/invalid values. Use open_spiel::NewInitialState() + int dice_outcomes_ = -1; // Number of different dice outcomes (eg, 6). + int horizon_ = -1; + int nplayers_ = -1; + int win_score_ = 0; + bool piglet_ = false; + + int total_moves_ = -1; // Total num moves taken during the game. + Player cur_player_ = -1; // Player to play. + int turn_player_ = -1; // Whose actual turn is it. At chance nodes, we need + // to remember whose is playing for next turn. + // (cur_player will be the chance player's id.) + std::vector scores_; // Score for each player. + int turn_total_ = -1; +}; + +class PigGame : public Game { + public: + explicit PigGame(const GameParameters& params); + + int NumDistinctActions() const override { return 2; } + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new PigState( + shared_from_this(), dice_outcomes_, horizon_, win_score_, piglet_)); + } + int MaxChanceOutcomes() const override { return dice_outcomes_; } + + // There is arbitrarily chosen number to ensure the game is finite. + int MaxGameLength() const override { return horizon_; } + + // Every chance node is preceded by a decision node (roll) + // -> At most as many chance nodes as decision nodes. + // -> Up to as many chance nodes as decision nodes, if + // every action is "roll" and player never 'falls'. + int MaxChanceNodesInHistory() const override { return MaxGameLength(); } + + int NumPlayers() const override { return num_players_; } + double MinUtility() const override { return -1; } + absl::optional UtilitySum() const override { return 0; } + double MaxUtility() const override { return +1; } + std::vector ObservationTensorShape() const override; + + private: + // Number of different dice outcomes, i.e. 6. + int dice_outcomes_; + + // Maximum number of moves before draw. + int horizon_; + + // Number of players in this game. + int num_players_; + + // The amount needed to win. 
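+  // (This is the "winscore" game parameter.)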
+ int win_score_; + + // Whether Piglet variant is enabled (always move only 1 step forward) + bool piglet_; +}; + +} // namespace pig +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_PIG_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/pig/pig_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/pig/pig_test.cc new file mode 100644 index 0000000..a7bb49e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/pig/pig_test.cc @@ -0,0 +1,41 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace pig { +namespace { + +namespace testing = open_spiel::testing; + +void BasicPigTests() { + testing::LoadGameTest("pig"); + testing::ChanceOutcomesTest(*LoadGame("pig")); + testing::RandomSimTest(*LoadGame("pig"), 100); + for (Player players = 3; players <= 5; players++) { + testing::RandomSimTest( + *LoadGame("pig", {{"players", GameParameter(players)}}), 100); + } + testing::RandomSimTest(*LoadGame("pig", {{"winscore", GameParameter(25)}, + {"piglet", GameParameter(true)}}), + 100); +} + +} // namespace +} // namespace pig +} // namespace open_spiel + +int main(int argc, char** argv) { open_spiel::pig::BasicPigTests(); } diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/quoridor/quoridor.cc b/scenarios/bargaining/open_spiel/open_spiel/games/quoridor/quoridor.cc new file mode 100644 index 0000000..1c43cc6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/quoridor/quoridor.cc @@ -0,0 +1,678 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/quoridor/quoridor.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/observer.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/tensor_view.h" + +namespace open_spiel { +namespace quoridor { +namespace { + +// Facts about the game. 
+const GameType kGameType{ + /*short_name=*/"quoridor", + /*long_name=*/"Quoridor", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/kMaxNumPlayers, + /*min_num_players=*/kMinNumPlayers, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + { + {"board_size", GameParameter(kDefaultBoardSize)}, + // A default will be computed from the board_size + {"wall_count", + GameParameter(GameParameter::Type::kInt, /*is_mandatory=*/false)}, + {"ansi_color_output", GameParameter(false)}, + {"players", GameParameter(kMinNumPlayers, false)}, + }}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new QuoridorGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +} // namespace + +class QuoridorState::SearchState { + using DistanceAndMove = std::pair; + + class SearchQueue + : public std::priority_queue, + std::greater> { + public: + void clear() { c.clear(); } + void reserve(int capacity) { c.reserve(capacity); } + }; + + public: + explicit SearchState(int board_diameter) { + int size = board_diameter * board_diameter; + mark_.resize(size, false); + on_shortest_path_.resize(size, false); + distance_.resize(size, UndefinedDistance()); + queue_.reserve(size); + } + + bool IsEmpty() const { return queue_.empty(); } + + void ClearSearchQueue() { queue_.clear(); } + + bool Push(int dist, Move move) { + if (mark_[move.xy] == false) { + mark_[move.xy] = true; + queue_.emplace(dist, move); + return true; + } else { + return false; + } + } + + Move Pop() { + Move move = queue_.top().second; + queue_.pop(); + return move; + } + + void ResetSearchQueue() { + std::fill(mark_.begin(), mark_.end(), false); + queue_.clear(); + } + + void ResetDists() { + std::fill(distance_.begin(), distance_.end(), UndefinedDistance()); + } + + void SetDist(Move move, int dist) { distance_[move.xy] = dist; } + int GetDist(Move move) const { return distance_[move.xy]; } + void SetOnShortestPath(Move move) { on_shortest_path_[move.xy] = true; } + bool IsOnShortestPath(Move move) const { return on_shortest_path_[move.xy]; } + + static constexpr int UndefinedDistance() { return -1; } + + private: + SearchQueue queue_; + std::vector mark_; // Whether this position has been pushed before. + std::vector distance_; // Distance from player. + std::vector on_shortest_path_; // Is this position on a shortest path? 
+}; + +std::string Move::ToString() const { + std::string out = absl::StrCat( + std::string(1, static_cast('a' + (x / 2))), (y / 2) + 1); + if (!IsWall()) { + return out; + } else if (IsVerticalWall()) { + return absl::StrCat(out, "v"); + } else if (IsHorizontalWall()) { + return absl::StrCat(out, "h"); + } + return "invalid move"; +} + +QuoridorState::QuoridorState(std::shared_ptr game, int board_size, + int wall_count, bool ansi_color_output) + : State(game), + board_size_(board_size), + board_diameter_(board_size * 2 - 1), + ansi_color_output_(ansi_color_output), + // See ActionToMove for explanation of the below + base_for_relative_(2, 2, board_diameter_) { + board_.resize(board_diameter_ * board_diameter_, kPlayerNone); + players_.resize(num_players_); + // Account for order of turns (order of play is clockwise) + if (num_players_ == 2) { + players_[0] = kPlayer1; + players_[1] = kPlayer2; + } else if (num_players_ == 3) { + players_[0] = kPlayer1; + players_[1] = kPlayer3; + players_[2] = kPlayer2; + } else if (num_players_ == 4) { + players_[0] = kPlayer1; + players_[1] = kPlayer3; + players_[2] = kPlayer2; + players_[3] = kPlayer4; + } + wall_count_.resize(num_players_); + player_loc_.resize(num_players_); + end_zone_.resize(num_players_); + for (int i = 0; i < num_players_; ++i) { + wall_count_[players_[i]] = wall_count; + InitializePlayer(players_[i]); + } +} + +void QuoridorState::InitializePlayer(QuoridorPlayer p) { + int center_field = board_size_ - (board_size_ % 2); + if (p == kPlayer1) { + player_loc_[p] = GetMove(center_field, board_diameter_ - 1); + SetPlayer(player_loc_[p], p, kPlayerNone); + end_zone_[p] = 0; + return; + } + if (p == kPlayer2) { + player_loc_[p] = GetMove(center_field, 0); + SetPlayer(player_loc_[p], kPlayer2, kPlayerNone); + end_zone_[p] = board_diameter_ - 1; + return; + } + if (p == kPlayer3) { + player_loc_[p] = GetMove(0, center_field); + SetPlayer(player_loc_[p], p, kPlayerNone); + end_zone_[p] = board_diameter_ - 1; + return; + } + if (p == kPlayer4) { + player_loc_[p] = GetMove(board_diameter_ - 1, center_field); + SetPlayer(player_loc_[p], p, kPlayerNone); + end_zone_[p] = 0; + return; + } +} + +/* + * The original implementation mapped action IDs to absolute board positions. + * This meant that moving "north" had a different ID for every pawn position. + * Now action IDs are encoded in the same virtual space as absolute board + * positions, but they indicate the pawn's relative move as if it were in + * square (1,1). So when we get those action IDs in, we need to convert them + * back into the absolute position into which we need to place the pawn. + */ +Move QuoridorState::ActionToMove(Action action_id) const { + Move move = GetMove(action_id % board_diameter_, action_id / board_diameter_); + if (!move.IsWall()) { + Move target = player_loc_[current_player_] + (move - base_for_relative_); + if (GetPlayer(target) == kPlayerNone) { + return target; + } else { + // Jumping over a player is inferred - it has the same action ID as just + // stepping + return player_loc_[current_player_] + ((move - base_for_relative_) * 2); + } + } + return move; +} + +std::vector QuoridorState::LegalActions() const { + std::vector moves; + if (IsTerminal()) return moves; + int max_moves = + num_players_ > 2 ? 6 : 5; // Max legal pawn moves, including jumps. + if (wall_count_[current_player_] > 0) { + max_moves += 2 * (board_size_ - 1) * (board_size_ - 1); // Max wall moves. + } + moves.reserve(max_moves); + + // Pawn moves. 
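+  // Each AddActions() call below appends the relative action for stepping
+  // (or jumping) in one cardinal direction, encoded around base_for_relative_
+  // as explained above ActionToMove().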
+ Move cur = player_loc_[current_player_]; + AddActions(cur, Offset(1, 0), &moves); + AddActions(cur, Offset(0, 1), &moves); + AddActions(cur, Offset(-1, 0), &moves); + AddActions(cur, Offset(0, -1), &moves); + + // Wall placements. + if (wall_count_[current_player_] > 0) { + SearchState search_state(board_diameter_); + for (int i = 0; i < num_players_; ++i) { + SearchShortestPath(players_[i], &search_state); + } + for (int y = 0; y < board_diameter_ - 2; y += 2) { + for (int x = 0; x < board_diameter_ - 2; x += 2) { + Move h = GetMove(x, y + 1); + if (IsValidWall(h, &search_state)) { + moves.push_back(h.xy); + } + Move v = GetMove(x + 1, y); + if (IsValidWall(v, &search_state)) { + moves.push_back(v.xy); + } + } + } + } + + // If no action is possible add 'pass' action to list of moves + if (moves.empty()) { + moves.push_back(cur.xy); + } + + std::sort(moves.begin(), moves.end()); + return moves; +} + +void QuoridorState::AddActions(Move cur, Offset offset, + std::vector* moves) const { + SPIEL_CHECK_FALSE(cur.IsWall()); + + if (IsWall(cur + offset)) { + // Hit a wall or edge in this direction. + return; + } + + Move forward = cur + offset * 2; + if (GetPlayer(forward) == kPlayerNone) { + // Normal single step in this direction. + moves->push_back((base_for_relative_ + offset * 2).xy); + return; + } + + // Other player, so which jumps are valid? + + if (!IsWall(cur + offset * 3)) { + // In two-players: A normal jump is allowed. We know that spot is empty. + // In >2 players, must check. + if (GetPlayer(cur + offset * 4) == kPlayerNone) { + // The relative action ID for jumping directly over is the same as moving + moves->push_back((base_for_relative_ + offset * 2).xy); + return; + } else { + return; + } + } + // We are jumping over the other player against a wall, which side jumps are + // valid? + + Offset left = offset.rotate_left(); + if (!IsWall(forward + left)) { + if (GetPlayer(forward + left * 2) == kPlayerNone) { + moves->push_back((base_for_relative_ + offset * 2 + left * 2).xy); + } + } + Offset right = offset.rotate_right(); + if (!IsWall(forward + right)) { + if (GetPlayer(forward + right * 2) == kPlayerNone) { + moves->push_back((base_for_relative_ + offset * 2 + right * 2).xy); + } + } +} + +bool QuoridorState::IsValidWall(Move m, SearchState* search_state) const { + Offset offset = (m.IsHorizontalWall() ? Offset(1, 0) : Offset(0, 1)); + + if (IsWall(m + offset * 0) || IsWall(m + offset * 1) || + IsWall(m + offset * 2)) { + // Already blocked by a wall. + return false; + } + + // Any wall that doesn't intersect with a shortest path is clearly legal. + // Walls that do intersect might still be legal because there's another way + // around, but that's more expensive to check. + if (!search_state->IsOnShortestPath(m) && + !search_state->IsOnShortestPath(m + offset * 2)) { + return true; + } + + // If this wall doesn't connect two existing walls/edges, then it can't cut + // any paths. Even connecting to a node where 3 other walls meet, but without + // connecting them to anything else, can't cut any paths. + int count = ( + // The 3 walls near the close end. + (IsWall(m - offset * 2) || IsWall(m - offset + offset.rotate_left()) || + IsWall(m - offset + offset.rotate_right())) + + // The 3 walls near the far end. + (IsWall(m + offset * 4) || + IsWall(m + offset * 3 + offset.rotate_left()) || + IsWall(m + offset * 3 + offset.rotate_right())) + + // The 2 walls in the middle. 
+ (IsWall(m + offset + offset.rotate_left()) || + IsWall(m + offset + offset.rotate_right()))); + if (count <= 1) return true; + + // Do a full search to verify both players can get to their respective goals. + bool pathExists = true; + for (int i = 0; i < num_players_; ++i) { + pathExists = pathExists && + SearchEndZone(players_[i], m, m + offset * 2, search_state); + } + return pathExists; +} + +bool QuoridorState::SearchEndZone(QuoridorPlayer p, Move wall1, Move wall2, + SearchState* search_state) const { + search_state->ResetSearchQueue(); + Offset dir(1, 0); // Direction is arbitrary. Queue will make it fast. + int goal = end_zone_[p]; + int goal_dir = (goal == 0 ? -1 : 1); // Sort for shortest dist in a min-heap. + search_state->Push(0, player_loc_[p]); + while (!search_state->IsEmpty()) { + Move c = search_state->Pop(); + for (int i = 0; i < 4; ++i) { + Move wall = c + dir; + if (!IsWall(wall) && wall != wall1 && wall != wall2) { + Move move = c + dir * 2; + int moveCoord; + if (p == kPlayer1 || p == kPlayer2) { + moveCoord = move.y; + } else if (p == kPlayer3 || p == kPlayer4) { + moveCoord = move.x; + } else { + SpielFatalError("Case not handled for player in SearchEndZone."); + } + if (moveCoord == goal) { + return true; + } + search_state->Push(goal_dir * (goal - move.y), move); + } + dir = dir.rotate_left(); + } + } + + return false; +} + +void QuoridorState::SearchShortestPath(QuoridorPlayer p, + SearchState* search_state) const { + search_state->ResetSearchQueue(); + search_state->ResetDists(); + Offset dir(1, 0); // Direction is arbitrary. Queue will make it fast. + int goal = end_zone_[p]; + int goal_dir = (goal == 0 ? -1 : 1); // Sort for shortest dist in a min-heap. + search_state->Push(0, player_loc_[p]); + search_state->SetDist(player_loc_[p], 0); + Move goal_found = GetMove(-1, -1); // invalid + + // A* search for the end-zone, keeping distances to each cell. + while (!search_state->IsEmpty()) { + Move c = search_state->Pop(); + int dist = search_state->GetDist(c); + for (int i = 0; i < 4; ++i) { + Move wall = c + dir; + if (!IsWall(wall)) { + Move move = c + dir * 2; + int moveCoord; + if (p == kPlayer1 || p == kPlayer2) { + moveCoord = move.y; + } else if (p == kPlayer3 || p == kPlayer4) { + moveCoord = move.x; + } else { + SpielFatalError("Case not handled for player in SearchShortestPath"); + } + if (moveCoord == goal) { + search_state->SetDist(move, dist + 1); + search_state->ClearSearchQueue(); // Break out of the search. + goal_found = move; + break; + } + if (search_state->Push(dist + 1 + goal_dir * (goal - moveCoord), + move)) { + search_state->SetDist(move, dist + 1); + } + } + dir = dir.rotate_left(); + } + } + + // Trace the way back, setting them to be on a shortest path. + Move current = goal_found; + int dist = search_state->GetDist(current); + while (current != player_loc_[p]) { + for (int i = 0; i < 4; ++i) { + Move wall = current + dir; + if (!IsWall(wall)) { + Move move = current + dir * 2; + int dist2 = search_state->GetDist(move); + if (dist2 != search_state->UndefinedDistance() && dist2 + 1 == dist) { + search_state->SetOnShortestPath(wall); + current = move; + dist = dist2; + break; + } + } + dir = dir.rotate_left(); + } + } +} + +std::string QuoridorState::ActionToString(Player player, + Action action_id) const { + return ActionToMove(action_id).ToString(); +} + +std::string QuoridorState::ToString() const { + // Generates something like: + // Board size: 5, walls: 0, 0 + // a b c d e + // 1 . | . . . . 1 + // + ---+--- + // 2 . | . | . . . 
2 + // + + // 3 . . | O @ . 3 + // ---+--- + // 4 . | . . . . 4 + // + ---+--- + // 5 . | . . . . 5 + // a b c d e + + std::string reset; + std::array colors, coords; + if (ansi_color_output_) { + std::string esc = "\033"; + reset = esc + "[0m"; + coords[0] = esc + "[1;33m"; + coords[1] = esc + "[1;34m"; + coords[2] = esc + "[1;35m"; + coords[3] = esc + "[1;36m"; + colors[0] = esc + "[1;33m" + " O " + reset; + colors[1] = esc + "[1;34m" + " @ " + reset; + colors[2] = esc + "[1;35m" + " # " + reset; + colors[3] = esc + "[1;36m" + " % " + reset; + } else { + std::string reset = ""; + coords[0] = ""; + coords[1] = ""; + coords[2] = ""; + coords[3] = ""; + colors[0] = " 0 "; + colors[1] = " @ "; + colors[2] = " # "; + colors[3] = " % "; + } + + std::ostringstream out; + out << "Board size: " << board_size_ << ", walls: "; + for (int i = 0; i < num_players_; ++i) { + out << wall_count_[players_[i]]; + if (i < num_players_ - 1) out << ", "; + } + out << "\n"; + + // Top x coords. + for (int x = 0; x < board_size_; ++x) { + out << " " << coords[1] << static_cast('a' + x); + } + out << reset << '\n'; + + for (int y = 0; y < board_diameter_; ++y) { + if (y % 2 == 0) { + if (y / 2 + 1 < 10) out << " "; + out << coords[2] << (y / 2 + 1) << reset; // Leading y coord. + } else { + out << " "; // Wall lines. + } + + for (int x = 0; x < board_diameter_; ++x) { + QuoridorPlayer p = GetPlayer(GetMove(x, y)); + if (x % 2 == 0 && y % 2 == 0) { + bool playerFound = false; + for (int i = 0; i < num_players_; ++i) { + if (p == players_[i]) { + out << colors[players_[i]]; + playerFound = true; + } + } + if (!playerFound) { + out << " . "; + } + } else if (x % 2 == 1 && y % 2 == 1) { + out << (p == kPlayerWall ? "+" : " "); + } else if (x % 2 == 1) { + out << (p == kPlayerWall ? "|" : " "); + } else if (y % 2 == 1) { + out << (p == kPlayerWall ? "---" : " "); + } + } + if (y % 2 == 0) { + if (y / 2 + 1 < 10) out << " "; + out << coords[3] << (y / 2 + 1) << reset; // y coord on the right. + } else { + out << " "; // Wall lines. + } + out << '\n'; + } + // Bottom x coords. + for (int x = 0; x < board_size_; ++x) { + out << " " << coords[0] << static_cast('a' + x); + } + out << reset << '\n'; + return out.str(); +} + +std::vector QuoridorState::Returns() const { + std::vector res(num_players_, 0.0); + for (int i = 0; i < num_players_; ++i) { + if (outcome_ == players_[i]) { + // If someone as won, set their reward to +1 and all the others to + // -1 / (num_players - 1). 
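+      // E.g. with three players the winner receives +1 and each loser -0.5,
+      // so the returns always sum to zero.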
+ std::fill(res.begin(), res.end(), -1.0 / (num_players_ - 1)); + res[i] = 1.0; + break; + } + } + return res; +} + +std::string QuoridorState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return HistoryString(); +} + +std::string QuoridorState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void QuoridorState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + TensorView<2> view( + values, {NumCellStates() + num_players_, static_cast(board_.size())}, + true); + + for (int i = 0; i < board_.size(); ++i) { + if (board_[i] < NumCellStates()) { + view[{static_cast(board_[i]), i}] = 1.0; + } + for (int j = 0; j < num_players_; ++j) { + view[{NumCellStates() + players_[j], i}] = wall_count_[players_[j]]; + } + } +} + +void QuoridorState::DoApplyAction(Action action) { + Move move = ActionToMove(action); + // If players is forced to pass it is valid to stay in place, on a field where + // there is already a player + if (board_[move.xy] != current_player_) { + SPIEL_CHECK_EQ(board_[move.xy], kPlayerNone); + } + SPIEL_CHECK_EQ(outcome_, kPlayerNone); + + SPIEL_CHECK_TRUE(move.IsValid()); + + if (move.IsWall()) { + Offset offset = (move.IsHorizontalWall() ? Offset(1, 0) : Offset(0, 1)); + SetPlayer(move + offset * 0, kPlayerWall, kPlayerNone); + SetPlayer(move + offset * 1, kPlayerWall, kPlayerNone); + SetPlayer(move + offset * 2, kPlayerWall, kPlayerNone); + wall_count_[current_player_] -= 1; + } else { + SetPlayer(player_loc_[current_player_], kPlayerNone, current_player_); + SetPlayer(move, current_player_, kPlayerNone); + player_loc_[current_player_] = move; + + int end_zone_coord; + if (current_player_ == kPlayer1 || current_player_ == kPlayer2) { + end_zone_coord = move.y; + } else { + end_zone_coord = move.x; + } + + outcome_ = kPlayerNone; + if (end_zone_coord == end_zone_[current_player_]) { + outcome_ = current_player_; + } + } + + ++moves_made_; + if (moves_made_ >= kMaxGameLengthFactor * board_size_ * board_size_) { + outcome_ = kPlayerDraw; + } + + current_player_index_ += 1; + if (current_player_index_ == num_players_) current_player_index_ = 0; + current_player_ = players_[current_player_index_]; +} + +std::unique_ptr QuoridorState::Clone() const { + return std::unique_ptr(new QuoridorState(*this)); +} + +QuoridorGame::QuoridorGame(const GameParameters& params) + : Game(kGameType, params), + board_size_(ParameterValue("board_size")), + wall_count_( + ParameterValue("wall_count", board_size_ * board_size_ / 8)), + ansi_color_output_(ParameterValue("ansi_color_output")), + num_players_(ParameterValue("players")) { + if (board_size_ < 3) { + // For relative moves, we need to be able to describe moves using a 3x3 grid + // and since we use the board to number the moves (see above), we need the + // playing board to be at least that big. 
+ SpielFatalError("Board size must be at least 3x3."); + } +} +} // namespace quoridor +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/quoridor/quoridor.h b/scenarios/bargaining/open_spiel/open_spiel/games/quoridor/quoridor.h new file mode 100644 index 0000000..43b7909 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/quoridor/quoridor.h @@ -0,0 +1,198 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_QUORIDOR_H_ +#define OPEN_SPIEL_GAMES_QUORIDOR_H_ + +#include +#include +#include +#include + +#include "open_spiel/spiel.h" + +// https://en.wikipedia.org/wiki/Quoridor +// +// Parameters: +// "board_size" int Size of the board (default = 9) +// "wall_count" int How many walls per side (default = size^2/8) +// "ansi_color_output" bool Whether to color the output for a terminal. +// "players" int Number of players (default = 2) + +namespace open_spiel { +namespace quoridor { + +inline constexpr int kDefaultNumPlayers = 2; +inline constexpr int kMinNumPlayers = 2; +inline constexpr int kMaxNumPlayers = 4; +inline constexpr int kDefaultBoardSize = 9; +inline constexpr int kMinBoardSize = 3; +inline constexpr int kMaxBoardSize = 25; +inline constexpr int kMaxGameLengthFactor = 4; + +enum QuoridorPlayer : uint8_t { + kPlayer1, + kPlayer2, + kPlayer3, + kPlayer4, + kPlayerWall, + kPlayerNone, + kPlayerDraw, +}; + +struct Offset { + int x, y; + + Offset(int x_, int y_) : x(x_), y(y_) {} + + Offset operator+(const Offset& o) const { return Offset(x + o.x, y + o.y); } + Offset operator-(const Offset& o) const { return Offset(x - o.x, y - o.y); } + Offset operator*(const int i) const { return Offset(x * i, y * i); } + Offset rotate_left() const { return Offset(-y, x); } + Offset rotate_right() const { return Offset(y, -x); } +}; + +struct Move { + int x, y; + int xy; // Precomputed x + y * size. + int size; + + Move() : x(0), y(0), xy(-1), size(-1) {} + Move(int x_, int y_, int size_) + : x(x_), y(y_), xy(x_ + (y_ * size_)), size(size_) {} + + std::string ToString() const; + + bool IsValid() const { return x >= 0 && y >= 0 && x < size && y < size; } + bool IsWall() const { return x & 1 || y & 1; } + bool IsHorizontalWall() const { return y & 1; } + bool IsVerticalWall() const { return x & 1; } + + bool operator==(const Move& b) const { return xy == b.xy; } + bool operator!=(const Move& b) const { return xy != b.xy; } + bool operator<(const Move& b) const { return xy < b.xy; } + + Move operator+(const Offset& o) const { return Move(x + o.x, y + o.y, size); } + Move operator-(const Offset& o) const { return Move(x - o.x, y - o.y, size); } + Offset operator-(const Move& o) const { return Offset(x - o.x, y - o.y); } +}; + +// State of an in-play game. 
+class QuoridorState : public State { + public: + QuoridorState(std::shared_ptr game, int board_size, + int wall_count, bool ansi_color_output = false); + + QuoridorState(const QuoridorState&) = default; + void InitializePlayer(QuoridorPlayer); + + Player CurrentPlayer() const override { + return IsTerminal() ? kTerminalPlayerId : static_cast(current_player_); + } + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override { return outcome_ != kPlayerNone; } + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + std::vector LegalActions() const override; + int NumCellStates() const { return num_players_ + 1; } + + protected: + void DoApplyAction(Action action) override; + + // Turn an action id into a `Move`. + Move ActionToMove(Action action_id) const; + + Move GetMove(int x, int y) const { return Move(x, y, board_diameter_); } + bool IsWall(Move m) const { + return m.IsValid() ? board_[m.xy] == kPlayerWall : true; + } + QuoridorPlayer GetPlayer(Move m) const { + return m.IsValid() ? board_[m.xy] : kPlayerWall; + } + void SetPlayer(Move m, QuoridorPlayer p, Player old) { + SPIEL_CHECK_TRUE(m.IsValid()); + SPIEL_CHECK_EQ(board_[m.xy], old); + board_[m.xy] = p; + } + + private: + // SearchState contains details that are only used in the .cc file. + // A different technique in the same area is called pimpl (pointer to + // implementation). + class SearchState; + + // Helpers for `LegaLActions`. + void AddActions(Move cur, Offset offset, std::vector* moves) const; + bool IsValidWall(Move m, SearchState*) const; + bool SearchEndZone(QuoridorPlayer p, Move wall1, Move wall2, + SearchState*) const; + void SearchShortestPath(QuoridorPlayer p, SearchState* search_state) const; + + std::vector board_; + std::vector players_; + std::vector wall_count_; + std::vector end_zone_; + std::vector player_loc_; + QuoridorPlayer current_player_ = kPlayer1; + int current_player_index_ = 0; + QuoridorPlayer outcome_ = kPlayerNone; + int moves_made_ = 0; + const int board_size_; + const int board_diameter_; + const bool ansi_color_output_; + const Move base_for_relative_; +}; + +// Game object. +class QuoridorGame : public Game { + public: + explicit QuoridorGame(const GameParameters& params); + + int NumDistinctActions() const override { return Diameter() * Diameter(); } + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new QuoridorState( + shared_from_this(), board_size_, wall_count_, ansi_color_output_)); + } + int NumPlayers() const override { return num_players_; } + int NumCellStates() const { return num_players_ + 1; } + double MinUtility() const override { return -1; } + absl::optional UtilitySum() const override { return 0; } + double MaxUtility() const override { return 1; } + std::vector ObservationTensorShape() const override { + return {NumCellStates() + num_players_, Diameter(), Diameter()}; + } + int MaxGameLength() const override { + // There's no anti-repetition rule, so this could be infinite, but no sane + // agent would take more moves than placing all the walls and visiting + // all squares. 
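+    // For the default 9x9 board this caps a game at 4 * 81 = 324 moves.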
+ return kMaxGameLengthFactor * board_size_ * board_size_; + } + + private: + int Diameter() const { return board_size_ * 2 - 1; } + const int board_size_; + const int wall_count_; + const bool ansi_color_output_ = false; + const int num_players_; +}; + +} // namespace quoridor +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_QUORIDOR_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/quoridor/quoridor_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/quoridor/quoridor_test.cc new file mode 100644 index 0000000..f7aa99a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/quoridor/quoridor_test.cc @@ -0,0 +1,64 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace quoridor { +namespace { + +namespace testing = open_spiel::testing; + +void BasicQuoridorTests() { + testing::LoadGameTest("quoridor(board_size=5)"); + testing::NoChanceOutcomesTest(*LoadGame("quoridor()")); + testing::RandomSimTest(*LoadGame("quoridor"), 10); + + for (int i = 5; i <= 13; i++) { + testing::RandomSimTest( + *LoadGame(absl::StrCat("quoridor(board_size=", i, ")")), 5); + } + + for (int i = 2; i <= 4; i++) { + testing::RandomSimTest( + *LoadGame(absl::StrCat("quoridor(board_size=9,players=", i, ")")), 5); + } + + testing::RandomSimTest(*LoadGame("quoridor(board_size=9,wall_count=5)"), 3); + + // Ansi colors! + testing::RandomSimTest( + *LoadGame("quoridor", {{"board_size", GameParameter(9)}, + {"ansi_color_output", GameParameter(true)}}), + 3); + testing::RandomSimTest( + *LoadGame("quoridor", {{"board_size", GameParameter(9)}, + {"ansi_color_output", GameParameter(true)}, + {"players", GameParameter(3)}}), + 3); + testing::RandomSimTest( + *LoadGame("quoridor(board_size=5,ansi_color_output=True)"), 3); + testing::RandomSimTest( + *LoadGame("quoridor(board_size=5,ansi_color_output=True,players=3)"), 3); +} + +} // namespace +} // namespace quoridor +} // namespace open_spiel + +int main(int argc, char** argv) { open_spiel::quoridor::BasicQuoridorTests(); } diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/rbc/rbc.cc b/scenarios/bargaining/open_spiel/open_spiel/games/rbc/rbc.cc new file mode 100644 index 0000000..9cffaf1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/rbc/rbc.cc @@ -0,0 +1,585 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/rbc/rbc.h" + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace rbc { +namespace { + +constexpr int kNumReversibleMovesToDraw = 100; +constexpr int kNumRepetitionsToDraw = 3; + +// Facts about the game. +const GameType kGameType{ + /*short_name=*/"rbc", + /*long_name=*/"Reconnaisance Blind Chess", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kImperfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"board_size", GameParameter(8)}, + {"sense_size", GameParameter(3)}, + {"fen", GameParameter(GameParameter::Type::kString, false)}}}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::make_shared(params); +} + +REGISTER_SPIEL_GAME(kGameType, Factory) + +chess::ObservationTable ComputeObservationTable(const chess::ChessBoard& board, + chess::Color color, + int sense_location, + int sense_size) { + const int board_size = board.BoardSize(); + chess::ObservationTable observability_table{false}; + + // Player pieces. + for (int8_t y = 0; y < board_size; ++y) { + for (int8_t x = 0; x < board_size; ++x) { + chess::Square sq{x, y}; + const auto& piece = board.at(sq); + if (piece.color == color) { + size_t index = chess::SquareToIndex(sq, board_size); + observability_table[index] = true; + } + } + } + + // No sense window specified. + if (sense_location < 0) return observability_table; + + // All pieces under the sense window. 
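+  // (Note added in this vendored copy, not upstream: with the default
+  // parameters board_size = 8 and sense_size = 3, inner_size below is
+  // 8 - 3 + 1 = 6, so sense_location ranges over 0..35 and indexes the
+  // lowest-coordinate corner of a 3x3 window that lies fully on the board;
+  // see RbcGame::inner_size() in rbc.h.)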
+ int inner_size = board_size - sense_size + 1; + chess::Square sense_sq = chess::IndexToSquare(sense_location, inner_size); + SPIEL_DCHECK_LE(sense_sq.x + sense_size, board_size); + SPIEL_DCHECK_LE(sense_sq.y + sense_size, board_size); + for (int8_t x = sense_sq.x; x < sense_sq.x + sense_size; ++x) { + for (int8_t y = sense_sq.y; y < sense_sq.y + sense_size; ++y) { + const chess::Square sq{x, y}; + size_t index = chess::SquareToIndex(sq, board_size); + observability_table[index] = true; + } + } + + return observability_table; +} + +bool ObserverHasString(IIGObservationType iig_obs_type) { + return iig_obs_type.public_info && + iig_obs_type.private_info == PrivateInfoType::kSinglePlayer && + !iig_obs_type.perfect_recall; +} +bool ObserverHasTensor(IIGObservationType iig_obs_type) { + return !iig_obs_type.perfect_recall; +} + +} // namespace + +class RbcObserver : public Observer { + public: + explicit RbcObserver(IIGObservationType iig_obs_type) + : Observer(/*has_string=*/ObserverHasString(iig_obs_type), + /*has_tensor=*/ObserverHasTensor(iig_obs_type)), + iig_obs_type_(iig_obs_type) {} + + void WriteTensor(const State& observed_state, int player, + Allocator* allocator) const override { + auto& state = open_spiel::down_cast(observed_state); + auto& game = open_spiel::down_cast(*state.GetGame()); + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, game.NumPlayers()); + + if (iig_obs_type_.perfect_recall) { + SpielFatalError( + "RbcObserver: tensor with perfect recall not implemented."); + } + + if (iig_obs_type_.public_info) { + WritePublicInfoTensor(state, allocator); + } + if (iig_obs_type_.private_info == PrivateInfoType::kSinglePlayer) { + std::string prefix = "private"; + WritePrivateInfoTensor(state, player, prefix, allocator); + } else if (iig_obs_type_.private_info == PrivateInfoType::kAllPlayers) { + for (int i = 0; i < chess::NumPlayers(); ++i) { + chess::Color color = chess::PlayerToColor(player); + std::string prefix = chess::ColorToString(color); + WritePrivateInfoTensor(state, i, prefix, allocator); + } + } + } + + std::string StringFrom(const State& observed_state, + int player) const override { + auto& state = open_spiel::down_cast(observed_state); + auto& game = open_spiel::down_cast(*state.GetGame()); + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, game.NumPlayers()); + + if (iig_obs_type_.perfect_recall) { + SpielFatalError( + "RbcObserver: string with perfect recall is not supported"); + } + + if (iig_obs_type_.private_info == PrivateInfoType::kSinglePlayer) { + return StringPrivateInfoObservation(state, game, player); + } else { + SpielFatalError( + "RbcObserver: string with imperfect recall is implemented only" + " for the (default) observation type."); + } + } + + private: + // Encode the private observations as a FEN-like string. + std::string StringPrivateInfoObservation(const RbcState& state, + const RbcGame& game, + int player) const { + chess::Color color = chess::PlayerToColor(player); + const int sense_location = + (state.phase_ == MovePhase::kMoving && state.CurrentPlayer() == player) + ? state.sense_location_[player] + // Make sure that sense from last round does not + // reveal a new hidden move: allow players to + // perceive only results of the last sensing. + : kSenseLocationNonSpecified; + const chess::ChessBoard& board = state.Board(); + chess::ObservationTable observability_table = ComputeObservationTable( + board, color, sense_location, game.sense_size()); + const int board_size = game.board_size(); + std::string str = ""; + + // 1. 
Encode the board based on what can be observed for the player. + for (int8_t rank = board_size - 1; rank >= 0; --rank) { + int num_unknown = 0; + for (int8_t file = 0; file < board_size; ++file) { + const size_t index = + chess::SquareToIndex(chess::Square{file, rank}, board_size); + if (!observability_table[index]) { + num_unknown++; + } else { + if (num_unknown > 0) { + absl::StrAppend(&str, num_unknown); + num_unknown = 0; + } + const chess::Piece& piece = board.at(chess::Square{file, rank}); + absl::StrAppend(&str, piece.ToString()); + } + } + if (num_unknown > 0) { + absl::StrAppend(&str, num_unknown); + } + if (rank > 0) { + absl::StrAppend(&str, "/"); + } + } + + // 2. Castling rights of the player. + absl::StrAppend(&str, " "); + std::string castling_rights; + if (board.CastlingRight(color, chess::CastlingDirection::kRight)) { + absl::StrAppend(&castling_rights, "K"); + } + if (board.CastlingRight(color, chess::CastlingDirection::kLeft)) { + castling_rights.push_back('Q'); + } + absl::StrAppend(&str, castling_rights.empty() ? "-" : castling_rights); + + // 3. Phase. + absl::StrAppend(&str, " ", state.phase_ == MovePhase::kSensing ? "s" : "m"); + + // 4. Capture (but no information about what was captured). + absl::StrAppend(&str, " ", state.move_captured_ ? "c" : "-"); + + // 5. Side to play. + absl::StrAppend(&str, " ", + board.ToPlay() == chess::Color::kWhite ? "w" : "b"); + + // 6. Illegal move. + const bool can_show = state.CurrentPlayer() == player; + absl::StrAppend(&str, " ", + can_show && state.illegal_move_attempted_ ? "i" : "-"); + return str; + } + + void WritePieces(chess::Color color, chess::PieceType piece_type, + const chess::ChessBoard& board, int sense_location, + int sense_size, const std::string& prefix, + Allocator* allocator) const { + const std::string type_string = chess::PieceTypeToString( + piece_type, /*uppercase=*/color == chess::Color::kWhite); + const int board_size = board.BoardSize(); + int inner_size = board_size - sense_size + 1; + chess::Square sense_square = + chess::IndexToSquare(sense_location, inner_size); + auto out = allocator->Get(prefix + "_" + type_string + "_pieces", + {board_size, board_size}); + + if (sense_location < 0) return; // No sense window specified. + SPIEL_DCHECK_LE(sense_square.x + sense_size, board_size); + SPIEL_DCHECK_LE(sense_square.y + sense_size, board_size); + for (int8_t x = sense_square.x; x < sense_square.x + sense_size; ++x) { + for (int8_t y = sense_square.y; y < sense_square.y + sense_size; ++y) { + const chess::Square square{x, y}; + const chess::Piece& piece_on_board = board.at(square); + const bool write_square = + piece_on_board.color == color && piece_on_board.type == piece_type; + out.at(x, y) = write_square ? 1.0f : 0.0f; + } + } + } + + void WriteScalar(int val, int min, int max, const std::string& field_name, + Allocator* allocator) const { + SPIEL_DCHECK_LT(min, max); + SPIEL_DCHECK_GE(val, min); + SPIEL_DCHECK_LE(val, max); + auto out = allocator->Get(field_name, {max - min + 1}); + out.at(val - min) = 1; + } + + // Adds a binary scalar plane. + void WriteBinary(bool val, const std::string& field_name, + Allocator* allocator) const { + WriteScalar(val ? 1 : 0, 0, 1, field_name, allocator); + } + + void WritePrivateInfoTensor(const RbcState& state, int player, + const std::string& prefix, + Allocator* allocator) const { + chess::Color color = chess::PlayerToColor(player); + + // Illegal move (pawn attack or pawn forward-move or castle through + // opponent pieces). 
+ const bool can_show = state.CurrentPlayer() == player; + WriteBinary(can_show && state.illegal_move_attempted_, "illegal_move", + allocator); + + // Piece configuration. + for (const chess::PieceType& piece_type : chess::kPieceTypes) { + WritePieces(static_cast(player), piece_type, state.Board(), + 0, state.game()->board_size(), prefix, allocator); + } + + // Castling rights. + WriteBinary( + state.Board().CastlingRight(color, chess::CastlingDirection::kLeft), + prefix + "_left_castling", allocator); + WriteBinary( + state.Board().CastlingRight(color, chess::CastlingDirection::kRight), + prefix + "_right_castling", allocator); + + // Last sensing + for (const chess::PieceType& piece_type : chess::kPieceTypes) { + int sense_location = (state.phase_ == MovePhase::kMoving && + state.CurrentPlayer() == player) + ? state.sense_location_[player] + // Make sure that sense from last round does not + // reveal a new hidden move: allow players to + // perceive only results of the last sensing. + : kSenseLocationNonSpecified; + WritePieces(static_cast(1 - player), piece_type, + state.Board(), sense_location, state.game()->sense_size(), + prefix + "_sense", allocator); + } + } + + void WritePublicInfoTensor(const RbcState& state, + Allocator* allocator) const { + // Compute number of pieces of each player. + const int board_size = state.game()->board_size(); + std::array num_pieces = {0, 0}; + for (int x = 0; x < board_size; ++x) { + for (int y = 0; y < board_size; ++y) { + for (int pl = 0; pl < 2; ++pl) { + num_pieces[pl] += + state.Board().IsFriendly( + chess::Square{static_cast(x), + static_cast(y)}, + static_cast(pl)); + } + } + } + + WriteScalar(num_pieces[0], 0, board_size * 2, "pieces_black", allocator); + WriteScalar(num_pieces[1], 0, board_size * 2, "pieces_white", allocator); + WriteBinary(state.phase_ == MovePhase::kSensing, "phase", allocator); + WriteBinary(state.move_captured_, "capture", allocator); + WriteBinary(state.CurrentPlayer(), "side_to_play", allocator); + } + + IIGObservationType iig_obs_type_; +}; + +RbcState::RbcState(std::shared_ptr game, int board_size, + const std::string& fen) + : State(game), + start_board_(*chess::ChessBoard::BoardFromFEN( + fen, board_size, + /*king_in_check_allowed=*/true, + /*allow_pass_move=*/true)), + current_board_(start_board_), + phase_(MovePhase::kSensing) { + SPIEL_CHECK_TRUE(¤t_board_); + repetitions_[current_board_.HashValue()] = 1; +} + +void RbcState::DoApplyAction(Action action) { + // Reset common flags. + illegal_move_attempted_ = false; + move_captured_ = false; + + if (phase_ == MovePhase::kSensing) { + sense_location_[CurrentPlayer()] = action; + phase_ = MovePhase::kMoving; + } else { + SPIEL_CHECK_TRUE(phase_ == MovePhase::kMoving); + chess::Move move = ActionToMove(action, Board()); + + // Handle special cases for RBC. + + if (move == chess::kPassMove) { + // The RBC's pass move is handled via ChessBoard flag allow_pass_move. + // Nothing here. Values set above. + } else if (Board().IsBreachingMove(move)) { + SPIEL_DCHECK_FALSE(Board().IsMoveLegal(move)); + Board().BreachingMoveToCaptureMove(&move); + // Transformed move must be legal. + SPIEL_DCHECK_TRUE(Board().IsMoveLegal(move)); + // And it must be a capture, since we breached unseen opponent pieces. + SPIEL_DCHECK_NE(Board().at(move.from).color, Board().at(move.to).color); + move_captured_ = true; + } else if (!Board().IsMoveLegal(move)) { + // Illegal move was chosen. 
+ illegal_move_attempted_ = true; + + // Check why the move was illegal: + // if it is pawn two-squares-forward move, + // and there is an enemy piece blocking it, the attempt to move only one + // square forward (if that would be a legal move). + // if it is pawn move to last rank, change to pawn move & queen promotion + // (if that would be a legal move) + if (move.piece.type == chess::PieceType::kPawn && + abs(move.from.y - move.to.y) == 2) { + const int dy = move.to.y - move.from.y > 0 ? 1 : -1; + chess::Move one_forward_move = move; + one_forward_move.to.y -= dy; + move = Board().IsMoveLegal(one_forward_move) ? one_forward_move + : chess::kPassMove; + } else if (move.piece.type == chess::PieceType::kPawn && + Board().IsPawnPromotionRank(move.to)) { + chess::Move promote_move = move; + promote_move.promotion_type = chess::PieceType::kQueen; + move = Board().IsMoveLegal(promote_move) ? promote_move + : chess::kPassMove; + } else { + // Treat the illegal move as a pass. + move = chess::kPassMove; + } + } else { + // All other moves + SPIEL_DCHECK_EQ(Board().at(move.from).color, Board().ToPlay()); + move_captured_ = + Board().at(move.to).color == chess::OppColor(Board().ToPlay()); + } + + SPIEL_DCHECK_TRUE(Board().IsMoveLegal(move)); + moves_history_.push_back(move); + Board().ApplyMove(move); + + ++repetitions_[current_board_.HashValue()]; + phase_ = MovePhase::kSensing; + } + cached_legal_actions_.reset(); +} + +void RbcState::MaybeGenerateLegalActions() const { + if (!cached_legal_actions_) { + cached_legal_actions_ = std::vector(); + + if (phase_ == MovePhase::kSensing) { + int num_possible_sense_locations = + game()->inner_size() * game()->inner_size(); + cached_legal_actions_->resize(num_possible_sense_locations); + absl::c_iota(*cached_legal_actions_, 0); + } else { + SPIEL_CHECK_TRUE(phase_ == MovePhase::kMoving); + Board().GeneratePseudoLegalMoves( + [this](const chess::Move& move) -> bool { + cached_legal_actions_->push_back(MoveToAction(move, BoardSize())); + return true; + }, + Board().ToPlay(), chess::PseudoLegalMoveSettings::kBreachEnemyPieces); + absl::c_sort(*cached_legal_actions_); + } + } +} + +std::vector RbcState::LegalActions() const { + if (IsTerminal()) return {}; + MaybeGenerateLegalActions(); + return *cached_legal_actions_; +} + +std::string RbcState::ActionToString(Player player, Action action) const { + if (phase_ == MovePhase::kSensing) { + std::string from = chess::SquareToString( + chess::IndexToSquare(action, game()->inner_size())); + return absl::StrCat("Sense ", from); + } else { + if (action == chess::kPassAction) return "pass"; + chess::Move move = ActionToMove(action, Board()); + return move.ToLAN(); + } +} + +std::string RbcState::ToString() const { return Board().ToFEN(); } + +std::vector RbcState::Returns() const { + auto maybe_final_returns = MaybeFinalReturns(); + if (maybe_final_returns) { + return *maybe_final_returns; + } else { + return {0.0, 0.0}; + } +} + +std::string RbcState::ObservationString(Player player) const { + const auto& game = open_spiel::down_cast(*game_); + return game.default_observer_->StringFrom(*this, player); +} + +void RbcState::ObservationTensor(Player player, + absl::Span values) const { + ContiguousAllocator allocator(values); + const auto& game = open_spiel::down_cast(*game_); + game.default_observer_->WriteTensor(*this, player, &allocator); +} + +std::unique_ptr RbcState::Clone() const { + return std::make_unique(*this); +} + +void RbcState::UndoAction(Player player, Action action) { + // TODO: Make this fast by 
storing undo info in another stack. + SPIEL_CHECK_FALSE(history_.empty()); // Can't undo initial state. + history_.pop_back(); + --move_number_; + + if (phase_ == MovePhase::kMoving) { + phase_ = MovePhase::kSensing; + } else { + SPIEL_CHECK_GE(moves_history_.size(), 1); + phase_ = MovePhase::kMoving; + --repetitions_[current_board_.HashValue()]; + moves_history_.pop_back(); + current_board_ = start_board_; + for (const chess::Move& move : moves_history_) { + current_board_.ApplyMove(move); + } + } +} + +bool RbcState::IsRepetitionDraw() const { + const auto entry = repetitions_.find(Board().HashValue()); + SPIEL_CHECK_FALSE(entry == repetitions_.end()); + return entry->second >= kNumRepetitionsToDraw; +} + +absl::optional> RbcState::MaybeFinalReturns() const { + const auto to_play_color = Board().ToPlay(); + const auto opp_color = chess::OppColor(to_play_color); + + const auto to_play_king = + chess::Piece{to_play_color, chess::PieceType::kKing}; + const auto opp_king = chess::Piece{opp_color, chess::PieceType::kKing}; + + if (Board().find(to_play_king) == chess::kInvalidSquare) { + std::vector returns(NumPlayers()); + returns[chess::ColorToPlayer(to_play_color)] = LossUtility(); + returns[chess::ColorToPlayer(opp_color)] = WinUtility(); + return returns; + + } else if (Board().find(opp_king) == chess::kInvalidSquare) { + std::vector returns(NumPlayers()); + returns[chess::ColorToPlayer(to_play_color)] = WinUtility(); + returns[chess::ColorToPlayer(opp_color)] = LossUtility(); + return returns; + } + + if (!Board().HasSufficientMaterial()) { + return std::vector{DrawUtility(), DrawUtility()}; + } + + if (IsRepetitionDraw()) { + return std::vector{DrawUtility(), DrawUtility()}; + } + // Compute and cache the legal actions. + MaybeGenerateLegalActions(); + SPIEL_CHECK_TRUE(cached_legal_actions_); + const bool have_legal_moves = !cached_legal_actions_->empty(); + + // If we don't have legal moves we are stalemated + if (!have_legal_moves) { + return std::vector{DrawUtility(), DrawUtility()}; + } + + if (Board().IrreversibleMoveCounter() >= kNumReversibleMovesToDraw) { + // This is theoretically a draw that needs to be claimed, but we implement + // it as a forced draw for now. + return std::vector{DrawUtility(), DrawUtility()}; + } + + return absl::nullopt; +} + +RbcGame::RbcGame(const GameParameters& params) + : Game(kGameType, params), + board_size_(ParameterValue("board_size")), + sense_size_(ParameterValue("sense_size")), + fen_(ParameterValue("fen", chess::DefaultFen(board_size_))) { + default_observer_ = std::make_shared(kDefaultObsType); +} + +std::shared_ptr RbcGame::MakeObserver( + absl::optional iig_obs_type, + const GameParameters& params) const { + if (!params.empty()) SpielFatalError("Observation params not supported"); + IIGObservationType obs_type = iig_obs_type.value_or(kDefaultObsType); + if (ObserverHasString(obs_type) || ObserverHasTensor(obs_type)) { + return std::make_shared(obs_type); + } + return nullptr; +} + +} // namespace rbc +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/rbc/rbc.h b/scenarios/bargaining/open_spiel/open_spiel/games/rbc/rbc.h new file mode 100644 index 0000000..28f6f73 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/rbc/rbc.h @@ -0,0 +1,287 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_RBC_H_ +#define OPEN_SPIEL_GAMES_RBC_H_ + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/games/chess/chess.h" +#include "open_spiel/games/chess/chess_board.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// Reconnaisance Blind Chess - imperfect information version of chess, where +// players do not see the full board, but they can make explicit "sensing" moves +// to reveal specific parts of the board. +// +// Specifically, based on [1,2] the implementation follow these rules: +// +// - The rules of Standard Chess apply, with the exceptions and modifications +// that follow. +// - A player cannot see where her opponent's pieces are. +// - Prior to making each move a player selects a 3 x 3 square of the chess +// board. She learns of all pieces and their types within that square. The +// opponent is not informed about where she sensed. +// - If a player captures a piece, she is informed that she made a capture (but +// she is not informed about what she captured). +// - If a player's piece is captured, she is informed that her piece on the +// relevant square was captured (but she is not informed about what captured +// it). +// - There is no notion of check or mate (since neither player may be aware of +// any check relationship). +// - A player wins by capturing the opponent's king. +// - If a player tries to move a sliding piece through an opponent's piece, the +// opponent's piece is captured and the moved piece is stopped where the +// capture occurred. The moving player is notified of the square where her +// piece landed, and both players are notified of the capture as stated above. +// - If a player attempts to make an illegal pawn attack or pawn forward-move or +// castle, she is notified that her move did not succeed and her move is over. +// Castling through check is allowed, however, as the notion of check is +// removed. +// - There is a "pass" move, where a player can move nothing. +// +// There are some differences to the original rules [1,2]: +// +// > "All rules associated with stalemates or automatic draw conditions are +// > eliminated" +// +// Automatic draws are made after the same board repeats itself 3 times +// in the game (kNumRepetitionsToDraw) or after a large number of moves +// (kNumReversibleMovesToDraw). This is to make sure that random play +// in the game would stop after a limited number of steps. +// +// > "Turn phase: [..] if the opponent captured a piece on their turn, +// > the current player is given the capture square" +// +// Turn phase is eliminated: all the capture information can be deduced as the +// difference of player's pieces observations in the sensing phase. +// +// > "Sense phase: the player chooses any square on the chessboard to target +// > their sensor. 
Then, the true state of the game board in a three square by +// > three square window centered at the chosen square is revealed to the +// > sensing player." +// +// Sensing is done as picking a sensing window that fully fits within the +// chessboard and is not centered on an underlying square. The centering can +// make the sensing window smaller (as a rectangle near the border of the +// chessboard) which gives strictly less information than a better placed window +// (that remains a full square). This modification eliminates strategically +// useless sensing actions. +// +// > "If the move was modified [..] then the modified move is made, and +// > the current player is notified of the modified move in the move results." +// +// All the modifications can be deduced through the observations provided +// by the observation tensors or strings and are not given explicitly. +// +// [1] https://rbc.jhuapl.edu/gameRules and +// [2] https://reconchess.readthedocs.io/en/latest/rules.html +// +// Parameters: +// "board_size" int Number of squares in each row and column (default: 8) +// "sense_size" int Size of the sensing square. +// "fen" string String describing the chess board position in +// Forsyth-Edwards Notation. The FEN has to match +// the board size. Default values are available for +// board sizes 4 and 8. + +namespace open_spiel { +namespace rbc { + +// Constants. +inline constexpr int NumPlayers() { return 2; } +inline constexpr double LossUtility() { return -1; } +inline constexpr double DrawUtility() { return 0; } +inline constexpr double WinUtility() { return 1; } + +// See action encoding below. +inline constexpr int NumDistinctActions() { return 4672; } + +// https://math.stackexchange.com/questions/194008/how-many-turns-can-a-chess-game-take-at-maximum +inline constexpr int MaxGameLength() { return 17695; } + +class RbcGame; +class RbcObserver; + +// What kind of move is the current player making? +enum class MovePhase { + kSensing, // First sense. + kMoving, // Then make a regular move. +}; +// Special value if sense location is not specified (beginning of the game, +// or if we want to hide the sensing results when opponent is moving). +constexpr int kSenseLocationNonSpecified = -1; + +// State of an in-play game. +class RbcState : public State { + public: + // Constructs a chess state at the given position in Forsyth-Edwards Notation. + // https://en.wikipedia.org/wiki/Forsyth%E2%80%93Edwards_Notation + RbcState(std::shared_ptr game, int board_size, + const std::string& fen); + + Player CurrentPlayer() const override { + return IsTerminal() ? kTerminalPlayerId : ColorToPlayer(Board().ToPlay()); + } + std::vector LegalActions() const override; + std::string ActionToString(Player player, Action action) const override; + std::string ToString() const override; + + bool IsTerminal() const override { + return static_cast(MaybeFinalReturns()); + } + + std::vector Returns() const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + void UndoAction(Player player, Action action) override; + + // Current board. + chess::ChessBoard& Board() { return current_board_; } + const chess::ChessBoard& Board() const { return current_board_; } + int BoardSize() const { return current_board_.BoardSize(); } + + // Starting board. 
+ chess::ChessBoard& StartBoard() { return start_board_; } + const chess::ChessBoard& StartBoard() const { return start_board_; } + + std::vector& MovesHistory() { return moves_history_; } + const std::vector& MovesHistory() const { + return moves_history_; + } + + // Draw can be claimed under the FIDE 3-fold repetition rule (the current + // board position has already appeared twice in the history). + bool IsRepetitionDraw() const; + + const RbcGame* game() const { + return open_spiel::down_cast(game_.get()); + } + + MovePhase phase() const { return phase_; } + const std::array& sense_location() const { return sense_location_; } + bool move_captured() const { return move_captured_; } + bool illegal_move_attempted() const { return illegal_move_attempted_; } + + protected: + void DoApplyAction(Action action) override; + + private: + friend class RbcObserver; + + // Calculates legal actions and caches them. This is separate from + // LegalActions() as there are a number of other methods that need the value + // of LegalActions. This is a separate method as it's called from + // IsTerminal(), which is also called by LegalActions(). + void MaybeGenerateLegalActions() const; + + absl::optional> MaybeFinalReturns() const; + + // We have to store every move made to check for repetitions and to implement + // undo. We store the current board position as an optimization. + std::vector moves_history_; + // We store the start board for history to support games not starting + // from the start position. + chess::ChessBoard start_board_; + // We store the current board position as an optimization. + chess::ChessBoard current_board_; + // How to interpret current actions. + MovePhase phase_; + // Which place was the last sensing made at? (for each player). + // See also RbcGame::inner_size() + std::array sense_location_ = {kSenseLocationNonSpecified, + kSenseLocationNonSpecified}; + bool move_captured_ = false; + bool illegal_move_attempted_ = false; + + // RepetitionTable records how many times the given hash exists in the history + // stack (including the current board). + // We are already indexing by board hash, so there is no need to hash that + // hash again, so we use a custom passthrough hasher. + class PassthroughHash { + public: + std::size_t operator()(uint64_t x) const { + return static_cast(x); + } + }; + using RepetitionTable = absl::flat_hash_map; + RepetitionTable repetitions_; + mutable absl::optional> cached_legal_actions_; +}; + +// Game object. +class RbcGame : public Game { + public: + explicit RbcGame(const GameParameters& params); + int NumDistinctActions() const override { + return chess::NumDistinctActions(); + } + std::unique_ptr NewInitialState() const override { + return absl::make_unique(shared_from_this(), board_size_, fen_); + } + std::unique_ptr NewInitialState( + const std::string& fen) const override { + return absl::make_unique(shared_from_this(), board_size_, fen); + } + int NumPlayers() const override { return chess::NumPlayers(); } + double MinUtility() const override { return LossUtility(); } + absl::optional UtilitySum() const override { return DrawUtility(); } + double MaxUtility() const override { return WinUtility(); } + std::vector ObservationTensorShape() const override { + std::vector shape{ + 17 * 2 // public: Num of pieces for both sides + + 2 // public: Phase + + 2 // public: Illegal move + + 2 // public: Capture + + 2 // public: Side to play + + 2 * 2 // private: left/right castling rights, one-hot encoded. 
+ + (6 * 2) // private: board + sensing: 6 piece types + * board_size_ * board_size_}; + return shape; + } + int MaxGameLength() const override { return chess::MaxGameLength(); } + std::shared_ptr MakeObserver( + absl::optional iig_obs_type, + const GameParameters& params) const; + + std::shared_ptr default_observer_; + + int board_size() const { return board_size_; } + int sense_size() const { return sense_size_; } + // Sensing is done only within the board, as it makes no sense for + // any player to do non-efficient sensing that goes outside of the board. + // The sense location is encoded as coordinates within this smaller + // inner square. + int inner_size() const { return board_size_ - sense_size_ + 1; } + + private: + const int board_size_; + const int sense_size_; + const std::string fen_; +}; + +} // namespace rbc +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_RBC_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/rbc/rbc_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/rbc/rbc_test.cc new file mode 100644 index 0000000..1289770 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/rbc/rbc_test.cc @@ -0,0 +1,205 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/rbc/rbc.h" + +#include "open_spiel/games/chess/chess.h" +#include "open_spiel/games/chess/chess_board.h" +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace rbc { +namespace { + +namespace testing = open_spiel::testing; + +void TestPassMove() { + auto game = LoadGame("rbc"); + std::unique_ptr s = game->NewInitialState(); + SPIEL_CHECK_EQ(s->ToString(), chess::kDefaultStandardFEN); + + // First move + SPIEL_CHECK_EQ(s->ActionToString(Player{0}, 0), "Sense a1"); + SPIEL_CHECK_EQ(s->StringToAction("Sense a1"), 0); + s->ApplyAction(0); // Sense phase + SPIEL_CHECK_EQ(s->ActionToString(Player{0}, chess::kPassAction), "pass"); + SPIEL_CHECK_EQ(s->StringToAction("pass"), chess::kPassAction); + s->ApplyAction(chess::kPassAction); // Move phase + std::string black_fen = chess::kDefaultStandardFEN; + std::replace(black_fen.begin(), black_fen.end(), 'w', 'b'); // Switch sides. + SPIEL_CHECK_EQ(s->ToString(), black_fen); + + // Second move + SPIEL_CHECK_EQ(s->ActionToString(Player{1}, 0), "Sense a1"); + s->ApplyAction(0); // Sense phase + SPIEL_CHECK_EQ(s->ActionToString(Player{1}, chess::kPassAction), "pass"); + s->ApplyAction(chess::kPassAction); // Move phase + std::string white_fen = chess::kDefaultStandardFEN; + std::replace(white_fen.begin(), white_fen.end(), '1', '2'); // Update clock. + SPIEL_CHECK_EQ(s->ToString(), white_fen); +} + +void TestRepetitionDraw() { + auto game = LoadGame("rbc"); + auto state = game->NewInitialState(); + auto rbc_state = down_cast(state.get()); + for (int i = 0; i < 2 * 2; ++i) { // 2 players, 2 repetitions. 
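+    // (Note added in this vendored copy: RbcState's constructor already
+    // records the start position once, so four pass-moves are expected to
+    // raise its repetition count to kNumRepetitionsToDraw = 3 and end the
+    // game in a draw.)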
+ SPIEL_CHECK_FALSE(state->IsTerminal()); + SPIEL_CHECK_FALSE(rbc_state->IsRepetitionDraw()); + state->ApplyAction(state->StringToAction("Sense a1")); + state->ApplyAction(chess::kPassAction); + } + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_TRUE(rbc_state->IsRepetitionDraw()); +} + +// Helper function. Get the named tensor. +SpanTensor GetSpanTensor(Observation& obs, const std::string& name) { + for (SpanTensor span_tensor : obs.tensors()) { + if (span_tensor.info().name() == name) return span_tensor; + } + SpielFatalError(absl::StrCat("SpanTensor '", name, "' was not found!")); +} + +void TestIllegalMovesFlag() { + // Setup test code. + auto game = LoadGame("rbc"); + Observation observation(*game, game->MakeObserver(kDefaultObsType, {})); + SpanTensor illegal_move_span = GetSpanTensor(observation, "illegal_move"); + SPIEL_CHECK_EQ(illegal_move_span.info().size(), 2); // Binary observation. + auto CHECK_OBSERVATION = [&](const State& s, bool illegal) { + observation.SetFrom(s, kDefaultPlayerId); + SPIEL_CHECK_EQ(illegal_move_span.at(0), !illegal); + SPIEL_CHECK_EQ(illegal_move_span.at(1), illegal); + }; + + { // No move has been made. + auto state = game->NewInitialState(); + CHECK_OBSERVATION(*state, /*illegal=*/false); + } + { // Legal pawn move. + auto state = game->NewInitialState(); + state->ApplyAction(state->StringToAction("Sense a1")); + state->ApplyAction(state->StringToAction("a2a4")); + CHECK_OBSERVATION(*state, /*illegal=*/false); + } + { // Illegal pawn attack. + auto state = game->NewInitialState(); + state->ApplyAction(state->StringToAction("Sense a1")); + state->ApplyAction(state->StringToAction("a2b3")); + CHECK_OBSERVATION(*state, /*illegal=*/true); + } + { // Illegal pawn forward move. + auto state = game->NewInitialState(); + state->ApplyAction(state->StringToAction("Sense a1")); + state->ApplyAction(state->StringToAction("a2a4")); + CHECK_OBSERVATION(*state, /*illegal=*/false); + state->ApplyAction(state->StringToAction("Sense a1")); + state->ApplyAction(state->StringToAction("a7a5")); + CHECK_OBSERVATION(*state, /*illegal=*/false); + state->ApplyAction(state->StringToAction("Sense a1")); + state->ApplyAction(state->StringToAction("a4a5")); + CHECK_OBSERVATION(*state, /*illegal=*/true); + } + { // Allow castling when king is in check. + auto state = game->NewInitialState( + "rnbqkb1r/pppppp1p/6p1/8/8/3n1NPB/PPP1PP1P/RNBQK2R w KQkq - 0 1"); + state->ApplyAction(state->StringToAction("Sense a1")); + state->ApplyAction(state->StringToAction("e1g1")); + CHECK_OBSERVATION(*state, /*illegal=*/false); + } + { // Allow castling through a square controlled by the enemy. + auto state = game->NewInitialState( + "rnbqkb1r/pppppp1p/6p1/8/8/5NPB/PPPnPP1P/RNBQK2R w KQkq - 0 1"); + state->ApplyAction(state->StringToAction("Sense a1")); + state->ApplyAction(state->StringToAction("e1g1")); + CHECK_OBSERVATION(*state, /*illegal=*/false); + } + { // Allow castling when king will be in check after castling. + auto state = game->NewInitialState( + "rnbqkb1r/pppppp1p/6p1/8/8/5nPB/PPP1PP1P/RNBQK2R w KQkq - 0 1"); + state->ApplyAction(state->StringToAction("Sense a1")); + state->ApplyAction(state->StringToAction("e1g1")); + CHECK_OBSERVATION(*state, /*illegal=*/false); + } + { // Illegal castling: + // There is an opponent piece between the king and the rook. 
+ auto state = game->NewInitialState( + "rnbqkb1r/pppppp1p/6p1/8/8/6PB/PPP1PP1P/RNBQK1nR w KQkq - 0 1"); + state->ApplyAction(state->StringToAction("Sense a1")); + state->ApplyAction(state->StringToAction("e1g1")); + CHECK_OBSERVATION(*state, /*illegal=*/true); + } +} + +void TestKingCaptureEndsTheGame() { + auto game = LoadGame("rbc"); + auto state = game->NewInitialState( + "rnbqk1nr/pppp1ppp/4p3/8/4P3/3P1Pb1/PPP3PP/RNBQKBNR b KQkq - 0 1"); + SPIEL_CHECK_FALSE(state->IsTerminal()); + state->ApplyAction(state->StringToAction("Sense a1")); + state->ApplyAction(state->StringToAction("g3e1")); + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_EQ(state->PlayerReturn(Player{0}), 1); +} + +void TestMakeKingMoveInCheck() { + // Make a move that leaves the king in check. + auto game = LoadGame("rbc"); + auto state = game->NewInitialState( + "rnbqk1nr/pppp1ppp/4p3/8/4P3/3P1Pb1/PPP3PP/RNBQKBNR w KQkq - 0 1"); + state->ApplyAction(state->StringToAction("Sense a1")); + state->ApplyAction(state->StringToAction("e1f2")); +} + +void TestPawnBreachingMoveTwoSquares() { + auto game = LoadGame("rbc"); + auto state = game->NewInitialState( + "rnbqk1nr/pppp1ppp/4p3/8/1b6/4P3/PPPP1PPP/RNBQKBNR w KQkq - 0 1"); + state->ApplyAction(state->StringToAction("Sense a1")); + state->ApplyAction(state->StringToAction("b2b4")); + // Pawn moved only one square. + SPIEL_CHECK_EQ( + state->ToString(), + "rnbqk1nr/pppp1ppp/4p3/8/1b6/1P2P3/P1PP1PPP/RNBQKBNR b KQkq - 0 1"); + // And the move was marked as illegal. + SPIEL_CHECK_TRUE(down_cast(state.get())->illegal_move_attempted()); +} + +void BasicRbcTests(int board_size) { + GameParameters params; + params["board_size"] = GameParameter(board_size); + + testing::LoadGameTest("rbc"); + testing::NoChanceOutcomesTest(*LoadGame("rbc", params)); + testing::RandomSimTest(*LoadGame("rbc", params), 100); + testing::RandomSimTestWithUndo(*LoadGame("rbc", params), 1); +} + +} // namespace +} // namespace rbc +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::rbc::TestPassMove(); + open_spiel::rbc::TestRepetitionDraw(); + open_spiel::rbc::TestIllegalMovesFlag(); + open_spiel::rbc::TestKingCaptureEndsTheGame(); + open_spiel::rbc::TestMakeKingMoveInCheck(); + open_spiel::rbc::TestPawnBreachingMoveTwoSquares(); + + open_spiel::rbc::BasicRbcTests(/*board_size=*/8); + open_spiel::rbc::BasicRbcTests(/*board_size=*/4); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/sheriff/sheriff.cc b/scenarios/bargaining/open_spiel/open_spiel/games/sheriff/sheriff.cc new file mode 100644 index 0000000..b5f38a1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/sheriff/sheriff.cc @@ -0,0 +1,456 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/sheriff/sheriff.h" + +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace sheriff { +inline constexpr const Player kSmuggler = Player{0}; +inline constexpr const Player kSheriff = Player{1}; + +namespace { +// Facts about the game +const GameType kGameType{ + /* short_name = */ "sheriff", + /* long_name = */ "Sheriff", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kImperfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kTerminal, + /* max_num_players = */ 2, + /* min_num_players = */ 2, + /* provides_information_state_string = */ true, + /* provides_information_state_tensor = */ true, + /* provides_observation_string = */ false, + /* provides_observation_tensor = */ false, + /* parameter_specification = */ + {{"item_penalty", GameParameter(kDefaultItemPenalty)}, + {"item_value", GameParameter(kDefaultItemValue)}, + {"sheriff_penalty", GameParameter(kDefaultSheriffPenalty)}, + {"max_bribe", GameParameter(kDefaultMaxBribe)}, + {"max_items", GameParameter(kDefaultMaxItems)}, + {"num_rounds", GameParameter(kDefaultNumRounds)}}}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::make_shared(params); +} +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +template +void StrAppendVector(std::string* s, const std::vector& v) { + absl::StrAppend(s, "["); + for (size_t index = 0; index < v.size(); ++index) { + if (index > 0) absl::StrAppend(s, ","); + absl::StrAppend(s, v[index]); + } + absl::StrAppend(s, "]"); +} +} // namespace + +SheriffState::SheriffState( + const std::shared_ptr sheriff_game) + : State(sheriff_game), sheriff_game_(sheriff_game) {} + +Player SheriffState::CurrentPlayer() const { + if (!num_illegal_items_) { + // The smuggler still hasn't decided the number of illegal items to + // place in the cargo. The game has just begun. + return kSmuggler; + } else if (bribes_.size() == inspection_feedback_.size()) { + // The smuggler has received feedback for all the bribes. + // + // If the number of bribes is equal to the number of bribing turns in the + // game, the game is over. + if (bribes_.size() == sheriff_game_->conf.num_rounds) { + return kTerminalPlayerId; + } else { + // Otherwise, a new bribing round begins. + return kSmuggler; + } + } else { + // The smuggles has made a bribe, but no feedback has been given out yet. + return kSheriff; + } +} + +std::vector SheriffState::LegalActions() const { + const SheriffGame::SheriffGameConfiguration& conf = sheriff_game_->conf; + + std::vector action_ids; + if (IsTerminal()) { + return {}; + } else if (!num_illegal_items_) { + // This is the beginning of the game. The smuggles must decide how many + // illegal items to place in the cargo, which must be an integer in the + // range [0, conf.max_items]. The action id will correspond to the number + // of illegal items placed in the cargo. + + action_ids.reserve(conf.max_items + 1); + for (uint32_t num_illegal_items = 0; num_illegal_items <= conf.max_items; + ++num_illegal_items) { + action_ids.push_back( + sheriff_game_->SerializeItemPlacementAction(num_illegal_items)); + } + } else { + // If we are here, we are inside of a bribing round. There are two cases: + // - it is the *smuggler's* turn. This means that the bribing round has + // just started. 
The actions that the player can use correspond to the + // set of valid bribes, which is the range [0, conf.max_bribe]. + // + // The action id corresponds to the bribe amount. + // - it is the *sheriff's* turn. The sheriff can decide to say they will + // _not_ inspect (action id: 0), or that they _will_ inspect the cargo + // (action id: 1). + const Player player = CurrentPlayer(); + + if (player == kSmuggler) { + action_ids.reserve(conf.max_bribe + 1); + for (uint32_t bribe = 0; bribe <= conf.max_bribe; ++bribe) { + action_ids.push_back(sheriff_game_->SerializeBribe(bribe)); + } + } else { + action_ids = {sheriff_game_->SerializeInspectionFeedback(false), + sheriff_game_->SerializeInspectionFeedback(true)}; + } + } + + return action_ids; +} + +std::string SheriffState::ActionToString(Player player, + Action action_id) const { + return sheriff_game_->ActionToString(player, action_id); +} + +std::string SheriffState::ToString() const { + if (!num_illegal_items_) { + return "Initial game state (smuggler hasn't decided the number of illegal " + "cargo items yet)"; + } else { + std::string state_str; + + absl::StrAppend(&state_str, + "Num illegal items in cargo: ", *num_illegal_items_, "\n"); + absl::StrAppend(&state_str, "Bribes : "); + StrAppendVector(&state_str, bribes_); + absl::StrAppend(&state_str, "\nFeedback: "); + StrAppendVector(&state_str, inspection_feedback_); + + return state_str; + } +} + +bool SheriffState::IsTerminal() const { + return CurrentPlayer() == kTerminalPlayerId; +} + +std::vector SheriffState::Returns() const { + if (!IsTerminal()) { + return {0.0, 0.0}; + } else { + SPIEL_CHECK_EQ(inspection_feedback_.size(), bribes_.size()); + SPIEL_CHECK_GT(inspection_feedback_.size(), 0); + SPIEL_CHECK_TRUE(num_illegal_items_); + + const uint32_t num_illegal_items = *num_illegal_items_; + const uint32_t bribe = bribes_.back(); + const bool sheriff_inspects = inspection_feedback_.back(); + const SheriffGame::SheriffGameConfiguration& conf = sheriff_game_->conf; + + if (sheriff_inspects) { + if (num_illegal_items > 0) { + // The smuggler was caught red-handed. + return {-static_cast(num_illegal_items) * conf.item_penalty, + static_cast(num_illegal_items) * conf.item_penalty}; + } else { + // The sheriff must pay up for inspecting a legal cargo. + return {conf.sheriff_penalty, -conf.sheriff_penalty}; + } + } else { + return {static_cast(num_illegal_items) * conf.item_value - bribe, + static_cast(bribe)}; + } + } +} + +std::unique_ptr SheriffState::Clone() const { + return std::make_unique(*this); +} + +std::string SheriffState::InformationStateString(Player player) const { + SPIEL_CHECK_TRUE(player >= 0 && player < NumPlayers()); + + std::string infostring = absl::StrCat("T=", MoveNumber(), " "); + if (player == kSmuggler) { + absl::StrAppend(&infostring, "num_illegal_items:"); + if (num_illegal_items_) { + absl::StrAppend(&infostring, *num_illegal_items_); + } else { + absl::StrAppend(&infostring, "none"); + } + } + + SPIEL_CHECK_GE(inspection_feedback_.size() + 1, bribes_.size()); + SPIEL_CHECK_LE(inspection_feedback_.size(), bribes_.size()); + for (size_t index = 0; index < bribes_.size(); ++index) { + absl::StrAppend(&infostring, "/bribe:", bribes_.at(index)); + + if (index < inspection_feedback_.size()) { + absl::StrAppend(&infostring, + "/feedback:", inspection_feedback_.at(index)); + } + } + + return infostring; +} + +std::vector SheriffGame::InformationStateTensorShape() const { + return { + 2 + // Whose turn? + 2 + // Who is observing? 
+ static_cast(conf.num_rounds) + 1 + // Move number (0 to rounds) + static_cast(conf.max_items) + 1 + // Number of items (0 to max) + // Each round, a bribe in { 0, 1, ..., max_bribe } plus one bit for yes/no + static_cast(conf.num_rounds) * + (static_cast(conf.max_bribe) + 1 + 1) + }; +} + +void SheriffState::InformationStateTensor( + Player player, absl::Span values) const { + SPIEL_CHECK_TRUE(player >= 0 && player < NumPlayers()); + + SPIEL_CHECK_EQ(values.size(), game_->InformationStateTensorSize()); + std::fill(values.begin(), values.end(), 0); + + // Two-player game. + SPIEL_CHECK_TRUE(player == 0 || player == 1); + + int offset = 0; + const int num_players = game_->NumPlayers(); + const Player cur_player = CurrentPlayer(); + const auto* parent_game = down_cast(game_.get()); + + // Set a bit to indicate whose turn it is. + if (cur_player != kTerminalPlayerId) { + values[cur_player] = 1; + } + offset += num_players; + + // Set a bit to indicate whose is observing + values[offset + player] = 1; + offset += num_players; + + // Move number + values[offset + MoveNumber()] = 1; + offset += parent_game->num_rounds() + 1; + + // Number of items chosen by the smuggler + if (player == kSmuggler) { + int index = (num_illegal_items_ ? *num_illegal_items_ : 0); + values[offset + index] = 1; + } + offset += parent_game->max_items() + 1; + + SPIEL_CHECK_GE(inspection_feedback_.size() + 1, bribes_.size()); + SPIEL_CHECK_LE(inspection_feedback_.size(), bribes_.size()); + for (size_t index = 0; index < bribes_.size(); ++index) { + int inner_offset = index * (parent_game->max_bribe() + 2); + values[offset + inner_offset + bribes_.at(index)] = 1; + + if (index < inspection_feedback_.size()) { + int bool_bit = inspection_feedback_.at(index) ? 0 : 1; + values[offset + inner_offset + parent_game->max_bribe() + 1] = bool_bit; + } + } + offset += parent_game->num_rounds() * (parent_game->max_bribe() + 2); + + SPIEL_CHECK_EQ(offset, values.size()); +} + +void SheriffState::UndoAction(Player player, Action action_id) { + SPIEL_CHECK_TRUE(!history_.empty() && + (history_.back() == PlayerAction{player, action_id})); + history_.pop_back(); + --move_number_; + + if (!bribes_.empty()) { + if (bribes_.size() == inspection_feedback_.size()) { + // The last action must have been for the sheriff to return feedback about + // whether or not the sheriff would inspect the cargo. + inspection_feedback_.pop_back(); + } else { + // The last action must have been for the smuggler to offer a new bribe. + bribes_.pop_back(); + } + } else { + // If there are no bribes yet, then the only possibility is that the game + // has just started and the only action so far was for the smuggler to + // select the number of illegal items to place into the smuggler's cargo. + SPIEL_CHECK_TRUE(num_illegal_items_); + num_illegal_items_ = absl::nullopt; + } +} + +void SheriffState::DoApplyAction(Action action_id) { + SPIEL_CHECK_FALSE(IsTerminal()); + + if (!num_illegal_items_) { + // The action must represent the selection of the number of illegal items in + // the cargo. + + SPIEL_CHECK_EQ(CurrentPlayer(), kSmuggler); + num_illegal_items_ = + sheriff_game_->DeserializeItemPlacementAction(action_id); + } else if (bribes_.size() == inspection_feedback_.size()) { + // The action must represent a new bribe made by the smuggler. + SPIEL_CHECK_EQ(CurrentPlayer(), kSmuggler); + bribes_.push_back(sheriff_game_->DeserializeBribe(action_id)); + } else { + // The action must represent the inspection feedback returned by the + // sheriff. 
+ SPIEL_CHECK_EQ(CurrentPlayer(), kSheriff); + inspection_feedback_.push_back( + sheriff_game_->DeserializeInspectionFeedback(action_id)); + } +} + +SheriffGame::SheriffGame(const GameParameters& params) + : Game(kGameType, params) { + conf.item_penalty = ParameterValue("item_penalty"); + SPIEL_CHECK_GE(conf.item_penalty, 0.0); + + conf.item_value = ParameterValue("item_value"); + SPIEL_CHECK_GE(conf.item_value, 0.0); + + conf.sheriff_penalty = ParameterValue("sheriff_penalty"); + SPIEL_CHECK_GE(conf.sheriff_penalty, 0.0); + + conf.max_bribe = ParameterValue("max_bribe"); + SPIEL_CHECK_GE(conf.max_bribe, 0); + + conf.max_items = ParameterValue("max_items"); + SPIEL_CHECK_GE(conf.max_items, 1); + + conf.num_rounds = ParameterValue("num_rounds"); + SPIEL_CHECK_GE(conf.num_rounds, 1); +} + +int SheriffGame::NumDistinctActions() const { + return 4 + conf.max_items + conf.max_bribe; +} + +std::unique_ptr SheriffGame::NewInitialState() const { + const auto ptr = + std::dynamic_pointer_cast(shared_from_this()); + return std::make_unique(ptr); +} + +double SheriffGame::MinUtility() const { + return std::min({-static_cast(conf.max_items) * conf.item_penalty, + -static_cast(conf.max_bribe), + -conf.sheriff_penalty}); +} + +double SheriffGame::MaxUtility() const { + return std::max({conf.sheriff_penalty, static_cast(conf.max_bribe), + static_cast(conf.max_items) * conf.item_value, + static_cast(conf.max_items) * conf.item_penalty}); +} + +int SheriffGame::MaxGameLength() const { return 2 * conf.num_rounds + 1; } + +std::string SheriffGame::ActionToString(Player player, Action action_id) const { + std::string action_string; + + if (action_id < 2) { + SPIEL_CHECK_EQ(player, kSheriff); + const bool feedback = DeserializeInspectionFeedback(action_id); + if (!feedback) { + action_string = "InspectionFeedback(will_inspect=False)"; + } else { + action_string = "InspectionFeedback(will_inspect=True)"; + } + } else if (action_id < 3 + conf.max_items) { + SPIEL_CHECK_EQ(player, kSmuggler); + + const uint32_t num_illegal_items = + DeserializeItemPlacementAction(action_id); + absl::StrAppend(&action_string, "PlaceIllegalItems(num=", num_illegal_items, + ")"); + } else { + SPIEL_CHECK_EQ(player, kSmuggler); + + const uint32_t bribe = DeserializeBribe(action_id); + absl::StrAppend(&action_string, "Bribe(amount=", bribe, ")"); + } + + return action_string; +} + +Action SheriffGame::SerializeItemPlacementAction( + const uint32_t num_illegal_items) const { + SPIEL_CHECK_LE(num_illegal_items, conf.max_items); + return 2 + num_illegal_items; +} + +Action SheriffGame::SerializeBribe(const uint32_t bribe) const { + SPIEL_CHECK_LE(bribe, conf.max_bribe); + return 3 + conf.max_items + bribe; +} + +Action SheriffGame::SerializeInspectionFeedback(const bool feedback) const { + if (!feedback) { + return 0; + } else { + return 1; + } +} + +uint32_t SheriffGame::DeserializeItemPlacementAction( + const Action action_id) const { + SPIEL_CHECK_GE(action_id, 2); + SPIEL_CHECK_LE(action_id, 2 + conf.max_items); + + return action_id - 2; +} + +uint32_t SheriffGame::DeserializeBribe(const Action action_id) const { + SPIEL_CHECK_GE(action_id, 3 + conf.max_items); + SPIEL_CHECK_LE(action_id, 3 + conf.max_items + conf.max_bribe); + + return action_id - 3 - conf.max_items; +} + +bool SheriffGame::DeserializeInspectionFeedback(const Action action_id) const { + SPIEL_CHECK_TRUE(action_id == 0 || action_id == 1); + + if (action_id == 0) { + return false; + } else { + return true; + } +} + +} // namespace sheriff +} // namespace open_spiel 
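+
+// (Note added in this vendored copy, not upstream: with the default
+// parameters max_items = 3 and max_bribe = 3, the encoding above gives
+// NumDistinctActions() = 4 + 3 + 3 = 10, laid out as
+//   ids 0..1 -> sheriff feedback (0 = will not inspect, 1 = will inspect),
+//   ids 2..5 -> smuggler places 0..3 illegal items,
+//   ids 6..9 -> smuggler offers a bribe of 0..3.)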
diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/sheriff/sheriff.h b/scenarios/bargaining/open_spiel/open_spiel/games/sheriff/sheriff.h new file mode 100644 index 0000000..57b5a5b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/sheriff/sheriff.h @@ -0,0 +1,218 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This game is a simplified version of the Sheriff of Nottingham board +// game, as introduced in [1]. +// +// Game dynamics +// ============= +// +// Player 1 (the "smuggler") selects the number of `num_items` (0 or more) +// illegal items to be placed in the cargo. The selected number is unknown to +// Player 2 (the "sheriff"). +// +// Then, the game proceeds for `num_rounds` bargaining rounds. In each round, +// the following happens: +// +// - The smuggler selects an integer bribe amount, in the range 0 to `max_bribe` +// (inclusive). The selected amount is public information. However, the +// smuggler does *not* actually give money to the sheriff, unless this is the +// final round. +// - Then, the sheriff tells the smuggler whether he is planning to inspect the +// cargo. However, no cargo is actually inspected other than in the final +// round. The sheriff can change his mind in later rounds, except for the +// final round. +// +// Payoff computation +// ------------------ +// +// At the end of the game, the payoffs of the players are computed as follows: +// +// - If the sheriff did *not* inspect the cargo, the smuggler gains a payoff +// equal to `num_items * item_value - bribe_amount`, and the sheriff gets a +// payoff equal to `bribe_amount`, where `bribe_amount` is the *last* bribe +// amount. +// - If the sheriff inspects the cargo, and no illegal items were present, the +// smuggler gains a payoff equal to `sheriff_penalty`, while the sheriff loses +// `sheriff_penalty` value. +// - Finally, if the sheriff inspects the cargo and finds `num_items` (1 or +// more) illegal items, the smuggler loses a total value computed as +// `-num_item * item_penalty`, while the sheriff gains value `num_items * +// item_penalty`. 
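+//
+// Worked example (added in this vendored copy, not upstream text), using the
+// default parameters listed below (item_value = 1.0, item_penalty = 2.0,
+// sheriff_penalty = 3.0): suppose the smuggler hides 3 items and the final
+// bribe is 2. If the sheriff does not inspect, the smuggler receives
+// 3 * 1.0 - 2 = 1.0 and the sheriff receives 2.0. If the sheriff inspects,
+// the smuggler receives -3 * 2.0 = -6.0 and the sheriff +6.0. With 0 items
+// and an inspection, the smuggler receives +3.0 and the sheriff -3.0.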
+// +// +// Game size +// --------- +// +// +-------+-------+--------+-----------------+----------------+----------+ +// | Max | Max | Num | Num sequences | Num infosets | Terminal | +// | bribe | items | rounds | pl 0 | pl 1 | pl 0 | pl 1 | states | +// +-------+-------+--------+--------+--------+-------+--------+----------+ +// | 3 | 3 | 1 | 21 | 0 | 5 | 4 | 32 | +// | 3 | 5 | 2 | 223 | 73 | 55 | 36 | 384 | +// | 3 | 3 | 3 | 1173 | 585 | 293 | 292 | 2048 | +// | 3 | 5 | 4 | 14047 | 4681 | 3511 | 2340 | 24576 | +// +-------+-------+--------+--------+--------+-------+--------+----------+ +// | 5 | 3 | 1 | 29 | 13 | 5 | 6 | 48 | +// | 5 | 3 | 2 | 317 | 157 | 53 | 78 | 576 | +// | 5 | 5 | 3 | 5659 | 1885 | 943 | 942 | 10368 | +// +-------+-------+--------+--------+--------+-------+--------+----------+ +// +// +// +// Game parameters +// =============== +// +// "item_penalty" double Penalty (per item) incurred by the smuggler +// for carrying illegal goods (default = 2.0) +// "item_value" double Value of each successfully smuggled item +// (default = 1.0) +// "sheriff_penalty" double Sheriff's penalty for inspecting a cargo +// that does not contain illegal items +// (default = 3.0) +// "max_bribe" int Maximum bribe amount, per round +// (default = 3) +// "max_items" int Maximum numbers of items that fit the cargo +// (default = 3) +// "num_rounds" int Number of bargaining rounds (default = 4) +// +// References +// ========== +// +// If you want to reference the paper that introduced the benchmark game, here +// is a Bibtex citation: +// +// ``` +// @inproceedings{Farina19:Correlation, +// title= {Correlation in Extensive-Form Games: Saddle-Point Formulation +// and Benchmarks}, +// author= {Farina, Gabriele and Ling, Chun Kai and Fang, Fei and +// Sandholm, Tuomas}, +// booktitle={Conference on Neural Information Processing Systems +// (NeurIPS)}, +// year={2019} +// } +// ``` +// +// [1]: +// https://papers.nips.cc/paper/9122-correlation-in-extensive-form-games-saddle-point-formulation-and-benchmarks.pdf + +#ifndef OPEN_SPIEL_GAMES_SHERIFF_H_ +#define OPEN_SPIEL_GAMES_SHERIFF_H_ + +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace sheriff { + +inline constexpr double kDefaultItemPenalty = 2.0; +inline constexpr double kDefaultItemValue = 1.0; +inline constexpr double kDefaultSheriffPenalty = 3.0; +inline constexpr int kDefaultMaxBribe = 3; +inline constexpr int kDefaultMaxItems = 3; +inline constexpr int kDefaultNumRounds = 4; + +class SheriffGame final : public Game { + public: + explicit SheriffGame(const GameParameters& params); + + // Virtual functions inherited by OpenSpiel's `Game` interface + int NumDistinctActions() const override; + std::unique_ptr NewInitialState() const override; + int MaxChanceOutcomes() const override { return 0; } + int NumPlayers() const override { return 2; } + double MinUtility() const override; + double MaxUtility() const override; + int MaxGameLength() const override; + std::string ActionToString(Player player, Action action_id) const override; + std::vector InformationStateTensorShape() const override; + + // Information about the specific variant being played. 
+  uint32_t num_rounds() const { return conf.num_rounds; }
+  uint32_t max_items() const { return conf.max_items; }
+  uint32_t max_bribe() const { return conf.max_bribe; }
+
+  // Action (de)serialization routines
+  // =================================
+  //
+  // The inspection feedback for the sheriff player is serialized to action ids
+  // 0 (= will not inspect) and 1 (= will inspect). All other actions belong to
+  // the smuggler player. Actions [2, 2 + num_items] correspond to placements
+  // of illegal items in the cargo (action id 2 means "0 illegal items placed
+  // in the cargo"). Actions [3 + num_items, 4 + num_items + num_bribes]
+  // correspond to bribing actions (action 3 + num_items means that a bribe of
+  // 0 is selected).
+
+  Action SerializeItemPlacementAction(uint32_t num_illegal_items) const;
+  Action SerializeBribe(uint32_t bribe) const;
+  Action SerializeInspectionFeedback(bool feedback) const;
+
+  uint32_t DeserializeItemPlacementAction(Action action_id) const;
+  uint32_t DeserializeBribe(Action action_id) const;
+  bool DeserializeInspectionFeedback(Action action_id) const;
+
+  // Members
+  // =======
+
+  struct SheriffGameConfiguration {
+    double item_penalty;
+    double item_value;
+    double sheriff_penalty;
+
+    uint32_t max_items;
+    uint32_t max_bribe;
+    uint32_t num_rounds;
+  } conf;
+
+ private:
+  std::shared_ptr sheriff_game_;
+};
+
+class SheriffState final : public State {
+ public:
+  explicit SheriffState(std::shared_ptr<const SheriffGame> sheriff_game);
+  ~SheriffState() = default;
+
+  // Virtual functions inherited by OpenSpiel's `State` interface
+  Player CurrentPlayer() const override;
+  std::vector<Action> LegalActions() const override;
+  std::string ActionToString(Player player, Action action_id) const override;
+  std::string ToString() const override;
+  bool IsTerminal() const override;
+  std::vector<double> Returns() const override;
+  std::unique_ptr<State> Clone() const override;
+  std::string InformationStateString(Player player) const override;
+  void UndoAction(Player player, Action action_id) override;
+  void InformationStateTensor(Player player,
+                              absl::Span<float> values) const override;
+
+ protected:
+  void DoApplyAction(Action action_id) override;
+
+ private:
+  absl::optional<uint32_t> num_illegal_items_;
+  std::vector<uint32_t> bribes_;
+  std::vector<bool> inspection_feedback_;
+
+  std::shared_ptr<const SheriffGame> sheriff_game_;
+};
+
+}  // namespace sheriff
+}  // namespace open_spiel
+
+#endif  // OPEN_SPIEL_GAMES_SHERIFF_H_
diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/sheriff/sheriff_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/sheriff/sheriff_test.cc
new file mode 100644
index 0000000..063cdf4
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/games/sheriff/sheriff_test.cc
@@ -0,0 +1,177 @@
+// Copyright 2019 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "open_spiel/games/sheriff/sheriff.h" + +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_set.h" +#include "open_spiel/algorithms/expected_returns.h" +#include "open_spiel/algorithms/get_all_states.h" +#include "open_spiel/algorithms/tabular_exploitability.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace sheriff { +namespace { + +namespace testing = open_spiel::testing; + +void BasicSheriffTest() { + for (int num_rounds = 1; num_rounds <= 6; ++num_rounds) { + const std::shared_ptr game = + LoadGame("sheriff", {{"item_penalty", GameParameter(2.0)}, + {"item_value", GameParameter(1.5)}, + {"sheriff_penalty", GameParameter(3.14)}, + {"max_bribe", GameParameter(10)}, + {"max_items", GameParameter(10)}, + {"num_rounds", GameParameter(num_rounds)}}); + testing::RandomSimTestWithUndo(*game, 100); + testing::NoChanceOutcomesTest(*game); + } +} + +struct GameSize { + uint32_t num_sequences[2] = {0, 0}; // Layout: [Pl.0, Pl.1]. + uint32_t num_infostates[2] = {0, 0}; // Layout: [Pl.0, Pl.1]. + uint32_t num_terminal_states = 0; +}; + +GameSize ComputeGameSize(const std::shared_ptr game) { + std::map> all_states = + open_spiel::algorithms::GetAllStates( + *game, /* depth_limit = */ std::numeric_limits::max(), + /* include_terminals = */ true, + /* include_chance_states = */ false); + + GameSize size; + + // Account for empty sequence. + size.num_sequences[Player{0}] = 1; + size.num_sequences[Player{1}] = 1; + + absl::flat_hash_set infosets; + for (const auto& [_, state] : all_states) { + if (state->IsTerminal()) { + ++size.num_terminal_states; + } else { + const Player player = state->CurrentPlayer(); + SPIEL_CHECK_TRUE(player == Player{0} || player == Player{1}); + + // NOTE: there is no requirement that infostates strings be unique across + // players. So, we disambiguate the player by prepending it. + const std::string infostate_string = + absl::StrCat(player, state->InformationStateString()); + + if (infosets.insert(infostate_string).second) { + // The infostate string was not present in the hash set. We update the + // tally of infosets and sequences for the player. + size.num_infostates[player] += 1; + size.num_sequences[player] += state->LegalActions().size(); + } + } + } + + return size; +} + +void TestGameSizes() { + // We expect these game sizes: + // + // +-------+-------+--------+-----------------+----------------+----------+ + // | Max | Max | Num | Num sequences | Num infosets | Terminal | + // | bribe | items | rounds | pl 0 | pl 1 | pl 0 | pl 1 | states | + // +-------+-------+--------+--------+--------+-------+--------+----------+ + // | 3 | 3 | 1 | 21 | 9 | 5 | 4 | 32 | + // | 3 | 5 | 2 | 223 | 73 | 55 | 36 | 384 | + // | 3 | 3 | 3 | 1173 | 585 | 293 | 292 | 2048 | + // | 3 | 5 | 4 | 14047 | 4681 | 3511 | 2340 | 24576 | + // +-------+-------+--------+--------+--------+-------+--------+----------+ + // | 5 | 3 | 1 | 29 | 13 | 5 | 6 | 48 | + // | 5 | 3 | 2 | 317 | 157 | 53 | 78 | 576 | + // | 5 | 5 | 3 | 5659 | 1885 | 943 | 942 | 10368 | + // +-------+-------+--------+--------+--------+-------+--------+----------+ + + // To simplify the construction of game instance we introduce a lambda. 
+ const auto ConstructInstance = + [](const uint32_t& max_bribe, const uint32_t max_items, + const uint32_t num_rounds) -> std::shared_ptr { + return LoadGame( + "sheriff", + {{"max_bribe", GameParameter(static_cast(max_bribe))}, + {"max_items", GameParameter(static_cast(max_items))}, + {"num_rounds", GameParameter(static_cast(num_rounds))}}); + }; + + GameSize size = ComputeGameSize(ConstructInstance(3, 3, 1)); + SPIEL_CHECK_EQ(size.num_sequences[Player{0}], 21); + SPIEL_CHECK_EQ(size.num_sequences[Player{1}], 9); + SPIEL_CHECK_EQ(size.num_infostates[Player{0}], 5); + SPIEL_CHECK_EQ(size.num_infostates[Player{1}], 4); + SPIEL_CHECK_EQ(size.num_terminal_states, 32); + + size = ComputeGameSize(ConstructInstance(3, 5, 2)); + SPIEL_CHECK_EQ(size.num_sequences[Player{0}], 223); + SPIEL_CHECK_EQ(size.num_sequences[Player{1}], 73); + SPIEL_CHECK_EQ(size.num_infostates[Player{0}], 55); + SPIEL_CHECK_EQ(size.num_infostates[Player{1}], 36); + SPIEL_CHECK_EQ(size.num_terminal_states, 384); + + size = ComputeGameSize(ConstructInstance(3, 3, 3)); + SPIEL_CHECK_EQ(size.num_sequences[Player{0}], 1173); + SPIEL_CHECK_EQ(size.num_sequences[Player{1}], 585); + SPIEL_CHECK_EQ(size.num_infostates[Player{0}], 293); + SPIEL_CHECK_EQ(size.num_infostates[Player{1}], 292); + SPIEL_CHECK_EQ(size.num_terminal_states, 2048); + + size = ComputeGameSize(ConstructInstance(3, 5, 4)); + SPIEL_CHECK_EQ(size.num_sequences[Player{0}], 14047); + SPIEL_CHECK_EQ(size.num_sequences[Player{1}], 4681); + SPIEL_CHECK_EQ(size.num_infostates[Player{0}], 3511); + SPIEL_CHECK_EQ(size.num_infostates[Player{1}], 2340); + SPIEL_CHECK_EQ(size.num_terminal_states, 24576); + + size = ComputeGameSize(ConstructInstance(5, 3, 1)); + SPIEL_CHECK_EQ(size.num_sequences[Player{0}], 29); + SPIEL_CHECK_EQ(size.num_sequences[Player{1}], 13); + SPIEL_CHECK_EQ(size.num_infostates[Player{0}], 5); + SPIEL_CHECK_EQ(size.num_infostates[Player{1}], 6); + SPIEL_CHECK_EQ(size.num_terminal_states, 48); + + size = ComputeGameSize(ConstructInstance(5, 3, 2)); + SPIEL_CHECK_EQ(size.num_sequences[Player{0}], 317); + SPIEL_CHECK_EQ(size.num_sequences[Player{1}], 157); + SPIEL_CHECK_EQ(size.num_infostates[Player{0}], 53); + SPIEL_CHECK_EQ(size.num_infostates[Player{1}], 78); + SPIEL_CHECK_EQ(size.num_terminal_states, 576); + + size = ComputeGameSize(ConstructInstance(5, 5, 3)); + SPIEL_CHECK_EQ(size.num_sequences[Player{0}], 5659); + SPIEL_CHECK_EQ(size.num_sequences[Player{1}], 1885); + SPIEL_CHECK_EQ(size.num_infostates[Player{0}], 943); + SPIEL_CHECK_EQ(size.num_infostates[Player{1}], 942); + SPIEL_CHECK_EQ(size.num_terminal_states, 10368); +} +} // namespace +} // namespace sheriff +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::testing::LoadGameTest("sheriff"); + open_spiel::sheriff::BasicSheriffTest(); + open_spiel::sheriff::TestGameSizes(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/skat/skat.cc b/scenarios/bargaining/open_spiel/open_spiel/games/skat/skat.cc new file mode 100644 index 0000000..9f2bd5c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/skat/skat.cc @@ -0,0 +1,801 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/skat/skat.h" + +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + + +namespace open_spiel { +namespace skat { +namespace { + +const GameType kGameType{/*short_name=*/"skat", + /*long_name=*/"Skat", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/3, + /*min_num_players=*/3, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/{} // no parameters +}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new SkatGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + + +} // namespace + +Suit CardSuit(int card) { + return static_cast(card / 8); +} + +Rank CardRank(int card) { + return static_cast(card % 8); +} +const std::vector kCardSymbols = { + "🃇", "🃈", "🃉", "🃍", "🃎", "🃊", "🃁", "🃋", + "🂷", "🂸", "🂹", "🂽", "🂾", "🂺", "🂱", "🂻", + "🂧", "🂨", "🂩", "🂭", "🂮", "🂪", "🂡", "🂫", + "🃗", "🃘", "🃙", "🃝", "🃞", "🃚", "🃑", "🃛"}; + + +std::string ToCardSymbol(int card) { + if (card >= 0) { + return kCardSymbols.at(card); + } else { + return kEmptyCardSymbol; + } +} + +std::string SuitToString(Suit suit) { + switch (suit) { + case kDiamonds: + return "D"; + case kHearts: + return "H"; + case kSpades: + return "S"; + case kClubs: + return "C"; + default: + return "error"; + } +} + +std::string RankToString(Rank rank) { + switch (rank) { + case kSeven: + return "7"; + case kEight: + return "8"; + case kNine: + return "9"; + case kQueen: + return "Q"; + case kKing: + return "K"; + case kTen: + return "T"; + case kAce: + return "A"; + case kJack: + return "J"; + default: + return "error"; + } +} + +std::string PhaseToString(Phase phase) { + switch (phase) { + case kDeal: + return "dealing"; + case kBidding: + return "bidding"; + case kDiscardCards: + return "discarding cards"; + case kPlay: + return "playing"; + case kGameOver: + return "game over"; + default: + return "error"; + } +} + +int CardValue(int card) { + switch (CardRank(card)) { + case kQueen: + return 3; + case kKing: + return 4; + case kTen: + return 10; + case kAce: + return 11; + case kJack: + return 2; + default: + return 0; // Seven, eight and nine. 
+ } +} + +std::string CardToString(int card) { + return SuitToString(CardSuit(card)) + RankToString(CardRank(card)); +} + +std::string CardsToString(const std::vector& cards) { + std::string result = ""; + for (auto& card : cards) { + absl::StrAppendFormat(&result, "%s ", ToCardSymbol(card)); + } + return result; +} + + +std::string SkatGameTypeToString(SkatGameType trump_game) { + switch (trump_game) { + case kUnknownGame: + return "unknown/pass"; + case kDiamondsTrump: + return "diamonds"; + case kHeartsTrump: + return "hearts"; + case kSpadesTrump: + return "spades"; + case kClubsTrump: + return "clubs"; + case kGrand: + return "grand"; + case kNullGame: + return "null"; + default: + return "error"; + } +} + +CardLocation PlayerToLocation(int player) { + switch (player) { + case 0: + return kHand0; + case 1: + return kHand1; + case 2: + return kHand2; + default: + return kDeck; + } +} + +// *********************************** Trick *********************************** + +int Trick::FirstCard() const { + if (cards_.empty()) { + return -1; + } else { + return cards_[0]; + } +} + +void Trick::PlayCard(int card) { + SPIEL_CHECK_LE(cards_.size(), kNumPlayers); + cards_.push_back(card); +} + +int Trick::PlayerAtPosition(int position) const { + SPIEL_CHECK_GE(position, 0); + SPIEL_CHECK_LE(position, 2); + return (leader_ + position) % kNumPlayers; +} + +int Trick::Points() const { + int sum = 0; + for (auto& card : cards_) { + sum += CardValue(card); + } + return sum; +} + +std::string Trick::ToString() const { + std::string result = absl::StrFormat("Leader: %d, ", leader_); + for (auto& card : cards_) { + if (card >= 0 && card < kNumCards) + absl::StrAppendFormat(&result, "%s ", ToCardSymbol(card)); + else + absl::StrAppendFormat(&result, "%s ", kEmptyCardSymbol); + } + return result; +} + +// ********************************* SkatState ********************************* + +SkatState::SkatState(std::shared_ptr game) + : State(game) { + card_locations_.fill(kDeck); + player_bids_.fill(kPass); +} + +std::string SkatState::ActionToString(Player player, Action action_id) const { + if (action_id < kBiddingActionBase) { + return CardToString(action_id); + } else { + return SkatGameTypeToString( + static_cast(action_id - kBiddingActionBase)); + } +} + +std::string SkatState::ToString() const { + std::string result = ""; + absl::StrAppendFormat(&result, "Phase: %s \n", PhaseToString(phase_)); + absl::StrAppendFormat(&result, "Current Player: %d", current_player_); + absl::StrAppendFormat(&result, "\n%s\n", CardLocationsToString()); + if (phase_ == kPlay || phase_ == kGameOver) { + absl::StrAppendFormat(&result, "Last trick won by player %d\n", + last_trick_winner_); + absl::StrAppendFormat(&result, "Solo Player: %d\n", solo_player_); + absl::StrAppendFormat(&result, "Points (Solo / Team): (%d / %d)\n", + points_solo_, points_team_); + absl::StrAppendFormat(&result, "Current Trick: %s\n", + CurrentTrick().ToString()); + if (CurrentTrickIndex() > 0) { + absl::StrAppendFormat(&result, "Last Trick: %s\n", + PreviousTrick().ToString()); + } + } + absl::StrAppendFormat(&result, "Game Type: %s\n", + SkatGameTypeToString(game_type_)); + return result; +} + +bool SkatState::IsTrump(int card) const { + // Nothing is trump in Null games. Otherwise Jacks are always trump. In a Suit + // game all cards of that suits are trump as well as Jacks. 
+ if (game_type_ == kNullGame) return false; + if (CardRank(card) == kJack) return true; + switch (game_type_) { + case kDiamondsTrump: + return CardSuit(card) == kDiamonds; + case kHeartsTrump: + return CardSuit(card) == kHearts; + case kSpadesTrump: + return CardSuit(card) == kSpades; + case kClubsTrump: + return CardSuit(card) == kClubs; + default: + return false; + } +} + +int SkatState::CardOrder(int card, int first_card) const { + if (IsTrump(card)) { + return 7 + TrumpOrder(card); + } else if (CardSuit(card) == CardSuit(first_card)) { // Following suit. + if (game_type_ == kNullGame) { + return NullOrder(CardRank(card)); + } else { + return static_cast(CardRank(card)); + } + } else { + return -1; + } +} + +int SkatState::TrumpOrder(int card) const { + if (!IsTrump(card)) { + return -1; + } else if (CardRank(card) == kJack) { + return static_cast(CardSuit(card)) + static_cast(kJack); + } else { + return static_cast(CardRank(card)); + } +} + +int SkatState::NullOrder(Rank rank) const { + switch (rank) { + case kSeven: + return 0; + case kEight: + return 1; + case kNine: + return 2; + case kTen: + return 3; + case kJack: + return 4; + case kQueen: + return 5; + case kKing: + return 6; + case kAce: + return 7; + default: + return -1; + } +} + +int SkatState::WinsTrick() const { + std::vector cards = PreviousTrick().GetCards(); + if (cards.empty()) return -1; + int winning_position = 0; + for (int i = 1; i < cards.size(); i++) { + if (CardOrder(cards[i], cards[0]) > + CardOrder(cards[winning_position], cards[0])) { + winning_position = i; + } + } + return PreviousTrick().PlayerAtPosition(winning_position); +} + +void SkatState::DoApplyAction(Action action) { + switch (phase_) { + case kDeal: + return ApplyDealAction(action); + case kBidding: + return ApplyBiddingAction(action - kBiddingActionBase); + case kDiscardCards: + return ApplyDiscardCardsAction(action); + case kPlay: + return ApplyPlayAction(action); + case kGameOver: + SpielFatalError("Cannot act in terminal states"); + } +} + +void SkatState::ApplyDealAction(int card) { + SPIEL_CHECK_EQ(card_locations_[card], kDeck); + int deal_round = history_.size(); + // Cards 0-2, 11-14, 23-25 to player 1. + // Cards 3-5, 15-18, 26-28 to player 2. + // Cards 6-8, 19-22, 29-31 to player 3. + // Cards 9-10 into the Skat. + // While this might seem a bit weird, this is the official order Skat cards + // are dealt. + if ((deal_round >= 0 && deal_round <= 2) || + (deal_round >= 11 && deal_round <= 14) || + (deal_round >= 23 && deal_round <= 25)) { + card_locations_[card] = kHand0; + } else if ((deal_round >= 3 && deal_round <= 5) || + (deal_round >= 15 && deal_round <= 18) || + (deal_round >= 26 && deal_round <= 28)) { + card_locations_[card] = kHand1; + } else if ((deal_round >= 6 && deal_round <= 8) || + (deal_round >= 19 && deal_round <= 22) || + (deal_round >= 29 && deal_round <= 31)) { + card_locations_[card] = kHand2; + } else if (deal_round == 9 || deal_round == 10) { + card_locations_[card] = kSkat; + } + if (deal_round == kNumCards - 1) { + current_player_ = 0; + phase_ = kBidding; + } +} + +void SkatState::ApplyBiddingAction(int game_type) { + // Simplified bidding as first come first serve. Players can say if they want + // to play or not on a first come first serve basis. Currently, the solo + // player is not able to touch the Skat. + player_bids_[current_player_] = game_type; + if (game_type == kPass) { + if (current_player_ < 2) { + current_player_ = NextPlayer(); + } else { // No one wants to play. 
+ phase_ = kGameOver; + } + } else { + EndBidding(current_player_, SkatGameType(game_type)); + } +} + +void SkatState::EndBidding(Player winner, SkatGameType game_type) { + solo_player_ = winner; + current_player_ = winner; + game_type_ = game_type; + // Winner takes up Skat cards. + for (int card = 0; card < kNumCards; card++) { + if (card_locations_[card] == kSkat) { + card_locations_[card] = PlayerToLocation(winner); + } + } + phase_ = kDiscardCards; +} + +int SkatState::CardsInSkat() const { + int cards_in_skat = 0; + for (int card = 0; card < kNumCards; card++) { + if (card_locations_[card] == kSkat) cards_in_skat++; + } + return cards_in_skat; +} + +void SkatState::ApplyDiscardCardsAction(int card) { + SPIEL_CHECK_LT(CardsInSkat(), 2); + SPIEL_CHECK_TRUE(current_player_ == solo_player_); + SPIEL_CHECK_TRUE(card_locations_[card] == PlayerToLocation(solo_player_)); + card_locations_[card] = kSkat; + + if (CardsInSkat() == 2) { + phase_ = kPlay; + current_player_ = 0; + } +} + +void SkatState::ApplyPlayAction(int card) { + SPIEL_CHECK_TRUE(card_locations_[card] == PlayerToLocation(current_player_)); + card_locations_[card] = kTrick; + if (num_cards_played_ == 0) { + CurrentTrick() = Trick(current_player_); + } + CurrentTrick().PlayCard(card); + num_cards_played_++; + if (num_cards_played_ % kNumPlayers == 0) { + last_trick_winner_ = WinsTrick(); + current_player_ = last_trick_winner_; + // When num_cards_played_ == kNumCards + kNumCardsInSkat CurrentTrick() is + // the same as PreviousTrick() and we don't want to overwrite it. + if (num_cards_played_ < kNumCards - kNumCardsInSkat) { + CurrentTrick() = Trick(current_player_); + } + // Winner plays next card. + if (last_trick_winner_ == solo_player_) { + points_solo_ += PreviousTrick().Points(); + if (game_type_ == kNullGame) { + // The solo player loses a Null game if they win any trick. The trick + // they win could be without points so we add one to make sure ScoreUp + // knows that the solo_player has won a trick. + points_solo_++; + phase_ = kGameOver; + ScoreUp(); + } + } else { + points_team_ += PreviousTrick().Points(); + } + } else { + current_player_ = NextPlayer(); + } + + if (num_cards_played_ == kNumCards - kNumCardsInSkat) { + phase_ = kGameOver; + ScoreUp(); + } +} + +void SkatState::ScoreUp() { + if (game_type_ == kNullGame) { + // Since we're using points as a reward we need to come up with some special + // rule for Null. + if (points_solo_ > 0) { + points_solo_ = 30; + points_team_ = 90; + } else { + points_solo_ = 90; + points_team_ = 30; + } + } else { + // Solo player gets the cards in the Skat (unless Null is played). 
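+    // Illustrative example (comment added for clarity): once the Skat points
+    // below have been added, the returns loop at the end of this function
+    // scales card points to utilities. With 75 of the 120 card points, the
+    // solo player receives (75 - 60) / 120.0 = +0.125 while each defender
+    // receives (45 - 60) / 240.0 = -0.0625, so the three returns sum to zero.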
+ for (int card = 0; card < kNumCards; card++) { + if (card_locations_[card] == kSkat) { + points_solo_ += CardValue(card); + } + } + } + for (int pl = 0; pl < kNumPlayers; ++pl) { + if (solo_player_ == pl) { + returns_[pl] = (points_solo_ - 60) / 120.0; + } else { + returns_[pl] = (points_team_ - 60) / 240.0; + } + } +} + +std::string SkatState::CardLocationsToString() const { + std::string deck = "Deck: "; + std::string hand0 = "Player 0: "; + std::string hand1 = "Player 1: "; + std::string hand2 = "Player 2: "; + std::string skat = "Skat: "; + for (int i = 0; i < kNumCards; i++) { + switch (card_locations_[i]) { + case kDeck: + absl::StrAppendFormat(&deck, "%s ", ToCardSymbol(i)); + break; + case kHand0: + absl::StrAppendFormat(&hand0, "%s ", ToCardSymbol(i)); + break; + case kHand1: + absl::StrAppendFormat(&hand1, "%s ", ToCardSymbol(i)); + break; + case kHand2: + absl::StrAppendFormat(&hand2, "%s ", ToCardSymbol(i)); + break; + case kSkat: + absl::StrAppendFormat(&skat, "%s ", ToCardSymbol(i)); + break; + default: + break; + } + } + return absl::StrFormat("%s\n%s\n%s\n%s\n%s\n", + deck, hand0, hand1, hand2, skat); +} + +std::vector SkatState::LegalActions() const { + switch (phase_) { + case kDeal: + return DealLegalActions(); + case kBidding: + return BiddingLegalActions(); + case kDiscardCards: + return DiscardCardsLegalActions(); + case kPlay: + return PlayLegalActions(); + default: + return {}; + } +} + +std::vector SkatState::DealLegalActions() const { + std::vector legal_actions; + legal_actions.reserve(kNumCards - history_.size()); + for (int i = 0; i < kNumCards; ++i) { + if (card_locations_[i] == kDeck) legal_actions.push_back(i); + } + return legal_actions; +} + +std::vector SkatState::BiddingLegalActions() const { + std::vector legal_actions; + legal_actions.push_back(kBiddingActionBase + kPass); + legal_actions.push_back(kBiddingActionBase + kDiamondsTrump); + legal_actions.push_back(kBiddingActionBase + kHeartsTrump); + legal_actions.push_back(kBiddingActionBase + kSpadesTrump); + legal_actions.push_back(kBiddingActionBase + kClubsTrump); + legal_actions.push_back(kBiddingActionBase + kGrand); + legal_actions.push_back(kBiddingActionBase + kNullGame); + return legal_actions; +} + +std::vector SkatState::DiscardCardsLegalActions() const { + std::vector legal_actions; + for (int card = 0; card < kNumCards; ++card) { + if (card_locations_[card] == current_player_ + 1) { + legal_actions.push_back(card); + } + } + return legal_actions; +} + +std::vector SkatState::PlayLegalActions() const { + std::vector legal_actions; + legal_actions.reserve(kNumTricks - num_cards_played_ / kNumPlayers); + if (num_cards_played_ % kNumPlayers != 0) { + // Check if we can follow suit. + int first_card = CurrentTrick().FirstCard(); + int suit = CardSuit(first_card); + if (game_type_ == kNullGame) { + for (int rank = 0; rank < kNumRanks; ++rank) { + int card = static_cast(suit) * kNumRanks + rank; + if (card_locations_[card] == PlayerToLocation(current_player_)) { + legal_actions.push_back(card); + } + } + } else { + // This is a bid fidely but it makes sure the legal actions are sorted + // (which is required), which the special status of jacks makes hard + // otherwise. 
+ for (int card = 0; card < kNumCards; ++card) { + if ((IsTrump(first_card) && IsTrump(card)) || + (suit == CardSuit(card) && + CardRank(card) != kJack && + CardRank(first_card) != kJack)) { + if (card_locations_[card] == PlayerToLocation(current_player_)) { + legal_actions.push_back(card); + } + } + } + } + } + + if (!legal_actions.empty()) { + return legal_actions; + } + + // Otherwise, we can play any of our cards. + for (int card = 0; card < kNumCards; ++card) { + if (card_locations_[card] == current_player_ + 1) { + legal_actions.push_back(card); + } + } + return legal_actions; +} + +std::vector> SkatState::ChanceOutcomes() const { + std::vector> outcomes; + int num_cards_remaining = kNumCards - history_.size(); + outcomes.reserve(num_cards_remaining); + const double p_card = 1.0 / static_cast(num_cards_remaining); + for (int card = 0; card < kNumCards; ++card) { + if (card_locations_[card] == kDeck) outcomes.emplace_back(card, p_card); + } + return outcomes; +} + +void SkatState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + std::fill(values.begin(), values.end(), 0.0); + SPIEL_CHECK_EQ(values.size(), game_->ObservationTensorSize()); + if (phase_ == Phase::kDeal) return; + auto ptr = values.begin(); + // Position: + ptr[player] = 1; + ptr += kNumPlayers; + // Phase + if (phase_ >= kBidding && phase_ <= kPlay) ptr[phase_ - kBidding] = 1; + ptr += 3; + // Players Cards + for (int i = 0; i < kNumCards; ++i) + if (card_locations_[i] == PlayerToLocation(player)) ptr[i] = 1; + ptr += kNumCards; + // All player bids. + for (int i = 0; i < kNumPlayers; i++) { + ptr[player_bids_[i]] = 1; + ptr += kNumGameTypes; + } + // Who is the solo player. + if (solo_player_ >= 0) ptr[solo_player_] = 1; + ptr += kNumPlayers; + // Information about the Skat only for the solo_player_. + if (player == solo_player_) { + for (int i = 0; i < kNumCards; ++i) + if (card_locations_[i] == kSkat) ptr[i] = 1; + } + ptr += kNumCards; + // Game type + ptr[game_type_] = 1; + ptr += kNumGameTypes; + // Current trick + if (phase_ == kPlay) { + ptr[CurrentTrick().Leader()] = 1; + ptr += kNumPlayers; + const auto& cards = CurrentTrick().GetCards(); + for (int i = 0; i < kNumPlayers; i++) { + if (cards.size() > i) ptr[cards[i]] = 1; + ptr += kNumCards; + } + } else { + ptr += kNumPlayers + kNumPlayers * kNumCards; + } + // Previous Trick + if (CurrentTrickIndex() > 0) { + ptr[PreviousTrick().Leader()] = 1; + ptr += kNumPlayers; + const auto& cards = PreviousTrick().GetCards(); + for (int i = 0; i < kNumPlayers; i++) { + if (cards.size() > i) ptr[cards[i]] = 1; + ptr += kNumCards; + } + } else { + ptr += kNumPlayers + kNumPlayers * kNumCards; + } +} + +template +std::vector GetCardsFromMultiHot(It multi_hot) { + std::vector cards; + for (int i = 0; i < kNumCards; i++) { + if (multi_hot[i]) cards.push_back(i); + } + return cards; +} + +template +int GetIntFromOneHot(It one_hot, int num_values) { + for (int i = 0; i < num_values; i++) { + if (one_hot[i]) return i; + } + return -1; +} + +std::string SkatState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + // We construct the ObservationString from the ObservationTensor to give + // some indication that the tensor representation is correct & complete. 
+ if (phase_ == Phase::kDeal) { + return "No Observation"; + } + std::vector tensor(game_->ObservationTensorSize()); + ObservationTensor(player, absl::MakeSpan(tensor)); + std::string rv; + auto ptr = tensor.begin(); + int player_pos = GetIntFromOneHot(ptr, kNumPlayers); + absl::StrAppend(&rv, "PlPos:", player_pos); + ptr += kNumPlayers; + Phase phase = kDeal; + if (ptr[0]) phase = kBidding; + else if (ptr[1]) phase = kDiscardCards; + else if (ptr[2]) phase = kPlay; + else + phase = kGameOver; + absl::StrAppend(&rv, "|Phase:", PhaseToString(phase)); + ptr += 3; + std::vector player_cards = GetCardsFromMultiHot(ptr); + absl::StrAppend(&rv, "|Hand:", CardsToString(player_cards)); + ptr += kNumCards; + absl::StrAppend(&rv, "|Bids:"); + for (int i = 0; i < kNumPlayers; i++) { + int player_bid = GetIntFromOneHot(ptr, kNumGameTypes); + absl::StrAppend( + &rv, SkatGameTypeToString(static_cast(player_bid)), " "); + ptr += kNumGameTypes; + } + Player solo_player = GetIntFromOneHot(ptr, kNumPlayers); + absl::StrAppend(&rv, "|SoloPl:", solo_player); + ptr += kNumPlayers; + std::vector skat_cards = GetCardsFromMultiHot(ptr); + absl::StrAppend(&rv, "|Skat:", CardsToString(skat_cards)); + ptr += kNumCards; + SkatGameType game_type = SkatGameType(GetIntFromOneHot(ptr, kNumGameTypes)); + absl::StrAppend(&rv, "|Game:", SkatGameTypeToString(game_type)); + ptr += kNumGameTypes; + Player current_trick_leader = GetIntFromOneHot(ptr, kNumPlayers); + absl::StrAppend(&rv, "|CurrTrick(Leader:", current_trick_leader, "):"); + ptr += kNumPlayers; + for (int i = 0; i < kNumPlayers; i++) { + int card = GetIntFromOneHot(ptr, kNumCards); + if (card >= 0) absl::StrAppend(&rv, ToCardSymbol(card), " "); + ptr += kNumCards; + } + Player previous_trick_leader = GetIntFromOneHot(ptr, kNumPlayers); + if (previous_trick_leader >= 0) { + absl::StrAppend(&rv, "|PrevTrick(Leader:", previous_trick_leader, "):"); + ptr += kNumPlayers; + for (int i = 0; i < kNumPlayers; i++) { + int card = GetIntFromOneHot(ptr, kNumCards); + if (card >= 0) absl::StrAppend(&rv, ToCardSymbol(card), " "); + ptr += kNumCards; + } + } + return rv; +} + +// ********************************** SkatGame ********************************* + +SkatGame::SkatGame(const GameParameters& params) + : Game(kGameType, params) {} + +std::unique_ptr SkatGame::NewInitialState() const { + return std::unique_ptr(new SkatState(shared_from_this())); +} + +} // namespace skat +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/skat/skat.h b/scenarios/bargaining/open_spiel/open_spiel/games/skat/skat.h new file mode 100644 index 0000000..c98ed85 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/skat/skat.h @@ -0,0 +1,232 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_SKAT_H_ +#define OPEN_SPIEL_GAMES_SKAT_H_ + +#include + +#include "open_spiel/spiel.h" + +// A slightly simplified version of Skat. 
+// See https://en.wikipedia.org/wiki/Skat_(card_game) +// This is a 3 player trick-based card game. After cards are dealed a bidding +// phase decides which player is the solo player and what game type is played. +// After the bidding phase, there is a phase where the solo players takes up the +// two cards in the Skat and discards two cards (whose points are secured for +// the solo player). After that the playing phase starts. +// +// Currently the bidding is vastly simplified. The players are allowed to make +// bids or not in order. The first player who makes a bid is the solo player. +// Allowed bids are only the 6 game types (4 suits, Grand & Null). This means +// Hand and Ouvert games are currently not implemented. +// +// The play phase consists of 10 tricks. The utility is the points made minus 60 +// and divided by 120 for the solo player and 240 for the team players. This +// makes it a zero-sum game and since there are 120 points in total, each side +// gets a positive score if they get more than half the points. +// +// The action space is as follows: +// 0..31 Cards, used for dealing, discarding and playing cards. +// 32+ Bidding, currently you can only bid for a game type. + +namespace open_spiel { +namespace skat { + +inline constexpr int kNumRanks = 8; +inline constexpr int kNumSuits = 4; +inline constexpr int kNumCards = kNumRanks * kNumSuits; +inline constexpr int kNumPlayers = 3; +inline constexpr int kNumCardsInSkat = 2; +inline constexpr int kNumGameTypes = 7; +inline constexpr int kNumTricks = (kNumCards - kNumCardsInSkat) / kNumPlayers; +inline constexpr int kBiddingActionBase = kNumCards; // First bidding action. +inline constexpr int kNumBiddingActions = kNumGameTypes; +inline constexpr int kNumActions = kNumCards + kNumBiddingActions; +inline constexpr char kEmptyCardSymbol[] = "🂠"; + +inline constexpr int kObservationTensorSize = + kNumPlayers // Player position + + 3 // Phase + + kNumCards // Players cards + + kNumPlayers * kNumGameTypes // All players' bids + + kNumPlayers // Who's playing solo + + kNumCards // Cards in the Skat + + kNumGameTypes // Game type + + kNumPlayers // Who started the current trick + + kNumPlayers * kNumCards // Cards played to the current trick + + kNumPlayers // Who started the previous trick + + kNumPlayers * kNumCards; // Cards played to the previous trick + +enum SkatGameType { + kUnknownGame = 0, + kPass = 0, + kDiamondsTrump = 1, + kHeartsTrump = 2, + kSpadesTrump = 3, + kClubsTrump = 4, + kGrand = 5, + kNullGame = 6 +}; +enum Suit {kDiamonds = 0, kHearts = 1, kSpades = 2, kClubs = 3}; +enum Rank { + kSeven = 0, + kEight = 1, + kNine = 2, + kQueen = 3, + kKing = 4, + kTen = 5, + kAce = 6, + kJack = 7 +}; +enum CardLocation{ + kDeck = 0, + kHand0 = 1, + kHand1 = 2, + kHand2 = 3, + kSkat = 4, + kTrick = 5 +}; +enum Phase { + kDeal = 0, + kBidding = 1, + kDiscardCards = 2, + kPlay = 3, + kGameOver = 4}; + +// This is the information about one trick, i.e. up to three cards where each +// card was played by one player. +class Trick { + public: + Trick() : Trick{-1} {} + Trick(Player leader) { leader_ = leader; } + int FirstCard() const; + Player Leader() const { return leader_; } + // How many cards have been played in the trick. Between 0 and 3. + int CardsPlayed() const { return cards_.size(); } + // Returns a vector of the cards played in this trick. These are ordered by + // the order of play, i.e. the first card is not necessarily played by player + // 1 but by the player who played first in this trick. 
+ std::vector GetCards() const { return cards_; } + // Adds `card` to the trick as played by player with id `player`. + void PlayCard(int card); + // Returns the player id of the player who was at position `position` in this + // trick. Position is 0 based here, i.e. PlayerAtPosition(0) returns the + // player who played the first card in this trick. This method fails if no + // cards have been played yet. + int PlayerAtPosition(int position) const; + // Returns the sum of the values of the cards in the trick. + int Points() const; + std::string ToString() const; + + private: + std::vector cards_{}; + Player leader_; + Suit led_suit_; +}; + +class SkatState : public State { + public: + SkatState(std::shared_ptr game); + SkatState(const SkatState&) = default; + + Player CurrentPlayer() const override { + return IsTerminal() ? kTerminalPlayerId : current_player_; + } + + std::string ActionToString(Player player, Action action_id) const override; + bool IsTerminal() const override { return phase_ == kGameOver; } + std::vector Returns() const override { return returns_; } + std::unique_ptr Clone() const override { + return std::unique_ptr(new SkatState(*this)); + } + std::string ToString() const override; + std::vector LegalActions() const override; + std::vector> ChanceOutcomes() const override; + + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + + protected: + void DoApplyAction(Action action) override; + + private: + std::vector DealLegalActions() const; + std::vector BiddingLegalActions() const; + std::vector DiscardCardsLegalActions() const; + std::vector PlayLegalActions() const; + void ApplyDealAction(int card); + void ApplyBiddingAction(int game_type); + void ApplyDiscardCardsAction(int card); + void ApplyPlayAction(int card); + + void EndBidding(Player winner, SkatGameType game_type); + int NextPlayer() { return (current_player_ + 1) % kNumPlayers; } + bool IsTrump(int card) const; + int CardOrder(int card, int first_card) const; + int TrumpOrder(int card) const; + int NullOrder(Rank rank) const; + int WinsTrick() const; + void ScoreUp(); + int CardsInSkat() const; + int CurrentTrickIndex() const { + return std::min(kNumTricks - 1, num_cards_played_ / kNumPlayers); + } + Trick& CurrentTrick() { return tricks_[CurrentTrickIndex()]; } + const Trick& CurrentTrick() const { return tricks_[CurrentTrickIndex()]; } + const Trick& PreviousTrick() const { + return tricks_[std::max(0, num_cards_played_ / kNumPlayers - 1)]; + } + std::string CardLocationsToString() const; + + SkatGameType game_type_ = kUnknownGame; // The trump suit (or notrumps) + Phase phase_ = kDeal; + // CardLocation for each card. + std::array card_locations_; + std::array player_bids_; + + // Play related. + Player solo_player_ = kChancePlayerId; + Player current_player_ = kChancePlayerId; // The player next to make a move. + Player last_trick_winner_ = kChancePlayerId; + int num_cards_played_ = 0; + std::array tricks_{}; // Tricks played so far. 
+ int points_solo_ = 0; + int points_team_ = 0; + std::vector returns_ = std::vector(kNumPlayers); +}; + +class SkatGame : public Game { + public: + explicit SkatGame(const GameParameters& params); + int NumDistinctActions() const override { return kNumActions; } + std::unique_ptr NewInitialState() const override; + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { return -1.0; } + double MaxUtility() const override { return 1.0; } + absl::optional UtilitySum() const override { return 0; } + int MaxGameLength() const override { return kNumCards + kNumPlayers; } + // TODO: verify whether this bound is tight and/or tighten it. + int MaxChanceNodesInHistory() const override { return MaxGameLength(); } + int MaxChanceOutcomes() const override { return kNumCards; } + std::vector ObservationTensorShape() const override { + return {kObservationTensorSize}; + } +}; + +} // namespace skat +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_SKAT_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/skat/skat_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/skat/skat_test.cc new file mode 100644 index 0000000..41416c0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/skat/skat_test.cc @@ -0,0 +1,37 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/skat/skat.h" + +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace skat { +namespace { + +namespace testing = open_spiel::testing; + +void BasicSkatTests() { + testing::LoadGameTest("skat"); + testing::RandomSimTest(*LoadGame("skat"), 10); +} + +} // namespace +} // namespace skat +} // namespace open_spiel + +int main(int argc, char **argv) { + open_spiel::skat::BasicSkatTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/solitaire/solitaire.cc b/scenarios/bargaining/open_spiel/open_spiel/games/solitaire/solitaire.cc new file mode 100644 index 0000000..8fa99b3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/solitaire/solitaire.cc @@ -0,0 +1,1535 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/solitaire/solitaire.h" + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel::solitaire { + +namespace { +const GameType kGameType{/*short_name=*/"solitaire", + /*long_name=*/"Klondike Solitaire", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kRewards, + /*max_num_players=*/1, + /*min_num_players=*/1, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"players", GameParameter(kDefaultPlayers)}, + {"is_colored", GameParameter(kDefaultIsColored)}, + {"depth_limit", GameParameter(kDefaultDepthLimit)}}}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new SolitaireGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory) +} // namespace + +namespace { +// ANSI color codes +inline constexpr const char* kReset = "\033[0m"; +inline constexpr const char* kRed = "\033[31m"; +inline constexpr const char* kBlack = "\033[37m"; + +// Unicode Glyphs +inline constexpr const char* kGlyphHidden = "\U0001F0A0"; +inline constexpr const char* kGlyphEmpty = "\U0001F0BF"; +inline constexpr const char* kGlyphSpades = "\U00002660"; +inline constexpr const char* kGlyphHearts = "\U00002665"; +inline constexpr const char* kGlyphClubs = "\U00002663"; +inline constexpr const char* kGlyphDiamonds = "\U00002666"; +inline constexpr const char* kGlyphArrow = "\U00002190"; + +// Constants =================================================================== +inline constexpr int kNumRanks = 13; + +// Number of cards_ that can be in each pile type_ +inline constexpr int kMaxSizeWaste = 24; +inline constexpr int kMaxSizeFoundation = 13; +inline constexpr int kMaxSizeTableau = 19; + +// Number of sources that can be in each pile type_ +inline constexpr int kMaxSourcesWaste = 8; +inline constexpr int kMaxSourcesFoundation = 1; +inline constexpr int kMaxSourcesTableau = 13; + +// These divide up the action ids into sections. kEnd is a single action that is +// used to end the game when no other actions are available. +inline constexpr int kEnd = 0; + +// Reveal actions are ones that can be taken at chance nodes; they change a +// hidden_ card to a card of the same index_ as the action id_ (e.g. 2 would +// reveal a 2 of spades) +inline constexpr int kRevealStart = 1; +inline constexpr int kRevealEnd = 52; + +// kMove actions are ones that are taken at decision nodes; they involve moving +// a card to another cards_ location_. It starts at 53 because there are 52 +// reveal actions before it. See `NumDistinctActions()` in solitaire.cc. 
+inline constexpr int kMoveStart = 53; +inline constexpr int kMoveEnd = 204; + +// Indices for special cards_ +// inline constexpr int kHiddenCard = 99; +inline constexpr int kEmptySpadeCard = -5; +inline constexpr int kEmptyHeartCard = -4; +inline constexpr int kEmptyClubCard = -3; +inline constexpr int kEmptyDiamondCard = -2; +inline constexpr int kEmptyTableauCard = -1; + +// 1 empty + 13 ranks +inline constexpr int kFoundationTensorLength = 14; + +// 6 hidden_ cards_ + 1 empty tableau + 52 ordinary cards_ +inline constexpr int kTableauTensorLength = 59; + +// 1 hidden_ card + 52 ordinary cards_ +inline constexpr int kWasteTensorLength = 53; + +// Constant for how many hidden_ cards_ can show up in a tableau. As hidden_ +// cards_ can't be added, the max is the highest number in a tableau at the +// start of the game: 6 +inline constexpr int kMaxHiddenCard = 6; + +// Only used in one place and just for consistency (to match kChancePlayerId& +// kTerminalPlayerId) +inline constexpr int kPlayerId = 0; + +// Indicates the last index_ before the first player action (the last Reveal +// action has an ID of 52) +inline constexpr int kActionOffset = 52; + +// Order of suits +const std::vector kSuits = {SuitType::kSpades, SuitType::kHearts, + SuitType::kClubs, SuitType::kDiamonds}; + +// These correspond with their enums, not with the two vectors directly above +const std::vector kSuitStrs = { + "", kGlyphSpades, kGlyphHearts, kGlyphClubs, kGlyphDiamonds, ""}; +const std::vector kRankStrs = { + "", "A", "2", "3", "4", "5", "6", "7", "8", "9", "T", "J", "Q", "K", ""}; + +const std::map kFoundationPoints = { + // region Maps a RankType to the reward for moving a card of that rank_ to + // the foundation + {RankType::kA, 100.0}, {RankType::k2, 90.0}, {RankType::k3, 80.0}, + {RankType::k4, 70.0}, {RankType::k5, 60.0}, {RankType::k6, 50.0}, + {RankType::k7, 40.0}, {RankType::k8, 30.0}, {RankType::k9, 20.0}, + {RankType::kT, 10.0}, {RankType::kJ, 10.0}, {RankType::kQ, 10.0}, + {RankType::kK, 10.0} + // endregion +}; + +const std::map kSuitToPile = { + // region Maps a foundation suit_ to the ID of the foundation + {SuitType::kSpades, PileID::kSpades}, + {SuitType::kHearts, PileID::kHearts}, + {SuitType::kClubs, PileID::kClubs}, + {SuitType::kDiamonds, PileID::kDiamonds} + // endregion +}; + +const std::map kIntToPile = { + // region Maps an integer to a tableau pile ID (used when initializing + // SolitaireState) + {1, PileID::k1stTableau}, {2, PileID::k2ndTableau}, + {3, PileID::k3rdTableau}, {4, PileID::k4thTableau}, + {5, PileID::k5thTableau}, {6, PileID::k6thTableau}, + {7, PileID::k7thTableau} + // endregion +}; + +} // namespace + +// Miscellaneous =============================================================== + +std::vector GetOppositeSuits(const SuitType& suit) { + /* Just returns a vector of the suits of opposite color. For red suits + * (SuitType::kHearts and SuitType::kDiamonds), this returns the black suits + * (SuitType::kSpades and SuitType::kClubs). For a black suit_, this returns + * the red suits. The last `SuitType` would be `SuitType::kNone` which should + * only occur with empty tableau cards or hidden cards. Empty tableau + * cards should accept any suit, but hidden cards are the opposite; they + * shouldn't accept any. There isn't really a use case for calling this + * function with the suit of a hidden card though. 
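+   * For example (an illustrative note added here), calling this function with
+   * SuitType::kHearts returns {SuitType::kSpades, SuitType::kClubs}, while
+   * calling it with SuitType::kNone returns all four suits.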
*/ + + switch (suit) { + case SuitType::kSpades: { + return {SuitType::kHearts, SuitType::kDiamonds}; + } + case SuitType::kHearts: { + return {SuitType::kSpades, SuitType::kClubs}; + } + case SuitType::kClubs: { + return {SuitType::kHearts, SuitType::kDiamonds}; + } + case SuitType::kDiamonds: { + return {SuitType::kSpades, SuitType::kClubs}; + } + case SuitType::kNone: { + return {SuitType::kSpades, SuitType::kHearts, SuitType::kClubs, + SuitType::kDiamonds}; + } + default: { + SpielFatalError("suit is not in (s, h, c, d)"); + } + } +} + +int GetCardIndex(RankType rank, SuitType suit) { + /* Using a given rank and/or suit, gets an integer representing the index + * of the card. */ + + if (rank == RankType::kHidden || suit == SuitType::kHidden) { + // Handles hidden_ cards_ + return kHiddenCard; + } else if (rank == RankType::kNone) { + // Handles special cards_ + if (suit == SuitType::kNone) { + // Handles empty tableau cards_ + return kEmptyTableauCard; + } else { + // Handles empty foundation cards + switch (suit) { + case SuitType::kSpades: { + return kEmptySpadeCard; + } + case SuitType::kHearts: { + return kEmptyHeartCard; + } + case SuitType::kClubs: { + return kEmptyClubCard; + } + case SuitType::kDiamonds: { + return kEmptyDiamondCard; + } + default: { + SpielFatalError("Failed to get card index_"); + } + } + } + } else { + // Handles ordinary cards (e.g. 0-13 -> spades, 14-26 -> hearts, etc.) + return (static_cast(suit) - 1) * kNumRanks + static_cast(rank); + } +} + +int GetMaxSize(LocationType location) { + if (location >= LocationType::kDeck && location <= LocationType::kWaste) { + // Cards can only be removed from the waste_& there are 24 cards_ in it + // at the start of the game + return kMaxSizeWaste; + } else if (location == LocationType::kFoundation) { + // There are 13 cards_ in a suit_ + return kMaxSizeFoundation; + } else if (location == LocationType::kTableau) { + // There are a maximum of 6 hidden cards and 13 non-hidden cards in a + // tableau (1 for each rank) + return kMaxSizeTableau; + } else { + return 0; + } +} + +std::hash hasher; + +// Card Methods ================================================================ + +Card::Card(bool hidden, SuitType suit, RankType rank, LocationType location) + : rank_(rank), suit_(suit), location_(location), hidden_(hidden) {} + +Card::Card(int index, bool hidden, LocationType location) + : location_(location), hidden_(hidden), index_(index) { + if (!hidden_) { + switch (index_) { + case kHiddenCard: { + rank_ = RankType::kHidden; + suit_ = SuitType::kHidden; + break; + } + case kEmptyTableauCard: { + rank_ = RankType::kNone; + suit_ = SuitType::kNone; + break; + } + case kEmptySpadeCard: { + rank_ = RankType::kNone; + suit_ = SuitType::kSpades; + break; + } + case kEmptyHeartCard: { + rank_ = RankType::kNone; + suit_ = SuitType::kHearts; + break; + } + case kEmptyClubCard: { + rank_ = RankType::kNone; + suit_ = SuitType::kClubs; + break; + } + case kEmptyDiamondCard: { + rank_ = RankType::kNone; + suit_ = SuitType::kDiamonds; + break; + } + default: { + // Converts an index back into a rank and suit for ordinary cards + rank_ = static_cast(1 + ((index_ - 1) % kNumRanks)); + suit_ = static_cast( + static_cast(1 + floor((index_ - 1) / 13.0))); + } + } + } +} + +// Getters + +RankType Card::GetRank() const { return rank_; } + +SuitType Card::GetSuit() const { return suit_; } + +LocationType Card::GetLocation() const { return location_; } + +bool Card::GetHidden() const { return hidden_; } + +int Card::GetIndex() const { + /* 
Basically it just calculates the index if it hasn't been calculated before, + * otherwise it will just return a stored value. If `force` is true and the + * card isn't hidden, then the index is calculated again. */ + return hidden_ ? kHiddenCard : GetCardIndex(rank_, suit_); +} + +// Setters + +void Card::SetRank(RankType new_rank) { rank_ = new_rank; } + +void Card::SetSuit(SuitType new_suit) { suit_ = new_suit; } + +void Card::SetLocation(LocationType new_location) { location_ = new_location; } + +void Card::SetHidden(bool new_hidden) { hidden_ = new_hidden; } + +// Other Methods + +std::string Card::ToString(bool colored) const { + std::string result; + + // Determine color of string + if (colored && !hidden_) { + if (suit_ == SuitType::kSpades || suit_ == SuitType::kClubs) { + absl::StrAppend(&result, kBlack); + } else if (suit_ == SuitType::kHearts || suit_ == SuitType::kDiamonds) { + absl::StrAppend(&result, kRed); + } + } + + // Determine contents of string + if (rank_ == RankType::kHidden || suit_ == SuitType::kHidden) { + absl::StrAppend(&result, kGlyphHidden, " "); + } else if (rank_ == RankType::kNone && suit_ == SuitType::kNone) { + absl::StrAppend(&result, kGlyphEmpty); + } else { + absl::StrAppend(&result, kRankStrs.at(static_cast(rank_))); + absl::StrAppend(&result, kSuitStrs.at(static_cast(suit_))); + } + + if (colored) { + // Reset color if applicable + absl::StrAppend(&result, kReset); + } + + return result; +} + +std::vector Card::LegalChildren() const { + if (hidden_) { + return {}; + } else { + RankType child_rank; + std::vector child_suits; + + // A card can have a maximum of 4 children + // (specifically, an empty tableau card can accept a king of any suit) + child_suits.reserve(4); + + switch (location_) { + case LocationType::kTableau: { + if (rank_ == RankType::kNone) { + if (suit_ == SuitType::kNone) { + // Empty tableaus can accept a king of any suit + child_rank = RankType::kK; + child_suits = kSuits; + break; + } else { + return {}; + } + } else if (rank_ >= RankType::k2 && rank_ <= RankType::kK) { + // Ordinary cards (except aces) can accept cards of an opposite + // suit that is one rank lower + child_rank = static_cast(static_cast(rank_) - 1); + child_suits = GetOppositeSuits(suit_); + break; + } else { + // This will catch RankType::kA and RankType::kHidden + return {}; + } + break; + } + case LocationType::kFoundation: { + if (rank_ == RankType::kNone) { + if (suit_ != SuitType::kNone) { + child_rank = static_cast(static_cast(rank_) + 1); + child_suits = {suit_}; + } else { + return {}; + } + } else if (rank_ >= RankType::kA && rank_ <= RankType::kQ) { + // Cards (except kings) can accept a card of the same suit that is + // one rank higher + child_rank = static_cast(static_cast(rank_) + 1); + child_suits = {suit_}; + } else { + // This could catch RankType::kK and RankType::kHidden + return {}; + } + break; + } + default: { + // This catches all cards_ that aren't located in a tableau or + // foundation + return {}; + } + } + + std::vector legal_children; + legal_children.reserve(4); + + if (child_suits.empty()) { + SpielFatalError("child_suits should not be empty"); + } + + for (const auto& child_suit : child_suits) { + auto child = Card(false, child_suit, child_rank); + legal_children.push_back(child); + } + + return legal_children; + } +} + +bool Card::operator==(const Card& other_card) const { + return rank_ == other_card.rank_ && suit_ == other_card.suit_; +} + +bool Card::operator<(const Card& other_card) const { + if (suit_ != other_card.suit_) { 
+ return suit_ < other_card.suit_; + } else if (rank_ != other_card.rank_) { + return rank_ < other_card.rank_; + } else { + return false; + } +} + +// Pile Methods ================================================================ + +Pile::Pile(LocationType type, PileID id, SuitType suit) + : type_(type), suit_(suit), id_(id), max_size_(GetMaxSize(type)) { + cards_.reserve(max_size_); +} + +// Getters/Setters + +bool Pile::GetIsEmpty() const { return cards_.empty(); } + +Card Pile::GetFirstCard() const { return cards_.front(); } + +Card Pile::GetLastCard() const { return cards_.back(); } + +SuitType Pile::GetSuit() const { return suit_; } + +LocationType Pile::GetType() const { return type_; } + +PileID Pile::GetID() const { return id_; } + +std::vector Pile::GetCards() const { return cards_; } + +void Pile::SetCards(std::vector new_cards) { + cards_ = std::move(new_cards); +} + +// Other Methods + +std::vector Pile::Targets() const { + std::cout << "Pile::Targets()" << std::endl; + switch (type_) { + case LocationType::kFoundation: { + if (!cards_.empty()) { + return {cards_.back()}; + } else { + // Empty foundation card with the same suit as the pile + return {Card(false, suit_, RankType::kNone, LocationType::kFoundation)}; + } + } + case LocationType::kTableau: { + if (!cards_.empty()) { + auto back_card = cards_.back(); + if (!back_card.GetHidden()) { + return {cards_.back()}; + } else { + return {}; + } + } else { + // Empty tableau card (no rank or suit) + return {Card(false, SuitType::kNone, RankType::kNone, + LocationType::kTableau)}; + } + } + default: { + SpielFatalError("Pile::Targets() called with unsupported type_"); + } + } +} + +std::vector Pile::Sources() const { + std::cout << "Pile::Targets()" << std::endl; + std::vector sources; + // A pile can have a maximum of 13 cards as sources (1 for each rank) + sources.reserve(kNumRanks); + switch (type_) { + case LocationType::kFoundation: { + if (!cards_.empty()) { + return {cards_.back()}; + } else { + return {}; + } + } + case LocationType::kTableau: { + if (!cards_.empty()) { + for (const auto& card : cards_) { + if (!card.GetHidden()) { + sources.push_back(card); + } + } + return sources; + } else { + return {}; + } + } + case LocationType::kWaste: { + if (!cards_.empty()) { + int i = 0; + for (const auto& card : cards_) { + if (!card.GetHidden()) { + if (i % 3 == 0) { + sources.push_back(card); + } + ++i; + } else { + break; + } + } + return sources; + } else { + return {}; + } + } + default: { + SpielFatalError("Pile::Sources() called with unsupported type_"); + } + } +} + +std::vector Pile::Split(Card card) { + std::vector split_cards; + switch (type_) { + case LocationType::kFoundation: { + if (cards_.back() == card) { + split_cards = {cards_.back()}; + cards_.pop_back(); + } + break; + } + case LocationType::kTableau: { + if (!cards_.empty()) { + bool split_flag = false; + for (auto it = cards_.begin(); it != cards_.end();) { + if (*it == card) { + split_flag = true; + } + if (split_flag) { + split_cards.push_back(*it); + it = cards_.erase(it); + } else { + ++it; + } + } + } + break; + } + case LocationType::kWaste: { + if (!cards_.empty()) { + for (auto it = cards_.begin(); it != cards_.end();) { + if (*it == card) { + split_cards.push_back(*it); + it = cards_.erase(it); + break; + } else { + ++it; + } + } + } + break; + } + default: { + return {}; + } + } + return split_cards; +} + +void Pile::Reveal(Card card_to_reveal) { + SpielFatalError("Pile::Reveal() is not implemented."); +} + +void Pile::Extend(std::vector 
source_cards) { + for (auto& card : source_cards) { + card.SetLocation(type_); + cards_.push_back(card); + } +} + +std::string Pile::ToString(bool colored) const { + std::string result; + for (const auto& card : cards_) { + absl::StrAppend(&result, card.ToString(colored), " "); + } + return result; +} + +// Tableau Methods ============================================================= + +Tableau::Tableau(PileID id) + : Pile(LocationType::kTableau, id, SuitType::kNone) {} + +std::vector Tableau::Targets() const { + if (!cards_.empty()) { + auto back_card = cards_.back(); + if (!back_card.GetHidden()) { + return {cards_.back()}; + } else { + return {}; + } + } else { + // Empty tableau card (no rank or suit) + return { + Card(false, SuitType::kNone, RankType::kNone, LocationType::kTableau)}; + } +} + +std::vector Tableau::Sources() const { + std::vector sources; + sources.reserve(kMaxSourcesTableau); + if (!cards_.empty()) { + for (const auto& card : cards_) { + if (!card.GetHidden()) { + sources.push_back(card); + } + } + return sources; + } else { + return {}; + } +} + +std::vector Tableau::Split(Card card) { + std::vector split_cards; + if (!cards_.empty()) { + bool split_flag = false; + for (auto it = cards_.begin(); it != cards_.end();) { + if (*it == card) { + split_flag = true; + } + if (split_flag) { + split_cards.push_back(*it); + it = cards_.erase(it); + } else { + ++it; + } + } + } + return split_cards; +} + +void Tableau::Reveal(Card card_to_reveal) { + cards_.back().SetRank(card_to_reveal.GetRank()); + cards_.back().SetSuit(card_to_reveal.GetSuit()); + cards_.back().SetHidden(false); +} + +// Foundation Methods ========================================================== + +Foundation::Foundation(PileID id, SuitType suit) + : Pile(LocationType::kFoundation, id, suit) {} + +std::vector Foundation::Targets() const { + if (!cards_.empty()) { + return {cards_.back()}; + } else { + // Empty foundation card with the same suit as the pile + return {Card(false, suit_, RankType::kNone, LocationType::kFoundation)}; + } +} + +std::vector Foundation::Sources() const { + std::vector sources; + sources.reserve(kMaxSourcesFoundation); + if (!cards_.empty()) { + return {cards_.back()}; + } else { + return {}; + } +} + +std::vector Foundation::Split(Card card) { + std::vector split_cards; + if (cards_.back() == card) { + split_cards = {cards_.back()}; + cards_.pop_back(); + } + return split_cards; +} + +// Waste Methods =============================================================== + +Waste::Waste() : Pile(LocationType::kWaste, PileID::kWaste, SuitType::kNone) {} + +std::vector Waste::Targets() const { return {}; } + +std::vector Waste::Sources() const { + std::vector sources; + sources.reserve(kMaxSourcesWaste); + if (!cards_.empty()) { + int i = 0; + for (const auto& card : cards_) { + if (!card.GetHidden()) { + // Every 3rd card in the waste can be moved + if (i % 3 == 0) { + sources.push_back(card); + } + ++i; + } else { + break; + } + } + return sources; + } else { + return {}; + } +} + +std::vector Waste::Split(Card card) { + std::vector split_cards; + if (!cards_.empty()) { + for (auto it = cards_.begin(); it != cards_.end();) { + if (*it == card) { + split_cards.push_back(*it); + it = cards_.erase(it); + break; + } else { + ++it; + } + } + } + return split_cards; +} + +void Waste::Reveal(Card card_to_reveal) { + for (auto& card : cards_) { + if (card.GetHidden()) { + card.SetRank(card_to_reveal.GetRank()); + card.SetSuit(card_to_reveal.GetSuit()); + card.SetHidden(false); + break; + } + } 
+} + +// Move Methods ================================================================ + +Move::Move(Card target_card, Card source_card) { + target_ = target_card; + source_ = source_card; +} + +Move::Move(RankType target_rank, SuitType target_suit, RankType source_rank, + SuitType source_suit) { + target_ = Card(false, target_suit, target_rank, LocationType::kMissing); + source_ = Card(false, source_suit, source_rank, LocationType::kMissing); +} + +Move::Move(Action action) { + // `base` refers to the starting point that indices start from (e.g. if it's + // 7, and there's 3 cards in its group, their action ids will be 8, 9, 10). + // `residual` is just the difference between the id and the base. + + int residual; + int target_rank; + int source_rank; + int target_suit; + int source_suit; + + std::vector opposite_suits; + action -= kActionOffset; + + // The numbers used in the cases below are just used to divide action ids into + // groups (e.g. 1-132 are regular moves, 133-136 are the action ids of moves + // that move an ace to an empty foundation, etc.) + + if (action >= 1 && action <= 132) { + // Handles ordinary moves + target_rank = ((action - 1) / 3) % 11 + 2; + target_suit = ((action - 1) / 33) + 1; + residual = ((action - 1) % 3); + if (residual == 0) { + source_rank = target_rank + 1; + source_suit = target_suit; + } else { + opposite_suits = GetOppositeSuits(static_cast(target_suit)); + source_rank = target_rank - 1; + source_suit = static_cast(opposite_suits[residual - 1]); + } + } else if (action >= 133 && action <= 136) { + // Handles ace to empty foundation moves + target_rank = 0; + target_suit = action - 132; + source_rank = 1; + source_suit = target_suit; + } else if (action >= 137 && action <= 140) { + // Handles king to empty tableau moves + target_rank = 0; + target_suit = 0; + source_rank = 13; + source_suit = action - 136; + } else if (action >= 141 && action <= 144) { + // Handles moves with ace targets + target_rank = 1; + target_suit = action - 140; + source_rank = 2; + source_suit = target_suit; + } else if (action >= 145 && action <= 152) { + // Handles moves with king targets + target_rank = 13; + target_suit = (action - 143) / 2; + + residual = (action - 143) % 2; + opposite_suits = GetOppositeSuits(static_cast(target_suit)); + + source_rank = 12; + source_suit = static_cast(opposite_suits[residual]); + } else { + SpielFatalError("action provided does not correspond with a move"); + } + + target_ = Card(false, static_cast(target_suit), + static_cast(target_rank)); + source_ = Card(false, static_cast(source_suit), + static_cast(source_rank)); +} + +// Getters + +Card Move::GetTarget() const { return target_; } + +Card Move::GetSource() const { return source_; } + +// Other Methods + +Action Move::ActionId() const { + int target_rank = static_cast(target_.GetRank()); + int source_rank = static_cast(source_.GetRank()); + int target_suit = static_cast(target_.GetSuit()); + int source_suit = static_cast(source_.GetSuit()); + + int base; + int residual; + + // `base` refers to the starting point that indices start from (e.g. if it's + // 7, and there's 3 cards in its group, their action ids will be 8, 9, 10). + // `residual` is just the difference between the id and the base. 
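+  // Worked example (for illustration; absolute ids also add kActionOffset):
+  // with SuitType::kSpades == 1, SuitType::kHearts == 2 and RankType::k5 == 5,
+  // the tableau move "4 of hearts onto 5 of spades" is an ordinary move:
+  // base = (1 - 1) * 33 + (5 - 2) * 3 = 9; the suits differ and source_suit
+  // is 2, so residual = 2 and the id is 9 + 2 + kActionOffset. The same-suit
+  // foundation move "5 of spades onto 4 of spades" gives
+  // base = (1 - 1) * 33 + (4 - 2) * 3 = 6 and residual = 1, i.e. 7 + kActionOffset.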
+ + switch (target_rank) { + case static_cast(RankType::kNone): { + switch (source_rank) { + case static_cast(RankType::kA): { + base = 132; + break; + } + case static_cast(RankType::kK): { + base = 136; + break; + } + default: { + base = -999; + break; + // SpielFatalError("source_.rank_ has an incorrect value"); + } + } + return base + source_suit + kActionOffset; + } + case static_cast(RankType::kA): { + base = 140; + return base + source_suit + kActionOffset; + } + case static_cast(RankType::kK): { + base = 144; + if (source_suit <= 2) { + residual = -1; + } else { + residual = 0; + } + return base + (2 * target_suit) + residual + kActionOffset; + } + default: { + base = (target_suit - 1) * 33 + (target_rank - 2) * 3; + if (target_suit == source_suit) { + residual = 1; + } else if (source_suit <= 2) { + residual = 2; + } else { + residual = 3; + } + return base + residual + kActionOffset; + } + } +} + +std::string Move::ToString(bool colored) const { + std::string result; + absl::StrAppend(&result, target_.ToString(colored), " ", kGlyphArrow, " ", + source_.ToString(colored)); + return result; +} + +bool Move::operator<(const Move& other_move) const { + int index_ = target_.GetIndex() * 100 + source_.GetIndex(); + int other_index = + other_move.target_.GetIndex() * 100 + other_move.source_.GetIndex(); + return index_ < other_index; +} + +// SolitaireState Methods ====================================================== + +SolitaireState::SolitaireState(std::shared_ptr game) + : State(game), waste_() { + // Extract parameters from `game` + auto parameters = game->GetParameters(); + is_colored_ = parameters.at("is_colored").bool_value(); + depth_limit_ = parameters.at("depth_limit").int_value(); + + // Create foundations_ + for (const auto& suit_ : kSuits) { + foundations_.emplace_back(kSuitToPile.at(suit_), suit_); + } + + // Create tableaus_ + for (int i = 1; i <= 7; i++) { + // Create `i` hidden_ cards_ + std::vector cards_to_add; + for (int j = 1; j <= i; j++) { + cards_to_add.emplace_back(true, SuitType::kHidden, RankType::kHidden, + LocationType::kTableau); + } + + // Create a new tableau and add cards_ + auto tableau = Tableau(kIntToPile.at(i)); + tableau.SetCards(cards_to_add); + + // Add resulting tableau to tableaus_ + tableaus_.push_back(tableau); + } + + // Create waste_ + for (int i = 1; i <= 24; i++) { + auto new_card = + Card(true, SuitType::kHidden, RankType::kHidden, LocationType::kWaste); + waste_.Extend({new_card}); + } +} + +Player SolitaireState::CurrentPlayer() const { + if (IsTerminal()) { + return kTerminalPlayerId; + } else if (IsChanceNode()) { + return kChancePlayerId; + } else { + return kPlayerId; + } +} + +std::unique_ptr SolitaireState::Clone() const { + return std::unique_ptr(new SolitaireState(*this)); +} + +bool SolitaireState::IsTerminal() const { return is_finished_; } + +bool SolitaireState::IsChanceNode() const { + for (const auto& tableau : tableaus_) { + if (!tableau.GetIsEmpty() && tableau.GetLastCard().GetHidden()) { + return true; + } + } + + if (!waste_.GetIsEmpty()) { + for (const auto& card : waste_.GetCards()) { + if (card.GetHidden()) { + return true; + } + } + } + + return false; +} + +std::string SolitaireState::ToString() const { + std::string result; + + absl::StrAppend(&result, "WASTE : ", waste_.ToString(is_colored_)); + + absl::StrAppend(&result, "\nFOUNDATIONS : "); + for (const auto& foundation : foundations_) { + absl::StrAppend(&result, foundation.Targets()[0].ToString(is_colored_), + " "); + } + + absl::StrAppend(&result, 
"\nTABLEAUS : "); + for (const auto& tableau : tableaus_) { + if (!tableau.GetIsEmpty()) { + absl::StrAppend(&result, "\n", tableau.ToString(is_colored_)); + } + } + + absl::StrAppend(&result, "\nTARGETS : "); + for (const auto& card : Targets()) { + absl::StrAppend(&result, card.ToString(is_colored_), " "); + } + + absl::StrAppend(&result, "\nSOURCES : "); + for (const auto& card : Sources()) { + absl::StrAppend(&result, card.ToString(is_colored_), " "); + } + + return result; +} + +std::string SolitaireState::ActionToString(Player player, + Action action_id) const { + if (action_id == kEnd) { + return "kEnd"; + } else if (action_id >= kRevealStart && action_id <= kRevealEnd) { + auto revealed_card = Card(static_cast(action_id)); + std::string result; + absl::StrAppend(&result, "Reveal", revealed_card.ToString(is_colored_)); + return result; + } else if (action_id >= kMoveStart && action_id <= kMoveEnd) { + auto move = Move(action_id); + return move.ToString(is_colored_); + } else { + return "Missing Action"; + } +} + +std::string SolitaireState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return HistoryString(); +} + +std::string SolitaireState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void SolitaireState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + SPIEL_CHECK_EQ(values.size(), game_->ObservationTensorSize()); + std::fill(values.begin(), values.end(), 0.0); + auto ptr = values.begin(); + + for (const auto& foundation : foundations_) { + if (foundation.GetIsEmpty()) { + ptr[0] = 1; + } else { + auto last_rank = foundation.GetLastCard().GetRank(); + if (last_rank >= RankType::kA && last_rank <= RankType::kK) { + ptr[static_cast(last_rank)] = 1; + } + } + ptr += kFoundationTensorLength; + } + + for (const auto& tableau : tableaus_) { + if (tableau.GetIsEmpty()) { + ptr[7] = 1.0; + } else { + int num_hidden_cards = 0; + for (const auto& card : tableau.GetCards()) { + if (card.GetHidden() && num_hidden_cards <= kMaxHiddenCard) { + ptr[num_hidden_cards] = 1.0; + ++num_hidden_cards; + } else { + auto tensor_index = card.GetIndex() + kMaxHiddenCard; + ptr[tensor_index] = 1.0; + } + } + } + ptr += kTableauTensorLength; + } + + for (auto& card : waste_.GetCards()) { + if (card.GetHidden()) { + ptr[0] = 1.0; + } else { + auto tensor_index = card.GetIndex(); + ptr[tensor_index] = 1.0; + } + ptr += kWasteTensorLength; + } + + SPIEL_CHECK_LE(ptr, values.end()); +} + +void SolitaireState::DoApplyAction(Action action) { + if (action == kEnd) { + is_finished_ = true; + current_rewards_ = 0; + } else if (action >= kRevealStart && action <= kRevealEnd) { + auto revealed_card = Card(static_cast(action)); + bool found_card = false; + + for (auto& tableau : tableaus_) { + if (!tableau.GetIsEmpty() && tableau.GetLastCard().GetHidden()) { + tableau.Reveal(revealed_card); + card_map_.insert_or_assign(tableau.GetLastCard(), tableau.GetID()); + found_card = true; + break; + } + } + if (!found_card && !waste_.GetIsEmpty()) { + waste_.Reveal(revealed_card); + card_map_.insert_or_assign(revealed_card, waste_.GetID()); + } + revealed_cards_.push_back(action); + } else if (action >= kMoveStart && action <= kMoveEnd) { + Move selected_move = Move(action); + is_reversible_ = IsReversible(selected_move.GetSource(), + GetPile(selected_move.GetSource())); + + if 
(is_reversible_) { + std::string current_observation = ObservationString(0); + previous_states_.insert(hasher(current_observation)); + } else { + previous_states_.clear(); + } + + MoveCards(selected_move); + current_returns_ += current_rewards_; + } + + ++current_depth_; + if (current_depth_ >= depth_limit_) { + is_finished_ = true; + } +} + +std::vector SolitaireState::Returns() const { + // Returns the sum of rewards up to and including the most recent state + // transition. + return {current_returns_}; +} + +std::vector SolitaireState::Rewards() const { + // Should be the reward for the action that created this state, not the action + // applied to this state + return {current_rewards_}; +} + +std::vector SolitaireState::LegalActions() const { + if (IsTerminal()) { + return {}; + } else if (IsChanceNode()) { + return LegalChanceOutcomes(); + } else { + std::vector legal_actions; + + if (is_reversible_) { + // If the state is reversible, we need to check each move to see if it is + // too. + for (const auto& move : CandidateMoves()) { + if (IsReversible(move.GetSource(), GetPile(move.GetSource()))) { + auto action_id = move.ActionId(); + auto child = Child(action_id); + + if (child->CurrentPlayer() == kChancePlayerId) { + legal_actions.push_back(action_id); + } else { + auto child_hash = hasher(child->ObservationString()); + if (previous_states_.count(child_hash) == 0) { + legal_actions.push_back(action_id); + } + } + } else { + legal_actions.push_back(move.ActionId()); + } + } + } else { + // If the state isn't reversible, all candidate moves are legal + for (const auto& move : CandidateMoves()) { + legal_actions.push_back(move.ActionId()); + } + } + + if (!legal_actions.empty()) { + std::sort(legal_actions.begin(), legal_actions.end()); + } else { + legal_actions.push_back(kEnd); + } + + return legal_actions; + } +} + +std::vector> SolitaireState::ChanceOutcomes() const { + std::vector> outcomes; + const double p = 1.0 / (52 - revealed_cards_.size()); + + for (int i = 1; i <= 52; i++) { + if (std::find(revealed_cards_.begin(), revealed_cards_.end(), i) == + revealed_cards_.end()) { + outcomes.emplace_back(i, p); + } + } + + return outcomes; +} + +// Other Methods + +std::vector SolitaireState::Targets( + const absl::optional& location) const { + LocationType loc = location.value_or(LocationType::kMissing); + std::vector targets; + + if (loc == LocationType::kTableau || loc == LocationType::kMissing) { + for (const auto& tableau : tableaus_) { + std::vector current_targets = tableau.Targets(); + targets.insert(targets.end(), current_targets.begin(), + current_targets.end()); + } + } + + if (loc == LocationType::kFoundation || loc == LocationType::kMissing) { + for (const auto& foundation : foundations_) { + std::vector current_targets = foundation.Targets(); + targets.insert(targets.end(), current_targets.begin(), + current_targets.end()); + } + } + + return targets; +} + +std::vector SolitaireState::Sources( + const absl::optional& location) const { + LocationType loc = location.value_or(LocationType::kMissing); + std::vector sources; + + if (loc == LocationType::kTableau || loc == LocationType::kMissing) { + for (const auto& tableau : tableaus_) { + std::vector current_sources = tableau.Sources(); + sources.insert(sources.end(), current_sources.begin(), + current_sources.end()); + } + } + + if (loc == LocationType::kFoundation || loc == LocationType::kMissing) { + for (const auto& foundation : foundations_) { + std::vector current_sources = foundation.Sources(); + 
sources.insert(sources.end(), current_sources.begin(), + current_sources.end()); + } + } + + if (loc == LocationType::kWaste || loc == LocationType::kMissing) { + std::vector current_sources = waste_.Sources(); + sources.insert(sources.end(), current_sources.begin(), + current_sources.end()); + } + + return sources; +} + +const Pile* SolitaireState::GetPile(const Card& card) const { + PileID pile_id = PileID::kMissing; + + if (card.GetRank() == RankType::kNone) { + if (card.GetSuit() == SuitType::kNone) { + for (auto& tableau : tableaus_) { + if (tableau.GetIsEmpty()) { + return &tableau; + } + } + } else if (card.GetSuit() != SuitType::kHidden) { + for (auto& foundation : foundations_) { + if (foundation.GetSuit() == card.GetSuit()) { + return &foundation; + } + } + } else { + SpielFatalError("The pile containing the card wasn't found"); + } + } else { + pile_id = card_map_.at(card); + } + + if (pile_id == PileID::kWaste) { + return &waste_; + } else if (pile_id >= PileID::kSpades && pile_id <= PileID::kDiamonds) { + return &foundations_.at(static_cast(pile_id) - 1); + } else if (pile_id >= PileID::k1stTableau && pile_id <= PileID::k7thTableau) { + return &tableaus_.at(static_cast(pile_id) - 5); + } else { + SpielFatalError("The pile containing the card wasn't found"); + } +} + +Pile* SolitaireState::GetPile(const Card& card) { + PileID pile_id = PileID::kMissing; + + if (card.GetRank() == RankType::kNone) { + if (card.GetSuit() == SuitType::kNone) { + for (auto& tableau : tableaus_) { + if (tableau.GetIsEmpty()) { + return &tableau; + } + } + } else if (card.GetSuit() != SuitType::kHidden) { + for (auto& foundation : foundations_) { + if (foundation.GetSuit() == card.GetSuit()) { + return &foundation; + } + } + } else { + SpielFatalError("The pile containing the card wasn't found"); + } + } else { + pile_id = card_map_.at(card); + } + + if (pile_id == PileID::kWaste) { + return &waste_; + } else if (pile_id >= PileID::kSpades && pile_id <= PileID::kDiamonds) { + return &foundations_.at(static_cast(pile_id) - 1); + } else if (pile_id >= PileID::k1stTableau && pile_id <= PileID::k7thTableau) { + return &tableaus_.at(static_cast(pile_id) - 5); + } else { + SpielFatalError("The pile containing the card wasn't found"); + } +} + +std::vector SolitaireState::CandidateMoves() const { + std::vector candidate_moves; + std::vector targets = Targets(); + std::vector sources = Sources(); + bool found_empty_tableau = false; + + for (auto& target : targets) { + if (target.GetSuit() == SuitType::kNone && + target.GetRank() == RankType::kNone) { + if (found_empty_tableau) { + continue; + } else { + found_empty_tableau = true; + } + } + for (auto& source : target.LegalChildren()) { + if (std::find(sources.begin(), sources.end(), source) != sources.end()) { + auto* source_pile = GetPile(source); + if (target.GetLocation() == LocationType::kFoundation && + source_pile->GetType() == LocationType::kTableau) { + if (source_pile->GetLastCard() == source) { + candidate_moves.emplace_back(target, source); + } + } else if (source.GetRank() == RankType::kK && + target.GetSuit() == SuitType::kNone && + target.GetRank() == RankType::kNone) { + // Check is source is not a bottom + if (source_pile->GetType() == LocationType::kWaste || + (source_pile->GetType() == LocationType::kTableau && + !(source_pile->GetFirstCard() == source))) { + candidate_moves.emplace_back(target, source); + } + } else { + candidate_moves.emplace_back(target, source); + } + } else { + continue; + } + } + } + + return candidate_moves; +} + 
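+// Note on CandidateMoves() above: when several tableaus are empty, Targets()
+// yields one identical "empty tableau" placeholder per pile, so the
+// found_empty_tableau flag keeps only the first and each king-to-empty-tableau
+// move is generated exactly once. The GetFirstCard() check likewise skips
+// moving a king that is already the bottom card of a tableau onto another
+// empty tableau, since doing so would not change the position.
+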
+void SolitaireState::MoveCards(const Move& move) { + Card target = move.GetTarget(); + Card source = move.GetSource(); + + auto* target_pile = GetPile(target); + auto* source_pile = GetPile(source); + + std::vector split_cards = source_pile->Split(source); + for (auto& card : split_cards) { + card_map_.insert_or_assign(card, target_pile->GetID()); + target_pile->Extend({card}); + } + + // Calculate rewards/returns for this move in the current state + double move_reward = 0.0; + + // Reward for moving a card to or from a foundation + if (target_pile->GetType() == LocationType::kFoundation) { + // Adds points for moving TO a foundation + move_reward += kFoundationPoints.at(source.GetRank()); + } else if (source_pile->GetType() == LocationType::kFoundation) { + // Subtracts points for moving AWAY from a foundation + move_reward -= kFoundationPoints.at(source.GetRank()); + } + + // Reward for revealing a hidden_ card + if (source_pile->GetType() == LocationType::kTableau && + !source_pile->GetIsEmpty() && source_pile->GetLastCard().GetHidden()) { + move_reward += 20.0; + } + + // Reward for moving a card from the waste_ + if (source_pile->GetType() == LocationType::kWaste) { + move_reward += 20.0; + } + + // Add current rewards to current returns + current_rewards_ = move_reward; +} + +bool SolitaireState::IsReversible(const Card& source, + const Pile* source_pile) const { + switch (source.GetLocation()) { + case LocationType::kWaste: { + return false; + } + case LocationType::kFoundation: { + return true; + } + case LocationType::kTableau: { + // Move is irreversible if its source is a bottom card or over a hidden + // card. Basically if it's the first non-hidden_ card in the pile/tableau. + auto it = std::find_if(source_pile->GetCards().begin(), + source_pile->GetCards().end(), + [](const Card& card) { return card.GetHidden(); }); + + return !(*it == source); + } + default: { + // Returns false if the source card is not in the waste, foundations, + // or tableaus + return false; + } + } +} + +// SolitaireGame Methods ======================================================= + +SolitaireGame::SolitaireGame(const GameParameters& params) + : Game(kGameType, params), + num_players_(ParameterValue("players")), + depth_limit_(ParameterValue("depth_limit")), + is_colored_(ParameterValue("is_colored")) {} + +int SolitaireGame::NumDistinctActions() const { + /* 52 Reveal Moves (one for each ordinary card) + * 52 Foundation Moves (one for every ordinary card) + * 96 Tableau Moves (two for every ordinary card except aces) + * 4 King to Empty Tableau (one for every king) + * 1 End Game Move */ + return 205; +} + +int SolitaireGame::MaxChanceOutcomes() const { return kRevealEnd + 1; } + +int SolitaireGame::MaxGameLength() const { return depth_limit_; } + +int SolitaireGame::NumPlayers() const { return 1; } + +double SolitaireGame::MinUtility() const { + /* Returns start at zero and the only negative rewards come from undoing an + * action. Undoing an action just takes away the reward that was gained from + * the action, so utility can never go below 0. */ + return 0.0; +} + +double SolitaireGame::MaxUtility() const { + /* Waste (24 * 20 = 480) + 24 cards are in the waste initially. 20 points are rewarded for every one + that is moved from the waste. Tableau (21 * 20 = 420) 21 cards are + hidden_ in the tableaus_ initially. 20 points are rewarded for every one + that is revealed. 
Foundation (4 * (100 + 90 + 80 + 70 + 60 + 50 + 40 + 30 + + 20 + 10 + + 10 + 10 + 10) = 4 * 580 = 2,320) 0 cards are in the foundations + initially. A varying number of points, based on the cards rank, are + awarded when the card is moved to the foundation. Each complete suit in + the foundation is worth 580 points. `kFoundationPoints` in `solitaire.h` + outlines how much each rank is worth. */ + return 3220.0; +} + +std::vector SolitaireGame::ObservationTensorShape() const { + /* Waste (24 * 53 = 1,272) + 24 locations and each location_ is a 53 element vector (52 normal cards + + 1 hidden) Tableau (7 * 59 = 413) Each tableau is represented as a 59 + element vector (6 hidden_ cards + 1 empty tableau + 52 normal cards_) + Foundation (4 * 14 = 56) Each foundation is represented as a 14 element + vector (13 ranks + 1 empty foundation) Total Length = 1,272 + 413 + 56 = + 1,741 */ + return {1741}; +} + +std::unique_ptr SolitaireGame::NewInitialState() const { + return std::unique_ptr(new SolitaireState(shared_from_this())); +} + +} // namespace open_spiel::solitaire diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/solitaire/solitaire.h b/scenarios/bargaining/open_spiel/open_spiel/games/solitaire/solitaire.h new file mode 100644 index 0000000..588ac56 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/solitaire/solitaire.h @@ -0,0 +1,315 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_SOLITAIRE_H +#define OPEN_SPIEL_GAMES_SOLITAIRE_H + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// An implementation of klondike solitaire: +// https://en.wikipedia.org/wiki/Klondike_(solitaire) More specifically, it is +// K+ solitaire, which allows the player to play any card from the deck/waste +// that would normally become playable after some number of draws in standard +// klondike solitaire. For a more in-depth description of K+ solitaire, see +// http://web.engr.oregonstate.edu/~afern/papers/solitaire.pdf. This +// implementation also gives rewards at intermediate states like most electronic +// versions of solitaire do, rather than only at terminal states. 
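+//
+// Illustrative usage (a sketch against the generic OpenSpiel Game/State API,
+// using the "solitaire" short name and the is_colored / depth_limit
+// parameters this game accepts):
+//
+//   std::shared_ptr<const open_spiel::Game> game =
+//       open_spiel::LoadGame("solitaire(is_colored=false,depth_limit=150)");
+//   std::unique_ptr<open_spiel::State> state = game->NewInitialState();
+//   std::mt19937 rng(0);
+//   while (!state->IsTerminal()) {
+//     std::vector<open_spiel::Action> actions = state->LegalActions();
+//     state->ApplyAction(actions[rng() % actions.size()]);
+//   }
+//   // Intermediate rewards accumulate into state->Returns().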
+ +namespace open_spiel::solitaire { + +// Default Game Parameters ===================================================== + +inline constexpr int kDefaultPlayers = 1; +inline constexpr int kDefaultDepthLimit = 150; +inline constexpr bool kDefaultIsColored = false; + +// Constants =================================================================== + +inline constexpr int kHiddenCard = 99; + +// Enumerations ================================================================ + +enum class SuitType { + kNone = 0, + kSpades, + kHearts, + kClubs, + kDiamonds, + kHidden, +}; + +enum class RankType { + kNone = 0, + kA, + k2, + k3, + k4, + k5, + k6, + k7, + k8, + k9, + kT, + kJ, + kQ, + kK, + kHidden, +}; + +enum class LocationType { + kDeck = 0, + kWaste = 1, + kFoundation = 2, + kTableau = 3, + kMissing = 4, +}; + +enum class PileID { + kWaste = 0, + kSpades = 1, + kHearts = 2, + kClubs = 3, + kDiamonds = 4, + k1stTableau = 5, + k2ndTableau = 6, + k3rdTableau = 7, + k4thTableau = 8, + k5thTableau = 9, + k6thTableau = 10, + k7thTableau = 11, + kMissing = 12 +}; + +// Support Classes ============================================================= + +class Card { + public: + // Constructors + explicit Card(bool hidden = false, SuitType suit = SuitType::kHidden, + RankType rank = RankType::kHidden, + LocationType location = LocationType::kMissing); + explicit Card(int index, bool hidden = false, + LocationType location = LocationType::kMissing); + + // Getters + RankType GetRank() const; + SuitType GetSuit() const; + LocationType GetLocation() const; + bool GetHidden() const; + int GetIndex() const; + + // Setters + void SetRank(RankType new_rank); + void SetSuit(SuitType new_suit); + void SetLocation(LocationType new_location); + void SetHidden(bool new_hidden); + + // Operators + bool operator==(const Card& other_card) const; + bool operator<(const Card& other_card) const; + + // Other Methods + std::string ToString(bool colored = true) const; + std::vector LegalChildren() const; + + private: + RankType rank_ = RankType::kHidden; // Indicates the rank of the card + SuitType suit_ = SuitType::kHidden; // Indicates the suit of the card + LocationType location_ = + LocationType::kMissing; // Indicates the type of pile the card is in + bool hidden_ = false; // Indicates whether the card is hidden or not + int index_ = kHiddenCard; // Identifies the card with an integer +}; + +class Pile { + public: + // Constructor + Pile(LocationType type, PileID id, SuitType suit = SuitType::kNone); + + // Destructor + virtual ~Pile() = default; + + // Getters/Setters + bool GetIsEmpty() const; + SuitType GetSuit() const; + LocationType GetType() const; + PileID GetID() const; + Card GetFirstCard() const; + Card GetLastCard() const; + std::vector GetCards() const; + void SetCards(std::vector new_cards); + + // Other Methods + virtual std::vector Sources() const; + virtual std::vector Targets() const; + virtual std::vector Split(Card card); + virtual void Reveal(Card card_to_reveal); + void Extend(std::vector source_cards); + std::string ToString(bool colored = true) const; + + protected: + std::vector cards_; + const LocationType type_; + const SuitType suit_; + const PileID id_; + const int max_size_; +}; + +class Tableau : public Pile { + public: + // Constructor + explicit Tableau(PileID id); + + // Other Methods + std::vector Sources() const override; + std::vector Targets() const override; + std::vector Split(Card card) override; + void Reveal(Card card_to_reveal) override; +}; + +class Foundation : public Pile { + 
public: + // Constructor + Foundation(PileID id, SuitType suit); + + // Other Methods + std::vector Sources() const override; + std::vector Targets() const override; + std::vector Split(Card card) override; +}; + +class Waste : public Pile { + public: + // Constructor + Waste(); + + // Other Methods + std::vector Sources() const override; + std::vector Targets() const override; + std::vector Split(Card card) override; + void Reveal(Card card_to_reveal) override; +}; + +class Move { + public: + // Constructors + Move(Card target_card, Card source_card); + Move(RankType target_rank, SuitType target_suit, RankType source_rank, + SuitType source_suit); + explicit Move(Action action); + + // Getters + Card GetTarget() const; + Card GetSource() const; + + // Other Methods + // =========================================================================== + std::string ToString(bool colored = true) const; + bool operator<(const Move& other_move) const; + Action ActionId() const; + + private: + Card target_; + Card source_; +}; + +class SolitaireGame : public Game { + public: + // Constructor + explicit SolitaireGame(const GameParameters& params); + + // Overridden Methods + int NumDistinctActions() const override; + int MaxGameLength() const override; + // TODO: verify whether this bound is tight and/or tighten it. + int MaxChanceNodesInHistory() const override { return MaxGameLength(); } + int MaxChanceOutcomes() const override; + int NumPlayers() const override; + double MinUtility() const override; + double MaxUtility() const override; + + std::vector ObservationTensorShape() const override; + std::unique_ptr NewInitialState() const override; + + private: + int num_players_; + int depth_limit_; + bool is_colored_; +}; + +class SolitaireState : public State { + public: + // Constructors + explicit SolitaireState(std::shared_ptr game); + + // Overridden Methods + Player CurrentPlayer() const override; + std::unique_ptr Clone() const override; + bool IsTerminal() const override; + bool IsChanceNode() const override; + std::string ToString() const override; + std::string ActionToString(Player player, Action action_id) const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + void DoApplyAction(Action action) override; + std::vector Returns() const override; + std::vector Rewards() const override; + std::vector LegalActions() const override; + std::vector> ChanceOutcomes() const override; + + // Other Methods + std::vector Targets(const absl::optional& location = + LocationType::kMissing) const; + std::vector Sources(const absl::optional& location = + LocationType::kMissing) const; + std::vector CandidateMoves() const; + Pile* GetPile(const Card& card); + const Pile* GetPile(const Card& card) const; + void MoveCards(const Move& move); + bool IsReversible(const Card& source, const Pile* source_pile) const; + + private: + Waste waste_; + std::vector foundations_; + std::vector tableaus_; + std::vector revealed_cards_; + + bool is_finished_ = false; + bool is_reversible_ = false; + int current_depth_ = 0; + + std::set previous_states_ = {}; + std::map card_map_; + + double current_returns_ = 0.0; + double current_rewards_ = 0.0; + + // Parameters + int depth_limit_ = kDefaultDepthLimit; + bool is_colored_ = kDefaultIsColored; +}; + +} // namespace open_spiel::solitaire + +#endif // OPEN_SPIEL_GAMES_SOLITAIRE_H diff --git 
a/scenarios/bargaining/open_spiel/open_spiel/games/solitaire/solitaire_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/solitaire/solitaire_test.cc new file mode 100644 index 0000000..8b34259 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/solitaire/solitaire_test.cc @@ -0,0 +1,94 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/solitaire/solitaire.h" + +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel::solitaire { +namespace { + +namespace testing = open_spiel::testing; + +void BasicSolitaireTests() { + testing::LoadGameTest("solitaire"); + testing::RandomSimTest(*LoadGame("solitaire"), 100); +} + +void TestMoveActionId() { + std::vector suit_order = {SuitType::kSpades, SuitType::kHearts, + SuitType::kClubs, SuitType::kDiamonds}; + std::vector rank_order = { + RankType::k2, RankType::k3, RankType::k4, RankType::k5, + RankType::k6, RankType::k7, RankType::k8, RankType::k9, + RankType::kT, RankType::kJ, RankType::kQ, + }; + std::vector location_order = {LocationType::kFoundation, + LocationType::kTableau}; + + std::vector valid_moves = {}; + + // Adds cards for normal moves (excludes ace and king targets) + for (const auto &suit : suit_order) { + for (const auto &rank : rank_order) { + for (const auto &location : location_order) { + auto target_card = Card(false, suit, rank, location); + for (const auto &child : target_card.LegalChildren()) { + valid_moves.emplace_back(target_card, child); + } + } + } + } + + // Adds ace-to-empty-foundation moves + for (const auto &suit : suit_order) { + valid_moves.emplace_back(RankType::kNone, suit, RankType::kA, suit); + } + + // Adds king-to-empty-tableau moves + for (const auto &suit : suit_order) { + valid_moves.emplace_back(RankType::kNone, SuitType::kNone, RankType::kK, + suit); + } + + // Adds 2-to-ace moves + for (const auto &suit : suit_order) { + valid_moves.emplace_back(RankType::kA, suit, RankType::k2, suit); + } + + // Adds queen-to-king moves + for (const auto &suit : suit_order) { + auto target_card = Card(false, suit, RankType::kK, LocationType::kTableau); + for (const auto &child : target_card.LegalChildren()) { + valid_moves.emplace_back(target_card, child); + } + } + + // Checks that the action id of a move can be converted back into the original + // move + for (const auto &move : valid_moves) { + std::cout << move.ToString() << " == " << Move(move.ActionId()).ToString() + << std::endl; + SPIEL_CHECK_EQ(move.ToString(), Move(move.ActionId()).ToString()); + } +} + +} // namespace +} // namespace open_spiel::solitaire + +int main(int argc, char **argv) { + open_spiel::solitaire::TestMoveActionId(); + open_spiel::solitaire::BasicSolitaireTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/spades/spades.cc b/scenarios/bargaining/open_spiel/open_spiel/games/spades/spades.cc new file mode 100644 index 0000000..d019b2f --- /dev/null +++ 
b/scenarios/bargaining/open_spiel/open_spiel/games/spades/spades.cc @@ -0,0 +1,619 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/spades/spades.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/games/spades/spades_scoring.h" +#include "open_spiel/observer.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace spades { +namespace { + +enum Seat { kNorth, kEast, kSouth, kWest }; + +const GameType kGameType{ + /*short_name=*/"spades", + /*long_name=*/"Partnership Spades", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/kNumPlayers, + /*min_num_players=*/kNumPlayers, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + { + // Whether to end the game early if score gets too low + {"use_mercy_rule", GameParameter(true)}, + // If using mercy rule, the threshold of negative points + {"mercy_threshold", GameParameter(-350)}, + // Amount of points needed to win the game + {"win_threshold", GameParameter(500)}, + // The amount to add to reward return for winning + // (Will subtract for losing by mercy rule) + {"win_or_loss_bonus", GameParameter(200)}, + // Number of played tricks in observation tensor + {"num_tricks", GameParameter(2)}, + }}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new SpadesGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +// Cards are represented suit * number of cards per suit + rank +Suit CardSuit(int card) { return Suit(card / 13); } +int CardRank(int card) { return card % 13; } +int Card(Suit suit, int rank) { + return static_cast(suit) * kNumCardsPerSuit + rank; +} + +constexpr char kRankChar[] = "23456789TJQKA"; +constexpr char kSuitChar[] = "CDHS"; + +std::string CardString(int card) { + return {kSuitChar[static_cast(CardSuit(card))], + kRankChar[CardRank(card)]}; +} + +std::string BidString(int bid) { + if (bid == 0) return "Nil"; + return std::to_string(bid); +} + +// There are two partnerships: players 0 and 2 versus players 1 and 3. +// We call 0 and 2 partnership 0, and 1 and 3 partnership 1. 
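+// For example, with the helpers below, Partnership(0) == Partnership(2) == 0,
+// Partnership(1) == Partnership(3) == 1, and Partner(1) == 3.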
+int Partnership(Player player) { return player & 1; } +int Partner(Player player) { return (player + 2) % 4; } +} // namespace + +SpadesGame::SpadesGame(const GameParameters& params) + : Game(kGameType, params) {} + +SpadesState::SpadesState(std::shared_ptr game, bool use_mercy_rule, + int mercy_threshold, int win_threshold, + int win_or_loss_bonus, int num_tricks) + : State(game), + use_mercy_rule_(use_mercy_rule), + mercy_threshold_(mercy_threshold), + win_threshold_(win_threshold), + win_or_loss_bonus_(win_or_loss_bonus), + num_tricks_(num_tricks) { + possible_contracts_.fill(true); +} + +std::string SpadesState::ActionToString(Player player, Action action) const { + return (action < kBiddingActionBase) ? CardString(action) + : BidString(action - kBiddingActionBase); +} + +std::string SpadesState::ToString() const { + std::string rv = absl::StrCat(FormatDeal()); + if (history_.size() > kNumCards) + absl::StrAppend(&rv, FormatAuction(/*trailing_query=*/false)); + if (num_cards_played_ > 0) absl::StrAppend(&rv, FormatPlay()); + if (IsTerminal()) absl::StrAppend(&rv, FormatResult()); + return rv; +} + +std::array FormatHand( + int player, bool mark_voids, + const std::array, kNumCards>& deal) { + std::array cards; + for (int suit = 0; suit < kNumSuits; ++suit) { + cards[suit].push_back(kSuitChar[suit]); + cards[suit].push_back(' '); + bool is_void = true; + for (int rank = kNumCardsPerSuit - 1; rank >= 0; --rank) { + if (player == deal[Card(Suit(suit), rank)]) { + cards[suit].push_back(kRankChar[rank]); + is_void = false; + } + } + if (is_void && mark_voids) absl::StrAppend(&cards[suit], "none"); + } + return cards; +} + +std::string SpadesState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + if (IsTerminal()) return ToString(); + std::string rv = ""; + auto cards = FormatHand(player, /*mark_voids=*/true, holder_); + for (int suit = kNumSuits - 1; suit >= 0; --suit) + absl::StrAppend(&rv, cards[suit], "\n"); + if (history_.size() > kNumCards) + absl::StrAppend( + &rv, FormatAuction(/*trailing_query=*/phase_ == Phase::kAuction && + player == CurrentPlayer())); + if (num_cards_played_ > 0) absl::StrAppend(&rv, FormatPlay()); + return rv; +} + +std::array, kNumCards> SpadesState::OriginalDeal() + const { + SPIEL_CHECK_GE(history_.size(), kNumCards); + std::array, kNumCards> deal; + for (int i = 0; i < kNumCards; ++i) + deal[history_[i].action] = (i % kNumPlayers); + return deal; +} + +std::string SpadesState::FormatDeal() const { + std::array, kNumPlayers> cards; + if (IsTerminal()) { + // Include all cards in the terminal state to make reviewing the deal easier + auto deal = OriginalDeal(); + for (auto player : {kNorth, kEast, kSouth, kWest}) { + cards[player] = FormatHand(player, /*mark_voids=*/false, deal); + } + } else { + for (auto player : {kNorth, kEast, kSouth, kWest}) { + cards[player] = FormatHand(player, /*mark_voids=*/false, holder_); + } + } + constexpr int kColumnWidth = 8; + std::string padding(kColumnWidth, ' '); + std::string rv; + for (int suit = kNumSuits - 1; suit >= 0; --suit) + absl::StrAppend(&rv, padding, cards[kNorth][suit], "\n"); + for (int suit = kNumSuits - 1; suit >= 0; --suit) + absl::StrAppend(&rv, absl::StrFormat("%-8s", cards[kWest][suit]), padding, + cards[kEast][suit], "\n"); + for (int suit = kNumSuits - 1; suit >= 0; --suit) + absl::StrAppend(&rv, padding, cards[kSouth][suit], "\n"); + return rv; +} + +std::string SpadesState::FormatAuction(bool trailing_query) const { + 
SPIEL_CHECK_GT(history_.size(), kNumCards); + std::string rv = "\nNorth East South West "; + for (int i = kNumCards; i < history_.size() - num_cards_played_; ++i) { + if (i % kNumPlayers == 0) rv.push_back('\n'); + absl::StrAppend( + &rv, absl::StrFormat( + "%-6s", BidString(history_[i].action - kBiddingActionBase))); + } + if (trailing_query) { + if ((history_.size() - num_cards_played_) % kNumPlayers == kNumPlayers - 1) + rv.push_back('\n'); + rv.push_back('?'); + } + return rv; +} + +std::string SpadesState::FormatPlay() const { + SPIEL_CHECK_GT(num_cards_played_, 0); + std::string rv = "\n\nN E S W N E S"; + Trick trick{kInvalidPlayer, 0}; + Player player = kFirstPlayer; + for (int i = 0; i < num_cards_played_; ++i) { + if (i % kNumPlayers == 0) { + if (i > 0) player = trick.Winner(); + absl::StrAppend(&rv, "\n", std::string(3 * player, ' ')); + } else { + player = (1 + player) % kNumPlayers; + } + const int card = history_[history_.size() - num_cards_played_ + i].action; + if (i % kNumPlayers == 0) { + trick = Trick(player, card); + } else { + trick.Play(player, card); + } + absl::StrAppend(&rv, CardString(card), " "); + } + absl::StrAppend(&rv, "\n\nTricks taken:\n\n", "North East South West\n", + absl::StrFormat("%-6d", num_player_tricks_[0]), + absl::StrFormat("%-6d", num_player_tricks_[1]), + absl::StrFormat("%-6d", num_player_tricks_[2]), + absl::StrFormat("%-6d", num_player_tricks_[3]), "\n"); + return rv; +} + +std::string SpadesState::FormatResult() const { + SPIEL_CHECK_TRUE(IsTerminal()); + std::string rv; + absl::StrAppend(&rv, "\nScore: N/S ", returns_[kNorth], " E/W ", + returns_[kEast]); + return rv; +} + +void SpadesState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_EQ(values.size(), game_->ObservationTensorSize()); + WriteObservationTensor(player, values); +} + +void SpadesState::WriteObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + std::fill(values.begin(), values.end(), 0.0); + if (phase_ == Phase::kDeal) return; + auto ptr = values.begin(); + + // Mark bidding or playing phase + ptr[static_cast(phase_) - 1] = 1; + ptr += kPhaseInfoSize; + + if (num_cards_played_ > 0) { + // Observation for play phase + + // Contracts + for (int i = 0; i < kNumPlayers; i++) { + ptr[contracts_[i]] = 1; + ptr += kNumBids; + } + + // Our remaining cards. + for (int i = 0; i < kNumCards; ++i) + if (holder_[i] == player) ptr[i] = 1; + ptr += kNumCards; + + // Indexing into history for recent tricks. 
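+    // history_ holds the kNumCards deal actions, then one bid per player, then
+    // every card played so far in order; the last (num_cards_played_ %
+    // kNumPlayers) entries are the trick in progress, and each completed trick
+    // occupies kNumPlayers consecutive entries before them.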
+ int current_trick = num_cards_played_ / kNumPlayers; + int this_trick_cards_played = num_cards_played_ % kNumPlayers; + int this_trick_start = history_.size() - this_trick_cards_played; + + // Current trick + if (phase_ != Phase::kGameOver) { + int leader = tricks_[current_trick].Leader(); + for (int i = 0; i < this_trick_cards_played; ++i) { + int card = history_[this_trick_start + i].action; + int relative_player = (i + leader + kNumPlayers - player) % kNumPlayers; + ptr[relative_player * kNumCards + card] = 1; + } + } + + ptr += kNumPlayers * kNumCards; + + // Previous tricks + for (int j = current_trick - 1; + j >= std::max(0, current_trick - num_tricks_ + 1); --j) { + int leader = tricks_[j].Leader(); + for (int i = 0; i < kNumPlayers; ++i) { + int card = + history_[this_trick_start - kNumPlayers * (current_trick - j) + i] + .action; + int relative_player = (i + leader + kNumPlayers - player) % kNumPlayers; + ptr[relative_player * kNumCards + card] = 1; + } + ptr += kNumPlayers * kNumCards; + } + + // Move pointer for future tricks to have a fixed size tensor + if (num_tricks_ > current_trick + 1) { + ptr += kNumPlayers * kNumCards * (num_tricks_ - current_trick - 1); + } + + // Number of tricks taken by each side. + for (int i = 0; i < kNumPlayers; i++) { + ptr[num_player_tricks_[i]] = 1; + ptr += kNumTricks; + } + + int kPlayTensorSize = SpadesGame::GetPlayTensorSize(num_tricks_); + SPIEL_CHECK_EQ(std::distance(values.begin(), ptr), + kPlayTensorSize + kPhaseInfoSize); + SPIEL_CHECK_LE(std::distance(values.begin(), ptr), values.size()); + } else { + // Observation for auction + + // Bids made so far + for (int i = 0; i < kNumPlayers; i++) { + // If player has bid, mark it + if (contracts_[i] >= 0) { + ptr[contracts_[i]] = 1; + } + ptr += kNumBids; + } + + // Our cards. 
+ for (int i = 0; i < kNumCards; ++i) + if (holder_[i] == player) ptr[i] = 1; + ptr += kNumCards; + SPIEL_CHECK_EQ(std::distance(values.begin(), ptr), + kAuctionTensorSize + kPhaseInfoSize); + SPIEL_CHECK_LE(std::distance(values.begin(), ptr), values.size()); + } +} + +std::vector SpadesState::PublicObservationTensor() const { + SPIEL_CHECK_TRUE(phase_ == Phase::kAuction); + std::vector rv(kPublicInfoTensorSize); + auto ptr = rv.begin(); + // Bids made so far + for (int i = 0; i < kNumPlayers; i++) { + // If player has bid, mark it + if (contracts_[i] >= 0) { + ptr[contracts_[i]] = 1; + } + ptr += kNumBids; + } + return rv; +} + +std::vector SpadesState::PrivateObservationTensor(Player player) const { + std::vector rv(kNumCards); + for (int i = 0; i < kNumCards; ++i) + if (holder_[i] == player) rv[i] = 1; + return rv; +} + +std::vector SpadesState::LegalActions() const { + switch (phase_) { + case Phase::kDeal: + return DealLegalActions(); + case Phase::kAuction: + return BiddingLegalActions(); + case Phase::kPlay: + return PlayLegalActions(); + default: + return {}; + } +} + +std::vector SpadesState::DealLegalActions() const { + std::vector legal_actions; + legal_actions.reserve(kNumCards - history_.size()); + for (int i = 0; i < kNumCards; ++i) { + if (!holder_[i].has_value()) legal_actions.push_back(i); + } + return legal_actions; +} + +std::vector SpadesState::BiddingLegalActions() const { + std::vector legal_actions; + legal_actions.reserve(kNumBids); + int partner_bid = contracts_[Partner(current_player_)]; + + if (partner_bid >= 0) { + // Combined bid between partners cannot be more than 13 + for (int bid = 0; bid < kNumBids - partner_bid; ++bid) { + legal_actions.push_back(kBiddingActionBase + bid); + } + } else { + for (int bid = 0; bid < kNumBids; ++bid) { + legal_actions.push_back(kBiddingActionBase + bid); + } + } + + return legal_actions; +} + +std::vector SpadesState::PlayLegalActions() const { + std::vector legal_actions; + legal_actions.reserve(kNumCardsPerHand - num_cards_played_ / kNumPlayers); + + // Check if we can follow suit. + if (num_cards_played_ % kNumPlayers != 0) { + auto suit = CurrentTrick().LedSuit(); + for (int rank = 0; rank < kNumCardsPerSuit; ++rank) { + if (holder_[Card(suit, rank)] == current_player_) { + legal_actions.push_back(Card(suit, rank)); + } + } + } else if (num_cards_played_ % kNumPlayers == 0 && !is_spades_broken_) { + // If leading, and spades have not been broken, play any other suit if + // possible. + for (int suit = 0 /*kClubs*/; suit < 3 /*kSpades*/; ++suit) { + for (int rank = 0; rank < kNumCardsPerSuit; ++rank) { + if (holder_[Card(Suit(suit), rank)] == current_player_) { + legal_actions.push_back(Card(Suit(suit), rank)); + } + } + } + } + if (!legal_actions.empty()) return legal_actions; + + // Otherwise, we can play any of our cards. 
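+  // (Reached when we are void in the led suit, when we lead after spades have
+  // been broken, or when we lead holding nothing but spades.)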
+ for (int card = 0; card < kNumCards; ++card) { + if (holder_[card] == current_player_) legal_actions.push_back(card); + } + return legal_actions; +} + +std::vector> SpadesState::ChanceOutcomes() const { + std::vector> outcomes; + int num_cards_remaining = kNumCards - history_.size(); + outcomes.reserve(num_cards_remaining); + const double p = 1.0 / static_cast(num_cards_remaining); + for (int card = 0; card < kNumCards; ++card) { + if (!holder_[card].has_value()) outcomes.emplace_back(card, p); + } + return outcomes; +} + +void SpadesState::DoApplyAction(Action action) { + switch (phase_) { + case Phase::kDeal: + return ApplyDealAction(action); + case Phase::kAuction: + return ApplyBiddingAction(action - kBiddingActionBase); + case Phase::kPlay: + return ApplyPlayAction(action); + case Phase::kGameOver: + SpielFatalError("Cannot act in terminal states"); + } +} + +void SpadesState::ApplyDealAction(int card) { + holder_[card] = (history_.size() % kNumPlayers); + if (history_.size() == kNumCards - 1) { + phase_ = Phase::kAuction; + current_player_ = kFirstPlayer; + } +} + +void SpadesState::ApplyBiddingAction(int bid) { + // A bid was made. + const int partner = Partner(current_player_); + SPIEL_CHECK_TRUE(contracts_[partner] == -1 || + bid + contracts_[partner] <= 13); + contracts_[current_player_] = bid; + + // Mark off possible_contracts for this player's other bids + std::fill( + possible_contracts_.begin() + (current_player_ * kNumBids), + possible_contracts_.begin() + (current_player_ * kNumBids) + kNumBids, + false); + // If partner hasn't bid, mark off partner's possible bids that would go past + // 13 + if (contracts_[partner] == -1 && bid > 0) { + std::fill( + possible_contracts_.begin() + (partner * kNumBids) + kNumBids - bid, + possible_contracts_.begin() + (partner * kNumBids) + kNumBids, false); + } + + // And now mark this bid as the player's contract + possible_contracts_[current_player_ * kNumBids + bid] = true; + + current_player_ = (current_player_ + 1) % kNumPlayers; + + // After 4 bids, end the auction. 
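+  // (A contract of -1 marks a seat that has not bid yet, so the check below
+  // fires once all four players have made their single bid.)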
+ if (std::all_of(contracts_.begin(), contracts_.end(), + [](int x) { return x != -1; })) { + phase_ = Phase::kPlay; + } +} + +void SpadesState::ApplyPlayAction(int card) { + SPIEL_CHECK_TRUE(holder_[card] == current_player_); + holder_[card] = absl::nullopt; + if (num_cards_played_ % kNumPlayers == 0) { + CurrentTrick() = Trick(current_player_, card); + } else { + CurrentTrick().Play(current_player_, card); + } + const Player winner = CurrentTrick().Winner(); + ++num_cards_played_; + if (num_cards_played_ % kNumPlayers == 0) { + current_player_ = winner; + ++num_player_tricks_[current_player_]; + } else { + current_player_ = (current_player_ + 1) % kNumPlayers; + } + if (num_cards_played_ == kNumCards) { + phase_ = Phase::kGameOver; + ScoreUp(); + } +} + +Player SpadesState::CurrentPlayer() const { + if (phase_ == Phase::kDeal) { + return kChancePlayerId; + } else if (phase_ == Phase::kGameOver) { + return kTerminalPlayerId; + } else { + return current_player_; + } +} + +void SpadesState::ScoreUp() { + std::array scores = + Score(contracts_, num_player_tricks_, current_scores_); + // Check for if bonus reward should be applied for winning (or losing by mercy + // rule) + for (int pship = 0; pship < kNumPartnerships; ++pship) { + // Update overall scores + current_scores_[pship] += scores[pship]; + // Check for bonus/penalty to returns and if overall game is over + if (scores[pship] >= win_threshold_ && scores[pship] > scores[pship ^ 1]) { + scores[pship] += win_or_loss_bonus_; // Add bonus reward for winning + is_game_over_ = true; + } else if (mercy_threshold_ && scores[pship] <= mercy_threshold_ && + scores[pship] < scores[pship ^ 1]) { + scores[pship] -= win_or_loss_bonus_; // Subtract penalty reward for + // losing by mercy rule + is_game_over_ = true; + } + } + // Apply the partnership scores (with bonus/penalty applied) to corresponding + // players' returns + for (int pl = 0; pl < kNumPlayers; ++pl) { + returns_[pl] = scores[Partnership(pl)]; + } +} + +Trick::Trick(Player leader, int card) + : led_suit_(CardSuit(card)), + winning_suit_(CardSuit(card)), + winning_rank_(CardRank(card)), + leader_(leader), + winning_player_(leader) {} + +void Trick::Play(Player player, int card) { + if (CardSuit(card) == winning_suit_) { + if (CardRank(card) > winning_rank_) { + winning_rank_ = CardRank(card); + winning_player_ = player; + } + } else if (CardSuit(card) == Suit(3) /*kSpades*/) { + winning_suit_ = Suit(3) /*kSpades*/; + winning_rank_ = CardRank(card); + winning_player_ = player; + } +} + +std::string SpadesState::Serialize() const { + std::string serialized = State::Serialize(); + return serialized; +} + +std::unique_ptr SpadesGame::DeserializeState( + const std::string& str) const { + return Game::DeserializeState(str); +} + +std::array SpadesState::ContractIndexes() const { + SPIEL_CHECK_TRUE(phase_ == Phase::kPlay || phase_ == Phase::kGameOver); + std::array contract_indexes; + for (int i = 0; i < kNumPlayers; ++i) { + contract_indexes[i] = (i * kNumBids) + contracts_[i]; + } + return contract_indexes; +} + +std::string SpadesGame::ContractString(int bid) const { + return (bid == 0) ? 
"Nil" : std::to_string(bid); +} + +} // namespace spades +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/spades/spades.h b/scenarios/bargaining/open_spiel/open_spiel/games/spades/spades.h new file mode 100644 index 0000000..f34dedd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/spades/spades.h @@ -0,0 +1,265 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_SPADES_H_ +#define OPEN_SPIEL_GAMES_SPADES_H_ + +// The full game of partnership spades. +// See https://dkmgames.com/CardSharp/Spades/SpadesHelp.php +// This is played by four players in two partnerships; it consists of a bidding +// phase followed by a play phase. The bidding phase determines the contracts +// for the play phase. The contract consists of: +// - Each player bidding how many tricks they can take. +// - If a player bids 'Nil' (meaning '0'), then they have a special condition +// for points +// based on whether they can avoid taking any tricks. +// +// There is then a play phase, in which 13 tricks are allocated between the +// two partnerships. Each partnership gains 10 times their combined contract +// if the partners are able to collectively take at least as many tricks as that +// combined contract, otherwise the partnership loses 10 times their combined +// contract. +// +// Any tricks taken in excess of a partnership's combined contract are worth 1 +// point and considered a 'bag' - for every 10 bags collected over the course of +// the game, the partnership is penalized 100 points. +// +// In the case of a Nil bid, if that partner avoids taking any tricks during the +// round, the partnership gains a 100 point bonus. Conversely, if that partner +// takes any tricks, the partnership will lose 100 points (but these tricks +// still count toward the other partner's contract). +// +// The action space is as follows: +// 0..51 Cards, used for both dealing (chance events) and play; +// 52+ Bids (Nil, 1-13) used during the bidding phase. +// +// During the bidding phase, every player will have 1 turn for making a bid. +// During the play phase, every play will have 13 turns for playing a card. + +#include +#include +#include +#include +#include +#include +#include "open_spiel/games/spades/spades_scoring.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace spades { + +inline constexpr int kBiddingActionBase = kNumCards; // First bidding action. 
+inline constexpr int kAuctionTensorSize = + kNumPlayers * kNumBids + kNumCards; // Our hand +inline constexpr int kPhaseInfoSize = 2; // Bidding (auction) and Playing +inline constexpr int kPublicInfoTensorSize = + kAuctionTensorSize // The auction + - kNumCards; // But not any player's cards +inline constexpr int kMaxAuctionLength = 4; +inline constexpr Player kFirstPlayer = 0; +enum class Suit { kClubs = 0, kDiamonds = 1, kHearts = 2, kSpades = 3 }; + +// State of a single trick. +class Trick { + public: + Trick() : Trick{kInvalidPlayer, 0} {} + Trick(Player leader, int card); + void Play(Player player, int card); + Suit LedSuit() const { return led_suit_; } + Player Winner() const { return winning_player_; } + Player Leader() const { return leader_; } + + private: + Suit led_suit_; + Suit winning_suit_; + int winning_rank_; + Player leader_; + Player winning_player_; +}; + +// State of an in-play game. Can be any phase of the game. +class SpadesState : public State { + public: + SpadesState(std::shared_ptr game, bool use_mercy_rule, + int mercy_threshold, int win_threshold, int win_or_loss_bonus, + int num_tricks); + Player CurrentPlayer() const override; + std::string ActionToString(Player player, Action action) const override; + std::string ToString() const override; + bool IsTerminal() const override { return phase_ == Phase::kGameOver; } + std::vector Returns() const override { return returns_; } + std::string ObservationString(Player player) const override; + void WriteObservationTensor(Player player, absl::Span values) const; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override { + return std::unique_ptr(new SpadesState(*this)); + } + std::vector LegalActions() const override; + std::vector> ChanceOutcomes() const override; + std::string Serialize() const override; + + // If the state is terminal, returns the indexes of the final contracts, into + // the arrays returned by PossibleFinalContracts and ScoreByContract. + std::array ContractIndexes() const; + + // Returns a mask indicating which final contracts are possible. + std::array PossibleContracts() const { + return possible_contracts_; + } + + // Private information tensor per player. + std::vector PrivateObservationTensor(Player player) const; + + // Public information. + std::vector PublicObservationTensor() const; + + // Current phase. 
+ int CurrentPhase() const { return static_cast(phase_); } + + // Current overall partnership scores + std::array GetCurrentScores() const { + return current_scores_; + } + + // Set partnership scores + void SetCurrentScores(const std::array& new_scores) { + current_scores_ = new_scores; + } + + // Indicates if overall game is over (did a partnership meet win/lose + // condition) + bool IsGameOver() const { return is_game_over_; } + + // Manually set the current player (used to specify starting player) + void SetCurrentPlayer(const int current_player) { + current_player_ = current_player; + } + + protected: + void DoApplyAction(Action action) override; + + private: + enum class Phase { kDeal, kAuction, kPlay, kGameOver }; + + std::vector DealLegalActions() const; + std::vector BiddingLegalActions() const; + std::vector PlayLegalActions() const; + void ApplyDealAction(int card); + void ApplyBiddingAction(int bid); + void ApplyPlayAction(int card); + + void ScoreUp(); + Trick& CurrentTrick() { return tricks_[num_cards_played_ / kNumPlayers]; } + const Trick& CurrentTrick() const { + return tricks_[num_cards_played_ / kNumPlayers]; + } + std::array, kNumCards> OriginalDeal() const; + std::string FormatDeal() const; + std::string FormatAuction(bool trailing_query) const; + std::string FormatPlay() const; + std::string FormatResult() const; + + const bool use_mercy_rule_; + const int mercy_threshold_; + const int win_threshold_; + const int win_or_loss_bonus_; + const int num_tricks_; + + std::array current_scores_ = {0, 0}; + bool is_game_over_ = false; + std::array num_player_tricks_ = {0, 0, 0, 0}; + int num_cards_played_ = 0; + Player current_player_ = 0; // During the play phase, the hand to play. + Phase phase_ = Phase::kDeal; + std::array contracts_ = {-1, -1, -1, -1}; + std::array tricks_{}; + std::vector returns_ = std::vector(kNumPlayers); + std::array, kNumCards> holder_{}; + std::array + possible_contracts_; // Array of bids 0-13 for each player (so 4x14 size) + bool is_spades_broken_ = false; +}; + +class SpadesGame : public Game { + public: + explicit SpadesGame(const GameParameters& params); + int NumDistinctActions() const override { + return kBiddingActionBase + kNumBids; + } + int MaxChanceOutcomes() const override { return kNumCards; } + std::unique_ptr NewInitialState() const override { + return std::unique_ptr( + new SpadesState(shared_from_this(), UseMercyRule(), MercyThreshold(), + WinThreshold(), WinOrLossBonus(), NumTricks())); + } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { return -(kMaxScore + WinOrLossBonus()); } + double MaxUtility() const override { return kMaxScore + WinOrLossBonus(); } + + static int GetPlayTensorSize(int num_tricks) { + return kNumBids * kNumPlayers // What each player's contract is + + kNumCards // Our remaining cards + + num_tricks * kNumPlayers * kNumCards // Number of played tricks + + kNumTricks * kNumPlayers; // Number of tricks each player has won + } + + std::vector ObservationTensorShape() const override { + return {kPhaseInfoSize + + std::max(GetPlayTensorSize(NumTricks()), kAuctionTensorSize)}; + } + + int MaxGameLength() const override { return kMaxAuctionLength + kNumCards; } + int MaxChanceNodesInHistory() const override { return kNumCards; } + + std::unique_ptr DeserializeState( + const std::string& str) const override; + + // How many contracts there are. + int NumPossibleContracts() const { return kNumContracts; } + + // A string representation of a contract. 
+ std::string ContractString(int bid) const; + + // Extra observation tensors. + int PrivateObservationTensorSize() const { return kNumCards; } + int PublicObservationTensorSize() const { return kPublicInfoTensorSize; } + + private: + bool UseMercyRule() const { + return ParameterValue("use_mercy_rule", true); + } + + int MercyThreshold() const { + return ParameterValue("mercy_threshold", -350); + } + + int WinThreshold() const { return ParameterValue("win_threshold", 500); } + + int WinOrLossBonus() const { + return ParameterValue("win_or_loss_bonus", 200); + } + + int NumTricks() const { return ParameterValue("num_tricks", 2); } +}; + +} // namespace spades +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_SPADES_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/spades/spades_scoring.cc b/scenarios/bargaining/open_spiel/open_spiel/games/spades/spades_scoring.cc new file mode 100644 index 0000000..3c5983a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/spades/spades_scoring.cc @@ -0,0 +1,75 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/spades/spades_scoring.h" + +#include +namespace open_spiel { +namespace spades { +namespace { + +// Score from contract is 10 times the bid (make contract arg negative if +// failed) +int ScoreContract(int contract) { return contract * 10; } + +// Penalty for accumulating 10 bags (-100 per instance) +int ScoreBagPenalties(int current_score, int overtricks) { + int current_bags = current_score % 10; + current_bags += overtricks; + return -100 * (current_bags / 10); +} + +// Bonus/penalty for succeeding/failing a Nil bid +int ScoreNil(int tricks) { return (tricks > 0) ? 
-100 : 100; } +} // namespace + +std::array Score( + const std::array contracts, + const std::array taken_tricks, + const std::array current_scores) { + std::array round_scores = {0, 0}; + + for (int pship = 0; pship < kNumPartnerships; ++pship) { + int contract = contracts[pship] + contracts[pship + 2]; + int contract_result = + (taken_tricks[pship] + taken_tricks[pship + 2]) - contract; + int bonuses = 0; + int contract_score = 0; + + // Score any nils + if (contracts[pship] == 0) { + bonuses += ScoreNil(taken_tricks[pship]); + } + if (contracts[pship + 2] == 0) { + bonuses += ScoreNil(taken_tricks[pship + 2]); + } + + // Score contracts and check for bag penalties + if (contract_result < 0) { + contract_score = ScoreContract(-contract); + } else { + contract_score = ScoreContract(contract); + + bonuses += contract_result + // Each overtrick (bag) is worth 1 point + ScoreBagPenalties(current_scores[pship], contract_result); + } + + round_scores[pship] = contract_score + bonuses; + } + + return round_scores; +} + +} // namespace spades +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/spades/spades_scoring.h b/scenarios/bargaining/open_spiel/open_spiel/games/spades/spades_scoring.h new file mode 100644 index 0000000..79ae158 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/spades/spades_scoring.h @@ -0,0 +1,64 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_SPADES_SPADES_SCORING_H_ +#define OPEN_SPIEL_GAMES_SPADES_SPADES_SCORING_H_ + +// Scoring for partnership spades. +// See https://dkmgames.com/CardSharp/Spades/SpadesHelp.php + +#include +#include + +namespace open_spiel { +namespace spades { + +inline constexpr int kNumPlayers = 4; +constexpr char kPlayerChar[] = "NESW"; + +inline constexpr int kNumSuits = 4; +inline constexpr int kNumCardsPerSuit = 13; +inline constexpr int kNumPartnerships = 2; +inline constexpr int kNumBids = 14; // Bids can be from 0 to 13 tricks +inline constexpr int kNumCards = kNumSuits * kNumCardsPerSuit; +inline constexpr int kNumCardsPerHand = kNumCards / kNumPlayers; +inline constexpr int kNumTricks = kNumCardsPerHand; +inline constexpr int kMaxScore = 230; // Bid 13 (130) + Nil (100) + +std::array Score( + const std::array contracts, + const std::array taken_tricks, + const std::array current_scores); + +// All possible contracts. 
+inline constexpr int kNumContracts = kNumBids * kNumPlayers; + +constexpr std::array AllContracts() { + std::array contracts = {}; + int bid = 0; + for (int i = 0; i < kNumContracts; ++i) { + contracts[i] = bid++; + if (bid > kNumBids) { + bid = 0; + } + } + + return contracts; +} +inline constexpr std::array kAllContracts = AllContracts(); + +} // namespace spades +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_SPADES_SPADES_SCORING_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/spades/spades_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/spades/spades_test.cc new file mode 100644 index 0000000..73ec801 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/spades/spades_test.cc @@ -0,0 +1,47 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/spades/spades_scoring.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace spades { +namespace { + +void ScoringTests() { + // Score returns difference in score (reward), not new overall score + SPIEL_CHECK_EQ(Score({4, 5, 5, 0}, {5, 3, 5, 0}, {0, 0})[0], 91); + SPIEL_CHECK_EQ(Score({13, 5, 0, 1}, {4, 6, 1, 2}, {0, 0})[0], -230); + SPIEL_CHECK_EQ(Score({3, 3, 3, 2}, {4, 2, 5, 2}, {99, 0})[0], -37); + SPIEL_CHECK_EQ(Score({2, 3, 3, 3}, {2, 4, 2, 5}, {0, 99})[1], -37); +} + +void BasicGameTests() { + testing::LoadGameTest("spades"); + testing::RandomSimTest(*LoadGame("spades"), 3); + testing::RandomSimTest(*LoadGame("spades(use_mercy_rule=false,win_threshold=" + "250,win_or_loss_bonus=1000)"), + 3); +} + +} // namespace +} // namespace spades +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::spades::ScoringTests(); + open_spiel::spades::BasicGameTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/stones_and_gems/stones_and_gems.cc b/scenarios/bargaining/open_spiel/open_spiel/games/stones_and_gems/stones_and_gems.cc new file mode 100644 index 0000000..55983c2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/stones_and_gems/stones_and_gems.cc @@ -0,0 +1,1388 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
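For reference, the partnership scoring implemented by `Score()` in spades_scoring.cc above (10 points per contracted trick, 1 point per bag with a -100 penalty for every 10 accumulated bags, and +/-100 for Nil bids) can be reproduced in a self-contained sketch. `PartnershipRoundScore` is an illustrative helper, not part of the library; the assertion mirrors the first expectation in spades_test.cc.

```cpp
#include <array>
#include <cassert>

// Round score for one partnership, re-derived from spades_scoring.cc.
int PartnershipRoundScore(std::array<int, 4> contracts,
                          std::array<int, 4> tricks, int partnership,
                          int running_score) {
  const int p0 = partnership, p1 = partnership + 2;  // the two partners
  int bonuses = 0;
  // Nil bids: +100 if that partner took no tricks, otherwise -100.
  if (contracts[p0] == 0) bonuses += (tricks[p0] > 0) ? -100 : 100;
  if (contracts[p1] == 0) bonuses += (tricks[p1] > 0) ? -100 : 100;

  const int contract = contracts[p0] + contracts[p1];
  const int overtricks = (tricks[p0] + tricks[p1]) - contract;
  if (overtricks < 0) return -10 * contract + bonuses;  // failed contract

  // Made contract: 10 per bid trick, 1 per bag, -100 per 10 accumulated bags.
  const int bags_so_far = running_score % 10;
  const int bag_penalty = -100 * ((bags_so_far + overtricks) / 10);
  return 10 * contract + overtricks + bag_penalty + bonuses;
}

int main() {
  // Mirrors SPIEL_CHECK_EQ(Score({4, 5, 5, 0}, {5, 3, 5, 0}, {0, 0})[0], 91):
  // contract 4 + 5 = 9, tricks 5 + 5 = 10, so 90 + 1 bag = 91.
  assert(PartnershipRoundScore({4, 5, 5, 0}, {5, 3, 5, 0}, 0, 0) == 91);
}
```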
+ +#include "open_spiel/games/stones_and_gems/stones_and_gems.h" + +#include + +#include // std::find, min +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/abseil-cpp/absl/strings/numbers.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/tensor_view.h" + +namespace open_spiel { +namespace stones_and_gems { + +namespace { + +// Property bit flags +enum ElementProperties { + kNone = 0, + kConsumable = 1 << 0, + kCanExplode = 1 << 1, + kRounded = 1 << 2, + kTraversable = 1 << 3, +}; + +// All possible elements +const Element kElAgent = { + HiddenCellType::kAgent, VisibleCellType::kAgent, + ElementProperties::kConsumable | ElementProperties::kCanExplode, '@'}; +const Element kElAgentInExit = {HiddenCellType::kAgentInExit, + VisibleCellType::kAgentInExit, + ElementProperties::kNone, '!'}; +const Element kElExitOpen = {HiddenCellType::kExitOpen, + VisibleCellType::kExitOpen, + ElementProperties::kTraversable, '#'}; +const Element kElExitClosed = {HiddenCellType::kExitClosed, + VisibleCellType::kExitClosed, + ElementProperties::kNone, 'C'}; +const Element kElEmpty = { + HiddenCellType::kEmpty, VisibleCellType::kEmpty, + ElementProperties::kConsumable | ElementProperties::kTraversable, ' '}; +const Element kElDirt = { + HiddenCellType::kDirt, VisibleCellType::kDirt, + ElementProperties::kConsumable | ElementProperties::kTraversable, '.'}; +const Element kElStone = { + HiddenCellType::kStone, VisibleCellType::kStone, + ElementProperties::kConsumable | ElementProperties::kRounded, 'o'}; +const Element kElStoneFalling = {HiddenCellType::kStoneFalling, + VisibleCellType::kStone, + ElementProperties::kConsumable, 'o'}; +const Element kElDiamond = {HiddenCellType::kDiamond, VisibleCellType::kDiamond, + ElementProperties::kConsumable | + ElementProperties::kRounded | + ElementProperties::kTraversable, + '*'}; +const Element kElDiamondFalling = {HiddenCellType::kDiamondFalling, + VisibleCellType::kDiamond, + ElementProperties::kConsumable, '*'}; +const Element kElFireflyUp = { + HiddenCellType::kFireflyUp, VisibleCellType::kFirefly, + ElementProperties::kConsumable | ElementProperties::kCanExplode, 'F'}; +const Element kElFireflyLeft = { + HiddenCellType::kFireflyLeft, VisibleCellType::kFirefly, + ElementProperties::kConsumable | ElementProperties::kCanExplode, 'F'}; +const Element kElFireflyDown = { + HiddenCellType::kFireflyDown, VisibleCellType::kFirefly, + ElementProperties::kConsumable | ElementProperties::kCanExplode, 'F'}; +const Element kElFireflyRight = { + HiddenCellType::kFireflyRight, VisibleCellType::kFirefly, + ElementProperties::kConsumable | ElementProperties::kCanExplode, 'F'}; +const Element kElButterflyUp = { + HiddenCellType::kButterflyUp, VisibleCellType::kButterfly, + ElementProperties::kConsumable | ElementProperties::kCanExplode, 'U'}; +const Element kElButterflyLeft = { + HiddenCellType::kButterflyLeft, VisibleCellType::kButterfly, + ElementProperties::kConsumable | ElementProperties::kCanExplode, 'U'}; +const Element kElButterflyDown = { + HiddenCellType::kButterflyDown, VisibleCellType::kButterfly, + ElementProperties::kConsumable | ElementProperties::kCanExplode, 'U'}; +const Element kElButterflyRight = { + HiddenCellType::kButterflyRight, VisibleCellType::kButterfly, + ElementProperties::kConsumable | ElementProperties::kCanExplode, 'U'}; +const Element kElBlob = 
{HiddenCellType::kBlob, VisibleCellType::kBlob, + ElementProperties::kConsumable, 'A'}; +const Element kElWallBrick = { + HiddenCellType::kWallBrick, VisibleCellType::kWallBrick, + ElementProperties::kConsumable | ElementProperties::kRounded, 'H'}; +const Element kElWallSteel = {HiddenCellType::kWallSteel, + VisibleCellType::kWallSteel, + ElementProperties::kNone, 'S'}; +const Element kElWallMagicOn = {HiddenCellType::kWallMagicOn, + VisibleCellType::kWallMagicOn, + ElementProperties::kConsumable, 'M'}; +const Element kElWallMagicDormant = {HiddenCellType::kWallMagicDormant, + VisibleCellType::kWallMagicOff, + ElementProperties::kConsumable, 'Q'}; +const Element kElWallMagicExpired = {HiddenCellType::kWallMagicExpired, + VisibleCellType::kWallMagicOff, + ElementProperties::kConsumable, 'Q'}; +const Element kElExplosionDiamond = {HiddenCellType::kExplosionDiamond, + VisibleCellType::kExplosion, + ElementProperties::kNone, 'E'}; +const Element kElExplosionBoulder = {HiddenCellType::kExplosionBoulder, + VisibleCellType::kExplosion, + ElementProperties::kNone, 'E'}; +const Element kElExplosionEmpty = {HiddenCellType::kExplosionEmpty, + VisibleCellType::kExplosion, + ElementProperties::kNone, 'E'}; +const Element kElGateRedClosed = {HiddenCellType::kGateRedClosed, + VisibleCellType::kGateRedClosed, + ElementProperties::kNone, 'r'}; +const Element kElGateRedOpen = {HiddenCellType::kGateRedOpen, + VisibleCellType::kGateRedOpen, + ElementProperties::kNone, 'R'}; +const Element kElKeyRed = {HiddenCellType::kKeyRed, VisibleCellType::kKeyRed, + ElementProperties::kTraversable, '1'}; +const Element kElGateBlueClosed = {HiddenCellType::kGateBlueClosed, + VisibleCellType::kGateBlueClosed, + ElementProperties::kNone, 'b'}; +const Element kElGateBlueOpen = {HiddenCellType::kGateBlueOpen, + VisibleCellType::kGateBlueOpen, + ElementProperties::kNone, 'B'}; +const Element kElKeyBlue = {HiddenCellType::kKeyBlue, VisibleCellType::kKeyBlue, + ElementProperties::kTraversable, '2'}; +const Element kElGateGreenClosed = {HiddenCellType::kGateGreenClosed, + VisibleCellType::kGateGreenClosed, + ElementProperties::kNone, 'g'}; +const Element kElGateGreenOpen = {HiddenCellType::kGateGreenOpen, + VisibleCellType::kGateGreenOpen, + ElementProperties::kNone, 'G'}; +const Element kElKeyGreen = {HiddenCellType::kKeyGreen, + VisibleCellType::kKeyGreen, + ElementProperties::kTraversable, '3'}; +const Element kElGateYellowClosed = {HiddenCellType::kGateYellowClosed, + VisibleCellType::kGateYellowClosed, + ElementProperties::kNone, 'y'}; +const Element kElGateYellowOpen = {HiddenCellType::kGateYellowOpen, + VisibleCellType::kGateYellowOpen, + ElementProperties::kNone, 'Y'}; +const Element kElKeyYellow = {HiddenCellType::kKeyYellow, + VisibleCellType::kKeyYellow, + ElementProperties::kTraversable, '4'}; +const Element kElNut = { + HiddenCellType::kNut, VisibleCellType::kNut, + ElementProperties::kRounded | ElementProperties::kConsumable, '+'}; +const Element kElNutFalling = { + HiddenCellType::kNutFalling, VisibleCellType::kNut, + ElementProperties::kRounded | ElementProperties::kConsumable, '+'}; +const Element kElBomb = {HiddenCellType::kBomb, VisibleCellType::kBomb, + ElementProperties::kRounded | + ElementProperties::kConsumable | + ElementProperties::kCanExplode, + '^'}; +const Element kElBombFalling = { + HiddenCellType::kBombFalling, VisibleCellType::kBomb, + ElementProperties::kRounded | ElementProperties::kConsumable | + ElementProperties::kCanExplode, + '^'}; +const Element kElOrangeUp = { + HiddenCellType::kOrangeUp, 
VisibleCellType::kOrange, + ElementProperties::kConsumable | ElementProperties::kCanExplode, 'X'}; +const Element kElOrangeLeft = { + HiddenCellType::kOrangeLeft, VisibleCellType::kOrange, + ElementProperties::kConsumable | ElementProperties::kCanExplode, 'X'}; +const Element kElOrangeDown = { + HiddenCellType::kOrangeDown, VisibleCellType::kOrange, + ElementProperties::kConsumable | ElementProperties::kCanExplode, 'X'}; +const Element kElOrangeRight = { + HiddenCellType::kOrangeRight, VisibleCellType::kOrange, + ElementProperties::kConsumable | ElementProperties::kCanExplode, 'X'}; + +// Hash for Element, so we can use as a map key +struct ElementHash { + std::size_t operator()(const Element &e) const { + return static_cast(e.cell_type) - + static_cast(HiddenCellType::kNull); + } +}; + +// ----- Conversion maps ----- +// Swap map for DeserializeState +const absl::flat_hash_map kCellTypeToElement{ + {static_cast(HiddenCellType::kNull), kNullElement}, + {static_cast(HiddenCellType::kAgent), kElAgent}, + {static_cast(HiddenCellType::kEmpty), kElEmpty}, + {static_cast(HiddenCellType::kDirt), kElDirt}, + {static_cast(HiddenCellType::kStone), kElStone}, + {static_cast(HiddenCellType::kStoneFalling), kElStoneFalling}, + {static_cast(HiddenCellType::kDiamond), kElDiamond}, + {static_cast(HiddenCellType::kDiamondFalling), kElDiamondFalling}, + {static_cast(HiddenCellType::kExitClosed), kElExitClosed}, + {static_cast(HiddenCellType::kExitOpen), kElExitOpen}, + {static_cast(HiddenCellType::kAgentInExit), kElAgentInExit}, + {static_cast(HiddenCellType::kFireflyUp), kElFireflyUp}, + {static_cast(HiddenCellType::kFireflyLeft), kElFireflyLeft}, + {static_cast(HiddenCellType::kFireflyDown), kElFireflyDown}, + {static_cast(HiddenCellType::kFireflyRight), kElFireflyRight}, + {static_cast(HiddenCellType::kButterflyUp), kElButterflyUp}, + {static_cast(HiddenCellType::kButterflyLeft), kElButterflyLeft}, + {static_cast(HiddenCellType::kButterflyDown), kElButterflyDown}, + {static_cast(HiddenCellType::kButterflyRight), kElButterflyRight}, + {static_cast(HiddenCellType::kWallBrick), kElWallBrick}, + {static_cast(HiddenCellType::kWallSteel), kElWallSteel}, + {static_cast(HiddenCellType::kWallMagicOn), kElWallMagicOn}, + {static_cast(HiddenCellType::kWallMagicDormant), kElWallMagicDormant}, + {static_cast(HiddenCellType::kWallMagicExpired), kElWallMagicExpired}, + {static_cast(HiddenCellType::kBlob), kElBlob}, + {static_cast(HiddenCellType::kExplosionBoulder), kElExplosionBoulder}, + {static_cast(HiddenCellType::kExplosionDiamond), kElExplosionDiamond}, + {static_cast(HiddenCellType::kExplosionEmpty), kElExplosionEmpty}, + {static_cast(HiddenCellType::kGateRedClosed), kElGateRedClosed}, + {static_cast(HiddenCellType::kGateRedOpen), kElGateRedOpen}, + {static_cast(HiddenCellType::kKeyRed), kElKeyRed}, + {static_cast(HiddenCellType::kGateBlueClosed), kElGateBlueClosed}, + {static_cast(HiddenCellType::kGateBlueOpen), kElGateBlueOpen}, + {static_cast(HiddenCellType::kKeyBlue), kElKeyBlue}, + {static_cast(HiddenCellType::kGateGreenClosed), kElGateGreenClosed}, + {static_cast(HiddenCellType::kGateGreenOpen), kElGateGreenOpen}, + {static_cast(HiddenCellType::kKeyGreen), kElKeyGreen}, + {static_cast(HiddenCellType::kGateYellowClosed), kElGateYellowClosed}, + {static_cast(HiddenCellType::kGateYellowOpen), kElGateYellowOpen}, + {static_cast(HiddenCellType::kKeyYellow), kElKeyYellow}, + {static_cast(HiddenCellType::kNut), kElNut}, + {static_cast(HiddenCellType::kNutFalling), kElNutFalling}, + 
{static_cast(HiddenCellType::kBomb), kElBomb}, + {static_cast(HiddenCellType::kBombFalling), kElBombFalling}, + {static_cast(HiddenCellType::kOrangeUp), kElOrangeUp}, + {static_cast(HiddenCellType::kOrangeLeft), kElOrangeLeft}, + {static_cast(HiddenCellType::kOrangeDown), kElOrangeDown}, + {static_cast(HiddenCellType::kOrangeRight), kElOrangeRight}, +}; + +// Rotate actions right +const absl::flat_hash_map kRotateRight{ + {Directions::kUp, Directions::kRight}, + {Directions::kRight, Directions::kDown}, + {Directions::kDown, Directions::kLeft}, + {Directions::kLeft, Directions::kUp}, + {Directions::kNone, Directions::kNone}, +}; + +// Rotate actions left +const absl::flat_hash_map kRotateLeft{ + {Directions::kUp, Directions::kLeft}, + {Directions::kLeft, Directions::kDown}, + {Directions::kDown, Directions::kRight}, + {Directions::kRight, Directions::kUp}, + {Directions::kNone, Directions::kNone}, +}; + +// actions to strings +const absl::flat_hash_map kActionsToString{ + {Directions::kUp, "up"}, {Directions::kLeft, "left"}, + {Directions::kDown, "down"}, {Directions::kRight, "right"}, + {Directions::kNone, "none"}, +}; + +// directions to offsets (col, row) +const absl::flat_hash_map> kDirectionOffsets{ + {Directions::kUp, {0, -1}}, {Directions::kUpLeft, {-1, -1}}, + {Directions::kLeft, {-1, 0}}, {Directions::kDownLeft, {-1, 1}}, + {Directions::kDown, {0, 1}}, {Directions::kDownRight, {1, 1}}, + {Directions::kRight, {1, 0}}, {Directions::kUpRight, {1, -1}}, + {Directions::kNone, {0, 0}}, +}; + +// Directions to fireflys +const absl::flat_hash_map kDirectionToFirefly{ + {Directions::kUp, kElFireflyUp}, + {Directions::kLeft, kElFireflyLeft}, + {Directions::kDown, kElFireflyDown}, + {Directions::kRight, kElFireflyRight}, +}; + +// Firefly to directions +const absl::flat_hash_map kFireflyToDirection{ + {kElFireflyUp, Directions::kUp}, + {kElFireflyLeft, Directions::kLeft}, + {kElFireflyDown, Directions::kDown}, + {kElFireflyRight, Directions::kRight}, +}; + +// Directions to butterflys +const absl::flat_hash_map kDirectionToButterfly{ + {Directions::kUp, kElButterflyUp}, + {Directions::kLeft, kElButterflyLeft}, + {Directions::kDown, kElButterflyDown}, + {Directions::kRight, kElButterflyRight}, +}; + +// Butterfly to directions +const absl::flat_hash_map kButterflyToDirection{ + {kElButterflyUp, Directions::kUp}, + {kElButterflyLeft, Directions::kLeft}, + {kElButterflyDown, Directions::kDown}, + {kElButterflyRight, Directions::kRight}, +}; + +// Orange to directions +const absl::flat_hash_map kOrangeToDirection{ + {kElOrangeUp, Directions::kUp}, + {kElOrangeLeft, Directions::kLeft}, + {kElOrangeDown, Directions::kDown}, + {kElOrangeRight, Directions::kRight}, +}; + +// Direction to Orange +const absl::flat_hash_map kDirectionToOrange{ + {Directions::kUp, kElOrangeUp}, + {Directions::kLeft, kElOrangeLeft}, + {Directions::kDown, kElOrangeDown}, + {Directions::kRight, kElOrangeRight}, +}; + +// Element explosion maps +const absl::flat_hash_map kElementToExplosion{ + {kElFireflyUp, kElExplosionEmpty}, + {kElFireflyLeft, kElExplosionEmpty}, + {kElFireflyDown, kElExplosionEmpty}, + {kElFireflyRight, kElExplosionEmpty}, + {kElButterflyUp, kElExplosionDiamond}, + {kElButterflyLeft, kElExplosionDiamond}, + {kElButterflyDown, kElExplosionDiamond}, + {kElButterflyRight, kElExplosionDiamond}, + {kElAgent, kElExplosionEmpty}, + {kElBomb, kElExplosionEmpty}, + {kElBombFalling, kElExplosionEmpty}, + {kElOrangeUp, kElExplosionEmpty}, + {kElOrangeLeft, kElExplosionEmpty}, + {kElOrangeDown, kElExplosionEmpty}, + 
{kElOrangeRight, kElExplosionEmpty}, +}; + +// Explosions back to elements +const absl::flat_hash_map kExplosionToElement{ + {kElExplosionDiamond, kElDiamond}, + {kElExplosionBoulder, kElStone}, + {kElExplosionEmpty, kElEmpty}, +}; + +// Magic wall conversion map +const absl::flat_hash_map kMagicWallConversion{ + {kElStoneFalling, kElDiamondFalling}, + {kElDiamondFalling, kElStoneFalling}, +}; + +// Gem point maps +const absl::flat_hash_map kGemPoints{ + {kElDiamond, 10}, + {kElDiamondFalling, 10}, +}; + +// Gate open conversion map +const absl::flat_hash_map kGateOpenMap{ + {kElGateRedClosed, kElGateRedOpen}, + {kElGateBlueClosed, kElGateBlueOpen}, + {kElGateGreenClosed, kElGateGreenOpen}, + {kElGateYellowClosed, kElGateYellowOpen}, +}; +// Gate key map +const absl::flat_hash_map kKeyToGate{ + {kElKeyRed, kElGateRedClosed}, + {kElKeyBlue, kElGateBlueClosed}, + {kElKeyGreen, kElGateGreenClosed}, + {kElKeyYellow, kElGateYellowClosed}, +}; + +// Stationary to falling +const absl::flat_hash_map kElToFalling{ + {kElDiamond, kElDiamondFalling}, + {kElStone, kElStoneFalling}, + {kElNut, kElNutFalling}, + {kElBomb, kElBombFalling}, +}; + +// Default parameters. +constexpr int kDefaultMagicWallSteps = + 140; // Number of steps before magic walls expire +constexpr int kDefaultBlobChance = + 20; // Chance to spawn another blob (out of 256) +constexpr double kDefaultBlobMaxPercentage = + 0.16; // Maximum number of blob before they collapse (percentage of map + // size) +constexpr bool kDefaultObsShowIDs = + false; // Flag to show IDs instead of one-hot encoding + +// Facts about the game +const GameType kGameType{ + /*short_name=*/"stones_and_gems", + /*long_name=*/"Stones and Gems", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kSampledStochastic, + GameType::Information::kPerfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kRewards, + /*max_num_players=*/1, + /*min_num_players=*/1, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"obs_show_ids", GameParameter(kDefaultObsShowIDs)}, + {"magic_wall_steps", GameParameter(kDefaultMagicWallSteps)}, + {"blob_chance", GameParameter(kDefaultBlobChance)}, + {"blob_max_percentage", GameParameter(kDefaultBlobMaxPercentage)}, + {"rng_seed", GameParameter(0)}, + {"grid", GameParameter(std::string(kDefaultGrid))}}}; + +std::shared_ptr Factory(const GameParameters ¶ms) { + return std::shared_ptr(new StonesNGemsGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); +} // namespace + +std::string StonesNGemsState::ActionToString(Player player, + Action move_id) const { + if (player == kChancePlayerId) { + return absl::StrCat("Chance outcome: ", move_id); + } else { + SPIEL_CHECK_GE(move_id, 0); + SPIEL_CHECK_LT(move_id, kNumActions); + if (kActionsToString.find(move_id) == kActionsToString.end()) { + SpielFatalError("Unknown move_id"); + } + return kActionsToString.at(move_id); + } +} + +bool StonesNGemsState::IsTerminal() const { + // Time complete or the agent exploded + auto it = std::find(grid_.elements.begin(), grid_.elements.end(), kElAgent); + return steps_remaining_ <= 0 || it == grid_.elements.end(); +} + +std::vector StonesNGemsState::Returns() const { + // Sum of rewards, and should agree with Rewards() + return {static_cast(sum_reward_)}; +} + +std::vector StonesNGemsState::Rewards() 
const { + // reward for most recent state transition + return {static_cast(current_reward_)}; +} + +std::string StonesNGemsState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + if (IsChanceNode()) { + return "ChanceNode -- no observation"; + } + return ToString(); +} + +void StonesNGemsState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + // Treat `values` as a 3-d tensor. + TensorView<3> view( + values, {kNumVisibleCellType, grid_.num_rows, grid_.num_cols}, true); + + // No observations at chance nodes. + if (IsChanceNode()) { + std::fill(values.begin(), values.end(), 0); + return; + } + + int i = 0; + for (int row = 0; row < grid_.num_rows; ++row) { + for (int col = 0; col < grid_.num_cols; ++col) { + int channel = static_cast(grid_.elements[i].visible_type); + view[{channel, row, col}] = obs_show_ids_ ? grid_.ids[i] : 1.0; + ++i; + } + } +} + +int StonesNGemsState::CurrentPlayer() const { + return IsTerminal() ? kTerminalPlayerId : cur_player_; +} + +std::mt19937 *StonesNGemsState::rng() { + return static_cast(game_.get())->rng(); +} + +// Element helper functions +namespace { + +bool IsActionHorz(int action) { + return action == Directions::kLeft || action == Directions::kRight; +} + +bool IsFirefly(const Element &element) { + return element == kElFireflyUp || element == kElFireflyLeft || + element == kElFireflyDown || element == kElFireflyRight; +} + +bool IsButterfly(const Element &element) { + return element == kElButterflyUp || element == kElButterflyLeft || + element == kElButterflyDown || element == kElButterflyRight; +} + +bool IsOrange(const Element &element) { + return element == kElOrangeUp || element == kElOrangeLeft || + element == kElOrangeDown || element == kElOrangeRight; +} + +bool IsExplosion(const Element &element) { + return element == kElExplosionBoulder || element == kElExplosionDiamond || + element == kElExplosionEmpty; +} + +bool IsMagicWall(const Element &element) { + return element == kElWallMagicDormant || element == kElWallMagicExpired || + element == kElWallMagicOn; +} + +bool IsOpenGate(const Element &element) { + return element == kElGateRedOpen || element == kElGateBlueOpen || + element == kElGateGreenOpen || element == kElGateYellowOpen; +} + +bool IsKey(const Element &element) { + return element == kElKeyRed || element == kElKeyBlue || + element == kElKeyGreen || element == kElKeyYellow; +} + +} // namespace + +// ---------- Game dynamic function ---------- + +// Given an index and action, get the new flat index +int StonesNGemsState::IndexFromAction(int index, int action) const { + int col = index % grid_.num_cols; + int row = (index - col) / grid_.num_cols; + std::pair offsets = kDirectionOffsets.at(action); + col += offsets.first; + row += offsets.second; + return (grid_.num_cols * row) + col; +} + +// Check if the index with a given action step will remain in bounds +bool StonesNGemsState::InBounds(int index, int action) const { + int col = index % grid_.num_cols; + int row = (index - col) / grid_.num_cols; + std::pair offsets = kDirectionOffsets.at(action); + col += offsets.first; + row += offsets.second; + return col >= 0 && col < grid_.num_cols && row >= 0 && row < grid_.num_rows; +} + +// Check if the index after applying action contains the given element +bool StonesNGemsState::IsType(int index, Element element, int action) const { + int new_index = IndexFromAction(index, action); + return 
InBounds(index, action) && grid_.elements[new_index] == element; +} + +// Check if the index after applying action has an element with the given +// property +bool StonesNGemsState::HasProperty(int index, int property, int action) const { + int new_index = IndexFromAction(index, action); + return InBounds(index, action) && + ((grid_.elements[new_index].properties & property) > 0); +} + +// Move the element given the action, and set the old index to empty +void StonesNGemsState::MoveItem(int index, int action) { + int new_index = IndexFromAction(index, action); + grid_.elements[new_index] = grid_.elements[index]; + grid_.ids[new_index] = grid_.ids[index]; + grid_.elements[new_index].has_updated = true; + grid_.elements[index] = kElEmpty; + grid_.ids[index] = ++id_counter_; +} + +// Set the new index to the given element +void StonesNGemsState::SetItem(int index, Element element, int id, int action) { + int new_index = IndexFromAction(index, action); + grid_.elements[new_index] = element; + grid_.ids[new_index] = id; + grid_.elements[new_index].has_updated = true; +} + +// Get the item after applying the action to the index +Element StonesNGemsState::GetItem(int index, int action) const { + return grid_.elements[IndexFromAction(index, action)]; +} + +// Check if the element is adjacent to and cell around the given index +bool StonesNGemsState::IsTypeAdjacent(int index, Element element) const { + return IsType(index, element, Directions::kUp) || + IsType(index, element, Directions::kLeft) || + IsType(index, element, Directions::kDown) || + IsType(index, element, Directions::kRight); +} + +// Can roll left if sitting on rounded element, left and bottom left clear +bool StonesNGemsState::CanRollLeft(int index) const { + return HasProperty(index, ElementProperties::kRounded, Directions::kDown) && + IsType(index, kElEmpty, Directions::kLeft) && + IsType(index, kElEmpty, Directions::kDownLeft); +} + +// Can roll right if sitting on rounded element, right and bottom right clear +bool StonesNGemsState::CanRollRight(int index) const { + return HasProperty(index, ElementProperties::kRounded, Directions::kDown) && + IsType(index, kElEmpty, Directions::kRight) && + IsType(index, kElEmpty, Directions::kDownRight); +} + +// Roll the item to the left +void StonesNGemsState::RollLeft(int index, Element element) { + SetItem(index, element, grid_.ids[index]); + MoveItem(index, Directions::kLeft); +} + +// Roll the item to the right +void StonesNGemsState::RollRight(int index, Element element) { + SetItem(index, element, grid_.ids[index]); + MoveItem(index, Directions::kRight); +} + +// Push the item +void StonesNGemsState::Push(int index, Element stationary, Element falling, + int action) { + int new_index = IndexFromAction(index, action); + // Check if same direction past element is empty so that theres room to push + if (IsType(new_index, kElEmpty, action)) { + // Check if the element will become stationary or falling + int next_index = IndexFromAction(new_index, action); + bool is_empty = IsType(next_index, kElEmpty, Directions::kDown); + SetItem(new_index, is_empty ? 
falling : stationary, grid_.ids[new_index],
+            action);
+    // Move the agent
+    MoveItem(index, action);
+  }
+}
+
+// Move the item through the magic wall
+void StonesNGemsState::MoveThroughMagic(int index, Element element) {
+  // Check if magic wall is still active
+  if (magic_wall_steps_ <= 0) {
+    return;
+  }
+  magic_active_ = true;
+  int index_below = IndexFromAction(index, Directions::kDown);
+  // Need to ensure cell below magic wall is empty (so item can pass through)
+  if (IsType(index_below, kElEmpty, Directions::kDown)) {
+    SetItem(index, kElEmpty, ++id_counter_);
+    SetItem(index_below, element, ++id_counter_, Directions::kDown);
+  }
+}
+
+// Explode the item
+void StonesNGemsState::Explode(int index, Element element, int action) {
+  int new_index = IndexFromAction(index, action);
+  auto it = kElementToExplosion.find(GetItem(new_index));
+  Element ex =
+      (it == kElementToExplosion.end()) ? kElExplosionEmpty : it->second;
+  SetItem(new_index, element, ++id_counter_);
+  // Recursively check all directions for chain explosions
+  for (int dir = 0; dir < kNumDirections; ++dir) {
+    if (dir == Directions::kNone || !InBounds(new_index, dir)) {
+      continue;
+    }
+    if (HasProperty(new_index, ElementProperties::kCanExplode, dir)) {
+      Explode(new_index, ex, dir);
+    } else if (HasProperty(new_index, ElementProperties::kConsumable, dir)) {
+      SetItem(new_index, ex, ++id_counter_, dir);
+    }
+  }
+}
+
+void StonesNGemsState::OpenGate(Element element) {
+  auto it = std::find(grid_.elements.begin(), grid_.elements.end(), element);
+  if (it != grid_.elements.end()) {
+    int index = std::distance(grid_.elements.begin(), it);
+    SetItem(index, kGateOpenMap.at(GetItem(index)), grid_.ids[index]);
+  }
+}
+
+void StonesNGemsState::UpdateStone(int index) {
+  // Boulder falls if empty below
+  if (IsType(index, kElEmpty, Directions::kDown)) {
+    SetItem(index, kElStoneFalling, grid_.ids[index]);
+    UpdateStoneFalling(index);
+  } else if (CanRollLeft(index)) {  // Roll left/right if possible
+    RollLeft(index, kElStoneFalling);
+  } else if (CanRollRight(index)) {
+    RollRight(index, kElStoneFalling);
+  }
+}
+
+void StonesNGemsState::UpdateStoneFalling(int index) {
+  // Continue to fall as normal
+  if (IsType(index, kElEmpty, Directions::kDown)) {
+    MoveItem(index, Directions::kDown);
+  } else if (HasProperty(index, ElementProperties::kCanExplode,
+                         Directions::kDown)) {
+    // Falling stones can cause elements to explode
+    auto it = kElementToExplosion.find(GetItem(index, Directions::kDown));
+    Element ex =
+        (it == kElementToExplosion.end()) ? kElExplosionEmpty : it->second;
+    Explode(index, ex, Directions::kDown);
+  } else if (IsType(index, kElWallMagicOn, Directions::kDown) ||
+             IsType(index, kElWallMagicDormant, Directions::kDown)) {
+    MoveThroughMagic(index, kMagicWallConversion.at(GetItem(index)));
+  } else if (IsType(index, kElNut, Directions::kDown)) {
+    // Falling on a nut, crack it open to reveal a diamond!
+    SetItem(index, kElDiamond, ++id_counter_, Directions::kDown);
+  } else if (IsType(index, kElBomb, Directions::kDown)) {
+    // Falling on a bomb, explode!
+    auto it = kElementToExplosion.find(GetItem(index));
+    Element ex =
+        (it == kElementToExplosion.end()) ?
kElExplosionEmpty : it->second; + Explode(index, ex); + } else if (CanRollLeft(index)) { // Roll left/right + RollLeft(index, kElStoneFalling); + } else if (CanRollRight(index)) { + RollRight(index, kElStoneFalling); + } else { + // Default options is for falling stones to become stationary + SetItem(index, kElStone, grid_.ids[index]); + } +} + +void StonesNGemsState::UpdateDiamond(int index) { + // Diamond falls if empty below + if (IsType(index, kElEmpty, Directions::kDown)) { + SetItem(index, kElDiamondFalling, grid_.ids[index]); + UpdateDiamondFalling(index); + } else if (CanRollLeft(index)) { // Roll left/right if possible + RollLeft(index, kElDiamondFalling); + } else if (CanRollRight(index)) { + RollRight(index, kElDiamondFalling); + } +} + +void StonesNGemsState::UpdateDiamondFalling(int index) { + // Continue to fall as normal + if (IsType(index, kElEmpty, Directions::kDown)) { + MoveItem(index, Directions::kDown); + } else if (HasProperty(index, ElementProperties::kCanExplode, + Directions::kDown) && + !IsType(index, kElBomb, Directions::kDown) && + !IsType(index, kElBombFalling, Directions::kDown)) { + // Falling diamonds can cause elements to explode (but not bombs) + auto it = kElementToExplosion.find(GetItem(index, Directions::kDown)); + Element ex = + (it == kElementToExplosion.end()) ? kElExplosionEmpty : it->second; + Explode(index, ex, Directions::kDown); + } else if (IsType(index, kElWallMagicOn, Directions::kDown) || + IsType(index, kElWallMagicDormant, Directions::kDown)) { + MoveThroughMagic(index, kMagicWallConversion.at(GetItem(index))); + } else if (CanRollLeft(index)) { // Roll left/right + RollLeft(index, kElDiamondFalling); + } else if (CanRollRight(index)) { + RollRight(index, kElDiamondFalling); + } else { + // Default options is for falling diamond to become stationary + SetItem(index, kElDiamond, grid_.ids[index]); + } +} + +void StonesNGemsState::UpdateNut(int index) { + // Nut falls if empty below + if (IsType(index, kElEmpty, Directions::kDown)) { + SetItem(index, kElNutFalling, grid_.ids[index]); + UpdateNutFalling(index); + } else if (CanRollLeft(index)) { // Roll left/right + RollLeft(index, kElNutFalling); + } else if (CanRollRight(index)) { + RollRight(index, kElNutFalling); + } +} + +void StonesNGemsState::UpdateNutFalling(int index) { + // Continue to fall as normal + if (IsType(index, kElEmpty, Directions::kDown)) { + MoveItem(index, Directions::kDown); + } else if (CanRollLeft(index)) { // Roll left/right + RollLeft(index, kElNutFalling); + } else if (CanRollRight(index)) { + RollRight(index, kElNutFalling); + } else { + // Default options is for falling nut to become stationary + SetItem(index, kElNut, grid_.ids[index]); + } +} + +void StonesNGemsState::UpdateBomb(int index) { + // Bomb falls if empty below + if (IsType(index, kElEmpty, Directions::kDown)) { + SetItem(index, kElBombFalling, grid_.ids[index]); + UpdateBombFalling(index); + } else if (CanRollLeft(index)) { // Roll left/right + RollLeft(index, kElBomb); + } else if (CanRollRight(index)) { + RollRight(index, kElBomb); + } +} + +void StonesNGemsState::UpdateBombFalling(int index) { + // Continue to fall as normal + if (IsType(index, kElEmpty, Directions::kDown)) { + MoveItem(index, Directions::kDown); + } else if (CanRollLeft(index)) { // Roll left/right + RollLeft(index, kElBombFalling); + } else if (CanRollRight(index)) { + RollRight(index, kElBombFalling); + } else { + // Default options is for bomb to explode if stopped falling + auto it = 
kElementToExplosion.find(GetItem(index)); + Element ex = + (it == kElementToExplosion.end()) ? kElExplosionEmpty : it->second; + Explode(index, ex); + } +} + +void StonesNGemsState::UpdateExit(int index) { + // Open exit if enough gems collected + if (gems_collected_ >= gems_required_) { + SetItem(index, kElExitOpen, grid_.ids[index]); + } +} + +void StonesNGemsState::UpdateAgent(int index, int action) { + if (IsType(index, kElEmpty, action) || IsType(index, kElDirt, action)) { + // Move if empty/dirt + MoveItem(index, action); + } else if (IsType(index, kElDiamond, action) || + IsType(index, kElDiamondFalling, action)) { + // Collect gems + ++gems_collected_; + current_reward_ += kGemPoints.at(GetItem(index, action)); + sum_reward_ += kGemPoints.at(GetItem(index, action)); + MoveItem(index, action); + } else if (IsActionHorz(action) && (IsType(index, kElStone, action) || + IsType(index, kElNut, action) || + IsType(index, kElBomb, action))) { + // Push stone, nut, or bomb if action is horizontal + Push(index, GetItem(index, action), kElToFalling.at(GetItem(index, action)), + action); + } else if (IsKey(GetItem(index, action))) { + // Collecting key, set gate open + OpenGate(kKeyToGate.at(GetItem(index, action))); + MoveItem(index, action); + } else if (IsOpenGate(GetItem(index, action))) { + // Walking through an open gate, with traversable element on other side + int index_gate = IndexFromAction(index, action); + if (HasProperty(index_gate, ElementProperties::kTraversable, action)) { + // Correct for landing on traversable elements + if (IsType(index_gate, kElDiamond, action)) { + ++gems_collected_; + current_reward_ += kGemPoints.at(GetItem(index_gate, action)); + sum_reward_ += kGemPoints.at(GetItem(index_gate, action)); + } else if (IsKey(GetItem(index_gate, action))) { + OpenGate(kKeyToGate.at(GetItem(index_gate, action))); + } + SetItem(index_gate, kElAgent, grid_.ids[index], action); + SetItem(index, kElEmpty, ++id_counter_); + } + } else if (IsType(index, kElExitOpen, action)) { + // Walking into exit after collecting enough gems + MoveItem(index, action); + SetItem(index, kElAgentInExit, ++id_counter_, action); + current_reward_ += steps_remaining_; + sum_reward_ += steps_remaining_; + } +} + +void StonesNGemsState::UpdateFirefly(int index, int action) { + int new_dir = kRotateLeft.at(action); + if (IsTypeAdjacent(index, kElAgent) || IsTypeAdjacent(index, kElBlob)) { + // Explode if touching the agent/blob + auto it = kElementToExplosion.find(GetItem(index)); + Element ex = + (it == kElementToExplosion.end()) ? kElExplosionEmpty : it->second; + Explode(index, ex); + } else if (IsType(index, kElEmpty, new_dir)) { + // Fireflies always try to rotate left, otherwise continue forward + SetItem(index, kDirectionToFirefly.at(new_dir), grid_.ids[index]); + MoveItem(index, new_dir); + } else if (IsType(index, kElEmpty, action)) { + SetItem(index, kDirectionToFirefly.at(action), grid_.ids[index]); + MoveItem(index, action); + } else { + // No other options, rotate right + SetItem(index, kDirectionToFirefly.at(kRotateRight.at(action)), + grid_.ids[index]); + } +} + +void StonesNGemsState::UpdateButterfly(int index, int action) { + int new_dir = kRotateRight.at(action); + if (IsTypeAdjacent(index, kElAgent) || IsTypeAdjacent(index, kElBlob)) { + // Explode if touching the agent/blob + auto it = kElementToExplosion.find(GetItem(index)); + Element ex = + (it == kElementToExplosion.end()) ? 
kElExplosionEmpty : it->second;
+    Explode(index, ex);
+  } else if (IsType(index, kElEmpty, new_dir)) {
+    // Butterflies always try to rotate right, otherwise continue forward
+    SetItem(index, kDirectionToButterfly.at(new_dir), grid_.ids[index]);
+    MoveItem(index, new_dir);
+  } else if (IsType(index, kElEmpty, action)) {
+    SetItem(index, kDirectionToButterfly.at(action), grid_.ids[index]);
+    MoveItem(index, action);
+  } else {
+    // No other options, rotate left
+    SetItem(index, kDirectionToButterfly.at(kRotateLeft.at(action)),
+            grid_.ids[index]);
+  }
+}
+
+void StonesNGemsState::UpdateOrange(int index, int action) {
+  if (IsType(index, kElEmpty, action)) {
+    // Continue moving in direction
+    MoveItem(index, action);
+  } else if (IsTypeAdjacent(index, kElAgent)) {
+    // Run into the agent, explode!
+    auto it = kElementToExplosion.find(GetItem(index));
+    Element ex =
+        (it == kElementToExplosion.end()) ? kElExplosionEmpty : it->second;
+    Explode(index, ex);
+  } else {
+    // Blocked, roll for new direction
+    std::vector<int> open_dirs;
+    for (int dir = 0; dir < kNumActions; ++dir) {
+      if (dir == Directions::kNone || !InBounds(index, dir)) {
+        continue;
+      }
+      if (IsType(index, kElEmpty, dir)) {
+        open_dirs.push_back(dir);
+      }
+    }
+    // Roll available directions
+    if (!open_dirs.empty()) {
+      int new_dir = open_dirs[(*rng())() % open_dirs.size()];
+      SetItem(index, kDirectionToOrange.at(new_dir), grid_.ids[index]);
+    }
+  }
+}
+
+void StonesNGemsState::UpdateMagicWall(int index) {
+  // Dormant, active, then expired once time runs out
+  if (magic_active_) {
+    SetItem(index, kElWallMagicOn, grid_.ids[index]);
+  } else if (magic_wall_steps_ > 0) {
+    SetItem(index, kElWallMagicDormant, grid_.ids[index]);
+  } else {
+    SetItem(index, kElWallMagicExpired, grid_.ids[index]);
+  }
+}
+
+void StonesNGemsState::UpdateBlob(int index) {
+  // Replace blobs if swap element set
+  if (blob_swap_ != kNullElement) {
+    SetItem(index, blob_swap_, ++id_counter_);
+    return;
+  }
+  ++blob_size_;
+  // Check if there is at least one tile the blob can grow to
+  if (IsTypeAdjacent(index, kElEmpty) || IsTypeAdjacent(index, kElDirt)) {
+    blob_enclosed_ = false;
+  }
+  // Roll whether to grow and in which direction
+  bool will_grow = ((*rng())() % 256) < blob_chance_;
+  int grow_dir = (*rng())() % kNumActions;
+  if (will_grow &&
+      (IsType(index, kElEmpty, grow_dir) || IsType(index, kElDirt, grow_dir))) {
+    SetItem(index, kElBlob, ++id_counter_, grow_dir);
+  }
+}
+
+void StonesNGemsState::UpdateExplosions(int index) {
+  SetItem(index, kExplosionToElement.at(GetItem(index)), ++id_counter_);
+}
+
+void StonesNGemsState::StartScan() {
+  // Update global flags
+  --steps_remaining_;
+  current_reward_ = 0;
+  blob_size_ = 0;
+  blob_enclosed_ = true;
+  // Reset element flags
+  for (auto &e : grid_.elements) {
+    e.has_updated = false;
+  }
+}
+
+void StonesNGemsState::EndScan() {
+  // Check if blob dead/closed/size
+  if (blob_swap_ == kNullElement) {
+    if (blob_enclosed_) {
+      // blobs become diamonds if enclosed
+      blob_swap_ = kElDiamond;
+    } else if (blob_size_ > blob_max_size_) {
+      // blobs become stones if they grow too large
+      blob_swap_ = kElStone;
+    }
+  }
+  // Reduce magic wall steps if active
+  if (magic_active_) {
+    magic_wall_steps_ = std::max(magic_wall_steps_ - 1, 0);
+  }
+  // Check if still active
+  magic_active_ = (magic_active_ && magic_wall_steps_ > 0);
+}
+
+void StonesNGemsState::DoApplyAction(Action move) {
+  if (cur_player_ == kChancePlayerId) {
+    // Check each cell and apply respective dynamics function
+    for (int index = 0; index < grid_.num_cols *
grid_.num_rows; ++index) { + Element &e = grid_.elements[index]; + if (e.has_updated) { + continue; + } else if (e == kElStone) { + UpdateStone(index); + } else if (e == kElStoneFalling) { + UpdateStoneFalling(index); + } else if (e == kElDiamond) { + UpdateDiamond(index); + } else if (e == kElDiamondFalling) { + UpdateDiamondFalling(index); + } else if (e == kElNut) { + UpdateNut(index); + } else if (e == kElNutFalling) { + UpdateNutFalling(index); + } else if (e == kElBomb) { + UpdateBomb(index); + } else if (e == kElBombFalling) { + UpdateBombFalling(index); + } else if (e == kElExitClosed) { + UpdateExit(index); + } else if (IsButterfly(e)) { + UpdateButterfly(index, kButterflyToDirection.at(e)); + } else if (IsFirefly(e)) { + UpdateFirefly(index, kFireflyToDirection.at(e)); + } else if (IsOrange(e)) { + UpdateOrange(index, kOrangeToDirection.at(e)); + } else if (IsMagicWall(e)) { + UpdateMagicWall(index); + } else if (e == kElBlob) { + UpdateBlob(index); + } else if (IsExplosion(e)) { + UpdateExplosions(index); + } + } + EndScan(); + cur_player_ = 0; + } else { + StartScan(); + // Find where the agent is, and update its position + auto it = std::find(grid_.elements.begin(), grid_.elements.end(), kElAgent); + int index = std::distance(grid_.elements.begin(), it); + UpdateAgent(index, move); + cur_player_ = kChancePlayerId; + } +} + +std::vector StonesNGemsState::LegalActions() const { + if (IsChanceNode()) { + return LegalChanceOutcomes(); + } else if (IsTerminal()) { + return {}; + } else { + return {Directions::kNone, Directions::kUp, Directions::kRight, + Directions::kDown, Directions::kLeft}; + } +} + +std::vector> StonesNGemsState::ChanceOutcomes() + const { + SPIEL_CHECK_TRUE(IsChanceNode()); + std::vector> outcomes = {std::make_pair(0, 1.0)}; + return outcomes; +} + +std::string StonesNGemsState::ToString() const { + if (IsChanceNode()) { + return "chance node"; + } + std::string out_str; + int col_counter = 0; + for (const auto el : grid_.elements) { + ++col_counter; + out_str += el.id; + if (col_counter == grid_.num_cols) { + absl::StrAppend(&out_str, "\n"); + col_counter = 0; + } + } + absl::StrAppend(&out_str, "time left: ", steps_remaining_, ", "); + absl::StrAppend(&out_str, "gems required: ", gems_required_, ", "); + absl::StrAppend(&out_str, "gems collectred: ", gems_collected_); + return out_str; +} + +std::string StonesNGemsState::Serialize() const { + std::string out_str; + // grid properties + absl::StrAppend(&out_str, grid_.num_cols, ","); + absl::StrAppend(&out_str, grid_.num_rows, ","); + absl::StrAppend(&out_str, steps_remaining_, ","); + absl::StrAppend(&out_str, magic_wall_steps_, ","); + absl::StrAppend(&out_str, magic_active_, ","); + absl::StrAppend(&out_str, blob_max_size_, ","); + absl::StrAppend(&out_str, blob_size_, ","); + absl::StrAppend(&out_str, blob_chance_, ","); + absl::StrAppend(&out_str, static_cast(blob_swap_.cell_type), ","); + absl::StrAppend(&out_str, blob_enclosed_, ","); + absl::StrAppend(&out_str, gems_required_, ","); + absl::StrAppend(&out_str, gems_collected_, ","); + absl::StrAppend(&out_str, current_reward_, ","); + absl::StrAppend(&out_str, sum_reward_, ","); + absl::StrAppend(&out_str, obs_show_ids_, ","); + absl::StrAppend(&out_str, id_counter_, ","); + absl::StrAppend(&out_str, cur_player_, "\n"); + // grid contents + int col_counter = 0; + for (std::size_t i = 0; i < grid_.elements.size(); ++i) { + ++col_counter; + absl::StrAppend(&out_str, static_cast(grid_.elements[i].cell_type), + ","); + absl::StrAppend(&out_str, 
grid_.ids[i], ",");
+    if (col_counter == grid_.num_cols) {
+      out_str.pop_back();
+      absl::StrAppend(&out_str, "\n");
+      col_counter = 0;
+    }
+  }
+  // remove trailing newline
+  out_str.pop_back();
+  return out_str;
+}
+
+std::unique_ptr<State> StonesNGemsState::Clone() const {
+  return std::unique_ptr<State>(new StonesNGemsState(*this));
+}
+
+StonesNGemsState::StonesNGemsState(
+    std::shared_ptr<const Game> game, int steps_remaining, int magic_wall_steps,
+    bool magic_active, int blob_max_size, int blob_size, int blob_chance,
+    Element blob_swap, bool blob_enclosed, int gems_required,
+    int gems_collected, int current_reward, int sum_reward, Grid grid,
+    bool obs_show_ids, int id_counter, Player player)
+    : State(game),
+      steps_remaining_(steps_remaining),
+      magic_wall_steps_(magic_wall_steps),
+      magic_active_(magic_active),
+      blob_max_size_(blob_max_size),
+      blob_size_(blob_size),
+      blob_chance_(blob_chance),
+      blob_swap_(blob_swap),
+      blob_enclosed_(blob_enclosed),
+      gems_required_(gems_required),
+      gems_collected_(gems_collected),
+      current_reward_(current_reward),
+      sum_reward_(sum_reward),
+      grid_(grid),
+      obs_show_ids_(obs_show_ids),
+      id_counter_(id_counter),
+      cur_player_(player) {}
+
+// ------ Game -------
+
+std::unique_ptr<State> StonesNGemsGame::DeserializeState(
+    const std::string &str) const {
+  // empty string
+  if (str.empty()) {
+    return NewInitialState();
+  }
+  std::vector<std::string> lines = absl::StrSplit(str, '\n');
+  if (lines.size() < 2) {
+    SpielFatalError("Empty map string passed.");
+  }
+  // Read grid properties
+  std::vector<std::string> property_line = absl::StrSplit(lines[0], ',');
+  Grid grid;
+  int steps_remaining, magic_wall_steps, blob_max_size, blob_size, blob_chance,
+      gems_required, gems_collected, current_reward, sum_reward, id_counter,
+      cur_player, magic_active, blob_enclosed, obs_show_ids, blob_swap;
+  SPIEL_CHECK_TRUE(absl::SimpleAtoi(property_line[0], &grid.num_cols));
+  SPIEL_CHECK_TRUE(absl::SimpleAtoi(property_line[1], &grid.num_rows));
+  SPIEL_CHECK_TRUE(absl::SimpleAtoi(property_line[2], &steps_remaining));
+  SPIEL_CHECK_TRUE(absl::SimpleAtoi(property_line[3], &magic_wall_steps));
+  SPIEL_CHECK_TRUE(absl::SimpleAtoi(property_line[4], &magic_active));
+  SPIEL_CHECK_TRUE(absl::SimpleAtoi(property_line[5], &blob_max_size));
+  SPIEL_CHECK_TRUE(absl::SimpleAtoi(property_line[6], &blob_size));
+  SPIEL_CHECK_TRUE(absl::SimpleAtoi(property_line[7], &blob_chance));
+  SPIEL_CHECK_TRUE(absl::SimpleAtoi(property_line[8], &blob_swap));
+  SPIEL_CHECK_TRUE(absl::SimpleAtoi(property_line[9], &blob_enclosed));
+  SPIEL_CHECK_TRUE(absl::SimpleAtoi(property_line[10], &gems_required));
+  SPIEL_CHECK_TRUE(absl::SimpleAtoi(property_line[11], &gems_collected));
+  SPIEL_CHECK_TRUE(absl::SimpleAtoi(property_line[12], &current_reward));
+  SPIEL_CHECK_TRUE(absl::SimpleAtoi(property_line[13], &sum_reward));
+  SPIEL_CHECK_TRUE(absl::SimpleAtoi(property_line[14], &obs_show_ids));
+  SPIEL_CHECK_TRUE(absl::SimpleAtoi(property_line[15], &id_counter));
+  SPIEL_CHECK_TRUE(absl::SimpleAtoi(property_line[16], &cur_player));
+  // Set grid elements
+  for (std::size_t i = 1; i < lines.size(); ++i) {
+    std::vector<std::string> grid_line = absl::StrSplit(lines[i], ',');
+    // Check for proper number of columns
+    if (grid_line.size() != grid.num_cols * 2) {
+      SpielFatalError(absl::StrCat("Grid line ", i - 1,
+                                   " doesn't have correct number of elements."));
+    }
+    // Check each element in row
+    // for (const auto &type : grid_line) {
+    for (std::size_t i = 0; i < grid_line.size() / 2; ++i) {
+      // Element
+      auto it = kCellTypeToElement.find(std::stoi(grid_line[2 * i]));
+      if (it != kCellTypeToElement.end()) {
+        grid.elements.push_back(it->second);
+      } else {
+        SpielFatalError(absl::StrCat("Unknown element id: ", grid_line[2 * i]));
+      }
+      // ID
+      grid.ids.push_back(std::stoi(grid_line[2 * i + 1]));
+    }
+  }
+  // Ensure we read proper number of rows
+  if (lines.size() - 1 != grid.num_rows) {
+    SpielFatalError(absl::StrCat("Incorrect number of rows, got ",
+                                 lines.size() - 1, " but need ",
+                                 grid.num_rows));
+  }
+  // Ensure the agent exists in the map
+  auto it = std::find(grid.elements.begin(), grid.elements.end(), kElAgent);
+  if (it == grid.elements.end()) {
+    SpielFatalError("Grid string doesn't contain the agent.");
+  }
+
+  return std::unique_ptr<State>(new StonesNGemsState(
+      shared_from_this(), steps_remaining, magic_wall_steps, magic_active,
+      blob_max_size, blob_size, blob_chance, kCellTypeToElement.at(blob_swap),
+      blob_enclosed, gems_required, gems_collected, current_reward, sum_reward,
+      grid, obs_show_ids, id_counter, cur_player));
+}
+
+std::string StonesNGemsGame::GetRNGState() const {
+  std::ostringstream rng_stream;
+  rng_stream << rng_;
+  return rng_stream.str();
+}
+
+void StonesNGemsGame::SetRNGState(const std::string &rng_state) const {
+  if (rng_state.empty()) return;
+  std::istringstream rng_stream(rng_state);
+  rng_stream >> rng_;
+}
+
+int StonesNGemsGame::NumDistinctActions() const { return kNumActions; }
+
+// This is an arbitrarily chosen number to ensure the game is finite.
+int StonesNGemsGame::MaxGameLength() const { return max_steps_; }
+
+int StonesNGemsGame::NumPlayers() const { return 1; }
+
+double StonesNGemsGame::MinUtility() const { return 0; }
+
+double StonesNGemsGame::MaxUtility() const {
+  // Max utility really depends on the number of gems in the map,
+  // so we have a loose upper bound.
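+  // For example, the 6x7 level used in ExtendedStonesNGemsTest in
+  // stones_and_gems_test.cc contains two diamonds and two stones with
+  // max_steps = 20, so this bound evaluates to 20 + 2 * 10 + 2 * 1 = 42.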
+  // Diamonds give points
+  // Boulders can be converted to diamonds
+  // Butterflies can drop diamonds
+  // Nuts drop diamonds if cracked
+  double max_util = max_steps_;
+  max_util +=
+      kGemPoints.at(kElDiamond) *
+      std::count(grid_.elements.begin(), grid_.elements.end(), kElDiamond);
+  max_util += kGemPoints.at(kElDiamond) * std::count(grid_.elements.begin(),
+                                                     grid_.elements.end(),
+                                                     kElDiamondFalling);
+  max_util +=
+      std::count(grid_.elements.begin(), grid_.elements.end(), kElStone);
+  max_util +=
+      std::count(grid_.elements.begin(), grid_.elements.end(), kElStoneFalling);
+  max_util += 9 * std::count(grid_.elements.begin(), grid_.elements.end(),
+                             kElButterflyUp);
+  max_util += 9 * std::count(grid_.elements.begin(), grid_.elements.end(),
+                             kElButterflyLeft);
+  max_util += 9 * std::count(grid_.elements.begin(), grid_.elements.end(),
+                             kElButterflyDown);
+  max_util += 9 * std::count(grid_.elements.begin(), grid_.elements.end(),
+                             kElButterflyRight);
+  max_util += std::count(grid_.elements.begin(), grid_.elements.end(), kElNut);
+  max_util +=
+      std::count(grid_.elements.begin(), grid_.elements.end(), kElNutFalling);
+  return max_util;
+}
+
+std::vector<int> StonesNGemsGame::ObservationTensorShape() const {
+  return {kNumVisibleCellType, grid_.num_rows, grid_.num_cols};
+}
+
+Grid StonesNGemsGame::ParseGrid(const std::string &grid_string,
+                                double blob_max_percentage) {
+  Grid grid;
+
+  std::vector<std::string> lines = absl::StrSplit(grid_string, '\n');
+  if (lines.size() < 2) {
+    SpielFatalError("Empty map string passed.");
+  }
+  // Parse first line which contains level properties
+  std::vector<std::string> property_line = absl::StrSplit(lines[0], '|');
+  SPIEL_CHECK_TRUE(absl::SimpleAtoi(property_line[0], &grid.num_cols));
+  SPIEL_CHECK_TRUE(absl::SimpleAtoi(property_line[1], &grid.num_rows));
+  SPIEL_CHECK_TRUE(absl::SimpleAtoi(property_line[2], &max_steps_));
+  SPIEL_CHECK_TRUE(absl::SimpleAtoi(property_line[3], &gems_required_));
+
+  // Parse grid contents
+  for (std::size_t i = 1; i < lines.size(); ++i) {
+    // Check for proper number of columns
+    std::vector<std::string> grid_line = absl::StrSplit(lines[i], '|');
+    if (grid_line.size() != grid.num_cols) {
+      SpielFatalError(absl::StrCat(
+          "Grid line ", i - 1, " doesn't have correct number of elements.",
+          " Received ", grid_line.size(), ", expected ", grid.num_cols));
+    }
+    // Check each element in row
+    for (const auto &type : grid_line) {
+      auto it = kCellTypeToElement.find(std::stoi(type));
+      if (it != kCellTypeToElement.end()) {
+        grid.elements.push_back(it->second);
+      } else {
+        SpielFatalError(absl::StrCat("Unknown element id: ", type));
+      }
+    }
+  }
+  // Ensure we read proper number of rows
+  if (lines.size() - 1 != grid.num_rows) {
+    SpielFatalError(absl::StrCat("Incorrect number of rows, received ",
+                                 lines.size() - 1, ", expected ",
+                                 grid.num_rows));
+  }
+  // Ensure the agent exists in the map
+  auto it = std::find(grid.elements.begin(), grid.elements.end(), kElAgent);
+  if (it == grid.elements.end()) {
+    SpielFatalError("Grid string doesn't contain the agent.");
+  }
+  blob_max_size_ = (int)(grid.num_cols * grid.num_rows * blob_max_percentage);
+
+  // Initialize the grid element IDs
+  grid.ids.clear();
+  for (std::size_t i = 0; i < grid.elements.size(); ++i) {
+    grid.ids.push_back(i + 1);
+  }
+
+  return grid;
+}
+
+StonesNGemsGame::StonesNGemsGame(const GameParameters &params)
+    : Game(kGameType, params),
+      obs_show_ids_(ParameterValue<bool>("obs_show_ids")),
+      magic_wall_steps_(ParameterValue<int>("magic_wall_steps")),
+      blob_chance_(ParameterValue<int>("blob_chance")),
+
rng_seed_(ParameterValue("rng_seed")), + grid_(ParseGrid(ParameterValue("grid"), + ParameterValue("blob_max_percentage"))) {} + +} // namespace stones_and_gems +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/stones_and_gems/stones_and_gems.h b/scenarios/bargaining/open_spiel/open_spiel/games/stones_and_gems/stones_and_gems.h new file mode 100644 index 0000000..70dee0a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/stones_and_gems/stones_and_gems.h @@ -0,0 +1,386 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_STONES_AND_GEMS_H_ +#define OPEN_SPIEL_GAMES_STONES_AND_GEMS_H_ + +#include +#include +#include +#include + +#include "open_spiel/spiel.h" + +// A simplified version of a mixture of various common stone and gem games. +// For details of the original games, see “Boulder Dash” (First Star Software, +// 1984) and Emerald Mines (Kingsoft, 1987) +// Brief: +// - The agent's goal is to go through the exit door. +// - The agent can move up, down, left, right, or stand still. +// - In order to open the exit, a minimum number of gems need to be +// collected. +// - Objects are suspended by dirt, or otherwise fall. The agent can move in +// all directions, +// and can remove dirt by walking over it. +// - The agent can push stones horizontally if there is room to do so. +// - The agent can die if objects fall on top of him, or if he collides with +// enemies. +// - Fireflies try to move clockwise. +// - Butterflies try to move counter-clockwise. +// - Both fireflies and butterflies explode if stones fall on them. +// - Butterflies can drop diamonds upon being killed. +// - Oranges move either up, left, down, or right. If they hit another +// object, they randomly start to move in another direction. +// The agent will die if he runs into an Orange. +// - Magic walls convert diamonds to stones, and stones to diamonds, but +// only when activated. Magic walls can be activated by dropping a stone +// through it, and will stop being active after a set amount of time. +// - Blobs grow randomly. If trapped, they become diamonds. If they grow too +// large, they turn into stones. +// - Keys and gates come in 4 colours: red, blue, green, and yellow. The +// gates remain closed until the agent collects the corresponding key. +// Once open, the agent can pass through. +// - Nuts can fall down like diamonds and stones. If a stones falls on it, +// it will open to reveal a diamond. +// - Bombs will explode if they fall onto an object, or a stone falls on top +// of it. The agent can push bombs. 
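+//
+// As an illustration (mirroring stones_and_gems_test.cc), a game with a custom
+// level can be loaded via
+//   LoadGame("stones_and_gems",
+//            {{"rng_seed", GameParameter(1)},
+//             {"grid", GameParameter(std::string(kCustomGrid))}});
+// where kCustomGrid stands for any grid string in the format described below.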
+// +// NOTE: Formatted levels from various popular stone and gem games such as +// Boulder Dash, as well as various +// simple RL levels can be found here +// https://github.com/tuero/stone_gems_levels +// +// Parameters: +// "magic_wall_steps" int steps magic walls remain active once +// turned on (default = 140) +// "blob_chance" int chance out of 256 each blob will +// spawn another (default = 20) +// "blob_max_percentage" double maximum blob growth size, as +// percentage of map (default = 0.16) +// "rng_seed" int seed for internal rng (default = 0) +// "grid" std::string string representing map (see +// kDefaultGrid below) +// +// Grid parameter specification +// - The grid string parameter is a pipe-separated (|) string representing +// the map +// - The first line should contain the # of cols, # of rows, max_steps, and +// gems required +// - The following lines represent the rows, with elements column separated +// - Item values are the cell type ints given by HiddenCellType (see below) + +namespace open_spiel { +namespace stones_and_gems { + +// Cell types supported from Boulderdash/Emerald Mines +enum class HiddenCellType { + kNull = -1, + kAgent = 0, + kEmpty = 1, + kDirt = 2, + kStone = 3, + kStoneFalling = 4, + kDiamond = 5, + kDiamondFalling = 6, + kExitClosed = 7, + kExitOpen = 8, + kAgentInExit = 9, + kFireflyUp = 10, + kFireflyLeft = 11, + kFireflyDown = 12, + kFireflyRight = 13, + kButterflyUp = 14, + kButterflyLeft = 15, + kButterflyDown = 16, + kButterflyRight = 17, + kWallBrick = 18, + kWallSteel = 19, + kWallMagicDormant = 20, + kWallMagicOn = 21, + kWallMagicExpired = 22, + kBlob = 23, + kExplosionDiamond = 24, + kExplosionBoulder = 25, + kExplosionEmpty = 26, + kGateRedClosed = 27, + kGateRedOpen = 28, + kKeyRed = 29, + kGateBlueClosed = 30, + kGateBlueOpen = 31, + kKeyBlue = 32, + kGateGreenClosed = 33, + kGateGreenOpen = 34, + kKeyGreen = 35, + kGateYellowClosed = 36, + kGateYellowOpen = 37, + kKeyYellow = 38, + kNut = 39, + kNutFalling = 40, + kBomb = 41, + kBombFalling = 42, + kOrangeUp = 43, + kOrangeLeft = 44, + kOrangeDown = 45, + kOrangeRight = 46, +}; + +// Cell types which are observable +enum class VisibleCellType { + kNull = -1, + kAgent = 0, + kEmpty = 1, + kDirt = 2, + kStone = 3, + kDiamond = 4, + kExitClosed = 5, + kExitOpen = 6, + kAgentInExit = 7, + kFirefly = 8, + kButterfly = 9, + kWallBrick = 10, + kWallSteel = 11, + kWallMagicOff = 12, + kWallMagicOn = 13, + kBlob = 14, + kExplosion = 15, + kGateRedClosed = 16, + kGateRedOpen = 17, + kKeyRed = 18, + kGateBlueClosed = 19, + kGateBlueOpen = 20, + kKeyBlue = 21, + kGateGreenClosed = 22, + kGateGreenOpen = 23, + kKeyGreen = 24, + kGateYellowClosed = 25, + kGateYellowOpen = 26, + kKeyYellow = 27, + kNut = 28, + kBomb = 29, + kOrange = 30 +}; + +constexpr int kNumHiddenCellType = 47; +constexpr int kNumVisibleCellType = 31; + +// Directions the interactions take place +enum Directions { + kNone = 0, + kUp = 1, + kRight = 2, + kDown = 3, + kLeft = 4, + kUpRight = 5, + kDownRight = 6, + kDownLeft = 7, + kUpLeft = 8 +}; + +// Agent can only take a subset of all directions +constexpr int kNumDirections = 9; +constexpr int kNumActions = 5; + +// Element entities, along with properties +struct Element { + HiddenCellType cell_type; + VisibleCellType visible_type; + int properties; + char id; + bool has_updated; + + Element() + : cell_type(HiddenCellType::kNull), + visible_type(VisibleCellType::kNull), + properties(0), + id(0), + has_updated(false) {} + + Element(HiddenCellType cell_type, VisibleCellType 
visible_type, + int properties, char id) + : cell_type(cell_type), + visible_type(visible_type), + properties(properties), + id(id), + has_updated(false) {} + + bool operator==(const Element& rhs) const { + return this->cell_type == rhs.cell_type; + } + + bool operator!=(const Element& rhs) const { + return this->cell_type != rhs.cell_type; + } +}; + +// Default base element +const Element kNullElement = {HiddenCellType::kNull, VisibleCellType::kNull, -1, + 0}; + +struct Grid { + int num_rows; + int num_cols; + std::vector elements; + std::vector ids; +}; + +// Default map, simple level of gems/stones/exit +inline constexpr char kDefaultGrid[] = + "20|12|600|4\n" + "19|19|19|19|19|19|19|19|19|19|19|19|19|19|19|19|19|19|19|19\n" + "19|03|02|02|03|02|02|02|02|03|02|02|02|02|02|03|02|02|02|19\n" + "19|02|00|02|02|02|02|02|02|01|02|02|02|02|02|02|02|02|02|19\n" + "19|02|02|02|05|02|02|02|02|02|02|03|02|02|02|02|02|02|02|19\n" + "19|18|18|18|18|18|18|18|18|18|18|18|18|18|02|02|02|03|02|19\n" + "19|02|02|02|02|02|05|02|02|02|02|02|02|02|02|02|02|02|02|19\n" + "19|02|02|03|02|02|02|02|02|02|02|05|02|02|03|02|02|01|01|19\n" + "19|02|02|03|02|02|02|03|02|02|02|02|02|02|02|02|02|01|11|19\n" + "19|02|02|02|02|02|18|18|18|18|18|18|18|18|18|18|18|18|18|19\n" + "19|02|02|05|02|02|02|02|02|02|05|03|02|02|03|02|02|03|02|19\n" + "19|02|02|02|02|02|02|02|02|02|02|02|02|02|03|02|02|02|02|07\n" + "19|19|19|19|19|19|19|19|19|19|19|19|19|19|19|19|19|19|19|19"; + +class StonesNGemsState : public State { + public: + StonesNGemsState(const StonesNGemsState&) = default; + StonesNGemsState(std::shared_ptr game, int steps_remaining, + int magic_wall_steps, bool magic_active, int blob_max_size, + int blob_size, int blob_chance, Element blob_swap, + bool blob_enclosed, int gems_required, int gems_collected, + int current_reward, int sum_reward, Grid grid, + bool obs_show_ids, int id_counter, Player player); + + Player CurrentPlayer() const override; + std::string ActionToString(Player player, Action move_id) const override; + std::vector> ChanceOutcomes() const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::vector Rewards() const override; + std::string ObservationString(Player player) const override; + std::string Serialize() const override; + void ObservationTensor(Player player, + absl::Span values) const override; + + std::unique_ptr Clone() const override; + + std::vector LegalActions() const override; + + protected: + void DoApplyAction(Action move_id) override; + + private: + std::mt19937* rng(); + + int IndexFromAction(int index, int action) const; + bool InBounds(int index, int action = Directions::kNone) const; + bool IsType(int index, Element element, int action = Directions::kNone) const; + bool HasProperty(int index, int property, + int action = Directions::kNone) const; + void MoveItem(int index, int action); + void SetItem(int index, Element element, int id, + int action = Directions::kNone); + Element GetItem(int index, int action = Directions::kNone) const; + bool IsTypeAdjacent(int index, Element element) const; + + bool CanRollLeft(int index) const; + bool CanRollRight(int index) const; + void RollLeft(int index, Element element); + void RollRight(int index, Element element); + void Push(int index, Element stationary, Element falling, int action); + void MoveThroughMagic(int index, Element element); + void Explode(int index, Element element, int action = Directions::kNone); + + void UpdateStone(int index); + void 
UpdateStoneFalling(int index); + void UpdateDiamond(int index); + void UpdateDiamondFalling(int index); + void UpdateNut(int index); + void UpdateNutFalling(int index); + void UpdateBomb(int index); + void UpdateBombFalling(int index); + void UpdateExit(int index); + void UpdateAgent(int index, int action); + void UpdateFirefly(int index, int action); + void UpdateButterfly(int index, int action); + void UpdateOrange(int index, int action); + void UpdateMagicWall(int index); + void UpdateBlob(int index); + void UpdateExplosions(int index); + void OpenGate(Element element); + + void StartScan(); + void EndScan(); + + int steps_remaining_; // Max steps before game over + int magic_wall_steps_; // steps before magic wall expire (after active) + bool magic_active_; // flag for magic wall state + int blob_max_size_; // size before blobs collapse + int blob_size_; // current number of blobs + int blob_chance_; // Chance to spawn another blob (out of 256) + Element blob_swap_; // Element which blobs swap to + bool blob_enclosed_; // internal flag to check if blob trapped + int gems_required_; // gems required to open exit + int gems_collected_; // gems collected thus far + int current_reward_; // reset at every step + int sum_reward_; // cumulative reward + Grid grid_; // grid representing elements/positions + bool obs_show_ids_; // Flag to show IDs in observation tensor + int id_counter_; // Next ID tracker + + Player cur_player_ = -1; // Player to play. +}; + +class StonesNGemsGame : public Game { + public: + explicit StonesNGemsGame(const GameParameters& params); + + int NumDistinctActions() const override; + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new StonesNGemsState( + shared_from_this(), max_steps_, magic_wall_steps_, false, + blob_max_size_, 0, blob_chance_, kNullElement, true, gems_required_, 0, + 0, 0, grid_, obs_show_ids_, (int)grid_.ids.size(), 0)); + } + int MaxGameLength() const override; + int NumPlayers() const override; + int MaxChanceOutcomes() const override { return 1; } + double MinUtility() const override; + double MaxUtility() const override; + std::vector ObservationTensorShape() const override; + std::unique_ptr DeserializeState( + const std::string& str) const override; + std::string GetRNGState() const override; + void SetRNGState(const std::string& rng_state) const override; + + std::mt19937* rng() const { return &rng_; } + + protected: + Grid ParseGrid(const std::string& grid_string, double blob_max_percentage); + + private: + bool obs_show_ids_; // Flag to show IDs in observation tensor + int magic_wall_steps_; // steps before magic wall expire (after active) + int blob_chance_; // Chance to spawn another blob (out of 256) + mutable int rng_seed_; // Seed for stochastic element transitions + mutable std::mt19937 rng_; // Internal rng + Grid grid_; // grid representing elements/positions + int max_steps_; // Max steps before game over + int gems_required_; // gems required to open exit + int blob_max_size_; // size before blobs collapse +}; + +} // namespace stones_and_gems +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_STONES_AND_GEMS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/stones_and_gems/stones_and_gems_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/stones_and_gems/stones_and_gems_test.cc new file mode 100644 index 0000000..c1db5c8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/stones_and_gems/stones_and_gems_test.cc @@ -0,0 +1,212 @@ +// Copyright 2019 DeepMind 
Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/stones_and_gems/stones_and_gems.h" + +#include "open_spiel/abseil-cpp/absl/container/node_hash_map.h" +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" +#include "unordered_map" + +namespace open_spiel { +namespace stones_and_gems { +namespace { + +namespace testing = open_spiel::testing; + +void BasicStonesNGemsTests() { + testing::LoadGameTest("stones_and_gems"); + testing::ChanceOutcomesTest(*LoadGame("stones_and_gems")); + testing::RandomSimTest(*LoadGame("stones_and_gems"), 100); +} + +void BasicStonesNGemsTestsWithParams() { + constexpr const char kTestDefaultGrid[] = + "6|7|20|2\n" + "19|19|19|19|19|19\n" + "19|01|01|01|01|19\n" + "19|02|02|01|01|19\n" + "19|01|01|01|01|19\n" + "19|00|03|01|02|19\n" + "19|05|02|05|01|07\n" + "19|19|19|19|19|19"; + + testing::ChanceOutcomesTest( + *LoadGame("stones_and_gems", + {{"magic_wall_steps", GameParameter(20)}, + {"blob_chance", GameParameter(50)}, + {"blob_max_percentage", GameParameter(0.25)}, + {"rng_seed", GameParameter(1)}, + {"grid", GameParameter(std::string(kTestDefaultGrid))}})); +} + +void ExtendedStonesNGemsTest() { + constexpr const char kTestDefaultGrid[] = + "6|7|20|2\n" + "19|19|19|19|19|19\n" + "19|01|01|01|03|19\n" + "19|02|02|01|01|19\n" + "19|01|01|01|02|19\n" + "19|00|03|01|02|19\n" + "19|05|02|05|01|07\n" + "19|19|19|19|19|19"; + + constexpr const char kStateToString[] = + "SSSSSS\n" + "S oS\n" + "S.. 
S\n" + "S .S\n" + "S@o .S\n" + "S*.* C\n" + "SSSSSS\n" + "time left: 20, gems required: 2, gems collectred: 0"; + + constexpr const char kStateSerialize[] = + "6,7,20,20,0,10,0,50,-1,1,2,0,0,0,1,42,0\n" + "19,1,19,2,19,3,19,4,19,5,19,6\n" + "19,7,1,8,1,9,1,10,3,11,19,12\n" + "19,13,2,14,2,15,1,16,1,17,19,18\n" + "19,19,1,20,1,21,1,22,2,23,19,24\n" + "19,25,0,26,3,27,1,28,2,29,19,30\n" + "19,31,5,32,2,33,5,34,1,35,7,36\n" + "19,37,19,38,19,39,19,40,19,41,19,42"; + + // observation tensor index along with corresponding IDs + const int offset = 6 * 7; + const absl::node_hash_map obs_ids_init{ + {0 * offset + 25, 26}, {1 * offset + 7, 8}, {1 * offset + 8, 9}, + {1 * offset + 9, 10}, {1 * offset + 15, 16}, {1 * offset + 16, 17}, + {1 * offset + 19, 20}, {1 * offset + 20, 21}, {1 * offset + 21, 22}, + {1 * offset + 27, 28}, {1 * offset + 34, 35}, {2 * offset + 13, 14}, + {2 * offset + 14, 15}, {2 * offset + 22, 23}, {2 * offset + 28, 29}, + {2 * offset + 32, 33}, {3 * offset + 10, 11}, {3 * offset + 26, 27}, + {4 * offset + 31, 32}, {4 * offset + 33, 34}, {5 * offset + 35, 36}, + {11 * offset + 0, 1}, {11 * offset + 1, 2}, {11 * offset + 2, 3}, + {11 * offset + 3, 4}, {11 * offset + 4, 5}, {11 * offset + 5, 6}, + {11 * offset + 6, 7}, {11 * offset + 11, 12}, {11 * offset + 12, 13}, + {11 * offset + 17, 18}, {11 * offset + 18, 19}, {11 * offset + 23, 24}, + {11 * offset + 24, 25}, {11 * offset + 29, 30}, {11 * offset + 30, 31}, + {11 * offset + 36, 37}, {11 * offset + 37, 38}, {11 * offset + 38, 39}, + {11 * offset + 39, 40}, {11 * offset + 40, 41}, {11 * offset + 41, 42}, + }; + + const absl::node_hash_map obs_ids_after{ + {0 * offset + 31, 26}, {1 * offset + 7, 8}, {1 * offset + 8, 9}, + {1 * offset + 9, 10}, {1 * offset + 15, 16}, {1 * offset + 19, 20}, + {1 * offset + 20, 21}, {1 * offset + 21, 22}, {1 * offset + 27, 28}, + {1 * offset + 34, 35}, {1 * offset + 25, 43}, {1 * offset + 10, 44}, + {2 * offset + 13, 14}, {2 * offset + 14, 15}, {2 * offset + 22, 23}, + {2 * offset + 28, 29}, {2 * offset + 32, 33}, {3 * offset + 16, 11}, + {3 * offset + 26, 27}, {4 * offset + 33, 34}, {5 * offset + 35, 36}, + {11 * offset + 0, 1}, {11 * offset + 1, 2}, {11 * offset + 2, 3}, + {11 * offset + 3, 4}, {11 * offset + 4, 5}, {11 * offset + 5, 6}, + {11 * offset + 6, 7}, {11 * offset + 11, 12}, {11 * offset + 12, 13}, + {11 * offset + 17, 18}, {11 * offset + 18, 19}, {11 * offset + 23, 24}, + {11 * offset + 24, 25}, {11 * offset + 29, 30}, {11 * offset + 30, 31}, + {11 * offset + 36, 37}, {11 * offset + 37, 38}, {11 * offset + 38, 39}, + {11 * offset + 39, 40}, {11 * offset + 40, 41}, {11 * offset + 41, 42}, + }; + + std::shared_ptr game = + LoadGame("stones_and_gems", + {{"magic_wall_steps", GameParameter(20)}, + {"blob_chance", GameParameter(50)}, + {"blob_max_percentage", GameParameter(0.25)}, + {"rng_seed", GameParameter(1)}, + {"grid", GameParameter(std::string(kTestDefaultGrid))}, + {"obs_show_ids", GameParameter(true)}}); + std::unique_ptr state = game->NewInitialState(); + + // Check max utility + SPIEL_CHECK_EQ(game->MaxUtility(), 20 + 2 + (2 * 10)); + + // Check string functions + SPIEL_CHECK_EQ(state->ToString(), std::string(kStateToString)); + SPIEL_CHECK_EQ(state->Serialize(), std::string(kStateSerialize)); + + // Check the observation tensor IDs + int i = 0; + for (const auto& t : state->ObservationTensor()) { + if (obs_ids_init.find(i) != obs_ids_init.end()) { + SPIEL_CHECK_EQ(obs_ids_init.at(i), t); + } else { + SPIEL_CHECK_EQ(0, t); + } + ++i; + } + + // Collect first diamond + 
SPIEL_CHECK_FALSE(state->IsChanceNode()); + state->ApplyAction(stones_and_gems::Directions::kDown); + SPIEL_CHECK_FALSE(state->IsTerminal()); + SPIEL_CHECK_EQ(state->PlayerReward(0), 10); + SPIEL_CHECK_EQ(state->PlayerReturn(0), 10); + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(stones_and_gems::Directions::kNone); + + // Ensure observation tensor IDs tracked properly + i = 0; + for (const auto& t : state->ObservationTensor()) { + if (obs_ids_after.find(i) != obs_ids_after.end()) { + SPIEL_CHECK_EQ(obs_ids_after.at(i), t); + } else { + SPIEL_CHECK_EQ(0, t); + } + ++i; + } + + // Continue towards exit + SPIEL_CHECK_FALSE(state->IsChanceNode()); + state->ApplyAction(stones_and_gems::Directions::kRight); + SPIEL_CHECK_FALSE(state->IsTerminal()); + SPIEL_CHECK_EQ(state->PlayerReward(0), 0); + SPIEL_CHECK_EQ(state->PlayerReturn(0), 10); + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(stones_and_gems::Directions::kNone); + + // Collect second diamond + SPIEL_CHECK_FALSE(state->IsChanceNode()); + state->ApplyAction(stones_and_gems::Directions::kRight); + SPIEL_CHECK_FALSE(state->IsTerminal()); + SPIEL_CHECK_EQ(state->PlayerReward(0), 10); + SPIEL_CHECK_EQ(state->PlayerReturn(0), 20); + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(stones_and_gems::Directions::kNone); + + // Continue towards exit + SPIEL_CHECK_FALSE(state->IsChanceNode()); + state->ApplyAction(stones_and_gems::Directions::kRight); + SPIEL_CHECK_FALSE(state->IsTerminal()); + SPIEL_CHECK_EQ(state->PlayerReward(0), 0); + SPIEL_CHECK_EQ(state->PlayerReturn(0), 20); + SPIEL_CHECK_TRUE(state->IsChanceNode()); + state->ApplyAction(stones_and_gems::Directions::kNone); + + // Move to exit + SPIEL_CHECK_FALSE(state->IsChanceNode()); + state->ApplyAction(stones_and_gems::Directions::kRight); + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_EQ(state->PlayerReward(0), 15); + SPIEL_CHECK_EQ(state->PlayerReturn(0), 35); +} + +} // namespace +} // namespace stones_and_gems +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::stones_and_gems::BasicStonesNGemsTests(); + open_spiel::stones_and_gems::BasicStonesNGemsTestsWithParams(); + open_spiel::stones_and_gems::ExtendedStonesNGemsTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/tarok/README.md b/scenarios/bargaining/open_spiel/open_spiel/games/tarok/README.md new file mode 100644 index 0000000..627d527 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/tarok/README.md @@ -0,0 +1,12 @@ +# Slovenian Tarok Card Game +[Slovenian Tarok](https://en.wikipedia.org/wiki/K%C3%B6nigrufen#Slovenia) is a variant of central European [Tarot card games](https://en.wikipedia.org/wiki/Tarot_card_games). It is essentially a three- or four-player, trick-taking, competitive game of skill with bidding. Computationally speaking, Tarok is moderately more complex than Bridge [[1]](#references). Detailed game rules are available at https://www.pagat.com/tarot/sltarok.html. + +The environment was implemented by [Nejc Ilenic](https://github.com/inejc) and [Tim Smole](https://github.com/TimSmole); the original repository is available at https://github.com/semanticweights/tarok. + +### Implementation Notes +Note that the current implementation is a full game without the [announcements](https://www.pagat.com/tarot/sltarok.html#announcements). The game is fully playable nevertheless as announcements can be considered an optional addition which will be added in a future PR. 
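+
+A minimal sketch of loading the game and playing one random episode through the standard OpenSpiel C++ API (assuming OpenSpiel has been built with this game registered):
+
+```cpp
+#include <memory>
+#include <random>
+#include <vector>
+
+#include "open_spiel/spiel.h"
+
+int main() {
+  std::mt19937 rng(0);
+  std::shared_ptr<const open_spiel::Game> game = open_spiel::LoadGame("tarok");
+  std::unique_ptr<open_spiel::State> state = game->NewInitialState();
+  while (!state->IsTerminal()) {
+    // At the single chance node this picks the one dummy action, which deals
+    // all of the cards internally; afterwards it plays uniformly at random.
+    std::vector<open_spiel::Action> actions = state->LegalActions();
+    state->ApplyAction(actions[rng() % actions.size()]);
+  }
+  return 0;
+}
+```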
+ +Furthermore, the environment is implemented in an implicitly stochastic manner, i.e. chance node (for dealing the cards) returns a single dummy action and applying it utilizes an internal RNG to deal all of the cards at once (within that single action). The reasoning for that particular game is that implicit implementation seemed easier (mostly meaning that less code had to be written). In addition, any algorithm that relies on the explicit game tree likely isn't viable for a game this large. + +### References +- [1] [Luštrek Mitja, Matjaž Gams, Ivan Bratko. "A program for playing Tarok." ICGA journal 26.3 (2003): 190-197.](https://pdfs.semanticscholar.org/a920/70fe11f75f58c27ed907c4688747259cae15.pdf) diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/tarok/cards.cc b/scenarios/bargaining/open_spiel/open_spiel/games/tarok/cards.cc new file mode 100644 index 0000000..ca5c1f1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/tarok/cards.cc @@ -0,0 +1,146 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/tarok/cards.h" + +#include +#include +#include + +namespace open_spiel { +namespace tarok { + +Card::Card(CardSuit suit, int rank, int points, std::string short_name, + std::string long_name) + : suit(suit), + rank(rank), + points(points), + short_name(short_name), + long_name(long_name) {} + +const std::string Card::ToString() const { return long_name; } + +const std::array InitializeCardDeck() { + return {// taroks + Card(CardSuit::kTaroks, 8, 5, "T1", "Pagat"), + Card(CardSuit::kTaroks, 9, 1, "T2", "II"), + Card(CardSuit::kTaroks, 10, 1, "T3", "III"), + Card(CardSuit::kTaroks, 11, 1, "T4", "IIII"), + Card(CardSuit::kTaroks, 12, 1, "T5", "V"), + Card(CardSuit::kTaroks, 13, 1, "T6", "VI"), + Card(CardSuit::kTaroks, 14, 1, "T7", "VII"), + Card(CardSuit::kTaroks, 15, 1, "T8", "VIII"), + Card(CardSuit::kTaroks, 16, 1, "T9", "IX"), + Card(CardSuit::kTaroks, 17, 1, "T10", "X"), + Card(CardSuit::kTaroks, 18, 1, "T11", "XI"), + Card(CardSuit::kTaroks, 19, 1, "T12", "XII"), + Card(CardSuit::kTaroks, 20, 1, "T13", "XIII"), + Card(CardSuit::kTaroks, 21, 1, "T14", "XIV"), + Card(CardSuit::kTaroks, 22, 1, "T15", "XV"), + Card(CardSuit::kTaroks, 23, 1, "T16", "XVI"), + Card(CardSuit::kTaroks, 24, 1, "T17", "XVII"), + Card(CardSuit::kTaroks, 25, 1, "T18", "XVIII"), + Card(CardSuit::kTaroks, 26, 1, "T19", "XIX"), + Card(CardSuit::kTaroks, 27, 1, "T20", "XX"), + Card(CardSuit::kTaroks, 28, 5, "T21", "Mond"), + Card(CardSuit::kTaroks, 29, 5, "T22", "Skis"), + // hearts + Card(CardSuit::kHearts, 0, 1, "H4", "4 of Hearts"), + Card(CardSuit::kHearts, 1, 1, "H3", "3 of Hearts"), + Card(CardSuit::kHearts, 2, 1, "H2", "2 of Hearts"), + Card(CardSuit::kHearts, 3, 1, "H1", "1 of Hearts"), + Card(CardSuit::kHearts, 4, 2, "HJ", "Jack of Hearts"), + Card(CardSuit::kHearts, 5, 3, "HKN", "Knight of Hearts"), + Card(CardSuit::kHearts, 6, 4, "HQ", "Queen of Hearts"), + Card(CardSuit::kHearts, 7, 5, 
"HKI", "King of Hearts"), + // diamonds + Card(CardSuit::kDiamonds, 0, 1, "D4", "4 of Diamonds"), + Card(CardSuit::kDiamonds, 1, 1, "D3", "3 of Diamonds"), + Card(CardSuit::kDiamonds, 2, 1, "D2", "2 of Diamonds"), + Card(CardSuit::kDiamonds, 3, 1, "D1", "1 of Diamonds"), + Card(CardSuit::kDiamonds, 4, 2, "DJ", "Jack of Diamonds"), + Card(CardSuit::kDiamonds, 5, 3, "DKN", "Knight of Diamonds"), + Card(CardSuit::kDiamonds, 6, 4, "DQ", "Queen of Diamonds"), + Card(CardSuit::kDiamonds, 7, 5, "DKI", "King of Diamonds"), + // spades + Card(CardSuit::kSpades, 0, 1, "S7", "7 of Spades"), + Card(CardSuit::kSpades, 1, 1, "S8", "8 of Spades"), + Card(CardSuit::kSpades, 2, 1, "S9", "9 of Spades"), + Card(CardSuit::kSpades, 3, 1, "S10", "10 of Spades"), + Card(CardSuit::kSpades, 4, 2, "SJ", "Jack of Spades"), + Card(CardSuit::kSpades, 5, 3, "SKN", "Knight of Spades"), + Card(CardSuit::kSpades, 6, 4, "SQ", "Queen of Spades"), + Card(CardSuit::kSpades, 7, 5, "SKI", "King of Spades"), + // clubs + Card(CardSuit::kClubs, 0, 1, "C7", "7 of Clubs"), + Card(CardSuit::kClubs, 1, 1, "C8", "8 of Clubs"), + Card(CardSuit::kClubs, 2, 1, "C9", "9 of Clubs"), + Card(CardSuit::kClubs, 3, 1, "C10", "10 of Clubs"), + Card(CardSuit::kClubs, 4, 2, "CJ", "Jack of Clubs"), + Card(CardSuit::kClubs, 5, 3, "CKN", "Knight of Clubs"), + Card(CardSuit::kClubs, 6, 4, "CQ", "Queen of Clubs"), + Card(CardSuit::kClubs, 7, 5, "CKI", "King of Clubs")}; +} + +DealtCards DealCards(int num_players, int seed) { + std::vector cards(54); + std::iota(cards.begin(), cards.end(), 0); + Shuffle(&cards, std::mt19937(seed)); + + // first six cards are talon + auto begin = cards.begin(); + auto end = begin + 6; + std::vector talon(begin, end); + + // deal the rest of the cards to players + int num_cards_per_player = 48 / num_players; + std::vector> players_cards; + players_cards.reserve(num_players); + + std::advance(begin, 6); + for (int i = 0; i < num_players; i++) { + std::advance(end, num_cards_per_player); + std::vector player_cards(begin, end); + // player's cards are sorted since legal actions need to be returned in + // ascending order + std::sort(player_cards.begin(), player_cards.end()); + players_cards.push_back(player_cards); + std::advance(begin, num_cards_per_player); + } + + return {talon, players_cards}; +} + +void Shuffle(std::vector* actions, std::mt19937&& rng) { + for (int i = actions->size() - 1; i > 0; i--) { + std::swap(actions->at(i), actions->at(rng() % (i + 1))); + } +} + +int CardPoints(const std::vector& actions, + const std::array& deck) { + // counting is done in batches of three (for every batch we sum up points from + // three cards and subtract 2 points, if the last batch has less than three + // cards we subtract 1 point), mathematically, this is equevalent to + // subtracting 2/3 from each card + float points = 0; + for (auto const& action : actions) { + points += deck.at(action).points; + } + points -= actions.size() * 0.666f; + return static_cast(round(points)); +} + +} // namespace tarok +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/tarok/cards.h b/scenarios/bargaining/open_spiel/open_spiel/games/tarok/cards.h new file mode 100644 index 0000000..efd7306 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/tarok/cards.h @@ -0,0 +1,70 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_TAROK_CARDS_H_ +#define OPEN_SPIEL_GAMES_TAROK_CARDS_H_ + +#include +#include +#include +#include + +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace tarok { + +// a subset of card actions that are used throughout the codebase and add to +// readability, for more info see TarokState::LegalActions() +inline constexpr int kPagatAction = 0; +inline constexpr int kMondAction = 20; +inline constexpr int kSkisAction = 21; +inline constexpr int kKingOfHeartsAction = 29; +inline constexpr int kKingOfDiamondsAction = 37; +inline constexpr int kKingOfSpadesAction = 45; +inline constexpr int kKingOfClubsAction = 53; + +enum class CardSuit { kHearts, kDiamonds, kSpades, kClubs, kTaroks }; + +struct Card { + Card(CardSuit suit, int rank, int points, std::string short_name, + std::string long_name); + + const std::string ToString() const; + + const CardSuit suit; + const int rank; + const int points; + const std::string short_name; + const std::string long_name; +}; + +const std::array InitializeCardDeck(); + +// a type for a pair holding talon and players' private cards +using DealtCards = + std::tuple, std::vector>>; +DealtCards DealCards(int num_players, int seed); + +// we use our own implementation since std::shuffle is non-deterministic across +// different versions of the standard library implementation +void Shuffle(std::vector* actions, std::mt19937&& rng); + +int CardPoints(const std::vector& actions, + const std::array& deck); + +} // namespace tarok +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_TAROK_CARDS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/tarok/contracts.cc b/scenarios/bargaining/open_spiel/open_spiel/games/tarok/contracts.cc new file mode 100644 index 0000000..fdfe1df --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/tarok/contracts.cc @@ -0,0 +1,85 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/tarok/contracts.h" + +namespace open_spiel { +namespace tarok { + +Contract::Contract(ContractName name, int score, int num_talon_exchanges, + bool needs_king_calling, bool declarer_starts, + bool is_negative) + : name(name), + score(score), + num_talon_exchanges(num_talon_exchanges), + needs_king_calling(needs_king_calling), + declarer_starts(declarer_starts), + is_negative(is_negative) {} + +bool Contract::NeedsTalonExchange() const { return num_talon_exchanges > 0; } + +const std::array InitializeContracts() { + return { + Contract(ContractName::kKlop, 70, 0, false, false, true), + Contract(ContractName::kThree, 10, 3, true, false, false), + Contract(ContractName::kTwo, 20, 2, true, false, false), + Contract(ContractName::kOne, 30, 1, true, false, false), + Contract(ContractName::kSoloThree, 40, 3, false, false, false), + Contract(ContractName::kSoloTwo, 50, 2, false, false, false), + Contract(ContractName::kSoloOne, 60, 1, false, false, false), + Contract(ContractName::kBeggar, 70, 0, false, true, true), + Contract(ContractName::kSoloWithout, 80, 0, false, true, false), + Contract(ContractName::kOpenBeggar, 90, 0, false, true, true), + Contract(ContractName::kColourValatWithout, 125, 0, false, true, false), + Contract(ContractName::kValatWithout, 500, 0, false, true, false)}; +} + +std::ostream& operator<<(std::ostream& os, const ContractName& contract_name) { + os << ContractNameToString(contract_name); + return os; +} + +std::string ContractNameToString(const ContractName& contract_name) { + switch (contract_name) { + case ContractName::kKlop: + return "Klop"; + case ContractName::kThree: + return "Three"; + case ContractName::kTwo: + return "Two"; + case ContractName::kOne: + return "One"; + case ContractName::kSoloThree: + return "Solo three"; + case ContractName::kSoloTwo: + return "Solo two"; + case ContractName::kSoloOne: + return "Solo one"; + case ContractName::kBeggar: + return "Beggar"; + case ContractName::kSoloWithout: + return "Solo without"; + case ContractName::kOpenBeggar: + return "Open beggar"; + case ContractName::kColourValatWithout: + return "Colour valat without"; + case ContractName::kValatWithout: + return "Valat without"; + case ContractName::kNotSelected: + return "Not selected"; + } +} + +} // namespace tarok +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/tarok/contracts.h b/scenarios/bargaining/open_spiel/open_spiel/games/tarok/contracts.h new file mode 100644 index 0000000..71b0aad --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/tarok/contracts.h @@ -0,0 +1,75 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef OPEN_SPIEL_GAMES_TAROK_CONTRACTS_H_ +#define OPEN_SPIEL_GAMES_TAROK_CONTRACTS_H_ + +#include +#include +#include + +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace tarok { + +// a subset of bidding actions that are used throughout the codebase and add to +// readability, for more info see TarokState::LegalActionsInBidding() +inline constexpr int kInvalidBidAction = -1; +inline constexpr int kBidPassAction = 0; +inline constexpr int kBidKlopAction = 1; +inline constexpr int kBidThreeAction = 2; +inline constexpr int kBidSoloThreeAction = 5; +inline constexpr int kBidSoloOneAction = 7; + +enum class ContractName { + kKlop, + kThree, + kTwo, + kOne, + kSoloThree, + kSoloTwo, + kSoloOne, + kBeggar, + kSoloWithout, + kOpenBeggar, + kColourValatWithout, + kValatWithout, + kNotSelected +}; + +struct Contract { + Contract(ContractName name, int score, int num_talon_exchanges, + bool needs_king_calling, bool declarer_starts, bool is_negative); + + bool NeedsTalonExchange() const; + + const ContractName name; + const int score; + const int num_talon_exchanges; + const bool needs_king_calling; + const bool declarer_starts; + const bool is_negative; +}; + +const std::array InitializeContracts(); + +std::ostream& operator<<(std::ostream& os, const ContractName& contract_name); + +std::string ContractNameToString(const ContractName& contract_name); + +} // namespace tarok +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_TAROK_CONTRACTS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/tarok/tarok.cc b/scenarios/bargaining/open_spiel/open_spiel/games/tarok/tarok.cc new file mode 100644 index 0000000..16f821d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/tarok/tarok.cc @@ -0,0 +1,1038 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/tarok/tarok.h" + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace tarok { + +const GameType kGameType{"tarok", // short_name + "Slovenian Tarok", // long_name + GameType::Dynamics::kSequential, + GameType::ChanceMode::kSampledStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kTerminal, + 4, // max_num_players + 3, // min_num_players + true, // provides_information_state_string + false, // provides_information_state_tensor + false, // provides_observation_string + false, // provides_observation_tensor + // parameter_specification + {{"players", GameParameter(kDefaultNumPLayers)}, + {"rng_seed", GameParameter(kDefaultSeed)}}}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new TarokGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +// game implementation +TarokGame::TarokGame(const GameParameters& params) + : Game(kGameType, params), + num_players_(ParameterValue("players")), + rng_(std::mt19937(ParameterValue("rng_seed") == -1 + ? std::time(0) + : ParameterValue("rng_seed"))) { + SPIEL_CHECK_GE(num_players_, kGameType.min_num_players); + SPIEL_CHECK_LE(num_players_, kGameType.max_num_players); +} + +int TarokGame::NumDistinctActions() const { return 54; } + +std::unique_ptr TarokGame::NewInitialState() const { + return NewInitialTarokState(); +} + +std::unique_ptr TarokGame::NewInitialTarokState() const { + return std::make_unique(shared_from_this()); +} + +int TarokGame::MaxChanceOutcomes() const { + // game is implicitly stochastic + return 1; +} + +int TarokGame::NumPlayers() const { return num_players_; } + +double TarokGame::MinUtility() const { return -500.0; } + +double TarokGame::MaxUtility() const { return 500.0; } + +int TarokGame::MaxGameLength() const { + if (num_players_ == 3) { + // 17 actions + 16 cards each + return 65; + } else { + // 24 actions + 12 cards each + return 72; + } +} + +std::unique_ptr TarokGame::DeserializeState( + const std::string& str) const { + std::unique_ptr state = NewInitialTarokState(); + if (str.empty()) return state; + + std::vector lines = absl::StrSplit(str, '\n'); + for (int i = 0; i < lines.size(); i++) { + if (i == 0) { + // chance node where we presisted the card dealing seed, see + // TarokState::DoApplyActionInCardDealing for more info + std::tie(state->talon_, state->players_cards_) = + DealCards(num_players_, std::stoi(lines.at(i))); + state->current_game_phase_ = GamePhase::kBidding; + state->current_player_ = 1; + state->AddPrivateCardsToInfoStates(); + } else { + state->ApplyAction(std::stol(lines.at(i))); + } + } + return state; +} + +std::string TarokGame::GetRNGState() const { + std::ostringstream rng_stream; + rng_stream << rng_; + return rng_stream.str(); +} + +void TarokGame::SetRNGState(const std::string& rng_state) const { + if (rng_state.empty()) return; + std::istringstream rng_stream(rng_state); + rng_stream >> rng_; +} + +int TarokGame::RNG() const { return rng_(); } + +// state implementation +TarokState::TarokState(std::shared_ptr game) + : State(game), + tarok_parent_game_(std::static_pointer_cast(game)) { + players_bids_.reserve(num_players_); + players_bids_.insert(players_bids_.end(), num_players_, kInvalidBidAction); + players_collected_cards_.reserve(num_players_); + 
players_collected_cards_.insert(players_collected_cards_.end(), num_players_, + std::vector()); + players_info_states_.reserve(num_players_); + players_info_states_.insert(players_info_states_.end(), num_players_, ""); +} + +Player TarokState::CurrentPlayer() const { + switch (current_game_phase_) { + case GamePhase::kCardDealing: + return kChancePlayerId; + case GamePhase::kFinished: + return kTerminalPlayerId; + default: + return current_player_; + } +} + +bool TarokState::IsTerminal() const { + return current_game_phase_ == GamePhase::kFinished; +} + +GamePhase TarokState::CurrentGamePhase() const { return current_game_phase_; } + +std::vector TarokState::PlayerCards(Player player) const { + if (current_game_phase_ == GamePhase::kCardDealing) return {}; + return players_cards_.at(player); +} + +ContractName TarokState::SelectedContractName() const { + if (current_game_phase_ == GamePhase::kCardDealing || + current_game_phase_ == GamePhase::kBidding) { + return ContractName::kNotSelected; + } + return selected_contract_->name; +} + +std::vector TarokState::Talon() const { return talon_; } + +std::vector> TarokState::TalonSets() const { + if (current_game_phase_ != GamePhase::kTalonExchange) return {}; + + int num_talon_sets = talon_.size() / selected_contract_->num_talon_exchanges; + std::vector> talon_sets; + talon_sets.reserve(num_talon_sets); + + auto begin = talon_.begin(); + for (int i = 0; i < num_talon_sets; i++) { + talon_sets.push_back(std::vector( + begin, begin + selected_contract_->num_talon_exchanges)); + std::advance(begin, selected_contract_->num_talon_exchanges); + } + return talon_sets; +} + +std::vector TarokState::TrickCards() const { return trick_cards_; } + +std::vector TarokState::LegalActions() const { + // all card actions are encoded as 0, 1, ..., 52, 53 and correspond to card + // indices wrt. tarok_parent_game_->card_deck_, card actions are returned: + // - in the king calling phase + // - by LegalActionsInTalonExchange() after the talon set is selected (i.e. 
+ // when discarding the cards) + // - by LegalActionsInTricksPlaying() + switch (current_game_phase_) { + case GamePhase::kCardDealing: + // return a dummy action due to implicit stochasticity + return {0}; + case GamePhase::kBidding: + return LegalActionsInBidding(); + case GamePhase::kKingCalling: + return {kKingOfHeartsAction, kKingOfDiamondsAction, kKingOfSpadesAction, + kKingOfClubsAction}; + case GamePhase::kTalonExchange: + return LegalActionsInTalonExchange(); + case GamePhase::kTricksPlaying: + return LegalActionsInTricksPlaying(); + case GamePhase::kFinished: + return {}; + } +} + +std::vector TarokState::LegalActionsInBidding() const { + // actions 1 - 12 correspond to contracts in tarok_parent_game_->contracts_ + // respectively, action 0 means pass + auto it = std::max_element(players_bids_.begin(), players_bids_.end()); + int max_bid = *it; + int max_bid_player = it - players_bids_.begin(); + + std::vector actions; + if (current_player_ == 0 && + players_bids_.at(current_player_) == kInvalidBidAction && + AllButCurrentPlayerPassedBidding()) { + // no bidding has happened before so forehand can + // bid any contract but can't pass + actions.insert(actions.end(), {kBidKlopAction, kBidThreeAction}); + } else if (!AllButCurrentPlayerPassedBidding()) { + // other players still playing + actions.push_back(kBidPassAction); + } + + for (int action = 3; action <= 12; action++) { + if (num_players_ == 3 && action >= kBidSoloThreeAction && + action <= kBidSoloOneAction) { + // skip solo contracts for three players + continue; + } + if (action < max_bid) { + continue; + } + if ((action > max_bid) || + (action == max_bid && current_player_ <= max_bid_player)) { + actions.push_back(action); + } + } + return actions; +} + +std::vector TarokState::LegalActionsInTalonExchange() const { + if (talon_.size() == 6) { + // choosing one of the talon card sets where actions are encoded as + // 0, 1, 2, etc. from left to right, i.e. 0 is the leftmost talon set + // as returned by TalonSets() + std::vector actions(6 / selected_contract_->num_talon_exchanges); + std::iota(actions.begin(), actions.end(), 0); + return actions; + } + // prevent discarding of taroks and kings + std::vector actions; + for (auto const& action : players_cards_.at(current_player_)) { + const Card& card = ActionToCard(action); + if (card.suit != CardSuit::kTaroks && card.points != 5) + actions.push_back(action); + } + // allow discarding of taroks (except of trula) if player has no other choice + if (actions.empty()) { + for (auto const& action : players_cards_.at(current_player_)) { + if (ActionToCard(action).points != 5) actions.push_back(action); + } + } + return actions; +} + +std::vector TarokState::LegalActionsInTricksPlaying() const { + if (trick_cards_.empty()) { + // trick opening, i.e. 
the current player is choosing + // the first card for this trick + if (selected_contract_->is_negative) + return RemovePagatIfNeeded(players_cards_.at(current_player_)); + return players_cards_.at(current_player_); + } else { + // trick following + return LegalActionsInTricksPlayingFollowing(); + } +} + +std::vector TarokState::LegalActionsInTricksPlayingFollowing() const { + auto [can_follow_suit, cant_follow_suit_but_has_tarok] = + CanFollowSuitOrCantButHasTarok(); + + CardSuit take_suit; + if (can_follow_suit) { + take_suit = ActionToCard(trick_cards_.front()).suit; + } else if (cant_follow_suit_but_has_tarok) { + take_suit = CardSuit::kTaroks; + } else { + // can't follow suit and doesn't have taroks so any card can be played + return players_cards_.at(current_player_); + } + + if (selected_contract_->is_negative) + return TakeSuitFromPlayerCardsInNegativeContracts(take_suit); + else + return TakeSuitFromPlayerCardsInPositiveContracts(take_suit); +} + +std::tuple TarokState::CanFollowSuitOrCantButHasTarok() const { + const Card& opening_card = ActionToCard(trick_cards_.front()); + bool has_taroks = false; + for (auto const& action : players_cards_.at(current_player_)) { + const Card& current_card = ActionToCard(action); + if (current_card.suit == opening_card.suit) { + // note that the second return value is irrelevant in this case + return {true, false}; + } + if (current_card.suit == CardSuit::kTaroks) { + has_taroks = true; + } + } + return {false, has_taroks}; +} + +std::vector TarokState::TakeSuitFromPlayerCardsInNegativeContracts( + CardSuit suit) const { + bool player_has_pagat = + ActionInActions(kPagatAction, players_cards_.at(current_player_)); + if (player_has_pagat && ActionInActions(kMondAction, trick_cards_) && + ActionInActions(kSkisAction, trick_cards_)) { + // the emperor trick, i.e. 
pagat has to be played as it is the only card + // that will win the trick + return {kPagatAction}; + } + + absl::optional action_to_beat = ActionToBeatInNegativeContracts(suit); + std::vector actions; + + if (action_to_beat) { + const Card& card_to_beat = ActionToCard(*action_to_beat); + auto const& player_cards = players_cards_.at(current_player_); + // a higher card only has to be played when the player actually has a higher + // card otherwise any card of the suit can be played + bool has_higher_card = false; + for (auto const& action : player_cards) { + const Card& current_card = ActionToCard(action); + if (current_card.suit == suit && current_card.rank > card_to_beat.rank) { + has_higher_card = true; + break; + } + } + // collect the actual cards + for (auto const& action : player_cards) { + const Card& current_card = ActionToCard(action); + if (current_card.suit == suit && + (!has_higher_card || current_card.rank > card_to_beat.rank)) { + actions.push_back(action); + } + } + } else { + // no need to beat any card so simply return all cards of the correct suit + actions = TakeSuitFromPlayerCardsInPositiveContracts(suit); + } + + if (player_has_pagat) + return RemovePagatIfNeeded(actions); + else + return actions; +} + +absl::optional TarokState::ActionToBeatInNegativeContracts( + CardSuit suit) const { + // there are two cases where no card has to be beaten; the player is following + // a colour suit and there is already at least one tarok in trick_cards_ or + // the player is forced to play a tarok and there are no taroks in + // trick_cards_ + bool tarok_in_trick_cards = false; + for (auto const& action : trick_cards_) { + if (ActionToCard(action).suit == CardSuit::kTaroks) { + tarok_in_trick_cards = true; + break; + } + } + if ((suit != CardSuit::kTaroks && tarok_in_trick_cards) || + (suit == CardSuit::kTaroks && !tarok_in_trick_cards)) { + return {}; + } + // the specified suit should be present in trick_cards_ from here on because + // it is either a suit of the opening card or CardSuit::kTaroks with existing + // taroks in trick_cards_ + Action action_to_beat = trick_cards_.front(); + for (int i = 1; i < trick_cards_.size(); i++) { + const Card& card_to_beat = ActionToCard(action_to_beat); + const Card& current_card = ActionToCard(trick_cards_.at(i)); + if (current_card.suit == suit && current_card.rank > card_to_beat.rank) + action_to_beat = trick_cards_.at(i); + } + return action_to_beat; +} + +std::vector TarokState::RemovePagatIfNeeded( + const std::vector& actions) const { + if (actions.size() > 1) { + // mustn't play pagat unless it's the only card, note that actions + // can be all player's cards or a subset already filtered by the caller + std::vector actions_no_pagat; + for (auto const& action : actions) { + if (action != kPagatAction) actions_no_pagat.push_back(action); + } + return actions_no_pagat; + } + return actions; +} + +std::vector TarokState::TakeSuitFromPlayerCardsInPositiveContracts( + CardSuit suit) const { + std::vector actions; + for (auto const& action : players_cards_.at(current_player_)) { + if (ActionToCard(action).suit == suit) actions.push_back(action); + } + return actions; +} + +std::string TarokState::ActionToString(Player player, Action action_id) const { + switch (current_game_phase_) { + case GamePhase::kCardDealing: + // return a dummy action due to implicit stochasticity + return "Deal"; + case GamePhase::kBidding: + if (action_id == 0) return "Pass"; + return ContractNameToString( + tarok_parent_game_->contracts_.at(action_id - 1).name); + 
case GamePhase::kKingCalling: + case GamePhase::kTricksPlaying: + return CardActionToString(action_id); + case GamePhase::kTalonExchange: + if (talon_.size() == 6) return absl::StrCat("Talon set ", action_id + 1); + return CardActionToString(action_id); + case GamePhase::kFinished: + return ""; + } +} + +std::string TarokState::CardActionToString(Action action_id) const { + return ActionToCard(action_id).ToString(); +} + +ActionsAndProbs TarokState::ChanceOutcomes() const { + if (current_game_phase_ == GamePhase::kCardDealing) { + // return a dummy action with probability 1 due to implicit stochasticity + return {{0, 1.0}}; + } + return {}; +} + +void TarokState::DoApplyAction(Action action_id) { + if (!ActionInActions(action_id, LegalActions())) { + SpielFatalError(absl::StrCat("Action ", action_id, + " is not valid in the current state.")); + } + switch (current_game_phase_) { + case GamePhase::kCardDealing: + DoApplyActionInCardDealing(); + break; + case GamePhase::kBidding: + DoApplyActionInBidding(action_id); + break; + case GamePhase::kKingCalling: + DoApplyActionInKingCalling(action_id); + break; + case GamePhase::kTalonExchange: + DoApplyActionInTalonExchange(action_id); + break; + case GamePhase::kTricksPlaying: + DoApplyActionInTricksPlaying(action_id); + break; + case GamePhase::kFinished: + SpielFatalError("Calling DoApplyAction in a terminal state."); + } +} + +void TarokState::DoApplyActionInCardDealing() { + // do the actual sampling here due to implicit stochasticity + do { + // seed is persisted for serialization purposes + card_dealing_seed_ = tarok_parent_game_->RNG(); + // hands without taroks are illegal + std::tie(talon_, players_cards_) = + DealCards(num_players_, card_dealing_seed_); + } while (AnyPlayerWithoutTaroks()); + current_game_phase_ = GamePhase::kBidding; + // lower player indices correspond to higher bidding priority, + // i.e. 0 is the forehand, num_players - 1 is the dealer + current_player_ = 1; + AddPrivateCardsToInfoStates(); +} + +bool TarokState::AnyPlayerWithoutTaroks() const { + // actions are sorted, i.e. 
taroks are always at the beginning + for (int i = 0; i < num_players_; i++) { + if (ActionToCard(players_cards_.at(i).front()).suit != CardSuit::kTaroks) { + return true; + } + } + return false; +} + +void TarokState::AddPrivateCardsToInfoStates() { + for (int i = 0; i < num_players_; i++) { + AppendToInformationState( + i, absl::StrCat(absl::StrJoin(players_cards_.at(i), ","), ";")); + } +} + +void TarokState::DoApplyActionInBidding(Action action_id) { + players_bids_.at(current_player_) = action_id; + AppendToAllInformationStates(std::to_string(action_id)); + if (AllButCurrentPlayerPassedBidding()) { + FinishBiddingPhase(action_id); + AppendToAllInformationStates(";"); + } else { + do { + NextPlayer(); + } while (players_bids_.at(current_player_) == kBidPassAction); + AppendToAllInformationStates(","); + } +} + +bool TarokState::AllButCurrentPlayerPassedBidding() const { + for (int i = 0; i < num_players_; i++) { + if (i == current_player_) continue; + if (players_bids_.at(i) != kBidPassAction) return false; + } + return true; +} + +void TarokState::FinishBiddingPhase(Action action_id) { + declarer_ = current_player_; + selected_contract_ = &tarok_parent_game_->contracts_.at(action_id - 1); + if (num_players_ == 4 && selected_contract_->needs_king_calling) + current_game_phase_ = GamePhase::kKingCalling; + else if (selected_contract_->NeedsTalonExchange()) + current_game_phase_ = GamePhase::kTalonExchange; + else + StartTricksPlayingPhase(); +} + +void TarokState::DoApplyActionInKingCalling(Action action_id) { + called_king_ = action_id; + if (ActionInActions(action_id, talon_)) { + called_king_in_talon_ = true; + } else { + for (int i = 0; i < num_players_; i++) { + if (i == current_player_) { + continue; + } else if (ActionInActions(action_id, players_cards_.at(i))) { + declarer_partner_ = i; + break; + } + } + } + current_game_phase_ = GamePhase::kTalonExchange; + AppendToAllInformationStates(absl::StrCat(action_id, ";")); +} + +void TarokState::DoApplyActionInTalonExchange(Action action_id) { + auto& player_cards = players_cards_.at(current_player_); + + if (talon_.size() == 6) { + // add all talon cards to info states + AppendToAllInformationStates(absl::StrCat(absl::StrJoin(talon_, ","), ";")); + + // choosing one of the talon card sets + int set_begin = action_id * selected_contract_->num_talon_exchanges; + int set_end = set_begin + selected_contract_->num_talon_exchanges; + + bool mond_in_talon = ActionInActions(kMondAction, talon_); + bool mond_in_selected_talon_set = false; + for (int i = set_begin; i < set_end; i++) { + player_cards.push_back(talon_.at(i)); + if (talon_.at(i) == kMondAction) mond_in_selected_talon_set = true; + } + if (mond_in_talon && !mond_in_selected_talon_set) { + // the captured mond penalty applies if mond is in talon and not part of + // the selected set + captured_mond_player_ = current_player_; + } + + // add the selected talon set to info states + AppendToAllInformationStates(absl::StrCat(action_id, ";")); + + std::sort(player_cards.begin(), player_cards.end()); + talon_.erase(talon_.begin() + set_begin, talon_.begin() + set_end); + } else { + // discarding the cards + MoveActionFromTo(action_id, &player_cards, + &players_collected_cards_.at(current_player_)); + + bool talon_exchange_finished = player_cards.size() == 48 / num_players_; + std::string info_state_delimiter = talon_exchange_finished ? 
";" : ","; + + // note that all players see discarded tarok cards but only the discarder + // knows about discarded non-taroks + if (ActionToCard(action_id).suit == CardSuit::kTaroks) { + AppendToAllInformationStates( + absl::StrCat(action_id, info_state_delimiter)); + } else { + AppendToInformationState(current_player_, + absl::StrCat(action_id, info_state_delimiter)); + for (Player p = 0; p < num_players_; p++) { + if (p == current_player_) continue; + AppendToInformationState(p, absl::StrCat("d", info_state_delimiter)); + } + } + + if (talon_exchange_finished) StartTricksPlayingPhase(); + } +} + +void TarokState::StartTricksPlayingPhase() { + current_game_phase_ = GamePhase::kTricksPlaying; + if (selected_contract_->declarer_starts) + current_player_ = declarer_; + else + current_player_ = 0; +} + +void TarokState::DoApplyActionInTricksPlaying(Action action_id) { + MoveActionFromTo(action_id, &players_cards_.at(current_player_), + &trick_cards_); + AppendToAllInformationStates(std::to_string(action_id)); + if (trick_cards_.size() == num_players_) { + ResolveTrick(); + if (players_cards_.at(current_player_).empty() || + ((selected_contract_->name == ContractName::kBeggar || + selected_contract_->name == ContractName::kOpenBeggar) && + current_player_ == declarer_) || + ((selected_contract_->name == ContractName::kColourValatWithout || + selected_contract_->name == ContractName::kValatWithout) && + current_player_ != declarer_)) { + current_game_phase_ = GamePhase::kFinished; + } else { + AppendToAllInformationStates(";"); + } + } else { + NextPlayer(); + AppendToAllInformationStates(","); + } +} + +void TarokState::ResolveTrick() { + auto [trick_winner, winning_action] = ResolveTrickWinnerAndWinningAction(); + auto& trick_winner_collected_cards = + players_collected_cards_.at(trick_winner); + + for (auto const& action : trick_cards_) { + trick_winner_collected_cards.push_back(action); + } + + if (selected_contract_->name == ContractName::kKlop && !talon_.empty()) { + // add the "gift" talon card in klop + trick_winner_collected_cards.push_back(talon_.front()); + AppendToAllInformationStates(absl::StrCat(",", talon_.front())); + talon_.erase(talon_.begin()); + } else if (winning_action == called_king_ && called_king_in_talon_) { + // declearer won the trick with the called king that was in talon so all + // of the talon cards belong to the declearer (note that this is only + // possible when talon exchange actually happened in the past) + bool mond_in_talon = false; + for (auto const& action : talon_) { + trick_winner_collected_cards.push_back(action); + if (action == kMondAction) mond_in_talon = true; + } + if (mond_in_talon) { + // the called king and mond were in different parts of the talon and + // declearer selected the set with the king plus won the mond as + // part of the obtained talon remainder, negating the captured mond + // penalty obtained during DoApplyActionInTalonExchange() + captured_mond_player_ = kInvalidPlayer; + } + talon_.clear(); + } else if ((selected_contract_->NeedsTalonExchange() || + selected_contract_->name == ContractName::kSoloWithout) && + (winning_action == kSkisAction || + winning_action == kPagatAction)) { + // check if mond is captured by skis or pagat (emperor's trick) and + // penalise the player of the mond in certain contracts + for (int i = 0; i < trick_cards_.size(); i++) { + if (trick_cards_.at(i) == kMondAction) { + captured_mond_player_ = TrickCardsIndexToPlayer(i); + } + } + } + + trick_cards_.clear(); + current_player_ = trick_winner; +} + 
+TrickWinnerAndAction TarokState::ResolveTrickWinnerAndWinningAction() const { + // compute the winning action index within trick_cards_ + int winning_action_i; + if ((ActionInActions(kPagatAction, trick_cards_) && + ActionInActions(kMondAction, trick_cards_) && + ActionInActions(kSkisAction, trick_cards_)) && + (selected_contract_->name != ContractName::kColourValatWithout || + ActionToCard(trick_cards_.front()).suit == CardSuit::kTaroks)) { + // the emperor trick, i.e. pagat wins over mond and skis in all cases but + // not in Contract::kColourValatWithout when a non-trump is led + winning_action_i = + std::find(trick_cards_.begin(), trick_cards_.end(), kPagatAction) - + trick_cards_.begin(); + } else { + winning_action_i = 0; + for (int i = 1; i < trick_cards_.size(); i++) { + const Card& winning_card = + ActionToCard(trick_cards_.at(winning_action_i)); + const Card& current_card = ActionToCard(trick_cards_.at(i)); + + if (((current_card.suit == CardSuit::kTaroks && + selected_contract_->name != ContractName::kColourValatWithout) || + current_card.suit == winning_card.suit) && + current_card.rank > winning_card.rank) { + winning_action_i = i; + } + } + } + return {TrickCardsIndexToPlayer(winning_action_i), + trick_cards_.at(winning_action_i)}; +} + +Player TarokState::TrickCardsIndexToPlayer(int index) const { + Player player = current_player_; + for (int i = 0; i < trick_cards_.size() - 1 - index; i++) { + player -= 1; + if (player == -1) player = num_players_ - 1; + } + return player; +} + +std::vector TarokState::Returns() const { + std::vector returns(num_players_, 0.0); + if (!IsTerminal()) return returns; + + std::vector penalties = CapturedMondPenalties(); + std::vector scores = ScoresWithoutCapturedMondPenalties(); + for (int i = 0; i < num_players_; i++) { + returns.at(i) = penalties.at(i) + scores.at(i); + } + return returns; +} + +std::vector TarokState::CapturedMondPenalties() const { + std::vector penalties(num_players_, 0); + if (captured_mond_player_ != kInvalidPlayer) + penalties.at(captured_mond_player_) = -20; + return penalties; +} + +std::vector TarokState::ScoresWithoutCapturedMondPenalties() const { + if (!IsTerminal()) return std::vector(num_players_, 0); + if (selected_contract_->name == ContractName::kKlop) { + return ScoresInKlop(); + } else if (selected_contract_->NeedsTalonExchange()) { + return ScoresInNormalContracts(); + } else { + // beggar and above + return ScoresInHigherContracts(); + } +} + +std::vector TarokState::ScoresInKlop() const { + std::vector scores; + scores.reserve(num_players_); + + bool any_player_won_or_lost = false; + for (int i = 0; i < num_players_; i++) { + int points = CardPoints(players_collected_cards_.at(i), + tarok_parent_game_->card_deck_); + if (points > 35) { + any_player_won_or_lost = true; + scores.push_back(-70); + } else if (points == 0) { + any_player_won_or_lost = true; + scores.push_back(70); + } else { + scores.push_back(-points); + } + } + if (any_player_won_or_lost) { + // only the winners and losers score + for (int i = 0; i < num_players_; i++) { + if (std::abs(scores.at(i)) != 70) scores.at(i) = 0; + } + } + return scores; +} + +std::vector TarokState::ScoresInNormalContracts() const { + auto [collected_cards, opposite_collected_cards] = + SplitCollectedCardsPerTeams(); + + int score; + if (collected_cards.size() == 48) { + // valat won + score = 250; + } else if (opposite_collected_cards.size() == 48) { + // valat lost + score = -250; + } else { + int card_points = + CardPoints(collected_cards, 
tarok_parent_game_->card_deck_);
+    score = card_points - 35;
+
+    if (card_points > 35)
+      score += selected_contract_->score;
+    else
+      score -= selected_contract_->score;
+
+    // bonuses could be positive, negative or 0
+    int bonuses = NonValatBonuses(collected_cards, opposite_collected_cards);
+    score += bonuses;
+  }
+
+  std::vector<int> scores(num_players_, 0);
+  scores.at(declarer_) = score;
+  if (declarer_partner_ != kInvalidPlayer) scores.at(declarer_partner_) = score;
+  return scores;
+}
+
+CollectedCardsPerTeam TarokState::SplitCollectedCardsPerTeams() const {
+  std::vector<Action> collected_cards = players_collected_cards_.at(declarer_);
+  std::vector<Action> opposite_collected_cards;
+  for (Player p = 0; p < num_players_; p++) {
+    if (p != declarer_ && p != declarer_partner_) {
+      opposite_collected_cards.insert(opposite_collected_cards.end(),
+                                      players_collected_cards_.at(p).begin(),
+                                      players_collected_cards_.at(p).end());
+    } else if (p == declarer_partner_) {
+      collected_cards.insert(collected_cards.end(),
+                             players_collected_cards_.at(p).begin(),
+                             players_collected_cards_.at(p).end());
+    }
+  }
+  return {collected_cards, opposite_collected_cards};
+}
+
+int TarokState::NonValatBonuses(
+    const std::vector<Action>& collected_cards,
+    const std::vector<Action>& opposite_collected_cards) const {
+  int bonuses = 0;
+
+  // last trick winner is the current player
+  auto const& last_trick_winner_cards =
+      players_collected_cards_.at(current_player_);
+  // king ultimo and pagat ultimo
+  int ultimo_bonus = 0;
+  if (std::find(last_trick_winner_cards.end() - num_players_,
+                last_trick_winner_cards.end(),
+                called_king_) != last_trick_winner_cards.end()) {
+    // king ultimo
+    ultimo_bonus = 10;
+  } else if (std::find(last_trick_winner_cards.end() - num_players_,
+                       last_trick_winner_cards.end(),
+                       0) != last_trick_winner_cards.end()) {
+    // pagat ultimo
+    ultimo_bonus = 25;
+  }
+
+  if (ultimo_bonus > 0 &&
+      (current_player_ == declarer_ || current_player_ == declarer_partner_)) {
+    bonuses = ultimo_bonus;
+  } else if (ultimo_bonus > 0) {
+    bonuses = -ultimo_bonus;
+  }
+
+  // collected kings or trula
+  auto [collected_kings, collected_trula] =
+      CollectedKingsAndOrTrula(collected_cards);
+  auto [opposite_collected_kings, opposite_collected_trula] =
+      CollectedKingsAndOrTrula(opposite_collected_cards);
+
+  if (collected_kings)
+    bonuses += 10;
+  else if (opposite_collected_kings)
+    bonuses -= 10;
+  if (collected_trula)
+    bonuses += 10;
+  else if (opposite_collected_trula)
+    bonuses -= 10;
+  return bonuses;
+}
+
+std::tuple<bool, bool> TarokState::CollectedKingsAndOrTrula(
+    const std::vector<Action>& collected_cards) const {
+  int num_kings = 0, num_trula = 0;
+  for (auto const& action : collected_cards) {
+    if (action == kKingOfHeartsAction || action == kKingOfDiamondsAction ||
+        action == kKingOfSpadesAction || action == kKingOfClubsAction) {
+      num_kings += 1;
+    } else if (action == kPagatAction || action == kMondAction ||
+               action == kSkisAction) {
+      num_trula += 1;
+    }
+  }
+  return {num_kings == 4, num_trula == 3};
+}
+
+std::vector<int> TarokState::ScoresInHigherContracts() const {
+  bool declarer_won;
+  if (selected_contract_->name == ContractName::kBeggar ||
+      selected_contract_->name == ContractName::kOpenBeggar) {
+    declarer_won = players_collected_cards_.at(declarer_).empty();
+  } else if (selected_contract_->name == ContractName::kColourValatWithout ||
+             selected_contract_->name == ContractName::kValatWithout) {
+    declarer_won = players_collected_cards_.at(declarer_).size() == 48;
+  } else {
+    // solo without
+    declarer_won =
CardPoints(players_collected_cards_.at(declarer_), + tarok_parent_game_->card_deck_) > 35; + } + + std::vector scores(num_players_, 0); + if (declarer_won) + scores.at(declarer_) = selected_contract_->score; + else + scores.at(declarer_) = -selected_contract_->score; + return scores; +} + +std::string TarokState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return players_info_states_.at(player); +} + +std::string TarokState::ToString() const { + std::string str = ""; + GamePhase current_game_phase = CurrentGamePhase(); + absl::StrAppend(&str, "Game phase: ", GamePhaseToString(current_game_phase), + "\n"); + absl::StrAppend(&str, "Selected contract: ", + ContractNameToString(SelectedContractName()), "\n"); + + Player current_player = CurrentPlayer(); + absl::StrAppend(&str, "Current player: ", current_player, "\n"); + if (current_game_phase != GamePhase::kCardDealing && + current_game_phase != GamePhase::kFinished) { + absl::StrAppend(&str, "Player cards: ", + absl::StrJoin(PlayerCards(current_player), ","), "\n"); + } + + if (current_game_phase == GamePhase::kTalonExchange) { + auto talon_sets = TalonSets(); + std::vector talon_sets_strings; + talon_sets_strings.reserve(talon_sets.size()); + for (auto const& set : talon_sets) { + talon_sets_strings.push_back(absl::StrJoin(set, ",")); + } + absl::StrAppend( + &str, "Talon sets: ", absl::StrJoin(talon_sets_strings, ";"), "\n"); + } else if (current_game_phase == GamePhase::kTricksPlaying) { + absl::StrAppend(&str, "Trick cards: ", absl::StrJoin(TrickCards(), ","), + "\n"); + } + return str; +} + +std::string TarokState::Serialize() const { + if (current_game_phase_ == GamePhase::kCardDealing) return ""; + // replace the dummy stochastic action with the seed that was used + // for dealing the cards + std::vector history = History(); + history.front() = card_dealing_seed_; + return absl::StrJoin(history, "\n"); +} + +std::unique_ptr TarokState::Clone() const { + return std::unique_ptr(new TarokState(*this)); +} + +void TarokState::NextPlayer() { + current_player_ += 1; + if (current_player_ == num_players_) current_player_ = 0; +} + +bool TarokState::ActionInActions(Action action_id, + const std::vector& actions) { + return std::find(actions.begin(), actions.end(), action_id) != actions.end(); +} + +void TarokState::MoveActionFromTo(Action action_id, std::vector* from, + std::vector* to) { + from->erase(std::remove(from->begin(), from->end(), action_id), from->end()); + to->push_back(action_id); +} + +const Card& TarokState::ActionToCard(Action action_id) const { + return tarok_parent_game_->card_deck_.at(action_id); +} + +void TarokState::AppendToAllInformationStates(const std::string& appendix) { + for (int i = 0; i < num_players_; i++) { + absl::StrAppend(&players_info_states_.at(i), appendix); + } +} + +void TarokState::AppendToInformationState(Player player, + const std::string& appendix) { + absl::StrAppend(&players_info_states_.at(player), appendix); +} + +std::ostream& operator<<(std::ostream& os, const GamePhase& game_phase) { + os << GamePhaseToString(game_phase); + return os; +} + +std::string GamePhaseToString(const GamePhase& game_phase) { + switch (game_phase) { + case GamePhase::kCardDealing: + return "Card dealing"; + case GamePhase::kBidding: + return "Bidding"; + case GamePhase::kKingCalling: + return "King calling"; + case GamePhase::kTalonExchange: + return "Talon exchange"; + case GamePhase::kTricksPlaying: + return "Tricks playing"; + case 
GamePhase::kFinished: + return "Finished"; + } +} + +} // namespace tarok +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/tarok/tarok.h b/scenarios/bargaining/open_spiel/open_spiel/games/tarok/tarok.h new file mode 100644 index 0000000..49a9bf6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/tarok/tarok.h @@ -0,0 +1,221 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_TAROK_H_ +#define OPEN_SPIEL_GAMES_TAROK_H_ + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/games/tarok/cards.h" +#include "open_spiel/games/tarok/contracts.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace tarok { + +inline constexpr int kDefaultNumPLayers = 3; +// seed for shuffling the cards, -1 means seeded by clock +inline constexpr int kDefaultSeed = -1; + +enum class GamePhase { + kCardDealing, + kBidding, + kKingCalling, + kTalonExchange, + kTricksPlaying, + kFinished +}; + +class TarokState; + +// game definition +class TarokGame : public Game { + public: + explicit TarokGame(const GameParameters& params); + + int NumDistinctActions() const override; + std::unique_ptr NewInitialState() const override; + std::unique_ptr NewInitialTarokState() const; + int MaxChanceOutcomes() const override; + int NumPlayers() const override; + double MinUtility() const override; + double MaxUtility() const override; + int MaxGameLength() const override; + + std::unique_ptr DeserializeState( + const std::string& str) const override; + std::string GetRNGState() const override; + void SetRNGState(const std::string& rng_state) const override; + + private: + friend class TarokState; + // this function is const so that it can be called from state objects, + // note that it nevertheless changes the state of the mutable rng_ used + // for shuffling the cards, this is expected behaviour since the game + // object has to maintain an internal RNG state due to implicit stochasticity, + // see ChanceOutcomes() comments in open_spiel/spiel.h for more info + int RNG() const; + + static inline const std::array card_deck_ = InitializeCardDeck(); + static inline const std::array contracts_ = + InitializeContracts(); + + const int num_players_; + mutable std::mt19937 rng_; +}; + +using TrickWinnerAndAction = std::tuple; +using CollectedCardsPerTeam = + std::tuple, std::vector>; + +// state definition +class TarokState : public State { + public: + explicit TarokState(std::shared_ptr game); + + Player CurrentPlayer() const override; + bool IsTerminal() const override; + GamePhase CurrentGamePhase() const; + std::vector PlayerCards(Player player) const; + ContractName SelectedContractName() const; + std::vector Talon() const; + std::vector> TalonSets() const; + std::vector TrickCards() const; + + std::vector LegalActions() const override; + std::string ActionToString(Player player, Action 
action_id) const override; + std::string CardActionToString(Action action_id) const; + ActionsAndProbs ChanceOutcomes() const override; + + // calculates the overall score for a finished game without radli, see + // comments above CapturedMondPenalties() for more details + std::vector Returns() const override; + // the following two methods are kept separately due to the captured mond + // penalty not being affected by any multipliers for kontras or radli, note + // that TarokState does not implement radli as they are, like cumulative + // players' score, part of the global state that would have to be kept between + // multiple NewInitialState() calls (i.e. TarokState only implements a single + // round of the game and radli implementation is left to the owner of the game + // instance who should keep track of multiple rounds if needed) + std::vector CapturedMondPenalties() const; + std::vector ScoresWithoutCapturedMondPenalties() const; + + // info state strings are of the following format (cards and actions are + // delimited by a comma character, some parts of the string are omitted in + // states where corresponding game phases are not played, + // single_trick_played_actions also contains the gift talon card in klop): + // + // each_players_private_cards;bidding_actions;king_calling_action; + // talon_cards;choosing_talon_set_action;discarding_cards_actions; + // single_trick_played_actions;...;single_trick_played_actions + std::string InformationStateString(Player player) const override; + + std::string ToString() const override; + std::string Serialize() const override; + std::unique_ptr Clone() const override; + + protected: + void DoApplyAction(Action action_id) override; + + private: + friend class TarokGame; + + std::vector LegalActionsInBidding() const; + std::vector LegalActionsInTalonExchange() const; + std::vector LegalActionsInTricksPlaying() const; + std::vector LegalActionsInTricksPlayingFollowing() const; + + // checks whether the current player can follow the opening card suit or + // can't but still has at least one tarok, if the first value is true, the + // second might be set incorrectly as it is irrelevant + std::tuple CanFollowSuitOrCantButHasTarok() const; + + std::vector TakeSuitFromPlayerCardsInNegativeContracts( + CardSuit suit) const; + absl::optional ActionToBeatInNegativeContracts(CardSuit suit) const; + std::vector RemovePagatIfNeeded( + const std::vector& actions) const; + std::vector TakeSuitFromPlayerCardsInPositiveContracts( + CardSuit suit) const; + + void DoApplyActionInCardDealing(); + bool AnyPlayerWithoutTaroks() const; + void AddPrivateCardsToInfoStates(); + void DoApplyActionInBidding(Action action_id); + bool AllButCurrentPlayerPassedBidding() const; + void FinishBiddingPhase(Action action_id); + void DoApplyActionInKingCalling(Action action_id); + void DoApplyActionInTalonExchange(Action action_id); + void StartTricksPlayingPhase(); + void DoApplyActionInTricksPlaying(Action action_id); + void ResolveTrick(); + TrickWinnerAndAction ResolveTrickWinnerAndWinningAction() const; + + // computes which player belongs to the trick_cards_ index as the player + // who opens the trick always belongs to index 0 within trick_cards_ + Player TrickCardsIndexToPlayer(int index) const; + + std::vector ScoresInKlop() const; + std::vector ScoresInNormalContracts() const; + CollectedCardsPerTeam SplitCollectedCardsPerTeams() const; + int NonValatBonuses( + const std::vector& collected_cards, + const std::vector& opposite_collected_cards) const; + std::tuple 
CollectedKingsAndOrTrula( + const std::vector& collected_cards) const; + std::vector ScoresInHigherContracts() const; + + void NextPlayer(); + static bool ActionInActions(Action action_id, + const std::vector& actions); + static void MoveActionFromTo(Action action_id, std::vector* from, + std::vector* to); + const Card& ActionToCard(Action action_id) const; + void AppendToAllInformationStates(const std::string& appendix); + void AppendToInformationState(Player player, const std::string& appendix); + + std::shared_ptr tarok_parent_game_; + int card_dealing_seed_ = kDefaultSeed; + + GamePhase current_game_phase_ = GamePhase::kCardDealing; + Player current_player_ = kInvalidPlayer; + std::vector talon_; + std::vector> players_cards_; + std::vector players_bids_; + Player declarer_ = kInvalidPlayer; + // contract pointed to is managed by the game instance + const Contract* selected_contract_; + Action called_king_ = kInvalidAction; + bool called_king_in_talon_ = false; + Player declarer_partner_ = kInvalidPlayer; + std::vector> players_collected_cards_; + std::vector trick_cards_; + Player captured_mond_player_ = kInvalidPlayer; + std::vector players_info_states_; +}; + +std::ostream& operator<<(std::ostream& os, const GamePhase& game_phase); + +std::string GamePhaseToString(const GamePhase& game_phase); + +} // namespace tarok +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_TAROK_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/tarok/tarok_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/tarok/tarok_test.cc new file mode 100644 index 0000000..75bada0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/tarok/tarok_test.cc @@ -0,0 +1,1449 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/tarok/tarok.h" + +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace tarok { + +constexpr int kDealCardsAction = 0; +constexpr int kBidTwoAction = 3; +constexpr int kBidOneAction = 4; +constexpr int kBidSoloTwoAction = 6; +constexpr int kBidBeggarAction = 8; +constexpr int kBidSoloWithoutAction = 9; +constexpr int kBidOpenBeggarAction = 10; +constexpr int kBidColourValatAction = 11; +constexpr int kBidValatWithoutAction = 12; + +const std::array card_deck = InitializeCardDeck(); + +// helper methods +std::shared_ptr NewTarokGame(const GameParameters& params) { + return std::static_pointer_cast(LoadGame("tarok", params)); +} + +std::unique_ptr StateAfterActions( + const GameParameters& params, const std::vector& actions) { + auto state = NewTarokGame(params)->NewInitialTarokState(); + for (auto const& action : actions) { + state->ApplyAction(action); + } + return state; +} + +bool AllActionsInOtherActions(const std::vector& actions, + const std::vector& other_actions) { + for (auto const& action : actions) { + if (std::find(other_actions.begin(), other_actions.end(), action) == + other_actions.end()) { + return false; + } + } + return true; +} + +Action CardLongNameToAction(const std::string& long_name) { + for (int i = 0; i < card_deck.size(); i++) { + if (card_deck.at(i).long_name == long_name) return i; + } + SpielFatalError("Invalid long_name!"); + return -1; +} + +std::vector CardLongNamesToActions( + const std::vector& long_names) { + std::vector actions; + actions.reserve(long_names.size()); + for (auto const long_name : long_names) { + actions.push_back(CardLongNameToAction(long_name)); + } + return actions; +} + +template +bool AllEq(const std::vector& xs0, const std::vector& xs1) { + if (xs0.size() != xs1.size()) return false; + for (int i = 0; i < xs0.size(); i++) { + if (xs0.at(i) != xs1.at(i)) return false; + } + return true; +} + +// testing +void BasicGameTests() { + testing::LoadGameTest("tarok"); + testing::ChanceOutcomesTest(*LoadGame("tarok")); + testing::RandomSimTest(*LoadGame("tarok"), 100); +} + +// cards tests +void CardDeckShufflingSeedTest() { + auto game = NewTarokGame(GameParameters({{"rng_seed", GameParameter(0)}})); + + // subsequent shuffles within the same game should be different + auto state1 = game->NewInitialTarokState(); + state1->ApplyAction(0); + auto state2 = game->NewInitialTarokState(); + state2->ApplyAction(0); + SPIEL_CHECK_NE(state1->PlayerCards(0), state2->PlayerCards(0)); + + game = NewTarokGame(GameParameters({{"rng_seed", GameParameter(0)}})); + // shuffles should be the same when recreating a game with the same seed + auto state3 = game->NewInitialTarokState(); + state3->ApplyAction(0); + auto state4 = game->NewInitialTarokState(); + state4->ApplyAction(0); + SPIEL_CHECK_EQ(state1->PlayerCards(0), state3->PlayerCards(0)); + SPIEL_CHECK_EQ(state2->PlayerCards(0), state4->PlayerCards(0)); +} + +void DealtCardsSizeTest(int num_players) { + auto [talon, players_cards] = DealCards(num_players, 42); + SPIEL_CHECK_EQ(talon.size(), 6); + int num_cards_per_player = 48 / num_players; + for (auto const& player_cards : players_cards) { + SPIEL_CHECK_EQ(player_cards.size(), num_cards_per_player); + } +} + +void DealtCardsContentTest(int num_players) { + // 3 players + auto [talon, players_cards] = DealCards(num_players, 42); + // flatten and sort all the dealt cards + std::vector all_dealt_cards(talon.begin(), talon.end()); + for (auto const& player_cards : 
players_cards) { + all_dealt_cards.insert(all_dealt_cards.end(), player_cards.begin(), + player_cards.end()); + } + std::sort(all_dealt_cards.begin(), all_dealt_cards.end()); + + // check the actual content + for (int i = 0; i < 54; i++) { + SPIEL_CHECK_EQ(all_dealt_cards.at(i), i); + } +} + +void PlayersCardsSortedTest() { + auto [talon, players_cards] = DealCards(3, 42); + for (auto const& player_cards : players_cards) { + SPIEL_CHECK_TRUE(std::is_sorted(player_cards.begin(), player_cards.end())); + } +} + +void CountCardsTest() { + std::vector all_card_actions(54); + std::iota(all_card_actions.begin(), all_card_actions.end(), 0); + SPIEL_CHECK_EQ(CardPoints(all_card_actions, card_deck), 70); + SPIEL_CHECK_EQ(CardPoints({}, card_deck), 0); + SPIEL_CHECK_EQ(CardPoints(CardLongNamesToActions({"II"}), card_deck), 0); + SPIEL_CHECK_EQ(CardPoints(CardLongNamesToActions({"II", "III"}), card_deck), + 1); + SPIEL_CHECK_EQ(CardPoints(CardLongNamesToActions({"Mond"}), card_deck), 4); + + std::vector cards{"Mond", "Jack of Diamonds"}; + SPIEL_CHECK_EQ(CardPoints(CardLongNamesToActions(cards), card_deck), 6); + + cards = {"XIV", "Mond", "Jack of Diamonds"}; + SPIEL_CHECK_EQ(CardPoints(CardLongNamesToActions(cards), card_deck), 6); + + cards = {"XIV", "Mond", "Jack of Diamonds", "Queen of Diamonds"}; + SPIEL_CHECK_EQ(CardPoints(CardLongNamesToActions(cards), card_deck), 9); + + cards = {"II", "Jack of Clubs", "Queen of Clubs", "Mond", "King of Clubs"}; + SPIEL_CHECK_EQ(CardPoints(CardLongNamesToActions(cards), card_deck), 14); +} + +void CardDealingPhaseTest() { + auto game = NewTarokGame(GameParameters()); + auto state = game->NewInitialTarokState(); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kCardDealing); + SPIEL_CHECK_EQ(state->CurrentPlayer(), kChancePlayerId); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kNotSelected); + + SPIEL_CHECK_TRUE(state->TalonSets().empty()); + for (int i = 0; i < game->NumPlayers(); i++) { + SPIEL_CHECK_TRUE(state->PlayerCards(i).empty()); + } + + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {0})); + SPIEL_CHECK_TRUE(AllEq(state->ChanceOutcomes(), {{0, 1.0}})); + + // deal the cards + state->ApplyAction(kDealCardsAction); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kBidding); + SPIEL_CHECK_NE(state->CurrentPlayer(), kChancePlayerId); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kNotSelected); + + // talon sets are only visible in the talon exchange phase + SPIEL_CHECK_TRUE(state->TalonSets().empty()); + SPIEL_CHECK_EQ(state->Talon().size(), 6); + for (int i = 0; i < game->NumPlayers(); i++) { + SPIEL_CHECK_FALSE(state->PlayerCards(i).empty()); + } + SPIEL_CHECK_TRUE(state->ChanceOutcomes().empty()); +} + +// bidding phase tests +void BiddingPhase3PlayersTest1() { + // scenario: all players pass + auto game = NewTarokGame(GameParameters()); + auto state = game->NewInitialTarokState(); + state->ApplyAction(kDealCardsAction); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kBidding); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kNotSelected); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidPassAction, kBidTwoAction, kBidOneAction, kBidBeggarAction, + kBidSoloWithoutAction, kBidOpenBeggarAction, kBidColourValatAction, + kBidValatWithoutAction})); + state->ApplyAction(kBidPassAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidPassAction, kBidTwoAction, kBidOneAction, 
kBidBeggarAction, + kBidSoloWithoutAction, kBidOpenBeggarAction, kBidColourValatAction, + kBidValatWithoutAction})); + state->ApplyAction(kBidPassAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidKlopAction, kBidThreeAction, kBidTwoAction, kBidOneAction, + kBidBeggarAction, kBidSoloWithoutAction, kBidOpenBeggarAction, + kBidColourValatAction, kBidValatWithoutAction})); + state->ApplyAction(kBidKlopAction); + + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kKlop); + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); +} + +void BiddingPhase3PlayersTest2() { + // scenario: forehand passes, player 1 eventually bids beggar, player 2 bids + // beggar + auto game = NewTarokGame(GameParameters()); + auto state = game->NewInitialTarokState(); + state->ApplyAction(kDealCardsAction); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kBidding); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kNotSelected); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidPassAction, kBidTwoAction, kBidOneAction, kBidBeggarAction, + kBidSoloWithoutAction, kBidOpenBeggarAction, kBidColourValatAction, + kBidValatWithoutAction})); + state->ApplyAction(kBidTwoAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE(AllEq( + state->LegalActions(), + {kBidPassAction, kBidOneAction, kBidBeggarAction, kBidSoloWithoutAction, + kBidOpenBeggarAction, kBidColourValatAction, kBidValatWithoutAction})); + state->ApplyAction(kBidBeggarAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE(AllEq( + state->LegalActions(), + {kBidPassAction, kBidBeggarAction, kBidSoloWithoutAction, + kBidOpenBeggarAction, kBidColourValatAction, kBidValatWithoutAction})); + state->ApplyAction(kBidPassAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE(AllEq( + state->LegalActions(), + {kBidPassAction, kBidBeggarAction, kBidSoloWithoutAction, + kBidOpenBeggarAction, kBidColourValatAction, kBidValatWithoutAction})); + state->ApplyAction(kBidBeggarAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidPassAction, kBidSoloWithoutAction, kBidOpenBeggarAction, + kBidColourValatAction, kBidValatWithoutAction})); + state->ApplyAction(kBidPassAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidBeggarAction, kBidSoloWithoutAction, kBidOpenBeggarAction, + kBidColourValatAction, kBidValatWithoutAction})); + state->ApplyAction(kBidBeggarAction); + + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kBeggar); + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); +} + +void BiddingPhase3PlayersTest3() { + // scenario: forehand passes, player 1 bids beggar, player 2 bids solo without + auto game = NewTarokGame(GameParameters()); + auto state = game->NewInitialTarokState(); + state->ApplyAction(kDealCardsAction); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kBidding); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kNotSelected); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidPassAction, kBidTwoAction, kBidOneAction, kBidBeggarAction, + kBidSoloWithoutAction, kBidOpenBeggarAction, kBidColourValatAction, + 
kBidValatWithoutAction})); + state->ApplyAction(kBidBeggarAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidPassAction, kBidSoloWithoutAction, kBidOpenBeggarAction, + kBidColourValatAction, kBidValatWithoutAction})); + state->ApplyAction(kBidSoloWithoutAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidPassAction, kBidSoloWithoutAction, kBidOpenBeggarAction, + kBidColourValatAction, kBidValatWithoutAction})); + state->ApplyAction(kBidPassAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidPassAction, kBidSoloWithoutAction, kBidOpenBeggarAction, + kBidColourValatAction, kBidValatWithoutAction})); + state->ApplyAction(kBidPassAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), + {kBidSoloWithoutAction, kBidOpenBeggarAction, + kBidColourValatAction, kBidValatWithoutAction})); + state->ApplyAction(kBidSoloWithoutAction); + + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kSoloWithout); + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); +} + +void BiddingPhase3PlayersTest4() { + // scenario: forehand bids valat without, others are forced to pass, todo: we + // could check this case in DoApplyActionInBidding and simply finish the + // bidding phase early + auto game = NewTarokGame(GameParameters()); + auto state = game->NewInitialTarokState(); + state->ApplyAction(kDealCardsAction); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kBidding); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kNotSelected); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidPassAction, kBidTwoAction, kBidOneAction, kBidBeggarAction, + kBidSoloWithoutAction, kBidOpenBeggarAction, kBidColourValatAction, + kBidValatWithoutAction})); + state->ApplyAction(kBidTwoAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE(AllEq( + state->LegalActions(), + {kBidPassAction, kBidOneAction, kBidBeggarAction, kBidSoloWithoutAction, + kBidOpenBeggarAction, kBidColourValatAction, kBidValatWithoutAction})); + state->ApplyAction(kBidOneAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE(AllEq( + state->LegalActions(), + {kBidPassAction, kBidOneAction, kBidBeggarAction, kBidSoloWithoutAction, + kBidOpenBeggarAction, kBidColourValatAction, kBidValatWithoutAction})); + state->ApplyAction(kBidValatWithoutAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {kBidPassAction})); + state->ApplyAction(kBidPassAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {kBidPassAction})); + state->ApplyAction(kBidPassAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {kBidValatWithoutAction})); + state->ApplyAction(kBidValatWithoutAction); + + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kValatWithout); + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); +} + +void BiddingPhase4PlayersTest1() { + // scenario: all players pass + auto game = NewTarokGame(GameParameters({{"players", GameParameter(4)}})); + auto state = game->NewInitialTarokState(); + state->ApplyAction(kDealCardsAction); + 
SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kBidding); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kNotSelected); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidPassAction, kBidTwoAction, kBidOneAction, kBidSoloThreeAction, + kBidSoloTwoAction, kBidSoloOneAction, kBidBeggarAction, + kBidSoloWithoutAction, kBidOpenBeggarAction, kBidColourValatAction, + kBidValatWithoutAction})); + state->ApplyAction(kBidPassAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidPassAction, kBidTwoAction, kBidOneAction, kBidSoloThreeAction, + kBidSoloTwoAction, kBidSoloOneAction, kBidBeggarAction, + kBidSoloWithoutAction, kBidOpenBeggarAction, kBidColourValatAction, + kBidValatWithoutAction})); + state->ApplyAction(kBidPassAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 3); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidPassAction, kBidTwoAction, kBidOneAction, kBidSoloThreeAction, + kBidSoloTwoAction, kBidSoloOneAction, kBidBeggarAction, + kBidSoloWithoutAction, kBidOpenBeggarAction, kBidColourValatAction, + kBidValatWithoutAction})); + state->ApplyAction(kBidPassAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidKlopAction, kBidThreeAction, kBidTwoAction, kBidOneAction, + kBidSoloThreeAction, kBidSoloTwoAction, kBidSoloOneAction, + kBidBeggarAction, kBidSoloWithoutAction, kBidOpenBeggarAction, + kBidColourValatAction, kBidValatWithoutAction})); + state->ApplyAction(kBidKlopAction); + + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kKlop); + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); +} + +void BiddingPhase4PlayersTest2() { + // scenario: forehand bids one, player 2 bids one, others pass + auto game = NewTarokGame(GameParameters({{"players", GameParameter(4)}})); + auto state = game->NewInitialTarokState(); + state->ApplyAction(kDealCardsAction); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kBidding); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kNotSelected); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidPassAction, kBidTwoAction, kBidOneAction, kBidSoloThreeAction, + kBidSoloTwoAction, kBidSoloOneAction, kBidBeggarAction, + kBidSoloWithoutAction, kBidOpenBeggarAction, kBidColourValatAction, + kBidValatWithoutAction})); + state->ApplyAction(kBidPassAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidPassAction, kBidTwoAction, kBidOneAction, kBidSoloThreeAction, + kBidSoloTwoAction, kBidSoloOneAction, kBidBeggarAction, + kBidSoloWithoutAction, kBidOpenBeggarAction, kBidColourValatAction, + kBidValatWithoutAction})); + state->ApplyAction(kBidOneAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 3); + SPIEL_CHECK_TRUE(AllEq( + state->LegalActions(), + {kBidPassAction, kBidSoloThreeAction, kBidSoloTwoAction, + kBidSoloOneAction, kBidBeggarAction, kBidSoloWithoutAction, + kBidOpenBeggarAction, kBidColourValatAction, kBidValatWithoutAction})); + state->ApplyAction(kBidPassAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE(AllEq( + state->LegalActions(), + {kBidPassAction, kBidOneAction, kBidSoloThreeAction, kBidSoloTwoAction, + kBidSoloOneAction, kBidBeggarAction, kBidSoloWithoutAction, + kBidOpenBeggarAction, kBidColourValatAction, 
kBidValatWithoutAction})); + state->ApplyAction(kBidOneAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE(AllEq( + state->LegalActions(), + {kBidPassAction, kBidSoloThreeAction, kBidSoloTwoAction, + kBidSoloOneAction, kBidBeggarAction, kBidSoloWithoutAction, + kBidOpenBeggarAction, kBidColourValatAction, kBidValatWithoutAction})); + state->ApplyAction(kBidPassAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE(AllEq( + state->LegalActions(), + {kBidOneAction, kBidSoloThreeAction, kBidSoloTwoAction, kBidSoloOneAction, + kBidBeggarAction, kBidSoloWithoutAction, kBidOpenBeggarAction, + kBidColourValatAction, kBidValatWithoutAction})); + state->ApplyAction(kBidOneAction); + + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kKingCalling); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kOne); + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); +} + +void BiddingPhase4PlayersTest3() { + // scenario: player 1 bids solo three, player 3 eventually bids solo one, + // others pass + auto game = NewTarokGame(GameParameters({{"players", GameParameter(4)}})); + auto state = game->NewInitialTarokState(); + state->ApplyAction(kDealCardsAction); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kBidding); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kNotSelected); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidPassAction, kBidTwoAction, kBidOneAction, kBidSoloThreeAction, + kBidSoloTwoAction, kBidSoloOneAction, kBidBeggarAction, + kBidSoloWithoutAction, kBidOpenBeggarAction, kBidColourValatAction, + kBidValatWithoutAction})); + state->ApplyAction(kBidSoloThreeAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidPassAction, kBidSoloTwoAction, kBidSoloOneAction, + kBidBeggarAction, kBidSoloWithoutAction, kBidOpenBeggarAction, + kBidColourValatAction, kBidValatWithoutAction})); + state->ApplyAction(kBidPassAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 3); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidPassAction, kBidSoloTwoAction, kBidSoloOneAction, + kBidBeggarAction, kBidSoloWithoutAction, kBidOpenBeggarAction, + kBidColourValatAction, kBidValatWithoutAction})); + state->ApplyAction(kBidSoloTwoAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidPassAction, kBidSoloTwoAction, kBidSoloOneAction, + kBidBeggarAction, kBidSoloWithoutAction, kBidOpenBeggarAction, + kBidColourValatAction, kBidValatWithoutAction})); + state->ApplyAction(kBidPassAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidPassAction, kBidSoloTwoAction, kBidSoloOneAction, + kBidBeggarAction, kBidSoloWithoutAction, kBidOpenBeggarAction, + kBidColourValatAction, kBidValatWithoutAction})); + state->ApplyAction(kBidPassAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 3); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidSoloTwoAction, kBidSoloOneAction, kBidBeggarAction, + kBidSoloWithoutAction, kBidOpenBeggarAction, kBidColourValatAction, + kBidValatWithoutAction})); + state->ApplyAction(kBidSoloOneAction); + + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTalonExchange); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kSoloOne); + SPIEL_CHECK_EQ(state->CurrentPlayer(), 3); +} + +void BiddingPhase4PlayersTest4() { + // scenario: player 2 bids beggar, others pass + auto game = 
NewTarokGame(GameParameters({{"players", GameParameter(4)}})); + auto state = game->NewInitialTarokState(); + state->ApplyAction(kDealCardsAction); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kBidding); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kNotSelected); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidPassAction, kBidTwoAction, kBidOneAction, kBidSoloThreeAction, + kBidSoloTwoAction, kBidSoloOneAction, kBidBeggarAction, + kBidSoloWithoutAction, kBidOpenBeggarAction, kBidColourValatAction, + kBidValatWithoutAction})); + state->ApplyAction(kBidPassAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidPassAction, kBidTwoAction, kBidOneAction, kBidSoloThreeAction, + kBidSoloTwoAction, kBidSoloOneAction, kBidBeggarAction, + kBidSoloWithoutAction, kBidOpenBeggarAction, kBidColourValatAction, + kBidValatWithoutAction})); + state->ApplyAction(kBidBeggarAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 3); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidPassAction, kBidSoloWithoutAction, kBidOpenBeggarAction, + kBidColourValatAction, kBidValatWithoutAction})); + state->ApplyAction(kBidPassAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE(AllEq( + state->LegalActions(), + {kBidPassAction, kBidBeggarAction, kBidSoloWithoutAction, + kBidOpenBeggarAction, kBidColourValatAction, kBidValatWithoutAction})); + state->ApplyAction(kBidPassAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidBeggarAction, kBidSoloWithoutAction, kBidOpenBeggarAction, + kBidColourValatAction, kBidValatWithoutAction})); + state->ApplyAction(kBidBeggarAction); + + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kBeggar); + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); +} + +void BiddingPhase4PlayersTest5() { + // scenario: forehand passes, player 1 bids open beggar, player 2 bids colour + // valat without, player 3 bids valat without + auto game = NewTarokGame(GameParameters({{"players", GameParameter(4)}})); + auto state = game->NewInitialTarokState(); + state->ApplyAction(kDealCardsAction); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kBidding); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kNotSelected); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidPassAction, kBidTwoAction, kBidOneAction, kBidSoloThreeAction, + kBidSoloTwoAction, kBidSoloOneAction, kBidBeggarAction, + kBidSoloWithoutAction, kBidOpenBeggarAction, kBidColourValatAction, + kBidValatWithoutAction})); + state->ApplyAction(kBidOpenBeggarAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {kBidPassAction, kBidColourValatAction, kBidValatWithoutAction})); + state->ApplyAction(kBidColourValatAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 3); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), {kBidPassAction, kBidValatWithoutAction})); + state->ApplyAction(kBidValatWithoutAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), {kBidPassAction, kBidValatWithoutAction})); + state->ApplyAction(kBidPassAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), {kBidPassAction, kBidValatWithoutAction})); + 
state->ApplyAction(kBidPassAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), {kBidPassAction, kBidValatWithoutAction})); + state->ApplyAction(kBidPassAction); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 3); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {kBidValatWithoutAction})); + state->ApplyAction(kBidValatWithoutAction); + + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kValatWithout); + SPIEL_CHECK_EQ(state->CurrentPlayer(), 3); +} + +// talon exchange phase tests +void TalonExchangePhaseTest1() { + // 3 talon exchanges, select the first set + auto state = StateAfterActions( + GameParameters(), + {kDealCardsAction, kBidPassAction, kBidPassAction, kBidThreeAction}); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTalonExchange); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kThree); + auto talon_initial = state->TalonSets(); + SPIEL_CHECK_EQ(talon_initial.size(), 2); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {0, 1})); + for (auto const& talon_set : talon_initial) { + SPIEL_CHECK_EQ(talon_set.size(), 3); + } + + // select the first set + state->ApplyAction(0); + auto talon_end = state->TalonSets(); + SPIEL_CHECK_EQ(talon_end.size(), 1); + SPIEL_CHECK_EQ(talon_initial.at(1), talon_end.at(0)); + SPIEL_CHECK_TRUE(AllActionsInOtherActions( + talon_initial.at(0), state->PlayerCards(state->CurrentPlayer()))); + + // discard the first three cards + auto legal_actions = state->LegalActions(); + for (int i = 0; i < 3; i++) { + state->ApplyAction(legal_actions.at(i)); + SPIEL_CHECK_FALSE(AllActionsInOtherActions( + {legal_actions.at(i)}, state->PlayerCards(state->CurrentPlayer()))); + } + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); +} + +void TalonExchangePhaseTest2() { + // 3 talon exchanges, select the second set + auto state = StateAfterActions( + GameParameters(), + {kDealCardsAction, kBidPassAction, kBidPassAction, kBidThreeAction}); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTalonExchange); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kThree); + auto talon_initial = state->TalonSets(); + SPIEL_CHECK_EQ(talon_initial.size(), 2); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {0, 1})); + for (auto const& talon_set : talon_initial) { + SPIEL_CHECK_EQ(talon_set.size(), 3); + } + + // select the second set + state->ApplyAction(1); + auto talon_end = state->TalonSets(); + SPIEL_CHECK_EQ(talon_end.size(), 1); + SPIEL_CHECK_EQ(talon_initial.at(0), talon_end.at(0)); + SPIEL_CHECK_TRUE(AllActionsInOtherActions( + talon_initial.at(1), state->PlayerCards(state->CurrentPlayer()))); + + // discard the first three cards + auto legal_actions = state->LegalActions(); + for (int i = 0; i < 3; i++) { + state->ApplyAction(legal_actions.at(i)); + SPIEL_CHECK_FALSE(AllActionsInOtherActions( + {legal_actions.at(i)}, state->PlayerCards(state->CurrentPlayer()))); + } + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); +} + +void TalonExchangePhaseTest3() { + // 2 talon exchanges, select the middle set + auto state = StateAfterActions( + GameParameters(), + {kDealCardsAction, kBidPassAction, kBidPassAction, kBidTwoAction}); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTalonExchange); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kTwo); + auto talon_initial = state->TalonSets(); + SPIEL_CHECK_EQ(talon_initial.size(), 3); + 
SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {0, 1, 2})); + for (auto const& talon_set : talon_initial) { + SPIEL_CHECK_EQ(talon_set.size(), 2); + } + + // select the middle set + state->ApplyAction(1); + auto talon_end = state->TalonSets(); + SPIEL_CHECK_EQ(talon_end.size(), 2); + SPIEL_CHECK_EQ(talon_initial.at(0), talon_end.at(0)); + SPIEL_CHECK_EQ(talon_initial.at(2), talon_end.at(1)); + SPIEL_CHECK_TRUE(AllActionsInOtherActions( + talon_initial.at(1), state->PlayerCards(state->CurrentPlayer()))); + + // discard the first two cards + auto legal_actions = state->LegalActions(); + for (int i = 0; i < 2; i++) { + state->ApplyAction(legal_actions.at(i)); + SPIEL_CHECK_FALSE(AllActionsInOtherActions( + {legal_actions.at(i)}, state->PlayerCards(state->CurrentPlayer()))); + } + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); +} + +void TalonExchangePhaseTest4() { + // 1 talon exchange, select the first set + auto state = StateAfterActions( + GameParameters(), + {kDealCardsAction, kBidPassAction, kBidPassAction, kBidOneAction}); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTalonExchange); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kOne); + auto talon_initial = state->TalonSets(); + SPIEL_CHECK_EQ(talon_initial.size(), 6); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {0, 1, 2, 3, 4, 5})); + for (auto const& talon_set : talon_initial) { + SPIEL_CHECK_EQ(talon_set.size(), 1); + } + + // select the first set + state->ApplyAction(0); + auto talon_end = state->TalonSets(); + SPIEL_CHECK_EQ(talon_end.size(), 5); + for (int i = 1; i < 6; i++) { + SPIEL_CHECK_EQ(talon_initial.at(i), talon_end.at(i - 1)); + } + SPIEL_CHECK_TRUE(AllActionsInOtherActions( + talon_initial.at(0), state->PlayerCards(state->CurrentPlayer()))); + + // discard the last card + auto legal_actions = state->LegalActions(); + state->ApplyAction(legal_actions.at(legal_actions.size() - 1)); + SPIEL_CHECK_FALSE( + AllActionsInOtherActions({legal_actions.at(legal_actions.size() - 1)}, + state->PlayerCards(state->CurrentPlayer()))); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); +} + +void TalonExchangePhaseTest5() { + // 1 talon exchange, select the fourth set + auto state = StateAfterActions( + GameParameters(), + {kDealCardsAction, kBidPassAction, kBidPassAction, kBidOneAction}); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTalonExchange); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kOne); + auto talon_initial = state->TalonSets(); + SPIEL_CHECK_EQ(talon_initial.size(), 6); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {0, 1, 2, 3, 4, 5})); + for (auto const& talon_set : talon_initial) { + SPIEL_CHECK_EQ(talon_set.size(), 1); + } + + // select the fourth set + state->ApplyAction(3); + auto talon_end = state->TalonSets(); + SPIEL_CHECK_EQ(talon_end.size(), 5); + for (int i = 0; i < 5; i++) { + if (i < 3) + SPIEL_CHECK_EQ(talon_initial.at(i), talon_end.at(i)); + else + SPIEL_CHECK_EQ(talon_initial.at(i + 1), talon_end.at(i)); + } + SPIEL_CHECK_TRUE(AllActionsInOtherActions( + talon_initial.at(3), state->PlayerCards(state->CurrentPlayer()))); + + // discard the second card + auto legal_actions = state->LegalActions(); + state->ApplyAction(legal_actions.at(1)); + SPIEL_CHECK_FALSE(AllActionsInOtherActions( + {legal_actions.at(1)}, state->PlayerCards(state->CurrentPlayer()))); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); +} + +void TalonExchangePhaseTest6() { + // 1 talon exchange, select the last set + 
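+  // Under contract One the talon is split into six single-card sets; the
+  // declarer takes exactly one of them into hand and must then discard one
+  // card, after which tricks playing begins.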
auto state = StateAfterActions( + GameParameters(), + {kDealCardsAction, kBidPassAction, kBidPassAction, kBidOneAction}); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTalonExchange); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kOne); + auto talon_initial = state->TalonSets(); + SPIEL_CHECK_EQ(talon_initial.size(), 6); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {0, 1, 2, 3, 4, 5})); + for (auto const& talon_set : talon_initial) { + SPIEL_CHECK_EQ(talon_set.size(), 1); + } + + // select the last set + state->ApplyAction(5); + auto talon_end = state->TalonSets(); + SPIEL_CHECK_EQ(talon_end.size(), 5); + for (int i = 0; i < 5; i++) { + SPIEL_CHECK_EQ(talon_initial.at(i), talon_end.at(i)); + } + SPIEL_CHECK_TRUE(AllActionsInOtherActions( + talon_initial.at(5), state->PlayerCards(state->CurrentPlayer()))); + + // discard the first card + auto legal_actions = state->LegalActions(); + state->ApplyAction(legal_actions.at(0)); + SPIEL_CHECK_FALSE(AllActionsInOtherActions( + {legal_actions.at(0)}, state->PlayerCards(state->CurrentPlayer()))); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); +} + +void TalonExchangePhaseTest7() { + // check that taroks and kings cannot be exchanged + auto state = + StateAfterActions(GameParameters({{"rng_seed", GameParameter(42)}}), + {kDealCardsAction, kBidPassAction, kBidOneAction, + kBidPassAction, kBidOneAction, 1}); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTalonExchange); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kOne); + + // check taroks and kings are not in legal actions + for (auto const& action : state->LegalActions()) { + const Card& card = card_deck.at(action); + SPIEL_CHECK_TRUE(card.suit != CardSuit::kTaroks); + SPIEL_CHECK_NE(card.points, 5); + } +} + +void TalonExchangePhaseTest8() { + // check that tarok can be exchanged if player has no other choice + auto state = + StateAfterActions(GameParameters({{"players", GameParameter(4)}, + {"rng_seed", GameParameter(141750)}}), + {kDealCardsAction, kBidPassAction, kBidPassAction, + kBidPassAction, kBidSoloTwoAction}); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTalonExchange); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kSoloTwo); + + // select first set from talon + state->ApplyAction(0); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTalonExchange); + + // first the player must exchange non-tarok or non-king card + // check taroks and kings are not in legal actions + for (auto const& action : state->LegalActions()) { + const Card& card = card_deck.at(action); + SPIEL_CHECK_TRUE(card.suit != CardSuit::kTaroks); + SPIEL_CHECK_NE(card.points, 5); + } + state->ApplyAction(state->LegalActions().at(0)); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTalonExchange); + + // at this point the player has only taroks and kings in his hand but still + // needs to exchange one card + // check only taroks (no trula or kings) are in legal actions + for (auto const& action : state->LegalActions()) { + const Card& card = card_deck.at(action); + SPIEL_CHECK_TRUE(card.suit == CardSuit::kTaroks); + SPIEL_CHECK_NE(card.points, 5); + } + state->ApplyAction(state->LegalActions().at(0)); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); +} + +// tricks playing phase tests +static inline const GameParameters kTricksPlayingGameParams = GameParameters( + {{"players", GameParameter(3)}, {"rng_seed", GameParameter(634317)}}); + +// the above "rng_seed" yields: +// +// player 
0 cards: +// ('II', 1), ('IIII', 3), ('V', 4), ('VIII', 7), ('XI', 10), ('XIX', 18), +// ('Mond', 20), ('Jack of Hearts', 26), ('Knight of Hearts', 27), ('4 of +// Diamonds', 30), ('8 of Spades', 39), ('Jack of Spades', 42), ('King of +// Spades', 45), ('10 of Clubs', 49), ('Jack of Clubs', 50), ('Knight of Clubs', +// 51) +// +// player 1 cards: +// ('III', 2), ('VII', 6), ('XII', 11), ('XIII', 12), ('XIV', 13), ('XX', 19), +// ('Skis', 21), ('1 of Hearts', 25), ('3 of Diamonds', 31), ('Knight of +// Diamonds', 35), ('Queen of Diamonds', 36), ('King of Diamonds', 37), ('7 of +// Spades', 38), ('Knight of Spades', 43), ('8 of Clubs', 47), ('Queen of +// Clubs', 52) +// +// player 2 cards: +// ('Pagat', 0), ('VI', 5), ('IX', 8), ('X', 9), ('XV', 14), ('XVI', 15), +// ('XVII', 16), ('XVIII', 17), ('4 of Hearts', 22), ('2 of Diamonds', 32), ('1 +// of Diamonds', 33), ('Jack of Diamonds', 34), ('9 of Spades', 40), ('10 of +// Spades', 41), ('9 of Clubs', 48), ('King of Clubs', 53) + +void TricksPlayingPhaseTest1() { + // check forced pagat in klop + auto state = StateAfterActions( + kTricksPlayingGameParams, + {kDealCardsAction, kBidPassAction, kBidPassAction, kBidKlopAction}); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kKlop); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE(state->TrickCards().empty()); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {1, 3, 4, 7, 10, 18, 20, 26, 27, + 30, 39, 42, 45, 49, 50, 51})); + state->ApplyAction(20); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE(AllEq(state->TrickCards(), {20})); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {21})); + state->ApplyAction(21); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE(AllEq(state->TrickCards(), {20, 21})); + // pagat is forced + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {0})); + state->ApplyAction(0); + // pagat won the trick + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE(state->TrickCards().empty()); +} + +void TricksPlayingPhaseTest2() { + // check pagat not a legal action in klop when following and all taroks lower + auto state = StateAfterActions( + kTricksPlayingGameParams, + {kDealCardsAction, kBidPassAction, kBidPassAction, kBidKlopAction}); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kKlop); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE(state->TrickCards().empty()); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {1, 3, 4, 7, 10, 18, 20, 26, 27, + 30, 39, 42, 45, 49, 50, 51})); + state->ApplyAction(18); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE(AllEq(state->TrickCards(), {18})); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {19, 21})); + state->ApplyAction(21); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE(AllEq(state->TrickCards(), {18, 21})); + // pagat not available but all other taroks available + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {5, 8, 9, 14, 15, 16, 17})); + state->ApplyAction(17); + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE(state->TrickCards().empty()); +} + +void TricksPlayingPhaseTest3() { + // check pagat not a legal action in klop when opening + auto state = StateAfterActions( + kTricksPlayingGameParams, + {kDealCardsAction, kBidPassAction, kBidPassAction, kBidKlopAction}); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); 
+ SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kKlop); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE(state->TrickCards().empty()); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {1, 3, 4, 7, 10, 18, 20, 26, 27, + 30, 39, 42, 45, 49, 50, 51})); + state->ApplyAction(4); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE(AllEq(state->TrickCards(), {4})); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {6, 11, 12, 13, 19, 21})); + state->ApplyAction(6); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE(AllEq(state->TrickCards(), {4, 6})); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {8, 9, 14, 15, 16, 17})); + state->ApplyAction(8); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE(state->TrickCards().empty()); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {5, 9, 14, 15, 16, 17, 22, 32, + 33, 34, 40, 41, 48, 53})); +} + +void TricksPlayingPhaseTest4() { + // check legal non-tarok cards in klop + auto state = StateAfterActions( + kTricksPlayingGameParams, + {kDealCardsAction, kBidPassAction, kBidPassAction, kBidKlopAction}); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kKlop); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE(state->TrickCards().empty()); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {1, 3, 4, 7, 10, 18, 20, 26, 27, + 30, 39, 42, 45, 49, 50, 51})); + state->ApplyAction(42); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE(AllEq(state->TrickCards(), {42})); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {43})); + state->ApplyAction(43); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE(AllEq(state->TrickCards(), {42, 43})); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {40, 41})); + state->ApplyAction(41); + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE(state->TrickCards().empty()); +} + +void TricksPlayingPhaseTest5() { + // check scenarios where no card has to be beaten in klop + auto state = StateAfterActions( + kTricksPlayingGameParams, + {kDealCardsAction, kBidPassAction, kBidPassAction, kBidKlopAction}); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kKlop); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE(state->TrickCards().empty()); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {1, 3, 4, 7, 10, 18, 20, 26, 27, + 30, 39, 42, 45, 49, 50, 51})); + state->ApplyAction(30); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE(AllEq(state->TrickCards(), {30})); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {31, 35, 36, 37})); + state->ApplyAction(37); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE(AllEq(state->TrickCards(), {30, 37})); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {32, 33, 34})); + state->ApplyAction(34); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE(state->TrickCards().empty()); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {2, 6, 11, 12, 13, 19, 21, 25, + 31, 35, 36, 38, 43, 47, 52})); + state->ApplyAction(52); + state->ApplyAction(53); + state->ApplyAction(51); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE(state->TrickCards().empty()); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), + {5, 8, 9, 14, 15, 16, 17, 22, 32, 33, 40, 41, 48})); + state->ApplyAction(32); + + // can't follow suit, i.e. 
forced to play tarok + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE(AllEq(state->TrickCards(), {32})); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {1, 3, 4, 7, 10, 18, 20})); + state->ApplyAction(1); + + // doesn't have to beat the opening card due to the second card being tarok + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE(AllEq(state->TrickCards(), {32, 1})); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {31, 35, 36})); + state->ApplyAction(36); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE(state->TrickCards().empty()); +} + +void TricksPlayingPhaseTest6() { + // check taroks don't win in colour valat + auto state = StateAfterActions( + kTricksPlayingGameParams, + {kDealCardsAction, kBidColourValatAction, kBidPassAction, kBidPassAction, + kBidColourValatAction}); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); + SPIEL_CHECK_EQ(state->SelectedContractName(), + ContractName::kColourValatWithout); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE(state->TrickCards().empty()); + SPIEL_CHECK_TRUE( + AllEq(state->LegalActions(), + {2, 6, 11, 12, 13, 19, 21, 25, 31, 35, 36, 37, 38, 43, 47, 52})); + state->ApplyAction(35); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE(AllEq(state->TrickCards(), {35})); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {32, 33, 34})); + state->ApplyAction(32); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE(AllEq(state->TrickCards(), {35, 32})); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {30})); + state->ApplyAction(30); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE(state->TrickCards().empty()); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {2, 6, 11, 12, 13, 19, 21, 25, + 31, 36, 37, 38, 43, 47, 52})); + state->ApplyAction(37); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE(AllEq(state->TrickCards(), {37})); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {33, 34})); + state->ApplyAction(33); + + // can't follow suit, i.e. 
forced to play tarok + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE(AllEq(state->TrickCards(), {37, 33})); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {1, 3, 4, 7, 10, 18, 20})); + state->ApplyAction(1); + + // tarok didn't win the trick + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE(state->TrickCards().empty()); +} + +void TricksPlayingPhaseTest7() { + // check positive contracts scenarios + auto state = + StateAfterActions(kTricksPlayingGameParams, + {kDealCardsAction, kBidPassAction, kBidTwoAction, + kBidPassAction, kBidTwoAction, 0, 40, 41}); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kTwo); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE(state->TrickCards().empty()); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {1, 3, 4, 7, 10, 18, 20, 26, 27, + 30, 39, 42, 45, 49, 50, 51})); + state->ApplyAction(30); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE(AllEq(state->TrickCards(), {30})); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {31, 35, 36, 37})); + state->ApplyAction(31); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE(AllEq(state->TrickCards(), {30, 31})); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {32, 33, 34})); + state->ApplyAction(32); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE(state->TrickCards().empty()); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {0, 5, 8, 9, 14, 15, 16, 17, 22, + 24, 28, 33, 34, 48, 53})); + state->ApplyAction(33); + + // can't follow suit, i.e. forced to play tarok + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE(AllEq(state->TrickCards(), {33})); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {1, 3, 4, 7, 10, 18, 20})); + state->ApplyAction(18); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE(AllEq(state->TrickCards(), {33, 18})); + SPIEL_CHECK_TRUE(AllEq(state->LegalActions(), {35, 36, 37})); + state->ApplyAction(37); + + // tarok won the trick + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE(state->TrickCards().empty()); +} + +// captured mond tests +void CapturedMondTest1() { + // mond captured by skis + auto state = StateAfterActions( + GameParameters({{"rng_seed", GameParameter(634317)}}), + {kDealCardsAction, kBidPassAction, kBidPassAction, kBidOneAction, 0, 49}); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kOne); + + // play mond + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + state->ApplyAction(CardLongNameToAction("Mond")); + // play skis + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + state->ApplyAction(CardLongNameToAction("Skis")); + // play low tarok + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + state->ApplyAction(CardLongNameToAction("VI")); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE(AllEq(state->CapturedMondPenalties(), {-20, 0, 0})); +} + +void CapturedMondTest2() { + // mond captured by pagat (emperor trick) + auto state = StateAfterActions( + GameParameters({{"rng_seed", GameParameter(634317)}}), + {kDealCardsAction, kBidPassAction, kBidPassAction, kBidOneAction, 0, 49}); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kOne); + + // play mond + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + state->ApplyAction(CardLongNameToAction("Mond")); + // play skis + 
SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + state->ApplyAction(CardLongNameToAction("Skis")); + // play pagat + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + state->ApplyAction(CardLongNameToAction("Pagat")); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE(AllEq(state->CapturedMondPenalties(), {-20, 0, 0})); +} + +void CapturedMondTest3() { + // mond taken from talon + auto state = StateAfterActions( + GameParameters({{"rng_seed", GameParameter(497200)}}), + {kDealCardsAction, kBidPassAction, kBidPassAction, kBidOneAction, 3, 49}); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kOne); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE(AllEq(state->CapturedMondPenalties(), {0, 0, 0})); +} + +void CapturedMondTest4() { + // mond left in talon + auto state = StateAfterActions( + GameParameters({{"rng_seed", GameParameter(497200)}}), + {kDealCardsAction, kBidPassAction, kBidPassAction, kBidOneAction, 0, 49}); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kOne); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE(AllEq(state->CapturedMondPenalties(), {-20, 0, 0})); +} + +void CapturedMondTest5() { + // mond left in talon but won with a called king + auto state = StateAfterActions( + GameParameters( + {{"players", GameParameter(4)}, {"rng_seed", GameParameter(297029)}}), + {kDealCardsAction, kBidPassAction, kBidPassAction, kBidPassAction, + kBidOneAction, kKingOfSpadesAction, 2, 49}); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kOne); + + // play the called king and win the trick + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE(AllEq(state->CapturedMondPenalties(), {-20, 0, 0, 0})); + state->ApplyAction(CardLongNameToAction("King of Spades")); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + state->ApplyAction(CardLongNameToAction("Queen of Spades")); + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + state->ApplyAction(CardLongNameToAction("8 of Spades")); + SPIEL_CHECK_EQ(state->CurrentPlayer(), 3); + state->ApplyAction(CardLongNameToAction("7 of Spades")); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + SPIEL_CHECK_TRUE(AllEq(state->CapturedMondPenalties(), {0, 0, 0, 0})); +} + +void CapturedMondTest6() { + // mond captured by ally should also be penalized + auto state = + StateAfterActions(GameParameters({{"rng_seed", GameParameter(634317)}}), + {kDealCardsAction, kBidPassAction, kBidOneAction, + kBidPassAction, kBidOneAction, 0, 22}); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kOne); + + // play mond + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + state->ApplyAction(CardLongNameToAction("Mond")); + // play skis + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + state->ApplyAction(CardLongNameToAction("Skis")); + // play low tarok + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + state->ApplyAction(CardLongNameToAction("VI")); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + SPIEL_CHECK_TRUE(AllEq(state->CapturedMondPenalties(), {-20, 0, 0})); +} + +void CapturedMondTest7() { + // mond captured in klop should not be penalized + auto state = StateAfterActions( + GameParameters({{"rng_seed", GameParameter(634317)}}), + {kDealCardsAction, kBidPassAction, kBidPassAction, kBidKlopAction}); + 
SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kKlop); + + // play mond + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + state->ApplyAction(CardLongNameToAction("Mond")); + // play skis + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + state->ApplyAction(CardLongNameToAction("Skis")); + // play pagat + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + state->ApplyAction(CardLongNameToAction("Pagat")); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE(AllEq(state->CapturedMondPenalties(), {0, 0, 0})); +} + +void CapturedMondTest8() { + // mond captured in bagger should not be penalized + auto state = StateAfterActions( + GameParameters({{"rng_seed", GameParameter(634317)}}), + {kDealCardsAction, kBidPassAction, kBidPassAction, kBidBeggarAction}); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kBeggar); + + // play mond + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + state->ApplyAction(CardLongNameToAction("Mond")); + // play skis + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + state->ApplyAction(CardLongNameToAction("Skis")); + // play pagat + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + state->ApplyAction(CardLongNameToAction("Pagat")); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE(AllEq(state->CapturedMondPenalties(), {0, 0, 0})); +} + +void CapturedMondTest9() { + // mond captured in open bagger should not be penalized + auto state = StateAfterActions( + GameParameters({{"rng_seed", GameParameter(634317)}}), + {kDealCardsAction, kBidPassAction, kBidPassAction, kBidOpenBeggarAction}); + SPIEL_CHECK_EQ(state->CurrentGamePhase(), GamePhase::kTricksPlaying); + SPIEL_CHECK_EQ(state->SelectedContractName(), ContractName::kOpenBeggar); + + // play mond + SPIEL_CHECK_EQ(state->CurrentPlayer(), 0); + state->ApplyAction(CardLongNameToAction("Mond")); + // play skis + SPIEL_CHECK_EQ(state->CurrentPlayer(), 1); + state->ApplyAction(CardLongNameToAction("Skis")); + // play pagat + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + state->ApplyAction(CardLongNameToAction("Pagat")); + + SPIEL_CHECK_EQ(state->CurrentPlayer(), 2); + SPIEL_CHECK_TRUE(AllEq(state->CapturedMondPenalties(), {0, 0, 0})); +} + +} // namespace tarok +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::tarok::BasicGameTests(); + // cards tests + open_spiel::tarok::CardDeckShufflingSeedTest(); + open_spiel::tarok::DealtCardsSizeTest(3); + open_spiel::tarok::DealtCardsSizeTest(4); + open_spiel::tarok::DealtCardsContentTest(3); + open_spiel::tarok::DealtCardsContentTest(4); + open_spiel::tarok::PlayersCardsSortedTest(); + open_spiel::tarok::CountCardsTest(); + open_spiel::tarok::CardDealingPhaseTest(); + // bidding phase tests + open_spiel::tarok::BiddingPhase3PlayersTest1(); + open_spiel::tarok::BiddingPhase3PlayersTest2(); + open_spiel::tarok::BiddingPhase3PlayersTest3(); + open_spiel::tarok::BiddingPhase3PlayersTest4(); + open_spiel::tarok::BiddingPhase4PlayersTest1(); + open_spiel::tarok::BiddingPhase4PlayersTest2(); + open_spiel::tarok::BiddingPhase4PlayersTest3(); + open_spiel::tarok::BiddingPhase4PlayersTest4(); + open_spiel::tarok::BiddingPhase4PlayersTest5(); + // talon exchange phase tests + open_spiel::tarok::TalonExchangePhaseTest1(); + open_spiel::tarok::TalonExchangePhaseTest2(); + open_spiel::tarok::TalonExchangePhaseTest3(); + open_spiel::tarok::TalonExchangePhaseTest4(); + 
open_spiel::tarok::TalonExchangePhaseTest5(); + open_spiel::tarok::TalonExchangePhaseTest6(); + open_spiel::tarok::TalonExchangePhaseTest7(); + open_spiel::tarok::TalonExchangePhaseTest8(); + // tricks playing phase tests + open_spiel::tarok::TricksPlayingPhaseTest1(); + open_spiel::tarok::TricksPlayingPhaseTest2(); + open_spiel::tarok::TricksPlayingPhaseTest3(); + open_spiel::tarok::TricksPlayingPhaseTest4(); + open_spiel::tarok::TricksPlayingPhaseTest5(); + open_spiel::tarok::TricksPlayingPhaseTest6(); + open_spiel::tarok::TricksPlayingPhaseTest7(); + // captured mond tests + open_spiel::tarok::CapturedMondTest1(); + open_spiel::tarok::CapturedMondTest2(); + open_spiel::tarok::CapturedMondTest3(); + open_spiel::tarok::CapturedMondTest4(); + open_spiel::tarok::CapturedMondTest5(); + open_spiel::tarok::CapturedMondTest6(); + open_spiel::tarok::CapturedMondTest7(); + open_spiel::tarok::CapturedMondTest8(); + open_spiel::tarok::CapturedMondTest9(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/tic_tac_toe/tic_tac_toe.cc b/scenarios/bargaining/open_spiel/open_spiel/games/tic_tac_toe/tic_tac_toe.cc new file mode 100644 index 0000000..030f6e9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/tic_tac_toe/tic_tac_toe.cc @@ -0,0 +1,212 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/tic_tac_toe/tic_tac_toe.h" + +#include +#include +#include +#include + +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/tensor_view.h" + +namespace open_spiel { +namespace tic_tac_toe { +namespace { + +// Facts about the game. 
+const GameType kGameType{
+    /*short_name=*/"tic_tac_toe",
+    /*long_name=*/"Tic Tac Toe",
+    GameType::Dynamics::kSequential,
+    GameType::ChanceMode::kDeterministic,
+    GameType::Information::kPerfectInformation,
+    GameType::Utility::kZeroSum,
+    GameType::RewardModel::kTerminal,
+    /*max_num_players=*/2,
+    /*min_num_players=*/2,
+    /*provides_information_state_string=*/true,
+    /*provides_information_state_tensor=*/false,
+    /*provides_observation_string=*/true,
+    /*provides_observation_tensor=*/true,
+    /*parameter_specification=*/{}  // no parameters
+};
+
+std::shared_ptr<const Game> Factory(const GameParameters& params) {
+  return std::shared_ptr<const Game>(new TicTacToeGame(params));
+}
+
+REGISTER_SPIEL_GAME(kGameType, Factory);
+
+RegisterSingleTensorObserver single_tensor(kGameType.short_name);
+
+}  // namespace
+
+CellState PlayerToState(Player player) {
+  switch (player) {
+    case 0:
+      return CellState::kCross;
+    case 1:
+      return CellState::kNought;
+    default:
+      SpielFatalError(absl::StrCat("Invalid player id ", player));
+      return CellState::kEmpty;
+  }
+}
+
+std::string StateToString(CellState state) {
+  switch (state) {
+    case CellState::kEmpty:
+      return ".";
+    case CellState::kNought:
+      return "o";
+    case CellState::kCross:
+      return "x";
+    default:
+      SpielFatalError("Unknown state.");
+  }
+}
+
+bool BoardHasLine(const std::array<CellState, kNumCells>& board,
+                  const Player player) {
+  CellState c = PlayerToState(player);
+  return (board[0] == c && board[1] == c && board[2] == c) ||
+         (board[3] == c && board[4] == c && board[5] == c) ||
+         (board[6] == c && board[7] == c && board[8] == c) ||
+         (board[0] == c && board[3] == c && board[6] == c) ||
+         (board[1] == c && board[4] == c && board[7] == c) ||
+         (board[2] == c && board[5] == c && board[8] == c) ||
+         (board[0] == c && board[4] == c && board[8] == c) ||
+         (board[2] == c && board[4] == c && board[6] == c);
+}
+
+std::vector<CellState> TicTacToeState::Board() const {
+  std::vector<CellState> board(board_.begin(), board_.end());
+  return board;
+}
+
+
+void TicTacToeState::DoApplyAction(Action move) {
+  SPIEL_CHECK_EQ(board_[move], CellState::kEmpty);
+  board_[move] = PlayerToState(CurrentPlayer());
+  if (HasLine(current_player_)) {
+    outcome_ = current_player_;
+  }
+  current_player_ = 1 - current_player_;
+  num_moves_ += 1;
+}
+
+std::vector<Action> TicTacToeState::LegalActions() const {
+  if (IsTerminal()) return {};
+  // Can move in any empty cell.
+  std::vector<Action> moves;
+  for (int cell = 0; cell < kNumCells; ++cell) {
+    if (board_[cell] == CellState::kEmpty) {
+      moves.push_back(cell);
+    }
+  }
+  return moves;
+}
+
+std::string TicTacToeState::ActionToString(Player player,
+                                           Action action_id) const {
+  return game_->ActionToString(player, action_id);
+}
+
+bool TicTacToeState::HasLine(Player player) const {
+  return BoardHasLine(board_, player);
+}
+
+bool TicTacToeState::IsFull() const { return num_moves_ == kNumCells; }
+
+TicTacToeState::TicTacToeState(std::shared_ptr<const Game> game) : State(game) {
+  std::fill(begin(board_), end(board_), CellState::kEmpty);
+}
+
+std::string TicTacToeState::ToString() const {
+  std::string str;
+  for (int r = 0; r < kNumRows; ++r) {
+    for (int c = 0; c < kNumCols; ++c) {
+      absl::StrAppend(&str, StateToString(BoardAt(r, c)));
+    }
+    if (r < (kNumRows - 1)) {
+      absl::StrAppend(&str, "\n");
+    }
+  }
+  return str;
+}
+
+bool TicTacToeState::IsTerminal() const {
+  return outcome_ != kInvalidPlayer || IsFull();
+}
+
+std::vector<double> TicTacToeState::Returns() const {
+  if (HasLine(Player{0})) {
+    return {1.0, -1.0};
+  } else if (HasLine(Player{1})) {
+    return {-1.0, 1.0};
+  } else {
+    return {0.0, 0.0};
+  }
+}
+
+std::string TicTacToeState::InformationStateString(Player player) const {
+  SPIEL_CHECK_GE(player, 0);
+  SPIEL_CHECK_LT(player, num_players_);
+  return HistoryString();
+}
+
+std::string TicTacToeState::ObservationString(Player player) const {
+  SPIEL_CHECK_GE(player, 0);
+  SPIEL_CHECK_LT(player, num_players_);
+  return ToString();
+}
+
+void TicTacToeState::ObservationTensor(Player player,
+                                       absl::Span<float> values) const {
+  SPIEL_CHECK_GE(player, 0);
+  SPIEL_CHECK_LT(player, num_players_);
+
+  // Treat `values` as a 2-d tensor.
+  TensorView<2> view(values, {kCellStates, kNumCells}, true);
+  for (int cell = 0; cell < kNumCells; ++cell) {
+    view[{static_cast<int>(board_[cell]), cell}] = 1.0;
+  }
+}
+
+void TicTacToeState::UndoAction(Player player, Action move) {
+  board_[move] = CellState::kEmpty;
+  current_player_ = player;
+  outcome_ = kInvalidPlayer;
+  num_moves_ -= 1;
+  history_.pop_back();
+  --move_number_;
+}
+
+std::unique_ptr<State> TicTacToeState::Clone() const {
+  return std::unique_ptr<State>(new TicTacToeState(*this));
+}
+
+std::string TicTacToeGame::ActionToString(Player player,
+                                          Action action_id) const {
+  return absl::StrCat(StateToString(PlayerToState(player)), "(",
+                      action_id / kNumCols, ",", action_id % kNumCols, ")");
+}
+
+TicTacToeGame::TicTacToeGame(const GameParameters& params)
+    : Game(kGameType, params) {}
+
+}  // namespace tic_tac_toe
+}  // namespace open_spiel
diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/tic_tac_toe/tic_tac_toe.h b/scenarios/bargaining/open_spiel/open_spiel/games/tic_tac_toe/tic_tac_toe.h
new file mode 100644
index 0000000..ed2d7c6
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/games/tic_tac_toe/tic_tac_toe.h
@@ -0,0 +1,129 @@
+// Copyright 2019 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef OPEN_SPIEL_GAMES_TIC_TAC_TOE_H_
+#define OPEN_SPIEL_GAMES_TIC_TAC_TOE_H_
+
+#include
+#include
+#include
+#include
+#include
+
+#include "open_spiel/spiel.h"
+
+// Simple game of Noughts and Crosses:
+// https://en.wikipedia.org/wiki/Tic-tac-toe
+//
+// Parameters: none
+
+namespace open_spiel {
+namespace tic_tac_toe {
+
+// Constants.
+inline constexpr int kNumPlayers = 2;
+inline constexpr int kNumRows = 3;
+inline constexpr int kNumCols = 3;
+inline constexpr int kNumCells = kNumRows * kNumCols;
+inline constexpr int kCellStates = 1 + kNumPlayers;  // empty, 'x', and 'o'.
+
+// https://math.stackexchange.com/questions/485752/tictactoe-state-space-choose-calculation/485852
+inline constexpr int kNumberStates = 5478;
+
+// State of a cell.
+enum class CellState {
+  kEmpty,
+  kNought,  // O
+  kCross,   // X
+};
+
+// State of an in-play game.
+class TicTacToeState : public State {
+ public:
+  TicTacToeState(std::shared_ptr<const Game> game);
+
+  TicTacToeState(const TicTacToeState&) = default;
+  TicTacToeState& operator=(const TicTacToeState&) = default;
+
+  Player CurrentPlayer() const override {
+    return IsTerminal() ? kTerminalPlayerId : current_player_;
+  }
+  std::string ActionToString(Player player, Action action_id) const override;
+  std::string ToString() const override;
+  bool IsTerminal() const override;
+  std::vector<double> Returns() const override;
+  std::string InformationStateString(Player player) const override;
+  std::string ObservationString(Player player) const override;
+  void ObservationTensor(Player player,
+                         absl::Span<float> values) const override;
+  std::unique_ptr<State> Clone() const override;
+  void UndoAction(Player player, Action move) override;
+  std::vector<Action> LegalActions() const override;
+  std::vector<CellState> Board() const;
+  CellState BoardAt(int cell) const { return board_[cell]; }
+  CellState BoardAt(int row, int column) const {
+    return board_[row * kNumCols + column];
+  }
+  Player outcome() const { return outcome_; }
+  void ChangePlayer() { current_player_ = current_player_ == 0 ? 1 : 0; }
+
+  // Only used by Ultimate Tic-Tac-Toe.
+  void SetCurrentPlayer(Player player) { current_player_ = player; }
+
+ protected:
+  std::array<CellState, kNumCells> board_;
+  void DoApplyAction(Action move) override;
+
+ private:
+  bool HasLine(Player player) const;  // Does this player have a line?
+  bool IsFull() const;                // Is the board full?
+  Player current_player_ = 0;         // Player zero goes first
+  Player outcome_ = kInvalidPlayer;
+  int num_moves_ = 0;
+};
+
+// Game object.
+class TicTacToeGame : public Game {
+ public:
+  explicit TicTacToeGame(const GameParameters& params);
+  int NumDistinctActions() const override { return kNumCells; }
+  std::unique_ptr<State> NewInitialState() const override {
+    return std::unique_ptr<State>(new TicTacToeState(shared_from_this()));
+  }
+  int NumPlayers() const override { return kNumPlayers; }
+  double MinUtility() const override { return -1; }
+  absl::optional<double> UtilitySum() const override { return 0; }
+  double MaxUtility() const override { return 1; }
+  std::vector<int> ObservationTensorShape() const override {
+    return {kCellStates, kNumRows, kNumCols};
+  }
+  int MaxGameLength() const override { return kNumCells; }
+  std::string ActionToString(Player player, Action action_id) const override;
+};
+
+CellState PlayerToState(Player player);
+std::string StateToString(CellState state);
+
+// Does this player have a line?
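+// A "line" is any of the eight winning configurations: three rows, three
+// columns, or either diagonal (see BoardHasLine in tic_tac_toe.cc).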
+bool BoardHasLine(const std::array& board, + const Player player); + +inline std::ostream& operator<<(std::ostream& stream, const CellState& state) { + return stream << StateToString(state); +} + +} // namespace tic_tac_toe +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_TIC_TAC_TOE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/tic_tac_toe/tic_tac_toe_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/tic_tac_toe/tic_tac_toe_test.cc new file mode 100644 index 0000000..e68c97a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/tic_tac_toe/tic_tac_toe_test.cc @@ -0,0 +1,36 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace tic_tac_toe { +namespace { + +namespace testing = open_spiel::testing; + +void BasicTicTacToeTests() { + testing::LoadGameTest("tic_tac_toe"); + testing::NoChanceOutcomesTest(*LoadGame("tic_tac_toe")); + testing::RandomSimTest(*LoadGame("tic_tac_toe"), 100); +} + +} // namespace +} // namespace tic_tac_toe +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::tic_tac_toe::BasicTicTacToeTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/tiny_bridge/tiny_bridge.cc b/scenarios/bargaining/open_spiel/open_spiel/games/tiny_bridge/tiny_bridge.cc new file mode 100644 index 0000000..37c4d3b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/tiny_bridge/tiny_bridge.cc @@ -0,0 +1,826 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
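+
+// Tiny bridge is a simplified bridge variant: an eight-card deck (ranks J, Q,
+// K, A in hearts and spades), two cards per seat, an auction over one- and
+// two-trick contracts in hearts, spades or no-trump, and a play phase that is
+// solved exactly (via alpha-beta search) to score the chosen contract.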
+ +#include "open_spiel/games/tiny_bridge/tiny_bridge.h" + +#include + +#include "open_spiel/abseil-cpp/absl/strings/match.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/algorithms/minimax.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace tiny_bridge { +namespace { + +constexpr std::array kRankChar{'J', 'Q', 'K', 'A'}; +constexpr std::array kSuitChar{'H', 'S', 'N'}; +constexpr std::array kSeatChar{'W', 'N', 'E', 'S'}; + +int RelativeSeatIndex(Seat player, Seat observer) { + return (kNumSeats + static_cast(player) - static_cast(observer)) % + kNumSeats; +} + +std::string RelativeSeatString(Seat player, Seat observer) { + constexpr std::array relative_player{"Us", "LH", "Pd", + "RH"}; + return std::string(relative_player[RelativeSeatIndex(player, observer)]); +} + +int Suit(int card) { return card / kNumRanks; } +int Rank(int card) { return card % kNumRanks; } + +int CharToRank(char c) { + switch (c) { + case 'J': + return 0; + case 'Q': + return 1; + case 'K': + return 2; + case 'A': + return 3; + } + SpielFatalError(absl::StrCat("Unknown rank '", std::string(1, c), "'")); +} + +int CharToTrumps(char c) { + switch (c) { + case 'H': + return 0; + case 'S': + return 1; + case 'N': // No-trump + return 2; + } + SpielFatalError(absl::StrCat("Unknown trump suit '", std::string(1, c), "'")); +} + +Seat CharToSeat(char c) { + switch (c) { + case 'W': + return kWest; + case 'N': + return kNorth; + case 'E': + return kEast; + case 'S': + return kSouth; + } + SpielFatalError(absl::StrCat("Unknown hand '", std::string(1, c), "'")); +} + +int StringToCard(const std::string& s) { + return CharToRank(s[1]) + kNumRanks * CharToTrumps(s[0]); +} + +std::string CardString(int card) { + return absl::StrCat(std::string(1, kSuitChar[Suit(card)]), + std::string(1, kRankChar[Rank(card)])); +} + +// Requires card0 > card1 +int CardsToChanceOutcome(int card0, int card1) { + return (card0 * (card0 - 1)) / 2 + card1; +} + +// Returns first > second +std::pair ChanceOutcomeToCards(int outcome) { + int card0 = 1; + while (CardsToChanceOutcome(card0 + 1, 0) <= outcome) ++card0; + return {card0, outcome - CardsToChanceOutcome(card0, 0)}; +} + +// Hand abstraction. Each line is a bucket of hands that are +// indistinguishable. +inline constexpr const char* kAbstraction[kNumAbstractHands] = { + // Mixed suits. + "SAHA", + "SJHA SKHA SQHA", + "SAHJ SAHK SAHQ", + "SJHJ SJHK SJHQ SKHJ SKHK SKHQ SQHJ SQHK SQHQ", + // Hearts only. + "HAHK HAHQ", + "HKHJ HKHQ", + "HAHJ", + "HQHJ", + // Spades only. + "SASK SASQ", + "SKSQ SKSJ", + "SASJ", + "SQSJ", +}; + +// Computes the abstraction lookup. +std::vector ConcreteToAbstract() { + std::vector concrete_to_abstract(kNumPrivates, -1); + for (int c = 0; c < kNumPrivates; ++c) { + auto hand = HandString(c); + for (int ah = 0; ah < kNumAbstractHands; ++ah) { + if (absl::StrContains(kAbstraction[ah], hand)) { + concrete_to_abstract[c] = ah; + break; + } + } + if (concrete_to_abstract[c] == -1) { + SpielFatalError( + absl::StrCat("Abstraction not found for concrete hand '", hand, "'")); + } + } + return concrete_to_abstract; +} + +// Returns an abstraction. +int ChanceOutcomeToHandAbstraction(int outcome) { + static std::vector concrete_to_abstract = ConcreteToAbstract(); + return concrete_to_abstract[outcome]; +} + +// Abstract hand string. 
+std::string ChanceOutcomeToHandAbstractionString(int outcome) { + return kAbstraction[ChanceOutcomeToHandAbstraction(outcome)]; +} + +// Facts about the game +const GameType kGameType2p{ + /*short_name=*/"tiny_bridge_2p", + /*long_name=*/"Tiny Bridge (Uncontested)", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kIdentical, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + { + {"abstracted", + GameParameter(GameParameter::Type::kBool, /*is_mandatory=*/false)}, + }}; + +const GameType kGameType4p{ + /*short_name=*/"tiny_bridge_4p", + /*long_name=*/"Tiny Bridge (Contested)", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/4, + /*min_num_players=*/4, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/{} // no parameters +}; + +// Game for the play of the cards. We don't register this - it is for internal +// use only, computing the payoff of a tiny bridge auction. +const GameType kGameTypePlay{ + /*short_name=*/"tiny_bridge_play", + /*long_name=*/"Tiny Bridge (Play Phase)", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/false, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/false, + /*provides_observation_tensor=*/false, + /*parameter_specification=*/ + { + {"trumps", + GameParameter(GameParameter::Type::kString, /*is_mandatory=*/true)}, + {"leader", + GameParameter(GameParameter::Type::kString, /*is_mandatory=*/true)}, + {"hand_W", + GameParameter(GameParameter::Type::kString, /*is_mandatory=*/true)}, + {"hand_N", + GameParameter(GameParameter::Type::kString, /*is_mandatory=*/true)}, + {"hand_E", + GameParameter(GameParameter::Type::kString, /*is_mandatory=*/true)}, + {"hand_S", + GameParameter(GameParameter::Type::kString, /*is_mandatory=*/true)}, + }}; + +std::shared_ptr Factory2p(const GameParameters& params) { + return std::shared_ptr(new TinyBridgeGame2p(params)); +} + +std::shared_ptr Factory4p(const GameParameters& params) { + return std::shared_ptr(new TinyBridgeGame4p(params)); +} + +REGISTER_SPIEL_GAME(kGameType2p, Factory2p); +REGISTER_SPIEL_GAME(kGameType4p, Factory4p); + +// Score a played-out hand. 
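+// Illustrative values implied by the rules below: 2NT bid and made (both
+// tricks) scores 2 * 10 + 10 + 5 = 35, or 70 if doubled; 1H making an
+// overtrick scores 20; any contract that goes one down doubled scores -40.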
+int Score(int contract, int tricks, bool doubled, bool redoubled, int trumps) { + // -20 per undertrick + // +10 for 1H/S/NT (+10 extra if overtrick) + // +30 for 2H/S + // +35 for 2NT + const int contract_tricks = 1 + (contract - 1) / 3; + const int contract_result = tricks - contract_tricks; + const int double_factor = (1 + doubled) * (1 + redoubled); + if (contract_result < 0) return 20 * double_factor * contract_result; + int score = tricks * 10; + if (contract_tricks == 2) score += 10; + if (contract_tricks == 2 && trumps == 2) score += 5; + return score * double_factor; +} + +} // namespace + +std::string HandString(Action outcome) { + auto cards = ChanceOutcomeToCards(outcome); + return absl::StrCat(CardString(cards.first), CardString(cards.second)); +} + +std::string SeatString(Seat seat) { return std::string(1, kSeatChar[seat]); } + +TinyBridgeGame2p::TinyBridgeGame2p(const GameParameters& params) + : Game(kGameType2p, params), + is_abstracted_(ParameterValue("abstracted", false)) {} + +std::unique_ptr TinyBridgeGame2p::NewInitialState() const { + return std::unique_ptr( + new TinyBridgeAuctionState(shared_from_this(), is_abstracted_)); +} + +TinyBridgeGame4p::TinyBridgeGame4p(const GameParameters& params) + : Game(kGameType4p, params) {} + +std::unique_ptr TinyBridgeGame4p::NewInitialState() const { + return std::unique_ptr( + new TinyBridgeAuctionState(shared_from_this(), /*is_abstracted=*/false)); +} + +TinyBridgePlayGame::TinyBridgePlayGame(const GameParameters& params) + : Game(kGameTypePlay, params) {} + +std::unique_ptr TinyBridgePlayGame::NewInitialState() const { + int trumps = CharToTrumps(ParameterValue("trumps")[0]); + Seat leader = CharToSeat(ParameterValue("leader")[0]); + std::array holder; + for (Seat i : {kWest, kNorth, kEast, kSouth}) { + std::string hand = ParameterValue( + absl::StrCat("hand_", std::string(1, kSeatChar[i]))); + for (int j = 0; j < kNumTricks; ++j) { + int c = StringToCard(hand.substr(j * 2, 2)); + holder[c] = i; + } + } + return std::unique_ptr( + new TinyBridgePlayState(shared_from_this(), trumps, leader, holder)); +} + +Seat TinyBridgeAuctionState::PlayerToSeat(Player player) const { + return num_players_ == 2 ? Seat(player * 2) : Seat(player); +} + +Player TinyBridgeAuctionState::SeatToPlayer(Seat seat) const { + return num_players_ == 2 ? static_cast(seat) / 2 + : static_cast(seat); +} + +std::string TinyBridgeAuctionState::PlayerHandString(Player player, + bool abstracted) const { + if (!IsDealt(player)) return "??"; + return abstracted ? 
ChanceOutcomeToHandAbstractionString(actions_[player]) + : HandString(actions_[player]); +} + +std::string TinyBridgeAuctionState::DealString() const { + std::string deal; + for (auto player = Player{0}; player < num_players_; ++player) { + if (player != 0) deal.push_back(' '); + absl::StrAppend(&deal, SeatString(PlayerToSeat(player)), ":", + PlayerHandString(player, /*abstracted=*/false)); + } + return deal; +} + +TinyBridgeAuctionState::AuctionState TinyBridgeAuctionState::AnalyzeAuction() + const { + AuctionState rv; + rv.last_bid = Call::kPass; + rv.last_bidder = kInvalidSeat; + rv.doubler = kInvalidSeat; + rv.redoubler = kInvalidSeat; + for (int i = num_players_; i < actions_.size(); ++i) { + if (actions_[i] == Call::kDouble) { + rv.doubler = PlayerToSeat(i % num_players_); + } else if (actions_[i] == Call::kRedouble) { + rv.redoubler = PlayerToSeat(i % num_players_); + } else if (actions_[i] != Call::kPass) { + rv.last_bid = actions_[i]; + rv.last_bidder = PlayerToSeat(i % num_players_); + rv.doubler = kInvalidSeat; + rv.redoubler = kInvalidSeat; + } + } + return rv; +} + +int Score_p0(std::array holder, + const TinyBridgeAuctionState::AuctionState& state) { + if (state.last_bid == Call::kPass) return 0; + std::shared_ptr game(new TinyBridgePlayGame({})); + int trumps = (state.last_bid - 1) % 3; + Seat leader = Seat((state.last_bidder + 3) % 4); + Seat decl = Seat(state.last_bidder % 2); + TinyBridgePlayState play{game, trumps, leader, holder}; + const double tricks = + algorithms::AlphaBetaSearch(*game, &play, nullptr, -1, decl).first; + SPIEL_CHECK_GE(tricks, 0); + SPIEL_CHECK_LE(tricks, kNumTricks); + const int declarer_score = + Score(state.last_bid, tricks, state.doubler != kInvalidSeat, + state.redoubler != kInvalidSeat, trumps); + return (decl == 0) ? declarer_score : -declarer_score; +} + +// Score indexed by [WestHand][EastHand][Contract][LastBidder] +using ScoringTable = std::array< + std::array, kNumActions2p>, kNumPrivates>, + kNumPrivates>; + +// Calculates a single score. +double Score_2p_(Action hand0, Action hand1, + const TinyBridgeAuctionState::AuctionState& state) { + if (state.last_bid == kPass) return 0; + const double freq = 1. / 6; + double utility_p0 = 0; + std::array holders_2p; + std::fill(holders_2p.begin(), holders_2p.end(), kInvalidSeat); + const auto cards0 = ChanceOutcomeToCards(hand0); + holders_2p[cards0.first] = kWest; + holders_2p[cards0.second] = kWest; + const auto cards1 = ChanceOutcomeToCards(hand1); + holders_2p[cards1.first] = kEast; + holders_2p[cards1.second] = kEast; + std::array holders_4p; + for (int n0 = 0; n0 < 3; ++n0) { + for (int n1 = n0 + 1; n1 < 4; ++n1) { + int n = 0; + for (int i = 0; i < kDeckSize; ++i) { + if (holders_2p[i] == kInvalidSeat) { + holders_4p[i] = (n == n0 || n == n1) ? kNorth : kSouth; + ++n; + } else { + holders_4p[i] = holders_2p[i]; + } + } + utility_p0 += Score_p0(holders_4p, state) * freq; + } + } + return utility_p0; +} + +// Returns a cache of scores. 
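+// The table is indexed by [hand0][hand1][contract][last_bidder / 2], built
+// once and reused via the static local in Score_2p. Each entry averages the
+// alpha-beta play result over the six ways the four unseen cards can be
+// split between North and South.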
+ScoringTable MakeScores() { + ScoringTable scores; + for (int hand0 = 0; hand0 < kNumPrivates; ++hand0) { + for (int hand1 = 0; hand1 < kNumPrivates; ++hand1) { + if (!IsConsistent(hand0, hand1)) continue; + for (int contract = k1H; contract < kNumActions2p; ++contract) { + for (Seat last_bidder : {kWest, kEast}) { + scores[hand0][hand1][contract][last_bidder / 2] = + Score_2p_(hand0, hand1, + {contract, last_bidder, kInvalidSeat, kInvalidSeat}); + } + } + } + } + return scores; +} + +double Score_2p(Action hand0, Action hand1, + const TinyBridgeAuctionState::AuctionState& state) { + if (state.last_bid == kPass) return 0; + static const ScoringTable scoring_table = MakeScores(); + const double score = + scoring_table[hand0][hand1][state.last_bid][state.last_bidder / 2]; + return score; +} + +std::array TinyBridgeAuctionState::CardHolders() const { + std::array holder; + std::fill(holder.begin(), holder.end(), kInvalidSeat); + for (int i = 0; i < actions_.size() && i < num_players_; ++i) { + int action_id = actions_[i]; + const auto cards = ChanceOutcomeToCards(action_id); + holder[cards.first] = Seat(i); + holder[cards.second] = Seat(i); + } + return holder; +} + +void TinyBridgeAuctionState::DoApplyAction(Action action) { + actions_.push_back(action); + if (num_players_ == 2) { + if (actions_.size() >= 2 * num_players_ && actions_.back() == Call::kPass) { + is_terminal_ = true; + utility_p0 = Score_2p(actions_[0], actions_[1], AnalyzeAuction()); + } + } else { + if (actions_.size() >= 2 * num_players_ && + actions_[actions_.size() - 1] == Call::kPass && + actions_[actions_.size() - 2] == Call::kPass && + actions_[actions_.size() - 3] == Call::kPass) { + is_terminal_ = true; + utility_p0 = Score_p0(CardHolders(), AnalyzeAuction()); + } + } +} + +std::vector TinyBridgeAuctionState::LegalActions() const { + std::vector actions; + if (IsChanceNode()) { + return LegalChanceOutcomes(); + } else if (IsTerminal()) { + return {}; + } else { + auto state = AnalyzeAuction(); + actions.push_back(Call::kPass); + for (int bid = state.last_bid + 1; bid <= Call::k2NT; ++bid) { + actions.push_back(bid); + } + if (num_players_ == 4 && state.last_bidder != kInvalidSeat) { + if (state.last_bidder % 2 != CurrentPlayer() % 2) { + if (state.doubler == kInvalidSeat) actions.push_back(Call::kDouble); + } else { + if (state.doubler != kInvalidSeat && state.redoubler == kInvalidSeat) + actions.push_back(Call::kRedouble); + } + } + } + return actions; +} + +std::vector> TinyBridgeAuctionState::ChanceOutcomes() + const { + std::vector actions; + auto holder = CardHolders(); + for (int card1 = 0; card1 < kDeckSize; ++card1) { + if (holder[card1] != kInvalidSeat) continue; + for (int card2 = card1 + 1; card2 < kDeckSize; ++card2) { + if (holder[card2] != kInvalidSeat) continue; + actions.push_back(CardsToChanceOutcome(card2, card1)); + } + } + const int num_actions = actions.size(); + std::vector> outcomes; + outcomes.reserve(num_actions); + for (auto action : actions) { + outcomes.emplace_back(action, 1.0 / num_actions); + } + return outcomes; +} + +std::string TinyBridgeAuctionState::ActionToString(Player player, + Action action_id) const { + if (player == kChancePlayerId) { + return HandString(action_id); + } else { + return kActionStr[action_id]; + } +} + +int TinyBridgeAuctionState::CurrentPlayer() const { + if (IsTerminal()) return kTerminalPlayerId; + return actions_.size() < num_players_ ? 
kChancePlayerId + : actions_.size() % num_players_; +} + +std::string TinyBridgeAuctionState::AuctionString() const { + std::string auction{}; + for (int i = num_players_; i < actions_.size(); ++i) { + if (!auction.empty()) auction.push_back('-'); + auction.append(ActionToString(i % num_players_, actions_[i])); + } + return auction; +} + +std::string TinyBridgeAuctionState::ToString() const { + std::string deal = DealString(); + std::string auction = AuctionString(); + if (!auction.empty()) + return absl::StrCat(deal, " ", auction); + else + return deal; +} + +bool TinyBridgeAuctionState::IsTerminal() const { return is_terminal_; } + +std::vector TinyBridgeAuctionState::Returns() const { + if (!IsTerminal()) { + return std::vector(num_players_, 0.0); + } + + if (num_players_ == 2) { + return {utility_p0, utility_p0}; + } else { + // 4 player version. + return {utility_p0, -utility_p0, utility_p0, -utility_p0}; + } +} + +std::string TinyBridgeAuctionState::InformationStateString( + Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + std::string hand = PlayerHandString(player, is_abstracted_); + std::string dealer = RelativeSeatString(Seat::kWest, PlayerToSeat(player)); + std::string auction = AuctionString(); + if (!auction.empty()) + return absl::StrCat(hand, " ", dealer, " ", auction); + else + return hand; +} + +// Observation string is the player's cards plus the most recent bid, +// plus any doubles or redoubles. E.g. "HJSA 2NT:Us Dbl:RH RDbl:Pd" +// This is an observation for a player who holds HJ and SA. +// and redoubled by West. +// The most recent bid is 2NT by this player, which has been doubled by their +// right-hand-opponent and redoubled by their partner. So the most recent few +// calls must be: 2NT-Pass-Pass-Dbl-Pass-Pass-RDbl-Pass. +std::string TinyBridgeAuctionState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + std::string observation = PlayerHandString(player, is_abstracted_); + if (HasAuctionStarted()) { + auto state = AnalyzeAuction(); + if (state.last_bid != Call::kPass) { + absl::StrAppend( + &observation, " ", ActionToString(state.last_bidder, state.last_bid), + ":", RelativeSeatString(state.last_bidder, PlayerToSeat(player))); + } + if (state.doubler != kInvalidSeat) + absl::StrAppend(&observation, " ", "Dbl:", + RelativeSeatString(state.doubler, PlayerToSeat(player))); + if (state.redoubler != kInvalidSeat) + absl::StrAppend( + &observation, " ", + "RDbl:", RelativeSeatString(state.redoubler, PlayerToSeat(player))); + } + return observation; +} + +// Information state vector consists of: +// kNumCards bits showing which cards the observing player holds +// For 2p, kNumActions2p*2 bits showing which actions have been taken: +// For each action, the bits are [1, 0] if we took the action, +// [0, 1] if our partner took the action, and otherwise [0, 0]. +// For 4p, (kNumBids*3+1)*num_players bits showing which actions have been +// taken: +// For each player, 1 bit showing if they passed before the first bid +// For each bid, 4 bits showing who made it, 4 bits showing who doubled it, +// and 4 bits showing who redoubled it. +// Each set of 4 bits is relative the the current player. +void TinyBridgeAuctionState::InformationStateTensor( + Player player, absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + const int hand_size = is_abstracted_ ? 
kNumAbstractHands : kDeckSize; + const int auction_size = (num_players_ == 2) + ? kNumActions2p * num_players_ + : num_players_ + kNumBids * num_players_ * 3; + std::fill(values.begin(), values.end(), 0); + SPIEL_CHECK_EQ(values.size(), hand_size + auction_size); + if (IsDealt(player)) { + if (is_abstracted_) { + const int abstraction = ChanceOutcomeToHandAbstraction(actions_[player]); + values.at(abstraction) = 1; + } else { + const auto cards = ChanceOutcomeToCards(actions_[player]); + values.at(cards.first) = 1; + values.at(cards.second) = 1; + } + } + if (num_players_ == 2) { + for (int i = num_players_; i < actions_.size(); ++i) { + values.at(hand_size + actions_[i] * 2 + (i - player) % num_players_) = 1; + } + } else { + auto last_bid = Call::kPass; + auto observer = PlayerToSeat(player); + for (int i = num_players_; i < actions_.size(); ++i) { + int bidder = RelativeSeatIndex(Seat(i % num_players_), observer); + if (actions_[i] == Call::kPass) { + if (last_bid == Call::kPass) { + values.at(hand_size + bidder) = 1; + } + } else if (actions_[i] == Call::kDouble) { + values.at(hand_size + num_players_ + + (last_bid - 1) * (3 * num_players_) + bidder) = 1; + } else if (actions_[i] == Call::kRedouble) { + values.at(hand_size + num_players_ + + (last_bid - 1) * (3 * num_players_) + num_players_ + bidder) = + 1; + } else { + last_bid = Call(actions_[i]); + values.at(hand_size + num_players_ + + (last_bid - 1) * (3 * num_players_) + num_players_ * 2 + + bidder) = 1; + } + } + } +} + +// Information state vector consists of: +// kNumCards bits showing which cards the observing player holds +// For 2p: +// kNumActions2p bits showing the most recent action (one-hot) +// For 4p: +// kNumBids bits showing the most recent bid (one-hot) +// 4 bits showing who bid it (relative to the observing player) +// 4 bits showing who doubled it (relative to the observing player) +// 4 bits showing who redoubled it (relative to the observing player) +// 4 bits for the dealer +void TinyBridgeAuctionState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + const int hand_size = is_abstracted_ ? kNumAbstractHands : kDeckSize; + const int auction_size = + num_players_ == 2 ? 
kNumActions2p : kNumBids + 4 * num_players_; + std::fill(values.begin(), values.end(), 0); + SPIEL_CHECK_EQ(values.size(), hand_size + auction_size); + if (IsDealt(player)) { + if (is_abstracted_) { + const int abstraction = ChanceOutcomeToHandAbstraction(actions_[player]); + values.at(abstraction) = 1; + } else { + const auto cards = ChanceOutcomeToCards(actions_[player]); + values.at(cards.first) = 1; + values.at(cards.second) = 1; + } + } + if (num_players_ == 2) { + if (HasAuctionStarted()) { + values.at(hand_size + actions_.back()) = 1; + } + } else { + auto state = AnalyzeAuction(); + auto seat = PlayerToSeat(player); + if (state.last_bidder != kInvalidSeat) + values.at(hand_size + RelativeSeatIndex(state.last_bidder, seat)) = 1; + if (state.doubler != kInvalidSeat) + values.at(hand_size + kNumSeats + + RelativeSeatIndex(state.doubler, seat)) = 1; + if (state.redoubler != kInvalidSeat) + values.at(hand_size + kNumSeats * 2 + + RelativeSeatIndex(state.redoubler, seat)) = 1; + values.at(hand_size + kNumSeats * 3 + + RelativeSeatIndex(Seat::kWest, seat)) = 1; + if (state.last_bidder != kInvalidSeat) + values.at(hand_size + kNumSeats * 4 + state.last_bid - 1) = 1; + } +} + +std::unique_ptr TinyBridgeAuctionState::Clone() const { + return std::unique_ptr{new TinyBridgeAuctionState(*this)}; +} + +void TinyBridgeAuctionState::UndoAction(Player player, Action action) { + actions_.pop_back(); + history_.pop_back(); + --move_number_; + is_terminal_ = false; +} + +void TinyBridgePlayState::DoApplyAction(Action action) { + actions_.emplace_back(CurrentHand(), action); + if (actions_.size() % 4 == 0) { + Seat win_hand = actions_[actions_.size() - 4].first; + int win_card = actions_[actions_.size() - 4].second; + for (int i = actions_.size() - 3; i < actions_.size(); ++i) { + Seat hand = actions_[i].first; + int card = actions_[i].second; + if (Suit(card) == Suit(win_card)) { + if (Rank(card) > Rank(win_card)) { + win_card = card; + win_hand = hand; + } + } else if (Suit(card) == trumps_) { + win_card = card; + win_hand = hand; + } + } + winner_[actions_.size() / 4 - 1] = win_hand; + } +} + +std::vector TinyBridgePlayState::LegalActions() const { + std::vector actions; + const int hand = CurrentHand(); + for (int i = 0; i < kDeckSize; ++i) { + if (holder_[i] == hand && + (actions_.size() < 4 || + actions_[(4 + hand - leader_) % 4].second != i)) { + actions.push_back(i); + } + } + // Have to follow suit if we have two cards of different suits. + if (!actions_.empty() && actions.size() == 2 && + Suit(actions[0]) != Suit(actions[1])) { + return {Suit(actions[0]) == Suit(actions_[0].second) ? actions[0] + : actions[1]}; + } else { + return actions; + } +} + +Seat TinyBridgePlayState::CurrentHand() const { + return Seat(((actions_.size() < 4 ? 
leader_ : winner_[0]) + actions_.size()) % + 4); +} + +std::string TinyBridgePlayState::ActionToString(Player player, + Action action_id) const { + return CardString(action_id); +} + +bool TinyBridgePlayState::IsTerminal() const { + return actions_.size() == kDeckSize; +} + +std::vector TinyBridgePlayState::Returns() const { + if (!IsTerminal()) { + return std::vector(num_players_, 0.0); + } + + std::vector returns(num_players_); + for (const int winner : winner_) { + returns[winner & 1] += 1.0; + } + return returns; +} + +std::unique_ptr TinyBridgePlayState::Clone() const { + return std::unique_ptr{new TinyBridgePlayState(*this)}; +} + +void TinyBridgePlayState::UndoAction(Player player, Action action) { + actions_.pop_back(); + history_.pop_back(); + --move_number_; +} + +std::string TinyBridgePlayState::ToString() const { + std::array hands; + for (int i = 0; i < kDeckSize; ++i) { + hands[holder_[i]].append(CardString(i)); + } + std::string s; + for (int i = 0; i < kNumSeats; ++i) { + if (i > 0) s.push_back(' '); + s.append(absl::StrCat(std::string(1, kSeatChar[i]), ":", hands[i])); + } + s.append(absl::StrCat(" Trumps: ", std::string(1, kSuitChar[trumps_]), + " Leader:", std::string(1, kSeatChar[leader_]))); + for (const auto& action : actions_) { + s.append(absl::StrCat(" ", std::string(1, kSeatChar[action.first]), ":", + CardString(action.second))); + } + return s; +} + +bool IsConsistent(Action hand0, Action hand1) { + auto cards0 = ChanceOutcomeToCards(hand0); + auto cards1 = ChanceOutcomeToCards(hand1); + return cards0.first != cards1.first && cards0.second != cards1.second && + cards0.first != cards1.second && cards0.second != cards1.first; +} + +} // namespace tiny_bridge +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/tiny_bridge/tiny_bridge.h b/scenarios/bargaining/open_spiel/open_spiel/games/tiny_bridge/tiny_bridge.h new file mode 100644 index 0000000..5d6502d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/tiny_bridge/tiny_bridge.h @@ -0,0 +1,248 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_TINY_BRIDGE_H_ +#define OPEN_SPIEL_GAMES_TINY_BRIDGE_H_ + +#include +#include + +#include "open_spiel/spiel.h" + +// A very small version of bridge, with 8 cards in total, created by Edward +// Lockhart, inspired by a research project at University of Alberta by Michael +// Bowling, Kate Davison, and Nathan Sturtevant. For the mechanics of the full +// game, see https://en.wikipedia.org/wiki/Contract_bridge. +// +// This smaller game has two suits (hearts and spades), each with +// four cards (Jack, Queen, King, Ace). Each of the four players gets +// two cards each. +// +// The game comprises a bidding phase, in which the players bid for the +// right to choose the trump suit (or for there not to be a trump suit), and +// perhaps also to bid a 'slam' contract which scores bonus points. 
+// +// The play phase is not very interesting with only two tricks being played. +// For simplicity, we replace it with a perfect-information result, which is +// computed using minimax on a two-player perfect-information game representing +// the play phase. +// +// The game comes in two varieties - the full four-player version, and a +// simplified two-player version in which one partnership does not make +// any bids in the auction phase. +// +// Scoring is as follows, for the declaring partnership: +// +10 for making 1H/S/NT (+10 extra if overtrick) +// +30 for making 2H/S +// +35 for making 2NT +// -20 per undertrick +// Doubling (only in the 4p game) multiplies all scores by 2. Redoubling by a +// further factor of 2. +// +// An abstracted version of the game is supported, where the 28 possible hands +// are grouped into 12 buckets, using the following abstractions: +// - When holding only one card in a suit, we consider J/Q/K equivalent +// - We consider KQ and KJ in a single suit equivalent +// - We consider AK and AQ in a single suit equivalent (but not AJ) + +namespace open_spiel { +namespace tiny_bridge { + +inline constexpr int kNumBids = 6; // 1H, 1S, 1NT, 2H, 2S, 2NT +inline constexpr int kNumActions2p = 1 + kNumBids; // Plus Pass +inline constexpr int kNumActions4p = 3 + kNumBids; // Pass, Double, Redouble +enum Call { kPass = 0, k1H, k1S, k1NT, k2H, k2S, k2NT, kDouble, kRedouble }; +inline constexpr int kNumRanks = 4; +inline constexpr int kNumSuits = 2; +inline constexpr int kDeckSize = kNumRanks * kNumSuits; +inline constexpr int kNumSeats = 4; +inline constexpr int kNumTricks = kDeckSize / kNumSeats; +inline constexpr int kNumAbstractHands = 12; + +// Number of possible private states (hands) for a single player. +inline constexpr int kNumPrivates = (kDeckSize * (kDeckSize - 1)) / 2; +inline constexpr std::array kActionStr{ + "Pass", "1H", "1S", "1NT", "2H", "2S", "2NT", "Dbl", "RDbl"}; +enum Seat { kInvalidSeat = -1, kWest = 0, kNorth = 1, kEast = 2, kSouth = 3 }; + +// Two-player game. Only one partnership gets to bid, so this +// is a purely-cooperative two-player game. +class TinyBridgeGame2p : public Game { + public: + explicit TinyBridgeGame2p(const GameParameters& params); + int NumDistinctActions() const override { return kNumActions2p; } + std::unique_ptr NewInitialState() const override; + int NumPlayers() const override { return 2; } + double MinUtility() const override { return -40; } // Bid 2NT, 0 tricks + double MaxUtility() const override { return 35; } // Bid 2NT, 2 tricks + int MaxGameLength() const override { return 8; } + // TODO: verify whether this bound is tight and/or tighten it. + int MaxChanceNodesInHistory() const override { return MaxGameLength(); } + int MaxChanceOutcomes() const override { return kNumPrivates; } + std::vector InformationStateTensorShape() const override { + return {(is_abstracted_ ? kNumAbstractHands : kDeckSize) + + kNumActions2p * 2}; + } + std::vector ObservationTensorShape() const override { + return {(is_abstracted_ ? kNumAbstractHands : kDeckSize) + kNumActions2p}; + } + + private: + const bool is_abstracted_; +}; + +// Four-player game. This is a zero-sum game of two partnerships. 
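The scoring rules quoted in the header comment above (making a 1-level contract = +10 with +10 per overtrick, 2H/2S = +30, 2NT = +35, -20 per undertrick, doubling and redoubling each multiply by 2) can be sanity-checked against the utility bounds declared for the 2p game. The sketch below is illustrative only and is not the OpenSpiel `Score()` implementation; the function name and the assumption that a level-N contract needs N of the 2 available tricks are mine.

```python
def declarer_score(level, strain, tricks, doubled=False, redoubled=False):
    """Illustrative tiny_bridge scoring per the rules in the header comment.

    Assumes a level-N contract needs N of the 2 available tricks.
    """
    if tricks >= level:
        if level == 1:
            score = 10 + 10 * (tricks - level)       # +10 extra per overtrick
        else:
            score = 35 if strain == "NT" else 30     # 2NT vs 2H/2S
    else:
        score = -20 * (level - tricks)               # -20 per undertrick
    if doubled:
        score *= 2
    if redoubled:
        score *= 2
    return score

# Matches the 2p utility bounds declared above:
assert declarer_score(2, "NT", 2) == 35    # MaxUtility: bid 2NT, take 2 tricks
assert declarer_score(2, "NT", 0) == -40   # MinUtility: bid 2NT, take 0 tricks
# Also consistent with the 4p MinUtility of -160 (2NT redoubled, down two):
assert declarer_score(2, "NT", 0, doubled=True, redoubled=True) == -160
```

The real `Score()` may handle doubled overtricks differently; only the bounds quoted in the headers are checked here.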
+class TinyBridgeGame4p : public Game { + public: + explicit TinyBridgeGame4p(const GameParameters& params); + int NumDistinctActions() const override { return kNumActions4p; } + std::unique_ptr NewInitialState() const override; + int NumPlayers() const override { return 4; } + double MinUtility() const override { return -160; } + absl::optional UtilitySum() const override { return 0; } + double MaxUtility() const override { return 160; } + int MaxGameLength() const override { return 57; } + // TODO: verify whether this bound is tight and/or tighten it. + int MaxChanceNodesInHistory() const override { return MaxGameLength(); } + int MaxChanceOutcomes() const override { return kNumPrivates; } + std::vector InformationStateTensorShape() const override { + return {kDeckSize + (kNumBids * 3 + 1) * NumPlayers()}; + } + std::vector ObservationTensorShape() const override { + return {kDeckSize + kNumBids + 4 * NumPlayers()}; + } +}; + +// Play phase as a 2-player perfect-information game. +class TinyBridgePlayGame : public Game { + public: + explicit TinyBridgePlayGame(const GameParameters& params); + int NumDistinctActions() const override { return kDeckSize; } + std::unique_ptr NewInitialState() const override; + int NumPlayers() const override { return 2; } + double MinUtility() const override { return 0; } + double MaxUtility() const override { return kNumTricks; } + int MaxGameLength() const override { return 8; } +}; + +// State of an in-progress auction, either 2p or 4p. +class TinyBridgeAuctionState : public State { + public: + struct AuctionState { + Action last_bid; + Seat last_bidder; + Seat doubler; + Seat redoubler; + }; + + TinyBridgeAuctionState(std::shared_ptr game, bool is_abstracted) + : State(std::move(game)), is_abstracted_(is_abstracted) {} + TinyBridgeAuctionState(const TinyBridgeAuctionState&) = default; + + Player CurrentPlayer() const override; + std::vector LegalActions() const override; + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + void InformationStateTensor(Player player, + absl::Span values) const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + void UndoAction(Player player, Action action) override; + std::vector> ChanceOutcomes() const override; + std::string AuctionString() const; + std::string PlayerHandString(Player player, bool abstracted) const; + std::string DealString() const; + + protected: + void DoApplyAction(Action action) override; + + private: + bool is_terminal_ = false; + double utility_p0; + std::vector actions_; + bool is_abstracted_; + + bool IsDealt(Player player) const { return actions_.size() > player; } + bool HasAuctionStarted() const { return actions_.size() > num_players_; } + AuctionState AnalyzeAuction() const; + std::array CardHolders() const; + Seat PlayerToSeat(Player player) const; + Player SeatToPlayer(Seat seat) const; +}; + +// State of in-progress play. 
+class TinyBridgePlayState : public State { + public: + TinyBridgePlayState(std::shared_ptr game, int trumps, Seat leader, + std::array holder) + : State(std::move(game)), + trumps_(trumps), + leader_(leader), + holder_(holder) {} + TinyBridgePlayState(const TinyBridgePlayState&) = default; + + Player CurrentPlayer() const override { return CurrentHand() % 2; } + Seat CurrentHand() const; + + std::string ActionToString(Player player, Action action_id) const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::unique_ptr Clone() const override; + void UndoAction(Player player, Action action) override; + std::string ToString() const override; + std::vector LegalActions() const override; + + protected: + void DoApplyAction(Action action) override; + + private: + int trumps_; // The trump suit (or notrumps) + Seat leader_; // The hand who plays first to the first trick. + std::array holder_; // hand of the holder of each card + std::array winner_; // hand of the winner of each trick + std::vector> actions_; // (hand, card) +}; + +// String representation for the specified hand. +std::string HandString(Action outcome); + +// String representation for the specified seat. +std::string SeatString(Seat seat); + +// True if player 0 having private state hand0 is consistent with player 1 +// having private state hand1, i.e. the two hands have no cards in common. +bool IsConsistent(Action hand0, Action hand1); + +// The score for player 0 of the specified contract. +int Score_p0(std::array holder, + const TinyBridgeAuctionState::AuctionState& state); + +// For the two-player (purely cooperative) case, the expected score for +// declaring side in the specified contract. Uses a cache of values. +double Score_2p(Action hand0, Action hand1, + const TinyBridgeAuctionState::AuctionState& state); + +// Non-caching version of `Score_2p`. +double Score_2p_(Action hand0, Action hand1, + const TinyBridgeAuctionState::AuctionState& state); + +} // namespace tiny_bridge +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_TINY_BRIDGE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/tiny_bridge/tiny_bridge_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/tiny_bridge/tiny_bridge_test.cc new file mode 100644 index 0000000..b478e14 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/tiny_bridge/tiny_bridge_test.cc @@ -0,0 +1,66 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
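The tiny_bridge_test.cc file that follows drives both auction variants through OpenSpiel's generic harness (chance-outcome checks and random simulation). A rough Python-side smoke test, assuming the `pyspiel` bindings built by this repo's Dockerfile are importable, might look like the sketch below; the helper name and seed are arbitrary.

```python
import random
import pyspiel

def random_playout(game_name, seed=0):
    """Play one random episode of a registered OpenSpiel game and return payoffs."""
    rng = random.Random(seed)
    state = pyspiel.load_game(game_name).new_initial_state()
    while not state.is_terminal():
        if state.is_chance_node():
            outcomes, probs = zip(*state.chance_outcomes())
            state.apply_action(rng.choices(outcomes, weights=probs)[0])
        else:
            state.apply_action(rng.choice(state.legal_actions()))
    return state.returns()

print(random_playout("tiny_bridge_2p"))  # two identical payoffs (cooperative)
print(random_playout("tiny_bridge_4p"))  # [u, -u, u, -u]: zero-sum partnerships
```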
+ +#include "open_spiel/games/tiny_bridge/tiny_bridge.h" + +#include "open_spiel/algorithms/get_all_states.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace tiny_bridge { +namespace { + +namespace testing = open_spiel::testing; + +void BasicTinyBridge2pTests() { + testing::LoadGameTest("tiny_bridge_2p"); + testing::ChanceOutcomesTest(*LoadGame("tiny_bridge_2p")); + testing::CheckChanceOutcomes(*LoadGame("tiny_bridge_2p")); + testing::RandomSimTest(*LoadGame("tiny_bridge_2p"), 100); +} + +void BasicTinyBridge4pTests() { + testing::LoadGameTest("tiny_bridge_4p"); + testing::ChanceOutcomesTest(*LoadGame("tiny_bridge_4p")); + testing::RandomSimTest(*LoadGame("tiny_bridge_4p"), 100); + testing::RandomSimTestWithUndo(*LoadGame("tiny_bridge_4p"), 1); +} + +void CountStates2p() { + std::shared_ptr game = LoadGame("tiny_bridge_2p"); + auto states = + open_spiel::algorithms::GetAllStates(*game, /*depth_limit=*/-1, + /*include_terminals=*/true, + /*include_chance_states=*/false); + // Chance nodes are not counted. + // For each of 420 deals: + // 64 combinations of bids + // *2 for initial pass + // *2 for terminal pass + // -1 for double-counting the auction with a single 'Pass' + // => 420 * (64 * 4 - 1) = 107100 states + SPIEL_CHECK_EQ(states.size(), 107100); +} + +} // namespace +} // namespace tiny_bridge +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::tiny_bridge::BasicTinyBridge2pTests(); + open_spiel::tiny_bridge::BasicTinyBridge4pTests(); + open_spiel::tiny_bridge::CountStates2p(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/tiny_hanabi/tiny_hanabi.cc b/scenarios/bargaining/open_spiel/open_spiel/games/tiny_hanabi/tiny_hanabi.cc new file mode 100644 index 0000000..ad79d8d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/tiny_hanabi/tiny_hanabi.cc @@ -0,0 +1,205 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/tiny_hanabi/tiny_hanabi.h" + +#include + +#include "open_spiel/abseil-cpp/absl/strings/numbers.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace tiny_hanabi { + +namespace { + +// This is the payoff matrix from the bayesian Action Decoder paper. 
+constexpr char kDefaultPayoffString[] = + // Cards: 1, 1 + "10;0;0;4;8;4;10;0;0;" + // Cards: 1, 2 + "0;0;10;4;8;4;0;0;10;" + // Cards: 2, 1 + "0;0;10;4;8;4;0;0;0;" + // Cards: 2, 2 + "10;0;0;4;8;4;10;0;0"; + +std::vector ParsePayoffString(const std::string& str) { + std::vector pieces = absl::StrSplit(str, ';'); + std::vector payoff; + for (const auto& piece : pieces) { + int val; + if (!absl::SimpleAtoi(piece, &val)) { + SpielFatalError(absl::StrCat("Could not parse piece '", piece, + "' of payoff string '", str, + "' as an integer")); + } + payoff.push_back(val); + } + return payoff; +} + +// Facts about the game +const GameType kGameType{ + /*short_name=*/"tiny_hanabi", + /*long_name=*/"Tiny Hanabi", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kIdentical, + GameType::RewardModel::kTerminal, + /*max_num_players=*/10, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + { + {"num_players", GameParameter(2)}, + {"num_chance", GameParameter(2)}, + {"num_actions", GameParameter(3)}, + {"payoff", GameParameter(std::string(kDefaultPayoffString))}, + }}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new TinyHanabiGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +} // namespace + +std::unique_ptr TinyHanabiGame::NewInitialState() const { + return std::unique_ptr( + new TinyHanabiState(shared_from_this(), payoff_)); +} + +TinyHanabiGame::TinyHanabiGame(const GameParameters& params) + : Game(kGameType, params), + payoff_(ParameterValue("num_players"), + ParameterValue("num_chance"), + ParameterValue("num_actions"), + ParsePayoffString(ParameterValue("payoff"))) {} + +Player TinyHanabiState::CurrentPlayer() const { + const int history_size = history_.size(); + if (history_size < num_players_) return kChancePlayerId; + if (history_size == 2 * num_players_) return kTerminalPlayerId; + return history_size - num_players_; +} + +std::string TinyHanabiState::ActionToString(Player player, + Action action) const { + if (player == kChancePlayerId) + return absl::StrCat("d", action); + else + return absl::StrCat("p", player, "a", action); +} + +std::vector> TinyHanabiState::ChanceOutcomes() const { + if (!IsChanceNode()) return {}; + std::vector> outcomes; + const int num_outcomes = payoff_.NumChance(); + const double p = 1.0 / num_outcomes; + outcomes.reserve(num_outcomes); + for (int i = 0; i < num_outcomes; ++i) outcomes.emplace_back(i, p); + return outcomes; +} + +std::string TinyHanabiState::ToString() const { + std::string rv; + for (int i = 0; i < payoff_.NumPlayers() && i < history_.size(); ++i) { + if (i != 0) absl::StrAppend(&rv, " "); + absl::StrAppend(&rv, "p", i, ":d", history_[i].action); + } + for (int i = payoff_.NumPlayers(); i < history_.size(); ++i) { + absl::StrAppend(&rv, " p", history_[i].player, ":a", history_[i].action); + } + return rv; +} + +bool TinyHanabiState::IsTerminal() const { + return history_.size() == 2 * num_players_; +} + +std::vector TinyHanabiState::Returns() const { + const double value = IsTerminal() ? 
payoff_(history_) : 0.0; + return std::vector(payoff_.NumPlayers(), value); +} + +std::unique_ptr TinyHanabiState::Clone() const { + return std::unique_ptr(new TinyHanabiState(*this)); +} + +std::vector TinyHanabiState::LegalActions() const { + if (IsTerminal()) return {}; + std::vector actions(IsChanceNode() ? payoff_.NumChance() + : payoff_.NumActions()); + std::iota(actions.begin(), actions.end(), 0); + return actions; +} + +std::string TinyHanabiState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + std::string rv = absl::StrCat("p", player); + if (history_.size() > player) + absl::StrAppend(&rv, ":d", history_[player].action); + for (int i = payoff_.NumPlayers(); i < history_.size(); ++i) { + absl::StrAppend(&rv, " p", i - payoff_.NumPlayers(), ":a", + history_[i].action); + } + return rv; +} + +void TinyHanabiState::InformationStateTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + SPIEL_CHECK_EQ(values.size(), payoff_.NumChance() + payoff_.NumActions() * + payoff_.NumPlayers()); + std::fill(values.begin(), values.end(), 0); + if (history_.size() > player) values.at(history_[player].action) = 1; + for (int i = payoff_.NumPlayers(); i < history_.size(); ++i) { + values.at(payoff_.NumChance() + + (i - payoff_.NumPlayers()) * payoff_.NumActions() + + history_[i].action) = 1; + } +} + +void TinyHanabiState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + InformationStateTensor(player, values); +} + +std::string TinyHanabiState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + return InformationStateString(player); +} + +void TinyHanabiState::DoApplyAction(Action action) {} + +} // namespace tiny_hanabi +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/tiny_hanabi/tiny_hanabi.h b/scenarios/bargaining/open_spiel/open_spiel/games/tiny_hanabi/tiny_hanabi.h new file mode 100644 index 0000000..4bbd6a6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/tiny_hanabi/tiny_hanabi.h @@ -0,0 +1,134 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_TINY_HANABI_H_ +#define OPEN_SPIEL_GAMES_TINY_HANABI_H_ + +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/spiel.h" + +// This is the cooperative two-turn game defined in [1] +// +// Optimal score in this game is 10 (perfect cooperation). +// There is also a no-cooperation equilibrium scoring 8; some intermediate +// strategies are feasible also. 
+// +// Benchmark results: +// Bayesian Action Decoder 9.5 [1] +// Simplified Action Decoder 9.5 [2] +// Policy gradient / population-based training 9.0 [1] +// Independent Q learning 8.8 [2] +// +// Refs: +// [1] Bayesian Action Decoder, Foerster et al (2018) +// https://arxiv.org/abs/1811.01458 +// [2] Simplified Action Decoder, under review (2019) +// https://openreview.net/forum?id=B1xm3RVtwB + +namespace open_spiel { +namespace tiny_hanabi { + +class TinyHanabiPayoffMatrix { + public: + int operator()(const std::vector& history) const { + SPIEL_CHECK_EQ(num_players_ * 2, history.size()); + int idx = 0; + for (int i = 0; i < num_players_; ++i) + idx = (idx * num_chance_) + history[i].action; + for (int i = num_players_; i < 2 * num_players_; ++i) + idx = (idx * num_actions_) + history[i].action; + return payoff_[idx]; + } + TinyHanabiPayoffMatrix(int num_players, int num_chance, int num_actions, + std::vector payoff) + : num_players_(num_players), + num_chance_(num_chance), + num_actions_(num_actions), + payoff_(payoff) { + // Check payoff.size() == (num_chance * num_actions)**num_players + const int n = num_chance_ * num_actions_; + int expected_payoff_size = 1; + for (int i = 0; i < num_players; ++i) expected_payoff_size *= n; + SPIEL_CHECK_EQ(payoff_.size(), expected_payoff_size); + } + + int NumPlayers() const { return num_players_; } + int NumChance() const { return num_chance_; } + int NumActions() const { return num_actions_; } + int MinUtility() const { return *absl::c_min_element(payoff_); } + int MaxUtility() const { return *absl::c_max_element(payoff_); } + + private: + int num_players_; + int num_chance_; + int num_actions_; + std::vector payoff_; +}; + +class TinyHanabiGame : public Game { + public: + explicit TinyHanabiGame(const GameParameters& params); + int NumDistinctActions() const override { return payoff_.NumActions(); } + std::unique_ptr NewInitialState() const override; + int NumPlayers() const override { return payoff_.NumPlayers(); } + double MinUtility() const override { return payoff_.MinUtility(); } + double MaxUtility() const override { return payoff_.MaxUtility(); } + int MaxGameLength() const override { return payoff_.NumPlayers(); } + // TODO: verify whether this bound is tight and/or tighten it. 
+ int MaxChanceNodesInHistory() const override { return MaxGameLength(); } + int MaxChanceOutcomes() const override { return payoff_.NumChance(); } + std::vector InformationStateTensorShape() const override { + return {payoff_.NumChance() + payoff_.NumActions() * payoff_.NumPlayers()}; + } + std::vector ObservationTensorShape() const override { + return InformationStateTensorShape(); + } + + private: + TinyHanabiPayoffMatrix payoff_; +}; + +class TinyHanabiState : public State { + public: + TinyHanabiState(const TinyHanabiState&) = default; + TinyHanabiState(std::shared_ptr game, + TinyHanabiPayoffMatrix payoff) + : State(game), payoff_(payoff) {} + + Player CurrentPlayer() const override; + std::string ActionToString(Player player, Action action) const override; + std::vector> ChanceOutcomes() const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::unique_ptr Clone() const override; + std::vector LegalActions() const override; + std::string InformationStateString(Player player) const override; + void InformationStateTensor(Player player, + absl::Span values) const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + + private: + void DoApplyAction(Action action) override; + TinyHanabiPayoffMatrix payoff_; +}; + +} // namespace tiny_hanabi +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_TINY_HANABI_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/tiny_hanabi/tiny_hanabi_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/tiny_hanabi/tiny_hanabi_test.cc new file mode 100644 index 0000000..f9eb5ee --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/tiny_hanabi/tiny_hanabi_test.cc @@ -0,0 +1,53 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
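The `CountStates` test further down expects 4 * 13 = 52 non-chance states for tiny_hanabi. The same count can be reproduced from Python with OpenSpiel's enumeration helper; this sketch assumes the `open_spiel` Python package and the `pyspiel` bindings from the Dockerfile build are on the path.

```python
import pyspiel
from open_spiel.python.algorithms import get_all_states

game = pyspiel.load_game("tiny_hanabi")
states = get_all_states.get_all_states(
    game, depth_limit=-1, include_terminals=True, include_chance_states=False)
# 4 deals x (1 empty history + 3 first-player actions + 3*3 joint actions) = 52
print(len(states))
```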
+ +#include "open_spiel/games/tiny_hanabi/tiny_hanabi.h" + +#include "open_spiel/algorithms/get_all_states.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace tiny_hanabi { +namespace { + +namespace testing = open_spiel::testing; + +void BasicTinyHanabiTests() { + testing::LoadGameTest("tiny_hanabi"); + testing::ChanceOutcomesTest(*LoadGame("tiny_hanabi")); + testing::CheckChanceOutcomes(*LoadGame("tiny_hanabi")); + testing::RandomSimTest(*LoadGame("tiny_hanabi"), 100); +} + +void CountStates() { + std::shared_ptr game = LoadGame("tiny_hanabi"); + auto states = + open_spiel::algorithms::GetAllStates(*game, /*depth_limit=*/-1, + /*include_terminals=*/true, + /*include_chance_states=*/false); + // 4 initial deals + // 13 action states (1 no action, 3 first-player-only, 3*3 both players) + SPIEL_CHECK_EQ(states.size(), 4 * 13); +} + +} // namespace +} // namespace tiny_hanabi +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::tiny_hanabi::BasicTinyHanabiTests(); + open_spiel::tiny_hanabi::CountStates(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/trade_comm/trade_comm.cc b/scenarios/bargaining/open_spiel/open_spiel/games/trade_comm/trade_comm.cc new file mode 100644 index 0000000..cbf86b9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/trade_comm/trade_comm.cc @@ -0,0 +1,351 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/trade_comm/trade_comm.h" + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace trade_comm { + +namespace { + +// Facts about the game +const GameType kGameType{/*short_name=*/"trade_comm", + /*long_name=*/"Trading and Communication", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kGeneralSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"num_items", GameParameter(kDefaultNumItems)}}}; + +static std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new TradeCommGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +std::pair DecodeAllocation(Action chance_action, int num_items) { + return {chance_action / num_items, chance_action % num_items}; +} + +std::pair DecodeTrade(Action trade_action, int num_items) { + std::pair trade = {(trade_action - num_items) / num_items, + (trade_action - num_items) % num_items}; + return trade; +} +} // namespace + +std::string TradeCommState::ActionToString(Player player, + Action move_id) const { + if (player == kChancePlayerId) { + std::pair allocation = DecodeAllocation(move_id, num_items_); + return absl::StrCat("Allocate ", allocation.first, " ", allocation.second); + } else { + if (move_id < num_items_) { + return absl::StrCat("Utter ", move_id); + } else { + std::pair trade = DecodeTrade(move_id, num_items_); + return absl::StrCat("Trade ", trade.first, ":", trade.second); + } + } +} + +bool TradeCommState::IsTerminal() const { + return (phase_ == Phase::kTrade && trade_history_.size() == 2); +} + +std::vector TradeCommState::Returns() const { + if (!IsTerminal()) { + return {0.0, 0.0}; + } else { + // Check for a compatible trade. A compatible trade satisfies: + // - Agent X has item A, and offers A for B + // - Agent Y has item B, and offers B for A + std::pair trade0 = DecodeTrade(trade_history_[0], num_items_); + std::pair trade1 = DecodeTrade(trade_history_[1], num_items_); + if (items_[0] == trade0.first && items_[1] == trade1.first && + trade0.first == trade1.second && trade1.first == trade0.second) { + return {1.0, 1.0}; + } else { + return {0.0, 0.0}; + } + } +} + +std::string TradeCommState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + if (IsChanceNode()) { + return "ChanceNode -- no observation"; + } + + std::string str = ""; + + // Whose turn is it? + absl::StrAppend(&str, "Current turn: ", cur_player_, "\n"); + + // A player can see their own item. + absl::StrAppend(&str, "My item: ", items_[player], "\n"); + + // A player see all the utterances, in the right order: + absl::StrAppend(&str, "Phase: ", phase_ == Phase::kTrade ? 
"trade" : "comm"); + absl::StrAppend(&str, "\nComm history: "); + for (int comm : comm_history_) { + absl::StrAppend(&str, " ", comm); + } + absl::StrAppend(&str, "\n"); + + // Trade proposals are treated as simultaneous, so not included in the + // observation, but we do mark how many trade actions have happened to agents + // can work out what trading round they're on. + absl::StrAppend(&str, "Trade history size: ", trade_history_.size(), "\n"); + + // Players can see their own trades if they were made. + if (player < trade_history_.size()) { + absl::StrAppend(&str, "Observer's trade offer: "); + std::pair trade = DecodeTrade(trade_history_[player], num_items_); + absl::StrAppend(&str, " ", trade.first, ":", trade.second, "\n"); + } + + // Players can see the other trade offers after the round. + if (IsTerminal()) { + SPIEL_CHECK_LT(1 - player, trade_history_.size()); + absl::StrAppend(&str, "Other players's trade offer: "); + std::pair trade = + DecodeTrade(trade_history_[1 - player], num_items_); + absl::StrAppend(&str, " ", trade.first, ":", trade.second, "\n"); + } + + return str; +} + +std::string TradeCommState::InformationStateString(Player player) const { + // Currently the observation and information state are the same, since the + // game only contains one step of each phase. This may change in the + // multi-step game in the future. + return ObservationString(player); +} + +void TradeCommState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + SPIEL_CHECK_EQ(values.size(), game_->InformationStateTensorSize()); + std::fill(values.begin(), values.end(), 0); + + if (IsChanceNode()) { + // No observations at chance nodes. + return; + } + + SPIEL_CHECK_TRUE(player == 0 || player == 1); + + // 2 bits to indicate whose turn it is. + int offset = 0; + values[cur_player_] = 1; + offset += 2; + + // 1 bit to indicate whether it's terminal + values[offset] = IsTerminal() ? 1 : 0; + offset += 1; + + // Single bit for the phase: 0 = comm, 1 = trade. + values[offset] = (phase_ == Phase::kCommunication ? 0 : 1); + offset += 1; + + // one-hot vector for the item the observing player got + values[offset + items_[player]] = 1; + offset += num_items_; + + if (player < comm_history_.size()) { + // one-hot vector for the utterance the observing player made + values[offset + comm_history_[player]] = 1; + } + offset += num_items_; + + // one-hot vector for the utterance the observing player observed + if (1 - player < comm_history_.size()) { + values[offset + comm_history_[1 - player]] = 1; + } + offset += num_items_; + + // one-hot vector for the size of the trade history + values[offset + trade_history_.size()] = 1; + offset += 3; + + // one-hot vector for observing player's trade history if it has been made. + if (player < trade_history_.size()) { + const auto& trade = DecodeTrade(trade_history_[player], num_items_); + values[offset + trade.first] = 1; + values[offset + num_items_ + trade.second] = 1; + } + offset += 2 * num_items_; + + SPIEL_CHECK_EQ(offset, values.size()); +} + +void TradeCommState::InformationStateTensor(Player player, + absl::Span values) const { + // Currently the observation and information state are the same, since the + // game only contains one step of each phase. This may change in the + // multi-step game in the future. 
+ ObservationTensor(player, values); +} + +TradeCommState::TradeCommState(std::shared_ptr game, int num_items) + : State(game), + num_items_(num_items), + cur_player_(kChancePlayerId), + phase_(Phase::kCommunication) {} + +int TradeCommState::CurrentPlayer() const { + return IsTerminal() ? kTerminalPlayerId : cur_player_; +} + +void TradeCommState::DoApplyAction(Action action) { + if (IsChanceNode()) { + std::pair allocation = DecodeAllocation(action, num_items_); + items_.push_back(allocation.first); + items_.push_back(allocation.second); + cur_player_ = 0; + } else { + if (phase_ == Phase::kCommunication) { + comm_history_.push_back(action); + if (comm_history_.size() == 2) { + phase_ = Phase::kTrade; + } + cur_player_ = NextPlayerRoundRobin(cur_player_, num_players_); + } else { + trade_history_.push_back(action); + cur_player_ = NextPlayerRoundRobin(cur_player_, num_players_); + } + } +} + +std::vector TradeCommState::LegalActions() const { + if (IsChanceNode()) { + return LegalChanceOutcomes(); + } else if (IsTerminal()) { + return {}; + } else if (phase_ == Phase::kCommunication) { + // Can utter anything. Utterances are actions 0 to num_items_ - 1 for now. + std::vector legal_actions; + legal_actions.reserve(num_items_); + for (int i = 0; i < num_items_; ++i) { + legal_actions.push_back(i); + } + return legal_actions; + } else if (phase_ == Phase::kTrade) { + // 1:1 trades for k items = k*k actions (includes trading an item for the + // same item) starting at num_items_. + std::vector legal_actions; + int num_trade_actions = num_items_ * num_items_; + legal_actions.reserve(num_trade_actions); + for (int i = 0; i < num_trade_actions; ++i) { + legal_actions.push_back(num_items_ + i); + } + return legal_actions; + } else { + SpielFatalError("Invalid phase?"); + } +} + +std::vector> TradeCommState::ChanceOutcomes() const { + SPIEL_CHECK_TRUE(IsChanceNode()); + int num_outcomes = num_items_ * num_items_; + std::vector> outcomes; + outcomes.reserve(num_outcomes); + for (int i = 0; i < num_outcomes; ++i) { + outcomes.push_back({i, 1.0 / num_outcomes}); + } + return outcomes; +} + +std::string TradeCommState::ToString() const { + if (IsChanceNode()) { + return "Initial chance node"; + } + + std::string str = absl::StrCat("Items: ", absl::StrJoin(items_, " ")); + absl::StrAppend(&str, + "\nPhase: ", phase_ == Phase::kTrade ? 
"trade" : "comm"); + absl::StrAppend(&str, "\nComm history: ", absl::StrJoin(comm_history_, " ")); + absl::StrAppend(&str, "\nTrade history:"); + for (Action trade_action : trade_history_) { + std::pair trade = DecodeTrade(trade_action, num_items_); + absl::StrAppend(&str, " ", trade.first, ":", trade.second); + } + absl::StrAppend(&str, "\n"); + + return str; +} + +std::unique_ptr TradeCommState::Clone() const { + return std::unique_ptr(new TradeCommState(*this)); +} + +TradeCommGame::TradeCommGame(const GameParameters& params) + : Game(kGameType, params), + num_items_(ParameterValue("num_items", kDefaultNumItems)) {} + +int TradeCommGame::NumDistinctActions() const { + return num_items_ + // utterances + num_items_ * num_items_; // 1:1 trades +} + +std::vector TradeCommGame::ObservationTensorShape() const { + return { + 2 + // one hot vector for whose turn it is + 1 + // one bit to indicate whether the state is terminal + 1 + // a single bit indicating the phase (comm or trade) + num_items_ + // one-hot vector for the item the player got + num_items_ + // one-hot vector for the utterance the player made + num_items_ + // one-hot vector for the utterance the player observed + 3 + // trade history size + 2 * num_items_ // observer's trade if made. + }; +} + +std::vector TradeCommGame::InformationStateTensorShape() const { + // Currently the observation and information state are the same, since the + // game only contains one step of each phase. This may change in the + // multi-step game in the future. + return ObservationTensorShape(); +} + +} // namespace trade_comm +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/trade_comm/trade_comm.h b/scenarios/bargaining/open_spiel/open_spiel/games/trade_comm/trade_comm.h new file mode 100644 index 0000000..3dda77a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/trade_comm/trade_comm.h @@ -0,0 +1,119 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_TRADE_COMM_H_ +#define OPEN_SPIEL_GAMES_TRADE_COMM_H_ + +#include +#include +#include +#include + +#include "open_spiel/spiel.h" + +// A simple communication game inspired by trading, where agents receive +// private items, send (arbitrary) utterances, and then have to commit to a +// trade. +// +// First agent receives a random item a set of K unique items. Second agent +// also receives a random item. Both items are private. Then, the first agent +// can make a single utterance from a set of K utterances, which the second +// agent observes. The second agent can do the same (which the first agent +// observes). Then each of the agents secretly chooses a 1:1 trade action in +// private. If they choose a compatible trade (i.e. agents trade the item they +// have for the item the other agent has), they each get a reward of 1. +// Otherwise, they both get 0. 
+// +// This current variant is the simplest version of more complex communication +// games for trading. Ultimately, we plan to expand so that the communication is +// longer and vectorized, and the commitment round is multi-step. +// +// Parameters: +// "num_items" int number of distinct items (K) (default = 10) +// + +namespace open_spiel { +namespace trade_comm { + +constexpr int kDefaultNumItems = 10; +constexpr int kDefaultNumPlayers = 2; +constexpr int kWinUtility = 1; + +enum class Phase { + kCommunication, + kTrade, +}; + +class TradeCommState : public State { + public: + TradeCommState(std::shared_ptr game, int num_items); + TradeCommState(const TradeCommState&) = default; + + Player CurrentPlayer() const override; + std::string ActionToString(Player player, Action move_id) const override; + std::vector> ChanceOutcomes() const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + void InformationStateTensor(Player player, + absl::Span values) const override; + std::string InformationStateString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::string ObservationString(Player player) const override; + + std::unique_ptr Clone() const override; + std::vector LegalActions() const override; + + protected: + void DoApplyAction(Action action) override; + + private: + const int num_items_; + int cur_player_; + Phase phase_; + std::vector items_; + std::vector comm_history_; + std::vector trade_history_; +}; + +class TradeCommGame : public Game { + public: + explicit TradeCommGame(const GameParameters& params); + + int NumDistinctActions() const override; + std::unique_ptr NewInitialState() const override { + return std::unique_ptr( + new TradeCommState(shared_from_this(), num_items_)); + } + int MaxChanceOutcomes() const override { return num_items_ * num_items_; } + + int MaxGameLength() const override { return 4; } + // TODO: verify whether this bound is tight and/or tighten it. + int MaxChanceNodesInHistory() const override { return MaxGameLength(); } + + int NumPlayers() const override { return kDefaultNumPlayers; } + double MaxUtility() const override { return kWinUtility; } + double MinUtility() const override { return 0; } + std::vector ObservationTensorShape() const override; + std::vector InformationStateTensorShape() const override; + + private: + const int num_items_; +}; + +} // namespace trade_comm +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_TRADE_COMM_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/trade_comm/trade_comm_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/trade_comm/trade_comm_test.cc new file mode 100644 index 0000000..a252f53 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/trade_comm/trade_comm_test.cc @@ -0,0 +1,97 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
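The trade_comm tests that follow script a full episode: one chance allocation, two utterances, then two trade proposals. The successful-trade case can be replayed from Python as below (assuming `pyspiel` is importable); the action numbers are taken directly from `SuccessfulTradeDifferentItemsTest`.

```python
import pyspiel

game = pyspiel.load_game("trade_comm")   # default num_items = 10
state = game.new_initial_state()
state.apply_action(26)                   # chance: player 0 gets item 2, player 1 gets item 6
state.apply_action(1)                    # player 0 utters 1
state.apply_action(8)                    # player 1 utters 8
state.apply_action(10 + 2 * 10 + 6)      # player 0 offers item 2 for item 6
state.apply_action(10 + 6 * 10 + 2)      # player 1 offers item 6 for item 2
assert state.is_terminal()
print(state.returns())                   # expected: [1.0, 1.0]
```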
+ +#include "open_spiel/games/trade_comm/trade_comm.h" + +#include +#include +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace trade_comm { +namespace { + +namespace testing = open_spiel::testing; + +void BasicTradeCommTests() { + testing::RandomSimTest(*LoadGame("trade_comm"), 100); +} + +void SuccessfulTradeDifferentItemsTest() { + std::shared_ptr game = LoadGame("trade_comm"); + std::unique_ptr state = game->NewInitialState(); + state->ApplyAction(26); // allocate: first player gets item 2, second gets 6 + state->ApplyAction(1); // Utterance 1 + state->ApplyAction(8); // Utterance 8 + state->ApplyAction(10 + 2 * 10 + 6); // giving 2 for 6 + state->ApplyAction(10 + 6 * 10 + 2); // giving 6 for 2 + std::cout << state->ToString() << std::endl; + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_EQ(state->PlayerReturn(0), 1.0); + SPIEL_CHECK_EQ(state->PlayerReturn(1), 1.0); +} + +void SuccessfulTradeSameItemsTest() { + std::shared_ptr game = LoadGame("trade_comm"); + std::unique_ptr state = game->NewInitialState(); + state->ApplyAction(33); // allocate: first player gets item 3, second gets 3 + state->ApplyAction(2); // Utterance 2 + state->ApplyAction(8); // Utterance 8 + state->ApplyAction(10 + 3 * 10 + 3); // giving 3 for 3 + state->ApplyAction(10 + 3 * 10 + 3); // giving 3 for 3 + std::cout << state->ToString() << std::endl; + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_EQ(state->PlayerReturn(0), 1.0); + SPIEL_CHECK_EQ(state->PlayerReturn(1), 1.0); +} + +void UnsuccessfulTradesTest() { + std::shared_ptr game = LoadGame("trade_comm"); + + // P0 gets item 7, p1 gets item 1. + // Only successful trade is {7, 1, 1, 7} which corresponds to p0 plays 7:1 + // and p1 plays 1:7 + for (std::array trade : std::vector>({ + // Format: { p0 giving, p0 getting, p1 giving, p0 getting } + {0, 1, 1, 7}, // p0 mismatching the give + {7, 2, 1, 7}, // p0 mismatching the get + {7, 1, 3, 7}, // p1 mismatching the give + {7, 1, 1, 4} // p1 mismatching the get + })) { + std::unique_ptr state = game->NewInitialState(); + state->ApplyAction(71); // first player gets item 7, second gets 1 + state->ApplyAction(0); // Utterance 0 + state->ApplyAction(6); // Utterance 6 + state->ApplyAction(10 + trade[0] * 10 + trade[1]); + state->ApplyAction(10 + trade[2] * 10 + trade[3]); + std::cout << state->ToString() << std::endl; + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_EQ(state->PlayerReturn(0), 0.0); + SPIEL_CHECK_EQ(state->PlayerReturn(1), 0.0); + } +} + +} // namespace +} // namespace trade_comm +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::trade_comm::BasicTradeCommTests(); + open_spiel::trade_comm::SuccessfulTradeDifferentItemsTest(); + open_spiel::trade_comm::SuccessfulTradeSameItemsTest(); + open_spiel::trade_comm::UnsuccessfulTradesTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/twenty_forty_eight/2048.cc b/scenarios/bargaining/open_spiel/open_spiel/games/twenty_forty_eight/2048.cc new file mode 100644 index 0000000..bbeda11 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/twenty_forty_eight/2048.cc @@ -0,0 +1,417 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "open_spiel/games/twenty_forty_eight/2048.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "open_spiel/abseil-cpp/absl/strings/str_cat.h"
+#include "open_spiel/spiel_globals.h"
+#include "open_spiel/spiel_utils.h"
+#include "open_spiel/utils/tensor_view.h"
+
+namespace open_spiel {
+namespace twenty_forty_eight {
+namespace {
+
+constexpr std::array<Action, 4> kPlayerActions = {kMoveUp, kMoveRight,
+                                                  kMoveDown, kMoveLeft};
+
+// Facts about the game.
+const GameType kGameType{
+    /*short_name=*/"2048",
+    /*long_name=*/"2048",
+    GameType::Dynamics::kSequential,
+    GameType::ChanceMode::kExplicitStochastic,
+    GameType::Information::kPerfectInformation,
+    GameType::Utility::kGeneralSum,
+    GameType::RewardModel::kRewards,
+    /*max_num_players=*/1,
+    /*min_num_players=*/1,
+    /*provides_information_state_string=*/false,
+    /*provides_information_state_tensor=*/false,
+    /*provides_observation_string=*/true,
+    /*provides_observation_tensor=*/true,
+    {{"max_tile", GameParameter(kDefaultMaxTile)}}};
+
+std::shared_ptr<const Game> Factory(const GameParameters& params) {
+  return std::shared_ptr<const Game>(new TwentyFortyEightGame(params));
+}
+
+REGISTER_SPIEL_GAME(kGameType, Factory);
+
+RegisterSingleTensorObserver single_tensor(kGameType.short_name);
+
+constexpr bool InBounds(int r, int c) {
+  return r >= 0 && r < kRows && c >= 0 && c < kColumns;
+}
+
+// An array that dictates the order of traversal of row and column coordinates
+// by direction. E.g., kTraversals[direction][0] is an array of size four
+// referring to the row order, and kTraversals[direction][1] is an array of
+// size four referring to the column order.
+constexpr std::array, 2>, 4> kTraversals = {{ + {{{0, 1, 2, 3}, {0, 1, 2, 3}}}, // Up + {{{0, 1, 2, 3}, {3, 2, 1, 0}}}, // Right + {{{3, 2, 1, 0}, {0, 1, 2, 3}}}, // Down + {{{0, 1, 2, 3}, {0, 1, 2, 3}}} // Left +}}; +} // namespace + +TwentyFortyEightState::TwentyFortyEightState(std::shared_ptr game) + : State(game), + parent_game_(open_spiel::down_cast(*game)), + board_(std::vector(kRows * kColumns)) {} + +void TwentyFortyEightState::SetCustomBoard(const std::vector& board_seq) { + current_player_ = 0; + for (int r = 0; r < kRows; r++) { + for (int c = 0; c < kColumns; c++) { + SetBoard(r, c, Tile(board_seq[r * kColumns + c], false)); + } + } +} + +ChanceAction TwentyFortyEightState::SpielActionToChanceAction( + Action action) const { + std::vector values = + UnrankActionMixedBase(action, {kRows, kColumns, kChanceTiles.size()}); + return ChanceAction(values[0], values[1], values[2]); +} + +Action TwentyFortyEightState::ChanceActionToSpielAction( + ChanceAction move) const { + std::vector action_bases = {kRows, kColumns, kChanceTiles.size()}; + return RankActionMixedBase(action_bases, + {move.row, move.column, move.is_four}); +} + +bool TwentyFortyEightState::CellAvailable(int r, int c) const { + return BoardAt(r, c).value == 0; +} + +constexpr Coordinate GetVector(int direction) { + switch (direction) { + case kMoveUp: + return Coordinate(-1, 0); + case kMoveRight: + return Coordinate(0, 1); + case kMoveDown: + return Coordinate(1, 0); + case kMoveLeft: + return Coordinate(0, -1); + default: + SpielFatalError("Unrecognized direction"); + } +} + +std::array TwentyFortyEightState::FindFarthestPosition( + int r, int c, int direction) const { + // Progress towards the vector direction until an obstacle is found + Coordinate prev = Coordinate(r, c); + Coordinate direction_diff = GetVector(direction); + do { + prev = Coordinate(r, c); + r += direction_diff.row; + c += direction_diff.column; + } while (InBounds(r, c) && CellAvailable(r, c)); + return std::array{prev, Coordinate(r, c)}; +} + +bool TwentyFortyEightState::TileMatchAvailable(int r, int c) const { + int tile = BoardAt(r, c).value; + if (tile > 0) { + for (int direction : kPlayerActions) { + Coordinate vector = GetVector(direction); + int other = GetCellContent(r + vector.row, c + vector.column); + if (other > 0 && other == tile) { + return true; // These two tiles can be merged + } + } + } + return false; +} + +// Check for available matches between tiles (more expensive check) +bool TwentyFortyEightState::TileMatchesAvailable() const { + for (int r = 0; r < kRows; r++) { + for (int c = 0; c < kColumns; c++) { + if (TileMatchAvailable(r, c)) { + return true; + } + } + } + return false; +} + +void TwentyFortyEightState::PrepareTiles() { + for (int r = 0; r < kRows; r++) { + for (int c = 0; c < kColumns; c++) { + SetTileIsMerged(r, c, false); + } + } +} + +int TwentyFortyEightState::GetCellContent(int r, int c) const { + if (!InBounds(r, c)) return 0; + return BoardAt(r, c).value; +} + +void TwentyFortyEightState::DoApplyAction(Action action) { + if (IsChanceNode()) { + // The original 2048 game starts with two random tiles. To achieve this, + // an extra move is given to the chance player during the beginning of the + // game. + if (!extra_chance_turn_) { + current_player_ = 0; + } + extra_chance_turn_ = false; + + if (action == kNoCellAvailableAction) { + return; + } + ChanceAction chance_action = SpielActionToChanceAction(action); + SetBoard( + chance_action.row, chance_action.column, + Tile(chance_action.is_four ? 
kChanceTiles[1] : kChanceTiles[0], false)); + return; + } + action_score_ = 0; + const std::array, 2>& traversals = kTraversals[action]; + PrepareTiles(); + for (int r : traversals[0]) { + for (int c : traversals[1]) { + int tile = GetCellContent(r, c); + if (tile > 0) { + bool moved = false; + std::array positions = + FindFarthestPosition(r, c, action); + Coordinate farthest_pos = positions[0]; + Coordinate next_pos = positions[1]; + int next_cell = GetCellContent(next_pos.row, next_pos.column); + if (next_cell > 0 && next_cell == tile && + !BoardAt(next_pos).is_merged) { + int merged = tile * 2; + action_score_ += merged; + SetBoard(next_pos.row, next_pos.column, Tile(merged, true)); + moved = true; + } else if (farthest_pos.row != r || farthest_pos.column != c) { + SetBoard(farthest_pos.row, farthest_pos.column, Tile(tile, false)); + moved = true; + } + if (moved) { + SetBoard(r, c, Tile(0, false)); + current_player_ = kChancePlayerId; + } + } + } + } + total_score_ += action_score_; + total_actions_++; +} + +bool TwentyFortyEightState::DoesActionChangeBoard(Action action) const { + const std::array, 2>& traversals = kTraversals[action]; + for (int r : traversals[0]) { + for (int c : traversals[1]) { + int tile = GetCellContent(r, c); + if (tile > 0) { + std::array positions = + FindFarthestPosition(r, c, action); + Coordinate farthest_pos = positions[0]; + Coordinate next_pos = positions[1]; + int next_cell = GetCellContent(next_pos.row, next_pos.column); + if (next_cell > 0 && next_cell == tile && + !BoardAt(next_pos).is_merged) { + return true; + } else if (farthest_pos.row != r || farthest_pos.column != c) { + return true; + } + } + } + } + return false; +} + +std::string TwentyFortyEightState::ActionToString(Player player, + Action action_id) const { + if (player == kChancePlayerId) { + if (action_id == kNoCellAvailableAction) { + return "No Cell Available"; + } + ChanceAction chance_action = SpielActionToChanceAction(action_id); + return absl::StrCat(chance_action.is_four ? 4 : 2, " added to row ", + chance_action.row + 1, ", column ", + chance_action.column + 1); + } + switch (action_id) { + case kMoveUp: + return "Up"; + case kMoveRight: + return "Right"; + case kMoveDown: + return "Down"; + case kMoveLeft: + return "Left"; + default: + return "Invalid action"; + } +} + +int TwentyFortyEightState::AvailableCellCount() const { + int count = 0; + for (int r = 0; r < kRows; r++) { + for (int c = 0; c < kColumns; c++) { + if (BoardAt(r, c).value == 0) { + count++; + } + } + } + return count; +} + +ActionsAndProbs TwentyFortyEightState::ChanceOutcomes() const { + int count = AvailableCellCount(); + if (count == 0) { + return {{kNoCellAvailableAction, 1.0}}; + } + ActionsAndProbs action_and_probs; + action_and_probs.reserve(count * 2); + for (int r = 0; r < kRows; r++) { + for (int c = 0; c < kColumns; c++) { + if (BoardAt(r, c).value == 0) { + // 2 appearing randomly on the board should be 9 times as likely as a 4. + action_and_probs.emplace_back( + ChanceActionToSpielAction(ChanceAction(r, c, false)), .9 / count); + action_and_probs.emplace_back( + ChanceActionToSpielAction(ChanceAction(r, c, true)), .1 / count); + } + } + } + return action_and_probs; +} + +std::vector TwentyFortyEightState::LegalActions() const { + if (IsTerminal()) { + return {}; + } + if (IsChanceNode()) { + return LegalChanceOutcomes(); + } + + // Construct a vector from the array. 
+ std::vector actions = + std::vector(kPlayerActions.begin(), kPlayerActions.end()); + + std::vector actions_allowed = {}; + for (Action action : actions) { + if (DoesActionChangeBoard(action)) actions_allowed.push_back(action); + } + return actions_allowed; +} + +std::string TwentyFortyEightState::ToString() const { + std::string str; + for (int r = 0; r < kRows; ++r) { + for (int c = 0; c < kColumns; ++c) { + std::string tile = std::to_string(BoardAt(r, c).value); + absl::StrAppend(&str, std::string(5 - tile.length(), ' ')); + absl::StrAppend(&str, tile); + } + absl::StrAppend(&str, "\n"); + } + return str; +} + +bool TwentyFortyEightState::IsTerminal() const { + if (move_number_ >= parent_game_.MaxGameLength()) { + return true; + } + + // Scan the board. + int count = 0; + int tile_matches_available = 0; + for (int r = 0; r < kRows; r++) { + for (int c = 0; c < kColumns; c++) { + // Check for 2048, if necessary, + if (BoardAt(r, c).value == parent_game_.max_tile()) { + return true; + } + + // Check for increase of available cell count. + if (BoardAt(r, c).value == 0) { + count++; + } + + // Check for tile matches. + if (TileMatchAvailable(r, c)) { + tile_matches_available++; + } + } + } + + if (count == 0 && tile_matches_available == 0) { + return true; + } else { + return false; + } +} + +std::vector TwentyFortyEightState::Rewards() const { + return {static_cast(action_score_)}; +} + +std::vector TwentyFortyEightState::Returns() const { + return {static_cast(total_score_)}; +} + +std::string TwentyFortyEightState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return HistoryString(); +} + +std::string TwentyFortyEightState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void TwentyFortyEightState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + TensorView<2> view(values, {kRows, kColumns}, true); + for (int row = 0; row < kRows; row++) { + for (int column = 0; column < kColumns; column++) { + view[{row, column}] = BoardAt(row, column).value; + } + } +} + +TwentyFortyEightGame::TwentyFortyEightGame(const GameParameters& params) + : Game(kGameType, params), + max_tile_(ParameterValue("max_tile", kDefaultMaxTile)) {} + +int TwentyFortyEightGame::NumDistinctActions() const { + return kPlayerActions.size(); +} + +} // namespace twenty_forty_eight +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/twenty_forty_eight/2048.h b/scenarios/bargaining/open_spiel/open_spiel/games/twenty_forty_eight/2048.h new file mode 100644 index 0000000..b65f859 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/twenty_forty_eight/2048.h @@ -0,0 +1,176 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef OPEN_SPIEL_GAMES_2048_H_ +#define OPEN_SPIEL_GAMES_2048_H_ + +// Implementation of the popular game 2048. +// https://en.wikipedia.org/wiki/2048_(video_game) +// https://github.com/gabrielecirulli/2048 +// +// The objective of the game is to slide numbered tiles on a grid to combine +// them to create bigger tiles. +// +// Some notes about this implementation: +// - End condition: +// The game ends when a player has no more valid actions, or a maximum tile +// value is reached (default: 2048). +// +// Parameters: +// max_tile int End the game when max_tile is reached? +// (default: 2048) + +#include +#include +#include +#include + +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace twenty_forty_eight { + +enum Move { kMoveUp = 0, kMoveRight = 1, kMoveDown = 2, kMoveLeft = 3 }; + +constexpr int kNumPlayers = 1; +constexpr int kRows = 4; +constexpr int kColumns = 4; + +constexpr int kDefaultMaxTile = 2048; + +// The chance tiles that randomly appear on the board after each move +constexpr std::array kChanceTiles = {2, 4}; +const int kNoCellAvailableAction = kRows * kColumns * kChanceTiles.size(); + +struct Coordinate { + int row, column; + constexpr Coordinate(int _row, int _column) : row(_row), column(_column) {} +}; + +struct ChanceAction { + int row; + int column; + bool is_four; + ChanceAction(int _row, int _column, bool _is_four) + : row(_row), column(_column), is_four(_is_four) {} +}; + +struct Tile { + int value; + bool is_merged; + Tile() : value(0), is_merged(false) {} + Tile(int _value, bool _is_merged) : value(_value), is_merged(_is_merged) {} +}; + +class TwentyFortyEightGame; // Needed for back-pointer to parent game. + +// State of an in-play game. +class TwentyFortyEightState : public State { + public: + explicit TwentyFortyEightState(std::shared_ptr game); + Player CurrentPlayer() const override { + return IsTerminal() ? 
kTerminalPlayerId : current_player_; + } + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override { + return std::unique_ptr(new TwentyFortyEightState(*this)); + } + std::vector Rewards() const override; + std::vector LegalActions() const override; + ActionsAndProbs ChanceOutcomes() const override; + + // Game-specific methods outside the core API: + Tile BoardAt(int row, int column) const { + return board_[row * kColumns + column]; + } + Tile BoardAt(Coordinate coordinate) const { + return board_[coordinate.row * kColumns + coordinate.column]; + } + void SetCustomBoard(const std::vector& board_seq); + + protected: + void DoApplyAction(Action action) override; + + private: + ChanceAction SpielActionToChanceAction(Action action) const; + Action ChanceActionToSpielAction(ChanceAction move) const; + void SetBoard(int row, int column, Tile tile) { + board_[row * kColumns + column] = tile; + } + void SetTileIsMerged(int row, int column, bool is_merged) { + board_[row * kColumns + column].is_merged = is_merged; + } + int AvailableCellCount() const; + bool CellAvailable(int r, int c) const; + std::array FindFarthestPosition(int r, int c, + int direction) const; + bool TileMatchAvailable(int r, int c) const; + bool TileMatchesAvailable() const; + void PrepareTiles(); + int GetCellContent(int r, int c) const; + bool DoesActionChangeBoard(Action action) const; + + const TwentyFortyEightGame& parent_game_; + Player current_player_ = kChancePlayerId; + std::vector board_; + bool extra_chance_turn_ = true; + int total_score_ = 0; + int action_score_ = 0; + int total_actions_ = 0; +}; + +// Game object. +class TwentyFortyEightGame : public Game { + public: + explicit TwentyFortyEightGame(const GameParameters& params); + int NumDistinctActions() const override; + std::unique_ptr NewInitialState() const override { + return absl::make_unique(shared_from_this()); + } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { return 0; } + + std::vector ObservationTensorShape() const override { + return {kRows, kColumns}; + } + int MaxChanceOutcomes() const override { + return kRows * kColumns * kChanceTiles.size() + 1; + } + + // Using analysis here to derive these bounds: + // https://www.reddit.com/r/2048/comments/214njx/highest_possible_score_for_2048_warning_math/ + double MaxUtility() const override { + return (std::log2(max_tile_) - 1) * max_tile_; + } + // First 2 is for the chance actions, second 2 for all the action required + // to get the max tile. 
+ int MaxGameLength() const override { return 2 * 2 * max_tile_; } + + const int max_tile() const { return max_tile_; } + + private: + const int max_tile_; +}; + +} // namespace twenty_forty_eight +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_2048_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/twenty_forty_eight/2048_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/twenty_forty_eight/2048_test.cc new file mode 100644 index 0000000..c7b9deb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/twenty_forty_eight/2048_test.cc @@ -0,0 +1,152 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/twenty_forty_eight/2048.h" + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace twenty_forty_eight { +namespace { + +namespace testing = open_spiel::testing; + +void BasicSimulationTests() { testing::RandomSimTest(*LoadGame("2048"), 100); } + +void BasicSerializationTest() { + std::shared_ptr game = LoadGame("2048"); + std::unique_ptr state = game->NewInitialState(); + std::unique_ptr state2 = game->DeserializeState(state->Serialize()); + SPIEL_CHECK_EQ(state->ToString(), state2->ToString()); +} + +void RandomSerializationTest() { + std::shared_ptr game = LoadGame("2048"); + std::unique_ptr state = game->NewInitialState(); + for (int i = 0; i < 20; ++i) { + std::cout << state->ToString() << std::endl; + std::cout << state->LegalActions().size() << std::endl; + state->ApplyAction(state->LegalActions()[0]); + } + std::unique_ptr state2 = game->DeserializeState(state->Serialize()); + SPIEL_CHECK_EQ(state->ToString(), state2->ToString()); +} + +void Basic2048Tests() { + testing::LoadGameTest("2048"); + testing::ChanceOutcomesTest(*LoadGame("2048")); + testing::RandomSimTest(*LoadGame("2048"), 100); +} + +// Board: +// 0 0 0 0 +// 2 0 0 0 +// 2 0 0 0 +// 2 0 0 0 +// 4 should be formed in the bottom left corner and not on the cell above it +void MultipleMergePossibleTest() { + std::shared_ptr game = LoadGame("2048"); + std::unique_ptr state = game->NewInitialState(); + TwentyFortyEightState* cstate = + static_cast(state.get()); + cstate->SetCustomBoard({0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0}); + cstate->ApplyAction(kMoveDown); + SPIEL_CHECK_EQ(cstate->BoardAt(3, 0).value, 4); +} + +// Board: +// 2 4 0 4 +// 0 2 0 2 +// 0 0 0 0 +// 0 2 0 0 +// 4 should not be merged again with the newly formed 4 in 2nd column +void OneMergePerTurnTest() { + std::shared_ptr game = LoadGame("2048"); + std::unique_ptr state = game->NewInitialState(); + TwentyFortyEightState* cstate = + static_cast(state.get()); + cstate->SetCustomBoard({2, 4, 0, 4, 0, 2, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0}); + cstate->ApplyAction(kMoveDown); + SPIEL_CHECK_EQ(cstate->BoardAt(2, 1).value, 4); + SPIEL_CHECK_EQ(cstate->BoardAt(3, 1).value, 4); +} + +// Board: +// 4 8 2 4 +// 2 4 8 16 +// 16 128 64 128 +// 2 8 2 8 
+// This should be a terminal state +void TerminalStateTest() { + std::shared_ptr game = LoadGame("2048"); + std::unique_ptr state = game->NewInitialState(); + TwentyFortyEightState* cstate = + static_cast(state.get()); + cstate->SetCustomBoard( + {4, 8, 2, 4, 2, 4, 8, 16, 16, 128, 64, 128, 2, 8, 2, 8}); + SPIEL_CHECK_EQ(cstate->IsTerminal(), true); +} + +// Board: +// 4 8 2 4 +// 2 4 8 16 +// 1024 128 64 128 +// 1024 8 2 8 +// Taking down action should win from this state +void GameWonTest() { + std::shared_ptr game = LoadGame("2048"); + std::unique_ptr state = game->NewInitialState(); + TwentyFortyEightState* cstate = + static_cast(state.get()); + cstate->SetCustomBoard( + {4, 8, 2, 4, 2, 4, 8, 16, 1024, 128, 64, 128, 1024, 8, 2, 8}); + cstate->ApplyAction(kMoveDown); + SPIEL_CHECK_EQ(cstate->IsTerminal(), true); + SPIEL_CHECK_EQ(cstate->Returns()[0], 2048); +} + +// Board: +// 0 0 0 0 +// 0 0 0 0 +// 0 0 0 0 +// 2 0 0 2 +// Down should not be a legal action here as it does not change the board +void BoardNotChangedTest() { + std::shared_ptr game = LoadGame("2048"); + std::unique_ptr state = game->NewInitialState(); + TwentyFortyEightState* cstate = + static_cast(state.get()); + cstate->SetCustomBoard({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2}); + for (Action action : cstate->LegalActions()) { + SPIEL_CHECK_NE(action, kMoveDown); + } +} + +} // namespace +} // namespace twenty_forty_eight +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::twenty_forty_eight::BasicSimulationTests(); + open_spiel::twenty_forty_eight::BasicSerializationTest(); + open_spiel::twenty_forty_eight::RandomSerializationTest(); + open_spiel::twenty_forty_eight::Basic2048Tests(); + open_spiel::twenty_forty_eight::MultipleMergePossibleTest(); + open_spiel::twenty_forty_eight::OneMergePerTurnTest(); + open_spiel::twenty_forty_eight::TerminalStateTest(); + open_spiel::twenty_forty_eight::GameWonTest(); + open_spiel::twenty_forty_eight::BoardNotChangedTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/twixt/twixt.cc b/scenarios/bargaining/open_spiel/open_spiel/games/twixt/twixt.cc new file mode 100644 index 0000000..8468a43 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/twixt/twixt.cc @@ -0,0 +1,145 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/twixt/twixt.h" + +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/games/twixt/twixtboard.h" +#include "open_spiel/games/twixt/twixtcell.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/tensor_view.h" + +namespace open_spiel { +namespace twixt { +namespace { + +// Facts about the game. 
+const GameType kGameType{ + /*short_name=*/"twixt", + /*long_name=*/"TwixT", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + {{"board_size", GameParameter(kDefaultBoardSize)}, + {"ansi_color_output", GameParameter(kDefaultAnsiColorOutput)}}, +}; + +std::unique_ptr Factory(const GameParameters ¶ms) { + return std::unique_ptr(new TwixTGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +} // namespace + +TwixTState::TwixTState(std::shared_ptr game) : State(game) { + const TwixTGame &parent_game = static_cast(*game); + board_ = Board(parent_game.board_size(), parent_game.ansi_color_output()); +} + +std::string TwixTState::ActionToString(open_spiel::Player player, + Action action) const { + Position position = board_.ActionToPosition(action); + std::string s = (player == kRedPlayer) ? "x" : "o"; + s += static_cast('a') + position.x; + s.append(std::to_string(board_.size() - position.y)); + return s; +} + +void TwixTState::SetPegAndLinksOnTensor(absl::Span values, + const Cell &cell, int offset, bool turn, + Position position) const { + TensorView<3> view(values, {kNumPlanes, board_.size(), board_.size() - 2}, + false); + Position tensorPosition = board_.GetTensorPosition(position, turn); + + if (cell.HasLinks()) { + for (int dir = 0; dir < 4; dir++) { + if (cell.HasLink(dir)) { + // peg has link in direction dir: set 1.0 on plane 1..4 / 7..10 + view[{offset + 1 + dir, tensorPosition.x, tensorPosition.y}] = 1.0; + } + } + } else { + // peg has no links: set 1.0 on plane 0 / 6 + view[{offset + 0, tensorPosition.x, tensorPosition.y}] = 1.0; + } + + // peg has blocked neighbors: set 1.0 on plane 5 / 11 + if (cell.HasBlockedNeighborsEast()) { + view[{offset + 5, tensorPosition.x, tensorPosition.y}] = 1.0; + } +} + +void TwixTState::ObservationTensor(open_spiel::Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, kNumPlayers); + + const int kPlaneOffset[2] = {0, kNumPlanes / 2}; + int size = board_.size(); + + // 2 x 6 planes of size boardSize x (boardSize-2): + // each plane excludes the endlines of the opponent + // plane 0/6 is for the pegs + // plane 1..4 / 7..10 is for the links NNE, ENE, ESE, SSE, resp. 
+ // plane 5/11 is pegs that have blocked neighbors + + TensorView<3> view(values, {kNumPlanes, board_.size(), board_.size() - 2}, + true); + + for (int c = 0; c < size; c++) { + for (int r = 0; r < size; r++) { + Position position = {c, r}; + const Cell &cell = board_.GetConstCell(position); + int color = cell.color(); + if (color == kRedColor) { + // no turn + SetPegAndLinksOnTensor(values, cell, kPlaneOffset[0], false, position); + } else if (color == kBlueColor) { + // 90 degr turn + SetPegAndLinksOnTensor(values, cell, kPlaneOffset[1], true, position); + } + } + } +} + +TwixTGame::TwixTGame(const GameParameters ¶ms) + : Game(kGameType, params), + ansi_color_output_( + ParameterValue("ansi_color_output", kDefaultAnsiColorOutput)), + board_size_(ParameterValue("board_size", kDefaultBoardSize)) { + if (board_size_ < kMinBoardSize || board_size_ > kMaxBoardSize) { + SpielFatalError( + "board_size out of range [" + std::to_string(kMinBoardSize) + ".." + + std::to_string(kMaxBoardSize) + "]: " + std::to_string(board_size_)); + } +} + +} // namespace twixt +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/twixt/twixt.h b/scenarios/bargaining/open_spiel/open_spiel/games/twixt/twixt.h new file mode 100644 index 0000000..92be7ac --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/twixt/twixt.h @@ -0,0 +1,148 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef OPEN_SPIEL_GAMES_TWIXT_TWIXT_H_ +#define OPEN_SPIEL_GAMES_TWIXT_TWIXT_H_ + +#include +#include +#include +#include + +#include "open_spiel/games/twixt/twixtboard.h" +#include "open_spiel/games/twixt/twixtcell.h" + +// https://en.wikipedia.org/wiki/TwixT + +namespace open_spiel { +namespace twixt { + +class TwixTState : public State { + public: + explicit TwixTState(std::shared_ptr game); + + TwixTState(const TwixTState &) = default; + TwixTState &operator=(const TwixTState &) = default; + + open_spiel::Player CurrentPlayer() const override { return current_player_; }; + + std::string ActionToString(open_spiel::Player player, + Action action) const override; + + std::string ToString() const override { return board_.ToString(); }; + + bool IsTerminal() const override { + int result = board_.result(); + return (result == kRedWin || result == kBlueWin || result == kDraw); + }; + + std::vector Returns() const override { + double reward; + int result = board_.result(); + if (result == kOpen || result == kDraw) { + return {0.0, 0.0}; + } else { + reward = 1.0; + if (result == kRedWin) { + return {reward, -reward}; + } else { + return {-reward, reward}; + } + } + }; + + std::string InformationStateString(open_spiel::Player player) const override { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, kNumPlayers); + return ToString(); + }; + + std::string ObservationString(open_spiel::Player player) const override { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, kNumPlayers); + return ToString(); + }; + + void ObservationTensor(open_spiel::Player player, + absl::Span values) const override; + + std::unique_ptr Clone() const override { + return std::unique_ptr(new TwixTState(*this)); + }; + + void UndoAction(open_spiel::Player, Action) override{}; + + std::vector LegalActions() const override { + if (IsTerminal()) return {}; + return board_.GetLegalActions(current_player_); + }; + + protected: + void DoApplyAction(Action action) override { + const std::vector &v = LegalActions(); + if (std::find(v.begin(), v.end(), action) == v.end()) { + SpielFatalError("Not a legal action: " + std::to_string(action)); + } + board_.ApplyAction(CurrentPlayer(), action); + if (board_.result() == kOpen) { + set_current_player(1 - CurrentPlayer()); + } else { + set_current_player(kTerminalPlayerId); + } + }; + + private: + Player current_player_ = kRedPlayer; + Board board_; + void set_current_player(Player player) { current_player_ = player; } + void SetPegAndLinksOnTensor(absl::Span, const Cell &, int, bool, + Position) const; +}; + +class TwixTGame : public Game { + public: + explicit TwixTGame(const GameParameters ¶ms); + + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new TwixTState(shared_from_this())); + }; + + int NumDistinctActions() const override { return board_size_ * board_size_; }; + + int NumPlayers() const override { return kNumPlayers; }; + double MinUtility() const override { return -1.0; }; + absl::optional UtilitySum() const override { return 0.0; }; + double MaxUtility() const override { return 1.0; }; + + std::vector ObservationTensorShape() const override { + static std::vector shape{kNumPlanes, board_size_, board_size_ - 2}; + return shape; + } + + int MaxGameLength() const { + // square - 4 corners + swap move + return board_size_ * board_size_ - 4 + 1; + } + bool ansi_color_output() const { return ansi_color_output_; } + int board_size() const { return board_size_; } + + private: + bool ansi_color_output_; + int board_size_; +}; + +} // 
namespace twixt +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_TWIXT_TWIXT_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/twixt/twixt_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/twixt/twixt_test.cc new file mode 100644 index 0000000..afb7f7e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/twixt/twixt_test.cc @@ -0,0 +1,152 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace twixt { +namespace { + +namespace testing = open_spiel::testing; + +void BasicTwixTTests() { + testing::LoadGameTest("twixt"); + testing::NoChanceOutcomesTest(*LoadGame("twixt")); + testing::RandomSimTest(*LoadGame("twixt"), 100); +} + +void ParameterTest() { + std::string game_name = "twixt"; + open_spiel::GameParameters params; + std::shared_ptr game; + // ok: ansi_color_output=true + params.insert({"ansi_color_output", open_spiel::GameParameter(true, false)}); + game = open_spiel::LoadGame(game_name, params); + params.clear(); + + // ok: board_size=10 + params.insert({"board_size", open_spiel::GameParameter(10, false)}); + game = open_spiel::LoadGame(game_name, params); + params.clear(); +} + +bool IsLegalAction(const std::vector v, + open_spiel::Action action) { + return std::find(v.begin(), v.end(), action) != v.end(); +} + +void SwapTest() { + std::shared_ptr game = open_spiel::LoadGame("twixt"); + auto state = game->NewInitialState(); + // player 0 plays action 19: [2,3] = c5 + SPIEL_CHECK_EQ(0, state->CurrentPlayer()); + SPIEL_CHECK_TRUE(IsLegalAction(state->LegalActions(), 11)); + state->ApplyAction(19); + + // player 1 plays action 19: [2,3] = c5 (SWAP rule) + SPIEL_CHECK_EQ(1, state->CurrentPlayer()); + state->ApplyAction(19); + + // => [3,5] od3 replaces [2,3] xc5; c5 is empty again and d3 is occupied + SPIEL_CHECK_TRUE(IsLegalAction(state->LegalActions(), 19)); // c5 + SPIEL_CHECK_FALSE(IsLegalAction(state->LegalActions(), 29)); // d3 + + // player 0 plays action 36: [4,4] = e4 + SPIEL_CHECK_EQ(0, state->CurrentPlayer()); + state->ApplyAction(36); + + SPIEL_CHECK_TRUE(IsLegalAction(state->LegalActions(), 19)); // c5 + SPIEL_CHECK_FALSE(IsLegalAction(state->LegalActions(), 29)); // d3 + SPIEL_CHECK_FALSE(IsLegalAction(state->LegalActions(), 36)); // e4 +} + +void LegalActionsTest() { + std::shared_ptr game = open_spiel::LoadGame("twixt"); + auto state = game->NewInitialState(); + SPIEL_CHECK_FALSE(state->IsTerminal()); + // 48*/48 legal actions + SPIEL_CHECK_EQ(48, state->LegalActions().size()); + + state->ApplyAction(21); // player 0: xc3 + // 47/48* legal actions; player 1 could play c3 to swap + SPIEL_CHECK_EQ(48, state->LegalActions().size()); + + state->ApplyAction(38); // player 1: oe2 + // 46*/46 legal actions; player 1 did not swap + SPIEL_CHECK_EQ(46, 
state->LegalActions().size()); + + state->ApplyAction(15); // player 0: xb1 + // 45/46* legal actions; player 0 played on his end line + SPIEL_CHECK_EQ(46, state->LegalActions().size()); + + state->ApplyAction(11); // player 1: ob5 + // 44*/45 legal actions + SPIEL_CHECK_EQ(44, state->LegalActions().size()); + + state->ApplyAction(27); // player 0: xd5 + // 43/44* legal actions + SPIEL_CHECK_EQ(44, state->LegalActions().size()); + + state->ApplyAction(17); // player 1: oc7 + // 42*/43 legal actions + SPIEL_CHECK_EQ(42, state->LegalActions().size()); + + state->ApplyAction(42); // player 0: xf6 + // 41/42* legal actions + SPIEL_CHECK_EQ(42, state->LegalActions().size()); + + state->ApplyAction(45); // player 1: of3 + // 40*/41 legal actions + SPIEL_CHECK_EQ(40, state->LegalActions().size()); + + state->ApplyAction(48); // player 0: xg8 wins + SPIEL_CHECK_TRUE(state->IsTerminal()); + SPIEL_CHECK_EQ(1.0, state->PlayerReturn(0)); + SPIEL_CHECK_EQ(-1.0, state->PlayerReturn(1)); +} + +void DrawTest() { + open_spiel::GameParameters params; + params.insert({"board_size", open_spiel::GameParameter(5, false)}); + std::shared_ptr game = + open_spiel::LoadGame("twixt", params); + auto state = game->NewInitialState(); + + while (!state->IsTerminal()) { + // this pattern will produce a draw on a 5x5 board + state->ApplyAction(state->LegalActions().at(0)); + state->ApplyAction(state->LegalActions().at(1)); + } + SPIEL_CHECK_EQ(0.0, state->PlayerReturn(0)); + SPIEL_CHECK_EQ(0.0, state->PlayerReturn(1)); +} + +} // namespace +} // namespace twixt +} // namespace open_spiel + +int main(int argc, char **argv) { + open_spiel::twixt::BasicTwixTTests(); + open_spiel::twixt::ParameterTest(); + open_spiel::twixt::SwapTest(); + open_spiel::twixt::LegalActionsTest(); + open_spiel::twixt::DrawTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/twixt/twixtboard.cc b/scenarios/bargaining/open_spiel/open_spiel/games/twixt/twixtboard.cc new file mode 100644 index 0000000..036e640 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/twixt/twixtboard.cc @@ -0,0 +1,638 @@ + +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include +#include +#include + +#include "open_spiel/games/twixt/twixtboard.h" +#include "open_spiel/games/twixt/twixtcell.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace twixt { + +// ANSI colors +const char kAnsiRed[] = "\e[91m"; +const char kAnsiBlue[] = "\e[94m"; +const char kAnsiDefault[] = "\e[0m"; + +// helper functions +inline int OppDir(int direction) { + return (direction + kMaxCompass / 2) % kMaxCompass; +} + +inline std::string PositionToString(Position position) { + return "[" + std::to_string(position.x) + "," + std::to_string(position.y) + + "]"; +} + +// table of 8 link descriptors +static const std::vector kLinkDescriptorTable{ + // NNE + {{1, 2}, // offset of target peg (2 up, 1 right) + { // blocking/blocked links + {{0, 1}, kENE}, + {{-1, 0}, kENE}, + + {{0, 2}, kESE}, + {{0, 1}, kESE}, + {{-1, 2}, kESE}, + {{-1, 1}, kESE}, + + {{0, 1}, kSSE}, + {{0, 2}, kSSE}, + {{0, 3}, kSSE}}}, + // ENE + {{2, 1}, + {{{0, -1}, kNNE}, + {{1, 0}, kNNE}, + + {{-1, 1}, kESE}, + {{0, 1}, kESE}, + {{1, 1}, kESE}, + + {{0, 1}, kSSE}, + {{0, 2}, kSSE}, + {{1, 1}, kSSE}, + {{1, 2}, kSSE}}}, + // ESE + {{2, -1}, + {{{0, -1}, kNNE}, + {{1, -1}, kNNE}, + {{0, -2}, kNNE}, + {{1, -2}, kNNE}, + + {{-1, -1}, kENE}, + {{0, -1}, kENE}, + {{1, -1}, kENE}, + + {{0, 1}, kSSE}, + {{1, 0}, kSSE}}}, + // SSE + {{1, -2}, + {{{0, -1}, kNNE}, + {{0, -2}, kNNE}, + {{0, -3}, kNNE}, + + {{-1, -1}, kENE}, + {{0, -1}, kENE}, + {{-1, -2}, kENE}, + {{0, -2}, kENE}, + + {{-1, 0}, kESE}, + {{0, -1}, kESE}}}, + // SSW + {{-1, -2}, + {{{-1, -1}, kENE}, + {{-2, -2}, kENE}, + + {{-2, 0}, kESE}, + {{-1, 0}, kESE}, + {{-2, -1}, kESE}, + {{-1, -1}, kESE}, + + {{-1, 1}, kSSE}, + {{-1, 0}, kSSE}, + {{-1, -1}, kSSE}}}, + // WSW + {{-2, -1}, + {{{-2, -2}, kNNE}, + {{-1, -1}, kNNE}, + + {{-3, 0}, kESE}, + {{-2, 0}, kESE}, + {{-1, 0}, kESE}, + + {{-2, 1}, kSSE}, + {{-1, 1}, kSSE}, + {{-2, 0}, kSSE}, + {{-1, 0}, kSSE}}}, + // WNW + {{-2, 1}, + {{{-2, 0}, kNNE}, + {{-1, 0}, kNNE}, + {{-2, -1}, kNNE}, + {{-1, -1}, kNNE}, + + {{-3, 0}, kENE}, + {{-2, 0}, kENE}, + {{-1, 0}, kENE}, + + {{-2, 2}, kSSE}, + {{-1, 1}, kSSE}}}, + // NNW + {{-1, 2}, + {{{-1, 1}, kNNE}, + {{-1, 0}, kNNE}, + {{-1, -1}, kNNE}, + + {{-2, 1}, kENE}, + {{-1, 1}, kENE}, + {{-2, 0}, kENE}, + {{-1, 0}, kENE}, + + {{-2, 2}, kESE}, + {{-1, 1}, kESE}}}}; + +// helper class: blockerMap stores set of blocking links for each link +std::unordered_map, LinkHashFunction> BlockerMap::map_ = + {}; + +const std::set& BlockerMap::GetBlockers(Link link) { + return BlockerMap::map_[link]; +} + +void BlockerMap::PushBlocker(Link link, Link blocked_link) { + BlockerMap::map_[link].insert(blocked_link); +} + +void BlockerMap::DeleteBlocker(Link link, Link blocked_link) { + BlockerMap::map_[link].erase(blocked_link); +} + +void BlockerMap::ClearBlocker() { BlockerMap::map_.clear(); } + +Board::Board(int size, bool ansi_color_output) { + set_size(size); + set_ansi_color_output(ansi_color_output); + + InitializeCells(true); + InitializeLegalActions(); +} + +void Board::InitializeBlockerMap(Position position, int dir, + const LinkDescriptor& ld) { + Link link = {position, dir}; + for (auto&& entry : ld.blocking_links) { + Position fromPosition = position + entry.position; + if (!PositionIsOffBoard(fromPosition)) { + const LinkDescriptor& oppLd = kLinkDescriptorTable[entry.direction]; + Position toPosition = position + entry.position + oppLd.offsets; + if (!PositionIsOffBoard(toPosition)) { + BlockerMap::PushBlocker(link, {fromPosition, 
entry.direction}); + BlockerMap::PushBlocker(link, {toPosition, OppDir(entry.direction)}); + } + } + } +} + +void Board::UpdateResult(Player player, Position position) { + // check for WIN + bool connected_to_start = GetCell(position).IsLinkedToBorder(player, kStart); + bool connected_to_end = GetCell(position).IsLinkedToBorder(player, kEnd); + if (connected_to_start && connected_to_end) { + // peg is linked to both boarder lines + set_result(player == kRedPlayer ? kRedWin : kBlueWin); + return; + } + + // check if opponent (player to turn next) has any legal moves left + if (!HasLegalActions(1 - player)) { + set_result(kDraw); + return; + } +} + +void Board::InitializeCells(bool init_blocker_map) { + cell_.resize(size(), std::vector(size())); + BlockerMap::ClearBlocker(); + + for (int x = 0; x < size(); x++) { + for (int y = 0; y < size(); y++) { + Position position = {x, y}; + Cell& cell = GetCell(position); + + // set color to EMPTY or OFFBOARD + if (PositionIsOffBoard(position)) { + cell.set_color(kOffBoard); + } else { // regular board + cell.set_color(kEmpty); + if (x == 0) { + cell.SetLinkedToBorder(kBluePlayer, kStart); + } else if (x == size() - 1) { + cell.SetLinkedToBorder(kBluePlayer, kEnd); + } else if (y == 0) { + cell.SetLinkedToBorder(kRedPlayer, kStart); + } else if (y == size() - 1) { + cell.SetLinkedToBorder(kRedPlayer, kEnd); + } + InitializeNeighbors(position, cell, init_blocker_map); + } + } + } +} + +void Board::InitializeNeighbors(Position position, Cell& cell, + bool init_blocker_map) { + for (int dir = 0; dir < kMaxCompass; dir++) { + const LinkDescriptor& ld = kLinkDescriptorTable[dir]; + Position target_position = position + ld.offsets; + if (!PositionIsOffBoard(target_position)) { + if (init_blocker_map) { + InitializeBlockerMap(position, dir, ld); + } + cell.SetNeighbor(dir, target_position); + } + } +} + +void Board::InitializeLegalActions() { + int num_legal_actions_per_player = size() * (size() - 2); + + for (Player p = 0; p < kNumPlayers; p++) { + legal_actions_[p].resize(num_legal_actions_per_player); + legal_actions_[p].clear(); + } + + for (int col = 0; col < size(); col++) { + for (int row = 0; row < size(); row++) { + Position pos = {col, row}; + Action action = col * size() + row; + if (PositionIsOffBoard(pos)) { + continue; + } else if (PositionIsOnBorder(kRedPlayer, pos)) { + legal_actions_[kRedPlayer].push_back(action); + } else if (PositionIsOnBorder(kBluePlayer, pos)) { + legal_actions_[kBluePlayer].push_back(action); + } else { + legal_actions_[kRedPlayer].push_back(action); + legal_actions_[kBluePlayer].push_back(action); + } + } + } +} + +std::string Board::ToString() const { + std::string s = ""; + + // head line + s.append(" "); + for (int y = 0; y < size(); y++) { + std::string letter = ""; + letter += static_cast('a') + y; + letter += " "; + AppendColorString(s, kAnsiRed, letter); + } + s.append("\n"); + + for (int y = size() - 1; y >= 0; y--) { + // print "before" row + s.append(" "); + for (int x = 0; x < size(); x++) { + AppendBeforeRow(s, {x, y}); + } + s.append("\n"); + + // print "peg" row + size() - y < 10 ? 
s.append(" ") : s.append(" "); + AppendColorString(s, kAnsiBlue, std::to_string(size() - y) + " "); + for (int x = 0; x < size(); x++) { + AppendPegRow(s, {x, y}); + } + s.append("\n"); + + // print "after" row + s.append(" "); + for (int x = 0; x < size(); x++) { + AppendAfterRow(s, {x, y}); + } + s.append("\n"); + } + s.append("\n"); + + if (swapped_) s.append("[swapped]"); + + switch (result_) { + case kOpen: { + break; + } + case kRedWin: { + s.append("[x has won]"); + break; + } + case kBlueWin: { + s.append("[o has won]"); + break; + } + case kDraw: { + s.append("[draw]"); + break; + } + default: { + break; + } + } + + return s; +} + +void Board::AppendLinkChar(std::string& s, Position position, enum Compass dir, + std::string linkChar) const { + if (!PositionIsOffBoard(position) && GetConstCell(position).HasLink(dir)) { + if (GetConstCell(position).color() == kRedColor) { + AppendColorString(s, kAnsiRed, linkChar); + } else if (GetConstCell(position).color() == kBlueColor) { + AppendColorString(s, kAnsiBlue, linkChar); + } else { + s.append(linkChar); + } + } +} + +void Board::AppendColorString(std::string& s, std::string colorString, + std::string appString) const { + s.append(ansi_color_output() ? colorString : ""); // make it colored + s.append(appString); + s.append(ansi_color_output() ? kAnsiDefault : ""); // make it default +} + +void Board::AppendPegChar(std::string& s, Position position) const { + if (GetConstCell(position).color() == kRedColor) { + // x + AppendColorString(s, kAnsiRed, "x"); + } else if (GetConstCell(position).color() == kBlueColor) { + // o + AppendColorString(s, kAnsiBlue, "o"); + } else if (PositionIsOffBoard(position)) { + // corner + s.append(" "); + } else if (position.x == 0 || position.x == size() - 1) { + // empty . (blue border line) + AppendColorString(s, kAnsiBlue, "."); + } else if (position.y == 0 || position.y == size() - 1) { + // empty . 
(red border line) + AppendColorString(s, kAnsiRed, "."); + } else { + // empty (non border line) + s.append("."); + } +} + +void Board::AppendBeforeRow(std::string& s, Position position) const { + // -1, +1 + int len = s.length(); + AppendLinkChar(s, position + Position{-1, 0}, kENE, "/"); + AppendLinkChar(s, position + Position{-1, -1}, kNNE, "/"); + AppendLinkChar(s, position + Position{0, 0}, kWNW, "_"); + if (len == s.length()) s.append(" "); + + // 0, +1 + len = s.length(); + AppendLinkChar(s, position, kNNE, "|"); + if (len == s.length()) AppendLinkChar(s, position, kNNW, "|"); + if (len == s.length()) s.append(" "); + + // +1, +1 + len = s.length(); + AppendLinkChar(s, position + Position{+1, 0}, kWNW, "\\"); + AppendLinkChar(s, position + Position{+1, -1}, kNNW, "\\"); + AppendLinkChar(s, position + Position{0, 0}, kENE, "_"); + if (len == s.length()) s.append(" "); +} + +void Board::AppendPegRow(std::string& s, Position position) const { + // -1, 0 + int len = s.length(); + AppendLinkChar(s, position + Position{-1, -1}, kNNE, "|"); + AppendLinkChar(s, position + Position{0, 0}, kWSW, "_"); + if (len == s.length()) s.append(" "); + + // 0, 0 + AppendPegChar(s, position); + + // +1, 0 + len = s.length(); + AppendLinkChar(s, position + Position{+1, -1}, kNNW, "|"); + AppendLinkChar(s, position + Position{0, 0}, kESE, "_"); + if (len == s.length()) s.append(" "); +} + +void Board::AppendAfterRow(std::string& s, Position position) const { + // -1, -1 + int len = s.length(); + AppendLinkChar(s, position + Position{+1, -1}, kWNW, "\\"); + AppendLinkChar(s, position + Position{0, -1}, kNNW, "\\"); + if (len == s.length()) s.append(" "); + + // 0, -1 + len = s.length(); + AppendLinkChar(s, position + Position{-1, -1}, kENE, "_"); + AppendLinkChar(s, position + Position{+1, -1}, kWNW, "_"); + AppendLinkChar(s, position, kSSW, "|"); + if (len == s.length()) AppendLinkChar(s, position, kSSE, "|"); + if (len == s.length()) s.append(" "); + + // -1, -1 + len = s.length(); + AppendLinkChar(s, position + Position{-1, -1}, kENE, "/"); + AppendLinkChar(s, position + Position{0, -1}, kNNE, "/"); + if (len == s.length()) s.append(" "); +} + +void Board::UndoFirstMove() { + Cell& cell = GetCell(move_one()); + cell.set_color(kEmpty); + InitializeNeighbors(move_one(), cell, false); + InitializeLegalActions(); +} + +void Board::ApplyAction(Player player, Action action) { + Position position = ActionToPosition(action); + + if (move_counter() == 1) { + // it's the second position + if (position == move_one()) { + // blue player swapped + set_swapped(true); + + // undo the first move: (remove peg and restore legal actions) + UndoFirstMove(); + + // turn position 90° clockwise: + // [2,3]->[3,5]; [1,4]->[4,6]; [3,2]->[2,4] + int x = position.y; + int y = size() - position.x - 1; + position = {x, y}; + + } else { + // blue player hasn't swapped => regular move + // remove move one from legal moves + RemoveLegalAction(kRedPlayer, move_one()); + RemoveLegalAction(kBluePlayer, move_one()); + } + } + + SetPegAndLinks(player, position); + + if (move_counter() == 0) { + // do not remove the move from legal actions but store it + // because second player might want to swap, by choosing the same move + set_move_one(position); + } else { + // otherwise remove move from legal actions + RemoveLegalAction(kRedPlayer, position); + RemoveLegalAction(kBluePlayer, position); + } + + IncMoveCounter(); + + // Update the predicted result and update current_player_... 
+ UpdateResult(player, position); +} + +void Board::SetPegAndLinks(Player player, Position position) { + bool linked_to_neutral = false; + bool linked_to_start = false; + bool linked_to_end = false; + + // set peg + Cell& cell = GetCell(position); + cell.set_color(player); + + int dir = 0; + bool newLinks = false; + // check all neigbors that are empty or have same color) + for (dir = 0; dir < kMaxCompass; dir++) { + Position target_position = position + kLinkDescriptorTable[dir].offsets; + if (!PositionIsOffBoard(target_position)) { + Cell& target_cell = GetCell(target_position); + if (target_cell.color() == cell.color()) { + // check if there are blocking links before setting link + const std::set& blockers = + BlockerMap::GetBlockers(Link{position, dir}); + bool blocked = false; + for (auto& bl : blockers) { + if (GetCell(bl.position).HasLink(bl.direction)) { + blocked = true; + break; + } + } + + if (!blocked) { + // we set the link, and set the flag that there is at least one new + // link + cell.set_link(dir); + target_cell.set_link(OppDir(dir)); + + newLinks = true; + + // check if cell we link to is linked to START border / END border + if (target_cell.IsLinkedToBorder(player, kStart)) { + cell.SetLinkedToBorder(player, kStart); + linked_to_start = true; + } else if (target_cell.IsLinkedToBorder(player, kEnd)) { + cell.SetLinkedToBorder(player, kEnd); + linked_to_end = true; + } else { + linked_to_neutral = true; + } + } else { + // we store the fact that these two pegs of the same color cannot be + // linked this info is used for the ObservationTensor + cell.SetBlockedNeighbor(dir); + target_cell.SetBlockedNeighbor(OppDir(dir)); + } + } // same color + } // is on board + } // range of directions + + // check if we need to explore further + if (newLinks) { + std::set visited = {}; + if (cell.IsLinkedToBorder(player, kStart) && linked_to_neutral) { + // case: new cell is linked to START and linked to neutral cells + // => explore neutral graph and add all its cells to START + ExploreLocalGraph(player, cell, kStart, visited); + } + if (cell.IsLinkedToBorder(player, kEnd) && linked_to_neutral) { + // case: new cell is linked to END and linked to neutral cells + // => explore neutral graph and add all its cells to END + ExploreLocalGraph(player, cell, kEnd, visited); + } + } +} + +void Board::ExploreLocalGraph(Player player, Cell& cell, enum Border border, + std::set visited) { + visited.insert(&cell); + for (int dir = 0; dir < kMaxCompass; dir++) { + if (cell.HasLink(dir)) { + Cell& target_cell = GetCell(cell.GetNeighbor(dir)); + if ((visited.find(&target_cell) == visited.end()) && + !target_cell.IsLinkedToBorder(player, border)) { + // linked neighbor has not been visited yet + // => add it and explore + target_cell.SetLinkedToBorder(player, border); + ExploreLocalGraph(player, target_cell, border, visited); + } + } + } +} + +Position Board::GetTensorPosition(Position position, bool turn) const { + // we flip x/y and top/bottom for better readability in playthrough output + if (turn) { + return {size() - position.x - 1, size() - position.y - 2}; + } else { + return {size() - position.y - 1, position.x - 1}; + } +} + +Position Board::ActionToPosition(Action action) const { + return {static_cast(action) / size_, static_cast(action) % size_}; +} + +Action Board::PositionToAction(Position position) const { + return position.x * size() + position.y; +} + +Action Board::StringToAction(std::string s) const { + Position position; + position.x = static_cast(s.at(1)) - static_cast('a'); + 
position.y = size() - (static_cast(s.at(2)) - static_cast('0')); + return PositionToAction(position); +} + +bool Board::PositionIsOnBorder(Player player, Position position) const { + if (player == kRedPlayer) { + return ((position.y == 0 || position.y == size() - 1) && + (position.x > 0 && position.x < size() - 1)); + } else { + return ((position.x == 0 || position.x == size() - 1) && + (position.y > 0 && position.y < size() - 1)); + } +} + +bool Board::PositionIsOffBoard(Position position) const { + return (position.y < 0 || position.y > size() - 1 || position.x < 0 || + position.x > size() - 1 || + // corner case + ((position.x == 0 || position.x == size() - 1) && + (position.y == 0 || position.y == size() - 1))); +} + +void Board::RemoveLegalAction(Player player, Position position) { + Action action = PositionToAction(position); + std::vector& la = legal_actions_[player]; + std::vector::iterator it; + it = find(la.begin(), la.end(), action); + if (it != la.end()) la.erase(it); +} + +} // namespace twixt +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/twixt/twixtboard.h b/scenarios/bargaining/open_spiel/open_spiel/games/twixt/twixtboard.h new file mode 100644 index 0000000..eb5ee67 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/twixt/twixtboard.h @@ -0,0 +1,218 @@ + +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_TWIXT_TWIXTBOARD_H_ +#define OPEN_SPIEL_GAMES_TWIXT_TWIXTBOARD_H_ + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/games/twixt/twixtcell.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace twixt { + +const int kMinBoardSize = 5; +const int kMaxBoardSize = 24; +const int kDefaultBoardSize = 8; + +const bool kDefaultAnsiColorOutput = true; + +// 8 link descriptors store the properties of a link direction +struct { + Position offsets; // offset of the target peg, e.g. 
(2, -1) for ENE + std::vector blocking_links; +} typedef LinkDescriptor; + +// Tensor has 2 * 6 planes of size bordSize * (boardSize-2) +// see ObservationTensor +const int kNumPlanes = 12; + +enum Result { kOpen, kRedWin, kBlueWin, kDraw }; + +enum Color { kRedColor, kBlueColor, kEmpty, kOffBoard }; + +class Board { + public: + ~Board() {} + Board() {} + Board(int, bool); + + int size() const { return size_; } + std::string ToString() const; + int result() const { return result_; } + int move_counter() const { return move_counter_; } + std::vector GetLegalActions(Player player) const { + return legal_actions_[player]; + } + void ApplyAction(Player, Action); + Cell& GetCell(Position position) { return cell_[position.x][position.y]; } + const Cell& GetConstCell(Position position) const { + return cell_[position.x][position.y]; + } + Position ActionToPosition(Action action) const; + Action PositionToAction(Position position) const; + Position GetTensorPosition(Position position, bool turn) const; + + private: + int move_counter_ = 0; + bool swapped_ = false; + Position move_one_; + int result_ = kOpen; + std::vector> cell_; + int size_; // length of a side of the board + bool ansi_color_output_; + std::vector legal_actions_[kNumPlayers]; + + void set_size(int size) { size_ = size; } + + bool ansi_color_output() const { return ansi_color_output_; } + void set_ansi_color_output(bool ansi_color_output) { + ansi_color_output_ = ansi_color_output; + } + + void set_result(int result) { result_ = result; } + + bool swapped() const { return swapped_; } + void set_swapped(bool swapped) { swapped_ = swapped; } + + Position move_one() const { return move_one_; } + void set_move_one(Position move) { move_one_ = move; } + + void IncMoveCounter() { move_counter_++; } + + bool HasLegalActions(Player player) const { + return legal_actions_[player].size() > 0; + } + + void RemoveLegalAction(Player, Position); + + void UpdateResult(Player, Position); + void UndoFirstMove(); + + void InitializeCells(bool); + void InitializeNeighbors(Position, Cell&, bool); + void InitializeBlockerMap(Position, int, const LinkDescriptor&); + + void InitializeLegalActions(); + + void SetPegAndLinks(Player, Position); + void ExploreLocalGraph(Player, Cell&, enum Border, std::set); + + void AppendLinkChar(std::string&, Position, enum Compass, std::string) const; + void AppendColorString(std::string&, std::string, std::string) const; + void AppendPegChar(std::string&, Position) const; + + void AppendBeforeRow(std::string&, Position) const; + void AppendPegRow(std::string&, Position) const; + void AppendAfterRow(std::string&, Position) const; + + bool PositionIsOnBorder(Player, Position) const; + bool PositionIsOffBoard(Position) const; + + Action StringToAction(std::string s) const; +}; + +// used to construct new entries in BlockerMap +class LinkHashFunction { + public: + size_t operator()(const Link& link) const { + return link.position.x * 10000 + link.position.y * 100 + link.direction; + } +}; + +// stores for each link the set of links that could block it (i.e. 
cross it) +class BlockerMap { + public: + static const std::set& GetBlockers(Link link); + static void PushBlocker(Link link, Link blocked_link); + static void DeleteBlocker(Link link, Link blocked_link); + static void ClearBlocker(); + + private: + static std::unordered_map, LinkHashFunction> map_; +}; + +// twixt board: +// * the board has board_size_ * board_size_ cells +// * the x-axis (cols) points right, +// * the y axis (rows) points up +// * coord labels c3, f4, d2, etc. start at the upper left corner (a1) +// * player 0, 'x', red color, plays top/bottom +// * player 1, 'o', blue color, plays left/right +// * positions are labeled: col letter + row number, e.g. d4 +// * moves are labeled: player label + col letter + row number, e.g. xd4 +// * empty cell code = 2 +// * corner cell code = 3 +// +// example 8 x 8 board: +// move: xc5, player 0 action: 19, red peg at [2,3] +// move: of5, player 1 action: 43, blue peg at [5,3] +// move: xd3, player 0 action: 29, red peg at [3,5] +// link from [2,3] to [3,5] +// cell[2][3].links = 00000001 (bit 1 set for NNE direction) +// cell[3][5].links = 00010000 (bit 5 set for SSW direction) +// +// a b c d e f g h +// 7 3| 2 2 2 2 2 2 | 3 1 +// --|------------------------|-- +// 6 2| 2 2 2 2 2 2 | 2 2 +// | | +// 5 2| 2 2 [0] 2 2 2 | 2 3 +// | | +// 4 2| 2 2 2 2 2 2 | 2 4 +// | | +// 3 2| 2 [0] 2 2 [1] 1 | 2 5 +// | | +// 2 2| 2 2 2 2 2 2 | 2 6 +// | | +// 1 2| 2 2 2 2 2 2 | 2 7 +// --|------------------------|-- +// 0 3| 2 2 2 2 2 2 | 3 8 +// 0 1 2 3 4 5 6 7 +// +// Actions are indexed from 0 to (board_size_ ** 2) - 1 +// except the corners (0, 7, 56, 63) which are not legal actions. +// +// a b c d e f g h +// 7 | 15 23 31 39 47 55 | 1 +// --|------------------------|-- +// 6 6| 14 22 30 38 46 54 |62 2 +// | | +// 5 5| 13 21 [29] 37 45 53 |61 3 +// | | +// 4 4| 12 20 28 36 44 52 |60 4 +// | | +// 3 3| 11 [19] 27 35 [43] 51 |59 5 +// | | +// 2 2| 10 18 26 34 42 50 |58 6 +// | | +// 1 1| 9 17 25 33 41 49 |57 7 +// --|------------------------|-- +// 0 | 8 16 24 32 40 48 | 8 +// 0 1 2 3 4 5 6 7 +// +// mapping move to action: [c,r] => c * size + r +// xd6 == [2,3] => 2 * 8 + 3 == 19 + +} // namespace twixt +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_TWIXT_TWIXTBOARD_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/twixt/twixtcell.h b/scenarios/bargaining/open_spiel/open_spiel/games/twixt/twixtcell.h new file mode 100644 index 0000000..cb1ef5f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/twixt/twixtcell.h @@ -0,0 +1,109 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
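+
+// Basic TwixT building blocks used by twixtboard.h: Position (x/y cell
+// coordinates), Link (a position plus one of the eight knight-move link
+// directions), and Cell (peg color, outgoing links, blocked neighbors, and
+// per-player border connectivity).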
+ +#ifndef OPEN_SPIEL_GAMES_TWIXT_TWIXTCELL_H_ +#define OPEN_SPIEL_GAMES_TWIXT_TWIXTCELL_H_ + +#include "open_spiel/spiel_utils.h" + +struct Position { + int x; + int y; + Position operator+(const Position &p) { return {x + p.x, y + p.y}; } + bool operator==(const Position &p) const { return x == p.x && y == p.y; } + bool operator<(const Position &p) const { + return x < p.x || (x == p.x && y < p.y); + } +}; + +struct Link { + Position position; + int direction; + bool operator==(const Link &l) const { + return position == l.position && direction == l.direction; + } + bool operator<(const Link &l) const { + return position < l.position || + (position == l.position && direction < l.direction); + } +}; + +namespace open_spiel { +namespace twixt { + +enum Border { kStart, kEnd, kMaxBorder }; + +const open_spiel::Player kRedPlayer = 0; +const open_spiel::Player kBluePlayer = 1; +const int kNumPlayers = 2; + +// eight directions of links from 0 to 7:q! + +enum Compass { + kNNE, // North-North-East, 1 right, 2 up + kENE, // East-North-East, 2 right, 1 up + kESE, // East-South-East, 2 right, 1 down + kSSE, // South-South-East, 1 right, 2 down + kSSW, // South-South-West, 1 left, 2 down + kWSW, // West-South-West, 2 left, 1 down + kWNW, // West-North-West, 2 left, 1 up + kNNW, // North-North-West, 1 left, 2 up + kMaxCompass +}; + +class Cell { + public: + int color() const { return color_; } + void set_color(int color) { color_ = color; } + void set_link(int dir) { links_ |= (1UL << dir); } + int links() const { return links_; } + + bool HasLink(int dir) const { return links_ & (1UL << dir); } + bool HasLinks() const { return links_ > 0; } + + void SetBlockedNeighbor(int dir) { blocked_neighbors_ |= (1UL << dir); } + bool HasBlockedNeighbors() const { return blocked_neighbors_ > 0; } + bool HasBlockedNeighborsEast() const { + return (blocked_neighbors_ & 15UL) > 0; + } + + Position GetNeighbor(int dir) const { return neighbors_[dir]; } + void SetNeighbor(int dir, Position c) { neighbors_[dir] = c; } + + void SetLinkedToBorder(int player, int border) { + linked_to_border_[player][border] = true; + } + + bool IsLinkedToBorder(int player, int border) const { + return linked_to_border_[player][border]; + } + + private: + int color_; + // bitmap of outgoing links from this cell + int links_ = 0; + // bitmap of neighbors same color that are blocked + int blocked_neighbors_ = 0; + // array of neighbor tuples + // (cells in knight's move distance that are on board) + Position neighbors_[kMaxCompass]; + // indicator if cell is linked to START|END border of player 0|1 + bool linked_to_border_[kNumPlayers][kMaxBorder] = {{false, false}, + {false, false}}; +}; + +} // namespace twixt +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_TWIXT_TWIXTCELL_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/ultimate_tic_tac_toe/ultimate_tic_tac_toe.cc b/scenarios/bargaining/open_spiel/open_spiel/games/ultimate_tic_tac_toe/ultimate_tic_tac_toe.cc new file mode 100644 index 0000000..92edf39 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/ultimate_tic_tac_toe/ultimate_tic_tac_toe.cc @@ -0,0 +1,238 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/ultimate_tic_tac_toe/ultimate_tic_tac_toe.h" + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/games/tic_tac_toe/tic_tac_toe.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/tensor_view.h" + +namespace open_spiel { +namespace ultimate_tic_tac_toe { +namespace { + +namespace ttt = tic_tac_toe; + +// Facts about the game. +const GameType kGameType{ + /*short_name=*/"ultimate_tic_tac_toe", + /*long_name=*/"Ultimate Tic-Tac-Toe", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/{} // no parameters +}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new UltimateTTTGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); +} // namespace + +bool UltimateTTTState::AllLocalStatesTerminal() const { + return std::all_of( + local_states_.begin(), local_states_.end(), + [](const std::unique_ptr& state) { return state->IsTerminal(); }); +} + +void UltimateTTTState::DoApplyAction(Action move) { + if (current_state_ < 0) { + // Choosing a board. + SPIEL_CHECK_GE(move, 0); + SPIEL_CHECK_LT(move, ttt::kNumCells); + current_state_ = move; + } else { + // Apply action to local state, then apply that move. + SPIEL_CHECK_FALSE(local_states_[current_state_]->IsTerminal()); + local_states_[current_state_]->ApplyAction(move); + // Check if it's terminal and mark the outcome in the meta-game. + if (local_states_[current_state_]->IsTerminal()) { + Player local_outcome = local_state(current_state_)->outcome(); + if (local_outcome < 0) { + meta_board_[current_state_] = ttt::CellState::kEmpty; + } else { + meta_board_[current_state_] = ttt::PlayerToState(local_outcome); + } + } + // Set the next potential local state. + current_state_ = move; + // Check for a win or no more moves in the meta-game. + if (ttt::BoardHasLine(meta_board_, current_player_)) { + outcome_ = current_player_; + } else if (AllLocalStatesTerminal()) { + outcome_ = kInvalidPlayer; // draw. + } else { + // Does the next board done? If not, set current_state_ to less than 0 to + // indicate that the next board is a choice. + if (local_states_[current_state_]->IsTerminal()) { + current_state_ = -1; + } + current_player_ = NextPlayerRoundRobin(current_player_, ttt::kNumPlayers); + // Need to set the current player in the local board. 
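+      // This only applies when the next local board is forced; if the board
+      // just played into is already finished, current_state_ was set to -1
+      // above and the next player will choose a board first.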
+ if (current_state_ >= 0) { + local_state(current_state_)->SetCurrentPlayer(current_player_); + } + } + } +} + +std::vector UltimateTTTState::LegalActions() const { + if (IsTerminal()) return {}; + if (current_state_ < 0) { + std::vector actions; + // Choosing the next state to play: any one that is not finished. + for (int i = 0; i < local_states_.size(); ++i) { + if (!local_states_[i]->IsTerminal()) { + actions.push_back(i); + } + } + return actions; + } else { + return local_states_[current_state_]->LegalActions(); + } +} + +std::string UltimateTTTState::ActionToString(Player player, + Action action_id) const { + if (current_state_ < 0) { + return absl::StrCat("Choose local board ", action_id); + } else { + return absl::StrCat( + "Local board ", current_state_, ": ", + local_states_[current_state_]->ActionToString(player, action_id)); + } +} + +UltimateTTTState::UltimateTTTState(std::shared_ptr game) + : State(game), + ttt_game_( + static_cast(game.get())->TicTacToeGame()), + current_state_(-1) { + std::fill(meta_board_.begin(), meta_board_.end(), ttt::CellState::kEmpty); + for (int i = 0; i < ttt::kNumCells; ++i) { + local_states_[i] = ttt_game_->NewInitialState(); + } +} + +std::string UltimateTTTState::ToString() const { + std::string str; + const int rows = ttt::kNumRows * 3; + const int cols = ttt::kNumCols * 3; + int meta_row = 0; + int meta_col = 0; + for (int r = 0; r < rows; ++r) { + meta_row = r / 3; + int local_row = r % 3; + for (int c = 0; c < cols; ++c) { + meta_col = c / 3; + int local_col = c % 3; + int state_idx = meta_row * 3 + meta_col; + SPIEL_CHECK_GE(state_idx, 0); + SPIEL_CHECK_LT(state_idx, local_states_.size()); + absl::StrAppend(&str, ttt::StateToString(local_state(state_idx)->BoardAt( + local_row, local_col))); + if (local_col == 2) { + absl::StrAppend(&str, c == 8 ? "\n" : " "); + } + if (local_row == 2 && r < 8 && c == 8) { + absl::StrAppend(&str, "\n"); + } + } + } + return str; +} + +bool UltimateTTTState::IsTerminal() const { return outcome_ != kUnfinished; } + +std::vector UltimateTTTState::Returns() const { + std::vector returns = {0.0, 0.0}; + if (outcome_ >= 0) { + returns[outcome_] = 1.0; + returns[1 - outcome_] = -1.0; + } + return returns; +} + +std::string UltimateTTTState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return HistoryString(); +} + +std::string UltimateTTTState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +void UltimateTTTState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + // Treat `values` as a 3-d tensor: 3 x 9 x 9: + // - empty versus x versus o, then + // - local state index, then + // - then 3x3 position within the local board. 
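+  // The view is zeroed first (reset=true); the loops below then write a
+  // one-hot encoding: exactly one of the three planes is set for each
+  // (local board, cell) pair, i.e. 81 ones in total.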
+ TensorView<3> view(values, {ttt::kCellStates, ttt::kNumCells, ttt::kNumCells}, + /*reset*/true); + for (int state = 0; state < ttt::kNumCells; ++state) { + for (int cell = 0; cell < ttt::kNumCells; ++cell) { + view[{static_cast(local_state(state)->BoardAt(cell)), + state, cell}] = 1.0; + } + } +} + +void UltimateTTTState::UndoAction(Player player, Action move) {} + +UltimateTTTState::UltimateTTTState(const UltimateTTTState& other) + : State(other), + current_player_(other.current_player_), + outcome_(other.outcome_), + ttt_game_(other.ttt_game_), + current_state_(other.current_state_) { + for (int i = 0; i < ttt::kNumCells; ++i) { + meta_board_[i] = other.meta_board_[i]; + local_states_[i] = other.local_states_[i]->Clone(); + } +} + +std::unique_ptr UltimateTTTState::Clone() const { + return std::unique_ptr(new UltimateTTTState(*this)); +} + +UltimateTTTGame::UltimateTTTGame(const GameParameters& params) + : Game(kGameType, params), ttt_game_(LoadGame("tic_tac_toe")) {} + +} // namespace ultimate_tic_tac_toe +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/ultimate_tic_tac_toe/ultimate_tic_tac_toe.h b/scenarios/bargaining/open_spiel/open_spiel/games/ultimate_tic_tac_toe/ultimate_tic_tac_toe.h new file mode 100644 index 0000000..e6571b3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/ultimate_tic_tac_toe/ultimate_tic_tac_toe.h @@ -0,0 +1,112 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_ULTIMATE_TIC_TAC_TOE_H_ +#define OPEN_SPIEL_GAMES_ULTIMATE_TIC_TAC_TOE_H_ + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/games/tic_tac_toe/tic_tac_toe.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +// A meta-game of Tic-Tac-Toe game played on 9 local boards: +// https://en.wikipedia.org/wiki/Ultimate_tic-tac-toe +// +// Parameters: none + +namespace open_spiel { +namespace ultimate_tic_tac_toe { + +constexpr const int kNumSubgames = 9; +constexpr Player kUnfinished = kInvalidPlayer - 1; + +// State of an in-play game. +class UltimateTTTState : public State { + public: + UltimateTTTState(std::shared_ptr game); + + UltimateTTTState(const UltimateTTTState& other); + UltimateTTTState& operator=(const UltimateTTTState&) = default; + + Player CurrentPlayer() const override { + return IsTerminal() ? 
kTerminalPlayerId : current_player_; + } + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + void UndoAction(Player player, Action move) override; + std::vector LegalActions() const override; + void DoApplyAction(Action move) override; + + private: + tic_tac_toe::TicTacToeState* local_state(int idx) const { + return static_cast(local_states_[idx].get()); + } + bool AllLocalStatesTerminal() const; + + Player current_player_ = 0; // Player zero goes first + Player outcome_ = kUnfinished; + + // The tic-tac-toe subgames, arranged in the same order as moves. + const tic_tac_toe::TicTacToeGame* ttt_game_; + std::array, tic_tac_toe::kNumCells> local_states_; + std::array meta_board_; + int current_state_; +}; + +// Game object. +class UltimateTTTGame : public Game { + public: + explicit UltimateTTTGame(const GameParameters& params); + int NumDistinctActions() const override { return tic_tac_toe::kNumCells; } + std::unique_ptr NewInitialState() const override { + return std::unique_ptr(new UltimateTTTState(shared_from_this())); + } + int NumPlayers() const override { return tic_tac_toe::kNumPlayers; } + double MinUtility() const override { return -1; } + absl::optional UtilitySum() const override { return 0; } + double MaxUtility() const override { return 1; } + std::vector ObservationTensorShape() const override { + return {tic_tac_toe::kCellStates, tic_tac_toe::kNumCells, + tic_tac_toe::kNumRows, tic_tac_toe::kNumCols}; + } + int MaxGameLength() const override { + return ttt_game_->MaxGameLength() * kNumSubgames * 2; + } + + const tic_tac_toe::TicTacToeGame* TicTacToeGame() const { + return static_cast(ttt_game_.get()); + } + + private: + std::shared_ptr ttt_game_; +}; + +} // namespace ultimate_tic_tac_toe +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_ULTIMATE_TIC_TAC_TOE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/ultimate_tic_tac_toe/ultimate_tic_tac_toe_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/ultimate_tic_tac_toe/ultimate_tic_tac_toe_test.cc new file mode 100644 index 0000000..23c1172 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/ultimate_tic_tac_toe/ultimate_tic_tac_toe_test.cc @@ -0,0 +1,36 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
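+
+// Basic sanity checks for ultimate_tic_tac_toe: the game loads, has no
+// chance nodes, and survives 100 random playthroughs.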
+ +#include "open_spiel/spiel.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace ultimate_tic_tac_toe { +namespace { + +namespace testing = open_spiel::testing; + +void BasicUltimateTicTacToeTests() { + testing::LoadGameTest("ultimate_tic_tac_toe"); + testing::NoChanceOutcomesTest(*LoadGame("ultimate_tic_tac_toe")); + testing::RandomSimTest(*LoadGame("ultimate_tic_tac_toe"), 100); +} + +} // namespace +} // namespace ultimate_tic_tac_toe +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::ultimate_tic_tac_toe::BasicUltimateTicTacToeTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/.gitignore b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/.gitignore new file mode 100644 index 0000000..b6f9e76 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/.gitignore @@ -0,0 +1 @@ +acpc/ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/CMakeLists.txt b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/CMakeLists.txt new file mode 100644 index 0000000..6618eb8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/CMakeLists.txt @@ -0,0 +1,46 @@ +set(HEADER_FILES + acpc_cpp/acpc_game.h + logic/card_set.h + logic/gamedef.h +) + +set(CLIB_FILES + acpc/project_acpc_server/game.h + acpc/project_acpc_server/game.cc + acpc/project_acpc_server/rng.h + acpc/project_acpc_server/rng.cc +) + +set(SOURCE_FILES + acpc_cpp/acpc_game.cc + logic/card_set.cc + logic/gamedef.cc +) + +add_library(universal_poker_clib OBJECT ${CLIB_FILES} ) +set_target_properties(universal_poker_clib PROPERTIES POSITION_INDEPENDENT_CODE ON) + +# The library contains header and source files. +add_library(universal_poker_lib OBJECT + ${SOURCE_FILES} + ${HEADER_FILES} +) + +add_executable(universal_poker_acpc_cpp_test acpc_cpp/acpc_game_test.cc + ${SOURCE_FILES} $ $) +target_link_libraries(universal_poker_acpc_cpp_test universal_poker_clib) + +add_test(universal_poker_acpc_cpp_test universal_poker_acpc_cpp_test) + +add_executable(universal_poker_card_set_test logic/card_set_test.cc + ${SOURCE_FILES} $ $) +target_link_libraries(universal_poker_card_set_test universal_poker_clib) + +add_test(universal_poker_card_set_test universal_poker_card_set_test) + + +add_executable(universal_poker_gamedef_test logic/gamedef_test.cc + ${SOURCE_FILES} $ $) +target_link_libraries(universal_poker_gamedef_test universal_poker_clib) + +add_test(universal_poker_gamedef_test universal_poker_gamedef_test) diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/README.md b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/README.md new file mode 100644 index 0000000..a9c2010 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/README.md @@ -0,0 +1,14 @@ +# Universal Poker support + +This has been contributed by dennisjay in November 2019 (See +https://github.com/deepmind/open_spiel/pull/97), and is available as an optional +dependency. See the [install.md](/docs/install.md) for documentation +and `open_spiel/scripts/global_variables.sh` to enable this. + +This is a wrapper around the Annual Computer Poker Competition bot (ACPC) +environment. See http://www.computerpokercompetition.org/. The code is initially +available at https://github.com/ethansbrown/acpc + +Thanks to dennisjay for contributing this to the community! If it's useful to +you, feel free to help supporting it. 
It has not been extensively reviewed or +tested by the DeepMind OpenSpiel team. diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/acpc_cpp/acpc_game.cc b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/acpc_cpp/acpc_game.cc new file mode 100644 index 0000000..c7b9c2e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/acpc_cpp/acpc_game.cc @@ -0,0 +1,298 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/universal_poker/acpc_cpp/acpc_game.h" + +#include +#include + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/games/universal_poker/acpc/project_acpc_server/game.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace universal_poker { +namespace acpc_cpp { + +constexpr int kStringBuffersize = 4096; + +namespace { +project_acpc_server::Action GetAction(ACPCState::ACPCActionType type, + int32_t size) { + project_acpc_server::Action acpc_action; + + acpc_action.size = size; + switch (type) { + case ACPCState::ACPC_CALL: + acpc_action.type = project_acpc_server::a_call; + break; + case ACPCState::ACPC_FOLD: + acpc_action.type = project_acpc_server::a_fold; + break; + case ACPCState::ACPC_RAISE: + acpc_action.type = project_acpc_server::a_raise; + break; + default: + acpc_action.type = project_acpc_server::a_invalid; + break; + } + return acpc_action; +} + +void readGameToStruct(const std::string &gameDef, + project_acpc_server::Game *acpc_game) { + char buf[kStringBuffersize]; + gameDef.copy(buf, kStringBuffersize); + + FILE *f = fmemopen(&buf, kStringBuffersize, "r"); + project_acpc_server::Game *game = project_acpc_server::readGame(f); + memcpy(acpc_game, game, sizeof(project_acpc_server::Game)); + + free(game); + fclose(f); +} + +} // namespace + +ACPCGame::ACPCGame(const std::string &gameDef) { + readGameToStruct(gameDef, &acpc_game_); +} + +ACPCGame::ACPCGame(const ACPCGame &other) + : handId_(other.handId_), acpc_game_(other.acpc_game_) {} + +// We compare the values for all the fields. For arrays, note that only the +// first `numPlayers` or `numRounds` values are meaningful, the rest being +// non-initialized. +bool ACPCGame::operator==(const ACPCGame &other) const { + // See project_acpc_server/game.h:42. 12 fields total. 
+ // int32_t stack[ MAX_PLAYERS ]; + // int32_t blind[ MAX_PLAYERS ]; + // int32_t raiseSize[ MAX_ROUNDS ]; + // enum BettingType bettingType; + // uint8_t numPlayers; + // uint8_t numRounds; + // uint8_t firstPlayer[ MAX_ROUNDS ]; + // uint8_t maxRaises[ MAX_ROUNDS ]; + // uint8_t numSuits; + // uint8_t numRanks; + // uint8_t numHoleCards; + // uint8_t numBoardCards[ MAX_ROUNDS ]; + const project_acpc_server::Game *first = &acpc_game_; + const project_acpc_server::Game &second = other.Game(); + const int num_players = first->numPlayers; + const int num_rounds = first->numRounds; + + // We check for `raiseSize` only for limit betting. + if (first->bettingType != second.bettingType) { + return false; + } + if (first->bettingType == project_acpc_server::limitBetting) { + if (!std::equal(first->raiseSize, first->raiseSize + num_rounds, + second.raiseSize)) { + return false; + } + } + return ( // new line + first->numPlayers == second.numPlayers && + first->numRounds == second.numRounds && + std::equal(first->stack, first->stack + num_players, second.stack) && + std::equal(first->blind, first->blind + num_players, second.blind) && + std::equal(first->firstPlayer, first->firstPlayer + num_rounds, + second.firstPlayer) && + std::equal(first->maxRaises, first->maxRaises + num_rounds, + second.maxRaises) && + first->numSuits == second.numSuits && + first->numRanks == second.numRanks && + first->numHoleCards == second.numHoleCards && + std::equal(first->numBoardCards, first->numBoardCards + num_rounds, + second.numBoardCards)); +} + +std::string ACPCGame::ToString() const { + char buf[kStringBuffersize]; + memset(buf, 0, kStringBuffersize); + FILE *f = fmemopen(&buf, kStringBuffersize, "w"); + project_acpc_server::printGame(f, &acpc_game_); + std::ostringstream result; + rewind(f); + result << buf; + fclose(f); + return result.str(); +} + +uint8_t ACPCGame::GetNbBoardCardsRequired(uint8_t round) const { + SPIEL_CHECK_LT(round, acpc_game_.numRounds); + + uint8_t nbCards = 0; + for (int r = 0; r <= round; ++r) { + nbCards += acpc_game_.numBoardCards[r]; + } + return nbCards; +} + +uint8_t ACPCGame::GetTotalNbBoardCards() const { + uint8_t nbCards = 0; + for (int r = 0; r < acpc_game_.numRounds; ++r) { + nbCards += acpc_game_.numBoardCards[r]; + } + + return nbCards; +} + + +uint32_t ACPCGame::StackSize(uint8_t player) const { + SPIEL_CHECK_LE(0, player); + SPIEL_CHECK_LT(player, GetNbPlayers()); + return acpc_game_.stack[player]; +} + +uint32_t ACPCGame::BlindSize(uint8_t player) const { + SPIEL_CHECK_LE(0, player); + SPIEL_CHECK_LT(player, GetNbPlayers()); + return acpc_game_.blind[player]; +} +uint32_t ACPCGame::TotalMoney() const { + int money_pool = 0; + for (int pl = 0; pl < acpc_game_.numPlayers; ++pl) { + money_pool += acpc_game_.stack[pl]; + } + return money_pool; +} + +std::string ACPCState::ToString() const { + char buf[kStringBuffersize]; + project_acpc_server::printState(game_->MutableGame(), &acpcState_, + kStringBuffersize, buf); + std::ostringstream out; + + out << buf << std::endl << "Spent: ["; + for (int p = 0; p < game_->GetNbPlayers(); ++p) { + out << "P" << p << ": " << acpcState_.spent[p] << " "; + } + out << "]" << std::endl; + + return out.str(); +} + + +double ACPCState::ValueOfState(const uint8_t player) const { + SPIEL_CHECK_TRUE(stateFinished(&acpcState_)); + return project_acpc_server::valueOfState(game_->MutableGame(), &acpcState_, + player); +} +int ACPCState::RaiseIsValid(int32_t *minSize, int32_t *maxSize) const { + return raiseIsValid(game_->MutableGame(), 
&acpcState_, minSize, maxSize); +} + +uint8_t ACPCState::NumFolded() const { + return project_acpc_server::numFolded(game_->MutableGame(), &acpcState_); +} + +uint8_t ACPCState::CurrentPlayer() const { + return project_acpc_server::currentPlayer(game_->MutableGame(), &acpcState_); +} + +ACPCState::ACPCState(const ACPCGame *game) + // This is necessary as we need to value-initialize acpcState_. + : game_(game), acpcState_() { + project_acpc_server::initState(game_->MutableGame(), + game_->HandId() + /*TODO this make a unit test fail++*/, + &acpcState_); +} + +void ACPCState::DoAction(const ACPCState::ACPCActionType actionType, + const int32_t size) { + project_acpc_server::Action a = GetAction(actionType, size); + SPIEL_CHECK_TRUE(project_acpc_server::isValidAction(game_->MutableGame(), + &acpcState_, false, &a)); + project_acpc_server::doAction(game_->MutableGame(), &a, &acpcState_); +} + +int ACPCState::IsValidAction(const ACPCState::ACPCActionType actionType, + const int32_t size) const { + project_acpc_server::Action a = GetAction(actionType, size); + return project_acpc_server::isValidAction(game_->MutableGame(), &acpcState_, + false, &a); +} + +uint32_t ACPCState::Money(const uint8_t player) const { + SPIEL_CHECK_LE(player, game_->GetNbPlayers()); + return game_->StackSize(player) - acpcState_.spent[player]; +} + +uint32_t ACPCState::Ante(const uint8_t player) const { + SPIEL_CHECK_LE(player, game_->GetNbPlayers()); + return acpcState_.spent[player]; +} + +uint32_t ACPCState::TotalSpent() const { + return static_cast(absl::c_accumulate(acpcState_.spent, 0)); +} + +uint32_t ACPCState::CurrentSpent(const uint8_t player) const { + SPIEL_CHECK_LE(player, game_->GetNbPlayers()); + return acpcState_.spent[player]; +} + +std::string ACPCState::ActionToString( + const project_acpc_server::Action &action) const { + switch (action.type) { + case ACPCState::ACPC_CALL: + return "c"; + case ACPCState::ACPC_FOLD: + return "f"; + case ACPCState::ACPC_RAISE: + if (game_->IsLimitGame()) return "r"; + return absl::StrCat("r", action.size); + default: + SpielFatalError("Should never happen."); + } +} + +std::string ACPCState::BettingSequence(uint8_t round) const { + SPIEL_CHECK_LT(round, game_->NumRounds()); + std::string out; + for (int a = 0; a < acpcState_.numActions[round]; a++) { + absl::StrAppend(&out, ActionToString(acpcState_.action[round][a])); + } + return out; +} + +void ACPCState::SetHoleAndBoardCards(uint8_t holeCards[10][3], + uint8_t boardCards[7], + uint8_t nbHoleCards[10], + uint8_t nbBoardCards) { + for (int p = 0; p < game_->GetNbPlayers(); ++p) { + SPIEL_CHECK_EQ(nbHoleCards[p], game_->GetNbHoleCardsRequired()); + for (int c = 0; c < nbHoleCards[p]; ++c) { + acpcState_.holeCards[p][c] = holeCards[p][c]; + } + } + + SPIEL_CHECK_EQ(nbBoardCards, game_->GetNbBoardCardsRequired(GetRound())); + for (int c = 0; c < nbBoardCards; ++c) { + acpcState_.boardCards[c] = boardCards[c]; + } +} + +} // namespace acpc_cpp +} // namespace universal_poker +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/acpc_cpp/acpc_game.h b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/acpc_cpp/acpc_game.h new file mode 100644 index 0000000..2cfddbb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/acpc_cpp/acpc_game.h @@ -0,0 +1,161 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance 
with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_ACPC_GAME_H +#define OPEN_SPIEL_ACPC_GAME_H + +#include +#include + +#include "open_spiel/abseil-cpp/absl/memory/memory.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/games/universal_poker/acpc/project_acpc_server/game.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace universal_poker { +namespace acpc_cpp { + +struct RawACPCGame : public ::project_acpc_server::Game {}; +struct RawACPCState : public ::project_acpc_server::State {}; +struct RawACPCAction : public ::project_acpc_server::Action {}; + +class ACPCGame { + public: + explicit ACPCGame(const std::string& gameDef); + explicit ACPCGame(const ACPCGame& other); + + std::string ToString() const; + + bool IsLimitGame() const { + return acpc_game_.bettingType == project_acpc_server::limitBetting; + } + + // The total number of betting rounds. + int NumRounds() const { return acpc_game_.numRounds; } + int GetNbPlayers() const { return acpc_game_.numPlayers; } + + // Returns the number of private cards for each player in this game. + uint8_t GetNbHoleCardsRequired() const { return acpc_game_.numHoleCards; } + uint8_t GetNbBoardCardsRequired(uint8_t round) const; + uint8_t NumSuitsDeck() const { return acpc_game_.numSuits; } + uint8_t NumRanksDeck() const { return acpc_game_.numRanks; } + uint8_t NumBoardCards(int round) const { + return acpc_game_.numBoardCards[round]; + } + uint32_t StackSize(uint8_t player) const; + // Returns the money amount that is used in the game (sum of all stacks). + uint32_t TotalMoney() const; + uint32_t BlindSize(uint8_t player) const; + uint8_t GetTotalNbBoardCards() const; + + // Accessors. + ::project_acpc_server::Game* MutableGame() const { return &acpc_game_; } + const project_acpc_server::Game& Game() const { return acpc_game_; } + uint32_t HandId() const { return handId_; } + absl::Span blinds() const { + return absl::Span(acpc_game_.blind, acpc_game_.numPlayers); + } + + // Checks that the underlying acpc_game_ structs have all their fields equal. + bool operator==(const ACPCGame& other) const; + bool operator!=(const ACPCGame& other) const { return !(*this == other); } + + private: + uint32_t handId_ = 0; + mutable project_acpc_server::Game acpc_game_; +}; + +class ACPCState { + public: + enum ACPCActionType { ACPC_FOLD, ACPC_CALL, ACPC_RAISE, ACPC_INVALID }; + + explicit ACPCState(const ACPCGame* game); + explicit ACPCState(const ACPCState& other) + : game_(other.game_), acpcState_(other.acpcState_) {} + + void SetHoleAndBoardCards(uint8_t holeCards[10][3], uint8_t boardCards[7], + uint8_t nbHoleCards[10], uint8_t nbBoardCards); + + // The current player is the first player in a new round, or the next player + // within a round. 
+ uint8_t CurrentPlayer() const; + uint8_t NumFolded() const; + uint32_t Money(const uint8_t player) const; + uint32_t Ante(const uint8_t player) const; + uint32_t TotalSpent() const; + uint32_t CurrentSpent(const uint8_t player) const; + std::string ToString() const; + std::string BettingSequence(uint8_t round) const; + int RaiseIsValid(int32_t* minSize, int32_t* maxSize) const; + int IsValidAction(const ACPCActionType actionType, const int32_t size) const; + void DoAction(const ACPCActionType actionType, const int32_t size); + double ValueOfState(const uint8_t player) const; + + // Trivial methods. + bool IsFinished() const { return stateFinished(&acpcState_); } + uint32_t MaxSpend() const { return acpcState_.maxSpent; } + uint8_t hole_cards(int player_index, int card_index) const { + SPIEL_CHECK_LT(player_index, MAX_PLAYERS); + SPIEL_CHECK_LT(card_index, MAX_HOLE_CARDS); + return acpcState_.holeCards[player_index][card_index]; + } + uint8_t board_cards(int card_index) const { + SPIEL_CHECK_LT(card_index, MAX_BOARD_CARDS); + return acpcState_.boardCards[card_index]; + } + + void AddHoleCard(int player_index, int card_index, uint8_t card) { + SPIEL_CHECK_LT(player_index, MAX_PLAYERS); + SPIEL_CHECK_LT(card_index, MAX_HOLE_CARDS); + acpcState_.holeCards[player_index][card_index] = card; + } + + void AddBoardCard(int card_index, uint8_t card) { + SPIEL_CHECK_LT(card_index, MAX_BOARD_CARDS); + acpcState_.boardCards[card_index] = card; + } + + // Set the spent amounts uniformly for each player. + // Must be divisible by the number of players! + void SetPotSize(int pot_size) { + SPIEL_CHECK_GE(pot_size, 0); + SPIEL_CHECK_LE(pot_size, game_->TotalMoney()); + SPIEL_CHECK_EQ(pot_size % game_->GetNbPlayers(), 0); + const int num_players = game_->GetNbPlayers(); + for (int pl = 0; pl < num_players; ++pl) { + acpcState_.spent[pl] = pot_size / num_players; + } + } + + // Returns the current round 0-indexed round id (<= game.NumRounds() - 1). + // A showdown is still in game.NumRounds()-1, not a separate round + int GetRound() const { return acpcState_.round; } + + const ACPCGame* game() const { return game_; } + const RawACPCState& raw_state() const { return acpcState_; } + RawACPCState* mutable_state() { return &acpcState_; } + + private: + std::string ActionToString(const project_acpc_server::Action& action) const; + + const ACPCGame* game_; + mutable RawACPCState acpcState_; +}; + +} // namespace acpc_cpp +} // namespace universal_poker +} // namespace open_spiel + +#endif // OPEN_SPIEL_ACPC_GAME_H diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/acpc_cpp/acpc_game_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/acpc_cpp/acpc_game_test.cc new file mode 100644 index 0000000..d770c9a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/acpc_cpp/acpc_game_test.cc @@ -0,0 +1,66 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
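+
+// Smoke test for the ACPC wrapper: parses a small two-player no-limit
+// GAMEDEF, applies call/fold actions until the hand finishes, prints the
+// terminal state values, and checks that the total money in play matches
+// the configured stacks (2 * 1200).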
+ +#include "open_spiel/games/universal_poker/acpc_cpp/acpc_game.h" + +#include + +namespace open_spiel { +namespace universal_poker { +namespace acpc_cpp { + +void BasicACPCTests() { + const std::string gameDesc( + "GAMEDEF\nnolimit\nnumPlayers = 2\nnumRounds = 2\nstack = 1200 " + "1200\nblind = 100 100\nfirstPlayer = 1 1\nnumSuits = 2\nnumRanks = " + "3\nnumHoleCards = 1\nnumBoardCards = 0 1\nEND GAMEDEF"); + + ACPCGame game(gameDesc); + ACPCState state(&game); + + std::cout << game.ToString() << std::endl; + std::cout << state.ToString() << std::endl; + + while (!state.IsFinished()) { + int32_t minRaise = 0, maxRaise = 0; + if (state.RaiseIsValid(&minRaise, &maxRaise)) { + minRaise = state.MaxSpend() > minRaise ? state.MaxSpend() : minRaise; + } + + const ACPCState::ACPCActionType available_actions[] = { + ACPCState::ACPC_CALL, ACPCState::ACPC_FOLD, + // ACPCState::ACPC_RAISE + }; + + for (const auto &action : available_actions) { + if (state.IsValidAction(action, 0)) { + state.DoAction(action, 0); + std::cout << state.ToString() << std::endl; + } + } + } + + std::cout << state.ValueOfState(0) << std::endl; + std::cout << state.ValueOfState(1) << std::endl; + + SPIEL_CHECK_EQ(game.TotalMoney(), 2400); +} + +} // namespace acpc_cpp +} // namespace universal_poker +} // namespace open_spiel + +int main(int argc, char **argv) { + open_spiel::universal_poker::acpc_cpp::BasicACPCTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/endgames/subgame1.txt b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/endgames/subgame1.txt new file mode 100644 index 0000000..8dd18f7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/endgames/subgame1.txt @@ -0,0 +1 @@ +0.5525608954951167 0.24590964455339098 0.5525608954951167 0.9124763766209736 0.0 0.0 0.0 0.9264348044747315 0.0 0.0 0.0 0.8450996933806387 0.0 0.0 0.0 0.8977677141904042 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.726217280803594 0.0 0.0 0.0 0.33951842878981914 0.0 0.0 0.0 0.3919041574944509 0.0 0.0 0.0 0.3542685103120463 0.0 0.0 0.0 0.3259446441819802 0.0 0.0 0.0 0.2966936340512656 0.6442531392076368 0.6679364194256714 0.6442531392076368 0.5579758972668625 0.506946624960345 0.8287839325285677 0.506946624960345 0.4110750665484937 0.6151636630466873 0.0 0.9122203003958169 0.0 0.0 0.0 0.926336299166054 0.0 0.0 0.0 0.8449814298475188 0.0 0.0 0.0 0.8978394863369471 0.0 0.0 0.0 0.6726865025599167 0.0 0.0 0.0 0.6935337413266147 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.38350101642933704 0.0 0.0 0.0 0.2972811288241088 0.0 0.0 0.0 0.5847575088479181 0.0 0.0 0.6671418458080351 0.6211379630797912 0.6293653400647091 0.6105706533055352 0.7586405131479691 0.5530091817809334 0.8709769343455055 0.7736923943287091 0.4110750665484937 0.0 0.0 0.9124763849419776 0.0 0.0 0.0 0.9264316385815408 0.0 0.0 0.0 0.8451149124219096 0.0 0.0 0.0 0.898382072500427 0.0 0.0 0.0 0.9161742157070223 0.0 0.0 0.0 0.898820723499246 0.0 0.0 0.0 0.621654504329188 0.0 0.0 0.0 0.927859954496664 0.0 0.0 0.0 0.8229762156299448 0.0 0.0 0.0 0.7979522137442466 0.0 0.6482809003096925 0.672866740284991 0.6469191042344109 0.672866740284991 0.6170419203488726 0.5052428311298301 0.47337878204924333 0.5052428311298301 0.0 0.0 0.0 0.9122203003958169 0.0 0.0 0.0 0.926336299166054 0.0 0.0 0.0 0.8449814298475188 0.0 0.0 0.0 0.8978394863369471 0.0 0.0 0.0 0.6726865025599167 0.0 0.0 0.0 0.6935337413266147 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.2972811288241088 0.0 0.0 0.0 0.5847575088479181 0.6671418458080351 0.6105706533055352 
0.6293653400647091 0.6211379630797912 0.7586405131479691 0.7736923943287091 0.8709769343455055 0.5530091817809334 0.5551283979128921 0.6068980585298018 0.5551283979128921 0.673247922999667 0.08104491399707571 0.08135337445080341 0.08104491399707571 0.44416096440370667 0.39643289014241384 0.3969157500498022 0.39643289014241384 0.8197253826954503 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.7918178139887161 0.0 0.0 0.0 0.5975043743072632 0.0 0.0 0.0 0.36877964958195425 0.0 0.0 0.0 0.7640888678332038 0.0 0.0 0.0 0.7988626226124221 0.0036166617371704295 0.0036166617371704295 0.0036166617371704295 0.5453625972408512 0.9014924929252295 0.9160988692259575 0.9014924929252295 0.4846808653212705 0.7805472987167624 0.9293973074767413 0.7805472987167624 0.5934233669826264 0.5036750516086435 0.08131963128646295 0.673139910275715 0.08129744279031978 0.08118719420279848 0.39649397528424124 0.4433892197327073 0.3966954830511139 0.39531401174819286 0.0 0.7019687917798897 0.0 0.0 0.0 0.5312529292263071 0.0 0.0 0.0 0.48841036869694354 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.5953954249724867 0.0 0.0 0.0 0.6367508481858097 0.0 0.0 0.0036166617371704295 0.74405243257076 0.0036166617371704295 0.0036166617371704295 0.8846819341996027 0.40103434714008657 0.9057294843602655 0.9184485681965171 0.7993282170365553 0.47861767531321386 0.9068302372956956 0.8351287038252521 0.5934233669826264 0.08134296437213664 0.08110549593977409 0.6732597874273655 0.08110549593977409 0.3970364920030192 0.39629039609477157 0.4441719430856976 0.39629039609477157 0.0 0.0 0.8552304609327643 0.0 0.0 0.0 0.8754671318953239 0.0 0.0 0.0 0.9942281675844704 0.0 0.0 0.0 0.7589705833448419 0.0 0.0 0.0 0.966224876341838 0.0 0.0 0.0 0.8438942611880071 0.0 0.0036166617371704295 0.0036166617371704295 0.8550286797717733 0.0036166617371704295 0.8552711825320203 0.8757115747407556 0.5922936879920979 0.8757115747407556 0.7767313233204344 0.7570444575575919 0.4785055587491447 0.7570444575575919 0.08131963128646295 0.08118719420279848 0.08129744279031978 0.673139910275715 0.39649397528424124 0.39531401174819286 0.3966954830511139 0.4433892197327073 0.0 0.0 0.0 0.7019687917798897 0.0 0.0 0.0 0.5312529292263071 0.0 0.0 0.0 0.48841036869694354 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.6367508481858097 0.0036166617371704295 0.0036166617371704295 0.0036166617371704295 0.74405243257076 0.8846819341996027 0.9184485681965171 0.9057294843602655 0.40103434714008657 0.7993282170365553 0.8351287038252521 0.9068302372956956 0.47861767531321386 0.580456654893795 0.6123966593367348 0.580456654893795 1.2633069125272083E-4 0.9014387683151357 0.9004438812491171 0.9014387683151357 0.06462996132987675 0.5291079943073224 0.5303968857434687 0.5291079943073224 0.0 0.0 0.0 0.0 0.6369752394992232 0.0 0.0 0.0 0.7375225857879896 0.0 0.0 0.0 0.5942599032888408 0.0 0.0 0.0 0.8858159425760749 0.0 0.0 0.0 0.6740292015981141 0.5938247994413062 0.5956049992563403 0.5938247994413062 0.7069368568321888 0.9051955413647935 0.8938834045243916 0.9051955413647935 0.3504795729856201 0.9081565225739252 0.8864088582843048 0.9081565225739252 0.6040839023883876 0.6085481345607974 0.9018718241805073 1.2632185392600024E-4 0.901794864884057 0.8989139988185468 0.530081215569437 0.24045366218411304 0.5295669075865956 0.5289969760296138 0.0 0.79007436945147 0.0 0.0 0.0 0.21982500477645786 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.7399108920044069 0.0 0.0 0.0 0.8783562378253583 0.0 0.0 0.5938754103504611 0.5687359546938421 0.5932991083295371 0.5905001052461277 0.9074440161788324 0.6853210650461605 0.8825627327308357 0.8824854160561908 
0.9497270210950659 0.3514908220142724 0.9250324888915921 0.9439117011667378 0.6040839023883876 0.9035974113379891 0.9016431418276734 1.2633069125272083E-4 0.9016431418276734 0.5303103507514576 0.529806894253349 0.3201230991630616 0.529806894253349 0.0 0.0 0.8725207758483315 0.0 0.0 0.0 0.8832703995032195 0.0 0.0 0.0 0.7766013461471992 0.0 0.0 0.0 0.881735713207197 0.0 0.0 0.0 0.9148032690587105 0.0 0.5951680247517703 0.5945215228832605 0.6944496303566151 0.5945215228832605 0.880287905553631 0.9015501105017045 0.7084605523529786 0.9015501105017045 0.9162971124476741 0.9117493841854675 0.2664855910992478 0.9117493841854675 0.9018718241805073 0.8989139988185468 0.901794864884057 1.2632185392600024E-4 0.530081215569437 0.5289969760296138 0.5295669075865956 0.24045366218411304 0.0 0.0 0.0 0.79007436945147 0.0 0.0 0.0 0.21982500477645786 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.8783562378253583 0.5938754103504611 0.5905001052461277 0.5932991083295371 0.5687359546938421 0.9074440161788324 0.8824854160561908 0.8825627327308357 0.6853210650461605 0.9497270210950659 0.9439117011667378 0.9250324888915921 0.3514908220142724 0.5226554557782492 0.5101801694175621 0.5226554557782492 0.06541842740905035 0.7377342380572879 0.5041121570358847 0.7377342380572879 0.0 0.5193527906548145 0.7158222397098223 0.5193527906548145 0.7340079149088354 0.09042133496914302 0.09138739372131414 0.09042133496914302 0.6067689422954504 0.0 0.0 0.0 0.5443120576826193 0.0 0.0 0.0 0.3623839187209585 0.09730962249533708 0.09716243366970648 0.09730962249533708 0.5906137615370669 0.9341947251143159 0.9455758393875298 0.9341947251143159 0.7574686692082899 0.8487549287649817 0.8594434747869035 0.8487549287649817 0.26062905676237486 0.9472543679674312 0.8769891653965683 0.9472543679674312 0.5223379565654668 0.5422393477170633 0.6430790072636817 0.05468279615404035 0.8004227768040819 0.6183740825764863 0.0 0.17526454374243408 0.700837241844725 0.5562145050966214 0.09484064198406804 0.5932504986953682 0.09716118228460424 0.08576731241906863 0.0 0.0 0.0 0.0 0.0 0.7346996048105201 0.0 0.0 0.09721789080690947 0.4074789571327503 0.09705719854693116 0.09708696729776245 0.9433526419107428 0.5447994089077229 0.9401785146033358 0.9394458723073391 0.7721490452847838 0.7539060171225981 0.7845046211058461 0.8393318009681334 0.9317640758930938 0.2535105237487563 0.9300013448015423 0.8558184599475775 0.5223379565654668 0.7823243182809807 0.5323996056730029 0.07778284867736628 0.5323996056730029 0.0 0.8087853684736297 0.20161949300281104 0.8087853684736297 0.09591735914616539 0.08298048483638443 0.8413419756159216 0.08298048483638443 0.0 0.0 0.8250007321203824 0.0 0.0 0.0 0.9785334894110201 0.0 0.09554061264026262 0.09711234524996484 0.9431692168446659 0.09711234524996484 0.9426854337695205 0.9414438760493733 0.6354982828756532 0.9414438760493733 0.91972278175616 0.7746621790537375 0.7659370000823373 0.7746621790537375 0.8363128793433722 0.8126086010265996 0.17675541391842614 0.8126086010265996 0.6430790072636817 0.6183740825764863 0.8004227768040819 0.05468279615404035 0.0 0.5562145050966214 0.700837241844725 0.17526454374243408 0.09484064198406804 0.08576731241906863 0.09716118228460424 0.5932504986953682 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.09721789080690947 0.09708696729776245 0.09705719854693116 0.4074789571327503 0.9433526419107428 0.9394458723073391 0.9401785146033358 0.5447994089077229 0.7721490452847838 0.8393318009681334 0.7845046211058461 0.7539060171225981 0.9317640758930938 0.8558184599475775 0.9300013448015423 0.2535105237487563 
0.37308708179269934 0.42575501669445187 0.37308708179269934 0.0 0.7765746982347941 0.8082366840286843 0.7765746982347941 0.10798169484199746 0.6333947729770248 0.5789370056913133 0.6333947729770248 0.4207626129141185 0.0 0.5854357754648549 0.0 0.5315337376448723 0.4668634525604467 0.34701757097134506 0.0 0.4854321789122203 0.43379681502221223 0.423116972509803 0.43379681502221223 0.3002425642490638 0.830487117759815 0.842235604455535 0.830487117759815 0.660884242548865 0.600249056652088 0.6123754567273613 0.600249056652088 0.556520673329893 0.6120539848109533 0.6565398815612009 0.6120539848109533 0.45233094283160125 0.46562538013010524 0.0 0.05625504089208501 0.6976249480326261 0.7629829918437078 0.7108791049847881 0.1172636519827766 0.792242780233925 0.4502665678396533 0.7288629297722721 0.0 0.6932945808277481 0.0 0.4802355919214238 0.5884394407524866 0.4719716328827388 0.0 0.4358995725567663 0.46593106551655017 0.4280056762014627 0.4423286899209199 0.8869181492492992 0.31468697855437444 0.8702875037758636 0.8691143174860191 0.7677304766353454 0.6556587807265561 0.6260961537751059 0.6150302257389149 0.7635174802912773 0.8290037443900556 0.8605890596939418 0.9093650297003386 0.45233094283160125 0.0 0.8249843737255442 0.07250747793816177 0.8249843737255442 0.38224562654903627 0.6619094499067045 0.11093034492709467 0.6619094499067045 0.5815203836701988 0.0 0.3713387003243598 0.0 0.3914696753026496 0.43862900611497196 0.6010076954117861 0.0 0.43661777955588904 0.43555662269254686 0.8633230888281535 0.43555662269254686 0.8876095703951402 0.8091984242872075 0.6831867495559438 0.8091984242872075 0.6607563156175613 0.4682308349245091 0.6577515357150453 0.4682308349245091 0.803313598911915 0.8893142040266412 0.43906619805942426 0.8893142040266412 0.0 0.7629829918437078 0.6976249480326261 0.05625504089208501 0.7108791049847881 0.4502665678396533 0.792242780233925 0.1172636519827766 0.7288629297722721 0.0 0.6932945808277481 0.0 0.4802355919214238 0.44900369690158964 0.4719716328827388 0.0 0.4358995725567663 0.4423286899209199 0.4280056762014627 0.46593106551655017 0.8869181492492992 0.8691143174860191 0.8702875037758636 0.31468697855437444 0.7677304766353454 0.6150302257389149 0.6260961537751059 0.6556587807265561 0.7635174802912773 0.9093650297003386 0.8605890596939418 0.8290037443900556 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.294953942596855 0.33454949915949334 0.8244361694768799 0.09245688189404842 0.8056490953698531 0.7528051170155046 0.6385452422428998 0.0 0.7177097269118974 0.0 0.7010668084639141 0.38278843358059356 0.7768999165038907 0.0 0.7620802810574592 0.4492896478986082 0.8090283554175491 0.7837960646277351 0.8692562749520821 0.7007420274638332 0.6616677542687841 0.8085112783986722 0.640244383585996 0.6449538686919933 0.5411867460913176 0.4508751364274434 0.8372181483975912 0.6376069854699203 0.7310434210728322 0.5653349687262773 0.294953942596855 0.6667495343225361 0.6736290701572936 0.10734829545940848 0.6736290701572936 0.6006731097957855 0.0 0.2671092308086165 0.0 0.636627370099662 0.8122798954228713 0.40321786219588523 0.0 0.6635779761520664 0.8264157644508289 0.46013589552050055 0.8264157644508289 0.9111046597868844 0.7235378308862268 0.7692866678657219 0.7235378308862268 0.7595667267543684 0.7880957709090356 0.6510451014767713 0.7880957709090356 0.6488821011679529 0.8297024670416103 0.6443195923302995 0.8297024670416103 0.8244361694768799 0.7528051170155046 0.8056490953698531 0.09245688189404842 
0.6385452422428998 0.0 0.7177097269118974 0.0 0.7010668084639141 0.7674526030190136 0.7768999165038907 0.0 0.7620802810574592 0.7837960646277351 0.8090283554175491 0.4492896478986082 0.8692562749520821 0.8085112783986722 0.6616677542687841 0.7007420274638332 0.640244383585996 0.4508751364274434 0.5411867460913176 0.6449538686919933 0.8372181483975912 0.5653349687262773 0.7310434210728322 0.6376069854699203 0.24776885486953013 0.16490370315242542 0.24776885486953013 0.2993621577469265 0.0 0.750591096514096 0.0 0.39015259919851975 0.7478462994212495 0.4271709239954803 0.0 0.49156041404886613 0.6730827342351957 0.6556379156094474 0.6730827342351957 0.6865288495329701 0.5828638933576002 0.5883784652605754 0.5828638933576002 0.6619324258229671 0.7999418644602444 0.8662370739952708 0.7999418644602444 0.30799416765005755 0.698958155173227 0.7472256045171143 0.698958155173227 0.19005451418476016 0.15008916526517402 0.7101203048218022 0.0 0.6582700677303515 0.0 0.28660951911826393 0.39765047552174754 0.7100091775485123 0.0 0.42383787247893334 0.538925993073571 0.6907634915418829 0.4303597640904859 0.5315168822499586 0.7078663512745565 0.4021553981529756 0.5813319282389549 0.8359467549578569 0.7011409366829152 0.7921386040368176 0.7639579150608699 0.7178140397919736 0.26704962573196095 0.8484633861584114 0.8439477856590323 0.19005451418476016 0.7009205118368523 0.0 0.23251751202499682 0.0 0.5042483482115668 0.6925527296348416 0.3901344480285987 0.0 0.6549546176057559 0.7398621829879379 0.5113397680756958 0.7398621829879379 0.46990344908596443 0.7476097307842237 0.5496738282218204 0.7476097307842237 0.4898715903402262 0.6356881339323114 0.6347108379903744 0.6356881339323114 0.8452687662504229 0.375729784498588 0.4253842121992838 0.375729784498588 0.7101203048218022 0.0 0.6582700677303515 0.0 0.28660951911826393 0.5052133289558414 0.7100091775485123 0.0 0.42383787247893334 0.4303597640904859 0.6907634915418829 0.538925993073571 0.5315168822499586 0.5813319282389549 0.4021553981529756 0.7078663512745565 0.8359467549578569 0.7639579150608699 0.7921386040368176 0.7011409366829152 0.7178140397919736 0.8439477856590323 0.8484633861584114 0.26704962573196095 0.0 0.048259407332347556 0.0 0.2322401562515017 0.5679200949140034 0.6431250315002368 0.0 0.4116595620771026 0.6145823598264816 0.6845971643554963 0.6145823598264816 0.5686440912072492 0.5179997243218553 0.6797787145806311 0.5179997243218553 0.5003362783900323 0.7482683301129192 0.6084019963196434 0.7482683301129192 0.14464547418653595 0.7847140462544443 0.42453636768405456 0.7847140462544443 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.4330968397795852 0.7010589882949357 0.26794702211654253 0.0 0.45560566958665616 0.5298469195341624 0.4026472117805922 0.5298469195341624 0.46307330226032783 0.7068576826567639 0.3754619367292203 0.7068576826567639 0.7430731045356209 0.5077870209160541 0.5365329637557726 0.5077870209160541 0.6640431829191347 0.5948311978656179 0.1412652846704614 0.5948311978656179 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.15776609256673357 0.7141569618636261 0.4566090172296867 0.7141569618636261 0.24797098283298225 0.6710070884639794 0.5379078928440517 0.6710070884639794 0.0 0.807363718023988 0.6484322372980367 0.807363718023988 0.0 0.6518276047688598 0.5398950721139152 0.6518276047688598 0.0 0.0 0.5439589399008395 0.1438815332018737 0.7547742924989546 0.5166412483511276 0.7242157345889833 0.2444463761362135 0.6386942859811994 
0.8088069728970156 0.7130822302425825 0.0 0.793469513021582 0.7838007344322105 0.6847174780106294 0.0 0.3467722283328491 0.4540481895811133 0.0 0.4195437269465905 0.7791393305422254 0.16517195904702625 0.7791393305422254 0.6633824228938716 0.44341102837725355 0.2012376825874073 0.44341102837725355 0.6049959745462054 0.702487197665575 0.0 0.702487197665575 0.463459008825909 0.7038950752050229 0.0 0.7038950752050229 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.8311259937072378 0.7082613205729371 0.8311259937072378 0.0 0.5817519588769989 0.6911832683064522 0.5817519588769989 0.0 0.2672400321092482 0.3028346230721769 0.2672400321092482 0.0 0.0 0.6954179341636709 0.0 0.8350505924376607 0.8048003516116676 0.6216821380423359 0.0 0.6300374198920086 0.6410032033135608 0.28171940198181894 0.0 0.5072734618392628 0.43610567750335305 0.0 0.8223230000135193 0.6911089245556278 0.0 0.6911089245556278 0.5466994801575938 0.6892098753522389 0.0 0.6892098753522389 0.5494339940218048 0.45245907786481426 0.0 0.45245907786481426 0.6954179341636709 0.8048003516116676 0.8350505924376607 0.0 0.6216821380423359 0.6410032033135608 0.6300374198920086 0.0 0.28171940198181894 0.43610567750335305 0.5072734618392628 0.0 0.0 0.0 0.0 0.0 0.5062456830352193 0.428434393497102 0.5062456830352193 0.0 0.0336440798056558 0.10945858886525058 0.0336440798056558 0.0 0.0 0.2951623669744601 0.0 0.5355375156440774 0.45484204901313463 0.07973511741493237 0.0 0.07260494626019737 0.06489710015270407 0.0 0.45859093697138725 0.5612570008366002 0.0 0.5612570008366002 0.0794918391947024 0.07314191296075151 0.0 0.07314191296075151 0.2951623669744601 0.45484204901313463 0.5355375156440774 0.0 0.07973511741493237 0.06489710015270407 0.07260494626019737 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.059361161905104036 0.30270801035193867 0.059361161905104036 0.13102685567098393 0.0 0.0 0.0 0.09331522637576187 0.0 0.0 0.0 0.20673672006527902 0.0 0.0 0.0 0.0011855177667294014 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.30774537524334905 0.0 0.0 0.0 0.3644666945780456 0.0 0.0 0.0 0.1800715937165477 0.0 0.0 0.0 0.07485224779555828 0.0 0.0 0.0 0.4964731393188604 0.20556111263751964 0.4018069977586308 0.20556111263751964 0.2742556564389471 0.010474650546241852 0.38496890248788485 0.010474650546241852 0.9396725013025091 0.21127968683578652 0.8217169859301493 0.21127968683578652 0.10980483766416634 0.6005700519444515 0.0 0.7365740423154356 0.0 0.0 0.0 0.86928241420151 0.0 0.0 0.0 0.8504853613940125 0.0 0.0 0.0 0.2735414540786856 0.0 0.0 0.0 0.09888472590337141 0.0 0.0 0.0 0.0013389410486650058 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.5350284846866369 0.0 0.0 0.0 0.3519920094225817 0.0 0.0 0.12671880651390055 0.8337155834909791 0.6363995996514344 0.24462422984690288 0.034795141403405026 0.5851550010928771 0.36706754988017165 0.009924467192810612 0.07917770640446505 0.7899600208593064 0.5204240806482334 0.4100194077740333 0.10980483766416634 0.0 0.0 0.9051122560657258 0.0 0.0 0.0 0.977657939275685 0.0 0.0 0.0 0.9923291010762604 0.0 0.0 0.0 0.7549419383219433 0.0 0.0 0.0 0.9511142910659853 0.0 0.0 0.0 0.00198738787926017 0.0 0.0 0.0 0.2607338093501864 0.0 0.0 0.0 0.1675671307118112 0.0 0.0 0.0 0.23399344042413325 0.0 0.3717289944953015 0.4692363542064527 0.7113324883912093 0.4692363542064527 0.31659444779649926 0.31385245260336153 0.590654125682244 0.31385245260336153 0.5539294841675498 0.6798388644189598 0.0538624829964348 0.6798388644189598 0.0 0.0 0.0 0.7365740423154356 
0.0 0.0 0.0 0.86928241420151 0.0 0.0 0.0 0.8504853613940125 0.0 0.0 0.0 0.2735414540786856 0.0 0.0 0.0 0.09888472590337141 0.0 0.0 0.0 0.0013389410486650058 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.3519920094225817 0.12671880651390055 0.24462422984690288 0.6363995996514344 0.8337155834909791 0.034795141403405026 0.009924467192810612 0.36706754988017165 0.5851550010928771 0.07917770640446505 0.4100194077740333 0.5204240806482334 0.7899600208593064 0.8671916389925637 0.3938920681430454 0.8671916389925637 0.4034609168379725 0.2430964956272526 0.5561793122410112 0.2430964956272526 0.055697112102801184 0.6284001097775743 0.52754982083684 0.6284001097775743 0.5458219534098482 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.03762304260801498 0.0 0.0 0.0 0.8237022522749506 0.0 0.0 0.0 0.07929621820640917 0.0 0.0 0.0 0.7644483251943905 0.02637096010054343 0.2673668831232763 0.02637096010054343 0.10200748726816712 0.352722071928804 0.9276511304177232 0.352722071928804 0.6421284979423304 0.2092321589560551 0.9035758010563357 0.2092321589560551 0.9932757010665242 0.911815700401006 0.9274965558799799 0.911815700401006 0.7003745400132462 0.8520197570544651 0.5283359833908012 0.48517096175494495 0.5556501362923151 0.5112852249935865 0.025931889543435647 0.2198022349175177 0.6489170989101893 0.1078154581319325 0.0 0.48236169930602735 0.0 0.0 0.0 0.615887618859225 0.0 0.0 0.0 7.602595172538558E-5 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.2017236296410641 0.0 0.0 0.10543442436390042 0.7965512587902593 0.2886260699259934 0.3336295684455069 0.18297911887918483 0.8544573754496189 0.700964633550075 0.04669176494241784 0.11287442832263275 0.9499559492411269 0.7134627292618672 0.04765105720602089 0.970778931058662 0.9898766700401449 0.9872654614607017 0.9314628567585488 0.7003745400132462 0.5576765355414117 0.3869886327586522 0.9780371746524009 0.3869886327586522 0.5970843947577252 0.8335494815301061 0.7451407849892288 0.8335494815301061 0.0 0.0 0.8124103214534611 0.0 0.0 0.0 0.7932013278157515 0.0 0.0 0.0 0.37589446903731205 0.0 0.0 0.0 0.4304008033138593 0.0 0.0 0.0 0.25543410721405574 0.0 0.3388418133962239 0.11915853916769008 0.3446734267174857 0.11915853916769008 0.8271264413474372 0.12227095595835137 0.8206182735085629 0.12227095595835137 0.9101822421643725 0.1773320866287897 0.736865717559316 0.1773320866287897 0.7774966486596417 0.8993155313460507 0.9988518149848017 0.8993155313460507 0.5283359833908012 0.5112852249935865 0.5556501362923151 0.48517096175494495 0.025931889543435647 0.1078154581319325 0.6489170989101893 0.2198022349175177 0.0 0.0 0.0 0.48236169930602735 0.0 0.0 0.0 0.615887618859225 0.0 0.0 0.0 7.602595172538558E-5 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.10543442436390042 0.3336295684455069 0.2886260699259934 0.7965512587902593 0.18297911887918483 0.04669176494241784 0.700964633550075 0.8544573754496189 0.11287442832263275 0.04765105720602089 0.7134627292618672 0.9499559492411269 0.970778931058662 0.9314628567585488 0.9872654614607017 0.9898766700401449 0.9861184293003498 0.9929567586138688 0.9861184293003498 0.8851245425986796 0.32200801971069387 0.17891998733153294 0.32200801971069387 0.5437651279515336 0.027358455304446402 0.7743343306707602 0.027358455304446402 0.0 0.8284709797749551 0.7947281618388031 0.8284709797749551 0.898674411532567 0.0 0.0 0.0 0.1472457490060201 0.0 0.0 0.0 0.4317013206758494 0.03486364455137938 0.3345367407534963 0.0 0.5373206427944996 0.22991015183092928 0.6972298422614722 0.22991015183092928 0.7692966823979729 0.5574169378488381 0.9445503070685227 0.5574169378488381 0.9777551005973357 
0.9150082811284528 0.9720496068781211 0.9150082811284528 1.0 0.9934257333899119 0.993354716485492 0.9934257333899119 0.9945667016421016 0.9992323676895883 0.1665678704006062 0.48735655684202445 0.28947348239902326 0.25108371099755444 0.22345923314173932 0.5044924694610715 0.3212415208921354 0.0660459410540735 0.0 0.4430257864817889 0.7384886557005637 0.8026448082823826 0.0 0.010439775411764461 0.0 0.0 0.0 0.0 0.0 0.0 0.15892670886973623 0.2317277377591514 0.22976542682574896 0.0 0.4078408537685484 0.6187276019128536 0.3661582109718119 0.22710200703055863 0.6488843193896349 0.8443476098633154 0.8780021886157293 0.6095671138683394 0.9435184703764881 0.9782337452235305 0.8963714456102001 0.7432661714885717 0.9955634137552685 0.9944533724137273 0.993122133338296 0.9878946713139715 0.9945667016421016 0.4957606744882479 0.8460794106234898 0.9550956541307741 0.8460794106234898 0.40659923007265003 0.3033032213938502 0.9345482068741717 0.3033032213938502 0.0 0.3321401648355141 0.7975524853385001 0.3321401648355141 0.0 0.0 0.9071735749785516 0.0 0.0 0.0 0.20062399091289038 0.0 0.18047999305419102 0.07689791039465992 0.8786069799084181 0.0 0.8062494560225322 0.5685223722115237 0.47931318829148106 0.5685223722115237 0.9551050726370311 0.9194032925770096 0.9693086914191168 0.9194032925770096 0.9352400557665184 0.9065520855726033 0.9997154026385014 0.9065520855726033 0.9795573273459615 0.9829830046880716 1.0 0.9829830046880716 0.1665678704006062 0.25108371099755444 0.28947348239902326 0.48735655684202445 0.22345923314173932 0.0660459410540735 0.3212415208921354 0.5044924694610715 0.0 0.8026448082823826 0.7384886557005637 0.4430257864817889 0.0 0.0 0.0 0.010439775411764461 0.0 0.0 0.0 0.0 0.15892670886973623 0.03306266589022675 0.22976542682574896 0.0 0.4078408537685484 0.22710200703055863 0.3661582109718119 0.6187276019128536 0.6488843193896349 0.6095671138683394 0.8780021886157293 0.8443476098633154 0.9435184703764881 0.7432661714885717 0.8963714456102001 0.9782337452235305 0.9955634137552685 0.9878946713139715 0.993122133338296 0.9944533724137273 0.8041195055449174 0.6921056346137644 0.8041195055449174 0.2279721268716455 0.6146540702268232 0.7633298642489977 0.6146540702268232 0.0 0.25777445971362983 0.6900230561016754 0.25777445971362983 0.658956211019724 0.020139910222761036 0.2756072818940971 0.020139910222761036 0.11654534791400925 0.0 0.023958171856523113 0.0 0.47595298810997483 0.28406389342095734 0.7610790353550136 0.0 0.26164527046654185 0.12324039098322215 0.2830614982537833 0.12324039098322215 0.6775728052249175 0.01313266739292225 0.7412358405394737 0.01313266739292225 0.9882705784266249 0.7680689952995037 0.9437699817955116 0.7680689952995037 0.9805342089261806 0.8843404060033961 0.9933969719020269 0.8843404060033961 0.92647796755961 0.5496565903376227 0.9075769707939105 0.485870987511518 0.8674542925491383 0.7640182052882285 0.0 0.13447771510914197 0.8110000728418075 0.6107099210278273 0.7866311941882973 0.09351672147960627 0.6590846216147426 0.06997655230073091 0.5027124722917379 0.0 0.3278303869726039 0.0 0.7143933519752851 0.7893394954946583 0.5506737139141821 0.0 0.19570253997939094 0.6872352641817978 0.33376476277204153 0.19172831021756262 0.10620528973290935 0.6231981228435007 0.2987592785729243 0.02619969376165491 0.6383676951662605 0.952654633294495 0.9071274023809608 0.7938439749493663 0.9850565887872019 0.9541233991183197 0.993878051468849 0.9311668240483911 0.92647796755961 0.5678177161537644 0.7278078120412687 0.5697118493509973 0.7278078120412687 0.0 0.6217131542845944 
0.7891562128925442 0.6217131542845944 0.31686237885848517 0.019690538015431282 0.861579059227494 0.019690538015431282 0.17477863557614592 0.0 0.182346572908005 0.0 0.5551184855341839 0.46827704714587404 0.812052063458144 0.0 0.6525681181951868 0.4529067866651579 0.30975738502163497 0.4529067866651579 0.5481256147463103 0.30959884474701455 0.3324769558713345 0.30959884474701455 0.8753637763928699 0.7849479424000874 0.9992742293645895 0.7849479424000874 0.9943502199789911 0.885933653989607 0.9995588953440704 0.885933653989607 0.9075769707939105 0.7640182052882285 0.8674542925491383 0.485870987511518 0.0 0.6107099210278273 0.8110000728418075 0.13447771510914197 0.7866311941882973 0.06997655230073091 0.6590846216147426 0.09351672147960627 0.5027124722917379 0.0 0.3278303869726039 0.0 0.7143933519752851 0.22732795751633744 0.5506737139141821 0.0 0.19570253997939094 0.19172831021756262 0.33376476277204153 0.6872352641817978 0.10620528973290935 0.02619969376165491 0.2987592785729243 0.6231981228435007 0.6383676951662605 0.7938439749493663 0.9071274023809608 0.952654633294495 0.9850565887872019 0.9311668240483911 0.993878051468849 0.9541233991183197 0.6466883702585505 0.6509493953758632 0.6466883702585505 0.0 0.6074261964514839 0.861839593293442 0.6074261964514839 0.09290164342235689 0.52852958437736 0.597893532922221 0.52852958437736 0.07180809517304612 0.0 0.6267780992996994 0.0 0.7097739539785801 0.16877110864723255 0.3086893089918471 0.0 0.5312426785976769 0.37003098123171835 0.30213720157061347 0.37003098123171835 0.5868920246133266 0.0016855638137293226 0.09413782631412945 0.0016855638137293226 0.40161278949580037 0.2727668124921527 0.9569954572089362 0.2727668124921527 0.7781859640828473 0.5359436817981316 0.9852819581340783 0.5359436817981316 0.5430768923804642 0.6736095809489465 0.0 0.06296205782655877 0.5953032694375648 0.826138220691131 0.619615893840473 0.01197365744737287 0.7145207732276979 0.08474428645096023 0.28761695088024375 0.0 0.6047057661134795 0.0 0.4181600002511472 0.6476575241756457 0.6286916304544203 0.0 0.01587793515067404 0.5666161187920059 0.13992891671187643 0.24824941792372077 0.007511762508067823 0.14880964851178505 0.30031961441129046 0.008133560469664047 0.5289006338300906 0.5417317640744167 0.578086640004993 0.2667056088809999 0.8540498813682791 0.8467328791632311 0.7028476916633144 0.8592057720054477 0.5430768923804642 0.0 0.6898443540803466 0.5179275236483508 0.6898443540803466 0.48937145574855956 0.2774533229714422 0.5925203624156454 0.2774533229714422 0.20109658627890942 0.0 0.07686486095773366 0.0 0.20795522456093643 0.5923619893161789 0.44324182556414415 0.0 0.4014159709095172 0.3840229052107011 0.16060087840759643 0.3840229052107011 0.6736457309258055 0.0933767743932329 0.019087310960409925 0.0933767743932329 0.7098995166641554 0.5355787764431762 0.9935468221777127 0.5355787764431762 0.8172647822655421 0.8013906487372391 0.9340972628353885 0.8013906487372391 0.0 0.826138220691131 0.5953032694375648 0.06296205782655877 0.619615893840473 0.08474428645096023 0.7145207732276979 0.01197365744737287 0.28761695088024375 0.0 0.6047057661134795 0.0 0.4181600002511472 0.478023555134838 0.6286916304544203 0.0 0.01587793515067404 0.24824941792372077 0.13992891671187643 0.5666161187920059 0.007511762508067823 0.008133560469664047 0.30031961441129046 0.14880964851178505 0.5289006338300906 0.2667056088809999 0.578086640004993 0.5417317640744167 0.8540498813682791 0.8592057720054477 0.7028476916633144 0.8467328791632311 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.02767216979196085 0.26212132766481816 0.8293763667340162 0.4137046855485467 0.5591085523895781 0.8945762459717601 0.4754690576126132 0.0 0.4423522464505298 0.0 0.8612190152245591 0.6593124644987123 0.6507192717463925 0.0 0.8008315953789193 0.33150498239408843 0.2977352904198963 0.9106992434333865 0.742514049774749 0.3889102283540669 0.5202975774198886 0.39444963835691166 0.6784583617887369 0.7114292546592182 0.024269160155800596 0.6360395258310827 0.16904722760657723 0.12735817046158648 0.09110664104090102 0.16787423344978608 0.02767216979196085 0.8022137075232638 0.4351206851815718 0.6338682801909568 0.4351206851815718 0.1272633207012968 0.0 0.24980684364282538 0.0 0.8990879155167549 0.6869003177851482 0.48220464710145455 0.0 0.9414685308891282 0.9556091291026025 0.2068059050174176 0.9556091291026025 0.6002463110860102 0.6469197174865248 0.2583992091736339 0.6469197174865248 0.7742699376222796 0.4979624972767524 0.2378280420434321 0.4979624972767524 0.3242052357015842 0.14898986669916728 0.003896343583871367 0.14898986669916728 0.8293763667340162 0.8945762459717601 0.5591085523895781 0.4137046855485467 0.4754690576126132 0.0 0.4423522464505298 0.0 0.8612190152245591 0.7443141257261592 0.6507192717463925 0.0 0.8008315953789193 0.9106992434333865 0.2977352904198963 0.33150498239408843 0.742514049774749 0.39444963835691166 0.5202975774198886 0.3889102283540669 0.6784583617887369 0.6360395258310827 0.024269160155800596 0.7114292546592182 0.16904722760657723 0.16787423344978608 0.09110664104090102 0.12735817046158648 0.27073005771907316 0.392073049120228 0.27073005771907316 0.03906350431920926 0.0 0.6624007959723129 0.0 0.7712500527557994 0.5186462766035829 0.5932213477620781 0.0 0.5383740982408418 0.3909757254108178 0.29785616631058376 0.3909757254108178 0.9367329050870057 0.45671612944072376 0.33691092822962526 0.45671612944072376 0.9633497689035533 0.8319046830071135 0.9533513140541767 0.8319046830071135 0.4203220897723303 0.5024503715477322 0.1252455124646061 0.5024503715477322 0.0027794459414020914 0.04965509826206457 0.6300558627593793 0.0 0.7909063256774628 0.0 0.2831401873670224 0.14489905993795146 0.19254974164085764 0.0 0.3560879504452742 0.2832567658287833 0.25075862764227996 0.1409584487682978 0.03634735921538955 0.08468427327467024 0.01661696542813769 0.04102150017895485 0.745181703936116 0.9436573438223806 0.22073304258947832 0.6090307717543281 0.15081364348123324 0.3892722330617528 0.21709783500887025 0.2882335680684095 0.0027794459414020914 0.45421653600015094 0.0 0.03619155799161273 0.0 0.3729798018507523 0.4840819283493447 0.020430670488655933 0.0 0.3735460360344929 0.09669674432633953 0.16235281234810706 0.09669674432633953 0.5626433408135683 0.44822614102466274 0.3276527168606022 0.44822614102466274 0.9680269306287518 0.9210131983825968 0.9980805020415048 0.9210131983825968 0.026672866769586637 0.059038488220202134 0.7522333354794539 0.059038488220202134 0.6300558627593793 0.0 0.7909063256774628 0.0 0.2831401873670224 0.5853985485181737 0.19254974164085764 0.0 0.3560879504452742 0.1409584487682978 0.25075862764227996 0.2832567658287833 0.03634735921538955 0.04102150017895485 0.01661696542813769 0.08468427327467024 0.745181703936116 0.6090307717543281 0.22073304258947832 0.9436573438223806 0.15081364348123324 0.2882335680684095 0.21709783500887025 0.3892722330617528 0.0 0.040056773106672806 0.0 0.18899003944979414 0.3702225427979121 0.321721720244313 0.0 0.04700996708498701 0.21970985380475067 
0.03458257561098234 0.21970985380475067 0.12042047112633442 0.04759683053745659 0.2176607169956109 0.04759683053745659 0.09185172518035974 0.10740304199268016 0.07236658926744331 0.10740304199268016 0.01230383564455616 0.03341038344176251 0.17852981112558255 0.03341038344176251 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.5379117890806651 0.4239633423299756 0.006446756468793958 0.0 0.19601369825535322 0.4961045684795736 0.0014167443803118208 0.4961045684795736 0.3344386436707242 0.03630499767551837 0.016515353127053836 0.03630499767551837 0.44247493458422643 0.0783352810367438 0.011321609281530549 0.0783352810367438 0.06408888763145336 0.10998959584542325 0.07353708417010711 0.10998959584542325 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.21009149079175765 0.14793068969950218 0.0 0.11958199540631283 0.4696483244715779 0.44960944594576957 0.4696483244715779 0.8763586816813117 0.32222251478520786 0.2880166168595179 0.32222251478520786 0.7673665047009605 0.8889685969153129 0.6887922990576584 0.8889685969153129 0.32150742214544925 0.5507309188037869 0.29482613083208165 0.5507309188037869 0.18859532839724452 0.0 0.6864282971654123 0.04838856456963093 0.3894730770030132 0.11147820132809591 0.11174684837675433 0.265865765103202 0.11263649569864856 0.07185635244949733 0.6171022463907011 0.7084537985389525 0.9172989017323008 0.7966422514106779 0.5619730757677712 0.4457067137461563 0.5205611141306757 0.046061569225150784 0.0 0.5702101270233878 0.141181928327836 0.03265948919899676 0.141181928327836 0.5101402203483426 0.028589978139763602 0.6585855752778327 0.028589978139763602 0.9859714504004079 0.6234933345035916 0.9137457600367995 0.6234933345035916 0.9333649296730402 0.3499758881719692 0.007635986256135765 0.3499758881719692 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.31418273457401547 0.08335833048407394 0.31418273457401547 0.5785606415109561 0.09020738456120406 0.560931296074066 0.09020738456120406 0.6517908875933455 0.2773308304568199 0.7971839900253173 0.2773308304568199 0.06131215188600256 0.44721107218398887 0.4082415123226818 0.44721107218398887 0.18679845350286584 0.14555039540273212 0.42800559681216527 0.8258130143757243 0.17237121535730862 0.32839283600879215 0.7075346280056246 0.2357412814602641 0.6407040909224148 0.6679963404187208 0.12749931457312844 0.7792370583656433 0.4991406306724849 0.8889594065936893 0.18679845350286584 0.7394000925418827 0.6435873250767602 0.37291020684744447 0.6435873250767602 0.48151716771741254 0.7830478552401993 0.9963866287318575 0.7830478552401993 0.7747833859753638 0.24286454641547967 0.07188842416694555 0.24286454641547967 0.42800559681216527 0.32839283600879215 0.17237121535730862 0.8258130143757243 0.7075346280056246 0.6679963404187208 0.6407040909224148 0.2357412814602641 0.12749931457312844 0.8889594065936893 0.4991406306724849 0.7792370583656433 0.3623455097636269 0.5325559038762131 0.3623455097636269 0.2669208834698509 0.8125819261952927 0.8665733730243135 0.8125819261952927 0.684815958622579 0.6563120354274892 0.32969746872502587 0.6563120354274892 0.283454633160318 0.4744088130806406 0.8347197574863129 0.42845193884915594 0.8619037292517725 0.8262026597455996 0.5976140429215694 0.15547485977212008 0.22509621836539623 0.08913093576353502 0.283454633160318 0.9021339794461049 0.8606172763142257 0.9969276420853928 0.8606172763142257 0.5049871627632054 0.4585107130277986 0.04041456605012385 0.4585107130277986 0.8347197574863129 
0.8262026597455996 0.8619037292517725 0.42845193884915594 0.5976140429215694 0.08913093576353502 0.22509621836539623 0.15547485977212008 0.8850027855032659 0.654581823433166 0.8850027855032659 7.142955015843552E-4 0.031186046497562292 3.579229552055493E-4 0.031186046497562292 0.9056116794065638 0.1599361362391543 0.009241320677712245 0.10448343900359075 0.17079641834999396 0.01930682104154293 0.9056116794065638 0.22304401187544332 0.21923548723941194 0.0 0.21923548723941194 0.009241320677712245 0.01930682104154293 0.17079641834999396 0.10448343900359075 0.9755233242204772 0.9700131826352253 0.9755233242204772 0.9757236104961685 0.9001662909123422 0.9757236104961685 diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/endgames/subgame2.txt b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/endgames/subgame2.txt new file mode 100644 index 0000000..f11f3ee --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/endgames/subgame2.txt @@ -0,0 +1 @@ +0.013162954623366939 0.0013859338862291686 0.0013859338862291686 0.0018990497151833035 0.0 0.0 0.0 8.807024899639285E-4 0.0 0.0 0.0 7.381219272791661E-4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 4.431722848074229E-4 0.0 0.0 0.0 0.005089707339481237 0.0 0.0 0.0 9.055471496846815E-4 7.605397585196832E-5 7.940144066566861E-7 7.940144066566861E-7 7.271094781406943E-4 0.0 5.514206285898443E-4 5.514206285898443E-4 0.03969793883570565 0.03969793883570565 0.0 0.0045113044225806006 0.0 0.0 0.0 0.0025905744000093776 0.0 0.0 0.0 0.0016514382726402505 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.762813570378492E-5 0.0 0.0 0.0 0.0022038573446818833 0.0 0.0 0.0 0.02672080427537551 0.0 0.0 2.125276413967039E-5 0.1144633162501609 8.623168147744433E-6 8.623168147744433E-6 2.4170663826197794E-4 0.0 2.0130948592112104E-4 2.0130948592112104E-4 0.00169443998705353 0.0 0.0 8.41346484695386E-4 0.0 0.0 0.0 7.212932028320972E-4 0.0 0.0 0.0 5.845871324325129E-4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 5.645418963616639E-6 0.0 0.0 0.0 9.211036375420018E-5 0.0 0.0 0.0 4.018712032754847E-4 0.0 5.346062136085426E-6 3.383126319634634E-4 2.9869086606490897E-4 1.0073861526226986E-5 7.573909985639924E-4 0.0 0.0039914621587935 3.977848451406536E-4 0.0 0.0 0.0 8.41346484695386E-4 0.0 0.0 0.0 7.212932028320972E-4 0.0 0.0 0.0 5.845871324325129E-4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 5.645418963616639E-6 0.0 0.0 0.0 9.211036375420018E-5 0.0 0.0 0.0 4.018712032754847E-4 5.346062136085426E-6 3.383126319634634E-4 1.0073861526226986E-5 2.9869086606490897E-4 7.573909985639924E-4 0.0 3.977848451406536E-4 0.0039914621587935 0.004070408774398315 6.605171908240684E-4 6.605171908240684E-4 0.010519997382676009 1.4821310394764204E-4 7.569753276704576E-5 7.569753276704576E-5 0.01587090544665329 6.641850599820839E-4 1.8680271947246803E-4 1.8680271947246803E-4 0.023007554932328155 0.0 0.0 0.0 1.4924470733797913E-4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0026177458054705387 0.0 0.0 0.0 0.00841155593155169 4.933253323901292E-4 4.448061794690301E-6 4.448061794690301E-6 0.0011180427101834638 4.298875314961441E-4 1.2278858835508195E-6 1.2278858835508195E-6 0.0012161274277296841 0.0 2.4979659970763673E-4 2.4979659970763673E-4 0.013142962048273824 0.013142962048273824 2.9680065244912105E-4 0.01886298073290687 2.595975128805721E-4 2.595975128805721E-4 
4.78182374361219E-4 0.015796113589181187 9.174555813727472E-4 9.174555813727472E-4 0.0 0.0 0.0 0.0 0.0 3.0725493027994127E-4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0743460961862866E-6 0.0 0.0 0.0 0.009309269701711337 0.0 0.0 3.775850733557303E-5 0.007291813788104649 4.748278641454111E-5 4.748278641454111E-5 4.9185577798247945E-5 0.08044084288526794 1.065064030196458E-5 1.065064030196458E-5 1.7909628497387247E-4 0.0 1.1015254866582898E-4 1.1015254866582898E-4 7.157584220968101E-4 1.1457454636925553E-4 3.665096649206342E-4 0.0032779055014113363 6.728947827359031E-5 2.4403452308506521E-4 0.0012700379336845439 0.00539270935065167 1.2425082963667675E-4 0.0 0.0 0.00395194004867892 0.0 0.0 0.0 1.5317189836095126E-5 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.398718248692723E-7 0.0 0.0 0.0 2.0410552739991687E-4 0.0 9.557638243552092E-6 2.0442704100569892E-4 0.0010286744353101122 8.962048167747622E-6 8.65869777877292E-6 8.377845559792394E-4 6.877811372301753E-4 5.868129328137527E-6 5.176102228179655E-4 0.0 0.003631565094392286 2.888624010533706E-4 1.1457454636925553E-4 3.665096649206342E-4 6.728947827359031E-5 0.0032779055014113363 2.4403452308506521E-4 0.0012700379336845439 1.2425082963667675E-4 0.00539270935065167 0.0 0.0 0.0 0.00395194004867892 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.398718248692723E-7 0.0 0.0 0.0 2.0410552739991687E-4 9.557638243552092E-6 2.0442704100569892E-4 8.962048167747622E-6 0.0010286744353101122 8.65869777877292E-6 8.377845559792394E-4 5.868129328137527E-6 6.877811372301753E-4 5.176102228179655E-4 0.0 2.888624010533706E-4 0.003631565094392286 0.0026686408932813294 3.8526018848750176E-4 3.8526018848750176E-4 0.012311912960603258 0.01622580919395167 0.001030455358478253 0.001030455358478253 0.06849905168386138 0.0 9.600112468621432E-4 9.600112468621432E-4 0.0026014805677781966 7.144927159001192E-7 1.093887500079962E-6 0.0 1.0178868025341032E-4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.502676436920194E-4 0.0 0.0 0.0 0.01019035928511196 0.0012096609549444865 2.065387681795039E-5 2.065387681795039E-5 4.5161293627960257E-4 0.0017399113820503465 4.4004133480532645E-5 4.4004133480532645E-5 0.0013309732399352332 0.0 8.057240407277322E-4 8.057240407277322E-4 0.008362803206465453 0.008362803206465453 0.004251264787890695 0.02945690826162865 0.01586444903137335 0.01586444903137335 9.122522098471415E-4 0.0 0.0025852413679402285 0.0025852413679402285 3.383389304900142E-6 0.001799162610015593 2.496636580475302E-6 0.0 0.0 2.5988251075120583E-4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.347268936078521E-4 0.0 0.0 0.0 0.0041298077438440784 0.0 0.0 1.813043861224249E-5 0.014134877812445363 6.110347448564731E-5 6.110347448564731E-5 1.6209648405274572E-4 0.0426632778213498 8.837187630385325E-5 8.837187630385325E-5 3.4197257060151717E-4 0.0 3.2039365488429987E-4 3.2039365488429987E-4 1.2525026647071543E-4 0.001638313634402546 0.013110387371363 0.013313125540891285 0.0021820089040716713 7.582280593316389E-4 0.0 0.015187245373249818 6.448694331914484E-4 1.3975951634855783E-6 8.79597243633016E-6 6.045996475540506E-4 0.0 0.0 0.0 5.319286646154877E-6 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.578076909213005E-5 0.0 0.0 0.0 6.877738488482853E-5 0.0 4.8898238349998746E-5 0.0012605373910568433 0.0012172199640213964 4.094712692766179E-5 4.471757394775269E-5 0.006001023552687473 3.1013069066862276E-4 3.7050417389131856E-5 0.0012358073905684632 0.0 0.0024978971852873413 4.815159562316272E-4 0.001638313634402546 0.013110387371363 0.0021820089040716713 0.013313125540891285 7.582280593316389E-4 
0.0 6.448694331914484E-4 0.015187245373249818 1.3975951634855783E-6 8.79597243633016E-6 1.2295237301051073E-8 0.0 0.0 0.0 0.0 5.319286646154877E-6 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.578076909213005E-5 0.0 0.0 0.0 6.877738488482853E-5 4.8898238349998746E-5 0.0012605373910568433 4.094712692766179E-5 0.0012172199640213964 4.471757394775269E-5 0.006001023552687473 3.7050417389131856E-5 3.1013069066862276E-4 0.0012358073905684632 0.0 4.815159562316272E-4 0.0024978971852873413 0.008928640692256871 1.4201703673891896E-4 1.4201703673891896E-4 0.10745292639378458 0.0 0.0065618771827368975 0.0065618771827368975 0.008520290437220813 0.001214697891051796 2.9423638940734813E-4 0.0 0.0011399689738529255 1.5561261027630992E-6 6.022509958274927E-8 6.022509958274927E-8 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0012829602802347593 2.3573875608499494E-5 1.2801921400789425E-6 1.2801921400789425E-6 0.009429194509778404 3.2159455305024847E-4 3.284578617644041E-5 3.284578617644041E-5 4.0419060869663565E-4 0.0014171834494544643 3.8989586040843534E-5 3.8989586040843534E-5 8.35570134118501E-4 0.0 2.2978201556405413E-4 2.2978201556405413E-4 0.006046566908613821 0.006046566908613821 0.0037815489986059436 0.0 0.0054835300311716195 0.0054835300311716195 0.0028149761757431924 0.026025968513295302 0.0020779986482457507 0.0 2.20903739406914E-6 0.0014691611128703301 2.1055327111583386E-6 2.1055327111583386E-6 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.3167176614488455E-5 0.0027200990109197834 4.540830640371044E-6 4.540830640371044E-6 1.1411404764489222E-4 0.01590313610947498 2.279079743734578E-5 2.279079743734578E-5 1.2662290178009475E-4 0.029135346331814947 1.849380200066699E-4 1.849380200066699E-4 2.3370645572704245E-4 0.0 9.738425473495417E-5 9.738425473495417E-5 7.100932286977386E-5 0.008029028059940614 0.0 0.014250860325418096 0.007692019299922394 3.067834524441776E-4 0.00774235820451637 0.0019530783148332211 0.0 4.575569813264909E-9 1.4315755765558003E-6 6.798121567896635E-4 1.9622909248480524E-8 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.1168319907432225E-6 3.650170406144821E-5 7.997868292230944E-5 6.668684230173623E-6 1.7875873189030702E-5 0.001066341184853297 0.001010896201592106 5.321629014319721E-5 1.275363979739119E-4 0.009839918215106925 2.188379073357979E-4 1.2098956546369151E-4 3.8925800596554963E-4 0.0 0.0025981838668428315 1.943503310020286E-4 0.008029028059940614 0.0 0.007692019299922394 0.014250860325418096 3.067834524441776E-4 0.00774235820451637 4.1515780516512954E-4 0.0 4.575569813264909E-9 1.4315755765558003E-6 1.9622909248480524E-8 6.798121567896635E-4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.1168319907432225E-6 3.650170406144821E-5 6.668684230173623E-6 7.997868292230944E-5 1.7875873189030702E-5 0.001066341184853297 5.321629014319721E-5 0.001010896201592106 1.275363979739119E-4 0.009839918215106925 1.2098956546369151E-4 2.188379073357979E-4 3.8925800596554963E-4 0.0 1.943503310020286E-4 0.0025981838668428315 0.0 0.0369304967280349 0.0369304967280349 0.12090531457023498 0.00858435241370427 0.0013985487377473369 0.0 0.09530514325755346 0.004814652411726085 0.004572575882804772 0.004572575882804772 0.023325470892538294 0.0020739741470349664 8.625493903501375E-4 8.625493903501375E-4 0.0 3.0574705028767115E-4 1.8805414310949458E-4 1.8805414310949458E-4 0.011231791346820742 5.044070727336046E-4 3.1625662345844096E-4 3.1625662345844096E-4 0.001160535233587525 0.0027544807844860765 0.0017869086233868539 0.0017869086233868539 6.539298893313405E-4 0.0018607589751995816 8.570188869173065E-4 8.570188869173065E-4 4.1853753740002705E-5 0.0 
1.2007091709200727E-4 1.2007091709200727E-4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.11705078234595682 0.011176479747947207 0.01089429159812569 0.017583152321398846 0.0 0.0046423065284807085 0.00472544424823438 0.03477333397349315 0.0024646471453005944 2.934587062264051E-4 0.002523950235280538 0.004181516624637406 1.8124748629710834E-4 0.0 7.955867370258046E-4 0.02692914772276463 6.13960920109063E-4 7.455415043829692E-4 6.109637453068787E-4 0.009255263345455966 1.2223764069693016E-4 0.004958543997798872 0.007648046103305924 0.02680072100103056 0.0023126177846894765 0.005742447560622563 0.01288089812662857 0.007292382601323322 0.0015229758146321404 0.002163954213042846 0.0 9.834480272287928E-4 5.394990361286125E-4 0.011176479747947207 0.01089429159812569 0.0015008446033166316 0.0 0.0046423065284807085 0.00472544424823438 0.0024646471453005944 0.03477333397349315 2.934587062264051E-4 0.002523950235280538 1.8124748629710834E-4 0.004181516624637406 0.0 7.955867370258046E-4 6.13960920109063E-4 0.02692914772276463 7.455415043829692E-4 6.109637453068787E-4 1.2223764069693016E-4 0.009255263345455966 0.004958543997798872 0.007648046103305924 0.0023126177846894765 0.02680072100103056 0.005742447560622563 0.01288089812662857 0.0015229758146321404 0.007292382601323322 0.002163954213042846 0.0 5.394990361286125E-4 9.834480272287928E-4 0.002049005282348395 1.5745871666411265E-4 0.0 0.04150496932405784 0.009496910209544608 0.016746537586007127 0.016746537586007127 0.15263892917761643 0.004825975600114685 0.004812256545669803 0.004812256545669803 0.0 0.0033885210082149654 0.0019230003015935318 0.0019230003015935318 0.02769839625042435 0.0038696736075980727 8.004730608391894E-5 8.004730608391894E-5 0.011235858102128134 0.006055809180103055 6.401375167800093E-5 6.401375167800093E-5 1.8467454374647104E-4 0.007539640927375029 9.909261267714847E-5 9.909261267714847E-5 3.607771056611961E-4 0.0 2.4659510977998074E-4 2.4659510977998074E-4 0.00172512624106849 0.0 0.018758165071738348 0.025599608998519475 0.011510277342816488 0.011510277342816488 0.0022491360953495678 0.08846517146325787 0.002083974682543239 0.002083974682543239 0.0 0.009397188052991808 6.114775275192412E-4 6.114775275192412E-4 6.124737224613333E-4 0.041985310915940176 1.8049022767761643E-4 1.8049022767761643E-4 8.096633827777261E-4 0.028759388911832412 1.0547991433665107E-4 1.0547991433665107E-4 8.169597538950873E-4 0.006711024048540889 3.1761808094571186E-4 3.1761808094571186E-4 8.410019370443996E-5 0.0 1.1590366832753104E-4 1.1590366832753104E-4 0.0 0.012372961958565123 0.01631779226118129 0.0606519977053259 0.010880308662111404 0.005152779057844084 0.003541572587014423 0.03546013662187038 0.0020820331287103185 0.0 0.0018900238138652096 0.002458738542939346 0.0012942263438268358 1.590223195062017E-4 0.003618040535090969 0.0010590875902454782 8.417454079741691E-5 5.6002270522761964E-5 0.006970613243766042 6.724145526104325E-4 1.0940087046516127E-4 8.344571895366114E-5 0.012762079952595729 3.8014063330311616E-5 9.65065662550914E-5 3.763141705398595E-4 0.0 9.347097956688814E-4 2.4990790488358907E-4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0016013881077768415 1.1398047035747808E-4 1.1398047035747808E-4 0.02434355235957521 0.010054924711604993 0.016003552201956622 0.016003552201956622 0.0 0.008342001926139327 0.0017665697862294762 0.0017665697862294762 0.014044680034211162 
0.017381272453274404 2.9029278009749934E-4 2.9029278009749934E-4 0.00961752087484276 0.017618566357052976 4.4785257289715685E-4 4.4785257289715685E-4 5.956910397199596E-6 0.0023548628987105306 1.1141196175739048E-4 1.1141196175739048E-4 0.004154976163667326 0.0 3.1326330289842676E-4 3.1326330289842676E-4 0.004190371807863095 0.004190371807863095 0.006569392885881379 0.039479128768312624 0.0073438632718194315 0.0073438632718194315 0.0 0.01445213204973558 0.0012952995054787714 0.0012952995054787714 0.0042196818992092785 0.07947620533785232 0.0020105425800139107 0.0020105425800139107 0.002185348309060415 0.01452122782044112 0.002186159412830932 0.002186159412830932 4.2379250931976284E-4 0.002797590526812135 2.860417335329459E-4 2.860417335329459E-4 1.1060901430322669E-4 0.0 3.4341380432616295E-4 3.4341380432616295E-4 3.701751507660728E-4 0.015984723070849482 0.004963947804517563 0.01283879010720267 0.007924236629444846 0.0 0.0035602216961107044 0.0035857798428085325 5.214302650433112E-4 5.276279359082327E-4 0.02402266566778069 0.0019886042376944615 4.8826640730562565E-4 4.7069558230050803E-4 0.01082153249665464 0.0016633194153910713 6.119217770943583E-4 1.175338114814726E-4 0.003141377109095206 1.0145645950266424E-6 1.0233915258556718E-4 3.675074166113791E-4 0.0 0.0034808509769385628 1.1926720848860397E-4 0.015984723070849482 0.004963947804517563 0.007924236629444846 0.01283879010720267 0.0 0.0035602216961107044 5.214302650433112E-4 0.0035857798428085325 5.276279359082327E-4 0.02402266566778069 4.8826640730562565E-4 0.0019886042376944615 4.7069558230050803E-4 0.01082153249665464 6.119217770943583E-4 0.0016633194153910713 1.175338114814726E-4 0.003141377109095206 1.0233915258556718E-4 1.0145645950266424E-6 3.675074166113791E-4 0.0 1.1926720848860397E-4 0.0034808509769385628 2.7664590094608507E-5 2.338028471109535E-5 2.338028471109535E-5 0.0 0.00909160385667593 0.001293681414330421 0.001293681414330421 0.00305688078785036 0.028719982202357365 0.001018591402180972 0.001018591402180972 0.005440141638973457 0.024542156546503966 6.959632755921783E-4 6.959632755921783E-4 2.739881336880445E-4 0.005327545289132185 1.26158140938082E-4 1.26158140938082E-4 0.023070190397931536 0.0 0.007867210789982843 0.007867210789982843 3.882966756065471E-4 3.882966756065471E-4 0.0 0.04056850298027765 0.0013020445345817131 0.0013020445345817131 0.0031593597501965473 0.00903513891067213 0.0028115493075308133 0.0028115493075308133 0.004731029162522831 0.012394768779646181 0.0029897108846251744 0.0029897108846251744 4.627845780935822E-4 0.04738599491418681 2.3555728395592234E-4 2.3555728395592234E-4 0.0027567076271477284 0.0 0.003911243522487026 0.003911243522487026 4.478947919361613E-5 0.0 0.006519821332680584 0.0012989865129889872 0.0017753590462245823 8.844358334323635E-4 0.02617224827308138 5.396738412866322E-4 8.91492368119987E-4 0.001032895611588533 0.02556522853711743 0.0013020271924315719 7.212121821602514E-4 1.0774964606130469E-4 0.012151893735996035 2.0215540996985337E-5 7.160853109592379E-5 0.011670324846786217 0.0 0.020473672387810334 0.004806263534810724 0.0 0.006519821332680584 0.0017753590462245823 0.0012989865129889872 8.844358334323635E-4 0.02617224827308138 8.91492368119987E-4 5.396738412866322E-4 0.001032895611588533 0.02556522853711743 7.212121821602514E-4 0.0013020271924315719 1.0774964606130469E-4 0.012151893735996035 7.160853109592379E-5 2.0215540996985337E-5 0.011670324846786217 0.0 0.004806263534810724 0.020473672387810334 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.029473555137406633 0.029473555137406633 0.0064453589885902175 0.012031309997564462 0.007245270190785675 0.007245270190785675 0.005994995474398642 0.007950796052126615 0.0038411483090221 0.0038411483090221 7.043244264249168E-4 0.0070805201816109675 2.5768870669151943E-4 2.5768870669151943E-4 0.025751451964220688 0.0 0.03479635810007082 0.03479635810007082 0.07363706680312676 0.001192377011000568 0.0018863813855146234 0.005538719939542791 0.0014372344065050023 0.012455650062318345 9.79591432128545E-4 0.011898683703934109 0.010622362405349821 0.0017939202195035196 0.001359756720087165 0.0080401734427412 0.0015923731939086064 0.04573106784072393 0.0 0.08248375966113787 0.02668677753623805 0.001192377011000568 0.0018863813855146234 0.0014372344065050023 0.005538719939542791 0.012455650062318345 9.79591432128545E-4 0.010622362405349821 0.011898683703934109 0.0017939202195035196 0.001359756720087165 0.0015923731939086064 0.0080401734427412 0.04573106784072393 0.0 0.02668677753623805 0.08248375966113787 0.0030905771477746403 0.0012284060404215607 0.0012284060404215607 0.11705590445609079 0.008028848053431221 0.008830531311979618 0.008830531311979618 0.007006654469966638 0.018851250021326902 0.011135292982967917 0.011135292982967917 0.02767816340881693 0.0 0.03408472527485679 0.03408472527485679 0.0018049637962013706 0.0018049637962013706 0.0021696249265200913 0.012049326305548272 0.0017497987214658566 0.0017497987214658566 0.019213584703673327 0.022613589134890727 0.008219308689277319 0.008219308689277319 0.02870582878231637 0.0 0.007151030455436747 0.007151030455436747 0.0011893507241194303 0.012166694791129399 0.00735673230701457 0.09092229913529852 0.012478229070899623 0.011784435575857954 0.024784442611034817 0.01605566555341304 0.003636568615785459 0.031835235989320754 0.0 0.01202237679360019 0.05635571506710765 0.012166694791129399 0.00735673230701457 0.012478229070899623 0.09092229913529852 0.011784435575857954 0.024784442611034817 0.003636568615785459 0.01605566555341304 0.031835235989320754 0.0 0.05635571506710765 0.01202237679360019 0.0024280635752832844 3.1685016801669147E-4 3.1685016801669147E-4 0.014530811590039607 0.02894899140473582 0.014352586444559149 0.014352586444559149 0.08519067074031772 0.0 0.08526515912824058 0.08526515912824058 0.0014512045820315195 0.0014512045820315195 0.030336859503732393 0.054561817051179066 0.02903325041525273 0.02903325041525273 0.07184830796310698 0.0 0.01754690167532505 0.01754690167532505 5.523692010640001E-4 0.017018816512578055 0.02633556694253703 0.023848035682572554 0.0021792430638792656 0.04496212565841443 0.0 0.05082726663068426 0.06828170396577056 0.017018816512578055 0.02633556694253703 0.0021792430638792656 0.023848035682572554 0.04496212565841443 0.0 0.06828170396577056 0.05082726663068426 4.477070112395659E-4 1.0541398361446923E-4 1.0541398361446923E-4 0.07892798395527038 0.0 0.0635834842422868 0.0635834842422868 4.7896412945604296E-4 4.7896412945604296E-4 0.02366125909661585 0.0 0.06162317218925164 0.06162317218925164 9.682947459550415E-5 0.06217578764555499 0.0 0.0322203379080506 0.01717801308107272 0.06217578764555499 0.0 0.01717801308107272 0.0322203379080506 0.0 6.089005983472657E-4 6.089005983472657E-4 0.0 0.0 8.111294296080886E-4 0.006518121807424181 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.9987978326110662 0.0 0.0 0.0 0.025269399656110126 0.025269399656110126 0.0 0.776773390481658 0.0 
0.0 0.0 0.8487823664304514 0.0 0.0 0.0 0.9102566695909464 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.004799605952530547 0.0 0.0 0.0 0.9518699046989159 0.0 0.0 0.0 0.7540903445549025 0.0 0.0 0.0 0.7634076251364221 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.00366507597396238 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.9261038715319007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.00366507597396238 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.9261038715319007 4.6210757119842673E-4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.7965482206407568 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.995566342007517 0.0 0.09813760379330072 0.09813760379330072 0.002289068917403021 0.002289068917403021 0.0 0.8651486704931753 0.0 0.0 0.0 0.8483215610560069 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.21553330152403585 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.3208338376014194 0.0 0.0 0.0 0.8752493887564969 0.0 0.0 0.0 0.8087983012664307 0.0 0.0 0.0 0.8441948219040976 0.0 0.0 0.0825639932735922 0.0 0.1034683922933586 0.1034683922933586 1.7887861116391303E-4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.6497054379215274 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.29047321402253173 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.09856387463184063 0.0 0.9387724609676479 0.10116898949186577 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.6497054379215274 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.29047321402253173 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.09856387463184063 0.0 0.10116898949186577 0.9387724609676479 0.0 0.0 0.0 0.0 8.084327426052196E-7 0.0 0.0 0.9517279628321272 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.9312348044164905 0.0 0.34416845643887467 0.34416845643887467 0.007208558361913412 0.007208558361913412 0.0 0.7933438378187014 2.2528854816891054E-5 2.2528854816891054E-5 0.0 0.0 0.0 0.0 0.0 0.9385380609595045 0.0 0.0 0.0 0.711073943873144 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.4095576621389856 0.0 0.0 0.0 0.74319035112725 0.0 0.0 0.0 0.9032526531083006 0.0 0.0 0.0 0.8526405381613922 0.0 0.0 0.3447389326242102 0.0 0.3508182997713496 0.3508182997713496 0.0 2.326616580089249E-6 3.571540334776355E-7 0.0 1.312841213391879E-4 0.0 0.0 0.618605178685653 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.15870184734002532 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.30322190008993966 0.0 0.9450287479028364 0.29634250821904007 2.326616580089249E-6 3.571540334776355E-7 1.312841213391879E-4 0.0 0.0 0.0 0.0 0.618605178685653 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.15870184734002532 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.30322190008993966 0.0 0.29634250821904007 0.9450287479028364 0.016348966932319742 0.0 0.0 0.6633152863065496 0.0 0.11288660582876331 0.11288660582876331 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.907759679645124 0.0 0.5410274539861472 0.5410274539861472 0.0023609813525792707 0.0023609813525792707 0.07085324064366827 0.0 0.17776034867440452 0.17776034867440452 0.0 0.9878367296020606 0.0 0.0 0.0 0.8771508849559081 0.0 0.0 0.0 
0.8825989050363116 0.0 0.0 0.0 0.40409648751545274 0.0 0.0 0.0 0.859505596306124 0.0 0.0 0.0 0.9606527632559543 0.0 0.0 0.0 0.9470963946972912 0.0 0.0 0.6178359141492888 0.0 0.6087301262015546 0.6087301262015546 0.0 0.012384082293523049 0.0 0.5078932883616798 0.027356419880956877 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.2389817895721579 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.5104856362092337 0.0 0.8195431263299763 0.5296101857010697 0.012384082293523049 0.0 0.027356419880956877 0.5078932883616798 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.2389817895721579 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.5104856362092337 0.0 0.5296101857010697 0.8195431263299763 0.0 0.10123736728315248 0.10123736728315248 0.7374958699619822 0.31335638412878514 0.03927630734361836 0.0 0.9575611705317942 0.0 0.0 0.0 0.8801686156958423 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.8855017162586241 0.0 0.0 0.0 0.9619575479974822 0.0 0.0 0.0 0.9945664515265018 0.0 0.0 0.0 0.9978892891047164 0.0 0.01495600950926362 0.01495600950926362 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.08662495148985196 0.08054564321856682 0.36539592864766307 0.017067965401314885 0.0 0.0 0.0 0.22373729026019612 0.0 0.0 0.0 0.08324781147155917 0.0 0.0 0.0 0.810656183832409 0.0 0.0 0.0 0.23357684999505218 0.0 0.0 0.0 0.20863981769725354 0.0 0.0 0.0 0.5698051830239955 0.0 0.015174208343466873 0.0 0.9914900448637449 0.014739250395121058 0.08054564321856682 0.36539592864766307 0.06730338578219731 0.0 0.0 0.0 0.0 0.22373729026019612 0.0 0.0 0.0 0.08324781147155917 0.0 0.0 0.0 0.810656183832409 0.0 0.0 0.0 0.23357684999505218 0.0 0.0 0.0 0.20863981769725354 0.0 0.0 0.0 0.5698051830239955 0.015174208343466873 0.0 0.014739250395121058 0.9914900448637449 0.05640320479571273 0.0 0.0 0.7274594206253537 0.3069518028276796 0.24978118127161805 0.24978118127161805 0.9605966878591062 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.9994280672669228 0.0 0.3819434507932269 0.3819434507932269 0.005882277901871444 0.0 0.36145534937244583 0.7238804741082958 0.32135652350272537 0.32135652350272537 0.0 0.9467787823547876 0.0 0.0 0.0 0.7236925736007271 0.0 0.0 0.0 0.9435620526835841 0.0 0.0 0.0 0.8714470949983879 0.0 0.0 0.0 0.9057911725722627 0.0 0.0 0.416736754649304 0.0 0.3975537554620971 0.3975537554620971 0.0 0.1290560667350262 0.29767835919862673 0.6335178517971215 0.16070486902362727 0.0 0.0 0.6696487816951299 0.0 0.0 0.0 0.447823040969809 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 8.326659496149854E-5 0.0 0.0 0.3801474457922201 0.0 0.9755160242094686 0.3876011286946202 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.16174422327278007 0.0 0.0 0.8957862179917638 0.361745693564716 0.3019293721960013 0.3019293721960013 0.0 0.19237487571415918 0.1632564966388117 0.1632564966388117 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.5763632622449338E-5 0.0 0.0 0.9910130464352247 0.0 0.7071952222943905 0.7071952222943905 0.1694344883067089 0.1694344883067089 0.3303503571705871 0.9153264431788785 0.3511826247689541 0.3511826247689541 0.0 0.39576362858800324 0.22613936211625424 0.22613936211625424 0.0 0.8995932719526807 0.0 0.0 0.0 0.9359375038588386 0.0 0.0 0.0 0.86731536307091 0.0 0.0 0.7395503543498394 0.0 0.7656764637024914 0.7656764637024914 0.0 0.18156727909669476 0.3255524768708672 0.6578515995621548 0.2166636484664774 0.0 0.17690074548408552 
0.5075924797564777 0.166447105719839 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 9.9260390035855E-4 0.0 0.0 0.764632969531348 0.0 0.9772445799782944 0.6925308492598053 0.18156727909669476 0.3255524768708672 0.2166636484664774 0.6578515995621548 0.0 0.17690074548408552 0.166447105719839 0.5075924797564777 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 9.9260390035855E-4 0.0 0.0 0.764632969531348 0.0 0.6925308492598053 0.9772445799782944 0.15932426573385491 0.0 0.0 0.0 0.7088472086953806 0.29041247346725113 0.29041247346725113 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.45394186876037E-5 0.0 0.0 0.9909525947480842 0.0 0.6883609189013179 0.6883609189013179 0.06324298402981862 0.06324298402981862 0.0 0.2807230248987191 0.643330523341054 0.643330523341054 0.0 0.9640086775014901 0.0 0.0 0.0 0.8818232371685664 0.0 0.0 0.0 0.6437017588411117 0.0 0.0 0.8080868946422705 0.0 0.7971406976642578 0.7971406976642578 0.0 0.0 0.7545008102507361 0.5318617313227373 0.48774696567969383 4.570156229298513E-5 0.0 0.0 0.0 0.0 4.7310302221328034E-5 0.0 0.0 0.0 3.511909741409488E-4 0.0 0.0 0.7063699395211954 0.0 0.9054708729433181 0.6319153954333104 0.0 0.7545008102507361 0.48774696567969383 0.5318617313227373 4.570156229298513E-5 0.0 0.0 0.0 0.0 4.7310302221328034E-5 0.0 0.0 0.0 3.511909741409488E-4 0.0 0.0 0.7063699395211954 0.0 0.6319153954333104 0.9054708729433181 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.6030377645580149 0.6030377645580149 0.7990398335143989 0.903818479653945 0.1851293112958296 0.1851293112958296 0.47435287536008236 0.858289567360035 0.5172327919151268 0.5172327919151268 0.6633859954685298 0.9214504937415626 0.6195836130464559 0.6195836130464559 0.33438847462670707 0.0 0.2932332724452418 0.2932332724452418 0.5065676031559714 0.4285205693058891 0.661368010635573 0.19735924581886868 0.14093118808408234 0.11207684964026665 0.7505953159343582 0.67831444965803 0.14523547210067136 0.2862953933876217 0.815210835822669 0.2590855944212656 0.19595004057342186 0.4819946807075046 0.0 0.6131668881730844 0.4845241606330683 0.4285205693058891 0.661368010635573 0.14093118808408234 0.19735924581886868 0.11207684964026665 0.7505953159343582 0.14523547210067136 0.67831444965803 0.2862953933876217 0.815210835822669 0.19595004057342186 0.2590855944212656 0.4819946807075046 0.0 0.4845241606330683 0.6131668881730844 0.12377869279201001 0.00232719451710494 0.00232719451710494 0.9040918143429466 0.6077555674213002 0.28139162757331737 0.28139162757331737 0.6430465812707051 0.8393561764840847 0.19591896876718892 0.19591896876718892 0.9130887991919152 0.0 0.8141209214214178 0.8141209214214178 0.1231967689676434 0.1231967689676434 0.6244701203008074 0.8992714930757927 0.4924367122091012 0.4924367122091012 0.7639868875885066 0.5266640277485931 0.7748492096604706 0.7748492096604706 0.7479339121624485 0.0 0.7400097379406061 0.7400097379406061 0.016560999582451396 0.25454929990890773 0.6788028572204318 0.386773516664116 0.25652492309961533 0.3785097823042223 0.7821419625822816 0.4521268230410481 0.07817473445099317 0.8113579645544878 0.0 0.8870131565991606 0.7555239947946366 0.25454929990890773 0.6788028572204318 0.25652492309961533 0.386773516664116 0.3785097823042223 0.7821419625822816 0.07817473445099317 0.4521268230410481 0.8113579645544878 0.0 0.7555239947946366 0.8870131565991606 0.0 0.0 0.0 0.8291850045126983 0.7111597819680946 0.568952027933853 0.568952027933853 0.4154862645758469 0.0 0.7551708909371179 0.7551708909371179 0.0 0.0 0.7682830825964196 0.765903591157315 0.7218012426779039 0.7218012426779039 
0.7115950018482342 0.0 0.7205162506814011 0.7205162506814011 0.0 0.43545991476916945 0.6952432766042747 0.03132867811147177 0.6109510472436065 0.753547514945941 0.0 0.399562997367802 0.7163226775728566 0.43545991476916945 0.6952432766042747 0.6109510472436065 0.03132867811147177 0.753547514945941 0.0 0.7163226775728566 0.399562997367802 0.0 0.0 0.0 0.0 0.0 0.0012258262438861664 0.0012258262438861664 0.0 0.0 0.0024071453900801985 0.0 0.0011673829914680208 0.0011673829914680208 0.0 6.795625096982488E-4 0.0 0.0 8.649216463281239E-4 6.795625096982488E-4 0.0 8.649216463281239E-4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/endgames/subgame3.txt b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/endgames/subgame3.txt new file mode 100644 index 0000000..568ba67 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/endgames/subgame3.txt @@ -0,0 +1 @@ +0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.7437517 0.738118 1.2535768E-4 0.03714 1.6025541E-4 1.1914393E-5 0.0 0.76353735 5.9532137E-8 1.9589265E-7 4.1246273E-7 0.06816208 1.5204709E-8 0.0 1.02606826E-7 0.23695253 5.7008144E-8 2.9717249E-8 2.014563E-8 0.58272994 2.014563E-8 2.014563E-8 2.9969662E-8 0.0 0.0 0.0 3.3910915E-8 0.0 1.7252278E-8 1.9036998E-8 9.405696E-9 9.768392E-5 1.834535E-8 0.0 0.0 2.4041821E-5 0.0 1.1863992E-9 1.1375092E-8 0.099162765 2.3185478E-8 1.087542E-9 0.6536946 0.008885433 0.6538294 0.6527127 0.51980686 0.044367373 0.5574099 0.51859665 0.7419894 5.4004808E-8 2.0517553E-4 0.83074725 1.02764545E-7 0.0 5.0242708E-8 0.72283965 3.711208E-8 0.0 1.5546854E-7 0.8615659 1.7071617E-8 5.0147854E-8 1.6715951E-8 0.8875679 4.011829E-8 2.014563E-8 2.014563E-8 0.44816518 2.014563E-8 0.0 0.0 0.87214863 0.0 4.4813536E-8 0.0 0.78666157 1.4173968E-7 2.0099513E-8 1.18765895E-8 0.08144655 0.0 5.9561556E-9 9.3977075E-9 0.01753961 1.040768E-9 0.0 0.0 0.02982531 1.4295471E-8 0.67195106 0.6734499 0.59341615 0.67174226 0.78322285 0.78169876 0.49896324 0.78357047 1.0698439E-4 6.751931E-8 0.0 0.8279651 0.0 2.212641E-7 1.4000149E-7 0.84805167 0.0 6.042031E-8 0.0 0.8587545 4.5951296E-8 2.2287935E-8 1.1143968E-8 0.88717985 2.014563E-8 2.014563E-8 2.014563E-8 0.44853836 0.0 0.0 1.8936735E-8 0.82126766 1.08813424E-7 0.0 1.3057195E-7 0.7721411 4.999384E-9 1.794817E-8 6.665846E-9 0.0 0.0 2.7900295E-9 3.6176497E-9 0.016983619 0.0 4.658182E-10 1.0003526E-8 0.027041439 0.67031306 0.6717546 0.672604 0.47459343 0.7801785 0.778301 0.77865887 0.32196897 0.67576486 0.7116204 0.7019629 0.0 0.0381233 0.010096164 0.05444484 0.46596888 0.3928929 0.39199388 0.39191064 0.85798967 2.7424468E-7 4.4147324E-8 1.9508671E-7 0.40730277 7.959233E-8 5.7767913E-8 6.67595E-8 0.88110864 0.0 0.0 0.0 0.79189277 0.0 3.048898E-7 6.694688E-7 0.07774471 1.2933575E-8 0.0 0.0 0.018163037 3.7183316E-9 1.5975226E-9 1.5818486E-8 0.026818948 0.0022395968 0.0022395384 0.0022401998 0.51587945 0.8172731 0.81846493 0.8186259 0.41565418 0.821134 0.8142476 0.80611414 0.7059005 0.7006856 0.0 0.61587954 0.04579843 0.030610707 0.39334372 0.037344307 0.39342532 0.3932722 7.386628E-8 0.22792716 7.387593E-7 8.13397E-7 1.9767072E-7 0.5300134 4.1820716E-8 5.8679824E-8 0.0 0.0 6.2982515E-8 1.00878594E-7 3.6145394E-8 0.0 0.0 5.4027346E-8 2.8519343E-8 8.355197E-5 0.0 0.0 7.517558E-9 2.9463212E-5 3.2892933E-9 9.785819E-9 0.0022400338 0.088325895 
0.0022394904 0.002239763 0.79388237 0.0089766905 0.79673153 0.7958111 0.58572656 0.039989572 0.586588 0.589459 0.7077732 0.0 0.049829327 0.6703732 0.03254366 0.39209878 0.39226002 0.4648874 0.39348888 7.911125E-8 2.1541467E-7 0.8552586 2.2217226E-7 5.871595E-7 1.7690836E-7 0.40710184 4.4449075E-7 0.0 0.0 0.8632328 0.0 2.7171794E-7 0.0 0.79676133 3.7623487E-8 0.0 0.0 0.078205705 0.0 1.8321424E-9 6.418832E-9 0.019756414 4.3253516E-9 0.0022401134 0.002239983 0.031155568 0.0022393756 0.818618 0.81564695 0.56590945 0.81801695 0.8005323 0.803434 0.43886736 0.81707096 0.0 0.044556964 0.03762555 0.55917263 0.39262167 0.3924162 0.3924213 0.46280596 8.756192E-8 9.306825E-8 7.359188E-8 0.8556155 3.2599903E-8 2.0743224E-7 2.8723575E-8 0.4059226 0.0 0.0 0.0 0.8698584 5.293375E-7 0.0 2.6224214E-7 0.7681432 8.174722E-10 0.0 0.0 0.0 1.475267E-9 2.9592948E-9 6.639752E-9 0.019218367 0.002239895 0.0022399079 0.0022402972 0.025461936 0.8179123 0.8181903 0.81971127 0.4167109 0.7965647 0.7905829 0.8111873 0.4186254 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.01825624 0.017530268 0.7770885 0.005271179 0.77139306 0.80337137 0.3957086 0.37978062 0.3872895 0.48010305 1.9796376E-7 0.7963204 5.247214E-8 3.2975657E-7 9.230313E-9 0.0 1.0678776E-8 0.0 1.02306494E-7 0.0 0.0 0.0 4.2353665E-7 0.64072186 7.526748E-8 0.0 8.624064E-9 0.83949244 0.0 3.0586164E-6 0.42913267 0.3631075 0.4118095 0.37007546 0.79259336 0.4197381 0.77217644 0.7692007 0.639248 0.36828145 0.69278264 0.78387237 0.01795541 0.7753534 0.78348225 0.02277412 0.80726385 0.4014327 0.45323247 0.38590097 0.43641794 4.7849846E-7 5.094375E-7 0.8100336 1.961478E-7 0.0 0.0 0.6127514 0.0 4.4917168E-9 0.0 0.66345686 2.756275E-8 0.0 1.1042327E-7 0.6984278 0.0 2.858984E-6 3.220067E-8 0.855416 5.7840543E-7 0.3549025 0.43079278 0.4471661 0.44042557 0.761934 0.741122 0.69835985 0.8387165 0.6455847 0.82148933 0.4041089 0.7788634 0.79739666 0.7849081 0.6760928 0.020375108 0.458111 0.5012866 0.50328815 0.36276132 3.126877E-8 1.7331455E-7 6.8948987E-7 0.8593024 0.0 0.0 0.0 0.59973085 0.0 0.0 0.0 0.687193 1.0791585E-7 6.285632E-8 2.6148234E-7 0.0 1.0586119E-6 1.6823836E-7 3.037513E-8 0.7925954 0.38841397 0.43146768 0.43439096 0.42733756 0.7102508 0.5980402 0.8063197 0.6341809 0.8475228 0.824749 0.7797023 0.30120808 0.6147717 0.61628705 0.6164835 0.074974746 0.7362595 0.70020103 0.74548894 0.0044366177 0.014619931 0.016039515 0.014597415 0.7871347 0.0 0.11338576 0.114511095 0.76328224 0.0 2.8793258E-8 6.7327406E-8 0.19724205 1.7553628E-8 1.1151585E-9 0.0 0.015800372 0.0031087946 0.0031093103 0.003104633 0.31746057 0.3852246 0.3867173 0.38605 0.703157 0.8798014 0.8831066 0.87562317 0.34028268 0.8889872 0.9294045 0.8305877 0.62637347 0.580682 0.81310534 7.552115E-4 0.73458344 0.85612017 0.016212411 0.07615709 0.014758914 0.016289681 0.09720293 0.0 0.10455629 0.11458098 4.5492577E-8 0.0 2.8309492E-7 1.1403701E-8 0.0 9.968024E-5 9.247565E-9 0.0 0.0031084477 0.1483052 0.0031096132 0.0031099461 0.38644746 0.089076996 0.38718283 0.38699242 0.88095075 0.043123577 0.8820546 0.881852 0.8012702 0.2047841 0.93587464 0.88404804 0.6171495 0.80373746 0.8578883 0.086405866 0.7964531 0.01471712 0.016934438 0.004893398 0.016073853 0.116151065 0.0 0.810305 0.048245374 6.288073E-7 0.0 0.76223016 0.0 0.0 5.7838417E-8 0.20846735 0.0 0.0031099448 0.0031092307 0.016784478 0.003109067 0.38717097 0.38687396 0.34236532 0.38715717 0.88291174 0.87933016 0.8105762 0.88190967 
0.90281767 0.91353524 0.34628144 0.9228526 0.79340154 0.61868066 0.7680846 0.08235862 0.015001348 0.015296474 0.015964443 0.005187144 0.07723822 0.0 0.11253978 0.78575915 5.4404385E-8 0.0 2.8271684E-8 0.75687206 0.0 2.642575E-8 1.2425686E-8 0.0 0.003109631 0.0031089112 0.003108812 0.013641186 0.38709235 0.38420367 0.38702938 0.31110483 0.8793142 0.8817409 0.88358885 0.76086146 0.89361113 0.91285026 0.92326546 0.3412086 0.5149232 0.44817272 0.50257456 0.02138783 0.38130918 0.17251813 0.15393212 0.2514157 0.0 0.8289585 0.73420376 0.31237233 0.0 0.43411717 0.5011187 0.0050642504 0.0043331655 0.0044952943 0.0 0.04980594 0.041916862 0.041948363 0.041896846 0.06538246 0.12231733 0.1222433 0.12258043 0.4229089 0.79973114 0.86171544 0.8614989 0.8465481 0.80982023 0.82421064 0.8709559 0.47866076 0.49876064 0.33887625 0.053519133 0.3518681 0.35393992 0.76609534 0.0 0.7555352 0.8023653 0.55100894 0.0 0.47443175 0.52990943 0.004278569 2.6317615E-5 0.0043468983 0.0 0.041933633 0.32935584 0.041918438 0.041937113 0.1222193 0.39940664 0.12215058 0.12029199 0.7972449 0.027118035 0.795047 0.7926856 0.81620866 0.8236886 0.8286104 0.78228253 0.4851537 0.16765942 0.34059113 0.021916153 0.17600802 0.6793606 0.0 0.247065 0.8233239 0.5238693 0.0 0.31529045 0.5462007 0.0038721152 0.004443518 0.0057123206 0.0 0.041943923 0.041944154 0.073742315 0.041943133 0.12116648 0.12224541 0.0961561 0.12067805 0.85950327 0.8074194 0.7025821 0.84978986 0.86411047 0.7974538 0.9163771 0.8346833 0.14067283 0.373017 0.16517381 0.019775484 0.78152615 0.0 0.6140709 0.23847477 0.48203242 0.0 0.5811493 0.30210394 0.0046699285 0.0037959022 0.0040434343 0.0 0.041923955 0.041936375 0.041982286 0.056439113 0.122328386 0.122177236 0.122356005 0.04488584 0.8642655 0.7765456 0.8557137 0.33949515 0.77568686 0.8017796 0.8573749 0.8810353 0.43014392 0.38794586 0.41471055 0.14433782 0.0 0.72096884 0.6771616 0.040575355 0.0 0.124828465 0.08253586 3.4052726E-5 0.05925166 4.5075354E-5 0.0 0.20039792 0.2263022 0.1699093 0.26920924 0.006899096 0.013351967 0.013728164 0.0129990345 0.06607449 1.1374093E-7 0.11595993 0.11710217 0.47513515 0.5567877 0.20386815 0.55487764 0.4283275 0.437022 0.64807755 0.0 0.7758624 0.650462 0.12381467 0.0 0.10810115 0.11545505 1.7137741E-4 0.007872885 1.7707779E-4 0.0 0.2673088 0.21659419 0.25731185 0.2687682 0.013318321 0.22849284 0.013411612 0.0135093015 0.12042547 0.42598584 0.121957585 0.10133443 0.56933326 0.6561734 0.49735737 0.57961786 0.4130405 0.736529 0.0 0.14251858 0.75835794 0.12076508 0.0 0.052294232 0.10592766 5.7968726E-5 0.06280869 3.7623173E-5 0.0 0.19874373 0.22013357 0.20598112 0.24757563 0.013533324 0.013081847 0.013520822 0.013750834 0.11115345 1.1456887E-7 0.09442513 0.12396116 0.5507707 0.52752197 0.49084604 0.5741052 0.7834892 0.0 0.74759656 0.13714764 0.11149619 0.0 0.111766346 0.045992564 5.5650973E-5 0.06123647 5.696423E-5 0.0 0.27432832 0.26370126 0.26402554 0.19718875 0.0135495635 0.013390335 0.013803416 0.009657968 0.12198193 1.1301769E-7 0.122303426 0.06892351 0.44693777 0.5467529 0.5887942 0.49730814 0.0 0.04352638 0.04436538 0.11835613 0.0 0.50946987 0.4894385 0.23670968 0.37196907 0.36370113 0.0 0.4184265 0.7020498 0.7434844 0.72039396 0.6284751 0.7863534 0.8231775 0.80235726 0.6188918 0.7705492 0.77894723 0.71548146 0.3541797 0.8415305 0.8196633 0.66297716 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.044353392 0.5014736 0.0 0.1399506 0.5143899 0.34518668 0.39716133 0.22922005 0.0 0.7287656 0.6752419 0.51929677 0.70343673 
0.7980233 0.6606102 0.6881111 0.8043192 0.77143854 0.6778701 0.7137817 0.8069668 0.7741296 0.8167857 0.48150486 0.6936523 0.4803323 0.0 0.47662634 0.13680163 0.35395417 0.36414495 0.35913602 0.0 0.71063566 0.6559909 0.7179763 0.46903542 0.8302542 0.7435165 0.75831324 0.60626495 0.6432899 0.7623792 0.7872809 0.5340451 0.70224017 0.77408993 0.77861327 0.36201346 0.0 0.01883641 0.021283567 0.05613148 0.21207713 0.20722164 0.0 0.044706132 0.06633712 0.07704886 0.0799401 0.26771304 0.28442654 0.25588268 0.23391773 0.51775736 0.09784458 0.7149851 0.73668975 0.116149336 0.37089583 0.46360433 0.27751094 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.021111825 0.19686711 0.18360104 0.09179329 0.0 0.06728422 0.06920325 0.051462002 0.08306304 0.2503795 0.27038312 0.2801304 0.26809856 0.76337796 0.0945573 0.5473206 0.6325959 0.3533804 0.46200708 0.124074094 0.43698072 0.20952135 0.19690561 0.1822537 0.0 0.07670299 0.07728367 0.07827182 0.042881075 0.26556426 0.27623963 0.24149793 0.26918626 0.7642013 0.11017597 0.65077156 0.5018276 0.4208922 0.44684315 0.453443 0.12228925 1.6774947E-4 1.939223E-4 0.0 0.0026944135 0.011324552 0.008587342 0.011184328 0.0016907976 0.0053995834 0.005495076 0.0052042347 0.0068887556 0.16961215 0.38866046 0.37447932 5.517432E-4 0.51428324 0.29058164 0.31193465 9.992512E-5 0.0 0.011090981 3.709134E-7 0.010539763 0.009468105 1.7609484E-4 1.90849E-4 1.6286071E-4 1.7466255E-4 0.18245527 0.0015998214 0.18246156 0.17952527 0.45539203 5.9701724E-4 0.4358112 0.47205797 0.0 0.009901067 0.009072759 0.0031749422 0.009204681 0.0053287875 0.0057177604 0.0021481677 0.0057390803 0.39237237 0.17212267 0.00637404 0.3607318 0.33864352 0.50227016 6.1492476E-4 0.32019317 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.1501163E-6 4.2512542E-7 1.6480366E-5 0.0019533741 0.33933678 0.32592905 0.30962467 0.002660535 0.2807738 0.6244215 0.5067074 0.003038452 0.51896954 0.50302804 0.4736634 2.038619E-8 4.0049077E-8 0.33050996 0.0036999648 0.300122 0.33960226 0.24081737 5.931079E-4 0.25225562 0.28646797 0.4512087 6.2783924E-4 0.5326229 0.4838975 7.0938845E-6 0.3219235 0.33761537 0.0011095421 0.29187825 0.6463038 0.21538877 0.004210049 0.6089069 0.48016286 0.49076152 0.0045435755 0.37911862 0.3216491 0.31979254 0.32498842 0.0018286175 0.6328135 0.27516243 0.59928715 0.0015511337 0.4640363 0.41654694 0.49833816 5.856556E-4 0.0 8.678838E-10 1.8724778E-7 8.9864084E-4 0.2922528 0.28645027 0.24247365 0.0014647732 0.1211926 0.13132498 0.1254744 0.0 2.391458E-6 0.27417597 0.0051079607 0.27548963 0.2832806 0.13426764 0.0013395926 0.13710271 0.12886257 2.87409E-8 0.26141167 0.28888053 0.0011454108 0.29410166 0.13659373 0.1366041 0.0019206622 0.09551362 0.28970534 0.29197797 0.28438836 0.0025937655 0.13545792 0.12614956 0.13578494 0.0020150437 2.0603039E-7 2.9973596E-6 1.2052468E-5 1.036531E-4 3.2640504E-8 4.1260756E-7 1.602599E-6 5.329046E-7 1.7642694E-8 3.4411264E-7 1.2311958E-4 1.8186442E-7 1.783442E-9 2.0248413E-5 2.7615904E-6 0.0 5.1396888E-5 1.3772028E-6 1.765074E-7 3.900722E-7 1.4986263E-6 4.932269E-5 1.3482395E-4 1.4788874E-6 3.3782885E-6 7.016119E-5 1.43646E-7 4.1238653E-7 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.9810905 0.9785614 0.0028131753 0.2794606 0.0028131753 0.0028131753 0.0 0.05778499 0.0019465771 0.003473767 0.0015216132 0.17489436 0.0018943725 0.0015292438 6.743382E-6 
0.31641924 6.909413E-6 6.749833E-6 0.0 0.03900577 0.0 0.0 0.0 0.0 0.0 0.0 4.741664E-7 0.0 9.4265835E-7 5.754499E-7 5.508551E-6 0.0022182507 6.629883E-6 0.0 3.5641522E-5 0.039182156 9.947261E-5 1.4982863E-4 0.051079564 0.008923647 0.04796248 0.033409305 0.20872568 0.0047651515 0.6107971 0.26729408 0.14698139 0.020476732 0.39574134 0.20749995 0.9775954 0.0028131753 0.0028131753 0.81367004 0.0028131753 0.0 0.0034353826 0.61048716 0.002744508 0.0018089712 0.0015864111 0.3791914 0.0018927009 6.909413E-6 6.529674E-6 0.75919604 6.7703136E-6 0.0 0.0 0.011904783 0.0 0.0 0.0 0.9621228 0.0 1.4021538E-6 0.0 0.13694456 1.655221E-6 8.8808065E-6 5.539544E-6 0.009102011 0.0 7.911475E-5 8.662981E-5 0.06949476 9.923173E-5 0.049649622 0.04237892 0.056861285 0.03758335 0.48639548 0.5425558 0.796908 0.56030977 0.3015591 0.31183505 0.73348016 0.41992474 0.0028131753 0.0028131753 0.0028131753 0.6727621 0.0 0.003431152 0.0017378154 0.3070601 0.0016383112 0.0017485083 0.0015820354 0.2898376 6.7312517E-6 6.389172E-6 6.7872934E-6 0.50653815 0.0 0.0 0.0 0.006438571 0.0 0.0 0.0 0.79886043 2.1208655E-6 0.0 2.2372776E-6 0.039490294 1.0784057E-5 2.4209905E-6 2.3242654E-5 0.0 1.4365496E-4 1.2002771E-4 1.2990381E-4 0.030638471 0.03403702 0.04048108 0.038128484 0.020964919 0.15646996 0.24322546 0.4700062 0.44149542 0.1648738 0.321626 0.4214878 0.46016955 0.992972 0.9931631 0.9933062 0.0 0.2918932 0.22044942 0.29806092 0.37518808 0.2621005 0.48214108 0.42738587 0.6186815 8.674014E-5 8.4659005E-5 1.1083745E-4 0.0080358535 2.2013492E-7 1.8141559E-7 6.149957E-8 0.9144045 0.0 2.503179E-8 2.503179E-8 0.49418646 0.0 3.0975603E-8 3.0975603E-8 0.0012153838 3.8963863E-6 1.6859072E-5 0.0 0.036694847 0.037635785 0.06220464 0.014020827 0.054750025 0.039328508 0.0520834 0.03133742 0.6363326 0.33823633 0.5438759 0.3566808 0.85476846 0.54563653 0.64034027 0.41410524 0.9880126 0.98687005 0.0 0.02681649 0.2506957 0.19068854 0.31455773 0.50004244 0.47874153 0.38793293 2.4430064E-4 0.75278944 1.424508E-5 8.373348E-5 2.2150992E-7 0.15018465 2.2150992E-7 2.2150992E-7 2.478904E-8 0.0 2.478904E-8 2.478904E-8 1.0920485E-8 0.0 1.0920485E-8 1.0920485E-8 1.2023166E-5 0.004424686 7.247535E-6 0.0 0.040141948 0.012455833 0.03792223 0.033832375 0.036861338 0.011560845 0.062498577 0.034022633 0.5228095 0.0054764384 0.8215273 0.28163177 0.25547257 0.020818448 0.33759552 0.105428256 0.99236786 0.0 0.100898586 0.3898214 0.26860574 0.42157382 0.4675507 0.52309984 0.35759595 2.1130744E-4 1.3445437E-4 0.5418482 2.0499839E-4 2.1048398E-7 2.2150992E-7 0.011559502 2.2150992E-7 2.503179E-8 0.0 0.9705662 2.503179E-8 3.0975603E-8 0.0 0.52349955 3.0975603E-8 1.3121466E-5 7.102682E-6 0.013108846 0.0 0.045356117 0.034110602 0.0879585 0.032527145 0.03814717 0.039151292 0.09520349 0.058919106 0.54790497 0.6775468 0.78168136 0.45475248 0.5918331 0.3888924 0.79691094 0.40669796 0.0 0.25825608 0.20492797 0.2032552 0.38656297 0.2990698 0.43285176 0.394753 7.0479124E-5 1.9380688E-5 5.387062E-5 0.7885982 1.9655461E-7 2.2150992E-7 2.2150992E-7 0.011810894 2.503179E-8 0.0 2.503179E-8 0.61206615 3.0975603E-8 0.0 3.0975603E-8 0.4590175 1.1512982E-5 1.2134267E-5 1.3262826E-5 0.0 0.03879885 0.042189572 0.07114564 0.0083294865 0.05854517 0.06113245 0.06513195 0.021817133 0.62902015 0.6595365 0.55831707 0.535617 0.40655792 0.26045713 0.67968816 0.7045915 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0013431113 0.0012636092 0.56694365 0.41134834 0.94124323 
0.85879815 0.67280406 0.46602884 0.8743697 0.7943657 0.678307 0.0021442778 0.56121093 0.5844944 5.156331E-6 0.0 1.4139679E-5 2.9078346E-6 8.042284E-8 0.0 1.7709301E-7 5.7848364E-8 0.0010339672 2.72118E-6 0.0011180881 0.0 0.6226814 1.4465697E-4 0.65742743 0.5825216 0.7628514 1.6964578E-4 0.8095881 0.7559958 0.54564905 0.041502677 0.69361895 0.4367553 0.4036806 0.049663167 0.044258006 0.21766081 4.4767945E-5 0.789624 0.7802029 0.86845356 0.8400658 0.9316435 0.90262425 0.97194535 0.7149577 0.4751167 0.35359108 0.86671793 0.657462 5.3423487E-6 0.0 8.00943E-5 1.5431115E-6 2.2997743E-7 0.0 0.23048809 6.7682585E-8 6.313298E-4 5.672368E-5 4.4276234E-4 0.0 0.58553225 0.6306675 0.5110513 0.62560755 0.8136058 0.7983539 0.7273367 0.7620375 0.7031828 0.56864446 0.8523021 0.6506049 0.28103593 0.20056833 0.015820557 0.15042567 0.68766725 0.73900527 0.86206967 0.356852 0.8811488 0.58392775 0.8926955 0.53169185 0.5874516 0.5907761 0.68816096 0.3198566 2.3694497E-6 0.0 7.9641985E-8 0.0028958637 1.4559255E-7 0.0 9.65473E-8 0.13749874 0.0017505422 0.0010429961 0.002159725 0.0 0.60329056 0.71620136 0.70639294 0.23631465 0.6339571 0.64852816 0.70059645 0.1803853 0.50258446 0.5270175 0.50073826 0.0589832 0.11940724 0.15324625 0.13779843 0.062093016 0.9502541 0.9875471 0.98726773 0.34699407 0.55138195 0.63977385 0.38526776 0.10778662 0.13782348 0.15010707 0.18023379 0.572173 0.0 0.9229536 0.78561294 0.19845448 0.0 0.6458012 0.3295628 3.855894E-5 8.305441E-4 0.0019754167 0.0 0.10505122 0.34289172 0.3910145 0.26087528 0.4255281 0.27217746 0.55176574 0.08515719 0.37074092 0.16654642 0.30641365 0.1664446 0.88551253 0.3640206 0.67792 0.26055706 0.9907871 0.9819687 0.60766673 0.145798 0.48435074 0.4674923 0.145265 0.0014804591 0.13725996 0.14019302 0.8560563 0.0 0.7616171 0.6511656 0.119147964 0.0 0.13408683 0.17886738 0.0011519725 0.0011088535 9.233726E-4 0.0 0.2827236 0.006346363 0.29970026 0.1796291 0.3768696 0.01909137 0.63815284 0.09743385 0.25085825 0.38378707 0.4778584 0.29694504 0.33225858 0.92066914 0.7910446 0.46129292 0.98499155 0.51845115 0.59164953 0.772797 0.5136752 0.14384812 0.12869526 0.22877695 0.103476726 0.68448657 0.0 0.7957973 0.59248793 0.64862746 0.0 0.69647014 0.4777544 0.0017223529 0.0015278533 0.0013094135 0.0 0.5800904 0.2171121 0.061244745 0.27607924 0.41333327 0.45111853 0.6721275 0.31757516 0.34603685 0.36541697 0.47011015 0.32250357 0.60348284 0.48376262 0.8232026 0.5071772 0.59217346 0.6525027 0.73265 0.2909916 0.15069547 0.121175684 0.15392208 0.027341833 0.80110276 0.0 0.81637365 0.2107515 0.5912279 0.0 0.22513022 0.21568821 0.0018237847 7.458311E-4 0.0017050032 0.0 0.41949502 0.32634524 0.22451067 0.184267 0.3020262 0.12430481 0.21265963 0.6066303 0.22409423 0.3663258 0.42314342 0.44094118 0.35574389 0.46182412 0.5194442 0.8959947 0.8174224 0.9468364 0.9403956 2.734326E-6 0.002753122 1.131473E-4 6.704643E-5 0.4521045 0.0 0.8196905 0.73743683 0.23169088 0.0 0.3496147 0.4570179 0.0038000792 0.043846443 0.036155358 0.0 0.0035310902 0.022606343 0.03266994 0.019596238 0.12961462 0.028038466 0.04275791 0.015421011 0.6765925 0.14388804 0.47648457 0.22288549 0.9640736 0.7377364 0.8682983 0.46667108 0.92709607 0.9441228 0.0048282407 8.528644E-8 0.002530816 0.00523406 0.7033008 0.0 0.8830677 0.82309544 0.6365557 0.0 0.5681848 0.6939658 0.036840916 0.0016721702 0.037030905 0.0 0.021852132 0.07368625 0.015351257 0.014404811 0.034101762 0.07131663 0.019412937 0.018814167 0.011056796 0.6074746 0.398821 0.105008274 0.9042891 0.59756726 0.88989407 0.4801251 0.90567845 6.584753E-5 0.004078484 
9.3647104E-5 9.626123E-5 0.8081009 0.0 0.6985408 0.5975834 0.559734 0.0 0.4835679 0.4837838 0.03717 0.041159462 0.024320789 0.0 0.02497966 0.026452834 0.034978237 0.01907848 0.057021443 0.07199768 0.21227235 0.026043154 0.6362473 0.63341635 0.8077233 0.3853272 0.8586934 0.8457821 0.8965206 0.6581511 1.2741587E-4 0.0019898738 1.6728175E-4 3.7641612E-6 0.76620346 0.0 0.6909851 0.092750594 0.5053773 0.0 0.50818855 0.029112296 0.04208145 0.03113409 0.043051288 0.0 0.004243713 0.0133303385 0.024926523 0.0242311 0.0366766 0.070075735 0.07270738 0.12463753 0.21615125 0.2895571 0.4158076 0.7959936 0.65040237 0.43592444 0.8418251 0.9563064 0.8226466 0.9815964 0.9765738 0.17862497 0.0 0.34495416 0.32573017 0.0034035866 0.0 0.0729846 0.03746217 3.3422002E-5 0.0054591317 6.858E-4 0.0 3.659007E-5 4.9111153E-5 3.4193628E-4 2.314752E-4 0.0024498499 0.0015803027 0.0016545332 3.65896E-4 0.49568003 0.06850184 0.16992995 0.07647445 0.88753897 0.46663052 0.6475931 0.34020212 0.7788876 0.83933574 0.3266083 0.0 0.29843885 0.2601449 0.11500103 0.0 0.11035282 0.0955819 0.014198971 1.9874387E-4 0.018135585 0.0 4.048469E-5 2.3909135E-5 4.928166E-5 6.663618E-5 0.001171403 0.026957523 9.5079606E-4 6.5236026E-4 0.15209904 0.045908447 0.09379582 0.054786455 0.61032104 0.004926914 0.47215927 0.37595698 0.972302 0.34085226 0.0 0.14292273 0.38745117 0.11441818 0.0 0.031600382 0.098547444 2.7379312E-4 0.004691881 3.976995E-4 0.0 2.879149E-4 4.892834E-5 5.9330225E-4 2.7012525E-4 0.0024213605 0.0024482887 0.0036391746 8.128586E-4 0.17501748 0.09529907 0.16721155 0.09412898 0.76502407 0.6183312 0.8159584 0.4872397 0.3002375 0.0 0.2962839 0.051277544 0.040616497 0.0 0.14327595 0.0028913184 4.482102E-4 0.008537102 4.6896838E-4 0.0 3.7695447E-4 3.9056602E-5 4.0718488E-4 1.715658E-5 0.0027381023 0.0022685167 0.0020366139 0.0023880824 0.06110114 0.07132103 0.07265779 0.4063104 0.47578743 0.6023208 0.5273665 0.8855629 0.0 3.7231853E-6 7.210143E-8 0.33742598 0.0 0.6274168 0.48217914 4.4257314E-7 6.2671506E-6 6.0786835E-5 0.0 0.062535696 0.2809061 0.3662238 0.34453604 0.06651788 0.4829989 0.5181368 0.52585363 0.035378445 0.4121772 0.6545423 0.5775977 0.018186009 0.18705234 0.27403837 0.32760957 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 4.151786E-6 0.6263582 0.0 0.6539205 0.6346308 8.824284E-5 1.6946187E-4 1.14456474E-4 0.0 0.33932817 0.37500802 0.1781365 0.35259777 0.56000286 0.53132784 0.33227104 0.5717564 0.3877155 0.6961981 0.33205938 0.6477466 0.32903332 0.19854482 0.017367205 0.24595279 0.5764472 0.0 0.47788152 0.1559531 3.5013867E-4 2.8659613E-5 2.0880542E-4 0.0 0.36862305 0.32662258 0.27918455 0.02412279 0.45926544 0.4750075 0.3052977 0.013929142 0.37092662 0.45193738 0.3820018 0.010896334 0.45654538 0.13769762 0.1475588 0.027749794 0.0 0.003620805 0.0030168644 8.637275E-4 0.033406243 0.039739925 0.0 0.004104349 0.0863748 0.10633502 0.10723379 9.248605E-4 0.008818871 0.014853091 0.008915458 0.390877 0.37126932 0.36753985 0.2925967 0.6777942 0.46912292 0.32424262 0.5916347 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.005522656 0.024406634 0.038605444 0.010800859 0.0 0.08756118 0.068935685 0.007033525 0.07362097 0.010478296 0.016833479 0.006738533 0.009071837 0.39120412 0.22626181 0.7797485 0.20324674 0.60009474 0.5547363 0.47476646 0.562906 0.05052743 0.031108564 0.044680197 0.0 0.033200406 0.049741037 0.096034594 0.0038347123 0.017290337 0.011906704 0.013410859 3.393172E-4 0.41320184 0.1593609 0.25798512 0.6242957 
0.32424685 0.28185168 0.5403933 0.6966101 4.5566714E-5 4.7651192E-5 0.0 2.2284202E-7 1.2235866E-4 1.12279675E-4 1.4346799E-4 3.3653745E-5 0.003124675 0.004669241 0.00390697 0.009394583 0.20441256 0.14640029 0.27991298 5.868197E-4 0.020642051 0.010159085 0.02176043 2.3531222E-5 0.0 1.3263886E-4 1.4543401E-6 1.2268849E-4 1.2497828E-4 0.0059457575 3.2965763E-6 0.005341125 0.0087818755 0.13814999 0.0011939228 0.11764736 0.16611512 0.015256189 2.8186681E-5 0.0075128847 0.0025675185 0.0 1.0887627E-4 1.2865548E-4 3.888087E-6 7.2906514E-5 0.0019481874 0.002137115 3.3920168E-4 0.0034970879 0.21154281 0.22131088 0.011326611 0.24648905 0.0052198344 0.009852163 4.1230902E-4 0.0087268185 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 9.929129E-8 2.6418513E-6 2.3598539E-6 2.140826E-6 0.5200773 0.26989475 0.33445218 0.23693709 0.68728423 0.8500231 0.57094395 0.6831804 0.0 0.0 1.8392543E-6 9.564269E-6 6.745195E-7 1.5912394E-6 0.12129423 0.009144236 0.18091221 0.22569862 0.3604955 0.24174994 0.77629447 0.35663027 0.0 1.7857088E-6 2.0262203E-6 1.8814286E-7 1.8603472E-6 0.38233653 0.26574677 0.67267627 0.21672349 0.54569936 0.5272631 0.7476481 0.83968353 3.1693312E-6 1.8775125E-6 2.807981E-6 9.731789E-8 0.31460962 0.042396367 0.31408888 0.37650663 0.7119838 0.5277941 0.6694957 0.6551989 6.185838E-6 5.996599E-6 5.793759E-6 0.48845646 0.3698991 0.39184746 0.40673488 0.5857929 0.56688404 0.63265014 0.7522791 7.641677E-6 4.8490867E-5 0.30540293 0.019572344 0.32784516 0.36503968 0.8112018 2.8691372E-6 0.91315114 0.8577049 1.998472E-5 0.2910907 0.4532188 0.31663737 0.3226049 0.83862895 0.8647401 0.5980134 0.7823724 0.17719057 0.3776591 0.50653267 0.28151685 0.45485282 0.718755 0.6673296 0.45318228 1.2430934E-4 5.619994E-4 5.252004E-4 0.61264706 0.6944669 0.59883046 0.31038892 1.5594816E-4 2.5640675E-5 0.5455249 0.7558251 0.45756236 0.4160177 0.0014418303 0.61402184 0.5796174 0.9073593 0.558933 0.6075735 0.71775043 0.45488992 0.60695314 0.105020516 0.04753393 0.22974105 0.08044436 0.31750575 0.25130478 diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/endgames/subgame4.txt b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/endgames/subgame4.txt new file mode 100644 index 0000000..40e5aad --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/endgames/subgame4.txt @@ -0,0 +1 @@ +0.0020375338 0.0020375338 0.0022886763 0.4981091 0.0 0.0 0.0 0.5687428 2.8074498E-10 2.8074498E-10 7.0458667E-10 0.5874823 8.163517E-13 8.163517E-13 0.0 0.5926329 5.227646E-10 5.227646E-10 4.469884E-10 0.8364364 1.7046575E-10 0.0 1.7046575E-10 0.61005384 3.967442E-10 3.967442E-10 0.0 0.32727167 2.537443E-10 2.537443E-10 2.2482713E-10 0.237522 1.640249E-10 1.640249E-10 4.738496E-10 0.0 0.0 0.0 2.633846E-10 0.0 3.1870673E-10 3.1870673E-10 1.4754377E-9 0.0 0.4036216 0.4036216 0.3352602 0.1626818 1.422744E-8 1.422744E-8 2.0579008E-10 1.17688E-10 8.558628E-8 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 5.780081E-12 4.665062E-4 0.0 0.0 0.0 0.0 0.0 0.0 1.7046575E-10 0.0 0.0 0.0 0.0 2.5671616E-9 4.860899E-10 0.0 6.9119704E-10 0.0 3.0807645E-10 5.391337E-10 4.248931E-10 0.0 2.1323236E-10 2.7513852E-10 0.0 0.0046688 0.0 2.9511782E-11 0.0 1.0898577E-10 2.7407545E-11 8.277017E-13 0.0 0.017629074 0.014391243 0.019872025 0.016843269 0.0 0.0 0.0 8.558628E-8 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 5.780081E-12 0.0 4.665062E-4 0.0 0.0 0.0 0.0 0.0 1.7046575E-10 0.0 0.0 0.0 0.0 4.860899E-10 2.5671616E-9 0.0 6.9119704E-10 3.0807645E-10 0.0 5.391337E-10 4.248931E-10 2.1323236E-10 
0.0 2.7513852E-10 0.0 0.0 0.0046688 2.9511782E-11 0.0 2.7407545E-11 1.0898577E-10 8.277017E-13 0.0 0.014391243 0.017629074 0.019872025 0.016843269 0.0 0.0 0.0 7.2740283E-7 0.0 0.0 0.0 5.4161353E-10 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.7046575E-10 0.0 0.0 0.0 0.0 0.0 0.0 1.0264418E-8 6.053431E-10 1.4809294E-9 1.4809294E-9 2.5959312E-9 1.3084514E-10 6.2322336E-10 6.2322336E-10 2.775078E-9 0.0 4.4341153E-13 4.4341153E-13 0.005215008 0.0 4.8545487E-11 4.8545487E-11 1.8606726E-9 0.0 0.019538831 0.019538831 0.015039618 0.01005348 0.0 0.0 0.0 0.0019474725 0.0019474725 0.0014893783 0.50956273 0.0 0.0 0.0 0.3262841 0.0 0.0 0.0 0.46722606 2.103759E-9 2.103759E-9 0.0 0.6587718 1.937985E-9 0.0 6.3502914E-10 0.6750737 7.2287634E-11 7.2287634E-11 0.0 0.37364078 0.0 0.0 1.1750845E-9 0.33186665 1.2230553E-9 1.2230553E-9 1.4035689E-9 0.0 9.725448E-10 9.725448E-10 6.140597E-10 0.0 0.0 0.0 0.0 0.0 0.8178347 0.8178347 0.78470284 0.25533563 8.7200586E-10 8.7200586E-10 1.8576191E-8 5.6359E-11 4.0167134E-8 0.0 0.0 0.0 0.0 8.470285E-4 2.5923495E-4 5.5689638E-5 0.0 0.0 0.0 0.0 0.0 1.3861752E-9 0.0 0.0 0.0 0.0 0.0 1.0325013E-10 1.073456E-9 2.6680025E-10 8.297648E-9 0.0 1.4442251E-9 5.235361E-9 0.0 6.4558824E-11 2.3621155E-10 0.0 0.00591968 1.15177715E-10 7.4505825E-11 0.0 1.8301259E-9 0.0 3.2233945E-11 0.0 0.01664244 0.017984226 0.02306016 0.052286748 0.0 0.0 0.0 4.0167134E-8 0.0 0.0 0.0 0.0 8.470285E-4 5.5689638E-5 2.5923495E-4 0.0 0.0 0.0 0.0 0.0 1.3861752E-9 0.0 0.0 0.0 0.0 1.0325013E-10 0.0 1.073456E-9 2.6680025E-10 0.0 8.297648E-9 1.4442251E-9 5.235361E-9 6.4558824E-11 0.0 2.3621155E-10 0.0 1.15177715E-10 0.00591968 7.4505825E-11 0.0 0.0 1.8301259E-9 3.2233945E-11 0.0 0.017984226 0.01664244 0.02306016 0.052286748 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.368684E-4 1.7390167E-4 1.7390167E-4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.1845639E-9 1.1845639E-9 1.2962414E-7 2.4294294E-10 0.0 0.0 0.0 0.0 4.3361954E-9 4.3361954E-9 0.0 0.0 1.4867403E-10 1.4867403E-10 0.0058110263 0.0 9.6967885E-11 9.6967885E-11 1.6352063E-8 0.0 0.023093998 0.023093998 0.016524833 0.0288031 0.0 0.0 0.0 0.0 0.0 0.0 0.016657876 1.8810167E-4 1.8810167E-4 0.0 0.18228158 0.0 0.0 0.0 0.6770383 4.829213E-10 0.0 1.1037695E-9 0.44148597 2.660313E-10 2.660313E-10 0.0 0.38265786 0.0 0.0 1.8647893E-10 0.3682032 6.8887734E-10 6.8887734E-10 6.1854294E-10 0.0 4.529771E-10 4.529771E-10 4.994272E-9 0.0 0.0 0.0 0.0 0.0 0.7622446 0.7622446 0.7275898 0.18005997 0.0 0.0 0.0 0.0 0.0 0.0012070787 6.004461E-6 9.706786E-5 0.0 0.0 0.0 0.0 0.0 3.1419747E-10 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.2735226E-9 0.0 6.4648653E-10 0.0 1.9647839E-10 1.7422576E-9 8.09767E-10 1.9685729E-9 0.0 0.0063275024 4.004701E-10 8.898851E-11 0.0 1.8410239E-11 0.0 0.0 0.0 0.019393547 0.021247122 0.016515566 0.058280203 0.0 0.0 0.0 0.0 0.0012070787 9.706786E-5 6.004461E-6 0.0 0.0 0.0 0.0 0.0 3.1419747E-10 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.2735226E-9 6.4648653E-10 0.0 0.0 1.9647839E-10 8.09767E-10 1.7422576E-9 1.9685729E-9 0.0 4.004701E-10 0.0063275024 8.898851E-11 0.0 0.0 1.8410239E-11 0.0 0.0 0.021247122 0.019393547 0.016515566 0.058280203 0.0 0.0 0.0 8.482772E-4 1.7458615E-4 1.7458615E-4 0.0 0.0 0.0 0.0 0.0 9.0502444E-10 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.4019264E-9 2.8565543E-9 2.8565543E-9 0.0 1.1445355E-9 1.007966E-9 1.007966E-9 0.0 0.0 4.7410537E-10 4.7410537E-10 0.005831001 0.0 0.0 0.0 5.3841137E-10 0.0 0.023128951 0.023128951 0.019584555 0.0897485 0.0 0.0 0.0 1.0037951E-4 1.0037951E-4 0.0 0.07086093 0.017273027 0.017273027 0.014234235 0.12537166 0.017040797 0.0 0.017214186 0.54356307 0.0022574163 
0.0022574163 3.5348345E-4 0.48050207 3.5103224E-14 3.5103224E-14 8.221805E-14 0.38373664 1.4425102E-9 1.4425102E-9 2.0358776E-10 0.0 0.027070858 0.027070858 0.02476715 0.0 0.6913761 0.6913761 0.6733999 0.0 0.5951645 0.5951645 0.32331568 0.06120078 0.13900408 0.13900408 0.06940392 1.6446129E-6 0.0 0.01421698 0.0 0.0 0.0 1.8881625E-5 0.0 0.0 0.0 0.019878315 0.0 0.0 0.0 2.2358537E-10 8.078668E-4 0.0 3.1550559E-12 0.0 0.009174765 2.129905E-10 0.0 0.0 0.6534922 0.07246246 0.06652425 0.0 0.6640216 0.57671845 0.7000715 0.0 0.45509392 0.53830516 0.54796076 0.8869343 5.044415E-5 1.21070574E-4 1.3362741E-4 0.0 0.01421698 0.0 0.0 0.0 1.8881625E-5 0.0 0.0 0.0 0.019878315 0.0 0.0 0.0 2.2358537E-10 0.0 8.078668E-4 3.1550559E-12 0.0 2.129905E-10 0.009174765 0.0 0.0 0.07246246 0.6534922 0.06652425 0.0 0.57671845 0.6640216 0.7000715 0.0 0.53830516 0.45509392 0.54796076 0.8869343 1.21070574E-4 5.044415E-5 1.3362741E-4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.09091476 0.0 0.0 0.0 0.13298002 0.0 0.0 1.648493E-9 0.16531989 0.0 0.0 0.0 0.1905863 0.0 0.0 0.0 0.0 0.36334002 0.36334002 0.36307213 0.0 0.0 0.0 0.0 0.0 0.65468884 0.65468884 0.6366834 0.50553787 2.885691E-9 2.885691E-9 2.0166791E-8 1.0249673E-6 2.8630407E-6 0.0 1.8666837E-9 0.0 0.0 0.0 0.0 1.7871575E-9 0.0 0.0 0.0 0.0 0.0 1.8082964E-9 0.0 0.0 0.0 0.0 2.4914218E-6 1.0660158E-6 1.0369689E-6 0.0 8.0589146E-11 0.0 2.2125383E-10 0.0 0.010127269 0.011899497 0.011059287 0.4860856 0.0 0.0 0.0 2.8630407E-6 0.0 0.0 0.0 0.0 0.0 1.7871575E-9 0.0 0.0 0.0 0.0 0.0 0.0 1.8082964E-9 0.0 0.0 0.0 0.0 1.0660158E-6 2.4914218E-6 1.0369689E-6 0.0 0.0 8.0589146E-11 2.2125383E-10 0.0 0.011899497 0.010127269 0.011059287 0.4860856 0.0 0.0 0.0 0.0 0.0 0.0 2.2846955E-9 0.0 0.0 0.0 1.3170984E-7 0.0 0.0 0.0 0.0 0.0 1.8078713E-9 1.8078713E-9 8.2125445E-11 0.0 1.332404E-6 1.332404E-6 2.4772994E-6 0.0 8.3161933E-10 8.3161933E-10 0.0 0.0 0.011224338 0.011224338 0.008020142 0.19948058 0.0 0.0 0.0 6.458715E-4 0.0 5.867936E-4 0.11639998 5.2651085E-6 5.2651085E-6 1.495454E-4 0.10705364 0.0 0.0 0.0 0.18770982 0.0 0.0 4.1599986E-9 0.0 0.5113679 0.5113679 0.48197013 0.0 8.58122E-9 8.58122E-9 0.0 0.0 0.69268644 0.69268644 0.6439838 0.430075 2.6026244E-8 2.6026244E-8 5.1601752E-8 0.0 1.0427029E-6 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.4743723E-9 8.475009E-12 2.1390294E-12 0.0 0.0 0.001644281 0.002435151 0.0021055103 0.0 0.0 1.0859368E-9 0.0 0.0 0.009214412 0.009529174 0.012207901 0.21700314 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 4.7183254E-5 0.0 0.0 0.0 0.0 0.0 0.0 5.411275E-10 0.0 0.0020451522 0.0020451522 0.0015737809 0.0 0.0 0.0 1.4633952E-9 0.0 0.00916262 0.00916262 0.008599039 0.47782 0.0 0.0 0.0 0.30659077 0.30659077 0.27978992 0.08517275 4.6521081E-10 4.6521081E-10 0.0 0.17375456 2.4976085E-8 2.4976085E-8 0.0 0.0 0.48125842 0.48125842 0.50134903 0.0 0.0 0.0 0.0 0.0 0.6352047 0.6352047 0.57153493 0.23301363 3.396227E-8 3.396227E-8 0.0 2.3397267E-6 2.9486428E-6 0.0 1.191022E-10 1.9796482E-12 0.0 0.0 4.0090938E-11 0.0 0.0 0.0 8.503806E-4 9.886058E-4 0.001071754 0.0 0.0 0.0 0.0 0.0 0.014365128 0.015139921 0.015821077 0.6108969 0.0 7.262848E-11 6.9609435E-12 2.9486428E-6 0.0 1.9796482E-12 1.191022E-10 0.0 0.0 0.0 4.0090938E-11 0.0 0.0 9.886058E-4 8.503806E-4 0.001071754 0.0 0.0 0.0 0.0 0.0 0.015139921 0.014365128 0.015821077 0.6108969 7.262848E-11 0.0 6.9609435E-12 0.0 0.0 0.0 
5.1845994E-10 0.0 0.0 0.0 3.3043658E-11 0.0 0.0011530961 0.0011530961 8.230561E-4 0.0 0.0 0.0 1.16041905E-8 0.0 0.0110116685 0.0110116685 0.011093377 0.05422721 1.1468431E-11 1.1468431E-11 7.2601297E-6 0.119130366 0.119130366 0.12394523 0.072315305 0.31303954 0.31303954 0.2514032 0.0 0.7518422 0.7518422 0.6299445 0.0 0.7035929 0.7035929 0.60415554 0.0 0.61962354 0.61962354 0.6660701 0.037348893 2.1053923E-4 2.1053923E-4 8.232122E-8 0.0 0.0 0.482073 0.29475465 0.49222115 0.5762003 0.0 5.674682E-7 8.538268E-7 9.190851E-7 0.0 3.515226E-4 3.661632E-4 3.6741624E-4 0.0 0.14960772 0.21404493 0.18813491 0.23309265 6.7650874E-10 6.7236466E-11 0.0 0.0 0.482073 0.49222115 0.29475465 0.5762003 0.0 8.538268E-7 5.674682E-7 9.190851E-7 0.0 3.661632E-4 3.515226E-4 3.6741624E-4 0.0 0.21404493 0.14960772 0.18813491 0.23309265 6.7236466E-11 6.7650874E-10 0.0 0.312505 0.57824814 0.57824814 0.2639461 0.0 9.25257E-7 9.25257E-7 5.512989E-7 0.0 3.6309374E-4 3.6309374E-4 3.6636658E-4 0.0 0.20384768 0.20384768 0.14476675 0.5554976 1.1615659E-10 1.1615659E-10 2.0976738E-6 3.2246774E-6 3.2246774E-6 1.11205645E-5 0.0 0.77781874 0.77781874 0.7903026 0.0 0.5396093 0.5396093 0.5056534 0.0 0.5787515 0.5787515 0.5328987 2.6941492E-4 0.40818155 0.40818155 0.43171054 1.5407384E-5 1.1458977E-8 0.0 5.361452E-6 2.0882399E-5 2.1883181E-5 0.0 0.15392195 0.3771323 0.42989624 0.0 0.0100513445 0.6218259 0.7085319 0.54400045 2.4944328E-4 0.6541879 0.6208453 1.1458977E-8 0.0 2.0882399E-5 5.361452E-6 2.1883181E-5 0.0 0.3771323 0.15392195 0.42989624 0.0 0.6218259 0.0100513445 0.7085319 0.54400045 0.6541879 2.4944328E-4 0.6208453 0.0 2.0399457E-5 2.0399457E-5 6.58149E-6 0.0 0.37596074 0.37596074 0.15513001 0.0 0.6431407 0.6431407 0.009134202 0.51003695 0.6930358 0.6930358 9.3887496E-7 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.001946797 0.50421375 0.5513288 0.0 1.4264215E-4 0.05016642 0.089934506 0.37701184 2.1201893E-6 0.0014567293 0.0014286658 0.0 0.0 0.50421375 0.001946797 0.5513288 0.0 0.05016642 1.4264215E-4 0.089934506 0.37701184 0.0014567293 2.1201893E-6 0.0014286658 0.0 0.50001305 0.50001305 0.0025723642 0.0 0.10176831 0.10176831 5.1279527E-5 0.23642747 0.0012969028 0.0012969028 1.23697E-6 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.7530037E-5 1.8560221E-6 0.0 1.1873907E-4 0.33898923 0.33066684 0.113724075 1.46282555E-5 0.006714971 0.0074403184 1.8560221E-6 0.0 0.33898923 1.1873907E-4 0.33066684 0.113724075 0.006714971 1.46282555E-5 0.0074403184 0.0 0.28422502 0.28422502 8.088246E-5 0.11893501 0.0064812447 0.0064812447 2.7074091E-5 0.0 0.0 0.0 0.0 0.0 0.0 0.0 7.8430145E-5 3.353415E-5 0.0 7.8247586E-10 0.0 1.2729048E-10 3.353415E-5 0.0 0.0 7.8247586E-10 1.2729048E-10 5.8745306E-9 0.0 0.0 3.8161865E-9 2.3002575E-9 2.3002575E-9 3.5615045E-5 4.104819E-9 1.427929E-9 1.427929E-9 0.0023790114 0.0023790114 0.0071481485 0.03750419 0.0 0.0 0.0 0.014500084 0.0 0.0 0.0 0.036137167 1.0338266E-4 1.0338266E-4 0.0 0.01638361 0.0 0.0 3.2176184E-9 0.018217174 0.0 0.0 0.0 0.07284771 0.0 0.0 0.0 0.01986551 3.1403836E-8 3.1403836E-8 3.2696928E-8 0.06713316 0.0 0.0 0.0 0.0 1.0589284E-7 1.0589284E-7 3.9708468E-8 0.0 0.004200184 0.004200184 0.0032128582 0.0 0.005092483 0.005092483 0.0031680453 0.07519544 0.28485492 0.28485492 0.14191681 6.76118E-4 3.578433E-7 0.0 8.326708E-4 0.0 0.0 0.0 0.001486147 0.0 0.0 1.196479E-6 6.903454E-4 0.0 0.0 0.0 0.0023617495 3.0938832E-10 6.0993705E-10 0.0 3.8233953E-8 0.0 0.0 0.0 0.0026129615 0.0 0.0 3.5627206E-8 4.4908846E-4 1.22145405E-8 1.597285E-8 0.0 0.01698128 6.859828E-7 0.0 0.0 
1.206655E-4 2.6096057E-8 1.8414703E-8 0.0 2.6952487E-4 2.769847E-4 2.4178395E-5 0.0 8.249793E-6 4.4549837E-5 1.3285429E-4 0.033636115 9.647273E-4 0.0010879525 0.001429445 3.578433E-7 0.0 0.0 8.326708E-4 0.0 0.0 0.0 0.001486147 0.0 1.196479E-6 0.0 6.903454E-4 0.0 0.0 3.0938832E-10 0.0023617495 6.0993705E-10 0.0 0.0 0.0 0.0 0.0 0.0 0.0026129615 0.0 3.5627206E-8 1.22145405E-8 4.4908846E-4 1.597285E-8 0.0 6.859828E-7 0.01698128 0.0 0.0 2.6096057E-8 1.206655E-4 1.8414703E-8 0.0 2.769847E-4 2.6952487E-4 2.4178395E-5 0.0 4.4549837E-5 8.249793E-6 1.3285429E-4 0.033636115 0.0010879525 9.647273E-4 0.001429445 0.0 0.0 0.0 0.004970648 0.0 0.0 0.0 0.0026088313 5.2205363E-5 5.2304777E-6 5.2304777E-6 0.0 0.0 2.6518998E-10 2.6518998E-10 0.0015831307 0.0 0.0 0.0 0.008073251 0.0 0.0 0.0 1.1688586E-4 2.459974E-8 9.4385095E-9 9.4385095E-9 0.0019301558 0.0 2.7161389E-6 2.7161389E-6 0.015867077 0.0 4.4874938E-8 4.4874938E-8 0.0010696596 0.0 9.6759584E-5 9.6759584E-5 0.0013816492 0.0 4.8198624E-4 4.8198624E-4 6.3160414E-6 0.035599258 0.0090317475 0.0090317475 8.059855E-5 0.01030177 0.01030177 0.002660907 0.009068103 0.0054423967 0.0054423967 0.00548319 0.10429945 0.04422914 0.04422914 0.0 0.017098278 0.0 0.0 0.0 0.01808052 0.0 0.0 2.834119E-7 0.012511839 6.3098238E-9 6.3098238E-9 6.3098238E-9 0.030747216 9.561494E-9 9.561494E-9 9.561494E-9 0.022795172 2.1066953E-6 2.1066953E-6 1.0074148E-6 0.0 1.8893722E-4 1.8893722E-4 4.3820646E-6 0.0 0.04636359 0.04636359 0.023247624 0.0 0.05347228 0.05347228 0.04518416 0.024607647 0.08368458 0.08368458 0.1786474 1.527703E-7 0.0059973253 0.009832931 0.0022415037 0.0028436934 0.003947473 0.043129787 0.002132554 0.017458076 0.0 8.27598E-7 0.0040749335 5.3755734E-6 1.5452491E-6 0.0 0.0036751602 0.0 0.0 9.915384E-9 0.002882229 2.3118156E-9 2.3118156E-9 9.9970565E-9 0.0012316186 4.9456683E-9 4.9456683E-9 6.51931E-6 0.0050094593 3.1959507E-4 2.9222889E-5 0.0 4.028549E-7 2.5944937E-5 4.6177403E-5 0.0 9.67074E-5 5.401675E-4 2.9694362E-5 0.0 7.5061835E-6 2.7377924E-5 5.823072E-5 0.020349037 2.9119453E-4 6.7028933E-4 1.6392575E-4 0.0059973253 0.009832931 0.0028436934 0.0022415037 0.003947473 0.043129787 0.017458076 0.002132554 0.0 8.27598E-7 5.3755734E-6 0.0040749335 1.5452491E-6 0.0 0.0 0.0 0.0 9.915384E-9 2.3118156E-9 0.002882229 2.3118156E-9 9.9970565E-9 4.9456683E-9 0.0012316186 4.9456683E-9 6.51931E-6 3.1959507E-4 0.0050094593 2.9222889E-5 0.0 2.5944937E-5 4.028549E-7 4.6177403E-5 0.0 5.401675E-4 9.67074E-5 2.9694362E-5 0.0 2.7377924E-5 7.5061835E-6 5.823072E-5 0.020349037 6.7028933E-4 2.9119453E-4 1.6392575E-4 0.005349994 0.0015559733 0.0015559733 0.008883577 0.022048393 0.005180205 0.005180205 0.0 0.0 3.8379963E-7 3.8379963E-7 0.006739978 0.0 0.0 0.0 0.010312512 9.915384E-9 2.3118156E-9 2.3118156E-9 5.220042E-5 9.9970565E-9 4.9456683E-9 4.9456683E-9 0.0019822896 0.0 1.4928659E-6 1.4928659E-6 0.122016564 0.0 9.651152E-5 9.651152E-5 9.626046E-9 0.0 3.7174366E-4 3.7174366E-4 3.0907194E-4 0.0 1.2576695E-4 1.2576695E-4 3.1663554E-8 0.008585598 4.4388452E-4 4.4388452E-4 5.1301744E-5 4.3098902E-4 4.3098902E-4 2.3888897E-5 0.15357459 0.01063336 0.01063336 0.0 0.031743284 0.0075734127 0.0075734127 0.021449914 0.014139191 0.006886066 0.0 0.028458163 0.016463557 0.0 0.0 0.0 0.05995365 0.0 0.0 1.3585094E-8 0.03870578 0.0077929944 0.0077929944 0.015422478 0.0 8.9887535E-4 8.9887535E-4 2.8007628E-7 0.0 0.029183941 0.029183941 0.00934881 0.0 0.004790154 0.004790154 0.023683049 0.051094007 0.0020810158 0.0020810158 0.0020814019 1.7075615E-6 6.965445E-5 0.036556188 1.1368792E-5 0.032700196 0.0 
0.0019299556 0.0016442357 0.0012739038 0.0012542465 0.015910933 5.064538E-5 0.0 0.0020874965 0.0 7.1614224E-4 0.0 0.0 0.0 0.0030964233 0.0 0.0 3.6005778E-4 0.0026347307 0.010216482 0.0017165629 0.0 8.870866E-5 2.014494E-5 3.1837908E-4 0.0 3.4364715E-5 4.2726602E-5 6.120622E-5 0.0 1.197807E-5 9.536705E-5 2.7977506E-4 0.02350308 0.0012698434 7.725933E-4 0.003078127 6.965445E-5 0.036556188 0.032700196 1.1368792E-5 0.0 0.0019299556 0.0012739038 0.0016442357 0.0012542465 0.015910933 0.0024661242 0.0 0.0020874965 0.0 0.0 7.1614224E-4 0.0 0.0 0.0 0.0030964233 0.0 3.6005778E-4 0.010216482 0.0026347307 0.0017165629 0.0 2.014494E-5 8.870866E-5 3.1837908E-4 0.0 4.2726602E-5 3.4364715E-5 6.120622E-5 0.0 9.536705E-5 1.197807E-5 2.7977506E-4 0.02350308 7.725933E-4 0.0012698434 0.003078127 0.02609133 0.0024158712 0.0024158712 0.0 6.2704127E-4 0.0061775413 0.0061775413 0.009510891 0.0063444357 8.62226E-4 0.0 9.633839E-5 0.0 0.0 0.0 0.0021655345 0.0 0.0 0.0 7.662015E-4 0.0015130207 0.013729917 0.013729917 0.020562131 0.0 9.469149E-5 9.469149E-5 3.6549763E-4 0.0 2.0545222E-4 2.0545222E-4 6.6925706E-5 0.0 1.07166394E-4 1.07166394E-4 4.840512E-5 0.030883497 9.4731926E-4 9.4731926E-4 1.5336054E-5 0.01238895 0.01238895 0.0 0.006491774 0.00802174 0.00802174 0.00701375 0.015752647 0.018960956 0.0 0.009502538 0.041468143 0.010624573 0.010624573 0.022569729 0.023212904 6.2157336E-4 6.2157336E-4 0.017145153 0.01876895 0.050275512 0.050275512 0.062563695 0.0 0.020257736 0.020257736 0.035596948 0.0 7.923749E-5 7.923749E-5 1.99586E-4 0.0 0.009047728 0.009047728 0.051041782 0.002439294 0.03922935 0.03922935 0.015621329 2.9608235E-4 0.0 0.0030289728 4.728616E-5 5.9919246E-4 3.8967497E-5 4.0262025E-6 4.2828138E-4 0.0 2.6873466E-5 0.0044629597 4.421803E-4 0.0018528567 1.3231194E-4 0.0013492185 8.8132074E-5 4.2649865E-4 1.2352102E-6 4.8181866E-4 9.3462237E-4 0.0024225407 0.002674259 0.0 1.15188355E-4 2.3773589E-4 1.7885356E-4 0.0 1.5626048E-5 9.00205E-6 2.6692878E-5 0.0 0.0011343211 0.0019248298 0.0019193299 0.0045045265 0.0073072505 0.038003974 0.01618448 0.0 0.0030289728 5.9919246E-4 4.728616E-5 3.8967497E-5 4.0262025E-6 2.6110647E-6 0.0 2.6873466E-5 0.0044629597 0.0018528567 4.421803E-4 1.3231194E-4 0.0013492185 4.2649865E-4 8.8132074E-5 1.2352102E-6 4.8181866E-4 0.0024225407 9.3462237E-4 0.002674259 0.0 2.3773589E-4 1.15188355E-4 1.7885356E-4 0.0 9.00205E-6 1.5626048E-5 2.6692878E-5 0.0 0.0019248298 0.0011343211 0.0019193299 0.0045045265 0.038003974 0.0073072505 0.01618448 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.112903E-5 1.112903E-5 4.527543E-4 0.015835075 0.023042882 0.0 0.004964509 0.14189866 0.023862364 0.023862364 0.009848915 0.030028297 0.0011478607 0.0011478607 0.004422834 0.02052346 0.06466659 0.06466659 0.09675613 0.0 6.968388E-4 6.968388E-4 0.0013390917 0.0 0.034277927 0.034277927 0.036017258 0.0 0.0051674703 0.0051674703 0.024896441 0.036481448 2.6213609E-5 2.6213609E-5 2.3073069E-4 5.4143242E-5 1.462326E-5 0.0013832112 3.2257158E-4 0.0 2.3771844E-4 0.0023284622 1.5815056E-5 2.6234318E-5 3.1704578E-5 0.0024670768 7.663246E-5 9.0641004E-4 6.8061484E-4 0.08596091 0.019624444 0.00954481 0.0022390622 0.0 1.9159472E-5 1.6750475E-4 2.016075E-4 0.0 0.0011092366 0.0010776259 0.00393887 0.0 2.1824926E-4 2.5269936E-4 3.4564643E-4 0.0026934745 5.0103372E-5 6.351708E-5 0.0019605714 1.462326E-5 0.0013832112 2.3114862E-4 0.0 2.3771844E-4 0.0023284622 2.6234318E-5 1.5815056E-5 3.1704578E-5 0.0024670768 9.0641004E-4 
7.663246E-5 6.8061484E-4 0.08596091 0.00954481 0.019624444 0.0022390622 0.0 1.6750475E-4 1.9159472E-5 2.016075E-4 0.0 0.0010776259 0.0011092366 0.00393887 0.0 2.5269936E-4 2.1824926E-4 3.4564643E-4 0.0026934745 6.351708E-5 5.0103372E-5 0.0019605714 0.0014402102 4.8426358E-4 0.0 0.0028655338 0.013200438 2.9541512E-5 2.9541512E-5 1.3755937E-4 1.3516403E-5 1.9586565E-5 1.9586565E-5 2.5854853E-4 0.004980899 0.0029931841 0.0029931841 0.015597701 0.0 5.2790955E-4 5.2790955E-4 4.1125098E-4 0.0 3.174463E-5 3.174463E-5 0.0011949221 0.0 0.001433215 0.001433215 3.2742475E-5 0.010578458 6.679066E-4 6.679066E-4 1.14652885E-5 9.914006E-5 0.0 1.19190645E-5 0.08019342 1.4303061E-4 1.4303061E-4 1.2370589E-4 0.11834658 0.0012994263 0.0012994263 0.001060969 0.038441062 0.0075456216 0.0075456216 0.0080822315 0.0 3.4590482E-4 3.4590482E-4 1.602607E-4 0.0 1.0951557E-7 1.0951557E-7 3.0691004E-7 0.0 0.0 0.0 0.0 0.044228546 2.749781E-5 2.749781E-5 5.50976E-6 0.0 6.863519E-5 0.0155281965 3.0703546E-4 2.1572896E-4 7.85123E-4 7.661006E-4 1.7884553E-4 0.0057616234 3.754797E-4 0.027317638 3.0702859E-6 0.001391596 0.0024479367 0.0 5.2019005E-4 8.4023614E-4 0.0018111011 0.0 9.766217E-8 9.523526E-9 7.2332234E-8 0.0 9.499517E-6 8.0338E-4 5.636084E-4 5.189354E-4 0.0010957877 1.6942942E-5 0.0013149177 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.002948253 2.1926258E-4 2.1926258E-4 0.0063900584 0.004918505 1.7916894E-4 1.7916894E-4 0.0026695705 0.031142324 7.2839897E-4 7.2839897E-4 0.0010293194 0.0 0.0041154795 0.0041154795 7.2466163E-4 0.0 2.9993288E-8 2.9993288E-8 9.874853E-12 0.0 5.7721866E-4 5.7721866E-4 0.002379313 4.0789138E-4 7.202673E-4 7.202673E-4 5.7956495E-4 1.3308716E-4 1.3308716E-4 5.633167E-4 0.0059915865 0.0019046805 0.0019046805 0.014123808 0.031788964 0.07913154 0.07913154 0.05017952 0.0 0.0037542179 0.0037542179 0.024014262 0.0 4.359088E-4 4.359088E-4 5.4680844E-5 0.0 3.919458E-4 3.919458E-4 3.7134896E-4 0.012462795 0.001329279 0.001329279 4.912435E-5 3.0935793E-5 9.6470985E-6 0.009837422 7.022162E-4 0.0030676262 0.002569463 0.048349075 0.0021506401 0.0024210338 0.0015677843 0.0 0.0026635916 8.7316934E-4 7.024728E-4 0.0 4.962993E-7 1.0819892E-6 8.322563E-5 0.0 2.0700563E-5 2.0560381E-5 6.758795E-5 0.0023511548 0.0037554682 1.2444497E-4 3.6433045E-4 9.6470985E-6 0.009837422 0.0030676262 7.022162E-4 0.002569463 0.048349075 0.0024210338 0.0021506401 0.0015677843 0.0 8.7316934E-4 0.0026635916 7.024728E-4 0.0 1.0819892E-6 4.962993E-7 8.322563E-5 0.0 2.0560381E-5 2.0700563E-5 6.758795E-5 0.0023511548 1.2444497E-4 0.0037554682 3.6433045E-4 0.0015678448 5.0924503E-4 5.0924503E-4 1.0722422E-7 0.0065523074 1.1995197E-5 1.1995197E-5 0.0014000477 0.0 5.82548E-4 5.82548E-4 0.0018138187 0.0 8.254919E-6 8.254919E-6 2.8230086E-5 0.0 7.039712E-5 7.039712E-5 8.023952E-6 6.7584447E-6 5.944971E-4 5.944971E-4 0.0012353025 5.4213668E-8 5.4213668E-8 1.5144989E-5 0.04500582 0.005586295 0.005586295 0.007646733 0.0 8.595801E-5 8.595801E-5 2.708631E-4 0.0 0.0069739083 0.0069739083 0.008364978 0.0 0.0021741667 0.0021741667 0.002142153 3.3520238E-4 0.009597263 0.009597263 3.8065357E-6 4.3349523E-6 6.4579517E-6 0.0035443017 2.564746E-4 2.9973505E-4 4.2450635E-4 0.0 1.7478451E-4 3.179842E-4 1.5814633E-4 0.0 4.2669894E-6 5.3151693E-6 2.650366E-6 0.0 6.224134E-4 6.4598897E-4 2.2640277E-4 5.8142654E-5 7.232808E-4 4.3805276E-6 1.6065159E-6 6.4579517E-6 0.0035443017 2.9973505E-4 2.564746E-4 4.2450635E-4 0.0 3.179842E-4 1.7478451E-4 1.5814633E-4 0.0 5.3151693E-6 
4.2669894E-6 2.650366E-6 0.0 6.4598897E-4 6.224134E-4 2.2640277E-4 5.8142654E-5 4.3805276E-6 7.232808E-4 1.6065159E-6 0.003107791 2.2158973E-4 2.2158973E-4 2.5738517E-4 0.0 2.8780536E-4 2.8780536E-4 4.299376E-4 0.0 6.2011977E-6 6.2011977E-6 2.6085922E-7 0.0 5.2356836E-4 5.2356836E-4 3.5148354E-5 6.865477E-5 3.063562E-4 3.063562E-4 8.7164633E-4 4.1119292E-6 4.1119292E-6 8.992156E-5 0.0 0.010470388 0.010470388 0.028080784 0.0 0.0023407969 0.0023407969 9.500448E-4 0.0 0.0064542717 0.0064542717 0.0034956518 0.06485555 0.010054078 0.010054078 0.0015959141 4.29468E-6 3.4846445E-5 0.0 2.7501912E-4 0.0032689415 5.503296E-4 0.0 0.0011817481 6.3315564E-4 9.548449E-4 0.0 8.851969E-4 0.0014185378 0.0018570608 0.0016550004 0.0028765851 0.0046872413 0.0025943941 3.4846445E-5 0.0 0.0032689415 2.7501912E-4 5.503296E-4 0.0 6.3315564E-4 0.0011817481 9.548449E-4 0.0 0.0014185378 8.851969E-4 0.0018570608 0.0016550004 0.0046872413 0.0028765851 0.0025943941 0.0 0.013213835 0.013213835 0.00854056 0.0 3.779944E-4 3.779944E-4 0.0031714856 0.0 6.275072E-4 6.275072E-4 0.0016153408 0.0037610426 0.0074593667 0.0074593667 0.0034076776 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.013953042 0.015543323 0.0 4.6677777E-4 8.224327E-5 0.0011783126 0.0 2.481987E-4 1.0650459E-4 1.7484637E-4 0.0028245514 0.003750977 0.0085973125 0.0018834377 0.015543323 0.0 8.224327E-5 4.6677777E-4 0.0011783126 0.0 1.0650459E-4 2.481987E-4 1.7484637E-4 0.0028245514 0.0085973125 0.003750977 0.0018834377 0.0 5.097837E-4 5.097837E-4 0.001523296 0.0 3.709473E-4 3.709473E-4 8.033278E-5 0.0031795548 0.004846344 0.004846344 4.7803955E-5 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 5.731961E-6 1.7978258E-4 0.0 0.0054182704 0.015185609 0.018535962 0.0019503222 0.0019166789 0.0031840692 0.0037062864 1.7978258E-4 0.0 0.015185609 0.0054182704 0.018535962 0.0019503222 0.0031840692 0.0019166789 0.0037062864 0.0 0.017383177 0.017383177 0.014235004 7.1351585E-4 0.0036093092 0.0036093092 5.781307E-4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.004507234 0.011681524 0.0016235076 0.0049969098 0.0046261344 5.9950934E-4 0.011681524 0.0016235076 0.0046261344 0.0049969098 5.9950934E-4 0.0024438119 0.0028536334 0.0028536334 5.899711E-4 0.013490785 0.013490785 0.007755865 4.5940364E-4 4.096394E-4 4.096394E-4 diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/action_translation.cc b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/action_translation.cc new file mode 100644 index 0000000..f0dd614 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/action_translation.cc @@ -0,0 +1,141 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/universal_poker/logic/action_translation.h" + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel::universal_poker::logic { + +RandomizedPsuedoHarmonicActionTranslation CalculatePsuedoHarmonicMapping( + int opponent_bet, int pot_size, std::vector action_abstraction) { + // We assume the user may not have sorted + de-duped the action abstraction, + // which are properties that we will want to be able to rely on below. + std::vector sorted_action_abstraction = action_abstraction; + std::sort(sorted_action_abstraction.begin(), sorted_action_abstraction.end()); + sorted_action_abstraction.erase( + std::unique(sorted_action_abstraction.begin(), + sorted_action_abstraction.end()), + sorted_action_abstraction.end()); + + if (sorted_action_abstraction.size() < 2) { + SpielFatalError("Action abstraction must have at least two unique values."); + } + // Action-s with value 0 and 1 map to fold and check/call, so are not valid + // bet sizes to translate the opponent's bet to. + if (sorted_action_abstraction[0] <= 1) { + SpielFatalError( + "Action abstraction Action-s must be bets, not folds or check/calls."); + } + + // If their bet is one of the translated actions, or outside the bounds of our + // translated actions, we don't need to actually do the math. Since simply + // using said translated action at 100% frequency will always be the best we + // can possibly do (and is what they do in the paper!) + if (absl::c_binary_search(sorted_action_abstraction, opponent_bet)) { + return RandomizedPsuedoHarmonicActionTranslation{ + .smaller_bet = opponent_bet, + .probability_a = 1.0, + // Should never be used, setting to the same value just to be safe + // though. + .larger_bet = opponent_bet, + .probability_b = 0.0, + }; + } + Action abstraction_min = sorted_action_abstraction[0]; + if (opponent_bet < abstraction_min) { + return RandomizedPsuedoHarmonicActionTranslation{ + .smaller_bet = abstraction_min, + .probability_a = 1.0, + // Should never be used, setting to the same value just to be safe + // though. + .larger_bet = abstraction_min, + .probability_b = 0.0, + }; + } + Action abstraction_max = + sorted_action_abstraction[sorted_action_abstraction.size() - 1]; + if (opponent_bet > abstraction_max) { + return RandomizedPsuedoHarmonicActionTranslation{ + // Should never be used, setting to the same value just to be safe + // though. + .smaller_bet = abstraction_max, + .probability_a = 0.0, + .larger_bet = abstraction_max, + .probability_b = 1.0, + }; + } + // If we reach this point, that means their bet is somewhere in between two of + // the Action-s in the action abstraction. So we will need to 1. figure out + // which two Action-s those are, and 2. calculate the randomized + // pseudo-harmonic mapping to choose between them. + + // i=1 since if the first action in the 0 index was greater, then the checks + // above would have returned early and we wouldn't have reached this point. 
+ Action translated_smaller_bet = abstraction_min; + Action translated_larger_bet = abstraction_max; + for (size_t i = 1; i < sorted_action_abstraction.size(); ++i) { + if (sorted_action_abstraction[i] > opponent_bet) { + translated_smaller_bet = sorted_action_abstraction[i - 1]; + translated_larger_bet = sorted_action_abstraction[i]; + break; + } + if (i == sorted_action_abstraction.size() - 1) { + SpielFatalError("Could not find bounding actions for the opponent's bet " + "in the action abstraction."); + } + } + + // As per the paper, scaling everything down by the pot size to determine the + // canonical "A" "B" and "x" values that go into the formula. + // + // (Oddly the paper implies that this automatically happens as a consequence + // of their function, e.g. they explicitly stated that + // ∀k > 0, x ∈ [A, B], f_kA,kB (kx) = f_A,B (x). + // But this is clearly not true unless we scale everything such that the pot + // size is '1' _before_ plugging it in. ... which they themselves also did in + // the paper.) + double psuedo_harmonic_a = static_cast<double>(translated_smaller_bet) / + static_cast<double>(pot_size); + double psuedo_harmonic_b = static_cast<double>(translated_larger_bet) / + static_cast<double>(pot_size); + double psuedo_harmonic_x = + static_cast<double>(opponent_bet) / static_cast<double>(pot_size); + + // As specified in the paper: + // + // f_A,B (x) = ((B - x)(1 + A)) / ((B - A)(1 + x)) + // + // where A is the smaller bet size, B is the larger bet size, and x is the + // incoming bet size. (Which calculates specifically the probability that the + // smaller bet size should be chosen). + double probability_a = + ((psuedo_harmonic_b - psuedo_harmonic_x) * (1 + psuedo_harmonic_a)) / + ((psuedo_harmonic_b - psuedo_harmonic_a) * (1 + psuedo_harmonic_x)); + double probability_b = 1.0 - probability_a; + + return RandomizedPsuedoHarmonicActionTranslation{ + .smaller_bet = translated_smaller_bet, + .probability_a = probability_a, + .larger_bet = translated_larger_bet, + .probability_b = probability_b, + }; +} + +} // namespace open_spiel::universal_poker::logic diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/action_translation.h b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/action_translation.h new file mode 100644 index 0000000..e7ac806 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/action_translation.h @@ -0,0 +1,83 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_UNIVERSAL_POKER_LOGIC_ACTION_TRANSLATION_H_ +#define OPEN_SPIEL_GAMES_UNIVERSAL_POKER_LOGIC_ACTION_TRANSLATION_H_ + +#include <vector> + +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace universal_poker { +namespace logic { + +struct RandomizedPsuedoHarmonicActionTranslation { + // Used to calculate "A" for f_A,B (x) as per the paper, ie the smaller bet + // size (after scaling everything down such that pot=1).
+ Action smaller_bet; + // How frequently to use the above smaller bet size. E.g. 0.5 means 50% of the + // time. This value + probability_b should sum to 1.0; if this value is set to + // 1.0 (100%) then the upper bet should never be used. + double probability_a; + + // Used to calculate "B" for f_A,B (x) as per the paper, ie the larger bet + // size (after scaling everything down such that pot=1). + Action larger_bet; + // How frequently to use the above larger bet size. E.g. 0.5 means 50% of the + // time. This value + probability_a should sum to 1.0; if this value is set to + // 1.0 (100%) then the lower bet should never be used. + double probability_b; +}; + +// Implementation of the randomized pseudo-harmonic action translation algorithm +// for the universal_poker game. +// +// For more details see: +// - the supplementary materials from the 2019 paper "Superhuman AI for +// multiplayer poker" by Noam Brown and Tuomas Sandholm +// - the 2013 paper "Action Translation in Extensive-Form Games with Large +// Action Spaces: Axioms, Paradoxes, and the Pseudo-Harmonic Mapping" by Sam +// Ganzfried and Tuomas Sandholm. +// +// If the opponent's bet is outside the bounds of the action abstraction, or +// exactly equal to one of the translated actions, it will be translated to the +// singular closest such value at 100% frequency. Specifically: +// - 'smaller_bet' will be arbitrarily set to 100% frequency if the opponent's +// bet is equal to an Action in the action abstraction, or less than the +// smallest Action in the action abstraction. +// - 'larger_bet' will be arbitrarily set to 100% frequency if the opponent's +// bet is greater than the largest Action in the action abstraction. +RandomizedPsuedoHarmonicActionTranslation CalculatePsuedoHarmonicMapping( + // The original bet size in chips of the opponent to be translated. If + // outside the bounds of the action abstraction, will be translated to the + // closest value. If equal to one of the translated actions, will be + // translated to that action at 100% frequency. + int opponent_bet, + // Number of chips currently in the pot. + // Used to provide "scale invariance" property. Specifically, we scale down + // everything so that it's calculated relative to a pot size of 1 when doing + // the math. + int pot_size, + // A subset of the valid Action-s for the game. + // Used to determine which Actions to translate the opponent's bet to. + // Must contain at least two unique values, and must contain only values + // that are >=2 (ie no fold or check/call Action). + std::vector<Action> action_abstraction); + +} // namespace logic +} // namespace universal_poker +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_UNIVERSAL_POKER_LOGIC_ACTION_TRANSLATION_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/action_translation_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/action_translation_test.cc new file mode 100644 index 0000000..cce4045 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/action_translation_test.cc @@ -0,0 +1,188 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/universal_poker/logic/action_translation.h" + +#include <cmath> +#include <cstdlib> +#include <vector> + +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace universal_poker { +namespace logic { + +// As per Table 1 in the paper, "Rand-psHar" holding 'B'=1 and 'x'=0.25, but +// moving 'A' between various different values. +void TestRandPSHarPaperResults() { + // The paper only calculated out to 3 decimal places. + double tolerance = 0.001; + + int pot = 2000; + int untranslated_bet = 500; // 0.25 * pot + + // Moving A in [0.001, 0.01, 0.05, 0.1] + RandomizedPsuedoHarmonicActionTranslation result_thousandth = + CalculatePsuedoHarmonicMapping(untranslated_bet, pot, {2, pot}); + RandomizedPsuedoHarmonicActionTranslation result_hundredth + = CalculatePsuedoHarmonicMapping(untranslated_bet, pot, {20, pot}); + RandomizedPsuedoHarmonicActionTranslation result_twentieth + = CalculatePsuedoHarmonicMapping(untranslated_bet, pot, {100, pot}); + RandomizedPsuedoHarmonicActionTranslation result_tenth + = CalculatePsuedoHarmonicMapping(untranslated_bet, pot, {200, pot}); + + // Direct values from the paper. + SPIEL_CHECK_LT(std::abs(result_thousandth.probability_a - 0.601), tolerance); + SPIEL_CHECK_LT(std::abs(result_hundredth.probability_a - 0.612), tolerance); + SPIEL_CHECK_LT(std::abs(result_twentieth.probability_a - 0.663), tolerance); + SPIEL_CHECK_LT(std::abs(result_tenth.probability_a - 0.733), tolerance); + + // Corresponding percentages calculated by subtracting each above from 1. + SPIEL_CHECK_LT(std::abs(result_thousandth.probability_b - 0.399), tolerance); + SPIEL_CHECK_LT(std::abs(result_hundredth.probability_b - 0.388), tolerance); + SPIEL_CHECK_LT(std::abs(result_twentieth.probability_b - 0.337), tolerance); + SPIEL_CHECK_LT(std::abs(result_tenth.probability_b - 0.267), tolerance); +} + +// Again per the Table 1, Rand-psHar holding B=1 and x=0.25, with A=0.1. But +// now testing scale invariance - ie that when multiplying all three of A, B, +// and x by any constant multiplicative factor k > 0 that it doesn't change the +// results. +void TestRandPSHarPaperResultScaleInvariance() { + // The paper only calculated out to 3 decimal places.
+ const double tolerance = 0.001; + + const Action opponent_bet = 5; + const int pot = 20; + const Action small_bet = 2; + const Action large_bet = 20; // B = pot + for (int i = 1; i <= 8; ++i) { + // [10^1, 10^2, ..., 10^8] + int scale = pow(10, i); + + Action scaled_opponent_bet = opponent_bet * scale; + Action scaled_small_bet = small_bet * scale; + int scaled_pot = pot * scale; + Action scaled_large_bet = large_bet * scale; + + RandomizedPsuedoHarmonicActionTranslation result = + CalculatePsuedoHarmonicMapping( + scaled_opponent_bet, + scaled_pot, + {scaled_small_bet, scaled_large_bet}); + + SPIEL_CHECK_EQ(result.smaller_bet, scaled_small_bet); + SPIEL_CHECK_LT(std::abs(result.probability_a - 0.733), tolerance); + SPIEL_CHECK_EQ(result.larger_bet, scaled_large_bet); + SPIEL_CHECK_LT(std::abs(result.probability_b - 0.267), tolerance); + } +} + +void TestRandPSHarMappingExactMatch() { + int untranslated_bet = 200; // pot sized bet => matches 1.0 + int pot = 200; + std::vector action_abstraction = {100, 200, 400, 600, 20000}; + + RandomizedPsuedoHarmonicActionTranslation result = + CalculatePsuedoHarmonicMapping(untranslated_bet, pot, action_abstraction); + + SPIEL_CHECK_EQ(result.smaller_bet, 200); + SPIEL_CHECK_EQ(result.probability_a, 1.0); + // We don't care what the larger bet is, just that it's never used. + SPIEL_CHECK_EQ(result.probability_b, 0.0); +} + +void TestCalculatesMedianFiftyFifty() { + // For f_A,B (x) = (B - x)(1 + A) / (B - A)(1 + x), the "median" of f `x*` + // where each translated action should be 50% chance is (as per the paper): + // + // x* = (A + B + 2AB) / (A + B + 2) + // + // Using A=0.2 B=0.5, x* = .9/2.7 = 1/3. + int pot = 300; + int untranslated_bet = 100; + std::vector action_abstraction = {3, 30, 60, 150, 300}; + + // Only imprecision should be that inherent to using doubles. Since in reality + // this bet size should result in _exactly_ choosing each at 50% frequency. + double tolerance = 1E-12; + + RandomizedPsuedoHarmonicActionTranslation result = + CalculatePsuedoHarmonicMapping(untranslated_bet, pot, action_abstraction); + + SPIEL_CHECK_EQ(result.smaller_bet, 60); // 0.2 * 300 = 60 + SPIEL_CHECK_LT(std::abs(result.probability_a - 0.5), tolerance); + SPIEL_CHECK_EQ(result.larger_bet, 150); // 0.5 * 300 = 150 + SPIEL_CHECK_LT(std::abs(result.probability_b - 0.5), tolerance); +} + +void TestShortCircuitsBelowMinAbstractionBet() { + int untranslated_bet = 25; + int pot = 300; + std::vector action_abstraction = {150, 300}; + + RandomizedPsuedoHarmonicActionTranslation result = + CalculatePsuedoHarmonicMapping(untranslated_bet, pot, action_abstraction); + + SPIEL_CHECK_EQ(result.smaller_bet, 150); + SPIEL_CHECK_EQ(result.probability_a, 1.0); + // Don't care what the larger bet is, just that it's never used. + SPIEL_CHECK_EQ(result.probability_b, 0.0); +} + +void TestShortCircuitsAboveMaxAbstractionBet() { + int untranslated_bet = 600; + int pot = 300; + std::vector action_abstraction = {225, 300, 375}; + + RandomizedPsuedoHarmonicActionTranslation result = + CalculatePsuedoHarmonicMapping(untranslated_bet, pot, action_abstraction); + + SPIEL_CHECK_EQ(result.probability_a, 0.0); + // Don't care what the smaller bet is, just that it's never used. 
+ SPIEL_CHECK_EQ(result.larger_bet, 375); + SPIEL_CHECK_EQ(result.probability_b, 1.0); +} + +void TestUnsortedNonUniqueActionAbstraction() { + double tolerance = 0.001; + int untranslated_bet = 375; + int pot = 200; + std::vector action_abstraction = {400, 300, 150, 200, 150, 300}; + + RandomizedPsuedoHarmonicActionTranslation result = + CalculatePsuedoHarmonicMapping(untranslated_bet, pot, action_abstraction); + + SPIEL_CHECK_EQ(result.smaller_bet, 300); + SPIEL_CHECK_LT(std::abs(result.probability_a - 0.217), tolerance); + SPIEL_CHECK_EQ(result.larger_bet, 400); + SPIEL_CHECK_LT(std::abs(result.probability_b - 0.783), tolerance); +} + +} // namespace logic +} // namespace universal_poker +} // namespace open_spiel + +int main(int argc, char **argv) { + // RandPSHar as in the "Rand-psHar" abbreviation used by the paper for the + // relevant row in the relevant table. + open_spiel::universal_poker::logic::TestRandPSHarPaperResults(); + open_spiel::universal_poker::logic::TestRandPSHarPaperResultScaleInvariance(); + open_spiel::universal_poker::logic::TestRandPSHarMappingExactMatch(); + open_spiel::universal_poker::logic::TestCalculatesMedianFiftyFifty(); + open_spiel::universal_poker::logic::TestShortCircuitsBelowMinAbstractionBet(); + open_spiel::universal_poker::logic::TestShortCircuitsAboveMaxAbstractionBet(); + open_spiel::universal_poker::logic::TestUnsortedNonUniqueActionAbstraction(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/card_set.cc b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/card_set.cc new file mode 100644 index 0000000..7caaf11 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/card_set.cc @@ -0,0 +1,163 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/universal_poker/logic/card_set.h" + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/spiel_utils.h" + +constexpr absl::string_view kSuitChars = "cdhs"; +constexpr absl::string_view kRankChars = "23456789TJQKA"; + +extern "C" { +#include "open_spiel/games/universal_poker/acpc/project_acpc_server/evalHandTables" +#include "open_spiel/games/universal_poker/acpc/project_acpc_server/game.h" +} + +namespace open_spiel::universal_poker::logic { + +// Returns the lexicographically next permutation of the supplied bits. +// See https://graphics.stanford.edu/~seander/bithacks.html#NextBitPermutation +uint64_t bit_twiddle_permute(uint64_t v) { + uint64_t t = v | (v - 1); + uint64_t u = ((~t & -~t) - 1); + int shift = __builtin_ctzl(v) + 1; + // Shifting by 64 bits or more is undefined behaviour, so we must avoid it. + // See for example: http://c0x.coding-guidelines.com/6.5.7.html (1185). + u = (shift < 64) ? 
(u >> shift) : 0; + uint64_t w = (t + 1) | u; + return w; +} + +CardSet::CardSet(std::string cardString) : cs() { + SPIEL_CHECK_LE(cardString.size(), 10); // Max 5 cards. + SPIEL_CHECK_EQ(cardString.size() % 2, 0); // Each cards is 2 chars: RankSuit. + + for (int i = 0; i < cardString.size(); i += 2) { + char rankChr = cardString[i]; + char suitChr = cardString[i + 1]; + + uint8_t rank = (uint8_t)(kRankChars.find(rankChr)); + uint8_t suit = (uint8_t)(kSuitChars.find(suitChr)); + SPIEL_CHECK_LT(rank, MAX_RANKS); + SPIEL_CHECK_LT(suit, MAX_SUITS); + cs.bySuit[suit] |= ((uint16_t)1 << rank); + } +} + +CardSet::CardSet(std::vector cards) : cs() { + for (int i = 0; i < cards.size(); ++i) { + int rank = rankOfCard(cards[i]); + int suit = suitOfCard(cards[i]); + + cs.bySuit[suit] |= ((uint16_t)1 << rank); + } +} + +CardSet::CardSet(uint16_t num_suits, uint16_t num_ranks) : cs() { + for (uint16_t r = 0; r < num_ranks; r++) { + for (uint16_t s = 0; s < num_suits; s++) { + cs.bySuit[s] |= ((uint16_t)1 << r); + } + } +} + +std::string CardSet::ToString() const { + std::string result; + for (int r = MAX_RANKS - 1; r >= 0; r--) { + for (int s = MAX_SUITS - 1; s >= 0; s--) { + uint32_t mask = (uint32_t)1 << r; + if (cs.bySuit[s] & mask) { + absl::StrAppend(&result, std::string(1, kRankChars[r]), + std::string(1, kSuitChars[s])); + } + } + } + + return result; +} + +std::vector CardSet::ToCardArray() const { + std::vector result(NumCards(), 0); + + int i = 0; + for (int r = 0; r < MAX_RANKS; ++r) { + for (int s = 0; s < MAX_SUITS; ++s) { + uint32_t mask = (uint32_t)1 << r; + if (cs.bySuit[s] & mask) { + result[i++] = makeCard(r, s); + } + } + } + return result; +} + +void CardSet::AddCard(uint8_t card) { + int rank = rankOfCard(card); + int suit = suitOfCard(card); + + cs.bySuit[suit] |= ((uint16_t)1 << rank); +} + +void CardSet::RemoveCard(uint8_t card) { + int rank = rankOfCard(card); + int suit = suitOfCard(card); + + cs.bySuit[suit] ^= ((uint16_t)1 << rank); +} + +int CardSet::NumCards() const { return __builtin_popcountl(cs.cards); } + +int CardSet::RankCards() const { + ::Cardset csNative; + csNative.cards = cs.cards; + return rankCardset(csNative); +} + +std::vector CardSet::SampleCards(int nbCards) { + std::vector combinations; + + uint64_t p = 0; + for (int i = 0; i < nbCards; ++i) { + p += (1 << i); + } + // Enumerates all the uint64_t integers that with nbCards 1-bits. + // The final n is ignored. It is fine as long as the rank < 16. + for (uint64_t n = bit_twiddle_permute(p); n > p; + p = n, n = bit_twiddle_permute(p)) { + // Checks whether the CardSet represented by p is inside the CardSet. 
+ uint64_t combo = p & cs.cards; + if (__builtin_popcountl(combo) == nbCards) { + CardSet c; + c.cs.cards = combo; + combinations.emplace_back(c); + } + } + + // std::cout << "combinations.size() " << combinations.size() << std::endl; + return combinations; +} + +bool CardSet::ContainsCards(uint8_t card) const { + int rank = rankOfCard(card); + int suit = suitOfCard(card); + return (cs.bySuit[suit] & ((uint16_t)1 << rank)) > 0; +} + +} // namespace open_spiel::universal_poker::logic diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/card_set.h b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/card_set.h new file mode 100644 index 0000000..1f62941 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/card_set.h @@ -0,0 +1,73 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_CARD_SET_H +#define OPEN_SPIEL_CARD_SET_H + +#include +#include +#include + +namespace open_spiel { +namespace universal_poker { +namespace logic { + +constexpr int kMaxSuits = 4; // Also defined in ACPC game.h + +// This is an equivalent wrapper to acpc evalHandTables.Cardset. +// It stores the cards for each color over 16 * 4 bits. The use of a Union +// allows to access only a specific color (16 bits) using bySuit[color]. +// A uint8_t card is defined by the integer * MAX_SUITS + +class CardSet { + public: + union CardSetUnion { + CardSetUnion() : cards(0) {} + uint16_t bySuit[kMaxSuits]; + uint64_t cards; + } cs; + + public: + CardSet() : cs() {} + CardSet(std::string cardString); + CardSet(std::vector cards); + // Returns a set containing num_ranks cards per suit for num_suits. + CardSet(uint16_t num_suits, uint16_t num_ranks); + + std::string ToString() const; + // Returns the cards present in this set in ascending order. + std::vector ToCardArray() const; + + // Add a card, as MAX_RANKS * + to the CardSet. + void AddCard(uint8_t card); + // Toogle (does not remove) the bit associated to `card`. + void RemoveCard(uint8_t card); + bool ContainsCards(uint8_t card) const; + + int NumCards() const; + // Returns the ranking value of this set of cards as evaluated by ACPC. + int RankCards() const; + + // Returns all the possible nbCards-subsets of this CardSet. + std::vector SampleCards(int nbCards); +}; + +// Returns the lexicographically next permutation of the supplied bits. 
+// See https://graphics.stanford.edu/~seander/bithacks.html#NextBitPermutation +uint64_t bit_twiddle_permute(uint64_t v); + +} // namespace logic +} // namespace universal_poker +} // namespace open_spiel + +#endif // OPEN_SPIEL_CARD_SET_H diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/card_set_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/card_set_test.cc new file mode 100644 index 0000000..df739bb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/card_set_test.cc @@ -0,0 +1,53 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/universal_poker/logic/card_set.h" + +#include + +namespace open_spiel { +namespace universal_poker { +namespace logic { + +void BasicCardSetTests() { + CardSet cs("AhKsQhJhTh"); + + std::cout << "CardSet: " << cs.ToString() << std::endl; + for (auto card : cs.ToCardArray()) { + std::cout << "Card: " << card << std::endl; + } + std::cout << "Rank: " << cs.RankCards() << std::endl; + std::cout << "Count Cards: " << cs.NumCards() << std::endl; + + CardSet deck(4, 13); + std::cout << "CardSet: " << deck.ToString() << std::endl; + std::cout << "Rank: " << deck.RankCards() << std::endl; + std::cout << "Count Cards: " << deck.NumCards() << std::endl; + + for (auto combo : deck.SampleCards(3)) { + std::cout << "CardSet: " << combo.ToString() << std::endl; + } + + for (auto combo : deck.SampleCards(1)) { + std::cout << "CardSet: " << combo.ToString() << std::endl; + } +} + +} // namespace logic +} // namespace universal_poker +} // namespace open_spiel + +int main(int argc, char **argv) { + open_spiel::universal_poker::logic::BasicCardSetTests(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/gamedef.cc b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/gamedef.cc new file mode 100644 index 0000000..c16acc4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/gamedef.cc @@ -0,0 +1,199 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
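+
+// For illustration (a sketch; see gamedef_test.cc for the exact
+// expectations), a gamedef of the form
+//
+//   GAMEDEF
+//   limit
+//   numPlayers = 2
+//   numRounds = 1
+//   blind = 5 10
+//   ...
+//   END GAMEDEF
+//
+// is converted by GamedefToOpenSpielParameters() into roughly
+//
+//   universal_poker(betting=limit,numPlayers=2,numRounds=1,blind=5 10,...)
+//
+// with parameters emitted in the order of the input lines and keys
+// re-capitalized in a final pass.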
+ +#include "open_spiel/games/universal_poker/logic/gamedef.h" + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/ascii.h" +#include "open_spiel/abseil-cpp/absl/strings/match.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/strings/str_replace.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel::universal_poker::logic { + +constexpr char kGamedef[] = "gamedef"; +constexpr char kEndGamedef[] = "end gamedef"; + +std::string GamedefToOpenSpielParameters(const std::string& acpc_gamedef) { + if (acpc_gamedef.empty()) { + SpielFatalError("Input ACPC gamedef was empty."); + } + + if (!StrContainsIgnoreCase(acpc_gamedef, kGamedef)) { + SpielFatalError(absl::StrCat("ACPC gamedef does not contain 'GAMEDEF': ", + acpc_gamedef)); + } + + // Check the GAMEDEF/END GAMEDEF statements are valid and not something like + // e.g. 'GAMEDEFfoo' or 'SPEND GAMEDEF'. + // + // GAMEDEF either is the very first line, in which case it should be followed + // by an "\n", or it is not, in which case it should be both followed by an + // "\n" AND also prefixed by another "\n". + if (!absl::StartsWithIgnoreCase(acpc_gamedef, absl::StrCat(kGamedef, "\n")) && + !StrContainsIgnoreCase(acpc_gamedef, + absl::StrCat("\n", kGamedef, "\n"))) { + SpielFatalError( + absl::StrCat("ACPC gamedef does not have 'GAMEDEF' on its own line " + "(please remove any trailing or prefixed characters, " + "including whitespace):", + acpc_gamedef)); + } + // END GAMEDEF either is the very last line, in which case it should be + // prefixed by an "\n", or it is not, in which case it should be both prefixed + // by an "\n" AND also followed by another "\n". + if (!StrContainsIgnoreCase(acpc_gamedef, kEndGamedef)) { + SpielFatalError(absl::StrCat( + "ACPC gamedef does not contain 'END GAMEDEF': ", acpc_gamedef)); + } + if (!absl::EndsWithIgnoreCase(acpc_gamedef, + absl::StrCat("\n", kEndGamedef)) && + !StrContainsIgnoreCase(acpc_gamedef, + absl::StrCat("\n", kEndGamedef, "\n"))) { + SpielFatalError( + absl::StrCat("ACPC gamedef does not have an 'END GAMEDEF' on its own " + "line (please remove any trailing or prefixed characters, " + "including whitespace):", + acpc_gamedef)); + } + + // As per definition of gamedef -> "case is ignored". So we will normalize to + // lowercase initially / when initially processing it. (Note: we will have to + // 'correct' the capitalization for all our keys down below at the end. Since + // OpenSpiel itself *does* care about capitalization, unlike the official ACPC + // gamedef definition.) + std::string gamedef_normalized = + absl::AsciiStrToLower(absl::StripAsciiWhitespace(acpc_gamedef)); + + std::vector open_spiel_state_args = {}; + + // Gamedef's definition states that: "Empty lines or lines with '#' as the + // very first character will be ignored". (Note that this means we do NOT want + // to treat '#' like code comments, which normally take affect even in the + // middle of a line.) + // Additionally, we want to skip doing anything for the 'gamedef' and + // 'end gamedef' lines (now that we've verified they appear in it somewhere) + // because they're not needed for the Open Spiel game state. 
+ const auto is_useful_line = [](absl::string_view line) { + return !line.empty() && line[0] != '#' && line != kGamedef && + line != kEndGamedef; + }; + std::vector lines = absl::StrSplit(gamedef_normalized, '\n'); + for (const auto& line : lines) { + // Skip lines that are not useful. + if (!is_useful_line(line)) { continue; } + + // EDGE CASE: we should only see exactly one of either 'limit' or 'nolimit', + // and it should be on its own line. TLDR it's like 'END GAMEDEF' in that + // it's atypical / has no '=' in it, which would interfere with our + // processing below. (Hence why we're immediately taking care of it here.) + if ((line == "limit") || (line == "nolimit")) { + open_spiel_state_args.push_back(absl::StrCat("betting=", line)); + continue; + } + // else line must be of the following form: key[ ]=[ ]val1[ val2 val3 ...] + + if (!absl::StrContains(line, '=')) { + SpielFatalError( + absl::StrCat("Gamedef line is missing its '=' character: ", line)); + } + std::vector key_and_values = absl::StrSplit(line, '='); + + if (key_and_values.size() != 2) { + SpielFatalError( + absl::StrCat("Gamedef line has wrong number of components: ", line)); + } + auto key = std::string(absl::StripAsciiWhitespace(key_and_values[0])); + // Note that "values" is plural on purpose - it has potentially multiple, + // space-separated things in it! + auto values = std::string(absl::StripAsciiWhitespace(key_and_values[1])); + + // EDGE CASE: + // There's a bug with a downstream serializer that gets confused and errors + // if it receives a single value in places that can potentially be multiple + // values, e.g. firstPlayer value '1' vs '1 1' (regardless of the actual + // number of players / betting rounds / etc). + // + // With the exception of the 'blind' input, there is typically no meaningful + // difference between the value appearing a single time, vs the same exact + // value appearing twice (separated by a space). So, as a workaround we + // manually convert the former form to the latter. + // + // Yes, this is hacky. But it's also the most durable option we have until + // we can go fix the downstream issue :) + const std::set optionally_multi_round_parameters = { + "firstplayer", "raisesize", "maxraises", "numboardcards", "stack"}; + if (optionally_multi_round_parameters.find(key) != + optionally_multi_round_parameters.end() && !values.empty() && + !absl::StrContains(values, " ")) { + // Note: "values" is a single integer if in this section (hence why we're + // having this problem to begin with; see above for more details). + + // Note: this line has a potentially multi-round value defined in terms of + // single round. Transforming the value into another that is equivalent, + // but defined multi-round, to prevent downstream deserializer errors.; + + values = absl::StrCat(values, " ", values); + // Transformed value into another that is equivalent, but defined as + // multi-round + } + + open_spiel_state_args.push_back(absl::StrCat(key, "=", values)); + } + std::string lowercase_open_spiel_game_state = absl::StrCat( + "universal_poker(", absl::StrJoin(open_spiel_state_args, ","), ")"); + + // See below - unlike the input ACPC gamedef (where casing is ignored), + // OpenSpiel will actually error at runtime if the arg keys aren't capitalized + // in the exact way it expects. + // (Note: deliberately including things like e.g. bettingAbstraction that are + // not actually valid params for the ACPC gamedef to avoid future bugs). 
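+ //
+ // For example, the lowercased fragment ",numplayers=2,numrounds=1," ends up
+ // as ",numPlayers=2,numRounds=1," once the replacements built below have
+ // been applied.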
+ static const char* const kPossibleGameStateKeysCapitalized[] = { + "betting", "bettingAbstraction", + "blind", "boardCards", + "firstPlayer", "gamedef", + "handReaches", "maxRaises", + "numBoardCards", "numHoleCards", + "numPlayers", "numRanks", + "numRounds", "numSuits", + "potSize", "raiseSize", + "stack", + }; + std::vector> replacements = {}; + for (const std::string& capitalized_key : kPossibleGameStateKeysCapitalized) { + std::string lowercase_key = absl::AsciiStrToLower(capitalized_key); + if (capitalized_key == lowercase_key) { + continue; + } + + // Regardless of order, at this point we know each parameter either is at + // the start - and following an open paren - or is comma-separated from + // the preceding parameter. Hence we can look for a preceding "(" or ",". + replacements.push_back(std::make_pair(absl::StrCat("(", lowercase_key), + absl::StrCat("(", capitalized_key))); + replacements.push_back(std::make_pair(absl::StrCat(",", lowercase_key), + absl::StrCat(",", capitalized_key))); + } + return absl::StrReplaceAll(lowercase_open_spiel_game_state, replacements); +} + +} // namespace open_spiel::universal_poker::logic diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/gamedef.h b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/gamedef.h new file mode 100644 index 0000000..23465b5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/gamedef.h @@ -0,0 +1,32 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_UNIVERSAL_POKER_LOGIC_GAMEDEF_H_ +#define OPEN_SPIEL_GAMES_UNIVERSAL_POKER_LOGIC_GAMEDEF_H_ + +#include + +namespace open_spiel { +namespace universal_poker { +namespace logic { + +// Converts an ACPC gamedef into the corresponding string that's compatible with +// OpenSpiel. +std::string GamedefToOpenSpielParameters(const std::string& acpc_gamedef); + +} // namespace logic +} // namespace universal_poker +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_UNIVERSAL_POKER_LOGIC_GAMEDEF_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/gamedef_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/gamedef_test.cc new file mode 100644 index 0000000..029111e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/logic/gamedef_test.cc @@ -0,0 +1,182 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/universal_poker/logic/gamedef.h" + +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/match.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace universal_poker { +namespace logic { + +const char kSimpleHeadsupLimitPokerACPCGamedef[] = + R""""( +GAMEDEF +limit +numPlayers = 2 +numRounds = 1 +blind = 5 10 +raiseSize = 10 10 20 +firstPlayer = 1 +maxRaises = 2 2 3 +numSuits = 4 +numRanks = 5 +numHoleCards = 1 +numBoardCards = 0 2 1 +END GAMEDEF)""""; + +// Designed to mimic pre-existing code in card_set_test.cc +void TestGamedefToOpenSpielParametersEasyCase() { + std::cout << "acpc gamedef:\n" + << kSimpleHeadsupLimitPokerACPCGamedef << "\n" + << std::endl; + std::cout << "OpenSpiel gamestate:\n" + << GamedefToOpenSpielParameters(kSimpleHeadsupLimitPokerACPCGamedef) + << "\n" + << std::endl; +} + +// By "KeyOnly" we mean 'GAMEDEF', 'limit', 'nolimit', and 'END GAMEDEF' lines +void TestGamedefToOpenSpielParametersNormalizesKeyOnlyLines() { + std::string open_spiel_game_state = + GamedefToOpenSpielParameters(kSimpleHeadsupLimitPokerACPCGamedef); + + SPIEL_CHECK_TRUE(absl::StrContains(open_spiel_game_state, "betting=limit,")); + SPIEL_CHECK_FALSE( + StrContainsIgnoreCase(open_spiel_game_state, "end gamedef")); + SPIEL_CHECK_FALSE( + StrContainsIgnoreCase(open_spiel_game_state, "gamedef")); + SPIEL_CHECK_FALSE( + StrContainsIgnoreCase(open_spiel_game_state, "nolimit")); +} + +// There's a bug downstream causing a runtime error if we provide it with a +// single value for keys that can have different values on each betting round. +// This function tests our (hacky) fix; whenever a value for these keys has +// only one value in it, we convert it into an equivalent one that will not +// trigger the error. +void TestGamedefToOpenSpielParametersMultiRoundValueEdgeCase() { + std::string acpc_gamedef = R""""( +GAMEDEF +limit +numPlayers = 1 +numRounds = 1 +blind = 5 +raiseSize = 10 +firstPlayer = 1 +maxRaises = 2 +numSuits = 4 +numRanks = 5 +numHoleCards = 1 +numBoardCards = 2 +stack = 100 +END GAMEDEF)""""; + + std::string open_spiel_game_state = + GamedefToOpenSpielParameters(acpc_gamedef); + SPIEL_CHECK_TRUE( + absl::StrContains(open_spiel_game_state, ",firstPlayer=1 1,")); + SPIEL_CHECK_TRUE( + absl::StrContains(open_spiel_game_state, ",raiseSize=10 10,")); + SPIEL_CHECK_TRUE(absl::StrContains(open_spiel_game_state, ",maxRaises=2 2,")); + SPIEL_CHECK_TRUE(absl::StrContains(open_spiel_game_state, ",stack=100 100)")); +} + +void TestGamedefToOpenSpielParametersRemovesUnneededLines() { + std::string acpc_gamedef = R""""( +# COMMENT THAT SHOULD BE IGNORED +gameDEF +limit +numplayers = 2 +numrounds = 1 +# ANOTHER COMMENT +blind = 5 10 +raisesize = 10 10 20 + +# Empty lines are also ignored! 
+ +MAXRAISES = 2 2 3 +NUMSUITS = 4 +NUMRANKS = 5 +nUmHoLeCARds = 1 +numBoardCARDS = 0 2 1 +end GameDef + +# hasta la vista +)""""; + + std::string open_spiel_game_state = + GamedefToOpenSpielParameters(acpc_gamedef); + + SPIEL_CHECK_FALSE(absl::StrContains(open_spiel_game_state, "COMMENT")); + SPIEL_CHECK_FALSE(absl::StrContains(open_spiel_game_state, "EMPTY")); + SPIEL_CHECK_FALSE(absl::StrContains(open_spiel_game_state, "#")); + SPIEL_CHECK_FALSE(absl::StrContains(open_spiel_game_state, "\n")); + SPIEL_CHECK_FALSE( + StrContainsIgnoreCase(open_spiel_game_state, "end gamedef")); + SPIEL_CHECK_FALSE( + StrContainsIgnoreCase(open_spiel_game_state, "gamedef")); +} + +void TestGamedefToOpenSpielParametersNormalizesCapitalization() { + std::string acpc_gamedef = R""""( +gameDEF +limit +numplayers = 2 +numrounds = 1 +blind = 5 10 +raisesize = 10 10 20 +MAXRAISES = 2 2 3 +NUMSUITS = 4 +NUMRANKS = 5 +nUmHoLeCARds = 1 +numBoardCARDS = 0 2 1 +end GameDef +)""""; + + std::string open_spiel_game_state = + GamedefToOpenSpielParameters(acpc_gamedef); + + SPIEL_CHECK_TRUE(absl::StrContains(open_spiel_game_state, ",numPlayers=2,")); + SPIEL_CHECK_TRUE(absl::StrContains(open_spiel_game_state, ",numRounds=1,")); + SPIEL_CHECK_TRUE(absl::StrContains(open_spiel_game_state, ",blind=5 10,")); + SPIEL_CHECK_TRUE( + absl::StrContains(open_spiel_game_state, ",raiseSize=10 10 20,")); + SPIEL_CHECK_TRUE(absl::StrContains(open_spiel_game_state, ",numSuits=4,")); + SPIEL_CHECK_TRUE(absl::StrContains(open_spiel_game_state, ",numRanks=5,")); + SPIEL_CHECK_TRUE( + absl::StrContains(open_spiel_game_state, ",numHoleCards=1,")); +} + +} // namespace logic +} // namespace universal_poker +} // namespace open_spiel + +int main(int argc, char **argv) { + open_spiel::universal_poker::logic:: + TestGamedefToOpenSpielParametersEasyCase(); + open_spiel::universal_poker::logic:: + TestGamedefToOpenSpielParametersNormalizesKeyOnlyLines(); + open_spiel::universal_poker::logic:: + TestGamedefToOpenSpielParametersMultiRoundValueEdgeCase(); + open_spiel::universal_poker::logic:: + TestGamedefToOpenSpielParametersRemovesUnneededLines(); + open_spiel::universal_poker::logic:: + TestGamedefToOpenSpielParametersNormalizesCapitalization(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/universal_poker.cc b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/universal_poker.cc new file mode 100644 index 0000000..bdc6ac6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/universal_poker.cc @@ -0,0 +1,1593 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
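+
+// Usage sketch (illustrative only): the game registered in this file can be
+// constructed from an OpenSpiel parameter string, e.g.
+//
+//   std::shared_ptr<const Game> game =
+//       LoadGame("universal_poker(betting=limit,raiseSize=10 10 20,...)");
+//
+// or, for ACPC-style configuration, via the gamedef conversion in
+// logic/gamedef.h (see the parameter documentation further below).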
+ +#include "open_spiel/games/universal_poker/universal_poker.h" + +#include + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/games/universal_poker/acpc/project_acpc_server/game.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/games/universal_poker/logic/card_set.h" +#include "open_spiel/games/universal_poker/logic/gamedef.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_bots.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace universal_poker { +namespace { + +std::string BettingAbstractionToString(const BettingAbstraction &betting) { + switch (betting) { + case BettingAbstraction::kFC: { + return "BettingAbstration: FC"; + break; + } + case BettingAbstraction::kFCHPA: { + return "BettingAbstration: FCPHA"; + break; + } + case BettingAbstraction::kFCPA: { + return "BettingAbstration: FCPA"; + break; + } + case BettingAbstraction::kFULLGAME: { + return "BettingAbstraction: FULLGAME"; + break; + } + default: + SpielFatalError("Unknown betting abstraction."); + break; + } +} + +// Does not support chance actions. +// TODO(author1): Remove all of the many varieties of action types and +// switch to use a single enum, preferably project_acpc_server::ActionType. +acpc_cpp::ACPCState::ACPCActionType UniversalPokerActionTypeToACPCActionType( + StateActionType type) { + if (type == StateActionType::ACTION_DEAL) { + SpielFatalError("ACPC does not support deal action types."); + } + if (type == StateActionType::ACTION_FOLD) { + return acpc_cpp::ACPCState::ACPC_FOLD; + } + if (type == StateActionType::ACTION_CHECK_CALL) { + return acpc_cpp::ACPCState::ACPC_CALL; + } + if (type == StateActionType::ACTION_BET || + type == StateActionType::ACTION_ALL_IN) { + return acpc_cpp::ACPCState::ACPC_RAISE; + } + SpielFatalError(absl::StrCat("Action not found: ", type)); + + // Should never be called. + return acpc_cpp::ACPCState::ACPC_INVALID; +} + +} // namespace + +const GameType kGameType{ + /*short_name=*/"universal_poker", + /*long_name=*/"Universal Poker", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kExplicitStochastic, + GameType::Information::kImperfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/10, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + + { + // The ACPC code uses a specific configuration file to describe the + // game. For more details, see + // https://github.com/ethansbrown/acpc/blob/master/project_acpc_server/READMthird_party/open_spiel/integration_tests/playthrough_test.pyE + + // If you wish to construct a universal_poker game directly from one of + // these ACPC gamedefs see the LoadUniversalPokerGameFromACPCGamedef() + // wrapper function below. + // (Note that this is just for convenience; we also support defining the + // configuration as a typical OpenSpiel game state input. E.g. doing + // LoadGame("universal_poker(betting=limit,raiseSize=10 10 20,...)") + // as per usual). 
+ + // The following has been copied from ACPC documentation: + // + // Empty lines or lines with '#' as the very first character will be + // ignored + // + // The Game definitions should start with "gamedef" and end with + // "end gamedef" and can have the fields documented bellow (case is + // ignored) + // + // If you are creating your own game definitions, please note that + // game.h defines some constants for maximums in games (e.g., number of + // rounds). These may need to be changed for games outside of the what + // is being run for the Annual Computer Poker Competition. + + // The documentation below is adapted from project_acpc_server/game.cc. + // + // Number of Players (up to 10) + {"numPlayers", GameParameter(2)}, + // Betting Type "limit" "nolimit" + {"betting", GameParameter(std::string("nolimit"))}, + // The stack size for each player at the start of each hand (for + // no-limit). It will be ignored on "limit". + // Note: it's somewhat unclear what happens behind the scenes with the + // stack sizes in limit games. Although it _appears_ to default to + // INT32_MAX for all players (regardless of whether stack was or was + // not provided). + {"stack", GameParameter(std::string("1200 1200"))}, + // The size of the blinds for each player (relative to the dealer) + {"blind", GameParameter(std::string("100 100"))}, + // The size of raises on each round (for limit games only) as numrounds + // integers. It will be ignored for nolimit games. + {"raiseSize", GameParameter(std::string("100 100"))}, + // Number of betting rounds per hand of the game + {"numRounds", GameParameter(2)}, + // The player that acts first (relative to the dealer) on each round + {"firstPlayer", GameParameter(std::string("1 1"))}, + // maxraises - the maximum number of raises on each round. If not + // specified, it will default to UINT8_MAX. + {"maxRaises", GameParameter(std::string(""))}, + // The number of different suits in the deck + {"numSuits", GameParameter(4)}, + // The number of different ranks in the deck + {"numRanks", GameParameter(6)}, + // The number of private cards to deal to each player + {"numHoleCards", GameParameter(1)}, + // The number of cards revealed on each round + {"numBoardCards", GameParameter(std::string("0 1"))}, + // Specify which actions are available to the player, in both limit and + // nolimit games. Available options are: "fc" for fold and check/call. + // "fcpa" for fold, check/call, bet pot and all in (default). + // Use "fullgame" for the unabstracted game. + {"bettingAbstraction", GameParameter(std::string("fcpa"))}, + + // ------------------------------------------------------------------------ + // Following parameters are used to specify specific subgame. + {"potSize", GameParameter(0)}, + // Board cards that have been revealed. Must be in the format + // of logic::CardSet -- kSuitChars, kRankChars + {"boardCards", GameParameter("")}, + // A space separated list of reach probabilities for each player in a + // subgame. When there are in total N cards in the deck, two players, + // and each player gets 2 cards, there should be: + // + // N*(N-1) / 2 * 2 = N*(N-1) + // ^ ignore card order ^ number of players + // + // N*(N-1) reach probabilities. 
+ // Currently supported only for the setting of 2 players, 4 suits, 13 + // cards + {"handReaches", GameParameter("")}, + }}; + +std::shared_ptr Factory(const GameParameters ¶ms) { + return absl::make_unique(params); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +open_spiel::RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +// Returns how many actions are available at a choice node (3 when limit +// and 4 for no limit). +// TODO(author2): Is that a bug? There are 5 actions? Is no limit means +// "bet bot" is added? or should "all in" be also added? +inline uint32_t GetMaxBettingActions(const acpc_cpp::ACPCGame &acpc_game) { + return acpc_game.IsLimitGame() ? 3 : 4; +} + +// namespace universal_poker +UniversalPokerState::UniversalPokerState(std::shared_ptr game) + : State(game), + acpc_game_( + static_cast(game.get())->GetACPCGame()), + acpc_state_(acpc_game_), + deck_(/*num_suits=*/acpc_game_->NumSuitsDeck(), + /*num_ranks=*/acpc_game_->NumRanksDeck()), + cur_player_(kChancePlayerId), + possibleActions_(ACTION_DEAL), + betting_abstraction_(static_cast(game.get()) + ->betting_abstraction()) { + // -- Optionally apply subgame parameters. ----------------------------------- + // Pot size. + const int pot_size = game->GetParameters().at("potSize").int_value(); + if (pot_size > 0) { + acpc_state_.SetPotSize(pot_size); + } + // Board cards. + const std::string board_cards = + game->GetParameters().at("boardCards").string_value(); + if (!board_cards.empty()) { + // Add the cards. + logic::CardSet cs(board_cards); + int before_add = deck_.NumCards(); + for (uint8_t card : cs.ToCardArray()) { + AddBoardCard(card); + deck_.RemoveCard(card); + } + SPIEL_CHECK_EQ(deck_.NumCards(), before_add - cs.NumCards()); + // Advance the round according to the number of board cards. + int num_cards = cs.NumCards(); + int round = 0; + do { + num_cards -= acpc_game_->NumBoardCards(round); + round++; + } while (round < acpc_game_->NumRounds() && num_cards > 0); + acpc_state_.mutable_state()->round = round - 1; + } + // Set specific hand reach probabilities. 
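+ // (In the only supported setting of 2 players, 4 suits and 13 ranks this is
+ // 52*51 = 2652 numbers, i.e. kSubgameUniqueHands reach values per player;
+ // the size is checked below.)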
+ const std::string handReaches = + game->GetParameters().at("handReaches").string_value(); + if (!handReaches.empty()) { + std::stringstream iss(handReaches); + double number; + while ( iss >> number ) { + handReaches_.push_back(number); + } + SPIEL_CHECK_EQ(handReaches_.size(), kSubgameUniqueHands * 2); + } +} + +std::string UniversalPokerState::ToString() const { + std::string str = + absl::StrCat(BettingAbstractionToString(betting_abstraction_), "\n"); + for (int p = 0; p < acpc_game_->GetNbPlayers(); ++p) { + absl::StrAppend(&str, "P", p, " Cards: ", HoleCards(p).ToString(), "\n"); + } + absl::StrAppend(&str, "BoardCards ", BoardCards().ToString(), "\n"); + + if (IsChanceNode()) { + absl::StrAppend(&str, "PossibleCardsToDeal ", deck_.ToString(), "\n"); + } + if (IsTerminal()) { + for (int p = 0; p < acpc_game_->GetNbPlayers(); ++p) { + absl::StrAppend(&str, "P", p, " Reward: ", GetTotalReward(p), "\n"); + } + } + absl::StrAppend(&str, "Node type?: "); + if (IsChanceNode()) { + absl::StrAppend(&str, "Chance node\n"); + } else if (IsTerminal()) { + absl::StrAppend(&str, "Terminal Node!\n"); + } else { + absl::StrAppend(&str, "Player node for player ", cur_player_, "\n"); + } + + if (betting_abstraction_ == BettingAbstraction::kFC || + betting_abstraction_ == BettingAbstraction::kFCPA) { + absl::StrAppend(&str, "PossibleActions (", GetPossibleActionCount(), + "): ["); + for (StateActionType action : ALL_ACTIONS) { + if (action & possibleActions_) { + if (action == ACTION_ALL_IN) absl::StrAppend(&str, " ACTION_ALL_IN "); + if (action == ACTION_BET) absl::StrAppend(&str, " ACTION_BET "); + if (action == ACTION_CHECK_CALL) { + absl::StrAppend(&str, " ACTION_CHECK_CALL "); + } + if (action == ACTION_FOLD) absl::StrAppend(&str, " ACTION_FOLD "); + if (action == ACTION_DEAL) absl::StrAppend(&str, " ACTION_DEAL "); + } + } + } + absl::StrAppend(&str, "]", "\nRound: ", acpc_state_.GetRound(), + "\nACPC State: ", acpc_state_.ToString(), + "\nAction Sequence: ", actionSequence_); + return str; +} + +std::string UniversalPokerState::ActionToString(Player player, + Action move) const { + std::string move_str; + if (IsChanceNode()) { + move_str = absl::StrCat("Deal(", move, ")"); + } else if (static_cast(move) == ActionType::kFold) { + move_str = "Fold"; + } else if (static_cast(move) == ActionType::kCall) { + move_str = "Call"; + } else if (static_cast(move) == ActionType::kHalfPot && + // Avoids an edge case where we interpret a bet size that's + // literally meant to be '4' as a half pot bet (since that's the + // actual value of ActionTye::kHalfPot). 
+ betting_abstraction_ != BettingAbstraction::kFULLGAME) { + move_str = "HalfPot"; + } else if (betting_abstraction_ == BettingAbstraction::kFULLGAME) { + SPIEL_CHECK_GE(move, 2); + move_str = absl::StrCat("Bet", move); + } else if (static_cast(move) == ActionType::kBet) { + move_str = "Bet"; + } else if (static_cast(move) == ActionType::kAllIn) { + move_str = "AllIn"; + } else if (move > kBet) { + SPIEL_CHECK_EQ(betting_abstraction_, BettingAbstraction::kFCHPA); + move_str = absl::StrCat("r", move); + } else { + SpielFatalError(absl::StrCat("Unknown action: ", move)); + } + return absl::StrCat("player=", player, " move=", move_str); +} + +bool UniversalPokerState::IsTerminal() const { + bool finished = cur_player_ == kTerminalPlayerId; + assert(acpc_state_.IsFinished() || !finished); + return finished; +} + +bool UniversalPokerState::IsChanceNode() const { + return cur_player_ == kChancePlayerId; +} + +Player UniversalPokerState::CurrentPlayer() const { + if (IsTerminal()) { + return kTerminalPlayerId; + } + if (IsChanceNode()) { + return kChancePlayerId; + } + + return Player(acpc_state_.CurrentPlayer()); +} + +std::vector UniversalPokerState::Returns() const { + if (!IsTerminal()) { + return std::vector(NumPlayers(), 0.0); + } + + std::vector returns(NumPlayers()); + for (Player player = 0; player < NumPlayers(); ++player) { + // Money vs money at start. + returns[player] = GetTotalReward(player); + } + + return returns; +} + +void UniversalPokerState::InformationStateTensor( + Player player, absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + SPIEL_CHECK_EQ(values.size(), game_->InformationStateTensorShape()[0]); + std::fill(values.begin(), values.end(), 0.); + + // Layout: + // my player number: num_players bits + // my cards: Initial deck size bits (1 means you have the card), i.e. + // MaxChanceOutcomes() = NumSuits * NumRanks + // public cards: Same as above, but for the public cards. + // action sequence: (max game length)*2 bits (fold/raise/call/all-in) + // action sequence sizings: (max game length) integers with value >= 0, + // 0 when corresponding to 'deal' or 'check'. + int offset = 0; + + // Mark who I am. + values[player] = 1; + offset += NumPlayers(); + + const logic::CardSet full_deck(acpc_game_->NumSuitsDeck(), + acpc_game_->NumRanksDeck()); + const std::vector deckCards = full_deck.ToCardArray(); + logic::CardSet holeCards = HoleCards(player); + logic::CardSet boardCards = BoardCards(); + + // Mark my private cards + // (Note: it should be way more efficient to iterate over the cards of the + // player, rather than iterating over all the cards. We may want to change + // this in the future.) + for (uint32_t i = 0; i < full_deck.NumCards(); i++) { + values[i + offset] = holeCards.ContainsCards(deckCards[i]) ? 1.0 : 0.0; + } + offset += full_deck.NumCards(); + + // Mark the public cards + for (int i = 0; i < full_deck.NumCards(); ++i) { + values[i + offset] = boardCards.ContainsCards(deckCards[i]) ? 1.0 : 0.0; + } + offset += full_deck.NumCards(); + + const std::string actionSeq = GetActionSequence(); + const int length = actionSeq.length(); + SPIEL_CHECK_LT(length, game_->MaxGameLength()); + + // Mark the action sequence (abstracted). + for (int i = 0; i < length; ++i) { + SPIEL_CHECK_LT(offset + i + 1, values.size()); + if (actionSeq[i] == 'c') { + // Encode call as 10. + values[offset + (2 * i)] = 1; + values[offset + (2 * i) + 1] = 0; + } else if (actionSeq[i] == 'p') { + // Encode raise as 01. 
+ values[offset + (2 * i)] = 0; + values[offset + (2 * i) + 1] = 1; + } else if (actionSeq[i] == 'a') { + // Encode all-in as 11. + values[offset + (2 * i)] = 1; + values[offset + (2 * i) + 1] = 1; + } else if (actionSeq[i] == 'f') { + // Encode fold as 00. + // TODO(author2): Should this be 11? + values[offset + (2 * i)] = 0; + values[offset + (2 * i) + 1] = 0; + } else if (actionSeq[i] == 'd') { + values[offset + (2 * i)] = 0; + values[offset + (2 * i) + 1] = 0; + } else { + SPIEL_CHECK_EQ(actionSeq[i], 'd'); + } + } + // Move offset to the end of the abstracted betting sequence (since 2 entries + // per move). + offset += game_->MaxGameLength() * 2; + + // Mark the action sequence sizings. + const std::vector action_sequence_sizings = GetActionSequenceSizings(); + SPIEL_CHECK_EQ(length, action_sequence_sizings.size()); + for (int i = 0; i < length; ++i) { + values[offset + i] = action_sequence_sizings[i]; + } + // Move offset to the end of the un-abstracted betting sequence. + offset += game_->MaxGameLength(); + + SPIEL_CHECK_EQ(offset, game_->InformationStateTensorShape()[0]); +} + +void UniversalPokerState::ObservationTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, NumPlayers()); + + SPIEL_CHECK_EQ(values.size(), game_->ObservationTensorShape()[0]); + std::fill(values.begin(), values.end(), 0.); + + // Layout of observation: + // my player number: num_players bits + // my cards: Initial deck size bits (1 means you have the card), i.e. + // MaxChanceOutcomes() = NumSuits * NumRanks + // public cards: Same as above, but for the public cards. + // the contribution of each player to the pot. num_players integers. + int offset = 0; + + // Mark who I am. + values[player] = 1; + offset += NumPlayers(); + + const logic::CardSet full_deck(acpc_game_->NumSuitsDeck(), + acpc_game_->NumRanksDeck()); + const std::vector all_cards = full_deck.ToCardArray(); + logic::CardSet holeCards = HoleCards(player); + logic::CardSet boardCards = BoardCards(); + + for (uint32_t i = 0; i < full_deck.NumCards(); i++) { + values[i + offset] = holeCards.ContainsCards(all_cards[i]) ? 1.0 : 0.0; + } + offset += full_deck.NumCards(); + + for (uint32_t i = 0; i < full_deck.NumCards(); i++) { + values[i + offset] = boardCards.ContainsCards(all_cards[i]) ? 1.0 : 0.0; + } + offset += full_deck.NumCards(); + + // Adding the contribution of each players to the pot. 
+ for (auto p = Player{0}; p < NumPlayers(); p++) { + values[offset + p] = acpc_state_.Ante(p); + } + offset += NumPlayers(); + SPIEL_CHECK_EQ(offset, game_->ObservationTensorShape()[0]); +} + +std::string UniversalPokerState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, acpc_game_->GetNbPlayers()); + const uint32_t pot = acpc_state_.MaxSpend() * + (acpc_game_->GetNbPlayers() - acpc_state_.NumFolded()); + std::string money; + money.reserve(acpc_game_->GetNbPlayers() * 2); + for (auto p = Player{0}; p < acpc_game_->GetNbPlayers(); p++) { + if (p != Player{0}) absl::StrAppend(&money, " "); + absl::StrAppend(&money, acpc_state_.Money(p)); + } + std::string sequences; + sequences.reserve(acpc_state_.GetRound() * 2); + for (auto r = 0; r <= acpc_state_.GetRound(); r++) { + if (r != 0) absl::StrAppend(&sequences, "|"); + absl::StrAppend(&sequences, acpc_state_.BettingSequence(r)); + } + + return absl::StrFormat( + "[Round %i][Player: %i][Pot: %i][Money: %s][Private: %s][Public: " + "%s][Sequences: %s]", + acpc_state_.GetRound(), CurrentPlayer(), pot, money, + HoleCards(player).ToString(), BoardCards().ToString(), sequences); +} + +std::string UniversalPokerState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, acpc_game_->GetNbPlayers()); + std::string result; + + const uint32_t pot = acpc_state_.MaxSpend() * + (acpc_game_->GetNbPlayers() - acpc_state_.NumFolded()); + absl::StrAppend(&result, "[Round ", acpc_state_.GetRound(), + "][Player: ", CurrentPlayer(), "][Pot: ", pot, "][Money:"); + for (auto p = Player{0}; p < acpc_game_->GetNbPlayers(); p++) { + absl::StrAppend(&result, " ", acpc_state_.Money(p)); + } + absl::StrAppend(&result, "]"); + // Add the player's private cards + if (player != kChancePlayerId) { + absl::StrAppend(&result, "[Private: ", HoleCards(player).ToString(), "]"); + } + // Adding the contribution of each players to the pot + absl::StrAppend(&result, "[Ante:"); + for (auto p = Player{0}; p < num_players_; p++) { + absl::StrAppend(&result, " ", acpc_state_.Ante(p)); + } + absl::StrAppend(&result, "]"); + + return result; +} + +std::unique_ptr UniversalPokerState::Clone() const { + return absl::make_unique(*this); +} + + +int GetHoleCardsReachIndex(int card_a, int card_b, + int num_suits, int num_ranks) { + // CardSet uses kSuitChars of "cdhs", but we need the inverse i.e. "shdc" + // according to https://github.com/Sandholm-Lab/LibratusEndgames + const int a_suit = num_suits - 1 - suitOfCard(card_a); + const int b_suit = num_suits - 1 - suitOfCard(card_b); + const int a_rank = rankOfCard(card_a); + const int b_rank = rankOfCard(card_b); + // Order by rank, then order by suit. + const int lesser_card = a_rank < b_rank + || (a_rank == b_rank && a_suit < b_suit) + ? card_a : card_b; + // We use ints, so this is ok. 
+ const int higher_card = card_a + card_b - lesser_card; + + const int lesser_suit = num_suits - 1 - suitOfCard(lesser_card); + const int higher_suit = num_suits - 1 - suitOfCard(higher_card); + const int lesser_rank = rankOfCard(lesser_card); + const int higher_rank = rankOfCard(higher_card); + + // There is an ordering on the cards, which indexes an upper triangular matrix + // like so (example for num_cards_in_deck=4) + // + // j -> + // 0 1 2 3 + // 0 # 0 1 2 + // i 1 # 3 4 + // | 2 # 5 + // v 3 # + // + const int i = lesser_rank * num_suits + lesser_suit; + const int j = higher_rank * num_suits + higher_suit; + const int n = num_suits * num_ranks; + // Compute index k in the triangle. + return i * (2*n - i - 3) / 2 + j - 1; +} + +// Normally distribution of cards happens in a tree, one card at a time to each +// player. We flatten this distribution and omit repetitions. +// The yield function is called for each unique tuples of cards the players can +// receive. +void DistributeHands(const logic::CardSet& full_deck, + uint8_t num_players, + uint8_t num_hole_cards, + std::function>)> yield) { + // Taken and modified from CardSet::SampleCards() + int nbCards = num_hole_cards * num_players; + + uint64_t p = 0; + for (int i = 0; i < nbCards; ++i) { + p += (1 << i); + } + // Enumerates all the uint64_t integers that with nbCards 1-bits. + // The final n is ignored. It is fine as long as the rank < 16 (a property + // satisfied by CardSet, as ranks >= 16 are not representable). + int n_choose_k = 0; + for (uint64_t n = logic::bit_twiddle_permute(p); n > p; + p = n, n = logic::bit_twiddle_permute(p)) { + // Checks whether the CardSet represented by p is inside the CardSet. + uint64_t combo = p & full_deck.cs.cards; + if (__builtin_popcountl(combo) == nbCards) { + // Generate all partitions of size num_hole_cards: + logic::CardSet c; + c.cs.cards = combo; + std::vector x = c.ToCardArray(); + + yield({{x[0], x[1]}, {x[2], x[3]}}); + yield({{x[0], x[2]}, {x[1], x[3]}}); + yield({{x[0], x[3]}, {x[1], x[2]}}); + yield({{x[2], x[3]}, {x[0], x[1]}}); + yield({{x[1], x[3]}, {x[0], x[2]}}); + yield({{x[1], x[2]}, {x[0], x[3]}}); + ++n_choose_k; + } + } + // 52 choose 4 + SPIEL_CHECK_EQ(n_choose_k, 270725); +} + +const std::vector UniversalPokerState::GetEncodingBase() const { + const int num_hole_cards = acpc_game_->GetNbHoleCardsRequired(); + SPIEL_CHECK_EQ(num_hole_cards, 2); // Only this case is implemented. + const int num_distribute_cards = num_hole_cards * num_players_; + const int full_deck_cards = acpc_game_->NumSuitsDeck() + * acpc_game_->NumRanksDeck(); + return std::vector (num_distribute_cards, full_deck_cards); +} + +// Distribute the hole cards to each player in one large chance node. +// Modify the chance probs to be a well-formed public belief state. +std::vector> +UniversalPokerState::DistributeHandCardsInSubgame() const { + const int num_hole_cards = acpc_game_->GetNbHoleCardsRequired(); + // Only this case is supported. Generalizing the code to other pokers + // should not be too difficult. + SPIEL_CHECK_EQ(num_hole_cards, 2); + SPIEL_CHECK_EQ(num_players_, 2); + + const logic::CardSet full_deck(acpc_game_->NumSuitsDeck(), + acpc_game_->NumRanksDeck()); + + int possible_hands = kSubgameUniqueHands; // 52*51/2 + const int possible_hand_pairs = 270725 * 6; // (52 choose 4) * choose_hands + const double hole_cards_chance_prob = 1. 
/ possible_hand_pairs; + + const int reach_offset = possible_hands; + SPIEL_DCHECK_EQ(reach_offset*num_players_, handReaches_.size()); + + std::vector> outcomes; + outcomes.reserve(possible_hand_pairs); + double normalizer = 0.; // We need to normalize the probs. + const std::vector encoding_bases = GetEncodingBase(); + const auto add_outcome = [&](const std::vector>& cards) { + // Encode OpenSpiel action. + // logic::CardSet uses 64 bits for its representation, the same size as + // open_spiel::Action. This means that we cannot easily encode distribution + // of the 4 cards along with their partition assignment to each player: + // as the name suggests, it's used for set representations, not for list + // representation. This is unfortunate, so instead we compute specific + // action numbers via multiplication of RankActionMixedBase. + SPIEL_CHECK_EQ(cards.size(), num_players_); + SPIEL_CHECK_EQ(cards[0].size(), num_hole_cards); + std::vector flatten_hole_cards; + flatten_hole_cards.reserve(num_players_ * num_hole_cards); + for (int i = 0; i < num_players_; ++i) { + for (int j = 0; j < num_hole_cards; ++j) { + flatten_hole_cards.push_back(cards[i][j]); + } + } + Action encoded = RankActionMixedBase(encoding_bases, flatten_hole_cards); + + // Compute outcome probability. + double p = hole_cards_chance_prob; + for (int pl = 0; pl < num_players_; ++pl) { + const int hole_idx = GetHoleCardsReachIndex(cards[pl][0], cards[pl][1], + acpc_game_->NumSuitsDeck(), + acpc_game_->NumRanksDeck()); + const double player_reach = handReaches_[pl*reach_offset + hole_idx]; + p *= player_reach; + } + + // We generate all hands, however not all them are necessarily feasible: + // some hands are prohibited if there are cards on the board. If we used + // those hands, the board cards could not then appear on the board. + for (int card : flatten_hole_cards) { + if (!deck_.ContainsCards(card)) { + p *= 0; + break; + } + } + + outcomes.push_back({encoded, p}); + normalizer += p; + }; + DistributeHands(full_deck, NumPlayers(), num_hole_cards, add_outcome); + + SPIEL_CHECK_GT(normalizer, 0.); + for (auto&[action, p] : outcomes) p /= normalizer; + SPIEL_CHECK_EQ(outcomes.size(), possible_hand_pairs); + return outcomes; +} + +bool UniversalPokerState::IsDistributingSingleCard() const { + return handReaches_.empty() || MoveNumber() > 0; +} + + +std::vector> UniversalPokerState::ChanceOutcomes() + const { + SPIEL_CHECK_TRUE(IsChanceNode()); + if (IsDistributingSingleCard()) { + auto available_cards = LegalActions(); + const int num_cards = available_cards.size(); + const double p = 1.0 / num_cards; + + // We need to convert std::vector into std::vector. + std::vector> outcomes; + outcomes.reserve(num_cards); + for (const auto &card : available_cards) { + outcomes.push_back({Action{card}, p}); + } + return outcomes; + } else { + return DistributeHandCardsInSubgame(); + } +} + +std::vector UniversalPokerState::LegalActions() const { + if (IsChanceNode()) { + if (IsDistributingSingleCard()) { + const logic::CardSet full_deck(acpc_game_->NumSuitsDeck(), + acpc_game_->NumRanksDeck()); + const std::vector all_cards = full_deck.ToCardArray(); + std::vector actions; + actions.reserve(deck_.NumCards()); + for (uint32_t i = 0; i < full_deck.NumCards(); i++) { + if (deck_.ContainsCards(all_cards[i])) actions.push_back(i); + } + return actions; + } else { + // Strip away probability outcomes. 
+ std::vector> outcomes = + DistributeHandCardsInSubgame(); + std::vector actions; + actions.reserve(outcomes.size()); + for (auto&[action, p] : outcomes) actions.push_back(action); + return actions; + } + } + + std::vector legal_actions; + + if (betting_abstraction_ != BettingAbstraction::kFULLGAME) { + if (ACTION_FOLD & possibleActions_) legal_actions.push_back(kFold); + if (ACTION_CHECK_CALL & possibleActions_) legal_actions.push_back(kCall); + if (ACTION_BET & possibleActions_) legal_actions.push_back(kBet); + if (ACTION_ALL_IN & possibleActions_) legal_actions.push_back(kAllIn); + + // For legacy reasons, kHalfPot is the biggest action (in terms of the + // action representation). + // Note that FCHPA only tells the players about HalfPot + FCPA, but it will + // accept most of the other ones. + if (ACTION_BET & possibleActions_ && betting_abstraction_ == kFCHPA) { + legal_actions.push_back(kHalfPot); + } + + return legal_actions; + } else { + if (acpc_state_.IsFinished()) { + return legal_actions; + } + if (acpc_state_.IsValidAction( + acpc_cpp::ACPCState::ACPCActionType::ACPC_FOLD, 0)) { + legal_actions.push_back(kFold); + } + if (acpc_state_.IsValidAction( + acpc_cpp::ACPCState::ACPCActionType::ACPC_CALL, 0)) { + legal_actions.push_back(kCall); + } + int32_t min_bet_size = 0; + int32_t max_bet_size = 0; + if (acpc_state_.RaiseIsValid(&min_bet_size, &max_bet_size)) { + const int original_size = legal_actions.size(); + legal_actions.resize(original_size + max_bet_size - min_bet_size + 1); + std::iota(legal_actions.begin() + original_size, legal_actions.end(), + min_bet_size); + } + } + return legal_actions; +} + +int UniversalPokerState::PotSize(double multiple) const { + const project_acpc_server::State &state = acpc_state_.raw_state(); + const project_acpc_server::Game &game = acpc_state_.game()->Game(); + const int pot_size = absl::c_accumulate( + absl::Span(state.spent, game.numPlayers), 0); + const int amount_to_call = + state.maxSpent - + state.spent[project_acpc_server::currentPlayer(&game, &state)]; + const int pot_after_call = amount_to_call + pot_size; + return std::round(state.maxSpent + multiple * pot_after_call); +} + +int UniversalPokerState::AllInSize() const { + int32_t unused_min_bet_size; + int32_t all_in_size; + acpc_state_.RaiseIsValid(&unused_min_bet_size, &all_in_size); + return all_in_size; +} + +// We first deal the cards to each player, dealing all the cards to the first +// player first, then the second player, until all players have their private +// cards. +void UniversalPokerState::DoApplyAction(Action action_id) { + if (IsChanceNode()) { + if (IsDistributingSingleCard()) { + // In chance nodes, the action_id is an index into the full deck. + uint8_t card = + logic::CardSet(acpc_game_->NumSuitsDeck(), acpc_game_->NumRanksDeck()) + .ToCardArray()[action_id]; + deck_.RemoveCard(card); + actionSequence_ += 'd'; + actionSequenceSizings_.push_back(0); + + // Check where to add this card + if (hole_cards_dealt_ < + acpc_game_->GetNbPlayers() * acpc_game_->GetNbHoleCardsRequired()) { + AddHoleCard(card); + _CalculateActionsAndNodeType(); + return; + } + + if (board_cards_dealt_ < + acpc_game_->GetNbBoardCardsRequired(acpc_state_.GetRound())) { + AddBoardCard(card); + _CalculateActionsAndNodeType(); + return; + } + } else { + // We are creating the subgame: therefore we distribute the hole cards + // to each player. 
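+      // As an illustration (assuming RankActionMixedBase treats the first
+      // digit as the most significant): with 2 players, 2 hole cards and a
+      // 52-card deck, GetEncodingBase() is {52, 52, 52, 52}, so the chance
+      // action encodes the dealt cards c0..c3 (player 0's cards first) as
+      // ((c0 * 52 + c1) * 52 + c2) * 52 + c3; UnrankActionMixedBase below
+      // recovers the four card indices from action_id.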
+ std::vector base = GetEncodingBase(); + std::vector cards = UnrankActionMixedBase(action_id, base); + int num_hole_cards = acpc_game_->GetNbHoleCardsRequired(); + int num_cards_before_dist = deck_.NumCards(); + for (int pl = 0; pl < num_players_; ++pl) { + for (int i = 0; i < num_hole_cards; ++i) { + int card = cards.at(pl*num_hole_cards + i); + SPIEL_CHECK_GE(rankOfCard(card), 0); + SPIEL_CHECK_LT(rankOfCard(card), MAX_RANKS); + SPIEL_CHECK_GE(suitOfCard(card), 0); + SPIEL_CHECK_LT(suitOfCard(card), MAX_SUITS); + SPIEL_CHECK_TRUE(deck_.ContainsCards(card)); + + acpc_state_.AddHoleCard(pl, i, card); + deck_.RemoveCard(card); + SPIEL_CHECK_FALSE(deck_.ContainsCards(card)); + ++hole_cards_dealt_; + } + } + SPIEL_CHECK_EQ(deck_.NumCards(), num_cards_before_dist - cards.size()); + _CalculateActionsAndNodeType(); + } + } else { + int action_int = static_cast(action_id); + if (action_int == kFold) { + ApplyChoiceAction(ACTION_FOLD, 0); + return; + } + if (action_int == kCall) { + ApplyChoiceAction(ACTION_CHECK_CALL, 0); + return; + } + if (betting_abstraction_ == BettingAbstraction::kFC) { + SpielFatalError(absl::StrCat( + "Tried to apply action that was not fold or call. Action: ", + State::ActionToString(action_id))); + } + if (betting_abstraction_ != BettingAbstraction::kFULLGAME) { + if (action_int == kHalfPot) { + ApplyChoiceAction(ACTION_BET, PotSize(0.5)); + return; + } + if (action_int == kBet && acpc_game_->IsLimitGame()) { + // In a limit game, the bet size is fixed, so the ACPC code expects size + // to be 0. + ApplyChoiceAction(ACTION_BET, /*size=*/0); + return; + } + if (action_int == kBet && !acpc_game_->IsLimitGame()) { + ApplyChoiceAction(ACTION_BET, PotSize()); + return; + } + if (action_int == kAllIn) { + ApplyChoiceAction(ACTION_ALL_IN, AllInSize()); + return; + } + // FCHPA allows for arbitrary bets. + if (betting_abstraction_ == BettingAbstraction::kFCHPA) { + SPIEL_CHECK_LE(action_int, acpc_game_->Game().stack[0]); + ApplyChoiceAction(ACTION_BET, action_int); + return; + } + } + if (betting_abstraction_ != BettingAbstraction::kFULLGAME) { + SpielFatalError(absl::StrCat( + "Tried to apply action that was not allowed by the betting " + "abstraction. Action: ", + State::ActionToString(action_id), + ", abstraction: ", betting_abstraction_)); + } + if (action_int >= static_cast(kBet) && + action_int <= NumDistinctActions()) { + ApplyChoiceAction(ACTION_BET, action_int); + return; + } + SpielFatalError(absl::StrFormat("Action not recognized: %i", action_id)); + } +} + +double UniversalPokerState::GetTotalReward(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, acpc_game_->GetNbPlayers()); + return acpc_state_.ValueOfState(player); +} + +std::unique_ptr UniversalPokerState::ResampleFromInfostate( + int player_id, std::function rng) const { + std::unique_ptr potential_histories = + GetHistoriesConsistentWithInfostate(player_id); + const int index = SamplerFromRng(rng)(potential_histories->second); + return std::move(potential_histories->first[index]); +} + +std::unique_ptr +UniversalPokerState::GetHistoriesConsistentWithInfostate(int player_id) const { + // This is only implemented for 2 players. 
+ if (acpc_game_->GetNbPlayers() != 2) return {}; + + logic::CardSet is_cards; + logic::CardSet our_cards = HoleCards(player_id); + for (uint8_t card : our_cards.ToCardArray()) is_cards.AddCard(card); + for (uint8_t card : BoardCards().ToCardArray()) is_cards.AddCard(card); + logic::CardSet fresh_deck(/*num_suits=*/acpc_game_->NumSuitsDeck(), + /*num_ranks=*/acpc_game_->NumRanksDeck()); + for (uint8_t card : is_cards.ToCardArray()) fresh_deck.RemoveCard(card); + auto dist = absl::make_unique(); + + // We only consider half the possible hands as we only look at each pair of + // hands once, i.e. order does not matter. + const int num_hands = + 0.5 * fresh_deck.NumCards() * (fresh_deck.NumCards() - 1); + dist->first.reserve(num_hands); + for (uint8_t hole_card1 : fresh_deck.ToCardArray()) { + logic::CardSet subset_deck = fresh_deck; + subset_deck.RemoveCard(hole_card1); + for (uint8_t hole_card2 : subset_deck.ToCardArray()) { + if (hole_card1 < hole_card2) continue; + std::unique_ptr root = game_->NewInitialState(); + if (player_id == 0) { + for (uint8_t card : our_cards.ToCardArray()) root->ApplyAction(card); + root->ApplyAction(hole_card1); + root->ApplyAction(hole_card2); + } else if (player_id == 1) { + root->ApplyAction(hole_card1); + root->ApplyAction(hole_card2); + for (uint8_t card : our_cards.ToCardArray()) root->ApplyAction(card); + } + SPIEL_CHECK_FALSE(root->IsChanceNode()); + dist->first.push_back(std::move(root)); + } + } + SPIEL_DCHECK_EQ(dist->first.size(), num_hands); + const double divisor = 1. / static_cast(dist->first.size()); + dist->second.assign(dist->first.size(), divisor); + return dist; +} + +/** + * Universal Poker Game Constructor + * @param params + */ +UniversalPokerGame::UniversalPokerGame(const GameParameters ¶ms) + : Game(kGameType, params), + gameDesc_(parseParameters(params)), + acpc_game_(gameDesc_), + potSize_(ParameterValue("potSize")), + boardCards_(ParameterValue("boardCards")), + handReaches_(ParameterValue("handReaches")) { + std::string betting_abstraction = + ParameterValue("bettingAbstraction"); + if (betting_abstraction == "fc") { + betting_abstraction_ = BettingAbstraction::kFC; + } else if (betting_abstraction == "fcpa") { + betting_abstraction_ = BettingAbstraction::kFCPA; + } else if (betting_abstraction == "fchpa") { + betting_abstraction_ = BettingAbstraction::kFCHPA; + } else if (betting_abstraction == "fullgame") { + betting_abstraction_ = BettingAbstraction::kFULLGAME; + } else { + SpielFatalError(absl::StrFormat("bettingAbstraction: %s not supported.", + betting_abstraction)); + } + max_game_length_ = MaxGameLength(); + SPIEL_CHECK_TRUE(max_game_length_.has_value()); +} + +std::unique_ptr UniversalPokerGame::NewInitialState() const { + return absl::make_unique(shared_from_this()); +} + +std::vector UniversalPokerGame::InformationStateTensorShape() const { + // Layout: + // my player number: num_players bits + // my cards: Initial deck size bits (1 means you have the card), i.e. + // MaxChanceOutcomes() = NumSuits * NumRanks + // public cards: Same as above, but for the public cards. + // action sequence: (max game length)*2 bits (fold/raise/call/all-in) + // action sequence sizings: (max game length) integers with value >= 0, + // 0 when corresponding to 'deal' or 'check'. 
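+  // For a heads-up game with a full 52-card deck this works out to
+  // 2 + 52 + 52 + 2 * MaxGameLength() + MaxGameLength() entries, matching the
+  // expression returned below.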
+ const int num_players = acpc_game_.GetNbPlayers(); + const int gameLength = MaxGameLength(); + const int total_num_cards = MaxChanceOutcomes(); + + return {num_players + 2 * total_num_cards + (2 + 1) * gameLength}; +} + +std::vector UniversalPokerGame::ObservationTensorShape() const { + // One-hot encoding for player number (who is to play). + // 2 slots of cards (total_cards_ bits each): private card, public card + // Followed by the contribution of each player to the pot + const int num_players = acpc_game_.GetNbPlayers(); + const int total_num_cards = MaxChanceOutcomes(); + return {2 * (num_players + total_num_cards)}; +} + +double UniversalPokerGame::MaxCommitment() const { + const auto &acpc_game = acpc_game_.Game(); + if (!acpc_game_.IsLimitGame()) { + // In nolimit games a player can shove all-in at any point in any betting + // round. Therefore the global max commitment is simply the deepest stack at + // the table. + // (Technically we could bound this to the max *meaningful* commitment by + // also looking at the second largest stack, but by convention the deepest + // stack is allowed to bet more than this amount as a valid action. So for + // sake of simplicity we allow this larger amount as a valid commitment.) + double deepest_stack = 0; + for (int i = 0; i < acpc_game_.GetNbPlayers(); ++i) { + deepest_stack = std::max(deepest_stack, acpc_game_.StackSize(i)); + } + return deepest_stack; + } + + // Otherwise we're in a limit game - meaning StackSize is meaningless (as ACPC + // leaves them as an INT32 MAX_INT). + + // Therefore: here the most a player could put into the pot is the raise + // amounts on each round times the maximum number of raises, plus the original + // chips they put in to play, which has the big blind as an upper bound. + double limit_max_commit = big_blind(); + for (int i = 0; i < acpc_game_.NumRounds(); ++i) { + limit_max_commit += acpc_game.maxRaises[i] * acpc_game.raiseSize[i]; + } + return limit_max_commit; +} + +double UniversalPokerGame::MaxUtility() const { + // In poker, the utility is defined as the money a player has at the end of + // the game minus then money the player had before starting the game. + + if (!acpc_game_.IsLimitGame()) { + // In no-limit games, because poker is zero-sum and therefore this money can + // only come from other players, the theoretical global max utility at a + // table can only be earned either of the two (or more) deepest stacks at + // the table. This occurs when all players are all-in simultaneously (with + // the possible exception of the deepest stack if it is a 'singular' deepest + // stack; in which case it simply has to match the all-in amount of all + // other players). This means we can compute the theoretical maximum global + // utility possible across all players by assuming we are playing as (one + // of) the deepest-stacked player(s) and summing up the stacks of all other + // players. + uint32_t max_stack = 0; + for (int i = 0; i < acpc_game_.GetNbPlayers(); ++i) { + max_stack = std::max(max_stack, acpc_game_.StackSize(i)); + } + return static_cast(acpc_game_.TotalMoney() - max_stack); + } + + // In 'real' limit games the above bound would normally still apply, but ACPC + // actually doesn't support stack sizes for limit games (it ignores the input + // and appears to leave everything as an INT32 MAX_INTEGER). So here we can + // instead simply look at the max commitment and number of players - e.g. 
what + // the value would be assuming there are as many bets as possible and that + // there were as many callers as possible for each bet. + return MaxCommitment() * (acpc_game_.GetNbPlayers() - 1); +} + +double UniversalPokerGame::MinUtility() const { + // In poker, the utility is defined as the money a player has at the end of + // the game minus the money the player had before starting the game. As such, + // the most a player can lose in a hand is the max amount they can lose when + // betting the maximum. (By convention this is not *necesarily* the actual + // amount they bet in certain cases as it is allowed to bet more than the + // maximum "meaningful" amount. E.g. any time a player goes all-in with a + // stack that is larger than all other players' stacks.) + + if (!acpc_game_.IsLimitGame()) { + // In no-limit games with more than one stack tied for deepest, the minimum + // utility bound is is simply the negative of one of said deepest stacks. + // But in situations where there is a singular deepest stack, this value is + // instead the negative of of (one of) the *second-deepest* stacks at the + // table - representing a situation where the deepest stack shoved, was + // called by second-deepest stack, and lost (or vice versa). + double max_stack = 0; + // Note: should equal max_stack in case of a tie for deepest + double second_max_stack = 0; + for (int i = 0; i < acpc_game_.GetNbPlayers(); ++i) { + double ith_stack = acpc_game_.StackSize(i); + if (ith_stack > max_stack) { + second_max_stack = max_stack; + max_stack = ith_stack; + } else { + second_max_stack = std::max(second_max_stack, ith_stack); + } + } + return -1 * second_max_stack; + } + + // On the other hand, ACPC game doesn't support stack sizes in limit games (it + // leaves them all set to INT32 MAX_INTEGER). So all we can consider is the + // maximum commitment. + return -1 * MaxCommitment(); +} + +int UniversalPokerGame::MaxChanceOutcomes() const { + return acpc_game_.NumSuitsDeck() * acpc_game_.NumRanksDeck(); +} + +int UniversalPokerGame::NumPlayers() const { return acpc_game_.GetNbPlayers(); } + +int UniversalPokerGame::NumDistinctActions() const { + if (betting_abstraction_ == BettingAbstraction::kFULLGAME) { + // 0 -> fold, 1 -> check/call, N -> bet size + return max_stack_size_ + 1; + } else if (betting_abstraction_ == BettingAbstraction::kFCHPA) { + return kNumActionsFCHPA; + } else { + return GetMaxBettingActions(acpc_game_); + } +} + +int UniversalPokerGame::MaxGameLength() const { + // We cache this as this is very slow to calculate. + if (max_game_length_) return *max_game_length_; + + // Make a good guess here because bruteforcing the tree is far too slow + // One Terminal Action + int length = 1; + + // Deal Actions + length += acpc_game_.GetTotalNbBoardCards() + + acpc_game_.GetNbHoleCardsRequired() * acpc_game_.GetNbPlayers(); + + // The longest game (with a single betting round, for simplicity) consists of: + // n-1 players checking, + // 1 player betting, n-2 players calling, + // 1 player raising, n-2 players calling, + // etc..., + // 1 player raising, n-1 players calling + + // Check Actions + length += (NumPlayers() * acpc_game_.NumRounds()); + + // Bet/Raise/Call Actions + double maxStack = 0; + double maxBlind = 0; + for (uint32_t p = 0; p < NumPlayers(); p++) { + maxStack = + acpc_game_.StackSize(p) > maxStack ? acpc_game_.StackSize(p) : maxStack; + maxBlind = + acpc_game_.BlindSize(p) > maxBlind ? 
acpc_game_.BlindSize(p) : maxBlind; + } + + int max_num_raises = 0; + if (betting_abstraction_ == BettingAbstraction::kFC) { + // no raises + } else if (betting_abstraction_ == BettingAbstraction::kFCPA) { + double pot_size = maxBlind * NumPlayers(); + while (pot_size / NumPlayers() < maxStack) { + max_num_raises++; + pot_size += pot_size * NumPlayers(); + } + } else if (betting_abstraction_ == BettingAbstraction::kFCHPA) { + double pot_size = maxBlind * NumPlayers(); + while (pot_size / NumPlayers() < maxStack) { + max_num_raises++; + pot_size += NumPlayers() * pot_size/2; + } + } else if (betting_abstraction_ == BettingAbstraction::kFULLGAME) { + max_num_raises = (maxStack + maxBlind - 1)/maxBlind; // ceil divide + } else { + SpielFatalError("Unknown Betting Abstraction"); + } + // each bet/raise is followed by n-2 calls, for a total of n-1 actions: + length += max_num_raises * (NumPlayers() - 1); + return length; +} + +/** + * Parses the Game Paramters and makes a gameDesc out of it + * @param map + * @return + */ +std::string UniversalPokerGame::parseParameters(const GameParameters &map) { + std::string generated_gamedef = "GAMEDEF\n"; + + absl::StrAppend( + &generated_gamedef, ParameterValue("betting"), "\n", + "numPlayers = ", ParameterValue("numPlayers"), "\n", + "numRounds = ", ParameterValue("numRounds"), "\n", + "numsuits = ", ParameterValue("numSuits"), "\n", + "firstPlayer = ", ParameterValue("firstPlayer"), "\n", + "numRanks = ", ParameterValue("numRanks"), "\n", + "numHoleCards = ", ParameterValue("numHoleCards"), "\n", + "numBoardCards = ", ParameterValue("numBoardCards"), "\n"); + + std::string max_raises = ParameterValue("maxRaises"); + if (!max_raises.empty()) { + absl::StrAppend(&generated_gamedef, "maxRaises = ", max_raises, "\n"); + } + + if (ParameterValue("betting") == "limit") { + std::string raise_size = ParameterValue("raiseSize"); + if (!raise_size.empty()) { + absl::StrAppend(&generated_gamedef, "raiseSize = ", raise_size, "\n"); + } + } else if (ParameterValue("betting") == "nolimit") { + std::string stack = ParameterValue("stack"); + if (!stack.empty()) { + absl::StrAppend(&generated_gamedef, "stack = ", stack, "\n"); + } + } else { + SpielFatalError(absl::StrCat("betting should be limit or nolimit, not ", + ParameterValue("betting"))); + } + + absl::StrAppend(&generated_gamedef, + "blind = ", ParameterValue("blind"), "\n"); + absl::StrAppend(&generated_gamedef, "END GAMEDEF\n"); + + std::vector blinds = + absl::StrSplit(ParameterValue("blind"), ' '); + big_blind_ = 0; + for (const std::string &blind : blinds) { + big_blind_ = std::max(big_blind_, std::stoi(blind)); + } + // By requiring a blind/ante of at least a single chip, we're able to + // structure the action space more intuitively in the kFULLGAME setting. + // Specifically, action 0 -> fold, 1 -> call, and N -> raise to N chips. + // While the ACPC server does not require it, in practice poker is always + // played with a blind or ante, so this is a minor restriction. + if (big_blind_ <= 0) { + SpielFatalError("Must have a blind of at least one chip."); + } + std::vector stacks = + absl::StrSplit(ParameterValue("stack"), ' '); + max_stack_size_ = 0; + for (const std::string &stack : stacks) { + max_stack_size_ = std::max(max_stack_size_, std::stoi(stack)); + } + return generated_gamedef; +} + +const char *actions = "0df0c000p0000000a"; + +void UniversalPokerState::ApplyChoiceAction(StateActionType action_type, + int size) { + SPIEL_CHECK_GE(cur_player_, 0); // No chance not terminal. 
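+  // The file-scope `actions` string above maps each StateActionType value to
+  // the character recorded in actionSequence_: index 1 (ACTION_DEAL) -> 'd',
+  // 2 (ACTION_FOLD) -> 'f', 4 (ACTION_CHECK_CALL) -> 'c', 8 (ACTION_BET) ->
+  // 'p' and 16 (ACTION_ALL_IN) -> 'a'; every other index is the unused '0'.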
+ const auto &up_game = static_cast(*game_); + + // We redirect these actions to check/call, as they are semantically a + // check/call action. For some reason, ACPC prefers it this way. + if (size == up_game.MaxCommitment() * up_game.NumPlayers()) { + action_type = StateActionType::ACTION_CHECK_CALL; + size = 0; + } + + actionSequence_ += (char)actions[action_type]; + + // Note: call actions all have size '0', which means that the + // actionSequenceSizing value will be identical regardless of what size stack + // the caller has in all-in situations. + actionSequenceSizings_.push_back(size); + if (action_type == ACTION_DEAL) SpielFatalError("Cannot apply deal action."); + acpc_state_.DoAction(UniversalPokerActionTypeToACPCActionType(action_type), + size); + _CalculateActionsAndNodeType(); +} + +void UniversalPokerState::_CalculateActionsAndNodeType() { + possibleActions_ = 0; + + if (acpc_state_.IsFinished()) { + if (acpc_state_.NumFolded() >= acpc_game_->GetNbPlayers() - 1) { + // All players except one has fold. + cur_player_ = kTerminalPlayerId; + } else { + if (board_cards_dealt_ < + acpc_game_->GetNbBoardCardsRequired(acpc_state_.GetRound())) { + cur_player_ = kChancePlayerId; + possibleActions_ = ACTION_DEAL; + return; + } + // Showdown! + cur_player_ = kTerminalPlayerId; + } + + } else { + // Check if we need to deal cards. We assume all cards are dealt at the + // start of the game. + if (hole_cards_dealt_ < + acpc_game_->GetNbHoleCardsRequired() * acpc_game_->GetNbPlayers()) { + cur_player_ = kChancePlayerId; + possibleActions_ = ACTION_DEAL; + return; + } + // 2. We need to deal a public card. + if (board_cards_dealt_ < + acpc_game_->GetNbBoardCardsRequired(acpc_state_.GetRound())) { + cur_player_ = kChancePlayerId; + possibleActions_ = ACTION_DEAL; + return; + } + + // Check for CHOICE Actions + cur_player_ = acpc_state_.CurrentPlayer(); + if (acpc_state_.IsValidAction( + acpc_cpp::ACPCState::ACPCActionType::ACPC_FOLD, 0)) { + possibleActions_ |= ACTION_FOLD; + } + if (acpc_state_.IsValidAction( + acpc_cpp::ACPCState::ACPCActionType::ACPC_CALL, 0)) { + possibleActions_ |= ACTION_CHECK_CALL; + } + + int potSize = 0; + int allInSize = 0; + // We have to call this as this sets potSize and allInSize_. + bool valid_to_raise = acpc_state_.RaiseIsValid(&potSize, &allInSize); + if (betting_abstraction_ == BettingAbstraction::kFC) return; + if (valid_to_raise) { + if (acpc_game_->IsLimitGame()) { + potSize = 0; + // There's only one "bet" allowed in Limit, which is "all-in or fixed + // bet". + possibleActions_ |= ACTION_BET; + } else { + int cur_spent = acpc_state_.CurrentSpent(acpc_state_.CurrentPlayer()); + int pot_raise_to = + acpc_state_.TotalSpent() + 2 * acpc_state_.MaxSpend() - cur_spent; + + if (pot_raise_to >= potSize && pot_raise_to <= allInSize) { + potSize = pot_raise_to; + possibleActions_ |= ACTION_BET; + } + + if (pot_raise_to != allInSize) { + // If the raise to amount happens to match the number of chips I have, + // then this action was already added as a pot-bet. + possibleActions_ |= ACTION_ALL_IN; + } + } + } + } +} + +const int UniversalPokerState::GetPossibleActionCount() const { + // _builtin_popcount(int) function is used to count the number of one's + return __builtin_popcount(possibleActions_); +} + +open_spiel::Action ACPCActionToOpenSpielAction( + const project_acpc_server::Action &action, + const UniversalPokerState &state) { + // We assign this here as we cannot initialize a variable within a switch + // statement. 
+ const auto &up_game = + static_cast(*state.GetGame()); + switch (action.type) { + case project_acpc_server::ActionType::a_fold: + return ActionType::kFold; + case project_acpc_server::ActionType::a_call: + return ActionType::kCall; + case project_acpc_server::ActionType::a_raise: + SPIEL_CHECK_NE(state.betting(), BettingAbstraction::kFC); + // Note: the following code is being kept for legacy reasons. Previous + // comment kept here for posterity: + // """ + // The maximum utility is exactly equal to the all-in amount for both + // players. + // """ + // (Said comment however A. assumes a heads-up game and B. is technically + // incorrect anyways; see MaxUtility for more details.) + if (action.size == up_game.MaxCommitment() * up_game.NumPlayers()) { + return ActionType::kCall; + } + if (action.size == state.PotSize(0.5)) { + return ActionType::kHalfPot; + } + if (action.size == state.PotSize()) return ActionType::kBet; + if (action.size == state.AllInSize()) return ActionType::kAllIn; + if (state.betting() == BettingAbstraction::kFCHPA) { + return action.size; + } + if (state.betting() != BettingAbstraction::kFULLGAME) { + SpielFatalError(absl::StrCat( + "Unsupported bet size: ", action.size, ", pot: ", state.PotSize(), + ", all_in: ", state.AllInSize(), + ", max_commitment: ", state.acpc_state().raw_state().maxSpent, + ", state: ", state.ToString(), + ", history: ", state.HistoryString())); + } + SPIEL_CHECK_EQ(state.betting(), BettingAbstraction::kFULLGAME); + return ActionType::kBet + action.size; + case project_acpc_server::ActionType::a_invalid: + SpielFatalError("Invalid action type."); + default: + SpielFatalError(absl::StrCat("Type not found. Type: ", action.type)); + } + // Will never get called. + return kInvalidAction; +} + +std::shared_ptr LoadUniversalPokerGameFromACPCGamedef( + const std::string &acpc_gamedef) { + return LoadGame(logic::GamedefToOpenSpielParameters(acpc_gamedef)); +} + +std::shared_ptr MakeRandomSubgame(std::mt19937 &rng, int pot_size, + std::string board_cards, + std::vector hand_reach) { + constexpr const char* base_game = + "universal_poker(" + "betting=nolimit," + "numPlayers=2," + "numRounds=4," + "blind=100 50," + "firstPlayer=2 1 1 1," + "numSuits=4," + "numRanks=13," + "numHoleCards=2," + "numBoardCards=0 3 1 1," + "stack=20000 20000," + "bettingAbstraction=fcpa," + "potSize=%d," + "boardCards=%s," + "handReaches=%s" + ")"; + + if (pot_size == -1) { + // 40k is total money in the game -- sum of the players stacks (20k+20k). + // 50 is the size of the small blind, 2*50 is big blind = minimum pot size. + // As both players need to match bets, the pot size must be divisible by 2. + std::uniform_int_distribution dist(50, 20000); + pot_size = dist(rng) * 2; + } + if (board_cards.empty()) { + // Pick a round, i.e. 3/4/5 cards. + std::uniform_int_distribution rnd_cards(3, 5); + std::uniform_int_distribution rnd_rank(0, MAX_RANKS); + std::uniform_int_distribution rnd_suit(0, MAX_SUITS); + // Populate random non-repeating cards. 
+ int num_cards = rnd_cards(rng); + std::vector cards; + cards.reserve(num_cards); + while (cards.size() != num_cards) { + int rank = rnd_rank(rng); + int suit = rnd_suit(rng); + int card = makeCard(rank, suit); + if (std::find(cards.begin(), cards.end(), card) == cards.end()) { + cards.push_back(card); + } + } + logic::CardSet set(cards); + board_cards = set.ToString(); + } + if (hand_reach.empty()) { + // Normally uniform_real_distribution is defined on open interval [0, 1) + // We make it into a closed interval [0, 1] thanks to std::nextafter. + double next_after_one = std::nextafter(1.0, 2.0); + SPIEL_CHECK_NE(1.0, next_after_one); + SPIEL_CHECK_LT(1.0, next_after_one); + std::uniform_real_distribution dist(0.0, next_after_one); + for (int i = 0; i < 2*kSubgameUniqueHands; ++i) { + hand_reach.push_back(dist(rng)); + } + } + std::string reach = absl::StrJoin(hand_reach.begin(), hand_reach.end(), " "); + return LoadGame(absl::StrFormat(base_game, pot_size, board_cards, reach)); +} + +std::ostream &operator<<(std::ostream &os, const BettingAbstraction &betting) { + os << BettingAbstractionToString(betting); + return os; +} + +class UniformRestrictedActionsFactory : public BotFactory { + // Asks the bot whether it can play the game as the given player. + bool CanPlayGame(const Game &game, Player player_id) const override { + return absl::StrContains(game.GetType().short_name, "poker"); + } + + // Creates an instance of the bot for a given game and a player + // for which it should play. + std::unique_ptr Create(std::shared_ptr game, + Player player_id, + const GameParameters &bot_params) const override { + SPIEL_CHECK_GT(bot_params.count("policy_name"), 0); + absl::string_view policy_name = bot_params.at("policy_name").string_value(); + if (policy_name == "AlwaysCall") { + return MakePolicyBot(/*seed=*/0, + std::make_shared( + std::vector({ActionType::kCall}))); + + } else if (policy_name == "HalfCallHalfRaise") { + std::vector actions = {ActionType::kCall}; + + // First, we check if it's universal poker. Add the bet action if it's a + // limit ACPC game or Leduc poker. 
+ if (game->GetType().short_name == "universal_poker") { + const auto *up_game = down_cast(game.get()); + if (up_game->GetACPCGame()->IsLimitGame()) { + actions.push_back(ActionType::kBet); + } + } else if (game->GetType().short_name == "leduc_poker") { + // Add the betting + actions.push_back(ActionType::kBet); + } else { + SpielFatalError( + absl::StrCat("HalfCallHalfRaise is not implemented for other " + "environments, such as: ", + game->GetType().short_name, + ", it is only implemented for Leduc and HUL.")); + } + return MakePolicyBot( + /*seed=*/0, std::make_shared(actions)); + + } else if (policy_name == "AlwaysFold") { + return MakePolicyBot(/*seed=*/0, + std::make_shared( + std::vector({ActionType::kFold}))); + + } else if (policy_name == "AlwaysRaise") { + return MakePolicyBot(/*seed=*/0, + std::make_shared( + std::vector({ActionType::kBet}))); + } else { + SpielFatalError(absl::StrCat("Unknown policy_name: ", policy_name)); + } + } +}; + +REGISTER_SPIEL_BOT("uniform_restricted_actions", + UniformRestrictedActionsFactory); + +} // namespace universal_poker +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/universal_poker.h b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/universal_poker.h new file mode 100644 index 0000000..497274a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/universal_poker.h @@ -0,0 +1,320 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAMES_UNIVERSAL_POKER_H_ +#define OPEN_SPIEL_GAMES_UNIVERSAL_POKER_H_ + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/container/flat_hash_set.h" +#include "open_spiel/games/universal_poker/acpc_cpp/acpc_game.h" +#include "open_spiel/games/universal_poker/logic/card_set.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" + +// This is a wrapper around the Annual Computer Poker Competition bot (ACPC) +// environment. See http://www.computerpokercompetition.org/. The code is +// initially available at https://github.com/ethansbrown/acpc +// It is an optional dependency (see install.md for documentation and +// open_spiel/scripts/global_variables.sh to enable this). +// +// It has not been extensively reviewed/tested by the DeepMind OpenSpiel team. +namespace open_spiel { +namespace universal_poker { + +class UniversalPokerGame; + +constexpr uint8_t kMaxUniversalPokerPlayers = 10; + +// This is the mapping from int to action. E.g. the legal action "0" is fold, +// the legal action "1" is check/call, etc. +enum ActionType { kFold = 0, kCall = 1, kBet = 2, kAllIn = 3, kHalfPot = 4 }; + +// There are 5 actions: Fold, Call, Half-Pot bet, Pot Bet, and all-in. 
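+// Since kHalfPot (= 4) is the highest-valued ActionType, this evaluates to 5.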
+inline constexpr int kNumActionsFCHPA = + static_cast(ActionType::kHalfPot) + 1; + +enum BettingAbstraction { kFCPA = 0, kFC = 1, kFULLGAME = 2, kFCHPA = 3 }; + +// TODO(author1): Remove StateActionType and use ActionType instead. +enum StateActionType { + ACTION_DEAL = 1, + ACTION_FOLD = 2, + ACTION_CHECK_CALL = 4, + ACTION_BET = 8, + ACTION_ALL_IN = 16 +}; + +constexpr StateActionType ALL_ACTIONS[5] = { + ACTION_DEAL, ACTION_FOLD, ACTION_CHECK_CALL, ACTION_BET, ACTION_ALL_IN}; + +class UniversalPokerState : public State { + public: + explicit UniversalPokerState(std::shared_ptr game); + + bool IsTerminal() const override; + bool IsChanceNode() const override; + Player CurrentPlayer() const override; + std::string ActionToString(Player player, Action move) const override; + std::string ToString() const override; + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + // Warning: all 'call' actions will have encoded sizing of 0. This could be + // potentially misleading in certain all-in situations if the caller has a + // stack that is smaller than the size of the bet! (See ObservationTensor if + // you need any player's exact contribution to the pot). + void InformationStateTensor(Player player, + absl::Span values) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + + // The probability of taking each possible action in a particular info state. + std::vector> ChanceOutcomes() const override; + std::vector LegalActions() const override; + + // Used to make UpdateIncrementalStateDistribution much faster. + std::unique_ptr GetHistoriesConsistentWithInfostate( + int player_id) const override; + std::vector ActionsConsistentWithInformationFrom( + Action action) const override { + return {action}; + } + std::unique_ptr ResampleFromInfostate( + int player_id, std::function rng) const; + + const acpc_cpp::ACPCState &acpc_state() const { return acpc_state_; } + const BettingAbstraction &betting() const { return betting_abstraction_; } + + // TODO(author1): If this is slow, cache it. + // Returns the raise-to size of a pot bet. Multiple determines the size; e.g. + // a double pot bet would have multiple = 2. + int PotSize(double multiple = 1.) const; + + // Returns the raise-to size of the current player going all-in. + int AllInSize() const; + void ApplyChoiceAction(StateActionType action_type, int size); + + protected: + void DoApplyAction(Action action_id) override; + + private: + void _CalculateActionsAndNodeType(); + + double GetTotalReward(Player player) const; + + const uint32_t &GetPossibleActionsMask() const { return possibleActions_; } + const int GetPossibleActionCount() const; + + // Note: might want to update the action sequence in the future to track + // everything per-round. + const std::string &GetActionSequence() const { return actionSequence_; } + // Unabstracted sizings for each entry in the Action Sequence. 
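+  // For example, a hole-card deal contributes ('d', 0) to the two sequences,
+  // a check/call contributes ('c', 0), and a raise to 200 chips contributes
+  // ('p', 200).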
+ const std::vector &GetActionSequenceSizings() const { + return actionSequenceSizings_; + } + + void AddHoleCard(uint8_t card) { + Player p = hole_cards_dealt_ / acpc_game_->GetNbHoleCardsRequired(); + const int card_index = + hole_cards_dealt_ % acpc_game_->GetNbHoleCardsRequired(); + acpc_state_.AddHoleCard(p, card_index, card); + ++hole_cards_dealt_; + } + + void AddBoardCard(uint8_t card) { + acpc_state_.AddBoardCard(board_cards_dealt_, card); + ++board_cards_dealt_; + } + + logic::CardSet HoleCards(Player player) const { + logic::CardSet hole_cards; + const int num_players = acpc_game_->GetNbPlayers(); + const int num_cards_dealt_to_all = hole_cards_dealt_ / num_players; + int num_cards_dealt_to_player = num_cards_dealt_to_all; + // We deal to players in order from 0 to n - 1. So if the number of cards + // dealt % num_players is > the player, we haven't dealt them a card yet; + // otherwise we have. + if (player < (hole_cards_dealt_ % num_players) && + num_cards_dealt_to_all < acpc_game_->GetNbHoleCardsRequired()) { + ++num_cards_dealt_to_player; + } + SPIEL_CHECK_LT(player, acpc_game_->GetNbPlayers()); + SPIEL_CHECK_LE(num_cards_dealt_to_player, + static_cast(acpc_game_->GetNbHoleCardsRequired())); + for (int i = 0; i < num_cards_dealt_to_player; ++i) { + hole_cards.AddCard(acpc_state_.hole_cards(player, i)); + } + return hole_cards; + } + + logic::CardSet BoardCards() const { + logic::CardSet board_cards; + const int num_board_cards = + std::min(board_cards_dealt_, + static_cast(acpc_game_->GetTotalNbBoardCards())); + for (int i = 0; i < num_board_cards; ++i) { + board_cards.AddCard(acpc_state_.board_cards(i)); + } + return board_cards; + } + + const acpc_cpp::ACPCGame *acpc_game_; + mutable acpc_cpp::ACPCState acpc_state_; + logic::CardSet deck_; // The remaining cards to deal. + int hole_cards_dealt_ = 0; + int board_cards_dealt_ = 0; + + // The current player: + // kChancePlayerId for chance nodes + // kTerminalPlayerId when we everyone except one player has fold, or that + // we have reached the showdown. + // The current player >= 0 otherwise. + Player cur_player_; + uint32_t possibleActions_; + std::string actionSequence_; + std::vector actionSequenceSizings_; + + BettingAbstraction betting_abstraction_; + + // Used for custom implementation of subgames. + std::vector handReaches_; + std::vector > DistributeHandCardsInSubgame() const; + bool IsDistributingSingleCard() const; + const std::vector GetEncodingBase() const; +}; + +class UniversalPokerGame : public Game { + public: + explicit UniversalPokerGame(const GameParameters ¶ms); + + int NumDistinctActions() const override; + std::unique_ptr NewInitialState() const override; + int NumPlayers() const override; + double MinUtility() const override; + double MaxUtility() const override; + int MaxChanceOutcomes() const override; + absl::optional UtilitySum() const override { return 0; } + std::vector InformationStateTensorShape() const override; + std::vector ObservationTensorShape() const override; + int MaxGameLength() const override; + // TODO: verify whether this bound is tight and/or tighten it. 
+ int MaxChanceNodesInHistory() const override { return MaxGameLength(); } + BettingAbstraction betting_abstraction() const { + return betting_abstraction_; + } + + int big_blind() const { return big_blind_; } + double MaxCommitment() const; + const acpc_cpp::ACPCGame *GetACPCGame() const { return &acpc_game_; } + std::string parseParameters(const GameParameters &map); + + private: + std::string gameDesc_; + const acpc_cpp::ACPCGame acpc_game_; + const int potSize_; + const std::string boardCards_; + const std::string handReaches_; + absl::optional max_game_length_; + BettingAbstraction betting_abstraction_ = BettingAbstraction::kFULLGAME; + int big_blind_; + int max_stack_size_; +}; + +// Only supported for UniversalPoker. Randomly plays an action from a fixed list +// of actions. If none of the actions are legal, checks/calls. +class UniformRestrictedActions : public Policy { + public: + // Actions will be restricted to this list when legal. If no such action is + // legal, checks/calls. + explicit UniformRestrictedActions(absl::Span actions) + : actions_(actions.begin(), actions.end()), + max_action_(*absl::c_max_element(actions)) {} + + ActionsAndProbs GetStatePolicy(const State &state) const { + ActionsAndProbs policy; + policy.reserve(actions_.size()); + const std::vector legal_actions = state.LegalActions(); + for (Action action : legal_actions) { + if (actions_.contains(static_cast(action))) { + policy.emplace_back(action, 1.); + } + if (policy.size() >= actions_.size() || action > max_action_) break; + } + + // It is always legal to check/call. + if (policy.empty()) { + SPIEL_DCHECK_TRUE(absl::c_find(legal_actions, ActionType::kCall) != + legal_actions.end()); + policy.push_back({static_cast(ActionType::kCall), 1.}); + } + + // If we have a non-empty policy, normalize it! + if (policy.size() > 1) NormalizePolicy(&policy); + return policy; + } + + private: + const absl::flat_hash_set actions_; + const ActionType max_action_; +}; + +// Converts an ACPC action into one that's compatible with UniversalPokerGame. +open_spiel::Action ACPCActionToOpenSpielAction( + const project_acpc_server::Action &action, + const UniversalPokerState &state); + +// Get hole card index within the array of reach probabilities, as specified +// in https://github.com/Sandholm-Lab/LibratusEndgames : +// +// The probability, according to the Libratus blueprint strategy, of each player +// reaching this endgame with each hand. There are a total of 2,652 +// probabilities in this list. The first 1,326 are for the "out of position" +// player (the first player to act on the round), while the remaining 1,326 are +// for the "button" player. Each of the 1,326 probabilities corresponds to a +// poker hand, ordered as follows: +// +// 2s2h, 2s2d, 2s2c, 2s3s, 2s3h, ..., 2sAc, 2h2d, 2h2c, ..., AdAc. +int GetHoleCardsReachIndex(int card_a, int card_b, + int num_suits, int num_ranks); + +// Make random subgame, with optionally specified round, pot size, board +// cards and hand reach probs. If all of these variables are specified, +// it is actually a non-randomized subgame: by omiting any parameter, +// a random value will be supplied automatically. +std::shared_ptr MakeRandomSubgame( + std::mt19937 &rng, int pot_size = -1, std::string board_cards = "", + std::vector hand_reach = {}); + +// Converts an ACPC gamedef into the corresponding OpenSpiel universal_poker +// game-state string and uses that string to load + return the game. 
+std::shared_ptr LoadUniversalPokerGameFromACPCGamedef( + const std::string &acpc_gamedef); + +// Number of unique hands in no-limit poker. +constexpr int kSubgameUniqueHands = 1326; // = (52*51) / 2 + +std::ostream &operator<<(std::ostream &os, const BettingAbstraction &betting); + +} // namespace universal_poker +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_UNIVERSAL_POKER_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/universal_poker_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/universal_poker_test.cc new file mode 100644 index 0000000..f464f61 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/universal_poker/universal_poker_test.cc @@ -0,0 +1,1040 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/universal_poker/universal_poker.h" + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/abseil-cpp/absl/flags/flag.h" +#include "open_spiel/abseil-cpp/absl/flags/parse.h" +#include "open_spiel/abseil-cpp/absl/strings/match.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/games/universal_poker/acpc/project_acpc_server/game.h" +#include "open_spiel/algorithms/evaluate_bots.h" +#include "open_spiel/canonical_game_strings.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_bots.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" +#include "open_spiel/utils/file.h" +#include "open_spiel/utils/init.h" + +ABSL_FLAG(std::string, subgames_data_dir, "universal_poker/endgames", + "Directory containing the subgames data."); + +namespace open_spiel { +namespace universal_poker { +namespace { + +namespace testing = open_spiel::testing; + +constexpr absl::string_view kKuhnLimit3P = + ("GAMEDEF\n" + "limit\n" + "numPlayers = 3\n" + "numRounds = 1\n" + "blind = 1 1 1\n" + "raiseSize = 1\n" + "firstPlayer = 1\n" + "maxRaises = 1\n" + "numSuits = 1\n" + "numRanks = 4\n" + "numHoleCards = 1\n" + "numBoardCards = 0\n" + "END GAMEDEF\n"); +GameParameters KuhnLimit3PParameters() { + return {{"betting", GameParameter(std::string("limit"))}, + {"numPlayers", GameParameter(3)}, + {"numRounds", GameParameter(1)}, + {"blind", GameParameter(std::string("1 1 1"))}, + {"raiseSize", GameParameter(std::string("1"))}, + {"firstPlayer", GameParameter(std::string("1"))}, + {"maxRaises", GameParameter(std::string("1"))}, + {"numSuits", GameParameter(1)}, + {"numRanks", GameParameter(4)}, + {"numHoleCards", GameParameter(1)}, + {"numBoardCards", GameParameter(std::string("0"))}}; +} + +constexpr absl::string_view kHoldemNoLimit6P = + ("GAMEDEF\n" + "nolimit\n" + "numPlayers = 6\n" + "numRounds = 4\n" + "stack = 20000 20000 20000 20000 20000 20000\n" + "blind = 50 100 0 0 0 0\n" + 
"firstPlayer = 3 1 1 1\n" + "numSuits = 4\n" + "numRanks = 13\n" + "numHoleCards = 2\n" + "numBoardCards = 0 3 1 1\n" + "END GAMEDEF\n"); +GameParameters HoldemNoLimit6PParameters() { + return {{"betting", GameParameter(std::string("nolimit"))}, + {"numPlayers", GameParameter(6)}, + {"numRounds", GameParameter(4)}, + {"stack", + GameParameter(std::string("20000 20000 20000 20000 20000 20000"))}, + {"blind", GameParameter(std::string("50 100 0 0 0 0"))}, + {"firstPlayer", GameParameter(std::string("3 1 1 1"))}, + {"numSuits", GameParameter(4)}, + {"numRanks", GameParameter(13)}, + {"numHoleCards", GameParameter(2)}, + {"numBoardCards", GameParameter(std::string("0 3 1 1"))}}; +} + +void LoadKuhnLimitWithAndWithoutGameDef() { + std::shared_ptr game_generic = + LoadUniversalPokerGameFromACPCGamedef(std::string(kKuhnLimit3P)); + const UniversalPokerGame& kuhn_limit_3p_from_gamedef = + open_spiel::down_cast(*game_generic); + + UniversalPokerGame kuhn_limit_3p(KuhnLimit3PParameters()); + + SPIEL_CHECK_EQ(kuhn_limit_3p_from_gamedef.GetACPCGame()->ToString(), + kuhn_limit_3p.GetACPCGame()->ToString()); + SPIEL_CHECK_TRUE((*(kuhn_limit_3p_from_gamedef.GetACPCGame())) == + (*(kuhn_limit_3p.GetACPCGame()))); +} + +void LoadHoldemNoLimit6PWithAndWithoutGameDef() { + std::shared_ptr game_generic = + LoadUniversalPokerGameFromACPCGamedef(std::string(kHoldemNoLimit6P)); + const UniversalPokerGame& holdem_no_limit_6p_from_gamedef = + open_spiel::down_cast(*game_generic); + + UniversalPokerGame holdem_no_limit_6p(HoldemNoLimit6PParameters()); + + SPIEL_CHECK_EQ(holdem_no_limit_6p_from_gamedef.GetACPCGame()->ToString(), + holdem_no_limit_6p.GetACPCGame()->ToString()); + SPIEL_CHECK_TRUE((*(holdem_no_limit_6p_from_gamedef.GetACPCGame())) == + (*(holdem_no_limit_6p.GetACPCGame()))); +} +void LoadGameFromDefaultConfig() { LoadGame("universal_poker"); } + +void LoadAndRunGamesFullParameters() { + std::shared_ptr kuhn_limit_3p = + LoadGame("universal_poker", KuhnLimit3PParameters()); + std::shared_ptr os_kuhn_3p = + LoadGame("kuhn_poker", {{"players", GameParameter(3)}}); + SPIEL_CHECK_GT(kuhn_limit_3p->MaxGameLength(), os_kuhn_3p->MaxGameLength()); + testing::RandomSimTestNoSerialize(*kuhn_limit_3p, 1); + // TODO(b/145688976): The serialization is also broken + // In particular, the firstPlayer string "1" is converted back to an integer + // when deserializing, which crashes. + // testing::RandomSimTest(*kuhn_limit_3p, 1); + std::shared_ptr holdem_nolimit_6p = + LoadGame("universal_poker", HoldemNoLimit6PParameters()); + testing::RandomSimTestNoSerialize(*holdem_nolimit_6p, 1); + testing::RandomSimTest(*holdem_nolimit_6p, 3); + std::shared_ptr holdem_nolimit_fullgame = + LoadGame(HunlGameString("fullgame")); + testing::RandomSimTest(*holdem_nolimit_fullgame, 50); +} + +void LoadAndRunGameFromGameDef() { + std::shared_ptr game_generic = + LoadUniversalPokerGameFromACPCGamedef(std::string(kHoldemNoLimit6P)); + const UniversalPokerGame& holdem_no_limit_6p_from_gamedef = + open_spiel::down_cast(*game_generic); + + testing::RandomSimTestNoSerialize(holdem_no_limit_6p_from_gamedef, 1); + // Note: there's currently some bugs with serialization. This would probably + // fail if not for some hacky workarounds in the ACPC Gamedef -> OpenSpiel + // game state conversion code. 
+ testing::RandomSimTest(holdem_no_limit_6p_from_gamedef, 1); +} + +void HUNLRegressionTests() { + std::shared_ptr game = LoadGame( + "universal_poker(betting=nolimit,numPlayers=2,numRounds=4,blind=100 " + "50,firstPlayer=2 1 1 " + "1,numSuits=4,numRanks=13,numHoleCards=2,numBoardCards=0 3 1 1,stack=400 " + "400)"); + std::unique_ptr state = game->NewInitialState(); + while (state->IsChanceNode()) { + state->ApplyAction(state->LegalActions()[0]); + } + std::cout << state->InformationStateString() << std::endl; + // Pot bet: call 50, and raise by 200. + state->ApplyAction(universal_poker::kBet); + + // Now, the minimum bet size is larger than the pot, so player 0 can only + // fold, call, or go all-in. + std::vector actions = state->LegalActions(); + absl::c_sort(actions); + + SPIEL_CHECK_EQ(actions.size(), 3); + SPIEL_CHECK_EQ(actions[0], universal_poker::kFold); + SPIEL_CHECK_EQ(actions[1], universal_poker::kCall); + SPIEL_CHECK_EQ(actions[2], universal_poker::kAllIn); + + // Try a similar test with a stacks of size 300. + game = LoadGame( + "universal_poker(betting=nolimit,numPlayers=2,numRounds=4,blind=100 " + "50,firstPlayer=2 1 1 " + "1,numSuits=4,numRanks=13,numHoleCards=2,numBoardCards=0 3 1 1,stack=300 " + "300)"); + state = game->NewInitialState(); + while (state->IsChanceNode()) { + state->ApplyAction(state->LegalActions()[0]); + } + std::cout << state->InformationStateString() << std::endl; + + // The pot bet exactly matches the number of chips available. This is an edge + // case where all-in is not available, only the pot bet. + + actions = state->LegalActions(); + absl::c_sort(actions); + + SPIEL_CHECK_EQ(actions.size(), 3); + SPIEL_CHECK_EQ(actions[0], universal_poker::kFold); + SPIEL_CHECK_EQ(actions[1], universal_poker::kCall); + SPIEL_CHECK_EQ(actions[2], universal_poker::kBet); +} + +void LoadAndRunGameFromDefaultConfig() { + std::shared_ptr game = LoadGame("universal_poker"); + testing::RandomSimTest(*game, 2); +} + +void BasicUniversalPokerTests() { + testing::LoadGameTest("universal_poker"); + testing::ChanceOutcomesTest(*LoadGame("universal_poker")); + testing::RandomSimTest(*LoadGame("universal_poker"), 100); + + // testing::RandomSimBenchmark("leduc_poker", 10000, false); + // testing::RandomSimBenchmark("universal_poker", 10000, false); + + testing::CheckChanceOutcomes(*LoadGame("universal_poker")); + + auto observer = LoadGame("universal_poker") + ->MakeObserver(kDefaultObsType, + GameParametersFromString("single_tensor")); + testing::RandomSimTestCustomObserver(*LoadGame("universal_poker"), observer); +} + +constexpr absl::string_view kHULHString = + ("universal_poker(betting=limit,numPlayers=2,numRounds=4,blind=50 100," + "firstPlayer=2 1,numSuits=4,numRanks=13,numHoleCards=2,numBoardCards=0 3 " + "1 " + "1,raiseSize=200 200 400 400,maxRaises=3 4 4 4)"); + +void ChumpPolicyTests() { + std::shared_ptr game = LoadGame(std::string(kHULHString)); + std::vector> bots; + bots.push_back(MakePolicyBot(*game, /*player_id=*/0, /*seed=*/0, + std::make_unique())); + bots.push_back( + MakePolicyBot(*game, /*player_id=*/0, /*seed=*/0, + std::make_unique( + std::vector({ActionType::kCall})))); + bots.push_back( + MakePolicyBot(*game, /*player_id=*/0, /*seed=*/0, + std::make_unique( + std::vector({ActionType::kFold})))); + bots.push_back(MakePolicyBot( + *game, /*player_id=*/0, /*seed=*/0, + std::make_unique( + std::vector({ActionType::kCall, ActionType::kBet})))); + for (int i = 0; i < bots.size(); ++i) { + for (int j = 0; j < bots.size(); ++j) { + std::unique_ptr state = 
game->NewInitialState(); + std::vector bots_ptrs = {bots[i].get(), bots[j].get()}; + EvaluateBots(state.get(), bots_ptrs, /*seed=*/42); + } + } +} + +// Checks min raising functionality. +void FullNLBettingTest1() { + std::shared_ptr game = LoadGame( + "universal_poker(betting=nolimit," + "numPlayers=2," + "numRounds=4," + "blind=2 1," + "firstPlayer=2 1 1 1," + "numSuits=4," + "numRanks=13," + "numHoleCards=2," + "numBoardCards=0 3 1 1," + "stack=20 20," + "bettingAbstraction=fullgame)"); + std::unique_ptr state = game->NewInitialState(); + SPIEL_CHECK_EQ(game->NumDistinctActions(), 21); + while (state->IsChanceNode()) + state->ApplyAction(state->LegalActions()[0]); // deal hole cards + // check valid raise actions, smallest valid raise is double the big blind + SPIEL_CHECK_FALSE(absl::c_binary_search(state->LegalActions(), 3)); + for (int i = 4; i <= 20; ++i) + SPIEL_CHECK_TRUE(absl::c_binary_search(state->LegalActions(), i)); + SPIEL_CHECK_FALSE(absl::c_binary_search(state->LegalActions(), 21)); + state->ApplyAction(1); // call big blind + state->ApplyAction(1); // check big blind + for (int i = 0; i < 3; ++i) + state->ApplyAction(state->LegalActions()[0]); // deal flop + // check valid raise actions, smallest valid raise is double the big blind + SPIEL_CHECK_FALSE(absl::c_binary_search(state->LegalActions(), 3)); + for (int i = 4; i <= 20; ++i) + SPIEL_CHECK_TRUE(absl::c_binary_search(state->LegalActions(), i)); + SPIEL_CHECK_FALSE(absl::c_binary_search(state->LegalActions(), 21)); + // each player keeps min raising until one is all in + for (int i = 4; i <= 20; i += 2) state->ApplyAction(i); + state->ApplyAction(1); // call last raise + state->ApplyAction(state->LegalActions()[0]); // deal turn + state->ApplyAction(state->LegalActions()[0]); // deal river + SPIEL_CHECK_EQ(state->Returns()[0], state->Returns()[1]); // hand is a draw + SPIEL_CHECK_TRUE( + absl::StrContains(state->ToString(), + "ACPC State: STATE:0:cc/r4r6r8r10r12r14r16r18r20c//" + ":2c2d|2h2s/3c3d3h/3s/4c")); +} + +// Checks that raises must double previous bet within the same round but +// each new round resets betting with the min bet size equal to the big blind. 
+void FullNLBettingTest2() { + std::shared_ptr game = LoadGame( + "universal_poker(betting=nolimit," + "numPlayers=2," + "numRounds=4," + "blind=100 50," + "firstPlayer=2 1 1 1," + "numSuits=4," + "numRanks=13," + "numHoleCards=2," + "numBoardCards=0 3 1 1," + "stack=10000 10000," + "bettingAbstraction=fullgame)"); + std::unique_ptr state = game->NewInitialState(); + SPIEL_CHECK_EQ(game->NumDistinctActions(), 10001); + while (state->IsChanceNode()) + state->ApplyAction(state->LegalActions()[0]); // deal hole cards + // check valid raise actions + std::vector legal_actions = state->LegalActions(); + SPIEL_CHECK_FALSE(absl::c_binary_search(legal_actions, 199)); + for (int i = 200; i <= 10000; ++i) + SPIEL_CHECK_TRUE(absl::c_binary_search(legal_actions, i)); + SPIEL_CHECK_FALSE(absl::c_binary_search(legal_actions, 10001)); + state->ApplyAction(5100); // bet just over half stack + // raise must double the size of the bet + // only legal actions now are fold, call, raise all-in + SPIEL_CHECK_EQ(state->LegalActions().size(), 3); + SPIEL_CHECK_EQ(state->LegalActions().back(), 10000); + state->ApplyAction(1); // call + for (int i = 0; i < 3; ++i) + state->ApplyAction(state->LegalActions()[0]); // deal flop + // new round of betting so we can bet as small as the big blind + legal_actions = state->LegalActions(); + SPIEL_CHECK_FALSE(absl::c_binary_search(legal_actions, 5199)); + for (int i = 5200; i <= 10000; ++i) + SPIEL_CHECK_TRUE(absl::c_binary_search(legal_actions, i)); + state->ApplyAction(5200); // min bet + // now we can raise as small as the big blind or as big as an all-in + legal_actions = state->LegalActions(); + for (int i = 5300; i <= 10000; ++i) + SPIEL_CHECK_TRUE(absl::c_binary_search(legal_actions, i)); + state->ApplyAction(1); // opt just to call + state->ApplyAction(state->LegalActions()[0]); // deal turn + state->ApplyAction(5400); // bet 2 big blinds + state->ApplyAction(5600); // raise to 4 big blinds + state->ApplyAction(5900); // reraise to 7 big blinds + // now a reraise must increase by at least 3 more big blinds + legal_actions = state->LegalActions(); + SPIEL_CHECK_FALSE(absl::c_binary_search(legal_actions, 6199)); + for (int i = 6200; i <= 10000; ++i) + SPIEL_CHECK_TRUE(absl::c_binary_search(legal_actions, i)); + state->ApplyAction(1); // opt to just call + state->ApplyAction(state->LegalActions()[0]); // deal river + // new round of betting so we can bet as small as the big blind + legal_actions = state->LegalActions(); + SPIEL_CHECK_FALSE(absl::c_binary_search(legal_actions, 5999)); + for (int i = 6000; i <= 10000; ++i) + SPIEL_CHECK_TRUE(absl::c_binary_search(legal_actions, i)); + state->ApplyAction(10000); // all-in! + state->ApplyAction(0); // fold + SPIEL_CHECK_EQ(state->Returns()[0], 5900); + SPIEL_CHECK_EQ(state->Returns()[1], -5900); + SPIEL_CHECK_TRUE(absl::StrContains(state->ToString(), + "ACPC State: STATE:0:r5100c/r5200c/r5400r5600r5900c/r10000f" + ":2c2d|2h2s/3c3d3h/3s/4c")); +} + +// Checks bet sizing is correct when there are more than two players +// all with different starting stacks. +void FullNLBettingTest3() { + std::shared_ptr game = LoadGame( + "universal_poker(betting=nolimit," + "numPlayers=3," + "numRounds=4," + "blind=100 50 0," + "firstPlayer=2 1 1 1," // WARNING: Atypical turn order! SB->D->BB, + // then BB->SB->D. 
+ "numSuits=4," + "numRanks=13," + "numHoleCards=2," + "numBoardCards=0 3 1 1," + "stack=500 1000 2000," + "bettingAbstraction=fullgame)"); + std::unique_ptr state = game->NewInitialState(); + SPIEL_CHECK_EQ(game->NumDistinctActions(), 2001); + while (state->IsChanceNode()) state->ApplyAction(state->LegalActions()[0]); + state->ApplyAction(1); // call big blind + state->ApplyAction(1); // call big blind + state->ApplyAction(1); // check big blind + for (int i = 0; i < 3; ++i) + state->ApplyAction(state->LegalActions()[0]); // deal flop + // assert all raise increments are valid + std::vector legal_actions = state->LegalActions(); + SPIEL_CHECK_FALSE(absl::c_binary_search(legal_actions, 199)); + for (int i = 200; i <= 500; ++i) + SPIEL_CHECK_TRUE(absl::c_binary_search(legal_actions, i)); + SPIEL_CHECK_FALSE(absl::c_binary_search(legal_actions, 501)); + state->ApplyAction(1); // check + legal_actions = state->LegalActions(); + SPIEL_CHECK_FALSE(absl::c_binary_search(legal_actions, 199)); + for (int i = 200; i <= 1000; ++i) + SPIEL_CHECK_TRUE(absl::c_binary_search(legal_actions, i)); + SPIEL_CHECK_FALSE(absl::c_binary_search(legal_actions, 1001)); + state->ApplyAction(1); // check + legal_actions = state->LegalActions(); + SPIEL_CHECK_FALSE(absl::c_binary_search(legal_actions, 199)); + for (int i = 200; i <= 2000; ++i) + SPIEL_CHECK_TRUE(absl::c_binary_search(legal_actions, i)); + SPIEL_CHECK_FALSE(absl::c_binary_search(legal_actions, 2001)); + state->ApplyAction(200); // min raise + legal_actions = state->LegalActions(); + for (int i = 300; i <= 500; ++i) + SPIEL_CHECK_TRUE(absl::c_binary_search(legal_actions, i)); + SPIEL_CHECK_FALSE(absl::c_binary_search(legal_actions, 501)); + state->ApplyAction(500); // short stack goes all-in + legal_actions = state->LegalActions(); + SPIEL_CHECK_FALSE(absl::c_binary_search(legal_actions, 799)); + for (int i = 800; i <= 1000; ++i) + SPIEL_CHECK_TRUE(absl::c_binary_search(legal_actions, i)); + SPIEL_CHECK_FALSE(absl::c_binary_search(legal_actions, 1001)); + state->ApplyAction(800); // min raise + legal_actions = state->LegalActions(); + SPIEL_CHECK_FALSE(absl::c_binary_search(legal_actions, 1099)); + for (int i = 1100; i <= 2000; ++i) + SPIEL_CHECK_TRUE(absl::c_binary_search(legal_actions, i)); + SPIEL_CHECK_FALSE(absl::c_binary_search(legal_actions, 2001)); + state->ApplyAction(2000); // all-in + SPIEL_CHECK_EQ(state->LegalActions().size(), 2); // can only fold or call + state->ApplyAction(1); // call + state->ApplyAction(state->LegalActions()[0]); // deal turn + state->ApplyAction(state->LegalActions()[0]); // deal river + SPIEL_CHECK_EQ(state->Returns()[0], -500); + SPIEL_CHECK_EQ(state->Returns()[1], -1000); + SPIEL_CHECK_EQ(state->Returns()[2], 1500); + SPIEL_CHECK_TRUE(absl::StrContains(state->ToString(), + "ACPC State: STATE:0:ccc/ccr200r500r800r2000c//" + ":2c2d|2h2s|3c3d/3h3s4c/4d/4h")); +} + +// Check that a max length game works and infostate tensors are all unique. 
+void FullNLBettingTest4() { + std::shared_ptr game = LoadGame( + "universal_poker(betting=nolimit," + "numPlayers=2," + "numRounds=2," + "blind=100 50," + "numSuits=1," + "numRanks=4," + "numHoleCards=1," + "numBoardCards=0 1," + "stack=2000 2000," + "bettingAbstraction=fullgame)"); + std::set> information_state_tensor_set; + std::vector tensor; + std::unique_ptr state = game->NewInitialState(); + SPIEL_CHECK_EQ(game->NumDistinctActions(), 2001); + // deal cards + while (state->IsChanceNode()) state->ApplyAction(state->LegalActions()[0]); + // check the infostate tensor and add to set + tensor = state->InformationStateTensor(); + SPIEL_CHECK_FALSE(information_state_tensor_set.count(tensor)); + information_state_tensor_set.insert(tensor); + state->ApplyAction(1); // check + // check the infostate tensor and add to set + tensor = state->InformationStateTensor(); + SPIEL_CHECK_FALSE(information_state_tensor_set.count(tensor)); + information_state_tensor_set.insert(tensor); + state->ApplyAction(200); // min bet + // check the infostate tensor and add to set + tensor = state->InformationStateTensor(); + SPIEL_CHECK_FALSE(information_state_tensor_set.count(tensor)); + information_state_tensor_set.insert(tensor); + state->ApplyAction(1); // call + state->ApplyAction(state->LegalActions()[0]); // deal flop + // check the infostate tensor and add to set + tensor = state->InformationStateTensor(); + SPIEL_CHECK_FALSE(information_state_tensor_set.count(tensor)); + information_state_tensor_set.insert(tensor); + state->ApplyAction(1); // check + // check the infostate tensor and add to set + tensor = state->InformationStateTensor(); + SPIEL_CHECK_FALSE(information_state_tensor_set.count(tensor)); + information_state_tensor_set.insert(tensor); + for (int i=300; i < 2000; i+=100) { + state->ApplyAction(i); // min bet/raise + // check the infostate tensor and add to set + tensor = state->InformationStateTensor(); + SPIEL_CHECK_FALSE(information_state_tensor_set.count(tensor)); + information_state_tensor_set.insert(tensor); + } + state->ApplyAction(1); // call + SPIEL_CHECK_EQ(state->LegalActions().size(), 0); + std::cout << state->ToString() << std::endl; + SPIEL_CHECK_TRUE(absl::StrContains(state->ToString(), + "ACPC State: STATE:0:cr200c/cr300r400r500r600r700r800r900r1000r1100" + "r1200r1300r1400r1500r1600r1700r1800r1900c:2c|3c/4c")); +} + +void ChanceDealRegressionTest() { + std::shared_ptr game = LoadGame( + "universal_poker(betting=nolimit," + "numPlayers=3," + "numRounds=4," + "blind=100 50 0," + "firstPlayer=2 1 1 1," // WARNING: Atypical turn order! SB->D->BB, then + // BB->SB->D + "numSuits=4," + "numRanks=13," + "numHoleCards=2," + "numBoardCards=0 3 1 1," + "stack=500 1000 2000," + "bettingAbstraction=fullgame)"); + std::unique_ptr state = game->NewInitialState(); + for (Action action : {0, 1, 2, 3, 4, 5, 1, 1, 1, 6, 7, + 8, 1, 1, 200, 500, 800, 2000, 1, 9, 10}) { + state->ApplyAction(action); + } + SPIEL_CHECK_EQ( + state->ToString(), + "BettingAbstraction: FULLGAME\n" + "P0 Cards: 2d2c\n" + "P1 Cards: 2s2h\n" + "P2 Cards: 3d3c\n" + "BoardCards 4h4d4c3s3h\n" + "P0 Reward: -500\n" + "P1 Reward: -1000\n" + "P2 Reward: 1500\n" + "Node type?: Terminal Node!\n" + "]\n" + "Round: 3\n" + "ACPC State: " + "STATE:0:ccc/ccr200r500r800r2000c//:2c2d|2h2s|3c3d/3h3s4c/4d/4h\n" + "Spent: [P0: 500 P1: 1000 P2: 2000 ]\n\n" + "Action Sequence: ddddddcccdddccppppcdd"); +} + +void HulhMinAndMaxUtilityIsCorrect() { + // More generic version of the previous code. 
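+  // For a heads-up limit game this is the big blind plus, for each round,
+  // maxRaises[round] * raiseSize[round]; for this configuration that is 240.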
+  std::shared_ptr<const Game> game =
+      LoadGame(HulhGameString(/*betting_abstraction=*/"fullgame"));
+  const auto* up_game = dynamic_cast<const UniversalPokerGame*>(game.get());
+  int max_utility = up_game->big_blind();
+  const auto& acpc_game = up_game->GetACPCGame()->Game();
+  for (int i = 0; i < up_game->GetACPCGame()->NumRounds(); ++i) {
+    max_utility += acpc_game.maxRaises[i] * acpc_game.raiseSize[i];
+  }
+  // Since 1. heads up and 2. stacks aren't relevant (since limit game), the
+  // most a player can win or lose equals the maximum amount they could in
+  // theory put into the pot.
+  SPIEL_CHECK_EQ(max_utility, 240);
+  SPIEL_CHECK_EQ(game->MaxUtility(), max_utility);
+  SPIEL_CHECK_EQ(game->MinUtility(), -max_utility);
+}
+
+void MaxUtilityLimitMultiway() {
+  std::shared_ptr<const Game> game_1 = LoadGame(
+      "universal_poker(betting=limit,"
+      "numPlayers=3,"
+      "numRounds=4,"
+      "blind=1 2 0,"
+      "firstPlayer=3 1 1 1,"
+      "numSuits=4,"
+      "numRanks=13,"
+      "numHoleCards=2,"
+      "numBoardCards=0 3 1 1,"
+      "stack=5 5 5,"  // Stack sizes are ignored for limit games
+      "raiseSize=900 900 900 900,"
+      "maxRaises=2 2 2 2,"
+      "bettingAbstraction=fullgame)");
+  // 4 betting rounds with two raises each - note that for limit games the
+  // stack size input is completely ignored by the ACPC game, so that should
+  // NOT be a consideration here.
+  // 2 (big blind) + 4 * 2 * 900 = 7202 per caller
+  SPIEL_CHECK_EQ(game_1->MaxUtility(), 14404);
+}
+
+void MaxUtilityEqualStacksMultiway() {
+  std::shared_ptr<const Game> game_3max =
+      LoadGame(Multiway3max_1_2GameString("fullgame", 200, 200, 200));
+  // Max utility is the maximum number of ending chips minus the starting
+  // stack. With 3 players each having a stack of 200, the max utility should
+  // be (3-1)*200=400.
+  SPIEL_CHECK_EQ(game_3max->MaxUtility(), 400);
+
+  std::shared_ptr<const Game> game_6max_short =
+      LoadGame(Multiway6max_1_2GameString("fullgame", 6));
+  // Now with 3 more players but ultra-short stacks (6 each, i.e. 3 BBs) the
+  // max utility goes down significantly: (6-1)*6=30.
+  SPIEL_CHECK_EQ(game_6max_short->MaxUtility(), 30);
+
+  std::shared_ptr<const Game> game_6max_deep =
+      LoadGame(Multiway6max_1_2GameString("fullgame", 10000));
+  // And conversely, with ultra-deep stacks the max utility should go WAY up:
+  // (6-1)*10000=50000
+  SPIEL_CHECK_EQ(game_6max_deep->MaxUtility(), 50000);
+}
+
+void MaxUtilityOneDeepStackMultiway() {
+  std::shared_ptr<const Game> game_1 =
+      LoadGame(Multiway3max_1_2GameString("fullgame", 10000, 20, 10));
+  // Stacks differ drastically, meaning that we have to consider which stacks
+  // cannot lose their entire stack in a single hand (even though the game
+  // is no-limit).
+  // In the best case the deepest or second-deepest stack wins an all-in
+  // against all other players simultaneously; therefore the max utility bound
+  // here equals the sum of the BB's stack + the Dealer's stack: 20+10 = 30.
+  SPIEL_CHECK_EQ(game_1->MaxUtility(), 30);
+
+  std::shared_ptr<const Game> game_2 =
+      LoadGame(Multiway3max_1_2GameString("fullgame", 20, 60, 6000));
+  // 20 + 60 = 80.
+  SPIEL_CHECK_EQ(game_2->MaxUtility(), 80);
+
+  std::shared_ptr<const Game> game_3 =
+      LoadGame(Multiway3max_1_2GameString("fullgame", 20, 60, 11));
+  // 20 + 11 = 31.
+  SPIEL_CHECK_EQ(game_3->MaxUtility(), 31);
+}
+
+void MinUtilityEqualStacksMultiway() {
+  // Min utility when all players have equal stacks should simply be the value
+  // of said starting stack (i.e. losing an all-in).
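+  // (Blinds are irrelevant here: the starting stack itself bounds what a
+  // player can lose.)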
+ std::shared_ptr game_3max = + LoadGame(Multiway3max_1_2GameString("fullgame", 200, 200, 200)); + SPIEL_CHECK_EQ(game_3max->MinUtility(), -200); + + std::shared_ptr game_6max_short = + LoadGame(Multiway6max_1_2GameString("fullgame", 6)); + SPIEL_CHECK_EQ(game_6max_short->MinUtility(), -6); + + std::shared_ptr game_6max_deep = + LoadGame(Multiway6max_1_2GameString("fullgame", 10000)); + SPIEL_CHECK_EQ(game_6max_deep->MinUtility(), -10000); + + // Edge case: two players tie for deepest but there's another shorter stack. + // In which case the two deeper players are still able to lose their entire + // stacks - so min utility shouldn't go down. + std::shared_ptr game_tie_4 = + LoadGame(Multiway3max_1_2GameString("fullgame", 6, 6, 4)); + SPIEL_CHECK_EQ(game_tie_4->MinUtility(), -6); + + std::shared_ptr game_tie_5 = + LoadGame(Multiway3max_1_2GameString("fullgame", 20, 60, 60)); + SPIEL_CHECK_EQ(game_tie_5->MinUtility(), -60); + + std::shared_ptr game_tie_6 = + LoadGame(Multiway3max_1_2GameString("fullgame", 200, 100, 200)); + SPIEL_CHECK_EQ(game_tie_6->MinUtility(), -200); +} + +void MinUtilityOneDeepStackMultiway() { + // When stacks differ drastically meaning that we have to consider which + // stacks cannot lose their entire stack in a single game (i.e. even though + // no-limit); even in the absolute worst case, the deepest stack cannot lose + // more than the second highest stack. + std::shared_ptr game_1 = + LoadGame(Multiway3max_1_2GameString("fullgame", 10000, 20, 10)); + SPIEL_CHECK_EQ(game_1->MinUtility(), -20); + + std::shared_ptr game_2 = + LoadGame(Multiway3max_1_2GameString("fullgame", 20, 60, 6000)); + SPIEL_CHECK_EQ(game_2->MinUtility(), -60); + + std::shared_ptr game_3 = + LoadGame(Multiway3max_1_2GameString("fullgame", 20, 60, 11)); + SPIEL_CHECK_EQ(game_3->MinUtility(), -20); +} + +void CanConvertActionsCorrectly() { + std::shared_ptr game = + LoadGame(HunlGameString(/*betting_abstraction=*/"fullgame")); + std::unique_ptr state = game->NewInitialState(); + const auto& up_state = static_cast(*state); + absl::flat_hash_map results = + { + {static_cast(ActionType::kFold), + {project_acpc_server::ActionType::a_fold, 0}}, + {static_cast(ActionType::kCall), + {project_acpc_server::ActionType::a_call, 0}}, + {static_cast(ActionType::kBet), + {project_acpc_server::ActionType::a_raise, 0}}, + {static_cast(ActionType::kBet) + 1, + {project_acpc_server::ActionType::a_raise, 1}}, + {static_cast(ActionType::kBet) + 2, + {project_acpc_server::ActionType::a_raise, 2}}, + {static_cast(ActionType::kBet) + 8, + {project_acpc_server::ActionType::a_raise, 8}}, + }; + for (const auto& [os_action, acpc_action] : results) { + SPIEL_CHECK_EQ(os_action, + ACPCActionToOpenSpielAction(acpc_action, up_state)); + } +} + +void TestFCHPA() { + std::shared_ptr game = LoadGame(HunlGameString("fchpa")); + std::unique_ptr state = game->NewInitialState(); + for (Action action : {30, 37, 32, 28}) state->ApplyAction(action); + Action converted_action = ACPCActionToOpenSpielAction( + {project_acpc_server::ActionType::a_raise, 200}, + static_cast(*state)); + SPIEL_CHECK_EQ(converted_action, kHalfPot); + state->ApplyAction(converted_action); + converted_action = ACPCActionToOpenSpielAction( + {project_acpc_server::ActionType::a_raise, 400}, + static_cast(*state)); + SPIEL_CHECK_EQ(converted_action, kHalfPot); + state->ApplyAction(converted_action); + converted_action = ACPCActionToOpenSpielAction( + {project_acpc_server::ActionType::a_raise, 1800}, + static_cast(*state)); + std::cout << "converted action: " << 
converted_action; + + // Test that r300 is a half-pot bet. + state = game->NewInitialState(); + for (Action action : {43, 41, 8, 25, 1, 2, 4, 2, 4, 3}) + state->ApplyAction(action); + auto* up_state = static_cast(state.get()); + SPIEL_CHECK_EQ( + ACPCActionToOpenSpielAction( + {project_acpc_server::ActionType::a_raise, 40000}, *up_state), + ActionType::kCall); + + state = game->NewInitialState(); + for (Action action : {14, 36, 49, 45, 4, 2, 2, 4, 3}) + state->ApplyAction(action); + up_state = static_cast(state.get()); + SPIEL_CHECK_EQ( + ACPCActionToOpenSpielAction( + {project_acpc_server::ActionType::a_raise, 40000}, *up_state), + ActionType::kCall); + state = game->NewInitialState(); + for (Action action : {48, 47, 0, 32, 1, 2, 2, 2, 4, 3}) + state->ApplyAction(action); + up_state = static_cast(state.get()); + SPIEL_CHECK_EQ( + ACPCActionToOpenSpielAction( + {project_acpc_server::ActionType::a_raise, 40000}, *up_state), + ActionType::kCall); + + state = game->NewInitialState(); + for (Action action : {42, 27, 22, 41, 0}) { + state->ApplyAction(action); + } +} + +// Regression test checking we do not allow half pot bets in incorrect spots. +void TestFCHPALegalActions() { + std::vector fold_call_allin = {kFold, kCall, kAllIn}; + std::vector fold_call = {kFold, kCall}; + constexpr const char* heads_up_nolimit_fchpa = + "universal_poker(" + "betting=nolimit," + "numPlayers=2," + "numRounds=2," + "stack=1200 1200," + "blind=100 100," + "numSuits=4," + "numRanks=6," + "numHoleCards=1," + "numBoardCards=0 1," + "bettingAbstraction=fchpa," + ")"; + std::shared_ptr game = LoadGame(heads_up_nolimit_fchpa); + std::unique_ptr state = game->NewInitialState(); + + for (Action action : {3, 7, 2, 2}) { + state->ApplyAction(action); + } + + // 1. Verify that we did not accidentally add halfPot betting action in a + // situation where a player has too few chips to do so. + std::vector legal_actions = state->LegalActions(); + SPIEL_CHECK_FALSE(std::find(legal_actions.begin(), legal_actions.end(), + ActionType::kHalfPot) != legal_actions.end()); + SPIEL_CHECK_EQ(legal_actions, fold_call_allin); + state->ApplyAction(kAllIn); + + // 2. Verify that we do not accidentally add halfPot betting action in a + // heads-up situation where the other player already shoved all-in. + legal_actions = state->LegalActions(); + SPIEL_CHECK_FALSE(std::find(legal_actions.begin(), legal_actions.end(), + ActionType::kHalfPot) != legal_actions.end()); + SPIEL_CHECK_EQ(legal_actions, fold_call); + + // 3. Verify that we do not accidentally add halfPot betting action in a + // terminal state (i.e. where there should not be *any* possible legal actions + // remaining). + state->ApplyAction(kFold); + SPIEL_CHECK_EQ(state->LegalActions().size(), 0); +} + +void TestHoleIndexCalculation() { + auto check_index = [](std::string card_a, std::string card_b, + int expected_index) { + int a = logic::CardSet(card_a).ToCardArray()[0]; + int b = logic::CardSet(card_b).ToCardArray()[0]; + int actual_index = GetHoleCardsReachIndex(a, b, + /*num_suits=*/4, /*num_ranks=*/13); + SPIEL_CHECK_EQ(actual_index, expected_index); + }; + + // Suit order is "shdc" + check_index("2s", "2h", 0); + check_index("2s", "2d", 1); + check_index("2s", "2c", 2); + check_index("2s", "3s", 3); + check_index("2s", "3h", 4); + // ... + check_index("2s", "Ac", 50); + check_index("2h", "2d", 51); + check_index("2h", "2c", 52); + // ... 
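+  // 1325 == C(52, 2) - 1, i.e. the last of the 1326 unordered hole-card
+  // pairs.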
+ check_index("Ad", "Ac", 1325); +} + +std::string ReadSubgameReachProbs(const std::string& file_name) { + std::string dir = absl::GetFlag(FLAGS_subgames_data_dir); + if (dir.back() == '/') { + dir.pop_back(); + } + return file::ReadContentsFromFile(absl::StrCat(dir, "/", file_name, ".txt"), + "r"); +} + +void TestSubgameCreation() { + auto test_game = []( + int pot_size, + const std::string& board_cards, + const std::string& hand_reach){ + constexpr const char* base_game = + "universal_poker(" + "betting=nolimit," + "numPlayers=2," + "numRounds=4," + "blind=100 50," + "firstPlayer=2 1 1 1," + "numSuits=4," + "numRanks=13," + "numHoleCards=2," + "numBoardCards=0 3 1 1," + "stack=20000 20000," + "bettingAbstraction=fcpa," + "potSize=%d," + "boardCards=%s," + "handReaches=%s" + ")"; + + std::string game_str = + absl::StrFormat(base_game, pot_size, board_cards, hand_reach); + printf("game_str %s", game_str.c_str()); + std::shared_ptr with_reach = LoadGame(game_str); + testing::RandomSimTest(*with_reach, + /*num_sims=*/5, + /*serialize=*/true, + /*verbose=*/true, + /*mask_test=*/false); + }; + + // Build uniform reaches as a string. + std::stringstream ss; + for (int i = 0; i < 2 * kSubgameUniqueHands; ++i) + ss << 1. / (2 * kSubgameUniqueHands) << ' '; + std::string uniform_reaches = ss.str(); + test_game(500, "7s9h9cTc", uniform_reaches); + test_game(500, "7s9h9cTc", ReadSubgameReachProbs("subgame1")); + test_game(4780, "Ts6hAh7c", uniform_reaches); + test_game(4780, "Ts6hAh7c", ReadSubgameReachProbs("subgame2")); + test_game(500, "4s8hTc9h2s", uniform_reaches); + test_game(500, "4s8hTc9h2s", ReadSubgameReachProbs("subgame3")); + test_game(3750, "JsKs5cQs7d", uniform_reaches); + test_game(3750, "JsKs5cQs7d", ReadSubgameReachProbs("subgame4")); +} + +void TestRandomSubgameCreation() { + std::mt19937 rng; + MakeRandomSubgame(rng); + MakeRandomSubgame(rng, 100); + MakeRandomSubgame(rng, 100, "7s9h9cTc"); + + std::vector uniform_reaches; + for (int i = 0; i < 2 * kSubgameUniqueHands; ++i) { + uniform_reaches.push_back(1. 
/ (2 * kSubgameUniqueHands)); + } + MakeRandomSubgame(rng, 100, "7s9h9cTc", uniform_reaches); +} + +void TestHalfCallHalfRaise() { + std::string bot_string = + "uniform_restricted_actions(policy_name=HalfCallHalfRaise)"; + for (const std::string& game_string : + std::vector({ HulhGameString("fullgame"), + "leduc_poker" })) { + std::shared_ptr game = LoadGame(game_string); + std::vector> owned_bots; + owned_bots.push_back(LoadBot(bot_string, game, /*player_id=*/0)); + owned_bots.push_back(LoadBot(bot_string, game, /*player_id=*/1)); + std::vector bots = {owned_bots[0].get(), owned_bots[1].get()}; + EvaluateBots(*game, bots); + } +} + +void TestFixedPreferenceBots() { + for (std::string bot_string : { + "uniform_restricted_actions(policy_name=AlwaysCall)", + "uniform_restricted_actions(policy_name=AlwaysRaise)", + "uniform_restricted_actions(policy_name=AlwaysFold)", + }) { + for (std::string game_string : {HunlGameString("fcpa"), + HulhGameString("fullgame")}) { + std::shared_ptr game = LoadGame(game_string); + std::vector> owned_bots; + owned_bots.push_back(LoadBot(bot_string, game, /*player_id=*/0)); + owned_bots.push_back(LoadBot(bot_string, game, /*player_id=*/1)); + std::vector bots = {owned_bots[0].get(), owned_bots[1].get()}; + EvaluateBots(*game, bots); + } + } +} + +void TestTensorsRecordsSizings() { + std::shared_ptr game = LoadGame( + "universal_poker(betting=nolimit," + "numPlayers=3," + "numRounds=4," + "blind=1 2 0," // p1=SB, p2=BB, p3=Button + "firstPlayer=3 1 1 1," // Standard turn order: D->SB->BB, then SB->BB->D + "numSuits=4," + "numRanks=13," + "numHoleCards=2," + "numBoardCards=0 3 1 1," + "stack=50 100 100," // SB has smaller stack to allow side-pot + "bettingAbstraction=fullgame)"); + std::unique_ptr state = game->NewInitialState(); + for (Action action : + {0, 1, 2, 3, 4, 5, 1, 1, 1, 6, 7, 8, 1, 1, 20, 40, 1, 100, 1, 1}) { + std::cout << "action " << action << "state: " << state << "\n" << std::endl; + state->ApplyAction(action); + } + // We have to choose a player since the no-arg default would result in an + // error due to the game being 'over'... but the choice is arbitrary since the + // information we're checking is all public knowledge. + std::vector tensor = state->InformationStateTensor(1); + int tensor_size = tensor.size(); + + SPIEL_CHECK_TRUE(tensor_size == game->InformationStateTensorShape()[0]); + int offset = tensor_size - game->MaxGameLength(); + + // Pre-Turn: All actions are deal or check + SPIEL_CHECK_EQ(tensor[offset + 10], 0); + + SPIEL_CHECK_EQ(tensor[offset + 11], 0); // Deal Turn + SPIEL_CHECK_EQ(tensor[offset + 12], 0); // SB Check + SPIEL_CHECK_EQ(tensor[offset + 13], 0); // BB Check + SPIEL_CHECK_EQ(tensor[offset + 14], 20); // Button raise 20 + SPIEL_CHECK_EQ(tensor[offset + 15], 40); // SB reraise 40 + SPIEL_CHECK_EQ(tensor[offset + 16], 0); // BB call 40 + SPIEL_CHECK_EQ(tensor[offset + 17], 100); // Button all-in 100 + SPIEL_CHECK_EQ(tensor[offset + 18], 0); // SB call for 50 (side-pot) + SPIEL_CHECK_EQ(tensor[offset + 19], 0); // BB call 100 + + // No action taken yet, so should default 0 + SPIEL_CHECK_EQ(tensor[offset + 20], 0); + + // Verify the final call sizes can instead be obtained from the Observation + // Tensor (especially the SB's, since it's a side-pot!) 
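+  // (The last num_players entries of the observation tensor record each
+  // player's total contribution to the pot.)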
+ std::vector observation_tensor = state->ObservationTensor(1); + int ob_tensor_size = observation_tensor.size(); + + SPIEL_CHECK_TRUE(ob_tensor_size == game->ObservationTensorShape()[0]); + SPIEL_CHECK_EQ(observation_tensor[ob_tensor_size - 3], 50); // SB (side-pot) + SPIEL_CHECK_EQ(observation_tensor[ob_tensor_size - 2], 100); // BB + SPIEL_CHECK_EQ(observation_tensor[ob_tensor_size - 1], 100); // Button +} + +void Bet4ConfusedForHalfPotRegressionTest() { + // 100 chip buy-in for all players, 50BB stacks (SB=1, BB=2) + std::shared_ptr game = + LoadGame(Multiway3max_1_2GameString("fullgame", 100, 100, 100)); + + std::unique_ptr state = game->NewInitialState(); + for (Action action : {0, 1, 2, 3, 4, 5, 1, 1, 1, 6, 7, 8, 1, 1}) { + std::cout << "action " << action << "state: " << state << "\n" << std::endl; + state->ApplyAction(action); + } + // Should *not* be 'half pot bet' since this is a fullgame / not abstracted. + SPIEL_CHECK_EQ(state->ActionToString(4), "player=2 move=Bet4"); +} + +} // namespace +} // namespace universal_poker +} // namespace open_spiel + +int main(int argc, char **argv) { + open_spiel::Init("", &argc, &argv, true); + absl::ParseCommandLine(argc, argv); + open_spiel::universal_poker::ChanceDealRegressionTest(); + open_spiel::universal_poker::LoadKuhnLimitWithAndWithoutGameDef(); + open_spiel::universal_poker::LoadHoldemNoLimit6PWithAndWithoutGameDef(); + open_spiel::universal_poker::LoadAndRunGamesFullParameters(); + open_spiel::universal_poker::LoadGameFromDefaultConfig(); + open_spiel::universal_poker::LoadAndRunGameFromGameDef(); + open_spiel::universal_poker::LoadAndRunGameFromDefaultConfig(); + open_spiel::universal_poker::BasicUniversalPokerTests(); + open_spiel::universal_poker::HUNLRegressionTests(); + open_spiel::universal_poker::ChumpPolicyTests(); + open_spiel::universal_poker::FullNLBettingTest1(); + open_spiel::universal_poker::FullNLBettingTest2(); + open_spiel::universal_poker::FullNLBettingTest3(); + open_spiel::universal_poker::FullNLBettingTest4(); + open_spiel::universal_poker::HulhMinAndMaxUtilityIsCorrect(); + open_spiel::universal_poker::MaxUtilityLimitMultiway(); + open_spiel::universal_poker::MaxUtilityEqualStacksMultiway(); + open_spiel::universal_poker::MaxUtilityOneDeepStackMultiway(); + open_spiel::universal_poker::MinUtilityEqualStacksMultiway(); + open_spiel::universal_poker::MinUtilityOneDeepStackMultiway(); + open_spiel::universal_poker::CanConvertActionsCorrectly(); + open_spiel::universal_poker::TestFCHPA(); + open_spiel::universal_poker::TestFCHPALegalActions(); + open_spiel::universal_poker::TestHoleIndexCalculation(); + open_spiel::universal_poker::TestSubgameCreation(); + open_spiel::universal_poker::TestRandomSubgameCreation(); + open_spiel::universal_poker::TestHalfCallHalfRaise(); + open_spiel::universal_poker::TestFixedPreferenceBots(); + open_spiel::universal_poker::TestTensorsRecordsSizings(); + open_spiel::universal_poker::Bet4ConfusedForHalfPotRegressionTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/y/y.cc b/scenarios/bargaining/open_spiel/open_spiel/games/y/y.cc new file mode 100644 index 0000000..cdc4987 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/y/y.cc @@ -0,0 +1,335 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/games/y/y.h" + +#include +#include +#include +#include + +#include "open_spiel/game_parameters.h" +#include "open_spiel/utils/tensor_view.h" + +namespace open_spiel { +namespace y_game { +namespace { + +// Facts about the game. +const GameType kGameType{/*short_name=*/"y", + /*long_name=*/"Y Connection Game", + GameType::Dynamics::kSequential, + GameType::ChanceMode::kDeterministic, + GameType::Information::kPerfectInformation, + GameType::Utility::kZeroSum, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/false, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/ + { + {"board_size", GameParameter(kDefaultBoardSize)}, + {"ansi_color_output", GameParameter(false)}, + }}; + +std::shared_ptr Factory(const GameParameters& params) { + return std::shared_ptr(new YGame(params)); +} + +REGISTER_SPIEL_GAME(kGameType, Factory); + +RegisterSingleTensorObserver single_tensor(kGameType.short_name); + +// The board is represented as a flattened 2d array of the form: +// 1 2 3 +// A 0 1 2 0 1 2 0 1 2 +// B 3 4 5 <=> 3 4 <=> 3 4 +// C 6 7 8 6 6 +// +// Neighbors are laid out in this pattern: +// 0 1 0 1 +// 5 X 2 <=> 5 X 2 +// 4 3 4 3 + +// Direct neighbors of a cell, clockwise. +constexpr std::array neighbor_offsets = { + Move(0, -1, kMoveOffset), Move(1, -1, kMoveOffset), + Move(1, 0, kMoveOffset), Move(0, 1, kMoveOffset), + Move(-1, 1, kMoveOffset), Move(-1, 0, kMoveOffset), +}; + +// Precomputed list of neighbors per board_size: [board_size][cell][direction] +std::vector neighbor_list; + +NeighborList gen_neighbors(int board_size) { + NeighborList out; + out.resize(board_size * board_size); + for (int y = 0; y < board_size; y++) { + for (int x = 0; x < board_size; x++) { + int xy = x + y * board_size; // Don't use Move.xy so it works off-board. + for (int dir = 0; dir < neighbor_offsets.size(); dir++) { + Move offset = neighbor_offsets[dir]; + out[xy][dir] = Move(x + offset.x, y + offset.y, board_size); + } + } + } + return out; +} + +const NeighborList& get_neighbors(int board_size) { + if (board_size >= neighbor_list.size()) { + neighbor_list.resize(board_size + 1); + } + if (neighbor_list[board_size].empty()) { + neighbor_list[board_size] = gen_neighbors(board_size); + } + return neighbor_list[board_size]; +} + +} // namespace + +int Move::Edge(int board_size) const { + if (!OnBoard()) return 0; + + return (x == 0 ? (1 << 0) : 0) | (y == 0 ? (1 << 1) : 0) | + (x + y == board_size - 1 ? 
(1 << 2) : 0); +} + +std::string Move::ToString() const { + if (xy == kMoveUnknown) return "unknown"; + if (xy == kMoveNone) return "none"; + return absl::StrCat(std::string(1, static_cast('a' + x)), y + 1); +} + +YState::YState(std::shared_ptr game, int board_size, + bool ansi_color_output) + : State(game), + board_size_(board_size), + neighbors(get_neighbors(board_size)), + ansi_color_output_(ansi_color_output) { + board_.resize(board_size * board_size); + for (int i = 0; i < board_.size(); i++) { + Move m = ActionToMove(i); + board_[i] = Cell((m.OnBoard() ? kPlayerNone : kPlayerInvalid), i, + m.Edge(board_size)); + } +} + +Move YState::ActionToMove(Action action_id) const { + return Move(action_id % board_size_, action_id / board_size_, board_size_); +} + +std::vector YState::LegalActions() const { + // Can move in any empty cell. + std::vector moves; + if (IsTerminal()) return moves; + moves.reserve(board_.size() - moves_made_); + for (int cell = 0; cell < board_.size(); ++cell) { + if (board_[cell].player == kPlayerNone) { + moves.push_back(cell); + } + } + return moves; +} + +std::string YState::ActionToString(Player player, Action action_id) const { + return ActionToMove(action_id).ToString(); +} + +std::string YState::ToString() const { + // Generates something like: + // a b c d e f g h i j k + // 1 O @ O O . @ @ O O @ O + // 2 . O O . O @ @ . O O + // 3 . O @ @ O @ O O @ + // 4 O O . @ . @ O O + // 5 . . . @[@]@ O + // 6 @ @ @ O O @ + // 7 @ . O @ O + // 8 . @ @ O + // 9 @ @ . + // 10 O . + // 11 @ + + std::string white = "O"; + std::string black = "@"; + std::string empty = "."; + std::string coord = ""; + std::string reset = ""; + if (ansi_color_output_) { + std::string esc = "\033"; + reset = esc + "[0m"; + coord = esc + "[1;37m"; // bright white + empty = reset + "."; + white = esc + "[1;33m" + "@"; // bright yellow + black = esc + "[1;34m" + "@"; // bright blue + } + + std::ostringstream out; + + // Top x coords. + out << ' '; + for (int x = 0; x < board_size_; x++) { + out << ' ' << coord << static_cast('a' + x); + } + out << '\n'; + + for (int y = 0; y < board_size_; y++) { + out << std::string(y + ((y + 1) < 10), ' '); // Leading space. + out << coord << (y + 1); // Leading y coord. + + bool found_last = false; + for (int x = 0; x < board_size_ - y; x++) { + Move pos(x, y, board_size_); + + // Spacing and last-move highlight. + if (found_last) { + out << coord << ']'; + found_last = false; + } else if (last_move_ == pos) { + out << coord << '['; + found_last = true; + } else { + out << ' '; + } + + // Actual piece. + Player p = board_[pos.xy].player; + if (p == kPlayerNone) out << empty; + if (p == kPlayer1) out << white; + if (p == kPlayer2) out << black; + } + if (found_last) { + out << coord << ']'; + } + out << '\n'; + } + out << reset; + return out.str(); +} + +std::vector YState::Returns() const { + if (outcome_ == kPlayer1) return {1, -1}; + if (outcome_ == kPlayer2) return {-1, 1}; + return {0, 0}; // Unfinished +} + +std::string YState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return HistoryString(); +} + +std::string YState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + return ToString(); +} + +int PlayerRelative(YPlayer state, Player current) { + switch (state) { + case kPlayer1: + return current == 0 ? 0 : 1; + case kPlayer2: + return current == 1 ? 
0 : 1; + case kPlayerNone: + return 2; + default: + SpielFatalError("Unknown player type."); + } +} + +void YState::ObservationTensor(Player player, absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + + TensorView<2> view(values, {kCellStates, static_cast(board_.size())}, + true); + for (int i = 0; i < board_.size(); ++i) { + if (board_[i].player != kPlayerInvalid) { + view[{PlayerRelative(board_[i].player, player), i}] = 1.0; + } + } +} + +void YState::DoApplyAction(Action action) { + SPIEL_CHECK_EQ(board_[action].player, kPlayerNone); + SPIEL_CHECK_EQ(outcome_, kPlayerNone); + + Move move = ActionToMove(action); + SPIEL_CHECK_TRUE(move.OnBoard()); + + last_move_ = move; + board_[move.xy].player = current_player_; + moves_made_++; + + for (const Move& m : neighbors[move.xy]) { + if (m.OnBoard() && current_player_ == board_[m.xy].player) { + JoinGroups(move.xy, m.xy); + } + } + + if (board_[FindGroupLeader(move.xy)].edge == 0x7) { // ie all 3 edges. + outcome_ = current_player_; + } + + current_player_ = (current_player_ == kPlayer1 ? kPlayer2 : kPlayer1); +} + +int YState::FindGroupLeader(int cell) { + int parent = board_[cell].parent; + if (parent != cell) { + do { // Follow the parent chain up to the group leader. + parent = board_[parent].parent; + } while (parent != board_[parent].parent); + // Do path compression, but only the current one to avoid recursion. + board_[cell].parent = parent; + } + return parent; +} + +bool YState::JoinGroups(int cell_a, int cell_b) { + int leader_a = FindGroupLeader(cell_a); + int leader_b = FindGroupLeader(cell_b); + + if (leader_a == leader_b) // Already the same group. + return true; + + if (board_[leader_a].size < board_[leader_b].size) { + // Force group a's subtree to be bigger. + std::swap(leader_a, leader_b); + } + + // Group b joins group a. + board_[leader_b].parent = leader_a; + board_[leader_a].size += board_[leader_b].size; + board_[leader_a].edge |= board_[leader_b].edge; + + return false; +} + +std::unique_ptr YState::Clone() const { + return std::unique_ptr(new YState(*this)); +} + +YGame::YGame(const GameParameters& params) + : Game(kGameType, params), + board_size_(ParameterValue("board_size")), + ansi_color_output_(ParameterValue("ansi_color_output")) {} + +} // namespace y_game +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/y/y.h b/scenarios/bargaining/open_spiel/open_spiel/games/y/y.h new file mode 100644 index 0000000..6b0af9a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/y/y.h @@ -0,0 +1,201 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef OPEN_SPIEL_GAMES_Y_H_ +#define OPEN_SPIEL_GAMES_Y_H_ + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/spiel.h" + +// https://en.wikipedia.org/wiki/Y_(game) +// Does not implement pie rule to balance the game +// +// Parameters: +// "board_size" int size of the board (default = 19) +// "ansi_color_output" bool Whether to color the output for a terminal. + +namespace open_spiel { +namespace y_game { + +inline constexpr int kNumPlayers = 2; +inline constexpr int kDefaultBoardSize = 19; +inline constexpr int kMaxNeighbors = + 6; // Maximum number of neighbors for a cell +inline constexpr int kCellStates = 1 + kNumPlayers; + +enum YPlayer : uint8_t { + kPlayer1, + kPlayer2, + kPlayerNone, + kPlayerInvalid, +}; + +enum MoveSpecial { + kMoveNone = -1, + kMoveUnknown = -2, + kMoveOffset = -3, +}; + +int CalcXY(int x, int y, int board_size) { + if (x >= 0 && y >= 0 && x < board_size && y < board_size && + (x + y < board_size)) { + return x + y * board_size; + } else { + return kMoveUnknown; + } +} + +struct Move { + int8_t x, y; // The x,y coordinates + int16_t xy; // precomputed x + y * board_size as an index into the array. + + inline constexpr Move(MoveSpecial m = kMoveUnknown) : x(-1), y(-1), xy(m) {} + inline constexpr Move(int x_, int y_, MoveSpecial m) : x(x_), y(y_), xy(m) {} + Move(int x_, int y_, int board_size) + : x(x_), y(y_), xy(CalcXY(x_, y_, board_size)) {} + + std::string ToString() const; + + bool operator==(const Move& b) const { return xy == b.xy; } + bool operator!=(const Move& b) const { return xy != b.xy; } + bool operator==(const MoveSpecial& b) const { return xy == b; } + bool operator!=(const MoveSpecial& b) const { return xy != b; } + + // Whether the move is valid and on the board. May be invalid because it is + // a MoveSpecial, in the cut-off corner, or otherwise off the board. + bool OnBoard() const { return xy >= 0; } + + // Flags for which edge this move is part of. + int Edge(int board_size) const; +}; + +// List of neighbors of a cell: [cell][direction] +typedef std::vector> NeighborList; + +// State of an in-play game. +class YState : public State { + // Represents a single cell on the board, as well as the structures needed for + // groups of cells. Groups of cells are defined by a union-find structure + // embedded in the array of cells. Following the `parent` indices will lead to + // the group leader which has the up to date size and edge + // connectivity of that group. Size and edge are not valid for any + // cell that is not a group leader. + struct Cell { + // Who controls this cell. + YPlayer player; + + // A parent index to allow finding the group leader. It is the leader of the + // group if it points to itself. Allows path compression to shorten the path + // from a direct parent to the leader. + uint16_t parent; + + // These three are only defined for the group leader's cell. + uint16_t size; // Size of this group of cells. + uint8_t edge; // A bitset of which edges this group is connected to. + + Cell() {} + Cell(YPlayer player_, int parent_, int edge_) + : player(player_), parent(parent_), size(1), edge(edge_) {} + }; + + public: + YState(std::shared_ptr game, int board_size, + bool ansi_color_output = false); + + YState(const YState&) = default; + + Player CurrentPlayer() const override { + return IsTerminal() ? 
kTerminalPlayerId : static_cast(current_player_); + } + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override { return outcome_ != kPlayerNone; } + std::vector Returns() const override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + + // A 3d tensor, 3 player-relative one-hot 2d planes. The layers are: the + // specified player, the other player, and empty. + void ObservationTensor(Player player, + absl::Span values) const override; + std::unique_ptr Clone() const override; + std::vector LegalActions() const override; + + protected: + void DoApplyAction(Action action) override; + + // Find the leader of the group. Not const due to union-find path compression. + int FindGroupLeader(int cell); + + // Join the groups of two positions, propagating group size, and edge + // connections. Returns true if they were already the same group. + bool JoinGroups(int cell_a, int cell_b); + + // Turn an action id into a `Move` with an x,y. + Move ActionToMove(Action action_id) const; + + private: + std::vector board_; + YPlayer current_player_ = kPlayer1; + YPlayer outcome_ = kPlayerNone; + const int board_size_; + int moves_made_ = 0; + Move last_move_ = kMoveNone; + const NeighborList& neighbors; + const bool ansi_color_output_; +}; + +// Game object. +class YGame : public Game { + public: + explicit YGame(const GameParameters& params); + + int NumDistinctActions() const override { + // Really size*(size+1)/2, but that's harder to represent, so the extra + // actions in the corner are never legal. + return board_size_ * board_size_; + } + std::unique_ptr NewInitialState() const override { + return std::unique_ptr( + new YState(shared_from_this(), board_size_, ansi_color_output_)); + } + int NumPlayers() const override { return kNumPlayers; } + double MinUtility() const override { return -1; } + absl::optional UtilitySum() const override { return 0; } + double MaxUtility() const override { return 1; } + std::vector ObservationTensorShape() const override { + return {kCellStates, board_size_, board_size_}; + } + int MaxGameLength() const override { + // The true number of playable cells on the board. + // No stones are removed, and someone will win by filling the board. + // Increase this by one if swap is ever implemented. + return board_size_ * (board_size_ + 1) / 2; + } + + private: + const int board_size_; + const bool ansi_color_output_ = false; +}; + +} // namespace y_game +} // namespace open_spiel + +#endif // OPEN_SPIEL_GAMES_Y_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/games/y/y_test.cc b/scenarios/bargaining/open_spiel/open_spiel/games/y/y_test.cc new file mode 100644 index 0000000..d69d301 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/games/y/y_test.cc @@ -0,0 +1,50 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace y_game { +namespace { + +namespace testing = open_spiel::testing; + +void BasicYTests() { + testing::LoadGameTest("y(board_size=9)"); + testing::NoChanceOutcomesTest(*LoadGame("y(board_size=9)")); + + testing::RandomSimTest(*LoadGame("y"), 10); + + // All the sizes we care about. + for (int i = 5; i <= 26; i++) { + testing::RandomSimTest(*LoadGame(absl::StrCat("y(board_size=", i, ")")), + 10); + } + + // Ansi colors! + testing::RandomSimTest( + *LoadGame("y", {{"board_size", GameParameter(9)}, + {"ansi_color_output", GameParameter(true)}}), + 1); + testing::RandomSimTest(*LoadGame("y(board_size=10,ansi_color_output=True)"), + 3); +} + +} // namespace +} // namespace y_game +} // namespace open_spiel + +int main(int argc, char** argv) { open_spiel::y_game::BasicYTests(); } diff --git a/scenarios/bargaining/open_spiel/open_spiel/go/CMakeLists.txt b/scenarios/bargaining/open_spiel/open_spiel/go/CMakeLists.txt new file mode 100644 index 0000000..95ad95e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/go/CMakeLists.txt @@ -0,0 +1,40 @@ +# Note: GO API is disabled in ../CMakeLists.txt for now due to failing tests: +# # openspiel_test +# [openspiel_test] +# ./example_leduc_test.go:14:1: ExampleLeduc refers to unknown identifier: Leduc +# ./example_test.go:10:1: ExampleTicTacToe refers to unknown identifier: TicTacToe +# ./example_test.go:138:1: ExampleLoadParametrizedGame refers to unknown identifier: LoadParametrizedGame +# FAIL openspiel [build failed] + +set(GO_BINDINGS ${GO_BINDINGS} + go_open_spiel.cc + go_open_spiel.h +) + +set(GO_API_FILES ${GO_API_FILES} + examples/example.go + example_leduc_test.go + example_test.go + go_open_spiel.h + openspiel.go +) + +# Note: needs to be SHARED rather than MODULE to work on MacOS +add_library(gospiel SHARED ${GO_BINDINGS} ${OPEN_SPIEL_OBJECTS}) + +# Copy the files keeping the directories intact +foreach(go_api_file IN LISTS GO_API_FILES) + get_filename_component(file_dir ${go_api_file} DIRECTORY) + file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${file_dir}) + file(COPY ${go_api_file} DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/${file_dir}) +endforeach(go_api_file) + +execute_process(COMMAND go mod init openspiel + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + + +# add_test(NAME gospiel_test COMMAND go test -v) +# set_property(TEST gospiel_test +# PROPERTY ENVIRONMENT +# LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}; +# TEST_SRCDIR=${CMAKE_CURRENT_BINARY_DIR}) diff --git a/scenarios/bargaining/open_spiel/open_spiel/go/README.md b/scenarios/bargaining/open_spiel/open_spiel/go/README.md new file mode 100644 index 0000000..f665b40 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/go/README.md @@ -0,0 +1,16 @@ +# OpenSpiel Go API + +*Note:* This API has issues and is no longer maintained. See +[issue 1301](https://github.com/google-deepmind/open_spiel/issues/1301) for +details. + +This is a basic [Go](https://golang.org/) API for OpenSpiel. Please note that it +is currently experimental and may not work as expected. Please see the +[announcement thread](https://github.com/deepmind/open_spiel/issues/541) and +report any issues. Fixes and improvements are more than welcome! + +See the `CMakeLists.txt` to see how it is setup: a dynamic shared library is +created similarly to python extension (`libgospiel.so`). 
A simple go module is +created in this directory using `go mod init` so that go tests can be run. Note +that currently `LD_LIBRARY_PATH` must include the location of the dynamic +library so that it gets properly loaded at run time. diff --git a/scenarios/bargaining/open_spiel/open_spiel/go/example_leduc_test.go b/scenarios/bargaining/open_spiel/open_spiel/go/example_leduc_test.go new file mode 100644 index 0000000..68d8e38 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/go/example_leduc_test.go @@ -0,0 +1,236 @@ +package openspiel_test + +import ( + "fmt" + "strings" + + "openspiel" +) + +func toString(s *openspiel.State) string { + return strings.ReplaceAll(s.String(), " \n", "\n") +} + +func ExampleLeduc() { + game := openspiel.LoadGame("leduc_poker") + fmt.Println(game.LongName()) + state := game.NewInitialState() + + // Chance node + fmt.Println(state.IsTerminal()) + fmt.Println(state.IsChanceNode()) + action0, probability0 := state.ChanceOutcomes() + fmt.Println(action0) + fmt.Println(probability0) + fmt.Print(toString(state)) + fmt.Println(state.LegalActions()) + state.ApplyAction(4) + + // Chance node + fmt.Println(state.IsTerminal()) + fmt.Println(state.IsChanceNode()) + action1, probability1 := state.ChanceOutcomes() + fmt.Println(action1) + fmt.Println(probability1) + fmt.Print(toString(state)) + fmt.Println(state.LegalActions()) + state.ApplyAction(3) + + stateClone := state.Clone() + + // player 0 + fmt.Println(state.IsTerminal()) + fmt.Println(state.IsChanceNode()) + fmt.Print(toString(state)) + fmt.Println(state.Observation()) + fmt.Println(state.ObservationPlayer(0)) + fmt.Println(state.ObservationPlayer(1)) + fmt.Println(state.InformationState()) + fmt.Println(state.InformationStatePlayer(0)) + fmt.Println(state.InformationStatePlayer(1)) + fmt.Println(state.LegalActions()) + state.ApplyAction(1) + + // player 1 + fmt.Println(state.IsTerminal()) + fmt.Println(state.IsChanceNode()) + fmt.Print(toString(state)) + fmt.Println(state.Observation()) + fmt.Println(state.ObservationPlayer(0)) + fmt.Println(state.ObservationPlayer(1)) + fmt.Println(state.InformationState()) + fmt.Println(state.InformationStatePlayer(0)) + fmt.Println(state.InformationStatePlayer(1)) + fmt.Println(state.LegalActions()) + state.ApplyAction(1) + + // Chance node + fmt.Println(state.IsTerminal()) + fmt.Println(state.IsChanceNode()) + action2, probability2 := state.ChanceOutcomes() + fmt.Println(action2) + fmt.Println(probability2) + fmt.Print(toString(state)) + fmt.Println(state.LegalActions()) + state.ApplyAction(1) + + // player 0 + fmt.Println(state.IsTerminal()) + fmt.Println(state.IsChanceNode()) + fmt.Print(toString(state)) + fmt.Println(state.Observation()) + fmt.Println(state.ObservationPlayer(0)) + fmt.Println(state.ObservationPlayer(1)) + fmt.Println(state.InformationState()) + fmt.Println(state.InformationStatePlayer(0)) + fmt.Println(state.InformationStatePlayer(1)) + fmt.Println(state.LegalActions()) + state.ApplyAction(1) + + // player 1 + fmt.Println(state.IsTerminal()) + fmt.Println(state.IsChanceNode()) + fmt.Print(toString(state)) + fmt.Println(state.Observation()) + fmt.Println(state.ObservationPlayer(0)) + fmt.Println(state.ObservationPlayer(1)) + fmt.Println(state.InformationState()) + fmt.Println(state.InformationStatePlayer(0)) + fmt.Println(state.InformationStatePlayer(1)) + fmt.Println(state.LegalActions()) + state.ApplyAction(1) + + fmt.Println(state.IsTerminal()) + fmt.Println(state.IsChanceNode()) + fmt.Print(toString(state)) + + fmt.Printf("Player 0 return: 
%f\n", state.PlayerReturn(0)) + fmt.Printf("Player 1 return: %f\n", state.PlayerReturn(1)) + + fmt.Println(stateClone.IsTerminal()) + fmt.Println(state.IsChanceNode()) + fmt.Print(toString(state)) + + // Output: + // Leduc Poker + // false + // true + // [0 1 2 3 4 5] + // [0.16666667 0.16666667 0.16666667 0.16666667 0.16666667 0.16666667] + // Round: 1 + // Player: -1 + // Pot: 2 + // Money (p1 p2 ...): 99 99 + // Cards (public p1 p2 ...): -10000 -10000 -10000 + // Round 1 sequence: + // Round 2 sequence: + // [0 1 2 3 4 5] + // false + // true + // [0 1 2 3 5] + // [0.2 0.2 0.2 0.2 0.2] + // Round: 1 + // Player: -1 + // Pot: 2 + // Money (p1 p2 ...): 99 99 + // Cards (public p1 p2 ...): -10000 4 -10000 + // Round 1 sequence: + // Round 2 sequence: + // [0 1 2 3 5] + // false + // false + // Round: 1 + // Player: 0 + // Pot: 2 + // Money (p1 p2 ...): 99 99 + // Cards (public p1 p2 ...): -10000 4 3 + // Round 1 sequence: + // Round 2 sequence: + // [1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1] + // [1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1] + // [0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 1] + // [1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + // [1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + // [0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + // [1 2] + // false + // false + // Round: 1 + // Player: 1 + // Pot: 2 + // Money (p1 p2 ...): 99 99 + // Cards (public p1 p2 ...): -10000 4 3 + // Round 1 sequence: Call + // Round 2 sequence: + // [0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 1] + // [1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1] + // [0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 1] + // [0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + // [1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + // [0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + // [1 2] + // false + // true + // [0 1 2 5] + // [0.25 0.25 0.25 0.25] + // Round: 2 + // Player: -1 + // Pot: 2 + // Money (p1 p2 ...): 99 99 + // Cards (public p1 p2 ...): -10000 4 3 + // Round 1 sequence: Call, Call + // Round 2 sequence: + // [0 1 2 5] + // false + // false + // Round: 2 + // Player: 0 + // Pot: 2 + // Money (p1 p2 ...): 99 99 + // Cards (public p1 p2 ...): 1 4 3 + // Round 1 sequence: Call, Call + // Round 2 sequence: + // [1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 1] + // [1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 1] + // [0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 1] + // [1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0] + // [1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0] + // [0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0] + // [1 2] + // false + // false + // Round: 2 + // Player: 1 + // Pot: 2 + // Money (p1 p2 ...): 99 99 + // Cards (public p1 p2 ...): 1 4 3 + // Round 1 sequence: Call, Call + // Round 2 sequence: Call + // [0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 1] + // [1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 1] + // [0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 1] + // [0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0] + // [1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0] + // [0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0] + // [1 2] + // true + // false + // Round: 2 + // Player: 1 + // Pot: 0 + // Money (p1 p2 ...): 101 99 + // Cards (public p1 p2 ...): 1 4 3 + // Round 1 sequence: Call, Call + // Round 2 sequence: Call, Call + // Player 0 return: 1.000000 + // Player 1 return: -1.000000 + // false + // false + // Round: 2 + // Player: 1 + // Pot: 0 + // Money (p1 p2 ...): 101 99 + // Cards (public p1 p2 ...): 1 4 3 + // Round 1 sequence: Call, Call + // Round 2 sequence: Call, Call +} 
diff --git a/scenarios/bargaining/open_spiel/open_spiel/go/example_test.go b/scenarios/bargaining/open_spiel/open_spiel/go/example_test.go new file mode 100644 index 0000000..1a07300 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/go/example_test.go @@ -0,0 +1,166 @@ +package openspiel_test + +import ( + "fmt" + "strings" + + "openspiel" +) + +func ExampleTicTacToe() { + game := openspiel.LoadGame("tic_tac_toe") + fmt.Println(game.LongName()) + state := game.NewInitialState() + + fmt.Println(state.IsTerminal()) + fmt.Println(state.String()) + fmt.Println(state.Observation()) + state.ApplyAction(4) // Middle + + stateClone := state.Clone() + fmt.Println(state.IsTerminal()) + fmt.Println(state.String()) + fmt.Println(state.Observation()) + + fmt.Println(state.IsTerminal()) + fmt.Println(state.String()) + fmt.Println(state.Observation()) + state.ApplyAction(0) // Top-left + + fmt.Println(state.IsTerminal()) + fmt.Println(state.String()) + fmt.Println(state.Observation()) + state.ApplyAction(2) // Top-right + + fmt.Println(state.IsTerminal()) + fmt.Println(state.String()) + fmt.Println(state.Observation()) + state.ApplyAction(6) // Bottom-left + + fmt.Println(state.IsTerminal()) + fmt.Println(state.String()) + fmt.Println(state.Observation()) + state.ApplyAction(3) // Middle-left + + fmt.Println(state.IsTerminal()) + fmt.Println(state.String()) + fmt.Println(state.Observation()) + state.ApplyAction(5) // Middle-right + + fmt.Println(state.IsTerminal()) + fmt.Println(state.String()) + fmt.Println(state.Observation()) + state.ApplyAction(7) // Bottom + + fmt.Println(state.IsTerminal()) + fmt.Println(state.String()) + fmt.Println(state.Observation()) + state.ApplyAction(1) // Top + + fmt.Println(state.IsTerminal()) + fmt.Println(state.String()) + fmt.Println(state.Observation()) + state.ApplyAction(8) // Bottom-right + + fmt.Println(state.IsTerminal()) + fmt.Println(state.String()) + + fmt.Printf("Player 0 return: %f\n", state.PlayerReturn(0)) + fmt.Printf("Player 1 return: %f\n", state.PlayerReturn(1)) + + fmt.Println(stateClone.IsTerminal()) + fmt.Println(stateClone.String()) + + // Output: + // Tic Tac Toe + // false + // ... + // ... + // ... + // [1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + // false + // ... + // .x. + // ... + // [1 1 1 1 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0] + // false + // ... + // .x. + // ... + // [1 1 1 1 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0] + // false + // o.. + // .x. + // ... + // [0 1 1 1 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0] + // false + // o.x + // .x. + // ... + // [0 1 0 1 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0] + // false + // o.x + // .x. + // o.. + // [0 1 0 1 0 1 0 1 1 1 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 0 0] + // false + // o.x + // xx. + // o.. + // [0 1 0 0 0 1 0 1 1 1 0 0 0 0 0 1 0 0 0 0 1 1 1 0 0 0 0] + // false + // o.x + // xxo + // o.. + // [0 1 0 0 0 0 0 1 1 1 0 0 0 0 1 1 0 0 0 0 1 1 1 0 0 0 0] + // false + // o.x + // xxo + // ox. + // [0 1 0 0 0 0 0 0 1 1 0 0 0 0 1 1 0 0 0 0 1 1 1 0 0 1 0] + // false + // oox + // xxo + // ox. + // [0 0 0 0 0 0 0 0 1 1 1 0 0 0 1 1 0 0 0 0 1 1 1 0 0 1 0] + // true + // oox + // xxo + // oxx + // Player 0 return: 0.000000 + // Player 1 return: 0.000000 + // false + // ... + // .x. + // ... 
+} + +func ExampleLoadParametrizedGame() { + game := openspiel.LoadGame("breakthrough(rows=6,columns=6)") + state := game.NewInitialState() + fmt.Println(state.String()) + + game = openspiel.LoadGame("turn_based_simultaneous_game(game=goofspiel(num_cards=5,imp_info=true,points_order=descending))") + state = game.NewInitialState() + goofStringLines := strings.Split(state.String(), "\n") + for i := 0; i < len(goofStringLines); i++ { + fmt.Println(strings.TrimSpace(goofStringLines[i])) + } + + // Output: + // 6bbbbbb + // 5bbbbbb + // 4...... + // 3...... + // 2wwwwww + // 1wwwwww + // abcdef + // + // Partial joint action: + // P0 hand: 1 2 3 4 5 + // P1 hand: 1 2 3 4 5 + // P0 actions: + // P1 actions: + // Point card sequence: 5 + // Points: 0 0 +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/go/examples/example.go b/scenarios/bargaining/open_spiel/open_spiel/go/examples/example.go new file mode 100644 index 0000000..7edb89c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/go/examples/example.go @@ -0,0 +1,43 @@ +// Package main provides a simple example use of the Go API. +package main + +import ( + "fmt" + + "math/rand" + + "openspiel" +) + +func main() { + + openspiel.Test() + + game := openspiel.LoadGame("breakthrough") + fmt.Printf("Game's long name is %s\n", game.LongName()) + + state := game.NewInitialState() + + for !state.IsTerminal() { + fmt.Printf("\n%s", state.String()) + + curPlayer := state.CurrentPlayer() + legalActions := state.LegalActions() + for i := 0; i < len(legalActions); i++ { + fmt.Printf("Legal action: %s\n", state.ActionToString(curPlayer, legalActions[i])) + } + + sampledIdx := rand.Intn(len(legalActions)) + sampledAction := legalActions[sampledIdx] + fmt.Printf("Sampled action: %s\n", state.ActionToString(curPlayer, sampledAction)) + + state.ApplyAction(sampledAction) + } + + fmt.Printf("\nTerminal state reached!\n") + fmt.Printf(state.String()) + fmt.Printf("\n") + for i := 0; i < game.NumPlayers(); i++ { + fmt.Printf("Return for player %d is %f\n", i, state.PlayerReturn(i)) + } +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/go/go_open_spiel.cc b/scenarios/bargaining/open_spiel/open_spiel/go/go_open_spiel.cc new file mode 100644 index 0000000..ab18506 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/go/go_open_spiel.cc @@ -0,0 +1,227 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "go_open_spiel.h" // NOLINT + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/spiel.h" + +using open_spiel::Game; +using open_spiel::State; + +/* We need this because games are shared pointers and we need to return + raw pointers to objects that contain them.*/ +namespace { +struct GamePointerHolder { + std::shared_ptr ptr; +}; +} // namespace + +extern "C" { + +void Test() { std::cout << "Testing, testing, 1 2 3!" << std::endl; } + +/* Game functions. 
*/ +void* LoadGame(const char* name) { + return reinterpret_cast<void*>( + new GamePointerHolder{open_spiel::LoadGame(name)}); +} + +void DeleteGame(void* game_ptr) { + GamePointerHolder* game = reinterpret_cast<GamePointerHolder*>(game_ptr); + delete game; +} + +char* GameShortName(const void* game_ptr) { + std::shared_ptr<const Game> game = + reinterpret_cast<const GamePointerHolder*>(game_ptr)->ptr; + std::string short_name = game->GetType().short_name; + return strdup(short_name.c_str()); +} + +char* GameLongName(const void* game_ptr) { + std::shared_ptr<const Game> game = + reinterpret_cast<const GamePointerHolder*>(game_ptr)->ptr; + std::string long_name = game->GetType().long_name; + return strdup(long_name.c_str()); +} + +void* GameNewInitialState(const void* game_ptr) { + std::shared_ptr<const Game> game = + reinterpret_cast<const GamePointerHolder*>(game_ptr)->ptr; + std::unique_ptr<State> state = game->NewInitialState(); + void* state_ptr = reinterpret_cast<void*>(state.release()); + return state_ptr; +} + +int GameNumPlayers(const void* game_ptr) { + std::shared_ptr<const Game> game = + reinterpret_cast<const GamePointerHolder*>(game_ptr)->ptr; + return game->NumPlayers(); +} + +int GameMaxGameLength(const void* game_ptr) { + std::shared_ptr<const Game> game = + reinterpret_cast<const GamePointerHolder*>(game_ptr)->ptr; + return game->MaxGameLength(); +} + +int GameNumDistinctActions(const void* game_ptr) { + std::shared_ptr<const Game> game = + reinterpret_cast<const GamePointerHolder*>(game_ptr)->ptr; + return game->NumDistinctActions(); +} + +void DeleteState(void* state_ptr) { + State* state = reinterpret_cast<State*>(state_ptr); + delete state; +} + +void* StateClone(const void* state_ptr) { + const State* state = reinterpret_cast<const State*>(state_ptr); + std::unique_ptr<State> state_copy = state->Clone(); + return reinterpret_cast<void*>(state_copy.release()); +} + +char* StateToString(const void* state_ptr) { + const State* state = reinterpret_cast<const State*>(state_ptr); + std::string state_str = state->ToString(); + return strdup(state_str.c_str()); +} + +int StateNumLegalActions(const void* state_ptr) { + const State* state = reinterpret_cast<const State*>(state_ptr); + return state->LegalActions().size(); +} + +int StateNumDistinctActions(const void* state_ptr) { + const State* state = reinterpret_cast<const State*>(state_ptr); + return state->NumDistinctActions(); +} + +void StateFillLegalActions(const void* state_ptr, void* array_ptr) { + const State* state = reinterpret_cast<const State*>(state_ptr); + int* legal_actions_array = reinterpret_cast<int*>(array_ptr); + absl::c_copy(state->LegalActions(), legal_actions_array); +} + +void StateFillLegalActionsMask(const void* state_ptr, void* array_ptr) { + const State* state = reinterpret_cast<const State*>(state_ptr); + int* legal_actions_mask_array = reinterpret_cast<int*>(array_ptr); + std::vector<int> legal_actions_mask = state->LegalActionsMask(); + absl::c_copy(state->LegalActionsMask(), legal_actions_mask_array); +} + +int StateSizeObservation(const void* state_ptr) { + const State* state = reinterpret_cast<const State*>(state_ptr); + return state->GetGame()->ObservationTensorSize(); +} + +void StateFillObservation(const void* state_ptr, void* array_ptr) { + const State* state = reinterpret_cast<const State*>(state_ptr); + double* observation_array = reinterpret_cast<double*>(array_ptr); + absl::c_copy(state->ObservationTensor(), observation_array); +} + +int StateSizeChanceOutcomes(const void* state_ptr) { + const State* state = reinterpret_cast<const State*>(state_ptr); + return state->ChanceOutcomes().size(); +} + +void StateFillChanceOutcomes(const void* state_ptr, void* action_ptr, + void* proba_ptr) { + const State* state = reinterpret_cast<const State*>(state_ptr); + int* action_array = reinterpret_cast<int*>(action_ptr); + double* proba_array = reinterpret_cast<double*>(proba_ptr); + std::vector<std::pair<open_spiel::Action, double>> outcomes = + state->ChanceOutcomes(); + std::pair<open_spiel::Action, double> outcome; + for
(int i = 0; i < outcomes.size(); ++i) { + outcome = outcomes[i]; + action_array[i] = outcome.first; + proba_array[i] = outcome.second; + } +} + +void StateFillObservationPlayer(const void* state_ptr, void* array_ptr, + int player) { + const State* state = reinterpret_cast<const State*>(state_ptr); + double* observation_array = reinterpret_cast<double*>(array_ptr); + std::vector<float> observation = state->ObservationTensor(player); + for (int i = 0; i < observation.size(); ++i) { + observation_array[i] = observation[i]; + } +} + +int StateSizeInformationState(const void* state_ptr) { + const State* state = reinterpret_cast<const State*>(state_ptr); + return state->GetGame()->InformationStateTensorSize(); +} + +void StateFillInformationState(const void* state_ptr, void* array_ptr) { + const State* state = reinterpret_cast<const State*>(state_ptr); + double* information_state_array = reinterpret_cast<double*>(array_ptr); + std::vector<float> information_state = state->InformationStateTensor(); + for (int i = 0; i < information_state.size(); ++i) { + information_state_array[i] = information_state[i]; + } +} + +void StateFillInformationStatePlayer(const void* state_ptr, void* array_ptr, + int player) { + const State* state = reinterpret_cast<const State*>(state_ptr); + double* information_state_array = reinterpret_cast<double*>(array_ptr); + std::vector<float> information_state = state->InformationStateTensor(player); + absl::c_copy(information_state, information_state_array); +} + +int StateIsTerminal(const void* state_ptr) { + const State* state = reinterpret_cast<const State*>(state_ptr); + return state->IsTerminal() ? 1 : 0; +} + +int StateIsChanceNode(const void* state_ptr) { + const State* state = reinterpret_cast<const State*>(state_ptr); + return state->IsChanceNode() ? 1 : 0; +} + +int StateCurrentPlayer(const void* state_ptr) { + const State* state = reinterpret_cast<const State*>(state_ptr); + return state->CurrentPlayer(); +} + +char* StateActionToString(const void* state_ptr, int player, int action) { + const State* state = reinterpret_cast<const State*>(state_ptr); + std::string action_str = state->ActionToString(player, action); + return strdup(action_str.c_str()); +} + +void StateApplyAction(void* state_ptr, int action) { + State* state = reinterpret_cast<State*>(state_ptr); + state->ApplyAction(action); +} + +double StatePlayerReturn(const void* state_ptr, int player) { + const State* state = reinterpret_cast<const State*>(state_ptr); + return state->PlayerReturn(player); +} + +} /* extern "C" */ diff --git a/scenarios/bargaining/open_spiel/open_spiel/go/go_open_spiel.h b/scenarios/bargaining/open_spiel/open_spiel/go/go_open_spiel.h new file mode 100644 index 0000000..61fd6bc --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/go/go_open_spiel.h @@ -0,0 +1,71 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef __GO_OPEN_SPIEL_H__ +#define __GO_OPEN_SPIEL_H__ + +/* A pure C API that wraps the C++ OpenSpiel core. */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Test */ +void Test(); + +/* Game functions.
*/ +void* LoadGame(const char* name); +void DeleteGame(void* game_ptr); +char* GameShortName(const void* game_ptr); +char* GameLongName(const void* game_ptr); +void* GameNewInitialState(const void* game_ptr); +int GameNumPlayers(const void* game_ptr); +int GameMaxGameLength(const void* game_ptr); +int GameNumDistinctActions(const void* game_ptr); + +/* State functions. */ +void DeleteState(void* state_ptr); +void* StateClone(const void* state_ptr); +char* StateToString(const void* state_ptr); +int StateNumLegalActions(const void* state_ptr); +int StateNumDistinctActions(const void* state_ptr); +void StateFillLegalActions(const void* state_ptr, void* array_ptr); +void StateFillLegalActionsMask(const void* state_ptr, void* array_ptr); +int StateSizeObservation(const void* state_ptr); +void StateFillObservation(const void* state_ptr, void* array_ptr); +void StateFillObservationPlayer(const void* state_ptr, void* array_ptr, + int player); +double StateObservation(const void* observation_ptr, int idx); +int StateSizeInformationState(const void* state_ptr); +void StateFillInformationState(const void* state_ptr, void* array_ptr); +void StateFillInformationStatePlayer(const void* state_ptr, void* array_ptr, + int player); +double StateInformationState(const void* information_state_ptr, int idx); + +int StateSizeChanceOutcomes(const void* state_ptr); +void StateFillChanceOutcomes(const void* state_ptr, void* action_ptr, + void* proba_ptr); + +int StateIsTerminal(const void* state_ptr); +int StateIsChanceNode(const void* state_ptr); +int StateCurrentPlayer(const void* state_ptr); +char* StateActionToString(const void* state_ptr, int player, int action); +void StateApplyAction(void* state_ptr, int action); +double StatePlayerReturn(const void* state_ptr, int player); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/scenarios/bargaining/open_spiel/open_spiel/go/openspiel.go b/scenarios/bargaining/open_spiel/open_spiel/go/openspiel.go new file mode 100644 index 0000000..f9694e8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/go/openspiel.go @@ -0,0 +1,242 @@ +// Package openspiel provides an API to C++ OpenSpiel. +package openspiel + +// #cgo CFLAGS: -I. +// #cgo LDFLAGS: -L. -L../../build/go -lgospiel +// #include <stdlib.h> +// #include "go_open_spiel.h" +import "C" // keep +import ( + "fmt" + "runtime" + "unsafe" +) + +// Game is a wrapper object around an open_spiel::Game object +type Game struct { + game unsafe.Pointer +} + +// State is a wrapper around an open_spiel::State object +type State struct { + state unsafe.Pointer +} + +// Test prints out a nice testing message! +func Test() { + C.Test() +} + +// LoadGame loads a game! +func LoadGame(name string) *Game { + cs := C.CString(name) + game := &Game{C.LoadGame(cs)} + C.free(unsafe.Pointer(cs)) + runtime.SetFinalizer(game, deleteGame) + return game +} + +// LongName returns the long name of a game +func (game *Game) LongName() string { + cs := C.GameLongName(game.game) + str := C.GoString(cs) + C.free(unsafe.Pointer(cs)) + return str +} + +// ShortName returns the short name of a game +func (game *Game) ShortName() string { + cs := C.GameShortName(game.game) + str := C.GoString(cs) + C.free(unsafe.Pointer(cs)) + return str +} + +// NewInitialState returns a new initial state. +func (game *Game) NewInitialState() *State { + state := &State{C.GameNewInitialState(game.game)} + runtime.SetFinalizer(state, deleteState) + return state +} + +// MaxGameLength returns the maximum length of one game.
+func (game *Game) MaxGameLength() int { + return int(C.GameMaxGameLength(game.game)) +} + +// NumPlayers returns the number of players in this game +func (game *Game) NumPlayers() int { + return int(C.GameNumPlayers(game.game)) +} + +// NumDistinctActions returns a number of distinct actions that the game has +func (game *Game) NumDistinctActions() int { + return int(C.GameNumDistinctActions(game.game)) +} + +// String returns a string representation of the state +func (state *State) String() string { + cs := C.StateToString(state.state) + str := C.GoString(cs) + C.free(unsafe.Pointer(cs)) + return str +} + +// IsTerminal returns whether a state is terminal or not +func (state *State) IsTerminal() bool { + val := C.StateIsTerminal(state.state) + return val == 1 +} + +// IsChanceNode returns whether a state is a chance node or not +func (state *State) IsChanceNode() bool { + val := C.StateIsChanceNode(state.state) + return val == 1 +} + +// Clone clones this state. +func (state *State) Clone() *State { + clone := &State{C.StateClone(state.state)} + runtime.SetFinalizer(clone, deleteState) + return clone +} + +// LegalActions returns a list of legal actions +func (state *State) LegalActions() []int { + length := int(C.StateNumLegalActions(state.state)) + legalActions := make([]int, length) + cppLegalActions := make([]C.int, length) + C.StateFillLegalActions(state.state, unsafe.Pointer(&cppLegalActions[0])) + for i := 0; i < length; i++ { + legalActions[i] = int(cppLegalActions[i]) + } + return legalActions +} + +// LegalActionsMask returns a mask marking all legal actions as true +func (state *State) LegalActionsMask() []bool { + length := int(C.StateNumDistinctActions(state.state)) + legalActionMask := make([]bool, length) + cppLegalActionsMask := make([]C.int, length) + C.StateFillLegalActionsMask(state.state, unsafe.Pointer(&cppLegalActionsMask[0])) + for i := 0; i < length; i++ { + legalActionMask[i] = (cppLegalActionsMask[i] > 0) + } + return legalActionMask +} + +// Observation returns an observation as a list +func (state *State) Observation() []float32 { + length := int(C.StateSizeObservation(state.state)) + observation := make([]float32, length) + cppObservation := make([]C.double, length) + C.StateFillObservation(state.state, unsafe.Pointer(&cppObservation[0])) + for i := 0; i < length; i++ { + observation[i] = float32(cppObservation[i]) + } + return observation +} + +// ObservationPlayer returns an observation as a list +func (state *State) ObservationPlayer(player int) []float32 { + length := int(C.StateSizeObservation(state.state)) + observation := make([]float32, length) + cppObservation := make([]C.double, length) + C.StateFillObservationPlayer(state.state, unsafe.Pointer(&cppObservation[0]), C.int(player)) + for i := 0; i < length; i++ { + observation[i] = float32(cppObservation[i]) + } + return observation +} + +// InformationState returns an observation as a list +func (state *State) InformationState() []float32 { + length := int(C.StateSizeInformationState(state.state)) + informationState := make([]float32, length) + cppInformationState := make([]C.double, length) + C.StateFillInformationState(state.state, unsafe.Pointer(&cppInformationState[0])) + for i := 0; i < length; i++ { + informationState[i] = float32(cppInformationState[i]) + } + return informationState +} + +// InformationStateAsString returns an observation as a list +func (state *State) InformationStateAsString() string { + infostate := state.InformationState() + s := "" + for _, v := range infostate { + s = s 
+ fmt.Sprintf("%f", v) + } + return s +} + +// InformationStatePlayer returns an observation as a list +func (state *State) InformationStatePlayer(player int) []float32 { + length := int(C.StateSizeInformationState(state.state)) + informationState := make([]float32, length) + cppInformationState := make([]C.double, length) + C.StateFillInformationStatePlayer(state.state, unsafe.Pointer(&cppInformationState[0]), C.int(player)) + for i := 0; i < length; i++ { + informationState[i] = float32(cppInformationState[i]) + } + return informationState +} + +// InformationStatePlayerAsString returns an observation as a list +func (state *State) InformationStatePlayerAsString(player int) string { + infostate := state.InformationStatePlayer(player) + s := "" + for _, v := range infostate { + s = s + fmt.Sprintf("%f", v) + } + return s +} + +// ChanceOutcomes returns an action slice and a probability slice +func (state *State) ChanceOutcomes() ([]int, []float32) { + length := int(C.StateSizeChanceOutcomes(state.state)) + action := make([]int, length) + probability := make([]float32, length) + + cppAction := make([]C.int, length) + cppProbability := make([]C.double, length) + + C.StateFillChanceOutcomes(state.state, unsafe.Pointer(&cppAction[0]), unsafe.Pointer(&cppProbability[0])) + for i := 0; i < length; i++ { + action[i] = int(cppAction[i]) + probability[i] = float32(cppProbability[i]) + } + return action, probability +} + +// CurrentPlayer returns the current player to play at the state +func (state *State) CurrentPlayer() int { + return int(C.StateCurrentPlayer(state.state)) +} + +// ActionToString returns a string representation of the action +func (state *State) ActionToString(player int, action int) string { + cs := C.StateActionToString(state.state, C.int(player), C.int(action)) + str := C.GoString(cs) + C.free(unsafe.Pointer(cs)) + return str +} + +// PlayerReturn returns the return for the specified player +func (state *State) PlayerReturn(player int) float32 { + return float32(C.StatePlayerReturn(state.state, C.int(player))) +} + +// ApplyAction applies the specified action to the state +func (state *State) ApplyAction(action int) { + C.StateApplyAction(state.state, C.int(action)) +} + +func deleteGame(game *Game) { + C.DeleteGame(game.game) +} + +func deleteState(state *State) { + C.DeleteState(state.state) +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/README.md b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/README.md new file mode 100644 index 0000000..9ccdaf9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/README.md @@ -0,0 +1,5 @@ +# Integration tests + +This directory regroups high level tests, often testing both the C++ code and +its Python bindings. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/api_test.py b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/api_test.py new file mode 100644 index 0000000..fd4b4eb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/api_test.py @@ -0,0 +1,646 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.integration_tests.api.""" + +import enum +import re +import unittest + +from absl import app +from absl import flags +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np + +from open_spiel.python import games # pylint:disable=unused-import +from open_spiel.python.algorithms import get_all_states +from open_spiel.python.algorithms import sample_some_states +from open_spiel.python.mfg import games as mfg_games # pylint:disable=unused-import +from open_spiel.python.observation import make_observation +import pyspiel + +FLAGS = flags.FLAGS +flags.DEFINE_string("test_only_games", ".*", + "Test only selected games (regex). Defaults to all.") + +_ALL_GAMES = pyspiel.registered_games() + +_GAMES_TO_TEST = set([g.short_name for g in _ALL_GAMES if g.default_loadable]) + +_GAMES_NOT_UNDER_TEST = [ + g.short_name for g in _ALL_GAMES if not g.default_loadable +] + +_GAMES_TO_OMIT_LEGAL_ACTIONS_CHECK = set(["bridge_uncontested_bidding"]) + +# The list of game instances to test on the full tree as tuples +# (name to display, string to pass to load_game). +_GAMES_FULL_TREE_TRAVERSAL_TESTS = [ + ("cliff_walking", "cliff_walking(horizon=7)"), + ("kuhn_poker", "kuhn_poker"), + ("leduc_poker", "leduc_poker"), + ("iigoofspiel4", ("turn_based_simultaneous_game(game=goofspiel(" + "imp_info=True,num_cards=4,points_order=descending))")), + ("kuhn_poker3p", "kuhn_poker(players=3)"), + ("first_sealed_auction", "first_sealed_auction(max_value=2)"), + ("tiny_hanabi", "tiny_hanabi"), + ("nf_auction", ("turn_based_simultaneous_game(game=" + "normal_form_extensive_game(game=" + "first_sealed_auction(max_value=3)))")), + # Disabled by default - big games, slow tests. + # Uncomment to check the games if you modify them. 
+ # ("liars_dice", "liars_dice"), + # ("tiny_bridge_2p", "tiny_bridge_2p"), +] + +_GAMES_FULL_TREE_TRAVERSAL_TESTS_NAMES = [ + g[1] for g in _GAMES_FULL_TREE_TRAVERSAL_TESTS +] + +TOTAL_NUM_STATES = { + # This maps the game name to (chance, playable, terminal) + "cliff_walking": (0, 2119, 6358), + "kuhn_poker": (4, 24, 30), + "leduc_poker": (157, 3780, 5520), + "liars_dice": (7, 147456, 147420), + "iigoofspiel4": (0, 501, 576), + "kuhn_poker3p": (17, 288, 312), + "first_sealed_auction": (12, 10, 14), + "tiny_bridge_2p": (29, 53760, 53340), + "tiny_hanabi": (3, 16, 36), + "nf_auction": (0, 7, 36), +} + +# This is kept to ensure non-regression, but we would like to remove that +# when we can interpret what are these numbers. +PERFECT_RECALL_NUM_STATES = { + "cliff_walking": 2119, + "kuhn_poker": 12, + "leduc_poker": 936, + "liars_dice": 24576, + "iigoofspiel4": 162, + "kuhn_poker3p": 48, + "first_sealed_auction": 4, + "tiny_bridge_2p": 3584, + "tiny_hanabi": 8, + "nf_auction": 2, +} + + +class EnforceAPIOnFullTreeBase(parameterized.TestCase): + """Test properties on the full game tree, instantiating the tree only once. + + A new class, extensing this class will be dynamically created and added as + a unittest class for the games we want to test exhaustively. + """ + + @classmethod + def setUpClass(cls): + super(EnforceAPIOnFullTreeBase, cls).setUpClass() + + cls.all_states = set( + get_all_states.get_all_states( + cls.game, + depth_limit=-1, + include_terminals=True, + include_chance_states=True).values()) + + def test_legal_actions_empty(self): + # We check we have some non-terminal non-random states + self.assertTrue( + any(not s.is_terminal() and not s.is_chance_node() + for s in self.all_states)) + + for state in self.all_states: + if state.is_terminal(): + # Empty on terminal + msg = ("The game %s does not return an empty list on " + "legal_actions() for state %s" % (self.game_name, str(state))) + self.assertEmpty(state.legal_actions(), msg=msg) + for player in range(self.game.num_players()): + msg = ("The game %s does not return an empty list on " + "legal_actions(%i) for state %s" % + (self.game_name, player, str(state))) + self.assertEmpty(state.legal_actions(player), msg=msg) + elif state.is_simultaneous_node(): + # No requirement for legal_actions to be empty, since all players act. + pass + elif state.is_chance_node(): + # Would be an error to request legal actions for a non-chance player. 
+ pass + else: + # Empty for players other than the current player + current_player = state.current_player() + for player in range(self.game.num_players()): + if player != current_player: + msg = ("The game {!r} does not return an empty list on " + "legal_actions() in state {}".format( + self.game_name, state)) + self.assertEmpty(state.legal_actions(player), msg=msg) + + def test_number_of_nodes(self): + expected_numbers = TOTAL_NUM_STATES[self.game_name] + + num_chance_nodes = 0 + num_terminals = 0 + num_playable = 0 + for state in self.all_states: + if state.is_terminal(): + num_terminals += 1 + elif state.is_chance_node(): + num_chance_nodes += 1 + else: + num_playable += 1 + + self.assertEqual(expected_numbers, + (num_chance_nodes, num_playable, num_terminals)) + + def test_current_player_returns_terminal_player_on_terminal_nodes(self): + for state in self.all_states: + if state.is_terminal(): + self.assertEqual(pyspiel.PlayerId.TERMINAL, state.current_player()) + + def test_information_state_no_argument_raises_on_terminal_nodes(self): + for state in self.all_states: + if state.is_terminal(): + with self.assertRaises(RuntimeError): + state.information_state_string() + + def test_game_is_perfect_recall(self): + # We do not count the terminal nodes here. + expected_number_states = PERFECT_RECALL_NUM_STATES[self.game_name] + all_states = [] + for _ in range(3): + infostate_player_to_history = _assert_is_perfect_recall(self.game) + all_states.append(infostate_player_to_history) + # We compare the total number of (infostate, player) touched, to prevent + # any regression (we skip chance nodes). + # We use assertEqual and not assertLen to prevent the huge dict to be + # displayed + self.assertEqual(expected_number_states, len(infostate_player_to_history)) # pylint: disable=g-generic-assert + + def test_constant_sum(self): + game_type = self.game.get_type() + terminal_values = { + tuple(state.returns()) + for state in self.all_states + if state.is_terminal() + } + if game_type.utility in (pyspiel.GameType.Utility.ZERO_SUM, + pyspiel.GameType.Utility.CONSTANT_SUM): + expected_sum = self.game.utility_sum() + for returns in terminal_values: + self.assertEqual(sum(returns), expected_sum) + elif game_type.utility == pyspiel.GameType.Utility.GENERAL_SUM: + all_sums = {sum(returns) for returns in terminal_values} + self.assertNotEqual(len(all_sums), 1) + elif game_type.utility == pyspiel.GameType.Utility.IDENTICAL: + for returns in terminal_values: + self.assertLen(set(returns), 1) + else: + raise AssertionError("Invalid utility type {}".format(game_type.utility)) + + def test_information_state_functions_raises_on_chance_nodes(self): + + def _assert_information_state_on_chance_nodes_raises(state): + + if state.is_chance_node(): + with self.assertRaises(RuntimeError): + state.information_state_string() + with self.assertRaises(RuntimeError): + state.information_state_tensor() + + for state in self.all_states: + _assert_information_state_on_chance_nodes_raises(state) + + def test_current_player_infosets_no_overlap_between_players(self): + # This is the stronger property we can currently verify. In particular, + # we can find some state h0 where player 0 plays such that: + # h0.information_state_string(0) == h0.information_state_string(0). 
+ + states_for_player = [set() for _ in range(self.game.num_players())] + for state in self.all_states: + if not state.is_chance_node() and not state.is_terminal(): + states_for_player[state.current_player()].add(state) + elif state.is_chance_node(): + self.assertEqual(state.get_type(), pyspiel.StateType.CHANCE) + else: + self.assertEqual(state.get_type(), pyspiel.StateType.TERMINAL) + + infoset_functions = [lambda s, player: s.information_state_string(player)] + + def _information_state_tensor(state, player): + return tuple(np.asarray(state.information_state_tensor(player)).flatten()) + + infoset_functions.append(_information_state_tensor) + + for infoset_function in infoset_functions: + + information_sets_per_player = [] + for player in range(self.game.num_players()): + set_l = set( + infoset_function(s, player) for s in states_for_player[player]) + information_sets_per_player.append(set_l) + + union = set() + for information_set in information_sets_per_player: + union = union.union(information_set) + self.assertLen(union, sum([len(x) for x in information_sets_per_player])) + + +class Relation(enum.Enum): + SUBSET_OR_EQUALS = 1 + EQUALS = 2 + + +class EnforceAPIOnPartialTreeBase(parameterized.TestCase): + """This only partially test some properties.""" + + @classmethod + def setUpClass(cls): + super(EnforceAPIOnPartialTreeBase, cls).setUpClass() + + cls.some_states = sample_some_states.sample_some_states( + cls.game, max_states=400) + cls.game_type = cls.game.get_type() + + def test_sequence_lengths(self): + try: + max_history_len = self.game.max_history_length() + max_move_number = self.game.max_move_number() + max_chance_nodes_in_history = self.game.max_chance_nodes_in_history() + except RuntimeError: + return # The function is likely not implemented, so skip the test. 
+ + self.assertGreater(max_history_len, 0) + self.assertGreater(max_move_number, 0) + if self.game_type.chance_mode == pyspiel.GameType.ChanceMode.DETERMINISTIC: + self.assertEqual(max_chance_nodes_in_history, 0) + else: + self.assertGreater(max_chance_nodes_in_history, 0) + + for state in self.some_states: + self.assertLessEqual(len(state.full_history()), max_history_len) + self.assertLessEqual(state.move_number(), max_move_number) + + chance_nodes_in_history = 0 + for item in state.full_history(): + if item.player == pyspiel.PlayerId.CHANCE: + chance_nodes_in_history += 1 + self.assertLessEqual(chance_nodes_in_history, max_chance_nodes_in_history) + + def test_observations_raises_error_on_invalid_player(self): + game = self.game + game_type = self.game_type + game_name = self.game_name + num_players = game.num_players() + + for state in self.some_states: + if game_type.provides_information_state_string: + if not state.is_chance_node(): + for p in range(num_players): + state.information_state_string(p) + msg = f"information_state_string did not raise an error for {game_name}" + with self.assertRaisesRegex(RuntimeError, "player >= 0", msg=msg): + state.information_state_string(-1) + with self.assertRaisesRegex(RuntimeError, "player <", msg=msg): + state.information_state_string(num_players + 1) + + if game_type.provides_information_state_tensor: + if not state.is_chance_node(): + for p in range(num_players): + v = state.information_state_tensor(p) + self.assertLen(v, game.information_state_tensor_size()) + msg = f"information_state_tensor did not raise an error for {game_name}" + with self.assertRaisesRegex(RuntimeError, "player >= 0", msg=msg): + state.information_state_tensor(-1) + with self.assertRaisesRegex(RuntimeError, "player <", msg=msg): + state.information_state_tensor(num_players + 1) + + if game_type.provides_observation_tensor: + if not state.is_chance_node(): + for p in range(num_players): + v = state.observation_tensor(p) + self.assertLen(v, game.observation_tensor_size()) + msg = f"observation_tensor did not raise an error for {game_name}" + with self.assertRaisesRegex(RuntimeError, "player >= 0", msg=msg): + state.observation_tensor(-1) + with self.assertRaisesRegex(RuntimeError, "player <", msg=msg): + state.observation_tensor(num_players + 1) + + if game_type.provides_observation_string: + if not state.is_chance_node(): + for p in range(num_players): + state.observation_string(p) + msg = f"observation_string did not raise an error for {game_name}" + with self.assertRaisesRegex(RuntimeError, "player >= 0", msg=msg): + state.observation_string(-1) + with self.assertRaisesRegex(RuntimeError, "player <", msg=msg): + state.observation_string(num_players + 1) + + def test_legal_actions_returns_empty_list_on_opponent(self): + if self.game_name in _GAMES_TO_OMIT_LEGAL_ACTIONS_CHECK: + return + + for state in self.some_states: + if state.is_terminal(): + # Empty on terminal + msg = ("The game %s does not return an empty list on " + "legal_actions() for state %s" % (self.game_name, state)) + self.assertEmpty(state.legal_actions(), msg=msg) + for player in range(self.game.num_players()): + msg = ("The game %s does not return an empty list on " + "legal_actions(%i) for state %s" % + (self.game_name, player, state)) + self.assertEmpty(state.legal_actions(player), msg=msg) + elif state.is_simultaneous_node(): + # No requirement for legal_actions to be empty, since all players act. 
+ pass + elif state.is_chance_node(): + # Would be an error to request legal actions for a non-chance player. + pass + else: + # Empty for players other than the current player + current_player = state.current_player() + for player in range(self.game.num_players()): + if player != current_player: + msg = ("The game {!r} does not return an empty list on " + "legal_actions() in state {}".format( + self.game_name, state)) + self.assertEmpty(state.legal_actions(player), msg=msg) + + def test_private_information_contents(self): + private_observation = make_observation( + self.game, + pyspiel.IIGObservationType( + public_info=False, + perfect_recall=False, + private_info=pyspiel.PrivateInfoType.SINGLE_PLAYER, + ), + ) + + if (not private_observation + or private_observation.string_from(self.some_states[0], 0) is None): + return + + player_has_private_info = [False] * self.game.num_players() + + for state in self.some_states: + for i in range(self.game.num_players()): + if private_observation.string_from(state, i): + player_has_private_info[i] = True + + if (self.game_type.information == + pyspiel.GameType.Information.IMPERFECT_INFORMATION): + self.assertTrue(any(player_has_private_info)) + if (self.game_type.information == + pyspiel.GameType.Information.PERFECT_INFORMATION): + self.assertFalse(any(player_has_private_info)) + + def test_no_invalid_public_observations(self): + public_observation = make_observation( + self.game, + pyspiel.IIGObservationType( + public_info=True, + perfect_recall=False, + private_info=pyspiel.PrivateInfoType.NONE, + ), + ) + if not public_observation: + return + + if public_observation.string_from(self.some_states[0], 0) is None: + return + + for state in self.some_states: + self.assertIsNotNone(public_observation.string_from(state, 0)) + + +def _assert_properties_recursive(state, assert_functions): + + for assert_function in assert_functions: + assert_function(state) + + # Recursion + # TODO(author2): We often use a `give me the next node` function and we + # probably want a utility method for that, which works for all games. + if state.is_terminal(): + return + elif state.is_chance_node(): + for action, unused_prob in state.chance_outcomes(): + state_for_search = state.child(action) + _assert_properties_recursive(state_for_search, assert_functions) + else: + for action in state.legal_actions(): + state_for_search = state.child(action) + _assert_properties_recursive(state_for_search, assert_functions) + + +def _assert_is_perfect_recall(game): + """Raises an AssertionError if the game is not perfect recall. + + We are willing to ensure the following property (perfect recall): + - define X_i(h) be the sequence of information states and actions from the + start of the game observed by player i (i.e. excluding the states and + actions taken by the opponents unless those actions are included in player + i's information state), along h but not including the state at h: + X_i(h) = (s_1, a_1), (s_2, a_2), ... , (s_{t-1}, a_{t-1}) + then player i has perfect recall in this game iff: forall s in S_i, + forall h1, h2 in s X_i(h1) == X_i(h2). Here, we check that the game has + perfect recall if this is true for all players i (excluding chance). + + For more detail and context, see page 11 of + http://mlanctot.info/files/papers/PhD_Thesis_MarcLanctot.pdf + + In particular, note that: + - we want to check that holds both for + + `std::string information_state_string(current_player)` + + `information_state_tensor`. 
+ - we currently check this only from the point of view of the current + player at the information state (i.e. we compare + `prev_state.information_state_string(current_player)` but not + `prev_state.information_state_string(opponent_player)`). + + The strategy is the following: we traverse the full tree (of states, not + infostates), and make sure, for each node, that the history we get for + the infostate associated to that node is unique with respect to + the infostate. + + Args: + game: A Spiel game to check. + + Returns: + The internal cache mapping (infostate_str, player_id) to a list of one + history leading to this infostate. + """ + game_info = game.get_type() + if game_info.dynamics != pyspiel.GameType.Dynamics.SEQUENTIAL: + raise ValueError("The game is expected to be sequential") + + infostate_player_to_history = {} + _assert_is_perfect_recall_recursive( + game.new_initial_state(), + current_history=[], + infostate_player_to_history=infostate_player_to_history) + + return infostate_player_to_history + + +def _assert_is_perfect_recall_recursive(state, current_history, + infostate_player_to_history): + """Raises an AssertionError if the game is not perfect recall. + + The strategy is the following: we traverse the full tree (of states, not + infostates), and make sure, for each node, that the history we get for + the infostate associated to that node is unique with respect to + the infostate. + + Args: + state: The current state during the recursive tree traversal. + current_history: The current list of strictly preceding `SpielState` objects + that lead to the current `state` (excluded). + infostate_player_to_history: A dictionary mapping (infostate string + representation, current_player) to the list of one instance of actual + predecessor nodes. + """ + + if not state.is_chance_node() and not state.is_terminal(): + current_player = state.current_player() + infostate_str = state.information_state_string(current_player) + key = (infostate_str, current_player) + + if key not in infostate_player_to_history: + # First time we see the node.
+ infostate_player_to_history[key] = list(current_history) + else: + previous_history = infostate_player_to_history[key] + + if len(previous_history) != len(current_history): + raise AssertionError("We found 2 history leading to the same state:\n" + "State: {!r}\n" + "InfoState str: {}\n" + "First history ({} states): {!r}\n" + "Second history ({} states): {!r}\n".format( + state.history(), infostate_str, + len(previous_history), + "|".join([str(sa) for sa in previous_history]), + len(current_history), + "|".join([str(sa) for sa in current_history]))) + + # Check for `information_state` + # pylint: disable=g-complex-comprehension + expected_infosets_history = [(s.information_state_string(current_player), + a) + for s, a in previous_history + if s.current_player() == current_player] + # pylint: disable=g-complex-comprehension + infosets_history = [(s.information_state_string(current_player), a) + for s, a in current_history + if s.current_player() == current_player] + + if expected_infosets_history != infosets_history: + # pyformat: disable + raise AssertionError("We found 2 history leading to the same state:\n" + "history: {!r}\n" + "info_state str: {}\n" + "**First history ({} states)**\n" + "states: {!r}\n" + "info_sets: {!r}\n" + "**Second history ({} states)**\n" + "Second info_state history: {!r}\n" + "Second history: {!r}\n".format( + state.history(), + infostate_str, + len(previous_history), + "|".join([str(sa) for sa in previous_history]), + expected_infosets_history, + len(current_history), infosets_history, + "|".join([str(sa) for sa in current_history]))) + # pyformat: enable + + # Check for `information_state_tensor` + expected_infosets_history = [ + (s.information_state_tensor(s.current_player()), a) + for s, a in previous_history + if s.current_player() == current_player + ] + infosets_history = [(s.information_state_tensor(s.current_player()), a) + for s, a in current_history + if s.current_player() == current_player] + + if infosets_history != expected_infosets_history: + raise ValueError("The history as tensor in the same infoset " + "are different:\n" + "History: {!r}\n".format(state.history())) + + # Recursion + + # TODO(author2): We often use a `give me the next node` function and we + # probably want a utility method for that, which works for all games. + if state.is_terminal(): + return + else: + for action in state.legal_actions(): + state_for_search = state.child(action) + _assert_is_perfect_recall_recursive( + state_for_search, + current_history=current_history + [(state, action)], + infostate_player_to_history=infostate_player_to_history) + + +def _create_test_case_classes(): + """Yields one Testing class per game to test.""" + for game_name, game_string in _GAMES_FULL_TREE_TRAVERSAL_TESTS: + if not re.match(FLAGS.test_only_games, game_string): + continue + game = pyspiel.load_game(game_string) + new_class = type("EnforceAPIFullTree_{}_Test".format(game_name), + (EnforceAPIOnFullTreeBase,), {}) + new_class.game_name = game_name + new_class.game = game + yield new_class + + for game_name in _GAMES_TO_TEST: + if not re.match(FLAGS.test_only_games, game_name): + continue + game = pyspiel.load_game(game_name) + new_class = type("EnforceAPIPartialTree_{}_Test".format(game_name), + (EnforceAPIOnPartialTreeBase,), {}) + new_class.game_name = game_name + new_class.game = game + yield new_class + + +def load_tests(loader, tests, pattern): # pylint: disable=invalid-name,g-doc-args + """Returns Dynamically created TestSuite. + + This creates one TestCase per game to test. 
+ + See https://docs.python.org/2/library/unittest.html#load-tests-protocol. + """ + del pattern + tests = tuple( + loader.loadTestsFromTestCase(test_case_class) + for test_case_class in _create_test_case_classes()) + return unittest.TestSuite(tests=tests) + + +def main(_): + absltest.main() + + +if __name__ == "__main__": + # Necessary to run main via app.run for internal tests. + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/example_connect_four_state.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/example_connect_four_state.txt new file mode 100644 index 0000000..e57e070 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/example_connect_four_state.txt @@ -0,0 +1,6 @@ +....... +....... +....... +...o... +..oxx.. +.oxxo.. diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthrough_test.py b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthrough_test.py new file mode 100644 index 0000000..4ee4ac0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthrough_test.py @@ -0,0 +1,115 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Re-run playthroughs and check for differences.""" + +import os +import re + +from absl import logging +from absl.testing import absltest + +from open_spiel.python.algorithms import generate_playthrough +import pyspiel + +_DATA_DIR = "open_spiel/integration_tests/playthroughs/" + +_OPTIONAL_GAMES = frozenset(["hanabi", "universal_poker"]) +_AVAILABLE_GAMES = set(pyspiel.registered_names()) + +# Games for which we do not have playthroughs. Please don't add new games +# here if you can avoid it. Adding a playthrough is easy and very useful! +# Run `generate_new_playthrough.sh $GAME` to add a playthrough. +_MISSING_GAMES = set(["nfg_game", "efg_game", "restricted_nash_response"]) + +# Regex to find the game name in a playthrough. This will return the name of the +# transform for wrapped games, e.g. goofspiel --> turn_based_simultaneous_game +_SHORTNAME = r'^GameType\.short_name = "(.*)"$' + + +def _is_optional_game(basename): + """Returns (bool, game_name or None). + + Args: + basename: The basename of the file. It is assumed it starts with the game + name. + """ + for game_name in _OPTIONAL_GAMES: + if basename.startswith(game_name): + return True, game_name + return False, None + + +def _playthrough_match(filename, regex): + """Returns the specified value fromm the playthrough.""" + with open(filename, "r", encoding="utf-8") as f: + data = f.read() + return re.search(regex, data, re.MULTILINE) + + +class PlaythroughTest(absltest.TestCase): + + def run_test(self, path, basename): + """Instantiated for each test case in main, below.""" + + # We check whether the game is optional, and if it is, whether we do + # have the game. 
+ is_optional, game_name = _is_optional_game(basename) + if is_optional: + if game_name not in _AVAILABLE_GAMES: + logging.info("Skipping %s because %s is not built in.", basename, + game_name) + return + + file_path = os.path.join(path, basename) + expected, actual = generate_playthrough.replay(file_path) + for line_num, (expected_line, actual_line) in enumerate( + zip(expected.split("\n"), actual.split("\n"))): + self.assertEqual( + expected_line, + actual_line, + msg="Wrong line {} in {}".format(line_num, basename)) + self.assertMultiLineEqual(expected, actual) + + def test_all_games_tested(self): + """Verify that every game is present in the playthroughs.""" + test_srcdir = os.environ.get("TEST_SRCDIR", "") + path = os.path.join(test_srcdir, _DATA_DIR) + basenames = set(os.listdir(path)) + missing_games = set(_AVAILABLE_GAMES) - set(_MISSING_GAMES) - set( + _playthrough_match(os.path.join(path, basename), _SHORTNAME)[1] + for basename in basenames) + self.assertEmpty( + missing_games, + msg="These games do not have playthroughs." + "Create playthroughs using generate_new_playthrough.sh") + + +def _add_tests(): + """Adds a test for each playthrough to the test class (above).""" + test_srcdir = os.environ.get("TEST_SRCDIR", "") + path = os.path.join(test_srcdir, _DATA_DIR) + basenames = sorted(os.listdir(path)) + if len(basenames) < 40: + raise ValueError(f"Playthroughs are missing from {path}") + for basename in basenames: + test_name = f"test_playthrough_{basename}" + test_func = lambda self, basename=basename: self.run_test(path, basename) + test_func.__name__ = test_name + setattr(PlaythroughTest, test_name, test_func) + + +if __name__ == "__main__": + _add_tests() + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/2048.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/2048.txt new file mode 100644 index 0000000..0708108 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/2048.txt @@ -0,0 +1,690 @@ +game: 2048 + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "2048" +GameType.max_num_players = 1 +GameType.min_num_players = 1 +GameType.parameter_specification = ["max_tile"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.REWARDS +GameType.short_name = "2048" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 4 +PolicyTensorShape() = [4] +MaxChanceOutcomes() = 33 +GetParameters() = {max_tile=2048} +NumPlayers() = 1 +MinUtility() = 0.0 +MaxUtility() = 2.048e+04 +UtilitySum() = None +ObservationTensorShape() = [4, 4] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 16 +MaxGameLength() = 8192 +ToString() = "2048()" + +# State 0 +# 0 0 0 0 +# 0 0 0 0 +# 0 0 0 0 +# 0 0 0 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = " 0 0 0 0\n 0 0 0 0\n 0 0 0 0\n 0 0 0 0\n" +ObservationTensor(0): ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ChanceOutcomes() = [(0,0.05625), (1,0.00625), (2,0.05625), (3,0.00625), (4,0.05625), (5,0.00625), (6,0.05625), (7,0.00625), (8,0.05625), (9,0.00625), 
(10,0.05625), (11,0.00625), (12,0.05625), (13,0.00625), (14,0.05625), (15,0.00625), (16,0.05625), (17,0.00625), (18,0.05625), (19,0.00625), (20,0.05625), (21,0.00625), (22,0.05625), (23,0.00625), (24,0.05625), (25,0.00625), (26,0.05625), (27,0.00625), (28,0.05625), (29,0.00625), (30,0.05625), (31,0.00625)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31] +StringLegalActions() = ["2 added to row 1, column 1", "4 added to row 1, column 1", "2 added to row 1, column 2", "4 added to row 1, column 2", "2 added to row 1, column 3", "4 added to row 1, column 3", "2 added to row 1, column 4", "4 added to row 1, column 4", "2 added to row 2, column 1", "4 added to row 2, column 1", "2 added to row 2, column 2", "4 added to row 2, column 2", "2 added to row 2, column 3", "4 added to row 2, column 3", "2 added to row 2, column 4", "4 added to row 2, column 4", "2 added to row 3, column 1", "4 added to row 3, column 1", "2 added to row 3, column 2", "4 added to row 3, column 2", "2 added to row 3, column 3", "4 added to row 3, column 3", "2 added to row 3, column 4", "4 added to row 3, column 4", "2 added to row 4, column 1", "4 added to row 4, column 1", "2 added to row 4, column 2", "4 added to row 4, column 2", "2 added to row 4, column 3", "4 added to row 4, column 3", "2 added to row 4, column 4", "4 added to row 4, column 4"] + +# Apply action "2 added to row 3, column 3" +action: 20 + +# State 1 +# 0 0 0 0 +# 0 0 0 0 +# 0 0 2 0 +# 0 0 0 0 +IsTerminal() = False +History() = [20] +HistoryString() = "20" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = " 0 0 0 0\n 0 0 0 0\n 0 0 2 0\n 0 0 0 0\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ChanceOutcomes() = [(0,0.06), (1,0.00666667), (2,0.06), (3,0.00666667), (4,0.06), (5,0.00666667), (6,0.06), (7,0.00666667), (8,0.06), (9,0.00666667), (10,0.06), (11,0.00666667), (12,0.06), (13,0.00666667), (14,0.06), (15,0.00666667), (16,0.06), (17,0.00666667), (18,0.06), (19,0.00666667), (22,0.06), (23,0.00666667), (24,0.06), (25,0.00666667), (26,0.06), (27,0.00666667), (28,0.06), (29,0.00666667), (30,0.06), (31,0.00666667)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31] +StringLegalActions() = ["2 added to row 1, column 1", "4 added to row 1, column 1", "2 added to row 1, column 2", "4 added to row 1, column 2", "2 added to row 1, column 3", "4 added to row 1, column 3", "2 added to row 1, column 4", "4 added to row 1, column 4", "2 added to row 2, column 1", "4 added to row 2, column 1", "2 added to row 2, column 2", "4 added to row 2, column 2", "2 added to row 2, column 3", "4 added to row 2, column 3", "2 added to row 2, column 4", "4 added to row 2, column 4", "2 added to row 3, column 1", "4 added to row 3, column 1", "2 added to row 3, column 2", "4 added to row 3, column 2", "2 added to row 3, column 4", "4 added to row 3, column 4", "2 added to row 4, column 1", "4 added to row 4, column 1", "2 added to row 4, column 2", "4 added to row 4, column 2", "2 added to row 4, column 3", "4 added to row 4, column 3", "2 added to row 4, column 4", "4 added to row 4, column 4"] + +# Apply action "2 added to row 2, column 2" +action: 10 + +# State 2 +# 0 0 0 0 +# 0 2 0 0 +# 0 0 2 0 +# 0 0 0 0 +IsTerminal() = False +History() = [20, 10] +HistoryString() = "20, 10" +IsChanceNode() = False 
+IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 0 0 0 0\n 0 2 0 0\n 0 0 2 0\n 0 0 0 0\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0] +Returns() = [0] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["Up", "Right", "Down", "Left"] + +# Apply action "Left" +action: 3 + +# State 3 +# Apply action "2 added to row 3, column 3" +action: 20 + +# State 4 +# 0 0 0 0 +# 2 0 0 0 +# 2 0 2 0 +# 0 0 0 0 +IsTerminal() = False +History() = [20, 10, 3, 20] +HistoryString() = "20, 10, 3, 20" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 0 0 0 0\n 2 0 0 0\n 2 0 2 0\n 0 0 0 0\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0] +Returns() = [0] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["Up", "Right", "Down", "Left"] + +# Apply action "Up" +action: 0 + +# State 5 +# Apply action "4 added to row 2, column 3" +action: 13 + +# State 6 +# 4 0 2 0 +# 0 0 4 0 +# 0 0 0 0 +# 0 0 0 0 +IsTerminal() = False +History() = [20, 10, 3, 20, 0, 13] +HistoryString() = "20, 10, 3, 20, 0, 13" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 4 0 2 0\n 0 0 4 0\n 0 0 0 0\n 0 0 0 0\n" +ObservationTensor(0) = [4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [4] +Returns() = [4] +LegalActions() = [1, 2, 3] +StringLegalActions() = ["Right", "Down", "Left"] + +# Apply action "Left" +action: 3 + +# State 7 +# Apply action "2 added to row 3, column 3" +action: 20 + +# State 8 +# Apply action "Left" +action: 3 + +# State 9 +# Apply action "4 added to row 3, column 2" +action: 19 + +# State 10 +# Apply action "Down" +action: 2 + +# State 11 +# Apply action "2 added to row 2, column 1" +action: 8 + +# State 12 +# Apply action "Up" +action: 0 + +# State 13 +# Apply action "4 added to row 4, column 2" +action: 27 + +# State 14 +# Apply action "Right" +action: 1 + +# State 15 +# Apply action "4 added to row 3, column 2" +action: 19 + +# State 16 +# Apply action "Down" +action: 2 + +# State 17 +# Apply action "4 added to row 1, column 2" +action: 3 + +# State 18 +# Apply action "Right" +action: 1 + +# State 19 +# Apply action "4 added to row 2, column 3" +action: 13 + +# State 20 +# Apply action "Left" +action: 3 + +# State 21 +# Apply action "4 added to row 1, column 4" +action: 7 + +# State 22 +# 4 0 0 4 +# 4 8 0 0 +# 2 0 0 0 +# 4 8 4 0 +IsTerminal() = False +History() = [20, 10, 3, 20, 0, 13, 3, 20, 3, 19, 2, 8, 0, 27, 1, 19, 2, 3, 1, 13, 3, 7] +HistoryString() = "20, 10, 3, 20, 0, 13, 3, 20, 3, 19, 2, 8, 0, 27, 1, 19, 2, 3, 1, 13, 3, 7" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 4 0 0 4\n 4 8 0 0\n 2 0 0 0\n 4 8 4 0\n" +ObservationTensor(0) = [4.0, 0.0, 0.0, 4.0, 4.0, 8.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 4.0, 8.0, 4.0, 0.0] +Rewards() = [0] +Returns() = [24] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["Up", "Right", "Down", "Left"] + +# Apply action "Up" +action: 0 + +# State 23 +# Apply action "4 added to row 4, column 2" +action: 27 + +# State 24 +# Apply action "Up" +action: 0 + +# State 25 +# Apply action "2 added to row 3, column 4" +action: 22 + +# State 26 +# Apply action "Left" +action: 3 + +# State 27 +# Apply action "4 added to row 4, column 1" +action: 25 + +# State 28 +# Apply action "Right" +action: 1 + +# State 29 +# 
Apply action "4 added to row 3, column 2" +action: 19 + +# State 30 +# Apply action "Down" +action: 2 + +# State 31 +# Apply action "4 added to row 1, column 2" +action: 3 + +# State 32 +# Apply action "Down" +action: 2 + +# State 33 +# Apply action "2 added to row 1, column 2" +action: 2 + +# State 34 +# Apply action "Left" +action: 3 + +# State 35 +# Apply action "2 added to row 2, column 4" +action: 14 + +# State 36 +# Apply action "Up" +action: 0 + +# State 37 +# Apply action "4 added to row 4, column 3" +action: 29 + +# State 38 +# Apply action "Up" +action: 0 + +# State 39 +# Apply action "4 added to row 2, column 4" +action: 15 + +# State 40 +# Apply action "Right" +action: 1 + +# State 41 +# Apply action "4 added to row 2, column 1" +action: 9 + +# State 42 +# 0 2 16 2 +# 4 4 16 4 +# 0 0 16 8 +# 0 0 0 0 +IsTerminal() = False +History() = [20, 10, 3, 20, 0, 13, 3, 20, 3, 19, 2, 8, 0, 27, 1, 19, 2, 3, 1, 13, 3, 7, 0, 27, 0, 22, 3, 25, 1, 19, 2, 3, 2, 2, 3, 14, 0, 29, 0, 15, 1, 9] +HistoryString() = "20, 10, 3, 20, 0, 13, 3, 20, 3, 19, 2, 8, 0, 27, 1, 19, 2, 3, 1, 13, 3, 7, 0, 27, 0, 22, 3, 25, 1, 19, 2, 3, 2, 2, 3, 14, 0, 29, 0, 15, 1, 9" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 0 2 16 2\n 4 4 16 4\n 0 0 16 8\n 0 0 0 0\n" +ObservationTensor(0) = [0.0, 2.0, 16.0, 2.0, 4.0, 4.0, 16.0, 4.0, 0.0, 0.0, 16.0, 8.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [16] +Returns() = [116] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["Up", "Right", "Down", "Left"] + +# Apply action "Right" +action: 1 + +# State 43 +# Apply action "2 added to row 4, column 3" +action: 28 + +# State 44 +# Apply action "Right" +action: 1 + +# State 45 +# Apply action "2 added to row 4, column 3" +action: 28 + +# State 46 +# Apply action "Down" +action: 2 + +# State 47 +# Apply action "2 added to row 4, column 1" +action: 24 + +# State 48 +# Apply action "Up" +action: 0 + +# State 49 +# Apply action "2 added to row 4, column 1" +action: 24 + +# State 50 +# Apply action "Up" +action: 0 + +# State 51 +# Apply action "4 added to row 3, column 1" +action: 17 + +# State 52 +# Apply action "Right" +action: 1 + +# State 53 +# Apply action "2 added to row 4, column 3" +action: 28 + +# State 54 +# Apply action "Left" +action: 3 + +# State 55 +# Apply action "4 added to row 4, column 2" +action: 27 + +# State 56 +# Apply action "Right" +action: 1 + +# State 57 +# Apply action "4 added to row 4, column 3" +action: 29 + +# State 58 +# Apply action "Up" +action: 0 + +# State 59 +# Apply action "2 added to row 2, column 1" +action: 8 + +# State 60 +# Apply action "Right" +action: 1 + +# State 61 +# Apply action "2 added to row 4, column 3" +action: 28 + +# State 62 +# 4 2 16 2 +# 2 8 32 4 +# 0 4 2 16 +# 0 0 2 4 +IsTerminal() = False +History() = [20, 10, 3, 20, 0, 13, 3, 20, 3, 19, 2, 8, 0, 27, 1, 19, 2, 3, 1, 13, 3, 7, 0, 27, 0, 22, 3, 25, 1, 19, 2, 3, 2, 2, 3, 14, 0, 29, 0, 15, 1, 9, 1, 28, 1, 28, 2, 24, 0, 24, 0, 17, 1, 28, 3, 27, 1, 29, 0, 8, 1, 28] +HistoryString() = "20, 10, 3, 20, 0, 13, 3, 20, 3, 19, 2, 8, 0, 27, 1, 19, 2, 3, 1, 13, 3, 7, 0, 27, 0, 22, 3, 25, 1, 19, 2, 3, 2, 2, 3, 14, 0, 29, 0, 15, 1, 9, 1, 28, 1, 28, 2, 24, 0, 24, 0, 17, 1, 28, 3, 27, 1, 29, 0, 8, 1, 28" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 4 2 16 2\n 2 8 32 4\n 0 4 2 16\n 0 0 2 4\n" +ObservationTensor(0) = [4.0, 2.0, 16.0, 2.0, 2.0, 8.0, 32.0, 4.0, 0.0, 4.0, 2.0, 16.0, 0.0, 0.0, 2.0, 4.0] +Rewards() = [0] +Returns() = [188] 
+LegalActions() = [0, 2, 3] +StringLegalActions() = ["Up", "Down", "Left"] + +# Apply action "Down" +action: 2 + +# State 63 +# Apply action "2 added to row 1, column 2" +action: 2 + +# State 64 +# Apply action "Left" +action: 3 + +# State 65 +# Apply action "4 added to row 1, column 3" +action: 5 + +# State 66 +# Apply action "Up" +action: 0 + +# State 67 +# Apply action "2 added to row 4, column 2" +action: 26 + +# State 68 +# Apply action "Down" +action: 2 + +# State 69 +# Apply action "2 added to row 1, column 4" +action: 6 + +# State 70 +# Apply action "Right" +action: 1 + +# State 71 +# Apply action "2 added to row 1, column 2" +action: 2 + +# State 72 +# Apply action "Up" +action: 0 + +# State 73 +# Apply action "2 added to row 3, column 1" +action: 16 + +# State 74 +# Apply action "Right" +action: 1 + +# State 75 +# Apply action "4 added to row 3, column 1" +action: 17 + +# State 76 +# Apply action "Left" +action: 3 + +# State 77 +# Apply action "4 added to row 4, column 2" +action: 27 + +# State 78 +# Apply action "Down" +action: 2 + +# State 79 +# Apply action "2 added to row 1, column 2" +action: 2 + +# State 80 +# Apply action "Right" +action: 1 + +# State 81 +# Apply action "2 added to row 1, column 3" +action: 4 + +# State 82 +# 0 0 2 2 +# 0 0 2 4 +# 0 8 4 2 +# 16 4 16 64 +IsTerminal() = False +History() = [20, 10, 3, 20, 0, 13, 3, 20, 3, 19, 2, 8, 0, 27, 1, 19, 2, 3, 1, 13, 3, 7, 0, 27, 0, 22, 3, 25, 1, 19, 2, 3, 2, 2, 3, 14, 0, 29, 0, 15, 1, 9, 1, 28, 1, 28, 2, 24, 0, 24, 0, 17, 1, 28, 3, 27, 1, 29, 0, 8, 1, 28, 2, 2, 3, 5, 0, 26, 2, 6, 1, 2, 0, 16, 1, 17, 3, 27, 2, 2, 1, 4] +HistoryString() = "20, 10, 3, 20, 0, 13, 3, 20, 3, 19, 2, 8, 0, 27, 1, 19, 2, 3, 1, 13, 3, 7, 0, 27, 0, 22, 3, 25, 1, 19, 2, 3, 2, 2, 3, 14, 0, 29, 0, 15, 1, 9, 1, 28, 1, 28, 2, 24, 0, 24, 0, 17, 1, 28, 3, 27, 1, 29, 0, 8, 1, 28, 2, 2, 3, 5, 0, 26, 2, 6, 1, 2, 0, 16, 1, 17, 3, 27, 2, 2, 1, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 0 0 2 2\n 0 0 2 4\n 0 8 4 2\n 16 4 16 64\n" +ObservationTensor(0) = [0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 2.0, 4.0, 0.0, 8.0, 4.0, 2.0, 16.0, 4.0, 16.0, 64.0] +Rewards() = [0] +Returns() = [364] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["Up", "Right", "Down", "Left"] + +# Apply action "Down" +action: 2 + +# State 83 +# Apply action "4 added to row 2, column 1" +action: 9 + +# State 84 +# Apply action "Up" +action: 0 + +# State 85 +# Apply action "2 added to row 4, column 3" +action: 28 + +# State 86 +# Apply action "Left" +action: 3 + +# State 87 +# Apply action "2 added to row 4, column 3" +action: 28 + +# State 88 +# Apply action "Right" +action: 1 + +# State 89 +# Apply action "4 added to row 3, column 1" +action: 17 + +# State 90 +# Apply action "Down" +action: 2 + +# State 91 +# Apply action "4 added to row 2, column 2" +action: 11 + +# State 92 +# Apply action "Up" +action: 0 + +# State 93 +# Apply action "2 added to row 3, column 4" +action: 22 + +# State 94 +# Apply action "Right" +action: 1 + +# State 95 +# Apply action "4 added to row 4, column 3" +action: 29 + +# State 96 +# Apply action "Down" +action: 2 + +# State 97 +# Apply action "2 added to row 2, column 1" +action: 8 + +# State 98 +# Apply action "Up" +action: 0 + +# State 99 +# Apply action "2 added to row 4, column 3" +action: 28 + +# State 100 +# Apply action "Left" +action: 3 + +# State 101 +# Apply action "4 added to row 3, column 4" +action: 23 + +# State 102 +# 2 4 32 2 +# 16 8 64 8 +# 8 4 0 4 +# 2 0 0 0 +IsTerminal() = False 
+History() = [20, 10, 3, 20, 0, 13, 3, 20, 3, 19, 2, 8, 0, 27, 1, 19, 2, 3, 1, 13, 3, 7, 0, 27, 0, 22, 3, 25, 1, 19, 2, 3, 2, 2, 3, 14, 0, 29, 0, 15, 1, 9, 1, 28, 1, 28, 2, 24, 0, 24, 0, 17, 1, 28, 3, 27, 1, 29, 0, 8, 1, 28, 2, 2, 3, 5, 0, 26, 2, 6, 1, 2, 0, 16, 1, 17, 3, 27, 2, 2, 1, 4, 2, 9, 0, 28, 3, 28, 1, 17, 2, 11, 0, 22, 1, 29, 2, 8, 0, 28, 3, 23] +HistoryString() = "20, 10, 3, 20, 0, 13, 3, 20, 3, 19, 2, 8, 0, 27, 1, 19, 2, 3, 1, 13, 3, 7, 0, 27, 0, 22, 3, 25, 1, 19, 2, 3, 2, 2, 3, 14, 0, 29, 0, 15, 1, 9, 1, 28, 1, 28, 2, 24, 0, 24, 0, 17, 1, 28, 3, 27, 1, 29, 0, 8, 1, 28, 2, 2, 3, 5, 0, 26, 2, 6, 1, 2, 0, 16, 1, 17, 3, 27, 2, 2, 1, 4, 2, 9, 0, 28, 3, 28, 1, 17, 2, 11, 0, 22, 1, 29, 2, 8, 0, 28, 3, 23" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 2 4 32 2\n 16 8 64 8\n 8 4 0 4\n 2 0 0 0\n" +ObservationTensor(0) = [2.0, 4.0, 32.0, 2.0, 16.0, 8.0, 64.0, 8.0, 8.0, 4.0, 0.0, 4.0, 2.0, 0.0, 0.0, 0.0] +Rewards() = [8] +Returns() = [456] +LegalActions() = [1, 2, 3] +StringLegalActions() = ["Right", "Down", "Left"] + +# Apply action "Right" +action: 1 + +# State 103 +# Apply action "2 added to row 4, column 2" +action: 26 + +# State 104 +# Apply action "Down" +action: 2 + +# State 105 +# Apply action "4 added to row 1, column 2" +action: 3 + +# State 106 +# Apply action "Down" +action: 2 + +# State 107 +# Apply action "4 added to row 1, column 1" +action: 1 + +# State 108 +# Apply action "Left" +action: 3 + +# State 109 +# Apply action "4 added to row 2, column 4" +action: 15 + +# State 110 +# Apply action "Right" +action: 1 + +# State 111 +# Apply action "4 added to row 1, column 2" +action: 3 + +# State 112 +# Apply action "Right" +action: 1 + +# State 113 +# Apply action "2 added to row 1, column 2" +action: 2 + +# State 114 +# Apply action "Left" +action: 3 + +# State 115 +# Apply action "2 added to row 1, column 3" +action: 4 + +# State 116 +# Apply action "Right" +action: 1 + +# State 117 +# Apply action "4 added to row 1, column 1" +action: 1 + +# State 118 +# 4 2 8 2 +# 8 32 2 4 +# 2 8 64 16 +# 16 2 8 2 +IsTerminal() = True +History() = [20, 10, 3, 20, 0, 13, 3, 20, 3, 19, 2, 8, 0, 27, 1, 19, 2, 3, 1, 13, 3, 7, 0, 27, 0, 22, 3, 25, 1, 19, 2, 3, 2, 2, 3, 14, 0, 29, 0, 15, 1, 9, 1, 28, 1, 28, 2, 24, 0, 24, 0, 17, 1, 28, 3, 27, 1, 29, 0, 8, 1, 28, 2, 2, 3, 5, 0, 26, 2, 6, 1, 2, 0, 16, 1, 17, 3, 27, 2, 2, 1, 4, 2, 9, 0, 28, 3, 28, 1, 17, 2, 11, 0, 22, 1, 29, 2, 8, 0, 28, 3, 23, 1, 26, 2, 3, 2, 1, 3, 15, 1, 3, 1, 2, 3, 4, 1, 1] +HistoryString() = "20, 10, 3, 20, 0, 13, 3, 20, 3, 19, 2, 8, 0, 27, 1, 19, 2, 3, 1, 13, 3, 7, 0, 27, 0, 22, 3, 25, 1, 19, 2, 3, 2, 2, 3, 14, 0, 29, 0, 15, 1, 9, 1, 28, 1, 28, 2, 24, 0, 24, 0, 17, 1, 28, 3, 27, 1, 29, 0, 8, 1, 28, 2, 2, 3, 5, 0, 26, 2, 6, 1, 2, 0, 16, 1, 17, 3, 27, 2, 2, 1, 4, 2, 9, 0, 28, 3, 28, 1, 17, 2, 11, 0, 22, 1, 29, 2, 8, 0, 28, 3, 23, 1, 26, 2, 3, 2, 1, 3, 15, 1, 3, 1, 2, 3, 4, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = " 4 2 8 2\n 8 32 2 4\n 2 8 64 16\n 16 2 8 2\n" +ObservationTensor(0) = [4.0, 2.0, 8.0, 2.0, 8.0, 32.0, 2.0, 4.0, 2.0, 8.0, 64.0, 16.0, 16.0, 2.0, 8.0, 2.0] +Rewards() = [0] +Returns() = [496] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/add_noise(epsilon=1.,seed=1,game=kuhn_poker()).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/add_noise(epsilon=1.,seed=1,game=kuhn_poker()).txt new file mode 100644 index 0000000..9a72763 --- 
/dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/add_noise(epsilon=1.,seed=1,game=kuhn_poker()).txt @@ -0,0 +1,146 @@ +game: add_noise(epsilon=1.,seed=1,game=kuhn_poker()) + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Add noise to game=Kuhn Poker epsilon=1 seed=1" +GameType.max_num_players = 10 +GameType.min_num_players = 2 +GameType.parameter_specification = ["players"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = True +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "add_noise" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 2 +PolicyTensorShape() = [2] +MaxChanceOutcomes() = 3 +GetParameters() = {epsilon=1.0,game=kuhn_poker(),seed=1} +NumPlayers() = 2 +MinUtility() = -3.0 +MaxUtility() = 3.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = [11] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 11 +ObservationTensorShape() = [7] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 7 +MaxGameLength() = 3 +ToString() = "add_noise(epsilon=1.0,game=kuhn_poker(),seed=1)" + +# State 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "" +InformationStateString(1) = "" +InformationStateTensor(0): ◉◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "" +ObservationString(1) = "" +ObservationTensor(0): ◉◯◯◯◯◉◉ +ObservationTensor(1): ◯◉◯◯◯◉◉ +ChanceOutcomes() = [(0,0.333333), (1,0.333333), (2,0.333333)] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["Deal:0", "Deal:1", "Deal:2"] + +# Apply action "Deal:2" +action: 2 + +# State 1 +# 2 +IsTerminal() = False +History() = [2] +HistoryString() = "2" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "2" +InformationStateString(1) = "" +InformationStateTensor(0): ◉◯◯◯◉◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "211" +ObservationString(1) = "" +ObservationTensor(0): ◉◯◯◯◉◉◉ +ObservationTensor(1): ◯◉◯◯◯◉◉ +ChanceOutcomes() = [(0,0.5), (1,0.5)] +LegalActions() = [0, 1] +StringLegalActions() = ["Deal:0", "Deal:1"] + +# Apply action "Deal:1" +action: 1 + +# State 2 +# 2 1 +IsTerminal() = False +History() = [2, 1] +HistoryString() = "2, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "2" +InformationStateString(1) = "1" +InformationStateTensor(0): ◉◯◯◯◉◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◉◯◯◯◯◯◯◯ +ObservationString(0) = "211" +ObservationString(1) = "111" +ObservationTensor(0): ◉◯◯◯◉◉◉ +ObservationTensor(1): ◯◉◯◉◯◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["Pass", "Bet"] + +# Apply action "Bet" +action: 1 + +# State 3 +# 2 1 b +IsTerminal() = False +History() = [2, 1, 1] +HistoryString() = "2, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "2b" +InformationStateString(1) = "1b" +InformationStateTensor(0): ◉◯◯◯◉◯◉◯◯◯◯ +InformationStateTensor(1): ◯◉◯◉◯◯◉◯◯◯◯ +ObservationString(0) = "221" +ObservationString(1) = "121" 
+ObservationTensor(0) = [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0] +ObservationTensor(1) = [0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["Pass", "Bet"] + +# Apply action "Pass" +action: 0 + +# State 4 +# 2 1 bp +IsTerminal() = True +History() = [2, 1, 1, 0] +HistoryString() = "2, 1, 1, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "2bp" +InformationStateString(1) = "1bp" +InformationStateTensor(0): ◉◯◯◯◉◯◉◉◯◯◯ +InformationStateTensor(1): ◯◉◯◉◯◯◉◉◯◯◯ +ObservationString(0) = "221" +ObservationString(1) = "121" +ObservationTensor(0) = [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0] +ObservationTensor(1) = [0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0] +Rewards() = [1.99437, -1.99437] +Returns() = [1.99437, -1.99437] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/amazons.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/amazons.txt new file mode 100644 index 0000000..1c52ce3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/amazons.txt @@ -0,0 +1,797 @@ +game: amazons + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Amazons" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "amazons" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 36 +PolicyTensorShape() = [36] +MaxChanceOutcomes() = 0 +GetParameters() = {} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [4, 6, 6] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 144 +MaxGameLength() = 108 +ToString() = "amazons()" + +# State 0 +# .X..X. +# X....X +# ...... +# ...... +# O....O +# .O..O. +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = ".X..X.\nX....X\n......\n......\nO....O\n.O..O." +ObservationString(1) = ".X..X.\nX....X\n......\n......\nO....O\n.O..O." +ObservationTensor(0): +◉◯◉◉◯◉ ◯◯◯◯◯◯ ◯◉◯◯◉◯ ◯◯◯◯◯◯ +◯◉◉◉◉◯ ◯◯◯◯◯◯ ◉◯◯◯◯◉ ◯◯◯◯◯◯ +◉◉◉◉◉◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◉◉◉◉◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◉◉◉◉◯ ◉◯◯◯◯◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◯◉◉◯◉ ◯◉◯◯◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +ObservationTensor(1): +◉◯◉◉◯◉ ◯◯◯◯◯◯ ◯◉◯◯◉◯ ◯◯◯◯◯◯ +◯◉◉◉◉◯ ◯◯◯◯◯◯ ◉◯◯◯◯◉ ◯◯◯◯◯◯ +◉◉◉◉◉◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◉◉◉◉◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◉◉◉◉◯ ◉◯◯◯◯◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◯◉◉◯◉ ◯◉◯◯◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 4, 6, 11] +StringLegalActions() = ["X From (1, 2)", "X From (1, 5)", "X From (2, 1)", "X From (2, 6)"] + +# Apply action "X From (2, 1)" +action: 6 + +# State 1 +# .X..X. +# .....X +# ...... +# ...... +# O....O +# .O..O. 
+IsTerminal() = False +History() = [6] +HistoryString() = "6" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "6" +InformationStateString(1) = "6" +ObservationString(0) = ".X..X.\n.....X\n......\n......\nO....O\n.O..O." +ObservationString(1) = ".X..X.\n.....X\n......\n......\nO....O\n.O..O." +ObservationTensor(0): +◉◯◉◉◯◉ ◯◯◯◯◯◯ ◯◉◯◯◉◯ ◯◯◯◯◯◯ +◉◉◉◉◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◉ ◯◯◯◯◯◯ +◉◉◉◉◉◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◉◉◉◉◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◉◉◉◉◯ ◉◯◯◯◯◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◯◉◉◯◉ ◯◉◯◯◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +ObservationTensor(1): +◉◯◉◉◯◉ ◯◯◯◯◯◯ ◯◉◯◯◉◯ ◯◯◯◯◯◯ +◉◉◉◉◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◉ ◯◯◯◯◯◯ +◉◉◉◉◉◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◉◉◉◉◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◉◉◉◉◯ ◉◯◯◯◯◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◯◉◉◯◉ ◯◉◯◯◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 7, 8, 9, 10, 12, 13, 18, 20, 27] +StringLegalActions() = ["X To (1, 1)", "X To (2, 2)", "X To (2, 3)", "X To (2, 4)", "X To (2, 5)", "X To (3, 1)", "X To (3, 2)", "X To (4, 1)", "X To (4, 3)", "X To (5, 4)"] + +# Apply action "X To (3, 1)" +action: 12 + +# State 2 +# .X..X. +# .....X +# X..... +# ...... +# O....O +# .O..O. +IsTerminal() = False +History() = [6, 12] +HistoryString() = "6, 12" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "6, 12" +InformationStateString(1) = "6, 12" +ObservationString(0) = ".X..X.\n.....X\nX.....\n......\nO....O\n.O..O." +ObservationString(1) = ".X..X.\n.....X\nX.....\n......\nO....O\n.O..O." +ObservationTensor(0): +◉◯◉◉◯◉ ◯◯◯◯◯◯ ◯◉◯◯◉◯ ◯◯◯◯◯◯ +◉◉◉◉◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◉ ◯◯◯◯◯◯ +◯◉◉◉◉◉ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ +◉◉◉◉◉◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◉◉◉◉◯ ◉◯◯◯◯◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◯◉◉◯◉ ◯◉◯◯◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +ObservationTensor(1): +◉◯◉◉◯◉ ◯◯◯◯◯◯ ◯◉◯◯◉◯ ◯◯◯◯◯◯ +◉◉◉◉◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◉ ◯◯◯◯◯◯ +◯◉◉◉◉◉ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ +◉◉◉◉◉◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◉◉◉◉◯ ◉◯◯◯◯◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◯◉◉◯◉ ◯◉◯◯◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 2, 6, 7, 13, 14, 15, 16, 17, 18, 19, 26, 33] +StringLegalActions() = ["X Shoot: (1, 1)", "X Shoot: (1, 3)", "X Shoot: (2, 1)", "X Shoot: (2, 2)", "X Shoot: (3, 2)", "X Shoot: (3, 3)", "X Shoot: (3, 4)", "X Shoot: (3, 5)", "X Shoot: (3, 6)", "X Shoot: (4, 1)", "X Shoot: (4, 2)", "X Shoot: (5, 3)", "X Shoot: (6, 4)"] + +# Apply action "X Shoot: (3, 3)" +action: 14 + +# State 3 +# .X..X. +# .....X +# X.#... +# ...... +# O....O +# .O..O. +IsTerminal() = False +History() = [6, 12, 14] +HistoryString() = "6, 12, 14" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "6, 12, 14" +InformationStateString(1) = "6, 12, 14" +ObservationString(0) = ".X..X.\n.....X\nX.#...\n......\nO....O\n.O..O." +ObservationString(1) = ".X..X.\n.....X\nX.#...\n......\nO....O\n.O..O." +ObservationTensor(0): +◉◯◉◉◯◉ ◯◯◯◯◯◯ ◯◉◯◯◉◯ ◯◯◯◯◯◯ +◉◉◉◉◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◉ ◯◯◯◯◯◯ +◯◉◯◉◉◉ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◉◯◯◯ +◉◉◉◉◉◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◉◉◉◉◯ ◉◯◯◯◯◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◯◉◉◯◉ ◯◉◯◯◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +ObservationTensor(1): +◉◯◉◉◯◉ ◯◯◯◯◯◯ ◯◉◯◯◉◯ ◯◯◯◯◯◯ +◉◉◉◉◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◉ ◯◯◯◯◯◯ +◯◉◯◉◉◉ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◉◯◯◯ +◉◉◉◉◉◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◉◉◉◉◯ ◉◯◯◯◯◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◯◉◉◯◉ ◯◉◯◯◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [24, 29, 31, 34] +StringLegalActions() = ["O From (5, 1)", "O From (5, 6)", "O From (6, 2)", "O From (6, 5)"] + +# Apply action "O From (6, 2)" +action: 31 + +# State 4 +# .X..X. +# .....X +# X.#... +# ...... +# O....O +# ....O. 
+IsTerminal() = False +History() = [6, 12, 14, 31] +HistoryString() = "6, 12, 14, 31" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "6, 12, 14, 31" +InformationStateString(1) = "6, 12, 14, 31" +ObservationString(0) = ".X..X.\n.....X\nX.#...\n......\nO....O\n....O." +ObservationString(1) = ".X..X.\n.....X\nX.#...\n......\nO....O\n....O." +ObservationTensor(0): +◉◯◉◉◯◉ ◯◯◯◯◯◯ ◯◉◯◯◉◯ ◯◯◯◯◯◯ +◉◉◉◉◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◉ ◯◯◯◯◯◯ +◯◉◯◉◉◉ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◉◯◯◯ +◉◉◉◉◉◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◉◉◉◉◯ ◉◯◯◯◯◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◉◉◉◯◉ ◯◯◯◯◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +ObservationTensor(1): +◉◯◉◉◯◉ ◯◯◯◯◯◯ ◯◉◯◯◉◯ ◯◯◯◯◯◯ +◉◉◉◉◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◉ ◯◯◯◯◯◯ +◯◉◯◉◉◉ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◉◯◯◯ +◉◉◉◉◉◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◉◉◉◉◯ ◉◯◯◯◯◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◉◉◉◯◉ ◯◯◯◯◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [7, 13, 16, 19, 21, 25, 26, 30, 32, 33] +StringLegalActions() = ["O To (2, 2)", "O To (3, 2)", "O To (3, 5)", "O To (4, 2)", "O To (4, 4)", "O To (5, 2)", "O To (5, 3)", "O To (6, 1)", "O To (6, 3)", "O To (6, 4)"] + +# Apply action "O To (4, 2)" +action: 19 + +# State 5 +# .X..X. +# .....X +# X.#... +# .O.... +# O....O +# ....O. +IsTerminal() = False +History() = [6, 12, 14, 31, 19] +HistoryString() = "6, 12, 14, 31, 19" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "6, 12, 14, 31, 19" +InformationStateString(1) = "6, 12, 14, 31, 19" +ObservationString(0) = ".X..X.\n.....X\nX.#...\n.O....\nO....O\n....O." +ObservationString(1) = ".X..X.\n.....X\nX.#...\n.O....\nO....O\n....O." +ObservationTensor(0): +◉◯◉◉◯◉ ◯◯◯◯◯◯ ◯◉◯◯◉◯ ◯◯◯◯◯◯ +◉◉◉◉◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◉ ◯◯◯◯◯◯ +◯◉◯◉◉◉ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◉◯◯◯ +◉◯◉◉◉◉ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◉◉◉◉◯ ◉◯◯◯◯◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◉◉◉◯◉ ◯◯◯◯◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +ObservationTensor(1): +◉◯◉◉◯◉ ◯◯◯◯◯◯ ◯◉◯◯◉◯ ◯◯◯◯◯◯ +◉◉◉◉◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◉ ◯◯◯◯◯◯ +◯◉◯◉◉◉ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◉◯◯◯ +◉◯◉◉◉◉ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◉◉◉◉◯ ◉◯◯◯◯◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◉◉◉◯◉ ◯◯◯◯◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [7, 13, 18, 20, 21, 22, 23, 25, 26, 31, 33] +StringLegalActions() = ["O Shoot: (2, 2)", "O Shoot: (3, 2)", "O Shoot: (4, 1)", "O Shoot: (4, 3)", "O Shoot: (4, 4)", "O Shoot: (4, 5)", "O Shoot: (4, 6)", "O Shoot: (5, 2)", "O Shoot: (5, 3)", "O Shoot: (6, 2)", "O Shoot: (6, 4)"] + +# Apply action "O Shoot: (3, 2)" +action: 13 + +# State 6 +# Apply action "X From (3, 1)" +action: 12 + +# State 7 +# Apply action "X To (1, 1)" +action: 0 + +# State 8 +# Apply action "X Shoot: (2, 1)" +action: 6 + +# State 9 +# Apply action "O From (5, 1)" +action: 24 + +# State 10 +# Apply action "O To (4, 1)" +action: 18 + +# State 11 +# Apply action "O Shoot: (6, 1)" +action: 30 + +# State 12 +# Apply action "X From (1, 5)" +action: 4 + +# State 13 +# Apply action "X To (1, 6)" +action: 5 + +# State 14 +# Apply action "X Shoot: (1, 5)" +action: 4 + +# State 15 +# Apply action "O From (5, 6)" +action: 29 + +# State 16 +# Apply action "O To (6, 6)" +action: 35 + +# State 17 +# Apply action "O Shoot: (5, 6)" +action: 29 + +# State 18 +# Apply action "X From (1, 2)" +action: 1 + +# State 19 +# X...#X +# #....X +# .##... +# OO.... 
+# .....# +# #...OO +IsTerminal() = False +History() = [6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1] +HistoryString() = "6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1" +InformationStateString(1) = "6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1" +ObservationString(0) = "X...#X\n#....X\n.##...\nOO....\n.....#\n#...OO" +ObservationString(1) = "X...#X\n#....X\n.##...\nOO....\n.....#\n#...OO" +ObservationTensor(0): +◯◉◉◉◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◉ ◯◯◯◯◉◯ +◯◉◉◉◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◉ ◉◯◯◯◯◯ +◉◯◯◉◉◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◉◉◯◯◯ +◯◯◉◉◉◉ ◉◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◉◉◉◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◉ +◯◉◉◉◯◯ ◯◯◯◯◉◉ ◯◯◯◯◯◯ ◉◯◯◯◯◯ +ObservationTensor(1): +◯◉◉◉◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◉ ◯◯◯◯◉◯ +◯◉◉◉◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◉ ◉◯◯◯◯◯ +◉◯◯◉◉◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◉◉◯◯◯ +◯◯◉◉◉◉ ◉◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◉◉◉◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◉ +◯◉◉◉◯◯ ◯◯◯◯◉◉ ◯◯◯◯◯◯ ◉◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2, 3, 7, 8, 15, 22] +StringLegalActions() = ["X To (1, 3)", "X To (1, 4)", "X To (2, 2)", "X To (2, 3)", "X To (3, 4)", "X To (4, 5)"] + +# Apply action "X To (3, 4)" +action: 15 + +# State 20 +# Apply action "X Shoot: (5, 4)" +action: 27 + +# State 21 +# Apply action "O From (6, 6)" +action: 35 + +# State 22 +# X...#X +# #....X +# .##X.. +# OO.... +# ...#.# +# #...O. +IsTerminal() = False +History() = [6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1, 15, 27, 35] +HistoryString() = "6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1, 15, 27, 35" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1, 15, 27, 35" +InformationStateString(1) = "6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1, 15, 27, 35" +ObservationString(0) = "X...#X\n#....X\n.##X..\nOO....\n...#.#\n#...O." +ObservationString(1) = "X...#X\n#....X\n.##X..\nOO....\n...#.#\n#...O." 
+ObservationTensor(0): +◯◉◉◉◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◉ ◯◯◯◯◉◯ +◯◉◉◉◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◉ ◉◯◯◯◯◯ +◉◯◯◯◉◉ ◯◯◯◯◯◯ ◯◯◯◉◯◯ ◯◉◉◯◯◯ +◯◯◉◉◉◉ ◉◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◉◉◯◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◉◯◉ +◯◉◉◉◯◉ ◯◯◯◯◉◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ +ObservationTensor(1): +◯◉◉◉◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◉ ◯◯◯◯◉◯ +◯◉◉◉◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◉ ◉◯◯◯◯◯ +◉◯◯◯◉◉ ◯◯◯◯◯◯ ◯◯◯◉◯◯ ◯◉◉◯◯◯ +◯◯◉◉◉◉ ◉◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◉◉◯◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◉◯◉ +◯◉◉◉◯◉ ◯◯◯◯◉◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [21, 28] +StringLegalActions() = ["O To (4, 4)", "O To (5, 5)"] + +# Apply action "O To (4, 4)" +action: 21 + +# State 23 +# Apply action "O Shoot: (3, 5)" +action: 16 + +# State 24 +# Apply action "X From (1, 6)" +action: 5 + +# State 25 +# Apply action "X To (2, 5)" +action: 10 + +# State 26 +# Apply action "X Shoot: (3, 6)" +action: 17 + +# State 27 +# Apply action "O From (4, 2)" +action: 19 + +# State 28 +# Apply action "O To (5, 3)" +action: 26 + +# State 29 +# Apply action "O Shoot: (5, 1)" +action: 24 + +# State 30 +# Apply action "X From (2, 6)" +action: 11 + +# State 31 +# Apply action "X To (1, 6)" +action: 5 + +# State 32 +# Apply action "X Shoot: (2, 6)" +action: 11 + +# State 33 +# Apply action "O From (4, 1)" +action: 18 + +# State 34 +# Apply action "O To (4, 3)" +action: 20 + +# State 35 +# Apply action "O Shoot: (5, 2)" +action: 25 + +# State 36 +# Apply action "X From (2, 5)" +action: 10 + +# State 37 +# Apply action "X To (2, 3)" +action: 8 + +# State 38 +# X...#X +# #.X..# +# .##X## +# ..OO.. +# ##O#.# +# #...O. +IsTerminal() = False +History() = [6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1, 15, 27, 35, 21, 16, 5, 10, 17, 19, 26, 24, 11, 5, 11, 18, 20, 25, 10, 8] +HistoryString() = "6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1, 15, 27, 35, 21, 16, 5, 10, 17, 19, 26, 24, 11, 5, 11, 18, 20, 25, 10, 8" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1, 15, 27, 35, 21, 16, 5, 10, 17, 19, 26, 24, 11, 5, 11, 18, 20, 25, 10, 8" +InformationStateString(1) = "6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1, 15, 27, 35, 21, 16, 5, 10, 17, 19, 26, 24, 11, 5, 11, 18, 20, 25, 10, 8" +ObservationString(0) = "X...#X\n#.X..#\n.##X##\n..OO..\n##O#.#\n#...O." +ObservationString(1) = "X...#X\n#.X..#\n.##X##\n..OO..\n##O#.#\n#...O." +ObservationTensor(0): +◯◉◉◉◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◉ ◯◯◯◯◉◯ +◯◉◯◉◉◯ ◯◯◯◯◯◯ ◯◯◉◯◯◯ ◉◯◯◯◯◉ +◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◉◯◯ ◯◉◉◯◉◉ +◉◉◯◯◉◉ ◯◯◉◉◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◉◯ ◯◯◉◯◯◯ ◯◯◯◯◯◯ ◉◉◯◉◯◉ +◯◉◉◉◯◉ ◯◯◯◯◉◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ +ObservationTensor(1): +◯◉◉◉◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◉ ◯◯◯◯◉◯ +◯◉◯◉◉◯ ◯◯◯◯◯◯ ◯◯◉◯◯◯ ◉◯◯◯◯◉ +◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◉◯◯ ◯◉◉◯◉◉ +◉◉◯◯◉◉ ◯◯◉◉◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◉◯ ◯◯◉◯◯◯ ◯◯◯◯◯◯ ◉◉◯◉◯◉ +◯◉◉◉◯◉ ◯◯◯◯◉◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 3, 7, 9, 10] +StringLegalActions() = ["X Shoot: (1, 2)", "X Shoot: (1, 3)", "X Shoot: (1, 4)", "X Shoot: (2, 2)", "X Shoot: (2, 4)", "X Shoot: (2, 5)"] + +# Apply action "X Shoot: (2, 5)" +action: 10 + +# State 39 +# Apply action "O From (4, 4)" +action: 21 + +# State 40 +# Apply action "O To (5, 5)" +action: 28 + +# State 41 +# X...#X +# #.X.## +# .##X## +# ..O... +# ##O#O# +# #...O. 
+IsTerminal() = False +History() = [6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1, 15, 27, 35, 21, 16, 5, 10, 17, 19, 26, 24, 11, 5, 11, 18, 20, 25, 10, 8, 10, 21, 28] +HistoryString() = "6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1, 15, 27, 35, 21, 16, 5, 10, 17, 19, 26, 24, 11, 5, 11, 18, 20, 25, 10, 8, 10, 21, 28" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1, 15, 27, 35, 21, 16, 5, 10, 17, 19, 26, 24, 11, 5, 11, 18, 20, 25, 10, 8, 10, 21, 28" +InformationStateString(1) = "6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1, 15, 27, 35, 21, 16, 5, 10, 17, 19, 26, 24, 11, 5, 11, 18, 20, 25, 10, 8, 10, 21, 28" +ObservationString(0) = "X...#X\n#.X.##\n.##X##\n..O...\n##O#O#\n#...O." +ObservationString(1) = "X...#X\n#.X.##\n.##X##\n..O...\n##O#O#\n#...O." +ObservationTensor(0): +◯◉◉◉◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◉ ◯◯◯◯◉◯ +◯◉◯◉◯◯ ◯◯◯◯◯◯ ◯◯◉◯◯◯ ◉◯◯◯◉◉ +◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◉◯◯ ◯◉◉◯◉◉ +◉◉◯◉◉◉ ◯◯◉◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◉◯◉◯ ◯◯◯◯◯◯ ◉◉◯◉◯◉ +◯◉◉◉◯◉ ◯◯◯◯◉◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ +ObservationTensor(1): +◯◉◉◉◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◉ ◯◯◯◯◉◯ +◯◉◯◉◯◯ ◯◯◯◯◯◯ ◯◯◉◯◯◯ ◉◯◯◯◉◉ +◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◉◯◯ ◯◉◉◯◉◉ +◉◉◯◉◉◉ ◯◯◉◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◉◯◉◯ ◯◯◯◯◯◯ ◉◉◯◉◯◉ +◯◉◉◉◯◉ ◯◯◯◯◉◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [21, 22, 23, 33, 35] +StringLegalActions() = ["O Shoot: (4, 4)", "O Shoot: (4, 5)", "O Shoot: (4, 6)", "O Shoot: (6, 4)", "O Shoot: (6, 6)"] + +# Apply action "O Shoot: (6, 4)" +action: 33 + +# State 42 +# Apply action "X From (3, 4)" +action: 15 + +# State 43 +# Apply action "X To (2, 4)" +action: 9 + +# State 44 +# Apply action "X Shoot: (1, 3)" +action: 2 + +# State 45 +# Apply action "O From (5, 3)" +action: 26 + +# State 46 +# Apply action "O To (4, 2)" +action: 19 + +# State 47 +# Apply action "O Shoot: (3, 1)" +action: 12 + +# State 48 +# Apply action "X From (2, 3)" +action: 8 + +# State 49 +# Apply action "X To (2, 2)" +action: 7 + +# State 50 +# Apply action "X Shoot: (1, 2)" +action: 1 + +# State 51 +# Apply action "O From (4, 2)" +action: 19 + +# State 52 +# Apply action "O To (5, 3)" +action: 26 + +# State 53 +# Apply action "O Shoot: (6, 3)" +action: 32 + +# State 54 +# Apply action "X From (2, 4)" +action: 9 + +# State 55 +# Apply action "X To (1, 4)" +action: 3 + +# State 56 +# Apply action "X Shoot: (4, 4)" +action: 21 + +# State 57 +# Apply action "O From (5, 3)" +action: 26 + +# State 58 +# Apply action "O To (6, 2)" +action: 31 + +# State 59 +# Apply action "O Shoot: (5, 3)" +action: 26 + +# State 60 +# X##X#X +# #X..## +# ###.## +# ..O#.. +# ####O# +# #O##O. 
+IsTerminal() = False +History() = [6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1, 15, 27, 35, 21, 16, 5, 10, 17, 19, 26, 24, 11, 5, 11, 18, 20, 25, 10, 8, 10, 21, 28, 33, 15, 9, 2, 26, 19, 12, 8, 7, 1, 19, 26, 32, 9, 3, 21, 26, 31, 26] +HistoryString() = "6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1, 15, 27, 35, 21, 16, 5, 10, 17, 19, 26, 24, 11, 5, 11, 18, 20, 25, 10, 8, 10, 21, 28, 33, 15, 9, 2, 26, 19, 12, 8, 7, 1, 19, 26, 32, 9, 3, 21, 26, 31, 26" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1, 15, 27, 35, 21, 16, 5, 10, 17, 19, 26, 24, 11, 5, 11, 18, 20, 25, 10, 8, 10, 21, 28, 33, 15, 9, 2, 26, 19, 12, 8, 7, 1, 19, 26, 32, 9, 3, 21, 26, 31, 26" +InformationStateString(1) = "6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1, 15, 27, 35, 21, 16, 5, 10, 17, 19, 26, 24, 11, 5, 11, 18, 20, 25, 10, 8, 10, 21, 28, 33, 15, 9, 2, 26, 19, 12, 8, 7, 1, 19, 26, 32, 9, 3, 21, 26, 31, 26" +ObservationString(0) = "X##X#X\n#X..##\n###.##\n..O#..\n####O#\n#O##O." +ObservationString(1) = "X##X#X\n#X..##\n###.##\n..O#..\n####O#\n#O##O." +ObservationTensor(0): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◉◯◉ ◯◉◉◯◉◯ +◯◯◉◉◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◉◯◯◯◉◉ +◯◯◯◉◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◯◉◉ +◉◉◯◯◉◉ ◯◯◉◯◯◯ ◯◯◯◯◯◯ ◯◯◯◉◯◯ +◯◯◯◯◯◯ ◯◯◯◯◉◯ ◯◯◯◯◯◯ ◉◉◉◉◯◉ +◯◯◯◯◯◉ ◯◉◯◯◉◯ ◯◯◯◯◯◯ ◉◯◉◉◯◯ +ObservationTensor(1): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◉◯◉ ◯◉◉◯◉◯ +◯◯◉◉◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◉◯◯◯◉◉ +◯◯◯◉◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◯◉◉ +◉◉◯◯◉◉ ◯◯◉◯◯◯ ◯◯◯◯◯◯ ◯◯◯◉◯◯ +◯◯◯◯◯◯ ◯◯◯◯◉◯ ◯◯◯◯◯◯ ◉◉◉◉◯◉ +◯◯◯◯◯◉ ◯◉◯◯◉◯ ◯◯◯◯◯◯ ◉◯◉◉◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [3, 7] +StringLegalActions() = ["X From (1, 4)", "X From (2, 2)"] + +# Apply action "X From (1, 4)" +action: 3 + +# State 61 +# Apply action "X To (3, 4)" +action: 15 + +# State 62 +# Apply action "X Shoot: (1, 4)" +action: 3 + +# State 63 +# X####X +# #X..## +# ###X## +# ..O#.. +# ####O# +# #O##O. +IsTerminal() = False +History() = [6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1, 15, 27, 35, 21, 16, 5, 10, 17, 19, 26, 24, 11, 5, 11, 18, 20, 25, 10, 8, 10, 21, 28, 33, 15, 9, 2, 26, 19, 12, 8, 7, 1, 19, 26, 32, 9, 3, 21, 26, 31, 26, 3, 15, 3] +HistoryString() = "6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1, 15, 27, 35, 21, 16, 5, 10, 17, 19, 26, 24, 11, 5, 11, 18, 20, 25, 10, 8, 10, 21, 28, 33, 15, 9, 2, 26, 19, 12, 8, 7, 1, 19, 26, 32, 9, 3, 21, 26, 31, 26, 3, 15, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1, 15, 27, 35, 21, 16, 5, 10, 17, 19, 26, 24, 11, 5, 11, 18, 20, 25, 10, 8, 10, 21, 28, 33, 15, 9, 2, 26, 19, 12, 8, 7, 1, 19, 26, 32, 9, 3, 21, 26, 31, 26, 3, 15, 3" +InformationStateString(1) = "6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1, 15, 27, 35, 21, 16, 5, 10, 17, 19, 26, 24, 11, 5, 11, 18, 20, 25, 10, 8, 10, 21, 28, 33, 15, 9, 2, 26, 19, 12, 8, 7, 1, 19, 26, 32, 9, 3, 21, 26, 31, 26, 3, 15, 3" +ObservationString(0) = "X####X\n#X..##\n###X##\n..O#..\n####O#\n#O##O." +ObservationString(1) = "X####X\n#X..##\n###X##\n..O#..\n####O#\n#O##O." 
+ObservationTensor(0): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◉ ◯◉◉◉◉◯ +◯◯◉◉◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◉◯◯◯◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◉◯◯ ◉◉◉◯◉◉ +◉◉◯◯◉◉ ◯◯◉◯◯◯ ◯◯◯◯◯◯ ◯◯◯◉◯◯ +◯◯◯◯◯◯ ◯◯◯◯◉◯ ◯◯◯◯◯◯ ◉◉◉◉◯◉ +◯◯◯◯◯◉ ◯◉◯◯◉◯ ◯◯◯◯◯◯ ◉◯◉◉◯◯ +ObservationTensor(1): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◉ ◯◉◉◉◉◯ +◯◯◉◉◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◉◯◯◯◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◉◯◯ ◉◉◉◯◉◉ +◉◉◯◯◉◉ ◯◯◉◯◯◯ ◯◯◯◯◯◯ ◯◯◯◉◯◯ +◯◯◯◯◯◯ ◯◯◯◯◉◯ ◯◯◯◯◯◯ ◉◉◉◉◯◉ +◯◯◯◯◯◉ ◯◉◯◯◉◯ ◯◯◯◯◯◯ ◉◯◉◉◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [20, 28, 34] +StringLegalActions() = ["O From (4, 3)", "O From (5, 5)", "O From (6, 5)"] + +# Apply action "O From (6, 5)" +action: 34 + +# State 64 +# Apply action "O To (6, 6)" +action: 35 + +# State 65 +# Apply action "O Shoot: (6, 5)" +action: 34 + +# State 66 +# Apply action "X From (3, 4)" +action: 15 + +# State 67 +# Apply action "X To (2, 4)" +action: 9 + +# State 68 +# Apply action "X Shoot: (3, 4)" +action: 15 + +# State 69 +# Apply action "O From (5, 5)" +action: 28 + +# State 70 +# Apply action "O To (4, 5)" +action: 22 + +# State 71 +# Apply action "O Shoot: (5, 5)" +action: 28 + +# State 72 +# Apply action "X From (2, 2)" +action: 7 + +# State 73 +# Apply action "X To (2, 3)" +action: 8 + +# State 74 +# Apply action "X Shoot: (2, 2)" +action: 7 + +# State 75 +# Apply action "O From (4, 5)" +action: 22 + +# State 76 +# Apply action "O To (4, 6)" +action: 23 + +# State 77 +# Apply action "O Shoot: (4, 5)" +action: 22 + +# State 78 +# X####X +# ##XX## +# ###### +# ..O##O +# ###### +# #O###O +IsTerminal() = True +History() = [6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1, 15, 27, 35, 21, 16, 5, 10, 17, 19, 26, 24, 11, 5, 11, 18, 20, 25, 10, 8, 10, 21, 28, 33, 15, 9, 2, 26, 19, 12, 8, 7, 1, 19, 26, 32, 9, 3, 21, 26, 31, 26, 3, 15, 3, 34, 35, 34, 15, 9, 15, 28, 22, 28, 7, 8, 7, 22, 23, 22] +HistoryString() = "6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1, 15, 27, 35, 21, 16, 5, 10, 17, 19, 26, 24, 11, 5, 11, 18, 20, 25, 10, 8, 10, 21, 28, 33, 15, 9, 2, 26, 19, 12, 8, 7, 1, 19, 26, 32, 9, 3, 21, 26, 31, 26, 3, 15, 3, 34, 35, 34, 15, 9, 15, 28, 22, 28, 7, 8, 7, 22, 23, 22" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1, 15, 27, 35, 21, 16, 5, 10, 17, 19, 26, 24, 11, 5, 11, 18, 20, 25, 10, 8, 10, 21, 28, 33, 15, 9, 2, 26, 19, 12, 8, 7, 1, 19, 26, 32, 9, 3, 21, 26, 31, 26, 3, 15, 3, 34, 35, 34, 15, 9, 15, 28, 22, 28, 7, 8, 7, 22, 23, 22" +InformationStateString(1) = "6, 12, 14, 31, 19, 13, 12, 0, 6, 24, 18, 30, 4, 5, 4, 29, 35, 29, 1, 15, 27, 35, 21, 16, 5, 10, 17, 19, 26, 24, 11, 5, 11, 18, 20, 25, 10, 8, 10, 21, 28, 33, 15, 9, 2, 26, 19, 12, 8, 7, 1, 19, 26, 32, 9, 3, 21, 26, 31, 26, 3, 15, 3, 34, 35, 34, 15, 9, 15, 28, 22, 28, 7, 8, 7, 22, 23, 22" +ObservationString(0) = "X####X\n##XX##\n######\n..O##O\n######\n#O###O" +ObservationString(1) = "X####X\n##XX##\n######\n..O##O\n######\n#O###O" +ObservationTensor(0): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◉ ◯◉◉◉◉◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◉◉◯◯ ◉◉◯◯◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◉◉◯◯◯◯ ◯◯◉◯◯◉ ◯◯◯◯◯◯ ◯◯◯◉◉◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◉◯◯◯◉ ◯◯◯◯◯◯ ◉◯◉◉◉◯ +ObservationTensor(1): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◉ ◯◉◉◉◉◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◉◉◯◯ ◉◉◯◯◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◉◉◯◯◯◯ ◯◯◉◯◯◉ ◯◯◯◯◯◯ ◯◯◯◉◉◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◉◯◯◯◉ ◯◯◯◯◯◯ ◉◯◉◉◉◯ +Rewards() = [-1, 1] +Returns() = [-1, 1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/backgammon(hyper_backgammon=true).txt 
b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/backgammon(hyper_backgammon=true).txt new file mode 100644 index 0000000..e1ab48e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/backgammon(hyper_backgammon=true).txt @@ -0,0 +1,784 @@ +game: backgammon(hyper_backgammon=true) + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Backgammon" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["hyper_backgammon", "scoring_type"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "backgammon" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 1352 +PolicyTensorShape() = [1352] +MaxChanceOutcomes() = 30 +GetParameters() = {hyper_backgammon=True,scoring_type=winloss_scoring} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [200] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 200 +MaxGameLength() = 1000 +ToString() = "backgammon(hyper_backgammon=True)" + +# State 0 +# +------|------+ +# |......|...ooo| +# |......|......| +# |......|......| +# |......|......| +# |......|......| +# | | | +# |......|......| +# |......|......| +# |......|......| +# |......|......| +# |......|...xxx| +# +------|------+ +# Turn: * +# Dice: +# Bar: +# Scores, X: 0, O: 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "+------|------+\n|......|...ooo|\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|......|...xxx|\n+------|------+\nTurn: *\nDice: \nBar:\nScores, X: 0, O: 0\n" +ObservationString(1) = "+------|------+\n|......|...ooo|\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|......|...xxx|\n+------|------+\nTurn: *\nDice: \nBar:\nScores, X: 0, O: 0\n" +ObservationTensor(0): ◉◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.0333333), (1,0.0333333), (2,0.0333333), (3,0.0333333), (4,0.0333333), (5,0.0333333), (6,0.0333333), (7,0.0333333), (8,0.0333333), (9,0.0333333), (10,0.0333333), (11,0.0333333), (12,0.0333333), (13,0.0333333), (14,0.0333333), (15,0.0333333), (16,0.0333333), (17,0.0333333), (18,0.0333333), (19,0.0333333), (20,0.0333333), (21,0.0333333), (22,0.0333333), (23,0.0333333), (24,0.0333333), (25,0.0333333), (26,0.0333333), (27,0.0333333), (28,0.0333333), (29,0.0333333)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +StringLegalActions() = 
["chance outcome 0 X starts, (roll: 12)", "chance outcome 1 X starts, (roll: 13)", "chance outcome 2 X starts, (roll: 14)", "chance outcome 3 X starts, (roll: 15)", "chance outcome 4 X starts, (roll: 16)", "chance outcome 5 X starts, (roll: 23)", "chance outcome 6 X starts, (roll: 24)", "chance outcome 7 X starts, (roll: 25)", "chance outcome 8 X starts, (roll: 26)", "chance outcome 9 X starts, (roll: 34)", "chance outcome 10 X starts, (roll: 35)", "chance outcome 11 X starts, (roll: 36)", "chance outcome 12 X starts, (roll: 45)", "chance outcome 13 X starts, (roll: 46)", "chance outcome 14 X starts, (roll: 56)", "chance outcome 0 O starts, (roll: 12)", "chance outcome 1 O starts, (roll: 13)", "chance outcome 2 O starts, (roll: 14)", "chance outcome 3 O starts, (roll: 15)", "chance outcome 4 O starts, (roll: 16)", "chance outcome 5 O starts, (roll: 23)", "chance outcome 6 O starts, (roll: 24)", "chance outcome 7 O starts, (roll: 25)", "chance outcome 8 O starts, (roll: 26)", "chance outcome 9 O starts, (roll: 34)", "chance outcome 10 O starts, (roll: 35)", "chance outcome 11 O starts, (roll: 36)", "chance outcome 12 O starts, (roll: 45)", "chance outcome 13 O starts, (roll: 46)", "chance outcome 14 O starts, (roll: 56)"] + +# Apply action "chance outcome 1 X starts, (roll: 13)" +action: 1 + +# State 1 +# +------|------+ +# |......|...ooo| +# |......|......| +# |......|......| +# |......|......| +# |......|......| +# | | | +# |......|......| +# |......|......| +# |......|......| +# |......|......| +# |......|...xxx| +# +------|------+ +# Turn: x +# Dice: 13 +# Bar: +# Scores, X: 0, O: 0 +IsTerminal() = False +History() = [1] +HistoryString() = "1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "+------|------+\n|......|...ooo|\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|......|...xxx|\n+------|------+\nTurn: x\nDice: 13\nBar:\nScores, X: 0, O: 0\n" +ObservationString(1) = "+------|------+\n|......|...ooo|\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|......|...xxx|\n+------|------+\nTurn: x\nDice: 13\nBar:\nScores, X: 0, O: 0\n" +ObservationTensor(0) = [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [26, 52, 53, 78, 105, 132, 702, 728, 729, 756] +StringLegalActions() = ["24/21 23/22", "24/21 22/21", "23/20 22/21", "24/21/20", "23/20/19", "22/19/18", "24/23/20", "24/23 22/19", "23/22/19", "22/21/18"] + +# Apply action "23/22/19" +action: 28 + +# State 2 +# +------|------+ +# |......|...ooo| +# |......|......| +# |......|......| +# |......|......| +# |......|......| +# | | | +# |......|......| +# |......|......| +# |......|......| +# |......|......| +# |......|x..x.x| +# +------|------+ +# Turn: * +# Dice: +# Bar: +# Scores, X: 0, O: 0 +IsTerminal() = False +History() = [1, 28] +HistoryString() = "1, 28" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "+------|------+\n|......|...ooo|\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|......|x..x.x|\n+------|------+\nTurn: *\nDice: \nBar:\nScores, X: 0, O: 0\n" +ObservationString(1) = "+------|------+\n|......|...ooo|\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|......|x..x.x|\n+------|------+\nTurn: *\nDice: \nBar:\nScores, X: 0, O: 0\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.0555556), (1,0.0555556), (2,0.0555556), (3,0.0555556), (4,0.0555556), (5,0.0555556), (6,0.0555556), (7,0.0555556), (8,0.0555556), (9,0.0555556), (10,0.0555556), (11,0.0555556), (12,0.0555556), (13,0.0555556), (14,0.0555556), (15,0.0277778), (16,0.0277778), (17,0.0277778), (18,0.0277778), (19,0.0277778), (20,0.0277778)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] +StringLegalActions() = ["chance outcome 0 (roll: 12)", "chance outcome 1 (roll: 13)", "chance outcome 2 (roll: 14)", "chance outcome 3 (roll: 15)", "chance outcome 4 (roll: 16)", "chance outcome 5 (roll: 23)", "chance outcome 6 (roll: 24)", "chance outcome 7 (roll: 25)", "chance outcome 8 (roll: 26)", "chance outcome 9 (roll: 34)", "chance outcome 10 (roll: 35)", "chance outcome 11 (roll: 36)", "chance outcome 12 (roll: 45)", "chance outcome 13 (roll: 46)", "chance outcome 14 (roll: 56)", "chance outcome 15 (roll: 11)", "chance 
outcome 16 (roll: 22)", "chance outcome 17 (roll: 33)", "chance outcome 18 (roll: 44)", "chance outcome 19 (roll: 55)", "chance outcome 20 (roll: 66)"] + +# Apply action "chance outcome 16 (roll: 22)" +action: 16 + +# State 3 +# +------|------+ +# |......|...ooo| +# |......|......| +# |......|......| +# |......|......| +# |......|......| +# | | | +# |......|......| +# |......|......| +# |......|......| +# |......|......| +# |......|x..x.x| +# +------|------+ +# Turn: o +# Dice: 22 +# Bar: +# Scores, X: 0, O: 0 +IsTerminal() = False +History() = [1, 28, 16] +HistoryString() = "1, 28, 16" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "+------|------+\n|......|...ooo|\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|......|x..x.x|\n+------|------+\nTurn: o\nDice: 22\nBar:\nScores, X: 0, O: 0\n" +ObservationString(1) = "+------|------+\n|......|...ooo|\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|......|x..x.x|\n+------|------+\nTurn: o\nDice: 22\nBar:\nScores, X: 0, O: 0\n" +ObservationTensor(0) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [515, 542, 593, 619, 620] +StringLegalActions() = ["22/20/18", "23/21/19", "23/21 22/20", "24/22/20", "24/22 23/21"] + +# Apply action "22/20/18" +action: 515 + +# State 4 +# +------|------+ +# |.....o|....oo| 
+# |......|......| +# |......|......| +# |......|......| +# |......|......| +# | | | +# |......|......| +# |......|......| +# |......|......| +# |......|......| +# |......|x..x.x| +# +------|------+ +# Turn: o +# Dice: 22 +# Bar: +# Scores, X: 0, O: 0 +IsTerminal() = False +History() = [1, 28, 16, 515] +HistoryString() = "1, 28, 16, 515" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "+------|------+\n|.....o|....oo|\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|......|x..x.x|\n+------|------+\nTurn: o\nDice: 22\nBar:\nScores, X: 0, O: 0\n" +ObservationString(1) = "+------|------+\n|.....o|....oo|\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|......|x..x.x|\n+------|------+\nTurn: o\nDice: 22\nBar:\nScores, X: 0, O: 0\n" +ObservationTensor(0) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [407, 542, 569, 589, 615, 620] +StringLegalActions() = ["18/16/14", "23/21/19", "24/22/20", "23/21 18/16", "24/22 18/16", "24/22 23/21"] + +# Apply action "23/21 18/16" +action: 464 + +# State 5 +# Apply action "chance outcome 7 (roll: 25)" +action: 7 + +# State 6 +# +------|------+ +# |...o..|..o..o| +# |......|......| +# |......|......| +# |......|......| +# |......|......| +# | | | +# |......|......| +# |......|......| +# |......|......| +# |......|......| 
+# |......|x..x.x| +# +------|------+ +# Turn: x +# Dice: 25 +# Bar: +# Scores, X: 0, O: 0 +IsTerminal() = False +History() = [1, 28, 16, 515, 464, 7] +HistoryString() = "1, 28, 16, 515, 464, 7" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "+------|------+\n|...o..|..o..o|\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|......|x..x.x|\n+------|------+\nTurn: x\nDice: 25\nBar:\nScores, X: 0, O: 0\n" +ObservationString(1) = "+------|------+\n|...o..|..o..o|\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|......|x..x.x|\n+------|------+\nTurn: x\nDice: 25\nBar:\nScores, X: 0, O: 0\n" +ObservationTensor(0) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 5.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 5.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [52, 130, 132, 184, 265, 728, 782, 806, 808, 863] +StringLegalActions() = ["24/19 22/20", "24/19/17", "22/17 19/17", "22/17/15", "19/14/12", "24/22/17", "22/20/15", "24/22 19/14", "22/20 19/14", "19/17/12"] + +# Apply action "24/19/17" +action: 681 + +# State 7 +# Apply action "chance outcome 6 (roll: 24)" +action: 6 + +# State 8 +# +------|------+ +# |...o..|..o..o| +# |......|......| +# |......|......| +# |......|......| +# |......|......| +# | | | +# |......|......| +# |......|......| +# |......|......| +# |......|......| +# |....x.|x..x..| +# +------|------+ +# Turn: o +# Dice: 24 +# Bar: +# Scores, 
X: 0, O: 0 +IsTerminal() = False +History() = [1, 28, 16, 515, 464, 7, 681, 6] +HistoryString() = "1, 28, 16, 515, 464, 7, 681, 6" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "+------|------+\n|...o..|..o..o|\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|....x.|x..x..|\n+------|------+\nTurn: o\nDice: 24\nBar:\nScores, X: 0, O: 0\n" +ObservationString(1) = "+------|------+\n|...o..|..o..o|\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|....x.|x..x..|\n+------|------+\nTurn: o\nDice: 24\nBar:\nScores, X: 0, O: 0\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [301, 436, 517, 535, 613, 618, 1029, 1164, 1211, 1245, 1289, 1294] +StringLegalActions() = ["16/12/10", "21/17/15", "24/20/18", "21/19 16/12", "24/22 16/12", "24/22 21/17", "16/14/10", "21/19/15", "21/17 16/14", "24/22/18", "24/20 16/14", "24/20 21/19"] + +# Apply action "24/20 16/14" +action: 413 + +# State 9 +# Apply action "chance outcome 18 (roll: 44)" +action: 18 + +# State 10 +# +------|------+ +# |.o....|.oo...| +# |......|......| +# |......|......| +# |......|......| +# |......|......| +# | | | +# |......|......| +# |......|......| +# |......|......| +# |......|......| +# |....x.|x..x..| +# +------|------+ +# Turn: x +# Dice: 44 +# Bar: +# Scores, X: 0, O: 0 
+IsTerminal() = False +History() = [1, 28, 16, 515, 464, 7, 681, 6, 413, 18] +HistoryString() = "1, 28, 16, 515, 464, 7, 681, 6, 413, 18" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "+------|------+\n|.o....|.oo...|\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|....x.|x..x..|\n+------|------+\nTurn: x\nDice: 44\nBar:\nScores, X: 0, O: 0\n" +ObservationString(1) = "+------|------+\n|.o....|.oo...|\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|....x.|x..x..|\n+------|------+\nTurn: x\nDice: 44\nBar:\nScores, X: 0, O: 0\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 4.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 4.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [132, 158, 184, 187, 239, 293] +StringLegalActions() = ["22/18 19/15", "22/18/14", "22/18 17/13", "19/15 17/13", "19/15/11*", "17/13/9"] + +# Apply action "19/15 17/13" +action: 137 + +# State 11 +# Apply action "22/18/14" +action: 158 + +# State 12 +# Apply action "chance outcome 2 (roll: 14)" +action: 2 + +# State 13 +# Apply action "20/16 14/13" +action: 357 + +# State 14 +# Apply action "chance outcome 8 (roll: 26)" +action: 8 + +# State 15 +# Apply action "15/9* 14/12*" +action: 269 + +# State 16 +# Apply action "chance outcome 16 (roll: 22)" +action: 16 + +# State 17 +# Apply action "Bar/23(2)" +action: 648 + +# State 18 +# Apply action "23/21(2)" 
+action: 594 + +# State 19 +# Apply action "chance outcome 15 (roll: 11)" +action: 15 + +# State 20 +# Apply action "13/12/11" +action: 298 + +# State 21 +# Apply action "11/10/9" +action: 377 + +# State 22 +# Apply action "chance outcome 3 (roll: 15)" +action: 3 + +# State 23 +# Apply action "21/20/15" +action: 1190 + +# State 24 +# Apply action "chance outcome 10 (roll: 35)" +action: 10 + +# State 25 +# Apply action "12/7 9/6" +action: 402 + +# State 26 +# Apply action "chance outcome 9 (roll: 34)" +action: 9 + +# State 27 +# Apply action "15/11/8" +action: 274 + +# State 28 +# Apply action "chance outcome 2 (roll: 14)" +action: 2 + +# State 29 +# Apply action "9/8 7/3" +action: 407 + +# State 30 +# Apply action "chance outcome 13 (roll: 46)" +action: 13 + +# State 31 +# Apply action "21/17*/11" +action: 1112 + +# State 32 +# Apply action "chance outcome 14 (roll: 56)" +action: 14 + +# State 33 +# Apply action "Bar/20/14*" +action: 804 + +# State 34 +# Apply action "chance outcome 13 (roll: 46)" +action: 13 + +# State 35 +# Apply action "Bar/19* 8/4" +action: 206 + +# State 36 +# Apply action "chance outcome 5 (roll: 23)" +action: 5 + +# State 37 +# +------|------+ +# |......|o.ox..| +# |......|......| +# |......|......| +# |......|......| +# |......|......| +# | | | +# |......|......| +# |......|......| +# |......|......| +# |......|......| +# |.x....|..o...| +# +------|------+ +# Turn: x +# Dice: 23 +# Bar: x +# Scores, X: 0, O: 0 +IsTerminal() = False +History() = [1, 28, 16, 515, 464, 7, 681, 6, 413, 18, 137, 158, 2, 357, 8, 269, 16, 648, 594, 15, 298, 377, 3, 1190, 10, 402, 9, 274, 2, 407, 13, 1112, 14, 804, 13, 206, 5] +HistoryString() = "1, 28, 16, 515, 464, 7, 681, 6, 413, 18, 137, 158, 2, 357, 8, 269, 16, 648, 594, 15, 298, 377, 3, 1190, 10, 402, 9, 274, 2, 407, 13, 1112, 14, 804, 13, 206, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "+------|------+\n|......|o.ox..|\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|.x....|..o...|\n+------|------+\nTurn: x\nDice: 23\nBar: x\nScores, X: 0, O: 0\n" +ObservationString(1) = "+------|------+\n|......|o.ox..|\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|.x....|..o...|\n+------|------+\nTurn: x\nDice: 23\nBar: x\nScores, X: 0, O: 0\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [76, 284, 570, 726, 960] +StringLegalActions() = ["Bar/22/20", "Bar/22 14/12", "Bar/22 3/1", "Bar/23/20", "Bar/23 14/11"] + +# Apply action "Bar/23 14/11" +action: 960 + +# State 38 +# Apply action "chance outcome 12 (roll: 45)" +action: 12 + +# State 39 +# +------|------+ +# |.x....|o.ox..| +# |......|......| +# |......|......| +# |......|......| +# |......|......| +# | | | +# |......|......| +# |......|......| +# |......|......| +# |......|......| +# |......|..o.x.| +# +------|------+ +# Turn: o +# Dice: 45 +# Bar: +# Scores, X: 0, O: 0 +IsTerminal() = False +History() = [1, 28, 16, 515, 464, 7, 681, 6, 413, 18, 137, 158, 2, 357, 8, 269, 16, 648, 594, 15, 298, 377, 3, 1190, 10, 402, 9, 274, 2, 407, 13, 1112, 14, 804, 13, 206, 5, 960, 12] +HistoryString() = "1, 28, 16, 515, 464, 7, 681, 6, 413, 18, 137, 158, 2, 357, 8, 269, 16, 648, 594, 15, 298, 377, 3, 1190, 10, 402, 9, 274, 2, 407, 13, 1112, 14, 804, 13, 206, 5, 960, 12" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "+------|------+\n|.x....|o.ox..|\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|......|..o.x.|\n+------|------+\nTurn: o\nDice: 45\nBar:\nScores, X: 0, O: 0\n" +ObservationString(1) = "+------|------+\n|.x....|o.ox..|\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|......|..o.x.|\n+------|------+\nTurn: o\nDice: 45\nBar:\nScores, X: 0, O: 0\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 5.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 5.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [356, 410, 538, 1058, 1112, 1214] +StringLegalActions() = ["19/14*/10", "21/16/12", "21/17 19/14*", "19/15/10", "21/17/12", "21/16 19/15"] + +# Apply action "21/17 19/14*" +action: 1164 + +# State 40 +# Apply action "chance outcome 11 (roll: 36)" +action: 11 + +# State 41 +# Apply action "Bar/19/16" +action: 154 + +# State 42 +# Apply action "chance outcome 19 (roll: 55)" +action: 19 + +# State 43 +# Apply action "17/12 14/9*" +action: 354 + +# State 44 +# Apply action "12/7/2*" +action: 167 + +# State 45 +# Apply action "chance outcome 6 (roll: 24)" +action: 6 + +# State 46 +# Apply action "Bar/23* Bar/21*" +action: 648 + +# State 47 +# Apply action "chance outcome 0 (roll: 12)" +action: 0 + +# State 48 +# Apply action "Bar/24 Bar/23" +action: 648 + +# State 49 +# Apply action "chance outcome 6 (roll: 24)" +action: 6 + +# State 50 +# Apply action "23/21/17" +action: 29 + +# State 51 +# Apply action "chance outcome 4 (roll: 16)" +action: 4 + +# State 52 +# Apply action "23/22* 9/3" +action: 580 + +# State 53 +# Apply action "chance outcome 17 (roll: 33)" +action: 17 + +# State 54 +# Apply action "Bar/22*/19" +action: 76 + +# State 55 +# Apply action "21/18 17/14" +action: 185 + +# State 56 +# Apply action "chance outcome 0 (roll: 12)" +action: 0 + +# State 57 +# Apply action "Bar/24/22" +action: 1298 + +# State 58 +# Apply action "chance outcome 15 (roll: 11)" +action: 15 + +# State 59 +# Apply action "19/18 14/13" +action: 140 + +# State 60 +# Apply action "18/17 13/12" +action: 292 + +# State 61 +# Apply action "chance outcome 9 (roll: 34)" +action: 9 + +# State 62 +# Apply action "22/19 22/18" +action: 1243 + +# State 63 +# Apply action "chance outcome 10 (roll: 35)" +action: 10 + +# State 64 +# Apply action "17/14 12/7*" +action: 194 + +# State 65 +# Apply action "chance outcome 3 (roll: 15)" +action: 3 + +# State 66 +# Apply action "Bar/24 19/14" +action: 1168 + +# State 67 +# Apply action "chance outcome 15 (roll: 11)" +action: 15 + +# State 68 +# Apply action "18/17/16" +action: 188 + +# State 69 +# +------|------+ +# |.o...x|.....o| +# |......|.....o| +# |......|......| +# |......|......| +# |......|......| +# | | | +# |......|......| +# |......|......| +# |......|......| +# |......|......| +# |.x.x..|......| +# +------|------+ +# Turn: x +# Dice: 11 +# Bar: +# Scores, X: 0, O: 0 +IsTerminal() = False +History() = [1, 28, 16, 515, 464, 
7, 681, 6, 413, 18, 137, 158, 2, 357, 8, 269, 16, 648, 594, 15, 298, 377, 3, 1190, 10, 402, 9, 274, 2, 407, 13, 1112, 14, 804, 13, 206, 5, 960, 12, 1164, 11, 154, 19, 354, 167, 6, 648, 0, 648, 6, 29, 4, 580, 17, 76, 185, 0, 1298, 15, 140, 292, 9, 1243, 10, 194, 3, 1168, 15, 188] +HistoryString() = "1, 28, 16, 515, 464, 7, 681, 6, 413, 18, 137, 158, 2, 357, 8, 269, 16, 648, 594, 15, 298, 377, 3, 1190, 10, 402, 9, 274, 2, 407, 13, 1112, 14, 804, 13, 206, 5, 960, 12, 1164, 11, 154, 19, 354, 167, 6, 648, 0, 648, 6, 29, 4, 580, 17, 76, 185, 0, 1298, 15, 140, 292, 9, 1243, 10, 194, 3, 1168, 15, 188" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "+------|------+\n|.o...x|.....o|\n|......|.....o|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|.x.x..|......|\n+------|------+\nTurn: x\nDice: 11\nBar:\nScores, X: 0, O: 0\n" +ObservationString(1) = "+------|------+\n|.o...x|.....o|\n|......|.....o|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|.x.x..|......|\n+------|------+\nTurn: x\nDice: 11\nBar:\nScores, X: 0, O: 0\n" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◉ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [242, 268, 296, 450, 452, 485] +StringLegalActions() = ["16/15/14", "16/15 14/13", "14/13/12", "16/15 7/6", "14/13 7/6", "7/6/5"] + +# Apply action "16/15 14/13" +action: 218 + +# State 70 +# Apply action "chance outcome 14 (roll: 56)" +action: 14 + +# State 71 +# Apply action "24/19 14/8" +action: 611 + +# State 72 +# Apply action "chance outcome 14 (roll: 56)" +action: 14 + +# State 73 +# Apply action "13/8/2" +action: 1103 + +# State 74 +# Apply action "chance outcome 7 (roll: 25)" +action: 7 + +# State 75 +# Apply action "24/22 19/14" +action: 1167 + +# State 76 +# Apply action "chance outcome 18 (roll: 44)" +action: 18 + +# State 77 +# Apply action "15/11*/7" +action: 347 + +# State 78 +# Apply action "7/3*(2)" +action: 459 + +# State 79 +# Apply action "chance outcome 19 (roll: 55)" +action: 19 + +# State 80 +# +------|------+ +# |......|...xx.| +# |......|...x..| +# |......|......| +# |......|......| +# |......|......| +# | | | +# |......|......| +# |......|......| +# |......|......| +# |......|......| +# |....o.|......| +# +------|------+ +# Turn: o +# Dice: 55 +# Bar: oo +# Scores, X: 0, O: 0 +IsTerminal() = False +History() = [1, 28, 16, 515, 464, 7, 681, 6, 413, 18, 137, 158, 2, 357, 8, 269, 16, 648, 594, 15, 298, 377, 3, 1190, 10, 402, 9, 274, 2, 407, 13, 1112, 14, 804, 13, 206, 5, 960, 12, 1164, 11, 154, 19, 354, 167, 6, 648, 0, 648, 6, 29, 4, 580, 17, 76, 185, 0, 1298, 15, 140, 292, 9, 1243, 10, 194, 3, 1168, 15, 188, 218, 14, 611, 14, 1103, 7, 1167, 18, 347, 459, 19] +HistoryString() = "1, 28, 16, 515, 464, 7, 681, 6, 413, 18, 137, 158, 2, 357, 8, 269, 16, 648, 594, 15, 298, 377, 3, 1190, 10, 402, 9, 274, 2, 407, 13, 1112, 14, 804, 13, 206, 5, 960, 12, 1164, 11, 154, 19, 354, 167, 6, 648, 0, 648, 6, 29, 4, 580, 17, 76, 185, 0, 1298, 15, 140, 292, 9, 1243, 10, 
194, 3, 1168, 15, 188, 218, 14, 611, 14, 1103, 7, 1167, 18, 347, 459, 19" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "+------|------+\n|......|...xx.|\n|......|...x..|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|....o.|......|\n+------|------+\nTurn: o\nDice: 55\nBar: oo\nScores, X: 0, O: 0\n" +ObservationString(1) = "+------|------+\n|......|...xx.|\n|......|...x..|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|....o.|......|\n+------|------+\nTurn: o\nDice: 55\nBar: oo\nScores, X: 0, O: 0\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 5.0, 5.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 5.0, 5.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [648] +StringLegalActions() = ["Bar/20(2)"] + +# Apply action "Bar/20(2)" +action: 648 + +# State 81 +# Apply action "20/15/10" +action: 383 + +# State 82 +# Apply action "chance outcome 20 (roll: 66)" +action: 20 + +# State 83 +# Apply action "3/Off(2)" +action: 567 + +# State 84 +# Apply action "2/Off Pass" +action: 672 + +# State 85 +# +------|------+ +# |......|.o....| +# |......|......| +# |......|......| +# |......|......| +# |......|......| +# | | | +# |......|......| +# |......|......| +# |......|......| +# |......|......| +# |..o.o.|......| +# +------|------+ +# Turn: * +# Dice: +# Bar: +# Scores, X: 3, O: 0 +IsTerminal() = True +History() = [1, 28, 16, 515, 464, 7, 681, 6, 413, 18, 137, 158, 2, 357, 8, 
269, 16, 648, 594, 15, 298, 377, 3, 1190, 10, 402, 9, 274, 2, 407, 13, 1112, 14, 804, 13, 206, 5, 960, 12, 1164, 11, 154, 19, 354, 167, 6, 648, 0, 648, 6, 29, 4, 580, 17, 76, 185, 0, 1298, 15, 140, 292, 9, 1243, 10, 194, 3, 1168, 15, 188, 218, 14, 611, 14, 1103, 7, 1167, 18, 347, 459, 19, 648, 383, 20, 567, 672] +HistoryString() = "1, 28, 16, 515, 464, 7, 681, 6, 413, 18, 137, 158, 2, 357, 8, 269, 16, 648, 594, 15, 298, 377, 3, 1190, 10, 402, 9, 274, 2, 407, 13, 1112, 14, 804, 13, 206, 5, 960, 12, 1164, 11, 154, 19, 354, 167, 6, 648, 0, 648, 6, 29, 4, 580, 17, 76, 185, 0, 1298, 15, 140, 292, 9, 1243, 10, 194, 3, 1168, 15, 188, 218, 14, 611, 14, 1103, 7, 1167, 18, 347, 459, 19, 648, 383, 20, 567, 672" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "+------|------+\n|......|.o....|\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|..o.o.|......|\n+------|------+\nTurn: *\nDice: \nBar:\nScores, X: 3, O: 0\n" +ObservationString(1) = "+------|------+\n|......|.o....|\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|..o.o.|......|\n+------|------+\nTurn: *\nDice: \nBar:\nScores, X: 3, O: 0\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/backgammon.txt 
b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/backgammon.txt new file mode 100644 index 0000000..1f85602 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/backgammon.txt @@ -0,0 +1,816 @@ +game: backgammon + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Backgammon" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["hyper_backgammon", "scoring_type"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "backgammon" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 1352 +PolicyTensorShape() = [1352] +MaxChanceOutcomes() = 30 +GetParameters() = {hyper_backgammon=False,scoring_type=winloss_scoring} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [200] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 200 +MaxGameLength() = 1000 +ToString() = "backgammon()" + +# State 0 +# +------|------+ +# |o...x.|x....o| +# |o...x.|x....o| +# |o...x.|x.....| +# |o.....|x.....| +# |o.....|x.....| +# | | | +# |x.....|o.....| +# |x.....|o.....| +# |x...o.|o.....| +# |x...o.|o....x| +# |x...o.|o....x| +# +------|------+ +# Turn: * +# Dice: +# Bar: +# Scores, X: 0, O: 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "+------|------+\n|o...x.|x....o|\n|o...x.|x....o|\n|o...x.|x.....|\n|o.....|x.....|\n|o.....|x.....|\n| | |\n|x.....|o.....|\n|x.....|o.....|\n|x...o.|o.....|\n|x...o.|o....x|\n|x...o.|o....x|\n+------|------+\nTurn: *\nDice: \nBar:\nScores, X: 0, O: 0\n" +ObservationString(1) = "+------|------+\n|o...x.|x....o|\n|o...x.|x....o|\n|o...x.|x.....|\n|o.....|x.....|\n|o.....|x.....|\n| | |\n|x.....|o.....|\n|x.....|o.....|\n|x...o.|o.....|\n|x...o.|o....x|\n|x...o.|o....x|\n+------|------+\nTurn: *\nDice: \nBar:\nScores, X: 0, O: 0\n" +ObservationTensor(0) = [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ChanceOutcomes() = [(0,0.0333333), (1,0.0333333), (2,0.0333333), (3,0.0333333), (4,0.0333333), (5,0.0333333), (6,0.0333333), (7,0.0333333), (8,0.0333333), (9,0.0333333), (10,0.0333333), (11,0.0333333), (12,0.0333333), (13,0.0333333), (14,0.0333333), (15,0.0333333), (16,0.0333333), (17,0.0333333), (18,0.0333333), (19,0.0333333), (20,0.0333333), (21,0.0333333), (22,0.0333333), (23,0.0333333), (24,0.0333333), (25,0.0333333), (26,0.0333333), (27,0.0333333), (28,0.0333333), (29,0.0333333)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +StringLegalActions() = ["chance outcome 0 X starts, (roll: 12)", "chance outcome 1 X starts, (roll: 13)", "chance outcome 2 X starts, (roll: 14)", "chance outcome 3 X starts, (roll: 15)", "chance outcome 4 X starts, (roll: 16)", "chance outcome 5 X starts, (roll: 23)", "chance outcome 6 X starts, (roll: 24)", "chance outcome 7 X starts, (roll: 25)", "chance outcome 8 X starts, (roll: 26)", "chance outcome 9 X starts, (roll: 34)", "chance outcome 10 X starts, (roll: 35)", "chance outcome 11 X starts, (roll: 36)", "chance outcome 12 X starts, (roll: 45)", "chance outcome 13 X starts, (roll: 46)", "chance outcome 14 X starts, (roll: 56)", "chance outcome 0 O starts, (roll: 12)", "chance outcome 1 O starts, (roll: 13)", "chance outcome 2 O starts, (roll: 14)", "chance outcome 3 O starts, (roll: 15)", "chance outcome 4 O starts, (roll: 16)", "chance outcome 5 O starts, (roll: 23)", "chance outcome 6 O starts, (roll: 24)", "chance outcome 7 O starts, (roll: 25)", "chance outcome 8 O starts, (roll: 26)", "chance outcome 9 O starts, (roll: 34)", "chance outcome 10 O starts, (roll: 35)", "chance outcome 11 O starts, (roll: 36)", "chance outcome 12 O starts, (roll: 45)", "chance outcome 13 O starts, (roll: 46)", "chance outcome 14 O starts, (roll: 56)"] + +# Apply action "chance outcome 0 X starts, (roll: 12)" +action: 0 + +# State 1 +# +------|------+ +# |o...x.|x....o| +# |o...x.|x....o| +# |o...x.|x.....| +# |o.....|x.....| +# |o.....|x.....| +# | | | +# |x.....|o.....| +# |x.....|o.....| +# |x...o.|o.....| +# |x...o.|o....x| +# |x...o.|o....x| +# +------|------+ +# Turn: x +# Dice: 12 +# Bar: +# Scores, X: 0, O: 0 +IsTerminal() = False +History() = [0] +HistoryString() = "0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "+------|------+\n|o...x.|x....o|\n|o...x.|x....o|\n|o...x.|x.....|\n|o.....|x.....|\n|o.....|x.....|\n| | |\n|x.....|o.....|\n|x.....|o.....|\n|x...o.|o.....|\n|x...o.|o....x|\n|x...o.|o....x|\n+------|------+\nTurn: x\nDice: 12\nBar:\nScores, X: 0, O: 0\n" 
+ObservationString(1) = "+------|------+\n|o...x.|x....o|\n|o...x.|x....o|\n|o...x.|x.....|\n|o.....|x.....|\n|o.....|x.....|\n| | |\n|x.....|o.....|\n|x.....|o.....|\n|x...o.|o.....|\n|x...o.|o....x|\n|x...o.|o....x|\n+------|------+\nTurn: x\nDice: 12\nBar:\nScores, X: 0, O: 0\n" +ObservationTensor(0) = [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [52, 349, 416, 427, 468, 479, 484, 538, 676, 702, 962, 1092, 1108, 1134, 1144, 1160, 1162, 1188] +StringLegalActions() = ["24/22/21", "13/11/10", "24/22 8/7", "13/11 8/7", "24/22 6/5", "13/11 6/5", "8/6/5", "6/4/3", "24/23 24/22", "24/23/21", "24/23 13/11", "24/23 8/6", "8/7 8/6", "8/7/5", "24/23 6/4", "8/7 6/4", "6/5 6/4", "6/5/3"] + +# Apply action "24/23 24/22" +action: 0 + +# State 2 +# +------|------+ +# |o...x.|x....o| +# |o...x.|x....o| +# |o...x.|x.....| +# |o.....|x.....| +# |o.....|x.....| +# | | | +# |x.....|o.....| +# |x.....|o.....| +# |x...o.|o.....| +# |x...o.|o.....| +# |x...o.|o..xx.| +# +------|------+ +# Turn: * +# Dice: +# Bar: +# Scores, X: 0, O: 0 +IsTerminal() = False +History() = [0, 0] +HistoryString() = "0, 0" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "+------|------+\n|o...x.|x....o|\n|o...x.|x....o|\n|o...x.|x.....|\n|o.....|x.....|\n|o.....|x.....|\n| | |\n|x.....|o.....|\n|x.....|o.....|\n|x...o.|o.....|\n|x...o.|o.....|\n|x...o.|o..xx.|\n+------|------+\nTurn: *\nDice: \nBar:\nScores, X: 0, O: 0\n" +ObservationString(1) = 
"+------|------+\n|o...x.|x....o|\n|o...x.|x....o|\n|o...x.|x.....|\n|o.....|x.....|\n|o.....|x.....|\n| | |\n|x.....|o.....|\n|x.....|o.....|\n|x...o.|o.....|\n|x...o.|o.....|\n|x...o.|o..xx.|\n+------|------+\nTurn: *\nDice: \nBar:\nScores, X: 0, O: 0\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ChanceOutcomes() = [(0,0.0555556), (1,0.0555556), (2,0.0555556), (3,0.0555556), (4,0.0555556), (5,0.0555556), (6,0.0555556), (7,0.0555556), (8,0.0555556), (9,0.0555556), (10,0.0555556), (11,0.0555556), (12,0.0555556), (13,0.0555556), (14,0.0555556), (15,0.0277778), (16,0.0277778), (17,0.0277778), (18,0.0277778), (19,0.0277778), (20,0.0277778)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] +StringLegalActions() = ["chance outcome 0 (roll: 12)", "chance outcome 1 (roll: 13)", "chance outcome 2 (roll: 14)", "chance outcome 3 (roll: 15)", "chance outcome 4 (roll: 16)", "chance outcome 5 (roll: 23)", "chance outcome 6 (roll: 24)", "chance outcome 7 (roll: 25)", "chance outcome 8 (roll: 26)", "chance outcome 9 (roll: 34)", "chance outcome 10 (roll: 35)", "chance outcome 11 (roll: 36)", "chance outcome 12 (roll: 45)", "chance outcome 13 (roll: 46)", "chance outcome 14 (roll: 56)", "chance outcome 15 (roll: 11)", "chance outcome 16 (roll: 22)", "chance outcome 17 (roll: 33)", "chance outcome 18 (roll: 44)", "chance outcome 19 (roll: 55)", "chance outcome 20 (roll: 66)"] + +# Apply action "chance outcome 14 (roll: 56)" +action: 14 + +# State 3 +# +------|------+ +# |o...x.|x....o| +# 
|o...x.|x....o| +# |o...x.|x.....| +# |o.....|x.....| +# |o.....|x.....| +# | | | +# |x.....|o.....| +# |x.....|o.....| +# |x...o.|o.....| +# |x...o.|o.....| +# |x...o.|o..xx.| +# +------|------+ +# Turn: o +# Dice: 56 +# Bar: +# Scores, X: 0, O: 0 +IsTerminal() = False +History() = [0, 0, 14] +HistoryString() = "0, 0, 14" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "+------|------+\n|o...x.|x....o|\n|o...x.|x....o|\n|o...x.|x.....|\n|o.....|x.....|\n|o.....|x.....|\n| | |\n|x.....|o.....|\n|x.....|o.....|\n|x...o.|o.....|\n|x...o.|o.....|\n|x...o.|o..xx.|\n+------|------+\nTurn: o\nDice: 56\nBar:\nScores, X: 0, O: 0\n" +ObservationString(1) = "+------|------+\n|o...x.|x....o|\n|o...x.|x....o|\n|o...x.|x.....|\n|o.....|x.....|\n|o.....|x.....|\n| | |\n|x.....|o.....|\n|x.....|o.....|\n|x...o.|o.....|\n|x...o.|o.....|\n|x...o.|o..xx.|\n+------|------+\nTurn: o\nDice: 56\nBar:\nScores, X: 0, O: 0\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 6.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 5.0, 6.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [168, 319, 465, 863, 865, 993, 995, 1000, 1279, 1281, 1286] +StringLegalActions() = ["13/7/2*", "13/8/2*", "24/18/13", "8/2* 6/1", "8/3* 8/2*", "13/7 6/1", "13/7 8/3*", "13/8 13/7", "24/18 6/1", "24/18 8/3*", "24/18 13/8"] + +# Apply action "24/18 6/1" +action: 153 + +# State 4 +# Apply action "chance outcome 12 (roll: 45)" +action: 12 + +# State 5 +# +------|------+ +# |o...xo|x....o| +# |o...x.|x.....| +# |o...x.|x.....| +# |o.....|x.....| +# |o.....|x.....| +# | | | +# 
|x.....|......| +# |x.....|o.....| +# |x...o.|o.....| +# |x...o.|o.....| +# |x...o.|o..xxo| +# +------|------+ +# Turn: x +# Dice: 45 +# Bar: +# Scores, X: 0, O: 0 +IsTerminal() = False +History() = [0, 0, 14, 153, 12] +HistoryString() = "0, 0, 14, 153, 12" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "+------|------+\n|o...xo|x....o|\n|o...x.|x.....|\n|o...x.|x.....|\n|o.....|x.....|\n|o.....|x.....|\n| | |\n|x.....|......|\n|x.....|o.....|\n|x...o.|o.....|\n|x...o.|o.....|\n|x...o.|o..xxo|\n+------|------+\nTurn: x\nDice: 45\nBar:\nScores, X: 0, O: 0\n" +ObservationString(1) = "+------|------+\n|o...xo|x....o|\n|o...x.|x.....|\n|o...x.|x.....|\n|o.....|x.....|\n|o.....|x.....|\n| | |\n|x.....|......|\n|x.....|o.....|\n|x...o.|o.....|\n|x...o.|o.....|\n|x...o.|o..xxo|\n+------|------+\nTurn: x\nDice: 45\nBar:\nScores, X: 0, O: 0\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 5.0] +ObservationTensor(1) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 5.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [53, 157, 287, 417, 427, 469, 479, 484, 834, 964, 973, 1077, 1094, 1103, 1108, 1146, 1155, 1160, 1162] +StringLegalActions() = ["23/18 22/18", "23/18/14", "23/18 13/9", "23/18 8/4", "13/8/4", "23/18 6/2", "13/8 6/2", "8/3 6/2", "22/18/13", "22/18 13/8", "13/9 13/8", "13/9/4", "22/18 8/3", "13/9 8/3", "8/4 8/3", "22/18 6/1*", "13/9 6/1*", "8/4 6/1*", "6/2 6/1*"] + +# Apply action "13/8/4" +action: 978 + +# State 6 +# Apply action "chance outcome 6 (roll: 24)" +action: 6 + +# State 7 +# +------|------+ +# |o...xo|x.x..o| +# |o...x.|x.....| +# 
|o...x.|x.....| +# |o.....|x.....| +# |o.....|x.....| +# | | | +# |......|......| +# |x.....|o.....| +# |x...o.|o.....| +# |x...o.|o.....| +# |x...o.|o..xxo| +# +------|------+ +# Turn: o +# Dice: 24 +# Bar: +# Scores, X: 0, O: 0 +IsTerminal() = False +History() = [0, 0, 14, 153, 12, 978, 6] +HistoryString() = "0, 0, 14, 153, 12, 978, 6" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "+------|------+\n|o...xo|x.x..o|\n|o...x.|x.....|\n|o...x.|x.....|\n|o.....|x.....|\n|o.....|x.....|\n| | |\n|......|......|\n|x.....|o.....|\n|x...o.|o.....|\n|x...o.|o.....|\n|x...o.|o..xxo|\n+------|------+\nTurn: o\nDice: 24\nBar:\nScores, X: 0, O: 0\n" +ObservationString(1) = "+------|------+\n|o...xo|x.x..o|\n|o...x.|x.....|\n|o...x.|x.....|\n|o.....|x.....|\n|o.....|x.....|\n| | |\n|......|......|\n|x.....|o.....|\n|x...o.|o.....|\n|x...o.|o.....|\n|x...o.|o..xxo|\n+------|------+\nTurn: o\nDice: 24\nBar:\nScores, X: 0, O: 0\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0] +ObservationTensor(1) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [85, 187, 220, 317, 319, 447, 449, 454, 517, 603, 605, 610, 615, 811, 863, 865, 948, 993, 995, 1000, 1123, 1125, 1130, 1245, 1279, 1281, 1286, 1291] +StringLegalActions() = ["8/4/2*", "8/6/2*", "13/9/7", "13/11 6/2*", "13/11 8/4", "18/16 6/2*", "18/16 8/4", "18/16 13/9", "24/20/18", "24/22 6/2*", "24/22 8/4", "24/22 13/9", "24/22 18/14", "6/4 6/2*", "8/4 6/4", "8/6 8/4", "13/11/7", "13/9 6/4", "13/9 8/6", "13/11 13/9", "18/14 6/4", "18/14 8/6", "18/14 13/11", 
"24/22/18", "24/20 6/4", "24/20 8/6", "24/20 13/11", "24/20 18/16"] + +# Apply action "24/20 8/6" +action: 1281 + +# State 8 +# Apply action "chance outcome 20 (roll: 66)" +action: 20 + +# State 9 +# +------|------+ +# |o...xo|xox...| +# |o...x.|x.....| +# |o...x.|x.....| +# |o.....|x.....| +# |o.....|x.....| +# | | | +# |......|o.....| +# |x.....|o.....| +# |x.....|o.....| +# |x...o.|o.....| +# |x...o.|o..xxo| +# +------|------+ +# Turn: x +# Dice: 66 +# Bar: +# Scores, X: 0, O: 0 +IsTerminal() = False +History() = [0, 0, 14, 153, 12, 978, 6, 1281, 20] +HistoryString() = "0, 0, 14, 153, 12, 978, 6, 1281, 20" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "+------|------+\n|o...xo|xox...|\n|o...x.|x.....|\n|o...x.|x.....|\n|o.....|x.....|\n|o.....|x.....|\n| | |\n|......|o.....|\n|x.....|o.....|\n|x.....|o.....|\n|x...o.|o.....|\n|x...o.|o..xxo|\n+------|------+\nTurn: x\nDice: 66\nBar:\nScores, X: 0, O: 0\n" +ObservationString(1) = "+------|------+\n|o...xo|xox...|\n|o...x.|x.....|\n|o...x.|x.....|\n|o.....|x.....|\n|o.....|x.....|\n| | |\n|......|o.....|\n|x.....|o.....|\n|x.....|o.....|\n|x...o.|o.....|\n|x...o.|o..xxo|\n+------|------+\nTurn: x\nDice: 66\nBar:\nScores, X: 0, O: 0\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 6.0, 6.0] +ObservationTensor(1) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 6.0, 6.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [210, 288, 297, 418, 427, 432, 453] +StringLegalActions() = ["22/16/10", "22/16 13/7*", "13/7*(2)", "22/16 8/2", "13/7* 8/2", "8/2(2)", "13/7*/1"] + +# Apply action "22/16 8/2" +action: 
418 + +# State 10 +# Apply action "16/10 8/2" +action: 424 + +# State 11 +# Apply action "chance outcome 9 (roll: 34)" +action: 9 + +# State 12 +# +------|------+ +# |o.x.xo|xox.x.| +# |o.....|x...x.| +# |o.....|x.....| +# |o.....|x.....| +# |o.....|x.....| +# | | | +# |......|o.....| +# |x.....|o.....| +# |x.....|o.....| +# |x...o.|o.....| +# |x...o.|o...xo| +# +------|------+ +# Turn: o +# Dice: 34 +# Bar: +# Scores, X: 0, O: 0 +IsTerminal() = False +History() = [0, 0, 14, 153, 12, 978, 6, 1281, 20, 418, 424, 9] +HistoryString() = "0, 0, 14, 153, 12, 978, 6, 1281, 20, 418, 424, 9" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "+------|------+\n|o.x.xo|xox.x.|\n|o.....|x...x.|\n|o.....|x.....|\n|o.....|x.....|\n|o.....|x.....|\n| | |\n|......|o.....|\n|x.....|o.....|\n|x.....|o.....|\n|x...o.|o.....|\n|x...o.|o...xo|\n+------|------+\nTurn: o\nDice: 34\nBar:\nScores, X: 0, O: 0\n" +ObservationString(1) = "+------|------+\n|o.x.xo|xox.x.|\n|o.....|x...x.|\n|o.....|x.....|\n|o.....|x.....|\n|o.....|x.....|\n| | |\n|......|o.....|\n|x.....|o.....|\n|x.....|o.....|\n|x...o.|o.....|\n|x...o.|o...xo|\n+------|------+\nTurn: o\nDice: 34\nBar:\nScores, X: 0, O: 0\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 4.0] +ObservationTensor(1) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 4.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [85, 187, 220, 317, 319, 355, 409, 447, 449, 454, 499, 501, 506, 511, 787, 811, 863, 865, 922, 993, 995, 1000, 1057, 1111, 1123, 1125, 1130, 1175, 1177, 1182, 1187] +StringLegalActions() = ["8/4/1", "8/5 6/2*", 
"13/9/6", "13/10 6/2*", "13/10 8/4", "18/14/11", "20/16/13", "18/15* 6/2*", "18/15* 8/4", "18/15* 13/9", "20/17* 6/2*", "20/17* 8/4", "20/17* 13/9", "20/17* 18/14", "8/5/1", "6/3 6/2*", "8/4 6/3", "8/5 8/4", "13/10/6", "13/9 6/3", "13/9 8/5", "13/10 13/9", "18/15*/11", "20/17*/13", "18/14 6/3", "18/14 8/5", "18/14 13/10", "20/16 6/3", "20/16 8/5", "20/16 13/10", "20/16 18/15*"] + +# Apply action "13/10 6/2*" +action: 317 + +# State 13 +# Apply action "chance outcome 6 (roll: 24)" +action: 6 + +# State 14 +# Apply action "Bar/21 8/6" +action: 440 + +# State 15 +# Apply action "chance outcome 16 (roll: 22)" +action: 16 + +# State 16 +# Apply action "10/8/6" +action: 191 + +# State 17 +# Apply action "20/18 8/6" +action: 501 + +# State 18 +# Apply action "chance outcome 0 (roll: 12)" +action: 0 + +# State 19 +# Apply action "6/5 4/2" +action: 488 + +# State 20 +# Apply action "chance outcome 19 (roll: 55)" +action: 19 + +# State 21 +# Apply action "18/13(2)" +action: 459 + +# State 22 +# Apply action "13/8(2)" +action: 324 + +# State 23 +# Apply action "chance outcome 6 (roll: 24)" +action: 6 + +# State 24 +# Apply action "10/8 6/2" +action: 1158 + +# State 25 +# Apply action "chance outcome 7 (roll: 25)" +action: 7 + +# State 26 +# Apply action "8/3 6/4*" +action: 137 + +# State 27 +# Apply action "chance outcome 6 (roll: 24)" +action: 6 + +# State 28 +# Apply action "Bar/21* 8/6" +action: 440 + +# State 29 +# Apply action "chance outcome 6 (roll: 24)" +action: 6 + +# State 30 +# Apply action "Bar/21 13/11" +action: 336 + +# State 31 +# Apply action "chance outcome 18 (roll: 44)" +action: 18 + +# State 32 +# Apply action "13/9/5" +action: 401 + +# State 33 +# Apply action "6/2(2)" +action: 486 + +# State 34 +# Apply action "chance outcome 7 (roll: 25)" +action: 7 + +# State 35 +# Apply action "13/11/6" +action: 322 + +# State 36 +# Apply action "chance outcome 20 (roll: 66)" +action: 20 + +# State 37 +# +------|------+ +# |o.....|xxo.6.| +# |o.....|xx..x.| +# |......|x...x.| +# |......|....x.| +# |......|....x.| +# | | | +# |......|o.....| +# |......|o.....| +# |x.....|o.....| +# |x...o.|o.....| +# |xo..o.|6.xooo| +# +------|------+ +# Turn: x +# Dice: 66 +# Bar: +# Scores, X: 0, O: 0 +IsTerminal() = False +History() = [0, 0, 14, 153, 12, 978, 6, 1281, 20, 418, 424, 9, 317, 6, 440, 16, 191, 501, 0, 488, 19, 459, 324, 6, 1158, 7, 137, 6, 440, 6, 336, 18, 401, 486, 7, 322, 20] +HistoryString() = "0, 0, 14, 153, 12, 978, 6, 1281, 20, 418, 424, 9, 317, 6, 440, 16, 191, 501, 0, 488, 19, 459, 324, 6, 1158, 7, 137, 6, 440, 6, 336, 18, 401, 486, 7, 322, 20" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "+------|------+\n|o.....|xxo.6.|\n|o.....|xx..x.|\n|......|x...x.|\n|......|....x.|\n|......|....x.|\n| | |\n|......|o.....|\n|......|o.....|\n|x.....|o.....|\n|x...o.|o.....|\n|xo..o.|6.xooo|\n+------|------+\nTurn: x\nDice: 66\nBar:\nScores, X: 0, O: 0\n" +ObservationString(1) = "+------|------+\n|o.....|xxo.6.|\n|o.....|xx..x.|\n|......|x...x.|\n|......|....x.|\n|......|....x.|\n| | |\n|......|o.....|\n|......|o.....|\n|x.....|o.....|\n|x...o.|o.....|\n|xo..o.|6.xooo|\n+------|------+\nTurn: x\nDice: 66\nBar:\nScores, X: 0, O: 0\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 6.0, 6.0] +ObservationTensor(1) = [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 6.0, 6.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [237, 289, 297, 453] +StringLegalActions() = ["21/15/9", "21/15 13/7", "13/7(2)", "13/7/1"] + +# Apply action "21/15 13/7" +action: 289 + +# State 38 +# Apply action "15/9/3" +action: 399 + +# State 39 +# Apply action "chance outcome 7 (roll: 25)" +action: 7 + +# State 40 +# +------|------+ +# |o....x|xxox6.| +# |o.....|xx..x.| +# |......|x...x.| +# |......|....x.| +# |......|....x.| +# | | | +# |......|o.....| +# |......|o.....| +# |......|o.....| +# |x...o.|o.....| +# |xo..o.|6..ooo| +# +------|------+ +# Turn: o +# Dice: 25 +# Bar: +# Scores, X: 0, O: 0 +IsTerminal() = False +History() = [0, 0, 14, 153, 12, 978, 6, 1281, 20, 418, 424, 9, 317, 6, 440, 16, 191, 501, 0, 488, 19, 459, 324, 6, 1158, 7, 137, 6, 440, 6, 336, 18, 401, 486, 7, 322, 20, 289, 399, 7] +HistoryString() = "0, 0, 14, 153, 12, 978, 6, 1281, 20, 418, 424, 9, 317, 6, 440, 16, 191, 501, 0, 488, 19, 459, 324, 6, 1158, 7, 137, 6, 440, 6, 336, 18, 401, 486, 7, 322, 20, 289, 399, 7" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "+------|------+\n|o....x|xxox6.|\n|o.....|xx..x.|\n|......|x...x.|\n|......|....x.|\n|......|....x.|\n| | |\n|......|o.....|\n|......|o.....|\n|......|o.....|\n|x...o.|o.....|\n|xo..o.|6..ooo|\n+------|------+\nTurn: o\nDice: 25\nBar:\nScores, X: 0, O: 0\n" +ObservationString(1) = "+------|------+\n|o....x|xxox6.|\n|o.....|xx..x.|\n|......|x...x.|\n|......|....x.|\n|......|....x.|\n| | |\n|......|o.....|\n|......|o.....|\n|......|o.....|\n|x...o.|o.....|\n|xo..o.|6..ooo|\n+------|------+\nTurn: o\nDice: 25\nBar:\nScores, X: 0, O: 0\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 5.0] +ObservationTensor(1) = [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 5.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [187, 265, 267, 317, 319, 322, 410, 808, 811, 860, 863, 865, 894, 938, 941, 943, 990, 993, 995, 998, 1000, 1198, 1201, 1203, 1206, 1208] +StringLegalActions() = ["8/6/1", "11/9 6/1", "11/9 8/3", "13/11 6/1", "13/11 8/3", "13/11/6", "21/16/14", "6/1 3/1", "6/4 6/1", "8/3/1", "8/3 6/4", "8/6 8/3", "11/9/4", "11/6 3/1", "11/6/4", "11/6 8/6", "13/8 3/1", "13/8 6/4", "13/8/6", "13/8 11/9", "13/11 13/8", "21/16 3/1", "21/16 6/4", "21/16 8/6", "21/16 11/9", "21/16 13/11"] + +# Apply action "21/16 11/9" +action: 280 + +# State 41 +# Apply action "chance outcome 8 (roll: 26)" +action: 8 + +# State 42 +# Apply action "13/11 13/7" +action: 297 + +# State 43 +# Apply action "chance outcome 6 (roll: 24)" +action: 6 + +# State 44 +# Apply action "9/5 3/1" +action: 60 + +# State 45 +# Apply action "chance outcome 11 (roll: 36)" +action: 11 + +# State 46 +# Apply action "11/5 6/3" +action: 1032 + +# State 47 +# Apply action "chance outcome 16 (roll: 22)" +action: 16 + +# State 48 +# Apply action "6/4/2" +action: 83 + +# State 49 +# Apply action "16/14 13/11" +action: 402 + +# State 50 +# Apply action "chance outcome 10 (roll: 35)" +action: 10 + +# State 51 +# Apply action "7/4 7/2" +action: 459 + +# State 52 +# Apply action "chance outcome 3 (roll: 15)" +action: 3 + +# State 53 +# Apply action "14/13 6/1" +action: 819 + +# State 54 +# Apply action "chance outcome 16 (roll: 22)" +action: 16 + +# State 55 +# Apply action "6/4 5/3" +action: 512 + +# State 56 +# Apply action "4/2 3/1" +action: 541 + +# State 57 +# Apply action "chance outcome 15 (roll: 11)" +action: 15 + +# State 58 +# Apply action 
"11/10 5/4" +action: 264 + +# State 59 +# Apply action "6/5 4/3" +action: 83 + +# State 60 +# Apply action "chance outcome 11 (roll: 36)" +action: 11 + +# State 61 +# Apply action "6/Off 3/Off" +action: 564 + +# State 62 +# Apply action "chance outcome 15 (roll: 11)" +action: 15 + +# State 63 +# Apply action "13/12 3/2" +action: 64 + +# State 64 +# Apply action "10/9 8/7" +action: 241 + +# State 65 +# Apply action "chance outcome 10 (roll: 35)" +action: 10 + +# State 66 +# Apply action "5/2 5/Off" +action: 1189 + +# State 67 +# Apply action "chance outcome 7 (roll: 25)" +action: 7 + +# State 68 +# Apply action "12/10 8/3" +action: 293 + +# State 69 +# Apply action "chance outcome 9 (roll: 34)" +action: 9 + +# State 70 +# Apply action "4/Off 3/Off" +action: 566 + +# State 71 +# Apply action "chance outcome 6 (roll: 24)" +action: 6 + +# State 72 +# +------|------+ +# |o.....|....9x| +# |......|....x.| +# |......|....x.| +# |......|....x.| +# |......|....x.| +# | | | +# |......|......| +# |......|......| +# |......|o...oo| +# |......|o...oo| +# |..oo.o|oo.ooo| +# +------|------+ +# Turn: o +# Dice: 24 +# Bar: +# Scores, X: 5, O: 0 +IsTerminal() = False +History() = [0, 0, 14, 153, 12, 978, 6, 1281, 20, 418, 424, 9, 317, 6, 440, 16, 191, 501, 0, 488, 19, 459, 324, 6, 1158, 7, 137, 6, 440, 6, 336, 18, 401, 486, 7, 322, 20, 289, 399, 7, 280, 8, 297, 6, 60, 11, 1032, 16, 83, 402, 10, 459, 3, 819, 16, 512, 541, 15, 264, 83, 11, 564, 15, 64, 241, 10, 1189, 7, 293, 9, 566, 6] +HistoryString() = "0, 0, 14, 153, 12, 978, 6, 1281, 20, 418, 424, 9, 317, 6, 440, 16, 191, 501, 0, 488, 19, 459, 324, 6, 1158, 7, 137, 6, 440, 6, 336, 18, 401, 486, 7, 322, 20, 289, 399, 7, 280, 8, 297, 6, 60, 11, 1032, 16, 83, 402, 10, 459, 3, 819, 16, 512, 541, 15, 264, 83, 11, 564, 15, 64, 241, 10, 1189, 7, 293, 9, 566, 6" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "+------|------+\n|o.....|....9x|\n|......|....x.|\n|......|....x.|\n|......|....x.|\n|......|....x.|\n| | |\n|......|......|\n|......|......|\n|......|o...oo|\n|......|o...oo|\n|..oo.o|oo.ooo|\n+------|------+\nTurn: o\nDice: 24\nBar:\nScores, X: 5, O: 0\n" +ObservationString(1) = "+------|------+\n|o.....|....9x|\n|......|....x.|\n|......|....x.|\n|......|....x.|\n|......|....x.|\n| | |\n|......|......|\n|......|......|\n|......|o...oo|\n|......|o...oo|\n|..oo.o|oo.ooo|\n+------|------+\nTurn: o\nDice: 24\nBar:\nScores, X: 5, O: 0\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0] +ObservationTensor(1) = [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 
1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 5.0, 0.0, 2.0, 4.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [134, 160, 161, 212, 213, 214, 238, 239, 240, 242, 316, 317, 318, 320, 321, 782, 808, 810, 811, 834, 836, 837, 867, 886, 888, 889, 890, 912, 914, 915, 916, 918, 948, 990, 992, 993, 994, 996, 997] +StringLegalActions() = ["6/4 5/1", "7/5/1", "7/5 6/2", "9/7 5/1", "9/7 6/2", "9/7/3", "10/8 5/1", "10/8 6/2", "10/8 7/3", "10/8 9/5", "13/11 5/1", "13/11 6/2", "13/11 7/3", "13/11 9/5", "13/11 10/6", "5/1 3/1", "6/2 3/1", "6/2 5/3", "6/4 6/2", "7/3/1", "7/3 5/3", "7/3 6/4", "10/8/4", "9/5 3/1", "9/5/3", "9/5 6/4", "9/5 7/5", "10/6 3/1", "10/6 5/3", "10/6/4", "10/6 7/5", "10/6 9/7", "13/11/7", "13/9 3/1", "13/9 5/3", "13/9 6/4", "13/9 7/5", "13/9/7", "13/9 10/8"] + +# Apply action "5/1 3/1" +action: 56 + +# State 73 +# Apply action "chance outcome 3 (roll: 15)" +action: 3 + +# State 74 +# +------|------+ +# |o.....|....9x| +# |......|....x.| +# |......|....x.| +# |......|....x.| +# |......|....x.| +# | | | +# |......|.....o| +# |......|.....o| +# |......|o...oo| +# |......|o...oo| +# |..oo.o|o...oo| +# +------|------+ +# Turn: x +# Dice: 15 +# Bar: +# Scores, X: 5, O: 0 +IsTerminal() = False +History() = [0, 0, 14, 153, 12, 978, 6, 1281, 20, 418, 424, 9, 317, 6, 440, 16, 191, 501, 0, 488, 19, 459, 324, 6, 1158, 7, 137, 6, 440, 6, 336, 18, 401, 486, 7, 322, 20, 289, 399, 7, 280, 8, 297, 6, 60, 11, 1032, 16, 83, 402, 10, 459, 3, 819, 16, 512, 541, 15, 264, 83, 11, 564, 15, 64, 241, 10, 1189, 7, 293, 9, 566, 6, 56, 3] +HistoryString() = "0, 0, 14, 153, 12, 978, 6, 1281, 20, 418, 424, 9, 317, 6, 440, 16, 191, 501, 0, 488, 19, 459, 324, 6, 1158, 7, 137, 6, 440, 6, 336, 18, 401, 486, 7, 322, 20, 289, 399, 7, 280, 8, 297, 6, 60, 11, 1032, 16, 83, 402, 10, 459, 3, 819, 16, 512, 541, 15, 264, 83, 11, 564, 15, 64, 241, 10, 1189, 7, 293, 9, 566, 6, 56, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "+------|------+\n|o.....|....9x|\n|......|....x.|\n|......|....x.|\n|......|....x.|\n|......|....x.|\n| | |\n|......|.....o|\n|......|.....o|\n|......|o...oo|\n|......|o...oo|\n|..oo.o|o...oo|\n+------|------+\nTurn: x\nDice: 15\nBar:\nScores, X: 5, O: 0\n" +ObservationString(1) = "+------|------+\n|o.....|....9x|\n|......|....x.|\n|......|....x.|\n|......|....x.|\n|......|....x.|\n| | |\n|......|.....o|\n|......|.....o|\n|......|o...oo|\n|......|o...oo|\n|..oo.o|o...oo|\n+------|------+\nTurn: x\nDice: 15\nBar:\nScores, X: 5, O: 0\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 1.0, 0.0, 0.0, 0.0, 1.0, 5.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 1.0, 1.0, 5.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [620, 1270] +StringLegalActions() = ["2/Off 1/Off", "2/1 2/Off"] + +# Apply action "2/1 2/Off" +action: 1270 + +# State 75 +# Apply action "chance outcome 12 (roll: 45)" +action: 12 + +# State 76 +# Apply action "7/3 6/1" +action: 161 + +# State 77 +# Apply action "chance outcome 19 (roll: 55)" +action: 19 + +# State 78 +# Apply action "2/Off(2)" +action: 594 + +# State 79 +# Apply action "2/Off(2)" +action: 594 + +# State 80 +# Apply action "chance outcome 16 (roll: 22)" +action: 16 + +# State 81 +# Apply action "9/7/5" +action: 164 + +# State 82 +# Apply action "10/8 6/4" +action: 239 + +# State 83 +# Apply action "chance outcome 9 (roll: 34)" +action: 9 + +# State 84 +# Apply action "2/Off(2)" +action: 1270 + +# State 85 +# Apply action "chance outcome 0 (roll: 12)" +action: 0 + +# State 86 +# Apply action "13/11 4/3" +action: 90 + +# State 87 +# Apply action "chance outcome 14 (roll: 56)" +action: 14 + +# State 88 +# Apply action "2/Off 1/Off" +action: 620 + +# State 89 +# Apply action "chance outcome 13 (roll: 46)" +action: 13 + +# State 90 +# Apply action "8/2 5/1" +action: 862 + +# State 91 +# Apply action "chance outcome 6 (roll: 24)" +action: 6 + +# State 92 +# Apply action "1/Off Pass" +action: 673 + +# State 93 +# +------|------+ +# |......|......| +# |......|......| +# |......|......| +# |......|......| +# |......|......| +# | | | +# |......|.....o| +# |......|....oo| +# |......|....oo| +# |......|...ooo| +# |.o....|o..oo7| +# +------|------+ +# Turn: * +# Dice: +# Bar: +# Scores, X: 15, O: 0 +IsTerminal() = True +History() = [0, 0, 
14, 153, 12, 978, 6, 1281, 20, 418, 424, 9, 317, 6, 440, 16, 191, 501, 0, 488, 19, 459, 324, 6, 1158, 7, 137, 6, 440, 6, 336, 18, 401, 486, 7, 322, 20, 289, 399, 7, 280, 8, 297, 6, 60, 11, 1032, 16, 83, 402, 10, 459, 3, 819, 16, 512, 541, 15, 264, 83, 11, 564, 15, 64, 241, 10, 1189, 7, 293, 9, 566, 6, 56, 3, 1270, 12, 161, 19, 594, 594, 16, 164, 239, 9, 1270, 0, 90, 14, 620, 13, 862, 6, 673] +HistoryString() = "0, 0, 14, 153, 12, 978, 6, 1281, 20, 418, 424, 9, 317, 6, 440, 16, 191, 501, 0, 488, 19, 459, 324, 6, 1158, 7, 137, 6, 440, 6, 336, 18, 401, 486, 7, 322, 20, 289, 399, 7, 280, 8, 297, 6, 60, 11, 1032, 16, 83, 402, 10, 459, 3, 819, 16, 512, 541, 15, 264, 83, 11, 564, 15, 64, 241, 10, 1189, 7, 293, 9, 566, 6, 56, 3, 1270, 12, 161, 19, 594, 594, 16, 164, 239, 9, 1270, 0, 90, 14, 620, 13, 862, 6, 673" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "+------|------+\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|.....o|\n|......|....oo|\n|......|....oo|\n|......|...ooo|\n|.o....|o..oo7|\n+------|------+\nTurn: *\nDice: \nBar:\nScores, X: 15, O: 0\n" +ObservationString(1) = "+------|------+\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n|......|......|\n| | |\n|......|.....o|\n|......|....oo|\n|......|....oo|\n|......|...ooo|\n|.o....|o..oo7|\n+------|------+\nTurn: *\nDice: \nBar:\nScores, X: 15, O: 0\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 15.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 15.0, 0.0, 0.0, 0.0] +Rewards() = [1, -1] +Returns() = [1, -1] diff 
--git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/bargaining.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/bargaining.txt new file mode 100644 index 0000000..6af09c7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/bargaining.txt @@ -0,0 +1,4265 @@ +game: bargaining + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Bargaining" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["discount", "instances_file", "max_turns", "prob_end"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "bargaining" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 121 +PolicyTensorShape() = [121] +MaxChanceOutcomes() = 1002 +GetParameters() = {discount=1.0,instances_file=,max_turns=10,prob_end=0.0} +NumPlayers() = 2 +MinUtility() = 0.0 +MaxUtility() = 10.0 +UtilitySum() = None +InformationStateTensorShape() = [309] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 309 +ObservationTensorShape() = [93] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 93 +MaxGameLength() = 10 +ToString() = "bargaining()" + +# State 0 +# Initial chance node +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "Initial chance node" +InformationStateString(1) = "Initial chance node" +InformationStateTensor(0): zeros(309) +InformationStateTensor(1): zeros(309) +ObservationString(0) = "Initial chance node" +ObservationString(1) = "Initial chance node" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.001), (1,0.001), (2,0.001), (3,0.001), (4,0.001), (5,0.001), (6,0.001), (7,0.001), (8,0.001), (9,0.001), (10,0.001), (11,0.001), (12,0.001), (13,0.001), (14,0.001), (15,0.001), (16,0.001), (17,0.001), (18,0.001), (19,0.001), (20,0.001), (21,0.001), (22,0.001), (23,0.001), (24,0.001), (25,0.001), (26,0.001), (27,0.001), (28,0.001), (29,0.001), (30,0.001), (31,0.001), (32,0.001), (33,0.001), (34,0.001), (35,0.001), (36,0.001), (37,0.001), (38,0.001), (39,0.001), (40,0.001), (41,0.001), (42,0.001), (43,0.001), (44,0.001), (45,0.001), (46,0.001), (47,0.001), (48,0.001), (49,0.001), (50,0.001), (51,0.001), (52,0.001), (53,0.001), (54,0.001), (55,0.001), (56,0.001), (57,0.001), (58,0.001), (59,0.001), (60,0.001), (61,0.001), (62,0.001), (63,0.001), (64,0.001), (65,0.001), (66,0.001), (67,0.001), (68,0.001), (69,0.001), (70,0.001), (71,0.001), (72,0.001), (73,0.001), (74,0.001), (75,0.001), (76,0.001), (77,0.001), (78,0.001), (79,0.001), (80,0.001), (81,0.001), (82,0.001), (83,0.001), (84,0.001), (85,0.001), (86,0.001), (87,0.001), (88,0.001), (89,0.001), (90,0.001), (91,0.001), (92,0.001), (93,0.001), (94,0.001), (95,0.001), (96,0.001), (97,0.001), (98,0.001), (99,0.001), (100,0.001), (101,0.001), (102,0.001), 
(103,0.001), (104,0.001), (105,0.001), (106,0.001), (107,0.001), (108,0.001), (109,0.001), (110,0.001), (111,0.001), (112,0.001), (113,0.001), (114,0.001), (115,0.001), (116,0.001), (117,0.001), (118,0.001), (119,0.001), (120,0.001), (121,0.001), (122,0.001), (123,0.001), (124,0.001), (125,0.001), (126,0.001), (127,0.001), (128,0.001), (129,0.001), (130,0.001), (131,0.001), (132,0.001), (133,0.001), (134,0.001), (135,0.001), (136,0.001), (137,0.001), (138,0.001), (139,0.001), (140,0.001), (141,0.001), (142,0.001), (143,0.001), (144,0.001), (145,0.001), (146,0.001), (147,0.001), (148,0.001), (149,0.001), (150,0.001), (151,0.001), (152,0.001), (153,0.001), (154,0.001), (155,0.001), (156,0.001), (157,0.001), (158,0.001), (159,0.001), (160,0.001), (161,0.001), (162,0.001), (163,0.001), (164,0.001), (165,0.001), (166,0.001), (167,0.001), (168,0.001), (169,0.001), (170,0.001), (171,0.001), (172,0.001), (173,0.001), (174,0.001), (175,0.001), (176,0.001), (177,0.001), (178,0.001), (179,0.001), (180,0.001), (181,0.001), (182,0.001), (183,0.001), (184,0.001), (185,0.001), (186,0.001), (187,0.001), (188,0.001), (189,0.001), (190,0.001), (191,0.001), (192,0.001), (193,0.001), (194,0.001), (195,0.001), (196,0.001), (197,0.001), (198,0.001), (199,0.001), (200,0.001), (201,0.001), (202,0.001), (203,0.001), (204,0.001), (205,0.001), (206,0.001), (207,0.001), (208,0.001), (209,0.001), (210,0.001), (211,0.001), (212,0.001), (213,0.001), (214,0.001), (215,0.001), (216,0.001), (217,0.001), (218,0.001), (219,0.001), (220,0.001), (221,0.001), (222,0.001), (223,0.001), (224,0.001), (225,0.001), (226,0.001), (227,0.001), (228,0.001), (229,0.001), (230,0.001), (231,0.001), (232,0.001), (233,0.001), (234,0.001), (235,0.001), (236,0.001), (237,0.001), (238,0.001), (239,0.001), (240,0.001), (241,0.001), (242,0.001), (243,0.001), (244,0.001), (245,0.001), (246,0.001), (247,0.001), (248,0.001), (249,0.001), (250,0.001), (251,0.001), (252,0.001), (253,0.001), (254,0.001), (255,0.001), (256,0.001), (257,0.001), (258,0.001), (259,0.001), (260,0.001), (261,0.001), (262,0.001), (263,0.001), (264,0.001), (265,0.001), (266,0.001), (267,0.001), (268,0.001), (269,0.001), (270,0.001), (271,0.001), (272,0.001), (273,0.001), (274,0.001), (275,0.001), (276,0.001), (277,0.001), (278,0.001), (279,0.001), (280,0.001), (281,0.001), (282,0.001), (283,0.001), (284,0.001), (285,0.001), (286,0.001), (287,0.001), (288,0.001), (289,0.001), (290,0.001), (291,0.001), (292,0.001), (293,0.001), (294,0.001), (295,0.001), (296,0.001), (297,0.001), (298,0.001), (299,0.001), (300,0.001), (301,0.001), (302,0.001), (303,0.001), (304,0.001), (305,0.001), (306,0.001), (307,0.001), (308,0.001), (309,0.001), (310,0.001), (311,0.001), (312,0.001), (313,0.001), (314,0.001), (315,0.001), (316,0.001), (317,0.001), (318,0.001), (319,0.001), (320,0.001), (321,0.001), (322,0.001), (323,0.001), (324,0.001), (325,0.001), (326,0.001), (327,0.001), (328,0.001), (329,0.001), (330,0.001), (331,0.001), (332,0.001), (333,0.001), (334,0.001), (335,0.001), (336,0.001), (337,0.001), (338,0.001), (339,0.001), (340,0.001), (341,0.001), (342,0.001), (343,0.001), (344,0.001), (345,0.001), (346,0.001), (347,0.001), (348,0.001), (349,0.001), (350,0.001), (351,0.001), (352,0.001), (353,0.001), (354,0.001), (355,0.001), (356,0.001), (357,0.001), (358,0.001), (359,0.001), (360,0.001), (361,0.001), (362,0.001), (363,0.001), (364,0.001), (365,0.001), (366,0.001), (367,0.001), (368,0.001), (369,0.001), (370,0.001), (371,0.001), (372,0.001), (373,0.001), (374,0.001), (375,0.001), 
(376,0.001), (377,0.001), (378,0.001), (379,0.001), (380,0.001), (381,0.001), (382,0.001), (383,0.001), (384,0.001), (385,0.001), (386,0.001), (387,0.001), (388,0.001), (389,0.001), (390,0.001), (391,0.001), (392,0.001), (393,0.001), (394,0.001), (395,0.001), (396,0.001), (397,0.001), (398,0.001), (399,0.001), (400,0.001), (401,0.001), (402,0.001), (403,0.001), (404,0.001), (405,0.001), (406,0.001), (407,0.001), (408,0.001), (409,0.001), (410,0.001), (411,0.001), (412,0.001), (413,0.001), (414,0.001), (415,0.001), (416,0.001), (417,0.001), (418,0.001), (419,0.001), (420,0.001), (421,0.001), (422,0.001), (423,0.001), (424,0.001), (425,0.001), (426,0.001), (427,0.001), (428,0.001), (429,0.001), (430,0.001), (431,0.001), (432,0.001), (433,0.001), (434,0.001), (435,0.001), (436,0.001), (437,0.001), (438,0.001), (439,0.001), (440,0.001), (441,0.001), (442,0.001), (443,0.001), (444,0.001), (445,0.001), (446,0.001), (447,0.001), (448,0.001), (449,0.001), (450,0.001), (451,0.001), (452,0.001), (453,0.001), (454,0.001), (455,0.001), (456,0.001), (457,0.001), (458,0.001), (459,0.001), (460,0.001), (461,0.001), (462,0.001), (463,0.001), (464,0.001), (465,0.001), (466,0.001), (467,0.001), (468,0.001), (469,0.001), (470,0.001), (471,0.001), (472,0.001), (473,0.001), (474,0.001), (475,0.001), (476,0.001), (477,0.001), (478,0.001), (479,0.001), (480,0.001), (481,0.001), (482,0.001), (483,0.001), (484,0.001), (485,0.001), (486,0.001), (487,0.001), (488,0.001), (489,0.001), (490,0.001), (491,0.001), (492,0.001), (493,0.001), (494,0.001), (495,0.001), (496,0.001), (497,0.001), (498,0.001), (499,0.001), (500,0.001), (501,0.001), (502,0.001), (503,0.001), (504,0.001), (505,0.001), (506,0.001), (507,0.001), (508,0.001), (509,0.001), (510,0.001), (511,0.001), (512,0.001), (513,0.001), (514,0.001), (515,0.001), (516,0.001), (517,0.001), (518,0.001), (519,0.001), (520,0.001), (521,0.001), (522,0.001), (523,0.001), (524,0.001), (525,0.001), (526,0.001), (527,0.001), (528,0.001), (529,0.001), (530,0.001), (531,0.001), (532,0.001), (533,0.001), (534,0.001), (535,0.001), (536,0.001), (537,0.001), (538,0.001), (539,0.001), (540,0.001), (541,0.001), (542,0.001), (543,0.001), (544,0.001), (545,0.001), (546,0.001), (547,0.001), (548,0.001), (549,0.001), (550,0.001), (551,0.001), (552,0.001), (553,0.001), (554,0.001), (555,0.001), (556,0.001), (557,0.001), (558,0.001), (559,0.001), (560,0.001), (561,0.001), (562,0.001), (563,0.001), (564,0.001), (565,0.001), (566,0.001), (567,0.001), (568,0.001), (569,0.001), (570,0.001), (571,0.001), (572,0.001), (573,0.001), (574,0.001), (575,0.001), (576,0.001), (577,0.001), (578,0.001), (579,0.001), (580,0.001), (581,0.001), (582,0.001), (583,0.001), (584,0.001), (585,0.001), (586,0.001), (587,0.001), (588,0.001), (589,0.001), (590,0.001), (591,0.001), (592,0.001), (593,0.001), (594,0.001), (595,0.001), (596,0.001), (597,0.001), (598,0.001), (599,0.001), (600,0.001), (601,0.001), (602,0.001), (603,0.001), (604,0.001), (605,0.001), (606,0.001), (607,0.001), (608,0.001), (609,0.001), (610,0.001), (611,0.001), (612,0.001), (613,0.001), (614,0.001), (615,0.001), (616,0.001), (617,0.001), (618,0.001), (619,0.001), (620,0.001), (621,0.001), (622,0.001), (623,0.001), (624,0.001), (625,0.001), (626,0.001), (627,0.001), (628,0.001), (629,0.001), (630,0.001), (631,0.001), (632,0.001), (633,0.001), (634,0.001), (635,0.001), (636,0.001), (637,0.001), (638,0.001), (639,0.001), (640,0.001), (641,0.001), (642,0.001), (643,0.001), (644,0.001), (645,0.001), (646,0.001), (647,0.001), (648,0.001), 
(649,0.001), (650,0.001), (651,0.001), (652,0.001), (653,0.001), (654,0.001), (655,0.001), (656,0.001), (657,0.001), (658,0.001), (659,0.001), (660,0.001), (661,0.001), (662,0.001), (663,0.001), (664,0.001), (665,0.001), (666,0.001), (667,0.001), (668,0.001), (669,0.001), (670,0.001), (671,0.001), (672,0.001), (673,0.001), (674,0.001), (675,0.001), (676,0.001), (677,0.001), (678,0.001), (679,0.001), (680,0.001), (681,0.001), (682,0.001), (683,0.001), (684,0.001), (685,0.001), (686,0.001), (687,0.001), (688,0.001), (689,0.001), (690,0.001), (691,0.001), (692,0.001), (693,0.001), (694,0.001), (695,0.001), (696,0.001), (697,0.001), (698,0.001), (699,0.001), (700,0.001), (701,0.001), (702,0.001), (703,0.001), (704,0.001), (705,0.001), (706,0.001), (707,0.001), (708,0.001), (709,0.001), (710,0.001), (711,0.001), (712,0.001), (713,0.001), (714,0.001), (715,0.001), (716,0.001), (717,0.001), (718,0.001), (719,0.001), (720,0.001), (721,0.001), (722,0.001), (723,0.001), (724,0.001), (725,0.001), (726,0.001), (727,0.001), (728,0.001), (729,0.001), (730,0.001), (731,0.001), (732,0.001), (733,0.001), (734,0.001), (735,0.001), (736,0.001), (737,0.001), (738,0.001), (739,0.001), (740,0.001), (741,0.001), (742,0.001), (743,0.001), (744,0.001), (745,0.001), (746,0.001), (747,0.001), (748,0.001), (749,0.001), (750,0.001), (751,0.001), (752,0.001), (753,0.001), (754,0.001), (755,0.001), (756,0.001), (757,0.001), (758,0.001), (759,0.001), (760,0.001), (761,0.001), (762,0.001), (763,0.001), (764,0.001), (765,0.001), (766,0.001), (767,0.001), (768,0.001), (769,0.001), (770,0.001), (771,0.001), (772,0.001), (773,0.001), (774,0.001), (775,0.001), (776,0.001), (777,0.001), (778,0.001), (779,0.001), (780,0.001), (781,0.001), (782,0.001), (783,0.001), (784,0.001), (785,0.001), (786,0.001), (787,0.001), (788,0.001), (789,0.001), (790,0.001), (791,0.001), (792,0.001), (793,0.001), (794,0.001), (795,0.001), (796,0.001), (797,0.001), (798,0.001), (799,0.001), (800,0.001), (801,0.001), (802,0.001), (803,0.001), (804,0.001), (805,0.001), (806,0.001), (807,0.001), (808,0.001), (809,0.001), (810,0.001), (811,0.001), (812,0.001), (813,0.001), (814,0.001), (815,0.001), (816,0.001), (817,0.001), (818,0.001), (819,0.001), (820,0.001), (821,0.001), (822,0.001), (823,0.001), (824,0.001), (825,0.001), (826,0.001), (827,0.001), (828,0.001), (829,0.001), (830,0.001), (831,0.001), (832,0.001), (833,0.001), (834,0.001), (835,0.001), (836,0.001), (837,0.001), (838,0.001), (839,0.001), (840,0.001), (841,0.001), (842,0.001), (843,0.001), (844,0.001), (845,0.001), (846,0.001), (847,0.001), (848,0.001), (849,0.001), (850,0.001), (851,0.001), (852,0.001), (853,0.001), (854,0.001), (855,0.001), (856,0.001), (857,0.001), (858,0.001), (859,0.001), (860,0.001), (861,0.001), (862,0.001), (863,0.001), (864,0.001), (865,0.001), (866,0.001), (867,0.001), (868,0.001), (869,0.001), (870,0.001), (871,0.001), (872,0.001), (873,0.001), (874,0.001), (875,0.001), (876,0.001), (877,0.001), (878,0.001), (879,0.001), (880,0.001), (881,0.001), (882,0.001), (883,0.001), (884,0.001), (885,0.001), (886,0.001), (887,0.001), (888,0.001), (889,0.001), (890,0.001), (891,0.001), (892,0.001), (893,0.001), (894,0.001), (895,0.001), (896,0.001), (897,0.001), (898,0.001), (899,0.001), (900,0.001), (901,0.001), (902,0.001), (903,0.001), (904,0.001), (905,0.001), (906,0.001), (907,0.001), (908,0.001), (909,0.001), (910,0.001), (911,0.001), (912,0.001), (913,0.001), (914,0.001), (915,0.001), (916,0.001), (917,0.001), (918,0.001), (919,0.001), (920,0.001), (921,0.001), 
(922,0.001), (923,0.001), (924,0.001), (925,0.001), (926,0.001), (927,0.001), (928,0.001), (929,0.001), (930,0.001), (931,0.001), (932,0.001), (933,0.001), (934,0.001), (935,0.001), (936,0.001), (937,0.001), (938,0.001), (939,0.001), (940,0.001), (941,0.001), (942,0.001), (943,0.001), (944,0.001), (945,0.001), (946,0.001), (947,0.001), (948,0.001), (949,0.001), (950,0.001), (951,0.001), (952,0.001), (953,0.001), (954,0.001), (955,0.001), (956,0.001), (957,0.001), (958,0.001), (959,0.001), (960,0.001), (961,0.001), (962,0.001), (963,0.001), (964,0.001), (965,0.001), (966,0.001), (967,0.001), (968,0.001), (969,0.001), (970,0.001), (971,0.001), (972,0.001), (973,0.001), (974,0.001), (975,0.001), (976,0.001), (977,0.001), (978,0.001), (979,0.001), (980,0.001), (981,0.001), (982,0.001), (983,0.001), (984,0.001), (985,0.001), (986,0.001), (987,0.001), (988,0.001), (989,0.001), (990,0.001), (991,0.001), (992,0.001), (993,0.001), (994,0.001), (995,0.001), (996,0.001), (997,0.001), (998,0.001), (999,0.001)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 
526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999] +StringLegalActions() = ["Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 8, Hat: 1, Basketball: 0 +P1 vals: Book: 4, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 4, Hat: 1, Basketball: 2 +P1 vals: Book: 2, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 6 +P1 vals: Book: 0, Hat: 4, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 9, Hat: 0, Basketball: 1 +P1 vals: Book: 2, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 5, Hat: 1, Basketball: 1 +P1 vals: Book: 0, Hat: 1, Basketball: 6 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 1 +P1 vals: Book: 1, Hat: 0, Basketball: 6 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 4, Basketball: 3 +P1 vals: Book: 0, Hat: 2, Basketball: 8 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 0, Hat: 1, Basketball: 3 +P1 vals: Book: 1, Hat: 3, 
Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 2, Hat: 2, Basketball: 2 +P1 vals: Book: 10, Hat: 0, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 2, Hat: 3, Basketball: 1 +P1 vals: Book: 4, Hat: 0, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 6, Hat: 1, Basketball: 0 +P1 vals: Book: 8, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 7, Hat: 3, Basketball: 0 +P1 vals: Book: 0, Hat: 4, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 4, Hat: 0, Basketball: 6 +P1 vals: Book: 3, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 3, Basketball: 1 +P0 vals: Book: 3, Hat: 0, Basketball: 1 +P1 vals: Book: 0, Hat: 2, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 8, Hat: 1, Basketball: 0 +P1 vals: Book: 7, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 2 +P0 vals: Book: 0, Hat: 6, Basketball: 2 +P1 vals: Book: 2, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 3, Hat: 2, Basketball: 1 +P1 vals: Book: 4, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 4, Hat: 2, Basketball: 0 +P1 vals: Book: 8, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 3 +P0 vals: Book: 3, Hat: 1, Basketball: 1 +P1 vals: Book: 0, Hat: 10, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 6, Hat: 1, Basketball: 1 +P1 vals: Book: 4, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 3, Hat: 0, Basketball: 4 +P1 vals: Book: 2, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 3, Hat: 3, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 4 +P1 vals: Book: 3, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 0, Hat: 5, Basketball: 0 +P1 vals: Book: 3, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 3 +P1 vals: Book: 3, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 0, Basketball: 10 +P1 vals: Book: 1, Hat: 3, Basketball: 3 +", "Sample game instance: +Pool: Book: 2, Hat: 4, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 2 +P1 vals: Book: 2, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 6, Basketball: 0 +P1 vals: Book: 1, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 4, Hat: 2, Basketball: 1 +P1 vals: Book: 4, Hat: 6, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 2, Hat: 0, Basketball: 8 +P1 vals: Book: 5, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 7 +P1 vals: Book: 6, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 4, Hat: 6, Basketball: 0 +P1 vals: Book: 0, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 3, Hat: 2, Basketball: 1 +P1 vals: Book: 2, Hat: 8, Basketball: 0 +", 
"Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 2 +P0 vals: Book: 7, Hat: 1, Basketball: 0 +P1 vals: Book: 4, Hat: 0, Basketball: 3 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 2, Basketball: 2 +P1 vals: Book: 2, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 7 +P1 vals: Book: 7, Hat: 0, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 2, Hat: 2, Basketball: 2 +P1 vals: Book: 1, Hat: 2, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 9, Hat: 0, Basketball: 1 +P1 vals: Book: 0, Hat: 1, Basketball: 5 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 4, Basketball: 6 +P1 vals: Book: 1, Hat: 5, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 6 +P1 vals: Book: 4, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 3 +P1 vals: Book: 0, Hat: 6, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 10, Hat: 0, Basketball: 0 +P1 vals: Book: 1, Hat: 3, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 2 +P1 vals: Book: 1, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 5, Hat: 1, Basketball: 2 +P1 vals: Book: 3, Hat: 0, Basketball: 7 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 1 +P1 vals: Book: 3, Hat: 0, Basketball: 7 +", "Sample game instance: +Pool: Book: 4, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 3, Basketball: 0 +P1 vals: Book: 0, Hat: 3, Basketball: 4 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 3, Basketball: 2 +P1 vals: Book: 5, Hat: 0, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 4, Hat: 2, Basketball: 0 +P1 vals: Book: 1, Hat: 1, Basketball: 6 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 6, Hat: 1, Basketball: 1 +P1 vals: Book: 0, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 3, Hat: 4, Basketball: 0 +P1 vals: Book: 3, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 4 +P1 vals: Book: 9, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 0, Hat: 3, Basketball: 2 +P1 vals: Book: 1, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 4 +P1 vals: Book: 1, Hat: 0, Basketball: 7 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 0, Basketball: 7 +P1 vals: Book: 0, Hat: 8, Basketball: 2 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 4, Basketball: 2 +P1 vals: Book: 2, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 0, Basketball: 10 +P1 vals: Book: 1, Hat: 1, Basketball: 6 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 3, Hat: 0, Basketball: 4 +P1 vals: Book: 2, Hat: 3, Basketball: 0 +", "Sample game instance: 
+Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 2, Hat: 3, Basketball: 0 +P1 vals: Book: 0, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 3, Hat: 4, Basketball: 0 +P1 vals: Book: 1, Hat: 2, Basketball: 3 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 2, Hat: 2, Basketball: 2 +P1 vals: Book: 0, Hat: 2, Basketball: 8 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 4, Hat: 0, Basketball: 3 +P1 vals: Book: 2, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 2, Hat: 1, Basketball: 2 +P1 vals: Book: 2, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 1, Hat: 1, Basketball: 3 +P1 vals: Book: 0, Hat: 5, Basketball: 0 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 5 +P1 vals: Book: 1, Hat: 0, Basketball: 7 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 3, Hat: 2, Basketball: 1 +P1 vals: Book: 8, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 3, Hat: 3, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 1 +P1 vals: Book: 2, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 4 +P0 vals: Book: 1, Hat: 8, Basketball: 0 +P1 vals: Book: 3, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 6, Hat: 1, Basketball: 1 +P1 vals: Book: 8, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 3, Basketball: 2 +P1 vals: Book: 0, Hat: 10, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 3 +P1 vals: Book: 3, Hat: 0, Basketball: 7 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 2, Hat: 2, Basketball: 2 +P1 vals: Book: 1, Hat: 8, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 2 +P0 vals: Book: 10, Hat: 0, Basketball: 0 +P1 vals: Book: 2, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 5, Hat: 1, Basketball: 1 +P1 vals: Book: 2, Hat: 0, Basketball: 8 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 2, Hat: 4, Basketball: 0 +P1 vals: Book: 3, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 2, Hat: 2, Basketball: 1 +P1 vals: Book: 3, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 2, Hat: 5, Basketball: 1 +P1 vals: Book: 6, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 8, Basketball: 0 +P1 vals: Book: 1, Hat: 6, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 3, Hat: 1, Basketball: 4 +P1 vals: Book: 10, Hat: 0, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 1, Hat: 3, Basketball: 0 +P1 vals: Book: 7, Hat: 0, Basketball: 3 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 8, Basketball: 2 +P1 vals: Book: 1, Hat: 6, Basketball: 1 +", "Sample game instance: +Pool: Book: 5, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 9, Basketball: 1 +P1 vals: Book: 1, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 3, Hat: 1, 
Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 3 +P1 vals: Book: 0, Hat: 7, Basketball: 3 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 5, Basketball: 5 +P1 vals: Book: 3, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 0, Basketball: 7 +P1 vals: Book: 2, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 4 +P1 vals: Book: 2, Hat: 3, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 4, Hat: 2, Basketball: 1 +P1 vals: Book: 0, Hat: 3, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 2, Hat: 1, Basketball: 2 +P1 vals: Book: 0, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 3, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 2 +P1 vals: Book: 2, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 3, Basketball: 7 +P1 vals: Book: 1, Hat: 1, Basketball: 6 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 4 +P0 vals: Book: 0, Hat: 2, Basketball: 2 +P1 vals: Book: 2, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 2, Hat: 0, Basketball: 8 +P1 vals: Book: 0, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 4, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 0, Basketball: 6 +P1 vals: Book: 0, Hat: 2, Basketball: 6 +", "Sample game instance: +Pool: Book: 2, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 3, Basketball: 1 +P1 vals: Book: 2, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 0, Hat: 6, Basketball: 1 +P1 vals: Book: 1, Hat: 5, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 10, Hat: 0, Basketball: 0 +P1 vals: Book: 3, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 5, Basketball: 2 +P1 vals: Book: 1, Hat: 5, Basketball: 2 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 0, Basketball: 10 +P1 vals: Book: 1, Hat: 2, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 9, Basketball: 0 +P1 vals: Book: 7, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 4, Basketball: 2 +P1 vals: Book: 3, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 4 +P0 vals: Book: 3, Hat: 0, Basketball: 1 +P1 vals: Book: 2, Hat: 6, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 1, Hat: 4, Basketball: 1 +P1 vals: Book: 4, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 3, Basketball: 2 +P1 vals: Book: 3, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 3, Hat: 0, Basketball: 4 +P1 vals: Book: 0, Hat: 2, Basketball: 6 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 2, Hat: 2, Basketball: 2 +P1 vals: Book: 0, Hat: 8, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 3, Hat: 2, Basketball: 1 +P1 vals: Book: 3, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: 
Book: 3, Hat: 4, Basketball: 1 +P1 vals: Book: 1, Hat: 9, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 4, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 2 +P1 vals: Book: 2, Hat: 0, Basketball: 6 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 4, Hat: 1, Basketball: 0 +P1 vals: Book: 1, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 9 +P1 vals: Book: 2, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 1, Hat: 1, Basketball: 2 +P1 vals: Book: 5, Hat: 5, Basketball: 0 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 3, Hat: 1, Basketball: 0 +P1 vals: Book: 2, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 2 +P0 vals: Book: 4, Hat: 1, Basketball: 1 +P1 vals: Book: 4, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 6, Hat: 1, Basketball: 1 +P1 vals: Book: 0, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 2, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 4 +P1 vals: Book: 4, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 3, Hat: 1, Basketball: 0 +P1 vals: Book: 0, Hat: 3, Basketball: 7 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 4 +P0 vals: Book: 5, Hat: 0, Basketball: 0 +P1 vals: Book: 1, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 5, Basketball: 1 +P1 vals: Book: 0, Hat: 4, Basketball: 6 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 6 +P1 vals: Book: 3, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 4, Hat: 2, Basketball: 0 +P1 vals: Book: 3, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 8 +P1 vals: Book: 1, Hat: 1, Basketball: 6 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 3, Basketball: 4 +P1 vals: Book: 2, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 3, Basketball: 3 +P1 vals: Book: 0, Hat: 6, Basketball: 4 +", "Sample game instance: +Pool: Book: 5, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 4 +P1 vals: Book: 1, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 3, Hat: 1, Basketball: 2 +P1 vals: Book: 2, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 2 +P0 vals: Book: 8, Hat: 0, Basketball: 1 +P1 vals: Book: 1, Hat: 3, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 0, Hat: 5, Basketball: 1 +P1 vals: Book: 8, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 8, Hat: 0, Basketball: 2 +P1 vals: Book: 2, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 4, Hat: 2, Basketball: 0 +P1 vals: Book: 5, Hat: 0, Basketball: 5 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 4 +P1 vals: Book: 2, Hat: 1, Basketball: 5 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 4, Hat: 1, 
Basketball: 3 +P1 vals: Book: 3, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 3, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 5 +P1 vals: Book: 1, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 2, Hat: 0, Basketball: 6 +P1 vals: Book: 0, Hat: 1, Basketball: 8 +", "Sample game instance: +Pool: Book: 3, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 7 +P1 vals: Book: 2, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 3 +P0 vals: Book: 4, Hat: 0, Basketball: 2 +P1 vals: Book: 1, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 1 +P1 vals: Book: 2, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 6, Basketball: 0 +P1 vals: Book: 1, Hat: 4, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 1, Hat: 2, Basketball: 2 +P1 vals: Book: 2, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 5, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 5, Basketball: 0 +P1 vals: Book: 1, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 3, Hat: 3, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 1 +P1 vals: Book: 0, Hat: 1, Basketball: 7 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 3 +P0 vals: Book: 0, Hat: 1, Basketball: 3 +P1 vals: Book: 3, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 3 +P0 vals: Book: 2, Hat: 0, Basketball: 2 +P1 vals: Book: 3, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 3, Basketball: 2 +P0 vals: Book: 1, Hat: 2, Basketball: 1 +P1 vals: Book: 1, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 2 +P0 vals: Book: 0, Hat: 8, Basketball: 1 +P1 vals: Book: 1, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 0, Hat: 10, Basketball: 0 +P1 vals: Book: 3, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 4, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 6 +P1 vals: Book: 2, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 2 +P0 vals: Book: 6, Hat: 1, Basketball: 0 +P1 vals: Book: 0, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 0, Hat: 2, Basketball: 2 +P1 vals: Book: 2, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 3, Hat: 1, Basketball: 2 +P1 vals: Book: 3, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 2, Hat: 2, Basketball: 2 +P1 vals: Book: 3, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 4, Basketball: 6 +P1 vals: Book: 2, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 4, Hat: 0, Basketball: 6 +P1 vals: Book: 0, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 8, Basketball: 0 +P1 vals: Book: 2, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 3, Hat: 1, Basketball: 2 +P1 vals: Book: 4, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 0, Hat: 4, Basketball: 1 +P1 vals: 
Book: 4, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 6 +P1 vals: Book: 1, Hat: 0, Basketball: 7 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 6 +P1 vals: Book: 4, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 2, Hat: 0, Basketball: 4 +P1 vals: Book: 1, Hat: 7, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 5, Hat: 0, Basketball: 0 +P1 vals: Book: 1, Hat: 2, Basketball: 3 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 1, Basketball: 3 +P1 vals: Book: 2, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 0, Hat: 2, Basketball: 3 +P1 vals: Book: 1, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 5, Hat: 1, Basketball: 1 +P1 vals: Book: 2, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 5, Hat: 5, Basketball: 0 +P1 vals: Book: 1, Hat: 0, Basketball: 3 +", "Sample game instance: +Pool: Book: 3, Hat: 3, Basketball: 1 +P0 vals: Book: 2, Hat: 0, Basketball: 4 +P1 vals: Book: 0, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 6, Hat: 1, Basketball: 1 +P1 vals: Book: 0, Hat: 4, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 0, Hat: 2, Basketball: 3 +P1 vals: Book: 3, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 5, Hat: 0, Basketball: 0 +P1 vals: Book: 2, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 9, Hat: 1, Basketball: 0 +P1 vals: Book: 6, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 0, Basketball: 10 +P1 vals: Book: 4, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 3, Basketball: 2 +P1 vals: Book: 4, Hat: 6, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 5, Hat: 0, Basketball: 0 +P1 vals: Book: 1, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 7, Hat: 0, Basketball: 1 +P1 vals: Book: 1, Hat: 6, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 3 +P1 vals: Book: 2, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 4, Basketball: 6 +P1 vals: Book: 2, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 3, Hat: 0, Basketball: 7 +P1 vals: Book: 2, Hat: 1, Basketball: 5 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 0, Hat: 2, Basketball: 4 +P1 vals: Book: 4, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 5, Hat: 0, Basketball: 1 +P1 vals: Book: 5, Hat: 5, Basketball: 0 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 5, Basketball: 5 +P1 vals: Book: 1, Hat: 2, Basketball: 5 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 10, Hat: 0, Basketball: 0 +P1 vals: Book: 5, Hat: 1, 
Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 6 +P1 vals: Book: 9, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 2, Hat: 3, Basketball: 1 +P1 vals: Book: 7, Hat: 3, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 3 +P1 vals: Book: 0, Hat: 1, Basketball: 5 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 5 +P1 vals: Book: 0, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 2, Hat: 0, Basketball: 3 +P1 vals: Book: 0, Hat: 3, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 4, Basketball: 1 +P0 vals: Book: 3, Hat: 0, Basketball: 4 +P1 vals: Book: 3, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 5, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 8 +P1 vals: Book: 1, Hat: 3, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 1 +P0 vals: Book: 3, Hat: 0, Basketball: 1 +P1 vals: Book: 0, Hat: 1, Basketball: 8 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 5, Hat: 1, Basketball: 1 +P1 vals: Book: 7, Hat: 3, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 1, Hat: 3, Basketball: 0 +P1 vals: Book: 3, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 3, Hat: 3, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 1 +P1 vals: Book: 3, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 6, Hat: 1, Basketball: 1 +P1 vals: Book: 1, Hat: 3, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 3 +P0 vals: Book: 4, Hat: 2, Basketball: 0 +P1 vals: Book: 2, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 5 +P1 vals: Book: 0, Hat: 4, Basketball: 6 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 0, Hat: 4, Basketball: 3 +P1 vals: Book: 2, Hat: 0, Basketball: 3 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 0, Hat: 8, Basketball: 1 +P1 vals: Book: 4, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 4, Basketball: 1 +P0 vals: Book: 4, Hat: 0, Basketball: 2 +P1 vals: Book: 1, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 6, Hat: 1, Basketball: 1 +P1 vals: Book: 3, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 5, Hat: 1, Basketball: 1 +P1 vals: Book: 1, Hat: 0, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 4 +P0 vals: Book: 4, Hat: 1, Basketball: 1 +P1 vals: Book: 6, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 4, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 8 +P1 vals: Book: 1, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 4, Basketball: 0 +P1 vals: Book: 2, Hat: 0, Basketball: 6 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 6, Hat: 2, Basketball: 0 +P1 vals: Book: 5, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 7, Basketball: 0 +P1 vals: Book: 0, Hat: 2, Basketball: 8 +", "Sample 
game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 4, Hat: 1, Basketball: 3 +P1 vals: Book: 2, Hat: 0, Basketball: 8 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 0, Basketball: 3 +P1 vals: Book: 2, Hat: 5, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 3, Hat: 4, Basketball: 1 +P1 vals: Book: 1, Hat: 3, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 4, Basketball: 1 +P1 vals: Book: 1, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 5, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 6, Basketball: 4 +P1 vals: Book: 1, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 3, Basketball: 4 +P1 vals: Book: 1, Hat: 2, Basketball: 4 +", "Sample game instance: +Pool: Book: 5, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 3, Basketball: 7 +P1 vals: Book: 1, Hat: 2, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 1, Hat: 3, Basketball: 1 +P1 vals: Book: 10, Hat: 0, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 1, Hat: 0, Basketball: 4 +P1 vals: Book: 3, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 4, Hat: 0, Basketball: 3 +P1 vals: Book: 2, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 7, Hat: 0, Basketball: 1 +P1 vals: Book: 3, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 3, Hat: 0, Basketball: 7 +P1 vals: Book: 0, Hat: 1, Basketball: 6 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 2, Hat: 4, Basketball: 1 +P1 vals: Book: 3, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 3 +P0 vals: Book: 2, Hat: 6, Basketball: 0 +P1 vals: Book: 0, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 5, Basketball: 5 +P1 vals: Book: 1, Hat: 6, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 5, Hat: 1, Basketball: 0 +P1 vals: Book: 2, Hat: 0, Basketball: 8 +", "Sample game instance: +Pool: Book: 4, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 8 +P1 vals: Book: 2, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 3, Basketball: 4 +P1 vals: Book: 4, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 0, Hat: 4, Basketball: 1 +P1 vals: Book: 2, Hat: 0, Basketball: 3 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 0, Hat: 1, Basketball: 4 +P1 vals: Book: 2, Hat: 3, Basketball: 0 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 0, Basketball: 7 +P1 vals: Book: 1, Hat: 5, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 4, Hat: 2, Basketball: 0 +P1 vals: Book: 1, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 2, Basketball: 2 +P1 vals: Book: 1, Hat: 6, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 3, Basketball: 2 +P0 vals: Book: 4, Hat: 0, Basketball: 1 +P1 vals: Book: 1, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: 
Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 6, Hat: 1, Basketball: 1 +P1 vals: Book: 0, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 5, Hat: 0, Basketball: 5 +P1 vals: Book: 3, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 0, Hat: 8, Basketball: 1 +P1 vals: Book: 3, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 4 +P1 vals: Book: 1, Hat: 0, Basketball: 6 +", "Sample game instance: +Pool: Book: 5, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 7, Basketball: 3 +P1 vals: Book: 1, Hat: 2, Basketball: 3 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 4, Hat: 2, Basketball: 0 +P1 vals: Book: 0, Hat: 2, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 0, Hat: 1, Basketball: 4 +P1 vals: Book: 8, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 4 +P0 vals: Book: 3, Hat: 4, Basketball: 0 +P1 vals: Book: 2, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 6, Basketball: 0 +P1 vals: Book: 2, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 3 +P0 vals: Book: 3, Hat: 4, Basketball: 0 +P1 vals: Book: 1, Hat: 5, Basketball: 1 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 2 +P0 vals: Book: 0, Hat: 6, Basketball: 2 +P1 vals: Book: 1, Hat: 6, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 2, Hat: 2, Basketball: 2 +P1 vals: Book: 2, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 3 +P0 vals: Book: 2, Hat: 4, Basketball: 0 +P1 vals: Book: 0, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 3 +P1 vals: Book: 2, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 9, Hat: 0, Basketball: 1 +P1 vals: Book: 0, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 3, Basketball: 4 +P1 vals: Book: 1, Hat: 0, Basketball: 8 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 0, Basketball: 6 +P1 vals: Book: 0, Hat: 1, Basketball: 9 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 3, Hat: 1, Basketball: 2 +P1 vals: Book: 1, Hat: 1, Basketball: 6 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 2, Hat: 2, Basketball: 2 +P1 vals: Book: 2, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 1, Hat: 4, Basketball: 0 +P1 vals: Book: 1, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 2, Hat: 2, Basketball: 0 +P1 vals: Book: 1, Hat: 0, Basketball: 6 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 4, Hat: 2, Basketball: 0 +P1 vals: Book: 5, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 4 +P0 vals: Book: 0, Hat: 5, Basketball: 0 +P1 vals: Book: 4, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 0, Basketball: 4 +P1 vals: Book: 1, Hat: 6, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, 
Basketball: 4 +P0 vals: Book: 1, Hat: 5, Basketball: 1 +P1 vals: Book: 4, Hat: 6, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 1, Hat: 5, Basketball: 1 +P1 vals: Book: 0, Hat: 6, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 3, Basketball: 4 +P1 vals: Book: 1, Hat: 5, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 3 +P1 vals: Book: 5, Hat: 0, Basketball: 5 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 5 +P1 vals: Book: 5, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 7 +P1 vals: Book: 5, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 8, Hat: 0, Basketball: 1 +P1 vals: Book: 4, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 0 +P1 vals: Book: 4, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 4 +P1 vals: Book: 1, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 6, Hat: 1, Basketball: 0 +P1 vals: Book: 2, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 4 +P0 vals: Book: 4, Hat: 1, Basketball: 1 +P1 vals: Book: 0, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 0, Basketball: 7 +P1 vals: Book: 2, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 5, Basketball: 1 +P1 vals: Book: 0, Hat: 10, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 0, Hat: 1, Basketball: 4 +P1 vals: Book: 6, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 8, Hat: 0, Basketball: 2 +P1 vals: Book: 2, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 3, Basketball: 7 +P1 vals: Book: 1, Hat: 3, Basketball: 4 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 2 +P0 vals: Book: 0, Hat: 10, Basketball: 0 +P1 vals: Book: 1, Hat: 3, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 4 +P0 vals: Book: 0, Hat: 1, Basketball: 2 +P1 vals: Book: 2, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 4 +P0 vals: Book: 3, Hat: 4, Basketball: 0 +P1 vals: Book: 1, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 1, Hat: 3, Basketball: 1 +P1 vals: Book: 0, Hat: 2, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 0, Hat: 10, Basketball: 0 +P1 vals: Book: 5, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 7, Basketball: 0 +P1 vals: Book: 1, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 4, Basketball: 1 +P0 vals: Book: 1, Hat: 0, Basketball: 8 +P1 vals: Book: 0, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 4, Hat: 2, Basketball: 1 +P1 vals: Book: 1, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: 
Book: 3, Hat: 2, Basketball: 1 +P1 vals: Book: 5, Hat: 0, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 3, Hat: 4, Basketball: 1 +P1 vals: Book: 1, Hat: 0, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 9, Hat: 0, Basketball: 1 +P1 vals: Book: 0, Hat: 1, Basketball: 7 +", "Sample game instance: +Pool: Book: 2, Hat: 3, Basketball: 2 +P0 vals: Book: 2, Hat: 2, Basketball: 0 +P1 vals: Book: 0, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 2, Hat: 0, Basketball: 2 +P1 vals: Book: 1, Hat: 4, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 7, Hat: 0, Basketball: 3 +P1 vals: Book: 4, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 7, Basketball: 0 +P1 vals: Book: 0, Hat: 4, Basketball: 6 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 2 +P0 vals: Book: 2, Hat: 1, Basketball: 1 +P1 vals: Book: 2, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 3, Basketball: 2 +P1 vals: Book: 3, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 0, Hat: 10, Basketball: 0 +P1 vals: Book: 2, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 3, Hat: 1, Basketball: 0 +P1 vals: Book: 0, Hat: 1, Basketball: 9 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 3, Hat: 7, Basketball: 0 +P1 vals: Book: 3, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 1, Hat: 0, Basketball: 4 +P1 vals: Book: 1, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 7, Hat: 1, Basketball: 0 +P1 vals: Book: 9, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 2 +P0 vals: Book: 2, Hat: 1, Basketball: 2 +P1 vals: Book: 2, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 2 +P0 vals: Book: 2, Hat: 0, Basketball: 2 +P1 vals: Book: 2, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 3, Hat: 2, Basketball: 1 +P1 vals: Book: 0, Hat: 1, Basketball: 7 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 2, Hat: 8, Basketball: 0 +P1 vals: Book: 4, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 7 +P1 vals: Book: 2, Hat: 0, Basketball: 6 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 4, Hat: 1, Basketball: 2 +P1 vals: Book: 8, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 6 +P1 vals: Book: 6, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 0, Hat: 2, Basketball: 2 +P1 vals: Book: 2, Hat: 8, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 4 +P0 vals: Book: 2, Hat: 0, Basketball: 2 +P1 vals: Book: 2, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 0, Basketball: 7 +P1 vals: Book: 1, Hat: 4, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 1, Hat: 2, 
Basketball: 1 +P1 vals: Book: 1, Hat: 1, Basketball: 5 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 9, Hat: 1, Basketball: 0 +P1 vals: Book: 3, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 4, Basketball: 0 +P1 vals: Book: 2, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 9 +P1 vals: Book: 1, Hat: 5, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 9 +P1 vals: Book: 2, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 3 +P0 vals: Book: 4, Hat: 2, Basketball: 0 +P1 vals: Book: 1, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 0, Basketball: 3 +P1 vals: Book: 5, Hat: 5, Basketball: 0 +", "Sample game instance: +Pool: Book: 4, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 2 +P1 vals: Book: 0, Hat: 1, Basketball: 8 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 4, Hat: 1, Basketball: 2 +P1 vals: Book: 0, Hat: 1, Basketball: 6 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 6 +P1 vals: Book: 2, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 2, Hat: 2, Basketball: 1 +P1 vals: Book: 0, Hat: 2, Basketball: 3 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 1, Hat: 2, Basketball: 2 +P1 vals: Book: 2, Hat: 3, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 4, Hat: 2, Basketball: 1 +P1 vals: Book: 9, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 4, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 3, Basketball: 0 +P1 vals: Book: 1, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 2, Basketball: 2 +P1 vals: Book: 1, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 2 +P0 vals: Book: 2, Hat: 1, Basketball: 2 +P1 vals: Book: 2, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 3, Basketball: 3 +P1 vals: Book: 0, Hat: 7, Basketball: 3 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 3 +P0 vals: Book: 2, Hat: 1, Basketball: 1 +P1 vals: Book: 0, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 0, Hat: 4, Basketball: 3 +P1 vals: Book: 3, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 1, Hat: 0, Basketball: 9 +P1 vals: Book: 4, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 5, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 9 +P1 vals: Book: 1, Hat: 2, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 5, Hat: 1, Basketball: 1 +P1 vals: Book: 4, Hat: 6, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 2 +P0 vals: Book: 0, Hat: 0, Basketball: 5 +P1 vals: Book: 4, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 3, Basketball: 1 +P1 vals: Book: 2, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 1, Basketball: 3 +P1 vals: 
Book: 0, Hat: 2, Basketball: 4 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 3 +P0 vals: Book: 0, Hat: 2, Basketball: 2 +P1 vals: Book: 2, Hat: 3, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 4, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 2 +P1 vals: Book: 1, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 2 +P0 vals: Book: 3, Hat: 1, Basketball: 0 +P1 vals: Book: 0, Hat: 8, Basketball: 1 +", "Sample game instance: +Pool: Book: 5, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 3 +P1 vals: Book: 0, Hat: 1, Basketball: 9 +", "Sample game instance: +Pool: Book: 4, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 4 +P1 vals: Book: 0, Hat: 4, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 0, Hat: 0, Basketball: 10 +P1 vals: Book: 3, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 2, Hat: 0, Basketball: 4 +P1 vals: Book: 6, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 3, Hat: 3, Basketball: 1 +P1 vals: Book: 8, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 6, Hat: 0, Basketball: 2 +P1 vals: Book: 8, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 4, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 4, Basketball: 2 +P1 vals: Book: 1, Hat: 3, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 0, Hat: 4, Basketball: 3 +P1 vals: Book: 2, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 5 +P1 vals: Book: 1, Hat: 1, Basketball: 5 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 6 +P1 vals: Book: 8, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 4, Hat: 1, Basketball: 0 +P1 vals: Book: 2, Hat: 0, Basketball: 3 +", "Sample game instance: +Pool: Book: 2, Hat: 4, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 0 +P1 vals: Book: 3, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 3, Basketball: 4 +P1 vals: Book: 0, Hat: 8, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 2 +P0 vals: Book: 2, Hat: 0, Basketball: 2 +P1 vals: Book: 1, Hat: 7, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 1 +P1 vals: Book: 3, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 4, Hat: 3, Basketball: 1 +P1 vals: Book: 2, Hat: 8, Basketball: 0 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 2 +P0 vals: Book: 0, Hat: 8, Basketball: 1 +P1 vals: Book: 2, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 4, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 3, Basketball: 4 +P1 vals: Book: 2, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 6, Basketball: 1 +P1 vals: Book: 1, Hat: 5, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 4 +P0 vals: Book: 3, Hat: 0, Basketball: 1 +P1 vals: Book: 1, Hat: 8, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 4, Hat: 0, Basketball: 2 +P1 vals: Book: 6, Hat: 4, 
Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 3, Basketball: 4 +P1 vals: Book: 1, Hat: 3, Basketball: 2 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 4, Basketball: 2 +P1 vals: Book: 0, Hat: 3, Basketball: 7 +", "Sample game instance: +Pool: Book: 4, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 2 +P1 vals: Book: 1, Hat: 0, Basketball: 6 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 2 +P0 vals: Book: 0, Hat: 10, Basketball: 0 +P1 vals: Book: 2, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 1 +P0 vals: Book: 2, Hat: 2, Basketball: 0 +P1 vals: Book: 1, Hat: 2, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 3 +P1 vals: Book: 4, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 4, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 0 +P1 vals: Book: 0, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 2, Hat: 4, Basketball: 0 +P1 vals: Book: 2, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 2, Hat: 4, Basketball: 1 +P1 vals: Book: 0, Hat: 0, Basketball: 5 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 0, Hat: 7, Basketball: 1 +P1 vals: Book: 3, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 2, Hat: 4, Basketball: 1 +P1 vals: Book: 2, Hat: 6, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 2, Hat: 5, Basketball: 1 +P1 vals: Book: 7, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 0, Basketball: 10 +P1 vals: Book: 2, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 4 +P1 vals: Book: 5, Hat: 0, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 3, Basketball: 1 +P0 vals: Book: 3, Hat: 1, Basketball: 1 +P1 vals: Book: 1, Hat: 0, Basketball: 8 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 3, Hat: 4, Basketball: 1 +P1 vals: Book: 3, Hat: 7, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 5, Hat: 1, Basketball: 1 +P1 vals: Book: 1, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 6, Hat: 1, Basketball: 0 +P1 vals: Book: 1, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 2 +P0 vals: Book: 3, Hat: 1, Basketball: 2 +P1 vals: Book: 6, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 3, Hat: 0, Basketball: 7 +P1 vals: Book: 2, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 2, Basketball: 2 +P1 vals: Book: 0, Hat: 0, Basketball: 5 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 6, Hat: 0, Basketball: 1 +P1 vals: Book: 2, Hat: 8, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 3, Basketball: 2 +P1 vals: Book: 2, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 3, Hat: 1, Basketball: 2 +P1 vals: Book: 9, Hat: 1, Basketball: 0 +", 
"Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 4 +P0 vals: Book: 2, Hat: 2, Basketball: 1 +P1 vals: Book: 3, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 4, Basketball: 1 +P0 vals: Book: 2, Hat: 0, Basketball: 6 +P1 vals: Book: 3, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 0, Hat: 2, Basketball: 3 +P1 vals: Book: 1, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 9, Basketball: 0 +P1 vals: Book: 4, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 4 +P1 vals: Book: 0, Hat: 2, Basketball: 8 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 6, Hat: 1, Basketball: 1 +P1 vals: Book: 0, Hat: 10, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 4 +P1 vals: Book: 2, Hat: 3, Basketball: 0 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 6, Basketball: 0 +P1 vals: Book: 1, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 5, Hat: 1, Basketball: 1 +P1 vals: Book: 1, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 6 +P1 vals: Book: 0, Hat: 6, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 1, Hat: 3, Basketball: 0 +P1 vals: Book: 1, Hat: 0, Basketball: 9 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 2, Hat: 2, Basketball: 1 +P1 vals: Book: 3, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 2 +P0 vals: Book: 0, Hat: 0, Basketball: 5 +P1 vals: Book: 1, Hat: 5, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 3 +P0 vals: Book: 4, Hat: 0, Basketball: 2 +P1 vals: Book: 4, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 4, Hat: 2, Basketball: 1 +P1 vals: Book: 6, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 3, Hat: 4, Basketball: 0 +P1 vals: Book: 0, Hat: 4, Basketball: 3 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 2 +P0 vals: Book: 0, Hat: 5, Basketball: 0 +P1 vals: Book: 2, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 5, Hat: 1, Basketball: 0 +P1 vals: Book: 0, Hat: 1, Basketball: 5 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 8, Hat: 0, Basketball: 1 +P1 vals: Book: 6, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 2, Basketball: 3 +P1 vals: Book: 2, Hat: 6, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 4 +P0 vals: Book: 1, Hat: 4, Basketball: 1 +P1 vals: Book: 2, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 6, Hat: 1, Basketball: 1 +P1 vals: Book: 5, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 2, Hat: 8, Basketball: 0 +P1 vals: Book: 0, Hat: 6, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 2, Hat: 2, Basketball: 2 +P1 vals: Book: 4, Hat: 0, Basketball: 1 +", "Sample game instance: 
+Pool: Book: 3, Hat: 1, Basketball: 3 +P0 vals: Book: 0, Hat: 10, Basketball: 0 +P1 vals: Book: 1, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 4 +P0 vals: Book: 2, Hat: 2, Basketball: 1 +P1 vals: Book: 10, Hat: 0, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 4, Hat: 2, Basketball: 0 +P1 vals: Book: 0, Hat: 1, Basketball: 7 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 2 +P0 vals: Book: 10, Hat: 0, Basketball: 0 +P1 vals: Book: 5, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 3, Hat: 4, Basketball: 0 +P1 vals: Book: 0, Hat: 8, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 2 +P0 vals: Book: 4, Hat: 1, Basketball: 1 +P1 vals: Book: 4, Hat: 0, Basketball: 3 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 3, Basketball: 2 +P1 vals: Book: 2, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 1, Hat: 4, Basketball: 0 +P1 vals: Book: 0, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 0, Basketball: 3 +P1 vals: Book: 1, Hat: 9, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 3, Hat: 0, Basketball: 7 +P1 vals: Book: 3, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 3, Hat: 1, Basketball: 1 +P1 vals: Book: 2, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 3, Hat: 2, Basketball: 1 +P1 vals: Book: 1, Hat: 6, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 3 +P0 vals: Book: 1, Hat: 1, Basketball: 2 +P1 vals: Book: 4, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 6, Hat: 0, Basketball: 4 +P1 vals: Book: 3, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 7 +P1 vals: Book: 7, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 6 +P1 vals: Book: 0, Hat: 3, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 0, Basketball: 3 +P1 vals: Book: 1, Hat: 3, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 6, Hat: 1, Basketball: 1 +P1 vals: Book: 2, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 2 +P0 vals: Book: 3, Hat: 1, Basketball: 2 +P1 vals: Book: 2, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 4 +P1 vals: Book: 2, Hat: 0, Basketball: 6 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 2, Hat: 2, Basketball: 0 +P1 vals: Book: 5, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 3 +P0 vals: Book: 2, Hat: 0, Basketball: 2 +P1 vals: Book: 3, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 4 +P0 vals: Book: 1, Hat: 0, Basketball: 2 +P1 vals: Book: 0, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 9, Basketball: 1 +P1 vals: Book: 3, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 5, 
Basketball: 1 +P0 vals: Book: 3, Hat: 0, Basketball: 7 +P1 vals: Book: 1, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 1 +P1 vals: Book: 9, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 2 +P0 vals: Book: 6, Hat: 1, Basketball: 0 +P1 vals: Book: 6, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 2 +P0 vals: Book: 4, Hat: 2, Basketball: 0 +P1 vals: Book: 2, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 10, Basketball: 0 +P1 vals: Book: 1, Hat: 2, Basketball: 5 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 2 +P0 vals: Book: 3, Hat: 1, Basketball: 2 +P1 vals: Book: 7, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 0, Hat: 2, Basketball: 2 +P1 vals: Book: 3, Hat: 7, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 4, Hat: 0, Basketball: 1 +P1 vals: Book: 2, Hat: 3, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 0, Hat: 5, Basketball: 1 +P1 vals: Book: 2, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 5 +P1 vals: Book: 0, Hat: 1, Basketball: 9 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 3, Hat: 1, Basketball: 2 +P1 vals: Book: 10, Hat: 0, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 6, Hat: 4, Basketball: 0 +P1 vals: Book: 0, Hat: 4, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 0, Basketball: 8 +P1 vals: Book: 1, Hat: 3, Basketball: 2 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 0, Basketball: 6 +P1 vals: Book: 1, Hat: 1, Basketball: 5 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 0, Hat: 1, Basketball: 3 +P1 vals: Book: 2, Hat: 5, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 8, Hat: 0, Basketball: 2 +P1 vals: Book: 2, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 7, Hat: 3, Basketball: 0 +P1 vals: Book: 1, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 2, Hat: 2, Basketball: 2 +P1 vals: Book: 7, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 0, Basketball: 7 +P1 vals: Book: 3, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 3, Hat: 2, Basketball: 0 +P1 vals: Book: 1, Hat: 0, Basketball: 8 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 6 +P1 vals: Book: 6, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 3 +P0 vals: Book: 1, Hat: 2, Basketball: 1 +P1 vals: Book: 4, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 10, Basketball: 0 +P1 vals: Book: 1, Hat: 3, Basketball: 4 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 7, Basketball: 0 +P1 vals: Book: 2, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: 
Book: 8, Hat: 0, Basketball: 2 +P1 vals: Book: 0, Hat: 1, Basketball: 5 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 4 +P0 vals: Book: 2, Hat: 2, Basketball: 1 +P1 vals: Book: 1, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 2 +P1 vals: Book: 1, Hat: 0, Basketball: 9 +", "Sample game instance: +Pool: Book: 5, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 4, Basketball: 6 +P1 vals: Book: 1, Hat: 5, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 8, Hat: 2, Basketball: 0 +P1 vals: Book: 1, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 4 +P0 vals: Book: 4, Hat: 1, Basketball: 1 +P1 vals: Book: 8, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 5 +P1 vals: Book: 3, Hat: 0, Basketball: 7 +", "Sample game instance: +Pool: Book: 5, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 6, Basketball: 4 +P1 vals: Book: 1, Hat: 0, Basketball: 5 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 0, Basketball: 10 +P1 vals: Book: 1, Hat: 1, Basketball: 6 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 4, Hat: 1, Basketball: 3 +P1 vals: Book: 7, Hat: 0, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 4 +P0 vals: Book: 2, Hat: 0, Basketball: 2 +P1 vals: Book: 8, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 2, Hat: 2, Basketball: 2 +P1 vals: Book: 6, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 6, Hat: 1, Basketball: 1 +P1 vals: Book: 2, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 6, Hat: 0, Basketball: 2 +P1 vals: Book: 2, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 0, Basketball: 10 +P1 vals: Book: 1, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 2 +P1 vals: Book: 1, Hat: 2, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 8, Hat: 0, Basketball: 2 +P1 vals: Book: 7, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 1, Hat: 0, Basketball: 3 +P1 vals: Book: 4, Hat: 3, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 0, Hat: 3, Basketball: 2 +P1 vals: Book: 8, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 1, Hat: 4, Basketball: 0 +P1 vals: Book: 1, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 2 +P0 vals: Book: 0, Hat: 2, Basketball: 1 +P1 vals: Book: 4, Hat: 0, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 1 +P1 vals: Book: 6, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 4 +P0 vals: Book: 4, Hat: 1, Basketball: 1 +P1 vals: Book: 6, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 0, Basketball: 10 +P1 vals: Book: 1, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 4, 
Basketball: 3 +P1 vals: Book: 0, Hat: 0, Basketball: 10 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 3, Hat: 2, Basketball: 1 +P1 vals: Book: 3, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 2, Hat: 3, Basketball: 0 +P1 vals: Book: 1, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 8, Hat: 1, Basketball: 0 +P1 vals: Book: 0, Hat: 3, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 5 +P1 vals: Book: 3, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 5, Hat: 5, Basketball: 0 +P1 vals: Book: 3, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 3, Hat: 0, Basketball: 2 +P1 vals: Book: 3, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 7, Hat: 1, Basketball: 0 +P1 vals: Book: 6, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 3, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 3, Basketball: 1 +P1 vals: Book: 1, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 2, Hat: 4, Basketball: 1 +P0 vals: Book: 2, Hat: 0, Basketball: 6 +P1 vals: Book: 0, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 2, Hat: 8, Basketball: 0 +P1 vals: Book: 3, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 7, Hat: 0, Basketball: 1 +P1 vals: Book: 0, Hat: 7, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 3, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 3 +P1 vals: Book: 3, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 2 +P1 vals: Book: 4, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 9, Hat: 1, Basketball: 0 +P1 vals: Book: 1, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 1, Hat: 9, Basketball: 0 +P1 vals: Book: 4, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 8 +P1 vals: Book: 1, Hat: 1, Basketball: 5 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 4, Basketball: 6 +P1 vals: Book: 1, Hat: 3, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 4, Hat: 1, Basketball: 2 +P1 vals: Book: 6, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 3 +P0 vals: Book: 0, Hat: 7, Basketball: 1 +P1 vals: Book: 1, Hat: 7, Basketball: 0 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 5, Basketball: 1 +P1 vals: Book: 3, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 2, Hat: 0, Basketball: 6 +P1 vals: Book: 0, Hat: 2, Basketball: 6 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 0, Hat: 4, Basketball: 1 +P1 vals: Book: 1, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 6, Hat: 0, Basketball: 4 +P1 vals: Book: 0, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 4, Hat: 2, Basketball: 1 +P1 vals: 
Book: 6, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 4, Basketball: 1 +P1 vals: Book: 2, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 1, Hat: 3, Basketball: 1 +P1 vals: Book: 4, Hat: 3, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 2, Hat: 3, Basketball: 1 +P1 vals: Book: 6, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 4, Basketball: 2 +P1 vals: Book: 3, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 4, Hat: 2, Basketball: 1 +P1 vals: Book: 2, Hat: 8, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 6, Hat: 1, Basketball: 1 +P1 vals: Book: 4, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 4, Hat: 0, Basketball: 3 +P1 vals: Book: 0, Hat: 3, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 3, Hat: 0, Basketball: 7 +P1 vals: Book: 7, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 5 +P1 vals: Book: 0, Hat: 10, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 1, Hat: 5, Basketball: 1 +P1 vals: Book: 1, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 7, Hat: 3, Basketball: 0 +P1 vals: Book: 1, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 4, Hat: 2, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 0 +P1 vals: Book: 0, Hat: 1, Basketball: 8 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 2, Hat: 1, Basketball: 2 +P1 vals: Book: 2, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 6, Hat: 1, Basketball: 1 +P1 vals: Book: 2, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 0, Hat: 4, Basketball: 1 +P1 vals: Book: 2, Hat: 3, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 3, Hat: 1, Basketball: 3 +P1 vals: Book: 5, Hat: 0, Basketball: 5 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 4, Basketball: 2 +P1 vals: Book: 3, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 4, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 2 +P1 vals: Book: 3, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 2, Hat: 3, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 3 +P1 vals: Book: 3, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 2, Hat: 3, Basketball: 1 +P0 vals: Book: 4, Hat: 0, Basketball: 2 +P1 vals: Book: 1, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 0, Hat: 10, Basketball: 0 +P1 vals: Book: 1, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 3, Hat: 7, Basketball: 0 +P1 vals: Book: 6, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 3, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 2 +P1 vals: Book: 0, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 7, Basketball: 3 +P1 vals: Book: 1, Hat: 0, 
Basketball: 7 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 0, Hat: 3, Basketball: 2 +P1 vals: Book: 4, Hat: 0, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 6 +P1 vals: Book: 5, Hat: 0, Basketball: 5 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 3, Hat: 1, Basketball: 1 +P1 vals: Book: 2, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 4, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 4 +P1 vals: Book: 3, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 3 +P0 vals: Book: 4, Hat: 2, Basketball: 0 +P1 vals: Book: 1, Hat: 5, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 6, Hat: 1, Basketball: 1 +P1 vals: Book: 10, Hat: 0, Basketball: 0 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 7, Basketball: 3 +P1 vals: Book: 1, Hat: 0, Basketball: 6 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 8, Basketball: 0 +P1 vals: Book: 1, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 1, Hat: 1, Basketball: 3 +P1 vals: Book: 0, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 2 +P0 vals: Book: 2, Hat: 2, Basketball: 1 +P1 vals: Book: 8, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 2 +P0 vals: Book: 2, Hat: 2, Basketball: 0 +P1 vals: Book: 4, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 6, Basketball: 1 +P1 vals: Book: 2, Hat: 6, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 1, Hat: 4, Basketball: 1 +P1 vals: Book: 10, Hat: 0, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 0, Hat: 1, Basketball: 4 +P1 vals: Book: 3, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 8, Hat: 2, Basketball: 0 +P1 vals: Book: 4, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 0, Basketball: 7 +P1 vals: Book: 0, Hat: 1, Basketball: 8 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 2, Hat: 3, Basketball: 0 +P1 vals: Book: 0, Hat: 3, Basketball: 4 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 3, Hat: 1, Basketball: 2 +P1 vals: Book: 2, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 4, Basketball: 3 +P1 vals: Book: 1, Hat: 5, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 3, Hat: 1, Basketball: 2 +P1 vals: Book: 1, Hat: 3, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 3 +P0 vals: Book: 2, Hat: 0, Basketball: 2 +P1 vals: Book: 1, Hat: 8, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 3, Hat: 1, Basketball: 3 +P1 vals: Book: 1, Hat: 1, Basketball: 5 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 4 +P0 vals: Book: 2, Hat: 2, Basketball: 1 +P1 vals: Book: 3, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 5, Hat: 1, Basketball: 2 +P1 vals: Book: 0, Hat: 3, Basketball: 1 +", 
"Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 3 +P0 vals: Book: 3, Hat: 1, Basketball: 1 +P1 vals: Book: 1, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 4, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 6 +P1 vals: Book: 1, Hat: 0, Basketball: 6 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 6, Hat: 1, Basketball: 1 +P1 vals: Book: 5, Hat: 5, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 0, Basketball: 4 +P1 vals: Book: 4, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 5, Hat: 0, Basketball: 5 +P1 vals: Book: 0, Hat: 1, Basketball: 6 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 3 +P1 vals: Book: 10, Hat: 0, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 7, Hat: 1, Basketball: 0 +P1 vals: Book: 4, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 4, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 2 +P1 vals: Book: 1, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 5 +P1 vals: Book: 3, Hat: 0, Basketball: 7 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 6 +P1 vals: Book: 1, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 5, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 5, Basketball: 0 +P1 vals: Book: 1, Hat: 2, Basketball: 3 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 2, Hat: 4, Basketball: 1 +P1 vals: Book: 2, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 4 +P1 vals: Book: 2, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 4 +P0 vals: Book: 6, Hat: 2, Basketball: 0 +P1 vals: Book: 0, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 3 +P0 vals: Book: 3, Hat: 4, Basketball: 0 +P1 vals: Book: 2, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 2 +P0 vals: Book: 0, Hat: 2, Basketball: 4 +P1 vals: Book: 1, Hat: 5, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 2, Hat: 0, Basketball: 3 +P1 vals: Book: 4, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 6, Basketball: 1 +P1 vals: Book: 2, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 5, Basketball: 1 +P1 vals: Book: 2, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 3 +P0 vals: Book: 1, Hat: 1, Basketball: 2 +P1 vals: Book: 1, Hat: 0, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 3, Hat: 1, Basketball: 2 +P1 vals: Book: 6, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 5, Hat: 0, Basketball: 0 +P1 vals: Book: 3, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 9, Basketball: 0 +P1 vals: Book: 4, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 3, Hat: 1, Basketball: 2 +P1 vals: Book: 1, Hat: 6, Basketball: 1 +", "Sample game instance: 
+Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 4, Basketball: 2 +P1 vals: Book: 0, Hat: 5, Basketball: 5 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 0, Basketball: 10 +P1 vals: Book: 5, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 8 +P1 vals: Book: 2, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 1 +P1 vals: Book: 0, Hat: 1, Basketball: 6 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 8, Hat: 1, Basketball: 0 +P1 vals: Book: 4, Hat: 0, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 4, Hat: 2, Basketball: 0 +P1 vals: Book: 1, Hat: 0, Basketball: 9 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 6, Basketball: 1 +P1 vals: Book: 0, Hat: 10, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 4, Hat: 1, Basketball: 0 +P1 vals: Book: 2, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 3, Basketball: 1 +P0 vals: Book: 1, Hat: 0, Basketball: 8 +P1 vals: Book: 1, Hat: 1, Basketball: 5 +", "Sample game instance: +Pool: Book: 3, Hat: 3, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 4 +P1 vals: Book: 1, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 7, Basketball: 0 +P1 vals: Book: 1, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 6, Hat: 1, Basketball: 1 +P1 vals: Book: 6, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 4, Hat: 2, Basketball: 1 +P1 vals: Book: 1, Hat: 9, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 4, Hat: 0, Basketball: 6 +P1 vals: Book: 0, Hat: 1, Basketball: 6 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 3, Hat: 7, Basketball: 0 +P1 vals: Book: 4, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 3, Basketball: 4 +P1 vals: Book: 1, Hat: 6, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 9 +P1 vals: Book: 1, Hat: 0, Basketball: 7 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 3, Hat: 0, Basketball: 2 +P1 vals: Book: 1, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 2, Hat: 4, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 6 +P1 vals: Book: 1, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 5, Hat: 1, Basketball: 1 +P1 vals: Book: 6, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 5, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 5, Basketball: 5 +P1 vals: Book: 1, Hat: 0, Basketball: 5 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 0, Hat: 2, Basketball: 3 +P1 vals: Book: 2, Hat: 0, Basketball: 3 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 4, Hat: 2, Basketball: 0 +P1 vals: Book: 1, Hat: 2, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 4, Hat: 1, Basketball: 2 +P1 vals: Book: 5, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, 
Basketball: 1 +P0 vals: Book: 5, Hat: 0, Basketball: 5 +P1 vals: Book: 1, Hat: 1, Basketball: 6 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 4, Basketball: 6 +P1 vals: Book: 1, Hat: 1, Basketball: 6 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 1, Hat: 3, Basketball: 1 +P1 vals: Book: 2, Hat: 0, Basketball: 3 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 2 +P0 vals: Book: 2, Hat: 4, Basketball: 0 +P1 vals: Book: 0, Hat: 2, Basketball: 4 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 2, Hat: 2, Basketball: 2 +P1 vals: Book: 4, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 1, Hat: 9, Basketball: 0 +P1 vals: Book: 6, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 6, Hat: 1, Basketball: 0 +P1 vals: Book: 4, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 2 +P0 vals: Book: 2, Hat: 1, Basketball: 1 +P1 vals: Book: 0, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 4, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 4 +P1 vals: Book: 0, Hat: 2, Basketball: 6 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 2 +P0 vals: Book: 2, Hat: 2, Basketball: 0 +P1 vals: Book: 0, Hat: 8, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 8 +P1 vals: Book: 2, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 4 +P1 vals: Book: 0, Hat: 5, Basketball: 5 +", "Sample game instance: +Pool: Book: 5, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 4, Basketball: 1 +P1 vals: Book: 1, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 7, Hat: 0, Basketball: 3 +P1 vals: Book: 1, Hat: 2, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 4, Hat: 0, Basketball: 2 +P1 vals: Book: 5, Hat: 5, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 4 +P0 vals: Book: 4, Hat: 2, Basketball: 0 +P1 vals: Book: 2, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 3, Hat: 2, Basketball: 0 +P1 vals: Book: 0, Hat: 2, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 0, Hat: 1, Basketball: 3 +P1 vals: Book: 7, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 5, Basketball: 1 +P1 vals: Book: 1, Hat: 8, Basketball: 0 +", "Sample game instance: +Pool: Book: 5, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 5, Basketball: 0 +P1 vals: Book: 1, Hat: 0, Basketball: 5 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 2, Hat: 0, Basketball: 4 +P1 vals: Book: 0, Hat: 6, Basketball: 4 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 0, Basketball: 3 +P1 vals: Book: 1, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 2, Hat: 4, Basketball: 1 +P1 vals: Book: 2, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 3, Basketball: 2 +P1 vals: Book: 0, Hat: 4, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: 
Book: 1, Hat: 1, Basketball: 6 +P1 vals: Book: 3, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 3, Hat: 0, Basketball: 7 +P1 vals: Book: 1, Hat: 1, Basketball: 5 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 4, Hat: 0, Basketball: 6 +P1 vals: Book: 5, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 2, Hat: 0, Basketball: 4 +P1 vals: Book: 0, Hat: 7, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 6 +P1 vals: Book: 2, Hat: 0, Basketball: 8 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 5 +P1 vals: Book: 1, Hat: 4, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 0, Basketball: 10 +P1 vals: Book: 1, Hat: 4, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 4 +P0 vals: Book: 0, Hat: 3, Basketball: 1 +P1 vals: Book: 2, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 4, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 3, Basketball: 4 +P1 vals: Book: 1, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 8 +P1 vals: Book: 2, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 4, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 2 +P1 vals: Book: 1, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 2, Hat: 4, Basketball: 1 +P1 vals: Book: 0, Hat: 10, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 5, Hat: 0, Basketball: 1 +P1 vals: Book: 1, Hat: 9, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 0, Hat: 4, Basketball: 1 +P1 vals: Book: 4, Hat: 3, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 3 +P0 vals: Book: 0, Hat: 7, Basketball: 1 +P1 vals: Book: 1, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 10, Basketball: 0 +P1 vals: Book: 2, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 2 +P0 vals: Book: 1, Hat: 3, Basketball: 0 +P1 vals: Book: 1, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 4, Hat: 1, Basketball: 1 +P1 vals: Book: 5, Hat: 5, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 4 +P0 vals: Book: 6, Hat: 2, Basketball: 0 +P1 vals: Book: 6, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 6, Basketball: 0 +P1 vals: Book: 1, Hat: 1, Basketball: 5 +", "Sample game instance: +Pool: Book: 3, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 4 +P1 vals: Book: 2, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 3, Hat: 4, Basketball: 1 +P1 vals: Book: 0, Hat: 4, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 6, Basketball: 4 +P1 vals: Book: 2, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 5, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 4 +P1 vals: Book: 1, Hat: 5, Basketball: 0 +", "Sample game instance: +Pool: Book: 4, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 2, 
Basketball: 6 +P1 vals: Book: 1, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 3, Hat: 2, Basketball: 1 +P1 vals: Book: 0, Hat: 6, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 6, Basketball: 1 +P1 vals: Book: 4, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 3, Basketball: 1 +P1 vals: Book: 2, Hat: 0, Basketball: 8 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 3, Basketball: 2 +P1 vals: Book: 1, Hat: 3, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 6, Hat: 0, Basketball: 4 +P1 vals: Book: 5, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 2, Hat: 0, Basketball: 4 +P1 vals: Book: 0, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 2 +P0 vals: Book: 0, Hat: 1, Basketball: 4 +P1 vals: Book: 2, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 0, Basketball: 7 +P1 vals: Book: 0, Hat: 4, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 2, Hat: 0, Basketball: 3 +P1 vals: Book: 3, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 2 +P0 vals: Book: 2, Hat: 2, Basketball: 0 +P1 vals: Book: 0, Hat: 4, Basketball: 3 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 0, Hat: 6, Basketball: 2 +P1 vals: Book: 1, Hat: 6, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 3, Basketball: 2 +P0 vals: Book: 0, Hat: 0, Basketball: 5 +P1 vals: Book: 1, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 4 +P0 vals: Book: 3, Hat: 4, Basketball: 0 +P1 vals: Book: 0, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 6, Hat: 1, Basketball: 1 +P1 vals: Book: 8, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 8, Basketball: 0 +P1 vals: Book: 4, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 5, Hat: 5, Basketball: 0 +P1 vals: Book: 2, Hat: 5, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 2 +P0 vals: Book: 8, Hat: 0, Basketball: 1 +P1 vals: Book: 4, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 2 +P0 vals: Book: 0, Hat: 2, Basketball: 1 +P1 vals: Book: 6, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 6, Basketball: 1 +P1 vals: Book: 2, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 5, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 4, Basketball: 1 +P1 vals: Book: 0, Hat: 0, Basketball: 10 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 3 +P0 vals: Book: 2, Hat: 1, Basketball: 1 +P1 vals: Book: 0, Hat: 4, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 4, Hat: 0, Basketball: 2 +P1 vals: Book: 2, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 5 +P1 vals: Book: 0, Hat: 1, Basketball: 9 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 2 +P0 vals: Book: 5, Hat: 1, Basketball: 1 +P1 vals: 
Book: 2, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 6 +P1 vals: Book: 1, Hat: 3, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 3, Hat: 4, Basketball: 1 +P1 vals: Book: 6, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 2, Hat: 8, Basketball: 0 +P1 vals: Book: 1, Hat: 5, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 5, Basketball: 5 +P1 vals: Book: 1, Hat: 1, Basketball: 6 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 6, Basketball: 1 +P1 vals: Book: 5, Hat: 0, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 2 +P0 vals: Book: 1, Hat: 3, Basketball: 0 +P1 vals: Book: 2, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 6 +P1 vals: Book: 3, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 1, Hat: 5, Basketball: 1 +P1 vals: Book: 2, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 3, Basketball: 4 +P1 vals: Book: 1, Hat: 1, Basketball: 5 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 4, Hat: 0, Basketball: 3 +P1 vals: Book: 6, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 5, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 9, Basketball: 1 +P1 vals: Book: 1, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 4, Hat: 1, Basketball: 2 +P1 vals: Book: 0, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 5, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 9 +P1 vals: Book: 1, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 3, Hat: 0, Basketball: 7 +P1 vals: Book: 6, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 8, Hat: 0, Basketball: 2 +P1 vals: Book: 2, Hat: 1, Basketball: 5 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 1, Basketball: 2 +P1 vals: Book: 2, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 1, Hat: 0, Basketball: 9 +P1 vals: Book: 1, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 2, Hat: 3, Basketball: 1 +P1 vals: Book: 8, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 7, Hat: 3, Basketball: 0 +P1 vals: Book: 7, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 2, Hat: 8, Basketball: 0 +P1 vals: Book: 1, Hat: 0, Basketball: 3 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 2, Basketball: 2 +P1 vals: Book: 0, Hat: 8, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 3, Hat: 1, Basketball: 2 +P1 vals: Book: 0, Hat: 0, Basketball: 10 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 2, Hat: 3, Basketball: 0 +P1 vals: Book: 4, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 0, Hat: 3, Basketball: 2 +P1 vals: Book: 2, Hat: 3, 
Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 6, Hat: 1, Basketball: 1 +P1 vals: Book: 4, Hat: 6, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 3, Hat: 2, Basketball: 1 +P1 vals: Book: 10, Hat: 0, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 4 +P1 vals: Book: 4, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 8, Hat: 0, Basketball: 2 +P1 vals: Book: 0, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 2, Hat: 0, Basketball: 6 +P1 vals: Book: 2, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 8, Hat: 2, Basketball: 0 +P1 vals: Book: 6, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 4, Basketball: 0 +P1 vals: Book: 3, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 3, Hat: 1, Basketball: 4 +P1 vals: Book: 7, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 4, Hat: 1, Basketball: 3 +P1 vals: Book: 3, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 2 +P0 vals: Book: 2, Hat: 0, Basketball: 1 +P1 vals: Book: 0, Hat: 8, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 2 +P0 vals: Book: 6, Hat: 1, Basketball: 0 +P1 vals: Book: 0, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 3 +P0 vals: Book: 4, Hat: 1, Basketball: 1 +P1 vals: Book: 4, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 7, Hat: 3, Basketball: 0 +P1 vals: Book: 1, Hat: 0, Basketball: 3 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 3, Hat: 1, Basketball: 1 +P1 vals: Book: 1, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 5, Hat: 2, Basketball: 1 +P1 vals: Book: 3, Hat: 7, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 0, Hat: 4, Basketball: 2 +P1 vals: Book: 4, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 4 +P0 vals: Book: 6, Hat: 0, Basketball: 1 +P1 vals: Book: 4, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 3, Basketball: 1 +P0 vals: Book: 3, Hat: 0, Basketball: 4 +P1 vals: Book: 1, Hat: 1, Basketball: 5 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 2 +P0 vals: Book: 7, Hat: 1, Basketball: 0 +P1 vals: Book: 0, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 3 +P0 vals: Book: 1, Hat: 1, Basketball: 2 +P1 vals: Book: 4, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 4, Hat: 1, Basketball: 1 +P1 vals: Book: 3, Hat: 0, Basketball: 7 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 3, Hat: 0, Basketball: 1 +P1 vals: Book: 1, Hat: 2, Basketball: 5 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 2, Hat: 3, Basketball: 1 +P1 vals: Book: 5, Hat: 5, Basketball: 0 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 10, Basketball: 0 +P1 vals: Book: 1, Hat: 6, Basketball: 1 +", 
"Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 4 +P1 vals: Book: 1, Hat: 0, Basketball: 9 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 3, Hat: 0, Basketball: 1 +P1 vals: Book: 1, Hat: 5, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 3, Hat: 0, Basketball: 7 +P1 vals: Book: 1, Hat: 1, Basketball: 6 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 5, Basketball: 2 +P1 vals: Book: 0, Hat: 8, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 10, Hat: 0, Basketball: 0 +P1 vals: Book: 1, Hat: 1, Basketball: 5 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 5 +P1 vals: Book: 3, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 0, Basketball: 8 +P1 vals: Book: 0, Hat: 3, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 3, Hat: 7, Basketball: 0 +P1 vals: Book: 4, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 7, Hat: 0, Basketball: 3 +P1 vals: Book: 0, Hat: 2, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 0, Hat: 7, Basketball: 1 +P1 vals: Book: 6, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 3, Hat: 0, Basketball: 1 +P1 vals: Book: 0, Hat: 5, Basketball: 5 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 8, Basketball: 2 +P1 vals: Book: 1, Hat: 2, Basketball: 5 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 4, Hat: 3, Basketball: 0 +P1 vals: Book: 4, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 0, Hat: 1, Basketball: 3 +P1 vals: Book: 9, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 3 +P0 vals: Book: 0, Hat: 4, Basketball: 2 +P1 vals: Book: 3, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 3, Hat: 3, Basketball: 1 +P1 vals: Book: 10, Hat: 0, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 3, Hat: 0, Basketball: 2 +P1 vals: Book: 4, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 4 +P0 vals: Book: 0, Hat: 1, Basketball: 2 +P1 vals: Book: 8, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 1, Hat: 0, Basketball: 3 +P1 vals: Book: 1, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 8, Hat: 2, Basketball: 0 +P1 vals: Book: 0, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 0, Hat: 10, Basketball: 0 +P1 vals: Book: 1, Hat: 6, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 6, Hat: 1, Basketball: 1 +P1 vals: Book: 1, Hat: 1, Basketball: 6 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 2, Hat: 5, Basketball: 1 +P1 vals: Book: 10, Hat: 0, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 4 +P0 vals: Book: 2, Hat: 6, Basketball: 0 +P1 vals: Book: 2, Hat: 2, Basketball: 1 +", "Sample game instance: 
+Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 3, Hat: 1, Basketball: 0 +P1 vals: Book: 1, Hat: 3, Basketball: 4 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 0, Hat: 2, Basketball: 3 +P1 vals: Book: 2, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 0, Hat: 10, Basketball: 0 +P1 vals: Book: 5, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 0, Hat: 1, Basketball: 4 +P1 vals: Book: 1, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 3 +P0 vals: Book: 0, Hat: 1, Basketball: 3 +P1 vals: Book: 2, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 8, Hat: 2, Basketball: 0 +P1 vals: Book: 0, Hat: 6, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 4 +P1 vals: Book: 1, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 4 +P1 vals: Book: 4, Hat: 0, Basketball: 6 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 6, Hat: 0, Basketball: 4 +P1 vals: Book: 4, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 6, Hat: 1, Basketball: 1 +P1 vals: Book: 0, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 5, Basketball: 1 +P1 vals: Book: 2, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 6, Basketball: 1 +P1 vals: Book: 0, Hat: 7, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 4, Hat: 1, Basketball: 3 +P1 vals: Book: 2, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 2 +P0 vals: Book: 2, Hat: 4, Basketball: 0 +P1 vals: Book: 1, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 2, Hat: 0, Basketball: 3 +P1 vals: Book: 1, Hat: 4, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 1, Hat: 1, Basketball: 3 +P1 vals: Book: 2, Hat: 3, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 2 +P0 vals: Book: 4, Hat: 0, Basketball: 3 +P1 vals: Book: 0, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 3, Basketball: 1 +P1 vals: Book: 4, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 2, Hat: 2, Basketball: 2 +P1 vals: Book: 0, Hat: 6, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 2, Hat: 2, Basketball: 0 +P1 vals: Book: 1, Hat: 1, Basketball: 5 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 5, Basketball: 1 +P1 vals: Book: 1, Hat: 1, Basketball: 5 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 2, Hat: 1, Basketball: 2 +P1 vals: Book: 5, Hat: 0, Basketball: 0 +", "Sample game instance: +Pool: Book: 4, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 4, Basketball: 2 +P1 vals: Book: 1, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 4, Hat: 0, Basketball: 1 +P1 vals: Book: 1, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 1, 
Basketball: 2 +P0 vals: Book: 1, Hat: 1, Basketball: 3 +P1 vals: Book: 1, Hat: 7, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 3, Basketball: 2 +P0 vals: Book: 2, Hat: 2, Basketball: 0 +P1 vals: Book: 1, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 3 +P0 vals: Book: 3, Hat: 4, Basketball: 0 +P1 vals: Book: 1, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 3, Basketball: 1 +P0 vals: Book: 3, Hat: 0, Basketball: 4 +P1 vals: Book: 2, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 5 +P1 vals: Book: 1, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 4, Basketball: 6 +P1 vals: Book: 1, Hat: 2, Basketball: 5 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 4, Hat: 1, Basketball: 2 +P1 vals: Book: 6, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 7, Hat: 0, Basketball: 1 +P1 vals: Book: 9, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 2, Hat: 3, Basketball: 1 +P1 vals: Book: 1, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 6, Basketball: 0 +P1 vals: Book: 0, Hat: 9, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 4, Hat: 2, Basketball: 1 +P1 vals: Book: 2, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 4, Hat: 6, Basketball: 0 +P1 vals: Book: 0, Hat: 6, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 4, Basketball: 1 +P0 vals: Book: 3, Hat: 0, Basketball: 4 +P1 vals: Book: 1, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 5, Hat: 5, Basketball: 0 +P1 vals: Book: 2, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 0, Hat: 4, Basketball: 2 +P1 vals: Book: 9, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 1, Hat: 1, Basketball: 2 +P1 vals: Book: 1, Hat: 5, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 1, Hat: 0, Basketball: 9 +P1 vals: Book: 4, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 3, Basketball: 2 +P1 vals: Book: 4, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 6, Basketball: 1 +P1 vals: Book: 0, Hat: 0, Basketball: 5 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 4 +P0 vals: Book: 2, Hat: 4, Basketball: 0 +P1 vals: Book: 2, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 1, Hat: 0, Basketball: 4 +P1 vals: Book: 0, Hat: 3, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 2 +P0 vals: Book: 3, Hat: 1, Basketball: 2 +P1 vals: Book: 1, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 1 +P1 vals: Book: 2, Hat: 0, Basketball: 8 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 5 +P1 vals: Book: 1, Hat: 5, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: 
Book: 1, Hat: 2, Basketball: 2 +P1 vals: Book: 1, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 4, Basketball: 3 +P1 vals: Book: 1, Hat: 3, Basketball: 4 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 0, Basketball: 6 +P1 vals: Book: 2, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 4, Hat: 2, Basketball: 1 +P1 vals: Book: 6, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 8, Hat: 1, Basketball: 0 +P1 vals: Book: 6, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 3, Hat: 2, Basketball: 1 +P1 vals: Book: 4, Hat: 3, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 2 +P0 vals: Book: 4, Hat: 0, Basketball: 3 +P1 vals: Book: 1, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 2, Basketball: 3 +P1 vals: Book: 0, Hat: 6, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 2, Hat: 2, Basketball: 2 +P1 vals: Book: 1, Hat: 0, Basketball: 9 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 6, Hat: 1, Basketball: 1 +P1 vals: Book: 6, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 3 +P0 vals: Book: 0, Hat: 10, Basketball: 0 +P1 vals: Book: 3, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 4 +P0 vals: Book: 4, Hat: 3, Basketball: 0 +P1 vals: Book: 0, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 1, Hat: 1, Basketball: 2 +P1 vals: Book: 8, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 3, Hat: 3, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 4 +P1 vals: Book: 1, Hat: 0, Basketball: 7 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 6 +P1 vals: Book: 4, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 3, Basketball: 4 +P1 vals: Book: 0, Hat: 3, Basketball: 7 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 6, Hat: 2, Basketball: 0 +P1 vals: Book: 4, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 3, Basketball: 3 +P1 vals: Book: 1, Hat: 3, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 2 +P0 vals: Book: 1, Hat: 3, Basketball: 0 +P1 vals: Book: 2, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 2, Hat: 0, Basketball: 2 +P1 vals: Book: 0, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 4 +P1 vals: Book: 0, Hat: 10, Basketball: 0 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 4, Basketball: 3 +P1 vals: Book: 1, Hat: 4, Basketball: 3 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 2 +P1 vals: Book: 2, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 5 +P1 vals: Book: 2, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 8, 
Basketball: 0 +P1 vals: Book: 0, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 3 +P0 vals: Book: 2, Hat: 4, Basketball: 0 +P1 vals: Book: 1, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 2 +P0 vals: Book: 2, Hat: 0, Basketball: 2 +P1 vals: Book: 0, Hat: 2, Basketball: 3 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 2, Hat: 2, Basketball: 0 +P1 vals: Book: 0, Hat: 2, Basketball: 8 +", "Sample game instance: +Pool: Book: 4, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 2 +P1 vals: Book: 0, Hat: 3, Basketball: 4 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 1 +P0 vals: Book: 2, Hat: 0, Basketball: 4 +P1 vals: Book: 1, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 1, Hat: 2, Basketball: 2 +P1 vals: Book: 1, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 4 +P0 vals: Book: 4, Hat: 2, Basketball: 0 +P1 vals: Book: 0, Hat: 6, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 3, Hat: 7, Basketball: 0 +P1 vals: Book: 1, Hat: 6, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 1, Hat: 9, Basketball: 0 +P1 vals: Book: 1, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 2, Hat: 0, Basketball: 2 +P1 vals: Book: 0, Hat: 1, Basketball: 9 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 2 +P0 vals: Book: 0, Hat: 1, Basketball: 3 +P1 vals: Book: 2, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 8 +P1 vals: Book: 2, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 2, Hat: 4, Basketball: 1 +P1 vals: Book: 0, Hat: 2, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 7, Hat: 0, Basketball: 1 +P1 vals: Book: 5, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 2 +P0 vals: Book: 8, Hat: 0, Basketball: 1 +P1 vals: Book: 6, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 8, Basketball: 2 +P1 vals: Book: 1, Hat: 3, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 3 +P0 vals: Book: 1, Hat: 0, Basketball: 3 +P1 vals: Book: 1, Hat: 3, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 3, Hat: 1, Basketball: 1 +P1 vals: Book: 0, Hat: 0, Basketball: 5 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 2, Hat: 8, Basketball: 0 +P1 vals: Book: 1, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 8, Basketball: 0 +P1 vals: Book: 3, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 10, Hat: 0, Basketball: 0 +P1 vals: Book: 1, Hat: 1, Basketball: 6 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 1, Hat: 0, Basketball: 3 +P1 vals: Book: 2, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 4, Hat: 0, Basketball: 3 +P1 vals: Book: 4, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 5, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 3 +P1 vals: 
Book: 1, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 4, Hat: 1, Basketball: 1 +P1 vals: Book: 10, Hat: 0, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 4 +P1 vals: Book: 2, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 10, Basketball: 0 +P1 vals: Book: 1, Hat: 1, Basketball: 6 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 4, Hat: 0, Basketball: 6 +P1 vals: Book: 3, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 2 +P0 vals: Book: 2, Hat: 1, Basketball: 1 +P1 vals: Book: 0, Hat: 2, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 3 +P1 vals: Book: 4, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 8 +P1 vals: Book: 1, Hat: 6, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 3, Basketball: 1 +P1 vals: Book: 2, Hat: 1, Basketball: 5 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 2, Hat: 0, Basketball: 3 +P1 vals: Book: 0, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 6 +P1 vals: Book: 2, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 7 +P1 vals: Book: 6, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 1, Basketball: 9 +P1 vals: Book: 1, Hat: 0, Basketball: 6 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 0, Hat: 5, Basketball: 1 +P1 vals: Book: 9, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 4, Hat: 1, Basketball: 0 +P1 vals: Book: 3, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 3, Hat: 1, Basketball: 0 +P1 vals: Book: 0, Hat: 6, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 3, Hat: 2, Basketball: 1 +P1 vals: Book: 6, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 6, Basketball: 1 +P1 vals: Book: 1, Hat: 0, Basketball: 7 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 1, Hat: 3, Basketball: 0 +P1 vals: Book: 5, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 2, Hat: 3, Basketball: 1 +P1 vals: Book: 3, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 9, Hat: 1, Basketball: 0 +P1 vals: Book: 1, Hat: 5, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 2, Hat: 1, Basketball: 3 +P1 vals: Book: 0, Hat: 3, Basketball: 2 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 2 +P0 vals: Book: 0, Hat: 8, Basketball: 1 +P1 vals: Book: 1, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 3, Hat: 2, Basketball: 1 +P1 vals: Book: 1, Hat: 4, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 4 +P1 vals: Book: 4, Hat: 2, 
Basketball: 0 +", "Sample game instance: +Pool: Book: 4, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 5, Basketball: 0 +P1 vals: Book: 1, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 0, Basketball: 3 +P1 vals: Book: 6, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 4 +P0 vals: Book: 2, Hat: 2, Basketball: 1 +P1 vals: Book: 4, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 2, Hat: 8, Basketball: 0 +P1 vals: Book: 2, Hat: 5, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 5, Hat: 5, Basketball: 0 +P1 vals: Book: 2, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 1, Hat: 0, Basketball: 9 +P1 vals: Book: 7, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 5, Hat: 1, Basketball: 1 +P1 vals: Book: 7, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 0, Hat: 5, Basketball: 1 +P1 vals: Book: 5, Hat: 0, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 8, Hat: 0, Basketball: 1 +P1 vals: Book: 2, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 5, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 9, Basketball: 1 +P1 vals: Book: 1, Hat: 5, Basketball: 0 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 3, Basketball: 2 +P1 vals: Book: 0, Hat: 10, Basketball: 0 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 5, Basketball: 1 +P1 vals: Book: 0, Hat: 4, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 6, Hat: 1, Basketball: 1 +P1 vals: Book: 6, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 6, Basketball: 1 +P1 vals: Book: 3, Hat: 7, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 2, Hat: 2, Basketball: 2 +P1 vals: Book: 3, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 1, Hat: 0, Basketball: 9 +P1 vals: Book: 0, Hat: 1, Basketball: 7 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 0, Basketball: 6 +P1 vals: Book: 0, Hat: 6, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 5 +P1 vals: Book: 4, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 2, Hat: 2, Basketball: 2 +P1 vals: Book: 0, Hat: 0, Basketball: 5 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 1 +P1 vals: Book: 0, Hat: 10, Basketball: 0 +", "Sample game instance: +Pool: Book: 4, Hat: 2, Basketball: 1 +P0 vals: Book: 2, Hat: 0, Basketball: 2 +P1 vals: Book: 1, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 2, Hat: 3, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 3 +P1 vals: Book: 0, Hat: 0, Basketball: 10 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 2, Hat: 8, Basketball: 0 +P1 vals: Book: 2, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 6 +P1 vals: Book: 1, Hat: 4, Basketball: 3 +", 
"Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 3, Basketball: 4 +P1 vals: Book: 3, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 3, Hat: 0, Basketball: 1 +P1 vals: Book: 1, Hat: 7, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 6, Hat: 2, Basketball: 0 +P1 vals: Book: 1, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 4, Basketball: 2 +P1 vals: Book: 1, Hat: 1, Basketball: 5 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 4 +P0 vals: Book: 4, Hat: 3, Basketball: 0 +P1 vals: Book: 2, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 2, Basketball: 4 +P1 vals: Book: 6, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 3 +P1 vals: Book: 3, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 3, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 1 +P1 vals: Book: 0, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 4 +P0 vals: Book: 6, Hat: 0, Basketball: 1 +P1 vals: Book: 2, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 6, Hat: 0, Basketball: 2 +P1 vals: Book: 4, Hat: 3, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 3 +P0 vals: Book: 2, Hat: 3, Basketball: 1 +P1 vals: Book: 3, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 0, Basketball: 4 +P1 vals: Book: 2, Hat: 6, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 3, Basketball: 1 +P0 vals: Book: 5, Hat: 0, Basketball: 0 +P1 vals: Book: 1, Hat: 1, Basketball: 5 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 6, Basketball: 1 +P1 vals: Book: 10, Hat: 0, Basketball: 0 +", "Sample game instance: +Pool: Book: 4, Hat: 2, Basketball: 1 +P0 vals: Book: 2, Hat: 0, Basketball: 2 +P1 vals: Book: 0, Hat: 4, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 5 +P1 vals: Book: 1, Hat: 4, Basketball: 3 +", "Sample game instance: +Pool: Book: 3, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 0, Basketball: 10 +P1 vals: Book: 1, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 5, Hat: 1, Basketball: 2 +P1 vals: Book: 5, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 4 +P1 vals: Book: 6, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 4 +P0 vals: Book: 7, Hat: 3, Basketball: 0 +P1 vals: Book: 2, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 4, Hat: 0, Basketball: 2 +P1 vals: Book: 9, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 4, Basketball: 1 +P0 vals: Book: 1, Hat: 0, Basketball: 8 +P1 vals: Book: 1, Hat: 1, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 3, Hat: 1, Basketball: 3 +P1 vals: Book: 6, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 2, Hat: 3, Basketball: 1 +P1 vals: Book: 10, Hat: 0, Basketball: 0 +", "Sample game instance: 
+Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 8, Hat: 1, Basketball: 0 +P1 vals: Book: 1, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 2 +P0 vals: Book: 6, Hat: 0, Basketball: 2 +P1 vals: Book: 5, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 0, Hat: 3, Basketball: 2 +P1 vals: Book: 2, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 3 +P0 vals: Book: 2, Hat: 0, Basketball: 2 +P1 vals: Book: 4, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 3 +P0 vals: Book: 1, Hat: 2, Basketball: 1 +P1 vals: Book: 10, Hat: 0, Basketball: 0 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 2 +P0 vals: Book: 3, Hat: 1, Basketball: 0 +P1 vals: Book: 0, Hat: 2, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 4, Hat: 1, Basketball: 1 +P1 vals: Book: 5, Hat: 0, Basketball: 5 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 2, Hat: 0, Basketball: 6 +P1 vals: Book: 3, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 2 +P0 vals: Book: 0, Hat: 0, Basketball: 5 +P1 vals: Book: 1, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 3, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 3 +P1 vals: Book: 0, Hat: 1, Basketball: 7 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 3, Basketball: 4 +P1 vals: Book: 2, Hat: 3, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 2, Hat: 2, Basketball: 2 +P1 vals: Book: 1, Hat: 2, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 10, Hat: 0, Basketball: 0 +P1 vals: Book: 1, Hat: 2, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 1, Hat: 0, Basketball: 9 +P1 vals: Book: 5, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 6, Hat: 0, Basketball: 2 +P1 vals: Book: 2, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 1, Hat: 4, Basketball: 1 +P1 vals: Book: 3, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 8, Basketball: 0 +P1 vals: Book: 0, Hat: 2, Basketball: 4 +", "Sample game instance: +Pool: Book: 2, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 0, Basketball: 10 +P1 vals: Book: 3, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 2 +P0 vals: Book: 2, Hat: 2, Basketball: 1 +P1 vals: Book: 4, Hat: 0, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 9, Hat: 0, Basketball: 1 +P1 vals: Book: 2, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 4 +P0 vals: Book: 10, Hat: 0, Basketball: 0 +P1 vals: Book: 2, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 10, Hat: 0, Basketball: 0 +P1 vals: Book: 6, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 2, Basketball: 2 +P1 vals: Book: 4, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 5 +P1 vals: Book: 3, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 3, Hat: 
1, Basketball: 1 +P0 vals: Book: 2, Hat: 2, Basketball: 2 +P1 vals: Book: 1, Hat: 6, Basketball: 1 +", "Sample game instance: +Pool: Book: 5, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 5, Basketball: 0 +P1 vals: Book: 0, Hat: 3, Basketball: 7 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 2 +P0 vals: Book: 0, Hat: 2, Basketball: 3 +P1 vals: Book: 2, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 5, Hat: 1, Basketball: 2 +P1 vals: Book: 0, Hat: 2, Basketball: 4 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 2 +P0 vals: Book: 4, Hat: 0, Basketball: 1 +P1 vals: Book: 3, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 5, Hat: 0, Basketball: 0 +P1 vals: Book: 2, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 8, Hat: 1, Basketball: 0 +P1 vals: Book: 2, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 2 +P0 vals: Book: 0, Hat: 8, Basketball: 1 +P1 vals: Book: 1, Hat: 7, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 0, Basketball: 3 +P1 vals: Book: 3, Hat: 4, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 4 +P0 vals: Book: 2, Hat: 2, Basketball: 1 +P1 vals: Book: 2, Hat: 4, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 4 +P0 vals: Book: 0, Hat: 2, Basketball: 2 +P1 vals: Book: 2, Hat: 6, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 0, Hat: 4, Basketball: 2 +P1 vals: Book: 1, Hat: 9, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 4 +P1 vals: Book: 3, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 4 +P0 vals: Book: 2, Hat: 2, Basketball: 1 +P1 vals: Book: 0, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 3, Hat: 2, Basketball: 1 +P1 vals: Book: 7, Hat: 0, Basketball: 3 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 3, Basketball: 7 +P1 vals: Book: 2, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 1, Basketball: 2 +P1 vals: Book: 0, Hat: 7, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 3, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 2 +P1 vals: Book: 5, Hat: 0, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 2, Hat: 4, Basketball: 0 +P1 vals: Book: 2, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 3 +P0 vals: Book: 1, Hat: 3, Basketball: 0 +P1 vals: Book: 1, Hat: 0, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 5, Hat: 2, Basketball: 1 +P1 vals: Book: 8, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 4, Hat: 3, Basketball: 0 +P1 vals: Book: 3, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 4 +P0 vals: Book: 8, Hat: 1, Basketball: 0 +P1 vals: Book: 2, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 0, Hat: 10, Basketball: 0 +P1 vals: Book: 6, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 
vals: Book: 1, Hat: 4, Basketball: 0 +P1 vals: Book: 1, Hat: 2, Basketball: 4 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 1, Hat: 0, Basketball: 9 +P1 vals: Book: 2, Hat: 1, Basketball: 5 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 0, Hat: 10, Basketball: 0 +P1 vals: Book: 1, Hat: 4, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 0, Hat: 7, Basketball: 1 +P1 vals: Book: 2, Hat: 5, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 2, Hat: 0, Basketball: 8 +P1 vals: Book: 4, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 5, Basketball: 0 +P1 vals: Book: 2, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 3 +P0 vals: Book: 3, Hat: 1, Basketball: 1 +P1 vals: Book: 0, Hat: 4, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 5, Hat: 0, Basketball: 1 +P1 vals: Book: 9, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 5, Hat: 0, Basketball: 5 +P1 vals: Book: 1, Hat: 2, Basketball: 3 +", "Sample game instance: +Pool: Book: 2, Hat: 4, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 0 +P1 vals: Book: 2, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 4, Hat: 0, Basketball: 1 +P1 vals: Book: 2, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 2 +P0 vals: Book: 0, Hat: 8, Basketball: 1 +P1 vals: Book: 1, Hat: 5, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 0, Hat: 2, Basketball: 2 +P1 vals: Book: 1, Hat: 0, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 4 +P1 vals: Book: 5, Hat: 0, Basketball: 5 +", "Sample game instance: +Pool: Book: 2, Hat: 3, Basketball: 1 +P0 vals: Book: 3, Hat: 1, Basketball: 1 +P1 vals: Book: 0, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 4, Hat: 0, Basketball: 2 +P1 vals: Book: 1, Hat: 1, Basketball: 6 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 1, Hat: 8, Basketball: 0 +P1 vals: Book: 3, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 2, Hat: 1, Basketball: 2 +P1 vals: Book: 1, Hat: 3, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 5 +P0 vals: Book: 4, Hat: 1, Basketball: 1 +P1 vals: Book: 4, Hat: 6, Basketball: 0 +", "Sample game instance: +Pool: Book: 2, Hat: 4, Basketball: 1 +P0 vals: Book: 2, Hat: 1, Basketball: 2 +P1 vals: Book: 4, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 4, Basketball: 1 +P0 vals: Book: 9, Hat: 0, Basketball: 1 +P1 vals: Book: 6, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 2 +P0 vals: Book: 0, Hat: 6, Basketball: 2 +P1 vals: Book: 1, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 1, Basketball: 2 +P0 vals: Book: 0, Hat: 6, Basketball: 2 +P1 vals: Book: 1, Hat: 4, Basketball: 2 +", "Sample game instance: +Pool: Book: 4, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 4, Basketball: 2 +P1 vals: Book: 2, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 10, Hat: 0, 
Basketball: 0 +P1 vals: Book: 1, Hat: 6, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 0, Hat: 0, Basketball: 10 +P1 vals: Book: 6, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 3, Basketball: 1 +P1 vals: Book: 2, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 1, Hat: 0, Basketball: 3 +P1 vals: Book: 2, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 4, Hat: 3, Basketball: 0 +P1 vals: Book: 2, Hat: 0, Basketball: 4 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 3, Basketball: 4 +P1 vals: Book: 0, Hat: 0, Basketball: 10 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 7, Hat: 1, Basketball: 0 +P1 vals: Book: 1, Hat: 1, Basketball: 6 +", "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 3, Hat: 2, Basketball: 1 +P1 vals: Book: 7, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 1, Hat: 3, Basketball: 1 +P1 vals: Book: 0, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 3 +P0 vals: Book: 3, Hat: 1, Basketball: 0 +P1 vals: Book: 0, Hat: 4, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 2, Basketball: 1 +P0 vals: Book: 1, Hat: 1, Basketball: 5 +P1 vals: Book: 0, Hat: 4, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 3 +P0 vals: Book: 0, Hat: 5, Basketball: 0 +P1 vals: Book: 5, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 0, Hat: 6, Basketball: 4 +P1 vals: Book: 1, Hat: 4, Basketball: 2 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 3, Hat: 1, Basketball: 0 +P1 vals: Book: 2, Hat: 2, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 5, Hat: 2, Basketball: 1 +P1 vals: Book: 3, Hat: 1, Basketball: 2 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 1 +P0 vals: Book: 2, Hat: 0, Basketball: 2 +P1 vals: Book: 0, Hat: 3, Basketball: 7 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 3, Hat: 2, Basketball: 0 +P1 vals: Book: 1, Hat: 2, Basketball: 4 +", "Sample game instance: +Pool: Book: 2, Hat: 3, Basketball: 1 +P0 vals: Book: 4, Hat: 0, Basketball: 2 +P1 vals: Book: 2, Hat: 1, Basketball: 3 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 0, Hat: 4, Basketball: 2 +P1 vals: Book: 5, Hat: 2, Basketball: 1 +", "Sample game instance: +Pool: Book: 4, Hat: 1, Basketball: 2 +P0 vals: Book: 2, Hat: 0, Basketball: 1 +P1 vals: Book: 0, Hat: 6, Basketball: 2 +", "Sample game instance: +Pool: Book: 1, Hat: 1, Basketball: 3 +P0 vals: Book: 6, Hat: 4, Basketball: 0 +P1 vals: Book: 4, Hat: 0, Basketball: 2 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 2, Hat: 3, Basketball: 0 +P1 vals: Book: 2, Hat: 0, Basketball: 6 +", "Sample game instance: +Pool: Book: 2, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 4, Basketball: 2 +P1 vals: Book: 3, Hat: 2, Basketball: 0 +", "Sample game instance: +Pool: Book: 3, Hat: 1, Basketball: 1 +P0 vals: Book: 1, Hat: 2, Basketball: 5 +P1 vals: Book: 0, Hat: 9, Basketball: 1 +", "Sample game instance: +Pool: Book: 4, Hat: 2, Basketball: 1 +P0 vals: Book: 0, Hat: 3, Basketball: 4 +P1 vals: 
Book: 2, Hat: 1, Basketball: 0 +", "Sample game instance: +Pool: Book: 1, Hat: 2, Basketball: 2 +P0 vals: Book: 2, Hat: 4, Basketball: 0 +P1 vals: Book: 6, Hat: 1, Basketball: 1 +", "Sample game instance: +Pool: Book: 1, Hat: 5, Basketball: 1 +P0 vals: Book: 3, Hat: 1, Basketball: 2 +P1 vals: Book: 4, Hat: 0, Basketball: 6 +"] + +# Apply action "Sample game instance: +Pool: Book: 1, Hat: 3, Basketball: 1 +P0 vals: Book: 6, Hat: 1, Basketball: 1 +P1 vals: Book: 4, Hat: 1, Basketball: 3 +" +action: 19 + +# State 1 +# Pool: Book: 1, Hat: 3, Basketball: 1 +# P0 vals: Book: 6, Hat: 1, Basketball: 1 +# P1 vals: Book: 4, Hat: 1, Basketball: 3 +# Agreement reached? 0 +IsTerminal() = False +History() = [19] +HistoryString() = "19" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 6, Hat: 1, Basketball: 1\nAgreement reached? 0\n" +InformationStateString(1) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 4, Hat: 1, Basketball: 3\nAgreement reached? 0\n" +InformationStateTensor(0): binvec(309, 0x100181e181fc300600000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(309, 0x100181e181f0300780000000000000000000000000000000000000000000000000000000000000) +ObservationString(0) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 6, Hat: 1, Basketball: 1\nAgreement reached? 0\nNumber of offers: 0\n" +ObservationString(1) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 4, Hat: 1, Basketball: 3\nAgreement reached? 0\nNumber of offers: 0\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◉◉◉◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◉◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 8, 9, 15, 16, 21, 22, 36, 37, 43, 44, 49, 50, 54, 55] +StringLegalActions() = ["Offer: Book: 0, Hat: 0, Basketball: 0", "Offer: Book: 1, Hat: 0, Basketball: 0", "Offer: Book: 0, Hat: 1, Basketball: 0", "Offer: Book: 1, Hat: 1, Basketball: 0", "Offer: Book: 0, Hat: 2, Basketball: 0", "Offer: Book: 1, Hat: 2, Basketball: 0", "Offer: Book: 0, Hat: 3, Basketball: 0", "Offer: Book: 1, Hat: 3, Basketball: 0", "Offer: Book: 0, Hat: 0, Basketball: 1", "Offer: Book: 1, Hat: 0, Basketball: 1", "Offer: Book: 0, Hat: 1, Basketball: 1", "Offer: Book: 1, Hat: 1, Basketball: 1", "Offer: Book: 0, Hat: 2, Basketball: 1", "Offer: Book: 1, Hat: 2, Basketball: 1", "Offer: Book: 0, Hat: 3, Basketball: 1", "Offer: Book: 1, Hat: 3, Basketball: 1"] + +# Apply action "Offer: Book: 0, Hat: 0, Basketball: 1" +action: 36 + +# State 2 +# Pool: Book: 1, Hat: 3, Basketball: 1 +# P0 vals: Book: 6, Hat: 1, Basketball: 1 +# P1 vals: Book: 4, Hat: 1, Basketball: 3 +# Agreement reached? 0 +# P0 offers: Offer: Book: 0, Hat: 0, Basketball: 1 +IsTerminal() = False +History() = [19, 36] +HistoryString() = "19, 36" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 6, Hat: 1, Basketball: 1\nAgreement reached? 0\nP0 offers: Offer: Book: 0, Hat: 0, Basketball: 1\n" +InformationStateString(1) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 4, Hat: 1, Basketball: 3\nAgreement reached? 
0\nP0 offers: Offer: Book: 0, Hat: 0, Basketball: 1\n" +InformationStateTensor(0): binvec(309, 0x80181e181fc3006008080c0000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(309, 0x80181e181f03007808080c0000000000000000000000000000000000000000000000000000000) +ObservationString(0) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 6, Hat: 1, Basketball: 1\nAgreement reached? 0\nNumber of offers: 1\nP0 offers: Offer: Book: 0, Hat: 0, Basketball: 1\n" +ObservationString(1) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 4, Hat: 1, Basketball: 3\nAgreement reached? 0\nNumber of offers: 1\nP0 offers: Offer: Book: 0, Hat: 0, Basketball: 1\n" +ObservationTensor(0): ◯◉◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◉◉◉◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯ +ObservationTensor(1): ◯◉◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◉◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◉◉◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 8, 9, 15, 16, 21, 22, 36, 37, 43, 44, 49, 50, 54, 55, 120] +StringLegalActions() = ["Offer: Book: 0, Hat: 0, Basketball: 0", "Offer: Book: 1, Hat: 0, Basketball: 0", "Offer: Book: 0, Hat: 1, Basketball: 0", "Offer: Book: 1, Hat: 1, Basketball: 0", "Offer: Book: 0, Hat: 2, Basketball: 0", "Offer: Book: 1, Hat: 2, Basketball: 0", "Offer: Book: 0, Hat: 3, Basketball: 0", "Offer: Book: 1, Hat: 3, Basketball: 0", "Offer: Book: 0, Hat: 0, Basketball: 1", "Offer: Book: 1, Hat: 0, Basketball: 1", "Offer: Book: 0, Hat: 1, Basketball: 1", "Offer: Book: 1, Hat: 1, Basketball: 1", "Offer: Book: 0, Hat: 2, Basketball: 1", "Offer: Book: 1, Hat: 2, Basketball: 1", "Offer: Book: 0, Hat: 3, Basketball: 1", "Offer: Book: 1, Hat: 3, Basketball: 1", "Agree"] + +# Apply action "Offer: Book: 0, Hat: 2, Basketball: 1" +action: 49 + +# State 3 +# Pool: Book: 1, Hat: 3, Basketball: 1 +# P0 vals: Book: 6, Hat: 1, Basketball: 1 +# P1 vals: Book: 4, Hat: 1, Basketball: 3 +# Agreement reached? 0 +# P0 offers: Offer: Book: 0, Hat: 0, Basketball: 1 +# P1 offers: Offer: Book: 0, Hat: 2, Basketball: 1 +IsTerminal() = False +History() = [19, 36, 49] +HistoryString() = "19, 36, 49" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 6, Hat: 1, Basketball: 1\nAgreement reached? 0\nP0 offers: Offer: Book: 0, Hat: 0, Basketball: 1\nP1 offers: Offer: Book: 0, Hat: 2, Basketball: 1\n" +InformationStateString(1) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 4, Hat: 1, Basketball: 3\nAgreement reached? 0\nP0 offers: Offer: Book: 0, Hat: 0, Basketball: 1\nP1 offers: Offer: Book: 0, Hat: 2, Basketball: 1\n" +InformationStateTensor(0): binvec(309, 0x40181e181fc3006008080c080e0c0000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(309, 0x40181e181f03007808080c080e0c0000000000000000000000000000000000000000000000000) +ObservationString(0) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 6, Hat: 1, Basketball: 1\nAgreement reached? 0\nNumber of offers: 2\nP1 offers: Offer: Book: 0, Hat: 2, Basketball: 1\n" +ObservationString(1) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 4, Hat: 1, Basketball: 3\nAgreement reached? 
0\nNumber of offers: 2\nP1 offers: Offer: Book: 0, Hat: 2, Basketball: 1\n" +ObservationTensor(0): ◯◯◉◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◉◉◉◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◉◉◯◯◯◯◯◉◉◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◉◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◉◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◉◉◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◉◉◯◯◯◯◯◉◉◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 8, 9, 15, 16, 21, 22, 36, 37, 43, 44, 49, 50, 54, 55, 120] +StringLegalActions() = ["Offer: Book: 0, Hat: 0, Basketball: 0", "Offer: Book: 1, Hat: 0, Basketball: 0", "Offer: Book: 0, Hat: 1, Basketball: 0", "Offer: Book: 1, Hat: 1, Basketball: 0", "Offer: Book: 0, Hat: 2, Basketball: 0", "Offer: Book: 1, Hat: 2, Basketball: 0", "Offer: Book: 0, Hat: 3, Basketball: 0", "Offer: Book: 1, Hat: 3, Basketball: 0", "Offer: Book: 0, Hat: 0, Basketball: 1", "Offer: Book: 1, Hat: 0, Basketball: 1", "Offer: Book: 0, Hat: 1, Basketball: 1", "Offer: Book: 1, Hat: 1, Basketball: 1", "Offer: Book: 0, Hat: 2, Basketball: 1", "Offer: Book: 1, Hat: 2, Basketball: 1", "Offer: Book: 0, Hat: 3, Basketball: 1", "Offer: Book: 1, Hat: 3, Basketball: 1", "Agree"] + +# Apply action "Offer: Book: 0, Hat: 2, Basketball: 1" +action: 49 + +# State 4 +# Pool: Book: 1, Hat: 3, Basketball: 1 +# P0 vals: Book: 6, Hat: 1, Basketball: 1 +# P1 vals: Book: 4, Hat: 1, Basketball: 3 +# Agreement reached? 0 +# P0 offers: Offer: Book: 0, Hat: 0, Basketball: 1 +# P1 offers: Offer: Book: 0, Hat: 2, Basketball: 1 +# P0 offers: Offer: Book: 0, Hat: 2, Basketball: 1 +IsTerminal() = False +History() = [19, 36, 49, 49] +HistoryString() = "19, 36, 49, 49" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 6, Hat: 1, Basketball: 1\nAgreement reached? 0\nP0 offers: Offer: Book: 0, Hat: 0, Basketball: 1\nP1 offers: Offer: Book: 0, Hat: 2, Basketball: 1\nP0 offers: Offer: Book: 0, Hat: 2, Basketball: 1\n" +InformationStateString(1) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 4, Hat: 1, Basketball: 3\nAgreement reached? 0\nP0 offers: Offer: Book: 0, Hat: 0, Basketball: 1\nP1 offers: Offer: Book: 0, Hat: 2, Basketball: 1\nP0 offers: Offer: Book: 0, Hat: 2, Basketball: 1\n" +InformationStateTensor(0): binvec(309, 0x20181e181fc3006008080c080e0c080e0c0000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(309, 0x20181e181f03007808080c080e0c080e0c0000000000000000000000000000000000000000000) +ObservationString(0) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 6, Hat: 1, Basketball: 1\nAgreement reached? 0\nNumber of offers: 3\nP0 offers: Offer: Book: 0, Hat: 2, Basketball: 1\n" +ObservationString(1) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 4, Hat: 1, Basketball: 3\nAgreement reached? 
0\nNumber of offers: 3\nP0 offers: Offer: Book: 0, Hat: 2, Basketball: 1\n" +ObservationTensor(0): ◯◯◯◉◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◉◉◉◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◉◉◯◯◯◯◯◉◉◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◉◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◉◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◉◉◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◉◉◯◯◯◯◯◉◉◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 8, 9, 15, 16, 21, 22, 36, 37, 43, 44, 49, 50, 54, 55, 120] +StringLegalActions() = ["Offer: Book: 0, Hat: 0, Basketball: 0", "Offer: Book: 1, Hat: 0, Basketball: 0", "Offer: Book: 0, Hat: 1, Basketball: 0", "Offer: Book: 1, Hat: 1, Basketball: 0", "Offer: Book: 0, Hat: 2, Basketball: 0", "Offer: Book: 1, Hat: 2, Basketball: 0", "Offer: Book: 0, Hat: 3, Basketball: 0", "Offer: Book: 1, Hat: 3, Basketball: 0", "Offer: Book: 0, Hat: 0, Basketball: 1", "Offer: Book: 1, Hat: 0, Basketball: 1", "Offer: Book: 0, Hat: 1, Basketball: 1", "Offer: Book: 1, Hat: 1, Basketball: 1", "Offer: Book: 0, Hat: 2, Basketball: 1", "Offer: Book: 1, Hat: 2, Basketball: 1", "Offer: Book: 0, Hat: 3, Basketball: 1", "Offer: Book: 1, Hat: 3, Basketball: 1", "Agree"] + +# Apply action "Offer: Book: 1, Hat: 2, Basketball: 0" +action: 16 + +# State 5 +# Pool: Book: 1, Hat: 3, Basketball: 1 +# P0 vals: Book: 6, Hat: 1, Basketball: 1 +# P1 vals: Book: 4, Hat: 1, Basketball: 3 +# Agreement reached? 0 +# P0 offers: Offer: Book: 0, Hat: 0, Basketball: 1 +# P1 offers: Offer: Book: 0, Hat: 2, Basketball: 1 +# P0 offers: Offer: Book: 0, Hat: 2, Basketball: 1 +# P1 offers: Offer: Book: 1, Hat: 2, Basketball: 0 +IsTerminal() = False +History() = [19, 36, 49, 49, 16] +HistoryString() = "19, 36, 49, 49, 16" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 6, Hat: 1, Basketball: 1\nAgreement reached? 0\nP0 offers: Offer: Book: 0, Hat: 0, Basketball: 1\nP1 offers: Offer: Book: 0, Hat: 2, Basketball: 1\nP0 offers: Offer: Book: 0, Hat: 2, Basketball: 1\nP1 offers: Offer: Book: 1, Hat: 2, Basketball: 0\n" +InformationStateString(1) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 4, Hat: 1, Basketball: 3\nAgreement reached? 0\nP0 offers: Offer: Book: 0, Hat: 0, Basketball: 1\nP1 offers: Offer: Book: 0, Hat: 2, Basketball: 1\nP0 offers: Offer: Book: 0, Hat: 2, Basketball: 1\nP1 offers: Offer: Book: 1, Hat: 2, Basketball: 0\n" +InformationStateTensor(0): binvec(309, 0x10181e181fc3006008080c080e0c080e0c0c0e080000000000000000000000000000000000000) +InformationStateTensor(1): binvec(309, 0x10181e181f03007808080c080e0c080e0c0c0e080000000000000000000000000000000000000) +ObservationString(0) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 6, Hat: 1, Basketball: 1\nAgreement reached? 0\nNumber of offers: 4\nP1 offers: Offer: Book: 1, Hat: 2, Basketball: 0\n" +ObservationString(1) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 4, Hat: 1, Basketball: 3\nAgreement reached? 
0\nNumber of offers: 4\nP1 offers: Offer: Book: 1, Hat: 2, Basketball: 0\n" +ObservationTensor(0): ◯◯◯◯◉◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◉◉◉◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◯◯◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◉◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◉◉◉◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◯◯◯◯◯◉◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 8, 9, 15, 16, 21, 22, 36, 37, 43, 44, 49, 50, 54, 55, 120] +StringLegalActions() = ["Offer: Book: 0, Hat: 0, Basketball: 0", "Offer: Book: 1, Hat: 0, Basketball: 0", "Offer: Book: 0, Hat: 1, Basketball: 0", "Offer: Book: 1, Hat: 1, Basketball: 0", "Offer: Book: 0, Hat: 2, Basketball: 0", "Offer: Book: 1, Hat: 2, Basketball: 0", "Offer: Book: 0, Hat: 3, Basketball: 0", "Offer: Book: 1, Hat: 3, Basketball: 0", "Offer: Book: 0, Hat: 0, Basketball: 1", "Offer: Book: 1, Hat: 0, Basketball: 1", "Offer: Book: 0, Hat: 1, Basketball: 1", "Offer: Book: 1, Hat: 1, Basketball: 1", "Offer: Book: 0, Hat: 2, Basketball: 1", "Offer: Book: 1, Hat: 2, Basketball: 1", "Offer: Book: 0, Hat: 3, Basketball: 1", "Offer: Book: 1, Hat: 3, Basketball: 1", "Agree"] + +# Apply action "Offer: Book: 1, Hat: 0, Basketball: 1" +action: 37 + +# State 6 +# Pool: Book: 1, Hat: 3, Basketball: 1 +# P0 vals: Book: 6, Hat: 1, Basketball: 1 +# P1 vals: Book: 4, Hat: 1, Basketball: 3 +# Agreement reached? 0 +# P0 offers: Offer: Book: 0, Hat: 0, Basketball: 1 +# P1 offers: Offer: Book: 0, Hat: 2, Basketball: 1 +# P0 offers: Offer: Book: 0, Hat: 2, Basketball: 1 +# P1 offers: Offer: Book: 1, Hat: 2, Basketball: 0 +# P0 offers: Offer: Book: 1, Hat: 0, Basketball: 1 +IsTerminal() = False +History() = [19, 36, 49, 49, 16, 37] +HistoryString() = "19, 36, 49, 49, 16, 37" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 6, Hat: 1, Basketball: 1\nAgreement reached? 0\nP0 offers: Offer: Book: 0, Hat: 0, Basketball: 1\nP1 offers: Offer: Book: 0, Hat: 2, Basketball: 1\nP0 offers: Offer: Book: 0, Hat: 2, Basketball: 1\nP1 offers: Offer: Book: 1, Hat: 2, Basketball: 0\nP0 offers: Offer: Book: 1, Hat: 0, Basketball: 1\n" +InformationStateString(1) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 4, Hat: 1, Basketball: 3\nAgreement reached? 0\nP0 offers: Offer: Book: 0, Hat: 0, Basketball: 1\nP1 offers: Offer: Book: 0, Hat: 2, Basketball: 1\nP0 offers: Offer: Book: 0, Hat: 2, Basketball: 1\nP1 offers: Offer: Book: 1, Hat: 2, Basketball: 0\nP0 offers: Offer: Book: 1, Hat: 0, Basketball: 1\n" +InformationStateTensor(0): binvec(309, 0x8181e181fc3006008080c080e0c080e0c0c0e080c080c0000000000000000000000000000000) +InformationStateTensor(1): binvec(309, 0x8181e181f03007808080c080e0c080e0c0c0e080c080c0000000000000000000000000000000) +ObservationString(0) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 6, Hat: 1, Basketball: 1\nAgreement reached? 0\nNumber of offers: 5\nP0 offers: Offer: Book: 1, Hat: 0, Basketball: 1\n" +ObservationString(1) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 4, Hat: 1, Basketball: 3\nAgreement reached? 
0\nNumber of offers: 5\nP0 offers: Offer: Book: 1, Hat: 0, Basketball: 1\n" +ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◉◉◉◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◉◯◯◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◉◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◉◉◉◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 8, 9, 15, 16, 21, 22, 36, 37, 43, 44, 49, 50, 54, 55, 120] +StringLegalActions() = ["Offer: Book: 0, Hat: 0, Basketball: 0", "Offer: Book: 1, Hat: 0, Basketball: 0", "Offer: Book: 0, Hat: 1, Basketball: 0", "Offer: Book: 1, Hat: 1, Basketball: 0", "Offer: Book: 0, Hat: 2, Basketball: 0", "Offer: Book: 1, Hat: 2, Basketball: 0", "Offer: Book: 0, Hat: 3, Basketball: 0", "Offer: Book: 1, Hat: 3, Basketball: 0", "Offer: Book: 0, Hat: 0, Basketball: 1", "Offer: Book: 1, Hat: 0, Basketball: 1", "Offer: Book: 0, Hat: 1, Basketball: 1", "Offer: Book: 1, Hat: 1, Basketball: 1", "Offer: Book: 0, Hat: 2, Basketball: 1", "Offer: Book: 1, Hat: 2, Basketball: 1", "Offer: Book: 0, Hat: 3, Basketball: 1", "Offer: Book: 1, Hat: 3, Basketball: 1", "Agree"] + +# Apply action "Agree" +action: 120 + +# State 7 +# Pool: Book: 1, Hat: 3, Basketball: 1 +# P0 vals: Book: 6, Hat: 1, Basketball: 1 +# P1 vals: Book: 4, Hat: 1, Basketball: 3 +# Agreement reached? 1 +# P0 offers: Offer: Book: 0, Hat: 0, Basketball: 1 +# P1 offers: Offer: Book: 0, Hat: 2, Basketball: 1 +# P0 offers: Offer: Book: 0, Hat: 2, Basketball: 1 +# P1 offers: Offer: Book: 1, Hat: 2, Basketball: 0 +# P0 offers: Offer: Book: 1, Hat: 0, Basketball: 1 +IsTerminal() = True +History() = [19, 36, 49, 49, 16, 37, 120] +HistoryString() = "19, 36, 49, 49, 16, 37, 120" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 6, Hat: 1, Basketball: 1\nAgreement reached? 1\nP0 offers: Offer: Book: 0, Hat: 0, Basketball: 1\nP1 offers: Offer: Book: 0, Hat: 2, Basketball: 1\nP0 offers: Offer: Book: 0, Hat: 2, Basketball: 1\nP1 offers: Offer: Book: 1, Hat: 2, Basketball: 0\nP0 offers: Offer: Book: 1, Hat: 0, Basketball: 1\n" +InformationStateString(1) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 4, Hat: 1, Basketball: 3\nAgreement reached? 1\nP0 offers: Offer: Book: 0, Hat: 0, Basketball: 1\nP1 offers: Offer: Book: 0, Hat: 2, Basketball: 1\nP0 offers: Offer: Book: 0, Hat: 2, Basketball: 1\nP1 offers: Offer: Book: 1, Hat: 2, Basketball: 0\nP0 offers: Offer: Book: 1, Hat: 0, Basketball: 1\n" +InformationStateTensor(0): binvec(309, 0x108181e181fc3006008080c080e0c080e0c0c0e080c080c0000000000000000000000000000000) +InformationStateTensor(1): binvec(309, 0x108181e181f03007808080c080e0c080e0c0c0e080c080c0000000000000000000000000000000) +ObservationString(0) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 6, Hat: 1, Basketball: 1\nAgreement reached? 1\nNumber of offers: 5\nP0 offers: Offer: Book: 1, Hat: 0, Basketball: 1\n" +ObservationString(1) = "Pool: Book: 1, Hat: 3, Basketball: 1\nMy values: Book: 4, Hat: 1, Basketball: 3\nAgreement reached? 
1\nNumber of offers: 5\nP0 offers: Offer: Book: 1, Hat: 0, Basketball: 1\n" +ObservationTensor(0): ◉◯◯◯◯◉◯◯◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◉◉◉◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◉◯◯◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◯◯◯◯◉◉◯◯◯◯◯◯◉◉◉◉◉◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◉◉◉◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯ +Rewards() = [7, 3] +Returns() = [7, 3] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/battleship.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/battleship.txt new file mode 100644 index 0000000..72be1ae --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/battleship.txt @@ -0,0 +1,1252 @@ +game: battleship + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Battleship" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["allow_repeated_shots", "board_height", "board_width", "loss_multiplier", "num_shots", "ship_sizes", "ship_values"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = False +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "battleship" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 300 +PolicyTensorShape() = [300] +MaxChanceOutcomes() = 0 +GetParameters() = {allow_repeated_shots=True,board_height=10,board_width=10,loss_multiplier=1.0,num_shots=50,ship_sizes=[2;3;3;4;5],ship_values=[1.0;1.0;1.0;1.0;1.0]} +NumPlayers() = 2 +MinUtility() = -5.0 +MaxUtility() = 5.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = [2615] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 2615 +MaxGameLength() = 110 +ToString() = "battleship()" + +# State 0 +# Player 0's board: +# +----------+ +# | | +# | | +# | | +# | | +# | | +# | | +# | | +# | | +# | | +# | | +# +----------+ +# +# Player 1's board: +# +----------+ +# | | +# | | +# | | +# | | +# | | +# | | +# | | +# | | +# | | +# | | +# +----------+ +# +# Full history: +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "T=0 " +InformationStateString(1) = "T=0 " +InformationStateTensor(0): binvec(2615, 0x280000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(2615, 
0x180000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationString(0) = "State of player's ships:\n+----------+\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n+----------+\n" +ObservationString(1) = "State of player's ships:\n+----------+\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n+----------+\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [100, 101, 102, 103, 104, 105, 106, 107, 108, 110, 111, 112, 113, 114, 115, 116, 117, 118, 120, 121, 122, 123, 124, 125, 126, 127, 128, 130, 131, 132, 133, 134, 135, 136, 137, 138, 140, 141, 142, 143, 144, 145, 146, 147, 148, 150, 151, 152, 153, 154, 155, 156, 157, 158, 160, 161, 162, 163, 164, 165, 166, 167, 168, 170, 171, 172, 173, 174, 175, 176, 177, 178, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191, 192, 193, 194, 195, 196, 197, 198, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289] +StringLegalActions() = ["Pl0: place ship horizontally with top-left corner in (0, 0)", "Pl0: place ship horizontally with top-left corner in (0, 1)", "Pl0: place ship horizontally with top-left corner in (0, 2)", "Pl0: place ship horizontally with top-left corner in (0, 3)", "Pl0: place ship horizontally with top-left corner in (0, 4)", "Pl0: place ship horizontally with top-left corner in (0, 5)", "Pl0: place ship horizontally with top-left corner in (0, 6)", "Pl0: place ship horizontally with top-left corner in (0, 7)", "Pl0: place ship horizontally with top-left corner in (0, 8)", "Pl0: place ship horizontally with top-left corner in (1, 0)", "Pl0: place ship horizontally with top-left corner in (1, 1)", "Pl0: place ship horizontally with top-left corner in (1, 2)", "Pl0: place ship horizontally with top-left corner in (1, 3)", "Pl0: place ship horizontally with top-left corner in (1, 4)", "Pl0: place ship horizontally with top-left corner in (1, 5)", "Pl0: place ship horizontally with top-left corner in (1, 6)", "Pl0: place ship horizontally with top-left corner in (1, 7)", "Pl0: place ship horizontally with top-left corner in (1, 8)", "Pl0: place ship horizontally with top-left corner in (2, 0)", "Pl0: place ship horizontally with top-left corner in (2, 1)", "Pl0: place ship horizontally with top-left corner in (2, 2)", "Pl0: place ship horizontally with top-left corner in (2, 3)", "Pl0: place ship horizontally with top-left corner in 
(2, 4)", "Pl0: place ship horizontally with top-left corner in (2, 5)", "Pl0: place ship horizontally with top-left corner in (2, 6)", "Pl0: place ship horizontally with top-left corner in (2, 7)", "Pl0: place ship horizontally with top-left corner in (2, 8)", "Pl0: place ship horizontally with top-left corner in (3, 0)", "Pl0: place ship horizontally with top-left corner in (3, 1)", "Pl0: place ship horizontally with top-left corner in (3, 2)", "Pl0: place ship horizontally with top-left corner in (3, 3)", "Pl0: place ship horizontally with top-left corner in (3, 4)", "Pl0: place ship horizontally with top-left corner in (3, 5)", "Pl0: place ship horizontally with top-left corner in (3, 6)", "Pl0: place ship horizontally with top-left corner in (3, 7)", "Pl0: place ship horizontally with top-left corner in (3, 8)", "Pl0: place ship horizontally with top-left corner in (4, 0)", "Pl0: place ship horizontally with top-left corner in (4, 1)", "Pl0: place ship horizontally with top-left corner in (4, 2)", "Pl0: place ship horizontally with top-left corner in (4, 3)", "Pl0: place ship horizontally with top-left corner in (4, 4)", "Pl0: place ship horizontally with top-left corner in (4, 5)", "Pl0: place ship horizontally with top-left corner in (4, 6)", "Pl0: place ship horizontally with top-left corner in (4, 7)", "Pl0: place ship horizontally with top-left corner in (4, 8)", "Pl0: place ship horizontally with top-left corner in (5, 0)", "Pl0: place ship horizontally with top-left corner in (5, 1)", "Pl0: place ship horizontally with top-left corner in (5, 2)", "Pl0: place ship horizontally with top-left corner in (5, 3)", "Pl0: place ship horizontally with top-left corner in (5, 4)", "Pl0: place ship horizontally with top-left corner in (5, 5)", "Pl0: place ship horizontally with top-left corner in (5, 6)", "Pl0: place ship horizontally with top-left corner in (5, 7)", "Pl0: place ship horizontally with top-left corner in (5, 8)", "Pl0: place ship horizontally with top-left corner in (6, 0)", "Pl0: place ship horizontally with top-left corner in (6, 1)", "Pl0: place ship horizontally with top-left corner in (6, 2)", "Pl0: place ship horizontally with top-left corner in (6, 3)", "Pl0: place ship horizontally with top-left corner in (6, 4)", "Pl0: place ship horizontally with top-left corner in (6, 5)", "Pl0: place ship horizontally with top-left corner in (6, 6)", "Pl0: place ship horizontally with top-left corner in (6, 7)", "Pl0: place ship horizontally with top-left corner in (6, 8)", "Pl0: place ship horizontally with top-left corner in (7, 0)", "Pl0: place ship horizontally with top-left corner in (7, 1)", "Pl0: place ship horizontally with top-left corner in (7, 2)", "Pl0: place ship horizontally with top-left corner in (7, 3)", "Pl0: place ship horizontally with top-left corner in (7, 4)", "Pl0: place ship horizontally with top-left corner in (7, 5)", "Pl0: place ship horizontally with top-left corner in (7, 6)", "Pl0: place ship horizontally with top-left corner in (7, 7)", "Pl0: place ship horizontally with top-left corner in (7, 8)", "Pl0: place ship horizontally with top-left corner in (8, 0)", "Pl0: place ship horizontally with top-left corner in (8, 1)", "Pl0: place ship horizontally with top-left corner in (8, 2)", "Pl0: place ship horizontally with top-left corner in (8, 3)", "Pl0: place ship horizontally with top-left corner in (8, 4)", "Pl0: place ship horizontally with top-left corner in (8, 5)", "Pl0: place ship horizontally with top-left corner in (8, 6)", "Pl0: place ship 
horizontally with top-left corner in (8, 7)", "Pl0: place ship horizontally with top-left corner in (8, 8)", "Pl0: place ship horizontally with top-left corner in (9, 0)", "Pl0: place ship horizontally with top-left corner in (9, 1)", "Pl0: place ship horizontally with top-left corner in (9, 2)", "Pl0: place ship horizontally with top-left corner in (9, 3)", "Pl0: place ship horizontally with top-left corner in (9, 4)", "Pl0: place ship horizontally with top-left corner in (9, 5)", "Pl0: place ship horizontally with top-left corner in (9, 6)", "Pl0: place ship horizontally with top-left corner in (9, 7)", "Pl0: place ship horizontally with top-left corner in (9, 8)", "Pl0: place ship vertically with top-left corner in (0, 0)", "Pl0: place ship vertically with top-left corner in (0, 1)", "Pl0: place ship vertically with top-left corner in (0, 2)", "Pl0: place ship vertically with top-left corner in (0, 3)", "Pl0: place ship vertically with top-left corner in (0, 4)", "Pl0: place ship vertically with top-left corner in (0, 5)", "Pl0: place ship vertically with top-left corner in (0, 6)", "Pl0: place ship vertically with top-left corner in (0, 7)", "Pl0: place ship vertically with top-left corner in (0, 8)", "Pl0: place ship vertically with top-left corner in (0, 9)", "Pl0: place ship vertically with top-left corner in (1, 0)", "Pl0: place ship vertically with top-left corner in (1, 1)", "Pl0: place ship vertically with top-left corner in (1, 2)", "Pl0: place ship vertically with top-left corner in (1, 3)", "Pl0: place ship vertically with top-left corner in (1, 4)", "Pl0: place ship vertically with top-left corner in (1, 5)", "Pl0: place ship vertically with top-left corner in (1, 6)", "Pl0: place ship vertically with top-left corner in (1, 7)", "Pl0: place ship vertically with top-left corner in (1, 8)", "Pl0: place ship vertically with top-left corner in (1, 9)", "Pl0: place ship vertically with top-left corner in (2, 0)", "Pl0: place ship vertically with top-left corner in (2, 1)", "Pl0: place ship vertically with top-left corner in (2, 2)", "Pl0: place ship vertically with top-left corner in (2, 3)", "Pl0: place ship vertically with top-left corner in (2, 4)", "Pl0: place ship vertically with top-left corner in (2, 5)", "Pl0: place ship vertically with top-left corner in (2, 6)", "Pl0: place ship vertically with top-left corner in (2, 7)", "Pl0: place ship vertically with top-left corner in (2, 8)", "Pl0: place ship vertically with top-left corner in (2, 9)", "Pl0: place ship vertically with top-left corner in (3, 0)", "Pl0: place ship vertically with top-left corner in (3, 1)", "Pl0: place ship vertically with top-left corner in (3, 2)", "Pl0: place ship vertically with top-left corner in (3, 3)", "Pl0: place ship vertically with top-left corner in (3, 4)", "Pl0: place ship vertically with top-left corner in (3, 5)", "Pl0: place ship vertically with top-left corner in (3, 6)", "Pl0: place ship vertically with top-left corner in (3, 7)", "Pl0: place ship vertically with top-left corner in (3, 8)", "Pl0: place ship vertically with top-left corner in (3, 9)", "Pl0: place ship vertically with top-left corner in (4, 0)", "Pl0: place ship vertically with top-left corner in (4, 1)", "Pl0: place ship vertically with top-left corner in (4, 2)", "Pl0: place ship vertically with top-left corner in (4, 3)", "Pl0: place ship vertically with top-left corner in (4, 4)", "Pl0: place ship vertically with top-left corner in (4, 5)", "Pl0: place ship vertically with top-left corner in (4, 6)", "Pl0: place 
ship vertically with top-left corner in (4, 7)", "Pl0: place ship vertically with top-left corner in (4, 8)", "Pl0: place ship vertically with top-left corner in (4, 9)", "Pl0: place ship vertically with top-left corner in (5, 0)", "Pl0: place ship vertically with top-left corner in (5, 1)", "Pl0: place ship vertically with top-left corner in (5, 2)", "Pl0: place ship vertically with top-left corner in (5, 3)", "Pl0: place ship vertically with top-left corner in (5, 4)", "Pl0: place ship vertically with top-left corner in (5, 5)", "Pl0: place ship vertically with top-left corner in (5, 6)", "Pl0: place ship vertically with top-left corner in (5, 7)", "Pl0: place ship vertically with top-left corner in (5, 8)", "Pl0: place ship vertically with top-left corner in (5, 9)", "Pl0: place ship vertically with top-left corner in (6, 0)", "Pl0: place ship vertically with top-left corner in (6, 1)", "Pl0: place ship vertically with top-left corner in (6, 2)", "Pl0: place ship vertically with top-left corner in (6, 3)", "Pl0: place ship vertically with top-left corner in (6, 4)", "Pl0: place ship vertically with top-left corner in (6, 5)", "Pl0: place ship vertically with top-left corner in (6, 6)", "Pl0: place ship vertically with top-left corner in (6, 7)", "Pl0: place ship vertically with top-left corner in (6, 8)", "Pl0: place ship vertically with top-left corner in (6, 9)", "Pl0: place ship vertically with top-left corner in (7, 0)", "Pl0: place ship vertically with top-left corner in (7, 1)", "Pl0: place ship vertically with top-left corner in (7, 2)", "Pl0: place ship vertically with top-left corner in (7, 3)", "Pl0: place ship vertically with top-left corner in (7, 4)", "Pl0: place ship vertically with top-left corner in (7, 5)", "Pl0: place ship vertically with top-left corner in (7, 6)", "Pl0: place ship vertically with top-left corner in (7, 7)", "Pl0: place ship vertically with top-left corner in (7, 8)", "Pl0: place ship vertically with top-left corner in (7, 9)", "Pl0: place ship vertically with top-left corner in (8, 0)", "Pl0: place ship vertically with top-left corner in (8, 1)", "Pl0: place ship vertically with top-left corner in (8, 2)", "Pl0: place ship vertically with top-left corner in (8, 3)", "Pl0: place ship vertically with top-left corner in (8, 4)", "Pl0: place ship vertically with top-left corner in (8, 5)", "Pl0: place ship vertically with top-left corner in (8, 6)", "Pl0: place ship vertically with top-left corner in (8, 7)", "Pl0: place ship vertically with top-left corner in (8, 8)", "Pl0: place ship vertically with top-left corner in (8, 9)"] + +# Apply action "Pl0: place ship vertically with top-left corner in (2, 4)" +action: 224 + +# State 1 +# Player 0's board: +# +----------+ +# | | +# | | +# | a | +# | a | +# | | +# | | +# | | +# | | +# | | +# | | +# +----------+ +# +# Player 1's board: +# +----------+ +# | | +# | | +# | | +# | | +# | | +# | | +# | | +# | | +# | | +# | | +# +----------+ +# +# Full history: /0:v_2_4 +IsTerminal() = False +History() = [224] +HistoryString() = "224" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "T=1 /v_2_4" +InformationStateString(1) = "T=1 " +InformationStateTensor(0): binvec(2615, 
0x252002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(2615, 0x140000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationString(0) = "State of player's ships:\n+----------+\n| |\n| |\n| a |\n| a |\n| |\n| |\n| |\n| |\n| |\n| |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n+----------+\n" +ObservationString(1) = "State of player's ships:\n+----------+\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n+----------+\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [100, 101, 102, 103, 104, 105, 106, 107, 108, 110, 111, 112, 113, 114, 115, 116, 117, 118, 120, 121, 122, 123, 124, 125, 126, 127, 128, 130, 131, 132, 133, 134, 135, 136, 137, 138, 140, 141, 142, 143, 144, 145, 146, 147, 148, 150, 151, 152, 153, 154, 155, 156, 157, 158, 160, 161, 162, 163, 164, 165, 166, 167, 168, 170, 171, 172, 173, 174, 175, 176, 177, 178, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191, 192, 193, 194, 195, 196, 197, 198, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289] +StringLegalActions() = ["Pl1: place ship horizontally with top-left corner in (0, 0)", "Pl1: place ship horizontally with top-left corner in (0, 1)", "Pl1: place ship horizontally with top-left corner in (0, 2)", "Pl1: place ship horizontally with top-left corner in (0, 3)", "Pl1: place ship horizontally with top-left corner in (0, 4)", "Pl1: place ship horizontally with top-left corner in (0, 5)", "Pl1: place ship horizontally with top-left corner in (0, 6)", "Pl1: place ship horizontally with top-left corner in (0, 7)", "Pl1: place ship horizontally with top-left corner in (0, 8)", "Pl1: place ship horizontally with top-left corner in (1, 0)", "Pl1: place ship horizontally with top-left corner in (1, 1)", "Pl1: place ship horizontally with top-left 
corner in (1, 2)", "Pl1: place ship horizontally with top-left corner in (1, 3)", "Pl1: place ship horizontally with top-left corner in (1, 4)", "Pl1: place ship horizontally with top-left corner in (1, 5)", "Pl1: place ship horizontally with top-left corner in (1, 6)", "Pl1: place ship horizontally with top-left corner in (1, 7)", "Pl1: place ship horizontally with top-left corner in (1, 8)", "Pl1: place ship horizontally with top-left corner in (2, 0)", "Pl1: place ship horizontally with top-left corner in (2, 1)", "Pl1: place ship horizontally with top-left corner in (2, 2)", "Pl1: place ship horizontally with top-left corner in (2, 3)", "Pl1: place ship horizontally with top-left corner in (2, 4)", "Pl1: place ship horizontally with top-left corner in (2, 5)", "Pl1: place ship horizontally with top-left corner in (2, 6)", "Pl1: place ship horizontally with top-left corner in (2, 7)", "Pl1: place ship horizontally with top-left corner in (2, 8)", "Pl1: place ship horizontally with top-left corner in (3, 0)", "Pl1: place ship horizontally with top-left corner in (3, 1)", "Pl1: place ship horizontally with top-left corner in (3, 2)", "Pl1: place ship horizontally with top-left corner in (3, 3)", "Pl1: place ship horizontally with top-left corner in (3, 4)", "Pl1: place ship horizontally with top-left corner in (3, 5)", "Pl1: place ship horizontally with top-left corner in (3, 6)", "Pl1: place ship horizontally with top-left corner in (3, 7)", "Pl1: place ship horizontally with top-left corner in (3, 8)", "Pl1: place ship horizontally with top-left corner in (4, 0)", "Pl1: place ship horizontally with top-left corner in (4, 1)", "Pl1: place ship horizontally with top-left corner in (4, 2)", "Pl1: place ship horizontally with top-left corner in (4, 3)", "Pl1: place ship horizontally with top-left corner in (4, 4)", "Pl1: place ship horizontally with top-left corner in (4, 5)", "Pl1: place ship horizontally with top-left corner in (4, 6)", "Pl1: place ship horizontally with top-left corner in (4, 7)", "Pl1: place ship horizontally with top-left corner in (4, 8)", "Pl1: place ship horizontally with top-left corner in (5, 0)", "Pl1: place ship horizontally with top-left corner in (5, 1)", "Pl1: place ship horizontally with top-left corner in (5, 2)", "Pl1: place ship horizontally with top-left corner in (5, 3)", "Pl1: place ship horizontally with top-left corner in (5, 4)", "Pl1: place ship horizontally with top-left corner in (5, 5)", "Pl1: place ship horizontally with top-left corner in (5, 6)", "Pl1: place ship horizontally with top-left corner in (5, 7)", "Pl1: place ship horizontally with top-left corner in (5, 8)", "Pl1: place ship horizontally with top-left corner in (6, 0)", "Pl1: place ship horizontally with top-left corner in (6, 1)", "Pl1: place ship horizontally with top-left corner in (6, 2)", "Pl1: place ship horizontally with top-left corner in (6, 3)", "Pl1: place ship horizontally with top-left corner in (6, 4)", "Pl1: place ship horizontally with top-left corner in (6, 5)", "Pl1: place ship horizontally with top-left corner in (6, 6)", "Pl1: place ship horizontally with top-left corner in (6, 7)", "Pl1: place ship horizontally with top-left corner in (6, 8)", "Pl1: place ship horizontally with top-left corner in (7, 0)", "Pl1: place ship horizontally with top-left corner in (7, 1)", "Pl1: place ship horizontally with top-left corner in (7, 2)", "Pl1: place ship horizontally with top-left corner in (7, 3)", "Pl1: place ship horizontally with top-left corner in (7, 4)", "Pl1: 
place ship horizontally with top-left corner in (7, 5)", "Pl1: place ship horizontally with top-left corner in (7, 6)", "Pl1: place ship horizontally with top-left corner in (7, 7)", "Pl1: place ship horizontally with top-left corner in (7, 8)", "Pl1: place ship horizontally with top-left corner in (8, 0)", "Pl1: place ship horizontally with top-left corner in (8, 1)", "Pl1: place ship horizontally with top-left corner in (8, 2)", "Pl1: place ship horizontally with top-left corner in (8, 3)", "Pl1: place ship horizontally with top-left corner in (8, 4)", "Pl1: place ship horizontally with top-left corner in (8, 5)", "Pl1: place ship horizontally with top-left corner in (8, 6)", "Pl1: place ship horizontally with top-left corner in (8, 7)", "Pl1: place ship horizontally with top-left corner in (8, 8)", "Pl1: place ship horizontally with top-left corner in (9, 0)", "Pl1: place ship horizontally with top-left corner in (9, 1)", "Pl1: place ship horizontally with top-left corner in (9, 2)", "Pl1: place ship horizontally with top-left corner in (9, 3)", "Pl1: place ship horizontally with top-left corner in (9, 4)", "Pl1: place ship horizontally with top-left corner in (9, 5)", "Pl1: place ship horizontally with top-left corner in (9, 6)", "Pl1: place ship horizontally with top-left corner in (9, 7)", "Pl1: place ship horizontally with top-left corner in (9, 8)", "Pl1: place ship vertically with top-left corner in (0, 0)", "Pl1: place ship vertically with top-left corner in (0, 1)", "Pl1: place ship vertically with top-left corner in (0, 2)", "Pl1: place ship vertically with top-left corner in (0, 3)", "Pl1: place ship vertically with top-left corner in (0, 4)", "Pl1: place ship vertically with top-left corner in (0, 5)", "Pl1: place ship vertically with top-left corner in (0, 6)", "Pl1: place ship vertically with top-left corner in (0, 7)", "Pl1: place ship vertically with top-left corner in (0, 8)", "Pl1: place ship vertically with top-left corner in (0, 9)", "Pl1: place ship vertically with top-left corner in (1, 0)", "Pl1: place ship vertically with top-left corner in (1, 1)", "Pl1: place ship vertically with top-left corner in (1, 2)", "Pl1: place ship vertically with top-left corner in (1, 3)", "Pl1: place ship vertically with top-left corner in (1, 4)", "Pl1: place ship vertically with top-left corner in (1, 5)", "Pl1: place ship vertically with top-left corner in (1, 6)", "Pl1: place ship vertically with top-left corner in (1, 7)", "Pl1: place ship vertically with top-left corner in (1, 8)", "Pl1: place ship vertically with top-left corner in (1, 9)", "Pl1: place ship vertically with top-left corner in (2, 0)", "Pl1: place ship vertically with top-left corner in (2, 1)", "Pl1: place ship vertically with top-left corner in (2, 2)", "Pl1: place ship vertically with top-left corner in (2, 3)", "Pl1: place ship vertically with top-left corner in (2, 4)", "Pl1: place ship vertically with top-left corner in (2, 5)", "Pl1: place ship vertically with top-left corner in (2, 6)", "Pl1: place ship vertically with top-left corner in (2, 7)", "Pl1: place ship vertically with top-left corner in (2, 8)", "Pl1: place ship vertically with top-left corner in (2, 9)", "Pl1: place ship vertically with top-left corner in (3, 0)", "Pl1: place ship vertically with top-left corner in (3, 1)", "Pl1: place ship vertically with top-left corner in (3, 2)", "Pl1: place ship vertically with top-left corner in (3, 3)", "Pl1: place ship vertically with top-left corner in (3, 4)", "Pl1: place ship vertically with 
top-left corner in (3, 5)", "Pl1: place ship vertically with top-left corner in (3, 6)", "Pl1: place ship vertically with top-left corner in (3, 7)", "Pl1: place ship vertically with top-left corner in (3, 8)", "Pl1: place ship vertically with top-left corner in (3, 9)", "Pl1: place ship vertically with top-left corner in (4, 0)", "Pl1: place ship vertically with top-left corner in (4, 1)", "Pl1: place ship vertically with top-left corner in (4, 2)", "Pl1: place ship vertically with top-left corner in (4, 3)", "Pl1: place ship vertically with top-left corner in (4, 4)", "Pl1: place ship vertically with top-left corner in (4, 5)", "Pl1: place ship vertically with top-left corner in (4, 6)", "Pl1: place ship vertically with top-left corner in (4, 7)", "Pl1: place ship vertically with top-left corner in (4, 8)", "Pl1: place ship vertically with top-left corner in (4, 9)", "Pl1: place ship vertically with top-left corner in (5, 0)", "Pl1: place ship vertically with top-left corner in (5, 1)", "Pl1: place ship vertically with top-left corner in (5, 2)", "Pl1: place ship vertically with top-left corner in (5, 3)", "Pl1: place ship vertically with top-left corner in (5, 4)", "Pl1: place ship vertically with top-left corner in (5, 5)", "Pl1: place ship vertically with top-left corner in (5, 6)", "Pl1: place ship vertically with top-left corner in (5, 7)", "Pl1: place ship vertically with top-left corner in (5, 8)", "Pl1: place ship vertically with top-left corner in (5, 9)", "Pl1: place ship vertically with top-left corner in (6, 0)", "Pl1: place ship vertically with top-left corner in (6, 1)", "Pl1: place ship vertically with top-left corner in (6, 2)", "Pl1: place ship vertically with top-left corner in (6, 3)", "Pl1: place ship vertically with top-left corner in (6, 4)", "Pl1: place ship vertically with top-left corner in (6, 5)", "Pl1: place ship vertically with top-left corner in (6, 6)", "Pl1: place ship vertically with top-left corner in (6, 7)", "Pl1: place ship vertically with top-left corner in (6, 8)", "Pl1: place ship vertically with top-left corner in (6, 9)", "Pl1: place ship vertically with top-left corner in (7, 0)", "Pl1: place ship vertically with top-left corner in (7, 1)", "Pl1: place ship vertically with top-left corner in (7, 2)", "Pl1: place ship vertically with top-left corner in (7, 3)", "Pl1: place ship vertically with top-left corner in (7, 4)", "Pl1: place ship vertically with top-left corner in (7, 5)", "Pl1: place ship vertically with top-left corner in (7, 6)", "Pl1: place ship vertically with top-left corner in (7, 7)", "Pl1: place ship vertically with top-left corner in (7, 8)", "Pl1: place ship vertically with top-left corner in (7, 9)", "Pl1: place ship vertically with top-left corner in (8, 0)", "Pl1: place ship vertically with top-left corner in (8, 1)", "Pl1: place ship vertically with top-left corner in (8, 2)", "Pl1: place ship vertically with top-left corner in (8, 3)", "Pl1: place ship vertically with top-left corner in (8, 4)", "Pl1: place ship vertically with top-left corner in (8, 5)", "Pl1: place ship vertically with top-left corner in (8, 6)", "Pl1: place ship vertically with top-left corner in (8, 7)", "Pl1: place ship vertically with top-left corner in (8, 8)", "Pl1: place ship vertically with top-left corner in (8, 9)"] + +# Apply action "Pl1: place ship vertically with top-left corner in (1, 3)" +action: 213 + +# State 2 +# Player 0's board: +# +----------+ +# | | +# | | +# | a | +# | a | +# | | +# | | +# | | +# | | +# | | +# | | +# +----------+ 
+# +# Player 1's board: +# +----------+ +# | | +# | a | +# | a | +# | | +# | | +# | | +# | | +# | | +# | | +# | | +# +----------+ +# +# Full history: /0:v_2_4/1:v_1_3 +IsTerminal() = False +History() = [224, 213] +HistoryString() = "224, 213" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "T=2 /v_2_4" +InformationStateString(1) = "T=2 /v_1_3" +InformationStateTensor(0): binvec(2615, 0x292002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(2615, 0x194004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationString(0) = "State of player's ships:\n+----------+\n| |\n| |\n| a |\n| a |\n| |\n| |\n| |\n| |\n| |\n| |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n+----------+\n" +ObservationString(1) = "State of player's ships:\n+----------+\n| |\n| a |\n| a |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n+----------+\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [100, 101, 102, 103, 104, 105, 106, 107, 110, 111, 112, 113, 114, 115, 116, 117, 120, 121, 125, 126, 127, 130, 131, 135, 136, 137, 140, 141, 142, 143, 144, 145, 146, 147, 150, 151, 152, 153, 154, 155, 156, 157, 160, 161, 162, 163, 164, 165, 166, 167, 170, 171, 172, 173, 174, 175, 176, 177, 180, 181, 182, 183, 184, 185, 186, 187, 190, 191, 192, 193, 194, 195, 196, 197, 200, 201, 202, 203, 205, 206, 207, 208, 209, 210, 211, 212, 213, 215, 216, 217, 218, 219, 220, 221, 222, 223, 225, 226, 227, 228, 229, 230, 231, 232, 233, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279] +StringLegalActions() = ["Pl0: place ship horizontally with top-left corner in (0, 0)", "Pl0: place ship horizontally with top-left corner in (0, 1)", "Pl0: place ship horizontally with top-left corner in (0, 2)", "Pl0: place ship horizontally with top-left corner in (0, 3)", "Pl0: place ship horizontally with top-left corner in (0, 4)", "Pl0: place ship horizontally with top-left corner in (0, 5)", "Pl0: place ship horizontally with top-left corner in (0, 6)", 
"Pl0: place ship horizontally with top-left corner in (0, 7)", "Pl0: place ship horizontally with top-left corner in (1, 0)", "Pl0: place ship horizontally with top-left corner in (1, 1)", "Pl0: place ship horizontally with top-left corner in (1, 2)", "Pl0: place ship horizontally with top-left corner in (1, 3)", "Pl0: place ship horizontally with top-left corner in (1, 4)", "Pl0: place ship horizontally with top-left corner in (1, 5)", "Pl0: place ship horizontally with top-left corner in (1, 6)", "Pl0: place ship horizontally with top-left corner in (1, 7)", "Pl0: place ship horizontally with top-left corner in (2, 0)", "Pl0: place ship horizontally with top-left corner in (2, 1)", "Pl0: place ship horizontally with top-left corner in (2, 5)", "Pl0: place ship horizontally with top-left corner in (2, 6)", "Pl0: place ship horizontally with top-left corner in (2, 7)", "Pl0: place ship horizontally with top-left corner in (3, 0)", "Pl0: place ship horizontally with top-left corner in (3, 1)", "Pl0: place ship horizontally with top-left corner in (3, 5)", "Pl0: place ship horizontally with top-left corner in (3, 6)", "Pl0: place ship horizontally with top-left corner in (3, 7)", "Pl0: place ship horizontally with top-left corner in (4, 0)", "Pl0: place ship horizontally with top-left corner in (4, 1)", "Pl0: place ship horizontally with top-left corner in (4, 2)", "Pl0: place ship horizontally with top-left corner in (4, 3)", "Pl0: place ship horizontally with top-left corner in (4, 4)", "Pl0: place ship horizontally with top-left corner in (4, 5)", "Pl0: place ship horizontally with top-left corner in (4, 6)", "Pl0: place ship horizontally with top-left corner in (4, 7)", "Pl0: place ship horizontally with top-left corner in (5, 0)", "Pl0: place ship horizontally with top-left corner in (5, 1)", "Pl0: place ship horizontally with top-left corner in (5, 2)", "Pl0: place ship horizontally with top-left corner in (5, 3)", "Pl0: place ship horizontally with top-left corner in (5, 4)", "Pl0: place ship horizontally with top-left corner in (5, 5)", "Pl0: place ship horizontally with top-left corner in (5, 6)", "Pl0: place ship horizontally with top-left corner in (5, 7)", "Pl0: place ship horizontally with top-left corner in (6, 0)", "Pl0: place ship horizontally with top-left corner in (6, 1)", "Pl0: place ship horizontally with top-left corner in (6, 2)", "Pl0: place ship horizontally with top-left corner in (6, 3)", "Pl0: place ship horizontally with top-left corner in (6, 4)", "Pl0: place ship horizontally with top-left corner in (6, 5)", "Pl0: place ship horizontally with top-left corner in (6, 6)", "Pl0: place ship horizontally with top-left corner in (6, 7)", "Pl0: place ship horizontally with top-left corner in (7, 0)", "Pl0: place ship horizontally with top-left corner in (7, 1)", "Pl0: place ship horizontally with top-left corner in (7, 2)", "Pl0: place ship horizontally with top-left corner in (7, 3)", "Pl0: place ship horizontally with top-left corner in (7, 4)", "Pl0: place ship horizontally with top-left corner in (7, 5)", "Pl0: place ship horizontally with top-left corner in (7, 6)", "Pl0: place ship horizontally with top-left corner in (7, 7)", "Pl0: place ship horizontally with top-left corner in (8, 0)", "Pl0: place ship horizontally with top-left corner in (8, 1)", "Pl0: place ship horizontally with top-left corner in (8, 2)", "Pl0: place ship horizontally with top-left corner in (8, 3)", "Pl0: place ship horizontally with top-left corner in (8, 4)", "Pl0: place ship 
horizontally with top-left corner in (8, 5)", "Pl0: place ship horizontally with top-left corner in (8, 6)", "Pl0: place ship horizontally with top-left corner in (8, 7)", "Pl0: place ship horizontally with top-left corner in (9, 0)", "Pl0: place ship horizontally with top-left corner in (9, 1)", "Pl0: place ship horizontally with top-left corner in (9, 2)", "Pl0: place ship horizontally with top-left corner in (9, 3)", "Pl0: place ship horizontally with top-left corner in (9, 4)", "Pl0: place ship horizontally with top-left corner in (9, 5)", "Pl0: place ship horizontally with top-left corner in (9, 6)", "Pl0: place ship horizontally with top-left corner in (9, 7)", "Pl0: place ship vertically with top-left corner in (0, 0)", "Pl0: place ship vertically with top-left corner in (0, 1)", "Pl0: place ship vertically with top-left corner in (0, 2)", "Pl0: place ship vertically with top-left corner in (0, 3)", "Pl0: place ship vertically with top-left corner in (0, 5)", "Pl0: place ship vertically with top-left corner in (0, 6)", "Pl0: place ship vertically with top-left corner in (0, 7)", "Pl0: place ship vertically with top-left corner in (0, 8)", "Pl0: place ship vertically with top-left corner in (0, 9)", "Pl0: place ship vertically with top-left corner in (1, 0)", "Pl0: place ship vertically with top-left corner in (1, 1)", "Pl0: place ship vertically with top-left corner in (1, 2)", "Pl0: place ship vertically with top-left corner in (1, 3)", "Pl0: place ship vertically with top-left corner in (1, 5)", "Pl0: place ship vertically with top-left corner in (1, 6)", "Pl0: place ship vertically with top-left corner in (1, 7)", "Pl0: place ship vertically with top-left corner in (1, 8)", "Pl0: place ship vertically with top-left corner in (1, 9)", "Pl0: place ship vertically with top-left corner in (2, 0)", "Pl0: place ship vertically with top-left corner in (2, 1)", "Pl0: place ship vertically with top-left corner in (2, 2)", "Pl0: place ship vertically with top-left corner in (2, 3)", "Pl0: place ship vertically with top-left corner in (2, 5)", "Pl0: place ship vertically with top-left corner in (2, 6)", "Pl0: place ship vertically with top-left corner in (2, 7)", "Pl0: place ship vertically with top-left corner in (2, 8)", "Pl0: place ship vertically with top-left corner in (2, 9)", "Pl0: place ship vertically with top-left corner in (3, 0)", "Pl0: place ship vertically with top-left corner in (3, 1)", "Pl0: place ship vertically with top-left corner in (3, 2)", "Pl0: place ship vertically with top-left corner in (3, 3)", "Pl0: place ship vertically with top-left corner in (3, 5)", "Pl0: place ship vertically with top-left corner in (3, 6)", "Pl0: place ship vertically with top-left corner in (3, 7)", "Pl0: place ship vertically with top-left corner in (3, 8)", "Pl0: place ship vertically with top-left corner in (3, 9)", "Pl0: place ship vertically with top-left corner in (4, 0)", "Pl0: place ship vertically with top-left corner in (4, 1)", "Pl0: place ship vertically with top-left corner in (4, 2)", "Pl0: place ship vertically with top-left corner in (4, 3)", "Pl0: place ship vertically with top-left corner in (4, 4)", "Pl0: place ship vertically with top-left corner in (4, 5)", "Pl0: place ship vertically with top-left corner in (4, 6)", "Pl0: place ship vertically with top-left corner in (4, 7)", "Pl0: place ship vertically with top-left corner in (4, 8)", "Pl0: place ship vertically with top-left corner in (4, 9)", "Pl0: place ship vertically with top-left corner in (5, 0)", "Pl0: place 
ship vertically with top-left corner in (5, 1)", "Pl0: place ship vertically with top-left corner in (5, 2)", "Pl0: place ship vertically with top-left corner in (5, 3)", "Pl0: place ship vertically with top-left corner in (5, 4)", "Pl0: place ship vertically with top-left corner in (5, 5)", "Pl0: place ship vertically with top-left corner in (5, 6)", "Pl0: place ship vertically with top-left corner in (5, 7)", "Pl0: place ship vertically with top-left corner in (5, 8)", "Pl0: place ship vertically with top-left corner in (5, 9)", "Pl0: place ship vertically with top-left corner in (6, 0)", "Pl0: place ship vertically with top-left corner in (6, 1)", "Pl0: place ship vertically with top-left corner in (6, 2)", "Pl0: place ship vertically with top-left corner in (6, 3)", "Pl0: place ship vertically with top-left corner in (6, 4)", "Pl0: place ship vertically with top-left corner in (6, 5)", "Pl0: place ship vertically with top-left corner in (6, 6)", "Pl0: place ship vertically with top-left corner in (6, 7)", "Pl0: place ship vertically with top-left corner in (6, 8)", "Pl0: place ship vertically with top-left corner in (6, 9)", "Pl0: place ship vertically with top-left corner in (7, 0)", "Pl0: place ship vertically with top-left corner in (7, 1)", "Pl0: place ship vertically with top-left corner in (7, 2)", "Pl0: place ship vertically with top-left corner in (7, 3)", "Pl0: place ship vertically with top-left corner in (7, 4)", "Pl0: place ship vertically with top-left corner in (7, 5)", "Pl0: place ship vertically with top-left corner in (7, 6)", "Pl0: place ship vertically with top-left corner in (7, 7)", "Pl0: place ship vertically with top-left corner in (7, 8)", "Pl0: place ship vertically with top-left corner in (7, 9)"] + +# Apply action "Pl0: place ship vertically with top-left corner in (4, 8)" +action: 248 + +# State 3 +# Player 0's board: +# +----------+ +# | | +# | | +# | a | +# | a | +# | b | +# | b | +# | b | +# | | +# | | +# | | +# +----------+ +# +# Player 1's board: +# +----------+ +# | | +# | a | +# | a | +# | | +# | | +# | | +# | | +# | | +# | | +# | | +# +----------+ +# +# Full history: /0:v_2_4/1:v_1_3/0:v_4_8 +IsTerminal() = False +History() = [224, 213, 248] +HistoryString() = "224, 213, 248" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "T=3 /v_2_4/v_4_8" +InformationStateString(1) = "T=3 /v_1_3" +InformationStateTensor(0): binvec(2615, 0x252002042000800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(2615, 
0x154004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationString(0) = "State of player's ships:\n+----------+\n| |\n| |\n| a |\n| a |\n| b |\n| b |\n| b |\n| |\n| |\n| |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n+----------+\n" +ObservationString(1) = "State of player's ships:\n+----------+\n| |\n| a |\n| a |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n+----------+\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [100, 101, 102, 103, 104, 105, 106, 107, 110, 114, 115, 116, 117, 120, 124, 125, 126, 127, 130, 131, 132, 133, 134, 135, 136, 137, 140, 141, 142, 143, 144, 145, 146, 147, 150, 151, 152, 153, 154, 155, 156, 157, 160, 161, 162, 163, 164, 165, 166, 167, 170, 171, 172, 173, 174, 175, 176, 177, 180, 181, 182, 183, 184, 185, 186, 187, 190, 191, 192, 193, 194, 195, 196, 197, 200, 201, 202, 204, 205, 206, 207, 208, 209, 210, 211, 212, 214, 215, 216, 217, 218, 219, 220, 221, 222, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279] +StringLegalActions() = ["Pl1: place ship horizontally with top-left corner in (0, 0)", "Pl1: place ship horizontally with top-left corner in (0, 1)", "Pl1: place ship horizontally with top-left corner in (0, 2)", "Pl1: place ship horizontally with top-left corner in (0, 3)", "Pl1: place ship horizontally with top-left corner in (0, 4)", "Pl1: place ship horizontally with top-left corner in (0, 5)", "Pl1: place ship horizontally with top-left corner in (0, 6)", "Pl1: place ship horizontally with top-left corner in (0, 7)", "Pl1: place ship horizontally with top-left corner in (1, 0)", "Pl1: place ship horizontally with top-left corner in (1, 4)", "Pl1: place ship horizontally with top-left corner in (1, 5)", "Pl1: place ship horizontally with top-left corner in (1, 6)", "Pl1: place ship horizontally with top-left corner in (1, 7)", "Pl1: place ship horizontally with top-left corner in (2, 0)", "Pl1: place ship horizontally with top-left corner in (2, 4)", "Pl1: place ship horizontally with top-left corner in (2, 5)", "Pl1: place ship horizontally with top-left corner in (2, 6)", "Pl1: place ship horizontally with top-left corner in (2, 7)", "Pl1: place ship horizontally with top-left corner in (3, 0)", "Pl1: place ship horizontally with top-left corner in (3, 1)", "Pl1: place ship horizontally with top-left corner in (3, 2)", "Pl1: place ship horizontally with top-left corner in (3, 3)", "Pl1: place ship horizontally with top-left corner in (3, 4)", "Pl1: place ship horizontally with top-left corner in (3, 5)", "Pl1: place ship horizontally with top-left corner in (3, 
6)", "Pl1: place ship horizontally with top-left corner in (3, 7)", "Pl1: place ship horizontally with top-left corner in (4, 0)", "Pl1: place ship horizontally with top-left corner in (4, 1)", "Pl1: place ship horizontally with top-left corner in (4, 2)", "Pl1: place ship horizontally with top-left corner in (4, 3)", "Pl1: place ship horizontally with top-left corner in (4, 4)", "Pl1: place ship horizontally with top-left corner in (4, 5)", "Pl1: place ship horizontally with top-left corner in (4, 6)", "Pl1: place ship horizontally with top-left corner in (4, 7)", "Pl1: place ship horizontally with top-left corner in (5, 0)", "Pl1: place ship horizontally with top-left corner in (5, 1)", "Pl1: place ship horizontally with top-left corner in (5, 2)", "Pl1: place ship horizontally with top-left corner in (5, 3)", "Pl1: place ship horizontally with top-left corner in (5, 4)", "Pl1: place ship horizontally with top-left corner in (5, 5)", "Pl1: place ship horizontally with top-left corner in (5, 6)", "Pl1: place ship horizontally with top-left corner in (5, 7)", "Pl1: place ship horizontally with top-left corner in (6, 0)", "Pl1: place ship horizontally with top-left corner in (6, 1)", "Pl1: place ship horizontally with top-left corner in (6, 2)", "Pl1: place ship horizontally with top-left corner in (6, 3)", "Pl1: place ship horizontally with top-left corner in (6, 4)", "Pl1: place ship horizontally with top-left corner in (6, 5)", "Pl1: place ship horizontally with top-left corner in (6, 6)", "Pl1: place ship horizontally with top-left corner in (6, 7)", "Pl1: place ship horizontally with top-left corner in (7, 0)", "Pl1: place ship horizontally with top-left corner in (7, 1)", "Pl1: place ship horizontally with top-left corner in (7, 2)", "Pl1: place ship horizontally with top-left corner in (7, 3)", "Pl1: place ship horizontally with top-left corner in (7, 4)", "Pl1: place ship horizontally with top-left corner in (7, 5)", "Pl1: place ship horizontally with top-left corner in (7, 6)", "Pl1: place ship horizontally with top-left corner in (7, 7)", "Pl1: place ship horizontally with top-left corner in (8, 0)", "Pl1: place ship horizontally with top-left corner in (8, 1)", "Pl1: place ship horizontally with top-left corner in (8, 2)", "Pl1: place ship horizontally with top-left corner in (8, 3)", "Pl1: place ship horizontally with top-left corner in (8, 4)", "Pl1: place ship horizontally with top-left corner in (8, 5)", "Pl1: place ship horizontally with top-left corner in (8, 6)", "Pl1: place ship horizontally with top-left corner in (8, 7)", "Pl1: place ship horizontally with top-left corner in (9, 0)", "Pl1: place ship horizontally with top-left corner in (9, 1)", "Pl1: place ship horizontally with top-left corner in (9, 2)", "Pl1: place ship horizontally with top-left corner in (9, 3)", "Pl1: place ship horizontally with top-left corner in (9, 4)", "Pl1: place ship horizontally with top-left corner in (9, 5)", "Pl1: place ship horizontally with top-left corner in (9, 6)", "Pl1: place ship horizontally with top-left corner in (9, 7)", "Pl1: place ship vertically with top-left corner in (0, 0)", "Pl1: place ship vertically with top-left corner in (0, 1)", "Pl1: place ship vertically with top-left corner in (0, 2)", "Pl1: place ship vertically with top-left corner in (0, 4)", "Pl1: place ship vertically with top-left corner in (0, 5)", "Pl1: place ship vertically with top-left corner in (0, 6)", "Pl1: place ship vertically with top-left corner in (0, 7)", "Pl1: place ship vertically with 
top-left corner in (0, 8)", "Pl1: place ship vertically with top-left corner in (0, 9)", "Pl1: place ship vertically with top-left corner in (1, 0)", "Pl1: place ship vertically with top-left corner in (1, 1)", "Pl1: place ship vertically with top-left corner in (1, 2)", "Pl1: place ship vertically with top-left corner in (1, 4)", "Pl1: place ship vertically with top-left corner in (1, 5)", "Pl1: place ship vertically with top-left corner in (1, 6)", "Pl1: place ship vertically with top-left corner in (1, 7)", "Pl1: place ship vertically with top-left corner in (1, 8)", "Pl1: place ship vertically with top-left corner in (1, 9)", "Pl1: place ship vertically with top-left corner in (2, 0)", "Pl1: place ship vertically with top-left corner in (2, 1)", "Pl1: place ship vertically with top-left corner in (2, 2)", "Pl1: place ship vertically with top-left corner in (2, 4)", "Pl1: place ship vertically with top-left corner in (2, 5)", "Pl1: place ship vertically with top-left corner in (2, 6)", "Pl1: place ship vertically with top-left corner in (2, 7)", "Pl1: place ship vertically with top-left corner in (2, 8)", "Pl1: place ship vertically with top-left corner in (2, 9)", "Pl1: place ship vertically with top-left corner in (3, 0)", "Pl1: place ship vertically with top-left corner in (3, 1)", "Pl1: place ship vertically with top-left corner in (3, 2)", "Pl1: place ship vertically with top-left corner in (3, 3)", "Pl1: place ship vertically with top-left corner in (3, 4)", "Pl1: place ship vertically with top-left corner in (3, 5)", "Pl1: place ship vertically with top-left corner in (3, 6)", "Pl1: place ship vertically with top-left corner in (3, 7)", "Pl1: place ship vertically with top-left corner in (3, 8)", "Pl1: place ship vertically with top-left corner in (3, 9)", "Pl1: place ship vertically with top-left corner in (4, 0)", "Pl1: place ship vertically with top-left corner in (4, 1)", "Pl1: place ship vertically with top-left corner in (4, 2)", "Pl1: place ship vertically with top-left corner in (4, 3)", "Pl1: place ship vertically with top-left corner in (4, 4)", "Pl1: place ship vertically with top-left corner in (4, 5)", "Pl1: place ship vertically with top-left corner in (4, 6)", "Pl1: place ship vertically with top-left corner in (4, 7)", "Pl1: place ship vertically with top-left corner in (4, 8)", "Pl1: place ship vertically with top-left corner in (4, 9)", "Pl1: place ship vertically with top-left corner in (5, 0)", "Pl1: place ship vertically with top-left corner in (5, 1)", "Pl1: place ship vertically with top-left corner in (5, 2)", "Pl1: place ship vertically with top-left corner in (5, 3)", "Pl1: place ship vertically with top-left corner in (5, 4)", "Pl1: place ship vertically with top-left corner in (5, 5)", "Pl1: place ship vertically with top-left corner in (5, 6)", "Pl1: place ship vertically with top-left corner in (5, 7)", "Pl1: place ship vertically with top-left corner in (5, 8)", "Pl1: place ship vertically with top-left corner in (5, 9)", "Pl1: place ship vertically with top-left corner in (6, 0)", "Pl1: place ship vertically with top-left corner in (6, 1)", "Pl1: place ship vertically with top-left corner in (6, 2)", "Pl1: place ship vertically with top-left corner in (6, 3)", "Pl1: place ship vertically with top-left corner in (6, 4)", "Pl1: place ship vertically with top-left corner in (6, 5)", "Pl1: place ship vertically with top-left corner in (6, 6)", "Pl1: place ship vertically with top-left corner in (6, 7)", "Pl1: place ship vertically with top-left corner 
in (6, 8)", "Pl1: place ship vertically with top-left corner in (6, 9)", "Pl1: place ship vertically with top-left corner in (7, 0)", "Pl1: place ship vertically with top-left corner in (7, 1)", "Pl1: place ship vertically with top-left corner in (7, 2)", "Pl1: place ship vertically with top-left corner in (7, 3)", "Pl1: place ship vertically with top-left corner in (7, 4)", "Pl1: place ship vertically with top-left corner in (7, 5)", "Pl1: place ship vertically with top-left corner in (7, 6)", "Pl1: place ship vertically with top-left corner in (7, 7)", "Pl1: place ship vertically with top-left corner in (7, 8)", "Pl1: place ship vertically with top-left corner in (7, 9)"] + +# Apply action "Pl1: place ship vertically with top-left corner in (0, 0)" +action: 200 + +# State 4 +# Player 0's board: +# +----------+ +# | | +# | | +# | a | +# | a | +# | b | +# | b | +# | b | +# | | +# | | +# | | +# +----------+ +# +# Player 1's board: +# +----------+ +# |b | +# |b a | +# |b a | +# | | +# | | +# | | +# | | +# | | +# | | +# | | +# +----------+ +# +# Full history: /0:v_2_4/1:v_1_3/0:v_4_8/1:v_0_0 +IsTerminal() = False +History() = [224, 213, 248, 200] +HistoryString() = "224, 213, 248, 200" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "T=4 /v_2_4/v_4_8" +InformationStateString(1) = "T=4 /v_1_3/v_0_0" +InformationStateTensor(0): binvec(2615, 0x292002042000800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(2615, 0x194004060080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationString(0) = "State of player's ships:\n+----------+\n| |\n| |\n| a |\n| a |\n| b |\n| b |\n| b |\n| |\n| |\n| |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n+----------+\n" +ObservationString(1) = "State of player's ships:\n+----------+\n|b |\n|b a |\n|b a |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n+----------+\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [100, 101, 102, 103, 104, 105, 106, 107, 110, 111, 112, 113, 114, 115, 116, 117, 120, 121, 125, 126, 127, 130, 131, 135, 136, 137, 140, 141, 142, 143, 144, 145, 150, 151, 152, 153, 154, 155, 160, 161, 162, 163, 164, 165, 170, 171, 172, 
173, 174, 175, 176, 177, 180, 181, 182, 183, 184, 185, 186, 187, 190, 191, 192, 193, 194, 195, 196, 197, 200, 201, 202, 203, 205, 206, 207, 208, 209, 210, 211, 212, 213, 215, 216, 217, 218, 219, 220, 221, 222, 223, 225, 226, 227, 229, 230, 231, 232, 233, 235, 236, 237, 239, 240, 241, 242, 243, 244, 245, 246, 247, 249, 250, 251, 252, 253, 254, 255, 256, 257, 259, 260, 261, 262, 263, 264, 265, 266, 267, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279] +StringLegalActions() = ["Pl0: place ship horizontally with top-left corner in (0, 0)", "Pl0: place ship horizontally with top-left corner in (0, 1)", "Pl0: place ship horizontally with top-left corner in (0, 2)", "Pl0: place ship horizontally with top-left corner in (0, 3)", "Pl0: place ship horizontally with top-left corner in (0, 4)", "Pl0: place ship horizontally with top-left corner in (0, 5)", "Pl0: place ship horizontally with top-left corner in (0, 6)", "Pl0: place ship horizontally with top-left corner in (0, 7)", "Pl0: place ship horizontally with top-left corner in (1, 0)", "Pl0: place ship horizontally with top-left corner in (1, 1)", "Pl0: place ship horizontally with top-left corner in (1, 2)", "Pl0: place ship horizontally with top-left corner in (1, 3)", "Pl0: place ship horizontally with top-left corner in (1, 4)", "Pl0: place ship horizontally with top-left corner in (1, 5)", "Pl0: place ship horizontally with top-left corner in (1, 6)", "Pl0: place ship horizontally with top-left corner in (1, 7)", "Pl0: place ship horizontally with top-left corner in (2, 0)", "Pl0: place ship horizontally with top-left corner in (2, 1)", "Pl0: place ship horizontally with top-left corner in (2, 5)", "Pl0: place ship horizontally with top-left corner in (2, 6)", "Pl0: place ship horizontally with top-left corner in (2, 7)", "Pl0: place ship horizontally with top-left corner in (3, 0)", "Pl0: place ship horizontally with top-left corner in (3, 1)", "Pl0: place ship horizontally with top-left corner in (3, 5)", "Pl0: place ship horizontally with top-left corner in (3, 6)", "Pl0: place ship horizontally with top-left corner in (3, 7)", "Pl0: place ship horizontally with top-left corner in (4, 0)", "Pl0: place ship horizontally with top-left corner in (4, 1)", "Pl0: place ship horizontally with top-left corner in (4, 2)", "Pl0: place ship horizontally with top-left corner in (4, 3)", "Pl0: place ship horizontally with top-left corner in (4, 4)", "Pl0: place ship horizontally with top-left corner in (4, 5)", "Pl0: place ship horizontally with top-left corner in (5, 0)", "Pl0: place ship horizontally with top-left corner in (5, 1)", "Pl0: place ship horizontally with top-left corner in (5, 2)", "Pl0: place ship horizontally with top-left corner in (5, 3)", "Pl0: place ship horizontally with top-left corner in (5, 4)", "Pl0: place ship horizontally with top-left corner in (5, 5)", "Pl0: place ship horizontally with top-left corner in (6, 0)", "Pl0: place ship horizontally with top-left corner in (6, 1)", "Pl0: place ship horizontally with top-left corner in (6, 2)", "Pl0: place ship horizontally with top-left corner in (6, 3)", "Pl0: place ship horizontally with top-left corner in (6, 4)", "Pl0: place ship horizontally with top-left corner in (6, 5)", "Pl0: place ship horizontally with top-left corner in (7, 0)", "Pl0: place ship horizontally with top-left corner in (7, 1)", "Pl0: place ship horizontally with top-left corner in (7, 2)", "Pl0: place ship horizontally with top-left corner in (7, 3)", "Pl0: place ship horizontally with top-left 
corner in (7, 4)", "Pl0: place ship horizontally with top-left corner in (7, 5)", "Pl0: place ship horizontally with top-left corner in (7, 6)", "Pl0: place ship horizontally with top-left corner in (7, 7)", "Pl0: place ship horizontally with top-left corner in (8, 0)", "Pl0: place ship horizontally with top-left corner in (8, 1)", "Pl0: place ship horizontally with top-left corner in (8, 2)", "Pl0: place ship horizontally with top-left corner in (8, 3)", "Pl0: place ship horizontally with top-left corner in (8, 4)", "Pl0: place ship horizontally with top-left corner in (8, 5)", "Pl0: place ship horizontally with top-left corner in (8, 6)", "Pl0: place ship horizontally with top-left corner in (8, 7)", "Pl0: place ship horizontally with top-left corner in (9, 0)", "Pl0: place ship horizontally with top-left corner in (9, 1)", "Pl0: place ship horizontally with top-left corner in (9, 2)", "Pl0: place ship horizontally with top-left corner in (9, 3)", "Pl0: place ship horizontally with top-left corner in (9, 4)", "Pl0: place ship horizontally with top-left corner in (9, 5)", "Pl0: place ship horizontally with top-left corner in (9, 6)", "Pl0: place ship horizontally with top-left corner in (9, 7)", "Pl0: place ship vertically with top-left corner in (0, 0)", "Pl0: place ship vertically with top-left corner in (0, 1)", "Pl0: place ship vertically with top-left corner in (0, 2)", "Pl0: place ship vertically with top-left corner in (0, 3)", "Pl0: place ship vertically with top-left corner in (0, 5)", "Pl0: place ship vertically with top-left corner in (0, 6)", "Pl0: place ship vertically with top-left corner in (0, 7)", "Pl0: place ship vertically with top-left corner in (0, 8)", "Pl0: place ship vertically with top-left corner in (0, 9)", "Pl0: place ship vertically with top-left corner in (1, 0)", "Pl0: place ship vertically with top-left corner in (1, 1)", "Pl0: place ship vertically with top-left corner in (1, 2)", "Pl0: place ship vertically with top-left corner in (1, 3)", "Pl0: place ship vertically with top-left corner in (1, 5)", "Pl0: place ship vertically with top-left corner in (1, 6)", "Pl0: place ship vertically with top-left corner in (1, 7)", "Pl0: place ship vertically with top-left corner in (1, 8)", "Pl0: place ship vertically with top-left corner in (1, 9)", "Pl0: place ship vertically with top-left corner in (2, 0)", "Pl0: place ship vertically with top-left corner in (2, 1)", "Pl0: place ship vertically with top-left corner in (2, 2)", "Pl0: place ship vertically with top-left corner in (2, 3)", "Pl0: place ship vertically with top-left corner in (2, 5)", "Pl0: place ship vertically with top-left corner in (2, 6)", "Pl0: place ship vertically with top-left corner in (2, 7)", "Pl0: place ship vertically with top-left corner in (2, 9)", "Pl0: place ship vertically with top-left corner in (3, 0)", "Pl0: place ship vertically with top-left corner in (3, 1)", "Pl0: place ship vertically with top-left corner in (3, 2)", "Pl0: place ship vertically with top-left corner in (3, 3)", "Pl0: place ship vertically with top-left corner in (3, 5)", "Pl0: place ship vertically with top-left corner in (3, 6)", "Pl0: place ship vertically with top-left corner in (3, 7)", "Pl0: place ship vertically with top-left corner in (3, 9)", "Pl0: place ship vertically with top-left corner in (4, 0)", "Pl0: place ship vertically with top-left corner in (4, 1)", "Pl0: place ship vertically with top-left corner in (4, 2)", "Pl0: place ship vertically with top-left corner in (4, 3)", "Pl0: place ship 
vertically with top-left corner in (4, 4)", "Pl0: place ship vertically with top-left corner in (4, 5)", "Pl0: place ship vertically with top-left corner in (4, 6)", "Pl0: place ship vertically with top-left corner in (4, 7)", "Pl0: place ship vertically with top-left corner in (4, 9)", "Pl0: place ship vertically with top-left corner in (5, 0)", "Pl0: place ship vertically with top-left corner in (5, 1)", "Pl0: place ship vertically with top-left corner in (5, 2)", "Pl0: place ship vertically with top-left corner in (5, 3)", "Pl0: place ship vertically with top-left corner in (5, 4)", "Pl0: place ship vertically with top-left corner in (5, 5)", "Pl0: place ship vertically with top-left corner in (5, 6)", "Pl0: place ship vertically with top-left corner in (5, 7)", "Pl0: place ship vertically with top-left corner in (5, 9)", "Pl0: place ship vertically with top-left corner in (6, 0)", "Pl0: place ship vertically with top-left corner in (6, 1)", "Pl0: place ship vertically with top-left corner in (6, 2)", "Pl0: place ship vertically with top-left corner in (6, 3)", "Pl0: place ship vertically with top-left corner in (6, 4)", "Pl0: place ship vertically with top-left corner in (6, 5)", "Pl0: place ship vertically with top-left corner in (6, 6)", "Pl0: place ship vertically with top-left corner in (6, 7)", "Pl0: place ship vertically with top-left corner in (6, 9)", "Pl0: place ship vertically with top-left corner in (7, 0)", "Pl0: place ship vertically with top-left corner in (7, 1)", "Pl0: place ship vertically with top-left corner in (7, 2)", "Pl0: place ship vertically with top-left corner in (7, 3)", "Pl0: place ship vertically with top-left corner in (7, 4)", "Pl0: place ship vertically with top-left corner in (7, 5)", "Pl0: place ship vertically with top-left corner in (7, 6)", "Pl0: place ship vertically with top-left corner in (7, 7)", "Pl0: place ship vertically with top-left corner in (7, 8)", "Pl0: place ship vertically with top-left corner in (7, 9)"] + +# Apply action "Pl0: place ship vertically with top-left corner in (6, 6)" +action: 266 + +# State 5 +# Player 0's board: +# +----------+ +# | | +# | | +# | a | +# | a | +# | b | +# | b | +# | c b | +# | c | +# | c | +# | | +# +----------+ +# +# Player 1's board: +# +----------+ +# |b | +# |b a | +# |b a | +# | | +# | | +# | | +# | | +# | | +# | | +# | | +# +----------+ +# +# Full history: /0:v_2_4/1:v_1_3/0:v_4_8/1:v_0_0/0:v_6_6 +IsTerminal() = False +History() = [224, 213, 248, 200, 266] +HistoryString() = "224, 213, 248, 200, 266" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "T=5 /v_2_4/v_4_8/v_6_6" +InformationStateString(1) = "T=5 /v_1_3/v_0_0" +InformationStateTensor(0): binvec(2615, 0x252002042000902008000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(2615, 
0x154004060080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationString(0) = "State of player's ships:\n+----------+\n| |\n| |\n| a |\n| a |\n| b |\n| b |\n| c b |\n| c |\n| c |\n| |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n+----------+\n" +ObservationString(1) = "State of player's ships:\n+----------+\n|b |\n|b a |\n|b a |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n+----------+\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [101, 102, 103, 104, 105, 106, 107, 114, 115, 116, 117, 124, 125, 126, 127, 130, 131, 132, 133, 134, 135, 136, 137, 140, 141, 142, 143, 144, 145, 146, 147, 150, 151, 152, 153, 154, 155, 156, 157, 160, 161, 162, 163, 164, 165, 166, 167, 170, 171, 172, 173, 174, 175, 176, 177, 180, 181, 182, 183, 184, 185, 186, 187, 190, 191, 192, 193, 194, 195, 196, 197, 201, 202, 204, 205, 206, 207, 208, 209, 211, 212, 214, 215, 216, 217, 218, 219, 221, 222, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279] +StringLegalActions() = ["Pl1: place ship horizontally with top-left corner in (0, 1)", "Pl1: place ship horizontally with top-left corner in (0, 2)", "Pl1: place ship horizontally with top-left corner in (0, 3)", "Pl1: place ship horizontally with top-left corner in (0, 4)", "Pl1: place ship horizontally with top-left corner in (0, 5)", "Pl1: place ship horizontally with top-left corner in (0, 6)", "Pl1: place ship horizontally with top-left corner in (0, 7)", "Pl1: place ship horizontally with top-left corner in (1, 4)", "Pl1: place ship horizontally with top-left corner in (1, 5)", "Pl1: place ship horizontally with top-left corner in (1, 6)", "Pl1: place ship horizontally with top-left corner in (1, 7)", "Pl1: place ship horizontally with top-left corner in (2, 4)", "Pl1: place ship horizontally with top-left corner in (2, 5)", "Pl1: place ship horizontally with top-left corner in (2, 6)", "Pl1: place ship horizontally with top-left corner in (2, 7)", "Pl1: place ship horizontally with top-left corner in (3, 0)", "Pl1: place ship horizontally with top-left corner in (3, 1)", "Pl1: place ship horizontally with top-left corner in (3, 2)", "Pl1: place ship horizontally with top-left corner in (3, 3)", "Pl1: place ship horizontally with top-left corner in (3, 4)", "Pl1: place ship horizontally with top-left corner in (3, 5)", "Pl1: place ship horizontally with top-left corner in (3, 6)", "Pl1: place ship horizontally with top-left corner in (3, 7)", "Pl1: place ship horizontally with top-left corner in (4, 0)", "Pl1: place ship horizontally with top-left corner in (4, 1)", "Pl1: place ship 
horizontally with top-left corner in (4, 2)", "Pl1: place ship horizontally with top-left corner in (4, 3)", "Pl1: place ship horizontally with top-left corner in (4, 4)", "Pl1: place ship horizontally with top-left corner in (4, 5)", "Pl1: place ship horizontally with top-left corner in (4, 6)", "Pl1: place ship horizontally with top-left corner in (4, 7)", "Pl1: place ship horizontally with top-left corner in (5, 0)", "Pl1: place ship horizontally with top-left corner in (5, 1)", "Pl1: place ship horizontally with top-left corner in (5, 2)", "Pl1: place ship horizontally with top-left corner in (5, 3)", "Pl1: place ship horizontally with top-left corner in (5, 4)", "Pl1: place ship horizontally with top-left corner in (5, 5)", "Pl1: place ship horizontally with top-left corner in (5, 6)", "Pl1: place ship horizontally with top-left corner in (5, 7)", "Pl1: place ship horizontally with top-left corner in (6, 0)", "Pl1: place ship horizontally with top-left corner in (6, 1)", "Pl1: place ship horizontally with top-left corner in (6, 2)", "Pl1: place ship horizontally with top-left corner in (6, 3)", "Pl1: place ship horizontally with top-left corner in (6, 4)", "Pl1: place ship horizontally with top-left corner in (6, 5)", "Pl1: place ship horizontally with top-left corner in (6, 6)", "Pl1: place ship horizontally with top-left corner in (6, 7)", "Pl1: place ship horizontally with top-left corner in (7, 0)", "Pl1: place ship horizontally with top-left corner in (7, 1)", "Pl1: place ship horizontally with top-left corner in (7, 2)", "Pl1: place ship horizontally with top-left corner in (7, 3)", "Pl1: place ship horizontally with top-left corner in (7, 4)", "Pl1: place ship horizontally with top-left corner in (7, 5)", "Pl1: place ship horizontally with top-left corner in (7, 6)", "Pl1: place ship horizontally with top-left corner in (7, 7)", "Pl1: place ship horizontally with top-left corner in (8, 0)", "Pl1: place ship horizontally with top-left corner in (8, 1)", "Pl1: place ship horizontally with top-left corner in (8, 2)", "Pl1: place ship horizontally with top-left corner in (8, 3)", "Pl1: place ship horizontally with top-left corner in (8, 4)", "Pl1: place ship horizontally with top-left corner in (8, 5)", "Pl1: place ship horizontally with top-left corner in (8, 6)", "Pl1: place ship horizontally with top-left corner in (8, 7)", "Pl1: place ship horizontally with top-left corner in (9, 0)", "Pl1: place ship horizontally with top-left corner in (9, 1)", "Pl1: place ship horizontally with top-left corner in (9, 2)", "Pl1: place ship horizontally with top-left corner in (9, 3)", "Pl1: place ship horizontally with top-left corner in (9, 4)", "Pl1: place ship horizontally with top-left corner in (9, 5)", "Pl1: place ship horizontally with top-left corner in (9, 6)", "Pl1: place ship horizontally with top-left corner in (9, 7)", "Pl1: place ship vertically with top-left corner in (0, 1)", "Pl1: place ship vertically with top-left corner in (0, 2)", "Pl1: place ship vertically with top-left corner in (0, 4)", "Pl1: place ship vertically with top-left corner in (0, 5)", "Pl1: place ship vertically with top-left corner in (0, 6)", "Pl1: place ship vertically with top-left corner in (0, 7)", "Pl1: place ship vertically with top-left corner in (0, 8)", "Pl1: place ship vertically with top-left corner in (0, 9)", "Pl1: place ship vertically with top-left corner in (1, 1)", "Pl1: place ship vertically with top-left corner in (1, 2)", "Pl1: place ship vertically with top-left corner in (1, 4)", 
"Pl1: place ship vertically with top-left corner in (1, 5)", "Pl1: place ship vertically with top-left corner in (1, 6)", "Pl1: place ship vertically with top-left corner in (1, 7)", "Pl1: place ship vertically with top-left corner in (1, 8)", "Pl1: place ship vertically with top-left corner in (1, 9)", "Pl1: place ship vertically with top-left corner in (2, 1)", "Pl1: place ship vertically with top-left corner in (2, 2)", "Pl1: place ship vertically with top-left corner in (2, 4)", "Pl1: place ship vertically with top-left corner in (2, 5)", "Pl1: place ship vertically with top-left corner in (2, 6)", "Pl1: place ship vertically with top-left corner in (2, 7)", "Pl1: place ship vertically with top-left corner in (2, 8)", "Pl1: place ship vertically with top-left corner in (2, 9)", "Pl1: place ship vertically with top-left corner in (3, 0)", "Pl1: place ship vertically with top-left corner in (3, 1)", "Pl1: place ship vertically with top-left corner in (3, 2)", "Pl1: place ship vertically with top-left corner in (3, 3)", "Pl1: place ship vertically with top-left corner in (3, 4)", "Pl1: place ship vertically with top-left corner in (3, 5)", "Pl1: place ship vertically with top-left corner in (3, 6)", "Pl1: place ship vertically with top-left corner in (3, 7)", "Pl1: place ship vertically with top-left corner in (3, 8)", "Pl1: place ship vertically with top-left corner in (3, 9)", "Pl1: place ship vertically with top-left corner in (4, 0)", "Pl1: place ship vertically with top-left corner in (4, 1)", "Pl1: place ship vertically with top-left corner in (4, 2)", "Pl1: place ship vertically with top-left corner in (4, 3)", "Pl1: place ship vertically with top-left corner in (4, 4)", "Pl1: place ship vertically with top-left corner in (4, 5)", "Pl1: place ship vertically with top-left corner in (4, 6)", "Pl1: place ship vertically with top-left corner in (4, 7)", "Pl1: place ship vertically with top-left corner in (4, 8)", "Pl1: place ship vertically with top-left corner in (4, 9)", "Pl1: place ship vertically with top-left corner in (5, 0)", "Pl1: place ship vertically with top-left corner in (5, 1)", "Pl1: place ship vertically with top-left corner in (5, 2)", "Pl1: place ship vertically with top-left corner in (5, 3)", "Pl1: place ship vertically with top-left corner in (5, 4)", "Pl1: place ship vertically with top-left corner in (5, 5)", "Pl1: place ship vertically with top-left corner in (5, 6)", "Pl1: place ship vertically with top-left corner in (5, 7)", "Pl1: place ship vertically with top-left corner in (5, 8)", "Pl1: place ship vertically with top-left corner in (5, 9)", "Pl1: place ship vertically with top-left corner in (6, 0)", "Pl1: place ship vertically with top-left corner in (6, 1)", "Pl1: place ship vertically with top-left corner in (6, 2)", "Pl1: place ship vertically with top-left corner in (6, 3)", "Pl1: place ship vertically with top-left corner in (6, 4)", "Pl1: place ship vertically with top-left corner in (6, 5)", "Pl1: place ship vertically with top-left corner in (6, 6)", "Pl1: place ship vertically with top-left corner in (6, 7)", "Pl1: place ship vertically with top-left corner in (6, 8)", "Pl1: place ship vertically with top-left corner in (6, 9)", "Pl1: place ship vertically with top-left corner in (7, 0)", "Pl1: place ship vertically with top-left corner in (7, 1)", "Pl1: place ship vertically with top-left corner in (7, 2)", "Pl1: place ship vertically with top-left corner in (7, 3)", "Pl1: place ship vertically with top-left corner in (7, 4)", "Pl1: place ship 
vertically with top-left corner in (7, 5)", "Pl1: place ship vertically with top-left corner in (7, 6)", "Pl1: place ship vertically with top-left corner in (7, 7)", "Pl1: place ship vertically with top-left corner in (7, 8)", "Pl1: place ship vertically with top-left corner in (7, 9)"] + +# Apply action "Pl1: place ship vertically with top-left corner in (6, 3)" +action: 263 + +# State 6 +# Apply action "Pl0: place ship vertically with top-left corner in (4, 1)" +action: 241 + +# State 7 +# Apply action "Pl1: place ship vertically with top-left corner in (0, 5)" +action: 205 + +# State 8 +# Apply action "Pl0: place ship vertically with top-left corner in (5, 2)" +action: 252 + +# State 9 +# Apply action "Pl1: place ship vertically with top-left corner in (2, 4)" +action: 224 + +# State 10 +# Apply action "Pl0: shoot at (6, 8)" +action: 68 + +# State 11 +# Apply action "Pl1: shoot at (6, 1)" +action: 61 + +# State 12 +# Apply action "Pl0: shoot at (7, 4)" +action: 74 + +# State 13 +# Apply action "Pl1: shoot at (9, 7)" +action: 97 + +# State 14 +# Apply action "Pl0: shoot at (9, 7)" +action: 97 + +# State 15 +# Apply action "Pl1: shoot at (1, 7)" +action: 17 + +# State 16 +# Apply action "Pl0: shoot at (8, 6)" +action: 86 + +# State 17 +# Apply action "Pl1: shoot at (9, 7)" +action: 97 + +# State 18 +# Apply action "Pl0: shoot at (5, 1)" +action: 51 + +# State 19 +# Apply action "Pl1: shoot at (1, 4)" +action: 14 + +# State 20 +# Player 0's board: +# +----------+ +# | | +# | * * | +# | a | +# | a | +# | d b | +# | de b | +# | De c b | +# | de c | +# | e c | +# | e * | +# +----------+ +# +# Player 1's board: +# +----------+ +# |b d | +# |b a d | +# |b aed | +# | ed | +# | e | +# | * e | +# | ce * | +# | c* | +# | c * | +# | * | +# +----------+ +# +# Full history: /0:v_2_4/1:v_1_3/0:v_4_8/1:v_0_0/0:v_6_6/1:v_6_3/0:v_4_1/1:v_0_5/0:v_5_2/1:v_2_4/0:6_8/1:6_1/0:7_4/1:9_7/0:9_7/1:1_7/0:8_6/1:9_7/0:5_1/1:1_4 +IsTerminal() = False +History() = [224, 213, 248, 200, 266, 263, 241, 205, 252, 224, 68, 61, 74, 97, 97, 17, 86, 97, 51, 14] +HistoryString() = "224, 213, 248, 200, 266, 263, 241, 205, 252, 224, 68, 61, 74, 97, 97, 17, 86, 97, 51, 14" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "T=20 /v_2_4/v_4_8/v_6_6/v_4_1/v_5_2/shot_6_8:W/oppshot_6_1/shot_7_4:W/oppshot_9_7/shot_9_7:W/oppshot_1_7/shot_8_6:W/oppshot_9_7/shot_5_1:W/oppshot_1_4" +InformationStateString(1) = "T=20 /v_1_3/v_0_0/v_6_3/v_0_5/v_2_4/oppshot_6_8/shot_6_1:H/oppshot_7_4/shot_9_7:W/oppshot_9_7/shot_1_7:W/oppshot_8_6/shot_9_7:W/oppshot_5_1/shot_1_4:W" +InformationStateTensor(0): binvec(2615, 0x2920020420009020084204010408080800a2042002010208802020801012280008200808880202081040228004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(2615, 
0x19400406008010204060004120020808008204200a010200802024801010280009200808080202481040028004100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationString(0) = "State of player's ships:\n+----------+\n| |\n| * * |\n| a |\n| a |\n| d b |\n| de b |\n| De c b |\n| de c |\n| e c |\n| e * |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| |\n| |\n| |\n| |\n| |\n| @ |\n| @ |\n| @ |\n| @ |\n| @ |\n+----------+\n" +ObservationString(1) = "State of player's ships:\n+----------+\n|b d |\n|b a d |\n|b aed |\n| ed |\n| e |\n| * e |\n| ce * |\n| c* |\n| c * |\n| * |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| |\n| @ @ |\n| |\n| |\n| |\n| |\n| # |\n| |\n| |\n| @ |\n+----------+\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99] +StringLegalActions() = ["Pl0: shoot at (0, 0)", "Pl0: shoot at (0, 1)", "Pl0: shoot at (0, 2)", "Pl0: shoot at (0, 3)", "Pl0: shoot at (0, 4)", "Pl0: shoot at (0, 5)", "Pl0: shoot at (0, 6)", "Pl0: shoot at (0, 7)", "Pl0: shoot at (0, 8)", "Pl0: shoot at (0, 9)", "Pl0: shoot at (1, 0)", "Pl0: shoot at (1, 1)", "Pl0: shoot at (1, 2)", "Pl0: shoot at (1, 3)", "Pl0: shoot at (1, 4)", "Pl0: shoot at (1, 5)", "Pl0: shoot at (1, 6)", "Pl0: shoot at (1, 7)", "Pl0: shoot at (1, 8)", "Pl0: shoot at (1, 9)", "Pl0: shoot at (2, 0)", "Pl0: shoot at (2, 1)", "Pl0: shoot at (2, 2)", "Pl0: shoot at (2, 3)", "Pl0: shoot at (2, 4)", "Pl0: shoot at (2, 5)", "Pl0: shoot at (2, 6)", "Pl0: shoot at (2, 7)", "Pl0: shoot at (2, 8)", "Pl0: shoot at (2, 9)", "Pl0: shoot at (3, 0)", "Pl0: shoot at (3, 1)", "Pl0: shoot at (3, 2)", "Pl0: shoot at (3, 3)", "Pl0: shoot at (3, 4)", "Pl0: shoot at (3, 5)", "Pl0: shoot at (3, 6)", "Pl0: shoot at (3, 7)", "Pl0: shoot at (3, 8)", "Pl0: shoot at (3, 9)", "Pl0: shoot at (4, 0)", "Pl0: shoot at (4, 1)", "Pl0: shoot at (4, 2)", "Pl0: shoot at (4, 3)", "Pl0: shoot at (4, 4)", "Pl0: shoot at (4, 5)", "Pl0: shoot at (4, 6)", "Pl0: shoot at (4, 7)", "Pl0: shoot at (4, 8)", "Pl0: shoot at (4, 9)", "Pl0: shoot at (5, 0)", "Pl0: shoot at (5, 1)", "Pl0: shoot at (5, 2)", "Pl0: shoot at (5, 3)", "Pl0: shoot at (5, 4)", "Pl0: shoot at (5, 5)", "Pl0: shoot at (5, 6)", "Pl0: shoot at (5, 7)", "Pl0: shoot at (5, 8)", "Pl0: shoot at (5, 9)", "Pl0: shoot at (6, 0)", "Pl0: shoot at (6, 1)", "Pl0: shoot at (6, 2)", "Pl0: shoot at (6, 3)", "Pl0: shoot at (6, 4)", "Pl0: shoot at (6, 5)", "Pl0: shoot at (6, 6)", "Pl0: shoot at (6, 7)", "Pl0: shoot at (6, 8)", "Pl0: shoot at (6, 9)", "Pl0: shoot at (7, 0)", "Pl0: shoot at (7, 1)", "Pl0: shoot at (7, 2)", "Pl0: shoot at (7, 3)", "Pl0: shoot at (7, 4)", "Pl0: shoot at (7, 5)", "Pl0: shoot at (7, 6)", "Pl0: shoot 
at (7, 7)", "Pl0: shoot at (7, 8)", "Pl0: shoot at (7, 9)", "Pl0: shoot at (8, 0)", "Pl0: shoot at (8, 1)", "Pl0: shoot at (8, 2)", "Pl0: shoot at (8, 3)", "Pl0: shoot at (8, 4)", "Pl0: shoot at (8, 5)", "Pl0: shoot at (8, 6)", "Pl0: shoot at (8, 7)", "Pl0: shoot at (8, 8)", "Pl0: shoot at (8, 9)", "Pl0: shoot at (9, 0)", "Pl0: shoot at (9, 1)", "Pl0: shoot at (9, 2)", "Pl0: shoot at (9, 3)", "Pl0: shoot at (9, 4)", "Pl0: shoot at (9, 5)", "Pl0: shoot at (9, 6)", "Pl0: shoot at (9, 7)", "Pl0: shoot at (9, 8)", "Pl0: shoot at (9, 9)"] + +# Apply action "Pl0: shoot at (4, 9)" +action: 49 + +# State 21 +# Player 0's board: +# +----------+ +# | | +# | * * | +# | a | +# | a | +# | d b | +# | de b | +# | De c b | +# | de c | +# | e c | +# | e * | +# +----------+ +# +# Player 1's board: +# +----------+ +# |b d | +# |b a d | +# |b aed | +# | ed | +# | e *| +# | * e | +# | ce * | +# | c* | +# | c * | +# | * | +# +----------+ +# +# Full history: /0:v_2_4/1:v_1_3/0:v_4_8/1:v_0_0/0:v_6_6/1:v_6_3/0:v_4_1/1:v_0_5/0:v_5_2/1:v_2_4/0:6_8/1:6_1/0:7_4/1:9_7/0:9_7/1:1_7/0:8_6/1:9_7/0:5_1/1:1_4/0:4_9 +IsTerminal() = False +History() = [224, 213, 248, 200, 266, 263, 241, 205, 252, 224, 68, 61, 74, 97, 97, 17, 86, 97, 51, 14, 49] +HistoryString() = "224, 213, 248, 200, 266, 263, 241, 205, 252, 224, 68, 61, 74, 97, 97, 17, 86, 97, 51, 14, 49" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "T=21 /v_2_4/v_4_8/v_6_6/v_4_1/v_5_2/shot_6_8:W/oppshot_6_1/shot_7_4:W/oppshot_9_7/shot_9_7:W/oppshot_1_7/shot_8_6:W/oppshot_9_7/shot_5_1:W/oppshot_1_4/shot_4_9:W" +InformationStateString(1) = "T=21 /v_1_3/v_0_0/v_6_3/v_0_5/v_2_4/oppshot_6_8/shot_6_1:H/oppshot_7_4/shot_9_7:W/oppshot_9_7/shot_1_7:W/oppshot_8_6/shot_9_7:W/oppshot_5_1/shot_1_4:W/oppshot_4_9" +InformationStateTensor(0): binvec(2615, 0x2520020420009020084204010408080800a2042002010208802020801012280008200808880202081040228004020800180000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(2615, 0x15400406008010204060004120020808008204200a010200802024801010280009200808080202481040028004120800100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationString(0) = "State of player's ships:\n+----------+\n| |\n| * * |\n| a |\n| a |\n| d b |\n| de b |\n| De c b |\n| de c |\n| e c |\n| e * |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| |\n| |\n| |\n| |\n| @|\n| @ |\n| @ |\n| @ |\n| @ |\n| @ |\n+----------+\n" +ObservationString(1) = "State of player's ships:\n+----------+\n|b d 
|\n|b a d |\n|b aed |\n| ed |\n| e *|\n| * e |\n| ce * |\n| c* |\n| c * |\n| * |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| |\n| @ @ |\n| |\n| |\n| |\n| |\n| # |\n| |\n| |\n| @ |\n+----------+\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99] +StringLegalActions() = ["Pl1: shoot at (0, 0)", "Pl1: shoot at (0, 1)", "Pl1: shoot at (0, 2)", "Pl1: shoot at (0, 3)", "Pl1: shoot at (0, 4)", "Pl1: shoot at (0, 5)", "Pl1: shoot at (0, 6)", "Pl1: shoot at (0, 7)", "Pl1: shoot at (0, 8)", "Pl1: shoot at (0, 9)", "Pl1: shoot at (1, 0)", "Pl1: shoot at (1, 1)", "Pl1: shoot at (1, 2)", "Pl1: shoot at (1, 3)", "Pl1: shoot at (1, 4)", "Pl1: shoot at (1, 5)", "Pl1: shoot at (1, 6)", "Pl1: shoot at (1, 7)", "Pl1: shoot at (1, 8)", "Pl1: shoot at (1, 9)", "Pl1: shoot at (2, 0)", "Pl1: shoot at (2, 1)", "Pl1: shoot at (2, 2)", "Pl1: shoot at (2, 3)", "Pl1: shoot at (2, 4)", "Pl1: shoot at (2, 5)", "Pl1: shoot at (2, 6)", "Pl1: shoot at (2, 7)", "Pl1: shoot at (2, 8)", "Pl1: shoot at (2, 9)", "Pl1: shoot at (3, 0)", "Pl1: shoot at (3, 1)", "Pl1: shoot at (3, 2)", "Pl1: shoot at (3, 3)", "Pl1: shoot at (3, 4)", "Pl1: shoot at (3, 5)", "Pl1: shoot at (3, 6)", "Pl1: shoot at (3, 7)", "Pl1: shoot at (3, 8)", "Pl1: shoot at (3, 9)", "Pl1: shoot at (4, 0)", "Pl1: shoot at (4, 1)", "Pl1: shoot at (4, 2)", "Pl1: shoot at (4, 3)", "Pl1: shoot at (4, 4)", "Pl1: shoot at (4, 5)", "Pl1: shoot at (4, 6)", "Pl1: shoot at (4, 7)", "Pl1: shoot at (4, 8)", "Pl1: shoot at (4, 9)", "Pl1: shoot at (5, 0)", "Pl1: shoot at (5, 1)", "Pl1: shoot at (5, 2)", "Pl1: shoot at (5, 3)", "Pl1: shoot at (5, 4)", "Pl1: shoot at (5, 5)", "Pl1: shoot at (5, 6)", "Pl1: shoot at (5, 7)", "Pl1: shoot at (5, 8)", "Pl1: shoot at (5, 9)", "Pl1: shoot at (6, 0)", "Pl1: shoot at (6, 1)", "Pl1: shoot at (6, 2)", "Pl1: shoot at (6, 3)", "Pl1: shoot at (6, 4)", "Pl1: shoot at (6, 5)", "Pl1: shoot at (6, 6)", "Pl1: shoot at (6, 7)", "Pl1: shoot at (6, 8)", "Pl1: shoot at (6, 9)", "Pl1: shoot at (7, 0)", "Pl1: shoot at (7, 1)", "Pl1: shoot at (7, 2)", "Pl1: shoot at (7, 3)", "Pl1: shoot at (7, 4)", "Pl1: shoot at (7, 5)", "Pl1: shoot at (7, 6)", "Pl1: shoot at (7, 7)", "Pl1: shoot at (7, 8)", "Pl1: shoot at (7, 9)", "Pl1: shoot at (8, 0)", "Pl1: shoot at (8, 1)", "Pl1: shoot at (8, 2)", "Pl1: shoot at (8, 3)", "Pl1: shoot at (8, 4)", "Pl1: shoot at (8, 5)", "Pl1: shoot at (8, 6)", "Pl1: shoot at (8, 7)", "Pl1: shoot at (8, 8)", "Pl1: shoot at (8, 9)", "Pl1: shoot at (9, 0)", "Pl1: shoot at (9, 1)", "Pl1: shoot at (9, 2)", "Pl1: shoot at (9, 3)", "Pl1: shoot at (9, 4)", "Pl1: shoot at (9, 5)", "Pl1: shoot at (9, 6)", "Pl1: shoot at (9, 7)", "Pl1: shoot at (9, 8)", "Pl1: shoot at (9, 9)"] + +# Apply action "Pl1: shoot at (5, 3)" +action: 53 + +# State 22 +# Apply action "Pl0: shoot at (6, 7)" +action: 67 + +# State 23 +# Apply action "Pl1: shoot at (6, 3)" +action: 63 + +# State 24 +# Apply action "Pl0: shoot at (2, 7)" +action: 27 + +# State 25 +# Apply action "Pl1: shoot at (4, 4)" +action: 44 + +# State 26 +# Apply action "Pl0: shoot at (4, 8)" +action: 48 + +# State 27 +# Apply action "Pl1: shoot at (3, 3)" +action: 33 + +# State 28 +# Apply 
action "Pl0: shoot at (8, 0)" +action: 80 + +# State 29 +# Apply action "Pl1: shoot at (9, 4)" +action: 94 + +# State 30 +# Apply action "Pl0: shoot at (0, 7)" +action: 7 + +# State 31 +# Apply action "Pl1: shoot at (4, 4)" +action: 44 + +# State 32 +# Apply action "Pl0: shoot at (8, 4)" +action: 84 + +# State 33 +# Apply action "Pl1: shoot at (9, 1)" +action: 91 + +# State 34 +# Apply action "Pl0: shoot at (1, 6)" +action: 16 + +# State 35 +# Apply action "Pl1: shoot at (2, 5)" +action: 25 + +# State 36 +# Apply action "Pl0: shoot at (0, 3)" +action: 3 + +# State 37 +# Apply action "Pl1: shoot at (2, 4)" +action: 24 + +# State 38 +# Apply action "Pl0: shoot at (8, 6)" +action: 86 + +# State 39 +# Apply action "Pl1: shoot at (8, 2)" +action: 82 + +# State 40 +# Player 0's board: +# +----------+ +# | | +# | * * | +# | A* | +# | *a | +# | d * b | +# | de* b | +# | De* c b | +# | de c | +# | E c | +# | *e * * | +# +----------+ +# +# Player 1's board: +# +----------+ +# |b * d * | +# |b a d* | +# |b aed * | +# | ed | +# | e **| +# | * e | +# | ce ** | +# | c* | +# |* c* * | +# | * | +# +----------+ +# +# Full history: /0:v_2_4/1:v_1_3/0:v_4_8/1:v_0_0/0:v_6_6/1:v_6_3/0:v_4_1/1:v_0_5/0:v_5_2/1:v_2_4/0:6_8/1:6_1/0:7_4/1:9_7/0:9_7/1:1_7/0:8_6/1:9_7/0:5_1/1:1_4/0:4_9/1:5_3/0:6_7/1:6_3/0:2_7/1:4_4/0:4_8/1:3_3/0:8_0/1:9_4/0:0_7/1:4_4/0:8_4/1:9_1/0:1_6/1:2_5/0:0_3/1:2_4/0:8_6/1:8_2 +IsTerminal() = False +History() = [224, 213, 248, 200, 266, 263, 241, 205, 252, 224, 68, 61, 74, 97, 97, 17, 86, 97, 51, 14, 49, 53, 67, 63, 27, 44, 48, 33, 80, 94, 7, 44, 84, 91, 16, 25, 3, 24, 86, 82] +HistoryString() = "224, 213, 248, 200, 266, 263, 241, 205, 252, 224, 68, 61, 74, 97, 97, 17, 86, 97, 51, 14, 49, 53, 67, 63, 27, 44, 48, 33, 80, 94, 7, 44, 84, 91, 16, 25, 3, 24, 86, 82" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "T=40 /v_2_4/v_4_8/v_6_6/v_4_1/v_5_2/shot_6_8:W/oppshot_6_1/shot_7_4:W/oppshot_9_7/shot_9_7:W/oppshot_1_7/shot_8_6:W/oppshot_9_7/shot_5_1:W/oppshot_1_4/shot_4_9:W/oppshot_5_3/shot_6_7:W/oppshot_6_3/shot_2_7:W/oppshot_4_4/shot_4_8:W/oppshot_3_3/shot_8_0:W/oppshot_9_4/shot_0_7:W/oppshot_4_4/shot_8_4:W/oppshot_9_1/shot_1_6:W/oppshot_2_5/shot_0_3:W/oppshot_2_4/shot_8_6:W/oppshot_8_2" +InformationStateString(1) = "T=40 /v_1_3/v_0_0/v_6_3/v_0_5/v_2_4/oppshot_6_8/shot_6_1:H/oppshot_7_4/shot_9_7:W/oppshot_9_7/shot_1_7:W/oppshot_8_6/shot_9_7:W/oppshot_5_1/shot_1_4:W/oppshot_4_9/shot_5_3:W/oppshot_6_7/shot_6_3:W/oppshot_2_7/shot_4_4:W/oppshot_4_8/shot_3_3:W/oppshot_8_0/shot_9_4:W/oppshot_0_7/shot_4_4:W/oppshot_8_4/shot_9_1:W/oppshot_1_6/shot_2_5:W/oppshot_0_3/shot_2_4:H/oppshot_8_6/shot_8_2:H" +InformationStateTensor(0): binvec(2615, 0x2920020420009020084204010408080800a204200201020880202080101228000820080888020208104022800402080018820200808012204080220004884010082000a220080200a008802100a0001221004020082088028009000222400202800408900100802022201100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(2615, 
0x19400406008010204060004120020808008204200a010200802024801010280009200808080202481040028004120800108202048080102040812200040840104820008220081200a000802104a0001021004120082008028049000202400212800400900102802020201100800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationString(0) = "State of player's ships:\n+----------+\n| |\n| * * |\n| A* |\n| *a |\n| d * b |\n| de* b |\n| De* c b |\n| de c |\n| E c |\n| *e * * |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| @ @ |\n| @ |\n| @ |\n| |\n| @@|\n| @ |\n| @@ |\n| @ |\n|@ @ @ |\n| @ |\n+----------+\n" +ObservationString(1) = "State of player's ships:\n+----------+\n|b * d * |\n|b a d* |\n|b aed * |\n| ed |\n| e **|\n| * e |\n| ce ** |\n| c* |\n|* c* * |\n| * |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| |\n| @ @ |\n| #@ |\n| @ |\n| @ |\n| @ |\n| # @ |\n| |\n| # |\n| @ @ @ |\n+----------+\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99] +StringLegalActions() = ["Pl0: shoot at (0, 0)", "Pl0: shoot at (0, 1)", "Pl0: shoot at (0, 2)", "Pl0: shoot at (0, 3)", "Pl0: shoot at (0, 4)", "Pl0: shoot at (0, 5)", "Pl0: shoot at (0, 6)", "Pl0: shoot at (0, 7)", "Pl0: shoot at (0, 8)", "Pl0: shoot at (0, 9)", "Pl0: shoot at (1, 0)", "Pl0: shoot at (1, 1)", "Pl0: shoot at (1, 2)", "Pl0: shoot at (1, 3)", "Pl0: shoot at (1, 4)", "Pl0: shoot at (1, 5)", "Pl0: shoot at (1, 6)", "Pl0: shoot at (1, 7)", "Pl0: shoot at (1, 8)", "Pl0: shoot at (1, 9)", "Pl0: shoot at (2, 0)", "Pl0: shoot at (2, 1)", "Pl0: shoot at (2, 2)", "Pl0: shoot at (2, 3)", "Pl0: shoot at (2, 4)", "Pl0: shoot at (2, 5)", "Pl0: shoot at (2, 6)", "Pl0: shoot at (2, 7)", "Pl0: shoot at (2, 8)", "Pl0: shoot at (2, 9)", "Pl0: shoot at (3, 0)", "Pl0: shoot at (3, 1)", "Pl0: shoot at (3, 2)", "Pl0: shoot at (3, 3)", "Pl0: shoot at (3, 4)", "Pl0: shoot at (3, 5)", "Pl0: shoot at (3, 6)", "Pl0: shoot at (3, 7)", "Pl0: shoot at (3, 8)", "Pl0: shoot at (3, 9)", "Pl0: shoot at (4, 0)", "Pl0: shoot at (4, 1)", "Pl0: shoot at (4, 2)", "Pl0: shoot at (4, 3)", "Pl0: shoot at (4, 4)", "Pl0: shoot at (4, 5)", "Pl0: shoot at (4, 6)", "Pl0: shoot at (4, 7)", "Pl0: shoot at (4, 8)", "Pl0: shoot at (4, 9)", "Pl0: shoot at (5, 0)", "Pl0: shoot at (5, 1)", "Pl0: shoot at (5, 2)", "Pl0: shoot at (5, 3)", "Pl0: shoot at (5, 4)", "Pl0: shoot at (5, 5)", "Pl0: shoot at (5, 6)", "Pl0: shoot at (5, 7)", "Pl0: shoot at (5, 8)", "Pl0: shoot at (5, 9)", "Pl0: shoot at (6, 0)", "Pl0: shoot at (6, 1)", "Pl0: shoot at (6, 2)", "Pl0: shoot at (6, 3)", "Pl0: shoot at (6, 4)", "Pl0: shoot at (6, 5)", "Pl0: shoot at (6, 6)", "Pl0: shoot at (6, 7)", "Pl0: shoot at (6, 8)", "Pl0: shoot at (6, 9)", "Pl0: shoot at (7, 0)", "Pl0: shoot at (7, 1)", "Pl0: shoot at (7, 2)", "Pl0: shoot at (7, 3)", "Pl0: shoot at (7, 4)", "Pl0: 
shoot at (7, 5)", "Pl0: shoot at (7, 6)", "Pl0: shoot at (7, 7)", "Pl0: shoot at (7, 8)", "Pl0: shoot at (7, 9)", "Pl0: shoot at (8, 0)", "Pl0: shoot at (8, 1)", "Pl0: shoot at (8, 2)", "Pl0: shoot at (8, 3)", "Pl0: shoot at (8, 4)", "Pl0: shoot at (8, 5)", "Pl0: shoot at (8, 6)", "Pl0: shoot at (8, 7)", "Pl0: shoot at (8, 8)", "Pl0: shoot at (8, 9)", "Pl0: shoot at (9, 0)", "Pl0: shoot at (9, 1)", "Pl0: shoot at (9, 2)", "Pl0: shoot at (9, 3)", "Pl0: shoot at (9, 4)", "Pl0: shoot at (9, 5)", "Pl0: shoot at (9, 6)", "Pl0: shoot at (9, 7)", "Pl0: shoot at (9, 8)", "Pl0: shoot at (9, 9)"] + +# Apply action "Pl0: shoot at (1, 0)" +action: 10 + +# State 41 +# Player 0's board: +# +----------+ +# | | +# | * * | +# | A* | +# | *a | +# | d * b | +# | de* b | +# | De* c b | +# | de c | +# | E c | +# | *e * * | +# +----------+ +# +# Player 1's board: +# +----------+ +# |b * d * | +# |B a d* | +# |b aed * | +# | ed | +# | e **| +# | * e | +# | ce ** | +# | c* | +# |* c* * | +# | * | +# +----------+ +# +# Full history: /0:v_2_4/1:v_1_3/0:v_4_8/1:v_0_0/0:v_6_6/1:v_6_3/0:v_4_1/1:v_0_5/0:v_5_2/1:v_2_4/0:6_8/1:6_1/0:7_4/1:9_7/0:9_7/1:1_7/0:8_6/1:9_7/0:5_1/1:1_4/0:4_9/1:5_3/0:6_7/1:6_3/0:2_7/1:4_4/0:4_8/1:3_3/0:8_0/1:9_4/0:0_7/1:4_4/0:8_4/1:9_1/0:1_6/1:2_5/0:0_3/1:2_4/0:8_6/1:8_2/0:1_0 +IsTerminal() = False +History() = [224, 213, 248, 200, 266, 263, 241, 205, 252, 224, 68, 61, 74, 97, 97, 17, 86, 97, 51, 14, 49, 53, 67, 63, 27, 44, 48, 33, 80, 94, 7, 44, 84, 91, 16, 25, 3, 24, 86, 82, 10] +HistoryString() = "224, 213, 248, 200, 266, 263, 241, 205, 252, 224, 68, 61, 74, 97, 97, 17, 86, 97, 51, 14, 49, 53, 67, 63, 27, 44, 48, 33, 80, 94, 7, 44, 84, 91, 16, 25, 3, 24, 86, 82, 10" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "T=41 /v_2_4/v_4_8/v_6_6/v_4_1/v_5_2/shot_6_8:W/oppshot_6_1/shot_7_4:W/oppshot_9_7/shot_9_7:W/oppshot_1_7/shot_8_6:W/oppshot_9_7/shot_5_1:W/oppshot_1_4/shot_4_9:W/oppshot_5_3/shot_6_7:W/oppshot_6_3/shot_2_7:W/oppshot_4_4/shot_4_8:W/oppshot_3_3/shot_8_0:W/oppshot_9_4/shot_0_7:W/oppshot_4_4/shot_8_4:W/oppshot_9_1/shot_1_6:W/oppshot_2_5/shot_0_3:W/oppshot_2_4/shot_8_6:W/oppshot_8_2/shot_1_0:H" +InformationStateString(1) = "T=41 /v_1_3/v_0_0/v_6_3/v_0_5/v_2_4/oppshot_6_8/shot_6_1:H/oppshot_7_4/shot_9_7:W/oppshot_9_7/shot_1_7:W/oppshot_8_6/shot_9_7:W/oppshot_5_1/shot_1_4:W/oppshot_4_9/shot_5_3:W/oppshot_6_7/shot_6_3:W/oppshot_2_7/shot_4_4:W/oppshot_4_8/shot_3_3:W/oppshot_8_0/shot_9_4:W/oppshot_0_7/shot_4_4:W/oppshot_8_4/shot_9_1:W/oppshot_1_6/shot_2_5:W/oppshot_0_3/shot_2_4:H/oppshot_8_6/shot_8_2:H/oppshot_1_0" +InformationStateTensor(0): binvec(2615, 0x2520020420009020084204010408080800a204200201020880202080101228000820080888020208104022800402080018820200808012204080220004884010082000a220080200a008802100a0001221004020082088028009000222400202800408900100802022201100240200400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(2615, 
0x15400406008010204060004120020808008204200a010200802024801010280009200808080202481040028004120800108202048080102040812200040840104820008220081200a000802104a0001021004120082008028049000202400212800400900102802020201100a40200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationString(0) = "State of player's ships:\n+----------+\n| |\n| * * |\n| A* |\n| *a |\n| d * b |\n| de* b |\n| De* c b |\n| de c |\n| E c |\n| *e * * |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| @ @ |\n|# @ |\n| @ |\n| |\n| @@|\n| @ |\n| @@ |\n| @ |\n|@ @ @ |\n| @ |\n+----------+\n" +ObservationString(1) = "State of player's ships:\n+----------+\n|b * d * |\n|B a d* |\n|b aed * |\n| ed |\n| e **|\n| * e |\n| ce ** |\n| c* |\n|* c* * |\n| * |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| |\n| @ @ |\n| #@ |\n| @ |\n| @ |\n| @ |\n| # @ |\n| |\n| # |\n| @ @ @ |\n+----------+\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99] +StringLegalActions() = ["Pl1: shoot at (0, 0)", "Pl1: shoot at (0, 1)", "Pl1: shoot at (0, 2)", "Pl1: shoot at (0, 3)", "Pl1: shoot at (0, 4)", "Pl1: shoot at (0, 5)", "Pl1: shoot at (0, 6)", "Pl1: shoot at (0, 7)", "Pl1: shoot at (0, 8)", "Pl1: shoot at (0, 9)", "Pl1: shoot at (1, 0)", "Pl1: shoot at (1, 1)", "Pl1: shoot at (1, 2)", "Pl1: shoot at (1, 3)", "Pl1: shoot at (1, 4)", "Pl1: shoot at (1, 5)", "Pl1: shoot at (1, 6)", "Pl1: shoot at (1, 7)", "Pl1: shoot at (1, 8)", "Pl1: shoot at (1, 9)", "Pl1: shoot at (2, 0)", "Pl1: shoot at (2, 1)", "Pl1: shoot at (2, 2)", "Pl1: shoot at (2, 3)", "Pl1: shoot at (2, 4)", "Pl1: shoot at (2, 5)", "Pl1: shoot at (2, 6)", "Pl1: shoot at (2, 7)", "Pl1: shoot at (2, 8)", "Pl1: shoot at (2, 9)", "Pl1: shoot at (3, 0)", "Pl1: shoot at (3, 1)", "Pl1: shoot at (3, 2)", "Pl1: shoot at (3, 3)", "Pl1: shoot at (3, 4)", "Pl1: shoot at (3, 5)", "Pl1: shoot at (3, 6)", "Pl1: shoot at (3, 7)", "Pl1: shoot at (3, 8)", "Pl1: shoot at (3, 9)", "Pl1: shoot at (4, 0)", "Pl1: shoot at (4, 1)", "Pl1: shoot at (4, 2)", "Pl1: shoot at (4, 3)", "Pl1: shoot at (4, 4)", "Pl1: shoot at (4, 5)", "Pl1: shoot at (4, 6)", "Pl1: shoot at (4, 7)", "Pl1: shoot at (4, 8)", "Pl1: shoot at (4, 9)", "Pl1: shoot at (5, 0)", "Pl1: shoot at (5, 1)", "Pl1: shoot at (5, 2)", "Pl1: shoot at (5, 3)", "Pl1: shoot at (5, 4)", "Pl1: shoot at (5, 5)", "Pl1: shoot at (5, 6)", "Pl1: shoot at (5, 7)", "Pl1: shoot at (5, 8)", "Pl1: shoot at (5, 9)", "Pl1: shoot at (6, 0)", "Pl1: shoot at (6, 1)", "Pl1: shoot at (6, 2)", "Pl1: shoot at (6, 3)", "Pl1: shoot at (6, 4)", "Pl1: shoot at (6, 5)", "Pl1: shoot at (6, 6)", "Pl1: shoot at (6, 7)", "Pl1: shoot at (6, 8)", "Pl1: shoot at (6, 9)", "Pl1: shoot at (7, 0)", "Pl1: shoot at (7, 1)", "Pl1: shoot at (7, 2)", "Pl1: shoot at (7, 3)", "Pl1: shoot at (7, 4)", "Pl1: 
shoot at (7, 5)", "Pl1: shoot at (7, 6)", "Pl1: shoot at (7, 7)", "Pl1: shoot at (7, 8)", "Pl1: shoot at (7, 9)", "Pl1: shoot at (8, 0)", "Pl1: shoot at (8, 1)", "Pl1: shoot at (8, 2)", "Pl1: shoot at (8, 3)", "Pl1: shoot at (8, 4)", "Pl1: shoot at (8, 5)", "Pl1: shoot at (8, 6)", "Pl1: shoot at (8, 7)", "Pl1: shoot at (8, 8)", "Pl1: shoot at (8, 9)", "Pl1: shoot at (9, 0)", "Pl1: shoot at (9, 1)", "Pl1: shoot at (9, 2)", "Pl1: shoot at (9, 3)", "Pl1: shoot at (9, 4)", "Pl1: shoot at (9, 5)", "Pl1: shoot at (9, 6)", "Pl1: shoot at (9, 7)", "Pl1: shoot at (9, 8)", "Pl1: shoot at (9, 9)"] + +# Apply action "Pl1: shoot at (3, 4)" +action: 34 + +# State 42 +# Apply action "Pl0: shoot at (3, 0)" +action: 30 + +# State 43 +# Apply action "Pl1: shoot at (2, 1)" +action: 21 + +# State 44 +# Apply action "Pl0: shoot at (6, 3)" +action: 63 + +# State 45 +# Apply action "Pl1: shoot at (1, 3)" +action: 13 + +# State 46 +# Apply action "Pl0: shoot at (5, 2)" +action: 52 + +# State 47 +# Apply action "Pl1: shoot at (3, 2)" +action: 32 + +# State 48 +# Apply action "Pl0: shoot at (6, 8)" +action: 68 + +# State 49 +# Apply action "Pl1: shoot at (6, 5)" +action: 65 + +# State 50 +# Apply action "Pl0: shoot at (4, 6)" +action: 46 + +# State 51 +# Apply action "Pl1: shoot at (9, 7)" +action: 97 + +# State 52 +# Apply action "Pl0: shoot at (2, 1)" +action: 21 + +# State 53 +# Apply action "Pl1: shoot at (8, 8)" +action: 88 + +# State 54 +# Apply action "Pl0: shoot at (6, 5)" +action: 65 + +# State 55 +# Apply action "Pl1: shoot at (8, 8)" +action: 88 + +# State 56 +# Apply action "Pl0: shoot at (7, 6)" +action: 76 + +# State 57 +# Apply action "Pl1: shoot at (1, 2)" +action: 12 + +# State 58 +# Apply action "Pl0: shoot at (4, 1)" +action: 41 + +# State 59 +# Apply action "Pl1: shoot at (7, 3)" +action: 73 + +# State 60 +# Player 0's board: +# +----------+ +# | | +# | *** * | +# | * A* | +# | **A | +# | d * b | +# | de* b | +# | De* *c b | +# | de* c | +# | E c * | +# | *e * * | +# +----------+ +# +# Player 1's board: +# +----------+ +# |b * d * | +# |B a d* | +# |b* aed * | +# |* ed | +# | * e * **| +# | ** e | +# | Ce* ** | +# | c* * | +# |* c* * | +# | * | +# +----------+ +# +# Full history: /0:v_2_4/1:v_1_3/0:v_4_8/1:v_0_0/0:v_6_6/1:v_6_3/0:v_4_1/1:v_0_5/0:v_5_2/1:v_2_4/0:6_8/1:6_1/0:7_4/1:9_7/0:9_7/1:1_7/0:8_6/1:9_7/0:5_1/1:1_4/0:4_9/1:5_3/0:6_7/1:6_3/0:2_7/1:4_4/0:4_8/1:3_3/0:8_0/1:9_4/0:0_7/1:4_4/0:8_4/1:9_1/0:1_6/1:2_5/0:0_3/1:2_4/0:8_6/1:8_2/0:1_0/1:3_4/0:3_0/1:2_1/0:6_3/1:1_3/0:5_2/1:3_2/0:6_8/1:6_5/0:4_6/1:9_7/0:2_1/1:8_8/0:6_5/1:8_8/0:7_6/1:1_2/0:4_1/1:7_3 +IsTerminal() = False +History() = [224, 213, 248, 200, 266, 263, 241, 205, 252, 224, 68, 61, 74, 97, 97, 17, 86, 97, 51, 14, 49, 53, 67, 63, 27, 44, 48, 33, 80, 94, 7, 44, 84, 91, 16, 25, 3, 24, 86, 82, 10, 34, 30, 21, 63, 13, 52, 32, 68, 65, 46, 97, 21, 88, 65, 88, 76, 12, 41, 73] +HistoryString() = "224, 213, 248, 200, 266, 263, 241, 205, 252, 224, 68, 61, 74, 97, 97, 17, 86, 97, 51, 14, 49, 53, 67, 63, 27, 44, 48, 33, 80, 94, 7, 44, 84, 91, 16, 25, 3, 24, 86, 82, 10, 34, 30, 21, 63, 13, 52, 32, 68, 65, 46, 97, 21, 88, 65, 88, 76, 12, 41, 73" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "T=60 
/v_2_4/v_4_8/v_6_6/v_4_1/v_5_2/shot_6_8:W/oppshot_6_1/shot_7_4:W/oppshot_9_7/shot_9_7:W/oppshot_1_7/shot_8_6:W/oppshot_9_7/shot_5_1:W/oppshot_1_4/shot_4_9:W/oppshot_5_3/shot_6_7:W/oppshot_6_3/shot_2_7:W/oppshot_4_4/shot_4_8:W/oppshot_3_3/shot_8_0:W/oppshot_9_4/shot_0_7:W/oppshot_4_4/shot_8_4:W/oppshot_9_1/shot_1_6:W/oppshot_2_5/shot_0_3:W/oppshot_2_4/shot_8_6:W/oppshot_8_2/shot_1_0:H/oppshot_3_4/shot_3_0:W/oppshot_2_1/shot_6_3:H/oppshot_1_3/shot_5_2:W/oppshot_3_2/shot_6_8:W/oppshot_6_5/shot_4_6:W/oppshot_9_7/shot_2_1:W/oppshot_8_8/shot_6_5:W/oppshot_8_8/shot_7_6:W/oppshot_1_2/shot_4_1:W/oppshot_7_3" +InformationStateString(1) = "T=60 /v_1_3/v_0_0/v_6_3/v_0_5/v_2_4/oppshot_6_8/shot_6_1:H/oppshot_7_4/shot_9_7:W/oppshot_9_7/shot_1_7:W/oppshot_8_6/shot_9_7:W/oppshot_5_1/shot_1_4:W/oppshot_4_9/shot_5_3:W/oppshot_6_7/shot_6_3:W/oppshot_2_7/shot_4_4:W/oppshot_4_8/shot_3_3:W/oppshot_8_0/shot_9_4:W/oppshot_0_7/shot_4_4:W/oppshot_8_4/shot_9_1:W/oppshot_1_6/shot_2_5:W/oppshot_0_3/shot_2_4:H/oppshot_8_6/shot_8_2:H/oppshot_1_0/shot_3_4:S/oppshot_3_0/shot_2_1:W/oppshot_6_3/shot_1_3:W/oppshot_5_2/shot_3_2:W/oppshot_6_8/shot_6_5:W/oppshot_4_6/shot_9_7:W/oppshot_2_1/shot_8_8:W/oppshot_6_5/shot_8_8:W/oppshot_7_6/shot_1_2:W/oppshot_4_1/shot_7_3:W" +InformationStateTensor(0): binvec(2615, 0x2920020420009020084204010408080800a204200201020880202080101228000820080888020208104022800402080018820200808012204080220004884010082000a220080200a008802100a000122100402008208802800900022240020280040890010080202220110024020048801008408022402002020404a00200810202220100202002881008082002220080822010088040108080422010042010088a004008204022020800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(2615, 0x19400406008010204060004120020808008204200a010200802024801010280009200808080202481040028004120800108202048080102040812200040840104820008220081200a000802104a0001021004120082008028049000202400212800400900102802020201100a4020008801018408002402012020400a00204810200220101202002081008482002020080922010008040148080402010052010080a004048204002020810000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationString(0) = "State of player's ships:\n+----------+\n| |\n| *** * |\n| * A* |\n| **A |\n| d * b |\n| de* b |\n| De* *c b |\n| de* c |\n| E c * |\n| *e * * |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| @ @ |\n|# @ |\n| @ @ |\n|@ |\n| @ @ @@|\n| @@ |\n| # @ @@ |\n| @ @ |\n|@ @ @ |\n| @ |\n+----------+\n" +ObservationString(1) = "State of player's ships:\n+----------+\n|b * d * |\n|B a d* |\n|b* aed * |\n|* ed |\n| * e * **|\n| ** e |\n| Ce* ** |\n| c* * |\n|* c* * |\n| * |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| |\n| @@@ @ |\n| @ #@ |\n| @@# |\n| @ |\n| @ |\n| # @ @ |\n| @ |\n| # @ |\n| @ @ @ |\n+----------+\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 
49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99] +StringLegalActions() = ["Pl0: shoot at (0, 0)", "Pl0: shoot at (0, 1)", "Pl0: shoot at (0, 2)", "Pl0: shoot at (0, 3)", "Pl0: shoot at (0, 4)", "Pl0: shoot at (0, 5)", "Pl0: shoot at (0, 6)", "Pl0: shoot at (0, 7)", "Pl0: shoot at (0, 8)", "Pl0: shoot at (0, 9)", "Pl0: shoot at (1, 0)", "Pl0: shoot at (1, 1)", "Pl0: shoot at (1, 2)", "Pl0: shoot at (1, 3)", "Pl0: shoot at (1, 4)", "Pl0: shoot at (1, 5)", "Pl0: shoot at (1, 6)", "Pl0: shoot at (1, 7)", "Pl0: shoot at (1, 8)", "Pl0: shoot at (1, 9)", "Pl0: shoot at (2, 0)", "Pl0: shoot at (2, 1)", "Pl0: shoot at (2, 2)", "Pl0: shoot at (2, 3)", "Pl0: shoot at (2, 4)", "Pl0: shoot at (2, 5)", "Pl0: shoot at (2, 6)", "Pl0: shoot at (2, 7)", "Pl0: shoot at (2, 8)", "Pl0: shoot at (2, 9)", "Pl0: shoot at (3, 0)", "Pl0: shoot at (3, 1)", "Pl0: shoot at (3, 2)", "Pl0: shoot at (3, 3)", "Pl0: shoot at (3, 4)", "Pl0: shoot at (3, 5)", "Pl0: shoot at (3, 6)", "Pl0: shoot at (3, 7)", "Pl0: shoot at (3, 8)", "Pl0: shoot at (3, 9)", "Pl0: shoot at (4, 0)", "Pl0: shoot at (4, 1)", "Pl0: shoot at (4, 2)", "Pl0: shoot at (4, 3)", "Pl0: shoot at (4, 4)", "Pl0: shoot at (4, 5)", "Pl0: shoot at (4, 6)", "Pl0: shoot at (4, 7)", "Pl0: shoot at (4, 8)", "Pl0: shoot at (4, 9)", "Pl0: shoot at (5, 0)", "Pl0: shoot at (5, 1)", "Pl0: shoot at (5, 2)", "Pl0: shoot at (5, 3)", "Pl0: shoot at (5, 4)", "Pl0: shoot at (5, 5)", "Pl0: shoot at (5, 6)", "Pl0: shoot at (5, 7)", "Pl0: shoot at (5, 8)", "Pl0: shoot at (5, 9)", "Pl0: shoot at (6, 0)", "Pl0: shoot at (6, 1)", "Pl0: shoot at (6, 2)", "Pl0: shoot at (6, 3)", "Pl0: shoot at (6, 4)", "Pl0: shoot at (6, 5)", "Pl0: shoot at (6, 6)", "Pl0: shoot at (6, 7)", "Pl0: shoot at (6, 8)", "Pl0: shoot at (6, 9)", "Pl0: shoot at (7, 0)", "Pl0: shoot at (7, 1)", "Pl0: shoot at (7, 2)", "Pl0: shoot at (7, 3)", "Pl0: shoot at (7, 4)", "Pl0: shoot at (7, 5)", "Pl0: shoot at (7, 6)", "Pl0: shoot at (7, 7)", "Pl0: shoot at (7, 8)", "Pl0: shoot at (7, 9)", "Pl0: shoot at (8, 0)", "Pl0: shoot at (8, 1)", "Pl0: shoot at (8, 2)", "Pl0: shoot at (8, 3)", "Pl0: shoot at (8, 4)", "Pl0: shoot at (8, 5)", "Pl0: shoot at (8, 6)", "Pl0: shoot at (8, 7)", "Pl0: shoot at (8, 8)", "Pl0: shoot at (8, 9)", "Pl0: shoot at (9, 0)", "Pl0: shoot at (9, 1)", "Pl0: shoot at (9, 2)", "Pl0: shoot at (9, 3)", "Pl0: shoot at (9, 4)", "Pl0: shoot at (9, 5)", "Pl0: shoot at (9, 6)", "Pl0: shoot at (9, 7)", "Pl0: shoot at (9, 8)", "Pl0: shoot at (9, 9)"] + +# Apply action "Pl0: shoot at (4, 1)" +action: 41 + +# State 61 +# Player 0's board: +# +----------+ +# | | +# | *** * | +# | * A* | +# | **A | +# | d * b | +# | de* b | +# | De* *c b | +# | de* c | +# | E c * | +# | *e * * | +# +----------+ +# +# Player 1's board: +# +----------+ +# |b * d * | +# |B a d* | +# |b* aed * | +# |* ed | +# | * e * **| +# | ** e | +# | Ce* ** | +# | c* * | +# |* c* * | +# | * | +# +----------+ +# +# Full history: /0:v_2_4/1:v_1_3/0:v_4_8/1:v_0_0/0:v_6_6/1:v_6_3/0:v_4_1/1:v_0_5/0:v_5_2/1:v_2_4/0:6_8/1:6_1/0:7_4/1:9_7/0:9_7/1:1_7/0:8_6/1:9_7/0:5_1/1:1_4/0:4_9/1:5_3/0:6_7/1:6_3/0:2_7/1:4_4/0:4_8/1:3_3/0:8_0/1:9_4/0:0_7/1:4_4/0:8_4/1:9_1/0:1_6/1:2_5/0:0_3/1:2_4/0:8_6/1:8_2/0:1_0/1:3_4/0:3_0/1:2_1/0:6_3/1:1_3/0:5_2/1:3_2/0:6_8/1:6_5/0:4_6/1:9_7/0:2_1/1:8_8/0:6_5/1:8_8/0:7_6/1:1_2/0:4_1/1:7_3/0:4_1 +IsTerminal() = False +History() = [224, 213, 248, 200, 266, 263, 241, 205, 252, 224, 68, 
61, 74, 97, 97, 17, 86, 97, 51, 14, 49, 53, 67, 63, 27, 44, 48, 33, 80, 94, 7, 44, 84, 91, 16, 25, 3, 24, 86, 82, 10, 34, 30, 21, 63, 13, 52, 32, 68, 65, 46, 97, 21, 88, 65, 88, 76, 12, 41, 73, 41] +HistoryString() = "224, 213, 248, 200, 266, 263, 241, 205, 252, 224, 68, 61, 74, 97, 97, 17, 86, 97, 51, 14, 49, 53, 67, 63, 27, 44, 48, 33, 80, 94, 7, 44, 84, 91, 16, 25, 3, 24, 86, 82, 10, 34, 30, 21, 63, 13, 52, 32, 68, 65, 46, 97, 21, 88, 65, 88, 76, 12, 41, 73, 41" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "T=61 /v_2_4/v_4_8/v_6_6/v_4_1/v_5_2/shot_6_8:W/oppshot_6_1/shot_7_4:W/oppshot_9_7/shot_9_7:W/oppshot_1_7/shot_8_6:W/oppshot_9_7/shot_5_1:W/oppshot_1_4/shot_4_9:W/oppshot_5_3/shot_6_7:W/oppshot_6_3/shot_2_7:W/oppshot_4_4/shot_4_8:W/oppshot_3_3/shot_8_0:W/oppshot_9_4/shot_0_7:W/oppshot_4_4/shot_8_4:W/oppshot_9_1/shot_1_6:W/oppshot_2_5/shot_0_3:W/oppshot_2_4/shot_8_6:W/oppshot_8_2/shot_1_0:H/oppshot_3_4/shot_3_0:W/oppshot_2_1/shot_6_3:H/oppshot_1_3/shot_5_2:W/oppshot_3_2/shot_6_8:W/oppshot_6_5/shot_4_6:W/oppshot_9_7/shot_2_1:W/oppshot_8_8/shot_6_5:W/oppshot_8_8/shot_7_6:W/oppshot_1_2/shot_4_1:W/oppshot_7_3/shot_4_1:W" +InformationStateString(1) = "T=61 /v_1_3/v_0_0/v_6_3/v_0_5/v_2_4/oppshot_6_8/shot_6_1:H/oppshot_7_4/shot_9_7:W/oppshot_9_7/shot_1_7:W/oppshot_8_6/shot_9_7:W/oppshot_5_1/shot_1_4:W/oppshot_4_9/shot_5_3:W/oppshot_6_7/shot_6_3:W/oppshot_2_7/shot_4_4:W/oppshot_4_8/shot_3_3:W/oppshot_8_0/shot_9_4:W/oppshot_0_7/shot_4_4:W/oppshot_8_4/shot_9_1:W/oppshot_1_6/shot_2_5:W/oppshot_0_3/shot_2_4:H/oppshot_8_6/shot_8_2:H/oppshot_1_0/shot_3_4:S/oppshot_3_0/shot_2_1:W/oppshot_6_3/shot_1_3:W/oppshot_5_2/shot_3_2:W/oppshot_6_8/shot_6_5:W/oppshot_4_6/shot_9_7:W/oppshot_2_1/shot_8_8:W/oppshot_6_5/shot_8_8:W/oppshot_7_6/shot_1_2:W/oppshot_4_1/shot_7_3:W/oppshot_4_1" +InformationStateTensor(0): binvec(2615, 0x2520020420009020084204010408080800a204200201020880202080101228000820080888020208104022800402080018820200808012204080220004884010082000a220080200a008802100a000122100402008208802800900022240020280040890010080202220110024020048801008408022402002020404a00200810202220100202002881008082002220080822010088040108080422010042010088a004008204022020802081008000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(2615, 0x15400406008010204060004120020808008204200a010200802024801010280009200808080202481040028004120800108202048080102040812200040840104820008220081200a000802104a0001021004120082008028049000202400212800400900102802020201100a4020008801018408002402012020400a00204810200220101202002081008482002020080922010008040148080402010052010080a004048204002020812081000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationString(0) = "State of player's ships:\n+----------+\n| |\n| *** * |\n| * A* |\n| **A |\n| d * b |\n| de* b |\n| De* *c b |\n| de* c |\n| E c * |\n| *e * * |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| @ @ |\n|# @ |\n| @ @ |\n|@ |\n| @ @ @@|\n| @@ |\n| # @ @@ |\n| @ @ |\n|@ @ @ 
|\n| @ |\n+----------+\n" +ObservationString(1) = "State of player's ships:\n+----------+\n|b * d * |\n|B a d* |\n|b* aed * |\n|* ed |\n| * e * **|\n| ** e |\n| Ce* ** |\n| c* * |\n|* c* * |\n| * |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| |\n| @@@ @ |\n| @ #@ |\n| @@# |\n| @ |\n| @ |\n| # @ @ |\n| @ |\n| # @ |\n| @ @ @ |\n+----------+\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99] +StringLegalActions() = ["Pl1: shoot at (0, 0)", "Pl1: shoot at (0, 1)", "Pl1: shoot at (0, 2)", "Pl1: shoot at (0, 3)", "Pl1: shoot at (0, 4)", "Pl1: shoot at (0, 5)", "Pl1: shoot at (0, 6)", "Pl1: shoot at (0, 7)", "Pl1: shoot at (0, 8)", "Pl1: shoot at (0, 9)", "Pl1: shoot at (1, 0)", "Pl1: shoot at (1, 1)", "Pl1: shoot at (1, 2)", "Pl1: shoot at (1, 3)", "Pl1: shoot at (1, 4)", "Pl1: shoot at (1, 5)", "Pl1: shoot at (1, 6)", "Pl1: shoot at (1, 7)", "Pl1: shoot at (1, 8)", "Pl1: shoot at (1, 9)", "Pl1: shoot at (2, 0)", "Pl1: shoot at (2, 1)", "Pl1: shoot at (2, 2)", "Pl1: shoot at (2, 3)", "Pl1: shoot at (2, 4)", "Pl1: shoot at (2, 5)", "Pl1: shoot at (2, 6)", "Pl1: shoot at (2, 7)", "Pl1: shoot at (2, 8)", "Pl1: shoot at (2, 9)", "Pl1: shoot at (3, 0)", "Pl1: shoot at (3, 1)", "Pl1: shoot at (3, 2)", "Pl1: shoot at (3, 3)", "Pl1: shoot at (3, 4)", "Pl1: shoot at (3, 5)", "Pl1: shoot at (3, 6)", "Pl1: shoot at (3, 7)", "Pl1: shoot at (3, 8)", "Pl1: shoot at (3, 9)", "Pl1: shoot at (4, 0)", "Pl1: shoot at (4, 1)", "Pl1: shoot at (4, 2)", "Pl1: shoot at (4, 3)", "Pl1: shoot at (4, 4)", "Pl1: shoot at (4, 5)", "Pl1: shoot at (4, 6)", "Pl1: shoot at (4, 7)", "Pl1: shoot at (4, 8)", "Pl1: shoot at (4, 9)", "Pl1: shoot at (5, 0)", "Pl1: shoot at (5, 1)", "Pl1: shoot at (5, 2)", "Pl1: shoot at (5, 3)", "Pl1: shoot at (5, 4)", "Pl1: shoot at (5, 5)", "Pl1: shoot at (5, 6)", "Pl1: shoot at (5, 7)", "Pl1: shoot at (5, 8)", "Pl1: shoot at (5, 9)", "Pl1: shoot at (6, 0)", "Pl1: shoot at (6, 1)", "Pl1: shoot at (6, 2)", "Pl1: shoot at (6, 3)", "Pl1: shoot at (6, 4)", "Pl1: shoot at (6, 5)", "Pl1: shoot at (6, 6)", "Pl1: shoot at (6, 7)", "Pl1: shoot at (6, 8)", "Pl1: shoot at (6, 9)", "Pl1: shoot at (7, 0)", "Pl1: shoot at (7, 1)", "Pl1: shoot at (7, 2)", "Pl1: shoot at (7, 3)", "Pl1: shoot at (7, 4)", "Pl1: shoot at (7, 5)", "Pl1: shoot at (7, 6)", "Pl1: shoot at (7, 7)", "Pl1: shoot at (7, 8)", "Pl1: shoot at (7, 9)", "Pl1: shoot at (8, 0)", "Pl1: shoot at (8, 1)", "Pl1: shoot at (8, 2)", "Pl1: shoot at (8, 3)", "Pl1: shoot at (8, 4)", "Pl1: shoot at (8, 5)", "Pl1: shoot at (8, 6)", "Pl1: shoot at (8, 7)", "Pl1: shoot at (8, 8)", "Pl1: shoot at (8, 9)", "Pl1: shoot at (9, 0)", "Pl1: shoot at (9, 1)", "Pl1: shoot at (9, 2)", "Pl1: shoot at (9, 3)", "Pl1: shoot at (9, 4)", "Pl1: shoot at (9, 5)", "Pl1: shoot at (9, 6)", "Pl1: shoot at (9, 7)", "Pl1: shoot at (9, 8)", "Pl1: shoot at (9, 9)"] + +# Apply action "Pl1: shoot at (0, 7)" +action: 7 + +# State 62 +# Apply action "Pl0: shoot at (1, 9)" +action: 19 + +# State 63 +# Apply action "Pl1: shoot at (9, 5)" +action: 95 + +# State 64 +# Apply action "Pl0: shoot at (1, 4)" +action: 14 + +# State 65 +# Apply action "Pl1: shoot at (0, 1)" +action: 1 + +# State 
66 +# Apply action "Pl0: shoot at (1, 7)" +action: 17 + +# State 67 +# Apply action "Pl1: shoot at (3, 3)" +action: 33 + +# State 68 +# Apply action "Pl0: shoot at (3, 7)" +action: 37 + +# State 69 +# Apply action "Pl1: shoot at (4, 3)" +action: 43 + +# State 70 +# Apply action "Pl0: shoot at (1, 1)" +action: 11 + +# State 71 +# Apply action "Pl1: shoot at (7, 8)" +action: 78 + +# State 72 +# Apply action "Pl0: shoot at (5, 6)" +action: 56 + +# State 73 +# Apply action "Pl1: shoot at (3, 5)" +action: 35 + +# State 74 +# Apply action "Pl0: shoot at (2, 5)" +action: 25 + +# State 75 +# Apply action "Pl1: shoot at (9, 6)" +action: 96 + +# State 76 +# Apply action "Pl0: shoot at (3, 8)" +action: 38 + +# State 77 +# Apply action "Pl1: shoot at (5, 3)" +action: 53 + +# State 78 +# Apply action "Pl0: shoot at (0, 3)" +action: 3 + +# State 79 +# Apply action "Pl1: shoot at (2, 3)" +action: 23 + +# State 80 +# Player 0's board: +# +----------+ +# | * * | +# | *** * | +# | * *A* | +# | **A* | +# | d ** b | +# | de* b | +# | De* *c b | +# | de* c * | +# | E c * | +# | *e **** | +# +----------+ +# +# Player 1's board: +# +----------+ +# |b * d * | +# |B* a*d** *| +# |b* aeD * | +# |* ed ** | +# | * e * **| +# | ** e * | +# | Ce* ** | +# | c* * | +# |* c* * | +# | * | +# +----------+ +# +# Full history: /0:v_2_4/1:v_1_3/0:v_4_8/1:v_0_0/0:v_6_6/1:v_6_3/0:v_4_1/1:v_0_5/0:v_5_2/1:v_2_4/0:6_8/1:6_1/0:7_4/1:9_7/0:9_7/1:1_7/0:8_6/1:9_7/0:5_1/1:1_4/0:4_9/1:5_3/0:6_7/1:6_3/0:2_7/1:4_4/0:4_8/1:3_3/0:8_0/1:9_4/0:0_7/1:4_4/0:8_4/1:9_1/0:1_6/1:2_5/0:0_3/1:2_4/0:8_6/1:8_2/0:1_0/1:3_4/0:3_0/1:2_1/0:6_3/1:1_3/0:5_2/1:3_2/0:6_8/1:6_5/0:4_6/1:9_7/0:2_1/1:8_8/0:6_5/1:8_8/0:7_6/1:1_2/0:4_1/1:7_3/0:4_1/1:0_7/0:1_9/1:9_5/0:1_4/1:0_1/0:1_7/1:3_3/0:3_7/1:4_3/0:1_1/1:7_8/0:5_6/1:3_5/0:2_5/1:9_6/0:3_8/1:5_3/0:0_3/1:2_3 +IsTerminal() = False +History() = [224, 213, 248, 200, 266, 263, 241, 205, 252, 224, 68, 61, 74, 97, 97, 17, 86, 97, 51, 14, 49, 53, 67, 63, 27, 44, 48, 33, 80, 94, 7, 44, 84, 91, 16, 25, 3, 24, 86, 82, 10, 34, 30, 21, 63, 13, 52, 32, 68, 65, 46, 97, 21, 88, 65, 88, 76, 12, 41, 73, 41, 7, 19, 95, 14, 1, 17, 33, 37, 43, 11, 78, 56, 35, 25, 96, 38, 53, 3, 23] +HistoryString() = "224, 213, 248, 200, 266, 263, 241, 205, 252, 224, 68, 61, 74, 97, 97, 17, 86, 97, 51, 14, 49, 53, 67, 63, 27, 44, 48, 33, 80, 94, 7, 44, 84, 91, 16, 25, 3, 24, 86, 82, 10, 34, 30, 21, 63, 13, 52, 32, 68, 65, 46, 97, 21, 88, 65, 88, 76, 12, 41, 73, 41, 7, 19, 95, 14, 1, 17, 33, 37, 43, 11, 78, 56, 35, 25, 96, 38, 53, 3, 23" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "T=80 /v_2_4/v_4_8/v_6_6/v_4_1/v_5_2/shot_6_8:W/oppshot_6_1/shot_7_4:W/oppshot_9_7/shot_9_7:W/oppshot_1_7/shot_8_6:W/oppshot_9_7/shot_5_1:W/oppshot_1_4/shot_4_9:W/oppshot_5_3/shot_6_7:W/oppshot_6_3/shot_2_7:W/oppshot_4_4/shot_4_8:W/oppshot_3_3/shot_8_0:W/oppshot_9_4/shot_0_7:W/oppshot_4_4/shot_8_4:W/oppshot_9_1/shot_1_6:W/oppshot_2_5/shot_0_3:W/oppshot_2_4/shot_8_6:W/oppshot_8_2/shot_1_0:H/oppshot_3_4/shot_3_0:W/oppshot_2_1/shot_6_3:H/oppshot_1_3/shot_5_2:W/oppshot_3_2/shot_6_8:W/oppshot_6_5/shot_4_6:W/oppshot_9_7/shot_2_1:W/oppshot_8_8/shot_6_5:W/oppshot_8_8/shot_7_6:W/oppshot_1_2/shot_4_1:W/oppshot_7_3/shot_4_1:W/oppshot_0_7/shot_1_9:W/oppshot_9_5/shot_1_4:W/oppshot_0_1/shot_1_7:W/oppshot_3_3/shot_3_7:W/oppshot_4_3/shot_1_1:W/oppshot_7_8/shot_5_6:W/oppshot_3_5/shot_2_5:H/oppshot_9_6/shot_3_8:W/oppshot_5_3/shot_0_3:W/oppshot_2_3" +InformationStateString(1) = "T=80 
/v_1_3/v_0_0/v_6_3/v_0_5/v_2_4/oppshot_6_8/shot_6_1:H/oppshot_7_4/shot_9_7:W/oppshot_9_7/shot_1_7:W/oppshot_8_6/shot_9_7:W/oppshot_5_1/shot_1_4:W/oppshot_4_9/shot_5_3:W/oppshot_6_7/shot_6_3:W/oppshot_2_7/shot_4_4:W/oppshot_4_8/shot_3_3:W/oppshot_8_0/shot_9_4:W/oppshot_0_7/shot_4_4:W/oppshot_8_4/shot_9_1:W/oppshot_1_6/shot_2_5:W/oppshot_0_3/shot_2_4:H/oppshot_8_6/shot_8_2:H/oppshot_1_0/shot_3_4:S/oppshot_3_0/shot_2_1:W/oppshot_6_3/shot_1_3:W/oppshot_5_2/shot_3_2:W/oppshot_6_8/shot_6_5:W/oppshot_4_6/shot_9_7:W/oppshot_2_1/shot_8_8:W/oppshot_6_5/shot_8_8:W/oppshot_7_6/shot_1_2:W/oppshot_4_1/shot_7_3:W/oppshot_4_1/shot_0_7:W/oppshot_1_9/shot_9_5:W/oppshot_1_4/shot_0_1:W/oppshot_1_7/shot_3_3:W/oppshot_3_7/shot_4_3:W/oppshot_1_1/shot_7_8:W/oppshot_5_6/shot_3_5:W/oppshot_2_5/shot_9_6:W/oppshot_3_8/shot_5_3:W/oppshot_0_3/shot_2_3:W" +InformationStateTensor(0): binvec(2615, 0x2920020420009020084204010408080800a204200201020880202080101228000820080888020208104022800402080018820200808012204080220004884010082000a220080200a008802100a000122100402008208802800900022240020280040890010080202220110024020048801008408022402002020404a00200810202220100202002881008082002220080822010088040108080422010042010088a004008204022020802081008c000209000062008202400208c00800900012220080210004884020090040220200420400888800808800412008102100028820200a0010224008000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(2615, 0x19400406008010204060004120020808008204200a010200802024801010280009200808080202481040028004120800108202048080102040812200040840104820008220081200a000802104a0001021004120082008028049000202400212800400900102802020201100a4020008801018408002402012020400a00204810200220101202002081008482002020080922010008040148080402010052010080a004048204002020812081000c000249000042008212400200c00804900010220081210004084020490040020200520400808800848800402008112100020820204a0010024008100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationString(0) = "State of player's ships:\n+----------+\n| * * |\n| *** * |\n| * *A* |\n| **A* |\n| d ** b |\n| de* b |\n| De* *c b |\n| de* c * |\n| E c * |\n| *e **** |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| @ @ |\n|#@ @ @@ @|\n| @ # @ |\n|@ @@ |\n| @ @ @@|\n| @@ @ |\n| # @ @@ |\n| @ @ |\n|@ @ @ |\n| @ |\n+----------+\n" +ObservationString(1) = "State of player's ships:\n+----------+\n|b * d * |\n|B* a*d** *|\n|b* aeD * |\n|* ed ** |\n| * e * **|\n| ** e * |\n| Ce* ** |\n| c* * |\n|* c* * |\n| * |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| @ @ |\n| @@@ @ |\n| @ @#@ |\n| @@#@ |\n| @@ |\n| @ |\n| # @ @ |\n| @ @ |\n| # @ |\n| @ @@@@ |\n+----------+\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99] +StringLegalActions() = ["Pl0: shoot at (0, 0)", "Pl0: shoot at (0, 1)", "Pl0: shoot at (0, 2)", "Pl0: shoot at (0, 3)", "Pl0: shoot at (0, 4)", "Pl0: shoot at (0, 5)", 
"Pl0: shoot at (0, 6)", "Pl0: shoot at (0, 7)", "Pl0: shoot at (0, 8)", "Pl0: shoot at (0, 9)", "Pl0: shoot at (1, 0)", "Pl0: shoot at (1, 1)", "Pl0: shoot at (1, 2)", "Pl0: shoot at (1, 3)", "Pl0: shoot at (1, 4)", "Pl0: shoot at (1, 5)", "Pl0: shoot at (1, 6)", "Pl0: shoot at (1, 7)", "Pl0: shoot at (1, 8)", "Pl0: shoot at (1, 9)", "Pl0: shoot at (2, 0)", "Pl0: shoot at (2, 1)", "Pl0: shoot at (2, 2)", "Pl0: shoot at (2, 3)", "Pl0: shoot at (2, 4)", "Pl0: shoot at (2, 5)", "Pl0: shoot at (2, 6)", "Pl0: shoot at (2, 7)", "Pl0: shoot at (2, 8)", "Pl0: shoot at (2, 9)", "Pl0: shoot at (3, 0)", "Pl0: shoot at (3, 1)", "Pl0: shoot at (3, 2)", "Pl0: shoot at (3, 3)", "Pl0: shoot at (3, 4)", "Pl0: shoot at (3, 5)", "Pl0: shoot at (3, 6)", "Pl0: shoot at (3, 7)", "Pl0: shoot at (3, 8)", "Pl0: shoot at (3, 9)", "Pl0: shoot at (4, 0)", "Pl0: shoot at (4, 1)", "Pl0: shoot at (4, 2)", "Pl0: shoot at (4, 3)", "Pl0: shoot at (4, 4)", "Pl0: shoot at (4, 5)", "Pl0: shoot at (4, 6)", "Pl0: shoot at (4, 7)", "Pl0: shoot at (4, 8)", "Pl0: shoot at (4, 9)", "Pl0: shoot at (5, 0)", "Pl0: shoot at (5, 1)", "Pl0: shoot at (5, 2)", "Pl0: shoot at (5, 3)", "Pl0: shoot at (5, 4)", "Pl0: shoot at (5, 5)", "Pl0: shoot at (5, 6)", "Pl0: shoot at (5, 7)", "Pl0: shoot at (5, 8)", "Pl0: shoot at (5, 9)", "Pl0: shoot at (6, 0)", "Pl0: shoot at (6, 1)", "Pl0: shoot at (6, 2)", "Pl0: shoot at (6, 3)", "Pl0: shoot at (6, 4)", "Pl0: shoot at (6, 5)", "Pl0: shoot at (6, 6)", "Pl0: shoot at (6, 7)", "Pl0: shoot at (6, 8)", "Pl0: shoot at (6, 9)", "Pl0: shoot at (7, 0)", "Pl0: shoot at (7, 1)", "Pl0: shoot at (7, 2)", "Pl0: shoot at (7, 3)", "Pl0: shoot at (7, 4)", "Pl0: shoot at (7, 5)", "Pl0: shoot at (7, 6)", "Pl0: shoot at (7, 7)", "Pl0: shoot at (7, 8)", "Pl0: shoot at (7, 9)", "Pl0: shoot at (8, 0)", "Pl0: shoot at (8, 1)", "Pl0: shoot at (8, 2)", "Pl0: shoot at (8, 3)", "Pl0: shoot at (8, 4)", "Pl0: shoot at (8, 5)", "Pl0: shoot at (8, 6)", "Pl0: shoot at (8, 7)", "Pl0: shoot at (8, 8)", "Pl0: shoot at (8, 9)", "Pl0: shoot at (9, 0)", "Pl0: shoot at (9, 1)", "Pl0: shoot at (9, 2)", "Pl0: shoot at (9, 3)", "Pl0: shoot at (9, 4)", "Pl0: shoot at (9, 5)", "Pl0: shoot at (9, 6)", "Pl0: shoot at (9, 7)", "Pl0: shoot at (9, 8)", "Pl0: shoot at (9, 9)"] + +# Apply action "Pl0: shoot at (8, 3)" +action: 83 + +# State 81 +# Player 0's board: +# +----------+ +# | * * | +# | *** * | +# | * *A* | +# | **A* | +# | d ** b | +# | de* b | +# | De* *c b | +# | de* c * | +# | E c * | +# | *e **** | +# +----------+ +# +# Player 1's board: +# +----------+ +# |b * d * | +# |B* a*d** *| +# |b* aeD * | +# |* ed ** | +# | * e * **| +# | ** e * | +# | Ce* ** | +# | c* * | +# |* C* * | +# | * | +# +----------+ +# +# Full history: /0:v_2_4/1:v_1_3/0:v_4_8/1:v_0_0/0:v_6_6/1:v_6_3/0:v_4_1/1:v_0_5/0:v_5_2/1:v_2_4/0:6_8/1:6_1/0:7_4/1:9_7/0:9_7/1:1_7/0:8_6/1:9_7/0:5_1/1:1_4/0:4_9/1:5_3/0:6_7/1:6_3/0:2_7/1:4_4/0:4_8/1:3_3/0:8_0/1:9_4/0:0_7/1:4_4/0:8_4/1:9_1/0:1_6/1:2_5/0:0_3/1:2_4/0:8_6/1:8_2/0:1_0/1:3_4/0:3_0/1:2_1/0:6_3/1:1_3/0:5_2/1:3_2/0:6_8/1:6_5/0:4_6/1:9_7/0:2_1/1:8_8/0:6_5/1:8_8/0:7_6/1:1_2/0:4_1/1:7_3/0:4_1/1:0_7/0:1_9/1:9_5/0:1_4/1:0_1/0:1_7/1:3_3/0:3_7/1:4_3/0:1_1/1:7_8/0:5_6/1:3_5/0:2_5/1:9_6/0:3_8/1:5_3/0:0_3/1:2_3/0:8_3 +IsTerminal() = False +History() = [224, 213, 248, 200, 266, 263, 241, 205, 252, 224, 68, 61, 74, 97, 97, 17, 86, 97, 51, 14, 49, 53, 67, 63, 27, 44, 48, 33, 80, 94, 7, 44, 84, 91, 16, 25, 3, 24, 86, 82, 10, 34, 30, 21, 63, 13, 52, 32, 68, 65, 46, 97, 21, 88, 65, 88, 76, 12, 41, 73, 41, 7, 19, 95, 14, 1, 17, 33, 37, 43, 
11, 78, 56, 35, 25, 96, 38, 53, 3, 23, 83] +HistoryString() = "224, 213, 248, 200, 266, 263, 241, 205, 252, 224, 68, 61, 74, 97, 97, 17, 86, 97, 51, 14, 49, 53, 67, 63, 27, 44, 48, 33, 80, 94, 7, 44, 84, 91, 16, 25, 3, 24, 86, 82, 10, 34, 30, 21, 63, 13, 52, 32, 68, 65, 46, 97, 21, 88, 65, 88, 76, 12, 41, 73, 41, 7, 19, 95, 14, 1, 17, 33, 37, 43, 11, 78, 56, 35, 25, 96, 38, 53, 3, 23, 83" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "T=81 /v_2_4/v_4_8/v_6_6/v_4_1/v_5_2/shot_6_8:W/oppshot_6_1/shot_7_4:W/oppshot_9_7/shot_9_7:W/oppshot_1_7/shot_8_6:W/oppshot_9_7/shot_5_1:W/oppshot_1_4/shot_4_9:W/oppshot_5_3/shot_6_7:W/oppshot_6_3/shot_2_7:W/oppshot_4_4/shot_4_8:W/oppshot_3_3/shot_8_0:W/oppshot_9_4/shot_0_7:W/oppshot_4_4/shot_8_4:W/oppshot_9_1/shot_1_6:W/oppshot_2_5/shot_0_3:W/oppshot_2_4/shot_8_6:W/oppshot_8_2/shot_1_0:H/oppshot_3_4/shot_3_0:W/oppshot_2_1/shot_6_3:H/oppshot_1_3/shot_5_2:W/oppshot_3_2/shot_6_8:W/oppshot_6_5/shot_4_6:W/oppshot_9_7/shot_2_1:W/oppshot_8_8/shot_6_5:W/oppshot_8_8/shot_7_6:W/oppshot_1_2/shot_4_1:W/oppshot_7_3/shot_4_1:W/oppshot_0_7/shot_1_9:W/oppshot_9_5/shot_1_4:W/oppshot_0_1/shot_1_7:W/oppshot_3_3/shot_3_7:W/oppshot_4_3/shot_1_1:W/oppshot_7_8/shot_5_6:W/oppshot_3_5/shot_2_5:H/oppshot_9_6/shot_3_8:W/oppshot_5_3/shot_0_3:W/oppshot_2_3/shot_8_3:H" +InformationStateString(1) = "T=81 /v_1_3/v_0_0/v_6_3/v_0_5/v_2_4/oppshot_6_8/shot_6_1:H/oppshot_7_4/shot_9_7:W/oppshot_9_7/shot_1_7:W/oppshot_8_6/shot_9_7:W/oppshot_5_1/shot_1_4:W/oppshot_4_9/shot_5_3:W/oppshot_6_7/shot_6_3:W/oppshot_2_7/shot_4_4:W/oppshot_4_8/shot_3_3:W/oppshot_8_0/shot_9_4:W/oppshot_0_7/shot_4_4:W/oppshot_8_4/shot_9_1:W/oppshot_1_6/shot_2_5:W/oppshot_0_3/shot_2_4:H/oppshot_8_6/shot_8_2:H/oppshot_1_0/shot_3_4:S/oppshot_3_0/shot_2_1:W/oppshot_6_3/shot_1_3:W/oppshot_5_2/shot_3_2:W/oppshot_6_8/shot_6_5:W/oppshot_4_6/shot_9_7:W/oppshot_2_1/shot_8_8:W/oppshot_6_5/shot_8_8:W/oppshot_7_6/shot_1_2:W/oppshot_4_1/shot_7_3:W/oppshot_4_1/shot_0_7:W/oppshot_1_9/shot_9_5:W/oppshot_1_4/shot_0_1:W/oppshot_1_7/shot_3_3:W/oppshot_3_7/shot_4_3:W/oppshot_1_1/shot_7_8:W/oppshot_5_6/shot_3_5:W/oppshot_2_5/shot_9_6:W/oppshot_3_8/shot_5_3:W/oppshot_0_3/shot_2_3:W/oppshot_8_3" +InformationStateTensor(0): binvec(2615, 0x2520020420009020084204010408080800a204200201020880202080101228000820080888020208104022800402080018820200808012204080220004884010082000a220080200a008802100a000122100402008208802800900022240020280040890010080202220110024020048801008408022402002020404a00200810202220100202002881008082002220080822010088040108080422010042010088a004008204022020802081008c000209000062008202400208c00800900012220080210004884020090040220200420400888800808800412008102100028820200a0010224008020084040000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(2615, 
0x15400406008010204060004120020808008204200a010200802024801010280009200808080202481040028004120800108202048080102040812200040840104820008220081200a000802104a0001021004120082008028049000202400212800400900102802020201100a4020008801018408002402012020400a00204810200220101202002081008482002020080922010008040148080402010052010080a004048204002020812081000c000249000042008212400200c00804900010220081210004084020490040020200520400808800848800402008112100020820204a0010024008120084000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationString(0) = "State of player's ships:\n+----------+\n| * * |\n| *** * |\n| * *A* |\n| **A* |\n| d ** b |\n| de* b |\n| De* *c b |\n| de* c * |\n| E c * |\n| *e **** |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| @ @ |\n|#@ @ @@ @|\n| @ # @ |\n|@ @@ |\n| @ @ @@|\n| @@ @ |\n| # @ @@ |\n| @ @ |\n|@ #@ @ |\n| @ |\n+----------+\n" +ObservationString(1) = "State of player's ships:\n+----------+\n|b * d * |\n|B* a*d** *|\n|b* aeD * |\n|* ed ** |\n| * e * **|\n| ** e * |\n| Ce* ** |\n| c* * |\n|* C* * |\n| * |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| @ @ |\n| @@@ @ |\n| @ @#@ |\n| @@#@ |\n| @@ |\n| @ |\n| # @ @ |\n| @ @ |\n| # @ |\n| @ @@@@ |\n+----------+\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99] +StringLegalActions() = ["Pl1: shoot at (0, 0)", "Pl1: shoot at (0, 1)", "Pl1: shoot at (0, 2)", "Pl1: shoot at (0, 3)", "Pl1: shoot at (0, 4)", "Pl1: shoot at (0, 5)", "Pl1: shoot at (0, 6)", "Pl1: shoot at (0, 7)", "Pl1: shoot at (0, 8)", "Pl1: shoot at (0, 9)", "Pl1: shoot at (1, 0)", "Pl1: shoot at (1, 1)", "Pl1: shoot at (1, 2)", "Pl1: shoot at (1, 3)", "Pl1: shoot at (1, 4)", "Pl1: shoot at (1, 5)", "Pl1: shoot at (1, 6)", "Pl1: shoot at (1, 7)", "Pl1: shoot at (1, 8)", "Pl1: shoot at (1, 9)", "Pl1: shoot at (2, 0)", "Pl1: shoot at (2, 1)", "Pl1: shoot at (2, 2)", "Pl1: shoot at (2, 3)", "Pl1: shoot at (2, 4)", "Pl1: shoot at (2, 5)", "Pl1: shoot at (2, 6)", "Pl1: shoot at (2, 7)", "Pl1: shoot at (2, 8)", "Pl1: shoot at (2, 9)", "Pl1: shoot at (3, 0)", "Pl1: shoot at (3, 1)", "Pl1: shoot at (3, 2)", "Pl1: shoot at (3, 3)", "Pl1: shoot at (3, 4)", "Pl1: shoot at (3, 5)", "Pl1: shoot at (3, 6)", "Pl1: shoot at (3, 7)", "Pl1: shoot at (3, 8)", "Pl1: shoot at (3, 9)", "Pl1: shoot at (4, 0)", "Pl1: shoot at (4, 1)", "Pl1: shoot at (4, 2)", "Pl1: shoot at (4, 3)", "Pl1: shoot at (4, 4)", "Pl1: shoot at (4, 5)", "Pl1: shoot at (4, 6)", "Pl1: shoot at (4, 7)", "Pl1: shoot at (4, 8)", "Pl1: shoot at (4, 9)", "Pl1: shoot at (5, 0)", "Pl1: shoot at (5, 1)", "Pl1: shoot at (5, 2)", "Pl1: shoot at (5, 3)", "Pl1: shoot at (5, 4)", "Pl1: shoot at (5, 5)", "Pl1: shoot at (5, 6)", "Pl1: shoot at (5, 7)", "Pl1: shoot at (5, 8)", "Pl1: shoot at (5, 9)", "Pl1: shoot at (6, 0)", "Pl1: shoot at (6, 1)", "Pl1: shoot at (6, 2)", "Pl1: shoot at (6, 3)", "Pl1: shoot at (6, 4)", "Pl1: shoot at (6, 5)", "Pl1: shoot at (6, 6)", "Pl1: shoot at (6, 7)", "Pl1: shoot at (6, 8)", "Pl1: shoot at (6, 9)", "Pl1: shoot at (7, 0)", "Pl1: shoot at 
(7, 1)", "Pl1: shoot at (7, 2)", "Pl1: shoot at (7, 3)", "Pl1: shoot at (7, 4)", "Pl1: shoot at (7, 5)", "Pl1: shoot at (7, 6)", "Pl1: shoot at (7, 7)", "Pl1: shoot at (7, 8)", "Pl1: shoot at (7, 9)", "Pl1: shoot at (8, 0)", "Pl1: shoot at (8, 1)", "Pl1: shoot at (8, 2)", "Pl1: shoot at (8, 3)", "Pl1: shoot at (8, 4)", "Pl1: shoot at (8, 5)", "Pl1: shoot at (8, 6)", "Pl1: shoot at (8, 7)", "Pl1: shoot at (8, 8)", "Pl1: shoot at (8, 9)", "Pl1: shoot at (9, 0)", "Pl1: shoot at (9, 1)", "Pl1: shoot at (9, 2)", "Pl1: shoot at (9, 3)", "Pl1: shoot at (9, 4)", "Pl1: shoot at (9, 5)", "Pl1: shoot at (9, 6)", "Pl1: shoot at (9, 7)", "Pl1: shoot at (9, 8)", "Pl1: shoot at (9, 9)"] + +# Apply action "Pl1: shoot at (3, 8)" +action: 38 + +# State 82 +# Apply action "Pl0: shoot at (0, 6)" +action: 6 + +# State 83 +# Apply action "Pl1: shoot at (4, 0)" +action: 40 + +# State 84 +# Apply action "Pl0: shoot at (8, 3)" +action: 83 + +# State 85 +# Apply action "Pl1: shoot at (2, 5)" +action: 25 + +# State 86 +# Apply action "Pl0: shoot at (5, 2)" +action: 52 + +# State 87 +# Apply action "Pl1: shoot at (7, 4)" +action: 74 + +# State 88 +# Apply action "Pl0: shoot at (8, 0)" +action: 80 + +# State 89 +# Apply action "Pl1: shoot at (5, 0)" +action: 50 + +# State 90 +# Apply action "Pl0: shoot at (3, 0)" +action: 30 + +# State 91 +# Apply action "Pl1: shoot at (4, 4)" +action: 44 + +# State 92 +# Apply action "Pl0: shoot at (0, 3)" +action: 3 + +# State 93 +# Apply action "Pl1: shoot at (0, 3)" +action: 3 + +# State 94 +# Apply action "Pl0: shoot at (7, 0)" +action: 70 + +# State 95 +# Apply action "Pl1: shoot at (9, 8)" +action: 98 + +# State 96 +# Apply action "Pl0: shoot at (5, 9)" +action: 59 + +# State 97 +# Apply action "Pl1: shoot at (2, 2)" +action: 22 + +# State 98 +# Apply action "Pl0: shoot at (4, 8)" +action: 48 + +# State 99 +# Apply action "Pl1: shoot at (9, 0)" +action: 90 + +# State 100 +# Player 0's board: +# +----------+ +# | * * * | +# | *** * | +# | ***A* | +# | **A* * | +# |*d ** b | +# |*de* b | +# | De* *c b | +# | de** c * | +# | E c * | +# |**e ***** | +# +----------+ +# +# Player 1's board: +# +----------+ +# |b * d** | +# |B* a*d** *| +# |b* aeD * | +# |* ed ** | +# | * e * **| +# | ** e * *| +# | Ce* ** | +# |* c* * | +# |* C* * | +# | * | +# +----------+ +# +# Full history: /0:v_2_4/1:v_1_3/0:v_4_8/1:v_0_0/0:v_6_6/1:v_6_3/0:v_4_1/1:v_0_5/0:v_5_2/1:v_2_4/0:6_8/1:6_1/0:7_4/1:9_7/0:9_7/1:1_7/0:8_6/1:9_7/0:5_1/1:1_4/0:4_9/1:5_3/0:6_7/1:6_3/0:2_7/1:4_4/0:4_8/1:3_3/0:8_0/1:9_4/0:0_7/1:4_4/0:8_4/1:9_1/0:1_6/1:2_5/0:0_3/1:2_4/0:8_6/1:8_2/0:1_0/1:3_4/0:3_0/1:2_1/0:6_3/1:1_3/0:5_2/1:3_2/0:6_8/1:6_5/0:4_6/1:9_7/0:2_1/1:8_8/0:6_5/1:8_8/0:7_6/1:1_2/0:4_1/1:7_3/0:4_1/1:0_7/0:1_9/1:9_5/0:1_4/1:0_1/0:1_7/1:3_3/0:3_7/1:4_3/0:1_1/1:7_8/0:5_6/1:3_5/0:2_5/1:9_6/0:3_8/1:5_3/0:0_3/1:2_3/0:8_3/1:3_8/0:0_6/1:4_0/0:8_3/1:2_5/0:5_2/1:7_4/0:8_0/1:5_0/0:3_0/1:4_4/0:0_3/1:0_3/0:7_0/1:9_8/0:5_9/1:2_2/0:4_8/1:9_0 +IsTerminal() = False +History() = [224, 213, 248, 200, 266, 263, 241, 205, 252, 224, 68, 61, 74, 97, 97, 17, 86, 97, 51, 14, 49, 53, 67, 63, 27, 44, 48, 33, 80, 94, 7, 44, 84, 91, 16, 25, 3, 24, 86, 82, 10, 34, 30, 21, 63, 13, 52, 32, 68, 65, 46, 97, 21, 88, 65, 88, 76, 12, 41, 73, 41, 7, 19, 95, 14, 1, 17, 33, 37, 43, 11, 78, 56, 35, 25, 96, 38, 53, 3, 23, 83, 38, 6, 40, 83, 25, 52, 74, 80, 50, 30, 44, 3, 3, 70, 98, 59, 22, 48, 90] +HistoryString() = "224, 213, 248, 200, 266, 263, 241, 205, 252, 224, 68, 61, 74, 97, 97, 17, 86, 97, 51, 14, 49, 53, 67, 63, 27, 44, 48, 33, 80, 94, 7, 44, 84, 91, 16, 
25, 3, 24, 86, 82, 10, 34, 30, 21, 63, 13, 52, 32, 68, 65, 46, 97, 21, 88, 65, 88, 76, 12, 41, 73, 41, 7, 19, 95, 14, 1, 17, 33, 37, 43, 11, 78, 56, 35, 25, 96, 38, 53, 3, 23, 83, 38, 6, 40, 83, 25, 52, 74, 80, 50, 30, 44, 3, 3, 70, 98, 59, 22, 48, 90" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "T=100 /v_2_4/v_4_8/v_6_6/v_4_1/v_5_2/shot_6_8:W/oppshot_6_1/shot_7_4:W/oppshot_9_7/shot_9_7:W/oppshot_1_7/shot_8_6:W/oppshot_9_7/shot_5_1:W/oppshot_1_4/shot_4_9:W/oppshot_5_3/shot_6_7:W/oppshot_6_3/shot_2_7:W/oppshot_4_4/shot_4_8:W/oppshot_3_3/shot_8_0:W/oppshot_9_4/shot_0_7:W/oppshot_4_4/shot_8_4:W/oppshot_9_1/shot_1_6:W/oppshot_2_5/shot_0_3:W/oppshot_2_4/shot_8_6:W/oppshot_8_2/shot_1_0:H/oppshot_3_4/shot_3_0:W/oppshot_2_1/shot_6_3:H/oppshot_1_3/shot_5_2:W/oppshot_3_2/shot_6_8:W/oppshot_6_5/shot_4_6:W/oppshot_9_7/shot_2_1:W/oppshot_8_8/shot_6_5:W/oppshot_8_8/shot_7_6:W/oppshot_1_2/shot_4_1:W/oppshot_7_3/shot_4_1:W/oppshot_0_7/shot_1_9:W/oppshot_9_5/shot_1_4:W/oppshot_0_1/shot_1_7:W/oppshot_3_3/shot_3_7:W/oppshot_4_3/shot_1_1:W/oppshot_7_8/shot_5_6:W/oppshot_3_5/shot_2_5:H/oppshot_9_6/shot_3_8:W/oppshot_5_3/shot_0_3:W/oppshot_2_3/shot_8_3:H/oppshot_3_8/shot_0_6:W/oppshot_4_0/shot_8_3:H/oppshot_2_5/shot_5_2:W/oppshot_7_4/shot_8_0:W/oppshot_5_0/shot_3_0:W/oppshot_4_4/shot_0_3:W/oppshot_0_3/shot_7_0:W/oppshot_9_8/shot_5_9:W/oppshot_2_2/shot_4_8:W/oppshot_9_0" +InformationStateString(1) = "T=100 /v_1_3/v_0_0/v_6_3/v_0_5/v_2_4/oppshot_6_8/shot_6_1:H/oppshot_7_4/shot_9_7:W/oppshot_9_7/shot_1_7:W/oppshot_8_6/shot_9_7:W/oppshot_5_1/shot_1_4:W/oppshot_4_9/shot_5_3:W/oppshot_6_7/shot_6_3:W/oppshot_2_7/shot_4_4:W/oppshot_4_8/shot_3_3:W/oppshot_8_0/shot_9_4:W/oppshot_0_7/shot_4_4:W/oppshot_8_4/shot_9_1:W/oppshot_1_6/shot_2_5:W/oppshot_0_3/shot_2_4:H/oppshot_8_6/shot_8_2:H/oppshot_1_0/shot_3_4:S/oppshot_3_0/shot_2_1:W/oppshot_6_3/shot_1_3:W/oppshot_5_2/shot_3_2:W/oppshot_6_8/shot_6_5:W/oppshot_4_6/shot_9_7:W/oppshot_2_1/shot_8_8:W/oppshot_6_5/shot_8_8:W/oppshot_7_6/shot_1_2:W/oppshot_4_1/shot_7_3:W/oppshot_4_1/shot_0_7:W/oppshot_1_9/shot_9_5:W/oppshot_1_4/shot_0_1:W/oppshot_1_7/shot_3_3:W/oppshot_3_7/shot_4_3:W/oppshot_1_1/shot_7_8:W/oppshot_5_6/shot_3_5:W/oppshot_2_5/shot_9_6:W/oppshot_3_8/shot_5_3:W/oppshot_0_3/shot_2_3:W/oppshot_8_3/shot_3_8:W/oppshot_0_6/shot_4_0:W/oppshot_8_3/shot_2_5:W/oppshot_5_2/shot_7_4:W/oppshot_8_0/shot_5_0:W/oppshot_3_0/shot_4_4:W/oppshot_0_3/shot_0_3:W/oppshot_7_0/shot_9_8:W/oppshot_5_9/shot_2_2:W/oppshot_4_8/shot_9_0:W" +InformationStateTensor(0): binvec(2615, 0x2920020420009020084204010408080800a204200201020880202080101228000820080888020208104022800402080018820200808012204080220004884010082000a220080200a008802100a000122100402008208802800900022240020280040890010080202220110024020048801008408022402002020404a00200810202220100202002881008082002220080822010088040108080422010042010088a004008204022020802081008c000209000062008202400208c00800900012220080210004884020090040220200420400888800808800412008102100028820200a001022400802008404880010a000222104002008404900080810202202040200a0088210008408022100402800408c00200804802200804204001890040082000a200c00000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(2615, 
0x19400406008010204060004120020808008204200a010200802024801010280009200808080202481040028004120800108202048080102040812200040840104820008220081200a000802104a0001021004120082008028049000202400212800400900102802020201100a4020008801018408002402012020400a00204810200220101202002081008482002020080922010008040148080402010052010080a004048204002020812081000c000249000042008212400200c00804900010220081210004084020490040020200520400808800848800402008112100020820204a001002400812008400880014a000202104012008400900084810200202041200a0008210048408002100412800400c002048048002008052040010900404820008200c01000000000000000000000000000000000000000000000000000000000000000) +ObservationString(0) = "State of player's ships:\n+----------+\n| * * * |\n| *** * |\n| ***A* |\n| **A* * |\n|*d ** b |\n|*de* b |\n| De* *c b |\n| de** c * |\n| E c * |\n|**e ***** |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| @ @@ |\n|#@ @ @@ @|\n| @ # @ |\n|@ @@ |\n| @ @ @@|\n| @@ @ @|\n| # @ @@ |\n|@ @ @ |\n|@ #@ @ |\n| @ |\n+----------+\n" +ObservationString(1) = "State of player's ships:\n+----------+\n|b * d** |\n|B* a*d** *|\n|b* aeD * |\n|* ed ** |\n| * e * **|\n| ** e * *|\n| Ce* ** |\n|* c* * |\n|* C* * |\n| * |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| @ @ @ |\n| @@@ @ |\n| @@@#@ |\n| @@#@ @ |\n|@ @@ |\n|@ @ |\n| # @ @ |\n| @@ @ |\n| # @ |\n|@@ @@@@@ |\n+----------+\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99] +StringLegalActions() = ["Pl0: shoot at (0, 0)", "Pl0: shoot at (0, 1)", "Pl0: shoot at (0, 2)", "Pl0: shoot at (0, 3)", "Pl0: shoot at (0, 4)", "Pl0: shoot at (0, 5)", "Pl0: shoot at (0, 6)", "Pl0: shoot at (0, 7)", "Pl0: shoot at (0, 8)", "Pl0: shoot at (0, 9)", "Pl0: shoot at (1, 0)", "Pl0: shoot at (1, 1)", "Pl0: shoot at (1, 2)", "Pl0: shoot at (1, 3)", "Pl0: shoot at (1, 4)", "Pl0: shoot at (1, 5)", "Pl0: shoot at (1, 6)", "Pl0: shoot at (1, 7)", "Pl0: shoot at (1, 8)", "Pl0: shoot at (1, 9)", "Pl0: shoot at (2, 0)", "Pl0: shoot at (2, 1)", "Pl0: shoot at (2, 2)", "Pl0: shoot at (2, 3)", "Pl0: shoot at (2, 4)", "Pl0: shoot at (2, 5)", "Pl0: shoot at (2, 6)", "Pl0: shoot at (2, 7)", "Pl0: shoot at (2, 8)", "Pl0: shoot at (2, 9)", "Pl0: shoot at (3, 0)", "Pl0: shoot at (3, 1)", "Pl0: shoot at (3, 2)", "Pl0: shoot at (3, 3)", "Pl0: shoot at (3, 4)", "Pl0: shoot at (3, 5)", "Pl0: shoot at (3, 6)", "Pl0: shoot at (3, 7)", "Pl0: shoot at (3, 8)", "Pl0: shoot at (3, 9)", "Pl0: shoot at (4, 0)", "Pl0: shoot at (4, 1)", "Pl0: shoot at (4, 2)", "Pl0: shoot at (4, 3)", "Pl0: shoot at (4, 4)", "Pl0: shoot at (4, 5)", "Pl0: shoot at (4, 6)", "Pl0: shoot at (4, 7)", "Pl0: shoot at (4, 8)", "Pl0: shoot at (4, 9)", "Pl0: shoot at (5, 0)", "Pl0: shoot at (5, 1)", "Pl0: shoot at (5, 2)", "Pl0: shoot at (5, 3)", "Pl0: shoot at (5, 4)", "Pl0: shoot at (5, 5)", "Pl0: shoot at (5, 6)", "Pl0: shoot at (5, 7)", "Pl0: shoot at (5, 8)", "Pl0: shoot at (5, 9)", "Pl0: shoot at (6, 0)", "Pl0: shoot at (6, 1)", "Pl0: shoot at (6, 2)", "Pl0: shoot at (6, 3)", "Pl0: shoot at (6, 4)", "Pl0: shoot at (6, 5)", "Pl0: shoot at (6, 6)", "Pl0: shoot at (6, 7)", "Pl0: shoot at (6, 8)", "Pl0: shoot at (6, 9)", "Pl0: shoot at (7, 
0)", "Pl0: shoot at (7, 1)", "Pl0: shoot at (7, 2)", "Pl0: shoot at (7, 3)", "Pl0: shoot at (7, 4)", "Pl0: shoot at (7, 5)", "Pl0: shoot at (7, 6)", "Pl0: shoot at (7, 7)", "Pl0: shoot at (7, 8)", "Pl0: shoot at (7, 9)", "Pl0: shoot at (8, 0)", "Pl0: shoot at (8, 1)", "Pl0: shoot at (8, 2)", "Pl0: shoot at (8, 3)", "Pl0: shoot at (8, 4)", "Pl0: shoot at (8, 5)", "Pl0: shoot at (8, 6)", "Pl0: shoot at (8, 7)", "Pl0: shoot at (8, 8)", "Pl0: shoot at (8, 9)", "Pl0: shoot at (9, 0)", "Pl0: shoot at (9, 1)", "Pl0: shoot at (9, 2)", "Pl0: shoot at (9, 3)", "Pl0: shoot at (9, 4)", "Pl0: shoot at (9, 5)", "Pl0: shoot at (9, 6)", "Pl0: shoot at (9, 7)", "Pl0: shoot at (9, 8)", "Pl0: shoot at (9, 9)"] + +# Apply action "Pl0: shoot at (6, 5)" +action: 65 + +# State 101 +# Player 0's board: +# +----------+ +# | * * * | +# | *** * | +# | ***A* | +# | **A* * | +# |*d ** b | +# |*de* b | +# | De* *c b | +# | de** c * | +# | E c * | +# |**e ***** | +# +----------+ +# +# Player 1's board: +# +----------+ +# |b * d** | +# |B* a*d** *| +# |b* aeD * | +# |* ed ** | +# | * e * **| +# | ** e * *| +# | Ce* ** | +# |* c* * | +# |* C* * | +# | * | +# +----------+ +# +# Full history: /0:v_2_4/1:v_1_3/0:v_4_8/1:v_0_0/0:v_6_6/1:v_6_3/0:v_4_1/1:v_0_5/0:v_5_2/1:v_2_4/0:6_8/1:6_1/0:7_4/1:9_7/0:9_7/1:1_7/0:8_6/1:9_7/0:5_1/1:1_4/0:4_9/1:5_3/0:6_7/1:6_3/0:2_7/1:4_4/0:4_8/1:3_3/0:8_0/1:9_4/0:0_7/1:4_4/0:8_4/1:9_1/0:1_6/1:2_5/0:0_3/1:2_4/0:8_6/1:8_2/0:1_0/1:3_4/0:3_0/1:2_1/0:6_3/1:1_3/0:5_2/1:3_2/0:6_8/1:6_5/0:4_6/1:9_7/0:2_1/1:8_8/0:6_5/1:8_8/0:7_6/1:1_2/0:4_1/1:7_3/0:4_1/1:0_7/0:1_9/1:9_5/0:1_4/1:0_1/0:1_7/1:3_3/0:3_7/1:4_3/0:1_1/1:7_8/0:5_6/1:3_5/0:2_5/1:9_6/0:3_8/1:5_3/0:0_3/1:2_3/0:8_3/1:3_8/0:0_6/1:4_0/0:8_3/1:2_5/0:5_2/1:7_4/0:8_0/1:5_0/0:3_0/1:4_4/0:0_3/1:0_3/0:7_0/1:9_8/0:5_9/1:2_2/0:4_8/1:9_0/0:6_5 +IsTerminal() = False +History() = [224, 213, 248, 200, 266, 263, 241, 205, 252, 224, 68, 61, 74, 97, 97, 17, 86, 97, 51, 14, 49, 53, 67, 63, 27, 44, 48, 33, 80, 94, 7, 44, 84, 91, 16, 25, 3, 24, 86, 82, 10, 34, 30, 21, 63, 13, 52, 32, 68, 65, 46, 97, 21, 88, 65, 88, 76, 12, 41, 73, 41, 7, 19, 95, 14, 1, 17, 33, 37, 43, 11, 78, 56, 35, 25, 96, 38, 53, 3, 23, 83, 38, 6, 40, 83, 25, 52, 74, 80, 50, 30, 44, 3, 3, 70, 98, 59, 22, 48, 90, 65] +HistoryString() = "224, 213, 248, 200, 266, 263, 241, 205, 252, 224, 68, 61, 74, 97, 97, 17, 86, 97, 51, 14, 49, 53, 67, 63, 27, 44, 48, 33, 80, 94, 7, 44, 84, 91, 16, 25, 3, 24, 86, 82, 10, 34, 30, 21, 63, 13, 52, 32, 68, 65, 46, 97, 21, 88, 65, 88, 76, 12, 41, 73, 41, 7, 19, 95, 14, 1, 17, 33, 37, 43, 11, 78, 56, 35, 25, 96, 38, 53, 3, 23, 83, 38, 6, 40, 83, 25, 52, 74, 80, 50, 30, 44, 3, 3, 70, 98, 59, 22, 48, 90, 65" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "T=101 
/v_2_4/v_4_8/v_6_6/v_4_1/v_5_2/shot_6_8:W/oppshot_6_1/shot_7_4:W/oppshot_9_7/shot_9_7:W/oppshot_1_7/shot_8_6:W/oppshot_9_7/shot_5_1:W/oppshot_1_4/shot_4_9:W/oppshot_5_3/shot_6_7:W/oppshot_6_3/shot_2_7:W/oppshot_4_4/shot_4_8:W/oppshot_3_3/shot_8_0:W/oppshot_9_4/shot_0_7:W/oppshot_4_4/shot_8_4:W/oppshot_9_1/shot_1_6:W/oppshot_2_5/shot_0_3:W/oppshot_2_4/shot_8_6:W/oppshot_8_2/shot_1_0:H/oppshot_3_4/shot_3_0:W/oppshot_2_1/shot_6_3:H/oppshot_1_3/shot_5_2:W/oppshot_3_2/shot_6_8:W/oppshot_6_5/shot_4_6:W/oppshot_9_7/shot_2_1:W/oppshot_8_8/shot_6_5:W/oppshot_8_8/shot_7_6:W/oppshot_1_2/shot_4_1:W/oppshot_7_3/shot_4_1:W/oppshot_0_7/shot_1_9:W/oppshot_9_5/shot_1_4:W/oppshot_0_1/shot_1_7:W/oppshot_3_3/shot_3_7:W/oppshot_4_3/shot_1_1:W/oppshot_7_8/shot_5_6:W/oppshot_3_5/shot_2_5:H/oppshot_9_6/shot_3_8:W/oppshot_5_3/shot_0_3:W/oppshot_2_3/shot_8_3:H/oppshot_3_8/shot_0_6:W/oppshot_4_0/shot_8_3:H/oppshot_2_5/shot_5_2:W/oppshot_7_4/shot_8_0:W/oppshot_5_0/shot_3_0:W/oppshot_4_4/shot_0_3:W/oppshot_0_3/shot_7_0:W/oppshot_9_8/shot_5_9:W/oppshot_2_2/shot_4_8:W/oppshot_9_0/shot_6_5:W" +InformationStateString(1) = "T=101 /v_1_3/v_0_0/v_6_3/v_0_5/v_2_4/oppshot_6_8/shot_6_1:H/oppshot_7_4/shot_9_7:W/oppshot_9_7/shot_1_7:W/oppshot_8_6/shot_9_7:W/oppshot_5_1/shot_1_4:W/oppshot_4_9/shot_5_3:W/oppshot_6_7/shot_6_3:W/oppshot_2_7/shot_4_4:W/oppshot_4_8/shot_3_3:W/oppshot_8_0/shot_9_4:W/oppshot_0_7/shot_4_4:W/oppshot_8_4/shot_9_1:W/oppshot_1_6/shot_2_5:W/oppshot_0_3/shot_2_4:H/oppshot_8_6/shot_8_2:H/oppshot_1_0/shot_3_4:S/oppshot_3_0/shot_2_1:W/oppshot_6_3/shot_1_3:W/oppshot_5_2/shot_3_2:W/oppshot_6_8/shot_6_5:W/oppshot_4_6/shot_9_7:W/oppshot_2_1/shot_8_8:W/oppshot_6_5/shot_8_8:W/oppshot_7_6/shot_1_2:W/oppshot_4_1/shot_7_3:W/oppshot_4_1/shot_0_7:W/oppshot_1_9/shot_9_5:W/oppshot_1_4/shot_0_1:W/oppshot_1_7/shot_3_3:W/oppshot_3_7/shot_4_3:W/oppshot_1_1/shot_7_8:W/oppshot_5_6/shot_3_5:W/oppshot_2_5/shot_9_6:W/oppshot_3_8/shot_5_3:W/oppshot_0_3/shot_2_3:W/oppshot_8_3/shot_3_8:W/oppshot_0_6/shot_4_0:W/oppshot_8_3/shot_2_5:W/oppshot_5_2/shot_7_4:W/oppshot_8_0/shot_5_0:W/oppshot_3_0/shot_4_4:W/oppshot_0_3/shot_0_3:W/oppshot_7_0/shot_9_8:W/oppshot_5_9/shot_2_2:W/oppshot_4_8/shot_9_0:W/oppshot_6_5" +InformationStateTensor(0): binvec(2615, 0x2520020420009020084204010408080800a204200201020880202080101228000820080888020208104022800402080018820200808012204080220004884010082000a220080200a008802100a000122100402008208802800900022240020280040890010080202220110024020048801008408022402002020404a00200810202220100202002881008082002220080822010088040108080422010042010088a004008204022020802081008c000209000062008202400208c00800900012220080210004884020090040220200420400888800808800412008102100028820200a001022400802008404880010a000222104002008404900080810202202040200a0088210008408022100402800408c00200804802200804204001890040082000a200c00202010800000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(2615, 
0x15400406008010204060004120020808008204200a010200802024801010280009200808080202481040028004120800108202048080102040812200040840104820008220081200a000802104a0001021004120082008028049000202400212800400900102802020201100a4020008801018408002402012020400a00204810200220101202002081008482002020080922010008040148080402010052010080a004048204002020812081000c000249000042008212400200c00804900010220081210004084020490040020200520400808800848800402008112100020820204a001002400812008400880014a000202104012008400900084810200202041200a0008210048408002100412800400c002048048002008052040010900404820008200c01202010000000000000000000000000000000000000000000000000000000000) +ObservationString(0) = "State of player's ships:\n+----------+\n| * * * |\n| *** * |\n| ***A* |\n| **A* * |\n|*d ** b |\n|*de* b |\n| De* *c b |\n| de** c * |\n| E c * |\n|**e ***** |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| @ @@ |\n|#@ @ @@ @|\n| @ # @ |\n|@ @@ |\n| @ @ @@|\n| @@ @ @|\n| # @ @@ |\n|@ @ @ |\n|@ #@ @ |\n| @ |\n+----------+\n" +ObservationString(1) = "State of player's ships:\n+----------+\n|b * d** |\n|B* a*d** *|\n|b* aeD * |\n|* ed ** |\n| * e * **|\n| ** e * *|\n| Ce* ** |\n|* c* * |\n|* C* * |\n| * |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| @ @ @ |\n| @@@ @ |\n| @@@#@ |\n| @@#@ @ |\n|@ @@ |\n|@ @ |\n| # @ @ |\n| @@ @ |\n| # @ |\n|@@ @@@@@ |\n+----------+\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99] +StringLegalActions() = ["Pl1: shoot at (0, 0)", "Pl1: shoot at (0, 1)", "Pl1: shoot at (0, 2)", "Pl1: shoot at (0, 3)", "Pl1: shoot at (0, 4)", "Pl1: shoot at (0, 5)", "Pl1: shoot at (0, 6)", "Pl1: shoot at (0, 7)", "Pl1: shoot at (0, 8)", "Pl1: shoot at (0, 9)", "Pl1: shoot at (1, 0)", "Pl1: shoot at (1, 1)", "Pl1: shoot at (1, 2)", "Pl1: shoot at (1, 3)", "Pl1: shoot at (1, 4)", "Pl1: shoot at (1, 5)", "Pl1: shoot at (1, 6)", "Pl1: shoot at (1, 7)", "Pl1: shoot at (1, 8)", "Pl1: shoot at (1, 9)", "Pl1: shoot at (2, 0)", "Pl1: shoot at (2, 1)", "Pl1: shoot at (2, 2)", "Pl1: shoot at (2, 3)", "Pl1: shoot at (2, 4)", "Pl1: shoot at (2, 5)", "Pl1: shoot at (2, 6)", "Pl1: shoot at (2, 7)", "Pl1: shoot at (2, 8)", "Pl1: shoot at (2, 9)", "Pl1: shoot at (3, 0)", "Pl1: shoot at (3, 1)", "Pl1: shoot at (3, 2)", "Pl1: shoot at (3, 3)", "Pl1: shoot at (3, 4)", "Pl1: shoot at (3, 5)", "Pl1: shoot at (3, 6)", "Pl1: shoot at (3, 7)", "Pl1: shoot at (3, 8)", "Pl1: shoot at (3, 9)", "Pl1: shoot at (4, 0)", "Pl1: shoot at (4, 1)", "Pl1: shoot at (4, 2)", "Pl1: shoot at (4, 3)", "Pl1: shoot at (4, 4)", "Pl1: shoot at (4, 5)", "Pl1: shoot at (4, 6)", "Pl1: shoot at (4, 7)", "Pl1: shoot at (4, 8)", "Pl1: shoot at (4, 9)", "Pl1: shoot at (5, 0)", "Pl1: shoot at (5, 1)", "Pl1: shoot at (5, 2)", "Pl1: shoot at (5, 3)", "Pl1: shoot at (5, 4)", "Pl1: shoot at (5, 5)", "Pl1: shoot at (5, 6)", "Pl1: shoot at (5, 7)", "Pl1: shoot at (5, 8)", "Pl1: shoot at (5, 9)", "Pl1: shoot at (6, 0)", "Pl1: shoot at (6, 1)", "Pl1: shoot at (6, 2)", "Pl1: shoot at (6, 3)", "Pl1: shoot at (6, 4)", "Pl1: shoot at (6, 5)", "Pl1: shoot at (6, 6)", "Pl1: shoot at (6, 7)", "Pl1: shoot at (6, 8)", "Pl1: shoot at (6, 9)", "Pl1: shoot at (7, 
0)", "Pl1: shoot at (7, 1)", "Pl1: shoot at (7, 2)", "Pl1: shoot at (7, 3)", "Pl1: shoot at (7, 4)", "Pl1: shoot at (7, 5)", "Pl1: shoot at (7, 6)", "Pl1: shoot at (7, 7)", "Pl1: shoot at (7, 8)", "Pl1: shoot at (7, 9)", "Pl1: shoot at (8, 0)", "Pl1: shoot at (8, 1)", "Pl1: shoot at (8, 2)", "Pl1: shoot at (8, 3)", "Pl1: shoot at (8, 4)", "Pl1: shoot at (8, 5)", "Pl1: shoot at (8, 6)", "Pl1: shoot at (8, 7)", "Pl1: shoot at (8, 8)", "Pl1: shoot at (8, 9)", "Pl1: shoot at (9, 0)", "Pl1: shoot at (9, 1)", "Pl1: shoot at (9, 2)", "Pl1: shoot at (9, 3)", "Pl1: shoot at (9, 4)", "Pl1: shoot at (9, 5)", "Pl1: shoot at (9, 6)", "Pl1: shoot at (9, 7)", "Pl1: shoot at (9, 8)", "Pl1: shoot at (9, 9)"] + +# Apply action "Pl1: shoot at (8, 2)" +action: 82 + +# State 102 +# Apply action "Pl0: shoot at (5, 4)" +action: 54 + +# State 103 +# Apply action "Pl1: shoot at (3, 2)" +action: 32 + +# State 104 +# Apply action "Pl0: shoot at (0, 1)" +action: 1 + +# State 105 +# Apply action "Pl1: shoot at (5, 9)" +action: 59 + +# State 106 +# Apply action "Pl0: shoot at (9, 6)" +action: 96 + +# State 107 +# Apply action "Pl1: shoot at (9, 8)" +action: 98 + +# State 108 +# Apply action "Pl0: shoot at (5, 7)" +action: 57 + +# State 109 +# Apply action "Pl1: shoot at (6, 7)" +action: 67 + +# State 110 +# Player 0's board: +# +----------+ +# | * * * | +# | *** * | +# | ***A* | +# | **A* * | +# |*d ** b | +# |*de* b*| +# | De* *c*b | +# | de** c * | +# | E c * | +# |**e ***** | +# +----------+ +# +# Player 1's board: +# +----------+ +# |b* * d** | +# |B* a*d** *| +# |b* aeD * | +# |* ed ** | +# | * e * **| +# | ** E ** *| +# | Ce* ** | +# |* c* * | +# |* C* * | +# | ** | +# +----------+ +# +# Full history: /0:v_2_4/1:v_1_3/0:v_4_8/1:v_0_0/0:v_6_6/1:v_6_3/0:v_4_1/1:v_0_5/0:v_5_2/1:v_2_4/0:6_8/1:6_1/0:7_4/1:9_7/0:9_7/1:1_7/0:8_6/1:9_7/0:5_1/1:1_4/0:4_9/1:5_3/0:6_7/1:6_3/0:2_7/1:4_4/0:4_8/1:3_3/0:8_0/1:9_4/0:0_7/1:4_4/0:8_4/1:9_1/0:1_6/1:2_5/0:0_3/1:2_4/0:8_6/1:8_2/0:1_0/1:3_4/0:3_0/1:2_1/0:6_3/1:1_3/0:5_2/1:3_2/0:6_8/1:6_5/0:4_6/1:9_7/0:2_1/1:8_8/0:6_5/1:8_8/0:7_6/1:1_2/0:4_1/1:7_3/0:4_1/1:0_7/0:1_9/1:9_5/0:1_4/1:0_1/0:1_7/1:3_3/0:3_7/1:4_3/0:1_1/1:7_8/0:5_6/1:3_5/0:2_5/1:9_6/0:3_8/1:5_3/0:0_3/1:2_3/0:8_3/1:3_8/0:0_6/1:4_0/0:8_3/1:2_5/0:5_2/1:7_4/0:8_0/1:5_0/0:3_0/1:4_4/0:0_3/1:0_3/0:7_0/1:9_8/0:5_9/1:2_2/0:4_8/1:9_0/0:6_5/1:8_2/0:5_4/1:3_2/0:0_1/1:5_9/0:9_6/1:9_8/0:5_7/1:6_7 +IsTerminal() = True +History() = [224, 213, 248, 200, 266, 263, 241, 205, 252, 224, 68, 61, 74, 97, 97, 17, 86, 97, 51, 14, 49, 53, 67, 63, 27, 44, 48, 33, 80, 94, 7, 44, 84, 91, 16, 25, 3, 24, 86, 82, 10, 34, 30, 21, 63, 13, 52, 32, 68, 65, 46, 97, 21, 88, 65, 88, 76, 12, 41, 73, 41, 7, 19, 95, 14, 1, 17, 33, 37, 43, 11, 78, 56, 35, 25, 96, 38, 53, 3, 23, 83, 38, 6, 40, 83, 25, 52, 74, 80, 50, 30, 44, 3, 3, 70, 98, 59, 22, 48, 90, 65, 82, 54, 32, 1, 59, 96, 98, 57, 67] +HistoryString() = "224, 213, 248, 200, 266, 263, 241, 205, 252, 224, 68, 61, 74, 97, 97, 17, 86, 97, 51, 14, 49, 53, 67, 63, 27, 44, 48, 33, 80, 94, 7, 44, 84, 91, 16, 25, 3, 24, 86, 82, 10, 34, 30, 21, 63, 13, 52, 32, 68, 65, 46, 97, 21, 88, 65, 88, 76, 12, 41, 73, 41, 7, 19, 95, 14, 1, 17, 33, 37, 43, 11, 78, 56, 35, 25, 96, 38, 53, 3, 23, 83, 38, 6, 40, 83, 25, 52, 74, 80, 50, 30, 44, 3, 3, 70, 98, 59, 22, 48, 90, 65, 82, 54, 32, 1, 59, 96, 98, 57, 67" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "T=110 
/v_2_4/v_4_8/v_6_6/v_4_1/v_5_2/shot_6_8:W/oppshot_6_1/shot_7_4:W/oppshot_9_7/shot_9_7:W/oppshot_1_7/shot_8_6:W/oppshot_9_7/shot_5_1:W/oppshot_1_4/shot_4_9:W/oppshot_5_3/shot_6_7:W/oppshot_6_3/shot_2_7:W/oppshot_4_4/shot_4_8:W/oppshot_3_3/shot_8_0:W/oppshot_9_4/shot_0_7:W/oppshot_4_4/shot_8_4:W/oppshot_9_1/shot_1_6:W/oppshot_2_5/shot_0_3:W/oppshot_2_4/shot_8_6:W/oppshot_8_2/shot_1_0:H/oppshot_3_4/shot_3_0:W/oppshot_2_1/shot_6_3:H/oppshot_1_3/shot_5_2:W/oppshot_3_2/shot_6_8:W/oppshot_6_5/shot_4_6:W/oppshot_9_7/shot_2_1:W/oppshot_8_8/shot_6_5:W/oppshot_8_8/shot_7_6:W/oppshot_1_2/shot_4_1:W/oppshot_7_3/shot_4_1:W/oppshot_0_7/shot_1_9:W/oppshot_9_5/shot_1_4:W/oppshot_0_1/shot_1_7:W/oppshot_3_3/shot_3_7:W/oppshot_4_3/shot_1_1:W/oppshot_7_8/shot_5_6:W/oppshot_3_5/shot_2_5:H/oppshot_9_6/shot_3_8:W/oppshot_5_3/shot_0_3:W/oppshot_2_3/shot_8_3:H/oppshot_3_8/shot_0_6:W/oppshot_4_0/shot_8_3:H/oppshot_2_5/shot_5_2:W/oppshot_7_4/shot_8_0:W/oppshot_5_0/shot_3_0:W/oppshot_4_4/shot_0_3:W/oppshot_0_3/shot_7_0:W/oppshot_9_8/shot_5_9:W/oppshot_2_2/shot_4_8:W/oppshot_9_0/shot_6_5:W/oppshot_8_2/shot_5_4:H/oppshot_3_2/shot_0_1:W/oppshot_5_9/shot_9_6:W/oppshot_9_8/shot_5_7:W/oppshot_6_7" +InformationStateString(1) = "T=110 /v_1_3/v_0_0/v_6_3/v_0_5/v_2_4/oppshot_6_8/shot_6_1:H/oppshot_7_4/shot_9_7:W/oppshot_9_7/shot_1_7:W/oppshot_8_6/shot_9_7:W/oppshot_5_1/shot_1_4:W/oppshot_4_9/shot_5_3:W/oppshot_6_7/shot_6_3:W/oppshot_2_7/shot_4_4:W/oppshot_4_8/shot_3_3:W/oppshot_8_0/shot_9_4:W/oppshot_0_7/shot_4_4:W/oppshot_8_4/shot_9_1:W/oppshot_1_6/shot_2_5:W/oppshot_0_3/shot_2_4:H/oppshot_8_6/shot_8_2:H/oppshot_1_0/shot_3_4:S/oppshot_3_0/shot_2_1:W/oppshot_6_3/shot_1_3:W/oppshot_5_2/shot_3_2:W/oppshot_6_8/shot_6_5:W/oppshot_4_6/shot_9_7:W/oppshot_2_1/shot_8_8:W/oppshot_6_5/shot_8_8:W/oppshot_7_6/shot_1_2:W/oppshot_4_1/shot_7_3:W/oppshot_4_1/shot_0_7:W/oppshot_1_9/shot_9_5:W/oppshot_1_4/shot_0_1:W/oppshot_1_7/shot_3_3:W/oppshot_3_7/shot_4_3:W/oppshot_1_1/shot_7_8:W/oppshot_5_6/shot_3_5:W/oppshot_2_5/shot_9_6:W/oppshot_3_8/shot_5_3:W/oppshot_0_3/shot_2_3:W/oppshot_8_3/shot_3_8:W/oppshot_0_6/shot_4_0:W/oppshot_8_3/shot_2_5:W/oppshot_5_2/shot_7_4:W/oppshot_8_0/shot_5_0:W/oppshot_3_0/shot_4_4:W/oppshot_0_3/shot_0_3:W/oppshot_7_0/shot_9_8:W/oppshot_5_9/shot_2_2:W/oppshot_4_8/shot_9_0:W/oppshot_6_5/shot_8_2:H/oppshot_5_4/shot_3_2:W/oppshot_0_1/shot_5_9:W/oppshot_9_6/shot_9_8:W/oppshot_5_7/shot_6_7:W" +InformationStateTensor(0): binvec(2615, 0x6120020420009020084204010408080800a204200201020880202080101228000820080888020208104022800402080018820200808012204080220004884010082000a220080200a008802100a000122100402008208802800900022240020280040890010080202220110024020048801008408022402002020404a00200810202220100202002881008082002220080822010088040108080422010042010088a004008204022020802081008c000209000062008202400208c00800900012220080210004884020090040220200420400888800808800412008102100028820200a001022400802008404880010a000222104002008404900080810202202040200a0088210008408022100402800408c00200804802200804204001890040082000a200c00202010880440081008122010028010088200088010222008042040048810020) +InformationStateTensor(1): binvec(2615, 
0x51400406008010204060004120020808008204200a010200802024801010280009200808080202481040028004120800108202048080102040812200040840104820008220081200a000802104a0001021004120082008028049000202400212800400900102802020201100a4020008801018408002402012020400a00204810200220101202002081008482002020080922010008040148080402010052010080a004048204002020812081000c000249000042008212400200c00804900010220081210004084020490040020200520400808800848800402008112100020820204a001002400812008400880014a000202104012008400900084810200202041200a0008210048408002100412800400c002048048002008052040010900404820008200c012020100804402810080220101280100082000c8010202008052040040810024) +ObservationString(0) = "State of player's ships:\n+----------+\n| * * * |\n| *** * |\n| ***A* |\n| **A* * |\n|*d ** b |\n|*de* b*|\n| De* *c*b |\n| de** c * |\n| E c * |\n|**e ***** |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| @ @ @@ |\n|#@ @ @@ @|\n| @ # @ |\n|@ @@ |\n| @ @ @@|\n| @@ # @@ @|\n| # @ @@ |\n|@ @ @ |\n|@ #@ @ |\n| @@ |\n+----------+\n" +ObservationString(1) = "State of player's ships:\n+----------+\n|b* * d** |\n|B* a*d** *|\n|b* aeD * |\n|* ed ** |\n| * e * **|\n| ** E ** *|\n| Ce* ** |\n|* c* * |\n|* C* * |\n| ** |\n+----------+\n\nPlayer's shot outcomes:\n+----------+\n| @ @ @ |\n| @@@ @ |\n| @@@#@ |\n| @@#@ @ |\n|@ @@ |\n|@ @ @|\n| # @ @ @ |\n| @@ @ |\n| # @ |\n|@@ @@@@@ |\n+----------+\n" +Rewards() = [-1, 1] +Returns() = [-1, 1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/blackjack.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/blackjack.txt new file mode 100644 index 0000000..dca9b0c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/blackjack.txt @@ -0,0 +1,150 @@ +game: blackjack + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Blackjack" +GameType.max_num_players = 1 +GameType.min_num_players = 1 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "blackjack" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 2 +PolicyTensorShape() = [2] +MaxChanceOutcomes() = 52 +GetParameters() = {} +NumPlayers() = 1 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = None +ObservationTensorShape() = [189] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 189 +MaxGameLength() = 12 +ToString() = "blackjack()" + +# State 0 +# Current Phase: Initial Deal +# Current Player: -1 +# Player 0: Cards: +# Dealer: Cards: +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "" +ObservationString(0) = "Current Phase: Initial Deal\nCurrent Player: -1\nPlayer 0: Cards: \nDealer: Cards: \n" +ObservationTensor(0): ◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.0192308), (1,0.0192308), (2,0.0192308), (3,0.0192308), (4,0.0192308), (5,0.0192308), (6,0.0192308), (7,0.0192308), 
(8,0.0192308), (9,0.0192308), (10,0.0192308), (11,0.0192308), (12,0.0192308), (13,0.0192308), (14,0.0192308), (15,0.0192308), (16,0.0192308), (17,0.0192308), (18,0.0192308), (19,0.0192308), (20,0.0192308), (21,0.0192308), (22,0.0192308), (23,0.0192308), (24,0.0192308), (25,0.0192308), (26,0.0192308), (27,0.0192308), (28,0.0192308), (29,0.0192308), (30,0.0192308), (31,0.0192308), (32,0.0192308), (33,0.0192308), (34,0.0192308), (35,0.0192308), (36,0.0192308), (37,0.0192308), (38,0.0192308), (39,0.0192308), (40,0.0192308), (41,0.0192308), (42,0.0192308), (43,0.0192308), (44,0.0192308), (45,0.0192308), (46,0.0192308), (47,0.0192308), (48,0.0192308), (49,0.0192308), (50,0.0192308), (51,0.0192308)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51] +StringLegalActions() = ["CA", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9", "CT", "CJ", "CQ", "CK", "DA", "D2", "D3", "D4", "D5", "D6", "D7", "D8", "D9", "DT", "DJ", "DQ", "DK", "HA", "H2", "H3", "H4", "H5", "H6", "H7", "H8", "H9", "HT", "HJ", "HQ", "HK", "SA", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "ST", "SJ", "SQ", "SK"] + +# Apply action "C2" +action: 1 + +# State 1 +# Current Phase: Initial Deal +# Current Player: -1 +# Player 0: Cards: C2 +# Dealer: Cards: +IsTerminal() = False +History() = [1] +HistoryString() = "1" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "1" +ObservationString(0) = "Current Phase: Initial Deal\nCurrent Player: -1\nPlayer 0: Cards: C2\nDealer: Cards: \n" +ObservationTensor(0): ◉◯◯◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.0196078), (2,0.0196078), (3,0.0196078), (4,0.0196078), (5,0.0196078), (6,0.0196078), (7,0.0196078), (8,0.0196078), (9,0.0196078), (10,0.0196078), (11,0.0196078), (12,0.0196078), (13,0.0196078), (14,0.0196078), (15,0.0196078), (16,0.0196078), (17,0.0196078), (18,0.0196078), (19,0.0196078), (20,0.0196078), (21,0.0196078), (22,0.0196078), (23,0.0196078), (24,0.0196078), (25,0.0196078), (26,0.0196078), (27,0.0196078), (28,0.0196078), (29,0.0196078), (30,0.0196078), (31,0.0196078), (32,0.0196078), (33,0.0196078), (34,0.0196078), (35,0.0196078), (36,0.0196078), (37,0.0196078), (38,0.0196078), (39,0.0196078), (40,0.0196078), (41,0.0196078), (42,0.0196078), (43,0.0196078), (44,0.0196078), (45,0.0196078), (46,0.0196078), (47,0.0196078), (48,0.0196078), (49,0.0196078), (50,0.0196078), (51,0.0196078)] +LegalActions() = [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51] +StringLegalActions() = ["CA", "C3", "C4", "C5", "C6", "C7", "C8", "C9", "CT", "CJ", "CQ", "CK", "DA", "D2", "D3", "D4", "D5", "D6", "D7", "D8", "D9", "DT", "DJ", "DQ", "DK", "HA", "H2", "H3", "H4", "H5", "H6", "H7", "H8", "H9", "HT", "HJ", "HQ", "HK", "SA", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "ST", "SJ", "SQ", "SK"] + +# Apply action "S7" +action: 45 + +# State 2 +# Apply action "D3" +action: 15 + +# State 3 +# Apply action "HK" +action: 38 + +# State 4 +# Current Phase: Player Turn +# Current Player: 0 +# Player 0: Cards: C2 S7 +# Dealer: Cards: D3 HK +IsTerminal() = False +History() = [1, 45, 15, 
38] +HistoryString() = "1, 45, 15, 38" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "1 45 38" +ObservationString(0) = "Current Phase: Player Turn\nCurrent Player: 0\nPlayer 0: Cards: C2 S7\nDealer: Cards: ?? HK\n" +ObservationTensor(0): ◯◉◯◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0] +Returns() = [0] +LegalActions() = [0, 1] +StringLegalActions() = ["Hit", "Stand"] + +# Apply action "Hit" +action: 0 + +# State 5 +# Apply action "SJ" +action: 49 + +# State 6 +# Current Phase: Player Turn +# Current Player: 0 +# Player 0: Cards: C2 S7 SJ +# Dealer: Cards: D3 HK +IsTerminal() = False +History() = [1, 45, 15, 38, 0, 49] +HistoryString() = "1, 45, 15, 38, 0, 49" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "1 45 38 0 49" +ObservationString(0) = "Current Phase: Player Turn\nCurrent Player: 0\nPlayer 0: Cards: C2 S7 SJ\nDealer: Cards: ?? HK\n" +ObservationTensor(0): ◯◉◯◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0] +Returns() = [0] +LegalActions() = [0, 1] +StringLegalActions() = ["Hit", "Stand"] + +# Apply action "Stand" +action: 1 + +# State 7 +# Apply action "D5" +action: 17 + +# State 8 +# Current Phase: Dealer Turn +# Current Player: -4 +# Player 0: Cards: C2 S7 SJ +# Dealer: Cards: D3 HK D5 +IsTerminal() = True +History() = [1, 45, 15, 38, 0, 49, 1, 17] +HistoryString() = "1, 45, 15, 38, 0, 49, 1, 17" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "1 45 38 0 49 1 17" +ObservationString(0) = "Current Phase: Dealer Turn\nCurrent Player: -4\nPlayer 0: Cards: C2 S7 SJ\nDealer: Cards: D3 HK D5\n" +ObservationTensor(0): ◯◯◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [1] +Returns() = [1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/blotto.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/blotto.txt new file mode 100644 index 0000000..07ec24b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/blotto.txt @@ -0,0 +1,81 @@ +game: blotto + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SIMULTANEOUS +GameType.information = Information.ONE_SHOT +GameType.long_name = "Blotto" +GameType.max_num_players = 10 +GameType.min_num_players = 2 +GameType.parameter_specification = ["coins", "fields", "players"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "blotto" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 66 +PolicyTensorShape() = [66] +MaxChanceOutcomes() = 0 +GetParameters() = {coins=10,fields=3,players=2} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = [1] 
+InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 1 +ObservationTensorShape() = [1] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 1 +MaxGameLength() = 1 +ToString() = "blotto()" + +# State 0 +# Terminal? 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +InformationStateString(0) = "Observing player: 0. Non-terminal" +InformationStateString(1) = "Observing player: 1. Non-terminal" +InformationStateTensor(0): ◯ +InformationStateTensor(1): ◯ +ObservationString(0) = "Non-terminal" +ObservationString(1) = "Non-terminal" +ObservationTensor(0): ◯ +ObservationTensor(1): ◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65] +LegalActions(1) = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65] +StringLegalActions(0) = ["[0,0,10]", "[0,1,9]", "[0,2,8]", "[0,3,7]", "[0,4,6]", "[0,5,5]", "[0,6,4]", "[0,7,3]", "[0,8,2]", "[0,9,1]", "[0,10,0]", "[1,0,9]", "[1,1,8]", "[1,2,7]", "[1,3,6]", "[1,4,5]", "[1,5,4]", "[1,6,3]", "[1,7,2]", "[1,8,1]", "[1,9,0]", "[2,0,8]", "[2,1,7]", "[2,2,6]", "[2,3,5]", "[2,4,4]", "[2,5,3]", "[2,6,2]", "[2,7,1]", "[2,8,0]", "[3,0,7]", "[3,1,6]", "[3,2,5]", "[3,3,4]", "[3,4,3]", "[3,5,2]", "[3,6,1]", "[3,7,0]", "[4,0,6]", "[4,1,5]", "[4,2,4]", "[4,3,3]", "[4,4,2]", "[4,5,1]", "[4,6,0]", "[5,0,5]", "[5,1,4]", "[5,2,3]", "[5,3,2]", "[5,4,1]", "[5,5,0]", "[6,0,4]", "[6,1,3]", "[6,2,2]", "[6,3,1]", "[6,4,0]", "[7,0,3]", "[7,1,2]", "[7,2,1]", "[7,3,0]", "[8,0,2]", "[8,1,1]", "[8,2,0]", "[9,0,1]", "[9,1,0]", "[10,0,0]"] +StringLegalActions(1) = ["[0,0,10]", "[0,1,9]", "[0,2,8]", "[0,3,7]", "[0,4,6]", "[0,5,5]", "[0,6,4]", "[0,7,3]", "[0,8,2]", "[0,9,1]", "[0,10,0]", "[1,0,9]", "[1,1,8]", "[1,2,7]", "[1,3,6]", "[1,4,5]", "[1,5,4]", "[1,6,3]", "[1,7,2]", "[1,8,1]", "[1,9,0]", "[2,0,8]", "[2,1,7]", "[2,2,6]", "[2,3,5]", "[2,4,4]", "[2,5,3]", "[2,6,2]", "[2,7,1]", "[2,8,0]", "[3,0,7]", "[3,1,6]", "[3,2,5]", "[3,3,4]", "[3,4,3]", "[3,5,2]", "[3,6,1]", "[3,7,0]", "[4,0,6]", "[4,1,5]", "[4,2,4]", "[4,3,3]", "[4,4,2]", "[4,5,1]", "[4,6,0]", "[5,0,5]", "[5,1,4]", "[5,2,3]", "[5,3,2]", "[5,4,1]", "[5,5,0]", "[6,0,4]", "[6,1,3]", "[6,2,2]", "[6,3,1]", "[6,4,0]", "[7,0,3]", "[7,1,2]", "[7,2,1]", "[7,3,0]", "[8,0,2]", "[8,1,1]", "[8,2,0]", "[9,0,1]", "[9,1,0]", "[10,0,0]"] + +# Apply joint action ["[6,3,1]", "[7,2,1]"] +actions: [54, 58] + +# State 1 +# Terminal? 1 +# P0 action: [6,3,1] +# P1 action: [7,2,1] +IsTerminal() = True +History() = [54, 58] +HistoryString() = "54, 58" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "Observing player: 0. Terminal. History string: 54, 58" +InformationStateString(1) = "Observing player: 1. Terminal. History string: 54, 58" +InformationStateTensor(0): ◉ +InformationStateTensor(1): ◉ +ObservationString(0) = "Terminal. History string: 54, 58" +ObservationString(1) = "Terminal. 
History string: 54, 58" +ObservationTensor(0): ◉ +ObservationTensor(1): ◉ +Rewards() = [0, 0] +Returns() = [0, 0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/breakthrough.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/breakthrough.txt new file mode 100644 index 0000000..2a51fa9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/breakthrough.txt @@ -0,0 +1,542 @@ +game: breakthrough + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Breakthrough" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["columns", "rows"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "breakthrough" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 768 +PolicyTensorShape() = [768] +MaxChanceOutcomes() = 0 +GetParameters() = {columns=8,rows=8} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [3, 8, 8] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 192 +MaxGameLength() = 209 +ToString() = "breakthrough()" + +# State 0 +# 8bbbbbbbb +# 7bbbbbbbb +# 6........ +# 5........ +# 4........ +# 3........ +# 2wwwwwwww +# 1wwwwwwww +# abcdefgh +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "8bbbbbbbb\n7bbbbbbbb\n6........\n5........\n4........\n3........\n2wwwwwwww\n1wwwwwwww\n abcdefgh\n" +ObservationString(1) = "8bbbbbbbb\n7bbbbbbbb\n6........\n5........\n4........\n3........\n2wwwwwwww\n1wwwwwwww\n abcdefgh\n" +ObservationTensor(0): +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [98, 100, 108, 110, 112, 120, 122, 124, 132, 134, 136, 144, 146, 148, 156, 158, 160, 168, 170, 172, 180, 182] +StringLegalActions() = ["a7a6", "a7b6", "b7a6", "b7b6", "b7c6", "c7b6", "c7c6", "c7d6", "d7c6", "d7d6", "d7e6", "e7d6", "e7e6", "e7f6", "f7e6", "f7f6", "f7g6", "g7f6", "g7g6", "g7h6", "h7g6", "h7h6"] + +# Apply action "c7d6" +action: 124 + +# State 1 +# 8bbbbbbbb +# 7bb.bbbbb +# 6...b.... +# 5........ +# 4........ +# 3........ 
+# 2wwwwwwww +# 1wwwwwwww +# abcdefgh +IsTerminal() = False +History() = [124] +HistoryString() = "124" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "8bbbbbbbb\n7bb.bbbbb\n6...b....\n5........\n4........\n3........\n2wwwwwwww\n1wwwwwwww\n abcdefgh\n" +ObservationString(1) = "8bbbbbbbb\n7bb.bbbbb\n6...b....\n5........\n4........\n3........\n2wwwwwwww\n1wwwwwwww\n abcdefgh\n" +ObservationTensor(0): +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯ +◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯ +◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [584, 586, 594, 596, 598, 606, 608, 610, 618, 620, 622, 630, 632, 634, 642, 644, 646, 654, 656, 658, 666, 668] +StringLegalActions() = ["a2a3", "a2b3", "b2a3", "b2b3", "b2c3", "c2b3", "c2c3", "c2d3", "d2c3", "d2d3", "d2e3", "e2d3", "e2e3", "e2f3", "f2e3", "f2f3", "f2g3", "g2f3", "g2g3", "g2h3", "h2g3", "h2h3"] + +# Apply action "d2d3" +action: 620 + +# State 2 +# 8bbbbbbbb +# 7bb.bbbbb +# 6...b.... +# 5........ +# 4........ +# 3...w.... +# 2www.wwww +# 1wwwwwwww +# abcdefgh +IsTerminal() = False +History() = [124, 620] +HistoryString() = "124, 620" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "8bbbbbbbb\n7bb.bbbbb\n6...b....\n5........\n4........\n3...w....\n2www.wwww\n1wwwwwwww\n abcdefgh\n" +ObservationString(1) = "8bbbbbbbb\n7bb.bbbbb\n6...b....\n5........\n4........\n3...w....\n2www.wwww\n1wwwwwwww\n abcdefgh\n" +ObservationTensor(0): +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯ +◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◉◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉◉ ◯◯◯◉◯◯◯◯ +◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯ +◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◉◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉◉ ◯◯◯◉◯◯◯◯ +◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [16, 26, 36, 98, 100, 108, 110, 112, 132, 136, 146, 148, 156, 158, 160, 168, 170, 172, 180, 182, 228, 230, 232] +StringLegalActions() = ["b8c7", "c8c7", "d8c7", "a7a6", "a7b6", "b7a6", "b7b6", "b7c6", "d7c6", "d7e6", "e7e6", "e7f6", "f7e6", "f7f6", "f7g6", "g7f6", "g7g6", "g7h6", "h7g6", "h7h6", "d6c5", "d6d5", "d6e5"] + +# Apply action "f7e6" +action: 156 + +# State 3 +# 8bbbbbbbb +# 7bb.bb.bb +# 6...bb... +# 5........ +# 4........ +# 3...w.... 
+# 2www.wwww +# 1wwwwwwww +# abcdefgh +IsTerminal() = False +History() = [124, 620, 156] +HistoryString() = "124, 620, 156" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "8bbbbbbbb\n7bb.bb.bb\n6...bb...\n5........\n4........\n3...w....\n2www.wwww\n1wwwwwwww\n abcdefgh\n" +ObservationString(1) = "8bbbbbbbb\n7bb.bb.bb\n6...bb...\n5........\n4........\n3...w....\n2www.wwww\n1wwwwwwww\n abcdefgh\n" +ObservationTensor(0): +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◯◉◉◯◉◉ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◉◯◯ +◯◯◯◉◉◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◉◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉◉ ◯◯◯◉◯◯◯◯ +◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◯◉◉◯◉◉ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◉◯◯ +◯◯◯◉◉◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◉◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉◉ ◯◯◯◉◯◯◯◯ +◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [522, 524, 526, 584, 586, 594, 596, 598, 606, 608, 632, 634, 642, 644, 646, 654, 656, 658, 666, 668, 706, 716, 726] +StringLegalActions() = ["d3c4", "d3d4", "d3e4", "a2a3", "a2b3", "b2a3", "b2b3", "b2c3", "c2b3", "c2c3", "e2e3", "e2f3", "f2e3", "f2f3", "f2g3", "g2f3", "g2g3", "g2h3", "h2g3", "h2h3", "c1d2", "d1d2", "e1d2"] + +# Apply action "f2e3" +action: 642 + +# State 4 +# 8bbbbbbbb +# 7bb.bb.bb +# 6...bb... +# 5........ +# 4........ +# 3...ww... +# 2www.w.ww +# 1wwwwwwww +# abcdefgh +IsTerminal() = False +History() = [124, 620, 156, 642] +HistoryString() = "124, 620, 156, 642" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "8bbbbbbbb\n7bb.bb.bb\n6...bb...\n5........\n4........\n3...ww...\n2www.w.ww\n1wwwwwwww\n abcdefgh\n" +ObservationString(1) = "8bbbbbbbb\n7bb.bb.bb\n6...bb...\n5........\n4........\n3...ww...\n2www.w.ww\n1wwwwwwww\n abcdefgh\n" +ObservationTensor(0): +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◯◉◉◯◉◉ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◉◯◯ +◯◯◯◉◉◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◉◉◯◯◯ ◉◉◉◯◯◉◉◉ +◯◯◯◯◯◯◯◯ ◉◉◉◯◉◯◉◉ ◯◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◯◉◉◯◉◉ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◉◯◯ +◯◯◯◉◉◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◉◉◯◯◯ ◉◉◉◯◯◉◉◉ +◯◯◯◯◯◯◯◯ ◉◉◉◯◉◯◉◉ ◯◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [16, 26, 36, 52, 62, 72, 98, 100, 108, 110, 112, 132, 148, 168, 170, 172, 180, 182, 228, 230, 232, 240, 242, 244] +StringLegalActions() = ["b8c7", "c8c7", "d8c7", "e8f7", "f8f7", "g8f7", "a7a6", "a7b6", "b7a6", "b7b6", "b7c6", "d7c6", "e7f6", "g7f6", "g7g6", "g7h6", "h7g6", "h7h6", "d6c5", "d6d5", "d6e5", "e6d5", "e6e5", "e6f5"] + +# Apply action "b8c7" +action: 16 + +# State 5 +# 8b.bbbbbb +# 7bbbbb.bb +# 6...bb... +# 5........ +# 4........ +# 3...ww... 
+# 2www.w.ww +# 1wwwwwwww +# abcdefgh +IsTerminal() = False +History() = [124, 620, 156, 642, 16] +HistoryString() = "124, 620, 156, 642, 16" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "8b.bbbbbb\n7bbbbb.bb\n6...bb...\n5........\n4........\n3...ww...\n2www.w.ww\n1wwwwwwww\n abcdefgh\n" +ObservationString(1) = "8b.bbbbbb\n7bbbbb.bb\n6...bb...\n5........\n4........\n3...ww...\n2www.w.ww\n1wwwwwwww\n abcdefgh\n" +ObservationTensor(0): +◉◯◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ +◉◉◉◉◉◯◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ +◯◯◯◉◉◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◉◉◯◯◯ ◉◉◉◯◯◉◉◉ +◯◯◯◯◯◯◯◯ ◉◉◉◯◉◯◉◉ ◯◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◉◯◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ +◉◉◉◉◉◯◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ +◯◯◯◉◉◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◉◉◯◯◯ ◉◉◉◯◯◉◉◉ +◯◯◯◯◯◯◯◯ ◉◉◉◯◉◯◉◉ ◯◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [522, 524, 526, 534, 536, 538, 584, 586, 594, 596, 598, 606, 608, 634, 654, 656, 658, 666, 668, 706, 716, 726, 730, 740, 750] +StringLegalActions() = ["d3c4", "d3d4", "d3e4", "e3d4", "e3e4", "e3f4", "a2a3", "a2b3", "b2a3", "b2b3", "b2c3", "c2b3", "c2c3", "e2f3", "g2f3", "g2g3", "g2h3", "h2g3", "h2h3", "c1d2", "d1d2", "e1d2", "e1f2", "f1f2", "g1f2"] + +# Apply action "b2b3" +action: 596 + +# State 6 +# Apply action "d6d5" +action: 230 + +# State 7 +# Apply action "g1f2" +action: 750 + +# State 8 +# Apply action "f8f7" +action: 62 + +# State 9 +# Apply action "h2h3" +action: 668 + +# State 10 +# Apply action "b7a6" +action: 108 + +# State 11 +# Apply action "h3h4" +action: 572 + +# State 12 +# Apply action "d7c6" +action: 132 + +# State 13 +# Apply action "a1b2" +action: 682 + +# State 14 +# Apply action "g7g6" +action: 170 + +# State 15 +# Apply action "e3f4" +action: 538 + +# State 16 +# Apply action "e7f6" +action: 148 + +# State 17 +# Apply action "b3b4" +action: 500 + +# State 18 +# Apply action "e8d7" +action: 48 + +# State 19 +# Apply action "a2a3" +action: 584 + +# State 20 +# 8b.bb..bb +# 7b.bb.b.b +# 6b.b.bbb. +# 5...b.... +# 4.w...w.w +# 3w..w.... +# 2.ww.www. 
+# 1.wwwww.w +# abcdefgh +IsTerminal() = False +History() = [124, 620, 156, 642, 16, 596, 230, 750, 62, 668, 108, 572, 132, 682, 170, 538, 148, 500, 48, 584] +HistoryString() = "124, 620, 156, 642, 16, 596, 230, 750, 62, 668, 108, 572, 132, 682, 170, 538, 148, 500, 48, 584" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "8b.bb..bb\n7b.bb.b.b\n6b.b.bbb.\n5...b....\n4.w...w.w\n3w..w....\n2.ww.www.\n1.wwwww.w\n abcdefgh\n" +ObservationString(1) = "8b.bb..bb\n7b.bb.b.b\n6b.b.bbb.\n5...b....\n4.w...w.w\n3w..w....\n2.ww.www.\n1.wwwww.w\n abcdefgh\n" +ObservationTensor(0): +◉◯◉◉◯◯◉◉ ◯◯◯◯◯◯◯◯ ◯◉◯◯◉◉◯◯ +◉◯◉◉◯◉◯◉ ◯◯◯◯◯◯◯◯ ◯◉◯◯◉◯◉◯ +◉◯◉◯◉◉◉◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◯◯◉ +◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉ ◉◯◉◉◉◯◉◯ +◯◯◯◯◯◯◯◯ ◉◯◯◉◯◯◯◯ ◯◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◉◉◯◉◉◉◯ ◉◯◯◉◯◯◯◉ +◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◉ ◉◯◯◯◯◯◉◯ +ObservationTensor(1): +◉◯◉◉◯◯◉◉ ◯◯◯◯◯◯◯◯ ◯◉◯◯◉◉◯◯ +◉◯◉◉◯◉◯◉ ◯◯◯◯◯◯◯◯ ◯◉◯◯◉◯◉◯ +◉◯◉◯◉◉◉◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◯◯◉ +◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉ ◉◯◉◉◉◯◉◯ +◯◯◯◯◯◯◯◯ ◉◯◯◉◯◯◯◯ ◯◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◉◉◯◉◉◉◯ ◉◯◯◉◯◯◯◉ +◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◉ ◉◯◯◯◯◯◉◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [4, 24, 40, 74, 84, 100, 120, 124, 134, 182, 194, 196, 216, 218, 242, 244, 252, 254, 256, 264, 266, 268, 324, 326, 328] +StringLegalActions() = ["a8b7", "c8b7", "d8e7", "g8g7", "h8g7", "a7b6", "c7b6", "c7d6", "d7d6", "h7h6", "a6a5", "a6b5", "c6b5", "c6c5", "e6e5", "e6f5", "f6e5", "f6f5", "f6g5", "g6f5", "g6g5", "g6h5", "d5c4", "d5d4", "d5e4"] + +# Apply action "g6g5" +action: 266 + +# State 21 +# 8b.bb..bb +# 7b.bb.b.b +# 6b.b.bb.. +# 5...b..b. +# 4.w...w.w +# 3w..w.... +# 2.ww.www. +# 1.wwwww.w +# abcdefgh +IsTerminal() = False +History() = [124, 620, 156, 642, 16, 596, 230, 750, 62, 668, 108, 572, 132, 682, 170, 538, 148, 500, 48, 584, 266] +HistoryString() = "124, 620, 156, 642, 16, 596, 230, 750, 62, 668, 108, 572, 132, 682, 170, 538, 148, 500, 48, 584, 266" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "8b.bb..bb\n7b.bb.b.b\n6b.b.bb..\n5...b..b.\n4.w...w.w\n3w..w....\n2.ww.www.\n1.wwwww.w\n abcdefgh\n" +ObservationString(1) = "8b.bb..bb\n7b.bb.b.b\n6b.b.bb..\n5...b..b.\n4.w...w.w\n3w..w....\n2.ww.www.\n1.wwwww.w\n abcdefgh\n" +ObservationTensor(0): +◉◯◉◉◯◯◉◉ ◯◯◯◯◯◯◯◯ ◯◉◯◯◉◉◯◯ +◉◯◉◉◯◉◯◉ ◯◯◯◯◯◯◯◯ ◯◉◯◯◉◯◉◯ +◉◯◉◯◉◉◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◯◉◉ +◯◯◯◉◯◯◉◯ ◯◯◯◯◯◯◯◯ ◉◉◉◯◉◉◯◉ +◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉ ◉◯◉◉◉◯◉◯ +◯◯◯◯◯◯◯◯ ◉◯◯◉◯◯◯◯ ◯◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◉◉◯◉◉◉◯ ◉◯◯◉◯◯◯◉ +◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◉ ◉◯◯◯◯◯◉◯ +ObservationTensor(1): +◉◯◉◉◯◯◉◉ ◯◯◯◯◯◯◯◯ ◯◉◯◯◉◉◯◯ +◉◯◉◉◯◉◯◉ ◯◯◯◯◯◯◯◯ ◯◉◯◯◉◯◉◯ +◉◯◉◯◉◉◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◯◉◉ +◯◯◯◉◯◯◉◯ ◯◯◯◯◯◯◯◯ ◉◉◉◯◉◉◯◉ +◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉ ◉◯◉◉◉◯◉◯ +◯◯◯◯◯◯◯◯ ◉◯◯◉◯◯◯◯ ◯◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◉◉◯◉◉◉◯ ◉◯◯◉◯◯◯◉ +◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◉ ◉◯◯◯◯◯◉◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [402, 404, 406, 450, 452, 455, 475, 476, 488, 522, 524, 526, 596, 598, 606, 608, 632, 634, 642, 644, 646, 654, 656, 658, 690, 706, 716, 726, 764] +StringLegalActions() = ["b4a5", "b4b5", "b4c5", "f4e5", "f4f5", "f4g5*", "h4g5*", "h4h5", "a3a4", "d3c4", "d3d4", "d3e4", "b2b3", "b2c3", "c2b3", "c2c3", "e2e3", "e2f3", "f2e3", "f2f3", "f2g3", "g2f3", "g2g3", "g2h3", "b1a2", "c1d2", "d1d2", "e1d2", "h1h2"] + +# Apply action "g2g3" +action: 656 + +# State 22 +# Apply action "h8g7" +action: 84 + +# State 23 +# Apply action "b4a5" +action: 402 + +# State 24 +# Apply action "c7d6" +action: 124 + +# State 25 +# Apply action "h1g2" +action: 762 + +# State 26 +# Apply action "g7g6" +action: 170 + +# State 27 +# Apply action "h4g5*" +action: 475 + +# 
State 28 +# Apply action "d6c5" +action: 228 + +# State 29 +# Apply action "g5f6*" +action: 367 + +# State 30 +# Apply action "c5d4" +action: 316 + +# State 31 +# Apply action "f6e7" +action: 258 + +# State 32 +# Apply action "f7f6" +action: 158 + +# State 33 +# Apply action "g3h4" +action: 562 + +# State 34 +# Apply action "f6f5" +action: 254 + +# State 35 +# Apply action "d3c4" +action: 522 + +# State 36 +# Apply action "d4e3" +action: 424 + +# State 37 +# Apply action "e7e8" +action: 152 + +# State 38 +# 8b.bbw.b. +# 7b..b...b +# 6b.b.b.b. +# 5w..b.b.. +# 4..w..w.w +# 3w...b... +# 2.ww.www. +# 1.wwwww.. +# abcdefgh +IsTerminal() = True +History() = [124, 620, 156, 642, 16, 596, 230, 750, 62, 668, 108, 572, 132, 682, 170, 538, 148, 500, 48, 584, 266, 656, 84, 402, 124, 762, 170, 475, 228, 367, 316, 258, 158, 562, 254, 522, 424, 152] +HistoryString() = "124, 620, 156, 642, 16, 596, 230, 750, 62, 668, 108, 572, 132, 682, 170, 538, 148, 500, 48, 584, 266, 656, 84, 402, 124, 762, 170, 475, 228, 367, 316, 258, 158, 562, 254, 522, 424, 152" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "8b.bbw.b.\n7b..b...b\n6b.b.b.b.\n5w..b.b..\n4..w..w.w\n3w...b...\n2.ww.www.\n1.wwwww..\n abcdefgh\n" +ObservationString(1) = "8b.bbw.b.\n7b..b...b\n6b.b.b.b.\n5w..b.b..\n4..w..w.w\n3w...b...\n2.ww.www.\n1.wwwww..\n abcdefgh\n" +ObservationTensor(0): +◉◯◉◉◯◯◉◯ ◯◯◯◯◉◯◯◯ ◯◉◯◯◯◉◯◉ +◉◯◯◉◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◉◉◯◉◉◉◯ +◉◯◉◯◉◯◉◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ +◯◯◯◉◯◉◯◯ ◉◯◯◯◯◯◯◯ ◯◉◉◯◉◯◉◉ +◯◯◯◯◯◯◯◯ ◯◯◉◯◯◉◯◉ ◉◉◯◉◉◯◉◯ +◯◯◯◯◉◯◯◯ ◉◯◯◯◯◯◯◯ ◯◉◉◉◯◉◉◉ +◯◯◯◯◯◯◯◯ ◯◉◉◯◉◉◉◯ ◉◯◯◉◯◯◯◉ +◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯ ◉◯◯◯◯◯◉◉ +ObservationTensor(1): +◉◯◉◉◯◯◉◯ ◯◯◯◯◉◯◯◯ ◯◉◯◯◯◉◯◉ +◉◯◯◉◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◉◉◯◉◉◉◯ +◉◯◉◯◉◯◉◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ +◯◯◯◉◯◉◯◯ ◉◯◯◯◯◯◯◯ ◯◉◉◯◉◯◉◉ +◯◯◯◯◯◯◯◯ ◯◯◉◯◯◉◯◉ ◉◉◯◉◉◯◉◯ +◯◯◯◯◉◯◯◯ ◉◯◯◯◯◯◯◯ ◯◉◉◉◯◉◉◉ +◯◯◯◯◯◯◯◯ ◯◉◉◯◉◉◉◯ ◉◯◯◉◯◯◯◉ +◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯ ◉◯◯◯◯◯◉◉ +Rewards() = [-1, 1] +Returns() = [-1, 1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/bridge(use_double_dummy_result=false).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/bridge(use_double_dummy_result=false).txt new file mode 100644 index 0000000..c1b5f06 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/bridge(use_double_dummy_result=false).txt @@ -0,0 +1,1341 @@ +game: bridge(use_double_dummy_result=false) + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Contract Bridge" +GameType.max_num_players = 4 +GameType.min_num_players = 4 +GameType.parameter_specification = ["dealer_vul", "non_dealer_vul", "num_tricks", "use_double_dummy_result"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "bridge" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 90 +PolicyTensorShape() = [90] +MaxChanceOutcomes() = 52 +GetParameters() = {dealer_vul=False,non_dealer_vul=False,num_tricks=2,use_double_dummy_result=False} +NumPlayers() = 4 +MinUtility() = -7600.0 +MaxUtility() = 7600.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = [571] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 571 
+ObservationTensorShape() = [571] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 571 +MaxGameLength() = 371 +ToString() = "bridge(use_double_dummy_result=False)" + +# State 0 +# Vul: None +# S +# H +# D +# C +# S S +# H H +# D D +# C C +# S +# H +# D +# C +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "Vul: None\nS none\nH none\nD none\nC none\n" +InformationStateString(1) = "Vul: None\nS none\nH none\nD none\nC none\n" +InformationStateString(2) = "Vul: None\nS none\nH none\nD none\nC none\n" +InformationStateString(3) = "Vul: None\nS none\nH none\nD none\nC none\n" +InformationStateTensor(0): zeros(571) +InformationStateTensor(1): zeros(571) +InformationStateTensor(2): zeros(571) +InformationStateTensor(3): zeros(571) +ObservationString(0) = "Vul: None\nS none\nH none\nD none\nC none\n" +ObservationString(1) = "Vul: None\nS none\nH none\nD none\nC none\n" +ObservationString(2) = "Vul: None\nS none\nH none\nD none\nC none\n" +ObservationString(3) = "Vul: None\nS none\nH none\nD none\nC none\n" +ObservationTensor(0): zeros(571) +ObservationTensor(1): zeros(571) +ObservationTensor(2): zeros(571) +ObservationTensor(3): zeros(571) +ChanceOutcomes() = [(0,0.0192308), (1,0.0192308), (2,0.0192308), (3,0.0192308), (4,0.0192308), (5,0.0192308), (6,0.0192308), (7,0.0192308), (8,0.0192308), (9,0.0192308), (10,0.0192308), (11,0.0192308), (12,0.0192308), (13,0.0192308), (14,0.0192308), (15,0.0192308), (16,0.0192308), (17,0.0192308), (18,0.0192308), (19,0.0192308), (20,0.0192308), (21,0.0192308), (22,0.0192308), (23,0.0192308), (24,0.0192308), (25,0.0192308), (26,0.0192308), (27,0.0192308), (28,0.0192308), (29,0.0192308), (30,0.0192308), (31,0.0192308), (32,0.0192308), (33,0.0192308), (34,0.0192308), (35,0.0192308), (36,0.0192308), (37,0.0192308), (38,0.0192308), (39,0.0192308), (40,0.0192308), (41,0.0192308), (42,0.0192308), (43,0.0192308), (44,0.0192308), (45,0.0192308), (46,0.0192308), (47,0.0192308), (48,0.0192308), (49,0.0192308), (50,0.0192308), (51,0.0192308)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51] +StringLegalActions() = ["C2", "D2", "H2", "S2", "C3", "D3", "H3", "S3", "C4", "D4", "H4", "S4", "C5", "D5", "H5", "S5", "C6", "D6", "H6", "S6", "C7", "D7", "H7", "S7", "C8", "D8", "H8", "S8", "C9", "D9", "H9", "S9", "CT", "DT", "HT", "ST", "CJ", "DJ", "HJ", "SJ", "CQ", "DQ", "HQ", "SQ", "CK", "DK", "HK", "SK", "CA", "DA", "HA", "SA"] + +# Apply action "C5" +action: 12 + +# State 1 +# Vul: None +# S +# H +# D +# C 5 +# S S +# H H +# D D +# C C +# S +# H +# D +# C +IsTerminal() = False +History() = [12] +HistoryString() = "12" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "Vul: None\nS none\nH none\nD none\nC 5\n" +InformationStateString(1) = "Vul: None\nS none\nH none\nD none\nC none\n" +InformationStateString(2) = "Vul: None\nS none\nH none\nD none\nC none\n" +InformationStateString(3) = "Vul: None\nS none\nH none\nD none\nC none\n" +InformationStateTensor(0): zeros(571) +InformationStateTensor(1): zeros(571) +InformationStateTensor(2): zeros(571) +InformationStateTensor(3): zeros(571) +ObservationString(0) = "Vul: None\nS none\nH none\nD none\nC 5\n" +ObservationString(1) = "Vul: None\nS none\nH none\nD none\nC none\n" 
+ObservationString(2) = "Vul: None\nS none\nH none\nD none\nC none\n" +ObservationString(3) = "Vul: None\nS none\nH none\nD none\nC none\n" +ObservationTensor(0): zeros(571) +ObservationTensor(1): zeros(571) +ObservationTensor(2): zeros(571) +ObservationTensor(3): zeros(571) +ChanceOutcomes() = [(0,0.0196078), (1,0.0196078), (2,0.0196078), (3,0.0196078), (4,0.0196078), (5,0.0196078), (6,0.0196078), (7,0.0196078), (8,0.0196078), (9,0.0196078), (10,0.0196078), (11,0.0196078), (13,0.0196078), (14,0.0196078), (15,0.0196078), (16,0.0196078), (17,0.0196078), (18,0.0196078), (19,0.0196078), (20,0.0196078), (21,0.0196078), (22,0.0196078), (23,0.0196078), (24,0.0196078), (25,0.0196078), (26,0.0196078), (27,0.0196078), (28,0.0196078), (29,0.0196078), (30,0.0196078), (31,0.0196078), (32,0.0196078), (33,0.0196078), (34,0.0196078), (35,0.0196078), (36,0.0196078), (37,0.0196078), (38,0.0196078), (39,0.0196078), (40,0.0196078), (41,0.0196078), (42,0.0196078), (43,0.0196078), (44,0.0196078), (45,0.0196078), (46,0.0196078), (47,0.0196078), (48,0.0196078), (49,0.0196078), (50,0.0196078), (51,0.0196078)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51] +StringLegalActions() = ["C2", "D2", "H2", "S2", "C3", "D3", "H3", "S3", "C4", "D4", "H4", "S4", "D5", "H5", "S5", "C6", "D6", "H6", "S6", "C7", "D7", "H7", "S7", "C8", "D8", "H8", "S8", "C9", "D9", "H9", "S9", "CT", "DT", "HT", "ST", "CJ", "DJ", "HJ", "SJ", "CQ", "DQ", "HQ", "SQ", "CK", "DK", "HK", "SK", "CA", "DA", "HA", "SA"] + +# Apply action "S9" +action: 31 + +# State 2 +# Apply action "SJ" +action: 39 + +# State 3 +# Apply action "DK" +action: 45 + +# State 4 +# Apply action "S4" +action: 11 + +# State 5 +# Apply action "H3" +action: 6 + +# State 6 +# Apply action "ST" +action: 35 + +# State 7 +# Apply action "DQ" +action: 41 + +# State 8 +# Apply action "SA" +action: 51 + +# State 9 +# Apply action "C2" +action: 0 + +# State 10 +# Apply action "C3" +action: 4 + +# State 11 +# Apply action "D6" +action: 17 + +# State 12 +# Apply action "S8" +action: 27 + +# State 13 +# Apply action "HT" +action: 34 + +# State 14 +# Apply action "C4" +action: 8 + +# State 15 +# Apply action "S6" +action: 19 + +# State 16 +# Apply action "HA" +action: 50 + +# State 17 +# Apply action "HK" +action: 46 + +# State 18 +# Apply action "CQ" +action: 40 + +# State 19 +# Apply action "H5" +action: 14 + +# State 20 +# Apply action "S5" +action: 15 + +# State 21 +# Apply action "H8" +action: 26 + +# State 22 +# Apply action "D2" +action: 1 + +# State 23 +# Apply action "DT" +action: 33 + +# State 24 +# Apply action "H9" +action: 30 + +# State 25 +# Apply action "C9" +action: 28 + +# State 26 +# Apply action "SQ" +action: 43 + +# State 27 +# Apply action "H7" +action: 22 + +# State 28 +# Apply action "DJ" +action: 37 + +# State 29 +# Apply action "D7" +action: 21 + +# State 30 +# Apply action "D8" +action: 25 + +# State 31 +# Apply action "D3" +action: 5 + +# State 32 +# Apply action "CJ" +action: 36 + +# State 33 +# Apply action "H6" +action: 18 + +# State 34 +# Apply action "C6" +action: 16 + +# State 35 +# Apply action "D4" +action: 9 + +# State 36 +# Apply action "H4" +action: 10 + +# State 37 +# Apply action "C8" +action: 24 + +# State 38 +# Apply action "SK" +action: 47 + +# State 39 +# Apply action "HJ" +action: 38 + +# State 40 +# Apply action "S3" +action: 7 + +# State 41 +# Apply action "HQ" +action: 42 + +# 
State 42 +# Apply action "H2" +action: 2 + +# State 43 +# Apply action "CK" +action: 44 + +# State 44 +# Apply action "CA" +action: 48 + +# State 45 +# Apply action "D5" +action: 13 + +# State 46 +# Apply action "S2" +action: 3 + +# State 47 +# Apply action "D9" +action: 29 + +# State 48 +# Apply action "CT" +action: 32 + +# State 49 +# Apply action "DA" +action: 49 + +# State 50 +# Apply action "C7" +action: 20 + +# State 51 +# Apply action "S7" +action: 23 + +# State 52 +# Vul: None +# S A8543 +# H A94 +# D J +# C AJT5 +# S 76 S 9 +# H J75 H KQT863 +# D KQT9643 D A75 +# C K C 982 +# S KQJT2 +# H 2 +# D 82 +# C Q7643 +IsTerminal() = False +History() = [12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23] +HistoryString() = "12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "Vul: None\nS A8543\nH A94\nD J\nC AJT5\n" +InformationStateString(1) = "Vul: None\nS 9\nH KQT863\nD A75\nC 982\n" +InformationStateString(2) = "Vul: None\nS KQJT2\nH 2\nD 82\nC Q7643\n" +InformationStateString(3) = "Vul: None\nS 76\nH J75\nD KQT9643\nC K\n" +InformationStateTensor(0): binvec(571, 0x450000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000009c8009460058000000000000000000000) +InformationStateTensor(1): binvec(571, 0x45000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000041021254901120000000000000000000000) +InformationStateTensor(2): binvec(571, 0x4500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003c40442008c880000000000000000000000) +InformationStateTensor(3): binvec(571, 0x45000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002212982212600000000000000000000000) +ObservationString(0) = "Vul: None\nS A8543\nH A94\nD J\nC AJT5\n" +ObservationString(1) = "Vul: None\nS 9\nH KQT863\nD A75\nC 982\n" +ObservationString(2) = "Vul: None\nS KQJT2\nH 2\nD 82\nC Q7643\n" +ObservationString(3) = "Vul: None\nS 76\nH J75\nD KQT9643\nC K\n" +ObservationTensor(0): binvec(571, 0x450000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000009c8009460058000000000000000000000) +ObservationTensor(1): binvec(571, 0x45000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000041021254901120000000000000000000000) +ObservationTensor(2): binvec(571, 0x4500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003c40442008c880000000000000000000000) +ObservationTensor(3): binvec(571, 0x45000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002212982212600000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89] +StringLegalActions() = ["Pass", "1C", "1D", "1H", "1S", "1N", "2C", "2D", "2H", "2S", "2N", "3C", "3D", "3H", "3S", "3N", "4C", "4D", "4H", "4S", "4N", 
"5C", "5D", "5H", "5S", "5N", "6C", "6D", "6H", "6S", "6N", "7C", "7D", "7H", "7S", "7N"] + +# Apply action "1S" +action: 58 + +# State 53 +# Vul: None +# S A8543 +# H A94 +# D J +# C AJT5 +# S 76 S 9 +# H J75 H KQT863 +# D KQT9643 D A75 +# C K C 982 +# S KQJT2 +# H 2 +# D 82 +# C Q7643 +# +# West North East South +# 1S +IsTerminal() = False +History() = [12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58] +HistoryString() = "12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "Vul: None\nS A8543\nH A94\nD J\nC AJT5\n\nWest North East South\n 1S " +InformationStateString(1) = "Vul: None\nS 9\nH KQT863\nD A75\nC 982\n\nWest North East South\n 1S ?" +InformationStateString(2) = "Vul: None\nS KQJT2\nH 2\nD 82\nC Q7643\n\nWest North East South\n 1S " +InformationStateString(3) = "Vul: None\nS 76\nH J75\nD KQT9643\nC K\n\nWest North East South\n 1S " +InformationStateTensor(0): binvec(571, 0x450000000000400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000009c8009460058000000000000000000000) +InformationStateTensor(1): binvec(571, 0x45000000000008000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000041021254901120000000000000000000000) +InformationStateTensor(2): binvec(571, 0x4500000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003c40442008c880000000000000000000000) +InformationStateTensor(3): binvec(571, 0x45000000000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002212982212600000000000000000000000) +ObservationString(0) = "Vul: None\nS A8543\nH A94\nD J\nC AJT5\n\nWest North East South\n 1S " +ObservationString(1) = "Vul: None\nS 9\nH KQT863\nD A75\nC 982\n\nWest North East South\n 1S ?" 
+ObservationString(2) = "Vul: None\nS KQJT2\nH 2\nD 82\nC Q7643\n\nWest North East South\n 1S " +ObservationString(3) = "Vul: None\nS 76\nH J75\nD KQT9643\nC K\n\nWest North East South\n 1S " +ObservationTensor(0): binvec(571, 0x450000000000400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000009c8009460058000000000000000000000) +ObservationTensor(1): binvec(571, 0x45000000000008000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000041021254901120000000000000000000000) +ObservationTensor(2): binvec(571, 0x4500000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003c40442008c880000000000000000000000) +ObservationTensor(3): binvec(571, 0x45000000000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002212982212600000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [52, 53, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89] +StringLegalActions() = ["Pass", "Dbl", "1N", "2C", "2D", "2H", "2S", "2N", "3C", "3D", "3H", "3S", "3N", "4C", "4D", "4H", "4S", "4N", "5C", "5D", "5H", "5S", "5N", "6C", "6D", "6H", "6S", "6N", "7C", "7D", "7H", "7S", "7N"] + +# Apply action "3H" +action: 67 + +# State 54 +# Vul: None +# S A8543 +# H A94 +# D J +# C AJT5 +# S 76 S 9 +# H J75 H KQT863 +# D KQT9643 D A75 +# C K C 982 +# S KQJT2 +# H 2 +# D 82 +# C Q7643 +# +# West North East South +# 1S 3H +IsTerminal() = False +History() = [12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67] +HistoryString() = "12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "Vul: None\nS A8543\nH A94\nD J\nC AJT5\n\nWest North East South\n 1S 3H " +InformationStateString(1) = "Vul: None\nS 9\nH KQT863\nD A75\nC 982\n\nWest North East South\n 1S 3H " +InformationStateString(2) = "Vul: None\nS KQJT2\nH 2\nD 82\nC Q7643\n\nWest North East South\n 1S 3H ?" 
+InformationStateString(3) = "Vul: None\nS 76\nH J75\nD KQT9643\nC K\n\nWest North East South\n 1S 3H " +InformationStateTensor(0): binvec(571, 0x450000000000400000000000000000000000000200000000000000000000000000000000000000000000000000000000000000000000009c8009460058000000000000000000000) +InformationStateTensor(1): binvec(571, 0x45000000000008000000000000000000000000040000000000000000000000000000000000000000000000000000000000000000000041021254901120000000000000000000000) +InformationStateTensor(2): binvec(571, 0x4500000000001000000000000000000000000000800000000000000000000000000000000000000000000000000000000000000000003c40442008c880000000000000000000000) +InformationStateTensor(3): binvec(571, 0x45000000000020000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000002212982212600000000000000000000000) +ObservationString(0) = "Vul: None\nS A8543\nH A94\nD J\nC AJT5\n\nWest North East South\n 1S 3H " +ObservationString(1) = "Vul: None\nS 9\nH KQT863\nD A75\nC 982\n\nWest North East South\n 1S 3H " +ObservationString(2) = "Vul: None\nS KQJT2\nH 2\nD 82\nC Q7643\n\nWest North East South\n 1S 3H ?" +ObservationString(3) = "Vul: None\nS 76\nH J75\nD KQT9643\nC K\n\nWest North East South\n 1S 3H " +ObservationTensor(0): binvec(571, 0x450000000000400000000000000000000000000200000000000000000000000000000000000000000000000000000000000000000000009c8009460058000000000000000000000) +ObservationTensor(1): binvec(571, 0x45000000000008000000000000000000000000040000000000000000000000000000000000000000000000000000000000000000000041021254901120000000000000000000000) +ObservationTensor(2): binvec(571, 0x4500000000001000000000000000000000000000800000000000000000000000000000000000000000000000000000000000000000003c40442008c880000000000000000000000) +ObservationTensor(3): binvec(571, 0x45000000000020000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000002212982212600000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [52, 53, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89] +StringLegalActions() = ["Pass", "Dbl", "3S", "3N", "4C", "4D", "4H", "4S", "4N", "5C", "5D", "5H", "5S", "5N", "6C", "6D", "6H", "6S", "6N", "7C", "7D", "7H", "7S", "7N"] + +# Apply action "4S" +action: 73 + +# State 55 +# Vul: None +# S A8543 +# H A94 +# D J +# C AJT5 +# S 76 S 9 +# H J75 H KQT863 +# D KQT9643 D A75 +# C K C 982 +# S KQJT2 +# H 2 +# D 82 +# C Q7643 +# +# West North East South +# 1S 3H 4S +IsTerminal() = False +History() = [12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67, 73] +HistoryString() = "12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67, 73" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "Vul: None\nS A8543\nH A94\nD J\nC AJT5\n\nWest North East South\n 1S 3H 4S " +InformationStateString(1) = "Vul: None\nS 9\nH KQT863\nD A75\nC 982\n\nWest North East South\n 1S 3H 4S " +InformationStateString(2) = "Vul: None\nS KQJT2\nH 2\nD 82\nC Q7643\n\nWest North East South\n 1S 3H 4S " +InformationStateString(3) = "Vul: None\nS 76\nH J75\nD KQT9643\nC K\n\nWest North East South\n 1S 3H 4S \n?" 
+InformationStateTensor(0): binvec(571, 0x450000000000400000000000000000000000000200000000000000000100000000000000000000000000000000000000000000000000009c8009460058000000000000000000000) +InformationStateTensor(1): binvec(571, 0x45000000000008000000000000000000000000040000000000000000020000000000000000000000000000000000000000000000000041021254901120000000000000000000000) +InformationStateTensor(2): binvec(571, 0x4500000000001000000000000000000000000000800000000000000004000000000000000000000000000000000000000000000000003c40442008c880000000000000000000000) +InformationStateTensor(3): binvec(571, 0x45000000000020000000000000000000000000010000000000000000008000000000000000000000000000000000000000000000000002212982212600000000000000000000000) +ObservationString(0) = "Vul: None\nS A8543\nH A94\nD J\nC AJT5\n\nWest North East South\n 1S 3H 4S " +ObservationString(1) = "Vul: None\nS 9\nH KQT863\nD A75\nC 982\n\nWest North East South\n 1S 3H 4S " +ObservationString(2) = "Vul: None\nS KQJT2\nH 2\nD 82\nC Q7643\n\nWest North East South\n 1S 3H 4S " +ObservationString(3) = "Vul: None\nS 76\nH J75\nD KQT9643\nC K\n\nWest North East South\n 1S 3H 4S \n?" +ObservationTensor(0): binvec(571, 0x450000000000400000000000000000000000000200000000000000000100000000000000000000000000000000000000000000000000009c8009460058000000000000000000000) +ObservationTensor(1): binvec(571, 0x45000000000008000000000000000000000000040000000000000000020000000000000000000000000000000000000000000000000041021254901120000000000000000000000) +ObservationTensor(2): binvec(571, 0x4500000000001000000000000000000000000000800000000000000004000000000000000000000000000000000000000000000000003c40442008c880000000000000000000000) +ObservationTensor(3): binvec(571, 0x45000000000020000000000000000000000000010000000000000000008000000000000000000000000000000000000000000000000002212982212600000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [52, 53, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89] +StringLegalActions() = ["Pass", "Dbl", "4N", "5C", "5D", "5H", "5S", "5N", "6C", "6D", "6H", "6S", "6N", "7C", "7D", "7H", "7S", "7N"] + +# Apply action "5H" +action: 77 + +# State 56 +# Vul: None +# S A8543 +# H A94 +# D J +# C AJT5 +# S 76 S 9 +# H J75 H KQT863 +# D KQT9643 D A75 +# C K C 982 +# S KQJT2 +# H 2 +# D 82 +# C Q7643 +# +# West North East South +# 1S 3H 4S +# 5H +IsTerminal() = False +History() = [12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67, 73, 77] +HistoryString() = "12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67, 73, 77" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "Vul: None\nS A8543\nH A94\nD J\nC AJT5\n\nWest North East South\n 1S 3H 4S \n5H ?" 
+InformationStateString(1) = "Vul: None\nS 9\nH KQT863\nD A75\nC 982\n\nWest North East South\n 1S 3H 4S \n5H " +InformationStateString(2) = "Vul: None\nS KQJT2\nH 2\nD 82\nC Q7643\n\nWest North East South\n 1S 3H 4S \n5H " +InformationStateString(3) = "Vul: None\nS 76\nH J75\nD KQT9643\nC K\n\nWest North East South\n 1S 3H 4S \n5H " +InformationStateTensor(0): binvec(571, 0x450000000000400000000000000000000000000200000000000000000100000000000080000000000000000000000000000000000000009c8009460058000000000000000000000) +InformationStateTensor(1): binvec(571, 0x45000000000008000000000000000000000000040000000000000000020000000000010000000000000000000000000000000000000041021254901120000000000000000000000) +InformationStateTensor(2): binvec(571, 0x4500000000001000000000000000000000000000800000000000000004000000000002000000000000000000000000000000000000003c40442008c880000000000000000000000) +InformationStateTensor(3): binvec(571, 0x45000000000020000000000000000000000000010000000000000000008000000000040000000000000000000000000000000000000002212982212600000000000000000000000) +ObservationString(0) = "Vul: None\nS A8543\nH A94\nD J\nC AJT5\n\nWest North East South\n 1S 3H 4S \n5H ?" +ObservationString(1) = "Vul: None\nS 9\nH KQT863\nD A75\nC 982\n\nWest North East South\n 1S 3H 4S \n5H " +ObservationString(2) = "Vul: None\nS KQJT2\nH 2\nD 82\nC Q7643\n\nWest North East South\n 1S 3H 4S \n5H " +ObservationString(3) = "Vul: None\nS 76\nH J75\nD KQT9643\nC K\n\nWest North East South\n 1S 3H 4S \n5H " +ObservationTensor(0): binvec(571, 0x450000000000400000000000000000000000000200000000000000000100000000000080000000000000000000000000000000000000009c8009460058000000000000000000000) +ObservationTensor(1): binvec(571, 0x45000000000008000000000000000000000000040000000000000000020000000000010000000000000000000000000000000000000041021254901120000000000000000000000) +ObservationTensor(2): binvec(571, 0x4500000000001000000000000000000000000000800000000000000004000000000002000000000000000000000000000000000000003c40442008c880000000000000000000000) +ObservationTensor(3): binvec(571, 0x45000000000020000000000000000000000000010000000000000000008000000000040000000000000000000000000000000000000002212982212600000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [52, 53, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89] +StringLegalActions() = ["Pass", "Dbl", "5S", "5N", "6C", "6D", "6H", "6S", "6N", "7C", "7D", "7H", "7S", "7N"] + +# Apply action "Dbl" +action: 53 + +# State 57 +# Vul: None +# S A8543 +# H A94 +# D J +# C AJT5 +# S 76 S 9 +# H J75 H KQT863 +# D KQT9643 D A75 +# C K C 982 +# S KQJT2 +# H 2 +# D 82 +# C Q7643 +# +# West North East South +# 1S 3H 4S +# 5H Dbl +IsTerminal() = False +History() = [12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67, 73, 77, 53] +HistoryString() = "12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67, 73, 77, 53" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "Vul: None\nS A8543\nH A94\nD J\nC AJT5\n\nWest North East South\n 1S 3H 4S \n5H Dbl " +InformationStateString(1) = "Vul: None\nS 9\nH KQT863\nD A75\nC 982\n\nWest North East South\n 1S 3H 4S \n5H Dbl ?" 
+InformationStateString(2) = "Vul: None\nS KQJT2\nH 2\nD 82\nC Q7643\n\nWest North East South\n 1S 3H 4S \n5H Dbl " +InformationStateString(3) = "Vul: None\nS 76\nH J75\nD KQT9643\nC K\n\nWest North East South\n 1S 3H 4S \n5H Dbl " +InformationStateTensor(0): binvec(571, 0x4500000000004000000000000000000000000002000000000000000001000000000000c0000000000000000000000000000000000000009c8009460058000000000000000000000) +InformationStateTensor(1): binvec(571, 0x45000000000008000000000000000000000000040000000000000000020000000000010800000000000000000000000000000000000041021254901120000000000000000000000) +InformationStateTensor(2): binvec(571, 0x4500000000001000000000000000000000000000800000000000000004000000000002100000000000000000000000000000000000003c40442008c880000000000000000000000) +InformationStateTensor(3): binvec(571, 0x45000000000020000000000000000000000000010000000000000000008000000000042000000000000000000000000000000000000002212982212600000000000000000000000) +ObservationString(0) = "Vul: None\nS A8543\nH A94\nD J\nC AJT5\n\nWest North East South\n 1S 3H 4S \n5H Dbl " +ObservationString(1) = "Vul: None\nS 9\nH KQT863\nD A75\nC 982\n\nWest North East South\n 1S 3H 4S \n5H Dbl ?" +ObservationString(2) = "Vul: None\nS KQJT2\nH 2\nD 82\nC Q7643\n\nWest North East South\n 1S 3H 4S \n5H Dbl " +ObservationString(3) = "Vul: None\nS 76\nH J75\nD KQT9643\nC K\n\nWest North East South\n 1S 3H 4S \n5H Dbl " +ObservationTensor(0): binvec(571, 0x4500000000004000000000000000000000000002000000000000000001000000000000c0000000000000000000000000000000000000009c8009460058000000000000000000000) +ObservationTensor(1): binvec(571, 0x45000000000008000000000000000000000000040000000000000000020000000000010800000000000000000000000000000000000041021254901120000000000000000000000) +ObservationTensor(2): binvec(571, 0x4500000000001000000000000000000000000000800000000000000004000000000002100000000000000000000000000000000000003c40442008c880000000000000000000000) +ObservationTensor(3): binvec(571, 0x45000000000020000000000000000000000000010000000000000000008000000000042000000000000000000000000000000000000002212982212600000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [52, 54, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89] +StringLegalActions() = ["Pass", "RDbl", "5S", "5N", "6C", "6D", "6H", "6S", "6N", "7C", "7D", "7H", "7S", "7N"] + +# Apply action "Pass" +action: 52 + +# State 58 +# Vul: None +# S A8543 +# H A94 +# D J +# C AJT5 +# S 76 S 9 +# H J75 H KQT863 +# D KQT9643 D A75 +# C K C 982 +# S KQJT2 +# H 2 +# D 82 +# C Q7643 +# +# West North East South +# 1S 3H 4S +# 5H Dbl Pass +IsTerminal() = False +History() = [12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67, 73, 77, 53, 52] +HistoryString() = "12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67, 73, 77, 53, 52" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "Vul: None\nS A8543\nH A94\nD J\nC AJT5\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass " +InformationStateString(1) = "Vul: None\nS 9\nH KQT863\nD A75\nC 982\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass " +InformationStateString(2) = "Vul: None\nS KQJT2\nH 2\nD 82\nC Q7643\n\nWest North East 
South\n 1S 3H 4S \n5H Dbl Pass ?" +InformationStateString(3) = "Vul: None\nS 76\nH J75\nD KQT9643\nC K\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass " +InformationStateTensor(0): binvec(571, 0x4500000000004000000000000000000000000002000000000000000001000000000000c0000000000000000000000000000000000000009c8009460058000000000000000000000) +InformationStateTensor(1): binvec(571, 0x45000000000008000000000000000000000000040000000000000000020000000000010800000000000000000000000000000000000041021254901120000000000000000000000) +InformationStateTensor(2): binvec(571, 0x4500000000001000000000000000000000000000800000000000000004000000000002100000000000000000000000000000000000003c40442008c880000000000000000000000) +InformationStateTensor(3): binvec(571, 0x45000000000020000000000000000000000000010000000000000000008000000000042000000000000000000000000000000000000002212982212600000000000000000000000) +ObservationString(0) = "Vul: None\nS A8543\nH A94\nD J\nC AJT5\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass " +ObservationString(1) = "Vul: None\nS 9\nH KQT863\nD A75\nC 982\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass " +ObservationString(2) = "Vul: None\nS KQJT2\nH 2\nD 82\nC Q7643\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass ?" +ObservationString(3) = "Vul: None\nS 76\nH J75\nD KQT9643\nC K\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass " +ObservationTensor(0): binvec(571, 0x4500000000004000000000000000000000000002000000000000000001000000000000c0000000000000000000000000000000000000009c8009460058000000000000000000000) +ObservationTensor(1): binvec(571, 0x45000000000008000000000000000000000000040000000000000000020000000000010800000000000000000000000000000000000041021254901120000000000000000000000) +ObservationTensor(2): binvec(571, 0x4500000000001000000000000000000000000000800000000000000004000000000002100000000000000000000000000000000000003c40442008c880000000000000000000000) +ObservationTensor(3): binvec(571, 0x45000000000020000000000000000000000000010000000000000000008000000000042000000000000000000000000000000000000002212982212600000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [52, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89] +StringLegalActions() = ["Pass", "5S", "5N", "6C", "6D", "6H", "6S", "6N", "7C", "7D", "7H", "7S", "7N"] + +# Apply action "5S" +action: 78 + +# State 59 +# Vul: None +# S A8543 +# H A94 +# D J +# C AJT5 +# S 76 S 9 +# H J75 H KQT863 +# D KQT9643 D A75 +# C K C 982 +# S KQJT2 +# H 2 +# D 82 +# C Q7643 +# +# West North East South +# 1S 3H 4S +# 5H Dbl Pass 5S +IsTerminal() = False +History() = [12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67, 73, 77, 53, 52, 78] +HistoryString() = "12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67, 73, 77, 53, 52, 78" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "Vul: None\nS A8543\nH A94\nD J\nC AJT5\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S " +InformationStateString(1) = "Vul: None\nS 9\nH KQT863\nD A75\nC 982\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S " +InformationStateString(2) = "Vul: None\nS KQJT2\nH 2\nD 82\nC Q7643\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S " 
+InformationStateString(3) = "Vul: None\nS 76\nH J75\nD KQT9643\nC K\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \n?" +InformationStateTensor(0): binvec(571, 0x4500000000004000000000000000000000000002000000000000000001000000000000c0100000000000000000000000000000000000009c8009460058000000000000000000000) +InformationStateTensor(1): binvec(571, 0x45000000000008000000000000000000000000040000000000000000020000000000010820000000000000000000000000000000000041021254901120000000000000000000000) +InformationStateTensor(2): binvec(571, 0x4500000000001000000000000000000000000000800000000000000004000000000002104000000000000000000000000000000000003c40442008c880000000000000000000000) +InformationStateTensor(3): binvec(571, 0x45000000000020000000000000000000000000010000000000000000008000000000042008000000000000000000000000000000000002212982212600000000000000000000000) +ObservationString(0) = "Vul: None\nS A8543\nH A94\nD J\nC AJT5\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S " +ObservationString(1) = "Vul: None\nS 9\nH KQT863\nD A75\nC 982\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S " +ObservationString(2) = "Vul: None\nS KQJT2\nH 2\nD 82\nC Q7643\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S " +ObservationString(3) = "Vul: None\nS 76\nH J75\nD KQT9643\nC K\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \n?" +ObservationTensor(0): binvec(571, 0x4500000000004000000000000000000000000002000000000000000001000000000000c0100000000000000000000000000000000000009c8009460058000000000000000000000) +ObservationTensor(1): binvec(571, 0x45000000000008000000000000000000000000040000000000000000020000000000010820000000000000000000000000000000000041021254901120000000000000000000000) +ObservationTensor(2): binvec(571, 0x4500000000001000000000000000000000000000800000000000000004000000000002104000000000000000000000000000000000003c40442008c880000000000000000000000) +ObservationTensor(3): binvec(571, 0x45000000000020000000000000000000000000010000000000000000008000000000042008000000000000000000000000000000000002212982212600000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [52, 53, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89] +StringLegalActions() = ["Pass", "Dbl", "5N", "6C", "6D", "6H", "6S", "6N", "7C", "7D", "7H", "7S", "7N"] + +# Apply action "Pass" +action: 52 + +# State 60 +# Vul: None +# S A8543 +# H A94 +# D J +# C AJT5 +# S 76 S 9 +# H J75 H KQT863 +# D KQT9643 D A75 +# C K C 982 +# S KQJT2 +# H 2 +# D 82 +# C Q7643 +# +# West North East South +# 1S 3H 4S +# 5H Dbl Pass 5S +# Pass +IsTerminal() = False +History() = [12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67, 73, 77, 53, 52, 78, 52] +HistoryString() = "12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67, 73, 77, 53, 52, 78, 52" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "Vul: None\nS A8543\nH A94\nD J\nC AJT5\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass ?" 
+InformationStateString(1) = "Vul: None\nS 9\nH KQT863\nD A75\nC 982\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass " +InformationStateString(2) = "Vul: None\nS KQJT2\nH 2\nD 82\nC Q7643\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass " +InformationStateString(3) = "Vul: None\nS 76\nH J75\nD KQT9643\nC K\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass " +InformationStateTensor(0): binvec(571, 0x4500000000004000000000000000000000000002000000000000000001000000000000c0100000000000000000000000000000000000009c8009460058000000000000000000000) +InformationStateTensor(1): binvec(571, 0x45000000000008000000000000000000000000040000000000000000020000000000010820000000000000000000000000000000000041021254901120000000000000000000000) +InformationStateTensor(2): binvec(571, 0x4500000000001000000000000000000000000000800000000000000004000000000002104000000000000000000000000000000000003c40442008c880000000000000000000000) +InformationStateTensor(3): binvec(571, 0x45000000000020000000000000000000000000010000000000000000008000000000042008000000000000000000000000000000000002212982212600000000000000000000000) +ObservationString(0) = "Vul: None\nS A8543\nH A94\nD J\nC AJT5\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass ?" +ObservationString(1) = "Vul: None\nS 9\nH KQT863\nD A75\nC 982\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass " +ObservationString(2) = "Vul: None\nS KQJT2\nH 2\nD 82\nC Q7643\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass " +ObservationString(3) = "Vul: None\nS 76\nH J75\nD KQT9643\nC K\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass " +ObservationTensor(0): binvec(571, 0x4500000000004000000000000000000000000002000000000000000001000000000000c0100000000000000000000000000000000000009c8009460058000000000000000000000) +ObservationTensor(1): binvec(571, 0x45000000000008000000000000000000000000040000000000000000020000000000010820000000000000000000000000000000000041021254901120000000000000000000000) +ObservationTensor(2): binvec(571, 0x4500000000001000000000000000000000000000800000000000000004000000000002104000000000000000000000000000000000003c40442008c880000000000000000000000) +ObservationTensor(3): binvec(571, 0x45000000000020000000000000000000000000010000000000000000008000000000042008000000000000000000000000000000000002212982212600000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [52, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89] +StringLegalActions() = ["Pass", "5N", "6C", "6D", "6H", "6S", "6N", "7C", "7D", "7H", "7S", "7N"] + +# Apply action "Pass" +action: 52 + +# State 61 +# Vul: None +# S A8543 +# H A94 +# D J +# C AJT5 +# S 76 S 9 +# H J75 H KQT863 +# D KQT9643 D A75 +# C K C 982 +# S KQJT2 +# H 2 +# D 82 +# C Q7643 +# +# West North East South +# 1S 3H 4S +# 5H Dbl Pass 5S +# Pass Pass +IsTerminal() = False +History() = [12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67, 73, 77, 53, 52, 78, 52, 52] +HistoryString() = "12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67, 73, 77, 53, 52, 78, 52, 52" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "Vul: None\nS A8543\nH A94\nD J\nC AJT5\n\nWest North East 
South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass " +InformationStateString(1) = "Vul: None\nS 9\nH KQT863\nD A75\nC 982\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass ?" +InformationStateString(2) = "Vul: None\nS KQJT2\nH 2\nD 82\nC Q7643\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass " +InformationStateString(3) = "Vul: None\nS 76\nH J75\nD KQT9643\nC K\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass " +InformationStateTensor(0): binvec(571, 0x4500000000004000000000000000000000000002000000000000000001000000000000c0100000000000000000000000000000000000009c8009460058000000000000000000000) +InformationStateTensor(1): binvec(571, 0x45000000000008000000000000000000000000040000000000000000020000000000010820000000000000000000000000000000000041021254901120000000000000000000000) +InformationStateTensor(2): binvec(571, 0x4500000000001000000000000000000000000000800000000000000004000000000002104000000000000000000000000000000000003c40442008c880000000000000000000000) +InformationStateTensor(3): binvec(571, 0x45000000000020000000000000000000000000010000000000000000008000000000042008000000000000000000000000000000000002212982212600000000000000000000000) +ObservationString(0) = "Vul: None\nS A8543\nH A94\nD J\nC AJT5\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass " +ObservationString(1) = "Vul: None\nS 9\nH KQT863\nD A75\nC 982\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass ?" +ObservationString(2) = "Vul: None\nS KQJT2\nH 2\nD 82\nC Q7643\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass " +ObservationString(3) = "Vul: None\nS 76\nH J75\nD KQT9643\nC K\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass " +ObservationTensor(0): binvec(571, 0x4500000000004000000000000000000000000002000000000000000001000000000000c0100000000000000000000000000000000000009c8009460058000000000000000000000) +ObservationTensor(1): binvec(571, 0x45000000000008000000000000000000000000040000000000000000020000000000010820000000000000000000000000000000000041021254901120000000000000000000000) +ObservationTensor(2): binvec(571, 0x4500000000001000000000000000000000000000800000000000000004000000000002104000000000000000000000000000000000003c40442008c880000000000000000000000) +ObservationTensor(3): binvec(571, 0x45000000000020000000000000000000000000010000000000000000008000000000042008000000000000000000000000000000000002212982212600000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [52, 53, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89] +StringLegalActions() = ["Pass", "Dbl", "5N", "6C", "6D", "6H", "6S", "6N", "7C", "7D", "7H", "7S", "7N"] + +# Apply action "Pass" +action: 52 + +# State 62 +# Apply action "HK" +action: 46 + +# State 63 +# Apply action "H2" +action: 2 + +# State 64 +# Vul: None +# S A8543 +# H A94 +# D J +# C AJT5 +# S 76 S 9 +# H J75 H QT863 +# D KQT9643 D A75 +# C K C 982 +# S KQJT2 +# H +# D 82 +# C Q7643 +# +# West North East South +# 1S 3H 4S +# 5H Dbl Pass 5S +# Pass Pass Pass +# +# N E S W N E S +# HK H2 +# +# Declarer tricks: 0 +IsTerminal() = False +History() = [12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67, 73, 77, 53, 52, 78, 52, 52, 52, 46, 2] +HistoryString() = "12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 
47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67, 73, 77, 53, 52, 78, 52, 52, 52, 46, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "Vul: None\nS A8543\nH A94\nD J\nC AJT5\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 \n\nDeclarer tricks: 0" +InformationStateString(1) = "Vul: None\nS 9\nH QT863\nD A75\nC 982\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 \n\nDeclarer tricks: 0" +InformationStateString(2) = "Vul: None\nS KQJT2\nH none\nD 82\nC Q7643\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 \n\nDeclarer tricks: 0" +InformationStateString(3) = "Vul: None\nS 76\nH J75\nD KQT9643\nC K\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 \n\nDeclarer tricks: 0" +InformationStateTensor(0): binvec(571, 0x104148804e4004a3002d620221004644000000000000000000000000080800000000000000000000000000000000000000000000000000000000000000000000000000002001000) +InformationStateTensor(1): binvec(571, 0x84141a081092a480811620221004644000000000000808000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002001000) +InformationStateTensor(2): binvec(571, 0x10414296202210046441620221004644080000000000000000000000000000000000000000000000000800000000000000000000000000000000000000000000000000002001000) +InformationStateTensor(3): binvec(571, 0x84144811094c1109301620221004644000000000000000000000000000000000000008080000000000000000000000000000000000000000000000000000000000000002001000) +ObservationString(0) = "Vul: None\nS A8543\nH A94\nD J\nC AJT5\nContract: 5S N\nCurrent trick: HK H2 " +ObservationString(1) = "Vul: None\nS 9\nH QT863\nD A75\nC 982\nContract: 5S N\nCurrent trick: HK H2 " +ObservationString(2) = "Vul: None\nS KQJT2\nH none\nD 82\nC Q7643\nContract: 5S N\nCurrent trick: HK H2 " +ObservationString(3) = "Vul: None\nS 76\nH J75\nD KQT9643\nC K\nContract: 5S N\nCurrent trick: HK H2 ?" 
+ObservationTensor(0): binvec(571, 0x104148804e4004a3002d620221004644000000000000000000000000080800000000000000000000000000000000000000000000000000000000000000000000000000002001000) +ObservationTensor(1): binvec(571, 0x84141a081092a480811620221004644000000000000808000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002001000) +ObservationTensor(2): binvec(571, 0x10414296202210046441620221004644080000000000000000000000000000000000000000000000000800000000000000000000000000000000000000000000000000002001000) +ObservationTensor(3): binvec(571, 0x84144811094c1109301620221004644000000000000000000000000000000000000008080000000000000000000000000000000000000000000000000000000000000002001000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [14, 22, 38] +StringLegalActions() = ["H5", "H7", "HJ"] + +# Apply action "H5" +action: 14 + +# State 65 +# Apply action "HA" +action: 50 + +# State 66 +# Apply action "S3" +action: 7 + +# State 67 +# Apply action "S9" +action: 31 + +# State 68 +# Apply action "ST" +action: 35 + +# State 69 +# Apply action "S7" +action: 23 + +# State 70 +# Apply action "SK" +action: 47 + +# State 71 +# Apply action "S6" +action: 19 + +# State 72 +# Apply action "S5" +action: 15 + +# State 73 +# Apply action "H3" +action: 6 + +# State 74 +# Apply action "C3" +action: 4 + +# State 75 +# Apply action "CK" +action: 44 + +# State 76 +# Vul: None +# S A84 +# H 94 +# D J +# C AJT5 +# S S +# H J7 H QT86 +# D KQT9643 D A75 +# C C 982 +# S QJ2 +# H +# D 82 +# C Q764 +# +# West North East South +# 1S 3H 4S +# 5H Dbl Pass 5S +# Pass Pass Pass +# +# N E S W N E S +# HK H2 H5 HA +# S3 S9 ST S7 +# SK S6 S5 H3 +# C3 CK +# +# Declarer tricks: 3 +IsTerminal() = False +History() = [12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67, 73, 77, 53, 52, 78, 52, 52, 52, 46, 2, 14, 50, 7, 31, 35, 23, 47, 19, 15, 6, 4, 44] +HistoryString() = "12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67, 73, 77, 53, 52, 78, 52, 52, 52, 46, 2, 14, 50, 7, 31, 35, 23, 47, 19, 15, 6, 4, 44" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "Vul: None\nS A84\nH 94\nD J\nC AJT5\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 H5 HA \nS3 S9 ST S7 \n SK S6 S5 H3 \n C3 CK \n\nDeclarer tricks: 3" +InformationStateString(1) = "Vul: None\nS none\nH QT86\nD A75\nC 982\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 H5 HA \nS3 S9 ST S7 \n SK S6 S5 H3 \n C3 CK \n\nDeclarer tricks: 3" +InformationStateString(2) = "Vul: None\nS QJ2\nH none\nD 82\nC Q764\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 H5 HA \nS3 S9 ST S7 \n SK S6 S5 H3 \n C3 CK \n\nDeclarer tricks: 3" +InformationStateString(3) = "Vul: None\nS none\nH J7\nD KQT9643\nC none\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 H5 HA \nS3 S9 ST S7 \n SK S6 S5 H3 \n C3 CK \n\nDeclarer tricks: 3" +InformationStateTensor(0): binvec(571, 0x104148800e0004a30025420221000640000000000000000000000000000200000000000000000000002000004000000000080000000000000000000000400000400000000401000) 
+InformationStateTensor(1): binvec(571, 0x84141a001092a080811420221000640000000000000002000000000000000000000020000000000000000800000000000000000000004000004000000000004000000000401000) +InformationStateTensor(2): binvec(571, 0x10414294202210006401420221000640020000000000000000000000200000000000000000000000000000000000000040000040000000000040000000000800000000000401000) +InformationStateTensor(3): binvec(571, 0x8414481101081109101420221000640000000000002000000000000000000000000000020000000000000000400000000000400000000008000000000000000000000040401000) +ObservationString(0) = "Vul: None\nS A84\nH 94\nD J\nC AJT5\nContract: 5S N\ndefence\ndefence\ndefence\ndeclarer\nTrick 1 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 2 won by declarer\ndeclarer\ndeclarer\ndeclarer\nCurrent trick: C3 CK ?" +ObservationString(1) = "Vul: None\nS none\nH QT86\nD A75\nC 982\nContract: 5S N\ndefence\ndefence\ndefence\ndeclarer\nTrick 1 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 2 won by declarer\ndeclarer\ndeclarer\ndeclarer\nCurrent trick: C3 CK " +ObservationString(2) = "Vul: None\nS QJ2\nH none\nD 82\nC Q764\nContract: 5S N\ndefence\ndefence\ndefence\ndeclarer\nTrick 1 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 2 won by declarer\ndeclarer\ndeclarer\ndeclarer\nCurrent trick: C3 CK " +ObservationString(3) = "Vul: None\nS none\nH J7\nD KQT9643\nC none\nContract: 5S N\ndefence\ndefence\ndefence\ndeclarer\nTrick 1 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 2 won by declarer\ndeclarer\ndeclarer\ndeclarer\nCurrent trick: C3 CK " +ObservationTensor(0): binvec(571, 0x104148800e0004a30025420221000640000000000000000000000000000200000000000000000000002000004000000000080000000000000000000000400000400000000401000) +ObservationTensor(1): binvec(571, 0x84141a001092a080811420221000640000000000000002000000000000000000000020000000000000000800000000000000000000004000004000000000004000000000401000) +ObservationTensor(2): binvec(571, 0x10414294202210006401420221000640020000000000000000000000200000000000000000000000000000000000000040000040000000000040000000000800000000000401000) +ObservationTensor(3): binvec(571, 0x8414481101081109101420221000640000000000002000000000000000000000000000020000000000000000400000000000400000000008000000000000000000000040401000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [12, 32, 36, 48] +StringLegalActions() = ["C5", "CT", "CJ", "CA"] + +# Apply action "CA" +action: 48 + +# State 77 +# Apply action "C2" +action: 0 + +# State 78 +# Apply action "C5" +action: 12 + +# State 79 +# Apply action "C8" +action: 24 + +# State 80 +# Apply action "CQ" +action: 40 + +# State 81 +# Apply action "D3" +action: 5 + +# State 82 +# Apply action "C4" +action: 8 + +# State 83 +# Apply action "D4" +action: 9 + +# State 84 +# Apply action "CJ" +action: 36 + +# State 85 +# Apply action "C9" +action: 28 + +# State 86 +# Apply action "CT" +action: 32 + +# State 87 +# Apply action "H6" +action: 18 + +# State 88 +# Apply action "C6" +action: 16 + +# State 89 +# Apply action "D6" +action: 17 + +# State 90 +# Apply action "H4" +action: 10 + +# State 91 +# Vul: None +# S A84 +# H 9 +# D J +# C +# S S +# H J7 H QT8 +# D KQT9 D A75 +# C C +# S QJ2 +# H +# D 82 +# C 7 +# +# West North East South +# 1S 3H 4S +# 5H Dbl Pass 5S +# Pass Pass Pass +# +# N E S W N E S +# HK H2 H5 HA +# S3 S9 ST S7 +# SK S6 S5 H3 +# C3 CK CA C2 +# C5 C8 CQ D3 +# C4 D4 CJ C9 +# CT H6 C6 D6 +# H4 +# +# Declarer tricks: 7 +IsTerminal() = False +History() = [12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 
27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67, 73, 77, 53, 52, 78, 52, 52, 52, 46, 2, 14, 50, 7, 31, 35, 23, 47, 19, 15, 6, 4, 44, 48, 0, 12, 24, 40, 5, 8, 9, 36, 28, 32, 18, 16, 17, 10] +HistoryString() = "12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67, 73, 77, 53, 52, 78, 52, 52, 52, 46, 2, 14, 50, 7, 31, 35, 23, 47, 19, 15, 6, 4, 44, 48, 0, 12, 24, 40, 5, 8, 9, 36, 28, 32, 18, 16, 17, 10" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "Vul: None\nS A84\nH 9\nD J\nC none\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 H5 HA \nS3 S9 ST S7 \n SK S6 S5 H3 \n C3 CK CA C2 \nC5 C8 CQ D3 \n C4 D4 CJ C9 \nCT H6 C6 D6 \nH4 \n\nDeclarer tricks: 7" +InformationStateString(1) = "Vul: None\nS none\nH QT8\nD A75\nC none\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 H5 HA \nS3 S9 ST S7 \n SK S6 S5 H3 \n C3 CK CA C2 \nC5 C8 CQ D3 \n C4 D4 CJ C9 \nCT H6 C6 D6 \nH4 \n\nDeclarer tricks: 7" +InformationStateString(2) = "Vul: None\nS QJ2\nH none\nD 82\nC 7\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 H5 HA \nS3 S9 ST S7 \n SK S6 S5 H3 \n C3 CK CA C2 \nC5 C8 CQ D3 \n C4 D4 CJ C9 \nCT H6 C6 D6 \nH4 \n\nDeclarer tricks: 7" +InformationStateString(3) = "Vul: None\nS none\nH J7\nD KQT9\nC none\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 H5 HA \nS3 S9 ST S7 \n SK S6 S5 H3 \n C3 CK CA C2 \nC5 C8 CQ D3 \n C4 D4 CJ C9 \nCT H6 C6 D6 \nH4 \n\nDeclarer tricks: 7" +InformationStateTensor(0): binvec(571, 0x10414880040004810005400021000440000800000000000000000000000000000000000000000000000000000000200000000080000000000020000000000001000000000041000) +InformationStateTensor(1): binvec(571, 0x8414180010108080811400021000440000000000000000000000000000000000000000000800000000000000800000000000200000000000010000000000000000200000041000) +InformationStateTensor(2): binvec(571, 0x10414294000210004401400021000440000000000000000000000000000008000000000000000000000000002000000000000100000000000000002000000000800000000041000) +InformationStateTensor(3): binvec(571, 0x8414480000081109101400021000440000000000000000080000000000000000000000000000000000000001000000000000000020000000008000000000002000000000041000) +ObservationString(0) = "Vul: None\nS A84\nH 9\nD J\nC none\nContract: 5S N\ndefence\ndefence\ndefence\ndeclarer\nTrick 1 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 2 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 3 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 4 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 5 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 6 won by declarer\ndeclarer\ndeclarer\ndeclarer\nCurrent trick: H4 " +ObservationString(1) = "Vul: None\nS none\nH QT8\nD A75\nC none\nContract: 5S N\ndefence\ndefence\ndefence\ndeclarer\nTrick 1 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 2 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 3 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 4 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 5 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 6 won by declarer\ndeclarer\ndeclarer\ndeclarer\nCurrent trick: 
H4 ?" +ObservationString(2) = "Vul: None\nS QJ2\nH none\nD 82\nC 7\nContract: 5S N\ndefence\ndefence\ndefence\ndeclarer\nTrick 1 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 2 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 3 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 4 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 5 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 6 won by declarer\ndeclarer\ndeclarer\ndeclarer\nCurrent trick: H4 " +ObservationString(3) = "Vul: None\nS none\nH J7\nD KQT9\nC none\nContract: 5S N\ndefence\ndefence\ndefence\ndeclarer\nTrick 1 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 2 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 3 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 4 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 5 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 6 won by declarer\ndeclarer\ndeclarer\ndeclarer\nCurrent trick: H4 " +ObservationTensor(0): binvec(571, 0x10414880040004810005400021000440000800000000000000000000000000000000000000000000000000000000200000000080000000000020000000000001000000000041000) +ObservationTensor(1): binvec(571, 0x8414180010108080811400021000440000000000000000000000000000000000000000000800000000000000800000000000200000000000010000000000000000200000041000) +ObservationTensor(2): binvec(571, 0x10414294000210004401400021000440000000000000000000000000000008000000000000000000000000002000000000000100000000000000002000000000800000000041000) +ObservationTensor(3): binvec(571, 0x8414480000081109101400021000440000000000000000080000000000000000000000000000000000000001000000000000000020000000008000000000002000000000041000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [26, 34, 42] +StringLegalActions() = ["H8", "HT", "HQ"] + +# Apply action "H8" +action: 26 + +# State 92 +# Apply action "S2" +action: 3 + +# State 93 +# Apply action "H7" +action: 22 + +# State 94 +# Apply action "C7" +action: 20 + +# State 95 +# Vul: None +# S A84 +# H 9 +# D J +# C +# S S +# H J H QT +# D KQT9 D A75 +# C C +# S QJ +# H +# D 82 +# C +# +# West North East South +# 1S 3H 4S +# 5H Dbl Pass 5S +# Pass Pass Pass +# +# N E S W N E S +# HK H2 H5 HA +# S3 S9 ST S7 +# SK S6 S5 H3 +# C3 CK CA C2 +# C5 C8 CQ D3 +# C4 D4 CJ C9 +# CT H6 C6 D6 +# H4 H8 S2 H7 +# C7 +# +# Declarer tricks: 8 +IsTerminal() = False +History() = [12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67, 73, 77, 53, 52, 78, 52, 52, 52, 46, 2, 14, 50, 7, 31, 35, 23, 47, 19, 15, 6, 4, 44, 48, 0, 12, 24, 40, 5, 8, 9, 36, 28, 32, 18, 16, 17, 10, 26, 3, 22, 20] +HistoryString() = "12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67, 73, 77, 53, 52, 78, 52, 52, 52, 46, 2, 14, 50, 7, 31, 35, 23, 47, 19, 15, 6, 4, 44, 48, 0, 12, 24, 40, 5, 8, 9, 36, 28, 32, 18, 16, 17, 10, 26, 3, 22, 20" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "Vul: None\nS A84\nH 9\nD J\nC none\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 H5 HA \nS3 S9 ST S7 \n SK S6 S5 H3 \n C3 CK CA C2 \nC5 C8 CQ D3 \n C4 D4 CJ C9 \nCT H6 C6 D6 \nH4 H8 S2 H7 \n C7 \n\nDeclarer tricks: 8" +InformationStateString(1) = "Vul: None\nS none\nH QT\nD A75\nC 
none\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 H5 HA \nS3 S9 ST S7 \n SK S6 S5 H3 \n C3 CK CA C2 \nC5 C8 CQ D3 \n C4 D4 CJ C9 \nCT H6 C6 D6 \nH4 H8 S2 H7 \n C7 \n\nDeclarer tricks: 8" +InformationStateString(2) = "Vul: None\nS QJ\nH none\nD 82\nC none\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 H5 HA \nS3 S9 ST S7 \n SK S6 S5 H3 \n C3 CK CA C2 \nC5 C8 CQ D3 \n C4 D4 CJ C9 \nCT H6 C6 D6 \nH4 H8 S2 H7 \n C7 \n\nDeclarer tricks: 8" +InformationStateString(3) = "Vul: None\nS none\nH J\nD KQT9\nC none\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 H5 HA \nS3 S9 ST S7 \n SK S6 S5 H3 \n C3 CK CA C2 \nC5 C8 CQ D3 \n C4 D4 CJ C9 \nCT H6 C6 D6 \nH4 H8 S2 H7 \n C7 \n\nDeclarer tricks: 8" +InformationStateTensor(0): binvec(571, 0x10414880040004810005000001000440000000000000000000000000000000020000000000000000000000080000000000000000800000040000000000000000080000000021000) +InformationStateTensor(1): binvec(571, 0x8414180010100080811000001000440000000000000000000200000000000000000000000000000000000000008000000400000000000000000800000000080000000000021000) +InformationStateTensor(2): binvec(571, 0x10414290000010004401000001000440000002000000000000000000000000000000000000000000000004000000000000000008000000000800000000000000008000000021000) +InformationStateTensor(3): binvec(571, 0x8414480000001109101000001000440000000000000000000000000000000000000000000002000000000000080000000008000000000000000080000004000000000000021000) +ObservationString(0) = "Vul: None\nS A84\nH 9\nD J\nC none\nContract: 5S N\ndefence\ndefence\ndefence\ndeclarer\nTrick 1 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 2 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 3 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 4 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 5 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 6 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 7 won by declarer\ndefence\ndeclarer\ndeclarer\nCurrent trick: C7 " +ObservationString(1) = "Vul: None\nS none\nH QT\nD A75\nC none\nContract: 5S N\ndefence\ndefence\ndefence\ndeclarer\nTrick 1 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 2 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 3 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 4 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 5 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 6 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 7 won by declarer\ndefence\ndeclarer\ndeclarer\nCurrent trick: C7 " +ObservationString(2) = "Vul: None\nS QJ\nH none\nD 82\nC none\nContract: 5S N\ndefence\ndefence\ndefence\ndeclarer\nTrick 1 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 2 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 3 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 4 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 5 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 6 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 7 won by declarer\ndefence\ndeclarer\ndeclarer\nCurrent trick: C7 " +ObservationString(3) = "Vul: None\nS none\nH J\nD KQT9\nC none\nContract: 5S N\ndefence\ndefence\ndefence\ndeclarer\nTrick 1 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 2 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 3 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 4 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 5 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 6 won by 
declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 7 won by declarer\ndefence\ndeclarer\ndeclarer\nCurrent trick: C7 ?" +ObservationTensor(0): binvec(571, 0x10414880040004810005000001000440000000000000000000000000000000020000000000000000000000080000000000000000800000040000000000000000080000000021000) +ObservationTensor(1): binvec(571, 0x8414180010100080811000001000440000000000000000000200000000000000000000000000000000000000008000000400000000000000000800000000080000000000021000) +ObservationTensor(2): binvec(571, 0x10414290000010004401000001000440000002000000000000000000000000000000000000000000000004000000000000000008000000000800000000000000008000000021000) +ObservationTensor(3): binvec(571, 0x8414480000001109101000001000440000000000000000000000000000000000000000000002000000000000080000000008000000000000000080000004000000000000021000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [29, 33, 38, 41, 45] +StringLegalActions() = ["D9", "DT", "HJ", "DQ", "DK"] + +# Apply action "D9" +action: 29 + +# State 96 +# Vul: None +# S A84 +# H 9 +# D J +# C +# S S +# H J H QT +# D KQT D A75 +# C C +# S QJ +# H +# D 82 +# C +# +# West North East South +# 1S 3H 4S +# 5H Dbl Pass 5S +# Pass Pass Pass +# +# N E S W N E S +# HK H2 H5 HA +# S3 S9 ST S7 +# SK S6 S5 H3 +# C3 CK CA C2 +# C5 C8 CQ D3 +# C4 D4 CJ C9 +# CT H6 C6 D6 +# H4 H8 S2 H7 +# C7 D9 +# +# Declarer tricks: 8 +IsTerminal() = False +History() = [12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67, 73, 77, 53, 52, 78, 52, 52, 52, 46, 2, 14, 50, 7, 31, 35, 23, 47, 19, 15, 6, 4, 44, 48, 0, 12, 24, 40, 5, 8, 9, 36, 28, 32, 18, 16, 17, 10, 26, 3, 22, 20, 29] +HistoryString() = "12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67, 73, 77, 53, 52, 78, 52, 52, 52, 46, 2, 14, 50, 7, 31, 35, 23, 47, 19, 15, 6, 4, 44, 48, 0, 12, 24, 40, 5, 8, 9, 36, 28, 32, 18, 16, 17, 10, 26, 3, 22, 20, 29" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "Vul: None\nS A84\nH 9\nD J\nC none\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 H5 HA \nS3 S9 ST S7 \n SK S6 S5 H3 \n C3 CK CA C2 \nC5 C8 CQ D3 \n C4 D4 CJ C9 \nCT H6 C6 D6 \nH4 H8 S2 H7 \n C7 D9 \n\nDeclarer tricks: 8" +InformationStateString(1) = "Vul: None\nS none\nH QT\nD A75\nC none\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 H5 HA \nS3 S9 ST S7 \n SK S6 S5 H3 \n C3 CK CA C2 \nC5 C8 CQ D3 \n C4 D4 CJ C9 \nCT H6 C6 D6 \nH4 H8 S2 H7 \n C7 D9 \n\nDeclarer tricks: 8" +InformationStateString(2) = "Vul: None\nS QJ\nH none\nD 82\nC none\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 H5 HA \nS3 S9 ST S7 \n SK S6 S5 H3 \n C3 CK CA C2 \nC5 C8 CQ D3 \n C4 D4 CJ C9 \nCT H6 C6 D6 \nH4 H8 S2 H7 \n C7 D9 \n\nDeclarer tricks: 8" +InformationStateString(3) = "Vul: None\nS none\nH J\nD KQT\nC none\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 H5 HA \nS3 S9 ST S7 \n SK S6 S5 H3 \n C3 CK CA C2 \nC5 C8 CQ D3 \n C4 D4 CJ C9 \nCT H6 C6 D6 \nH4 H8 S2 H7 \n C7 D9 \n\nDeclarer tricks: 8" +InformationStateTensor(0): binvec(571, 
0x10414880040004810005000001000440000000000000000000000000000000020000000000000010000000080000000000000000800000040000000000000000080000000021000) +InformationStateTensor(1): binvec(571, 0x8414180010100080811000001000440000000000000000000200000000000000100000000000000000000000008000000400000000000000000800000000080000000000021000) +InformationStateTensor(2): binvec(571, 0x10414290000010004401000001000440000002000000000000001000000000000000000000000000000004000000000000000008000000000800000000000000008000000021000) +InformationStateTensor(3): binvec(571, 0x8414480000000109101000001000440000000010000000000000000000000000000000000002000000000000080000000008000000000000000080000004000000000000021000) +ObservationString(0) = "Vul: None\nS A84\nH 9\nD J\nC none\nContract: 5S N\ndefence\ndefence\ndefence\ndeclarer\nTrick 1 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 2 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 3 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 4 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 5 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 6 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 7 won by declarer\ndefence\ndeclarer\ndeclarer\nCurrent trick: C7 D9 ?" +ObservationString(1) = "Vul: None\nS none\nH QT\nD A75\nC none\nContract: 5S N\ndefence\ndefence\ndefence\ndeclarer\nTrick 1 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 2 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 3 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 4 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 5 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 6 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 7 won by declarer\ndefence\ndeclarer\ndeclarer\nCurrent trick: C7 D9 " +ObservationString(2) = "Vul: None\nS QJ\nH none\nD 82\nC none\nContract: 5S N\ndefence\ndefence\ndefence\ndeclarer\nTrick 1 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 2 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 3 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 4 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 5 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 6 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 7 won by declarer\ndefence\ndeclarer\ndeclarer\nCurrent trick: C7 D9 " +ObservationString(3) = "Vul: None\nS none\nH J\nD KQT\nC none\nContract: 5S N\ndefence\ndefence\ndefence\ndeclarer\nTrick 1 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 2 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 3 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 4 won by declarer\ndefence\ndeclarer\ndeclarer\nTrick 5 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 6 won by declarer\ndeclarer\ndeclarer\ndeclarer\nTrick 7 won by declarer\ndefence\ndeclarer\ndeclarer\nCurrent trick: C7 D9 " +ObservationTensor(0): binvec(571, 0x10414880040004810005000001000440000000000000000000000000000000020000000000000010000000080000000000000000800000040000000000000000080000000021000) +ObservationTensor(1): binvec(571, 0x8414180010100080811000001000440000000000000000000200000000000000100000000000000000000000008000000400000000000000000800000000080000000000021000) +ObservationTensor(2): binvec(571, 0x10414290000010004401000001000440000002000000000000001000000000000000000000000000000004000000000000000008000000000800000000000000008000000021000) +ObservationTensor(3): binvec(571, 0x8414480000000109101000001000440000000010000000000000000000000000000000000002000000000000080000000008000000000000000080000004000000000000021000) +Rewards() = [0, 0, 0, 0] +Returns() = 
[0, 0, 0, 0] +LegalActions() = [11, 27, 30, 37, 51] +StringLegalActions() = ["S4", "S8", "H9", "DJ", "SA"] + +# Apply action "DJ" +action: 37 + +# State 97 +# Apply action "HT" +action: 34 + +# State 98 +# Apply action "D2" +action: 1 + +# State 99 +# Apply action "DT" +action: 33 + +# State 100 +# Apply action "S8" +action: 27 + +# State 101 +# Apply action "D5" +action: 13 + +# State 102 +# Apply action "H9" +action: 30 + +# State 103 +# Apply action "HQ" +action: 42 + +# State 104 +# Apply action "SJ" +action: 39 + +# State 105 +# Apply action "HJ" +action: 38 + +# State 106 +# Apply action "D8" +action: 25 + +# State 107 +# Apply action "DQ" +action: 41 + +# State 108 +# Apply action "S4" +action: 11 + +# State 109 +# Apply action "D7" +action: 21 + +# State 110 +# Apply action "SA" +action: 51 + +# State 111 +# Apply action "DA" +action: 49 + +# State 112 +# Apply action "SQ" +action: 43 + +# State 113 +# Apply action "DK" +action: 45 + +# State 114 +# Vul: None +# S A8543 +# H A94 +# D J +# C AJT5 +# S 76 S 9 +# H J75 H KQT863 +# D KQT9643 D A75 +# C K C 982 +# S KQJT2 +# H 2 +# D 82 +# C Q7643 +# +# West North East South +# 1S 3H 4S +# 5H Dbl Pass 5S +# Pass Pass Pass +# +# N E S W N E S +# HK H2 H5 HA +# S3 S9 ST S7 +# SK S6 S5 H3 +# C3 CK CA C2 +# C5 C8 CQ D3 +# C4 D4 CJ C9 +# CT H6 C6 D6 +# H4 H8 S2 H7 +# C7 D9 DJ HT +# D2 DT S8 D5 +# H9 HQ SJ HJ +# D8 DQ S4 D7 +# SA DA SQ DK +# +# Declarer tricks: 13 +# Score: N/S 510 E/W -510 +IsTerminal() = True +History() = [12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67, 73, 77, 53, 52, 78, 52, 52, 52, 46, 2, 14, 50, 7, 31, 35, 23, 47, 19, 15, 6, 4, 44, 48, 0, 12, 24, 40, 5, 8, 9, 36, 28, 32, 18, 16, 17, 10, 26, 3, 22, 20, 29, 37, 34, 1, 33, 27, 13, 30, 42, 39, 38, 25, 41, 11, 21, 51, 49, 43, 45] +HistoryString() = "12, 31, 39, 45, 11, 6, 35, 41, 51, 0, 4, 17, 27, 34, 8, 19, 50, 46, 40, 14, 15, 26, 1, 33, 30, 28, 43, 22, 37, 21, 25, 5, 36, 18, 16, 9, 10, 24, 47, 38, 7, 42, 2, 44, 48, 13, 3, 29, 32, 49, 20, 23, 58, 67, 73, 77, 53, 52, 78, 52, 52, 52, 46, 2, 14, 50, 7, 31, 35, 23, 47, 19, 15, 6, 4, 44, 48, 0, 12, 24, 40, 5, 8, 9, 36, 28, 32, 18, 16, 17, 10, 26, 3, 22, 20, 29, 37, 34, 1, 33, 27, 13, 30, 42, 39, 38, 25, 41, 11, 21, 51, 49, 43, 45" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "Vul: None\n S A8543\n H A94\n D J\n C AJT5\nS 76 S 9\nH J75 H KQT863\nD KQT9643 D A75\nC K C 982\n S KQJT2\n H 2\n D 82\n C Q7643\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 H5 HA \nS3 S9 ST S7 \n SK S6 S5 H3 \n C3 CK CA C2 \nC5 C8 CQ D3 \n C4 D4 CJ C9 \nCT H6 C6 D6 \nH4 H8 S2 H7 \n C7 D9 DJ HT \n D2 DT S8 D5 \nH9 HQ SJ HJ \n D8 DQ S4 D7 \nSA DA SQ DK \n\nDeclarer tricks: 13\nScore: N/S 510 E/W -510" +InformationStateString(1) = "Vul: None\n S A8543\n H A94\n D J\n C AJT5\nS 76 S 9\nH J75 H KQT863\nD KQT9643 D A75\nC K C 982\n S KQJT2\n H 2\n D 82\n C Q7643\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 H5 HA \nS3 S9 ST S7 \n SK S6 S5 H3 \n C3 CK CA C2 \nC5 C8 CQ D3 \n C4 D4 CJ C9 \nCT H6 C6 D6 \nH4 H8 S2 H7 \n C7 D9 DJ HT \n D2 DT S8 D5 \nH9 HQ SJ HJ \n D8 DQ S4 D7 \nSA DA SQ DK \n\nDeclarer tricks: 13\nScore: N/S 510 E/W -510" +InformationStateString(2) = "Vul: None\n S A8543\n H A94\n D J\n C AJT5\nS 76 S 9\nH J75 H KQT863\nD KQT9643 D 
A75\nC K C 982\n S KQJT2\n H 2\n D 82\n C Q7643\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 H5 HA \nS3 S9 ST S7 \n SK S6 S5 H3 \n C3 CK CA C2 \nC5 C8 CQ D3 \n C4 D4 CJ C9 \nCT H6 C6 D6 \nH4 H8 S2 H7 \n C7 D9 DJ HT \n D2 DT S8 D5 \nH9 HQ SJ HJ \n D8 DQ S4 D7 \nSA DA SQ DK \n\nDeclarer tricks: 13\nScore: N/S 510 E/W -510" +InformationStateString(3) = "Vul: None\n S A8543\n H A94\n D J\n C AJT5\nS 76 S 9\nH J75 H KQT863\nD KQT9643 D A75\nC K C 982\n S KQJT2\n H 2\n D 82\n C Q7643\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 H5 HA \nS3 S9 ST S7 \n SK S6 S5 H3 \n C3 CK CA C2 \nC5 C8 CQ D3 \n C4 D4 CJ C9 \nCT H6 C6 D6 \nH4 H8 S2 H7 \n C7 D9 DJ HT \n D2 DT S8 D5 \nH9 HQ SJ HJ \n D8 DQ S4 D7 \nSA DA SQ DK \n\nDeclarer tricks: 13\nScore: N/S 510 E/W -510" +InformationStateTensor(0): binvec(571, 0x414880000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004000000000001000000000004000000000000100001000) +InformationStateTensor(1): binvec(571, 0x414180000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000040000000000001000000000000004001000) +InformationStateTensor(2): binvec(571, 0x414280000000000000000000000000000000000000000000000000000000000000000000000000000000000000000400000000000010000000000000040000000000010001000) +InformationStateTensor(3): binvec(571, 0x414480000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000400000000000100000000000400001000) +ObservationString(0) = "Vul: None\n S A8543\n H A94\n D J\n C AJT5\nS 76 S 9\nH J75 H KQT863\nD KQT9643 D A75\nC K C 982\n S KQJT2\n H 2\n D 82\n C Q7643\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 H5 HA \nS3 S9 ST S7 \n SK S6 S5 H3 \n C3 CK CA C2 \nC5 C8 CQ D3 \n C4 D4 CJ C9 \nCT H6 C6 D6 \nH4 H8 S2 H7 \n C7 D9 DJ HT \n D2 DT S8 D5 \nH9 HQ SJ HJ \n D8 DQ S4 D7 \nSA DA SQ DK \n\nDeclarer tricks: 13\nScore: N/S 510 E/W -510" +ObservationString(1) = "Vul: None\n S A8543\n H A94\n D J\n C AJT5\nS 76 S 9\nH J75 H KQT863\nD KQT9643 D A75\nC K C 982\n S KQJT2\n H 2\n D 82\n C Q7643\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 H5 HA \nS3 S9 ST S7 \n SK S6 S5 H3 \n C3 CK CA C2 \nC5 C8 CQ D3 \n C4 D4 CJ C9 \nCT H6 C6 D6 \nH4 H8 S2 H7 \n C7 D9 DJ HT \n D2 DT S8 D5 \nH9 HQ SJ HJ \n D8 DQ S4 D7 \nSA DA SQ DK \n\nDeclarer tricks: 13\nScore: N/S 510 E/W -510" +ObservationString(2) = "Vul: None\n S A8543\n H A94\n D J\n C AJT5\nS 76 S 9\nH J75 H KQT863\nD KQT9643 D A75\nC K C 982\n S KQJT2\n H 2\n D 82\n C Q7643\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 H5 HA \nS3 S9 ST S7 \n SK S6 S5 H3 \n C3 CK CA C2 \nC5 C8 CQ D3 \n C4 D4 CJ C9 \nCT H6 C6 D6 \nH4 H8 S2 H7 \n C7 D9 DJ HT \n D2 DT S8 D5 \nH9 HQ SJ HJ \n D8 DQ S4 D7 \nSA DA SQ DK \n\nDeclarer tricks: 13\nScore: N/S 510 E/W -510" +ObservationString(3) = "Vul: None\n S A8543\n H A94\n D J\n C AJT5\nS 76 S 9\nH J75 H KQT863\nD KQT9643 D A75\nC K C 982\n S KQJT2\n H 2\n D 82\n C Q7643\n\nWest North East South\n 1S 3H 4S \n5H Dbl Pass 5S \nPass Pass Pass \n\nN E S W N E S\n HK H2 H5 HA \nS3 S9 ST S7 \n SK S6 S5 H3 \n C3 CK CA C2 \nC5 C8 CQ D3 \n C4 D4 CJ C9 \nCT H6 C6 D6 \nH4 H8 S2 H7 \n C7 D9 DJ HT \n D2 DT S8 D5 \nH9 HQ SJ HJ \n D8 DQ S4 D7 \nSA DA SQ DK \n\nDeclarer tricks: 13\nScore: N/S 510 E/W -510" +ObservationTensor(0): 
binvec(571, 0x414880000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004000000000001000000000004000000000000100001000) +ObservationTensor(1): binvec(571, 0x414180000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000040000000000001000000000000004001000) +ObservationTensor(2): binvec(571, 0x414280000000000000000000000000000000000000000000000000000000000000000000000000000000000000000400000000000010000000000000040000000000010001000) +ObservationTensor(3): binvec(571, 0x414480000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000400000000000100000000000400001000) +Rewards() = [510, -510, 510, -510] +Returns() = [510, -510, 510, -510] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/bridge.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/bridge.txt new file mode 100644 index 0000000..81aa008 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/bridge.txt @@ -0,0 +1,647 @@ +game: bridge + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Contract Bridge" +GameType.max_num_players = 4 +GameType.min_num_players = 4 +GameType.parameter_specification = ["dealer_vul", "non_dealer_vul", "num_tricks", "use_double_dummy_result"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "bridge" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 90 +PolicyTensorShape() = [90] +MaxChanceOutcomes() = 52 +GetParameters() = {dealer_vul=False,non_dealer_vul=False,num_tricks=2,use_double_dummy_result=True} +NumPlayers() = 4 +MinUtility() = -7600.0 +MaxUtility() = 7600.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = [571] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 571 +ObservationTensorShape() = [571] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 571 +MaxGameLength() = 319 +ToString() = "bridge()" + +# State 0 +# Vul: None +# S +# H +# D +# C +# S S +# H H +# D D +# C C +# S +# H +# D +# C +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "Vul: None\nS none\nH none\nD none\nC none\n" +InformationStateString(1) = "Vul: None\nS none\nH none\nD none\nC none\n" +InformationStateString(2) = "Vul: None\nS none\nH none\nD none\nC none\n" +InformationStateString(3) = "Vul: None\nS none\nH none\nD none\nC none\n" +InformationStateTensor(0): zeros(571) +InformationStateTensor(1): zeros(571) +InformationStateTensor(2): zeros(571) +InformationStateTensor(3): zeros(571) +ObservationString(0) = "Vul: None\nS none\nH none\nD none\nC none\n" +ObservationString(1) = "Vul: None\nS none\nH none\nD none\nC none\n" +ObservationString(2) = "Vul: None\nS none\nH none\nD none\nC none\n" +ObservationString(3) = "Vul: None\nS none\nH none\nD none\nC none\n" +ObservationTensor(0): zeros(571) +ObservationTensor(1): zeros(571) +ObservationTensor(2): zeros(571) +ObservationTensor(3): zeros(571) +ChanceOutcomes() = [(0,0.0192308), 
(1,0.0192308), (2,0.0192308), (3,0.0192308), (4,0.0192308), (5,0.0192308), (6,0.0192308), (7,0.0192308), (8,0.0192308), (9,0.0192308), (10,0.0192308), (11,0.0192308), (12,0.0192308), (13,0.0192308), (14,0.0192308), (15,0.0192308), (16,0.0192308), (17,0.0192308), (18,0.0192308), (19,0.0192308), (20,0.0192308), (21,0.0192308), (22,0.0192308), (23,0.0192308), (24,0.0192308), (25,0.0192308), (26,0.0192308), (27,0.0192308), (28,0.0192308), (29,0.0192308), (30,0.0192308), (31,0.0192308), (32,0.0192308), (33,0.0192308), (34,0.0192308), (35,0.0192308), (36,0.0192308), (37,0.0192308), (38,0.0192308), (39,0.0192308), (40,0.0192308), (41,0.0192308), (42,0.0192308), (43,0.0192308), (44,0.0192308), (45,0.0192308), (46,0.0192308), (47,0.0192308), (48,0.0192308), (49,0.0192308), (50,0.0192308), (51,0.0192308)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51] +StringLegalActions() = ["C2", "D2", "H2", "S2", "C3", "D3", "H3", "S3", "C4", "D4", "H4", "S4", "C5", "D5", "H5", "S5", "C6", "D6", "H6", "S6", "C7", "D7", "H7", "S7", "C8", "D8", "H8", "S8", "C9", "D9", "H9", "S9", "CT", "DT", "HT", "ST", "CJ", "DJ", "HJ", "SJ", "CQ", "DQ", "HQ", "SQ", "CK", "DK", "HK", "SK", "CA", "DA", "HA", "SA"] + +# Apply action "ST" +action: 35 + +# State 1 +# Vul: None +# S T +# H +# D +# C +# S S +# H H +# D D +# C C +# S +# H +# D +# C +IsTerminal() = False +History() = [35] +HistoryString() = "35" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "Vul: None\nS T\nH none\nD none\nC none\n" +InformationStateString(1) = "Vul: None\nS none\nH none\nD none\nC none\n" +InformationStateString(2) = "Vul: None\nS none\nH none\nD none\nC none\n" +InformationStateString(3) = "Vul: None\nS none\nH none\nD none\nC none\n" +InformationStateTensor(0): zeros(571) +InformationStateTensor(1): zeros(571) +InformationStateTensor(2): zeros(571) +InformationStateTensor(3): zeros(571) +ObservationString(0) = "Vul: None\nS T\nH none\nD none\nC none\n" +ObservationString(1) = "Vul: None\nS none\nH none\nD none\nC none\n" +ObservationString(2) = "Vul: None\nS none\nH none\nD none\nC none\n" +ObservationString(3) = "Vul: None\nS none\nH none\nD none\nC none\n" +ObservationTensor(0): zeros(571) +ObservationTensor(1): zeros(571) +ObservationTensor(2): zeros(571) +ObservationTensor(3): zeros(571) +ChanceOutcomes() = [(0,0.0196078), (1,0.0196078), (2,0.0196078), (3,0.0196078), (4,0.0196078), (5,0.0196078), (6,0.0196078), (7,0.0196078), (8,0.0196078), (9,0.0196078), (10,0.0196078), (11,0.0196078), (12,0.0196078), (13,0.0196078), (14,0.0196078), (15,0.0196078), (16,0.0196078), (17,0.0196078), (18,0.0196078), (19,0.0196078), (20,0.0196078), (21,0.0196078), (22,0.0196078), (23,0.0196078), (24,0.0196078), (25,0.0196078), (26,0.0196078), (27,0.0196078), (28,0.0196078), (29,0.0196078), (30,0.0196078), (31,0.0196078), (32,0.0196078), (33,0.0196078), (34,0.0196078), (36,0.0196078), (37,0.0196078), (38,0.0196078), (39,0.0196078), (40,0.0196078), (41,0.0196078), (42,0.0196078), (43,0.0196078), (44,0.0196078), (45,0.0196078), (46,0.0196078), (47,0.0196078), (48,0.0196078), (49,0.0196078), (50,0.0196078), (51,0.0196078)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51] 
+StringLegalActions() = ["C2", "D2", "H2", "S2", "C3", "D3", "H3", "S3", "C4", "D4", "H4", "S4", "C5", "D5", "H5", "S5", "C6", "D6", "H6", "S6", "C7", "D7", "H7", "S7", "C8", "D8", "H8", "S8", "C9", "D9", "H9", "S9", "CT", "DT", "HT", "CJ", "DJ", "HJ", "SJ", "CQ", "DQ", "HQ", "SQ", "CK", "DK", "HK", "SK", "CA", "DA", "HA", "SA"] + +# Apply action "C7" +action: 20 + +# State 2 +# Apply action "DK" +action: 45 + +# State 3 +# Apply action "S6" +action: 19 + +# State 4 +# Apply action "DA" +action: 49 + +# State 5 +# Apply action "CQ" +action: 40 + +# State 6 +# Apply action "SA" +action: 51 + +# State 7 +# Apply action "S4" +action: 11 + +# State 8 +# Apply action "S2" +action: 3 + +# State 9 +# Apply action "S8" +action: 27 + +# State 10 +# Apply action "SJ" +action: 39 + +# State 11 +# Apply action "SK" +action: 47 + +# State 12 +# Apply action "CK" +action: 44 + +# State 13 +# Apply action "C2" +action: 0 + +# State 14 +# Apply action "D5" +action: 13 + +# State 15 +# Apply action "CJ" +action: 36 + +# State 16 +# Apply action "C4" +action: 8 + +# State 17 +# Apply action "HA" +action: 50 + +# State 18 +# Apply action "D8" +action: 25 + +# State 19 +# Apply action "S5" +action: 15 + +# State 20 +# Apply action "H9" +action: 30 + +# State 21 +# Apply action "H2" +action: 2 + +# State 22 +# Apply action "CT" +action: 32 + +# State 23 +# Apply action "S7" +action: 23 + +# State 24 +# Apply action "CA" +action: 48 + +# State 25 +# Apply action "H4" +action: 10 + +# State 26 +# Apply action "D2" +action: 1 + +# State 27 +# Apply action "HK" +action: 46 + +# State 28 +# Apply action "DQ" +action: 41 + +# State 29 +# Apply action "D4" +action: 9 + +# State 30 +# Apply action "C5" +action: 12 + +# State 31 +# Apply action "D9" +action: 29 + +# State 32 +# Apply action "HQ" +action: 42 + +# State 33 +# Apply action "H8" +action: 26 + +# State 34 +# Apply action "C9" +action: 28 + +# State 35 +# Apply action "DJ" +action: 37 + +# State 36 +# Apply action "HT" +action: 34 + +# State 37 +# Apply action "S3" +action: 7 + +# State 38 +# Apply action "H6" +action: 18 + +# State 39 +# Apply action "C3" +action: 4 + +# State 40 +# Apply action "D6" +action: 17 + +# State 41 +# Apply action "C8" +action: 24 + +# State 42 +# Apply action "DT" +action: 33 + +# State 43 +# Apply action "H3" +action: 6 + +# State 44 +# Apply action "D7" +action: 21 + +# State 45 +# Apply action "H7" +action: 22 + +# State 46 +# Apply action "HJ" +action: 38 + +# State 47 +# Apply action "H5" +action: 14 + +# State 48 +# Apply action "D3" +action: 5 + +# State 49 +# Apply action "S9" +action: 31 + +# State 50 +# Apply action "C6" +action: 16 + +# State 51 +# Apply action "SQ" +action: 43 + +# State 52 +# Vul: None +# S T2 +# H QT9 +# D AQ763 +# C AK4 +# S KQ7654 S 983 +# H K53 H A8742 +# D J9 D 4 +# C J3 C Q872 +# S AJ +# H J6 +# D KT852 +# C T965 +IsTerminal() = False +History() = [35, 20, 45, 19, 49, 40, 51, 11, 3, 27, 39, 47, 44, 0, 13, 36, 8, 50, 25, 15, 30, 2, 32, 23, 48, 10, 1, 46, 41, 9, 12, 29, 42, 26, 28, 37, 34, 7, 18, 4, 17, 24, 33, 6, 21, 22, 38, 14, 5, 31, 16, 43] +HistoryString() = "35, 20, 45, 19, 49, 40, 51, 11, 3, 27, 39, 47, 44, 0, 13, 36, 8, 50, 25, 15, 30, 2, 32, 23, 48, 10, 1, 46, 41, 9, 12, 29, 42, 26, 28, 37, 34, 7, 18, 4, 17, 24, 33, 6, 21, 22, 38, 14, 5, 31, 16, 43" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "Vul: None\nS T2\nH QT9\nD AQ763\nC AK4\n" +InformationStateString(1) = "Vul: None\nS 983\nH A8742\nD 4\nC Q872\n" 
+InformationStateString(2) = "Vul: None\nS AJ\nH J6\nD KT852\nC T965\n" +InformationStateString(3) = "Vul: None\nS KQ7654\nH K53\nD J9\nC J3\n" +InformationStateTensor(0): binvec(571, 0x4500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000a402201183460000000000000000000000) +InformationStateTensor(1): binvec(571, 0x45000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000050b00558804010000000000000000000000) +InformationStateTensor(2): binvec(571, 0x45000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000020065024618208000000000000000000000) +InformationStateTensor(3): binvec(571, 0x45000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005098882060980000000000000000000000) +ObservationString(0) = "Vul: None\nS T2\nH QT9\nD AQ763\nC AK4\n" +ObservationString(1) = "Vul: None\nS 983\nH A8742\nD 4\nC Q872\n" +ObservationString(2) = "Vul: None\nS AJ\nH J6\nD KT852\nC T965\n" +ObservationString(3) = "Vul: None\nS KQ7654\nH K53\nD J9\nC J3\n" +ObservationTensor(0): binvec(571, 0x4500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000a402201183460000000000000000000000) +ObservationTensor(1): binvec(571, 0x45000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000050b00558804010000000000000000000000) +ObservationTensor(2): binvec(571, 0x45000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000020065024618208000000000000000000000) +ObservationTensor(3): binvec(571, 0x45000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005098882060980000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89] +StringLegalActions() = ["Pass", "1C", "1D", "1H", "1S", "1N", "2C", "2D", "2H", "2S", "2N", "3C", "3D", "3H", "3S", "3N", "4C", "4D", "4H", "4S", "4N", "5C", "5D", "5H", "5S", "5N", "6C", "6D", "6H", "6S", "6N", "7C", "7D", "7H", "7S", "7N"] + +# Apply action "1N" +action: 59 + +# State 53 +# Vul: None +# S T2 +# H QT9 +# D AQ763 +# C AK4 +# S KQ7654 S 983 +# H K53 H A8742 +# D J9 D 4 +# C J3 C Q872 +# S AJ +# H J6 +# D KT852 +# C T965 +# +# West North East South +# 1N +IsTerminal() = False +History() = [35, 20, 45, 19, 49, 40, 51, 11, 3, 27, 39, 47, 44, 0, 13, 36, 8, 50, 25, 15, 30, 2, 32, 23, 48, 10, 1, 46, 41, 9, 12, 29, 42, 26, 28, 37, 34, 7, 18, 4, 17, 24, 33, 6, 21, 22, 38, 14, 5, 31, 16, 43, 59] +HistoryString() = "35, 20, 45, 19, 49, 40, 51, 11, 3, 27, 39, 47, 44, 0, 13, 36, 8, 50, 25, 15, 30, 2, 32, 23, 48, 10, 1, 46, 41, 9, 12, 29, 42, 26, 28, 37, 34, 7, 18, 4, 17, 24, 33, 6, 21, 22, 38, 14, 5, 31, 16, 43, 59" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "Vul: None\nS T2\nH QT9\nD AQ763\nC AK4\n\nWest North East South\n 1N " +InformationStateString(1) = "Vul: None\nS 983\nH A8742\nD 4\nC Q872\n\nWest North East South\n 1N ?" 
+InformationStateString(2) = "Vul: None\nS AJ\nH J6\nD KT852\nC T965\n\nWest North East South\n 1N " +InformationStateString(3) = "Vul: None\nS KQ7654\nH K53\nD J9\nC J3\n\nWest North East South\n 1N " +InformationStateTensor(0): binvec(571, 0x4500000000000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000a402201183460000000000000000000000) +InformationStateTensor(1): binvec(571, 0x45000000000000008000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000050b00558804010000000000000000000000) +InformationStateTensor(2): binvec(571, 0x45000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000020065024618208000000000000000000000) +InformationStateTensor(3): binvec(571, 0x45000000000000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005098882060980000000000000000000000) +ObservationString(0) = "Vul: None\nS T2\nH QT9\nD AQ763\nC AK4\n\nWest North East South\n 1N " +ObservationString(1) = "Vul: None\nS 983\nH A8742\nD 4\nC Q872\n\nWest North East South\n 1N ?" +ObservationString(2) = "Vul: None\nS AJ\nH J6\nD KT852\nC T965\n\nWest North East South\n 1N " +ObservationString(3) = "Vul: None\nS KQ7654\nH K53\nD J9\nC J3\n\nWest North East South\n 1N " +ObservationTensor(0): binvec(571, 0x4500000000000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000a402201183460000000000000000000000) +ObservationTensor(1): binvec(571, 0x45000000000000008000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000050b00558804010000000000000000000000) +ObservationTensor(2): binvec(571, 0x45000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000020065024618208000000000000000000000) +ObservationTensor(3): binvec(571, 0x45000000000000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005098882060980000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [52, 53, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89] +StringLegalActions() = ["Pass", "Dbl", "2C", "2D", "2H", "2S", "2N", "3C", "3D", "3H", "3S", "3N", "4C", "4D", "4H", "4S", "4N", "5C", "5D", "5H", "5S", "5N", "6C", "6D", "6H", "6S", "6N", "7C", "7D", "7H", "7S", "7N"] + +# Apply action "Pass" +action: 52 + +# State 54 +# Vul: None +# S T2 +# H QT9 +# D AQ763 +# C AK4 +# S KQ7654 S 983 +# H K53 H A8742 +# D J9 D 4 +# C J3 C Q872 +# S AJ +# H J6 +# D KT852 +# C T965 +# +# West North East South +# 1N Pass +IsTerminal() = False +History() = [35, 20, 45, 19, 49, 40, 51, 11, 3, 27, 39, 47, 44, 0, 13, 36, 8, 50, 25, 15, 30, 2, 32, 23, 48, 10, 1, 46, 41, 9, 12, 29, 42, 26, 28, 37, 34, 7, 18, 4, 17, 24, 33, 6, 21, 22, 38, 14, 5, 31, 16, 43, 59, 52] +HistoryString() = "35, 20, 45, 19, 49, 40, 51, 11, 3, 27, 39, 47, 44, 0, 13, 36, 8, 50, 25, 15, 30, 2, 32, 23, 48, 10, 1, 46, 41, 9, 12, 29, 42, 26, 28, 37, 34, 7, 18, 4, 17, 24, 33, 6, 21, 22, 38, 14, 5, 31, 16, 43, 59, 52" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "Vul: None\nS T2\nH QT9\nD AQ763\nC AK4\n\nWest North East South\n 1N Pass " +InformationStateString(1) = "Vul: None\nS 983\nH A8742\nD 4\nC Q872\n\nWest North East South\n 1N Pass " +InformationStateString(2) = "Vul: None\nS AJ\nH J6\nD KT852\nC T965\n\nWest 
North East South\n 1N Pass ?" +InformationStateString(3) = "Vul: None\nS KQ7654\nH K53\nD J9\nC J3\n\nWest North East South\n 1N Pass " +InformationStateTensor(0): binvec(571, 0x4500000000000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000a402201183460000000000000000000000) +InformationStateTensor(1): binvec(571, 0x45000000000000008000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000050b00558804010000000000000000000000) +InformationStateTensor(2): binvec(571, 0x45000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000020065024618208000000000000000000000) +InformationStateTensor(3): binvec(571, 0x45000000000000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005098882060980000000000000000000000) +ObservationString(0) = "Vul: None\nS T2\nH QT9\nD AQ763\nC AK4\n\nWest North East South\n 1N Pass " +ObservationString(1) = "Vul: None\nS 983\nH A8742\nD 4\nC Q872\n\nWest North East South\n 1N Pass " +ObservationString(2) = "Vul: None\nS AJ\nH J6\nD KT852\nC T965\n\nWest North East South\n 1N Pass ?" +ObservationString(3) = "Vul: None\nS KQ7654\nH K53\nD J9\nC J3\n\nWest North East South\n 1N Pass " +ObservationTensor(0): binvec(571, 0x4500000000000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000a402201183460000000000000000000000) +ObservationTensor(1): binvec(571, 0x45000000000000008000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000050b00558804010000000000000000000000) +ObservationTensor(2): binvec(571, 0x45000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000020065024618208000000000000000000000) +ObservationTensor(3): binvec(571, 0x45000000000000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005098882060980000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [52, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89] +StringLegalActions() = ["Pass", "2C", "2D", "2H", "2S", "2N", "3C", "3D", "3H", "3S", "3N", "4C", "4D", "4H", "4S", "4N", "5C", "5D", "5H", "5S", "5N", "6C", "6D", "6H", "6S", "6N", "7C", "7D", "7H", "7S", "7N"] + +# Apply action "3N" +action: 69 + +# State 55 +# Vul: None +# S T2 +# H QT9 +# D AQ763 +# C AK4 +# S KQ7654 S 983 +# H K53 H A8742 +# D J9 D 4 +# C J3 C Q872 +# S AJ +# H J6 +# D KT852 +# C T965 +# +# West North East South +# 1N Pass 3N +IsTerminal() = False +History() = [35, 20, 45, 19, 49, 40, 51, 11, 3, 27, 39, 47, 44, 0, 13, 36, 8, 50, 25, 15, 30, 2, 32, 23, 48, 10, 1, 46, 41, 9, 12, 29, 42, 26, 28, 37, 34, 7, 18, 4, 17, 24, 33, 6, 21, 22, 38, 14, 5, 31, 16, 43, 59, 52, 69] +HistoryString() = "35, 20, 45, 19, 49, 40, 51, 11, 3, 27, 39, 47, 44, 0, 13, 36, 8, 50, 25, 15, 30, 2, 32, 23, 48, 10, 1, 46, 41, 9, 12, 29, 42, 26, 28, 37, 34, 7, 18, 4, 17, 24, 33, 6, 21, 22, 38, 14, 5, 31, 16, 43, 59, 52, 69" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "Vul: None\nS T2\nH QT9\nD AQ763\nC AK4\n\nWest North East South\n 1N Pass 3N " +InformationStateString(1) = "Vul: None\nS 983\nH A8742\nD 4\nC Q872\n\nWest North East South\n 1N Pass 3N " +InformationStateString(2) = "Vul: None\nS AJ\nH J6\nD KT852\nC T965\n\nWest North East South\n 1N Pass 3N " 
+InformationStateString(3) = "Vul: None\nS KQ7654\nH K53\nD J9\nC J3\n\nWest North East South\n 1N Pass 3N \n?" +InformationStateTensor(0): binvec(571, 0x4500000000000004000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000a402201183460000000000000000000000) +InformationStateTensor(1): binvec(571, 0x45000000000000008000000000000000000000000000020000000000000000000000000000000000000000000000000000000000000050b00558804010000000000000000000000) +InformationStateTensor(2): binvec(571, 0x45000000000000010000000000000000000000000000040000000000000000000000000000000000000000000000000000000000000020065024618208000000000000000000000) +InformationStateTensor(3): binvec(571, 0x45000000000000020000000000000000000000000000008000000000000000000000000000000000000000000000000000000000000005098882060980000000000000000000000) +ObservationString(0) = "Vul: None\nS T2\nH QT9\nD AQ763\nC AK4\n\nWest North East South\n 1N Pass 3N " +ObservationString(1) = "Vul: None\nS 983\nH A8742\nD 4\nC Q872\n\nWest North East South\n 1N Pass 3N " +ObservationString(2) = "Vul: None\nS AJ\nH J6\nD KT852\nC T965\n\nWest North East South\n 1N Pass 3N " +ObservationString(3) = "Vul: None\nS KQ7654\nH K53\nD J9\nC J3\n\nWest North East South\n 1N Pass 3N \n?" +ObservationTensor(0): binvec(571, 0x4500000000000004000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000a402201183460000000000000000000000) +ObservationTensor(1): binvec(571, 0x45000000000000008000000000000000000000000000020000000000000000000000000000000000000000000000000000000000000050b00558804010000000000000000000000) +ObservationTensor(2): binvec(571, 0x45000000000000010000000000000000000000000000040000000000000000000000000000000000000000000000000000000000000020065024618208000000000000000000000) +ObservationTensor(3): binvec(571, 0x45000000000000020000000000000000000000000000008000000000000000000000000000000000000000000000000000000000000005098882060980000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [52, 53, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89] +StringLegalActions() = ["Pass", "Dbl", "4C", "4D", "4H", "4S", "4N", "5C", "5D", "5H", "5S", "5N", "6C", "6D", "6H", "6S", "6N", "7C", "7D", "7H", "7S", "7N"] + +# Apply action "Pass" +action: 52 + +# State 56 +# Vul: None +# S T2 +# H QT9 +# D AQ763 +# C AK4 +# S KQ7654 S 983 +# H K53 H A8742 +# D J9 D 4 +# C J3 C Q872 +# S AJ +# H J6 +# D KT852 +# C T965 +# +# West North East South +# 1N Pass 3N +# Pass +IsTerminal() = False +History() = [35, 20, 45, 19, 49, 40, 51, 11, 3, 27, 39, 47, 44, 0, 13, 36, 8, 50, 25, 15, 30, 2, 32, 23, 48, 10, 1, 46, 41, 9, 12, 29, 42, 26, 28, 37, 34, 7, 18, 4, 17, 24, 33, 6, 21, 22, 38, 14, 5, 31, 16, 43, 59, 52, 69, 52] +HistoryString() = "35, 20, 45, 19, 49, 40, 51, 11, 3, 27, 39, 47, 44, 0, 13, 36, 8, 50, 25, 15, 30, 2, 32, 23, 48, 10, 1, 46, 41, 9, 12, 29, 42, 26, 28, 37, 34, 7, 18, 4, 17, 24, 33, 6, 21, 22, 38, 14, 5, 31, 16, 43, 59, 52, 69, 52" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "Vul: None\nS T2\nH QT9\nD AQ763\nC AK4\n\nWest North East South\n 1N Pass 3N \nPass ?" 
+InformationStateString(1) = "Vul: None\nS 983\nH A8742\nD 4\nC Q872\n\nWest North East South\n 1N Pass 3N \nPass " +InformationStateString(2) = "Vul: None\nS AJ\nH J6\nD KT852\nC T965\n\nWest North East South\n 1N Pass 3N \nPass " +InformationStateString(3) = "Vul: None\nS KQ7654\nH K53\nD J9\nC J3\n\nWest North East South\n 1N Pass 3N \nPass " +InformationStateTensor(0): binvec(571, 0x4500000000000004000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000a402201183460000000000000000000000) +InformationStateTensor(1): binvec(571, 0x45000000000000008000000000000000000000000000020000000000000000000000000000000000000000000000000000000000000050b00558804010000000000000000000000) +InformationStateTensor(2): binvec(571, 0x45000000000000010000000000000000000000000000040000000000000000000000000000000000000000000000000000000000000020065024618208000000000000000000000) +InformationStateTensor(3): binvec(571, 0x45000000000000020000000000000000000000000000008000000000000000000000000000000000000000000000000000000000000005098882060980000000000000000000000) +ObservationString(0) = "Vul: None\nS T2\nH QT9\nD AQ763\nC AK4\n\nWest North East South\n 1N Pass 3N \nPass ?" +ObservationString(1) = "Vul: None\nS 983\nH A8742\nD 4\nC Q872\n\nWest North East South\n 1N Pass 3N \nPass " +ObservationString(2) = "Vul: None\nS AJ\nH J6\nD KT852\nC T965\n\nWest North East South\n 1N Pass 3N \nPass " +ObservationString(3) = "Vul: None\nS KQ7654\nH K53\nD J9\nC J3\n\nWest North East South\n 1N Pass 3N \nPass " +ObservationTensor(0): binvec(571, 0x4500000000000004000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000a402201183460000000000000000000000) +ObservationTensor(1): binvec(571, 0x45000000000000008000000000000000000000000000020000000000000000000000000000000000000000000000000000000000000050b00558804010000000000000000000000) +ObservationTensor(2): binvec(571, 0x45000000000000010000000000000000000000000000040000000000000000000000000000000000000000000000000000000000000020065024618208000000000000000000000) +ObservationTensor(3): binvec(571, 0x45000000000000020000000000000000000000000000008000000000000000000000000000000000000000000000000000000000000005098882060980000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [52, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89] +StringLegalActions() = ["Pass", "4C", "4D", "4H", "4S", "4N", "5C", "5D", "5H", "5S", "5N", "6C", "6D", "6H", "6S", "6N", "7C", "7D", "7H", "7S", "7N"] + +# Apply action "Pass" +action: 52 + +# State 57 +# Vul: None +# S T2 +# H QT9 +# D AQ763 +# C AK4 +# S KQ7654 S 983 +# H K53 H A8742 +# D J9 D 4 +# C J3 C Q872 +# S AJ +# H J6 +# D KT852 +# C T965 +# +# West North East South +# 1N Pass 3N +# Pass Pass +IsTerminal() = False +History() = [35, 20, 45, 19, 49, 40, 51, 11, 3, 27, 39, 47, 44, 0, 13, 36, 8, 50, 25, 15, 30, 2, 32, 23, 48, 10, 1, 46, 41, 9, 12, 29, 42, 26, 28, 37, 34, 7, 18, 4, 17, 24, 33, 6, 21, 22, 38, 14, 5, 31, 16, 43, 59, 52, 69, 52, 52] +HistoryString() = "35, 20, 45, 19, 49, 40, 51, 11, 3, 27, 39, 47, 44, 0, 13, 36, 8, 50, 25, 15, 30, 2, 32, 23, 48, 10, 1, 46, 41, 9, 12, 29, 42, 26, 28, 37, 34, 7, 18, 4, 17, 24, 33, 6, 21, 22, 38, 14, 5, 31, 16, 43, 59, 52, 69, 52, 52" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "Vul: None\nS T2\nH QT9\nD AQ763\nC AK4\n\nWest North East South\n 1N Pass 3N \nPass Pass " +InformationStateString(1) = "Vul: 
None\nS 983\nH A8742\nD 4\nC Q872\n\nWest North East South\n 1N Pass 3N \nPass Pass ?" +InformationStateString(2) = "Vul: None\nS AJ\nH J6\nD KT852\nC T965\n\nWest North East South\n 1N Pass 3N \nPass Pass " +InformationStateString(3) = "Vul: None\nS KQ7654\nH K53\nD J9\nC J3\n\nWest North East South\n 1N Pass 3N \nPass Pass " +InformationStateTensor(0): binvec(571, 0x4500000000000004000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000a402201183460000000000000000000000) +InformationStateTensor(1): binvec(571, 0x45000000000000008000000000000000000000000000020000000000000000000000000000000000000000000000000000000000000050b00558804010000000000000000000000) +InformationStateTensor(2): binvec(571, 0x45000000000000010000000000000000000000000000040000000000000000000000000000000000000000000000000000000000000020065024618208000000000000000000000) +InformationStateTensor(3): binvec(571, 0x45000000000000020000000000000000000000000000008000000000000000000000000000000000000000000000000000000000000005098882060980000000000000000000000) +ObservationString(0) = "Vul: None\nS T2\nH QT9\nD AQ763\nC AK4\n\nWest North East South\n 1N Pass 3N \nPass Pass " +ObservationString(1) = "Vul: None\nS 983\nH A8742\nD 4\nC Q872\n\nWest North East South\n 1N Pass 3N \nPass Pass ?" +ObservationString(2) = "Vul: None\nS AJ\nH J6\nD KT852\nC T965\n\nWest North East South\n 1N Pass 3N \nPass Pass " +ObservationString(3) = "Vul: None\nS KQ7654\nH K53\nD J9\nC J3\n\nWest North East South\n 1N Pass 3N \nPass Pass " +ObservationTensor(0): binvec(571, 0x4500000000000004000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000a402201183460000000000000000000000) +ObservationTensor(1): binvec(571, 0x45000000000000008000000000000000000000000000020000000000000000000000000000000000000000000000000000000000000050b00558804010000000000000000000000) +ObservationTensor(2): binvec(571, 0x45000000000000010000000000000000000000000000040000000000000000000000000000000000000000000000000000000000000020065024618208000000000000000000000) +ObservationTensor(3): binvec(571, 0x45000000000000020000000000000000000000000000008000000000000000000000000000000000000000000000000000000000000005098882060980000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [52, 53, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89] +StringLegalActions() = ["Pass", "Dbl", "4C", "4D", "4H", "4S", "4N", "5C", "5D", "5H", "5S", "5N", "6C", "6D", "6H", "6S", "6N", "7C", "7D", "7H", "7S", "7N"] + +# Apply action "Pass" +action: 52 + +# State 58 +# Vul: None +# S T2 +# H QT9 +# D AQ763 +# C AK4 +# S KQ7654 S 983 +# H K53 H A8742 +# D J9 D 4 +# C J3 C Q872 +# S AJ +# H J6 +# D KT852 +# C T965 +# +# West North East South +# 1N Pass 3N +# Pass Pass Pass +# +# Declarer tricks: 8 +# Score: N/S -50 E/W 50 +IsTerminal() = True +History() = [35, 20, 45, 19, 49, 40, 51, 11, 3, 27, 39, 47, 44, 0, 13, 36, 8, 50, 25, 15, 30, 2, 32, 23, 48, 10, 1, 46, 41, 9, 12, 29, 42, 26, 28, 37, 34, 7, 18, 4, 17, 24, 33, 6, 21, 22, 38, 14, 5, 31, 16, 43, 59, 52, 69, 52, 52, 52] +HistoryString() = "35, 20, 45, 19, 49, 40, 51, 11, 3, 27, 39, 47, 44, 0, 13, 36, 8, 50, 25, 15, 30, 2, 32, 23, 48, 10, 1, 46, 41, 9, 12, 29, 42, 26, 28, 37, 34, 7, 18, 4, 17, 24, 33, 6, 21, 22, 38, 14, 5, 31, 16, 43, 59, 52, 69, 52, 52, 52" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "Vul: None\n S T2\n H QT9\n D AQ763\n C AK4\nS 
KQ7654 S 983\nH K53 H A8742\nD J9 D 4\nC J3 C Q872\n S AJ\n H J6\n D KT852\n C T965\n\nWest North East South\n 1N Pass 3N \nPass Pass Pass \n\nDeclarer tricks: 8\nScore: N/S -50 E/W 50" +InformationStateString(1) = "Vul: None\n S T2\n H QT9\n D AQ763\n C AK4\nS KQ7654 S 983\nH K53 H A8742\nD J9 D 4\nC J3 C Q872\n S AJ\n H J6\n D KT852\n C T965\n\nWest North East South\n 1N Pass 3N \nPass Pass Pass \n\nDeclarer tricks: 8\nScore: N/S -50 E/W 50" +InformationStateString(2) = "Vul: None\n S T2\n H QT9\n D AQ763\n C AK4\nS KQ7654 S 983\nH K53 H A8742\nD J9 D 4\nC J3 C Q872\n S AJ\n H J6\n D KT852\n C T965\n\nWest North East South\n 1N Pass 3N \nPass Pass Pass \n\nDeclarer tricks: 8\nScore: N/S -50 E/W 50" +InformationStateString(3) = "Vul: None\n S T2\n H QT9\n D AQ763\n C AK4\nS KQ7654 S 983\nH K53 H A8742\nD J9 D 4\nC J3 C Q872\n S AJ\n H J6\n D KT852\n C T965\n\nWest North East South\n 1N Pass 3N \nPass Pass Pass \n\nDeclarer tricks: 8\nScore: N/S -50 E/W 50" +InformationStateTensor(0): binvec(571, 0x4500000000000004000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000a402201183460000000000000000000000) +InformationStateTensor(1): binvec(571, 0x45000000000000008000000000000000000000000000020000000000000000000000000000000000000000000000000000000000000050b00558804010000000000000000000000) +InformationStateTensor(2): binvec(571, 0x45000000000000010000000000000000000000000000040000000000000000000000000000000000000000000000000000000000000020065024618208000000000000000000000) +InformationStateTensor(3): binvec(571, 0x45000000000000020000000000000000000000000000008000000000000000000000000000000000000000000000000000000000000005098882060980000000000000000000000) +ObservationString(0) = "Vul: None\n S T2\n H QT9\n D AQ763\n C AK4\nS KQ7654 S 983\nH K53 H A8742\nD J9 D 4\nC J3 C Q872\n S AJ\n H J6\n D KT852\n C T965\n\nWest North East South\n 1N Pass 3N \nPass Pass Pass \n\nDeclarer tricks: 8\nScore: N/S -50 E/W 50" +ObservationString(1) = "Vul: None\n S T2\n H QT9\n D AQ763\n C AK4\nS KQ7654 S 983\nH K53 H A8742\nD J9 D 4\nC J3 C Q872\n S AJ\n H J6\n D KT852\n C T965\n\nWest North East South\n 1N Pass 3N \nPass Pass Pass \n\nDeclarer tricks: 8\nScore: N/S -50 E/W 50" +ObservationString(2) = "Vul: None\n S T2\n H QT9\n D AQ763\n C AK4\nS KQ7654 S 983\nH K53 H A8742\nD J9 D 4\nC J3 C Q872\n S AJ\n H J6\n D KT852\n C T965\n\nWest North East South\n 1N Pass 3N \nPass Pass Pass \n\nDeclarer tricks: 8\nScore: N/S -50 E/W 50" +ObservationString(3) = "Vul: None\n S T2\n H QT9\n D AQ763\n C AK4\nS KQ7654 S 983\nH K53 H A8742\nD J9 D 4\nC J3 C Q872\n S AJ\n H J6\n D KT852\n C T965\n\nWest North East South\n 1N Pass 3N \nPass Pass Pass \n\nDeclarer tricks: 8\nScore: N/S -50 E/W 50" +ObservationTensor(0): binvec(571, 0x4500000000000004000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000a402201183460000000000000000000000) +ObservationTensor(1): binvec(571, 0x45000000000000008000000000000000000000000000020000000000000000000000000000000000000000000000000000000000000050b00558804010000000000000000000000) +ObservationTensor(2): binvec(571, 0x45000000000000010000000000000000000000000000040000000000000000000000000000000000000000000000000000000000000020065024618208000000000000000000000) +ObservationTensor(3): binvec(571, 0x45000000000000020000000000000000000000000000008000000000000000000000000000000000000000000000000000000000000005098882060980000000000000000000000) +Rewards() = [-50, 50, -50, 50] +Returns() = [-50, 50, -50, 50] diff --git 
a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/bridge_uncontested_bidding-2NT.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/bridge_uncontested_bidding-2NT.txt new file mode 100644 index 0000000..a80a2a0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/bridge_uncontested_bidding-2NT.txt @@ -0,0 +1,108 @@ +game: bridge_uncontested_bidding(subgame=2NT,relative_scoring=True,num_redeals=1,rng_seed=-1) + +GameType.chance_mode = ChanceMode.SAMPLED_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Bridge: Uncontested Bidding" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["num_redeals", "relative_scoring", "rng_seed", "subgame"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = False +GameType.provides_observation_tensor = False +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "bridge_uncontested_bidding" +GameType.utility = Utility.IDENTICAL + +NumDistinctActions() = 36 +PolicyTensorShape() = [36] +MaxChanceOutcomes() = 1 +GetParameters() = {num_redeals=1,relative_scoring=True,rng_seed=-1,subgame=2NT} +NumPlayers() = 2 +MinUtility() = -2170.0 +MaxUtility() = 0.0 +UtilitySum() = None +InformationStateTensorShape() = [126] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 126 +MaxGameLength() = 36 +ToString() = "bridge_uncontested_bidding(num_redeals=1,relative_scoring=True,rng_seed=-1,subgame=2NT)" + +# State 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "" +InformationStateString(1) = "" +InformationStateTensor(0): ◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +InformationStateTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +SerializeState() = "" +ChanceOutcomes() = [(0,1)] +LegalActions() = [0] +StringLegalActions() = ["Deal"] + +# Apply action "Deal" +action: 0 + +# State 1 +# QT85.JT7.AKQ.AKQ 643.86.97642.T94 2N +IsTerminal() = False +History() = [0] +HistoryString() = "0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "QT85.JT7.AKQ.AKQ 2N" +InformationStateString(1) = "643.86.97642.T94 2N" +InformationStateTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◉◯◯◉◯◉◉◯◉◉◉◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +InformationStateTensor(1): ◯◉◯◯◯◯◯◉◉◉◯◉◯◯◯◯◯◉◉◉◯◉◯◯◯◯◉◯◉◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +SerializeState() = "QT85.JT7.AKQ.AKQ 643.86.97642.T94 2N" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Pass", "3C", "3D", "3H", "3S", "3N", "4C", "4D", "4H", "4S", "4N", "5C", "5D", "5H", "5S", "5N", "6C", "6D", "6H", "6S", "6N", "7C", "7D", "7H", "7S", "7N"] + +# Apply action "6D" +action: 27 + +# State 2 +# QT85.JT7.AKQ.AKQ 643.86.97642.T94 2N-6D +IsTerminal() = False +History() = [0, 
27] +HistoryString() = "0, 27" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "QT85.JT7.AKQ.AKQ 2N-6D" +InformationStateString(1) = "643.86.97642.T94 2N-6D" +InformationStateTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◉◯◯◉◯◉◉◯◉◉◉◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +InformationStateTensor(1): ◯◉◯◯◯◯◯◉◉◉◯◉◯◯◯◯◯◉◉◉◯◉◯◯◯◯◉◯◉◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +SerializeState() = "QT85.JT7.AKQ.AKQ 643.86.97642.T94 2N-6D" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Pass", "6H", "6S", "6N", "7C", "7D", "7H", "7S", "7N"] + +# Apply action "Pass" +action: 0 + +# State 3 +# QT85.JT7.AKQ.AKQ 643.86.97642.T94 2N-6D-Pass Score:-200 2N N:-100 3C E:-200 3D N:-50 3D E:-50 3H N:-200 3H E:-200 3S N:-150 3S E:-150 3N N:-150 4C N:-250 4H N:-250 4H E:-250 4S N:-200 4S E:-200 5C N:-300 5C E:-300 5D N:-150 5D E:-150 6C N:-350 6C E:-350 6D N:-200 6D E:-200 6H N:-350 6H E:-350 6S N:-300 6S E:-300 6N N:-300 7C N:-400 7C E:-400 7D N:-250 7D E:-250 7H N:-400 7H E:-400 7S N:-350 7S E:-350 7N N:-350 +IsTerminal() = True +History() = [0, 27, 0] +HistoryString() = "0, 27, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "QT85.JT7.AKQ.AKQ 2N-6D-Pass" +InformationStateString(1) = "643.86.97642.T94 2N-6D-Pass" +InformationStateTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◉◯◯◉◯◉◉◯◉◉◉◯◯◉◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +InformationStateTensor(1): ◯◉◯◯◯◯◯◉◉◉◯◉◯◯◯◯◯◉◉◉◯◉◯◯◯◯◉◯◉◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +SerializeState() = "QT85.JT7.AKQ.AKQ 643.86.97642.T94 2N-6D-Pass Score:-200 2N N:-100 3C E:-200 3D N:-50 3D E:-50 3H N:-200 3H E:-200 3S N:-150 3S E:-150 3N N:-150 4C N:-250 4H N:-250 4H E:-250 4S N:-200 4S E:-200 5C N:-300 5C E:-300 5D N:-150 5D E:-150 6C N:-350 6C E:-350 6D N:-200 6D E:-200 6H N:-350 6H E:-350 6S N:-300 6S E:-300 6N N:-300 7C N:-400 7C E:-400 7D N:-250 7D E:-250 7H N:-400 7H E:-400 7S N:-350 7S E:-350 7N N:-350" +Rewards() = [-200, -150] +Returns() = [-200, -150] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/bridge_uncontested_bidding.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/bridge_uncontested_bidding.txt new file mode 100644 index 0000000..5711ec3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/bridge_uncontested_bidding.txt @@ -0,0 +1,150 @@ +game: bridge_uncontested_bidding(relative_scoring=True,num_redeals=1,rng_seed=-1) + +GameType.chance_mode = ChanceMode.SAMPLED_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Bridge: Uncontested Bidding" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["num_redeals", "relative_scoring", "rng_seed", "subgame"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = False +GameType.provides_observation_tensor = False +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "bridge_uncontested_bidding" +GameType.utility = Utility.IDENTICAL + 
+NumDistinctActions() = 36 +PolicyTensorShape() = [36] +MaxChanceOutcomes() = 1 +GetParameters() = {num_redeals=1,relative_scoring=True,rng_seed=-1,subgame=} +NumPlayers() = 2 +MinUtility() = -2170.0 +MaxUtility() = 0.0 +UtilitySum() = None +InformationStateTensorShape() = [126] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 126 +MaxGameLength() = 36 +ToString() = "bridge_uncontested_bidding(num_redeals=1,relative_scoring=True,rng_seed=-1)" + +# State 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "" +InformationStateString(1) = "" +InformationStateTensor(0): ◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +InformationStateTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +SerializeState() = "" +ChanceOutcomes() = [(0,1)] +LegalActions() = [0] +StringLegalActions() = ["Deal"] + +# Apply action "Deal" +action: 0 + +# State 1 +# T8643.KQ983.K2.A K972.AJT6.64.853 +IsTerminal() = False +History() = [0] +HistoryString() = "0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "T8643.KQ983.K2.A " +InformationStateString(1) = "K972.AJT6.64.853 " +InformationStateTensor(0): ◯◉◯◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +InformationStateTensor(1): ◯◯◯◉◉◯◯◯◯◉◯◯◉◯◯◯◯◉◉◯◯◯◯◉◉◯◯◯◯◯◯◉◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +SerializeState() = "T8643.KQ983.K2.A K972.AJT6.64.853 " +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Pass", "1C", "1D", "1H", "1S", "1N", "2C", "2D", "2H", "2S", "2N", "3C", "3D", "3H", "3S", "3N", "4C", "4D", "4H", "4S", "4N", "5C", "5D", "5H", "5S", "5N", "6C", "6D", "6H", "6S", "6N", "7C", "7D", "7H", "7S", "7N"] + +# Apply action "4C" +action: 16 + +# State 2 +# T8643.KQ983.K2.A K972.AJT6.64.853 4C +IsTerminal() = False +History() = [0, 16] +HistoryString() = "0, 16" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "T8643.KQ983.K2.A 4C" +InformationStateString(1) = "K972.AJT6.64.853 4C" +InformationStateTensor(0): ◯◉◯◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +InformationStateTensor(1): ◯◯◯◉◉◯◯◯◯◉◯◯◉◯◯◯◯◉◉◯◯◯◯◉◉◯◯◯◯◯◯◉◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +SerializeState() = "T8643.KQ983.K2.A K972.AJT6.64.853 4C" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Pass", "4D", "4H", "4S", "4N", "5C", "5D", "5H", "5S", "5N", "6C", "6D", "6H", "6S", "6N", "7C", "7D", "7H", "7S", "7N"] + +# Apply action "7H" +action: 33 + +# State 3 +# T8643.KQ983.K2.A K972.AJT6.64.853 4C-7H +IsTerminal() = False +History() = [0, 16, 33] +HistoryString() = "0, 16, 33" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "T8643.KQ983.K2.A 4C-7H" +InformationStateString(1) = 
"K972.AJT6.64.853 4C-7H" +InformationStateTensor(0): ◯◉◯◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉◯ +InformationStateTensor(1): ◯◯◯◉◉◯◯◯◯◉◯◯◉◯◯◯◯◉◉◯◯◯◯◉◉◯◯◯◯◯◯◉◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉ +SerializeState() = "T8643.KQ983.K2.A K972.AJT6.64.853 4C-7H" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 34, 35] +StringLegalActions() = ["Pass", "7S", "7N"] + +# Apply action "7S" +action: 34 + +# State 4 +# T8643.KQ983.K2.A K972.AJT6.64.853 4C-7H-7S +IsTerminal() = False +History() = [0, 16, 33, 34] +HistoryString() = "0, 16, 33, 34" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "T8643.KQ983.K2.A 4C-7H-7S" +InformationStateString(1) = "K972.AJT6.64.853 4C-7H-7S" +InformationStateTensor(0): ◯◉◯◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◉◯ +InformationStateTensor(1): ◯◯◯◉◉◯◯◯◯◉◯◯◉◯◯◯◯◉◉◯◯◯◯◉◉◯◯◯◯◯◯◉◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◉ +SerializeState() = "T8643.KQ983.K2.A K972.AJT6.64.853 4C-7H-7S" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 35] +StringLegalActions() = ["Pass", "7N"] + +# Apply action "Pass" +action: 0 + +# State 5 +# T8643.KQ983.K2.A K972.AJT6.64.853 4C-7H-7S-Pass Score:-150 Passed Out:0 1C N:-150 1C E:-150 1D N:-150 1D E:-150 1H N:170 1H E:170 1S N:170 1S E:170 1N N:-50 1N E:-50 3N N:-150 3N E:-150 4H N:420 4H E:420 4S N:420 4S E:420 5C N:-350 5C E:-350 5D N:-350 5D E:-350 6C N:-400 6C E:-400 6D N:-400 6D E:-400 6H N:-100 6H E:-100 6S N:-100 6S E:-100 6N N:-300 6N E:-300 7C N:-450 7C E:-450 7D N:-450 7D E:-450 7H N:-150 7H E:-150 7S N:-150 7S E:-150 7N N:-350 7N E:-350 +IsTerminal() = True +History() = [0, 16, 33, 34, 0] +HistoryString() = "0, 16, 33, 34, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "T8643.KQ983.K2.A 4C-7H-7S-Pass" +InformationStateString(1) = "K972.AJT6.64.853 4C-7H-7S-Pass" +InformationStateTensor(0): ◯◉◯◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◉◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◉◯ +InformationStateTensor(1): ◯◯◯◉◉◯◯◯◯◉◯◯◉◯◯◯◯◉◉◯◯◯◯◉◉◯◯◯◯◯◯◉◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◉ +SerializeState() = "T8643.KQ983.K2.A K972.AJT6.64.853 4C-7H-7S-Pass Score:-150 Passed Out:0 1C N:-150 1C E:-150 1D N:-150 1D E:-150 1H N:170 1H E:170 1S N:170 1S E:170 1N N:-50 1N E:-50 3N N:-150 3N E:-150 4H N:420 4H E:420 4S N:420 4S E:420 5C N:-350 5C E:-350 5D N:-350 5D E:-350 6C N:-400 6C E:-400 6D N:-400 6D E:-400 6H N:-100 6H E:-100 6S N:-100 6S E:-100 6N N:-300 6N E:-300 7C N:-450 7C E:-450 7D N:-450 7D E:-450 7H N:-150 7H E:-150 7S N:-150 7S E:-150 7N N:-350 7N E:-350" +Rewards() = [-150, -570] +Returns() = [-150, -570] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/cached_tree(game=tic_tac_toe()).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/cached_tree(game=tic_tac_toe()).txt new file mode 100644 index 0000000..2e82364 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/cached_tree(game=tic_tac_toe()).txt @@ -0,0 +1,240 @@ +game: cached_tree(game=tic_tac_toe()) + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = 
Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Turn-based Tic Tac Toe" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["game"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "cached_tree" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 9 +PolicyTensorShape() = [9] +MaxChanceOutcomes() = 0 +GetParameters() = {game=tic_tac_toe()} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [3, 3, 3] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 27 +MaxGameLength() = 9 +ToString() = "cached_tree(game=tic_tac_toe())" + +# State 0 +# ... +# ... +# ... +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = "...\n...\n..." +ObservationString(1) = "...\n...\n..." +ObservationTensor(0): +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +ObservationTensor(1): +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8] +StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,2)", "x(1,0)", "x(1,1)", "x(1,2)", "x(2,0)", "x(2,1)", "x(2,2)"] + +# Apply action "x(0,1)" +action: 1 + +# State 1 +# .x. +# ... +# ... +IsTerminal() = False +History() = [1] +HistoryString() = "1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "1" +InformationStateString(1) = "1" +ObservationString(0) = ".x.\n...\n..." +ObservationString(1) = ".x.\n...\n..." +ObservationTensor(0): +◉◯◉ ◯◯◯ ◯◉◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +ObservationTensor(1): +◉◯◉ ◯◯◯ ◯◉◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 2, 3, 4, 5, 6, 7, 8] +StringLegalActions() = ["o(0,0)", "o(0,2)", "o(1,0)", "o(1,1)", "o(1,2)", "o(2,0)", "o(2,1)", "o(2,2)"] + +# Apply action "o(1,2)" +action: 5 + +# State 2 +# .x. +# ..o +# ... +IsTerminal() = False +History() = [1, 5] +HistoryString() = "1, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "1, 5" +InformationStateString(1) = "1, 5" +ObservationString(0) = ".x.\n..o\n..." +ObservationString(1) = ".x.\n..o\n..." +ObservationTensor(0): +◉◯◉ ◯◯◯ ◯◉◯ +◉◉◯ ◯◯◉ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +ObservationTensor(1): +◉◯◉ ◯◯◯ ◯◉◯ +◉◉◯ ◯◯◉ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 2, 3, 4, 6, 7, 8] +StringLegalActions() = ["x(0,0)", "x(0,2)", "x(1,0)", "x(1,1)", "x(2,0)", "x(2,1)", "x(2,2)"] + +# Apply action "x(0,2)" +action: 2 + +# State 3 +# .xx +# ..o +# ... +IsTerminal() = False +History() = [1, 5, 2] +HistoryString() = "1, 5, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "1, 5, 2" +InformationStateString(1) = "1, 5, 2" +ObservationString(0) = ".xx\n..o\n..." +ObservationString(1) = ".xx\n..o\n..." 
+ObservationTensor(0): +◉◯◯ ◯◯◯ ◯◉◉ +◉◉◯ ◯◯◉ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +ObservationTensor(1): +◉◯◯ ◯◯◯ ◯◉◉ +◉◉◯ ◯◯◉ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 3, 4, 6, 7, 8] +StringLegalActions() = ["o(0,0)", "o(1,0)", "o(1,1)", "o(2,0)", "o(2,1)", "o(2,2)"] + +# Apply action "o(2,2)" +action: 8 + +# State 4 +# .xx +# ..o +# ..o +IsTerminal() = False +History() = [1, 5, 2, 8] +HistoryString() = "1, 5, 2, 8" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "1, 5, 2, 8" +InformationStateString(1) = "1, 5, 2, 8" +ObservationString(0) = ".xx\n..o\n..o" +ObservationString(1) = ".xx\n..o\n..o" +ObservationTensor(0): +◉◯◯ ◯◯◯ ◯◉◉ +◉◉◯ ◯◯◉ ◯◯◯ +◉◉◯ ◯◯◉ ◯◯◯ +ObservationTensor(1): +◉◯◯ ◯◯◯ ◯◉◉ +◉◉◯ ◯◯◉ ◯◯◯ +◉◉◯ ◯◯◉ ◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 3, 4, 6, 7] +StringLegalActions() = ["x(0,0)", "x(1,0)", "x(1,1)", "x(2,0)", "x(2,1)"] + +# Apply action "x(1,1)" +action: 4 + +# State 5 +# .xx +# .xo +# ..o +IsTerminal() = False +History() = [1, 5, 2, 8, 4] +HistoryString() = "1, 5, 2, 8, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "1, 5, 2, 8, 4" +InformationStateString(1) = "1, 5, 2, 8, 4" +ObservationString(0) = ".xx\n.xo\n..o" +ObservationString(1) = ".xx\n.xo\n..o" +ObservationTensor(0): +◉◯◯ ◯◯◯ ◯◉◉ +◉◯◯ ◯◯◉ ◯◉◯ +◉◉◯ ◯◯◉ ◯◯◯ +ObservationTensor(1): +◉◯◯ ◯◯◯ ◯◉◉ +◉◯◯ ◯◯◉ ◯◉◯ +◉◉◯ ◯◯◉ ◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 3, 6, 7] +StringLegalActions() = ["o(0,0)", "o(1,0)", "o(2,0)", "o(2,1)"] + +# Apply action "o(2,0)" +action: 6 + +# State 6 +# Apply action "x(2,1)" +action: 7 + +# State 7 +# .xx +# .xo +# oxo +IsTerminal() = True +History() = [1, 5, 2, 8, 4, 6, 7] +HistoryString() = "1, 5, 2, 8, 4, 6, 7" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "1, 5, 2, 8, 4, 6, 7" +InformationStateString(1) = "1, 5, 2, 8, 4, 6, 7" +ObservationString(0) = ".xx\n.xo\noxo" +ObservationString(1) = ".xx\n.xo\noxo" +ObservationTensor(0): +◉◯◯ ◯◯◯ ◯◉◉ +◉◯◯ ◯◯◉ ◯◉◯ +◯◯◯ ◉◯◉ ◯◉◯ +ObservationTensor(1): +◉◯◯ ◯◯◯ ◯◉◉ +◉◯◯ ◯◯◉ ◯◉◯ +◯◯◯ ◉◯◉ ◯◉◯ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/catch.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/catch.txt new file mode 100644 index 0000000..998b49e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/catch.txt @@ -0,0 +1,229 @@ +game: catch + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Catch" +GameType.max_num_players = 1 +GameType.min_num_players = 1 +GameType.parameter_specification = ["columns", "rows"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "catch" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 3 +PolicyTensorShape() = [3] +MaxChanceOutcomes() = 5 +GetParameters() = {columns=5,rows=10} +NumPlayers() = 1 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = None +ObservationTensorShape() = [10, 5] +ObservationTensorLayout() = 
TensorLayout.CHW +ObservationTensorSize() = 50 +MaxGameLength() = 10 +ToString() = "catch()" + +# State 0 +# ..... +# ..... +# ..... +# ..... +# ..... +# ..... +# ..... +# ..... +# ..... +# ..... +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = ".....\n.....\n.....\n.....\n.....\n.....\n.....\n.....\n.....\n.....\n" +ObservationTensor(0): ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +ChanceOutcomes() = [(0,0.2), (1,0.2), (2,0.2), (3,0.2), (4,0.2)] +LegalActions() = [0, 1, 2, 3, 4] +StringLegalActions() = ["Initialized ball to 0", "Initialized ball to 1", "Initialized ball to 2", "Initialized ball to 3", "Initialized ball to 4"] + +# Apply action "Initialized ball to 1" +action: 1 + +# State 1 +# .o... +# ..... +# ..... +# ..... +# ..... +# ..... +# ..... +# ..... +# ..... +# ..x.. +IsTerminal() = False +History() = [1] +HistoryString() = "1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = ".o...\n.....\n.....\n.....\n.....\n.....\n.....\n.....\n.....\n..x..\n" +ObservationTensor(0): ◯◉◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◉◯◯ +Rewards() = [0] +Returns() = [0] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["LEFT", "STAY", "RIGHT"] + +# Apply action "STAY" +action: 1 + +# State 2 +# ..... +# .o... +# ..... +# ..... +# ..... +# ..... +# ..... +# ..... +# ..... +# ..x.. +IsTerminal() = False +History() = [1, 1] +HistoryString() = "1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = ".....\n.o...\n.....\n.....\n.....\n.....\n.....\n.....\n.....\n..x..\n" +ObservationTensor(0): ◯◯◯◯◯ + ◯◉◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◉◯◯ +Rewards() = [0] +Returns() = [0] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["LEFT", "STAY", "RIGHT"] + +# Apply action "LEFT" +action: 0 + +# State 3 +# ..... +# ..... +# .o... +# ..... +# ..... +# ..... +# ..... +# ..... +# ..... +# .x... +IsTerminal() = False +History() = [1, 1, 0] +HistoryString() = "1, 1, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = ".....\n.....\n.o...\n.....\n.....\n.....\n.....\n.....\n.....\n.x...\n" +ObservationTensor(0): ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◉◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◉◯◯◯ +Rewards() = [0] +Returns() = [0] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["LEFT", "STAY", "RIGHT"] + +# Apply action "LEFT" +action: 0 + +# State 4 +# Apply action "STAY" +action: 1 + +# State 5 +# Apply action "STAY" +action: 1 + +# State 6 +# Apply action "LEFT" +action: 0 + +# State 7 +# Apply action "LEFT" +action: 0 + +# State 8 +# Apply action "RIGHT" +action: 2 + +# State 9 +# Apply action "STAY" +action: 1 + +# State 10 +# ..... +# ..... +# ..... +# ..... +# ..... +# ..... +# ..... +# ..... +# ..... +# .x... 
+IsTerminal() = True +History() = [1, 1, 0, 0, 1, 1, 0, 0, 2, 1] +HistoryString() = "1, 1, 0, 0, 1, 1, 0, 0, 2, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = ".....\n.....\n.....\n.....\n.....\n.....\n.....\n.....\n.....\n.x...\n" +ObservationTensor(0): ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◉◯◯◯ +Rewards() = [1] +Returns() = [1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/chat_game.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/chat_game.txt new file mode 100644 index 0000000..36e3a87 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/chat_game.txt @@ -0,0 +1,2308 @@ +game: chat_game + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Chat Game" +GameType.max_num_players = 10 +GameType.min_num_players = 2 +GameType.parameter_specification = ["max_utility", "min_utility", "num_distinct_actions", "num_init_states", "num_llm_seeds", "num_max_replies", "num_players", "players", "silence_logging"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = True +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "chat_game" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 2 +PolicyTensorShape() = [2] +MaxChanceOutcomes() = 1 +GetParameters() = {max_utility=10.0,min_utility=-10.0,num_distinct_actions=2,num_init_states=1,num_llm_seeds=1,num_max_replies=1,num_players=2,players=0,silence_logging=True} +NumPlayers() = 2 +MinUtility() = -10.0 +MaxUtility() = 10.0 +UtilitySum() = None +InformationStateTensorShape() = player_id: [10], private_info: [300], scenario_prompt: [300], senders: [50, 10], receivers: [50, 10], prompt_actions: [50, 300], messages: [50, 300] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 31610 +ObservationTensorShape() = player_id: [10], private_info: [100], dialogue: [100] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 210 +MaxGameLength() = 2 +ToString() = "chat_game(max_utility=10.0,min_utility=-10.0,num_distinct_actions=2,num_init_states=1,num_llm_seeds=1,num_max_replies=1,num_players=2,players=0,silence_logging=True)" + +# State 0 +# Setting up game... 
+IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.CHANCE +InformationStateString(0) = "\n\nFull Dialogue\n\n" +InformationStateString(1) = "\n\nFull Dialogue\n\n" +InformationStateTensor(0).player_id: ◉◯◯◯◯◯◯◯◯◯ +InformationStateTensor(0).private_info: zeros(300) +InformationStateTensor(0).scenario_prompt: zeros(300) +InformationStateTensor(0).senders: ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(0).receivers: ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(0).prompt_actions: +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +InformationStateTensor(0).messages: +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +InformationStateTensor(1).player_id: ◯◉◯◯◯◯◯◯◯◯ +InformationStateTensor(1).private_info: zeros(300) +InformationStateTensor(1).scenario_prompt: zeros(300) +InformationStateTensor(1).senders: ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + 
◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1).receivers: ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1).prompt_actions: +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +InformationStateTensor(1).messages: +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +ObservationString(0) = "Observation (speaker=0:):\n\nThis is a summary of the dialogue. We are happy.\n" +ObservationString(1) = "Observation (speaker=1:):\n\nThis is a summary of the dialogue. We are happy.\n" +PublicObservationString() = "Observation (speaker=0:):\n\nThis is a summary of the dialogue. We are happy.\n" +PrivateObservationString(0) = "Observation (speaker=0:):\n\nThis is a summary of the dialogue. We are happy.\n" +PrivateObservationString(1) = "Observation (speaker=1:):\n\nThis is a summary of the dialogue. 
We are happy.\n" +ObservationTensor(0).player_id: ◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_info: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).dialogue: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).player_id: ◯◉◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_info: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).dialogue: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,1)] +LegalActions() = [0] +StringLegalActions() = ["Sampled init state: 0"] + +# Apply action "Sampled init state: 0" +action: 0 + +# State 1 +# +# +# ############################ +# Email: +# from: Bob +# to: Suzy +# cc: Everyone +# ############################ +# +# Hi Suzy, +# +# I hope you are well, +# +# Best, +# +# Bob +IsTerminal() = False +History() = [0] +HistoryString() = "0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "\n\nFull Dialogue\n\n\n\n############################\nEmail:\nfrom: Bob\nto: Suzy\ncc: Everyone\n############################\n\nHi Suzy,\n\nI hope you are well,\n\nBest,\n\nBob" +InformationStateString(1) = "\n\nFull Dialogue\n\n\n\n############################\nEmail:\nfrom: Bob\nto: Suzy\ncc: Everyone\n############################\n\nHi Suzy,\n\nI hope you are well,\n\nBest,\n\nBob" +InformationStateTensor(0).player_id: ◉◯◯◯◯◯◯◯◯◯ +InformationStateTensor(0).private_info: zeros(300) +InformationStateTensor(0).scenario_prompt: zeros(300) +InformationStateTensor(0).senders: ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(0).receivers: ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(0).prompt_actions: +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) 
+zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +InformationStateTensor(0).messages: +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +InformationStateTensor(1).player_id: ◯◉◯◯◯◯◯◯◯◯ +InformationStateTensor(1).private_info: zeros(300) +InformationStateTensor(1).scenario_prompt: zeros(300) +InformationStateTensor(1).senders: ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1).receivers: ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1).prompt_actions: +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +InformationStateTensor(1).messages: +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) 
+zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +ObservationString(0) = "Observation (speaker=0:Bob):\n\nThis is a summary of the dialogue. We are happy.\n" +ObservationString(1) = "Observation (speaker=1:Suzy):\n\nThis is a summary of the dialogue. We are happy.\n" +PublicObservationString() = "Observation (speaker=0:Bob):\n\nThis is a summary of the dialogue. We are happy.\n" +PrivateObservationString(0) = "Observation (speaker=0:Bob):\n\nThis is a summary of the dialogue. We are happy.\n" +PrivateObservationString(1) = "Observation (speaker=1:Suzy):\n\nThis is a summary of the dialogue. We are happy.\n" +ObservationTensor(0).player_id: ◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_info: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).dialogue: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).player_id: ◯◉◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_info: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).dialogue: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7] +StringLegalActions() = ["Action: +int: 0 +dict: {'tone': 'Happy'}", "Action: +int: 1 +dict: {'tone': 'Sad'}", "Action: +int: 2 +dict: {'tone': 'Angry'}", "Action: +int: 3 +dict: {'tone': 'Calm'}", "Action: +int: 4 +dict: {'tone': 'Happy'}", "Action: +int: 5 +dict: {'tone': 'Sad'}", "Action: +int: 6 +dict: {'tone': 'Angry'}", "Action: +int: 7 +dict: {'tone': 'Calm'}"] + +# Apply action "Action: +int: 4 +dict: {'tone': 'Happy'}" +action: 4 + +# State 2 +# +# +# ############################ +# Email: +# from: Bob +# to: Suzy +# cc: Everyone +# ############################ +# +# Hi Suzy, +# +# I hope you are well, +# +# Best, +# +# Bob +IsTerminal() = False +History() = [0, 4] +HistoryString() = "0, 4" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.CHANCE +InformationStateString(0) = "\n\nFull Dialogue\n\n\n\n############################\nEmail:\nfrom: Bob\nto: Suzy\ncc: Everyone\n############################\n\nHi Suzy,\n\nI hope you are well,\n\nBest,\n\nBob" +InformationStateString(1) = "\n\nFull Dialogue\n\n\n\n############################\nEmail:\nfrom: Bob\nto: Suzy\ncc: Everyone\n############################\n\nHi Suzy,\n\nI hope you are well,\n\nBest,\n\nBob" +InformationStateTensor(0).player_id: ◉◯◯◯◯◯◯◯◯◯ +InformationStateTensor(0).private_info: zeros(300) +InformationStateTensor(0).scenario_prompt: zeros(300) +InformationStateTensor(0).senders: ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(0).receivers: ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + 
◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(0).prompt_actions: +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +InformationStateTensor(0).messages: +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +InformationStateTensor(1).player_id: ◯◉◯◯◯◯◯◯◯◯ +InformationStateTensor(1).private_info: zeros(300) +InformationStateTensor(1).scenario_prompt: zeros(300) +InformationStateTensor(1).senders: ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1).receivers: ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1).prompt_actions: +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) 
+zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +InformationStateTensor(1).messages: +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +ObservationString(0) = "Observation (speaker=0:Bob):\n\nThis is a summary of the dialogue. We are happy.\n" +ObservationString(1) = "Observation (speaker=1:Suzy):\n\nThis is a summary of the dialogue. We are happy.\n" +PublicObservationString() = "Observation (speaker=0:Bob):\n\nThis is a summary of the dialogue. We are happy.\n" +PrivateObservationString(0) = "Observation (speaker=0:Bob):\n\nThis is a summary of the dialogue. We are happy.\n" +PrivateObservationString(1) = "Observation (speaker=1:Suzy):\n\nThis is a summary of the dialogue. We are happy.\n" +ObservationTensor(0).player_id: ◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_info: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).dialogue: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).player_id: ◯◉◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_info: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).dialogue: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,1)] +LegalActions() = [0] +StringLegalActions() = ["Sampled LLM seed: 0"] + +# Apply action "Sampled LLM seed: 0" +action: 0 + +# State 3 +# +# +# ############################ +# Email: +# from: Suzy +# to: Bob +# cc: +# ############################ +# +# +# That all sounds good to me. 
+IsTerminal() = False +History() = [0, 4, 0] +HistoryString() = "0, 4, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "\n\nFull Dialogue\n\n\n\n############################\nEmail:\nfrom: Bob\nto: Suzy\ncc: Everyone\n############################\n\nHi Suzy,\n\nI hope you are well,\n\nBest,\n\nBob\n\n############################\nEmail:\nfrom: Suzy\nto: Bob\ncc: \n############################\n\n\nThat all sounds good to me.\n" +InformationStateString(1) = "\n\nFull Dialogue\n\n\n\n############################\nEmail:\nfrom: Bob\nto: Suzy\ncc: Everyone\n############################\n\nHi Suzy,\n\nI hope you are well,\n\nBest,\n\nBob\n\n############################\nEmail:\nfrom: Suzy\nto: Bob\ncc: \n############################\n\n\nThat all sounds good to me.\n" +InformationStateTensor(0).player_id: ◉◯◯◯◯◯◯◯◯◯ +InformationStateTensor(0).private_info: zeros(300) +InformationStateTensor(0).scenario_prompt: zeros(300) +InformationStateTensor(0).senders: ◯◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(0).receivers: ◯◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(0).prompt_actions: +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +InformationStateTensor(0).messages: +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) 
+zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +InformationStateTensor(1).player_id: ◯◉◯◯◯◯◯◯◯◯ +InformationStateTensor(1).private_info: zeros(300) +InformationStateTensor(1).scenario_prompt: zeros(300) +InformationStateTensor(1).senders: ◯◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1).receivers: ◯◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1).prompt_actions: +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +InformationStateTensor(1).messages: +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +ObservationString(0) = "Observation (speaker=0:Bob):\n\nThis is a summary of the dialogue. We are happy.\n" +ObservationString(1) = "Observation (speaker=1:Suzy):\n\nThis is a summary of the dialogue. We are happy.\n" +PublicObservationString() = "Observation (speaker=0:Bob):\n\nThis is a summary of the dialogue. We are happy.\n" +PrivateObservationString(0) = "Observation (speaker=0:Bob):\n\nThis is a summary of the dialogue. We are happy.\n" +PrivateObservationString(1) = "Observation (speaker=1:Suzy):\n\nThis is a summary of the dialogue. 
We are happy.\n" +ObservationTensor(0).player_id: ◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_info: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).dialogue: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).player_id: ◯◉◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_info: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).dialogue: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7] +StringLegalActions() = ["Action: +int: 0 +dict: {'tone': 'Happy'}", "Action: +int: 1 +dict: {'tone': 'Sad'}", "Action: +int: 2 +dict: {'tone': 'Angry'}", "Action: +int: 3 +dict: {'tone': 'Calm'}", "Action: +int: 4 +dict: {'tone': 'Happy'}", "Action: +int: 5 +dict: {'tone': 'Sad'}", "Action: +int: 6 +dict: {'tone': 'Angry'}", "Action: +int: 7 +dict: {'tone': 'Calm'}"] + +# Apply action "Action: +int: 0 +dict: {'tone': 'Happy'}" +action: 0 + +# State 4 +# +# +# ############################ +# Email: +# from: Suzy +# to: Bob +# cc: +# ############################ +# +# +# That all sounds good to me. +IsTerminal() = True +History() = [0, 4, 0, 0] +HistoryString() = "0, 4, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.TERMINAL +InformationStateString(0) = "\n\nFull Dialogue\n\n\n\n############################\nEmail:\nfrom: Bob\nto: Suzy\ncc: Everyone\n############################\n\nHi Suzy,\n\nI hope you are well,\n\nBest,\n\nBob\n\n############################\nEmail:\nfrom: Suzy\nto: Bob\ncc: \n############################\n\n\nThat all sounds good to me.\n" +InformationStateString(1) = "\n\nFull Dialogue\n\n\n\n############################\nEmail:\nfrom: Bob\nto: Suzy\ncc: Everyone\n############################\n\nHi Suzy,\n\nI hope you are well,\n\nBest,\n\nBob\n\n############################\nEmail:\nfrom: Suzy\nto: Bob\ncc: \n############################\n\n\nThat all sounds good to me.\n" +InformationStateTensor(0).player_id: ◉◯◯◯◯◯◯◯◯◯ +InformationStateTensor(0).private_info: zeros(300) +InformationStateTensor(0).scenario_prompt: zeros(300) +InformationStateTensor(0).senders: ◯◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(0).receivers: ◯◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + 
◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(0).prompt_actions: +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +InformationStateTensor(0).messages: +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +InformationStateTensor(1).player_id: ◯◉◯◯◯◯◯◯◯◯ +InformationStateTensor(1).private_info: zeros(300) +InformationStateTensor(1).scenario_prompt: zeros(300) +InformationStateTensor(1).senders: ◯◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1).receivers: ◯◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1).prompt_actions: +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) 
+zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +InformationStateTensor(1).messages: +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +zeros(300) +ObservationString(0) = "Observation (speaker=0:Bob):\n\nThis is a summary of the dialogue. We are happy.\n" +ObservationString(1) = "Observation (speaker=1:Suzy):\n\nThis is a summary of the dialogue. We are happy.\n" +PublicObservationString() = "Observation (speaker=0:Bob):\n\nThis is a summary of the dialogue. We are happy.\n" +PrivateObservationString(0) = "Observation (speaker=0:Bob):\n\nThis is a summary of the dialogue. We are happy.\n" +PrivateObservationString(1) = "Observation (speaker=1:Suzy):\n\nThis is a summary of the dialogue. We are happy.\n" +ObservationTensor(0).player_id: ◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_info: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).dialogue: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).player_id: ◯◉◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_info: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).dialogue: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [5, 5] +Returns() = [5, 5] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/checkers.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/checkers.txt new file mode 100644 index 0000000..bfbf134 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/checkers.txt @@ -0,0 +1,1106 @@ +game: checkers + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Checkers" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["columns", "rows"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "checkers" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 512 +PolicyTensorShape() = [512] +MaxChanceOutcomes() = 0 +GetParameters() = {columns=8,rows=8} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [5, 8, 8] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 320 +MaxGameLength() = 1000 +ToString() = "checkers()" + +# State 0 +# 8.+.+.+.+ +# 7+.+.+.+. +# 6.+.+.+.+ +# 5........ +# 4........ +# 3o.o.o.o. +# 2.o.o.o.o +# 1o.o.o.o. 
+# abcdefgh +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = "8.+.+.+.+\n7+.+.+.+.\n6.+.+.+.+\n5........\n4........\n3o.o.o.o.\n2.o.o.o.o\n1o.o.o.o.\n abcdefgh\n" +ObservationString(1) = "8.+.+.+.+\n7+.+.+.+.\n6.+.+.+.+\n5........\n4........\n3o.o.o.o.\n2.o.o.o.o\n1o.o.o.o.\n abcdefgh\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ ◉◯◉◯◉◯◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◯◉◯ ◯◉◯◉◯◉◯◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ ◉◯◉◯◉◯◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◉◯◉◯◉◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ +◯◉◯◉◯◉◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◯◉◯ +◉◯◉◯◉◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ +ObservationTensor(1): +◯◉◯◉◯◉◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◯◉◯ +◉◯◉◯◉◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ +◯◉◯◉◯◉◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◯◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◯◉◯ ◯◉◯◉◯◉◯◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ ◉◯◉◯◉◯◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◯◉◯ ◯◉◯◉◯◉◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [322, 336, 338, 352, 354, 368, 370] +StringLegalActions() = ["a3b4", "c3b4", "c3d4", "e3d4", "e3f4", "g3f4", "g3h4"] + +# Apply action "c3b4" +action: 336 + +# State 1 +# 8.+.+.+.+ +# 7+.+.+.+. +# 6.+.+.+.+ +# 5........ +# 4.o...... +# 3o...o.o. +# 2.o.o.o.o +# 1o.o.o.o. +# abcdefgh +IsTerminal() = False +History() = [336] +HistoryString() = "336" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "336" +InformationStateString(1) = "336" +ObservationString(0) = "8.+.+.+.+\n7+.+.+.+.\n6.+.+.+.+\n5........\n4.o......\n3o...o.o.\n2.o.o.o.o\n1o.o.o.o.\n abcdefgh\n" +ObservationString(1) = "8.+.+.+.+\n7+.+.+.+.\n6.+.+.+.+\n5........\n4.o......\n3o...o.o.\n2.o.o.o.o\n1o.o.o.o.\n abcdefgh\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ ◉◯◉◯◉◯◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◯◉◯ ◯◉◯◉◯◉◯◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ ◉◯◉◯◉◯◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◉◉◉◉◉ +◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◉◉◯◉◯◉ +◯◉◯◉◯◉◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◯◉◯ +◉◯◉◯◉◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ +ObservationTensor(1): +◯◉◯◉◯◉◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◯◉◯ +◉◯◉◯◉◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ +◯◉◯◉◯◉◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◯◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◉◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◉◯◉◯ ◯◉◉◉◯◉◯◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ ◉◯◉◯◉◯◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◯◉◯ ◯◉◯◉◯◉◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [140, 142, 156, 158, 172, 174, 190] +StringLegalActions() = ["b6c5", "b6a5", "d6e5", "d6c5", "f6g5", "f6e5", "h6g5"] + +# Apply action "f6e5" +action: 174 + +# State 2 +# 8.+.+.+.+ +# 7+.+.+.+. +# 6.+.+...+ +# 5....+... +# 4.o...... +# 3o...o.o. +# 2.o.o.o.o +# 1o.o.o.o. 
+# abcdefgh +IsTerminal() = False +History() = [336, 174] +HistoryString() = "336, 174" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "336, 174" +InformationStateString(1) = "336, 174" +ObservationString(0) = "8.+.+.+.+\n7+.+.+.+.\n6.+.+...+\n5....+...\n4.o......\n3o...o.o.\n2.o.o.o.o\n1o.o.o.o.\n abcdefgh\n" +ObservationString(1) = "8.+.+.+.+\n7+.+.+.+.\n6.+.+...+\n5....+...\n4.o......\n3o...o.o.\n2.o.o.o.o\n1o.o.o.o.\n abcdefgh\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ ◉◯◉◯◉◯◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◯◉◯ ◯◉◯◉◯◉◯◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◯◯◉ ◉◯◉◯◉◉◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◉◉◉◉◯◉◉◉ +◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◉◉◉◉◉ +◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◉◉◯◉◯◉ +◯◉◯◉◯◉◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◯◉◯ +◉◯◉◯◉◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ +ObservationTensor(1): +◯◉◯◉◯◉◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◯◉◯ +◉◯◉◯◉◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ +◯◉◯◉◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◉◉◯ +◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◉◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◉◯◉◯ ◯◉◉◉◯◉◯◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ ◉◯◉◯◉◯◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◯◉◯ ◯◉◯◉◯◉◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [264, 266, 352, 354, 368, 370, 394, 408] +StringLegalActions() = ["b4a5", "b4c5", "e3d4", "e3f4", "g3f4", "g3h4", "b2c3", "d2c3"] + +# Apply action "e3d4" +action: 352 + +# State 3 +# 8.+.+.+.+ +# 7+.+.+.+. +# 6.+.+...+ +# 5....+... +# 4.o.o.... +# 3o.....o. +# 2.o.o.o.o +# 1o.o.o.o. +# abcdefgh +IsTerminal() = False +History() = [336, 174, 352] +HistoryString() = "336, 174, 352" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "336, 174, 352" +InformationStateString(1) = "336, 174, 352" +ObservationString(0) = "8.+.+.+.+\n7+.+.+.+.\n6.+.+...+\n5....+...\n4.o.o....\n3o.....o.\n2.o.o.o.o\n1o.o.o.o.\n abcdefgh\n" +ObservationString(1) = "8.+.+.+.+\n7+.+.+.+.\n6.+.+...+\n5....+...\n4.o.o....\n3o.....o.\n2.o.o.o.o\n1o.o.o.o.\n abcdefgh\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ ◉◯◉◯◉◯◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◯◉◯ ◯◉◯◉◯◉◯◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◯◯◉ ◉◯◉◯◉◉◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◉◉◉◉◯◉◉◉ +◯◉◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◉◉◉ +◉◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◉ +◯◉◯◉◯◉◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◯◉◯ +◉◯◉◯◉◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ +ObservationTensor(1): +◯◉◯◉◯◉◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◯◉◯ +◉◯◉◯◉◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ +◯◉◯◉◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◉◉◯ +◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◯◯◯ ◉◯◉◯◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◉◯ ◯◉◉◉◉◉◯◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ ◉◯◉◯◉◯◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◯◉◯ ◯◉◯◉◯◉◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [231] +StringLegalActions() = ["e5c3"] + +# Apply action "e5c3" +action: 231 + +# State 4 +# 8.+.+.+.+ +# 7+.+.+.+. +# 6.+.+...+ +# 5........ +# 4.o...... +# 3o.+...o. +# 2.o.o.o.o +# 1o.o.o.o. 
+# abcdefgh +IsTerminal() = False +History() = [336, 174, 352, 231] +HistoryString() = "336, 174, 352, 231" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "336, 174, 352, 231" +InformationStateString(1) = "336, 174, 352, 231" +ObservationString(0) = "8.+.+.+.+\n7+.+.+.+.\n6.+.+...+\n5........\n4.o......\n3o.+...o.\n2.o.o.o.o\n1o.o.o.o.\n abcdefgh\n" +ObservationString(1) = "8.+.+.+.+\n7+.+.+.+.\n6.+.+...+\n5........\n4.o......\n3o.+...o.\n2.o.o.o.o\n1o.o.o.o.\n abcdefgh\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ ◉◯◉◯◉◯◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◯◉◯ ◯◉◯◉◯◉◯◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◯◯◉ ◉◯◉◯◉◉◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◉◉◉◉◉ +◉◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯ ◯◉◯◉◉◉◯◉ +◯◉◯◉◯◉◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◯◉◯ +◉◯◉◯◉◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ +ObservationTensor(1): +◯◉◯◉◯◉◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◯◉◯ +◉◯◉◯◉◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ +◯◉◯◉◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◉◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◉◯◉◉◉◉◉◉ +◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◉◯ ◯◉◯◉◉◉◯◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ ◉◯◉◯◉◯◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◯◉◯ ◯◉◯◉◯◉◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [395] +StringLegalActions() = ["b2d4"] + +# Apply action "b2d4" +action: 395 + +# State 5 +# 8.+.+.+.+ +# 7+.+.+.+. +# 6.+.+...+ +# 5........ +# 4.o.o.... +# 3o.....o. +# 2...o.o.o +# 1o.o.o.o. +# abcdefgh +IsTerminal() = False +History() = [336, 174, 352, 231, 395] +HistoryString() = "336, 174, 352, 231, 395" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "336, 174, 352, 231, 395" +InformationStateString(1) = "336, 174, 352, 231, 395" +ObservationString(0) = "8.+.+.+.+\n7+.+.+.+.\n6.+.+...+\n5........\n4.o.o....\n3o.....o.\n2...o.o.o\n1o.o.o.o.\n abcdefgh\n" +ObservationString(1) = "8.+.+.+.+\n7+.+.+.+.\n6.+.+...+\n5........\n4.o.o....\n3o.....o.\n2...o.o.o\n1o.o.o.o.\n abcdefgh\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ ◉◯◉◯◉◯◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◯◉◯ ◯◉◯◉◯◉◯◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◯◯◉ ◉◯◉◯◉◉◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◉◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◉◉◉ +◉◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◉ +◯◯◯◉◯◉◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◯◉◯◉◯ +◉◯◉◯◉◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ +ObservationTensor(1): +◯◉◯◉◯◉◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◯◉◯ +◉◯◉◯◉◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯◉ +◯◉◯◉◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◉◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◯◯◯ ◉◯◉◯◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◉◯ ◯◉◉◉◉◉◯◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◉◯◉ ◉◉◉◯◉◯◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◯◉◯ ◯◉◯◉◯◉◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [100, 118, 140, 142, 156, 158, 190] +StringLegalActions() = ["e7f6", "g7f6", "b6c5", "b6a5", "d6e5", "d6c5", "h6g5"] + +# Apply action "d6e5" +action: 156 + +# State 6 +# Apply action "d4f6" +action: 283 + +# State 7 +# Apply action "e7g5" +action: 101 + +# State 8 +# Apply action "f2e3" +action: 424 + +# State 9 +# Apply action "b6c5" +action: 140 + +# State 10 +# Apply action "b4d6" +action: 267 + +# State 11 +# Apply action "c7e5" +action: 85 + +# State 12 +# Apply action "e1f2" +action: 482 + +# State 13 +# Apply action "d8c7" +action: 30 + +# State 14 +# Apply action "e3f4" 
+action: 354 + +# State 15 +# Apply action "g5e3" +action: 247 + +# State 16 +# Apply action "f2d4" +action: 425 + +# State 17 +# Apply action "d4f6" +action: 283 + +# State 18 +# Apply action "g7e5" +action: 119 + +# State 19 +# 8.+...+.+ +# 7+.+..... +# 6.......+ +# 5....+... +# 4........ +# 3o.....o. +# 2...o...o +# 1o.o...o. +# abcdefgh +IsTerminal() = False +History() = [336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119] +HistoryString() = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119" +InformationStateString(1) = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119" +ObservationString(0) = "8.+...+.+\n7+.+.....\n6.......+\n5....+...\n4........\n3o.....o.\n2...o...o\n1o.o...o.\n abcdefgh\n" +ObservationString(1) = "8.+...+.+\n7+.+.....\n6.......+\n5....+...\n4........\n3o.....o.\n2...o...o\n1o.o...o.\n abcdefgh\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉ ◉◯◉◉◉◯◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◯◯◯◯ ◯◉◯◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉ ◉◉◉◉◉◉◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◉◉◉◉◯◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◉◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◉ +◯◯◯◉◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉◯ +◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◉◉◯◉ +ObservationTensor(1): +◯◉◯◯◯◉◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◉◉◯◉◯ +◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◉◉◉◉ +◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◯ +◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◉◯ ◯◉◉◉◉◉◯◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◉ ◉◉◉◯◉◉◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◯◯◉◯ ◯◉◯◉◉◉◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [322, 368, 370, 408, 410, 450, 464, 496] +StringLegalActions() = ["a3b4", "g3f4", "g3h4", "d2c3", "d2e3", "a1b2", "c1b2", "g1f2"] + +# Apply action "g1f2" +action: 496 + +# State 20 +# Apply action "h6g5" +action: 190 + +# State 21 +# Apply action "g3f4" +action: 368 + +# State 22 +# 8.+...+.+ +# 7+.+..... +# 6........ +# 5....+.+. +# 4.....o.. +# 3o....... +# 2...o.o.o +# 1o.o..... 
+# abcdefgh +IsTerminal() = False +History() = [336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368] +HistoryString() = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368" +InformationStateString(1) = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368" +ObservationString(0) = "8.+...+.+\n7+.+.....\n6........\n5....+.+.\n4.....o..\n3o.......\n2...o.o.o\n1o.o.....\n abcdefgh\n" +ObservationString(1) = "8.+...+.+\n7+.+.....\n6........\n5....+.+.\n4.....o..\n3o.......\n2...o.o.o\n1o.o.....\n abcdefgh\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉ ◉◯◉◉◉◯◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◯◯◯◯ ◯◉◯◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◉◯ ◉◉◉◉◯◉◯◉ +◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◉◉ +◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉◉ +◯◯◯◉◯◉◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◯◉◯◉◯ +◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◉◉◉◉ +ObservationTensor(1): +◯◉◯◯◯◉◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◉◉◯◉◯ +◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◯◉◯◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ ◉◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◉◯◉ ◉◉◉◯◉◯◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◯◯◯◯ ◯◉◯◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [229, 247] +StringLegalActions() = ["e5g3", "g5e3"] + +# Apply action "g5e3" +action: 247 + +# State 23 +# Apply action "e3g1" +action: 357 + +# State 24 +# Apply action "h2g3" +action: 440 + +# State 25 +# Apply action "g1f2" +action: 496 + +# State 26 +# Apply action "g3h4" +action: 370 + +# State 27 +# Apply action "f2e3" +action: 424 + +# State 28 +# Apply action "d2f4" +action: 411 + +# State 29 +# Apply action "f4d6" +action: 297 + +# State 30 +# Apply action "c7e5" +action: 85 + +# State 31 +# Apply action "a1b2" +action: 450 + +# State 32 +# Apply action "a7b6" +action: 68 + +# State 33 +# Apply action "h4g5" +action: 312 + +# State 34 +# Apply action "h8g7" +action: 62 + +# State 35 +# Apply action "c1d2" +action: 466 + +# State 36 +# Apply action "f8e7" +action: 46 + +# State 37 +# Apply action "a3b4" +action: 322 + +# State 38 +# Apply action "g7f6" +action: 118 + +# State 39 +# 8.+...... +# 7....+... +# 6.+...+.. +# 5....+.o. +# 4.o...... +# 3........ +# 2.o.o.... +# 1........ 
+# abcdefgh +IsTerminal() = False +History() = [336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118] +HistoryString() = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118" +InformationStateString(1) = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118" +ObservationString(0) = "8.+......\n7....+...\n6.+...+..\n5....+.o.\n4.o......\n3........\n2.o.o....\n1........\n abcdefgh\n" +ObservationString(1) = "8.+......\n7....+...\n6.+...+..\n5....+.o.\n4.o......\n3........\n2.o.o....\n1........\n abcdefgh\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◉◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◉◉◉◉◯◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯ ◉◯◉◉◉◯◉◉ +◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◉◉◉◉◯◉◯◉ +◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◉◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◉◉◉◉◉ +◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉◉ +◯◉◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◉◉◯◉◉ +◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯ ◉◉◉◉◯◉◯◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◉◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◯◯◯ ◉◯◉◯◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [242, 264, 266, 392, 394, 408, 410] +StringLegalActions() = ["g5h6", "b4a5", "b4c5", "b2a3", "b2c3", "d2c3", "d2e3"] + +# Apply action "b4a5" +action: 264 + +# State 40 +# Apply action "f6h4" +action: 173 + +# State 41 +# Apply action "a5c7" +action: 195 + +# State 42 +# 8.+...... +# 7..o.+... +# 6........ +# 5....+... +# 4.......+ +# 3........ +# 2.o.o.... +# 1........ 
+# abcdefgh +IsTerminal() = False +History() = [336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195] +HistoryString() = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195" +InformationStateString(1) = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195" +ObservationString(0) = "8.+......\n7..o.+...\n6........\n5....+...\n4.......+\n3........\n2.o.o....\n1........\n abcdefgh\n" +ObservationString(1) = "8.+......\n7..o.+...\n6........\n5....+...\n4.......+\n3........\n2.o.o....\n1........\n abcdefgh\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◉◯◉◉◉◉◉◉ +◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◉◉◯◉◯◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◉◉◉◉◯◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉ ◉◉◉◉◉◉◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◉◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◉◉◉◉◉ +◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯ ◉◉◯◉◯◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉◉ +◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◯◯◯◯ ◉◯◉◯◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [13] +StringLegalActions() = ["b8d6"] + +# Apply action "b8d6" +action: 13 + +# State 43 +# Apply action "d2e3" +action: 410 + +# State 44 +# Apply action "e5f4" +action: 228 + +# State 45 +# Apply action "e3g5" +action: 355 + +# State 46 +# Apply action "d6c5" +action: 158 + +# State 47 +# Apply action "g5h6" +action: 242 + +# State 48 +# Apply action "h4g3" +action: 318 + +# State 49 +# Apply action "h6g7" +action: 184 + +# State 50 +# Apply action "g3h2" +action: 372 + +# State 51 +# Apply action "g7h8" +action: 114 + +# State 52 +# Apply action "h2g1" +action: 446 + +# State 53 +# Apply action "h8g7" +action: 62 + +# State 54 +# Apply action "e7d6" +action: 102 + +# State 55 +# Apply action "g7f8" +action: 112 + +# State 56 +# Apply action "g1f2" +action: 496 + +# State 57 +# Apply action "b2c3" +action: 394 + +# State 58 +# Apply action "f2g3" +action: 426 + +# State 59 +# 8.....8.. +# 7........ +# 6...+.... +# 5..+..... +# 4........ +# 3..o...*. +# 2........ +# 1........ 
+# abcdefgh +IsTerminal() = False +History() = [336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195, 13, 410, 228, 355, 158, 242, 318, 184, 372, 114, 446, 62, 102, 112, 496, 394, 426] +HistoryString() = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195, 13, 410, 228, 355, 158, 242, 318, 184, 372, 114, 446, 62, 102, 112, 496, 394, 426" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195, 13, 410, 228, 355, 158, 242, 318, 184, 372, 114, 446, 62, 102, 112, 496, 394, 426" +InformationStateString(1) = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195, 13, 410, 228, 355, 158, 242, 318, 184, 372, 114, 446, 62, 102, 112, 496, 394, 426" +ObservationString(0) = "8.....8..\n7........\n6...+....\n5..+.....\n4........\n3..o...*.\n2........\n1........\n abcdefgh\n" +ObservationString(1) = "8.....8..\n7........\n6...+....\n5..+.....\n4........\n3..o...*.\n2........\n1........\n abcdefgh\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◉◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯ ◉◉◯◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◉◉◯◉◉◉◯◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉◉ +◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◯◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯ ◉◉◯◉◉◉◯◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [44, 46, 336, 338] +StringLegalActions() = ["f8g7", "f8e7", "c3b4", "c3d4"] + +# Apply action "c3b4" +action: 336 + +# State 60 +# Apply action "c5a3" +action: 215 + +# State 61 +# Apply action "f8g7" +action: 44 + +# State 62 +# 8........ +# 7......8. +# 6...+.... +# 5........ +# 4........ +# 3+.....*. +# 2........ +# 1........ 
+# abcdefgh +IsTerminal() = False +History() = [336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195, 13, 410, 228, 355, 158, 242, 318, 184, 372, 114, 446, 62, 102, 112, 496, 394, 426, 336, 215, 44] +HistoryString() = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195, 13, 410, 228, 355, 158, 242, 318, 184, 372, 114, 446, 62, 102, 112, 496, 394, 426, 336, 215, 44" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195, 13, 410, 228, 355, 158, 242, 318, 184, 372, 114, 446, 62, 102, 112, 496, 394, 426, 336, 215, 44" +InformationStateString(1) = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195, 13, 410, 228, 355, 158, 242, 318, 184, 372, 114, 446, 62, 102, 112, 496, 394, 426, 336, 215, 44" +ObservationString(0) = "8........\n7......8.\n6...+....\n5........\n4........\n3+.....*.\n2........\n1........\n abcdefgh\n" +ObservationString(1) = "8........\n7......8.\n6...+....\n5........\n4........\n3+.....*.\n2........\n1........\n abcdefgh\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◉◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯ ◉◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯◉ +◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [156, 158, 324, 368, 370, 372, 374] +StringLegalActions() = ["d6e5", "d6c5", "a3b2", "g3f4", "g3h4", "g3h2", "g3f2"] + +# Apply action "g3h2" +action: 372 + +# State 63 +# Apply action "g7f8" +action: 112 + +# State 64 +# Apply action "d6c5" +action: 158 + +# State 65 +# Apply action "f8g7" +action: 44 + +# State 66 +# Apply action "h2g1" +action: 446 + +# State 67 +# Apply action "g7h6" +action: 116 + +# State 68 +# Apply action "g1f2" +action: 496 + +# State 69 +# Apply action "h6g7" +action: 184 + +# State 70 +# Apply action "f2e1" +action: 430 + +# State 71 +# Apply action "g7h8" +action: 114 + +# State 72 +# Apply action "c5b4" +action: 214 + +# State 73 +# Apply action "h8g7" +action: 62 + +# State 74 +# Apply action "e1d2" +action: 480 + +# State 75 +# Apply action "g7f6" +action: 118 + +# State 76 +# Apply action "a3b2" +action: 324 + +# State 77 +# Apply action "f6e7" +action: 168 + +# State 78 +# Apply action "b4a3" +action: 270 + +# State 79 +# 8........ +# 7....8... +# 6........ +# 5........ +# 4........ +# 3+....... 
+# 2.+.*.... +# 1........ +# abcdefgh +IsTerminal() = False +History() = [336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195, 13, 410, 228, 355, 158, 242, 318, 184, 372, 114, 446, 62, 102, 112, 496, 394, 426, 336, 215, 44, 372, 112, 158, 44, 446, 116, 496, 184, 430, 114, 214, 62, 480, 118, 324, 168, 270] +HistoryString() = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195, 13, 410, 228, 355, 158, 242, 318, 184, 372, 114, 446, 62, 102, 112, 496, 394, 426, 336, 215, 44, 372, 112, 158, 44, 446, 116, 496, 184, 430, 114, 214, 62, 480, 118, 324, 168, 270" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195, 13, 410, 228, 355, 158, 242, 318, 184, 372, 114, 446, 62, 102, 112, 496, 394, 426, 336, 215, 44, 372, 112, 158, 44, 446, 116, 496, 184, 430, 114, 214, 62, 480, 118, 324, 168, 270" +InformationStateString(1) = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195, 13, 410, 228, 355, 158, 242, 318, 184, 372, 114, 446, 62, 102, 112, 496, 394, 426, 336, 215, 44, 372, 112, 158, 44, 446, 116, 496, 184, 430, 114, 214, 62, 480, 118, 324, 168, 270" +ObservationString(0) = "8........\n7....8...\n6........\n5........\n4........\n3+.......\n2.+.*....\n1........\n abcdefgh\n" +ObservationString(1) = "8........\n7....8...\n6........\n5........\n4........\n3+.......\n2.+.*....\n1........\n abcdefgh\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◉◯◯◯◯◯◯ ◉◯◉◯◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉◉ +◯◉◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [96, 98, 100, 102] +StringLegalActions() = ["e7d8", "e7f8", "e7f6", "e7d6"] + +# Apply action "e7d8" +action: 96 + +# State 80 +# Apply action "d2c3" +action: 408 + +# State 81 +# Apply action "d8e7" +action: 28 + +# State 82 +# 8........ +# 7....8... +# 6........ +# 5........ +# 4........ +# 3+.*..... +# 2.+...... +# 1........ 
+# abcdefgh +IsTerminal() = False +History() = [336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195, 13, 410, 228, 355, 158, 242, 318, 184, 372, 114, 446, 62, 102, 112, 496, 394, 426, 336, 215, 44, 372, 112, 158, 44, 446, 116, 496, 184, 430, 114, 214, 62, 480, 118, 324, 168, 270, 96, 408, 28] +HistoryString() = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195, 13, 410, 228, 355, 158, 242, 318, 184, 372, 114, 446, 62, 102, 112, 496, 394, 426, 336, 215, 44, 372, 112, 158, 44, 446, 116, 496, 184, 430, 114, 214, 62, 480, 118, 324, 168, 270, 96, 408, 28" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195, 13, 410, 228, 355, 158, 242, 318, 184, 372, 114, 446, 62, 102, 112, 496, 394, 426, 336, 215, 44, 372, 112, 158, 44, 446, 116, 496, 184, 430, 114, 214, 62, 480, 118, 324, 168, 270, 96, 408, 28" +InformationStateString(1) = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195, 13, 410, 228, 355, 158, 242, 318, 184, 372, 114, 446, 62, 102, 112, 496, 394, 426, 336, 215, 44, 372, 112, 158, 44, 446, 116, 496, 184, 430, 114, 214, 62, 480, 118, 324, 168, 270, 96, 408, 28" +ObservationString(0) = "8........\n7....8...\n6........\n5........\n4........\n3+.*.....\n2.+......\n1........\n abcdefgh\n" +ObservationString(1) = "8........\n7....8...\n6........\n5........\n4........\n3+.*.....\n2.+......\n1........\n abcdefgh\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯ ◉◯◯◯◯◯◯◯ ◯◉◯◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◉◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◉◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◉◉◉◉ +◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [336, 338, 340, 396, 398] +StringLegalActions() = ["c3b4", "c3d4", "c3d2", "b2c1", "b2a1"] + +# Apply action "b2c1" +action: 396 + +# State 83 +# Apply action "e7f6" +action: 100 + +# State 84 +# Apply action "c3b4" +action: 336 + +# State 85 +# Apply action "f6g7" +action: 170 + +# State 86 +# Apply action "c1d2" +action: 466 + +# State 87 +# Apply action "g7f6" +action: 118 + +# State 88 +# Apply action "b4a5" +action: 264 + +# State 89 +# Apply action "f6g7" +action: 170 + +# State 90 +# Apply action "a3b2" +action: 324 + +# State 91 +# Apply action "g7f6" +action: 118 + +# State 92 +# Apply action "a5b4" +action: 196 + +# State 93 +# Apply action "f6g5" 
+action: 172 + +# State 94 +# Apply action "b4c5" +action: 266 + +# State 95 +# Apply action "g5f6" +action: 240 + +# State 96 +# Apply action "d2c3" +action: 408 + +# State 97 +# Apply action "f6e7" +action: 168 + +# State 98 +# Apply action "c5b6" +action: 208 + +# State 99 +# 8........ +# 7....8... +# 6.*...... +# 5........ +# 4........ +# 3..*..... +# 2.+...... +# 1........ +# abcdefgh +IsTerminal() = False +History() = [336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195, 13, 410, 228, 355, 158, 242, 318, 184, 372, 114, 446, 62, 102, 112, 496, 394, 426, 336, 215, 44, 372, 112, 158, 44, 446, 116, 496, 184, 430, 114, 214, 62, 480, 118, 324, 168, 270, 96, 408, 28, 396, 100, 336, 170, 466, 118, 264, 170, 324, 118, 196, 172, 266, 240, 408, 168, 208] +HistoryString() = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195, 13, 410, 228, 355, 158, 242, 318, 184, 372, 114, 446, 62, 102, 112, 496, 394, 426, 336, 215, 44, 372, 112, 158, 44, 446, 116, 496, 184, 430, 114, 214, 62, 480, 118, 324, 168, 270, 96, 408, 28, 396, 100, 336, 170, 466, 118, 264, 170, 324, 118, 196, 172, 266, 240, 408, 168, 208" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195, 13, 410, 228, 355, 158, 242, 318, 184, 372, 114, 446, 62, 102, 112, 496, 394, 426, 336, 215, 44, 372, 112, 158, 44, 446, 116, 496, 184, 430, 114, 214, 62, 480, 118, 324, 168, 270, 96, 408, 28, 396, 100, 336, 170, 466, 118, 264, 170, 324, 118, 196, 172, 266, 240, 408, 168, 208" +InformationStateString(1) = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195, 13, 410, 228, 355, 158, 242, 318, 184, 372, 114, 446, 62, 102, 112, 496, 394, 426, 336, 215, 44, 372, 112, 158, 44, 446, 116, 496, 184, 430, 114, 214, 62, 480, 118, 324, 168, 270, 96, 408, 28, 396, 100, 336, 170, 466, 118, 264, 170, 324, 118, 196, 172, 266, 240, 408, 168, 208" +ObservationString(0) = "8........\n7....8...\n6.*......\n5........\n4........\n3..*.....\n2.+......\n1........\n abcdefgh\n" +ObservationString(1) = "8........\n7....8...\n6.*......\n5........\n4........\n3..*.....\n2.+......\n1........\n abcdefgh\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◯◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◉◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉◉ +◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◯◉◉◉◉◉ +◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ 
+Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [96, 98, 100, 102] +StringLegalActions() = ["e7d8", "e7f8", "e7f6", "e7d6"] + +# Apply action "e7f8" +action: 98 + +# State 100 +# Apply action "b2c1" +action: 396 + +# State 101 +# 8.....8.. +# 7........ +# 6.*...... +# 5........ +# 4........ +# 3..*..... +# 2........ +# 1..*..... +# abcdefgh +IsTerminal() = True +History() = [336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195, 13, 410, 228, 355, 158, 242, 318, 184, 372, 114, 446, 62, 102, 112, 496, 394, 426, 336, 215, 44, 372, 112, 158, 44, 446, 116, 496, 184, 430, 114, 214, 62, 480, 118, 324, 168, 270, 96, 408, 28, 396, 100, 336, 170, 466, 118, 264, 170, 324, 118, 196, 172, 266, 240, 408, 168, 208, 98, 396] +HistoryString() = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195, 13, 410, 228, 355, 158, 242, 318, 184, 372, 114, 446, 62, 102, 112, 496, 394, 426, 336, 215, 44, 372, 112, 158, 44, 446, 116, 496, 184, 430, 114, 214, 62, 480, 118, 324, 168, 270, 96, 408, 28, 396, 100, 336, 170, 466, 118, 264, 170, 324, 118, 196, 172, 266, 240, 408, 168, 208, 98, 396" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195, 13, 410, 228, 355, 158, 242, 318, 184, 372, 114, 446, 62, 102, 112, 496, 394, 426, 336, 215, 44, 372, 112, 158, 44, 446, 116, 496, 184, 430, 114, 214, 62, 480, 118, 324, 168, 270, 96, 408, 28, 396, 100, 336, 170, 466, 118, 264, 170, 324, 118, 196, 172, 266, 240, 408, 168, 208, 98, 396" +InformationStateString(1) = "336, 174, 352, 231, 395, 156, 283, 101, 424, 140, 267, 85, 482, 30, 354, 247, 425, 283, 119, 496, 190, 368, 247, 357, 440, 496, 370, 424, 411, 297, 85, 450, 68, 312, 62, 466, 46, 322, 118, 264, 173, 195, 13, 410, 228, 355, 158, 242, 318, 184, 372, 114, 446, 62, 102, 112, 496, 394, 426, 336, 215, 44, 372, 112, 158, 44, 446, 116, 496, 184, 430, 114, 214, 62, 480, 118, 324, 168, 270, 96, 408, 28, 396, 100, 336, 170, 466, 118, 264, 170, 324, 118, 196, 172, 266, 240, 408, 168, 208, 98, 396" +ObservationString(0) = "8.....8..\n7........\n6.*......\n5........\n4........\n3..*.....\n2........\n1..*.....\n abcdefgh\n" +ObservationString(1) = "8.....8..\n7........\n6.*......\n5........\n4........\n3..*.....\n2........\n1..*.....\n abcdefgh\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◯◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◯◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◯◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◯◉◉◉◉◉ +Rewards() = 
[0, 0] +Returns() = [0, 0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/chess.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/chess.txt new file mode 100644 index 0000000..f1eb2ae --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/chess.txt @@ -0,0 +1,2476 @@ +game: chess + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Chess" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["chess960"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "chess" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 4674 +PolicyTensorShape() = [4674] +MaxChanceOutcomes() = 0 +GetParameters() = {chess960=False} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [20, 8, 8] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 1280 +MaxGameLength() = 17695 +ToString() = "chess()" + +# State 0 +# rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1" +ObservationString(1) = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1" +ObservationTensor(0): +◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ 
◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [89, 90, 652, 656, 673, 674, 1257, 1258, 1841, 1842, 2425, 2426, 3009, 3010, 3572, 3576, 3593, 3594, 4177, 4178] +StringLegalActions() = ["a3", "a4", "Na3", "Nc3", "b3", "b4", "c3", "c4", "d3", "d4", "e3", "e4", "f3", "f4", "Nf3", "Nh3", "g3", "g4", "h3", "h4"] + +# Apply action "Nh3" +action: 3576 + +# State 1 +# rnbqkbnr/pppppppp/8/8/8/7N/PPPPPPPP/RNBQKB1R b KQkq - 1 1 +IsTerminal() = False +History() = [3576] +HistoryString() = "3576" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "3576" +InformationStateString(1) = "3576" +ObservationString(0) = "rnbqkbnr/pppppppp/8/8/8/7N/PPPPPPPP/RNBQKB1R b KQkq - 1 1" +ObservationString(1) = "rnbqkbnr/pppppppp/8/8/8/7N/PPPPPPPP/RNBQKB1R b KQkq - 1 1" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 
0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 
1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [89, 90, 652, 656, 673, 674, 1257, 1258, 1841, 1842, 2425, 2426, 3009, 3010, 3572, 3576, 3593, 3594, 4177, 4178] +StringLegalActions() = ["a6", "a5", "Na6", "Nc6", "b6", "b5", "c6", "c5", "d6", "d5", "e6", "e5", "f6", "f5", "Nf6", "Nh6", "g6", "g5", "h6", "h5"] + +# Apply action "f6" +action: 3009 + +# State 2 +# rnbqkbnr/ppppp1pp/5p2/8/8/7N/PPPPPPPP/RNBQKB1R w KQkq - 0 2 +IsTerminal() = False +History() = [3576, 3009] +HistoryString() = "3576, 3009" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "3576, 3009" +InformationStateString(1) = "3576, 3009" +ObservationString(0) = "rnbqkbnr/ppppp1pp/5p2/8/8/7N/PPPPPPPP/RNBQKB1R w KQkq - 0 2" +ObservationString(1) = "rnbqkbnr/ppppp1pp/5p2/8/8/7N/PPPPPPPP/RNBQKB1R w KQkq - 0 2" +ObservationTensor(0): +◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◉◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◉◉◉◉◉◯◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◉◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◉◉◉◉◉◯◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [89, 90, 652, 656, 673, 674, 1257, 1258, 1841, 1842, 2425, 2426, 3009, 3010, 3593, 3594, 4117, 4300, 4301, 4302] +StringLegalActions() = ["a3", "a4", "Na3", "Nc3", "b3", "b4", "c3", "c4", "d3", "d4", "e3", "e4", "f3", "f4", "g3", "g4", "Rg1", "Nf4", "Ng1", "Ng5"] + +# Apply action "f4" +action: 3010 + +# State 3 +# rnbqkbnr/ppppp1pp/5p2/8/5P2/7N/PPPPP1PP/RNBQKB1R b KQkq - 0 2 +IsTerminal() = False +History() = [3576, 3009, 3010] +HistoryString() = "3576, 3009, 3010" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "3576, 3009, 3010" +InformationStateString(1) = "3576, 3009, 3010" +ObservationString(0) = "rnbqkbnr/ppppp1pp/5p2/8/5P2/7N/PPPPP1PP/RNBQKB1R b KQkq - 0 2" +ObservationString(1) = "rnbqkbnr/ppppp1pp/5p2/8/5P2/7N/PPPPP1PP/RNBQKB1R b KQkq - 0 2" +ObservationTensor(0): +◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◉◉ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◉◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◉◉◉◉◉◯◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◉◉◉◉◉◯◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ 
◉◉◉◉◉◉◉◉ +◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◉◉ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◉◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◉◉◉◉◉◯◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◉◉◉◉◉◯◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [89, 90, 652, 656, 673, 674, 1257, 1258, 1841, 1842, 2380, 2425, 2426, 3082, 3576, 3593, 3594, 4177, 4178] +StringLegalActions() = ["a6", "a5", "Na6", "Nc6", "b6", "b5", "c6", "c5", "d6", "d5", "Kf7", "e6", "e5", "f5", "Nh6", "g6", "g5", "h6", "h5"] + +# Apply action "Kf7" +action: 2380 + +# State 4 +# rnbq1bnr/pppppkpp/5p2/8/5P2/7N/PPPPP1PP/RNBQKB1R w KQ - 1 3 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380] +HistoryString() = "3576, 3009, 3010, 2380" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "3576, 3009, 3010, 2380" +InformationStateString(1) = "3576, 3009, 3010, 2380" +ObservationString(0) = "rnbq1bnr/pppppkpp/5p2/8/5P2/7N/PPPPP1PP/RNBQKB1R w KQ - 1 3" +ObservationString(1) = "rnbq1bnr/pppppkpp/5p2/8/5P2/7N/PPPPP1PP/RNBQKB1R w KQ - 1 3" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 
0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [89, 90, 652, 656, 673, 674, 1257, 1258, 1841, 1842, 2380, 2425, 2426, 3155, 3593, 3594, 4117, 4299, 4301, 4302] +StringLegalActions() = ["a3", "a4", "Na3", "Nc3", "b3", "b4", "c3", "c4", "d3", "d4", "Kf2", "e3", "e4", "f5", "g3", "g4", "Rg1", "Nf2", "Ng1", "Ng5+"] + +# Apply action "b3" +action: 673 + +# State 5 +# rnbq1bnr/pppppkpp/5p2/8/5P2/1P5N/P1PPP1PP/RNBQKB1R b KQ - 0 3 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673] +HistoryString() = "3576, 3009, 3010, 2380, 673" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673" +ObservationString(0) = "rnbq1bnr/pppppkpp/5p2/8/5P2/1P5N/P1PPP1PP/RNBQKB1R b KQ - 0 3" +ObservationString(1) = "rnbq1bnr/pppppkpp/5p2/8/5P2/1P5N/P1PPP1PP/RNBQKB1R b KQ - 0 3" +ObservationTensor(0): +◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◉◉◯◉◉ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◉◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◯◉◉◉◉◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ 
◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◯◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◯◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◉◉◉◯◉◉ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◉◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◯◉◉◉◉◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◯◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◯◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [89, 90, 652, 656, 673, 674, 1257, 1258, 1782, 1841, 1842, 2425, 2426, 3036, 3037, 3050, 3082, 3576, 3593, 3594, 4177, 4178] +StringLegalActions() = ["a6", "a5", "Na6", "Nc6", "b6", "b5", "c6", "c5", "Qe8", "d6", "d5", "e6", "e5", "Ke8", "Kg6", "Ke6", "f5", "Nh6", "g6", "g5", "h6", "h5"] + +# Apply action "Ke6" +action: 3050 + +# State 6 +# Apply action "Bb2" +action: 1225 + +# State 7 +# Apply action "d5" +action: 1842 + +# State 8 +# Apply action "g3" +action: 3593 + +# State 9 +# Apply action "d4" +action: 1987 + +# State 10 +# Apply action "a4" +action: 90 + +# State 11 +# Apply action "Nh6" +action: 3576 + +# State 12 +# Apply action "Bg2" +action: 2964 + +# State 13 +# Apply action "a6" +action: 89 + +# State 14 +# Apply action "e3" +action: 2425 + +# State 15 +# Apply action "Rg8" +action: 4117 + +# State 16 +# Apply action "Ke2" +action: 2352 + +# State 17 +# Apply action "Bd7" +action: 1212 + +# State 18 +# Apply action "Ng5+" +action: 4302 + +# State 19 +# Apply action "fxg5" +action: 3110 + +# State 20 +# rn1q1br1/1ppbp1pp/p3k2n/6p1/P2p1P2/1P2P1P1/1BPPK1BP/RN1Q3R w - - 0 11 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110" 
+InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110" +ObservationString(0) = "rn1q1br1/1ppbp1pp/p3k2n/6p1/P2p1P2/1P2P1P1/1BPPK1BP/RN1Q3R w - - 0 11" +ObservationString(1) = "rn1q1br1/1ppbp1pp/p3k2n/6p1/P2p1P2/1P2P1P1/1BPPK1BP/RN1Q3R w - - 0 11" +ObservationTensor(0): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◉◉◯◯◯◉ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◉◯◯ ◯◯◯◉◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◉◯◉◯◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ + +◯◯◉◯◉◉◉◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◯◉◉◯◉◯◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◉◉◯◉◯◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◯◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◉◉◉◯◉◉◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◉◯◉◯◯◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◉◉◯◯◯◉ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◉◯◯ ◯◯◯◉◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◉◯◉◯◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ + +◯◯◉◯◉◉◉◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◯◉◉◯◉◯◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◉◉◯◉◯◉◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◯◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◉◉◉◯◉◉◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◉◯◉◯◯◉ ◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [16, 17, 235, 652, 656, 701, 702, 714, 715, 746, 1257, 1258, 1781, 1782, 1783, 1784, 1841, 2424, 2439, 2453, 2466, 2467, 2498, 2539, 3155, 3183, 3620, 3621, 3630, 3631, 3632, 3633, 3634, 3666, 
4115, 4116, 4117, 4177, 4178] +StringLegalActions() = ["Ra2", "Ra3", "a5", "Na3", "Nc3", "Bc3", "Bxd4", "Ba3", "Bc1", "b4", "c3", "c4", "Qc1", "Qe1", "Qf1", "Qg1", "d3", "Ke1", "Kf2", "Kf3", "Kd3", "Kf1", "e4", "exd4", "f5+", "fxg5", "Bf1", "Bh3+", "Bxb7", "Bc6", "Bd5+", "Be4", "Bf3", "g4", "Re1", "Rf1", "Rg1", "h3", "h4"] + +# Apply action "d3" +action: 1841 + +# State 21 +# rn1q1br1/1ppbp1pp/p3k2n/6p1/P2p1P2/1P1PP1P1/1BP1K1BP/RN1Q3R b - - 0 11 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841" +ObservationString(0) = "rn1q1br1/1ppbp1pp/p3k2n/6p1/P2p1P2/1P1PP1P1/1BP1K1BP/RN1Q3R b - - 0 11" +ObservationString(1) = "rn1q1br1/1ppbp1pp/p3k2n/6p1/P2p1P2/1P1PP1P1/1BP1K1BP/RN1Q3R b - - 0 11" +ObservationTensor(0): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◉ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◉◯◉◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◉◯◯ ◯◯◯◉◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◉◯◉◯◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ + +◯◯◉◯◉◉◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◯◯◉◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◯◉◯◯◉◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◉◉◯◉◯◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◉◉◉◯◉◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◉◯◉◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◉ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◉◉◯◉◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◉◯◯ ◯◯◯◉◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ 
◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◉◯◉◯◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ + +◯◯◉◯◉◉◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◯◯◉◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◯◉◯◯◉◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◉◉◯◉◯◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◉◉◉◯◉◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◉◯◉◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [16, 162, 656, 673, 674, 1257, 1258, 1781, 1782, 1868, 1880, 1881, 1882, 1883, 2088, 2511, 2512, 2526, 2540, 3534, 3593, 3739, 3780, 4299, 4300, 4302] +StringLegalActions() = ["Ra7", "a5", "Nc6", "b6", "b5", "c6", "c5", "Qc8", "Qe8", "Bc8", "Bxa4", "Bb5", "Bc6", "Be8", "dxe3", "Kd6", "Kf6", "Kf5", "Kf7", "Rh8", "g6", "g4", "gxf4", "Nf7", "Nf5", "Ng4"] + +# Apply action "c6" +action: 1257 + +# State 22 +# Apply action "Ba3" +action: 714 + +# State 23 +# Apply action "b6" +action: 673 + +# State 24 +# Apply action "Bxe7" +action: 193 + +# State 25 +# Apply action "g6" +action: 3593 + +# State 26 +# Apply action "Rf1" +action: 4116 + +# State 27 +# Apply action "Bg7" +action: 2964 + +# State 28 +# Apply action "Bxg5" +action: 2833 + +# State 29 +# Apply action "Nf5" +action: 4300 + +# State 30 +# Apply action "c3" +action: 1257 + +# State 31 +# Apply action "Nh4" +action: 3209 + +# State 32 +# Apply action "Nd2" +action: 654 + +# State 33 +# Apply action "Bh8" +action: 3635 + +# State 34 +# Apply action "Bxd8" +action: 3851 + +# State 35 +# Apply action "Rf8" +action: 3533 + +# State 36 +# Apply action "Nf3" +action: 1895 + +# State 37 +# Apply action "a5" +action: 162 + +# State 38 +# Apply action "Bh3+" +action: 3621 + +# State 39 +# Apply action "Nf5" +action: 4445 + +# State 40 +# rn1B1r1b/3b3p/1pp1k1p1/p4n2/P2p1P2/1PPPPNPB/4K2P/R2Q1R2 w - - 2 21 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445" +ObservationString(0) = "rn1B1r1b/3b3p/1pp1k1p1/p4n2/P2p1P2/1PPPPNPB/4K2P/R2Q1R2 w - - 2 21" +ObservationString(1) = "rn1B1r1b/3b3p/1pp1k1p1/p4n2/P2p1P2/1PPPPNPB/4K2P/R2Q1R2 w - - 2 21" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 
0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [16, 17, 30, 31, 746, 1330, 1358, 1768, 1780, 1781, 1782, 1809, 2305, 2306, 2321, 2322, 2323, 2324, 2424, 2438, 2439, 2498, 2539, 2936, 2949, 2950, 2951, 3131, 3132, 3133, 3134, 3136, 3137, 3138, 3666, 4277, 4290, 4291] +StringLegalActions() = ["Ra2", "Ra3", "Rb1", "Rc1", "b4", "c4", "cxd4", "Qd2", "Qb1", "Qc1", "Qe1", "Qc2", "Bxb6", "Bc7", "Be7", "Bf6", "Bg5", "Bh4", "Ke1", "Kd2", "Kf2", "e4", "exd4", "Rf2", "Re1", "Rg1", "Rh1", "Nd2", "Nxd4+", "Ne1", "Ne5", "Nh4", "Ng1", "Ng5+", "g4", "Bg2", "Bxf5+", "Bg4"] + +# Apply action "Bf6" +action: 2322 + +# State 41 +# rn3r1b/3b3p/1pp1kBp1/p4n2/P2p1P2/1PPPPNPB/4K2P/R2Q1R2 b - - 3 21 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 
2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322" +ObservationString(0) = "rn3r1b/3b3p/1pp1kBp1/p4n2/P2p1P2/1PPPPNPB/4K2P/R2Q1R2 b - - 3 21" +ObservationString(1) = "rn3r1b/3b3p/1pp1kBp1/p4n2/P2p1P2/1PPPPNPB/4K2P/R2Q1R2 b - - 3 21" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] 
+LegalActions() = [16, 17, 652, 746, 1330, 1868, 1883, 2088, 2101, 2511, 2512, 2539, 2540, 2936, 2937, 2947, 2948, 2949, 2950, 3666, 4144, 4145, 4177, 4178] +StringLegalActions() = ["Ra7", "Ra6", "Na6", "b5", "c5", "Bc8", "Be8", "dxe3", "dxc3", "Kd6", "Kxf6", "Kd5", "Kf7", "Rf7", "Rxf6", "Rc8", "Rd8", "Re8", "Rg8", "g5", "Bxf6", "Bg7", "h6", "h5"] + +# Apply action "Kd5" +action: 2539 + +# State 42 +# Apply action "Ng1" +action: 3137 + +# State 43 +# Apply action "Nh6" +action: 3208 + +# State 44 +# Apply action "Kf2" +action: 2439 + +# State 45 +# Apply action "Kc5" +action: 2000 + +# State 46 +# Apply action "Ra3" +action: 17 + +# State 47 +# Apply action "Be8" +action: 1883 + +# State 48 +# Apply action "Qh5+" +action: 1799 + +# State 49 +# Apply action "gxh5" +action: 3694 + +# State 50 +# Apply action "Ne2" +action: 3570 + +# State 51 +# Apply action "Kd6" +action: 1445 + +# State 52 +# Apply action "f5" +action: 3155 + +# State 53 +# Apply action "Bxf6" +action: 4144 + +# State 54 +# Apply action "Rh1" +action: 2951 + +# State 55 +# Apply action "Bd7" +action: 2393 + +# State 56 +# Apply action "Nf4" +action: 2481 + +# State 57 +# Apply action "Bg7" +action: 3124 + +# State 58 +# Apply action "Rg1" +action: 4117 + +# State 59 +# Apply action "Ng8" +action: 4301 + +# State 60 +# rn3rn1/3b2bp/1ppk4/p4P1p/P2p1N2/RPPPP1PB/5K1P/6R1 w - - 6 31 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301" +ObservationString(0) = "rn3rn1/3b2bp/1ppk4/p4P1p/P2p1N2/RPPPP1PB/5K1P/6R1 w - - 6 31" +ObservationString(1) = "rn3rn1/3b2bp/1ppk4/p4P1p/P2p1N2/RPPPP1PB/5K1P/6R1 w - - 6 31" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 
0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.05941, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [160, 161, 746, 1330, 1358, 2498, 2539, 3008, 3009, 3022, 3023, 3036, 3205, 3206, 3207, 3209, 3210, 3211, 3228, 3520, 3528, 3529, 3530, 3531, 3532, 3533, 3534, 3666, 4276, 4277, 4291] +StringLegalActions() = ["Raa1", "Ra2", "b4", "c4", "cxd4", "e4", "exd4", "Kf1", "Kf3", "Ke2", "Kg2", "Ke1", "Nd5", "Ne2", "Ne6", "Nxh5", "Ng2", "Ng6", "f6", "Rg2", "Rga1", "Rb1", "Rc1", "Rd1", "Re1", "Rf1", "Rh1", "g4", "Bf1", "Bg2", "Bg4"] + +# Apply action "Rd1" +action: 3531 + +# State 61 +# rn3rn1/3b2bp/1ppk4/p4P1p/P2p1N2/RPPPP1PB/5K1P/3R4 b - - 7 31 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 
1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531" +ObservationString(0) = "rn3rn1/3b2bp/1ppk4/p4P1p/P2p1N2/RPPPP1PB/5K1P/3R4 b - - 7 31" +ObservationString(1) = "rn3rn1/3b2bp/1ppk4/p4P1p/P2p1N2/RPPPP1PB/5K1P/3R4 b - - 7 31" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.06931, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [16, 17, 652, 746, 1330, 1868, 1869, 1870, 1883, 1941, 1942, 1955, 1956, 2088, 2101, 2936, 2937, 2938, 2947, 2948, 2949, 3570, 3572, 3576, 3621, 3633, 3634, 3635, 4177, 4323] +StringLegalActions() = ["Ra7", "Ra6", "Na6", "b5", "c5", "Bc8", "Be6", "Bxf5", "Be8", "Kc7", "Ke5", "Kc5", "Ke7", "dxe3+", "dxc3", "Rf7", "Rf6", "Rxf5", "Rc8", "Rd8", "Re8", "Ne7", "Nf6", "Nh6", "Bh6", "Be5", "Bf6", "Bh8", "h6", "h4"] + +# Apply action "Be6" +action: 1869 + +# State 62 +# Apply action "Bf1" +action: 4276 + +# State 63 +# Apply action "h4" +action: 4323 + +# State 64 +# Apply action "b4" +action: 746 + +# State 65 +# Apply action "Bc8" +action: 2524 + +# State 66 +# Apply action "Ra2" +action: 161 + +# State 67 +# Apply action "Rf7" +action: 2936 + +# State 68 +# Apply action "Re1" +action: 1782 + +# State 69 +# Apply action "Raa7" +action: 16 + +# State 70 +# Apply action "Be2" +action: 2977 + +# State 71 +# Apply action "Rf8" +action: 3008 + +# State 72 +# Apply action "Kf3" +action: 3009 + +# State 73 +# Apply action "h5" +action: 4178 + +# State 74 +# Apply action "Bf1" +action: 2467 + +# State 75 +# Apply action "dxc3" +action: 2101 + +# State 76 +# Apply action "Ne2" +action: 3206 + +# State 77 +# Apply action "Ne7" +action: 3570 + +# State 78 +# Apply action "Rd1" +action: 2365 + +# State 79 +# Apply action "Nd7" +action: 654 + +# State 80 +# 2b2r2/r2nn1b1/1ppk4/p4P1p/PP5p/2pPPKP1/R3N2P/3R1B2 w - - 4 41 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 
2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654" +ObservationString(0) = "2b2r2/r2nn1b1/1ppk4/p4P1p/PP5p/2pPPKP1/R3N2P/3R1B2 w - - 4 41" +ObservationString(1) = "2b2r2/r2nn1b1/1ppk4/p4P1p/PP5p/2pPPKP1/R3N2P/3R1B2 w - - 4 41" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [88, 89, 103, 104, 105, 819, 860, 1768, 1779, 1780, 1781, 1782, 1914, 2474, 2475, 2477, 2478, 2481, 2498, 2964, 2965, 3081, 3082, 3123, 3124, 3228, 3666, 3694, 4177] +StringLegalActions() = ["Raa1", "Ra3", "Rb2", "Rc2", "Rad2", "b5", "bxa5", "Rdd2", "Rda1", "Rb1", "Rc1", "Re1", "d4", "Nc1", "Nxc3", "Nd4", "Ng1", "Nf4", "e4", "Bg2", "Bh3", "Kf2", "Kf4", "Ke4", "Kg2", "f6", "g4", "gxh4", "h3"] + +# Apply action "Re1" +action: 1782 + +# State 81 +# 2b2r2/r2nn1b1/1ppk4/p4P1p/PP5p/2pPPKP1/R3N2P/4RB2 b - - 5 41 
+IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782" +ObservationString(0) = "2b2r2/r2nn1b1/1ppk4/p4P1p/PP5p/2pPPKP1/R3N2P/4RB2 b - - 5 41" +ObservationString(1) = "2b2r2/r2nn1b1/1ppk4/p4P1p/PP5p/2pPPKP1/R3N2P/4RB2 b - - 5 41" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 
0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [88, 89, 103, 104, 263, 746, 1224, 1225, 1330, 1549, 1890, 1893, 1895, 1897, 1914, 1941, 1942, 2477, 2478, 2479, 2481, 2936, 2937, 2938, 2948, 2949, 2950, 2951, 3621, 3632, 3633, 3634, 3635, 4396, 4437] +StringLegalActions() = ["Ra8", "Ra6", "Rb7", "Rc7", "axb4", "b5", "Ba6", "Bb7", "c5", "c2", "Nb8", "Nc5", "Nf6", "Ne5+", "Kd5", "Kc7", "Ke5", "Nd5", "Ng8", "Ng6", "Nxf5", "Rf7", "Rf6", "Rxf5+", "Rd8", "Re8", "Rg8", "Rh8", "Bh6", "Bd4", "Be5", "Bf6", "Bh8", "h3", "hxg3"] + +# Apply action "Rb7" +action: 103 + +# State 82 +# Apply action "Kf2" +action: 3081 + +# State 83 +# Apply action "h3" +action: 4396 + +# State 84 +# Apply action "Rb1" +action: 2363 + +# State 85 +# Apply action "c2" +action: 1549 + +# State 86 +# Apply action "Nd4" +action: 2477 + +# State 87 +# Apply action "axb4" +action: 263 + +# State 88 +# Apply action "Ke2" +action: 3022 + +# State 89 +# Apply action "Nxf5" +action: 2481 + +# State 90 +# Apply action "Kf2" +action: 2439 + +# State 91 +# Apply action "Nxg3+" +action: 3211 + +# State 92 +# Apply action "Nf3" +action: 2040 + +# State 93 +# Apply action "Bh8" +action: 3635 + +# State 94 +# Apply action "Rbb2" +action: 600 + +# State 95 +# Apply action "Ke6" +action: 1928 + +# State 96 +# Apply action "Bxh3+" +action: 2965 + +# State 97 +# Apply action "Nf5" +action: 3936 + +# State 98 +# Apply action "Ne5" +action: 3134 + +# State 99 +# Apply action "Nb8" +action: 1890 + +# State 100 +# 1nb2r1b/1r6/1pp1k3/4Nn1p/Pp6/3PP2B/RRp2K1P/8 w - - 3 51 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 
4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890" +ObservationString(0) = "1nb2r1b/1r6/1pp1k3/4Nn1p/Pp6/3PP2B/RRp2K1P/8 w - - 3 51" +ObservationString(1) = "1nb2r1b/1r6/1pp1k3/4Nn1p/Pp6/3PP2B/RRp2K1P/8 w - - 3 51" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 
0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [88, 89, 235, 672, 673, 674, 687, 1914, 2498, 2693, 2694, 2696, 2697, 2698, 2699, 2700, 3008, 3009, 3022, 3023, 3036, 3051, 4276, 4277, 4290, 4291] +StringLegalActions() = ["Ra1", "Ra3", "a5", "Rb1", "Rb3", "Rxb4", "Rxc2", "d4", "e4", "Nc4", "Nxc6", "Nd7", "Ng4", "Ng6", "Nf3", "Nf7", "Kf1", "Kf3", "Ke2", "Kg2", "Ke1", "Kg1", "Bf1", "Bg2", "Bxf5+", "Bg4"] + +# Apply action "e4" +action: 2498 + +# State 101 +# 1nb2r1b/1r6/1pp1k3/4Nn1p/Pp2P3/3P3B/RRp2K1P/8 b - - 0 51 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 
1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498" +ObservationString(0) = "1nb2r1b/1r6/1pp1k3/4Nn1p/Pp2P3/3P3B/RRp2K1P/8 b - - 0 51" +ObservationString(1) = "1nb2r1b/1r6/1pp1k3/4Nn1p/Pp2P3/3P3B/RRp2K1P/8 b - - 0 51" +ObservationTensor(0): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉ ◯◯◉◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◉◯◯◯ ◯◉◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◉◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ + +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◉◉◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◯◉◉◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◯◯◉◯◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◯◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◯◯◉◉◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉ ◯◯◉◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◉◯◯◯ ◯◉◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◉◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◉ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ + +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◉◉◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◯◉◉◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◯◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◯◯◉◯◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◯◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◯◯◉◉◯◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [652, 654, 686, 687, 688, 689, 690, 691, 692, 746, 892, 1212, 1330, 1606, 1609, 1612, 1622, 2497, 2498, 2511, 2512, 2936, 2937, 2948, 2949, 2950, 4143, 4144, 4145, 4323] +StringLegalActions() = ["Na6", "Nd7", "Ra7", "Rc7", "Rd7", "Re7", "Rbf7", "Rg7", "Rh7", "b5", "b3", "Bd7", "c5", "c1=R", "c1=B", "c1=N", "c1=Q", "Ke7", "Kxe5", "Kd6", "Kf6", "Rff7", "Rf6", "Rd8", "Re8", "Rg8", "Bxe5", "Bf6", "Bg7", "h4"] + +# Apply action "Bf6" +action: 4144 + +# State 102 +# Apply action "Kf3" +action: 3009 + +# State 103 +# Apply action "Bg5" +action: 3110 + +# State 104 +# Apply action "Ng4" +action: 2697 + +# State 105 +# Apply action "Kd6" +action: 2511 + +# State 106 +# Apply action "Kf2" +action: 3081 + +# State 107 +# Apply action "Re7" +action: 689 + +# State 108 +# Apply action "Ke2" +action: 3022 + +# State 109 +# Apply action "Bh4" +action: 3767 + +# State 110 +# Apply action "Rxc2" +action: 687 + +# State 111 +# Apply action "Bg3" +action: 4437 + +# State 112 +# Apply action "Rd2" +action: 1271 + +# State 113 +# Apply action "Rd7" +action: 2438 + +# State 114 +# Apply action "a5" +action: 235 + +# State 115 +# Apply action "c5" +action: 1330 + +# State 116 +# Apply action "Ra4" +action: 90 + +# State 117 +# Apply action "Rb7" +action: 1853 + +# State 118 +# Apply action "Ra3" +action: 234 + +# State 119 +# Apply action "Nd7" +action: 654 + +# State 120 +# 2b2r2/1r1n4/1p1k4/P1p2n1p/1p2P1N1/R2P2bB/3RK2P/8 w - - 4 61 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 
103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654" +ObservationString(0) = "2b2r2/1r1n4/1p1k4/P1p2n1p/1p2P1N1/R2P2bB/3RK2P/8 w - - 4 61" +ObservationString(1) = "2b2r2/1r1n4/1p1k4/P1p2n1p/1p2P1N1/R2P2bB/3RK2P/8 w - - 4 61" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [160, 161, 162, 176, 177, 308, 336, 1840, 1852, 1853, 1854, 1914, 2452, 2453, 2467, 2571, 2599, 3788, 3789, 3790, 3791, 3795, 4218, 4276, 4277] +StringLegalActions() = ["Ra1", "Raa2", "Ra4", "Rb3", "Rc3", "a6", "axb6", "Rd1", "Rda2", "Rb2", "Rc2", "d4", "Kd1", "Kf3", "Kf1", "e5+", "exf5", "Ne3", "Ne5", "Nf2", "Nf6", "Nh6", "hxg3", "Bf1", "Bg2"] + +# Apply action "Rb2" +action: 1853 + +# State 121 +# 2b2r2/1r1n4/1p1k4/P1p2n1p/1p2P1N1/R2P2bB/1R2K2P/8 b - - 5 61 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 
2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853" +ObservationString(0) = "2b2r2/1r1n4/1p1k4/P1p2n1p/1p2P1N1/R2P2bB/1R2K2P/8 b - - 5 61" +ObservationString(1) = "2b2r2/1r1n4/1p1k4/P1p2n1p/1p2P1N1/R2P2bB/1R2K2P/8 b - - 5 61" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0495, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [672, 686, 687, 746, 787, 892, 933, 1403, 1890, 1895, 1897, 1927, 1928, 1941, 1956, 2936, 2937, 2948, 2949, 2950, 2951, 3205, 3206, 3207, 3208, 3209, 3210, 3911, 3912, 3913, 3925, 3926, 3927, 4323, 4364] +StringLegalActions() = ["Rb8", "Ra7", "Rc7", "b5", "bxa5", "b3", "bxa3", "c4", "Nb8", "Nf6", "Ne5", "Kc6", "Ke6", "Kc7", "Ke7", "Rf7", "Rf6", "Rd8", "Re8", "Rg8", "Rh8", "Nd4+", "Ne7", "Ne3", "Nh6", "Nh4", "Ng7", "Be5", "Bf4", "Bxh2", "Be1", "Bf2", "Bh4", "h4", "hxg4"] + +# Apply action "Nh4" +action: 3209 + +# State 122 +# Apply action "Rab3" +action: 176 + +# State 123 +# Apply action "Rf6" +action: 2937 + +# State 124 +# Apply action "Ne3" 
+action: 3788 + +# State 125 +# Apply action "Nf3" +action: 4446 + +# State 126 +# Apply action "Kd1" +action: 2452 + +# State 127 +# Apply action "Nh4" +action: 3354 + +# State 128 +# Apply action "Bxd7" +action: 4288 + +# State 129 +# Apply action "b5" +action: 746 + +# State 130 +# Apply action "Bc6" +action: 2233 + +# State 131 +# Apply action "Rb8" +action: 672 + +# State 132 +# Apply action "Rg2" +action: 691 + +# State 133 +# Apply action "Rf8" +action: 3080 + +# State 134 +# Apply action "Kc2" +action: 1809 + +# State 135 +# Apply action "Kc7" +action: 1941 + +# State 136 +# Apply action "Rxb4" +action: 746 + +# State 137 +# Apply action "Rf1" +action: 2942 + +# State 138 +# Apply action "Rg1" +action: 3592 + +# State 139 +# Apply action "Rb6" +action: 601 + +# State 140 +# 2b5/2k5/1rB5/Ppp4p/1R2P2n/3PN1b1/2K4P/5rR1 w - - 3 71 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 
2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601" +ObservationString(0) = "2b5/2k5/1rB5/Ppp4p/1R2P2n/3PN1b1/2K4P/5rR1 w - - 3 71" +ObservationString(1) = "2b5/2k5/1rB5/Ppp4p/1R2P2n/3PN1b1/2K4P/5rR1 w - - 3 71" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [308, 336, 816, 817, 818, 819, 832, 833, 
834, 1257, 1270, 1271, 1298, 1576, 1577, 1578, 1589, 1590, 1591, 1914, 2548, 2549, 2550, 2551, 2552, 2553, 2554, 2571, 3520, 3521, 3533, 3534, 4177, 4218] +StringLegalActions() = ["a6", "axb6+", "Rb1", "Rb2", "Rb3", "Rxb5", "Ra4", "Rc4", "Rd4", "Kc3", "Kb2", "Kd2", "Kb3", "Bxb5", "Bd7", "Be8", "Ba8", "Bb7", "Bd5", "d4", "Nc4", "Nd1", "Nd5+", "Ng2", "Ng4", "Nxf1", "Nf5", "e5", "Rg2", "Rxg3", "Rxf1", "Rh1", "h3", "hxg3"] + +# Apply action "Rxb5" +action: 819 + +# State 141 +# 2b5/2k5/1rB5/PRp4p/4P2n/3PN1b1/2K4P/5rR1 b - - 0 71 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 
1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819" +ObservationString(0) = "2b5/2k5/1rB5/PRp4p/4P2n/3PN1b1/2K4P/5rR1 b - - 0 71" +ObservationString(1) = "2b5/2k5/1rB5/PRp4p/4P2n/3PN1b1/2K4P/5rR1 b - - 0 71" +ObservationTensor(0): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯ ◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯ ◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉ ◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ + +◉◉◉◉◉◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◯◯◉◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◯◉◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◯◯◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯ ◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯ ◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉ ◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◉ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ + +◉◉◉◉◉◯◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◯◯◉◯◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◯◉◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◯◯◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +Rewards() = [0, 
0] +Returns() = [0, 0] +LegalActions() = [744, 745, 746, 759, 760, 1212, 1213, 1214, 1215, 1216, 1224, 1225, 1257, 1284, 1285, 1299, 1403, 3440, 3441, 3442, 3443, 3444, 3445, 3446, 3456, 3457, 3458, 3459, 3460, 3461, 3910, 3911, 3912, 3913, 3925, 3926, 4445, 4446, 4447, 4448] +StringLegalActions() = ["Rb8", "Rb7", "Rxb5", "Ra6", "Rxc6", "Bd7", "Be6", "Bf5", "Bg4", "Bh3", "Ba6", "Bb7", "Kxc6", "Kb8", "Kd6", "Kd8", "c4", "Rf8", "Rf7", "Rf6", "Rf5", "Rf4", "Rf3", "Rf2+", "Ra1", "Rb1", "Rc1+", "Rd1", "Re1", "Rxg1", "Bd6", "Be5", "Bf4", "Bxh2", "Be1", "Bf2", "Nf5", "Nf3", "Ng6", "Ng2"] + +# Apply action "Rf6" +action: 3442 + +# State 142 +# Apply action "Kb1" +action: 1284 + +# State 143 +# Apply action "Kd8" +action: 1299 + +# State 144 +# Apply action "Rxg3" +action: 3521 + +# State 145 +# Apply action "Rf8" +action: 3080 + +# State 146 +# Apply action "Bd5" +action: 1591 + +# State 147 +# Apply action "Ng2" +action: 4448 + +# State 148 +# Apply action "Nf5" +action: 2554 + +# State 149 +# Apply action "Rf7" +action: 2936 + +# State 150 +# Apply action "Kc1" +action: 614 + +# State 151 +# Apply action "Re6" +action: 762 + +# State 152 +# Apply action "Nh4" +action: 3281 + +# State 153 +# Apply action "Rg7" +action: 3023 + +# State 154 +# Apply action "Re3" +action: 3678 + +# State 155 +# Apply action "Rh6" +action: 2514 + +# State 156 +# Apply action "Rb6" +action: 892 + +# State 157 +# Apply action "Bd7" +action: 1212 + +# State 158 +# Apply action "Bf7" +action: 2089 + +# State 159 +# Apply action "Kc8" +action: 1781 + +# State 160 +# 2k5/3b1Br1/1R5r/P1p4p/4P2N/3PR3/6nP/2K5 w - - 15 81 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781" +IsChanceNode() = False 
+IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781" +ObservationString(0) = "2k5/3b1Br1/1R5r/P1p4p/4P2N/3PR3/6nP/2K5 w - - 15 81" +ObservationString(1) = "2k5/3b1Br1/1R5r/P1p4p/4P2N/3PR3/6nP/2K5 w - - 15 81" +ObservationTensor(0) = [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 
0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.14851, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [308, 960, 961, 962, 963, 964, 965, 966, 978, 979, 980, 981, 982, 983, 984, 1184, 1197, 1198, 1212, 1225, 1914, 2496, 2497, 2512, 2513, 2514, 2571, 3397, 3398, 3399, 3400, 3401, 3402, 3415, 3416, 3417, 4177, 4372, 4373, 4374, 4375] +StringLegalActions() = ["a6", "Rb1", "Rb2", "Rb3", "Rb4", "Rb5", "Rb7", "Rb8+", "Ra6", "Rc6+", "Rd6", "Re6", "Rf6", "Rg6", "Rxh6", "Kc2", "Kb1", "Kd1", "Kd2", "Kb2", "d4", "Re1", "Re2", "Rf3", "Rg3", "Rh3", "e5", "Ba2", "Bb3", "Bc4", "Bd5", "Be6", "Bg8", "Be8", "Bg6", "Bxh5", "h3", "Nf3", "Nf5", "Nxg2", "Ng6"] + +# Apply action "Bg6" +action: 3416 + +# State 161 +# 2k5/3b2r1/1R4Br/P1p4p/4P2N/3PR3/6nP/2K5 b - - 16 81 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 
4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416" +ObservationString(0) = "2k5/3b2r1/1R4Br/P1p4p/4P2N/3PR3/6nP/2K5 b - - 16 81" +ObservationString(1) = "2k5/3b2r1/1R4Br/P1p4p/4P2N/3PR3/6nP/2K5 b - - 16 81" +ObservationTensor(0) = [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 
0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.15842, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1184, 1198, 1403, 1869, 1870, 1871, 1872, 1880, 1881, 1882, 1883, 3592, 3593, 3605, 3606, 3607, 4007, 4008, 4009, 4013, 4248, 4249, 4263] +StringLegalActions() = ["Kc7", "Kd8", "c4", "Be6", "Bf5", "Bg4", "Bh3", "Ba4", "Bb5", "Bc6", "Be8", "Rg8", "Rgxg6", "Re7", "Rf7", "Rgh7", "Nxe3", "Ne1", "Nf4", "Nxh4", "Rh8", "Rhh7", "Rhxg6"] + +# Apply action "Nf4" +action: 4009 + +# State 162 +# Apply action "Rb8+" +action: 966 + +# State 163 +# Apply action "Kxb8" +action: 1197 + +# State 164 +# Apply action "Re2" +action: 2497 + +# State 165 +# Apply action "c4" +action: 1403 + +# State 166 +# Apply action "Rb2+" +action: 2436 + +# State 167 +# Apply action "Ka7" +action: 641 + +# State 168 +# Apply action "Bxh5" +action: 3927 + +# State 169 +# Apply action "cxd3" +action: 1504 + +# State 170 +# Apply action "a6" +action: 308 + +# State 171 +# Apply action "Nxh5" +action: 3281 + +# State 172 +# Apply action "Kd2" +action: 1212 + +# State 173 +# Apply action "Re7" +action: 3605 + +# State 174 +# Apply action "Rb5" +action: 675 + +# State 175 +# Apply action "Be6" +action: 1869 + +# State 176 +# Apply action "Ra5" +action: 905 + +# State 177 +# Apply action "Bb3" +action: 2537 + +# State 178 +# Apply action "Rc5" +action: 323 + +# State 179 +# Apply action "Rh8" +action: 4248 + +# State 180 +# 7r/k3r3/P7/2R4n/4P2N/1b1p4/3K3P/8 w - - 8 91 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 
1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 
2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248" +ObservationString(0) = "7r/k3r3/P7/2R4n/4P2N/1b1p4/3K3P/8 w - - 8 91" +ObservationString(1) = "7r/k3r3/P7/2R4n/4P2N/1b1p4/3K3P/8 w - - 8 91" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.07921, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1472, 1473, 1474, 1475, 1476, 1477, 1478, 1488, 1489, 1490, 1491, 1492, 1493, 1494, 1841, 1868, 1869, 1882, 1883, 2571, 4177, 4372, 4373, 4374, 4375] +StringLegalActions() = ["Rc1", "Rc2", "Rc3", "Rc4", "Rc6", "Rc7+", "Rc8", "Ra5", "Rb5", "Rd5", "Re5", "Rf5", "Rg5", "Rxh5", "Kxd3", "Kc1", "Ke3", "Kc3", "Ke1", "e5", "h3", "Nf3", "Nf5", "Ng2", "Ng6"] + +# Apply 
action "Kc1" +action: 1868 + +# State 181 +# 7r/k3r3/P7/2R4n/4P2N/1b1p4/7P/2K5 b - - 9 91 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 
3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868" +ObservationString(0) = "7r/k3r3/P7/2R4n/4P2N/1b1p4/7P/2K5 b - - 9 91" +ObservationString(1) = "7r/k3r3/P7/2R4n/4P2N/1b1p4/7P/2K5 b - - 9 91" +ObservationTensor(0) = [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.08911, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [88, 89, 117, 131, 992, 993, 994, 1006, 1007, 1008, 1009, 1010, 1011, 2133, 2424, 2425, 2426, 2427, 2436, 2437, 2438, 2439, 2440, 2441, 4104, 4105, 4111, 4112, 4113, 4114, 4115, 4116, 4117, 4372, 4373, 4374, 4375] +StringLegalActions() = ["Ka8", "Kxa6", "Kb6", "Kb8", "Ba4", "Bc2", "Bd1", "Ba2", "Bc4", "Bd5", "Be6", "Bf7", "Bg8", "d2+", "Ree8", "Re6", "Re5", "Rxe4", "Rb7", "Rc7", "Rd7", "Rf7", "Rg7", "Reh7", "Rhh7", "Rh6", "Ra8", "Rb8", "Rc8", "Rd8", "Rhe8", "Rf8", "Rg8", "Nf6", "Nf4", "Ng7", "Ng3"] + +# Apply action "Ng7" +action: 4374 + +# State 182 +# Apply action "Rf5" +action: 1492 + +# State 183 +# Apply action "Bc4" +action: 1007 + +# State 184 +# Apply action "Rf7" +action: 3229 + +# State 185 +# Apply action "Kxa6" +action: 89 + +# State 186 +# Apply action "Nf3" +action: 4372 + +# State 187 +# Apply action "Rxf7" +action: 2439 + +# State 188 +# Apply action "Nd4" +action: 3132 + +# State 189 +# Apply action "Bb3" +action: 1517 + +# State 190 +# Apply action "Ne2" +action: 2042 + +# State 191 +# Apply action "dxe2" +action: 2161 + +# State 192 +# Apply action "Kb2" +action: 1225 + +# State 193 +# Apply action "e1=N" +action: 2780 + +# State 194 +# Apply action "Kxb3" +action: 673 + +# State 195 +# Apply action "Rhf8" +action: 4116 + +# State 196 +# Apply action "Ka4" +action: 787 + +# State 197 +# Apply action "Rf3" +action: 3012 + +# State 198 +# Apply action "h4" +action: 4178 + +# State 199 +# Apply action "Rd8" +action: 2948 + +# State 200 +# 3r4/6n1/k7/8/K3P2P/5r2/8/4n3 w - - 1 101 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 
3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948" +ObservationString(0) = "3r4/6n1/k7/8/K3P2P/5r2/8/4n3 w - - 1 101" +ObservationString(1) = "3r4/6n1/k7/8/K3P2P/5r2/8/4n3 w - - 1 101" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [249, 2571, 4323] +StringLegalActions() = ["Kb4", "e5", "h5"] + +# Apply action "Kb4" +action: 249 + +# State 201 +# 3r4/6n1/k7/8/1K2P2P/5r2/8/4n3 b - - 2 101 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 
4116, 787, 3012, 4178, 2948, 249] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 
1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249" +ObservationString(0) = "3r4/6n1/k7/8/1K2P2P/5r2/8/4n3 b - - 2 101" +ObservationString(1) = "3r4/6n1/k7/8/1K2P2P/5r2/8/4n3 b - - 2 101" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [161, 176, 204, 1768, 1769, 1770, 1771, 1772, 1773, 1774, 1779, 1780, 1781, 1782, 1783, 1784, 1785, 2912, 2914, 2916, 3296, 3297, 3298, 3299, 3300, 3301, 3302, 3310, 3311, 3312, 3313, 3314, 3315, 3316, 3642, 3643, 3645, 3649] +StringLegalActions() = ["Ka7", "Kb6", "Kb7", "Rd7", "Rd6", "Rd5", "Rd4+", "Rdd3", "Rd2", "Rd1", "Ra8", "Rb8+", "Rc8", "Re8", "Rdf8", "Rg8", "Rh8", "Nc2+", "Nd3+", "Ng2", "Rff8", "Rf7", "Rf6", "Rf5", "Rf4", "Rf2", "Rf1", "Ra3", "Rb3+", "Rc3", "Rfd3", "Re3", "Rg3", "Rh3", "Ne8", "Ne6", "Nf5", "Nh5"] + +# Apply action "Ra3" +action: 3310 + +# State 202 +# Apply action "e5" +action: 2571 + +# State 203 +# Apply action "Rd5" +action: 1770 + +# State 204 
+# Apply action "Kc4" +action: 833 + +# State 205 +# Apply action "Nh5" +action: 3649 + +# State 206 +# Apply action "Kb4" +action: 1416 + +# State 207 +# Apply action "Rb5+" +action: 1999 + +# State 208 +# Apply action "Kxa3" +action: 846 + +# State 209 +# Apply action "Rb4" +action: 819 + +# State 210 +# Apply action "Ka2" +action: 161 + +# State 211 +# Apply action "Ng2" +action: 2916 + +# State 212 +# Apply action "e6" +action: 2644 + +# State 213 +# Apply action "Rf4" +action: 909 + +# State 214 +# Apply action "Kb2" +action: 103 + +# State 215 +# Apply action "Ng7" +action: 4374 + +# State 216 +# Apply action "Ka2" +action: 686 + +# State 217 +# Apply action "Ne8" +action: 3642 + +# State 218 +# Apply action "e7" +action: 2717 + +# State 219 +# Apply action "Rf8" +action: 3224 + +# State 220 +# 4nr2/4P3/k7/8/7P/8/K5n1/8 w - - 1 111 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "3576, 3009, 
3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224" +ObservationString(0) = "4nr2/4P3/k7/8/7P/8/K5n1/8 w - - 1 111" +ObservationString(1) = "4nr2/4P3/k7/8/7P/8/K5n1/8 w - - 1 111" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 
0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [88, 89, 103, 117, 131, 2775, 2778, 2781, 2818, 4323] +StringLegalActions() = ["Ka1", "Ka3", "Kb2", "Kb3", "Kb1", "exf8=R", "exf8=B", "exf8=N", "exf8=Q", "h5"] + +# Apply action "Kb1" +action: 131 + +# State 221 +# 4nr2/4P3/k7/8/7P/8/6n1/1K6 b - - 2 111 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 
162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 
1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131" +ObservationString(0) = "4nr2/4P3/k7/8/7P/8/6n1/1K6 b - - 2 111" +ObservationString(1) = "4nr2/4P3/k7/8/7P/8/6n1/1K6 b - - 2 111" +ObservationTensor(0) = [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [161, 162, 176, 190, 204, 2402, 2404, 2406, 2408, 2936, 2937, 2938, 2939, 2940, 2941, 2942, 2950, 2951, 4007, 4008, 4009, 4013] +StringLegalActions() = ["Ka7", "Ka5", "Kb6", "Kb5", "Kb7", "Nc7", "Nd6", "Ng7", "Nf6", "Rf7", "Rf6", "Rf5", "Rf4", "Rf3", "Rf2", "Rf1+", "Rg8", "Rh8", "Ne3", "Ne1", "Nf4", "Nxh4"] + +# Apply action "Rg8" +action: 2950 + +# State 222 +# Apply action "Kc2" +action: 628 + +# State 223 +# Apply action "Kb7" +action: 204 + +# State 224 +# Apply action "Kd1" +action: 1299 + +# State 225 +# Apply action "Ng7" +action: 2406 + +# State 226 +# Apply action "Kc2" +action: 1809 + +# 
State 227 +# Apply action "Kb6" +action: 673 + +# State 228 +# Apply action "Kd1" +action: 1299 + +# State 229 +# Apply action "Kb5" +action: 746 + +# State 230 +# Apply action "Kc2" +action: 1809 + +# State 231 +# Apply action "Kc5" +action: 833 + +# State 232 +# Apply action "Kb1" +action: 1284 + +# State 233 +# Apply action "Nf4" +action: 4009 + +# State 234 +# Apply action "e8=B" +action: 2777 + +# State 235 +# Apply action "Nh3" +action: 3282 + +# State 236 +# Apply action "h5" +action: 4323 + +# State 237 +# Apply action "Kd5" +action: 1417 + +# State 238 +# Apply action "Bb5" +action: 2888 + +# State 239 +# Apply action "Ng5" +action: 4520 + +# State 240 +# 6r1/6n1/8/1B1k2nP/8/8/8/1K6 w - - 3 121 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520" +IsChanceNode() = False 
+IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520" +ObservationString(0) = "6r1/6n1/8/1B1k2nP/8/8/8/1K6 w - - 3 121" +ObservationString(1) = "6r1/6n1/8/1B1k2nP/8/8/8/1K6 w - - 3 121" +ObservationTensor(0) = [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0297, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [600, 613, 614, 628, 641, 919, 920, 921, 922, 933, 934, 935, 936, 937, 4396] +StringLegalActions() = ["Kb2", "Ka1", "Kc1", "Kc2", "Ka2", "Ba4", "Bc6+", "Bd7", "Be8", "Ba6", "Bc4+", "Bd3", "Be2", "Bf1", "h6"] + +# Apply action "Kc2" +action: 628 + +# State 241 +# 6r1/6n1/8/1B1k2nP/8/8/2K5/8 b - - 4 121 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 
3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 
161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628" +ObservationString(0) = "6r1/6n1/8/1B1k2nP/8/8/2K5/8 b - - 4 121" +ObservationString(1) = "6r1/6n1/8/1B1k2nP/8/8/2K5/8 b - - 4 121" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0396, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1986, 1987, 2000, 2001, 2015, 2029, 3528, 3529, 3530, 3531, 3532, 3533, 3534, 3642, 3643, 3645, 3649, 3788, 3789, 3790, 3791, 3794, 3795] +StringLegalActions() = ["Kd6", "Kd4", "Kc5", "Ke5", "Ke4", "Ke6", "Ra8", "Rb8", "Rc8+", "Rd8", "Re8", "Rf8", "Rh8", "Ne8", "N7e6", "Nf5", "Nxh5", "N5e6", "Ne4", "Nf7", "Nf3", "Nh7", "Nh3"] + +# Apply action "Kd4" +action: 1987 + +# State 242 +# Apply action "Bc4" +action: 934 + +# State 243 +# Apply action "Rc8" +action: 3530 + +# State 244 +# Apply action "Kd2" +action: 1271 + +# State 245 +# Apply action "Re8" +action: 1199 + +# State 246 +# Apply action "Kc1" +action: 1868 + +# State 247 +# Apply action "Re1+" +action: 2358 + +# State 248 +# Apply action "Kc2" +action: 1184 + +# State 249 +# Apply action "N5e6" +action: 3788 + +# State 250 +# Apply action "Bb3" +action: 1430 + +# State 251 +# Apply action "Nxh5" +action: 3649 + +# State 252 +# Apply action "Ba2" +action: 773 + +# State 253 +# Apply action "Re2+" +action: 2862 + +# State 254 +# Apply action "Kd1" +action: 1299 + +# State 255 +# Apply action "Nef4" +action: 2554 + +# State 256 +# Apply action "Bd5" +action: 119 + +# State 257 +# Apply action "Re6" +action: 2786 + +# State 258 +# Apply action "Bxe6" +action: 2088 + +# State 259 +# Apply action "Nd3" +action: 3278 + +# State 260 +# 8/8/4B3/7n/3k4/3n4/8/3K4 w - - 1 131 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 
3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 
3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278" +ObservationString(0) = "8/8/4B3/7n/3k4/3n4/8/3K4 w - - 1 131" +ObservationString(1) = "8/8/4B3/7n/3k4/3n4/8/3K4 w - - 1 131" +ObservationTensor(0) = [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0099, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1768, 1796, 1809, 2741, 2742, 2743, 2744, 2745, 2746, 2757, 2758, 2759, 2760, 2761] +StringLegalActions() = ["Kd2", "Ke2", "Kc2", "Ba2", "Bb3", "Bc4", "Bd5", "Bf7", "Bg8", "Bc8", 
"Bd7", "Bf5", "Bg4", "Bh3"] + +# Apply action "Bd5" +action: 2744 + +# State 261 +# 8/8/8/3B3n/3k4/3n4/8/3K4 b - - 2 131 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 
1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744" +ObservationString(0) = "8/8/8/3B3n/3k4/3n4/8/3K4 b - - 2 131" +ObservationString(1) = "8/8/8/3B3n/3k4/3n4/8/3K4 b - - 2 131" +ObservationTensor(0) = [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0198, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2059, 2087, 2088, 2101, 2102, 2182, 2183, 2184, 2185, 2186, 2187, 2188, 2189, 4372, 4373, 4374, 4375] +StringLegalActions() = ["Kxd5", "Kc5", "Ke3", "Kc3", "Ke5", "Nb4", "Nb2+", "Nc5", "Nc1", "Ndf4", "Nf2+", "Ne5", "Ne1", "Nf6", "Nhf4", "Ng7", "Ng3"] + +# Apply action "Ne5" +action: 2188 + +# State 262 +# Apply action "Ba8" +action: 2099 + +# State 263 +# Apply action "Ng3" +action: 4375 + +# State 264 +# Apply action "Bh1" +action: 575 + +# State 265 +# Apply action "Nf7" +action: 2626 + +# State 266 +# Apply action "Bg2" +action: 4145 + +# State 267 +# Apply action "Kd3" +action: 2060 + +# State 268 +# Apply action "Ba8" +action: 3629 + +# State 269 +# Apply action "Nd8" +action: 3058 + +# State 270 +# Apply action "Bf3" +action: 573 + +# State 271 +# Apply action "Ne6" +action: 1824 + +# State 272 +# Apply action "Be2+" +action: 3109 + +# State 273 +# Apply action "Kd4" +action: 2132 + +# State 274 +# Apply action "Ba6" +action: 2463 + +# State 275 +# Apply action "Kc3" +action: 2101 + +# State 276 +# Apply action "Bb5" +action: 423 + +# State 277 +# Apply action "Nf8" +action: 2553 + +# State 278 +# Apply action "Bf1" +action: 937 + +# State 279 +# Apply action "Kd4" +action: 1591 + +# State 280 +# 5n2/8/8/8/3k4/6n1/8/3K1B2 w - - 21 141 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 
3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 
1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591" +ObservationString(0) = "5n2/8/8/8/3k4/6n1/8/3K1B2 w - - 21 141" +ObservationString(1) = "5n2/8/8/8/3k4/6n1/8/3K1B2 w - - 21 141" +ObservationTensor(0) = [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.20792, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1768, 1781, 1782, 1809, 2964, 2965, 2973, 2974, 2975, 2976, 2977] +StringLegalActions() = ["Kd2", "Kc1", "Ke1", "Kc2", "Bg2", "Bh3", "Ba6", "Bb5", "Bc4", "Bd3", "Be2"] + +# Apply action "Ba6" +action: 2973 + +# State 281 +# 5n2/8/B7/8/3k4/6n1/8/3K4 b - - 22 141 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 
3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 
3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973" +ObservationString(0) = "5n2/8/B7/8/3k4/6n1/8/3K4 b - - 22 141" +ObservationString(1) = "5n2/8/B7/8/3k4/6n1/8/3K4 b - - 22 141" +ObservationTensor(0) = [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 
0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.21782, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2059, 2074, 2087, 2088, 2101, 2102, 2986, 2988, 2990, 2992, 3934, 3935, 3936, 3937, 3940, 3941] +StringLegalActions() = ["Kd5", "Ke4", "Kc5", "Ke3", "Kc3", "Ke5", "Nd7", "Ne6", "Nh7", "Ng6", "Ne4", "Ne2", "Nf5", "Nf1", "Nh5", "Nh1"] + +# Apply action "Nf1" +action: 3937 + +# State 282 +# Apply action "Kc1" +action: 1781 + +# State 283 +# Apply action "Ng3" +action: 3502 + +# State 284 +# Apply action "Bb5" +action: 423 + +# State 285 +# Apply action "Ke3" +action: 2088 + +# State 286 +# Apply action "Bc4" +action: 934 + +# State 287 +# Apply action "Kf2" +action: 2745 + +# State 288 +# Apply action "Bb3" +action: 1430 + +# State 289 +# Apply action "Ke2" +action: 3387 + +# State 290 +# Apply action "Kb2" +action: 1225 + +# State 291 +# Apply action "Nf1" +action: 3937 + +# State 292 +# Apply action "Kc1" +action: 715 + +# State 293 +# Apply action "Ne3" +action: 3498 + +# State 294 +# Apply action "Ba4" +action: 787 + +# State 295 +# Apply action "Nc4" +action: 2766 + +# State 296 +# Apply action "Kb1" +action: 1197 + +# State 297 +# Apply action "Nd2+" +action: 1532 + +# State 298 +# Apply action "Kc1" +action: 614 + +# State 299 +# Apply action "Nf3" +action: 2259 + +# State 300 +# 5n2/8/8/8/B7/5n2/4k3/2K5 w - - 41 151 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 
905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 
1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259" +ObservationString(0) = "5n2/8/8/8/B7/5n2/4k3/2K5 w - - 41 151" +ObservationString(1) = "5n2/8/8/8/B7/5n2/4k3/2K5 w - - 41 151" +ObservationTensor(0) = [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 
0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.40594, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [263, 264, 265, 266, 277, 278, 279, 1184, 1197, 1225] +StringLegalActions() = ["Bb5+", "Bc6", "Bd7", "Be8", "Bb3", "Bc2", "Bd1+", "Kc2", "Kb1", "Kb2"] + +# Apply action "Kc2" +action: 1184 + +# State 301 +# 5n2/8/8/8/B7/5n2/2K1k3/8 b - - 42 151 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 
773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259, 1184] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259, 1184" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 
1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259, 1184" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259, 1184" +ObservationString(0) = "5n2/8/8/8/B7/5n2/2K1k3/8 b - - 42 151" +ObservationString(1) = "5n2/8/8/8/B7/5n2/2K1k3/8 b - - 42 151" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 
0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 
0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.41584, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2789, 2790, 2804, 2818, 2986, 2988, 2990, 2992, 3350, 3351, 3352, 3353, 3354, 3355, 3356, 3357] +StringLegalActions() = ["Ke3", "Ke1", "Kf2", "Kf1", "Nd7", "Ne6", "Nh7", "Ng6", "Nd4+", "Nd2", "Ne5", "Ne1+", "Nh4", "Nh2", "Ng5", "Ng1"] + +# Apply action "Ng6" +action: 2992 + +# State 302 +# Apply action "Kc3" +action: 1257 + +# State 303 +# Apply action "Ke1" +action: 2790 + +# State 304 +# Apply action "Kc4" +action: 1330 + +# State 305 +# Apply action "Ng1" +action: 3357 + +# State 306 +# Apply action "Kb4" +action: 1416 + +# State 307 +# Apply action "Kd2" +action: 2890 + +# State 308 +# Apply action "Ka5" +action: 860 + +# State 309 +# Apply action "Ne2" +action: 4080 + +# State 310 +# Apply action "Bd1" +action: 279 + +# State 311 +# Apply action "Nh4" +action: 3722 + +# State 312 +# Apply action "Bc2" +action: 1809 + +# State 313 +# Apply action "Kc1" +action: 2247 + +# State 314 +# Apply action "Be4" +action: 1286 + +# State 315 +# Apply action "Kd1" +action: 1709 + +# State 316 +# Apply action "Bb7" +action: 2610 + +# State 317 +# Apply action "Nf5" +action: 4445 + +# State 318 +# Apply action "Bc6" +action: 1080 + +# State 319 +# Apply action "Ne3" +action: 3207 + +# State 320 +# 8/8/2B5/K7/8/4n3/4n3/3k4 w - - 61 161 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 
2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259, 1184, 2992, 1257, 2790, 1330, 3357, 1416, 2890, 860, 4080, 279, 3722, 1809, 2247, 1286, 1709, 2610, 4445, 1080, 3207] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259, 1184, 2992, 1257, 2790, 1330, 3357, 1416, 2890, 860, 4080, 279, 3722, 1809, 2247, 1286, 1709, 2610, 4445, 1080, 3207" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 
3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259, 1184, 2992, 1257, 2790, 1330, 3357, 1416, 2890, 860, 4080, 279, 3722, 1809, 2247, 1286, 1709, 2610, 4445, 1080, 3207" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259, 1184, 2992, 1257, 2790, 1330, 3357, 1416, 2890, 860, 4080, 279, 3722, 1809, 2247, 1286, 1709, 2610, 4445, 1080, 3207" +ObservationString(0) = "8/8/2B5/K7/8/4n3/4n3/3k4 w - - 61 161" +ObservationString(1) = "8/8/2B5/K7/8/4n3/4n3/3k4 w - - 61 161" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.60396, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [307, 308, 322, 336, 350, 1575, 1576, 1577, 1578, 1589, 1590, 1591, 1592, 1593, 1594, 1595] +StringLegalActions() = ["Ka4", "Ka6", "Kb5", "Kb6", "Kb4", "Ba4+", "Bb5", "Bd7", "Be8", "Ba8", "Bb7", "Bd5", "Be4", "Bf3", "Bg2", "Bh1"] + +# Apply action "Be8" +action: 1578 + +# State 321 +# 4B3/8/8/K7/8/4n3/4n3/3k4 b - - 62 161 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 
3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259, 1184, 2992, 1257, 2790, 1330, 3357, 1416, 2890, 860, 4080, 279, 3722, 1809, 2247, 1286, 1709, 2610, 4445, 1080, 3207, 1578] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259, 1184, 2992, 1257, 2790, 1330, 3357, 1416, 2890, 860, 4080, 279, 3722, 1809, 2247, 1286, 1709, 2610, 4445, 1080, 3207, 1578" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 
1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259, 1184, 2992, 1257, 2790, 1330, 3357, 1416, 2890, 860, 4080, 279, 3722, 1809, 2247, 1286, 1709, 2610, 4445, 1080, 3207, 1578" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259, 1184, 2992, 1257, 2790, 1330, 3357, 1416, 2890, 860, 4080, 279, 3722, 1809, 2247, 1286, 1709, 2610, 4445, 1080, 3207, 1578" +ObservationString(0) = "4B3/8/8/K7/8/4n3/4n3/3k4 b - - 62 161" +ObservationString(1) = "4B3/8/8/K7/8/4n3/4n3/3k4 b - - 62 161" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.61386, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2278, 2292, 2293, 2306, 2766, 2767, 2768, 2770, 2771, 2772, 2773, 2839, 2840, 2841, 2843, 2844, 2845] +StringLegalActions() = ["Kd2", "Kc1", "Ke1", "Kc2", "Nc4+", "Nc2", "Nd5", "Ng4", "Ng2", "Nf5", "Nf1", "Nc3", "Nc1", "Nd4", "Ng3", "Ng1", "Nf4"] + +# Apply action "Ke1" +action: 2293 + +# State 322 +# Apply action "Bd7" +action: 2890 + +# State 323 +# Apply action "Nf5" +action: 2772 + +# State 324 +# Apply action "Be6" +action: 2248 + +# State 325 +# Apply action "Kf1" +action: 2877 + +# State 326 +# Apply action "Bd7" +action: 2758 + +# State 327 +# Apply action "Nh4" +action: 3209 + +# State 328 +# Apply action "Bb5" +action: 2232 + +# State 329 +# Apply action "Ng6" +action: 4447 + +# State 330 +# Apply action "Ka6" +action: 308 + +# State 331 +# Apply action "Nf4" +action: 3718 + +# State 332 +# Apply action "Bc4" +action: 934 + +# State 333 +# Apply action "Kg2" +action: 3489 + +# State 334 +# Apply action "Be6" +action: 1432 + +# 
State 335 +# Apply action "Ng1" +action: 2844 + +# State 336 +# Apply action "Ka7" +action: 381 + +# State 337 +# Apply action "Kf2" +action: 3971 + +# State 338 +# Apply action "Bh3" +action: 2761 + +# State 339 +# Apply action "Nge2" +action: 4080 + +# State 340 +# 8/K7/8/8/5n2/7B/4nk2/8 w - - 81 171 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259, 1184, 2992, 1257, 2790, 1330, 3357, 1416, 2890, 860, 4080, 279, 3722, 1809, 2247, 1286, 1709, 2610, 4445, 1080, 3207, 1578, 2293, 2890, 2772, 2248, 2877, 2758, 3209, 2232, 4447, 308, 3718, 934, 3489, 1432, 2844, 381, 3971, 2761, 4080] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 
103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259, 1184, 2992, 1257, 2790, 1330, 3357, 1416, 2890, 860, 4080, 279, 3722, 1809, 2247, 1286, 1709, 2610, 4445, 1080, 3207, 1578, 2293, 2890, 2772, 2248, 2877, 2758, 3209, 2232, 4447, 308, 3718, 934, 3489, 1432, 2844, 381, 3971, 2761, 4080" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259, 1184, 2992, 1257, 2790, 1330, 3357, 1416, 2890, 860, 4080, 279, 3722, 1809, 2247, 1286, 1709, 2610, 4445, 1080, 3207, 1578, 2293, 2890, 2772, 2248, 2877, 2758, 3209, 2232, 4447, 308, 3718, 934, 3489, 1432, 2844, 381, 3971, 2761, 4080" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 
2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259, 1184, 2992, 1257, 2790, 1330, 3357, 1416, 2890, 860, 4080, 279, 3722, 1809, 2247, 1286, 1709, 2610, 4445, 1080, 3207, 1578, 2293, 2890, 2772, 2248, 2877, 2758, 3209, 2232, 4447, 308, 3718, 934, 3489, 1432, 2844, 381, 3971, 2761, 4080" +ObservationString(0) = "8/K7/8/8/5n2/7B/4nk2/8 w - - 81 171" +ObservationString(1) = "8/K7/8/8/5n2/7B/4nk2/8 w - - 81 171" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.80198, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [453, 454, 468, 482, 496, 4276, 4277, 4287, 4288, 4289, 4290, 4291] +StringLegalActions() = ["Ka6", "Ka8", "Kb7", "Kb8", "Kb6", "Bf1", "Bg2", "Bc8", "Bd7", "Be6", "Bf5", "Bg4"] + +# Apply action "Be6" +action: 4289 + +# State 341 +# 8/K7/4B3/8/5n2/8/4nk2/8 b - - 82 171 +IsTerminal() = False +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259, 1184, 2992, 1257, 2790, 1330, 3357, 1416, 2890, 860, 4080, 279, 3722, 1809, 2247, 1286, 1709, 2610, 4445, 1080, 3207, 1578, 2293, 2890, 2772, 2248, 2877, 2758, 3209, 2232, 4447, 308, 3718, 934, 3489, 1432, 2844, 381, 3971, 2761, 4080, 4289] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 
1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259, 1184, 2992, 1257, 2790, 1330, 3357, 1416, 2890, 860, 4080, 279, 3722, 1809, 2247, 1286, 1709, 2610, 4445, 1080, 3207, 1578, 2293, 2890, 2772, 2248, 2877, 2758, 3209, 2232, 4447, 308, 3718, 934, 3489, 1432, 2844, 381, 3971, 2761, 4080, 4289" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259, 1184, 2992, 1257, 2790, 1330, 3357, 1416, 2890, 860, 4080, 279, 3722, 1809, 2247, 1286, 1709, 2610, 4445, 1080, 3207, 1578, 2293, 2890, 2772, 2248, 2877, 2758, 3209, 2232, 4447, 308, 3718, 934, 3489, 1432, 2844, 381, 3971, 2761, 4080, 4289" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 
3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259, 1184, 2992, 1257, 2790, 1330, 3357, 1416, 2890, 860, 4080, 279, 3722, 1809, 2247, 1286, 1709, 2610, 4445, 1080, 3207, 1578, 2293, 2890, 2772, 2248, 2877, 2758, 3209, 2232, 4447, 308, 3718, 934, 3489, 1432, 2844, 381, 3971, 2761, 4080, 4289" +ObservationString(0) = "8/K7/4B3/8/5n2/8/4nk2/8 b - - 82 171" +ObservationString(1) = "8/K7/4B3/8/5n2/8/4nk2/8 b - - 82 171" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 
0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.81188, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2839, 2840, 2841, 2843, 2844, 3277, 3278, 3279, 3281, 3282, 3283, 3284, 3373, 3374, 3388, 3401, 3402, 3415, 3416] +StringLegalActions() = ["Nc3", "Nc1", "Nd4", "Ng3", "Ng1", "Nd5", "Nd3", "Nxe6", "Nh5", "Nh3", "Ng6", "Ng2", "Kf3", "Kf1", "Kg2", "Ke3", "Kg1", "Ke1", "Kg3"] + +# Apply action "Ke1" +action: 3415 + +# State 342 +# Apply action "Kb7" +action: 468 + +# State 343 +# Apply action "Nh3" +action: 3282 + +# State 344 +# Apply action "Bg8" +action: 2746 + +# State 345 +# Apply action "Nhg1" +action: 4521 + +# State 346 +# Apply action "Ka6" +action: 1065 + +# State 347 +# Apply action "Kf1" +action: 2877 + +# State 348 +# Apply action "Kb5" +action: 423 + +# State 349 +# Apply action "Nc1" +action: 2840 + +# State 350 +# Apply action "Kb4" +action: 891 + +# State 351 +# Apply action "Nf3" +action: 4082 + +# State 352 +# Apply action "Bc4+" +action: 4055 + +# State 353 +# Apply action "Kg2" +action: 3489 + +# State 354 +# Apply action "Kc3" +action: 861 + +# State 355 +# Apply action "Kg1" +action: 3958 + +# State 356 +# Apply action "Bd5" +action: 1431 + +# State 357 +# Apply action "Nd4" +action: 3350 + +# State 358 +# Apply action "Bf3" +action: 2103 + +# State 359 +# 8/8/8/8/3n4/2K2B2/8/2n3k1 b - - 100 180 +IsTerminal() = True +History() = [3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 
3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259, 1184, 2992, 1257, 2790, 1330, 3357, 1416, 2890, 860, 4080, 279, 3722, 1809, 2247, 1286, 1709, 2610, 4445, 1080, 3207, 1578, 2293, 2890, 2772, 2248, 2877, 2758, 3209, 2232, 4447, 308, 3718, 934, 3489, 1432, 2844, 381, 3971, 2761, 4080, 4289, 3415, 468, 3282, 2746, 4521, 1065, 2877, 423, 2840, 891, 4082, 4055, 3489, 861, 3958, 1431, 3350, 2103] +HistoryString() = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259, 1184, 2992, 1257, 2790, 1330, 3357, 1416, 2890, 860, 4080, 279, 3722, 1809, 2247, 1286, 1709, 2610, 4445, 1080, 3207, 1578, 2293, 2890, 2772, 2248, 2877, 2758, 3209, 2232, 4447, 308, 3718, 934, 3489, 1432, 2844, 381, 3971, 2761, 4080, 4289, 3415, 468, 3282, 2746, 4521, 1065, 2877, 423, 2840, 891, 4082, 4055, 3489, 861, 3958, 1431, 3350, 2103" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 
3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259, 1184, 2992, 1257, 2790, 1330, 3357, 1416, 2890, 860, 4080, 279, 3722, 1809, 2247, 1286, 1709, 2610, 4445, 1080, 3207, 1578, 2293, 2890, 2772, 2248, 2877, 2758, 3209, 2232, 4447, 308, 3718, 934, 3489, 1432, 2844, 381, 3971, 2761, 4080, 4289, 3415, 468, 3282, 2746, 4521, 1065, 2877, 423, 2840, 891, 4082, 4055, 3489, 861, 3958, 1431, 3350, 2103" +InformationStateString(1) = "3576, 3009, 3010, 2380, 673, 3050, 1225, 1842, 3593, 1987, 90, 3576, 2964, 89, 2425, 4117, 2352, 1212, 4302, 3110, 1841, 1257, 714, 673, 193, 3593, 4116, 2964, 2833, 4300, 1257, 3209, 654, 3635, 3851, 3533, 1895, 162, 3621, 4445, 2322, 2539, 3137, 3208, 2439, 2000, 17, 1883, 1799, 3694, 3570, 1445, 3155, 4144, 2951, 2393, 2481, 3124, 4117, 4301, 3531, 1869, 4276, 4323, 746, 2524, 161, 2936, 1782, 16, 2977, 3008, 3009, 4178, 2467, 2101, 3206, 3570, 2365, 654, 1782, 103, 3081, 4396, 2363, 1549, 2477, 263, 3022, 2481, 2439, 3211, 2040, 3635, 600, 1928, 2965, 3936, 3134, 1890, 2498, 4144, 3009, 3110, 2697, 2511, 3081, 689, 3022, 3767, 687, 4437, 1271, 2438, 235, 1330, 90, 1853, 234, 654, 1853, 3209, 176, 2937, 3788, 4446, 2452, 3354, 4288, 746, 2233, 672, 691, 3080, 1809, 1941, 746, 2942, 3592, 601, 819, 3442, 1284, 1299, 3521, 3080, 1591, 4448, 2554, 2936, 614, 762, 3281, 3023, 3678, 2514, 892, 1212, 2089, 1781, 3416, 4009, 966, 1197, 2497, 1403, 2436, 641, 3927, 1504, 308, 3281, 1212, 3605, 675, 1869, 905, 2537, 323, 4248, 1868, 4374, 1492, 1007, 3229, 89, 4372, 2439, 3132, 1517, 2042, 2161, 1225, 2780, 673, 4116, 787, 3012, 4178, 2948, 249, 3310, 2571, 1770, 833, 3649, 1416, 1999, 846, 819, 161, 2916, 2644, 909, 103, 4374, 686, 3642, 2717, 3224, 131, 2950, 628, 204, 1299, 2406, 1809, 673, 1299, 746, 1809, 833, 1284, 4009, 2777, 3282, 4323, 1417, 2888, 4520, 628, 1987, 934, 3530, 1271, 1199, 1868, 2358, 1184, 3788, 1430, 3649, 773, 2862, 1299, 2554, 119, 2786, 2088, 3278, 2744, 2188, 2099, 4375, 575, 2626, 4145, 2060, 3629, 3058, 573, 1824, 3109, 2132, 2463, 2101, 423, 2553, 937, 1591, 2973, 3937, 1781, 3502, 423, 2088, 934, 2745, 1430, 3387, 1225, 3937, 715, 3498, 787, 2766, 1197, 1532, 614, 2259, 1184, 2992, 1257, 2790, 1330, 3357, 1416, 2890, 860, 4080, 279, 3722, 1809, 2247, 1286, 1709, 2610, 4445, 1080, 3207, 1578, 2293, 2890, 2772, 2248, 2877, 2758, 3209, 
2232, 4447, 308, 3718, 934, 3489, 1432, 2844, 381, 3971, 2761, 4080, 4289, 3415, 468, 3282, 2746, 4521, 1065, 2877, 423, 2840, 891, 4082, 4055, 3489, 861, 3958, 1431, 3350, 2103" +ObservationString(0) = "8/8/8/8/3n4/2K2B2/8/2n3k1 b - - 100 180" +ObservationString(1) = "8/8/8/8/3n4/2K2B2/8/2n3k1 b - - 100 180" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.9901, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/cliff_walking.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/cliff_walking.txt new file mode 100644 index 0000000..69d5882 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/cliff_walking.txt @@ -0,0 +1,219 @@ +game: cliff_walking + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "CliffWalking" +GameType.max_num_players = 1 
+GameType.min_num_players = 1 +GameType.parameter_specification = ["height", "horizon", "width"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.REWARDS +GameType.short_name = "cliff_walking" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 4 +PolicyTensorShape() = [4] +MaxChanceOutcomes() = 0 +GetParameters() = {height=4,horizon=100,width=8} +NumPlayers() = 1 +MinUtility() = -199.0 +MaxUtility() = -9.0 +UtilitySum() = None +InformationStateTensorShape() = [400] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 400 +ObservationTensorShape() = [4, 8] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 32 +MaxGameLength() = 100 +ToString() = "cliff_walking()" + +# State 0 +# ........ +# ........ +# ........ +# PXXXXXXG +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "" +InformationStateTensor(0): zeros(400) +ObservationString(0) = "........\n........\n........\nPXXXXXXG\n" +ObservationTensor(0): ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ +Rewards() = [0] +Returns() = [-0] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["RIGHT", "UP", "LEFT", "DOWN"] + +# Apply action "LEFT" +action: 2 + +# State 1 +# ........ +# ........ +# ........ +# PXXXXXXG +IsTerminal() = False +History() = [2] +HistoryString() = "2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "2" +InformationStateTensor(0): binvec(400, 0x2000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationString(0) = "........\n........\n........\nPXXXXXXG\n" +ObservationTensor(0): ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ +Rewards() = [-1] +Returns() = [-1] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["RIGHT", "UP", "LEFT", "DOWN"] + +# Apply action "DOWN" +action: 3 + +# State 2 +# ........ +# ........ +# ........ +# PXXXXXXG +IsTerminal() = False +History() = [2, 3] +HistoryString() = "2, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "2, 3" +InformationStateTensor(0): binvec(400, 0x2100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationString(0) = "........\n........\n........\nPXXXXXXG\n" +ObservationTensor(0): ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ +Rewards() = [-1] +Returns() = [-2] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["RIGHT", "UP", "LEFT", "DOWN"] + +# Apply action "LEFT" +action: 2 + +# State 3 +# Apply action "UP" +action: 1 + +# State 4 +# Apply action "LEFT" +action: 2 + +# State 5 +# Apply action "LEFT" +action: 2 + +# State 6 +# Apply action "RIGHT" +action: 0 + +# State 7 +# Apply action "LEFT" +action: 2 + +# State 8 +# Apply action "LEFT" +action: 2 + +# State 9 +# Apply action "LEFT" +action: 2 + +# State 10 +# ........ +# ........ +# P....... 
+# .XXXXXXG +IsTerminal() = False +History() = [2, 3, 2, 1, 2, 2, 0, 2, 2, 2] +HistoryString() = "2, 3, 2, 1, 2, 2, 0, 2, 2, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "2, 3, 2, 1, 2, 2, 0, 2, 2, 2" +InformationStateTensor(0): binvec(400, 0x2124228222000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationString(0) = "........\n........\nP.......\n.XXXXXXG\n" +ObservationTensor(0): ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [-1] +Returns() = [-10] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["RIGHT", "UP", "LEFT", "DOWN"] + +# Apply action "LEFT" +action: 2 + +# State 11 +# Apply action "UP" +action: 1 + +# State 12 +# Apply action "UP" +action: 1 + +# State 13 +# Apply action "DOWN" +action: 3 + +# State 14 +# Apply action "DOWN" +action: 3 + +# State 15 +# Apply action "UP" +action: 1 + +# State 16 +# Apply action "DOWN" +action: 3 + +# State 17 +# Apply action "RIGHT" +action: 0 + +# State 18 +# Apply action "DOWN" +action: 3 + +# State 19 +# ........ +# ........ +# ........ +# .PXXXXXG +IsTerminal() = True +History() = [2, 3, 2, 1, 2, 2, 0, 2, 2, 2, 2, 1, 1, 3, 3, 1, 3, 0, 3] +HistoryString() = "2, 3, 2, 1, 2, 2, 0, 2, 2, 2, 2, 1, 1, 3, 3, 1, 3, 0, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "2, 3, 2, 1, 2, 2, 0, 2, 2, 2, 2, 1, 1, 3, 3, 1, 3, 0, 3" +InformationStateTensor(0): binvec(400, 0x2124228222244114181000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationString(0) = "........\n........\n........\n.PXXXXXG\n" +ObservationTensor(0): ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ +Rewards() = [-100] +Returns() = [-118] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/clobber.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/clobber.txt new file mode 100644 index 0000000..2ba7f96 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/clobber.txt @@ -0,0 +1,333 @@ +game: clobber + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Clobber" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["columns", "rows"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "clobber" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 120 +PolicyTensorShape() = [120] +MaxChanceOutcomes() = 0 +GetParameters() = {columns=6,rows=5} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [3, 5, 6] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 90 +MaxGameLength() = 29 +ToString() = "clobber()" + +# State 0 +# 5oxoxox +# 4xoxoxo +# 3oxoxox +# 2xoxoxo +# 1oxoxox +# abcdef +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = "5oxoxox\n4xoxoxo\n3oxoxox\n2xoxoxo\n1oxoxox\n abcdef\n" 
+ObservationString(1) = "5oxoxox\n4xoxoxo\n3oxoxox\n2xoxoxo\n1oxoxox\n abcdef\n" +ObservationTensor(0): +◉◯◉◯◉◯ ◯◉◯◉◯◉ ◯◯◯◯◯◯ +◯◉◯◉◯◉ ◉◯◉◯◉◯ ◯◯◯◯◯◯ +◉◯◉◯◉◯ ◯◉◯◉◯◉ ◯◯◯◯◯◯ +◯◉◯◉◯◉ ◉◯◉◯◉◯ ◯◯◯◯◯◯ +◉◯◉◯◉◯ ◯◉◯◉◯◉ ◯◯◯◯◯◯ +ObservationTensor(1): +◯◉◯◉◯◉ ◉◯◉◯◉◯ ◯◯◯◯◯◯ +◉◯◉◯◉◯ ◯◉◯◉◯◉ ◯◯◯◯◯◯ +◯◉◯◉◯◉ ◉◯◉◯◉◯ ◯◯◯◯◯◯ +◉◯◉◯◉◯ ◯◉◯◉◯◉ ◯◯◯◯◯◯ +◯◉◯◉◯◉ ◉◯◉◯◉◯ ◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 9, 10, 11, 17, 18, 19, 28, 29, 30, 31, 36, 37, 38, 39, 44, 46, 47, 48, 49, 50, 56, 57, 58, 59, 64, 65, 66, 67, 76, 77, 78, 79, 84, 85, 86, 87, 92, 94, 95, 96, 97, 104, 105, 107, 112, 113, 115] +StringLegalActions() = ["a5b5", "a5a4", "c5d5", "c5c4", "c5b5", "e5f5", "e5e4", "e5d5", "b4b5", "b4c4", "b4b3", "b4a4", "d4d5", "d4e4", "d4d3", "d4c4", "f4f5", "f4f3", "f4e4", "a3a4", "a3b3", "a3a2", "c3c4", "c3d3", "c3c2", "c3b3", "e3e4", "e3f3", "e3e2", "e3d3", "b2b3", "b2c2", "b2b1", "b2a2", "d2d3", "d2e2", "d2d1", "d2c2", "f2f3", "f2f1", "f2e2", "a1a2", "a1b1", "c1c2", "c1d1", "c1b1", "e1e2", "e1f1", "e1d1"] + +# Apply action "c5c4" +action: 10 + +# State 1 +# 5ox.xox +# 4xoooxo +# 3oxoxox +# 2xoxoxo +# 1oxoxox +# abcdef +IsTerminal() = False +History() = [10] +HistoryString() = "10" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "10" +InformationStateString(1) = "10" +ObservationString(0) = "5ox.xox\n4xoooxo\n3oxoxox\n2xoxoxo\n1oxoxox\n abcdef\n" +ObservationString(1) = "5ox.xox\n4xoooxo\n3oxoxox\n2xoxoxo\n1oxoxox\n abcdef\n" +ObservationTensor(0): +◉◯◯◯◉◯ ◯◉◯◉◯◉ ◯◯◉◯◯◯ +◯◉◉◉◯◉ ◉◯◯◯◉◯ ◯◯◯◯◯◯ +◉◯◉◯◉◯ ◯◉◯◉◯◉ ◯◯◯◯◯◯ +◯◉◯◉◯◉ ◉◯◉◯◉◯ ◯◯◯◯◯◯ +◉◯◉◯◉◯ ◯◉◯◉◯◉ ◯◯◯◯◯◯ +ObservationTensor(1): +◯◉◯◉◯◉ ◉◯◯◯◉◯ ◯◯◉◯◯◯ +◉◯◯◯◉◯ ◯◉◉◉◯◉ ◯◯◯◯◯◯ +◯◉◯◉◯◉ ◉◯◉◯◉◯ ◯◯◯◯◯◯ +◉◯◉◯◉◯ ◯◉◯◉◯◉ ◯◯◯◯◯◯ +◯◉◯◉◯◉ ◉◯◉◯◉◯ ◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [6, 7, 13, 14, 22, 23, 24, 25, 26, 40, 41, 42, 43, 52, 53, 54, 55, 60, 61, 62, 63, 68, 70, 71, 72, 73, 74, 80, 81, 82, 83, 88, 89, 90, 91, 100, 101, 103, 108, 109, 111, 116, 119] +StringLegalActions() = ["b5b4", "b5a5", "d5e5", "d5d4", "f5f4", "f5e5", "a4a5", "a4b4", "a4a3", "e4e5", "e4f4", "e4e3", "e4d4", "b3b4", "b3c3", "b3b2", "b3a3", "d3d4", "d3e3", "d3d2", "d3c3", "f3f4", "f3f2", "f3e3", "a2a3", "a2b2", "a2a1", "c2c3", "c2d2", "c2c1", "c2b2", "e2e3", "e2f2", "e2e1", "e2d2", "b1b2", "b1c1", "b1a1", "d1d2", "d1e1", "d1c1", "f1f2", "f1e1"] + +# Apply action "b1a1" +action: 103 + +# State 2 +# 5ox.xox +# 4xoooxo +# 3oxoxox +# 2xoxoxo +# 1x.oxox +# abcdef +IsTerminal() = False +History() = [10, 103] +HistoryString() = "10, 103" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "10, 103" +InformationStateString(1) = "10, 103" +ObservationString(0) = "5ox.xox\n4xoooxo\n3oxoxox\n2xoxoxo\n1x.oxox\n abcdef\n" +ObservationString(1) = "5ox.xox\n4xoooxo\n3oxoxox\n2xoxoxo\n1x.oxox\n abcdef\n" +ObservationTensor(0): +◉◯◯◯◉◯ ◯◉◯◉◯◉ ◯◯◉◯◯◯ +◯◉◉◉◯◉ ◉◯◯◯◉◯ ◯◯◯◯◯◯ +◉◯◉◯◉◯ ◯◉◯◉◯◉ ◯◯◯◯◯◯ +◯◉◯◉◯◉ ◉◯◉◯◉◯ ◯◯◯◯◯◯ +◯◯◉◯◉◯ ◉◯◯◉◯◉ ◯◉◯◯◯◯ +ObservationTensor(1): +◯◉◯◉◯◉ ◉◯◯◯◉◯ ◯◯◉◯◯◯ +◉◯◯◯◉◯ ◯◉◉◉◯◉ ◯◯◯◯◯◯ +◯◉◯◉◯◉ ◉◯◉◯◉◯ ◯◯◯◯◯◯ +◉◯◉◯◉◯ ◯◉◯◉◯◉ ◯◯◯◯◯◯ +◉◯◯◉◯◉ ◯◯◉◯◉◯ ◯◉◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 17, 18, 19, 28, 30, 31, 36, 37, 38, 44, 46, 47, 48, 49, 50, 57, 58, 59, 64, 65, 66, 67, 76, 77, 79, 84, 85, 86, 87, 92, 94, 95, 104, 105, 112, 113, 115] +StringLegalActions() = ["a5b5", "a5a4", "e5f5", "e5e4", "e5d5", "b4b5", "b4b3", "b4a4", "d4d5", "d4e4", "d4d3", "f4f5", "f4f3", "f4e4", "a3a4", "a3b3", "a3a2", "c3d3", "c3c2", "c3b3", 
"e3e4", "e3f3", "e3e2", "e3d3", "b2b3", "b2c2", "b2a2", "d2d3", "d2e2", "d2d1", "d2c2", "f2f3", "f2f1", "f2e2", "c1c2", "c1d1", "e1e2", "e1f1", "e1d1"] + +# Apply action "e5f5" +action: 17 + +# State 3 +# 5ox.x.o +# 4xoooxo +# 3oxoxox +# 2xoxoxo +# 1x.oxox +# abcdef +IsTerminal() = False +History() = [10, 103, 17] +HistoryString() = "10, 103, 17" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "10, 103, 17" +InformationStateString(1) = "10, 103, 17" +ObservationString(0) = "5ox.x.o\n4xoooxo\n3oxoxox\n2xoxoxo\n1x.oxox\n abcdef\n" +ObservationString(1) = "5ox.x.o\n4xoooxo\n3oxoxox\n2xoxoxo\n1x.oxox\n abcdef\n" +ObservationTensor(0): +◉◯◯◯◯◉ ◯◉◯◉◯◯ ◯◯◉◯◉◯ +◯◉◉◉◯◉ ◉◯◯◯◉◯ ◯◯◯◯◯◯ +◉◯◉◯◉◯ ◯◉◯◉◯◉ ◯◯◯◯◯◯ +◯◉◯◉◯◉ ◉◯◉◯◉◯ ◯◯◯◯◯◯ +◯◯◉◯◉◯ ◉◯◯◉◯◉ ◯◉◯◯◯◯ +ObservationTensor(1): +◯◉◯◉◯◯ ◉◯◯◯◯◉ ◯◯◉◯◉◯ +◉◯◯◯◉◯ ◯◉◉◉◯◉ ◯◯◯◯◯◯ +◯◉◯◉◯◉ ◉◯◉◯◉◯ ◯◯◯◯◯◯ +◉◯◉◯◉◯ ◯◉◯◉◯◉ ◯◯◯◯◯◯ +◉◯◯◉◯◉ ◯◯◉◯◉◯ ◯◉◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [6, 7, 14, 24, 25, 26, 41, 42, 43, 52, 53, 54, 55, 60, 61, 62, 63, 68, 70, 71, 72, 73, 80, 81, 82, 83, 88, 89, 90, 91, 108, 109, 111, 116, 119] +StringLegalActions() = ["b5b4", "b5a5", "d5d4", "a4a5", "a4b4", "a4a3", "e4f4", "e4e3", "e4d4", "b3b4", "b3c3", "b3b2", "b3a3", "d3d4", "d3e3", "d3d2", "d3c3", "f3f4", "f3f2", "f3e3", "a2a3", "a2b2", "c2c3", "c2d2", "c2c1", "c2b2", "e2e3", "e2f2", "e2e1", "e2d2", "d1d2", "d1e1", "d1c1", "f1f2", "f1e1"] + +# Apply action "b3a3" +action: 55 + +# State 4 +# 5ox.x.o +# 4xoooxo +# 3x.oxox +# 2xoxoxo +# 1x.oxox +# abcdef +IsTerminal() = False +History() = [10, 103, 17, 55] +HistoryString() = "10, 103, 17, 55" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "10, 103, 17, 55" +InformationStateString(1) = "10, 103, 17, 55" +ObservationString(0) = "5ox.x.o\n4xoooxo\n3x.oxox\n2xoxoxo\n1x.oxox\n abcdef\n" +ObservationString(1) = "5ox.x.o\n4xoooxo\n3x.oxox\n2xoxoxo\n1x.oxox\n abcdef\n" +ObservationTensor(0): +◉◯◯◯◯◉ ◯◉◯◉◯◯ ◯◯◉◯◉◯ +◯◉◉◉◯◉ ◉◯◯◯◉◯ ◯◯◯◯◯◯ +◯◯◉◯◉◯ ◉◯◯◉◯◉ ◯◉◯◯◯◯ +◯◉◯◉◯◉ ◉◯◉◯◉◯ ◯◯◯◯◯◯ +◯◯◉◯◉◯ ◉◯◯◉◯◉ ◯◉◯◯◯◯ +ObservationTensor(1): +◯◉◯◉◯◯ ◉◯◯◯◯◉ ◯◯◉◯◉◯ +◉◯◯◯◉◯ ◯◉◉◉◯◉ ◯◯◯◯◯◯ +◉◯◯◉◯◉ ◯◯◉◯◉◯ ◯◉◯◯◯◯ +◉◯◉◯◉◯ ◯◉◯◉◯◉ ◯◯◯◯◯◯ +◉◯◯◉◯◉ ◯◯◉◯◉◯ ◯◉◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 28, 31, 36, 37, 38, 46, 47, 57, 58, 64, 65, 66, 67, 77, 79, 84, 85, 86, 87, 92, 94, 95, 104, 105, 112, 113, 115] +StringLegalActions() = ["a5b5", "a5a4", "b4b5", "b4a4", "d4d5", "d4e4", "d4d3", "f4f3", "f4e4", "c3d3", "c3c2", "e3e4", "e3f3", "e3e2", "e3d3", "b2c2", "b2a2", "d2d3", "d2e2", "d2d1", "d2c2", "f2f3", "f2f1", "f2e2", "c1c2", "c1d1", "e1e2", "e1f1", "e1d1"] + +# Apply action "e3e4" +action: 64 + +# State 5 +# 5ox.x.o +# 4xooooo +# 3x.ox.x +# 2xoxoxo +# 1x.oxox +# abcdef +IsTerminal() = False +History() = [10, 103, 17, 55, 64] +HistoryString() = "10, 103, 17, 55, 64" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "10, 103, 17, 55, 64" +InformationStateString(1) = "10, 103, 17, 55, 64" +ObservationString(0) = "5ox.x.o\n4xooooo\n3x.ox.x\n2xoxoxo\n1x.oxox\n abcdef\n" +ObservationString(1) = "5ox.x.o\n4xooooo\n3x.ox.x\n2xoxoxo\n1x.oxox\n abcdef\n" +ObservationTensor(0): +◉◯◯◯◯◉ ◯◉◯◉◯◯ ◯◯◉◯◉◯ +◯◉◉◉◉◉ ◉◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◉◯◯◯ ◉◯◯◉◯◉ ◯◉◯◯◉◯ +◯◉◯◉◯◉ ◉◯◉◯◉◯ ◯◯◯◯◯◯ +◯◯◉◯◉◯ ◉◯◯◉◯◉ ◯◉◯◯◯◯ +ObservationTensor(1): +◯◉◯◉◯◯ ◉◯◯◯◯◉ ◯◯◉◯◉◯ +◉◯◯◯◯◯ ◯◉◉◉◉◉ ◯◯◯◯◯◯ +◉◯◯◉◯◉ ◯◯◉◯◯◯ ◯◉◯◯◉◯ +◉◯◉◯◉◯ ◯◉◯◉◯◉ ◯◯◯◯◯◯ +◉◯◯◉◯◉ ◯◯◉◯◉◯ ◯◉◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] 
+LegalActions() = [6, 7, 14, 24, 25, 60, 62, 63, 68, 70, 73, 80, 81, 82, 83, 89, 90, 91, 108, 109, 111, 116, 119] +StringLegalActions() = ["b5b4", "b5a5", "d5d4", "a4a5", "a4b4", "d3d4", "d3d2", "d3c3", "f3f4", "f3f2", "a2b2", "c2c3", "c2d2", "c2c1", "c2b2", "e2f2", "e2e1", "e2d2", "d1d2", "d1e1", "d1c1", "f1f2", "f1e1"] + +# Apply action "f3f4" +action: 68 + +# State 6 +# Apply action "d4d3" +action: 38 + +# State 7 +# Apply action "f1f2" +action: 116 + +# State 8 +# Apply action "c1c2" +action: 104 + +# State 9 +# Apply action "d1e1" +action: 109 + +# State 10 +# Apply action "e4f4" +action: 41 + +# State 11 +# Apply action "b5a5" +action: 7 + +# State 12 +# Apply action "d2e2" +action: 85 + +# State 13 +# Apply action "f2e2" +action: 95 + +# State 14 +# Apply action "b4a4" +action: 31 + +# State 15 +# Apply action "a3a4" +action: 48 + +# State 16 +# Apply action "b2a2" +action: 79 + +# State 17 +# Apply action "a1a2" +action: 96 + +# State 18 +# 5x..x.o +# 4x.o..o +# 3..oo.. +# 2x.o.x. +# 1....x. +# abcdef +IsTerminal() = True +History() = [10, 103, 17, 55, 64, 68, 38, 116, 104, 109, 41, 7, 85, 95, 31, 48, 79, 96] +HistoryString() = "10, 103, 17, 55, 64, 68, 38, 116, 104, 109, 41, 7, 85, 95, 31, 48, 79, 96" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "10, 103, 17, 55, 64, 68, 38, 116, 104, 109, 41, 7, 85, 95, 31, 48, 79, 96" +InformationStateString(1) = "10, 103, 17, 55, 64, 68, 38, 116, 104, 109, 41, 7, 85, 95, 31, 48, 79, 96" +ObservationString(0) = "5x..x.o\n4x.o..o\n3..oo..\n2x.o.x.\n1....x.\n abcdef\n" +ObservationString(1) = "5x..x.o\n4x.o..o\n3..oo..\n2x.o.x.\n1....x.\n abcdef\n" +ObservationTensor(0): +◯◯◯◯◯◉ ◉◯◯◉◯◯ ◯◉◉◯◉◯ +◯◯◉◯◯◉ ◉◯◯◯◯◯ ◯◉◯◉◉◯ +◯◯◉◉◯◯ ◯◯◯◯◯◯ ◉◉◯◯◉◉ +◯◯◉◯◯◯ ◉◯◯◯◉◯ ◯◉◯◉◯◉ +◯◯◯◯◯◯ ◯◯◯◯◉◯ ◉◉◉◉◯◉ +ObservationTensor(1): +◉◯◯◉◯◯ ◯◯◯◯◯◉ ◯◉◉◯◉◯ +◉◯◯◯◯◯ ◯◯◉◯◯◉ ◯◉◯◉◉◯ +◯◯◯◯◯◯ ◯◯◉◉◯◯ ◉◉◯◯◉◉ +◉◯◯◯◉◯ ◯◯◉◯◯◯ ◯◉◯◉◯◉ +◯◯◯◯◉◯ ◯◯◯◯◯◯ ◉◉◉◉◯◉ +Rewards() = [-1, 1] +Returns() = [-1, 1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/coin_game.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/coin_game.txt new file mode 100644 index 0000000..c14172c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/coin_game.txt @@ -0,0 +1,430 @@ +game: coin_game + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "The Coin Game" +GameType.max_num_players = 10 +GameType.min_num_players = 1 +GameType.parameter_specification = ["columns", "episode_length", "num_coins_per_color", "num_extra_coin_colors", "players", "rows"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = False +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "coin_game" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 5 +PolicyTensorShape() = [5] +MaxChanceOutcomes() = 64 +GetParameters() = {columns=8,episode_length=20,num_coins_per_color=4,num_extra_coin_colors=1,players=2,rows=8} +NumPlayers() = 2 +MinUtility() = -144.0 +MaxUtility() = 144.0 +UtilitySum() = None +MaxGameLength() = 20 +ToString() = "coin_game()" + +# State 0 +# phase=AssignPreferences +# preferences= +# moves=0 +# a b c +# player0 0 0 0 +# player1 
0 0 0 +# +--------+ +# | | +# | | +# | | +# | | +# | | +# | | +# | | +# | | +# +--------+ +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "0\n a b c \nplayer0 0 0 0 \nplayer1 0 0 0 \n+--------+\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n+--------+\n" +ObservationString(1) = "0\n a b c \nplayer0 0 0 0 \nplayer1 0 0 0 \n+--------+\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n+--------+\n" +ChanceOutcomes() = [(0,0.333333), (1,0.333333), (2,0.333333)] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["0", "1", "2"] + +# Apply action "1" +action: 1 + +# State 1 +# phase=AssignPreferences +# preferences=0:b +# moves=0 +# a b c +# player0 0 0 0 +# player1 0 0 0 +# +--------+ +# | | +# | | +# | | +# | | +# | | +# | | +# | | +# | | +# +--------+ +IsTerminal() = False +History() = [1] +HistoryString() = "1" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "1\n a b c \nplayer0 0 0 0 \nplayer1 0 0 0 \n+--------+\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n+--------+\n" +ObservationString(1) = "0\n a b c \nplayer0 0 0 0 \nplayer1 0 0 0 \n+--------+\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n| |\n+--------+\n" +ChanceOutcomes() = [(0,0.5), (2,0.5)] +LegalActions() = [0, 2] +StringLegalActions() = ["0", "2"] + +# Apply action "2" +action: 2 + +# State 2 +# Apply action "62" +action: 62 + +# State 3 +# Apply action "50" +action: 50 + +# State 4 +# Apply action "34" +action: 34 + +# State 5 +# Apply action "9" +action: 9 + +# State 6 +# Apply action "49" +action: 49 + +# State 7 +# Apply action "25" +action: 25 + +# State 8 +# Apply action "60" +action: 60 + +# State 9 +# Apply action "42" +action: 42 + +# State 10 +# Apply action "55" +action: 55 + +# State 11 +# Apply action "19" +action: 19 + +# State 12 +# Apply action "17" +action: 17 + +# State 13 +# Apply action "44" +action: 44 + +# State 14 +# Apply action "35" +action: 35 + +# State 15 +# Apply action "47" +action: 47 + +# State 16 +# phase=Play +# preferences=0:b 1:c +# moves=0 +# a b c +# player0 0 0 0 +# player1 0 0 0 +# +--------+ +# | | +# | a | +# | c b | +# | a | +# | ac | +# | b c c| +# | a1 b| +# | b 0 | +# +--------+ +IsTerminal() = False +History() = [1, 2, 62, 50, 34, 9, 49, 25, 60, 42, 55, 19, 17, 44, 35, 47] +HistoryString() = "1, 2, 62, 50, 34, 9, 49, 25, 60, 42, 55, 19, 17, 44, 35, 47" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "1\n a b c \nplayer0 0 0 0 \nplayer1 0 0 0 \n+--------+\n| |\n| a |\n| c b |\n| a |\n| ac |\n| b c c|\n| a1 b|\n| b 0 |\n+--------+\n" +ObservationString(1) = "2\n a b c \nplayer0 0 0 0 \nplayer1 0 0 0 \n+--------+\n| |\n| a |\n| c b |\n| a |\n| ac |\n| b c c|\n| a1 b|\n| b 0 |\n+--------+\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4] +StringLegalActions() = ["up", "down", "left", "right", "stand"] + +# Apply action "stand" +action: 4 + +# State 17 +# phase=Play +# preferences=0:b 1:c +# moves=1 +# a b c +# player0 0 0 0 +# player1 0 0 0 +# +--------+ +# | | +# | a | +# | c b | +# | a | +# | ac | +# | b c c| +# | a1 b| +# | b 0 | +# +--------+ +IsTerminal() = False +History() = [1, 2, 62, 50, 34, 9, 49, 25, 60, 42, 55, 19, 17, 44, 35, 47, 4] +HistoryString() = "1, 2, 62, 50, 34, 9, 49, 25, 60, 42, 55, 19, 17, 44, 35, 47, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "1\n a b c \nplayer0 0 0 0 \nplayer1 0 0 0 
\n+--------+\n| |\n| a |\n| c b |\n| a |\n| ac |\n| b c c|\n| a1 b|\n| b 0 |\n+--------+\n" +ObservationString(1) = "2\n a b c \nplayer0 0 0 0 \nplayer1 0 0 0 \n+--------+\n| |\n| a |\n| c b |\n| a |\n| ac |\n| b c c|\n| a1 b|\n| b 0 |\n+--------+\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4] +StringLegalActions() = ["up", "down", "left", "right", "stand"] + +# Apply action "up" +action: 0 + +# State 18 +# phase=Play +# preferences=0:b 1:c +# moves=2 +# a b c +# player0 0 0 0 +# player1 0 1 0 +# +--------+ +# | | +# | a | +# | c b | +# | a | +# | ac | +# | 1 c c| +# | a b| +# | b 0 | +# +--------+ +IsTerminal() = False +History() = [1, 2, 62, 50, 34, 9, 49, 25, 60, 42, 55, 19, 17, 44, 35, 47, 4, 0] +HistoryString() = "1, 2, 62, 50, 34, 9, 49, 25, 60, 42, 55, 19, 17, 44, 35, 47, 4, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "1\n a b c \nplayer0 0 0 0 \nplayer1 0 1 0 \n+--------+\n| |\n| a |\n| c b |\n| a |\n| ac |\n| 1 c c|\n| a b|\n| b 0 |\n+--------+\n" +ObservationString(1) = "2\n a b c \nplayer0 0 0 0 \nplayer1 0 1 0 \n+--------+\n| |\n| a |\n| c b |\n| a |\n| ac |\n| 1 c c|\n| a b|\n| b 0 |\n+--------+\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4] +StringLegalActions() = ["up", "down", "left", "right", "stand"] + +# Apply action "right" +action: 3 + +# State 19 +# phase=Play +# preferences=0:b 1:c +# moves=3 +# a b c +# player0 0 0 0 +# player1 0 1 0 +# +--------+ +# | | +# | a | +# | c b | +# | a | +# | ac | +# | 1 c c| +# | a b| +# | b 0| +# +--------+ +IsTerminal() = False +History() = [1, 2, 62, 50, 34, 9, 49, 25, 60, 42, 55, 19, 17, 44, 35, 47, 4, 0, 3] +HistoryString() = "1, 2, 62, 50, 34, 9, 49, 25, 60, 42, 55, 19, 17, 44, 35, 47, 4, 0, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "1\n a b c \nplayer0 0 0 0 \nplayer1 0 1 0 \n+--------+\n| |\n| a |\n| c b |\n| a |\n| ac |\n| 1 c c|\n| a b|\n| b 0|\n+--------+\n" +ObservationString(1) = "2\n a b c \nplayer0 0 0 0 \nplayer1 0 1 0 \n+--------+\n| |\n| a |\n| c b |\n| a |\n| ac |\n| 1 c c|\n| a b|\n| b 0|\n+--------+\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4] +StringLegalActions() = ["up", "down", "left", "right", "stand"] + +# Apply action "right" +action: 3 + +# State 20 +# phase=Play +# preferences=0:b 1:c +# moves=4 +# a b c +# player0 0 0 0 +# player1 0 1 0 +# +--------+ +# | | +# | a | +# | c b | +# | a | +# | ac | +# | 1c c| +# | a b| +# | b 0| +# +--------+ +IsTerminal() = False +History() = [1, 2, 62, 50, 34, 9, 49, 25, 60, 42, 55, 19, 17, 44, 35, 47, 4, 0, 3, 3] +HistoryString() = "1, 2, 62, 50, 34, 9, 49, 25, 60, 42, 55, 19, 17, 44, 35, 47, 4, 0, 3, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "1\n a b c \nplayer0 0 0 0 \nplayer1 0 1 0 \n+--------+\n| |\n| a |\n| c b |\n| a |\n| ac |\n| 1c c|\n| a b|\n| b 0|\n+--------+\n" +ObservationString(1) = "2\n a b c \nplayer0 0 0 0 \nplayer1 0 1 0 \n+--------+\n| |\n| a |\n| c b |\n| a |\n| ac |\n| 1c c|\n| a b|\n| b 0|\n+--------+\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4] +StringLegalActions() = ["up", "down", "left", "right", "stand"] + +# Apply action "left" +action: 2 + +# State 21 +# phase=Play +# preferences=0:b 1:c +# moves=5 +# a b c +# player0 0 0 0 +# player1 0 1 0 +# +--------+ +# | | +# | a | +# | c b | +# | a | +# | ac | +# | 1c c| +# | a b| +# | b 0 | +# +--------+ 
+IsTerminal() = False +History() = [1, 2, 62, 50, 34, 9, 49, 25, 60, 42, 55, 19, 17, 44, 35, 47, 4, 0, 3, 3, 2] +HistoryString() = "1, 2, 62, 50, 34, 9, 49, 25, 60, 42, 55, 19, 17, 44, 35, 47, 4, 0, 3, 3, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "1\n a b c \nplayer0 0 0 0 \nplayer1 0 1 0 \n+--------+\n| |\n| a |\n| c b |\n| a |\n| ac |\n| 1c c|\n| a b|\n| b 0 |\n+--------+\n" +ObservationString(1) = "2\n a b c \nplayer0 0 0 0 \nplayer1 0 1 0 \n+--------+\n| |\n| a |\n| c b |\n| a |\n| ac |\n| 1c c|\n| a b|\n| b 0 |\n+--------+\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4] +StringLegalActions() = ["up", "down", "left", "right", "stand"] + +# Apply action "down" +action: 1 + +# State 22 +# Apply action "left" +action: 2 + +# State 23 +# Apply action "down" +action: 1 + +# State 24 +# Apply action "up" +action: 0 + +# State 25 +# Apply action "up" +action: 0 + +# State 26 +# Apply action "stand" +action: 4 + +# State 27 +# Apply action "stand" +action: 4 + +# State 28 +# Apply action "right" +action: 3 + +# State 29 +# Apply action "left" +action: 2 + +# State 30 +# Apply action "down" +action: 1 + +# State 31 +# Apply action "down" +action: 1 + +# State 32 +# Apply action "up" +action: 0 + +# State 33 +# Apply action "left" +action: 2 + +# State 34 +# Apply action "up" +action: 0 + +# State 35 +# Apply action "stand" +action: 4 + +# State 36 +# phase=Play +# preferences=0:b 1:c +# moves=20 +# a b c +# player0 0 0 0 +# player1 0 1 0 +# +--------+ +# | | +# | a | +# | c b | +# | a | +# | ac | +# | c 0c| +# | a b| +# | 1 b | +# +--------+ +IsTerminal() = True +History() = [1, 2, 62, 50, 34, 9, 49, 25, 60, 42, 55, 19, 17, 44, 35, 47, 4, 0, 3, 3, 2, 1, 2, 1, 0, 0, 4, 4, 3, 2, 1, 1, 0, 2, 0, 4] +HistoryString() = "1, 2, 62, 50, 34, 9, 49, 25, 60, 42, 55, 19, 17, 44, 35, 47, 4, 0, 3, 3, 2, 1, 2, 1, 0, 0, 4, 4, 3, 2, 1, 1, 0, 2, 0, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "1\n a b c \nplayer0 0 0 0 \nplayer1 0 1 0 \n+--------+\n| |\n| a |\n| c b |\n| a |\n| ac |\n| c 0c|\n| a b|\n| 1 b |\n+--------+\n" +ObservationString(1) = "2\n a b c \nplayer0 0 0 0 \nplayer1 0 1 0 \n+--------+\n| |\n| a |\n| c b |\n| a |\n| ac |\n| c 0c|\n| a b|\n| 1 b |\n+--------+\n" +Rewards() = [1, 1] +Returns() = [1, 1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/colored_trails.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/colored_trails.txt new file mode 100644 index 0000000..4ef848b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/colored_trails.txt @@ -0,0 +1,209 @@ +game: colored_trails + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Colored Trails" +GameType.max_num_players = 3 +GameType.min_num_players = 3 +GameType.parameter_specification = ["board_size", "boards_file", "num_colors", "players"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "colored_trails" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 93123 +PolicyTensorShape() = 
[93123] +MaxChanceOutcomes() = 10 +GetParameters() = {board_size=4,boards_file=,num_colors=5,players=3} +NumPlayers() = 3 +MinUtility() = -400.0 +MaxUtility() = 150.0 +UtilitySum() = None +InformationStateTensorShape() = [463] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 463 +ObservationTensorShape() = [463] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 463 +MaxGameLength() = 3 +ToString() = "colored_trails()" + +# State 0 +# Initial chance node +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "@@@@\n@@@@\n@@@@\n@@@@\n\nPlayer: 0\nPos: -1 -1 -1 -1\nMy chips: \nResponder chips: \n" +InformationStateString(1) = "@@@@\n@@@@\n@@@@\n@@@@\n\nPlayer: 1\nPos: -1 -1 -1 -1\nMy chips: \nResponder chips: \n" +InformationStateString(2) = "@@@@\n@@@@\n@@@@\n@@@@\n\nPlayer: 2\nPos: -1 -1 -1 -1\nP0 chips: \nP1 chips: \n" +InformationStateTensor(0): zeros(463) +InformationStateTensor(1): zeros(463) +InformationStateTensor(2): zeros(463) +ObservationString(0) = "@@@@\n@@@@\n@@@@\n@@@@\n\nPlayer: 0\nPos: -1 -1 -1 -1\nMy chips: \nResponder chips: \n" +ObservationString(1) = "@@@@\n@@@@\n@@@@\n@@@@\n\nPlayer: 1\nPos: -1 -1 -1 -1\nMy chips: \nResponder chips: \n" +ObservationString(2) = "@@@@\n@@@@\n@@@@\n@@@@\n\nPlayer: 2\nPos: -1 -1 -1 -1\nP0 chips: \nP1 chips: \n" +ObservationTensor(0): zeros(463) +ObservationTensor(1): zeros(463) +ObservationTensor(2): zeros(463) +ChanceOutcomes() = [(0,0.1), (1,0.1), (2,0.1), (3,0.1), (4,0.1), (5,0.1), (6,0.1), (7,0.1), (8,0.1), (9,0.1)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +StringLegalActions() = ["Chance outcome 0", "Chance outcome 1", "Chance outcome 2", "Chance outcome 3", "Chance outcome 4", "Chance outcome 5", "Chance outcome 6", "Chance outcome 7", "Chance outcome 8", "Chance outcome 9"] + +# Apply action "Chance outcome 5" +action: 5 + +# State 1 +# Move Number: 1 +# BACB +# BEAA +# DBDC +# ECAE +# +# P0 chips: ABCCCDD +# P1 chips: BCDDEE +# P2 chips: ACCCEEE +# Pos: 0 7 5 13 +IsTerminal() = False +History() = [5] +HistoryString() = "5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "BACB\nBEAA\nDBDC\nECAE\n\nPlayer: 0\nPos: 0 7 5 13\nMy chips: ABCCCDD\nResponder chips: ACCCEEE\n" +InformationStateString(1) = "BACB\nBEAA\nDBDC\nECAE\n\nPlayer: 1\nPos: 0 7 5 13\nMy chips: BCDDEE\nResponder chips: ACCCEEE\n" +InformationStateString(2) = "BACB\nBEAA\nDBDC\nECAE\n\nPlayer: 2\nPos: 0 7 5 13\nP0 chips: ABCCCDD\nP1 chips: BCDDEE\n" +InformationStateTensor(0): binvec(463, 0x422044203080902204900c00000800200000260301e0e040201008040201808078201e0000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(463, 0x222044203080902204900c0000080020000024030180e070201008040201808078201e0000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(463, 0x122044203080902204900c00000800200000260301e0e04020180c070381808078201e0000000000000000000000000000000000000000000000) +ObservationString(0) = "BACB\nBEAA\nDBDC\nECAE\n\nPlayer: 0\nPos: 0 7 5 13\nMy chips: ABCCCDD\nResponder chips: ACCCEEE\n" +ObservationString(1) = "BACB\nBEAA\nDBDC\nECAE\n\nPlayer: 1\nPos: 0 7 5 13\nMy chips: BCDDEE\nResponder chips: ACCCEEE\n" +ObservationString(2) = "BACB\nBEAA\nDBDC\nECAE\n\nPlayer: 2\nPos: 0 7 5 13\nP0 chips: ABCCCDD\nP1 chips: BCDDEE\n" +ObservationTensor(0): binvec(463, 
0x422044203080902204900c00000800200000260301e0e040201008040201808078201e0000000000000000000000000000000000000000000000) +ObservationTensor(1): binvec(463, 0x222044203080902204900c0000080020000024030180e070201008040201808078201e0000000000000000000000000000000000000000000000) +ObservationTensor(2): binvec(463, 0x122044203080902204900c00000800200000260301e0e04020180c070381808078201e0000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [330, 331, 332, 383, 384, 385, 409, 410, 411, 443, 444, 445, 486, 487, 488, 3952, 3953, 3955, 3956, 3957, 3959, 3963, 3964, 4439, 4442, 4450, 4451, 4926, 4928, 4929, 4934, 4935, 4936, 4937, 4938, 5901, 5903, 5904, 5906, 5909, 5910, 5912, 5915, 5919, 6553, 6556, 6560, 6561, 6568, 6569, 6570, 6711, 6713, 6714, 6715, 6716, 6717, 6718, 6719, 6720, 6721, 6722, 6723, 6724, 6725, 6726, 6727, 6728, 7030, 7036, 7046, 7192, 7196, 7204, 7205, 7222, 7223, 7224, 7680, 7683, 7687, 7688, 7695, 7696, 7697, 7998, 7999, 8003, 8004, 8005, 8012, 8013, 8014, 8015, 8161, 8164, 8165, 8170, 8171, 8172, 8173, 8174, 8185, 8186, 8187, 8188, 8189, 8190, 8191, 8192, 8193, 9464, 9466, 9469, 9470, 9472, 9475, 9479, 9482, 9485, 9489, 10761, 10765, 10801, 10809, 10810, 10817, 10818, 10819, 10830, 10831, 10832, 10960, 10964, 10965, 10966, 10967, 10968, 10969, 10970, 10971, 11041, 11042, 11043, 11044, 11045, 11046, 11047, 11049, 11050, 11051, 11083, 11088, 11089, 11090, 11091, 11092, 11093, 11094, 11095, 11096, 11097, 11098, 11099, 11100, 11101, 11102, 11103, 11104, 11106, 11107, 11108, 11109, 11110, 11111, 11112, 11113, 11114, 11494, 11504, 11659, 11669, 11815, 11828, 11829, 11846, 11847, 11848, 11879, 11880, 11881, 12304, 12312, 12313, 12320, 12321, 12322, 12333, 12334, 12335, 12627, 12631, 12667, 12675, 12676, 12683, 12684, 12685, 12696, 12697, 12698, 13076, 13077, 13078, 13085, 13086, 13087, 13088, 13100, 13101, 13102, 13241, 13242, 13243, 13250, 13251, 13252, 13253, 13265, 13266, 13267, 15080, 15082, 15085, 15089, 15092, 15095, 15099, 15110, 15114, 17279, 17324, 17360, 17377, 17378, 17379, 17390, 17391, 17392, 17409, 17410, 17411, 17520, 17527, 17528, 17529, 17530, 17531, 17532, 17534, 17535, 17536, 17603, 17609, 17616, 17617, 17618, 17619, 17620, 17621, 17623, 17624, 17625, 17708, 17709, 17710, 17711, 17713, 17714, 17715, 17719, 17720, 17753, 17754, 17755, 17756, 17758, 17759, 17760, 17764, 17765, 18310, 18475, 18640, 19276, 19293, 19294, 19295, 19306, 19307, 19308, 19325, 19326, 19327, 19605, 19641, 19658, 19659, 19660, 19671, 19672, 19673, 19690, 19691, 19692, 20055, 20100, 20651, 20652, 20653, 20654, 20666, 20667, 20668, 20687, 20688, 20816, 20817, 20818, 20819, 20831, 20832, 20833, 20852, 20853, 23242, 23245, 23249, 23260, 23264, 23285, 26922, 26933, 26934, 26935, 26937, 26938, 26939, 26942, 26943, 26944, 27013, 27024, 27025, 27026, 27028, 27029, 27030, 27033, 27034, 27035, 27249, 27250, 27251, 27255, 27256, 27262, 27294, 27295, 27296, 27300, 27301, 27307, 29498, 29529, 29530, 29531, 29548, 29549, 29550, 29574, 29575, 29576, 31272, 31273, 31274, 31293, 31294, 31321, 31437, 31438, 31439, 31458, 31459, 31486, 34497, 34501, 34522, 39852, 39868, 39869, 39870, 39873, 39874, 39875, 39879, 39880, 39881, 40260, 40261, 40267, 40305, 40306, 40312, 45548, 45549, 45576, 45713, 45714, 45741, 49455, 57405, 57450, 64156, 64321, 93122] +StringLegalActions() = ["Proposer 0: A for C", "Proposer 0: A for CC", "Proposer 0: A for CCC", "Proposer 0: A for CCCE", "Proposer 0: A for CCCEE", "Proposer 0: A for CCCEEE", "Proposer 0: A for CCE", "Proposer 
0: A for CCEE", "Proposer 0: A for CCEEE", "Proposer 0: A for CE", "Proposer 0: A for CEE", "Proposer 0: A for CEEE", "Proposer 0: A for E", "Proposer 0: A for EE", "Proposer 0: A for EEE", "Proposer 0: B for A", "Proposer 0: B for C", "Proposer 0: B for E", "Proposer 0: AB for C", "Proposer 0: AB for CC", "Proposer 0: AB for CE", "Proposer 0: AB for E", "Proposer 0: AB for EE", "Proposer 0: C for A", "Proposer 0: C for E", "Proposer 0: AC for E", "Proposer 0: AC for EE", "Proposer 0: D for A", "Proposer 0: D for C", "Proposer 0: D for E", "Proposer 0: AD for C", "Proposer 0: AD for CC", "Proposer 0: AD for CE", "Proposer 0: AD for E", "Proposer 0: AD for EE", "Proposer 0: B for AC", "Proposer 0: B for AE", "Proposer 0: B for CC", "Proposer 0: B for CE", "Proposer 0: B for EE", "Proposer 0: AB for CCC", "Proposer 0: AB for CCE", "Proposer 0: AB for CEE", "Proposer 0: AB for EEE", "Proposer 0: BC for A", "Proposer 0: BC for AE", "Proposer 0: BC for E", "Proposer 0: BC for EE", "Proposer 0: ABC for E", "Proposer 0: ABC for EE", "Proposer 0: ABC for EEE", "Proposer 0: BD for A", "Proposer 0: BD for AC", "Proposer 0: BD for AE", "Proposer 0: BD for C", "Proposer 0: BD for CC", "Proposer 0: BD for CE", "Proposer 0: BD for E", "Proposer 0: BD for EE", "Proposer 0: ABD for C", "Proposer 0: ABD for CC", "Proposer 0: ABD for CCC", "Proposer 0: ABD for CCE", "Proposer 0: ABD for CE", "Proposer 0: ABD for CEE", "Proposer 0: ABD for E", "Proposer 0: ABD for EE", "Proposer 0: ABD for EEE", "Proposer 0: C for AE", "Proposer 0: C for EE", "Proposer 0: AC for EEE", "Proposer 0: CC for A", "Proposer 0: CC for AE", "Proposer 0: CC for E", "Proposer 0: CC for EE", "Proposer 0: ACC for E", "Proposer 0: ACC for EE", "Proposer 0: ACC for EEE", "Proposer 0: CD for A", "Proposer 0: CD for AE", "Proposer 0: CD for E", "Proposer 0: CD for EE", "Proposer 0: ACD for E", "Proposer 0: ACD for EE", "Proposer 0: ACD for EEE", "Proposer 0: D for AC", "Proposer 0: D for AE", "Proposer 0: D for CC", "Proposer 0: D for CE", "Proposer 0: D for EE", "Proposer 0: AD for CCC", "Proposer 0: AD for CCE", "Proposer 0: AD for CEE", "Proposer 0: AD for EEE", "Proposer 0: DD for A", "Proposer 0: DD for AC", "Proposer 0: DD for AE", "Proposer 0: DD for C", "Proposer 0: DD for CC", "Proposer 0: DD for CE", "Proposer 0: DD for E", "Proposer 0: DD for EE", "Proposer 0: ADD for C", "Proposer 0: ADD for CC", "Proposer 0: ADD for CCC", "Proposer 0: ADD for CCE", "Proposer 0: ADD for CE", "Proposer 0: ADD for CEE", "Proposer 0: ADD for E", "Proposer 0: ADD for EE", "Proposer 0: ADD for EEE", "Proposer 0: B for ACC", "Proposer 0: B for ACE", "Proposer 0: B for AEE", "Proposer 0: B for CCC", "Proposer 0: B for CCE", "Proposer 0: B for CEE", "Proposer 0: B for EEE", "Proposer 0: AB for CCCE", "Proposer 0: AB for CCEE", "Proposer 0: AB for CEEE", "Proposer 0: BC for AEE", "Proposer 0: BC for EEE", "Proposer 0: BCC for A", "Proposer 0: BCC for AE", "Proposer 0: BCC for AEE", "Proposer 0: BCC for E", "Proposer 0: BCC for EE", "Proposer 0: BCC for EEE", "Proposer 0: ABCC for E", "Proposer 0: ABCC for EE", "Proposer 0: ABCC for EEE", "Proposer 0: BCD for A", "Proposer 0: BCD for AE", "Proposer 0: BCD for AEE", "Proposer 0: BCD for E", "Proposer 0: BCD for EE", "Proposer 0: BCD for EEE", "Proposer 0: ABCD for E", "Proposer 0: ABCD for EE", "Proposer 0: ABCD for EEE", "Proposer 0: BD for ACC", "Proposer 0: BD for ACE", "Proposer 0: BD for AEE", "Proposer 0: BD for CCC", "Proposer 0: BD for CCE", "Proposer 0: BD for CEE", "Proposer 0: BD for EEE", 
"Proposer 0: ABD for CCCE", "Proposer 0: ABD for CCEE", "Proposer 0: ABD for CEEE", "Proposer 0: BDD for A", "Proposer 0: BDD for AC", "Proposer 0: BDD for ACC", "Proposer 0: BDD for ACE", "Proposer 0: BDD for AE", "Proposer 0: BDD for AEE", "Proposer 0: BDD for C", "Proposer 0: BDD for CC", "Proposer 0: BDD for CCC", "Proposer 0: BDD for CCE", "Proposer 0: BDD for CE", "Proposer 0: BDD for CEE", "Proposer 0: BDD for E", "Proposer 0: BDD for EE", "Proposer 0: BDD for EEE", "Proposer 0: ABDD for C", "Proposer 0: ABDD for CC", "Proposer 0: ABDD for CCC", "Proposer 0: ABDD for CCCE", "Proposer 0: ABDD for CCE", "Proposer 0: ABDD for CCEE", "Proposer 0: ABDD for CE", "Proposer 0: ABDD for CEE", "Proposer 0: ABDD for CEEE", "Proposer 0: ABDD for E", "Proposer 0: ABDD for EE", "Proposer 0: ABDD for EEE", "Proposer 0: C for AEE", "Proposer 0: C for EEE", "Proposer 0: CC for AEE", "Proposer 0: CC for EEE", "Proposer 0: CCC for A", "Proposer 0: CCC for AE", "Proposer 0: CCC for AEE", "Proposer 0: CCC for E", "Proposer 0: CCC for EE", "Proposer 0: CCC for EEE", "Proposer 0: ACCC for E", "Proposer 0: ACCC for EE", "Proposer 0: ACCC for EEE", "Proposer 0: CCD for A", "Proposer 0: CCD for AE", "Proposer 0: CCD for AEE", "Proposer 0: CCD for E", "Proposer 0: CCD for EE", "Proposer 0: CCD for EEE", "Proposer 0: ACCD for E", "Proposer 0: ACCD for EE", "Proposer 0: ACCD for EEE", "Proposer 0: CD for AEE", "Proposer 0: CD for EEE", "Proposer 0: CDD for A", "Proposer 0: CDD for AE", "Proposer 0: CDD for AEE", "Proposer 0: CDD for E", "Proposer 0: CDD for EE", "Proposer 0: CDD for EEE", "Proposer 0: ACDD for E", "Proposer 0: ACDD for EE", "Proposer 0: ACDD for EEE", "Proposer 0: D for ACC", "Proposer 0: D for ACE", "Proposer 0: D for AEE", "Proposer 0: D for CCC", "Proposer 0: D for CCE", "Proposer 0: D for CEE", "Proposer 0: D for EEE", "Proposer 0: AD for CCCE", "Proposer 0: AD for CCEE", "Proposer 0: AD for CEEE", "Proposer 0: DD for ACC", "Proposer 0: DD for ACE", "Proposer 0: DD for AEE", "Proposer 0: DD for CCC", "Proposer 0: DD for CCE", "Proposer 0: DD for CEE", "Proposer 0: DD for EEE", "Proposer 0: ADD for CCCE", "Proposer 0: ADD for CCEE", "Proposer 0: ADD for CEEE", "Proposer 0: B for ACCC", "Proposer 0: B for ACCE", "Proposer 0: B for ACEE", "Proposer 0: B for AEEE", "Proposer 0: B for CCCE", "Proposer 0: B for CCEE", "Proposer 0: B for CEEE", "Proposer 0: AB for CCCEE", "Proposer 0: AB for CCEEE", "Proposer 0: BC for AEEE", "Proposer 0: BCC for AEEE", "Proposer 0: BCCC for A", "Proposer 0: BCCC for AE", "Proposer 0: BCCC for AEE", "Proposer 0: BCCC for AEEE", "Proposer 0: BCCC for E", "Proposer 0: BCCC for EE", "Proposer 0: BCCC for EEE", "Proposer 0: ABCCC for E", "Proposer 0: ABCCC for EE", "Proposer 0: ABCCC for EEE", "Proposer 0: BCCD for A", "Proposer 0: BCCD for AE", "Proposer 0: BCCD for AEE", "Proposer 0: BCCD for AEEE", "Proposer 0: BCCD for E", "Proposer 0: BCCD for EE", "Proposer 0: BCCD for EEE", "Proposer 0: ABCCD for E", "Proposer 0: ABCCD for EE", "Proposer 0: ABCCD for EEE", "Proposer 0: BCD for AEEE", "Proposer 0: BCDD for A", "Proposer 0: BCDD for AE", "Proposer 0: BCDD for AEE", "Proposer 0: BCDD for AEEE", "Proposer 0: BCDD for E", "Proposer 0: BCDD for EE", "Proposer 0: BCDD for EEE", "Proposer 0: ABCDD for E", "Proposer 0: ABCDD for EE", "Proposer 0: ABCDD for EEE", "Proposer 0: BD for ACCC", "Proposer 0: BD for ACCE", "Proposer 0: BD for ACEE", "Proposer 0: BD for AEEE", "Proposer 0: BD for CCCE", "Proposer 0: BD for CCEE", "Proposer 0: BD for CEEE", "Proposer 0: ABD for 
CCCEE", "Proposer 0: ABD for CCEEE", "Proposer 0: BDD for ACCC", "Proposer 0: BDD for ACCE", "Proposer 0: BDD for ACEE", "Proposer 0: BDD for AEEE", "Proposer 0: BDD for CCCE", "Proposer 0: BDD for CCEE", "Proposer 0: BDD for CEEE", "Proposer 0: ABDD for CCCEE", "Proposer 0: ABDD for CCEEE", "Proposer 0: C for AEEE", "Proposer 0: CC for AEEE", "Proposer 0: CCC for AEEE", "Proposer 0: CCCD for A", "Proposer 0: CCCD for AE", "Proposer 0: CCCD for AEE", "Proposer 0: CCCD for AEEE", "Proposer 0: CCCD for E", "Proposer 0: CCCD for EE", "Proposer 0: CCCD for EEE", "Proposer 0: ACCCD for E", "Proposer 0: ACCCD for EE", "Proposer 0: ACCCD for EEE", "Proposer 0: CCD for AEEE", "Proposer 0: CCDD for A", "Proposer 0: CCDD for AE", "Proposer 0: CCDD for AEE", "Proposer 0: CCDD for AEEE", "Proposer 0: CCDD for E", "Proposer 0: CCDD for EE", "Proposer 0: CCDD for EEE", "Proposer 0: ACCDD for E", "Proposer 0: ACCDD for EE", "Proposer 0: ACCDD for EEE", "Proposer 0: CD for AEEE", "Proposer 0: CDD for AEEE", "Proposer 0: D for ACCC", "Proposer 0: D for ACCE", "Proposer 0: D for ACEE", "Proposer 0: D for AEEE", "Proposer 0: D for CCCE", "Proposer 0: D for CCEE", "Proposer 0: D for CEEE", "Proposer 0: AD for CCCEE", "Proposer 0: AD for CCEEE", "Proposer 0: DD for ACCC", "Proposer 0: DD for ACCE", "Proposer 0: DD for ACEE", "Proposer 0: DD for AEEE", "Proposer 0: DD for CCCE", "Proposer 0: DD for CCEE", "Proposer 0: DD for CEEE", "Proposer 0: ADD for CCCEE", "Proposer 0: ADD for CCEEE", "Proposer 0: B for ACCCE", "Proposer 0: B for ACCEE", "Proposer 0: B for ACEEE", "Proposer 0: B for CCCEE", "Proposer 0: B for CCEEE", "Proposer 0: AB for CCCEEE", "Proposer 0: BCCCD for A", "Proposer 0: BCCCD for AE", "Proposer 0: BCCCD for AEE", "Proposer 0: BCCCD for AEEE", "Proposer 0: BCCCD for E", "Proposer 0: BCCCD for EE", "Proposer 0: BCCCD for EEE", "Proposer 0: ABCCCD for E", "Proposer 0: ABCCCD for EE", "Proposer 0: ABCCCD for EEE", "Proposer 0: BCCDD for A", "Proposer 0: BCCDD for AE", "Proposer 0: BCCDD for AEE", "Proposer 0: BCCDD for AEEE", "Proposer 0: BCCDD for E", "Proposer 0: BCCDD for EE", "Proposer 0: BCCDD for EEE", "Proposer 0: ABCCDD for E", "Proposer 0: ABCCDD for EE", "Proposer 0: ABCCDD for EEE", "Proposer 0: BD for ACCCE", "Proposer 0: BD for ACCEE", "Proposer 0: BD for ACEEE", "Proposer 0: BD for CCCEE", "Proposer 0: BD for CCEEE", "Proposer 0: ABD for CCCEEE", "Proposer 0: BDD for ACCCE", "Proposer 0: BDD for ACCEE", "Proposer 0: BDD for ACEEE", "Proposer 0: BDD for CCCEE", "Proposer 0: BDD for CCEEE", "Proposer 0: ABDD for CCCEEE", "Proposer 0: CCCDD for A", "Proposer 0: CCCDD for AE", "Proposer 0: CCCDD for AEE", "Proposer 0: CCCDD for AEEE", "Proposer 0: CCCDD for E", "Proposer 0: CCCDD for EE", "Proposer 0: CCCDD for EEE", "Proposer 0: ACCCDD for E", "Proposer 0: ACCCDD for EE", "Proposer 0: ACCCDD for EEE", "Proposer 0: D for ACCCE", "Proposer 0: D for ACCEE", "Proposer 0: D for ACEEE", "Proposer 0: D for CCCEE", "Proposer 0: D for CCEEE", "Proposer 0: AD for CCCEEE", "Proposer 0: DD for ACCCE", "Proposer 0: DD for ACCEE", "Proposer 0: DD for ACEEE", "Proposer 0: DD for CCCEE", "Proposer 0: DD for CCEEE", "Proposer 0: ADD for CCCEEE", "Proposer 0: B for ACCCEE", "Proposer 0: B for ACCEEE", "Proposer 0: B for CCCEEE", "Proposer 0: BCCCDD for A", "Proposer 0: BCCCDD for AE", "Proposer 0: BCCCDD for AEE", "Proposer 0: BCCCDD for AEEE", "Proposer 0: BCCCDD for E", "Proposer 0: BCCCDD for EE", "Proposer 0: BCCCDD for EEE", "Proposer 0: ABCCCDD for E", "Proposer 0: ABCCCDD for EE", "Proposer 0: 
ABCCCDD for EEE", "Proposer 0: BD for ACCCEE", "Proposer 0: BD for ACCEEE", "Proposer 0: BD for CCCEEE", "Proposer 0: BDD for ACCCEE", "Proposer 0: BDD for ACCEEE", "Proposer 0: BDD for CCCEEE", "Proposer 0: D for ACCCEE", "Proposer 0: D for ACCEEE", "Proposer 0: D for CCCEEE", "Proposer 0: DD for ACCCEE", "Proposer 0: DD for ACCEEE", "Proposer 0: DD for CCCEEE", "Proposer 0: B for ACCCEEE", "Proposer 0: BD for ACCCEEE", "Proposer 0: BDD for ACCCEEE", "Proposer 0: D for ACCCEEE", "Proposer 0: DD for ACCCEEE", "Proposer 0: Pass trade."] + +# Apply action "Proposer 0: ABDD for C" +action: 11102 + +# State 2 +# Move Number: 2 +# BACB +# BEAA +# DBDC +# ECAE +# +# P0 chips: ABCCCDD +# P1 chips: BCDDEE +# P2 chips: ACCCEEE +# Pos: 0 7 5 13 +# Proposal 0: ABDD for C +IsTerminal() = False +History() = [5, 11102] +HistoryString() = "5, 11102" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "BACB\nBEAA\nDBDC\nECAE\n\nPlayer: 0\nPos: 0 7 5 13\nMy chips: ABCCCDD\nResponder chips: ACCCEEE\n" +InformationStateString(1) = "BACB\nBEAA\nDBDC\nECAE\n\nPlayer: 1\nPos: 0 7 5 13\nMy chips: BCDDEE\nResponder chips: ACCCEEE\n" +InformationStateString(2) = "BACB\nBEAA\nDBDC\nECAE\n\nPlayer: 2\nPos: 0 7 5 13\nP0 chips: ABCCCDD\nP1 chips: BCDDEE\n" +InformationStateTensor(0): binvec(463, 0x422044203080902204900c00000800200000260301e0e040201008040201808078201e0000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(463, 0x222044203080902204900c0000080020000024030180e070201008040201808078201e0000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(463, 0x122044203080902204900c00000800200000260301e0e04020180c070381808078201e0000000000000000000000000000000000000000000000) +ObservationString(0) = "BACB\nBEAA\nDBDC\nECAE\n\nPlayer: 0\nPos: 0 7 5 13\nMy chips: ABCCCDD\nResponder chips: ACCCEEE\n" +ObservationString(1) = "BACB\nBEAA\nDBDC\nECAE\n\nPlayer: 1\nPos: 0 7 5 13\nMy chips: BCDDEE\nResponder chips: ACCCEEE\n" +ObservationString(2) = "BACB\nBEAA\nDBDC\nECAE\n\nPlayer: 2\nPos: 0 7 5 13\nP0 chips: ABCCCDD\nP1 chips: BCDDEE\n" +ObservationTensor(0): binvec(463, 0x422044203080902204900c00000800200000260301e0e040201008040201808078201e0000000000000000000000000000000000000000000000) +ObservationTensor(1): binvec(463, 0x222044203080902204900c0000080020000024030180e070201008040201808078201e0000000000000000000000000000000000000000000000) +ObservationTensor(2): binvec(463, 0x122044203080902204900c00000800200000260301e0e04020180c070381808078201e0000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [3952, 3953, 3955, 4439, 4442, 4926, 4928, 4929, 5413, 5415, 5901, 5903, 5904, 5906, 5909, 6553, 6556, 6560, 6561, 6711, 6713, 6714, 6715, 6716, 6717, 6718, 6719, 6869, 6871, 6873, 6874, 7030, 7036, 7680, 7683, 7687, 7688, 7838, 7998, 7999, 8003, 8004, 8005, 8161, 8164, 8165, 8170, 8171, 8172, 8173, 8174, 8649, 8652, 8656, 8657, 8809, 8814, 8972, 8975, 8981, 8982, 9464, 9466, 9469, 9470, 9472, 9475, 9479, 10761, 10765, 10960, 10964, 10965, 10966, 10967, 10968, 10999, 11041, 11042, 11043, 11044, 11045, 11046, 11047, 11083, 11088, 11089, 11090, 11091, 11092, 11093, 11094, 11095, 11096, 11097, 11098, 11099, 11100, 11101, 11242, 11246, 11247, 11248, 11249, 11250, 11284, 11287, 11326, 11331, 11332, 11336, 11337, 11338, 11494, 11504, 12627, 12631, 12667, 12675, 12676, 12683, 12684, 12685, 12826, 12910, 13076, 13077, 13078, 13085, 13086, 13087, 13088, 13241, 13242, 13243, 
13250, 13251, 13252, 13253, 13888, 13896, 13897, 13904, 13905, 13906, 14052, 14056, 14092, 14100, 14101, 14108, 14109, 14110, 14258, 14267, 14423, 14432, 15080, 15082, 15085, 15089, 15092, 15095, 15099, 17279, 17603, 17609, 17616, 17617, 17618, 17619, 17620, 17621, 17649, 17662, 17708, 17709, 17710, 17711, 17713, 17714, 17715, 17753, 17754, 17755, 17756, 17758, 17759, 17760, 17952, 17959, 17960, 17961, 17962, 17963, 17964, 17995, 18001, 18008, 18009, 18010, 18011, 18012, 18013, 18047, 18092, 18310, 20055, 20100, 20296, 20345, 20651, 20652, 20653, 20654, 20666, 20667, 20668, 20816, 20817, 20818, 20819, 20831, 20832, 20833, 21789, 21825, 21842, 21843, 21844, 21855, 21856, 21857, 21994, 22039, 22251, 22416, 23242, 23245, 23249, 23260, 23264, 27168, 27174, 27249, 27250, 27251, 27255, 27256, 27294, 27295, 27296, 27300, 27301, 27584, 27595, 27596, 27597, 27599, 27600, 27601, 30845, 31272, 31273, 31274, 31293, 31294, 31437, 31438, 31439, 31458, 31459, 34497, 34501, 34522, 40160, 40260, 40261, 40267, 40305, 40306, 40312, 45548, 45549, 45576, 45713, 45714, 45741, 49455, 57405, 57450, 64156, 64321, 93122] +StringLegalActions() = ["Proposer 1: B for A", "Proposer 1: B for C", "Proposer 1: B for E", "Proposer 1: C for A", "Proposer 1: C for E", "Proposer 1: D for A", "Proposer 1: D for C", "Proposer 1: D for E", "Proposer 1: E for A", "Proposer 1: E for C", "Proposer 1: B for AC", "Proposer 1: B for AE", "Proposer 1: B for CC", "Proposer 1: B for CE", "Proposer 1: B for EE", "Proposer 1: BC for A", "Proposer 1: BC for AE", "Proposer 1: BC for E", "Proposer 1: BC for EE", "Proposer 1: BD for A", "Proposer 1: BD for AC", "Proposer 1: BD for AE", "Proposer 1: BD for C", "Proposer 1: BD for CC", "Proposer 1: BD for CE", "Proposer 1: BD for E", "Proposer 1: BD for EE", "Proposer 1: BE for A", "Proposer 1: BE for AC", "Proposer 1: BE for C", "Proposer 1: BE for CC", "Proposer 1: C for AE", "Proposer 1: C for EE", "Proposer 1: CD for A", "Proposer 1: CD for AE", "Proposer 1: CD for E", "Proposer 1: CD for EE", "Proposer 1: CE for A", "Proposer 1: D for AC", "Proposer 1: D for AE", "Proposer 1: D for CC", "Proposer 1: D for CE", "Proposer 1: D for EE", "Proposer 1: DD for A", "Proposer 1: DD for AC", "Proposer 1: DD for AE", "Proposer 1: DD for C", "Proposer 1: DD for CC", "Proposer 1: DD for CE", "Proposer 1: DD for E", "Proposer 1: DD for EE", "Proposer 1: DE for A", "Proposer 1: DE for AC", "Proposer 1: DE for C", "Proposer 1: DE for CC", "Proposer 1: E for AC", "Proposer 1: E for CC", "Proposer 1: EE for A", "Proposer 1: EE for AC", "Proposer 1: EE for C", "Proposer 1: EE for CC", "Proposer 1: B for ACC", "Proposer 1: B for ACE", "Proposer 1: B for AEE", "Proposer 1: B for CCC", "Proposer 1: B for CCE", "Proposer 1: B for CEE", "Proposer 1: B for EEE", "Proposer 1: BC for AEE", "Proposer 1: BC for EEE", "Proposer 1: BCD for A", "Proposer 1: BCD for AE", "Proposer 1: BCD for AEE", "Proposer 1: BCD for E", "Proposer 1: BCD for EE", "Proposer 1: BCD for EEE", "Proposer 1: BCE for A", "Proposer 1: BD for ACC", "Proposer 1: BD for ACE", "Proposer 1: BD for AEE", "Proposer 1: BD for CCC", "Proposer 1: BD for CCE", "Proposer 1: BD for CEE", "Proposer 1: BD for EEE", "Proposer 1: BDD for A", "Proposer 1: BDD for AC", "Proposer 1: BDD for ACC", "Proposer 1: BDD for ACE", "Proposer 1: BDD for AE", "Proposer 1: BDD for AEE", "Proposer 1: BDD for C", "Proposer 1: BDD for CC", "Proposer 1: BDD for CCC", "Proposer 1: BDD for CCE", "Proposer 1: BDD for CE", "Proposer 1: BDD for CEE", "Proposer 1: BDD for E", "Proposer 
1: BDD for EE", "Proposer 1: BDD for EEE", "Proposer 1: BDE for A", "Proposer 1: BDE for AC", "Proposer 1: BDE for ACC", "Proposer 1: BDE for C", "Proposer 1: BDE for CC", "Proposer 1: BDE for CCC", "Proposer 1: BE for ACC", "Proposer 1: BE for CCC", "Proposer 1: BEE for A", "Proposer 1: BEE for AC", "Proposer 1: BEE for ACC", "Proposer 1: BEE for C", "Proposer 1: BEE for CC", "Proposer 1: BEE for CCC", "Proposer 1: C for AEE", "Proposer 1: C for EEE", "Proposer 1: CD for AEE", "Proposer 1: CD for EEE", "Proposer 1: CDD for A", "Proposer 1: CDD for AE", "Proposer 1: CDD for AEE", "Proposer 1: CDD for E", "Proposer 1: CDD for EE", "Proposer 1: CDD for EEE", "Proposer 1: CDE for A", "Proposer 1: CEE for A", "Proposer 1: D for ACC", "Proposer 1: D for ACE", "Proposer 1: D for AEE", "Proposer 1: D for CCC", "Proposer 1: D for CCE", "Proposer 1: D for CEE", "Proposer 1: D for EEE", "Proposer 1: DD for ACC", "Proposer 1: DD for ACE", "Proposer 1: DD for AEE", "Proposer 1: DD for CCC", "Proposer 1: DD for CCE", "Proposer 1: DD for CEE", "Proposer 1: DD for EEE", "Proposer 1: DDE for A", "Proposer 1: DDE for AC", "Proposer 1: DDE for ACC", "Proposer 1: DDE for C", "Proposer 1: DDE for CC", "Proposer 1: DDE for CCC", "Proposer 1: DE for ACC", "Proposer 1: DE for CCC", "Proposer 1: DEE for A", "Proposer 1: DEE for AC", "Proposer 1: DEE for ACC", "Proposer 1: DEE for C", "Proposer 1: DEE for CC", "Proposer 1: DEE for CCC", "Proposer 1: E for ACC", "Proposer 1: E for CCC", "Proposer 1: EE for ACC", "Proposer 1: EE for CCC", "Proposer 1: B for ACCC", "Proposer 1: B for ACCE", "Proposer 1: B for ACEE", "Proposer 1: B for AEEE", "Proposer 1: B for CCCE", "Proposer 1: B for CCEE", "Proposer 1: B for CEEE", "Proposer 1: BC for AEEE", "Proposer 1: BCD for AEEE", "Proposer 1: BCDD for A", "Proposer 1: BCDD for AE", "Proposer 1: BCDD for AEE", "Proposer 1: BCDD for AEEE", "Proposer 1: BCDD for E", "Proposer 1: BCDD for EE", "Proposer 1: BCDD for EEE", "Proposer 1: BCDE for A", "Proposer 1: BCEE for A", "Proposer 1: BD for ACCC", "Proposer 1: BD for ACCE", "Proposer 1: BD for ACEE", "Proposer 1: BD for AEEE", "Proposer 1: BD for CCCE", "Proposer 1: BD for CCEE", "Proposer 1: BD for CEEE", "Proposer 1: BDD for ACCC", "Proposer 1: BDD for ACCE", "Proposer 1: BDD for ACEE", "Proposer 1: BDD for AEEE", "Proposer 1: BDD for CCCE", "Proposer 1: BDD for CCEE", "Proposer 1: BDD for CEEE", "Proposer 1: BDDE for A", "Proposer 1: BDDE for AC", "Proposer 1: BDDE for ACC", "Proposer 1: BDDE for ACCC", "Proposer 1: BDDE for C", "Proposer 1: BDDE for CC", "Proposer 1: BDDE for CCC", "Proposer 1: BDE for ACCC", "Proposer 1: BDEE for A", "Proposer 1: BDEE for AC", "Proposer 1: BDEE for ACC", "Proposer 1: BDEE for ACCC", "Proposer 1: BDEE for C", "Proposer 1: BDEE for CC", "Proposer 1: BDEE for CCC", "Proposer 1: BE for ACCC", "Proposer 1: BEE for ACCC", "Proposer 1: C for AEEE", "Proposer 1: CD for AEEE", "Proposer 1: CDD for AEEE", "Proposer 1: CDDE for A", "Proposer 1: CDEE for A", "Proposer 1: D for ACCC", "Proposer 1: D for ACCE", "Proposer 1: D for ACEE", "Proposer 1: D for AEEE", "Proposer 1: D for CCCE", "Proposer 1: D for CCEE", "Proposer 1: D for CEEE", "Proposer 1: DD for ACCC", "Proposer 1: DD for ACCE", "Proposer 1: DD for ACEE", "Proposer 1: DD for AEEE", "Proposer 1: DD for CCCE", "Proposer 1: DD for CCEE", "Proposer 1: DD for CEEE", "Proposer 1: DDE for ACCC", "Proposer 1: DDEE for A", "Proposer 1: DDEE for AC", "Proposer 1: DDEE for ACC", "Proposer 1: DDEE for ACCC", "Proposer 1: DDEE for C", "Proposer 1: DDEE 
for CC", "Proposer 1: DDEE for CCC", "Proposer 1: DE for ACCC", "Proposer 1: DEE for ACCC", "Proposer 1: E for ACCC", "Proposer 1: EE for ACCC", "Proposer 1: B for ACCCE", "Proposer 1: B for ACCEE", "Proposer 1: B for ACEEE", "Proposer 1: B for CCCEE", "Proposer 1: B for CCEEE", "Proposer 1: BCDDE for A", "Proposer 1: BCDEE for A", "Proposer 1: BD for ACCCE", "Proposer 1: BD for ACCEE", "Proposer 1: BD for ACEEE", "Proposer 1: BD for CCCEE", "Proposer 1: BD for CCEEE", "Proposer 1: BDD for ACCCE", "Proposer 1: BDD for ACCEE", "Proposer 1: BDD for ACEEE", "Proposer 1: BDD for CCCEE", "Proposer 1: BDD for CCEEE", "Proposer 1: BDDEE for A", "Proposer 1: BDDEE for AC", "Proposer 1: BDDEE for ACC", "Proposer 1: BDDEE for ACCC", "Proposer 1: BDDEE for C", "Proposer 1: BDDEE for CC", "Proposer 1: BDDEE for CCC", "Proposer 1: CDDEE for A", "Proposer 1: D for ACCCE", "Proposer 1: D for ACCEE", "Proposer 1: D for ACEEE", "Proposer 1: D for CCCEE", "Proposer 1: D for CCEEE", "Proposer 1: DD for ACCCE", "Proposer 1: DD for ACCEE", "Proposer 1: DD for ACEEE", "Proposer 1: DD for CCCEE", "Proposer 1: DD for CCEEE", "Proposer 1: B for ACCCEE", "Proposer 1: B for ACCEEE", "Proposer 1: B for CCCEEE", "Proposer 1: BCDDEE for A", "Proposer 1: BD for ACCCEE", "Proposer 1: BD for ACCEEE", "Proposer 1: BD for CCCEEE", "Proposer 1: BDD for ACCCEE", "Proposer 1: BDD for ACCEEE", "Proposer 1: BDD for CCCEEE", "Proposer 1: D for ACCCEE", "Proposer 1: D for ACCEEE", "Proposer 1: D for CCCEEE", "Proposer 1: DD for ACCCEE", "Proposer 1: DD for ACCEEE", "Proposer 1: DD for CCCEEE", "Proposer 1: B for ACCCEEE", "Proposer 1: BD for ACCCEEE", "Proposer 1: BDD for ACCCEEE", "Proposer 1: D for ACCCEEE", "Proposer 1: DD for ACCCEEE", "Proposer 1: Pass trade."] + +# Apply action "Proposer 1: BDD for ACCCE" +action: 27294 + +# State 3 +# Move Number: 3 +# BACB +# BEAA +# DBDC +# ECAE +# +# P0 chips: ABCCCDD +# P1 chips: BCDDEE +# P2 chips: ACCCEEE +# Pos: 0 7 5 13 +# Proposal 0: ABDD for C +# Proposal 1: BDD for ACCCE +IsTerminal() = False +History() = [5, 11102, 27294] +HistoryString() = "5, 11102, 27294" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "BACB\nBEAA\nDBDC\nECAE\n\nPlayer: 0\nPos: 0 7 5 13\nMy chips: ABCCCDD\nResponder chips: ACCCEEE\n" +InformationStateString(1) = "BACB\nBEAA\nDBDC\nECAE\n\nPlayer: 1\nPos: 0 7 5 13\nMy chips: BCDDEE\nResponder chips: ACCCEEE\n" +InformationStateString(2) = "BACB\nBEAA\nDBDC\nECAE\n\nPlayer: 2\nPos: 0 7 5 13\nP0 chips: ABCCCDD\nP1 chips: BCDDEE\nProposal 0: ABDD for C\nProposal 1: BDD for ACCCE\n" +InformationStateTensor(0): binvec(463, 0x422044203080902204900c00000800200000260301e0e040201008040201808078201e0000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(463, 0x222044203080902204900c0000080020000024030180e070201008040201808078201e0000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(463, 0x122044203080902204900c00000800200000260301e0e04020180c070381808078201e0c060201c0804020180804020180807020180807820180) +ObservationString(0) = "BACB\nBEAA\nDBDC\nECAE\n\nPlayer: 0\nPos: 0 7 5 13\nMy chips: ABCCCDD\nResponder chips: ACCCEEE\n" +ObservationString(1) = "BACB\nBEAA\nDBDC\nECAE\n\nPlayer: 1\nPos: 0 7 5 13\nMy chips: BCDDEE\nResponder chips: ACCCEEE\n" +ObservationString(2) = "BACB\nBEAA\nDBDC\nECAE\n\nPlayer: 2\nPos: 0 7 5 13\nP0 chips: ABCCCDD\nP1 chips: BCDDEE\nProposal 0: ABDD for C\nProposal 1: BDD for ACCCE\n" +ObservationTensor(0): binvec(463, 
0x422044203080902204900c00000800200000260301e0e040201008040201808078201e0000000000000000000000000000000000000000000000) +ObservationTensor(1): binvec(463, 0x222044203080902204900c0000080020000024030180e070201008040201808078201e0000000000000000000000000000000000000000000000) +ObservationTensor(2): binvec(463, 0x122044203080902204900c00000800200000260301e0e04020180c070381808078201e0c060201c0804020180804020180807020180807820180) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [93120, 93121, 93122] +StringLegalActions() = ["Deal: trade with proposer 0", "Deal: trade with proposer 1", "No Deal!"] + +# Apply action "Deal: trade with proposer 1" +action: 93121 + +# State 4 +# Move Number: 4 +# BACB +# BEAA +# DBDC +# ECAE +# +# P0 chips: ABCCCDD +# P1 chips: ACCCCEEE +# P2 chips: BDDEE +# Pos: 0 7 5 13 +# Proposal 0: ABDD for C +# Proposal 1: BDD for ACCCE +IsTerminal() = True +History() = [5, 11102, 27294, 93121] +HistoryString() = "5, 11102, 27294, 93121" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "BACB\nBEAA\nDBDC\nECAE\n\nPlayer: 0\nPos: 0 7 5 13\nMy chips: ABCCCDD\nResponder chips: BDDEE\n" +InformationStateString(1) = "BACB\nBEAA\nDBDC\nECAE\n\nPlayer: 1\nPos: 0 7 5 13\nMy chips: ACCCCEEE\nResponder chips: BDDEE\n" +InformationStateString(2) = "BACB\nBEAA\nDBDC\nECAE\n\nPlayer: 2\nPos: 0 7 5 13\nP0 chips: ABCCCDD\nP1 chips: ACCCCEEE\n" +InformationStateTensor(0): binvec(463, 0x4a2044203080902204900c00000800200000260301e0e04020100804020100c040381c0000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(463, 0x2a2044203080902204900c00000800200000260201f0807820100804020100c040381c0000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(463, 0x1a2044203080902204900c00000800200000260301e0e04030100f8403c100c040381c0000000000000000000000000000000000000000000000) +ObservationString(0) = "BACB\nBEAA\nDBDC\nECAE\n\nPlayer: 0\nPos: 0 7 5 13\nMy chips: ABCCCDD\nResponder chips: BDDEE\n" +ObservationString(1) = "BACB\nBEAA\nDBDC\nECAE\n\nPlayer: 1\nPos: 0 7 5 13\nMy chips: ACCCCEEE\nResponder chips: BDDEE\n" +ObservationString(2) = "BACB\nBEAA\nDBDC\nECAE\n\nPlayer: 2\nPos: 0 7 5 13\nP0 chips: ABCCCDD\nP1 chips: ACCCCEEE\n" +ObservationTensor(0): binvec(463, 0x4a2044203080902204900c00000800200000260301e0e04020100804020100c040381c0000000000000000000000000000000000000000000000) +ObservationTensor(1): binvec(463, 0x2a2044203080902204900c00000800200000260201f0807820100804020100c040381c0000000000000000000000000000000000000000000000) +ObservationTensor(2): binvec(463, 0x1a2044203080902204900c00000800200000260301e0e04030100f8403c100c040381c0000000000000000000000000000000000000000000000) +Rewards() = [0, 35, -5] +Returns() = [0, 35, -5] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/connect_four.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/connect_four.txt new file mode 100644 index 0000000..4ef7d7d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/connect_four.txt @@ -0,0 +1,343 @@ +game: connect_four + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Connect Four" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True 
+GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "connect_four" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 7 +PolicyTensorShape() = [7] +MaxChanceOutcomes() = 0 +GetParameters() = {} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [3, 6, 7] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 126 +MaxGameLength() = 42 +ToString() = "connect_four()" + +# State 0 +# ....... +# ....... +# ....... +# ....... +# ....... +# ....... +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = ".......\n.......\n.......\n.......\n.......\n.......\n" +ObservationString(1) = ".......\n.......\n.......\n.......\n.......\n.......\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6] +StringLegalActions() = ["x0", "x1", "x2", "x3", "x4", "x5", "x6"] + +# Apply action "x0" +action: 0 + +# State 1 +# ....... +# ....... +# ....... +# ....... +# ....... +# x...... +IsTerminal() = False +History() = [0] +HistoryString() = "0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "0" +InformationStateString(1) = "0" +ObservationString(0) = ".......\n.......\n.......\n.......\n.......\nx......\n" +ObservationString(1) = ".......\n.......\n.......\n.......\n.......\nx......\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +ObservationTensor(1): +◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6] +StringLegalActions() = ["o0", "o1", "o2", "o3", "o4", "o5", "o6"] + +# Apply action "o1" +action: 1 + +# State 2 +# ....... +# ....... +# ....... +# ....... +# ....... +# xo..... 
+IsTerminal() = False +History() = [0, 1] +HistoryString() = "0, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "0, 1" +InformationStateString(1) = "0, 1" +ObservationString(0) = ".......\n.......\n.......\n.......\n.......\nxo.....\n" +ObservationString(1) = ".......\n.......\n.......\n.......\n.......\nxo.....\n" +ObservationTensor(0): +◯◉◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +ObservationTensor(1): +◉◯◯◯◯◯◯ ◯◉◯◯◯◯◯ ◯◯◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6] +StringLegalActions() = ["x0", "x1", "x2", "x3", "x4", "x5", "x6"] + +# Apply action "x2" +action: 2 + +# State 3 +# ....... +# ....... +# ....... +# ....... +# ....... +# xox.... +IsTerminal() = False +History() = [0, 1, 2] +HistoryString() = "0, 1, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "0, 1, 2" +InformationStateString(1) = "0, 1, 2" +ObservationString(0) = ".......\n.......\n.......\n.......\n.......\nxox....\n" +ObservationString(1) = ".......\n.......\n.......\n.......\n.......\nxox....\n" +ObservationTensor(0): +◯◉◯◯◯◯◯ ◉◯◉◯◯◯◯ ◯◯◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +ObservationTensor(1): +◉◯◉◯◯◯◯ ◯◉◯◯◯◯◯ ◯◯◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6] +StringLegalActions() = ["o0", "o1", "o2", "o3", "o4", "o5", "o6"] + +# Apply action "o6" +action: 6 + +# State 4 +# ....... +# ....... +# ....... +# ....... +# ....... +# xox...o +IsTerminal() = False +History() = [0, 1, 2, 6] +HistoryString() = "0, 1, 2, 6" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "0, 1, 2, 6" +InformationStateString(1) = "0, 1, 2, 6" +ObservationString(0) = ".......\n.......\n.......\n.......\n.......\nxox...o\n" +ObservationString(1) = ".......\n.......\n.......\n.......\n.......\nxox...o\n" +ObservationTensor(0): +◯◉◯◯◯◯◉ ◉◯◉◯◯◯◯ ◯◯◯◉◉◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +ObservationTensor(1): +◉◯◉◯◯◯◯ ◯◉◯◯◯◯◉ ◯◯◯◉◉◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6] +StringLegalActions() = ["x0", "x1", "x2", "x3", "x4", "x5", "x6"] + +# Apply action "x4" +action: 4 + +# State 5 +# ....... +# ....... +# ....... +# ....... +# ....... 
+# xox.x.o +IsTerminal() = False +History() = [0, 1, 2, 6, 4] +HistoryString() = "0, 1, 2, 6, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "0, 1, 2, 6, 4" +InformationStateString(1) = "0, 1, 2, 6, 4" +ObservationString(0) = ".......\n.......\n.......\n.......\n.......\nxox.x.o\n" +ObservationString(1) = ".......\n.......\n.......\n.......\n.......\nxox.x.o\n" +ObservationTensor(0): +◯◉◯◯◯◯◉ ◉◯◉◯◉◯◯ ◯◯◯◉◯◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +ObservationTensor(1): +◉◯◉◯◉◯◯ ◯◉◯◯◯◯◉ ◯◯◯◉◯◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6] +StringLegalActions() = ["o0", "o1", "o2", "o3", "o4", "o5", "o6"] + +# Apply action "o1" +action: 1 + +# State 6 +# Apply action "x0" +action: 0 + +# State 7 +# Apply action "o2" +action: 2 + +# State 8 +# Apply action "x6" +action: 6 + +# State 9 +# Apply action "o0" +action: 0 + +# State 10 +# Apply action "x5" +action: 5 + +# State 11 +# Apply action "o4" +action: 4 + +# State 12 +# Apply action "x0" +action: 0 + +# State 13 +# Apply action "o6" +action: 6 + +# State 14 +# Apply action "x5" +action: 5 + +# State 15 +# Apply action "o0" +action: 0 + +# State 16 +# Apply action "x3" +action: 3 + +# State 17 +# ....... +# o...... +# x...... +# o.....o +# xoo.oxx +# xoxxxxo +IsTerminal() = True +History() = [0, 1, 2, 6, 4, 1, 0, 2, 6, 0, 5, 4, 0, 6, 5, 0, 3] +HistoryString() = "0, 1, 2, 6, 4, 1, 0, 2, 6, 0, 5, 4, 0, 6, 5, 0, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "0, 1, 2, 6, 4, 1, 0, 2, 6, 0, 5, 4, 0, 6, 5, 0, 3" +InformationStateString(1) = "0, 1, 2, 6, 4, 1, 0, 2, 6, 0, 5, 4, 0, 6, 5, 0, 3" +ObservationString(0) = ".......\no......\nx......\no.....o\nxoo.oxx\nxoxxxxo\n" +ObservationString(1) = ".......\no......\nx......\no.....o\nxoo.oxx\nxoxxxxo\n" +ObservationTensor(0): +◯◉◯◯◯◯◉ ◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯ +◯◉◉◯◉◯◯ ◉◯◯◯◯◉◉ ◯◯◯◉◯◯◯ +◉◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯ +◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◉◉◉◉◉◉ +◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +ObservationTensor(1): +◉◯◉◉◉◉◯ ◯◉◯◯◯◯◉ ◯◯◯◯◯◯◯ +◉◯◯◯◯◉◉ ◯◉◉◯◉◯◯ ◯◯◯◉◯◯◯ +◯◯◯◯◯◯◯ ◉◯◯◯◯◯◉ ◯◉◉◉◉◉◯ +◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/connect_four_start_at.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/connect_four_start_at.txt new file mode 100644 index 0000000..06a5057 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/connect_four_start_at.txt @@ -0,0 +1,327 @@ +game: start_at(history=4;3;3;2;0;4;4;4;4;0,game=connect_four()) + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "StartAt history=4;3;3;2;0;4;4;4;4;0 game=Connect Four" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = 
RewardModel.TERMINAL +GameType.short_name = "start_at" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 7 +PolicyTensorShape() = [7] +MaxChanceOutcomes() = 0 +GetParameters() = {game=connect_four(),history=4;3;3;2;0;4;4;4;4;0} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [3, 6, 7] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 126 +MaxGameLength() = 42 +ToString() = "start_at(game=connect_four(),history=4;3;3;2;0;4;4;4;4;0)" + +# State 0 +# ....... +# ....x.. +# ....o.. +# ....x.. +# o..xo.. +# x.oox.. +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "4, 3, 3, 2, 0, 4, 4, 4, 4, 0" +InformationStateString(1) = "4, 3, 3, 2, 0, 4, 4, 4, 4, 0" +ObservationString(0) = ".......\n....x..\n....o..\n....x..\no..xo..\nx.oox..\n" +ObservationString(1) = ".......\n....x..\n....o..\n....x..\no..xo..\nx.oox..\n" +ObservationTensor(0): +◯◯◉◉◯◯◯ ◉◯◯◯◉◯◯ ◯◉◯◯◯◉◉ +◉◯◯◯◉◯◯ ◯◯◯◉◯◯◯ ◯◉◉◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +ObservationTensor(1): +◉◯◯◯◉◯◯ ◯◯◉◉◯◯◯ ◯◉◯◯◯◉◉ +◯◯◯◉◯◯◯ ◉◯◯◯◉◯◯ ◯◉◉◯◯◉◉ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6] +StringLegalActions() = ["x0", "x1", "x2", "x3", "x4", "x5", "x6"] + +# Apply action "x2" +action: 2 + +# State 1 +# ....... +# ....x.. +# ....o.. +# ....x.. +# o.xxo.. +# x.oox.. +IsTerminal() = False +History() = [2] +HistoryString() = "2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "4, 3, 3, 2, 0, 4, 4, 4, 4, 0, 2" +InformationStateString(1) = "4, 3, 3, 2, 0, 4, 4, 4, 4, 0, 2" +ObservationString(0) = ".......\n....x..\n....o..\n....x..\no.xxo..\nx.oox..\n" +ObservationString(1) = ".......\n....x..\n....o..\n....x..\no.xxo..\nx.oox..\n" +ObservationTensor(0): +◯◯◉◉◯◯◯ ◉◯◯◯◉◯◯ ◯◉◯◯◯◉◉ +◉◯◯◯◉◯◯ ◯◯◉◉◯◯◯ ◯◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +ObservationTensor(1): +◉◯◯◯◉◯◯ ◯◯◉◉◯◯◯ ◯◉◯◯◯◉◉ +◯◯◉◉◯◯◯ ◉◯◯◯◉◯◯ ◯◉◯◯◯◉◉ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6] +StringLegalActions() = ["o0", "o1", "o2", "o3", "o4", "o5", "o6"] + +# Apply action "o0" +action: 0 + +# State 2 +# ....... +# ....x.. +# ....o.. +# o...x.. +# o.xxo.. +# x.oox.. 
+IsTerminal() = False +History() = [2, 0] +HistoryString() = "2, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "4, 3, 3, 2, 0, 4, 4, 4, 4, 0, 2, 0" +InformationStateString(1) = "4, 3, 3, 2, 0, 4, 4, 4, 4, 0, 2, 0" +ObservationString(0) = ".......\n....x..\n....o..\no...x..\no.xxo..\nx.oox..\n" +ObservationString(1) = ".......\n....x..\n....o..\no...x..\no.xxo..\nx.oox..\n" +ObservationTensor(0): +◯◯◉◉◯◯◯ ◉◯◯◯◉◯◯ ◯◉◯◯◯◉◉ +◉◯◯◯◉◯◯ ◯◯◉◉◯◯◯ ◯◉◯◯◯◉◉ +◉◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◯◉◉◉◯◉◉ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +ObservationTensor(1): +◉◯◯◯◉◯◯ ◯◯◉◉◯◯◯ ◯◉◯◯◯◉◉ +◯◯◉◉◯◯◯ ◉◯◯◯◉◯◯ ◯◉◯◯◯◉◉ +◯◯◯◯◉◯◯ ◉◯◯◯◯◯◯ ◯◉◉◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6] +StringLegalActions() = ["x0", "x1", "x2", "x3", "x4", "x5", "x6"] + +# Apply action "x4" +action: 4 + +# State 3 +# ....x.. +# ....x.. +# ....o.. +# o...x.. +# o.xxo.. +# x.oox.. +IsTerminal() = False +History() = [2, 0, 4] +HistoryString() = "2, 0, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "4, 3, 3, 2, 0, 4, 4, 4, 4, 0, 2, 0, 4" +InformationStateString(1) = "4, 3, 3, 2, 0, 4, 4, 4, 4, 0, 2, 0, 4" +ObservationString(0) = "....x..\n....x..\n....o..\no...x..\no.xxo..\nx.oox..\n" +ObservationString(1) = "....x..\n....x..\n....o..\no...x..\no.xxo..\nx.oox..\n" +ObservationTensor(0): +◯◯◉◉◯◯◯ ◉◯◯◯◉◯◯ ◯◉◯◯◯◉◉ +◉◯◯◯◉◯◯ ◯◯◉◉◯◯◯ ◯◉◯◯◯◉◉ +◉◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◯◉◉◉◯◉◉ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +ObservationTensor(1): +◉◯◯◯◉◯◯ ◯◯◉◉◯◯◯ ◯◉◯◯◯◉◉ +◯◯◉◉◯◯◯ ◉◯◯◯◉◯◯ ◯◉◯◯◯◉◉ +◯◯◯◯◉◯◯ ◉◯◯◯◯◯◯ ◯◉◉◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 5, 6] +StringLegalActions() = ["o0", "o1", "o2", "o3", "o5", "o6"] + +# Apply action "o1" +action: 1 + +# State 4 +# ....x.. +# ....x.. +# ....o.. +# o...x.. +# o.xxo.. +# xooox.. +IsTerminal() = False +History() = [2, 0, 4, 1] +HistoryString() = "2, 0, 4, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "4, 3, 3, 2, 0, 4, 4, 4, 4, 0, 2, 0, 4, 1" +InformationStateString(1) = "4, 3, 3, 2, 0, 4, 4, 4, 4, 0, 2, 0, 4, 1" +ObservationString(0) = "....x..\n....x..\n....o..\no...x..\no.xxo..\nxooox..\n" +ObservationString(1) = "....x..\n....x..\n....o..\no...x..\no.xxo..\nxooox..\n" +ObservationTensor(0): +◯◉◉◉◯◯◯ ◉◯◯◯◉◯◯ ◯◯◯◯◯◉◉ +◉◯◯◯◉◯◯ ◯◯◉◉◯◯◯ ◯◉◯◯◯◉◉ +◉◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◯◉◉◉◯◉◉ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +ObservationTensor(1): +◉◯◯◯◉◯◯ ◯◉◉◉◯◯◯ ◯◯◯◯◯◉◉ +◯◯◉◉◯◯◯ ◉◯◯◯◉◯◯ ◯◉◯◯◯◉◉ +◯◯◯◯◉◯◯ ◉◯◯◯◯◯◯ ◯◉◉◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 5, 6] +StringLegalActions() = ["x0", "x1", "x2", "x3", "x5", "x6"] + +# Apply action "x2" +action: 2 + +# State 5 +# ....x.. +# ....x.. +# ....o.. +# o.x.x.. +# o.xxo.. +# xooox.. 
+IsTerminal() = False +History() = [2, 0, 4, 1, 2] +HistoryString() = "2, 0, 4, 1, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "4, 3, 3, 2, 0, 4, 4, 4, 4, 0, 2, 0, 4, 1, 2" +InformationStateString(1) = "4, 3, 3, 2, 0, 4, 4, 4, 4, 0, 2, 0, 4, 1, 2" +ObservationString(0) = "....x..\n....x..\n....o..\no.x.x..\no.xxo..\nxooox..\n" +ObservationString(1) = "....x..\n....x..\n....o..\no.x.x..\no.xxo..\nxooox..\n" +ObservationTensor(0): +◯◉◉◉◯◯◯ ◉◯◯◯◉◯◯ ◯◯◯◯◯◉◉ +◉◯◯◯◉◯◯ ◯◯◉◉◯◯◯ ◯◉◯◯◯◉◉ +◉◯◯◯◯◯◯ ◯◯◉◯◉◯◯ ◯◉◯◉◯◉◉ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +ObservationTensor(1): +◉◯◯◯◉◯◯ ◯◉◉◉◯◯◯ ◯◯◯◯◯◉◉ +◯◯◉◉◯◯◯ ◉◯◯◯◉◯◯ ◯◉◯◯◯◉◉ +◯◯◉◯◉◯◯ ◉◯◯◯◯◯◯ ◯◉◯◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 5, 6] +StringLegalActions() = ["o0", "o1", "o2", "o3", "o5", "o6"] + +# Apply action "o6" +action: 6 + +# State 6 +# Apply action "x3" +action: 3 + +# State 7 +# Apply action "o6" +action: 6 + +# State 8 +# Apply action "x2" +action: 2 + +# State 9 +# Apply action "o3" +action: 3 + +# State 10 +# Apply action "x6" +action: 6 + +# State 11 +# Apply action "o1" +action: 1 + +# State 12 +# Apply action "x2" +action: 2 + +# State 13 +# ....x.. +# ..x.x.. +# ..xoo.. +# o.xxx.x +# ooxxo.o +# xooox.o +IsTerminal() = True +History() = [2, 0, 4, 1, 2, 6, 3, 6, 2, 3, 6, 1, 2] +HistoryString() = "2, 0, 4, 1, 2, 6, 3, 6, 2, 3, 6, 1, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "4, 3, 3, 2, 0, 4, 4, 4, 4, 0, 2, 0, 4, 1, 2, 6, 3, 6, 2, 3, 6, 1, 2" +InformationStateString(1) = "4, 3, 3, 2, 0, 4, 4, 4, 4, 0, 2, 0, 4, 1, 2, 6, 3, 6, 2, 3, 6, 1, 2" +ObservationString(0) = "....x..\n..x.x..\n..xoo..\no.xxx.x\nooxxo.o\nxooox.o\n" +ObservationString(1) = "....x..\n..x.x..\n..xoo..\no.xxx.x\nooxxo.o\nxooox.o\n" +ObservationTensor(0): +◯◉◉◉◯◯◉ ◉◯◯◯◉◯◯ ◯◯◯◯◯◉◯ +◉◉◯◯◉◯◉ ◯◯◉◉◯◯◯ ◯◯◯◯◯◉◯ +◉◯◯◯◯◯◯ ◯◯◉◉◉◯◉ ◯◉◯◯◯◉◯ +◯◯◯◉◉◯◯ ◯◯◉◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◉◯◉◯◯ ◉◉◯◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +ObservationTensor(1): +◉◯◯◯◉◯◯ ◯◉◉◉◯◯◉ ◯◯◯◯◯◉◯ +◯◯◉◉◯◯◯ ◉◉◯◯◉◯◉ ◯◯◯◯◯◉◯ +◯◯◉◉◉◯◉ ◉◯◯◯◯◯◯ ◯◉◯◯◯◉◯ +◯◯◉◯◯◯◯ ◯◯◯◉◉◯◯ ◉◉◯◯◯◉◉ +◯◯◉◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◯◉◉ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/coop_box_pushing.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/coop_box_pushing.txt new file mode 100644 index 0000000..de221fd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/coop_box_pushing.txt @@ -0,0 +1,2042 @@ +game: coop_box_pushing + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SIMULTANEOUS +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Cooperative Box Pushing" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["fully_observable", "horizon"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.REWARDS +GameType.short_name = "coop_box_pushing" +GameType.utility = Utility.IDENTICAL + +NumDistinctActions() = 4 +PolicyTensorShape() = [4] +MaxChanceOutcomes() = 4 
+GetParameters() = {fully_observable=False,horizon=100} +NumPlayers() = 2 +MinUtility() = -1020.0 +MaxUtility() = 1.998e+04 +UtilitySum() = None +ObservationTensorShape() = [5] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 5 +MaxGameLength() = 100 +ToString() = "coop_box_pushing()" + +# State 0 +# Total moves: 0 +# Most recent reward: 0 +# Total rewards: 0 +# ........ +# ........ +# ........ +# .b.BB.b. +# ........ +# ........ +# .>....<. +# ........ +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "field" +ObservationString(1) = "field" +ObservationTensor(0): ◉◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2, 3] +LegalActions(1) = [0, 1, 2, 3] +StringLegalActions(0) = ["turn left", "turn right", "move forward", "stay"] +StringLegalActions(1) = ["turn left", "turn right", "move forward", "stay"] + +# Apply joint action ["stay", "move forward"] +actions: [3, 2] + +# State 1 +# Total moves: 0 +# Most recent reward: 0 +# Total rewards: 0 +# ........ +# ........ +# ........ +# .b.BB.b. +# ........ +# ........ +# .>....<. +# ........ +IsTerminal() = False +History() = [3, 2] +HistoryString() = "3, 2" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "field" +ObservationString(1) = "field" +ObservationTensor(0): ◉◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯ +ChanceOutcomes() = [(0,0.9), (1,0.1)] +LegalActions() = [0, 1] +StringLegalActions() = ["turn left", "turn right"] + +# Apply action "turn right" +action: 1 + +# State 2 +# Total moves: 0 +# Most recent reward: 0 +# Total rewards: 0 +# ........ +# ........ +# ........ +# .b.BB.b. +# ........ +# ........ +# .>....<. +# ........ +IsTerminal() = False +History() = [3, 2, 1] +HistoryString() = "3, 2, 1" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "field" +ObservationString(1) = "field" +ObservationTensor(0): ◉◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯ +ChanceOutcomes() = [(0,0.9), (1,0.1)] +LegalActions() = [0, 1] +StringLegalActions() = ["turn left", "turn right"] + +# Apply action "turn left" +action: 0 + +# State 3 +# Apply action "move forward" +action: 2 + +# State 4 +# Total moves: 1 +# Most recent reward: -0.1 +# Total rewards: -0.1 +# ........ +# ........ +# ........ +# .b.BB.b. +# ........ +# ........ +# .>...<.. +# ........ +IsTerminal() = False +History() = [3, 2, 1, 0, 2] +HistoryString() = "3, 2, 1, 0, 2" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "field" +ObservationString(1) = "field" +ObservationTensor(0): ◉◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯ +Rewards() = [-0.1, -0.1] +Returns() = [-0.1, -0.1] +LegalActions(0) = [0, 1, 2, 3] +LegalActions(1) = [0, 1, 2, 3] +StringLegalActions(0) = ["turn left", "turn right", "move forward", "stay"] +StringLegalActions(1) = ["turn left", "turn right", "move forward", "stay"] + +# Apply joint action ["turn left", "turn left"] +actions: [0, 0] + +# State 5 +# Apply action "turn right" +action: 1 + +# State 6 +# Apply action "turn left" +action: 0 + +# State 7 +# Apply action "stay" +action: 3 + +# State 8 +# Total moves: 2 +# Most recent reward: -0.1 +# Total rewards: -0.2 +# ........ +# ........ +# ........ +# .b.BB.b. +# ........ +# ........ +# .>...v.. +# ........ 
+IsTerminal() = False +History() = [3, 2, 1, 0, 2, 0, 0, 1, 0, 3] +HistoryString() = "3, 2, 1, 0, 2, 0, 0, 1, 0, 3" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "field" +ObservationString(1) = "field" +ObservationTensor(0): ◉◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯ +Rewards() = [-0.1, -0.1] +Returns() = [-0.2, -0.2] +LegalActions(0) = [0, 1, 2, 3] +LegalActions(1) = [0, 1, 2, 3] +StringLegalActions(0) = ["turn left", "turn right", "move forward", "stay"] +StringLegalActions(1) = ["turn left", "turn right", "move forward", "stay"] + +# Apply joint action ["turn left", "turn right"] +actions: [0, 1] + +# State 9 +# Apply action "turn right" +action: 1 + +# State 10 +# Apply action "turn right" +action: 1 + +# State 11 +# Apply action "move forward" +action: 2 + +# State 12 +# Apply joint action ["turn left", "move forward"] +actions: [0, 2] + +# State 13 +# Apply action "turn right" +action: 1 + +# State 14 +# Apply action "turn right" +action: 1 + +# State 15 +# Apply action "stay" +action: 3 + +# State 16 +# Apply joint action ["turn left", "turn left"] +actions: [0, 0] + +# State 17 +# Apply action "turn right" +action: 1 + +# State 18 +# Apply action "turn left" +action: 0 + +# State 19 +# Apply action "stay" +action: 3 + +# State 20 +# Apply joint action ["stay", "turn right"] +actions: [3, 1] + +# State 21 +# Apply action "turn right" +action: 1 + +# State 22 +# Apply action "turn left" +action: 0 + +# State 23 +# Apply action "stay" +action: 3 + +# State 24 +# Apply joint action ["move forward", "turn left"] +actions: [2, 0] + +# State 25 +# Apply action "turn right" +action: 1 + +# State 26 +# Apply action "turn right" +action: 1 + +# State 27 +# Apply action "move forward" +action: 2 + +# State 28 +# Apply joint action ["turn left", "turn left"] +actions: [0, 0] + +# State 29 +# Apply action "turn right" +action: 1 + +# State 30 +# Apply action "turn right" +action: 1 + +# State 31 +# Apply action "move forward" +action: 2 + +# State 32 +# Apply joint action ["stay", "turn right"] +actions: [3, 1] + +# State 33 +# Apply action "turn left" +action: 0 + +# State 34 +# Apply action "turn right" +action: 1 + +# State 35 +# Apply action "stay" +action: 3 + +# State 36 +# Apply joint action ["move forward", "turn right"] +actions: [2, 1] + +# State 37 +# Apply action "turn right" +action: 1 + +# State 38 +# Apply action "turn right" +action: 1 + +# State 39 +# Apply action "move forward" +action: 2 + +# State 40 +# Total moves: 10 +# Most recent reward: -0.1 +# Total rewards: -1 +# ........ +# ........ +# ........ +# .b.BB.b. +# ........ +# ........ +# .>...v.. +# ........ 
+IsTerminal() = False +History() = [3, 2, 1, 0, 2, 0, 0, 1, 0, 3, 0, 1, 1, 1, 2, 0, 2, 1, 1, 3, 0, 0, 1, 0, 3, 3, 1, 1, 0, 3, 2, 0, 1, 1, 2, 0, 0, 1, 1, 2, 3, 1, 0, 1, 3, 2, 1, 1, 1, 2] +HistoryString() = "3, 2, 1, 0, 2, 0, 0, 1, 0, 3, 0, 1, 1, 1, 2, 0, 2, 1, 1, 3, 0, 0, 1, 0, 3, 3, 1, 1, 0, 3, 2, 0, 1, 1, 2, 0, 0, 1, 1, 2, 3, 1, 0, 1, 3, 2, 1, 1, 1, 2" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "field" +ObservationString(1) = "field" +ObservationTensor(0): ◉◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯ +Rewards() = [-0.1, -0.1] +Returns() = [-1, -1] +LegalActions(0) = [0, 1, 2, 3] +LegalActions(1) = [0, 1, 2, 3] +StringLegalActions(0) = ["turn left", "turn right", "move forward", "stay"] +StringLegalActions(1) = ["turn left", "turn right", "move forward", "stay"] + +# Apply joint action ["turn left", "turn left"] +actions: [0, 0] + +# State 41 +# Apply action "turn left" +action: 0 + +# State 42 +# Apply action "turn right" +action: 1 + +# State 43 +# Apply action "move forward" +action: 2 + +# State 44 +# Apply joint action ["turn right", "move forward"] +actions: [1, 2] + +# State 45 +# Apply action "turn left" +action: 0 + +# State 46 +# Apply action "turn right" +action: 1 + +# State 47 +# Apply action "move forward" +action: 2 + +# State 48 +# Apply joint action ["stay", "move forward"] +actions: [3, 2] + +# State 49 +# Apply action "turn right" +action: 1 + +# State 50 +# Apply action "turn left" +action: 0 + +# State 51 +# Apply action "move forward" +action: 2 + +# State 52 +# Apply joint action ["move forward", "stay"] +actions: [2, 3] + +# State 53 +# Apply action "turn right" +action: 1 + +# State 54 +# Apply action "turn left" +action: 0 + +# State 55 +# Apply action "stay" +action: 3 + +# State 56 +# Apply joint action ["move forward", "stay"] +actions: [2, 3] + +# State 57 +# Apply action "turn left" +action: 0 + +# State 58 +# Apply action "turn right" +action: 1 + +# State 59 +# Apply action "stay" +action: 3 + +# State 60 +# Apply joint action ["move forward", "move forward"] +actions: [2, 2] + +# State 61 +# Apply action "turn left" +action: 0 + +# State 62 +# Apply action "turn right" +action: 1 + +# State 63 +# Apply action "stay" +action: 3 + +# State 64 +# Apply joint action ["move forward", "turn left"] +actions: [2, 0] + +# State 65 +# Apply action "turn left" +action: 0 + +# State 66 +# Apply action "turn left" +action: 0 + +# State 67 +# Apply action "move forward" +action: 2 + +# State 68 +# Apply joint action ["turn left", "move forward"] +actions: [0, 2] + +# State 69 +# Apply action "turn right" +action: 1 + +# State 70 +# Apply action "turn left" +action: 0 + +# State 71 +# Apply action "move forward" +action: 2 + +# State 72 +# Apply joint action ["turn right", "turn right"] +actions: [1, 1] + +# State 73 +# Apply action "turn right" +action: 1 + +# State 74 +# Apply action "turn left" +action: 0 + +# State 75 +# Apply action "move forward" +action: 2 + +# State 76 +# Apply joint action ["stay", "turn left"] +actions: [3, 0] + +# State 77 +# Apply action "turn left" +action: 0 + +# State 78 +# Apply action "turn right" +action: 1 + +# State 79 +# Apply action "move forward" +action: 2 + +# State 80 +# Total moves: 20 +# Most recent reward: -0.1 +# Total rewards: -2 +# ........ +# ........ +# ........ +# .b.BB.b. +# ........ +# ........ +# ....>... +# ......v. 
+IsTerminal() = False +History() = [3, 2, 1, 0, 2, 0, 0, 1, 0, 3, 0, 1, 1, 1, 2, 0, 2, 1, 1, 3, 0, 0, 1, 0, 3, 3, 1, 1, 0, 3, 2, 0, 1, 1, 2, 0, 0, 1, 1, 2, 3, 1, 0, 1, 3, 2, 1, 1, 1, 2, 0, 0, 0, 1, 2, 1, 2, 0, 1, 2, 3, 2, 1, 0, 2, 2, 3, 1, 0, 3, 2, 3, 0, 1, 3, 2, 2, 0, 1, 3, 2, 0, 0, 0, 2, 0, 2, 1, 0, 2, 1, 1, 1, 0, 2, 3, 0, 0, 1, 2] +HistoryString() = "3, 2, 1, 0, 2, 0, 0, 1, 0, 3, 0, 1, 1, 1, 2, 0, 2, 1, 1, 3, 0, 0, 1, 0, 3, 3, 1, 1, 0, 3, 2, 0, 1, 1, 2, 0, 0, 1, 1, 2, 3, 1, 0, 1, 3, 2, 1, 1, 1, 2, 0, 0, 0, 1, 2, 1, 2, 0, 1, 2, 3, 2, 1, 0, 2, 2, 3, 1, 0, 3, 2, 3, 0, 1, 3, 2, 2, 0, 1, 3, 2, 0, 0, 0, 2, 0, 2, 1, 0, 2, 1, 1, 1, 0, 2, 3, 0, 0, 1, 2" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "field" +ObservationString(1) = "wall" +ObservationTensor(0): ◉◯◯◯◯ +ObservationTensor(1): ◯◉◯◯◯ +Rewards() = [-0.1, -0.1] +Returns() = [-2, -2] +LegalActions(0) = [0, 1, 2, 3] +LegalActions(1) = [0, 1, 2, 3] +StringLegalActions(0) = ["turn left", "turn right", "move forward", "stay"] +StringLegalActions(1) = ["turn left", "turn right", "move forward", "stay"] + +# Apply joint action ["turn left", "turn left"] +actions: [0, 0] + +# State 81 +# Apply action "turn left" +action: 0 + +# State 82 +# Apply action "turn left" +action: 0 + +# State 83 +# Apply action "move forward" +action: 2 + +# State 84 +# Apply joint action ["stay", "turn left"] +actions: [3, 0] + +# State 85 +# Apply action "turn right" +action: 1 + +# State 86 +# Apply action "turn right" +action: 1 + +# State 87 +# Apply action "move forward" +action: 2 + +# State 88 +# Apply joint action ["stay", "turn left"] +actions: [3, 0] + +# State 89 +# Apply action "turn right" +action: 1 + +# State 90 +# Apply action "turn left" +action: 0 + +# State 91 +# Apply action "move forward" +action: 2 + +# State 92 +# Apply joint action ["turn right", "stay"] +actions: [1, 3] + +# State 93 +# Apply action "turn left" +action: 0 + +# State 94 +# Apply action "turn left" +action: 0 + +# State 95 +# Apply action "move forward" +action: 2 + +# State 96 +# Apply joint action ["turn left", "turn left"] +actions: [0, 0] + +# State 97 +# Apply action "turn right" +action: 1 + +# State 98 +# Apply action "turn right" +action: 1 + +# State 99 +# Apply action "stay" +action: 3 + +# State 100 +# Apply joint action ["stay", "move forward"] +actions: [3, 2] + +# State 101 +# Apply action "turn left" +action: 0 + +# State 102 +# Apply action "turn left" +action: 0 + +# State 103 +# Apply action "stay" +action: 3 + +# State 104 +# Apply joint action ["move forward", "turn right"] +actions: [2, 1] + +# State 105 +# Apply action "turn left" +action: 0 + +# State 106 +# Apply action "turn right" +action: 1 + +# State 107 +# Apply action "stay" +action: 3 + +# State 108 +# Apply joint action ["turn left", "stay"] +actions: [0, 3] + +# State 109 +# Apply action "turn right" +action: 1 + +# State 110 +# Apply action "turn left" +action: 0 + +# State 111 +# Apply action "move forward" +action: 2 + +# State 112 +# Apply joint action ["stay", "turn left"] +actions: [3, 0] + +# State 113 +# Apply action "turn left" +action: 0 + +# State 114 +# Apply action "turn right" +action: 1 + +# State 115 +# Apply action "stay" +action: 3 + +# State 116 +# Apply joint action ["turn left", "stay"] +actions: [0, 3] + +# State 117 +# Apply action "turn left" +action: 0 + +# State 118 +# Apply action "turn left" +action: 0 + +# State 119 +# Apply action "stay" +action: 3 + +# State 120 +# Total moves: 30 +# Most recent reward: -0.1 +# 
Total rewards: -3 +# ........ +# ........ +# ........ +# .b.BB.b. +# ........ +# ........ +# .....^^. +# ........ +IsTerminal() = False +History() = [3, 2, 1, 0, 2, 0, 0, 1, 0, 3, 0, 1, 1, 1, 2, 0, 2, 1, 1, 3, 0, 0, 1, 0, 3, 3, 1, 1, 0, 3, 2, 0, 1, 1, 2, 0, 0, 1, 1, 2, 3, 1, 0, 1, 3, 2, 1, 1, 1, 2, 0, 0, 0, 1, 2, 1, 2, 0, 1, 2, 3, 2, 1, 0, 2, 2, 3, 1, 0, 3, 2, 3, 0, 1, 3, 2, 2, 0, 1, 3, 2, 0, 0, 0, 2, 0, 2, 1, 0, 2, 1, 1, 1, 0, 2, 3, 0, 0, 1, 2, 0, 0, 0, 0, 2, 3, 0, 1, 1, 2, 3, 0, 1, 0, 2, 1, 3, 0, 0, 2, 0, 0, 1, 1, 3, 3, 2, 0, 0, 3, 2, 1, 0, 1, 3, 0, 3, 1, 0, 2, 3, 0, 0, 1, 3, 0, 3, 0, 0, 3] +HistoryString() = "3, 2, 1, 0, 2, 0, 0, 1, 0, 3, 0, 1, 1, 1, 2, 0, 2, 1, 1, 3, 0, 0, 1, 0, 3, 3, 1, 1, 0, 3, 2, 0, 1, 1, 2, 0, 0, 1, 1, 2, 3, 1, 0, 1, 3, 2, 1, 1, 1, 2, 0, 0, 0, 1, 2, 1, 2, 0, 1, 2, 3, 2, 1, 0, 2, 2, 3, 1, 0, 3, 2, 3, 0, 1, 3, 2, 2, 0, 1, 3, 2, 0, 0, 0, 2, 0, 2, 1, 0, 2, 1, 1, 1, 0, 2, 3, 0, 0, 1, 2, 0, 0, 0, 0, 2, 3, 0, 1, 1, 2, 3, 0, 1, 0, 2, 1, 3, 0, 0, 2, 0, 0, 1, 1, 3, 3, 2, 0, 0, 3, 2, 1, 0, 1, 3, 0, 3, 1, 0, 2, 3, 0, 0, 1, 3, 0, 3, 0, 0, 3" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "field" +ObservationString(1) = "field" +ObservationTensor(0): ◉◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯ +Rewards() = [-0.1, -0.1] +Returns() = [-3, -3] +LegalActions(0) = [0, 1, 2, 3] +LegalActions(1) = [0, 1, 2, 3] +StringLegalActions(0) = ["turn left", "turn right", "move forward", "stay"] +StringLegalActions(1) = ["turn left", "turn right", "move forward", "stay"] + +# Apply joint action ["move forward", "move forward"] +actions: [2, 2] + +# State 121 +# Apply action "turn right" +action: 1 + +# State 122 +# Apply action "turn right" +action: 1 + +# State 123 +# Apply action "stay" +action: 3 + +# State 124 +# Apply joint action ["turn left", "turn left"] +actions: [0, 0] + +# State 125 +# Apply action "turn left" +action: 0 + +# State 126 +# Apply action "turn left" +action: 0 + +# State 127 +# Apply action "stay" +action: 3 + +# State 128 +# Apply joint action ["turn right", "turn right"] +actions: [1, 1] + +# State 129 +# Apply action "turn right" +action: 1 + +# State 130 +# Apply action "turn left" +action: 0 + +# State 131 +# Apply action "stay" +action: 3 + +# State 132 +# Apply joint action ["stay", "turn right"] +actions: [3, 1] + +# State 133 +# Apply action "turn left" +action: 0 + +# State 134 +# Apply action "turn left" +action: 0 + +# State 135 +# Apply action "stay" +action: 3 + +# State 136 +# Apply joint action ["turn right", "turn left"] +actions: [1, 0] + +# State 137 +# Apply action "turn right" +action: 1 + +# State 138 +# Apply action "turn left" +action: 0 + +# State 139 +# Apply action "stay" +action: 3 + +# State 140 +# Apply joint action ["turn right", "move forward"] +actions: [1, 2] + +# State 141 +# Apply action "turn left" +action: 0 + +# State 142 +# Apply action "turn left" +action: 0 + +# State 143 +# Apply action "stay" +action: 3 + +# State 144 +# Apply joint action ["turn right", "turn right"] +actions: [1, 1] + +# State 145 +# Apply action "turn left" +action: 0 + +# State 146 +# Apply action "turn left" +action: 0 + +# State 147 +# Apply action "stay" +action: 3 + +# State 148 +# Apply joint action ["turn left", "turn left"] +actions: [0, 0] + +# State 149 +# Apply action "turn right" +action: 1 + +# State 150 +# Apply action "turn left" +action: 0 + +# State 151 +# Apply action "stay" +action: 3 + +# State 152 +# Apply joint action ["move forward", "turn right"] +actions: [2, 1] + +# State 153 +# Apply 
action "turn right" +action: 1 + +# State 154 +# Apply action "turn right" +action: 1 + +# State 155 +# Apply action "stay" +action: 3 + +# State 156 +# Apply joint action ["turn right", "move forward"] +actions: [1, 2] + +# State 157 +# Apply action "turn left" +action: 0 + +# State 158 +# Apply action "turn left" +action: 0 + +# State 159 +# Apply action "stay" +action: 3 + +# State 160 +# Total moves: 40 +# Most recent reward: -0.1 +# Total rewards: -4 +# ........ +# ........ +# ........ +# .b.BB.b. +# ......^. +# ........ +# .....v.. +# ........ +IsTerminal() = False +History() = [3, 2, 1, 0, 2, 0, 0, 1, 0, 3, 0, 1, 1, 1, 2, 0, 2, 1, 1, 3, 0, 0, 1, 0, 3, 3, 1, 1, 0, 3, 2, 0, 1, 1, 2, 0, 0, 1, 1, 2, 3, 1, 0, 1, 3, 2, 1, 1, 1, 2, 0, 0, 0, 1, 2, 1, 2, 0, 1, 2, 3, 2, 1, 0, 2, 2, 3, 1, 0, 3, 2, 3, 0, 1, 3, 2, 2, 0, 1, 3, 2, 0, 0, 0, 2, 0, 2, 1, 0, 2, 1, 1, 1, 0, 2, 3, 0, 0, 1, 2, 0, 0, 0, 0, 2, 3, 0, 1, 1, 2, 3, 0, 1, 0, 2, 1, 3, 0, 0, 2, 0, 0, 1, 1, 3, 3, 2, 0, 0, 3, 2, 1, 0, 1, 3, 0, 3, 1, 0, 2, 3, 0, 0, 1, 3, 0, 3, 0, 0, 3, 2, 2, 1, 1, 3, 0, 0, 0, 0, 3, 1, 1, 1, 0, 3, 3, 1, 0, 0, 3, 1, 0, 1, 0, 3, 1, 2, 0, 0, 3, 1, 1, 0, 0, 3, 0, 0, 1, 0, 3, 2, 1, 1, 1, 3, 1, 2, 0, 0, 3] +HistoryString() = "3, 2, 1, 0, 2, 0, 0, 1, 0, 3, 0, 1, 1, 1, 2, 0, 2, 1, 1, 3, 0, 0, 1, 0, 3, 3, 1, 1, 0, 3, 2, 0, 1, 1, 2, 0, 0, 1, 1, 2, 3, 1, 0, 1, 3, 2, 1, 1, 1, 2, 0, 0, 0, 1, 2, 1, 2, 0, 1, 2, 3, 2, 1, 0, 2, 2, 3, 1, 0, 3, 2, 3, 0, 1, 3, 2, 2, 0, 1, 3, 2, 0, 0, 0, 2, 0, 2, 1, 0, 2, 1, 1, 1, 0, 2, 3, 0, 0, 1, 2, 0, 0, 0, 0, 2, 3, 0, 1, 1, 2, 3, 0, 1, 0, 2, 1, 3, 0, 0, 2, 0, 0, 1, 1, 3, 3, 2, 0, 0, 3, 2, 1, 0, 1, 3, 0, 3, 1, 0, 2, 3, 0, 0, 1, 3, 0, 3, 0, 0, 3, 2, 2, 1, 1, 3, 0, 0, 0, 0, 3, 1, 1, 1, 0, 3, 3, 1, 0, 0, 3, 1, 0, 1, 0, 3, 1, 2, 0, 0, 3, 1, 1, 0, 0, 3, 0, 0, 1, 0, 3, 2, 1, 1, 1, 3, 1, 2, 0, 0, 3" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "field" +ObservationString(1) = "small box" +ObservationTensor(0): ◉◯◯◯◯ +ObservationTensor(1): ◯◯◯◉◯ +Rewards() = [-0.1, -0.1] +Returns() = [-4, -4] +LegalActions(0) = [0, 1, 2, 3] +LegalActions(1) = [0, 1, 2, 3] +StringLegalActions(0) = ["turn left", "turn right", "move forward", "stay"] +StringLegalActions(1) = ["turn left", "turn right", "move forward", "stay"] + +# Apply joint action ["turn right", "move forward"] +actions: [1, 2] + +# State 161 +# Apply action "turn left" +action: 0 + +# State 162 +# Apply action "turn left" +action: 0 + +# State 163 +# Apply action "move forward" +action: 2 + +# State 164 +# Apply joint action ["move forward", "stay"] +actions: [2, 3] + +# State 165 +# Apply action "turn left" +action: 0 + +# State 166 +# Apply action "turn right" +action: 1 + +# State 167 +# Apply action "stay" +action: 3 + +# State 168 +# Apply joint action ["move forward", "turn right"] +actions: [2, 1] + +# State 169 +# Apply action "turn right" +action: 1 + +# State 170 +# Apply action "turn right" +action: 1 + +# State 171 +# Apply action "move forward" +action: 2 + +# State 172 +# Apply joint action ["turn right", "turn left"] +actions: [1, 0] + +# State 173 +# Apply action "turn right" +action: 1 + +# State 174 +# Apply action "turn right" +action: 1 + +# State 175 +# Apply action "stay" +action: 3 + +# State 176 +# Apply joint action ["move forward", "stay"] +actions: [2, 3] + +# State 177 +# Apply action "turn left" +action: 0 + +# State 178 +# Apply action "turn left" +action: 0 + +# State 179 +# Apply action "move forward" +action: 2 + +# State 180 +# Apply joint action ["turn right", "stay"] 
+actions: [1, 3] + +# State 181 +# Apply action "turn left" +action: 0 + +# State 182 +# Apply action "turn right" +action: 1 + +# State 183 +# Apply action "stay" +action: 3 + +# State 184 +# Apply joint action ["turn left", "turn right"] +actions: [0, 1] + +# State 185 +# Apply action "turn right" +action: 1 + +# State 186 +# Apply action "turn right" +action: 1 + +# State 187 +# Apply action "stay" +action: 3 + +# State 188 +# Apply joint action ["turn left", "stay"] +actions: [0, 3] + +# State 189 +# Apply action "turn right" +action: 1 + +# State 190 +# Apply action "turn right" +action: 1 + +# State 191 +# Apply action "stay" +action: 3 + +# State 192 +# Apply joint action ["stay", "stay"] +actions: [3, 3] + +# State 193 +# Apply action "turn left" +action: 0 + +# State 194 +# Apply action "turn left" +action: 0 + +# State 195 +# Apply action "stay" +action: 3 + +# State 196 +# Apply joint action ["turn left", "stay"] +actions: [0, 3] + +# State 197 +# Apply action "turn left" +action: 0 + +# State 198 +# Apply action "turn left" +action: 0 + +# State 199 +# Apply action "move forward" +action: 2 + +# State 200 +# Total moves: 50 +# Most recent reward: -0.1 +# Total rewards: -5 +# ........ +# ........ +# ......b. +# .b.BB.^. +# ........ +# ........ +# ...<.... +# ........ +IsTerminal() = False +History() = [3, 2, 1, 0, 2, 0, 0, 1, 0, 3, 0, 1, 1, 1, 2, 0, 2, 1, 1, 3, 0, 0, 1, 0, 3, 3, 1, 1, 0, 3, 2, 0, 1, 1, 2, 0, 0, 1, 1, 2, 3, 1, 0, 1, 3, 2, 1, 1, 1, 2, 0, 0, 0, 1, 2, 1, 2, 0, 1, 2, 3, 2, 1, 0, 2, 2, 3, 1, 0, 3, 2, 3, 0, 1, 3, 2, 2, 0, 1, 3, 2, 0, 0, 0, 2, 0, 2, 1, 0, 2, 1, 1, 1, 0, 2, 3, 0, 0, 1, 2, 0, 0, 0, 0, 2, 3, 0, 1, 1, 2, 3, 0, 1, 0, 2, 1, 3, 0, 0, 2, 0, 0, 1, 1, 3, 3, 2, 0, 0, 3, 2, 1, 0, 1, 3, 0, 3, 1, 0, 2, 3, 0, 0, 1, 3, 0, 3, 0, 0, 3, 2, 2, 1, 1, 3, 0, 0, 0, 0, 3, 1, 1, 1, 0, 3, 3, 1, 0, 0, 3, 1, 0, 1, 0, 3, 1, 2, 0, 0, 3, 1, 1, 0, 0, 3, 0, 0, 1, 0, 3, 2, 1, 1, 1, 3, 1, 2, 0, 0, 3, 1, 2, 0, 0, 2, 2, 3, 0, 1, 3, 2, 1, 1, 1, 2, 1, 0, 1, 1, 3, 2, 3, 0, 0, 2, 1, 3, 0, 1, 3, 0, 1, 1, 1, 3, 0, 3, 1, 1, 3, 3, 3, 0, 0, 3, 0, 3, 0, 0, 2] +HistoryString() = "3, 2, 1, 0, 2, 0, 0, 1, 0, 3, 0, 1, 1, 1, 2, 0, 2, 1, 1, 3, 0, 0, 1, 0, 3, 3, 1, 1, 0, 3, 2, 0, 1, 1, 2, 0, 0, 1, 1, 2, 3, 1, 0, 1, 3, 2, 1, 1, 1, 2, 0, 0, 0, 1, 2, 1, 2, 0, 1, 2, 3, 2, 1, 0, 2, 2, 3, 1, 0, 3, 2, 3, 0, 1, 3, 2, 2, 0, 1, 3, 2, 0, 0, 0, 2, 0, 2, 1, 0, 2, 1, 1, 1, 0, 2, 3, 0, 0, 1, 2, 0, 0, 0, 0, 2, 3, 0, 1, 1, 2, 3, 0, 1, 0, 2, 1, 3, 0, 0, 2, 0, 0, 1, 1, 3, 3, 2, 0, 0, 3, 2, 1, 0, 1, 3, 0, 3, 1, 0, 2, 3, 0, 0, 1, 3, 0, 3, 0, 0, 3, 2, 2, 1, 1, 3, 0, 0, 0, 0, 3, 1, 1, 1, 0, 3, 3, 1, 0, 0, 3, 1, 0, 1, 0, 3, 1, 2, 0, 0, 3, 1, 1, 0, 0, 3, 0, 0, 1, 0, 3, 2, 1, 1, 1, 3, 1, 2, 0, 0, 3, 1, 2, 0, 0, 2, 2, 3, 0, 1, 3, 2, 1, 1, 1, 2, 1, 0, 1, 1, 3, 2, 3, 0, 0, 2, 1, 3, 0, 1, 3, 0, 1, 1, 1, 3, 0, 3, 1, 1, 3, 3, 3, 0, 0, 3, 0, 3, 0, 0, 2" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "field" +ObservationString(1) = "small box" +ObservationTensor(0): ◉◯◯◯◯ +ObservationTensor(1): ◯◯◯◉◯ +Rewards() = [-0.1, -0.1] +Returns() = [-5, -5] +LegalActions(0) = [0, 1, 2, 3] +LegalActions(1) = [0, 1, 2, 3] +StringLegalActions(0) = ["turn left", "turn right", "move forward", "stay"] +StringLegalActions(1) = ["turn left", "turn right", "move forward", "stay"] + +# Apply joint action ["turn left", "stay"] +actions: [0, 3] + +# State 201 +# Apply action "turn right" +action: 1 + +# State 202 +# Apply action "turn right" +action: 1 + +# State 203 +# Apply action "move forward" +action: 2 + 
+# State 204 +# Apply joint action ["turn right", "turn left"] +actions: [1, 0] + +# State 205 +# Apply action "turn left" +action: 0 + +# State 206 +# Apply action "turn left" +action: 0 + +# State 207 +# Apply action "move forward" +action: 2 + +# State 208 +# Apply joint action ["move forward", "stay"] +actions: [2, 3] + +# State 209 +# Apply action "turn left" +action: 0 + +# State 210 +# Apply action "turn left" +action: 0 + +# State 211 +# Apply action "move forward" +action: 2 + +# State 212 +# Apply joint action ["stay", "turn left"] +actions: [3, 0] + +# State 213 +# Apply action "turn left" +action: 0 + +# State 214 +# Apply action "turn left" +action: 0 + +# State 215 +# Apply action "move forward" +action: 2 + +# State 216 +# Apply joint action ["turn right", "turn right"] +actions: [1, 1] + +# State 217 +# Apply action "turn left" +action: 0 + +# State 218 +# Apply action "turn left" +action: 0 + +# State 219 +# Apply action "stay" +action: 3 + +# State 220 +# Apply joint action ["turn left", "turn right"] +actions: [0, 1] + +# State 221 +# Apply action "turn left" +action: 0 + +# State 222 +# Apply action "turn left" +action: 0 + +# State 223 +# Apply action "stay" +action: 3 + +# State 224 +# Apply joint action ["stay", "turn left"] +actions: [3, 0] + +# State 225 +# Apply action "turn right" +action: 1 + +# State 226 +# Apply action "turn right" +action: 1 + +# State 227 +# Apply action "stay" +action: 3 + +# State 228 +# Apply joint action ["move forward", "move forward"] +actions: [2, 2] + +# State 229 +# Apply action "turn right" +action: 1 + +# State 230 +# Apply action "turn right" +action: 1 + +# State 231 +# Apply action "move forward" +action: 2 + +# State 232 +# Apply joint action ["turn left", "turn left"] +actions: [0, 0] + +# State 233 +# Apply action "turn right" +action: 1 + +# State 234 +# Apply action "turn right" +action: 1 + +# State 235 +# Apply action "stay" +action: 3 + +# State 236 +# Apply joint action ["move forward", "turn left"] +actions: [2, 0] + +# State 237 +# Apply action "turn right" +action: 1 + +# State 238 +# Apply action "turn left" +action: 0 + +# State 239 +# Apply action "move forward" +action: 2 + +# State 240 +# Total moves: 60 +# Most recent reward: -0.1 +# Total rewards: -6 +# ........ +# ........ +# ......b. +# .b.BB.<. +# ........ +# ...^.... +# ........ +# ........ 
+IsTerminal() = False +History() = [3, 2, 1, 0, 2, 0, 0, 1, 0, 3, 0, 1, 1, 1, 2, 0, 2, 1, 1, 3, 0, 0, 1, 0, 3, 3, 1, 1, 0, 3, 2, 0, 1, 1, 2, 0, 0, 1, 1, 2, 3, 1, 0, 1, 3, 2, 1, 1, 1, 2, 0, 0, 0, 1, 2, 1, 2, 0, 1, 2, 3, 2, 1, 0, 2, 2, 3, 1, 0, 3, 2, 3, 0, 1, 3, 2, 2, 0, 1, 3, 2, 0, 0, 0, 2, 0, 2, 1, 0, 2, 1, 1, 1, 0, 2, 3, 0, 0, 1, 2, 0, 0, 0, 0, 2, 3, 0, 1, 1, 2, 3, 0, 1, 0, 2, 1, 3, 0, 0, 2, 0, 0, 1, 1, 3, 3, 2, 0, 0, 3, 2, 1, 0, 1, 3, 0, 3, 1, 0, 2, 3, 0, 0, 1, 3, 0, 3, 0, 0, 3, 2, 2, 1, 1, 3, 0, 0, 0, 0, 3, 1, 1, 1, 0, 3, 3, 1, 0, 0, 3, 1, 0, 1, 0, 3, 1, 2, 0, 0, 3, 1, 1, 0, 0, 3, 0, 0, 1, 0, 3, 2, 1, 1, 1, 3, 1, 2, 0, 0, 3, 1, 2, 0, 0, 2, 2, 3, 0, 1, 3, 2, 1, 1, 1, 2, 1, 0, 1, 1, 3, 2, 3, 0, 0, 2, 1, 3, 0, 1, 3, 0, 1, 1, 1, 3, 0, 3, 1, 1, 3, 3, 3, 0, 0, 3, 0, 3, 0, 0, 2, 0, 3, 1, 1, 2, 1, 0, 0, 0, 2, 2, 3, 0, 0, 2, 3, 0, 0, 0, 2, 1, 1, 0, 0, 3, 0, 1, 0, 0, 3, 3, 0, 1, 1, 3, 2, 2, 1, 1, 2, 0, 0, 1, 1, 3, 2, 0, 1, 0, 2] +HistoryString() = "3, 2, 1, 0, 2, 0, 0, 1, 0, 3, 0, 1, 1, 1, 2, 0, 2, 1, 1, 3, 0, 0, 1, 0, 3, 3, 1, 1, 0, 3, 2, 0, 1, 1, 2, 0, 0, 1, 1, 2, 3, 1, 0, 1, 3, 2, 1, 1, 1, 2, 0, 0, 0, 1, 2, 1, 2, 0, 1, 2, 3, 2, 1, 0, 2, 2, 3, 1, 0, 3, 2, 3, 0, 1, 3, 2, 2, 0, 1, 3, 2, 0, 0, 0, 2, 0, 2, 1, 0, 2, 1, 1, 1, 0, 2, 3, 0, 0, 1, 2, 0, 0, 0, 0, 2, 3, 0, 1, 1, 2, 3, 0, 1, 0, 2, 1, 3, 0, 0, 2, 0, 0, 1, 1, 3, 3, 2, 0, 0, 3, 2, 1, 0, 1, 3, 0, 3, 1, 0, 2, 3, 0, 0, 1, 3, 0, 3, 0, 0, 3, 2, 2, 1, 1, 3, 0, 0, 0, 0, 3, 1, 1, 1, 0, 3, 3, 1, 0, 0, 3, 1, 0, 1, 0, 3, 1, 2, 0, 0, 3, 1, 1, 0, 0, 3, 0, 0, 1, 0, 3, 2, 1, 1, 1, 3, 1, 2, 0, 0, 3, 1, 2, 0, 0, 2, 2, 3, 0, 1, 3, 2, 1, 1, 1, 2, 1, 0, 1, 1, 3, 2, 3, 0, 0, 2, 1, 3, 0, 1, 3, 0, 1, 1, 1, 3, 0, 3, 1, 1, 3, 3, 3, 0, 0, 3, 0, 3, 0, 0, 2, 0, 3, 1, 1, 2, 1, 0, 0, 0, 2, 2, 3, 0, 0, 2, 3, 0, 0, 0, 2, 1, 1, 0, 0, 3, 0, 1, 0, 0, 3, 3, 0, 1, 1, 3, 2, 2, 1, 1, 2, 0, 0, 1, 1, 3, 2, 0, 1, 0, 2" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "field" +ObservationString(1) = "field" +ObservationTensor(0): ◉◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯ +Rewards() = [-0.1, -0.1] +Returns() = [-6, -6] +LegalActions(0) = [0, 1, 2, 3] +LegalActions(1) = [0, 1, 2, 3] +StringLegalActions(0) = ["turn left", "turn right", "move forward", "stay"] +StringLegalActions(1) = ["turn left", "turn right", "move forward", "stay"] + +# Apply joint action ["turn left", "stay"] +actions: [0, 3] + +# State 241 +# Apply action "turn right" +action: 1 + +# State 242 +# Apply action "turn left" +action: 0 + +# State 243 +# Apply action "move forward" +action: 2 + +# State 244 +# Apply joint action ["move forward", "stay"] +actions: [2, 3] + +# State 245 +# Apply action "turn left" +action: 0 + +# State 246 +# Apply action "turn left" +action: 0 + +# State 247 +# Apply action "move forward" +action: 2 + +# State 248 +# Apply joint action ["move forward", "turn left"] +actions: [2, 0] + +# State 249 +# Apply action "turn right" +action: 1 + +# State 250 +# Apply action "turn left" +action: 0 + +# State 251 +# Apply action "move forward" +action: 2 + +# State 252 +# Apply joint action ["turn left", "turn right"] +actions: [0, 1] + +# State 253 +# Apply action "turn left" +action: 0 + +# State 254 +# Apply action "turn left" +action: 0 + +# State 255 +# Apply action "stay" +action: 3 + +# State 256 +# Apply joint action ["stay", "stay"] +actions: [3, 3] + +# State 257 +# Apply action "turn left" +action: 0 + +# State 258 +# Apply action "turn left" +action: 0 + +# State 259 +# Apply action "move forward" +action: 2 + +# State 260 +# Apply joint action 
["stay", "turn left"] +actions: [3, 0] + +# State 261 +# Apply action "turn left" +action: 0 + +# State 262 +# Apply action "turn right" +action: 1 + +# State 263 +# Apply action "move forward" +action: 2 + +# State 264 +# Apply joint action ["turn left", "turn right"] +actions: [0, 1] + +# State 265 +# Apply action "turn left" +action: 0 + +# State 266 +# Apply action "turn left" +action: 0 + +# State 267 +# Apply action "stay" +action: 3 + +# State 268 +# Apply joint action ["turn left", "move forward"] +actions: [0, 2] + +# State 269 +# Apply action "turn left" +action: 0 + +# State 270 +# Apply action "turn right" +action: 1 + +# State 271 +# Apply action "stay" +action: 3 + +# State 272 +# Apply joint action ["turn right", "turn right"] +actions: [1, 1] + +# State 273 +# Apply action "turn left" +action: 0 + +# State 274 +# Apply action "turn right" +action: 1 + +# State 275 +# Apply action "stay" +action: 3 + +# State 276 +# Apply joint action ["move forward", "turn right"] +actions: [2, 1] + +# State 277 +# Apply action "turn right" +action: 1 + +# State 278 +# Apply action "turn right" +action: 1 + +# State 279 +# Apply action "stay" +action: 3 + +# State 280 +# Total moves: 70 +# Most recent reward: -0.1 +# Total rewards: -7 +# ........ +# ........ +# ......b. +# .b.BB.^. +# ...v.... +# ........ +# ........ +# ........ +IsTerminal() = False +History() = [3, 2, 1, 0, 2, 0, 0, 1, 0, 3, 0, 1, 1, 1, 2, 0, 2, 1, 1, 3, 0, 0, 1, 0, 3, 3, 1, 1, 0, 3, 2, 0, 1, 1, 2, 0, 0, 1, 1, 2, 3, 1, 0, 1, 3, 2, 1, 1, 1, 2, 0, 0, 0, 1, 2, 1, 2, 0, 1, 2, 3, 2, 1, 0, 2, 2, 3, 1, 0, 3, 2, 3, 0, 1, 3, 2, 2, 0, 1, 3, 2, 0, 0, 0, 2, 0, 2, 1, 0, 2, 1, 1, 1, 0, 2, 3, 0, 0, 1, 2, 0, 0, 0, 0, 2, 3, 0, 1, 1, 2, 3, 0, 1, 0, 2, 1, 3, 0, 0, 2, 0, 0, 1, 1, 3, 3, 2, 0, 0, 3, 2, 1, 0, 1, 3, 0, 3, 1, 0, 2, 3, 0, 0, 1, 3, 0, 3, 0, 0, 3, 2, 2, 1, 1, 3, 0, 0, 0, 0, 3, 1, 1, 1, 0, 3, 3, 1, 0, 0, 3, 1, 0, 1, 0, 3, 1, 2, 0, 0, 3, 1, 1, 0, 0, 3, 0, 0, 1, 0, 3, 2, 1, 1, 1, 3, 1, 2, 0, 0, 3, 1, 2, 0, 0, 2, 2, 3, 0, 1, 3, 2, 1, 1, 1, 2, 1, 0, 1, 1, 3, 2, 3, 0, 0, 2, 1, 3, 0, 1, 3, 0, 1, 1, 1, 3, 0, 3, 1, 1, 3, 3, 3, 0, 0, 3, 0, 3, 0, 0, 2, 0, 3, 1, 1, 2, 1, 0, 0, 0, 2, 2, 3, 0, 0, 2, 3, 0, 0, 0, 2, 1, 1, 0, 0, 3, 0, 1, 0, 0, 3, 3, 0, 1, 1, 3, 2, 2, 1, 1, 2, 0, 0, 1, 1, 3, 2, 0, 1, 0, 2, 0, 3, 1, 0, 2, 2, 3, 0, 0, 2, 2, 0, 1, 0, 2, 0, 1, 0, 0, 3, 3, 3, 0, 0, 2, 3, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 2, 0, 1, 3, 1, 1, 0, 1, 3, 2, 1, 1, 1, 3] +HistoryString() = "3, 2, 1, 0, 2, 0, 0, 1, 0, 3, 0, 1, 1, 1, 2, 0, 2, 1, 1, 3, 0, 0, 1, 0, 3, 3, 1, 1, 0, 3, 2, 0, 1, 1, 2, 0, 0, 1, 1, 2, 3, 1, 0, 1, 3, 2, 1, 1, 1, 2, 0, 0, 0, 1, 2, 1, 2, 0, 1, 2, 3, 2, 1, 0, 2, 2, 3, 1, 0, 3, 2, 3, 0, 1, 3, 2, 2, 0, 1, 3, 2, 0, 0, 0, 2, 0, 2, 1, 0, 2, 1, 1, 1, 0, 2, 3, 0, 0, 1, 2, 0, 0, 0, 0, 2, 3, 0, 1, 1, 2, 3, 0, 1, 0, 2, 1, 3, 0, 0, 2, 0, 0, 1, 1, 3, 3, 2, 0, 0, 3, 2, 1, 0, 1, 3, 0, 3, 1, 0, 2, 3, 0, 0, 1, 3, 0, 3, 0, 0, 3, 2, 2, 1, 1, 3, 0, 0, 0, 0, 3, 1, 1, 1, 0, 3, 3, 1, 0, 0, 3, 1, 0, 1, 0, 3, 1, 2, 0, 0, 3, 1, 1, 0, 0, 3, 0, 0, 1, 0, 3, 2, 1, 1, 1, 3, 1, 2, 0, 0, 3, 1, 2, 0, 0, 2, 2, 3, 0, 1, 3, 2, 1, 1, 1, 2, 1, 0, 1, 1, 3, 2, 3, 0, 0, 2, 1, 3, 0, 1, 3, 0, 1, 1, 1, 3, 0, 3, 1, 1, 3, 3, 3, 0, 0, 3, 0, 3, 0, 0, 2, 0, 3, 1, 1, 2, 1, 0, 0, 0, 2, 2, 3, 0, 0, 2, 3, 0, 0, 0, 2, 1, 1, 0, 0, 3, 0, 1, 0, 0, 3, 3, 0, 1, 1, 3, 2, 2, 1, 1, 2, 0, 0, 1, 1, 3, 2, 0, 1, 0, 2, 0, 3, 1, 0, 2, 2, 3, 0, 0, 2, 2, 0, 1, 0, 2, 0, 1, 0, 0, 3, 3, 3, 0, 0, 2, 3, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 2, 0, 1, 3, 1, 1, 0, 1, 3, 2, 1, 1, 1, 3" +IsChanceNode() = False +IsSimultaneousNode() = 
True +CurrentPlayer() = -2 +ObservationString(0) = "field" +ObservationString(1) = "small box" +ObservationTensor(0): ◉◯◯◯◯ +ObservationTensor(1): ◯◯◯◉◯ +Rewards() = [-0.1, -0.1] +Returns() = [-7, -7] +LegalActions(0) = [0, 1, 2, 3] +LegalActions(1) = [0, 1, 2, 3] +StringLegalActions(0) = ["turn left", "turn right", "move forward", "stay"] +StringLegalActions(1) = ["turn left", "turn right", "move forward", "stay"] + +# Apply joint action ["turn right", "turn right"] +actions: [1, 1] + +# State 281 +# Apply action "turn left" +action: 0 + +# State 282 +# Apply action "turn right" +action: 1 + +# State 283 +# Apply action "move forward" +action: 2 + +# State 284 +# Apply joint action ["stay", "stay"] +actions: [3, 3] + +# State 285 +# Apply action "turn left" +action: 0 + +# State 286 +# Apply action "turn left" +action: 0 + +# State 287 +# Apply action "stay" +action: 3 + +# State 288 +# Apply joint action ["turn right", "turn right"] +actions: [1, 1] + +# State 289 +# Apply action "turn right" +action: 1 + +# State 290 +# Apply action "turn left" +action: 0 + +# State 291 +# Apply action "stay" +action: 3 + +# State 292 +# Apply joint action ["move forward", "turn right"] +actions: [2, 1] + +# State 293 +# Apply action "turn right" +action: 1 + +# State 294 +# Apply action "turn right" +action: 1 + +# State 295 +# Apply action "move forward" +action: 2 + +# State 296 +# Apply joint action ["turn left", "move forward"] +actions: [0, 2] + +# State 297 +# Apply action "turn left" +action: 0 + +# State 298 +# Apply action "turn left" +action: 0 + +# State 299 +# Apply action "move forward" +action: 2 + +# State 300 +# Apply joint action ["turn left", "turn right"] +actions: [0, 1] + +# State 301 +# Apply action "turn left" +action: 0 + +# State 302 +# Apply action "turn right" +action: 1 + +# State 303 +# Apply action "stay" +action: 3 + +# State 304 +# Apply joint action ["stay", "turn left"] +actions: [3, 0] + +# State 305 +# Apply action "turn right" +action: 1 + +# State 306 +# Apply action "turn left" +action: 0 + +# State 307 +# Apply action "stay" +action: 3 + +# State 308 +# Apply joint action ["move forward", "move forward"] +actions: [2, 2] + +# State 309 +# Apply action "turn left" +action: 0 + +# State 310 +# Apply action "turn right" +action: 1 + +# State 311 +# Apply action "stay" +action: 3 + +# State 312 +# Apply joint action ["stay", "turn right"] +actions: [3, 1] + +# State 313 +# Apply action "turn right" +action: 1 + +# State 314 +# Apply action "turn left" +action: 0 + +# State 315 +# Apply action "move forward" +action: 2 + +# State 316 +# Apply joint action ["turn left", "stay"] +actions: [0, 3] + +# State 317 +# Apply action "turn right" +action: 1 + +# State 318 +# Apply action "turn right" +action: 1 + +# State 319 +# Apply action "stay" +action: 3 + +# State 320 +# Total moves: 80 +# Most recent reward: -0.1 +# Total rewards: -8 +# ........ +# ........ +# ......b. +# .b.BB..> +# ....>... +# ........ +# ........ +# ........ 
+IsTerminal() = False +History() = [3, 2, 1, 0, 2, 0, 0, 1, 0, 3, 0, 1, 1, 1, 2, 0, 2, 1, 1, 3, 0, 0, 1, 0, 3, 3, 1, 1, 0, 3, 2, 0, 1, 1, 2, 0, 0, 1, 1, 2, 3, 1, 0, 1, 3, 2, 1, 1, 1, 2, 0, 0, 0, 1, 2, 1, 2, 0, 1, 2, 3, 2, 1, 0, 2, 2, 3, 1, 0, 3, 2, 3, 0, 1, 3, 2, 2, 0, 1, 3, 2, 0, 0, 0, 2, 0, 2, 1, 0, 2, 1, 1, 1, 0, 2, 3, 0, 0, 1, 2, 0, 0, 0, 0, 2, 3, 0, 1, 1, 2, 3, 0, 1, 0, 2, 1, 3, 0, 0, 2, 0, 0, 1, 1, 3, 3, 2, 0, 0, 3, 2, 1, 0, 1, 3, 0, 3, 1, 0, 2, 3, 0, 0, 1, 3, 0, 3, 0, 0, 3, 2, 2, 1, 1, 3, 0, 0, 0, 0, 3, 1, 1, 1, 0, 3, 3, 1, 0, 0, 3, 1, 0, 1, 0, 3, 1, 2, 0, 0, 3, 1, 1, 0, 0, 3, 0, 0, 1, 0, 3, 2, 1, 1, 1, 3, 1, 2, 0, 0, 3, 1, 2, 0, 0, 2, 2, 3, 0, 1, 3, 2, 1, 1, 1, 2, 1, 0, 1, 1, 3, 2, 3, 0, 0, 2, 1, 3, 0, 1, 3, 0, 1, 1, 1, 3, 0, 3, 1, 1, 3, 3, 3, 0, 0, 3, 0, 3, 0, 0, 2, 0, 3, 1, 1, 2, 1, 0, 0, 0, 2, 2, 3, 0, 0, 2, 3, 0, 0, 0, 2, 1, 1, 0, 0, 3, 0, 1, 0, 0, 3, 3, 0, 1, 1, 3, 2, 2, 1, 1, 2, 0, 0, 1, 1, 3, 2, 0, 1, 0, 2, 0, 3, 1, 0, 2, 2, 3, 0, 0, 2, 2, 0, 1, 0, 2, 0, 1, 0, 0, 3, 3, 3, 0, 0, 2, 3, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 2, 0, 1, 3, 1, 1, 0, 1, 3, 2, 1, 1, 1, 3, 1, 1, 0, 1, 2, 3, 3, 0, 0, 3, 1, 1, 1, 0, 3, 2, 1, 1, 1, 2, 0, 2, 0, 0, 2, 0, 1, 0, 1, 3, 3, 0, 1, 0, 3, 2, 2, 0, 1, 3, 3, 1, 1, 0, 2, 0, 3, 1, 1, 3] +HistoryString() = "3, 2, 1, 0, 2, 0, 0, 1, 0, 3, 0, 1, 1, 1, 2, 0, 2, 1, 1, 3, 0, 0, 1, 0, 3, 3, 1, 1, 0, 3, 2, 0, 1, 1, 2, 0, 0, 1, 1, 2, 3, 1, 0, 1, 3, 2, 1, 1, 1, 2, 0, 0, 0, 1, 2, 1, 2, 0, 1, 2, 3, 2, 1, 0, 2, 2, 3, 1, 0, 3, 2, 3, 0, 1, 3, 2, 2, 0, 1, 3, 2, 0, 0, 0, 2, 0, 2, 1, 0, 2, 1, 1, 1, 0, 2, 3, 0, 0, 1, 2, 0, 0, 0, 0, 2, 3, 0, 1, 1, 2, 3, 0, 1, 0, 2, 1, 3, 0, 0, 2, 0, 0, 1, 1, 3, 3, 2, 0, 0, 3, 2, 1, 0, 1, 3, 0, 3, 1, 0, 2, 3, 0, 0, 1, 3, 0, 3, 0, 0, 3, 2, 2, 1, 1, 3, 0, 0, 0, 0, 3, 1, 1, 1, 0, 3, 3, 1, 0, 0, 3, 1, 0, 1, 0, 3, 1, 2, 0, 0, 3, 1, 1, 0, 0, 3, 0, 0, 1, 0, 3, 2, 1, 1, 1, 3, 1, 2, 0, 0, 3, 1, 2, 0, 0, 2, 2, 3, 0, 1, 3, 2, 1, 1, 1, 2, 1, 0, 1, 1, 3, 2, 3, 0, 0, 2, 1, 3, 0, 1, 3, 0, 1, 1, 1, 3, 0, 3, 1, 1, 3, 3, 3, 0, 0, 3, 0, 3, 0, 0, 2, 0, 3, 1, 1, 2, 1, 0, 0, 0, 2, 2, 3, 0, 0, 2, 3, 0, 0, 0, 2, 1, 1, 0, 0, 3, 0, 1, 0, 0, 3, 3, 0, 1, 1, 3, 2, 2, 1, 1, 2, 0, 0, 1, 1, 3, 2, 0, 1, 0, 2, 0, 3, 1, 0, 2, 2, 3, 0, 0, 2, 2, 0, 1, 0, 2, 0, 1, 0, 0, 3, 3, 3, 0, 0, 2, 3, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 2, 0, 1, 3, 1, 1, 0, 1, 3, 2, 1, 1, 1, 3, 1, 1, 0, 1, 2, 3, 3, 0, 0, 3, 1, 1, 1, 0, 3, 2, 1, 1, 1, 2, 0, 2, 0, 0, 2, 0, 1, 0, 1, 3, 3, 0, 1, 0, 3, 2, 2, 0, 1, 3, 3, 1, 1, 0, 2, 0, 3, 1, 1, 3" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "field" +ObservationString(1) = "wall" +ObservationTensor(0): ◉◯◯◯◯ +ObservationTensor(1): ◯◉◯◯◯ +Rewards() = [-0.1, -0.1] +Returns() = [-8, -8] +LegalActions(0) = [0, 1, 2, 3] +LegalActions(1) = [0, 1, 2, 3] +StringLegalActions(0) = ["turn left", "turn right", "move forward", "stay"] +StringLegalActions(1) = ["turn left", "turn right", "move forward", "stay"] + +# Apply joint action ["turn left", "move forward"] +actions: [0, 2] + +# State 321 +# Apply action "turn right" +action: 1 + +# State 322 +# Apply action "turn left" +action: 0 + +# State 323 +# Apply action "move forward" +action: 2 + +# State 324 +# Apply joint action ["turn left", "move forward"] +actions: [0, 2] + +# State 325 +# Apply action "turn right" +action: 1 + +# State 326 +# Apply action "turn left" +action: 0 + +# State 327 +# Apply action "stay" +action: 3 + +# State 328 +# Apply joint action ["turn right", "move forward"] +actions: [1, 2] + +# State 329 +# Apply action "turn right" +action: 1 + +# State 330 +# 
Apply action "turn right" +action: 1 + +# State 331 +# Apply action "move forward" +action: 2 + +# State 332 +# Apply joint action ["move forward", "turn right"] +actions: [2, 1] + +# State 333 +# Apply action "turn right" +action: 1 + +# State 334 +# Apply action "turn right" +action: 1 + +# State 335 +# Apply action "move forward" +action: 2 + +# State 336 +# Apply joint action ["stay", "stay"] +actions: [3, 3] + +# State 337 +# Apply action "turn right" +action: 1 + +# State 338 +# Apply action "turn left" +action: 0 + +# State 339 +# Apply action "move forward" +action: 2 + +# State 340 +# Apply joint action ["turn left", "move forward"] +actions: [0, 2] + +# State 341 +# Apply action "turn right" +action: 1 + +# State 342 +# Apply action "turn left" +action: 0 + +# State 343 +# Apply action "stay" +action: 3 + +# State 344 +# Apply joint action ["turn left", "turn left"] +actions: [0, 0] + +# State 345 +# Apply action "turn left" +action: 0 + +# State 346 +# Apply action "turn right" +action: 1 + +# State 347 +# Apply action "stay" +action: 3 + +# State 348 +# Apply joint action ["stay", "stay"] +actions: [3, 3] + +# State 349 +# Apply action "turn right" +action: 1 + +# State 350 +# Apply action "turn right" +action: 1 + +# State 351 +# Apply action "move forward" +action: 2 + +# State 352 +# Apply joint action ["stay", "stay"] +actions: [3, 3] + +# State 353 +# Apply action "turn right" +action: 1 + +# State 354 +# Apply action "turn right" +action: 1 + +# State 355 +# Apply action "stay" +action: 3 + +# State 356 +# Apply joint action ["move forward", "move forward"] +actions: [2, 2] + +# State 357 +# Apply action "turn left" +action: 0 + +# State 358 +# Apply action "turn left" +action: 0 + +# State 359 +# Apply action "move forward" +action: 2 + +# State 360 +# Total moves: 90 +# Most recent reward: -10.1 +# Total rewards: -34 +# ........ +# ........ +# ......b. +# .b.BB..> +# ....^... +# ........ +# ........ +# ........ 
+IsTerminal() = False +History() = [3, 2, 1, 0, 2, 0, 0, 1, 0, 3, 0, 1, 1, 1, 2, 0, 2, 1, 1, 3, 0, 0, 1, 0, 3, 3, 1, 1, 0, 3, 2, 0, 1, 1, 2, 0, 0, 1, 1, 2, 3, 1, 0, 1, 3, 2, 1, 1, 1, 2, 0, 0, 0, 1, 2, 1, 2, 0, 1, 2, 3, 2, 1, 0, 2, 2, 3, 1, 0, 3, 2, 3, 0, 1, 3, 2, 2, 0, 1, 3, 2, 0, 0, 0, 2, 0, 2, 1, 0, 2, 1, 1, 1, 0, 2, 3, 0, 0, 1, 2, 0, 0, 0, 0, 2, 3, 0, 1, 1, 2, 3, 0, 1, 0, 2, 1, 3, 0, 0, 2, 0, 0, 1, 1, 3, 3, 2, 0, 0, 3, 2, 1, 0, 1, 3, 0, 3, 1, 0, 2, 3, 0, 0, 1, 3, 0, 3, 0, 0, 3, 2, 2, 1, 1, 3, 0, 0, 0, 0, 3, 1, 1, 1, 0, 3, 3, 1, 0, 0, 3, 1, 0, 1, 0, 3, 1, 2, 0, 0, 3, 1, 1, 0, 0, 3, 0, 0, 1, 0, 3, 2, 1, 1, 1, 3, 1, 2, 0, 0, 3, 1, 2, 0, 0, 2, 2, 3, 0, 1, 3, 2, 1, 1, 1, 2, 1, 0, 1, 1, 3, 2, 3, 0, 0, 2, 1, 3, 0, 1, 3, 0, 1, 1, 1, 3, 0, 3, 1, 1, 3, 3, 3, 0, 0, 3, 0, 3, 0, 0, 2, 0, 3, 1, 1, 2, 1, 0, 0, 0, 2, 2, 3, 0, 0, 2, 3, 0, 0, 0, 2, 1, 1, 0, 0, 3, 0, 1, 0, 0, 3, 3, 0, 1, 1, 3, 2, 2, 1, 1, 2, 0, 0, 1, 1, 3, 2, 0, 1, 0, 2, 0, 3, 1, 0, 2, 2, 3, 0, 0, 2, 2, 0, 1, 0, 2, 0, 1, 0, 0, 3, 3, 3, 0, 0, 2, 3, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 2, 0, 1, 3, 1, 1, 0, 1, 3, 2, 1, 1, 1, 3, 1, 1, 0, 1, 2, 3, 3, 0, 0, 3, 1, 1, 1, 0, 3, 2, 1, 1, 1, 2, 0, 2, 0, 0, 2, 0, 1, 0, 1, 3, 3, 0, 1, 0, 3, 2, 2, 0, 1, 3, 3, 1, 1, 0, 2, 0, 3, 1, 1, 3, 0, 2, 1, 0, 2, 0, 2, 1, 0, 3, 1, 2, 1, 1, 2, 2, 1, 1, 1, 2, 3, 3, 1, 0, 2, 0, 2, 1, 0, 3, 0, 0, 0, 1, 3, 3, 3, 1, 1, 2, 3, 3, 1, 1, 3, 2, 2, 0, 0, 2] +HistoryString() = "3, 2, 1, 0, 2, 0, 0, 1, 0, 3, 0, 1, 1, 1, 2, 0, 2, 1, 1, 3, 0, 0, 1, 0, 3, 3, 1, 1, 0, 3, 2, 0, 1, 1, 2, 0, 0, 1, 1, 2, 3, 1, 0, 1, 3, 2, 1, 1, 1, 2, 0, 0, 0, 1, 2, 1, 2, 0, 1, 2, 3, 2, 1, 0, 2, 2, 3, 1, 0, 3, 2, 3, 0, 1, 3, 2, 2, 0, 1, 3, 2, 0, 0, 0, 2, 0, 2, 1, 0, 2, 1, 1, 1, 0, 2, 3, 0, 0, 1, 2, 0, 0, 0, 0, 2, 3, 0, 1, 1, 2, 3, 0, 1, 0, 2, 1, 3, 0, 0, 2, 0, 0, 1, 1, 3, 3, 2, 0, 0, 3, 2, 1, 0, 1, 3, 0, 3, 1, 0, 2, 3, 0, 0, 1, 3, 0, 3, 0, 0, 3, 2, 2, 1, 1, 3, 0, 0, 0, 0, 3, 1, 1, 1, 0, 3, 3, 1, 0, 0, 3, 1, 0, 1, 0, 3, 1, 2, 0, 0, 3, 1, 1, 0, 0, 3, 0, 0, 1, 0, 3, 2, 1, 1, 1, 3, 1, 2, 0, 0, 3, 1, 2, 0, 0, 2, 2, 3, 0, 1, 3, 2, 1, 1, 1, 2, 1, 0, 1, 1, 3, 2, 3, 0, 0, 2, 1, 3, 0, 1, 3, 0, 1, 1, 1, 3, 0, 3, 1, 1, 3, 3, 3, 0, 0, 3, 0, 3, 0, 0, 2, 0, 3, 1, 1, 2, 1, 0, 0, 0, 2, 2, 3, 0, 0, 2, 3, 0, 0, 0, 2, 1, 1, 0, 0, 3, 0, 1, 0, 0, 3, 3, 0, 1, 1, 3, 2, 2, 1, 1, 2, 0, 0, 1, 1, 3, 2, 0, 1, 0, 2, 0, 3, 1, 0, 2, 2, 3, 0, 0, 2, 2, 0, 1, 0, 2, 0, 1, 0, 0, 3, 3, 3, 0, 0, 2, 3, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 2, 0, 1, 3, 1, 1, 0, 1, 3, 2, 1, 1, 1, 3, 1, 1, 0, 1, 2, 3, 3, 0, 0, 3, 1, 1, 1, 0, 3, 2, 1, 1, 1, 2, 0, 2, 0, 0, 2, 0, 1, 0, 1, 3, 3, 0, 1, 0, 3, 2, 2, 0, 1, 3, 3, 1, 1, 0, 2, 0, 3, 1, 1, 3, 0, 2, 1, 0, 2, 0, 2, 1, 0, 3, 1, 2, 1, 1, 2, 2, 1, 1, 1, 2, 3, 3, 1, 0, 2, 0, 2, 1, 0, 3, 0, 0, 0, 1, 3, 3, 3, 1, 1, 2, 3, 3, 1, 1, 3, 2, 2, 0, 0, 2" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "big box" +ObservationString(1) = "wall" +ObservationTensor(0): ◯◯◯◯◉ +ObservationTensor(1): ◯◉◯◯◯ +Rewards() = [-10.1, -10.1] +Returns() = [-34, -34] +LegalActions(0) = [0, 1, 2, 3] +LegalActions(1) = [0, 1, 2, 3] +StringLegalActions(0) = ["turn left", "turn right", "move forward", "stay"] +StringLegalActions(1) = ["turn left", "turn right", "move forward", "stay"] + +# Apply joint action ["turn left", "stay"] +actions: [0, 3] + +# State 361 +# Apply action "turn right" +action: 1 + +# State 362 +# Apply action "turn left" +action: 0 + +# State 363 +# Apply action "move forward" +action: 2 + +# State 364 +# Apply joint action ["move forward", "move forward"] +actions: [2, 2] + +# State 365 
+# Apply action "turn left" +action: 0 + +# State 366 +# Apply action "turn left" +action: 0 + +# State 367 +# Apply action "stay" +action: 3 + +# State 368 +# Apply joint action ["turn left", "stay"] +actions: [0, 3] + +# State 369 +# Apply action "turn left" +action: 0 + +# State 370 +# Apply action "turn right" +action: 1 + +# State 371 +# Apply action "stay" +action: 3 + +# State 372 +# Apply joint action ["turn right", "turn right"] +actions: [1, 1] + +# State 373 +# Apply action "turn right" +action: 1 + +# State 374 +# Apply action "turn right" +action: 1 + +# State 375 +# Apply action "stay" +action: 3 + +# State 376 +# Apply joint action ["turn left", "move forward"] +actions: [0, 2] + +# State 377 +# Apply action "turn right" +action: 1 + +# State 378 +# Apply action "turn left" +action: 0 + +# State 379 +# Apply action "move forward" +action: 2 + +# State 380 +# Apply joint action ["move forward", "stay"] +actions: [2, 3] + +# State 381 +# Apply action "turn right" +action: 1 + +# State 382 +# Apply action "turn right" +action: 1 + +# State 383 +# Apply action "stay" +action: 3 + +# State 384 +# Apply joint action ["move forward", "stay"] +actions: [2, 3] + +# State 385 +# Apply action "turn left" +action: 0 + +# State 386 +# Apply action "turn left" +action: 0 + +# State 387 +# Apply action "move forward" +action: 2 + +# State 388 +# Apply joint action ["turn right", "move forward"] +actions: [1, 2] + +# State 389 +# Apply action "turn right" +action: 1 + +# State 390 +# Apply action "turn left" +action: 0 + +# State 391 +# Apply action "move forward" +action: 2 + +# State 392 +# Apply joint action ["turn left", "turn right"] +actions: [0, 1] + +# State 393 +# Apply action "turn right" +action: 1 + +# State 394 +# Apply action "turn right" +action: 1 + +# State 395 +# Apply action "stay" +action: 3 + +# State 396 +# Apply joint action ["turn right", "stay"] +actions: [1, 3] + +# State 397 +# Apply action "turn left" +action: 0 + +# State 398 +# Apply action "turn right" +action: 1 + +# State 399 +# Apply action "stay" +action: 3 + +# State 400 +# Total moves: 100 +# Most recent reward: -0.1 +# Total rewards: -55 +# ........ +# ........ +# ......b. +# .b.BB..> +# ...^.... +# ........ +# ........ +# ........ 
+IsTerminal() = True +History() = [3, 2, 1, 0, 2, 0, 0, 1, 0, 3, 0, 1, 1, 1, 2, 0, 2, 1, 1, 3, 0, 0, 1, 0, 3, 3, 1, 1, 0, 3, 2, 0, 1, 1, 2, 0, 0, 1, 1, 2, 3, 1, 0, 1, 3, 2, 1, 1, 1, 2, 0, 0, 0, 1, 2, 1, 2, 0, 1, 2, 3, 2, 1, 0, 2, 2, 3, 1, 0, 3, 2, 3, 0, 1, 3, 2, 2, 0, 1, 3, 2, 0, 0, 0, 2, 0, 2, 1, 0, 2, 1, 1, 1, 0, 2, 3, 0, 0, 1, 2, 0, 0, 0, 0, 2, 3, 0, 1, 1, 2, 3, 0, 1, 0, 2, 1, 3, 0, 0, 2, 0, 0, 1, 1, 3, 3, 2, 0, 0, 3, 2, 1, 0, 1, 3, 0, 3, 1, 0, 2, 3, 0, 0, 1, 3, 0, 3, 0, 0, 3, 2, 2, 1, 1, 3, 0, 0, 0, 0, 3, 1, 1, 1, 0, 3, 3, 1, 0, 0, 3, 1, 0, 1, 0, 3, 1, 2, 0, 0, 3, 1, 1, 0, 0, 3, 0, 0, 1, 0, 3, 2, 1, 1, 1, 3, 1, 2, 0, 0, 3, 1, 2, 0, 0, 2, 2, 3, 0, 1, 3, 2, 1, 1, 1, 2, 1, 0, 1, 1, 3, 2, 3, 0, 0, 2, 1, 3, 0, 1, 3, 0, 1, 1, 1, 3, 0, 3, 1, 1, 3, 3, 3, 0, 0, 3, 0, 3, 0, 0, 2, 0, 3, 1, 1, 2, 1, 0, 0, 0, 2, 2, 3, 0, 0, 2, 3, 0, 0, 0, 2, 1, 1, 0, 0, 3, 0, 1, 0, 0, 3, 3, 0, 1, 1, 3, 2, 2, 1, 1, 2, 0, 0, 1, 1, 3, 2, 0, 1, 0, 2, 0, 3, 1, 0, 2, 2, 3, 0, 0, 2, 2, 0, 1, 0, 2, 0, 1, 0, 0, 3, 3, 3, 0, 0, 2, 3, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 2, 0, 1, 3, 1, 1, 0, 1, 3, 2, 1, 1, 1, 3, 1, 1, 0, 1, 2, 3, 3, 0, 0, 3, 1, 1, 1, 0, 3, 2, 1, 1, 1, 2, 0, 2, 0, 0, 2, 0, 1, 0, 1, 3, 3, 0, 1, 0, 3, 2, 2, 0, 1, 3, 3, 1, 1, 0, 2, 0, 3, 1, 1, 3, 0, 2, 1, 0, 2, 0, 2, 1, 0, 3, 1, 2, 1, 1, 2, 2, 1, 1, 1, 2, 3, 3, 1, 0, 2, 0, 2, 1, 0, 3, 0, 0, 0, 1, 3, 3, 3, 1, 1, 2, 3, 3, 1, 1, 3, 2, 2, 0, 0, 2, 0, 3, 1, 0, 2, 2, 2, 0, 0, 3, 0, 3, 0, 1, 3, 1, 1, 1, 1, 3, 0, 2, 1, 0, 2, 2, 3, 1, 1, 3, 2, 3, 0, 0, 2, 1, 2, 1, 0, 2, 0, 1, 1, 1, 3, 1, 3, 0, 1, 3] +HistoryString() = "3, 2, 1, 0, 2, 0, 0, 1, 0, 3, 0, 1, 1, 1, 2, 0, 2, 1, 1, 3, 0, 0, 1, 0, 3, 3, 1, 1, 0, 3, 2, 0, 1, 1, 2, 0, 0, 1, 1, 2, 3, 1, 0, 1, 3, 2, 1, 1, 1, 2, 0, 0, 0, 1, 2, 1, 2, 0, 1, 2, 3, 2, 1, 0, 2, 2, 3, 1, 0, 3, 2, 3, 0, 1, 3, 2, 2, 0, 1, 3, 2, 0, 0, 0, 2, 0, 2, 1, 0, 2, 1, 1, 1, 0, 2, 3, 0, 0, 1, 2, 0, 0, 0, 0, 2, 3, 0, 1, 1, 2, 3, 0, 1, 0, 2, 1, 3, 0, 0, 2, 0, 0, 1, 1, 3, 3, 2, 0, 0, 3, 2, 1, 0, 1, 3, 0, 3, 1, 0, 2, 3, 0, 0, 1, 3, 0, 3, 0, 0, 3, 2, 2, 1, 1, 3, 0, 0, 0, 0, 3, 1, 1, 1, 0, 3, 3, 1, 0, 0, 3, 1, 0, 1, 0, 3, 1, 2, 0, 0, 3, 1, 1, 0, 0, 3, 0, 0, 1, 0, 3, 2, 1, 1, 1, 3, 1, 2, 0, 0, 3, 1, 2, 0, 0, 2, 2, 3, 0, 1, 3, 2, 1, 1, 1, 2, 1, 0, 1, 1, 3, 2, 3, 0, 0, 2, 1, 3, 0, 1, 3, 0, 1, 1, 1, 3, 0, 3, 1, 1, 3, 3, 3, 0, 0, 3, 0, 3, 0, 0, 2, 0, 3, 1, 1, 2, 1, 0, 0, 0, 2, 2, 3, 0, 0, 2, 3, 0, 0, 0, 2, 1, 1, 0, 0, 3, 0, 1, 0, 0, 3, 3, 0, 1, 1, 3, 2, 2, 1, 1, 2, 0, 0, 1, 1, 3, 2, 0, 1, 0, 2, 0, 3, 1, 0, 2, 2, 3, 0, 0, 2, 2, 0, 1, 0, 2, 0, 1, 0, 0, 3, 3, 3, 0, 0, 2, 3, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 2, 0, 1, 3, 1, 1, 0, 1, 3, 2, 1, 1, 1, 3, 1, 1, 0, 1, 2, 3, 3, 0, 0, 3, 1, 1, 1, 0, 3, 2, 1, 1, 1, 2, 0, 2, 0, 0, 2, 0, 1, 0, 1, 3, 3, 0, 1, 0, 3, 2, 2, 0, 1, 3, 3, 1, 1, 0, 2, 0, 3, 1, 1, 3, 0, 2, 1, 0, 2, 0, 2, 1, 0, 3, 1, 2, 1, 1, 2, 2, 1, 1, 1, 2, 3, 3, 1, 0, 2, 0, 2, 1, 0, 3, 0, 0, 0, 1, 3, 3, 3, 1, 1, 2, 3, 3, 1, 1, 3, 2, 2, 0, 0, 2, 0, 3, 1, 0, 2, 2, 2, 0, 0, 3, 0, 3, 0, 1, 3, 1, 1, 1, 1, 3, 0, 2, 1, 0, 2, 2, 3, 1, 1, 3, 2, 3, 0, 0, 2, 1, 2, 1, 0, 2, 0, 1, 1, 1, 3, 1, 3, 0, 1, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "big box" +ObservationString(1) = "wall" +ObservationTensor(0): ◯◯◯◯◉ +ObservationTensor(1): ◯◉◯◯◯ +Rewards() = [-0.1, -0.1] +Returns() = [-55, -55] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/coop_to_1p(game=tiny_bridge_2p()).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/coop_to_1p(game=tiny_bridge_2p()).txt new file 
mode 100644 index 0000000..bc46268 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/coop_to_1p(game=tiny_bridge_2p()).txt @@ -0,0 +1,471 @@ +game: coop_to_1p(game=tiny_bridge_2p()) + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "1p(Tiny Bridge (Uncontested))" +GameType.max_num_players = 1 +GameType.min_num_players = 1 +GameType.parameter_specification = ["game"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "coop_to_1p" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 7 +PolicyTensorShape() = [7] +MaxChanceOutcomes() = 28 +GetParameters() = {game=tiny_bridge_2p()} +NumPlayers() = 1 +MinUtility() = -40.0 +MaxUtility() = 35.0 +UtilitySum() = None +ObservationTensorShape() = [287] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 287 +MaxGameLength() = 224 +ToString() = "coop_to_1p(game=tiny_bridge_2p())" + +# State 0 +# W:?? E:?? +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "Player 0\nNew Game\n" +ObservationTensor(0): zeros(287) +ChanceOutcomes() = [(0,0.0357143), (1,0.0357143), (3,0.0357143), (6,0.0357143), (10,0.0357143), (15,0.0357143), (21,0.0357143), (2,0.0357143), (4,0.0357143), (7,0.0357143), (11,0.0357143), (16,0.0357143), (22,0.0357143), (5,0.0357143), (8,0.0357143), (12,0.0357143), (17,0.0357143), (23,0.0357143), (9,0.0357143), (13,0.0357143), (18,0.0357143), (24,0.0357143), (14,0.0357143), (19,0.0357143), (25,0.0357143), (20,0.0357143), (26,0.0357143), (27,0.0357143)] +LegalActions() = [0, 1, 3, 6, 10, 15, 21, 2, 4, 7, 11, 16, 22, 5, 8, 12, 17, 23, 9, 13, 18, 24, 14, 19, 25, 20, 26, 27] +StringLegalActions() = ["HQHJ", "HKHJ", "HAHJ", "SJHJ", "SQHJ", "SKHJ", "SAHJ", "HKHQ", "HAHQ", "SJHQ", "SQHQ", "SKHQ", "SAHQ", "HAHK", "SJHK", "SQHK", "SKHK", "SAHK", "SJHA", "SQHA", "SKHA", "SAHA", "SQSJ", "SKSJ", "SASJ", "SKSQ", "SASQ", "SASK"] + +# Apply action "SQSJ" +action: 14 + +# State 1 +# W:SQSJ E:?? 
+# Player 0 possible: HQHJ HKHJ HKHQ HAHJ HAHQ HAHK SJHJ SJHQ SJHK SJHA SQHJ SQHQ SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK +IsTerminal() = False +History() = [14] +HistoryString() = "14" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "Player 0\nNew Game\nPlayer 0 possible: HQHJ HKHJ HKHQ HAHJ HAHQ HAHK SJHJ SJHQ SJHK SJHA SQHJ SQHQ SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK\n" +ObservationTensor(0): zeros(287) +ChanceOutcomes() = [(0,0.0666667), (1,0.0666667), (3,0.0666667), (15,0.0666667), (21,0.0666667), (2,0.0666667), (4,0.0666667), (16,0.0666667), (22,0.0666667), (5,0.0666667), (17,0.0666667), (23,0.0666667), (18,0.0666667), (24,0.0666667), (27,0.0666667)] +LegalActions() = [0, 1, 3, 15, 21, 2, 4, 16, 22, 5, 17, 23, 18, 24, 27] +StringLegalActions() = ["HQHJ", "HKHJ", "HAHJ", "SKHJ", "SAHJ", "HKHQ", "HAHQ", "SKHQ", "SAHQ", "HAHK", "SKHK", "SAHK", "SKHA", "SAHA", "SASK"] + +# Apply action "SKHA" +action: 18 + +# State 2 +# W:SQSJ E:SKHA +# Player 0 Pass: none +# Player 0 1H: none +# Player 0 1S: none +# Player 0 1NT: none +# Player 0 2H: none +# Player 0 2S: none +# Player 0 2NT: none +# Player 0 unassigned: HQHJ HKHJ HKHQ HAHJ HAHQ HAHK SJHJ SJHQ SJHK SJHA SQHJ SQHQ SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK +# Player 1 possible: HQHJ HKHJ HKHQ HAHJ HAHQ HAHK SJHJ SJHQ SJHK SJHA SQHJ SQHQ SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK +IsTerminal() = False +History() = [14, 18] +HistoryString() = "14, 18" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Player 0\nNew Game\nPlayer 0 Pass: none\nPlayer 0 1H: none\nPlayer 0 1S: none\nPlayer 0 1NT: none\nPlayer 0 2H: none\nPlayer 0 2S: none\nPlayer 0 2NT: none\nPlayer 0 unassigned: HQHJ HKHJ HKHQ HAHJ HAHQ HAHK SJHJ SJHQ SJHK SJHA SQHJ SQHQ SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK\nPlayer 1 possible: HQHJ HKHJ HKHQ HAHJ HAHQ HAHK SJHJ SJHQ SJHK SJHA SQHJ SQHQ SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK\n" +ObservationTensor(0): binvec(287, 0xffffffffffffff00000000000000000000000000000000000000000000000008000000) +Rewards() = [0] +Returns() = [0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6] +StringLegalActions() = ["HQHJ->Pass", "HQHJ->1H", "HQHJ->1S", "HQHJ->1NT", "HQHJ->2H", "HQHJ->2S", "HQHJ->2NT"] + +# Apply action "HQHJ->2S" +action: 5 + +# State 3 +# W:SQSJ E:SKHA +# Player 0 Pass: none +# Player 0 1H: none +# Player 0 1S: none +# Player 0 1NT: none +# Player 0 2H: none +# Player 0 2S: HQHJ +# Player 0 2NT: none +# Player 0 unassigned: HKHJ HKHQ HAHJ HAHQ HAHK SJHJ SJHQ SJHK SJHA SQHJ SQHQ SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK +# Player 1 possible: HQHJ HKHJ HKHQ HAHJ HAHQ HAHK SJHJ SJHQ SJHK SJHA SQHJ SQHQ SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK +IsTerminal() = False +History() = [14, 18, 5] +HistoryString() = "14, 18, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Player 0\nNew Game\nPlayer 0 Pass: none\nPlayer 0 1H: none\nPlayer 0 1S: none\nPlayer 0 1NT: none\nPlayer 0 2H: none\nPlayer 0 2S: HQHJ\nPlayer 0 2NT: none\nPlayer 0 unassigned: HKHJ HKHQ HAHJ HAHQ HAHK SJHJ SJHQ SJHK SJHA SQHJ SQHQ SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK\nPlayer 1 
possible: HQHJ HKHJ HKHQ HAHJ HAHQ HAHK SJHJ SJHQ SJHK SJHA SQHJ SQHQ SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK\n" +ObservationTensor(0): binvec(287, 0xffffffffffffff00000000000000000000000000000000000800000000000004000000) +Rewards() = [0] +Returns() = [0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6] +StringLegalActions() = ["HKHJ->Pass", "HKHJ->1H", "HKHJ->1S", "HKHJ->1NT", "HKHJ->2H", "HKHJ->2S", "HKHJ->2NT"] + +# Apply action "HKHJ->2S" +action: 5 + +# State 4 +# W:SQSJ E:SKHA +# Player 0 Pass: none +# Player 0 1H: none +# Player 0 1S: none +# Player 0 1NT: none +# Player 0 2H: none +# Player 0 2S: HQHJ HKHJ +# Player 0 2NT: none +# Player 0 unassigned: HKHQ HAHJ HAHQ HAHK SJHJ SJHQ SJHK SJHA SQHJ SQHQ SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK +# Player 1 possible: HQHJ HKHJ HKHQ HAHJ HAHQ HAHK SJHJ SJHQ SJHK SJHA SQHJ SQHQ SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK +IsTerminal() = False +History() = [14, 18, 5, 5] +HistoryString() = "14, 18, 5, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Player 0\nNew Game\nPlayer 0 Pass: none\nPlayer 0 1H: none\nPlayer 0 1S: none\nPlayer 0 1NT: none\nPlayer 0 2H: none\nPlayer 0 2S: HQHJ HKHJ\nPlayer 0 2NT: none\nPlayer 0 unassigned: HKHQ HAHJ HAHQ HAHK SJHJ SJHQ SJHK SJHA SQHJ SQHQ SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK\nPlayer 1 possible: HQHJ HKHJ HKHQ HAHJ HAHQ HAHK SJHJ SJHQ SJHK SJHA SQHJ SQHQ SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK\n" +ObservationTensor(0): binvec(287, 0xffffffffffffff00000000000000000000000000000000000c00000000000002000000) +Rewards() = [0] +Returns() = [0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6] +StringLegalActions() = ["HKHQ->Pass", "HKHQ->1H", "HKHQ->1S", "HKHQ->1NT", "HKHQ->2H", "HKHQ->2S", "HKHQ->2NT"] + +# Apply action "HKHQ->Pass" +action: 0 + +# State 5 +# Apply action "HAHJ->2H" +action: 4 + +# State 6 +# Apply action "HAHQ->2H" +action: 4 + +# State 7 +# Apply action "HAHK->Pass" +action: 0 + +# State 8 +# Apply action "SJHJ->1H" +action: 1 + +# State 9 +# Apply action "SJHQ->2NT" +action: 6 + +# State 10 +# Apply action "SJHK->Pass" +action: 0 + +# State 11 +# Apply action "SJHA->1NT" +action: 3 + +# State 12 +# W:SQSJ E:SKHA +# Player 0 Pass: HKHQ HAHK SJHK +# Player 0 1H: SJHJ +# Player 0 1S: none +# Player 0 1NT: SJHA +# Player 0 2H: HAHJ HAHQ +# Player 0 2S: HQHJ HKHJ +# Player 0 2NT: SJHQ +# Player 0 unassigned: SQHJ SQHQ SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK +# Player 1 possible: HQHJ HKHJ HKHQ HAHJ HAHQ HAHK SJHJ SJHQ SJHK SJHA SQHJ SQHQ SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK +IsTerminal() = False +History() = [14, 18, 5, 5, 0, 4, 4, 0, 1, 6, 0, 3] +HistoryString() = "14, 18, 5, 5, 0, 4, 4, 0, 1, 6, 0, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Player 0\nNew Game\nPlayer 0 Pass: HKHQ HAHK SJHK\nPlayer 0 1H: SJHJ\nPlayer 0 1S: none\nPlayer 0 1NT: SJHA\nPlayer 0 2H: HAHJ HAHQ\nPlayer 0 2S: HQHJ HKHJ\nPlayer 0 2NT: SJHQ\nPlayer 0 unassigned: SQHJ SQHQ SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK\nPlayer 1 possible: HQHJ HKHJ HKHQ HAHJ HAHQ HAHK SJHJ SJHQ SJHK SJHA SQHJ SQHQ SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK\n" +ObservationTensor(0): binvec(287, 
0xffffffffffffff24800000200000000000000400001800000c00000001000000020000) +Rewards() = [0] +Returns() = [0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6] +StringLegalActions() = ["SQHJ->Pass", "SQHJ->1H", "SQHJ->1S", "SQHJ->1NT", "SQHJ->2H", "SQHJ->2S", "SQHJ->2NT"] + +# Apply action "SQHJ->Pass" +action: 0 + +# State 13 +# Apply action "SQHQ->1S" +action: 2 + +# State 14 +# Apply action "SQHK->2H" +action: 4 + +# State 15 +# Apply action "SQHA->2NT" +action: 6 + +# State 16 +# Apply action "SQSJ->2NT" +action: 6 + +# State 17 +# Apply action "SKHJ->1S" +action: 2 + +# State 18 +# Apply action "SKHQ->Pass" +action: 0 + +# State 19 +# Apply action "SKHK->1H" +action: 1 + +# State 20 +# Apply action "SKHA->1NT" +action: 3 + +# State 21 +# Apply action "SKSJ->2H" +action: 4 + +# State 22 +# W:SQSJ E:SKHA +# Player 0 Pass: HKHQ HAHK SJHK SQHJ SKHQ +# Player 0 1H: SJHJ SKHK +# Player 0 1S: SQHQ SKHJ +# Player 0 1NT: SJHA SKHA +# Player 0 2H: HAHJ HAHQ SQHK SKSJ +# Player 0 2S: HQHJ HKHJ +# Player 0 2NT: SJHQ SQHA SQSJ +# Player 0 unassigned: SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK +# Player 1 possible: HQHJ HKHJ HKHQ HAHJ HAHQ HAHK SJHJ SJHQ SJHK SJHA SQHJ SQHQ SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK +IsTerminal() = False +History() = [14, 18, 5, 5, 0, 4, 4, 0, 1, 6, 0, 3, 0, 2, 4, 6, 6, 2, 0, 1, 3, 4] +HistoryString() = "14, 18, 5, 5, 0, 4, 4, 0, 1, 6, 0, 3, 0, 2, 4, 6, 6, 2, 0, 1, 3, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Player 0\nNew Game\nPlayer 0 Pass: HKHQ HAHK SJHK SQHJ SKHQ\nPlayer 0 1H: SJHJ SKHK\nPlayer 0 1S: SQHQ SKHJ\nPlayer 0 1NT: SJHA SKHA\nPlayer 0 2H: HAHJ HAHQ SQHK SKSJ\nPlayer 0 2S: HQHJ HKHJ\nPlayer 0 2NT: SJHQ SQHA SQSJ\nPlayer 0 unassigned: SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK\nPlayer 1 possible: HQHJ HKHJ HKHQ HAHJ HAHQ HAHK SJHJ SJHQ SJHK SJHA SQHJ SQHQ SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK\n" +ObservationTensor(0): binvec(287, 0xffffffffffffff24a08000200400001100000402001808100c00000001060000000080) +Rewards() = [0] +Returns() = [0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6] +StringLegalActions() = ["SKSQ->Pass", "SKSQ->1H", "SKSQ->1S", "SKSQ->1NT", "SKSQ->2H", "SKSQ->2S", "SKSQ->2NT"] + +# Apply action "SKSQ->2NT" +action: 6 + +# State 23 +# Apply action "SAHJ->2S" +action: 5 + +# State 24 +# Apply action "SAHQ->1H" +action: 1 + +# State 25 +# Apply action "SAHK->1S" +action: 2 + +# State 26 +# Apply action "SAHA->1NT" +action: 3 + +# State 27 +# Apply action "SASJ->1H" +action: 1 + +# State 28 +# Apply action "SASQ->2NT" +action: 6 + +# State 29 +# Apply action "SASK->1NT" +action: 3 + +# State 30 +# Apply action "HQHJ->Pass" +action: 0 + +# State 31 +# Apply action "HKHJ->Pass" +action: 0 + +# State 32 +# W:SQSJ E:SKHA 2NT +# Player 0 possible: SJHQ SQHA SQSJ SKSQ SASQ +# Player 1 Pass: HQHJ HKHJ +# Player 1 unassigned: HKHQ HAHJ HAHQ HAHK SJHJ SJHQ SJHK SJHA SQHJ SQHQ SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK +IsTerminal() = False +History() = [14, 18, 5, 5, 0, 4, 4, 0, 1, 6, 0, 3, 0, 2, 4, 6, 6, 2, 0, 1, 3, 4, 6, 5, 1, 2, 3, 1, 6, 3, 0, 0] +HistoryString() = "14, 18, 5, 5, 0, 4, 4, 0, 1, 6, 0, 3, 0, 2, 4, 6, 6, 2, 0, 1, 3, 4, 6, 5, 1, 2, 3, 1, 6, 3, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Player 0\n2NT\nPlayer 0 possible: SJHQ SQHA SQSJ SKSQ SASQ\nPlayer 1 Pass: HQHJ HKHJ\nPlayer 1 unassigned: HKHQ HAHJ HAHQ HAHK SJHJ SJHQ 
SJHK SJHA SQHJ SQHQ SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK\n" +ObservationTensor(0): binvec(287, 0x10106082fffffffc0000000000000000000000000000000000000000000000002000000) +Rewards() = [0] +Returns() = [0] +LegalActions() = [0] +StringLegalActions() = ["HKHQ->Pass"] + +# Apply action "HKHQ->Pass" +action: 0 + +# State 33 +# Apply action "HAHJ->Pass" +action: 0 + +# State 34 +# Apply action "HAHQ->Pass" +action: 0 + +# State 35 +# Apply action "HAHK->Pass" +action: 0 + +# State 36 +# Apply action "SJHJ->Pass" +action: 0 + +# State 37 +# Apply action "SJHQ->Pass" +action: 0 + +# State 38 +# Apply action "SJHK->Pass" +action: 0 + +# State 39 +# Apply action "SJHA->Pass" +action: 0 + +# State 40 +# Apply action "SQHJ->Pass" +action: 0 + +# State 41 +# Apply action "SQHQ->Pass" +action: 0 + +# State 42 +# W:SQSJ E:SKHA 2NT +# Player 0 possible: SJHQ SQHA SQSJ SKSQ SASQ +# Player 1 Pass: HQHJ HKHJ HKHQ HAHJ HAHQ HAHK SJHJ SJHQ SJHK SJHA SQHJ SQHQ +# Player 1 unassigned: SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK +IsTerminal() = False +History() = [14, 18, 5, 5, 0, 4, 4, 0, 1, 6, 0, 3, 0, 2, 4, 6, 6, 2, 0, 1, 3, 4, 6, 5, 1, 2, 3, 1, 6, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] +HistoryString() = "14, 18, 5, 5, 0, 4, 4, 0, 1, 6, 0, 3, 0, 2, 4, 6, 6, 2, 0, 1, 3, 4, 6, 5, 1, 2, 3, 1, 6, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Player 0\n2NT\nPlayer 0 possible: SJHQ SQHA SQSJ SKSQ SASQ\nPlayer 1 Pass: HQHJ HKHJ HKHQ HAHJ HAHQ HAHK SJHJ SJHQ SJHK SJHA SQHJ SQHQ\nPlayer 1 unassigned: SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK\n" +ObservationTensor(0): binvec(287, 0x10106082ffffffffff00000000000000000000000000000000000000000000000008000) +Rewards() = [0] +Returns() = [0] +LegalActions() = [0] +StringLegalActions() = ["SQHK->Pass"] + +# Apply action "SQHK->Pass" +action: 0 + +# State 43 +# Apply action "SQHA->Pass" +action: 0 + +# State 44 +# Apply action "SQSJ->Pass" +action: 0 + +# State 45 +# Apply action "SKHJ->Pass" +action: 0 + +# State 46 +# Apply action "SKHQ->Pass" +action: 0 + +# State 47 +# Apply action "SKHK->Pass" +action: 0 + +# State 48 +# Apply action "SKHA->Pass" +action: 0 + +# State 49 +# Apply action "SKSJ->Pass" +action: 0 + +# State 50 +# Apply action "SKSQ->Pass" +action: 0 + +# State 51 +# Apply action "SAHJ->Pass" +action: 0 + +# State 52 +# W:SQSJ E:SKHA 2NT +# Player 0 possible: SJHQ SQHA SQSJ SKSQ SASQ +# Player 1 Pass: HQHJ HKHJ HKHQ HAHJ HAHQ HAHK SJHJ SJHQ SJHK SJHA SQHJ SQHQ SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ +# Player 1 unassigned: SAHQ SAHK SAHA SASJ SASQ SASK +IsTerminal() = False +History() = [14, 18, 5, 5, 0, 4, 4, 0, 1, 6, 0, 3, 0, 2, 4, 6, 6, 2, 0, 1, 3, 4, 6, 5, 1, 2, 3, 1, 6, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] +HistoryString() = "14, 18, 5, 5, 0, 4, 4, 0, 1, 6, 0, 3, 0, 2, 4, 6, 6, 2, 0, 1, 3, 4, 6, 5, 1, 2, 3, 1, 6, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Player 0\n2NT\nPlayer 0 possible: SJHQ SQHA SQSJ SKSQ SASQ\nPlayer 1 Pass: HQHJ HKHJ HKHQ HAHJ HAHQ HAHK SJHJ SJHQ SJHK SJHA SQHJ SQHQ SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ\nPlayer 1 unassigned: SAHQ SAHK SAHA SASJ SASQ SASK\n" +ObservationTensor(0): binvec(287, 
0x10106082ffffffffffffc00000000000000000000000000000000000000000000000020) +Rewards() = [0] +Returns() = [0] +LegalActions() = [0] +StringLegalActions() = ["SAHQ->Pass"] + +# Apply action "SAHQ->Pass" +action: 0 + +# State 53 +# Apply action "SAHK->Pass" +action: 0 + +# State 54 +# Apply action "SAHA->Pass" +action: 0 + +# State 55 +# Apply action "SASJ->Pass" +action: 0 + +# State 56 +# Apply action "SASQ->Pass" +action: 0 + +# State 57 +# Apply action "SASK->Pass" +action: 0 + +# State 58 +# W:SQSJ E:SKHA 2NT-Pass +# Player 0 possible: SJHQ SQHA SQSJ SKSQ SASQ +# Player 1 possible: HQHJ HKHJ HKHQ HAHJ HAHQ HAHK SJHJ SJHQ SJHK SJHA SQHJ SQHQ SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK +IsTerminal() = True +History() = [14, 18, 5, 5, 0, 4, 4, 0, 1, 6, 0, 3, 0, 2, 4, 6, 6, 2, 0, 1, 3, 4, 6, 5, 1, 2, 3, 1, 6, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] +HistoryString() = "14, 18, 5, 5, 0, 4, 4, 0, 1, 6, 0, 3, 0, 2, 4, 6, 6, 2, 0, 1, 3, 4, 6, 5, 1, 2, 3, 1, 6, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "Player 0\nPass\nPlayer 0 possible: SJHQ SQHA SQSJ SKSQ SASQ\nPlayer 1 possible: HQHJ HKHJ HKHQ HAHJ HAHQ HAHK SJHJ SJHQ SJHK SJHA SQHJ SQHQ SQHK SQHA SQSJ SKHJ SKHQ SKHK SKHA SKSJ SKSQ SAHJ SAHQ SAHK SAHA SASJ SASQ SASK\n" +ObservationTensor(0): binvec(287, 0x400106082fffffff00000000000000000000000000000000000000000000000000000000) +Rewards() = [-20] +Returns() = [-20] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/coop_to_1p(game=tiny_hanabi()).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/coop_to_1p(game=tiny_hanabi()).txt new file mode 100644 index 0000000..5a28384 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/coop_to_1p(game=tiny_hanabi()).txt @@ -0,0 +1,154 @@ +game: coop_to_1p(game=tiny_hanabi()) + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "1p(Tiny Hanabi)" +GameType.max_num_players = 1 +GameType.min_num_players = 1 +GameType.parameter_specification = ["game"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "coop_to_1p" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 3 +PolicyTensorShape() = [3] +MaxChanceOutcomes() = 2 +GetParameters() = {game=tiny_hanabi()} +NumPlayers() = 1 +MinUtility() = 0.0 +MaxUtility() = 10.0 +UtilitySum() = None +ObservationTensorShape() = [15] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 15 +MaxGameLength() = 4 +ToString() = "coop_to_1p(game=tiny_hanabi())" + +# State 0 +# +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "Player 0\nNew Game\n" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.5), (1,0.5)] +LegalActions() = [0, 1] +StringLegalActions() = ["d0", "d1"] + +# Apply action "d0" +action: 0 + +# State 1 +# p0:d0 +# Player 0 possible: d0 d1 
+IsTerminal() = False +History() = [0] +HistoryString() = "0" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "Player 0\nNew Game\nPlayer 0 possible: d0 d1\n" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.5), (1,0.5)] +LegalActions() = [0, 1] +StringLegalActions() = ["d0", "d1"] + +# Apply action "d1" +action: 1 + +# State 2 +# p0:d0 p1:d1 +# Player 0 p0a0: none +# Player 0 p0a1: none +# Player 0 p0a2: none +# Player 0 unassigned: d0 d1 +# Player 1 possible: d0 d1 +IsTerminal() = False +History() = [0, 1] +HistoryString() = "0, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Player 0\nNew Game\nPlayer 0 p0a0: none\nPlayer 0 p0a1: none\nPlayer 0 p0a2: none\nPlayer 0 unassigned: d0 d1\nPlayer 1 possible: d0 d1\n" +ObservationTensor(0): ◯◯◯◉◉◉◉◯◯◯◯◯◯◉◯ +Rewards() = [0] +Returns() = [0] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["d0->p0a0", "d0->p0a1", "d0->p0a2"] + +# Apply action "d0->p0a2" +action: 2 + +# State 3 +# p0:d0 p1:d1 +# Player 0 p0a0: none +# Player 0 p0a1: none +# Player 0 p0a2: d0 +# Player 0 unassigned: d1 +# Player 1 possible: d0 d1 +IsTerminal() = False +History() = [0, 1, 2] +HistoryString() = "0, 1, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Player 0\nNew Game\nPlayer 0 p0a0: none\nPlayer 0 p0a1: none\nPlayer 0 p0a2: d0\nPlayer 0 unassigned: d1\nPlayer 1 possible: d0 d1\n" +ObservationTensor(0): ◯◯◯◉◉◉◉◯◯◯◯◉◯◯◉ +Rewards() = [0] +Returns() = [0] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["d1->p0a0", "d1->p0a1", "d1->p0a2"] + +# Apply action "d1->p0a1" +action: 1 + +# State 4 +# p0:d0 p1:d1 p0:a2 +# Player 0 possible: d0 +# Player 1 p1a0: none +# Player 1 p1a1: none +# Player 1 p1a2: none +# Player 1 unassigned: d0 d1 +IsTerminal() = False +History() = [0, 1, 2, 1] +HistoryString() = "0, 1, 2, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Player 0\np0a2\nPlayer 0 possible: d0\nPlayer 1 p1a0: none\nPlayer 1 p1a1: none\nPlayer 1 p1a2: none\nPlayer 1 unassigned: d0 d1\n" +ObservationTensor(0): ◯◯◉◉◯◉◉◯◯◯◯◯◯◉◯ +Rewards() = [0] +Returns() = [0] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["d0->p1a0", "d0->p1a1", "d0->p1a2"] + +# Apply action "d0->p1a2" +action: 2 + +# State 5 +# Apply action "d1->p1a2" +action: 2 + +# State 6 +# p0:d0 p1:d1 p0:a2 p1:a2 +# Player 0 possible: d0 +# Player 1 possible: d0 d1 +IsTerminal() = True +History() = [0, 1, 2, 1, 2, 2] +HistoryString() = "0, 1, 2, 1, 2, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "Player 0\np1a2\nPlayer 0 possible: d0\nPlayer 1 possible: d0 d1\n" +ObservationTensor(0): ◯◯◉◉◯◉◉◯◯◯◯◯◯◯◯ +Rewards() = [10] +Returns() = [10] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/coordinated_mp.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/coordinated_mp.txt new file mode 100644 index 0000000..e815718 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/coordinated_mp.txt @@ -0,0 +1,113 @@ +game: coordinated_mp + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Coordinated Matching Pennies" +GameType.max_num_players = 2 +GameType.min_num_players = 2 
+GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = False +GameType.provides_factored_observation_string = True +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "coordinated_mp" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 2 +PolicyTensorShape() = [2] +MaxChanceOutcomes() = 2 +GetParameters() = {} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +MaxGameLength() = 2 +ToString() = "coordinated_mp()" + +# State 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "0" +InformationStateString(1) = "0" +ObservationString(0) = "" +ObservationString(1) = "" +PublicObservationString() = "start game" +PrivateObservationString(0) = "" +PrivateObservationString(1) = "" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["Heads", "Tails"] + +# Apply action "Tails" +action: 1 + +# State 1 +# T +IsTerminal() = False +History() = [1] +HistoryString() = "1" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "1T" +InformationStateString(1) = "1" +ObservationString(0) = "" +ObservationString(1) = "" +PublicObservationString() = "clock tick" +PrivateObservationString(0) = "" +PrivateObservationString(1) = "" +ChanceOutcomes() = [(0,0.5), (1,0.5)] +LegalActions() = [0, 1] +StringLegalActions() = ["Top", "Bottom"] + +# Apply action "Top" +action: 0 + +# State 2 +# TT +IsTerminal() = False +History() = [1, 0] +HistoryString() = "1, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "2TT" +InformationStateString(1) = "2T" +ObservationString(0) = "T" +ObservationString(1) = "T" +PublicObservationString() = "clock tick" +PrivateObservationString(0) = "T" +PrivateObservationString(1) = "T" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["Heads", "Tails"] + +# Apply action "Tails" +action: 1 + +# State 3 +# TTT +IsTerminal() = True +History() = [1, 0, 1] +HistoryString() = "1, 0, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "3TT" +InformationStateString(1) = "3TT" +ObservationString(0) = "T" +ObservationString(1) = "T" +PublicObservationString() = "clock tick" +PrivateObservationString(0) = "T" +PrivateObservationString(1) = "T" +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/crazy_eights.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/crazy_eights.txt new file mode 100644 index 0000000..b75dbd3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/crazy_eights.txt @@ -0,0 +1,2394 @@ +game: crazy_eights + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Crazy Eights" +GameType.max_num_players = 15 +GameType.min_num_players = 2 +GameType.parameter_specification = ["max_draw_cards", "players", "reshuffle", "use_special_cards"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False 
+GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "crazy_eights" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 58 +PolicyTensorShape() = [58] +MaxChanceOutcomes() = 57 +GetParameters() = {max_draw_cards=5,players=5,reshuffle=False,use_special_cards=False} +NumPlayers() = 5 +MinUtility() = -544.0 +MaxUtility() = 0.0 +UtilitySum() = None +ObservationTensorShape() = [372] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 372 +MaxGameLength() = 10000 +ToString() = "crazy_eights()" + +# State 0 +# Number of cards left in deck: 52 +# Player 0: Player 1: Player 2: Player 3: Player 4: +# Suit C: Suit C: Suit C: Suit C: Suit C: +# Suit D: Suit D: Suit D: Suit D: Suit D: +# Suit H: Suit H: Suit H: Suit H: Suit H: +# Suit S: Suit S: Suit S: Suit S: Suit S: +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "" +ObservationString(1) = "" +ObservationString(2) = "" +ObservationString(3) = "" +ObservationString(4) = "" +ObservationTensor(0): zeros(372) +ObservationTensor(1): zeros(372) +ObservationTensor(2): zeros(372) +ObservationTensor(3): zeros(372) +ObservationTensor(4): zeros(372) +ChanceOutcomes() = [(52,0.2), (53,0.2), (54,0.2), (55,0.2), (56,0.2)] +LegalActions() = [52, 53, 54, 55, 56] +StringLegalActions() = ["Decide Player 0 to be the dealer", "Decide Player 1 to be the dealer", "Decide Player 2 to be the dealer", "Decide Player 3 to be the dealer", "Decide Player 4 to be the dealer"] + +# Apply action "Decide Player 2 to be the dealer" +action: 54 + +# State 1 +# Player 2 becomes the dealer +# Number of cards left in deck: 52 +# Player 0: Player 1: Player 2: Player 3: Player 4: +# Suit C: Suit C: Suit C: Suit C: Suit C: +# Suit D: Suit D: Suit D: Suit D: Suit D: +# Suit H: Suit H: Suit H: Suit H: Suit H: +# Suit S: Suit S: Suit S: Suit S: Suit S: +IsTerminal() = False +History() = [54] +HistoryString() = "54" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "" +ObservationString(1) = "" +ObservationString(2) = "" +ObservationString(3) = "" +ObservationString(4) = "" +ObservationTensor(0): zeros(372) +ObservationTensor(1): zeros(372) +ObservationTensor(2): zeros(372) +ObservationTensor(3): zeros(372) +ObservationTensor(4): zeros(372) +ChanceOutcomes() = [(0,0.0192308), (1,0.0192308), (2,0.0192308), (3,0.0192308), (4,0.0192308), (5,0.0192308), (6,0.0192308), (7,0.0192308), (8,0.0192308), (9,0.0192308), (10,0.0192308), (11,0.0192308), (12,0.0192308), (13,0.0192308), (14,0.0192308), (15,0.0192308), (16,0.0192308), (17,0.0192308), (18,0.0192308), (19,0.0192308), (20,0.0192308), (21,0.0192308), (22,0.0192308), (23,0.0192308), (24,0.0192308), (25,0.0192308), (26,0.0192308), (27,0.0192308), (28,0.0192308), (29,0.0192308), (30,0.0192308), (31,0.0192308), (32,0.0192308), (33,0.0192308), (34,0.0192308), (35,0.0192308), (36,0.0192308), (37,0.0192308), (38,0.0192308), (39,0.0192308), (40,0.0192308), (41,0.0192308), (42,0.0192308), (43,0.0192308), (44,0.0192308), (45,0.0192308), (46,0.0192308), (47,0.0192308), (48,0.0192308), (49,0.0192308), (50,0.0192308), (51,0.0192308)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 
42, 43, 44, 45, 46, 47, 48, 49, 50, 51] +StringLegalActions() = ["Deal C2", "Deal D2", "Deal H2", "Deal S2", "Deal C3", "Deal D3", "Deal H3", "Deal S3", "Deal C4", "Deal D4", "Deal H4", "Deal S4", "Deal C5", "Deal D5", "Deal H5", "Deal S5", "Deal C6", "Deal D6", "Deal H6", "Deal S6", "Deal C7", "Deal D7", "Deal H7", "Deal S7", "Deal C8", "Deal D8", "Deal H8", "Deal S8", "Deal C9", "Deal D9", "Deal H9", "Deal S9", "Deal CT", "Deal DT", "Deal HT", "Deal ST", "Deal CJ", "Deal DJ", "Deal HJ", "Deal SJ", "Deal CQ", "Deal DQ", "Deal HQ", "Deal SQ", "Deal CK", "Deal DK", "Deal HK", "Deal SK", "Deal CA", "Deal DA", "Deal HA", "Deal SA"] + +# Apply action "Deal HA" +action: 50 + +# State 2 +# Apply action "Deal SQ" +action: 43 + +# State 3 +# Apply action "Deal H3" +action: 6 + +# State 4 +# Apply action "Deal HQ" +action: 42 + +# State 5 +# Apply action "Deal C5" +action: 12 + +# State 6 +# Apply action "Deal H2" +action: 2 + +# State 7 +# Apply action "Deal D5" +action: 13 + +# State 8 +# Apply action "Deal H4" +action: 10 + +# State 9 +# Apply action "Deal S8" +action: 27 + +# State 10 +# Apply action "Deal H7" +action: 22 + +# State 11 +# Apply action "Deal S9" +action: 31 + +# State 12 +# Apply action "Deal C4" +action: 8 + +# State 13 +# Apply action "Deal D8" +action: 25 + +# State 14 +# Apply action "Deal CK" +action: 44 + +# State 15 +# Apply action "Deal D3" +action: 5 + +# State 16 +# Apply action "Deal CA" +action: 48 + +# State 17 +# Apply action "Deal C2" +action: 0 + +# State 18 +# Apply action "Deal C6" +action: 16 + +# State 19 +# Apply action "Deal DK" +action: 45 + +# State 20 +# Apply action "Deal C7" +action: 20 + +# State 21 +# Apply action "Deal S7" +action: 23 + +# State 22 +# Apply action "Deal DA" +action: 49 + +# State 23 +# Apply action "Deal HJ" +action: 38 + +# State 24 +# Apply action "Deal ST" +action: 35 + +# State 25 +# Apply action "Deal H6" +action: 18 + +# State 26 +# Apply action "Deal SA" +action: 51 + +# State 27 +# Player 2 becomes the dealer +# Player 3 is dealt HA +# Player 4 is dealt SQ +# Player 0 is dealt H3 +# Player 1 is dealt HQ +# Player 2 is dealt C5 +# Player 3 is dealt H2 +# Player 4 is dealt D5 +# Player 0 is dealt H4 +# Player 1 is dealt S8 +# Player 2 is dealt H7 +# Player 3 is dealt S9 +# Player 4 is dealt C4 +# Player 0 is dealt D8 +# Player 1 is dealt CK +# Player 2 is dealt D3 +# Player 3 is dealt CA +# Player 4 is dealt C2 +# Player 0 is dealt C6 +# Player 1 is dealt DK +# Player 2 is dealt C7 +# Player 3 is dealt S7 +# Player 4 is dealt DA +# Player 0 is dealt HJ +# Player 1 is dealt ST +# Player 2 is dealt H6 +# Player 2 draws SA +# Last card: SA +# Last suit: S +# Number of cards left in deck: 26 +# Player 0: Player 1: Player 2: Player 3: Player 4: +# Suit C: 6 Suit C: K Suit C: 5 7 Suit C: A Suit C: 2 4 +# Suit D: 8 Suit D: K Suit D: 3 Suit D: Suit D: 5 A +# Suit H: 34 J Suit H: Q Suit H: 67 Suit H: 2 A Suit H: +# Suit S: Suit S: 8 T Suit S: A Suit S: 7 9 Suit S: Q +IsTerminal() = False +History() = [54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51] +HistoryString() = "54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +ObservationString(0) = "Currently I have: \nSuit C: 6 \nSuit D: 8 \nSuit H: 34 J \nSuit S: \nPrevious card: SA\nPrevious suit: S\nStarting counterclockwise, other players have: 5, 5, 6, 5, 5 cards.\n" +ObservationString(1) = "Currently I 
have: \nSuit C: K \nSuit D: K \nSuit H: Q \nSuit S: 8 T \nPrevious card: SA\nPrevious suit: S\nStarting counterclockwise, other players have: 5, 6, 5, 5, 5 cards.\n" +ObservationString(2) = "Currently I have: \nSuit C: 5 7 \nSuit D: 3 \nSuit H: 67 \nSuit S: A\nPrevious card: SA\nPrevious suit: S\nStarting counterclockwise, other players have: 6, 5, 5, 5, 5 cards.\n" +ObservationString(3) = "Currently I have: \nSuit C: A\nSuit D: \nSuit H: 2 A\nSuit S: 7 9 \nPrevious card: SA\nPrevious suit: S\nStarting counterclockwise, other players have: 5, 5, 5, 5, 6 cards.\n" +ObservationString(4) = "Currently I have: \nSuit C: 2 4 \nSuit D: 5 A\nSuit H: \nSuit S: Q \nPrevious card: SA\nPrevious suit: S\nStarting counterclockwise, other players have: 5, 5, 5, 6, 5 cards.\n" +ObservationTensor(0): binvec(372, 0xaaa6a6aa6aaa9aaaaaa6aaaaaa0000000000001104000000000000100000000000010000000000000800000000000) +ObservationTensor(1): binvec(372, 0xaaaaaaaaaaaaa9aaa9aaa65aaa0000000000001102000000000000200000000000010000000000000800000000000) +ObservationTensor(2): binvec(372, 0xaa9aaa6aa666aaaaaaaaaaaaa90000000000001104000000000000200000000000010000000000000800000000000) +ObservationTensor(3): binvec(372, 0xa6aaaaaaaaa9aaa9aaaaaaaa660000000000001104000000000000200000000000010000000000000400000000000) +ObservationTensor(4): binvec(372, 0x6aaa6a9aaaaaaaaaaaaaa9aa9a0000000000001104000000000000200000000000008000000000000800000000000) +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [23, 31, 48, 50, 52] +StringLegalActions() = ["Play S7", "Play S9", "Play CA", "Play HA", "Draw"] + +# Apply action "Play CA" +action: 48 + +# State 28 +# Player 2 becomes the dealer +# Player 3 is dealt HA +# Player 4 is dealt SQ +# Player 0 is dealt H3 +# Player 1 is dealt HQ +# Player 2 is dealt C5 +# Player 3 is dealt H2 +# Player 4 is dealt D5 +# Player 0 is dealt H4 +# Player 1 is dealt S8 +# Player 2 is dealt H7 +# Player 3 is dealt S9 +# Player 4 is dealt C4 +# Player 0 is dealt D8 +# Player 1 is dealt CK +# Player 2 is dealt D3 +# Player 3 is dealt CA +# Player 4 is dealt C2 +# Player 0 is dealt C6 +# Player 1 is dealt DK +# Player 2 is dealt C7 +# Player 3 is dealt S7 +# Player 4 is dealt DA +# Player 0 is dealt HJ +# Player 1 is dealt ST +# Player 2 is dealt H6 +# Player 2 draws SA +# Player 3 plays CA +# Last card: CA +# Last suit: C +# Number of cards left in deck: 26 +# Player 0: Player 1: Player 2: Player 3: Player 4: +# Suit C: 6 Suit C: K Suit C: 5 7 Suit C: Suit C: 2 4 +# Suit D: 8 Suit D: K Suit D: 3 Suit D: Suit D: 5 A +# Suit H: 34 J Suit H: Q Suit H: 67 Suit H: 2 A Suit H: +# Suit S: Suit S: 8 T Suit S: A Suit S: 7 9 Suit S: Q +IsTerminal() = False +History() = [54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48] +HistoryString() = "54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 4 +ObservationString(0) = "Currently I have: \nSuit C: 6 \nSuit D: 8 \nSuit H: 34 J \nSuit S: \nPrevious card: CA\nPrevious suit: C\nStarting counterclockwise, other players have: 5, 5, 6, 4, 5 cards.\n" +ObservationString(1) = "Currently I have: \nSuit C: K \nSuit D: K \nSuit H: Q \nSuit S: 8 T \nPrevious card: CA\nPrevious suit: C\nStarting counterclockwise, other players have: 5, 6, 4, 5, 5 cards.\n" +ObservationString(2) = "Currently I have: \nSuit C: 5 7 \nSuit D: 3 \nSuit H: 67 \nSuit S: A\nPrevious card: CA\nPrevious suit: 
C\nStarting counterclockwise, other players have: 6, 4, 5, 5, 5 cards.\n" +ObservationString(3) = "Currently I have: \nSuit C: \nSuit D: \nSuit H: 2 A\nSuit S: 7 9 \nPrevious card: CA\nPrevious suit: C\nStarting counterclockwise, other players have: 4, 5, 5, 5, 6 cards.\n" +ObservationString(4) = "Currently I have: \nSuit C: 2 4 \nSuit D: 5 A\nSuit H: \nSuit S: Q \nPrevious card: CA\nPrevious suit: C\nStarting counterclockwise, other players have: 5, 5, 5, 6, 4 cards.\n" +ObservationTensor(0): binvec(372, 0xaaa6a6aa6aaa9aaaaaa6aaaaaa0000000000008804000000000000100000000000020000000000000800000000000) +ObservationTensor(1): binvec(372, 0xaaaaaaaaaaaaa9aaa9aaa65aaa0000000000008802000000000000400000000000010000000000000800000000000) +ObservationTensor(2): binvec(372, 0xaa9aaa6aa666aaaaaaaaaaaaa90000000000008808000000000000200000000000010000000000000800000000000) +ObservationTensor(3): binvec(372, 0xa6aaaaaaaaa9aaa9aaaaaaaaa60000000000008804000000000000200000000000010000000000000400000000000) +ObservationTensor(4): binvec(372, 0x6aaa6a9aaaaaaaaaaaaaa9aa9a0000000000008804000000000000200000000000008000000000001000000000000) +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [0, 8, 49, 52] +StringLegalActions() = ["Play C2", "Play C4", "Play DA", "Draw"] + +# Apply action "Play C4" +action: 8 + +# State 29 +# Player 2 becomes the dealer +# Player 3 is dealt HA +# Player 4 is dealt SQ +# Player 0 is dealt H3 +# Player 1 is dealt HQ +# Player 2 is dealt C5 +# Player 3 is dealt H2 +# Player 4 is dealt D5 +# Player 0 is dealt H4 +# Player 1 is dealt S8 +# Player 2 is dealt H7 +# Player 3 is dealt S9 +# Player 4 is dealt C4 +# Player 0 is dealt D8 +# Player 1 is dealt CK +# Player 2 is dealt D3 +# Player 3 is dealt CA +# Player 4 is dealt C2 +# Player 0 is dealt C6 +# Player 1 is dealt DK +# Player 2 is dealt C7 +# Player 3 is dealt S7 +# Player 4 is dealt DA +# Player 0 is dealt HJ +# Player 1 is dealt ST +# Player 2 is dealt H6 +# Player 2 draws SA +# Player 3 plays CA +# Player 4 plays C4 +# Last card: C4 +# Last suit: C +# Number of cards left in deck: 26 +# Player 0: Player 1: Player 2: Player 3: Player 4: +# Suit C: 6 Suit C: K Suit C: 5 7 Suit C: Suit C: 2 +# Suit D: 8 Suit D: K Suit D: 3 Suit D: Suit D: 5 A +# Suit H: 34 J Suit H: Q Suit H: 67 Suit H: 2 A Suit H: +# Suit S: Suit S: 8 T Suit S: A Suit S: 7 9 Suit S: Q +IsTerminal() = False +History() = [54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8] +HistoryString() = "54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Currently I have: \nSuit C: 6 \nSuit D: 8 \nSuit H: 34 J \nSuit S: \nPrevious card: C4\nPrevious suit: C\nStarting counterclockwise, other players have: 5, 5, 6, 4, 4 cards.\n" +ObservationString(1) = "Currently I have: \nSuit C: K \nSuit D: K \nSuit H: Q \nSuit S: 8 T \nPrevious card: C4\nPrevious suit: C\nStarting counterclockwise, other players have: 5, 6, 4, 4, 5 cards.\n" +ObservationString(2) = "Currently I have: \nSuit C: 5 7 \nSuit D: 3 \nSuit H: 67 \nSuit S: A\nPrevious card: C4\nPrevious suit: C\nStarting counterclockwise, other players have: 6, 4, 4, 5, 5 cards.\n" +ObservationString(3) = "Currently I have: \nSuit C: \nSuit D: \nSuit H: 2 A\nSuit S: 7 9 \nPrevious card: C4\nPrevious suit: C\nStarting counterclockwise, other players have: 4, 4, 5, 5, 6 cards.\n" 
+ObservationString(4) = "Currently I have: \nSuit C: 2 \nSuit D: 5 A\nSuit H: \nSuit S: Q \nPrevious card: C4\nPrevious suit: C\nStarting counterclockwise, other players have: 4, 5, 5, 6, 4 cards.\n" +ObservationTensor(0): binvec(372, 0xaaa6a6aa6aaa9aaaaaa6aaaaaa0080000000000804000000000000100000000000020000000000001000000000000) +ObservationTensor(1): binvec(372, 0xaaaaaaaaaaaaa9aaa9aaa65aaa0080000000000802000000000000400000000000020000000000000800000000000) +ObservationTensor(2): binvec(372, 0xaa9aaa6aa666aaaaaaaaaaaaa90080000000000808000000000000400000000000010000000000000800000000000) +ObservationTensor(3): binvec(372, 0xa6aaaaaaaaa9aaa9aaaaaaaaa60080000000000808000000000000200000000000010000000000000400000000000) +ObservationTensor(4): binvec(372, 0x6aaaaa9aaaaaaaaaaaaaa9aa9a0080000000000804000000000000200000000000008000000000001000000000000) +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [10, 16, 25, 52] +StringLegalActions() = ["Play H4", "Play C6", "Play D8", "Draw"] + +# Apply action "Play H4" +action: 10 + +# State 30 +# Player 2 becomes the dealer +# Player 3 is dealt HA +# Player 4 is dealt SQ +# Player 0 is dealt H3 +# Player 1 is dealt HQ +# Player 2 is dealt C5 +# Player 3 is dealt H2 +# Player 4 is dealt D5 +# Player 0 is dealt H4 +# Player 1 is dealt S8 +# Player 2 is dealt H7 +# Player 3 is dealt S9 +# Player 4 is dealt C4 +# Player 0 is dealt D8 +# Player 1 is dealt CK +# Player 2 is dealt D3 +# Player 3 is dealt CA +# Player 4 is dealt C2 +# Player 0 is dealt C6 +# Player 1 is dealt DK +# Player 2 is dealt C7 +# Player 3 is dealt S7 +# Player 4 is dealt DA +# Player 0 is dealt HJ +# Player 1 is dealt ST +# Player 2 is dealt H6 +# Player 2 draws SA +# Player 3 plays CA +# Player 4 plays C4 +# Player 0 plays H4 +# Last card: H4 +# Last suit: H +# Number of cards left in deck: 26 +# Player 0: Player 1: Player 2: Player 3: Player 4: +# Suit C: 6 Suit C: K Suit C: 5 7 Suit C: Suit C: 2 +# Suit D: 8 Suit D: K Suit D: 3 Suit D: Suit D: 5 A +# Suit H: 3 J Suit H: Q Suit H: 67 Suit H: 2 A Suit H: +# Suit S: Suit S: 8 T Suit S: A Suit S: 7 9 Suit S: Q +IsTerminal() = False +History() = [54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10] +HistoryString() = "54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "Currently I have: \nSuit C: 6 \nSuit D: 8 \nSuit H: 3 J \nSuit S: \nPrevious card: H4\nPrevious suit: H\nStarting counterclockwise, other players have: 4, 5, 6, 4, 4 cards.\n" +ObservationString(1) = "Currently I have: \nSuit C: K \nSuit D: K \nSuit H: Q \nSuit S: 8 T \nPrevious card: H4\nPrevious suit: H\nStarting counterclockwise, other players have: 5, 6, 4, 4, 4 cards.\n" +ObservationString(2) = "Currently I have: \nSuit C: 5 7 \nSuit D: 3 \nSuit H: 67 \nSuit S: A\nPrevious card: H4\nPrevious suit: H\nStarting counterclockwise, other players have: 6, 4, 4, 4, 5 cards.\n" +ObservationString(3) = "Currently I have: \nSuit C: \nSuit D: \nSuit H: 2 A\nSuit S: 7 9 \nPrevious card: H4\nPrevious suit: H\nStarting counterclockwise, other players have: 4, 4, 4, 5, 6 cards.\n" +ObservationString(4) = "Currently I have: \nSuit C: 2 \nSuit D: 5 A\nSuit H: \nSuit S: Q \nPrevious card: H4\nPrevious suit: H\nStarting counterclockwise, other players have: 4, 4, 5, 6, 4 cards.\n" +ObservationTensor(0): binvec(372, 
0xaaa6aaaa6aaa9aaaaaa6aaaaaa0020000000000204000000000000100000000000020000000000001000000000000) +ObservationTensor(1): binvec(372, 0xaaaaaaaaaaaaa9aaa9aaa65aaa0020000000000202000000000000400000000000020000000000001000000000000) +ObservationTensor(2): binvec(372, 0xaa9aaa6aa666aaaaaaaaaaaaa90020000000000208000000000000400000000000020000000000000800000000000) +ObservationTensor(3): binvec(372, 0xa6aaaaaaaaa9aaa9aaaaaaaaa60020000000000208000000000000400000000000010000000000000400000000000) +ObservationTensor(4): binvec(372, 0x6aaaaa9aaaaaaaaaaaaaa9aa9a0020000000000208000000000000200000000000008000000000001000000000000) +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [27, 42, 52] +StringLegalActions() = ["Play S8", "Play HQ", "Draw"] + +# Apply action "Draw" +action: 52 + +# State 31 +# Apply action "Deal H8" +action: 26 + +# State 32 +# Player 2 becomes the dealer +# Player 3 is dealt HA +# Player 4 is dealt SQ +# Player 0 is dealt H3 +# Player 1 is dealt HQ +# Player 2 is dealt C5 +# Player 3 is dealt H2 +# Player 4 is dealt D5 +# Player 0 is dealt H4 +# Player 1 is dealt S8 +# Player 2 is dealt H7 +# Player 3 is dealt S9 +# Player 4 is dealt C4 +# Player 0 is dealt D8 +# Player 1 is dealt CK +# Player 2 is dealt D3 +# Player 3 is dealt CA +# Player 4 is dealt C2 +# Player 0 is dealt C6 +# Player 1 is dealt DK +# Player 2 is dealt C7 +# Player 3 is dealt S7 +# Player 4 is dealt DA +# Player 0 is dealt HJ +# Player 1 is dealt ST +# Player 2 is dealt H6 +# Player 2 draws SA +# Player 3 plays CA +# Player 4 plays C4 +# Player 0 plays H4 +# Player 1 starts drawing +# Player 1 draws H8 +# Last card: H4 +# Last suit: H +# Number of cards left in deck: 25 +# Player 0: Player 1: Player 2: Player 3: Player 4: +# Suit C: 6 Suit C: K Suit C: 5 7 Suit C: Suit C: 2 +# Suit D: 8 Suit D: K Suit D: 3 Suit D: Suit D: 5 A +# Suit H: 3 J Suit H: 8 Q Suit H: 67 Suit H: 2 A Suit H: +# Suit S: Suit S: 8 T Suit S: A Suit S: 7 9 Suit S: Q +IsTerminal() = False +History() = [54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26] +HistoryString() = "54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "Currently I have: \nSuit C: 6 \nSuit D: 8 \nSuit H: 3 J \nSuit S: \nPrevious card: H4\nPrevious suit: H\nStarting counterclockwise, other players have: 4, 6, 6, 4, 4 cards.\n" +ObservationString(1) = "Currently I have: \nSuit C: K \nSuit D: K \nSuit H: 8 Q \nSuit S: 8 T \nPrevious card: H4\nPrevious suit: H\nStarting counterclockwise, other players have: 6, 6, 4, 4, 4 cards.\n" +ObservationString(2) = "Currently I have: \nSuit C: 5 7 \nSuit D: 3 \nSuit H: 67 \nSuit S: A\nPrevious card: H4\nPrevious suit: H\nStarting counterclockwise, other players have: 6, 4, 4, 4, 6 cards.\n" +ObservationString(3) = "Currently I have: \nSuit C: \nSuit D: \nSuit H: 2 A\nSuit S: 7 9 \nPrevious card: H4\nPrevious suit: H\nStarting counterclockwise, other players have: 4, 4, 4, 6, 6 cards.\n" +ObservationString(4) = "Currently I have: \nSuit C: 2 \nSuit D: 5 A\nSuit H: \nSuit S: Q \nPrevious card: H4\nPrevious suit: H\nStarting counterclockwise, other players have: 4, 4, 6, 6, 4 cards.\n" +ObservationTensor(0): binvec(372, 0xaaa6aaaa6aaa9aaaaaa6aaaaaa0020000000000202000000000000100000000000020000000000001000000000000) +ObservationTensor(1): binvec(372, 
0xaaaaaaaaaaaaa5aaa9aaa65aaa0020000000000202000000000000400000000000020000000000001000000000000) +ObservationTensor(2): binvec(372, 0xaa9aaa6aa666aaaaaaaaaaaaa90020000000000208000000000000400000000000020000000000000400000000000) +ObservationTensor(3): binvec(372, 0xa6aaaaaaaaa9aaa9aaaaaaaaa60020000000000208000000000000400000000000008000000000000400000000000) +ObservationTensor(4): binvec(372, 0x6aaaaa9aaaaaaaaaaaaaa9aa9a0020000000000208000000000000100000000000008000000000001000000000000) +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [26, 27, 42, 52] +StringLegalActions() = ["Play H8", "Play S8", "Play HQ", "Draw"] + +# Apply action "Play S8" +action: 27 + +# State 33 +# Player 2 becomes the dealer +# Player 3 is dealt HA +# Player 4 is dealt SQ +# Player 0 is dealt H3 +# Player 1 is dealt HQ +# Player 2 is dealt C5 +# Player 3 is dealt H2 +# Player 4 is dealt D5 +# Player 0 is dealt H4 +# Player 1 is dealt S8 +# Player 2 is dealt H7 +# Player 3 is dealt S9 +# Player 4 is dealt C4 +# Player 0 is dealt D8 +# Player 1 is dealt CK +# Player 2 is dealt D3 +# Player 3 is dealt CA +# Player 4 is dealt C2 +# Player 0 is dealt C6 +# Player 1 is dealt DK +# Player 2 is dealt C7 +# Player 3 is dealt S7 +# Player 4 is dealt DA +# Player 0 is dealt HJ +# Player 1 is dealt ST +# Player 2 is dealt H6 +# Player 2 draws SA +# Player 3 plays CA +# Player 4 plays C4 +# Player 0 plays H4 +# Player 1 starts drawing +# Player 1 draws H8 +# Player 1 plays S8 +# Last card: S8 +# Last suit: S +# Number of cards left in deck: 25 +# Player 0: Player 1: Player 2: Player 3: Player 4: +# Suit C: 6 Suit C: K Suit C: 5 7 Suit C: Suit C: 2 +# Suit D: 8 Suit D: K Suit D: 3 Suit D: Suit D: 5 A +# Suit H: 3 J Suit H: 8 Q Suit H: 67 Suit H: 2 A Suit H: +# Suit S: Suit S: T Suit S: A Suit S: 7 9 Suit S: Q +IsTerminal() = False +History() = [54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27] +HistoryString() = "54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "Currently I have: \nSuit C: 6 \nSuit D: 8 \nSuit H: 3 J \nSuit S: \nPrevious card: S8\nPrevious suit: S\nStarting counterclockwise, other players have: 4, 5, 6, 4, 4 cards.\n" +ObservationString(1) = "Currently I have: \nSuit C: K \nSuit D: K \nSuit H: 8 Q \nSuit S: T \nPrevious card: S8\nPrevious suit: S\nStarting counterclockwise, other players have: 5, 6, 4, 4, 4 cards.\n" +ObservationString(2) = "Currently I have: \nSuit C: 5 7 \nSuit D: 3 \nSuit H: 67 \nSuit S: A\nPrevious card: S8\nPrevious suit: S\nStarting counterclockwise, other players have: 6, 4, 4, 4, 5 cards.\n" +ObservationString(3) = "Currently I have: \nSuit C: \nSuit D: \nSuit H: 2 A\nSuit S: 7 9 \nPrevious card: S8\nPrevious suit: S\nStarting counterclockwise, other players have: 4, 4, 4, 5, 6 cards.\n" +ObservationString(4) = "Currently I have: \nSuit C: 2 \nSuit D: 5 A\nSuit H: \nSuit S: Q \nPrevious card: S8\nPrevious suit: S\nStarting counterclockwise, other players have: 4, 4, 5, 6, 4 cards.\n" +ObservationTensor(0): binvec(372, 0xaaa6aaaa6aaa9aaaaaa6aaaaaa0000001000000104000000000000100000000000020000000000001000000000000) +ObservationTensor(1): binvec(372, 0xaaaaaaaaaaaaa6aaa9aaa65aaa0000001000000102000000000000400000000000020000000000001000000000000) +ObservationTensor(2): binvec(372, 
0xaa9aaa6aa666aaaaaaaaaaaaa90000001000000108000000000000400000000000020000000000000800000000000) +ObservationTensor(3): binvec(372, 0xa6aaaaaaaaa9aaa9aaaaaaaaa60000001000000108000000000000400000000000010000000000000400000000000) +ObservationTensor(4): binvec(372, 0x6aaaaa9aaaaaaaaaaaaaa9aa9a0000001000000108000000000000200000000000008000000000001000000000000) +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [54, 55, 56, 57] +StringLegalActions() = ["Nominate suit C", "Nominate suit D", "Nominate suit H", "Nominate suit S"] + +# Apply action "Nominate suit C" +action: 54 + +# State 34 +# Player 2 becomes the dealer +# Player 3 is dealt HA +# Player 4 is dealt SQ +# Player 0 is dealt H3 +# Player 1 is dealt HQ +# Player 2 is dealt C5 +# Player 3 is dealt H2 +# Player 4 is dealt D5 +# Player 0 is dealt H4 +# Player 1 is dealt S8 +# Player 2 is dealt H7 +# Player 3 is dealt S9 +# Player 4 is dealt C4 +# Player 0 is dealt D8 +# Player 1 is dealt CK +# Player 2 is dealt D3 +# Player 3 is dealt CA +# Player 4 is dealt C2 +# Player 0 is dealt C6 +# Player 1 is dealt DK +# Player 2 is dealt C7 +# Player 3 is dealt S7 +# Player 4 is dealt DA +# Player 0 is dealt HJ +# Player 1 is dealt ST +# Player 2 is dealt H6 +# Player 2 draws SA +# Player 3 plays CA +# Player 4 plays C4 +# Player 0 plays H4 +# Player 1 starts drawing +# Player 1 draws H8 +# Player 1 plays S8 +# Player 1 nominates suit C +# Last card: S8 +# Last suit: C +# Number of cards left in deck: 25 +# Player 0: Player 1: Player 2: Player 3: Player 4: +# Suit C: 6 Suit C: K Suit C: 5 7 Suit C: Suit C: 2 +# Suit D: 8 Suit D: K Suit D: 3 Suit D: Suit D: 5 A +# Suit H: 3 J Suit H: 8 Q Suit H: 67 Suit H: 2 A Suit H: +# Suit S: Suit S: T Suit S: A Suit S: 7 9 Suit S: Q +IsTerminal() = False +History() = [54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54] +HistoryString() = "54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "Currently I have: \nSuit C: 6 \nSuit D: 8 \nSuit H: 3 J \nSuit S: \nPrevious card: S8\nPrevious suit: C\nStarting counterclockwise, other players have: 4, 5, 6, 4, 4 cards.\n" +ObservationString(1) = "Currently I have: \nSuit C: K \nSuit D: K \nSuit H: 8 Q \nSuit S: T \nPrevious card: S8\nPrevious suit: C\nStarting counterclockwise, other players have: 5, 6, 4, 4, 4 cards.\n" +ObservationString(2) = "Currently I have: \nSuit C: 5 7 \nSuit D: 3 \nSuit H: 67 \nSuit S: A\nPrevious card: S8\nPrevious suit: C\nStarting counterclockwise, other players have: 6, 4, 4, 4, 5 cards.\n" +ObservationString(3) = "Currently I have: \nSuit C: \nSuit D: \nSuit H: 2 A\nSuit S: 7 9 \nPrevious card: S8\nPrevious suit: C\nStarting counterclockwise, other players have: 4, 4, 4, 5, 6 cards.\n" +ObservationString(4) = "Currently I have: \nSuit C: 2 \nSuit D: 5 A\nSuit H: \nSuit S: Q \nPrevious card: S8\nPrevious suit: C\nStarting counterclockwise, other players have: 4, 4, 5, 6, 4 cards.\n" +ObservationTensor(0): binvec(372, 0xaaa6aaaa6aaa9aaaaaa6aaaaaa0000001000000804000000000000100000000000020000000000001000000000000) +ObservationTensor(1): binvec(372, 0xaaaaaaaaaaaaa6aaa9aaa65aaa0000001000000802000000000000400000000000020000000000001000000000000) +ObservationTensor(2): binvec(372, 
0xaa9aaa6aa666aaaaaaaaaaaaa90000001000000808000000000000400000000000020000000000000800000000000) +ObservationTensor(3): binvec(372, 0xa6aaaaaaaaa9aaa9aaaaaaaaa60000001000000808000000000000400000000000010000000000000400000000000) +ObservationTensor(4): binvec(372, 0x6aaaaa9aaaaaaaaaaaaaa9aa9a0000001000000808000000000000200000000000008000000000001000000000000) +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [12, 20, 52] +StringLegalActions() = ["Play C5", "Play C7", "Draw"] + +# Apply action "Draw" +action: 52 + +# State 35 +# Apply action "Deal C3" +action: 4 + +# State 36 +# Player 2 becomes the dealer +# Player 3 is dealt HA +# Player 4 is dealt SQ +# Player 0 is dealt H3 +# Player 1 is dealt HQ +# Player 2 is dealt C5 +# Player 3 is dealt H2 +# Player 4 is dealt D5 +# Player 0 is dealt H4 +# Player 1 is dealt S8 +# Player 2 is dealt H7 +# Player 3 is dealt S9 +# Player 4 is dealt C4 +# Player 0 is dealt D8 +# Player 1 is dealt CK +# Player 2 is dealt D3 +# Player 3 is dealt CA +# Player 4 is dealt C2 +# Player 0 is dealt C6 +# Player 1 is dealt DK +# Player 2 is dealt C7 +# Player 3 is dealt S7 +# Player 4 is dealt DA +# Player 0 is dealt HJ +# Player 1 is dealt ST +# Player 2 is dealt H6 +# Player 2 draws SA +# Player 3 plays CA +# Player 4 plays C4 +# Player 0 plays H4 +# Player 1 starts drawing +# Player 1 draws H8 +# Player 1 plays S8 +# Player 1 nominates suit C +# Player 2 starts drawing +# Player 2 draws C3 +# Last card: S8 +# Last suit: C +# Number of cards left in deck: 24 +# Player 0: Player 1: Player 2: Player 3: Player 4: +# Suit C: 6 Suit C: K Suit C: 3 5 7 Suit C: Suit C: 2 +# Suit D: 8 Suit D: K Suit D: 3 Suit D: Suit D: 5 A +# Suit H: 3 J Suit H: 8 Q Suit H: 67 Suit H: 2 A Suit H: +# Suit S: Suit S: T Suit S: A Suit S: 7 9 Suit S: Q +IsTerminal() = False +History() = [54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4] +HistoryString() = "54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "Currently I have: \nSuit C: 6 \nSuit D: 8 \nSuit H: 3 J \nSuit S: \nPrevious card: S8\nPrevious suit: C\nStarting counterclockwise, other players have: 4, 5, 7, 4, 4 cards.\n" +ObservationString(1) = "Currently I have: \nSuit C: K \nSuit D: K \nSuit H: 8 Q \nSuit S: T \nPrevious card: S8\nPrevious suit: C\nStarting counterclockwise, other players have: 5, 7, 4, 4, 4 cards.\n" +ObservationString(2) = "Currently I have: \nSuit C: 3 5 7 \nSuit D: 3 \nSuit H: 67 \nSuit S: A\nPrevious card: S8\nPrevious suit: C\nStarting counterclockwise, other players have: 7, 4, 4, 4, 5 cards.\n" +ObservationString(3) = "Currently I have: \nSuit C: \nSuit D: \nSuit H: 2 A\nSuit S: 7 9 \nPrevious card: S8\nPrevious suit: C\nStarting counterclockwise, other players have: 4, 4, 4, 5, 7 cards.\n" +ObservationString(4) = "Currently I have: \nSuit C: 2 \nSuit D: 5 A\nSuit H: \nSuit S: Q \nPrevious card: S8\nPrevious suit: C\nStarting counterclockwise, other players have: 4, 4, 5, 7, 4 cards.\n" +ObservationTensor(0): binvec(372, 0xaaa6aaaa6aaa9aaaaaa6aaaaaa0000001000000804000000000000080000000000020000000000001000000000000) +ObservationTensor(1): binvec(372, 0xaaaaaaaaaaaaa6aaa9aaa65aaa0000001000000801000000000000400000000000020000000000001000000000000) +ObservationTensor(2): binvec(372, 
0xaa5aaa6aa666aaaaaaaaaaaaa90000001000000808000000000000400000000000020000000000000800000000000) +ObservationTensor(3): binvec(372, 0xa6aaaaaaaaa9aaa9aaaaaaaaa60000001000000808000000000000400000000000010000000000000200000000000) +ObservationTensor(4): binvec(372, 0x6aaaaa9aaaaaaaaaaaaaa9aa9a0000001000000808000000000000200000000000004000000000001000000000000) +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [4, 12, 20, 52] +StringLegalActions() = ["Play C3", "Play C5", "Play C7", "Draw"] + +# Apply action "Draw" +action: 52 + +# State 37 +# Apply action "Deal HT" +action: 34 + +# State 38 +# Player 2 becomes the dealer +# Player 3 is dealt HA +# Player 4 is dealt SQ +# Player 0 is dealt H3 +# Player 1 is dealt HQ +# Player 2 is dealt C5 +# Player 3 is dealt H2 +# Player 4 is dealt D5 +# Player 0 is dealt H4 +# Player 1 is dealt S8 +# Player 2 is dealt H7 +# Player 3 is dealt S9 +# Player 4 is dealt C4 +# Player 0 is dealt D8 +# Player 1 is dealt CK +# Player 2 is dealt D3 +# Player 3 is dealt CA +# Player 4 is dealt C2 +# Player 0 is dealt C6 +# Player 1 is dealt DK +# Player 2 is dealt C7 +# Player 3 is dealt S7 +# Player 4 is dealt DA +# Player 0 is dealt HJ +# Player 1 is dealt ST +# Player 2 is dealt H6 +# Player 2 draws SA +# Player 3 plays CA +# Player 4 plays C4 +# Player 0 plays H4 +# Player 1 starts drawing +# Player 1 draws H8 +# Player 1 plays S8 +# Player 1 nominates suit C +# Player 2 starts drawing +# Player 2 draws C3 +# Player 2 starts drawing +# Player 2 draws HT +# Last card: S8 +# Last suit: C +# Number of cards left in deck: 23 +# Player 0: Player 1: Player 2: Player 3: Player 4: +# Suit C: 6 Suit C: K Suit C: 3 5 7 Suit C: Suit C: 2 +# Suit D: 8 Suit D: K Suit D: 3 Suit D: Suit D: 5 A +# Suit H: 3 J Suit H: 8 Q Suit H: 67 T Suit H: 2 A Suit H: +# Suit S: Suit S: T Suit S: A Suit S: 7 9 Suit S: Q +IsTerminal() = False +History() = [54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4, 52, 34] +HistoryString() = "54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4, 52, 34" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "Currently I have: \nSuit C: 6 \nSuit D: 8 \nSuit H: 3 J \nSuit S: \nPrevious card: S8\nPrevious suit: C\nStarting counterclockwise, other players have: 4, 5, 8, 4, 4 cards.\n" +ObservationString(1) = "Currently I have: \nSuit C: K \nSuit D: K \nSuit H: 8 Q \nSuit S: T \nPrevious card: S8\nPrevious suit: C\nStarting counterclockwise, other players have: 5, 8, 4, 4, 4 cards.\n" +ObservationString(2) = "Currently I have: \nSuit C: 3 5 7 \nSuit D: 3 \nSuit H: 67 T \nSuit S: A\nPrevious card: S8\nPrevious suit: C\nStarting counterclockwise, other players have: 8, 4, 4, 4, 5 cards.\n" +ObservationString(3) = "Currently I have: \nSuit C: \nSuit D: \nSuit H: 2 A\nSuit S: 7 9 \nPrevious card: S8\nPrevious suit: C\nStarting counterclockwise, other players have: 4, 4, 4, 5, 8 cards.\n" +ObservationString(4) = "Currently I have: \nSuit C: 2 \nSuit D: 5 A\nSuit H: \nSuit S: Q \nPrevious card: S8\nPrevious suit: C\nStarting counterclockwise, other players have: 4, 4, 5, 8, 4 cards.\n" +ObservationTensor(0): binvec(372, 0xaaa6aaaa6aaa9aaaaaa6aaaaaa0000001000000804000000000000040000000000020000000000001000000000000) +ObservationTensor(1): binvec(372, 
0xaaaaaaaaaaaaa6aaa9aaa65aaa0000001000000800800000000000400000000000020000000000001000000000000) +ObservationTensor(2): binvec(372, 0xaa5aaa6aa666aaaaa6aaaaaaa90000001000000808000000000000400000000000020000000000000800000000000) +ObservationTensor(3): binvec(372, 0xa6aaaaaaaaa9aaa9aaaaaaaaa60000001000000808000000000000400000000000010000000000000100000000000) +ObservationTensor(4): binvec(372, 0x6aaaaa9aaaaaaaaaaaaaa9aa9a0000001000000808000000000000200000000000002000000000001000000000000) +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [4, 12, 20, 52] +StringLegalActions() = ["Play C3", "Play C5", "Play C7", "Draw"] + +# Apply action "Draw" +action: 52 + +# State 39 +# Apply action "Deal C8" +action: 24 + +# State 40 +# Apply action "Play C8" +action: 24 + +# State 41 +# Apply action "Nominate suit H" +action: 56 + +# State 42 +# Player 2 becomes the dealer +# Player 3 is dealt HA +# Player 4 is dealt SQ +# Player 0 is dealt H3 +# Player 1 is dealt HQ +# Player 2 is dealt C5 +# Player 3 is dealt H2 +# Player 4 is dealt D5 +# Player 0 is dealt H4 +# Player 1 is dealt S8 +# Player 2 is dealt H7 +# Player 3 is dealt S9 +# Player 4 is dealt C4 +# Player 0 is dealt D8 +# Player 1 is dealt CK +# Player 2 is dealt D3 +# Player 3 is dealt CA +# Player 4 is dealt C2 +# Player 0 is dealt C6 +# Player 1 is dealt DK +# Player 2 is dealt C7 +# Player 3 is dealt S7 +# Player 4 is dealt DA +# Player 0 is dealt HJ +# Player 1 is dealt ST +# Player 2 is dealt H6 +# Player 2 draws SA +# Player 3 plays CA +# Player 4 plays C4 +# Player 0 plays H4 +# Player 1 starts drawing +# Player 1 draws H8 +# Player 1 plays S8 +# Player 1 nominates suit C +# Player 2 starts drawing +# Player 2 draws C3 +# Player 2 starts drawing +# Player 2 draws HT +# Player 2 starts drawing +# Player 2 draws C8 +# Player 2 plays C8 +# Player 2 nominates suit H +# Last card: C8 +# Last suit: H +# Number of cards left in deck: 22 +# Player 0: Player 1: Player 2: Player 3: Player 4: +# Suit C: 6 Suit C: K Suit C: 3 5 7 Suit C: Suit C: 2 +# Suit D: 8 Suit D: K Suit D: 3 Suit D: Suit D: 5 A +# Suit H: 3 J Suit H: 8 Q Suit H: 67 T Suit H: 2 A Suit H: +# Suit S: Suit S: T Suit S: A Suit S: 7 9 Suit S: Q +IsTerminal() = False +History() = [54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4, 52, 34, 52, 24, 24, 56] +HistoryString() = "54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4, 52, 34, 52, 24, 24, 56" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +ObservationString(0) = "Currently I have: \nSuit C: 6 \nSuit D: 8 \nSuit H: 3 J \nSuit S: \nPrevious card: C8\nPrevious suit: H\nStarting counterclockwise, other players have: 4, 5, 8, 4, 4 cards.\n" +ObservationString(1) = "Currently I have: \nSuit C: K \nSuit D: K \nSuit H: 8 Q \nSuit S: T \nPrevious card: C8\nPrevious suit: H\nStarting counterclockwise, other players have: 5, 8, 4, 4, 4 cards.\n" +ObservationString(2) = "Currently I have: \nSuit C: 3 5 7 \nSuit D: 3 \nSuit H: 67 T \nSuit S: A\nPrevious card: C8\nPrevious suit: H\nStarting counterclockwise, other players have: 8, 4, 4, 4, 5 cards.\n" +ObservationString(3) = "Currently I have: \nSuit C: \nSuit D: \nSuit H: 2 A\nSuit S: 7 9 \nPrevious card: C8\nPrevious suit: H\nStarting counterclockwise, other players have: 4, 4, 4, 5, 8 cards.\n" +ObservationString(4) = "Currently I have: \nSuit C: 2 \nSuit D: 5 A\nSuit 
H: \nSuit S: Q \nPrevious card: C8\nPrevious suit: H\nStarting counterclockwise, other players have: 4, 4, 5, 8, 4 cards.\n" +ObservationTensor(0): binvec(372, 0xaaa6aaaa6aaa9aaaaaa6aaaaaa0000008000000204000000000000040000000000020000000000001000000000000) +ObservationTensor(1): binvec(372, 0xaaaaaaaaaaaaa6aaa9aaa65aaa0000008000000200800000000000400000000000020000000000001000000000000) +ObservationTensor(2): binvec(372, 0xaa5aaa6aa666aaaaa6aaaaaaa90000008000000208000000000000400000000000020000000000000800000000000) +ObservationTensor(3): binvec(372, 0xa6aaaaaaaaa9aaa9aaaaaaaaa60000008000000208000000000000400000000000010000000000000100000000000) +ObservationTensor(4): binvec(372, 0x6aaaaa9aaaaaaaaaaaaaa9aa9a0000008000000208000000000000200000000000002000000000001000000000000) +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [2, 50, 52] +StringLegalActions() = ["Play H2", "Play HA", "Draw"] + +# Apply action "Play H2" +action: 2 + +# State 43 +# Player 2 becomes the dealer +# Player 3 is dealt HA +# Player 4 is dealt SQ +# Player 0 is dealt H3 +# Player 1 is dealt HQ +# Player 2 is dealt C5 +# Player 3 is dealt H2 +# Player 4 is dealt D5 +# Player 0 is dealt H4 +# Player 1 is dealt S8 +# Player 2 is dealt H7 +# Player 3 is dealt S9 +# Player 4 is dealt C4 +# Player 0 is dealt D8 +# Player 1 is dealt CK +# Player 2 is dealt D3 +# Player 3 is dealt CA +# Player 4 is dealt C2 +# Player 0 is dealt C6 +# Player 1 is dealt DK +# Player 2 is dealt C7 +# Player 3 is dealt S7 +# Player 4 is dealt DA +# Player 0 is dealt HJ +# Player 1 is dealt ST +# Player 2 is dealt H6 +# Player 2 draws SA +# Player 3 plays CA +# Player 4 plays C4 +# Player 0 plays H4 +# Player 1 starts drawing +# Player 1 draws H8 +# Player 1 plays S8 +# Player 1 nominates suit C +# Player 2 starts drawing +# Player 2 draws C3 +# Player 2 starts drawing +# Player 2 draws HT +# Player 2 starts drawing +# Player 2 draws C8 +# Player 2 plays C8 +# Player 2 nominates suit H +# Player 3 plays H2 +# Last card: H2 +# Last suit: H +# Number of cards left in deck: 22 +# Player 0: Player 1: Player 2: Player 3: Player 4: +# Suit C: 6 Suit C: K Suit C: 3 5 7 Suit C: Suit C: 2 +# Suit D: 8 Suit D: K Suit D: 3 Suit D: Suit D: 5 A +# Suit H: 3 J Suit H: 8 Q Suit H: 67 T Suit H: A Suit H: +# Suit S: Suit S: T Suit S: A Suit S: 7 9 Suit S: Q +IsTerminal() = False +History() = [54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4, 52, 34, 52, 24, 24, 56, 2] +HistoryString() = "54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4, 52, 34, 52, 24, 24, 56, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 4 +ObservationString(0) = "Currently I have: \nSuit C: 6 \nSuit D: 8 \nSuit H: 3 J \nSuit S: \nPrevious card: H2\nPrevious suit: H\nStarting counterclockwise, other players have: 4, 5, 8, 3, 4 cards.\n" +ObservationString(1) = "Currently I have: \nSuit C: K \nSuit D: K \nSuit H: 8 Q \nSuit S: T \nPrevious card: H2\nPrevious suit: H\nStarting counterclockwise, other players have: 5, 8, 3, 4, 4 cards.\n" +ObservationString(2) = "Currently I have: \nSuit C: 3 5 7 \nSuit D: 3 \nSuit H: 67 T \nSuit S: A\nPrevious card: H2\nPrevious suit: H\nStarting counterclockwise, other players have: 8, 3, 4, 4, 5 cards.\n" +ObservationString(3) = "Currently I have: \nSuit C: \nSuit D: \nSuit H: A\nSuit S: 7 9 \nPrevious card: H2\nPrevious suit: 
H\nStarting counterclockwise, other players have: 3, 4, 4, 5, 8 cards.\n" +ObservationString(4) = "Currently I have: \nSuit C: 2 \nSuit D: 5 A\nSuit H: \nSuit S: Q \nPrevious card: H2\nPrevious suit: H\nStarting counterclockwise, other players have: 4, 4, 5, 8, 3 cards.\n" +ObservationTensor(0): binvec(372, 0xaaa6aaaa6aaa9aaaaaa6aaaaaa2000000000000204000000000000040000000000040000000000001000000000000) +ObservationTensor(1): binvec(372, 0xaaaaaaaaaaaaa6aaa9aaa65aaa2000000000000200800000000000800000000000020000000000001000000000000) +ObservationTensor(2): binvec(372, 0xaa5aaa6aa666aaaaa6aaaaaaa92000000000000210000000000000400000000000020000000000000800000000000) +ObservationTensor(3): binvec(372, 0xaaaaaaaaaaa9aaa9aaaaaaaaa62000000000000208000000000000400000000000010000000000000100000000000) +ObservationTensor(4): binvec(372, 0x6aaaaa9aaaaaaaaaaaaaa9aa9a2000000000000208000000000000200000000000002000000000002000000000000) +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [0, 52] +StringLegalActions() = ["Play C2", "Draw"] + +# Apply action "Draw" +action: 52 + +# State 44 +# Apply action "Deal D6" +action: 17 + +# State 45 +# Player 2 becomes the dealer +# Player 3 is dealt HA +# Player 4 is dealt SQ +# Player 0 is dealt H3 +# Player 1 is dealt HQ +# Player 2 is dealt C5 +# Player 3 is dealt H2 +# Player 4 is dealt D5 +# Player 0 is dealt H4 +# Player 1 is dealt S8 +# Player 2 is dealt H7 +# Player 3 is dealt S9 +# Player 4 is dealt C4 +# Player 0 is dealt D8 +# Player 1 is dealt CK +# Player 2 is dealt D3 +# Player 3 is dealt CA +# Player 4 is dealt C2 +# Player 0 is dealt C6 +# Player 1 is dealt DK +# Player 2 is dealt C7 +# Player 3 is dealt S7 +# Player 4 is dealt DA +# Player 0 is dealt HJ +# Player 1 is dealt ST +# Player 2 is dealt H6 +# Player 2 draws SA +# Player 3 plays CA +# Player 4 plays C4 +# Player 0 plays H4 +# Player 1 starts drawing +# Player 1 draws H8 +# Player 1 plays S8 +# Player 1 nominates suit C +# Player 2 starts drawing +# Player 2 draws C3 +# Player 2 starts drawing +# Player 2 draws HT +# Player 2 starts drawing +# Player 2 draws C8 +# Player 2 plays C8 +# Player 2 nominates suit H +# Player 3 plays H2 +# Player 4 starts drawing +# Player 4 draws D6 +# Last card: H2 +# Last suit: H +# Number of cards left in deck: 21 +# Player 0: Player 1: Player 2: Player 3: Player 4: +# Suit C: 6 Suit C: K Suit C: 3 5 7 Suit C: Suit C: 2 +# Suit D: 8 Suit D: K Suit D: 3 Suit D: Suit D: 56 A +# Suit H: 3 J Suit H: 8 Q Suit H: 67 T Suit H: A Suit H: +# Suit S: Suit S: T Suit S: A Suit S: 7 9 Suit S: Q +IsTerminal() = False +History() = [54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4, 52, 34, 52, 24, 24, 56, 2, 52, 17] +HistoryString() = "54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4, 52, 34, 52, 24, 24, 56, 2, 52, 17" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 4 +ObservationString(0) = "Currently I have: \nSuit C: 6 \nSuit D: 8 \nSuit H: 3 J \nSuit S: \nPrevious card: H2\nPrevious suit: H\nStarting counterclockwise, other players have: 4, 5, 8, 3, 5 cards.\n" +ObservationString(1) = "Currently I have: \nSuit C: K \nSuit D: K \nSuit H: 8 Q \nSuit S: T \nPrevious card: H2\nPrevious suit: H\nStarting counterclockwise, other players have: 5, 8, 3, 5, 4 cards.\n" +ObservationString(2) = "Currently I have: \nSuit C: 3 5 7 \nSuit D: 3 \nSuit H: 67 T \nSuit 
S: A\nPrevious card: H2\nPrevious suit: H\nStarting counterclockwise, other players have: 8, 3, 5, 4, 5 cards.\n" +ObservationString(3) = "Currently I have: \nSuit C: \nSuit D: \nSuit H: A\nSuit S: 7 9 \nPrevious card: H2\nPrevious suit: H\nStarting counterclockwise, other players have: 3, 5, 4, 5, 8 cards.\n" +ObservationString(4) = "Currently I have: \nSuit C: 2 \nSuit D: 56 A\nSuit H: \nSuit S: Q \nPrevious card: H2\nPrevious suit: H\nStarting counterclockwise, other players have: 5, 4, 5, 8, 3 cards.\n" +ObservationTensor(0): binvec(372, 0xaaa6aaaa6aaa9aaaaaa6aaaaaa2000000000000204000000000000040000000000040000000000000800000000000) +ObservationTensor(1): binvec(372, 0xaaaaaaaaaaaaa6aaa9aaa65aaa2000000000000200800000000000800000000000010000000000001000000000000) +ObservationTensor(2): binvec(372, 0xaa5aaa6aa666aaaaa6aaaaaaa92000000000000210000000000000200000000000020000000000000800000000000) +ObservationTensor(3): binvec(372, 0xaaaaaaaaaaa9aaa9aaaaaaaaa62000000000000204000000000000400000000000010000000000000100000000000) +ObservationTensor(4): binvec(372, 0x6aaaaa9a9aaaaaaaaaaaa9aa9a2000000000000208000000000000200000000000002000000000002000000000000) +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [0, 52] +StringLegalActions() = ["Play C2", "Draw"] + +# Apply action "Play C2" +action: 0 + +# State 46 +# Player 2 becomes the dealer +# Player 3 is dealt HA +# Player 4 is dealt SQ +# Player 0 is dealt H3 +# Player 1 is dealt HQ +# Player 2 is dealt C5 +# Player 3 is dealt H2 +# Player 4 is dealt D5 +# Player 0 is dealt H4 +# Player 1 is dealt S8 +# Player 2 is dealt H7 +# Player 3 is dealt S9 +# Player 4 is dealt C4 +# Player 0 is dealt D8 +# Player 1 is dealt CK +# Player 2 is dealt D3 +# Player 3 is dealt CA +# Player 4 is dealt C2 +# Player 0 is dealt C6 +# Player 1 is dealt DK +# Player 2 is dealt C7 +# Player 3 is dealt S7 +# Player 4 is dealt DA +# Player 0 is dealt HJ +# Player 1 is dealt ST +# Player 2 is dealt H6 +# Player 2 draws SA +# Player 3 plays CA +# Player 4 plays C4 +# Player 0 plays H4 +# Player 1 starts drawing +# Player 1 draws H8 +# Player 1 plays S8 +# Player 1 nominates suit C +# Player 2 starts drawing +# Player 2 draws C3 +# Player 2 starts drawing +# Player 2 draws HT +# Player 2 starts drawing +# Player 2 draws C8 +# Player 2 plays C8 +# Player 2 nominates suit H +# Player 3 plays H2 +# Player 4 starts drawing +# Player 4 draws D6 +# Player 4 plays C2 +# Last card: C2 +# Last suit: C +# Number of cards left in deck: 21 +# Player 0: Player 1: Player 2: Player 3: Player 4: +# Suit C: 6 Suit C: K Suit C: 3 5 7 Suit C: Suit C: +# Suit D: 8 Suit D: K Suit D: 3 Suit D: Suit D: 56 A +# Suit H: 3 J Suit H: 8 Q Suit H: 67 T Suit H: A Suit H: +# Suit S: Suit S: T Suit S: A Suit S: 7 9 Suit S: Q +IsTerminal() = False +History() = [54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4, 52, 34, 52, 24, 24, 56, 2, 52, 17, 0] +HistoryString() = "54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4, 52, 34, 52, 24, 24, 56, 2, 52, 17, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Currently I have: \nSuit C: 6 \nSuit D: 8 \nSuit H: 3 J \nSuit S: \nPrevious card: C2\nPrevious suit: C\nStarting counterclockwise, other players have: 4, 5, 8, 3, 4 cards.\n" +ObservationString(1) = "Currently I have: \nSuit C: K \nSuit D: K \nSuit H: 8 Q 
\nSuit S: T \nPrevious card: C2\nPrevious suit: C\nStarting counterclockwise, other players have: 5, 8, 3, 4, 4 cards.\n" +ObservationString(2) = "Currently I have: \nSuit C: 3 5 7 \nSuit D: 3 \nSuit H: 67 T \nSuit S: A\nPrevious card: C2\nPrevious suit: C\nStarting counterclockwise, other players have: 8, 3, 4, 4, 5 cards.\n" +ObservationString(3) = "Currently I have: \nSuit C: \nSuit D: \nSuit H: A\nSuit S: 7 9 \nPrevious card: C2\nPrevious suit: C\nStarting counterclockwise, other players have: 3, 4, 4, 5, 8 cards.\n" +ObservationString(4) = "Currently I have: \nSuit C: \nSuit D: 56 A\nSuit H: \nSuit S: Q \nPrevious card: C2\nPrevious suit: C\nStarting counterclockwise, other players have: 4, 4, 5, 8, 3 cards.\n" +ObservationTensor(0): binvec(372, 0xaaa6aaaa6aaa9aaaaaa6aaaaaa8000000000000804000000000000040000000000040000000000001000000000000) +ObservationTensor(1): binvec(372, 0xaaaaaaaaaaaaa6aaa9aaa65aaa8000000000000800800000000000800000000000020000000000001000000000000) +ObservationTensor(2): binvec(372, 0xaa5aaa6aa666aaaaa6aaaaaaa98000000000000810000000000000400000000000020000000000000800000000000) +ObservationTensor(3): binvec(372, 0xaaaaaaaaaaa9aaa9aaaaaaaaa68000000000000808000000000000400000000000010000000000000100000000000) +ObservationTensor(4): binvec(372, 0xaaaaaa9a9aaaaaaaaaaaa9aa9a8000000000000808000000000000200000000000002000000000002000000000000) +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [16, 25, 52] +StringLegalActions() = ["Play C6", "Play D8", "Draw"] + +# Apply action "Play C6" +action: 16 + +# State 47 +# Apply action "Play H8" +action: 26 + +# State 48 +# Apply action "Nominate suit C" +action: 54 + +# State 49 +# Apply action "Play C7" +action: 20 + +# State 50 +# Player 2 becomes the dealer +# Player 3 is dealt HA +# Player 4 is dealt SQ +# Player 0 is dealt H3 +# Player 1 is dealt HQ +# Player 2 is dealt C5 +# Player 3 is dealt H2 +# Player 4 is dealt D5 +# Player 0 is dealt H4 +# Player 1 is dealt S8 +# Player 2 is dealt H7 +# Player 3 is dealt S9 +# Player 4 is dealt C4 +# Player 0 is dealt D8 +# Player 1 is dealt CK +# Player 2 is dealt D3 +# Player 3 is dealt CA +# Player 4 is dealt C2 +# Player 0 is dealt C6 +# Player 1 is dealt DK +# Player 2 is dealt C7 +# Player 3 is dealt S7 +# Player 4 is dealt DA +# Player 0 is dealt HJ +# Player 1 is dealt ST +# Player 2 is dealt H6 +# Player 2 draws SA +# Player 3 plays CA +# Player 4 plays C4 +# Player 0 plays H4 +# Player 1 starts drawing +# Player 1 draws H8 +# Player 1 plays S8 +# Player 1 nominates suit C +# Player 2 starts drawing +# Player 2 draws C3 +# Player 2 starts drawing +# Player 2 draws HT +# Player 2 starts drawing +# Player 2 draws C8 +# Player 2 plays C8 +# Player 2 nominates suit H +# Player 3 plays H2 +# Player 4 starts drawing +# Player 4 draws D6 +# Player 4 plays C2 +# Player 0 plays C6 +# Player 1 plays H8 +# Player 1 nominates suit C +# Player 2 plays C7 +# Last card: C7 +# Last suit: C +# Number of cards left in deck: 21 +# Player 0: Player 1: Player 2: Player 3: Player 4: +# Suit C: Suit C: K Suit C: 3 5 Suit C: Suit C: +# Suit D: 8 Suit D: K Suit D: 3 Suit D: Suit D: 56 A +# Suit H: 3 J Suit H: Q Suit H: 67 T Suit H: A Suit H: +# Suit S: Suit S: T Suit S: A Suit S: 7 9 Suit S: Q +IsTerminal() = False +History() = [54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4, 52, 34, 52, 24, 24, 56, 2, 52, 17, 0, 16, 26, 54, 20] +HistoryString() = "54, 50, 43, 6, 42, 12, 2, 13, 10, 
27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4, 52, 34, 52, 24, 24, 56, 2, 52, 17, 0, 16, 26, 54, 20" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +ObservationString(0) = "Currently I have: \nSuit C: \nSuit D: 8 \nSuit H: 3 J \nSuit S: \nPrevious card: C7\nPrevious suit: C\nStarting counterclockwise, other players have: 3, 4, 7, 3, 4 cards.\n" +ObservationString(1) = "Currently I have: \nSuit C: K \nSuit D: K \nSuit H: Q \nSuit S: T \nPrevious card: C7\nPrevious suit: C\nStarting counterclockwise, other players have: 4, 7, 3, 4, 3 cards.\n" +ObservationString(2) = "Currently I have: \nSuit C: 3 5 \nSuit D: 3 \nSuit H: 67 T \nSuit S: A\nPrevious card: C7\nPrevious suit: C\nStarting counterclockwise, other players have: 7, 3, 4, 3, 4 cards.\n" +ObservationString(3) = "Currently I have: \nSuit C: \nSuit D: \nSuit H: A\nSuit S: 7 9 \nPrevious card: C7\nPrevious suit: C\nStarting counterclockwise, other players have: 3, 4, 3, 4, 7 cards.\n" +ObservationString(4) = "Currently I have: \nSuit C: \nSuit D: 56 A\nSuit H: \nSuit S: Q \nPrevious card: C7\nPrevious suit: C\nStarting counterclockwise, other players have: 4, 3, 4, 7, 3 cards.\n" +ObservationTensor(0): binvec(372, 0xaaa6aaaaaaaa9aaaaaa6aaaaaa0000080000000808000000000000080000000000040000000000001000000000000) +ObservationTensor(1): binvec(372, 0xaaaaaaaaaaaaaaaaa9aaa65aaa0000080000000801000000000000800000000000020000000000002000000000000) +ObservationTensor(2): binvec(372, 0xaa5aaa6aa6a6aaaaa6aaaaaaa90000080000000810000000000000400000000000040000000000001000000000000) +ObservationTensor(3): binvec(372, 0xaaaaaaaaaaa9aaa9aaaaaaaaa60000080000000808000000000000800000000000020000000000000200000000000) +ObservationTensor(4): binvec(372, 0xaaaaaa9a9aaaaaaaaaaaa9aa9a0000080000000810000000000000400000000000004000000000002000000000000) +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [23, 52] +StringLegalActions() = ["Play S7", "Draw"] + +# Apply action "Play S7" +action: 23 + +# State 51 +# Apply action "Draw" +action: 52 + +# State 52 +# Apply action "Deal H9" +action: 30 + +# State 53 +# Apply action "Draw" +action: 52 + +# State 54 +# Apply action "Deal D9" +action: 29 + +# State 55 +# Apply action "Draw" +action: 52 + +# State 56 +# Apply action "Deal S4" +action: 11 + +# State 57 +# Apply action "Draw" +action: 52 + +# State 58 +# Apply action "Deal HK" +action: 46 + +# State 59 +# Apply action "Draw" +action: 52 + +# State 60 +# Apply action "Deal CT" +action: 32 + +# State 61 +# Apply action "Play S4" +action: 11 + +# State 62 +# Player 2 becomes the dealer +# Player 3 is dealt HA +# Player 4 is dealt SQ +# Player 0 is dealt H3 +# Player 1 is dealt HQ +# Player 2 is dealt C5 +# Player 3 is dealt H2 +# Player 4 is dealt D5 +# Player 0 is dealt H4 +# Player 1 is dealt S8 +# Player 2 is dealt H7 +# Player 3 is dealt S9 +# Player 4 is dealt C4 +# Player 0 is dealt D8 +# Player 1 is dealt CK +# Player 2 is dealt D3 +# Player 3 is dealt CA +# Player 4 is dealt C2 +# Player 0 is dealt C6 +# Player 1 is dealt DK +# Player 2 is dealt C7 +# Player 3 is dealt S7 +# Player 4 is dealt DA +# Player 0 is dealt HJ +# Player 1 is dealt ST +# Player 2 is dealt H6 +# Player 2 draws SA +# Player 3 plays CA +# Player 4 plays C4 +# Player 0 plays H4 +# Player 1 starts drawing +# Player 1 draws H8 +# Player 1 plays S8 +# Player 1 nominates suit C +# Player 2 starts drawing +# Player 2 draws C3 +# Player 2 starts drawing +# Player 2 draws HT +# Player 2 
starts drawing +# Player 2 draws C8 +# Player 2 plays C8 +# Player 2 nominates suit H +# Player 3 plays H2 +# Player 4 starts drawing +# Player 4 draws D6 +# Player 4 plays C2 +# Player 0 plays C6 +# Player 1 plays H8 +# Player 1 nominates suit C +# Player 2 plays C7 +# Player 3 plays S7 +# Player 4 starts drawing +# Player 4 draws H9 +# Player 4 starts drawing +# Player 4 draws D9 +# Player 4 starts drawing +# Player 4 draws S4 +# Player 4 starts drawing +# Player 4 draws HK +# Player 4 starts drawing +# Player 4 draws CT +# Player 4 plays S4 +# Last card: S4 +# Last suit: S +# Number of cards left in deck: 16 +# Player 0: Player 1: Player 2: Player 3: Player 4: +# Suit C: Suit C: K Suit C: 3 5 Suit C: Suit C: T +# Suit D: 8 Suit D: K Suit D: 3 Suit D: Suit D: 56 9 A +# Suit H: 3 J Suit H: Q Suit H: 67 T Suit H: A Suit H: 9 K +# Suit S: Suit S: T Suit S: A Suit S: 9 Suit S: Q +IsTerminal() = False +History() = [54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4, 52, 34, 52, 24, 24, 56, 2, 52, 17, 0, 16, 26, 54, 20, 23, 52, 30, 52, 29, 52, 11, 52, 46, 52, 32, 11] +HistoryString() = "54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4, 52, 34, 52, 24, 24, 56, 2, 52, 17, 0, 16, 26, 54, 20, 23, 52, 30, 52, 29, 52, 11, 52, 46, 52, 32, 11" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Currently I have: \nSuit C: \nSuit D: 8 \nSuit H: 3 J \nSuit S: \nPrevious card: S4\nPrevious suit: S\nStarting counterclockwise, other players have: 3, 4, 7, 2, 8 cards.\n" +ObservationString(1) = "Currently I have: \nSuit C: K \nSuit D: K \nSuit H: Q \nSuit S: T \nPrevious card: S4\nPrevious suit: S\nStarting counterclockwise, other players have: 4, 7, 2, 8, 3 cards.\n" +ObservationString(2) = "Currently I have: \nSuit C: 3 5 \nSuit D: 3 \nSuit H: 67 T \nSuit S: A\nPrevious card: S4\nPrevious suit: S\nStarting counterclockwise, other players have: 7, 2, 8, 3, 4 cards.\n" +ObservationString(3) = "Currently I have: \nSuit C: \nSuit D: \nSuit H: A\nSuit S: 9 \nPrevious card: S4\nPrevious suit: S\nStarting counterclockwise, other players have: 2, 8, 3, 4, 7 cards.\n" +ObservationString(4) = "Currently I have: \nSuit C: T \nSuit D: 56 9 A\nSuit H: 9 K \nSuit S: Q \nPrevious card: S4\nPrevious suit: S\nStarting counterclockwise, other players have: 8, 3, 4, 7, 2 cards.\n" +ObservationTensor(0): binvec(372, 0xaaa6aaaaaaaa9aaaaaa6aaaaaa0010000000000108000000000000080000000000080000000000000100000000000) +ObservationTensor(1): binvec(372, 0xaaaaaaaaaaaaaaaaa9aaa65aaa0010000000000101000000000001000000000000002000000000002000000000000) +ObservationTensor(2): binvec(372, 0xaa5aaa6aa6a6aaaaa6aaaaaaa90010000000000120000000000000040000000000040000000000001000000000000) +ObservationTensor(3): binvec(372, 0xaaaaaaaaaaaaaaa9aaaaaaaaa60010000000000100800000000000800000000000020000000000000200000000000) +ObservationTensor(4): binvec(372, 0xaaaaaa9a9aaaaa966aaaa9a69a0010000000000110000000000000400000000000004000000000004000000000000) +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [25, 52] +StringLegalActions() = ["Play D8", "Draw"] + +# Apply action "Play D8" +action: 25 + +# State 63 +# Apply action "Nominate suit C" +action: 54 + +# State 64 +# Apply action "Draw" +action: 52 + +# State 65 +# Apply action "Deal D7" +action: 21 + +# State 66 +# Apply action "Draw" +action: 52 + +# 
State 67 +# Apply action "Deal S5" +action: 15 + +# State 68 +# Apply action "Play CK" +action: 44 + +# State 69 +# Apply action "Play C5" +action: 12 + +# State 70 +# Apply action "Draw" +action: 52 + +# State 71 +# Apply action "Deal S6" +action: 19 + +# State 72 +# Apply action "Draw" +action: 52 + +# State 73 +# Apply action "Deal C9" +action: 28 + +# State 74 +# Apply action "Play C9" +action: 28 + +# State 75 +# Apply action "Draw" +action: 52 + +# State 76 +# Apply action "Deal H5" +action: 14 + +# State 77 +# Player 2 becomes the dealer +# Player 3 is dealt HA +# Player 4 is dealt SQ +# Player 0 is dealt H3 +# Player 1 is dealt HQ +# Player 2 is dealt C5 +# Player 3 is dealt H2 +# Player 4 is dealt D5 +# Player 0 is dealt H4 +# Player 1 is dealt S8 +# Player 2 is dealt H7 +# Player 3 is dealt S9 +# Player 4 is dealt C4 +# Player 0 is dealt D8 +# Player 1 is dealt CK +# Player 2 is dealt D3 +# Player 3 is dealt CA +# Player 4 is dealt C2 +# Player 0 is dealt C6 +# Player 1 is dealt DK +# Player 2 is dealt C7 +# Player 3 is dealt S7 +# Player 4 is dealt DA +# Player 0 is dealt HJ +# Player 1 is dealt ST +# Player 2 is dealt H6 +# Player 2 draws SA +# Player 3 plays CA +# Player 4 plays C4 +# Player 0 plays H4 +# Player 1 starts drawing +# Player 1 draws H8 +# Player 1 plays S8 +# Player 1 nominates suit C +# Player 2 starts drawing +# Player 2 draws C3 +# Player 2 starts drawing +# Player 2 draws HT +# Player 2 starts drawing +# Player 2 draws C8 +# Player 2 plays C8 +# Player 2 nominates suit H +# Player 3 plays H2 +# Player 4 starts drawing +# Player 4 draws D6 +# Player 4 plays C2 +# Player 0 plays C6 +# Player 1 plays H8 +# Player 1 nominates suit C +# Player 2 plays C7 +# Player 3 plays S7 +# Player 4 starts drawing +# Player 4 draws H9 +# Player 4 starts drawing +# Player 4 draws D9 +# Player 4 starts drawing +# Player 4 draws S4 +# Player 4 starts drawing +# Player 4 draws HK +# Player 4 starts drawing +# Player 4 draws CT +# Player 4 plays S4 +# Player 0 plays D8 +# Player 0 nominates suit C +# Player 1 starts drawing +# Player 1 draws D7 +# Player 1 starts drawing +# Player 1 draws S5 +# Player 1 plays CK +# Player 2 plays C5 +# Player 3 starts drawing +# Player 3 draws S6 +# Player 3 starts drawing +# Player 3 draws C9 +# Player 3 plays C9 +# Player 4 starts drawing +# Player 4 draws H5 +# Last card: C9 +# Last suit: C +# Number of cards left in deck: 11 +# Player 0: Player 1: Player 2: Player 3: Player 4: +# Suit C: Suit C: Suit C: 3 Suit C: Suit C: T +# Suit D: Suit D: 7 K Suit D: 3 Suit D: Suit D: 56 9 A +# Suit H: 3 J Suit H: Q Suit H: 67 T Suit H: A Suit H: 5 9 K +# Suit S: Suit S: 5 T Suit S: A Suit S: 6 9 Suit S: Q +IsTerminal() = False +History() = [54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4, 52, 34, 52, 24, 24, 56, 2, 52, 17, 0, 16, 26, 54, 20, 23, 52, 30, 52, 29, 52, 11, 52, 46, 52, 32, 11, 25, 54, 52, 21, 52, 15, 44, 12, 52, 19, 52, 28, 28, 52, 14] +HistoryString() = "54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4, 52, 34, 52, 24, 24, 56, 2, 52, 17, 0, 16, 26, 54, 20, 23, 52, 30, 52, 29, 52, 11, 52, 46, 52, 32, 11, 25, 54, 52, 21, 52, 15, 44, 12, 52, 19, 52, 28, 28, 52, 14" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 4 +ObservationString(0) = "Currently I have: \nSuit C: \nSuit D: \nSuit H: 3 J \nSuit S: \nPrevious card: C9\nPrevious suit: C\nStarting 
counterclockwise, other players have: 2, 5, 6, 3, 9 cards.\n" +ObservationString(1) = "Currently I have: \nSuit C: \nSuit D: 7 K \nSuit H: Q \nSuit S: 5 T \nPrevious card: C9\nPrevious suit: C\nStarting counterclockwise, other players have: 5, 6, 3, 9, 2 cards.\n" +ObservationString(2) = "Currently I have: \nSuit C: 3 \nSuit D: 3 \nSuit H: 67 T \nSuit S: A\nPrevious card: C9\nPrevious suit: C\nStarting counterclockwise, other players have: 6, 3, 9, 2, 5 cards.\n" +ObservationString(3) = "Currently I have: \nSuit C: \nSuit D: \nSuit H: A\nSuit S: 6 9 \nPrevious card: C9\nPrevious suit: C\nStarting counterclockwise, other players have: 3, 9, 2, 5, 6 cards.\n" +ObservationString(4) = "Currently I have: \nSuit C: T \nSuit D: 56 9 A\nSuit H: 5 9 K \nSuit S: Q \nPrevious card: C9\nPrevious suit: C\nStarting counterclockwise, other players have: 9, 2, 5, 6, 3 cards.\n" +ObservationTensor(0): binvec(372, 0xaaa6aaaaaaaaaaaaaaa6aaaaaa0000000800000804000000000000100000000000040000000000000080000000000) +ObservationTensor(1): binvec(372, 0xaaaaaaa9aa9aaaaaa9aaa69aaa0000000800000802000000000000800000000000001000000000004000000000000) +ObservationTensor(2): binvec(372, 0xaa5aaaaaa6a6aaaaa6aaaaaaa90000000800000810000000000000020000000000080000000000000800000000000) +ObservationTensor(3): binvec(372, 0xaaaaaaaaa9aaaaa9aaaaaaaaa60000000800000800400000000001000000000000010000000000000400000000000) +ObservationTensor(4): binvec(372, 0xaaaaaa969aaaaa966aaaa9a69a0000000800000820000000000000200000000000008000000000002000000000000) +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [29, 30, 32, 52] +StringLegalActions() = ["Play D9", "Play H9", "Play CT", "Draw"] + +# Apply action "Play CT" +action: 32 + +# State 78 +# Apply action "Draw" +action: 52 + +# State 79 +# Apply action "Deal DJ" +action: 37 + +# State 80 +# Apply action "Draw" +action: 52 + +# State 81 +# Apply action "Deal S3" +action: 7 + +# State 82 +# Apply action "Draw" +action: 52 + +# State 83 +# Apply action "Deal SK" +action: 47 + +# State 84 +# Apply action "Draw" +action: 52 + +# State 85 +# Apply action "Deal DQ" +action: 41 + +# State 86 +# Apply action "Draw" +action: 52 + +# State 87 +# Apply action "Deal CJ" +action: 36 + +# State 88 +# Apply action "Play CJ" +action: 36 + +# State 89 +# Apply action "Draw" +action: 52 + +# State 90 +# Apply action "Deal SJ" +action: 39 + +# State 91 +# Apply action "Play SJ" +action: 39 + +# State 92 +# Apply action "Play SA" +action: 51 + +# State 93 +# Apply action "Play S6" +action: 19 + +# State 94 +# Apply action "Draw" +action: 52 + +# State 95 +# Apply action "Deal S2" +action: 3 + +# State 96 +# Apply action "Play S2" +action: 3 + +# State 97 +# Player 2 becomes the dealer +# Player 3 is dealt HA +# Player 4 is dealt SQ +# Player 0 is dealt H3 +# Player 1 is dealt HQ +# Player 2 is dealt C5 +# Player 3 is dealt H2 +# Player 4 is dealt D5 +# Player 0 is dealt H4 +# Player 1 is dealt S8 +# Player 2 is dealt H7 +# Player 3 is dealt S9 +# Player 4 is dealt C4 +# Player 0 is dealt D8 +# Player 1 is dealt CK +# Player 2 is dealt D3 +# Player 3 is dealt CA +# Player 4 is dealt C2 +# Player 0 is dealt C6 +# Player 1 is dealt DK +# Player 2 is dealt C7 +# Player 3 is dealt S7 +# Player 4 is dealt DA +# Player 0 is dealt HJ +# Player 1 is dealt ST +# Player 2 is dealt H6 +# Player 2 draws SA +# Player 3 plays CA +# Player 4 plays C4 +# Player 0 plays H4 +# Player 1 starts drawing +# Player 1 draws H8 +# Player 1 plays S8 +# Player 1 nominates suit C +# Player 2 starts drawing +# 
Player 2 draws C3 +# Player 2 starts drawing +# Player 2 draws HT +# Player 2 starts drawing +# Player 2 draws C8 +# Player 2 plays C8 +# Player 2 nominates suit H +# Player 3 plays H2 +# Player 4 starts drawing +# Player 4 draws D6 +# Player 4 plays C2 +# Player 0 plays C6 +# Player 1 plays H8 +# Player 1 nominates suit C +# Player 2 plays C7 +# Player 3 plays S7 +# Player 4 starts drawing +# Player 4 draws H9 +# Player 4 starts drawing +# Player 4 draws D9 +# Player 4 starts drawing +# Player 4 draws S4 +# Player 4 starts drawing +# Player 4 draws HK +# Player 4 starts drawing +# Player 4 draws CT +# Player 4 plays S4 +# Player 0 plays D8 +# Player 0 nominates suit C +# Player 1 starts drawing +# Player 1 draws D7 +# Player 1 starts drawing +# Player 1 draws S5 +# Player 1 plays CK +# Player 2 plays C5 +# Player 3 starts drawing +# Player 3 draws S6 +# Player 3 starts drawing +# Player 3 draws C9 +# Player 3 plays C9 +# Player 4 starts drawing +# Player 4 draws H5 +# Player 4 plays CT +# Player 0 starts drawing +# Player 0 draws DJ +# Player 0 starts drawing +# Player 0 draws S3 +# Player 0 starts drawing +# Player 0 draws SK +# Player 0 starts drawing +# Player 0 draws DQ +# Player 0 starts drawing +# Player 0 draws CJ +# Player 0 plays CJ +# Player 1 starts drawing +# Player 1 draws SJ +# Player 1 plays SJ +# Player 2 plays SA +# Player 3 plays S6 +# Player 4 starts drawing +# Player 4 draws S2 +# Player 4 plays S2 +# Last card: S2 +# Last suit: S +# Number of cards left in deck: 4 +# Player 0: Player 1: Player 2: Player 3: Player 4: +# Suit C: Suit C: Suit C: 3 Suit C: Suit C: +# Suit D: JQ Suit D: 7 K Suit D: 3 Suit D: Suit D: 56 9 A +# Suit H: 3 J Suit H: Q Suit H: 67 T Suit H: A Suit H: 5 9 K +# Suit S: 3 K Suit S: 5 T Suit S: Suit S: 9 Suit S: Q +IsTerminal() = False +History() = [54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4, 52, 34, 52, 24, 24, 56, 2, 52, 17, 0, 16, 26, 54, 20, 23, 52, 30, 52, 29, 52, 11, 52, 46, 52, 32, 11, 25, 54, 52, 21, 52, 15, 44, 12, 52, 19, 52, 28, 28, 52, 14, 32, 52, 37, 52, 7, 52, 47, 52, 41, 52, 36, 36, 52, 39, 39, 51, 19, 52, 3, 3] +HistoryString() = "54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4, 52, 34, 52, 24, 24, 56, 2, 52, 17, 0, 16, 26, 54, 20, 23, 52, 30, 52, 29, 52, 11, 52, 46, 52, 32, 11, 25, 54, 52, 21, 52, 15, 44, 12, 52, 19, 52, 28, 28, 52, 14, 32, 52, 37, 52, 7, 52, 47, 52, 41, 52, 36, 36, 52, 39, 39, 51, 19, 52, 3, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Currently I have: \nSuit C: \nSuit D: JQ \nSuit H: 3 J \nSuit S: 3 K \nPrevious card: S2\nPrevious suit: S\nStarting counterclockwise, other players have: 6, 5, 5, 2, 8 cards.\n" +ObservationString(1) = "Currently I have: \nSuit C: \nSuit D: 7 K \nSuit H: Q \nSuit S: 5 T \nPrevious card: S2\nPrevious suit: S\nStarting counterclockwise, other players have: 5, 5, 2, 8, 6 cards.\n" +ObservationString(2) = "Currently I have: \nSuit C: 3 \nSuit D: 3 \nSuit H: 67 T \nSuit S: \nPrevious card: S2\nPrevious suit: S\nStarting counterclockwise, other players have: 5, 2, 8, 6, 5 cards.\n" +ObservationString(3) = "Currently I have: \nSuit C: \nSuit D: \nSuit H: A\nSuit S: 9 \nPrevious card: S2\nPrevious suit: S\nStarting counterclockwise, other players have: 2, 8, 6, 5, 5 cards.\n" +ObservationString(4) = "Currently I have: \nSuit C: \nSuit D: 56 9 A\nSuit H: 5 
9 K \nSuit S: Q \nPrevious card: S2\nPrevious suit: S\nStarting counterclockwise, other players have: 8, 6, 5, 5, 2 cards.\n" +ObservationTensor(0): binvec(372, 0xaaa5aaaaaaaaaaaaaa969aa9aa1000000000000104000000000000200000000000080000000000000100000000000) +ObservationTensor(1): binvec(372, 0xaaaaaaa9aa9aaaaaa9aaa69aaa1000000000000104000000000001000000000000002000000000000400000000000) +ObservationTensor(2): binvec(372, 0xaa5aaaaaa6a6aaaaa6aaaaaaaa1000000000000120000000000000040000000000008000000000000800000000000) +ObservationTensor(3): binvec(372, 0xaaaaaaaaaaaaaaa9aaaaaaaaa61000000000000100800000000000100000000000010000000000000800000000000) +ObservationTensor(4): binvec(372, 0xaaaaaa969aaaaa96aaaaa9a69a1000000000000102000000000000200000000000010000000000004000000000000) +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [7, 47, 52] +StringLegalActions() = ["Play S3", "Play SK", "Draw"] + +# Apply action "Play SK" +action: 47 + +# State 98 +# Player 2 becomes the dealer +# Player 3 is dealt HA +# Player 4 is dealt SQ +# Player 0 is dealt H3 +# Player 1 is dealt HQ +# Player 2 is dealt C5 +# Player 3 is dealt H2 +# Player 4 is dealt D5 +# Player 0 is dealt H4 +# Player 1 is dealt S8 +# Player 2 is dealt H7 +# Player 3 is dealt S9 +# Player 4 is dealt C4 +# Player 0 is dealt D8 +# Player 1 is dealt CK +# Player 2 is dealt D3 +# Player 3 is dealt CA +# Player 4 is dealt C2 +# Player 0 is dealt C6 +# Player 1 is dealt DK +# Player 2 is dealt C7 +# Player 3 is dealt S7 +# Player 4 is dealt DA +# Player 0 is dealt HJ +# Player 1 is dealt ST +# Player 2 is dealt H6 +# Player 2 draws SA +# Player 3 plays CA +# Player 4 plays C4 +# Player 0 plays H4 +# Player 1 starts drawing +# Player 1 draws H8 +# Player 1 plays S8 +# Player 1 nominates suit C +# Player 2 starts drawing +# Player 2 draws C3 +# Player 2 starts drawing +# Player 2 draws HT +# Player 2 starts drawing +# Player 2 draws C8 +# Player 2 plays C8 +# Player 2 nominates suit H +# Player 3 plays H2 +# Player 4 starts drawing +# Player 4 draws D6 +# Player 4 plays C2 +# Player 0 plays C6 +# Player 1 plays H8 +# Player 1 nominates suit C +# Player 2 plays C7 +# Player 3 plays S7 +# Player 4 starts drawing +# Player 4 draws H9 +# Player 4 starts drawing +# Player 4 draws D9 +# Player 4 starts drawing +# Player 4 draws S4 +# Player 4 starts drawing +# Player 4 draws HK +# Player 4 starts drawing +# Player 4 draws CT +# Player 4 plays S4 +# Player 0 plays D8 +# Player 0 nominates suit C +# Player 1 starts drawing +# Player 1 draws D7 +# Player 1 starts drawing +# Player 1 draws S5 +# Player 1 plays CK +# Player 2 plays C5 +# Player 3 starts drawing +# Player 3 draws S6 +# Player 3 starts drawing +# Player 3 draws C9 +# Player 3 plays C9 +# Player 4 starts drawing +# Player 4 draws H5 +# Player 4 plays CT +# Player 0 starts drawing +# Player 0 draws DJ +# Player 0 starts drawing +# Player 0 draws S3 +# Player 0 starts drawing +# Player 0 draws SK +# Player 0 starts drawing +# Player 0 draws DQ +# Player 0 starts drawing +# Player 0 draws CJ +# Player 0 plays CJ +# Player 1 starts drawing +# Player 1 draws SJ +# Player 1 plays SJ +# Player 2 plays SA +# Player 3 plays S6 +# Player 4 starts drawing +# Player 4 draws S2 +# Player 4 plays S2 +# Player 0 plays SK +# Last card: SK +# Last suit: S +# Number of cards left in deck: 4 +# Player 0: Player 1: Player 2: Player 3: Player 4: +# Suit C: Suit C: Suit C: 3 Suit C: Suit C: +# Suit D: JQ Suit D: 7 K Suit D: 3 Suit D: Suit D: 56 9 A +# Suit H: 3 J Suit H: Q Suit H: 67 T 
Suit H: A Suit H: 5 9 K +# Suit S: 3 Suit S: 5 T Suit S: Suit S: 9 Suit S: Q +IsTerminal() = False +History() = [54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4, 52, 34, 52, 24, 24, 56, 2, 52, 17, 0, 16, 26, 54, 20, 23, 52, 30, 52, 29, 52, 11, 52, 46, 52, 32, 11, 25, 54, 52, 21, 52, 15, 44, 12, 52, 19, 52, 28, 28, 52, 14, 32, 52, 37, 52, 7, 52, 47, 52, 41, 52, 36, 36, 52, 39, 39, 51, 19, 52, 3, 3, 47] +HistoryString() = "54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4, 52, 34, 52, 24, 24, 56, 2, 52, 17, 0, 16, 26, 54, 20, 23, 52, 30, 52, 29, 52, 11, 52, 46, 52, 32, 11, 25, 54, 52, 21, 52, 15, 44, 12, 52, 19, 52, 28, 28, 52, 14, 32, 52, 37, 52, 7, 52, 47, 52, 41, 52, 36, 36, 52, 39, 39, 51, 19, 52, 3, 3, 47" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "Currently I have: \nSuit C: \nSuit D: JQ \nSuit H: 3 J \nSuit S: 3 \nPrevious card: SK\nPrevious suit: S\nStarting counterclockwise, other players have: 5, 5, 5, 2, 8 cards.\n" +ObservationString(1) = "Currently I have: \nSuit C: \nSuit D: 7 K \nSuit H: Q \nSuit S: 5 T \nPrevious card: SK\nPrevious suit: S\nStarting counterclockwise, other players have: 5, 5, 2, 8, 5 cards.\n" +ObservationString(2) = "Currently I have: \nSuit C: 3 \nSuit D: 3 \nSuit H: 67 T \nSuit S: \nPrevious card: SK\nPrevious suit: S\nStarting counterclockwise, other players have: 5, 2, 8, 5, 5 cards.\n" +ObservationString(3) = "Currently I have: \nSuit C: \nSuit D: \nSuit H: A\nSuit S: 9 \nPrevious card: SK\nPrevious suit: S\nStarting counterclockwise, other players have: 2, 8, 5, 5, 5 cards.\n" +ObservationString(4) = "Currently I have: \nSuit C: \nSuit D: 56 9 A\nSuit H: 5 9 K \nSuit S: Q \nPrevious card: SK\nPrevious suit: S\nStarting counterclockwise, other players have: 8, 5, 5, 5, 2 cards.\n" +ObservationTensor(0): binvec(372, 0xaaa5aaaaaaaaaaaaaa969aaaaa0000000000010104000000000000200000000000080000000000000100000000000) +ObservationTensor(1): binvec(372, 0xaaaaaaa9aa9aaaaaa9aaa69aaa0000000000010104000000000001000000000000002000000000000800000000000) +ObservationTensor(2): binvec(372, 0xaa5aaaaaa6a6aaaaa6aaaaaaaa0000000000010120000000000000040000000000010000000000000800000000000) +ObservationTensor(3): binvec(372, 0xaaaaaaaaaaaaaaa9aaaaaaaaa60000000000010100800000000000200000000000010000000000000800000000000) +ObservationTensor(4): binvec(372, 0xaaaaaa969aaaaa96aaaaa9a69a0000000000010104000000000000200000000000010000000000004000000000000) +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [15, 35, 45, 52] +StringLegalActions() = ["Play S5", "Play ST", "Play DK", "Draw"] + +# Apply action "Play ST" +action: 35 + +# State 99 +# Apply action "Play HT" +action: 34 + +# State 100 +# Apply action "Draw" +action: 52 + +# State 101 +# Apply action "Deal D2" +action: 1 + +# State 102 +# Apply action "Play HA" +action: 50 + +# State 103 +# Apply action "Draw" +action: 52 + +# State 104 +# Apply action "Deal D4" +action: 9 + +# State 105 +# Apply action "Play HK" +action: 46 + +# State 106 +# Apply action "Play H3" +action: 6 + +# State 107 +# Apply action "Draw" +action: 52 + +# State 108 +# Apply action "Deal CQ" +action: 40 + +# State 109 +# Apply action "Draw" +action: 52 + +# State 110 +# Apply action "Deal DT" +action: 33 + +# State 111 +# Apply action "Pass" +action: 53 + +# State 112 +# Apply action "Play H7" 
+action: 22 + +# State 113 +# Apply action "Pass" +action: 53 + +# State 114 +# Apply action "Play H5" +action: 14 + +# State 115 +# Apply action "Play HJ" +action: 38 + +# State 116 +# Apply action "Pass" +action: 53 + +# State 117 +# Player 2 becomes the dealer +# Player 3 is dealt HA +# Player 4 is dealt SQ +# Player 0 is dealt H3 +# Player 1 is dealt HQ +# Player 2 is dealt C5 +# Player 3 is dealt H2 +# Player 4 is dealt D5 +# Player 0 is dealt H4 +# Player 1 is dealt S8 +# Player 2 is dealt H7 +# Player 3 is dealt S9 +# Player 4 is dealt C4 +# Player 0 is dealt D8 +# Player 1 is dealt CK +# Player 2 is dealt D3 +# Player 3 is dealt CA +# Player 4 is dealt C2 +# Player 0 is dealt C6 +# Player 1 is dealt DK +# Player 2 is dealt C7 +# Player 3 is dealt S7 +# Player 4 is dealt DA +# Player 0 is dealt HJ +# Player 1 is dealt ST +# Player 2 is dealt H6 +# Player 2 draws SA +# Player 3 plays CA +# Player 4 plays C4 +# Player 0 plays H4 +# Player 1 starts drawing +# Player 1 draws H8 +# Player 1 plays S8 +# Player 1 nominates suit C +# Player 2 starts drawing +# Player 2 draws C3 +# Player 2 starts drawing +# Player 2 draws HT +# Player 2 starts drawing +# Player 2 draws C8 +# Player 2 plays C8 +# Player 2 nominates suit H +# Player 3 plays H2 +# Player 4 starts drawing +# Player 4 draws D6 +# Player 4 plays C2 +# Player 0 plays C6 +# Player 1 plays H8 +# Player 1 nominates suit C +# Player 2 plays C7 +# Player 3 plays S7 +# Player 4 starts drawing +# Player 4 draws H9 +# Player 4 starts drawing +# Player 4 draws D9 +# Player 4 starts drawing +# Player 4 draws S4 +# Player 4 starts drawing +# Player 4 draws HK +# Player 4 starts drawing +# Player 4 draws CT +# Player 4 plays S4 +# Player 0 plays D8 +# Player 0 nominates suit C +# Player 1 starts drawing +# Player 1 draws D7 +# Player 1 starts drawing +# Player 1 draws S5 +# Player 1 plays CK +# Player 2 plays C5 +# Player 3 starts drawing +# Player 3 draws S6 +# Player 3 starts drawing +# Player 3 draws C9 +# Player 3 plays C9 +# Player 4 starts drawing +# Player 4 draws H5 +# Player 4 plays CT +# Player 0 starts drawing +# Player 0 draws DJ +# Player 0 starts drawing +# Player 0 draws S3 +# Player 0 starts drawing +# Player 0 draws SK +# Player 0 starts drawing +# Player 0 draws DQ +# Player 0 starts drawing +# Player 0 draws CJ +# Player 0 plays CJ +# Player 1 starts drawing +# Player 1 draws SJ +# Player 1 plays SJ +# Player 2 plays SA +# Player 3 plays S6 +# Player 4 starts drawing +# Player 4 draws S2 +# Player 4 plays S2 +# Player 0 plays SK +# Player 1 plays ST +# Player 2 plays HT +# Player 3 starts drawing +# Player 3 draws D2 +# Player 3 plays HA +# Player 4 starts drawing +# Player 4 draws D4 +# Player 4 plays HK +# Player 0 plays H3 +# Player 1 starts drawing +# Player 1 draws CQ +# Player 1 starts drawing +# Player 1 draws DT +# Player 1 passes +# Player 2 plays H7 +# Player 3 passes +# Player 4 plays H5 +# Player 0 plays HJ +# Player 1 passes +# Last card: HJ +# Last suit: H +# Number of cards left in deck: 0 +# Player 0: Player 1: Player 2: Player 3: Player 4: +# Suit C: Suit C: Q Suit C: 3 Suit C: Suit C: +# Suit D: JQ Suit D: 7 T K Suit D: 3 Suit D: 2 Suit D: 456 9 A +# Suit H: Suit H: Q Suit H: 6 Suit H: Suit H: 9 +# Suit S: 3 Suit S: 5 Suit S: Suit S: 9 Suit S: Q +IsTerminal() = False +History() = [54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4, 52, 34, 52, 24, 24, 56, 2, 52, 17, 0, 16, 26, 54, 20, 23, 52, 30, 52, 29, 52, 11, 52, 46, 
52, 32, 11, 25, 54, 52, 21, 52, 15, 44, 12, 52, 19, 52, 28, 28, 52, 14, 32, 52, 37, 52, 7, 52, 47, 52, 41, 52, 36, 36, 52, 39, 39, 51, 19, 52, 3, 3, 47, 35, 34, 52, 1, 50, 52, 9, 46, 6, 52, 40, 52, 33, 53, 22, 53, 14, 38, 53] +HistoryString() = "54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4, 52, 34, 52, 24, 24, 56, 2, 52, 17, 0, 16, 26, 54, 20, 23, 52, 30, 52, 29, 52, 11, 52, 46, 52, 32, 11, 25, 54, 52, 21, 52, 15, 44, 12, 52, 19, 52, 28, 28, 52, 14, 32, 52, 37, 52, 7, 52, 47, 52, 41, 52, 36, 36, 52, 39, 39, 51, 19, 52, 3, 3, 47, 35, 34, 52, 1, 50, 52, 9, 46, 6, 52, 40, 52, 33, 53, 22, 53, 14, 38, 53" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "Currently I have: \nSuit C: \nSuit D: JQ \nSuit H: \nSuit S: 3 \nPrevious card: HJ\nPrevious suit: H\nStarting counterclockwise, other players have: 3, 6, 3, 2, 7 cards.\n" +ObservationString(1) = "Currently I have: \nSuit C: Q \nSuit D: 7 T K \nSuit H: Q \nSuit S: 5 \nPrevious card: HJ\nPrevious suit: H\nStarting counterclockwise, other players have: 6, 3, 2, 7, 3 cards.\n" +ObservationString(2) = "Currently I have: \nSuit C: 3 \nSuit D: 3 \nSuit H: 6 \nSuit S: \nPrevious card: HJ\nPrevious suit: H\nStarting counterclockwise, other players have: 3, 2, 7, 3, 6 cards.\n" +ObservationString(3) = "Currently I have: \nSuit C: \nSuit D: 2 \nSuit H: \nSuit S: 9 \nPrevious card: HJ\nPrevious suit: H\nStarting counterclockwise, other players have: 2, 7, 3, 6, 3 cards.\n" +ObservationString(4) = "Currently I have: \nSuit C: \nSuit D: 456 9 A\nSuit H: 9 \nSuit S: Q \nPrevious card: HJ\nPrevious suit: H\nStarting counterclockwise, other players have: 7, 3, 6, 3, 2 cards.\n" +ObservationTensor(0): binvec(372, 0xaaa9aaaaaaaaaaaaaa9a9aaaaa0000000002000202000000000000800000000000080000000000000200000000000) +ObservationTensor(1): binvec(372, 0xaaaaaaa9aa9aaaaa9aaa669aaa0000000002000210000000000001000000000000004000000000002000000000000) +ObservationTensor(2): binvec(372, 0xaa5aaaaaa6aaaaaaaaaaaaaaaa0000000002000220000000000000080000000000040000000000000400000000000) +ObservationTensor(3): binvec(372, 0x9aaaaaaaaaaaaaa9aaaaaaaaaa0000000002000201000000000000800000000000008000000000002000000000000) +ObservationTensor(4): binvec(372, 0xaaaa9a9a9aaaaa96aaaaa9aa9a0000000002000210000000000000100000000000040000000000004000000000000) +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [18, 53] +StringLegalActions() = ["Play H6", "Pass"] + +# Apply action "Play H6" +action: 18 + +# State 118 +# Player 2 becomes the dealer +# Player 3 is dealt HA +# Player 4 is dealt SQ +# Player 0 is dealt H3 +# Player 1 is dealt HQ +# Player 2 is dealt C5 +# Player 3 is dealt H2 +# Player 4 is dealt D5 +# Player 0 is dealt H4 +# Player 1 is dealt S8 +# Player 2 is dealt H7 +# Player 3 is dealt S9 +# Player 4 is dealt C4 +# Player 0 is dealt D8 +# Player 1 is dealt CK +# Player 2 is dealt D3 +# Player 3 is dealt CA +# Player 4 is dealt C2 +# Player 0 is dealt C6 +# Player 1 is dealt DK +# Player 2 is dealt C7 +# Player 3 is dealt S7 +# Player 4 is dealt DA +# Player 0 is dealt HJ +# Player 1 is dealt ST +# Player 2 is dealt H6 +# Player 2 draws SA +# Player 3 plays CA +# Player 4 plays C4 +# Player 0 plays H4 +# Player 1 starts drawing +# Player 1 draws H8 +# Player 1 plays S8 +# Player 1 nominates suit C +# Player 2 starts drawing +# Player 2 draws C3 +# Player 2 starts drawing +# Player 2 draws HT +# Player 2 starts 
drawing +# Player 2 draws C8 +# Player 2 plays C8 +# Player 2 nominates suit H +# Player 3 plays H2 +# Player 4 starts drawing +# Player 4 draws D6 +# Player 4 plays C2 +# Player 0 plays C6 +# Player 1 plays H8 +# Player 1 nominates suit C +# Player 2 plays C7 +# Player 3 plays S7 +# Player 4 starts drawing +# Player 4 draws H9 +# Player 4 starts drawing +# Player 4 draws D9 +# Player 4 starts drawing +# Player 4 draws S4 +# Player 4 starts drawing +# Player 4 draws HK +# Player 4 starts drawing +# Player 4 draws CT +# Player 4 plays S4 +# Player 0 plays D8 +# Player 0 nominates suit C +# Player 1 starts drawing +# Player 1 draws D7 +# Player 1 starts drawing +# Player 1 draws S5 +# Player 1 plays CK +# Player 2 plays C5 +# Player 3 starts drawing +# Player 3 draws S6 +# Player 3 starts drawing +# Player 3 draws C9 +# Player 3 plays C9 +# Player 4 starts drawing +# Player 4 draws H5 +# Player 4 plays CT +# Player 0 starts drawing +# Player 0 draws DJ +# Player 0 starts drawing +# Player 0 draws S3 +# Player 0 starts drawing +# Player 0 draws SK +# Player 0 starts drawing +# Player 0 draws DQ +# Player 0 starts drawing +# Player 0 draws CJ +# Player 0 plays CJ +# Player 1 starts drawing +# Player 1 draws SJ +# Player 1 plays SJ +# Player 2 plays SA +# Player 3 plays S6 +# Player 4 starts drawing +# Player 4 draws S2 +# Player 4 plays S2 +# Player 0 plays SK +# Player 1 plays ST +# Player 2 plays HT +# Player 3 starts drawing +# Player 3 draws D2 +# Player 3 plays HA +# Player 4 starts drawing +# Player 4 draws D4 +# Player 4 plays HK +# Player 0 plays H3 +# Player 1 starts drawing +# Player 1 draws CQ +# Player 1 starts drawing +# Player 1 draws DT +# Player 1 passes +# Player 2 plays H7 +# Player 3 passes +# Player 4 plays H5 +# Player 0 plays HJ +# Player 1 passes +# Player 2 plays H6 +# Last card: H6 +# Last suit: H +# Number of cards left in deck: 0 +# Player 0: Player 1: Player 2: Player 3: Player 4: +# Suit C: Suit C: Q Suit C: 3 Suit C: Suit C: +# Suit D: JQ Suit D: 7 T K Suit D: 3 Suit D: 2 Suit D: 456 9 A +# Suit H: Suit H: Q Suit H: Suit H: Suit H: 9 +# Suit S: 3 Suit S: 5 Suit S: Suit S: 9 Suit S: Q +IsTerminal() = False +History() = [54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4, 52, 34, 52, 24, 24, 56, 2, 52, 17, 0, 16, 26, 54, 20, 23, 52, 30, 52, 29, 52, 11, 52, 46, 52, 32, 11, 25, 54, 52, 21, 52, 15, 44, 12, 52, 19, 52, 28, 28, 52, 14, 32, 52, 37, 52, 7, 52, 47, 52, 41, 52, 36, 36, 52, 39, 39, 51, 19, 52, 3, 3, 47, 35, 34, 52, 1, 50, 52, 9, 46, 6, 52, 40, 52, 33, 53, 22, 53, 14, 38, 53, 18] +HistoryString() = "54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4, 52, 34, 52, 24, 24, 56, 2, 52, 17, 0, 16, 26, 54, 20, 23, 52, 30, 52, 29, 52, 11, 52, 46, 52, 32, 11, 25, 54, 52, 21, 52, 15, 44, 12, 52, 19, 52, 28, 28, 52, 14, 32, 52, 37, 52, 7, 52, 47, 52, 41, 52, 36, 36, 52, 39, 39, 51, 19, 52, 3, 3, 47, 35, 34, 52, 1, 50, 52, 9, 46, 6, 52, 40, 52, 33, 53, 22, 53, 14, 38, 53, 18" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +ObservationString(0) = "Currently I have: \nSuit C: \nSuit D: JQ \nSuit H: \nSuit S: 3 \nPrevious card: H6\nPrevious suit: H\nStarting counterclockwise, other players have: 3, 6, 2, 2, 7 cards.\n" +ObservationString(1) = "Currently I have: \nSuit C: Q \nSuit D: 7 T K \nSuit H: Q \nSuit S: 5 \nPrevious card: H6\nPrevious suit: H\nStarting counterclockwise, 
other players have: 6, 2, 2, 7, 3 cards.\n" +ObservationString(2) = "Currently I have: \nSuit C: 3 \nSuit D: 3 \nSuit H: \nSuit S: \nPrevious card: H6\nPrevious suit: H\nStarting counterclockwise, other players have: 2, 2, 7, 3, 6 cards.\n" +ObservationString(3) = "Currently I have: \nSuit C: \nSuit D: 2 \nSuit H: \nSuit S: 9 \nPrevious card: H6\nPrevious suit: H\nStarting counterclockwise, other players have: 2, 7, 3, 6, 2 cards.\n" +ObservationString(4) = "Currently I have: \nSuit C: \nSuit D: 456 9 A\nSuit H: 9 \nSuit S: Q \nPrevious card: H6\nPrevious suit: H\nStarting counterclockwise, other players have: 7, 3, 6, 2, 2 cards.\n" +ObservationTensor(0): binvec(372, 0xaaa9aaaaaaaaaaaaaa9a9aaaaa0000200000000202000000000001000000000000080000000000000200000000000) +ObservationTensor(1): binvec(372, 0xaaaaaaa9aa9aaaaa9aaa669aaa0000200000000220000000000001000000000000004000000000002000000000000) +ObservationTensor(2): binvec(372, 0xaa5aaaaaaaaaaaaaaaaaaaaaaa0000200000000220000000000000080000000000040000000000000400000000000) +ObservationTensor(3): binvec(372, 0x9aaaaaaaaaaaaaa9aaaaaaaaaa0000200000000201000000000000800000000000008000000000004000000000000) +ObservationTensor(4): binvec(372, 0xaaaa9a9a9aaaaa96aaaaa9aa9a0000200000000210000000000000100000000000080000000000004000000000000) +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [53] +StringLegalActions() = ["Pass"] + +# Apply action "Pass" +action: 53 + +# State 119 +# Apply action "Pass" +action: 53 + +# State 120 +# Apply action "Pass" +action: 53 + +# State 121 +# Apply action "Pass" +action: 53 + +# State 122 +# Apply action "Pass" +action: 53 + +# State 123 +# Apply action "Pass" +action: 53 + +# State 124 +# Player 2 becomes the dealer +# Player 3 is dealt HA +# Player 4 is dealt SQ +# Player 0 is dealt H3 +# Player 1 is dealt HQ +# Player 2 is dealt C5 +# Player 3 is dealt H2 +# Player 4 is dealt D5 +# Player 0 is dealt H4 +# Player 1 is dealt S8 +# Player 2 is dealt H7 +# Player 3 is dealt S9 +# Player 4 is dealt C4 +# Player 0 is dealt D8 +# Player 1 is dealt CK +# Player 2 is dealt D3 +# Player 3 is dealt CA +# Player 4 is dealt C2 +# Player 0 is dealt C6 +# Player 1 is dealt DK +# Player 2 is dealt C7 +# Player 3 is dealt S7 +# Player 4 is dealt DA +# Player 0 is dealt HJ +# Player 1 is dealt ST +# Player 2 is dealt H6 +# Player 2 draws SA +# Player 3 plays CA +# Player 4 plays C4 +# Player 0 plays H4 +# Player 1 starts drawing +# Player 1 draws H8 +# Player 1 plays S8 +# Player 1 nominates suit C +# Player 2 starts drawing +# Player 2 draws C3 +# Player 2 starts drawing +# Player 2 draws HT +# Player 2 starts drawing +# Player 2 draws C8 +# Player 2 plays C8 +# Player 2 nominates suit H +# Player 3 plays H2 +# Player 4 starts drawing +# Player 4 draws D6 +# Player 4 plays C2 +# Player 0 plays C6 +# Player 1 plays H8 +# Player 1 nominates suit C +# Player 2 plays C7 +# Player 3 plays S7 +# Player 4 starts drawing +# Player 4 draws H9 +# Player 4 starts drawing +# Player 4 draws D9 +# Player 4 starts drawing +# Player 4 draws S4 +# Player 4 starts drawing +# Player 4 draws HK +# Player 4 starts drawing +# Player 4 draws CT +# Player 4 plays S4 +# Player 0 plays D8 +# Player 0 nominates suit C +# Player 1 starts drawing +# Player 1 draws D7 +# Player 1 starts drawing +# Player 1 draws S5 +# Player 1 plays CK +# Player 2 plays C5 +# Player 3 starts drawing +# Player 3 draws S6 +# Player 3 starts drawing +# Player 3 draws C9 +# Player 3 plays C9 +# Player 4 starts drawing +# Player 4 draws H5 +# Player 4 
plays CT +# Player 0 starts drawing +# Player 0 draws DJ +# Player 0 starts drawing +# Player 0 draws S3 +# Player 0 starts drawing +# Player 0 draws SK +# Player 0 starts drawing +# Player 0 draws DQ +# Player 0 starts drawing +# Player 0 draws CJ +# Player 0 plays CJ +# Player 1 starts drawing +# Player 1 draws SJ +# Player 1 plays SJ +# Player 2 plays SA +# Player 3 plays S6 +# Player 4 starts drawing +# Player 4 draws S2 +# Player 4 plays S2 +# Player 0 plays SK +# Player 1 plays ST +# Player 2 plays HT +# Player 3 starts drawing +# Player 3 draws D2 +# Player 3 plays HA +# Player 4 starts drawing +# Player 4 draws D4 +# Player 4 plays HK +# Player 0 plays H3 +# Player 1 starts drawing +# Player 1 draws CQ +# Player 1 starts drawing +# Player 1 draws DT +# Player 1 passes +# Player 2 plays H7 +# Player 3 passes +# Player 4 plays H5 +# Player 0 plays HJ +# Player 1 passes +# Player 2 plays H6 +# Player 3 passes +# Player 4 passes +# Player 0 passes +# Player 1 passes +# Player 2 passes +# Player 3 passes +# Last card: H6 +# Last suit: H +# Number of cards left in deck: 0 +# Player 0: Player 1: Player 2: Player 3: Player 4: +# Suit C: Suit C: Q Suit C: 3 Suit C: Suit C: +# Suit D: JQ Suit D: 7 T K Suit D: 3 Suit D: 2 Suit D: 456 9 A +# Suit H: Suit H: Q Suit H: Suit H: Suit H: 9 +# Suit S: 3 Suit S: 5 Suit S: Suit S: 9 Suit S: Q +IsTerminal() = True +History() = [54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4, 52, 34, 52, 24, 24, 56, 2, 52, 17, 0, 16, 26, 54, 20, 23, 52, 30, 52, 29, 52, 11, 52, 46, 52, 32, 11, 25, 54, 52, 21, 52, 15, 44, 12, 52, 19, 52, 28, 28, 52, 14, 32, 52, 37, 52, 7, 52, 47, 52, 41, 52, 36, 36, 52, 39, 39, 51, 19, 52, 3, 3, 47, 35, 34, 52, 1, 50, 52, 9, 46, 6, 52, 40, 52, 33, 53, 22, 53, 14, 38, 53, 18, 53, 53, 53, 53, 53, 53] +HistoryString() = "54, 50, 43, 6, 42, 12, 2, 13, 10, 27, 22, 31, 8, 25, 44, 5, 48, 0, 16, 45, 20, 23, 49, 38, 35, 18, 51, 48, 8, 10, 52, 26, 27, 54, 52, 4, 52, 34, 52, 24, 24, 56, 2, 52, 17, 0, 16, 26, 54, 20, 23, 52, 30, 52, 29, 52, 11, 52, 46, 52, 32, 11, 25, 54, 52, 21, 52, 15, 44, 12, 52, 19, 52, 28, 28, 52, 14, 32, 52, 37, 52, 7, 52, 47, 52, 41, 52, 36, 36, 52, 39, 39, 51, 19, 52, 3, 3, 47, 35, 34, 52, 1, 50, 52, 9, 46, 6, 52, 40, 52, 33, 53, 22, 53, 14, 38, 53, 18, 53, 53, 53, 53, 53, 53" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "Currently I have: \nSuit C: \nSuit D: JQ \nSuit H: \nSuit S: 3 \nPrevious card: H6\nPrevious suit: H\nStarting counterclockwise, other players have: 3, 6, 2, 2, 7 cards.\n" +ObservationString(1) = "Currently I have: \nSuit C: Q \nSuit D: 7 T K \nSuit H: Q \nSuit S: 5 \nPrevious card: H6\nPrevious suit: H\nStarting counterclockwise, other players have: 6, 2, 2, 7, 3 cards.\n" +ObservationString(2) = "Currently I have: \nSuit C: 3 \nSuit D: 3 \nSuit H: \nSuit S: \nPrevious card: H6\nPrevious suit: H\nStarting counterclockwise, other players have: 2, 2, 7, 3, 6 cards.\n" +ObservationString(3) = "Currently I have: \nSuit C: \nSuit D: 2 \nSuit H: \nSuit S: 9 \nPrevious card: H6\nPrevious suit: H\nStarting counterclockwise, other players have: 2, 7, 3, 6, 2 cards.\n" +ObservationString(4) = "Currently I have: \nSuit C: \nSuit D: 456 9 A\nSuit H: 9 \nSuit S: Q \nPrevious card: H6\nPrevious suit: H\nStarting counterclockwise, other players have: 7, 3, 6, 2, 2 cards.\n" +ObservationTensor(0): binvec(372, 
0xaaa9aaaaaaaaaaaaaa9a9aaaaa0000200000000202000000000001000000000000080000000000000200000000000) +ObservationTensor(1): binvec(372, 0xaaaaaaa9aa9aaaaa9aaa669aaa0000200000000220000000000001000000000000004000000000002000000000000) +ObservationTensor(2): binvec(372, 0xaa5aaaaaaaaaaaaaaaaaaaaaaa0000200000000220000000000000080000000000040000000000000400000000000) +ObservationTensor(3): binvec(372, 0x9aaaaaaaaaaaaaa9aaaaaaaaaa0000200000000201000000000000800000000000008000000000004000000000000) +ObservationTensor(4): binvec(372, 0xaaaa9a9a9aaaaa96aaaaa9aa9a0000200000000210000000000000100000000000080000000000004000000000000) +Rewards() = [-29, -105, -13, -36, -128] +Returns() = [-29, -105, -13, -36, -128] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/cribbage.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/cribbage.txt new file mode 100644 index 0000000..bd2e988 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/cribbage.txt @@ -0,0 +1,2061 @@ +game: cribbage + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Cribbage" +GameType.max_num_players = 4 +GameType.min_num_players = 2 +GameType.parameter_specification = ["players", "winner_bonus_reward"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = False +GameType.provides_observation_tensor = False +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.REWARDS +GameType.short_name = "cribbage" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 2757 +PolicyTensorShape() = [2757] +MaxChanceOutcomes() = 52 +GetParameters() = {players=2,winner_bonus_reward=1000.0} +NumPlayers() = 2 +MinUtility() = -1149.0 +MaxUtility() = 1149.0 +UtilitySum() = None +MaxGameLength() = 4400 +ToString() = "cribbage()" + +# State 0 +# --------------------------------- +# Num players: 2 +# Round: 0 +# Phase: Card +# Dealer: 0 +# Cur player: -1 +# Scores: 0 0 +# --------------------------------- +# Crib: +# P0 Hand: +# P1 Hand: +# --------------------------------- +# Running total: 0 +# Played cards: +# --------------------------------- +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ChanceOutcomes() = [(0,0.0192308), (1,0.0192308), (2,0.0192308), (3,0.0192308), (4,0.0192308), (5,0.0192308), (6,0.0192308), (7,0.0192308), (8,0.0192308), (9,0.0192308), (10,0.0192308), (11,0.0192308), (12,0.0192308), (13,0.0192308), (14,0.0192308), (15,0.0192308), (16,0.0192308), (17,0.0192308), (18,0.0192308), (19,0.0192308), (20,0.0192308), (21,0.0192308), (22,0.0192308), (23,0.0192308), (24,0.0192308), (25,0.0192308), (26,0.0192308), (27,0.0192308), (28,0.0192308), (29,0.0192308), (30,0.0192308), (31,0.0192308), (32,0.0192308), (33,0.0192308), (34,0.0192308), (35,0.0192308), (36,0.0192308), (37,0.0192308), (38,0.0192308), (39,0.0192308), (40,0.0192308), (41,0.0192308), (42,0.0192308), (43,0.0192308), (44,0.0192308), (45,0.0192308), (46,0.0192308), (47,0.0192308), (48,0.0192308), (49,0.0192308), (50,0.0192308), (51,0.0192308)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 
48, 49, 50, 51] +StringLegalActions() = ["Deal AC", "Deal 2C", "Deal 3C", "Deal 4C", "Deal 5C", "Deal 6C", "Deal 7C", "Deal 8C", "Deal 9C", "Deal TC", "Deal JC", "Deal QC", "Deal KC", "Deal AD", "Deal 2D", "Deal 3D", "Deal 4D", "Deal 5D", "Deal 6D", "Deal 7D", "Deal 8D", "Deal 9D", "Deal TD", "Deal JD", "Deal QD", "Deal KD", "Deal AH", "Deal 2H", "Deal 3H", "Deal 4H", "Deal 5H", "Deal 6H", "Deal 7H", "Deal 8H", "Deal 9H", "Deal TH", "Deal JH", "Deal QH", "Deal KH", "Deal AS", "Deal 2S", "Deal 3S", "Deal 4S", "Deal 5S", "Deal 6S", "Deal 7S", "Deal 8S", "Deal 9S", "Deal TS", "Deal JS", "Deal QS", "Deal KS"] + +# Apply action "Deal 3D" +action: 15 + +# State 1 +# --------------------------------- +# Num players: 2 +# Round: 0 +# Phase: Card +# Dealer: 0 +# Cur player: -1 +# Scores: 0 0 +# --------------------------------- +# Crib: +# P0 Hand: 3D +# P1 Hand: +# --------------------------------- +# Running total: 0 +# Played cards: +# --------------------------------- +IsTerminal() = False +History() = [15] +HistoryString() = "15" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ChanceOutcomes() = [(0,0.0196078), (1,0.0196078), (2,0.0196078), (3,0.0196078), (4,0.0196078), (5,0.0196078), (6,0.0196078), (7,0.0196078), (8,0.0196078), (9,0.0196078), (10,0.0196078), (11,0.0196078), (12,0.0196078), (13,0.0196078), (14,0.0196078), (16,0.0196078), (17,0.0196078), (18,0.0196078), (19,0.0196078), (20,0.0196078), (21,0.0196078), (22,0.0196078), (23,0.0196078), (24,0.0196078), (25,0.0196078), (26,0.0196078), (27,0.0196078), (28,0.0196078), (29,0.0196078), (30,0.0196078), (31,0.0196078), (32,0.0196078), (33,0.0196078), (34,0.0196078), (35,0.0196078), (36,0.0196078), (37,0.0196078), (38,0.0196078), (39,0.0196078), (40,0.0196078), (41,0.0196078), (42,0.0196078), (43,0.0196078), (44,0.0196078), (45,0.0196078), (46,0.0196078), (47,0.0196078), (48,0.0196078), (49,0.0196078), (50,0.0196078), (51,0.0196078)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51] +StringLegalActions() = ["Deal AC", "Deal 2C", "Deal 3C", "Deal 4C", "Deal 5C", "Deal 6C", "Deal 7C", "Deal 8C", "Deal 9C", "Deal TC", "Deal JC", "Deal QC", "Deal KC", "Deal AD", "Deal 2D", "Deal 4D", "Deal 5D", "Deal 6D", "Deal 7D", "Deal 8D", "Deal 9D", "Deal TD", "Deal JD", "Deal QD", "Deal KD", "Deal AH", "Deal 2H", "Deal 3H", "Deal 4H", "Deal 5H", "Deal 6H", "Deal 7H", "Deal 8H", "Deal 9H", "Deal TH", "Deal JH", "Deal QH", "Deal KH", "Deal AS", "Deal 2S", "Deal 3S", "Deal 4S", "Deal 5S", "Deal 6S", "Deal 7S", "Deal 8S", "Deal 9S", "Deal TS", "Deal JS", "Deal QS", "Deal KS"] + +# Apply action "Deal 2C" +action: 1 + +# State 2 +# Apply action "Deal JC" +action: 10 + +# State 3 +# Apply action "Deal AH" +action: 26 + +# State 4 +# Apply action "Deal 7D" +action: 19 + +# State 5 +# Apply action "Deal JD" +action: 23 + +# State 6 +# Apply action "Deal 8S" +action: 46 + +# State 7 +# Apply action "Deal 5S" +action: 43 + +# State 8 +# Apply action "Deal QS" +action: 50 + +# State 9 +# Apply action "Deal KH" +action: 38 + +# State 10 +# Apply action "Deal KS" +action: 51 + +# State 11 +# Apply action "Deal TS" +action: 48 + +# State 12 +# --------------------------------- +# Num players: 2 +# Round: 0 +# Phase: Card +# Dealer: 0 +# Cur player: 0 +# Scores: 0 0 +# --------------------------------- +# Crib: +# P0 Hand: AH 2C 3D 7D JC JD +# P1 Hand: 5S 8S TS QS KH KS +# 
--------------------------------- +# Running total: 0 +# Played cards: +# --------------------------------- +IsTerminal() = False +History() = [15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48] +HistoryString() = "15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [114, 119, 123, 127, 595, 842, 851, 855, 1050, 1063, 1405, 1414, 1419, 1423, 1427] +StringLegalActions() = ["Choose 2C JC", "Choose 2C 3D", "Choose 2C 7D", "Choose 2C JD", "Choose JC JD", "Choose 3D JC", "Choose 3D 7D", "Choose 3D JD", "Choose 7D JC", "Choose 7D JD", "Choose AH 2C", "Choose AH JC", "Choose AH 3D", "Choose AH 7D", "Choose AH JD"] + +# Apply action "Choose AH 2C" +action: 1405 + +# State 13 +# --------------------------------- +# Num players: 2 +# Round: 0 +# Phase: Card +# Dealer: 0 +# Cur player: 1 +# Scores: 0 0 +# --------------------------------- +# Crib: AH 2C +# P0 Hand: 3D 7D JC JD +# P1 Hand: 5S 8S TS QS KH KS +# --------------------------------- +# Running total: 0 +# Played cards: +# --------------------------------- +IsTerminal() = False +History() = [15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405] +HistoryString() = "15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2079, 2326, 2334, 2336, 2338, 2339, 2482, 2492, 2494, 2495, 2586, 2598, 2599, 2690, 2703] +StringLegalActions() = ["Choose KH KS", "Choose 5S KH", "Choose 5S 8S", "Choose 5S TS", "Choose 5S QS", "Choose 5S KS", "Choose 8S KH", "Choose 8S TS", "Choose 8S QS", "Choose 8S KS", "Choose TS KH", "Choose TS QS", "Choose TS KS", "Choose QS KH", "Choose QS KS"] + +# Apply action "Choose 5S KS" +action: 2339 + +# State 14 +# Apply action "Deal 6C" +action: 5 + +# State 15 +# --------------------------------- +# Num players: 2 +# Round: 0 +# Phase: Play +# Dealer: 0 +# Cur player: 1 +# Scores: 0 0 +# --------------------------------- +# Crib: AH 2C 5S KS +# Starter: 6C +# P0 Hand: 3D 7D JC JD +# P1 Hand: 8S TS QS KH +# --------------------------------- +# Running total: 0 +# Played cards: +# --------------------------------- +IsTerminal() = False +History() = [15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5] +HistoryString() = "15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [38, 46, 48, 50] +StringLegalActions() = ["Choose KH", "Choose 8S", "Choose TS", "Choose QS"] + +# Apply action "Choose KH" +action: 38 + +# State 16 +# --------------------------------- +# Num players: 2 +# Round: 0 +# Phase: Play +# Dealer: 0 +# Cur player: 0 +# Scores: 0 0 +# --------------------------------- +# Crib: AH 2C 5S KS +# Starter: 6C +# P0 Hand: 3D 7D JC JD +# P1 Hand: 8S TS QS +# --------------------------------- +# Running total: 10 +# Played cards: KH +# --------------------------------- +IsTerminal() = False +History() = [15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38] +HistoryString() = "15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [10, 15, 19, 23] +StringLegalActions() = ["Choose JC", "Choose 3D", "Choose 7D", "Choose JD"] + +# Apply action "Choose 7D" +action: 19 + +# 
State 17 +# --------------------------------- +# Num players: 2 +# Round: 0 +# Phase: Play +# Dealer: 0 +# Cur player: 1 +# Scores: 0 0 +# --------------------------------- +# Crib: AH 2C 5S KS +# Starter: 6C +# P0 Hand: 3D JC JD +# P1 Hand: 8S TS QS +# --------------------------------- +# Running total: 17 +# Played cards: KH 7D +# --------------------------------- +IsTerminal() = False +History() = [15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19] +HistoryString() = "15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [46, 48, 50] +StringLegalActions() = ["Choose 8S", "Choose TS", "Choose QS"] + +# Apply action "Choose TS" +action: 48 + +# State 18 +# --------------------------------- +# Num players: 2 +# Round: 0 +# Phase: Play +# Dealer: 0 +# Cur player: 0 +# Scores: 0 0 +# --------------------------------- +# Crib: AH 2C 5S KS +# Starter: 6C +# P0 Hand: 3D JC JD +# P1 Hand: 8S QS +# --------------------------------- +# Running total: 27 +# Played cards: KH 7D TS +# --------------------------------- +IsTerminal() = False +History() = [15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48] +HistoryString() = "15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [15] +StringLegalActions() = ["Choose 3D"] + +# Apply action "Choose 3D" +action: 15 + +# State 19 +# Apply action "Pass" +action: 2756 + +# State 20 +# Apply action "Pass" +action: 2756 + +# State 21 +# Apply action "Choose 8S" +action: 46 + +# State 22 +# Apply action "Choose JD" +action: 23 + +# State 23 +# Apply action "Choose QS" +action: 50 + +# State 24 +# Apply action "Pass" +action: 2756 + +# State 25 +# Apply action "Pass" +action: 2756 + +# State 26 +# Apply action "Choose JC" +action: 10 + +# State 27 +# Apply action "Pass" +action: 2756 + +# State 28 +# Apply action "Pass" +action: 2756 + +# State 29 +# Apply action "Deal 7C" +action: 6 + +# State 30 +# Apply action "Deal TH" +action: 35 + +# State 31 +# Apply action "Deal JH" +action: 36 + +# State 32 +# Apply action "Deal 6D" +action: 18 + +# State 33 +# Apply action "Deal QD" +action: 24 + +# State 34 +# Apply action "Deal 6C" +action: 5 + +# State 35 +# Apply action "Deal 3C" +action: 2 + +# State 36 +# Apply action "Deal TC" +action: 9 + +# State 37 +# Apply action "Deal JS" +action: 49 + +# State 38 +# Apply action "Deal 2S" +action: 40 + +# State 39 +# Apply action "Deal 9C" +action: 8 + +# State 40 +# Apply action "Deal KD" +action: 25 + +# State 41 +# Apply action "Choose 6C JH" +action: 348 + +# State 42 +# Apply action "Choose 2S JS" +action: 2181 + +# State 43 +# Apply action "Deal 8D" +action: 20 + +# State 44 +# Apply action "Choose QD" +action: 24 + +# State 45 +# Apply action "Choose 9C" +action: 8 + +# State 46 +# --------------------------------- +# Num players: 2 +# Round: 1 +# Phase: Play +# Dealer: 1 +# Cur player: 0 +# Scores: 7 1 +# --------------------------------- +# Crib: 2S 6C JH JS +# Starter: 8D +# P0 Hand: 6D 7C TH +# P1 Hand: 3C TC KD +# --------------------------------- +# Running total: 19 +# Played cards: QD 9C +# --------------------------------- +IsTerminal() = False +History() = [15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 
10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8] +HistoryString() = "15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +Rewards() = [0, 0] +Returns() = [7, 1] +LegalActions() = [6, 18, 35] +StringLegalActions() = ["Choose 7C", "Choose 6D", "Choose TH"] + +# Apply action "Choose 7C" +action: 6 + +# State 47 +# --------------------------------- +# Num players: 2 +# Round: 1 +# Phase: Play +# Dealer: 1 +# Cur player: 1 +# Scores: 7 1 +# --------------------------------- +# Crib: 2S 6C JH JS +# Starter: 8D +# P0 Hand: 6D TH +# P1 Hand: 3C TC KD +# --------------------------------- +# Running total: 26 +# Played cards: QD 9C 7C +# --------------------------------- +IsTerminal() = False +History() = [15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6] +HistoryString() = "15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +Rewards() = [0, 0] +Returns() = [7, 1] +LegalActions() = [2] +StringLegalActions() = ["Choose 3C"] + +# Apply action "Choose 3C" +action: 2 + +# State 48 +# Apply action "Pass" +action: 2756 + +# State 49 +# Apply action "Pass" +action: 2756 + +# State 50 +# Apply action "Choose TH" +action: 35 + +# State 51 +# Apply action "Choose TC" +action: 9 + +# State 52 +# Apply action "Choose 6D" +action: 18 + +# State 53 +# Apply action "Pass" +action: 2756 + +# State 54 +# Apply action "Pass" +action: 2756 + +# State 55 +# Apply action "Choose KD" +action: 25 + +# State 56 +# Apply action "Pass" +action: 2756 + +# State 57 +# Apply action "Pass" +action: 2756 + +# State 58 +# Apply action "Deal 8D" +action: 20 + +# State 59 +# Apply action "Deal QH" +action: 37 + +# State 60 +# Apply action "Deal TH" +action: 35 + +# State 61 +# Apply action "Deal AS" +action: 39 + +# State 62 +# Apply action "Deal AD" +action: 13 + +# State 63 +# Apply action "Deal 3D" +action: 15 + +# State 64 +# Apply action "Deal 3S" +action: 41 + +# State 65 +# Apply action "Deal QS" +action: 50 + +# State 66 +# Apply action "Deal JH" +action: 36 + +# State 67 +# Apply action "Deal 9H" +action: 34 + +# State 68 +# Apply action "Deal TS" +action: 48 + +# State 69 +# Apply action "Deal 6H" +action: 31 + +# State 70 +# Apply action "Choose 3D TH" +action: 867 + +# State 71 +# Apply action "Choose 6H TS" +action: 1712 + +# State 72 +# Apply action "Deal 7S" +action: 45 + +# State 73 +# Apply action "Choose 3S" +action: 41 + +# State 74 +# Apply action "Choose AS" +action: 39 + +# State 75 +# Apply action "Choose JH" +action: 36 + +# State 76 +# Apply action "Choose 8D" +action: 20 + +# State 77 +# Apply action "Choose 9H" +action: 34 + +# State 78 +# Apply action "Choose QH" +action: 37 + +# State 79 +# --------------------------------- +# Num players: 2 +# Round: 2 +# Phase: Play +# Dealer: 0 +# Cur player: 1 +# Scores: 13 12 +# --------------------------------- +# Crib: 3D 6H TH TS +# Starter: 7S +# P0 Hand: AD +# P1 Hand: QS +# --------------------------------- +# Running total: 10 +# Played cards: QH +# 
--------------------------------- +IsTerminal() = False +History() = [15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37] +HistoryString() = "15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +Rewards() = [0, 0] +Returns() = [13, 12] +LegalActions() = [50] +StringLegalActions() = ["Choose QS"] + +# Apply action "Choose QS" +action: 50 + +# State 80 +# --------------------------------- +# Num players: 2 +# Round: 2 +# Phase: Play +# Dealer: 0 +# Cur player: 0 +# Scores: 13 14 +# --------------------------------- +# Crib: 3D 6H TH TS +# Starter: 7S +# P0 Hand: AD +# P1 Hand: +# --------------------------------- +# Running total: 20 +# Played cards: QH QS +# --------------------------------- +IsTerminal() = False +History() = [15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50] +HistoryString() = "15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +Rewards() = [0, 2] +Returns() = [13, 14] +LegalActions() = [13] +StringLegalActions() = ["Choose AD"] + +# Apply action "Choose AD" +action: 13 + +# State 81 +# Apply action "Pass" +action: 2756 + +# State 82 +# Apply action "Pass" +action: 2756 + +# State 83 +# Apply action "Deal 9H" +action: 34 + +# State 84 +# Apply action "Deal QH" +action: 37 + +# State 85 +# Apply action "Deal 6D" +action: 18 + +# State 86 +# Apply action "Deal JD" +action: 23 + +# State 87 +# Apply action "Deal 7C" +action: 6 + +# State 88 +# Apply action "Deal 7D" +action: 19 + +# State 89 +# Apply action "Deal 3C" +action: 2 + +# State 90 +# Apply action "Deal 4C" +action: 3 + +# State 91 +# Apply action "Deal JS" +action: 49 + +# State 92 +# Apply action "Deal KC" +action: 12 + +# State 93 +# Apply action "Deal TH" +action: 35 + +# State 94 +# Apply action "Deal QC" +action: 11 + +# State 95 +# Apply action "Choose 7D 9H" +action: 1074 + +# State 96 +# Apply action "Choose JS KC" +action: 2612 + +# State 97 +# Apply action "Deal 7S" +action: 45 + +# State 98 +# Apply action "Choose 6D" +action: 18 + +# State 99 +# Apply action "Choose 3C" +action: 2 + +# State 100 +# Apply action "Choose QH" +action: 37 + +# State 101 +# Apply action "Choose 4C" +action: 3 + +# State 102 +# Apply action "Choose 7C" +action: 6 + +# State 103 +# Apply action "Pass" +action: 2756 + +# State 104 +# Apply 
action "Pass" +action: 2756 + +# State 105 +# Apply action "Choose TH" +action: 35 + +# State 106 +# Apply action "Choose JD" +action: 23 + +# State 107 +# Apply action "Choose QC" +action: 11 + +# State 108 +# Apply action "Pass" +action: 2756 + +# State 109 +# Apply action "Pass" +action: 2756 + +# State 110 +# Apply action "Deal AD" +action: 13 + +# State 111 +# Apply action "Deal 8C" +action: 7 + +# State 112 +# Apply action "Deal KS" +action: 51 + +# State 113 +# Apply action "Deal JC" +action: 10 + +# State 114 +# Apply action "Deal 5H" +action: 30 + +# State 115 +# Apply action "Deal 3D" +action: 15 + +# State 116 +# Apply action "Deal 2S" +action: 40 + +# State 117 +# Apply action "Deal AH" +action: 26 + +# State 118 +# Apply action "Deal QH" +action: 37 + +# State 119 +# Apply action "Deal 4C" +action: 3 + +# State 120 +# Apply action "Deal 9D" +action: 21 + +# State 121 +# Apply action "Deal 8S" +action: 46 + +# State 122 +# Apply action "Choose AD 3D" +action: 743 + +# State 123 +# Apply action "Choose 2S 4C" +action: 2135 + +# State 124 +# Apply action "Deal 5S" +action: 43 + +# State 125 +# --------------------------------- +# Num players: 2 +# Round: 4 +# Phase: Play +# Dealer: 0 +# Cur player: 1 +# Scores: 23 21 +# --------------------------------- +# Crib: AD 2S 3D 4C +# Starter: 5S +# P0 Hand: 5H 8C JC KS +# P1 Hand: AH 8S 9D QH +# --------------------------------- +# Running total: 0 +# Played cards: +# --------------------------------- +IsTerminal() = False +History() = [15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50, 13, 2756, 2756, 34, 37, 18, 23, 6, 19, 2, 3, 49, 12, 35, 11, 1074, 2612, 45, 18, 2, 37, 3, 6, 2756, 2756, 35, 23, 11, 2756, 2756, 13, 7, 51, 10, 30, 15, 40, 26, 37, 3, 21, 46, 743, 2135, 43] +HistoryString() = "15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50, 13, 2756, 2756, 34, 37, 18, 23, 6, 19, 2, 3, 49, 12, 35, 11, 1074, 2612, 45, 18, 2, 37, 3, 6, 2756, 2756, 35, 23, 11, 2756, 2756, 13, 7, 51, 10, 30, 15, 40, 26, 37, 3, 21, 46, 743, 2135, 43" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +Rewards() = [0, 0] +Returns() = [23, 21] +LegalActions() = [21, 26, 37, 46] +StringLegalActions() = ["Choose 9D", "Choose AH", "Choose QH", "Choose 8S"] + +# Apply action "Choose QH" +action: 37 + +# State 126 +# --------------------------------- +# Num players: 2 +# Round: 4 +# Phase: Play +# Dealer: 0 +# Cur player: 0 +# Scores: 23 21 +# --------------------------------- +# Crib: AD 2S 3D 4C +# Starter: 5S +# P0 Hand: 5H 8C JC KS +# P1 Hand: AH 8S 9D +# --------------------------------- +# Running total: 10 +# Played cards: QH +# --------------------------------- +IsTerminal() = False +History() = [15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 
13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50, 13, 2756, 2756, 34, 37, 18, 23, 6, 19, 2, 3, 49, 12, 35, 11, 1074, 2612, 45, 18, 2, 37, 3, 6, 2756, 2756, 35, 23, 11, 2756, 2756, 13, 7, 51, 10, 30, 15, 40, 26, 37, 3, 21, 46, 743, 2135, 43, 37] +HistoryString() = "15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50, 13, 2756, 2756, 34, 37, 18, 23, 6, 19, 2, 3, 49, 12, 35, 11, 1074, 2612, 45, 18, 2, 37, 3, 6, 2756, 2756, 35, 23, 11, 2756, 2756, 13, 7, 51, 10, 30, 15, 40, 26, 37, 3, 21, 46, 743, 2135, 43, 37" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +Rewards() = [0, 0] +Returns() = [23, 21] +LegalActions() = [7, 10, 30, 51] +StringLegalActions() = ["Choose 8C", "Choose JC", "Choose 5H", "Choose KS"] + +# Apply action "Choose 5H" +action: 30 + +# State 127 +# Apply action "Choose 8S" +action: 46 + +# State 128 +# Apply action "Choose 8C" +action: 7 + +# State 129 +# Apply action "Choose AH" +action: 26 + +# State 130 +# Apply action "Choose KS" +action: 51 + +# State 131 +# Apply action "Choose 9D" +action: 21 + +# State 132 +# Apply action "Choose JC" +action: 10 + +# State 133 +# Apply action "Pass" +action: 2756 + +# State 134 +# Apply action "Pass" +action: 2756 + +# State 135 +# Apply action "Deal KC" +action: 12 + +# State 136 +# Apply action "Deal TS" +action: 48 + +# State 137 +# Apply action "Deal 3S" +action: 41 + +# State 138 +# Apply action "Deal 7H" +action: 32 + +# State 139 +# Apply action "Deal AH" +action: 26 + +# State 140 +# Apply action "Deal AD" +action: 13 + +# State 141 +# Apply action "Deal TC" +action: 9 + +# State 142 +# Apply action "Deal 4H" +action: 29 + +# State 143 +# Apply action "Deal 5H" +action: 30 + +# State 144 +# Apply action "Deal KD" +action: 25 + +# State 145 +# Apply action "Deal 2C" +action: 1 + +# State 146 +# Apply action "Deal 9C" +action: 8 + +# State 147 +# Apply action "Choose AD AH" +action: 754 + +# State 148 +# Apply action "Choose 4H 5H" +action: 1590 + +# State 149 +# Apply action "Deal 8S" +action: 46 + +# State 150 +# Apply action "Choose TS" +action: 48 + +# State 151 +# Apply action "Choose 9C" +action: 8 + +# State 152 +# Apply action "Choose KC" +action: 12 + +# State 153 +# Apply action "Choose 2C" +action: 1 + +# State 154 +# Apply action "Choose 7H" +action: 32 + +# State 155 +# Apply action "Choose KD" +action: 25 + +# State 156 +# Apply action "Choose 3S" +action: 41 + +# State 157 +# Apply action "Choose TC" +action: 9 + +# State 158 +# --------------------------------- +# Num players: 2 +# Round: 5 +# Phase: Play +# Dealer: 1 +# Cur player: 0 +# Scores: 47 27 +# --------------------------------- +# Crib: AD AH 4H 5H +# Starter: 8S +# P0 Hand: +# P1 Hand: +# --------------------------------- +# Running total: 30 +# Played cards: 7H KD 3S TC +# --------------------------------- +IsTerminal() = False +History() = [15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50, 13, 2756, 2756, 34, 37, 18, 23, 6, 
19, 2, 3, 49, 12, 35, 11, 1074, 2612, 45, 18, 2, 37, 3, 6, 2756, 2756, 35, 23, 11, 2756, 2756, 13, 7, 51, 10, 30, 15, 40, 26, 37, 3, 21, 46, 743, 2135, 43, 37, 30, 46, 7, 26, 51, 21, 10, 2756, 2756, 12, 48, 41, 32, 26, 13, 9, 29, 30, 25, 1, 8, 754, 1590, 46, 48, 8, 12, 1, 32, 25, 41, 9] +HistoryString() = "15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50, 13, 2756, 2756, 34, 37, 18, 23, 6, 19, 2, 3, 49, 12, 35, 11, 1074, 2612, 45, 18, 2, 37, 3, 6, 2756, 2756, 35, 23, 11, 2756, 2756, 13, 7, 51, 10, 30, 15, 40, 26, 37, 3, 21, 46, 743, 2135, 43, 37, 30, 46, 7, 26, 51, 21, 10, 2756, 2756, 12, 48, 41, 32, 26, 13, 9, 29, 30, 25, 1, 8, 754, 1590, 46, 48, 8, 12, 1, 32, 25, 41, 9" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +Rewards() = [0, 0] +Returns() = [47, 27] +LegalActions() = [2756] +StringLegalActions() = ["Pass"] + +# Apply action "Pass" +action: 2756 + +# State 159 +# --------------------------------- +# Num players: 2 +# Round: 5 +# Phase: Play +# Dealer: 1 +# Cur player: 1 +# Scores: 47 27 +# --------------------------------- +# Crib: AD AH 4H 5H +# Starter: 8S +# P0 Hand: +# P1 Hand: +# --------------------------------- +# Running total: 30 +# Played cards: 7H KD 3S TC +# --------------------------------- +IsTerminal() = False +History() = [15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50, 13, 2756, 2756, 34, 37, 18, 23, 6, 19, 2, 3, 49, 12, 35, 11, 1074, 2612, 45, 18, 2, 37, 3, 6, 2756, 2756, 35, 23, 11, 2756, 2756, 13, 7, 51, 10, 30, 15, 40, 26, 37, 3, 21, 46, 743, 2135, 43, 37, 30, 46, 7, 26, 51, 21, 10, 2756, 2756, 12, 48, 41, 32, 26, 13, 9, 29, 30, 25, 1, 8, 754, 1590, 46, 48, 8, 12, 1, 32, 25, 41, 9, 2756] +HistoryString() = "15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50, 13, 2756, 2756, 34, 37, 18, 23, 6, 19, 2, 3, 49, 12, 35, 11, 1074, 2612, 45, 18, 2, 37, 3, 6, 2756, 2756, 35, 23, 11, 2756, 2756, 13, 7, 51, 10, 30, 15, 40, 26, 37, 3, 21, 46, 743, 2135, 43, 37, 30, 46, 7, 26, 51, 21, 10, 2756, 2756, 12, 48, 41, 32, 26, 13, 9, 29, 30, 25, 1, 8, 754, 1590, 46, 48, 8, 12, 1, 32, 25, 41, 9, 2756" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +Rewards() = [0, 0] +Returns() = [47, 27] +LegalActions() = [2756] +StringLegalActions() = ["Pass"] + +# Apply action "Pass" +action: 2756 + +# State 160 +# Apply action "Deal KC" +action: 12 + +# State 161 +# Apply action "Deal 8C" +action: 7 + +# State 162 +# Apply action "Deal 3C" +action: 2 + +# State 163 +# Apply action "Deal AS" +action: 39 + +# State 164 +# Apply action "Deal JC" +action: 10 + +# State 165 +# Apply action "Deal 8S" +action: 46 + +# State 166 +# Apply action "Deal KS" +action: 51 + +# State 167 +# 
Apply action "Deal 9D" +action: 21 + +# State 168 +# Apply action "Deal 3S" +action: 41 + +# State 169 +# Apply action "Deal JD" +action: 23 + +# State 170 +# Apply action "Deal 8D" +action: 20 + +# State 171 +# Apply action "Deal 2H" +action: 27 + +# State 172 +# Apply action "Choose 8C 8S" +action: 462 + +# State 173 +# Apply action "Choose 8D 9D" +action: 1113 + +# State 174 +# Apply action "Deal 5H" +action: 30 + +# State 175 +# Apply action "Choose 3S" +action: 41 + +# State 176 +# Apply action "Choose 3C" +action: 2 + +# State 177 +# Apply action "Choose 2H" +action: 27 + +# State 178 +# Apply action "Choose KC" +action: 12 + +# State 179 +# Apply action "Choose JD" +action: 23 + +# State 180 +# Apply action "Choose AS" +action: 39 + +# State 181 +# Apply action "Pass" +action: 2756 + +# State 182 +# Apply action "Pass" +action: 2756 + +# State 183 +# Apply action "Choose KS" +action: 51 + +# State 184 +# Apply action "Choose JC" +action: 10 + +# State 185 +# Apply action "Pass" +action: 2756 + +# State 186 +# Apply action "Pass" +action: 2756 + +# State 187 +# Apply action "Deal 4H" +action: 29 + +# State 188 +# Apply action "Deal 6D" +action: 18 + +# State 189 +# Apply action "Deal 7H" +action: 32 + +# State 190 +# Apply action "Deal 5S" +action: 43 + +# State 191 +# Apply action "Deal KD" +action: 25 + +# State 192 +# Apply action "Deal 9S" +action: 47 + +# State 193 +# Apply action "Deal TH" +action: 35 + +# State 194 +# Apply action "Deal 2H" +action: 27 + +# State 195 +# Apply action "Deal 3H" +action: 28 + +# State 196 +# Apply action "Deal 2D" +action: 14 + +# State 197 +# Apply action "Deal 3D" +action: 15 + +# State 198 +# Apply action "Deal AC" +action: 0 + +# State 199 +# Apply action "Choose 5S 9S" +action: 2335 + +# State 200 +# Apply action "Choose 2D 3H" +action: 808 + +# State 201 +# Apply action "Deal AD" +action: 13 + +# State 202 +# Apply action "Choose 7H" +action: 32 + +# State 203 +# Apply action "Choose AC" +action: 0 + +# State 204 +# --------------------------------- +# Num players: 2 +# Round: 7 +# Phase: Play +# Dealer: 1 +# Cur player: 0 +# Scores: 63 43 +# --------------------------------- +# Crib: 2D 3H 5S 9S +# Starter: AD +# P0 Hand: 4H 6D KD +# P1 Hand: 2H 3D TH +# --------------------------------- +# Running total: 8 +# Played cards: 7H AC +# --------------------------------- +IsTerminal() = False +History() = [15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50, 13, 2756, 2756, 34, 37, 18, 23, 6, 19, 2, 3, 49, 12, 35, 11, 1074, 2612, 45, 18, 2, 37, 3, 6, 2756, 2756, 35, 23, 11, 2756, 2756, 13, 7, 51, 10, 30, 15, 40, 26, 37, 3, 21, 46, 743, 2135, 43, 37, 30, 46, 7, 26, 51, 21, 10, 2756, 2756, 12, 48, 41, 32, 26, 13, 9, 29, 30, 25, 1, 8, 754, 1590, 46, 48, 8, 12, 1, 32, 25, 41, 9, 2756, 2756, 12, 7, 2, 39, 10, 46, 51, 21, 41, 23, 20, 27, 462, 1113, 30, 41, 2, 27, 12, 23, 39, 2756, 2756, 51, 10, 2756, 2756, 29, 18, 32, 43, 25, 47, 35, 27, 28, 14, 15, 0, 2335, 808, 13, 32, 0] +HistoryString() = "15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 
41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50, 13, 2756, 2756, 34, 37, 18, 23, 6, 19, 2, 3, 49, 12, 35, 11, 1074, 2612, 45, 18, 2, 37, 3, 6, 2756, 2756, 35, 23, 11, 2756, 2756, 13, 7, 51, 10, 30, 15, 40, 26, 37, 3, 21, 46, 743, 2135, 43, 37, 30, 46, 7, 26, 51, 21, 10, 2756, 2756, 12, 48, 41, 32, 26, 13, 9, 29, 30, 25, 1, 8, 754, 1590, 46, 48, 8, 12, 1, 32, 25, 41, 9, 2756, 2756, 12, 7, 2, 39, 10, 46, 51, 21, 41, 23, 20, 27, 462, 1113, 30, 41, 2, 27, 12, 23, 39, 2756, 2756, 51, 10, 2756, 2756, 29, 18, 32, 43, 25, 47, 35, 27, 28, 14, 15, 0, 2335, 808, 13, 32, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +Rewards() = [0, 0] +Returns() = [63, 43] +LegalActions() = [18, 25, 29] +StringLegalActions() = ["Choose 6D", "Choose KD", "Choose 4H"] + +# Apply action "Choose 4H" +action: 29 + +# State 205 +# --------------------------------- +# Num players: 2 +# Round: 7 +# Phase: Play +# Dealer: 1 +# Cur player: 1 +# Scores: 63 43 +# --------------------------------- +# Crib: 2D 3H 5S 9S +# Starter: AD +# P0 Hand: 6D KD +# P1 Hand: 2H 3D TH +# --------------------------------- +# Running total: 12 +# Played cards: 7H AC 4H +# --------------------------------- +IsTerminal() = False +History() = [15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50, 13, 2756, 2756, 34, 37, 18, 23, 6, 19, 2, 3, 49, 12, 35, 11, 1074, 2612, 45, 18, 2, 37, 3, 6, 2756, 2756, 35, 23, 11, 2756, 2756, 13, 7, 51, 10, 30, 15, 40, 26, 37, 3, 21, 46, 743, 2135, 43, 37, 30, 46, 7, 26, 51, 21, 10, 2756, 2756, 12, 48, 41, 32, 26, 13, 9, 29, 30, 25, 1, 8, 754, 1590, 46, 48, 8, 12, 1, 32, 25, 41, 9, 2756, 2756, 12, 7, 2, 39, 10, 46, 51, 21, 41, 23, 20, 27, 462, 1113, 30, 41, 2, 27, 12, 23, 39, 2756, 2756, 51, 10, 2756, 2756, 29, 18, 32, 43, 25, 47, 35, 27, 28, 14, 15, 0, 2335, 808, 13, 32, 0, 29] +HistoryString() = "15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50, 13, 2756, 2756, 34, 37, 18, 23, 6, 19, 2, 3, 49, 12, 35, 11, 1074, 2612, 45, 18, 2, 37, 3, 6, 2756, 2756, 35, 23, 11, 2756, 2756, 13, 7, 51, 10, 30, 15, 40, 26, 37, 3, 21, 46, 743, 2135, 43, 37, 30, 46, 7, 26, 51, 21, 10, 2756, 2756, 12, 48, 41, 32, 26, 13, 9, 29, 30, 25, 1, 8, 754, 1590, 46, 48, 8, 12, 1, 32, 25, 41, 9, 2756, 2756, 12, 7, 2, 39, 10, 46, 51, 21, 41, 23, 20, 27, 462, 1113, 30, 41, 2, 27, 12, 23, 39, 2756, 2756, 51, 10, 2756, 2756, 29, 18, 32, 43, 25, 47, 35, 27, 28, 14, 15, 0, 2335, 808, 13, 32, 0, 29" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +Rewards() = [0, 0] +Returns() = [63, 43] +LegalActions() = [15, 27, 35] +StringLegalActions() = ["Choose 3D", "Choose 2H", "Choose TH"] + +# Apply action "Choose 3D" +action: 15 + +# State 206 +# Apply action "Choose KD" +action: 25 + +# State 207 +# Apply action "Choose 2H" +action: 27 + +# State 208 +# Apply action "Pass" +action: 2756 + +# State 209 +# Apply action "Pass" +action: 2756 + +# State 210 +# Apply action "Choose 6D" +action: 18 + +# State 211 +# Apply 
action "Choose TH" +action: 35 + +# State 212 +# Apply action "Pass" +action: 2756 + +# State 213 +# Apply action "Pass" +action: 2756 + +# State 214 +# Apply action "Deal QS" +action: 50 + +# State 215 +# Apply action "Deal 8H" +action: 33 + +# State 216 +# Apply action "Deal 5H" +action: 30 + +# State 217 +# Apply action "Deal KD" +action: 25 + +# State 218 +# Apply action "Deal 9H" +action: 34 + +# State 219 +# Apply action "Deal 6D" +action: 18 + +# State 220 +# Apply action "Deal TD" +action: 22 + +# State 221 +# Apply action "Deal 6S" +action: 44 + +# State 222 +# Apply action "Deal AC" +action: 0 + +# State 223 +# Apply action "Deal QD" +action: 24 + +# State 224 +# Apply action "Deal 6C" +action: 5 + +# State 225 +# Apply action "Deal AS" +action: 39 + +# State 226 +# Apply action "Choose 5H 9H" +action: 1646 + +# State 227 +# Apply action "Choose TD QD" +action: 1220 + +# State 228 +# Apply action "Deal 5D" +action: 17 + +# State 229 +# Apply action "Choose 6S" +action: 44 + +# State 230 +# Apply action "Choose KD" +action: 25 + +# State 231 +# Apply action "Choose AS" +action: 39 + +# State 232 +# Apply action "Choose 8H" +action: 33 + +# State 233 +# Apply action "Choose 6C" +action: 5 + +# State 234 +# Apply action "Choose 6D" +action: 18 + +# State 235 +# Apply action "Choose AC" +action: 0 + +# State 236 +# Apply action "Choose QS" +action: 50 + +# State 237 +# --------------------------------- +# Num players: 2 +# Round: 8 +# Phase: Play +# Dealer: 0 +# Cur player: 1 +# Scores: 65 68 +# --------------------------------- +# Crib: 5H 9H TD QD +# Starter: 5D +# P0 Hand: +# P1 Hand: +# --------------------------------- +# Running total: 17 +# Played cards: 6D AC QS +# --------------------------------- +IsTerminal() = False +History() = [15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50, 13, 2756, 2756, 34, 37, 18, 23, 6, 19, 2, 3, 49, 12, 35, 11, 1074, 2612, 45, 18, 2, 37, 3, 6, 2756, 2756, 35, 23, 11, 2756, 2756, 13, 7, 51, 10, 30, 15, 40, 26, 37, 3, 21, 46, 743, 2135, 43, 37, 30, 46, 7, 26, 51, 21, 10, 2756, 2756, 12, 48, 41, 32, 26, 13, 9, 29, 30, 25, 1, 8, 754, 1590, 46, 48, 8, 12, 1, 32, 25, 41, 9, 2756, 2756, 12, 7, 2, 39, 10, 46, 51, 21, 41, 23, 20, 27, 462, 1113, 30, 41, 2, 27, 12, 23, 39, 2756, 2756, 51, 10, 2756, 2756, 29, 18, 32, 43, 25, 47, 35, 27, 28, 14, 15, 0, 2335, 808, 13, 32, 0, 29, 15, 25, 27, 2756, 2756, 18, 35, 2756, 2756, 50, 33, 30, 25, 34, 18, 22, 44, 0, 24, 5, 39, 1646, 1220, 17, 44, 25, 39, 33, 5, 18, 0, 50] +HistoryString() = "15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50, 13, 2756, 2756, 34, 37, 18, 23, 6, 19, 2, 3, 49, 12, 35, 11, 1074, 2612, 45, 18, 2, 37, 3, 6, 2756, 2756, 35, 23, 11, 2756, 2756, 13, 7, 51, 10, 30, 15, 40, 26, 37, 3, 21, 46, 743, 2135, 43, 37, 30, 46, 7, 26, 51, 21, 10, 2756, 2756, 12, 48, 41, 32, 26, 13, 9, 29, 30, 25, 1, 8, 754, 1590, 46, 48, 8, 12, 1, 32, 25, 41, 9, 2756, 2756, 12, 7, 2, 39, 10, 46, 51, 21, 41, 23, 20, 27, 462, 1113, 30, 41, 2, 27, 12, 
23, 39, 2756, 2756, 51, 10, 2756, 2756, 29, 18, 32, 43, 25, 47, 35, 27, 28, 14, 15, 0, 2335, 808, 13, 32, 0, 29, 15, 25, 27, 2756, 2756, 18, 35, 2756, 2756, 50, 33, 30, 25, 34, 18, 22, 44, 0, 24, 5, 39, 1646, 1220, 17, 44, 25, 39, 33, 5, 18, 0, 50" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +Rewards() = [0, 0] +Returns() = [65, 68] +LegalActions() = [2756] +StringLegalActions() = ["Pass"] + +# Apply action "Pass" +action: 2756 + +# State 238 +# --------------------------------- +# Num players: 2 +# Round: 8 +# Phase: Play +# Dealer: 0 +# Cur player: 0 +# Scores: 65 68 +# --------------------------------- +# Crib: 5H 9H TD QD +# Starter: 5D +# P0 Hand: +# P1 Hand: +# --------------------------------- +# Running total: 17 +# Played cards: 6D AC QS +# --------------------------------- +IsTerminal() = False +History() = [15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50, 13, 2756, 2756, 34, 37, 18, 23, 6, 19, 2, 3, 49, 12, 35, 11, 1074, 2612, 45, 18, 2, 37, 3, 6, 2756, 2756, 35, 23, 11, 2756, 2756, 13, 7, 51, 10, 30, 15, 40, 26, 37, 3, 21, 46, 743, 2135, 43, 37, 30, 46, 7, 26, 51, 21, 10, 2756, 2756, 12, 48, 41, 32, 26, 13, 9, 29, 30, 25, 1, 8, 754, 1590, 46, 48, 8, 12, 1, 32, 25, 41, 9, 2756, 2756, 12, 7, 2, 39, 10, 46, 51, 21, 41, 23, 20, 27, 462, 1113, 30, 41, 2, 27, 12, 23, 39, 2756, 2756, 51, 10, 2756, 2756, 29, 18, 32, 43, 25, 47, 35, 27, 28, 14, 15, 0, 2335, 808, 13, 32, 0, 29, 15, 25, 27, 2756, 2756, 18, 35, 2756, 2756, 50, 33, 30, 25, 34, 18, 22, 44, 0, 24, 5, 39, 1646, 1220, 17, 44, 25, 39, 33, 5, 18, 0, 50, 2756] +HistoryString() = "15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50, 13, 2756, 2756, 34, 37, 18, 23, 6, 19, 2, 3, 49, 12, 35, 11, 1074, 2612, 45, 18, 2, 37, 3, 6, 2756, 2756, 35, 23, 11, 2756, 2756, 13, 7, 51, 10, 30, 15, 40, 26, 37, 3, 21, 46, 743, 2135, 43, 37, 30, 46, 7, 26, 51, 21, 10, 2756, 2756, 12, 48, 41, 32, 26, 13, 9, 29, 30, 25, 1, 8, 754, 1590, 46, 48, 8, 12, 1, 32, 25, 41, 9, 2756, 2756, 12, 7, 2, 39, 10, 46, 51, 21, 41, 23, 20, 27, 462, 1113, 30, 41, 2, 27, 12, 23, 39, 2756, 2756, 51, 10, 2756, 2756, 29, 18, 32, 43, 25, 47, 35, 27, 28, 14, 15, 0, 2335, 808, 13, 32, 0, 29, 15, 25, 27, 2756, 2756, 18, 35, 2756, 2756, 50, 33, 30, 25, 34, 18, 22, 44, 0, 24, 5, 39, 1646, 1220, 17, 44, 25, 39, 33, 5, 18, 0, 50, 2756" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +Rewards() = [0, 0] +Returns() = [65, 68] +LegalActions() = [2756] +StringLegalActions() = ["Pass"] + +# Apply action "Pass" +action: 2756 + +# State 239 +# Apply action "Deal 6H" +action: 31 + +# State 240 +# Apply action "Deal 9C" +action: 8 + +# State 241 +# Apply action "Deal 2H" +action: 27 + +# State 242 +# Apply action "Deal 4H" +action: 29 + +# State 243 +# Apply action "Deal 3H" +action: 28 + +# State 244 +# Apply action "Deal TC" +action: 9 + +# State 245 +# Apply action "Deal 5C" +action: 4 + +# State 246 +# Apply action "Deal 4S" +action: 42 + +# State 247 +# 
Apply action "Deal 9D" +action: 21 + +# State 248 +# Apply action "Deal 8C" +action: 7 + +# State 249 +# Apply action "Deal 9S" +action: 47 + +# State 250 +# Apply action "Deal 8H" +action: 33 + +# State 251 +# Apply action "Choose 6H TC" +action: 1673 + +# State 252 +# Apply action "Choose 4S 8C" +action: 2243 + +# State 253 +# Apply action "Deal 4D" +action: 16 + +# State 254 +# Apply action "Choose 2H" +action: 27 + +# State 255 +# Apply action "Choose 8H" +action: 33 + +# State 256 +# Apply action "Choose 3H" +action: 28 + +# State 257 +# Apply action "Choose 9D" +action: 21 + +# State 258 +# Apply action "Choose 4H" +action: 29 + +# State 259 +# Apply action "Choose 5C" +action: 4 + +# State 260 +# Apply action "Choose 9C" +action: 8 + +# State 261 +# Apply action "Choose 9S" +action: 47 + +# State 262 +# Apply action "Pass" +action: 2756 + +# State 263 +# Apply action "Pass" +action: 2756 + +# State 264 +# Apply action "Deal JD" +action: 23 + +# State 265 +# Apply action "Deal JH" +action: 36 + +# State 266 +# Apply action "Deal 5D" +action: 17 + +# State 267 +# Apply action "Deal 8S" +action: 46 + +# State 268 +# Apply action "Deal QS" +action: 50 + +# State 269 +# Apply action "Deal 7D" +action: 19 + +# State 270 +# Apply action "Deal 7C" +action: 6 + +# State 271 +# Apply action "Deal AS" +action: 39 + +# State 272 +# Apply action "Deal 6D" +action: 18 + +# State 273 +# Apply action "Deal 2H" +action: 27 + +# State 274 +# Apply action "Deal KH" +action: 38 + +# State 275 +# Apply action "Deal 9H" +action: 34 + +# State 276 +# Apply action "Choose 7D JD" +action: 1063 + +# State 277 +# Apply action "Choose 2H 9H" +action: 1490 + +# State 278 +# Apply action "Deal 8H" +action: 33 + +# State 279 +# Apply action "Choose 7C" +action: 6 + +# State 280 +# Apply action "Choose JH" +action: 36 + +# State 281 +# Apply action "Choose 6D" +action: 18 + +# State 282 +# Apply action "Choose 5D" +action: 17 + +# State 283 +# --------------------------------- +# Num players: 2 +# Round: 10 +# Phase: Play +# Dealer: 0 +# Cur player: 1 +# Scores: 92 81 +# --------------------------------- +# Crib: 2H 7D 9H JD +# Starter: 8H +# P0 Hand: 8S QS +# P1 Hand: AS KH +# --------------------------------- +# Running total: 28 +# Played cards: 7C JH 6D 5D +# --------------------------------- +IsTerminal() = False +History() = [15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50, 13, 2756, 2756, 34, 37, 18, 23, 6, 19, 2, 3, 49, 12, 35, 11, 1074, 2612, 45, 18, 2, 37, 3, 6, 2756, 2756, 35, 23, 11, 2756, 2756, 13, 7, 51, 10, 30, 15, 40, 26, 37, 3, 21, 46, 743, 2135, 43, 37, 30, 46, 7, 26, 51, 21, 10, 2756, 2756, 12, 48, 41, 32, 26, 13, 9, 29, 30, 25, 1, 8, 754, 1590, 46, 48, 8, 12, 1, 32, 25, 41, 9, 2756, 2756, 12, 7, 2, 39, 10, 46, 51, 21, 41, 23, 20, 27, 462, 1113, 30, 41, 2, 27, 12, 23, 39, 2756, 2756, 51, 10, 2756, 2756, 29, 18, 32, 43, 25, 47, 35, 27, 28, 14, 15, 0, 2335, 808, 13, 32, 0, 29, 15, 25, 27, 2756, 2756, 18, 35, 2756, 2756, 50, 33, 30, 25, 34, 18, 22, 44, 0, 24, 5, 39, 1646, 1220, 17, 44, 25, 39, 33, 5, 18, 0, 50, 2756, 2756, 31, 8, 27, 29, 28, 9, 4, 42, 21, 7, 47, 33, 1673, 2243, 16, 27, 33, 28, 21, 29, 4, 8, 47, 2756, 2756, 23, 36, 17, 46, 50, 19, 6, 39, 18, 27, 38, 34, 1063, 1490, 33, 6, 36, 18, 17] 
+HistoryString() = "15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50, 13, 2756, 2756, 34, 37, 18, 23, 6, 19, 2, 3, 49, 12, 35, 11, 1074, 2612, 45, 18, 2, 37, 3, 6, 2756, 2756, 35, 23, 11, 2756, 2756, 13, 7, 51, 10, 30, 15, 40, 26, 37, 3, 21, 46, 743, 2135, 43, 37, 30, 46, 7, 26, 51, 21, 10, 2756, 2756, 12, 48, 41, 32, 26, 13, 9, 29, 30, 25, 1, 8, 754, 1590, 46, 48, 8, 12, 1, 32, 25, 41, 9, 2756, 2756, 12, 7, 2, 39, 10, 46, 51, 21, 41, 23, 20, 27, 462, 1113, 30, 41, 2, 27, 12, 23, 39, 2756, 2756, 51, 10, 2756, 2756, 29, 18, 32, 43, 25, 47, 35, 27, 28, 14, 15, 0, 2335, 808, 13, 32, 0, 29, 15, 25, 27, 2756, 2756, 18, 35, 2756, 2756, 50, 33, 30, 25, 34, 18, 22, 44, 0, 24, 5, 39, 1646, 1220, 17, 44, 25, 39, 33, 5, 18, 0, 50, 2756, 2756, 31, 8, 27, 29, 28, 9, 4, 42, 21, 7, 47, 33, 1673, 2243, 16, 27, 33, 28, 21, 29, 4, 8, 47, 2756, 2756, 23, 36, 17, 46, 50, 19, 6, 39, 18, 27, 38, 34, 1063, 1490, 33, 6, 36, 18, 17" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +Rewards() = [0, 0] +Returns() = [92, 81] +LegalActions() = [39] +StringLegalActions() = ["Choose AS"] + +# Apply action "Choose AS" +action: 39 + +# State 284 +# --------------------------------- +# Num players: 2 +# Round: 10 +# Phase: Play +# Dealer: 0 +# Cur player: 0 +# Scores: 92 81 +# --------------------------------- +# Crib: 2H 7D 9H JD +# Starter: 8H +# P0 Hand: 8S QS +# P1 Hand: KH +# --------------------------------- +# Running total: 29 +# Played cards: 7C JH 6D 5D AS +# --------------------------------- +IsTerminal() = False +History() = [15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50, 13, 2756, 2756, 34, 37, 18, 23, 6, 19, 2, 3, 49, 12, 35, 11, 1074, 2612, 45, 18, 2, 37, 3, 6, 2756, 2756, 35, 23, 11, 2756, 2756, 13, 7, 51, 10, 30, 15, 40, 26, 37, 3, 21, 46, 743, 2135, 43, 37, 30, 46, 7, 26, 51, 21, 10, 2756, 2756, 12, 48, 41, 32, 26, 13, 9, 29, 30, 25, 1, 8, 754, 1590, 46, 48, 8, 12, 1, 32, 25, 41, 9, 2756, 2756, 12, 7, 2, 39, 10, 46, 51, 21, 41, 23, 20, 27, 462, 1113, 30, 41, 2, 27, 12, 23, 39, 2756, 2756, 51, 10, 2756, 2756, 29, 18, 32, 43, 25, 47, 35, 27, 28, 14, 15, 0, 2335, 808, 13, 32, 0, 29, 15, 25, 27, 2756, 2756, 18, 35, 2756, 2756, 50, 33, 30, 25, 34, 18, 22, 44, 0, 24, 5, 39, 1646, 1220, 17, 44, 25, 39, 33, 5, 18, 0, 50, 2756, 2756, 31, 8, 27, 29, 28, 9, 4, 42, 21, 7, 47, 33, 1673, 2243, 16, 27, 33, 28, 21, 29, 4, 8, 47, 2756, 2756, 23, 36, 17, 46, 50, 19, 6, 39, 18, 27, 38, 34, 1063, 1490, 33, 6, 36, 18, 17, 39] +HistoryString() = "15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50, 13, 2756, 2756, 34, 37, 18, 23, 6, 19, 2, 3, 49, 12, 35, 11, 1074, 2612, 45, 18, 2, 37, 3, 6, 2756, 2756, 35, 23, 11, 2756, 2756, 13, 7, 51, 
10, 30, 15, 40, 26, 37, 3, 21, 46, 743, 2135, 43, 37, 30, 46, 7, 26, 51, 21, 10, 2756, 2756, 12, 48, 41, 32, 26, 13, 9, 29, 30, 25, 1, 8, 754, 1590, 46, 48, 8, 12, 1, 32, 25, 41, 9, 2756, 2756, 12, 7, 2, 39, 10, 46, 51, 21, 41, 23, 20, 27, 462, 1113, 30, 41, 2, 27, 12, 23, 39, 2756, 2756, 51, 10, 2756, 2756, 29, 18, 32, 43, 25, 47, 35, 27, 28, 14, 15, 0, 2335, 808, 13, 32, 0, 29, 15, 25, 27, 2756, 2756, 18, 35, 2756, 2756, 50, 33, 30, 25, 34, 18, 22, 44, 0, 24, 5, 39, 1646, 1220, 17, 44, 25, 39, 33, 5, 18, 0, 50, 2756, 2756, 31, 8, 27, 29, 28, 9, 4, 42, 21, 7, 47, 33, 1673, 2243, 16, 27, 33, 28, 21, 29, 4, 8, 47, 2756, 2756, 23, 36, 17, 46, 50, 19, 6, 39, 18, 27, 38, 34, 1063, 1490, 33, 6, 36, 18, 17, 39" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +Rewards() = [0, 0] +Returns() = [92, 81] +LegalActions() = [2756] +StringLegalActions() = ["Pass"] + +# Apply action "Pass" +action: 2756 + +# State 285 +# Apply action "Pass" +action: 2756 + +# State 286 +# Apply action "Choose 8S" +action: 46 + +# State 287 +# Apply action "Choose KH" +action: 38 + +# State 288 +# Apply action "Choose QS" +action: 50 + +# State 289 +# Apply action "Pass" +action: 2756 + +# State 290 +# Apply action "Pass" +action: 2756 + +# State 291 +# Apply action "Deal 2H" +action: 27 + +# State 292 +# Apply action "Deal 9S" +action: 47 + +# State 293 +# Apply action "Deal QD" +action: 24 + +# State 294 +# Apply action "Deal 3H" +action: 28 + +# State 295 +# Apply action "Deal 5C" +action: 4 + +# State 296 +# Apply action "Deal AC" +action: 0 + +# State 297 +# Apply action "Deal 2D" +action: 14 + +# State 298 +# Apply action "Deal 9D" +action: 21 + +# State 299 +# Apply action "Deal 8D" +action: 20 + +# State 300 +# Apply action "Deal 7C" +action: 6 + +# State 301 +# Apply action "Deal JD" +action: 23 + +# State 302 +# Apply action "Deal 8H" +action: 33 + +# State 303 +# Apply action "Choose 2H 5C" +action: 1460 + +# State 304 +# Apply action "Choose 8D 8H" +action: 1125 + +# State 305 +# Apply action "Deal AH" +action: 26 + +# State 306 +# Apply action "Choose AC" +action: 0 + +# State 307 +# Apply action "Choose 9D" +action: 21 + +# State 308 +# Apply action "Choose 9S" +action: 47 + +# State 309 +# Apply action "Choose 7C" +action: 6 + +# State 310 +# Apply action "Choose 3H" +action: 28 + +# State 311 +# Apply action "Choose 2D" +action: 14 + +# State 312 +# Apply action "Choose QD" +action: 24 + +# State 313 +# Apply action "Choose JD" +action: 23 + +# State 314 +# Apply action "Pass" +action: 2756 + +# State 315 +# Apply action "Pass" +action: 2756 + +# State 316 +# Apply action "Deal 3S" +action: 41 + +# State 317 +# Apply action "Deal 8H" +action: 33 + +# State 318 +# Apply action "Deal 3C" +action: 2 + +# State 319 +# Apply action "Deal 6D" +action: 18 + +# State 320 +# Apply action "Deal 5H" +action: 30 + +# State 321 +# Apply action "Deal QD" +action: 24 + +# State 322 +# Apply action "Deal 7S" +action: 45 + +# State 323 +# Apply action "Deal 9C" +action: 8 + +# State 324 +# Apply action "Deal 2S" +action: 40 + +# State 325 +# Apply action "Deal KS" +action: 51 + +# State 326 +# Apply action "Deal TS" +action: 48 + +# State 327 +# Apply action "Deal 5S" +action: 43 + +# State 328 +# --------------------------------- +# Num players: 2 +# Round: 12 +# Phase: Card +# Dealer: 0 +# Cur player: 0 +# Scores: 111 98 +# --------------------------------- +# Crib: +# P0 Hand: 3C 3S 5H 6D 8H QD +# P1 Hand: 2S 5S 7S 9C TS KS +# --------------------------------- +# Running total: 0 +# Played 
cards: +# --------------------------------- +IsTerminal() = False +History() = [15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50, 13, 2756, 2756, 34, 37, 18, 23, 6, 19, 2, 3, 49, 12, 35, 11, 1074, 2612, 45, 18, 2, 37, 3, 6, 2756, 2756, 35, 23, 11, 2756, 2756, 13, 7, 51, 10, 30, 15, 40, 26, 37, 3, 21, 46, 743, 2135, 43, 37, 30, 46, 7, 26, 51, 21, 10, 2756, 2756, 12, 48, 41, 32, 26, 13, 9, 29, 30, 25, 1, 8, 754, 1590, 46, 48, 8, 12, 1, 32, 25, 41, 9, 2756, 2756, 12, 7, 2, 39, 10, 46, 51, 21, 41, 23, 20, 27, 462, 1113, 30, 41, 2, 27, 12, 23, 39, 2756, 2756, 51, 10, 2756, 2756, 29, 18, 32, 43, 25, 47, 35, 27, 28, 14, 15, 0, 2335, 808, 13, 32, 0, 29, 15, 25, 27, 2756, 2756, 18, 35, 2756, 2756, 50, 33, 30, 25, 34, 18, 22, 44, 0, 24, 5, 39, 1646, 1220, 17, 44, 25, 39, 33, 5, 18, 0, 50, 2756, 2756, 31, 8, 27, 29, 28, 9, 4, 42, 21, 7, 47, 33, 1673, 2243, 16, 27, 33, 28, 21, 29, 4, 8, 47, 2756, 2756, 23, 36, 17, 46, 50, 19, 6, 39, 18, 27, 38, 34, 1063, 1490, 33, 6, 36, 18, 17, 39, 2756, 2756, 46, 38, 50, 2756, 2756, 27, 47, 24, 28, 4, 0, 14, 21, 20, 6, 23, 33, 1460, 1125, 26, 0, 21, 47, 6, 28, 14, 24, 23, 2756, 2756, 41, 33, 2, 18, 30, 24, 45, 8, 40, 51, 48, 43] +HistoryString() = "15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50, 13, 2756, 2756, 34, 37, 18, 23, 6, 19, 2, 3, 49, 12, 35, 11, 1074, 2612, 45, 18, 2, 37, 3, 6, 2756, 2756, 35, 23, 11, 2756, 2756, 13, 7, 51, 10, 30, 15, 40, 26, 37, 3, 21, 46, 743, 2135, 43, 37, 30, 46, 7, 26, 51, 21, 10, 2756, 2756, 12, 48, 41, 32, 26, 13, 9, 29, 30, 25, 1, 8, 754, 1590, 46, 48, 8, 12, 1, 32, 25, 41, 9, 2756, 2756, 12, 7, 2, 39, 10, 46, 51, 21, 41, 23, 20, 27, 462, 1113, 30, 41, 2, 27, 12, 23, 39, 2756, 2756, 51, 10, 2756, 2756, 29, 18, 32, 43, 25, 47, 35, 27, 28, 14, 15, 0, 2335, 808, 13, 32, 0, 29, 15, 25, 27, 2756, 2756, 18, 35, 2756, 2756, 50, 33, 30, 25, 34, 18, 22, 44, 0, 24, 5, 39, 1646, 1220, 17, 44, 25, 39, 33, 5, 18, 0, 50, 2756, 2756, 31, 8, 27, 29, 28, 9, 4, 42, 21, 7, 47, 33, 1673, 2243, 16, 27, 33, 28, 21, 29, 4, 8, 47, 2756, 2756, 23, 36, 17, 46, 50, 19, 6, 39, 18, 27, 38, 34, 1063, 1490, 33, 6, 36, 18, 17, 39, 2756, 2756, 46, 38, 50, 2756, 2756, 27, 47, 24, 28, 4, 0, 14, 21, 20, 6, 23, 33, 1460, 1125, 26, 0, 21, 47, 6, 28, 14, 24, 23, 2756, 2756, 41, 33, 2, 18, 30, 24, 45, 8, 40, 51, 48, 43" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +Rewards() = [4, 7] +Returns() = [111, 98] +LegalActions() = [174, 180, 186, 189, 197, 1012, 1021, 1630, 1636, 1645, 1792, 2202, 2208, 2214, 2217] +StringLegalActions() = ["Choose 3C 6D", "Choose 3C QD", "Choose 3C 5H", "Choose 3C 8H", "Choose 3C 3S", "Choose 6D QD", "Choose 6D 8H", "Choose 5H 6D", "Choose 5H QD", "Choose 5H 8H", "Choose 8H QD", "Choose 3S 6D", "Choose 3S QD", "Choose 3S 5H", "Choose 3S 8H"] + +# Apply action "Choose 3S 5H" +action: 2214 + +# State 329 +# --------------------------------- +# Num players: 2 +# Round: 12 +# Phase: Card +# Dealer: 0 +# Cur player: 1 +# Scores: 111 98 +# 
--------------------------------- +# Crib: 3S 5H +# P0 Hand: 3C 6D 8H QD +# P1 Hand: 2S 5S 7S 9C TS KS +# --------------------------------- +# Running total: 0 +# Played cards: +# --------------------------------- +IsTerminal() = False +History() = [15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50, 13, 2756, 2756, 34, 37, 18, 23, 6, 19, 2, 3, 49, 12, 35, 11, 1074, 2612, 45, 18, 2, 37, 3, 6, 2756, 2756, 35, 23, 11, 2756, 2756, 13, 7, 51, 10, 30, 15, 40, 26, 37, 3, 21, 46, 743, 2135, 43, 37, 30, 46, 7, 26, 51, 21, 10, 2756, 2756, 12, 48, 41, 32, 26, 13, 9, 29, 30, 25, 1, 8, 754, 1590, 46, 48, 8, 12, 1, 32, 25, 41, 9, 2756, 2756, 12, 7, 2, 39, 10, 46, 51, 21, 41, 23, 20, 27, 462, 1113, 30, 41, 2, 27, 12, 23, 39, 2756, 2756, 51, 10, 2756, 2756, 29, 18, 32, 43, 25, 47, 35, 27, 28, 14, 15, 0, 2335, 808, 13, 32, 0, 29, 15, 25, 27, 2756, 2756, 18, 35, 2756, 2756, 50, 33, 30, 25, 34, 18, 22, 44, 0, 24, 5, 39, 1646, 1220, 17, 44, 25, 39, 33, 5, 18, 0, 50, 2756, 2756, 31, 8, 27, 29, 28, 9, 4, 42, 21, 7, 47, 33, 1673, 2243, 16, 27, 33, 28, 21, 29, 4, 8, 47, 2756, 2756, 23, 36, 17, 46, 50, 19, 6, 39, 18, 27, 38, 34, 1063, 1490, 33, 6, 36, 18, 17, 39, 2756, 2756, 46, 38, 50, 2756, 2756, 27, 47, 24, 28, 4, 0, 14, 21, 20, 6, 23, 33, 1460, 1125, 26, 0, 21, 47, 6, 28, 14, 24, 23, 2756, 2756, 41, 33, 2, 18, 30, 24, 45, 8, 40, 51, 48, 43, 2214] +HistoryString() = "15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50, 13, 2756, 2756, 34, 37, 18, 23, 6, 19, 2, 3, 49, 12, 35, 11, 1074, 2612, 45, 18, 2, 37, 3, 6, 2756, 2756, 35, 23, 11, 2756, 2756, 13, 7, 51, 10, 30, 15, 40, 26, 37, 3, 21, 46, 743, 2135, 43, 37, 30, 46, 7, 26, 51, 21, 10, 2756, 2756, 12, 48, 41, 32, 26, 13, 9, 29, 30, 25, 1, 8, 754, 1590, 46, 48, 8, 12, 1, 32, 25, 41, 9, 2756, 2756, 12, 7, 2, 39, 10, 46, 51, 21, 41, 23, 20, 27, 462, 1113, 30, 41, 2, 27, 12, 23, 39, 2756, 2756, 51, 10, 2756, 2756, 29, 18, 32, 43, 25, 47, 35, 27, 28, 14, 15, 0, 2335, 808, 13, 32, 0, 29, 15, 25, 27, 2756, 2756, 18, 35, 2756, 2756, 50, 33, 30, 25, 34, 18, 22, 44, 0, 24, 5, 39, 1646, 1220, 17, 44, 25, 39, 33, 5, 18, 0, 50, 2756, 2756, 31, 8, 27, 29, 28, 9, 4, 42, 21, 7, 47, 33, 1673, 2243, 16, 27, 33, 28, 21, 29, 4, 8, 47, 2756, 2756, 23, 36, 17, 46, 50, 19, 6, 39, 18, 27, 38, 34, 1063, 1490, 33, 6, 36, 18, 17, 39, 2756, 2756, 46, 38, 50, 2756, 2756, 27, 47, 24, 28, 4, 0, 14, 21, 20, 6, 23, 33, 1460, 1125, 26, 0, 21, 47, 6, 28, 14, 24, 23, 2756, 2756, 41, 33, 2, 18, 30, 24, 45, 8, 40, 51, 48, 43, 2214" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +Rewards() = [0, 0] +Returns() = [111, 98] +LegalActions() = [516, 519, 2140, 2175, 2177, 2180, 2183, 2296, 2333, 2336, 2339, 2400, 2440, 2443, 2599] +StringLegalActions() = ["Choose 9C TS", "Choose 9C KS", "Choose 2S 9C", "Choose 2S 5S", "Choose 2S 7S", "Choose 2S TS", "Choose 2S KS", "Choose 5S 9C", "Choose 5S 7S", "Choose 5S TS", "Choose 5S KS", "Choose 7S 9C", "Choose 7S TS", "Choose 7S KS", "Choose TS KS"] + +# Apply 
action "Choose 5S 7S" +action: 2333 + +# State 330 +# Apply action "Deal 4C" +action: 3 + +# State 331 +# Apply action "Choose 2S" +action: 40 + +# State 332 +# Apply action "Choose 3C" +action: 2 + +# State 333 +# Apply action "Choose KS" +action: 51 + +# State 334 +# Apply action "Choose QD" +action: 24 + +# State 335 +# Apply action "Pass" +action: 2756 + +# State 336 +# Apply action "Choose 6D" +action: 18 + +# State 337 +# Apply action "Choose TS" +action: 48 + +# State 338 +# Apply action "Choose 8H" +action: 33 + +# State 339 +# Apply action "Choose 9C" +action: 8 + +# State 340 +# Apply action "Pass" +action: 2756 + +# State 341 +# Apply action "Pass" +action: 2756 + +# State 342 +# --------------------------------- +# Num players: 2 +# Round: 13 +# Phase: Card +# Dealer: 1 +# Cur player: -1 +# Scores: 127 106 +# --------------------------------- +# Crib: +# P0 Hand: +# P1 Hand: +# --------------------------------- +# Running total: 0 +# Played cards: +# --------------------------------- +IsTerminal() = True +History() = [15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50, 13, 2756, 2756, 34, 37, 18, 23, 6, 19, 2, 3, 49, 12, 35, 11, 1074, 2612, 45, 18, 2, 37, 3, 6, 2756, 2756, 35, 23, 11, 2756, 2756, 13, 7, 51, 10, 30, 15, 40, 26, 37, 3, 21, 46, 743, 2135, 43, 37, 30, 46, 7, 26, 51, 21, 10, 2756, 2756, 12, 48, 41, 32, 26, 13, 9, 29, 30, 25, 1, 8, 754, 1590, 46, 48, 8, 12, 1, 32, 25, 41, 9, 2756, 2756, 12, 7, 2, 39, 10, 46, 51, 21, 41, 23, 20, 27, 462, 1113, 30, 41, 2, 27, 12, 23, 39, 2756, 2756, 51, 10, 2756, 2756, 29, 18, 32, 43, 25, 47, 35, 27, 28, 14, 15, 0, 2335, 808, 13, 32, 0, 29, 15, 25, 27, 2756, 2756, 18, 35, 2756, 2756, 50, 33, 30, 25, 34, 18, 22, 44, 0, 24, 5, 39, 1646, 1220, 17, 44, 25, 39, 33, 5, 18, 0, 50, 2756, 2756, 31, 8, 27, 29, 28, 9, 4, 42, 21, 7, 47, 33, 1673, 2243, 16, 27, 33, 28, 21, 29, 4, 8, 47, 2756, 2756, 23, 36, 17, 46, 50, 19, 6, 39, 18, 27, 38, 34, 1063, 1490, 33, 6, 36, 18, 17, 39, 2756, 2756, 46, 38, 50, 2756, 2756, 27, 47, 24, 28, 4, 0, 14, 21, 20, 6, 23, 33, 1460, 1125, 26, 0, 21, 47, 6, 28, 14, 24, 23, 2756, 2756, 41, 33, 2, 18, 30, 24, 45, 8, 40, 51, 48, 43, 2214, 2333, 3, 40, 2, 51, 24, 2756, 18, 48, 33, 8, 2756, 2756] +HistoryString() = "15, 1, 10, 26, 19, 23, 46, 43, 50, 38, 51, 48, 1405, 2339, 5, 38, 19, 48, 15, 2756, 2756, 46, 23, 50, 2756, 2756, 10, 2756, 2756, 6, 35, 36, 18, 24, 5, 2, 9, 49, 40, 8, 25, 348, 2181, 20, 24, 8, 6, 2, 2756, 2756, 35, 9, 18, 2756, 2756, 25, 2756, 2756, 20, 37, 35, 39, 13, 15, 41, 50, 36, 34, 48, 31, 867, 1712, 45, 41, 39, 36, 20, 34, 37, 50, 13, 2756, 2756, 34, 37, 18, 23, 6, 19, 2, 3, 49, 12, 35, 11, 1074, 2612, 45, 18, 2, 37, 3, 6, 2756, 2756, 35, 23, 11, 2756, 2756, 13, 7, 51, 10, 30, 15, 40, 26, 37, 3, 21, 46, 743, 2135, 43, 37, 30, 46, 7, 26, 51, 21, 10, 2756, 2756, 12, 48, 41, 32, 26, 13, 9, 29, 30, 25, 1, 8, 754, 1590, 46, 48, 8, 12, 1, 32, 25, 41, 9, 2756, 2756, 12, 7, 2, 39, 10, 46, 51, 21, 41, 23, 20, 27, 462, 1113, 30, 41, 2, 27, 12, 23, 39, 2756, 2756, 51, 10, 2756, 2756, 29, 18, 32, 43, 25, 47, 35, 27, 28, 14, 15, 0, 2335, 808, 13, 32, 0, 29, 15, 25, 27, 2756, 2756, 18, 35, 2756, 2756, 50, 33, 30, 25, 34, 18, 22, 44, 0, 24, 5, 39, 1646, 1220, 17, 44, 25, 39, 33, 5, 18, 0, 50, 2756, 2756, 31, 8, 27, 29, 28, 9, 4, 
42, 21, 7, 47, 33, 1673, 2243, 16, 27, 33, 28, 21, 29, 4, 8, 47, 2756, 2756, 23, 36, 17, 46, 50, 19, 6, 39, 18, 27, 38, 34, 1063, 1490, 33, 6, 36, 18, 17, 39, 2756, 2756, 46, 38, 50, 2756, 2756, 27, 47, 24, 28, 4, 0, 14, 21, 20, 6, 23, 33, 1460, 1125, 26, 0, 21, 47, 6, 28, 14, 24, 23, 2756, 2756, 41, 33, 2, 18, 30, 24, 45, 8, 40, 51, 48, 43, 2214, 2333, 3, 40, 2, 51, 24, 2756, 18, 48, 33, 8, 2756, 2756" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +Rewards() = [1014, -997] +Returns() = [1127, -894] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/cursor_go(board_size=5,max_cursor_moves=7).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/cursor_go(board_size=5,max_cursor_moves=7).txt new file mode 100644 index 0000000..1c4e773 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/cursor_go(board_size=5,max_cursor_moves=7).txt @@ -0,0 +1,413 @@ +game: cursor_go(board_size=5,max_cursor_moves=7) + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Cursor Go" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["board_size", "handicap", "komi", "max_cursor_moves"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "cursor_go" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 6 +PolicyTensorShape() = [6] +MaxChanceOutcomes() = 0 +GetParameters() = {board_size=5,handicap=0,komi=7.5,max_cursor_moves=7} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [6, 5, 5] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 150 +MaxGameLength() = 400 +ToString() = "cursor_go(board_size=5,max_cursor_moves=7)" + +# State 0 +# CursorGoState(komi=7.5, to_play=B, history.size()=0, cursor_moves_count=0) +# +# 5 +++++ +# 4 +++++ +# 3 +++++ +# 2 +++++ +# 1 +++++ +# ABCDE +# +# Cursor: c3 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = "CursorGoState(komi=7.5, to_play=B, history.size()=0, cursor_moves_count=0)\n\n 5 +++++\n 4 +++++\n 3 +++++\n 2 +++++\n 1 +++++\n ABCDE\n\nCursor: c3" +ObservationString(1) = "CursorGoState(komi=7.5, to_play=B, history.size()=0, cursor_moves_count=0)\n\n 5 +++++\n 4 +++++\n 3 +++++\n 2 +++++\n 1 +++++\n ABCDE\n\nCursor: c3" +ObservationTensor(0): +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◉◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◉◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["Up", "Down", "Left", "Right", "Place Stone", "Pass"] + +# Apply action "Down" +action: 1 + +# State 1 +# CursorGoState(komi=7.5, 
to_play=B, history.size()=1, cursor_moves_count=1) +# +# 5 +++++ +# 4 +++++ +# 3 +++++ +# 2 +++++ +# 1 +++++ +# ABCDE +# +# Cursor: c2 +IsTerminal() = False +History() = [1] +HistoryString() = "1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "1" +InformationStateString(1) = "1" +ObservationString(0) = "CursorGoState(komi=7.5, to_play=B, history.size()=1, cursor_moves_count=1)\n\n 5 +++++\n 4 +++++\n 3 +++++\n 2 +++++\n 1 +++++\n ABCDE\n\nCursor: c2" +ObservationString(1) = "CursorGoState(komi=7.5, to_play=B, history.size()=1, cursor_moves_count=1)\n\n 5 +++++\n 4 +++++\n 3 +++++\n 2 +++++\n 1 +++++\n ABCDE\n\nCursor: c2" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["Up", "Down", "Left", "Right", "Place Stone", "Pass"] + +# Apply action "Right" +action: 3 + +# State 2 +# CursorGoState(komi=7.5, to_play=B, history.size()=2, cursor_moves_count=2) +# +# 5 +++++ +# 4 +++++ +# 3 +++++ +# 2 +++++ +# 1 +++++ +# ABCDE +# +# Cursor: d2 +IsTerminal() = False +History() = [1, 3] +HistoryString() = "1, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "1, 3" +InformationStateString(1) = "1, 3" +ObservationString(0) = "CursorGoState(komi=7.5, to_play=B, history.size()=2, cursor_moves_count=2)\n\n 5 +++++\n 4 +++++\n 3 +++++\n 2 +++++\n 1 +++++\n ABCDE\n\nCursor: d2" +ObservationString(1) = "CursorGoState(komi=7.5, to_play=B, history.size()=2, cursor_moves_count=2)\n\n 5 +++++\n 4 +++++\n 3 +++++\n 2 +++++\n 1 +++++\n ABCDE\n\nCursor: d2" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571, 0.28571] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["Up", "Down", "Left", "Right", "Place Stone", "Pass"] + +# Apply action "Place Stone" +action: 4 + +# State 3 +# CursorGoState(komi=7.5, to_play=W, history.size()=3, cursor_moves_count=0) +# +# 5 +++++ +# 4 +++++ +# 3 +++++ +# 2 +++X+ +# 1 +++++ +# ABCDE +# +# Cursor: c3 +IsTerminal() = False +History() = [1, 3, 4] +HistoryString() = "1, 3, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "1, 3, 4" +InformationStateString(1) = "1, 3, 4" +ObservationString(0) = "CursorGoState(komi=7.5, to_play=W, history.size()=3, cursor_moves_count=0)\n\n 5 +++++\n 4 +++++\n 3 +++++\n 2 +++X+\n 1 +++++\n ABCDE\n\nCursor: c3" +ObservationString(1) = "CursorGoState(komi=7.5, to_play=W, history.size()=3, cursor_moves_count=0)\n\n 5 +++++\n 4 +++++\n 3 +++++\n 2 +++X+\n 1 +++++\n ABCDE\n\nCursor: c3" +ObservationTensor(0): +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +◯◯◯◉◯ ◯◯◯◯◯ ◉◉◉◯◉ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◉◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +◯◯◯◉◯ ◯◯◯◯◯ ◉◉◉◯◉ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◉◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["Up", "Down", "Left", "Right", "Place Stone", "Pass"] + +# Apply action "Up" +action: 0 + +# State 4 +# CursorGoState(komi=7.5, to_play=W, history.size()=4, cursor_moves_count=1) +# +# 5 +++++ +# 4 +++++ +# 3 +++++ +# 2 +++X+ +# 1 +++++ +# ABCDE +# +# Cursor: c4 +IsTerminal() = False +History() = [1, 3, 4, 0] +HistoryString() = "1, 3, 4, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "1, 3, 4, 0" +InformationStateString(1) = "1, 3, 4, 0" 
+ObservationString(0) = "CursorGoState(komi=7.5, to_play=W, history.size()=4, cursor_moves_count=1)\n\n 5 +++++\n 4 +++++\n 3 +++++\n 2 +++X+\n 1 +++++\n ABCDE\n\nCursor: c4" +ObservationString(1) = "CursorGoState(komi=7.5, to_play=W, history.size()=4, cursor_moves_count=1)\n\n 5 +++++\n 4 +++++\n 3 +++++\n 2 +++X+\n 1 +++++\n ABCDE\n\nCursor: c4" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286, 0.14286] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["Up", "Down", "Left", "Right", "Place Stone", "Pass"] + +# Apply action "Pass" +action: 5 + +# State 5 +# Apply action "Right" +action: 3 + +# State 6 +# Apply action "Left" +action: 2 + +# State 7 +# Apply action "Right" +action: 3 + +# State 8 +# Apply action "Place Stone" +action: 4 + +# State 9 +# CursorGoState(komi=7.5, to_play=W, history.size()=9, cursor_moves_count=0) +# +# 5 +++++ +# 4 +++++ +# 3 +++++ +# 2 +++XX +# 1 +++++ +# ABCDE +# +# Cursor: c4 +IsTerminal() = False +History() = [1, 3, 4, 0, 5, 3, 2, 3, 4] +HistoryString() = "1, 3, 4, 0, 5, 3, 2, 3, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "1, 3, 4, 0, 5, 3, 2, 3, 4" +InformationStateString(1) = "1, 3, 4, 0, 5, 3, 2, 3, 4" +ObservationString(0) = "CursorGoState(komi=7.5, to_play=W, history.size()=9, cursor_moves_count=0)\n\n 5 +++++\n 4 +++++\n 3 +++++\n 2 +++XX\n 1 +++++\n ABCDE\n\nCursor: c4" +ObservationString(1) = "CursorGoState(komi=7.5, to_play=W, history.size()=9, cursor_moves_count=0)\n\n 5 +++++\n 4 +++++\n 3 +++++\n 2 +++XX\n 1 +++++\n ABCDE\n\nCursor: c4" +ObservationTensor(0): +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +◯◯◯◉◉ ◯◯◯◯◯ ◉◉◉◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◉◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯ 
◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +◯◯◯◉◉ ◯◯◯◯◯ ◉◉◉◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◉◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["Up", "Down", "Left", "Right", "Place Stone", "Pass"] + +# Apply action "Right" +action: 3 + +# State 10 +# Apply action "Place Stone" +action: 4 + +# State 11 +# Apply action "Left" +action: 2 + +# State 12 +# Apply action "Down" +action: 1 + +# State 13 +# Apply action "Place Stone" +action: 4 + +# State 14 +# Apply action "Up" +action: 0 + +# State 15 +# Apply action "Left" +action: 2 + +# State 16 +# Apply action "Left" +action: 2 + +# State 17 +# Apply action "Left" +action: 2 + +# State 18 +# Apply action "Right" +action: 3 + +# State 19 +# Apply action "Right" +action: 3 + +# State 20 +# CursorGoState(komi=7.5, to_play=W, history.size()=20, cursor_moves_count=6) +# +# 5 +++++ +# 4 +++O+ +# 3 +++++ +# 2 +++XX +# 1 +++X+ +# ABCDE +# +# Cursor: c5 +IsTerminal() = False +History() = [1, 3, 4, 0, 5, 3, 2, 3, 4, 3, 4, 2, 1, 4, 0, 2, 2, 2, 3, 3] +HistoryString() = "1, 3, 4, 0, 5, 3, 2, 3, 4, 3, 4, 2, 1, 4, 0, 2, 2, 2, 3, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "1, 3, 4, 0, 5, 3, 2, 3, 4, 3, 4, 2, 1, 4, 0, 2, 2, 2, 3, 3" +InformationStateString(1) = "1, 3, 4, 0, 5, 3, 2, 3, 4, 3, 4, 2, 1, 4, 0, 2, 2, 2, 3, 3" +ObservationString(0) = "CursorGoState(komi=7.5, to_play=W, history.size()=20, cursor_moves_count=6)\n\n 5 +++++\n 4 +++O+\n 3 +++++\n 2 +++XX\n 1 +++X+\n ABCDE\n\nCursor: c5" +ObservationString(1) = "CursorGoState(komi=7.5, to_play=W, history.size()=20, cursor_moves_count=6)\n\n 5 +++++\n 4 +++O+\n 3 +++++\n 2 +++XX\n 1 +++X+\n ABCDE\n\nCursor: c5" +ObservationTensor(0) = [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714] +ObservationTensor(1) = [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 0.85714, 
0.85714, 0.85714, 0.85714] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 3, 4, 5] +StringLegalActions() = ["Down", "Left", "Right", "Place Stone", "Pass"] + +# Apply action "Right" +action: 3 + +# State 21 +# Apply action "Pass" +action: 5 + +# State 22 +# CursorGoState(komi=7.5, to_play=B, history.size()=22, cursor_moves_count=0) +# +# 5 +++++ +# 4 +++O+ +# 3 +++++ +# 2 +++XX +# 1 +++X+ +# ABCDE +# +# Cursor: d1 +IsTerminal() = False +History() = [1, 3, 4, 0, 5, 3, 2, 3, 4, 3, 4, 2, 1, 4, 0, 2, 2, 2, 3, 3, 3, 5] +HistoryString() = "1, 3, 4, 0, 5, 3, 2, 3, 4, 3, 4, 2, 1, 4, 0, 2, 2, 2, 3, 3, 3, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "1, 3, 4, 0, 5, 3, 2, 3, 4, 3, 4, 2, 1, 4, 0, 2, 2, 2, 3, 3, 3, 5" +InformationStateString(1) = "1, 3, 4, 0, 5, 3, 2, 3, 4, 3, 4, 2, 1, 4, 0, 2, 2, 2, 3, 3, 3, 5" +ObservationString(0) = "CursorGoState(komi=7.5, to_play=B, history.size()=22, cursor_moves_count=0)\n\n 5 +++++\n 4 +++O+\n 3 +++++\n 2 +++XX\n 1 +++X+\n ABCDE\n\nCursor: d1" +ObservationString(1) = "CursorGoState(komi=7.5, to_play=B, history.size()=22, cursor_moves_count=0)\n\n 5 +++++\n 4 +++O+\n 3 +++++\n 2 +++XX\n 1 +++X+\n ABCDE\n\nCursor: d1" +ObservationTensor(0): +◯◯◯◉◯ ◯◯◯◯◯ ◉◉◉◯◉ ◯◯◯◉◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◉◉ ◯◯◯◯◯ ◉◉◉◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◉◯ ◉◉◉◯◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◉◯ ◯◯◯◯◯ ◉◉◉◯◉ ◯◯◯◉◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◉◉ ◯◯◯◯◯ ◉◉◉◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◉◯ ◉◉◉◯◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 2, 3, 5] +StringLegalActions() = ["Up", "Left", "Right", "Pass"] + +# Apply action "Pass" +action: 5 + +# State 23 +# CursorGoState(komi=7.5, history.size()=23) +# +# 5 +++++ +# 4 +++O+ +# 3 +++++ +# 2 +++XX +# 1 +++X+ +# ABCDE +IsTerminal() = True +History() = [1, 3, 4, 0, 5, 3, 2, 3, 4, 3, 4, 2, 1, 4, 0, 2, 2, 2, 3, 3, 3, 5, 5] +HistoryString() = "1, 3, 4, 0, 5, 3, 2, 3, 4, 3, 4, 2, 1, 4, 0, 2, 2, 2, 3, 3, 3, 5, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "1, 3, 4, 0, 5, 3, 2, 3, 4, 3, 4, 2, 1, 4, 0, 2, 2, 2, 3, 3, 3, 5, 5" +InformationStateString(1) = "1, 3, 4, 0, 5, 3, 2, 3, 4, 3, 4, 2, 1, 4, 0, 2, 2, 2, 3, 3, 3, 5, 5" +ObservationString(0) = "CursorGoState(komi=7.5, history.size()=23)\n\n 5 +++++\n 4 +++O+\n 3 +++++\n 2 +++XX\n 1 +++X+\n ABCDE\n" +ObservationString(1) = "CursorGoState(komi=7.5, history.size()=23)\n\n 5 +++++\n 4 +++O+\n 3 +++++\n 2 +++XX\n 1 +++X+\n ABCDE\n" +ObservationTensor(0): +◯◯◯◉◯ ◯◯◯◯◯ ◉◉◉◯◉ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +◯◯◯◉◉ ◯◯◯◯◯ ◉◉◉◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◉◯ ◉◉◉◯◉ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◉◯ ◉◉◉◉◉ ◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◉◯ ◯◯◯◯◯ ◉◉◉◯◉ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +◯◯◯◉◉ ◯◯◯◯◯ ◉◉◉◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◉◯ ◉◉◉◯◉ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◉◯ ◉◉◉◉◉ ◯◯◯◯◯ +Rewards() = [-1, 1] +Returns() = [-1, 1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/dark_chess(board_size=4).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/dark_chess(board_size=4).txt new file mode 100644 index 0000000..0803096 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/dark_chess(board_size=4).txt @@ -0,0 +1,1246 @@ +game: 
dark_chess(board_size=4) + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Dark Chess" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["board_size", "fen"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "dark_chess" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 4674 +PolicyTensorShape() = [4674] +MaxChanceOutcomes() = 0 +GetParameters() = {board_size=4,fen=r1kr/pppp/PPPP/R1KR w - - 0 1} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = public_K_pieces: [4, 4], public_k_pieces: [4, 4], public_Q_pieces: [4, 4], public_q_pieces: [4, 4], public_R_pieces: [4, 4], public_r_pieces: [4, 4], public_B_pieces: [4, 4], public_b_pieces: [4, 4], public_N_pieces: [4, 4], public_n_pieces: [4, 4], public_P_pieces: [4, 4], public_p_pieces: [4, 4], public_empty_pieces: [4, 4], repetitions: [3], side_to_play: [2], irreversible_move_counter: [1], private_K_pieces: [4, 4], private_k_pieces: [4, 4], private_Q_pieces: [4, 4], private_q_pieces: [4, 4], private_R_pieces: [4, 4], private_r_pieces: [4, 4], private_B_pieces: [4, 4], private_b_pieces: [4, 4], private_N_pieces: [4, 4], private_n_pieces: [4, 4], private_P_pieces: [4, 4], private_p_pieces: [4, 4], private_empty_pieces: [4, 4], private_unknown_squares: [4, 4], private_left_castling: [2], private_right_castling: [2] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 442 +MaxGameLength() = 17695 +ToString() = "dark_chess(board_size=4)" + +# State 0 +# r1kr/pppp/PPPP/R1KR w - - 0 1 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "r1kr/pppp/PPPP/???? 
w - - 0 1" +ObservationString(1) = "????/pppp/PPPP/R1KR w - - 0 1" +ObservationTensor(0).public_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_r_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_P_pieces: ◯◉◯◯ + ◯◉◯◯ + ◯◉◯◯ + ◯◉◯◯ +ObservationTensor(0).public_p_pieces: ◯◯◉◯ + ◯◯◉◯ + ◯◯◉◯ + ◯◯◉◯ +ObservationTensor(0).public_empty_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).repetitions: ◉◯◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).irreversible_move_counter: ◯ +ObservationTensor(0).private_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ +ObservationTensor(0).private_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ +ObservationTensor(0).private_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_empty_pieces: ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_unknown_squares: ◉◉◉◯ + ◉◉◉◯ + ◉◉◉◯ + ◉◉◉◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(1).public_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_r_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_P_pieces: ◯◉◯◯ + ◯◉◯◯ + ◯◉◯◯ + ◯◉◯◯ +ObservationTensor(1).public_p_pieces: ◯◯◉◯ + ◯◯◉◯ + ◯◯◉◯ + ◯◯◉◯ +ObservationTensor(1).public_empty_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).repetitions: ◉◯◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).irreversible_move_counter: ◯ +ObservationTensor(1).private_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_R_pieces: ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ 
+ObservationTensor(1).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_empty_pieces: ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_unknown_squares: ◯◉◉◉ + ◯◉◉◉ + ◯◉◉◉ + ◯◉◉◉ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [30, 117, 701, 714, 1197, 1285, 1298, 1882] +StringLegalActions() = ["Rb1", "axb3", "bxc3", "bxa3", "Kb1", "cxd3", "cxb3", "dxc3"] + +# Apply action "axb3" +action: 117 + +# State 1 +# r1kr/pPpp/1PPP/R1KR b - - 0 1 +IsTerminal() = False +History() = [117] +HistoryString() = "117" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "r1kr/pPpp/1PPP/???? b - - 0 1" +ObservationString(1) = "r1k?/pPpp/1PPP/R1KR b - - 0 1" +ObservationTensor(0).public_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ +ObservationTensor(0).public_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_r_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_P_pieces: ◯◯◯◯ + ◯◉◉◯ + ◯◉◯◯ + ◯◉◯◯ +ObservationTensor(0).public_p_pieces: ◯◯◉◯ + ◯◯◯◯ + ◯◯◉◯ + ◯◯◉◯ +ObservationTensor(0).public_empty_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).repetitions: ◉◯◯ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).irreversible_move_counter: ◯ +ObservationTensor(0).private_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ +ObservationTensor(0).private_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_empty_pieces: ◯◉◯◯ + ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_unknown_squares: ◉◯◉◯ + ◉◉◉◯ + ◉◉◉◉ + ◉◉◉◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(1).public_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ +ObservationTensor(1).public_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_r_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ 
+ ◯◯◯◯ +ObservationTensor(1).public_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_P_pieces: ◯◯◯◯ + ◯◉◉◯ + ◯◉◯◯ + ◯◉◯◯ +ObservationTensor(1).public_p_pieces: ◯◯◉◯ + ◯◯◯◯ + ◯◯◉◯ + ◯◯◉◯ +ObservationTensor(1).public_empty_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).repetitions: ◉◯◯ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).irreversible_move_counter: ◯ +ObservationTensor(1).private_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_R_pieces: ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_empty_pieces: ◯◉◯◯ + ◉◯◯◉ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_unknown_squares: ◯◯◉◯ + ◯◉◉◯ + ◯◉◉◉ + ◯◉◉◉ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [30, 89, 117, 1197, 1225, 1285, 1298, 1882] +StringLegalActions() = ["Rb4", "a2", "axb2", "Kb4", "Kxb3", "cxd2", "cxb2", "dxc2"] + +# Apply action "cxd2" +action: 1285 + +# State 2 +# r1kr/pP1p/1PPp/R1KR w - - 0 2 +IsTerminal() = False +History() = [117, 1285] +HistoryString() = "117, 1285" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "r1kr/pP1p/1PPp/??K? 
w - - 0 2" +ObservationString(1) = "r1k?/pP1p/1PPp/R1KR w - - 0 2" +ObservationTensor(0).public_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ +ObservationTensor(0).public_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_r_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_P_pieces: ◯◯◯◯ + ◯◉◉◯ + ◯◉◯◯ + ◯◯◯◯ +ObservationTensor(0).public_p_pieces: ◯◯◉◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◉◉◯ +ObservationTensor(0).public_empty_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).repetitions: ◉◯◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).irreversible_move_counter: ◯ +ObservationTensor(0).private_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ +ObservationTensor(0).private_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_empty_pieces: ◯◉◯◯ + ◯◯◯◉ + ◯◯◉◯ + ◯◯◯◯ +ObservationTensor(0).private_unknown_squares: ◉◯◉◯ + ◉◉◉◯ + ◉◉◯◉ + ◉◉◉◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(1).public_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ +ObservationTensor(1).public_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_r_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_P_pieces: ◯◯◯◯ + ◯◉◉◯ + ◯◉◯◯ + ◯◯◯◯ +ObservationTensor(1).public_p_pieces: ◯◯◉◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◉◉◯ +ObservationTensor(1).public_empty_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).repetitions: ◉◯◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).irreversible_move_counter: ◯ +ObservationTensor(1).private_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_R_pieces: ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ 
+ObservationTensor(1).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_empty_pieces: ◯◉◯◯ + ◉◯◯◉ + ◯◯◉◯ + ◯◯◯◯ +ObservationTensor(1).private_unknown_squares: ◯◯◉◯ + ◯◉◉◯ + ◉◉◯◉ + ◯◉◉◉ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [16, 17, 30, 714, 730, 731, 732, 733, 734, 735, 736, 737, 738, 746, 774, 787, 1197, 1212, 1257, 1285, 1768] +StringLegalActions() = ["Ra2", "Rxa3", "Rb1", "bxa3", "b4=R", "bxc4=R", "bxa4=R", "b4=B", "bxc4=B", "bxa4=B", "b4=N", "bxc4=N", "bxa4=N", "b4=Q", "bxc4=Q", "bxa4=Q", "Kb1", "Kxd2", "c3", "cxd3", "Rxd2"] + +# Apply action "cxd3" +action: 1285 + +# State 3 +# r1kr/pP1P/1P1p/R1KR b - - 0 2 +IsTerminal() = False +History() = [117, 1285, 1285] +HistoryString() = "117, 1285, 1285" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "r1kr/pP1P/1P?p/??K? b - - 0 2" +ObservationString(1) = "r1k?/pP?P/1P1p/R1KR b - - 0 2" +ObservationTensor(0).public_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ +ObservationTensor(0).public_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_r_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_P_pieces: ◯◯◯◯ + ◯◉◉◯ + ◯◯◯◯ + ◯◯◉◯ +ObservationTensor(0).public_p_pieces: ◯◯◉◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◉◯◯ +ObservationTensor(0).public_empty_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).repetitions: ◉◯◯ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).irreversible_move_counter: ◯ +ObservationTensor(0).private_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ +ObservationTensor(0).private_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_empty_pieces: ◯◉◯◯ + ◯◯◯◉ + ◯◯◉◯ + ◯◯◯◯ +ObservationTensor(0).private_unknown_squares: ◉◯◉◯ + ◉◉◉◯ + ◉◉◯◉ + ◉◉◉◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(1).public_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ +ObservationTensor(1).public_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_q_pieces: ◯◯◯◯ + 
◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_r_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_P_pieces: ◯◯◯◯ + ◯◉◉◯ + ◯◯◯◯ + ◯◯◉◯ +ObservationTensor(1).public_p_pieces: ◯◯◉◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◉◯◯ +ObservationTensor(1).public_empty_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).repetitions: ◉◯◯ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).irreversible_move_counter: ◯ +ObservationTensor(1).private_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_R_pieces: ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_empty_pieces: ◯◉◯◯ + ◉◯◯◉ + ◯◉◯◯ + ◯◯◯◯ +ObservationTensor(1).private_unknown_squares: ◯◯◉◯ + ◯◉◉◯ + ◉◯◉◉ + ◯◉◉◉ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [30, 89, 117, 1184, 1197, 1212, 1225, 1768, 1900, 1903, 1906, 1955] +StringLegalActions() = ["Rb4", "a2", "axb2", "Kc3", "Kb4", "Kxd3", "Kxb3", "Rxd3", "dxc1=R", "dxc1=B", "dxc1=N", "dxc1=Q"] + +# Apply action "dxc1=Q" +action: 1955 + +# State 4 +# r1kr/pP1P/1P2/R1qR w - - 0 3 +IsTerminal() = True +History() = [117, 1285, 1285, 1955] +HistoryString() = "117, 1285, 1285, 1955" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "r1kr/pP1P/1P2/R1qR w - - 0 3" +ObservationString(1) = "r1k?/pP?P/1P?1/R1qR w - - 0 3" +ObservationTensor(0).public_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ +ObservationTensor(0).public_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_R_pieces: ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ +ObservationTensor(0).public_r_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_P_pieces: ◯◯◯◯ + ◯◉◉◯ + ◯◯◯◯ + ◯◯◉◯ +ObservationTensor(0).public_p_pieces: ◯◯◉◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_empty_pieces: ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).repetitions: ◉◯◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).irreversible_move_counter: ◯ +ObservationTensor(0).private_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_Q_pieces: ◯◯◯◯ + ◯◯◯◯ 
+ ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ +ObservationTensor(0).private_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_empty_pieces: ◯◉◯◯ + ◯◯◯◉ + ◯◉◉◯ + ◯◉◯◯ +ObservationTensor(0).private_unknown_squares: ◉◯◉◯ + ◉◉◉◯ + ◉◯◯◉ + ◉◯◉◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(1).public_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ +ObservationTensor(1).public_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_R_pieces: ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ +ObservationTensor(1).public_r_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_P_pieces: ◯◯◯◯ + ◯◉◉◯ + ◯◯◯◯ + ◯◯◉◯ +ObservationTensor(1).public_p_pieces: ◯◯◉◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_empty_pieces: ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).repetitions: ◉◯◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).irreversible_move_counter: ◯ +ObservationTensor(1).private_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_empty_pieces: ◯◉◯◯ + ◯◯◯◉ + ◯◯◯◯ + ◯◉◯◯ +ObservationTensor(1).private_unknown_squares: ◉◯◉◯ + ◉◉◉◯ + ◉◉◉◉ + ◉◯◉◉ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/dark_chess.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/dark_chess.txt new file mode 100644 index 0000000..92c39e4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/dark_chess.txt @@ -0,0 +1,6238 @@ +game: dark_chess + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Dark Chess" +GameType.max_num_players = 2 +GameType.min_num_players = 2 
+GameType.parameter_specification = ["board_size", "fen"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "dark_chess" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 4674 +PolicyTensorShape() = [4674] +MaxChanceOutcomes() = 0 +GetParameters() = {board_size=8,fen=rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = public_K_pieces: [8, 8], public_k_pieces: [8, 8], public_Q_pieces: [8, 8], public_q_pieces: [8, 8], public_R_pieces: [8, 8], public_r_pieces: [8, 8], public_B_pieces: [8, 8], public_b_pieces: [8, 8], public_N_pieces: [8, 8], public_n_pieces: [8, 8], public_P_pieces: [8, 8], public_p_pieces: [8, 8], public_empty_pieces: [8, 8], repetitions: [3], side_to_play: [2], irreversible_move_counter: [1], private_K_pieces: [8, 8], private_k_pieces: [8, 8], private_Q_pieces: [8, 8], private_q_pieces: [8, 8], private_R_pieces: [8, 8], private_r_pieces: [8, 8], private_B_pieces: [8, 8], private_b_pieces: [8, 8], private_N_pieces: [8, 8], private_n_pieces: [8, 8], private_P_pieces: [8, 8], private_p_pieces: [8, 8], private_empty_pieces: [8, 8], private_unknown_squares: [8, 8], private_left_castling: [2], private_right_castling: [2] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 1738 +MaxGameLength() = 17695 +ToString() = "dark_chess()" + +# State 0 +# rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "rnbqkbnr/pppppppp/8/8/????????/????????/????????/???????? 
w kq - 0 1" +ObservationString(1) = "????????/????????/????????/????????/8/8/PPPPPPPP/RNBQKBNR w KQ - 0 1" +ObservationTensor(0).public_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_empty_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).repetitions: ◉◯◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).irreversible_move_counter: ◯ +ObservationTensor(0).private_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ +ObservationTensor(0).private_empty_pieces: ◯◯◯◯◉◉◯◯ + ◯◯◯◯◉◉◯◯ + ◯◯◯◯◉◉◯◯ + ◯◯◯◯◉◉◯◯ + ◯◯◯◯◉◉◯◯ + ◯◯◯◯◉◉◯◯ + ◯◯◯◯◉◉◯◯ + ◯◯◯◯◉◉◯◯ +ObservationTensor(0).private_unknown_squares: 
◉◉◉◉◯◯◯◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◉◯◯◯◯ +ObservationTensor(0).private_left_castling: ◯◉ +ObservationTensor(0).private_right_castling: ◯◉ +ObservationTensor(1).public_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_empty_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).repetitions: ◉◯◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).irreversible_move_counter: ◯ +ObservationTensor(1).private_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_R_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_N_pieces: ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_P_pieces: ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_empty_pieces: ◯◯◉◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ + 
◯◯◉◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ +ObservationTensor(1).private_unknown_squares: ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ +ObservationTensor(1).private_left_castling: ◯◉ +ObservationTensor(1).private_right_castling: ◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [89, 90, 652, 656, 673, 674, 1257, 1258, 1841, 1842, 2425, 2426, 3009, 3010, 3572, 3576, 3593, 3594, 4177, 4178] +StringLegalActions() = ["a3", "a4", "Na3", "Nc3", "b3", "b4", "c3", "c4", "d3", "d4", "e3", "e4", "f3", "f4", "Nf3", "Nh3", "g3", "g4", "h3", "h4"] + +# Apply action "Nh3" +action: 3576 + +# State 1 +# rnbqkbnr/pppppppp/8/8/8/7N/PPPPPPPP/RNBQKB1R b KQkq - 1 1 +IsTerminal() = False +History() = [3576] +HistoryString() = "3576" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "rnbqkbnr/pppppppp/8/8/????????/????????/????????/???????? b kq - 1 1" +ObservationString(1) = "????????/????????/????????/??????1?/7?/7N/PPPPPPPP/RNBQKB1R b KQ - 1 1" +ObservationTensor(0).public_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_empty_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).repetitions: ◉◯◯ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).irreversible_move_counter = [0.01] +ObservationTensor(0).private_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ 
+ObservationTensor(0).private_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ +ObservationTensor(0).private_empty_pieces: ◯◯◯◯◉◉◯◯ + ◯◯◯◯◉◉◯◯ + ◯◯◯◯◉◉◯◯ + ◯◯◯◯◉◉◯◯ + ◯◯◯◯◉◉◯◯ + ◯◯◯◯◉◉◯◯ + ◯◯◯◯◉◉◯◯ + ◯◯◯◯◉◉◯◯ +ObservationTensor(0).private_unknown_squares: ◉◉◉◉◯◯◯◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◉◯◯◯◯ +ObservationTensor(0).private_left_castling: ◯◉ +ObservationTensor(0).private_right_castling: ◯◉ +ObservationTensor(1).public_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_empty_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).repetitions: ◉◯◯ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).irreversible_move_counter = [0.01] +ObservationTensor(1).private_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_R_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: 
◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_N_pieces: ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_P_pieces: ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_empty_pieces: ◯◯◉◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◉◯◉◉◉◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_unknown_squares: ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◯◉◉◉ + ◯◯◯◉◉◉◉◉ +ObservationTensor(1).private_left_castling: ◯◉ +ObservationTensor(1).private_right_castling: ◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [89, 90, 652, 656, 673, 674, 1257, 1258, 1841, 1842, 2425, 2426, 3009, 3010, 3572, 3576, 3593, 3594, 4177, 4178] +StringLegalActions() = ["a6", "a5", "Na6", "Nc6", "b6", "b5", "c6", "c5", "d6", "d5", "e6", "e5", "f6", "f5", "Nf6", "Nh6", "g6", "g5", "h6", "h5"] + +# Apply action "d6" +action: 1841 + +# State 2 +# rnbqkbnr/ppp1pppp/3p4/8/8/7N/PPPPPPPP/RNBQKB1R w KQkq - 0 2 +IsTerminal() = False +History() = [3576, 1841] +HistoryString() = "3576, 1841" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "rnbqkbnr/ppp1pppp/3p4/8/??????1?/???????N/????????/???????? 
w kq - 0 2" +ObservationString(1) = "????????/????????/????????/??????1?/7?/7N/PPPPPPPP/RNBQKB1R w KQ - 0 2" +ObservationTensor(0).public_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_empty_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).repetitions: ◉◯◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).irreversible_move_counter: ◯ +ObservationTensor(0).private_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ +ObservationTensor(0).private_empty_pieces: ◯◯◯◯◉◉◯◯ + ◯◯◯◯◉◉◯◯ + ◯◯◯◯◉◉◯◯ + ◯◯◯◯◉◯◉◯ + ◯◯◯◯◉◉◯◯ + ◯◯◯◯◉◉◯◯ + ◯◯◯◉◉◉◯◯ + ◯◯◯◯◉◉◯◯ 
+ObservationTensor(0).private_unknown_squares: ◉◉◉◉◯◯◯◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◯◯◯◯◯ + ◉◉◯◉◯◯◯◯ +ObservationTensor(0).private_left_castling: ◯◉ +ObservationTensor(0).private_right_castling: ◯◉ +ObservationTensor(1).public_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_empty_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).repetitions: ◉◯◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).irreversible_move_counter: ◯ +ObservationTensor(1).private_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_R_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_N_pieces: ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_P_pieces: ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_empty_pieces: ◯◯◉◉◯◯◯◯ + 
◯◯◉◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◉◯◉◉◉◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_unknown_squares: ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◯◉◉◉ + ◯◯◯◉◉◉◉◉ +ObservationTensor(1).private_left_castling: ◯◉ +ObservationTensor(1).private_right_castling: ◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [89, 90, 652, 656, 673, 674, 1257, 1258, 1841, 1842, 2425, 2426, 3009, 3010, 3593, 3594, 4117, 4300, 4301, 4302] +StringLegalActions() = ["a3", "a4", "Na3", "Nc3", "b3", "b4", "c3", "c4", "d3", "d4", "e3", "e4", "f3", "f4", "g3", "g4", "Rg1", "Nf4", "Ng1", "Ng5"] + +# Apply action "Rg1" +action: 4117 + +# State 3 +# rnbqkbnr/ppp1pppp/3p4/8/8/7N/PPPPPPPP/RNBQKBR1 b Qkq - 1 2 +IsTerminal() = False +History() = [3576, 1841, 4117] +HistoryString() = "3576, 1841, 4117" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "rnbqkbnr/ppp1pppp/3p4/8/??????1?/???????N/????????/???????? b kq - 1 2" +ObservationString(1) = "????????/????????/????????/??????1?/7?/7N/PPPPPPPP/RNBQKBR1 b Q - 1 2" +ObservationTensor(0).public_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_empty_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).repetitions: ◉◯◯ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).irreversible_move_counter = [0.01] +ObservationTensor(0).private_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ 
+ ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ +ObservationTensor(0).private_empty_pieces: ◯◯◯◯◉◉◯◯ + ◯◯◯◯◉◉◯◯ + ◯◯◯◯◉◉◯◯ + ◯◯◯◯◉◯◉◯ + ◯◯◯◯◉◉◯◯ + ◯◯◯◯◉◉◯◯ + ◯◯◯◉◉◉◯◯ + ◯◯◯◯◉◉◯◯ +ObservationTensor(0).private_unknown_squares: ◉◉◉◉◯◯◯◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◯◯◯◯◯ + ◉◉◯◉◯◯◯◯ +ObservationTensor(0).private_left_castling: ◯◉ +ObservationTensor(0).private_right_castling: ◯◉ +ObservationTensor(1).public_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_empty_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).repetitions: ◉◯◯ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).irreversible_move_counter = [0.01] +ObservationTensor(1).private_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_R_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ 
+ ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_N_pieces: ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_P_pieces: ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_empty_pieces: ◯◯◉◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◉◉◉◯◯◯ + ◉◯◯◯◯◯◯◯ +ObservationTensor(1).private_unknown_squares: ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◯◉◉◉ + ◯◯◯◉◉◉◉◉ +ObservationTensor(1).private_left_castling: ◯◉ +ObservationTensor(1).private_right_castling: ◉◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [89, 90, 652, 654, 656, 673, 674, 1212, 1213, 1214, 1215, 1216, 1257, 1258, 1768, 1914, 2393, 2425, 2426, 3009, 3010, 3572, 3576, 3593, 3594, 4177, 4178] +StringLegalActions() = ["a6", "a5", "Na6", "Nd7", "Nc6", "b6", "b5", "Bd7", "Be6", "Bf5", "Bg4", "Bxh3", "c6", "c5", "Qd7", "d5", "Kd7", "e6", "e5", "f6", "f5", "Nf6", "Nh6", "g6", "g5", "h6", "h5"] + +# Apply action "Be6" +action: 1213 + +# State 4 +# rn1qkbnr/ppp1pppp/3pb3/8/8/7N/PPPPPPPP/RNBQKBR1 w Qkq - 2 3 +IsTerminal() = False +History() = [3576, 1841, 4117, 1213] +HistoryString() = "3576, 1841, 4117, 1213" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "rn1qkbnr/ppp1pppp/3pb3/4?3/??1???1?/?1?????N/P???????/???????? 
w kq - 2 3" +ObservationString(1) = "????????/????????/????????/??????1?/7?/7N/PPPPPPPP/RNBQKBR1 w Q - 2 3" +ObservationTensor(0).public_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_empty_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).repetitions: ◉◯◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).irreversible_move_counter = [0.02] +ObservationTensor(0).private_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_P_pieces: ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ +ObservationTensor(0).private_empty_pieces: ◯◯◯◯◉◉◯◯ + ◯◯◉◯◉◉◯◯ + ◯◯◯◉◉◉◯◉ + ◯◯◯◯◉◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◉◯◯ + ◯◯◯◉◉◉◯◯ + ◯◯◯◯◉◉◯◯ 
+ObservationTensor(0).private_unknown_squares: ◉◯◉◉◯◯◯◯ + ◉◉◯◉◯◯◯◯ + ◉◉◉◯◯◯◯◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◉◉◯◯◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◯◯◯◯◯ + ◉◉◯◉◯◯◯◯ +ObservationTensor(0).private_left_castling: ◯◉ +ObservationTensor(0).private_right_castling: ◯◉ +ObservationTensor(1).public_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_empty_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).repetitions: ◉◯◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).irreversible_move_counter = [0.02] +ObservationTensor(1).private_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_R_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_N_pieces: ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_P_pieces: ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_empty_pieces: 
◯◯◉◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◉◉◉◯◯◯ + ◉◯◯◯◯◯◯◯ +ObservationTensor(1).private_unknown_squares: ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◯◉◉◉ + ◯◯◯◉◉◉◉◉ +ObservationTensor(1).private_left_castling: ◯◉ +ObservationTensor(1).private_right_castling: ◉◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [89, 90, 652, 656, 673, 674, 1257, 1258, 1841, 1842, 2425, 2426, 3009, 3010, 3534, 3593, 3594, 4300, 4302] +StringLegalActions() = ["a3", "a4", "Na3", "Nc3", "b3", "b4", "c3", "c4", "d3", "d4", "e3", "e4", "f3", "f4", "Rh1", "g3", "g4", "Nf4", "Ng5"] + +# Apply action "e4" +action: 2426 + +# State 5 +# rn1qkbnr/ppp1pppp/3pb3/8/4P3/7N/PPPP1PPP/RNBQKBR1 b Qkq - 0 3 +IsTerminal() = False +History() = [3576, 1841, 4117, 1213, 2426] +HistoryString() = "3576, 1841, 4117, 1213, 2426" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "rn1qkbnr/ppp1pppp/3pb3/4?3/??1???1?/?1?????N/P???????/???????? b kq - 0 3" +ObservationString(1) = "????????/????????/1???????/?1??1?2/4P2?/4?2N/PPPP1PPP/RNBQKBR1 b Q - 0 3" +ObservationTensor(0).public_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_empty_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).repetitions: ◉◯◯ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).irreversible_move_counter: ◯ +ObservationTensor(0).private_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ 
+ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_P_pieces: ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ +ObservationTensor(0).private_empty_pieces: ◯◯◯◯◉◉◯◯ + ◯◯◉◯◉◉◯◯ + ◯◯◯◉◉◉◯◉ + ◯◯◯◯◉◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◉◯◯ + ◯◯◯◉◉◉◯◯ + ◯◯◯◯◉◉◯◯ +ObservationTensor(0).private_unknown_squares: ◉◯◉◉◯◯◯◯ + ◉◉◯◉◯◯◯◯ + ◉◉◉◯◯◯◯◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◉◉◯◯◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◯◯◯◯◯ + ◉◉◯◉◯◯◯◯ +ObservationTensor(0).private_left_castling: ◯◉ +ObservationTensor(0).private_right_castling: ◯◉ +ObservationTensor(1).public_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_empty_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).repetitions: ◉◯◯ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).irreversible_move_counter: ◯ +ObservationTensor(1).private_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_R_pieces: ◉◯◯◯◯◯◯◯ + 
◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_N_pieces: ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_P_pieces: ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_empty_pieces: ◯◯◉◉◯◉◯◯ + ◯◯◉◉◉◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◉◯◯◉◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◉◉◉◯◯◯ + ◉◯◯◯◉◯◯◯ +ObservationTensor(1).private_unknown_squares: ◯◯◯◯◉◯◉◉ + ◯◯◯◯◯◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◉◯◯◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◯◉◉◉ + ◯◯◯◉◯◉◉◉ +ObservationTensor(1).private_left_castling: ◯◉ +ObservationTensor(1).private_right_castling: ◉◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [89, 90, 652, 654, 656, 673, 674, 1257, 1258, 1768, 1781, 1914, 2393, 2524, 2525, 2526, 2527, 2528, 2536, 2537, 2538, 2539, 3009, 3010, 3572, 3576, 3593, 3594, 4177, 4178] +StringLegalActions() = ["a6", "a5", "Na6", "Nd7", "Nc6", "b6", "b5", "c6", "c5", "Qd7", "Qc8", "d5", "Kd7", "Bc8", "Bd7", "Bf5", "Bg4", "Bxh3", "Bxa2", "Bb3", "Bc4", "Bd5", "f6", "f5", "Nf6", "Nh6", "g6", "g5", "h6", "h5"] + +# Apply action "f5" +action: 3010 + +# State 6 +# Apply action "Bc4" +action: 2975 + +# State 7 +# Apply action "f4" +action: 3155 + +# State 8 +# Apply action "f3" +action: 3009 + +# State 9 +# Apply action "Qd7" +action: 1768 + +# State 10 +# Apply action "Ba6" +action: 1443 + +# State 11 +# Apply action "Nc6" +action: 656 + +# State 12 +# Apply action "c4" +action: 1258 + +# State 13 +# Apply action "Nh6" +action: 3576 + +# State 14 +# Apply action "c5" +action: 1403 + +# State 15 +# Apply action "g6" +action: 3593 + +# State 16 +# Apply action "Ke2" +action: 2352 + +# State 17 +# Apply action "Ng4" +action: 4302 + +# State 18 +# Apply action "Bxb7" +action: 409 + +# State 19 +# Apply action "Nxh2" +action: 3868 + +# State 20 +# r3kb1r/pBpqp2p/2npb1p1/2P5/4Pp2/5P1N/PP1PK1Pn/RNBQ2R1 w kq - 0 11 +IsTerminal() = False +History() = [3576, 1841, 4117, 1213, 2426, 3010, 2975, 3155, 3009, 1768, 1443, 656, 1258, 3576, 1403, 3593, 2352, 4302, 409, 3868] +HistoryString() = "3576, 1841, 4117, 1213, 2426, 3010, 2975, 3155, 3009, 1768, 1443, 656, 1258, 3576, 1403, 3593, 2352, 4302, 409, 3868" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "r3kb1r/p?pqp2p/1?npb?p1/1?P5/?3?p1?/?1???P?N/P??????n/?????1?? 
w kq - 0 11" +ObservationString(1) = "r?1?????/?B??????/1?np????/??P?1?1?/2?1Pp1?/5P1N/PP1PK1P?/RNBQ2R1 w - - 0 11" +ObservationTensor(0).public_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_empty_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).repetitions: ◉◯◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).irreversible_move_counter: ◯ +ObservationTensor(0).private_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ +ObservationTensor(0).private_P_pieces: ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◉◯ +ObservationTensor(0).private_empty_pieces: ◯◯◯◯◉◉◯◯ + ◯◯◉◉◯◯◯◉ + ◯◯◯◉◯◯◯◉ + ◯◯◯◉◉◯◯◉ + ◯◯◯◯◉◯◯◯ + ◉◯◯◯◉◯◉◯ + ◯◯◯◉◉◯◉◉ + ◯◯◯◯◉◉◯◯ 
+ObservationTensor(0).private_unknown_squares: ◉◯◉◉◯◯◯◯ + ◉◉◯◯◉◉◉◯ + ◉◉◉◯◉◯◯◯ + ◉◉◉◯◯◉◯◯ + ◉◉◉◉◯◯◯◯ + ◯◉◯◯◯◉◯◯ + ◉◉◉◯◯◯◯◯ + ◉◯◯◉◯◯◯◯ +ObservationTensor(0).private_left_castling: ◯◉ +ObservationTensor(0).private_right_castling: ◯◉ +ObservationTensor(1).public_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_empty_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).repetitions: ◉◯◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).irreversible_move_counter: ◯ +ObservationTensor(1).private_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_R_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_N_pieces: ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_P_pieces: ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_empty_pieces: ◯◯◉◉◯◉◯◯ + 
◯◯◉◉◯◯◯◯ + ◯◉◉◯◯◯◯◉ + ◯◯◉◉◯◯◯◯ + ◉◯◉◯◉◯◯◯ + ◉◉◯◯◯◯◯◯ + ◯◯◉◉◉◯◯◯ + ◉◯◯◯◯◯◯◯ +ObservationTensor(1).private_unknown_squares: ◯◯◯◯◉◯◉◯ + ◯◯◯◯◉◉◯◉ + ◯◯◯◉◉◯◉◯ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◯◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◯◉◉◉ + ◯◉◯◉◉◉◉◉ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [89, 90, 652, 656, 673, 674, 1065, 1066, 1079, 1080, 1504, 1782, 1783, 1807, 1808, 1809, 1841, 1842, 2424, 2425, 2439, 2466, 2467, 2571, 3532, 3533, 3534, 3593, 3594, 4299, 4300, 4302] +StringLegalActions() = ["a3", "a4", "Na3", "Nc3", "b3", "b4", "Ba6", "Bc8", "Bxa8", "Bxc6", "cxd6", "Qe1", "Qf1", "Qa4", "Qb3", "Qc2", "d3", "d4", "Ke1", "Ke3", "Kf2", "Kd3", "Kf1", "e5", "Re1", "Rf1", "Rh1", "g3", "g4", "Nf2", "Nxf4", "Ng5"] + +# Apply action "Rh1" +action: 3534 + +# State 21 +# r3kb1r/pBpqp2p/2npb1p1/2P5/4Pp2/5P1N/PP1PK1Pn/RNBQ3R b kq - 1 11 +IsTerminal() = False +History() = [3576, 1841, 4117, 1213, 2426, 3010, 2975, 3155, 3009, 1768, 1443, 656, 1258, 3576, 1403, 3593, 2352, 4302, 409, 3868, 3534] +HistoryString() = "3576, 1841, 4117, 1213, 2426, 3010, 2975, 3155, 3009, 1768, 1443, 656, 1258, 3576, 1403, 3593, 2352, 4302, 409, 3868, 3534" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "r3kb1r/p?pqp2p/1?npb?p1/1?P5/?3?p1?/?1???P?N/P??????n/?????1?? b kq - 1 11" +ObservationString(1) = "r?1?????/?B??????/1?np????/??P?1?1?/2?1Pp1?/5P1N/PP1PK1Pn/RNBQ3R b - - 1 11" +ObservationTensor(0).public_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_empty_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).repetitions: ◉◯◯ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).irreversible_move_counter = [0.01] +ObservationTensor(0).private_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_Q_pieces: 
◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ +ObservationTensor(0).private_P_pieces: ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◉◯ +ObservationTensor(0).private_empty_pieces: ◯◯◯◯◉◉◯◯ + ◯◯◉◉◯◯◯◉ + ◯◯◯◉◯◯◯◉ + ◯◯◯◉◉◯◯◉ + ◯◯◯◯◉◯◯◯ + ◉◯◯◯◉◯◉◯ + ◯◯◯◉◉◯◉◉ + ◯◯◯◯◉◉◯◯ +ObservationTensor(0).private_unknown_squares: ◉◯◉◉◯◯◯◯ + ◉◉◯◯◉◉◉◯ + ◉◉◉◯◉◯◯◯ + ◉◉◉◯◯◉◯◯ + ◉◉◉◉◯◯◯◯ + ◯◉◯◯◯◉◯◯ + ◉◉◉◯◯◯◯◯ + ◉◯◯◉◯◯◯◯ +ObservationTensor(0).private_left_castling: ◯◉ +ObservationTensor(0).private_right_castling: ◯◉ +ObservationTensor(1).public_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_empty_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).repetitions: ◉◯◯ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).irreversible_move_counter = [0.01] +ObservationTensor(1).private_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + 
◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_R_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_N_pieces: ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ +ObservationTensor(1).private_P_pieces: ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_empty_pieces: ◯◯◉◉◯◉◯◯ + ◯◯◉◉◯◯◯◯ + ◯◉◉◯◯◯◯◉ + ◯◯◉◉◯◯◯◯ + ◉◯◉◯◉◯◯◯ + ◉◉◯◯◯◯◯◯ + ◉◯◉◉◉◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_unknown_squares: ◯◯◯◯◉◯◉◯ + ◯◯◯◯◉◉◯◉ + ◯◯◯◉◉◯◉◯ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◯◉◉◉ + ◯◯◯◯◉◉◉◉ + ◯◯◯◯◯◉◉◉ + ◯◯◯◉◉◉◉◉ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [30, 31, 32, 89, 90, 1380, 1381, 1382, 1384, 1385, 1386, 1840, 1868, 1914, 1955, 2365, 2380, 2526, 2527, 2528, 2536, 2537, 2538, 2539, 2540, 2541, 2964, 2965, 3666, 4117, 4177, 4178, 4591, 4592, 4593, 4672] +StringLegalActions() = ["Rb8", "Rc8", "Rd8", "a6", "a5", "Na5", "Nb8", "Nb4", "Ne5", "Nd8", "Nd4", "Qd8", "Qc8", "d5", "dxc5", "Kd8", "Kf7", "Bf5", "Bg4", "Bxh3", "Bxa2", "Bb3", "Bc4", "Bd5", "Bf7", "Bg8", "Bg7", "Bh6", "g5", "Rg8", "h6", "h5", "Nxf3", "Nf1", "Ng4", "O-O-O"] + +# Apply action "d5" +action: 1914 + +# State 22 +# Apply action "Re1" +action: 4115 + +# State 23 +# Apply action "d4" +action: 1987 + +# State 24 +# Apply action "b3" +action: 673 + +# State 25 +# Apply action "h6" +action: 4177 + +# State 26 +# Apply action "Ng1" +action: 4301 + +# State 27 +# Apply action "Kf7" +action: 2380 + +# State 28 +# Apply action "Bxc6" +action: 1080 + +# State 29 +# Apply action "Qd8" +action: 1840 + +# State 30 +# Apply action "Be8" +action: 1578 + +# State 31 +# Apply action "Bxb3" +action: 2537 + +# State 32 +# Apply action "Ba4" +action: 2887 + +# State 33 +# Apply action "Kg7" +action: 3023 + +# State 34 +# Apply action "Bc6" +action: 264 + +# State 35 +# Apply action "Bd5" +action: 1008 + +# State 36 +# Apply action "Bb5" +action: 1576 + +# State 37 +# Apply action "Bg8" +action: 2031 + +# State 38 +# Apply action "c6" +action: 1476 + +# State 39 +# Apply action "Kf7" +action: 3606 + +# State 40 +# r2q1bbr/p1p1pk2/2P3pp/1B6/3pPp2/5P2/P2PK1Pn/RNBQR1N1 w - - 1 21 +IsTerminal() = False +History() = [3576, 1841, 4117, 1213, 2426, 3010, 2975, 3155, 3009, 1768, 1443, 656, 1258, 3576, 1403, 3593, 2352, 4302, 409, 3868, 3534, 1914, 4115, 1987, 673, 4177, 4301, 2380, 1080, 1840, 1578, 2537, 2887, 3023, 264, 1008, 1576, 2031, 1476, 3606] +HistoryString() = "3576, 1841, 4117, 1213, 2426, 3010, 2975, 
3155, 3009, 1768, 1443, 656, 1258, 3576, 1403, 3593, 2352, 4302, 409, 3868, 3534, 1914, 4115, 1987, 673, 4177, 4301, 2380, 1080, 1840, 1578, 2537, 2887, 3023, 264, 1008, 1576, 2031, 1476, 3606" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "r2q1bbr/p?p1pk2/1??3pp/1??2?2/???p?p1?/???1?P??/???????n/?????1?? w - - 1 21" +ObservationString(1) = "????????/????????/1?P?????/?B??1???/1?1?P?1?/5P2/P2PK1P?/RNBQR1N? w - - 1 21" +ObservationTensor(0).public_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_empty_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).repetitions: ◉◯◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).irreversible_move_counter = [0.01] +ObservationTensor(0).private_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ +ObservationTensor(0).private_P_pieces: ◯◯◯◯◯◯◯◯ + 
◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ +ObservationTensor(0).private_empty_pieces: ◯◯◯◯◉◉◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◉ + ◯◯◉◯◉◉◉◯ + ◯◯◯◯◉◉◯◉ + ◉◯◯◯◯◉◯◯ + ◯◯◯◉◉◯◉◯ + ◯◯◯◯◉◯◉◯ +ObservationTensor(0).private_unknown_squares: ◉◉◉◉◯◯◯◯ + ◉◉◉◉◉◉◉◯ + ◉◉◉◉◉◉◯◯ + ◉◉◯◯◯◯◯◯ + ◉◉◉◉◯◯◯◯ + ◯◉◯◯◉◯◯◯ + ◉◉◉◯◯◯◯◯ + ◉◯◉◉◯◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(1).public_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_empty_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).repetitions: ◉◯◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).irreversible_move_counter = [0.01] +ObservationTensor(1).private_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_R_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_N_pieces: ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + 
◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_P_pieces: ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_empty_pieces: ◯◯◉◉◯◉◯◯ + ◯◉◉◯◯◯◯◯ + ◯◉◉◉◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◉◯◉◯◯◯ + ◉◉◯◯◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(1).private_unknown_squares: ◯◯◯◯◉◯◉◉ + ◯◯◯◉◯◉◉◉ + ◯◯◯◯◉◯◉◉ + ◯◯◯◉◉◉◉◉ + ◯◯◯◯◯◉◉◉ + ◯◯◯◉◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◉◉◯◉◉◉◉◉ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [89, 90, 652, 656, 919, 933, 934, 935, 1224, 1225, 1807, 1808, 1809, 1841, 2366, 2425, 2439, 2466, 2467, 2571, 3576, 3593, 3594] +StringLegalActions() = ["a3", "a4", "Na3", "Nc3", "Ba4", "Ba6", "Bc4", "Bd3", "Ba3", "Bb2", "Qa4", "Qb3", "Qc2", "d3", "Rf1", "Ke3", "Kf2", "Kd3", "Kf1", "e5", "Nh3", "g3", "g4"] + +# Apply action "Qb3" +action: 1808 + +# State 41 +# r2q1bbr/p1p1pk2/2P3pp/1B6/3pPp2/1Q3P2/P2PK1Pn/RNB1R1N1 b - - 2 21 +IsTerminal() = False +History() = [3576, 1841, 4117, 1213, 2426, 3010, 2975, 3155, 3009, 1768, 1443, 656, 1258, 3576, 1403, 3593, 2352, 4302, 409, 3868, 3534, 1914, 4115, 1987, 673, 4177, 4301, 2380, 1080, 1840, 1578, 2537, 2887, 3023, 264, 1008, 1576, 2031, 1476, 3606, 1808] +HistoryString() = "3576, 1841, 4117, 1213, 2426, 3010, 2975, 3155, 3009, 1768, 1443, 656, 1258, 3576, 1403, 3593, 2352, 4302, 409, 3868, 3534, 1914, 4115, 1987, 673, 4177, 4301, 2380, 1080, 1840, 1578, 2537, 2887, 3023, 264, 1008, 1576, 2031, 1476, 3606, 1808" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "r2q1bbr/p?p1pk2/1??3pp/1??2?2/???p?p1?/???1?P??/???????n/?????1?? b - - 2 21" +ObservationString(1) = "????????/?????k??/1?P?1???/?B?2???/3?P?1?/1Q3P2/P2PK1P?/RNB1R1N? 
b - - 2 21" +ObservationTensor(0).public_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_empty_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).repetitions: ◉◯◯ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).irreversible_move_counter = [0.02] +ObservationTensor(0).private_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ +ObservationTensor(0).private_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ +ObservationTensor(0).private_empty_pieces: ◯◯◯◯◉◉◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◉ + ◯◯◉◯◉◉◉◯ + ◯◯◯◯◉◉◯◉ + ◉◯◯◯◯◉◯◯ + ◯◯◯◉◉◯◉◯ + ◯◯◯◯◉◯◉◯ +ObservationTensor(0).private_unknown_squares: ◉◉◉◉◯◯◯◯ + ◉◉◉◉◉◉◉◯ + ◉◉◉◉◉◉◯◯ + ◉◉◯◯◯◯◯◯ + ◉◉◉◉◯◯◯◯ + ◯◉◯◯◉◯◯◯ + ◉◉◉◯◯◯◯◯ + ◉◯◉◉◯◯◯◯ 
+ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(1).public_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_empty_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).repetitions: ◉◯◯ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).irreversible_move_counter = [0.02] +ObservationTensor(1).private_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_R_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_N_pieces: ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_P_pieces: ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_empty_pieces: ◯◯◉◉◯◉◯◯ + ◯◉◯◉◯◯◯◯ + ◯◉◉◉◯◯◯◯ + ◉◯◉◯◉◯◯◯ + ◯◯◉◯◉◉◯◯ + ◉◉◯◯◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(1).private_unknown_squares: 
◯◯◯◯◉◯◉◉ + ◯◯◯◯◯◉◉◉ + ◯◯◯◯◉◯◉◉ + ◯◯◯◉◯◉◉◉ + ◯◯◯◯◯◯◉◉ + ◯◯◯◉◉◉◯◉ + ◯◯◯◯◉◉◉◉ + ◉◉◯◉◉◉◉◉ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [30, 31, 89, 90, 1768, 1769, 1770, 1780, 1781, 1782, 2060, 2425, 2426, 2964, 3009, 3023, 3036, 3050, 3548, 3666, 4104, 4250, 4591, 4592, 4593] +StringLegalActions() = ["Rb8", "Rc8", "a6", "a5", "Qd7", "Qd6", "Qd5", "Qb8", "Qc8", "Qe8", "d3", "e6", "e5", "Bg7", "Kf6", "Kg7", "Ke8", "Ke6", "Bh7", "g5", "Rh7", "h5", "Nxf3", "Nf1", "Ng4"] + +# Apply action "Qd5" +action: 1770 + +# State 42 +# Apply action "Ke3" +action: 2425 + +# State 43 +# Apply action "Rc8" +action: 31 + +# State 44 +# Apply action "Bb2" +action: 1225 + +# State 45 +# Apply action "Ng4" +action: 4593 + +# State 46 +# Apply action "Qa3" +action: 759 + +# State 47 +# Apply action "Qg5" +action: 2003 + +# State 48 +# Apply action "Qb3" +action: 176 + +# State 49 +# Apply action "Qf6" +action: 3766 + +# State 50 +# Apply action "Ba4" +action: 919 + +# State 51 +# Apply action "Kg7" +action: 3023 + +# State 52 +# Apply action "Bc1" +action: 715 + +# State 53 +# Apply action "Re8" +action: 1199 + +# State 54 +# Apply action "Kxf4" +action: 2526 + +# State 55 +# Apply action "Qh4" +action: 3111 + +# State 56 +# Apply action "Kxg4" +action: 3169 + +# State 57 +# Apply action "Ra8" +action: 2362 + +# State 58 +# Apply action "Kxh4" +action: 3753 + +# State 59 +# Apply action "g5" +action: 3666 + +# State 60 +# r4bbr/p1p1p1k1/2P4p/6p1/B2pP2K/1Q3P2/P2P2P1/RNB1R1N1 w - - 0 31 +IsTerminal() = False +History() = [3576, 1841, 4117, 1213, 2426, 3010, 2975, 3155, 3009, 1768, 1443, 656, 1258, 3576, 1403, 3593, 2352, 4302, 409, 3868, 3534, 1914, 4115, 1987, 673, 4177, 4301, 2380, 1080, 1840, 1578, 2537, 2887, 3023, 264, 1008, 1576, 2031, 1476, 3606, 1808, 1770, 2425, 31, 1225, 4593, 759, 2003, 176, 3766, 919, 3023, 715, 1199, 2526, 3111, 3169, 2362, 3753, 3666] +HistoryString() = "3576, 1841, 4117, 1213, 2426, 3010, 2975, 3155, 3009, 1768, 1443, 656, 1258, 3576, 1403, 3593, 2352, 4302, 409, 3868, 3534, 1914, 4115, 1987, 673, 4177, 4301, 2380, 1080, 1840, 1578, 2537, 2887, 3023, 264, 1008, 1576, 2031, 1476, 3606, 1808, 1770, 2425, 31, 1225, 4593, 759, 2003, 176, 3766, 919, 3023, 715, 1199, 2526, 3111, 3169, 2362, 3753, 3666" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "r4bbr/p?p?p1k1/1???3p/1??2?p1/??1p??1K/?Q?1????/????????/???????? w - - 0 31" +ObservationString(1) = "?1????b?/?1???1??/?1P?1???/?1?2?p1/B2?P2K/1Q3P2/P2P1?P?/RNB1R1N? 
w - - 0 31" +ObservationTensor(0).public_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ +ObservationTensor(0).public_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_empty_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).repetitions: ◉◯◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).irreversible_move_counter: ◯ +ObservationTensor(0).private_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ +ObservationTensor(0).private_empty_pieces: ◯◯◯◯◉◉◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◉ + ◯◯◉◯◯◯◯◉ + ◯◯◯◯◉◯◯◉ + ◯◯◯◯◯◉◯◯ + ◯◯◯◉◯◉◯◯ + ◯◯◯◯◉◯◉◯ +ObservationTensor(0).private_unknown_squares: ◉◉◉◉◯◯◯◯ + ◉◉◉◉◉◉◉◯ + ◉◉◉◉◉◉◯◯ + ◉◉◯◯◉◉◉◯ + ◉◉◉◉◯◉◯◯ + ◉◉◉◉◉◯◉◯ + ◉◉◉◯◉◯◯◉ + ◉◉◉◉◯◯◯◯ 
+ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(1).public_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ +ObservationTensor(1).public_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_empty_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).repetitions: ◉◯◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).irreversible_move_counter: ◯ +ObservationTensor(1).private_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_R_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_B_pieces: ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_N_pieces: ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_P_pieces: ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_empty_pieces: ◯◯◉◯◯◯◯◯ + ◯◉◯◉◉◉◉◉ + ◯◉◉◯◯◯◯◯ + ◉◯◉◯◯◯◯◯ + ◯◉◉◯◉◯◯◯ + ◉◯◯◉◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◉◯◉◯◯◯ +ObservationTensor(1).private_unknown_squares: ◯◯◯◯◉◉◉◉ + 
◯◯◉◯◯◯◯◯ + ◯◯◯◉◉◯◉◉ + ◯◯◯◉◉◉◉◉ + ◯◯◯◯◯◉◉◉ + ◯◉◯◯◉◉◉◉ + ◯◯◯◯◉◉◉◉ + ◉◉◯◉◯◉◉◉ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [89, 263, 652, 656, 745, 746, 747, 748, 749, 750, 759, 760, 761, 762, 774, 775, 776, 777, 778, 788, 789, 1224, 1225, 1841, 2352, 2353, 2365, 2366, 2571, 3082, 3570, 3576, 3593, 3594, 4322, 4323, 4336, 4350, 4364] +StringLegalActions() = ["a3", "Bb5", "Na3", "Nc3", "Qb2", "Qb4", "Qb5", "Qb6", "Qb7", "Qb8", "Qa3", "Qc3", "Qd3", "Qe3", "Qc4", "Qd5", "Qe6", "Qf7", "Qxg8", "Qc2", "Qd1", "Ba3", "Bb2", "d3", "Re2", "Re3", "Rd1", "Rf1", "e5", "f4", "Ne2", "Nh3", "g3", "g4", "Kh3", "Kh5", "Kg4", "Kg3", "Kxg5"] + +# Apply action "Qe6" +action: 776 + +# State 61 +# r4bbr/p1p1p1k1/2P1Q2p/6p1/B2pP2K/5P2/P2P2P1/RNB1R1N1 b - - 1 31 +IsTerminal() = False +History() = [3576, 1841, 4117, 1213, 2426, 3010, 2975, 3155, 3009, 1768, 1443, 656, 1258, 3576, 1403, 3593, 2352, 4302, 409, 3868, 3534, 1914, 4115, 1987, 673, 4177, 4301, 2380, 1080, 1840, 1578, 2537, 2887, 3023, 264, 1008, 1576, 2031, 1476, 3606, 1808, 1770, 2425, 31, 1225, 4593, 759, 2003, 176, 3766, 919, 3023, 715, 1199, 2526, 3111, 3169, 2362, 3753, 3666, 776] +HistoryString() = "3576, 1841, 4117, 1213, 2426, 3010, 2975, 3155, 3009, 1768, 1443, 656, 1258, 3576, 1403, 3593, 2352, 4302, 409, 3868, 3534, 1914, 4115, 1987, 673, 4177, 4301, 2380, 1080, 1840, 1578, 2537, 2887, 3023, 264, 1008, 1576, 2031, 1476, 3606, 1808, 1770, 2425, 31, 1225, 4593, 759, 2003, 176, 3766, 919, 3023, 715, 1199, 2526, 3111, 3169, 2362, 3753, 3666, 776" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "r4bbr/p?p?p1k1/1???Q2p/1?????p1/???p??1K/???1????/????????/???????? b - - 1 31" +ObservationString(1) = "??1???b?/???1p1??/??P1Q2p/?1?3p1/B?1?P2K/5P2/P2P1?P?/RNB1R1N? 
b - - 1 31" +ObservationTensor(0).public_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ +ObservationTensor(0).public_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_empty_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).repetitions: ◉◯◯ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).irreversible_move_counter = [0.01] +ObservationTensor(0).private_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ +ObservationTensor(0).private_empty_pieces: ◯◯◯◯◉◉◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◉ + ◯◯◉◯◯◯◯◉ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◉◯◯ + ◯◯◯◉◯◉◯◯ + ◯◯◯◯◉◯◉◯ +ObservationTensor(0).private_unknown_squares: ◉◉◉◉◯◯◯◯ + ◉◉◉◉◉◉◉◯ + ◉◉◉◉◉◉◯◯ + ◉◉◯◯◉◉◉◯ + ◉◉◉◉◉◉◯◯ + ◉◉◉◉◉◯◉◯ + ◉◉◉◯◉◯◯◉ + ◉◉◉◉◯◯◯◯ 
+ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(1).public_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ +ObservationTensor(1).public_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_empty_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).repetitions: ◉◯◯ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).irreversible_move_counter = [0.01] +ObservationTensor(1).private_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_R_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_B_pieces: ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_N_pieces: ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_P_pieces: ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ +ObservationTensor(1).private_empty_pieces: ◯◯◉◯◯◯◯◯ + ◯◉◉◯◉◯◯◯ + ◯◉◉◉◯◯◯◉ + ◉◯◉◯◉◉◉◯ + ◯◉◉◯◉◯◯◯ + ◉◯◯◉◉◉◯◯ + ◯◯◉◉◯◉◯◯ + ◯◯◉◯◉◯◯◯ +ObservationTensor(1).private_unknown_squares: 
◯◯◯◯◉◉◉◉ + ◯◯◯◉◯◉◉◉ + ◯◯◯◯◉◯◉◯ + ◯◯◯◉◯◯◯◉ + ◯◯◯◯◯◉◯◉ + ◯◉◯◯◯◯◉◉ + ◯◯◯◯◉◯◉◉ + ◉◉◯◉◯◯◉◉ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [30, 31, 32, 33, 89, 90, 2060, 3548, 3560, 3561, 3593, 3606, 3607, 3634, 3739, 3767, 4104, 4250] +StringLegalActions() = ["Rb8", "Rc8", "Rd8", "Re8", "a6", "a5", "d3", "Bh7", "Bxe6", "Bf7", "Kg6", "Kf7", "Kh7", "Kf6", "g4", "gxh4", "Rh7", "h5"] + +# Apply action "a6" +action: 89 + +# State 62 +# Apply action "Qg4" +action: 2760 + +# State 63 +# Apply action "Rb8" +action: 30 + +# State 64 +# Apply action "Kxg5" +action: 4364 + +# State 65 +# Apply action "e6" +action: 2425 + +# State 66 +# Apply action "d3" +action: 1841 + +# State 67 +# Apply action "a5" +action: 162 + +# State 68 +# Apply action "Bd1" +action: 279 + +# State 69 +# Apply action "hxg5" +action: 4291 + +# State 70 +# 1r3bbr/2p3k1/2P1p3/p5p1/3pP1Q1/3P1P2/P5P1/RNBBR1N1 w - - 0 36 +IsTerminal() = True +History() = [3576, 1841, 4117, 1213, 2426, 3010, 2975, 3155, 3009, 1768, 1443, 656, 1258, 3576, 1403, 3593, 2352, 4302, 409, 3868, 3534, 1914, 4115, 1987, 673, 4177, 4301, 2380, 1080, 1840, 1578, 2537, 2887, 3023, 264, 1008, 1576, 2031, 1476, 3606, 1808, 1770, 2425, 31, 1225, 4593, 759, 2003, 176, 3766, 919, 3023, 715, 1199, 2526, 3111, 3169, 2362, 3753, 3666, 776, 89, 2760, 30, 4364, 2425, 1841, 162, 279, 4291] +HistoryString() = "3576, 1841, 4117, 1213, 2426, 3010, 2975, 3155, 3009, 1768, 1443, 656, 1258, 3576, 1403, 3593, 2352, 4302, 409, 3868, 3534, 1914, 4115, 1987, 673, 4177, 4301, 2380, 1080, 1840, 1578, 2537, 2887, 3023, 264, 1008, 1576, 2031, 1476, 3606, 1808, 1770, 2425, 31, 1225, 4593, 759, 2003, 176, 3766, 919, 3023, 715, 1199, 2526, 3111, 3169, 2362, 3753, 3666, 776, 89, 2760, 30, 4364, 2425, 1841, 162, 279, 4291" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "1r3bbr/?1p?2k1/?1?1p3/p2?1?p1/2?p???1/2?????1/?1?????1/?N?????1 w - - 0 36" +ObservationString(1) = "????????/????????/??P?p???/????2p1/1???P1Q1/3P1P2/P4?P?/RNBBR1N? 
w - - 0 36" +ObservationTensor(0).public_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).public_empty_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).repetitions: ◉◯◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).irreversible_move_counter: ◯ +ObservationTensor(0).private_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_N_pieces: ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_empty_pieces: ◯◯◉◉◯◯◯◉ + ◯◉◉◉◉◉◉◯ + ◯◯◯◯◉◯◯◉ + ◯◯◯◯◯◉◯◉ + ◯◯◯◯◉◯◉◉ + ◯◯◯◯◯◉◉◯ + ◯◯◯◯◯◉◯◯ + ◉◉◉◉◉◉◉◯ +ObservationTensor(0).private_unknown_squares: ◉◉◯◯◯◉◉◯ + ◯◯◯◯◯◯◯◯ + ◉◉◉◉◯◉◯◯ + ◉◉◉◯◉◯◉◯ + ◉◉◉◉◯◯◯◯ + ◉◉◉◉◉◯◯◯ + ◉◉◉◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ 
+ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(1).public_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).public_empty_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).repetitions: ◉◯◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).irreversible_move_counter: ◯ +ObservationTensor(1).private_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_R_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_N_pieces: ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_P_pieces: ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_empty_pieces: ◯◯◉◉◯◯◯◯ + ◯◉◉◯◯◯◯◯ + ◯◉◉◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◉◯◉◯◯◯ + ◉◯◯◉◉◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◉◉◉◯◯◯ +ObservationTensor(1).private_unknown_squares: ◯◯◯◯◉◉◉◉ + 
◯◯◯◉◉◉◉◉ + ◯◯◯◉◉◯◉◉ + ◯◯◯◉◉◉◉◉ + ◯◯◯◯◯◯◉◉ + ◯◉◯◯◯◉◉◉ + ◯◯◯◯◯◉◉◉ + ◉◉◯◯◯◉◉◉ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/dark_hex(num_rows=5,num_cols=3).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/dark_hex(num_rows=5,num_cols=3).txt new file mode 100644 index 0000000..296c3d7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/dark_hex(num_rows=5,num_cols=3).txt @@ -0,0 +1,349 @@ +game: dark_hex(num_rows=5,num_cols=3) + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Dark Hex" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["board_size", "gameversion", "num_cols", "num_rows", "obstype"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "dark_hex" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 15 +PolicyTensorShape() = [15] +MaxChanceOutcomes() = 0 +GetParameters() = {board_size=3,gameversion=cdh,num_cols=3,num_rows=5,obstype=reveal-nothing} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = [360] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 360 +ObservationTensorShape() = [135] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 135 +MaxGameLength() = 29 +ToString() = "dark_hex(num_cols=3,num_rows=5)" + +# State 0 +# . . . +# . . . +# . . . +# . . . +# . . . +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "...\n...\n...\n...\n...\n0\n" +InformationStateString(1) = "...\n...\n...\n...\n...\n0\n" +InformationStateTensor(0): binvec(360, 0x80402010080402010080402010080402000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(360, 0x80402010080402010080402010080402000000000000000000000000000000000000000000000000000000000) +ObservationString(0) = "...\n...\n...\n...\n..." +ObservationString(1) = "...\n...\n...\n...\n..." +ObservationTensor(0): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] +StringLegalActions() = ["a1", "b1", "c1", "a2", "b2", "c2", "a3", "b3", "c3", "a4", "b4", "c4", "a5", "b5", "c5"] + +# Apply action "a1" +action: 0 + +# State 1 +# x . . +# . . . +# . . . +# . . . +# . . . 
+IsTerminal() = False +History() = [0] +HistoryString() = "0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "x..\n...\n...\n...\n...\n1\n0,0 " +InformationStateString(1) = "...\n...\n...\n...\n...\n1\n" +InformationStateTensor(0): binvec(360, 0x40402010080402010080402010080402100000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(360, 0x80402010080402010080402010080402000000000000000000000000000000000000000000000000000000000) +ObservationString(0) = "x..\n...\n...\n...\n..." +ObservationString(1) = "...\n...\n...\n...\n..." +ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] +StringLegalActions() = ["a1", "b1", "c1", "a2", "b2", "c2", "a3", "b3", "c3", "a4", "b4", "c4", "a5", "b5", "c5"] + +# Apply action "c5" +action: 14 + +# State 2 +# x . . +# . . . +# . . . +# . . . +# . . o +IsTerminal() = False +History() = [0, 14] +HistoryString() = "0, 14" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "x..\n...\n...\n...\n...\n2\n0,0 " +InformationStateString(1) = "...\n...\n...\n...\n..o\n2\n1,14 " +InformationStateTensor(0): binvec(360, 0x40402010080402010080402010080402100000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(360, 0x80402010080402010080402010080404000040000000000000000000000000000000000000000000000000000) +ObservationString(0) = "x..\n...\n...\n...\n..." +ObservationString(1) = "...\n...\n...\n...\n..o" +ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] +StringLegalActions() = ["b1", "c1", "a2", "b2", "c2", "a3", "b3", "c3", "a4", "b4", "c4", "a5", "b5", "c5"] + +# Apply action "c2" +action: 5 + +# State 3 +# x . . +# . . x +# . . . +# . . . +# . . o +IsTerminal() = False +History() = [0, 14, 5] +HistoryString() = "0, 14, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "x..\n..x\n...\n...\n...\n3\n0,0 0,5 " +InformationStateString(1) = "...\n...\n...\n...\n..o\n3\n1,14 " +InformationStateTensor(0): binvec(360, 0x40402010080202010080402010080402100001000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(360, 0x80402010080402010080402010080404000040000000000000000000000000000000000000000000000000000) +ObservationString(0) = "x..\n..x\n...\n...\n..." 
+ObservationString(1) = "...\n...\n...\n...\n..o" +ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] +StringLegalActions() = ["a1", "b1", "c1", "a2", "b2", "c2", "a3", "b3", "c3", "a4", "b4", "c4", "a5", "b5"] + +# Apply action "c4" +action: 11 + +# State 4 +# x . . +# . . x +# . . . +# . . o +# . . o +IsTerminal() = False +History() = [0, 14, 5, 11] +HistoryString() = "0, 14, 5, 11" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "x..\n..x\n...\n...\n...\n4\n0,0 0,5 " +InformationStateString(1) = "...\n...\n...\n..o\n..o\n4\n1,14 1,11 " +InformationStateTensor(0): binvec(360, 0x40402010080202010080402010080402100001000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(360, 0x80402010080402010080402020080404000040040000000000000000000000000000000000000000000000000) +ObservationString(0) = "x..\n..x\n...\n...\n..." +ObservationString(1) = "...\n...\n...\n..o\n..o" +ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14] +StringLegalActions() = ["b1", "c1", "a2", "b2", "a3", "b3", "c3", "a4", "b4", "c4", "a5", "b5", "c5"] + +# Apply action "c5" +action: 14 + +# State 5 +# Apply action "c4" +action: 11 + +# State 6 +# Apply action "b2" +action: 4 + +# State 7 +# x . . +# . x x +# . . . +# . . o +# . . 
o +IsTerminal() = False +History() = [0, 14, 5, 11, 14, 11, 4] +HistoryString() = "0, 14, 5, 11, 14, 11, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "x..\n.xx\n...\n..o\n..o\n7\n0,0 0,5 0,14 0,11 0,4 " +InformationStateString(1) = "...\n...\n...\n..o\n..o\n7\n1,14 1,11 " +InformationStateTensor(0): binvec(360, 0x40402010040202010080402020080404100001000001001010000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(360, 0x80402010080402010080402020080404000040040000000000000000000000000000000000000000000000000) +ObservationString(0) = "x..\n.xx\n...\n..o\n..o" +ObservationString(1) = "...\n...\n...\n..o\n..o" +ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13] +StringLegalActions() = ["a1", "b1", "c1", "a2", "b2", "c2", "a3", "b3", "c3", "a4", "b4", "a5", "b5"] + +# Apply action "c3" +action: 8 + +# State 8 +# Apply action "b5" +action: 13 + +# State 9 +# Apply action "a3" +action: 6 + +# State 10 +# Apply action "a5" +action: 12 + +# State 11 +# Apply action "b1" +action: 1 + +# State 12 +# Apply action "b3" +action: 7 + +# State 13 +# Apply action "b2" +action: 4 + +# State 14 +# Apply action "b3" +action: 7 + +# State 15 +# Apply action "a4" +action: 9 + +# State 16 +# Apply action "b1" +action: 1 + +# State 17 +# Apply action "a4" +action: 9 + +# State 18 +# x o . +# . x x +# o x o +# o . o +# x x o +IsTerminal() = False +History() = [0, 14, 5, 11, 14, 11, 4, 8, 13, 6, 12, 1, 7, 4, 7, 9, 1, 9] +HistoryString() = "0, 14, 5, 11, 14, 11, 4, 8, 13, 6, 12, 1, 7, 4, 7, 9, 1, 9" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "xo.\n.xx\n.x.\no.o\nxxo\n18\n0,0 0,5 0,14 0,11 0,4 0,13 0,12 0,7 0,1 0,9 " +InformationStateString(1) = ".o.\n.x.\noxo\no.o\n..o\n18\n1,14 1,11 1,8 1,6 1,1 1,4 1,7 1,9 " +InformationStateTensor(0): binvec(360, 0x40802010040202008080802020040204100001000001001010000010004010080000100000000000000000000) +InformationStateTensor(1): binvec(360, 0x80802010040404008100802020080404000040040040020080002000080004000000000000000000000000000) +ObservationString(0) = "xo.\n.xx\n.x.\no.o\nxxo" +ObservationString(1) = ".o.\n.x.\noxo\no.o\n..o" +ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [2, 3, 6, 8, 10] +StringLegalActions() = ["c1", "a2", "a3", "c3", "b4"] + +# Apply action "b4" +action: 10 + +# State 19 +# Apply action "b4" +action: 10 + +# State 20 +# Apply action "a1" +action: 0 + +# State 21 +# x o . +# . 
x x +# o x o +# o x o +# x x o +IsTerminal() = False +History() = [0, 14, 5, 11, 14, 11, 4, 8, 13, 6, 12, 1, 7, 4, 7, 9, 1, 9, 10, 10, 0] +HistoryString() = "0, 14, 5, 11, 14, 11, 4, 8, 13, 6, 12, 1, 7, 4, 7, 9, 1, 9, 10, 10, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "xo.\n.xx\n.x.\noxo\nxxo\n21\n0,0 0,5 0,14 0,11 0,4 0,13 0,12 0,7 0,1 0,9 0,10 " +InformationStateString(1) = "xo.\n.x.\noxo\noxo\n..o\n21\n1,14 1,11 1,8 1,6 1,1 1,4 1,7 1,9 1,10 1,0 " +InformationStateTensor(0): binvec(360, 0x40802010040202008080801020040204100001000001001010000010004010080000100010000000000000000) +InformationStateTensor(1): binvec(360, 0x40802010040404008100801020080404000040040040020080002000080004000420000000000000000000000) +ObservationString(0) = "xo.\n.xx\n.x.\noxo\nxxo" +ObservationString(1) = "xo.\n.x.\noxo\noxo\n..o" +ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [2, 3, 5, 12, 13] +StringLegalActions() = ["c1", "a2", "c2", "a5", "b5"] + +# Apply action "c1" +action: 2 + +# State 22 +# Apply action "c1" +action: 2 + +# State 23 +# Apply action "c3" +action: 8 + +# State 24 +# Apply action "a2" +action: 3 + +# State 25 +# x o o +# x x x +# o x o +# o x o +# x x o +IsTerminal() = True +History() = [0, 14, 5, 11, 14, 11, 4, 8, 13, 6, 12, 1, 7, 4, 7, 9, 1, 9, 10, 10, 0, 2, 2, 8, 3] +HistoryString() = "0, 14, 5, 11, 14, 11, 4, 8, 13, 6, 12, 1, 7, 4, 7, 9, 1, 9, 10, 10, 0, 2, 2, 8, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "xoo\nxxx\n.xo\noxo\nxxo\n25\n0,0 0,5 0,14 0,11 0,4 0,13 0,12 0,7 0,1 0,9 0,10 0,2 0,8 0,3 " +InformationStateString(1) = "xoo\n.x.\noxo\noxo\n..o\n25\n1,14 1,11 1,8 1,6 1,1 1,4 1,7 1,9 1,10 1,0 1,2 " +InformationStateTensor(0): binvec(360, 0x40804001040202008100801020040204100001000001001010000010004010080000100010200001004000000) +InformationStateTensor(1): binvec(360, 0x40804010040404008100801020080404000040040040020080002000080004000420001000000000000000000) +ObservationString(0) = "xoo\nxxx\n.xo\noxo\nxxo" +ObservationString(1) = "xoo\n.x.\noxo\noxo\n..o" +ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/dark_hex_ir(board_size=3).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/dark_hex_ir(board_size=3).txt new file mode 100644 index 0000000..7b6c647 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/dark_hex_ir(board_size=3).txt @@ -0,0 +1,223 @@ +game: dark_hex_ir(board_size=3) + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Dark Hex with Imperfect Recall" +GameType.max_num_players = 2 +GameType.min_num_players = 2 
+GameType.parameter_specification = ["board_size", "gameversion", "num_cols", "num_rows", "obstype"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "dark_hex_ir" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 9 +PolicyTensorShape() = [9] +MaxChanceOutcomes() = 0 +GetParameters() = {board_size=3,gameversion=cdh,num_cols=3,num_rows=3,obstype=reveal-nothing} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = [162] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 162 +ObservationTensorShape() = [81] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 81 +MaxGameLength() = 17 +ToString() = "dark_hex_ir(board_size=3)" + +# State 0 +# . . . +# . . . +# . . . +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "P0 ...\n...\n..." +InformationStateString(1) = "P1 ...\n...\n..." +InformationStateTensor(0): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "...\n...\n..." +ObservationString(1) = "...\n...\n..." +ObservationTensor(0): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8] +StringLegalActions() = ["a1", "b1", "c1", "a2", "b2", "c2", "a3", "b3", "c3"] + +# Apply action "b2" +action: 4 + +# State 1 +# . . . +# . x . +# . . . +IsTerminal() = False +History() = [4] +HistoryString() = "4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "P0 ...\n.x.\n..." +InformationStateString(1) = "P1 ...\n...\n..." +InformationStateTensor(0): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "...\n.x.\n..." +ObservationString(1) = "...\n...\n..." +ObservationTensor(0): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8] +StringLegalActions() = ["a1", "b1", "c1", "a2", "b2", "c2", "a3", "b3", "c3"] + +# Apply action "c3" +action: 8 + +# State 2 +# . . . +# . x . +# . . o +IsTerminal() = False +History() = [4, 8] +HistoryString() = "4, 8" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "P0 ...\n.x.\n..." 
+InformationStateString(1) = "P1 ...\n...\n..o" +InformationStateTensor(0): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "...\n.x.\n..." +ObservationString(1) = "...\n...\n..o" +ObservationTensor(0): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [0, 1, 2, 3, 5, 6, 7, 8] +StringLegalActions() = ["a1", "b1", "c1", "a2", "c2", "a3", "b3", "c3"] + +# Apply action "a3" +action: 6 + +# State 3 +# . . . +# . x . +# x . o +IsTerminal() = False +History() = [4, 8, 6] +HistoryString() = "4, 8, 6" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "P0 ...\n.x.\nx.." +InformationStateString(1) = "P1 ...\n...\n..o" +InformationStateTensor(0): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "...\n.x.\nx.." +ObservationString(1) = "...\n...\n..o" +ObservationTensor(0): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7] +StringLegalActions() = ["a1", "b1", "c1", "a2", "b2", "c2", "a3", "b3"] + +# Apply action "a3" +action: 6 + +# State 4 +# . . . +# . x . +# x . o +IsTerminal() = False +History() = [4, 8, 6, 6] +HistoryString() = "4, 8, 6, 6" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "P0 ...\n.x.\nx.." +InformationStateString(1) = "P1 ...\n...\nx.o" +InformationStateTensor(0): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "...\n.x.\nx.." +ObservationString(1) = "...\n...\nx.o" +ObservationTensor(0): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [0, 1, 2, 3, 4, 5, 7] +StringLegalActions() = ["a1", "b1", "c1", "a2", "b2", "c2", "b3"] + +# Apply action "c2" +action: 5 + +# State 5 +# . . . +# . x o +# x . o +IsTerminal() = False +History() = [4, 8, 6, 6, 5] +HistoryString() = "4, 8, 6, 6, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "P0 ...\n.x.\nx.." 
+InformationStateString(1) = "P1 ...\n..o\nx.o" +InformationStateTensor(0): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "...\n.x.\nx.." +ObservationString(1) = "...\n..o\nx.o" +ObservationTensor(0): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [0, 1, 2, 3, 5, 7, 8] +StringLegalActions() = ["a1", "b1", "c1", "a2", "c2", "b3", "c3"] + +# Apply action "a1" +action: 0 + +# State 6 +# Apply action "b1" +action: 1 + +# State 7 +# Apply action "c2" +action: 5 + +# State 8 +# Apply action "a2" +action: 3 + +# State 9 +# x o . +# x x o +# x . o +IsTerminal() = True +History() = [4, 8, 6, 6, 5, 0, 1, 5, 3] +HistoryString() = "4, 8, 6, 6, 5, 0, 1, 5, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "P0 x..\nxxo\nx.." +InformationStateString(1) = "P1 .o.\n..o\nx.o" +InformationStateTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "x..\nxxo\nx.." 
+ObservationString(1) = ".o.\n..o\nx.o" +ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/dark_hex_reveal_turn_long.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/dark_hex_reveal_turn_long.txt new file mode 100644 index 0000000..f5891bd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/dark_hex_reveal_turn_long.txt @@ -0,0 +1,255 @@ +game: dark_hex(gameversion=adh,obstype=reveal-numturns) + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Dark Hex" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["board_size", "gameversion", "num_cols", "num_rows", "obstype"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "dark_hex" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 9 +PolicyTensorShape() = [9] +MaxChanceOutcomes() = 0 +GetParameters() = {board_size=3,gameversion=adh,num_cols=3,num_rows=3,obstype=reveal-numturns} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = [268] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 268 +ObservationTensorShape() = [99] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 99 +MaxGameLength() = 17 +ToString() = "dark_hex(gameversion=adh,obstype=reveal-numturns)" + +# State 0 +# . . . +# . . . +# . . . +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "...\n...\n...\n0\n" +InformationStateString(1) = "...\n...\n...\n0\n" +InformationStateTensor(0): binvec(268, 0x804020100804020100800000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(268, 0x804020100804020100800000000000000000000000000000000000000000000000) +ObservationString(0) = "...\n...\n...\nTotal turns: 0" +ObservationString(1) = "...\n...\n...\nTotal turns: 0" +ObservationTensor(0): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8] +StringLegalActions() = ["a1", "b1", "c1", "a2", "b2", "c2", "a3", "b3", "c3"] + +# Apply action "a1" +action: 0 + +# State 1 +# x . . +# . . . +# . . . +IsTerminal() = False +History() = [0] +HistoryString() = "0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "x..\n...\n...\n1\n0,0 " +InformationStateString(1) = "...\n...\n...\n1\n0,? 
" +InformationStateTensor(0): binvec(268, 0x404020100804020100820000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(268, 0x804020100804020100800100000000000000000000000000000000000000000000) +ObservationString(0) = "x..\n...\n...\nTotal turns: 1" +ObservationString(1) = "...\n...\n...\nTotal turns: 1" +ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8] +StringLegalActions() = ["a1", "b1", "c1", "a2", "b2", "c2", "a3", "b3", "c3"] + +# Apply action "c1" +action: 2 + +# State 2 +# x . o +# . . . +# . . . +IsTerminal() = False +History() = [0, 2] +HistoryString() = "0, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "x..\n...\n...\n2\n0,0 1,? " +InformationStateString(1) = "..o\n...\n...\n2\n0,? 1,2 " +InformationStateTensor(0): binvec(268, 0x404020100804020100820080200000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(268, 0x804040100804020100800190000000000000000000000000000000000000000000) +ObservationString(0) = "x..\n...\n...\nTotal turns: 2" +ObservationString(1) = "..o\n...\n...\nTotal turns: 2" +ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [1, 2, 3, 4, 5, 6, 7, 8] +StringLegalActions() = ["b1", "c1", "a2", "b2", "c2", "a3", "b3", "c3"] + +# Apply action "b1" +action: 1 + +# State 3 +# x x o +# . . . +# . . . +IsTerminal() = False +History() = [0, 2, 1] +HistoryString() = "0, 2, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "xx.\n...\n...\n3\n0,0 1,? 0,1 " +InformationStateString(1) = "..o\n...\n...\n3\n0,? 1,2 0,? " +InformationStateTensor(0): binvec(268, 0x402020100804020100820080240000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(268, 0x804040100804020100800190000400000000000000000000000000000000000000) +ObservationString(0) = "xx.\n...\n...\nTotal turns: 3" +ObservationString(1) = "..o\n...\n...\nTotal turns: 3" +ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [0, 1, 3, 4, 5, 6, 7, 8] +StringLegalActions() = ["a1", "b1", "a2", "b2", "c2", "a3", "b3", "c3"] + +# Apply action "a2" +action: 3 + +# State 4 +# x x o +# o . . +# . . . +IsTerminal() = False +History() = [0, 2, 1, 3] +HistoryString() = "0, 2, 1, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "xx.\n...\n...\n4\n0,0 1,? 0,1 1,? " +InformationStateString(1) = "..o\no..\n...\n4\n0,? 1,2 0,? 
1,3 " +InformationStateTensor(0): binvec(268, 0x402020100804020100820080240200800000000000000000000000000000000000) +InformationStateTensor(1): binvec(268, 0x804040200804020100800190000620000000000000000000000000000000000000) +ObservationString(0) = "xx.\n...\n...\nTotal turns: 4" +ObservationString(1) = "..o\no..\n...\nTotal turns: 4" +ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [2, 3, 4, 5, 6, 7, 8] +StringLegalActions() = ["c1", "a2", "b2", "c2", "a3", "b3", "c3"] + +# Apply action "b2" +action: 4 + +# State 5 +# x x o +# o x . +# . . . +IsTerminal() = False +History() = [0, 2, 1, 3, 4] +HistoryString() = "0, 2, 1, 3, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "xx.\n.x.\n...\n5\n0,0 1,? 0,1 1,? 0,4 " +InformationStateString(1) = "..o\no..\n...\n5\n0,? 1,2 0,? 1,3 0,? " +InformationStateTensor(0): binvec(268, 0x402020100404020100820080240200820000000000000000000000000000000000) +InformationStateTensor(1): binvec(268, 0x804040200804020100800190000620001000000000000000000000000000000000) +ObservationString(0) = "xx.\n.x.\n...\nTotal turns: 5" +ObservationString(1) = "..o\no..\n...\nTotal turns: 5" +ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [0, 1, 4, 5, 6, 7, 8] +StringLegalActions() = ["a1", "b1", "b2", "c2", "a3", "b3", "c3"] + +# Apply action "a3" +action: 6 + +# State 6 +# Apply action "c2" +action: 5 + +# State 7 +# Apply action "b3" +action: 7 + +# State 8 +# Apply action "b3" +action: 7 + +# State 9 +# Apply action "c2" +action: 5 + +# State 10 +# Apply action "a3" +action: 6 + +# State 11 +# Apply action "b2" +action: 4 + +# State 12 +# Apply action "a2" +action: 3 + +# State 13 +# Apply action "b1" +action: 1 + +# State 14 +# Apply action "c1" +action: 2 + +# State 15 +# Apply action "a1" +action: 0 + +# State 16 +# Apply action "c3" +action: 8 + +# State 17 +# x x o +# o x x +# o o x +IsTerminal() = True +History() = [0, 2, 1, 3, 4, 6, 5, 7, 7, 5, 6, 4, 3, 1, 2, 0, 8] +HistoryString() = "0, 2, 1, 3, 4, 6, 5, 7, 7, 5, 6, 4, 3, 1, 2, 0, 8" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "xxo\noxx\noox\n17\n0,0 1,? 0,1 1,? 0,4 1,? 0,5 1,? 0,7 1,? 0,6 1,? 0,3 1,? 0,2 1,? 0,8 " +InformationStateString(1) = "xxo\noxx\noo.\n17\n0,? 1,2 0,? 1,3 0,? 1,6 0,? 1,7 0,? 1,5 0,? 1,4 0,? 1,1 0,? 1,0 0,? 
" +InformationStateTensor(0): binvec(268, 0x4020402004020402000a0080240200820802042008048020220084080220200802) +InformationStateTensor(1): binvec(268, 0x4020402004020402008001900006200018100060200182000610001a0000700001) +ObservationString(0) = "xxo\noxx\noox\nTotal turns: 17" +ObservationString(1) = "xxo\noxx\noo.\nTotal turns: 17" +ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(1): ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/deep_sea.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/deep_sea.txt new file mode 100644 index 0000000..8d8fc28 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/deep_sea.txt @@ -0,0 +1,142 @@ +game: deep_sea + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "DeepSea" +GameType.max_num_players = 1 +GameType.min_num_players = 1 +GameType.parameter_specification = ["randomize_actions", "seed", "size", "unscaled_move_cost"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.REWARDS +GameType.short_name = "deep_sea" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 2 +PolicyTensorShape() = [2] +MaxChanceOutcomes() = 2 +GetParameters() = {randomize_actions=True,seed=42,size=5,unscaled_move_cost=0.01} +NumPlayers() = 1 +MinUtility() = -0.01 +MaxUtility() = 0.99 +UtilitySum() = None +ObservationTensorShape() = [5, 5] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 25 +MaxGameLength() = 5 +ToString() = "deep_sea()" + +# State 0 +# x..... +# RR.... +# RRL... +# RLRL.. +# RLLRL. +# ...... +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "x............................." +ObservationTensor(0): ◉◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +Rewards() = [0] +Returns() = [0] +LegalActions() = [0, 1] +StringLegalActions() = ["LEFT", "RIGHT"] + +# Apply action "LEFT" +action: 0 + +# State 1 +# R..... +# xR.... +# RRL... +# RLRL.. +# RLLRL. +# ...... +IsTerminal() = False +History() = [0] +HistoryString() = "0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = ".....x........................" +ObservationTensor(0): ◯◯◯◯◯ + ◉◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +Rewards() = [0] +Returns() = [0] +LegalActions() = [0, 1] +StringLegalActions() = ["LEFT", "RIGHT"] + +# Apply action "RIGHT" +action: 1 + +# State 2 +# R..... +# RR.... +# RxL... +# RLRL.. +# RLLRL. +# ...... +IsTerminal() = False +History() = [0, 1] +HistoryString() = "0, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "...........x.................." 
+ObservationTensor(0): ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◉◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +Rewards() = [-0.002] +Returns() = [-0.002] +LegalActions() = [0, 1] +StringLegalActions() = ["LEFT", "RIGHT"] + +# Apply action "LEFT" +action: 0 + +# State 3 +# Apply action "RIGHT" +action: 1 + +# State 4 +# Apply action "LEFT" +action: 0 + +# State 5 +# R..... +# RR.... +# RRL... +# RLRL.. +# RLLRL. +# ..x... +IsTerminal() = True +History() = [0, 1, 0, 1, 0] +HistoryString() = "0, 1, 0, 1, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "...........................x.." +ObservationTensor(0): ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +Rewards() = [-0.002] +Returns() = [-0.006] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/dots_and_boxes.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/dots_and_boxes.txt new file mode 100644 index 0000000..ab946f6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/dots_and_boxes.txt @@ -0,0 +1,358 @@ +game: dots_and_boxes + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Dots and Boxes" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["num_cols", "num_rows", "utility_margin"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "dots_and_boxes" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 12 +PolicyTensorShape() = [12] +MaxChanceOutcomes() = 0 +GetParameters() = {num_cols=2,num_rows=2,utility_margin=False} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [3, 9, 3] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 81 +MaxGameLength() = 12 +ToString() = "dots_and_boxes()" + +# State 0 +# ┌╴ ╶┬╴ ╶┐ +# +# ├╴ ╶┼╴ ╶┤ +# +# └╴ ╶┴╴ ╶┘ +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = "┌╴ ╶┬╴ ╶┐\n \n├╴ ╶┼╴ ╶┤\n \n└╴ ╶┴╴ ╶┘\n" +ObservationString(1) = "┌╴ ╶┬╴ ╶┐\n \n├╴ ╶┼╴ ╶┤\n \n└╴ ╶┴╴ ╶┘\n" +ObservationTensor(0): +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +ObservationTensor(1): +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] +StringLegalActions() = ["P1(h,0,0)", "P1(h,0,1)", "P1(h,1,0)", "P1(h,1,1)", "P1(h,2,0)", "P1(h,2,1)", "P1(v,0,0)", "P1(v,0,1)", "P1(v,0,2)", "P1(v,1,0)", "P1(v,1,1)", "P1(v,1,2)"] + +# Apply action "P1(v,1,1)" +action: 10 + +# State 1 +# ┌╴ ╶┬╴ ╶┐ +# +# ├╴ ╶┼╴ ╶┤ +# │ +# └╴ ╶┴╴ ╶┘ +IsTerminal() = False +History() = [10] +HistoryString() = "10" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "10" +InformationStateString(1) = "10" +ObservationString(0) = "┌╴ ╶┬╴ ╶┐\n \n├╴ ╶┼╴ ╶┤\n │ \n└╴ ╶┴╴ ╶┘\n" +ObservationString(1) = "┌╴ ╶┬╴ ╶┐\n \n├╴ 
╶┼╴ ╶┤\n │ \n└╴ ╶┴╴ ╶┘\n" +ObservationTensor(0): +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◯◉ ◯◉◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +ObservationTensor(1): +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◯◉ ◯◉◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11] +StringLegalActions() = ["P2(h,0,0)", "P2(h,0,1)", "P2(h,1,0)", "P2(h,1,1)", "P2(h,2,0)", "P2(h,2,1)", "P2(v,0,0)", "P2(v,0,1)", "P2(v,0,2)", "P2(v,1,0)", "P2(v,1,2)"] + +# Apply action "P2(h,0,1)" +action: 1 + +# State 2 +# ┌╴ ╶┬───┐ +# +# ├╴ ╶┼╴ ╶┤ +# │ +# └╴ ╶┴╴ ╶┘ +IsTerminal() = False +History() = [10, 1] +HistoryString() = "10, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "10, 1" +InformationStateString(1) = "10, 1" +ObservationString(0) = "┌╴ ╶┬───┐\n \n├╴ ╶┼╴ ╶┤\n │ \n└╴ ╶┴╴ ╶┘\n" +ObservationString(1) = "┌╴ ╶┬───┐\n \n├╴ ╶┼╴ ╶┤\n │ \n└╴ ╶┴╴ ╶┘\n" +ObservationTensor(0): +◉◉◉ ◯◯◯ ◯◯◯ +◯◉◉ ◯◯◯ ◉◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◯◉ ◯◉◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +ObservationTensor(1): +◉◉◉ ◯◯◯ ◯◯◯ +◯◉◉ ◯◯◯ ◉◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◯◉ ◯◉◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 2, 3, 4, 5, 6, 7, 8, 9, 11] +StringLegalActions() = ["P1(h,0,0)", "P1(h,1,0)", "P1(h,1,1)", "P1(h,2,0)", "P1(h,2,1)", "P1(v,0,0)", "P1(v,0,1)", "P1(v,0,2)", "P1(v,1,0)", "P1(v,1,2)"] + +# Apply action "P1(v,0,2)" +action: 8 + +# State 3 +# ┌╴ ╶┬───┐ +# │ +# ├╴ ╶┼╴ ╶┤ +# │ +# └╴ ╶┴╴ ╶┘ +IsTerminal() = False +History() = [10, 1, 8] +HistoryString() = "10, 1, 8" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "10, 1, 8" +InformationStateString(1) = "10, 1, 8" +ObservationString(0) = "┌╴ ╶┬───┐\n │\n├╴ ╶┼╴ ╶┤\n │ \n└╴ ╶┴╴ ╶┘\n" +ObservationString(1) = "┌╴ ╶┬───┐\n │\n├╴ ╶┼╴ ╶┤\n │ \n└╴ ╶┴╴ ╶┘\n" +ObservationTensor(0): +◉◉◉ ◯◯◯ ◯◯◯ +◯◉◉ ◯◯◯ ◉◯◯ +◉◯◉ ◯◉◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◯◉ ◯◉◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +ObservationTensor(1): +◉◉◉ ◯◯◯ ◯◯◯ +◯◉◉ ◯◯◯ ◉◯◯ +◉◯◉ ◯◉◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◯◉ ◯◉◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 2, 3, 4, 5, 6, 7, 9, 11] +StringLegalActions() = ["P2(h,0,0)", "P2(h,1,0)", "P2(h,1,1)", "P2(h,2,0)", "P2(h,2,1)", "P2(v,0,0)", "P2(v,0,1)", "P2(v,1,0)", "P2(v,1,2)"] + +# Apply action "P2(v,1,2)" +action: 11 + +# State 4 +# ┌╴ ╶┬───┐ +# │ +# ├╴ ╶┼╴ ╶┤ +# │ │ +# └╴ ╶┴╴ ╶┘ +IsTerminal() = False +History() = [10, 1, 8, 11] +HistoryString() = "10, 1, 8, 11" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "10, 1, 8, 11" +InformationStateString(1) = "10, 1, 8, 11" +ObservationString(0) = "┌╴ ╶┬───┐\n │\n├╴ ╶┼╴ ╶┤\n │ │\n└╴ ╶┴╴ ╶┘\n" +ObservationString(1) = "┌╴ ╶┬───┐\n │\n├╴ ╶┼╴ ╶┤\n │ │\n└╴ ╶┴╴ ╶┘\n" +ObservationTensor(0): +◉◉◉ ◯◯◯ ◯◯◯ +◯◉◉ ◯◯◯ ◉◯◯ +◉◯◉ ◯◉◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◯◉ ◯◉◯ ◯◯◯ +◉◯◉ ◯◯◯ ◯◉◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +ObservationTensor(1): +◉◉◉ ◯◯◯ ◯◯◯ +◯◉◉ ◯◯◯ ◉◯◯ +◉◯◉ ◯◉◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◯◉ ◯◉◯ ◯◯◯ +◉◯◉ ◯◯◯ ◯◉◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 2, 3, 4, 5, 6, 7, 9] +StringLegalActions() = ["P1(h,0,0)", "P1(h,1,0)", "P1(h,1,1)", "P1(h,2,0)", "P1(h,2,1)", "P1(v,0,0)", "P1(v,0,1)", "P1(v,1,0)"] + +# Apply action "P1(v,1,0)" +action: 9 
+ +# State 5 +# ┌╴ ╶┬───┐ +# │ +# ├╴ ╶┼╴ ╶┤ +# │ │ │ +# └╴ ╶┴╴ ╶┘ +IsTerminal() = False +History() = [10, 1, 8, 11, 9] +HistoryString() = "10, 1, 8, 11, 9" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "10, 1, 8, 11, 9" +InformationStateString(1) = "10, 1, 8, 11, 9" +ObservationString(0) = "┌╴ ╶┬───┐\n │\n├╴ ╶┼╴ ╶┤\n│ │ │\n└╴ ╶┴╴ ╶┘\n" +ObservationString(1) = "┌╴ ╶┬───┐\n │\n├╴ ╶┼╴ ╶┤\n│ │ │\n└╴ ╶┴╴ ╶┘\n" +ObservationTensor(0): +◉◉◉ ◯◯◯ ◯◯◯ +◯◉◉ ◯◯◯ ◉◯◯ +◉◯◉ ◯◉◯ ◯◯◯ +◉◯◉ ◯◉◯ ◯◯◯ +◉◯◉ ◯◉◯ ◯◯◯ +◉◯◉ ◯◯◯ ◯◉◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +ObservationTensor(1): +◉◉◉ ◯◯◯ ◯◯◯ +◯◉◉ ◯◯◯ ◉◯◯ +◉◯◉ ◯◉◯ ◯◯◯ +◉◯◉ ◯◉◯ ◯◯◯ +◉◯◉ ◯◉◯ ◯◯◯ +◉◯◉ ◯◯◯ ◯◉◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 2, 3, 4, 5, 6, 7] +StringLegalActions() = ["P2(h,0,0)", "P2(h,1,0)", "P2(h,1,1)", "P2(h,2,0)", "P2(h,2,1)", "P2(v,0,0)", "P2(v,0,1)"] + +# Apply action "P2(h,1,1)" +action: 3 + +# State 6 +# Apply action "P1(h,2,1)" +action: 5 + +# State 7 +# Apply action "P1(h,0,0)" +action: 0 + +# State 8 +# Apply action "P2(h,1,0)" +action: 2 + +# State 9 +# Apply action "P1(v,0,1)" +action: 7 + +# State 10 +# Apply action "P1(v,0,0)" +action: 6 + +# State 11 +# Apply action "P1(h,2,0)" +action: 4 + +# State 12 +# ┌───┬───┐ +# │ 1 │ 1 │ +# ├───┼───┤ +# │ 1 │ 1 │ +# └───┴───┘ +IsTerminal() = True +History() = [10, 1, 8, 11, 9, 3, 5, 0, 2, 7, 6, 4] +HistoryString() = "10, 1, 8, 11, 9, 3, 5, 0, 2, 7, 6, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "10, 1, 8, 11, 9, 3, 5, 0, 2, 7, 6, 4" +InformationStateString(1) = "10, 1, 8, 11, 9, 3, 5, 0, 2, 7, 6, 4" +ObservationString(0) = "┌───┬───┐\n│ 1 │ 1 │\n├───┼───┤\n│ 1 │ 1 │\n└───┴───┘\n" +ObservationString(1) = "┌───┬───┐\n│ 1 │ 1 │\n├───┼───┤\n│ 1 │ 1 │\n└───┴───┘\n" +ObservationTensor(0): +◯◯◯ ◉◉◉ ◯◯◯ +◯◯◯ ◯◉◉ ◉◯◯ +◉◯◉ ◯◉◯ ◯◯◯ +◯◯◯ ◯◉◉ ◉◯◯ +◯◯◯ ◯◉◉ ◉◯◯ +◉◯◉ ◯◯◯ ◯◉◯ +◯◉◉ ◉◯◯ ◯◯◯ +◯◉◉ ◉◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +ObservationTensor(1): +◯◯◯ ◉◉◉ ◯◯◯ +◯◯◯ ◯◉◉ ◉◯◯ +◉◯◉ ◯◉◯ ◯◯◯ +◯◯◯ ◯◉◉ ◉◯◯ +◯◯◯ ◯◉◉ ◉◯◯ +◉◯◉ ◯◯◯ ◯◉◯ +◯◉◉ ◉◯◯ ◯◯◯ +◯◉◉ ◉◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/dou_dizhu.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/dou_dizhu.txt new file mode 100644 index 0000000..0f6b8ce --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/dou_dizhu.txt @@ -0,0 +1,1762 @@ +game: dou_dizhu + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Dou Dizhu" +GameType.max_num_players = 3 +GameType.min_num_players = 3 +GameType.parameter_specification = [] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "dou_dizhu" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 26057 +PolicyTensorShape() = [26057] +MaxChanceOutcomes() = 105 +GetParameters() = {} +NumPlayers() = 3 +MinUtility() = -2.4576e+04 +MaxUtility() = 4.9152e+04 +UtilitySum() = 0.0 +ObservationTensorShape() = [159] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 
159 +MaxGameLength() = 171 +ToString() = "dou_dizhu()" + +# State 0 +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "My hand \nPlayed cards \nface up card rank: -1start player: -3My position from Dizhu: 0" +ObservationString(1) = "My hand \nPlayed cards \nface up card rank: -1start player: -3My position from Dizhu: 1" +ObservationString(2) = "My hand \nPlayed cards \nface up card rank: -1start player: -3My position from Dizhu: 2" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.0196078), (1,0.0196078), (2,0.0196078), (3,0.0196078), (4,0.0196078), (5,0.0196078), (6,0.0196078), (7,0.0196078), (8,0.0196078), (9,0.0196078), (10,0.0196078), (11,0.0196078), (12,0.0196078), (13,0.0196078), (14,0.0196078), (15,0.0196078), (16,0.0196078), (17,0.0196078), (18,0.0196078), (19,0.0196078), (20,0.0196078), (21,0.0196078), (22,0.0196078), (23,0.0196078), (24,0.0196078), (25,0.0196078), (26,0.0196078), (27,0.0196078), (28,0.0196078), (29,0.0196078), (30,0.0196078), (31,0.0196078), (32,0.0196078), (33,0.0196078), (34,0.0196078), (35,0.0196078), (36,0.0196078), (37,0.0196078), (38,0.0196078), (39,0.0196078), (40,0.0196078), (41,0.0196078), (42,0.0196078), (43,0.0196078), (44,0.0196078), (45,0.0196078), (46,0.0196078), (47,0.0196078), (48,0.0196078), (49,0.0196078), (50,0.0196078)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] +StringLegalActions() = ["Decide first card up position 0", "Decide first card up position 1", "Decide first card up position 2", "Decide first card up position 3", "Decide first card up position 4", "Decide first card up position 5", "Decide first card up position 6", "Decide first card up position 7", "Decide first card up position 8", "Decide first card up position 9", "Decide first card up position 10", "Decide first card up position 11", "Decide first card up position 12", "Decide first card up position 13", "Decide first card up position 14", "Decide first card up position 15", "Decide first card up position 16", "Decide first card up position 17", "Decide first card up position 18", "Decide first card up position 19", "Decide first card up position 20", "Decide first card up position 21", "Decide first card up position 22", "Decide first card up position 23", "Decide first card up position 24", "Decide first card up position 25", "Decide first card up position 26", "Decide first card up position 27", "Decide first card up position 28", "Decide first card up position 29", "Decide first card up position 30", "Decide first card up position 31", "Decide first card up position 32", "Decide first card up position 33", "Decide first card up position 34", "Decide first card up position 35", "Decide first card 
up position 36", "Decide first card up position 37", "Decide first card up position 38", "Decide first card up position 39", "Decide first card up position 40", "Decide first card up position 41", "Decide first card up position 42", "Decide first card up position 43", "Decide first card up position 44", "Decide first card up position 45", "Decide first card up position 46", "Decide first card up position 47", "Decide first card up position 48", "Decide first card up position 49", "Decide first card up position 50"] + +# Apply action "Decide first card up position 5" +action: 5 + +# State 1 +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +IsTerminal() = False +History() = [5] +HistoryString() = "5" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "My hand \nPlayed cards \nface up card rank: -1start player: -3My position from Dizhu: 0" +ObservationString(1) = "My hand \nPlayed cards \nface up card rank: -1start player: -3My position from Dizhu: 1" +ObservationString(2) = "My hand \nPlayed cards \nface up card rank: -1start player: -3My position from Dizhu: 2" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(51,0.0185185), (52,0.0185185), (53,0.0185185), (54,0.0185185), (55,0.0185185), (56,0.0185185), (57,0.0185185), (58,0.0185185), (59,0.0185185), (60,0.0185185), (61,0.0185185), (62,0.0185185), (63,0.0185185), (64,0.0185185), (65,0.0185185), (66,0.0185185), (67,0.0185185), (68,0.0185185), (69,0.0185185), (70,0.0185185), (71,0.0185185), (72,0.0185185), (73,0.0185185), (74,0.0185185), (75,0.0185185), (76,0.0185185), (77,0.0185185), (78,0.0185185), (79,0.0185185), (80,0.0185185), (81,0.0185185), (82,0.0185185), (83,0.0185185), (84,0.0185185), (85,0.0185185), (86,0.0185185), (87,0.0185185), (88,0.0185185), (89,0.0185185), (90,0.0185185), (91,0.0185185), (92,0.0185185), (93,0.0185185), (94,0.0185185), (95,0.0185185), (96,0.0185185), (97,0.0185185), (98,0.0185185), (99,0.0185185), (100,0.0185185), (101,0.0185185), (102,0.0185185), (103,0.0185185), (104,0.0185185)] +LegalActions() = [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104] +StringLegalActions() = ["Deal C3", "Deal C4", "Deal C5", "Deal C6", "Deal C7", "Deal C8", "Deal C9", "Deal CT", "Deal CJ", "Deal CQ", "Deal CK", "Deal CA", "Deal C2", "Deal D3", "Deal D4", "Deal D5", "Deal D6", "Deal D7", "Deal D8", "Deal D9", "Deal DT", "Deal DJ", "Deal DQ", "Deal DK", "Deal DA", "Deal D2", "Deal H3", "Deal H4", "Deal H5", "Deal H6", "Deal H7", "Deal H8", "Deal H9", "Deal HT", "Deal HJ", "Deal HQ", "Deal HK", "Deal HA", "Deal H2", "Deal S3", "Deal S4", "Deal S5", "Deal S6", "Deal S7", "Deal S8", "Deal S9", "Deal ST", "Deal SJ", "Deal SQ", "Deal SK", "Deal SA", "Deal S2", "Deal (BWJ)", "Deal (CJ)"] + +# Apply action "Deal D4" +action: 65 + +# State 2 +# Apply action "Deal H5" 
+action: 79 + +# State 3 +# Apply action "Deal S9" +action: 96 + +# State 4 +# Apply action "Deal H9" +action: 83 + +# State 5 +# Apply action "Deal H7" +action: 81 + +# State 6 +# Apply action "Deal H2" +action: 89 + +# State 7 +# Apply action "Deal HK" +action: 87 + +# State 8 +# Apply action "Deal (CJ)" +action: 104 + +# State 9 +# Apply action "Deal CJ" +action: 59 + +# State 10 +# Apply action "Deal CK" +action: 61 + +# State 11 +# Apply action "Deal D8" +action: 69 + +# State 12 +# Apply action "Deal D9" +action: 70 + +# State 13 +# Apply action "Deal C9" +action: 57 + +# State 14 +# Apply action "Deal H6" +action: 80 + +# State 15 +# Apply action "Deal CQ" +action: 60 + +# State 16 +# Apply action "Deal D6" +action: 67 + +# State 17 +# Apply action "Deal DJ" +action: 72 + +# State 18 +# Apply action "Deal C3" +action: 51 + +# State 19 +# Apply action "Deal S4" +action: 91 + +# State 20 +# Apply action "Deal SJ" +action: 98 + +# State 21 +# Apply action "Deal CT" +action: 58 + +# State 22 +# Apply action "Deal D3" +action: 64 + +# State 23 +# Apply action "Deal C2" +action: 63 + +# State 24 +# Apply action "Deal SK" +action: 100 + +# State 25 +# Apply action "Deal (BWJ)" +action: 103 + +# State 26 +# Apply action "Deal H4" +action: 78 + +# State 27 +# Apply action "Deal C7" +action: 55 + +# State 28 +# Apply action "Deal ST" +action: 97 + +# State 29 +# Apply action "Deal S3" +action: 90 + +# State 30 +# Apply action "Deal C4" +action: 52 + +# State 31 +# Apply action "Deal SA" +action: 101 + +# State 32 +# Apply action "Deal S5" +action: 92 + +# State 33 +# Apply action "Deal D5" +action: 66 + +# State 34 +# Apply action "Deal HJ" +action: 85 + +# State 35 +# Apply action "Deal HA" +action: 88 + +# State 36 +# Apply action "Deal C6" +action: 54 + +# State 37 +# Apply action "Deal S6" +action: 93 + +# State 38 +# Apply action "Deal C5" +action: 53 + +# State 39 +# Apply action "Deal S8" +action: 95 + +# State 40 +# Apply action "Deal H8" +action: 82 + +# State 41 +# Apply action "Deal DA" +action: 75 + +# State 42 +# Apply action "Deal S2" +action: 102 + +# State 43 +# Apply action "Deal HQ" +action: 86 + +# State 44 +# Apply action "Deal DK" +action: 74 + +# State 45 +# Apply action "Deal C8" +action: 56 + +# State 46 +# Apply action "Deal HT" +action: 84 + +# State 47 +# Apply action "Deal D7" +action: 68 + +# State 48 +# Apply action "Deal SQ" +action: 99 + +# State 49 +# Apply action "Deal CA" +action: 62 + +# State 50 +# Apply action "Deal D2" +action: 76 + +# State 51 +# Apply action "Deal DQ" +action: 73 + +# State 52 +# 3 3 +# 4 4 +# 555 5 +# 6 6 +# 77 7 +# 8 88 +# 99 +# T +# JJ J +# QQQ +# K K +# AA +# 22 22 +# +# (CJ) +# 3 +# 44 +# +# 66 +# +# 8 +# 99 +# TT +# J +# Q +# KK +# AA +# +# (BWJ) +# +IsTerminal() = False +History() = [5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73] +HistoryString() = "5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "My hand 34466899TTJQKKAA(BWJ)\nPlayed cards \nface up card rank: 12start player: 2My position from Dizhu: 0" +ObservationString(1) = "My hand 345556778JJKAA22(CJ)\nPlayed cards \nface up card rank: 12start player: 
2My position from Dizhu: 1" +ObservationString(2) = "My hand 345678899TJQQQK22\nPlayed cards \nface up card rank: 12start player: 2My position from Dizhu: 2" +ObservationTensor(0): ◯◉◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◉◯◯◉◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(1): ◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◉◯◉◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(2): ◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◉◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["Pass", "Bid 1", "Bid 2", "Bid 3"] + +# Apply action "Pass" +action: 0 + +# State 53 +# 3 3 +# 4 4 +# 555 5 +# 6 6 +# 77 7 +# 8 88 +# 99 +# T +# JJ J +# QQQ +# K K +# AA +# 22 22 +# +# (CJ) +# 3 +# 44 +# +# 66 +# +# 8 +# 99 +# TT +# J +# Q +# KK +# AA +# +# (BWJ) +# +# Bidding phase begin +# Player 2 played Pass +IsTerminal() = False +History() = [5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0] +HistoryString() = "5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "My hand 34466899TTJQKKAA(BWJ)\nPlayed cards \nface up card rank: 12start player: 2My position from Dizhu: 0" +ObservationString(1) = "My hand 345556778JJKAA22(CJ)\nPlayed cards \nface up card rank: 12start player: 2My position from Dizhu: 1" +ObservationString(2) = "My hand 345678899TJQQQK22\nPlayed cards \nface up card rank: 12start player: 2My position from Dizhu: 2" +ObservationTensor(0): ◯◉◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◉◯◯◉◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(1): ◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◉◯◉◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(2): ◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◉◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["Pass", "Bid 1", "Bid 2", "Bid 3"] + +# Apply action "Bid 3" +action: 3 + +# State 54 +# 3 3 +# 4 4 +# 555 5 +# 6 6 +# 77 7 +# 8 88 +# 99 +# T +# JJ J +# QQQ +# K K +# AA +# 22 22 +# +# (CJ) +# 33 +# 44 +# +# 66 +# 7 +# 8 +# 99 +# TTT +# J +# Q +# KK +# AA +# +# (BWJ) +# +# Bidding phase begin +# Player 2 played Pass +# Player 0 played Bid 3 +IsTerminal() = False +History() = [5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3] +HistoryString() = "5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 
76, 73, 0, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "My hand 3344667899TTTJQKKAA(BWJ)\nPlayed cards \nface up card rank: 12start player: 2My position from Dizhu: 0" +ObservationString(1) = "My hand 345556778JJKAA22(CJ)\nPlayed cards \nface up card rank: 12start player: 2My position from Dizhu: 1" +ObservationString(2) = "My hand 345678899TJQQQK22\nPlayed cards \nface up card rank: 12start player: 2My position from Dizhu: 2" +ObservationTensor(0): ◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◉◯◯◉◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(1): ◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◉◯◉◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(2): ◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◉◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 22, 23, 24, 25, 26, 30, 31, 32, 33, 37, 38, 39, 43, 44, 48, 55, 56, 58, 61, 62, 65, 66, 127, 231, 232, 234, 235, 236, 237, 238, 239, 240, 241, 243, 399, 400, 402, 405, 408, 409] +StringLegalActions() = ["3", "4", "6", "7", "8", "9", "T", "J", "Q", "K", "A", "(BWJ)", "6789T", "789TJ", "89TJQ", "9TJQK", "TJQKA", "6789TJ", "789TJQ", "89TJQK", "9TJQKA", "6789TJQ", "789TJQK", "89TJQKA", "6789TJQK", "789TJQKA", "6789TJQKA", "33", "44", "66", "99", "TT", "KK", "AA", "TTT", "3TTT", "4TTT", "6TTT", "7TTT", "8TTT", "9TTT", "TTTJ", "TTTQ", "TTTK", "TTTA", "TTT(BWJ)", "33TTT", "44TTT", "66TTT", "99TTT", "TTTKK", "TTTAA"] + +# Apply action "T" +action: 11 + +# State 55 +# 3 3 +# 4 4 +# 555 5 +# 6 6 +# 77 7 +# 8 88 +# 99 +# T +# JJ J +# QQQ +# K K +# AA +# 22 22 +# +# (CJ) +# 33 +# 44 +# +# 66 +# 7 +# 8 +# 99 +# TT +# J +# Q +# KK +# AA +# +# (BWJ) +# +# Bidding phase begin +# Player 2 played Pass +# Player 0 played Bid 3 +# Playing phase begin +# Player 0 played T +IsTerminal() = False +History() = [5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3, 11] +HistoryString() = "5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3, 11" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "My hand 3344667899TTJQKKAA(BWJ)\nPlayed cards T\nface up card rank: 12start player: 2My position from Dizhu: 0" +ObservationString(1) = "My hand 345556778JJKAA22(CJ)\nPlayed cards T\nface up card rank: 12start player: 2My position from Dizhu: 1" +ObservationString(2) = "My hand 345678899TJQQQK22\nPlayed cards T\nface up card rank: 12start player: 2My position from Dizhu: 2" +ObservationTensor(0): ◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◉◯◯◉◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(1): ◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◉◯◉◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(2): 
◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◉◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 12, 14, 15, 16, 18] +StringLegalActions() = ["Pass", "J", "K", "A", "2", "(CJ)"] + +# Apply action "K" +action: 14 + +# State 56 +# 3 3 +# 4 4 +# 555 5 +# 6 6 +# 77 7 +# 8 88 +# 99 +# T +# JJ J +# QQQ +# K +# AA +# 22 22 +# +# (CJ) +# 33 +# 44 +# +# 66 +# 7 +# 8 +# 99 +# TT +# J +# Q +# KK +# AA +# +# (BWJ) +# +# Bidding phase begin +# Player 2 played Pass +# Player 0 played Bid 3 +# Playing phase begin +# Player 0 played T +# Player 1 played K +IsTerminal() = False +History() = [5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3, 11, 14] +HistoryString() = "5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3, 11, 14" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "My hand 3344667899TTJQKKAA(BWJ)\nPlayed cards TK\nface up card rank: 12start player: 2My position from Dizhu: 0" +ObservationString(1) = "My hand 345556778JJAA22(CJ)\nPlayed cards TK\nface up card rank: 12start player: 2My position from Dizhu: 1" +ObservationString(2) = "My hand 345678899TJQQQK22\nPlayed cards TK\nface up card rank: 12start player: 2My position from Dizhu: 2" +ObservationTensor(0): ◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◉◯◯◉◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(1): ◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◉◯◉◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(2): ◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◉◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 16] +StringLegalActions() = ["Pass", "2"] + +# Apply action "Pass" +action: 0 + +# State 57 +# 3 3 +# 4 4 +# 555 5 +# 6 6 +# 77 7 +# 8 88 +# 99 +# T +# JJ J +# QQQ +# K +# AA +# 22 22 +# +# (CJ) +# 33 +# 44 +# +# 66 +# 7 +# 8 +# 99 +# TT +# J +# Q +# KK +# AA +# +# (BWJ) +# +# Bidding phase begin +# Player 2 played Pass +# Player 0 played Bid 3 +# Playing phase begin +# Player 0 played T +# Player 1 played K +# Player 2 played Pass +IsTerminal() = False +History() = [5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3, 11, 14, 0] +HistoryString() = "5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3, 11, 14, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "My hand 3344667899TTJQKKAA(BWJ)\nPlayed cards TK\nface up card rank: 12start player: 2My position from Dizhu: 0" +ObservationString(1) = "My hand 345556778JJAA22(CJ)\nPlayed cards TK\nface up card rank: 12start player: 2My 
position from Dizhu: 1" +ObservationString(2) = "My hand 345678899TJQQQK22\nPlayed cards TK\nface up card rank: 12start player: 2My position from Dizhu: 2" +ObservationTensor(0): ◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◉◯◯◉◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(1): ◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◉◯◉◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(2): ◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◉◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 15, 17] +StringLegalActions() = ["Pass", "A", "(BWJ)"] + +# Apply action "A" +action: 15 + +# State 58 +# 3 3 +# 4 4 +# 555 5 +# 6 6 +# 77 7 +# 8 88 +# 99 +# T +# JJ J +# QQQ +# K +# AA +# 22 22 +# +# (CJ) +# 33 +# 44 +# +# 66 +# 7 +# 8 +# 99 +# TT +# J +# Q +# KK +# A +# +# (BWJ) +# +# Bidding phase begin +# Player 2 played Pass +# Player 0 played Bid 3 +# Playing phase begin +# Player 0 played T +# Player 1 played K +# Player 2 played Pass +# Player 0 played A +IsTerminal() = False +History() = [5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3, 11, 14, 0, 15] +HistoryString() = "5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3, 11, 14, 0, 15" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "My hand 3344667899TTJQKKA(BWJ)\nPlayed cards TKA\nface up card rank: 12start player: 2My position from Dizhu: 0" +ObservationString(1) = "My hand 345556778JJAA22(CJ)\nPlayed cards TKA\nface up card rank: 12start player: 2My position from Dizhu: 1" +ObservationString(2) = "My hand 345678899TJQQQK22\nPlayed cards TKA\nface up card rank: 12start player: 2My position from Dizhu: 2" +ObservationTensor(0): ◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◯◉◯◯◉◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(1): ◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◉◯◉◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(2): ◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◉◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 16, 18] +StringLegalActions() = ["Pass", "2", "(CJ)"] + +# Apply action "(CJ)" +action: 18 + +# State 59 +# 3 3 +# 4 4 +# 555 5 +# 6 6 +# 77 7 +# 8 88 +# 99 +# T +# JJ J +# QQQ +# K +# AA +# 22 22 +# +# +# 33 +# 44 +# +# 66 +# 7 +# 8 +# 99 +# TT +# J +# Q +# KK +# A +# +# (BWJ) +# +# Bidding phase begin +# Player 2 played Pass +# Player 0 played Bid 3 +# Playing phase begin +# Player 0 played T +# Player 1 played K +# Player 2 played Pass +# Player 0 played A +# Player 1 played (CJ) +IsTerminal() = False +History() = [5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 
102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3, 11, 14, 0, 15, 18] +HistoryString() = "5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3, 11, 14, 0, 15, 18" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "My hand 3344667899TTJQKKA(BWJ)\nPlayed cards TKA(CJ)\nface up card rank: 12start player: 2My position from Dizhu: 0" +ObservationString(1) = "My hand 345556778JJAA22\nPlayed cards TKA(CJ)\nface up card rank: 12start player: 2My position from Dizhu: 1" +ObservationString(2) = "My hand 345678899TJQQQK22\nPlayed cards TKA(CJ)\nface up card rank: 12start player: 2My position from Dizhu: 2" +ObservationTensor(0): ◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◯◉◯◯◉◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◉◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(1): ◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◉◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(2): ◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◉◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0] +StringLegalActions() = ["Pass"] + +# Apply action "Pass" +action: 0 + +# State 60 +# Apply action "Pass" +action: 0 + +# State 61 +# 3 3 +# 4 4 +# 555 5 +# 6 6 +# 77 7 +# 8 88 +# 99 +# T +# JJ J +# QQQ +# K +# AA +# 22 22 +# +# +# 33 +# 44 +# +# 66 +# 7 +# 8 +# 99 +# TT +# J +# Q +# KK +# A +# +# (BWJ) +# +# Bidding phase begin +# Player 2 played Pass +# Player 0 played Bid 3 +# Playing phase begin +# Player 0 played T +# Player 1 played K +# Player 2 played Pass +# Player 0 played A +# Player 1 played (CJ) +# Player 2 played Pass +# Player 0 played Pass +IsTerminal() = False +History() = [5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3, 11, 14, 0, 15, 18, 0, 0] +HistoryString() = "5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3, 11, 14, 0, 15, 18, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "My hand 3344667899TTJQKKA(BWJ)\nPlayed cards TKA(CJ)\nface up card rank: 12start player: 2My position from Dizhu: 0" +ObservationString(1) = "My hand 345556778JJAA22\nPlayed cards TKA(CJ)\nface up card rank: 12start player: 2My position from Dizhu: 1" +ObservationString(2) = "My hand 345678899TJQQQK22\nPlayed cards TKA(CJ)\nface up card rank: 12start player: 2My position from Dizhu: 2" +ObservationTensor(0): ◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◯◉◯◯◉◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◉◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(1): ◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◉◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(2): 
◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◉◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [4, 5, 6, 7, 8, 9, 12, 15, 16, 19, 20, 27, 57, 59, 63, 66, 67, 122, 161, 162, 163, 164, 165, 168, 171, 172, 342, 346, 349, 350] +StringLegalActions() = ["3", "4", "5", "6", "7", "8", "J", "A", "2", "34567", "45678", "345678", "55", "77", "JJ", "AA", "22", "555", "3555", "4555", "5556", "5557", "5558", "555J", "555A", "5552", "55577", "555JJ", "555AA", "55522"] + +# Apply action "555A" +action: 171 + +# State 62 +# Apply action "5QQQ" +action: 261 + +# State 63 +# Apply action "Pass" +action: 0 + +# State 64 +# Apply action "Pass" +action: 0 + +# State 65 +# Apply action "6" +action: 7 + +# State 66 +# Apply action "(BWJ)" +action: 17 + +# State 67 +# Apply action "Pass" +action: 0 + +# State 68 +# Apply action "Pass" +action: 0 + +# State 69 +# Apply action "9TJQKA" +action: 33 + +# State 70 +# Apply action "Pass" +action: 0 + +# State 71 +# Apply action "Pass" +action: 0 + +# State 72 +# Apply action "44" +action: 56 + +# State 73 +# Apply action "Pass" +action: 0 + +# State 74 +# Apply action "22" +action: 67 + +# State 75 +# Apply action "Pass" +action: 0 + +# State 76 +# Apply action "Pass" +action: 0 + +# State 77 +# Apply action "7" +action: 8 + +# State 78 +# Apply action "Pass" +action: 0 + +# State 79 +# Apply action "J" +action: 12 + +# State 80 +# Apply action "K" +action: 14 + +# State 81 +# 3 3 +# 4 4 +# +# 6 +# 77 +# 8 88 +# 99 +# T +# J J +# +# +# A +# 22 +# +# +# 33 +# +# +# 66 +# 7 +# 8 +# 9 +# T +# +# +# K +# +# +# +# +# Bidding phase begin +# Player 2 played Pass +# Player 0 played Bid 3 +# Playing phase begin +# Player 0 played T +# Player 1 played K +# Player 2 played Pass +# Player 0 played A +# Player 1 played (CJ) +# Player 2 played Pass +# Player 0 played Pass +# Player 1 played 555A +# Player 2 played 5QQQ +# Player 0 played Pass +# Player 1 played Pass +# Player 2 played 6 +# Player 0 played (BWJ) +# Player 1 played Pass +# Player 2 played Pass +# Player 0 played 9TJQKA +# Player 1 played Pass +# Player 2 played Pass +# Player 0 played 44 +# Player 1 played Pass +# Player 2 played 22 +# Player 0 played Pass +# Player 1 played Pass +# Player 2 played 7 +# Player 0 played Pass +# Player 1 played J +# Player 2 played K +IsTerminal() = False +History() = [5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3, 11, 14, 0, 15, 18, 0, 0, 171, 261, 0, 0, 7, 17, 0, 0, 33, 0, 0, 56, 0, 67, 0, 0, 8, 0, 12, 14] +HistoryString() = "5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3, 11, 14, 0, 15, 18, 0, 0, 171, 261, 0, 0, 7, 17, 0, 0, 33, 0, 0, 56, 0, 67, 0, 0, 8, 0, 12, 14" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "My hand 3366789TK\nPlayed cards 445555679TTJJQQQQKKKAAA22(BWJ)(CJ)\nface up card rank: 12start player: 2My position from Dizhu: 0" +ObservationString(1) = "My hand 346778JA22\nPlayed cards 445555679TTJJQQQQKKKAAA22(BWJ)(CJ)\nface up card rank: 12start player: 2My position from Dizhu: 1" +ObservationString(2) = "My hand 348899TJ\nPlayed cards 
445555679TTJJQQQQKKKAAA22(BWJ)(CJ)\nface up card rank: 12start player: 2My position from Dizhu: 2" +ObservationTensor(0): ◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◉◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(1): ◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◉◯◯◉◯◯◯◉◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◉◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(2): ◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0] +StringLegalActions() = ["Pass"] + +# Apply action "Pass" +action: 0 + +# State 82 +# Apply action "A" +action: 15 + +# State 83 +# 3 3 +# 4 4 +# +# 6 +# 77 +# 8 88 +# 99 +# T +# J J +# +# +# +# 22 +# +# +# 33 +# +# +# 66 +# 7 +# 8 +# 9 +# T +# +# +# K +# +# +# +# +# Bidding phase begin +# Player 2 played Pass +# Player 0 played Bid 3 +# Playing phase begin +# Player 0 played T +# Player 1 played K +# Player 2 played Pass +# Player 0 played A +# Player 1 played (CJ) +# Player 2 played Pass +# Player 0 played Pass +# Player 1 played 555A +# Player 2 played 5QQQ +# Player 0 played Pass +# Player 1 played Pass +# Player 2 played 6 +# Player 0 played (BWJ) +# Player 1 played Pass +# Player 2 played Pass +# Player 0 played 9TJQKA +# Player 1 played Pass +# Player 2 played Pass +# Player 0 played 44 +# Player 1 played Pass +# Player 2 played 22 +# Player 0 played Pass +# Player 1 played Pass +# Player 2 played 7 +# Player 0 played Pass +# Player 1 played J +# Player 2 played K +# Player 0 played Pass +# Player 1 played A +IsTerminal() = False +History() = [5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3, 11, 14, 0, 15, 18, 0, 0, 171, 261, 0, 0, 7, 17, 0, 0, 33, 0, 0, 56, 0, 67, 0, 0, 8, 0, 12, 14, 0, 15] +HistoryString() = "5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3, 11, 14, 0, 15, 18, 0, 0, 171, 261, 0, 0, 7, 17, 0, 0, 33, 0, 0, 56, 0, 67, 0, 0, 8, 0, 12, 14, 0, 15" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "My hand 3366789TK\nPlayed cards 445555679TTJJQQQQKKKAAAA22(BWJ)(CJ)\nface up card rank: 12start player: 2My position from Dizhu: 0" +ObservationString(1) = "My hand 346778J22\nPlayed cards 445555679TTJJQQQQKKKAAAA22(BWJ)(CJ)\nface up card rank: 12start player: 2My position from Dizhu: 1" +ObservationString(2) = "My hand 348899TJ\nPlayed cards 445555679TTJJQQQQKKKAAAA22(BWJ)(CJ)\nface up card rank: 12start player: 2My position from Dizhu: 2" +ObservationTensor(0): ◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◉◯◯◉◯◯◯◉◯◯◉◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(1): ◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◉◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◉◯◯◉◯◯◯◉◯◯◯◉◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(2): ◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◉◯◯◉◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +Rewards() = [0, 
0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0] +StringLegalActions() = ["Pass"] + +# Apply action "Pass" +action: 0 + +# State 84 +# Apply action "Pass" +action: 0 + +# State 85 +# 3 3 +# 4 4 +# +# 6 +# 77 +# 8 88 +# 99 +# T +# J J +# +# +# +# 22 +# +# +# 33 +# +# +# 66 +# 7 +# 8 +# 9 +# T +# +# +# K +# +# +# +# +# Bidding phase begin +# Player 2 played Pass +# Player 0 played Bid 3 +# Playing phase begin +# Player 0 played T +# Player 1 played K +# Player 2 played Pass +# Player 0 played A +# Player 1 played (CJ) +# Player 2 played Pass +# Player 0 played Pass +# Player 1 played 555A +# Player 2 played 5QQQ +# Player 0 played Pass +# Player 1 played Pass +# Player 2 played 6 +# Player 0 played (BWJ) +# Player 1 played Pass +# Player 2 played Pass +# Player 0 played 9TJQKA +# Player 1 played Pass +# Player 2 played Pass +# Player 0 played 44 +# Player 1 played Pass +# Player 2 played 22 +# Player 0 played Pass +# Player 1 played Pass +# Player 2 played 7 +# Player 0 played Pass +# Player 1 played J +# Player 2 played K +# Player 0 played Pass +# Player 1 played A +# Player 2 played Pass +# Player 0 played Pass +IsTerminal() = False +History() = [5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3, 11, 14, 0, 15, 18, 0, 0, 171, 261, 0, 0, 7, 17, 0, 0, 33, 0, 0, 56, 0, 67, 0, 0, 8, 0, 12, 14, 0, 15, 0, 0] +HistoryString() = "5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3, 11, 14, 0, 15, 18, 0, 0, 171, 261, 0, 0, 7, 17, 0, 0, 33, 0, 0, 56, 0, 67, 0, 0, 8, 0, 12, 14, 0, 15, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "My hand 3366789TK\nPlayed cards 445555679TTJJQQQQKKKAAAA22(BWJ)(CJ)\nface up card rank: 12start player: 2My position from Dizhu: 0" +ObservationString(1) = "My hand 346778J22\nPlayed cards 445555679TTJJQQQQKKKAAAA22(BWJ)(CJ)\nface up card rank: 12start player: 2My position from Dizhu: 1" +ObservationString(2) = "My hand 348899TJ\nPlayed cards 445555679TTJJQQQQKKKAAAA22(BWJ)(CJ)\nface up card rank: 12start player: 2My position from Dizhu: 2" +ObservationTensor(0): ◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◉◯◯◉◯◯◯◉◯◯◉◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(1): ◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◉◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◉◯◯◉◯◯◯◉◯◯◯◉◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(2): ◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◉◯◯◉◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [4, 5, 7, 8, 9, 12, 16, 59, 67] +StringLegalActions() = ["3", "4", "6", "7", "8", "J", "2", "77", "22"] + +# Apply action "8" +action: 9 + +# State 86 +# Apply action "J" +action: 12 + +# State 87 +# Apply action "K" +action: 14 + +# State 88 +# Apply action "2" +action: 16 + +# State 89 +# Apply action "Pass" +action: 0 + +# State 90 +# Apply action "Pass" +action: 0 + +# State 91 +# Apply action "7" +action: 8 + +# State 92 +# Apply action "T" +action: 11 + +# State 93 +# Apply action "Pass" +action: 0 + +# State 94 +# Apply action "2" +action: 16 
+ +# State 95 +# Apply action "Pass" +action: 0 + +# State 96 +# Apply action "Pass" +action: 0 + +# State 97 +# Apply action "6" +action: 7 + +# State 98 +# Apply action "Pass" +action: 0 + +# State 99 +# Apply action "T" +action: 11 + +# State 100 +# Apply action "Pass" +action: 0 + +# State 101 +# Apply action "Pass" +action: 0 + +# State 102 +# Apply action "66" +action: 58 + +# State 103 +# Apply action "Pass" +action: 0 + +# State 104 +# Apply action "99" +action: 61 + +# State 105 +# Apply action "Pass" +action: 0 + +# State 106 +# Apply action "Pass" +action: 0 + +# State 107 +# Apply action "8" +action: 9 + +# State 108 +# Apply action "9" +action: 10 + +# State 109 +# Apply action "J" +action: 12 + +# State 110 +# Apply action "Pass" +action: 0 + +# State 111 +# 3 3 +# 4 4 +# +# +# 7 +# 8 +# +# +# +# +# +# +# +# +# +# 33 +# +# +# +# 7 +# 8 +# +# +# +# +# +# +# +# +# +# Bidding phase begin +# Player 2 played Pass +# Player 0 played Bid 3 +# Playing phase begin +# Player 0 played T +# Player 1 played K +# Player 2 played Pass +# Player 0 played A +# Player 1 played (CJ) +# Player 2 played Pass +# Player 0 played Pass +# Player 1 played 555A +# Player 2 played 5QQQ +# Player 0 played Pass +# Player 1 played Pass +# Player 2 played 6 +# Player 0 played (BWJ) +# Player 1 played Pass +# Player 2 played Pass +# Player 0 played 9TJQKA +# Player 1 played Pass +# Player 2 played Pass +# Player 0 played 44 +# Player 1 played Pass +# Player 2 played 22 +# Player 0 played Pass +# Player 1 played Pass +# Player 2 played 7 +# Player 0 played Pass +# Player 1 played J +# Player 2 played K +# Player 0 played Pass +# Player 1 played A +# Player 2 played Pass +# Player 0 played Pass +# Player 1 played 8 +# Player 2 played J +# Player 0 played K +# Player 1 played 2 +# Player 2 played Pass +# Player 0 played Pass +# Player 1 played 7 +# Player 2 played T +# Player 0 played Pass +# Player 1 played 2 +# Player 2 played Pass +# Player 0 played Pass +# Player 1 played 6 +# Player 2 played Pass +# Player 0 played T +# Player 1 played Pass +# Player 2 played Pass +# Player 0 played 66 +# Player 1 played Pass +# Player 2 played 99 +# Player 0 played Pass +# Player 1 played Pass +# Player 2 played 8 +# Player 0 played 9 +# Player 1 played J +# Player 2 played Pass +IsTerminal() = False +History() = [5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3, 11, 14, 0, 15, 18, 0, 0, 171, 261, 0, 0, 7, 17, 0, 0, 33, 0, 0, 56, 0, 67, 0, 0, 8, 0, 12, 14, 0, 15, 0, 0, 9, 12, 14, 16, 0, 0, 8, 11, 0, 16, 0, 0, 7, 0, 11, 0, 0, 58, 0, 61, 0, 0, 9, 10, 12, 0] +HistoryString() = "5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3, 11, 14, 0, 15, 18, 0, 0, 171, 261, 0, 0, 7, 17, 0, 0, 33, 0, 0, 56, 0, 67, 0, 0, 8, 0, 12, 14, 0, 15, 0, 0, 9, 12, 14, 16, 0, 0, 8, 11, 0, 16, 0, 0, 7, 0, 11, 0, 0, 58, 0, 61, 0, 0, 9, 10, 12, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "My hand 3378\nPlayed cards 445555666677889999TTTTJJJJQQQQKKKKAAAA2222(BWJ)(CJ)\nface up card rank: 12start player: 2My position from Dizhu: 0" +ObservationString(1) = "My hand 347\nPlayed cards 445555666677889999TTTTJJJJQQQQKKKKAAAA2222(BWJ)(CJ)\nface up card rank: 12start player: 
2My position from Dizhu: 1" +ObservationString(2) = "My hand 348\nPlayed cards 445555666677889999TTTTJJJJQQQQKKKKAAAA2222(BWJ)(CJ)\nface up card rank: 12start player: 2My position from Dizhu: 2" +ObservationTensor(0): ◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◉◯◯◉◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(1): ◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◉◯◯◯◉◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(2): ◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0] +StringLegalActions() = ["Pass"] + +# Apply action "Pass" +action: 0 + +# State 112 +# Apply action "4" +action: 5 + +# State 113 +# 3 3 +# 4 +# +# +# 7 +# 8 +# +# +# +# +# +# +# +# +# +# 33 +# +# +# +# 7 +# 8 +# +# +# +# +# +# +# +# +# +# Bidding phase begin +# Player 2 played Pass +# Player 0 played Bid 3 +# Playing phase begin +# Player 0 played T +# Player 1 played K +# Player 2 played Pass +# Player 0 played A +# Player 1 played (CJ) +# Player 2 played Pass +# Player 0 played Pass +# Player 1 played 555A +# Player 2 played 5QQQ +# Player 0 played Pass +# Player 1 played Pass +# Player 2 played 6 +# Player 0 played (BWJ) +# Player 1 played Pass +# Player 2 played Pass +# Player 0 played 9TJQKA +# Player 1 played Pass +# Player 2 played Pass +# Player 0 played 44 +# Player 1 played Pass +# Player 2 played 22 +# Player 0 played Pass +# Player 1 played Pass +# Player 2 played 7 +# Player 0 played Pass +# Player 1 played J +# Player 2 played K +# Player 0 played Pass +# Player 1 played A +# Player 2 played Pass +# Player 0 played Pass +# Player 1 played 8 +# Player 2 played J +# Player 0 played K +# Player 1 played 2 +# Player 2 played Pass +# Player 0 played Pass +# Player 1 played 7 +# Player 2 played T +# Player 0 played Pass +# Player 1 played 2 +# Player 2 played Pass +# Player 0 played Pass +# Player 1 played 6 +# Player 2 played Pass +# Player 0 played T +# Player 1 played Pass +# Player 2 played Pass +# Player 0 played 66 +# Player 1 played Pass +# Player 2 played 99 +# Player 0 played Pass +# Player 1 played Pass +# Player 2 played 8 +# Player 0 played 9 +# Player 1 played J +# Player 2 played Pass +# Player 0 played Pass +# Player 1 played 4 +IsTerminal() = False +History() = [5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3, 11, 14, 0, 15, 18, 0, 0, 171, 261, 0, 0, 7, 17, 0, 0, 33, 0, 0, 56, 0, 67, 0, 0, 8, 0, 12, 14, 0, 15, 0, 0, 9, 12, 14, 16, 0, 0, 8, 11, 0, 16, 0, 0, 7, 0, 11, 0, 0, 58, 0, 61, 0, 0, 9, 10, 12, 0, 0, 5] +HistoryString() = "5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3, 11, 14, 0, 15, 18, 0, 0, 171, 261, 0, 0, 7, 17, 0, 0, 33, 0, 0, 56, 0, 67, 0, 0, 8, 0, 12, 14, 0, 15, 0, 0, 9, 12, 14, 16, 0, 0, 8, 11, 0, 16, 0, 0, 7, 0, 11, 0, 0, 58, 0, 61, 0, 0, 9, 10, 12, 0, 0, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "My hand 3378\nPlayed cards 
4445555666677889999TTTTJJJJQQQQKKKKAAAA2222(BWJ)(CJ)\nface up card rank: 12start player: 2My position from Dizhu: 0" +ObservationString(1) = "My hand 37\nPlayed cards 4445555666677889999TTTTJJJJQQQQKKKKAAAA2222(BWJ)(CJ)\nface up card rank: 12start player: 2My position from Dizhu: 1" +ObservationString(2) = "My hand 348\nPlayed cards 4445555666677889999TTTTJJJJQQQQKKKKAAAA2222(BWJ)(CJ)\nface up card rank: 12start player: 2My position from Dizhu: 2" +ObservationTensor(0): ◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◉◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◉◯◯◉◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(1): ◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◉◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◉◯◯◯◉◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(2): ◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◉◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 9] +StringLegalActions() = ["Pass", "8"] + +# Apply action "Pass" +action: 0 + +# State 114 +# Apply action "Pass" +action: 0 + +# State 115 +# 3 3 +# 4 +# +# +# 7 +# 8 +# +# +# +# +# +# +# +# +# +# 33 +# +# +# +# 7 +# 8 +# +# +# +# +# +# +# +# +# +# Bidding phase begin +# Player 2 played Pass +# Player 0 played Bid 3 +# Playing phase begin +# Player 0 played T +# Player 1 played K +# Player 2 played Pass +# Player 0 played A +# Player 1 played (CJ) +# Player 2 played Pass +# Player 0 played Pass +# Player 1 played 555A +# Player 2 played 5QQQ +# Player 0 played Pass +# Player 1 played Pass +# Player 2 played 6 +# Player 0 played (BWJ) +# Player 1 played Pass +# Player 2 played Pass +# Player 0 played 9TJQKA +# Player 1 played Pass +# Player 2 played Pass +# Player 0 played 44 +# Player 1 played Pass +# Player 2 played 22 +# Player 0 played Pass +# Player 1 played Pass +# Player 2 played 7 +# Player 0 played Pass +# Player 1 played J +# Player 2 played K +# Player 0 played Pass +# Player 1 played A +# Player 2 played Pass +# Player 0 played Pass +# Player 1 played 8 +# Player 2 played J +# Player 0 played K +# Player 1 played 2 +# Player 2 played Pass +# Player 0 played Pass +# Player 1 played 7 +# Player 2 played T +# Player 0 played Pass +# Player 1 played 2 +# Player 2 played Pass +# Player 0 played Pass +# Player 1 played 6 +# Player 2 played Pass +# Player 0 played T +# Player 1 played Pass +# Player 2 played Pass +# Player 0 played 66 +# Player 1 played Pass +# Player 2 played 99 +# Player 0 played Pass +# Player 1 played Pass +# Player 2 played 8 +# Player 0 played 9 +# Player 1 played J +# Player 2 played Pass +# Player 0 played Pass +# Player 1 played 4 +# Player 2 played Pass +# Player 0 played Pass +IsTerminal() = False +History() = [5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3, 11, 14, 0, 15, 18, 0, 0, 171, 261, 0, 0, 7, 17, 0, 0, 33, 0, 0, 56, 0, 67, 0, 0, 8, 0, 12, 14, 0, 15, 0, 0, 9, 12, 14, 16, 0, 0, 8, 11, 0, 16, 0, 0, 7, 0, 11, 0, 0, 58, 0, 61, 0, 0, 9, 10, 12, 0, 0, 5, 0, 0] +HistoryString() = "5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3, 11, 14, 0, 15, 18, 0, 0, 171, 261, 0, 0, 7, 
17, 0, 0, 33, 0, 0, 56, 0, 67, 0, 0, 8, 0, 12, 14, 0, 15, 0, 0, 9, 12, 14, 16, 0, 0, 8, 11, 0, 16, 0, 0, 7, 0, 11, 0, 0, 58, 0, 61, 0, 0, 9, 10, 12, 0, 0, 5, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "My hand 3378\nPlayed cards 4445555666677889999TTTTJJJJQQQQKKKKAAAA2222(BWJ)(CJ)\nface up card rank: 12start player: 2My position from Dizhu: 0" +ObservationString(1) = "My hand 37\nPlayed cards 4445555666677889999TTTTJJJJQQQQKKKKAAAA2222(BWJ)(CJ)\nface up card rank: 12start player: 2My position from Dizhu: 1" +ObservationString(2) = "My hand 348\nPlayed cards 4445555666677889999TTTTJJJJQQQQKKKKAAAA2222(BWJ)(CJ)\nface up card rank: 12start player: 2My position from Dizhu: 2" +ObservationTensor(0): ◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◉◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◉◯◯◉◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(1): ◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◉◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◉◯◯◯◉◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(2): ◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◉◉◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [4, 8] +StringLegalActions() = ["3", "7"] + +# Apply action "7" +action: 8 + +# State 116 +# Apply action "Pass" +action: 0 + +# State 117 +# Apply action "Pass" +action: 0 + +# State 118 +# Apply action "3" +action: 4 + +# State 119 +# 3 3 +# 4 44 +# 5 +# 6 66 +# 7 +# 88 8 +# 99 99 +# T TT +# J J +# QQQ Q +# K KK +# AA +# 22 +# (BWJ) +# +# 33 +# 4 +# 555 +# 6 +# 777 +# 8 +# +# T +# JJ +# +# K +# AA +# 22 +# +# (CJ) +# Bidding phase begin +# Player 2 played Pass +# Player 0 played Bid 3 +# Playing phase begin +# Player 0 played T +# Player 1 played K +# Player 2 played Pass +# Player 0 played A +# Player 1 played (CJ) +# Player 2 played Pass +# Player 0 played Pass +# Player 1 played 555A +# Player 2 played 5QQQ +# Player 0 played Pass +# Player 1 played Pass +# Player 2 played 6 +# Player 0 played (BWJ) +# Player 1 played Pass +# Player 2 played Pass +# Player 0 played 9TJQKA +# Player 1 played Pass +# Player 2 played Pass +# Player 0 played 44 +# Player 1 played Pass +# Player 2 played 22 +# Player 0 played Pass +# Player 1 played Pass +# Player 2 played 7 +# Player 0 played Pass +# Player 1 played J +# Player 2 played K +# Player 0 played Pass +# Player 1 played A +# Player 2 played Pass +# Player 0 played Pass +# Player 1 played 8 +# Player 2 played J +# Player 0 played K +# Player 1 played 2 +# Player 2 played Pass +# Player 0 played Pass +# Player 1 played 7 +# Player 2 played T +# Player 0 played Pass +# Player 1 played 2 +# Player 2 played Pass +# Player 0 played Pass +# Player 1 played 6 +# Player 2 played Pass +# Player 0 played T +# Player 1 played Pass +# Player 2 played Pass +# Player 0 played 66 +# Player 1 played Pass +# Player 2 played 99 +# Player 0 played Pass +# Player 1 played Pass +# Player 2 played 8 +# Player 0 played 9 +# Player 1 played J +# Player 2 played Pass +# Player 0 played Pass +# Player 1 played 4 +# Player 2 played Pass +# Player 0 played Pass +# Player 1 played 7 +# Player 2 played Pass +# Player 0 played Pass +# Player 1 played 3 +# The results are: +# Player 0 got -6.000000 +# Player 1 got 3.000000 +# Player 2 got 3.000000 +IsTerminal() = True +History() = [5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 
67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3, 11, 14, 0, 15, 18, 0, 0, 171, 261, 0, 0, 7, 17, 0, 0, 33, 0, 0, 56, 0, 67, 0, 0, 8, 0, 12, 14, 0, 15, 0, 0, 9, 12, 14, 16, 0, 0, 8, 11, 0, 16, 0, 0, 7, 0, 11, 0, 0, 58, 0, 61, 0, 0, 9, 10, 12, 0, 0, 5, 0, 0, 8, 0, 0, 4] +HistoryString() = "5, 65, 79, 96, 83, 81, 89, 87, 104, 59, 61, 69, 70, 57, 80, 60, 67, 72, 51, 91, 98, 58, 64, 63, 100, 103, 78, 55, 97, 90, 52, 101, 92, 66, 85, 88, 54, 93, 53, 95, 82, 75, 102, 86, 74, 56, 84, 68, 99, 62, 76, 73, 0, 3, 11, 14, 0, 15, 18, 0, 0, 171, 261, 0, 0, 7, 17, 0, 0, 33, 0, 0, 56, 0, 67, 0, 0, 8, 0, 12, 14, 0, 15, 0, 0, 9, 12, 14, 16, 0, 0, 8, 11, 0, 16, 0, 0, 7, 0, 11, 0, 0, 58, 0, 61, 0, 0, 9, 10, 12, 0, 0, 5, 0, 0, 8, 0, 0, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "My hand 3378\nPlayed cards 344455556666777889999TTTTJJJJQQQQKKKKAAAA2222(BWJ)(CJ)\nface up card rank: 12start player: 2My position from Dizhu: 0" +ObservationString(1) = "My hand \nPlayed cards 344455556666777889999TTTTJJJJQQQQKKKKAAAA2222(BWJ)(CJ)\nface up card rank: 12start player: 2My position from Dizhu: 1" +ObservationString(2) = "My hand 348\nPlayed cards 344455556666777889999TTTTJJJJQQQQKKKKAAAA2222(BWJ)(CJ)\nface up card rank: 12start player: 2My position from Dizhu: 2" +ObservationTensor(0): ◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◉◯◯◉◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(1): ◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◉◯◯◯◉◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(2): ◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◉◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +Rewards() = [-6, 3, 3] +Returns() = [-6, 3, 3] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/einstein_wurfelt_nicht.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/einstein_wurfelt_nicht.txt new file mode 100644 index 0000000..b417120 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/einstein_wurfelt_nicht.txt @@ -0,0 +1,376 @@ +game: einstein_wurfelt_nicht + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "einstein_wurfelt_nicht" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = [] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "einstein_wurfelt_nicht" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 300 +PolicyTensorShape() = [300] +MaxChanceOutcomes() = 720 +GetParameters() = {} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [300] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 300 +MaxGameLength() = 112 +ToString() = "einstein_wurfelt_nicht()" + +# State 0 +# |__||__||__||__||__| +# |__||__||__||__||__| +# 
|__||__||__||__||__| +# |__||__||__||__||__| +# |__||__||__||__||__| +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "|__||__||__||__||__|\n|__||__||__||__||__|\n|__||__||__||__||__|\n|__||__||__||__||__|\n|__||__||__||__||__|\n" +ObservationString(1) = "|__||__||__||__||__|\n|__||__||__||__||__|\n|__||__||__||__||__|\n|__||__||__||__||__|\n|__||__||__||__||__|\n" +ObservationTensor(0): zeros(300) +ObservationTensor(1): zeros(300) +ChanceOutcomes() = [(0,0.00138889), (1,0.00138889), (2,0.00138889), (3,0.00138889), (4,0.00138889), (5,0.00138889), (6,0.00138889), (7,0.00138889), (8,0.00138889), (9,0.00138889), (10,0.00138889), (11,0.00138889), (12,0.00138889), (13,0.00138889), (14,0.00138889), (15,0.00138889), (16,0.00138889), (17,0.00138889), (18,0.00138889), (19,0.00138889), (20,0.00138889), (21,0.00138889), (22,0.00138889), (23,0.00138889), (24,0.00138889), (25,0.00138889), (26,0.00138889), (27,0.00138889), (28,0.00138889), (29,0.00138889), (30,0.00138889), (31,0.00138889), (32,0.00138889), (33,0.00138889), (34,0.00138889), (35,0.00138889), (36,0.00138889), (37,0.00138889), (38,0.00138889), (39,0.00138889), (40,0.00138889), (41,0.00138889), (42,0.00138889), (43,0.00138889), (44,0.00138889), (45,0.00138889), (46,0.00138889), (47,0.00138889), (48,0.00138889), (49,0.00138889), (50,0.00138889), (51,0.00138889), (52,0.00138889), (53,0.00138889), (54,0.00138889), (55,0.00138889), (56,0.00138889), (57,0.00138889), (58,0.00138889), (59,0.00138889), (60,0.00138889), (61,0.00138889), (62,0.00138889), (63,0.00138889), (64,0.00138889), (65,0.00138889), (66,0.00138889), (67,0.00138889), (68,0.00138889), (69,0.00138889), (70,0.00138889), (71,0.00138889), (72,0.00138889), (73,0.00138889), (74,0.00138889), (75,0.00138889), (76,0.00138889), (77,0.00138889), (78,0.00138889), (79,0.00138889), (80,0.00138889), (81,0.00138889), (82,0.00138889), (83,0.00138889), (84,0.00138889), (85,0.00138889), (86,0.00138889), (87,0.00138889), (88,0.00138889), (89,0.00138889), (90,0.00138889), (91,0.00138889), (92,0.00138889), (93,0.00138889), (94,0.00138889), (95,0.00138889), (96,0.00138889), (97,0.00138889), (98,0.00138889), (99,0.00138889), (100,0.00138889), (101,0.00138889), (102,0.00138889), (103,0.00138889), (104,0.00138889), (105,0.00138889), (106,0.00138889), (107,0.00138889), (108,0.00138889), (109,0.00138889), (110,0.00138889), (111,0.00138889), (112,0.00138889), (113,0.00138889), (114,0.00138889), (115,0.00138889), (116,0.00138889), (117,0.00138889), (118,0.00138889), (119,0.00138889), (120,0.00138889), (121,0.00138889), (122,0.00138889), (123,0.00138889), (124,0.00138889), (125,0.00138889), (126,0.00138889), (127,0.00138889), (128,0.00138889), (129,0.00138889), (130,0.00138889), (131,0.00138889), (132,0.00138889), (133,0.00138889), (134,0.00138889), (135,0.00138889), (136,0.00138889), (137,0.00138889), (138,0.00138889), (139,0.00138889), (140,0.00138889), (141,0.00138889), (142,0.00138889), (143,0.00138889), (144,0.00138889), (145,0.00138889), (146,0.00138889), (147,0.00138889), (148,0.00138889), (149,0.00138889), (150,0.00138889), (151,0.00138889), (152,0.00138889), (153,0.00138889), (154,0.00138889), (155,0.00138889), (156,0.00138889), (157,0.00138889), (158,0.00138889), (159,0.00138889), (160,0.00138889), (161,0.00138889), (162,0.00138889), (163,0.00138889), (164,0.00138889), (165,0.00138889), (166,0.00138889), (167,0.00138889), (168,0.00138889), (169,0.00138889), (170,0.00138889), (171,0.00138889), 
(172,0.00138889), (173,0.00138889), (174,0.00138889), (175,0.00138889), (176,0.00138889), (177,0.00138889), (178,0.00138889), (179,0.00138889), (180,0.00138889), (181,0.00138889), (182,0.00138889), (183,0.00138889), (184,0.00138889), (185,0.00138889), (186,0.00138889), (187,0.00138889), (188,0.00138889), (189,0.00138889), (190,0.00138889), (191,0.00138889), (192,0.00138889), (193,0.00138889), (194,0.00138889), (195,0.00138889), (196,0.00138889), (197,0.00138889), (198,0.00138889), (199,0.00138889), (200,0.00138889), (201,0.00138889), (202,0.00138889), (203,0.00138889), (204,0.00138889), (205,0.00138889), (206,0.00138889), (207,0.00138889), (208,0.00138889), (209,0.00138889), (210,0.00138889), (211,0.00138889), (212,0.00138889), (213,0.00138889), (214,0.00138889), (215,0.00138889), (216,0.00138889), (217,0.00138889), (218,0.00138889), (219,0.00138889), (220,0.00138889), (221,0.00138889), (222,0.00138889), (223,0.00138889), (224,0.00138889), (225,0.00138889), (226,0.00138889), (227,0.00138889), (228,0.00138889), (229,0.00138889), (230,0.00138889), (231,0.00138889), (232,0.00138889), (233,0.00138889), (234,0.00138889), (235,0.00138889), (236,0.00138889), (237,0.00138889), (238,0.00138889), (239,0.00138889), (240,0.00138889), (241,0.00138889), (242,0.00138889), (243,0.00138889), (244,0.00138889), (245,0.00138889), (246,0.00138889), (247,0.00138889), (248,0.00138889), (249,0.00138889), (250,0.00138889), (251,0.00138889), (252,0.00138889), (253,0.00138889), (254,0.00138889), (255,0.00138889), (256,0.00138889), (257,0.00138889), (258,0.00138889), (259,0.00138889), (260,0.00138889), (261,0.00138889), (262,0.00138889), (263,0.00138889), (264,0.00138889), (265,0.00138889), (266,0.00138889), (267,0.00138889), (268,0.00138889), (269,0.00138889), (270,0.00138889), (271,0.00138889), (272,0.00138889), (273,0.00138889), (274,0.00138889), (275,0.00138889), (276,0.00138889), (277,0.00138889), (278,0.00138889), (279,0.00138889), (280,0.00138889), (281,0.00138889), (282,0.00138889), (283,0.00138889), (284,0.00138889), (285,0.00138889), (286,0.00138889), (287,0.00138889), (288,0.00138889), (289,0.00138889), (290,0.00138889), (291,0.00138889), (292,0.00138889), (293,0.00138889), (294,0.00138889), (295,0.00138889), (296,0.00138889), (297,0.00138889), (298,0.00138889), (299,0.00138889), (300,0.00138889), (301,0.00138889), (302,0.00138889), (303,0.00138889), (304,0.00138889), (305,0.00138889), (306,0.00138889), (307,0.00138889), (308,0.00138889), (309,0.00138889), (310,0.00138889), (311,0.00138889), (312,0.00138889), (313,0.00138889), (314,0.00138889), (315,0.00138889), (316,0.00138889), (317,0.00138889), (318,0.00138889), (319,0.00138889), (320,0.00138889), (321,0.00138889), (322,0.00138889), (323,0.00138889), (324,0.00138889), (325,0.00138889), (326,0.00138889), (327,0.00138889), (328,0.00138889), (329,0.00138889), (330,0.00138889), (331,0.00138889), (332,0.00138889), (333,0.00138889), (334,0.00138889), (335,0.00138889), (336,0.00138889), (337,0.00138889), (338,0.00138889), (339,0.00138889), (340,0.00138889), (341,0.00138889), (342,0.00138889), (343,0.00138889), (344,0.00138889), (345,0.00138889), (346,0.00138889), (347,0.00138889), (348,0.00138889), (349,0.00138889), (350,0.00138889), (351,0.00138889), (352,0.00138889), (353,0.00138889), (354,0.00138889), (355,0.00138889), (356,0.00138889), (357,0.00138889), (358,0.00138889), (359,0.00138889), (360,0.00138889), (361,0.00138889), (362,0.00138889), (363,0.00138889), (364,0.00138889), (365,0.00138889), (366,0.00138889), (367,0.00138889), (368,0.00138889), 
(369,0.00138889), (370,0.00138889), (371,0.00138889), (372,0.00138889), (373,0.00138889), (374,0.00138889), (375,0.00138889), (376,0.00138889), (377,0.00138889), (378,0.00138889), (379,0.00138889), (380,0.00138889), (381,0.00138889), (382,0.00138889), (383,0.00138889), (384,0.00138889), (385,0.00138889), (386,0.00138889), (387,0.00138889), (388,0.00138889), (389,0.00138889), (390,0.00138889), (391,0.00138889), (392,0.00138889), (393,0.00138889), (394,0.00138889), (395,0.00138889), (396,0.00138889), (397,0.00138889), (398,0.00138889), (399,0.00138889), (400,0.00138889), (401,0.00138889), (402,0.00138889), (403,0.00138889), (404,0.00138889), (405,0.00138889), (406,0.00138889), (407,0.00138889), (408,0.00138889), (409,0.00138889), (410,0.00138889), (411,0.00138889), (412,0.00138889), (413,0.00138889), (414,0.00138889), (415,0.00138889), (416,0.00138889), (417,0.00138889), (418,0.00138889), (419,0.00138889), (420,0.00138889), (421,0.00138889), (422,0.00138889), (423,0.00138889), (424,0.00138889), (425,0.00138889), (426,0.00138889), (427,0.00138889), (428,0.00138889), (429,0.00138889), (430,0.00138889), (431,0.00138889), (432,0.00138889), (433,0.00138889), (434,0.00138889), (435,0.00138889), (436,0.00138889), (437,0.00138889), (438,0.00138889), (439,0.00138889), (440,0.00138889), (441,0.00138889), (442,0.00138889), (443,0.00138889), (444,0.00138889), (445,0.00138889), (446,0.00138889), (447,0.00138889), (448,0.00138889), (449,0.00138889), (450,0.00138889), (451,0.00138889), (452,0.00138889), (453,0.00138889), (454,0.00138889), (455,0.00138889), (456,0.00138889), (457,0.00138889), (458,0.00138889), (459,0.00138889), (460,0.00138889), (461,0.00138889), (462,0.00138889), (463,0.00138889), (464,0.00138889), (465,0.00138889), (466,0.00138889), (467,0.00138889), (468,0.00138889), (469,0.00138889), (470,0.00138889), (471,0.00138889), (472,0.00138889), (473,0.00138889), (474,0.00138889), (475,0.00138889), (476,0.00138889), (477,0.00138889), (478,0.00138889), (479,0.00138889), (480,0.00138889), (481,0.00138889), (482,0.00138889), (483,0.00138889), (484,0.00138889), (485,0.00138889), (486,0.00138889), (487,0.00138889), (488,0.00138889), (489,0.00138889), (490,0.00138889), (491,0.00138889), (492,0.00138889), (493,0.00138889), (494,0.00138889), (495,0.00138889), (496,0.00138889), (497,0.00138889), (498,0.00138889), (499,0.00138889), (500,0.00138889), (501,0.00138889), (502,0.00138889), (503,0.00138889), (504,0.00138889), (505,0.00138889), (506,0.00138889), (507,0.00138889), (508,0.00138889), (509,0.00138889), (510,0.00138889), (511,0.00138889), (512,0.00138889), (513,0.00138889), (514,0.00138889), (515,0.00138889), (516,0.00138889), (517,0.00138889), (518,0.00138889), (519,0.00138889), (520,0.00138889), (521,0.00138889), (522,0.00138889), (523,0.00138889), (524,0.00138889), (525,0.00138889), (526,0.00138889), (527,0.00138889), (528,0.00138889), (529,0.00138889), (530,0.00138889), (531,0.00138889), (532,0.00138889), (533,0.00138889), (534,0.00138889), (535,0.00138889), (536,0.00138889), (537,0.00138889), (538,0.00138889), (539,0.00138889), (540,0.00138889), (541,0.00138889), (542,0.00138889), (543,0.00138889), (544,0.00138889), (545,0.00138889), (546,0.00138889), (547,0.00138889), (548,0.00138889), (549,0.00138889), (550,0.00138889), (551,0.00138889), (552,0.00138889), (553,0.00138889), (554,0.00138889), (555,0.00138889), (556,0.00138889), (557,0.00138889), (558,0.00138889), (559,0.00138889), (560,0.00138889), (561,0.00138889), (562,0.00138889), (563,0.00138889), (564,0.00138889), (565,0.00138889), 
(566,0.00138889), (567,0.00138889), (568,0.00138889), (569,0.00138889), (570,0.00138889), (571,0.00138889), (572,0.00138889), (573,0.00138889), (574,0.00138889), (575,0.00138889), (576,0.00138889), (577,0.00138889), (578,0.00138889), (579,0.00138889), (580,0.00138889), (581,0.00138889), (582,0.00138889), (583,0.00138889), (584,0.00138889), (585,0.00138889), (586,0.00138889), (587,0.00138889), (588,0.00138889), (589,0.00138889), (590,0.00138889), (591,0.00138889), (592,0.00138889), (593,0.00138889), (594,0.00138889), (595,0.00138889), (596,0.00138889), (597,0.00138889), (598,0.00138889), (599,0.00138889), (600,0.00138889), (601,0.00138889), (602,0.00138889), (603,0.00138889), (604,0.00138889), (605,0.00138889), (606,0.00138889), (607,0.00138889), (608,0.00138889), (609,0.00138889), (610,0.00138889), (611,0.00138889), (612,0.00138889), (613,0.00138889), (614,0.00138889), (615,0.00138889), (616,0.00138889), (617,0.00138889), (618,0.00138889), (619,0.00138889), (620,0.00138889), (621,0.00138889), (622,0.00138889), (623,0.00138889), (624,0.00138889), (625,0.00138889), (626,0.00138889), (627,0.00138889), (628,0.00138889), (629,0.00138889), (630,0.00138889), (631,0.00138889), (632,0.00138889), (633,0.00138889), (634,0.00138889), (635,0.00138889), (636,0.00138889), (637,0.00138889), (638,0.00138889), (639,0.00138889), (640,0.00138889), (641,0.00138889), (642,0.00138889), (643,0.00138889), (644,0.00138889), (645,0.00138889), (646,0.00138889), (647,0.00138889), (648,0.00138889), (649,0.00138889), (650,0.00138889), (651,0.00138889), (652,0.00138889), (653,0.00138889), (654,0.00138889), (655,0.00138889), (656,0.00138889), (657,0.00138889), (658,0.00138889), (659,0.00138889), (660,0.00138889), (661,0.00138889), (662,0.00138889), (663,0.00138889), (664,0.00138889), (665,0.00138889), (666,0.00138889), (667,0.00138889), (668,0.00138889), (669,0.00138889), (670,0.00138889), (671,0.00138889), (672,0.00138889), (673,0.00138889), (674,0.00138889), (675,0.00138889), (676,0.00138889), (677,0.00138889), (678,0.00138889), (679,0.00138889), (680,0.00138889), (681,0.00138889), (682,0.00138889), (683,0.00138889), (684,0.00138889), (685,0.00138889), (686,0.00138889), (687,0.00138889), (688,0.00138889), (689,0.00138889), (690,0.00138889), (691,0.00138889), (692,0.00138889), (693,0.00138889), (694,0.00138889), (695,0.00138889), (696,0.00138889), (697,0.00138889), (698,0.00138889), (699,0.00138889), (700,0.00138889), (701,0.00138889), (702,0.00138889), (703,0.00138889), (704,0.00138889), (705,0.00138889), (706,0.00138889), (707,0.00138889), (708,0.00138889), (709,0.00138889), (710,0.00138889), (711,0.00138889), (712,0.00138889), (713,0.00138889), (714,0.00138889), (715,0.00138889), (716,0.00138889), (717,0.00138889), (718,0.00138889), (719,0.00138889)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 
174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719] +StringLegalActions() = ["Placing black cubes on the board - action 0", "Placing black cubes on the board - action 1", "Placing black cubes on the board - action 2", "Placing black cubes on the board - action 3", "Placing black cubes on the board - action 4", "Placing black cubes on the board - action 5", "Placing black cubes on the board - action 6", "Placing black cubes on the board - action 7", "Placing black cubes on the board - action 8", "Placing black cubes on the board - action 9", "Placing black cubes on the board - action 10", "Placing black cubes on the board - action 11", "Placing black cubes on the board - action 12", "Placing black cubes on the board - action 13", "Placing black cubes on the board - action 14", "Placing black cubes on the board - action 15", "Placing black cubes on the board - 
action 16", "Placing black cubes on the board - action 17", "Placing black cubes on the board - action 18", "Placing black cubes on the board - action 19", "Placing black cubes on the board - action 20", "Placing black cubes on the board - action 21", "Placing black cubes on the board - action 22", "Placing black cubes on the board - action 23", "Placing black cubes on the board - action 24", "Placing black cubes on the board - action 25", "Placing black cubes on the board - action 26", "Placing black cubes on the board - action 27", "Placing black cubes on the board - action 28", "Placing black cubes on the board - action 29", "Placing black cubes on the board - action 30", "Placing black cubes on the board - action 31", "Placing black cubes on the board - action 32", "Placing black cubes on the board - action 33", "Placing black cubes on the board - action 34", "Placing black cubes on the board - action 35", "Placing black cubes on the board - action 36", "Placing black cubes on the board - action 37", "Placing black cubes on the board - action 38", "Placing black cubes on the board - action 39", "Placing black cubes on the board - action 40", "Placing black cubes on the board - action 41", "Placing black cubes on the board - action 42", "Placing black cubes on the board - action 43", "Placing black cubes on the board - action 44", "Placing black cubes on the board - action 45", "Placing black cubes on the board - action 46", "Placing black cubes on the board - action 47", "Placing black cubes on the board - action 48", "Placing black cubes on the board - action 49", "Placing black cubes on the board - action 50", "Placing black cubes on the board - action 51", "Placing black cubes on the board - action 52", "Placing black cubes on the board - action 53", "Placing black cubes on the board - action 54", "Placing black cubes on the board - action 55", "Placing black cubes on the board - action 56", "Placing black cubes on the board - action 57", "Placing black cubes on the board - action 58", "Placing black cubes on the board - action 59", "Placing black cubes on the board - action 60", "Placing black cubes on the board - action 61", "Placing black cubes on the board - action 62", "Placing black cubes on the board - action 63", "Placing black cubes on the board - action 64", "Placing black cubes on the board - action 65", "Placing black cubes on the board - action 66", "Placing black cubes on the board - action 67", "Placing black cubes on the board - action 68", "Placing black cubes on the board - action 69", "Placing black cubes on the board - action 70", "Placing black cubes on the board - action 71", "Placing black cubes on the board - action 72", "Placing black cubes on the board - action 73", "Placing black cubes on the board - action 74", "Placing black cubes on the board - action 75", "Placing black cubes on the board - action 76", "Placing black cubes on the board - action 77", "Placing black cubes on the board - action 78", "Placing black cubes on the board - action 79", "Placing black cubes on the board - action 80", "Placing black cubes on the board - action 81", "Placing black cubes on the board - action 82", "Placing black cubes on the board - action 83", "Placing black cubes on the board - action 84", "Placing black cubes on the board - action 85", "Placing black cubes on the board - action 86", "Placing black cubes on the board - action 87", "Placing black cubes on the board - action 88", "Placing black cubes on the board - action 89", "Placing black cubes on the board - 
action 90", "Placing black cubes on the board - action 91", "Placing black cubes on the board - action 92", "Placing black cubes on the board - action 93", "Placing black cubes on the board - action 94", "Placing black cubes on the board - action 95", "Placing black cubes on the board - action 96", "Placing black cubes on the board - action 97", "Placing black cubes on the board - action 98", "Placing black cubes on the board - action 99", "Placing black cubes on the board - action 100", "Placing black cubes on the board - action 101", "Placing black cubes on the board - action 102", "Placing black cubes on the board - action 103", "Placing black cubes on the board - action 104", "Placing black cubes on the board - action 105", "Placing black cubes on the board - action 106", "Placing black cubes on the board - action 107", "Placing black cubes on the board - action 108", "Placing black cubes on the board - action 109", "Placing black cubes on the board - action 110", "Placing black cubes on the board - action 111", "Placing black cubes on the board - action 112", "Placing black cubes on the board - action 113", "Placing black cubes on the board - action 114", "Placing black cubes on the board - action 115", "Placing black cubes on the board - action 116", "Placing black cubes on the board - action 117", "Placing black cubes on the board - action 118", "Placing black cubes on the board - action 119", "Placing black cubes on the board - action 120", "Placing black cubes on the board - action 121", "Placing black cubes on the board - action 122", "Placing black cubes on the board - action 123", "Placing black cubes on the board - action 124", "Placing black cubes on the board - action 125", "Placing black cubes on the board - action 126", "Placing black cubes on the board - action 127", "Placing black cubes on the board - action 128", "Placing black cubes on the board - action 129", "Placing black cubes on the board - action 130", "Placing black cubes on the board - action 131", "Placing black cubes on the board - action 132", "Placing black cubes on the board - action 133", "Placing black cubes on the board - action 134", "Placing black cubes on the board - action 135", "Placing black cubes on the board - action 136", "Placing black cubes on the board - action 137", "Placing black cubes on the board - action 138", "Placing black cubes on the board - action 139", "Placing black cubes on the board - action 140", "Placing black cubes on the board - action 141", "Placing black cubes on the board - action 142", "Placing black cubes on the board - action 143", "Placing black cubes on the board - action 144", "Placing black cubes on the board - action 145", "Placing black cubes on the board - action 146", "Placing black cubes on the board - action 147", "Placing black cubes on the board - action 148", "Placing black cubes on the board - action 149", "Placing black cubes on the board - action 150", "Placing black cubes on the board - action 151", "Placing black cubes on the board - action 152", "Placing black cubes on the board - action 153", "Placing black cubes on the board - action 154", "Placing black cubes on the board - action 155", "Placing black cubes on the board - action 156", "Placing black cubes on the board - action 157", "Placing black cubes on the board - action 158", "Placing black cubes on the board - action 159", "Placing black cubes on the board - action 160", "Placing black cubes on the board - action 161", "Placing black cubes on the board - action 162", "Placing black cubes on 
the board - action 163", "Placing black cubes on the board - action 164", "Placing black cubes on the board - action 165", "Placing black cubes on the board - action 166", "Placing black cubes on the board - action 167", "Placing black cubes on the board - action 168", "Placing black cubes on the board - action 169", "Placing black cubes on the board - action 170", "Placing black cubes on the board - action 171", "Placing black cubes on the board - action 172", "Placing black cubes on the board - action 173", "Placing black cubes on the board - action 174", "Placing black cubes on the board - action 175", "Placing black cubes on the board - action 176", "Placing black cubes on the board - action 177", "Placing black cubes on the board - action 178", "Placing black cubes on the board - action 179", "Placing black cubes on the board - action 180", "Placing black cubes on the board - action 181", "Placing black cubes on the board - action 182", "Placing black cubes on the board - action 183", "Placing black cubes on the board - action 184", "Placing black cubes on the board - action 185", "Placing black cubes on the board - action 186", "Placing black cubes on the board - action 187", "Placing black cubes on the board - action 188", "Placing black cubes on the board - action 189", "Placing black cubes on the board - action 190", "Placing black cubes on the board - action 191", "Placing black cubes on the board - action 192", "Placing black cubes on the board - action 193", "Placing black cubes on the board - action 194", "Placing black cubes on the board - action 195", "Placing black cubes on the board - action 196", "Placing black cubes on the board - action 197", "Placing black cubes on the board - action 198", "Placing black cubes on the board - action 199", "Placing black cubes on the board - action 200", "Placing black cubes on the board - action 201", "Placing black cubes on the board - action 202", "Placing black cubes on the board - action 203", "Placing black cubes on the board - action 204", "Placing black cubes on the board - action 205", "Placing black cubes on the board - action 206", "Placing black cubes on the board - action 207", "Placing black cubes on the board - action 208", "Placing black cubes on the board - action 209", "Placing black cubes on the board - action 210", "Placing black cubes on the board - action 211", "Placing black cubes on the board - action 212", "Placing black cubes on the board - action 213", "Placing black cubes on the board - action 214", "Placing black cubes on the board - action 215", "Placing black cubes on the board - action 216", "Placing black cubes on the board - action 217", "Placing black cubes on the board - action 218", "Placing black cubes on the board - action 219", "Placing black cubes on the board - action 220", "Placing black cubes on the board - action 221", "Placing black cubes on the board - action 222", "Placing black cubes on the board - action 223", "Placing black cubes on the board - action 224", "Placing black cubes on the board - action 225", "Placing black cubes on the board - action 226", "Placing black cubes on the board - action 227", "Placing black cubes on the board - action 228", "Placing black cubes on the board - action 229", "Placing black cubes on the board - action 230", "Placing black cubes on the board - action 231", "Placing black cubes on the board - action 232", "Placing black cubes on the board - action 233", "Placing black cubes on the board - action 234", "Placing black cubes on the board - action 235", 
"Placing black cubes on the board - action 236", "Placing black cubes on the board - action 237", "Placing black cubes on the board - action 238", "Placing black cubes on the board - action 239", "Placing black cubes on the board - action 240", "Placing black cubes on the board - action 241", "Placing black cubes on the board - action 242", "Placing black cubes on the board - action 243", "Placing black cubes on the board - action 244", "Placing black cubes on the board - action 245", "Placing black cubes on the board - action 246", "Placing black cubes on the board - action 247", "Placing black cubes on the board - action 248", "Placing black cubes on the board - action 249", "Placing black cubes on the board - action 250", "Placing black cubes on the board - action 251", "Placing black cubes on the board - action 252", "Placing black cubes on the board - action 253", "Placing black cubes on the board - action 254", "Placing black cubes on the board - action 255", "Placing black cubes on the board - action 256", "Placing black cubes on the board - action 257", "Placing black cubes on the board - action 258", "Placing black cubes on the board - action 259", "Placing black cubes on the board - action 260", "Placing black cubes on the board - action 261", "Placing black cubes on the board - action 262", "Placing black cubes on the board - action 263", "Placing black cubes on the board - action 264", "Placing black cubes on the board - action 265", "Placing black cubes on the board - action 266", "Placing black cubes on the board - action 267", "Placing black cubes on the board - action 268", "Placing black cubes on the board - action 269", "Placing black cubes on the board - action 270", "Placing black cubes on the board - action 271", "Placing black cubes on the board - action 272", "Placing black cubes on the board - action 273", "Placing black cubes on the board - action 274", "Placing black cubes on the board - action 275", "Placing black cubes on the board - action 276", "Placing black cubes on the board - action 277", "Placing black cubes on the board - action 278", "Placing black cubes on the board - action 279", "Placing black cubes on the board - action 280", "Placing black cubes on the board - action 281", "Placing black cubes on the board - action 282", "Placing black cubes on the board - action 283", "Placing black cubes on the board - action 284", "Placing black cubes on the board - action 285", "Placing black cubes on the board - action 286", "Placing black cubes on the board - action 287", "Placing black cubes on the board - action 288", "Placing black cubes on the board - action 289", "Placing black cubes on the board - action 290", "Placing black cubes on the board - action 291", "Placing black cubes on the board - action 292", "Placing black cubes on the board - action 293", "Placing black cubes on the board - action 294", "Placing black cubes on the board - action 295", "Placing black cubes on the board - action 296", "Placing black cubes on the board - action 297", "Placing black cubes on the board - action 298", "Placing black cubes on the board - action 299", "Placing black cubes on the board - action 300", "Placing black cubes on the board - action 301", "Placing black cubes on the board - action 302", "Placing black cubes on the board - action 303", "Placing black cubes on the board - action 304", "Placing black cubes on the board - action 305", "Placing black cubes on the board - action 306", "Placing black cubes on the board - action 307", "Placing black cubes on 
the board - action 308", "Placing black cubes on the board - action 309", "Placing black cubes on the board - action 310", "Placing black cubes on the board - action 311", "Placing black cubes on the board - action 312", "Placing black cubes on the board - action 313", "Placing black cubes on the board - action 314", "Placing black cubes on the board - action 315", "Placing black cubes on the board - action 316", "Placing black cubes on the board - action 317", "Placing black cubes on the board - action 318", "Placing black cubes on the board - action 319", "Placing black cubes on the board - action 320", "Placing black cubes on the board - action 321", "Placing black cubes on the board - action 322", "Placing black cubes on the board - action 323", "Placing black cubes on the board - action 324", "Placing black cubes on the board - action 325", "Placing black cubes on the board - action 326", "Placing black cubes on the board - action 327", "Placing black cubes on the board - action 328", "Placing black cubes on the board - action 329", "Placing black cubes on the board - action 330", "Placing black cubes on the board - action 331", "Placing black cubes on the board - action 332", "Placing black cubes on the board - action 333", "Placing black cubes on the board - action 334", "Placing black cubes on the board - action 335", "Placing black cubes on the board - action 336", "Placing black cubes on the board - action 337", "Placing black cubes on the board - action 338", "Placing black cubes on the board - action 339", "Placing black cubes on the board - action 340", "Placing black cubes on the board - action 341", "Placing black cubes on the board - action 342", "Placing black cubes on the board - action 343", "Placing black cubes on the board - action 344", "Placing black cubes on the board - action 345", "Placing black cubes on the board - action 346", "Placing black cubes on the board - action 347", "Placing black cubes on the board - action 348", "Placing black cubes on the board - action 349", "Placing black cubes on the board - action 350", "Placing black cubes on the board - action 351", "Placing black cubes on the board - action 352", "Placing black cubes on the board - action 353", "Placing black cubes on the board - action 354", "Placing black cubes on the board - action 355", "Placing black cubes on the board - action 356", "Placing black cubes on the board - action 357", "Placing black cubes on the board - action 358", "Placing black cubes on the board - action 359", "Placing black cubes on the board - action 360", "Placing black cubes on the board - action 361", "Placing black cubes on the board - action 362", "Placing black cubes on the board - action 363", "Placing black cubes on the board - action 364", "Placing black cubes on the board - action 365", "Placing black cubes on the board - action 366", "Placing black cubes on the board - action 367", "Placing black cubes on the board - action 368", "Placing black cubes on the board - action 369", "Placing black cubes on the board - action 370", "Placing black cubes on the board - action 371", "Placing black cubes on the board - action 372", "Placing black cubes on the board - action 373", "Placing black cubes on the board - action 374", "Placing black cubes on the board - action 375", "Placing black cubes on the board - action 376", "Placing black cubes on the board - action 377", "Placing black cubes on the board - action 378", "Placing black cubes on the board - action 379", "Placing black cubes on the board - action 380", 
"Placing black cubes on the board - action 381", "Placing black cubes on the board - action 382", "Placing black cubes on the board - action 383", "Placing black cubes on the board - action 384", "Placing black cubes on the board - action 385", "Placing black cubes on the board - action 386", "Placing black cubes on the board - action 387", "Placing black cubes on the board - action 388", "Placing black cubes on the board - action 389", "Placing black cubes on the board - action 390", "Placing black cubes on the board - action 391", "Placing black cubes on the board - action 392", "Placing black cubes on the board - action 393", "Placing black cubes on the board - action 394", "Placing black cubes on the board - action 395", "Placing black cubes on the board - action 396", "Placing black cubes on the board - action 397", "Placing black cubes on the board - action 398", "Placing black cubes on the board - action 399", "Placing black cubes on the board - action 400", "Placing black cubes on the board - action 401", "Placing black cubes on the board - action 402", "Placing black cubes on the board - action 403", "Placing black cubes on the board - action 404", "Placing black cubes on the board - action 405", "Placing black cubes on the board - action 406", "Placing black cubes on the board - action 407", "Placing black cubes on the board - action 408", "Placing black cubes on the board - action 409", "Placing black cubes on the board - action 410", "Placing black cubes on the board - action 411", "Placing black cubes on the board - action 412", "Placing black cubes on the board - action 413", "Placing black cubes on the board - action 414", "Placing black cubes on the board - action 415", "Placing black cubes on the board - action 416", "Placing black cubes on the board - action 417", "Placing black cubes on the board - action 418", "Placing black cubes on the board - action 419", "Placing black cubes on the board - action 420", "Placing black cubes on the board - action 421", "Placing black cubes on the board - action 422", "Placing black cubes on the board - action 423", "Placing black cubes on the board - action 424", "Placing black cubes on the board - action 425", "Placing black cubes on the board - action 426", "Placing black cubes on the board - action 427", "Placing black cubes on the board - action 428", "Placing black cubes on the board - action 429", "Placing black cubes on the board - action 430", "Placing black cubes on the board - action 431", "Placing black cubes on the board - action 432", "Placing black cubes on the board - action 433", "Placing black cubes on the board - action 434", "Placing black cubes on the board - action 435", "Placing black cubes on the board - action 436", "Placing black cubes on the board - action 437", "Placing black cubes on the board - action 438", "Placing black cubes on the board - action 439", "Placing black cubes on the board - action 440", "Placing black cubes on the board - action 441", "Placing black cubes on the board - action 442", "Placing black cubes on the board - action 443", "Placing black cubes on the board - action 444", "Placing black cubes on the board - action 445", "Placing black cubes on the board - action 446", "Placing black cubes on the board - action 447", "Placing black cubes on the board - action 448", "Placing black cubes on the board - action 449", "Placing black cubes on the board - action 450", "Placing black cubes on the board - action 451", "Placing black cubes on the board - action 452", "Placing black cubes on 
the board - action 453", "Placing black cubes on the board - action 454", "Placing black cubes on the board - action 455", "Placing black cubes on the board - action 456", "Placing black cubes on the board - action 457", "Placing black cubes on the board - action 458", "Placing black cubes on the board - action 459", "Placing black cubes on the board - action 460", "Placing black cubes on the board - action 461", "Placing black cubes on the board - action 462", "Placing black cubes on the board - action 463", "Placing black cubes on the board - action 464", "Placing black cubes on the board - action 465", "Placing black cubes on the board - action 466", "Placing black cubes on the board - action 467", "Placing black cubes on the board - action 468", "Placing black cubes on the board - action 469", "Placing black cubes on the board - action 470", "Placing black cubes on the board - action 471", "Placing black cubes on the board - action 472", "Placing black cubes on the board - action 473", "Placing black cubes on the board - action 474", "Placing black cubes on the board - action 475", "Placing black cubes on the board - action 476", "Placing black cubes on the board - action 477", "Placing black cubes on the board - action 478", "Placing black cubes on the board - action 479", "Placing black cubes on the board - action 480", "Placing black cubes on the board - action 481", "Placing black cubes on the board - action 482", "Placing black cubes on the board - action 483", "Placing black cubes on the board - action 484", "Placing black cubes on the board - action 485", "Placing black cubes on the board - action 486", "Placing black cubes on the board - action 487", "Placing black cubes on the board - action 488", "Placing black cubes on the board - action 489", "Placing black cubes on the board - action 490", "Placing black cubes on the board - action 491", "Placing black cubes on the board - action 492", "Placing black cubes on the board - action 493", "Placing black cubes on the board - action 494", "Placing black cubes on the board - action 495", "Placing black cubes on the board - action 496", "Placing black cubes on the board - action 497", "Placing black cubes on the board - action 498", "Placing black cubes on the board - action 499", "Placing black cubes on the board - action 500", "Placing black cubes on the board - action 501", "Placing black cubes on the board - action 502", "Placing black cubes on the board - action 503", "Placing black cubes on the board - action 504", "Placing black cubes on the board - action 505", "Placing black cubes on the board - action 506", "Placing black cubes on the board - action 507", "Placing black cubes on the board - action 508", "Placing black cubes on the board - action 509", "Placing black cubes on the board - action 510", "Placing black cubes on the board - action 511", "Placing black cubes on the board - action 512", "Placing black cubes on the board - action 513", "Placing black cubes on the board - action 514", "Placing black cubes on the board - action 515", "Placing black cubes on the board - action 516", "Placing black cubes on the board - action 517", "Placing black cubes on the board - action 518", "Placing black cubes on the board - action 519", "Placing black cubes on the board - action 520", "Placing black cubes on the board - action 521", "Placing black cubes on the board - action 522", "Placing black cubes on the board - action 523", "Placing black cubes on the board - action 524", "Placing black cubes on the board - action 525", 
"Placing black cubes on the board - action 526", "Placing black cubes on the board - action 527", "Placing black cubes on the board - action 528", "Placing black cubes on the board - action 529", "Placing black cubes on the board - action 530", "Placing black cubes on the board - action 531", "Placing black cubes on the board - action 532", "Placing black cubes on the board - action 533", "Placing black cubes on the board - action 534", "Placing black cubes on the board - action 535", "Placing black cubes on the board - action 536", "Placing black cubes on the board - action 537", "Placing black cubes on the board - action 538", "Placing black cubes on the board - action 539", "Placing black cubes on the board - action 540", "Placing black cubes on the board - action 541", "Placing black cubes on the board - action 542", "Placing black cubes on the board - action 543", "Placing black cubes on the board - action 544", "Placing black cubes on the board - action 545", "Placing black cubes on the board - action 546", "Placing black cubes on the board - action 547", "Placing black cubes on the board - action 548", "Placing black cubes on the board - action 549", "Placing black cubes on the board - action 550", "Placing black cubes on the board - action 551", "Placing black cubes on the board - action 552", "Placing black cubes on the board - action 553", "Placing black cubes on the board - action 554", "Placing black cubes on the board - action 555", "Placing black cubes on the board - action 556", "Placing black cubes on the board - action 557", "Placing black cubes on the board - action 558", "Placing black cubes on the board - action 559", "Placing black cubes on the board - action 560", "Placing black cubes on the board - action 561", "Placing black cubes on the board - action 562", "Placing black cubes on the board - action 563", "Placing black cubes on the board - action 564", "Placing black cubes on the board - action 565", "Placing black cubes on the board - action 566", "Placing black cubes on the board - action 567", "Placing black cubes on the board - action 568", "Placing black cubes on the board - action 569", "Placing black cubes on the board - action 570", "Placing black cubes on the board - action 571", "Placing black cubes on the board - action 572", "Placing black cubes on the board - action 573", "Placing black cubes on the board - action 574", "Placing black cubes on the board - action 575", "Placing black cubes on the board - action 576", "Placing black cubes on the board - action 577", "Placing black cubes on the board - action 578", "Placing black cubes on the board - action 579", "Placing black cubes on the board - action 580", "Placing black cubes on the board - action 581", "Placing black cubes on the board - action 582", "Placing black cubes on the board - action 583", "Placing black cubes on the board - action 584", "Placing black cubes on the board - action 585", "Placing black cubes on the board - action 586", "Placing black cubes on the board - action 587", "Placing black cubes on the board - action 588", "Placing black cubes on the board - action 589", "Placing black cubes on the board - action 590", "Placing black cubes on the board - action 591", "Placing black cubes on the board - action 592", "Placing black cubes on the board - action 593", "Placing black cubes on the board - action 594", "Placing black cubes on the board - action 595", "Placing black cubes on the board - action 596", "Placing black cubes on the board - action 597", "Placing black cubes on 
the board - action 598", "Placing black cubes on the board - action 599", "Placing black cubes on the board - action 600", "Placing black cubes on the board - action 601", "Placing black cubes on the board - action 602", "Placing black cubes on the board - action 603", "Placing black cubes on the board - action 604", "Placing black cubes on the board - action 605", "Placing black cubes on the board - action 606", "Placing black cubes on the board - action 607", "Placing black cubes on the board - action 608", "Placing black cubes on the board - action 609", "Placing black cubes on the board - action 610", "Placing black cubes on the board - action 611", "Placing black cubes on the board - action 612", "Placing black cubes on the board - action 613", "Placing black cubes on the board - action 614", "Placing black cubes on the board - action 615", "Placing black cubes on the board - action 616", "Placing black cubes on the board - action 617", "Placing black cubes on the board - action 618", "Placing black cubes on the board - action 619", "Placing black cubes on the board - action 620", "Placing black cubes on the board - action 621", "Placing black cubes on the board - action 622", "Placing black cubes on the board - action 623", "Placing black cubes on the board - action 624", "Placing black cubes on the board - action 625", "Placing black cubes on the board - action 626", "Placing black cubes on the board - action 627", "Placing black cubes on the board - action 628", "Placing black cubes on the board - action 629", "Placing black cubes on the board - action 630", "Placing black cubes on the board - action 631", "Placing black cubes on the board - action 632", "Placing black cubes on the board - action 633", "Placing black cubes on the board - action 634", "Placing black cubes on the board - action 635", "Placing black cubes on the board - action 636", "Placing black cubes on the board - action 637", "Placing black cubes on the board - action 638", "Placing black cubes on the board - action 639", "Placing black cubes on the board - action 640", "Placing black cubes on the board - action 641", "Placing black cubes on the board - action 642", "Placing black cubes on the board - action 643", "Placing black cubes on the board - action 644", "Placing black cubes on the board - action 645", "Placing black cubes on the board - action 646", "Placing black cubes on the board - action 647", "Placing black cubes on the board - action 648", "Placing black cubes on the board - action 649", "Placing black cubes on the board - action 650", "Placing black cubes on the board - action 651", "Placing black cubes on the board - action 652", "Placing black cubes on the board - action 653", "Placing black cubes on the board - action 654", "Placing black cubes on the board - action 655", "Placing black cubes on the board - action 656", "Placing black cubes on the board - action 657", "Placing black cubes on the board - action 658", "Placing black cubes on the board - action 659", "Placing black cubes on the board - action 660", "Placing black cubes on the board - action 661", "Placing black cubes on the board - action 662", "Placing black cubes on the board - action 663", "Placing black cubes on the board - action 664", "Placing black cubes on the board - action 665", "Placing black cubes on the board - action 666", "Placing black cubes on the board - action 667", "Placing black cubes on the board - action 668", "Placing black cubes on the board - action 669", "Placing black cubes on the board - action 670", 
"Placing black cubes on the board - action 671", "Placing black cubes on the board - action 672", "Placing black cubes on the board - action 673", "Placing black cubes on the board - action 674", "Placing black cubes on the board - action 675", "Placing black cubes on the board - action 676", "Placing black cubes on the board - action 677", "Placing black cubes on the board - action 678", "Placing black cubes on the board - action 679", "Placing black cubes on the board - action 680", "Placing black cubes on the board - action 681", "Placing black cubes on the board - action 682", "Placing black cubes on the board - action 683", "Placing black cubes on the board - action 684", "Placing black cubes on the board - action 685", "Placing black cubes on the board - action 686", "Placing black cubes on the board - action 687", "Placing black cubes on the board - action 688", "Placing black cubes on the board - action 689", "Placing black cubes on the board - action 690", "Placing black cubes on the board - action 691", "Placing black cubes on the board - action 692", "Placing black cubes on the board - action 693", "Placing black cubes on the board - action 694", "Placing black cubes on the board - action 695", "Placing black cubes on the board - action 696", "Placing black cubes on the board - action 697", "Placing black cubes on the board - action 698", "Placing black cubes on the board - action 699", "Placing black cubes on the board - action 700", "Placing black cubes on the board - action 701", "Placing black cubes on the board - action 702", "Placing black cubes on the board - action 703", "Placing black cubes on the board - action 704", "Placing black cubes on the board - action 705", "Placing black cubes on the board - action 706", "Placing black cubes on the board - action 707", "Placing black cubes on the board - action 708", "Placing black cubes on the board - action 709", "Placing black cubes on the board - action 710", "Placing black cubes on the board - action 711", "Placing black cubes on the board - action 712", "Placing black cubes on the board - action 713", "Placing black cubes on the board - action 714", "Placing black cubes on the board - action 715", "Placing black cubes on the board - action 716", "Placing black cubes on the board - action 717", "Placing black cubes on the board - action 718", "Placing black cubes on the board - action 719"] + +# Apply action "Placing black cubes on the board - action 120" +action: 120 + +# State 1 +# |b2||b1||b3||__||__| +# |b4||b5||__||__||__| +# |b6||__||__||__||__| +# |__||__||__||__||__| +# |__||__||__||__||__| +IsTerminal() = False +History() = [120] +HistoryString() = "120" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "|b2||b1||b3||__||__|\n|b4||b5||__||__||__|\n|b6||__||__||__||__|\n|__||__||__||__||__|\n|__||__||__||__||__|\n" +ObservationString(1) = "|b2||b1||b3||__||__|\n|b4||b5||__||__||__|\n|b6||__||__||__||__|\n|__||__||__||__||__|\n|__||__||__||__||__|\n" +ObservationTensor(0): binvec(300, 0x40000000000200000000000000200000000010000000000000200000000000800000000000) +ObservationTensor(1): binvec(300, 0x40000000000200000000000000200000000010000000000000200000000000800000000000) +ChanceOutcomes() = [(0,0.00138889), (1,0.00138889), (2,0.00138889), (3,0.00138889), (4,0.00138889), (5,0.00138889), (6,0.00138889), (7,0.00138889), (8,0.00138889), (9,0.00138889), (10,0.00138889), (11,0.00138889), (12,0.00138889), (13,0.00138889), (14,0.00138889), (15,0.00138889), (16,0.00138889), 
(17,0.00138889), (18,0.00138889), (19,0.00138889), (20,0.00138889), (21,0.00138889), (22,0.00138889), (23,0.00138889), (24,0.00138889), (25,0.00138889), (26,0.00138889), (27,0.00138889), (28,0.00138889), (29,0.00138889), (30,0.00138889), (31,0.00138889), (32,0.00138889), (33,0.00138889), (34,0.00138889), (35,0.00138889), (36,0.00138889), (37,0.00138889), (38,0.00138889), (39,0.00138889), (40,0.00138889), (41,0.00138889), (42,0.00138889), (43,0.00138889), (44,0.00138889), (45,0.00138889), (46,0.00138889), (47,0.00138889), (48,0.00138889), (49,0.00138889), (50,0.00138889), (51,0.00138889), (52,0.00138889), (53,0.00138889), (54,0.00138889), (55,0.00138889), (56,0.00138889), (57,0.00138889), (58,0.00138889), (59,0.00138889), (60,0.00138889), (61,0.00138889), (62,0.00138889), (63,0.00138889), (64,0.00138889), (65,0.00138889), (66,0.00138889), (67,0.00138889), (68,0.00138889), (69,0.00138889), (70,0.00138889), (71,0.00138889), (72,0.00138889), (73,0.00138889), (74,0.00138889), (75,0.00138889), (76,0.00138889), (77,0.00138889), (78,0.00138889), (79,0.00138889), (80,0.00138889), (81,0.00138889), (82,0.00138889), (83,0.00138889), (84,0.00138889), (85,0.00138889), (86,0.00138889), (87,0.00138889), (88,0.00138889), (89,0.00138889), (90,0.00138889), (91,0.00138889), (92,0.00138889), (93,0.00138889), (94,0.00138889), (95,0.00138889), (96,0.00138889), (97,0.00138889), (98,0.00138889), (99,0.00138889), (100,0.00138889), (101,0.00138889), (102,0.00138889), (103,0.00138889), (104,0.00138889), (105,0.00138889), (106,0.00138889), (107,0.00138889), (108,0.00138889), (109,0.00138889), (110,0.00138889), (111,0.00138889), (112,0.00138889), (113,0.00138889), (114,0.00138889), (115,0.00138889), (116,0.00138889), (117,0.00138889), (118,0.00138889), (119,0.00138889), (120,0.00138889), (121,0.00138889), (122,0.00138889), (123,0.00138889), (124,0.00138889), (125,0.00138889), (126,0.00138889), (127,0.00138889), (128,0.00138889), (129,0.00138889), (130,0.00138889), (131,0.00138889), (132,0.00138889), (133,0.00138889), (134,0.00138889), (135,0.00138889), (136,0.00138889), (137,0.00138889), (138,0.00138889), (139,0.00138889), (140,0.00138889), (141,0.00138889), (142,0.00138889), (143,0.00138889), (144,0.00138889), (145,0.00138889), (146,0.00138889), (147,0.00138889), (148,0.00138889), (149,0.00138889), (150,0.00138889), (151,0.00138889), (152,0.00138889), (153,0.00138889), (154,0.00138889), (155,0.00138889), (156,0.00138889), (157,0.00138889), (158,0.00138889), (159,0.00138889), (160,0.00138889), (161,0.00138889), (162,0.00138889), (163,0.00138889), (164,0.00138889), (165,0.00138889), (166,0.00138889), (167,0.00138889), (168,0.00138889), (169,0.00138889), (170,0.00138889), (171,0.00138889), (172,0.00138889), (173,0.00138889), (174,0.00138889), (175,0.00138889), (176,0.00138889), (177,0.00138889), (178,0.00138889), (179,0.00138889), (180,0.00138889), (181,0.00138889), (182,0.00138889), (183,0.00138889), (184,0.00138889), (185,0.00138889), (186,0.00138889), (187,0.00138889), (188,0.00138889), (189,0.00138889), (190,0.00138889), (191,0.00138889), (192,0.00138889), (193,0.00138889), (194,0.00138889), (195,0.00138889), (196,0.00138889), (197,0.00138889), (198,0.00138889), (199,0.00138889), (200,0.00138889), (201,0.00138889), (202,0.00138889), (203,0.00138889), (204,0.00138889), (205,0.00138889), (206,0.00138889), (207,0.00138889), (208,0.00138889), (209,0.00138889), (210,0.00138889), (211,0.00138889), (212,0.00138889), (213,0.00138889), (214,0.00138889), (215,0.00138889), (216,0.00138889), (217,0.00138889), (218,0.00138889), 
(219,0.00138889), (220,0.00138889), (221,0.00138889), (222,0.00138889), (223,0.00138889), (224,0.00138889), (225,0.00138889), (226,0.00138889), (227,0.00138889), (228,0.00138889), (229,0.00138889), (230,0.00138889), (231,0.00138889), (232,0.00138889), (233,0.00138889), (234,0.00138889), (235,0.00138889), (236,0.00138889), (237,0.00138889), (238,0.00138889), (239,0.00138889), (240,0.00138889), (241,0.00138889), (242,0.00138889), (243,0.00138889), (244,0.00138889), (245,0.00138889), (246,0.00138889), (247,0.00138889), (248,0.00138889), (249,0.00138889), (250,0.00138889), (251,0.00138889), (252,0.00138889), (253,0.00138889), (254,0.00138889), (255,0.00138889), (256,0.00138889), (257,0.00138889), (258,0.00138889), (259,0.00138889), (260,0.00138889), (261,0.00138889), (262,0.00138889), (263,0.00138889), (264,0.00138889), (265,0.00138889), (266,0.00138889), (267,0.00138889), (268,0.00138889), (269,0.00138889), (270,0.00138889), (271,0.00138889), (272,0.00138889), (273,0.00138889), (274,0.00138889), (275,0.00138889), (276,0.00138889), (277,0.00138889), (278,0.00138889), (279,0.00138889), (280,0.00138889), (281,0.00138889), (282,0.00138889), (283,0.00138889), (284,0.00138889), (285,0.00138889), (286,0.00138889), (287,0.00138889), (288,0.00138889), (289,0.00138889), (290,0.00138889), (291,0.00138889), (292,0.00138889), (293,0.00138889), (294,0.00138889), (295,0.00138889), (296,0.00138889), (297,0.00138889), (298,0.00138889), (299,0.00138889), (300,0.00138889), (301,0.00138889), (302,0.00138889), (303,0.00138889), (304,0.00138889), (305,0.00138889), (306,0.00138889), (307,0.00138889), (308,0.00138889), (309,0.00138889), (310,0.00138889), (311,0.00138889), (312,0.00138889), (313,0.00138889), (314,0.00138889), (315,0.00138889), (316,0.00138889), (317,0.00138889), (318,0.00138889), (319,0.00138889), (320,0.00138889), (321,0.00138889), (322,0.00138889), (323,0.00138889), (324,0.00138889), (325,0.00138889), (326,0.00138889), (327,0.00138889), (328,0.00138889), (329,0.00138889), (330,0.00138889), (331,0.00138889), (332,0.00138889), (333,0.00138889), (334,0.00138889), (335,0.00138889), (336,0.00138889), (337,0.00138889), (338,0.00138889), (339,0.00138889), (340,0.00138889), (341,0.00138889), (342,0.00138889), (343,0.00138889), (344,0.00138889), (345,0.00138889), (346,0.00138889), (347,0.00138889), (348,0.00138889), (349,0.00138889), (350,0.00138889), (351,0.00138889), (352,0.00138889), (353,0.00138889), (354,0.00138889), (355,0.00138889), (356,0.00138889), (357,0.00138889), (358,0.00138889), (359,0.00138889), (360,0.00138889), (361,0.00138889), (362,0.00138889), (363,0.00138889), (364,0.00138889), (365,0.00138889), (366,0.00138889), (367,0.00138889), (368,0.00138889), (369,0.00138889), (370,0.00138889), (371,0.00138889), (372,0.00138889), (373,0.00138889), (374,0.00138889), (375,0.00138889), (376,0.00138889), (377,0.00138889), (378,0.00138889), (379,0.00138889), (380,0.00138889), (381,0.00138889), (382,0.00138889), (383,0.00138889), (384,0.00138889), (385,0.00138889), (386,0.00138889), (387,0.00138889), (388,0.00138889), (389,0.00138889), (390,0.00138889), (391,0.00138889), (392,0.00138889), (393,0.00138889), (394,0.00138889), (395,0.00138889), (396,0.00138889), (397,0.00138889), (398,0.00138889), (399,0.00138889), (400,0.00138889), (401,0.00138889), (402,0.00138889), (403,0.00138889), (404,0.00138889), (405,0.00138889), (406,0.00138889), (407,0.00138889), (408,0.00138889), (409,0.00138889), (410,0.00138889), (411,0.00138889), (412,0.00138889), (413,0.00138889), (414,0.00138889), (415,0.00138889), 
(416,0.00138889), (417,0.00138889), (418,0.00138889), (419,0.00138889), (420,0.00138889), (421,0.00138889), (422,0.00138889), (423,0.00138889), (424,0.00138889), (425,0.00138889), (426,0.00138889), (427,0.00138889), (428,0.00138889), (429,0.00138889), (430,0.00138889), (431,0.00138889), (432,0.00138889), (433,0.00138889), (434,0.00138889), (435,0.00138889), (436,0.00138889), (437,0.00138889), (438,0.00138889), (439,0.00138889), (440,0.00138889), (441,0.00138889), (442,0.00138889), (443,0.00138889), (444,0.00138889), (445,0.00138889), (446,0.00138889), (447,0.00138889), (448,0.00138889), (449,0.00138889), (450,0.00138889), (451,0.00138889), (452,0.00138889), (453,0.00138889), (454,0.00138889), (455,0.00138889), (456,0.00138889), (457,0.00138889), (458,0.00138889), (459,0.00138889), (460,0.00138889), (461,0.00138889), (462,0.00138889), (463,0.00138889), (464,0.00138889), (465,0.00138889), (466,0.00138889), (467,0.00138889), (468,0.00138889), (469,0.00138889), (470,0.00138889), (471,0.00138889), (472,0.00138889), (473,0.00138889), (474,0.00138889), (475,0.00138889), (476,0.00138889), (477,0.00138889), (478,0.00138889), (479,0.00138889), (480,0.00138889), (481,0.00138889), (482,0.00138889), (483,0.00138889), (484,0.00138889), (485,0.00138889), (486,0.00138889), (487,0.00138889), (488,0.00138889), (489,0.00138889), (490,0.00138889), (491,0.00138889), (492,0.00138889), (493,0.00138889), (494,0.00138889), (495,0.00138889), (496,0.00138889), (497,0.00138889), (498,0.00138889), (499,0.00138889), (500,0.00138889), (501,0.00138889), (502,0.00138889), (503,0.00138889), (504,0.00138889), (505,0.00138889), (506,0.00138889), (507,0.00138889), (508,0.00138889), (509,0.00138889), (510,0.00138889), (511,0.00138889), (512,0.00138889), (513,0.00138889), (514,0.00138889), (515,0.00138889), (516,0.00138889), (517,0.00138889), (518,0.00138889), (519,0.00138889), (520,0.00138889), (521,0.00138889), (522,0.00138889), (523,0.00138889), (524,0.00138889), (525,0.00138889), (526,0.00138889), (527,0.00138889), (528,0.00138889), (529,0.00138889), (530,0.00138889), (531,0.00138889), (532,0.00138889), (533,0.00138889), (534,0.00138889), (535,0.00138889), (536,0.00138889), (537,0.00138889), (538,0.00138889), (539,0.00138889), (540,0.00138889), (541,0.00138889), (542,0.00138889), (543,0.00138889), (544,0.00138889), (545,0.00138889), (546,0.00138889), (547,0.00138889), (548,0.00138889), (549,0.00138889), (550,0.00138889), (551,0.00138889), (552,0.00138889), (553,0.00138889), (554,0.00138889), (555,0.00138889), (556,0.00138889), (557,0.00138889), (558,0.00138889), (559,0.00138889), (560,0.00138889), (561,0.00138889), (562,0.00138889), (563,0.00138889), (564,0.00138889), (565,0.00138889), (566,0.00138889), (567,0.00138889), (568,0.00138889), (569,0.00138889), (570,0.00138889), (571,0.00138889), (572,0.00138889), (573,0.00138889), (574,0.00138889), (575,0.00138889), (576,0.00138889), (577,0.00138889), (578,0.00138889), (579,0.00138889), (580,0.00138889), (581,0.00138889), (582,0.00138889), (583,0.00138889), (584,0.00138889), (585,0.00138889), (586,0.00138889), (587,0.00138889), (588,0.00138889), (589,0.00138889), (590,0.00138889), (591,0.00138889), (592,0.00138889), (593,0.00138889), (594,0.00138889), (595,0.00138889), (596,0.00138889), (597,0.00138889), (598,0.00138889), (599,0.00138889), (600,0.00138889), (601,0.00138889), (602,0.00138889), (603,0.00138889), (604,0.00138889), (605,0.00138889), (606,0.00138889), (607,0.00138889), (608,0.00138889), (609,0.00138889), (610,0.00138889), (611,0.00138889), (612,0.00138889), 
(613,0.00138889), (614,0.00138889), (615,0.00138889), (616,0.00138889), (617,0.00138889), (618,0.00138889), (619,0.00138889), (620,0.00138889), (621,0.00138889), (622,0.00138889), (623,0.00138889), (624,0.00138889), (625,0.00138889), (626,0.00138889), (627,0.00138889), (628,0.00138889), (629,0.00138889), (630,0.00138889), (631,0.00138889), (632,0.00138889), (633,0.00138889), (634,0.00138889), (635,0.00138889), (636,0.00138889), (637,0.00138889), (638,0.00138889), (639,0.00138889), (640,0.00138889), (641,0.00138889), (642,0.00138889), (643,0.00138889), (644,0.00138889), (645,0.00138889), (646,0.00138889), (647,0.00138889), (648,0.00138889), (649,0.00138889), (650,0.00138889), (651,0.00138889), (652,0.00138889), (653,0.00138889), (654,0.00138889), (655,0.00138889), (656,0.00138889), (657,0.00138889), (658,0.00138889), (659,0.00138889), (660,0.00138889), (661,0.00138889), (662,0.00138889), (663,0.00138889), (664,0.00138889), (665,0.00138889), (666,0.00138889), (667,0.00138889), (668,0.00138889), (669,0.00138889), (670,0.00138889), (671,0.00138889), (672,0.00138889), (673,0.00138889), (674,0.00138889), (675,0.00138889), (676,0.00138889), (677,0.00138889), (678,0.00138889), (679,0.00138889), (680,0.00138889), (681,0.00138889), (682,0.00138889), (683,0.00138889), (684,0.00138889), (685,0.00138889), (686,0.00138889), (687,0.00138889), (688,0.00138889), (689,0.00138889), (690,0.00138889), (691,0.00138889), (692,0.00138889), (693,0.00138889), (694,0.00138889), (695,0.00138889), (696,0.00138889), (697,0.00138889), (698,0.00138889), (699,0.00138889), (700,0.00138889), (701,0.00138889), (702,0.00138889), (703,0.00138889), (704,0.00138889), (705,0.00138889), (706,0.00138889), (707,0.00138889), (708,0.00138889), (709,0.00138889), (710,0.00138889), (711,0.00138889), (712,0.00138889), (713,0.00138889), (714,0.00138889), (715,0.00138889), (716,0.00138889), (717,0.00138889), (718,0.00138889), (719,0.00138889)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 
344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719] +StringLegalActions() = ["Placing white cubes on the board - action 0", "Placing white cubes on the board - action 1", "Placing white cubes on the board - action 2", "Placing white cubes on the board - action 3", "Placing white cubes on the board - action 4", "Placing white cubes on the board - action 5", "Placing white cubes on the board - action 6", "Placing white cubes on the board - action 7", "Placing white cubes on the board - action 8", "Placing white cubes on the board - action 9", "Placing white cubes on the board - action 10", "Placing white cubes on the board - action 11", "Placing white cubes on the board - action 12", "Placing white cubes on the board - action 13", "Placing white cubes on the board - action 14", "Placing white cubes on the board - action 15", "Placing white cubes on the board - action 16", "Placing white cubes on the board - action 17", "Placing white cubes on the board - action 18", "Placing white cubes on the board - action 19", "Placing white cubes on the board - action 20", "Placing white cubes on the board - action 21", "Placing white cubes on the board - action 22", "Placing white cubes on the board - action 23", "Placing white cubes on the board - action 24", "Placing white cubes on the board - action 25", "Placing white cubes on the board - action 26", "Placing white cubes on the board - action 27", "Placing white cubes on the board - action 28", "Placing white cubes on the board - action 29", "Placing white cubes on the board - action 30", "Placing white cubes on the board - action 31", "Placing white cubes on the board - action 32", "Placing white cubes on the board - action 33", "Placing white cubes on the 
board - action 34", "Placing white cubes on the board - action 35", "Placing white cubes on the board - action 36", "Placing white cubes on the board - action 37", "Placing white cubes on the board - action 38", "Placing white cubes on the board - action 39", "Placing white cubes on the board - action 40", "Placing white cubes on the board - action 41", "Placing white cubes on the board - action 42", "Placing white cubes on the board - action 43", "Placing white cubes on the board - action 44", "Placing white cubes on the board - action 45", "Placing white cubes on the board - action 46", "Placing white cubes on the board - action 47", "Placing white cubes on the board - action 48", "Placing white cubes on the board - action 49", "Placing white cubes on the board - action 50", "Placing white cubes on the board - action 51", "Placing white cubes on the board - action 52", "Placing white cubes on the board - action 53", "Placing white cubes on the board - action 54", "Placing white cubes on the board - action 55", "Placing white cubes on the board - action 56", "Placing white cubes on the board - action 57", "Placing white cubes on the board - action 58", "Placing white cubes on the board - action 59", "Placing white cubes on the board - action 60", "Placing white cubes on the board - action 61", "Placing white cubes on the board - action 62", "Placing white cubes on the board - action 63", "Placing white cubes on the board - action 64", "Placing white cubes on the board - action 65", "Placing white cubes on the board - action 66", "Placing white cubes on the board - action 67", "Placing white cubes on the board - action 68", "Placing white cubes on the board - action 69", "Placing white cubes on the board - action 70", "Placing white cubes on the board - action 71", "Placing white cubes on the board - action 72", "Placing white cubes on the board - action 73", "Placing white cubes on the board - action 74", "Placing white cubes on the board - action 75", "Placing white cubes on the board - action 76", "Placing white cubes on the board - action 77", "Placing white cubes on the board - action 78", "Placing white cubes on the board - action 79", "Placing white cubes on the board - action 80", "Placing white cubes on the board - action 81", "Placing white cubes on the board - action 82", "Placing white cubes on the board - action 83", "Placing white cubes on the board - action 84", "Placing white cubes on the board - action 85", "Placing white cubes on the board - action 86", "Placing white cubes on the board - action 87", "Placing white cubes on the board - action 88", "Placing white cubes on the board - action 89", "Placing white cubes on the board - action 90", "Placing white cubes on the board - action 91", "Placing white cubes on the board - action 92", "Placing white cubes on the board - action 93", "Placing white cubes on the board - action 94", "Placing white cubes on the board - action 95", "Placing white cubes on the board - action 96", "Placing white cubes on the board - action 97", "Placing white cubes on the board - action 98", "Placing white cubes on the board - action 99", "Placing white cubes on the board - action 100", "Placing white cubes on the board - action 101", "Placing white cubes on the board - action 102", "Placing white cubes on the board - action 103", "Placing white cubes on the board - action 104", "Placing white cubes on the board - action 105", "Placing white cubes on the board - action 106", "Placing white cubes on the board - action 107", "Placing white cubes 
on the board - action 108", "Placing white cubes on the board - action 109", "Placing white cubes on the board - action 110", "Placing white cubes on the board - action 111", "Placing white cubes on the board - action 112", "Placing white cubes on the board - action 113", "Placing white cubes on the board - action 114", "Placing white cubes on the board - action 115", "Placing white cubes on the board - action 116", "Placing white cubes on the board - action 117", "Placing white cubes on the board - action 118", "Placing white cubes on the board - action 119", "Placing white cubes on the board - action 120", "Placing white cubes on the board - action 121", "Placing white cubes on the board - action 122", "Placing white cubes on the board - action 123", "Placing white cubes on the board - action 124", "Placing white cubes on the board - action 125", "Placing white cubes on the board - action 126", "Placing white cubes on the board - action 127", "Placing white cubes on the board - action 128", "Placing white cubes on the board - action 129", "Placing white cubes on the board - action 130", "Placing white cubes on the board - action 131", "Placing white cubes on the board - action 132", "Placing white cubes on the board - action 133", "Placing white cubes on the board - action 134", "Placing white cubes on the board - action 135", "Placing white cubes on the board - action 136", "Placing white cubes on the board - action 137", "Placing white cubes on the board - action 138", "Placing white cubes on the board - action 139", "Placing white cubes on the board - action 140", "Placing white cubes on the board - action 141", "Placing white cubes on the board - action 142", "Placing white cubes on the board - action 143", "Placing white cubes on the board - action 144", "Placing white cubes on the board - action 145", "Placing white cubes on the board - action 146", "Placing white cubes on the board - action 147", "Placing white cubes on the board - action 148", "Placing white cubes on the board - action 149", "Placing white cubes on the board - action 150", "Placing white cubes on the board - action 151", "Placing white cubes on the board - action 152", "Placing white cubes on the board - action 153", "Placing white cubes on the board - action 154", "Placing white cubes on the board - action 155", "Placing white cubes on the board - action 156", "Placing white cubes on the board - action 157", "Placing white cubes on the board - action 158", "Placing white cubes on the board - action 159", "Placing white cubes on the board - action 160", "Placing white cubes on the board - action 161", "Placing white cubes on the board - action 162", "Placing white cubes on the board - action 163", "Placing white cubes on the board - action 164", "Placing white cubes on the board - action 165", "Placing white cubes on the board - action 166", "Placing white cubes on the board - action 167", "Placing white cubes on the board - action 168", "Placing white cubes on the board - action 169", "Placing white cubes on the board - action 170", "Placing white cubes on the board - action 171", "Placing white cubes on the board - action 172", "Placing white cubes on the board - action 173", "Placing white cubes on the board - action 174", "Placing white cubes on the board - action 175", "Placing white cubes on the board - action 176", "Placing white cubes on the board - action 177", "Placing white cubes on the board - action 178", "Placing white cubes on the board - action 179", "Placing white cubes on the board - action 
180", "Placing white cubes on the board - action 181", "Placing white cubes on the board - action 182", "Placing white cubes on the board - action 183", "Placing white cubes on the board - action 184", "Placing white cubes on the board - action 185", "Placing white cubes on the board - action 186", "Placing white cubes on the board - action 187", "Placing white cubes on the board - action 188", "Placing white cubes on the board - action 189", "Placing white cubes on the board - action 190", "Placing white cubes on the board - action 191", "Placing white cubes on the board - action 192", "Placing white cubes on the board - action 193", "Placing white cubes on the board - action 194", "Placing white cubes on the board - action 195", "Placing white cubes on the board - action 196", "Placing white cubes on the board - action 197", "Placing white cubes on the board - action 198", "Placing white cubes on the board - action 199", "Placing white cubes on the board - action 200", "Placing white cubes on the board - action 201", "Placing white cubes on the board - action 202", "Placing white cubes on the board - action 203", "Placing white cubes on the board - action 204", "Placing white cubes on the board - action 205", "Placing white cubes on the board - action 206", "Placing white cubes on the board - action 207", "Placing white cubes on the board - action 208", "Placing white cubes on the board - action 209", "Placing white cubes on the board - action 210", "Placing white cubes on the board - action 211", "Placing white cubes on the board - action 212", "Placing white cubes on the board - action 213", "Placing white cubes on the board - action 214", "Placing white cubes on the board - action 215", "Placing white cubes on the board - action 216", "Placing white cubes on the board - action 217", "Placing white cubes on the board - action 218", "Placing white cubes on the board - action 219", "Placing white cubes on the board - action 220", "Placing white cubes on the board - action 221", "Placing white cubes on the board - action 222", "Placing white cubes on the board - action 223", "Placing white cubes on the board - action 224", "Placing white cubes on the board - action 225", "Placing white cubes on the board - action 226", "Placing white cubes on the board - action 227", "Placing white cubes on the board - action 228", "Placing white cubes on the board - action 229", "Placing white cubes on the board - action 230", "Placing white cubes on the board - action 231", "Placing white cubes on the board - action 232", "Placing white cubes on the board - action 233", "Placing white cubes on the board - action 234", "Placing white cubes on the board - action 235", "Placing white cubes on the board - action 236", "Placing white cubes on the board - action 237", "Placing white cubes on the board - action 238", "Placing white cubes on the board - action 239", "Placing white cubes on the board - action 240", "Placing white cubes on the board - action 241", "Placing white cubes on the board - action 242", "Placing white cubes on the board - action 243", "Placing white cubes on the board - action 244", "Placing white cubes on the board - action 245", "Placing white cubes on the board - action 246", "Placing white cubes on the board - action 247", "Placing white cubes on the board - action 248", "Placing white cubes on the board - action 249", "Placing white cubes on the board - action 250", "Placing white cubes on the board - action 251", "Placing white cubes on the board - action 252", "Placing white cubes 
on the board - action 253", "Placing white cubes on the board - action 254", "Placing white cubes on the board - action 255", "Placing white cubes on the board - action 256", "Placing white cubes on the board - action 257", "Placing white cubes on the board - action 258", "Placing white cubes on the board - action 259", "Placing white cubes on the board - action 260", "Placing white cubes on the board - action 261", "Placing white cubes on the board - action 262", "Placing white cubes on the board - action 263", "Placing white cubes on the board - action 264", "Placing white cubes on the board - action 265", "Placing white cubes on the board - action 266", "Placing white cubes on the board - action 267", "Placing white cubes on the board - action 268", "Placing white cubes on the board - action 269", "Placing white cubes on the board - action 270", "Placing white cubes on the board - action 271", "Placing white cubes on the board - action 272", "Placing white cubes on the board - action 273", "Placing white cubes on the board - action 274", "Placing white cubes on the board - action 275", "Placing white cubes on the board - action 276", "Placing white cubes on the board - action 277", "Placing white cubes on the board - action 278", "Placing white cubes on the board - action 279", "Placing white cubes on the board - action 280", "Placing white cubes on the board - action 281", "Placing white cubes on the board - action 282", "Placing white cubes on the board - action 283", "Placing white cubes on the board - action 284", "Placing white cubes on the board - action 285", "Placing white cubes on the board - action 286", "Placing white cubes on the board - action 287", "Placing white cubes on the board - action 288", "Placing white cubes on the board - action 289", "Placing white cubes on the board - action 290", "Placing white cubes on the board - action 291", "Placing white cubes on the board - action 292", "Placing white cubes on the board - action 293", "Placing white cubes on the board - action 294", "Placing white cubes on the board - action 295", "Placing white cubes on the board - action 296", "Placing white cubes on the board - action 297", "Placing white cubes on the board - action 298", "Placing white cubes on the board - action 299", "Placing white cubes on the board - action 300", "Placing white cubes on the board - action 301", "Placing white cubes on the board - action 302", "Placing white cubes on the board - action 303", "Placing white cubes on the board - action 304", "Placing white cubes on the board - action 305", "Placing white cubes on the board - action 306", "Placing white cubes on the board - action 307", "Placing white cubes on the board - action 308", "Placing white cubes on the board - action 309", "Placing white cubes on the board - action 310", "Placing white cubes on the board - action 311", "Placing white cubes on the board - action 312", "Placing white cubes on the board - action 313", "Placing white cubes on the board - action 314", "Placing white cubes on the board - action 315", "Placing white cubes on the board - action 316", "Placing white cubes on the board - action 317", "Placing white cubes on the board - action 318", "Placing white cubes on the board - action 319", "Placing white cubes on the board - action 320", "Placing white cubes on the board - action 321", "Placing white cubes on the board - action 322", "Placing white cubes on the board - action 323", "Placing white cubes on the board - action 324", "Placing white cubes on the board - action 
325", "Placing white cubes on the board - action 326", "Placing white cubes on the board - action 327", "Placing white cubes on the board - action 328", "Placing white cubes on the board - action 329", "Placing white cubes on the board - action 330", "Placing white cubes on the board - action 331", "Placing white cubes on the board - action 332", "Placing white cubes on the board - action 333", "Placing white cubes on the board - action 334", "Placing white cubes on the board - action 335", "Placing white cubes on the board - action 336", "Placing white cubes on the board - action 337", "Placing white cubes on the board - action 338", "Placing white cubes on the board - action 339", "Placing white cubes on the board - action 340", "Placing white cubes on the board - action 341", "Placing white cubes on the board - action 342", "Placing white cubes on the board - action 343", "Placing white cubes on the board - action 344", "Placing white cubes on the board - action 345", "Placing white cubes on the board - action 346", "Placing white cubes on the board - action 347", "Placing white cubes on the board - action 348", "Placing white cubes on the board - action 349", "Placing white cubes on the board - action 350", "Placing white cubes on the board - action 351", "Placing white cubes on the board - action 352", "Placing white cubes on the board - action 353", "Placing white cubes on the board - action 354", "Placing white cubes on the board - action 355", "Placing white cubes on the board - action 356", "Placing white cubes on the board - action 357", "Placing white cubes on the board - action 358", "Placing white cubes on the board - action 359", "Placing white cubes on the board - action 360", "Placing white cubes on the board - action 361", "Placing white cubes on the board - action 362", "Placing white cubes on the board - action 363", "Placing white cubes on the board - action 364", "Placing white cubes on the board - action 365", "Placing white cubes on the board - action 366", "Placing white cubes on the board - action 367", "Placing white cubes on the board - action 368", "Placing white cubes on the board - action 369", "Placing white cubes on the board - action 370", "Placing white cubes on the board - action 371", "Placing white cubes on the board - action 372", "Placing white cubes on the board - action 373", "Placing white cubes on the board - action 374", "Placing white cubes on the board - action 375", "Placing white cubes on the board - action 376", "Placing white cubes on the board - action 377", "Placing white cubes on the board - action 378", "Placing white cubes on the board - action 379", "Placing white cubes on the board - action 380", "Placing white cubes on the board - action 381", "Placing white cubes on the board - action 382", "Placing white cubes on the board - action 383", "Placing white cubes on the board - action 384", "Placing white cubes on the board - action 385", "Placing white cubes on the board - action 386", "Placing white cubes on the board - action 387", "Placing white cubes on the board - action 388", "Placing white cubes on the board - action 389", "Placing white cubes on the board - action 390", "Placing white cubes on the board - action 391", "Placing white cubes on the board - action 392", "Placing white cubes on the board - action 393", "Placing white cubes on the board - action 394", "Placing white cubes on the board - action 395", "Placing white cubes on the board - action 396", "Placing white cubes on the board - action 397", "Placing white cubes 
on the board - action 398", "Placing white cubes on the board - action 399", "Placing white cubes on the board - action 400", "Placing white cubes on the board - action 401", "Placing white cubes on the board - action 402", "Placing white cubes on the board - action 403", "Placing white cubes on the board - action 404", "Placing white cubes on the board - action 405", "Placing white cubes on the board - action 406", "Placing white cubes on the board - action 407", "Placing white cubes on the board - action 408", "Placing white cubes on the board - action 409", "Placing white cubes on the board - action 410", "Placing white cubes on the board - action 411", "Placing white cubes on the board - action 412", "Placing white cubes on the board - action 413", "Placing white cubes on the board - action 414", "Placing white cubes on the board - action 415", "Placing white cubes on the board - action 416", "Placing white cubes on the board - action 417", "Placing white cubes on the board - action 418", "Placing white cubes on the board - action 419", "Placing white cubes on the board - action 420", "Placing white cubes on the board - action 421", "Placing white cubes on the board - action 422", "Placing white cubes on the board - action 423", "Placing white cubes on the board - action 424", "Placing white cubes on the board - action 425", "Placing white cubes on the board - action 426", "Placing white cubes on the board - action 427", "Placing white cubes on the board - action 428", "Placing white cubes on the board - action 429", "Placing white cubes on the board - action 430", "Placing white cubes on the board - action 431", "Placing white cubes on the board - action 432", "Placing white cubes on the board - action 433", "Placing white cubes on the board - action 434", "Placing white cubes on the board - action 435", "Placing white cubes on the board - action 436", "Placing white cubes on the board - action 437", "Placing white cubes on the board - action 438", "Placing white cubes on the board - action 439", "Placing white cubes on the board - action 440", "Placing white cubes on the board - action 441", "Placing white cubes on the board - action 442", "Placing white cubes on the board - action 443", "Placing white cubes on the board - action 444", "Placing white cubes on the board - action 445", "Placing white cubes on the board - action 446", "Placing white cubes on the board - action 447", "Placing white cubes on the board - action 448", "Placing white cubes on the board - action 449", "Placing white cubes on the board - action 450", "Placing white cubes on the board - action 451", "Placing white cubes on the board - action 452", "Placing white cubes on the board - action 453", "Placing white cubes on the board - action 454", "Placing white cubes on the board - action 455", "Placing white cubes on the board - action 456", "Placing white cubes on the board - action 457", "Placing white cubes on the board - action 458", "Placing white cubes on the board - action 459", "Placing white cubes on the board - action 460", "Placing white cubes on the board - action 461", "Placing white cubes on the board - action 462", "Placing white cubes on the board - action 463", "Placing white cubes on the board - action 464", "Placing white cubes on the board - action 465", "Placing white cubes on the board - action 466", "Placing white cubes on the board - action 467", "Placing white cubes on the board - action 468", "Placing white cubes on the board - action 469", "Placing white cubes on the board - action 
470", "Placing white cubes on the board - action 471", "Placing white cubes on the board - action 472", "Placing white cubes on the board - action 473", "Placing white cubes on the board - action 474", "Placing white cubes on the board - action 475", "Placing white cubes on the board - action 476", "Placing white cubes on the board - action 477", "Placing white cubes on the board - action 478", "Placing white cubes on the board - action 479", "Placing white cubes on the board - action 480", "Placing white cubes on the board - action 481", "Placing white cubes on the board - action 482", "Placing white cubes on the board - action 483", "Placing white cubes on the board - action 484", "Placing white cubes on the board - action 485", "Placing white cubes on the board - action 486", "Placing white cubes on the board - action 487", "Placing white cubes on the board - action 488", "Placing white cubes on the board - action 489", "Placing white cubes on the board - action 490", "Placing white cubes on the board - action 491", "Placing white cubes on the board - action 492", "Placing white cubes on the board - action 493", "Placing white cubes on the board - action 494", "Placing white cubes on the board - action 495", "Placing white cubes on the board - action 496", "Placing white cubes on the board - action 497", "Placing white cubes on the board - action 498", "Placing white cubes on the board - action 499", "Placing white cubes on the board - action 500", "Placing white cubes on the board - action 501", "Placing white cubes on the board - action 502", "Placing white cubes on the board - action 503", "Placing white cubes on the board - action 504", "Placing white cubes on the board - action 505", "Placing white cubes on the board - action 506", "Placing white cubes on the board - action 507", "Placing white cubes on the board - action 508", "Placing white cubes on the board - action 509", "Placing white cubes on the board - action 510", "Placing white cubes on the board - action 511", "Placing white cubes on the board - action 512", "Placing white cubes on the board - action 513", "Placing white cubes on the board - action 514", "Placing white cubes on the board - action 515", "Placing white cubes on the board - action 516", "Placing white cubes on the board - action 517", "Placing white cubes on the board - action 518", "Placing white cubes on the board - action 519", "Placing white cubes on the board - action 520", "Placing white cubes on the board - action 521", "Placing white cubes on the board - action 522", "Placing white cubes on the board - action 523", "Placing white cubes on the board - action 524", "Placing white cubes on the board - action 525", "Placing white cubes on the board - action 526", "Placing white cubes on the board - action 527", "Placing white cubes on the board - action 528", "Placing white cubes on the board - action 529", "Placing white cubes on the board - action 530", "Placing white cubes on the board - action 531", "Placing white cubes on the board - action 532", "Placing white cubes on the board - action 533", "Placing white cubes on the board - action 534", "Placing white cubes on the board - action 535", "Placing white cubes on the board - action 536", "Placing white cubes on the board - action 537", "Placing white cubes on the board - action 538", "Placing white cubes on the board - action 539", "Placing white cubes on the board - action 540", "Placing white cubes on the board - action 541", "Placing white cubes on the board - action 542", "Placing white cubes 
on the board - action 543", "Placing white cubes on the board - action 544", "Placing white cubes on the board - action 545", "Placing white cubes on the board - action 546", "Placing white cubes on the board - action 547", "Placing white cubes on the board - action 548", "Placing white cubes on the board - action 549", "Placing white cubes on the board - action 550", "Placing white cubes on the board - action 551", "Placing white cubes on the board - action 552", "Placing white cubes on the board - action 553", "Placing white cubes on the board - action 554", "Placing white cubes on the board - action 555", "Placing white cubes on the board - action 556", "Placing white cubes on the board - action 557", "Placing white cubes on the board - action 558", "Placing white cubes on the board - action 559", "Placing white cubes on the board - action 560", "Placing white cubes on the board - action 561", "Placing white cubes on the board - action 562", "Placing white cubes on the board - action 563", "Placing white cubes on the board - action 564", "Placing white cubes on the board - action 565", "Placing white cubes on the board - action 566", "Placing white cubes on the board - action 567", "Placing white cubes on the board - action 568", "Placing white cubes on the board - action 569", "Placing white cubes on the board - action 570", "Placing white cubes on the board - action 571", "Placing white cubes on the board - action 572", "Placing white cubes on the board - action 573", "Placing white cubes on the board - action 574", "Placing white cubes on the board - action 575", "Placing white cubes on the board - action 576", "Placing white cubes on the board - action 577", "Placing white cubes on the board - action 578", "Placing white cubes on the board - action 579", "Placing white cubes on the board - action 580", "Placing white cubes on the board - action 581", "Placing white cubes on the board - action 582", "Placing white cubes on the board - action 583", "Placing white cubes on the board - action 584", "Placing white cubes on the board - action 585", "Placing white cubes on the board - action 586", "Placing white cubes on the board - action 587", "Placing white cubes on the board - action 588", "Placing white cubes on the board - action 589", "Placing white cubes on the board - action 590", "Placing white cubes on the board - action 591", "Placing white cubes on the board - action 592", "Placing white cubes on the board - action 593", "Placing white cubes on the board - action 594", "Placing white cubes on the board - action 595", "Placing white cubes on the board - action 596", "Placing white cubes on the board - action 597", "Placing white cubes on the board - action 598", "Placing white cubes on the board - action 599", "Placing white cubes on the board - action 600", "Placing white cubes on the board - action 601", "Placing white cubes on the board - action 602", "Placing white cubes on the board - action 603", "Placing white cubes on the board - action 604", "Placing white cubes on the board - action 605", "Placing white cubes on the board - action 606", "Placing white cubes on the board - action 607", "Placing white cubes on the board - action 608", "Placing white cubes on the board - action 609", "Placing white cubes on the board - action 610", "Placing white cubes on the board - action 611", "Placing white cubes on the board - action 612", "Placing white cubes on the board - action 613", "Placing white cubes on the board - action 614", "Placing white cubes on the board - action 
615", "Placing white cubes on the board - action 616", "Placing white cubes on the board - action 617", "Placing white cubes on the board - action 618", "Placing white cubes on the board - action 619", "Placing white cubes on the board - action 620", "Placing white cubes on the board - action 621", "Placing white cubes on the board - action 622", "Placing white cubes on the board - action 623", "Placing white cubes on the board - action 624", "Placing white cubes on the board - action 625", "Placing white cubes on the board - action 626", "Placing white cubes on the board - action 627", "Placing white cubes on the board - action 628", "Placing white cubes on the board - action 629", "Placing white cubes on the board - action 630", "Placing white cubes on the board - action 631", "Placing white cubes on the board - action 632", "Placing white cubes on the board - action 633", "Placing white cubes on the board - action 634", "Placing white cubes on the board - action 635", "Placing white cubes on the board - action 636", "Placing white cubes on the board - action 637", "Placing white cubes on the board - action 638", "Placing white cubes on the board - action 639", "Placing white cubes on the board - action 640", "Placing white cubes on the board - action 641", "Placing white cubes on the board - action 642", "Placing white cubes on the board - action 643", "Placing white cubes on the board - action 644", "Placing white cubes on the board - action 645", "Placing white cubes on the board - action 646", "Placing white cubes on the board - action 647", "Placing white cubes on the board - action 648", "Placing white cubes on the board - action 649", "Placing white cubes on the board - action 650", "Placing white cubes on the board - action 651", "Placing white cubes on the board - action 652", "Placing white cubes on the board - action 653", "Placing white cubes on the board - action 654", "Placing white cubes on the board - action 655", "Placing white cubes on the board - action 656", "Placing white cubes on the board - action 657", "Placing white cubes on the board - action 658", "Placing white cubes on the board - action 659", "Placing white cubes on the board - action 660", "Placing white cubes on the board - action 661", "Placing white cubes on the board - action 662", "Placing white cubes on the board - action 663", "Placing white cubes on the board - action 664", "Placing white cubes on the board - action 665", "Placing white cubes on the board - action 666", "Placing white cubes on the board - action 667", "Placing white cubes on the board - action 668", "Placing white cubes on the board - action 669", "Placing white cubes on the board - action 670", "Placing white cubes on the board - action 671", "Placing white cubes on the board - action 672", "Placing white cubes on the board - action 673", "Placing white cubes on the board - action 674", "Placing white cubes on the board - action 675", "Placing white cubes on the board - action 676", "Placing white cubes on the board - action 677", "Placing white cubes on the board - action 678", "Placing white cubes on the board - action 679", "Placing white cubes on the board - action 680", "Placing white cubes on the board - action 681", "Placing white cubes on the board - action 682", "Placing white cubes on the board - action 683", "Placing white cubes on the board - action 684", "Placing white cubes on the board - action 685", "Placing white cubes on the board - action 686", "Placing white cubes on the board - action 687", "Placing white cubes 
on the board - action 688", "Placing white cubes on the board - action 689", "Placing white cubes on the board - action 690", "Placing white cubes on the board - action 691", "Placing white cubes on the board - action 692", "Placing white cubes on the board - action 693", "Placing white cubes on the board - action 694", "Placing white cubes on the board - action 695", "Placing white cubes on the board - action 696", "Placing white cubes on the board - action 697", "Placing white cubes on the board - action 698", "Placing white cubes on the board - action 699", "Placing white cubes on the board - action 700", "Placing white cubes on the board - action 701", "Placing white cubes on the board - action 702", "Placing white cubes on the board - action 703", "Placing white cubes on the board - action 704", "Placing white cubes on the board - action 705", "Placing white cubes on the board - action 706", "Placing white cubes on the board - action 707", "Placing white cubes on the board - action 708", "Placing white cubes on the board - action 709", "Placing white cubes on the board - action 710", "Placing white cubes on the board - action 711", "Placing white cubes on the board - action 712", "Placing white cubes on the board - action 713", "Placing white cubes on the board - action 714", "Placing white cubes on the board - action 715", "Placing white cubes on the board - action 716", "Placing white cubes on the board - action 717", "Placing white cubes on the board - action 718", "Placing white cubes on the board - action 719"] + +# Apply action "Placing white cubes on the board - action 638" +action: 638 + +# State 2 +# Apply action "roll 5" +action: 4 + +# State 3 +# |b2||b1||b3||__||__| +# |b4||b5||__||__||__| +# |b6||__||__||__||w6| +# |__||__||__||w2||w4| +# |__||__||w3||w1||w5| +IsTerminal() = False +History() = [120, 638, 4] +HistoryString() = "120, 638, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "|b2||b1||b3||__||__|\n|b4||b5||__||__||__|\n|b6||__||__||__||w6|\n|__||__||__||w2||w4|\n|__||__||w3||w1||w5|\n" +ObservationString(1) = "|b2||b1||b3||__||__|\n|b4||b5||__||__||__|\n|b6||__||__||__||w6|\n|__||__||__||w2||w4|\n|__||__||w3||w1||w5|\n" +ObservationTensor(0): binvec(300, 0x40000000008200000000004000200000010010000000000020200000000004800000000004) +ObservationTensor(1): binvec(300, 0x40000000008200000000004000200000010010000000000020200000000004800000000004) +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [295, 297, 299] +StringLegalActions() = ["W5-up*", "W5-diag*", "W5-left*"] + +# Apply action "W5-diag*" +action: 297 + +# State 4 +# Apply action "roll 3" +action: 2 + +# State 5 +# |b2||b1||b3||__||__| +# |b4||b5||__||__||__| +# |b6||__||__||__||w6| +# |__||__||__||w5||w4| +# |__||__||w3||w1||__| +IsTerminal() = False +History() = [120, 638, 4, 297, 2] +HistoryString() = "120, 638, 4, 297, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "|b2||b1||b3||__||__|\n|b4||b5||__||__||__|\n|b6||__||__||__||w6|\n|__||__||__||w5||w4|\n|__||__||w3||w1||__|\n" +ObservationString(1) = "|b2||b1||b3||__||__|\n|b4||b5||__||__||__|\n|b6||__||__||__||w6|\n|__||__||__||w5||w4|\n|__||__||w3||w1||__|\n" +ObservationTensor(0): binvec(300, 0x40000000008200000000000000200000010010000000000020200000000100800000000004) +ObservationTensor(1): binvec(300, 0x40000000008200000000000000200000010010000000000020200000000100800000000004) +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = 
[24, 26, 28] +StringLegalActions() = ["B3-diag", "B3-down", "B3-right"] + +# Apply action "B3-diag" +action: 24 + +# State 6 +# Apply action "roll 3" +action: 2 + +# State 7 +# |b2||b1||__||__||__| +# |b4||b5||__||b3||__| +# |b6||__||__||__||w6| +# |__||__||__||w5||w4| +# |__||__||w3||w1||__| +IsTerminal() = False +History() = [120, 638, 4, 297, 2, 24, 2] +HistoryString() = "120, 638, 4, 297, 2, 24, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "|b2||b1||__||__||__|\n|b4||b5||__||b3||__|\n|b6||__||__||__||w6|\n|__||__||__||w5||w4|\n|__||__||w3||w1||__|\n" +ObservationString(1) = "|b2||b1||__||__||__|\n|b4||b5||__||b3||__|\n|b6||__||__||__||w6|\n|__||__||__||w5||w4|\n|__||__||w3||w1||__|\n" +ObservationTensor(0): binvec(300, 0x40000000008200000000000000008000010010000000000020200000000100800000000004) +ObservationTensor(1): binvec(300, 0x40000000008200000000000000008000010010000000000020200000000100800000000004) +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [270, 272, 274] +StringLegalActions() = ["W3-up", "W3-diag", "W3-left"] + +# Apply action "W3-left" +action: 274 + +# State 8 +# Apply action "roll 1" +action: 0 + +# State 9 +# |b2||b1||__||__||__| +# |b4||b5||__||b3||__| +# |b6||__||__||__||w6| +# |__||__||__||w5||w4| +# |__||w3||__||w1||__| +IsTerminal() = False +History() = [120, 638, 4, 297, 2, 24, 2, 274, 0] +HistoryString() = "120, 638, 4, 297, 2, 24, 2, 274, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "|b2||b1||__||__||__|\n|b4||b5||__||b3||__|\n|b6||__||__||__||w6|\n|__||__||__||w5||w4|\n|__||w3||__||w1||__|\n" +ObservationString(1) = "|b2||b1||__||__||__|\n|b4||b5||__||b3||__|\n|b6||__||__||__||w6|\n|__||__||__||w5||w4|\n|__||w3||__||w1||__|\n" +ObservationTensor(0): binvec(300, 0x40000000008200000000000000008000200010000000000020200000000100800000000004) +ObservationTensor(1): binvec(300, 0x40000000008200000000000000008000200010000000000020200000000100800000000004) +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [12, 15, 16] +StringLegalActions() = ["B1-diag", "B1-down*", "B1-right"] + +# Apply action "B1-down*" +action: 15 + +# State 10 +# Apply action "roll 4" +action: 3 + +# State 11 +# |b2||__||__||__||__| +# |b4||b1||__||b3||__| +# |b6||__||__||__||w6| +# |__||__||__||w5||w4| +# |__||w3||__||w1||__| +IsTerminal() = False +History() = [120, 638, 4, 297, 2, 24, 2, 274, 0, 15, 3] +HistoryString() = "120, 638, 4, 297, 2, 24, 2, 274, 0, 15, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "|b2||__||__||__||__|\n|b4||b1||__||b3||__|\n|b6||__||__||__||w6|\n|__||__||__||w5||w4|\n|__||w3||__||w1||__|\n" +ObservationString(1) = "|b2||__||__||__||__|\n|b4||b1||__||b3||__|\n|b6||__||__||__||w6|\n|__||__||__||w5||w4|\n|__||w3||__||w1||__|\n" +ObservationTensor(0): binvec(300, 0x20000000008200000000000000008000200010000000000020000000000100800000000004) +ObservationTensor(1): binvec(300, 0x20000000008200000000000000008000200010000000000020000000000100800000000004) +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [235, 236, 239] +StringLegalActions() = ["W4-up*", "W4-diag", "W4-left*"] + +# Apply action "W4-up*" +action: 235 + +# State 12 +# Apply action "roll 2" +action: 1 + +# State 13 +# |b2||__||__||__||__| +# |b4||b1||__||b3||__| +# |b6||__||__||__||w4| +# |__||__||__||w5||__| +# |__||w3||__||w1||__| +IsTerminal() = False +History() = [120, 638, 4, 297, 2, 24, 2, 274, 0, 15, 3, 235, 1] 
+HistoryString() = "120, 638, 4, 297, 2, 24, 2, 274, 0, 15, 3, 235, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "|b2||__||__||__||__|\n|b4||b1||__||b3||__|\n|b6||__||__||__||w4|\n|__||__||__||w5||__|\n|__||w3||__||w1||__|\n" +ObservationString(1) = "|b2||__||__||__||__|\n|b4||b1||__||b3||__|\n|b6||__||__||__||w4|\n|__||__||__||w5||__|\n|__||w3||__||w1||__|\n" +ObservationTensor(0): binvec(300, 0x20000000008200000000000000008000200010000000000040000000000100800000000000) +ObservationTensor(1): binvec(300, 0x20000000008200000000000000008000200010000000000040000000000100800000000000) +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 3, 4] +StringLegalActions() = ["B2-diag*", "B2-down*", "B2-right"] + +# Apply action "B2-down*" +action: 3 + +# State 14 +# Apply action "roll 5" +action: 4 + +# State 15 +# Apply action "W5-up" +action: 222 + +# State 16 +# Apply action "roll 3" +action: 2 + +# State 17 +# Apply action "B3-down*" +action: 99 + +# State 18 +# Apply action "roll 3" +action: 2 + +# State 19 +# Apply action "W3-left" +action: 262 + +# State 20 +# Apply action "roll 6" +action: 5 + +# State 21 +# Apply action "B6-diag" +action: 120 + +# State 22 +# Apply action "roll 1" +action: 0 + +# State 23 +# Apply action "W1-diag" +action: 284 + +# State 24 +# Apply action "roll 4" +action: 3 + +# State 25 +# Apply action "B6-diag" +action: 192 + +# State 26 +# Apply action "roll 5" +action: 4 + +# State 27 +# Apply action "W4-up" +action: 174 + +# State 28 +# Apply action "roll 6" +action: 5 + +# State 29 +# Apply action "B6-right" +action: 268 + +# State 30 +# Apply action "roll 6" +action: 5 + +# State 31 +# Apply action "W4-up" +action: 114 + +# State 32 +# Apply action "roll 1" +action: 0 + +# State 33 +# Apply action "B1-right" +action: 76 + +# State 34 +# Apply action "roll 6" +action: 5 + +# State 35 +# Apply action "W4-left" +action: 58 + +# State 36 +# Apply action "roll 5" +action: 4 + +# State 37 +# Apply action "B3-right" +action: 160 + +# State 38 +# Apply action "roll 3" +action: 2 + +# State 39 +# Apply action "W3-up" +action: 246 + +# State 40 +# Apply action "roll 5" +action: 4 + +# State 41 +# Apply action "B6-right" +action: 280 + +# State 42 +# |__||__||__||w4||__| +# |b2||__||b1||__||__| +# |__||__||__||__||b3| +# |w3||__||w1||__||__| +# |__||__||__||__||b6| +IsTerminal() = True +History() = [120, 638, 4, 297, 2, 24, 2, 274, 0, 15, 3, 235, 1, 3, 4, 222, 2, 99, 2, 262, 5, 120, 0, 284, 3, 192, 4, 174, 5, 268, 5, 114, 0, 76, 5, 58, 4, 160, 2, 246, 4, 280] +HistoryString() = "120, 638, 4, 297, 2, 24, 2, 274, 0, 15, 3, 235, 1, 3, 4, 222, 2, 99, 2, 262, 5, 120, 0, 284, 3, 192, 4, 174, 5, 268, 5, 114, 0, 76, 5, 58, 4, 160, 2, 246, 4, 280" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "|__||__||__||w4||__|\n|b2||__||b1||__||__|\n|__||__||__||__||b3|\n|w3||__||w1||__||__|\n|__||__||__||__||b6|\n" +ObservationString(1) = "|__||__||__||w4||__|\n|b2||__||b1||__||__|\n|__||__||__||__||b3|\n|w3||__||w1||__||__|\n|__||__||__||__||b6|\n" +ObservationTensor(0): binvec(300, 0x1000000200100000000000000000208000000000000002000000000000000000002000000) +ObservationTensor(1): binvec(300, 0x1000000200100000000000000000208000000000000002000000000000000000002000000) +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/euchre.txt 
b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/euchre.txt new file mode 100644 index 0000000..42a85cc --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/euchre.txt @@ -0,0 +1,704 @@ +game: euchre + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Euchre" +GameType.max_num_players = 4 +GameType.min_num_players = 4 +GameType.parameter_specification = ["allow_lone_defender", "stick_the_dealer"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = False +GameType.provides_observation_tensor = False +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "euchre" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 31 +PolicyTensorShape() = [31] +MaxChanceOutcomes() = 24 +GetParameters() = {allow_lone_defender=False,stick_the_dealer=True} +NumPlayers() = 4 +MinUtility() = -4.0 +MaxUtility() = 4.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = [935] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 935 +MaxGameLength() = 29 +ToString() = "euchre()" + +# State 0 +# Dealer: +# +# S +# H +# D +# C +# S S +# H H +# D D +# C C +# S +# H +# D +# C +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateTensor(0): zeros(935) +InformationStateTensor(1): zeros(935) +InformationStateTensor(2): zeros(935) +InformationStateTensor(3): zeros(935) +ChanceOutcomes() = [(0,0.25), (1,0.25), (2,0.25), (3,0.25)] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["N", "E", "S", "W"] + +# Apply action "E" +action: 1 + +# State 1 +# Dealer: E +# +# S +# H +# D +# C +# S S +# H H +# D D +# C C +# S +# H +# D +# C +IsTerminal() = False +History() = [1] +HistoryString() = "1" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateTensor(0): zeros(935) +InformationStateTensor(1): zeros(935) +InformationStateTensor(2): zeros(935) +InformationStateTensor(3): zeros(935) +ChanceOutcomes() = [(0,0.0416667), (1,0.0416667), (2,0.0416667), (3,0.0416667), (4,0.0416667), (5,0.0416667), (6,0.0416667), (7,0.0416667), (8,0.0416667), (9,0.0416667), (10,0.0416667), (11,0.0416667), (12,0.0416667), (13,0.0416667), (14,0.0416667), (15,0.0416667), (16,0.0416667), (17,0.0416667), (18,0.0416667), (19,0.0416667), (20,0.0416667), (21,0.0416667), (22,0.0416667), (23,0.0416667)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] +StringLegalActions() = ["C9", "D9", "H9", "S9", "CT", "DT", "HT", "ST", "CJ", "DJ", "HJ", "SJ", "CQ", "DQ", "HQ", "SQ", "CK", "DK", "HK", "SK", "CA", "DA", "HA", "SA"] + +# Apply action "HT" +action: 6 + +# State 2 +# Apply action "C9" +action: 0 + +# State 3 +# Apply action "SJ" +action: 11 + +# State 4 +# Apply action "DA" +action: 21 + +# State 5 +# Apply action "CJ" +action: 8 + +# State 6 +# Apply action "SK" +action: 19 + +# State 7 +# Apply action "ST" +action: 7 + +# State 8 +# Apply action "HQ" +action: 14 + +# State 9 +# Apply action "S9" +action: 3 + +# State 10 +# Apply action "HA" +action: 22 + +# State 11 +# Apply action "CT" +action: 4 + +# State 12 +# Apply action "SQ" +action: 15 + +# State 13 +# Apply action "SA" 
+action: 23 + +# State 14 +# Apply action "DQ" +action: 13 + +# State 15 +# Apply action "H9" +action: 2 + +# State 16 +# Apply action "CA" +action: 20 + +# State 17 +# Apply action "DK" +action: 17 + +# State 18 +# Apply action "CK" +action: 16 + +# State 19 +# Apply action "HK" +action: 18 + +# State 20 +# Apply action "CQ" +action: 12 + +# State 21 +# Apply action "DT" +action: 5 + +# State 22 +# Dealer: E +# +# S Q +# H Q +# D A +# C AQ +# S JT S A9 +# H K9 H T +# D D K +# C T C J +# S K +# H A +# D Q +# C K9 +# +# Upcard: DT +IsTerminal() = False +History() = [1, 6, 0, 11, 21, 8, 19, 7, 14, 3, 22, 4, 15, 23, 13, 2, 20, 17, 16, 18, 12, 5] +HistoryString() = "1, 6, 0, 11, 21, 8, 19, 7, 14, 3, 22, 4, 15, 23, 13, 2, 20, 17, 16, 18, 12, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateTensor(0): binvec(935, 0x202000000000000003000b0c000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(935, 0x202000000000000003128041000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(935, 0x202000000000000003800492000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(3): binvec(935, 0x202000000000000003291020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [24, 26] +StringLegalActions() = ["Pass", "Diamonds"] + +# Apply action "Pass" +action: 24 + +# State 23 +# Dealer: E +# +# S Q +# H Q +# D A +# C AQ +# S JT S A9 +# H K9 H T +# D D K +# C T C J +# S K +# H A +# D Q +# C K9 +# +# Upcard: DT +# Bidding: +# North East South West +# Pass +IsTerminal() = False +History() = [1, 6, 0, 11, 21, 8, 19, 7, 14, 3, 22, 4, 15, 23, 13, 2, 20, 17, 16, 18, 12, 5, 24] +HistoryString() = "1, 6, 0, 11, 21, 8, 19, 7, 14, 3, 22, 4, 15, 23, 13, 2, 20, 17, 16, 18, 12, 5, 24" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateTensor(0): binvec(935, 0x202000002000000003000b0c000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(935, 0x202000002000000003128041000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(935, 0x202000002000000003800492000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(3): binvec(935, 
0x202000002000000003291020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [24, 26] +StringLegalActions() = ["Pass", "Diamonds"] + +# Apply action "Pass" +action: 24 + +# State 24 +# Dealer: E +# +# S Q +# H Q +# D A +# C AQ +# S JT S A9 +# H K9 H T +# D D K +# C T C J +# S K +# H A +# D Q +# C K9 +# +# Upcard: DT +# Bidding: +# North East South West +# Pass Pass +# +IsTerminal() = False +History() = [1, 6, 0, 11, 21, 8, 19, 7, 14, 3, 22, 4, 15, 23, 13, 2, 20, 17, 16, 18, 12, 5, 24, 24] +HistoryString() = "1, 6, 0, 11, 21, 8, 19, 7, 14, 3, 22, 4, 15, 23, 13, 2, 20, 17, 16, 18, 12, 5, 24, 24" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateTensor(0): binvec(935, 0x202000002100000003000b0c000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(935, 0x202000002100000003128041000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(935, 0x202000002100000003800492000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(3): binvec(935, 0x202000002100000003291020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [24, 26] +StringLegalActions() = ["Pass", "Diamonds"] + +# Apply action "Pass" +action: 24 + +# State 25 +# Dealer: E +# +# S Q +# H Q +# D A +# C AQ +# S JT S A9 +# H K9 H T +# D D K +# C T C J +# S K +# H A +# D Q +# C K9 +# +# Upcard: DT +# Bidding: +# North East South West +# Pass Pass +# Pass +IsTerminal() = False +History() = [1, 6, 0, 11, 21, 8, 19, 7, 14, 3, 22, 4, 15, 23, 13, 2, 20, 17, 16, 18, 12, 5, 24, 24, 24] +HistoryString() = "1, 6, 0, 11, 21, 8, 19, 7, 14, 3, 22, 4, 15, 23, 13, 2, 20, 17, 16, 18, 12, 5, 24, 24, 24" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateTensor(0): binvec(935, 0x202000002108000003000b0c000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(935, 0x202000002108000003128041000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(935, 0x202000002108000003800492000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) 
+InformationStateTensor(3): binvec(935, 0x202000002108000003291020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [24, 26] +StringLegalActions() = ["Pass", "Diamonds"] + +# Apply action "Pass" +action: 24 + +# State 26 +# Dealer: E +# +# S Q +# H Q +# D A +# C AQ +# S JT S A9 +# H K9 H T +# D D K +# C T C J +# S K +# H A +# D Q +# C K9 +# +# Upcard: DT +# Bidding: +# North East South West +# Pass Pass +# Pass Pass +IsTerminal() = False +History() = [1, 6, 0, 11, 21, 8, 19, 7, 14, 3, 22, 4, 15, 23, 13, 2, 20, 17, 16, 18, 12, 5, 24, 24, 24, 24] +HistoryString() = "1, 6, 0, 11, 21, 8, 19, 7, 14, 3, 22, 4, 15, 23, 13, 2, 20, 17, 16, 18, 12, 5, 24, 24, 24, 24" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateTensor(0): binvec(935, 0x202000002108400003000b0c000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(935, 0x202000002108400003128041000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(935, 0x202000002108400003800492000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(3): binvec(935, 0x202000002108400003291020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [24, 25, 27, 28] +StringLegalActions() = ["Pass", "Clubs", "Hearts", "Spades"] + +# Apply action "Hearts" +action: 27 + +# State 27 +# Dealer: E +# +# S Q +# H Q +# D A +# C AQ +# S JT S A9 +# H K9 H T +# D D K +# C T C J +# S K +# H A +# D Q +# C K9 +# +# Upcard: DT +# Bidding: +# North East South West +# Pass Pass +# Pass Pass Pick up! 
+IsTerminal() = False +History() = [1, 6, 0, 11, 21, 8, 19, 7, 14, 3, 22, 4, 15, 23, 13, 2, 20, 17, 16, 18, 12, 5, 24, 24, 24, 24, 27] +HistoryString() = "1, 6, 0, 11, 21, 8, 19, 7, 14, 3, 22, 4, 15, 23, 13, 2, 20, 17, 16, 18, 12, 5, 24, 24, 24, 24, 27" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateTensor(0): binvec(935, 0x202000002108500000000b0c000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(935, 0x202000002108500000128041000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(935, 0x202000002108500000800492000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(3): binvec(935, 0x202000002108500000291020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [29, 30] +StringLegalActions() = ["Alone", "Partner"] + +# Apply action "Alone" +action: 29 + +# State 28 +# Apply action "CK" +action: 16 + +# State 29 +# Dealer: E +# +# S Q +# H Q +# D A +# C AQ +# S JT S A9 +# H K9 H T +# D D K +# C T C J +# S K +# H A +# D Q +# C 9 +# +# Upcard: DT +# Bidding: +# North East South West +# Pass Pass +# Pass Pass Pick up! 
+# +# Declarer go alone: true +# +# Tricks: +# N E S W N E S +# CK +# +# Points: +# N: 0 +# E: 0 +# S: 0 +# W: 0 +IsTerminal() = False +History() = [1, 6, 0, 11, 21, 8, 19, 7, 14, 3, 22, 4, 15, 23, 13, 2, 20, 17, 16, 18, 12, 5, 24, 24, 24, 24, 27, 29, 16] +HistoryString() = "1, 6, 0, 11, 21, 8, 19, 7, 14, 3, 22, 4, 15, 23, 13, 2, 20, 17, 16, 18, 12, 5, 24, 24, 24, 24, 27, 29, 16" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateTensor(0): binvec(935, 0x202000002108500004000b0c000000000000000080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(935, 0x202000002108500004128041000000000000000080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(935, 0x202000002108500004800412000000000000000080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(3): binvec(935, 0x202000002108500004291020000000000000000080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [4] +StringLegalActions() = ["CT"] + +# Apply action "CT" +action: 4 + +# State 30 +# Dealer: E +# +# S Q +# H Q +# D A +# C AQ +# S JT S A9 +# H K9 H T +# D D K +# C C J +# S K +# H A +# D Q +# C 9 +# +# Upcard: DT +# Bidding: +# North East South West +# Pass Pass +# Pass Pass Pick up! 
+# +# Declarer go alone: true +# +# Tricks: +# N E S W N E S +# CK CT +# +# Points: +# N: 0 +# E: 0 +# S: 0 +# W: 0 +IsTerminal() = False +History() = [1, 6, 0, 11, 21, 8, 19, 7, 14, 3, 22, 4, 15, 23, 13, 2, 20, 17, 16, 18, 12, 5, 24, 24, 24, 24, 27, 29, 16, 4] +HistoryString() = "1, 6, 0, 11, 21, 8, 19, 7, 14, 3, 22, 4, 15, 23, 13, 2, 20, 17, 16, 18, 12, 5, 24, 24, 24, 24, 27, 29, 16, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateTensor(0): binvec(935, 0x202000002108500004000b0c000000000000000080080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(935, 0x202000002108500004128041000000000000000080080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(935, 0x202000002108500004800412000000000000000080080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(3): binvec(935, 0x202000002108500004211020000000000000000080080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [8] +StringLegalActions() = ["CJ"] + +# Apply action "CJ" +action: 8 + +# State 31 +# Apply action "C9" +action: 0 + +# State 32 +# Dealer: E +# +# S Q +# H Q +# D A +# C AQ +# S JT S A9 +# H K9 H T +# D D K +# C C +# S K +# H A +# D Q +# C +# +# Upcard: DT +# Bidding: +# North East South West +# Pass Pass +# Pass Pass Pick up! 
+# +# Declarer go alone: true +# +# Tricks: +# N E S W N E S +# CK CT CJ +# C9 +# +# Points: +# N: 0 +# E: 0 +# S: 0 +# W: 0 +IsTerminal() = False +History() = [1, 6, 0, 11, 21, 8, 19, 7, 14, 3, 22, 4, 15, 23, 13, 2, 20, 17, 16, 18, 12, 5, 24, 24, 24, 24, 27, 29, 16, 4, 8, 0] +HistoryString() = "1, 6, 0, 11, 21, 8, 19, 7, 14, 3, 22, 4, 15, 23, 13, 2, 20, 17, 16, 18, 12, 5, 24, 24, 24, 24, 27, 29, 16, 4, 8, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateTensor(0): binvec(935, 0x202000002108500004000b0c000000000000000080080000000000008000000000000000000000800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(935, 0x202000002108500004120041000000000000000080080000000000008000000000000000000000800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(935, 0x202000002108500004000412000000000000000080080000000000008000000000000000000000800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(3): binvec(935, 0x202000002108500004211020000000000000000080080000000000008000000000000000000000800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [2, 7, 11, 18] +StringLegalActions() = ["H9", "ST", "SJ", "HK"] + +# Apply action "H9" +action: 2 + +# State 33 +# Dealer: E +# +# S Q +# H Q +# D A +# C AQ +# S JT S A9 +# H K H T +# D D K +# C C +# S K +# H A +# D Q +# C +# +# Upcard: DT +# Bidding: +# North East South West +# Pass Pass +# Pass Pass Pick up! 
+# +# Declarer go alone: true +# +# Tricks: +# N E S W N E S +# CK CT CJ +# C9 H9 +# +# Points: +# N: 0 +# E: 0 +# S: 0 +# W: 0 +IsTerminal() = False +History() = [1, 6, 0, 11, 21, 8, 19, 7, 14, 3, 22, 4, 15, 23, 13, 2, 20, 17, 16, 18, 12, 5, 24, 24, 24, 24, 27, 29, 16, 4, 8, 0, 2] +HistoryString() = "1, 6, 0, 11, 21, 8, 19, 7, 14, 3, 22, 4, 15, 23, 13, 2, 20, 17, 16, 18, 12, 5, 24, 24, 24, 24, 27, 29, 16, 4, 8, 0, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateTensor(0): binvec(935, 0x202000002108500004000b0c000000000000000080080000000000008000000000000000000000800000200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(935, 0x202000002108500004120041000000000000000080080000000000008000000000000000000000800000200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(935, 0x202000002108500004000412000000000000000080080000000000008000000000000000000000800000200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(3): binvec(935, 0x202000002108500004011020000000000000000080080000000000008000000000000000000000800000200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [3, 6, 17, 23] +StringLegalActions() = ["S9", "HT", "DK", "SA"] + +# Apply action "S9" +action: 3 + +# State 34 +# Apply action "SJ" +action: 11 + +# State 35 +# Apply action "SA" +action: 23 + +# State 36 +# Apply action "SK" +action: 19 + +# State 37 +# Apply action "HT" +action: 6 + +# State 38 +# Apply action "HA" +action: 22 + +# State 39 +# Apply action "HK" +action: 18 + +# State 40 +# Apply action "DQ" +action: 13 + +# State 41 +# Apply action "ST" +action: 7 + +# State 42 +# Apply action "DK" +action: 17 + +# State 43 +# Dealer: E +# +# S Q +# H Q +# D A +# C AQ +# S JT S A9 +# H K9 H T +# D D K +# C T C J +# S K +# H A +# D Q +# C K9 +# +# Upcard: DT +# Bidding: +# North East South West +# Pass Pass +# Pass Pass Pick up! 
+# +# Declarer go alone: true +# +# Tricks: +# N E S W N E S +# CK CT CJ +# C9 H9 S9 +# SJ SA SK +# HT HA HK +# DQ ST DK +# +# Points: +# N: -2 +# E: 2 +# S: -2 +# W: 2 +IsTerminal() = True +History() = [1, 6, 0, 11, 21, 8, 19, 7, 14, 3, 22, 4, 15, 23, 13, 2, 20, 17, 16, 18, 12, 5, 24, 24, 24, 24, 27, 29, 16, 4, 8, 0, 2, 3, 11, 23, 19, 6, 22, 18, 13, 7, 17] +HistoryString() = "1, 6, 0, 11, 21, 8, 19, 7, 14, 3, 22, 4, 15, 23, 13, 2, 20, 17, 16, 18, 12, 5, 24, 24, 24, 24, 27, 29, 16, 4, 8, 0, 2, 3, 11, 23, 19, 6, 22, 18, 13, 7, 17" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateTensor(0): binvec(935, 0x202000002108500004000b0c000000000000000080080000000000008000000000000000000000800000200000000000100000000000000000000000000000001000000000000001000010000000020000000002000020000000000000000000000000000000000400010000000000000040000000) +InformationStateTensor(1): binvec(935, 0x202000002108500004000000000000000000000080080000000000008000000000000000000000800000200000000000100000000000000000000000000000001000000000000001000010000000020000000002000020000000000000000000000000000000000400010000000000000040000000) +InformationStateTensor(2): binvec(935, 0x202000002108500004000000000000000000000080080000000000008000000000000000000000800000200000000000100000000000000000000000000000001000000000000001000010000000020000000002000020000000000000000000000000000000000400010000000000000040000000) +InformationStateTensor(3): binvec(935, 0x202000002108500004000000000000000000000080080000000000008000000000000000000000800000200000000000100000000000000000000000000000001000000000000001000010000000020000000002000020000000000000000000000000000000000400010000000000000040000000) +Rewards() = [-2, 2, -2, 2] +Returns() = [-2, 2, -2, 2] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/first_sealed_auction.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/first_sealed_auction.txt new file mode 100644 index 0000000..755c768 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/first_sealed_auction.txt @@ -0,0 +1,151 @@ +game: first_sealed_auction + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "First-Price Sealed-Bid Auction" +GameType.max_num_players = 10 +GameType.min_num_players = 2 +GameType.parameter_specification = ["max_value", "players"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "first_sealed_auction" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 10 +PolicyTensorShape() = [10] +MaxChanceOutcomes() = 11 +GetParameters() = {max_value=10,players=2} +NumPlayers() = 2 +MinUtility() = 0.0 +MaxUtility() = 10.0 +UtilitySum() = None +InformationStateTensorShape() = [22] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 22 +ObservationTensorShape() = [10] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 10 +MaxGameLength() = 2 +ToString() = "first_sealed_auction()" + +# State 0 +# ; +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False 
+CurrentPlayer() = -1 +InformationStateString(0) = "p0" +InformationStateString(1) = "p1" +InformationStateTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "" +ObservationString(1) = "" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(1,0.1), (2,0.1), (3,0.1), (4,0.1), (5,0.1), (6,0.1), (7,0.1), (8,0.1), (9,0.1), (10,0.1)] +LegalActions() = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] +StringLegalActions() = ["Player 0 value: 1", "Player 0 value: 2", "Player 0 value: 3", "Player 0 value: 4", "Player 0 value: 5", "Player 0 value: 6", "Player 0 value: 7", "Player 0 value: 8", "Player 0 value: 9", "Player 0 value: 10"] + +# Apply action "Player 0 value: 9" +action: 9 + +# State 1 +# 9; +IsTerminal() = False +History() = [9] +HistoryString() = "9" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "p0 val 9" +InformationStateString(1) = "p1" +InformationStateTensor(0): ◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "9" +ObservationString(1) = "" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◉◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(1,0.1), (2,0.1), (3,0.1), (4,0.1), (5,0.1), (6,0.1), (7,0.1), (8,0.1), (9,0.1), (10,0.1)] +LegalActions() = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] +StringLegalActions() = ["Player 1 value: 1", "Player 1 value: 2", "Player 1 value: 3", "Player 1 value: 4", "Player 1 value: 5", "Player 1 value: 6", "Player 1 value: 7", "Player 1 value: 8", "Player 1 value: 9", "Player 1 value: 10"] + +# Apply action "Player 1 value: 10" +action: 10 + +# State 2 +# 9,10; +IsTerminal() = False +History() = [9, 10] +HistoryString() = "9, 10" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "p0 val 9" +InformationStateString(1) = "p1 val 10" +InformationStateTensor(0): ◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "9" +ObservationString(1) = "10" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◉◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8] +StringLegalActions() = ["Player 0 bid: 0", "Player 0 bid: 1", "Player 0 bid: 2", "Player 0 bid: 3", "Player 0 bid: 4", "Player 0 bid: 5", "Player 0 bid: 6", "Player 0 bid: 7", "Player 0 bid: 8"] + +# Apply action "Player 0 bid: 3" +action: 3 + +# State 3 +# 9,10;3 +IsTerminal() = False +History() = [9, 10, 3] +HistoryString() = "9, 10, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "p0 val 9 bid 3" +InformationStateString(1) = "p1 val 10" +InformationStateTensor(0): ◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "9" +ObservationString(1) = "10" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◉◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +StringLegalActions() = ["Player 1 bid: 0", "Player 1 bid: 1", "Player 1 bid: 2", "Player 1 bid: 3", "Player 1 bid: 4", "Player 1 bid: 5", "Player 1 bid: 6", "Player 1 bid: 7", "Player 1 bid: 8", "Player 1 bid: 9"] + +# Apply action "Player 1 bid: 5" +action: 5 + +# State 4 +# Apply action "Chose winner 1" +action: 1 + +# State 5 +# 9,10;3,5;1 +IsTerminal() = True +History() = [9, 10, 3, 5, 1] +HistoryString() = "9, 10, 3, 5, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 
+InformationStateString(0) = "p0 val 9 bid 3" +InformationStateString(1) = "p1 val 10 bid 5" +InformationStateTensor(0): ◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯ +ObservationString(0) = "9" +ObservationString(1) = "10" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◉◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◉ +Rewards() = [0, 5] +Returns() = [0, 5] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/gin_rummy.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/gin_rummy.txt new file mode 100644 index 0000000..9f62fec --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/gin_rummy.txt @@ -0,0 +1,977 @@ +game: gin_rummy + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Gin Rummy" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["gin_bonus", "hand_size", "knock_card", "num_ranks", "num_suits", "oklahoma", "undercut_bonus"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "gin_rummy" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 241 +PolicyTensorShape() = [241] +MaxChanceOutcomes() = 52 +GetParameters() = {gin_bonus=25,hand_size=10,knock_card=10,num_ranks=13,num_suits=4,oklahoma=False,undercut_bonus=25} +NumPlayers() = 2 +MinUtility() = -123.0 +MaxUtility() = 123.0 +UtilitySum() = 0.0 +ObservationTensorShape() = player: [2], private_hand: [2, 52], current_player: [2], knock_card: [10], upcard: [52], discard_pile: [52], stock_size: [52], layed_melds: [2, 185] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 644 +MaxGameLength() = 300 +ToString() = "gin_rummy()" + +# State 0 +# +# Knock card: 10 +# Prev upcard: XX +# Repeated move: 0 +# Current player: -1 +# Phase: Deal +# +# Player0: Deadwood=0 +# +--------------------------+ +# | | +# | | +# | | +# | | +# +--------------------------+ +# +# Stock size: 52 Upcard: XX +# Discard pile: +# +# Player1: Deadwood=0 +# +--------------------------+ +# | | +# | | +# | | +# | | +# +--------------------------+ +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "(action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n")" +InformationStateString(1) = "(action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n")" +ObservationString(0) = "\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| 
|\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +ObservationString(1) = "\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +PublicObservationString() = "\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +PrivateObservationString(0) = "\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +PrivateObservationString(1) = "\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).current_player: ◯◯ +ObservationTensor(0).knock_card: ◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(0).upcard: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).discard_pile: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).stock_size: ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(0).layed_melds: +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).current_player: ◯◯ +ObservationTensor(1).knock_card: ◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(1).upcard: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).discard_pile: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).stock_size: ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(1).layed_melds: +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.0192308), (1,0.0192308), (2,0.0192308), (3,0.0192308), 
(4,0.0192308), (5,0.0192308), (6,0.0192308), (7,0.0192308), (8,0.0192308), (9,0.0192308), (10,0.0192308), (11,0.0192308), (12,0.0192308), (13,0.0192308), (14,0.0192308), (15,0.0192308), (16,0.0192308), (17,0.0192308), (18,0.0192308), (19,0.0192308), (20,0.0192308), (21,0.0192308), (22,0.0192308), (23,0.0192308), (24,0.0192308), (25,0.0192308), (26,0.0192308), (27,0.0192308), (28,0.0192308), (29,0.0192308), (30,0.0192308), (31,0.0192308), (32,0.0192308), (33,0.0192308), (34,0.0192308), (35,0.0192308), (36,0.0192308), (37,0.0192308), (38,0.0192308), (39,0.0192308), (40,0.0192308), (41,0.0192308), (42,0.0192308), (43,0.0192308), (44,0.0192308), (45,0.0192308), (46,0.0192308), (47,0.0192308), (48,0.0192308), (49,0.0192308), (50,0.0192308), (51,0.0192308)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51] +StringLegalActions() = ["Chance outcome: As", "Chance outcome: 2s", "Chance outcome: 3s", "Chance outcome: 4s", "Chance outcome: 5s", "Chance outcome: 6s", "Chance outcome: 7s", "Chance outcome: 8s", "Chance outcome: 9s", "Chance outcome: Ts", "Chance outcome: Js", "Chance outcome: Qs", "Chance outcome: Ks", "Chance outcome: Ac", "Chance outcome: 2c", "Chance outcome: 3c", "Chance outcome: 4c", "Chance outcome: 5c", "Chance outcome: 6c", "Chance outcome: 7c", "Chance outcome: 8c", "Chance outcome: 9c", "Chance outcome: Tc", "Chance outcome: Jc", "Chance outcome: Qc", "Chance outcome: Kc", "Chance outcome: Ad", "Chance outcome: 2d", "Chance outcome: 3d", "Chance outcome: 4d", "Chance outcome: 5d", "Chance outcome: 6d", "Chance outcome: 7d", "Chance outcome: 8d", "Chance outcome: 9d", "Chance outcome: Td", "Chance outcome: Jd", "Chance outcome: Qd", "Chance outcome: Kd", "Chance outcome: Ah", "Chance outcome: 2h", "Chance outcome: 3h", "Chance outcome: 4h", "Chance outcome: 5h", "Chance outcome: 6h", "Chance outcome: 7h", "Chance outcome: 8h", "Chance outcome: 9h", "Chance outcome: Th", "Chance outcome: Jh", "Chance outcome: Qh", "Chance outcome: Kh"] + +# Apply action "Chance outcome: 4d" +action: 29 + +# State 1 +# +# Knock card: 10 +# Prev upcard: XX +# Repeated move: 0 +# Current player: -1 +# Phase: Deal +# +# Player0: Deadwood=0 +# +--------------------------+ +# | | +# | | +# | 4d | +# | | +# +--------------------------+ +# +# Stock size: 51 Upcard: XX +# Discard pile: +# +# Player1: Deadwood=0 +# +--------------------------+ +# | | +# | | +# | | +# | | +# +--------------------------+ +IsTerminal() = False +History() = [29] +HistoryString() = "29" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "(action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| 4d |\n| |\n+--------------------------+\n\nStock size: 51 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n")" +InformationStateString(1) = "(action=None, observation="\nKnock card: 
10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 51 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n")" +ObservationString(0) = "\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| 4d |\n| |\n+--------------------------+\n\nStock size: 51 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +ObservationString(1) = "\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 51 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +PublicObservationString() = "\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 51 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +PrivateObservationString(0) = "\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| 4d |\n| |\n+--------------------------+\n\nStock size: 51 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +PrivateObservationString(1) = "\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 51 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).current_player: ◯◯ +ObservationTensor(0).knock_card: ◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(0).upcard: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).discard_pile: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).stock_size: ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯ +ObservationTensor(0).layed_melds: +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).current_player: ◯◯ +ObservationTensor(1).knock_card: ◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(1).upcard: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).discard_pile: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).stock_size: ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯ +ObservationTensor(1).layed_melds: +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.0196078), (1,0.0196078), (2,0.0196078), (3,0.0196078), (4,0.0196078), (5,0.0196078), (6,0.0196078), (7,0.0196078), (8,0.0196078), (9,0.0196078), (10,0.0196078), (11,0.0196078), (12,0.0196078), (13,0.0196078), (14,0.0196078), (15,0.0196078), (16,0.0196078), (17,0.0196078), (18,0.0196078), (19,0.0196078), (20,0.0196078), (21,0.0196078), (22,0.0196078), (23,0.0196078), (24,0.0196078), (25,0.0196078), (26,0.0196078), (27,0.0196078), (28,0.0196078), (30,0.0196078), (31,0.0196078), (32,0.0196078), (33,0.0196078), (34,0.0196078), (35,0.0196078), (36,0.0196078), (37,0.0196078), (38,0.0196078), (39,0.0196078), (40,0.0196078), (41,0.0196078), (42,0.0196078), (43,0.0196078), (44,0.0196078), (45,0.0196078), (46,0.0196078), (47,0.0196078), (48,0.0196078), (49,0.0196078), (50,0.0196078), (51,0.0196078)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51] +StringLegalActions() = ["Chance outcome: As", "Chance outcome: 2s", "Chance outcome: 3s", "Chance outcome: 4s", "Chance outcome: 5s", "Chance outcome: 6s", "Chance outcome: 7s", "Chance outcome: 8s", "Chance outcome: 9s", "Chance outcome: Ts", "Chance outcome: Js", "Chance outcome: Qs", "Chance outcome: Ks", "Chance outcome: Ac", "Chance outcome: 2c", "Chance outcome: 3c", "Chance outcome: 4c", "Chance outcome: 5c", "Chance outcome: 6c", "Chance outcome: 7c", "Chance outcome: 8c", "Chance outcome: 9c", "Chance outcome: Tc", "Chance outcome: Jc", "Chance outcome: Qc", "Chance outcome: Kc", "Chance outcome: Ad", "Chance outcome: 2d", "Chance outcome: 3d", "Chance outcome: 5d", "Chance outcome: 6d", "Chance outcome: 7d", "Chance outcome: 8d", "Chance outcome: 9d", "Chance outcome: Td", "Chance outcome: Jd", "Chance outcome: Qd", "Chance outcome: Kd", "Chance outcome: Ah", "Chance outcome: 2h", "Chance outcome: 3h", "Chance outcome: 4h", "Chance outcome: 5h", "Chance outcome: 6h", "Chance outcome: 7h", "Chance outcome: 8h", "Chance outcome: 9h", "Chance outcome: Th", "Chance outcome: Jh", "Chance outcome: Qh", "Chance outcome: Kh"] + +# Apply action "Chance outcome: Jh" +action: 49 + +# State 2 +# Apply action "Chance outcome: Ts" +action: 9 + +# State 3 +# Apply action "Chance outcome: 3s" +action: 2 + +# State 4 +# Apply action "Chance outcome: Kh" +action: 51 + +# State 5 +# Apply action "Chance outcome: Qc" +action: 24 + +# State 6 +# Apply action "Chance outcome: Td" +action: 35 + +# State 7 +# Apply action "Chance outcome: 8c" +action: 20 + +# State 8 +# Apply action "Chance outcome: 9h" +action: 47 + +# State 9 +# Apply action "Chance outcome: 6d" +action: 31 + +# State 10 
+# Apply action "Chance outcome: 6s" +action: 5 + +# State 11 +# Apply action "Chance outcome: Js" +action: 10 + +# State 12 +# Apply action "Chance outcome: 7s" +action: 6 + +# State 13 +# Apply action "Chance outcome: 4h" +action: 42 + +# State 14 +# Apply action "Chance outcome: Qh" +action: 50 + +# State 15 +# Apply action "Chance outcome: Qs" +action: 11 + +# State 16 +# Apply action "Chance outcome: 2s" +action: 1 + +# State 17 +# Apply action "Chance outcome: 9c" +action: 21 + +# State 18 +# Apply action "Chance outcome: 8h" +action: 46 + +# State 19 +# Apply action "Chance outcome: 8s" +action: 7 + +# State 20 +# Apply action "Chance outcome: 5c" +action: 17 + +# State 21 +# +# Knock card: 10 +# Prev upcard: XX +# Repeated move: 0 +# Current player: 0 +# Phase: FirstUpcard +# +# Player0: Deadwood=80 +# +--------------------------+ +# | 3s Ts | +# | 8c Qc | +# | 4d 6d Td | +# | 9h Jh Kh| +# +--------------------------+ +# +# Stock size: 31 Upcard: 5c +# Discard pile: +# +# Player1: Deadwood=53 +# +--------------------------+ +# | 2s 6s7s8s JsQs | +# | 9c | +# | | +# | 4h 8h Qh | +# +--------------------------+ +IsTerminal() = False +History() = [29, 49, 9, 2, 51, 24, 35, 20, 47, 31, 5, 10, 6, 42, 50, 11, 1, 21, 46, 7, 17] +HistoryString() = "29, 49, 9, 2, 51, 24, 35, 20, 47, 31, 5, 10, 6, 42, 50, 11, 1, 21, 46, 7, 17" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "(action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| 4d |\n| |\n+--------------------------+\n\nStock size: 51 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 50 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| Ts |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 49 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 48 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| |\n| 4d |\n| Jh Kh|\n+--------------------------+\n\nStock size: 47 Upcard: XX\nDiscard pile: 
\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| Qc |\n| 4d |\n| Jh Kh|\n+--------------------------+\n\nStock size: 46 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| Qc |\n| 4d Td |\n| Jh Kh|\n+--------------------------+\n\nStock size: 45 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d Td |\n| Jh Kh|\n+--------------------------+\n\nStock size: 44 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 43 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 42 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 41 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 40 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 39 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 38 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), 
(action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 37 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 36 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 35 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 34 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 33 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 32 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n")" +InformationStateString(1) = "(action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 51 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 
-1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 50 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 49 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 48 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 47 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 46 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 45 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 44 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 43 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 42 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 41 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 
0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 40 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s Js |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 39 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 38 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| 4h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 37 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 36 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s JsQs |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 35 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 34 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| 9c |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 33 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 32 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: 
Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n")" +ObservationString(0) = "\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +ObservationString(1) = "\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n" +PublicObservationString() = "\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +PrivateObservationString(0) = "\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +PrivateObservationString(1) = "\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand: ◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◉ + ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).current_player: ◉◯ +ObservationTensor(0).knock_card: ◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(0).upcard: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).discard_pile: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).stock_size: ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).layed_melds: +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◉◉◉◯◯◉◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯ +ObservationTensor(1).current_player: ◉◯ +ObservationTensor(1).knock_card: ◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(1).upcard: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).discard_pile: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).stock_size: ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).layed_melds: 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [52, 54] +StringLegalActions() = ["Player: 0 Action: Draw upcard", "Player: 0 Action: Pass"] + +# Apply action "Player: 0 Action: Pass" +action: 54 + +# State 22 +# +# Knock card: 10 +# Prev upcard: XX +# Repeated move: 0 +# Current player: 1 +# Phase: FirstUpcard +# +# Player0: Deadwood=80 +# +--------------------------+ +# | 3s Ts | +# | 8c Qc | +# | 4d 6d Td | +# | 9h Jh Kh| +# +--------------------------+ +# +# Stock size: 31 Upcard: 5c +# Discard pile: +# +# Player1: Deadwood=53 +# +--------------------------+ +# | 2s 6s7s8s JsQs | +# | 9c | +# | | +# | 4h 8h Qh | +# +--------------------------+ +IsTerminal() = False +History() = [29, 49, 9, 2, 51, 24, 35, 20, 47, 31, 5, 10, 6, 42, 50, 11, 1, 21, 46, 7, 17, 54] +HistoryString() = "29, 49, 9, 2, 51, 24, 35, 20, 47, 31, 5, 10, 6, 42, 50, 11, 1, 21, 46, 7, 17, 54" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "(action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| 4d |\n| |\n+--------------------------+\n\nStock size: 51 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 50 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| Ts |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 49 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 48 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| |\n| 4d |\n| Jh Kh|\n+--------------------------+\n\nStock size: 47 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), 
(action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| Qc |\n| 4d |\n| Jh Kh|\n+--------------------------+\n\nStock size: 46 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| Qc |\n| 4d Td |\n| Jh Kh|\n+--------------------------+\n\nStock size: 45 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d Td |\n| Jh Kh|\n+--------------------------+\n\nStock size: 44 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 43 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 42 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 41 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 40 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 39 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 38 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: 
Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 37 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 36 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 35 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 34 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 33 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 32 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=54, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 1\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n")" +InformationStateString(1) = "(action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| 
|\n+--------------------------+\n\nStock size: 51 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 50 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 49 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 48 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 47 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 46 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 45 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 44 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 43 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 42 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| 
|\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 41 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 40 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s Js |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 39 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 38 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| 4h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 37 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 36 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s JsQs |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 35 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 34 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| 9c |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 33 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 32 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh 
|\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 1\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n")" +ObservationString(0) = "\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 1\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +ObservationString(1) = "\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 1\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n" +PublicObservationString() = "\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 1\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +PrivateObservationString(0) = "\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 1\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +PrivateObservationString(1) = "\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 1\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand: ◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◉ + ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).current_player: ◯◉ +ObservationTensor(0).knock_card: ◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(0).upcard: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).discard_pile: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).stock_size: ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).layed_melds: +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◉◉◉◯◯◉◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯ +ObservationTensor(1).current_player: ◯◉ +ObservationTensor(1).knock_card: ◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(1).upcard: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).discard_pile: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).stock_size: ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).layed_melds: +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [52, 54] +StringLegalActions() = ["Player: 1 Action: Draw upcard", "Player: 1 Action: Pass"] + +# Apply action "Player: 1 Action: Pass" +action: 54 + +# State 23 +# +# Knock card: 10 +# Prev upcard: XX +# Repeated move: 0 +# Current player: 0 +# Phase: FirstUpcard +# +# Player0: Deadwood=80 +# +--------------------------+ +# | 3s Ts | +# | 8c Qc | +# | 4d 6d Td | +# | 9h Jh Kh| +# +--------------------------+ +# +# Stock size: 31 Upcard: 5c +# Discard pile: +# +# Player1: Deadwood=53 +# +--------------------------+ +# | 2s 6s7s8s JsQs | +# | 9c | +# | | +# | 4h 8h Qh | +# +--------------------------+ +IsTerminal() = False +History() = [29, 49, 9, 2, 51, 24, 35, 20, 47, 31, 5, 10, 6, 42, 50, 11, 1, 21, 46, 7, 17, 54, 54] +HistoryString() = "29, 49, 9, 2, 51, 24, 35, 20, 47, 31, 5, 10, 6, 42, 50, 11, 1, 21, 46, 7, 17, 54, 54" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "(action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| 4d |\n| |\n+--------------------------+\n\nStock size: 51 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 50 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| Ts |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 49 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| 
|\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 48 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| |\n| 4d |\n| Jh Kh|\n+--------------------------+\n\nStock size: 47 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| Qc |\n| 4d |\n| Jh Kh|\n+--------------------------+\n\nStock size: 46 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| Qc |\n| 4d Td |\n| Jh Kh|\n+--------------------------+\n\nStock size: 45 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d Td |\n| Jh Kh|\n+--------------------------+\n\nStock size: 44 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 43 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 42 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 41 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 40 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 
-1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 39 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 38 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 37 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 36 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 35 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 34 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 33 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 32 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=54, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 1\nPhase: FirstUpcard\n\nPlayer0: 
Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n")" +InformationStateString(1) = "(action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 51 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 50 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 49 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 48 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 47 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 46 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 45 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), 
(action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 44 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 43 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 42 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 41 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 40 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s Js |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 39 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 38 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| 4h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 37 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 36 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s JsQs |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 35 Upcard: XX\nDiscard pile: \n\nPlayer1: 
Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 34 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| 9c |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 33 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 32 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 1\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=54, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n")" +ObservationString(0) = "\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +ObservationString(1) = "\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n" +PublicObservationString() = "\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| 
|\n+--------------------------+\n" +PrivateObservationString(0) = "\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +PrivateObservationString(1) = "\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand: ◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◉◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◉ + ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).current_player: ◉◯ +ObservationTensor(0).knock_card: ◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(0).upcard: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).discard_pile: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).stock_size: ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).layed_melds: +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◉◉◉◯◯◉◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯ +ObservationTensor(1).current_player: ◉◯ +ObservationTensor(1).knock_card: ◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(1).upcard: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).discard_pile: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).stock_size: ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).layed_melds: +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [53] +StringLegalActions() = ["Player: 0 Action: Draw stock"] + +# Apply action "Player: 0 Action: Draw stock" +action: 53 + +# State 24 +# Apply action "Chance outcome: 2d" +action: 27 + +# State 25 +# +# Knock card: 10 +# Prev upcard: 5c +# Repeated move: 0 +# Current player: 0 +# Phase: Discard +# +# Player0: Deadwood=72 +# +--------------------------+ +# | 3s Ts | +# | 8c Qc | +# | 2d 4d 6d Td | +# | 9h Jh Kh| +# +--------------------------+ +# +# Stock size: 30 Upcard: XX +# Discard pile: 5c +# +# Player1: Deadwood=53 +# +--------------------------+ +# | 2s 6s7s8s JsQs | +# | 9c | +# | | +# | 4h 8h Qh | +# +--------------------------+ +IsTerminal() = False +History() = [29, 49, 9, 2, 51, 24, 35, 20, 47, 31, 5, 10, 6, 42, 50, 11, 1, 21, 46, 7, 17, 54, 54, 53, 27] +HistoryString() = "29, 49, 9, 
2, 51, 24, 35, 20, 47, 31, 5, 10, 6, 42, 50, 11, 1, 21, 46, 7, 17, 54, 54, 53, 27" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "(action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| 4d |\n| |\n+--------------------------+\n\nStock size: 51 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 50 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| Ts |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 49 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 48 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| |\n| 4d |\n| Jh Kh|\n+--------------------------+\n\nStock size: 47 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| Qc |\n| 4d |\n| Jh Kh|\n+--------------------------+\n\nStock size: 46 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| Qc |\n| 4d Td |\n| Jh Kh|\n+--------------------------+\n\nStock size: 45 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d Td |\n| Jh Kh|\n+--------------------------+\n\nStock size: 44 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev 
upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 43 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 42 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 41 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 40 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 39 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 38 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 37 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 36 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 35 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: 
Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 34 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 33 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 32 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=54, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 1\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: XX\nDiscard pile: 5c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 30 Upcard: XX\nDiscard pile: 5c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n")" +InformationStateString(1) = "(action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| 
|\n+--------------------------+\n\nStock size: 51 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 50 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 49 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 48 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 47 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 46 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 45 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 44 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 43 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 42 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| 
|\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 41 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 40 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s Js |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 39 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 38 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| 4h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 37 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 36 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s JsQs |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 35 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 34 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| 9c |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 33 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 32 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh 
|\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 1\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=54, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: XX\nDiscard pile: 5c\n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 30 Upcard: XX\nDiscard pile: 5c\n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n")" +ObservationString(0) = "\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 30 Upcard: XX\nDiscard pile: 5c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +ObservationString(1) = "\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 30 Upcard: XX\nDiscard pile: 5c\n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n" +PublicObservationString() = "\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 30 Upcard: XX\nDiscard pile: 5c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +PrivateObservationString(0) = "\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 30 Upcard: XX\nDiscard pile: 5c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +PrivateObservationString(1) = "\nKnock card: 
10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 30 Upcard: XX\nDiscard pile: 5c\n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand: ◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◉◯◉◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◉ + ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).current_player: ◉◯ +ObservationTensor(0).knock_card: ◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(0).upcard: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).discard_pile: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).stock_size: ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).layed_melds: +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◉◉◉◯◯◉◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯ +ObservationTensor(1).current_player: ◉◯ +ObservationTensor(1).knock_card: ◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(1).upcard: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).discard_pile: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).stock_size: ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).layed_melds: +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2, 9, 20, 24, 27, 29, 31, 35, 47, 49, 51] +StringLegalActions() = ["Player: 0 Action: 3s", "Player: 0 Action: Ts", "Player: 0 Action: 8c", "Player: 0 Action: Qc", "Player: 0 Action: 2d", "Player: 0 Action: 4d", "Player: 0 Action: 6d", "Player: 0 Action: Td", "Player: 0 Action: 9h", "Player: 0 Action: Jh", "Player: 0 Action: Kh"] + +# Apply action "Player: 0 Action: Kh" +action: 51 + +# State 26 +# +# Knock card: 10 +# Prev upcard: 5c +# Repeated move: 0 +# Current player: 1 +# Phase: Draw +# +# Player0: Deadwood=72 +# +--------------------------+ +# | 3s Ts | +# | 8c Qc | +# | 2d 4d 6d Td | +# | 9h Jh | +# +--------------------------+ +# +# Stock size: 30 Upcard: Kh +# Discard pile: 5c +# +# Player1: Deadwood=53 +# +--------------------------+ +# | 2s 6s7s8s JsQs | +# | 9c | +# | | +# | 4h 8h Qh | +# +--------------------------+ +IsTerminal() = False +History() = [29, 49, 9, 2, 51, 24, 35, 20, 47, 31, 5, 10, 6, 42, 50, 11, 1, 21, 46, 7, 17, 54, 54, 53, 27, 51] +HistoryString() = "29, 49, 9, 2, 51, 24, 35, 20, 47, 31, 5, 10, 6, 42, 50, 11, 1, 21, 46, 7, 17, 54, 54, 53, 27, 51" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "(action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent 
player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| 4d |\n| |\n+--------------------------+\n\nStock size: 51 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 50 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| Ts |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 49 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 48 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| |\n| 4d |\n| Jh Kh|\n+--------------------------+\n\nStock size: 47 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| Qc |\n| 4d |\n| Jh Kh|\n+--------------------------+\n\nStock size: 46 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| Qc |\n| 4d Td |\n| Jh Kh|\n+--------------------------+\n\nStock size: 45 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d Td |\n| Jh Kh|\n+--------------------------+\n\nStock size: 44 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 43 Upcard: XX\nDiscard pile: 
\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 42 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 41 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 40 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 39 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 38 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 37 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 36 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 35 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 34 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| 
|\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 33 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 32 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=54, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 1\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: XX\nDiscard pile: 5c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 30 Upcard: XX\nDiscard pile: 5c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=51, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 30 Upcard: Kh\nDiscard pile: 5c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n")" +InformationStateString(1) = "(action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, 
observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 51 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 50 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 49 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 48 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 47 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 46 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 45 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 44 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 43 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 42 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| 
|\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 41 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 40 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s Js |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 39 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 38 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| 4h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 37 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 36 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s JsQs |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 35 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 34 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| 9c |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 33 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| 
|\n+--------------------------+\n\nStock size: 32 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 1\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=54, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: XX\nDiscard pile: 5c\n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 30 Upcard: XX\nDiscard pile: 5c\n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 30 Upcard: Kh\nDiscard pile: 5c\n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n")" +ObservationString(0) = "\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 30 Upcard: Kh\nDiscard pile: 5c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +ObservationString(1) = "\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 30 Upcard: Kh\nDiscard pile: 5c\n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n" +PublicObservationString() = "\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 30 Upcard: 
Kh\nDiscard pile: 5c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +PrivateObservationString(0) = "\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 30 Upcard: Kh\nDiscard pile: 5c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +PrivateObservationString(1) = "\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 30 Upcard: Kh\nDiscard pile: 5c\n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand: ◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◉◯◉◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◯ + ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).current_player: ◯◉ +ObservationTensor(0).knock_card: ◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(0).upcard: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(0).discard_pile: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).stock_size: ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).layed_melds: +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◉◉◉◯◯◉◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯ +ObservationTensor(1).current_player: ◯◉ +ObservationTensor(1).knock_card: ◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(1).upcard: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(1).discard_pile: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).stock_size: ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).layed_melds: +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [52, 53] +StringLegalActions() = ["Player: 1 Action: Draw upcard", "Player: 1 Action: Draw stock"] + +# Apply action "Player: 1 Action: Draw stock" +action: 53 + +# State 27 +# Apply action "Chance outcome: 6c" +action: 18 + +# State 28 +# +# Knock card: 10 +# Prev upcard: Kh +# Repeated move: 0 +# Current player: 1 +# Phase: Discard +# +# Player0: Deadwood=72 +# +--------------------------+ +# | 3s Ts | +# | 8c Qc | +# | 2d 4d 6d Td | +# | 9h Jh | +# +--------------------------+ +# +# Stock size: 29 Upcard: XX +# Discard pile: 5cKh +# +# Player1: Deadwood=49 +# +--------------------------+ +# | 2s 6s7s8s JsQs | +# | 6c 9c | +# | | +# | 4h 8h Qh | +# +--------------------------+ +IsTerminal() = False +History() = [29, 49, 9, 2, 
51, 24, 35, 20, 47, 31, 5, 10, 6, 42, 50, 11, 1, 21, 46, 7, 17, 54, 54, 53, 27, 51, 53, 18] +HistoryString() = "29, 49, 9, 2, 51, 24, 35, 20, 47, 31, 5, 10, 6, 42, 50, 11, 1, 21, 46, 7, 17, 54, 54, 53, 27, 51, 53, 18" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "(action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| 4d |\n| |\n+--------------------------+\n\nStock size: 51 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 50 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| Ts |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 49 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 48 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| |\n| 4d |\n| Jh Kh|\n+--------------------------+\n\nStock size: 47 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| Qc |\n| 4d |\n| Jh Kh|\n+--------------------------+\n\nStock size: 46 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| Qc |\n| 4d Td |\n| Jh Kh|\n+--------------------------+\n\nStock size: 45 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d Td |\n| Jh Kh|\n+--------------------------+\n\nStock size: 44 Upcard: XX\nDiscard pile: 
\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 43 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 42 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 41 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 40 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 39 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 38 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 37 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 36 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 35 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| 
|\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 34 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 33 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 32 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=54, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 1\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: XX\nDiscard pile: 5c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 30 Upcard: XX\nDiscard pile: 5c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=51, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 30 Upcard: Kh\nDiscard pile: 5c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, 
observation="\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 30 Upcard: XX\nDiscard pile: 5cKh\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 29 Upcard: XX\nDiscard pile: 5cKh\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n")" +InformationStateString(1) = "(action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 51 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 50 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 49 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 48 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 47 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 46 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 45 Upcard: XX\nDiscard pile: 
\n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 44 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 43 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 42 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 41 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 40 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s Js |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 39 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 38 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| 4h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 37 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 36 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s JsQs |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| 
|\n+--------------------------+\n\nStock size: 35 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 34 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| 9c |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 33 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 32 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 1\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=54, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: XX\nDiscard pile: 5c\n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 30 Upcard: XX\nDiscard pile: 5c\n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 30 Upcard: Kh\nDiscard pile: 
5c\n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 30 Upcard: XX\nDiscard pile: 5cKh\n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 29 Upcard: XX\nDiscard pile: 5cKh\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 6c 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n")" +ObservationString(0) = "\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 29 Upcard: XX\nDiscard pile: 5cKh\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +ObservationString(1) = "\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 29 Upcard: XX\nDiscard pile: 5cKh\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 6c 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n" +PublicObservationString() = "\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 29 Upcard: XX\nDiscard pile: 5cKh\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +PrivateObservationString(0) = "\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 29 Upcard: XX\nDiscard pile: 5cKh\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +PrivateObservationString(1) = "\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 29 Upcard: XX\nDiscard pile: 5cKh\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 6c 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand: ◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◉◯◉◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◯ + ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).current_player: ◯◉ +ObservationTensor(0).knock_card: ◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(0).upcard: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).discard_pile: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(0).stock_size: ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).layed_melds: 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◉◉◉◯◯◉◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯ +ObservationTensor(1).current_player: ◯◉ +ObservationTensor(1).knock_card: ◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(1).upcard: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).discard_pile: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(1).stock_size: ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).layed_melds: +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 5, 6, 7, 10, 11, 18, 21, 42, 46, 50] +StringLegalActions() = ["Player: 1 Action: 2s", "Player: 1 Action: 6s", "Player: 1 Action: 7s", "Player: 1 Action: 8s", "Player: 1 Action: Js", "Player: 1 Action: Qs", "Player: 1 Action: 6c", "Player: 1 Action: 9c", "Player: 1 Action: 4h", "Player: 1 Action: 8h", "Player: 1 Action: Qh"] + +# Apply action "Player: 1 Action: Qh" +action: 50 + +# State 29 +# Apply action "Player: 0 Action: Draw stock" +action: 53 + +# State 30 +# Apply action "Chance outcome: 7d" +action: 32 + +# State 31 +# Apply action "Player: 0 Action: Qc" +action: 24 + +# State 32 +# Apply action "Player: 1 Action: Draw stock" +action: 53 + +# State 33 +# Apply action "Chance outcome: Jd" +action: 36 + +# State 34 +# Apply action "Player: 1 Action: Qs" +action: 11 + +# State 35 +# Apply action "Player: 0 Action: Draw stock" +action: 53 + +# State 36 +# Apply action "Chance outcome: Jc" +action: 23 + +# State 37 +# Apply action "Player: 0 Action: Jh" +action: 49 + +# State 38 +# Apply action "Player: 1 Action: Draw upcard" +action: 52 + +# State 39 +# Apply action "Player: 1 Action: 9c" +action: 21 + +# State 40 +# Apply action "Player: 0 Action: Draw stock" +action: 53 + +# State 41 +# Apply action "Chance outcome: Ks" +action: 12 + +# State 42 +# Apply action "Player: 0 Action: Ks" +action: 12 + +# State 43 +# Apply action "Player: 1 Action: Draw stock" +action: 53 + +# State 44 +# Apply action "Chance outcome: 3h" +action: 41 + +# State 45 +# Apply action "Player: 1 Action: 8h" +action: 46 + +# State 46 +# Apply action "Player: 0 Action: Draw stock" +action: 53 + +# State 47 +# Apply action "Chance outcome: 5d" +action: 30 + +# State 48 +# +# Knock card: 10 +# Prev upcard: 8h +# Repeated move: 0 +# Current player: 0 +# Phase: Discard +# +# Player0: Deadwood=42 +# +--------------------------+ +# | 3s Ts | +# | 8c Jc | +# | 2d 4d5d6d7d Td | +# | 9h | +# +--------------------------+ +# +# Stock size: 23 Upcard: XX +# Discard pile: 5cKhQhQcQs9cKs8h +# +# Player1: Deadwood=15 +# +--------------------------+ +# | 2s 6s7s8s Js | +# | 6c | +# | Jd | +# | 3h4h Jh | +# +--------------------------+ +IsTerminal() = False +History() = [29, 49, 9, 2, 51, 
24, 35, 20, 47, 31, 5, 10, 6, 42, 50, 11, 1, 21, 46, 7, 17, 54, 54, 53, 27, 51, 53, 18, 50, 53, 32, 24, 53, 36, 11, 53, 23, 49, 52, 21, 53, 12, 12, 53, 41, 46, 53, 30] +HistoryString() = "29, 49, 9, 2, 51, 24, 35, 20, 47, 31, 5, 10, 6, 42, 50, 11, 1, 21, 46, 7, 17, 54, 54, 53, 27, 51, 53, 18, 50, 53, 32, 24, 53, 36, 11, 53, 23, 49, 52, 21, 53, 12, 12, 53, 41, 46, 53, 30" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "(action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| 4d |\n| |\n+--------------------------+\n\nStock size: 51 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 50 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| Ts |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 49 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 48 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| |\n| 4d |\n| Jh Kh|\n+--------------------------+\n\nStock size: 47 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| Qc |\n| 4d |\n| Jh Kh|\n+--------------------------+\n\nStock size: 46 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| Qc |\n| 4d Td |\n| Jh Kh|\n+--------------------------+\n\nStock size: 45 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: 
Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d Td |\n| Jh Kh|\n+--------------------------+\n\nStock size: 44 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 43 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 42 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 41 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 40 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 39 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 38 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 37 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 36 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh 
Kh|\n+--------------------------+\n\nStock size: 35 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 34 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 33 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 32 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=54, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 1\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: XX\nDiscard pile: 5c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 30 Upcard: XX\nDiscard pile: 5c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=51, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 30 
Upcard: Kh\nDiscard pile: 5c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 30 Upcard: XX\nDiscard pile: 5cKh\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 29 Upcard: XX\nDiscard pile: 5cKh\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: 0\nPhase: Draw\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 29 Upcard: Qh\nDiscard pile: 5cKh\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: Qh\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 29 Upcard: XX\nDiscard pile: 5cKhQh\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qh\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d7d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 28 Upcard: XX\nDiscard pile: 5cKhQh\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=24, observation="\nKnock card: 10\nPrev upcard: Qh\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d 6d7d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 28 Upcard: Qc\nDiscard pile: 5cKhQh\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qc\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d 6d7d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 28 Upcard: XX\nDiscard pile: 5cKhQhQc\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qc\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d 6d7d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 27 Upcard: XX\nDiscard pile: 5cKhQhQc\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qc\nRepeated move: 0\nCurrent player: 0\nPhase: Draw\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d 6d7d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 27 Upcard: 
Qs\nDiscard pile: 5cKhQhQc\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: Qs\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d 6d7d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 27 Upcard: XX\nDiscard pile: 5cKhQhQcQs\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qs\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 26 Upcard: XX\nDiscard pile: 5cKhQhQcQs\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=49, observation="\nKnock card: 10\nPrev upcard: Qs\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 26 Upcard: Jh\nDiscard pile: 5cKhQhQcQs\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Jh\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 26 Upcard: XX\nDiscard pile: 5cKhQhQcQs\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Jh\nRepeated move: 0\nCurrent player: 0\nPhase: Draw\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 26 Upcard: 9c\nDiscard pile: 5cKhQhQcQs\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: 9c\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 26 Upcard: XX\nDiscard pile: 5cKhQhQcQs9c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 9c\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts Ks|\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 25 Upcard: XX\nDiscard pile: 5cKhQhQcQs9c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=12, observation="\nKnock card: 10\nPrev upcard: 9c\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 25 Upcard: Ks\nDiscard pile: 5cKhQhQcQs9c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Ks\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h 
|\n+--------------------------+\n\nStock size: 25 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Ks\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 24 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Ks\nRepeated move: 0\nCurrent player: 0\nPhase: Draw\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 24 Upcard: 8h\nDiscard pile: 5cKhQhQcQs9cKs\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: 8h\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 24 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 8h\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0: Deadwood=42\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d5d6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n")" +InformationStateString(1) = "(action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 51 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 50 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 49 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 48 Upcard: XX\nDiscard pile: \n\nPlayer1: 
Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 47 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 46 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 45 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 44 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 43 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 42 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 41 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 40 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s Js |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 39 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 38 
Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| 4h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 37 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 36 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s JsQs |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 35 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 34 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| 9c |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 33 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 32 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 1\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=54, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh 
|\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: XX\nDiscard pile: 5c\n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 30 Upcard: XX\nDiscard pile: 5c\n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 30 Upcard: Kh\nDiscard pile: 5c\n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 30 Upcard: XX\nDiscard pile: 5cKh\n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 29 Upcard: XX\nDiscard pile: 5cKh\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 6c 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=50, observation="\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: 0\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 29 Upcard: Qh\nDiscard pile: 5cKh\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 6c 9c |\n| |\n| 4h 8h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qh\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 29 Upcard: XX\nDiscard pile: 5cKhQh\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 6c 9c |\n| |\n| 4h 8h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qh\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 28 Upcard: XX\nDiscard pile: 5cKhQh\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 6c 9c |\n| |\n| 4h 8h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qh\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 28 Upcard: Qc\nDiscard pile: 5cKhQh\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 6c 9c |\n| |\n| 4h 8h |\n+--------------------------+\n"), (action=53, 
observation="\nKnock card: 10\nPrev upcard: Qc\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 28 Upcard: XX\nDiscard pile: 5cKhQhQc\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 6c 9c |\n| |\n| 4h 8h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qc\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 27 Upcard: XX\nDiscard pile: 5cKhQhQc\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 6c 9c |\n| Jd |\n| 4h 8h |\n+--------------------------+\n"), (action=11, observation="\nKnock card: 10\nPrev upcard: Qc\nRepeated move: 0\nCurrent player: 0\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 27 Upcard: Qs\nDiscard pile: 5cKhQhQc\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c 9c |\n| Jd |\n| 4h 8h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qs\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 27 Upcard: XX\nDiscard pile: 5cKhQhQcQs\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c 9c |\n| Jd |\n| 4h 8h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qs\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 26 Upcard: XX\nDiscard pile: 5cKhQhQcQs\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c 9c |\n| Jd |\n| 4h 8h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qs\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 26 Upcard: Jh\nDiscard pile: 5cKhQhQcQs\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c 9c |\n| Jd |\n| 4h 8h |\n+--------------------------+\n"), (action=52, observation="\nKnock card: 10\nPrev upcard: Jh\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 26 Upcard: XX\nDiscard pile: 5cKhQhQcQs\n\nPlayer1: Deadwood=20\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c 9c |\n| Jd |\n| 4h 8h Jh |\n+--------------------------+\n"), (action=21, observation="\nKnock card: 10\nPrev upcard: Jh\nRepeated move: 0\nCurrent player: 0\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 26 Upcard: 9c\nDiscard pile: 5cKhQhQcQs\n\nPlayer1: Deadwood=20\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 4h 8h Jh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 9c\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 26 Upcard: XX\nDiscard pile: 5cKhQhQcQs9c\n\nPlayer1: Deadwood=20\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 4h 8h Jh |\n+--------------------------+\n"), 
(action=None, observation="\nKnock card: 10\nPrev upcard: 9c\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 25 Upcard: XX\nDiscard pile: 5cKhQhQcQs9c\n\nPlayer1: Deadwood=20\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 4h 8h Jh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 9c\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 25 Upcard: Ks\nDiscard pile: 5cKhQhQcQs9c\n\nPlayer1: Deadwood=20\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 4h 8h Jh |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: Ks\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 25 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs\n\nPlayer1: Deadwood=20\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 4h 8h Jh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Ks\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 24 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs\n\nPlayer1: Deadwood=15\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 3h4h 8h Jh |\n+--------------------------+\n"), (action=46, observation="\nKnock card: 10\nPrev upcard: Ks\nRepeated move: 0\nCurrent player: 0\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 24 Upcard: 8h\nDiscard pile: 5cKhQhQcQs9cKs\n\nPlayer1: Deadwood=15\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 3h4h Jh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 8h\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 24 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1: Deadwood=15\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 3h4h Jh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 8h\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1: Deadwood=15\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 3h4h Jh |\n+--------------------------+\n")" +ObservationString(0) = "\nKnock card: 10\nPrev upcard: 8h\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0: Deadwood=42\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d5d6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +ObservationString(1) = "\nKnock card: 10\nPrev upcard: 8h\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1: Deadwood=15\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd 
|\n| 3h4h Jh |\n+--------------------------+\n" +PublicObservationString() = "\nKnock card: 10\nPrev upcard: 8h\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +PrivateObservationString(0) = "\nKnock card: 10\nPrev upcard: 8h\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0: Deadwood=42\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d5d6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +PrivateObservationString(1) = "\nKnock card: 10\nPrev upcard: 8h\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1: Deadwood=15\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 3h4h Jh |\n+--------------------------+\n" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand: ◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◉◯◉◉◉◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).current_player: ◉◯ +ObservationTensor(0).knock_card: ◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(0).upcard: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).discard_pile: ◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◉◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◉ +ObservationTensor(0).stock_size: ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).layed_melds: +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◉◉◉◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉◉◯◯◯◯◯◯◉◯◯ +ObservationTensor(1).current_player: ◉◯ +ObservationTensor(1).knock_card: ◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(1).upcard: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).discard_pile: ◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◉◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◉ +ObservationTensor(1).stock_size: ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).layed_melds: +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2, 9, 20, 23, 27, 29, 30, 31, 32, 35, 47] +StringLegalActions() = ["Player: 0 Action: 3s", "Player: 0 Action: Ts", "Player: 0 Action: 8c", "Player: 0 Action: Jc", "Player: 0 Action: 2d", "Player: 0 Action: 4d", "Player: 0 Action: 5d", "Player: 0 Action: 6d", "Player: 0 Action: 7d", "Player: 0 Action: Td", "Player: 0 Action: 9h"] + +# Apply action "Player: 0 Action: Jc" +action: 23 + +# State 49 +# 
Apply action "Player: 1 Action: Draw upcard" +action: 52 + +# State 50 +# +# Knock card: 10 +# Prev upcard: Jc +# Repeated move: 0 +# Current player: 1 +# Phase: Discard +# +# Player0: Deadwood=42 +# +--------------------------+ +# | 3s Ts | +# | 8c | +# | 2d 4d5d6d7d Td | +# | 9h | +# +--------------------------+ +# +# Stock size: 23 Upcard: XX +# Discard pile: 5cKhQhQcQs9cKs8h +# +# Player1: Deadwood=9 +# +--------------------------+ +# | 2s 6s7s8s Js | +# | 6c Jc | +# | Jd | +# | 3h4h Jh | +# +--------------------------+ +IsTerminal() = False +History() = [29, 49, 9, 2, 51, 24, 35, 20, 47, 31, 5, 10, 6, 42, 50, 11, 1, 21, 46, 7, 17, 54, 54, 53, 27, 51, 53, 18, 50, 53, 32, 24, 53, 36, 11, 53, 23, 49, 52, 21, 53, 12, 12, 53, 41, 46, 53, 30, 23, 52] +HistoryString() = "29, 49, 9, 2, 51, 24, 35, 20, 47, 31, 5, 10, 6, 42, 50, 11, 1, 21, 46, 7, 17, 54, 54, 53, 27, 51, 53, 18, 50, 53, 32, 24, 53, 36, 11, 53, 23, 49, 52, 21, 53, 12, 12, 53, 41, 46, 53, 30, 23, 52" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "(action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| 4d |\n| |\n+--------------------------+\n\nStock size: 51 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 50 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| Ts |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 49 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 48 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| |\n| 4d |\n| Jh Kh|\n+--------------------------+\n\nStock size: 47 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| Qc |\n| 4d |\n| Jh Kh|\n+--------------------------+\n\nStock size: 46 Upcard: XX\nDiscard pile: 
\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| Qc |\n| 4d Td |\n| Jh Kh|\n+--------------------------+\n\nStock size: 45 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d Td |\n| Jh Kh|\n+--------------------------+\n\nStock size: 44 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 43 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 42 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 41 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 40 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 39 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 38 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 37 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), 
(action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 36 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 35 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 34 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 33 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 32 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=54, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 1\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: XX\nDiscard pile: 5c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 
5c\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 30 Upcard: XX\nDiscard pile: 5c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=51, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 30 Upcard: Kh\nDiscard pile: 5c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 30 Upcard: XX\nDiscard pile: 5cKh\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 29 Upcard: XX\nDiscard pile: 5cKh\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: 0\nPhase: Draw\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 29 Upcard: Qh\nDiscard pile: 5cKh\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: Qh\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 29 Upcard: XX\nDiscard pile: 5cKhQh\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qh\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d7d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 28 Upcard: XX\nDiscard pile: 5cKhQh\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=24, observation="\nKnock card: 10\nPrev upcard: Qh\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d 6d7d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 28 Upcard: Qc\nDiscard pile: 5cKhQh\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qc\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d 6d7d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 28 Upcard: XX\nDiscard pile: 5cKhQhQc\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qc\nRepeated move: 
0\nCurrent player: 1\nPhase: Discard\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d 6d7d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 27 Upcard: XX\nDiscard pile: 5cKhQhQc\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qc\nRepeated move: 0\nCurrent player: 0\nPhase: Draw\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d 6d7d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 27 Upcard: Qs\nDiscard pile: 5cKhQhQc\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: Qs\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d 6d7d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 27 Upcard: XX\nDiscard pile: 5cKhQhQcQs\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qs\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 26 Upcard: XX\nDiscard pile: 5cKhQhQcQs\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=49, observation="\nKnock card: 10\nPrev upcard: Qs\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 26 Upcard: Jh\nDiscard pile: 5cKhQhQcQs\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Jh\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 26 Upcard: XX\nDiscard pile: 5cKhQhQcQs\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Jh\nRepeated move: 0\nCurrent player: 0\nPhase: Draw\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 26 Upcard: 9c\nDiscard pile: 5cKhQhQcQs\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: 9c\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 26 Upcard: XX\nDiscard pile: 5cKhQhQcQs9c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 9c\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts Ks|\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 25 Upcard: XX\nDiscard pile: 5cKhQhQcQs9c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=12, observation="\nKnock card: 10\nPrev 
upcard: 9c\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 25 Upcard: Ks\nDiscard pile: 5cKhQhQcQs9c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Ks\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 25 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Ks\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 24 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Ks\nRepeated move: 0\nCurrent player: 0\nPhase: Draw\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 24 Upcard: 8h\nDiscard pile: 5cKhQhQcQs9cKs\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: 8h\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 24 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 8h\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0: Deadwood=42\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d5d6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=23, observation="\nKnock card: 10\nPrev upcard: 8h\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0: Deadwood=42\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d5d6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 23 Upcard: Jc\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0: Deadwood=42\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d5d6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n")" +InformationStateString(1) = "(action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), 
(action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 51 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 50 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 49 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 48 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 47 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 46 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 45 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 44 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 43 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 42 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| 
|\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 41 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 40 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s Js |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 39 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 38 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| 4h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 37 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 36 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s JsQs |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 35 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 34 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| 9c |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 33 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| 
|\n+--------------------------+\n\nStock size: 32 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 1\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=54, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: XX\nDiscard pile: 5c\n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 30 Upcard: XX\nDiscard pile: 5c\n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 30 Upcard: Kh\nDiscard pile: 5c\n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 30 Upcard: XX\nDiscard pile: 5cKh\n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 29 Upcard: XX\nDiscard pile: 5cKh\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 6c 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=50, observation="\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: 0\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 29 Upcard: 
Qh\nDiscard pile: 5cKh\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 6c 9c |\n| |\n| 4h 8h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qh\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 29 Upcard: XX\nDiscard pile: 5cKhQh\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 6c 9c |\n| |\n| 4h 8h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qh\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 28 Upcard: XX\nDiscard pile: 5cKhQh\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 6c 9c |\n| |\n| 4h 8h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qh\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 28 Upcard: Qc\nDiscard pile: 5cKhQh\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 6c 9c |\n| |\n| 4h 8h |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: Qc\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 28 Upcard: XX\nDiscard pile: 5cKhQhQc\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 6c 9c |\n| |\n| 4h 8h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qc\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 27 Upcard: XX\nDiscard pile: 5cKhQhQc\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 6c 9c |\n| Jd |\n| 4h 8h |\n+--------------------------+\n"), (action=11, observation="\nKnock card: 10\nPrev upcard: Qc\nRepeated move: 0\nCurrent player: 0\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 27 Upcard: Qs\nDiscard pile: 5cKhQhQc\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c 9c |\n| Jd |\n| 4h 8h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qs\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 27 Upcard: XX\nDiscard pile: 5cKhQhQcQs\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c 9c |\n| Jd |\n| 4h 8h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qs\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 26 Upcard: XX\nDiscard pile: 5cKhQhQcQs\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c 9c |\n| Jd |\n| 4h 8h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qs\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 26 Upcard: Jh\nDiscard pile: 
5cKhQhQcQs\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c 9c |\n| Jd |\n| 4h 8h |\n+--------------------------+\n"), (action=52, observation="\nKnock card: 10\nPrev upcard: Jh\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 26 Upcard: XX\nDiscard pile: 5cKhQhQcQs\n\nPlayer1: Deadwood=20\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c 9c |\n| Jd |\n| 4h 8h Jh |\n+--------------------------+\n"), (action=21, observation="\nKnock card: 10\nPrev upcard: Jh\nRepeated move: 0\nCurrent player: 0\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 26 Upcard: 9c\nDiscard pile: 5cKhQhQcQs\n\nPlayer1: Deadwood=20\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 4h 8h Jh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 9c\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 26 Upcard: XX\nDiscard pile: 5cKhQhQcQs9c\n\nPlayer1: Deadwood=20\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 4h 8h Jh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 9c\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 25 Upcard: XX\nDiscard pile: 5cKhQhQcQs9c\n\nPlayer1: Deadwood=20\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 4h 8h Jh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 9c\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 25 Upcard: Ks\nDiscard pile: 5cKhQhQcQs9c\n\nPlayer1: Deadwood=20\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 4h 8h Jh |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: Ks\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 25 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs\n\nPlayer1: Deadwood=20\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 4h 8h Jh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Ks\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 24 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs\n\nPlayer1: Deadwood=15\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 3h4h 8h Jh |\n+--------------------------+\n"), (action=46, observation="\nKnock card: 10\nPrev upcard: Ks\nRepeated move: 0\nCurrent player: 0\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 24 Upcard: 8h\nDiscard pile: 5cKhQhQcQs9cKs\n\nPlayer1: Deadwood=15\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 3h4h Jh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 8h\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 24 
Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1: Deadwood=15\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 3h4h Jh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 8h\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1: Deadwood=15\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 3h4h Jh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 8h\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 23 Upcard: Jc\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1: Deadwood=15\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 3h4h Jh |\n+--------------------------+\n"), (action=52, observation="\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1: Deadwood=9\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c Jc |\n| Jd |\n| 3h4h Jh |\n+--------------------------+\n")" +ObservationString(0) = "\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0: Deadwood=42\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d5d6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +ObservationString(1) = "\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1: Deadwood=9\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c Jc |\n| Jd |\n| 3h4h Jh |\n+--------------------------+\n" +PublicObservationString() = "\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +PrivateObservationString(0) = "\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0: Deadwood=42\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d5d6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +PrivateObservationString(1) = "\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1: Deadwood=9\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c Jc |\n| Jd |\n| 3h4h Jh |\n+--------------------------+\n" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand: ◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◉◉◉◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+ObservationTensor(0).current_player: ◯◉ +ObservationTensor(0).knock_card: ◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(0).upcard: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).discard_pile: ◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◉◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◉ +ObservationTensor(0).stock_size: ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).layed_melds: +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◉◉◉◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉◉◯◯◯◯◯◯◉◯◯ +ObservationTensor(1).current_player: ◯◉ +ObservationTensor(1).knock_card: ◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(1).upcard: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).discard_pile: ◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◉◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◉ +ObservationTensor(1).stock_size: ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).layed_melds: +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 5, 6, 7, 10, 18, 23, 36, 41, 42, 49, 55] +StringLegalActions() = ["Player: 1 Action: 2s", "Player: 1 Action: 6s", "Player: 1 Action: 7s", "Player: 1 Action: 8s", "Player: 1 Action: Js", "Player: 1 Action: 6c", "Player: 1 Action: Jc", "Player: 1 Action: Jd", "Player: 1 Action: 3h", "Player: 1 Action: 4h", "Player: 1 Action: Jh", "Player: 1 Action: Knock"] + +# Apply action "Player: 1 Action: Knock" +action: 55 + +# State 51 +# Apply action "Player: 1 Action: 6c" +action: 18 + +# State 52 +# Apply action "Player: 1 Action: 6s7s8s" +action: 126 + +# State 53 +# Apply action "Player: 1 Action: JsJcJdJh" +action: 110 + +# State 54 +# Apply action "Player: 1 Action: Pass" +action: 54 + +# State 55 +# Apply action "Player: 0 Action: Pass" +action: 54 + +# State 56 +# Apply action "Player: 0 Action: 4d5d6d7d" +action: 188 + +# State 57 +# Apply action "Player: 0 Action: Pass" +action: 54 + +# State 58 +# +# Knock card: 10 +# Prev upcard: Jc +# Repeated move: 0 +# Current player: 0 +# Phase: GameOver +# +# Player0: Deadwood=42 +# Layed melds: 4d5d6d7d +# +--------------------------+ +# | 3s Ts | +# | 8c | +# | 2d Td | +# | 9h | +# +--------------------------+ +# +# Stock size: 23 Upcard: XX +# Discard pile: 5cKhQhQcQs9cKs8h6c +# +# Player1: Deadwood=9 +# Layed melds: 6s7s8s JsJcJdJh +# +--------------------------+ +# | 2s | +# | | +# | | +# | 3h4h | +# +--------------------------+ +IsTerminal() = True +History() = [29, 49, 9, 2, 51, 24, 35, 20, 47, 31, 5, 10, 6, 42, 50, 11, 1, 21, 46, 7, 17, 54, 54, 53, 27, 51, 53, 18, 50, 53, 32, 24, 53, 36, 11, 53, 23, 49, 52, 21, 53, 12, 12, 53, 41, 46, 53, 30, 23, 52, 55, 18, 126, 110, 54, 54, 188, 54] +HistoryString() = "29, 49, 9, 2, 51, 24, 35, 20, 47, 31, 5, 10, 6, 42, 50, 11, 1, 21, 46, 7, 17, 54, 54, 53, 27, 51, 53, 18, 50, 53, 32, 24, 53, 
36, 11, 53, 23, 49, 52, 21, 53, 12, 12, 53, 41, 46, 53, 30, 23, 52, 55, 18, 126, 110, 54, 54, 188, 54" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "(action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| 4d |\n| |\n+--------------------------+\n\nStock size: 51 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 50 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| Ts |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 49 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| |\n| 4d |\n| Jh |\n+--------------------------+\n\nStock size: 48 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| |\n| 4d |\n| Jh Kh|\n+--------------------------+\n\nStock size: 47 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| Qc |\n| 4d |\n| Jh Kh|\n+--------------------------+\n\nStock size: 46 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| Qc |\n| 4d Td |\n| Jh Kh|\n+--------------------------+\n\nStock size: 45 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d Td |\n| Jh Kh|\n+--------------------------+\n\nStock size: 44 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, 
observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 43 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 42 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 41 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 40 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 39 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 38 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 37 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 36 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 35 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 
-1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 34 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 33 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=0\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 32 Upcard: XX\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=54, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 1\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=80\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 31 Upcard: XX\nDiscard pile: 5c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh Kh|\n+--------------------------+\n\nStock size: 30 Upcard: XX\nDiscard pile: 5c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=51, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 30 Upcard: Kh\nDiscard pile: 5c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: 
Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 30 Upcard: XX\nDiscard pile: 5cKh\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 29 Upcard: XX\nDiscard pile: 5cKh\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: 0\nPhase: Draw\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 29 Upcard: Qh\nDiscard pile: 5cKh\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: Qh\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=72\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 29 Upcard: XX\nDiscard pile: 5cKhQh\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qh\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Qc |\n| 2d 4d 6d7d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 28 Upcard: XX\nDiscard pile: 5cKhQh\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=24, observation="\nKnock card: 10\nPrev upcard: Qh\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d 6d7d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 28 Upcard: Qc\nDiscard pile: 5cKhQh\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qc\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d 6d7d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 28 Upcard: XX\nDiscard pile: 5cKhQhQc\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qc\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d 6d7d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 27 Upcard: XX\nDiscard pile: 5cKhQhQc\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qc\nRepeated move: 0\nCurrent player: 0\nPhase: Draw\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d 6d7d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 27 Upcard: Qs\nDiscard pile: 5cKhQhQc\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: Qs\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: 
Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d 6d7d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 27 Upcard: XX\nDiscard pile: 5cKhQhQcQs\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qs\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h Jh |\n+--------------------------+\n\nStock size: 26 Upcard: XX\nDiscard pile: 5cKhQhQcQs\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=49, observation="\nKnock card: 10\nPrev upcard: Qs\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 26 Upcard: Jh\nDiscard pile: 5cKhQhQcQs\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Jh\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 26 Upcard: XX\nDiscard pile: 5cKhQhQcQs\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Jh\nRepeated move: 0\nCurrent player: 0\nPhase: Draw\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 26 Upcard: 9c\nDiscard pile: 5cKhQhQcQs\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: 9c\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 26 Upcard: XX\nDiscard pile: 5cKhQhQcQs9c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 9c\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts Ks|\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 25 Upcard: XX\nDiscard pile: 5cKhQhQcQs9c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=12, observation="\nKnock card: 10\nPrev upcard: 9c\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 25 Upcard: Ks\nDiscard pile: 5cKhQhQcQs9c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Ks\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 25 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Ks\nRepeated move: 0\nCurrent 
player: 1\nPhase: Discard\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 24 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Ks\nRepeated move: 0\nCurrent player: 0\nPhase: Draw\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 24 Upcard: 8h\nDiscard pile: 5cKhQhQcQs9cKs\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: 8h\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0: Deadwood=69\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d 6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 24 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 8h\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0: Deadwood=42\n+--------------------------+\n| 3s Ts |\n| 8c Jc |\n| 2d 4d5d6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=23, observation="\nKnock card: 10\nPrev upcard: 8h\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0: Deadwood=42\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d5d6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 23 Upcard: Jc\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0: Deadwood=42\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d5d6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 1\nPhase: Knock\n\nPlayer0: Deadwood=64\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d5d6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 1\nPhase: Knock\n\nPlayer0: Deadwood=64\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d5d6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h6c\n\nPlayer1:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 1\nPhase: Knock\n\nPlayer0: Deadwood=64\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d5d6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h6c\n\nPlayer1:\nLayed melds: 6s7s8s\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), 
(action=None, observation="\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 1\nPhase: Knock\n\nPlayer0: Deadwood=64\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d5d6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h6c\n\nPlayer1:\nLayed melds: 6s7s8s JsJcJdJh\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 0\nPhase: Layoff\n\nPlayer0: Deadwood=64\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d5d6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h6c\n\nPlayer1:\nLayed melds: 6s7s8s JsJcJdJh\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=54, observation="\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 0\nPhase: Layoff\n\nPlayer0: Deadwood=64\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d 4d5d6d7d Td |\n| 9h |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h6c\n\nPlayer1:\nLayed melds: 6s7s8s JsJcJdJh\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=188, observation="\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 0\nPhase: Layoff\n\nPlayer0: Deadwood=42\nLayed melds: 4d5d6d7d\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d Td |\n| 9h |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h6c\n\nPlayer1:\nLayed melds: 6s7s8s JsJcJdJh\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=54, observation="\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 0\nPhase: GameOver\n\nPlayer0: Deadwood=42\nLayed melds: 4d5d6d7d\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d Td |\n| 9h |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h6c\n\nPlayer1:\nLayed melds: 6s7s8s JsJcJdJh\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n")" +InformationStateString(1) = "(action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 52 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 51 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 50 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 49 Upcard: XX\nDiscard pile: 
\n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 48 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 47 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 46 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 45 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 44 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 43 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 42 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 41 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 40 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s Js |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 39 
Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 38 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| 4h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 37 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s Js |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 36 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 6s7s JsQs |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 35 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 34 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| 9c |\n| |\n| 4h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 33 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 32 Upcard: XX\nDiscard pile: \n\nPlayer1: Deadwood=0\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 1\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh 
|\n+--------------------------+\n"), (action=54, observation="\nKnock card: 10\nPrev upcard: XX\nRepeated move: 0\nCurrent player: 0\nPhase: FirstUpcard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: 5c\nDiscard pile: \n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 31 Upcard: XX\nDiscard pile: 5c\n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 30 Upcard: XX\nDiscard pile: 5c\n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 5c\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 30 Upcard: Kh\nDiscard pile: 5c\n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 30 Upcard: XX\nDiscard pile: 5cKh\n\nPlayer1: Deadwood=53\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 29 Upcard: XX\nDiscard pile: 5cKh\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 6c 9c |\n| |\n| 4h 8h Qh |\n+--------------------------+\n"), (action=50, observation="\nKnock card: 10\nPrev upcard: Kh\nRepeated move: 0\nCurrent player: 0\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 29 Upcard: Qh\nDiscard pile: 5cKh\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 6c 9c |\n| |\n| 4h 8h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qh\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 29 Upcard: XX\nDiscard pile: 5cKhQh\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 6c 9c |\n| |\n| 4h 8h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qh\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 28 Upcard: XX\nDiscard pile: 5cKhQh\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 6c 9c |\n| |\n| 4h 8h |\n+--------------------------+\n"), (action=None, 
observation="\nKnock card: 10\nPrev upcard: Qh\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 28 Upcard: Qc\nDiscard pile: 5cKhQh\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 6c 9c |\n| |\n| 4h 8h |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: Qc\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 28 Upcard: XX\nDiscard pile: 5cKhQhQc\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 6c 9c |\n| |\n| 4h 8h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qc\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 27 Upcard: XX\nDiscard pile: 5cKhQhQc\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s JsQs |\n| 6c 9c |\n| Jd |\n| 4h 8h |\n+--------------------------+\n"), (action=11, observation="\nKnock card: 10\nPrev upcard: Qc\nRepeated move: 0\nCurrent player: 0\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 27 Upcard: Qs\nDiscard pile: 5cKhQhQc\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c 9c |\n| Jd |\n| 4h 8h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qs\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 27 Upcard: XX\nDiscard pile: 5cKhQhQcQs\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c 9c |\n| Jd |\n| 4h 8h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qs\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 26 Upcard: XX\nDiscard pile: 5cKhQhQcQs\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c 9c |\n| Jd |\n| 4h 8h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Qs\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 26 Upcard: Jh\nDiscard pile: 5cKhQhQcQs\n\nPlayer1: Deadwood=49\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c 9c |\n| Jd |\n| 4h 8h |\n+--------------------------+\n"), (action=52, observation="\nKnock card: 10\nPrev upcard: Jh\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 26 Upcard: XX\nDiscard pile: 5cKhQhQcQs\n\nPlayer1: Deadwood=20\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c 9c |\n| Jd |\n| 4h 8h Jh |\n+--------------------------+\n"), (action=21, observation="\nKnock card: 10\nPrev upcard: Jh\nRepeated move: 0\nCurrent player: 0\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 26 Upcard: 9c\nDiscard pile: 5cKhQhQcQs\n\nPlayer1: Deadwood=20\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 4h 8h Jh |\n+--------------------------+\n"), (action=None, 
observation="\nKnock card: 10\nPrev upcard: 9c\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 26 Upcard: XX\nDiscard pile: 5cKhQhQcQs9c\n\nPlayer1: Deadwood=20\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 4h 8h Jh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 9c\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 25 Upcard: XX\nDiscard pile: 5cKhQhQcQs9c\n\nPlayer1: Deadwood=20\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 4h 8h Jh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 9c\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 25 Upcard: Ks\nDiscard pile: 5cKhQhQcQs9c\n\nPlayer1: Deadwood=20\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 4h 8h Jh |\n+--------------------------+\n"), (action=53, observation="\nKnock card: 10\nPrev upcard: Ks\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 25 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs\n\nPlayer1: Deadwood=20\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 4h 8h Jh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Ks\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 24 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs\n\nPlayer1: Deadwood=15\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 3h4h 8h Jh |\n+--------------------------+\n"), (action=46, observation="\nKnock card: 10\nPrev upcard: Ks\nRepeated move: 0\nCurrent player: 0\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 24 Upcard: 8h\nDiscard pile: 5cKhQhQcQs9cKs\n\nPlayer1: Deadwood=15\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 3h4h Jh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 8h\nRepeated move: 0\nCurrent player: -1\nPhase: Deal\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 24 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1: Deadwood=15\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 3h4h Jh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 8h\nRepeated move: 0\nCurrent player: 0\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1: Deadwood=15\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 3h4h Jh |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: 8h\nRepeated move: 0\nCurrent player: 1\nPhase: Draw\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 23 Upcard: Jc\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1: Deadwood=15\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c |\n| Jd |\n| 3h4h Jh 
|\n+--------------------------+\n"), (action=52, observation="\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 1\nPhase: Discard\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1: Deadwood=9\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c Jc |\n| Jd |\n| 3h4h Jh |\n+--------------------------+\n"), (action=55, observation="\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 1\nPhase: Knock\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h\n\nPlayer1: Deadwood=76\n+--------------------------+\n| 2s 6s7s8s Js |\n| 6c Jc |\n| Jd |\n| 3h4h Jh |\n+--------------------------+\n"), (action=18, observation="\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 1\nPhase: Knock\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h6c\n\nPlayer1: Deadwood=70\n+--------------------------+\n| 2s 6s7s8s Js |\n| Jc |\n| Jd |\n| 3h4h Jh |\n+--------------------------+\n"), (action=126, observation="\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 1\nPhase: Knock\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h6c\n\nPlayer1: Deadwood=49\nLayed melds: 6s7s8s\n+--------------------------+\n| 2s Js |\n| Jc |\n| Jd |\n| 3h4h Jh |\n+--------------------------+\n"), (action=110, observation="\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 1\nPhase: Knock\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h6c\n\nPlayer1: Deadwood=9\nLayed melds: 6s7s8s JsJcJdJh\n+--------------------------+\n| 2s |\n| |\n| |\n| 3h4h |\n+--------------------------+\n"), (action=54, observation="\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 0\nPhase: Layoff\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h6c\n\nPlayer1: Deadwood=9\nLayed melds: 6s7s8s JsJcJdJh\n+--------------------------+\n| 2s |\n| |\n| |\n| 3h4h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 0\nPhase: Layoff\n\nPlayer0:\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h6c\n\nPlayer1: Deadwood=9\nLayed melds: 6s7s8s JsJcJdJh\n+--------------------------+\n| 2s |\n| |\n| |\n| 3h4h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 0\nPhase: Layoff\n\nPlayer0:\nLayed melds: 4d5d6d7d\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h6c\n\nPlayer1: Deadwood=9\nLayed melds: 6s7s8s JsJcJdJh\n+--------------------------+\n| 2s |\n| |\n| |\n| 3h4h |\n+--------------------------+\n"), (action=None, observation="\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 0\nPhase: GameOver\n\nPlayer0:\nLayed melds: 4d5d6d7d\n+--------------------------+\n| |\n| |\n| |\n| 
|\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h6c\n\nPlayer1: Deadwood=9\nLayed melds: 6s7s8s JsJcJdJh\n+--------------------------+\n| 2s |\n| |\n| |\n| 3h4h |\n+--------------------------+\n")" +ObservationString(0) = "\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 0\nPhase: GameOver\n\nPlayer0: Deadwood=42\nLayed melds: 4d5d6d7d\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d Td |\n| 9h |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h6c\n\nPlayer1:\nLayed melds: 6s7s8s JsJcJdJh\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +ObservationString(1) = "\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 0\nPhase: GameOver\n\nPlayer0:\nLayed melds: 4d5d6d7d\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h6c\n\nPlayer1: Deadwood=9\nLayed melds: 6s7s8s JsJcJdJh\n+--------------------------+\n| 2s |\n| |\n| |\n| 3h4h |\n+--------------------------+\n" +PublicObservationString() = "\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 0\nPhase: GameOver\n\nPlayer0:\nLayed melds: 4d5d6d7d\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h6c\n\nPlayer1:\nLayed melds: 6s7s8s JsJcJdJh\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +PrivateObservationString(0) = "\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 0\nPhase: GameOver\n\nPlayer0: Deadwood=42\nLayed melds: 4d5d6d7d\n+--------------------------+\n| 3s Ts |\n| 8c |\n| 2d Td |\n| 9h |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h6c\n\nPlayer1:\nLayed melds: 6s7s8s JsJcJdJh\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n" +PrivateObservationString(1) = "\nKnock card: 10\nPrev upcard: Jc\nRepeated move: 0\nCurrent player: 0\nPhase: GameOver\n\nPlayer0:\nLayed melds: 4d5d6d7d\n+--------------------------+\n| |\n| |\n| |\n| |\n+--------------------------+\n\nStock size: 23 Upcard: XX\nDiscard pile: 5cKhQhQcQs9cKs8h6c\n\nPlayer1: Deadwood=9\nLayed melds: 6s7s8s JsJcJdJh\n+--------------------------+\n| 2s |\n| |\n| |\n| 3h4h |\n+--------------------------+\n" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand: ◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).current_player: ◉◯ +ObservationTensor(0).knock_card: ◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(0).upcard: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).discard_pile: ◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◉◉◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◉ +ObservationTensor(0).stock_size: ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).layed_melds: +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯ 
+ObservationTensor(1).current_player: ◉◯ +ObservationTensor(1).knock_card: ◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(1).upcard: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).discard_pile: ◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◉◉◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◉ +ObservationTensor(1).stock_size: ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).layed_melds: +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [-33, 33] +Returns() = [-33, 33] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/go.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/go.txt new file mode 100644 index 0000000..0ff23cf --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/go.txt @@ -0,0 +1,839 @@ +game: go(board_size=7,komi=4.5) + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Go" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["board_size", "handicap", "komi", "max_game_length"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "go" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 50 +PolicyTensorShape() = [50] +MaxChanceOutcomes() = 0 +GetParameters() = {board_size=7,handicap=0,komi=4.5,max_game_length=98} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [4, 7, 7] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 196 +MaxGameLength() = 98 +ToString() = "go(board_size=7,komi=4.5)" + +# State 0 +# GoState(komi=4.5, to_play=B, history.size()=0) +# +# 7 +++++++ +# 6 +++++++ +# 5 +++++++ +# 4 +++++++ +# 3 +++++++ +# 2 +++++++ +# 1 +++++++ +# ABCDEFG +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = "GoState(komi=4.5, to_play=B, history.size()=0)\n\n 7 +++++++\n 6 +++++++\n 5 +++++++\n 4 +++++++\n 3 +++++++\n 2 +++++++\n 1 +++++++\n ABCDEFG\n" +ObservationString(1) = "GoState(komi=4.5, to_play=B, history.size()=0)\n\n 7 +++++++\n 6 +++++++\n 5 +++++++\n 4 +++++++\n 3 +++++++\n 2 +++++++\n 1 +++++++\n ABCDEFG\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49] +StringLegalActions() = ["B a1", "B b1", "B c1", "B d1", "B e1", "B f1", "B g1", "B a2", "B b2", "B c2", "B d2", "B e2", "B f2", "B g2", "B a3", "B b3", "B c3", "B d3", "B e3", "B f3", "B g3", "B a4", "B b4", "B c4", "B d4", "B e4", "B f4", "B g4", "B a5", "B b5", "B c5", "B d5", "B e5", "B f5", "B g5", "B a6", "B b6", "B c6", "B d6", "B e6", "B f6", "B g6", "B a7", "B b7", "B c7", "B d7", "B e7", "B f7", "B g7", "B PASS"] + +# Apply action "B d4" +action: 24 + +# State 1 +# GoState(komi=4.5, to_play=W, history.size()=1) +# +# 7 +++++++ +# 6 +++++++ +# 5 +++++++ +# 4 +++X+++ +# 3 +++++++ +# 2 +++++++ +# 1 +++++++ +# ABCDEFG +IsTerminal() = False +History() = [24] +HistoryString() = "24" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "24" +InformationStateString(1) = "24" +ObservationString(0) = "GoState(komi=4.5, to_play=W, history.size()=1)\n\n 7 +++++++\n 6 +++++++\n 5 +++++++\n 4 +++X+++\n 3 +++++++\n 2 +++++++\n 1 +++++++\n ABCDEFG\n" +ObservationString(1) = "GoState(komi=4.5, to_play=W, history.size()=1)\n\n 7 +++++++\n 6 +++++++\n 5 +++++++\n 4 +++X+++\n 3 +++++++\n 2 +++++++\n 1 +++++++\n ABCDEFG\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49] +StringLegalActions() = ["W a1", "W b1", "W c1", "W d1", "W e1", "W f1", "W g1", "W a2", "W b2", "W c2", "W d2", "W e2", "W f2", "W g2", "W a3", "W b3", "W c3", "W d3", "W e3", "W f3", "W g3", "W a4", "W b4", "W c4", "W e4", "W f4", "W g4", "W a5", "W b5", "W c5", "W d5", "W e5", "W f5", "W g5", "W a6", "W b6", "W c6", "W d6", "W e6", "W f6", "W g6", "W a7", "W b7", "W c7", "W d7", "W e7", "W f7", "W g7", "W PASS"] + +# Apply action "W d3" +action: 17 + +# State 2 +# GoState(komi=4.5, to_play=B, history.size()=2) +# +# 7 +++++++ +# 6 +++++++ +# 5 +++++++ +# 4 +++X+++ +# 3 +++O+++ +# 2 +++++++ +# 1 +++++++ +# ABCDEFG +IsTerminal() = False +History() = [24, 17] +HistoryString() = "24, 17" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "24, 17" +InformationStateString(1) = "24, 17" +ObservationString(0) = "GoState(komi=4.5, to_play=B, history.size()=2)\n\n 7 +++++++\n 6 +++++++\n 5 +++++++\n 4 +++X+++\n 3 +++O+++\n 2 +++++++\n 1 +++++++\n ABCDEFG\n" +ObservationString(1) = "GoState(komi=4.5, to_play=B, history.size()=2)\n\n 7 +++++++\n 6 +++++++\n 5 +++++++\n 4 +++X+++\n 3 +++O+++\n 2 +++++++\n 1 +++++++\n ABCDEFG\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ 
◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49] +StringLegalActions() = ["B a1", "B b1", "B c1", "B d1", "B e1", "B f1", "B g1", "B a2", "B b2", "B c2", "B d2", "B e2", "B f2", "B g2", "B a3", "B b3", "B c3", "B e3", "B f3", "B g3", "B a4", "B b4", "B c4", "B e4", "B f4", "B g4", "B a5", "B b5", "B c5", "B d5", "B e5", "B f5", "B g5", "B a6", "B b6", "B c6", "B d6", "B e6", "B f6", "B g6", "B a7", "B b7", "B c7", "B d7", "B e7", "B f7", "B g7", "B PASS"] + +# Apply action "B b2" +action: 8 + +# State 3 +# GoState(komi=4.5, to_play=W, history.size()=3) +# +# 7 +++++++ +# 6 +++++++ +# 5 +++++++ +# 4 +++X+++ +# 3 +++O+++ +# 2 +X+++++ +# 1 +++++++ +# ABCDEFG +IsTerminal() = False +History() = [24, 17, 8] +HistoryString() = "24, 17, 8" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "24, 17, 8" +InformationStateString(1) = "24, 17, 8" +ObservationString(0) = "GoState(komi=4.5, to_play=W, history.size()=3)\n\n 7 +++++++\n 6 +++++++\n 5 +++++++\n 4 +++X+++\n 3 +++O+++\n 2 +X+++++\n 1 +++++++\n ABCDEFG\n" +ObservationString(1) = "GoState(komi=4.5, to_play=W, history.size()=3)\n\n 7 +++++++\n 6 +++++++\n 5 +++++++\n 4 +++X+++\n 3 +++O+++\n 2 +X+++++\n 1 +++++++\n ABCDEFG\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉ +◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◉◉◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◉◉◉◯◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉ +◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◉◉◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◉◉◉◯◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49] +StringLegalActions() = ["W a1", "W b1", "W c1", "W d1", "W e1", "W f1", "W g1", "W a2", "W c2", "W d2", "W e2", "W f2", "W g2", "W a3", "W b3", "W c3", "W e3", "W f3", "W g3", "W a4", "W b4", "W c4", "W e4", "W f4", "W g4", "W a5", "W b5", "W c5", "W d5", "W e5", "W f5", "W g5", "W a6", "W b6", "W c6", "W d6", "W e6", "W f6", "W g6", "W a7", "W b7", "W c7", "W d7", "W e7", "W f7", "W g7", "W PASS"] + +# Apply action "W b5" +action: 29 + +# State 4 +# GoState(komi=4.5, to_play=B, history.size()=4) +# +# 7 +++++++ +# 6 +++++++ +# 5 +O+++++ +# 4 +++X+++ +# 3 +++O+++ +# 2 +X+++++ +# 1 +++++++ +# ABCDEFG +IsTerminal() = False +History() = [24, 17, 8, 29] +HistoryString() = "24, 17, 8, 29" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "24, 17, 8, 29" +InformationStateString(1) = "24, 17, 8, 29" +ObservationString(0) = "GoState(komi=4.5, to_play=B, history.size()=4)\n\n 7 +++++++\n 6 +++++++\n 5 +O+++++\n 4 +++X+++\n 3 +++O+++\n 2 +X+++++\n 1 +++++++\n ABCDEFG\n" +ObservationString(1) = "GoState(komi=4.5, to_play=B, history.size()=4)\n\n 7 
+++++++\n 6 +++++++\n 5 +O+++++\n 4 +++X+++\n 3 +++O+++\n 2 +X+++++\n 1 +++++++\n ABCDEFG\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯ ◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯ ◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49] +StringLegalActions() = ["B a1", "B b1", "B c1", "B d1", "B e1", "B f1", "B g1", "B a2", "B c2", "B d2", "B e2", "B f2", "B g2", "B a3", "B b3", "B c3", "B e3", "B f3", "B g3", "B a4", "B b4", "B c4", "B e4", "B f4", "B g4", "B a5", "B c5", "B d5", "B e5", "B f5", "B g5", "B a6", "B b6", "B c6", "B d6", "B e6", "B f6", "B g6", "B a7", "B b7", "B c7", "B d7", "B e7", "B f7", "B g7", "B PASS"] + +# Apply action "B g6" +action: 41 + +# State 5 +# GoState(komi=4.5, to_play=W, history.size()=5) +# +# 7 +++++++ +# 6 ++++++X +# 5 +O+++++ +# 4 +++X+++ +# 3 +++O+++ +# 2 +X+++++ +# 1 +++++++ +# ABCDEFG +IsTerminal() = False +History() = [24, 17, 8, 29, 41] +HistoryString() = "24, 17, 8, 29, 41" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "24, 17, 8, 29, 41" +InformationStateString(1) = "24, 17, 8, 29, 41" +ObservationString(0) = "GoState(komi=4.5, to_play=W, history.size()=5)\n\n 7 +++++++\n 6 ++++++X\n 5 +O+++++\n 4 +++X+++\n 3 +++O+++\n 2 +X+++++\n 1 +++++++\n ABCDEFG\n" +ObservationString(1) = "GoState(komi=4.5, to_play=W, history.size()=5)\n\n 7 +++++++\n 6 ++++++X\n 5 +O+++++\n 4 +++X+++\n 3 +++O+++\n 2 +X+++++\n 1 +++++++\n ABCDEFG\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉ +◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◉◉◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◉◉◉◯◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯ ◉◯◉◉◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉ +◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◉◉◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◉◉◉◯◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯ ◉◯◉◉◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49] +StringLegalActions() = ["W a1", "W b1", "W c1", "W d1", "W e1", "W f1", "W g1", "W a2", "W c2", "W d2", "W e2", "W f2", "W g2", "W a3", "W b3", "W c3", "W e3", "W f3", "W g3", "W a4", "W b4", "W c4", "W e4", "W f4", "W g4", "W a5", "W c5", "W d5", "W e5", "W f5", "W g5", "W a6", "W b6", "W c6", "W d6", "W e6", "W f6", "W a7", "W b7", "W c7", "W d7", "W e7", "W f7", "W g7", "W PASS"] + +# Apply action "W c1" +action: 2 + +# State 6 +# Apply action "B a7" +action: 42 + +# State 7 +# Apply action "W c6" +action: 37 + +# State 8 +# Apply action "B e3" +action: 18 + +# State 9 +# Apply action "W e1" +action: 4 + +# State 10 +# Apply action "B f7" +action: 47 + +# State 11 +# Apply action "W a1" +action: 0 + +# State 12 +# Apply action "B c3" +action: 16 + +# State 
13 +# Apply action "W c4" +action: 23 + +# State 14 +# Apply action "B g5" +action: 34 + +# State 15 +# Apply action "W g3" +action: 20 + +# State 16 +# Apply action "B e6" +action: 39 + +# State 17 +# Apply action "W f1" +action: 5 + +# State 18 +# Apply action "B f4" +action: 26 + +# State 19 +# Apply action "W a4" +action: 21 + +# State 20 +# GoState(komi=4.5, to_play=B, history.size()=20) +# +# 7 X++++X+ +# 6 ++O+X+X +# 5 +O++++X +# 4 O+OX+X+ +# 3 ++XOX+O +# 2 +X+++++ +# 1 O+O+OO+ +# ABCDEFG +IsTerminal() = False +History() = [24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21] +HistoryString() = "24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21" +InformationStateString(1) = "24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21" +ObservationString(0) = "GoState(komi=4.5, to_play=B, history.size()=20)\n\n 7 X++++X+\n 6 ++O+X+X\n 5 +O++++X\n 4 O+OX+X+\n 3 ++XOX+O\n 2 +X+++++\n 1 O+O+OO+\n ABCDEFG\n" +ObservationString(1) = "GoState(komi=4.5, to_play=B, history.size()=20)\n\n 7 X++++X+\n 6 ++O+X+X\n 5 +O++++X\n 4 O+OX+X+\n 3 ++XOX+O\n 2 +X+++++\n 1 O+O+OO+\n ABCDEFG\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◉◯◉◯◉◉◯ ◯◉◯◉◯◯◉ ◯◯◯◯◯◯◯ +◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◉◯◉◯◯ ◯◯◯◉◯◯◉ ◉◉◯◯◯◉◯ ◯◯◯◯◯◯◯ +◯◯◯◉◯◉◯ ◉◯◉◯◯◯◯ ◯◉◯◯◉◯◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉ ◯◉◯◯◯◯◯ ◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◉◯◉ ◯◯◉◯◯◯◯ ◉◉◯◉◯◉◯ ◯◯◯◯◯◯◯ +◉◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◉◉◉◉◯◉ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◉◯◉◯◉◉◯ ◯◉◯◉◯◯◉ ◯◯◯◯◯◯◯ +◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◉◯◉◯◯ ◯◯◯◉◯◯◉ ◉◉◯◯◯◉◯ ◯◯◯◯◯◯◯ +◯◯◯◉◯◉◯ ◉◯◉◯◯◯◯ ◯◉◯◯◉◯◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉ ◯◉◯◯◯◯◯ ◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◉◯◉ ◯◯◉◯◯◯◯ ◉◉◯◉◯◉◯ ◯◯◯◯◯◯◯ +◉◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◉◉◉◉◯◉ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 3, 6, 7, 9, 10, 11, 12, 13, 14, 15, 19, 22, 25, 27, 28, 30, 31, 32, 33, 35, 36, 38, 40, 43, 44, 45, 46, 48, 49] +StringLegalActions() = ["B b1", "B d1", "B g1", "B a2", "B c2", "B d2", "B e2", "B f2", "B g2", "B a3", "B b3", "B f3", "B b4", "B e4", "B g4", "B a5", "B c5", "B d5", "B e5", "B f5", "B a6", "B b6", "B d6", "B f6", "B b7", "B c7", "B d7", "B e7", "B g7", "B PASS"] + +# Apply action "B PASS" +action: 49 + +# State 21 +# GoState(komi=4.5, to_play=W, history.size()=21) +# +# 7 X++++X+ +# 6 ++O+X+X +# 5 +O++++X +# 4 O+OX+X+ +# 3 ++XOX+O +# 2 +X+++++ +# 1 O+O+OO+ +# ABCDEFG +IsTerminal() = False +History() = [24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21, 49] +HistoryString() = "24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21, 49" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21, 49" +InformationStateString(1) = "24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21, 49" +ObservationString(0) = "GoState(komi=4.5, to_play=W, history.size()=21)\n\n 7 X++++X+\n 6 ++O+X+X\n 5 +O++++X\n 4 O+OX+X+\n 3 ++XOX+O\n 2 +X+++++\n 1 O+O+OO+\n ABCDEFG\n" +ObservationString(1) = "GoState(komi=4.5, to_play=W, history.size()=21)\n\n 7 X++++X+\n 6 ++O+X+X\n 5 +O++++X\n 4 O+OX+X+\n 3 ++XOX+O\n 2 +X+++++\n 1 O+O+OO+\n ABCDEFG\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◉◯◉◯◉◉◯ ◯◉◯◉◯◯◉ ◉◉◉◉◉◉◉ +◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◉◉◉◉◉ ◉◉◉◉◉◉◉ +◯◯◉◯◉◯◯ ◯◯◯◉◯◯◉ ◉◉◯◯◯◉◯ ◉◉◉◉◉◉◉ +◯◯◯◉◯◉◯ ◉◯◉◯◯◯◯ ◯◉◯◯◉◯◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◉ ◯◉◯◯◯◯◯ ◉◯◉◉◉◉◯ ◉◉◉◉◉◉◉ +◯◯◯◯◉◯◉ ◯◯◉◯◯◯◯ 
◉◉◯◉◯◉◯ ◉◉◉◉◉◉◉ +◉◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◉◉◉◉◯◉ ◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◉◯◉◯◉◉◯ ◯◉◯◉◯◯◉ ◉◉◉◉◉◉◉ +◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◉◉◉◉◉ ◉◉◉◉◉◉◉ +◯◯◉◯◉◯◯ ◯◯◯◉◯◯◉ ◉◉◯◯◯◉◯ ◉◉◉◉◉◉◉ +◯◯◯◉◯◉◯ ◉◯◉◯◯◯◯ ◯◉◯◯◉◯◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◉ ◯◉◯◯◯◯◯ ◉◯◉◉◉◉◯ ◉◉◉◉◉◉◉ +◯◯◯◯◉◯◉ ◯◯◉◯◯◯◯ ◉◉◯◉◯◉◯ ◉◉◉◉◉◉◉ +◉◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◉◉◉◉◯◉ ◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 3, 6, 7, 9, 10, 11, 12, 13, 14, 15, 19, 22, 25, 27, 28, 30, 31, 32, 33, 35, 36, 38, 40, 43, 44, 45, 46, 49] +StringLegalActions() = ["W b1", "W d1", "W g1", "W a2", "W c2", "W d2", "W e2", "W f2", "W g2", "W a3", "W b3", "W f3", "W b4", "W e4", "W g4", "W a5", "W c5", "W d5", "W e5", "W f5", "W a6", "W b6", "W d6", "W f6", "W b7", "W c7", "W d7", "W e7", "W PASS"] + +# Apply action "W b3" +action: 15 + +# State 22 +# Apply action "B a5" +action: 28 + +# State 23 +# Apply action "W d6" +action: 38 + +# State 24 +# Apply action "B b6" +action: 36 + +# State 25 +# Apply action "W d5" +action: 31 + +# State 26 +# Apply action "B e7" +action: 46 + +# State 27 +# Apply action "W b1" +action: 1 + +# State 28 +# Apply action "B e5" +action: 32 + +# State 29 +# Apply action "W e4" +action: 25 + +# State 30 +# Apply action "B b7" +action: 43 + +# State 31 +# Apply action "W d4" +action: 24 + +# State 32 +# Apply action "B a3" +action: 14 + +# State 33 +# Apply action "W g1" +action: 6 + +# State 34 +# Apply action "B f2" +action: 12 + +# State 35 +# Apply action "W c2" +action: 9 + +# State 36 +# Apply action "B f6" +action: 40 + +# State 37 +# Apply action "W PASS" +action: 49 + +# State 38 +# Apply action "B f3" +action: 19 + +# State 39 +# Apply action "W g4" +action: 27 + +# State 40 +# GoState(komi=4.5, to_play=B, history.size()=40) +# +# 7 XX++XX+ +# 6 +XOOXXX +# 5 XO+OX+X +# 4 O+OOOXO +# 3 XO+OXXO +# 2 +XO++X+ +# 1 OOO+OOO +# ABCDEFG +IsTerminal() = False +History() = [24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21, 49, 15, 28, 38, 36, 31, 46, 1, 32, 25, 43, 24, 14, 6, 12, 9, 40, 49, 19, 27] +HistoryString() = "24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21, 49, 15, 28, 38, 36, 31, 46, 1, 32, 25, 43, 24, 14, 6, 12, 9, 40, 49, 19, 27" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21, 49, 15, 28, 38, 36, 31, 46, 1, 32, 25, 43, 24, 14, 6, 12, 9, 40, 49, 19, 27" +InformationStateString(1) = "24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21, 49, 15, 28, 38, 36, 31, 46, 1, 32, 25, 43, 24, 14, 6, 12, 9, 40, 49, 19, 27" +ObservationString(0) = "GoState(komi=4.5, to_play=B, history.size()=40)\n\n 7 XX++XX+\n 6 +XOOXXX\n 5 XO+OX+X\n 4 O+OOOXO\n 3 XO+OXXO\n 2 +XO++X+\n 1 OOO+OOO\n ABCDEFG\n" +ObservationString(1) = "GoState(komi=4.5, to_play=B, history.size()=40)\n\n 7 XX++XX+\n 6 +XOOXXX\n 5 XO+OX+X\n 4 O+OOOXO\n 3 XO+OXXO\n 2 +XO++X+\n 1 OOO+OOO\n ABCDEFG\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉ ◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ +◯◉◯◯◯◉◯ ◯◯◉◯◯◯◯ ◉◯◯◉◉◯◉ ◯◯◯◯◯◯◯ +◉◯◯◯◉◉◯ ◯◉◯◉◯◯◉ ◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◉◯ ◉◯◉◉◉◯◉ ◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯ +◉◯◯◯◉◯◉ ◯◉◯◉◯◯◯ ◯◯◉◯◯◉◯ ◯◯◯◯◯◯◯ +◯◉◯◯◉◉◉ ◯◯◉◉◯◯◯ ◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◉◉◯◯◉◉◯ ◯◯◯◯◯◯◯ ◯◯◉◉◯◯◉ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉ ◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ +◯◉◯◯◯◉◯ ◯◯◉◯◯◯◯ ◉◯◯◉◉◯◉ ◯◯◯◯◯◯◯ +◉◯◯◯◉◉◯ ◯◉◯◉◯◯◉ ◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◉◯ ◉◯◉◉◉◯◉ ◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯ +◉◯◯◯◉◯◉ ◯◉◯◉◯◯◯ ◯◯◉◯◯◉◯ ◯◯◯◯◯◯◯ +◯◉◯◯◉◉◉ ◯◯◉◉◯◯◯ ◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◉◉◯◯◉◉◯ ◯◯◯◯◯◯◯ ◯◯◉◉◯◯◉ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [3, 10, 11, 13, 22, 33, 
35, 44, 45, 48, 49] +StringLegalActions() = ["B d1", "B d2", "B e2", "B g2", "B b4", "B f5", "B a6", "B c7", "B d7", "B g7", "B PASS"] + +# Apply action "B d7" +action: 45 + +# State 41 +# GoState(komi=4.5, to_play=W, history.size()=41) +# +# 7 XX+XXX+ +# 6 +XOOXXX +# 5 XO+OX+X +# 4 O+OOOXO +# 3 XO+OXXO +# 2 +XO++X+ +# 1 OOO+OOO +# ABCDEFG +IsTerminal() = False +History() = [24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21, 49, 15, 28, 38, 36, 31, 46, 1, 32, 25, 43, 24, 14, 6, 12, 9, 40, 49, 19, 27, 45] +HistoryString() = "24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21, 49, 15, 28, 38, 36, 31, 46, 1, 32, 25, 43, 24, 14, 6, 12, 9, 40, 49, 19, 27, 45" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21, 49, 15, 28, 38, 36, 31, 46, 1, 32, 25, 43, 24, 14, 6, 12, 9, 40, 49, 19, 27, 45" +InformationStateString(1) = "24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21, 49, 15, 28, 38, 36, 31, 46, 1, 32, 25, 43, 24, 14, 6, 12, 9, 40, 49, 19, 27, 45" +ObservationString(0) = "GoState(komi=4.5, to_play=W, history.size()=41)\n\n 7 XX+XXX+\n 6 +XOOXXX\n 5 XO+OX+X\n 4 O+OOOXO\n 3 XO+OXXO\n 2 +XO++X+\n 1 OOO+OOO\n ABCDEFG\n" +ObservationString(1) = "GoState(komi=4.5, to_play=W, history.size()=41)\n\n 7 XX+XXX+\n 6 +XOOXXX\n 5 XO+OX+X\n 4 O+OOOXO\n 3 XO+OXXO\n 2 +XO++X+\n 1 OOO+OOO\n ABCDEFG\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉ ◯◯◯◉◯◯◯ ◉◉◉◉◉◉◉ +◯◉◯◯◯◉◯ ◯◯◉◯◯◯◯ ◉◯◯◉◉◯◉ ◉◉◉◉◉◉◉ +◉◯◯◯◉◉◯ ◯◉◯◉◯◯◉ ◯◯◉◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◉◯ ◉◯◉◉◉◯◉ ◯◉◯◯◯◯◯ ◉◉◉◉◉◉◉ +◉◯◯◯◉◯◉ ◯◉◯◉◯◯◯ ◯◯◉◯◯◉◯ ◉◉◉◉◉◉◉ +◯◉◯◯◉◉◉ ◯◯◉◉◯◯◯ ◉◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◉◉◯◉◉◉◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◉ ◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉ ◯◯◯◉◯◯◯ ◉◉◉◉◉◉◉ +◯◉◯◯◯◉◯ ◯◯◉◯◯◯◯ ◉◯◯◉◉◯◉ ◉◉◉◉◉◉◉ +◉◯◯◯◉◉◯ ◯◉◯◉◯◯◉ ◯◯◉◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◉◯ ◉◯◉◉◉◯◉ ◯◉◯◯◯◯◯ ◉◉◉◉◉◉◉ +◉◯◯◯◉◯◉ ◯◉◯◉◯◯◯ ◯◯◉◯◯◉◯ ◉◉◉◉◉◉◉ +◯◉◯◯◉◉◉ ◯◯◉◉◯◯◯ ◉◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◉◉◯◉◉◉◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◉ ◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [3, 7, 10, 11, 13, 16, 22, 30, 35, 44, 49] +StringLegalActions() = ["W d1", "W a2", "W d2", "W e2", "W g2", "W c3", "W b4", "W c5", "W a6", "W c7", "W PASS"] + +# Apply action "W a2" +action: 7 + +# State 42 +# Apply action "B d1" +action: 3 + +# State 43 +# Apply action "W a6" +action: 35 + +# State 44 +# Apply action "B PASS" +action: 49 + +# State 45 +# Apply action "W c7" +action: 44 + +# State 46 +# Apply action "B b7" +action: 43 + +# State 47 +# Apply action "W c5" +action: 30 + +# State 48 +# Apply action "B b6" +action: 36 + +# State 49 +# Apply action "W c3" +action: 16 + +# State 50 +# Apply action "B PASS" +action: 49 + +# State 51 +# Apply action "W a3" +action: 14 + +# State 52 +# Apply action "B d2" +action: 10 + +# State 53 +# Apply action "W b2" +action: 8 + +# State 54 +# Apply action "B g2" +action: 13 + +# State 55 +# Apply action "W a5" +action: 28 + +# State 56 +# Apply action "B f5" +action: 33 + +# State 57 +# Apply action "W g3" +action: 20 + +# State 58 +# Apply action "B PASS" +action: 49 + +# State 59 +# Apply action "W a7" +action: 42 + +# State 60 +# GoState(komi=4.5, to_play=B, history.size()=60) +# +# 7 O+OXXX+ +# 6 O+OOXXX +# 5 OOOOXXX +# 4 O+OOOX+ +# 3 OOOOXXO +# 2 OOOX+XX +# 1 OOOXOOO +# ABCDEFG +IsTerminal() = False +History() = [24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21, 49, 15, 28, 38, 36, 31, 46, 1, 32, 25, 43, 24, 14, 6, 12, 9, 40, 49, 19, 27, 45, 7, 3, 35, 49, 44, 43, 30, 36, 16, 49, 14, 10, 
8, 13, 28, 33, 20, 49, 42] +HistoryString() = "24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21, 49, 15, 28, 38, 36, 31, 46, 1, 32, 25, 43, 24, 14, 6, 12, 9, 40, 49, 19, 27, 45, 7, 3, 35, 49, 44, 43, 30, 36, 16, 49, 14, 10, 8, 13, 28, 33, 20, 49, 42" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21, 49, 15, 28, 38, 36, 31, 46, 1, 32, 25, 43, 24, 14, 6, 12, 9, 40, 49, 19, 27, 45, 7, 3, 35, 49, 44, 43, 30, 36, 16, 49, 14, 10, 8, 13, 28, 33, 20, 49, 42" +InformationStateString(1) = "24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21, 49, 15, 28, 38, 36, 31, 46, 1, 32, 25, 43, 24, 14, 6, 12, 9, 40, 49, 19, 27, 45, 7, 3, 35, 49, 44, 43, 30, 36, 16, 49, 14, 10, 8, 13, 28, 33, 20, 49, 42" +ObservationString(0) = "GoState(komi=4.5, to_play=B, history.size()=60)\n\n 7 O+OXXX+\n 6 O+OOXXX\n 5 OOOOXXX\n 4 O+OOOX+\n 3 OOOOXXO\n 2 OOOX+XX\n 1 OOOXOOO\n ABCDEFG\n" +ObservationString(1) = "GoState(komi=4.5, to_play=B, history.size()=60)\n\n 7 O+OXXX+\n 6 O+OOXXX\n 5 OOOOXXX\n 4 O+OOOX+\n 3 OOOOXXO\n 2 OOOX+XX\n 1 OOOXOOO\n ABCDEFG\n" +ObservationTensor(0): +◯◯◯◉◯◯◯ ◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◉◯◉◉ ◉◉◉◯◯◯◯ ◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◉◉◯ ◉◉◉◉◯◯◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◉◯ ◉◯◉◉◉◯◯ ◯◉◯◯◯◯◉ ◯◯◯◯◯◯◯ +◯◯◯◯◉◉◉ ◉◉◉◉◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◉◉◉ ◉◯◉◉◯◯◯ ◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◉◉◉◯ ◉◯◉◯◯◯◯ ◯◉◯◯◯◯◉ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◉◯◯◯ ◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◉◯◉◉ ◉◉◉◯◯◯◯ ◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◉◉◯ ◉◉◉◉◯◯◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◉◯ ◉◯◉◉◉◯◯ ◯◉◯◯◯◯◉ ◯◯◯◯◯◯◯ +◯◯◯◯◉◉◉ ◉◉◉◉◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◉◉◉ ◉◯◉◉◯◯◯ ◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◉◉◉◯ ◉◯◉◯◯◯◯ ◯◉◯◯◯◯◉ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [11, 27, 36, 43, 48, 49] +StringLegalActions() = ["B e2", "B g4", "B b6", "B b7", "B g7", "B PASS"] + +# Apply action "B e2" +action: 11 + +# State 61 +# GoState(komi=4.5, to_play=W, history.size()=61) +# +# 7 O+OXXX+ +# 6 O+OOXXX +# 5 OOOOXXX +# 4 O+OOOX+ +# 3 OOOOXXO +# 2 OOOXXXX +# 1 OOOX+++ +# ABCDEFG +IsTerminal() = False +History() = [24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21, 49, 15, 28, 38, 36, 31, 46, 1, 32, 25, 43, 24, 14, 6, 12, 9, 40, 49, 19, 27, 45, 7, 3, 35, 49, 44, 43, 30, 36, 16, 49, 14, 10, 8, 13, 28, 33, 20, 49, 42, 11] +HistoryString() = "24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21, 49, 15, 28, 38, 36, 31, 46, 1, 32, 25, 43, 24, 14, 6, 12, 9, 40, 49, 19, 27, 45, 7, 3, 35, 49, 44, 43, 30, 36, 16, 49, 14, 10, 8, 13, 28, 33, 20, 49, 42, 11" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21, 49, 15, 28, 38, 36, 31, 46, 1, 32, 25, 43, 24, 14, 6, 12, 9, 40, 49, 19, 27, 45, 7, 3, 35, 49, 44, 43, 30, 36, 16, 49, 14, 10, 8, 13, 28, 33, 20, 49, 42, 11" +InformationStateString(1) = "24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21, 49, 15, 28, 38, 36, 31, 46, 1, 32, 25, 43, 24, 14, 6, 12, 9, 40, 49, 19, 27, 45, 7, 3, 35, 49, 44, 43, 30, 36, 16, 49, 14, 10, 8, 13, 28, 33, 20, 49, 42, 11" +ObservationString(0) = "GoState(komi=4.5, to_play=W, history.size()=61)\n\n 7 O+OXXX+\n 6 O+OOXXX\n 5 OOOOXXX\n 4 O+OOOX+\n 3 OOOOXXO\n 2 OOOXXXX\n 1 OOOX+++\n ABCDEFG\n" +ObservationString(1) = "GoState(komi=4.5, to_play=W, history.size()=61)\n\n 7 O+OXXX+\n 6 O+OOXXX\n 5 OOOOXXX\n 4 O+OOOX+\n 3 OOOOXXO\n 2 OOOXXXX\n 1 OOOX+++\n ABCDEFG\n" 
+ObservationTensor(0): +◯◯◯◉◯◯◯ ◉◉◉◯◯◯◯ ◯◯◯◯◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◉◉◉◉ ◉◉◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◉◉◯ ◉◉◉◉◯◯◉ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◉◯ ◉◯◉◉◉◯◯ ◯◉◯◯◯◯◉ ◉◉◉◉◉◉◉ +◯◯◯◯◉◉◉ ◉◉◉◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◉◉◉ ◉◯◉◉◯◯◯ ◯◉◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◉◉◉◯ ◉◯◉◯◯◯◯ ◯◉◯◯◯◯◉ ◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◉◯◯◯ ◉◉◉◯◯◯◯ ◯◯◯◯◉◉◉ ◉◉◉◉◉◉◉ +◯◯◯◉◉◉◉ ◉◉◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◉◉◯ ◉◉◉◉◯◯◉ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◉◯ ◉◯◉◉◉◯◯ ◯◉◯◯◯◯◉ ◉◉◉◉◉◉◉ +◯◯◯◯◉◉◉ ◉◉◉◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◉◉◉ ◉◯◉◉◯◯◯ ◯◉◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◉◉◉◯ ◉◯◉◯◯◯◯ ◯◉◯◯◯◯◉ ◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [4, 5, 6, 22, 36, 43, 49] +StringLegalActions() = ["W e1", "W f1", "W g1", "W b4", "W b6", "W b7", "W PASS"] + +# Apply action "W b6" +action: 36 + +# State 62 +# Apply action "B e1" +action: 4 + +# State 63 +# Apply action "W g1" +action: 6 + +# State 64 +# Apply action "B g7" +action: 48 + +# State 65 +# Apply action "W PASS" +action: 49 + +# State 66 +# Apply action "B g4" +action: 27 + +# State 67 +# Apply action "W PASS" +action: 49 + +# State 68 +# Apply action "B PASS" +action: 49 + +# State 69 +# GoState(komi=4.5, to_play=W, history.size()=69) +# +# 7 O+OXXXX +# 6 OOOOXXX +# 5 OOOOXXX +# 4 O+OOOXX +# 3 OOOOXX+ +# 2 OOOXXXX +# 1 OOOXX+O +# ABCDEFG +IsTerminal() = True +History() = [24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21, 49, 15, 28, 38, 36, 31, 46, 1, 32, 25, 43, 24, 14, 6, 12, 9, 40, 49, 19, 27, 45, 7, 3, 35, 49, 44, 43, 30, 36, 16, 49, 14, 10, 8, 13, 28, 33, 20, 49, 42, 11, 36, 4, 6, 48, 49, 27, 49, 49] +HistoryString() = "24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21, 49, 15, 28, 38, 36, 31, 46, 1, 32, 25, 43, 24, 14, 6, 12, 9, 40, 49, 19, 27, 45, 7, 3, 35, 49, 44, 43, 30, 36, 16, 49, 14, 10, 8, 13, 28, 33, 20, 49, 42, 11, 36, 4, 6, 48, 49, 27, 49, 49" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21, 49, 15, 28, 38, 36, 31, 46, 1, 32, 25, 43, 24, 14, 6, 12, 9, 40, 49, 19, 27, 45, 7, 3, 35, 49, 44, 43, 30, 36, 16, 49, 14, 10, 8, 13, 28, 33, 20, 49, 42, 11, 36, 4, 6, 48, 49, 27, 49, 49" +InformationStateString(1) = "24, 17, 8, 29, 41, 2, 42, 37, 18, 4, 47, 0, 16, 23, 34, 20, 39, 5, 26, 21, 49, 15, 28, 38, 36, 31, 46, 1, 32, 25, 43, 24, 14, 6, 12, 9, 40, 49, 19, 27, 45, 7, 3, 35, 49, 44, 43, 30, 36, 16, 49, 14, 10, 8, 13, 28, 33, 20, 49, 42, 11, 36, 4, 6, 48, 49, 27, 49, 49" +ObservationString(0) = "GoState(komi=4.5, to_play=W, history.size()=69)\n\n 7 O+OXXXX\n 6 OOOOXXX\n 5 OOOOXXX\n 4 O+OOOXX\n 3 OOOOXX+\n 2 OOOXXXX\n 1 OOOXX+O\n ABCDEFG\n" +ObservationString(1) = "GoState(komi=4.5, to_play=W, history.size()=69)\n\n 7 O+OXXXX\n 6 OOOOXXX\n 5 OOOOXXX\n 4 O+OOOXX\n 3 OOOOXX+\n 2 OOOXXXX\n 1 OOOXX+O\n ABCDEFG\n" +ObservationTensor(0): +◯◯◯◉◉◯◯ ◉◉◉◯◯◯◉ ◯◯◯◯◯◉◯ ◉◉◉◉◉◉◉ +◯◯◯◉◉◉◉ ◉◉◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◉◉◯ ◉◉◉◉◯◯◯ ◯◯◯◯◯◯◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◉◉ ◉◯◉◉◉◯◯ ◯◉◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◉◉◉ ◉◉◉◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◉◉◉ ◉◉◉◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◉◉◉◉ ◉◯◉◯◯◯◯ ◯◉◯◯◯◯◯ ◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◉◉◯◯ ◉◉◉◯◯◯◉ ◯◯◯◯◯◉◯ ◉◉◉◉◉◉◉ +◯◯◯◉◉◉◉ ◉◉◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◉◉◯ ◉◉◉◉◯◯◯ ◯◯◯◯◯◯◉ ◉◉◉◉◉◉◉ +◯◯◯◯◯◉◉ ◉◯◉◉◉◯◯ ◯◉◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◉◉◉ ◉◉◉◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◉◉◉ ◉◉◉◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◉◉◉◉ ◉◯◉◯◯◯◯ ◯◉◯◯◯◯◯ ◉◉◉◉◉◉◉ +Rewards() = [-1, 1] +Returns() = [-1, 1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/goofspiel.txt 
b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/goofspiel.txt new file mode 100644 index 0000000..a8ecac2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/goofspiel.txt @@ -0,0 +1,339 @@ +game: goofspiel(imp_info=True,num_cards=4,points_order=descending) + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SIMULTANEOUS +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Goofspiel" +GameType.max_num_players = 10 +GameType.min_num_players = 2 +GameType.parameter_specification = ["egocentric", "imp_info", "num_cards", "num_turns", "players", "points_order", "returns_type"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = True +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "goofspiel" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 4 +PolicyTensorShape() = [4] +MaxChanceOutcomes() = 0 +GetParameters() = {egocentric=False,imp_info=True,num_cards=4,num_turns=-1,players=2,points_order=descending,returns_type=win_loss} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = point_totals: [2, 11], player_hand: [4], win_sequence: [4, 2], point_card_sequence: [4, 4], player_action_sequence: [4, 4] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 66 +ObservationTensorShape() = current_point_card: [4], remaining_point_cards: [4], point_totals: [2, 11], player_hand: [4], win_sequence: [4, 2] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 42 +MaxGameLength() = 4 +ToString() = "goofspiel(imp_info=True,num_cards=4,points_order=descending)" + +# State 0 +# P0 hand: 1 2 3 4 +# P1 hand: 1 2 3 4 +# P0 actions: +# P1 actions: +# Point card sequence: 4 +# Points: 0 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +InformationStateString(0) = "P0 hand: 1 2 3 4 \nP0 action sequence: \nPoint card sequence: 4 \nWin sequence: \nPoints: 0 0 \nTerminal?: 0\n" +InformationStateString(1) = "P1 hand: 1 2 3 4 \nP1 action sequence: \nPoint card sequence: 4 \nWin sequence: \nPoints: 0 0 \nTerminal?: 0\n" +InformationStateTensor(0).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(0).player_hand: ◉◉◉◉ +InformationStateTensor(0).win_sequence: ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(0).point_card_sequence: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +InformationStateTensor(0).player_action_sequence: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +InformationStateTensor(1).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1).player_hand: ◉◉◉◉ +InformationStateTensor(1).win_sequence: ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).point_card_sequence: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +InformationStateTensor(1).player_action_sequence: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationString(0) = "Current point card: 4\nRemaining Point Cards: 123\nPoints: 0 0 \nP0 hand: 1 2 3 4 \nWin sequence: \n" +ObservationString(1) = "Current point card: 4\nRemaining Point Cards: 123\nPoints: 0 0 \nP1 hand: 1 2 3 4 \nWin sequence: \n" +PublicObservationString() = "Current point card: 4\nRemaining Point Cards: 123\nWin sequence: \nPoints: 0 0 \n" +PrivateObservationString(0) = "Current point card: 4\nRemaining Point 
Cards: 123\nPoints: 0 0 \nP0 hand: 1 2 3 4 \nWin sequence: \n" +PrivateObservationString(1) = "Current point card: 4\nRemaining Point Cards: 123\nPoints: 0 0 \nP1 hand: 1 2 3 4 \nWin sequence: \n" +ObservationTensor(0).current_point_card: ◯◯◯◉ +ObservationTensor(0).remaining_point_cards: ◉◉◉◯ +ObservationTensor(0).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).player_hand: ◉◉◉◉ +ObservationTensor(0).win_sequence: ◯◯ + ◯◯ + ◯◯ + ◯◯ +ObservationTensor(1).current_point_card: ◯◯◯◉ +ObservationTensor(1).remaining_point_cards: ◉◉◉◯ +ObservationTensor(1).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).player_hand: ◉◉◉◉ +ObservationTensor(1).win_sequence: ◯◯ + ◯◯ + ◯◯ + ◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2, 3] +LegalActions(1) = [0, 1, 2, 3] +StringLegalActions(0) = ["[P0]Bid: 1", "[P0]Bid: 2", "[P0]Bid: 3", "[P0]Bid: 4"] +StringLegalActions(1) = ["[P1]Bid: 1", "[P1]Bid: 2", "[P1]Bid: 3", "[P1]Bid: 4"] + +# Apply joint action ["[P0]Bid: 3", "[P1]Bid: 4"] +actions: [2, 3] + +# State 1 +# P0 hand: 1 2 4 +# P1 hand: 1 2 3 +# P0 actions: 2 +# P1 actions: 3 +# Point card sequence: 4 3 +# Points: 0 4 +IsTerminal() = False +History() = [2, 3] +HistoryString() = "2, 3" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +InformationStateString(0) = "P0 hand: 1 2 4 \nP0 action sequence: 2 \nPoint card sequence: 4 3 \nWin sequence: 1 \nPoints: 0 4 \nTerminal?: 0\n" +InformationStateString(1) = "P1 hand: 1 2 3 \nP1 action sequence: 3 \nPoint card sequence: 4 3 \nWin sequence: 1 \nPoints: 0 4 \nTerminal?: 0\n" +InformationStateTensor(0).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ +InformationStateTensor(0).player_hand: ◉◉◯◉ +InformationStateTensor(0).win_sequence: ◯◉ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(0).point_card_sequence: ◯◯◯◉ + ◯◯◉◯ + ◯◯◯◯ + ◯◯◯◯ +InformationStateTensor(0).player_action_sequence: ◯◯◉◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +InformationStateTensor(1).point_totals: ◯◯◯◯◉◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1).player_hand: ◉◉◉◯ +InformationStateTensor(1).win_sequence: ◯◉ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).point_card_sequence: ◯◯◯◉ + ◯◯◉◯ + ◯◯◯◯ + ◯◯◯◯ +InformationStateTensor(1).player_action_sequence: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationString(0) = "Current point card: 3\nRemaining Point Cards: 12\nPoints: 0 4 \nP0 hand: 1 2 4 \nWin sequence: 1 \n" +ObservationString(1) = "Current point card: 3\nRemaining Point Cards: 12\nPoints: 0 4 \nP1 hand: 1 2 3 \nWin sequence: 1 \n" +PublicObservationString() = "Current point card: 3\nRemaining Point Cards: 12\nWin sequence: 1 \nPoints: 0 4 \n" +PrivateObservationString(0) = "Current point card: 3\nRemaining Point Cards: 12\nPoints: 0 4 \nP0 hand: 1 2 4 \nWin sequence: 1 \n" +PrivateObservationString(1) = "Current point card: 3\nRemaining Point Cards: 12\nPoints: 0 4 \nP1 hand: 1 2 3 \nWin sequence: 1 \n" +ObservationTensor(0).current_point_card: ◯◯◉◯ +ObservationTensor(0).remaining_point_cards: ◉◉◯◯ +ObservationTensor(0).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ +ObservationTensor(0).player_hand: ◉◉◯◉ +ObservationTensor(0).win_sequence: ◯◉ + ◯◯ + ◯◯ + ◯◯ +ObservationTensor(1).current_point_card: ◯◯◉◯ +ObservationTensor(1).remaining_point_cards: ◉◉◯◯ +ObservationTensor(1).point_totals: ◯◯◯◯◉◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).player_hand: ◉◉◉◯ +ObservationTensor(1).win_sequence: ◯◉ + ◯◯ + ◯◯ + ◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 3] +LegalActions(1) = [0, 1, 2] +StringLegalActions(0) = ["[P0]Bid: 1", "[P0]Bid: 2", "[P0]Bid: 4"] 
+StringLegalActions(1) = ["[P1]Bid: 1", "[P1]Bid: 2", "[P1]Bid: 3"] + +# Apply joint action ["[P0]Bid: 2", "[P1]Bid: 1"] +actions: [1, 0] + +# State 2 +# P0 hand: 1 4 +# P1 hand: 2 3 +# P0 actions: 2 1 +# P1 actions: 3 0 +# Point card sequence: 4 3 2 +# Points: 3 4 +IsTerminal() = False +History() = [2, 3, 1, 0] +HistoryString() = "2, 3, 1, 0" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +InformationStateString(0) = "P0 hand: 1 4 \nP0 action sequence: 2 1 \nPoint card sequence: 4 3 2 \nWin sequence: 1 0 \nPoints: 3 4 \nTerminal?: 0\n" +InformationStateString(1) = "P1 hand: 2 3 \nP1 action sequence: 3 0 \nPoint card sequence: 4 3 2 \nWin sequence: 1 0 \nPoints: 3 4 \nTerminal?: 0\n" +InformationStateTensor(0).point_totals: ◯◯◯◉◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ +InformationStateTensor(0).player_hand: ◉◯◯◉ +InformationStateTensor(0).win_sequence: ◯◉ + ◉◯ + ◯◯ + ◯◯ +InformationStateTensor(0).point_card_sequence: ◯◯◯◉ + ◯◯◉◯ + ◯◉◯◯ + ◯◯◯◯ +InformationStateTensor(0).player_action_sequence: ◯◯◉◯ + ◯◉◯◯ + ◯◯◯◯ + ◯◯◯◯ +InformationStateTensor(1).point_totals: ◯◯◯◯◉◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯◯◯◯ +InformationStateTensor(1).player_hand: ◯◉◉◯ +InformationStateTensor(1).win_sequence: ◯◉ + ◉◯ + ◯◯ + ◯◯ +InformationStateTensor(1).point_card_sequence: ◯◯◯◉ + ◯◯◉◯ + ◯◉◯◯ + ◯◯◯◯ +InformationStateTensor(1).player_action_sequence: ◯◯◯◉ + ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationString(0) = "Current point card: 2\nRemaining Point Cards: 1\nPoints: 3 4 \nP0 hand: 1 4 \nWin sequence: 1 0 \n" +ObservationString(1) = "Current point card: 2\nRemaining Point Cards: 1\nPoints: 3 4 \nP1 hand: 2 3 \nWin sequence: 1 0 \n" +PublicObservationString() = "Current point card: 2\nRemaining Point Cards: 1\nWin sequence: 1 0 \nPoints: 3 4 \n" +PrivateObservationString(0) = "Current point card: 2\nRemaining Point Cards: 1\nPoints: 3 4 \nP0 hand: 1 4 \nWin sequence: 1 0 \n" +PrivateObservationString(1) = "Current point card: 2\nRemaining Point Cards: 1\nPoints: 3 4 \nP1 hand: 2 3 \nWin sequence: 1 0 \n" +ObservationTensor(0).current_point_card: ◯◉◯◯ +ObservationTensor(0).remaining_point_cards: ◉◯◯◯ +ObservationTensor(0).point_totals: ◯◯◯◉◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ +ObservationTensor(0).player_hand: ◉◯◯◉ +ObservationTensor(0).win_sequence: ◯◉ + ◉◯ + ◯◯ + ◯◯ +ObservationTensor(1).current_point_card: ◯◉◯◯ +ObservationTensor(1).remaining_point_cards: ◉◯◯◯ +ObservationTensor(1).point_totals: ◯◯◯◯◉◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(1).player_hand: ◯◉◉◯ +ObservationTensor(1).win_sequence: ◯◉ + ◉◯ + ◯◯ + ◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 3] +LegalActions(1) = [1, 2] +StringLegalActions(0) = ["[P0]Bid: 1", "[P0]Bid: 4"] +StringLegalActions(1) = ["[P1]Bid: 2", "[P1]Bid: 3"] + +# Apply joint action ["[P0]Bid: 4", "[P1]Bid: 2"] +actions: [3, 1] + +# State 3 +# P0 hand: +# P1 hand: +# P0 actions: 2 1 3 0 +# P1 actions: 3 0 1 2 +# Point card sequence: 4 3 2 1 +# Points: 5 5 +IsTerminal() = True +History() = [2, 3, 1, 0, 3, 1] +HistoryString() = "2, 3, 1, 0, 3, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "P0 hand: \nP0 action sequence: 2 1 3 0 \nPoint card sequence: 4 3 2 1 \nWin sequence: 1 0 0 1 \nPoints: 5 5 \nTerminal?: 1\n" +InformationStateString(1) = "P1 hand: \nP1 action sequence: 3 0 1 2 \nPoint card sequence: 4 3 2 1 \nWin sequence: 1 0 0 1 \nPoints: 5 5 \nTerminal?: 1\n" +InformationStateTensor(0).point_totals: ◯◯◯◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◉◯◯◯◯◯ +InformationStateTensor(0).player_hand: ◯◯◯◯ +InformationStateTensor(0).win_sequence: ◯◉ + ◉◯ + ◉◯ + ◯◉ 
+InformationStateTensor(0).point_card_sequence: ◯◯◯◉ + ◯◯◉◯ + ◯◉◯◯ + ◉◯◯◯ +InformationStateTensor(0).player_action_sequence: ◯◯◉◯ + ◯◉◯◯ + ◯◯◯◉ + ◉◯◯◯ +InformationStateTensor(1).point_totals: ◯◯◯◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◉◯◯◯◯◯ +InformationStateTensor(1).player_hand: ◯◯◯◯ +InformationStateTensor(1).win_sequence: ◯◉ + ◉◯ + ◉◯ + ◯◉ +InformationStateTensor(1).point_card_sequence: ◯◯◯◉ + ◯◯◉◯ + ◯◉◯◯ + ◉◯◯◯ +InformationStateTensor(1).player_action_sequence: ◯◯◯◉ + ◉◯◯◯ + ◯◉◯◯ + ◯◯◉◯ +ObservationString(0) = "Current point card: 1\nRemaining Point Cards: \nPoints: 5 5 \nP0 hand: \nWin sequence: 1 0 0 1 \n" +ObservationString(1) = "Current point card: 1\nRemaining Point Cards: \nPoints: 5 5 \nP1 hand: \nWin sequence: 1 0 0 1 \n" +PublicObservationString() = "Current point card: 1\nRemaining Point Cards: \nWin sequence: 1 0 0 1 \nPoints: 5 5 \n" +PrivateObservationString(0) = "Current point card: 1\nRemaining Point Cards: \nPoints: 5 5 \nP0 hand: \nWin sequence: 1 0 0 1 \n" +PrivateObservationString(1) = "Current point card: 1\nRemaining Point Cards: \nPoints: 5 5 \nP1 hand: \nWin sequence: 1 0 0 1 \n" +ObservationTensor(0).current_point_card: ◉◯◯◯ +ObservationTensor(0).remaining_point_cards: ◯◯◯◯ +ObservationTensor(0).point_totals: ◯◯◯◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(0).player_hand: ◯◯◯◯ +ObservationTensor(0).win_sequence: ◯◉ + ◉◯ + ◉◯ + ◯◉ +ObservationTensor(1).current_point_card: ◉◯◯◯ +ObservationTensor(1).remaining_point_cards: ◯◯◯◯ +ObservationTensor(1).point_totals: ◯◯◯◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(1).player_hand: ◯◯◯◯ +ObservationTensor(1).win_sequence: ◯◉ + ◉◯ + ◉◯ + ◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/goofspiel_egocentric.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/goofspiel_egocentric.txt new file mode 100644 index 0000000..c045ced --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/goofspiel_egocentric.txt @@ -0,0 +1,339 @@ +game: goofspiel(imp_info=True,egocentric=True,num_cards=4,points_order=descending) + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SIMULTANEOUS +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Goofspiel" +GameType.max_num_players = 10 +GameType.min_num_players = 2 +GameType.parameter_specification = ["egocentric", "imp_info", "num_cards", "num_turns", "players", "points_order", "returns_type"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = True +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "goofspiel" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 4 +PolicyTensorShape() = [4] +MaxChanceOutcomes() = 0 +GetParameters() = {egocentric=True,imp_info=True,num_cards=4,num_turns=-1,players=2,points_order=descending,returns_type=win_loss} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = point_totals: [2, 11], player_hand: [4], win_sequence: [4, 2], point_card_sequence: [4, 4], player_action_sequence: [4, 4] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 66 +ObservationTensorShape() = current_point_card: [4], remaining_point_cards: [4], point_totals: [2, 11], player_hand: [4], win_sequence: [4, 2] 
+ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 42 +MaxGameLength() = 4 +ToString() = "goofspiel(egocentric=True,imp_info=True,num_cards=4,points_order=descending)" + +# State 0 +# P0 hand: 1 2 3 4 +# P1 hand: 1 2 3 4 +# P0 actions: +# P1 actions: +# Point card sequence: 4 +# Points: 0 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +InformationStateString(0) = "P0 hand: 1 2 3 4 \nP0 action sequence: \nPoint card sequence: 4 \nWin sequence: \nPoints: 0 0 \nTerminal?: 0\n" +InformationStateString(1) = "P1 hand: 1 2 3 4 \nP1 action sequence: \nPoint card sequence: 4 \nWin sequence: \nPoints: 0 0 \nTerminal?: 0\n" +InformationStateTensor(0).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(0).player_hand: ◉◉◉◉ +InformationStateTensor(0).win_sequence: ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(0).point_card_sequence: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +InformationStateTensor(0).player_action_sequence: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +InformationStateTensor(1).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1).player_hand: ◉◉◉◉ +InformationStateTensor(1).win_sequence: ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).point_card_sequence: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +InformationStateTensor(1).player_action_sequence: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationString(0) = "Current point card: 4\nRemaining Point Cards: 123\nPoints: 0 0 \nP0 hand: 1 2 3 4 \nWin sequence: \n" +ObservationString(1) = "Current point card: 4\nRemaining Point Cards: 123\nPoints: 0 0 \nP1 hand: 1 2 3 4 \nWin sequence: \n" +PublicObservationString() = "Current point card: 4\nRemaining Point Cards: 123\nWin sequence: \nPoints: 0 0 \n" +PrivateObservationString(0) = "Current point card: 4\nRemaining Point Cards: 123\nPoints: 0 0 \nP0 hand: 1 2 3 4 \nWin sequence: \n" +PrivateObservationString(1) = "Current point card: 4\nRemaining Point Cards: 123\nPoints: 0 0 \nP1 hand: 1 2 3 4 \nWin sequence: \n" +ObservationTensor(0).current_point_card: ◯◯◯◉ +ObservationTensor(0).remaining_point_cards: ◉◉◉◯ +ObservationTensor(0).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).player_hand: ◉◉◉◉ +ObservationTensor(0).win_sequence: ◯◯ + ◯◯ + ◯◯ + ◯◯ +ObservationTensor(1).current_point_card: ◯◯◯◉ +ObservationTensor(1).remaining_point_cards: ◉◉◉◯ +ObservationTensor(1).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).player_hand: ◉◉◉◉ +ObservationTensor(1).win_sequence: ◯◯ + ◯◯ + ◯◯ + ◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2, 3] +LegalActions(1) = [0, 1, 2, 3] +StringLegalActions(0) = ["[P0]Bid: 1", "[P0]Bid: 2", "[P0]Bid: 3", "[P0]Bid: 4"] +StringLegalActions(1) = ["[P1]Bid: 1", "[P1]Bid: 2", "[P1]Bid: 3", "[P1]Bid: 4"] + +# Apply joint action ["[P0]Bid: 3", "[P1]Bid: 4"] +actions: [2, 3] + +# State 1 +# P0 hand: 1 2 4 +# P1 hand: 1 2 3 +# P0 actions: 2 +# P1 actions: 3 +# Point card sequence: 4 3 +# Points: 0 4 +IsTerminal() = False +History() = [2, 3] +HistoryString() = "2, 3" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +InformationStateString(0) = "P0 hand: 1 2 4 \nP0 action sequence: 2 \nPoint card sequence: 4 3 \nWin sequence: 1 \nPoints: 0 4 \nTerminal?: 0\n" +InformationStateString(1) = "P1 hand: 1 2 3 \nP1 action sequence: 3 \nPoint card sequence: 4 3 \nWin sequence: 1 \nPoints: 0 4 \nTerminal?: 0\n" +InformationStateTensor(0).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ +InformationStateTensor(0).player_hand: ◉◉◯◉ 
+InformationStateTensor(0).win_sequence: ◯◉ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(0).point_card_sequence: ◯◯◯◉ + ◯◯◉◯ + ◯◯◯◯ + ◯◯◯◯ +InformationStateTensor(0).player_action_sequence: ◯◯◉◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +InformationStateTensor(1).point_totals: ◯◯◯◯◉◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1).player_hand: ◉◉◉◯ +InformationStateTensor(1).win_sequence: ◉◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).point_card_sequence: ◯◯◯◉ + ◯◯◉◯ + ◯◯◯◯ + ◯◯◯◯ +InformationStateTensor(1).player_action_sequence: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationString(0) = "Current point card: 3\nRemaining Point Cards: 12\nPoints: 0 4 \nP0 hand: 1 2 4 \nWin sequence: 1 \n" +ObservationString(1) = "Current point card: 3\nRemaining Point Cards: 12\nPoints: 0 4 \nP1 hand: 1 2 3 \nWin sequence: 1 \n" +PublicObservationString() = "Current point card: 3\nRemaining Point Cards: 12\nWin sequence: 1 \nPoints: 0 4 \n" +PrivateObservationString(0) = "Current point card: 3\nRemaining Point Cards: 12\nPoints: 0 4 \nP0 hand: 1 2 4 \nWin sequence: 1 \n" +PrivateObservationString(1) = "Current point card: 3\nRemaining Point Cards: 12\nPoints: 0 4 \nP1 hand: 1 2 3 \nWin sequence: 1 \n" +ObservationTensor(0).current_point_card: ◯◯◉◯ +ObservationTensor(0).remaining_point_cards: ◉◉◯◯ +ObservationTensor(0).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ +ObservationTensor(0).player_hand: ◉◉◯◉ +ObservationTensor(0).win_sequence: ◯◉ + ◯◯ + ◯◯ + ◯◯ +ObservationTensor(1).current_point_card: ◯◯◉◯ +ObservationTensor(1).remaining_point_cards: ◉◉◯◯ +ObservationTensor(1).point_totals: ◯◯◯◯◉◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).player_hand: ◉◉◉◯ +ObservationTensor(1).win_sequence: ◉◯ + ◯◯ + ◯◯ + ◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 3] +LegalActions(1) = [0, 1, 2] +StringLegalActions(0) = ["[P0]Bid: 1", "[P0]Bid: 2", "[P0]Bid: 4"] +StringLegalActions(1) = ["[P1]Bid: 1", "[P1]Bid: 2", "[P1]Bid: 3"] + +# Apply joint action ["[P0]Bid: 2", "[P1]Bid: 1"] +actions: [1, 0] + +# State 2 +# P0 hand: 1 4 +# P1 hand: 2 3 +# P0 actions: 2 1 +# P1 actions: 3 0 +# Point card sequence: 4 3 2 +# Points: 3 4 +IsTerminal() = False +History() = [2, 3, 1, 0] +HistoryString() = "2, 3, 1, 0" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +InformationStateString(0) = "P0 hand: 1 4 \nP0 action sequence: 2 1 \nPoint card sequence: 4 3 2 \nWin sequence: 1 0 \nPoints: 3 4 \nTerminal?: 0\n" +InformationStateString(1) = "P1 hand: 2 3 \nP1 action sequence: 3 0 \nPoint card sequence: 4 3 2 \nWin sequence: 1 0 \nPoints: 3 4 \nTerminal?: 0\n" +InformationStateTensor(0).point_totals: ◯◯◯◉◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ +InformationStateTensor(0).player_hand: ◉◯◯◉ +InformationStateTensor(0).win_sequence: ◯◉ + ◉◯ + ◯◯ + ◯◯ +InformationStateTensor(0).point_card_sequence: ◯◯◯◉ + ◯◯◉◯ + ◯◉◯◯ + ◯◯◯◯ +InformationStateTensor(0).player_action_sequence: ◯◯◉◯ + ◯◉◯◯ + ◯◯◯◯ + ◯◯◯◯ +InformationStateTensor(1).point_totals: ◯◯◯◯◉◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯◯◯◯ +InformationStateTensor(1).player_hand: ◯◉◉◯ +InformationStateTensor(1).win_sequence: ◉◯ + ◯◉ + ◯◯ + ◯◯ +InformationStateTensor(1).point_card_sequence: ◯◯◯◉ + ◯◯◉◯ + ◯◉◯◯ + ◯◯◯◯ +InformationStateTensor(1).player_action_sequence: ◯◯◯◉ + ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationString(0) = "Current point card: 2\nRemaining Point Cards: 1\nPoints: 3 4 \nP0 hand: 1 4 \nWin sequence: 1 0 \n" +ObservationString(1) = "Current point card: 2\nRemaining Point Cards: 1\nPoints: 3 4 \nP1 hand: 2 3 \nWin sequence: 1 0 \n" +PublicObservationString() = "Current point card: 2\nRemaining Point Cards: 1\nWin sequence: 1 0 
\nPoints: 3 4 \n" +PrivateObservationString(0) = "Current point card: 2\nRemaining Point Cards: 1\nPoints: 3 4 \nP0 hand: 1 4 \nWin sequence: 1 0 \n" +PrivateObservationString(1) = "Current point card: 2\nRemaining Point Cards: 1\nPoints: 3 4 \nP1 hand: 2 3 \nWin sequence: 1 0 \n" +ObservationTensor(0).current_point_card: ◯◉◯◯ +ObservationTensor(0).remaining_point_cards: ◉◯◯◯ +ObservationTensor(0).point_totals: ◯◯◯◉◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ +ObservationTensor(0).player_hand: ◉◯◯◉ +ObservationTensor(0).win_sequence: ◯◉ + ◉◯ + ◯◯ + ◯◯ +ObservationTensor(1).current_point_card: ◯◉◯◯ +ObservationTensor(1).remaining_point_cards: ◉◯◯◯ +ObservationTensor(1).point_totals: ◯◯◯◯◉◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(1).player_hand: ◯◉◉◯ +ObservationTensor(1).win_sequence: ◉◯ + ◯◉ + ◯◯ + ◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 3] +LegalActions(1) = [1, 2] +StringLegalActions(0) = ["[P0]Bid: 1", "[P0]Bid: 4"] +StringLegalActions(1) = ["[P1]Bid: 2", "[P1]Bid: 3"] + +# Apply joint action ["[P0]Bid: 4", "[P1]Bid: 2"] +actions: [3, 1] + +# State 3 +# P0 hand: +# P1 hand: +# P0 actions: 2 1 3 0 +# P1 actions: 3 0 1 2 +# Point card sequence: 4 3 2 1 +# Points: 5 5 +IsTerminal() = True +History() = [2, 3, 1, 0, 3, 1] +HistoryString() = "2, 3, 1, 0, 3, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "P0 hand: \nP0 action sequence: 2 1 3 0 \nPoint card sequence: 4 3 2 1 \nWin sequence: 1 0 0 1 \nPoints: 5 5 \nTerminal?: 1\n" +InformationStateString(1) = "P1 hand: \nP1 action sequence: 3 0 1 2 \nPoint card sequence: 4 3 2 1 \nWin sequence: 1 0 0 1 \nPoints: 5 5 \nTerminal?: 1\n" +InformationStateTensor(0).point_totals: ◯◯◯◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◉◯◯◯◯◯ +InformationStateTensor(0).player_hand: ◯◯◯◯ +InformationStateTensor(0).win_sequence: ◯◉ + ◉◯ + ◉◯ + ◯◉ +InformationStateTensor(0).point_card_sequence: ◯◯◯◉ + ◯◯◉◯ + ◯◉◯◯ + ◉◯◯◯ +InformationStateTensor(0).player_action_sequence: ◯◯◉◯ + ◯◉◯◯ + ◯◯◯◉ + ◉◯◯◯ +InformationStateTensor(1).point_totals: ◯◯◯◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◉◯◯◯◯◯ +InformationStateTensor(1).player_hand: ◯◯◯◯ +InformationStateTensor(1).win_sequence: ◉◯ + ◯◉ + ◯◉ + ◉◯ +InformationStateTensor(1).point_card_sequence: ◯◯◯◉ + ◯◯◉◯ + ◯◉◯◯ + ◉◯◯◯ +InformationStateTensor(1).player_action_sequence: ◯◯◯◉ + ◉◯◯◯ + ◯◉◯◯ + ◯◯◉◯ +ObservationString(0) = "Current point card: 1\nRemaining Point Cards: \nPoints: 5 5 \nP0 hand: \nWin sequence: 1 0 0 1 \n" +ObservationString(1) = "Current point card: 1\nRemaining Point Cards: \nPoints: 5 5 \nP1 hand: \nWin sequence: 1 0 0 1 \n" +PublicObservationString() = "Current point card: 1\nRemaining Point Cards: \nWin sequence: 1 0 0 1 \nPoints: 5 5 \n" +PrivateObservationString(0) = "Current point card: 1\nRemaining Point Cards: \nPoints: 5 5 \nP0 hand: \nWin sequence: 1 0 0 1 \n" +PrivateObservationString(1) = "Current point card: 1\nRemaining Point Cards: \nPoints: 5 5 \nP1 hand: \nWin sequence: 1 0 0 1 \n" +ObservationTensor(0).current_point_card: ◉◯◯◯ +ObservationTensor(0).remaining_point_cards: ◯◯◯◯ +ObservationTensor(0).point_totals: ◯◯◯◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(0).player_hand: ◯◯◯◯ +ObservationTensor(0).win_sequence: ◯◉ + ◉◯ + ◉◯ + ◯◉ +ObservationTensor(1).current_point_card: ◉◯◯◯ +ObservationTensor(1).remaining_point_cards: ◯◯◯◯ +ObservationTensor(1).point_totals: ◯◯◯◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(1).player_hand: ◯◯◯◯ +ObservationTensor(1).win_sequence: ◉◯ + ◯◉ + ◯◉ + ◉◯ +Rewards() = [0, 0] +Returns() = [0, 0] diff --git 
a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/goofspiel_random_points_order.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/goofspiel_random_points_order.txt new file mode 100644 index 0000000..970fe8d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/goofspiel_random_points_order.txt @@ -0,0 +1,493 @@ +game: goofspiel(imp_info=True,num_cards=4) + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SIMULTANEOUS +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Goofspiel" +GameType.max_num_players = 10 +GameType.min_num_players = 2 +GameType.parameter_specification = ["egocentric", "imp_info", "num_cards", "num_turns", "players", "points_order", "returns_type"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = True +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "goofspiel" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 4 +PolicyTensorShape() = [4] +MaxChanceOutcomes() = 4 +GetParameters() = {egocentric=False,imp_info=True,num_cards=4,num_turns=-1,players=2,points_order=random,returns_type=win_loss} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = point_totals: [2, 11], player_hand: [4], win_sequence: [4, 2], point_card_sequence: [4, 4], player_action_sequence: [4, 4] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 66 +ObservationTensorShape() = current_point_card: [4], remaining_point_cards: [4], point_totals: [2, 11], player_hand: [4], win_sequence: [4, 2] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 42 +MaxGameLength() = 4 +ToString() = "goofspiel(imp_info=True,num_cards=4)" + +# State 0 +# P0 hand: 1 2 3 4 +# P1 hand: 1 2 3 4 +# P0 actions: +# P1 actions: +# Point card sequence: +# Points: 0 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "P0 hand: 1 2 3 4 \nP0 action sequence: \nPoint card sequence: \nWin sequence: \nPoints: 0 0 \nTerminal?: 0\n" +InformationStateString(1) = "P1 hand: 1 2 3 4 \nP1 action sequence: \nPoint card sequence: \nWin sequence: \nPoints: 0 0 \nTerminal?: 0\n" +InformationStateTensor(0).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(0).player_hand: ◉◉◉◉ +InformationStateTensor(0).win_sequence: ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(0).point_card_sequence: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +InformationStateTensor(0).player_action_sequence: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +InformationStateTensor(1).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1).player_hand: ◉◉◉◉ +InformationStateTensor(1).win_sequence: ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).point_card_sequence: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +InformationStateTensor(1).player_action_sequence: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationString(0) = "Current point card: 0\nRemaining Point Cards: 1234\nPoints: 0 0 \nP0 hand: 1 2 3 4 \nWin sequence: \n" +ObservationString(1) = "Current point card: 0\nRemaining Point Cards: 1234\nPoints: 0 0 \nP1 hand: 1 2 3 4 \nWin sequence: \n" +PublicObservationString() = "Current point card: 0\nRemaining Point Cards: 1234\nWin 
sequence: \nPoints: 0 0 \n" +PrivateObservationString(0) = "Current point card: 0\nRemaining Point Cards: 1234\nPoints: 0 0 \nP0 hand: 1 2 3 4 \nWin sequence: \n" +PrivateObservationString(1) = "Current point card: 0\nRemaining Point Cards: 1234\nPoints: 0 0 \nP1 hand: 1 2 3 4 \nWin sequence: \n" +ObservationTensor(0).current_point_card: ◯◯◯◯ +ObservationTensor(0).remaining_point_cards: ◉◉◉◉ +ObservationTensor(0).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).player_hand: ◉◉◉◉ +ObservationTensor(0).win_sequence: ◯◯ + ◯◯ + ◯◯ + ◯◯ +ObservationTensor(1).current_point_card: ◯◯◯◯ +ObservationTensor(1).remaining_point_cards: ◉◉◉◉ +ObservationTensor(1).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).player_hand: ◉◉◉◉ +ObservationTensor(1).win_sequence: ◯◯ + ◯◯ + ◯◯ + ◯◯ +ChanceOutcomes() = [(0,0.25), (1,0.25), (2,0.25), (3,0.25)] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["Deal 1", "Deal 2", "Deal 3", "Deal 4"] + +# Apply action "Deal 4" +action: 3 + +# State 1 +# P0 hand: 1 2 3 4 +# P1 hand: 1 2 3 4 +# P0 actions: +# P1 actions: +# Point card sequence: 4 +# Points: 0 0 +IsTerminal() = False +History() = [3] +HistoryString() = "3" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +InformationStateString(0) = "P0 hand: 1 2 3 4 \nP0 action sequence: \nPoint card sequence: 4 \nWin sequence: \nPoints: 0 0 \nTerminal?: 0\n" +InformationStateString(1) = "P1 hand: 1 2 3 4 \nP1 action sequence: \nPoint card sequence: 4 \nWin sequence: \nPoints: 0 0 \nTerminal?: 0\n" +InformationStateTensor(0).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(0).player_hand: ◉◉◉◉ +InformationStateTensor(0).win_sequence: ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(0).point_card_sequence: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +InformationStateTensor(0).player_action_sequence: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +InformationStateTensor(1).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1).player_hand: ◉◉◉◉ +InformationStateTensor(1).win_sequence: ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).point_card_sequence: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +InformationStateTensor(1).player_action_sequence: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationString(0) = "Current point card: 4\nRemaining Point Cards: 123\nPoints: 0 0 \nP0 hand: 1 2 3 4 \nWin sequence: \n" +ObservationString(1) = "Current point card: 4\nRemaining Point Cards: 123\nPoints: 0 0 \nP1 hand: 1 2 3 4 \nWin sequence: \n" +PublicObservationString() = "Current point card: 4\nRemaining Point Cards: 123\nWin sequence: \nPoints: 0 0 \n" +PrivateObservationString(0) = "Current point card: 4\nRemaining Point Cards: 123\nPoints: 0 0 \nP0 hand: 1 2 3 4 \nWin sequence: \n" +PrivateObservationString(1) = "Current point card: 4\nRemaining Point Cards: 123\nPoints: 0 0 \nP1 hand: 1 2 3 4 \nWin sequence: \n" +ObservationTensor(0).current_point_card: ◯◯◯◉ +ObservationTensor(0).remaining_point_cards: ◉◉◉◯ +ObservationTensor(0).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).player_hand: ◉◉◉◉ +ObservationTensor(0).win_sequence: ◯◯ + ◯◯ + ◯◯ + ◯◯ +ObservationTensor(1).current_point_card: ◯◯◯◉ +ObservationTensor(1).remaining_point_cards: ◉◉◉◯ +ObservationTensor(1).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).player_hand: ◉◉◉◉ +ObservationTensor(1).win_sequence: ◯◯ + ◯◯ + ◯◯ + ◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2, 3] +LegalActions(1) = [0, 1, 2, 3] +StringLegalActions(0) = ["[P0]Bid: 1", "[P0]Bid: 2", "[P0]Bid: 3", "[P0]Bid: 4"] +StringLegalActions(1) = ["[P1]Bid: 1", 
"[P1]Bid: 2", "[P1]Bid: 3", "[P1]Bid: 4"] + +# Apply joint action ["[P0]Bid: 1", "[P1]Bid: 1"] +actions: [0, 0] + +# State 2 +# P0 hand: 2 3 4 +# P1 hand: 2 3 4 +# P0 actions: 0 +# P1 actions: 0 +# Point card sequence: 4 +# Points: 0 0 +IsTerminal() = False +History() = [3, 0, 0] +HistoryString() = "3, 0, 0" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "P0 hand: 2 3 4 \nP0 action sequence: 0 \nPoint card sequence: 4 \nWin sequence: -3 \nPoints: 0 0 \nTerminal?: 0\n" +InformationStateString(1) = "P1 hand: 2 3 4 \nP1 action sequence: 0 \nPoint card sequence: 4 \nWin sequence: -3 \nPoints: 0 0 \nTerminal?: 0\n" +InformationStateTensor(0).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(0).player_hand: ◯◉◉◉ +InformationStateTensor(0).win_sequence: ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(0).point_card_sequence: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +InformationStateTensor(0).player_action_sequence: ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +InformationStateTensor(1).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1).player_hand: ◯◉◉◉ +InformationStateTensor(1).win_sequence: ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).point_card_sequence: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +InformationStateTensor(1).player_action_sequence: ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationString(0) = "Current point card: 0\nRemaining Point Cards: 123\nPoints: 0 0 \nP0 hand: 2 3 4 \nWin sequence: -3 \n" +ObservationString(1) = "Current point card: 0\nRemaining Point Cards: 123\nPoints: 0 0 \nP1 hand: 2 3 4 \nWin sequence: -3 \n" +PublicObservationString() = "Current point card: 0\nRemaining Point Cards: 123\nWin sequence: -3 \nPoints: 0 0 \n" +PrivateObservationString(0) = "Current point card: 0\nRemaining Point Cards: 123\nPoints: 0 0 \nP0 hand: 2 3 4 \nWin sequence: -3 \n" +PrivateObservationString(1) = "Current point card: 0\nRemaining Point Cards: 123\nPoints: 0 0 \nP1 hand: 2 3 4 \nWin sequence: -3 \n" +ObservationTensor(0).current_point_card: ◯◯◯◉ +ObservationTensor(0).remaining_point_cards: ◉◉◉◯ +ObservationTensor(0).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).player_hand: ◯◉◉◉ +ObservationTensor(0).win_sequence: ◯◯ + ◯◯ + ◯◯ + ◯◯ +ObservationTensor(1).current_point_card: ◯◯◯◉ +ObservationTensor(1).remaining_point_cards: ◉◉◉◯ +ObservationTensor(1).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).player_hand: ◯◉◉◉ +ObservationTensor(1).win_sequence: ◯◯ + ◯◯ + ◯◯ + ◯◯ +ChanceOutcomes() = [(0,0.333333), (1,0.333333), (2,0.333333)] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["Deal 1", "Deal 2", "Deal 3"] + +# Apply action "Deal 2" +action: 1 + +# State 3 +# P0 hand: 2 3 4 +# P1 hand: 2 3 4 +# P0 actions: 0 +# P1 actions: 0 +# Point card sequence: 4 2 +# Points: 0 0 +IsTerminal() = False +History() = [3, 0, 0, 1] +HistoryString() = "3, 0, 0, 1" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +InformationStateString(0) = "P0 hand: 2 3 4 \nP0 action sequence: 0 \nPoint card sequence: 4 2 \nWin sequence: -3 \nPoints: 0 0 \nTerminal?: 0\n" +InformationStateString(1) = "P1 hand: 2 3 4 \nP1 action sequence: 0 \nPoint card sequence: 4 2 \nWin sequence: -3 \nPoints: 0 0 \nTerminal?: 0\n" +InformationStateTensor(0).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(0).player_hand: ◯◉◉◉ +InformationStateTensor(0).win_sequence: ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(0).point_card_sequence: ◯◯◯◉ + ◯◉◯◯ + ◯◯◯◯ + ◯◯◯◯ +InformationStateTensor(0).player_action_sequence: ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ 
+InformationStateTensor(1).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1).player_hand: ◯◉◉◉ +InformationStateTensor(1).win_sequence: ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).point_card_sequence: ◯◯◯◉ + ◯◉◯◯ + ◯◯◯◯ + ◯◯◯◯ +InformationStateTensor(1).player_action_sequence: ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationString(0) = "Current point card: 2\nRemaining Point Cards: 13\nPoints: 0 0 \nP0 hand: 2 3 4 \nWin sequence: -3 \n" +ObservationString(1) = "Current point card: 2\nRemaining Point Cards: 13\nPoints: 0 0 \nP1 hand: 2 3 4 \nWin sequence: -3 \n" +PublicObservationString() = "Current point card: 2\nRemaining Point Cards: 13\nWin sequence: -3 \nPoints: 0 0 \n" +PrivateObservationString(0) = "Current point card: 2\nRemaining Point Cards: 13\nPoints: 0 0 \nP0 hand: 2 3 4 \nWin sequence: -3 \n" +PrivateObservationString(1) = "Current point card: 2\nRemaining Point Cards: 13\nPoints: 0 0 \nP1 hand: 2 3 4 \nWin sequence: -3 \n" +ObservationTensor(0).current_point_card: ◯◉◯◯ +ObservationTensor(0).remaining_point_cards: ◉◯◉◯ +ObservationTensor(0).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).player_hand: ◯◉◉◉ +ObservationTensor(0).win_sequence: ◯◯ + ◯◯ + ◯◯ + ◯◯ +ObservationTensor(1).current_point_card: ◯◉◯◯ +ObservationTensor(1).remaining_point_cards: ◉◯◉◯ +ObservationTensor(1).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).player_hand: ◯◉◉◉ +ObservationTensor(1).win_sequence: ◯◯ + ◯◯ + ◯◯ + ◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [1, 2, 3] +LegalActions(1) = [1, 2, 3] +StringLegalActions(0) = ["[P0]Bid: 2", "[P0]Bid: 3", "[P0]Bid: 4"] +StringLegalActions(1) = ["[P1]Bid: 2", "[P1]Bid: 3", "[P1]Bid: 4"] + +# Apply joint action ["[P0]Bid: 3", "[P1]Bid: 2"] +actions: [2, 1] + +# State 4 +# Apply action "Deal 1" +action: 0 + +# State 5 +# P0 hand: 2 4 +# P1 hand: 3 4 +# P0 actions: 0 2 +# P1 actions: 0 1 +# Point card sequence: 4 2 1 +# Points: 2 0 +IsTerminal() = False +History() = [3, 0, 0, 1, 2, 1, 0] +HistoryString() = "3, 0, 0, 1, 2, 1, 0" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +InformationStateString(0) = "P0 hand: 2 4 \nP0 action sequence: 0 2 \nPoint card sequence: 4 2 1 \nWin sequence: -3 0 \nPoints: 2 0 \nTerminal?: 0\n" +InformationStateString(1) = "P1 hand: 3 4 \nP1 action sequence: 0 1 \nPoint card sequence: 4 2 1 \nWin sequence: -3 0 \nPoints: 2 0 \nTerminal?: 0\n" +InformationStateTensor(0).point_totals: ◯◯◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(0).player_hand: ◯◉◯◉ +InformationStateTensor(0).win_sequence: ◯◯ + ◉◯ + ◯◯ + ◯◯ +InformationStateTensor(0).point_card_sequence: ◯◯◯◉ + ◯◉◯◯ + ◉◯◯◯ + ◯◯◯◯ +InformationStateTensor(0).player_action_sequence: ◉◯◯◯ + ◯◯◉◯ + ◯◯◯◯ + ◯◯◯◯ +InformationStateTensor(1).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯ +InformationStateTensor(1).player_hand: ◯◯◉◉ +InformationStateTensor(1).win_sequence: ◯◯ + ◉◯ + ◯◯ + ◯◯ +InformationStateTensor(1).point_card_sequence: ◯◯◯◉ + ◯◉◯◯ + ◉◯◯◯ + ◯◯◯◯ +InformationStateTensor(1).player_action_sequence: ◉◯◯◯ + ◯◉◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationString(0) = "Current point card: 1\nRemaining Point Cards: 3\nPoints: 2 0 \nP0 hand: 2 4 \nWin sequence: -3 0 \n" +ObservationString(1) = "Current point card: 1\nRemaining Point Cards: 3\nPoints: 2 0 \nP1 hand: 3 4 \nWin sequence: -3 0 \n" +PublicObservationString() = "Current point card: 1\nRemaining Point Cards: 3\nWin sequence: -3 0 \nPoints: 2 0 \n" +PrivateObservationString(0) = "Current point card: 1\nRemaining Point Cards: 3\nPoints: 2 0 \nP0 hand: 2 4 \nWin 
sequence: -3 0 \n" +PrivateObservationString(1) = "Current point card: 1\nRemaining Point Cards: 3\nPoints: 2 0 \nP1 hand: 3 4 \nWin sequence: -3 0 \n" +ObservationTensor(0).current_point_card: ◉◯◯◯ +ObservationTensor(0).remaining_point_cards: ◯◯◉◯ +ObservationTensor(0).point_totals: ◯◯◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).player_hand: ◯◉◯◉ +ObservationTensor(0).win_sequence: ◯◯ + ◉◯ + ◯◯ + ◯◯ +ObservationTensor(1).current_point_card: ◉◯◯◯ +ObservationTensor(1).remaining_point_cards: ◯◯◉◯ +ObservationTensor(1).point_totals: ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯ +ObservationTensor(1).player_hand: ◯◯◉◉ +ObservationTensor(1).win_sequence: ◯◯ + ◉◯ + ◯◯ + ◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [1, 3] +LegalActions(1) = [2, 3] +StringLegalActions(0) = ["[P0]Bid: 2", "[P0]Bid: 4"] +StringLegalActions(1) = ["[P1]Bid: 3", "[P1]Bid: 4"] + +# Apply joint action ["[P0]Bid: 4", "[P1]Bid: 3"] +actions: [3, 2] + +# State 6 +# P0 hand: +# P1 hand: +# P0 actions: 0 2 3 1 +# P1 actions: 0 1 2 3 +# Point card sequence: 4 2 1 3 +# Points: 3 3 +IsTerminal() = True +History() = [3, 0, 0, 1, 2, 1, 0, 3, 2] +HistoryString() = "3, 0, 0, 1, 2, 1, 0, 3, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "P0 hand: \nP0 action sequence: 0 2 3 1 \nPoint card sequence: 4 2 1 3 \nWin sequence: -3 0 0 1 \nPoints: 3 3 \nTerminal?: 1\n" +InformationStateString(1) = "P1 hand: \nP1 action sequence: 0 1 2 3 \nPoint card sequence: 4 2 1 3 \nWin sequence: -3 0 0 1 \nPoints: 3 3 \nTerminal?: 1\n" +InformationStateTensor(0).point_totals: ◯◯◯◉◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯◯◯◯ +InformationStateTensor(0).player_hand: ◯◯◯◯ +InformationStateTensor(0).win_sequence: ◯◯ + ◉◯ + ◉◯ + ◯◉ +InformationStateTensor(0).point_card_sequence: ◯◯◯◉ + ◯◉◯◯ + ◉◯◯◯ + ◯◯◉◯ +InformationStateTensor(0).player_action_sequence: ◉◯◯◯ + ◯◯◉◯ + ◯◯◯◉ + ◯◉◯◯ +InformationStateTensor(1).point_totals: ◯◯◯◉◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯◯◯◯ +InformationStateTensor(1).player_hand: ◯◯◯◯ +InformationStateTensor(1).win_sequence: ◯◯ + ◉◯ + ◉◯ + ◯◉ +InformationStateTensor(1).point_card_sequence: ◯◯◯◉ + ◯◉◯◯ + ◉◯◯◯ + ◯◯◉◯ +InformationStateTensor(1).player_action_sequence: ◉◯◯◯ + ◯◉◯◯ + ◯◯◉◯ + ◯◯◯◉ +ObservationString(0) = "Current point card: 3\nRemaining Point Cards: \nPoints: 3 3 \nP0 hand: \nWin sequence: -3 0 0 1 \n" +ObservationString(1) = "Current point card: 3\nRemaining Point Cards: \nPoints: 3 3 \nP1 hand: \nWin sequence: -3 0 0 1 \n" +PublicObservationString() = "Current point card: 3\nRemaining Point Cards: \nWin sequence: -3 0 0 1 \nPoints: 3 3 \n" +PrivateObservationString(0) = "Current point card: 3\nRemaining Point Cards: \nPoints: 3 3 \nP0 hand: \nWin sequence: -3 0 0 1 \n" +PrivateObservationString(1) = "Current point card: 3\nRemaining Point Cards: \nPoints: 3 3 \nP1 hand: \nWin sequence: -3 0 0 1 \n" +ObservationTensor(0).current_point_card: ◯◯◉◯ +ObservationTensor(0).remaining_point_cards: ◯◯◯◯ +ObservationTensor(0).point_totals: ◯◯◯◉◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(0).player_hand: ◯◯◯◯ +ObservationTensor(0).win_sequence: ◯◯ + ◉◯ + ◉◯ + ◯◉ +ObservationTensor(1).current_point_card: ◯◯◉◯ +ObservationTensor(1).remaining_point_cards: ◯◯◯◯ +ObservationTensor(1).point_totals: ◯◯◯◉◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(1).player_hand: ◯◯◯◯ +ObservationTensor(1).win_sequence: ◯◯ + ◉◯ + ◉◯ + ◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/goofspiel_turn_based.txt 
b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/goofspiel_turn_based.txt new file mode 100644 index 0000000..4c7a247 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/goofspiel_turn_based.txt @@ -0,0 +1,238 @@ +game: turn_based_simultaneous_game(game=goofspiel(imp_info=True,num_cards=4,points_order=descending)) + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Turn-based Goofspiel" +GameType.max_num_players = 10 +GameType.min_num_players = 2 +GameType.parameter_specification = ["game"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = True +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "turn_based_simultaneous_game" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 4 +PolicyTensorShape() = [4] +MaxChanceOutcomes() = 0 +GetParameters() = {game=goofspiel(egocentric=False,imp_info=True,num_cards=4,num_turns=-1,players=2,points_order=descending,returns_type=win_loss)} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = [70] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 70 +ObservationTensorShape() = [46] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 46 +MaxGameLength() = 8 +ToString() = "turn_based_simultaneous_game(game=goofspiel(egocentric=False,imp_info=True,num_cards=4,num_turns=-1,players=2,points_order=descending,returns_type=win_loss))" + +# State 0 +# Partial joint action: +# P0 hand: 1 2 3 4 +# P1 hand: 1 2 3 4 +# P0 actions: +# P1 actions: +# Point card sequence: 4 +# Points: 0 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "Current player: 0\nP0 hand: 1 2 3 4 \nP0 action sequence: \nPoint card sequence: 4 \nWin sequence: \nPoints: 0 0 \nTerminal?: 0\n" +InformationStateString(1) = "Current player: 0\nP1 hand: 1 2 3 4 \nP1 action sequence: \nPoint card sequence: 4 \nWin sequence: \nPoints: 0 0 \nTerminal?: 0\n" +InformationStateTensor(0): ◉◯◉◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◉◯◯◉◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "Current player: 0\nCurrent point card: 4\nRemaining Point Cards: 123\nPoints: 0 0 \nP0 hand: 1 2 3 4 \nWin sequence: \n" +ObservationString(1) = "Current player: 0\nCurrent point card: 4\nRemaining Point Cards: 123\nPoints: 0 0 \nP1 hand: 1 2 3 4 \nWin sequence: \n" +ObservationTensor(0): ◉◯◉◯◯◯◯◉◉◉◉◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◉◉◉◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◉◉◉◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["[P0]Bid: 1", "[P0]Bid: 2", "[P0]Bid: 3", "[P0]Bid: 4"] + +# Apply action "[P0]Bid: 3" +action: 2 + +# State 1 +# Partial joint action: 2 +# P0 hand: 1 2 3 4 +# P1 hand: 1 2 3 4 +# P0 actions: +# P1 actions: +# Point card sequence: 4 +# Points: 0 0 +IsTerminal() = False +History() = [2] +HistoryString() = "2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 
+InformationStateString(0) = "Current player: 1\nObserver's action this turn: 2\nP0 hand: 1 2 3 4 \nP0 action sequence: \nPoint card sequence: 4 \nWin sequence: \nPoints: 0 0 \nTerminal?: 0\n" +InformationStateString(1) = "Current player: 1\nP1 hand: 1 2 3 4 \nP1 action sequence: \nPoint card sequence: 4 \nWin sequence: \nPoints: 0 0 \nTerminal?: 0\n" +InformationStateTensor(0): ◯◉◉◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◉◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "Current player: 1\nObserver's action this turn: 2\nCurrent point card: 4\nRemaining Point Cards: 123\nPoints: 0 0 \nP0 hand: 1 2 3 4 \nWin sequence: \n" +ObservationString(1) = "Current player: 1\nCurrent point card: 4\nRemaining Point Cards: 123\nPoints: 0 0 \nP1 hand: 1 2 3 4 \nWin sequence: \n" +ObservationTensor(0): ◯◉◉◯◯◯◯◉◉◉◉◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◉◉◉◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◉◯◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◉◉◉◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["[P1]Bid: 1", "[P1]Bid: 2", "[P1]Bid: 3", "[P1]Bid: 4"] + +# Apply action "[P1]Bid: 4" +action: 3 + +# State 2 +# Partial joint action: +# P0 hand: 1 2 4 +# P1 hand: 1 2 3 +# P0 actions: 2 +# P1 actions: 3 +# Point card sequence: 4 3 +# Points: 0 4 +IsTerminal() = False +History() = [2, 3] +HistoryString() = "2, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "Current player: 0\nP0 hand: 1 2 4 \nP0 action sequence: 2 \nPoint card sequence: 4 3 \nWin sequence: 1 \nPoints: 0 4 \nTerminal?: 0\n" +InformationStateString(1) = "Current player: 0\nP1 hand: 1 2 3 \nP1 action sequence: 3 \nPoint card sequence: 4 3 \nWin sequence: 1 \nPoints: 0 4 \nTerminal?: 0\n" +InformationStateTensor(0): ◉◯◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◉◯◉◯◉◯◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◉◉◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "Current player: 0\nCurrent point card: 3\nRemaining Point Cards: 12\nPoints: 0 4 \nP0 hand: 1 2 4 \nWin sequence: 1 \n" +ObservationString(1) = "Current player: 0\nCurrent point card: 3\nRemaining Point Cards: 12\nPoints: 0 4 \nP1 hand: 1 2 3 \nWin sequence: 1 \n" +ObservationTensor(0): ◉◯◉◯◯◯◉◯◉◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◉◯◉◯◉◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◉◯◯◉◯◉◉◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◉◉◯◯◉◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 3] +StringLegalActions() = ["[P0]Bid: 1", "[P0]Bid: 2", "[P0]Bid: 4"] + +# Apply action "[P0]Bid: 2" +action: 1 + +# State 3 +# Partial joint action: 1 +# P0 hand: 1 2 4 +# P1 hand: 1 2 3 +# P0 actions: 2 +# P1 actions: 3 +# Point card sequence: 4 3 +# Points: 0 4 +IsTerminal() = False +History() = [2, 3, 1] +HistoryString() = "2, 3, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "Current player: 1\nObserver's action this turn: 1\nP0 hand: 1 2 4 \nP0 action sequence: 2 \nPoint card sequence: 4 3 \nWin sequence: 1 \nPoints: 0 4 \nTerminal?: 0\n" +InformationStateString(1) = "Current player: 1\nP1 hand: 1 2 3 \nP1 action sequence: 3 \nPoint card sequence: 4 3 \nWin sequence: 1 \nPoints: 0 4 \nTerminal?: 0\n" +InformationStateTensor(0): ◯◉◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◉◯◉◯◉◯◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◉◉◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "Current player: 
1\nObserver's action this turn: 1\nCurrent point card: 3\nRemaining Point Cards: 12\nPoints: 0 4 \nP0 hand: 1 2 4 \nWin sequence: 1 \n" +ObservationString(1) = "Current player: 1\nCurrent point card: 3\nRemaining Point Cards: 12\nPoints: 0 4 \nP1 hand: 1 2 3 \nWin sequence: 1 \n" +ObservationTensor(0): ◯◉◉◯◯◯◉◯◉◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◉◯◉◯◉◯◯◯◯◯◯ +ObservationTensor(1): ◯◉◯◉◯◯◉◯◉◉◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◉◉◯◯◉◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["[P1]Bid: 1", "[P1]Bid: 2", "[P1]Bid: 3"] + +# Apply action "[P1]Bid: 1" +action: 0 + +# State 4 +# Partial joint action: +# P0 hand: 1 4 +# P1 hand: 2 3 +# P0 actions: 2 1 +# P1 actions: 3 0 +# Point card sequence: 4 3 2 +# Points: 3 4 +IsTerminal() = False +History() = [2, 3, 1, 0] +HistoryString() = "2, 3, 1, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "Current player: 0\nP0 hand: 1 4 \nP0 action sequence: 2 1 \nPoint card sequence: 4 3 2 \nWin sequence: 1 0 \nPoints: 3 4 \nTerminal?: 0\n" +InformationStateString(1) = "Current player: 0\nP1 hand: 2 3 \nP1 action sequence: 3 0 \nPoint card sequence: 4 3 2 \nWin sequence: 1 0 \nPoints: 3 4 \nTerminal?: 0\n" +InformationStateTensor(0): ◉◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◉◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◉◯◯◉◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "Current player: 0\nCurrent point card: 2\nRemaining Point Cards: 1\nPoints: 3 4 \nP0 hand: 1 4 \nWin sequence: 1 0 \n" +ObservationString(1) = "Current player: 0\nCurrent point card: 2\nRemaining Point Cards: 1\nPoints: 3 4 \nP1 hand: 2 3 \nWin sequence: 1 0 \n" +ObservationTensor(0): ◉◯◉◯◯◉◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◉◉◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◉◯◉◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◉◯◯◉◉◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 3] +StringLegalActions() = ["[P0]Bid: 1", "[P0]Bid: 4"] + +# Apply action "[P0]Bid: 4" +action: 3 + +# State 5 +# Partial joint action: 3 +# P0 hand: 1 4 +# P1 hand: 2 3 +# P0 actions: 2 1 +# P1 actions: 3 0 +# Point card sequence: 4 3 2 +# Points: 3 4 +IsTerminal() = False +History() = [2, 3, 1, 0, 3] +HistoryString() = "2, 3, 1, 0, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "Current player: 1\nObserver's action this turn: 3\nP0 hand: 1 4 \nP0 action sequence: 2 1 \nPoint card sequence: 4 3 2 \nWin sequence: 1 0 \nPoints: 3 4 \nTerminal?: 0\n" +InformationStateString(1) = "Current player: 1\nP1 hand: 2 3 \nP1 action sequence: 3 0 \nPoint card sequence: 4 3 2 \nWin sequence: 1 0 \nPoints: 3 4 \nTerminal?: 0\n" +InformationStateTensor(0): ◯◉◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◉◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◉◯◯◉◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "Current player: 1\nObserver's action this turn: 3\nCurrent point card: 2\nRemaining Point Cards: 1\nPoints: 3 4 \nP0 hand: 1 4 \nWin sequence: 1 0 \n" +ObservationString(1) = "Current player: 1\nCurrent point card: 2\nRemaining Point Cards: 1\nPoints: 3 4 \nP1 hand: 2 3 \nWin sequence: 1 0 \n" +ObservationTensor(0): ◯◉◉◯◯◉◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◉◉◯◯◯◯◯ +ObservationTensor(1): ◯◉◯◉◯◉◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◉◯◯◉◉◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2] +StringLegalActions() = ["[P1]Bid: 2", "[P1]Bid: 3"] + +# Apply action "[P1]Bid: 2" +action: 1 + +# State 6 +# P0 
hand: +# P1 hand: +# P0 actions: 2 1 3 0 +# P1 actions: 3 0 1 2 +# Point card sequence: 4 3 2 1 +# Points: 5 5 +IsTerminal() = True +History() = [2, 3, 1, 0, 3, 1] +HistoryString() = "2, 3, 1, 0, 3, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "Current player: -4\nP0 hand: \nP0 action sequence: 2 1 3 0 \nPoint card sequence: 4 3 2 1 \nWin sequence: 1 0 0 1 \nPoints: 5 5 \nTerminal?: 1\n" +InformationStateString(1) = "Current player: -4\nP1 hand: \nP1 action sequence: 3 0 1 2 \nPoint card sequence: 4 3 2 1 \nWin sequence: 1 0 0 1 \nPoints: 5 5 \nTerminal?: 1\n" +InformationStateTensor(0): ◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◉◯◉◯◯◉◯◯◯◉◯◯◉◯◯◉◯◯◉◯◯◯◯◯◉◯◯◉◯◯◯◯◯◉◉◯◯◯ +InformationStateTensor(1): ◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◉◯◉◯◯◉◯◯◯◉◯◯◉◯◯◉◯◯◉◯◯◯◯◯◯◉◉◯◯◯◯◉◯◯◯◯◉◯ +ObservationString(0) = "Current player: -4\nCurrent point card: 1\nRemaining Point Cards: \nPoints: 5 5 \nP0 hand: \nWin sequence: 1 0 0 1 \n" +ObservationString(1) = "Current player: -4\nCurrent point card: 1\nRemaining Point Cards: \nPoints: 5 5 \nP1 hand: \nWin sequence: 1 0 0 1 \n" +ObservationTensor(0): ◯◯◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◉◯◉◯◯◉ +ObservationTensor(1): ◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◉◯◉◯◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/hanabi.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/hanabi.txt new file mode 100644 index 0000000..0ad3c9d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/hanabi.txt @@ -0,0 +1,516 @@ +game: hanabi(colors=2,ranks=3,hand_size=3,players=3) + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Hanabi" +GameType.max_num_players = 5 +GameType.min_num_players = 2 +GameType.parameter_specification = ["colors", "hand_size", "max_information_tokens", "max_life_tokens", "observation_type", "players", "random_start_player", "ranks", "seed"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.REWARDS +GameType.short_name = "hanabi" +GameType.utility = Utility.IDENTICAL + +NumDistinctActions() = 16 +PolicyTensorShape() = [16] +MaxChanceOutcomes() = 6 +GetParameters() = {colors=2,hand_size=3,players=3,ranks=3} +NumPlayers() = 3 +MinUtility() = 0.0 +MaxUtility() = 6.0 +UtilitySum() = None +ObservationTensorShape() = [199] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 199 +MaxGameLength() = 41 +ToString() = "hanabi(colors=2,hand_size=3,players=3,ranks=3)" + +# State 0 +# Life tokens: 3 +# Info tokens: 8 +# Fireworks: R0 Y0 +# Hands: +# ----- +# ----- +# Deck size: 12 +# Discards: +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "Life tokens: 3\nInfo tokens: 8\nFireworks: R0 Y0 \nHands:\n-----\n-----\nDeck size: 12\nDiscards:" +ObservationString(1) = "Life tokens: 3\nInfo tokens: 8\nFireworks: R0 Y0 \nHands:\n-----\n-----\nDeck size: 12\nDiscards:" +ObservationString(2) = "Life tokens: 3\nInfo tokens: 8\nFireworks: R0 Y0 \nHands:\n-----\n-----\nDeck size: 12\nDiscards:" 
+ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.25), (1,0.166667), (2,0.0833333), (3,0.25), (4,0.166667), (5,0.0833333)] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["(Deal R1)", "(Deal R2)", "(Deal R3)", "(Deal Y1)", "(Deal Y2)", "(Deal Y3)"] + +# Apply action "(Deal Y1)" +action: 3 + +# State 1 +# Life tokens: 3 +# Info tokens: 8 +# Fireworks: R0 Y0 +# Hands: +# Y1 || XX|RY123 +# ----- +# ----- +# Deck size: 11 +# Discards: +IsTerminal() = False +History() = [3] +HistoryString() = "3" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "Life tokens: 3\nInfo tokens: 8\nFireworks: R0 Y0 \nHands:\nXX || XX|RY123\n-----\n-----\nDeck size: 11\nDiscards:" +ObservationString(1) = "Life tokens: 3\nInfo tokens: 8\nFireworks: R0 Y0 \nHands:\n-----\n-----\nY1 || XX|RY123\nDeck size: 11\nDiscards:" +ObservationString(2) = "Life tokens: 3\nInfo tokens: 8\nFireworks: R0 Y0 \nHands:\n-----\nY1 || XX|RY123\n-----\nDeck size: 11\nDiscards:" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.272727), (1,0.181818), (2,0.0909091), (3,0.181818), (4,0.181818), (5,0.0909091)] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["(Deal R1)", "(Deal R2)", "(Deal R3)", "(Deal Y1)", "(Deal Y2)", "(Deal Y3)"] + +# Apply action "(Deal R2)" +action: 1 + +# State 2 +# Apply action "(Deal Y2)" +action: 4 + +# State 3 +# Apply action "(Deal Y2)" +action: 4 + +# State 4 +# Apply action "(Deal R3)" +action: 2 + +# State 5 +# Apply action "(Deal Y1)" +action: 3 + +# State 6 +# Apply action "(Deal R1)" +action: 0 + +# State 7 +# Apply action "(Deal R2)" +action: 1 + +# State 8 +# Apply action "(Deal Y1)" +action: 3 + +# State 9 +# Life tokens: 3 +# Info tokens: 8 +# Fireworks: R0 Y0 +# Hands: +# Cur player +# Y1 || XX|RY123 +# R2 || XX|RY123 +# Y2 || XX|RY123 +# ----- +# Y2 || XX|RY123 +# R3 || XX|RY123 +# Y1 || XX|RY123 +# ----- +# R1 || XX|RY123 +# R2 || XX|RY123 +# Y1 || XX|RY123 +# Deck size: 3 +# Discards: +IsTerminal() = False +History() = [3, 1, 4, 4, 2, 3, 0, 1, 3] +HistoryString() = "3, 1, 4, 4, 2, 3, 0, 1, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Life tokens: 3\nInfo tokens: 8\nFireworks: R0 Y0 \nHands:\nCur 
player\nXX || XX|RY123\nXX || XX|RY123\nXX || XX|RY123\n-----\nY2 || XX|RY123\nR3 || XX|RY123\nY1 || XX|RY123\n-----\nR1 || XX|RY123\nR2 || XX|RY123\nY1 || XX|RY123\nDeck size: 3\nDiscards:" +ObservationString(1) = "Life tokens: 3\nInfo tokens: 8\nFireworks: R0 Y0 \nHands:\nXX || XX|RY123\nXX || XX|RY123\nXX || XX|RY123\n-----\nR1 || XX|RY123\nR2 || XX|RY123\nY1 || XX|RY123\n-----\nCur player\nY1 || XX|RY123\nR2 || XX|RY123\nY2 || XX|RY123\nDeck size: 3\nDiscards:" +ObservationString(2) = "Life tokens: 3\nInfo tokens: 8\nFireworks: R0 Y0 \nHands:\nXX || XX|RY123\nXX || XX|RY123\nXX || XX|RY123\n-----\nCur player\nY1 || XX|RY123\nR2 || XX|RY123\nY2 || XX|RY123\n-----\nY2 || XX|RY123\nR3 || XX|RY123\nY1 || XX|RY123\nDeck size: 3\nDiscards:" +ObservationTensor(0): ◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◉◉◉◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◉◉◉◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯ +ObservationTensor(2): ◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◉◉◉◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] +StringLegalActions() = ["(Play 0)", "(Play 1)", "(Play 2)", "(Reveal player +1 color R)", "(Reveal player +1 color Y)", "(Reveal player +2 color R)", "(Reveal player +2 color Y)", "(Reveal player +1 rank 1)", "(Reveal player +1 rank 2)", "(Reveal player +1 rank 3)", "(Reveal player +2 rank 1)", "(Reveal player +2 rank 2)"] + +# Apply action "(Reveal player +2 rank 2)" +action: 14 + +# State 10 +# Life tokens: 3 +# Info tokens: 7 +# Fireworks: R0 Y0 +# Hands: +# Y1 || XX|RY123 +# R2 || XX|RY123 +# Y2 || XX|RY123 +# ----- +# Cur player +# Y2 || XX|RY123 +# R3 || XX|RY123 +# Y1 || XX|RY123 +# ----- +# R1 || XX|RY13 +# R2 || X2|RY2 +# Y1 || XX|RY13 +# Deck size: 3 +# Discards: +IsTerminal() = False +History() = [3, 1, 4, 4, 2, 3, 0, 1, 3, 14] +HistoryString() = "3, 1, 4, 4, 2, 3, 0, 1, 3, 14" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "Life tokens: 3\nInfo tokens: 7\nFireworks: R0 Y0 \nHands:\nXX || XX|RY123\nXX || XX|RY123\nXX || XX|RY123\n-----\nCur player\nY2 || XX|RY123\nR3 || XX|RY123\nY1 || XX|RY123\n-----\nR1 || XX|RY13\nR2 || X2|RY2\nY1 || XX|RY13\nDeck size: 3\nDiscards:" +ObservationString(1) = "Life tokens: 3\nInfo tokens: 7\nFireworks: R0 Y0 \nHands:\nCur player\nXX || XX|RY123\nXX || XX|RY123\nXX || XX|RY123\n-----\nR1 || XX|RY13\nR2 || X2|RY2\nY1 || XX|RY13\n-----\nY1 || XX|RY123\nR2 || XX|RY123\nY2 || XX|RY123\nDeck size: 3\nDiscards:" +ObservationString(2) = "Life tokens: 3\nInfo tokens: 7\nFireworks: R0 Y0 \nHands:\nXX || XX|RY13\nXX || X2|RY2\nXX || XX|RY13\n-----\nY1 || XX|RY123\nR2 || XX|RY123\nY2 || XX|RY123\n-----\nCur player\nY2 || XX|RY123\nR3 || XX|RY123\nY1 || XX|RY123\nDeck size: 3\nDiscards:" +ObservationTensor(0): ◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◉◉◉◯◯◯◯◯◯◉◉◉◉◉◉◉◯◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◉◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◯◉◉◯◉◯◯◯◯◯ +ObservationTensor(1): 
◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◉◉◉◯◯◯◯◯◯◉◉◉◉◉◉◉◯◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◯◉◉◯◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯ +ObservationTensor(2): ◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◉◉◉◯◯◯◯◯◯◉◉◉◉◉◉◉◯◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉◉◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◯◉◉◯◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14] +StringLegalActions() = ["(Discard 0)", "(Discard 1)", "(Discard 2)", "(Play 0)", "(Play 1)", "(Play 2)", "(Reveal player +1 color R)", "(Reveal player +1 color Y)", "(Reveal player +2 color R)", "(Reveal player +2 color Y)", "(Reveal player +1 rank 1)", "(Reveal player +1 rank 2)", "(Reveal player +2 rank 1)", "(Reveal player +2 rank 2)"] + +# Apply action "(Reveal player +2 rank 1)" +action: 13 + +# State 11 +# Life tokens: 3 +# Info tokens: 6 +# Fireworks: R0 Y0 +# Hands: +# Y1 || X1|RY1 +# R2 || XX|RY23 +# Y2 || XX|RY23 +# ----- +# Y2 || XX|RY123 +# R3 || XX|RY123 +# Y1 || XX|RY123 +# ----- +# Cur player +# R1 || XX|RY13 +# R2 || X2|RY2 +# Y1 || XX|RY13 +# Deck size: 3 +# Discards: +IsTerminal() = False +History() = [3, 1, 4, 4, 2, 3, 0, 1, 3, 14, 13] +HistoryString() = "3, 1, 4, 4, 2, 3, 0, 1, 3, 14, 13" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "Life tokens: 3\nInfo tokens: 6\nFireworks: R0 Y0 \nHands:\nXX || X1|RY1\nXX || XX|RY23\nXX || XX|RY23\n-----\nY2 || XX|RY123\nR3 || XX|RY123\nY1 || XX|RY123\n-----\nCur player\nR1 || XX|RY13\nR2 || X2|RY2\nY1 || XX|RY13\nDeck size: 3\nDiscards:" +ObservationString(1) = "Life tokens: 3\nInfo tokens: 6\nFireworks: R0 Y0 \nHands:\nXX || XX|RY123\nXX || XX|RY123\nXX || XX|RY123\n-----\nCur player\nR1 || XX|RY13\nR2 || X2|RY2\nY1 || XX|RY13\n-----\nY1 || X1|RY1\nR2 || XX|RY23\nY2 || XX|RY23\nDeck size: 3\nDiscards:" +ObservationString(2) = "Life tokens: 3\nInfo tokens: 6\nFireworks: R0 Y0 \nHands:\nCur player\nXX || XX|RY13\nXX || X2|RY2\nXX || XX|RY13\n-----\nY1 || X1|RY1\nR2 || XX|RY23\nY2 || XX|RY23\n-----\nY2 || XX|RY123\nR3 || XX|RY123\nY1 || XX|RY123\nDeck size: 3\nDiscards:" +ObservationTensor(0): ◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◉◉◉◯◯◯◯◯◯◉◉◉◉◉◉◯◯◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◯◯◉◉◯◉◉◯◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◯◉◉◯◉◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◉◉◉◯◯◯◯◯◯◉◉◉◉◉◉◯◯◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◉◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◯◉◉◯◉◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◯◯◉◉◯◉◉◯◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯ +ObservationTensor(2): ◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◉◉◉◯◯◯◯◯◯◉◉◉◉◉◉◯◯◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◉◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◯◉◉◯◉◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◯◯◉◉◯◉◉◯◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15] +StringLegalActions() = ["(Discard 0)", "(Discard 1)", "(Discard 2)", "(Play 0)", "(Play 1)", "(Play 2)", "(Reveal player +1 color R)", "(Reveal player +1 color Y)", "(Reveal player +2 color R)", "(Reveal player +2 color Y)", "(Reveal player +1 rank 1)", "(Reveal player +1 rank 2)", "(Reveal player +2 rank 1)", "(Reveal player +2 rank 2)", "(Reveal player +2 rank 3)"] + +# Apply action "(Reveal player +2 rank 3)" +action: 15 + +# State 12 +# Life tokens: 3 +# Info tokens: 5 +# Fireworks: R0 Y0 +# Hands: +# Cur player +# 
Y1 || X1|RY1 +# R2 || XX|RY23 +# Y2 || XX|RY23 +# ----- +# Y2 || XX|RY12 +# R3 || X3|RY3 +# Y1 || XX|RY12 +# ----- +# R1 || XX|RY13 +# R2 || X2|RY2 +# Y1 || XX|RY13 +# Deck size: 3 +# Discards: +IsTerminal() = False +History() = [3, 1, 4, 4, 2, 3, 0, 1, 3, 14, 13, 15] +HistoryString() = "3, 1, 4, 4, 2, 3, 0, 1, 3, 14, 13, 15" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Life tokens: 3\nInfo tokens: 5\nFireworks: R0 Y0 \nHands:\nCur player\nXX || X1|RY1\nXX || XX|RY23\nXX || XX|RY23\n-----\nY2 || XX|RY12\nR3 || X3|RY3\nY1 || XX|RY12\n-----\nR1 || XX|RY13\nR2 || X2|RY2\nY1 || XX|RY13\nDeck size: 3\nDiscards:" +ObservationString(1) = "Life tokens: 3\nInfo tokens: 5\nFireworks: R0 Y0 \nHands:\nXX || XX|RY12\nXX || X3|RY3\nXX || XX|RY12\n-----\nR1 || XX|RY13\nR2 || X2|RY2\nY1 || XX|RY13\n-----\nCur player\nY1 || X1|RY1\nR2 || XX|RY23\nY2 || XX|RY23\nDeck size: 3\nDiscards:" +ObservationString(2) = "Life tokens: 3\nInfo tokens: 5\nFireworks: R0 Y0 \nHands:\nXX || XX|RY13\nXX || X2|RY2\nXX || XX|RY13\n-----\nCur player\nY1 || X1|RY1\nR2 || XX|RY23\nY2 || XX|RY23\n-----\nY2 || XX|RY12\nR3 || X3|RY3\nY1 || XX|RY12\nDeck size: 3\nDiscards:" +ObservationTensor(0): ◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◉◉◉◯◯◯◯◯◯◉◉◉◉◉◯◯◯◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◯◯◉◉◯◉◉◯◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◉◉◯◉◉◯◯◯◯◯◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◯◉◉◯◉◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◉◉◉◯◯◯◯◯◯◉◉◉◉◉◯◯◯◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉◉◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◉◉◯◉◉◯◯◯◯◯◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◯◉◉◯◉◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◯◯◉◉◯◉◉◯◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯ +ObservationTensor(2): ◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◉◉◉◯◯◯◯◯◯◉◉◉◉◉◯◯◯◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◯◉◉◯◉◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◯◯◉◉◯◉◉◯◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◉◉◯◉◉◯◯◯◯◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] +StringLegalActions() = ["(Discard 0)", "(Discard 1)", "(Discard 2)", "(Play 0)", "(Play 1)", "(Play 2)", "(Reveal player +1 color R)", "(Reveal player +1 color Y)", "(Reveal player +2 color R)", "(Reveal player +2 color Y)", "(Reveal player +1 rank 1)", "(Reveal player +1 rank 2)", "(Reveal player +1 rank 3)", "(Reveal player +2 rank 1)", "(Reveal player +2 rank 2)"] + +# Apply action "(Discard 0)" +action: 0 + +# State 13 +# Apply action "(Deal Y3)" +action: 5 + +# State 14 +# Life tokens: 3 +# Info tokens: 6 +# Fireworks: R0 Y0 +# Hands: +# R2 || XX|RY23 +# Y2 || XX|RY23 +# Y3 || XX|RY123 +# ----- +# Cur player +# Y2 || XX|RY12 +# R3 || X3|RY3 +# Y1 || XX|RY12 +# ----- +# R1 || XX|RY13 +# R2 || X2|RY2 +# Y1 || XX|RY13 +# Deck size: 2 +# Discards: Y1 +IsTerminal() = False +History() = [3, 1, 4, 4, 2, 3, 0, 1, 3, 14, 13, 15, 0, 5] +HistoryString() = "3, 1, 4, 4, 2, 3, 0, 1, 3, 14, 13, 15, 0, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "Life tokens: 3\nInfo tokens: 6\nFireworks: R0 Y0 \nHands:\nXX || XX|RY23\nXX || XX|RY23\nXX || XX|RY123\n-----\nCur player\nY2 || XX|RY12\nR3 || X3|RY3\nY1 || XX|RY12\n-----\nR1 || XX|RY13\nR2 || X2|RY2\nY1 || XX|RY13\nDeck size: 2\nDiscards: Y1" +ObservationString(1) = "Life tokens: 3\nInfo tokens: 6\nFireworks: R0 Y0 \nHands:\nCur player\nXX || XX|RY12\nXX || X3|RY3\nXX || XX|RY12\n-----\nR1 || XX|RY13\nR2 || X2|RY2\nY1 || XX|RY13\n-----\nR2 || XX|RY23\nY2 || XX|RY23\nY3 || XX|RY123\nDeck size: 2\nDiscards: Y1" +ObservationString(2) = "Life tokens: 
3\nInfo tokens: 6\nFireworks: R0 Y0 \nHands:\nXX || XX|RY13\nXX || X2|RY2\nXX || XX|RY13\n-----\nR2 || XX|RY23\nY2 || XX|RY23\nY3 || XX|RY123\n-----\nCur player\nY2 || XX|RY12\nR3 || X3|RY3\nY1 || XX|RY12\nDeck size: 2\nDiscards: Y1" +ObservationTensor(0): ◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◯◯◉◉◉◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◉◉◯◉◉◯◯◯◯◯◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◯◉◉◯◉◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◯◯◉◉◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◉◯◉◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◉◉◯◉◉◯◯◯◯◯◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◯◉◉◯◉◯◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯ +ObservationTensor(2): ◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◯◯◉◉◉◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◯◉◉◯◉◯◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◉◉◯◉◉◯◯◯◯◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15] +StringLegalActions() = ["(Discard 0)", "(Discard 1)", "(Discard 2)", "(Play 0)", "(Play 1)", "(Play 2)", "(Reveal player +1 color R)", "(Reveal player +1 color Y)", "(Reveal player +2 color R)", "(Reveal player +2 color Y)", "(Reveal player +1 rank 1)", "(Reveal player +1 rank 2)", "(Reveal player +2 rank 2)", "(Reveal player +2 rank 3)"] + +# Apply action "(Reveal player +2 color R)" +action: 8 + +# State 15 +# Life tokens: 3 +# Info tokens: 5 +# Fireworks: R0 Y0 +# Hands: +# R2 || RX|R23 +# Y2 || XX|Y23 +# Y3 || XX|Y123 +# ----- +# Y2 || XX|RY12 +# R3 || X3|RY3 +# Y1 || XX|RY12 +# ----- +# Cur player +# R1 || XX|RY13 +# R2 || X2|RY2 +# Y1 || XX|RY13 +# Deck size: 2 +# Discards: Y1 +IsTerminal() = False +History() = [3, 1, 4, 4, 2, 3, 0, 1, 3, 14, 13, 15, 0, 5, 8] +HistoryString() = "3, 1, 4, 4, 2, 3, 0, 1, 3, 14, 13, 15, 0, 5, 8" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "Life tokens: 3\nInfo tokens: 5\nFireworks: R0 Y0 \nHands:\nXX || RX|R23\nXX || XX|Y23\nXX || XX|Y123\n-----\nY2 || XX|RY12\nR3 || X3|RY3\nY1 || XX|RY12\n-----\nCur player\nR1 || XX|RY13\nR2 || X2|RY2\nY1 || XX|RY13\nDeck size: 2\nDiscards: Y1" +ObservationString(1) = "Life tokens: 3\nInfo tokens: 5\nFireworks: R0 Y0 \nHands:\nXX || XX|RY12\nXX || X3|RY3\nXX || XX|RY12\n-----\nCur player\nR1 || XX|RY13\nR2 || X2|RY2\nY1 || XX|RY13\n-----\nR2 || RX|R23\nY2 || XX|Y23\nY3 || XX|Y123\nDeck size: 2\nDiscards: Y1" +ObservationString(2) = "Life tokens: 3\nInfo tokens: 5\nFireworks: R0 Y0 \nHands:\nCur player\nXX || XX|RY13\nXX || X2|RY2\nXX || XX|RY13\n-----\nR2 || RX|R23\nY2 || XX|Y23\nY3 || XX|Y123\n-----\nY2 || XX|RY12\nR3 || X3|RY3\nY1 || XX|RY12\nDeck size: 2\nDiscards: Y1" +ObservationTensor(0): ◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◉◉◯◯◯◯◯◯◯◉◉◉◉◉◯◯◯◉◉◉◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◉◯◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◉◉◉◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◉◉◯◉◉◯◯◯◯◯◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◯◉◉◯◉◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◉◉◯◯◯◯◯◯◯◉◉◉◉◉◯◯◯◉◉◉◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◉◉◯◉◉◯◯◯◯◯◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◯◉◉◯◉◯◯◯◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◉◉◉◯◯◯◯◯ +ObservationTensor(2): ◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◉◉◯◯◯◯◯◯◯◉◉◉◉◉◯◯◯◉◉◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◉◯◯◉◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◯◉◉◯◉◯◯◯◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◉◉◉◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◉◉◯◉◉◯◯◯◯◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15] +StringLegalActions() = 
["(Discard 0)", "(Discard 1)", "(Discard 2)", "(Play 0)", "(Play 1)", "(Play 2)", "(Reveal player +1 color R)", "(Reveal player +1 color Y)", "(Reveal player +2 color R)", "(Reveal player +2 color Y)", "(Reveal player +1 rank 2)", "(Reveal player +1 rank 3)", "(Reveal player +2 rank 1)", "(Reveal player +2 rank 2)", "(Reveal player +2 rank 3)"] + +# Apply action "(Play 2)" +action: 5 + +# State 16 +# Apply action "(Deal R1)" +action: 0 + +# State 17 +# Life tokens: 3 +# Info tokens: 5 +# Fireworks: R0 Y1 +# Hands: +# Cur player +# R2 || RX|R23 +# Y2 || XX|Y23 +# Y3 || XX|Y123 +# ----- +# Y2 || XX|RY12 +# R3 || X3|RY3 +# Y1 || XX|RY12 +# ----- +# R1 || XX|RY13 +# R2 || X2|RY2 +# R1 || XX|RY123 +# Deck size: 1 +# Discards: Y1 +IsTerminal() = False +History() = [3, 1, 4, 4, 2, 3, 0, 1, 3, 14, 13, 15, 0, 5, 8, 5, 0] +HistoryString() = "3, 1, 4, 4, 2, 3, 0, 1, 3, 14, 13, 15, 0, 5, 8, 5, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Life tokens: 3\nInfo tokens: 5\nFireworks: R0 Y1 \nHands:\nCur player\nXX || RX|R23\nXX || XX|Y23\nXX || XX|Y123\n-----\nY2 || XX|RY12\nR3 || X3|RY3\nY1 || XX|RY12\n-----\nR1 || XX|RY13\nR2 || X2|RY2\nR1 || XX|RY123\nDeck size: 1\nDiscards: Y1" +ObservationString(1) = "Life tokens: 3\nInfo tokens: 5\nFireworks: R0 Y1 \nHands:\nXX || XX|RY12\nXX || X3|RY3\nXX || XX|RY12\n-----\nR1 || XX|RY13\nR2 || X2|RY2\nR1 || XX|RY123\n-----\nCur player\nR2 || RX|R23\nY2 || XX|Y23\nY3 || XX|Y123\nDeck size: 1\nDiscards: Y1" +ObservationString(2) = "Life tokens: 3\nInfo tokens: 5\nFireworks: R0 Y1 \nHands:\nXX || XX|RY13\nXX || X2|RY2\nXX || XX|RY123\n-----\nCur player\nR2 || RX|R23\nY2 || XX|Y23\nY3 || XX|Y123\n-----\nY2 || XX|RY12\nR3 || X3|RY3\nY1 || XX|RY12\nDeck size: 1\nDiscards: Y1" +ObservationTensor(0): ◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◉◉◉◉◉◯◯◯◉◉◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◉◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◉◉◉◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◉◉◯◉◉◯◯◯◯◯◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◉◉◉◉◉◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◉◯◯◉◉◉◉◉◯◯◯◉◉◉◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◉◯◉◉◯◉◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◉◉◯◉◉◯◯◯◯◯◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◉◉◉◉◉◯◯◯◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◉◉◉◯◯◯◯◯ +ObservationTensor(2): ◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◉◯◯◉◉◉◉◉◯◯◯◉◉◉◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◉◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◉◉◉◉◉◯◯◯◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◉◉◉◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◉◉◯◉◉◯◯◯◯◯◯ +Rewards() = [1, 1, 1] +Returns() = [1, 1, 1] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14] +StringLegalActions() = ["(Discard 0)", "(Discard 1)", "(Discard 2)", "(Play 0)", "(Play 1)", "(Play 2)", "(Reveal player +1 color R)", "(Reveal player +1 color Y)", "(Reveal player +2 color R)", "(Reveal player +1 rank 1)", "(Reveal player +1 rank 2)", "(Reveal player +1 rank 3)", "(Reveal player +2 rank 1)", "(Reveal player +2 rank 2)"] + +# Apply action "(Play 1)" +action: 4 + +# State 18 +# Apply action "(Deal R1)" +action: 0 + +# State 19 +# Life tokens: 3 +# Info tokens: 5 +# Fireworks: R0 Y2 +# Hands: +# R2 || RX|R23 +# Y3 || XX|Y123 +# R1 || XX|RY123 +# ----- +# Cur player +# Y2 || XX|RY12 +# R3 || X3|RY3 +# Y1 || XX|RY12 +# ----- +# R1 || XX|RY13 +# R2 || X2|RY2 +# R1 || XX|RY123 +# Deck size: 0 +# Discards: Y1 +IsTerminal() = False +History() = [3, 1, 4, 4, 2, 3, 0, 1, 3, 14, 13, 15, 0, 5, 8, 5, 0, 4, 0] +HistoryString() = "3, 1, 4, 4, 2, 3, 0, 1, 3, 14, 13, 15, 0, 5, 8, 5, 0, 4, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 
+ObservationString(0) = "Life tokens: 3\nInfo tokens: 5\nFireworks: R0 Y2 \nHands:\nXX || RX|R23\nXX || XX|Y123\nXX || XX|RY123\n-----\nCur player\nY2 || XX|RY12\nR3 || X3|RY3\nY1 || XX|RY12\n-----\nR1 || XX|RY13\nR2 || X2|RY2\nR1 || XX|RY123\nDeck size: 0\nDiscards: Y1" +ObservationString(1) = "Life tokens: 3\nInfo tokens: 5\nFireworks: R0 Y2 \nHands:\nCur player\nXX || XX|RY12\nXX || X3|RY3\nXX || XX|RY12\n-----\nR1 || XX|RY13\nR2 || X2|RY2\nR1 || XX|RY123\n-----\nR2 || RX|R23\nY3 || XX|Y123\nR1 || XX|RY123\nDeck size: 0\nDiscards: Y1" +ObservationString(2) = "Life tokens: 3\nInfo tokens: 5\nFireworks: R0 Y2 \nHands:\nXX || XX|RY13\nXX || X2|RY2\nXX || XX|RY123\n-----\nR2 || RX|R23\nY3 || XX|Y123\nR1 || XX|RY123\n-----\nCur player\nY2 || XX|RY12\nR3 || X3|RY3\nY1 || XX|RY12\nDeck size: 0\nDiscards: Y1" +ObservationTensor(0): ◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◉◉◉◉◯◯◯◉◉◉◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◉◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◉◉◯◉◉◯◯◯◯◯◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◉◉◉◉◉◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◉◉◉◉◯◯◯◉◉◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◉◯◉◉◯◉◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◉◉◯◉◉◯◯◯◯◯◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◉◉◉◉◉◯◯◯◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯ +ObservationTensor(2): ◯◉◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◉◉◉◉◯◯◯◉◉◉◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◉◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◉◉◉◉◉◯◯◯◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◉◉◉◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◉◉◯◉◉◯◯◯◯◯◯ +Rewards() = [1, 1, 1] +Returns() = [2, 2, 2] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 13, 14, 15] +StringLegalActions() = ["(Discard 0)", "(Discard 1)", "(Discard 2)", "(Play 0)", "(Play 1)", "(Play 2)", "(Reveal player +1 color R)", "(Reveal player +2 color R)", "(Reveal player +2 color Y)", "(Reveal player +1 rank 1)", "(Reveal player +1 rank 2)", "(Reveal player +2 rank 1)", "(Reveal player +2 rank 2)", "(Reveal player +2 rank 3)"] + +# Apply action "(Reveal player +2 color Y)" +action: 9 + +# State 20 +# Life tokens: 3 +# Info tokens: 4 +# Fireworks: R0 Y2 +# Hands: +# R2 || RX|R23 +# Y3 || YX|Y123 +# R1 || XX|R123 +# ----- +# Y2 || XX|RY12 +# R3 || X3|RY3 +# Y1 || XX|RY12 +# ----- +# Cur player +# R1 || XX|RY13 +# R2 || X2|RY2 +# R1 || XX|RY123 +# Deck size: 0 +# Discards: Y1 +IsTerminal() = False +History() = [3, 1, 4, 4, 2, 3, 0, 1, 3, 14, 13, 15, 0, 5, 8, 5, 0, 4, 0, 9] +HistoryString() = "3, 1, 4, 4, 2, 3, 0, 1, 3, 14, 13, 15, 0, 5, 8, 5, 0, 4, 0, 9" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "Life tokens: 3\nInfo tokens: 4\nFireworks: R0 Y2 \nHands:\nXX || RX|R23\nXX || YX|Y123\nXX || XX|R123\n-----\nY2 || XX|RY12\nR3 || X3|RY3\nY1 || XX|RY12\n-----\nCur player\nR1 || XX|RY13\nR2 || X2|RY2\nR1 || XX|RY123\nDeck size: 0\nDiscards: Y1" +ObservationString(1) = "Life tokens: 3\nInfo tokens: 4\nFireworks: R0 Y2 \nHands:\nXX || XX|RY12\nXX || X3|RY3\nXX || XX|RY12\n-----\nCur player\nR1 || XX|RY13\nR2 || X2|RY2\nR1 || XX|RY123\n-----\nR2 || RX|R23\nY3 || YX|Y123\nR1 || XX|R123\nDeck size: 0\nDiscards: Y1" +ObservationString(2) = "Life tokens: 3\nInfo tokens: 4\nFireworks: R0 Y2 \nHands:\nCur player\nXX || XX|RY13\nXX || X2|RY2\nXX || XX|RY123\n-----\nR2 || RX|R23\nY3 || YX|Y123\nR1 || XX|R123\n-----\nY2 || XX|RY12\nR3 || X3|RY3\nY1 || XX|RY12\nDeck size: 0\nDiscards: Y1" +ObservationTensor(0): 
◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◉◉◉◯◯◯◯◉◉◉◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◉◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◉◉◉◯◉◯◯◯◉◉◉◯◯◯◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◉◉◯◉◉◯◯◯◯◯◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◉◉◉◉◉◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◉◉◉◯◯◯◯◉◉◉◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◉◉◯◉◉◯◯◯◯◯◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◉◉◉◉◉◯◯◯◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◉◉◉◯◉◯◯◯◉◉◉◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◯◉◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◉◉◉◯◯◯◯◉◉◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◉◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◉◉◉◉◉◯◯◯◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◉◉◉◯◉◯◯◯◉◉◉◯◯◯◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◉◉◯◉◉◯◯◯◯◯◯ +Rewards() = [0, 0, 0] +Returns() = [2, 2, 2] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] +StringLegalActions() = ["(Discard 0)", "(Discard 1)", "(Discard 2)", "(Play 0)", "(Play 1)", "(Play 2)", "(Reveal player +1 color R)", "(Reveal player +1 color Y)", "(Reveal player +2 color R)", "(Reveal player +2 color Y)", "(Reveal player +1 rank 1)", "(Reveal player +1 rank 2)", "(Reveal player +1 rank 3)", "(Reveal player +2 rank 1)", "(Reveal player +2 rank 2)", "(Reveal player +2 rank 3)"] + +# Apply action "(Reveal player +1 rank 2)" +action: 11 + +# State 21 +# Apply action "(Discard 2)" +action: 2 + +# State 22 +# Life tokens: 3 +# Info tokens: 4 +# Fireworks: R0 Y2 +# Hands: +# R2 || R2|R2 +# Y3 || YX|Y13 +# ----- +# Cur player +# Y2 || XX|RY12 +# R3 || X3|RY3 +# Y1 || XX|RY12 +# ----- +# R1 || XX|RY13 +# R2 || X2|RY2 +# R1 || XX|RY123 +# Deck size: 0 +# Discards: Y1 R1 +IsTerminal() = True +History() = [3, 1, 4, 4, 2, 3, 0, 1, 3, 14, 13, 15, 0, 5, 8, 5, 0, 4, 0, 9, 11, 2] +HistoryString() = "3, 1, 4, 4, 2, 3, 0, 1, 3, 14, 13, 15, 0, 5, 8, 5, 0, 4, 0, 9, 11, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "Life tokens: 3\nInfo tokens: 4\nFireworks: R0 Y2 \nHands:\nXX || R2|R2\nXX || YX|Y13\n-----\nCur player\nY2 || XX|RY12\nR3 || X3|RY3\nY1 || XX|RY12\n-----\nR1 || XX|RY13\nR2 || X2|RY2\nR1 || XX|RY123\nDeck size: 0\nDiscards: Y1 R1" +ObservationString(1) = "Life tokens: 3\nInfo tokens: 4\nFireworks: R0 Y2 \nHands:\nCur player\nXX || XX|RY12\nXX || X3|RY3\nXX || XX|RY12\n-----\nR1 || XX|RY13\nR2 || X2|RY2\nR1 || XX|RY123\n-----\nR2 || R2|R2\nY3 || YX|Y13\nDeck size: 0\nDiscards: Y1 R1" +ObservationString(2) = "Life tokens: 3\nInfo tokens: 4\nFireworks: R0 Y2 \nHands:\nXX || XX|RY13\nXX || X2|RY2\nXX || XX|RY123\n-----\nR2 || R2|R2\nY3 || YX|Y13\n-----\nCur player\nY2 || XX|RY12\nR3 || X3|RY3\nY1 || XX|RY12\nDeck size: 0\nDiscards: Y1 R1" +ObservationTensor(0): ◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◉◉◉◉◯◯◯◯◉◉◉◉◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◉◉◯◉◉◯◯◯◯◯◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◉◉◉◉◉◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◉◉◉◉◯◯◯◯◉◉◉◉◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◉◉◯◉◉◯◯◯◯◯◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◉◉◉◉◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◉◯◉◉◉◉◯◯◯◯◉◉◉◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◉◯◉◉◯◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◉◉◉◉◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◉◯◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◉◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◉◉◯◉◉◯◯◯◯◯◯ +Rewards() = [0, 0, 0] +Returns() = [2, 2, 2] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/havannah(board_size=4).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/havannah(board_size=4).txt new file mode 100644 
index 0000000..cc5d0c4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/havannah(board_size=4).txt @@ -0,0 +1,513 @@ +game: havannah(board_size=4) + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Havannah" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["ansi_color_output", "board_size", "swap"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "havannah" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 49 +PolicyTensorShape() = [49] +MaxChanceOutcomes() = 0 +GetParameters() = {ansi_color_output=False,board_size=4,swap=False} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [3, 7, 7] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 147 +MaxGameLength() = 37 +ToString() = "havannah(board_size=4)" + +# State 0 +# a b c d +# 1 . . . . e +# 2 . . . . . f +# 3 . . . . . . g +# 4 . . . . . . . +# 5 . . . . . . +# 6 . . . . . +# 7 . . . . +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = " a b c d\n 1 . . . . e\n 2 . . . . . f\n 3 . . . . . . g\n 4 . . . . . . .\n 5 . . . . . .\n 6 . . . . .\n 7 . . . .\n" +ObservationString(1) = " a b c d\n 1 . . . . e\n 2 . . . . . f\n 3 . . . . . . g\n 4 . . . . . . .\n 5 . . . . . .\n 6 . . . . .\n 7 . . . .\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 7, 8, 9, 10, 11, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 37, 38, 39, 40, 41, 45, 46, 47, 48] +StringLegalActions() = ["a1", "b1", "c1", "d1", "a2", "b2", "c2", "d2", "e2", "a3", "b3", "c3", "d3", "e3", "f3", "a4", "b4", "c4", "d4", "e4", "f4", "g4", "b5", "c5", "d5", "e5", "f5", "g5", "c6", "d6", "e6", "f6", "g6", "d7", "e7", "f7", "g7"] + +# Apply action "c4" +action: 23 + +# State 1 +# a b c d +# 1 . . . . e +# 2 . . . . . f +# 3 . . . . . . g +# 4 . .[O]. . . . +# 5 . . . . . . +# 6 . . . . . +# 7 . . . . +IsTerminal() = False +History() = [23] +HistoryString() = "23" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "23" +InformationStateString(1) = "23" +ObservationString(0) = " a b c d\n 1 . . . . e\n 2 . . . . . f\n 3 . . . . . . g\n 4 . .[O]. . . .\n 5 . . . . . .\n 6 . . . . .\n 7 . . . .\n" +ObservationString(1) = " a b c d\n 1 . . . . e\n 2 . . . . . f\n 3 . . . . . . g\n 4 . .[O]. . . .\n 5 . . . . . .\n 6 . . . . .\n 7 . . . 
.\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ +◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ +◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 7, 8, 9, 10, 11, 14, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 37, 38, 39, 40, 41, 45, 46, 47, 48] +StringLegalActions() = ["a1", "b1", "c1", "d1", "a2", "b2", "c2", "d2", "e2", "a3", "b3", "c3", "d3", "e3", "f3", "a4", "b4", "d4", "e4", "f4", "g4", "b5", "c5", "d5", "e5", "f5", "g5", "c6", "d6", "e6", "f6", "g6", "d7", "e7", "f7", "g7"] + +# Apply action "d2" +action: 10 + +# State 2 +# a b c d +# 1 . . . . e +# 2 . . .[@]. f +# 3 . . . . . . g +# 4 . . O . . . . +# 5 . . . . . . +# 6 . . . . . +# 7 . . . . +IsTerminal() = False +History() = [23, 10] +HistoryString() = "23, 10" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "23, 10" +InformationStateString(1) = "23, 10" +ObservationString(0) = " a b c d\n 1 . . . . e\n 2 . . .[@]. f\n 3 . . . . . . g\n 4 . . O . . . .\n 5 . . . . . .\n 6 . . . . .\n 7 . . . .\n" +ObservationString(1) = " a b c d\n 1 . . . . e\n 2 . . .[@]. f\n 3 . . . . . . g\n 4 . . O . . . .\n 5 . . . . . .\n 6 . . . . .\n 7 . . . .\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◉◉◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ +◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ +◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 7, 8, 9, 11, 14, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 37, 38, 39, 40, 41, 45, 46, 47, 48] +StringLegalActions() = ["a1", "b1", "c1", "d1", "a2", "b2", "c2", "e2", "a3", "b3", "c3", "d3", "e3", "f3", "a4", "b4", "d4", "e4", "f4", "g4", "b5", "c5", "d5", "e5", "f5", "g5", "c6", "d6", "e6", "f6", "g6", "d7", "e7", "f7", "g7"] + +# Apply action "c5" +action: 30 + +# State 3 +# a b c d +# 1 . . . . e +# 2 . . . @ . f +# 3 . . . . . . g +# 4 . . O . . . . +# 5 .[O]. . . . +# 6 . . . . . +# 7 . . . . +IsTerminal() = False +History() = [23, 10, 30] +HistoryString() = "23, 10, 30" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "23, 10, 30" +InformationStateString(1) = "23, 10, 30" +ObservationString(0) = " a b c d\n 1 . . . . e\n 2 . . . @ . f\n 3 . . . . . . g\n 4 . . O . . . .\n 5 .[O]. . . .\n 6 . . . . .\n 7 . . . .\n" +ObservationString(1) = " a b c d\n 1 . . . . e\n 2 . . . @ . f\n 3 . . . . . . g\n 4 . . O . . . .\n 5 .[O]. . . .\n 6 . . . . .\n 7 . . . 
.\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◉◉◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ +◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ +◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◯◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 7, 8, 9, 11, 14, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 29, 31, 32, 33, 34, 37, 38, 39, 40, 41, 45, 46, 47, 48] +StringLegalActions() = ["a1", "b1", "c1", "d1", "a2", "b2", "c2", "e2", "a3", "b3", "c3", "d3", "e3", "f3", "a4", "b4", "d4", "e4", "f4", "g4", "b5", "d5", "e5", "f5", "g5", "c6", "d6", "e6", "f6", "g6", "d7", "e7", "f7", "g7"] + +# Apply action "b4" +action: 22 + +# State 4 +# a b c d +# 1 . . . . e +# 2 . . . @ . f +# 3 . . . . . . g +# 4 .[@]O . . . . +# 5 . O . . . . +# 6 . . . . . +# 7 . . . . +IsTerminal() = False +History() = [23, 10, 30, 22] +HistoryString() = "23, 10, 30, 22" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "23, 10, 30, 22" +InformationStateString(1) = "23, 10, 30, 22" +ObservationString(0) = " a b c d\n 1 . . . . e\n 2 . . . @ . f\n 3 . . . . . . g\n 4 .[@]O . . . .\n 5 . O . . . .\n 6 . . . . .\n 7 . . . .\n" +ObservationString(1) = " a b c d\n 1 . . . . e\n 2 . . . @ . f\n 3 . . . . . . g\n 4 .[@]O . . . .\n 5 . O . . . .\n 6 . . . . .\n 7 . . . .\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◉◉◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ +◯◯◉◯◯◯◯ ◯◉◯◯◯◯◯ ◉◯◯◉◉◉◉ +◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ +◯◉◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◯◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◯◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 7, 8, 9, 11, 14, 15, 16, 17, 18, 19, 21, 24, 25, 26, 27, 29, 31, 32, 33, 34, 37, 38, 39, 40, 41, 45, 46, 47, 48] +StringLegalActions() = ["a1", "b1", "c1", "d1", "a2", "b2", "c2", "e2", "a3", "b3", "c3", "d3", "e3", "f3", "a4", "d4", "e4", "f4", "g4", "b5", "d5", "e5", "f5", "g5", "c6", "d6", "e6", "f6", "g6", "d7", "e7", "f7", "g7"] + +# Apply action "f4" +action: 26 + +# State 5 +# a b c d +# 1 . . . . e +# 2 . . . @ . f +# 3 . . . . . . g +# 4 . @ O . .[O]. +# 5 . O . . . . +# 6 . . . . . +# 7 . . . . +IsTerminal() = False +History() = [23, 10, 30, 22, 26] +HistoryString() = "23, 10, 30, 22, 26" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "23, 10, 30, 22, 26" +InformationStateString(1) = "23, 10, 30, 22, 26" +ObservationString(0) = " a b c d\n 1 . . . . e\n 2 . . . @ . f\n 3 . . . . . . g\n 4 . @ O . .[O].\n 5 . O . . . .\n 6 . . . . .\n 7 . . . .\n" +ObservationString(1) = " a b c d\n 1 . . . . e\n 2 . . . @ . f\n 3 . . . . . . g\n 4 . @ O . .[O].\n 5 . O . . . .\n 6 . . . . .\n 7 . . . 
.\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◉◉◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ +◯◯◉◯◯◉◯ ◯◉◯◯◯◯◯ ◉◯◯◉◉◯◉ +◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ +◯◉◯◯◯◯◯ ◯◯◉◯◯◉◯ ◉◯◯◉◉◯◉ +◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◯◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 7, 8, 9, 11, 14, 15, 16, 17, 18, 19, 21, 24, 25, 27, 29, 31, 32, 33, 34, 37, 38, 39, 40, 41, 45, 46, 47, 48] +StringLegalActions() = ["a1", "b1", "c1", "d1", "a2", "b2", "c2", "e2", "a3", "b3", "c3", "d3", "e3", "f3", "a4", "d4", "e4", "g4", "b5", "d5", "e5", "f5", "g5", "c6", "d6", "e6", "f6", "g6", "d7", "e7", "f7", "g7"] + +# Apply action "b3" +action: 15 + +# State 6 +# Apply action "e2" +action: 11 + +# State 7 +# Apply action "f5" +action: 33 + +# State 8 +# Apply action "g4" +action: 27 + +# State 9 +# Apply action "e5" +action: 32 + +# State 10 +# Apply action "b2" +action: 8 + +# State 11 +# Apply action "g5" +action: 34 + +# State 12 +# Apply action "a4" +action: 21 + +# State 13 +# Apply action "a3" +action: 14 + +# State 14 +# Apply action "d4" +action: 24 + +# State 15 +# Apply action "f3" +action: 19 + +# State 16 +# Apply action "d1" +action: 3 + +# State 17 +# Apply action "f6" +action: 40 + +# State 18 +# Apply action "d5" +action: 31 + +# State 19 +# Apply action "a1" +action: 0 + +# State 20 +# a b c d +# 1[@]. . O e +# 2 . O . @ O f +# 3 @ @ . . . @ g +# 4 O @ O O . O O +# 5 . O O @ @ @ +# 6 . . . @ . +# 7 . . . . +IsTerminal() = False +History() = [23, 10, 30, 22, 26, 15, 11, 33, 27, 32, 8, 34, 21, 14, 24, 19, 3, 40, 31, 0] +HistoryString() = "23, 10, 30, 22, 26, 15, 11, 33, 27, 32, 8, 34, 21, 14, 24, 19, 3, 40, 31, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "23, 10, 30, 22, 26, 15, 11, 33, 27, 32, 8, 34, 21, 14, 24, 19, 3, 40, 31, 0" +InformationStateString(1) = "23, 10, 30, 22, 26, 15, 11, 33, 27, 32, 8, 34, 21, 14, 24, 19, 3, 40, 31, 0" +ObservationString(0) = " a b c d\n 1[@]. . O e\n 2 . O . @ O f\n 3 @ @ . . . @ g\n 4 O @ O O . O O\n 5 . O O @ @ @\n 6 . . . @ .\n 7 . . . .\n" +ObservationString(1) = " a b c d\n 1[@]. . O e\n 2 . O . @ O f\n 3 @ @ . . . @ g\n 4 O @ O O . O O\n 5 . O O @ @ @\n 6 . . . @ .\n 7 . . . .\n" +ObservationTensor(0): +◯◯◯◉◯◯◯ ◉◯◯◯◯◯◯ ◯◉◉◯◯◯◯ +◯◉◯◯◉◯◯ ◯◯◯◉◯◯◯ ◉◯◉◯◯◯◯ +◯◯◯◯◯◯◯ ◉◉◯◯◯◉◯ ◯◯◉◉◉◯◯ +◉◯◉◉◯◉◉ ◯◉◯◯◯◯◯ ◯◯◯◯◉◯◯ +◯◯◉◉◯◯◯ ◯◯◯◯◉◉◉ ◯◉◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◉◉◉◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +ObservationTensor(1): +◉◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◯◉◉◯◯◯◯ +◯◯◯◉◯◯◯ ◯◉◯◯◉◯◯ ◉◯◉◯◯◯◯ +◉◉◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ +◯◉◯◯◯◯◯ ◉◯◉◉◯◉◉ ◯◯◯◯◉◯◯ +◯◯◯◯◉◉◉ ◯◯◉◉◯◯◯ ◯◉◯◯◯◯◯ +◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 7, 9, 16, 17, 18, 25, 29, 37, 38, 39, 41, 45, 46, 47, 48] +StringLegalActions() = ["b1", "c1", "a2", "c2", "c3", "d3", "e3", "e4", "b5", "c6", "d6", "e6", "g6", "d7", "e7", "f7", "g7"] + +# Apply action "d6" +action: 38 + +# State 21 +# a b c d +# 1 @ . . O e +# 2 . O . @ O f +# 3 @ @ . . . @ g +# 4 O @ O O . O O +# 5 . O O @ @ @ +# 6 .[O]. @ . +# 7 . . . . 
+IsTerminal() = False +History() = [23, 10, 30, 22, 26, 15, 11, 33, 27, 32, 8, 34, 21, 14, 24, 19, 3, 40, 31, 0, 38] +HistoryString() = "23, 10, 30, 22, 26, 15, 11, 33, 27, 32, 8, 34, 21, 14, 24, 19, 3, 40, 31, 0, 38" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "23, 10, 30, 22, 26, 15, 11, 33, 27, 32, 8, 34, 21, 14, 24, 19, 3, 40, 31, 0, 38" +InformationStateString(1) = "23, 10, 30, 22, 26, 15, 11, 33, 27, 32, 8, 34, 21, 14, 24, 19, 3, 40, 31, 0, 38" +ObservationString(0) = " a b c d\n 1 @ . . O e\n 2 . O . @ O f\n 3 @ @ . . . @ g\n 4 O @ O O . O O\n 5 . O O @ @ @\n 6 .[O]. @ .\n 7 . . . .\n" +ObservationString(1) = " a b c d\n 1 @ . . O e\n 2 . O . @ O f\n 3 @ @ . . . @ g\n 4 O @ O O . O O\n 5 . O O @ @ @\n 6 .[O]. @ .\n 7 . . . .\n" +ObservationTensor(0): +◯◯◯◉◯◯◯ ◉◯◯◯◯◯◯ ◯◉◉◯◯◯◯ +◯◉◯◯◉◯◯ ◯◯◯◉◯◯◯ ◉◯◉◯◯◯◯ +◯◯◯◯◯◯◯ ◉◉◯◯◯◉◯ ◯◯◉◉◉◯◯ +◉◯◉◉◯◉◉ ◯◉◯◯◯◯◯ ◯◯◯◯◉◯◯ +◯◯◉◉◯◯◯ ◯◯◯◯◉◉◉ ◯◉◯◯◯◯◯ +◯◯◯◉◯◯◯ ◯◯◯◯◯◉◯ ◯◯◉◯◉◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +ObservationTensor(1): +◉◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◯◉◉◯◯◯◯ +◯◯◯◉◯◯◯ ◯◉◯◯◉◯◯ ◉◯◉◯◯◯◯ +◉◉◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ +◯◉◯◯◯◯◯ ◉◯◉◉◯◉◉ ◯◯◯◯◉◯◯ +◯◯◯◯◉◉◉ ◯◯◉◉◯◯◯ ◯◉◯◯◯◯◯ +◯◯◯◯◯◉◯ ◯◯◯◉◯◯◯ ◯◯◉◯◉◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 7, 9, 16, 17, 18, 25, 29, 37, 39, 41, 45, 46, 47, 48] +StringLegalActions() = ["b1", "c1", "a2", "c2", "c3", "d3", "e3", "e4", "b5", "c6", "e6", "g6", "d7", "e7", "f7", "g7"] + +# Apply action "d7" +action: 45 + +# State 22 +# Apply action "e6" +action: 39 + +# State 23 +# Apply action "e3" +action: 18 + +# State 24 +# Apply action "f7" +action: 47 + +# State 25 +# Apply action "g6" +action: 41 + +# State 26 +# Apply action "c1" +action: 2 + +# State 27 +# Apply action "c2" +action: 9 + +# State 28 +# Apply action "c6" +action: 37 + +# State 29 +# Apply action "d3" +action: 17 + +# State 30 +# Apply action "b1" +action: 1 + +# State 31 +# Apply action "g7" +action: 48 + +# State 32 +# Apply action "a2" +action: 7 + +# State 33 +# a b c d +# 1 @ O O O e +# 2[O]O @ @ O f +# 3 @ @ . @ @ @ g +# 4 O @ O O . O O +# 5 . O O @ @ @ +# 6 O O O @ @ +# 7 @ . O @ +IsTerminal() = True +History() = [23, 10, 30, 22, 26, 15, 11, 33, 27, 32, 8, 34, 21, 14, 24, 19, 3, 40, 31, 0, 38, 45, 39, 18, 47, 41, 2, 9, 37, 17, 1, 48, 7] +HistoryString() = "23, 10, 30, 22, 26, 15, 11, 33, 27, 32, 8, 34, 21, 14, 24, 19, 3, 40, 31, 0, 38, 45, 39, 18, 47, 41, 2, 9, 37, 17, 1, 48, 7" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "23, 10, 30, 22, 26, 15, 11, 33, 27, 32, 8, 34, 21, 14, 24, 19, 3, 40, 31, 0, 38, 45, 39, 18, 47, 41, 2, 9, 37, 17, 1, 48, 7" +InformationStateString(1) = "23, 10, 30, 22, 26, 15, 11, 33, 27, 32, 8, 34, 21, 14, 24, 19, 3, 40, 31, 0, 38, 45, 39, 18, 47, 41, 2, 9, 37, 17, 1, 48, 7" +ObservationString(0) = " a b c d\n 1 @ O O O e\n 2[O]O @ @ O f\n 3 @ @ . @ @ @ g\n 4 O @ O O . O O\n 5 . O O @ @ @\n 6 O O O @ @\n 7 @ . O @\n" +ObservationString(1) = " a b c d\n 1 @ O O O e\n 2[O]O @ @ O f\n 3 @ @ . @ @ @ g\n 4 O @ O O . O O\n 5 . O O @ @ @\n 6 O O O @ @\n 7 @ . 
O @\n" +ObservationTensor(0): +◯◉◉◉◯◯◯ ◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◉◉◯◯◉◯◯ ◯◯◉◉◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◉◉◯◉◉◉◯ ◯◯◉◯◯◯◯ +◉◯◉◉◯◉◉ ◯◉◯◯◯◯◯ ◯◯◯◯◉◯◯ +◯◯◉◉◯◯◯ ◯◯◯◯◉◉◉ ◯◉◯◯◯◯◯ +◯◯◉◉◉◯◯ ◯◯◯◯◯◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◉◯ ◯◯◯◉◯◯◉ ◯◯◯◯◉◯◯ +ObservationTensor(1): +◉◯◯◯◯◯◯ ◯◉◉◉◯◯◯ ◯◯◯◯◯◯◯ +◯◯◉◉◯◯◯ ◉◉◯◯◉◯◯ ◯◯◯◯◯◯◯ +◉◉◯◉◉◉◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ +◯◉◯◯◯◯◯ ◉◯◉◉◯◉◉ ◯◯◯◯◉◯◯ +◯◯◯◯◉◉◉ ◯◯◉◉◯◯◯ ◯◉◯◯◯◯◯ +◯◯◯◯◯◉◉ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ +◯◯◯◉◯◯◉ ◯◯◯◯◯◉◯ ◯◯◯◯◉◯◯ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/havannah(board_size=4,swap=True).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/havannah(board_size=4,swap=True).txt new file mode 100644 index 0000000..65eec26 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/havannah(board_size=4,swap=True).txt @@ -0,0 +1,517 @@ +game: havannah(board_size=4,swap=True) + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Havannah" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["ansi_color_output", "board_size", "swap"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "havannah" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 49 +PolicyTensorShape() = [49] +MaxChanceOutcomes() = 0 +GetParameters() = {ansi_color_output=False,board_size=4,swap=True} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [3, 7, 7] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 147 +MaxGameLength() = 38 +ToString() = "havannah(board_size=4,swap=True)" + +# State 0 +# a b c d +# 1 . . . . e +# 2 . . . . . f +# 3 . . . . . . g +# 4 . . . . . . . +# 5 . . . . . . +# 6 . . . . . +# 7 . . . . +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = " a b c d\n 1 . . . . e\n 2 . . . . . f\n 3 . . . . . . g\n 4 . . . . . . .\n 5 . . . . . .\n 6 . . . . .\n 7 . . . .\n" +ObservationString(1) = " a b c d\n 1 . . . . e\n 2 . . . . . f\n 3 . . . . . . g\n 4 . . . . . . .\n 5 . . . . . .\n 6 . . . . .\n 7 . . . .\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 7, 8, 9, 10, 11, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 37, 38, 39, 40, 41, 45, 46, 47, 48] +StringLegalActions() = ["a1", "b1", "c1", "d1", "a2", "b2", "c2", "d2", "e2", "a3", "b3", "c3", "d3", "e3", "f3", "a4", "b4", "c4", "d4", "e4", "f4", "g4", "b5", "c5", "d5", "e5", "f5", "g5", "c6", "d6", "e6", "f6", "g6", "d7", "e7", "f7", "g7"] + +# Apply action "c4" +action: 23 + +# State 1 +# a b c d +# 1 . . 
. . e +# 2 . . . . . f +# 3 . . . . . . g +# 4 . .[O]. . . . +# 5 . . . . . . +# 6 . . . . . +# 7 . . . . +IsTerminal() = False +History() = [23] +HistoryString() = "23" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "23" +InformationStateString(1) = "23" +ObservationString(0) = " a b c d\n 1 . . . . e\n 2 . . . . . f\n 3 . . . . . . g\n 4 . .[O]. . . .\n 5 . . . . . .\n 6 . . . . .\n 7 . . . .\n" +ObservationString(1) = " a b c d\n 1 . . . . e\n 2 . . . . . f\n 3 . . . . . . g\n 4 . .[O]. . . .\n 5 . . . . . .\n 6 . . . . .\n 7 . . . .\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ +◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ +◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 7, 8, 9, 10, 11, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 37, 38, 39, 40, 41, 45, 46, 47, 48] +StringLegalActions() = ["a1", "b1", "c1", "d1", "a2", "b2", "c2", "d2", "e2", "a3", "b3", "c3", "d3", "e3", "f3", "a4", "b4", "c4", "d4", "e4", "f4", "g4", "b5", "c5", "d5", "e5", "f5", "g5", "c6", "d6", "e6", "f6", "g6", "d7", "e7", "f7", "g7"] + +# Apply action "c4" +action: 23 + +# State 2 +# a b c d +# 1 . . . . e +# 2 . . . . . f +# 3 . . . . . . g +# 4 . .[@]. . . . +# 5 . . . . . . +# 6 . . . . . +# 7 . . . . +IsTerminal() = False +History() = [23, 23] +HistoryString() = "23, 23" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "23, 23" +InformationStateString(1) = "23, 23" +ObservationString(0) = " a b c d\n 1 . . . . e\n 2 . . . . . f\n 3 . . . . . . g\n 4 . .[@]. . . .\n 5 . . . . . .\n 6 . . . . .\n 7 . . . .\n" +ObservationString(1) = " a b c d\n 1 . . . . e\n 2 . . . . . f\n 3 . . . . . . g\n 4 . .[@]. . . .\n 5 . . . . . .\n 6 . . . . .\n 7 . . . .\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ +◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ +◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 7, 8, 9, 10, 11, 14, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 37, 38, 39, 40, 41, 45, 46, 47, 48] +StringLegalActions() = ["a1", "b1", "c1", "d1", "a2", "b2", "c2", "d2", "e2", "a3", "b3", "c3", "d3", "e3", "f3", "a4", "b4", "d4", "e4", "f4", "g4", "b5", "c5", "d5", "e5", "f5", "g5", "c6", "d6", "e6", "f6", "g6", "d7", "e7", "f7", "g7"] + +# Apply action "d2" +action: 10 + +# State 3 +# a b c d +# 1 . . . . e +# 2 . . .[O]. f +# 3 . . . . . . g +# 4 . . @ . . . . +# 5 . . . . . . +# 6 . . . . . +# 7 . . . . +IsTerminal() = False +History() = [23, 23, 10] +HistoryString() = "23, 23, 10" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "23, 23, 10" +InformationStateString(1) = "23, 23, 10" +ObservationString(0) = " a b c d\n 1 . . . . e\n 2 . . .[O]. f\n 3 . . . . . . g\n 4 . . @ . . . .\n 5 . . . . . .\n 6 . . . . .\n 7 . . . 
.\n" +ObservationString(1) = " a b c d\n 1 . . . . e\n 2 . . .[O]. f\n 3 . . . . . . g\n 4 . . @ . . . .\n 5 . . . . . .\n 6 . . . . .\n 7 . . . .\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ +◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◉◉◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ +◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 7, 8, 9, 11, 14, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 37, 38, 39, 40, 41, 45, 46, 47, 48] +StringLegalActions() = ["a1", "b1", "c1", "d1", "a2", "b2", "c2", "e2", "a3", "b3", "c3", "d3", "e3", "f3", "a4", "b4", "d4", "e4", "f4", "g4", "b5", "c5", "d5", "e5", "f5", "g5", "c6", "d6", "e6", "f6", "g6", "d7", "e7", "f7", "g7"] + +# Apply action "c5" +action: 30 + +# State 4 +# a b c d +# 1 . . . . e +# 2 . . . O . f +# 3 . . . . . . g +# 4 . . @ . . . . +# 5 .[@]. . . . +# 6 . . . . . +# 7 . . . . +IsTerminal() = False +History() = [23, 23, 10, 30] +HistoryString() = "23, 23, 10, 30" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "23, 23, 10, 30" +InformationStateString(1) = "23, 23, 10, 30" +ObservationString(0) = " a b c d\n 1 . . . . e\n 2 . . . O . f\n 3 . . . . . . g\n 4 . . @ . . . .\n 5 .[@]. . . .\n 6 . . . . .\n 7 . . . .\n" +ObservationString(1) = " a b c d\n 1 . . . . e\n 2 . . . O . f\n 3 . . . . . . g\n 4 . . @ . . . .\n 5 .[@]. . . .\n 6 . . . . .\n 7 . . . .\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ +◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◯◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◉◉◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ +◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 7, 8, 9, 11, 14, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 29, 31, 32, 33, 34, 37, 38, 39, 40, 41, 45, 46, 47, 48] +StringLegalActions() = ["a1", "b1", "c1", "d1", "a2", "b2", "c2", "e2", "a3", "b3", "c3", "d3", "e3", "f3", "a4", "b4", "d4", "e4", "f4", "g4", "b5", "d5", "e5", "f5", "g5", "c6", "d6", "e6", "f6", "g6", "d7", "e7", "f7", "g7"] + +# Apply action "b4" +action: 22 + +# State 5 +# a b c d +# 1 . . . . e +# 2 . . . O . f +# 3 . . . . . . g +# 4 .[O]@ . . . . +# 5 . @ . . . . +# 6 . . . . . +# 7 . . . . +IsTerminal() = False +History() = [23, 23, 10, 30, 22] +HistoryString() = "23, 23, 10, 30, 22" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "23, 23, 10, 30, 22" +InformationStateString(1) = "23, 23, 10, 30, 22" +ObservationString(0) = " a b c d\n 1 . . . . e\n 2 . . . O . f\n 3 . . . . . . g\n 4 .[O]@ . . . .\n 5 . @ . . . .\n 6 . . . . .\n 7 . . . .\n" +ObservationString(1) = " a b c d\n 1 . . . . e\n 2 . . . O . f\n 3 . . . . . . g\n 4 .[O]@ . . . .\n 5 . @ . . . .\n 6 . . . . .\n 7 . . . 
.\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ +◯◉◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◯◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◯◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◉◉◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ +◯◯◉◯◯◯◯ ◯◉◯◯◯◯◯ ◉◯◯◉◉◉◉ +◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 7, 8, 9, 11, 14, 15, 16, 17, 18, 19, 21, 24, 25, 26, 27, 29, 31, 32, 33, 34, 37, 38, 39, 40, 41, 45, 46, 47, 48] +StringLegalActions() = ["a1", "b1", "c1", "d1", "a2", "b2", "c2", "e2", "a3", "b3", "c3", "d3", "e3", "f3", "a4", "d4", "e4", "f4", "g4", "b5", "d5", "e5", "f5", "g5", "c6", "d6", "e6", "f6", "g6", "d7", "e7", "f7", "g7"] + +# Apply action "f4" +action: 26 + +# State 6 +# Apply action "b3" +action: 15 + +# State 7 +# Apply action "e2" +action: 11 + +# State 8 +# Apply action "f5" +action: 33 + +# State 9 +# Apply action "g4" +action: 27 + +# State 10 +# Apply action "e5" +action: 32 + +# State 11 +# Apply action "b2" +action: 8 + +# State 12 +# Apply action "g5" +action: 34 + +# State 13 +# Apply action "a4" +action: 21 + +# State 14 +# Apply action "a3" +action: 14 + +# State 15 +# Apply action "d4" +action: 24 + +# State 16 +# Apply action "f3" +action: 19 + +# State 17 +# Apply action "d1" +action: 3 + +# State 18 +# Apply action "f6" +action: 40 + +# State 19 +# Apply action "d5" +action: 31 + +# State 20 +# a b c d +# 1 . . . @ e +# 2 . @ . O @ f +# 3 O O . . . O g +# 4 @ O @ @ . @ @ +# 5 . @[@]O O O +# 6 . . . O . +# 7 . . . . +IsTerminal() = False +History() = [23, 23, 10, 30, 22, 26, 15, 11, 33, 27, 32, 8, 34, 21, 14, 24, 19, 3, 40, 31] +HistoryString() = "23, 23, 10, 30, 22, 26, 15, 11, 33, 27, 32, 8, 34, 21, 14, 24, 19, 3, 40, 31" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "23, 23, 10, 30, 22, 26, 15, 11, 33, 27, 32, 8, 34, 21, 14, 24, 19, 3, 40, 31" +InformationStateString(1) = "23, 23, 10, 30, 22, 26, 15, 11, 33, 27, 32, 8, 34, 21, 14, 24, 19, 3, 40, 31" +ObservationString(0) = " a b c d\n 1 . . . @ e\n 2 . @ . O @ f\n 3 O O . . . O g\n 4 @ O @ @ . @ @\n 5 . @[@]O O O\n 6 . . . O .\n 7 . . . .\n" +ObservationString(1) = " a b c d\n 1 . . . @ e\n 2 . @ . O @ f\n 3 O O . . . O g\n 4 @ O @ @ . @ @\n 5 . @[@]O O O\n 6 . . . O .\n 7 . . . .\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◉◉◉◯◯◯◯ +◯◯◯◉◯◯◯ ◯◉◯◯◉◯◯ ◉◯◉◯◯◯◯ +◉◉◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ +◯◉◯◯◯◯◯ ◉◯◉◉◯◉◉ ◯◯◯◯◉◯◯ +◯◯◯◯◉◉◉ ◯◯◉◉◯◯◯ ◯◉◯◯◯◯◯ +◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +ObservationTensor(1): +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯ +◯◉◯◯◉◯◯ ◯◯◯◉◯◯◯ ◉◯◉◯◯◯◯ +◯◯◯◯◯◯◯ ◉◉◯◯◯◉◯ ◯◯◉◉◉◯◯ +◉◯◉◉◯◉◉ ◯◉◯◯◯◯◯ ◯◯◯◯◉◯◯ +◯◯◉◉◯◯◯ ◯◯◯◯◉◉◉ ◯◉◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◉◉◉◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 7, 9, 16, 17, 18, 25, 29, 37, 38, 39, 41, 45, 46, 47, 48] +StringLegalActions() = ["a1", "b1", "c1", "a2", "c2", "c3", "d3", "e3", "e4", "b5", "c6", "d6", "e6", "g6", "d7", "e7", "f7", "g7"] + +# Apply action "a1" +action: 0 + +# State 21 +# a b c d +# 1[O]. . @ e +# 2 . @ . O @ f +# 3 O O . . . O g +# 4 @ O @ @ . @ @ +# 5 . @ @ O O O +# 6 . . . O . +# 7 . . . . 
+IsTerminal() = False +History() = [23, 23, 10, 30, 22, 26, 15, 11, 33, 27, 32, 8, 34, 21, 14, 24, 19, 3, 40, 31, 0] +HistoryString() = "23, 23, 10, 30, 22, 26, 15, 11, 33, 27, 32, 8, 34, 21, 14, 24, 19, 3, 40, 31, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "23, 23, 10, 30, 22, 26, 15, 11, 33, 27, 32, 8, 34, 21, 14, 24, 19, 3, 40, 31, 0" +InformationStateString(1) = "23, 23, 10, 30, 22, 26, 15, 11, 33, 27, 32, 8, 34, 21, 14, 24, 19, 3, 40, 31, 0" +ObservationString(0) = " a b c d\n 1[O]. . @ e\n 2 . @ . O @ f\n 3 O O . . . O g\n 4 @ O @ @ . @ @\n 5 . @ @ O O O\n 6 . . . O .\n 7 . . . .\n" +ObservationString(1) = " a b c d\n 1[O]. . @ e\n 2 . @ . O @ f\n 3 O O . . . O g\n 4 @ O @ @ . @ @\n 5 . @ @ O O O\n 6 . . . O .\n 7 . . . .\n" +ObservationTensor(0): +◉◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◯◉◉◯◯◯◯ +◯◯◯◉◯◯◯ ◯◉◯◯◉◯◯ ◉◯◉◯◯◯◯ +◉◉◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ +◯◉◯◯◯◯◯ ◉◯◉◉◯◉◉ ◯◯◯◯◉◯◯ +◯◯◯◯◉◉◉ ◯◯◉◉◯◯◯ ◯◉◯◯◯◯◯ +◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +ObservationTensor(1): +◯◯◯◉◯◯◯ ◉◯◯◯◯◯◯ ◯◉◉◯◯◯◯ +◯◉◯◯◉◯◯ ◯◯◯◉◯◯◯ ◉◯◉◯◯◯◯ +◯◯◯◯◯◯◯ ◉◉◯◯◯◉◯ ◯◯◉◉◉◯◯ +◉◯◉◉◯◉◉ ◯◉◯◯◯◯◯ ◯◯◯◯◉◯◯ +◯◯◉◉◯◯◯ ◯◯◯◯◉◉◉ ◯◉◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◉◉◉◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 7, 9, 16, 17, 18, 25, 29, 37, 38, 39, 41, 45, 46, 47, 48] +StringLegalActions() = ["b1", "c1", "a2", "c2", "c3", "d3", "e3", "e4", "b5", "c6", "d6", "e6", "g6", "d7", "e7", "f7", "g7"] + +# Apply action "d6" +action: 38 + +# State 22 +# Apply action "d7" +action: 45 + +# State 23 +# Apply action "e6" +action: 39 + +# State 24 +# Apply action "e3" +action: 18 + +# State 25 +# Apply action "f7" +action: 47 + +# State 26 +# Apply action "g6" +action: 41 + +# State 27 +# Apply action "c1" +action: 2 + +# State 28 +# Apply action "c2" +action: 9 + +# State 29 +# Apply action "c6" +action: 37 + +# State 30 +# Apply action "d3" +action: 17 + +# State 31 +# Apply action "b1" +action: 1 + +# State 32 +# Apply action "g7" +action: 48 + +# State 33 +# Apply action "a2" +action: 7 + +# State 34 +# a b c d +# 1 O @ @ @ e +# 2[@]@ O O @ f +# 3 O O . O O O g +# 4 @ O @ @ . @ @ +# 5 . @ @ O O O +# 6 @ @ @ O O +# 7 O . @ O +IsTerminal() = True +History() = [23, 23, 10, 30, 22, 26, 15, 11, 33, 27, 32, 8, 34, 21, 14, 24, 19, 3, 40, 31, 0, 38, 45, 39, 18, 47, 41, 2, 9, 37, 17, 1, 48, 7] +HistoryString() = "23, 23, 10, 30, 22, 26, 15, 11, 33, 27, 32, 8, 34, 21, 14, 24, 19, 3, 40, 31, 0, 38, 45, 39, 18, 47, 41, 2, 9, 37, 17, 1, 48, 7" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "23, 23, 10, 30, 22, 26, 15, 11, 33, 27, 32, 8, 34, 21, 14, 24, 19, 3, 40, 31, 0, 38, 45, 39, 18, 47, 41, 2, 9, 37, 17, 1, 48, 7" +InformationStateString(1) = "23, 23, 10, 30, 22, 26, 15, 11, 33, 27, 32, 8, 34, 21, 14, 24, 19, 3, 40, 31, 0, 38, 45, 39, 18, 47, 41, 2, 9, 37, 17, 1, 48, 7" +ObservationString(0) = " a b c d\n 1 O @ @ @ e\n 2[@]@ O O @ f\n 3 O O . O O O g\n 4 @ O @ @ . @ @\n 5 . @ @ O O O\n 6 @ @ @ O O\n 7 O . @ O\n" +ObservationString(1) = " a b c d\n 1 O @ @ @ e\n 2[@]@ O O @ f\n 3 O O . O O O g\n 4 @ O @ @ . @ @\n 5 . @ @ O O O\n 6 @ @ @ O O\n 7 O . 
@ O\n" +ObservationTensor(0): +◉◯◯◯◯◯◯ ◯◉◉◉◯◯◯ ◯◯◯◯◯◯◯ +◯◯◉◉◯◯◯ ◉◉◯◯◉◯◯ ◯◯◯◯◯◯◯ +◉◉◯◉◉◉◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ +◯◉◯◯◯◯◯ ◉◯◉◉◯◉◉ ◯◯◯◯◉◯◯ +◯◯◯◯◉◉◉ ◯◯◉◉◯◯◯ ◯◉◯◯◯◯◯ +◯◯◯◯◯◉◉ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ +◯◯◯◉◯◯◉ ◯◯◯◯◯◉◯ ◯◯◯◯◉◯◯ +ObservationTensor(1): +◯◉◉◉◯◯◯ ◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◉◉◯◯◉◯◯ ◯◯◉◉◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◉◉◯◉◉◉◯ ◯◯◉◯◯◯◯ +◉◯◉◉◯◉◉ ◯◉◯◯◯◯◯ ◯◯◯◯◉◯◯ +◯◯◉◉◯◯◯ ◯◯◯◯◉◉◉ ◯◉◯◯◯◯◯ +◯◯◉◉◉◯◯ ◯◯◯◯◯◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◉◯ ◯◯◯◉◯◯◉ ◯◯◯◯◉◯◯ +Rewards() = [-1, 1] +Returns() = [-1, 1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/hearts.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/hearts.txt new file mode 100644 index 0000000..e22b1a4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/hearts.txt @@ -0,0 +1,1296 @@ +game: hearts + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Hearts" +GameType.max_num_players = 4 +GameType.min_num_players = 4 +GameType.parameter_specification = ["avoid_all_tricks_bonus", "can_lead_any_club", "can_lead_hearts_instead_of_qs", "jd_bonus", "must_break_hearts", "no_pts_on_first_trick", "pass_cards", "qs_breaks_hearts"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = False +GameType.provides_observation_tensor = False +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "hearts" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 52 +PolicyTensorShape() = [52] +MaxChanceOutcomes() = 52 +GetParameters() = {avoid_all_tricks_bonus=False,can_lead_any_club=False,can_lead_hearts_instead_of_qs=False,jd_bonus=False,must_break_hearts=True,no_pts_on_first_trick=True,pass_cards=True,qs_breaks_hearts=True} +NumPlayers() = 4 +MinUtility() = 0.0 +MaxUtility() = 36.0 +UtilitySum() = None +InformationStateTensorShape() = [5088] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 5088 +MaxGameLength() = 64 +ToString() = "hearts()" + +# State 0 +# Pass Direction: No Pass +# +# S +# H +# D +# C +# S S +# H H +# D D +# C C +# S +# H +# D +# C +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "Pass Direction: No Pass\n\nHand: \nS none\nH none\nD none\nC none\n" +InformationStateString(1) = "Pass Direction: No Pass\n\nHand: \nS none\nH none\nD none\nC none\n" +InformationStateString(2) = "Pass Direction: No Pass\n\nHand: \nS none\nH none\nD none\nC none\n" +InformationStateString(3) = "Pass Direction: No Pass\n\nHand: \nS none\nH none\nD none\nC none\n" +InformationStateTensor(0): zeros(5088) +InformationStateTensor(1): zeros(5088) +InformationStateTensor(2): zeros(5088) +InformationStateTensor(3): zeros(5088) +ChanceOutcomes() = [(0,0.25), (1,0.25), (2,0.25), (3,0.25)] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["No Pass", "Left", "Across", "Right"] + +# Apply action "No Pass" +action: 0 + +# State 1 +# Pass Direction: No Pass +# +# S +# H +# D +# C +# S S +# H H +# D D +# C C +# S +# H +# D +# C +IsTerminal() = False +History() = [0] +HistoryString() = "0" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "Pass Direction: No Pass\n\nHand: \nS none\nH none\nD none\nC none\n" 
+InformationStateString(1) = "Pass Direction: No Pass\n\nHand: \nS none\nH none\nD none\nC none\n" +InformationStateString(2) = "Pass Direction: No Pass\n\nHand: \nS none\nH none\nD none\nC none\n" +InformationStateString(3) = "Pass Direction: No Pass\n\nHand: \nS none\nH none\nD none\nC none\n" +InformationStateTensor(0): zeros(5088) +InformationStateTensor(1): zeros(5088) +InformationStateTensor(2): zeros(5088) +InformationStateTensor(3): zeros(5088) +ChanceOutcomes() = [(0,0.0192308), (1,0.0192308), (2,0.0192308), (3,0.0192308), (4,0.0192308), (5,0.0192308), (6,0.0192308), (7,0.0192308), (8,0.0192308), (9,0.0192308), (10,0.0192308), (11,0.0192308), (12,0.0192308), (13,0.0192308), (14,0.0192308), (15,0.0192308), (16,0.0192308), (17,0.0192308), (18,0.0192308), (19,0.0192308), (20,0.0192308), (21,0.0192308), (22,0.0192308), (23,0.0192308), (24,0.0192308), (25,0.0192308), (26,0.0192308), (27,0.0192308), (28,0.0192308), (29,0.0192308), (30,0.0192308), (31,0.0192308), (32,0.0192308), (33,0.0192308), (34,0.0192308), (35,0.0192308), (36,0.0192308), (37,0.0192308), (38,0.0192308), (39,0.0192308), (40,0.0192308), (41,0.0192308), (42,0.0192308), (43,0.0192308), (44,0.0192308), (45,0.0192308), (46,0.0192308), (47,0.0192308), (48,0.0192308), (49,0.0192308), (50,0.0192308), (51,0.0192308)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51] +StringLegalActions() = ["2C", "2D", "2H", "2S", "3C", "3D", "3H", "3S", "4C", "4D", "4H", "4S", "5C", "5D", "5H", "5S", "6C", "6D", "6H", "6S", "7C", "7D", "7H", "7S", "8C", "8D", "8H", "8S", "9C", "9D", "9H", "9S", "TC", "TD", "TH", "TS", "JC", "JD", "JH", "JS", "QC", "QD", "QH", "QS", "KC", "KD", "KH", "KS", "AC", "AD", "AH", "AS"] + +# Apply action "TD" +action: 33 + +# State 2 +# Apply action "5H" +action: 14 + +# State 3 +# Apply action "TS" +action: 35 + +# State 4 +# Apply action "TH" +action: 34 + +# State 5 +# Apply action "2D" +action: 1 + +# State 6 +# Apply action "6S" +action: 19 + +# State 7 +# Apply action "5S" +action: 15 + +# State 8 +# Apply action "3S" +action: 7 + +# State 9 +# Apply action "2H" +action: 2 + +# State 10 +# Apply action "6C" +action: 16 + +# State 11 +# Apply action "AC" +action: 48 + +# State 12 +# Apply action "6D" +action: 17 + +# State 13 +# Apply action "9S" +action: 31 + +# State 14 +# Apply action "4C" +action: 8 + +# State 15 +# Apply action "QH" +action: 42 + +# State 16 +# Apply action "4D" +action: 9 + +# State 17 +# Apply action "QS" +action: 43 + +# State 18 +# Apply action "AD" +action: 49 + +# State 19 +# Apply action "3H" +action: 6 + +# State 20 +# Apply action "KH" +action: 46 + +# State 21 +# Apply action "JD" +action: 37 + +# State 22 +# Apply action "AH" +action: 50 + +# State 23 +# Apply action "2S" +action: 3 + +# State 24 +# Apply action "5C" +action: 12 + +# State 25 +# Apply action "JH" +action: 38 + +# State 26 +# Apply action "9H" +action: 30 + +# State 27 +# Apply action "TC" +action: 32 + +# State 28 +# Apply action "KC" +action: 44 + +# State 29 +# Apply action "8S" +action: 27 + +# State 30 +# Apply action "5D" +action: 13 + +# State 31 +# Apply action "QC" +action: 40 + +# State 32 +# Apply action "8C" +action: 24 + +# State 33 +# Apply action "3D" +action: 5 + +# State 34 +# Apply action "4S" +action: 11 + +# State 35 +# Apply action "KS" +action: 47 + +# State 36 +# Apply action "8H" +action: 26 + +# State 37 +# Apply action "JC" 
+action: 36 + +# State 38 +# Apply action "9D" +action: 29 + +# State 39 +# Apply action "4H" +action: 10 + +# State 40 +# Apply action "7S" +action: 23 + +# State 41 +# Apply action "KD" +action: 45 + +# State 42 +# Apply action "7D" +action: 21 + +# State 43 +# Apply action "2C" +action: 0 + +# State 44 +# Apply action "AS" +action: 51 + +# State 45 +# Apply action "7C" +action: 20 + +# State 46 +# Apply action "7H" +action: 22 + +# State 47 +# Apply action "QD" +action: 41 + +# State 48 +# Apply action "9C" +action: 28 + +# State 49 +# Apply action "3C" +action: 4 + +# State 50 +# Apply action "6H" +action: 18 + +# State 51 +# Apply action "JS" +action: 39 + +# State 52 +# Apply action "8D" +action: 25 + +# State 53 +# Pass Direction: No Pass +# +# S Q98 +# H J2 +# D KJT32 +# C J73 +# S A73 S 64 +# H KT8 H A9765 +# D 864 D A975 +# C K985 C 64 +# S KJT52 +# H Q43 +# D Q +# C AQT2 +IsTerminal() = False +History() = [0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25] +HistoryString() = "0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "Pass Direction: No Pass\n\nHand: \nS Q98\nH J2\nD KJT32\nC J73\n" +InformationStateString(1) = "Pass Direction: No Pass\n\nHand: \nS 64\nH A9765\nD A975\nC 64\n" +InformationStateString(2) = "Pass Direction: No Pass\n\nHand: \nS KJT52\nH Q43\nD Q\nC AQT2\n" +InformationStateString(3) = "Pass Direction: No Pass\n\nHand: \nS A73\nH KT8\nD 864\nC K985\n" +InformationStateTensor(0): binvec(5088, 0x86c0008114e140000000000000000000000000006c0008114e140ffc000000ffc000000ffc000000ffc0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(5088, 
0x80096b60600006000000000000000000000000000096b60600006ffc000000ffc000000ffc000000ffc0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(5088, 0x89221000091e18000000000000000000000000009221000091e18ffc000000ffc000000ffc000000ffc0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(3): binvec(5088, 
0x8014841e8200a100000000000000000000000000014841e8200a1ffc000000ffc000000ffc000000ffc0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0] +StringLegalActions() = ["2C"] + +# Apply action "2C" +action: 0 + +# State 54 +# Pass Direction: No Pass +# +# S Q98 +# H J2 +# D KJT32 +# C J73 +# S A73 S 64 +# H KT8 H A9765 +# D 864 D A975 +# C K985 C 64 +# S KJT52 +# H Q43 +# D Q +# C AQT +# +# Tricks: +# N E S W N E S +# 2C +# +# Points: +# N: 0 +# E: 0 +# S: 0 +# W: 0 +IsTerminal() = False +History() = [0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0] +HistoryString() = "0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "Pass Direction: No Pass\n\nHand: \nS Q98\nH J2\nD KJT32\nC J73\n\nTricks:\nN E S W N E S\n 2C \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(1) = "Pass Direction: No Pass\n\nHand: \nS 64\nH A9765\nD A975\nC 64\n\nTricks:\nN E S W N E S\n 2C \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(2) = "Pass Direction: No Pass\n\nHand: \nS KJT52\nH Q43\nD Q\nC AQT\n\nTricks:\nN E S W N E S\n 2C \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(3) = "Pass Direction: No Pass\n\nHand: \nS A73\nH KT8\nD 864\nC K985\n\nTricks:\nN E S W N E S\n 2C \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateTensor(0): binvec(5088, 
0x86c0008114e140000000000000000000000000006c0008114e140ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(5088, 0x80096b60600006000000000000000000000000000096b60600006ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(5088, 
0x89221000091e18000000000000000000000000001221000091e18ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(3): binvec(5088, 0x8014841e8200a100000000000000000000000000014841e8200a1ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [12, 24, 28, 44] +StringLegalActions() = ["5C", "8C", "9C", "KC"] + +# Apply action "KC" +action: 44 + +# State 55 +# Pass Direction: No Pass +# +# S Q98 +# H J2 +# D KJT32 +# C J73 +# S A73 S 64 +# H KT8 H A9765 +# D 864 D A975 +# C 985 C 64 +# S KJT52 +# H Q43 +# D Q +# C AQT +# +# Tricks: +# N E S W N E S +# 2C KC +# +# Points: +# N: 0 +# E: 0 +# S: 0 +# W: 0 +IsTerminal() = False +History() = [0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44] +HistoryString() = "0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44" +IsChanceNode() = False +IsSimultaneousNode() = False 
+CurrentPlayer() = 0 +InformationStateString(0) = "Pass Direction: No Pass\n\nHand: \nS Q98\nH J2\nD KJT32\nC J73\n\nTricks:\nN E S W N E S\n 2C KC \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(1) = "Pass Direction: No Pass\n\nHand: \nS 64\nH A9765\nD A975\nC 64\n\nTricks:\nN E S W N E S\n 2C KC \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(2) = "Pass Direction: No Pass\n\nHand: \nS KJT52\nH Q43\nD Q\nC AQT\n\nTricks:\nN E S W N E S\n 2C KC \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(3) = "Pass Direction: No Pass\n\nHand: \nS A73\nH KT8\nD 864\nC 985\n\nTricks:\nN E S W N E S\n 2C KC \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateTensor(0): binvec(5088, 0x86c0008114e140000000000000000000000000006c0008114e140ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(5088, 0x80096b60600006000000000000000000000000000096b60600006ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(5088, 
0x89221000091e18000000000000000000000000001221000091e18ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(3): binvec(5088, 0x8014841e8200a100000000000000000000000000014841e820021ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [4, 20, 36] +StringLegalActions() = ["3C", "7C", "JC"] + +# Apply action "3C" +action: 4 + +# State 56 +# Pass Direction: No Pass +# +# S Q98 +# H J2 +# D KJT32 +# C J7 +# S A73 S 64 +# H KT8 H A9765 +# D 864 D A975 +# C 985 C 64 +# S KJT52 +# H Q43 +# D Q +# C AQT +# +# Tricks: +# N E S W N E S +# 2C KC 3C +# +# Points: +# N: 0 +# E: 0 +# S: 0 +# W: 0 +IsTerminal() = False +History() = [0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44, 4] +HistoryString() = "0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44, 4" +IsChanceNode() = False +IsSimultaneousNode() = False 
+CurrentPlayer() = 1 +InformationStateString(0) = "Pass Direction: No Pass\n\nHand: \nS Q98\nH J2\nD KJT32\nC J7\n\nTricks:\nN E S W N E S\n 2C KC 3C \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(1) = "Pass Direction: No Pass\n\nHand: \nS 64\nH A9765\nD A975\nC 64\n\nTricks:\nN E S W N E S\n 2C KC 3C \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(2) = "Pass Direction: No Pass\n\nHand: \nS KJT52\nH Q43\nD Q\nC AQT\n\nTricks:\nN E S W N E S\n 2C KC 3C \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(3) = "Pass Direction: No Pass\n\nHand: \nS A73\nH KT8\nD 864\nC 985\n\nTricks:\nN E S W N E S\n 2C KC 3C \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateTensor(0): binvec(5088, 0x86c0008114e14000000000000000000000000000640008114e140ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(5088, 0x80096b60600006000000000000000000000000000096b60600006ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(5088, 
0x89221000091e18000000000000000000000000001221000091e18ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(3): binvec(5088, 0x8014841e8200a100000000000000000000000000014841e820021ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [8, 16] +StringLegalActions() = ["4C", "6C"] + +# Apply action "6C" +action: 16 + +# State 57 +# Pass Direction: No Pass +# +# S Q98 +# H J2 +# D KJT32 +# C J7 +# S A73 S 64 +# H KT8 H A9765 +# D 864 D A975 +# C 985 C 4 +# S KJT52 +# H Q43 +# D Q +# C AQT +# +# Tricks: +# N E S W N E S +# 2C KC 3C 6C +# +# Points: +# N: 0 +# E: 0 +# S: 0 +# W: 0 +IsTerminal() = False +History() = [0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44, 4, 16] +HistoryString() = "0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44, 4, 16" +IsChanceNode() = False +IsSimultaneousNode() = False 
+CurrentPlayer() = 3 +InformationStateString(0) = "Pass Direction: No Pass\n\nHand: \nS Q98\nH J2\nD KJT32\nC J7\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(1) = "Pass Direction: No Pass\n\nHand: \nS 64\nH A9765\nD A975\nC 4\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(2) = "Pass Direction: No Pass\n\nHand: \nS KJT52\nH Q43\nD Q\nC AQT\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(3) = "Pass Direction: No Pass\n\nHand: \nS A73\nH KT8\nD 864\nC 985\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateTensor(0): binvec(5088, 0x86c0008114e14000000000000000000000000000640008114e140ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(5088, 0x80096b60600006000000000000000000000000000096360600006ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(5088, 
0x89221000091e18000000000000000000000000001221000091e18ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(3): binvec(5088, 0x8014841e8200a100000000000000000000000000014841e820021ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [7, 9, 12, 17, 23, 24, 25, 28, 51] +StringLegalActions() = ["3S", "4D", "5C", "6D", "7S", "8C", "8D", "9C", "AS"] + +# Apply action "3S" +action: 7 + +# State 58 +# Pass Direction: No Pass +# +# S Q98 +# H J2 +# D KJT32 +# C J7 +# S A7 S 64 +# H KT8 H A9765 +# D 864 D A975 +# C 985 C 4 +# S KJT52 +# H Q43 +# D Q +# C AQT +# +# Tricks: +# N E S W N E S +# 2C KC 3C 6C +# 3S +# +# Points: +# N: 0 +# E: 0 +# S: 0 +# W: 0 +IsTerminal() = False +History() = [0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44, 4, 16, 7] +HistoryString() = "0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 
25, 0, 44, 4, 16, 7" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "Pass Direction: No Pass\n\nHand: \nS Q98\nH J2\nD KJT32\nC J7\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(1) = "Pass Direction: No Pass\n\nHand: \nS 64\nH A9765\nD A975\nC 4\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(2) = "Pass Direction: No Pass\n\nHand: \nS KJT52\nH Q43\nD Q\nC AQT\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(3) = "Pass Direction: No Pass\n\nHand: \nS A7\nH KT8\nD 864\nC 985\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateTensor(0): binvec(5088, 0x86c0008114e14000000000000000000000000000640008114e140ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(5088, 0x80096b60600006000000000000000000000000000096360600006ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(5088, 
0x89221000091e18000000000000000000000000001221000091e18ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(3): binvec(5088, 0x8014841e8200a100000000000000000000000000004841e820021ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [27, 31, 43] +StringLegalActions() = ["8S", "9S", "QS"] + +# Apply action "8S" +action: 27 + +# State 59 +# Pass Direction: No Pass +# +# S Q9 +# H J2 +# D KJT32 +# C J7 +# S A7 S 64 +# H KT8 H A9765 +# D 864 D A975 +# C 985 C 4 +# S KJT52 +# H Q43 +# D Q +# C AQT +# +# Tricks: +# N E S W N E S +# 2C KC 3C 6C +# 3S 8S +# +# Points: +# N: 0 +# E: 0 +# S: 0 +# W: 0 +IsTerminal() = False +History() = [0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44, 4, 16, 7, 27] +HistoryString() = "0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44, 4, 16, 7, 27" +IsChanceNode() = False 
+IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "Pass Direction: No Pass\n\nHand: \nS Q9\nH J2\nD KJT32\nC J7\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(1) = "Pass Direction: No Pass\n\nHand: \nS 64\nH A9765\nD A975\nC 4\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(2) = "Pass Direction: No Pass\n\nHand: \nS KJT52\nH Q43\nD Q\nC AQT\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(3) = "Pass Direction: No Pass\n\nHand: \nS A7\nH KT8\nD 864\nC 985\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateTensor(0): binvec(5088, 0x86c0008114e14000000000000000000000000000640008014e140ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(5088, 0x80096b60600006000000000000000000000000000096360600006ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(5088, 
0x89221000091e18000000000000000000000000001221000091e18ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(3): binvec(5088, 0x8014841e8200a100000000000000000000000000004841e820021ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [11, 19] +StringLegalActions() = ["4S", "6S"] + +# Apply action "4S" +action: 11 + +# State 60 +# Pass Direction: No Pass +# +# S Q9 +# H J2 +# D KJT32 +# C J7 +# S A7 S 6 +# H KT8 H A9765 +# D 864 D A975 +# C 985 C 4 +# S KJT52 +# H Q43 +# D Q +# C AQT +# +# Tricks: +# N E S W N E S +# 2C KC 3C 6C +# 3S 8S 4S +# +# Points: +# N: 0 +# E: 0 +# S: 0 +# W: 0 +IsTerminal() = False +History() = [0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44, 4, 16, 7, 27, 11] +HistoryString() = "0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44, 4, 16, 7, 27, 11" +IsChanceNode() = False 
+IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "Pass Direction: No Pass\n\nHand: \nS Q9\nH J2\nD KJT32\nC J7\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(1) = "Pass Direction: No Pass\n\nHand: \nS 6\nH A9765\nD A975\nC 4\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(2) = "Pass Direction: No Pass\n\nHand: \nS KJT52\nH Q43\nD Q\nC AQT\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(3) = "Pass Direction: No Pass\n\nHand: \nS A7\nH KT8\nD 864\nC 985\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateTensor(0): binvec(5088, 0x86c0008114e14000000000000000000000000000640008014e140ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(5088, 0x80096b60600006000000000000000000000000000086360600006ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(5088, 
0x89221000091e18000000000000000000000000001221000091e18ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(3): binvec(5088, 0x8014841e8200a100000000000000000000000000004841e820021ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [3, 15, 35, 39, 47] +StringLegalActions() = ["2S", "5S", "TS", "JS", "KS"] + +# Apply action "5S" +action: 15 + +# State 61 +# Pass Direction: No Pass +# +# S Q9 +# H J2 +# D KJT32 +# C J7 +# S A7 S 6 +# H KT8 H A9765 +# D 864 D A975 +# C 985 C 4 +# S KJT2 +# H Q43 +# D Q +# C AQT +# +# Tricks: +# N E S W N E S +# 2C KC 3C 6C +# 3S 8S 4S 5S +# +# Points: +# N: 0 +# E: 0 +# S: 0 +# W: 0 +IsTerminal() = False +History() = [0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44, 4, 16, 7, 27, 11, 15] +HistoryString() = "0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44, 4, 16, 
7, 27, 11, 15" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "Pass Direction: No Pass\n\nHand: \nS Q9\nH J2\nD KJT32\nC J7\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(1) = "Pass Direction: No Pass\n\nHand: \nS 6\nH A9765\nD A975\nC 4\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(2) = "Pass Direction: No Pass\n\nHand: \nS KJT2\nH Q43\nD Q\nC AQT\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(3) = "Pass Direction: No Pass\n\nHand: \nS A7\nH KT8\nD 864\nC 985\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateTensor(0): binvec(5088, 0x86c0008114e14000000000000000000000000000640008014e140ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(5088, 0x80096b60600006000000000000000000000000000086360600006ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(5088, 
0x89221000091e18000000000000000000000000001220000091e18ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(3): binvec(5088, 0x8014841e8200a100000000000000000000000000004841e820021ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [1, 5, 20, 31, 33, 36, 37, 43, 45] +StringLegalActions() = ["2D", "3D", "7C", "9S", "TD", "JC", "JD", "QS", "KD"] + +# Apply action "KD" +action: 45 + +# State 62 +# Pass Direction: No Pass +# +# S Q9 +# H J2 +# D JT32 +# C J7 +# S A7 S 6 +# H KT8 H A9765 +# D 864 D A975 +# C 985 C 4 +# S KJT2 +# H Q43 +# D Q +# C AQT +# +# Tricks: +# N E S W N E S +# 2C KC 3C 6C +# 3S 8S 4S 5S +# KD +# +# Points: +# N: 0 +# E: 0 +# S: 0 +# W: 0 +IsTerminal() = False +History() = [0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44, 4, 16, 7, 27, 11, 15, 45] +HistoryString() = "0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 
51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44, 4, 16, 7, 27, 11, 15, 45" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "Pass Direction: No Pass\n\nHand: \nS Q9\nH J2\nD JT32\nC J7\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(1) = "Pass Direction: No Pass\n\nHand: \nS 6\nH A9765\nD A975\nC 4\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(2) = "Pass Direction: No Pass\n\nHand: \nS KJT2\nH Q43\nD Q\nC AQT\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(3) = "Pass Direction: No Pass\n\nHand: \nS A7\nH KT8\nD 864\nC 985\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateTensor(0): binvec(5088, 0x86c0008114e14000000000000000000000000000640008014e100ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(5088, 0x80096b60600006000000000000000000000000000086360600006ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) 
+InformationStateTensor(2): binvec(5088, 0x89221000091e18000000000000000000000000001220000091e18ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(3): binvec(5088, 0x8014841e8200a100000000000000000000000000004841e820021ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [13, 21, 29, 49] +StringLegalActions() = ["5D", "7D", "9D", "AD"] + +# Apply action "9D" +action: 29 + +# State 63 +# Pass Direction: No Pass +# +# S Q9 +# H J2 +# D JT32 +# C J7 +# S A7 S 6 +# H KT8 H A9765 +# D 864 D A75 +# C 985 C 4 +# S KJT2 +# H Q43 +# D Q +# C AQT +# +# Tricks: +# N E S W N E S +# 2C KC 3C 6C +# 3S 8S 4S 5S +# KD 9D +# +# Points: +# N: 0 +# E: 0 +# S: 0 +# W: 0 +IsTerminal() = False +History() = [0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44, 4, 16, 7, 27, 11, 15, 45, 29] +HistoryString() = "0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 
20, 22, 41, 28, 4, 18, 39, 25, 0, 44, 4, 16, 7, 27, 11, 15, 45, 29" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "Pass Direction: No Pass\n\nHand: \nS Q9\nH J2\nD JT32\nC J7\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(1) = "Pass Direction: No Pass\n\nHand: \nS 6\nH A9765\nD A75\nC 4\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(2) = "Pass Direction: No Pass\n\nHand: \nS KJT2\nH Q43\nD Q\nC AQT\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(3) = "Pass Direction: No Pass\n\nHand: \nS A7\nH KT8\nD 864\nC 985\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateTensor(0): binvec(5088, 0x86c0008114e14000000000000000000000000000640008014e100ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(5088, 0x80096b60600006000000000000000000000000000086360200006ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) 
+InformationStateTensor(2): binvec(5088, 0x89221000091e18000000000000000000000000001220000091e18ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(3): binvec(5088, 0x8014841e8200a100000000000000000000000000004841e820021ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [41] +StringLegalActions() = ["QD"] + +# Apply action "QD" +action: 41 + +# State 64 +# Pass Direction: No Pass +# +# S Q9 +# H J2 +# D JT32 +# C J7 +# S A7 S 6 +# H KT8 H A9765 +# D 864 D A75 +# C 985 C 4 +# S KJT2 +# H Q43 +# D +# C AQT +# +# Tricks: +# N E S W N E S +# 2C KC 3C 6C +# 3S 8S 4S 5S +# KD 9D QD +# +# Points: +# N: 0 +# E: 0 +# S: 0 +# W: 0 +IsTerminal() = False +History() = [0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44, 4, 16, 7, 27, 11, 15, 45, 29, 41] +HistoryString() = "0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 
39, 25, 0, 44, 4, 16, 7, 27, 11, 15, 45, 29, 41" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "Pass Direction: No Pass\n\nHand: \nS Q9\nH J2\nD JT32\nC J7\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D QD \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(1) = "Pass Direction: No Pass\n\nHand: \nS 6\nH A9765\nD A75\nC 4\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D QD \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(2) = "Pass Direction: No Pass\n\nHand: \nS KJT2\nH Q43\nD none\nC AQT\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D QD \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateString(3) = "Pass Direction: No Pass\n\nHand: \nS A7\nH KT8\nD 864\nC 985\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D QD \n\nPoints:\nN: 0\nE: 0\nS: 0\nW: 0" +InformationStateTensor(0): binvec(5088, 0x86c0008114e14000000000000000000000000000640008014e100ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000040000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(5088, 0x80096b60600006000000000000000000000000000086360200006ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000040000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) 
+InformationStateTensor(2): binvec(5088, 0x89221000091e18000000000000000000000000001220000091a18ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000040000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(3): binvec(5088, 0x8014841e8200a100000000000000000000000000004841e820021ffc000000ffc000000ffc000000ffc0000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000040000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [9, 17, 25] +StringLegalActions() = ["4D", "6D", "8D"] + +# Apply action "4D" +action: 9 + +# State 65 +# Apply action "2D" +action: 1 + +# State 66 +# Apply action "7D" +action: 21 + +# State 67 +# Apply action "3H" +action: 6 + +# State 68 +# Apply action "8D" +action: 25 + +# State 69 +# Apply action "5C" +action: 12 + +# State 70 +# Apply action "JC" +action: 36 + +# State 71 +# Apply action "4C" +action: 8 + +# State 72 +# Apply action "QC" +action: 40 + +# State 73 +# Apply action "AC" +action: 48 + +# State 74 +# Apply action "9C" +action: 28 + +# State 75 +# Apply action "7C" +action: 20 + +# State 76 +# Apply action "5D" +action: 13 + +# State 77 +# Apply action "2S" +action: 3 + +# State 78 +# Apply action "AS" +action: 51 + +# State 79 +# Apply action "9S" +action: 31 + +# State 80 +# Apply action "6S" +action: 19 + +# State 81 
+# Apply action "7S" +action: 23 + +# State 82 +# Apply action "QS" +action: 43 + +# State 83 +# Apply action "6H" +action: 18 + +# State 84 +# Apply action "TS" +action: 35 + +# State 85 +# Apply action "3D" +action: 5 + +# State 86 +# Apply action "AD" +action: 49 + +# State 87 +# Apply action "KS" +action: 47 + +# State 88 +# Apply action "6D" +action: 17 + +# State 89 +# Apply action "7H" +action: 22 + +# State 90 +# Apply action "QH" +action: 42 + +# State 91 +# Apply action "KH" +action: 46 + +# State 92 +# Apply action "JH" +action: 38 + +# State 93 +# Pass Direction: No Pass +# +# S +# H 2 +# D JT +# C +# S S +# H T8 H A95 +# D D +# C 8 C +# S J +# H 4 +# D +# C T +# +# Tricks: +# N E S W N E S +# 2C KC 3C 6C +# 3S 8S 4S 5S +# KD 9D QD 4D +# 2D 7D 3H 8D +# 5C JC 4C QC +# AC 9C 7C 5D +# 2S AS 9S 6S +# 7S QS 6H TS +# 3D AD KS 6D +# 7H QH KH JH +# +# Points: +# N: 14 +# E: 0 +# S: 0 +# W: 5 +IsTerminal() = False +History() = [0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44, 4, 16, 7, 27, 11, 15, 45, 29, 41, 9, 1, 21, 6, 25, 12, 36, 8, 40, 48, 28, 20, 13, 3, 51, 31, 19, 23, 43, 18, 35, 5, 49, 47, 17, 22, 42, 46, 38] +HistoryString() = "0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44, 4, 16, 7, 27, 11, 15, 45, 29, 41, 9, 1, 21, 6, 25, 12, 36, 8, 40, 48, 28, 20, 13, 3, 51, 31, 19, 23, 43, 18, 35, 5, 49, 47, 17, 22, 42, 46, 38" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "Pass Direction: No Pass\n\nHand: \nS none\nH 2\nD JT\nC none\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D QD 4D \n2D 7D 3H 8D \n 5C JC 4C QC \n AC 9C 7C 5D \n 2S AS 9S 6S \n 7S QS 6H TS \n3D AD KS 6D \n 7H QH KH JH \n\nPoints:\nN: 14\nE: 0\nS: 0\nW: 5" +InformationStateString(1) = "Pass Direction: No Pass\n\nHand: \nS none\nH A95\nD none\nC none\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D QD 4D \n2D 7D 3H 8D \n 5C JC 4C QC \n AC 9C 7C 5D \n 2S AS 9S 6S \n 7S QS 6H TS \n3D AD KS 6D \n 7H QH KH JH \n\nPoints:\nN: 14\nE: 0\nS: 0\nW: 5" +InformationStateString(2) = "Pass Direction: No Pass\n\nHand: \nS J\nH 4\nD none\nC T\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D QD 4D \n2D 7D 3H 8D \n 5C JC 4C QC \n AC 9C 7C 5D \n 2S AS 9S 6S \n 7S QS 6H TS \n3D AD KS 6D \n 7H QH KH JH \n\nPoints:\nN: 14\nE: 0\nS: 0\nW: 5" +InformationStateString(3) = "Pass Direction: No Pass\n\nHand: \nS none\nH T8\nD none\nC 8\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D QD 4D \n2D 7D 3H 8D \n 5C JC 4C QC \n AC 9C 7C 5D \n 2S AS 9S 6S \n 7S QS 6H TS \n3D AD KS 6D \n 7H QH KH JH \n\nPoints:\nN: 14\nE: 0\nS: 0\nW: 5" +InformationStateTensor(0): binvec(5088, 
0x86c0008114e140000000000000000000000000002000000044000ffffff000ffc000000ffc000000fffe000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000040000400000000000000000000000000000000000000000000000004000000000000000004000000002000000000000000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000800000000000000000080000080000000000000000000080000000000000000000000000000000000000000800000008000000000080000000000400000000000000000000000000000000000000000000000010000000000000000000000001000000010000000001000000000000000000000000000000000000000000000000000000000000000001000000000000000001000000200000000000000001000004000000000000000000000004000000000001000004000000000000000000000000000000000000000000000000000000000000000002000000000000000002000000000000020000000000200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(5088, 0x80096b60600006000000000000000000000000000002000200002ffffff000ffc000000ffc000000fffe000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000040000400000000000000000000000000000000000000000000000004000000000000000004000000002000000000000000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000800000000000000000080000080000000000000000000080000000000000000000000000000000000000000800000008000000000080000000000400000000000000000000000000000000000000000000000010000000000000000000000001000000010000000001000000000000000000000000000000000000000000000000000000000000000001000000000000000001000000200000000000000001000004000000000000000000000004000000000001000004000000000000000000000000000000000000000000000000000000000000000002000000000000000002000000000000020000000000200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(5088, 
0x89221000091e18000000000000000000000000000020000081000ffffff000ffc000000ffc000000fffe000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000040000400000000000000000000000000000000000000000000000004000000000000000004000000002000000000000000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000800000000000000000080000080000000000000000000080000000000000000000000000000000000000000800000008000000000080000000000400000000000000000000000000000000000000000000000010000000000000000000000001000000010000000001000000000000000000000000000000000000000000000000000000000000000001000000000000000001000000200000000000000001000004000000000000000000000004000000000001000004000000000000000000000000000000000000000000000000000000000000000002000000000000000002000000000000020000000000200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(3): binvec(5088, 0x8014841e8200a100000000000000000000000000000000a020000ffffff000ffc000000ffc000000fffe000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000040000400000000000000000000000000000000000000000000000004000000000000000004000000002000000000000000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000800000000000000000080000080000000000000000000080000000000000000000000000000000000000000800000008000000000080000000000400000000000000000000000000000000000000000000000010000000000000000000000001000000010000000001000000000000000000000000000000000000000000000000000000000000000001000000000000000001000000200000000000000001000004000000000000000000000004000000000001000004000000000000000000000000000000000000000000000000000000000000000002000000000000000002000000000000020000000000200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [24, 26, 34] +StringLegalActions() = ["8C", "8H", "TH"] + +# Apply action "8H" +action: 26 + +# State 94 +# Pass Direction: No Pass +# +# S +# H 2 +# D JT +# C +# S S +# H T H A95 +# D D +# C 8 C +# S J +# H 4 +# D +# C T +# +# Tricks: +# N E S W N E S +# 2C KC 3C 6C +# 3S 8S 4S 5S +# KD 9D QD 4D +# 2D 7D 3H 8D +# 5C JC 4C QC +# AC 9C 7C 5D +# 2S AS 9S 6S +# 7S QS 6H TS +# 3D AD KS 6D +# 7H QH KH JH +# 8H +# +# Points: +# N: 14 +# E: 0 +# S: 0 +# W: 5 +IsTerminal() = False +History() = [0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44, 4, 16, 7, 27, 11, 15, 45, 29, 41, 9, 1, 21, 6, 25, 12, 36, 8, 40, 48, 28, 20, 13, 3, 51, 31, 19, 23, 43, 18, 35, 5, 49, 47, 17, 22, 42, 46, 38, 26] +HistoryString() = "0, 33, 14, 35, 34, 1, 
19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44, 4, 16, 7, 27, 11, 15, 45, 29, 41, 9, 1, 21, 6, 25, 12, 36, 8, 40, 48, 28, 20, 13, 3, 51, 31, 19, 23, 43, 18, 35, 5, 49, 47, 17, 22, 42, 46, 38, 26" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "Pass Direction: No Pass\n\nHand: \nS none\nH 2\nD JT\nC none\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D QD 4D \n2D 7D 3H 8D \n 5C JC 4C QC \n AC 9C 7C 5D \n 2S AS 9S 6S \n 7S QS 6H TS \n3D AD KS 6D \n 7H QH KH JH \n 8H \n\nPoints:\nN: 14\nE: 0\nS: 0\nW: 5" +InformationStateString(1) = "Pass Direction: No Pass\n\nHand: \nS none\nH A95\nD none\nC none\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D QD 4D \n2D 7D 3H 8D \n 5C JC 4C QC \n AC 9C 7C 5D \n 2S AS 9S 6S \n 7S QS 6H TS \n3D AD KS 6D \n 7H QH KH JH \n 8H \n\nPoints:\nN: 14\nE: 0\nS: 0\nW: 5" +InformationStateString(2) = "Pass Direction: No Pass\n\nHand: \nS J\nH 4\nD none\nC T\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D QD 4D \n2D 7D 3H 8D \n 5C JC 4C QC \n AC 9C 7C 5D \n 2S AS 9S 6S \n 7S QS 6H TS \n3D AD KS 6D \n 7H QH KH JH \n 8H \n\nPoints:\nN: 14\nE: 0\nS: 0\nW: 5" +InformationStateString(3) = "Pass Direction: No Pass\n\nHand: \nS none\nH T\nD none\nC 8\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D QD 4D \n2D 7D 3H 8D \n 5C JC 4C QC \n AC 9C 7C 5D \n 2S AS 9S 6S \n 7S QS 6H TS \n3D AD KS 6D \n 7H QH KH JH \n 8H \n\nPoints:\nN: 14\nE: 0\nS: 0\nW: 5" +InformationStateTensor(0): binvec(5088, 0x86c0008114e140000000000000000000000000002000000044000ffffff000ffc000000ffc000000fffe000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000040000400000000000000000000000000000000000000000000000004000000000000000004000000002000000000000000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000800000000000000000080000080000000000000000000080000000000000000000000000000000000000000800000008000000000080000000000400000000000000000000000000000000000000000000000010000000000000000000000001000000010000000001000000000000000000000000000000000000000000000000000000000000000001000000000000000001000000200000000000000001000004000000000000000000000004000000000001000004000000000000000000000000000000000000000000000000000000000000000002000000000000000002000000000000020000000000200000000000000000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(5088, 
0x80096b60600006000000000000000000000000000002000200002ffffff000ffc000000ffc000000fffe000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000040000400000000000000000000000000000000000000000000000004000000000000000004000000002000000000000000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000800000000000000000080000080000000000000000000080000000000000000000000000000000000000000800000008000000000080000000000400000000000000000000000000000000000000000000000010000000000000000000000001000000010000000001000000000000000000000000000000000000000000000000000000000000000001000000000000000001000000200000000000000001000004000000000000000000000004000000000001000004000000000000000000000000000000000000000000000000000000000000000002000000000000000002000000000000020000000000200000000000000000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(5088, 0x89221000091e18000000000000000000000000000020000081000ffffff000ffc000000ffc000000fffe000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000040000400000000000000000000000000000000000000000000000004000000000000000004000000002000000000000000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000800000000000000000080000080000000000000000000080000000000000000000000000000000000000000800000008000000000080000000000400000000000000000000000000000000000000000000000010000000000000000000000001000000010000000001000000000000000000000000000000000000000000000000000000000000000001000000000000000001000000200000000000000001000004000000000000000000000004000000000001000004000000000000000000000000000000000000000000000000000000000000000002000000000000000002000000000000020000000000200000000000000000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(3): binvec(5088, 
0x8014841e8200a1000000000000000000000000000000008020000ffffff000ffc000000ffc000000fffe000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000040000400000000000000000000000000000000000000000000000004000000000000000004000000002000000000000000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000800000000000000000080000080000000000000000000080000000000000000000000000000000000000000800000008000000000080000000000400000000000000000000000000000000000000000000000010000000000000000000000001000000010000000001000000000000000000000000000000000000000000000000000000000000000001000000000000000001000000200000000000000001000004000000000000000000000004000000000001000004000000000000000000000000000000000000000000000000000000000000000002000000000000000002000000000000020000000000200000000000000000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [2] +StringLegalActions() = ["2H"] + +# Apply action "2H" +action: 2 + +# State 95 +# Pass Direction: No Pass +# +# S +# H +# D JT +# C +# S S +# H T H A95 +# D D +# C 8 C +# S J +# H 4 +# D +# C T +# +# Tricks: +# N E S W N E S +# 2C KC 3C 6C +# 3S 8S 4S 5S +# KD 9D QD 4D +# 2D 7D 3H 8D +# 5C JC 4C QC +# AC 9C 7C 5D +# 2S AS 9S 6S +# 7S QS 6H TS +# 3D AD KS 6D +# 7H QH KH JH +# 8H 2H +# +# Points: +# N: 14 +# E: 0 +# S: 0 +# W: 5 +IsTerminal() = False +History() = [0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44, 4, 16, 7, 27, 11, 15, 45, 29, 41, 9, 1, 21, 6, 25, 12, 36, 8, 40, 48, 28, 20, 13, 3, 51, 31, 19, 23, 43, 18, 35, 5, 49, 47, 17, 22, 42, 46, 38, 26, 2] +HistoryString() = "0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44, 4, 16, 7, 27, 11, 15, 45, 29, 41, 9, 1, 21, 6, 25, 12, 36, 8, 40, 48, 28, 20, 13, 3, 51, 31, 19, 23, 43, 18, 35, 5, 49, 47, 17, 22, 42, 46, 38, 26, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "Pass Direction: No Pass\n\nHand: \nS none\nH none\nD JT\nC none\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D QD 4D \n2D 7D 3H 8D \n 5C JC 4C QC \n AC 9C 7C 5D \n 2S AS 9S 6S \n 7S QS 6H TS \n3D AD KS 6D \n 7H QH KH JH \n 8H 2H \n\nPoints:\nN: 14\nE: 0\nS: 0\nW: 5" +InformationStateString(1) = "Pass Direction: No Pass\n\nHand: \nS none\nH A95\nD none\nC none\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D QD 4D \n2D 7D 3H 8D \n 5C JC 4C QC \n AC 9C 7C 5D \n 2S AS 9S 6S \n 7S QS 6H TS \n3D AD KS 6D \n 7H QH KH JH \n 8H 2H \n\nPoints:\nN: 14\nE: 0\nS: 0\nW: 5" +InformationStateString(2) = "Pass Direction: No Pass\n\nHand: \nS J\nH 4\nD none\nC T\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D QD 4D \n2D 7D 3H 8D \n 5C JC 4C QC \n AC 9C 7C 5D \n 2S AS 9S 6S \n 7S QS 6H TS \n3D AD KS 6D \n 7H QH KH JH \n 8H 2H \n\nPoints:\nN: 14\nE: 0\nS: 
0\nW: 5" +InformationStateString(3) = "Pass Direction: No Pass\n\nHand: \nS none\nH T\nD none\nC 8\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D QD 4D \n2D 7D 3H 8D \n 5C JC 4C QC \n AC 9C 7C 5D \n 2S AS 9S 6S \n 7S QS 6H TS \n3D AD KS 6D \n 7H QH KH JH \n 8H 2H \n\nPoints:\nN: 14\nE: 0\nS: 0\nW: 5" +InformationStateTensor(0): binvec(5088, 0x86c0008114e140000000000000000000000000000000000044000ffffff000ffc000000ffc000000fffe000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000040000400000000000000000000000000000000000000000000000004000000000000000004000000002000000000000000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000800000000000000000080000080000000000000000000080000000000000000000000000000000000000000800000008000000000080000000000400000000000000000000000000000000000000000000000010000000000000000000000001000000010000000001000000000000000000000000000000000000000000000000000000000000000001000000000000000001000000200000000000000001000004000000000000000000000004000000000001000004000000000000000000000000000000000000000000000000000000000000000002000000000000000002000000000000020000000000200000000000000000000000000000000000000000000000000000000000000000000000000200000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(5088, 0x80096b60600006000000000000000000000000000002000200002ffffff000ffc000000ffc000000fffe000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000040000400000000000000000000000000000000000000000000000004000000000000000004000000002000000000000000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000800000000000000000080000080000000000000000000080000000000000000000000000000000000000000800000008000000000080000000000400000000000000000000000000000000000000000000000010000000000000000000000001000000010000000001000000000000000000000000000000000000000000000000000000000000000001000000000000000001000000200000000000000001000004000000000000000000000004000000000001000004000000000000000000000000000000000000000000000000000000000000000002000000000000000002000000000000020000000000200000000000000000000000000000000000000000000000000000000000000000000000000200000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(5088, 
0x89221000091e18000000000000000000000000000020000081000ffffff000ffc000000ffc000000fffe000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000040000400000000000000000000000000000000000000000000000004000000000000000004000000002000000000000000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000800000000000000000080000080000000000000000000080000000000000000000000000000000000000000800000008000000000080000000000400000000000000000000000000000000000000000000000010000000000000000000000001000000010000000001000000000000000000000000000000000000000000000000000000000000000001000000000000000001000000200000000000000001000004000000000000000000000004000000000001000004000000000000000000000000000000000000000000000000000000000000000002000000000000000002000000000000020000000000200000000000000000000000000000000000000000000000000000000000000000000000000200000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(3): binvec(5088, 0x8014841e8200a1000000000000000000000000000000008020000ffffff000ffc000000ffc000000fffe000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000040000400000000000000000000000000000000000000000000000004000000000000000004000000002000000000000000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000800000000000000000080000080000000000000000000080000000000000000000000000000000000000000800000008000000000080000000000400000000000000000000000000000000000000000000000010000000000000000000000001000000010000000001000000000000000000000000000000000000000000000000000000000000000001000000000000000001000000200000000000000001000004000000000000000000000004000000000001000004000000000000000000000000000000000000000000000000000000000000000002000000000000000002000000000000020000000000200000000000000000000000000000000000000000000000000000000000000000000000000200000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [14, 30, 50] +StringLegalActions() = ["5H", "9H", "AH"] + +# Apply action "AH" +action: 50 + +# State 96 +# Pass Direction: No Pass +# +# S +# H +# D JT +# C +# S S +# H T H 95 +# D D +# C 8 C +# S J +# H 4 +# D +# C T +# +# Tricks: +# N E S W N E S +# 2C KC 3C 6C +# 3S 8S 4S 5S +# KD 9D QD 4D +# 2D 7D 3H 8D +# 5C JC 4C QC +# AC 9C 7C 5D +# 2S AS 9S 6S +# 7S QS 6H TS +# 3D AD KS 6D +# 7H QH KH JH +# 8H 2H AH +# +# Points: +# N: 14 +# E: 0 +# S: 0 +# W: 5 +IsTerminal() = False +History() = [0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44, 4, 16, 7, 27, 11, 15, 45, 29, 41, 9, 1, 21, 6, 25, 12, 36, 8, 40, 48, 28, 20, 13, 3, 51, 31, 19, 23, 43, 18, 35, 5, 49, 47, 17, 22, 42, 46, 38, 26, 2, 50] +HistoryString() = "0, 33, 14, 
35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44, 4, 16, 7, 27, 11, 15, 45, 29, 41, 9, 1, 21, 6, 25, 12, 36, 8, 40, 48, 28, 20, 13, 3, 51, 31, 19, 23, 43, 18, 35, 5, 49, 47, 17, 22, 42, 46, 38, 26, 2, 50" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "Pass Direction: No Pass\n\nHand: \nS none\nH none\nD JT\nC none\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D QD 4D \n2D 7D 3H 8D \n 5C JC 4C QC \n AC 9C 7C 5D \n 2S AS 9S 6S \n 7S QS 6H TS \n3D AD KS 6D \n 7H QH KH JH \n 8H 2H AH \n\nPoints:\nN: 14\nE: 0\nS: 0\nW: 5" +InformationStateString(1) = "Pass Direction: No Pass\n\nHand: \nS none\nH 95\nD none\nC none\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D QD 4D \n2D 7D 3H 8D \n 5C JC 4C QC \n AC 9C 7C 5D \n 2S AS 9S 6S \n 7S QS 6H TS \n3D AD KS 6D \n 7H QH KH JH \n 8H 2H AH \n\nPoints:\nN: 14\nE: 0\nS: 0\nW: 5" +InformationStateString(2) = "Pass Direction: No Pass\n\nHand: \nS J\nH 4\nD none\nC T\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D QD 4D \n2D 7D 3H 8D \n 5C JC 4C QC \n AC 9C 7C 5D \n 2S AS 9S 6S \n 7S QS 6H TS \n3D AD KS 6D \n 7H QH KH JH \n 8H 2H AH \n\nPoints:\nN: 14\nE: 0\nS: 0\nW: 5" +InformationStateString(3) = "Pass Direction: No Pass\n\nHand: \nS none\nH T\nD none\nC 8\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D QD 4D \n2D 7D 3H 8D \n 5C JC 4C QC \n AC 9C 7C 5D \n 2S AS 9S 6S \n 7S QS 6H TS \n3D AD KS 6D \n 7H QH KH JH \n 8H 2H AH \n\nPoints:\nN: 14\nE: 0\nS: 0\nW: 5" +InformationStateTensor(0): binvec(5088, 0x86c0008114e140000000000000000000000000000000000044000ffffff000ffc000000ffc000000fffe000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000040000400000000000000000000000000000000000000000000000004000000000000000004000000002000000000000000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000800000000000000000080000080000000000000000000080000000000000000000000000000000000000000800000008000000000080000000000400000000000000000000000000000000000000000000000010000000000000000000000001000000010000000001000000000000000000000000000000000000000000000000000000000000000001000000000000000001000000200000000000000001000004000000000000000000000004000000000001000004000000000000000000000000000000000000000000000000000000000000000002000000000000000002000000000000020000000000200000000000000000000000000000000000000000000000000000000000000000000000000200000020000000000000000000000002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(5088, 
0x80096b60600006000000000000000000000000000002000200000ffffff000ffc000000ffc000000fffe000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000040000400000000000000000000000000000000000000000000000004000000000000000004000000002000000000000000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000800000000000000000080000080000000000000000000080000000000000000000000000000000000000000800000008000000000080000000000400000000000000000000000000000000000000000000000010000000000000000000000001000000010000000001000000000000000000000000000000000000000000000000000000000000000001000000000000000001000000200000000000000001000004000000000000000000000004000000000001000004000000000000000000000000000000000000000000000000000000000000000002000000000000000002000000000000020000000000200000000000000000000000000000000000000000000000000000000000000000000000000200000020000000000000000000000002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(5088, 0x89221000091e18000000000000000000000000000020000081000ffffff000ffc000000ffc000000fffe000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000040000400000000000000000000000000000000000000000000000004000000000000000004000000002000000000000000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000800000000000000000080000080000000000000000000080000000000000000000000000000000000000000800000008000000000080000000000400000000000000000000000000000000000000000000000010000000000000000000000001000000010000000001000000000000000000000000000000000000000000000000000000000000000001000000000000000001000000200000000000000001000004000000000000000000000004000000000001000004000000000000000000000000000000000000000000000000000000000000000002000000000000000002000000000000020000000000200000000000000000000000000000000000000000000000000000000000000000000000000200000020000000000000000000000002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(3): binvec(5088, 
0x8014841e8200a1000000000000000000000000000000008020000ffffff000ffc000000ffc000000fffe000000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000040000400000000000000000000000000000000000000000000000004000000000000000004000000002000000000000000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000800000000000000000080000080000000000000000000080000000000000000000000000000000000000000800000008000000000080000000000400000000000000000000000000000000000000000000000010000000000000000000000001000000010000000001000000000000000000000000000000000000000000000000000000000000000001000000000000000001000000200000000000000001000004000000000000000000000004000000000001000004000000000000000000000000000000000000000000000000000000000000000002000000000000000002000000000000020000000000200000000000000000000000000000000000000000000000000000000000000000000000000200000020000000000000000000000002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [10] +StringLegalActions() = ["4H"] + +# Apply action "4H" +action: 10 + +# State 97 +# Apply action "5H" +action: 14 + +# State 98 +# Apply action "JS" +action: 39 + +# State 99 +# Apply action "TH" +action: 34 + +# State 100 +# Apply action "JD" +action: 37 + +# State 101 +# Apply action "8C" +action: 24 + +# State 102 +# Apply action "TD" +action: 33 + +# State 103 +# Apply action "9H" +action: 30 + +# State 104 +# Apply action "TC" +action: 32 + +# State 105 +# Pass Direction: No Pass +# +# S Q98 +# H J2 +# D KJT32 +# C J73 +# S A73 S 64 +# H KT8 H A9765 +# D 864 D A975 +# C K985 C 64 +# S KJT52 +# H Q43 +# D Q +# C AQT2 +# +# Tricks: +# N E S W N E S +# 2C KC 3C 6C +# 3S 8S 4S 5S +# KD 9D QD 4D +# 2D 7D 3H 8D +# 5C JC 4C QC +# AC 9C 7C 5D +# 2S AS 9S 6S +# 7S QS 6H TS +# 3D AD KS 6D +# 7H QH KH JH +# 8H 2H AH 4H +# 5H JS TH JD +# 8C TD 9H TC +# +# Points: +# N: 14 +# E: 4 +# S: 1 +# W: 7 +IsTerminal() = True +History() = [0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44, 4, 16, 7, 27, 11, 15, 45, 29, 41, 9, 1, 21, 6, 25, 12, 36, 8, 40, 48, 28, 20, 13, 3, 51, 31, 19, 23, 43, 18, 35, 5, 49, 47, 17, 22, 42, 46, 38, 26, 2, 50, 10, 14, 39, 34, 37, 24, 33, 30, 32] +HistoryString() = "0, 33, 14, 35, 34, 1, 19, 15, 7, 2, 16, 48, 17, 31, 8, 42, 9, 43, 49, 6, 46, 37, 50, 3, 12, 38, 30, 32, 44, 27, 13, 40, 24, 5, 11, 47, 26, 36, 29, 10, 23, 45, 21, 0, 51, 20, 22, 41, 28, 4, 18, 39, 25, 0, 44, 4, 16, 7, 27, 11, 15, 45, 29, 41, 9, 1, 21, 6, 25, 12, 36, 8, 40, 48, 28, 20, 13, 3, 51, 31, 19, 23, 43, 18, 35, 5, 49, 47, 17, 22, 42, 46, 38, 26, 2, 50, 10, 14, 39, 34, 37, 24, 33, 30, 32" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "Pass Direction: No Pass\n\n S Q98\n H J2\n D KJT32\n C J73\nS A73 S 64\nH KT8 H A9765\nD 864 D A975\nC K985 C 64\n S KJT52\n H Q43\n D Q\n C AQT2\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D QD 4D \n2D 7D 3H 8D \n 5C JC 4C QC \n AC 9C 7C 5D \n 2S AS 9S 6S \n 7S QS 6H TS \n3D AD KS 6D \n 7H QH KH JH \n 8H 2H AH 4H \n 5H JS 
TH JD \n 8C TD 9H TC \n\nPoints:\nN: 14\nE: 4\nS: 1\nW: 7" +InformationStateString(1) = "Pass Direction: No Pass\n\n S Q98\n H J2\n D KJT32\n C J73\nS A73 S 64\nH KT8 H A9765\nD 864 D A975\nC K985 C 64\n S KJT52\n H Q43\n D Q\n C AQT2\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D QD 4D \n2D 7D 3H 8D \n 5C JC 4C QC \n AC 9C 7C 5D \n 2S AS 9S 6S \n 7S QS 6H TS \n3D AD KS 6D \n 7H QH KH JH \n 8H 2H AH 4H \n 5H JS TH JD \n 8C TD 9H TC \n\nPoints:\nN: 14\nE: 4\nS: 1\nW: 7" +InformationStateString(2) = "Pass Direction: No Pass\n\n S Q98\n H J2\n D KJT32\n C J73\nS A73 S 64\nH KT8 H A9765\nD 864 D A975\nC K985 C 64\n S KJT52\n H Q43\n D Q\n C AQT2\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D QD 4D \n2D 7D 3H 8D \n 5C JC 4C QC \n AC 9C 7C 5D \n 2S AS 9S 6S \n 7S QS 6H TS \n3D AD KS 6D \n 7H QH KH JH \n 8H 2H AH 4H \n 5H JS TH JD \n 8C TD 9H TC \n\nPoints:\nN: 14\nE: 4\nS: 1\nW: 7" +InformationStateString(3) = "Pass Direction: No Pass\n\n S Q98\n H J2\n D KJT32\n C J73\nS A73 S 64\nH KT8 H A9765\nD 864 D A975\nC K985 C 64\n S KJT52\n H Q43\n D Q\n C AQT2\n\nTricks:\nN E S W N E S\n 2C KC 3C 6C \n 3S 8S 4S 5S \nKD 9D QD 4D \n2D 7D 3H 8D \n 5C JC 4C QC \n AC 9C 7C 5D \n 2S AS 9S 6S \n 7S QS 6H TS \n3D AD KS 6D \n 7H QH KH JH \n 8H 2H AH 4H \n 5H JS TH JD \n 8C TD 9H TC \n\nPoints:\nN: 14\nE: 4\nS: 1\nW: 7" +InformationStateTensor(0): binvec(5088, 0x86c0008114e140000000000000000000000000000000000000000ffffff000fffc00000ffe000000ffff800000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000040000400000000000000000000000000000000000000000000000004000000000000000004000000002000000000000000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000800000000000000000080000080000000000000000000080000000000000000000000000000000000000000800000008000000000080000000000400000000000000000000000000000000000000000000000010000000000000000000000001000000010000000001000000000000000000000000000000000000000000000000000000000000000001000000000000000001000000200000000000000001000004000000000000000000000004000000000001000004000000000000000000000000000000000000000000000000000000000000000002000000000000000002000000000000020000000000200000000000000000000000000000000000000000000000000000000000000000000000000200000020000000000000000000000002002000000000000000000000000002000000000000000000100000000000200000000000004000000000000000000000000000000000000000000000000000000000000000000000000008000000000000004000000000002000000000000080000) +InformationStateTensor(1): binvec(5088, 
0x80096b60600006000000000000000000000000000000000000000ffffff000fffc00000ffe000000ffff800000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000040000400000000000000000000000000000000000000000000000004000000000000000004000000002000000000000000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000800000000000000000080000080000000000000000000080000000000000000000000000000000000000000800000008000000000080000000000400000000000000000000000000000000000000000000000010000000000000000000000001000000010000000001000000000000000000000000000000000000000000000000000000000000000001000000000000000001000000200000000000000001000004000000000000000000000004000000000001000004000000000000000000000000000000000000000000000000000000000000000002000000000000000002000000000000020000000000200000000000000000000000000000000000000000000000000000000000000000000000000200000020000000000000000000000002002000000000000000000000000002000000000000000000100000000000200000000000004000000000000000000000000000000000000000000000000000000000000000000000000008000000000000004000000000002000000000000080000) +InformationStateTensor(2): binvec(5088, 0x89221000091e18000000000000000000000000000000000000000ffffff000fffc00000ffe000000ffff800000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000040000400000000000000000000000000000000000000000000000004000000000000000004000000002000000000000000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000800000000000000000080000080000000000000000000080000000000000000000000000000000000000000800000008000000000080000000000400000000000000000000000000000000000000000000000010000000000000000000000001000000010000000001000000000000000000000000000000000000000000000000000000000000000001000000000000000001000000200000000000000001000004000000000000000000000004000000000001000004000000000000000000000000000000000000000000000000000000000000000002000000000000000002000000000000020000000000200000000000000000000000000000000000000000000000000000000000000000000000000200000020000000000000000000000002002000000000000000000000000002000000000000000000100000000000200000000000004000000000000000000000000000000000000000000000000000000000000000000000000008000000000000004000000000002000000000000080000) +InformationStateTensor(3): binvec(5088, 
0x8014841e8200a1000000000000000000000000000000000000000ffffff000fffc00000ffe000000ffff800000000000000000000000000000080000000000000000000000080080000000000000008000000000000000000000000000000000000000000000000000000000000010000000000000000010000000010000000000000100000000000000000000400000000400000000000000040000400000000000000000000000000000000000000000000000004000000000000000004000000002000000000000000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000800000000000000000080000080000000000000000000080000000000000000000000000000000000000000800000008000000000080000000000400000000000000000000000000000000000000000000000010000000000000000000000001000000010000000001000000000000000000000000000000000000000000000000000000000000000001000000000000000001000000200000000000000001000004000000000000000000000004000000000001000004000000000000000000000000000000000000000000000000000000000000000002000000000000000002000000000000020000000000200000000000000000000000000000000000000000000000000000000000000000000000000200000020000000000000000000000002002000000000000000000000000002000000000000000000100000000000200000000000004000000000000000000000000000000000000000000000000000000000000000000000000008000000000000004000000000002000000000000080000) +Rewards() = [12, 22, 25, 19] +Returns() = [12, 22, 25, 19] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/hex(board_size=5).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/hex(board_size=5).txt new file mode 100644 index 0000000..d234e39 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/hex(board_size=5).txt @@ -0,0 +1,370 @@ +game: hex(board_size=5) + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Hex" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["board_size", "num_cols", "num_rows", "string_rep"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "hex" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 25 +PolicyTensorShape() = [25] +MaxChanceOutcomes() = 0 +GetParameters() = {board_size=5,num_cols=5,num_rows=5,string_rep=standard} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [9, 5, 5] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 225 +MaxGameLength() = 25 +ToString() = "hex(board_size=5)" + +# State 0 +# . . . . . +# . . . . . +# . . . . . +# . . . . . +# . . . . . +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = ". . . . . \n . . . . . \n . . . . . \n . . . . . \n . . . . . " +ObservationString(1) = ". . . . . \n . . . . . \n . . . . . \n . . . . . \n . . . . . 
" +ObservationTensor(0): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] +StringLegalActions() = ["a1", "b1", "c1", "d1", "e1", "a2", "b2", "c2", "d2", "e2", "a3", "b3", "c3", "d3", "e3", "a4", "b4", "c4", "d4", "e4", "a5", "b5", "c5", "d5", "e5"] + +# Apply action "a2" +action: 5 + +# State 1 +# . . . . . +# x . . . . +# . . . . . +# . . . . . +# . . . . . +IsTerminal() = False +History() = [5] +HistoryString() = "5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "5" +InformationStateString(1) = "5" +ObservationString(0) = ". . . . . \n x . . . . \n . . . . . \n . . . . . \n . . . . . " +ObservationString(1) = ". . . . . \n x . . . . \n . . . . . \n . . . . . \n . . . . . " +ObservationTensor(0): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◉◉◉◉ ◉◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◉◉◉◉ ◉◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] +StringLegalActions() = ["a1", "b1", "c1", "d1", "e1", "b2", "c2", "d2", "e2", "a3", "b3", "c3", "d3", "e3", "a4", "b4", "c4", "d4", "e4", "a5", "b5", "c5", "d5", "e5"] + +# Apply action "d4" +action: 18 + +# State 2 +# . . . . . +# x . . . . +# . . . . . +# . . . o . +# . . . . . +IsTerminal() = False +History() = [5, 18] +HistoryString() = "5, 18" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "5, 18" +InformationStateString(1) = "5, 18" +ObservationString(0) = ". . . . . \n x . . . . \n . . . . . \n . . . o . \n . . . . . " +ObservationString(1) = ". . . . . \n x . . . . \n . . . . . \n . . . o . \n . . . . . 
" +ObservationTensor(0): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◉◉◉◉ ◉◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◉◉◉◯◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◉◉◉◉ ◉◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◉◉◉◯◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21, 22, 23, 24] +StringLegalActions() = ["a1", "b1", "c1", "d1", "e1", "b2", "c2", "d2", "e2", "a3", "b3", "c3", "d3", "e3", "a4", "b4", "c4", "e4", "a5", "b5", "c5", "d5", "e5"] + +# Apply action "d5" +action: 23 + +# State 3 +# . . . . . +# x . . . . +# . . . . . +# . . . o . +# . . . x . +IsTerminal() = False +History() = [5, 18, 23] +HistoryString() = "5, 18, 23" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "5, 18, 23" +InformationStateString(1) = "5, 18, 23" +ObservationString(0) = ". . . . . \n x . . . . \n . . . . . \n . . . o . \n . . . x . " +ObservationString(1) = ". . . . . \n x . . . . \n . . . . . \n . . . o . \n . . . x . " +ObservationTensor(0): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◉◉◉◉ ◉◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◉◉◉◯◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◯◉ ◯◯◯◯◯ ◯◯◯◉◯ ◯◯◯◯◯ ◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◉◉◉◉ ◉◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◉◉◉◯◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◯◉ ◯◯◯◯◯ ◯◯◯◉◯ ◯◯◯◯◯ ◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21, 22, 24] +StringLegalActions() = ["a1", "b1", "c1", "d1", "e1", "b2", "c2", "d2", "e2", "a3", "b3", "c3", "d3", "e3", "a4", "b4", "c4", "e4", "a5", "b5", "c5", "e5"] + +# Apply action "d1" +action: 3 + +# State 4 +# . . . o . +# x . . . . +# . . . . . +# . . . o . +# . . . x . +IsTerminal() = False +History() = [5, 18, 23, 3] +HistoryString() = "5, 18, 23, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "5, 18, 23, 3" +InformationStateString(1) = "5, 18, 23, 3" +ObservationString(0) = ". . . o . \n x . . . . \n . . . . . \n . . . o . \n . . . x . " +ObservationString(1) = ". . . o . \n x . . . . \n . . . . . \n . . . o . \n . . . x . 
" +ObservationTensor(0): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◉◉◉◯◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◉◉◉◉ ◉◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◉◉◉◯◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◯◉ ◯◯◯◯◯ ◯◯◯◉◯ ◯◯◯◯◯ ◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◉◉◉◯◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◉◉◉◉ ◉◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◉◉◉◯◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◯◉ ◯◯◯◯◯ ◯◯◯◉◯ ◯◯◯◯◯ ◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [0, 1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21, 22, 24] +StringLegalActions() = ["a1", "b1", "c1", "e1", "b2", "c2", "d2", "e2", "a3", "b3", "c3", "d3", "e3", "a4", "b4", "c4", "e4", "a5", "b5", "c5", "e5"] + +# Apply action "b4" +action: 16 + +# State 5 +# . . . o . +# x . . . . +# . . . . . +# . x . o . +# . . . x . +IsTerminal() = False +History() = [5, 18, 23, 3, 16] +HistoryString() = "5, 18, 23, 3, 16" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "5, 18, 23, 3, 16" +InformationStateString(1) = "5, 18, 23, 3, 16" +ObservationString(0) = ". . . o . \n x . . . . \n . . . . . \n . x . o . \n . . . x . " +ObservationString(1) = ". . . o . \n x . . . . \n . . . . . \n . x . o . \n . . . x . " +ObservationTensor(0): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◉◉◉◯◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◉◉◉◉ ◉◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◉◯◉◯◉ ◯◉◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◯◉ ◯◯◯◯◯ ◯◯◯◉◯ ◯◯◯◯◯ ◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◉◉◉◯◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◉◉◉◉ ◉◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◉◯◉◯◉ ◯◉◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◯◉ ◯◯◯◯◯ ◯◯◯◉◯ ◯◯◯◯◯ ◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [0, 1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 19, 20, 21, 22, 24] +StringLegalActions() = ["a1", "b1", "c1", "e1", "b2", "c2", "d2", "e2", "a3", "b3", "c3", "d3", "e3", "a4", "c4", "e4", "a5", "b5", "c5", "e5"] + +# Apply action "a4" +action: 15 + +# State 6 +# Apply action "a3" +action: 10 + +# State 7 +# Apply action "d3" +action: 13 + +# State 8 +# Apply action "e3" +action: 14 + +# State 9 +# Apply action "e4" +action: 19 + +# State 10 +# Apply action "c1" +action: 2 + +# State 11 +# Apply action "c5" +action: 22 + +# State 12 +# Apply action "a5" +action: 20 + +# State 13 +# Apply action "d2" +action: 8 + +# State 14 +# Apply action "c4" +action: 17 + +# State 15 +# Apply action "c2" +action: 7 + +# State 16 +# Apply action "e5" +action: 24 + +# State 17 +# Apply action "c3" +action: 12 + +# State 18 +# Apply action "b2" +action: 6 + +# State 19 +# Apply action "e2" +action: 9 + +# State 20 +# . . x o . +# x x o o o +# x . o o x +# o x x o o +# x . 
o x x +IsTerminal() = False +History() = [5, 18, 23, 3, 16, 15, 10, 13, 14, 19, 2, 22, 20, 8, 17, 7, 24, 12, 6, 9] +HistoryString() = "5, 18, 23, 3, 16, 15, 10, 13, 14, 19, 2, 22, 20, 8, 17, 7, 24, 12, 6, 9" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "5, 18, 23, 3, 16, 15, 10, 13, 14, 19, 2, 22, 20, 8, 17, 7, 24, 12, 6, 9" +InformationStateString(1) = "5, 18, 23, 3, 16, 15, 10, 13, 14, 19, 2, 22, 20, 8, 17, 7, 24, 12, 6, 9" +ObservationString(0) = ". . x o . \n x x o o o \n x . o o x \n o x x o o \n x . o x x " +ObservationString(1) = ". . x o . \n x x o o o \n x . o o x \n o x x o o \n x . o x x " +ObservationTensor(0): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◯◯◯◯◯ ◉◉◯◯◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◉◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◉◉◯ ◯◯◯◯◯ ◯◉◯◯◯ ◯◯◯◯◉ ◯◯◯◯◯ ◉◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◉◯◯◯◯ ◯◯◯◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◉◉◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◉◯◯ ◯◯◯◯◯ ◯◉◯◯◯ ◯◯◯◯◯ ◉◯◯◉◉ ◯◯◯◯◯ ◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◯◯◯◯◯ ◉◉◯◯◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◉◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◉◉◯ ◯◯◯◯◯ ◯◉◯◯◯ ◯◯◯◯◉ ◯◯◯◯◯ ◉◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◉◯◯◯◯ ◯◯◯◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◉◉◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◉◯◯ ◯◯◯◯◯ ◯◉◯◯◯ ◯◯◯◯◯ ◉◯◯◉◉ ◯◯◯◯◯ ◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [0, 1, 4, 11, 21] +StringLegalActions() = ["a1", "b1", "e1", "b3", "b5"] + +# Apply action "b3" +action: 11 + +# State 21 +# . . x o . +# x x o o o +# x x o o x +# o x x o o +# x . o x x +IsTerminal() = True +History() = [5, 18, 23, 3, 16, 15, 10, 13, 14, 19, 2, 22, 20, 8, 17, 7, 24, 12, 6, 9, 11] +HistoryString() = "5, 18, 23, 3, 16, 15, 10, 13, 14, 19, 2, 22, 20, 8, 17, 7, 24, 12, 6, 9, 11" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "5, 18, 23, 3, 16, 15, 10, 13, 14, 19, 2, 22, 20, 8, 17, 7, 24, 12, 6, 9, 11" +InformationStateString(1) = "5, 18, 23, 3, 16, 15, 10, 13, 14, 19, 2, 22, 20, 8, 17, 7, 24, 12, 6, 9, 11" +ObservationString(0) = ". . x o . \n x x o o o \n x x o o x \n o x x o o \n x . o x x " +ObservationString(1) = ". . x o . \n x x o o o \n x x o o x \n o x x o o \n x . 
o x x " +ObservationTensor(0): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◯◯◯◯◯ ◉◉◯◯◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◉◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◉◉◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◉ ◯◯◯◯◯ ◉◯◯◯◯ ◯◉◯◯◯ +◯◯◯◯◯ ◉◯◯◯◯ ◯◯◯◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◉◉◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◉◯◯ ◯◯◯◯◯ ◯◉◯◯◯ ◯◯◯◯◯ ◉◯◯◉◉ ◯◯◯◯◯ ◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◯◯◯◯◯ ◉◉◯◯◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◉◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◉◉◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◉ ◯◯◯◯◯ ◉◯◯◯◯ ◯◉◯◯◯ +◯◯◯◯◯ ◉◯◯◯◯ ◯◯◯◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◉◉◯◯ ◯◯◯◯◯ ◯◯◯◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◉◯◯ ◯◯◯◯◯ ◯◉◯◯◯ ◯◯◯◯◯ ◉◯◯◉◉ ◯◯◯◯◯ ◯◯◯◯◯ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/hive.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/hive.txt new file mode 100644 index 0000000..318ed7d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/hive.txt @@ -0,0 +1,1598 @@ +game: hive + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Hive" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["ansi_color_output", "board_size", "uses_ladybug", "uses_mosquito", "uses_pillbug"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "hive" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 5489 +PolicyTensorShape() = [5489] +MaxChanceOutcomes() = 0 +GetParameters() = {ansi_color_output=False,board_size=8,uses_ladybug=True,uses_mosquito=True,uses_pillbug=True} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [23, 17, 17] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 6647 +MaxGameLength() = 500 +ToString() = "hive()" + +# State 0 +# Base+MLP;NotStarted;White[1]; +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = "Base+MLP;NotStarted;White[1];" +ObservationString(1) = "Base+MLP;NotStarted;White[1];" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [209, 412, 615, 818, 1021, 1224, 1427, 1630, 1833, 2036, 2239, 2442, 2645] +StringLegalActions() = ["wA1", "wA2", "wA3", "wG1", "wG2", "wG3", "wS1", "wS2", "wB1", "wB2", "wM", "wL", "wP"] + +# Apply action "wS2" +action: 1630 + +# State 1 +# Base+MLP;InProgress;Black[1];wS2 +IsTerminal() = False +History() = [1630] +HistoryString() = "1630" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "1630" +InformationStateString(1) = "1630" +ObservationString(0) = "Base+MLP;InProgress;Black[1];wS2" +ObservationString(1) = "Base+MLP;InProgress;Black[1];wS2" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2996, 2997, 2998, 2999, 3000, 3001, 3192, 3193, 3194, 3195, 3196, 3197, 3388, 3389, 3390, 3391, 3392, 3393, 3584, 3585, 3586, 3587, 3588, 3589, 
3780, 3781, 3782, 3783, 3784, 3785, 3976, 3977, 3978, 3979, 3980, 3981, 4172, 4173, 4174, 4175, 4176, 4177, 4368, 4369, 4370, 4371, 4372, 4373, 4564, 4565, 4566, 4567, 4568, 4569, 4760, 4761, 4762, 4763, 4764, 4765, 4956, 4957, 4958, 4959, 4960, 4961, 5152, 5153, 5154, 5155, 5156, 5157, 5348, 5349, 5350, 5351, 5352, 5353] +StringLegalActions() = ["bA1 wS2/", "bA1 wS2-", "bA1 wS2\", "bA1 /wS2", "bA1 -wS2", "bA1 \wS2", "bA2 wS2/", "bA2 wS2-", "bA2 wS2\", "bA2 /wS2", "bA2 -wS2", "bA2 \wS2", "bA3 wS2/", "bA3 wS2-", "bA3 wS2\", "bA3 /wS2", "bA3 -wS2", "bA3 \wS2", "bG1 wS2/", "bG1 wS2-", "bG1 wS2\", "bG1 /wS2", "bG1 -wS2", "bG1 \wS2", "bG2 wS2/", "bG2 wS2-", "bG2 wS2\", "bG2 /wS2", "bG2 -wS2", "bG2 \wS2", "bG3 wS2/", "bG3 wS2-", "bG3 wS2\", "bG3 /wS2", "bG3 -wS2", "bG3 \wS2", "bS1 wS2/", "bS1 wS2-", "bS1 wS2\", "bS1 /wS2", "bS1 -wS2", "bS1 \wS2", "bS2 wS2/", "bS2 wS2-", "bS2 wS2\", "bS2 /wS2", "bS2 -wS2", "bS2 \wS2", "bB1 wS2/", "bB1 wS2-", "bB1 wS2\", "bB1 /wS2", "bB1 -wS2", "bB1 \wS2", "bB2 wS2/", "bB2 wS2-", "bB2 wS2\", "bB2 /wS2", "bB2 -wS2", "bB2 \wS2", "bM wS2/", "bM wS2-", "bM wS2\", "bM /wS2", "bM -wS2", "bM \wS2", "bL wS2/", "bL wS2-", "bL wS2\", "bL /wS2", "bL -wS2", "bL \wS2", "bP wS2/", "bP wS2-", "bP wS2\", "bP /wS2", "bP -wS2", "bP \wS2"] + +# Apply action "bA3 /wS2" +action: 3391 + +# State 2 +# Base+MLP;InProgress;White[2];wS2;bA3 /wS2 +IsTerminal() = False +History() = [1630, 3391] +HistoryString() = "1630, 3391" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "1630, 3391" +InformationStateString(1) = "1630, 3391" +ObservationString(0) = "Base+MLP;InProgress;White[2];wS2;bA3 /wS2" +ObservationString(1) = "Base+MLP;InProgress;White[2];wS2;bA3 /wS2" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [56, 57, 61, 252, 253, 257, 448, 449, 453, 644, 645, 649, 840, 841, 845, 1036, 1037, 1041, 1232, 1233, 1237, 1428, 1429, 1433, 1820, 1821, 1825, 2016, 2017, 2021, 2212, 2213, 2217, 2408, 2409, 2413, 2604, 2605, 2609] +StringLegalActions() = ["wQ wS2/", "wQ wS2-", "wQ \wS2", "wA1 wS2/", "wA1 wS2-", "wA1 \wS2", "wA2 wS2/", "wA2 wS2-", "wA2 \wS2", "wA3 wS2/", "wA3 wS2-", "wA3 \wS2", "wG1 wS2/", "wG1 wS2-", "wG1 \wS2", "wG2 wS2/", "wG2 wS2-", "wG2 \wS2", "wG3 wS2/", "wG3 wS2-", "wG3 \wS2", "wS1 wS2/", "wS1 wS2-", "wS1 \wS2", "wB1 wS2/", "wB1 wS2-", "wB1 \wS2", "wB2 wS2/", "wB2 wS2-", "wB2 \wS2", "wM wS2/", "wM wS2-", "wM \wS2", "wL wS2/", "wL wS2-", "wL \wS2", "wP wS2/", "wP wS2-", "wP \wS2"] + +# Apply action "wG1 wS2/" +action: 840 + +# State 3 +# Base+MLP;InProgress;Black[2];wS2;bA3 /wS2;wG1 wS2/ +IsTerminal() = False +History() = [1630, 3391, 840] +HistoryString() = "1630, 3391, 840" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "1630, 3391, 840" +InformationStateString(1) = "1630, 3391, 840" +ObservationString(0) = "Base+MLP;InProgress;Black[2];wS2;bA3 /wS2;wG1 wS2/" +ObservationString(1) = "Base+MLP;InProgress;Black[2];wS2;bA3 /wS2;wG1 wS2/" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2865, 2866, 2867, 3061, 3062, 3063, 3257, 3258, 3259, 3649, 3650, 3651, 3845, 3846, 3847, 4041, 4042, 4043, 4237, 4238, 4239, 4433, 4434, 4435, 4629, 4630, 4631, 4825, 4826, 4827, 5021, 5022, 5023, 5217, 5218, 5219, 5413, 5414, 5415] +StringLegalActions() = ["bQ bA3\", "bQ /bA3", "bQ -bA3", "bA1 bA3\", "bA1 
/bA3", "bA1 -bA3", "bA2 bA3\", "bA2 /bA3", "bA2 -bA3", "bG1 bA3\", "bG1 /bA3", "bG1 -bA3", "bG2 bA3\", "bG2 /bA3", "bG2 -bA3", "bG3 bA3\", "bG3 /bA3", "bG3 -bA3", "bS1 bA3\", "bS1 /bA3", "bS1 -bA3", "bS2 bA3\", "bS2 /bA3", "bS2 -bA3", "bB1 bA3\", "bB1 /bA3", "bB1 -bA3", "bB2 bA3\", "bB2 /bA3", "bB2 -bA3", "bM bA3\", "bM /bA3", "bM -bA3", "bL bA3\", "bL /bA3", "bL -bA3", "bP bA3\", "bP /bA3", "bP -bA3"] + +# Apply action "bS1 /bA3" +action: 4238 + +# State 4 +# Base+MLP;InProgress;White[3];wS2;bA3 /wS2;wG1 wS2/;bS1 /bA3 +IsTerminal() = False +History() = [1630, 3391, 840, 4238] +HistoryString() = "1630, 3391, 840, 4238" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "1630, 3391, 840, 4238" +InformationStateString(1) = "1630, 3391, 840, 4238" +ObservationString(0) = "Base+MLP;InProgress;White[3];wS2;bA3 /wS2;wG1 wS2/;bS1 /bA3" +ObservationString(1) = "Base+MLP;InProgress;White[3];wS2;bA3 /wS2;wG1 wS2/;bS1 /bA3" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [28, 29, 30, 32, 33, 57, 61, 224, 225, 226, 228, 229, 253, 257, 420, 421, 422, 424, 425, 449, 453, 616, 617, 618, 620, 621, 645, 649, 1008, 1009, 1010, 1012, 1013, 1037, 1041, 1204, 1205, 1206, 1208, 1209, 1233, 1237, 1400, 1401, 1402, 1404, 1405, 1429, 1433, 1792, 1793, 1794, 1796, 1797, 1821, 1825, 1988, 1989, 1990, 1992, 1993, 2017, 2021, 2184, 2185, 2186, 2188, 2189, 2213, 2217, 2380, 2381, 2382, 2384, 2385, 2409, 2413, 2576, 2577, 2578, 2580, 2581, 2605, 2609] +StringLegalActions() = ["wQ wG1/", "wQ wG1-", "wQ wG1\", "wQ -wG1", "wQ \wG1", "wQ wS2-", "wQ \wS2", "wA1 wG1/", "wA1 wG1-", "wA1 wG1\", "wA1 -wG1", "wA1 \wG1", "wA1 wS2-", "wA1 \wS2", "wA2 wG1/", "wA2 wG1-", "wA2 wG1\", "wA2 -wG1", "wA2 \wG1", "wA2 wS2-", "wA2 \wS2", "wA3 wG1/", "wA3 wG1-", "wA3 wG1\", "wA3 -wG1", "wA3 \wG1", "wA3 wS2-", "wA3 \wS2", "wG2 wG1/", "wG2 wG1-", "wG2 wG1\", "wG2 -wG1", "wG2 \wG1", "wG2 wS2-", "wG2 \wS2", "wG3 wG1/", "wG3 wG1-", "wG3 wG1\", "wG3 -wG1", "wG3 \wG1", "wG3 wS2-", "wG3 \wS2", "wS1 wG1/", "wS1 wG1-", "wS1 wG1\", "wS1 -wG1", "wS1 \wG1", "wS1 wS2-", "wS1 \wS2", "wB1 wG1/", "wB1 wG1-", "wB1 wG1\", "wB1 -wG1", "wB1 \wG1", "wB1 wS2-", "wB1 \wS2", "wB2 wG1/", "wB2 wG1-", "wB2 wG1\", "wB2 -wG1", "wB2 \wG1", "wB2 wS2-", "wB2 \wS2", "wM wG1/", "wM wG1-", "wM wG1\", "wM -wG1", "wM \wG1", "wM wS2-", "wM \wS2", "wL wG1/", "wL wG1-", "wL wG1\", "wL -wG1", "wL \wG1", "wL wS2-", "wL \wS2", "wP wG1/", "wP wG1-", "wP wG1\", "wP -wG1", "wP \wG1", "wP wS2-", "wP \wS2"] + +# Apply action "wQ wG1-" +action: 29 + +# State 5 +# Base+MLP;InProgress;Black[3];wS2;bA3 /wS2;wG1 wS2/;bS1 /bA3;wQ wG1- +IsTerminal() = False +History() = [1630, 3391, 840, 4238, 29] +HistoryString() = "1630, 3391, 840, 4238, 29" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "1630, 3391, 840, 4238, 29" +InformationStateString(1) = "1630, 3391, 840, 4238, 29" +ObservationString(0) = "Base+MLP;InProgress;Black[3];wS2;bA3 /wS2;wG1 wS2/;bS1 /bA3;wQ wG1-" +ObservationString(1) = "Base+MLP;InProgress;Black[3];wS2;bA3 /wS2;wG1 wS2/;bS1 /bA3;wQ wG1-" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◉◉◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◉◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2865, 2867, 2892, 2893, 2894, 2895, 2896, 3061, 3063, 3088, 3089, 3090, 3091, 3092, 3257, 3259, 3284, 3285, 3286, 3287, 3288, 3649, 3651, 3676, 3677, 3678, 3679, 3680, 3845, 3847, 3872, 3873, 3874, 3875, 3876, 4041, 4043, 
4068, 4069, 4070, 4071, 4072, 4433, 4435, 4460, 4461, 4462, 4463, 4464, 4629, 4631, 4656, 4657, 4658, 4659, 4660, 4825, 4827, 4852, 4853, 4854, 4855, 4856, 5021, 5023, 5048, 5049, 5050, 5051, 5052, 5217, 5219, 5244, 5245, 5246, 5247, 5248, 5413, 5415, 5440, 5441, 5442, 5443, 5444] +StringLegalActions() = ["bQ bA3\", "bQ -bA3", "bQ bS1-", "bQ bS1\", "bQ /bS1", "bQ -bS1", "bQ \bS1", "bA1 bA3\", "bA1 -bA3", "bA1 bS1-", "bA1 bS1\", "bA1 /bS1", "bA1 -bS1", "bA1 \bS1", "bA2 bA3\", "bA2 -bA3", "bA2 bS1-", "bA2 bS1\", "bA2 /bS1", "bA2 -bS1", "bA2 \bS1", "bG1 bA3\", "bG1 -bA3", "bG1 bS1-", "bG1 bS1\", "bG1 /bS1", "bG1 -bS1", "bG1 \bS1", "bG2 bA3\", "bG2 -bA3", "bG2 bS1-", "bG2 bS1\", "bG2 /bS1", "bG2 -bS1", "bG2 \bS1", "bG3 bA3\", "bG3 -bA3", "bG3 bS1-", "bG3 bS1\", "bG3 /bS1", "bG3 -bS1", "bG3 \bS1", "bS2 bA3\", "bS2 -bA3", "bS2 bS1-", "bS2 bS1\", "bS2 /bS1", "bS2 -bS1", "bS2 \bS1", "bB1 bA3\", "bB1 -bA3", "bB1 bS1-", "bB1 bS1\", "bB1 /bS1", "bB1 -bS1", "bB1 \bS1", "bB2 bA3\", "bB2 -bA3", "bB2 bS1-", "bB2 bS1\", "bB2 /bS1", "bB2 -bS1", "bB2 \bS1", "bM bA3\", "bM -bA3", "bM bS1-", "bM bS1\", "bM /bS1", "bM -bS1", "bM \bS1", "bL bA3\", "bL -bA3", "bL bS1-", "bL bS1\", "bL /bS1", "bL -bS1", "bL \bS1", "bP bA3\", "bP -bA3", "bP bS1-", "bP bS1\", "bP /bS1", "bP -bS1", "bP \bS1"] + +# Apply action "bS2 bS1-" +action: 4460 + +# State 6 +# Apply action "wL wG1\" +action: 2382 + +# State 7 +# Apply action "bQ /bS2" +action: 2901 + +# State 8 +# Apply action "wA3 wQ/" +action: 588 + +# State 9 +# Apply action "bL /bS1" +action: 5246 + +# State 10 +# Apply action "wA1 -wA3" +action: 221 + +# State 11 +# Apply action "bG2 bQ\" +action: 3824 + +# State 12 +# Apply action "wG3 \wA1" +action: 1188 + +# State 13 +# Apply action "bG1 bG2\" +action: 3663 + +# State 14 +# Apply action "wA2 \wS2" +action: 453 + +# State 15 +# Apply action "bM \bS1" +action: 5052 + +# State 16 +# Apply action "wA2 wS2\" +action: 450 + +# State 17 +# Apply action "bB1 bQ-" +action: 4607 + +# State 18 +# Apply action "wA2 -wG3" +action: 438 + +# State 19 +# Apply action "bG3 /bL" +action: 4105 + +# State 20 +# Base+MLP;InProgress;White[11];wS2;bA3 /wS2;wG1 wS2/;bS1 /bA3;wQ wG1-;bS2 bS1-;wL wG1\;bQ /bS2;wA3 wQ/;bL /bS1;wA1 -wA3;bG2 bQ\;wG3 \wA1;bG1 bG2\;wA2 \wS2;bM \bS1;wA2 wS2\;bB1 bQ-;wA2 -wG3;bG3 /bL +IsTerminal() = False +History() = [1630, 3391, 840, 4238, 29, 4460, 2382, 2901, 588, 5246, 221, 3824, 1188, 3663, 453, 5052, 450, 4607, 438, 4105] +HistoryString() = "1630, 3391, 840, 4238, 29, 4460, 2382, 2901, 588, 5246, 221, 3824, 1188, 3663, 453, 5052, 450, 4607, 438, 4105" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "1630, 3391, 840, 4238, 29, 4460, 2382, 2901, 588, 5246, 221, 3824, 1188, 3663, 453, 5052, 450, 4607, 438, 4105" +InformationStateString(1) = "1630, 3391, 840, 4238, 29, 4460, 2382, 2901, 588, 5246, 221, 3824, 1188, 3663, 453, 5052, 450, 4607, 438, 4105" +ObservationString(0) = "Base+MLP;InProgress;White[11];wS2;bA3 /wS2;wG1 wS2/;bS1 /bA3;wQ wG1-;bS2 bS1-;wL wG1\\;bQ /bS2;wA3 wQ/;bL /bS1;wA1 -wA3;bG2 bQ\\;wG3 \\wA1;bG1 bG2\\;wA2 \\wS2;bM \\bS1;wA2 wS2\\;bB1 bQ-;wA2 -wG3;bG3 /bL" +ObservationString(1) = "Base+MLP;InProgress;White[11];wS2;bA3 /wS2;wG1 wS2/;bS1 /bA3;wQ wG1-;bS2 bS1-;wL wG1\\;bQ /bS2;wA3 wQ/;bL /bS1;wA1 -wA3;bG2 bQ\\;wG3 \\wA1;bG1 bG2\\;wA2 \\wS2;bM \\bS1;wA2 wS2\\;bB1 bQ-;wA2 -wG3;bG3 /bL" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◉◉◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◉◯◯◉◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◉◉◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◯◉◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◉◉◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◉◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◉◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◉◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◉◯◉◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◉◉◉◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [23, 85, 393, 394, 
399, 403, 413, 414, 415, 418, 424, 425, 434, 435, 437, 439, 450, 452, 453, 477, 478, 479, 493, 512, 516, 518, 519, 520, 521, 522, 526, 528, 529, 533, 534, 535, 536, 537, 543, 546, 547, 553, 554, 555, 567, 570, 571, 572, 576, 578, 579, 589, 590, 595, 599, 602, 604, 605, 606, 607, 620, 621, 630, 631, 633, 635, 646, 648, 649, 673, 674, 675, 689, 708, 712, 714, 715, 716, 717, 718, 722, 724, 725, 729, 730, 731, 732, 733, 739, 742, 743, 749, 750, 751, 763, 766, 767, 768, 772, 774, 775, 785, 791, 807, 810, 827, 870, 927, 981, 982, 987, 991, 994, 996, 997, 998, 999, 1001, 1002, 1003, 1006, 1012, 1013, 1022, 1023, 1025, 1027, 1041, 1065, 1066, 1373, 1374, 1379, 1383, 1386, 1388, 1389, 1390, 1391, 1393, 1394, 1395, 1398, 1404, 1405, 1414, 1415, 1417, 1419, 1433, 1457, 1458, 1765, 1766, 1771, 1775, 1778, 1780, 1781, 1782, 1783, 1785, 1786, 1787, 1790, 1796, 1797, 1806, 1807, 1809, 1811, 1825, 1849, 1850, 1961, 1962, 1967, 1971, 1974, 1976, 1977, 1978, 1979, 1981, 1982, 1983, 1986, 1992, 1993, 2002, 2003, 2005, 2007, 2021, 2045, 2046, 2157, 2158, 2163, 2167, 2170, 2172, 2173, 2174, 2175, 2177, 2178, 2179, 2182, 2188, 2189, 2198, 2199, 2201, 2203, 2217, 2241, 2242, 2353, 2354, 2359, 2363, 2368, 2373, 2374, 2375, 2378, 2384, 2385, 2395, 2397, 2410, 2412, 2413, 2472, 2476, 2506, 2527, 2549, 2550, 2555, 2559, 2562, 2564, 2565, 2566, 2567, 2569, 2570, 2571, 2574, 2580, 2581, 2590, 2591, 2593, 2595, 2609, 2633, 2634] +StringLegalActions() = ["wQ wA3\", "wQ wL-", "wA2 wQ-", "wA2 wQ\", "wA2 wA1/", "wA2 -wA1", "wA2 wA3/", "wA2 wA3-", "wA2 wA3\", "wA2 \wA3", "wA2 -wG1", "wA2 \wG1", "wA2 wG3/", "wA2 wG3-", "wA2 /wG3", "wA2 \wG3", "wA2 wS2\", "wA2 -wS2", "wA2 \wS2", "wA2 wL-", "wA2 wL\", "wA2 /wL", "wA2 /bQ", "wA2 bA3-", "wA2 \bA3", "wA2 bG1/", "wA2 bG1-", "wA2 bG1\", "wA2 /bG1", "wA2 -bG1", "wA2 bG2-", "wA2 /bG2", "wA2 -bG2", "wA2 bG3-", "wA2 bG3\", "wA2 /bG3", "wA2 -bG3", "wA2 \bG3", "wA2 -bS1", "wA2 bS2/", "wA2 bS2-", "wA2 bB1/", "wA2 bB1-", "wA2 bB1\", "wA2 bM/", "wA2 /bM", "wA2 -bM", "wA2 \bM", "wA2 bL\", "wA2 -bL", "wA2 \bL", "wA3 wQ-", "wA3 wQ\", "wA3 wA1/", "wA3 -wA1", "wA3 wA2/", "wA3 wA2\", "wA3 /wA2", "wA3 -wA2", "wA3 \wA2", "wA3 -wG1", "wA3 \wG1", "wA3 wG3/", "wA3 wG3-", "wA3 /wG3", "wA3 \wG3", "wA3 wS2\", "wA3 -wS2", "wA3 \wS2", "wA3 wL-", "wA3 wL\", "wA3 /wL", "wA3 /bQ", "wA3 bA3-", "wA3 \bA3", "wA3 bG1/", "wA3 bG1-", "wA3 bG1\", "wA3 /bG1", "wA3 -bG1", "wA3 bG2-", "wA3 /bG2", "wA3 -bG2", "wA3 bG3-", "wA3 bG3\", "wA3 /bG3", "wA3 -bG3", "wA3 \bG3", "wA3 -bS1", "wA3 bS2/", "wA3 bS2-", "wA3 bB1/", "wA3 bB1-", "wA3 bB1\", "wA3 bM/", "wA3 /bM", "wA3 -bM", "wA3 \bM", "wA3 bL\", "wA3 -bL", "wA3 \bL", "wG1 wQ-", "wG1 wA1/", "wG1 wA3\", "wG1 \wA3", "wG1 wG3-", "wG1 wL\", "wG1 /bG3", "wG2 wQ-", "wG2 wQ\", "wG2 wA1/", "wG2 -wA1", "wG2 wA2/", "wG2 wA2\", "wG2 /wA2", "wG2 -wA2", "wG2 \wA2", "wG2 wA3/", "wG2 wA3-", "wG2 wA3\", "wG2 \wA3", "wG2 -wG1", "wG2 \wG1", "wG2 wG3/", "wG2 wG3-", "wG2 /wG3", "wG2 \wG3", "wG2 \wS2", "wG2 wL-", "wG2 wL\", "wS1 wQ-", "wS1 wQ\", "wS1 wA1/", "wS1 -wA1", "wS1 wA2/", "wS1 wA2\", "wS1 /wA2", "wS1 -wA2", "wS1 \wA2", "wS1 wA3/", "wS1 wA3-", "wS1 wA3\", "wS1 \wA3", "wS1 -wG1", "wS1 \wG1", "wS1 wG3/", "wS1 wG3-", "wS1 /wG3", "wS1 \wG3", "wS1 \wS2", "wS1 wL-", "wS1 wL\", "wB1 wQ-", "wB1 wQ\", "wB1 wA1/", "wB1 -wA1", "wB1 wA2/", "wB1 wA2\", "wB1 /wA2", "wB1 -wA2", "wB1 \wA2", "wB1 wA3/", "wB1 wA3-", "wB1 wA3\", "wB1 \wA3", "wB1 -wG1", "wB1 \wG1", "wB1 wG3/", "wB1 wG3-", "wB1 /wG3", "wB1 \wG3", "wB1 \wS2", "wB1 wL-", "wB1 wL\", "wB2 wQ-", "wB2 wQ\", "wB2 wA1/", "wB2 -wA1", "wB2 wA2/", 
"wB2 wA2\", "wB2 /wA2", "wB2 -wA2", "wB2 \wA2", "wB2 wA3/", "wB2 wA3-", "wB2 wA3\", "wB2 \wA3", "wB2 -wG1", "wB2 \wG1", "wB2 wG3/", "wB2 wG3-", "wB2 /wG3", "wB2 \wG3", "wB2 \wS2", "wB2 wL-", "wB2 wL\", "wM wQ-", "wM wQ\", "wM wA1/", "wM -wA1", "wM wA2/", "wM wA2\", "wM /wA2", "wM -wA2", "wM \wA2", "wM wA3/", "wM wA3-", "wM wA3\", "wM \wA3", "wM -wG1", "wM \wG1", "wM wG3/", "wM wG3-", "wM /wG3", "wM \wG3", "wM \wS2", "wM wL-", "wM wL\", "wL wQ-", "wL wQ\", "wL wA1/", "wL -wA1", "wL wA2\", "wL wA3/", "wL wA3-", "wL wA3\", "wL \wA3", "wL -wG1", "wL \wG1", "wL wG3-", "wL /wG3", "wL wS2\", "wL -wS2", "wL \wS2", "wL bA3-", "wL \bA3", "wL bS2/", "wL bM/", "wP wQ-", "wP wQ\", "wP wA1/", "wP -wA1", "wP wA2/", "wP wA2\", "wP /wA2", "wP -wA2", "wP \wA2", "wP wA3/", "wP wA3-", "wP wA3\", "wP \wA3", "wP -wG1", "wP \wG1", "wP wG3/", "wP wG3-", "wP /wG3", "wP \wG3", "wP \wS2", "wP wL-", "wP wL\"] + +# Apply action "wA3 wL\" +action: 674 + +# State 21 +# Base+MLP;InProgress;Black[11];wS2;bA3 /wS2;wG1 wS2/;bS1 /bA3;wQ wG1-;bS2 bS1-;wL wG1\;bQ /bS2;wA3 wQ/;bL /bS1;wA1 -wA3;bG2 bQ\;wG3 \wA1;bG1 bG2\;wA2 \wS2;bM \bS1;wA2 wS2\;bB1 bQ-;wA2 -wG3;bG3 /bL;wA3 wL\ +IsTerminal() = False +History() = [1630, 3391, 840, 4238, 29, 4460, 2382, 2901, 588, 5246, 221, 3824, 1188, 3663, 453, 5052, 450, 4607, 438, 4105, 674] +HistoryString() = "1630, 3391, 840, 4238, 29, 4460, 2382, 2901, 588, 5246, 221, 3824, 1188, 3663, 453, 5052, 450, 4607, 438, 4105, 674" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "1630, 3391, 840, 4238, 29, 4460, 2382, 2901, 588, 5246, 221, 3824, 1188, 3663, 453, 5052, 450, 4607, 438, 4105, 674" +InformationStateString(1) = "1630, 3391, 840, 4238, 29, 4460, 2382, 2901, 588, 5246, 221, 3824, 1188, 3663, 453, 5052, 450, 4607, 438, 4105, 674" +ObservationString(0) = "Base+MLP;InProgress;Black[11];wS2;bA3 /wS2;wG1 wS2/;bS1 /bA3;wQ wG1-;bS2 bS1-;wL wG1\\;bQ /bS2;wA3 wQ/;bL /bS1;wA1 -wA3;bG2 bQ\\;wG3 \\wA1;bG1 bG2\\;wA2 \\wS2;bM \\bS1;wA2 wS2\\;bB1 bQ-;wA2 -wG3;bG3 /bL;wA3 wL\\" +ObservationString(1) = "Base+MLP;InProgress;Black[11];wS2;bA3 /wS2;wG1 wS2/;bS1 /bA3;wQ wG1-;bS2 bS1-;wL wG1\\;bQ /bS2;wA3 wQ/;bL /bS1;wA1 -wA3;bG2 bQ\\;wG3 \\wA1;bG1 bG2\\;wA2 \\wS2;bM \\bS1;wA2 wS2\\;bB1 bQ-;wA2 -wG3;bG3 /bL;wA3 wL\\" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◉◉◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◯◉◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◉◉◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◉◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◉◯◉◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◉◉◉◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [3041, 3066, 3067, 3068, 3069, 3070, 3074, 3076, 3077, 3081, 3082, 3083, 3084, 3085, 3091, 3102, 3103, 3118, 3119, 3120, 3124, 3126, 3127, 3237, 3262, 3263, 3264, 3265, 3266, 3270, 3272, 3273, 3277, 3278, 3279, 3280, 3281, 3287, 3298, 3299, 3314, 3315, 3316, 3320, 3322, 3323, 3708, 3927, 3963, 4334, 4335, 4438, 4446, 4474, 4475, 4532, 4612, 4634, 4642, 4647, 4663, 4668, 4805, 4830, 4831, 4832, 4833, 4834, 4838, 4840, 4841, 4845, 4846, 4847, 4848, 4849, 4855, 4866, 4867, 4882, 4883, 4884, 4888, 4890, 4891, 4900, 4901, 4902, 4907, 4908, 4911, 4914, 4916, 4917, 4918, 4919, 4921, 4922, 4923, 4924, 4932, 4933, 4942, 4943, 4945, 4947, 4960, 4961, 4985, 5001, 5024, 5026, 5027, 5028, 5029, 5030, 5034, 5036, 5037, 5041, 5042, 5043, 5044, 5045, 5051, 5055, 5061, 5062, 5063, 5084, 5086, 5087, 5393, 5418, 5419, 5420, 5421, 5422, 5426, 5428, 5429, 5433, 5434, 5435, 5436, 5437, 5443, 5454, 5455, 5470, 5471, 5472, 5476, 5478, 5479] +StringLegalActions() = ["bA1 /bQ", "bA1 bG1/", "bA1 bG1-", "bA1 bG1\", "bA1 /bG1", "bA1 -bG1", "bA1 bG2-", "bA1 /bG2", "bA1 -bG2", "bA1 bG3-", "bA1 bG3\", "bA1 /bG3", "bA1 -bG3", "bA1 \bG3", "bA1 
-bS1", "bA1 bB1-", "bA1 bB1\", "bA1 /bM", "bA1 -bM", "bA1 \bM", "bA1 bL\", "bA1 -bL", "bA1 \bL", "bA2 /bQ", "bA2 bG1/", "bA2 bG1-", "bA2 bG1\", "bA2 /bG1", "bA2 -bG1", "bA2 bG2-", "bA2 /bG2", "bA2 -bG2", "bA2 bG3-", "bA2 bG3\", "bA2 /bG3", "bA2 -bG3", "bA2 \bG3", "bA2 -bS1", "bA2 bB1-", "bA2 bB1\", "bA2 /bM", "bA2 -bM", "bA2 \bM", "bA2 bL\", "bA2 -bL", "bA2 \bL", "bG1 \bM", "bG3 wA1/", "bG3 wG3-", "bS2 wA3-", "bS2 wA3\", "bS2 bG1/", "bS2 bG2-", "bS2 bB1-", "bS2 bB1\", "bB1 /wA3", "bB1 bQ", "bB1 bG1/", "bB1 bG2-", "bB1 bG2", "bB1 bS2-", "bB1 bS2", "bB2 /bQ", "bB2 bG1/", "bB2 bG1-", "bB2 bG1\", "bB2 /bG1", "bB2 -bG1", "bB2 bG2-", "bB2 /bG2", "bB2 -bG2", "bB2 bG3-", "bB2 bG3\", "bB2 /bG3", "bB2 -bG3", "bB2 \bG3", "bB2 -bS1", "bB2 bB1-", "bB2 bB1\", "bB2 /bM", "bB2 -bM", "bB2 \bM", "bB2 bL\", "bB2 -bL", "bB2 \bL", "bM wQ/", "bM wQ-", "bM wQ\", "bM wA1/", "bM wA1-", "bM -wA1", "bM wA2/", "bM wA2\", "bM /wA2", "bM -wA2", "bM \wA2", "bM wA3/", "bM wA3-", "bM wA3\", "bM /wA3", "bM -wG1", "bM \wG1", "bM wG3/", "bM wG3-", "bM /wG3", "bM \wG3", "bM -wS2", "bM \wS2", "bM wL-", "bM /bQ", "bM \bA3", "bM bG1/", "bM bG1-", "bM bG1\", "bM /bG1", "bM -bG1", "bM bG2-", "bM /bG2", "bM -bG2", "bM bG3-", "bM bG3\", "bM /bG3", "bM -bG3", "bM \bG3", "bM -bS1", "bM bS2-", "bM bB1/", "bM bB1-", "bM bB1\", "bM bL\", "bM -bL", "bM \bL", "bP /bQ", "bP bG1/", "bP bG1-", "bP bG1\", "bP /bG1", "bP -bG1", "bP bG2-", "bP /bG2", "bP -bG2", "bP bG3-", "bP bG3\", "bP /bG3", "bP -bG3", "bP \bG3", "bP -bS1", "bP bB1-", "bP bB1\", "bP /bM", "bP -bM", "bP \bM", "bP bL\", "bP -bL", "bP \bL"] + +# Apply action "bG3 wA1/" +action: 3927 + +# State 22 +# Apply action "wB1 -wG1" +action: 1796 + +# State 23 +# Apply action "bB1 /wA3" +action: 4532 + +# State 24 +# Apply action "wA3 /bG1" +action: 717 + +# State 25 +# Apply action "bL /wL" +action: 5183 + +# State 26 +# Apply action "wM wA2/" +action: 2170 + +# State 27 +# Apply action "bB1 wL\" +action: 4594 + +# State 28 +# Apply action "wA2 \bM" +action: 572 + +# State 29 +# Apply action "bL -bQ" +action: 5198 + +# State 30 +# Apply action "wA2 bQ-" +action: 491 + +# State 31 +# Apply action "bA1 bL\" +action: 3124 + +# State 32 +# Base+MLP;WhiteWins;White[17];wS2;bA3 /wS2;wG1 wS2/;bS1 /bA3;wQ wG1-;bS2 bS1-;wL wG1\;bQ /bS2;wA3 wQ/;bL /bS1;wA1 -wA3;bG2 bQ\;wG3 \wA1;bG1 bG2\;wA2 \wS2;bM \bS1;wA2 wS2\;bB1 bQ-;wA2 -wG3;bG3 /bL;wA3 wL\;bG3 wA1/;wB1 -wG1;bB1 /wA3;wA3 /bG1;bL /wL;wM wA2/;bB1 wL\;wA2 \bM;bL -bQ;wA2 bQ-;bA1 bL\ +IsTerminal() = True +History() = [1630, 3391, 840, 4238, 29, 4460, 2382, 2901, 588, 5246, 221, 3824, 1188, 3663, 453, 5052, 450, 4607, 438, 4105, 674, 3927, 1796, 4532, 717, 5183, 2170, 4594, 572, 5198, 491, 3124] +HistoryString() = "1630, 3391, 840, 4238, 29, 4460, 2382, 2901, 588, 5246, 221, 3824, 1188, 3663, 453, 5052, 450, 4607, 438, 4105, 674, 3927, 1796, 4532, 717, 5183, 2170, 4594, 572, 5198, 491, 3124" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "1630, 3391, 840, 4238, 29, 4460, 2382, 2901, 588, 5246, 221, 3824, 1188, 3663, 453, 5052, 450, 4607, 438, 4105, 674, 3927, 1796, 4532, 717, 5183, 2170, 4594, 572, 5198, 491, 3124" +InformationStateString(1) = "1630, 3391, 840, 4238, 29, 4460, 2382, 2901, 588, 5246, 221, 3824, 1188, 3663, 453, 5052, 450, 4607, 438, 4105, 674, 3927, 1796, 4532, 717, 5183, 2170, 4594, 572, 5198, 491, 3124" +ObservationString(0) = "Base+MLP;WhiteWins;White[17];wS2;bA3 /wS2;wG1 wS2/;bS1 /bA3;wQ wG1-;bS2 bS1-;wL wG1\\;bQ /bS2;wA3 wQ/;bL /bS1;wA1 -wA3;bG2 bQ\\;wG3 \\wA1;bG1 
bG2\\;wA2 \\wS2;bM \\bS1;wA2 wS2\\;bB1 bQ-;wA2 -wG3;bG3 /bL;wA3 wL\\;bG3 wA1/;wB1 -wG1;bB1 /wA3;wA3 /bG1;bL /wL;wM wA2/;bB1 wL\\;wA2 \\bM;bL -bQ;wA2 bQ-;bA1 bL\\" +ObservationString(1) = "Base+MLP;WhiteWins;White[17];wS2;bA3 /wS2;wG1 wS2/;bS1 /bA3;wQ wG1-;bS2 bS1-;wL wG1\\;bQ /bS2;wA3 wQ/;bL /bS1;wA1 -wA3;bG2 bQ\\;wG3 \\wA1;bG1 bG2\\;wA2 \\wS2;bM \\bS1;wA2 wS2\\;bB1 bQ-;wA2 -wG3;bG3 /bL;wA3 wL\\;bG3 wA1/;wB1 -wG1;bB1 /wA3;wA3 /bG1;bL /wL;wM wA2/;bB1 wL\\;wA2 \\bM;bL -bQ;wA2 bQ-;bA1 bL\\" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ 
◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/kriegspiel(board_size=4).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/kriegspiel(board_size=4).txt new file mode 100644 index 0000000..088700a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/kriegspiel(board_size=4).txt @@ -0,0 +1,1510 @@ +game: kriegspiel(board_size=4) + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Kriegspiel" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["50_move_rule", "board_size", "fen", "threefold_repetition"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "kriegspiel" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 4674 +PolicyTensorShape() = [4674] +MaxChanceOutcomes() = 0 +GetParameters() = {50_move_rule=True,board_size=4,fen=r1kr/pppp/PPPP/R1KR w - - 0 1,threefold_repetition=True} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = public_repetitions: [3], public_side_to_play: [2], public_irreversible_move_counter: [1], public_illegal: [2], public_capture_type: [3], public_captured_square: [4, 4], public_check_one: [6], public_check_two: [6], public_to_move: [3], public_pawn_tries: [17], private_K_pieces: [4, 4], private_k_pieces: [4, 4], private_Q_pieces: [4, 4], private_q_pieces: [4, 4], private_R_pieces: [4, 4], private_r_pieces: [4, 4], private_B_pieces: [4, 4], private_b_pieces: [4, 4], private_N_pieces: [4, 4], private_n_pieces: [4, 4], private_P_pieces: [4, 4], private_p_pieces: [4, 4], private_empty_pieces: [4, 4], private_unknown_squares: [4, 4], private_left_castling: [2], private_right_castling: [2], private_last_move_from: [4, 4], private_last_move_to: [4, 4], private_last_move_promotion: [6], private_last_move_castle_dir: [3] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 328 +MaxGameLength() = 17695 +ToString() = "kriegspiel(board_size=4)" + +# State 0 +# r1kr/pppp/PPPP/R1KR w - - 0 1 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "" +ObservationString(1) = "" +ObservationTensor(0).public_repetitions: ◉◯◯ +ObservationTensor(0).public_side_to_play: ◯◉ +ObservationTensor(0).public_irreversible_move_counter: ◯ +ObservationTensor(0).public_illegal: ◉◯ +ObservationTensor(0).public_capture_type: ◉◯◯ +ObservationTensor(0).public_captured_square: ◯◯◯◯ + 
◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_check_one: ◉◯◯◯◯◯ +ObservationTensor(0).public_check_two: ◉◯◯◯◯◯ +ObservationTensor(0).public_to_move: ◯◯◉ +ObservationTensor(0).public_pawn_tries: ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ +ObservationTensor(0).private_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ +ObservationTensor(0).private_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◉◯ + ◯◯◉◯ + ◯◯◉◯ + ◯◯◉◯ +ObservationTensor(0).private_empty_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_unknown_squares: ◉◉◯◯ + ◉◉◯◉ + ◉◉◯◯ + ◉◉◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_last_move_from: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_last_move_to: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_last_move_promotion: ◉◯◯◯◯◯ +ObservationTensor(0).private_last_move_castle_dir: ◯◯◉ +ObservationTensor(1).public_repetitions: ◉◯◯ +ObservationTensor(1).public_side_to_play: ◯◉ +ObservationTensor(1).public_irreversible_move_counter: ◯ +ObservationTensor(1).public_illegal: ◉◯ +ObservationTensor(1).public_capture_type: ◉◯◯ +ObservationTensor(1).public_captured_square: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_check_one: ◉◯◯◯◯◯ +ObservationTensor(1).public_check_two: ◉◯◯◯◯◯ +ObservationTensor(1).public_to_move: ◯◯◉ +ObservationTensor(1).public_pawn_tries: ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_R_pieces: ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_P_pieces: ◯◉◯◯ + ◯◉◯◯ + ◯◉◯◯ + ◯◉◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_empty_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_unknown_squares: ◯◯◉◉ + ◉◯◉◉ + ◯◯◉◉ + ◯◯◉◉ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_last_move_from: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_last_move_to: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_last_move_promotion: ◉◯◯◯◯◯ +ObservationTensor(1).private_last_move_castle_dir: ◯◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [30, 89, 117, 673, 701, 714, 1197, 1257, 1285, 1298, 1841, 1882] +StringLegalActions() = ["a1b1", "a2a3", "a2b3", "b2b3", "b2c3", "b2a3", "c1b1", "c2c3", "c2d3", "c2b3", "d2d3", "d2c3"] + +# Apply 
action "d2d3" +action: 1841 + +# State 1 +# r1kr/pppp/PPPP/R1KR w - - 0 1 +IsTerminal() = False +History() = [1841] +HistoryString() = "1841" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "Illegal move." +ObservationString(1) = "Illegal move." +ObservationTensor(0).public_repetitions: ◉◯◯ +ObservationTensor(0).public_side_to_play: ◯◉ +ObservationTensor(0).public_irreversible_move_counter: ◯ +ObservationTensor(0).public_illegal: ◯◉ +ObservationTensor(0).public_capture_type: ◉◯◯ +ObservationTensor(0).public_captured_square: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_check_one: ◉◯◯◯◯◯ +ObservationTensor(0).public_check_two: ◉◯◯◯◯◯ +ObservationTensor(0).public_to_move: ◯◉◯ +ObservationTensor(0).public_pawn_tries: ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ +ObservationTensor(0).private_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ +ObservationTensor(0).private_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◉◯ + ◯◯◉◯ + ◯◯◉◯ + ◯◯◉◯ +ObservationTensor(0).private_empty_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_unknown_squares: ◉◉◯◯ + ◉◉◯◉ + ◉◉◯◯ + ◉◉◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_last_move_from: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_last_move_to: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_last_move_promotion: ◉◯◯◯◯◯ +ObservationTensor(0).private_last_move_castle_dir: ◯◯◉ +ObservationTensor(1).public_repetitions: ◉◯◯ +ObservationTensor(1).public_side_to_play: ◯◉ +ObservationTensor(1).public_irreversible_move_counter: ◯ +ObservationTensor(1).public_illegal: ◯◉ +ObservationTensor(1).public_capture_type: ◉◯◯ +ObservationTensor(1).public_captured_square: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_check_one: ◉◯◯◯◯◯ +ObservationTensor(1).public_check_two: ◉◯◯◯◯◯ +ObservationTensor(1).public_to_move: ◯◉◯ +ObservationTensor(1).public_pawn_tries: ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_R_pieces: ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_P_pieces: ◯◉◯◯ + ◯◉◯◯ + ◯◉◯◯ + ◯◉◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_empty_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_unknown_squares: ◯◯◉◉ + ◉◯◉◉ + ◯◯◉◉ + ◯◯◉◉ 
+ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_last_move_from: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◉◯◯ +ObservationTensor(1).private_last_move_to: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◉◯ +ObservationTensor(1).private_last_move_promotion: ◉◯◯◯◯◯ +ObservationTensor(1).private_last_move_castle_dir: ◯◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [30, 89, 117, 673, 701, 714, 1197, 1257, 1285, 1298, 1882] +StringLegalActions() = ["a1b1", "a2a3", "a2b3", "b2b3", "b2c3", "b2a3", "c1b1", "c2c3", "c2d3", "c2b3", "d2c3"] + +# Apply action "a2b3" +action: 117 + +# State 2 +# r1kr/pPpp/1PPP/R1KR b - - 0 1 +IsTerminal() = False +History() = [1841, 117] +HistoryString() = "1841, 117" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Pawn at b3 captured, black's move." +ObservationString(1) = "Pawn at b3 captured, black's move." +ObservationTensor(0).public_repetitions: ◉◯◯ +ObservationTensor(0).public_side_to_play: ◉◯ +ObservationTensor(0).public_irreversible_move_counter: ◯ +ObservationTensor(0).public_illegal: ◉◯ +ObservationTensor(0).public_capture_type: ◯◉◯ +ObservationTensor(0).public_captured_square: ◯◯◯◯ + ◯◯◉◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_check_one: ◉◯◯◯◯◯ +ObservationTensor(0).public_check_two: ◉◯◯◯◯◯ +ObservationTensor(0).public_to_move: ◉◯◯ +ObservationTensor(0).public_pawn_tries: ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ +ObservationTensor(0).private_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ +ObservationTensor(0).private_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◉◯ + ◯◯◯◯ + ◯◯◉◯ + ◯◯◉◯ +ObservationTensor(0).private_empty_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_unknown_squares: ◉◉◯◯ + ◉◉◉◉ + ◉◉◯◯ + ◉◉◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_last_move_from: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_last_move_to: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_last_move_promotion: ◉◯◯◯◯◯ +ObservationTensor(0).private_last_move_castle_dir: ◯◯◉ +ObservationTensor(1).public_repetitions: ◉◯◯ +ObservationTensor(1).public_side_to_play: ◉◯ +ObservationTensor(1).public_irreversible_move_counter: ◯ +ObservationTensor(1).public_illegal: ◉◯ +ObservationTensor(1).public_capture_type: ◯◉◯ +ObservationTensor(1).public_captured_square: ◯◯◯◯ + ◯◯◉◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_check_one: ◉◯◯◯◯◯ +ObservationTensor(1).public_check_two: ◉◯◯◯◯◯ +ObservationTensor(1).public_to_move: ◉◯◯ +ObservationTensor(1).public_pawn_tries: ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_R_pieces: ◉◯◯◯ + 
◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_P_pieces: ◯◯◯◯ + ◯◉◉◯ + ◯◉◯◯ + ◯◉◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_empty_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_unknown_squares: ◯◉◉◉ + ◉◯◯◉ + ◯◯◉◉ + ◯◯◉◉ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_last_move_from: ◯◉◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_last_move_to: ◯◯◯◯ + ◯◯◉◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_last_move_promotion: ◉◯◯◯◯◯ +ObservationTensor(1).private_last_move_castle_dir: ◯◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [30, 89, 117, 1197, 1225, 1257, 1285, 1298, 1841, 1882] +StringLegalActions() = ["a4b4", "a3a2", "a3b2", "c4b4", "c4b3", "c3c2", "c3d2", "c3b2", "d3d2", "d3c2"] + +# Apply action "c3d2" +action: 1285 + +# State 3 +# r1kr/pPpp/1PPP/R1KR b - - 0 1 +IsTerminal() = False +History() = [1841, 117, 1285] +HistoryString() = "1841, 117, 1285" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Illegal move." +ObservationString(1) = "Illegal move." +ObservationTensor(0).public_repetitions: ◉◯◯ +ObservationTensor(0).public_side_to_play: ◉◯ +ObservationTensor(0).public_irreversible_move_counter: ◯ +ObservationTensor(0).public_illegal: ◯◉ +ObservationTensor(0).public_capture_type: ◉◯◯ +ObservationTensor(0).public_captured_square: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_check_one: ◉◯◯◯◯◯ +ObservationTensor(0).public_check_two: ◉◯◯◯◯◯ +ObservationTensor(0).public_to_move: ◉◯◯ +ObservationTensor(0).public_pawn_tries: ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ +ObservationTensor(0).private_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ +ObservationTensor(0).private_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◉◯ + ◯◯◯◯ + ◯◯◉◯ + ◯◯◉◯ +ObservationTensor(0).private_empty_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_unknown_squares: ◉◉◯◯ + ◉◉◉◉ + ◉◉◯◯ + ◉◉◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_last_move_from: ◯◯◯◯ + ◯◯◯◯ + ◯◯◉◯ + ◯◯◯◯ +ObservationTensor(0).private_last_move_to: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◉◯◯ +ObservationTensor(0).private_last_move_promotion: ◉◯◯◯◯◯ +ObservationTensor(0).private_last_move_castle_dir: ◯◯◉ +ObservationTensor(1).public_repetitions: ◉◯◯ +ObservationTensor(1).public_side_to_play: ◉◯ +ObservationTensor(1).public_irreversible_move_counter: ◯ +ObservationTensor(1).public_illegal: ◯◉ +ObservationTensor(1).public_capture_type: 
◉◯◯ +ObservationTensor(1).public_captured_square: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_check_one: ◉◯◯◯◯◯ +ObservationTensor(1).public_check_two: ◉◯◯◯◯◯ +ObservationTensor(1).public_to_move: ◉◯◯ +ObservationTensor(1).public_pawn_tries: ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_R_pieces: ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_P_pieces: ◯◯◯◯ + ◯◉◉◯ + ◯◉◯◯ + ◯◉◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_empty_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_unknown_squares: ◯◉◉◉ + ◉◯◯◉ + ◯◯◉◉ + ◯◯◉◉ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_last_move_from: ◯◉◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_last_move_to: ◯◯◯◯ + ◯◯◉◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_last_move_promotion: ◉◯◯◯◯◯ +ObservationTensor(1).private_last_move_castle_dir: ◯◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [30, 89, 117, 1197, 1225, 1257, 1298, 1841, 1882] +StringLegalActions() = ["a4b4", "a3a2", "a3b2", "c4b4", "c4b3", "c3c2", "c3b2", "d3d2", "d3c2"] + +# Apply action "d3d2" +action: 1841 + +# State 4 +# r1kr/pPpp/1PPP/R1KR b - - 0 1 +IsTerminal() = False +History() = [1841, 117, 1285, 1841] +HistoryString() = "1841, 117, 1285, 1841" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Illegal move." +ObservationString(1) = "Illegal move." 
+ObservationTensor(0).public_repetitions: ◉◯◯ +ObservationTensor(0).public_side_to_play: ◉◯ +ObservationTensor(0).public_irreversible_move_counter: ◯ +ObservationTensor(0).public_illegal: ◯◉ +ObservationTensor(0).public_capture_type: ◉◯◯ +ObservationTensor(0).public_captured_square: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_check_one: ◉◯◯◯◯◯ +ObservationTensor(0).public_check_two: ◉◯◯◯◯◯ +ObservationTensor(0).public_to_move: ◉◯◯ +ObservationTensor(0).public_pawn_tries: ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ +ObservationTensor(0).private_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ +ObservationTensor(0).private_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◉◯ + ◯◯◯◯ + ◯◯◉◯ + ◯◯◉◯ +ObservationTensor(0).private_empty_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_unknown_squares: ◉◉◯◯ + ◉◉◉◉ + ◉◉◯◯ + ◉◉◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_last_move_from: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◉◯ +ObservationTensor(0).private_last_move_to: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◉◯◯ +ObservationTensor(0).private_last_move_promotion: ◉◯◯◯◯◯ +ObservationTensor(0).private_last_move_castle_dir: ◯◯◉ +ObservationTensor(1).public_repetitions: ◉◯◯ +ObservationTensor(1).public_side_to_play: ◉◯ +ObservationTensor(1).public_irreversible_move_counter: ◯ +ObservationTensor(1).public_illegal: ◯◉ +ObservationTensor(1).public_capture_type: ◉◯◯ +ObservationTensor(1).public_captured_square: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_check_one: ◉◯◯◯◯◯ +ObservationTensor(1).public_check_two: ◉◯◯◯◯◯ +ObservationTensor(1).public_to_move: ◉◯◯ +ObservationTensor(1).public_pawn_tries: ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_R_pieces: ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_P_pieces: ◯◯◯◯ + ◯◉◉◯ + ◯◉◯◯ + ◯◉◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_empty_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_unknown_squares: ◯◉◉◉ + ◉◯◯◉ + ◯◯◉◉ + ◯◯◉◉ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_last_move_from: ◯◉◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_last_move_to: ◯◯◯◯ + ◯◯◉◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_last_move_promotion: ◉◯◯◯◯◯ 
+ObservationTensor(1).private_last_move_castle_dir: ◯◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [30, 89, 117, 1197, 1225, 1257, 1298, 1882] +StringLegalActions() = ["a4b4", "a3a2", "a3b2", "c4b4", "c4b3", "c3c2", "c3b2", "d3c2"] + +# Apply action "c4b4" +action: 1197 + +# State 5 +# rk1r/pPpp/1PPP/R1KR w - - 1 2 +IsTerminal() = False +History() = [1841, 117, 1285, 1841, 1197] +HistoryString() = "1841, 117, 1285, 1841, 1197" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "white's move, 8 pawn tries." +ObservationString(1) = "white's move, 8 pawn tries." +ObservationTensor(0).public_repetitions: ◉◯◯ +ObservationTensor(0).public_side_to_play: ◯◉ +ObservationTensor(0).public_irreversible_move_counter = [0.01] +ObservationTensor(0).public_illegal: ◉◯ +ObservationTensor(0).public_capture_type: ◉◯◯ +ObservationTensor(0).public_captured_square: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_check_one: ◉◯◯◯◯◯ +ObservationTensor(0).public_check_two: ◉◯◯◯◯◯ +ObservationTensor(0).public_to_move: ◯◉◯ +ObservationTensor(0).public_pawn_tries: ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ +ObservationTensor(0).private_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◉◯ + ◯◯◯◯ + ◯◯◉◯ + ◯◯◉◯ +ObservationTensor(0).private_empty_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_unknown_squares: ◉◉◯◯ + ◉◉◉◯ + ◉◉◯◉ + ◉◉◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_last_move_from: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ +ObservationTensor(0).private_last_move_to: ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_last_move_promotion: ◉◯◯◯◯◯ +ObservationTensor(0).private_last_move_castle_dir: ◯◯◉ +ObservationTensor(1).public_repetitions: ◉◯◯ +ObservationTensor(1).public_side_to_play: ◯◉ +ObservationTensor(1).public_irreversible_move_counter = [0.01] +ObservationTensor(1).public_illegal: ◉◯ +ObservationTensor(1).public_capture_type: ◉◯◯ +ObservationTensor(1).public_captured_square: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_check_one: ◉◯◯◯◯◯ +ObservationTensor(1).public_check_two: ◉◯◯◯◯◯ +ObservationTensor(1).public_to_move: ◯◉◯ +ObservationTensor(1).public_pawn_tries: ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_R_pieces: ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ 
+ObservationTensor(1).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_P_pieces: ◯◯◯◯ + ◯◉◉◯ + ◯◉◯◯ + ◯◉◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_empty_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_unknown_squares: ◯◉◉◉ + ◉◯◯◉ + ◯◯◉◉ + ◯◯◉◉ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_last_move_from: ◯◉◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_last_move_to: ◯◯◯◯ + ◯◯◉◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_last_move_promotion: ◉◯◯◯◯◯ +ObservationTensor(1).private_last_move_castle_dir: ◯◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [16, 17, 18, 30, 701, 714, 730, 731, 732, 733, 734, 735, 736, 737, 738, 746, 774, 787, 1197, 1257, 1285, 1841, 1882] +StringLegalActions() = ["a1a2", "a1a3", "a1a4", "a1b1", "b2c3", "b2a3", "b3b4r", "b3c4r", "b3a4r", "b3b4b", "b3c4b", "b3a4b", "b3b4n", "b3c4n", "b3a4n", "b3b4q", "b3c4q", "b3a4q", "c1b1", "c2c3", "c2d3", "d2d3", "d2c3"] + +# Apply action "d2c3" +action: 1882 + +# State 6 +# Apply action "b2a1r" +action: 732 + +# State 7 +# Apply action "b4c3" +action: 628 + +# State 8 +# Apply action "b4b3" +action: 600 + +# State 9 +# Apply action "b4b3" +action: 600 + +# State 10 +# Apply action "b4c4" +action: 614 + +# State 11 +# Apply action "d3c2" +action: 1882 + +# State 12 +# Apply action "a3a2" +action: 89 + +# State 13 +# rk1r/pPPp/1PP1/R1KR b - - 0 2 +IsTerminal() = False +History() = [1841, 117, 1285, 1841, 1197, 1882, 732, 628, 600, 600, 614, 1882, 89] +HistoryString() = "1841, 117, 1285, 1841, 1197, 1882, 732, 628, 600, 600, 614, 1882, 89" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Illegal move." +ObservationString(1) = "Illegal move." 
+ObservationTensor(0).public_repetitions: ◉◯◯ +ObservationTensor(0).public_side_to_play: ◉◯ +ObservationTensor(0).public_irreversible_move_counter: ◯ +ObservationTensor(0).public_illegal: ◯◉ +ObservationTensor(0).public_capture_type: ◉◯◯ +ObservationTensor(0).public_captured_square: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_check_one: ◉◯◯◯◯◯ +ObservationTensor(0).public_check_two: ◉◯◯◯◯◯ +ObservationTensor(0).public_to_move: ◉◯◯ +ObservationTensor(0).public_pawn_tries: ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ +ObservationTensor(0).private_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◉◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◉◯ +ObservationTensor(0).private_empty_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_unknown_squares: ◉◉◯◯ + ◉◉◉◯ + ◉◉◉◉ + ◉◉◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_last_move_from: ◯◯◉◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_last_move_to: ◯◉◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_last_move_promotion: ◉◯◯◯◯◯ +ObservationTensor(0).private_last_move_castle_dir: ◯◯◉ +ObservationTensor(1).public_repetitions: ◉◯◯ +ObservationTensor(1).public_side_to_play: ◉◯ +ObservationTensor(1).public_irreversible_move_counter: ◯ +ObservationTensor(1).public_illegal: ◯◉ +ObservationTensor(1).public_capture_type: ◉◯◯ +ObservationTensor(1).public_captured_square: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_check_one: ◉◯◯◯◯◯ +ObservationTensor(1).public_check_two: ◉◯◯◯◯◯ +ObservationTensor(1).public_to_move: ◉◯◯ +ObservationTensor(1).public_pawn_tries: ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_R_pieces: ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_P_pieces: ◯◯◯◯ + ◯◉◉◯ + ◯◉◉◯ + ◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_empty_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_unknown_squares: ◯◉◉◉ + ◉◯◯◉ + ◯◯◯◉ + ◯◉◉◉ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_last_move_from: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◉◯◯ +ObservationTensor(1).private_last_move_to: ◯◯◯◯ + ◯◯◯◯ + ◯◯◉◯ + ◯◯◯◯ +ObservationTensor(1).private_last_move_promotion: ◉◯◯◯◯◯ 
+ObservationTensor(1).private_last_move_castle_dir: ◯◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [117, 1781, 1841] +StringLegalActions() = ["a3b2", "d4c4", "d3d2"] + +# Apply action "a3a2" +action: 89 + +# State 14 +# Apply action "d4c4" +action: 1781 + +# State 15 +# Apply action "a3b2" +action: 117 + +# State 16 +# Apply action "a3b2" +action: 117 + +# State 17 +# Apply action "d3d2" +action: 1841 + +# State 18 +# rk1r/pPPp/1PP1/R1KR b - - 0 2 +IsTerminal() = True +History() = [1841, 117, 1285, 1841, 1197, 1882, 732, 628, 600, 600, 614, 1882, 89, 89, 1781, 117, 117, 1841] +HistoryString() = "1841, 117, 1285, 1841, 1197, 1882, 732, 628, 600, 600, 614, 1882, 89, 89, 1781, 117, 117, 1841" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "Illegal move." +ObservationString(1) = "Illegal move." +ObservationTensor(0).public_repetitions: ◉◯◯ +ObservationTensor(0).public_side_to_play: ◉◯ +ObservationTensor(0).public_irreversible_move_counter: ◯ +ObservationTensor(0).public_illegal: ◯◉ +ObservationTensor(0).public_capture_type: ◉◯◯ +ObservationTensor(0).public_captured_square: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).public_check_one: ◉◯◯◯◯◯ +ObservationTensor(0).public_check_two: ◉◯◯◯◯◯ +ObservationTensor(0).public_to_move: ◉◯◯ +ObservationTensor(0).public_pawn_tries: ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ +ObservationTensor(0).private_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◉◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◉◯ +ObservationTensor(0).private_empty_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_unknown_squares: ◉◉◯◯ + ◉◉◉◯ + ◉◉◉◉ + ◉◉◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_last_move_from: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◉◯ +ObservationTensor(0).private_last_move_to: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◉◯◯ +ObservationTensor(0).private_last_move_promotion: ◉◯◯◯◯◯ +ObservationTensor(0).private_last_move_castle_dir: ◯◯◉ +ObservationTensor(1).public_repetitions: ◉◯◯ +ObservationTensor(1).public_side_to_play: ◉◯ +ObservationTensor(1).public_irreversible_move_counter: ◯ +ObservationTensor(1).public_illegal: ◯◉ +ObservationTensor(1).public_capture_type: ◉◯◯ +ObservationTensor(1).public_captured_square: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).public_check_one: ◉◯◯◯◯◯ +ObservationTensor(1).public_check_two: ◉◯◯◯◯◯ +ObservationTensor(1).public_to_move: ◉◯◯ +ObservationTensor(1).public_pawn_tries: ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_R_pieces: ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯ + ◯◯◯◯ + 
◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_P_pieces: ◯◯◯◯ + ◯◉◉◯ + ◯◉◉◯ + ◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_empty_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_unknown_squares: ◯◉◉◉ + ◉◯◯◉ + ◯◯◯◉ + ◯◉◉◉ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_last_move_from: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◉◯◯ +ObservationTensor(1).private_last_move_to: ◯◯◯◯ + ◯◯◯◯ + ◯◯◉◯ + ◯◯◯◯ +ObservationTensor(1).private_last_move_promotion: ◉◯◯◯◯◯ +ObservationTensor(1).private_last_move_castle_dir: ◯◯◉ +Rewards() = [-1, 1] +Returns() = [-1, 1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/kuhn_poker_2p.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/kuhn_poker_2p.txt new file mode 100644 index 0000000..0ca49da --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/kuhn_poker_2p.txt @@ -0,0 +1,221 @@ +game: kuhn_poker(players=2) + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Kuhn Poker" +GameType.max_num_players = 10 +GameType.min_num_players = 2 +GameType.parameter_specification = ["players"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = True +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "kuhn_poker" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 2 +PolicyTensorShape() = [2] +MaxChanceOutcomes() = 3 +GetParameters() = {players=2} +NumPlayers() = 2 +MinUtility() = -2.0 +MaxUtility() = 2.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = player: [2], private_card: [3], betting: [3, 2] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 11 +ObservationTensorShape() = player: [2], private_card: [3], pot_contribution: [2] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 7 +MaxGameLength() = 3 +ToString() = "kuhn_poker(players=2)" + +# State 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "" +InformationStateString(1) = "" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◯◯◯ +InformationStateTensor(0).betting: ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◯◯◯ +InformationStateTensor(1).betting: ◯◯ + ◯◯ + ◯◯ +ObservationString(0) = "" +ObservationString(1) = "" +PublicObservationString() = "start game" +PrivateObservationString(0) = "" +PrivateObservationString(1) = "" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◯◯◯ +ObservationTensor(0).pot_contribution: ◉◉ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◯◯◯ +ObservationTensor(1).pot_contribution: ◉◉ +ChanceOutcomes() = [(0,0.333333), (1,0.333333), (2,0.333333)] +LegalActions() = [0, 1, 2] 
+StringLegalActions() = ["Deal:0", "Deal:1", "Deal:2"] + +# Apply action "Deal:1" +action: 1 + +# State 1 +# 1 +IsTerminal() = False +History() = [1] +HistoryString() = "1" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "1" +InformationStateString(1) = "" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◯◉◯ +InformationStateTensor(0).betting: ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◯◯◯ +InformationStateTensor(1).betting: ◯◯ + ◯◯ + ◯◯ +ObservationString(0) = "111" +ObservationString(1) = "" +PublicObservationString() = "Deal to player 0" +PrivateObservationString(0) = "Received card 1" +PrivateObservationString(1) = "" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◯◉◯ +ObservationTensor(0).pot_contribution: ◉◉ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◯◯◯ +ObservationTensor(1).pot_contribution: ◉◉ +ChanceOutcomes() = [(0,0.5), (2,0.5)] +LegalActions() = [0, 2] +StringLegalActions() = ["Deal:0", "Deal:2"] + +# Apply action "Deal:2" +action: 2 + +# State 2 +# 1 2 +IsTerminal() = False +History() = [1, 2] +HistoryString() = "1, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "1" +InformationStateString(1) = "2" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◯◉◯ +InformationStateTensor(0).betting: ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◯◯◉ +InformationStateTensor(1).betting: ◯◯ + ◯◯ + ◯◯ +ObservationString(0) = "111" +ObservationString(1) = "211" +PublicObservationString() = "Deal to player 1" +PrivateObservationString(0) = "" +PrivateObservationString(1) = "Received card 2" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◯◉◯ +ObservationTensor(0).pot_contribution: ◉◉ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◯◯◉ +ObservationTensor(1).pot_contribution: ◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["Pass", "Bet"] + +# Apply action "Pass" +action: 0 + +# State 3 +# 1 2 p +IsTerminal() = False +History() = [1, 2, 0] +HistoryString() = "1, 2, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "1p" +InformationStateString(1) = "2p" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◯◉◯ +InformationStateTensor(0).betting: ◉◯ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◯◯◉ +InformationStateTensor(1).betting: ◉◯ + ◯◯ + ◯◯ +ObservationString(0) = "111" +ObservationString(1) = "211" +PublicObservationString() = "Pass" +PrivateObservationString(0) = "" +PrivateObservationString(1) = "" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◯◉◯ +ObservationTensor(0).pot_contribution: ◉◉ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◯◯◉ +ObservationTensor(1).pot_contribution: ◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["Pass", "Bet"] + +# Apply action "Pass" +action: 0 + +# State 4 +# 1 2 pp +IsTerminal() = True +History() = [1, 2, 0, 0] +HistoryString() = "1, 2, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "1pp" +InformationStateString(1) = "2pp" +InformationStateTensor(0).player: ◉◯ 
+InformationStateTensor(0).private_card: ◯◉◯ +InformationStateTensor(0).betting: ◉◯ + ◉◯ + ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◯◯◉ +InformationStateTensor(1).betting: ◉◯ + ◉◯ + ◯◯ +ObservationString(0) = "111" +ObservationString(1) = "211" +PublicObservationString() = "Pass" +PrivateObservationString(0) = "" +PrivateObservationString(1) = "" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◯◉◯ +ObservationTensor(0).pot_contribution: ◉◉ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◯◯◉ +ObservationTensor(1).pot_contribution: ◉◉ +Rewards() = [-1, 1] +Returns() = [-1, 1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/kuhn_poker_3p.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/kuhn_poker_3p.txt new file mode 100644 index 0000000..7d187d0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/kuhn_poker_3p.txt @@ -0,0 +1,478 @@ +game: kuhn_poker(players=3) + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Kuhn Poker" +GameType.max_num_players = 10 +GameType.min_num_players = 2 +GameType.parameter_specification = ["players"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = True +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "kuhn_poker" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 2 +PolicyTensorShape() = [2] +MaxChanceOutcomes() = 4 +GetParameters() = {players=3} +NumPlayers() = 3 +MinUtility() = -2.0 +MaxUtility() = 4.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = player: [3], private_card: [4], betting: [5, 2] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 17 +ObservationTensorShape() = player: [3], private_card: [4], pot_contribution: [3] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 10 +MaxGameLength() = 5 +ToString() = "kuhn_poker(players=3)" + +# State 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "" +InformationStateString(1) = "" +InformationStateString(2) = "" +InformationStateTensor(0).player: ◉◯◯ +InformationStateTensor(0).private_card: ◯◯◯◯ +InformationStateTensor(0).betting: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉◯ +InformationStateTensor(1).private_card: ◯◯◯◯ +InformationStateTensor(1).betting: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(2).player: ◯◯◉ +InformationStateTensor(2).private_card: ◯◯◯◯ +InformationStateTensor(2).betting: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +ObservationString(0) = "" +ObservationString(1) = "" +ObservationString(2) = "" +PublicObservationString() = "start game" +PrivateObservationString(0) = "" +PrivateObservationString(1) = "" +PrivateObservationString(2) = "" +ObservationTensor(0).player: ◉◯◯ +ObservationTensor(0).private_card: ◯◯◯◯ +ObservationTensor(0).pot_contribution: ◉◉◉ +ObservationTensor(1).player: ◯◉◯ +ObservationTensor(1).private_card: ◯◯◯◯ +ObservationTensor(1).pot_contribution: ◉◉◉ +ObservationTensor(2).player: ◯◯◉ +ObservationTensor(2).private_card: ◯◯◯◯ 
+ObservationTensor(2).pot_contribution: ◉◉◉ +ChanceOutcomes() = [(0,0.25), (1,0.25), (2,0.25), (3,0.25)] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["Deal:0", "Deal:1", "Deal:2", "Deal:3"] + +# Apply action "Deal:1" +action: 1 + +# State 1 +# 1 +IsTerminal() = False +History() = [1] +HistoryString() = "1" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "1" +InformationStateString(1) = "" +InformationStateString(2) = "" +InformationStateTensor(0).player: ◉◯◯ +InformationStateTensor(0).private_card: ◯◉◯◯ +InformationStateTensor(0).betting: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉◯ +InformationStateTensor(1).private_card: ◯◯◯◯ +InformationStateTensor(1).betting: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(2).player: ◯◯◉ +InformationStateTensor(2).private_card: ◯◯◯◯ +InformationStateTensor(2).betting: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +ObservationString(0) = "1111" +ObservationString(1) = "" +ObservationString(2) = "" +PublicObservationString() = "Deal to player 0" +PrivateObservationString(0) = "Received card 1" +PrivateObservationString(1) = "" +PrivateObservationString(2) = "" +ObservationTensor(0).player: ◉◯◯ +ObservationTensor(0).private_card: ◯◉◯◯ +ObservationTensor(0).pot_contribution: ◉◉◉ +ObservationTensor(1).player: ◯◉◯ +ObservationTensor(1).private_card: ◯◯◯◯ +ObservationTensor(1).pot_contribution: ◉◉◉ +ObservationTensor(2).player: ◯◯◉ +ObservationTensor(2).private_card: ◯◯◯◯ +ObservationTensor(2).pot_contribution: ◉◉◉ +ChanceOutcomes() = [(0,0.333333), (2,0.333333), (3,0.333333)] +LegalActions() = [0, 2, 3] +StringLegalActions() = ["Deal:0", "Deal:2", "Deal:3"] + +# Apply action "Deal:3" +action: 3 + +# State 2 +# Apply action "Deal:2" +action: 2 + +# State 3 +# 1 3 2 +IsTerminal() = False +History() = [1, 3, 2] +HistoryString() = "1, 3, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "1" +InformationStateString(1) = "3" +InformationStateString(2) = "2" +InformationStateTensor(0).player: ◉◯◯ +InformationStateTensor(0).private_card: ◯◉◯◯ +InformationStateTensor(0).betting: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉◯ +InformationStateTensor(1).private_card: ◯◯◯◉ +InformationStateTensor(1).betting: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(2).player: ◯◯◉ +InformationStateTensor(2).private_card: ◯◯◉◯ +InformationStateTensor(2).betting: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +ObservationString(0) = "1111" +ObservationString(1) = "3111" +ObservationString(2) = "2111" +PublicObservationString() = "Deal to player 2" +PrivateObservationString(0) = "" +PrivateObservationString(1) = "" +PrivateObservationString(2) = "Received card 2" +ObservationTensor(0).player: ◉◯◯ +ObservationTensor(0).private_card: ◯◉◯◯ +ObservationTensor(0).pot_contribution: ◉◉◉ +ObservationTensor(1).player: ◯◉◯ +ObservationTensor(1).private_card: ◯◯◯◉ +ObservationTensor(1).pot_contribution: ◉◉◉ +ObservationTensor(2).player: ◯◯◉ +ObservationTensor(2).private_card: ◯◯◉◯ +ObservationTensor(2).pot_contribution: ◉◉◉ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["Pass", "Bet"] + +# Apply action "Pass" +action: 0 + +# State 4 +# 1 3 2 p +IsTerminal() = False +History() = [1, 3, 2, 0] +HistoryString() = "1, 3, 2, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "1p" +InformationStateString(1) = "3p" +InformationStateString(2) = "2p" +InformationStateTensor(0).player: ◉◯◯ 
+InformationStateTensor(0).private_card: ◯◉◯◯ +InformationStateTensor(0).betting: ◉◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉◯ +InformationStateTensor(1).private_card: ◯◯◯◉ +InformationStateTensor(1).betting: ◉◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(2).player: ◯◯◉ +InformationStateTensor(2).private_card: ◯◯◉◯ +InformationStateTensor(2).betting: ◉◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +ObservationString(0) = "1111" +ObservationString(1) = "3111" +ObservationString(2) = "2111" +PublicObservationString() = "Pass" +PrivateObservationString(0) = "" +PrivateObservationString(1) = "" +PrivateObservationString(2) = "" +ObservationTensor(0).player: ◉◯◯ +ObservationTensor(0).private_card: ◯◉◯◯ +ObservationTensor(0).pot_contribution: ◉◉◉ +ObservationTensor(1).player: ◯◉◯ +ObservationTensor(1).private_card: ◯◯◯◉ +ObservationTensor(1).pot_contribution: ◉◉◉ +ObservationTensor(2).player: ◯◯◉ +ObservationTensor(2).private_card: ◯◯◉◯ +ObservationTensor(2).pot_contribution: ◉◉◉ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["Pass", "Bet"] + +# Apply action "Pass" +action: 0 + +# State 5 +# 1 3 2 pp +IsTerminal() = False +History() = [1, 3, 2, 0, 0] +HistoryString() = "1, 3, 2, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "1pp" +InformationStateString(1) = "3pp" +InformationStateString(2) = "2pp" +InformationStateTensor(0).player: ◉◯◯ +InformationStateTensor(0).private_card: ◯◉◯◯ +InformationStateTensor(0).betting: ◉◯ + ◉◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉◯ +InformationStateTensor(1).private_card: ◯◯◯◉ +InformationStateTensor(1).betting: ◉◯ + ◉◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(2).player: ◯◯◉ +InformationStateTensor(2).private_card: ◯◯◉◯ +InformationStateTensor(2).betting: ◉◯ + ◉◯ + ◯◯ + ◯◯ + ◯◯ +ObservationString(0) = "1111" +ObservationString(1) = "3111" +ObservationString(2) = "2111" +PublicObservationString() = "Pass" +PrivateObservationString(0) = "" +PrivateObservationString(1) = "" +PrivateObservationString(2) = "" +ObservationTensor(0).player: ◉◯◯ +ObservationTensor(0).private_card: ◯◉◯◯ +ObservationTensor(0).pot_contribution: ◉◉◉ +ObservationTensor(1).player: ◯◉◯ +ObservationTensor(1).private_card: ◯◯◯◉ +ObservationTensor(1).pot_contribution: ◉◉◉ +ObservationTensor(2).player: ◯◯◉ +ObservationTensor(2).private_card: ◯◯◉◯ +ObservationTensor(2).pot_contribution: ◉◉◉ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["Pass", "Bet"] + +# Apply action "Bet" +action: 1 + +# State 6 +# 1 3 2 ppb +IsTerminal() = False +History() = [1, 3, 2, 0, 0, 1] +HistoryString() = "1, 3, 2, 0, 0, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "1ppb" +InformationStateString(1) = "3ppb" +InformationStateString(2) = "2ppb" +InformationStateTensor(0).player: ◉◯◯ +InformationStateTensor(0).private_card: ◯◉◯◯ +InformationStateTensor(0).betting: ◉◯ + ◉◯ + ◯◉ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉◯ +InformationStateTensor(1).private_card: ◯◯◯◉ +InformationStateTensor(1).betting: ◉◯ + ◉◯ + ◯◉ + ◯◯ + ◯◯ +InformationStateTensor(2).player: ◯◯◉ +InformationStateTensor(2).private_card: ◯◯◉◯ +InformationStateTensor(2).betting: ◉◯ + ◉◯ + ◯◉ + ◯◯ + ◯◯ +ObservationString(0) = "1112" +ObservationString(1) = "3112" +ObservationString(2) = "2112" +PublicObservationString() = "Bet" +PrivateObservationString(0) = "" +PrivateObservationString(1) = "" +PrivateObservationString(2) = "" 
+ObservationTensor(0).player: ◉◯◯ +ObservationTensor(0).private_card: ◯◉◯◯ +ObservationTensor(0).pot_contribution = [1.0, 1.0, 2.0] +ObservationTensor(1).player: ◯◉◯ +ObservationTensor(1).private_card: ◯◯◯◉ +ObservationTensor(1).pot_contribution = [1.0, 1.0, 2.0] +ObservationTensor(2).player: ◯◯◉ +ObservationTensor(2).private_card: ◯◯◉◯ +ObservationTensor(2).pot_contribution = [1.0, 1.0, 2.0] +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["Pass", "Bet"] + +# Apply action "Bet" +action: 1 + +# State 7 +# 1 3 2 ppbb +IsTerminal() = False +History() = [1, 3, 2, 0, 0, 1, 1] +HistoryString() = "1, 3, 2, 0, 0, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "1ppbb" +InformationStateString(1) = "3ppbb" +InformationStateString(2) = "2ppbb" +InformationStateTensor(0).player: ◉◯◯ +InformationStateTensor(0).private_card: ◯◉◯◯ +InformationStateTensor(0).betting: ◉◯ + ◉◯ + ◯◉ + ◯◉ + ◯◯ +InformationStateTensor(1).player: ◯◉◯ +InformationStateTensor(1).private_card: ◯◯◯◉ +InformationStateTensor(1).betting: ◉◯ + ◉◯ + ◯◉ + ◯◉ + ◯◯ +InformationStateTensor(2).player: ◯◯◉ +InformationStateTensor(2).private_card: ◯◯◉◯ +InformationStateTensor(2).betting: ◉◯ + ◉◯ + ◯◉ + ◯◉ + ◯◯ +ObservationString(0) = "1212" +ObservationString(1) = "3212" +ObservationString(2) = "2212" +PublicObservationString() = "Bet" +PrivateObservationString(0) = "" +PrivateObservationString(1) = "" +PrivateObservationString(2) = "" +ObservationTensor(0).player: ◉◯◯ +ObservationTensor(0).private_card: ◯◉◯◯ +ObservationTensor(0).pot_contribution = [2.0, 1.0, 2.0] +ObservationTensor(1).player: ◯◉◯ +ObservationTensor(1).private_card: ◯◯◯◉ +ObservationTensor(1).pot_contribution = [2.0, 1.0, 2.0] +ObservationTensor(2).player: ◯◯◉ +ObservationTensor(2).private_card: ◯◯◉◯ +ObservationTensor(2).pot_contribution = [2.0, 1.0, 2.0] +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["Pass", "Bet"] + +# Apply action "Pass" +action: 0 + +# State 8 +# 1 3 2 ppbbp +IsTerminal() = True +History() = [1, 3, 2, 0, 0, 1, 1, 0] +HistoryString() = "1, 3, 2, 0, 0, 1, 1, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "1ppbbp" +InformationStateString(1) = "3ppbbp" +InformationStateString(2) = "2ppbbp" +InformationStateTensor(0).player: ◉◯◯ +InformationStateTensor(0).private_card: ◯◉◯◯ +InformationStateTensor(0).betting: ◉◯ + ◉◯ + ◯◉ + ◯◉ + ◉◯ +InformationStateTensor(1).player: ◯◉◯ +InformationStateTensor(1).private_card: ◯◯◯◉ +InformationStateTensor(1).betting: ◉◯ + ◉◯ + ◯◉ + ◯◉ + ◉◯ +InformationStateTensor(2).player: ◯◯◉ +InformationStateTensor(2).private_card: ◯◯◉◯ +InformationStateTensor(2).betting: ◉◯ + ◉◯ + ◯◉ + ◯◉ + ◉◯ +ObservationString(0) = "1212" +ObservationString(1) = "3212" +ObservationString(2) = "2212" +PublicObservationString() = "Pass" +PrivateObservationString(0) = "" +PrivateObservationString(1) = "" +PrivateObservationString(2) = "" +ObservationTensor(0).player: ◉◯◯ +ObservationTensor(0).private_card: ◯◉◯◯ +ObservationTensor(0).pot_contribution = [2.0, 1.0, 2.0] +ObservationTensor(1).player: ◯◉◯ +ObservationTensor(1).private_card: ◯◯◯◉ +ObservationTensor(1).pot_contribution = [2.0, 1.0, 2.0] +ObservationTensor(2).player: ◯◯◉ +ObservationTensor(2).private_card: ◯◯◉◯ +ObservationTensor(2).pot_contribution = [2.0, 1.0, 2.0] +Rewards() = [-2, -1, 3] +Returns() = [-2, -1, 3] diff --git 
a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/laser_tag(fully_obs=false,horizon=20).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/laser_tag(fully_obs=false,horizon=20).txt new file mode 100644 index 0000000..9e9559b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/laser_tag(fully_obs=false,horizon=20).txt @@ -0,0 +1,647 @@ +game: laser_tag(fully_obs=false,horizon=20) + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SIMULTANEOUS +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Laser Tag" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["fully_obs", "grid", "horizon", "obs_back", "obs_front", "obs_side", "zero_sum"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.REWARDS +GameType.short_name = "laser_tag" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 10 +PolicyTensorShape() = [10] +MaxChanceOutcomes() = 6 +GetParameters() = {fully_obs=False,grid=S.....S\n.......\n..*.*..\n.**.**.\n..*.*..\n.......\nS.....S,horizon=20,obs_back=2,obs_front=17,obs_side=10,zero_sum=False} +NumPlayers() = 2 +MinUtility() = -20.0 +MaxUtility() = 20.0 +UtilitySum() = None +ObservationTensorShape() = [4, 20, 21] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 1680 +MaxGameLength() = 20 +ToString() = "laser_tag(fully_obs=False,horizon=20)" + +# State 0 +# ....... +# ....... +# ..*.*.. +# .**.**. +# ..*.*.. +# ....... +# ....... 
+# Orientations: 1 1 +# Chance Node +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n***.......***********\n***.......***********\n***..*.*..***********\n***.**.**.***********\n***..*.*..***********\n***.......***********\n***.......***********\n*********************\n*********************\n*********************\nOrientations: -1 -1\nChance Node" +ObservationString(1) = "*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n***.......***********\n***.......***********\n***..*.*..***********\n***.**.**.***********\n***..*.*..***********\n***.......***********\n***.......***********\n*********************\n*********************\n*********************\nOrientations: -1 -1\nChance Node" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◉◯◉◯◉◉◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◯◉◯◯◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◉◉◯◉◉◯◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◉◯◉◯◉◉◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◯◉◯◯◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◉◯◉◯◉◉◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◯◉◯◯◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◉◉◯◉◉◯◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◉◯◉◯◉◉◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◯◉◯◯◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +ChanceOutcomes() = [(2,0.25), (3,0.25), (4,0.25), (5,0.25)] +LegalActions() = [2, 3, 4, 5] +StringLegalActions() = ["(spawned at location #0)", "(spawned at location #1)", "(spawned at location #2)", "(spawned at location #3)"] + +# Apply action "(spawned at location #3)" +action: 5 + +# State 1 +# ....... +# ....... +# ..*.*.. +# .**.**. +# ..*.*.. +# ....... +# ......B +# Orientations: 1 1 +# Chance Node +IsTerminal() = False +History() = [5] +HistoryString() = "5" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n***B......***********\n***.......***********\n***..*.*..***********\n***.**.**.***********\n***..*.*..***********\n***.......***********\n***.......***********\n*********************\n*********************\n*********************\nOrientations: -1 1\nChance Node" +ObservationString(1) = "*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n**********B......****\n**********.......****\n**********..*.*..****\nOrientations: -1 1\nChance Node" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◉◯◉◯◉◉◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◯◉◯◯◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◉◉◯◉◉◯◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◉◯◉◯◉◉◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◯◉◯◯◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◉◉◉◉◉◉◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◉◯◉◯◉◉◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◯◯◉◯◉◯◯◉◉◉◉ +ChanceOutcomes() = [(2,0.333333), (3,0.333333), (4,0.333333)] +LegalActions() = [2, 3, 4] +StringLegalActions() = ["(spawned at location #0)", "(spawned at location #1)", "(spawned at location #2)"] + +# Apply action "(spawned at location #2)" +action: 4 + +# State 2 +# ....... +# ....... +# ..*.*.. +# .**.**. +# ..*.*.. +# ....... 
+# A.....B +# Orientations: 1 1 +IsTerminal() = False +History() = [5, 4] +HistoryString() = "5, 4" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n****B.....A**********\n****.......**********\n****..*.*..**********\nOrientations: 1 1\n" +ObservationString(1) = "*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n**********B.....A****\n**********.......****\n**********..*.*..****\nOrientations: 1 1\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◉◯◉◯◉◉◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◯◯◉◯◉◯◯◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◉◉◉◉◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◉◉◉◉◉◉◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◉◯◉◯◉◉◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◯◯◉◯◉◯◯◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +LegalActions(1) = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +StringLegalActions(0) = ["left turn", "right turn", "move forward", "move backward", "step left", "step right", "stand", "step forward and left turn", "step forward and right turn", "fire"] +StringLegalActions(1) = ["left turn", "right turn", "move forward", "move backward", "step left", "step right", "stand", "step forward and left turn", "step forward and right turn", "fire"] + +# Apply joint action ["step right", "stand"] +actions: [5, 6] + +# State 3 +# Apply action "(B's action first)" +action: 1 + +# State 4 +# ....... +# ....... +# ..*.*.. +# .**.**. +# ..*.*.. +# ....... 
+# A.....B +# Orientations: 1 1 +IsTerminal() = False +History() = [5, 4, 5, 6, 1] +HistoryString() = "5, 4, 5, 6, 1" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n****B.....A**********\n****.......**********\n****..*.*..**********\nOrientations: 1 1\n" +ObservationString(1) = "*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n**********B.....A****\n**********.......****\n**********..*.*..****\nOrientations: 1 1\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◉◯◉◯◉◉◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◯◯◉◯◉◯◯◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◉◉◉◉◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◉◉◉◉◉◉◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◉◯◉◯◉◉◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◯◯◉◯◉◯◯◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +LegalActions(1) = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +StringLegalActions(0) = ["left turn", "right turn", "move forward", "move backward", "step left", "step right", "stand", "step forward and left turn", "step forward and right turn", "fire"] +StringLegalActions(1) = ["left turn", "right turn", "move forward", "move backward", "step left", "step right", "stand", "step forward and left turn", "step forward and right turn", "fire"] + +# Apply joint action ["move backward", "step forward and right turn"] +actions: [3, 8] + +# State 5 +# Apply action "(B's action first)" +action: 1 + +# State 6 +# ....... +# ....... +# ..*.*.. +# .**.**. +# ..*.*.. +# A...... 
+# ......B +# Orientations: 1 1 +IsTerminal() = False +History() = [5, 4, 5, 6, 1, 3, 8, 1] +HistoryString() = "5, 4, 5, 6, 1, 3, 8, 1" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n****B......**********\n****......A**********\n****..*.*..**********\n****.**.**.**********\nOrientations: 1 1\n" +ObservationString(1) = "*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n**********B......****\n**********......A****\n**********..*.*..****\nOrientations: 1 1\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◉◯◉◯◉◉◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◯◯◉◯◉◯◯◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◯◉◉◯◉◉◯◉◉◉◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◉◉◉◉◉◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◉◯◉◯◉◉◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◯◯◉◯◉◯◯◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +LegalActions(1) = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +StringLegalActions(0) = ["left turn", "right turn", "move forward", "move backward", "step left", "step right", "stand", "step forward and left turn", "step forward and right turn", "fire"] +StringLegalActions(1) = ["left turn", "right turn", "move forward", "move backward", "step left", "step right", "stand", "step forward and left turn", "step forward and right turn", "fire"] + +# Apply joint action ["stand", "step forward and left turn"] +actions: [6, 7] + +# State 7 +# Apply action "(A's action first)" +action: 0 + +# State 8 +# Apply joint action ["step left", "step forward and right turn"] +actions: [4, 8] + +# State 9 +# Apply action "(B's action first)" +action: 1 + +# State 10 +# Apply joint action ["step right", "step forward and right turn"] +actions: [5, 8] + +# State 11 +# Apply action "(A's action first)" +action: 0 + +# State 12 +# Apply joint action ["move forward", "step left"] +actions: [2, 4] + +# State 13 +# Apply action "(B's action first)" +action: 1 + +# State 14 +# Apply joint action ["step left", "left turn"] +actions: [4, 0] + +# State 15 +# Apply action "(A's action first)" +action: 0 + +# State 16 +# Apply joint action ["step forward and right turn", "stand"] +actions: [8, 6] + +# State 17 +# Apply action "(A's action first)" +action: 0 + +# State 18 +# Apply joint action ["step forward and right turn", "right turn"] +actions: [8, 1] + +# State 19 +# Apply action "(B's action first)" +action: 1 + +# State 20 +# Apply joint action ["fire", "move forward"] +actions: [9, 2] + +# State 21 +# Apply action "(A's action first)" +action: 0 + +# State 22 +# ....... +# ....... +# ..*.*.. +# .**.**. +# ..*.*.. +# ....... 
+# .A....B +# Orientations: 1 1 +IsTerminal() = False +History() = [5, 4, 5, 6, 1, 3, 8, 1, 6, 7, 0, 4, 8, 1, 5, 8, 0, 2, 4, 1, 4, 0, 0, 8, 6, 0, 8, 1, 1, 9, 2, 0] +HistoryString() = "5, 4, 5, 6, 1, 3, 8, 1, 6, 7, 0, 4, 8, 1, 5, 8, 0, 2, 4, 1, 4, 0, 0, 8, 6, 0, 8, 1, 1, 9, 2, 0" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*****B....A.*********\n*****.......*********\n*****..*.*..*********\nOrientations: 1 1\n" +ObservationString(1) = "*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n**********B....A.****\n**********.......****\n**********..*.*..****\nOrientations: 1 1\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◉◉◉◯◉◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◉◯◉◯◉◉◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯◉◯◉◯◯◉◉◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◉◉◉◯◉◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◉◉◉◉◉◉◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◉◯◉◯◉◉◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◯◯◉◯◉◯◯◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +LegalActions(1) = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +StringLegalActions(0) = ["left turn", "right turn", "move forward", "move backward", "step left", "step right", "stand", "step forward and left turn", "step forward and right turn", "fire"] +StringLegalActions(1) = ["left turn", "right turn", "move forward", "move backward", "step left", "step right", "stand", "step forward and left turn", "step forward and right turn", "fire"] + +# Apply joint action ["right turn", "left turn"] +actions: [1, 0] + +# State 23 +# Apply action "(B's action first)" +action: 1 + +# State 24 +# Apply joint action ["stand", "step forward and left turn"] +actions: [6, 7] + +# State 25 +# Apply action "(A's action first)" +action: 0 + +# State 26 +# Apply joint action ["step right", "step forward and left turn"] +actions: [5, 7] + +# State 27 +# Apply action "(A's action first)" +action: 0 + +# State 28 +# Apply joint action ["move backward", "stand"] +actions: [3, 6] + +# State 29 +# Apply action "(A's action first)" +action: 0 + +# State 30 +# Apply joint action ["move backward", "step forward and left turn"] +actions: [3, 7] + +# State 31 +# Apply action "(A's action first)" +action: 0 + +# State 32 +# Apply joint action ["left turn", "fire"] +actions: [0, 9] + +# State 33 +# Apply action "(B's action first)" +action: 1 + +# State 34 +# Apply joint action ["move forward", "left turn"] +actions: [2, 0] + +# State 35 +# Apply action "(B's action first)" +action: 1 + +# State 36 +# Apply joint action ["step forward and right turn", "right turn"] +actions: [8, 1] + +# State 37 +# Apply action "(B's action first)" +action: 1 + +# State 38 +# Apply joint action ["step right", "move backward"] +actions: [5, 3] + +# State 39 +# Apply action "(B's action first)" +action: 1 + +# State 40 +# Apply joint action ["fire", "stand"] +actions: [9, 6] + +# State 41 +# Apply action "(A's action first)" +action: 0 + +# State 42 +# ....... +# ....... +# ..*.*.. +# .**.**. +# ..*.*.. +# ....... +# ..A..B. 
+# Orientations: 1 2 +IsTerminal() = True +History() = [5, 4, 5, 6, 1, 3, 8, 1, 6, 7, 0, 4, 8, 1, 5, 8, 0, 2, 4, 1, 4, 0, 0, 8, 6, 0, 8, 1, 1, 9, 2, 0, 1, 0, 1, 6, 7, 0, 5, 7, 0, 3, 6, 0, 3, 7, 0, 0, 9, 1, 2, 0, 1, 8, 1, 1, 5, 3, 1, 9, 6, 0] +HistoryString() = "5, 4, 5, 6, 1, 3, 8, 1, 6, 7, 0, 4, 8, 1, 5, 8, 0, 2, 4, 1, 4, 0, 0, 8, 6, 0, 8, 1, 1, 9, 2, 0, 1, 0, 1, 6, 7, 0, 5, 7, 0, 3, 6, 0, 3, 7, 0, 0, 9, 1, 2, 0, 1, 8, 1, 1, 5, 3, 1, 9, 6, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n******.B..A..********\n******.......********\n******..*.*..********\nOrientations: 1 2\n" +ObservationString(1) = "*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n*********************\n****.......**********\n****...*..B**********\n****..***..**********\n****.......**********\nOrientations: -1 2\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◉◉◯◉◉◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◉◯◉◯◉◉◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯◯◉◯◉◯◯◉◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◉◉◯◉◉◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯◉◯◯◯◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◉◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/laser_tag(horizon=20).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/laser_tag(horizon=20).txt new file mode 100644 index 0000000..71e2d9a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/laser_tag(horizon=20).txt @@ -0,0 +1,465 @@ +game: laser_tag(horizon=20) + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SIMULTANEOUS +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Laser Tag" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["fully_obs", "grid", "horizon", "obs_back", "obs_front", "obs_side", "zero_sum"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.REWARDS +GameType.short_name = "laser_tag" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 10 +PolicyTensorShape() = [10] +MaxChanceOutcomes() = 6 +GetParameters() = {fully_obs=True,grid=S.....S\n.......\n..*.*..\n.**.**.\n..*.*..\n.......\nS.....S,horizon=20,obs_back=2,obs_front=17,obs_side=10,zero_sum=False} +NumPlayers() = 2 +MinUtility() = -20.0 +MaxUtility() = 20.0 +UtilitySum() = None +ObservationTensorShape() = [4, 7, 7] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 196 +MaxGameLength() = 20 +ToString() = "laser_tag(horizon=20)" + +# State 0 +# ....... +# ....... +# ..*.*.. +# .**.**. +# ..*.*.. +# ....... +# ....... 
+# Orientations: 1 1 +# Chance Node +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = ".......\n.......\n..*.*..\n.**.**.\n..*.*..\n.......\n.......\nOrientations: 1 1\nChance Node" +ObservationString(1) = ".......\n.......\n..*.*..\n.**.**.\n..*.*..\n.......\n.......\nOrientations: 1 1\nChance Node" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◯◉◉ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◉ ◯◉◉◯◉◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◯◉◉ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◯◉◉ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◉ ◯◉◉◯◉◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◯◉◉ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +ChanceOutcomes() = [(2,0.25), (3,0.25), (4,0.25), (5,0.25)] +LegalActions() = [2, 3, 4, 5] +StringLegalActions() = ["(spawned at location #0)", "(spawned at location #1)", "(spawned at location #2)", "(spawned at location #3)"] + +# Apply action "(spawned at location #1)" +action: 3 + +# State 1 +# ......B +# ....... +# ..*.*.. +# .**.**. +# ..*.*.. +# ....... +# ....... +# Orientations: 1 1 +# Chance Node +IsTerminal() = False +History() = [3] +HistoryString() = "3" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "......B\n.......\n..*.*..\n.**.**.\n..*.*..\n.......\n.......\nOrientations: 1 1\nChance Node" +ObservationString(1) = "......B\n.......\n..*.*..\n.**.**.\n..*.*..\n.......\n.......\nOrientations: 1 1\nChance Node" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉ ◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◯◉◉ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◉ ◯◉◉◯◉◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◯◉◉ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉ ◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◯◉◉ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◉ ◯◉◉◯◉◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◯◉◉ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +ChanceOutcomes() = [(2,0.333333), (4,0.333333), (5,0.333333)] +LegalActions() = [2, 4, 5] +StringLegalActions() = ["(spawned at location #0)", "(spawned at location #2)", "(spawned at location #3)"] + +# Apply action "(spawned at location #3)" +action: 5 + +# State 2 +# ......B +# ....... +# ..*.*.. +# .**.**. +# ..*.*.. +# ....... 
+# ......A +# Orientations: 1 1 +IsTerminal() = False +History() = [3, 5] +HistoryString() = "3, 5" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "......B\n.......\n..*.*..\n.**.**.\n..*.*..\n.......\n......A\nOrientations: 1 1\n" +ObservationString(1) = "......B\n.......\n..*.*..\n.**.**.\n..*.*..\n.......\n......A\nOrientations: 1 1\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉ ◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◯◉◉ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◉ ◯◉◉◯◉◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◯◉◉ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉ ◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◯◉◉ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◉ ◯◉◉◯◉◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◯◉◉ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +LegalActions(1) = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +StringLegalActions(0) = ["left turn", "right turn", "move forward", "move backward", "step left", "step right", "stand", "step forward and left turn", "step forward and right turn", "fire"] +StringLegalActions(1) = ["left turn", "right turn", "move forward", "move backward", "step left", "step right", "stand", "step forward and left turn", "step forward and right turn", "fire"] + +# Apply joint action ["step forward and left turn", "step forward and right turn"] +actions: [7, 8] + +# State 3 +# Apply action "(A's action first)" +action: 0 + +# State 4 +# ....... +# ......B +# ..*.*.. +# .**.**. +# ..*.*.. +# ....... +# ......A +# Orientations: 1 3 +IsTerminal() = False +History() = [3, 5, 7, 8, 0] +HistoryString() = "3, 5, 7, 8, 0" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = ".......\n......B\n..*.*..\n.**.**.\n..*.*..\n.......\n......A\nOrientations: 1 3\n" +ObservationString(1) = ".......\n......B\n..*.*..\n.**.**.\n..*.*..\n.......\n......A\nOrientations: 1 3\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉ ◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◯◉◉ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◉ ◯◉◉◯◉◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◯◉◉ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉ ◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◯◉◉ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◉ ◯◉◉◯◉◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◯◉◉ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +LegalActions(1) = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +StringLegalActions(0) = ["left turn", "right turn", "move forward", "move backward", "step left", "step right", "stand", "step forward and left turn", "step forward and right turn", "fire"] +StringLegalActions(1) = ["left turn", "right turn", "move forward", "move backward", "step left", "step right", "stand", "step forward and left turn", "step forward and right turn", "fire"] + +# Apply joint action ["move backward", "step left"] +actions: [3, 4] + +# State 5 +# Apply action "(B's action first)" +action: 1 + +# State 6 +# ....... +# ....... +# ..*.*.B +# .**.**. +# ..*.*.. +# ......A +# ....... 
+# Orientations: 1 3 +IsTerminal() = False +History() = [3, 5, 7, 8, 0, 3, 4, 1] +HistoryString() = "3, 5, 7, 8, 0, 3, 4, 1" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = ".......\n.......\n..*.*.B\n.**.**.\n..*.*..\n......A\n.......\nOrientations: 1 3\n" +ObservationString(1) = ".......\n.......\n..*.*.B\n.**.**.\n..*.*..\n......A\n.......\nOrientations: 1 3\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉ ◉◉◯◉◯◉◯ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◉ ◯◉◉◯◉◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◯◉◉ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉ ◉◉◯◉◯◉◯ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◉ ◯◉◉◯◉◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◯◉◉ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +LegalActions(1) = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +StringLegalActions(0) = ["left turn", "right turn", "move forward", "move backward", "step left", "step right", "stand", "step forward and left turn", "step forward and right turn", "fire"] +StringLegalActions(1) = ["left turn", "right turn", "move forward", "move backward", "step left", "step right", "stand", "step forward and left turn", "step forward and right turn", "fire"] + +# Apply joint action ["step right", "fire"] +actions: [5, 9] + +# State 7 +# Apply action "(B's action first)" +action: 1 + +# State 8 +# Apply joint action ["fire", "step right"] +actions: [9, 5] + +# State 9 +# Apply action "(A's action first)" +action: 0 + +# State 10 +# Apply joint action ["move backward", "move forward"] +actions: [3, 2] + +# State 11 +# Apply action "(B's action first)" +action: 1 + +# State 12 +# Apply joint action ["move forward", "step left"] +actions: [2, 4] + +# State 13 +# Apply action "(A's action first)" +action: 0 + +# State 14 +# Apply joint action ["move forward", "step left"] +actions: [2, 4] + +# State 15 +# Apply action "(B's action first)" +action: 1 + +# State 16 +# Apply joint action ["stand", "move backward"] +actions: [6, 3] + +# State 17 +# Apply action "(B's action first)" +action: 1 + +# State 18 +# Apply joint action ["move backward", "right turn"] +actions: [3, 1] + +# State 19 +# Apply action "(B's action first)" +action: 1 + +# State 20 +# Apply joint action ["step left", "step left"] +actions: [4, 4] + +# State 21 +# Apply action "(A's action first)" +action: 0 + +# State 22 +# ....... +# ....... +# ..*.*B. +# .**.**. +# ..*.*.. +# ......A +# ....... 
+# Orientations: 1 0 +IsTerminal() = False +History() = [3, 5, 7, 8, 0, 3, 4, 1, 5, 9, 1, 9, 5, 0, 3, 2, 1, 2, 4, 0, 2, 4, 1, 6, 3, 1, 3, 1, 1, 4, 4, 0] +HistoryString() = "3, 5, 7, 8, 0, 3, 4, 1, 5, 9, 1, 9, 5, 0, 3, 2, 1, 2, 4, 0, 2, 4, 1, 6, 3, 1, 3, 1, 1, 4, 4, 0" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = ".......\n.......\n..*.*B.\n.**.**.\n..*.*..\n......A\n.......\nOrientations: 1 0\n" +ObservationString(1) = ".......\n.......\n..*.*B.\n.**.**.\n..*.*..\n......A\n.......\nOrientations: 1 0\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◉◉◯◉◯◯◉ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◉ ◯◉◉◯◉◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◯◉◉ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◉◉◯◉◯◯◉ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◉ ◯◉◉◯◉◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◯◉◉ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +LegalActions(1) = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +StringLegalActions(0) = ["left turn", "right turn", "move forward", "move backward", "step left", "step right", "stand", "step forward and left turn", "step forward and right turn", "fire"] +StringLegalActions(1) = ["left turn", "right turn", "move forward", "move backward", "step left", "step right", "stand", "step forward and left turn", "step forward and right turn", "fire"] + +# Apply joint action ["stand", "right turn"] +actions: [6, 1] + +# State 23 +# Apply action "(A's action first)" +action: 0 + +# State 24 +# Apply joint action ["right turn", "fire"] +actions: [1, 9] + +# State 25 +# Apply action "(A's action first)" +action: 0 + +# State 26 +# Apply joint action ["stand", "stand"] +actions: [6, 6] + +# State 27 +# Apply action "(B's action first)" +action: 1 + +# State 28 +# Apply joint action ["step forward and right turn", "stand"] +actions: [8, 6] + +# State 29 +# Apply action "(B's action first)" +action: 1 + +# State 30 +# Apply joint action ["stand", "step left"] +actions: [6, 4] + +# State 31 +# Apply action "(B's action first)" +action: 1 + +# State 32 +# Apply joint action ["move backward", "step forward and right turn"] +actions: [3, 8] + +# State 33 +# Apply action "(A's action first)" +action: 0 + +# State 34 +# Apply joint action ["step forward and right turn", "left turn"] +actions: [8, 0] + +# State 35 +# Apply action "(A's action first)" +action: 0 + +# State 36 +# Apply joint action ["step left", "move backward"] +actions: [4, 3] + +# State 37 +# Apply action "(A's action first)" +action: 0 + +# State 38 +# Apply joint action ["step left", "step left"] +actions: [4, 4] + +# State 39 +# Apply action "(A's action first)" +action: 0 + +# State 40 +# Apply joint action ["left turn", "fire"] +actions: [0, 9] + +# State 41 +# Apply action "(A's action first)" +action: 0 + +# State 42 +# .....B. +# ....... +# ..*.*.. +# .**.**. +# ..*.*A. +# ....... +# ....... 
+# Orientations: 0 2 +IsTerminal() = True +History() = [3, 5, 7, 8, 0, 3, 4, 1, 5, 9, 1, 9, 5, 0, 3, 2, 1, 2, 4, 0, 2, 4, 1, 6, 3, 1, 3, 1, 1, 4, 4, 0, 6, 1, 0, 1, 9, 0, 6, 6, 1, 8, 6, 1, 6, 4, 1, 3, 8, 0, 8, 0, 0, 4, 3, 0, 4, 4, 0, 0, 9, 0] +HistoryString() = "3, 5, 7, 8, 0, 3, 4, 1, 5, 9, 1, 9, 5, 0, 3, 2, 1, 2, 4, 0, 2, 4, 1, 6, 3, 1, 3, 1, 1, 4, 4, 0, 6, 1, 0, 1, 9, 0, 6, 6, 1, 8, 6, 1, 6, 4, 1, 3, 8, 0, 8, 0, 0, 4, 3, 0, 4, 4, 0, 0, 9, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = ".....B.\n.......\n..*.*..\n.**.**.\n..*.*A.\n.......\n.......\nOrientations: 0 2\n" +ObservationString(1) = ".....B.\n.......\n..*.*..\n.**.**.\n..*.*A.\n.......\n.......\nOrientations: 0 2\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◉◉◉◉◉◯◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◯◉◉ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◉ ◯◉◉◯◉◉◯ +◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◉◉◯◉◯◯◉ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◉◉◉◉◉◯◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◉◯◉◉ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◉ ◯◉◉◯◉◉◯ +◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◉◉◯◉◯◯◉ ◯◯◉◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/leduc_poker_1540482260.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/leduc_poker_1540482260.txt new file mode 100644 index 0000000..5f72068 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/leduc_poker_1540482260.txt @@ -0,0 +1,292 @@ +game: leduc_poker + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Leduc Poker" +GameType.max_num_players = 10 +GameType.min_num_players = 2 +GameType.parameter_specification = ["action_mapping", "players", "suit_isomorphism"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "leduc_poker" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 3 +PolicyTensorShape() = [3] +MaxChanceOutcomes() = 6 +GetParameters() = {action_mapping=False,players=2,suit_isomorphism=False} +NumPlayers() = 2 +MinUtility() = -13.0 +MaxUtility() = 13.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = player: [2], private_card: [6], community_card: [6], betting: [2, 4, 2] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 30 +ObservationTensorShape() = player: [2], private_card: [6], community_card: [6], pot_contribution: [2] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 16 +MaxGameLength() = 8 +ToString() = "leduc_poker()" + +# State 0 +# Round: 1 +# Player: -1 +# Pot: 2 +# Money (p1 p2 ...): 99 99 +# Cards (public p1 p2 ...): -10000 -10000 -10000 +# Round 1 sequence: +# Round 2 sequence: +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "[Observer: 0][Private: -10000][Round 1][Player: -1][Pot: 2][Money: 99 99][Round1: ][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: 
-10000][Round 1][Player: -1][Pot: 2][Money: 99 99][Round1: ][Round2: ]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◯◯◯◯◯◯ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◯◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: -10000][Round 1][Player: -1][Pot: 2][Money: 99 99][Ante: 1 1]" +ObservationString(1) = "[Observer: 1][Private: -10000][Round 1][Player: -1][Pot: 2][Money: 99 99][Ante: 1 1]" +PublicObservationString() = "[Round 1][Player: -1][Pot: 2][Money: 99 99][Ante: 1 1]" +PrivateObservationString(0) = "[Observer: 0][Private: -10000]" +PrivateObservationString(1) = "[Observer: 1][Private: -10000]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◯◯◯◯◯◯ +ObservationTensor(0).community_card: ◯◯◯◯◯◯ +ObservationTensor(0).pot_contribution: ◉◉ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◯◯◯◯◯◯ +ObservationTensor(1).community_card: ◯◯◯◯◯◯ +ObservationTensor(1).pot_contribution: ◉◉ +ChanceOutcomes() = [(0,0.166667), (1,0.166667), (2,0.166667), (3,0.166667), (4,0.166667), (5,0.166667)] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["Chance outcome:0", "Chance outcome:1", "Chance outcome:2", "Chance outcome:3", "Chance outcome:4", "Chance outcome:5"] + +# Apply action "Chance outcome:5" +action: 5 + +# State 1 +# Round: 1 +# Player: -1 +# Pot: 2 +# Money (p1 p2 ...): 99 99 +# Cards (public p1 p2 ...): -10000 5 -10000 +# Round 1 sequence: +# Round 2 sequence: +IsTerminal() = False +History() = [5] +HistoryString() = "5" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "[Observer: 0][Private: 5][Round 1][Player: -1][Pot: 2][Money: 99 99][Round1: ][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: -10000][Round 1][Player: -1][Pot: 2][Money: 99 99][Round1: ][Round2: ]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◯◯◯◯◯◉ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◯◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 5][Round 1][Player: -1][Pot: 2][Money: 99 99][Ante: 1 1]" +ObservationString(1) = "[Observer: 1][Private: -10000][Round 1][Player: -1][Pot: 2][Money: 99 99][Ante: 1 1]" +PublicObservationString() = "[Round 1][Player: -1][Pot: 2][Money: 99 99][Ante: 1 1]" +PrivateObservationString(0) = "[Observer: 0][Private: 5]" +PrivateObservationString(1) = "[Observer: 1][Private: -10000]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◯◯◯◯◯◉ +ObservationTensor(0).community_card: ◯◯◯◯◯◯ +ObservationTensor(0).pot_contribution: ◉◉ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◯◯◯◯◯◯ +ObservationTensor(1).community_card: ◯◯◯◯◯◯ +ObservationTensor(1).pot_contribution: ◉◉ +ChanceOutcomes() = [(0,0.2), (1,0.2), (2,0.2), (3,0.2), (4,0.2)] +LegalActions() = [0, 1, 2, 3, 4] +StringLegalActions() = ["Chance outcome:0", "Chance outcome:1", "Chance outcome:2", "Chance outcome:3", "Chance outcome:4"] + +# Apply action "Chance outcome:1" +action: 1 + +# State 
2 +# Round: 1 +# Player: 0 +# Pot: 2 +# Money (p1 p2 ...): 99 99 +# Cards (public p1 p2 ...): -10000 5 1 +# Round 1 sequence: +# Round 2 sequence: +IsTerminal() = False +History() = [5, 1] +HistoryString() = "5, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "[Observer: 0][Private: 5][Round 1][Player: 0][Pot: 2][Money: 99 99][Round1: ][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: 1][Round 1][Player: 0][Pot: 2][Money: 99 99][Round1: ][Round2: ]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◯◯◯◯◯◉ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◯◉◯◯◯◯ +InformationStateTensor(1).community_card: ◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 5][Round 1][Player: 0][Pot: 2][Money: 99 99][Ante: 1 1]" +ObservationString(1) = "[Observer: 1][Private: 1][Round 1][Player: 0][Pot: 2][Money: 99 99][Ante: 1 1]" +PublicObservationString() = "[Round 1][Player: 0][Pot: 2][Money: 99 99][Ante: 1 1]" +PrivateObservationString(0) = "[Observer: 0][Private: 5]" +PrivateObservationString(1) = "[Observer: 1][Private: 1]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◯◯◯◯◯◉ +ObservationTensor(0).community_card: ◯◯◯◯◯◯ +ObservationTensor(0).pot_contribution: ◉◉ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◯◉◯◯◯◯ +ObservationTensor(1).community_card: ◯◯◯◯◯◯ +ObservationTensor(1).pot_contribution: ◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2] +StringLegalActions() = ["Call", "Raise"] + +# Apply action "Raise" +action: 2 + +# State 3 +# Round: 1 +# Player: 1 +# Pot: 4 +# Money (p1 p2 ...): 97 99 +# Cards (public p1 p2 ...): -10000 5 1 +# Round 1 sequence: Raise +# Round 2 sequence: +IsTerminal() = False +History() = [5, 1, 2] +HistoryString() = "5, 1, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "[Observer: 0][Private: 5][Round 1][Player: 1][Pot: 4][Money: 97 99][Round1: 2][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: 1][Round 1][Player: 1][Pot: 4][Money: 97 99][Round1: 2][Round2: ]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◯◯◯◯◯◉ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◯◉ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◯◉◯◯◯◯ +InformationStateTensor(1).community_card: ◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◯◉ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 5][Round 1][Player: 1][Pot: 4][Money: 97 99][Ante: 3 1]" +ObservationString(1) = "[Observer: 1][Private: 1][Round 1][Player: 1][Pot: 4][Money: 97 99][Ante: 3 1]" +PublicObservationString() = "[Round 1][Player: 1][Pot: 4][Money: 97 99][Ante: 3 1]" +PrivateObservationString(0) = "[Observer: 0][Private: 5]" +PrivateObservationString(1) = "[Observer: 1][Private: 1]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◯◯◯◯◯◉ +ObservationTensor(0).community_card: ◯◯◯◯◯◯ +ObservationTensor(0).pot_contribution = [3.0, 1.0] +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◯◉◯◯◯◯ +ObservationTensor(1).community_card: ◯◯◯◯◯◯ +ObservationTensor(1).pot_contribution = [3.0, 1.0] +Rewards() = [0, 0] 
+Returns() = [0, 0] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["Fold", "Call", "Raise"] + +# Apply action "Fold" +action: 0 + +# State 4 +# Round: 1 +# Player: 1 +# Pot: 0 +# Money (p1 p2 ...): 101 99 +# Cards (public p1 p2 ...): -10000 5 1 +# Round 1 sequence: Raise, Fold +# Round 2 sequence: +IsTerminal() = True +History() = [5, 1, 2, 0] +HistoryString() = "5, 1, 2, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "[Observer: 0][Private: 5][Round 1][Player: 1][Pot: 0][Money: 101 99][Round1: 2 0][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: 1][Round 1][Player: 1][Pot: 0][Money: 101 99][Round1: 2 0][Round2: ]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◯◯◯◯◯◉ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◯◉ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◯◉◯◯◯◯ +InformationStateTensor(1).community_card: ◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◯◉ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 5][Round 1][Player: 1][Pot: 0][Money: 101 99][Ante: 3 1]" +ObservationString(1) = "[Observer: 1][Private: 1][Round 1][Player: 1][Pot: 0][Money: 101 99][Ante: 3 1]" +PublicObservationString() = "[Round 1][Player: 1][Pot: 0][Money: 101 99][Ante: 3 1]" +PrivateObservationString(0) = "[Observer: 0][Private: 5]" +PrivateObservationString(1) = "[Observer: 1][Private: 1]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◯◯◯◯◯◉ +ObservationTensor(0).community_card: ◯◯◯◯◯◯ +ObservationTensor(0).pot_contribution = [3.0, 1.0] +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◯◉◯◯◯◯ +ObservationTensor(1).community_card: ◯◯◯◯◯◯ +ObservationTensor(1).pot_contribution = [3.0, 1.0] +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/leduc_poker_3977671846.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/leduc_poker_3977671846.txt new file mode 100644 index 0000000..9089b55 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/leduc_poker_3977671846.txt @@ -0,0 +1,455 @@ +game: leduc_poker + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Leduc Poker" +GameType.max_num_players = 10 +GameType.min_num_players = 2 +GameType.parameter_specification = ["action_mapping", "players", "suit_isomorphism"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "leduc_poker" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 3 +PolicyTensorShape() = [3] +MaxChanceOutcomes() = 6 +GetParameters() = {action_mapping=False,players=2,suit_isomorphism=False} +NumPlayers() = 2 +MinUtility() = -13.0 +MaxUtility() = 13.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = player: [2], private_card: [6], community_card: [6], betting: [2, 4, 2] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 30 +ObservationTensorShape() = player: [2], private_card: [6], community_card: [6], 
pot_contribution: [2] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 16 +MaxGameLength() = 8 +ToString() = "leduc_poker()" + +# State 0 +# Round: 1 +# Player: -1 +# Pot: 2 +# Money (p1 p2 ...): 99 99 +# Cards (public p1 p2 ...): -10000 -10000 -10000 +# Round 1 sequence: +# Round 2 sequence: +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "[Observer: 0][Private: -10000][Round 1][Player: -1][Pot: 2][Money: 99 99][Round1: ][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: -10000][Round 1][Player: -1][Pot: 2][Money: 99 99][Round1: ][Round2: ]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◯◯◯◯◯◯ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◯◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: -10000][Round 1][Player: -1][Pot: 2][Money: 99 99][Ante: 1 1]" +ObservationString(1) = "[Observer: 1][Private: -10000][Round 1][Player: -1][Pot: 2][Money: 99 99][Ante: 1 1]" +PublicObservationString() = "[Round 1][Player: -1][Pot: 2][Money: 99 99][Ante: 1 1]" +PrivateObservationString(0) = "[Observer: 0][Private: -10000]" +PrivateObservationString(1) = "[Observer: 1][Private: -10000]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◯◯◯◯◯◯ +ObservationTensor(0).community_card: ◯◯◯◯◯◯ +ObservationTensor(0).pot_contribution: ◉◉ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◯◯◯◯◯◯ +ObservationTensor(1).community_card: ◯◯◯◯◯◯ +ObservationTensor(1).pot_contribution: ◉◉ +ChanceOutcomes() = [(0,0.166667), (1,0.166667), (2,0.166667), (3,0.166667), (4,0.166667), (5,0.166667)] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["Chance outcome:0", "Chance outcome:1", "Chance outcome:2", "Chance outcome:3", "Chance outcome:4", "Chance outcome:5"] + +# Apply action "Chance outcome:1" +action: 1 + +# State 1 +# Round: 1 +# Player: -1 +# Pot: 2 +# Money (p1 p2 ...): 99 99 +# Cards (public p1 p2 ...): -10000 1 -10000 +# Round 1 sequence: +# Round 2 sequence: +IsTerminal() = False +History() = [1] +HistoryString() = "1" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "[Observer: 0][Private: 1][Round 1][Player: -1][Pot: 2][Money: 99 99][Round1: ][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: -10000][Round 1][Player: -1][Pot: 2][Money: 99 99][Round1: ][Round2: ]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◯◉◯◯◯◯ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◯◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 1][Round 1][Player: -1][Pot: 2][Money: 99 99][Ante: 1 1]" +ObservationString(1) = "[Observer: 1][Private: -10000][Round 1][Player: -1][Pot: 2][Money: 99 99][Ante: 1 1]" +PublicObservationString() = "[Round 1][Player: -1][Pot: 2][Money: 99 99][Ante: 1 1]" +PrivateObservationString(0) = "[Observer: 0][Private: 1]" +PrivateObservationString(1) = "[Observer: 
1][Private: -10000]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◯◉◯◯◯◯ +ObservationTensor(0).community_card: ◯◯◯◯◯◯ +ObservationTensor(0).pot_contribution: ◉◉ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◯◯◯◯◯◯ +ObservationTensor(1).community_card: ◯◯◯◯◯◯ +ObservationTensor(1).pot_contribution: ◉◉ +ChanceOutcomes() = [(0,0.2), (2,0.2), (3,0.2), (4,0.2), (5,0.2)] +LegalActions() = [0, 2, 3, 4, 5] +StringLegalActions() = ["Chance outcome:0", "Chance outcome:2", "Chance outcome:3", "Chance outcome:4", "Chance outcome:5"] + +# Apply action "Chance outcome:0" +action: 0 + +# State 2 +# Round: 1 +# Player: 0 +# Pot: 2 +# Money (p1 p2 ...): 99 99 +# Cards (public p1 p2 ...): -10000 1 0 +# Round 1 sequence: +# Round 2 sequence: +IsTerminal() = False +History() = [1, 0] +HistoryString() = "1, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "[Observer: 0][Private: 1][Round 1][Player: 0][Pot: 2][Money: 99 99][Round1: ][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: 0][Round 1][Player: 0][Pot: 2][Money: 99 99][Round1: ][Round2: ]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◯◉◯◯◯◯ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◉◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 1][Round 1][Player: 0][Pot: 2][Money: 99 99][Ante: 1 1]" +ObservationString(1) = "[Observer: 1][Private: 0][Round 1][Player: 0][Pot: 2][Money: 99 99][Ante: 1 1]" +PublicObservationString() = "[Round 1][Player: 0][Pot: 2][Money: 99 99][Ante: 1 1]" +PrivateObservationString(0) = "[Observer: 0][Private: 1]" +PrivateObservationString(1) = "[Observer: 1][Private: 0]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◯◉◯◯◯◯ +ObservationTensor(0).community_card: ◯◯◯◯◯◯ +ObservationTensor(0).pot_contribution: ◉◉ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◉◯◯◯◯◯ +ObservationTensor(1).community_card: ◯◯◯◯◯◯ +ObservationTensor(1).pot_contribution: ◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2] +StringLegalActions() = ["Call", "Raise"] + +# Apply action "Call" +action: 1 + +# State 3 +# Round: 1 +# Player: 1 +# Pot: 2 +# Money (p1 p2 ...): 99 99 +# Cards (public p1 p2 ...): -10000 1 0 +# Round 1 sequence: Call +# Round 2 sequence: +IsTerminal() = False +History() = [1, 0, 1] +HistoryString() = "1, 0, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "[Observer: 0][Private: 1][Round 1][Player: 1][Pot: 2][Money: 99 99][Round1: 1][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: 0][Round 1][Player: 1][Pot: 2][Money: 99 99][Round1: 1][Round2: ]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◯◉◯◯◯◯ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◉◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 1][Round 1][Player: 1][Pot: 2][Money: 99 99][Ante: 1 1]" +ObservationString(1) = "[Observer: 1][Private: 
0][Round 1][Player: 1][Pot: 2][Money: 99 99][Ante: 1 1]" +PublicObservationString() = "[Round 1][Player: 1][Pot: 2][Money: 99 99][Ante: 1 1]" +PrivateObservationString(0) = "[Observer: 0][Private: 1]" +PrivateObservationString(1) = "[Observer: 1][Private: 0]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◯◉◯◯◯◯ +ObservationTensor(0).community_card: ◯◯◯◯◯◯ +ObservationTensor(0).pot_contribution: ◉◉ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◉◯◯◯◯◯ +ObservationTensor(1).community_card: ◯◯◯◯◯◯ +ObservationTensor(1).pot_contribution: ◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2] +StringLegalActions() = ["Call", "Raise"] + +# Apply action "Raise" +action: 2 + +# State 4 +# Round: 1 +# Player: 0 +# Pot: 4 +# Money (p1 p2 ...): 99 97 +# Cards (public p1 p2 ...): -10000 1 0 +# Round 1 sequence: Call, Raise +# Round 2 sequence: +IsTerminal() = False +History() = [1, 0, 1, 2] +HistoryString() = "1, 0, 1, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "[Observer: 0][Private: 1][Round 1][Player: 0][Pot: 4][Money: 99 97][Round1: 1 2][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: 0][Round 1][Player: 0][Pot: 4][Money: 99 97][Round1: 1 2][Round2: ]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◯◉◯◯◯◯ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◉◯ ◯◯ +◯◉ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◉◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◉◯ ◯◯ +◯◉ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 1][Round 1][Player: 0][Pot: 4][Money: 99 97][Ante: 1 3]" +ObservationString(1) = "[Observer: 1][Private: 0][Round 1][Player: 0][Pot: 4][Money: 99 97][Ante: 1 3]" +PublicObservationString() = "[Round 1][Player: 0][Pot: 4][Money: 99 97][Ante: 1 3]" +PrivateObservationString(0) = "[Observer: 0][Private: 1]" +PrivateObservationString(1) = "[Observer: 1][Private: 0]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◯◉◯◯◯◯ +ObservationTensor(0).community_card: ◯◯◯◯◯◯ +ObservationTensor(0).pot_contribution = [1.0, 3.0] +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◉◯◯◯◯◯ +ObservationTensor(1).community_card: ◯◯◯◯◯◯ +ObservationTensor(1).pot_contribution = [1.0, 3.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["Fold", "Call", "Raise"] + +# Apply action "Call" +action: 1 + +# State 5 +# Apply action "Chance outcome:3" +action: 3 + +# State 6 +# Round: 2 +# Player: 0 +# Pot: 6 +# Money (p1 p2 ...): 97 97 +# Cards (public p1 p2 ...): 3 1 0 +# Round 1 sequence: Call, Raise, Call +# Round 2 sequence: +IsTerminal() = False +History() = [1, 0, 1, 2, 1, 3] +HistoryString() = "1, 0, 1, 2, 1, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "[Observer: 0][Private: 1][Round 2][Player: 0][Pot: 6][Money: 97 97][Public: 3][Round1: 1 2 1][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: 0][Round 2][Player: 0][Pot: 6][Money: 97 97][Public: 3][Round1: 1 2 1][Round2: ]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◯◉◯◯◯◯ +InformationStateTensor(0).community_card: ◯◯◯◉◯◯ +InformationStateTensor(0).betting: +◉◯ ◯◯ +◯◉ ◯◯ +◉◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉ 
+InformationStateTensor(1).private_card: ◉◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◯◯◉◯◯ +InformationStateTensor(1).betting: +◉◯ ◯◯ +◯◉ ◯◯ +◉◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 1][Round 2][Player: 0][Pot: 6][Money: 97 97][Public: 3][Ante: 3 3]" +ObservationString(1) = "[Observer: 1][Private: 0][Round 2][Player: 0][Pot: 6][Money: 97 97][Public: 3][Ante: 3 3]" +PublicObservationString() = "[Round 2][Player: 0][Pot: 6][Money: 97 97][Public: 3][Ante: 3 3]" +PrivateObservationString(0) = "[Observer: 0][Private: 1]" +PrivateObservationString(1) = "[Observer: 1][Private: 0]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◯◉◯◯◯◯ +ObservationTensor(0).community_card: ◯◯◯◉◯◯ +ObservationTensor(0).pot_contribution = [3.0, 3.0] +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◉◯◯◯◯◯ +ObservationTensor(1).community_card: ◯◯◯◉◯◯ +ObservationTensor(1).pot_contribution = [3.0, 3.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2] +StringLegalActions() = ["Call", "Raise"] + +# Apply action "Call" +action: 1 + +# State 7 +# Round: 2 +# Player: 1 +# Pot: 6 +# Money (p1 p2 ...): 97 97 +# Cards (public p1 p2 ...): 3 1 0 +# Round 1 sequence: Call, Raise, Call +# Round 2 sequence: Call +IsTerminal() = False +History() = [1, 0, 1, 2, 1, 3, 1] +HistoryString() = "1, 0, 1, 2, 1, 3, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "[Observer: 0][Private: 1][Round 2][Player: 1][Pot: 6][Money: 97 97][Public: 3][Round1: 1 2 1][Round2: 1]" +InformationStateString(1) = "[Observer: 1][Private: 0][Round 2][Player: 1][Pot: 6][Money: 97 97][Public: 3][Round1: 1 2 1][Round2: 1]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◯◉◯◯◯◯ +InformationStateTensor(0).community_card: ◯◯◯◉◯◯ +InformationStateTensor(0).betting: +◉◯ ◉◯ +◯◉ ◯◯ +◉◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◉◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◯◯◉◯◯ +InformationStateTensor(1).betting: +◉◯ ◉◯ +◯◉ ◯◯ +◉◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 1][Round 2][Player: 1][Pot: 6][Money: 97 97][Public: 3][Ante: 3 3]" +ObservationString(1) = "[Observer: 1][Private: 0][Round 2][Player: 1][Pot: 6][Money: 97 97][Public: 3][Ante: 3 3]" +PublicObservationString() = "[Round 2][Player: 1][Pot: 6][Money: 97 97][Public: 3][Ante: 3 3]" +PrivateObservationString(0) = "[Observer: 0][Private: 1]" +PrivateObservationString(1) = "[Observer: 1][Private: 0]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◯◉◯◯◯◯ +ObservationTensor(0).community_card: ◯◯◯◉◯◯ +ObservationTensor(0).pot_contribution = [3.0, 3.0] +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◉◯◯◯◯◯ +ObservationTensor(1).community_card: ◯◯◯◉◯◯ +ObservationTensor(1).pot_contribution = [3.0, 3.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2] +StringLegalActions() = ["Call", "Raise"] + +# Apply action "Call" +action: 1 + +# State 8 +# Round: 2 +# Player: 1 +# Pot: 0 +# Money (p1 p2 ...): 100 100 +# Cards (public p1 p2 ...): 3 1 0 +# Round 1 sequence: Call, Raise, Call +# Round 2 sequence: Call, Call +IsTerminal() = True +History() = [1, 0, 1, 2, 1, 3, 1, 1] +HistoryString() = "1, 0, 1, 2, 1, 3, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "[Observer: 0][Private: 1][Round 2][Player: 1][Pot: 0][Money: 100 100][Public: 3][Round1: 1 2 1][Round2: 1 
1]" +InformationStateString(1) = "[Observer: 1][Private: 0][Round 2][Player: 1][Pot: 0][Money: 100 100][Public: 3][Round1: 1 2 1][Round2: 1 1]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◯◉◯◯◯◯ +InformationStateTensor(0).community_card: ◯◯◯◉◯◯ +InformationStateTensor(0).betting: +◉◯ ◉◯ +◯◉ ◉◯ +◉◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◉◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◯◯◉◯◯ +InformationStateTensor(1).betting: +◉◯ ◉◯ +◯◉ ◉◯ +◉◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 1][Round 2][Player: 1][Pot: 0][Money: 100 100][Public: 3][Ante: 3 3]" +ObservationString(1) = "[Observer: 1][Private: 0][Round 2][Player: 1][Pot: 0][Money: 100 100][Public: 3][Ante: 3 3]" +PublicObservationString() = "[Round 2][Player: 1][Pot: 0][Money: 100 100][Public: 3][Ante: 3 3]" +PrivateObservationString(0) = "[Observer: 0][Private: 1]" +PrivateObservationString(1) = "[Observer: 1][Private: 0]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◯◉◯◯◯◯ +ObservationTensor(0).community_card: ◯◯◯◉◯◯ +ObservationTensor(0).pot_contribution = [3.0, 3.0] +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◉◯◯◯◯◯ +ObservationTensor(1).community_card: ◯◯◯◉◯◯ +ObservationTensor(1).pot_contribution = [3.0, 3.0] +Rewards() = [0, 0] +Returns() = [0, 0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/leduc_poker_3p.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/leduc_poker_3p.txt new file mode 100644 index 0000000..4c16302 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/leduc_poker_3p.txt @@ -0,0 +1,728 @@ +game: leduc_poker(players=3) + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Leduc Poker" +GameType.max_num_players = 10 +GameType.min_num_players = 2 +GameType.parameter_specification = ["action_mapping", "players", "suit_isomorphism"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "leduc_poker" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 3 +PolicyTensorShape() = [3] +MaxChanceOutcomes() = 8 +GetParameters() = {action_mapping=False,players=3,suit_isomorphism=False} +NumPlayers() = 3 +MinUtility() = -13.0 +MaxUtility() = 26.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = player: [3], private_card: [8], community_card: [8], betting: [2, 7, 2] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 47 +ObservationTensorShape() = player: [3], private_card: [8], community_card: [8], pot_contribution: [3] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 22 +MaxGameLength() = 14 +ToString() = "leduc_poker(players=3)" + +# State 0 +# Round: 1 +# Player: -1 +# Pot: 3 +# Money (p1 p2 ...): 99 99 99 +# Cards (public p1 p2 ...): -10000 -10000 -10000 -10000 +# Round 1 sequence: +# Round 2 sequence: +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "[Observer: 0][Private: -10000][Round 1][Player: 
-1][Pot: 3][Money: 99 99 99][Round1: ][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: -10000][Round 1][Player: -1][Pot: 3][Money: 99 99 99][Round1: ][Round2: ]" +InformationStateString(2) = "[Observer: 2][Private: -10000][Round 1][Player: -1][Pot: 3][Money: 99 99 99][Round1: ][Round2: ]" +InformationStateTensor(0).player: ◉◯◯ +InformationStateTensor(0).private_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉◯ +InformationStateTensor(1).private_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(2).player: ◯◯◉ +InformationStateTensor(2).private_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(2).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(2).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: -10000][Round 1][Player: -1][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +ObservationString(1) = "[Observer: 1][Private: -10000][Round 1][Player: -1][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +ObservationString(2) = "[Observer: 2][Private: -10000][Round 1][Player: -1][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +PublicObservationString() = "[Round 1][Player: -1][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +PrivateObservationString(0) = "[Observer: 0][Private: -10000]" +PrivateObservationString(1) = "[Observer: 1][Private: -10000]" +PrivateObservationString(2) = "[Observer: 2][Private: -10000]" +ObservationTensor(0).player: ◉◯◯ +ObservationTensor(0).private_card: ◯◯◯◯◯◯◯◯ +ObservationTensor(0).community_card: ◯◯◯◯◯◯◯◯ +ObservationTensor(0).pot_contribution: ◉◉◉ +ObservationTensor(1).player: ◯◉◯ +ObservationTensor(1).private_card: ◯◯◯◯◯◯◯◯ +ObservationTensor(1).community_card: ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pot_contribution: ◉◉◉ +ObservationTensor(2).player: ◯◯◉ +ObservationTensor(2).private_card: ◯◯◯◯◯◯◯◯ +ObservationTensor(2).community_card: ◯◯◯◯◯◯◯◯ +ObservationTensor(2).pot_contribution: ◉◉◉ +ChanceOutcomes() = [(0,0.125), (1,0.125), (2,0.125), (3,0.125), (4,0.125), (5,0.125), (6,0.125), (7,0.125)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7] +StringLegalActions() = ["Chance outcome:0", "Chance outcome:1", "Chance outcome:2", "Chance outcome:3", "Chance outcome:4", "Chance outcome:5", "Chance outcome:6", "Chance outcome:7"] + +# Apply action "Chance outcome:4" +action: 4 + +# State 1 +# Round: 1 +# Player: -1 +# Pot: 3 +# Money (p1 p2 ...): 99 99 99 +# Cards (public p1 p2 ...): -10000 4 -10000 -10000 +# Round 1 sequence: +# Round 2 sequence: +IsTerminal() = False +History() = [4] +HistoryString() = "4" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "[Observer: 0][Private: 4][Round 1][Player: -1][Pot: 3][Money: 99 99 99][Round1: ][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: -10000][Round 1][Player: -1][Pot: 3][Money: 99 99 99][Round1: ][Round2: ]" +InformationStateString(2) = "[Observer: 2][Private: -10000][Round 1][Player: -1][Pot: 3][Money: 99 99 99][Round1: ][Round2: ]" +InformationStateTensor(0).player: ◉◯◯ +InformationStateTensor(0).private_card: ◯◯◯◯◉◯◯◯ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉◯ +InformationStateTensor(1).private_card: ◯◯◯◯◯◯◯◯ 
+InformationStateTensor(1).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(2).player: ◯◯◉ +InformationStateTensor(2).private_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(2).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(2).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 4][Round 1][Player: -1][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +ObservationString(1) = "[Observer: 1][Private: -10000][Round 1][Player: -1][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +ObservationString(2) = "[Observer: 2][Private: -10000][Round 1][Player: -1][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +PublicObservationString() = "[Round 1][Player: -1][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +PrivateObservationString(0) = "[Observer: 0][Private: 4]" +PrivateObservationString(1) = "[Observer: 1][Private: -10000]" +PrivateObservationString(2) = "[Observer: 2][Private: -10000]" +ObservationTensor(0).player: ◉◯◯ +ObservationTensor(0).private_card: ◯◯◯◯◉◯◯◯ +ObservationTensor(0).community_card: ◯◯◯◯◯◯◯◯ +ObservationTensor(0).pot_contribution: ◉◉◉ +ObservationTensor(1).player: ◯◉◯ +ObservationTensor(1).private_card: ◯◯◯◯◯◯◯◯ +ObservationTensor(1).community_card: ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pot_contribution: ◉◉◉ +ObservationTensor(2).player: ◯◯◉ +ObservationTensor(2).private_card: ◯◯◯◯◯◯◯◯ +ObservationTensor(2).community_card: ◯◯◯◯◯◯◯◯ +ObservationTensor(2).pot_contribution: ◉◉◉ +ChanceOutcomes() = [(0,0.142857), (1,0.142857), (2,0.142857), (3,0.142857), (5,0.142857), (6,0.142857), (7,0.142857)] +LegalActions() = [0, 1, 2, 3, 5, 6, 7] +StringLegalActions() = ["Chance outcome:0", "Chance outcome:1", "Chance outcome:2", "Chance outcome:3", "Chance outcome:5", "Chance outcome:6", "Chance outcome:7"] + +# Apply action "Chance outcome:2" +action: 2 + +# State 2 +# Apply action "Chance outcome:3" +action: 3 + +# State 3 +# Round: 1 +# Player: 0 +# Pot: 3 +# Money (p1 p2 ...): 99 99 99 +# Cards (public p1 p2 ...): -10000 4 2 3 +# Round 1 sequence: +# Round 2 sequence: +IsTerminal() = False +History() = [4, 2, 3] +HistoryString() = "4, 2, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "[Observer: 0][Private: 4][Round 1][Player: 0][Pot: 3][Money: 99 99 99][Round1: ][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: 2][Round 1][Player: 0][Pot: 3][Money: 99 99 99][Round1: ][Round2: ]" +InformationStateString(2) = "[Observer: 2][Private: 3][Round 1][Player: 0][Pot: 3][Money: 99 99 99][Round1: ][Round2: ]" +InformationStateTensor(0).player: ◉◯◯ +InformationStateTensor(0).private_card: ◯◯◯◯◉◯◯◯ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉◯ +InformationStateTensor(1).private_card: ◯◯◉◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(2).player: ◯◯◉ +InformationStateTensor(2).private_card: ◯◯◯◉◯◯◯◯ +InformationStateTensor(2).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(2).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 4][Round 1][Player: 0][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +ObservationString(1) = "[Observer: 1][Private: 2][Round 1][Player: 0][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +ObservationString(2) = "[Observer: 2][Private: 
3][Round 1][Player: 0][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +PublicObservationString() = "[Round 1][Player: 0][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +PrivateObservationString(0) = "[Observer: 0][Private: 4]" +PrivateObservationString(1) = "[Observer: 1][Private: 2]" +PrivateObservationString(2) = "[Observer: 2][Private: 3]" +ObservationTensor(0).player: ◉◯◯ +ObservationTensor(0).private_card: ◯◯◯◯◉◯◯◯ +ObservationTensor(0).community_card: ◯◯◯◯◯◯◯◯ +ObservationTensor(0).pot_contribution: ◉◉◉ +ObservationTensor(1).player: ◯◉◯ +ObservationTensor(1).private_card: ◯◯◉◯◯◯◯◯ +ObservationTensor(1).community_card: ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pot_contribution: ◉◉◉ +ObservationTensor(2).player: ◯◯◉ +ObservationTensor(2).private_card: ◯◯◯◉◯◯◯◯ +ObservationTensor(2).community_card: ◯◯◯◯◯◯◯◯ +ObservationTensor(2).pot_contribution: ◉◉◉ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [1, 2] +StringLegalActions() = ["Call", "Raise"] + +# Apply action "Call" +action: 1 + +# State 4 +# Round: 1 +# Player: 1 +# Pot: 3 +# Money (p1 p2 ...): 99 99 99 +# Cards (public p1 p2 ...): -10000 4 2 3 +# Round 1 sequence: Call +# Round 2 sequence: +IsTerminal() = False +History() = [4, 2, 3, 1] +HistoryString() = "4, 2, 3, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "[Observer: 0][Private: 4][Round 1][Player: 1][Pot: 3][Money: 99 99 99][Round1: 1][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: 2][Round 1][Player: 1][Pot: 3][Money: 99 99 99][Round1: 1][Round2: ]" +InformationStateString(2) = "[Observer: 2][Private: 3][Round 1][Player: 1][Pot: 3][Money: 99 99 99][Round1: 1][Round2: ]" +InformationStateTensor(0).player: ◉◯◯ +InformationStateTensor(0).private_card: ◯◯◯◯◉◯◯◯ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉◯ +InformationStateTensor(1).private_card: ◯◯◉◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(2).player: ◯◯◉ +InformationStateTensor(2).private_card: ◯◯◯◉◯◯◯◯ +InformationStateTensor(2).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(2).betting: +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 4][Round 1][Player: 1][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +ObservationString(1) = "[Observer: 1][Private: 2][Round 1][Player: 1][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +ObservationString(2) = "[Observer: 2][Private: 3][Round 1][Player: 1][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +PublicObservationString() = "[Round 1][Player: 1][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +PrivateObservationString(0) = "[Observer: 0][Private: 4]" +PrivateObservationString(1) = "[Observer: 1][Private: 2]" +PrivateObservationString(2) = "[Observer: 2][Private: 3]" +ObservationTensor(0).player: ◉◯◯ +ObservationTensor(0).private_card: ◯◯◯◯◉◯◯◯ +ObservationTensor(0).community_card: ◯◯◯◯◯◯◯◯ +ObservationTensor(0).pot_contribution: ◉◉◉ +ObservationTensor(1).player: ◯◉◯ +ObservationTensor(1).private_card: ◯◯◉◯◯◯◯◯ +ObservationTensor(1).community_card: ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pot_contribution: ◉◉◉ +ObservationTensor(2).player: ◯◯◉ +ObservationTensor(2).private_card: ◯◯◯◉◯◯◯◯ +ObservationTensor(2).community_card: ◯◯◯◯◯◯◯◯ +ObservationTensor(2).pot_contribution: ◉◉◉ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [1, 2] 
+StringLegalActions() = ["Call", "Raise"] + +# Apply action "Raise" +action: 2 + +# State 5 +# Round: 1 +# Player: 2 +# Pot: 5 +# Money (p1 p2 ...): 99 97 99 +# Cards (public p1 p2 ...): -10000 4 2 3 +# Round 1 sequence: Call, Raise +# Round 2 sequence: +IsTerminal() = False +History() = [4, 2, 3, 1, 2] +HistoryString() = "4, 2, 3, 1, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "[Observer: 0][Private: 4][Round 1][Player: 2][Pot: 5][Money: 99 97 99][Round1: 1 2][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: 2][Round 1][Player: 2][Pot: 5][Money: 99 97 99][Round1: 1 2][Round2: ]" +InformationStateString(2) = "[Observer: 2][Private: 3][Round 1][Player: 2][Pot: 5][Money: 99 97 99][Round1: 1 2][Round2: ]" +InformationStateTensor(0).player: ◉◯◯ +InformationStateTensor(0).private_card: ◯◯◯◯◉◯◯◯ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◉◯ ◯◯ +◯◉ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉◯ +InformationStateTensor(1).private_card: ◯◯◉◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◉◯ ◯◯ +◯◉ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(2).player: ◯◯◉ +InformationStateTensor(2).private_card: ◯◯◯◉◯◯◯◯ +InformationStateTensor(2).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(2).betting: +◉◯ ◯◯ +◯◉ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 4][Round 1][Player: 2][Pot: 5][Money: 99 97 99][Ante: 1 3 1]" +ObservationString(1) = "[Observer: 1][Private: 2][Round 1][Player: 2][Pot: 5][Money: 99 97 99][Ante: 1 3 1]" +ObservationString(2) = "[Observer: 2][Private: 3][Round 1][Player: 2][Pot: 5][Money: 99 97 99][Ante: 1 3 1]" +PublicObservationString() = "[Round 1][Player: 2][Pot: 5][Money: 99 97 99][Ante: 1 3 1]" +PrivateObservationString(0) = "[Observer: 0][Private: 4]" +PrivateObservationString(1) = "[Observer: 1][Private: 2]" +PrivateObservationString(2) = "[Observer: 2][Private: 3]" +ObservationTensor(0).player: ◉◯◯ +ObservationTensor(0).private_card: ◯◯◯◯◉◯◯◯ +ObservationTensor(0).community_card: ◯◯◯◯◯◯◯◯ +ObservationTensor(0).pot_contribution = [1.0, 3.0, 1.0] +ObservationTensor(1).player: ◯◉◯ +ObservationTensor(1).private_card: ◯◯◉◯◯◯◯◯ +ObservationTensor(1).community_card: ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pot_contribution = [1.0, 3.0, 1.0] +ObservationTensor(2).player: ◯◯◉ +ObservationTensor(2).private_card: ◯◯◯◉◯◯◯◯ +ObservationTensor(2).community_card: ◯◯◯◯◯◯◯◯ +ObservationTensor(2).pot_contribution = [1.0, 3.0, 1.0] +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["Fold", "Call", "Raise"] + +# Apply action "Call" +action: 1 + +# State 6 +# Round: 1 +# Player: 0 +# Pot: 7 +# Money (p1 p2 ...): 99 97 97 +# Cards (public p1 p2 ...): -10000 4 2 3 +# Round 1 sequence: Call, Raise, Call +# Round 2 sequence: +IsTerminal() = False +History() = [4, 2, 3, 1, 2, 1] +HistoryString() = "4, 2, 3, 1, 2, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "[Observer: 0][Private: 4][Round 1][Player: 0][Pot: 7][Money: 99 97 97][Round1: 1 2 1][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: 2][Round 1][Player: 0][Pot: 7][Money: 99 97 97][Round1: 1 2 1][Round2: ]" +InformationStateString(2) = "[Observer: 2][Private: 3][Round 1][Player: 0][Pot: 7][Money: 99 97 97][Round1: 1 2 1][Round2: ]" +InformationStateTensor(0).player: ◉◯◯ 
+InformationStateTensor(0).private_card: ◯◯◯◯◉◯◯◯ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◉◯ ◯◯ +◯◉ ◯◯ +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉◯ +InformationStateTensor(1).private_card: ◯◯◉◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◉◯ ◯◯ +◯◉ ◯◯ +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(2).player: ◯◯◉ +InformationStateTensor(2).private_card: ◯◯◯◉◯◯◯◯ +InformationStateTensor(2).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(2).betting: +◉◯ ◯◯ +◯◉ ◯◯ +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 4][Round 1][Player: 0][Pot: 7][Money: 99 97 97][Ante: 1 3 3]" +ObservationString(1) = "[Observer: 1][Private: 2][Round 1][Player: 0][Pot: 7][Money: 99 97 97][Ante: 1 3 3]" +ObservationString(2) = "[Observer: 2][Private: 3][Round 1][Player: 0][Pot: 7][Money: 99 97 97][Ante: 1 3 3]" +PublicObservationString() = "[Round 1][Player: 0][Pot: 7][Money: 99 97 97][Ante: 1 3 3]" +PrivateObservationString(0) = "[Observer: 0][Private: 4]" +PrivateObservationString(1) = "[Observer: 1][Private: 2]" +PrivateObservationString(2) = "[Observer: 2][Private: 3]" +ObservationTensor(0).player: ◉◯◯ +ObservationTensor(0).private_card: ◯◯◯◯◉◯◯◯ +ObservationTensor(0).community_card: ◯◯◯◯◯◯◯◯ +ObservationTensor(0).pot_contribution = [1.0, 3.0, 3.0] +ObservationTensor(1).player: ◯◉◯ +ObservationTensor(1).private_card: ◯◯◉◯◯◯◯◯ +ObservationTensor(1).community_card: ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pot_contribution = [1.0, 3.0, 3.0] +ObservationTensor(2).player: ◯◯◉ +ObservationTensor(2).private_card: ◯◯◯◉◯◯◯◯ +ObservationTensor(2).community_card: ◯◯◯◯◯◯◯◯ +ObservationTensor(2).pot_contribution = [1.0, 3.0, 3.0] +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["Fold", "Call", "Raise"] + +# Apply action "Fold" +action: 0 + +# State 7 +# Apply action "Chance outcome:1" +action: 1 + +# State 8 +# Round: 2 +# Player: 1 +# Pot: 7 +# Money (p1 p2 ...): 99 97 97 +# Cards (public p1 p2 ...): 1 4 2 3 +# Round 1 sequence: Call, Raise, Call, Fold +# Round 2 sequence: +IsTerminal() = False +History() = [4, 2, 3, 1, 2, 1, 0, 1] +HistoryString() = "4, 2, 3, 1, 2, 1, 0, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "[Observer: 0][Private: 4][Round 2][Player: 1][Pot: 7][Money: 99 97 97][Public: 1][Round1: 1 2 1 0][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: 2][Round 2][Player: 1][Pot: 7][Money: 99 97 97][Public: 1][Round1: 1 2 1 0][Round2: ]" +InformationStateString(2) = "[Observer: 2][Private: 3][Round 2][Player: 1][Pot: 7][Money: 99 97 97][Public: 1][Round1: 1 2 1 0][Round2: ]" +InformationStateTensor(0).player: ◉◯◯ +InformationStateTensor(0).private_card: ◯◯◯◯◉◯◯◯ +InformationStateTensor(0).community_card: ◯◉◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◉◯ ◯◯ +◯◉ ◯◯ +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉◯ +InformationStateTensor(1).private_card: ◯◯◉◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◉◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◉◯ ◯◯ +◯◉ ◯◯ +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(2).player: ◯◯◉ +InformationStateTensor(2).private_card: ◯◯◯◉◯◯◯◯ +InformationStateTensor(2).community_card: ◯◉◯◯◯◯◯◯ +InformationStateTensor(2).betting: +◉◯ ◯◯ +◯◉ ◯◯ +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 4][Round 2][Player: 1][Pot: 7][Money: 99 97 
97][Public: 1][Ante: 1 3 3]" +ObservationString(1) = "[Observer: 1][Private: 2][Round 2][Player: 1][Pot: 7][Money: 99 97 97][Public: 1][Ante: 1 3 3]" +ObservationString(2) = "[Observer: 2][Private: 3][Round 2][Player: 1][Pot: 7][Money: 99 97 97][Public: 1][Ante: 1 3 3]" +PublicObservationString() = "[Round 2][Player: 1][Pot: 7][Money: 99 97 97][Public: 1][Ante: 1 3 3]" +PrivateObservationString(0) = "[Observer: 0][Private: 4]" +PrivateObservationString(1) = "[Observer: 1][Private: 2]" +PrivateObservationString(2) = "[Observer: 2][Private: 3]" +ObservationTensor(0).player: ◉◯◯ +ObservationTensor(0).private_card: ◯◯◯◯◉◯◯◯ +ObservationTensor(0).community_card: ◯◉◯◯◯◯◯◯ +ObservationTensor(0).pot_contribution = [1.0, 3.0, 3.0] +ObservationTensor(1).player: ◯◉◯ +ObservationTensor(1).private_card: ◯◯◉◯◯◯◯◯ +ObservationTensor(1).community_card: ◯◉◯◯◯◯◯◯ +ObservationTensor(1).pot_contribution = [1.0, 3.0, 3.0] +ObservationTensor(2).player: ◯◯◉ +ObservationTensor(2).private_card: ◯◯◯◉◯◯◯◯ +ObservationTensor(2).community_card: ◯◉◯◯◯◯◯◯ +ObservationTensor(2).pot_contribution = [1.0, 3.0, 3.0] +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [1, 2] +StringLegalActions() = ["Call", "Raise"] + +# Apply action "Raise" +action: 2 + +# State 9 +# Round: 2 +# Player: 2 +# Pot: 11 +# Money (p1 p2 ...): 99 93 97 +# Cards (public p1 p2 ...): 1 4 2 3 +# Round 1 sequence: Call, Raise, Call, Fold +# Round 2 sequence: Raise +IsTerminal() = False +History() = [4, 2, 3, 1, 2, 1, 0, 1, 2] +HistoryString() = "4, 2, 3, 1, 2, 1, 0, 1, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "[Observer: 0][Private: 4][Round 2][Player: 2][Pot: 11][Money: 99 93 97][Public: 1][Round1: 1 2 1 0][Round2: 2]" +InformationStateString(1) = "[Observer: 1][Private: 2][Round 2][Player: 2][Pot: 11][Money: 99 93 97][Public: 1][Round1: 1 2 1 0][Round2: 2]" +InformationStateString(2) = "[Observer: 2][Private: 3][Round 2][Player: 2][Pot: 11][Money: 99 93 97][Public: 1][Round1: 1 2 1 0][Round2: 2]" +InformationStateTensor(0).player: ◉◯◯ +InformationStateTensor(0).private_card: ◯◯◯◯◉◯◯◯ +InformationStateTensor(0).community_card: ◯◉◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◉◯ ◯◉ +◯◉ ◯◯ +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉◯ +InformationStateTensor(1).private_card: ◯◯◉◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◉◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◉◯ ◯◉ +◯◉ ◯◯ +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(2).player: ◯◯◉ +InformationStateTensor(2).private_card: ◯◯◯◉◯◯◯◯ +InformationStateTensor(2).community_card: ◯◉◯◯◯◯◯◯ +InformationStateTensor(2).betting: +◉◯ ◯◉ +◯◉ ◯◯ +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 4][Round 2][Player: 2][Pot: 11][Money: 99 93 97][Public: 1][Ante: 1 7 3]" +ObservationString(1) = "[Observer: 1][Private: 2][Round 2][Player: 2][Pot: 11][Money: 99 93 97][Public: 1][Ante: 1 7 3]" +ObservationString(2) = "[Observer: 2][Private: 3][Round 2][Player: 2][Pot: 11][Money: 99 93 97][Public: 1][Ante: 1 7 3]" +PublicObservationString() = "[Round 2][Player: 2][Pot: 11][Money: 99 93 97][Public: 1][Ante: 1 7 3]" +PrivateObservationString(0) = "[Observer: 0][Private: 4]" +PrivateObservationString(1) = "[Observer: 1][Private: 2]" +PrivateObservationString(2) = "[Observer: 2][Private: 3]" +ObservationTensor(0).player: ◉◯◯ +ObservationTensor(0).private_card: ◯◯◯◯◉◯◯◯ +ObservationTensor(0).community_card: ◯◉◯◯◯◯◯◯ +ObservationTensor(0).pot_contribution = [1.0, 7.0, 3.0] 
+ObservationTensor(1).player: ◯◉◯ +ObservationTensor(1).private_card: ◯◯◉◯◯◯◯◯ +ObservationTensor(1).community_card: ◯◉◯◯◯◯◯◯ +ObservationTensor(1).pot_contribution = [1.0, 7.0, 3.0] +ObservationTensor(2).player: ◯◯◉ +ObservationTensor(2).private_card: ◯◯◯◉◯◯◯◯ +ObservationTensor(2).community_card: ◯◉◯◯◯◯◯◯ +ObservationTensor(2).pot_contribution = [1.0, 7.0, 3.0] +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["Fold", "Call", "Raise"] + +# Apply action "Call" +action: 1 + +# State 10 +# Round: 2 +# Player: 2 +# Pot: 0 +# Money (p1 p2 ...): 99 100.5 100.5 +# Cards (public p1 p2 ...): 1 4 2 3 +# Round 1 sequence: Call, Raise, Call, Fold +# Round 2 sequence: Raise, Call +IsTerminal() = True +History() = [4, 2, 3, 1, 2, 1, 0, 1, 2, 1] +HistoryString() = "4, 2, 3, 1, 2, 1, 0, 1, 2, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "[Observer: 0][Private: 4][Round 2][Player: 2][Pot: 0][Money: 99 100.5 100.5][Public: 1][Round1: 1 2 1 0][Round2: 2 1]" +InformationStateString(1) = "[Observer: 1][Private: 2][Round 2][Player: 2][Pot: 0][Money: 99 100.5 100.5][Public: 1][Round1: 1 2 1 0][Round2: 2 1]" +InformationStateString(2) = "[Observer: 2][Private: 3][Round 2][Player: 2][Pot: 0][Money: 99 100.5 100.5][Public: 1][Round1: 1 2 1 0][Round2: 2 1]" +InformationStateTensor(0).player: ◉◯◯ +InformationStateTensor(0).private_card: ◯◯◯◯◉◯◯◯ +InformationStateTensor(0).community_card: ◯◉◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◉◯ ◯◉ +◯◉ ◉◯ +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉◯ +InformationStateTensor(1).private_card: ◯◯◉◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◉◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◉◯ ◯◉ +◯◉ ◉◯ +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(2).player: ◯◯◉ +InformationStateTensor(2).private_card: ◯◯◯◉◯◯◯◯ +InformationStateTensor(2).community_card: ◯◉◯◯◯◯◯◯ +InformationStateTensor(2).betting: +◉◯ ◯◉ +◯◉ ◉◯ +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 4][Round 2][Player: 2][Pot: 0][Money: 99 100.5 100.5][Public: 1][Ante: 1 7 7]" +ObservationString(1) = "[Observer: 1][Private: 2][Round 2][Player: 2][Pot: 0][Money: 99 100.5 100.5][Public: 1][Ante: 1 7 7]" +ObservationString(2) = "[Observer: 2][Private: 3][Round 2][Player: 2][Pot: 0][Money: 99 100.5 100.5][Public: 1][Ante: 1 7 7]" +PublicObservationString() = "[Round 2][Player: 2][Pot: 0][Money: 99 100.5 100.5][Public: 1][Ante: 1 7 7]" +PrivateObservationString(0) = "[Observer: 0][Private: 4]" +PrivateObservationString(1) = "[Observer: 1][Private: 2]" +PrivateObservationString(2) = "[Observer: 2][Private: 3]" +ObservationTensor(0).player: ◉◯◯ +ObservationTensor(0).private_card: ◯◯◯◯◉◯◯◯ +ObservationTensor(0).community_card: ◯◉◯◯◯◯◯◯ +ObservationTensor(0).pot_contribution = [1.0, 7.0, 7.0] +ObservationTensor(1).player: ◯◉◯ +ObservationTensor(1).private_card: ◯◯◉◯◯◯◯◯ +ObservationTensor(1).community_card: ◯◉◯◯◯◯◯◯ +ObservationTensor(1).pot_contribution = [1.0, 7.0, 7.0] +ObservationTensor(2).player: ◯◯◉ +ObservationTensor(2).private_card: ◯◯◯◉◯◯◯◯ +ObservationTensor(2).community_card: ◯◉◯◯◯◯◯◯ +ObservationTensor(2).pot_contribution = [1.0, 7.0, 7.0] +Rewards() = [-1, 0.5, 0.5] +Returns() = [-1, 0.5, 0.5] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/leduc_poker_3p_single_tensor.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/leduc_poker_3p_single_tensor.txt new file 
mode 100644 index 0000000..1eb8462 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/leduc_poker_3p_single_tensor.txt @@ -0,0 +1,648 @@ +game: leduc_poker(players=3) +observation_params: single_tensor + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Leduc Poker" +GameType.max_num_players = 10 +GameType.min_num_players = 2 +GameType.parameter_specification = ["action_mapping", "players", "suit_isomorphism"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "leduc_poker" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 3 +PolicyTensorShape() = [3] +MaxChanceOutcomes() = 8 +GetParameters() = {action_mapping=False,players=3,suit_isomorphism=False} +NumPlayers() = 3 +MinUtility() = -13.0 +MaxUtility() = 26.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = player: [3], private_card: [8], community_card: [8], betting: [2, 7, 2] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 47 +ObservationTensorShape() = [22] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 22 +MaxGameLength() = 14 +ToString() = "leduc_poker(players=3)" + +# State 0 +# Round: 1 +# Player: -1 +# Pot: 3 +# Money (p1 p2 ...): 99 99 99 +# Cards (public p1 p2 ...): -10000 -10000 -10000 -10000 +# Round 1 sequence: +# Round 2 sequence: +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "[Observer: 0][Private: -10000][Round 1][Player: -1][Pot: 3][Money: 99 99 99][Round1: ][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: -10000][Round 1][Player: -1][Pot: 3][Money: 99 99 99][Round1: ][Round2: ]" +InformationStateString(2) = "[Observer: 2][Private: -10000][Round 1][Player: -1][Pot: 3][Money: 99 99 99][Round1: ][Round2: ]" +InformationStateTensor(0).player: ◉◯◯ +InformationStateTensor(0).private_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉◯ +InformationStateTensor(1).private_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(2).player: ◯◯◉ +InformationStateTensor(2).private_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(2).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(2).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: -10000][Round 1][Player: -1][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +ObservationString(1) = "[Observer: 1][Private: -10000][Round 1][Player: -1][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +ObservationString(2) = "[Observer: 2][Private: -10000][Round 1][Player: -1][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +PublicObservationString() = "[Round 1][Player: -1][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +PrivateObservationString(0) = "[Observer: 0][Private: -10000]" +PrivateObservationString(1) = "[Observer: 1][Private: -10000]" +PrivateObservationString(2) = "[Observer: 2][Private: -10000]" 
+ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉ +ObservationTensor(1): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉ +ObservationTensor(2): ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉ +ChanceOutcomes() = [(0,0.125), (1,0.125), (2,0.125), (3,0.125), (4,0.125), (5,0.125), (6,0.125), (7,0.125)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7] +StringLegalActions() = ["Chance outcome:0", "Chance outcome:1", "Chance outcome:2", "Chance outcome:3", "Chance outcome:4", "Chance outcome:5", "Chance outcome:6", "Chance outcome:7"] + +# Apply action "Chance outcome:4" +action: 4 + +# State 1 +# Round: 1 +# Player: -1 +# Pot: 3 +# Money (p1 p2 ...): 99 99 99 +# Cards (public p1 p2 ...): -10000 4 -10000 -10000 +# Round 1 sequence: +# Round 2 sequence: +IsTerminal() = False +History() = [4] +HistoryString() = "4" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "[Observer: 0][Private: 4][Round 1][Player: -1][Pot: 3][Money: 99 99 99][Round1: ][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: -10000][Round 1][Player: -1][Pot: 3][Money: 99 99 99][Round1: ][Round2: ]" +InformationStateString(2) = "[Observer: 2][Private: -10000][Round 1][Player: -1][Pot: 3][Money: 99 99 99][Round1: ][Round2: ]" +InformationStateTensor(0).player: ◉◯◯ +InformationStateTensor(0).private_card: ◯◯◯◯◉◯◯◯ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉◯ +InformationStateTensor(1).private_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(2).player: ◯◯◉ +InformationStateTensor(2).private_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(2).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(2).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 4][Round 1][Player: -1][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +ObservationString(1) = "[Observer: 1][Private: -10000][Round 1][Player: -1][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +ObservationString(2) = "[Observer: 2][Private: -10000][Round 1][Player: -1][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +PublicObservationString() = "[Round 1][Player: -1][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +PrivateObservationString(0) = "[Observer: 0][Private: 4]" +PrivateObservationString(1) = "[Observer: 1][Private: -10000]" +PrivateObservationString(2) = "[Observer: 2][Private: -10000]" +ObservationTensor(0): ◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◉◉ +ObservationTensor(1): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉ +ObservationTensor(2): ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉ +ChanceOutcomes() = [(0,0.142857), (1,0.142857), (2,0.142857), (3,0.142857), (5,0.142857), (6,0.142857), (7,0.142857)] +LegalActions() = [0, 1, 2, 3, 5, 6, 7] +StringLegalActions() = ["Chance outcome:0", "Chance outcome:1", "Chance outcome:2", "Chance outcome:3", "Chance outcome:5", "Chance outcome:6", "Chance outcome:7"] + +# Apply action "Chance outcome:2" +action: 2 + +# State 2 +# Apply action "Chance outcome:3" +action: 3 + +# State 3 +# Round: 1 +# Player: 0 +# Pot: 3 +# Money (p1 p2 ...): 99 99 99 +# Cards (public p1 p2 ...): -10000 4 2 3 +# Round 1 sequence: +# Round 2 sequence: +IsTerminal() = False +History() = [4, 2, 3] +HistoryString() = "4, 2, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "[Observer: 0][Private: 4][Round 1][Player: 0][Pot: 3][Money: 99 99 99][Round1: ][Round2: ]" +InformationStateString(1) = 
"[Observer: 1][Private: 2][Round 1][Player: 0][Pot: 3][Money: 99 99 99][Round1: ][Round2: ]" +InformationStateString(2) = "[Observer: 2][Private: 3][Round 1][Player: 0][Pot: 3][Money: 99 99 99][Round1: ][Round2: ]" +InformationStateTensor(0).player: ◉◯◯ +InformationStateTensor(0).private_card: ◯◯◯◯◉◯◯◯ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉◯ +InformationStateTensor(1).private_card: ◯◯◉◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(2).player: ◯◯◉ +InformationStateTensor(2).private_card: ◯◯◯◉◯◯◯◯ +InformationStateTensor(2).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(2).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 4][Round 1][Player: 0][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +ObservationString(1) = "[Observer: 1][Private: 2][Round 1][Player: 0][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +ObservationString(2) = "[Observer: 2][Private: 3][Round 1][Player: 0][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +PublicObservationString() = "[Round 1][Player: 0][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +PrivateObservationString(0) = "[Observer: 0][Private: 4]" +PrivateObservationString(1) = "[Observer: 1][Private: 2]" +PrivateObservationString(2) = "[Observer: 2][Private: 3]" +ObservationTensor(0): ◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◉◉ +ObservationTensor(1): ◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉ +ObservationTensor(2): ◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [1, 2] +StringLegalActions() = ["Call", "Raise"] + +# Apply action "Call" +action: 1 + +# State 4 +# Round: 1 +# Player: 1 +# Pot: 3 +# Money (p1 p2 ...): 99 99 99 +# Cards (public p1 p2 ...): -10000 4 2 3 +# Round 1 sequence: Call +# Round 2 sequence: +IsTerminal() = False +History() = [4, 2, 3, 1] +HistoryString() = "4, 2, 3, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "[Observer: 0][Private: 4][Round 1][Player: 1][Pot: 3][Money: 99 99 99][Round1: 1][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: 2][Round 1][Player: 1][Pot: 3][Money: 99 99 99][Round1: 1][Round2: ]" +InformationStateString(2) = "[Observer: 2][Private: 3][Round 1][Player: 1][Pot: 3][Money: 99 99 99][Round1: 1][Round2: ]" +InformationStateTensor(0).player: ◉◯◯ +InformationStateTensor(0).private_card: ◯◯◯◯◉◯◯◯ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉◯ +InformationStateTensor(1).private_card: ◯◯◉◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(2).player: ◯◯◉ +InformationStateTensor(2).private_card: ◯◯◯◉◯◯◯◯ +InformationStateTensor(2).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(2).betting: +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 4][Round 1][Player: 1][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +ObservationString(1) = "[Observer: 1][Private: 2][Round 1][Player: 1][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +ObservationString(2) = "[Observer: 2][Private: 3][Round 1][Player: 1][Pot: 3][Money: 99 99 99][Ante: 1 1 1]" +PublicObservationString() = "[Round 1][Player: 1][Pot: 3][Money: 99 99 
99][Ante: 1 1 1]" +PrivateObservationString(0) = "[Observer: 0][Private: 4]" +PrivateObservationString(1) = "[Observer: 1][Private: 2]" +PrivateObservationString(2) = "[Observer: 2][Private: 3]" +ObservationTensor(0): ◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◉◉ +ObservationTensor(1): ◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉ +ObservationTensor(2): ◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [1, 2] +StringLegalActions() = ["Call", "Raise"] + +# Apply action "Raise" +action: 2 + +# State 5 +# Round: 1 +# Player: 2 +# Pot: 5 +# Money (p1 p2 ...): 99 97 99 +# Cards (public p1 p2 ...): -10000 4 2 3 +# Round 1 sequence: Call, Raise +# Round 2 sequence: +IsTerminal() = False +History() = [4, 2, 3, 1, 2] +HistoryString() = "4, 2, 3, 1, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "[Observer: 0][Private: 4][Round 1][Player: 2][Pot: 5][Money: 99 97 99][Round1: 1 2][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: 2][Round 1][Player: 2][Pot: 5][Money: 99 97 99][Round1: 1 2][Round2: ]" +InformationStateString(2) = "[Observer: 2][Private: 3][Round 1][Player: 2][Pot: 5][Money: 99 97 99][Round1: 1 2][Round2: ]" +InformationStateTensor(0).player: ◉◯◯ +InformationStateTensor(0).private_card: ◯◯◯◯◉◯◯◯ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◉◯ ◯◯ +◯◉ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉◯ +InformationStateTensor(1).private_card: ◯◯◉◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◉◯ ◯◯ +◯◉ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(2).player: ◯◯◉ +InformationStateTensor(2).private_card: ◯◯◯◉◯◯◯◯ +InformationStateTensor(2).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(2).betting: +◉◯ ◯◯ +◯◉ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 4][Round 1][Player: 2][Pot: 5][Money: 99 97 99][Ante: 1 3 1]" +ObservationString(1) = "[Observer: 1][Private: 2][Round 1][Player: 2][Pot: 5][Money: 99 97 99][Ante: 1 3 1]" +ObservationString(2) = "[Observer: 2][Private: 3][Round 1][Player: 2][Pot: 5][Money: 99 97 99][Ante: 1 3 1]" +PublicObservationString() = "[Round 1][Player: 2][Pot: 5][Money: 99 97 99][Ante: 1 3 1]" +PrivateObservationString(0) = "[Observer: 0][Private: 4]" +PrivateObservationString(1) = "[Observer: 1][Private: 2]" +PrivateObservationString(2) = "[Observer: 2][Private: 3]" +ObservationTensor(0) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0] +ObservationTensor(1) = [0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0] +ObservationTensor(2) = [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0] +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["Fold", "Call", "Raise"] + +# Apply action "Call" +action: 1 + +# State 6 +# Round: 1 +# Player: 0 +# Pot: 7 +# Money (p1 p2 ...): 99 97 97 +# Cards (public p1 p2 ...): -10000 4 2 3 +# Round 1 sequence: Call, Raise, Call +# Round 2 sequence: +IsTerminal() = False +History() = [4, 2, 3, 1, 2, 1] +HistoryString() = "4, 2, 3, 1, 2, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "[Observer: 0][Private: 4][Round 1][Player: 0][Pot: 7][Money: 99 97 97][Round1: 1 2 1][Round2: ]" +InformationStateString(1) = "[Observer: 
1][Private: 2][Round 1][Player: 0][Pot: 7][Money: 99 97 97][Round1: 1 2 1][Round2: ]" +InformationStateString(2) = "[Observer: 2][Private: 3][Round 1][Player: 0][Pot: 7][Money: 99 97 97][Round1: 1 2 1][Round2: ]" +InformationStateTensor(0).player: ◉◯◯ +InformationStateTensor(0).private_card: ◯◯◯◯◉◯◯◯ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◉◯ ◯◯ +◯◉ ◯◯ +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉◯ +InformationStateTensor(1).private_card: ◯◯◉◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◉◯ ◯◯ +◯◉ ◯◯ +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(2).player: ◯◯◉ +InformationStateTensor(2).private_card: ◯◯◯◉◯◯◯◯ +InformationStateTensor(2).community_card: ◯◯◯◯◯◯◯◯ +InformationStateTensor(2).betting: +◉◯ ◯◯ +◯◉ ◯◯ +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 4][Round 1][Player: 0][Pot: 7][Money: 99 97 97][Ante: 1 3 3]" +ObservationString(1) = "[Observer: 1][Private: 2][Round 1][Player: 0][Pot: 7][Money: 99 97 97][Ante: 1 3 3]" +ObservationString(2) = "[Observer: 2][Private: 3][Round 1][Player: 0][Pot: 7][Money: 99 97 97][Ante: 1 3 3]" +PublicObservationString() = "[Round 1][Player: 0][Pot: 7][Money: 99 97 97][Ante: 1 3 3]" +PrivateObservationString(0) = "[Observer: 0][Private: 4]" +PrivateObservationString(1) = "[Observer: 1][Private: 2]" +PrivateObservationString(2) = "[Observer: 2][Private: 3]" +ObservationTensor(0) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0] +ObservationTensor(1) = [0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0] +ObservationTensor(2) = [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0] +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["Fold", "Call", "Raise"] + +# Apply action "Fold" +action: 0 + +# State 7 +# Apply action "Chance outcome:1" +action: 1 + +# State 8 +# Round: 2 +# Player: 1 +# Pot: 7 +# Money (p1 p2 ...): 99 97 97 +# Cards (public p1 p2 ...): 1 4 2 3 +# Round 1 sequence: Call, Raise, Call, Fold +# Round 2 sequence: +IsTerminal() = False +History() = [4, 2, 3, 1, 2, 1, 0, 1] +HistoryString() = "4, 2, 3, 1, 2, 1, 0, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "[Observer: 0][Private: 4][Round 2][Player: 1][Pot: 7][Money: 99 97 97][Public: 1][Round1: 1 2 1 0][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: 2][Round 2][Player: 1][Pot: 7][Money: 99 97 97][Public: 1][Round1: 1 2 1 0][Round2: ]" +InformationStateString(2) = "[Observer: 2][Private: 3][Round 2][Player: 1][Pot: 7][Money: 99 97 97][Public: 1][Round1: 1 2 1 0][Round2: ]" +InformationStateTensor(0).player: ◉◯◯ +InformationStateTensor(0).private_card: ◯◯◯◯◉◯◯◯ +InformationStateTensor(0).community_card: ◯◉◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◉◯ ◯◯ +◯◉ ◯◯ +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉◯ +InformationStateTensor(1).private_card: ◯◯◉◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◉◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◉◯ ◯◯ +◯◉ ◯◯ +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(2).player: ◯◯◉ +InformationStateTensor(2).private_card: ◯◯◯◉◯◯◯◯ +InformationStateTensor(2).community_card: ◯◉◯◯◯◯◯◯ +InformationStateTensor(2).betting: +◉◯ ◯◯ +◯◉ ◯◯ +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ 
◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 4][Round 2][Player: 1][Pot: 7][Money: 99 97 97][Public: 1][Ante: 1 3 3]" +ObservationString(1) = "[Observer: 1][Private: 2][Round 2][Player: 1][Pot: 7][Money: 99 97 97][Public: 1][Ante: 1 3 3]" +ObservationString(2) = "[Observer: 2][Private: 3][Round 2][Player: 1][Pot: 7][Money: 99 97 97][Public: 1][Ante: 1 3 3]" +PublicObservationString() = "[Round 2][Player: 1][Pot: 7][Money: 99 97 97][Public: 1][Ante: 1 3 3]" +PrivateObservationString(0) = "[Observer: 0][Private: 4]" +PrivateObservationString(1) = "[Observer: 1][Private: 2]" +PrivateObservationString(2) = "[Observer: 2][Private: 3]" +ObservationTensor(0) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0] +ObservationTensor(1) = [0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0] +ObservationTensor(2) = [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0] +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [1, 2] +StringLegalActions() = ["Call", "Raise"] + +# Apply action "Raise" +action: 2 + +# State 9 +# Round: 2 +# Player: 2 +# Pot: 11 +# Money (p1 p2 ...): 99 93 97 +# Cards (public p1 p2 ...): 1 4 2 3 +# Round 1 sequence: Call, Raise, Call, Fold +# Round 2 sequence: Raise +IsTerminal() = False +History() = [4, 2, 3, 1, 2, 1, 0, 1, 2] +HistoryString() = "4, 2, 3, 1, 2, 1, 0, 1, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "[Observer: 0][Private: 4][Round 2][Player: 2][Pot: 11][Money: 99 93 97][Public: 1][Round1: 1 2 1 0][Round2: 2]" +InformationStateString(1) = "[Observer: 1][Private: 2][Round 2][Player: 2][Pot: 11][Money: 99 93 97][Public: 1][Round1: 1 2 1 0][Round2: 2]" +InformationStateString(2) = "[Observer: 2][Private: 3][Round 2][Player: 2][Pot: 11][Money: 99 93 97][Public: 1][Round1: 1 2 1 0][Round2: 2]" +InformationStateTensor(0).player: ◉◯◯ +InformationStateTensor(0).private_card: ◯◯◯◯◉◯◯◯ +InformationStateTensor(0).community_card: ◯◉◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◉◯ ◯◉ +◯◉ ◯◯ +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉◯ +InformationStateTensor(1).private_card: ◯◯◉◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◉◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◉◯ ◯◉ +◯◉ ◯◯ +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(2).player: ◯◯◉ +InformationStateTensor(2).private_card: ◯◯◯◉◯◯◯◯ +InformationStateTensor(2).community_card: ◯◉◯◯◯◯◯◯ +InformationStateTensor(2).betting: +◉◯ ◯◉ +◯◉ ◯◯ +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 4][Round 2][Player: 2][Pot: 11][Money: 99 93 97][Public: 1][Ante: 1 7 3]" +ObservationString(1) = "[Observer: 1][Private: 2][Round 2][Player: 2][Pot: 11][Money: 99 93 97][Public: 1][Ante: 1 7 3]" +ObservationString(2) = "[Observer: 2][Private: 3][Round 2][Player: 2][Pot: 11][Money: 99 93 97][Public: 1][Ante: 1 7 3]" +PublicObservationString() = "[Round 2][Player: 2][Pot: 11][Money: 99 93 97][Public: 1][Ante: 1 7 3]" +PrivateObservationString(0) = "[Observer: 0][Private: 4]" +PrivateObservationString(1) = "[Observer: 1][Private: 2]" +PrivateObservationString(2) = "[Observer: 2][Private: 3]" +ObservationTensor(0) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 7.0, 3.0] +ObservationTensor(1) = [0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 7.0, 3.0] +ObservationTensor(2) = [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 7.0, 3.0] +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["Fold", "Call", "Raise"] + +# Apply action "Call" +action: 1 + +# State 10 +# Round: 2 +# Player: 2 +# Pot: 0 +# Money (p1 p2 ...): 99 100.5 100.5 +# Cards (public p1 p2 ...): 1 4 2 3 +# Round 1 sequence: Call, Raise, Call, Fold +# Round 2 sequence: Raise, Call +IsTerminal() = True +History() = [4, 2, 3, 1, 2, 1, 0, 1, 2, 1] +HistoryString() = "4, 2, 3, 1, 2, 1, 0, 1, 2, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "[Observer: 0][Private: 4][Round 2][Player: 2][Pot: 0][Money: 99 100.5 100.5][Public: 1][Round1: 1 2 1 0][Round2: 2 1]" +InformationStateString(1) = "[Observer: 1][Private: 2][Round 2][Player: 2][Pot: 0][Money: 99 100.5 100.5][Public: 1][Round1: 1 2 1 0][Round2: 2 1]" +InformationStateString(2) = "[Observer: 2][Private: 3][Round 2][Player: 2][Pot: 0][Money: 99 100.5 100.5][Public: 1][Round1: 1 2 1 0][Round2: 2 1]" +InformationStateTensor(0).player: ◉◯◯ +InformationStateTensor(0).private_card: ◯◯◯◯◉◯◯◯ +InformationStateTensor(0).community_card: ◯◉◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◉◯ ◯◉ +◯◉ ◉◯ +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉◯ +InformationStateTensor(1).private_card: ◯◯◉◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◉◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◉◯ ◯◉ +◯◉ ◉◯ +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(2).player: ◯◯◉ +InformationStateTensor(2).private_card: ◯◯◯◉◯◯◯◯ +InformationStateTensor(2).community_card: ◯◉◯◯◯◯◯◯ +InformationStateTensor(2).betting: +◉◯ ◯◉ +◯◉ ◉◯ +◉◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 4][Round 2][Player: 2][Pot: 0][Money: 99 100.5 100.5][Public: 1][Ante: 1 7 7]" +ObservationString(1) = "[Observer: 1][Private: 2][Round 2][Player: 2][Pot: 0][Money: 99 100.5 100.5][Public: 1][Ante: 1 7 7]" +ObservationString(2) = "[Observer: 2][Private: 3][Round 2][Player: 2][Pot: 0][Money: 99 100.5 100.5][Public: 1][Ante: 1 7 7]" +PublicObservationString() = "[Round 2][Player: 2][Pot: 0][Money: 99 100.5 100.5][Public: 1][Ante: 1 7 7]" +PrivateObservationString(0) = "[Observer: 0][Private: 4]" +PrivateObservationString(1) = "[Observer: 1][Private: 2]" +PrivateObservationString(2) = "[Observer: 2][Private: 3]" +ObservationTensor(0) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 7.0, 7.0] +ObservationTensor(1) = [0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 7.0, 7.0] +ObservationTensor(2) = [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 7.0, 7.0] +Rewards() = [-1, 0.5, 0.5] +Returns() = [-1, 0.5, 0.5] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/leduc_poker_773740114.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/leduc_poker_773740114.txt new file mode 100644 index 0000000..2c9ce42 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/leduc_poker_773740114.txt @@ -0,0 +1,292 @@ +game: leduc_poker + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = 
Information.IMPERFECT_INFORMATION +GameType.long_name = "Leduc Poker" +GameType.max_num_players = 10 +GameType.min_num_players = 2 +GameType.parameter_specification = ["action_mapping", "players", "suit_isomorphism"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "leduc_poker" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 3 +PolicyTensorShape() = [3] +MaxChanceOutcomes() = 6 +GetParameters() = {action_mapping=False,players=2,suit_isomorphism=False} +NumPlayers() = 2 +MinUtility() = -13.0 +MaxUtility() = 13.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = player: [2], private_card: [6], community_card: [6], betting: [2, 4, 2] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 30 +ObservationTensorShape() = player: [2], private_card: [6], community_card: [6], pot_contribution: [2] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 16 +MaxGameLength() = 8 +ToString() = "leduc_poker()" + +# State 0 +# Round: 1 +# Player: -1 +# Pot: 2 +# Money (p1 p2 ...): 99 99 +# Cards (public p1 p2 ...): -10000 -10000 -10000 +# Round 1 sequence: +# Round 2 sequence: +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "[Observer: 0][Private: -10000][Round 1][Player: -1][Pot: 2][Money: 99 99][Round1: ][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: -10000][Round 1][Player: -1][Pot: 2][Money: 99 99][Round1: ][Round2: ]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◯◯◯◯◯◯ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◯◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: -10000][Round 1][Player: -1][Pot: 2][Money: 99 99][Ante: 1 1]" +ObservationString(1) = "[Observer: 1][Private: -10000][Round 1][Player: -1][Pot: 2][Money: 99 99][Ante: 1 1]" +PublicObservationString() = "[Round 1][Player: -1][Pot: 2][Money: 99 99][Ante: 1 1]" +PrivateObservationString(0) = "[Observer: 0][Private: -10000]" +PrivateObservationString(1) = "[Observer: 1][Private: -10000]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◯◯◯◯◯◯ +ObservationTensor(0).community_card: ◯◯◯◯◯◯ +ObservationTensor(0).pot_contribution: ◉◉ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◯◯◯◯◯◯ +ObservationTensor(1).community_card: ◯◯◯◯◯◯ +ObservationTensor(1).pot_contribution: ◉◉ +ChanceOutcomes() = [(0,0.166667), (1,0.166667), (2,0.166667), (3,0.166667), (4,0.166667), (5,0.166667)] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["Chance outcome:0", "Chance outcome:1", "Chance outcome:2", "Chance outcome:3", "Chance outcome:4", "Chance outcome:5"] + +# Apply action "Chance outcome:0" +action: 0 + +# State 1 +# Round: 1 +# Player: -1 +# Pot: 2 +# Money (p1 p2 ...): 99 99 +# Cards (public p1 p2 ...): -10000 0 -10000 +# Round 1 sequence: +# Round 2 sequence: +IsTerminal() = False +History() = [0] +HistoryString() = "0" +IsChanceNode() = True 
+IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "[Observer: 0][Private: 0][Round 1][Player: -1][Pot: 2][Money: 99 99][Round1: ][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: -10000][Round 1][Player: -1][Pot: 2][Money: 99 99][Round1: ][Round2: ]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◉◯◯◯◯◯ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◯◯◯◯◯◯ +InformationStateTensor(1).community_card: ◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 0][Round 1][Player: -1][Pot: 2][Money: 99 99][Ante: 1 1]" +ObservationString(1) = "[Observer: 1][Private: -10000][Round 1][Player: -1][Pot: 2][Money: 99 99][Ante: 1 1]" +PublicObservationString() = "[Round 1][Player: -1][Pot: 2][Money: 99 99][Ante: 1 1]" +PrivateObservationString(0) = "[Observer: 0][Private: 0]" +PrivateObservationString(1) = "[Observer: 1][Private: -10000]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◉◯◯◯◯◯ +ObservationTensor(0).community_card: ◯◯◯◯◯◯ +ObservationTensor(0).pot_contribution: ◉◉ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◯◯◯◯◯◯ +ObservationTensor(1).community_card: ◯◯◯◯◯◯ +ObservationTensor(1).pot_contribution: ◉◉ +ChanceOutcomes() = [(1,0.2), (2,0.2), (3,0.2), (4,0.2), (5,0.2)] +LegalActions() = [1, 2, 3, 4, 5] +StringLegalActions() = ["Chance outcome:1", "Chance outcome:2", "Chance outcome:3", "Chance outcome:4", "Chance outcome:5"] + +# Apply action "Chance outcome:3" +action: 3 + +# State 2 +# Round: 1 +# Player: 0 +# Pot: 2 +# Money (p1 p2 ...): 99 99 +# Cards (public p1 p2 ...): -10000 0 3 +# Round 1 sequence: +# Round 2 sequence: +IsTerminal() = False +History() = [0, 3] +HistoryString() = "0, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "[Observer: 0][Private: 0][Round 1][Player: 0][Pot: 2][Money: 99 99][Round1: ][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: 3][Round 1][Player: 0][Pot: 2][Money: 99 99][Round1: ][Round2: ]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◉◯◯◯◯◯ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◯◯◯◉◯◯ +InformationStateTensor(1).community_card: ◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 0][Round 1][Player: 0][Pot: 2][Money: 99 99][Ante: 1 1]" +ObservationString(1) = "[Observer: 1][Private: 3][Round 1][Player: 0][Pot: 2][Money: 99 99][Ante: 1 1]" +PublicObservationString() = "[Round 1][Player: 0][Pot: 2][Money: 99 99][Ante: 1 1]" +PrivateObservationString(0) = "[Observer: 0][Private: 0]" +PrivateObservationString(1) = "[Observer: 1][Private: 3]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◉◯◯◯◯◯ +ObservationTensor(0).community_card: ◯◯◯◯◯◯ +ObservationTensor(0).pot_contribution: ◉◉ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◯◯◯◉◯◯ +ObservationTensor(1).community_card: ◯◯◯◯◯◯ +ObservationTensor(1).pot_contribution: ◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2] +StringLegalActions() = ["Call", "Raise"] + +# Apply action "Raise" +action: 2 + +# 
State 3 +# Round: 1 +# Player: 1 +# Pot: 4 +# Money (p1 p2 ...): 97 99 +# Cards (public p1 p2 ...): -10000 0 3 +# Round 1 sequence: Raise +# Round 2 sequence: +IsTerminal() = False +History() = [0, 3, 2] +HistoryString() = "0, 3, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "[Observer: 0][Private: 0][Round 1][Player: 1][Pot: 4][Money: 97 99][Round1: 2][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: 3][Round 1][Player: 1][Pot: 4][Money: 97 99][Round1: 2][Round2: ]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◉◯◯◯◯◯ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◯◉ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◯◯◯◉◯◯ +InformationStateTensor(1).community_card: ◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◯◉ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 0][Round 1][Player: 1][Pot: 4][Money: 97 99][Ante: 3 1]" +ObservationString(1) = "[Observer: 1][Private: 3][Round 1][Player: 1][Pot: 4][Money: 97 99][Ante: 3 1]" +PublicObservationString() = "[Round 1][Player: 1][Pot: 4][Money: 97 99][Ante: 3 1]" +PrivateObservationString(0) = "[Observer: 0][Private: 0]" +PrivateObservationString(1) = "[Observer: 1][Private: 3]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◉◯◯◯◯◯ +ObservationTensor(0).community_card: ◯◯◯◯◯◯ +ObservationTensor(0).pot_contribution = [3.0, 1.0] +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◯◯◯◉◯◯ +ObservationTensor(1).community_card: ◯◯◯◯◯◯ +ObservationTensor(1).pot_contribution = [3.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["Fold", "Call", "Raise"] + +# Apply action "Fold" +action: 0 + +# State 4 +# Round: 1 +# Player: 1 +# Pot: 0 +# Money (p1 p2 ...): 101 99 +# Cards (public p1 p2 ...): -10000 0 3 +# Round 1 sequence: Raise, Fold +# Round 2 sequence: +IsTerminal() = True +History() = [0, 3, 2, 0] +HistoryString() = "0, 3, 2, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "[Observer: 0][Private: 0][Round 1][Player: 1][Pot: 0][Money: 101 99][Round1: 2 0][Round2: ]" +InformationStateString(1) = "[Observer: 1][Private: 3][Round 1][Player: 1][Pot: 0][Money: 101 99][Round1: 2 0][Round2: ]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◉◯◯◯◯◯ +InformationStateTensor(0).community_card: ◯◯◯◯◯◯ +InformationStateTensor(0).betting: +◯◉ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◯◯◯◉◯◯ +InformationStateTensor(1).community_card: ◯◯◯◯◯◯ +InformationStateTensor(1).betting: +◯◉ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +◯◯ ◯◯ +ObservationString(0) = "[Observer: 0][Private: 0][Round 1][Player: 1][Pot: 0][Money: 101 99][Ante: 3 1]" +ObservationString(1) = "[Observer: 1][Private: 3][Round 1][Player: 1][Pot: 0][Money: 101 99][Ante: 3 1]" +PublicObservationString() = "[Round 1][Player: 1][Pot: 0][Money: 101 99][Ante: 3 1]" +PrivateObservationString(0) = "[Observer: 0][Private: 0]" +PrivateObservationString(1) = "[Observer: 1][Private: 3]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◉◯◯◯◯◯ +ObservationTensor(0).community_card: ◯◯◯◯◯◯ +ObservationTensor(0).pot_contribution = [3.0, 1.0] +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◯◯◯◉◯◯ +ObservationTensor(1).community_card: ◯◯◯◯◯◯ 
+ObservationTensor(1).pot_contribution = [3.0, 1.0] +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/lewis_signaling.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/lewis_signaling.txt new file mode 100644 index 0000000..8bba18e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/lewis_signaling.txt @@ -0,0 +1,124 @@ +game: lewis_signaling + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Lewis Signaling Game" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["num_messages", "num_states", "payoffs"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "lewis_signaling" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 3 +PolicyTensorShape() = [3] +MaxChanceOutcomes() = 3 +GetParameters() = {num_messages=3,num_states=3,payoffs=1, 0, 0, 0, 1, 0, 0, 0, 1} +NumPlayers() = 2 +MinUtility() = 0.0 +MaxUtility() = 1.0 +UtilitySum() = None +InformationStateTensorShape() = [6] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 6 +ObservationTensorShape() = [6] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 6 +MaxGameLength() = 2 +ToString() = "lewis_signaling()" + +# State 0 +# Initial chance node +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "ChanceNode -- no observation" +InformationStateString(1) = "ChanceNode -- no observation" +InformationStateTensor(0): ◯◯◯◯◯◯ +InformationStateTensor(1): ◯◯◯◯◯◯ +ObservationString(0) = "ChanceNode -- no observation" +ObservationString(1) = "ChanceNode -- no observation" +ObservationTensor(0): ◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.333333), (1,0.333333), (2,0.333333)] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["State 0", "State 1", "State 2"] + +# Apply action "State 0" +action: 0 + +# State 1 +# State 0 +IsTerminal() = False +History() = [0] +HistoryString() = "0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "Current turn: 0\nState: 0\n" +InformationStateString(1) = "Current turn: 0\nMessage: -1\n" +InformationStateTensor(0): ◉◯◯◉◯◯ +InformationStateTensor(1): ◉◯◯◯◯◯ +ObservationString(0) = "Current turn: 0\nState: 0\n" +ObservationString(1) = "Current turn: 0\nMessage: -1\n" +ObservationTensor(0): ◉◯◯◉◯◯ +ObservationTensor(1): ◉◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["Message 0", "Message 1", "Message 2"] + +# Apply action "Message 1" +action: 1 + +# State 2 +# State 0, Message 1 +IsTerminal() = False +History() = [0, 1] +HistoryString() = "0, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "Current turn: 1\nState: 0\n" +InformationStateString(1) = "Current turn: 1\nMessage: 1\n" +InformationStateTensor(0): ◯◉◯◉◯◯ +InformationStateTensor(1): ◯◉◯◯◉◯ +ObservationString(0) = "Current 
turn: 1\nState: 0\n" +ObservationString(1) = "Current turn: 1\nMessage: 1\n" +ObservationTensor(0): ◯◉◯◉◯◯ +ObservationTensor(1): ◯◉◯◯◉◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["Action 0", "Action 1", "Action 2"] + +# Apply action "Action 0" +action: 0 + +# State 3 +# State 0, Message 1, Action 0 +IsTerminal() = True +History() = [0, 1, 0] +HistoryString() = "0, 1, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "Current turn: 1\nState: 0\n" +InformationStateString(1) = "Current turn: 1\nMessage: 1\n" +InformationStateTensor(0): ◯◉◉◉◯◯ +InformationStateTensor(1): ◯◉◉◯◉◯ +ObservationString(0) = "Current turn: 1\nState: 0\n" +ObservationString(1) = "Current turn: 1\nMessage: 1\n" +ObservationTensor(0): ◯◉◉◉◯◯ +ObservationTensor(1): ◯◉◉◯◉◯ +Rewards() = [1, 1] +Returns() = [1, 1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/liars_dice.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/liars_dice.txt new file mode 100644 index 0000000..7534906 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/liars_dice.txt @@ -0,0 +1,181 @@ +game: liars_dice + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Liars Dice" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["bidding_rule", "dice_sides", "numdice", "players"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = False +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "liars_dice" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 13 +PolicyTensorShape() = [13] +MaxChanceOutcomes() = 6 +GetParameters() = {bidding_rule=reset-face,dice_sides=6,numdice=1,players=2} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = [21] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 21 +ObservationTensorShape() = [21] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 21 +MaxGameLength() = 13 +ToString() = "liars_dice()" + +# State 0 +# -1 -1 - chance node, current roller is player 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "-1" +InformationStateString(1) = "-1" +InformationStateTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.166667), (1,0.166667), (2,0.166667), (3,0.166667), (4,0.166667), (5,0.166667)] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["Roll 1", "Roll 2", "Roll 3", "Roll 4", "Roll 5", "Roll 6"] + +# Apply action "Roll 2" +action: 1 + +# State 1 +# 2 -1 - chance node, current roller is player 1 +IsTerminal() = False +History() = [1] +HistoryString() = "1" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "2" +InformationStateString(1) = "-1" +InformationStateTensor(0): ◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+InformationStateTensor(1): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0): ◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.166667), (1,0.166667), (2,0.166667), (3,0.166667), (4,0.166667), (5,0.166667)] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["Roll 1", "Roll 2", "Roll 3", "Roll 4", "Roll 5", "Roll 6"] + +# Apply action "Roll 5" +action: 4 + +# State 2 +# 2 5 +IsTerminal() = False +History() = [1, 4] +HistoryString() = "1, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "2" +InformationStateString(1) = "5" +InformationStateTensor(0): ◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0): ◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] +StringLegalActions() = ["1-1", "1-2", "1-3", "1-4", "1-5", "1-6", "2-1", "2-2", "2-3", "2-4", "2-5", "2-6"] + +# Apply action "1-2" +action: 1 + +# State 3 +# 2 5 1-2 +IsTerminal() = False +History() = [1, 4, 1] +HistoryString() = "1, 4, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "2 1-2" +InformationStateString(1) = "5 1-2" +InformationStateTensor(0): ◉◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0): ◉◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] +StringLegalActions() = ["1-3", "1-4", "1-5", "1-6", "2-1", "2-2", "2-3", "2-4", "2-5", "2-6", "Liar"] + +# Apply action "1-5" +action: 4 + +# State 4 +# 2 5 1-2 1-5 +IsTerminal() = False +History() = [1, 4, 1, 4] +HistoryString() = "1, 4, 1, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "2 1-2 1-5" +InformationStateString(1) = "5 1-2 1-5" +InformationStateTensor(0): ◉◯◯◉◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◯◯◯◉◯◯◉◯◯◉◯◯◯◯◯◯◯◯ +ObservationTensor(0): ◉◯◯◉◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◉◯◯◯◯◉◯◯◉◯◯◉◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [5, 6, 7, 8, 9, 10, 11, 12] +StringLegalActions() = ["1-6", "2-1", "2-2", "2-3", "2-4", "2-5", "2-6", "Liar"] + +# Apply action "2-3" +action: 8 + +# State 5 +# 2 5 1-2 1-5 2-3 +IsTerminal() = False +History() = [1, 4, 1, 4, 8] +HistoryString() = "1, 4, 1, 4, 8" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "2 1-2 1-5 2-3" +InformationStateString(1) = "5 1-2 1-5 2-3" +InformationStateTensor(0): ◉◯◯◉◯◯◯◯◯◉◯◯◉◯◯◯◉◯◯◯◯ +InformationStateTensor(1): ◯◉◯◯◯◯◉◯◯◉◯◯◉◯◯◯◉◯◯◯◯ +ObservationTensor(0): ◉◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯ +ObservationTensor(1): ◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◉◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [9, 10, 11, 12] +StringLegalActions() = ["2-4", "2-5", "2-6", "Liar"] + +# Apply action "Liar" +action: 12 + +# State 6 +# 2 5 1-2 1-5 2-3 Liar +IsTerminal() = True +History() = [1, 4, 1, 4, 8, 12] +HistoryString() = "1, 4, 1, 4, 8, 12" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "2 1-2 1-5 2-3 Liar" +InformationStateString(1) = "5 1-2 1-5 2-3 Liar" +InformationStateTensor(0): ◉◯◯◉◯◯◯◯◯◉◯◯◉◯◯◯◉◯◯◯◉ +InformationStateTensor(1): ◯◉◯◯◯◯◉◯◯◉◯◯◉◯◯◯◉◯◯◯◉ +ObservationTensor(0): ◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉ +ObservationTensor(1): ◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◉ +Rewards() = [-1, 1] 
+Returns() = [-1, 1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/liars_dice_ir.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/liars_dice_ir.txt new file mode 100644 index 0000000..1b8bc7c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/liars_dice_ir.txt @@ -0,0 +1,129 @@ +game: liars_dice_ir + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Liars Dice with Imperfect Recall" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["bidding_rule", "dice_sides", "numdice", "players", "recall_length"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = False +GameType.provides_observation_tensor = False +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "liars_dice_ir" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 13 +PolicyTensorShape() = [13] +MaxChanceOutcomes() = 6 +GetParameters() = {bidding_rule=reset-face,dice_sides=6,numdice=1,players=2,rollout_length=4} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +MaxGameLength() = 13 +ToString() = "liars_dice_ir()" + +# State 0 +# -1 -1 - chance node, current roller is player 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "P0 -1" +InformationStateString(1) = "P1 -1" +ChanceOutcomes() = [(0,0.166667), (1,0.166667), (2,0.166667), (3,0.166667), (4,0.166667), (5,0.166667)] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["Roll 1", "Roll 2", "Roll 3", "Roll 4", "Roll 5", "Roll 6"] + +# Apply action "Roll 1" +action: 0 + +# State 1 +# 1 -1 - chance node, current roller is player 1 +IsTerminal() = False +History() = [0] +HistoryString() = "0" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "P0 1" +InformationStateString(1) = "P1 -1" +ChanceOutcomes() = [(0,0.166667), (1,0.166667), (2,0.166667), (3,0.166667), (4,0.166667), (5,0.166667)] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["Roll 1", "Roll 2", "Roll 3", "Roll 4", "Roll 5", "Roll 6"] + +# Apply action "Roll 2" +action: 1 + +# State 2 +# 1 2 +IsTerminal() = False +History() = [0, 1] +HistoryString() = "0, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "P0 1" +InformationStateString(1) = "P1 2" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] +StringLegalActions() = ["1-1", "1-2", "1-3", "1-4", "1-5", "1-6", "2-1", "2-2", "2-3", "2-4", "2-5", "2-6"] + +# Apply action "2-1" +action: 6 + +# State 3 +# 1 2 2-1 +IsTerminal() = False +History() = [0, 1, 6] +HistoryString() = "0, 1, 6" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "P0 1 2-1" +InformationStateString(1) = "P1 2 2-1" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [7, 8, 9, 10, 11, 12] +StringLegalActions() = ["2-2", "2-3", "2-4", "2-5", "2-6", "Liar"] + +# Apply action "2-4" +action: 9 + +# State 4 +# 1 2 2-1 2-4 +IsTerminal() = False +History() = [0, 1, 6, 9] 
+HistoryString() = "0, 1, 6, 9" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "P0 1 2-1 2-4" +InformationStateString(1) = "P1 2 2-1 2-4" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [10, 11, 12] +StringLegalActions() = ["2-5", "2-6", "Liar"] + +# Apply action "Liar" +action: 12 + +# State 5 +# 1 2 2-1 2-4 Liar +IsTerminal() = True +History() = [0, 1, 6, 9, 12] +HistoryString() = "0, 1, 6, 9, 12" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "P0 1 2-1 2-4 Liar" +InformationStateString(1) = "P1 2 2-1 2-4 Liar" +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/maedn.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/maedn.txt new file mode 100644 index 0000000..dca630c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/maedn.txt @@ -0,0 +1,2148 @@ +game: maedn + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Mensch-Aergere-Dich-Nicht" +GameType.max_num_players = 4 +GameType.min_num_players = 2 +GameType.parameter_specification = ["players", "twoPlayersOpposite"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "maedn" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 45 +PolicyTensorShape() = [45] +MaxChanceOutcomes() = 6 +GetParameters() = {players=2,twoPlayersOpposite=True} +NumPlayers() = 2 +MinUtility() = -3.0 +MaxUtility() = 3.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [238] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 238 +MaxGameLength() = 1000 +ToString() = "maedn()" + +# State 0 +# 1 1 o-o-S . . +# 1 1 o . o . . +# o . o +# o . o +# S-o-o-o-o . o-o-o-o-o +# o . . . . . . . . o +# o-o-o-o-o . o-o-o-o-S +# o . o +# o . o +# . . o . o 2 2 +# . . S-o-o 2 2 +# Turn: * +# Dice: +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "1 1 o-o-S . .\n1 1 o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-o-o\no . . . . . . . . o\no-o-o-o-o . o-o-o-o-S\n o . o \n o . o \n. . o . o 2 2\n. . S-o-o 2 2\nTurn: *\nDice: \n" +ObservationString(1) = "1 1 o-o-S . .\n1 1 o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-o-o\no . . . . . . . . o\no-o-o-o-o . o-o-o-o-S\n o . o \n o . o \n. . o . o 2 2\n. . 
S-o-o 2 2\nTurn: *\nDice: \n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ChanceOutcomes() = [(0,0.166667), (1,0.166667), (2,0.166667), (3,0.166667), (4,0.166667), (5,0.166667)] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["chance outcome 0 (roll: 1)", "chance outcome 1 (roll: 2)", "chance outcome 2 (roll: 3)", "chance outcome 3 (roll: 4)", "chance outcome 4 (roll: 5)", "chance outcome 5 (roll: 6)"] + +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 1 +# 1 1 o-o-S . . +# 1 1 o . o . . +# o . o +# o . o +# S-o-o-o-o . o-o-o-o-o +# o . . . . . . . . o +# o-o-o-o-o . o-o-o-o-S +# o . o +# o . o +# . . o . o 2 2 +# . . S-o-o 2 2 +# Turn: 1 +# Dice: 4 +IsTerminal() = False +History() = [3] +HistoryString() = "3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "1 1 o-o-S . .\n1 1 o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-o-o\no . . . . . . . . o\no-o-o-o-o . o-o-o-o-S\n o . o \n o . o \n. . o . o 2 2\n. . S-o-o 2 2\nTurn: 1\nDice: 4\n" +ObservationString(1) = "1 1 o-o-S . .\n1 1 o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-o-o\no . . . . . . . . o\no-o-o-o-o . 
o-o-o-o-S\n o . o \n o . o \n. . o . o 2 2\n. . S-o-o 2 2\nTurn: 1\nDice: 4\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0] +StringLegalActions() = ["0 - passes"] + +# Apply action "0 - passes" +action: 0 + +# State 2 +# 1 1 o-o-S . . +# 1 1 o . o . . +# o . o +# o . o +# S-o-o-o-o . o-o-o-o-o +# o . . . . . . . . o +# o-o-o-o-o . o-o-o-o-S +# o . o +# o . o +# . . o . o 2 2 +# . . S-o-o 2 2 +# Turn: * +# Dice: +IsTerminal() = False +History() = [3, 0] +HistoryString() = "3, 0" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "1 1 o-o-S . .\n1 1 o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-o-o\no . . . . . . . . o\no-o-o-o-o . o-o-o-o-S\n o . o \n o . o \n. . o . o 2 2\n. . S-o-o 2 2\nTurn: *\nDice: \n" +ObservationString(1) = "1 1 o-o-S . .\n1 1 o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-o-o\no . . . . . . . . o\no-o-o-o-o . o-o-o-o-S\n o . o \n o . o \n. . o . o 2 2\n. . 
S-o-o 2 2\nTurn: *\nDice: \n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ChanceOutcomes() = [(0,0.166667), (1,0.166667), (2,0.166667), (3,0.166667), (4,0.166667), (5,0.166667)] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["chance outcome 0 (roll: 1)", "chance outcome 1 (roll: 2)", "chance outcome 2 (roll: 3)", "chance outcome 3 (roll: 4)", "chance outcome 4 (roll: 5)", "chance outcome 5 (roll: 6)"] + +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 3 +# 1 1 o-o-S . . +# 1 1 o . o . . +# o . o +# o . o +# S-o-o-o-o . o-o-o-o-o +# o . . . . . . . . o +# o-o-o-o-o . o-o-o-o-S +# o . o +# o . o +# . . o . o 2 2 +# . . S-o-o 2 2 +# Turn: 2 +# Dice: 6 +IsTerminal() = False +History() = [3, 0, 5] +HistoryString() = "3, 0, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "1 1 o-o-S . .\n1 1 o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-o-o\no . . . . . . . . o\no-o-o-o-o . o-o-o-o-S\n o . o \n o . o \n. . o . o 2 2\n. . S-o-o 2 2\nTurn: 2\nDice: 6\n" +ObservationString(1) = "1 1 o-o-S . .\n1 1 o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-o-o\no . . . . . . . . 
o\no-o-o-o-o . o-o-o-o-S\n o . o \n o . o \n. . o . o 2 2\n. . S-o-o 2 2\nTurn: 2\nDice: 6\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 4.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1] +StringLegalActions() = ["1 - brings in new piece"] + +# Apply action "1 - brings in new piece" +action: 1 + +# State 4 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 5 +# 1 1 o-o-S . . +# 1 1 o . o . . +# o . o +# o . o +# S-o-o-o-o . o-o-o-o-o +# o . . . . . . . . o +# o-o-o-o-o . o-o-o-o-2 +# o . o +# o . o +# . . o . o . 2 +# . . S-o-o 2 2 +# Turn: 2 +# Dice: 4 +IsTerminal() = False +History() = [3, 0, 5, 1, 3] +HistoryString() = "3, 0, 5, 1, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "1 1 o-o-S . .\n1 1 o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-o-o\no . . . . . . . . o\no-o-o-o-o . o-o-o-o-2\n o . o \n o . o \n. . o . o . 2\n. . S-o-o 2 2\nTurn: 2\nDice: 4\n" +ObservationString(1) = "1 1 o-o-S . .\n1 1 o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-o-o\no . . . . . . . . o\no-o-o-o-o . o-o-o-o-2\n o . o \n o . o \n. . o . o . 2\n. . 
S-o-o 2 2\nTurn: 2\nDice: 4\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2] +StringLegalActions() = ["2 - moves piece on field 0"] + +# Apply action "2 - moves piece on field 0" +action: 2 + +# State 6 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 7 +# 1 1 o-o-S . . +# 1 1 o . o . . +# o . o +# o . o +# S-o-o-o-o . o-o-o-o-o +# o . . . . . . . . o +# o-o-o-o-o . 2-o-o-o-S +# o . o +# o . o +# . . o . o . 2 +# . . S-o-o 2 2 +# Turn: 1 +# Dice: 4 +IsTerminal() = False +History() = [3, 0, 5, 1, 3, 2, 3] +HistoryString() = "3, 0, 5, 1, 3, 2, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "1 1 o-o-S . .\n1 1 o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-o-o\no . . . . . . . . o\no-o-o-o-o . 2-o-o-o-S\n o . o \n o . o \n. . o . o . 2\n. . S-o-o 2 2\nTurn: 1\nDice: 4\n" +ObservationString(1) = "1 1 o-o-S . .\n1 1 o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-o-o\no . . . . . . . . o\no-o-o-o-o . 2-o-o-o-S\n o . o \n o . o \n. . o . o . 2\n. . 
S-o-o 2 2\nTurn: 1\nDice: 4\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0] +StringLegalActions() = ["0 - passes"] + +# Apply action "0 - passes" +action: 0 + +# State 8 +# Apply action "chance outcome 4 (roll: 5)" +action: 4 + +# State 9 +# 1 1 o-o-S . . +# 1 1 o . o . . +# o . o +# o . o +# S-o-o-o-o . o-o-o-o-o +# o . . . . . . . . o +# o-o-o-o-o . 2-o-o-o-S +# o . o +# o . o +# . . o . o . 2 +# . . S-o-o 2 2 +# Turn: 2 +# Dice: 5 +IsTerminal() = False +History() = [3, 0, 5, 1, 3, 2, 3, 0, 4] +HistoryString() = "3, 0, 5, 1, 3, 2, 3, 0, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "1 1 o-o-S . .\n1 1 o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-o-o\no . . . . . . . . o\no-o-o-o-o . 2-o-o-o-S\n o . o \n o . o \n. . o . o . 2\n. . S-o-o 2 2\nTurn: 2\nDice: 5\n" +ObservationString(1) = "1 1 o-o-S . .\n1 1 o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-o-o\no . . . . . . . . o\no-o-o-o-o . 2-o-o-o-S\n o . o \n o . o \n. . o . o . 2\n. . 
S-o-o 2 2\nTurn: 2\nDice: 5\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [6] +StringLegalActions() = ["6 - moves piece on field 4"] + +# Apply action "6 - moves piece on field 4" +action: 6 + +# State 10 +# Apply action "chance outcome 4 (roll: 5)" +action: 4 + +# State 11 +# 1 1 o-o-S . . +# 1 1 o . o . . +# o . o +# o . o +# S-o-o-o-o . o-o-o-o-o +# o . . . . . . . . o +# o-o-o-o-o . o-o-o-o-S +# o . o +# o . o +# . . o . o . 2 +# . . S-2-o 2 2 +# Turn: 1 +# Dice: 5 +IsTerminal() = False +History() = [3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4] +HistoryString() = "3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "1 1 o-o-S . .\n1 1 o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-o-o\no . . . . . . . . o\no-o-o-o-o . o-o-o-o-S\n o . o \n o . o \n. . o . o . 2\n. . S-2-o 2 2\nTurn: 1\nDice: 5\n" +ObservationString(1) = "1 1 o-o-S . .\n1 1 o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-o-o\no . . . . . . . . o\no-o-o-o-o . o-o-o-o-S\n o . o \n o . o \n. . o . o . 2\n. . 
S-2-o 2 2\nTurn: 1\nDice: 5\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0] +StringLegalActions() = ["0 - passes"] + +# Apply action "0 - passes" +action: 0 + +# State 12 +# Apply action "chance outcome 2 (roll: 3)" +action: 2 + +# State 13 +# Apply action "11 - moves piece on field 9" +action: 11 + +# State 14 +# Apply action "chance outcome 4 (roll: 5)" +action: 4 + +# State 15 +# Apply action "0 - passes" +action: 0 + +# State 16 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 17 +# Apply action "1 - brings in new piece" +action: 1 + +# State 18 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 19 +# Apply action "2 - moves piece on field 0" +action: 2 + +# State 20 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 21 +# Apply action "14 - moves piece on field 12" +action: 14 + +# State 22 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 23 +# Apply action "1 - brings in new piece" +action: 1 + +# State 24 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 25 +# Apply action "2 - moves piece on field 
0" +action: 2 + +# State 26 +# Apply action "chance outcome 0 (roll: 1)" +action: 0 + +# State 27 +# Apply action "18 - moves piece on field 16" +action: 18 + +# State 28 +# Apply action "chance outcome 4 (roll: 5)" +action: 4 + +# State 29 +# Apply action "4 - moves piece on field 2" +action: 4 + +# State 30 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 31 +# Apply action "1 - brings in new piece" +action: 1 + +# State 32 +# Apply action "chance outcome 2 (roll: 3)" +action: 2 + +# State 33 +# Apply action "2 - moves piece on field 0" +action: 2 + +# State 34 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 35 +# Apply action "9 - moves piece on field 7" +action: 9 + +# State 36 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 37 +# 1 1 o-1-S . . +# . 1 o . o . . +# o . o +# o . o +# S-o-o-o-o . o-o-o-o-o +# o . . . . . . . . o +# o-2-o-o-o . o-2-o-o-S +# o . o +# o . 2 +# . . o . o . . +# . . S-o-o 2 . +# Turn: 2 +# Dice: 2 +IsTerminal() = False +History() = [3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1] +HistoryString() = "3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "1 1 o-1-S . .\n. 1 o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-o-o\no . . . . . . . . o\no-2-o-o-o . o-2-o-o-S\n o . o \n o . 2 \n. . o . o . .\n. . S-o-o 2 .\nTurn: 2\nDice: 2\n" +ObservationString(1) = "1 1 o-1-S . .\n. 1 o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-o-o\no . . . . . . . . o\no-2-o-o-o . o-2-o-o-S\n o . o \n o . 2 \n. . o . o . .\n. . S-o-o 2 .\nTurn: 2\nDice: 2\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [5, 8, 19] +StringLegalActions() = ["5 - moves piece on field 3", "8 - moves piece on field 6", "19 - moves piece on field 17"] + +# Apply action "19 - moves piece on field 17" +action: 19 + +# State 38 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 39 +# Apply action "1 - brings in new piece" +action: 1 + +# State 40 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 41 +# Apply action "2 - moves piece on field 0" +action: 2 + +# State 42 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 43 +# Apply action "8 - moves piece on field 6" +action: 8 + +# State 44 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 45 +# 1 1 o-1-S . . +# . . o . o . . +# o . o +# o . o +# S-o-1-o-o . o-o-o-o-o +# 2 . . . . . . . . o +# o-o-o-o-o . o-2-o-o-S +# o . o +# o . o +# . . o . o . . +# . . S-o-2 2 . +# Turn: 1 +# Dice: 2 +IsTerminal() = False +History() = [3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1] +HistoryString() = "3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "1 1 o-1-S . .\n. . o . o . .\n o . o \n o . o \nS-o-1-o-o . o-o-o-o-o\n2 . . . . . . . . o\no-o-o-o-o . o-2-o-o-S\n o . o \n o . o \n. . o . o . .\n. . S-o-2 2 .\nTurn: 1\nDice: 2\n" +ObservationString(1) = "1 1 o-1-S . .\n. . o . o . .\n o . o \n o . o \nS-o-1-o-o . o-o-o-o-o\n2 . . . . . . . . o\no-o-o-o-o . o-2-o-o-S\n o . o \n o . o \n. . o . o . .\n. . 
S-o-2 2 .\nTurn: 1\nDice: 2\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [4, 11] +StringLegalActions() = ["4 - moves piece on field 2", "11 - moves piece on field 9"] + +# Apply action "11 - moves piece on field 9" +action: 11 + +# State 46 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 47 +# Apply action "1 - brings in new piece" +action: 1 + +# State 48 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 49 +# Apply action "21 - moves piece on field 19" +action: 21 + +# State 50 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 51 +# Apply action "13 - moves piece on field 11" +action: 13 + +# State 52 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 53 +# Apply action "2 - moves piece on field 0" +action: 2 + +# State 54 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 55 +# Apply action "15 - moves piece on field 13" +action: 15 + +# State 56 +# Apply action "chance outcome 2 (roll: 3)" +action: 2 + +# State 57 +# Apply action "23 - moves piece on field 21" +action: 23 + +# State 58 +# Apply action 
"chance outcome 1 (roll: 2)" +action: 1 + +# State 59 +# Apply action "4 - moves piece on field 2" +action: 4 + +# State 60 +# Apply action "chance outcome 0 (roll: 1)" +action: 0 + +# State 61 +# Apply action "5 - moves piece on field 3" +action: 5 + +# State 62 +# Apply action "chance outcome 2 (roll: 3)" +action: 2 + +# State 63 +# Apply action "6 - moves piece on field 4" +action: 6 + +# State 64 +# Apply action "chance outcome 4 (roll: 5)" +action: 4 + +# State 65 +# Apply action "10 - moves piece on field 8" +action: 10 + +# State 66 +# Apply action "chance outcome 0 (roll: 1)" +action: 0 + +# State 67 +# Apply action "9 - moves piece on field 7" +action: 9 + +# State 68 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 69 +# Apply action "1 - brings in new piece" +action: 1 + +# State 70 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 71 +# Apply action "6 - moves piece on field 4" +action: 6 + +# State 72 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 73 +# Apply action "17 - moves piece on field 15" +action: 17 + +# State 74 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 75 +# 1 1 1-o-S . . +# . . o . o . . +# o . o +# o . o +# S-o-o-o-o . o-o-o-1-o +# o . . . . . . . . o +# o-o-o-o-o . o-o-2-o-2 +# 2 . o +# o . o +# . . o . o . . +# . . S-o-2 . . +# Turn: 2 +# Dice: 6 +IsTerminal() = False +History() = [3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5] +HistoryString() = "3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "1 1 1-o-S . .\n. . o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-1-o\no . . . . . . . . o\no-o-o-o-o . o-o-2-o-2\n 2 . o \n o . o \n. . o . o . .\n. . S-o-2 . .\nTurn: 2\nDice: 6\n" +ObservationString(1) = "1 1 1-o-S . .\n. . o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-1-o\no . . . . . . . . o\no-o-o-o-o . o-o-2-o-2\n 2 . o \n o . o \n. . o . o . .\n. . S-o-2 . 
.\nTurn: 2\nDice: 6\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0] +ObservationTensor(1) = [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2, 10, 15] +StringLegalActions() = ["2 - moves piece on field 0", "10 - moves piece on field 8", "15 - moves piece on field 13"] + +# Apply action "2 - moves piece on field 0" +action: 2 + +# State 76 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 77 +# Apply action "10 - moves piece on field 8" +action: 10 + +# State 78 +# Apply action "chance outcome 0 (roll: 1)" +action: 0 + +# State 79 +# Apply action "10 - moves piece on field 8" +action: 10 + +# State 80 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 81 +# Apply action "15 - moves piece on field 13" +action: 15 + +# State 82 +# Apply action "chance outcome 4 (roll: 5)" +action: 4 + +# State 83 +# Apply action "19 - moves piece on field 17" +action: 19 + +# State 84 +# Apply action "chance outcome 4 (roll: 5)" +action: 4 + +# State 85 +# Apply action "14 - moves piece on field 12" +action: 14 + +# State 86 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 87 +# Apply action "24 - moves piece on field 22" +action: 24 
+ +# State 88 +# Apply action "chance outcome 2 (roll: 3)" +action: 2 + +# State 89 +# Apply action "8 - moves piece on field 6" +action: 8 + +# State 90 +# Apply action "chance outcome 4 (roll: 5)" +action: 4 + +# State 91 +# 1 1 o-1-S . . +# . . o . o . . +# o . o +# o . o +# S-o-o-o-o . o-o-o-o-o +# o . . . . . . . . o +# o-2-o-2-o . 1-o-o-o-S +# o . o +# o . o +# . . o . o . . +# . . S-2-o 2 . +# Turn: 1 +# Dice: 5 +IsTerminal() = False +History() = [3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4] +HistoryString() = "3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "1 1 o-1-S . .\n. . o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-o-o\no . . . . . . . . o\no-2-o-2-o . 1-o-o-o-S\n o . o \n o . o \n. . o . o . .\n. . S-2-o 2 .\nTurn: 1\nDice: 5\n" +ObservationString(1) = "1 1 o-1-S . .\n. . o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-o-o\no . . . . . . . . o\no-2-o-2-o . 1-o-o-o-S\n o . o \n o . o \n. . o . o . .\n. . S-2-o 2 .\nTurn: 1\nDice: 5\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [11, 26] +StringLegalActions() = ["11 - moves piece on field 9", "26 - moves piece on field 24"] + +# Apply action "26 - moves piece on field 24" +action: 26 + +# State 92 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 93 +# Apply action "17 - moves piece on field 15" +action: 17 + +# State 94 +# Apply action "chance outcome 4 (roll: 5)" +action: 4 + +# State 95 +# Apply action "11 - moves piece on field 9" +action: 11 + +# State 96 +# Apply action "chance outcome 0 (roll: 1)" +action: 0 + +# State 97 +# Apply action "19 - moves piece on field 17" +action: 19 + +# State 98 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 99 +# Apply action "1 - brings in new piece" +action: 1 + +# State 100 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 101 +# Apply action "2 - moves piece on field 0" +action: 2 + +# State 102 +# Apply action "chance outcome 2 (roll: 3)" +action: 2 + +# State 103 +# Apply action "8 - moves piece on field 6" +action: 8 + +# State 104 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 105 +# Apply action "1 - brings in new piece" +action: 1 + +# State 106 +# Apply action "chance outcome 0 (roll: 1)" +action: 0 + +# State 107 +# Apply action "2 - moves piece on field 0" +action: 2 + +# State 108 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 109 +# Apply action "31 - moves piece on field 29" +action: 31 + +# State 110 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 111 +# Apply action "1 - brings in new piece" +action: 1 + +# State 112 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 113 +# 1 . o-1-S . . +# . . o . o . . +# o . o +# o . o +# S-o-o-o-o . 1-o-o-o-o +# 2 . . . . . . . . o +# 2-o-o-o-o . o-o-o-2-2 +# 1 . o +# o . o +# . . o . o . . +# . . S-o-o . . +# Turn: 2 +# Dice: 2 +IsTerminal() = False +History() = [3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1] +HistoryString() = "3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "1 . o-1-S . .\n. . o . o . .\n o . o \n o . o \nS-o-o-o-o . 1-o-o-o-o\n2 . . . . . . . . o\n2-o-o-o-o . o-o-o-2-2\n 1 . o \n o . o \n. . o . o . .\n. . S-o-o . .\nTurn: 2\nDice: 2\n" +ObservationString(1) = "1 . o-1-S . .\n. . o . o . .\n o . o \n o . o \nS-o-o-o-o . 1-o-o-o-o\n2 . . . . . . . . o\n2-o-o-o-o . o-o-o-2-2\n 1 . o \n o . 
o \n. . o . o . .\n. . S-o-o . .\nTurn: 2\nDice: 2\n" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯ +ObservationTensor(1): ◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◉◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2, 3, 20, 21] +StringLegalActions() = ["2 - moves piece on field 0", "3 - moves piece on field 1", "20 - moves piece on field 18", "21 - moves piece on field 19"] + +# Apply action "21 - moves piece on field 19" +action: 21 + +# State 114 +# Apply action "chance outcome 4 (roll: 5)" +action: 4 + +# State 115 +# Apply action "35 - moves piece on field 33" +action: 35 + +# State 116 +# Apply action "chance outcome 4 (roll: 5)" +action: 4 + +# State 117 +# Apply action "2 - moves piece on field 0" +action: 2 + +# State 118 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 119 +# Apply action "11 - moves piece on field 9" +action: 11 + +# State 120 +# Apply action "chance outcome 4 (roll: 5)" +action: 4 + +# State 121 +# Apply action "3 - moves piece on field 1" +action: 3 + +# State 122 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 123 +# Apply action "13 - moves piece on field 11" +action: 13 + +# State 124 +# Apply action "chance outcome 0 (roll: 1)" +action: 0 + +# State 125 +# Apply action "8 - moves piece on field 6" +action: 8 + +# State 126 +# Apply action "chance outcome 4 (roll: 5)" +action: 4 + +# State 127 +# Apply action "16 - moves piece on field 14" +action: 16 + +# State 128 +# Apply action "chance outcome 2 (roll: 3)" +action: 2 + +# State 129 +# Apply action "7 - moves piece on field 5" +action: 7 + +# State 130 +# Apply action "chance outcome 0 (roll: 1)" +action: 0 + +# State 131 +# 1 . o-o-S . . +# . . o . o . . +# o . o +# o . o +# S-2-o-o-o . o-1-o-o-o +# o . . . . . . . . 1 +# 1-o-o-o-o . o-o-o-o-S +# o . o +# o . o +# . . o . 2 . . +# . . S-o-2 2 . +# Turn: 1 +# Dice: 1 +IsTerminal() = False +History() = [3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1, 21, 4, 35, 4, 2, 1, 11, 4, 3, 3, 13, 0, 8, 4, 16, 2, 7, 0] +HistoryString() = "3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1, 21, 4, 35, 4, 2, 1, 11, 4, 3, 3, 13, 0, 8, 4, 16, 2, 7, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "1 . o-o-S . .\n. . o . o . .\n o . o \n o . o \nS-2-o-o-o . o-1-o-o-o\no . . . . . . . . 1\n1-o-o-o-o . o-o-o-o-S\n o . o \n o . o \n. . o . 2 . .\n. . S-o-2 2 .\nTurn: 1\nDice: 1\n" +ObservationString(1) = "1 . o-o-S . .\n. . o . o . .\n o . o \n o . 
o \nS-2-o-o-o . o-1-o-o-o\no . . . . . . . . 1\n1-o-o-o-o . o-o-o-o-S\n o . o \n o . o \n. . o . 2 . .\n. . S-o-2 2 .\nTurn: 1\nDice: 1\n" +ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◉◯◯◯◉◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉◯◉◯◯◉◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [17, 21, 40] +StringLegalActions() = ["17 - moves piece on field 15", "21 - moves piece on field 19", "40 - moves piece on field 38"] + +# Apply action "17 - moves piece on field 15" +action: 17 + +# State 132 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 133 +# Apply action "1 - brings in new piece" +action: 1 + +# State 134 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 135 +# Apply action "10 - moves piece on field 8" +action: 10 + +# State 136 +# Apply action "chance outcome 4 (roll: 5)" +action: 4 + +# State 137 +# Apply action "21 - moves piece on field 19" +action: 21 + +# State 138 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 139 +# Apply action "9 - moves piece on field 7" +action: 9 + +# State 140 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 141 +# Apply action "18 - moves piece on field 16" +action: 18 + +# State 142 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 143 +# Apply action "1 - brings in new piece" +action: 1 + +# State 144 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 145 +# Apply action "23 - moves piece on field 21" +action: 23 + +# State 146 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 147 +# 1 1 o-o-S . . +# . . 2 . o . . +# o . o +# o . o +# S-o-o-o-o . o-o-o-o-o +# o . . . . . . . . o +# 1-o-o-o-o . 1-o-o-o-2 +# o . o +# o . o +# . . o . o . . +# . . 2-2-o . . +# Turn: 2 +# Dice: 6 +IsTerminal() = False +History() = [3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1, 21, 4, 35, 4, 2, 1, 11, 4, 3, 3, 13, 0, 8, 4, 16, 2, 7, 0, 17, 5, 1, 1, 10, 4, 21, 1, 9, 3, 18, 5, 1, 5, 23, 5] +HistoryString() = "3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1, 21, 4, 35, 4, 2, 1, 11, 4, 3, 3, 13, 0, 8, 4, 16, 2, 7, 0, 17, 5, 1, 1, 10, 4, 21, 1, 9, 3, 18, 5, 1, 5, 23, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "1 1 o-o-S . .\n. . 2 . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-o-o\no . . . . . . . . o\n1-o-o-o-o . 1-o-o-o-2\n o . o \n o . o \n. . o . o . .\n. . 2-2-o . .\nTurn: 2\nDice: 6\n" +ObservationString(1) = "1 1 o-o-S . .\n. . 2 . o . .\n o . o \n o . 
o \nS-o-o-o-o . o-o-o-o-o\no . . . . . . . . o\n1-o-o-o-o . 1-o-o-o-2\n o . o \n o . o \n. . o . o . .\n. . 2-2-o . .\nTurn: 2\nDice: 6\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0] +ObservationTensor(1) = [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2, 11, 12, 29] +StringLegalActions() = ["2 - moves piece on field 0", "11 - moves piece on field 9", "12 - moves piece on field 10", "29 - moves piece on field 27"] + +# Apply action "29 - moves piece on field 27" +action: 29 + +# State 148 +# Apply action "chance outcome 2 (roll: 3)" +action: 2 + +# State 149 +# Apply action "35 - moves piece on field 33" +action: 35 + +# State 150 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 151 +# Apply action "40 - moves piece on field 38" +action: 40 + +# State 152 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 153 +# Apply action "38 - moves piece on field 36" +action: 38 + +# State 154 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 155 +# Apply action "2 - moves piece on field 0" +action: 2 + +# State 156 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 157 +# Apply action "1 - brings in new 
piece" +action: 1 + +# State 158 +# Apply action "chance outcome 2 (roll: 3)" +action: 2 + +# State 159 +# Apply action "2 - moves piece on field 0" +action: 2 + +# State 160 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 161 +# Apply action "11 - moves piece on field 9" +action: 11 + +# State 162 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 163 +# Apply action "6 - moves piece on field 4" +action: 6 + +# State 164 +# Apply action "chance outcome 2 (roll: 3)" +action: 2 + +# State 165 +# Apply action "5 - moves piece on field 3" +action: 5 + +# State 166 +# Apply action "chance outcome 4 (roll: 5)" +action: 4 + +# State 167 +# Apply action "17 - moves piece on field 15" +action: 17 + +# State 168 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 169 +# Apply action "1 - brings in new piece" +action: 1 + +# State 170 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 171 +# Apply action "2 - moves piece on field 0" +action: 2 + +# State 172 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 173 +# Apply action "12 - moves piece on field 10" +action: 12 + +# State 174 +# Apply action "chance outcome 0 (roll: 1)" +action: 0 + +# State 175 +# Apply action "8 - moves piece on field 6" +action: 8 + +# State 176 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 177 +# Apply action "14 - moves piece on field 12" +action: 14 + +# State 178 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 179 +# 1 . o-o-S . . +# . . 1 . o . . +# o . o +# o . o +# S-o-o-o-1 . o-o-o-o-o +# o . . 1 . . 2 . . o +# o-o-o-o-2 . o-o-o-o-S +# o . o +# o . 2 +# . . o . o . . +# . . S-o-o 2 . +# Turn: 1 +# Dice: 2 +IsTerminal() = False +History() = [3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1, 21, 4, 35, 4, 2, 1, 11, 4, 3, 3, 13, 0, 8, 4, 16, 2, 7, 0, 17, 5, 1, 1, 10, 4, 21, 1, 9, 3, 18, 5, 1, 5, 23, 5, 29, 2, 35, 3, 40, 5, 38, 3, 2, 5, 1, 2, 2, 5, 11, 1, 6, 2, 5, 4, 17, 5, 1, 3, 2, 1, 12, 0, 8, 1, 14, 1] +HistoryString() = "3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1, 21, 4, 35, 4, 2, 1, 11, 4, 3, 3, 13, 0, 8, 4, 16, 2, 7, 0, 17, 5, 1, 1, 10, 4, 21, 1, 9, 3, 18, 5, 1, 5, 23, 5, 29, 2, 35, 3, 40, 5, 38, 3, 2, 5, 1, 2, 2, 5, 11, 1, 6, 2, 5, 4, 17, 5, 1, 3, 2, 1, 12, 0, 8, 1, 14, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "1 . o-o-S . .\n. . 1 . o . .\n o . o \n o . o \nS-o-o-o-1 . o-o-o-o-o\no . . 1 . . 2 . . o\no-o-o-o-2 . o-o-o-o-S\n o . o \n o . 2 \n. . o . o . .\n. . S-o-o 2 .\nTurn: 1\nDice: 2\n" +ObservationString(1) = "1 . o-o-S . .\n. . 1 . o . .\n o . o \n o . o \nS-o-o-o-1 . o-o-o-o-o\no . . 1 . . 2 . . o\no-o-o-o-2 . o-o-o-o-S\n o . o \n o . 2 \n. . o . o . .\n. . 
S-o-o 2 .\nTurn: 1\nDice: 2\n" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◉◯◯◯◯◉◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◉◯◉◯◯◯◉◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [6, 9] +StringLegalActions() = ["6 - moves piece on field 4", "9 - moves piece on field 7"] + +# Apply action "6 - moves piece on field 4" +action: 6 + +# State 180 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 181 +# Apply action "1 - brings in new piece" +action: 1 + +# State 182 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 183 +# 1 . o-o-S . . +# . . 1 . o . . +# 1 . o +# o . o +# S-o-o-o-o . o-o-o-o-o +# o . . 1 . . 2 . . o +# o-o-o-o-2 . o-o-o-o-2 +# o . o +# o . 2 +# . . o . o . . +# . . S-o-o . . +# Turn: 2 +# Dice: 2 +IsTerminal() = False +History() = [3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1, 21, 4, 35, 4, 2, 1, 11, 4, 3, 3, 13, 0, 8, 4, 16, 2, 7, 0, 17, 5, 1, 1, 10, 4, 21, 1, 9, 3, 18, 5, 1, 5, 23, 5, 29, 2, 35, 3, 40, 5, 38, 3, 2, 5, 1, 2, 2, 5, 11, 1, 6, 2, 5, 4, 17, 5, 1, 3, 2, 1, 12, 0, 8, 1, 14, 1, 6, 5, 1, 1] +HistoryString() = "3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1, 21, 4, 35, 4, 2, 1, 11, 4, 3, 3, 13, 0, 8, 4, 16, 2, 7, 0, 17, 5, 1, 1, 10, 4, 21, 1, 9, 3, 18, 5, 1, 5, 23, 5, 29, 2, 35, 3, 40, 5, 38, 3, 2, 5, 1, 2, 2, 5, 11, 1, 6, 2, 5, 4, 17, 5, 1, 3, 2, 1, 12, 0, 8, 1, 14, 1, 6, 5, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "1 . o-o-S . .\n. . 1 . o . .\n 1 . o \n o . o \nS-o-o-o-o . o-o-o-o-o\no . . 1 . . 2 . . o\no-o-o-o-2 . o-o-o-o-2\n o . o \n o . 2 \n. . o . o . .\n. . S-o-o . .\nTurn: 2\nDice: 2\n" +ObservationString(1) = "1 . o-o-S . .\n. . 1 . o . .\n 1 . o \n o . o \nS-o-o-o-o . o-o-o-o-o\no . . 1 . . 2 . . o\no-o-o-o-2 . o-o-o-o-2\n o . o \n o . 2 \n. . o . o . .\n. . S-o-o . 
.\nTurn: 2\nDice: 2\n" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◉◯◯◯◯ +ObservationTensor(1): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◉◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2, 8, 16] +StringLegalActions() = ["2 - moves piece on field 0", "8 - moves piece on field 6", "16 - moves piece on field 14"] + +# Apply action "16 - moves piece on field 14" +action: 16 + +# State 184 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 185 +# Apply action "9 - moves piece on field 7" +action: 9 + +# State 186 +# Apply action "chance outcome 4 (roll: 5)" +action: 4 + +# State 187 +# Apply action "8 - moves piece on field 6" +action: 8 + +# State 188 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 189 +# Apply action "11 - moves piece on field 9" +action: 11 + +# State 190 +# Apply action "chance outcome 0 (roll: 1)" +action: 0 + +# State 191 +# Apply action "13 - moves piece on field 11" +action: 13 + +# State 192 +# Apply action "chance outcome 4 (roll: 5)" +action: 4 + +# State 193 +# Apply action "8 - moves piece on field 6" +action: 8 + +# State 194 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 195 +# Apply action "18 - moves piece on field 16" +action: 18 + +# State 196 +# Apply action "chance outcome 2 (roll: 3)" +action: 2 + +# State 197 +# Apply action "15 - moves piece on field 13" +action: 15 + +# State 198 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 199 +# Apply action "20 - moves piece on field 18" +action: 20 + +# State 200 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 201 +# Apply action "1 - brings in new piece" +action: 1 + +# State 202 +# Apply action "chance outcome 2 (roll: 3)" +action: 2 + +# State 203 +# Apply action "2 - moves piece on field 0" +action: 2 + +# State 204 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 205 +# Apply action "2 - moves piece on field 0" +action: 2 + +# State 206 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 207 +# Apply action "13 - moves piece on field 11" +action: 13 + +# State 208 +# Apply action "chance outcome 0 (roll: 1)" +action: 0 + +# State 209 +# Apply action "14 - moves piece on field 12" +action: 14 + +# State 210 +# Apply action "chance outcome 2 (roll: 3)" +action: 2 + +# State 211 +# Apply action "5 - moves piece on field 3" +action: 5 + +# State 212 +# Apply action "chance outcome 0 (roll: 1)" +action: 0 + +# State 213 +# Apply action "4 - moves piece on field 2" +action: 4 + +# State 214 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 215 +# Apply action "18 - moves piece on field 16" +action: 18 + +# State 216 +# Apply action "chance outcome 0 (roll: 1)" +action: 0 + +# State 217 +# Apply action "15 - moves piece on field 13" +action: 15 + +# State 218 +# Apply action "chance outcome 0 (roll: 1)" +action: 0 + +# State 219 +# . . o-o-S . . +# . . o . o . . +# 1 . o +# o . o +# S-o-o-o-o . o-1-o-o-1 +# o . . 1 . . 2 . . o +# o-o-o-o-2 . o-2-o-o-S +# o . o +# o . o +# . . o . o . . +# . . S-o-o 2 . 
+# Turn: 1 +# Dice: 1 +IsTerminal() = False +History() = [3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1, 21, 4, 35, 4, 2, 1, 11, 4, 3, 3, 13, 0, 8, 4, 16, 2, 7, 0, 17, 5, 1, 1, 10, 4, 21, 1, 9, 3, 18, 5, 1, 5, 23, 5, 29, 2, 35, 3, 40, 5, 38, 3, 2, 5, 1, 2, 2, 5, 11, 1, 6, 2, 5, 4, 17, 5, 1, 3, 2, 1, 12, 0, 8, 1, 14, 1, 6, 5, 1, 1, 16, 1, 9, 4, 8, 3, 11, 0, 13, 4, 8, 1, 18, 2, 15, 1, 20, 5, 1, 2, 2, 1, 2, 3, 13, 0, 14, 2, 5, 0, 4, 1, 18, 0, 15, 0] +HistoryString() = "3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1, 21, 4, 35, 4, 2, 1, 11, 4, 3, 3, 13, 0, 8, 4, 16, 2, 7, 0, 17, 5, 1, 1, 10, 4, 21, 1, 9, 3, 18, 5, 1, 5, 23, 5, 29, 2, 35, 3, 40, 5, 38, 3, 2, 5, 1, 2, 2, 5, 11, 1, 6, 2, 5, 4, 17, 5, 1, 3, 2, 1, 12, 0, 8, 1, 14, 1, 6, 5, 1, 1, 16, 1, 9, 4, 8, 3, 11, 0, 13, 4, 8, 1, 18, 2, 15, 1, 20, 5, 1, 2, 2, 1, 2, 3, 13, 0, 14, 2, 5, 0, 4, 1, 18, 0, 15, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = ". . o-o-S . .\n. . o . o . .\n 1 . o \n o . o \nS-o-o-o-o . o-1-o-o-1\no . . 1 . . 2 . . o\no-o-o-o-2 . o-2-o-o-S\n o . o \n o . o \n. . o . o . .\n. . S-o-o 2 .\nTurn: 1\nDice: 1\n" +ObservationString(1) = ". . o-o-S . .\n. . o . o . .\n 1 . o \n o . o \nS-o-o-o-o . o-1-o-o-1\no . . 1 . . 2 . . o\no-o-o-o-2 . o-2-o-o-S\n o . o \n o . o \n. . o . o . .\n. . S-o-o 2 .\nTurn: 1\nDice: 1\n" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◉◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [8, 17, 20, 44] +StringLegalActions() = ["8 - moves piece on field 6", "17 - moves piece on field 15", "20 - moves piece on field 18", "44 - moves piece on field 42"] + +# Apply action "44 - moves piece on field 42" +action: 44 + +# State 220 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 221 +# Apply action "16 - moves piece on field 14" +action: 16 + +# State 222 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 223 +# Apply action "8 - moves piece on field 6" +action: 8 + +# State 224 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 225 +# Apply action "20 - moves piece on field 18" +action: 20 + +# State 226 +# Apply action "chance outcome 2 (roll: 3)" +action: 2 + +# State 227 +# Apply action "17 - moves piece on field 15" +action: 17 + +# State 228 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 229 +# . . o-o-S . . +# . . o . o . . +# o . 1 +# o . o +# S-o-o-o-o . o-o-o-o-1 +# o . . . 1 . 2 . . 
o +# o-o-2-o-o . 1-2-o-o-S +# o . o +# o . o +# . . o . o . . +# . . S-o-o 2 . +# Turn: 2 +# Dice: 6 +IsTerminal() = False +History() = [3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1, 21, 4, 35, 4, 2, 1, 11, 4, 3, 3, 13, 0, 8, 4, 16, 2, 7, 0, 17, 5, 1, 1, 10, 4, 21, 1, 9, 3, 18, 5, 1, 5, 23, 5, 29, 2, 35, 3, 40, 5, 38, 3, 2, 5, 1, 2, 2, 5, 11, 1, 6, 2, 5, 4, 17, 5, 1, 3, 2, 1, 12, 0, 8, 1, 14, 1, 6, 5, 1, 1, 16, 1, 9, 4, 8, 3, 11, 0, 13, 4, 8, 1, 18, 2, 15, 1, 20, 5, 1, 2, 2, 1, 2, 3, 13, 0, 14, 2, 5, 0, 4, 1, 18, 0, 15, 0, 44, 1, 16, 5, 8, 5, 20, 2, 17, 5] +HistoryString() = "3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1, 21, 4, 35, 4, 2, 1, 11, 4, 3, 3, 13, 0, 8, 4, 16, 2, 7, 0, 17, 5, 1, 1, 10, 4, 21, 1, 9, 3, 18, 5, 1, 5, 23, 5, 29, 2, 35, 3, 40, 5, 38, 3, 2, 5, 1, 2, 2, 5, 11, 1, 6, 2, 5, 4, 17, 5, 1, 3, 2, 1, 12, 0, 8, 1, 14, 1, 6, 5, 1, 1, 16, 1, 9, 4, 8, 3, 11, 0, 13, 4, 8, 1, 18, 2, 15, 1, 20, 5, 1, 2, 2, 1, 2, 3, 13, 0, 14, 2, 5, 0, 4, 1, 18, 0, 15, 0, 44, 1, 16, 5, 8, 5, 20, 2, 17, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = ". . o-o-S . .\n. . o . o . .\n o . 1 \n o . o \nS-o-o-o-o . o-o-o-o-1\no . . . 1 . 2 . . o\no-o-2-o-o . 1-2-o-o-S\n o . o \n o . o \n. . o . o . .\n. . S-o-o 2 .\nTurn: 2\nDice: 6\n" +ObservationString(1) = ". . o-o-S . .\n. . o . o . .\n o . 1 \n o . o \nS-o-o-o-o . o-o-o-o-1\no . . . 1 . 2 . . o\no-o-2-o-o . 1-2-o-o-S\n o . o \n o . o \n. . o . o . .\n. . 
S-o-o 2 .\nTurn: 2\nDice: 6\n" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◉ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1] +StringLegalActions() = ["1 - brings in new piece"] + +# Apply action "1 - brings in new piece" +action: 1 + +# State 230 +# Apply action "chance outcome 4 (roll: 5)" +action: 4 + +# State 231 +# Apply action "2 - moves piece on field 0" +action: 2 + +# State 232 +# Apply action "chance outcome 4 (roll: 5)" +action: 4 + +# State 233 +# Apply action "26 - moves piece on field 24" +action: 26 + +# State 234 +# Apply action "chance outcome 4 (roll: 5)" +action: 4 + +# State 235 +# Apply action "7 - moves piece on field 5" +action: 7 + +# State 236 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 237 +# Apply action "31 - moves piece on field 29" +action: 31 + +# State 238 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 239 +# Apply action "5 - moves piece on field 3" +action: 5 + +# State 240 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 241 +# Apply action "14 - moves piece on field 12" +action: 14 + +# State 242 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 243 +# Apply action "18 - moves piece on field 16" +action: 18 + +# State 244 +# Apply action "chance outcome 2 (roll: 3)" +action: 2 + +# State 245 +# Apply action "16 - moves piece on field 14" +action: 16 + +# State 246 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 247 +# Apply action "22 - moves piece on field 20" +action: 22 + +# State 248 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 249 +# Apply action "33 - moves piece on field 31" +action: 33 + +# State 250 +# Apply action "chance outcome 0 (roll: 1)" +action: 0 + +# State 251 +# Apply action "39 - moves piece on field 37" +action: 39 + +# State 252 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 253 +# Apply action "7 - moves piece on field 5" +action: 7 + +# State 254 +# Apply action "chance outcome 2 (roll: 3)" +action: 2 + +# State 255 +# . . o-o-S . . +# . . o . o . . +# o . o +# o . o +# S-o-2-o-o . o-o-o-1-1 +# o . . . 1 . 2 . . o +# 1-o-o-o-o . o-o-o-o-S +# o . o +# o . o +# . . o . 2 . . +# . . 2-o-o . . 
+# Turn: 1 +# Dice: 3 +IsTerminal() = False +History() = [3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1, 21, 4, 35, 4, 2, 1, 11, 4, 3, 3, 13, 0, 8, 4, 16, 2, 7, 0, 17, 5, 1, 1, 10, 4, 21, 1, 9, 3, 18, 5, 1, 5, 23, 5, 29, 2, 35, 3, 40, 5, 38, 3, 2, 5, 1, 2, 2, 5, 11, 1, 6, 2, 5, 4, 17, 5, 1, 3, 2, 1, 12, 0, 8, 1, 14, 1, 6, 5, 1, 1, 16, 1, 9, 4, 8, 3, 11, 0, 13, 4, 8, 1, 18, 2, 15, 1, 20, 5, 1, 2, 2, 1, 2, 3, 13, 0, 14, 2, 5, 0, 4, 1, 18, 0, 15, 0, 44, 1, 16, 5, 8, 5, 20, 2, 17, 5, 1, 4, 2, 4, 26, 4, 7, 1, 31, 1, 5, 1, 14, 3, 18, 2, 16, 1, 22, 5, 33, 0, 39, 1, 7, 2] +HistoryString() = "3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1, 21, 4, 35, 4, 2, 1, 11, 4, 3, 3, 13, 0, 8, 4, 16, 2, 7, 0, 17, 5, 1, 1, 10, 4, 21, 1, 9, 3, 18, 5, 1, 5, 23, 5, 29, 2, 35, 3, 40, 5, 38, 3, 2, 5, 1, 2, 2, 5, 11, 1, 6, 2, 5, 4, 17, 5, 1, 3, 2, 1, 12, 0, 8, 1, 14, 1, 6, 5, 1, 1, 16, 1, 9, 4, 8, 3, 11, 0, 13, 4, 8, 1, 18, 2, 15, 1, 20, 5, 1, 2, 2, 1, 2, 3, 13, 0, 14, 2, 5, 0, 4, 1, 18, 0, 15, 0, 44, 1, 16, 5, 8, 5, 20, 2, 17, 5, 1, 4, 2, 4, 26, 4, 7, 1, 31, 1, 5, 1, 14, 3, 18, 2, 16, 1, 22, 5, 33, 0, 39, 1, 7, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = ". . o-o-S . .\n. . o . o . .\n o . o \n o . o \nS-o-2-o-o . o-o-o-1-1\no . . . 1 . 2 . . o\n1-o-o-o-o . o-o-o-o-S\n o . o \n o . o \n. . o . 2 . .\n. . 2-o-o . .\nTurn: 1\nDice: 3\n" +ObservationString(1) = ". . o-o-S . .\n. . o . o . .\n o . o \n o . o \nS-o-2-o-o . o-o-o-1-1\no . . . 1 . 2 . . o\n1-o-o-o-o . o-o-o-o-S\n o . o \n o . o \n. . o . 2 . .\n. . 2-o-o . 
.\nTurn: 1\nDice: 3\n" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [19, 20, 40] +StringLegalActions() = ["19 - moves piece on field 17", "20 - moves piece on field 18", "40 - moves piece on field 38"] + +# Apply action "40 - moves piece on field 38" +action: 40 + +# State 256 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 257 +# Apply action "12 - moves piece on field 10" +action: 12 + +# State 258 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 259 +# Apply action "9 - moves piece on field 7" +action: 9 + +# State 260 +# Apply action "chance outcome 2 (roll: 3)" +action: 2 + +# State 261 +# Apply action "18 - moves piece on field 16" +action: 18 + +# State 262 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 263 +# Apply action "20 - moves piece on field 18" +action: 20 + +# State 264 +# Apply action "chance outcome 0 (roll: 1)" +action: 0 + +# State 265 +# . . o-o-S . . +# . . o . o . . +# o . o +# o . o +# S-o-2-o-o . o-o-o-1-o +# 2 . 1 . 1 . 2 . . o +# o-o-o-o-o . o-o-1-o-S +# 2 . o +# o . o +# . . o . o . . +# . . S-o-o . . +# Turn: 2 +# Dice: 1 +IsTerminal() = False +History() = [3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1, 21, 4, 35, 4, 2, 1, 11, 4, 3, 3, 13, 0, 8, 4, 16, 2, 7, 0, 17, 5, 1, 1, 10, 4, 21, 1, 9, 3, 18, 5, 1, 5, 23, 5, 29, 2, 35, 3, 40, 5, 38, 3, 2, 5, 1, 2, 2, 5, 11, 1, 6, 2, 5, 4, 17, 5, 1, 3, 2, 1, 12, 0, 8, 1, 14, 1, 6, 5, 1, 1, 16, 1, 9, 4, 8, 3, 11, 0, 13, 4, 8, 1, 18, 2, 15, 1, 20, 5, 1, 2, 2, 1, 2, 3, 13, 0, 14, 2, 5, 0, 4, 1, 18, 0, 15, 0, 44, 1, 16, 5, 8, 5, 20, 2, 17, 5, 1, 4, 2, 4, 26, 4, 7, 1, 31, 1, 5, 1, 14, 3, 18, 2, 16, 1, 22, 5, 33, 0, 39, 1, 7, 2, 40, 5, 12, 5, 9, 2, 18, 3, 20, 0] +HistoryString() = "3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1, 21, 4, 35, 4, 2, 1, 11, 4, 3, 3, 13, 0, 8, 4, 16, 2, 7, 0, 17, 5, 1, 1, 10, 4, 21, 1, 9, 3, 18, 5, 1, 5, 23, 5, 29, 2, 35, 3, 40, 5, 38, 3, 2, 5, 1, 2, 2, 5, 11, 1, 6, 2, 5, 4, 17, 5, 1, 3, 2, 1, 12, 0, 8, 1, 14, 1, 6, 5, 1, 1, 16, 1, 9, 4, 8, 3, 11, 0, 13, 4, 8, 1, 18, 2, 15, 1, 20, 5, 1, 2, 2, 1, 2, 3, 13, 0, 14, 2, 5, 0, 4, 1, 18, 0, 15, 0, 44, 1, 16, 5, 8, 5, 20, 2, 17, 5, 1, 4, 2, 4, 26, 4, 7, 1, 31, 1, 5, 1, 14, 3, 18, 2, 16, 1, 22, 5, 33, 0, 39, 1, 7, 2, 40, 5, 12, 5, 9, 2, 18, 3, 20, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = ". . o-o-S . 
.\n. . o . o . .\n o . o \n o . o \nS-o-2-o-o . o-o-o-1-o\n2 . 1 . 1 . 2 . . o\no-o-o-o-o . o-o-1-o-S\n 2 . o \n o . o \n. . o . o . .\n. . S-o-o . .\nTurn: 2\nDice: 1\n" +ObservationString(1) = ". . o-o-S . .\n. . o . o . .\n o . o \n o . o \nS-o-2-o-o . o-o-o-1-o\n2 . 1 . 1 . 2 . . o\no-o-o-o-o . o-o-1-o-S\n 2 . o \n o . o \n. . o . o . .\n. . S-o-o . .\nTurn: 2\nDice: 1\n" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [15, 21, 24, 44] +StringLegalActions() = ["15 - moves piece on field 13", "21 - moves piece on field 19", "24 - moves piece on field 22", "44 - moves piece on field 42"] + +# Apply action "24 - moves piece on field 22" +action: 24 + +# State 266 +# Apply action "chance outcome 4 (roll: 5)" +action: 4 + +# State 267 +# Apply action "24 - moves piece on field 22" +action: 24 + +# State 268 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 269 +# Apply action "15 - moves piece on field 13" +action: 15 + +# State 270 +# Apply action "chance outcome 4 (roll: 5)" +action: 4 + +# State 271 +# Apply action "29 - moves piece on field 27" +action: 29 + +# State 272 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 273 +# Apply action "21 - moves piece on field 19" +action: 21 + +# State 274 +# Apply action "chance outcome 4 (roll: 5)" +action: 4 + +# State 275 +# Apply action "25 - moves piece on field 23" +action: 25 + +# State 276 +# Apply action "chance outcome 2 (roll: 3)" +action: 2 + +# State 277 +# Apply action "19 - moves piece on field 17" +action: 19 + +# State 278 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 279 +# Apply action "30 - moves piece on field 28" +action: 30 + +# State 280 +# Apply action "chance outcome 0 (roll: 1)" +action: 0 + +# State 281 +# Apply action "19 - moves piece on field 17" +action: 19 + +# State 282 +# Apply action "chance outcome 4 (roll: 5)" +action: 4 + +# State 283 +# Apply action "22 - moves piece on field 20" +action: 22 + +# State 284 +# Apply action "chance outcome 0 (roll: 1)" +action: 0 + +# State 285 +# Apply action "20 - moves piece on field 18" +action: 20 + +# State 286 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 287 +# Apply action "34 - moves piece on field 32" +action: 34 + +# State 288 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 289 +# Apply action "27 - moves piece on field 25" +action: 27 + +# State 290 +# Apply action "chance outcome 2 (roll: 3)" +action: 2 + +# State 291 +# Apply action "36 - moves piece on field 34" +action: 36 + +# State 292 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 293 +# Apply action "27 - moves piece on field 25" +action: 27 + +# State 294 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 295 +# Apply action "21 - moves piece on field 19" +action: 21 + +# State 296 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 297 +# Apply action "31 - moves piece on field 29" +action: 31 + +# State 298 +# Apply action "chance 
outcome 0 (roll: 1)" +action: 0 + +# State 299 +# . . o-o-S . . +# . . o . 2 . . +# o . o +# o . o +# S-o-o-2-o . o-o-o-2-o +# o . 1 . 1 . 2 . . o +# o-o-1-o-o . o-o-o-o-S +# 1 . o +# o . o +# . . o . o . . +# . . S-o-o . . +# Turn: 2 +# Dice: 1 +IsTerminal() = False +History() = [3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1, 21, 4, 35, 4, 2, 1, 11, 4, 3, 3, 13, 0, 8, 4, 16, 2, 7, 0, 17, 5, 1, 1, 10, 4, 21, 1, 9, 3, 18, 5, 1, 5, 23, 5, 29, 2, 35, 3, 40, 5, 38, 3, 2, 5, 1, 2, 2, 5, 11, 1, 6, 2, 5, 4, 17, 5, 1, 3, 2, 1, 12, 0, 8, 1, 14, 1, 6, 5, 1, 1, 16, 1, 9, 4, 8, 3, 11, 0, 13, 4, 8, 1, 18, 2, 15, 1, 20, 5, 1, 2, 2, 1, 2, 3, 13, 0, 14, 2, 5, 0, 4, 1, 18, 0, 15, 0, 44, 1, 16, 5, 8, 5, 20, 2, 17, 5, 1, 4, 2, 4, 26, 4, 7, 1, 31, 1, 5, 1, 14, 3, 18, 2, 16, 1, 22, 5, 33, 0, 39, 1, 7, 2, 40, 5, 12, 5, 9, 2, 18, 3, 20, 0, 24, 4, 24, 3, 15, 4, 29, 5, 21, 4, 25, 2, 19, 5, 30, 0, 19, 4, 22, 0, 20, 3, 34, 5, 27, 2, 36, 3, 27, 3, 21, 3, 31, 0] +HistoryString() = "3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1, 21, 4, 35, 4, 2, 1, 11, 4, 3, 3, 13, 0, 8, 4, 16, 2, 7, 0, 17, 5, 1, 1, 10, 4, 21, 1, 9, 3, 18, 5, 1, 5, 23, 5, 29, 2, 35, 3, 40, 5, 38, 3, 2, 5, 1, 2, 2, 5, 11, 1, 6, 2, 5, 4, 17, 5, 1, 3, 2, 1, 12, 0, 8, 1, 14, 1, 6, 5, 1, 1, 16, 1, 9, 4, 8, 3, 11, 0, 13, 4, 8, 1, 18, 2, 15, 1, 20, 5, 1, 2, 2, 1, 2, 3, 13, 0, 14, 2, 5, 0, 4, 1, 18, 0, 15, 0, 44, 1, 16, 5, 8, 5, 20, 2, 17, 5, 1, 4, 2, 4, 26, 4, 7, 1, 31, 1, 5, 1, 14, 3, 18, 2, 16, 1, 22, 5, 33, 0, 39, 1, 7, 2, 40, 5, 12, 5, 9, 2, 18, 3, 20, 0, 24, 4, 24, 3, 15, 4, 29, 5, 21, 4, 25, 2, 19, 5, 30, 0, 19, 4, 22, 0, 20, 3, 34, 5, 27, 2, 36, 3, 27, 3, 21, 3, 31, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = ". . o-o-S . .\n. . o . 2 . .\n o . o \n o . o \nS-o-o-2-o . o-o-o-2-o\no . 1 . 1 . 2 . . o\no-o-1-o-o . o-o-o-o-S\n 1 . o \n o . o \n. . o . o . .\n. . S-o-o . .\nTurn: 2\nDice: 1\n" +ObservationString(1) = ". . o-o-S . .\n. . o . 2 . .\n o . o \n o . o \nS-o-o-2-o . o-o-o-2-o\no . 1 . 1 . 2 . . o\no-o-1-o-o . o-o-o-o-S\n 1 . o \n o . o \n. . o . o . .\n. . S-o-o . 
.\nTurn: 2\nDice: 1\n" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [25, 33, 39, 44] +StringLegalActions() = ["25 - moves piece on field 23", "33 - moves piece on field 31", "39 - moves piece on field 37", "44 - moves piece on field 42"] + +# Apply action "25 - moves piece on field 23" +action: 25 + +# State 300 +# Apply action "chance outcome 0 (roll: 1)" +action: 0 + +# State 301 +# Apply action "35 - moves piece on field 33" +action: 35 + +# State 302 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 303 +# Apply action "33 - moves piece on field 31" +action: 33 + +# State 304 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 305 +# . . o-o-S . . +# . . o . o . . +# o . o +# o . o +# S-o-o-o-2 . o-2-o-2-o +# o . 1 . 1 . 2 . . o +# o-o-1-o-1 . o-o-o-o-S +# o . o +# o . o +# . . o . o . . +# . . S-o-o . . +# Turn: 1 +# Dice: 2 +IsTerminal() = False +History() = [3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1, 21, 4, 35, 4, 2, 1, 11, 4, 3, 3, 13, 0, 8, 4, 16, 2, 7, 0, 17, 5, 1, 1, 10, 4, 21, 1, 9, 3, 18, 5, 1, 5, 23, 5, 29, 2, 35, 3, 40, 5, 38, 3, 2, 5, 1, 2, 2, 5, 11, 1, 6, 2, 5, 4, 17, 5, 1, 3, 2, 1, 12, 0, 8, 1, 14, 1, 6, 5, 1, 1, 16, 1, 9, 4, 8, 3, 11, 0, 13, 4, 8, 1, 18, 2, 15, 1, 20, 5, 1, 2, 2, 1, 2, 3, 13, 0, 14, 2, 5, 0, 4, 1, 18, 0, 15, 0, 44, 1, 16, 5, 8, 5, 20, 2, 17, 5, 1, 4, 2, 4, 26, 4, 7, 1, 31, 1, 5, 1, 14, 3, 18, 2, 16, 1, 22, 5, 33, 0, 39, 1, 7, 2, 40, 5, 12, 5, 9, 2, 18, 3, 20, 0, 24, 4, 24, 3, 15, 4, 29, 5, 21, 4, 25, 2, 19, 5, 30, 0, 19, 4, 22, 0, 20, 3, 34, 5, 27, 2, 36, 3, 27, 3, 21, 3, 31, 0, 25, 0, 35, 3, 33, 1] +HistoryString() = "3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1, 21, 4, 35, 4, 2, 1, 11, 4, 3, 3, 13, 0, 8, 4, 16, 2, 7, 0, 17, 5, 1, 1, 10, 4, 21, 1, 9, 3, 18, 5, 1, 5, 23, 5, 29, 2, 35, 3, 40, 5, 38, 3, 2, 5, 1, 2, 2, 5, 11, 1, 6, 2, 5, 4, 17, 5, 1, 3, 2, 1, 12, 0, 8, 1, 14, 1, 6, 5, 1, 1, 16, 1, 9, 4, 8, 3, 11, 0, 13, 4, 8, 1, 18, 2, 15, 1, 20, 5, 1, 2, 2, 1, 2, 3, 13, 0, 14, 2, 5, 0, 4, 1, 18, 0, 15, 0, 44, 1, 16, 5, 8, 5, 20, 2, 17, 5, 1, 4, 2, 4, 26, 4, 7, 1, 31, 1, 5, 1, 14, 3, 18, 2, 16, 1, 22, 5, 33, 0, 39, 1, 7, 2, 40, 5, 12, 5, 9, 2, 18, 3, 20, 0, 24, 4, 24, 3, 15, 4, 29, 5, 21, 4, 25, 2, 19, 5, 30, 0, 19, 4, 22, 0, 20, 3, 34, 5, 27, 2, 36, 3, 27, 3, 21, 3, 31, 0, 25, 0, 35, 3, 33, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 
+ObservationString(0) = ". . o-o-S . .\n. . o . o . .\n o . o \n o . o \nS-o-o-o-2 . o-2-o-2-o\no . 1 . 1 . 2 . . o\no-o-1-o-1 . o-o-o-o-S\n o . o \n o . o \n. . o . o . .\n. . S-o-o . .\nTurn: 1\nDice: 2\n" +ObservationString(1) = ". . o-o-S . .\n. . o . o . .\n o . o \n o . o \nS-o-o-o-2 . o-2-o-2-o\no . 1 . 1 . 2 . . o\no-o-1-o-1 . o-o-o-o-S\n o . o \n o . o \n. . o . o . .\n. . S-o-o . .\nTurn: 1\nDice: 2\n" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [38] +StringLegalActions() = ["38 - moves piece on field 36"] + +# Apply action "38 - moves piece on field 36" +action: 38 + +# State 306 +# Apply action "chance outcome 0 (roll: 1)" +action: 0 + +# State 307 +# Apply action "26 - moves piece on field 24" +action: 26 + +# State 308 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 309 +# Apply action "36 - moves piece on field 34" +action: 36 + +# State 310 +# Apply action "chance outcome 0 (roll: 1)" +action: 0 + +# State 311 +# Apply action "44 - moves piece on field 42" +action: 44 + +# State 312 +# Apply action "chance outcome 5 (roll: 6)" +action: 5 + +# State 313 +# Apply action "38 - moves piece on field 36" +action: 38 + +# State 314 +# Apply action "chance outcome 0 (roll: 1)" +action: 0 + +# State 315 +# Apply action "40 - moves piece on field 38" +action: 40 + +# State 316 +# Apply action "chance outcome 4 (roll: 5)" +action: 4 + +# State 317 +# Apply action "39 - moves piece on field 37" +action: 39 + +# State 318 +# Apply action "chance outcome 2 (roll: 3)" +action: 2 + +# State 319 +# Apply action "0 - passes" +action: 0 + +# State 320 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 321 +# Apply action "27 - moves piece on field 25" +action: 27 + +# State 322 +# Apply action "chance outcome 2 (roll: 3)" +action: 2 + +# State 323 +# Apply action "0 - passes" +action: 0 + +# State 324 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 325 +# Apply action "37 - moves piece on field 35" +action: 37 + +# State 326 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 327 +# Apply action "0 - passes" +action: 0 + +# State 328 +# Apply action "chance outcome 2 (roll: 3)" +action: 2 + +# State 329 +# Apply action "39 - moves piece on field 37" +action: 39 + +# State 330 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 331 +# Apply action "0 - passes" +action: 0 + +# State 332 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 333 +# Apply action "31 - moves piece on field 29" +action: 31 + +# State 334 +# Apply action "chance outcome 2 (roll: 3)" +action: 2 + +# State 335 +# Apply action "0 - passes" +action: 0 + +# State 336 +# Apply action "chance outcome 2 (roll: 3)" +action: 2 + +# State 337 +# Apply action "35 - moves piece on field 33" +action: 35 + +# State 338 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 339 +# Apply action "0 - passes" +action: 0 + +# State 340 +# Apply action "chance outcome 1 (roll: 2)" +action: 1 + +# State 
341 +# . . o-o-S . . +# . . o . o . . +# o . o +# o . o +# S-o-o-o-o . o-o-2-o-o +# 1 . 1 1 1 2 2 . 2 o +# o-o-o-o-o . o-o-o-o-S +# o . o +# o . o +# . . o . o . . +# . . S-o-o . . +# Turn: 2 +# Dice: 2 +IsTerminal() = False +History() = [3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1, 21, 4, 35, 4, 2, 1, 11, 4, 3, 3, 13, 0, 8, 4, 16, 2, 7, 0, 17, 5, 1, 1, 10, 4, 21, 1, 9, 3, 18, 5, 1, 5, 23, 5, 29, 2, 35, 3, 40, 5, 38, 3, 2, 5, 1, 2, 2, 5, 11, 1, 6, 2, 5, 4, 17, 5, 1, 3, 2, 1, 12, 0, 8, 1, 14, 1, 6, 5, 1, 1, 16, 1, 9, 4, 8, 3, 11, 0, 13, 4, 8, 1, 18, 2, 15, 1, 20, 5, 1, 2, 2, 1, 2, 3, 13, 0, 14, 2, 5, 0, 4, 1, 18, 0, 15, 0, 44, 1, 16, 5, 8, 5, 20, 2, 17, 5, 1, 4, 2, 4, 26, 4, 7, 1, 31, 1, 5, 1, 14, 3, 18, 2, 16, 1, 22, 5, 33, 0, 39, 1, 7, 2, 40, 5, 12, 5, 9, 2, 18, 3, 20, 0, 24, 4, 24, 3, 15, 4, 29, 5, 21, 4, 25, 2, 19, 5, 30, 0, 19, 4, 22, 0, 20, 3, 34, 5, 27, 2, 36, 3, 27, 3, 21, 3, 31, 0, 25, 0, 35, 3, 33, 1, 38, 0, 26, 1, 36, 0, 44, 5, 38, 0, 40, 4, 39, 2, 0, 3, 27, 2, 0, 1, 37, 1, 0, 2, 39, 3, 0, 3, 31, 2, 0, 2, 35, 3, 0, 1] +HistoryString() = "3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1, 21, 4, 35, 4, 2, 1, 11, 4, 3, 3, 13, 0, 8, 4, 16, 2, 7, 0, 17, 5, 1, 1, 10, 4, 21, 1, 9, 3, 18, 5, 1, 5, 23, 5, 29, 2, 35, 3, 40, 5, 38, 3, 2, 5, 1, 2, 2, 5, 11, 1, 6, 2, 5, 4, 17, 5, 1, 3, 2, 1, 12, 0, 8, 1, 14, 1, 6, 5, 1, 1, 16, 1, 9, 4, 8, 3, 11, 0, 13, 4, 8, 1, 18, 2, 15, 1, 20, 5, 1, 2, 2, 1, 2, 3, 13, 0, 14, 2, 5, 0, 4, 1, 18, 0, 15, 0, 44, 1, 16, 5, 8, 5, 20, 2, 17, 5, 1, 4, 2, 4, 26, 4, 7, 1, 31, 1, 5, 1, 14, 3, 18, 2, 16, 1, 22, 5, 33, 0, 39, 1, 7, 2, 40, 5, 12, 5, 9, 2, 18, 3, 20, 0, 24, 4, 24, 3, 15, 4, 29, 5, 21, 4, 25, 2, 19, 5, 30, 0, 19, 4, 22, 0, 20, 3, 34, 5, 27, 2, 36, 3, 27, 3, 21, 3, 31, 0, 25, 0, 35, 3, 33, 1, 38, 0, 26, 1, 36, 0, 44, 5, 38, 0, 40, 4, 39, 2, 0, 3, 27, 2, 0, 1, 37, 1, 0, 2, 39, 3, 0, 3, 31, 2, 0, 2, 35, 3, 0, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = ". . o-o-S . .\n. . o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-2-o-o\n1 . 1 1 1 2 2 . 2 o\no-o-o-o-o . o-o-o-o-S\n o . o \n o . o \n. . o . o . .\n. . S-o-o . .\nTurn: 2\nDice: 2\n" +ObservationString(1) = ". . o-o-S . .\n. . o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-2-o-o\n1 . 1 1 1 2 2 . 2 o\no-o-o-o-o . o-o-o-o-S\n o . o \n o . o \n. . o . o . .\n. . S-o-o . 
.\nTurn: 2\nDice: 2\n" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [38] +StringLegalActions() = ["38 - moves piece on field 36"] + +# Apply action "38 - moves piece on field 36" +action: 38 + +# State 342 +# Apply action "chance outcome 2 (roll: 3)" +action: 2 + +# State 343 +# . . o-o-S . . +# . . o . o . . +# o . o +# o . o +# S-o-o-o-o . o-o-o-o-2 +# 1 . 1 1 1 2 2 . 2 o +# o-o-o-o-o . o-o-o-o-S +# o . o +# o . o +# . . o . o . . +# . . S-o-o . . +# Turn: 1 +# Dice: 3 +IsTerminal() = False +History() = [3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1, 21, 4, 35, 4, 2, 1, 11, 4, 3, 3, 13, 0, 8, 4, 16, 2, 7, 0, 17, 5, 1, 1, 10, 4, 21, 1, 9, 3, 18, 5, 1, 5, 23, 5, 29, 2, 35, 3, 40, 5, 38, 3, 2, 5, 1, 2, 2, 5, 11, 1, 6, 2, 5, 4, 17, 5, 1, 3, 2, 1, 12, 0, 8, 1, 14, 1, 6, 5, 1, 1, 16, 1, 9, 4, 8, 3, 11, 0, 13, 4, 8, 1, 18, 2, 15, 1, 20, 5, 1, 2, 2, 1, 2, 3, 13, 0, 14, 2, 5, 0, 4, 1, 18, 0, 15, 0, 44, 1, 16, 5, 8, 5, 20, 2, 17, 5, 1, 4, 2, 4, 26, 4, 7, 1, 31, 1, 5, 1, 14, 3, 18, 2, 16, 1, 22, 5, 33, 0, 39, 1, 7, 2, 40, 5, 12, 5, 9, 2, 18, 3, 20, 0, 24, 4, 24, 3, 15, 4, 29, 5, 21, 4, 25, 2, 19, 5, 30, 0, 19, 4, 22, 0, 20, 3, 34, 5, 27, 2, 36, 3, 27, 3, 21, 3, 31, 0, 25, 0, 35, 3, 33, 1, 38, 0, 26, 1, 36, 0, 44, 5, 38, 0, 40, 4, 39, 2, 0, 3, 27, 2, 0, 1, 37, 1, 0, 2, 39, 3, 0, 3, 31, 2, 0, 2, 35, 3, 0, 1, 38, 2] +HistoryString() = "3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1, 21, 4, 35, 4, 2, 1, 11, 4, 3, 3, 13, 0, 8, 4, 16, 2, 7, 0, 17, 5, 1, 1, 10, 4, 21, 1, 9, 3, 18, 5, 1, 5, 23, 5, 29, 2, 35, 3, 40, 5, 38, 3, 2, 5, 1, 2, 2, 5, 11, 1, 6, 2, 5, 4, 17, 5, 1, 3, 2, 1, 12, 0, 8, 1, 14, 1, 6, 5, 1, 1, 16, 1, 9, 4, 8, 3, 11, 0, 13, 4, 8, 1, 18, 2, 15, 1, 20, 5, 1, 2, 2, 1, 2, 3, 13, 0, 14, 2, 5, 0, 4, 1, 18, 0, 15, 0, 44, 1, 16, 5, 8, 5, 20, 2, 17, 5, 1, 4, 2, 4, 26, 4, 7, 1, 31, 1, 5, 1, 14, 3, 18, 2, 16, 1, 22, 5, 33, 0, 39, 1, 7, 2, 40, 5, 12, 5, 9, 2, 18, 3, 20, 0, 24, 4, 24, 3, 15, 4, 29, 5, 21, 4, 25, 2, 19, 5, 30, 0, 19, 4, 22, 0, 20, 3, 34, 5, 27, 2, 36, 3, 27, 3, 21, 3, 31, 0, 25, 0, 35, 3, 33, 1, 38, 0, 26, 1, 36, 0, 44, 5, 38, 0, 40, 4, 39, 2, 0, 3, 27, 2, 0, 1, 37, 1, 0, 2, 39, 3, 0, 3, 31, 2, 0, 2, 35, 3, 0, 1, 38, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = ". . o-o-S . .\n. . o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-o-2\n1 . 1 1 1 2 2 . 2 o\no-o-o-o-o . o-o-o-o-S\n o . o \n o . 
o \n. . o . o . .\n. . S-o-o . .\nTurn: 1\nDice: 3\n" +ObservationString(1) = ". . o-o-S . .\n. . o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-o-2\n1 . 1 1 1 2 2 . 2 o\no-o-o-o-o . o-o-o-o-S\n o . o \n o . o \n. . o . o . .\n. . S-o-o . .\nTurn: 1\nDice: 3\n" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0] +StringLegalActions() = ["0 - passes"] + +# Apply action "0 - passes" +action: 0 + +# State 344 +# Apply action "chance outcome 3 (roll: 4)" +action: 3 + +# State 345 +# Apply action "0 - passes" +action: 0 + +# State 346 +# Apply action "chance outcome 0 (roll: 1)" +action: 0 + +# State 347 +# Apply action "41 - moves piece on field 39" +action: 41 + +# State 348 +# . . o-o-S . . +# . . o . o . . +# o . o +# o . o +# S-o-o-o-o . o-o-o-o-2 +# o 1 1 1 1 2 2 . 2 o +# o-o-o-o-o . o-o-o-o-S +# o . o +# o . o +# . . o . o . . +# . . S-o-o . . +# Turn: * +# Dice: +IsTerminal() = True +History() = [3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1, 21, 4, 35, 4, 2, 1, 11, 4, 3, 3, 13, 0, 8, 4, 16, 2, 7, 0, 17, 5, 1, 1, 10, 4, 21, 1, 9, 3, 18, 5, 1, 5, 23, 5, 29, 2, 35, 3, 40, 5, 38, 3, 2, 5, 1, 2, 2, 5, 11, 1, 6, 2, 5, 4, 17, 5, 1, 3, 2, 1, 12, 0, 8, 1, 14, 1, 6, 5, 1, 1, 16, 1, 9, 4, 8, 3, 11, 0, 13, 4, 8, 1, 18, 2, 15, 1, 20, 5, 1, 2, 2, 1, 2, 3, 13, 0, 14, 2, 5, 0, 4, 1, 18, 0, 15, 0, 44, 1, 16, 5, 8, 5, 20, 2, 17, 5, 1, 4, 2, 4, 26, 4, 7, 1, 31, 1, 5, 1, 14, 3, 18, 2, 16, 1, 22, 5, 33, 0, 39, 1, 7, 2, 40, 5, 12, 5, 9, 2, 18, 3, 20, 0, 24, 4, 24, 3, 15, 4, 29, 5, 21, 4, 25, 2, 19, 5, 30, 0, 19, 4, 22, 0, 20, 3, 34, 5, 27, 2, 36, 3, 27, 3, 21, 3, 31, 0, 25, 0, 35, 3, 33, 1, 38, 0, 26, 1, 36, 0, 44, 5, 38, 0, 40, 4, 39, 2, 0, 3, 27, 2, 0, 1, 37, 1, 0, 2, 39, 3, 0, 3, 31, 2, 0, 2, 35, 3, 0, 1, 38, 2, 0, 3, 0, 0, 41] +HistoryString() = "3, 0, 5, 1, 3, 2, 3, 0, 4, 6, 4, 0, 2, 11, 4, 0, 5, 1, 5, 2, 3, 14, 5, 1, 1, 2, 0, 18, 4, 4, 5, 1, 2, 2, 1, 9, 1, 19, 5, 1, 1, 2, 1, 8, 1, 11, 5, 1, 1, 21, 1, 13, 1, 2, 1, 15, 2, 23, 1, 4, 0, 5, 2, 6, 4, 10, 0, 9, 5, 1, 3, 6, 1, 17, 5, 2, 3, 10, 0, 10, 1, 15, 4, 19, 4, 14, 1, 24, 2, 8, 4, 26, 3, 17, 4, 11, 0, 19, 5, 1, 5, 2, 2, 8, 5, 1, 0, 2, 3, 31, 5, 1, 1, 21, 4, 35, 4, 2, 1, 11, 4, 3, 3, 13, 0, 8, 4, 16, 2, 7, 0, 17, 5, 1, 1, 10, 4, 21, 1, 9, 3, 18, 5, 1, 5, 23, 5, 29, 2, 35, 3, 40, 5, 38, 3, 2, 5, 1, 2, 2, 5, 11, 1, 6, 2, 5, 4, 17, 5, 1, 3, 2, 1, 12, 0, 8, 1, 14, 1, 6, 5, 1, 1, 16, 1, 9, 4, 8, 3, 11, 0, 13, 4, 8, 1, 18, 2, 15, 1, 20, 5, 1, 2, 2, 1, 2, 3, 13, 0, 14, 2, 5, 0, 4, 1, 18, 0, 15, 0, 44, 1, 16, 5, 8, 5, 20, 2, 17, 5, 1, 4, 2, 4, 26, 4, 7, 1, 31, 1, 5, 1, 14, 3, 18, 2, 16, 1, 22, 5, 33, 0, 39, 1, 7, 2, 40, 5, 12, 5, 9, 2, 18, 3, 20, 0, 24, 4, 24, 3, 15, 4, 29, 5, 21, 4, 25, 2, 19, 5, 30, 0, 19, 4, 22, 0, 20, 3, 34, 
5, 27, 2, 36, 3, 27, 3, 21, 3, 31, 0, 25, 0, 35, 3, 33, 1, 38, 0, 26, 1, 36, 0, 44, 5, 38, 0, 40, 4, 39, 2, 0, 3, 27, 2, 0, 1, 37, 1, 0, 2, 39, 3, 0, 3, 31, 2, 0, 2, 35, 3, 0, 1, 38, 2, 0, 3, 0, 0, 41" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = ". . o-o-S . .\n. . o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-o-2\no 1 1 1 1 2 2 . 2 o\no-o-o-o-o . o-o-o-o-S\n o . o \n o . o \n. . o . o . .\n. . S-o-o . .\nTurn: *\nDice: \n" +ObservationString(1) = ". . o-o-S . .\n. . o . o . .\n o . o \n o . o \nS-o-o-o-o . o-o-o-o-2\no 1 1 1 1 2 2 . 2 o\no-o-o-o-o . o-o-o-o-S\n o . o \n o . o \n. . o . o . .\n. . S-o-o . .\nTurn: *\nDice: \n" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/mancala.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/mancala.txt new file mode 100644 index 0000000..9205862 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/mancala.txt @@ -0,0 +1,312 @@ +game: mancala + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Mancala" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = [] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "mancala" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 14 +PolicyTensorShape() = [14] +MaxChanceOutcomes() = 0 +GetParameters() = {} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [14] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 14 +MaxGameLength() = 1000 +ToString() = "mancala()" + +# State 0 +# -4-4-4-4-4-4- +# 0-----------0 +# -4-4-4-4-4-4- +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "-4-4-4-4-4-4-\n0-----------0\n-4-4-4-4-4-4-" +ObservationString(1) = "-4-4-4-4-4-4-\n0-----------0\n-4-4-4-4-4-4-" +ObservationTensor(0) = [0.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 0.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0] +ObservationTensor(1) = [0.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 0.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 3, 4, 5, 6] +StringLegalActions() = ["1", "2", "3", "4", "5", "6"] + +# Apply action "2" +action: 2 + +# State 1 +# -4-4-4-4-4-4- +# 0-----------0 +# -4-0-5-5-5-5- +IsTerminal() = False +History() = [2] +HistoryString() = "2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = 
"-4-4-4-4-4-4-\n0-----------0\n-4-0-5-5-5-5-" +ObservationString(1) = "-4-4-4-4-4-4-\n0-----------0\n-4-0-5-5-5-5-" +ObservationTensor(0) = [0.0, 4.0, 0.0, 5.0, 5.0, 5.0, 5.0, 0.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0] +ObservationTensor(1) = [0.0, 4.0, 0.0, 5.0, 5.0, 5.0, 5.0, 0.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [8, 9, 10, 11, 12, 13] +StringLegalActions() = ["8", "9", "10", "11", "12", "13"] + +# Apply action "12" +action: 12 + +# State 2 +# -5-0-4-4-4-4- +# 1-----------0 +# -5-1-5-5-5-5- +IsTerminal() = False +History() = [2, 12] +HistoryString() = "2, 12" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "-5-0-4-4-4-4-\n1-----------0\n-5-1-5-5-5-5-" +ObservationString(1) = "-5-0-4-4-4-4-\n1-----------0\n-5-1-5-5-5-5-" +ObservationTensor(0) = [1.0, 5.0, 1.0, 5.0, 5.0, 5.0, 5.0, 0.0, 4.0, 4.0, 4.0, 4.0, 0.0, 5.0] +ObservationTensor(1) = [1.0, 5.0, 1.0, 5.0, 5.0, 5.0, 5.0, 0.0, 4.0, 4.0, 4.0, 4.0, 0.0, 5.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 3, 4, 5, 6] +StringLegalActions() = ["1", "2", "3", "4", "5", "6"] + +# Apply action "4" +action: 4 + +# State 3 +# -5-0-4-4-5-5- +# 1-----------1 +# -5-1-5-0-6-6- +IsTerminal() = False +History() = [2, 12, 4] +HistoryString() = "2, 12, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "-5-0-4-4-5-5-\n1-----------1\n-5-1-5-0-6-6-" +ObservationString(1) = "-5-0-4-4-5-5-\n1-----------1\n-5-1-5-0-6-6-" +ObservationTensor(0) = [1.0, 5.0, 1.0, 5.0, 0.0, 6.0, 6.0, 1.0, 5.0, 5.0, 4.0, 4.0, 0.0, 5.0] +ObservationTensor(1) = [1.0, 5.0, 1.0, 5.0, 0.0, 6.0, 6.0, 1.0, 5.0, 5.0, 4.0, 4.0, 0.0, 5.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [8, 9, 10, 11, 13] +StringLegalActions() = ["8", "9", "10", "11", "13"] + +# Apply action "9" +action: 9 + +# State 4 +# -6-1-5-5-0-5- +# 2-----------1 +# -5-1-5-0-6-6- +IsTerminal() = False +History() = [2, 12, 4, 9] +HistoryString() = "2, 12, 4, 9" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "-6-1-5-5-0-5-\n2-----------1\n-5-1-5-0-6-6-" +ObservationString(1) = "-6-1-5-5-0-5-\n2-----------1\n-5-1-5-0-6-6-" +ObservationTensor(0) = [2.0, 5.0, 1.0, 5.0, 0.0, 6.0, 6.0, 1.0, 5.0, 0.0, 5.0, 5.0, 1.0, 6.0] +ObservationTensor(1) = [2.0, 5.0, 1.0, 5.0, 0.0, 6.0, 6.0, 1.0, 5.0, 0.0, 5.0, 5.0, 1.0, 6.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [8, 10, 11, 12, 13] +StringLegalActions() = ["8", "10", "11", "12", "13"] + +# Apply action "8" +action: 8 + +# State 5 +# -7-2-6-6-1-0- +# 2-----------1 +# -5-1-5-0-6-6- +IsTerminal() = False +History() = [2, 12, 4, 9, 8] +HistoryString() = "2, 12, 4, 9, 8" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "-7-2-6-6-1-0-\n2-----------1\n-5-1-5-0-6-6-" +ObservationString(1) = "-7-2-6-6-1-0-\n2-----------1\n-5-1-5-0-6-6-" +ObservationTensor(0) = [2.0, 5.0, 1.0, 5.0, 0.0, 6.0, 6.0, 1.0, 0.0, 1.0, 6.0, 6.0, 2.0, 7.0] +ObservationTensor(1) = [2.0, 5.0, 1.0, 5.0, 0.0, 6.0, 6.0, 1.0, 0.0, 1.0, 6.0, 6.0, 2.0, 7.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 3, 5, 6] +StringLegalActions() = ["1", "2", "3", "5", "6"] + +# Apply action "3" +action: 3 + +# State 6 +# Apply action "12" +action: 12 + +# State 7 +# Apply action "13" +action: 13 + +# State 8 +# Apply action "6" +action: 6 + +# State 9 +# Apply action "8" +action: 8 + +# State 10 +# Apply action "5" +action: 5 + +# 
State 11 +# Apply action "12" +action: 12 + +# State 12 +# Apply action "9" +action: 9 + +# State 13 +# Apply action "4" +action: 4 + +# State 14 +# Apply action "11" +action: 11 + +# State 15 +# Apply action "4" +action: 4 + +# State 16 +# Apply action "8" +action: 8 + +# State 17 +# Apply action "1" +action: 1 + +# State 18 +# -5-2-0-11-2-1- +# 6-----------5 +# -0-4-3-1-4-4- +IsTerminal() = False +History() = [2, 12, 4, 9, 8, 3, 12, 13, 6, 8, 5, 12, 9, 4, 11, 4, 8, 1] +HistoryString() = "2, 12, 4, 9, 8, 3, 12, 13, 6, 8, 5, 12, 9, 4, 11, 4, 8, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "-5-2-0-11-2-1-\n6-----------5\n-0-4-3-1-4-4-" +ObservationString(1) = "-5-2-0-11-2-1-\n6-----------5\n-0-4-3-1-4-4-" +ObservationTensor(0) = [6.0, 0.0, 4.0, 3.0, 1.0, 4.0, 4.0, 5.0, 1.0, 2.0, 11.0, 0.0, 2.0, 5.0] +ObservationTensor(1) = [6.0, 0.0, 4.0, 3.0, 1.0, 4.0, 4.0, 5.0, 1.0, 2.0, 11.0, 0.0, 2.0, 5.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [8, 9, 10, 12, 13] +StringLegalActions() = ["8", "9", "10", "12", "13"] + +# Apply action "12" +action: 12 + +# State 19 +# Apply action "8" +action: 8 + +# State 20 +# Apply action "3" +action: 3 + +# State 21 +# Apply action "9" +action: 9 + +# State 22 +# Apply action "4" +action: 4 + +# State 23 +# Apply action "11" +action: 11 + +# State 24 +# -6-1-0-12-0-0- +# 12-----------5 +# -0-0-0-0-6-6- +IsTerminal() = False +History() = [2, 12, 4, 9, 8, 3, 12, 13, 6, 8, 5, 12, 9, 4, 11, 4, 8, 1, 12, 8, 3, 9, 4, 11] +HistoryString() = "2, 12, 4, 9, 8, 3, 12, 13, 6, 8, 5, 12, 9, 4, 11, 4, 8, 1, 12, 8, 3, 9, 4, 11" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "-6-1-0-12-0-0-\n12-----------5\n-0-0-0-0-6-6-" +ObservationString(1) = "-6-1-0-12-0-0-\n12-----------5\n-0-0-0-0-6-6-" +ObservationTensor(0) = [12.0, 0.0, 0.0, 0.0, 0.0, 6.0, 6.0, 5.0, 0.0, 0.0, 12.0, 0.0, 1.0, 6.0] +ObservationTensor(1) = [12.0, 0.0, 0.0, 0.0, 0.0, 6.0, 6.0, 5.0, 0.0, 0.0, 12.0, 0.0, 1.0, 6.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [5, 6] +StringLegalActions() = ["5", "6"] + +# Apply action "6" +action: 6 + +# State 25 +# Apply action "9" +action: 9 + +# State 26 +# Apply action "5" +action: 5 + +# State 27 +# Apply action "12" +action: 12 + +# State 28 +# Apply action "11" +action: 11 + +# State 29 +# Apply action "6" +action: 6 + +# State 30 +# -8-1-0-15-1-2- +# 13-----------8 +# -0-0-0-0-0-0- +IsTerminal() = True +History() = [2, 12, 4, 9, 8, 3, 12, 13, 6, 8, 5, 12, 9, 4, 11, 4, 8, 1, 12, 8, 3, 9, 4, 11, 6, 9, 5, 12, 11, 6] +HistoryString() = "2, 12, 4, 9, 8, 3, 12, 13, 6, 8, 5, 12, 9, 4, 11, 4, 8, 1, 12, 8, 3, 9, 4, 11, 6, 9, 5, 12, 11, 6" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "-8-1-0-15-1-2-\n13-----------8\n-0-0-0-0-0-0-" +ObservationString(1) = "-8-1-0-15-1-2-\n13-----------8\n-0-0-0-0-0-0-" +ObservationTensor(0) = [13.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0, 2.0, 1.0, 15.0, 0.0, 1.0, 8.0] +ObservationTensor(1) = [13.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0, 2.0, 1.0, 15.0, 0.0, 1.0, 8.0] +Rewards() = [-1, 1] +Returns() = [-1, 1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/markov_soccer.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/markov_soccer.txt new file mode 100644 index 0000000..28ca391 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/markov_soccer.txt @@ -0,0 
+1,319 @@ +game: markov_soccer(horizon=20) + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SIMULTANEOUS +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Markov Soccer" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["grid", "horizon"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "markov_soccer" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 5 +PolicyTensorShape() = [5] +MaxChanceOutcomes() = 4 +GetParameters() = {grid=.....\n..OB.\n.AO..\n.....,horizon=20} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [6, 4, 5] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 120 +MaxGameLength() = 20 +ToString() = "markov_soccer(horizon=20)" + +# State 0 +# ..... +# ...b. +# .a... +# ..... +# Chance Node +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = ".....\n...b.\n.a...\n.....\nChance Node" +ObservationString(1) = ".....\n...b.\n.a...\n.....\nChance Node" +ObservationTensor(0): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◯◉ +◯◉◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◯◉◉◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◯◉ +◯◉◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◯◉◉◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +ChanceOutcomes() = [(2,0.5), (3,0.5)] +LegalActions() = [2, 3] +StringLegalActions() = ["(ball at 1,2)", "(ball at 2,2)"] + +# Apply action "(ball at 2,2)" +action: 3 + +# State 1 +# ..... +# ...b. +# .aO.. +# ..... +IsTerminal() = False +History() = [3] +HistoryString() = "3" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = ".....\n...b.\n.aO..\n.....\n" +ObservationString(1) = ".....\n...b.\n.aO..\n.....\n" +ObservationTensor(0): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◯◉ +◯◉◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◉◯◯ ◉◯◯◉◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◯◉ +◯◉◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◉◯◯ ◉◯◯◉◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2, 3, 4] +LegalActions(1) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["up", "down", "left", "right", "stand"] +StringLegalActions(1) = ["up", "down", "left", "right", "stand"] + +# Apply joint action ["right", "up"] +actions: [3, 0] + +# State 2 +# ..... +# ...b. +# .aO.. +# ..... 
+# Chance Node +IsTerminal() = False +History() = [3, 3, 0] +HistoryString() = "3, 3, 0" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = ".....\n...b.\n.aO..\n.....\nChance Node" +ObservationString(1) = ".....\n...b.\n.aO..\n.....\nChance Node" +ObservationTensor(0): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◯◉ +◯◉◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◉◯◯ ◉◯◯◉◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◯◉ +◯◉◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◉◯◯ ◉◯◯◉◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +ChanceOutcomes() = [(0,0.5), (1,0.5)] +LegalActions() = [0, 1] +StringLegalActions() = ["(A's action first)", "(B's action first)"] + +# Apply action "(B's action first)" +action: 1 + +# State 3 +# ...b. +# ..... +# ..A.. +# ..... +IsTerminal() = False +History() = [3, 3, 0, 1] +HistoryString() = "3, 3, 0, 1" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "...b.\n.....\n..A..\n.....\n" +ObservationString(1) = "...b.\n.....\n..A..\n.....\n" +ObservationTensor(0): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◯◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +◯◯◯◯◯ ◯◯◉◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◯◉◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◯◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +◯◯◯◯◯ ◯◯◉◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◯◉◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2, 3, 4] +LegalActions(1) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["up", "down", "left", "right", "stand"] +StringLegalActions(1) = ["up", "down", "left", "right", "stand"] + +# Apply joint action ["down", "stand"] +actions: [1, 4] + +# State 4 +# Apply action "(B's action first)" +action: 1 + +# State 5 +# ...b. +# ..... +# ..... +# ..A.. 
+IsTerminal() = False +History() = [3, 3, 0, 1, 1, 4, 1] +HistoryString() = "3, 3, 0, 1, 1, 4, 1" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "...b.\n.....\n.....\n..A..\n" +ObservationString(1) = "...b.\n.....\n.....\n..A..\n" +ObservationTensor(0): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◯◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +◯◯◯◯◯ ◯◯◉◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◯◉◉ +ObservationTensor(1): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◯◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +◯◯◯◯◯ ◯◯◉◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◯◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2, 3, 4] +LegalActions(1) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["up", "down", "left", "right", "stand"] +StringLegalActions(1) = ["up", "down", "left", "right", "stand"] + +# Apply joint action ["down", "up"] +actions: [1, 0] + +# State 6 +# Apply action "(B's action first)" +action: 1 + +# State 7 +# Apply joint action ["right", "left"] +actions: [3, 2] + +# State 8 +# Apply action "(A's action first)" +action: 0 + +# State 9 +# Apply joint action ["down", "left"] +actions: [1, 2] + +# State 10 +# Apply action "(B's action first)" +action: 1 + +# State 11 +# Apply joint action ["down", "right"] +actions: [1, 3] + +# State 12 +# Apply action "(A's action first)" +action: 0 + +# State 13 +# Apply joint action ["stand", "stand"] +actions: [4, 4] + +# State 14 +# Apply action "(A's action first)" +action: 0 + +# State 15 +# Apply joint action ["right", "down"] +actions: [3, 1] + +# State 16 +# Apply action "(A's action first)" +action: 0 + +# State 17 +# Apply joint action ["up", "stand"] +actions: [0, 4] + +# State 18 +# Apply action "(A's action first)" +action: 0 + +# State 19 +# Apply joint action ["up", "right"] +actions: [0, 3] + +# State 20 +# Apply action "(B's action first)" +action: 1 + +# State 21 +# ..... +# ...bA +# ..... +# ..... +IsTerminal() = False +History() = [3, 3, 0, 1, 1, 4, 1, 1, 0, 1, 3, 2, 0, 1, 2, 1, 1, 3, 0, 4, 4, 0, 3, 1, 0, 0, 4, 0, 0, 3, 1] +HistoryString() = "3, 3, 0, 1, 1, 4, 1, 1, 0, 1, 3, 2, 0, 1, 2, 1, 1, 3, 0, 4, 4, 0, 3, 1, 0, 0, 4, 0, 0, 3, 1" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = ".....\n...bA\n.....\n.....\n" +ObservationString(1) = ".....\n...bA\n.....\n.....\n" +ObservationTensor(0): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +◯◯◯◯◯ ◯◯◯◯◉ ◯◯◯◉◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +◯◯◯◯◯ ◯◯◯◯◉ ◯◯◯◉◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◯◯ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2, 3, 4] +LegalActions(1) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["up", "down", "left", "right", "stand"] +StringLegalActions(1) = ["up", "down", "left", "right", "stand"] + +# Apply joint action ["right", "down"] +actions: [3, 1] + +# State 22 +# Apply action "(A's action first)" +action: 0 + +# State 23 +# ..... +# ..... +# ...b. +# ..... 
+IsTerminal() = True +History() = [3, 3, 0, 1, 1, 4, 1, 1, 0, 1, 3, 2, 0, 1, 2, 1, 1, 3, 0, 4, 4, 0, 3, 1, 0, 0, 4, 0, 0, 3, 1, 3, 1, 0] +HistoryString() = "3, 3, 0, 1, 1, 4, 1, 1, 0, 1, 3, 2, 0, 1, 2, 1, 1, 3, 0, 4, 4, 0, 3, 1, 0, 0, 4, 0, 0, 3, 1, 3, 1, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = ".....\n.....\n...b.\n.....\n" +ObservationString(1) = ".....\n.....\n...b.\n.....\n" +ObservationTensor(0): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◯◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◯◉ +◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matching_pennies_3p.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matching_pennies_3p.txt new file mode 100644 index 0000000..9bc8a3b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matching_pennies_3p.txt @@ -0,0 +1,89 @@ +game: matching_pennies_3p + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SIMULTANEOUS +GameType.information = Information.ONE_SHOT +GameType.long_name = "Three-Player Matching Pennies" +GameType.max_num_players = 3 +GameType.min_num_players = 3 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "matching_pennies_3p" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 2 +PolicyTensorShape() = [2] +MaxChanceOutcomes() = 0 +GetParameters() = {} +NumPlayers() = 3 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = None +InformationStateTensorShape() = [1] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 1 +ObservationTensorShape() = [1] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 1 +MaxGameLength() = 1 +ToString() = "matching_pennies_3p()" + +# State 0 +# Normal form game default NFGState::ToString. Non-terminal +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +InformationStateString(0) = "Observing player: 0. Non-terminal" +InformationStateString(1) = "Observing player: 1. Non-terminal" +InformationStateString(2) = "Observing player: 2. Non-terminal" +InformationStateTensor(0): ◯ +InformationStateTensor(1): ◯ +InformationStateTensor(2): ◯ +ObservationString(0) = "Non-terminal" +ObservationString(1) = "Non-terminal" +ObservationString(2) = "Non-terminal" +ObservationTensor(0): ◯ +ObservationTensor(1): ◯ +ObservationTensor(2): ◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions(0) = [0, 1] +LegalActions(1) = [0, 1] +LegalActions(2) = [0, 1] +StringLegalActions(0) = ["Heads", "Tails"] +StringLegalActions(1) = ["Heads", "Tails"] +StringLegalActions(2) = ["Heads", "Tails"] + +# Apply joint action ["Tails", "Tails", "Heads"] +actions: [1, 1, 0] + +# State 1 +# Normal form game default NFGState::ToString. 
Terminal, history: 1, 1, 0, returns: 1,-1,1 +IsTerminal() = True +History() = [1, 1, 0] +HistoryString() = "1, 1, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "Observing player: 0. Terminal. History string: 1, 1, 0" +InformationStateString(1) = "Observing player: 1. Terminal. History string: 1, 1, 0" +InformationStateString(2) = "Observing player: 2. Terminal. History string: 1, 1, 0" +InformationStateTensor(0): ◉ +InformationStateTensor(1): ◉ +InformationStateTensor(2): ◉ +ObservationString(0) = "Terminal. History string: 1, 1, 0" +ObservationString(1) = "Terminal. History string: 1, 1, 0" +ObservationString(2) = "Terminal. History string: 1, 1, 0" +ObservationTensor(0): ◉ +ObservationTensor(1): ◉ +ObservationTensor(2): ◉ +Rewards() = [1, -1, 1] +Returns() = [1, -1, 1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_bos.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_bos.txt new file mode 100644 index 0000000..d949716 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_bos.txt @@ -0,0 +1,91 @@ +game: matrix_bos + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SIMULTANEOUS +GameType.information = Information.ONE_SHOT +GameType.long_name = "Bach or Stravinsky" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "matrix_bos" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 2 +PolicyTensorShape() = [2] +MaxChanceOutcomes() = 0 +GetParameters() = {} +NumPlayers() = 2 +MinUtility() = 0.0 +MaxUtility() = 3.0 +UtilitySum() = None +InformationStateTensorShape() = [1] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 1 +ObservationTensorShape() = [1] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 1 +MaxGameLength() = 1 +ToString() = "matrix_bos()" + +# State 0 +# Terminal? false +# Row actions: Bach Stravinsky +# Col actions: Bach Stravinsky +# Utility matrix: +# 3,2 0,0 +# 0,0 2,3 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +InformationStateString(0) = "Observing player: 0. Non-terminal" +InformationStateString(1) = "Observing player: 1. Non-terminal" +InformationStateTensor(0): ◯ +InformationStateTensor(1): ◯ +ObservationString(0) = "Non-terminal" +ObservationString(1) = "Non-terminal" +ObservationTensor(0): ◯ +ObservationTensor(1): ◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1] +LegalActions(1) = [0, 1] +StringLegalActions(0) = ["Bach", "Stravinsky"] +StringLegalActions(1) = ["Bach", "Stravinsky"] + +# Apply joint action ["Stravinsky", "Bach"] +actions: [1, 0] + +# State 1 +# Terminal? true +# History: 1, 0 +# Returns: 0,0 +# Row actions: +# Col actions: +# Utility matrix: +# 3,2 0,0 +# 0,0 2,3 +IsTerminal() = True +History() = [1, 0] +HistoryString() = "1, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "Observing player: 0. Terminal. 
History string: 1, 0" +InformationStateString(1) = "Observing player: 1. Terminal. History string: 1, 0" +InformationStateTensor(0): ◉ +InformationStateTensor(1): ◉ +ObservationString(0) = "Terminal. History string: 1, 0" +ObservationString(1) = "Terminal. History string: 1, 0" +ObservationTensor(0): ◉ +ObservationTensor(1): ◉ +Rewards() = [0, 0] +Returns() = [0, 0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_brps.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_brps.txt new file mode 100644 index 0000000..f26f343 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_brps.txt @@ -0,0 +1,93 @@ +game: matrix_brps + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SIMULTANEOUS +GameType.information = Information.ONE_SHOT +GameType.long_name = "Biased Rock, Paper, Scissors" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "matrix_brps" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 3 +PolicyTensorShape() = [3] +MaxChanceOutcomes() = 0 +GetParameters() = {} +NumPlayers() = 2 +MinUtility() = -50.0 +MaxUtility() = 50.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = [1] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 1 +ObservationTensorShape() = [1] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 1 +MaxGameLength() = 1 +ToString() = "matrix_brps()" + +# State 0 +# Terminal? false +# Row actions: Rock Paper Scissors +# Col actions: Rock Paper Scissors +# Utility matrix: +# 0,0 -25,25 50,-50 +# 25,-25 0,0 -5,5 +# -50,50 5,-5 0,0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +InformationStateString(0) = "Observing player: 0. Non-terminal" +InformationStateString(1) = "Observing player: 1. Non-terminal" +InformationStateTensor(0): ◯ +InformationStateTensor(1): ◯ +ObservationString(0) = "Non-terminal" +ObservationString(1) = "Non-terminal" +ObservationTensor(0): ◯ +ObservationTensor(1): ◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2] +LegalActions(1) = [0, 1, 2] +StringLegalActions(0) = ["Rock", "Paper", "Scissors"] +StringLegalActions(1) = ["Rock", "Paper", "Scissors"] + +# Apply joint action ["Paper", "Paper"] +actions: [1, 1] + +# State 1 +# Terminal? true +# History: 1, 1 +# Returns: 0,0 +# Row actions: +# Col actions: +# Utility matrix: +# 0,0 -25,25 50,-50 +# 25,-25 0,0 -5,5 +# -50,50 5,-5 0,0 +IsTerminal() = True +History() = [1, 1] +HistoryString() = "1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "Observing player: 0. Terminal. History string: 1, 1" +InformationStateString(1) = "Observing player: 1. Terminal. History string: 1, 1" +InformationStateTensor(0): ◉ +InformationStateTensor(1): ◉ +ObservationString(0) = "Terminal. History string: 1, 1" +ObservationString(1) = "Terminal. 
History string: 1, 1" +ObservationTensor(0): ◉ +ObservationTensor(1): ◉ +Rewards() = [0, 0] +Returns() = [0, 0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_cd.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_cd.txt new file mode 100644 index 0000000..6f2529d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_cd.txt @@ -0,0 +1,91 @@ +game: matrix_cd + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SIMULTANEOUS +GameType.information = Information.ONE_SHOT +GameType.long_name = "Chicken-Dare" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "matrix_cd" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 2 +PolicyTensorShape() = [2] +MaxChanceOutcomes() = 0 +GetParameters() = {} +NumPlayers() = 2 +MinUtility() = 0.0 +MaxUtility() = 4.0 +UtilitySum() = None +InformationStateTensorShape() = [1] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 1 +ObservationTensorShape() = [1] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 1 +MaxGameLength() = 1 +ToString() = "matrix_cd()" + +# State 0 +# Terminal? false +# Row actions: Dare Chicken +# Col actions: Dare Chicken +# Utility matrix: +# 0,0 4,1 +# 1,4 3,3 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +InformationStateString(0) = "Observing player: 0. Non-terminal" +InformationStateString(1) = "Observing player: 1. Non-terminal" +InformationStateTensor(0): ◯ +InformationStateTensor(1): ◯ +ObservationString(0) = "Non-terminal" +ObservationString(1) = "Non-terminal" +ObservationTensor(0): ◯ +ObservationTensor(1): ◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1] +LegalActions(1) = [0, 1] +StringLegalActions(0) = ["Dare", "Chicken"] +StringLegalActions(1) = ["Dare", "Chicken"] + +# Apply joint action ["Dare", "Chicken"] +actions: [0, 1] + +# State 1 +# Terminal? true +# History: 0, 1 +# Returns: 4,1 +# Row actions: +# Col actions: +# Utility matrix: +# 0,0 4,1 +# 1,4 3,3 +IsTerminal() = True +History() = [0, 1] +HistoryString() = "0, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "Observing player: 0. Terminal. History string: 0, 1" +InformationStateString(1) = "Observing player: 1. Terminal. History string: 0, 1" +InformationStateTensor(0): ◉ +InformationStateTensor(1): ◉ +ObservationString(0) = "Terminal. History string: 0, 1" +ObservationString(1) = "Terminal. 
History string: 0, 1" +ObservationTensor(0): ◉ +ObservationTensor(1): ◉ +Rewards() = [4, 1] +Returns() = [4, 1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_coordination.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_coordination.txt new file mode 100644 index 0000000..29549ba --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_coordination.txt @@ -0,0 +1,91 @@ +game: matrix_coordination + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SIMULTANEOUS +GameType.information = Information.ONE_SHOT +GameType.long_name = "Coordination" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "matrix_coordination" +GameType.utility = Utility.IDENTICAL + +NumDistinctActions() = 2 +PolicyTensorShape() = [2] +MaxChanceOutcomes() = 0 +GetParameters() = {} +NumPlayers() = 2 +MinUtility() = 0.0 +MaxUtility() = 1.0 +UtilitySum() = None +InformationStateTensorShape() = [1] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 1 +ObservationTensorShape() = [1] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 1 +MaxGameLength() = 1 +ToString() = "matrix_coordination()" + +# State 0 +# Terminal? false +# Row actions: Left Right +# Col actions: Left Right +# Utility matrix: +# 1,1 0,0 +# 0,0 1,1 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +InformationStateString(0) = "Observing player: 0. Non-terminal" +InformationStateString(1) = "Observing player: 1. Non-terminal" +InformationStateTensor(0): ◯ +InformationStateTensor(1): ◯ +ObservationString(0) = "Non-terminal" +ObservationString(1) = "Non-terminal" +ObservationTensor(0): ◯ +ObservationTensor(1): ◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1] +LegalActions(1) = [0, 1] +StringLegalActions(0) = ["Left", "Right"] +StringLegalActions(1) = ["Left", "Right"] + +# Apply joint action ["Right", "Left"] +actions: [1, 0] + +# State 1 +# Terminal? true +# History: 1, 0 +# Returns: 0,0 +# Row actions: +# Col actions: +# Utility matrix: +# 1,1 0,0 +# 0,0 1,1 +IsTerminal() = True +History() = [1, 0] +HistoryString() = "1, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "Observing player: 0. Terminal. History string: 1, 0" +InformationStateString(1) = "Observing player: 1. Terminal. History string: 1, 0" +InformationStateTensor(0): ◉ +InformationStateTensor(1): ◉ +ObservationString(0) = "Terminal. History string: 1, 0" +ObservationString(1) = "Terminal. 
History string: 1, 0" +ObservationTensor(0): ◉ +ObservationTensor(1): ◉ +Rewards() = [0, 0] +Returns() = [0, 0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_mp.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_mp.txt new file mode 100644 index 0000000..a9888e5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_mp.txt @@ -0,0 +1,91 @@ +game: matrix_mp + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SIMULTANEOUS +GameType.information = Information.ONE_SHOT +GameType.long_name = "Matching Pennies" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "matrix_mp" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 2 +PolicyTensorShape() = [2] +MaxChanceOutcomes() = 0 +GetParameters() = {} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = [1] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 1 +ObservationTensorShape() = [1] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 1 +MaxGameLength() = 1 +ToString() = "matrix_mp()" + +# State 0 +# Terminal? false +# Row actions: Heads Tails +# Col actions: Heads Tails +# Utility matrix: +# 1,-1 -1,1 +# -1,1 1,-1 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +InformationStateString(0) = "Observing player: 0. Non-terminal" +InformationStateString(1) = "Observing player: 1. Non-terminal" +InformationStateTensor(0): ◯ +InformationStateTensor(1): ◯ +ObservationString(0) = "Non-terminal" +ObservationString(1) = "Non-terminal" +ObservationTensor(0): ◯ +ObservationTensor(1): ◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1] +LegalActions(1) = [0, 1] +StringLegalActions(0) = ["Heads", "Tails"] +StringLegalActions(1) = ["Heads", "Tails"] + +# Apply joint action ["Tails", "Tails"] +actions: [1, 1] + +# State 1 +# Terminal? true +# History: 1, 1 +# Returns: 1,-1 +# Row actions: +# Col actions: +# Utility matrix: +# 1,-1 -1,1 +# -1,1 1,-1 +IsTerminal() = True +History() = [1, 1] +HistoryString() = "1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "Observing player: 0. Terminal. History string: 1, 1" +InformationStateString(1) = "Observing player: 1. Terminal. History string: 1, 1" +InformationStateTensor(0): ◉ +InformationStateTensor(1): ◉ +ObservationString(0) = "Terminal. History string: 1, 1" +ObservationString(1) = "Terminal. 
History string: 1, 1" +ObservationTensor(0): ◉ +ObservationTensor(1): ◉ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_pd.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_pd.txt new file mode 100644 index 0000000..92a8b8a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_pd.txt @@ -0,0 +1,91 @@ +game: matrix_pd + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SIMULTANEOUS +GameType.information = Information.ONE_SHOT +GameType.long_name = "Prisoner's Dilemma" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "matrix_pd" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 2 +PolicyTensorShape() = [2] +MaxChanceOutcomes() = 0 +GetParameters() = {} +NumPlayers() = 2 +MinUtility() = 0.0 +MaxUtility() = 10.0 +UtilitySum() = None +InformationStateTensorShape() = [1] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 1 +ObservationTensorShape() = [1] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 1 +MaxGameLength() = 1 +ToString() = "matrix_pd()" + +# State 0 +# Terminal? false +# Row actions: Cooperate Defect +# Col actions: Cooperate Defect +# Utility matrix: +# 5,5 0,10 +# 10,0 1,1 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +InformationStateString(0) = "Observing player: 0. Non-terminal" +InformationStateString(1) = "Observing player: 1. Non-terminal" +InformationStateTensor(0): ◯ +InformationStateTensor(1): ◯ +ObservationString(0) = "Non-terminal" +ObservationString(1) = "Non-terminal" +ObservationTensor(0): ◯ +ObservationTensor(1): ◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1] +LegalActions(1) = [0, 1] +StringLegalActions(0) = ["Cooperate", "Defect"] +StringLegalActions(1) = ["Cooperate", "Defect"] + +# Apply joint action ["Defect", "Cooperate"] +actions: [1, 0] + +# State 1 +# Terminal? true +# History: 1, 0 +# Returns: 10,0 +# Row actions: +# Col actions: +# Utility matrix: +# 5,5 0,10 +# 10,0 1,1 +IsTerminal() = True +History() = [1, 0] +HistoryString() = "1, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "Observing player: 0. Terminal. History string: 1, 0" +InformationStateString(1) = "Observing player: 1. Terminal. History string: 1, 0" +InformationStateTensor(0): ◉ +InformationStateTensor(1): ◉ +ObservationString(0) = "Terminal. History string: 1, 0" +ObservationString(1) = "Terminal. 
History string: 1, 0" +ObservationTensor(0): ◉ +ObservationTensor(1): ◉ +Rewards() = [10, 0] +Returns() = [10, 0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_rps.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_rps.txt new file mode 100644 index 0000000..6ee5e56 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_rps.txt @@ -0,0 +1,93 @@ +game: matrix_rps + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SIMULTANEOUS +GameType.information = Information.ONE_SHOT +GameType.long_name = "Rock, Paper, Scissors" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "matrix_rps" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 3 +PolicyTensorShape() = [3] +MaxChanceOutcomes() = 0 +GetParameters() = {} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = [1] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 1 +ObservationTensorShape() = [1] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 1 +MaxGameLength() = 1 +ToString() = "matrix_rps()" + +# State 0 +# Terminal? false +# Row actions: Rock Paper Scissors +# Col actions: Rock Paper Scissors +# Utility matrix: +# 0,0 -1,1 1,-1 +# 1,-1 0,0 -1,1 +# -1,1 1,-1 0,0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +InformationStateString(0) = "Observing player: 0. Non-terminal" +InformationStateString(1) = "Observing player: 1. Non-terminal" +InformationStateTensor(0): ◯ +InformationStateTensor(1): ◯ +ObservationString(0) = "Non-terminal" +ObservationString(1) = "Non-terminal" +ObservationTensor(0): ◯ +ObservationTensor(1): ◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2] +LegalActions(1) = [0, 1, 2] +StringLegalActions(0) = ["Rock", "Paper", "Scissors"] +StringLegalActions(1) = ["Rock", "Paper", "Scissors"] + +# Apply joint action ["Scissors", "Rock"] +actions: [2, 0] + +# State 1 +# Terminal? true +# History: 2, 0 +# Returns: -1,1 +# Row actions: +# Col actions: +# Utility matrix: +# 0,0 -1,1 1,-1 +# 1,-1 0,0 -1,1 +# -1,1 1,-1 0,0 +IsTerminal() = True +History() = [2, 0] +HistoryString() = "2, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "Observing player: 0. Terminal. History string: 2, 0" +InformationStateString(1) = "Observing player: 1. Terminal. History string: 2, 0" +InformationStateTensor(0): ◉ +InformationStateTensor(1): ◉ +ObservationString(0) = "Terminal. History string: 2, 0" +ObservationString(1) = "Terminal. 
History string: 2, 0" +ObservationTensor(0): ◉ +ObservationTensor(1): ◉ +Rewards() = [-1, 1] +Returns() = [-1, 1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_rpsw.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_rpsw.txt new file mode 100644 index 0000000..8a23232 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_rpsw.txt @@ -0,0 +1,95 @@ +game: matrix_rpsw + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SIMULTANEOUS +GameType.information = Information.ONE_SHOT +GameType.long_name = "Rock, Paper, Scissors, Water" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "matrix_rpsw" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 4 +PolicyTensorShape() = [4] +MaxChanceOutcomes() = 0 +GetParameters() = {} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = [1] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 1 +ObservationTensorShape() = [1] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 1 +MaxGameLength() = 1 +ToString() = "matrix_rpsw()" + +# State 0 +# Terminal? false +# Row actions: Rock Paper Scissors Water +# Col actions: Rock Paper Scissors Water +# Utility matrix: +# 0,0 -1,1 1,-1 0,0 +# 1,-1 0,0 -1,1 0,0 +# -1,1 1,-1 0,0 0,0 +# 0,0 0,0 0,0 0,0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +InformationStateString(0) = "Observing player: 0. Non-terminal" +InformationStateString(1) = "Observing player: 1. Non-terminal" +InformationStateTensor(0): ◯ +InformationStateTensor(1): ◯ +ObservationString(0) = "Non-terminal" +ObservationString(1) = "Non-terminal" +ObservationTensor(0): ◯ +ObservationTensor(1): ◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2, 3] +LegalActions(1) = [0, 1, 2, 3] +StringLegalActions(0) = ["Rock", "Paper", "Scissors", "Water"] +StringLegalActions(1) = ["Rock", "Paper", "Scissors", "Water"] + +# Apply joint action ["Rock", "Scissors"] +actions: [0, 2] + +# State 1 +# Terminal? true +# History: 0, 2 +# Returns: 1,-1 +# Row actions: +# Col actions: +# Utility matrix: +# 0,0 -1,1 1,-1 0,0 +# 1,-1 0,0 -1,1 0,0 +# -1,1 1,-1 0,0 0,0 +# 0,0 0,0 0,0 0,0 +IsTerminal() = True +History() = [0, 2] +HistoryString() = "0, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "Observing player: 0. Terminal. History string: 0, 2" +InformationStateString(1) = "Observing player: 1. Terminal. History string: 0, 2" +InformationStateTensor(0): ◉ +InformationStateTensor(1): ◉ +ObservationString(0) = "Terminal. History string: 0, 2" +ObservationString(1) = "Terminal. 
History string: 0, 2" +ObservationTensor(0): ◉ +ObservationTensor(1): ◉ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_sh.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_sh.txt new file mode 100644 index 0000000..a1dbbf6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_sh.txt @@ -0,0 +1,91 @@ +game: matrix_sh + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SIMULTANEOUS +GameType.information = Information.ONE_SHOT +GameType.long_name = "Stag Hunt" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "matrix_sh" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 2 +PolicyTensorShape() = [2] +MaxChanceOutcomes() = 0 +GetParameters() = {} +NumPlayers() = 2 +MinUtility() = 0.0 +MaxUtility() = 2.0 +UtilitySum() = None +InformationStateTensorShape() = [1] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 1 +ObservationTensorShape() = [1] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 1 +MaxGameLength() = 1 +ToString() = "matrix_sh()" + +# State 0 +# Terminal? false +# Row actions: Stag Hare +# Col actions: Stag Hare +# Utility matrix: +# 2,2 0,1 +# 1,0 1,1 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +InformationStateString(0) = "Observing player: 0. Non-terminal" +InformationStateString(1) = "Observing player: 1. Non-terminal" +InformationStateTensor(0): ◯ +InformationStateTensor(1): ◯ +ObservationString(0) = "Non-terminal" +ObservationString(1) = "Non-terminal" +ObservationTensor(0): ◯ +ObservationTensor(1): ◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1] +LegalActions(1) = [0, 1] +StringLegalActions(0) = ["Stag", "Hare"] +StringLegalActions(1) = ["Stag", "Hare"] + +# Apply joint action ["Stag", "Stag"] +actions: [0, 0] + +# State 1 +# Terminal? true +# History: 0, 0 +# Returns: 2,2 +# Row actions: +# Col actions: +# Utility matrix: +# 2,2 0,1 +# 1,0 1,1 +IsTerminal() = True +History() = [0, 0] +HistoryString() = "0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "Observing player: 0. Terminal. History string: 0, 0" +InformationStateString(1) = "Observing player: 1. Terminal. History string: 0, 0" +InformationStateTensor(0): ◉ +InformationStateTensor(1): ◉ +ObservationString(0) = "Terminal. History string: 0, 0" +ObservationString(1) = "Terminal. 
History string: 0, 0" +ObservationTensor(0): ◉ +ObservationTensor(1): ◉ +Rewards() = [2, 2] +Returns() = [2, 2] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_shapleys_game.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_shapleys_game.txt new file mode 100644 index 0000000..7bb2ddb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/matrix_shapleys_game.txt @@ -0,0 +1,93 @@ +game: matrix_shapleys_game + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SIMULTANEOUS +GameType.information = Information.ONE_SHOT +GameType.long_name = "Shapley's Game" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "matrix_shapleys_game" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 3 +PolicyTensorShape() = [3] +MaxChanceOutcomes() = 0 +GetParameters() = {} +NumPlayers() = 2 +MinUtility() = 0.0 +MaxUtility() = 1.0 +UtilitySum() = None +InformationStateTensorShape() = [1] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 1 +ObservationTensorShape() = [1] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 1 +MaxGameLength() = 1 +ToString() = "matrix_shapleys_game()" + +# State 0 +# Terminal? false +# Row actions: Rock Paper Scissors +# Col actions: Rock Paper Scissors +# Utility matrix: +# 0,0 0,1 1,0 +# 1,0 0,0 0,1 +# 0,1 1,0 0,0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +InformationStateString(0) = "Observing player: 0. Non-terminal" +InformationStateString(1) = "Observing player: 1. Non-terminal" +InformationStateTensor(0): ◯ +InformationStateTensor(1): ◯ +ObservationString(0) = "Non-terminal" +ObservationString(1) = "Non-terminal" +ObservationTensor(0): ◯ +ObservationTensor(1): ◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2] +LegalActions(1) = [0, 1, 2] +StringLegalActions(0) = ["Rock", "Paper", "Scissors"] +StringLegalActions(1) = ["Rock", "Paper", "Scissors"] + +# Apply joint action ["Paper", "Rock"] +actions: [1, 0] + +# State 1 +# Terminal? true +# History: 1, 0 +# Returns: 1,0 +# Row actions: +# Col actions: +# Utility matrix: +# 0,0 0,1 1,0 +# 1,0 0,0 0,1 +# 0,1 1,0 0,0 +IsTerminal() = True +History() = [1, 0] +HistoryString() = "1, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "Observing player: 0. Terminal. History string: 1, 0" +InformationStateString(1) = "Observing player: 1. Terminal. History string: 1, 0" +InformationStateTensor(0): ◉ +InformationStateTensor(1): ◉ +ObservationString(0) = "Terminal. History string: 1, 0" +ObservationString(1) = "Terminal. 
History string: 1, 0" +ObservationTensor(0): ◉ +ObservationTensor(1): ◉ +Rewards() = [1, 0] +Returns() = [1, 0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/mean_field_lin_quad.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/mean_field_lin_quad.txt new file mode 100644 index 0000000..e29d18a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/mean_field_lin_quad.txt @@ -0,0 +1,298 @@ +game: mean_field_lin_quad + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.MEAN_FIELD +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Mean-Field Linear Quadratic Game" +GameType.max_num_players = 1 +GameType.min_num_players = 1 +GameType.parameter_specification = ["cross_q", "dt", "horizon", "kappa", "mean_revert", "n_actions_per_side", "size", "spatial_bias", "terminal_cost", "volatility"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.REWARDS +GameType.short_name = "mean_field_lin_quad" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 7 +PolicyTensorShape() = [7] +MaxChanceOutcomes() = 7 +GetParameters() = {cross_q=0.01,dt=1.0,horizon=10,kappa=0.5,mean_revert=0.0,n_actions_per_side=3,size=10,spatial_bias=0,terminal_cost=1.0,volatility=1.0} +NumPlayers() = 1 +MinUtility() = -inf +MaxUtility() = inf +UtilitySum() = 0.0 +ObservationTensorShape() = x: [], t: [], observation: [2] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 2 +MaxGameLength() = 10 +ToString() = "mean_field_lin_quad(cross_q=0.01,dt=1.0,horizon=10,kappa=0.5,mean_revert=0.0,n_actions_per_side=3,size=10,spatial_bias=0,terminal_cost=1.0,volatility=1.0)" + +# State 0 +# initial +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "" +ObservationString(0) = "initial" +ObservationTensor(0).x = [0.0] +ObservationTensor(0).t: ◯ +ObservationTensor(0) = [nan, 0.0] +ChanceOutcomes() = [(0,0.1), (1,0.1), (2,0.1), (3,0.1), (4,0.1), (5,0.1), (6,0.1), (7,0.1), (8,0.1), (9,0.1)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +StringLegalActions() = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"] + +# Apply action "0" +action: 0 + +# State 1 +# (0, 0) +IsTerminal() = False +History() = [0] +HistoryString() = "0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "0" +ObservationString(0) = "(0, 0)" +ObservationTensor(0).x: ◯ +ObservationTensor(0).t: ◯ +ObservationTensor(0): ◯◯ +Rewards() = [-5.0625] +Returns() = [-5.0625] +LegalActions() = [0, 1, 2, 3, 4, 5, 6] +StringLegalActions() = ["0", "1", "2", "3", "4", "5", "6"] + +# Apply action "3" +action: 3 + +# State 2 +# (0, 0)_a_mu +IsTerminal() = False +History() = [0, 3] +HistoryString() = "0, 3" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "0, 3" +ObservationString(0) = "(0, 0)_a_mu" +ObservationTensor(0).x: ◯ +ObservationTensor(0).t: ◯ +ObservationTensor(0): ◯◯ +ChanceOutcomes() = [(0,0.00620967), (1,0.0605975), (2,0.24173), (3,0.382925), (4,0.24173), (5,0.0605975), (6,0.00620967)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6] 
+StringLegalActions() = ["0", "1", "2", "3", "4", "5", "6"] + +# Apply action "5" +action: 5 + +# State 3 +# (2, 1)_a +IsTerminal() = False +History() = [0, 3, 5] +HistoryString() = "0, 3, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -5 +InformationStateString(0) = "0, 3, 5" +ObservationString(0) = "(2, 1)_a" +ObservationTensor(0).x = [2] +ObservationTensor(0).t: ◉ +ObservationTensor(0) = [2.0, 1.0] +Rewards() = [0] +Returns() = [0] +DistributionSupport() = ['(0, 1)_a', '(1, 1)_a', '(2, 1)_a', '(3, 1)_a', '(4, 1)_a', '(5, 1)_a', '(6, 1)_a', '(7, 1)_a', '(8, 1)_a', '(9, 1)_a'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 4 +# (2, 1) +IsTerminal() = False +History() = [0, 3, 5] +HistoryString() = "0, 3, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "0, 3, 5" +ObservationString(0) = "(2, 1)" +ObservationTensor(0).x = [2] +ObservationTensor(0).t: ◉ +ObservationTensor(0) = [2.0, 1.0] +Rewards() = [-1.5625] +Returns() = [-1.5625] +LegalActions() = [0, 1, 2, 3, 4, 5, 6] +StringLegalActions() = ["0", "1", "2", "3", "4", "5", "6"] + +# Apply action "0" +action: 0 + +# State 5 +# Apply action "1" +action: 1 + +# State 6 +# (7, 2)_a +IsTerminal() = False +History() = [0, 3, 5, 0, 1] +HistoryString() = "0, 3, 5, 0, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -5 +InformationStateString(0) = "0, 3, 5, 0, 1" +ObservationString(0) = "(7, 2)_a" +ObservationTensor(0).x = [7] +ObservationTensor(0).t = [2.0] +ObservationTensor(0) = [7.0, 2.0] +Rewards() = [0] +Returns() = [0] +DistributionSupport() = ['(0, 2)_a', '(1, 2)_a', '(2, 2)_a', '(3, 2)_a', '(4, 2)_a', '(5, 2)_a', '(6, 2)_a', '(7, 2)_a', '(8, 2)_a', '(9, 2)_a'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 7 +# (7, 2) +IsTerminal() = False +History() = [0, 3, 5, 0, 1] +HistoryString() = "0, 3, 5, 0, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "0, 3, 5, 0, 1" +ObservationString(0) = "(7, 2)" +ObservationTensor(0).x = [7] +ObservationTensor(0).t = [2.0] +ObservationTensor(0) = [7.0, 2.0] +Rewards() = [-5.9875] +Returns() = [-5.9875] +LegalActions() = [0, 1, 2, 3, 4, 5, 6] +StringLegalActions() = ["0", "1", "2", "3", "4", "5", "6"] + +# Apply action "6" +action: 6 + +# State 8 +# Apply action "5" +action: 5 + +# State 9 +# (2, 3)_a +IsTerminal() = False +History() = [0, 3, 5, 0, 1, 6, 5] +HistoryString() = "0, 3, 5, 0, 1, 6, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -5 +InformationStateString(0) = "0, 3, 5, 0, 1, 6, 5" +ObservationString(0) = "(2, 3)_a" +ObservationTensor(0).x = [2] +ObservationTensor(0).t = [3.0] +ObservationTensor(0) = [2.0, 3.0] +Rewards() = [0] +Returns() = [0] +DistributionSupport() = ['(0, 3)_a', '(1, 3)_a', '(2, 3)_a', '(3, 3)_a', '(4, 3)_a', '(5, 3)_a', '(6, 3)_a', '(7, 3)_a', '(8, 3)_a', '(9, 3)_a'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 10 +# Apply action "3" +action: 3 + +# State 11 +# Apply action "3" +action: 3 + +# State 12 +# Set mean field distribution to be uniform +action: update_distribution + +# State 13 +# Apply action "3" +action: 3 + +# State 14 +# Apply action "3" +action: 3 + +# State 15 +# Set mean field distribution to be uniform +action: update_distribution + +# State 16 +# Apply action "5" +action: 5 + +# State 17 +# Apply action "3" +action: 3 + +# State 18 +# 
Set mean field distribution to be uniform +action: update_distribution + +# State 19 +# Apply action "4" +action: 4 + +# State 20 +# Apply action "1" +action: 1 + +# State 21 +# Set mean field distribution to be uniform +action: update_distribution + +# State 22 +# Apply action "3" +action: 3 + +# State 23 +# Apply action "0" +action: 0 + +# State 24 +# Set mean field distribution to be uniform +action: update_distribution + +# State 25 +# Apply action "4" +action: 4 + +# State 26 +# Apply action "1" +action: 1 + +# State 27 +# Set mean field distribution to be uniform +action: update_distribution + +# State 28 +# Apply action "6" +action: 6 + +# State 29 +# Apply action "2" +action: 2 + +# State 30 +# (1, 10)_a +IsTerminal() = True +History() = [0, 3, 5, 0, 1, 6, 5, 3, 3, 3, 3, 5, 3, 4, 1, 3, 0, 4, 1, 6, 2] +HistoryString() = "0, 3, 5, 0, 1, 6, 5, 3, 3, 3, 3, 5, 3, 4, 1, 3, 0, 4, 1, 6, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "0, 3, 5, 0, 1, 6, 5, 3, 3, 3, 3, 5, 3, 4, 1, 3, 0, 4, 1, 6, 2" +ObservationString(0) = "(1, 10)_a" +ObservationTensor(0).x: ◉ +ObservationTensor(0).t = [10.0] +ObservationTensor(0) = [1.0, 10.0] +Rewards() = [0] +Returns() = [0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/mfg_crowd_modelling.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/mfg_crowd_modelling.txt new file mode 100644 index 0000000..2fa3c52 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/mfg_crowd_modelling.txt @@ -0,0 +1,280 @@ +game: mfg_crowd_modelling + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.MEAN_FIELD +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Mean Field Crowd Modelling" +GameType.max_num_players = 1 +GameType.min_num_players = 1 +GameType.parameter_specification = ["horizon", "size"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.REWARDS +GameType.short_name = "mfg_crowd_modelling" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 3 +PolicyTensorShape() = [3] +MaxChanceOutcomes() = 10 +GetParameters() = {horizon=10,size=10} +NumPlayers() = 1 +MinUtility() = -inf +MaxUtility() = inf +UtilitySum() = None +ObservationTensorShape() = [21] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 21 +MaxGameLength() = 10 +ToString() = "mfg_crowd_modelling()" + +# State 0 +# initial +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "" +ObservationString(0) = "initial" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.1), (1,0.1), (2,0.1), (3,0.1), (4,0.1), (5,0.1), (6,0.1), (7,0.1), (8,0.1), (9,0.1)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +StringLegalActions() = ["init_state=0", "init_state=1", "init_state=2", "init_state=3", "init_state=4", "init_state=5", "init_state=6", "init_state=7", "init_state=8", "init_state=9"] + +# Apply action "init_state=4" +action: 4 + +# State 1 +# (4, 0) +IsTerminal() = False +History() = [4] +HistoryString() = "4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 
+InformationStateString(0) = "4" +ObservationString(0) = "(4, 0)" +ObservationTensor(0): ◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +Rewards() = [3.10259] +Returns() = [3.10259] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["-1", "0", "1"] + +# Apply action "1" +action: 2 + +# State 2 +# (5, 0)_a_mu +IsTerminal() = False +History() = [4, 2] +HistoryString() = "4, 2" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "4, 2" +ObservationString(0) = "(5, 0)_a_mu" +ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.333333), (1,0.333333), (2,0.333333)] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["-1", "0", "1"] + +# Apply action "1" +action: 2 + +# State 3 +# (6, 1)_a +IsTerminal() = False +History() = [4, 2, 2] +HistoryString() = "4, 2, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -5 +InformationStateString(0) = "4, 2, 2" +ObservationString(0) = "(6, 1)_a" +ObservationTensor(0): ◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +Rewards() = [0] +Returns() = [3.10259] +DistributionSupport() = ['(0, 1)_a', '(1, 1)_a', '(2, 1)_a', '(3, 1)_a', '(4, 1)_a', '(5, 1)_a', '(6, 1)_a', '(7, 1)_a', '(8, 1)_a', '(9, 1)_a'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 4 +# (6, 1) +IsTerminal() = False +History() = [4, 2, 2] +HistoryString() = "4, 2, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "4, 2, 2" +ObservationString(0) = "(6, 1)" +ObservationTensor(0): ◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +Rewards() = [3.00259] +Returns() = [6.10517] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["-1", "0", "1"] + +# Apply action "0" +action: 1 + +# State 5 +# Apply action "0" +action: 1 + +# State 6 +# (6, 2)_a +IsTerminal() = False +History() = [4, 2, 2, 1, 1] +HistoryString() = "4, 2, 2, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -5 +InformationStateString(0) = "4, 2, 2, 1, 1" +ObservationString(0) = "(6, 2)_a" +ObservationTensor(0): ◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +Rewards() = [0] +Returns() = [6.10517] +DistributionSupport() = ['(0, 2)_a', '(1, 2)_a', '(2, 2)_a', '(3, 2)_a', '(4, 2)_a', '(5, 2)_a', '(6, 2)_a', '(7, 2)_a', '(8, 2)_a', '(9, 2)_a'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 7 +# (6, 2) +IsTerminal() = False +History() = [4, 2, 2, 1, 1] +HistoryString() = "4, 2, 2, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "4, 2, 2, 1, 1" +ObservationString(0) = "(6, 2)" +ObservationTensor(0): ◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +Rewards() = [3.10259] +Returns() = [9.20776] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["-1", "0", "1"] + +# Apply action "1" +action: 2 + +# State 8 +# Apply action "-1" +action: 0 + +# State 9 +# (6, 3)_a +IsTerminal() = False +History() = [4, 2, 2, 1, 1, 2, 0] +HistoryString() = "4, 2, 2, 1, 1, 2, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -5 +InformationStateString(0) = "4, 2, 2, 1, 1, 2, 0" +ObservationString(0) = "(6, 3)_a" +ObservationTensor(0): ◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +Rewards() = [0] +Returns() = [9.20776] +DistributionSupport() = ['(0, 3)_a', '(1, 3)_a', '(2, 3)_a', '(3, 3)_a', '(4, 3)_a', '(5, 3)_a', '(6, 3)_a', '(7, 3)_a', '(8, 3)_a', '(9, 3)_a'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 10 +# Apply action "-1" +action: 0 + +# State 11 +# Apply action "1" +action: 2 + +# State 12 +# Set mean field distribution 
to be uniform +action: update_distribution + +# State 13 +# Apply action "-1" +action: 0 + +# State 14 +# Apply action "0" +action: 1 + +# State 15 +# Set mean field distribution to be uniform +action: update_distribution + +# State 16 +# Apply action "-1" +action: 0 + +# State 17 +# Apply action "0" +action: 1 + +# State 18 +# Set mean field distribution to be uniform +action: update_distribution + +# State 19 +# Apply action "0" +action: 1 + +# State 20 +# Apply action "1" +action: 2 + +# State 21 +# Set mean field distribution to be uniform +action: update_distribution + +# State 22 +# Apply action "-1" +action: 0 + +# State 23 +# Apply action "0" +action: 1 + +# State 24 +# Set mean field distribution to be uniform +action: update_distribution + +# State 25 +# Apply action "0" +action: 1 + +# State 26 +# Apply action "1" +action: 2 + +# State 27 +# Set mean field distribution to be uniform +action: update_distribution + +# State 28 +# Apply action "1" +action: 2 + +# State 29 +# Apply action "-1" +action: 0 + +# State 30 +# (5, 10)_a +IsTerminal() = True +History() = [4, 2, 2, 1, 1, 2, 0, 0, 2, 0, 1, 0, 1, 1, 2, 0, 1, 1, 2, 2, 0] +HistoryString() = "4, 2, 2, 1, 1, 2, 0, 0, 2, 0, 1, 0, 1, 1, 2, 0, 1, 1, 2, 2, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "4, 2, 2, 1, 1, 2, 0, 0, 2, 0, 1, 0, 1, 1, 2, 0, 1, 1, 2, 2, 0" +ObservationString(0) = "(5, 10)_a" +ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +Rewards() = [0] +Returns() = [31.0259] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/mfg_crowd_modelling_2d.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/mfg_crowd_modelling_2d.txt new file mode 100644 index 0000000..5ce2552 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/mfg_crowd_modelling_2d.txt @@ -0,0 +1,280 @@ +game: mfg_crowd_modelling_2d + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.MEAN_FIELD +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Mean Field Crowd Modelling 2D" +GameType.max_num_players = 1 +GameType.min_num_players = 1 +GameType.parameter_specification = ["crowd_aversion_coef", "forbidden_states", "horizon", "initial_distribution", "initial_distribution_value", "noise_intensity", "only_distribution_reward", "positional_reward", "positional_reward_value", "size", "with_congestion"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.REWARDS +GameType.short_name = "mfg_crowd_modelling_2d" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 5 +PolicyTensorShape() = [5] +MaxChanceOutcomes() = 100 +GetParameters() = {crowd_aversion_coef=1.0,forbidden_states=[],horizon=10,initial_distribution=[],initial_distribution_value=[],noise_intensity=1.0,only_distribution_reward=False,positional_reward=[],positional_reward_value=[],size=10,with_congestion=False} +NumPlayers() = 1 +MinUtility() = -inf +MaxUtility() = inf +UtilitySum() = None +ObservationTensorShape() = [31] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 31 +MaxGameLength() = 10 +ToString() = "mfg_crowd_modelling_2d()" + +# State 0 +# initial +IsTerminal() = False +History() = [] +HistoryString() = "" 
+IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "" +ObservationString(0) = "initial" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.01), (1,0.01), (2,0.01), (3,0.01), (4,0.01), (5,0.01), (6,0.01), (7,0.01), (8,0.01), (9,0.01), (10,0.01), (11,0.01), (12,0.01), (13,0.01), (14,0.01), (15,0.01), (16,0.01), (17,0.01), (18,0.01), (19,0.01), (20,0.01), (21,0.01), (22,0.01), (23,0.01), (24,0.01), (25,0.01), (26,0.01), (27,0.01), (28,0.01), (29,0.01), (30,0.01), (31,0.01), (32,0.01), (33,0.01), (34,0.01), (35,0.01), (36,0.01), (37,0.01), (38,0.01), (39,0.01), (40,0.01), (41,0.01), (42,0.01), (43,0.01), (44,0.01), (45,0.01), (46,0.01), (47,0.01), (48,0.01), (49,0.01), (50,0.01), (51,0.01), (52,0.01), (53,0.01), (54,0.01), (55,0.01), (56,0.01), (57,0.01), (58,0.01), (59,0.01), (60,0.01), (61,0.01), (62,0.01), (63,0.01), (64,0.01), (65,0.01), (66,0.01), (67,0.01), (68,0.01), (69,0.01), (70,0.01), (71,0.01), (72,0.01), (73,0.01), (74,0.01), (75,0.01), (76,0.01), (77,0.01), (78,0.01), (79,0.01), (80,0.01), (81,0.01), (82,0.01), (83,0.01), (84,0.01), (85,0.01), (86,0.01), (87,0.01), (88,0.01), (89,0.01), (90,0.01), (91,0.01), (92,0.01), (93,0.01), (94,0.01), (95,0.01), (96,0.01), (97,0.01), (98,0.01), (99,0.01)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99] +StringLegalActions() = ["init_state=0", "init_state=1", "init_state=2", "init_state=3", "init_state=4", "init_state=5", "init_state=6", "init_state=7", "init_state=8", "init_state=9", "init_state=10", "init_state=11", "init_state=12", "init_state=13", "init_state=14", "init_state=15", "init_state=16", "init_state=17", "init_state=18", "init_state=19", "init_state=20", "init_state=21", "init_state=22", "init_state=23", "init_state=24", "init_state=25", "init_state=26", "init_state=27", "init_state=28", "init_state=29", "init_state=30", "init_state=31", "init_state=32", "init_state=33", "init_state=34", "init_state=35", "init_state=36", "init_state=37", "init_state=38", "init_state=39", "init_state=40", "init_state=41", "init_state=42", "init_state=43", "init_state=44", "init_state=45", "init_state=46", "init_state=47", "init_state=48", "init_state=49", "init_state=50", "init_state=51", "init_state=52", "init_state=53", "init_state=54", "init_state=55", "init_state=56", "init_state=57", "init_state=58", "init_state=59", "init_state=60", "init_state=61", "init_state=62", "init_state=63", "init_state=64", "init_state=65", "init_state=66", "init_state=67", "init_state=68", "init_state=69", "init_state=70", "init_state=71", "init_state=72", "init_state=73", "init_state=74", "init_state=75", "init_state=76", "init_state=77", "init_state=78", "init_state=79", "init_state=80", "init_state=81", "init_state=82", "init_state=83", "init_state=84", "init_state=85", "init_state=86", "init_state=87", "init_state=88", "init_state=89", "init_state=90", "init_state=91", "init_state=92", "init_state=93", "init_state=94", "init_state=95", "init_state=96", "init_state=97", "init_state=98", "init_state=99"] + +# Apply action "init_state=44" +action: 44 + +# State 1 +# (4, 4, 0) +IsTerminal() = False +History() = [44] 
+HistoryString() = "44" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "44" +ObservationString(0) = "(4, 4, 0)" +ObservationTensor(0): ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +Rewards() = [6.20517] +Returns() = [6.20517] +LegalActions() = [0, 1, 2, 3, 4] +StringLegalActions() = ["(0,-1)", "(-1,0)", "(0,0)", "(1,0)", "(0,1)"] + +# Apply action "(0,0)" +action: 2 + +# State 2 +# (4, 4, 0)_a_mu +IsTerminal() = False +History() = [44, 2] +HistoryString() = "44, 2" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "44, 2" +ObservationString(0) = "(4, 4, 0)_a_mu" +ObservationTensor(0): ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.2), (1,0.2), (2,0.2), (3,0.2), (4,0.2)] +LegalActions() = [0, 1, 2, 3, 4] +StringLegalActions() = ["(0,-1)", "(-1,0)", "(0,0)", "(1,0)", "(0,1)"] + +# Apply action "(0,0)" +action: 2 + +# State 3 +# (4, 4, 1)_a +IsTerminal() = False +History() = [44, 2, 2] +HistoryString() = "44, 2, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -5 +InformationStateString(0) = "44, 2, 2" +ObservationString(0) = "(4, 4, 1)_a" +ObservationTensor(0): ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +Rewards() = [0] +Returns() = [6.20517] +DistributionSupport() = ['(0, 0, 1)_a', '(0, 1, 1)_a', '(0, 2, 1)_a', '(0, 3, 1)_a', '(0, 4, 1)_a', '(0, 5, 1)_a', '(0, 6, 1)_a', '(0, 7, 1)_a', '(0, 8, 1)_a', '(0, 9, 1)_a', '(1, 0, 1)_a', '(1, 1, 1)_a', '(1, 2, 1)_a', '(1, 3, 1)_a', '(1, 4, 1)_a', '(1, 5, 1)_a', '(1, 6, 1)_a', '(1, 7, 1)_a', '(1, 8, 1)_a', '(1, 9, 1)_a', '(2, 0, 1)_a', '(2, 1, 1)_a', '(2, 2, 1)_a', '(2, 3, 1)_a', '(2, 4, 1)_a', '(2, 5, 1)_a', '(2, 6, 1)_a', '(2, 7, 1)_a', '(2, 8, 1)_a', '(2, 9, 1)_a', '(3, 0, 1)_a', '(3, 1, 1)_a', '(3, 2, 1)_a', '(3, 3, 1)_a', '(3, 4, 1)_a', '(3, 5, 1)_a', '(3, 6, 1)_a', '(3, 7, 1)_a', '(3, 8, 1)_a', '(3, 9, 1)_a', '(4, 0, 1)_a', '(4, 1, 1)_a', '(4, 2, 1)_a', '(4, 3, 1)_a', '(4, 4, 1)_a', '(4, 5, 1)_a', '(4, 6, 1)_a', '(4, 7, 1)_a', '(4, 8, 1)_a', '(4, 9, 1)_a', '(5, 0, 1)_a', '(5, 1, 1)_a', '(5, 2, 1)_a', '(5, 3, 1)_a', '(5, 4, 1)_a', '(5, 5, 1)_a', '(5, 6, 1)_a', '(5, 7, 1)_a', '(5, 8, 1)_a', '(5, 9, 1)_a', '(6, 0, 1)_a', '(6, 1, 1)_a', '(6, 2, 1)_a', '(6, 3, 1)_a', '(6, 4, 1)_a', '(6, 5, 1)_a', '(6, 6, 1)_a', '(6, 7, 1)_a', '(6, 8, 1)_a', '(6, 9, 1)_a', '(7, 0, 1)_a', '(7, 1, 1)_a', '(7, 2, 1)_a', '(7, 3, 1)_a', '(7, 4, 1)_a', '(7, 5, 1)_a', '(7, 6, 1)_a', '(7, 7, 1)_a', '(7, 8, 1)_a', '(7, 9, 1)_a', '(8, 0, 1)_a', '(8, 1, 1)_a', '(8, 2, 1)_a', '(8, 3, 1)_a', '(8, 4, 1)_a', '(8, 5, 1)_a', '(8, 6, 1)_a', '(8, 7, 1)_a', '(8, 8, 1)_a', '(8, 9, 1)_a', '(9, 0, 1)_a', '(9, 1, 1)_a', '(9, 2, 1)_a', '(9, 3, 1)_a', '(9, 4, 1)_a', '(9, 5, 1)_a', '(9, 6, 1)_a', '(9, 7, 1)_a', '(9, 8, 1)_a', '(9, 9, 1)_a'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 4 +# (4, 4, 1) +IsTerminal() = False +History() = [44, 2, 2] +HistoryString() = "44, 2, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "44, 2, 2" +ObservationString(0) = "(4, 4, 1)" +ObservationTensor(0): ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +Rewards() = [6.20517] +Returns() = [12.4103] +LegalActions() = [0, 1, 2, 3, 4] +StringLegalActions() = ["(0,-1)", "(-1,0)", "(0,0)", "(1,0)", "(0,1)"] + +# Apply action "(1,0)" +action: 3 + +# State 5 +# Apply action "(0,1)" +action: 4 + +# State 6 +# (5, 5, 2)_a +IsTerminal() = False +History() = [44, 2, 2, 3, 4] +HistoryString() = "44, 2, 2, 3, 4" 
+IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -5 +InformationStateString(0) = "44, 2, 2, 3, 4" +ObservationString(0) = "(5, 5, 2)_a" +ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +Rewards() = [0] +Returns() = [12.4103] +DistributionSupport() = ['(0, 0, 2)_a', '(0, 1, 2)_a', '(0, 2, 2)_a', '(0, 3, 2)_a', '(0, 4, 2)_a', '(0, 5, 2)_a', '(0, 6, 2)_a', '(0, 7, 2)_a', '(0, 8, 2)_a', '(0, 9, 2)_a', '(1, 0, 2)_a', '(1, 1, 2)_a', '(1, 2, 2)_a', '(1, 3, 2)_a', '(1, 4, 2)_a', '(1, 5, 2)_a', '(1, 6, 2)_a', '(1, 7, 2)_a', '(1, 8, 2)_a', '(1, 9, 2)_a', '(2, 0, 2)_a', '(2, 1, 2)_a', '(2, 2, 2)_a', '(2, 3, 2)_a', '(2, 4, 2)_a', '(2, 5, 2)_a', '(2, 6, 2)_a', '(2, 7, 2)_a', '(2, 8, 2)_a', '(2, 9, 2)_a', '(3, 0, 2)_a', '(3, 1, 2)_a', '(3, 2, 2)_a', '(3, 3, 2)_a', '(3, 4, 2)_a', '(3, 5, 2)_a', '(3, 6, 2)_a', '(3, 7, 2)_a', '(3, 8, 2)_a', '(3, 9, 2)_a', '(4, 0, 2)_a', '(4, 1, 2)_a', '(4, 2, 2)_a', '(4, 3, 2)_a', '(4, 4, 2)_a', '(4, 5, 2)_a', '(4, 6, 2)_a', '(4, 7, 2)_a', '(4, 8, 2)_a', '(4, 9, 2)_a', '(5, 0, 2)_a', '(5, 1, 2)_a', '(5, 2, 2)_a', '(5, 3, 2)_a', '(5, 4, 2)_a', '(5, 5, 2)_a', '(5, 6, 2)_a', '(5, 7, 2)_a', '(5, 8, 2)_a', '(5, 9, 2)_a', '(6, 0, 2)_a', '(6, 1, 2)_a', '(6, 2, 2)_a', '(6, 3, 2)_a', '(6, 4, 2)_a', '(6, 5, 2)_a', '(6, 6, 2)_a', '(6, 7, 2)_a', '(6, 8, 2)_a', '(6, 9, 2)_a', '(7, 0, 2)_a', '(7, 1, 2)_a', '(7, 2, 2)_a', '(7, 3, 2)_a', '(7, 4, 2)_a', '(7, 5, 2)_a', '(7, 6, 2)_a', '(7, 7, 2)_a', '(7, 8, 2)_a', '(7, 9, 2)_a', '(8, 0, 2)_a', '(8, 1, 2)_a', '(8, 2, 2)_a', '(8, 3, 2)_a', '(8, 4, 2)_a', '(8, 5, 2)_a', '(8, 6, 2)_a', '(8, 7, 2)_a', '(8, 8, 2)_a', '(8, 9, 2)_a', '(9, 0, 2)_a', '(9, 1, 2)_a', '(9, 2, 2)_a', '(9, 3, 2)_a', '(9, 4, 2)_a', '(9, 5, 2)_a', '(9, 6, 2)_a', '(9, 7, 2)_a', '(9, 8, 2)_a', '(9, 9, 2)_a'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 7 +# (5, 5, 2) +IsTerminal() = False +History() = [44, 2, 2, 3, 4] +HistoryString() = "44, 2, 2, 3, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "44, 2, 2, 3, 4" +ObservationString(0) = "(5, 5, 2)" +ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +Rewards() = [6.50517] +Returns() = [18.9155] +LegalActions() = [0, 1, 2, 3, 4] +StringLegalActions() = ["(0,-1)", "(-1,0)", "(0,0)", "(1,0)", "(0,1)"] + +# Apply action "(-1,0)" +action: 1 + +# State 8 +# Apply action "(1,0)" +action: 3 + +# State 9 +# (5, 5, 3)_a +IsTerminal() = False +History() = [44, 2, 2, 3, 4, 1, 3] +HistoryString() = "44, 2, 2, 3, 4, 1, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -5 +InformationStateString(0) = "44, 2, 2, 3, 4, 1, 3" +ObservationString(0) = "(5, 5, 3)_a" +ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +Rewards() = [0] +Returns() = [18.9155] +DistributionSupport() = ['(0, 0, 3)_a', '(0, 1, 3)_a', '(0, 2, 3)_a', '(0, 3, 3)_a', '(0, 4, 3)_a', '(0, 5, 3)_a', '(0, 6, 3)_a', '(0, 7, 3)_a', '(0, 8, 3)_a', '(0, 9, 3)_a', '(1, 0, 3)_a', '(1, 1, 3)_a', '(1, 2, 3)_a', '(1, 3, 3)_a', '(1, 4, 3)_a', '(1, 5, 3)_a', '(1, 6, 3)_a', '(1, 7, 3)_a', '(1, 8, 3)_a', '(1, 9, 3)_a', '(2, 0, 3)_a', '(2, 1, 3)_a', '(2, 2, 3)_a', '(2, 3, 3)_a', '(2, 4, 3)_a', '(2, 5, 3)_a', '(2, 6, 3)_a', '(2, 7, 3)_a', '(2, 8, 3)_a', '(2, 9, 3)_a', '(3, 0, 3)_a', '(3, 1, 3)_a', '(3, 2, 3)_a', '(3, 3, 3)_a', '(3, 4, 3)_a', '(3, 5, 3)_a', '(3, 6, 3)_a', '(3, 7, 3)_a', '(3, 8, 3)_a', '(3, 9, 3)_a', '(4, 0, 3)_a', '(4, 1, 3)_a', '(4, 2, 3)_a', '(4, 3, 3)_a', '(4, 4, 3)_a', '(4, 5, 3)_a', '(4, 6, 3)_a', '(4, 7, 
3)_a', '(4, 8, 3)_a', '(4, 9, 3)_a', '(5, 0, 3)_a', '(5, 1, 3)_a', '(5, 2, 3)_a', '(5, 3, 3)_a', '(5, 4, 3)_a', '(5, 5, 3)_a', '(5, 6, 3)_a', '(5, 7, 3)_a', '(5, 8, 3)_a', '(5, 9, 3)_a', '(6, 0, 3)_a', '(6, 1, 3)_a', '(6, 2, 3)_a', '(6, 3, 3)_a', '(6, 4, 3)_a', '(6, 5, 3)_a', '(6, 6, 3)_a', '(6, 7, 3)_a', '(6, 8, 3)_a', '(6, 9, 3)_a', '(7, 0, 3)_a', '(7, 1, 3)_a', '(7, 2, 3)_a', '(7, 3, 3)_a', '(7, 4, 3)_a', '(7, 5, 3)_a', '(7, 6, 3)_a', '(7, 7, 3)_a', '(7, 8, 3)_a', '(7, 9, 3)_a', '(8, 0, 3)_a', '(8, 1, 3)_a', '(8, 2, 3)_a', '(8, 3, 3)_a', '(8, 4, 3)_a', '(8, 5, 3)_a', '(8, 6, 3)_a', '(8, 7, 3)_a', '(8, 8, 3)_a', '(8, 9, 3)_a', '(9, 0, 3)_a', '(9, 1, 3)_a', '(9, 2, 3)_a', '(9, 3, 3)_a', '(9, 4, 3)_a', '(9, 5, 3)_a', '(9, 6, 3)_a', '(9, 7, 3)_a', '(9, 8, 3)_a', '(9, 9, 3)_a'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 10 +# Apply action "(0,0)" +action: 2 + +# State 11 +# Apply action "(0,0)" +action: 2 + +# State 12 +# Set mean field distribution to be uniform +action: update_distribution + +# State 13 +# Apply action "(0,1)" +action: 4 + +# State 14 +# Apply action "(1,0)" +action: 3 + +# State 15 +# Set mean field distribution to be uniform +action: update_distribution + +# State 16 +# Apply action "(0,0)" +action: 2 + +# State 17 +# Apply action "(0,-1)" +action: 0 + +# State 18 +# Set mean field distribution to be uniform +action: update_distribution + +# State 19 +# Apply action "(0,1)" +action: 4 + +# State 20 +# Apply action "(0,0)" +action: 2 + +# State 21 +# Set mean field distribution to be uniform +action: update_distribution + +# State 22 +# Apply action "(0,0)" +action: 2 + +# State 23 +# Apply action "(0,0)" +action: 2 + +# State 24 +# Set mean field distribution to be uniform +action: update_distribution + +# State 25 +# Apply action "(1,0)" +action: 3 + +# State 26 +# Apply action "(0,1)" +action: 4 + +# State 27 +# Set mean field distribution to be uniform +action: update_distribution + +# State 28 +# Apply action "(0,1)" +action: 4 + +# State 29 +# Apply action "(1,0)" +action: 3 + +# State 30 +# (8, 8, 10)_a +IsTerminal() = True +History() = [44, 2, 2, 3, 4, 1, 3, 2, 2, 4, 3, 2, 0, 4, 2, 2, 2, 3, 4, 4, 3] +HistoryString() = "44, 2, 2, 3, 4, 1, 3, 2, 2, 4, 3, 2, 0, 4, 2, 2, 2, 3, 4, 4, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "44, 2, 2, 3, 4, 1, 3, 2, 2, 4, 3, 2, 0, 4, 2, 2, 2, 3, 4, 4, 3" +ObservationString(0) = "(8, 8, 10)_a" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉ +Rewards() = [0] +Returns() = [62.5517] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/mfg_dynamic_routing.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/mfg_dynamic_routing.txt new file mode 100644 index 0000000..05341e7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/mfg_dynamic_routing.txt @@ -0,0 +1,215 @@ +game: mfg_dynamic_routing + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.MEAN_FIELD +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Cpp Mean Field Dynamic Routing" +GameType.max_num_players = 1 +GameType.min_num_players = 1 +GameType.parameter_specification = ["max_num_time_step", "network", "perform_sanity_checks", "players", "time_step_length"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True 
+GameType.provides_observation_tensor = False +GameType.provides_factored_observation_string = True +GameType.reward_model = RewardModel.REWARDS +GameType.short_name = "mfg_dynamic_routing" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 8 +PolicyTensorShape() = [8] +MaxChanceOutcomes() = 1 +GetParameters() = {max_num_time_step=10,network=braess,perform_sanity_checks=True,time_step_length=1.0} +NumPlayers() = 1 +MinUtility() = -11.0 +MaxUtility() = 0.0 +UtilitySum() = None +MaxGameLength() = 10 +ToString() = "mfg_dynamic_routing()" + +# State 0 +# Before initial chance node. +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "" +ObservationString(0) = "Before initial chance node." +ChanceOutcomes() = [(0,1)] +LegalActions() = [0] +StringLegalActions() = ["Vehicle is assigned to population 0"] + +# Apply action "Vehicle is assigned to population 0" +action: 0 + +# State 1 +# Location=O->A, waiting time=0, t=0, destination=D->E +IsTerminal() = False +History() = [0] +HistoryString() = "0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "0" +ObservationString(0) = "Location=O->A, waiting time=0, t=0, destination=D->E" +Rewards() = [0] +Returns() = [0] +LegalActions() = [1, 2] +StringLegalActions() = ["Vehicle 0 would like to move to A->B.", "Vehicle 0 would like to move to A->C."] + +# Apply action "Vehicle 0 would like to move to A->C." +action: 2 + +# State 2 +# Location=A->C, waiting time=-1, t=1_mean_field, destination=D->E +IsTerminal() = False +History() = [0, 2] +HistoryString() = "0, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -5 +InformationStateString(0) = "0, 2" +ObservationString(0) = "Location=A->C, waiting time=-1, t=1_mean_field, destination=D->E" +Rewards() = [0] +Returns() = [0] +DistributionSupport() = ['Location=A->C, waiting time=-1, t=1_mean_field, destination=D->E', 'Location=A->C, waiting time=0, t=1_mean_field, destination=D->E', 'Location=A->C, waiting time=1, t=1_mean_field, destination=D->E', 'Location=A->C, waiting time=2, t=1_mean_field, destination=D->E', 'Location=A->C, waiting time=3, t=1_mean_field, destination=D->E', 'Location=A->C, waiting time=4, t=1_mean_field, destination=D->E', 'Location=A->C, waiting time=5, t=1_mean_field, destination=D->E', 'Location=A->C, waiting time=6, t=1_mean_field, destination=D->E', 'Location=A->C, waiting time=7, t=1_mean_field, destination=D->E', 'Location=A->C, waiting time=8, t=1_mean_field, destination=D->E', 'Location=A->C, waiting time=9, t=1_mean_field, destination=D->E'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 3 +# Location=A->C, waiting time=1, t=1, destination=D->E +IsTerminal() = False +History() = [0, 2] +HistoryString() = "0, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "0, 2" +ObservationString(0) = "Location=A->C, waiting time=1, t=1, destination=D->E" +Rewards() = [0] +Returns() = [0] +LegalActions() = [0] +StringLegalActions() = ["Vehicle 0 reach a sink node or its destination."] + +# Apply action "Vehicle 0 reach a sink node or its destination." 
+action: 0 + +# State 4 +# Location=A->C, waiting time=0, t=2_mean_field, destination=D->E +IsTerminal() = False +History() = [0, 2, 0] +HistoryString() = "0, 2, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -5 +InformationStateString(0) = "0, 2, 0" +ObservationString(0) = "Location=A->C, waiting time=0, t=2_mean_field, destination=D->E" +Rewards() = [0] +Returns() = [0] +DistributionSupport() = ['Location=A->C, waiting time=-1, t=2_mean_field, destination=D->E', 'Location=A->C, waiting time=0, t=2_mean_field, destination=D->E', 'Location=A->C, waiting time=1, t=2_mean_field, destination=D->E', 'Location=A->C, waiting time=2, t=2_mean_field, destination=D->E', 'Location=A->C, waiting time=3, t=2_mean_field, destination=D->E', 'Location=A->C, waiting time=4, t=2_mean_field, destination=D->E', 'Location=A->C, waiting time=5, t=2_mean_field, destination=D->E', 'Location=A->C, waiting time=6, t=2_mean_field, destination=D->E', 'Location=A->C, waiting time=7, t=2_mean_field, destination=D->E', 'Location=A->C, waiting time=8, t=2_mean_field, destination=D->E', 'Location=A->C, waiting time=9, t=2_mean_field, destination=D->E'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 5 +# Location=A->C, waiting time=0, t=2, destination=D->E +IsTerminal() = False +History() = [0, 2, 0] +HistoryString() = "0, 2, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "0, 2, 0" +ObservationString(0) = "Location=A->C, waiting time=0, t=2, destination=D->E" +Rewards() = [0] +Returns() = [0] +LegalActions() = [5] +StringLegalActions() = ["Vehicle 0 would like to move to C->D."] + +# Apply action "Vehicle 0 would like to move to C->D." +action: 5 + +# State 6 +# Location=C->D, waiting time=-1, t=3_mean_field, destination=D->E +IsTerminal() = False +History() = [0, 2, 0, 5] +HistoryString() = "0, 2, 0, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -5 +InformationStateString(0) = "0, 2, 0, 5" +ObservationString(0) = "Location=C->D, waiting time=-1, t=3_mean_field, destination=D->E" +Rewards() = [0] +Returns() = [0] +DistributionSupport() = ['Location=C->D, waiting time=-1, t=3_mean_field, destination=D->E', 'Location=C->D, waiting time=0, t=3_mean_field, destination=D->E', 'Location=C->D, waiting time=1, t=3_mean_field, destination=D->E', 'Location=C->D, waiting time=2, t=3_mean_field, destination=D->E', 'Location=C->D, waiting time=3, t=3_mean_field, destination=D->E', 'Location=C->D, waiting time=4, t=3_mean_field, destination=D->E', 'Location=C->D, waiting time=5, t=3_mean_field, destination=D->E', 'Location=C->D, waiting time=6, t=3_mean_field, destination=D->E', 'Location=C->D, waiting time=7, t=3_mean_field, destination=D->E', 'Location=C->D, waiting time=8, t=3_mean_field, destination=D->E', 'Location=C->D, waiting time=9, t=3_mean_field, destination=D->E'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 7 +# Apply action "Vehicle 0 reach a sink node or its destination." +action: 0 + +# State 8 +# Set mean field distribution to be uniform +action: update_distribution + +# State 9 +# Apply action "Vehicle 0 would like to move to D->E." +action: 6 + +# State 10 +# Set mean field distribution to be uniform +action: update_distribution + +# State 11 +# Apply action "Vehicle 0 reach a sink node or its destination." 
+action: 0 + +# State 12 +# Set mean field distribution to be uniform +action: update_distribution + +# State 13 +# Apply action "Vehicle 0 reach a sink node or its destination." +action: 0 + +# State 14 +# Set mean field distribution to be uniform +action: update_distribution + +# State 15 +# Apply action "Vehicle 0 reach a sink node or its destination." +action: 0 + +# State 16 +# Set mean field distribution to be uniform +action: update_distribution + +# State 17 +# Apply action "Vehicle 0 reach a sink node or its destination." +action: 0 + +# State 18 +# Set mean field distribution to be uniform +action: update_distribution + +# State 19 +# Apply action "Vehicle 0 reach a sink node or its destination." +action: 0 + +# State 20 +# Arrived at D->E, with arrival time 4.00, t=10 +IsTerminal() = True +History() = [0, 2, 0, 5, 0, 6, 0, 0, 0, 0, 0] +HistoryString() = "0, 2, 0, 5, 0, 6, 0, 0, 0, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "0, 2, 0, 5, 0, 6, 0, 0, 0, 0, 0" +ObservationString(0) = "Arrived at D->E, with arrival time 4.00, t=10" +Rewards() = [-4] +Returns() = [-4] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/mfg_garnet.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/mfg_garnet.txt new file mode 100644 index 0000000..d83bff8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/mfg_garnet.txt @@ -0,0 +1,280 @@ +game: mfg_garnet + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.MEAN_FIELD +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Mean Field Garnet" +GameType.max_num_players = 1 +GameType.min_num_players = 1 +GameType.parameter_specification = ["eta", "horizon", "num_action", "num_chance_action", "seed", "size", "sparsity_factor"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.REWARDS +GameType.short_name = "mfg_garnet" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 3 +PolicyTensorShape() = [3] +MaxChanceOutcomes() = 10 +GetParameters() = {eta=1.0,horizon=10,num_action=3,num_chance_action=3,seed=0,size=10,sparsity_factor=1.0} +NumPlayers() = 1 +MinUtility() = -inf +MaxUtility() = inf +UtilitySum() = None +ObservationTensorShape() = [21] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 21 +MaxGameLength() = 10 +ToString() = "mfg_garnet()" + +# State 0 +# initial +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "initial" +ObservationString(0) = "initial" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.1), (1,0.1), (2,0.1), (3,0.1), (4,0.1), (5,0.1), (6,0.1), (7,0.1), (8,0.1), (9,0.1)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +StringLegalActions() = ["init_state=0", "init_state=1", "init_state=2", "init_state=3", "init_state=4", "init_state=5", "init_state=6", "init_state=7", "init_state=8", "init_state=9"] + +# Apply action "init_state=1" +action: 1 + +# State 1 +# (1, 0) +IsTerminal() = False +History() = [1] +HistoryString() = "1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 
+InformationStateString(0) = "(1, 0)" +ObservationString(0) = "(1, 0)" +ObservationTensor(0): ◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +Rewards() = [2.60012] +Returns() = [2.60012] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["0", "1", "2"] + +# Apply action "2" +action: 2 + +# State 2 +# (1, 0, 2)_a_mu +IsTerminal() = False +History() = [1, 2] +HistoryString() = "1, 2" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "(1, 0, 2)_a_mu" +ObservationString(0) = "(1, 0, 2)_a_mu" +ObservationTensor(0): ◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.313218), (1,0.652198), (2,0.0345838)] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["0", "1", "2"] + +# Apply action "0" +action: 0 + +# State 3 +# (5, 1)_a +IsTerminal() = False +History() = [1, 2, 0] +HistoryString() = "1, 2, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -5 +InformationStateString(0) = "(5, 1)_a" +ObservationString(0) = "(5, 1)_a" +ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +Rewards() = [0] +Returns() = [2.60012] +DistributionSupport() = ['(0, 1)_a', '(1, 1)_a', '(2, 1)_a', '(3, 1)_a', '(4, 1)_a', '(5, 1)_a', '(6, 1)_a', '(7, 1)_a', '(8, 1)_a', '(9, 1)_a'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 4 +# (5, 1) +IsTerminal() = False +History() = [1, 2, 0] +HistoryString() = "1, 2, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "(5, 1)" +ObservationString(0) = "(5, 1)" +ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +Rewards() = [2.62358] +Returns() = [5.2237] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["0", "1", "2"] + +# Apply action "1" +action: 1 + +# State 5 +# Apply action "2" +action: 2 + +# State 6 +# (9, 2)_a +IsTerminal() = False +History() = [1, 2, 0, 1, 2] +HistoryString() = "1, 2, 0, 1, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -5 +InformationStateString(0) = "(9, 2)_a" +ObservationString(0) = "(9, 2)_a" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯ +Rewards() = [0] +Returns() = [5.2237] +DistributionSupport() = ['(0, 2)_a', '(1, 2)_a', '(2, 2)_a', '(3, 2)_a', '(4, 2)_a', '(5, 2)_a', '(6, 2)_a', '(7, 2)_a', '(8, 2)_a', '(9, 2)_a'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 7 +# (9, 2) +IsTerminal() = False +History() = [1, 2, 0, 1, 2] +HistoryString() = "1, 2, 0, 1, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "(9, 2)" +ObservationString(0) = "(9, 2)" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯ +Rewards() = [2.69868] +Returns() = [7.92239] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["0", "1", "2"] + +# Apply action "2" +action: 2 + +# State 8 +# Apply action "0" +action: 0 + +# State 9 +# (3, 3)_a +IsTerminal() = False +History() = [1, 2, 0, 1, 2, 2, 0] +HistoryString() = "1, 2, 0, 1, 2, 2, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -5 +InformationStateString(0) = "(3, 3)_a" +ObservationString(0) = "(3, 3)_a" +ObservationTensor(0): ◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +Rewards() = [0] +Returns() = [7.92239] +DistributionSupport() = ['(0, 3)_a', '(1, 3)_a', '(2, 3)_a', '(3, 3)_a', '(4, 3)_a', '(5, 3)_a', '(6, 3)_a', '(7, 3)_a', '(8, 3)_a', '(9, 3)_a'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 10 +# Apply action "0" +action: 0 + +# State 11 +# Apply action "1" +action: 1 + +# State 12 +# Set mean field distribution to be 
uniform +action: update_distribution + +# State 13 +# Apply action "1" +action: 1 + +# State 14 +# Apply action "0" +action: 0 + +# State 15 +# Set mean field distribution to be uniform +action: update_distribution + +# State 16 +# Apply action "1" +action: 1 + +# State 17 +# Apply action "0" +action: 0 + +# State 18 +# Set mean field distribution to be uniform +action: update_distribution + +# State 19 +# Apply action "1" +action: 1 + +# State 20 +# Apply action "1" +action: 1 + +# State 21 +# Set mean field distribution to be uniform +action: update_distribution + +# State 22 +# Apply action "1" +action: 1 + +# State 23 +# Apply action "1" +action: 1 + +# State 24 +# Set mean field distribution to be uniform +action: update_distribution + +# State 25 +# Apply action "1" +action: 1 + +# State 26 +# Apply action "0" +action: 0 + +# State 27 +# Set mean field distribution to be uniform +action: update_distribution + +# State 28 +# Apply action "0" +action: 0 + +# State 29 +# Apply action "2" +action: 2 + +# State 30 +# (3, 10)_a +IsTerminal() = True +History() = [1, 2, 0, 1, 2, 2, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 2] +HistoryString() = "1, 2, 0, 1, 2, 2, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "(3, 10)_a" +ObservationString(0) = "(3, 10)_a" +ObservationTensor(0): ◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +Rewards() = [0] +Returns() = [27.7474] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/misere(game=kuhn_poker()).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/misere(game=kuhn_poker()).txt new file mode 100644 index 0000000..04dd9bc --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/misere(game=kuhn_poker()).txt @@ -0,0 +1,170 @@ +game: misere(game=kuhn_poker()) + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Misere Kuhn Poker" +GameType.max_num_players = 10 +GameType.min_num_players = 2 +GameType.parameter_specification = ["players"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = True +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "misere" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 2 +PolicyTensorShape() = [2] +MaxChanceOutcomes() = 3 +GetParameters() = {game=kuhn_poker()} +NumPlayers() = 2 +MinUtility() = -2.0 +MaxUtility() = 2.0 +UtilitySum() = -0.0 +InformationStateTensorShape() = [11] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 11 +ObservationTensorShape() = [7] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 7 +MaxGameLength() = 3 +ToString() = "misere(game=kuhn_poker())" + +# State 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "" +InformationStateString(1) = "" +InformationStateTensor(0): ◉◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "" +ObservationString(1) = "" +ObservationTensor(0): ◉◯◯◯◯◉◉ +ObservationTensor(1): ◯◉◯◯◯◉◉ +ChanceOutcomes() = [(0,0.333333), (1,0.333333), (2,0.333333)] +LegalActions() = 
[0, 1, 2] +StringLegalActions() = ["Deal:0", "Deal:1", "Deal:2"] + +# Apply action "Deal:1" +action: 1 + +# State 1 +# 1 +IsTerminal() = False +History() = [1] +HistoryString() = "1" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "1" +InformationStateString(1) = "" +InformationStateTensor(0): ◉◯◯◉◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "111" +ObservationString(1) = "" +ObservationTensor(0): ◉◯◯◉◯◉◉ +ObservationTensor(1): ◯◉◯◯◯◉◉ +ChanceOutcomes() = [(0,0.5), (2,0.5)] +LegalActions() = [0, 2] +StringLegalActions() = ["Deal:0", "Deal:2"] + +# Apply action "Deal:2" +action: 2 + +# State 2 +# 1 2 +IsTerminal() = False +History() = [1, 2] +HistoryString() = "1, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "1" +InformationStateString(1) = "2" +InformationStateTensor(0): ◉◯◯◉◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◯◉◯◯◯◯◯◯ +ObservationString(0) = "111" +ObservationString(1) = "211" +ObservationTensor(0): ◉◯◯◉◯◉◉ +ObservationTensor(1): ◯◉◯◯◉◉◉ +Rewards() = [-0, -0] +Returns() = [-0, -0] +LegalActions() = [0, 1] +StringLegalActions() = ["Pass", "Bet"] + +# Apply action "Pass" +action: 0 + +# State 3 +# 1 2 p +IsTerminal() = False +History() = [1, 2, 0] +HistoryString() = "1, 2, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "1p" +InformationStateString(1) = "2p" +InformationStateTensor(0): ◉◯◯◉◯◉◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◯◉◉◯◯◯◯◯ +ObservationString(0) = "111" +ObservationString(1) = "211" +ObservationTensor(0): ◉◯◯◉◯◉◉ +ObservationTensor(1): ◯◉◯◯◉◉◉ +Rewards() = [-0, -0] +Returns() = [-0, -0] +LegalActions() = [0, 1] +StringLegalActions() = ["Pass", "Bet"] + +# Apply action "Bet" +action: 1 + +# State 4 +# 1 2 pb +IsTerminal() = False +History() = [1, 2, 0, 1] +HistoryString() = "1, 2, 0, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "1pb" +InformationStateString(1) = "2pb" +InformationStateTensor(0): ◉◯◯◉◯◉◯◯◉◯◯ +InformationStateTensor(1): ◯◉◯◯◉◉◯◯◉◯◯ +ObservationString(0) = "112" +ObservationString(1) = "212" +ObservationTensor(0) = [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0] +ObservationTensor(1) = [0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0] +Rewards() = [-0, -0] +Returns() = [-0, -0] +LegalActions() = [0, 1] +StringLegalActions() = ["Pass", "Bet"] + +# Apply action "Bet" +action: 1 + +# State 5 +# 1 2 pbb +IsTerminal() = True +History() = [1, 2, 0, 1, 1] +HistoryString() = "1, 2, 0, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "1pbb" +InformationStateString(1) = "2pbb" +InformationStateTensor(0): ◉◯◯◉◯◉◯◯◉◯◉ +InformationStateTensor(1): ◯◉◯◯◉◉◯◯◉◯◉ +ObservationString(0) = "122" +ObservationString(1) = "222" +ObservationTensor(0) = [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0] +ObservationTensor(1) = [0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0] +Rewards() = [2, -2] +Returns() = [2, -2] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/misere(game=pig(players=3,horizon=20,winscore=6)).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/misere(game=pig(players=3,horizon=20,winscore=6)).txt new file mode 100644 index 0000000..66a9232 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/misere(game=pig(players=3,horizon=20,winscore=6)).txt @@ -0,0 +1,388 @@ +game: 
misere(game=pig(players=3,horizon=20,winscore=5)) + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Misere Pig" +GameType.max_num_players = 10 +GameType.min_num_players = 2 +GameType.parameter_specification = ["diceoutcomes", "horizon", "piglet", "players", "winscore"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "misere" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 2 +PolicyTensorShape() = [2] +MaxChanceOutcomes() = 6 +GetParameters() = {game=pig(horizon=20,players=3,winscore=5)} +NumPlayers() = 3 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = -0.0 +ObservationTensorShape() = [4, 6] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 24 +MaxGameLength() = 20 +ToString() = "misere(game=pig(horizon=20,players=3,winscore=5))" + +# State 0 +# Scores: 0 0 0, Turn total: 0 +# Current player: 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Scores: 0 0 0, Turn total: 0\nCurrent player: 0\n" +ObservationString(1) = "Scores: 0 0 0, Turn total: 0\nCurrent player: 0\n" +ObservationString(2) = "Scores: 0 0 0, Turn total: 0\nCurrent player: 0\n" +ObservationTensor(0): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ +Rewards() = [-0, -0, -0] +Returns() = [-0, -0, -0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "roll" +action: 0 + +# State 1 +# Scores: 0 0 0, Turn total: 0 +# Current player: 0 (rolling) +IsTerminal() = False +History() = [0] +HistoryString() = "0" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "Scores: 0 0 0, Turn total: 0\nCurrent player: 0 (rolling)\n" +ObservationString(1) = "Scores: 0 0 0, Turn total: 0\nCurrent player: 0 (rolling)\n" +ObservationString(2) = "Scores: 0 0 0, Turn total: 0\nCurrent player: 0 (rolling)\n" +ObservationTensor(0): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ +ChanceOutcomes() = [(0,0.166667), (1,0.166667), (2,0.166667), (3,0.166667), (4,0.166667), (5,0.166667)] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["Roll 1", "Roll 2", "Roll 3", "Roll 4", "Roll 5", "Roll 6"] + +# Apply action "Roll 1" +action: 0 + +# State 2 +# Scores: 0 0 0, Turn total: 0 +# Current player: 1 +IsTerminal() = False +History() = [0, 0] +HistoryString() = "0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "Scores: 0 0 0, Turn total: 0\nCurrent player: 1\n" +ObservationString(1) = "Scores: 0 0 0, Turn total: 0\nCurrent player: 1\n" +ObservationString(2) = "Scores: 0 0 0, Turn total: 0\nCurrent player: 1\n" +ObservationTensor(0): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ +Rewards() = [-0, -0, -0] +Returns() = [-0, -0, -0] +LegalActions() = [0, 1] +StringLegalActions() = 
["roll", "stop"] + +# Apply action "roll" +action: 0 + +# State 3 +# Scores: 0 0 0, Turn total: 0 +# Current player: 1 (rolling) +IsTerminal() = False +History() = [0, 0, 0] +HistoryString() = "0, 0, 0" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "Scores: 0 0 0, Turn total: 0\nCurrent player: 1 (rolling)\n" +ObservationString(1) = "Scores: 0 0 0, Turn total: 0\nCurrent player: 1 (rolling)\n" +ObservationString(2) = "Scores: 0 0 0, Turn total: 0\nCurrent player: 1 (rolling)\n" +ObservationTensor(0): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ +ChanceOutcomes() = [(0,0.166667), (1,0.166667), (2,0.166667), (3,0.166667), (4,0.166667), (5,0.166667)] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["Roll 1", "Roll 2", "Roll 3", "Roll 4", "Roll 5", "Roll 6"] + +# Apply action "Roll 3" +action: 2 + +# State 4 +# Scores: 0 0 0, Turn total: 3 +# Current player: 1 +IsTerminal() = False +History() = [0, 0, 0, 2] +HistoryString() = "0, 0, 0, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "Scores: 0 0 0, Turn total: 3\nCurrent player: 1\n" +ObservationString(1) = "Scores: 0 0 0, Turn total: 3\nCurrent player: 1\n" +ObservationString(2) = "Scores: 0 0 0, Turn total: 3\nCurrent player: 1\n" +ObservationTensor(0): ◯◯◯◉◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◉◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ +ObservationTensor(2): ◯◯◯◉◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◉◯◯◯◯◯ +Rewards() = [-0, -0, -0] +Returns() = [-0, -0, -0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "stop" +action: 1 + +# State 5 +# Scores: 0 3 0, Turn total: 0 +# Current player: 2 +IsTerminal() = False +History() = [0, 0, 0, 2, 1] +HistoryString() = "0, 0, 0, 2, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "Scores: 0 3 0, Turn total: 0\nCurrent player: 2\n" +ObservationString(1) = "Scores: 0 3 0, Turn total: 0\nCurrent player: 2\n" +ObservationString(2) = "Scores: 0 3 0, Turn total: 0\nCurrent player: 2\n" +ObservationTensor(0): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◯◯◯◉◯◯ + ◉◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◯◯◯◉◯◯ + ◉◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◯◯◯◉◯◯ + ◉◯◯◯◯◯ +Rewards() = [-0, -0, -0] +Returns() = [-0, -0, -0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "stop" +action: 1 + +# State 6 +# Scores: 0 3 0, Turn total: 0 +# Current player: 0 +IsTerminal() = False +History() = [0, 0, 0, 2, 1, 1] +HistoryString() = "0, 0, 0, 2, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Scores: 0 3 0, Turn total: 0\nCurrent player: 0\n" +ObservationString(1) = "Scores: 0 3 0, Turn total: 0\nCurrent player: 0\n" +ObservationString(2) = "Scores: 0 3 0, Turn total: 0\nCurrent player: 0\n" +ObservationTensor(0): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◯◯◯◉◯◯ + ◉◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◯◯◯◉◯◯ + ◉◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◯◯◯◉◯◯ + ◉◯◯◯◯◯ +Rewards() = [-0, -0, -0] +Returns() = [-0, -0, -0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "stop" +action: 1 + +# State 7 +# Scores: 0 3 0, Turn total: 0 +# Current player: 1 +IsTerminal() = False +History() = [0, 0, 0, 2, 1, 1, 1] +HistoryString() = "0, 0, 0, 2, 1, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 
+ObservationString(0) = "Scores: 0 3 0, Turn total: 0\nCurrent player: 1\n" +ObservationString(1) = "Scores: 0 3 0, Turn total: 0\nCurrent player: 1\n" +ObservationString(2) = "Scores: 0 3 0, Turn total: 0\nCurrent player: 1\n" +ObservationTensor(0): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◯◯◯◉◯◯ + ◉◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◯◯◯◉◯◯ + ◉◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◯◯◯◉◯◯ + ◉◯◯◯◯◯ +Rewards() = [-0, -0, -0] +Returns() = [-0, -0, -0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "stop" +action: 1 + +# State 8 +# Scores: 0 3 0, Turn total: 0 +# Current player: 2 +IsTerminal() = False +History() = [0, 0, 0, 2, 1, 1, 1, 1] +HistoryString() = "0, 0, 0, 2, 1, 1, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "Scores: 0 3 0, Turn total: 0\nCurrent player: 2\n" +ObservationString(1) = "Scores: 0 3 0, Turn total: 0\nCurrent player: 2\n" +ObservationString(2) = "Scores: 0 3 0, Turn total: 0\nCurrent player: 2\n" +ObservationTensor(0): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◯◯◯◉◯◯ + ◉◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◯◯◯◉◯◯ + ◉◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◯◯◯◉◯◯ + ◉◯◯◯◯◯ +Rewards() = [-0, -0, -0] +Returns() = [-0, -0, -0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "roll" +action: 0 + +# State 9 +# Apply action "Roll 2" +action: 1 + +# State 10 +# Scores: 0 3 0, Turn total: 2 +# Current player: 2 +IsTerminal() = False +History() = [0, 0, 0, 2, 1, 1, 1, 1, 0, 1] +HistoryString() = "0, 0, 0, 2, 1, 1, 1, 1, 0, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "Scores: 0 3 0, Turn total: 2\nCurrent player: 2\n" +ObservationString(1) = "Scores: 0 3 0, Turn total: 2\nCurrent player: 2\n" +ObservationString(2) = "Scores: 0 3 0, Turn total: 2\nCurrent player: 2\n" +ObservationTensor(0): ◯◯◉◯◯◯ + ◉◯◯◯◯◯ + ◯◯◯◉◯◯ + ◉◯◯◯◯◯ +ObservationTensor(1): ◯◯◉◯◯◯ + ◉◯◯◯◯◯ + ◯◯◯◉◯◯ + ◉◯◯◯◯◯ +ObservationTensor(2): ◯◯◉◯◯◯ + ◉◯◯◯◯◯ + ◯◯◯◉◯◯ + ◉◯◯◯◯◯ +Rewards() = [-0, -0, -0] +Returns() = [-0, -0, -0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "roll" +action: 0 + +# State 11 +# Apply action "Roll 3" +action: 2 + +# State 12 +# Apply action "stop" +action: 1 + +# State 13 +# Scores: 0 3 5, Turn total: 0 +# Current player: 0 +IsTerminal() = True +History() = [0, 0, 0, 2, 1, 1, 1, 1, 0, 1, 0, 2, 1] +HistoryString() = "0, 0, 0, 2, 1, 1, 1, 1, 0, 1, 0, 2, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "Scores: 0 3 5, Turn total: 0\nCurrent player: 0\n" +ObservationString(1) = "Scores: 0 3 5, Turn total: 0\nCurrent player: 0\n" +ObservationString(2) = "Scores: 0 3 5, Turn total: 0\nCurrent player: 0\n" +ObservationTensor(0): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◯◯◯◉◯◯ + ◯◯◯◯◯◉ +ObservationTensor(1): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◯◯◯◉◯◯ + ◯◯◯◯◯◉ +ObservationTensor(2): ◉◯◯◯◯◯ + ◉◯◯◯◯◯ + ◯◯◯◉◯◯ + ◯◯◯◯◯◉ +Rewards() = [0.5, 0.5, -1] +Returns() = [0.5, 0.5, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/misere(game=tic_tac_toe()).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/misere(game=tic_tac_toe()).txt new file mode 100644 index 0000000..629d489 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/misere(game=tic_tac_toe()).txt @@ -0,0 +1,244 @@ +game: misere(game=tic_tac_toe()) + +GameType.chance_mode = ChanceMode.DETERMINISTIC 
+GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Misere Tic Tac Toe" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "misere" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 9 +PolicyTensorShape() = [9] +MaxChanceOutcomes() = 0 +GetParameters() = {game=tic_tac_toe()} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = -0.0 +ObservationTensorShape() = [3, 3, 3] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 27 +MaxGameLength() = 9 +ToString() = "misere(game=tic_tac_toe())" + +# State 0 +# ... +# ... +# ... +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = "...\n...\n..." +ObservationString(1) = "...\n...\n..." +ObservationTensor(0): +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +ObservationTensor(1): +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +Rewards() = [-0, -0] +Returns() = [-0, -0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8] +StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,2)", "x(1,0)", "x(1,1)", "x(1,2)", "x(2,0)", "x(2,1)", "x(2,2)"] + +# Apply action "x(0,1)" +action: 1 + +# State 1 +# .x. +# ... +# ... +IsTerminal() = False +History() = [1] +HistoryString() = "1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "1" +InformationStateString(1) = "1" +ObservationString(0) = ".x.\n...\n..." +ObservationString(1) = ".x.\n...\n..." +ObservationTensor(0): +◉◯◉ ◯◯◯ ◯◉◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +ObservationTensor(1): +◉◯◉ ◯◯◯ ◯◉◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +Rewards() = [-0, -0] +Returns() = [-0, -0] +LegalActions() = [0, 2, 3, 4, 5, 6, 7, 8] +StringLegalActions() = ["o(0,0)", "o(0,2)", "o(1,0)", "o(1,1)", "o(1,2)", "o(2,0)", "o(2,1)", "o(2,2)"] + +# Apply action "o(0,2)" +action: 2 + +# State 2 +# .xo +# ... +# ... +IsTerminal() = False +History() = [1, 2] +HistoryString() = "1, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "1, 2" +InformationStateString(1) = "1, 2" +ObservationString(0) = ".xo\n...\n..." +ObservationString(1) = ".xo\n...\n..." +ObservationTensor(0): +◉◯◯ ◯◯◉ ◯◉◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +ObservationTensor(1): +◉◯◯ ◯◯◉ ◯◉◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +Rewards() = [-0, -0] +Returns() = [-0, -0] +LegalActions() = [0, 3, 4, 5, 6, 7, 8] +StringLegalActions() = ["x(0,0)", "x(1,0)", "x(1,1)", "x(1,2)", "x(2,0)", "x(2,1)", "x(2,2)"] + +# Apply action "x(2,1)" +action: 7 + +# State 3 +# .xo +# ... +# .x. +IsTerminal() = False +History() = [1, 2, 7] +HistoryString() = "1, 2, 7" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "1, 2, 7" +InformationStateString(1) = "1, 2, 7" +ObservationString(0) = ".xo\n...\n.x." +ObservationString(1) = ".xo\n...\n.x." 
+ObservationTensor(0): +◉◯◯ ◯◯◉ ◯◉◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◯◉ ◯◯◯ ◯◉◯ +ObservationTensor(1): +◉◯◯ ◯◯◉ ◯◉◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◯◉ ◯◯◯ ◯◉◯ +Rewards() = [-0, -0] +Returns() = [-0, -0] +LegalActions() = [0, 3, 4, 5, 6, 8] +StringLegalActions() = ["o(0,0)", "o(1,0)", "o(1,1)", "o(1,2)", "o(2,0)", "o(2,2)"] + +# Apply action "o(2,2)" +action: 8 + +# State 4 +# .xo +# ... +# .xo +IsTerminal() = False +History() = [1, 2, 7, 8] +HistoryString() = "1, 2, 7, 8" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "1, 2, 7, 8" +InformationStateString(1) = "1, 2, 7, 8" +ObservationString(0) = ".xo\n...\n.xo" +ObservationString(1) = ".xo\n...\n.xo" +ObservationTensor(0): +◉◯◯ ◯◯◉ ◯◉◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◯◯ ◯◯◉ ◯◉◯ +ObservationTensor(1): +◉◯◯ ◯◯◉ ◯◉◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◯◯ ◯◯◉ ◯◉◯ +Rewards() = [-0, -0] +Returns() = [-0, -0] +LegalActions() = [0, 3, 4, 5, 6] +StringLegalActions() = ["x(0,0)", "x(1,0)", "x(1,1)", "x(1,2)", "x(2,0)"] + +# Apply action "x(1,2)" +action: 5 + +# State 5 +# .xo +# ..x +# .xo +IsTerminal() = False +History() = [1, 2, 7, 8, 5] +HistoryString() = "1, 2, 7, 8, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "1, 2, 7, 8, 5" +InformationStateString(1) = "1, 2, 7, 8, 5" +ObservationString(0) = ".xo\n..x\n.xo" +ObservationString(1) = ".xo\n..x\n.xo" +ObservationTensor(0): +◉◯◯ ◯◯◉ ◯◉◯ +◉◉◯ ◯◯◯ ◯◯◉ +◉◯◯ ◯◯◉ ◯◉◯ +ObservationTensor(1): +◉◯◯ ◯◯◉ ◯◉◯ +◉◉◯ ◯◯◯ ◯◯◉ +◉◯◯ ◯◯◉ ◯◉◯ +Rewards() = [-0, -0] +Returns() = [-0, -0] +LegalActions() = [0, 3, 4, 6] +StringLegalActions() = ["o(0,0)", "o(1,0)", "o(1,1)", "o(2,0)"] + +# Apply action "o(2,0)" +action: 6 + +# State 6 +# Apply action "x(1,0)" +action: 3 + +# State 7 +# Apply action "o(1,1)" +action: 4 + +# State 8 +# .xo +# xox +# oxo +IsTerminal() = True +History() = [1, 2, 7, 8, 5, 6, 3, 4] +HistoryString() = "1, 2, 7, 8, 5, 6, 3, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "1, 2, 7, 8, 5, 6, 3, 4" +InformationStateString(1) = "1, 2, 7, 8, 5, 6, 3, 4" +ObservationString(0) = ".xo\nxox\noxo" +ObservationString(1) = ".xo\nxox\noxo" +ObservationTensor(0): +◉◯◯ ◯◯◉ ◯◉◯ +◯◯◯ ◯◉◯ ◉◯◉ +◯◯◯ ◉◯◉ ◯◉◯ +ObservationTensor(1): +◉◯◯ ◯◯◉ ◯◉◯ +◯◯◯ ◯◉◯ ◉◯◉ +◯◯◯ ◉◯◉ ◯◉◯ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/mnk.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/mnk.txt new file mode 100644 index 0000000..12dbe7a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/mnk.txt @@ -0,0 +1,1520 @@ +game: mnk + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "m,n,k-game" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["k", "m", "n"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "mnk" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 225 +PolicyTensorShape() = [225] +MaxChanceOutcomes() = 0 +GetParameters() = {m=15,n=15} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 
+ObservationTensorShape() = [3, 15, 15] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 675 +MaxGameLength() = 225 +ToString() = "mnk()" + +# State 0 +# ............... +# ............... +# ............... +# ............... +# ............... +# ............... +# ............... +# ............... +# ............... +# ............... +# ............... +# ............... +# ............... +# ............... +# ............... +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = "...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n..............." +ObservationString(1) = "...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n..............." +ObservationTensor(0): +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 
174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224] +StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,2)", "x(0,3)", "x(0,4)", "x(0,5)", "x(0,6)", "x(0,7)", "x(0,8)", "x(0,9)", "x(0,10)", "x(0,11)", "x(0,12)", "x(0,13)", "x(0,14)", "x(1,0)", "x(1,1)", "x(1,2)", "x(1,3)", "x(1,4)", "x(1,5)", "x(1,6)", "x(1,7)", "x(1,8)", "x(1,9)", "x(1,10)", "x(1,11)", "x(1,12)", "x(1,13)", "x(1,14)", "x(2,0)", "x(2,1)", "x(2,2)", "x(2,3)", "x(2,4)", "x(2,5)", "x(2,6)", "x(2,7)", "x(2,8)", "x(2,9)", "x(2,10)", "x(2,11)", "x(2,12)", "x(2,13)", "x(2,14)", "x(3,0)", "x(3,1)", "x(3,2)", "x(3,3)", "x(3,4)", "x(3,5)", "x(3,6)", "x(3,7)", "x(3,8)", "x(3,9)", "x(3,10)", "x(3,11)", "x(3,12)", "x(3,13)", "x(3,14)", "x(4,0)", "x(4,1)", "x(4,2)", "x(4,3)", "x(4,4)", "x(4,5)", "x(4,6)", "x(4,7)", "x(4,8)", "x(4,9)", "x(4,10)", "x(4,11)", "x(4,12)", "x(4,13)", "x(4,14)", "x(5,0)", "x(5,1)", "x(5,2)", "x(5,3)", "x(5,4)", "x(5,5)", "x(5,6)", "x(5,7)", "x(5,8)", "x(5,9)", "x(5,10)", "x(5,11)", "x(5,12)", "x(5,13)", "x(5,14)", "x(6,0)", "x(6,1)", "x(6,2)", "x(6,3)", "x(6,4)", "x(6,5)", "x(6,6)", "x(6,7)", "x(6,8)", "x(6,9)", "x(6,10)", "x(6,11)", "x(6,12)", "x(6,13)", "x(6,14)", "x(7,0)", "x(7,1)", "x(7,2)", "x(7,3)", "x(7,4)", "x(7,5)", "x(7,6)", "x(7,7)", "x(7,8)", "x(7,9)", "x(7,10)", "x(7,11)", "x(7,12)", "x(7,13)", "x(7,14)", "x(8,0)", "x(8,1)", "x(8,2)", "x(8,3)", "x(8,4)", "x(8,5)", "x(8,6)", "x(8,7)", "x(8,8)", "x(8,9)", "x(8,10)", "x(8,11)", "x(8,12)", "x(8,13)", "x(8,14)", "x(9,0)", "x(9,1)", "x(9,2)", "x(9,3)", "x(9,4)", "x(9,5)", "x(9,6)", "x(9,7)", "x(9,8)", "x(9,9)", "x(9,10)", "x(9,11)", "x(9,12)", "x(9,13)", "x(9,14)", "x(10,0)", "x(10,1)", "x(10,2)", "x(10,3)", "x(10,4)", "x(10,5)", "x(10,6)", "x(10,7)", "x(10,8)", "x(10,9)", "x(10,10)", "x(10,11)", "x(10,12)", "x(10,13)", "x(10,14)", "x(11,0)", "x(11,1)", "x(11,2)", "x(11,3)", "x(11,4)", "x(11,5)", "x(11,6)", "x(11,7)", "x(11,8)", "x(11,9)", "x(11,10)", "x(11,11)", "x(11,12)", "x(11,13)", "x(11,14)", "x(12,0)", "x(12,1)", "x(12,2)", "x(12,3)", "x(12,4)", "x(12,5)", "x(12,6)", "x(12,7)", "x(12,8)", "x(12,9)", "x(12,10)", "x(12,11)", "x(12,12)", "x(12,13)", "x(12,14)", "x(13,0)", "x(13,1)", "x(13,2)", "x(13,3)", "x(13,4)", "x(13,5)", "x(13,6)", "x(13,7)", "x(13,8)", "x(13,9)", "x(13,10)", "x(13,11)", "x(13,12)", "x(13,13)", "x(13,14)", "x(14,0)", "x(14,1)", "x(14,2)", "x(14,3)", "x(14,4)", "x(14,5)", "x(14,6)", "x(14,7)", "x(14,8)", "x(14,9)", "x(14,10)", "x(14,11)", "x(14,12)", "x(14,13)", "x(14,14)"] + +# Apply action "x(2,7)" +action: 37 + +# State 1 +# ............... +# ............... +# .......x....... +# ............... +# ............... +# ............... +# ............... +# ............... +# ............... +# ............... +# ............... +# ............... +# ............... +# ............... +# ............... +IsTerminal() = False +History() = [37] +HistoryString() = "37" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "37" +InformationStateString(1) = "37" +ObservationString(0) = "...............\n...............\n.......x.......\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n..............." 
+ObservationString(1) = "...............\n...............\n.......x.......\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n..............." +ObservationTensor(0): +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◯◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◯◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224] +StringLegalActions() = ["o(0,0)", "o(0,1)", "o(0,2)", "o(0,3)", "o(0,4)", "o(0,5)", "o(0,6)", "o(0,7)", "o(0,8)", "o(0,9)", "o(0,10)", "o(0,11)", "o(0,12)", "o(0,13)", "o(0,14)", "o(1,0)", "o(1,1)", "o(1,2)", "o(1,3)", "o(1,4)", "o(1,5)", "o(1,6)", "o(1,7)", "o(1,8)", "o(1,9)", "o(1,10)", "o(1,11)", "o(1,12)", "o(1,13)", "o(1,14)", "o(2,0)", "o(2,1)", "o(2,2)", "o(2,3)", "o(2,4)", "o(2,5)", "o(2,6)", "o(2,8)", "o(2,9)", "o(2,10)", "o(2,11)", "o(2,12)", "o(2,13)", "o(2,14)", "o(3,0)", "o(3,1)", "o(3,2)", "o(3,3)", "o(3,4)", "o(3,5)", "o(3,6)", "o(3,7)", "o(3,8)", "o(3,9)", "o(3,10)", "o(3,11)", "o(3,12)", "o(3,13)", "o(3,14)", "o(4,0)", "o(4,1)", "o(4,2)", "o(4,3)", "o(4,4)", 
"o(4,5)", "o(4,6)", "o(4,7)", "o(4,8)", "o(4,9)", "o(4,10)", "o(4,11)", "o(4,12)", "o(4,13)", "o(4,14)", "o(5,0)", "o(5,1)", "o(5,2)", "o(5,3)", "o(5,4)", "o(5,5)", "o(5,6)", "o(5,7)", "o(5,8)", "o(5,9)", "o(5,10)", "o(5,11)", "o(5,12)", "o(5,13)", "o(5,14)", "o(6,0)", "o(6,1)", "o(6,2)", "o(6,3)", "o(6,4)", "o(6,5)", "o(6,6)", "o(6,7)", "o(6,8)", "o(6,9)", "o(6,10)", "o(6,11)", "o(6,12)", "o(6,13)", "o(6,14)", "o(7,0)", "o(7,1)", "o(7,2)", "o(7,3)", "o(7,4)", "o(7,5)", "o(7,6)", "o(7,7)", "o(7,8)", "o(7,9)", "o(7,10)", "o(7,11)", "o(7,12)", "o(7,13)", "o(7,14)", "o(8,0)", "o(8,1)", "o(8,2)", "o(8,3)", "o(8,4)", "o(8,5)", "o(8,6)", "o(8,7)", "o(8,8)", "o(8,9)", "o(8,10)", "o(8,11)", "o(8,12)", "o(8,13)", "o(8,14)", "o(9,0)", "o(9,1)", "o(9,2)", "o(9,3)", "o(9,4)", "o(9,5)", "o(9,6)", "o(9,7)", "o(9,8)", "o(9,9)", "o(9,10)", "o(9,11)", "o(9,12)", "o(9,13)", "o(9,14)", "o(10,0)", "o(10,1)", "o(10,2)", "o(10,3)", "o(10,4)", "o(10,5)", "o(10,6)", "o(10,7)", "o(10,8)", "o(10,9)", "o(10,10)", "o(10,11)", "o(10,12)", "o(10,13)", "o(10,14)", "o(11,0)", "o(11,1)", "o(11,2)", "o(11,3)", "o(11,4)", "o(11,5)", "o(11,6)", "o(11,7)", "o(11,8)", "o(11,9)", "o(11,10)", "o(11,11)", "o(11,12)", "o(11,13)", "o(11,14)", "o(12,0)", "o(12,1)", "o(12,2)", "o(12,3)", "o(12,4)", "o(12,5)", "o(12,6)", "o(12,7)", "o(12,8)", "o(12,9)", "o(12,10)", "o(12,11)", "o(12,12)", "o(12,13)", "o(12,14)", "o(13,0)", "o(13,1)", "o(13,2)", "o(13,3)", "o(13,4)", "o(13,5)", "o(13,6)", "o(13,7)", "o(13,8)", "o(13,9)", "o(13,10)", "o(13,11)", "o(13,12)", "o(13,13)", "o(13,14)", "o(14,0)", "o(14,1)", "o(14,2)", "o(14,3)", "o(14,4)", "o(14,5)", "o(14,6)", "o(14,7)", "o(14,8)", "o(14,9)", "o(14,10)", "o(14,11)", "o(14,12)", "o(14,13)", "o(14,14)"] + +# Apply action "o(2,4)" +action: 34 + +# State 2 +# ............... +# ............... +# ....o..x....... +# ............... +# ............... +# ............... +# ............... +# ............... +# ............... +# ............... +# ............... +# ............... +# ............... +# ............... +# ............... +IsTerminal() = False +History() = [37, 34] +HistoryString() = "37, 34" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "37, 34" +InformationStateString(1) = "37, 34" +ObservationString(0) = "...............\n...............\n....o..x.......\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n..............." +ObservationString(1) = "...............\n...............\n....o..x.......\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n...............\n..............." 
+ObservationTensor(0): +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◯◉◉◯◉◉◉◉◉◉◉ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◯◉◉◯◉◉◉◉◉◉◉ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224] +StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,2)", "x(0,3)", "x(0,4)", "x(0,5)", "x(0,6)", "x(0,7)", "x(0,8)", "x(0,9)", "x(0,10)", "x(0,11)", "x(0,12)", "x(0,13)", "x(0,14)", "x(1,0)", "x(1,1)", "x(1,2)", "x(1,3)", "x(1,4)", "x(1,5)", "x(1,6)", "x(1,7)", "x(1,8)", "x(1,9)", "x(1,10)", "x(1,11)", "x(1,12)", "x(1,13)", "x(1,14)", "x(2,0)", "x(2,1)", "x(2,2)", "x(2,3)", "x(2,5)", "x(2,6)", "x(2,8)", "x(2,9)", "x(2,10)", "x(2,11)", "x(2,12)", "x(2,13)", "x(2,14)", "x(3,0)", "x(3,1)", "x(3,2)", "x(3,3)", "x(3,4)", "x(3,5)", "x(3,6)", "x(3,7)", "x(3,8)", "x(3,9)", "x(3,10)", "x(3,11)", "x(3,12)", "x(3,13)", "x(3,14)", "x(4,0)", "x(4,1)", "x(4,2)", "x(4,3)", "x(4,4)", "x(4,5)", "x(4,6)", "x(4,7)", "x(4,8)", "x(4,9)", "x(4,10)", "x(4,11)", "x(4,12)", "x(4,13)", "x(4,14)", "x(5,0)", "x(5,1)", "x(5,2)", "x(5,3)", "x(5,4)", "x(5,5)", "x(5,6)", "x(5,7)", "x(5,8)", "x(5,9)", "x(5,10)", "x(5,11)", "x(5,12)", "x(5,13)", "x(5,14)", "x(6,0)", "x(6,1)", "x(6,2)", 
"x(6,3)", "x(6,4)", "x(6,5)", "x(6,6)", "x(6,7)", "x(6,8)", "x(6,9)", "x(6,10)", "x(6,11)", "x(6,12)", "x(6,13)", "x(6,14)", "x(7,0)", "x(7,1)", "x(7,2)", "x(7,3)", "x(7,4)", "x(7,5)", "x(7,6)", "x(7,7)", "x(7,8)", "x(7,9)", "x(7,10)", "x(7,11)", "x(7,12)", "x(7,13)", "x(7,14)", "x(8,0)", "x(8,1)", "x(8,2)", "x(8,3)", "x(8,4)", "x(8,5)", "x(8,6)", "x(8,7)", "x(8,8)", "x(8,9)", "x(8,10)", "x(8,11)", "x(8,12)", "x(8,13)", "x(8,14)", "x(9,0)", "x(9,1)", "x(9,2)", "x(9,3)", "x(9,4)", "x(9,5)", "x(9,6)", "x(9,7)", "x(9,8)", "x(9,9)", "x(9,10)", "x(9,11)", "x(9,12)", "x(9,13)", "x(9,14)", "x(10,0)", "x(10,1)", "x(10,2)", "x(10,3)", "x(10,4)", "x(10,5)", "x(10,6)", "x(10,7)", "x(10,8)", "x(10,9)", "x(10,10)", "x(10,11)", "x(10,12)", "x(10,13)", "x(10,14)", "x(11,0)", "x(11,1)", "x(11,2)", "x(11,3)", "x(11,4)", "x(11,5)", "x(11,6)", "x(11,7)", "x(11,8)", "x(11,9)", "x(11,10)", "x(11,11)", "x(11,12)", "x(11,13)", "x(11,14)", "x(12,0)", "x(12,1)", "x(12,2)", "x(12,3)", "x(12,4)", "x(12,5)", "x(12,6)", "x(12,7)", "x(12,8)", "x(12,9)", "x(12,10)", "x(12,11)", "x(12,12)", "x(12,13)", "x(12,14)", "x(13,0)", "x(13,1)", "x(13,2)", "x(13,3)", "x(13,4)", "x(13,5)", "x(13,6)", "x(13,7)", "x(13,8)", "x(13,9)", "x(13,10)", "x(13,11)", "x(13,12)", "x(13,13)", "x(13,14)", "x(14,0)", "x(14,1)", "x(14,2)", "x(14,3)", "x(14,4)", "x(14,5)", "x(14,6)", "x(14,7)", "x(14,8)", "x(14,9)", "x(14,10)", "x(14,11)", "x(14,12)", "x(14,13)", "x(14,14)"] + +# Apply action "x(9,9)" +action: 144 + +# State 3 +# ............... +# ............... +# ....o..x....... +# ............... +# ............... +# ............... +# ............... +# ............... +# ............... +# .........x..... +# ............... +# ............... +# ............... +# ............... +# ............... +IsTerminal() = False +History() = [37, 34, 144] +HistoryString() = "37, 34, 144" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "37, 34, 144" +InformationStateString(1) = "37, 34, 144" +ObservationString(0) = "...............\n...............\n....o..x.......\n...............\n...............\n...............\n...............\n...............\n...............\n.........x.....\n...............\n...............\n...............\n...............\n..............." +ObservationString(1) = "...............\n...............\n....o..x.......\n...............\n...............\n...............\n...............\n...............\n...............\n.........x.....\n...............\n...............\n...............\n...............\n..............." 
+ObservationTensor(0): +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◯◉◉◯◉◉◉◉◉◉◉ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◯◉◉◯◉◉◉◉◉◉◉ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224] +StringLegalActions() = ["o(0,0)", "o(0,1)", "o(0,2)", "o(0,3)", "o(0,4)", "o(0,5)", "o(0,6)", "o(0,7)", "o(0,8)", "o(0,9)", "o(0,10)", "o(0,11)", "o(0,12)", "o(0,13)", "o(0,14)", "o(1,0)", "o(1,1)", "o(1,2)", "o(1,3)", "o(1,4)", "o(1,5)", "o(1,6)", "o(1,7)", "o(1,8)", "o(1,9)", "o(1,10)", "o(1,11)", "o(1,12)", "o(1,13)", "o(1,14)", "o(2,0)", "o(2,1)", "o(2,2)", "o(2,3)", "o(2,5)", "o(2,6)", "o(2,8)", "o(2,9)", "o(2,10)", "o(2,11)", "o(2,12)", "o(2,13)", "o(2,14)", "o(3,0)", "o(3,1)", "o(3,2)", "o(3,3)", "o(3,4)", "o(3,5)", "o(3,6)", "o(3,7)", "o(3,8)", "o(3,9)", "o(3,10)", "o(3,11)", "o(3,12)", "o(3,13)", "o(3,14)", "o(4,0)", "o(4,1)", "o(4,2)", "o(4,3)", "o(4,4)", "o(4,5)", "o(4,6)", "o(4,7)", "o(4,8)", "o(4,9)", "o(4,10)", "o(4,11)", "o(4,12)", "o(4,13)", "o(4,14)", "o(5,0)", "o(5,1)", "o(5,2)", "o(5,3)", "o(5,4)", "o(5,5)", "o(5,6)", "o(5,7)", "o(5,8)", "o(5,9)", "o(5,10)", "o(5,11)", "o(5,12)", "o(5,13)", "o(5,14)", "o(6,0)", "o(6,1)", "o(6,2)", "o(6,3)", 
"o(6,4)", "o(6,5)", "o(6,6)", "o(6,7)", "o(6,8)", "o(6,9)", "o(6,10)", "o(6,11)", "o(6,12)", "o(6,13)", "o(6,14)", "o(7,0)", "o(7,1)", "o(7,2)", "o(7,3)", "o(7,4)", "o(7,5)", "o(7,6)", "o(7,7)", "o(7,8)", "o(7,9)", "o(7,10)", "o(7,11)", "o(7,12)", "o(7,13)", "o(7,14)", "o(8,0)", "o(8,1)", "o(8,2)", "o(8,3)", "o(8,4)", "o(8,5)", "o(8,6)", "o(8,7)", "o(8,8)", "o(8,9)", "o(8,10)", "o(8,11)", "o(8,12)", "o(8,13)", "o(8,14)", "o(9,0)", "o(9,1)", "o(9,2)", "o(9,3)", "o(9,4)", "o(9,5)", "o(9,6)", "o(9,7)", "o(9,8)", "o(9,10)", "o(9,11)", "o(9,12)", "o(9,13)", "o(9,14)", "o(10,0)", "o(10,1)", "o(10,2)", "o(10,3)", "o(10,4)", "o(10,5)", "o(10,6)", "o(10,7)", "o(10,8)", "o(10,9)", "o(10,10)", "o(10,11)", "o(10,12)", "o(10,13)", "o(10,14)", "o(11,0)", "o(11,1)", "o(11,2)", "o(11,3)", "o(11,4)", "o(11,5)", "o(11,6)", "o(11,7)", "o(11,8)", "o(11,9)", "o(11,10)", "o(11,11)", "o(11,12)", "o(11,13)", "o(11,14)", "o(12,0)", "o(12,1)", "o(12,2)", "o(12,3)", "o(12,4)", "o(12,5)", "o(12,6)", "o(12,7)", "o(12,8)", "o(12,9)", "o(12,10)", "o(12,11)", "o(12,12)", "o(12,13)", "o(12,14)", "o(13,0)", "o(13,1)", "o(13,2)", "o(13,3)", "o(13,4)", "o(13,5)", "o(13,6)", "o(13,7)", "o(13,8)", "o(13,9)", "o(13,10)", "o(13,11)", "o(13,12)", "o(13,13)", "o(13,14)", "o(14,0)", "o(14,1)", "o(14,2)", "o(14,3)", "o(14,4)", "o(14,5)", "o(14,6)", "o(14,7)", "o(14,8)", "o(14,9)", "o(14,10)", "o(14,11)", "o(14,12)", "o(14,13)", "o(14,14)"] + +# Apply action "o(2,3)" +action: 33 + +# State 4 +# ............... +# ............... +# ...oo..x....... +# ............... +# ............... +# ............... +# ............... +# ............... +# ............... +# .........x..... +# ............... +# ............... +# ............... +# ............... +# ............... +IsTerminal() = False +History() = [37, 34, 144, 33] +HistoryString() = "37, 34, 144, 33" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "37, 34, 144, 33" +InformationStateString(1) = "37, 34, 144, 33" +ObservationString(0) = "...............\n...............\n...oo..x.......\n...............\n...............\n...............\n...............\n...............\n...............\n.........x.....\n...............\n...............\n...............\n...............\n..............." +ObservationString(1) = "...............\n...............\n...oo..x.......\n...............\n...............\n...............\n...............\n...............\n...............\n.........x.....\n...............\n...............\n...............\n...............\n..............." 
+ObservationTensor(0): +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◯◯◉◉◯◉◉◉◉◉◉◉ ◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◯◯◉◉◯◉◉◉◉◉◉◉ ◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 35, 36, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224] +StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,2)", "x(0,3)", "x(0,4)", "x(0,5)", "x(0,6)", "x(0,7)", "x(0,8)", "x(0,9)", "x(0,10)", "x(0,11)", "x(0,12)", "x(0,13)", "x(0,14)", "x(1,0)", "x(1,1)", "x(1,2)", "x(1,3)", "x(1,4)", "x(1,5)", "x(1,6)", "x(1,7)", "x(1,8)", "x(1,9)", "x(1,10)", "x(1,11)", "x(1,12)", "x(1,13)", "x(1,14)", "x(2,0)", "x(2,1)", "x(2,2)", "x(2,5)", "x(2,6)", "x(2,8)", "x(2,9)", "x(2,10)", "x(2,11)", "x(2,12)", "x(2,13)", "x(2,14)", "x(3,0)", "x(3,1)", "x(3,2)", "x(3,3)", "x(3,4)", "x(3,5)", "x(3,6)", "x(3,7)", "x(3,8)", "x(3,9)", "x(3,10)", "x(3,11)", "x(3,12)", "x(3,13)", "x(3,14)", "x(4,0)", "x(4,1)", "x(4,2)", "x(4,3)", "x(4,4)", "x(4,5)", "x(4,6)", "x(4,7)", "x(4,8)", "x(4,9)", "x(4,10)", "x(4,11)", "x(4,12)", "x(4,13)", "x(4,14)", "x(5,0)", "x(5,1)", "x(5,2)", "x(5,3)", "x(5,4)", "x(5,5)", "x(5,6)", "x(5,7)", "x(5,8)", "x(5,9)", "x(5,10)", "x(5,11)", "x(5,12)", "x(5,13)", "x(5,14)", "x(6,0)", "x(6,1)", "x(6,2)", "x(6,3)", "x(6,4)", 
"x(6,5)", "x(6,6)", "x(6,7)", "x(6,8)", "x(6,9)", "x(6,10)", "x(6,11)", "x(6,12)", "x(6,13)", "x(6,14)", "x(7,0)", "x(7,1)", "x(7,2)", "x(7,3)", "x(7,4)", "x(7,5)", "x(7,6)", "x(7,7)", "x(7,8)", "x(7,9)", "x(7,10)", "x(7,11)", "x(7,12)", "x(7,13)", "x(7,14)", "x(8,0)", "x(8,1)", "x(8,2)", "x(8,3)", "x(8,4)", "x(8,5)", "x(8,6)", "x(8,7)", "x(8,8)", "x(8,9)", "x(8,10)", "x(8,11)", "x(8,12)", "x(8,13)", "x(8,14)", "x(9,0)", "x(9,1)", "x(9,2)", "x(9,3)", "x(9,4)", "x(9,5)", "x(9,6)", "x(9,7)", "x(9,8)", "x(9,10)", "x(9,11)", "x(9,12)", "x(9,13)", "x(9,14)", "x(10,0)", "x(10,1)", "x(10,2)", "x(10,3)", "x(10,4)", "x(10,5)", "x(10,6)", "x(10,7)", "x(10,8)", "x(10,9)", "x(10,10)", "x(10,11)", "x(10,12)", "x(10,13)", "x(10,14)", "x(11,0)", "x(11,1)", "x(11,2)", "x(11,3)", "x(11,4)", "x(11,5)", "x(11,6)", "x(11,7)", "x(11,8)", "x(11,9)", "x(11,10)", "x(11,11)", "x(11,12)", "x(11,13)", "x(11,14)", "x(12,0)", "x(12,1)", "x(12,2)", "x(12,3)", "x(12,4)", "x(12,5)", "x(12,6)", "x(12,7)", "x(12,8)", "x(12,9)", "x(12,10)", "x(12,11)", "x(12,12)", "x(12,13)", "x(12,14)", "x(13,0)", "x(13,1)", "x(13,2)", "x(13,3)", "x(13,4)", "x(13,5)", "x(13,6)", "x(13,7)", "x(13,8)", "x(13,9)", "x(13,10)", "x(13,11)", "x(13,12)", "x(13,13)", "x(13,14)", "x(14,0)", "x(14,1)", "x(14,2)", "x(14,3)", "x(14,4)", "x(14,5)", "x(14,6)", "x(14,7)", "x(14,8)", "x(14,9)", "x(14,10)", "x(14,11)", "x(14,12)", "x(14,13)", "x(14,14)"] + +# Apply action "x(4,5)" +action: 65 + +# State 5 +# ............... +# ............... +# ...oo..x....... +# ............... +# .....x......... +# ............... +# ............... +# ............... +# ............... +# .........x..... +# ............... +# ............... +# ............... +# ............... +# ............... +IsTerminal() = False +History() = [37, 34, 144, 33, 65] +HistoryString() = "37, 34, 144, 33, 65" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "37, 34, 144, 33, 65" +InformationStateString(1) = "37, 34, 144, 33, 65" +ObservationString(0) = "...............\n...............\n...oo..x.......\n...............\n.....x.........\n...............\n...............\n...............\n...............\n.........x.....\n...............\n...............\n...............\n...............\n..............." +ObservationString(1) = "...............\n...............\n...oo..x.......\n...............\n.....x.........\n...............\n...............\n...............\n...............\n.........x.....\n...............\n...............\n...............\n...............\n..............." 
+ObservationTensor(0): +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◯◯◉◉◯◉◉◉◉◉◉◉ ◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◯◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◯◯◉◉◯◉◉◉◉◉◉◉ ◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◯◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 35, 36, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224] +StringLegalActions() = ["o(0,0)", "o(0,1)", "o(0,2)", "o(0,3)", "o(0,4)", "o(0,5)", "o(0,6)", "o(0,7)", "o(0,8)", "o(0,9)", "o(0,10)", "o(0,11)", "o(0,12)", "o(0,13)", "o(0,14)", "o(1,0)", "o(1,1)", "o(1,2)", "o(1,3)", "o(1,4)", "o(1,5)", "o(1,6)", "o(1,7)", "o(1,8)", "o(1,9)", "o(1,10)", "o(1,11)", "o(1,12)", "o(1,13)", "o(1,14)", "o(2,0)", "o(2,1)", "o(2,2)", "o(2,5)", "o(2,6)", "o(2,8)", "o(2,9)", "o(2,10)", "o(2,11)", "o(2,12)", "o(2,13)", "o(2,14)", "o(3,0)", "o(3,1)", "o(3,2)", "o(3,3)", "o(3,4)", "o(3,5)", "o(3,6)", "o(3,7)", "o(3,8)", "o(3,9)", "o(3,10)", "o(3,11)", "o(3,12)", "o(3,13)", "o(3,14)", "o(4,0)", "o(4,1)", "o(4,2)", "o(4,3)", "o(4,4)", "o(4,6)", "o(4,7)", "o(4,8)", "o(4,9)", "o(4,10)", "o(4,11)", "o(4,12)", "o(4,13)", "o(4,14)", "o(5,0)", "o(5,1)", "o(5,2)", "o(5,3)", "o(5,4)", "o(5,5)", "o(5,6)", "o(5,7)", "o(5,8)", "o(5,9)", "o(5,10)", "o(5,11)", "o(5,12)", "o(5,13)", "o(5,14)", "o(6,0)", "o(6,1)", "o(6,2)", "o(6,3)", "o(6,4)", "o(6,5)", "o(6,6)", 
"o(6,7)", "o(6,8)", "o(6,9)", "o(6,10)", "o(6,11)", "o(6,12)", "o(6,13)", "o(6,14)", "o(7,0)", "o(7,1)", "o(7,2)", "o(7,3)", "o(7,4)", "o(7,5)", "o(7,6)", "o(7,7)", "o(7,8)", "o(7,9)", "o(7,10)", "o(7,11)", "o(7,12)", "o(7,13)", "o(7,14)", "o(8,0)", "o(8,1)", "o(8,2)", "o(8,3)", "o(8,4)", "o(8,5)", "o(8,6)", "o(8,7)", "o(8,8)", "o(8,9)", "o(8,10)", "o(8,11)", "o(8,12)", "o(8,13)", "o(8,14)", "o(9,0)", "o(9,1)", "o(9,2)", "o(9,3)", "o(9,4)", "o(9,5)", "o(9,6)", "o(9,7)", "o(9,8)", "o(9,10)", "o(9,11)", "o(9,12)", "o(9,13)", "o(9,14)", "o(10,0)", "o(10,1)", "o(10,2)", "o(10,3)", "o(10,4)", "o(10,5)", "o(10,6)", "o(10,7)", "o(10,8)", "o(10,9)", "o(10,10)", "o(10,11)", "o(10,12)", "o(10,13)", "o(10,14)", "o(11,0)", "o(11,1)", "o(11,2)", "o(11,3)", "o(11,4)", "o(11,5)", "o(11,6)", "o(11,7)", "o(11,8)", "o(11,9)", "o(11,10)", "o(11,11)", "o(11,12)", "o(11,13)", "o(11,14)", "o(12,0)", "o(12,1)", "o(12,2)", "o(12,3)", "o(12,4)", "o(12,5)", "o(12,6)", "o(12,7)", "o(12,8)", "o(12,9)", "o(12,10)", "o(12,11)", "o(12,12)", "o(12,13)", "o(12,14)", "o(13,0)", "o(13,1)", "o(13,2)", "o(13,3)", "o(13,4)", "o(13,5)", "o(13,6)", "o(13,7)", "o(13,8)", "o(13,9)", "o(13,10)", "o(13,11)", "o(13,12)", "o(13,13)", "o(13,14)", "o(14,0)", "o(14,1)", "o(14,2)", "o(14,3)", "o(14,4)", "o(14,5)", "o(14,6)", "o(14,7)", "o(14,8)", "o(14,9)", "o(14,10)", "o(14,11)", "o(14,12)", "o(14,13)", "o(14,14)"] + +# Apply action "o(13,1)" +action: 196 + +# State 6 +# Apply action "x(5,6)" +action: 81 + +# State 7 +# Apply action "o(5,11)" +action: 86 + +# State 8 +# Apply action "x(1,11)" +action: 26 + +# State 9 +# Apply action "o(9,0)" +action: 135 + +# State 10 +# Apply action "x(7,6)" +action: 111 + +# State 11 +# Apply action "o(7,5)" +action: 110 + +# State 12 +# Apply action "x(5,1)" +action: 76 + +# State 13 +# Apply action "o(2,12)" +action: 42 + +# State 14 +# Apply action "x(1,14)" +action: 29 + +# State 15 +# Apply action "o(2,5)" +action: 35 + +# State 16 +# Apply action "x(14,13)" +action: 223 + +# State 17 +# Apply action "o(12,9)" +action: 189 + +# State 18 +# Apply action "x(11,9)" +action: 174 + +# State 19 +# Apply action "o(2,1)" +action: 31 + +# State 20 +# ............... +# ...........x..x +# .o.ooo.x....o.. +# ............... +# .....x......... +# .x....x....o... +# ............... +# .....ox........ +# ............... +# o........x..... +# ............... +# .........x..... +# .........o..... +# .o............. +# .............x. +IsTerminal() = False +History() = [37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31] +HistoryString() = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31" +InformationStateString(1) = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31" +ObservationString(0) = "...............\n...........x..x\n.o.ooo.x....o..\n...............\n.....x.........\n.x....x....o...\n...............\n.....ox........\n...............\no........x.....\n...............\n.........x.....\n.........o.....\n.o.............\n.............x." 
+ObservationString(1) = "...............\n...........x..x\n.o.ooo.x....o..\n...............\n.....x.........\n.x....x....o...\n...............\n.....ox........\n...............\no........x.....\n...............\n.........x.....\n.........o.....\n.o.............\n.............x." +ObservationTensor(0): +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◯◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉ +◉◯◉◯◯◯◉◯◉◉◉◉◯◉◉ ◯◉◯◉◉◉◯◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◯◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +◉◯◉◉◉◉◯◉◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯ ◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◯◯◉◉◉◉◉◉◉◉ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◉◉◉◉◉◉◉◯◉◉◉◉◉ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◯◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +ObservationTensor(1): +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◯◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉ +◉◯◉◯◯◯◉◯◉◉◉◉◯◉◉ ◯◉◯◉◉◉◯◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◯◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +◉◯◉◉◉◉◯◉◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯ ◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◯◯◉◉◉◉◉◉◉◉ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◉◉◉◉◉◉◉◯◉◉◉◉◉ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◯◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 30, 32, 36, 38, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 77, 78, 79, 80, 82, 83, 84, 85, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 136, 137, 138, 139, 140, 141, 142, 143, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191, 192, 193, 194, 195, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 224] +StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,2)", "x(0,3)", "x(0,4)", "x(0,5)", "x(0,6)", "x(0,7)", "x(0,8)", "x(0,9)", "x(0,10)", "x(0,11)", "x(0,12)", "x(0,13)", "x(0,14)", "x(1,0)", "x(1,1)", "x(1,2)", "x(1,3)", "x(1,4)", "x(1,5)", "x(1,6)", "x(1,7)", "x(1,8)", "x(1,9)", "x(1,10)", "x(1,12)", "x(1,13)", "x(2,0)", "x(2,2)", "x(2,6)", "x(2,8)", "x(2,9)", "x(2,10)", "x(2,11)", "x(2,13)", "x(2,14)", "x(3,0)", "x(3,1)", "x(3,2)", "x(3,3)", "x(3,4)", "x(3,5)", "x(3,6)", "x(3,7)", "x(3,8)", "x(3,9)", "x(3,10)", "x(3,11)", "x(3,12)", "x(3,13)", "x(3,14)", "x(4,0)", "x(4,1)", "x(4,2)", "x(4,3)", "x(4,4)", "x(4,6)", "x(4,7)", "x(4,8)", "x(4,9)", "x(4,10)", "x(4,11)", "x(4,12)", "x(4,13)", "x(4,14)", "x(5,0)", "x(5,2)", "x(5,3)", "x(5,4)", "x(5,5)", "x(5,7)", 
"x(5,8)", "x(5,9)", "x(5,10)", "x(5,12)", "x(5,13)", "x(5,14)", "x(6,0)", "x(6,1)", "x(6,2)", "x(6,3)", "x(6,4)", "x(6,5)", "x(6,6)", "x(6,7)", "x(6,8)", "x(6,9)", "x(6,10)", "x(6,11)", "x(6,12)", "x(6,13)", "x(6,14)", "x(7,0)", "x(7,1)", "x(7,2)", "x(7,3)", "x(7,4)", "x(7,7)", "x(7,8)", "x(7,9)", "x(7,10)", "x(7,11)", "x(7,12)", "x(7,13)", "x(7,14)", "x(8,0)", "x(8,1)", "x(8,2)", "x(8,3)", "x(8,4)", "x(8,5)", "x(8,6)", "x(8,7)", "x(8,8)", "x(8,9)", "x(8,10)", "x(8,11)", "x(8,12)", "x(8,13)", "x(8,14)", "x(9,1)", "x(9,2)", "x(9,3)", "x(9,4)", "x(9,5)", "x(9,6)", "x(9,7)", "x(9,8)", "x(9,10)", "x(9,11)", "x(9,12)", "x(9,13)", "x(9,14)", "x(10,0)", "x(10,1)", "x(10,2)", "x(10,3)", "x(10,4)", "x(10,5)", "x(10,6)", "x(10,7)", "x(10,8)", "x(10,9)", "x(10,10)", "x(10,11)", "x(10,12)", "x(10,13)", "x(10,14)", "x(11,0)", "x(11,1)", "x(11,2)", "x(11,3)", "x(11,4)", "x(11,5)", "x(11,6)", "x(11,7)", "x(11,8)", "x(11,10)", "x(11,11)", "x(11,12)", "x(11,13)", "x(11,14)", "x(12,0)", "x(12,1)", "x(12,2)", "x(12,3)", "x(12,4)", "x(12,5)", "x(12,6)", "x(12,7)", "x(12,8)", "x(12,10)", "x(12,11)", "x(12,12)", "x(12,13)", "x(12,14)", "x(13,0)", "x(13,2)", "x(13,3)", "x(13,4)", "x(13,5)", "x(13,6)", "x(13,7)", "x(13,8)", "x(13,9)", "x(13,10)", "x(13,11)", "x(13,12)", "x(13,13)", "x(13,14)", "x(14,0)", "x(14,1)", "x(14,2)", "x(14,3)", "x(14,4)", "x(14,5)", "x(14,6)", "x(14,7)", "x(14,8)", "x(14,9)", "x(14,10)", "x(14,11)", "x(14,12)", "x(14,14)"] + +# Apply action "x(7,10)" +action: 115 + +# State 21 +# ............... +# ...........x..x +# .o.ooo.x....o.. +# ............... +# .....x......... +# .x....x....o... +# ............... +# .....ox...x.... +# ............... +# o........x..... +# ............... +# .........x..... +# .........o..... +# .o............. +# .............x. +IsTerminal() = False +History() = [37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115] +HistoryString() = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115" +InformationStateString(1) = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115" +ObservationString(0) = "...............\n...........x..x\n.o.ooo.x....o..\n...............\n.....x.........\n.x....x....o...\n...............\n.....ox...x....\n...............\no........x.....\n...............\n.........x.....\n.........o.....\n.o.............\n.............x." +ObservationString(1) = "...............\n...........x..x\n.o.ooo.x....o..\n...............\n.....x.........\n.x....x....o...\n...............\n.....ox...x....\n...............\no........x.....\n...............\n.........x.....\n.........o.....\n.o.............\n.............x." 
+ObservationTensor(0): +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◯◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉ +◉◯◉◯◯◯◉◯◉◉◉◉◯◉◉ ◯◉◯◉◉◉◯◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◯◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +◉◯◉◉◉◉◯◉◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯ ◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◯◯◉◉◉◯◉◉◉◉ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◉◉◉◉◉◉◉◯◉◉◉◉◉ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◯◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +ObservationTensor(1): +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◯◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉ +◉◯◉◯◯◯◉◯◉◉◉◉◯◉◉ ◯◉◯◉◉◉◯◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◯◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +◉◯◉◉◉◉◯◉◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯ ◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◯◯◉◉◉◯◉◉◉◉ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◉◉◉◉◉◉◉◯◉◉◉◉◉ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◯◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 30, 32, 36, 38, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 77, 78, 79, 80, 82, 83, 84, 85, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 112, 113, 114, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 136, 137, 138, 139, 140, 141, 142, 143, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191, 192, 193, 194, 195, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 224] +StringLegalActions() = ["o(0,0)", "o(0,1)", "o(0,2)", "o(0,3)", "o(0,4)", "o(0,5)", "o(0,6)", "o(0,7)", "o(0,8)", "o(0,9)", "o(0,10)", "o(0,11)", "o(0,12)", "o(0,13)", "o(0,14)", "o(1,0)", "o(1,1)", "o(1,2)", "o(1,3)", "o(1,4)", "o(1,5)", "o(1,6)", "o(1,7)", "o(1,8)", "o(1,9)", "o(1,10)", "o(1,12)", "o(1,13)", "o(2,0)", "o(2,2)", "o(2,6)", "o(2,8)", "o(2,9)", "o(2,10)", "o(2,11)", "o(2,13)", "o(2,14)", "o(3,0)", "o(3,1)", "o(3,2)", "o(3,3)", "o(3,4)", "o(3,5)", "o(3,6)", "o(3,7)", "o(3,8)", "o(3,9)", "o(3,10)", "o(3,11)", "o(3,12)", "o(3,13)", "o(3,14)", "o(4,0)", "o(4,1)", "o(4,2)", "o(4,3)", "o(4,4)", "o(4,6)", "o(4,7)", "o(4,8)", "o(4,9)", "o(4,10)", "o(4,11)", "o(4,12)", "o(4,13)", "o(4,14)", "o(5,0)", "o(5,2)", "o(5,3)", "o(5,4)", "o(5,5)", "o(5,7)", "o(5,8)", "o(5,9)", "o(5,10)", "o(5,12)", "o(5,13)", "o(5,14)", "o(6,0)", "o(6,1)", "o(6,2)", "o(6,3)", "o(6,4)", "o(6,5)", "o(6,6)", "o(6,7)", "o(6,8)", "o(6,9)", "o(6,10)", "o(6,11)", "o(6,12)", "o(6,13)", "o(6,14)", "o(7,0)", "o(7,1)", "o(7,2)", "o(7,3)", "o(7,4)", "o(7,7)", "o(7,8)", 
"o(7,9)", "o(7,11)", "o(7,12)", "o(7,13)", "o(7,14)", "o(8,0)", "o(8,1)", "o(8,2)", "o(8,3)", "o(8,4)", "o(8,5)", "o(8,6)", "o(8,7)", "o(8,8)", "o(8,9)", "o(8,10)", "o(8,11)", "o(8,12)", "o(8,13)", "o(8,14)", "o(9,1)", "o(9,2)", "o(9,3)", "o(9,4)", "o(9,5)", "o(9,6)", "o(9,7)", "o(9,8)", "o(9,10)", "o(9,11)", "o(9,12)", "o(9,13)", "o(9,14)", "o(10,0)", "o(10,1)", "o(10,2)", "o(10,3)", "o(10,4)", "o(10,5)", "o(10,6)", "o(10,7)", "o(10,8)", "o(10,9)", "o(10,10)", "o(10,11)", "o(10,12)", "o(10,13)", "o(10,14)", "o(11,0)", "o(11,1)", "o(11,2)", "o(11,3)", "o(11,4)", "o(11,5)", "o(11,6)", "o(11,7)", "o(11,8)", "o(11,10)", "o(11,11)", "o(11,12)", "o(11,13)", "o(11,14)", "o(12,0)", "o(12,1)", "o(12,2)", "o(12,3)", "o(12,4)", "o(12,5)", "o(12,6)", "o(12,7)", "o(12,8)", "o(12,10)", "o(12,11)", "o(12,12)", "o(12,13)", "o(12,14)", "o(13,0)", "o(13,2)", "o(13,3)", "o(13,4)", "o(13,5)", "o(13,6)", "o(13,7)", "o(13,8)", "o(13,9)", "o(13,10)", "o(13,11)", "o(13,12)", "o(13,13)", "o(13,14)", "o(14,0)", "o(14,1)", "o(14,2)", "o(14,3)", "o(14,4)", "o(14,5)", "o(14,6)", "o(14,7)", "o(14,8)", "o(14,9)", "o(14,10)", "o(14,11)", "o(14,12)", "o(14,14)"] + +# Apply action "o(0,5)" +action: 5 + +# State 22 +# Apply action "x(14,0)" +action: 210 + +# State 23 +# Apply action "o(6,8)" +action: 98 + +# State 24 +# Apply action "x(0,2)" +action: 2 + +# State 25 +# Apply action "o(2,11)" +action: 41 + +# State 26 +# Apply action "x(3,8)" +action: 53 + +# State 27 +# Apply action "o(1,5)" +action: 20 + +# State 28 +# Apply action "x(9,8)" +action: 143 + +# State 29 +# Apply action "o(14,6)" +action: 216 + +# State 30 +# Apply action "x(5,9)" +action: 84 + +# State 31 +# Apply action "o(11,2)" +action: 167 + +# State 32 +# Apply action "x(14,5)" +action: 215 + +# State 33 +# Apply action "o(14,12)" +action: 222 + +# State 34 +# Apply action "x(6,13)" +action: 103 + +# State 35 +# Apply action "o(13,14)" +action: 209 + +# State 36 +# Apply action "x(6,11)" +action: 101 + +# State 37 +# Apply action "o(9,11)" +action: 146 + +# State 38 +# Apply action "x(12,14)" +action: 194 + +# State 39 +# Apply action "o(3,0)" +action: 45 + +# State 40 +# ..x..o......... +# .....o.....x..x +# .o.ooo.x...oo.. +# o.......x...... +# .....x......... +# .x....x..x.o... +# ........o..x.x. +# .....ox...x.... +# ............... +# o.......xx.o... +# ............... +# ..o......x..... +# .........o....x +# .o............o +# x....xo.....ox. 
+IsTerminal() = False +History() = [37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45] +HistoryString() = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45" +InformationStateString(1) = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45" +ObservationString(0) = "..x..o.........\n.....o.....x..x\n.o.ooo.x...oo..\no.......x......\n.....x.........\n.x....x..x.o...\n........o..x.x.\n.....ox...x....\n...............\no.......xx.o...\n...............\n..o......x.....\n.........o....x\n.o............o\nx....xo.....ox." +ObservationString(1) = "..x..o.........\n.....o.....x..x\n.o.ooo.x...oo..\no.......x......\n.....x.........\n.x....x..x.o...\n........o..x.x.\n.....ox...x....\n...............\no.......xx.o...\n...............\n..o......x.....\n.........o....x\n.o............o\nx....xo.....ox." +ObservationTensor(0): +◉◉◯◉◉◯◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◯◉◉◉◉◉◯◉◉◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉ +◉◯◉◯◯◯◉◯◉◉◉◯◯◉◉ ◯◉◯◉◉◉◯◯◯◯◯◉◉◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +◯◉◉◉◉◉◉◉◯◉◉◉◉◉◉ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +◉◉◉◉◉◯◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +◉◯◉◉◉◉◯◉◉◯◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯ ◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◯◉◉◯◉◯◉ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯ +◉◉◉◉◉◯◯◉◉◉◯◉◉◉◉ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◉◉◉◉◉◉◯◯◉◯◉◉◉ ◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◯◉◉◉◉◉◉◯◉◉◉◉◉ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +◉◯◉◉◉◉◉◉◉◉◉◉◉◉◯ ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◉◉◉◯◯◉◉◉◉◉◯◯◉ ◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯ ◉◯◯◯◯◉◯◯◯◯◯◯◯◉◯ +ObservationTensor(1): +◉◉◯◉◉◯◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◯◉◉◉◉◉◯◉◉◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉ +◉◯◉◯◯◯◉◯◉◉◉◯◯◉◉ ◯◉◯◉◉◉◯◯◯◯◯◉◉◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +◯◉◉◉◉◉◉◉◯◉◉◉◉◉◉ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +◉◉◉◉◉◯◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +◉◯◉◉◉◉◯◉◉◯◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯ ◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◯◉◉◯◉◯◉ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯ +◉◉◉◉◉◯◯◉◉◉◯◉◉◉◉ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◉◉◉◉◉◉◯◯◉◯◉◉◉ ◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◯◉◉◉◉◉◉◯◉◉◉◉◉ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +◉◯◉◉◉◉◉◉◉◉◉◉◉◉◯ ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◉◉◉◯◯◉◉◉◉◉◯◯◉ ◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯ ◉◯◯◯◯◉◯◯◯◯◯◯◯◉◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 27, 28, 30, 32, 36, 38, 39, 40, 43, 44, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 77, 78, 79, 80, 82, 83, 85, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100, 102, 104, 105, 106, 107, 108, 109, 112, 113, 114, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 136, 137, 138, 139, 140, 141, 142, 145, 147, 148, 149, 
150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 168, 169, 170, 171, 172, 173, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191, 192, 193, 195, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 211, 212, 213, 214, 217, 218, 219, 220, 221, 224] +StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,3)", "x(0,4)", "x(0,6)", "x(0,7)", "x(0,8)", "x(0,9)", "x(0,10)", "x(0,11)", "x(0,12)", "x(0,13)", "x(0,14)", "x(1,0)", "x(1,1)", "x(1,2)", "x(1,3)", "x(1,4)", "x(1,6)", "x(1,7)", "x(1,8)", "x(1,9)", "x(1,10)", "x(1,12)", "x(1,13)", "x(2,0)", "x(2,2)", "x(2,6)", "x(2,8)", "x(2,9)", "x(2,10)", "x(2,13)", "x(2,14)", "x(3,1)", "x(3,2)", "x(3,3)", "x(3,4)", "x(3,5)", "x(3,6)", "x(3,7)", "x(3,9)", "x(3,10)", "x(3,11)", "x(3,12)", "x(3,13)", "x(3,14)", "x(4,0)", "x(4,1)", "x(4,2)", "x(4,3)", "x(4,4)", "x(4,6)", "x(4,7)", "x(4,8)", "x(4,9)", "x(4,10)", "x(4,11)", "x(4,12)", "x(4,13)", "x(4,14)", "x(5,0)", "x(5,2)", "x(5,3)", "x(5,4)", "x(5,5)", "x(5,7)", "x(5,8)", "x(5,10)", "x(5,12)", "x(5,13)", "x(5,14)", "x(6,0)", "x(6,1)", "x(6,2)", "x(6,3)", "x(6,4)", "x(6,5)", "x(6,6)", "x(6,7)", "x(6,9)", "x(6,10)", "x(6,12)", "x(6,14)", "x(7,0)", "x(7,1)", "x(7,2)", "x(7,3)", "x(7,4)", "x(7,7)", "x(7,8)", "x(7,9)", "x(7,11)", "x(7,12)", "x(7,13)", "x(7,14)", "x(8,0)", "x(8,1)", "x(8,2)", "x(8,3)", "x(8,4)", "x(8,5)", "x(8,6)", "x(8,7)", "x(8,8)", "x(8,9)", "x(8,10)", "x(8,11)", "x(8,12)", "x(8,13)", "x(8,14)", "x(9,1)", "x(9,2)", "x(9,3)", "x(9,4)", "x(9,5)", "x(9,6)", "x(9,7)", "x(9,10)", "x(9,12)", "x(9,13)", "x(9,14)", "x(10,0)", "x(10,1)", "x(10,2)", "x(10,3)", "x(10,4)", "x(10,5)", "x(10,6)", "x(10,7)", "x(10,8)", "x(10,9)", "x(10,10)", "x(10,11)", "x(10,12)", "x(10,13)", "x(10,14)", "x(11,0)", "x(11,1)", "x(11,3)", "x(11,4)", "x(11,5)", "x(11,6)", "x(11,7)", "x(11,8)", "x(11,10)", "x(11,11)", "x(11,12)", "x(11,13)", "x(11,14)", "x(12,0)", "x(12,1)", "x(12,2)", "x(12,3)", "x(12,4)", "x(12,5)", "x(12,6)", "x(12,7)", "x(12,8)", "x(12,10)", "x(12,11)", "x(12,12)", "x(12,13)", "x(13,0)", "x(13,2)", "x(13,3)", "x(13,4)", "x(13,5)", "x(13,6)", "x(13,7)", "x(13,8)", "x(13,9)", "x(13,10)", "x(13,11)", "x(13,12)", "x(13,13)", "x(14,1)", "x(14,2)", "x(14,3)", "x(14,4)", "x(14,7)", "x(14,8)", "x(14,9)", "x(14,10)", "x(14,11)", "x(14,14)"] + +# Apply action "x(4,8)" +action: 68 + +# State 41 +# ..x..o......... +# .....o.....x..x +# .o.ooo.x...oo.. +# o.......x...... +# .....x..x...... +# .x....x..x.o... +# ........o..x.x. +# .....ox...x.... +# ............... +# o.......xx.o... +# ............... +# ..o......x..... +# .........o....x +# .o............o +# x....xo.....ox. 
+IsTerminal() = False +History() = [37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68] +HistoryString() = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68" +InformationStateString(1) = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68" +ObservationString(0) = "..x..o.........\n.....o.....x..x\n.o.ooo.x...oo..\no.......x......\n.....x..x......\n.x....x..x.o...\n........o..x.x.\n.....ox...x....\n...............\no.......xx.o...\n...............\n..o......x.....\n.........o....x\n.o............o\nx....xo.....ox." +ObservationString(1) = "..x..o.........\n.....o.....x..x\n.o.ooo.x...oo..\no.......x......\n.....x..x......\n.x....x..x.o...\n........o..x.x.\n.....ox...x....\n...............\no.......xx.o...\n...............\n..o......x.....\n.........o....x\n.o............o\nx....xo.....ox." +ObservationTensor(0): +◉◉◯◉◉◯◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◯◉◉◉◉◉◯◉◉◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉ +◉◯◉◯◯◯◉◯◉◉◉◯◯◉◉ ◯◉◯◉◉◉◯◯◯◯◯◉◉◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +◯◉◉◉◉◉◉◉◯◉◉◉◉◉◉ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +◉◉◉◉◉◯◉◉◯◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯ +◉◯◉◉◉◉◯◉◉◯◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯ ◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◯◉◉◯◉◯◉ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯ +◉◉◉◉◉◯◯◉◉◉◯◉◉◉◉ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◉◉◉◉◉◉◯◯◉◯◉◉◉ ◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◯◉◉◉◉◉◉◯◉◉◉◉◉ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +◉◯◉◉◉◉◉◉◉◉◉◉◉◉◯ ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◉◉◉◯◯◉◉◉◉◉◯◯◉ ◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯ ◉◯◯◯◯◉◯◯◯◯◯◯◯◉◯ +ObservationTensor(1): +◉◉◯◉◉◯◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◉◉◉◯◉◉◉◉◉◯◉◉◯ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉ +◉◯◉◯◯◯◉◯◉◉◉◯◯◉◉ ◯◉◯◉◉◉◯◯◯◯◯◉◉◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +◯◉◉◉◉◉◉◉◯◉◉◉◉◉◉ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +◉◉◉◉◉◯◉◉◯◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯ +◉◯◉◉◉◉◯◉◉◯◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯ ◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◯◉◉◯◉◯◉ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯ +◉◉◉◉◉◯◯◉◉◉◯◉◉◉◉ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◉◉◉◉◉◉◯◯◉◯◉◉◉ ◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◯◉◉◉◉◉◉◯◉◉◉◉◉ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +◉◯◉◉◉◉◉◉◉◉◉◉◉◉◯ ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◉◉◉◯◯◉◉◉◉◉◯◯◉ ◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯ ◉◯◯◯◯◉◯◯◯◯◯◯◯◉◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 27, 28, 30, 32, 36, 38, 39, 40, 43, 44, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 66, 67, 69, 70, 71, 72, 73, 74, 75, 77, 78, 79, 80, 82, 83, 85, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100, 102, 104, 105, 106, 107, 108, 109, 112, 113, 114, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 136, 137, 138, 139, 140, 141, 142, 145, 
147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 168, 169, 170, 171, 172, 173, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191, 192, 193, 195, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 211, 212, 213, 214, 217, 218, 219, 220, 221, 224] +StringLegalActions() = ["o(0,0)", "o(0,1)", "o(0,3)", "o(0,4)", "o(0,6)", "o(0,7)", "o(0,8)", "o(0,9)", "o(0,10)", "o(0,11)", "o(0,12)", "o(0,13)", "o(0,14)", "o(1,0)", "o(1,1)", "o(1,2)", "o(1,3)", "o(1,4)", "o(1,6)", "o(1,7)", "o(1,8)", "o(1,9)", "o(1,10)", "o(1,12)", "o(1,13)", "o(2,0)", "o(2,2)", "o(2,6)", "o(2,8)", "o(2,9)", "o(2,10)", "o(2,13)", "o(2,14)", "o(3,1)", "o(3,2)", "o(3,3)", "o(3,4)", "o(3,5)", "o(3,6)", "o(3,7)", "o(3,9)", "o(3,10)", "o(3,11)", "o(3,12)", "o(3,13)", "o(3,14)", "o(4,0)", "o(4,1)", "o(4,2)", "o(4,3)", "o(4,4)", "o(4,6)", "o(4,7)", "o(4,9)", "o(4,10)", "o(4,11)", "o(4,12)", "o(4,13)", "o(4,14)", "o(5,0)", "o(5,2)", "o(5,3)", "o(5,4)", "o(5,5)", "o(5,7)", "o(5,8)", "o(5,10)", "o(5,12)", "o(5,13)", "o(5,14)", "o(6,0)", "o(6,1)", "o(6,2)", "o(6,3)", "o(6,4)", "o(6,5)", "o(6,6)", "o(6,7)", "o(6,9)", "o(6,10)", "o(6,12)", "o(6,14)", "o(7,0)", "o(7,1)", "o(7,2)", "o(7,3)", "o(7,4)", "o(7,7)", "o(7,8)", "o(7,9)", "o(7,11)", "o(7,12)", "o(7,13)", "o(7,14)", "o(8,0)", "o(8,1)", "o(8,2)", "o(8,3)", "o(8,4)", "o(8,5)", "o(8,6)", "o(8,7)", "o(8,8)", "o(8,9)", "o(8,10)", "o(8,11)", "o(8,12)", "o(8,13)", "o(8,14)", "o(9,1)", "o(9,2)", "o(9,3)", "o(9,4)", "o(9,5)", "o(9,6)", "o(9,7)", "o(9,10)", "o(9,12)", "o(9,13)", "o(9,14)", "o(10,0)", "o(10,1)", "o(10,2)", "o(10,3)", "o(10,4)", "o(10,5)", "o(10,6)", "o(10,7)", "o(10,8)", "o(10,9)", "o(10,10)", "o(10,11)", "o(10,12)", "o(10,13)", "o(10,14)", "o(11,0)", "o(11,1)", "o(11,3)", "o(11,4)", "o(11,5)", "o(11,6)", "o(11,7)", "o(11,8)", "o(11,10)", "o(11,11)", "o(11,12)", "o(11,13)", "o(11,14)", "o(12,0)", "o(12,1)", "o(12,2)", "o(12,3)", "o(12,4)", "o(12,5)", "o(12,6)", "o(12,7)", "o(12,8)", "o(12,10)", "o(12,11)", "o(12,12)", "o(12,13)", "o(13,0)", "o(13,2)", "o(13,3)", "o(13,4)", "o(13,5)", "o(13,6)", "o(13,7)", "o(13,8)", "o(13,9)", "o(13,10)", "o(13,11)", "o(13,12)", "o(13,13)", "o(14,1)", "o(14,2)", "o(14,3)", "o(14,4)", "o(14,7)", "o(14,8)", "o(14,9)", "o(14,10)", "o(14,11)", "o(14,14)"] + +# Apply action "o(2,0)" +action: 30 + +# State 42 +# Apply action "x(13,7)" +action: 202 + +# State 43 +# Apply action "o(7,4)" +action: 109 + +# State 44 +# Apply action "x(14,14)" +action: 224 + +# State 45 +# Apply action "o(8,4)" +action: 124 + +# State 46 +# Apply action "x(0,7)" +action: 7 + +# State 47 +# Apply action "o(1,3)" +action: 18 + +# State 48 +# Apply action "x(4,1)" +action: 61 + +# State 49 +# Apply action "o(14,10)" +action: 220 + +# State 50 +# Apply action "x(6,3)" +action: 93 + +# State 51 +# Apply action "o(5,4)" +action: 79 + +# State 52 +# Apply action "x(11,14)" +action: 179 + +# State 53 +# Apply action "o(9,13)" +action: 148 + +# State 54 +# Apply action "x(4,11)" +action: 71 + +# State 55 +# Apply action "o(13,0)" +action: 195 + +# State 56 +# Apply action "x(7,13)" +action: 118 + +# State 57 +# Apply action "o(13,3)" +action: 198 + +# State 58 +# Apply action "x(4,9)" +action: 69 + +# State 59 +# Apply action "o(8,10)" +action: 130 + +# State 60 +# ..x..o.x....... +# ...o.o.....x..x +# oo.ooo.x...oo.. +# o.......x...... +# .x...x..xx.x... +# .x..o.x..x.o... +# ...x....o..x.x. +# ....oox...x..x. +# ....o.....o.... +# o.......xx.o.o. +# ............... 
+# ..o......x....x +# .........o....x +# oo.o...x......o +# x....xo...o.oxx +IsTerminal() = False +History() = [37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130] +HistoryString() = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130" +InformationStateString(1) = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130" +ObservationString(0) = "..x..o.x.......\n...o.o.....x..x\noo.ooo.x...oo..\no.......x......\n.x...x..xx.x...\n.x..o.x..x.o...\n...x....o..x.x.\n....oox...x..x.\n....o.....o....\no.......xx.o.o.\n...............\n..o......x....x\n.........o....x\noo.o...x......o\nx....xo...o.oxx" +ObservationString(1) = "..x..o.x.......\n...o.o.....x..x\noo.ooo.x...oo..\no.......x......\n.x...x..xx.x...\n.x..o.x..x.o...\n...x....o..x.x.\n....oox...x..x.\n....o.....o....\no.......xx.o.o.\n...............\n..o......x....x\n.........o....x\noo.o...x......o\nx....xo...o.oxx" +ObservationTensor(0): +◉◉◯◉◉◯◉◯◉◉◉◉◉◉◉ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯ +◉◉◉◯◉◯◉◉◉◉◉◯◉◉◯ ◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉ +◯◯◉◯◯◯◉◯◉◉◉◯◯◉◉ ◉◉◯◉◉◉◯◯◯◯◯◉◉◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +◯◉◉◉◉◉◉◉◯◉◉◉◉◉◉ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +◉◯◉◉◉◯◉◉◯◯◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◉◉◯◉◯◯◯ +◉◯◉◉◯◉◯◉◉◯◉◯◉◉◉ ◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯ ◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯ +◉◉◉◯◉◉◉◉◯◉◉◯◉◯◉ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ +◉◉◉◉◯◯◯◉◉◉◯◉◉◯◉ ◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◉◯◯◉◯ +◉◉◉◉◯◉◉◉◉◉◯◉◉◉◉ ◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◉◉◉◉◉◉◯◯◉◯◉◯◉ ◉◯◯◯◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◯◉◉◉◉◉◉◯◉◉◉◉◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +◯◯◉◯◉◉◉◯◉◉◉◉◉◉◯ ◉◉◯◉◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +◯◉◉◉◉◯◯◉◉◉◯◉◯◯◯ ◯◯◯◯◯◯◉◯◯◯◉◯◉◯◯ ◉◯◯◯◯◉◯◯◯◯◯◯◯◉◉ +ObservationTensor(1): +◉◉◯◉◉◯◉◯◉◉◉◉◉◉◉ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯ +◉◉◉◯◉◯◉◉◉◉◉◯◉◉◯ ◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉ +◯◯◉◯◯◯◉◯◉◉◉◯◯◉◉ ◉◉◯◉◉◉◯◯◯◯◯◉◉◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +◯◉◉◉◉◉◉◉◯◉◉◉◉◉◉ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +◉◯◉◉◉◯◉◉◯◯◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◉◉◯◉◯◯◯ +◉◯◉◉◯◉◯◉◉◯◉◯◉◉◉ ◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯ ◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯ +◉◉◉◯◉◉◉◉◯◉◉◯◉◯◉ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ +◉◉◉◉◯◯◯◉◉◉◯◉◉◯◉ ◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◉◯◯◉◯ +◉◉◉◉◯◉◉◉◉◉◯◉◉◉◉ ◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◉◉◉◉◉◉◯◯◉◯◉◯◉ ◉◯◯◯◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◯◉◉◉◉◉◉◯◉◉◉◉◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +◯◯◉◯◉◉◉◯◉◉◉◉◉◉◯ ◉◉◯◉◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +◯◉◉◉◉◯◯◉◉◉◯◉◯◯◯ ◯◯◯◯◯◯◉◯◯◯◉◯◉◯◯ ◉◯◯◯◯◉◯◯◯◯◯◯◯◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 21, 22, 23, 24, 25, 27, 
28, 32, 36, 38, 39, 40, 43, 44, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 62, 63, 64, 66, 67, 70, 72, 73, 74, 75, 77, 78, 80, 82, 83, 85, 87, 88, 89, 90, 91, 92, 94, 95, 96, 97, 99, 100, 102, 104, 105, 106, 107, 108, 112, 113, 114, 116, 117, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 131, 132, 133, 134, 136, 137, 138, 139, 140, 141, 142, 145, 147, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 168, 169, 170, 171, 172, 173, 175, 176, 177, 178, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191, 192, 193, 197, 199, 200, 201, 203, 204, 205, 206, 207, 208, 211, 212, 213, 214, 217, 218, 219, 221] +StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,3)", "x(0,4)", "x(0,6)", "x(0,8)", "x(0,9)", "x(0,10)", "x(0,11)", "x(0,12)", "x(0,13)", "x(0,14)", "x(1,0)", "x(1,1)", "x(1,2)", "x(1,4)", "x(1,6)", "x(1,7)", "x(1,8)", "x(1,9)", "x(1,10)", "x(1,12)", "x(1,13)", "x(2,2)", "x(2,6)", "x(2,8)", "x(2,9)", "x(2,10)", "x(2,13)", "x(2,14)", "x(3,1)", "x(3,2)", "x(3,3)", "x(3,4)", "x(3,5)", "x(3,6)", "x(3,7)", "x(3,9)", "x(3,10)", "x(3,11)", "x(3,12)", "x(3,13)", "x(3,14)", "x(4,0)", "x(4,2)", "x(4,3)", "x(4,4)", "x(4,6)", "x(4,7)", "x(4,10)", "x(4,12)", "x(4,13)", "x(4,14)", "x(5,0)", "x(5,2)", "x(5,3)", "x(5,5)", "x(5,7)", "x(5,8)", "x(5,10)", "x(5,12)", "x(5,13)", "x(5,14)", "x(6,0)", "x(6,1)", "x(6,2)", "x(6,4)", "x(6,5)", "x(6,6)", "x(6,7)", "x(6,9)", "x(6,10)", "x(6,12)", "x(6,14)", "x(7,0)", "x(7,1)", "x(7,2)", "x(7,3)", "x(7,7)", "x(7,8)", "x(7,9)", "x(7,11)", "x(7,12)", "x(7,14)", "x(8,0)", "x(8,1)", "x(8,2)", "x(8,3)", "x(8,5)", "x(8,6)", "x(8,7)", "x(8,8)", "x(8,9)", "x(8,11)", "x(8,12)", "x(8,13)", "x(8,14)", "x(9,1)", "x(9,2)", "x(9,3)", "x(9,4)", "x(9,5)", "x(9,6)", "x(9,7)", "x(9,10)", "x(9,12)", "x(9,14)", "x(10,0)", "x(10,1)", "x(10,2)", "x(10,3)", "x(10,4)", "x(10,5)", "x(10,6)", "x(10,7)", "x(10,8)", "x(10,9)", "x(10,10)", "x(10,11)", "x(10,12)", "x(10,13)", "x(10,14)", "x(11,0)", "x(11,1)", "x(11,3)", "x(11,4)", "x(11,5)", "x(11,6)", "x(11,7)", "x(11,8)", "x(11,10)", "x(11,11)", "x(11,12)", "x(11,13)", "x(12,0)", "x(12,1)", "x(12,2)", "x(12,3)", "x(12,4)", "x(12,5)", "x(12,6)", "x(12,7)", "x(12,8)", "x(12,10)", "x(12,11)", "x(12,12)", "x(12,13)", "x(13,2)", "x(13,4)", "x(13,5)", "x(13,6)", "x(13,8)", "x(13,9)", "x(13,10)", "x(13,11)", "x(13,12)", "x(13,13)", "x(14,1)", "x(14,2)", "x(14,3)", "x(14,4)", "x(14,7)", "x(14,8)", "x(14,9)", "x(14,11)"] + +# Apply action "x(13,8)" +action: 203 + +# State 61 +# ..x..o.x....... +# ...o.o.....x..x +# oo.ooo.x...oo.. +# o.......x...... +# .x...x..xx.x... +# .x..o.x..x.o... +# ...x....o..x.x. +# ....oox...x..x. +# ....o.....o.... +# o.......xx.o.o. +# ............... 
+# ..o......x....x +# .........o....x +# oo.o...xx.....o +# x....xo...o.oxx +IsTerminal() = False +History() = [37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130, 203] +HistoryString() = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130, 203" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130, 203" +InformationStateString(1) = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130, 203" +ObservationString(0) = "..x..o.x.......\n...o.o.....x..x\noo.ooo.x...oo..\no.......x......\n.x...x..xx.x...\n.x..o.x..x.o...\n...x....o..x.x.\n....oox...x..x.\n....o.....o....\no.......xx.o.o.\n...............\n..o......x....x\n.........o....x\noo.o...xx.....o\nx....xo...o.oxx" +ObservationString(1) = "..x..o.x.......\n...o.o.....x..x\noo.ooo.x...oo..\no.......x......\n.x...x..xx.x...\n.x..o.x..x.o...\n...x....o..x.x.\n....oox...x..x.\n....o.....o....\no.......xx.o.o.\n...............\n..o......x....x\n.........o....x\noo.o...xx.....o\nx....xo...o.oxx" +ObservationTensor(0): +◉◉◯◉◉◯◉◯◉◉◉◉◉◉◉ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯ +◉◉◉◯◉◯◉◉◉◉◉◯◉◉◯ ◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉ +◯◯◉◯◯◯◉◯◉◉◉◯◯◉◉ ◉◉◯◉◉◉◯◯◯◯◯◉◉◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +◯◉◉◉◉◉◉◉◯◉◉◉◉◉◉ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +◉◯◉◉◉◯◉◉◯◯◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◉◉◯◉◯◯◯ +◉◯◉◉◯◉◯◉◉◯◉◯◉◉◉ ◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯ ◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯ +◉◉◉◯◉◉◉◉◯◉◉◯◉◯◉ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ +◉◉◉◉◯◯◯◉◉◉◯◉◉◯◉ ◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◉◯◯◉◯ +◉◉◉◉◯◉◉◉◉◉◯◉◉◉◉ ◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◉◉◉◉◉◉◯◯◉◯◉◯◉ ◉◯◯◯◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◯◉◉◉◉◉◉◯◉◉◉◉◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +◯◯◉◯◉◉◉◯◯◉◉◉◉◉◯ ◉◉◯◉◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯ +◯◉◉◉◉◯◯◉◉◉◯◉◯◯◯ ◯◯◯◯◯◯◉◯◯◯◉◯◉◯◯ ◉◯◯◯◯◉◯◯◯◯◯◯◯◉◉ +ObservationTensor(1): +◉◉◯◉◉◯◉◯◉◉◉◉◉◉◉ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯ +◉◉◉◯◉◯◉◉◉◉◉◯◉◉◯ ◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉ +◯◯◉◯◯◯◉◯◉◉◉◯◯◉◉ ◉◉◯◉◉◉◯◯◯◯◯◉◉◯◯ ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +◯◉◉◉◉◉◉◉◯◉◉◉◉◉◉ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +◉◯◉◉◉◯◉◉◯◯◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◉◉◯◉◯◯◯ +◉◯◉◉◯◉◯◉◉◯◉◯◉◉◉ ◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯ ◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯ +◉◉◉◯◉◉◉◉◯◉◉◯◉◯◉ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ +◉◉◉◉◯◯◯◉◉◉◯◉◉◯◉ ◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◉◯◯◉◯ +◉◉◉◉◯◉◉◉◉◉◯◉◉◉◉ ◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◉◉◉◉◉◉◯◯◉◯◉◯◉ ◉◯◯◯◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◯◉◉◉◉◉◉◯◉◉◉◉◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +◯◯◉◯◉◉◉◯◯◉◉◉◉◉◯ ◉◉◯◉◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯ +◯◉◉◉◉◯◯◉◉◉◯◉◯◯◯ ◯◯◯◯◯◯◉◯◯◯◉◯◉◯◯ ◉◯◯◯◯◉◯◯◯◯◯◯◯◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 21, 
22, 23, 24, 25, 27, 28, 32, 36, 38, 39, 40, 43, 44, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 62, 63, 64, 66, 67, 70, 72, 73, 74, 75, 77, 78, 80, 82, 83, 85, 87, 88, 89, 90, 91, 92, 94, 95, 96, 97, 99, 100, 102, 104, 105, 106, 107, 108, 112, 113, 114, 116, 117, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 131, 132, 133, 134, 136, 137, 138, 139, 140, 141, 142, 145, 147, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 168, 169, 170, 171, 172, 173, 175, 176, 177, 178, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191, 192, 193, 197, 199, 200, 201, 204, 205, 206, 207, 208, 211, 212, 213, 214, 217, 218, 219, 221] +StringLegalActions() = ["o(0,0)", "o(0,1)", "o(0,3)", "o(0,4)", "o(0,6)", "o(0,8)", "o(0,9)", "o(0,10)", "o(0,11)", "o(0,12)", "o(0,13)", "o(0,14)", "o(1,0)", "o(1,1)", "o(1,2)", "o(1,4)", "o(1,6)", "o(1,7)", "o(1,8)", "o(1,9)", "o(1,10)", "o(1,12)", "o(1,13)", "o(2,2)", "o(2,6)", "o(2,8)", "o(2,9)", "o(2,10)", "o(2,13)", "o(2,14)", "o(3,1)", "o(3,2)", "o(3,3)", "o(3,4)", "o(3,5)", "o(3,6)", "o(3,7)", "o(3,9)", "o(3,10)", "o(3,11)", "o(3,12)", "o(3,13)", "o(3,14)", "o(4,0)", "o(4,2)", "o(4,3)", "o(4,4)", "o(4,6)", "o(4,7)", "o(4,10)", "o(4,12)", "o(4,13)", "o(4,14)", "o(5,0)", "o(5,2)", "o(5,3)", "o(5,5)", "o(5,7)", "o(5,8)", "o(5,10)", "o(5,12)", "o(5,13)", "o(5,14)", "o(6,0)", "o(6,1)", "o(6,2)", "o(6,4)", "o(6,5)", "o(6,6)", "o(6,7)", "o(6,9)", "o(6,10)", "o(6,12)", "o(6,14)", "o(7,0)", "o(7,1)", "o(7,2)", "o(7,3)", "o(7,7)", "o(7,8)", "o(7,9)", "o(7,11)", "o(7,12)", "o(7,14)", "o(8,0)", "o(8,1)", "o(8,2)", "o(8,3)", "o(8,5)", "o(8,6)", "o(8,7)", "o(8,8)", "o(8,9)", "o(8,11)", "o(8,12)", "o(8,13)", "o(8,14)", "o(9,1)", "o(9,2)", "o(9,3)", "o(9,4)", "o(9,5)", "o(9,6)", "o(9,7)", "o(9,10)", "o(9,12)", "o(9,14)", "o(10,0)", "o(10,1)", "o(10,2)", "o(10,3)", "o(10,4)", "o(10,5)", "o(10,6)", "o(10,7)", "o(10,8)", "o(10,9)", "o(10,10)", "o(10,11)", "o(10,12)", "o(10,13)", "o(10,14)", "o(11,0)", "o(11,1)", "o(11,3)", "o(11,4)", "o(11,5)", "o(11,6)", "o(11,7)", "o(11,8)", "o(11,10)", "o(11,11)", "o(11,12)", "o(11,13)", "o(12,0)", "o(12,1)", "o(12,2)", "o(12,3)", "o(12,4)", "o(12,5)", "o(12,6)", "o(12,7)", "o(12,8)", "o(12,10)", "o(12,11)", "o(12,12)", "o(12,13)", "o(13,2)", "o(13,4)", "o(13,5)", "o(13,6)", "o(13,9)", "o(13,10)", "o(13,11)", "o(13,12)", "o(13,13)", "o(14,1)", "o(14,2)", "o(14,3)", "o(14,4)", "o(14,7)", "o(14,8)", "o(14,9)", "o(14,11)"] + +# Apply action "o(3,5)" +action: 50 + +# State 62 +# Apply action "x(7,14)" +action: 119 + +# State 63 +# Apply action "o(10,9)" +action: 159 + +# State 64 +# Apply action "x(2,2)" +action: 32 + +# State 65 +# Apply action "o(10,10)" +action: 160 + +# State 66 +# Apply action "x(9,5)" +action: 140 + +# State 67 +# Apply action "o(4,6)" +action: 66 + +# State 68 +# Apply action "x(5,5)" +action: 80 + +# State 69 +# Apply action "o(8,0)" +action: 120 + +# State 70 +# Apply action "x(5,3)" +action: 78 + +# State 71 +# Apply action "o(3,9)" +action: 54 + +# State 72 +# Apply action "x(7,3)" +action: 108 + +# State 73 +# Apply action "o(11,6)" +action: 171 + +# State 74 +# Apply action "x(8,1)" +action: 121 + +# State 75 +# Apply action "o(14,8)" +action: 218 + +# State 76 +# Apply action "x(4,14)" +action: 74 + +# State 77 +# Apply action "o(7,1)" +action: 106 + +# State 78 +# Apply action "x(8,2)" +action: 122 + +# State 79 +# Apply action "o(1,8)" +action: 23 + +# State 80 +# ..x..o.x....... +# ...o.o..o..x..x +# ooxooo.x...oo.. +# o....o..xo..... 
+# .x...xo.xx.x..x +# .x.xoxx..x.o... +# ...x....o..x.x. +# .o.xoox...x..xx +# oxx.o.....o.... +# o....x..xx.o.o. +# .........oo.... +# ..o...o..x....x +# .........o....x +# oo.o...xx.....o +# x....xo.o.o.oxx +IsTerminal() = False +History() = [37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130, 203, 50, 119, 159, 32, 160, 140, 66, 80, 120, 78, 54, 108, 171, 121, 218, 74, 106, 122, 23] +HistoryString() = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130, 203, 50, 119, 159, 32, 160, 140, 66, 80, 120, 78, 54, 108, 171, 121, 218, 74, 106, 122, 23" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130, 203, 50, 119, 159, 32, 160, 140, 66, 80, 120, 78, 54, 108, 171, 121, 218, 74, 106, 122, 23" +InformationStateString(1) = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130, 203, 50, 119, 159, 32, 160, 140, 66, 80, 120, 78, 54, 108, 171, 121, 218, 74, 106, 122, 23" +ObservationString(0) = "..x..o.x.......\n...o.o..o..x..x\nooxooo.x...oo..\no....o..xo.....\n.x...xo.xx.x..x\n.x.xoxx..x.o...\n...x....o..x.x.\n.o.xoox...x..xx\noxx.o.....o....\no....x..xx.o.o.\n.........oo....\n..o...o..x....x\n.........o....x\noo.o...xx.....o\nx....xo.o.o.oxx" +ObservationString(1) = "..x..o.x.......\n...o.o..o..x..x\nooxooo.x...oo..\no....o..xo.....\n.x...xo.xx.x..x\n.x.xoxx..x.o...\n...x....o..x.x.\n.o.xoox...x..xx\noxx.o.....o....\no....x..xx.o.o.\n.........oo....\n..o...o..x....x\n.........o....x\noo.o...xx.....o\nx....xo.o.o.oxx" +ObservationTensor(0): +◉◉◯◉◉◯◉◯◉◉◉◉◉◉◉ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯ +◉◉◉◯◉◯◉◉◯◉◉◯◉◉◯ ◯◯◯◉◯◉◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉ +◯◯◯◯◯◯◉◯◉◉◉◯◯◉◉ ◉◉◯◉◉◉◯◯◯◯◯◉◉◯◯ ◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯ +◯◉◉◉◉◯◉◉◯◯◉◉◉◉◉ ◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +◉◯◉◉◉◯◯◉◯◯◉◯◉◉◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◉◉◯◉◯◯◉ +◉◯◉◯◯◯◯◉◉◯◉◯◉◉◉ ◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯ ◯◉◯◉◯◉◉◯◯◉◯◯◯◯◯ +◉◉◉◯◉◉◉◉◯◉◉◯◉◯◉ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ +◉◯◉◯◯◯◯◉◉◉◯◉◉◯◯ ◯◉◯◯◉◉◯◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉◯◯◯◉◯◯◉◉ +◯◯◯◉◯◉◉◉◉◉◯◉◉◉◉ ◉◯◯◯◉◯◯◯◯◯◉◯◯◯◯ ◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◉◉◉◯◉◉◯◯◉◯◉◯◉ ◉◯◯◯◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◉◯◯◉◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◯◉◉◉◯◉◉◯◉◉◉◉◯ ◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +◯◯◉◯◉◉◉◯◯◉◉◉◉◉◯ ◉◉◯◉◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯ +◯◉◉◉◉◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◉◯◉◯◉◯◉◯◯ ◉◯◯◯◯◉◯◯◯◯◯◯◯◉◉ +ObservationTensor(1): +◉◉◯◉◉◯◉◯◉◉◉◉◉◉◉ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯ +◉◉◉◯◉◯◉◉◯◉◉◯◉◉◯ ◯◯◯◉◯◉◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉ +◯◯◯◯◯◯◉◯◉◉◉◯◯◉◉ ◉◉◯◉◉◉◯◯◯◯◯◉◉◯◯ ◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯ +◯◉◉◉◉◯◉◉◯◯◉◉◉◉◉ ◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +◉◯◉◉◉◯◯◉◯◯◉◯◉◉◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◉◉◯◉◯◯◉ +◉◯◉◯◯◯◯◉◉◯◉◯◉◉◉ ◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯ ◯◉◯◉◯◉◉◯◯◉◯◯◯◯◯ +◉◉◉◯◉◉◉◉◯◉◉◯◉◯◉ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ +◉◯◉◯◯◯◯◉◉◉◯◉◉◯◯ 
◯◉◯◯◉◉◯◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉◯◯◯◉◯◯◉◉ +◯◯◯◉◯◉◉◉◉◉◯◉◉◉◉ ◉◯◯◯◉◯◯◯◯◯◉◯◯◯◯ ◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◉◉◉◯◉◉◯◯◉◯◉◯◉ ◉◯◯◯◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◉◯◯◉◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◯◉◉◉◯◉◉◯◉◉◉◉◯ ◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +◯◯◉◯◉◉◉◯◯◉◉◉◉◉◯ ◉◉◯◉◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯ +◯◉◉◉◉◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◉◯◉◯◉◯◉◯◯ ◉◯◯◯◯◉◯◯◯◯◯◯◯◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 21, 22, 24, 25, 27, 28, 36, 38, 39, 40, 43, 44, 46, 47, 48, 49, 51, 52, 55, 56, 57, 58, 59, 60, 62, 63, 64, 67, 70, 72, 73, 75, 77, 82, 83, 85, 87, 88, 89, 90, 91, 92, 94, 95, 96, 97, 99, 100, 102, 104, 105, 107, 112, 113, 114, 116, 117, 123, 125, 126, 127, 128, 129, 131, 132, 133, 134, 136, 137, 138, 139, 141, 142, 145, 147, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 161, 162, 163, 164, 165, 166, 168, 169, 170, 172, 173, 175, 176, 177, 178, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191, 192, 193, 197, 199, 200, 201, 204, 205, 206, 207, 208, 211, 212, 213, 214, 217, 219, 221] +StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,3)", "x(0,4)", "x(0,6)", "x(0,8)", "x(0,9)", "x(0,10)", "x(0,11)", "x(0,12)", "x(0,13)", "x(0,14)", "x(1,0)", "x(1,1)", "x(1,2)", "x(1,4)", "x(1,6)", "x(1,7)", "x(1,9)", "x(1,10)", "x(1,12)", "x(1,13)", "x(2,6)", "x(2,8)", "x(2,9)", "x(2,10)", "x(2,13)", "x(2,14)", "x(3,1)", "x(3,2)", "x(3,3)", "x(3,4)", "x(3,6)", "x(3,7)", "x(3,10)", "x(3,11)", "x(3,12)", "x(3,13)", "x(3,14)", "x(4,0)", "x(4,2)", "x(4,3)", "x(4,4)", "x(4,7)", "x(4,10)", "x(4,12)", "x(4,13)", "x(5,0)", "x(5,2)", "x(5,7)", "x(5,8)", "x(5,10)", "x(5,12)", "x(5,13)", "x(5,14)", "x(6,0)", "x(6,1)", "x(6,2)", "x(6,4)", "x(6,5)", "x(6,6)", "x(6,7)", "x(6,9)", "x(6,10)", "x(6,12)", "x(6,14)", "x(7,0)", "x(7,2)", "x(7,7)", "x(7,8)", "x(7,9)", "x(7,11)", "x(7,12)", "x(8,3)", "x(8,5)", "x(8,6)", "x(8,7)", "x(8,8)", "x(8,9)", "x(8,11)", "x(8,12)", "x(8,13)", "x(8,14)", "x(9,1)", "x(9,2)", "x(9,3)", "x(9,4)", "x(9,6)", "x(9,7)", "x(9,10)", "x(9,12)", "x(9,14)", "x(10,0)", "x(10,1)", "x(10,2)", "x(10,3)", "x(10,4)", "x(10,5)", "x(10,6)", "x(10,7)", "x(10,8)", "x(10,11)", "x(10,12)", "x(10,13)", "x(10,14)", "x(11,0)", "x(11,1)", "x(11,3)", "x(11,4)", "x(11,5)", "x(11,7)", "x(11,8)", "x(11,10)", "x(11,11)", "x(11,12)", "x(11,13)", "x(12,0)", "x(12,1)", "x(12,2)", "x(12,3)", "x(12,4)", "x(12,5)", "x(12,6)", "x(12,7)", "x(12,8)", "x(12,10)", "x(12,11)", "x(12,12)", "x(12,13)", "x(13,2)", "x(13,4)", "x(13,5)", "x(13,6)", "x(13,9)", "x(13,10)", "x(13,11)", "x(13,12)", "x(13,13)", "x(14,1)", "x(14,2)", "x(14,3)", "x(14,4)", "x(14,7)", "x(14,9)", "x(14,11)"] + +# Apply action "x(6,4)" +action: 94 + +# State 81 +# ..x..o.x....... +# ...o.o..o..x..x +# ooxooo.x...oo.. +# o....o..xo..... +# .x...xo.xx.x..x +# .x.xoxx..x.o... +# ...xx...o..x.x. +# .o.xoox...x..xx +# oxx.o.....o.... +# o....x..xx.o.o. +# .........oo.... 
+# ..o...o..x....x +# .........o....x +# oo.o...xx.....o +# x....xo.o.o.oxx +IsTerminal() = False +History() = [37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130, 203, 50, 119, 159, 32, 160, 140, 66, 80, 120, 78, 54, 108, 171, 121, 218, 74, 106, 122, 23, 94] +HistoryString() = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130, 203, 50, 119, 159, 32, 160, 140, 66, 80, 120, 78, 54, 108, 171, 121, 218, 74, 106, 122, 23, 94" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130, 203, 50, 119, 159, 32, 160, 140, 66, 80, 120, 78, 54, 108, 171, 121, 218, 74, 106, 122, 23, 94" +InformationStateString(1) = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130, 203, 50, 119, 159, 32, 160, 140, 66, 80, 120, 78, 54, 108, 171, 121, 218, 74, 106, 122, 23, 94" +ObservationString(0) = "..x..o.x.......\n...o.o..o..x..x\nooxooo.x...oo..\no....o..xo.....\n.x...xo.xx.x..x\n.x.xoxx..x.o...\n...xx...o..x.x.\n.o.xoox...x..xx\noxx.o.....o....\no....x..xx.o.o.\n.........oo....\n..o...o..x....x\n.........o....x\noo.o...xx.....o\nx....xo.o.o.oxx" +ObservationString(1) = "..x..o.x.......\n...o.o..o..x..x\nooxooo.x...oo..\no....o..xo.....\n.x...xo.xx.x..x\n.x.xoxx..x.o...\n...xx...o..x.x.\n.o.xoox...x..xx\noxx.o.....o....\no....x..xx.o.o.\n.........oo....\n..o...o..x....x\n.........o....x\noo.o...xx.....o\nx....xo.o.o.oxx" +ObservationTensor(0): +◉◉◯◉◉◯◉◯◉◉◉◉◉◉◉ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯ +◉◉◉◯◉◯◉◉◯◉◉◯◉◉◯ ◯◯◯◉◯◉◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉ +◯◯◯◯◯◯◉◯◉◉◉◯◯◉◉ ◉◉◯◉◉◉◯◯◯◯◯◉◉◯◯ ◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯ +◯◉◉◉◉◯◉◉◯◯◉◉◉◉◉ ◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +◉◯◉◉◉◯◯◉◯◯◉◯◉◉◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◉◉◯◉◯◯◉ +◉◯◉◯◯◯◯◉◉◯◉◯◉◉◉ ◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯ ◯◉◯◉◯◉◉◯◯◉◯◯◯◯◯ +◉◉◉◯◯◉◉◉◯◉◉◯◉◯◉ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◉◉◯◯◯◯◯◯◉◯◉◯ +◉◯◉◯◯◯◯◉◉◉◯◉◉◯◯ ◯◉◯◯◉◉◯◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉◯◯◯◉◯◯◉◉ +◯◯◯◉◯◉◉◉◉◉◯◉◉◉◉ ◉◯◯◯◉◯◯◯◯◯◉◯◯◯◯ ◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◉◉◉◯◉◉◯◯◉◯◉◯◉ ◉◯◯◯◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◉◯◯◉◉◯◯◯◯◯ +◉◉◉◉◉◉◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◯◉◉◉◯◉◉◯◉◉◉◉◯ ◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +◯◯◉◯◉◉◉◯◯◉◉◉◉◉◯ ◉◉◯◉◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯ +◯◉◉◉◉◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◉◯◉◯◉◯◉◯◯ ◉◯◯◯◯◉◯◯◯◯◯◯◯◉◉ +ObservationTensor(1): +◉◉◯◉◉◯◉◯◉◉◉◉◉◉◉ ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯ +◉◉◉◯◉◯◉◉◯◉◉◯◉◉◯ ◯◯◯◉◯◉◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉ +◯◯◯◯◯◯◉◯◉◉◉◯◯◉◉ ◉◉◯◉◉◉◯◯◯◯◯◉◉◯◯ ◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯ +◯◉◉◉◉◯◉◉◯◯◉◉◉◉◉ ◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +◉◯◉◉◉◯◯◉◯◯◉◯◉◉◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◉◉◯◉◯◯◉ +◉◯◉◯◯◯◯◉◉◯◉◯◉◉◉ ◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯ ◯◉◯◉◯◉◉◯◯◉◯◯◯◯◯ +◉◉◉◯◯◉◉◉◯◉◉◯◉◯◉ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◉◉◯◯◯◯◯◯◉◯◉◯ +◉◯◉◯◯◯◯◉◉◉◯◉◉◯◯ ◯◉◯◯◉◉◯◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉◯◯◯◉◯◯◉◉ +◯◯◯◉◯◉◉◉◉◉◯◉◉◉◉ ◉◯◯◯◉◯◯◯◯◯◉◯◯◯◯ ◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◉◉◉◯◉◉◯◯◉◯◉◯◉ ◉◯◯◯◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◉◯◯◉◉◯◯◯◯◯ 
+◉◉◉◉◉◉◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◉◯◉◉◉◯◉◉◯◉◉◉◉◯ ◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +◯◯◉◯◉◉◉◯◯◉◉◉◉◉◯ ◉◉◯◉◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯ +◯◉◉◉◉◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◉◯◉◯◉◯◉◯◯ ◉◯◯◯◯◉◯◯◯◯◯◯◯◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 21, 22, 24, 25, 27, 28, 36, 38, 39, 40, 43, 44, 46, 47, 48, 49, 51, 52, 55, 56, 57, 58, 59, 60, 62, 63, 64, 67, 70, 72, 73, 75, 77, 82, 83, 85, 87, 88, 89, 90, 91, 92, 95, 96, 97, 99, 100, 102, 104, 105, 107, 112, 113, 114, 116, 117, 123, 125, 126, 127, 128, 129, 131, 132, 133, 134, 136, 137, 138, 139, 141, 142, 145, 147, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 161, 162, 163, 164, 165, 166, 168, 169, 170, 172, 173, 175, 176, 177, 178, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191, 192, 193, 197, 199, 200, 201, 204, 205, 206, 207, 208, 211, 212, 213, 214, 217, 219, 221] +StringLegalActions() = ["o(0,0)", "o(0,1)", "o(0,3)", "o(0,4)", "o(0,6)", "o(0,8)", "o(0,9)", "o(0,10)", "o(0,11)", "o(0,12)", "o(0,13)", "o(0,14)", "o(1,0)", "o(1,1)", "o(1,2)", "o(1,4)", "o(1,6)", "o(1,7)", "o(1,9)", "o(1,10)", "o(1,12)", "o(1,13)", "o(2,6)", "o(2,8)", "o(2,9)", "o(2,10)", "o(2,13)", "o(2,14)", "o(3,1)", "o(3,2)", "o(3,3)", "o(3,4)", "o(3,6)", "o(3,7)", "o(3,10)", "o(3,11)", "o(3,12)", "o(3,13)", "o(3,14)", "o(4,0)", "o(4,2)", "o(4,3)", "o(4,4)", "o(4,7)", "o(4,10)", "o(4,12)", "o(4,13)", "o(5,0)", "o(5,2)", "o(5,7)", "o(5,8)", "o(5,10)", "o(5,12)", "o(5,13)", "o(5,14)", "o(6,0)", "o(6,1)", "o(6,2)", "o(6,5)", "o(6,6)", "o(6,7)", "o(6,9)", "o(6,10)", "o(6,12)", "o(6,14)", "o(7,0)", "o(7,2)", "o(7,7)", "o(7,8)", "o(7,9)", "o(7,11)", "o(7,12)", "o(8,3)", "o(8,5)", "o(8,6)", "o(8,7)", "o(8,8)", "o(8,9)", "o(8,11)", "o(8,12)", "o(8,13)", "o(8,14)", "o(9,1)", "o(9,2)", "o(9,3)", "o(9,4)", "o(9,6)", "o(9,7)", "o(9,10)", "o(9,12)", "o(9,14)", "o(10,0)", "o(10,1)", "o(10,2)", "o(10,3)", "o(10,4)", "o(10,5)", "o(10,6)", "o(10,7)", "o(10,8)", "o(10,11)", "o(10,12)", "o(10,13)", "o(10,14)", "o(11,0)", "o(11,1)", "o(11,3)", "o(11,4)", "o(11,5)", "o(11,7)", "o(11,8)", "o(11,10)", "o(11,11)", "o(11,12)", "o(11,13)", "o(12,0)", "o(12,1)", "o(12,2)", "o(12,3)", "o(12,4)", "o(12,5)", "o(12,6)", "o(12,7)", "o(12,8)", "o(12,10)", "o(12,11)", "o(12,12)", "o(12,13)", "o(13,2)", "o(13,4)", "o(13,5)", "o(13,6)", "o(13,9)", "o(13,10)", "o(13,11)", "o(13,12)", "o(13,13)", "o(14,1)", "o(14,2)", "o(14,3)", "o(14,4)", "o(14,7)", "o(14,9)", "o(14,11)"] + +# Apply action "o(5,13)" +action: 88 + +# State 82 +# Apply action "x(6,9)" +action: 99 + +# State 83 +# Apply action "o(7,0)" +action: 105 + +# State 84 +# Apply action "x(11,3)" +action: 168 + +# State 85 +# Apply action "o(6,14)" +action: 104 + +# State 86 +# Apply action "x(3,7)" +action: 52 + +# State 87 +# Apply action "o(10,6)" +action: 156 + +# State 88 +# Apply action "x(14,2)" +action: 212 + +# State 89 +# Apply action "o(7,7)" +action: 112 + +# State 90 +# Apply action "x(11,11)" +action: 176 + +# State 91 +# Apply action "o(8,13)" +action: 133 + +# State 92 +# Apply action "x(9,2)" +action: 137 + +# State 93 +# Apply action "o(13,2)" +action: 197 + +# State 94 +# Apply action "x(10,14)" +action: 164 + +# State 95 +# Apply action "o(0,3)" +action: 3 + +# State 96 +# Apply action "x(4,2)" +action: 62 + +# State 97 +# Apply action "o(5,0)" +action: 75 + +# State 98 +# Apply action "x(13,9)" +action: 204 + +# State 99 +# Apply action "o(11,4)" +action: 169 + +# State 100 +# ..xo.o.x....... 
+# ...o.o..o..x..x +# ooxooo.x...oo.. +# o....o.xxo..... +# .xx..xo.xx.x..x +# ox.xoxx..x.o.o. +# ...xx...ox.x.xo +# oo.xooxo..x..xx +# oxx.o.....o..o. +# o.x..x..xx.o.o. +# ......o..oo...x +# ..oxo.o..x.x..x +# .........o....x +# oooo...xxx....o +# x.x..xo.o.o.oxx +IsTerminal() = False +History() = [37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130, 203, 50, 119, 159, 32, 160, 140, 66, 80, 120, 78, 54, 108, 171, 121, 218, 74, 106, 122, 23, 94, 88, 99, 105, 168, 104, 52, 156, 212, 112, 176, 133, 137, 197, 164, 3, 62, 75, 204, 169] +HistoryString() = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130, 203, 50, 119, 159, 32, 160, 140, 66, 80, 120, 78, 54, 108, 171, 121, 218, 74, 106, 122, 23, 94, 88, 99, 105, 168, 104, 52, 156, 212, 112, 176, 133, 137, 197, 164, 3, 62, 75, 204, 169" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130, 203, 50, 119, 159, 32, 160, 140, 66, 80, 120, 78, 54, 108, 171, 121, 218, 74, 106, 122, 23, 94, 88, 99, 105, 168, 104, 52, 156, 212, 112, 176, 133, 137, 197, 164, 3, 62, 75, 204, 169" +InformationStateString(1) = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130, 203, 50, 119, 159, 32, 160, 140, 66, 80, 120, 78, 54, 108, 171, 121, 218, 74, 106, 122, 23, 94, 88, 99, 105, 168, 104, 52, 156, 212, 112, 176, 133, 137, 197, 164, 3, 62, 75, 204, 169" +ObservationString(0) = "..xo.o.x.......\n...o.o..o..x..x\nooxooo.x...oo..\no....o.xxo.....\n.xx..xo.xx.x..x\nox.xoxx..x.o.o.\n...xx...ox.x.xo\noo.xooxo..x..xx\noxx.o.....o..o.\no.x..x..xx.o.o.\n......o..oo...x\n..oxo.o..x.x..x\n.........o....x\noooo...xxx....o\nx.x..xo.o.o.oxx" +ObservationString(1) = "..xo.o.x.......\n...o.o..o..x..x\nooxooo.x...oo..\no....o.xxo.....\n.xx..xo.xx.x..x\nox.xoxx..x.o.o.\n...xx...ox.x.xo\noo.xooxo..x..xx\noxx.o.....o..o.\no.x..x..xx.o.o.\n......o..oo...x\n..oxo.o..x.x..x\n.........o....x\noooo...xxx....o\nx.x..xo.o.o.oxx" +ObservationTensor(0): +◉◉◯◯◉◯◉◯◉◉◉◉◉◉◉ ◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯ +◉◉◉◯◉◯◉◉◯◉◉◯◉◉◯ ◯◯◯◉◯◉◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉ +◯◯◯◯◯◯◉◯◉◉◉◯◯◉◉ ◉◉◯◉◉◉◯◯◯◯◯◉◉◯◯ ◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯ +◯◉◉◉◉◯◉◯◯◯◉◉◉◉◉ ◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯ +◉◯◯◉◉◯◯◉◯◯◉◯◉◉◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◉◯◯◉◯◯◉◉◯◉◯◯◉ +◯◯◉◯◯◯◯◉◉◯◉◯◉◯◉ ◉◯◯◯◉◯◯◯◯◯◯◉◯◉◯ ◯◉◯◉◯◉◉◯◯◉◯◯◯◯◯ +◉◉◉◯◯◉◉◉◯◯◉◯◉◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉ ◯◯◯◉◉◯◯◯◯◉◯◉◯◉◯ +◯◯◉◯◯◯◯◯◉◉◯◉◉◯◯ ◉◉◯◯◉◉◯◉◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉◯◯◯◉◯◯◉◉ +◯◯◯◉◯◉◉◉◉◉◯◉◉◯◉ ◉◯◯◯◉◯◯◯◯◯◉◯◯◉◯ ◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◯◉◉◯◉◉◯◯◉◯◉◯◉ ◉◯◯◯◯◯◯◯◯◯◯◉◯◉◯ ◯◯◉◯◯◉◯◯◉◉◯◯◯◯◯ +◉◉◉◉◉◉◯◉◉◯◯◉◉◉◯ ◯◯◯◯◯◯◉◯◯◉◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +◉◉◯◯◯◉◯◉◉◯◉◯◉◉◯ ◯◯◉◯◉◯◉◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯◯◉◯◉◯◯◉ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +◯◯◯◯◉◉◉◯◯◯◉◉◉◉◯ ◉◉◉◉◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◉◉◉◯◯◯◯◯ +◯◉◯◉◉◯◯◉◯◉◯◉◯◯◯ 
◯◯◯◯◯◯◉◯◉◯◉◯◉◯◯ ◉◯◉◯◯◉◯◯◯◯◯◯◯◉◉ +ObservationTensor(1): +◉◉◯◯◉◯◉◯◉◉◉◉◉◉◉ ◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯ +◉◉◉◯◉◯◉◉◯◉◉◯◉◉◯ ◯◯◯◉◯◉◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉ +◯◯◯◯◯◯◉◯◉◉◉◯◯◉◉ ◉◉◯◉◉◉◯◯◯◯◯◉◉◯◯ ◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯ +◯◉◉◉◉◯◉◯◯◯◉◉◉◉◉ ◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯ +◉◯◯◉◉◯◯◉◯◯◉◯◉◉◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◉◯◯◉◯◯◉◉◯◉◯◯◉ +◯◯◉◯◯◯◯◉◉◯◉◯◉◯◉ ◉◯◯◯◉◯◯◯◯◯◯◉◯◉◯ ◯◉◯◉◯◉◉◯◯◉◯◯◯◯◯ +◉◉◉◯◯◉◉◉◯◯◉◯◉◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉ ◯◯◯◉◉◯◯◯◯◉◯◉◯◉◯ +◯◯◉◯◯◯◯◯◉◉◯◉◉◯◯ ◉◉◯◯◉◉◯◉◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉◯◯◯◉◯◯◉◉ +◯◯◯◉◯◉◉◉◉◉◯◉◉◯◉ ◉◯◯◯◉◯◯◯◯◯◉◯◯◉◯ ◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◯◉◉◯◉◉◯◯◉◯◉◯◉ ◉◯◯◯◯◯◯◯◯◯◯◉◯◉◯ ◯◯◉◯◯◉◯◯◉◉◯◯◯◯◯ +◉◉◉◉◉◉◯◉◉◯◯◉◉◉◯ ◯◯◯◯◯◯◉◯◯◉◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +◉◉◯◯◯◉◯◉◉◯◉◯◉◉◯ ◯◯◉◯◉◯◉◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯◯◉◯◉◯◯◉ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +◯◯◯◯◉◉◉◯◯◯◉◉◉◉◯ ◉◉◉◉◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◉◉◉◯◯◯◯◯ +◯◉◯◉◉◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◉◯◉◯◉◯◉◯◯ ◉◯◉◯◯◉◯◯◯◯◯◯◯◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 21, 22, 24, 25, 27, 28, 36, 38, 39, 40, 43, 44, 46, 47, 48, 49, 51, 55, 56, 57, 58, 59, 60, 63, 64, 67, 70, 72, 73, 77, 82, 83, 85, 87, 89, 90, 91, 92, 95, 96, 97, 100, 102, 107, 113, 114, 116, 117, 123, 125, 126, 127, 128, 129, 131, 132, 134, 136, 138, 139, 141, 142, 145, 147, 149, 150, 151, 152, 153, 154, 155, 157, 158, 161, 162, 163, 165, 166, 170, 172, 173, 175, 177, 178, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191, 192, 193, 199, 200, 201, 205, 206, 207, 208, 211, 213, 214, 217, 219, 221] +StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,4)", "x(0,6)", "x(0,8)", "x(0,9)", "x(0,10)", "x(0,11)", "x(0,12)", "x(0,13)", "x(0,14)", "x(1,0)", "x(1,1)", "x(1,2)", "x(1,4)", "x(1,6)", "x(1,7)", "x(1,9)", "x(1,10)", "x(1,12)", "x(1,13)", "x(2,6)", "x(2,8)", "x(2,9)", "x(2,10)", "x(2,13)", "x(2,14)", "x(3,1)", "x(3,2)", "x(3,3)", "x(3,4)", "x(3,6)", "x(3,10)", "x(3,11)", "x(3,12)", "x(3,13)", "x(3,14)", "x(4,0)", "x(4,3)", "x(4,4)", "x(4,7)", "x(4,10)", "x(4,12)", "x(4,13)", "x(5,2)", "x(5,7)", "x(5,8)", "x(5,10)", "x(5,12)", "x(5,14)", "x(6,0)", "x(6,1)", "x(6,2)", "x(6,5)", "x(6,6)", "x(6,7)", "x(6,10)", "x(6,12)", "x(7,2)", "x(7,8)", "x(7,9)", "x(7,11)", "x(7,12)", "x(8,3)", "x(8,5)", "x(8,6)", "x(8,7)", "x(8,8)", "x(8,9)", "x(8,11)", "x(8,12)", "x(8,14)", "x(9,1)", "x(9,3)", "x(9,4)", "x(9,6)", "x(9,7)", "x(9,10)", "x(9,12)", "x(9,14)", "x(10,0)", "x(10,1)", "x(10,2)", "x(10,3)", "x(10,4)", "x(10,5)", "x(10,7)", "x(10,8)", "x(10,11)", "x(10,12)", "x(10,13)", "x(11,0)", "x(11,1)", "x(11,5)", "x(11,7)", "x(11,8)", "x(11,10)", "x(11,12)", "x(11,13)", "x(12,0)", "x(12,1)", "x(12,2)", "x(12,3)", "x(12,4)", "x(12,5)", "x(12,6)", "x(12,7)", "x(12,8)", "x(12,10)", "x(12,11)", "x(12,12)", "x(12,13)", "x(13,4)", "x(13,5)", "x(13,6)", "x(13,10)", "x(13,11)", "x(13,12)", "x(13,13)", "x(14,1)", "x(14,3)", "x(14,4)", "x(14,7)", "x(14,9)", "x(14,11)"] + +# Apply action "x(0,8)" +action: 8 + +# State 101 +# ..xo.o.xx...... +# ...o.o..o..x..x +# ooxooo.x...oo.. +# o....o.xxo..... +# .xx..xo.xx.x..x +# ox.xoxx..x.o.o. +# ...xx...ox.x.xo +# oo.xooxo..x..xx +# oxx.o.....o..o. +# o.x..x..xx.o.o. 
+# ......o..oo...x +# ..oxo.o..x.x..x +# .........o....x +# oooo...xxx....o +# x.x..xo.o.o.oxx +IsTerminal() = False +History() = [37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130, 203, 50, 119, 159, 32, 160, 140, 66, 80, 120, 78, 54, 108, 171, 121, 218, 74, 106, 122, 23, 94, 88, 99, 105, 168, 104, 52, 156, 212, 112, 176, 133, 137, 197, 164, 3, 62, 75, 204, 169, 8] +HistoryString() = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130, 203, 50, 119, 159, 32, 160, 140, 66, 80, 120, 78, 54, 108, 171, 121, 218, 74, 106, 122, 23, 94, 88, 99, 105, 168, 104, 52, 156, 212, 112, 176, 133, 137, 197, 164, 3, 62, 75, 204, 169, 8" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130, 203, 50, 119, 159, 32, 160, 140, 66, 80, 120, 78, 54, 108, 171, 121, 218, 74, 106, 122, 23, 94, 88, 99, 105, 168, 104, 52, 156, 212, 112, 176, 133, 137, 197, 164, 3, 62, 75, 204, 169, 8" +InformationStateString(1) = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130, 203, 50, 119, 159, 32, 160, 140, 66, 80, 120, 78, 54, 108, 171, 121, 218, 74, 106, 122, 23, 94, 88, 99, 105, 168, 104, 52, 156, 212, 112, 176, 133, 137, 197, 164, 3, 62, 75, 204, 169, 8" +ObservationString(0) = "..xo.o.xx......\n...o.o..o..x..x\nooxooo.x...oo..\no....o.xxo.....\n.xx..xo.xx.x..x\nox.xoxx..x.o.o.\n...xx...ox.x.xo\noo.xooxo..x..xx\noxx.o.....o..o.\no.x..x..xx.o.o.\n......o..oo...x\n..oxo.o..x.x..x\n.........o....x\noooo...xxx....o\nx.x..xo.o.o.oxx" +ObservationString(1) = "..xo.o.xx......\n...o.o..o..x..x\nooxooo.x...oo..\no....o.xxo.....\n.xx..xo.xx.x..x\nox.xoxx..x.o.o.\n...xx...ox.x.xo\noo.xooxo..x..xx\noxx.o.....o..o.\no.x..x..xx.o.o.\n......o..oo...x\n..oxo.o..x.x..x\n.........o....x\noooo...xxx....o\nx.x..xo.o.o.oxx" +ObservationTensor(0): +◉◉◯◯◉◯◉◯◯◉◉◉◉◉◉ ◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◉◉◯◯◯◯◯◯ +◉◉◉◯◉◯◉◉◯◉◉◯◉◉◯ ◯◯◯◉◯◉◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉ +◯◯◯◯◯◯◉◯◉◉◉◯◯◉◉ ◉◉◯◉◉◉◯◯◯◯◯◉◉◯◯ ◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯ +◯◉◉◉◉◯◉◯◯◯◉◉◉◉◉ ◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯ +◉◯◯◉◉◯◯◉◯◯◉◯◉◉◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◉◯◯◉◯◯◉◉◯◉◯◯◉ +◯◯◉◯◯◯◯◉◉◯◉◯◉◯◉ ◉◯◯◯◉◯◯◯◯◯◯◉◯◉◯ ◯◉◯◉◯◉◉◯◯◉◯◯◯◯◯ +◉◉◉◯◯◉◉◉◯◯◉◯◉◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉ ◯◯◯◉◉◯◯◯◯◉◯◉◯◉◯ +◯◯◉◯◯◯◯◯◉◉◯◉◉◯◯ ◉◉◯◯◉◉◯◉◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉◯◯◯◉◯◯◉◉ +◯◯◯◉◯◉◉◉◉◉◯◉◉◯◉ ◉◯◯◯◉◯◯◯◯◯◉◯◯◉◯ ◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◯◉◉◯◉◉◯◯◉◯◉◯◉ ◉◯◯◯◯◯◯◯◯◯◯◉◯◉◯ ◯◯◉◯◯◉◯◯◉◉◯◯◯◯◯ +◉◉◉◉◉◉◯◉◉◯◯◉◉◉◯ ◯◯◯◯◯◯◉◯◯◉◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +◉◉◯◯◯◉◯◉◉◯◉◯◉◉◯ ◯◯◉◯◉◯◉◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯◯◉◯◉◯◯◉ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +◯◯◯◯◉◉◉◯◯◯◉◉◉◉◯ ◉◉◉◉◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◉◉◉◯◯◯◯◯ +◯◉◯◉◉◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◉◯◉◯◉◯◉◯◯ ◉◯◉◯◯◉◯◯◯◯◯◯◯◉◉ +ObservationTensor(1): +◉◉◯◯◉◯◉◯◯◉◉◉◉◉◉ ◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◉◉◯◯◯◯◯◯ +◉◉◉◯◉◯◉◉◯◉◉◯◉◉◯ ◯◯◯◉◯◉◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉ 
+◯◯◯◯◯◯◉◯◉◉◉◯◯◉◉ ◉◉◯◉◉◉◯◯◯◯◯◉◉◯◯ ◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯ +◯◉◉◉◉◯◉◯◯◯◉◉◉◉◉ ◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯ +◉◯◯◉◉◯◯◉◯◯◉◯◉◉◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◉◯◯◉◯◯◉◉◯◉◯◯◉ +◯◯◉◯◯◯◯◉◉◯◉◯◉◯◉ ◉◯◯◯◉◯◯◯◯◯◯◉◯◉◯ ◯◉◯◉◯◉◉◯◯◉◯◯◯◯◯ +◉◉◉◯◯◉◉◉◯◯◉◯◉◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉ ◯◯◯◉◉◯◯◯◯◉◯◉◯◉◯ +◯◯◉◯◯◯◯◯◉◉◯◉◉◯◯ ◉◉◯◯◉◉◯◉◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉◯◯◯◉◯◯◉◉ +◯◯◯◉◯◉◉◉◉◉◯◉◉◯◉ ◉◯◯◯◉◯◯◯◯◯◉◯◯◉◯ ◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◯◉◉◯◉◉◯◯◉◯◉◯◉ ◉◯◯◯◯◯◯◯◯◯◯◉◯◉◯ ◯◯◉◯◯◉◯◯◉◉◯◯◯◯◯ +◉◉◉◉◉◉◯◉◉◯◯◉◉◉◯ ◯◯◯◯◯◯◉◯◯◉◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +◉◉◯◯◯◉◯◉◉◯◉◯◉◉◯ ◯◯◉◯◉◯◉◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯◯◉◯◉◯◯◉ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +◯◯◯◯◉◉◉◯◯◯◉◉◉◉◯ ◉◉◉◉◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◉◉◉◯◯◯◯◯ +◯◉◯◉◉◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◉◯◉◯◉◯◉◯◯ ◉◯◉◯◯◉◯◯◯◯◯◯◯◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 4, 6, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 21, 22, 24, 25, 27, 28, 36, 38, 39, 40, 43, 44, 46, 47, 48, 49, 51, 55, 56, 57, 58, 59, 60, 63, 64, 67, 70, 72, 73, 77, 82, 83, 85, 87, 89, 90, 91, 92, 95, 96, 97, 100, 102, 107, 113, 114, 116, 117, 123, 125, 126, 127, 128, 129, 131, 132, 134, 136, 138, 139, 141, 142, 145, 147, 149, 150, 151, 152, 153, 154, 155, 157, 158, 161, 162, 163, 165, 166, 170, 172, 173, 175, 177, 178, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191, 192, 193, 199, 200, 201, 205, 206, 207, 208, 211, 213, 214, 217, 219, 221] +StringLegalActions() = ["o(0,0)", "o(0,1)", "o(0,4)", "o(0,6)", "o(0,9)", "o(0,10)", "o(0,11)", "o(0,12)", "o(0,13)", "o(0,14)", "o(1,0)", "o(1,1)", "o(1,2)", "o(1,4)", "o(1,6)", "o(1,7)", "o(1,9)", "o(1,10)", "o(1,12)", "o(1,13)", "o(2,6)", "o(2,8)", "o(2,9)", "o(2,10)", "o(2,13)", "o(2,14)", "o(3,1)", "o(3,2)", "o(3,3)", "o(3,4)", "o(3,6)", "o(3,10)", "o(3,11)", "o(3,12)", "o(3,13)", "o(3,14)", "o(4,0)", "o(4,3)", "o(4,4)", "o(4,7)", "o(4,10)", "o(4,12)", "o(4,13)", "o(5,2)", "o(5,7)", "o(5,8)", "o(5,10)", "o(5,12)", "o(5,14)", "o(6,0)", "o(6,1)", "o(6,2)", "o(6,5)", "o(6,6)", "o(6,7)", "o(6,10)", "o(6,12)", "o(7,2)", "o(7,8)", "o(7,9)", "o(7,11)", "o(7,12)", "o(8,3)", "o(8,5)", "o(8,6)", "o(8,7)", "o(8,8)", "o(8,9)", "o(8,11)", "o(8,12)", "o(8,14)", "o(9,1)", "o(9,3)", "o(9,4)", "o(9,6)", "o(9,7)", "o(9,10)", "o(9,12)", "o(9,14)", "o(10,0)", "o(10,1)", "o(10,2)", "o(10,3)", "o(10,4)", "o(10,5)", "o(10,7)", "o(10,8)", "o(10,11)", "o(10,12)", "o(10,13)", "o(11,0)", "o(11,1)", "o(11,5)", "o(11,7)", "o(11,8)", "o(11,10)", "o(11,12)", "o(11,13)", "o(12,0)", "o(12,1)", "o(12,2)", "o(12,3)", "o(12,4)", "o(12,5)", "o(12,6)", "o(12,7)", "o(12,8)", "o(12,10)", "o(12,11)", "o(12,12)", "o(12,13)", "o(13,4)", "o(13,5)", "o(13,6)", "o(13,10)", "o(13,11)", "o(13,12)", "o(13,13)", "o(14,1)", "o(14,3)", "o(14,4)", "o(14,7)", "o(14,9)", "o(14,11)"] + +# Apply action "o(4,0)" +action: 60 + +# State 102 +# Apply action "x(4,4)" +action: 64 + +# State 103 +# Apply action "o(11,1)" +action: 166 + +# State 104 +# Apply action "x(0,12)" +action: 12 + +# State 105 +# Apply action "o(5,14)" +action: 89 + +# State 106 +# Apply action "x(5,8)" +action: 83 + +# State 107 +# Apply action "o(14,4)" +action: 214 + +# State 108 +# Apply action "x(5,10)" +action: 85 + +# State 109 +# ..xo.o.xx...x.. +# ...o.o..o..x..x +# ooxooo.x...oo.. +# o....o.xxo..... +# oxx.xxo.xx.x..x +# ox.xoxx.xxxo.oo +# ...xx...ox.x.xo +# oo.xooxo..x..xx +# oxx.o.....o..o. +# o.x..x..xx.o.o. 
+# ......o..oo...x +# .ooxo.o..x.x..x +# .........o....x +# oooo...xxx....o +# x.x.oxo.o.o.oxx +IsTerminal() = True +History() = [37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130, 203, 50, 119, 159, 32, 160, 140, 66, 80, 120, 78, 54, 108, 171, 121, 218, 74, 106, 122, 23, 94, 88, 99, 105, 168, 104, 52, 156, 212, 112, 176, 133, 137, 197, 164, 3, 62, 75, 204, 169, 8, 60, 64, 166, 12, 89, 83, 214, 85] +HistoryString() = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130, 203, 50, 119, 159, 32, 160, 140, 66, 80, 120, 78, 54, 108, 171, 121, 218, 74, 106, 122, 23, 94, 88, 99, 105, 168, 104, 52, 156, 212, 112, 176, 133, 137, 197, 164, 3, 62, 75, 204, 169, 8, 60, 64, 166, 12, 89, 83, 214, 85" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130, 203, 50, 119, 159, 32, 160, 140, 66, 80, 120, 78, 54, 108, 171, 121, 218, 74, 106, 122, 23, 94, 88, 99, 105, 168, 104, 52, 156, 212, 112, 176, 133, 137, 197, 164, 3, 62, 75, 204, 169, 8, 60, 64, 166, 12, 89, 83, 214, 85" +InformationStateString(1) = "37, 34, 144, 33, 65, 196, 81, 86, 26, 135, 111, 110, 76, 42, 29, 35, 223, 189, 174, 31, 115, 5, 210, 98, 2, 41, 53, 20, 143, 216, 84, 167, 215, 222, 103, 209, 101, 146, 194, 45, 68, 30, 202, 109, 224, 124, 7, 18, 61, 220, 93, 79, 179, 148, 71, 195, 118, 198, 69, 130, 203, 50, 119, 159, 32, 160, 140, 66, 80, 120, 78, 54, 108, 171, 121, 218, 74, 106, 122, 23, 94, 88, 99, 105, 168, 104, 52, 156, 212, 112, 176, 133, 137, 197, 164, 3, 62, 75, 204, 169, 8, 60, 64, 166, 12, 89, 83, 214, 85" +ObservationString(0) = "..xo.o.xx...x..\n...o.o..o..x..x\nooxooo.x...oo..\no....o.xxo.....\noxx.xxo.xx.x..x\nox.xoxx.xxxo.oo\n...xx...ox.x.xo\noo.xooxo..x..xx\noxx.o.....o..o.\no.x..x..xx.o.o.\n......o..oo...x\n.ooxo.o..x.x..x\n.........o....x\noooo...xxx....o\nx.x.oxo.o.o.oxx" +ObservationString(1) = "..xo.o.xx...x..\n...o.o..o..x..x\nooxooo.x...oo..\no....o.xxo.....\noxx.xxo.xx.x..x\nox.xoxx.xxxo.oo\n...xx...ox.x.xo\noo.xooxo..x..xx\noxx.o.....o..o.\no.x..x..xx.o.o.\n......o..oo...x\n.ooxo.o..x.x..x\n.........o....x\noooo...xxx....o\nx.x.oxo.o.o.oxx" +ObservationTensor(0): +◉◉◯◯◉◯◉◯◯◉◉◉◯◉◉ ◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◉◉◯◯◯◉◯◯ +◉◉◉◯◉◯◉◉◯◉◉◯◉◉◯ ◯◯◯◉◯◉◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉ +◯◯◯◯◯◯◉◯◉◉◉◯◯◉◉ ◉◉◯◉◉◉◯◯◯◯◯◉◉◯◯ ◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯ +◯◉◉◉◉◯◉◯◯◯◉◉◉◉◉ ◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯ +◯◯◯◉◯◯◯◉◯◯◉◯◉◉◯ ◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◉◯◉◉◯◯◉◉◯◉◯◯◉ +◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯ ◉◯◯◯◉◯◯◯◯◯◯◉◯◉◉ ◯◉◯◉◯◉◉◯◉◉◉◯◯◯◯ +◉◉◉◯◯◉◉◉◯◯◉◯◉◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉ ◯◯◯◉◉◯◯◯◯◉◯◉◯◉◯ +◯◯◉◯◯◯◯◯◉◉◯◉◉◯◯ ◉◉◯◯◉◉◯◉◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉◯◯◯◉◯◯◉◉ +◯◯◯◉◯◉◉◉◉◉◯◉◉◯◉ ◉◯◯◯◉◯◯◯◯◯◉◯◯◉◯ ◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◯◉◉◯◉◉◯◯◉◯◉◯◉ ◉◯◯◯◯◯◯◯◯◯◯◉◯◉◯ ◯◯◉◯◯◉◯◯◉◉◯◯◯◯◯ +◉◉◉◉◉◉◯◉◉◯◯◉◉◉◯ ◯◯◯◯◯◯◉◯◯◉◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +◉◯◯◯◯◉◯◉◉◯◉◯◉◉◯ ◯◉◉◯◉◯◉◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯◯◉◯◉◯◯◉ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +◯◯◯◯◉◉◉◯◯◯◉◉◉◉◯ ◉◉◉◉◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◉◉◉◯◯◯◯◯ +◯◉◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯ 
◉◯◉◯◯◉◯◯◯◯◯◯◯◉◉ +ObservationTensor(1): +◉◉◯◯◉◯◉◯◯◉◉◉◯◉◉ ◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◉◉◯◯◯◉◯◯ +◉◉◉◯◉◯◉◉◯◉◉◯◉◉◯ ◯◯◯◉◯◉◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉ +◯◯◯◯◯◯◉◯◉◉◉◯◯◉◉ ◉◉◯◉◉◉◯◯◯◯◯◉◉◯◯ ◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯ +◯◉◉◉◉◯◉◯◯◯◉◉◉◉◉ ◉◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯ +◯◯◯◉◯◯◯◉◯◯◉◯◉◉◯ ◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◉◯◉◉◯◯◉◉◯◉◯◯◉ +◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯ ◉◯◯◯◉◯◯◯◯◯◯◉◯◉◉ ◯◉◯◉◯◉◉◯◉◉◉◯◯◯◯ +◉◉◉◯◯◉◉◉◯◯◉◯◉◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉ ◯◯◯◉◉◯◯◯◯◉◯◉◯◉◯ +◯◯◉◯◯◯◯◯◉◉◯◉◉◯◯ ◉◉◯◯◉◉◯◉◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉◯◯◯◉◯◯◉◉ +◯◯◯◉◯◉◉◉◉◉◯◉◉◯◉ ◉◯◯◯◉◯◯◯◯◯◉◯◯◉◯ ◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◯◉◉◯◉◉◯◯◉◯◉◯◉ ◉◯◯◯◯◯◯◯◯◯◯◉◯◉◯ ◯◯◉◯◯◉◯◯◉◉◯◯◯◯◯ +◉◉◉◉◉◉◯◉◉◯◯◉◉◉◯ ◯◯◯◯◯◯◉◯◯◉◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +◉◯◯◯◯◉◯◉◉◯◉◯◉◉◯ ◯◉◉◯◉◯◉◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯◯◉◯◉◯◯◉ +◉◉◉◉◉◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +◯◯◯◯◉◉◉◯◯◯◉◉◉◉◯ ◉◉◉◉◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◉◉◉◯◯◯◯◯ +◯◉◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯ ◉◯◉◯◯◉◯◯◯◯◯◯◯◉◉ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/morpion_solitaire.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/morpion_solitaire.txt new file mode 100644 index 0000000..d7b2749 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/morpion_solitaire.txt @@ -0,0 +1,269 @@ +game: morpion_solitaire + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Morpion Solitaire" +GameType.max_num_players = 1 +GameType.min_num_players = 1 +GameType.parameter_specification = [] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = False +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.REWARDS +GameType.short_name = "morpion_solitaire" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 460 +PolicyTensorShape() = [460] +MaxChanceOutcomes() = 0 +GetParameters() = {} +NumPlayers() = 1 +MinUtility() = 0.0 +MaxUtility() = 35.0 +UtilitySum() = None +MaxGameLength() = 35 +ToString() = "morpion_solitaire()" + +# State 0 +# 0000000000000 +# 0000000000000 +# 0000000000000 +# 0000011100000 +# 0000010100000 +# 0001110111000 +# 0001000001000 +# 0001110111000 +# 0000010100000 +# 0000011100000 +# 0000000000000 +# 0000000000000 +# 0000000000000 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "0000000000000\n0000000000000\n0000000000000\n0000011100000\n0000010100000\n0001110111000\n0001000001000\n0001110111000\n0000010100000\n0000011100000\n0000000000000\n0000000000000\n0000000000000\n" +Rewards() = [0] +Returns() = [0] +LegalActions() = [34, 35, 52, 53, 54, 55, 56, 57, 72, 73, 74, 75, 76, 77, 94, 95, 161, 163, 174, 176, 185, 187, 189, 191, 198, 200, 202, 204, 213, 215, 226, 228, 285, 307, 312, 334, 384, 402, 417, 435] +StringLegalActions() = ["[3,4] [3,5] [3,6] [3,7] ", "[3,5] [3,6] [3,7] [3,8] ", "[5,2] [5,3] [5,4] [5,5] ", "[5,3] [5,4] [5,5] [5,6] ", "[5,4] [5,5] [5,6] [5,7] ", "[5,5] [5,6] [5,7] [5,8] ", "[5,6] [5,7] [5,8] [5,9] ", "[5,7] [5,8] [5,9] [5,10] ", "[7,2] [7,3] [7,4] [7,5] ", "[7,3] [7,4] [7,5] [7,6] ", "[7,4] [7,5] [7,6] [7,7] ", "[7,5] [7,6] [7,7] [7,8] ", "[7,6] [7,7] [7,8] [7,9] ", "[7,7] [7,8] [7,9] [7,10] ", "[9,4] [9,5] [9,6] [9,7] ", "[9,5] [9,6] [9,7] [9,8] ", "[2,5] [3,5] [4,5] [5,5] ", "[2,7] [3,7] [4,7] [5,7] ", "[3,5] [4,5] [5,5] [6,5] ", "[3,7] [4,7] [5,7] [6,7] ", 
"[4,3] [5,3] [6,3] [7,3] ", "[4,5] [5,5] [6,5] [7,5] ", "[4,7] [5,7] [6,7] [7,7] ", "[4,9] [5,9] [6,9] [7,9] ", "[5,3] [6,3] [7,3] [8,3] ", "[5,5] [6,5] [7,5] [8,5] ", "[5,7] [6,7] [7,7] [8,7] ", "[5,9] [6,9] [7,9] [8,9] ", "[6,5] [7,5] [8,5] [9,5] ", "[6,7] [7,7] [8,7] [9,7] ", "[7,5] [8,5] [9,5] [10,5] ", "[7,7] [8,7] [9,7] [10,7] ", "[2,5] [3,6] [4,7] [5,8] ", "[4,7] [5,8] [6,9] [7,10] ", "[5,2] [6,3] [7,4] [8,5] ", "[7,4] [8,5] [9,6] [10,7] ", "[2,7] [3,6] [4,5] [5,4] ", "[4,5] [5,4] [6,3] [7,2] ", "[5,10] [6,9] [7,8] [8,7] ", "[7,8] [8,7] [9,6] [10,5] "] + +# Apply action "[7,5] [7,6] [7,7] [7,8] " +action: 75 + +# State 1 +# 0000000000000 +# 0000000000000 +# 0000000000000 +# 0000011100000 +# 0000010100000 +# 0001110111000 +# 0001000001000 +# 0001111111000 +# 0000010100000 +# 0000011100000 +# 0000000000000 +# 0000000000000 +# 0000000000000 +IsTerminal() = False +History() = [75] +HistoryString() = "75" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "0000000000000\n0000000000000\n0000000000000\n0000011100000\n0000010100000\n0001110111000\n0001000001000\n0001111111000\n0000010100000\n0000011100000\n0000000000000\n0000000000000\n0000000000000\n" +Rewards() = [1] +Returns() = [1] +LegalActions() = [34, 35, 52, 53, 54, 55, 56, 57, 94, 95, 161, 163, 174, 176, 185, 187, 189, 191, 198, 200, 202, 204, 213, 215, 226, 228, 285, 307, 312, 314, 334, 384, 402, 415, 417, 435] +StringLegalActions() = ["[3,4] [3,5] [3,6] [3,7] ", "[3,5] [3,6] [3,7] [3,8] ", "[5,2] [5,3] [5,4] [5,5] ", "[5,3] [5,4] [5,5] [5,6] ", "[5,4] [5,5] [5,6] [5,7] ", "[5,5] [5,6] [5,7] [5,8] ", "[5,6] [5,7] [5,8] [5,9] ", "[5,7] [5,8] [5,9] [5,10] ", "[9,4] [9,5] [9,6] [9,7] ", "[9,5] [9,6] [9,7] [9,8] ", "[2,5] [3,5] [4,5] [5,5] ", "[2,7] [3,7] [4,7] [5,7] ", "[3,5] [4,5] [5,5] [6,5] ", "[3,7] [4,7] [5,7] [6,7] ", "[4,3] [5,3] [6,3] [7,3] ", "[4,5] [5,5] [6,5] [7,5] ", "[4,7] [5,7] [6,7] [7,7] ", "[4,9] [5,9] [6,9] [7,9] ", "[5,3] [6,3] [7,3] [8,3] ", "[5,5] [6,5] [7,5] [8,5] ", "[5,7] [6,7] [7,7] [8,7] ", "[5,9] [6,9] [7,9] [8,9] ", "[6,5] [7,5] [8,5] [9,5] ", "[6,7] [7,7] [8,7] [9,7] ", "[7,5] [8,5] [9,5] [10,5] ", "[7,7] [8,7] [9,7] [10,7] ", "[2,5] [3,6] [4,7] [5,8] ", "[4,7] [5,8] [6,9] [7,10] ", "[5,2] [6,3] [7,4] [8,5] ", "[5,4] [6,5] [7,6] [8,7] ", "[7,4] [8,5] [9,6] [10,7] ", "[2,7] [3,6] [4,5] [5,4] ", "[4,5] [5,4] [6,3] [7,2] ", "[5,8] [6,7] [7,6] [8,5] ", "[5,10] [6,9] [7,8] [8,7] ", "[7,8] [8,7] [9,6] [10,5] "] + +# Apply action "[5,3] [6,3] [7,3] [8,3] " +action: 198 + +# State 2 +# 0000000000000 +# 0000000000000 +# 0000000000000 +# 0000011100000 +# 0000010100000 +# 0001110111000 +# 0001000001000 +# 0001111111000 +# 0001010100000 +# 0000011100000 +# 0000000000000 +# 0000000000000 +# 0000000000000 +IsTerminal() = False +History() = [75, 198] +HistoryString() = "75, 198" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "0000000000000\n0000000000000\n0000000000000\n0000011100000\n0000010100000\n0001110111000\n0001000001000\n0001111111000\n0001010100000\n0000011100000\n0000000000000\n0000000000000\n0000000000000\n" +Rewards() = [1] +Returns() = [2] +LegalActions() = [34, 35, 52, 53, 54, 55, 56, 57, 94, 95, 161, 163, 174, 176, 187, 189, 191, 200, 202, 204, 213, 215, 226, 228, 285, 307, 312, 314, 334, 384, 402, 415, 417, 435] +StringLegalActions() = ["[3,4] [3,5] [3,6] [3,7] ", "[3,5] [3,6] [3,7] [3,8] ", "[5,2] [5,3] [5,4] [5,5] ", "[5,3] [5,4] [5,5] [5,6] ", "[5,4] [5,5] [5,6] [5,7] ", "[5,5] [5,6] [5,7] [5,8] ", 
"[5,6] [5,7] [5,8] [5,9] ", "[5,7] [5,8] [5,9] [5,10] ", "[9,4] [9,5] [9,6] [9,7] ", "[9,5] [9,6] [9,7] [9,8] ", "[2,5] [3,5] [4,5] [5,5] ", "[2,7] [3,7] [4,7] [5,7] ", "[3,5] [4,5] [5,5] [6,5] ", "[3,7] [4,7] [5,7] [6,7] ", "[4,5] [5,5] [6,5] [7,5] ", "[4,7] [5,7] [6,7] [7,7] ", "[4,9] [5,9] [6,9] [7,9] ", "[5,5] [6,5] [7,5] [8,5] ", "[5,7] [6,7] [7,7] [8,7] ", "[5,9] [6,9] [7,9] [8,9] ", "[6,5] [7,5] [8,5] [9,5] ", "[6,7] [7,7] [8,7] [9,7] ", "[7,5] [8,5] [9,5] [10,5] ", "[7,7] [8,7] [9,7] [10,7] ", "[2,5] [3,6] [4,7] [5,8] ", "[4,7] [5,8] [6,9] [7,10] ", "[5,2] [6,3] [7,4] [8,5] ", "[5,4] [6,5] [7,6] [8,7] ", "[7,4] [8,5] [9,6] [10,7] ", "[2,7] [3,6] [4,5] [5,4] ", "[4,5] [5,4] [6,3] [7,2] ", "[5,8] [6,7] [7,6] [8,5] ", "[5,10] [6,9] [7,8] [8,7] ", "[7,8] [8,7] [9,6] [10,5] "] + +# Apply action "[2,7] [3,6] [4,5] [5,4] " +action: 384 + +# State 3 +# Apply action "[9,4] [9,5] [9,6] [9,7] " +action: 94 + +# State 4 +# Apply action "[4,7] [5,7] [6,7] [7,7] " +action: 189 + +# State 5 +# Apply action "[3,5] [3,6] [3,7] [3,8] " +action: 35 + +# State 6 +# Apply action "[5,2] [5,3] [5,4] [5,5] " +action: 52 + +# State 7 +# Apply action "[5,9] [6,9] [7,9] [8,9] " +action: 204 + +# State 8 +# Apply action "[7,5] [8,5] [9,5] [10,5] " +action: 226 + +# State 9 +# Apply action "[5,6] [5,7] [5,8] [5,9] " +action: 56 + +# State 10 +# 0000000000000 +# 0000000000000 +# 0000000100000 +# 0000011110000 +# 0000010100000 +# 0011111111000 +# 0001000101000 +# 0001111111000 +# 0001010101000 +# 0000111100000 +# 0000010000000 +# 0000000000000 +# 0000000000000 +IsTerminal() = False +History() = [75, 198, 384, 94, 189, 35, 52, 204, 226, 56] +HistoryString() = "75, 198, 384, 94, 189, 35, 52, 204, 226, 56" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "0000000000000\n0000000000000\n0000000100000\n0000011110000\n0000010100000\n0011111111000\n0001000101000\n0001111111000\n0001010101000\n0000111100000\n0000010000000\n0000000000000\n0000000000000\n" +Rewards() = [1] +Returns() = [10] +LegalActions() = [161, 174, 285, 294, 301, 307, 314, 327, 332, 334, 343, 386, 395, 404, 406, 413, 417, 433, 444] +StringLegalActions() = ["[2,5] [3,5] [4,5] [5,5] ", "[3,5] [4,5] [5,5] [6,5] ", "[2,5] [3,6] [4,7] [5,8] ", "[3,4] [4,5] [5,6] [6,7] ", "[4,1] [5,2] [6,3] [7,4] ", "[4,7] [5,8] [6,9] [7,10] ", "[5,4] [6,5] [7,6] [8,7] ", "[6,7] [7,8] [8,9] [9,10] ", "[7,2] [8,3] [9,4] [10,5] ", "[7,4] [8,5] [9,6] [10,7] ", "[8,3] [9,4] [10,5] [11,6] ", "[2,9] [3,8] [4,7] [5,6] ", "[3,8] [4,7] [5,6] [6,5] ", "[4,7] [5,6] [6,5] [7,4] ", "[4,9] [5,8] [6,7] [7,6] ", "[5,6] [6,5] [7,4] [8,3] ", "[5,10] [6,9] [7,8] [8,7] ", "[7,6] [8,5] [9,4] [10,3] ", "[8,7] [9,6] [10,5] [11,4] "] + +# Apply action "[5,6] [6,5] [7,4] [8,3] " +action: 413 + +# State 11 +# Apply action "[8,7] [9,6] [10,5] [11,4] " +action: 444 + +# State 12 +# Apply action "[8,3] [9,4] [10,5] [11,6] " +action: 343 + +# State 13 +# Apply action "[2,5] [3,6] [4,7] [5,8] " +action: 285 + +# State 14 +# Apply action "[1,5] [2,5] [3,5] [4,5] " +action: 148 + +# State 15 +# Apply action "[7,6] [8,5] [9,4] [10,3] " +action: 433 + +# State 16 +# Apply action "[6,5] [7,6] [8,7] [9,8] " +action: 325 + +# State 17 +# Apply action "[7,4] [8,5] [9,6] [10,7] " +action: 334 + +# State 18 +# Apply action "[6,7] [7,8] [8,9] [9,10] " +action: 327 + +# State 19 +# Apply action "[9,8] [10,7] [11,6] [12,5] " +action: 455 + +# State 20 +# 0000000000000 +# 0000010000000 +# 0000010100000 +# 0000011110000 +# 0000010100000 +# 0011111111000 +# 0001010101000 
+# 0001111111000 +# 0001010101000 +# 0000111110100 +# 0001010100000 +# 0000101000000 +# 0000010000000 +IsTerminal() = False +History() = [75, 198, 384, 94, 189, 35, 52, 204, 226, 56, 413, 444, 343, 285, 148, 433, 325, 334, 327, 455] +HistoryString() = "75, 198, 384, 94, 189, 35, 52, 204, 226, 56, 413, 444, 343, 285, 148, 433, 325, 334, 327, 455" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "0000000000000\n0000010000000\n0000010100000\n0000011110000\n0000010100000\n0011111111000\n0001010101000\n0001111111000\n0001010101000\n0000111110100\n0001010100000\n0000101000000\n0000010000000\n" +Rewards() = [1] +Returns() = [20] +LegalActions() = [241, 352] +StringLegalActions() = ["[8,7] [9,7] [10,7] [11,7] ", "[9,2] [10,3] [11,4] [12,5] "] + +# Apply action "[9,2] [10,3] [11,4] [12,5] " +action: 352 + +# State 21 +# Apply action "[8,7] [9,7] [10,7] [11,7] " +action: 241 + +# State 22 +# Apply action "[11,4] [11,5] [11,6] [11,7] " +action: 114 + +# State 23 +# 0000000000000 +# 0000010000000 +# 0000010100000 +# 0000011110000 +# 0000010100000 +# 0011111111000 +# 0001010101000 +# 0001111111000 +# 0001010101000 +# 0010111110100 +# 0001010100000 +# 0000111100000 +# 0000010000000 +IsTerminal() = True +History() = [75, 198, 384, 94, 189, 35, 52, 204, 226, 56, 413, 444, 343, 285, 148, 433, 325, 334, 327, 455, 352, 241, 114] +HistoryString() = "75, 198, 384, 94, 189, 35, 52, 204, 226, 56, 413, 444, 343, 285, 148, 433, 325, 334, 327, 455, 352, 241, 114" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "0000000000000\n0000010000000\n0000010100000\n0000011110000\n0000010100000\n0011111111000\n0001010101000\n0001111111000\n0001010101000\n0010111110100\n0001010100000\n0000111100000\n0000010000000\n" +Rewards() = [1] +Returns() = [23] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/negotiation(rng_seed=100,utterance_dim=2,num_symbols=3).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/negotiation(rng_seed=100,utterance_dim=2,num_symbols=3).txt new file mode 100644 index 0000000..f628eef --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/negotiation(rng_seed=100,utterance_dim=2,num_symbols=3).txt @@ -0,0 +1,249 @@ +game: negotiation(rng_seed=100,utterance_dim=2,num_symbols=3) + +GameType.chance_mode = ChanceMode.SAMPLED_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Negotiation" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["enable_proposals", "enable_utterances", "num_items", "num_symbols", "rng_seed", "utterance_dim"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "negotiation" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 226 +PolicyTensorShape() = [226] +MaxChanceOutcomes() = 1 +GetParameters() = {enable_proposals=True,enable_utterances=True,num_items=3,num_symbols=3,rng_seed=100,utterance_dim=2} +NumPlayers() = 2 +MinUtility() = -150.0 +MaxUtility() = 150.0 +UtilitySum() = None +ObservationTensorShape() = [81] +ObservationTensorLayout() = TensorLayout.CHW 
+ObservationTensorSize() = 81 +MaxGameLength() = 20 +ToString() = "negotiation(num_symbols=3,rng_seed=100,utterance_dim=2)" + +# State 0 +# Initial chance node +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "ChanceNode -- no observation" +ObservationString(1) = "ChanceNode -- no observation" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +SerializeState() = "chance" +ChanceOutcomes() = [(0,1)] +LegalActions() = [0] +StringLegalActions() = ["chance outcome 0"] + +# Apply action "chance outcome 0" +action: 0 + +# State 1 +# Max steps: 4 +# Item pool: 0 1 4 +# Agent 0 util vec: 2 9 4 +# Agent 1 util vec: 1 9 6 +# Current player: 0 +# Turn Type: Proposal +IsTerminal() = False +History() = [0] +HistoryString() = "0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Max steps: 4\nItem pool: 0 1 4\nAgent 0 util vec: 2 9 4\nCurrent player: 0\nTurn Type: Proposal\n" +ObservationString(1) = "Max steps: 4\nItem pool: 0 1 4\nAgent 1 util vec: 1 9 6\nCurrent player: 0\nTurn Type: Proposal\n" +ObservationTensor(0): ◉◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +SerializeState() = "4\n0 1 4\n2 9 4\n1 9 6\n0\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 6, 7, 8, 9, 10] +StringLegalActions() = ["Proposal: [0, 0, 0]", "Proposal: [0, 0, 1]", "Proposal: [0, 0, 2]", "Proposal: [0, 0, 3]", "Proposal: [0, 0, 4]", "Proposal: [0, 1, 0]", "Proposal: [0, 1, 1]", "Proposal: [0, 1, 2]", "Proposal: [0, 1, 3]", "Proposal: [0, 1, 4]"] + +# Apply action "Proposal: [0, 1, 3]" +action: 9 + +# State 2 +# Max steps: 4 +# Item pool: 0 1 4 +# Agent 0 util vec: 2 9 4 +# Agent 1 util vec: 1 9 6 +# Current player: 0 +# Turn Type: Utterance +# Player 0 proposes: [0, 1, 3] +IsTerminal() = False +History() = [0, 9] +HistoryString() = "0, 9" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Max steps: 4\nItem pool: 0 1 4\nAgent 0 util vec: 2 9 4\nCurrent player: 0\nTurn Type: Utterance\nMost recent proposal: [0, 1, 3]\n" +ObservationString(1) = "Max steps: 4\nItem pool: 0 1 4\nAgent 1 util vec: 1 9 6\nCurrent player: 0\nTurn Type: Utterance\nMost recent proposal: [0, 1, 3]\n" +ObservationTensor(0): ◉◯◯◉◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◉◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +SerializeState() = "4\n0 1 4\n2 9 4\n1 9 6\n0, 9\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [217, 218, 219, 220, 221, 222, 223, 224, 225] +StringLegalActions() = [", Utterance: [0, 0]", ", Utterance: [0, 1]", ", Utterance: [0, 2]", ", Utterance: [1, 0]", ", Utterance: [1, 1]", ", Utterance: [1, 2]", ", Utterance: [2, 0]", ", Utterance: [2, 1]", ", Utterance: [2, 2]"] + +# Apply action ", Utterance: [2, 0]" +action: 223 + +# State 3 +# Max steps: 4 +# Item pool: 0 1 4 +# Agent 0 util vec: 2 9 4 +# Agent 1 util vec: 1 9 6 +# Current player: 1 +# Turn Type: Proposal +# Player 0 proposes: [0, 1, 3] utters: [2, 0] +IsTerminal() = False +History() = [0, 9, 223] +HistoryString() = "0, 9, 223" +IsChanceNode() 
= False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "Max steps: 4\nItem pool: 0 1 4\nAgent 0 util vec: 2 9 4\nCurrent player: 1\nTurn Type: Proposal\nMost recent proposal: [0, 1, 3]\nMost recent utterance: [2, 0]\n" +ObservationString(1) = "Max steps: 4\nItem pool: 0 1 4\nAgent 1 util vec: 1 9 6\nCurrent player: 1\nTurn Type: Proposal\nMost recent proposal: [0, 1, 3]\nMost recent utterance: [2, 0]\n" +ObservationTensor(0): ◯◉◉◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◉◉◯◯ +ObservationTensor(1): ◯◉◉◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◉◉◯◯ +SerializeState() = "4\n0 1 4\n2 9 4\n1 9 6\n0, 9, 223\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 216] +StringLegalActions() = ["Proposal: [0, 0, 0]", "Proposal: [0, 0, 1]", "Proposal: [0, 0, 2]", "Proposal: [0, 0, 3]", "Proposal: [0, 0, 4]", "Proposal: [0, 1, 0]", "Proposal: [0, 1, 1]", "Proposal: [0, 1, 2]", "Proposal: [0, 1, 3]", "Proposal: [0, 1, 4]", "Proposal: Agreement reached!"] + +# Apply action "Proposal: [0, 0, 1]" +action: 1 + +# State 4 +# Max steps: 4 +# Item pool: 0 1 4 +# Agent 0 util vec: 2 9 4 +# Agent 1 util vec: 1 9 6 +# Current player: 1 +# Turn Type: Utterance +# Player 0 proposes: [0, 1, 3] utters: [2, 0] +# Player 1 proposes: [0, 0, 1] +IsTerminal() = False +History() = [0, 9, 223, 1] +HistoryString() = "0, 9, 223, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "Max steps: 4\nItem pool: 0 1 4\nAgent 0 util vec: 2 9 4\nCurrent player: 1\nTurn Type: Utterance\nMost recent proposal: [0, 0, 1]\nMost recent utterance: [2, 0]\n" +ObservationString(1) = "Max steps: 4\nItem pool: 0 1 4\nAgent 1 util vec: 1 9 6\nCurrent player: 1\nTurn Type: Utterance\nMost recent proposal: [0, 0, 1]\nMost recent utterance: [2, 0]\n" +ObservationTensor(0): ◯◉◯◉◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◉◉◯◯ +ObservationTensor(1): ◯◉◯◉◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◉◉◯◯ +SerializeState() = "4\n0 1 4\n2 9 4\n1 9 6\n0, 9, 223, 1\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [217, 218, 219, 220, 221, 222, 223, 224, 225] +StringLegalActions() = [", Utterance: [0, 0]", ", Utterance: [0, 1]", ", Utterance: [0, 2]", ", Utterance: [1, 0]", ", Utterance: [1, 1]", ", Utterance: [1, 2]", ", Utterance: [2, 0]", ", Utterance: [2, 1]", ", Utterance: [2, 2]"] + +# Apply action ", Utterance: [2, 2]" +action: 225 + +# State 5 +# Max steps: 4 +# Item pool: 0 1 4 +# Agent 0 util vec: 2 9 4 +# Agent 1 util vec: 1 9 6 +# Current player: 0 +# Turn Type: Proposal +# Player 0 proposes: [0, 1, 3] utters: [2, 0] +# Player 1 proposes: [0, 0, 1] utters: [2, 2] +IsTerminal() = False +History() = [0, 9, 223, 1, 225] +HistoryString() = "0, 9, 223, 1, 225" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Max steps: 4\nItem pool: 0 1 4\nAgent 0 util vec: 2 9 4\nCurrent player: 0\nTurn Type: Proposal\nMost recent proposal: [0, 0, 1]\nMost recent utterance: [2, 2]\n" +ObservationString(1) = "Max steps: 4\nItem pool: 0 1 4\nAgent 1 util vec: 1 9 6\nCurrent player: 0\nTurn Type: Proposal\nMost recent proposal: [0, 0, 1]\nMost recent utterance: [2, 2]\n" +ObservationTensor(0): ◉◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉ +ObservationTensor(1): 
◉◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◉ +SerializeState() = "4\n0 1 4\n2 9 4\n1 9 6\n0, 9, 223, 1, 225\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 216] +StringLegalActions() = ["Proposal: [0, 0, 0]", "Proposal: [0, 0, 1]", "Proposal: [0, 0, 2]", "Proposal: [0, 0, 3]", "Proposal: [0, 0, 4]", "Proposal: [0, 1, 0]", "Proposal: [0, 1, 1]", "Proposal: [0, 1, 2]", "Proposal: [0, 1, 3]", "Proposal: [0, 1, 4]", "Proposal: Agreement reached!"] + +# Apply action "Proposal: [0, 1, 1]" +action: 7 + +# State 6 +# Apply action ", Utterance: [1, 0]" +action: 220 + +# State 7 +# Max steps: 4 +# Item pool: 0 1 4 +# Agent 0 util vec: 2 9 4 +# Agent 1 util vec: 1 9 6 +# Current player: 1 +# Turn Type: Proposal +# Player 0 proposes: [0, 1, 3] utters: [2, 0] +# Player 1 proposes: [0, 0, 1] utters: [2, 2] +# Player 0 proposes: [0, 1, 1] utters: [1, 0] +IsTerminal() = False +History() = [0, 9, 223, 1, 225, 7, 220] +HistoryString() = "0, 9, 223, 1, 225, 7, 220" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "Max steps: 4\nItem pool: 0 1 4\nAgent 0 util vec: 2 9 4\nCurrent player: 1\nTurn Type: Proposal\nMost recent proposal: [0, 1, 1]\nMost recent utterance: [1, 0]\n" +ObservationString(1) = "Max steps: 4\nItem pool: 0 1 4\nAgent 1 util vec: 1 9 6\nCurrent player: 1\nTurn Type: Proposal\nMost recent proposal: [0, 1, 1]\nMost recent utterance: [1, 0]\n" +ObservationTensor(0): ◯◉◉◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◉◯◉◯◯ +ObservationTensor(1): ◯◉◉◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◉◯◉◯◯ +SerializeState() = "4\n0 1 4\n2 9 4\n1 9 6\n0, 9, 223, 1, 225, 7, 220\n" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 216] +StringLegalActions() = ["Proposal: [0, 0, 0]", "Proposal: [0, 0, 1]", "Proposal: [0, 0, 2]", "Proposal: [0, 0, 3]", "Proposal: [0, 0, 4]", "Proposal: [0, 1, 0]", "Proposal: [0, 1, 1]", "Proposal: [0, 1, 2]", "Proposal: [0, 1, 3]", "Proposal: [0, 1, 4]", "Proposal: Agreement reached!"] + +# Apply action "Proposal: [0, 1, 3]" +action: 9 + +# State 8 +# Apply action ", Utterance: [2, 0]" +action: 223 + +# State 9 +# Max steps: 4 +# Item pool: 0 1 4 +# Agent 0 util vec: 2 9 4 +# Agent 1 util vec: 1 9 6 +# Current player: 0 +# Turn Type: Proposal +# Player 0 proposes: [0, 1, 3] utters: [2, 0] +# Player 1 proposes: [0, 0, 1] utters: [2, 2] +# Player 0 proposes: [0, 1, 1] utters: [1, 0] +# Player 1 proposes: [0, 1, 3] utters: [2, 0] +IsTerminal() = True +History() = [0, 9, 223, 1, 225, 7, 220, 9, 223] +HistoryString() = "0, 9, 223, 1, 225, 7, 220, 9, 223" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "Max steps: 4\nItem pool: 0 1 4\nAgent 0 util vec: 2 9 4\nCurrent player: -4\nTurn Type: Proposal\nMost recent proposal: [0, 1, 3]\nMost recent utterance: [2, 0]\n" +ObservationString(1) = "Max steps: 4\nItem pool: 0 1 4\nAgent 1 util vec: 1 9 6\nCurrent player: -4\nTurn Type: Proposal\nMost recent proposal: [0, 1, 3]\nMost recent utterance: [2, 0]\n" +ObservationTensor(0): ◯◯◉◯◉◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◉◉◯◯ +ObservationTensor(1): ◯◯◉◯◉◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◉◉◯◯ +SerializeState() = "4\n0 1 4\n2 9 4\n1 9 6\n0, 9, 223, 1, 225, 7, 220, 9, 223\n" +Rewards() = [0, 0] +Returns() = [0, 0] diff --git 
a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/nim.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/nim.txt new file mode 100644 index 0000000..1d94516 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/nim.txt @@ -0,0 +1,180 @@ +game: nim + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Nim" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["is_misere", "pile_sizes"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "nim" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 29 +PolicyTensorShape() = [29] +MaxChanceOutcomes() = 0 +GetParameters() = {is_misere=True,pile_sizes=1;3;5;7} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [39] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 39 +MaxGameLength() = 16 +ToString() = "nim()" + +# State 0 +# (0): 1 3 5 7 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = "(0): 1 3 5 7" +ObservationString(1) = "(0): 1 3 5 7" +ObservationTensor(0): ◉◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(1): ◉◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 5, 6, 7, 9, 10, 11, 14, 15, 18, 19, 23, 27] +StringLegalActions() = ["pile:1, take:1;", "pile:2, take:1;", "pile:3, take:1;", "pile:4, take:1;", "pile:2, take:2;", "pile:3, take:2;", "pile:4, take:2;", "pile:2, take:3;", "pile:3, take:3;", "pile:4, take:3;", "pile:3, take:4;", "pile:4, take:4;", "pile:3, take:5;", "pile:4, take:5;", "pile:4, take:6;", "pile:4, take:7;"] + +# Apply action "pile:4, take:5;" +action: 19 + +# State 1 +# (1): 1 3 5 2 +IsTerminal() = False +History() = [19] +HistoryString() = "19" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "19" +InformationStateString(1) = "19" +ObservationString(0) = "(1): 1 3 5 2" +ObservationString(1) = "(1): 1 3 5 2" +ObservationTensor(0): ◯◉◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(1): ◯◉◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 5, 6, 7, 9, 10, 14, 18] +StringLegalActions() = ["pile:1, take:1;", "pile:2, take:1;", "pile:3, take:1;", "pile:4, take:1;", "pile:2, take:2;", "pile:3, take:2;", "pile:4, take:2;", "pile:2, take:3;", "pile:3, take:3;", "pile:3, take:4;", "pile:3, take:5;"] + +# Apply action "pile:3, take:5;" +action: 18 + +# State 2 +# (0): 1 3 0 2 +IsTerminal() = False +History() = [19, 18] +HistoryString() = "19, 18" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "19, 18" +InformationStateString(1) = "19, 18" +ObservationString(0) = "(0): 1 3 0 2" +ObservationString(1) = "(0): 1 3 0 2" +ObservationTensor(0): ◉◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(1): 
◉◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 3, 5, 7, 9] +StringLegalActions() = ["pile:1, take:1;", "pile:2, take:1;", "pile:4, take:1;", "pile:2, take:2;", "pile:4, take:2;", "pile:2, take:3;"] + +# Apply action "pile:1, take:1;" +action: 0 + +# State 3 +# (1): 0 3 0 2 +IsTerminal() = False +History() = [19, 18, 0] +HistoryString() = "19, 18, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "19, 18, 0" +InformationStateString(1) = "19, 18, 0" +ObservationString(0) = "(1): 0 3 0 2" +ObservationString(1) = "(1): 0 3 0 2" +ObservationTensor(0): ◯◉◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(1): ◯◉◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 3, 5, 7, 9] +StringLegalActions() = ["pile:2, take:1;", "pile:4, take:1;", "pile:2, take:2;", "pile:4, take:2;", "pile:2, take:3;"] + +# Apply action "pile:2, take:1;" +action: 1 + +# State 4 +# (0): 0 2 0 2 +IsTerminal() = False +History() = [19, 18, 0, 1] +HistoryString() = "19, 18, 0, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "19, 18, 0, 1" +InformationStateString(1) = "19, 18, 0, 1" +ObservationString(0) = "(0): 0 2 0 2" +ObservationString(1) = "(0): 0 2 0 2" +ObservationTensor(0): ◉◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 3, 5, 7] +StringLegalActions() = ["pile:2, take:1;", "pile:4, take:1;", "pile:2, take:2;", "pile:4, take:2;"] + +# Apply action "pile:4, take:2;" +action: 7 + +# State 5 +# (1): 0 2 0 0 +IsTerminal() = False +History() = [19, 18, 0, 1, 7] +HistoryString() = "19, 18, 0, 1, 7" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "19, 18, 0, 1, 7" +InformationStateString(1) = "19, 18, 0, 1, 7" +ObservationString(0) = "(1): 0 2 0 0" +ObservationString(1) = "(1): 0 2 0 0" +ObservationTensor(0): ◯◉◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◉◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 5] +StringLegalActions() = ["pile:2, take:1;", "pile:2, take:2;"] + +# Apply action "pile:2, take:2;" +action: 5 + +# State 6 +# (0): 0 0 0 0 +IsTerminal() = True +History() = [19, 18, 0, 1, 7, 5] +HistoryString() = "19, 18, 0, 1, 7, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "19, 18, 0, 1, 7, 5" +InformationStateString(1) = "19, 18, 0, 1, 7, 5" +ObservationString(0) = "(0): 0 0 0 0" +ObservationString(1) = "(0): 0 0 0 0" +ObservationTensor(0): ◉◯◉◯◯◯◉◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◉◯◯◯◉◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/nine_mens_morris.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/nine_mens_morris.txt new file mode 100644 index 0000000..bb09422 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/nine_mens_morris.txt @@ -0,0 +1,1481 @@ +game: nine_mens_morris + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Nine men's morris" +GameType.max_num_players = 2 
+GameType.min_num_players = 2 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "nine_mens_morris" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 600 +PolicyTensorShape() = [600] +MaxChanceOutcomes() = 0 +GetParameters() = {} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [5, 7, 7] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 245 +MaxGameLength() = 214 +ToString() = "nine_mens_morris()" + +# State 0 +# .------.------. +# | | | +# | .----.----. | +# | | | | | +# | | .--.--. | | +# | | | | | | +# .-.-. .-.-. +# | | | | | | +# | | .--.--. | | +# | | | | | +# | .----.----. | +# | | | +# .------.------. +# +# Current player: W +# Turn number: 0 +# Men to deploy: 9 9 +# Num men: 9 9 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = ".------.------.\n| | |\n| .----.----. |\n| | | | |\n| | .--.--. | |\n| | | | | |\n.-.-. .-.-.\n| | | | | |\n| | .--.--. | |\n| | | | |\n| .----.----. |\n| | |\n.------.------.\n\nCurrent player: W\nTurn number: 0\nMen to deploy: 9 9\nNum men: 9 9\n" +ObservationString(1) = ".------.------.\n| | |\n| .----.----. |\n| | | | |\n| | .--.--. | |\n| | | | | |\n.-.-. .-.-.\n| | | | | |\n| | .--.--. | |\n| | | | |\n| .----.----. |\n| | |\n.------.------.\n\nCurrent player: W\nTurn number: 0\nMen to deploy: 9 9\nNum men: 9 9\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] +StringLegalActions() = ["Point 0", "Point 1", "Point 2", "Point 3", "Point 4", "Point 5", "Point 6", "Point 7", "Point 8", "Point 9", "Point 10", "Point 11", "Point 12", "Point 13", "Point 14", "Point 15", "Point 16", "Point 17", "Point 18", "Point 19", "Point 20", "Point 21", "Point 22", "Point 23"] + +# Apply action "Point 21" +action: 21 + +# State 1 +# .------.------. +# | | | +# | .----.----. | +# | | | | | +# | | .--.--. | | +# | | | | | | +# .-.-. .-.-. +# | | | | | | +# | | .--.--. | | +# | | | | | +# | .----.----. | +# | | | +# W------.------. +# +# Current player: B +# Turn number: 1 +# Men to deploy: 8 9 +# Num men: 9 9 +IsTerminal() = False +History() = [21] +HistoryString() = "21" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "21" +InformationStateString(1) = "21" +ObservationString(0) = ".------.------.\n| | |\n| .----.----. |\n| | | | |\n| | .--.--. 
| |\n| | | | | |\n.-.-. .-.-.\n| | | | | |\n| | .--.--. | |\n| | | | |\n| .----.----. |\n| | |\nW------.------.\n\nCurrent player: B\nTurn number: 1\nMen to deploy: 8 9\nNum men: 9 9\n" +ObservationString(1) = ".------.------.\n| | |\n| .----.----. |\n| | | | |\n| | .--.--. | |\n| | | | | |\n.-.-. .-.-.\n| | | | | |\n| | .--.--. | |\n| | | | |\n| .----.----. |\n| | |\nW------.------.\n\nCurrent player: B\nTurn number: 1\nMen to deploy: 8 9\nNum men: 9 9\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23] +StringLegalActions() = ["Point 0", "Point 1", "Point 2", "Point 3", "Point 4", "Point 5", "Point 6", "Point 7", "Point 8", "Point 9", "Point 10", "Point 11", "Point 12", "Point 13", "Point 14", "Point 15", "Point 16", "Point 17", "Point 18", "Point 19", "Point 20", "Point 22", "Point 23"] + +# Apply action "Point 0" +action: 0 + +# State 2 +# B------.------. +# | | | +# | .----.----. | +# | | | | | +# | | .--.--. | | +# | | | | | | +# .-.-. .-.-. +# | | | | | | +# | | .--.--. | | +# | | | | | +# | .----.----. | +# | | | +# W------.------. +# +# Current player: W +# Turn number: 2 +# Men to deploy: 8 8 +# Num men: 9 9 +IsTerminal() = False +History() = [21, 0] +HistoryString() = "21, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "21, 0" +InformationStateString(1) = "21, 0" +ObservationString(0) = "B------.------.\n| | |\n| .----.----. |\n| | | | |\n| | .--.--. | |\n| | | | | |\n.-.-. .-.-.\n| | | | | |\n| | .--.--. | |\n| | | | |\n| .----.----. |\n| | |\nW------.------.\n\nCurrent player: W\nTurn number: 2\nMen to deploy: 8 8\nNum men: 9 9\n" +ObservationString(1) = "B------.------.\n| | |\n| .----.----. |\n| | | | |\n| | .--.--. | |\n| | | | | |\n.-.-. .-.-.\n| | | | | |\n| | .--.--. | |\n| | | | |\n| .----.----. 
|\n| | |\nW------.------.\n\nCurrent player: W\nTurn number: 2\nMen to deploy: 8 8\nNum men: 9 9\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23] +StringLegalActions() = ["Point 1", "Point 2", "Point 3", "Point 4", "Point 5", "Point 6", "Point 7", "Point 8", "Point 9", "Point 10", "Point 11", "Point 12", "Point 13", "Point 14", "Point 15", "Point 16", "Point 17", "Point 18", "Point 19", "Point 20", "Point 22", "Point 23"] + +# Apply action "Point 2" +action: 2 + +# State 3 +# B------.------W +# | | | +# | .----.----. | +# | | | | | +# | | .--.--. | | +# | | | | | | +# .-.-. .-.-. +# | | | | | | +# | | .--.--. | | +# | | | | | +# | .----.----. | +# | | | +# W------.------. +# +# Current player: B +# Turn number: 3 +# Men to deploy: 7 8 +# Num men: 9 9 +IsTerminal() = False +History() = [21, 0, 2] +HistoryString() = "21, 0, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "21, 0, 2" +InformationStateString(1) = "21, 0, 2" +ObservationString(0) = "B------.------W\n| | |\n| .----.----. |\n| | | | |\n| | .--.--. | |\n| | | | | |\n.-.-. .-.-.\n| | | | | |\n| | .--.--. | |\n| | | | |\n| .----.----. |\n| | |\nW------.------.\n\nCurrent player: B\nTurn number: 3\nMen to deploy: 7 8\nNum men: 9 9\n" +ObservationString(1) = "B------.------W\n| | |\n| .----.----. |\n| | | | |\n| | .--.--. | |\n| | | | | |\n.-.-. .-.-.\n| | | | | |\n| | .--.--. | |\n| | | | |\n| .----.----. |\n| | |\nW------.------.\n\nCurrent player: B\nTurn number: 3\nMen to deploy: 7 8\nNum men: 9 9\n" +ObservationTensor(0): +◯◯◯◯◯◯◉ ◉◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◉ ◉◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23] +StringLegalActions() = ["Point 1", "Point 3", "Point 4", "Point 5", "Point 6", "Point 7", "Point 8", "Point 9", "Point 10", "Point 11", "Point 12", "Point 13", "Point 14", "Point 15", "Point 16", "Point 17", "Point 18", "Point 19", "Point 20", "Point 22", "Point 23"] + +# Apply action "Point 15" +action: 15 + +# State 4 +# B------.------W +# | | | +# | .----.----. | +# | | | | | +# | | .--.--. | | +# | | | | | | +# .-.-. .-.-. +# | | | | | | +# | | B--.--. | | +# | | | | | +# | .----.----. | +# | | | +# W------.------. 
+# +# Current player: W +# Turn number: 4 +# Men to deploy: 7 7 +# Num men: 9 9 +IsTerminal() = False +History() = [21, 0, 2, 15] +HistoryString() = "21, 0, 2, 15" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "21, 0, 2, 15" +InformationStateString(1) = "21, 0, 2, 15" +ObservationString(0) = "B------.------W\n| | |\n| .----.----. |\n| | | | |\n| | .--.--. | |\n| | | | | |\n.-.-. .-.-.\n| | | | | |\n| | B--.--. | |\n| | | | |\n| .----.----. |\n| | |\nW------.------.\n\nCurrent player: W\nTurn number: 4\nMen to deploy: 7 7\nNum men: 9 9\n" +ObservationString(1) = "B------.------W\n| | |\n| .----.----. |\n| | | | |\n| | .--.--. | |\n| | | | | |\n.-.-. .-.-.\n| | | | | |\n| | B--.--. | |\n| | | | |\n| .----.----. |\n| | |\nW------.------.\n\nCurrent player: W\nTurn number: 4\nMen to deploy: 7 7\nNum men: 9 9\n" +ObservationTensor(0): +◯◯◯◯◯◯◉ ◉◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◯◯◯◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◉ ◉◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◯◯◯◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23] +StringLegalActions() = ["Point 1", "Point 3", "Point 4", "Point 5", "Point 6", "Point 7", "Point 8", "Point 9", "Point 10", "Point 11", "Point 12", "Point 13", "Point 14", "Point 16", "Point 17", "Point 18", "Point 19", "Point 20", "Point 22", "Point 23"] + +# Apply action "Point 11" +action: 11 + +# State 5 +# B------.------W +# | | | +# | .----.----. | +# | | | | | +# | | .--.--. | | +# | | | | | | +# .-.-W .-.-. +# | | | | | | +# | | B--.--. | | +# | | | | | +# | .----.----. | +# | | | +# W------.------. +# +# Current player: B +# Turn number: 5 +# Men to deploy: 6 7 +# Num men: 9 9 +IsTerminal() = False +History() = [21, 0, 2, 15, 11] +HistoryString() = "21, 0, 2, 15, 11" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "21, 0, 2, 15, 11" +InformationStateString(1) = "21, 0, 2, 15, 11" +ObservationString(0) = "B------.------W\n| | |\n| .----.----. |\n| | | | |\n| | .--.--. | |\n| | | | | |\n.-.-W .-.-.\n| | | | | |\n| | B--.--. | |\n| | | | |\n| .----.----. |\n| | |\nW------.------.\n\nCurrent player: B\nTurn number: 5\nMen to deploy: 6 7\nNum men: 9 9\n" +ObservationString(1) = "B------.------W\n| | |\n| .----.----. |\n| | | | |\n| | .--.--. | |\n| | | | | |\n.-.-W .-.-.\n| | | | | |\n| | B--.--. | |\n| | | | |\n| .----.----. 
|\n| | |\nW------.------.\n\nCurrent player: B\nTurn number: 5\nMen to deploy: 6 7\nNum men: 9 9\n" +ObservationTensor(0): +◯◯◯◯◯◯◉ ◉◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◯◯◯◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◉ ◉◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◯◯◯◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23] +StringLegalActions() = ["Point 1", "Point 3", "Point 4", "Point 5", "Point 6", "Point 7", "Point 8", "Point 9", "Point 10", "Point 12", "Point 13", "Point 14", "Point 16", "Point 17", "Point 18", "Point 19", "Point 20", "Point 22", "Point 23"] + +# Apply action "Point 5" +action: 5 + +# State 6 +# Apply action "Point 7" +action: 7 + +# State 7 +# Apply action "Point 1" +action: 1 + +# State 8 +# Apply action "Point 10" +action: 10 + +# State 9 +# Apply action "Point 23" +action: 23 + +# State 10 +# Apply action "Point 4" +action: 4 + +# State 11 +# Apply action "Point 13" +action: 13 + +# State 12 +# Apply action "Point 20" +action: 20 + +# State 13 +# Apply action "Point 22" +action: 22 + +# State 14 +# Apply action "Point 14" +action: 14 + +# State 15 +# Apply action "Point 8" +action: 8 + +# State 16 +# Apply action "Point 9" +action: 9 + +# State 17 +# Apply action "Point 5" +action: 5 + +# State 18 +# Apply action "Point 5" +action: 5 + +# State 19 +# B------B------W +# | | | +# | .----W----B | +# | | | | | +# | | .--W--B | | +# | | | | | | +# W-W-W .-B-W +# | | | | | | +# | | B--.--. | | +# | | | | | +# | .----.----W | +# | | | +# W------B------B +# +# Current player: W +# Turn number: 18 +# Men to deploy: 0 0 +# Num men: 9 8 +IsTerminal() = False +History() = [21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5] +HistoryString() = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5" +InformationStateString(1) = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5" +ObservationString(0) = "B------B------W\n| | |\n| .----W----B |\n| | | | |\n| | .--W--B | |\n| | | | | |\nW-W-W .-B-W\n| | | | | |\n| | B--.--. | |\n| | | | |\n| .----.----W |\n| | |\nW------B------B\n\nCurrent player: W\nTurn number: 18\nMen to deploy: 0 0\nNum men: 9 8\n" +ObservationString(1) = "B------B------W\n| | |\n| .----W----B |\n| | | | |\n| | .--W--B | |\n| | | | | |\nW-W-W .-B-W\n| | | | | |\n| | B--.--. 
| |\n| | | | |\n| .----.----W |\n| | |\nW------B------B\n\nCurrent player: W\nTurn number: 18\nMen to deploy: 0 0\nNum men: 9 8\n" +ObservationTensor(0): +◯◯◯◯◯◯◉ ◉◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◉◯◯◯ ◯◯◯◯◯◉◯ ◯◉◯◯◯◯◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◉◯◯◯ ◯◯◯◯◉◯◯ ◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◉◉◉◯◯◯◉ ◯◯◯◯◯◉◯ ◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◯◯◯◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◯◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◉◯◯◯◯◯◯ ◯◯◯◉◯◯◉ ◯◯◯◯◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◉ ◉◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◉◯◯◯ ◯◯◯◯◯◉◯ ◯◉◯◯◯◯◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◉◯◯◯ ◯◯◯◯◉◯◯ ◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◉◉◉◯◯◯◉ ◯◯◯◯◯◉◯ ◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◯◯◯◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◯◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◉◯◯◯◯◯◯ ◯◯◯◉◯◯◉ ◯◯◯◯◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [123, 198, 267, 282, 294, 523] +StringLegalActions() = ["Move 4 -> 3", "Move 7 -> 6", "Move 10 -> 3", "Move 10 -> 18", "Move 11 -> 6", "Move 20 -> 19"] + +# Apply action "Move 10 -> 3" +action: 267 + +# State 20 +# Apply action "Move 22 -> 19" +action: 571 + +# State 21 +# Apply action "Move 3 -> 10" +action: 106 + +# State 22 +# Apply action "Point 15" +action: 15 + +# State 23 +# B------B------W +# | | | +# | .----W----B | +# | | | | | +# | | .--W--B | | +# | | | | | | +# W-W-W .-B-W +# | | | | | | +# | | .--.--. | | +# | | | | | +# | .----B----W | +# | | | +# W------.------B +# +# Current player: B +# Turn number: 21 +# Men to deploy: 0 0 +# Num men: 9 7 +IsTerminal() = False +History() = [21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15] +HistoryString() = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15" +InformationStateString(1) = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15" +ObservationString(0) = "B------B------W\n| | |\n| .----W----B |\n| | | | |\n| | .--W--B | |\n| | | | | |\nW-W-W .-B-W\n| | | | | |\n| | .--.--. | |\n| | | | |\n| .----B----W |\n| | |\nW------.------B\n\nCurrent player: B\nTurn number: 21\nMen to deploy: 0 0\nNum men: 9 7\n" +ObservationString(1) = "B------B------W\n| | |\n| .----W----B |\n| | | | |\n| | .--W--B | |\n| | | | | |\nW-W-W .-B-W\n| | | | | |\n| | .--.--. 
| |\n| | | | |\n| .----B----W |\n| | |\nW------.------B\n\nCurrent player: B\nTurn number: 21\nMen to deploy: 0 0\nNum men: 9 7\n" +ObservationTensor(0): +◯◯◯◯◯◯◉ ◉◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◉◯◯◯ ◯◯◯◯◯◉◯ ◯◉◯◯◯◯◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◉◯◯◯ ◯◯◯◯◉◯◯ ◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◉◉◉◯◯◯◉ ◯◯◯◯◯◉◯ ◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◉◯ ◯◯◯◉◯◯◯ ◯◉◯◯◯◯◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◉◯◯◯◯◯◯ ◯◯◯◯◯◯◉ ◯◯◯◉◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◉ ◉◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◉◯◯◯ ◯◯◯◯◯◉◯ ◯◉◯◯◯◯◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◉◯◯◯ ◯◯◯◯◉◯◯ ◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◉◉◉◯◯◯◉ ◯◯◯◯◯◉◯ ◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◉◯ ◯◯◯◉◯◯◯ ◯◉◯◯◯◯◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◉◯◯◯◯◯◯ ◯◯◯◯◯◯◉ ◯◯◯◉◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [228, 348, 496, 498, 502, 598] +StringLegalActions() = ["Move 8 -> 12", "Move 13 -> 12", "Move 19 -> 16", "Move 19 -> 18", "Move 19 -> 22", "Move 23 -> 22"] + +# Apply action "Move 19 -> 18" +action: 498 + +# State 24 +# Apply action "Move 7 -> 6" +action: 198 + +# State 25 +# Apply action "Move 8 -> 7" +action: 223 + +# State 26 +# Apply action "Move 21 -> 22" +action: 550 + +# State 27 +# Apply action "Move 7 -> 8" +action: 200 + +# State 28 +# Apply action "Move 11 -> 15" +action: 303 + +# State 29 +# Apply action "Move 13 -> 12" +action: 348 + +# State 30 +# Apply action "Move 15 -> 11" +action: 395 + +# State 31 +# Apply action "Point 8" +action: 8 + +# State 32 +# Apply action "Move 12 -> 13" +action: 325 + +# State 33 +# Apply action "Move 22 -> 21" +action: 573 + +# State 34 +# Apply action "Move 18 -> 19" +action: 475 + +# State 35 +# Apply action "Move 4 -> 7" +action: 127 + +# State 36 +# Apply action "Move 5 -> 4" +action: 148 + +# State 37 +# B------B------W +# | | | +# | .----B----. | +# | | | | | +# | | W--W--. | | +# | | | | | | +# W-W-W .-B-W +# | | | | | | +# | | .--.--. | | +# | | | | | +# | .----B----W | +# | | | +# W------.------B +# +# Current player: W +# Turn number: 34 +# Men to deploy: 0 0 +# Num men: 9 6 +IsTerminal() = False +History() = [21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148] +HistoryString() = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148" +InformationStateString(1) = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148" +ObservationString(0) = "B------B------W\n| | |\n| .----B----. |\n| | | | |\n| | W--W--. | |\n| | | | | |\nW-W-W .-B-W\n| | | | | |\n| | .--.--. | |\n| | | | |\n| .----B----W |\n| | |\nW------.------B\n\nCurrent player: W\nTurn number: 34\nMen to deploy: 0 0\nNum men: 9 6\n" +ObservationString(1) = "B------B------W\n| | |\n| .----B----. |\n| | | | |\n| | W--W--. | |\n| | | | | |\nW-W-W .-B-W\n| | | | | |\n| | .--.--. 
| |\n| | | | |\n| .----B----W |\n| | |\nW------.------B\n\nCurrent player: W\nTurn number: 34\nMen to deploy: 0 0\nNum men: 9 6\n" +ObservationTensor(0): +◯◯◯◯◯◯◉ ◉◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◯◉◯◯◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◉◉◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◉◉◉◯◯◯◉ ◯◯◯◯◯◉◯ ◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◉◯ ◯◯◯◉◯◯◯ ◯◉◯◯◯◯◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◉◯◯◯◯◯◯ ◯◯◯◯◯◯◉ ◯◯◯◉◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◉ ◉◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◯◉◯◯◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◉◉◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◉◉◉◯◯◯◉ ◯◯◯◯◯◉◯ ◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◉◯ ◯◯◯◉◯◯◯ ◯◉◯◯◯◯◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◉◯◯◯◯◯◯ ◯◯◯◯◯◯◉ ◯◯◯◉◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [200, 267, 282, 303, 550] +StringLegalActions() = ["Move 7 -> 8", "Move 10 -> 3", "Move 10 -> 18", "Move 11 -> 15", "Move 21 -> 22"] + +# Apply action "Move 7 -> 8" +action: 200 + +# State 38 +# Apply action "Move 19 -> 18" +action: 498 + +# State 39 +# Apply action "Move 21 -> 22" +action: 550 + +# State 40 +# Apply action "Move 4 -> 7" +action: 127 + +# State 41 +# Apply action "Move 20 -> 19" +action: 523 + +# State 42 +# Apply action "Move 13 -> 20" +action: 356 + +# State 43 +# Apply action "Move 8 -> 12" +action: 228 + +# State 44 +# B------B------W +# | | | +# | .----.----. | +# | | | | | +# | | W--B--. | | +# | | | | | | +# W-W-W W-.-W +# | | | | | | +# | | .--.--. | | +# | | | | | +# | B----W----B | +# | | | +# .------W------B +# +# Current player: B +# Turn number: 41 +# Men to deploy: 0 0 +# Num men: 9 6 +IsTerminal() = False +History() = [21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228] +HistoryString() = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228" +InformationStateString(1) = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228" +ObservationString(0) = "B------B------W\n| | |\n| .----.----. |\n| | | | |\n| | W--B--. | |\n| | | | | |\nW-W-W W-.-W\n| | | | | |\n| | .--.--. | |\n| | | | |\n| B----W----B |\n| | |\n.------W------B\n\nCurrent player: B\nTurn number: 41\nMen to deploy: 0 0\nNum men: 9 6\n" +ObservationString(1) = "B------B------W\n| | |\n| .----.----. |\n| | | | |\n| | W--B--. | |\n| | | | | |\nW-W-W W-.-W\n| | | | | |\n| | .--.--. 
| |\n| | | | |\n| B----W----B |\n| | |\n.------W------B\n\nCurrent player: B\nTurn number: 41\nMen to deploy: 0 0\nNum men: 9 6\n" +ObservationTensor(0): +◯◯◯◯◯◯◉ ◉◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◉◯◯◯◯ ◯◯◯◉◯◯◯ ◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◉◉◉◯◉◯◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◉◯◯◯ ◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◉ ◉◯◯◯◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◉ ◉◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◉◯◯◯◯ ◯◯◯◉◯◯◯ ◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◉◉◉◯◉◯◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◉◯◯◯ ◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◉ ◉◯◯◯◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [52, 196, 200, 517] +StringLegalActions() = ["Move 1 -> 4", "Move 7 -> 4", "Move 7 -> 8", "Move 20 -> 13"] + +# Apply action "Move 20 -> 13" +action: 517 + +# State 45 +# Apply action "Move 9 -> 21" +action: 261 + +# State 46 +# Apply action "Move 0 -> 9" +action: 33 + +# State 47 +# Apply action "Move 10 -> 3" +action: 267 + +# State 48 +# Apply action "Move 13 -> 20" +action: 356 + +# State 49 +# Apply action "Move 12 -> 17" +action: 329 + +# State 50 +# Apply action "Move 7 -> 8" +action: 200 + +# State 51 +# Apply action "Move 19 -> 16" +action: 496 + +# State 52 +# Apply action "Move 9 -> 10" +action: 250 + +# State 53 +# Apply action "Move 17 -> 12" +action: 444 + +# State 54 +# Apply action "Move 20 -> 13" +action: 517 + +# State 55 +# Apply action "Move 21 -> 9" +action: 537 + +# State 56 +# Apply action "Move 1 -> 4" +action: 52 + +# State 57 +# .------.------W +# | | | +# | W----B----. | +# | | | | | +# | | W--.--B | | +# | | | | | | +# W-B-W W-B-W +# | | | | | | +# | | .--W--. | | +# | | | | | +# | B----.----. | +# | | | +# .------W------B +# +# Current player: W +# Turn number: 54 +# Men to deploy: 0 0 +# Num men: 9 6 +IsTerminal() = False +History() = [21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52] +HistoryString() = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52" +InformationStateString(1) = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52" +ObservationString(0) = ".------.------W\n| | |\n| W----B----. |\n| | | | |\n| | W--.--B | |\n| | | | | |\nW-B-W W-B-W\n| | | | | |\n| | .--W--. | |\n| | | | |\n| B----.----. |\n| | |\n.------W------B\n\nCurrent player: W\nTurn number: 54\nMen to deploy: 0 0\nNum men: 9 6\n" +ObservationString(1) = ".------.------W\n| | |\n| W----B----. 
|\n| | | | |\n| | W--.--B | |\n| | | | | |\nW-B-W W-B-W\n| | | | | |\n| | .--W--. | |\n| | | | |\n| B----.----. |\n| | |\n.------W------B\n\nCurrent player: W\nTurn number: 54\nMen to deploy: 0 0\nNum men: 9 6\n" +ObservationTensor(0): +◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◉◯◯◯◯◯ ◯◯◯◉◯◯◯ ◯◯◯◯◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◉◯◯◯◯ ◯◯◯◯◉◯◯ ◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◉◯◉◯◉◯◉ ◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯ ◯◯◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◉ ◉◯◯◯◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◉◯◯◯◯◯ ◯◯◯◉◯◯◯ ◯◯◯◯◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◉◯◯◯◯ ◯◯◯◯◉◯◯ ◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◉◯◉◯◉◯◉ ◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯ ◯◯◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◉ ◉◯◯◯◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [73, 175, 261, 303, 329, 423, 425, 427, 571, 573] +StringLegalActions() = ["Move 2 -> 1", "Move 6 -> 7", "Move 9 -> 21", "Move 11 -> 15", "Move 12 -> 17", "Move 16 -> 15", "Move 16 -> 17", "Move 16 -> 19", "Move 22 -> 19", "Move 22 -> 21"] + +# Apply action "Move 2 -> 1" +action: 73 + +# State 58 +# Apply action "Move 4 -> 7" +action: 127 + +# State 59 +# Apply action "Move 16 -> 15" +action: 423 + +# State 60 +# Apply action "Point 23" +action: 23 + +# State 61 +# Apply action "Move 7 -> 4" +action: 196 + +# State 62 +# Apply action "Move 15 -> 16" +action: 400 + +# State 63 +# Apply action "Move 4 -> 5" +action: 125 + +# State 64 +# Apply action "Move 14 -> 23" +action: 383 + +# State 65 +# .------W------. +# | | | +# | W----.----B | +# | | | | | +# | | W--.--B | | +# | | | | | | +# W-B-W W-B-. +# | | | | | | +# | | .--W--. | | +# | | | | | +# | B----.----. | +# | | | +# .------W------W +# +# Current player: B +# Turn number: 61 +# Men to deploy: 0 0 +# Num men: 9 5 +IsTerminal() = False +History() = [21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383] +HistoryString() = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383" +InformationStateString(1) = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383" +ObservationString(0) = ".------W------.\n| | |\n| W----.----B |\n| | | | |\n| | W--.--B | |\n| | | | | |\nW-B-W W-B-.\n| | | | | |\n| | .--W--. | |\n| | | | |\n| B----.----. 
|\n| | |\n.------W------W\n\nCurrent player: B\nTurn number: 61\nMen to deploy: 0 0\nNum men: 9 5\n" +ObservationString(1) = ".------W------.\n| | |\n| W----.----B |\n| | | | |\n| | W--.--B | |\n| | | | | |\nW-B-W W-B-.\n| | | | | |\n| | .--W--. | |\n| | | | |\n| B----.----. |\n| | |\n.------W------W\n\nCurrent player: B\nTurn number: 61\nMen to deploy: 0 0\nNum men: 9 5\n" +ObservationTensor(0): +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◉◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◉◯◯◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◉◯◯◯◯ ◯◯◯◯◉◯◯ ◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◉◯◉◯◉◯◯ ◯◉◯◯◯◉◯ ◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯ ◯◯◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◉◯◯◉ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◉◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◉◯◯◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◉◯◯◯◯ ◯◯◯◯◉◯◯ ◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◉◯◉◯◉◯◯ ◯◉◯◯◯◉◯ ◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯ ◯◯◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◉◯◯◉ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [148, 223, 350, 356, 475] +StringLegalActions() = ["Move 5 -> 4", "Move 8 -> 7", "Move 13 -> 14", "Move 13 -> 20", "Move 18 -> 19"] + +# Apply action "Move 5 -> 4" +action: 148 + +# State 66 +# Apply action "Move 16 -> 17" +action: 425 + +# State 67 +# Apply action "Move 4 -> 7" +action: 127 + +# State 68 +# Apply action "Move 11 -> 15" +action: 303 + +# State 69 +# Apply action "Move 13 -> 20" +action: 356 + +# State 70 +# Apply action "Move 23 -> 14" +action: 590 + +# State 71 +# Apply action "Move 18 -> 19" +action: 475 + +# State 72 +# Apply action "Move 14 -> 23" +action: 383 + +# State 73 +# Apply action "Move 20 -> 13" +action: 517 + +# State 74 +# Apply action "Move 15 -> 16" +action: 400 + +# State 75 +# Apply action "Move 13 -> 14" +action: 350 + +# State 76 +# .------W------. +# | | | +# | W----.----. | +# | | | | | +# | | W--B--B | | +# | | | | | | +# W-B-. W-.-B +# | | | | | | +# | | .--W--W | | +# | | | | | +# | .----B----. 
| +# | | | +# .------W------W +# +# Current player: W +# Turn number: 72 +# Men to deploy: 0 0 +# Num men: 9 5 +IsTerminal() = False +History() = [21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350] +HistoryString() = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350" +InformationStateString(1) = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350" +ObservationString(0) = ".------W------.\n| | |\n| W----.----. |\n| | | | |\n| | W--B--B | |\n| | | | | |\nW-B-. W-.-B\n| | | | | |\n| | .--W--W | |\n| | | | |\n| .----B----. |\n| | |\n.------W------W\n\nCurrent player: W\nTurn number: 72\nMen to deploy: 0 0\nNum men: 9 5\n" +ObservationString(1) = ".------W------.\n| | |\n| W----.----. |\n| | | | |\n| | W--B--B | |\n| | | | | |\nW-B-. W-.-B\n| | | | | |\n| | .--W--W | |\n| | | | |\n| .----B----. 
|\n| | |\n.------W------W\n\nCurrent player: W\nTurn number: 72\nMen to deploy: 0 0\nNum men: 9 5\n" +ObservationTensor(0): +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◉◯◯◯◯ ◯◯◯◉◉◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◉◯◯◯◉◯◯ ◯◉◯◯◯◯◉ ◯◯◉◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◉◉◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◯◉◯◯◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◉◯◯◉ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◉◯◯◯◯ ◯◯◯◉◉◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◉◯◯◯◉◯◯ ◯◉◯◯◯◯◉ ◯◯◉◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◉◉◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◯◉◯◯◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◉◯◯◉ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [50, 52, 100, 179, 261, 325, 423, 573] +StringLegalActions() = ["Move 1 -> 2", "Move 1 -> 4", "Move 3 -> 4", "Move 6 -> 11", "Move 9 -> 21", "Move 12 -> 13", "Move 16 -> 15", "Move 22 -> 21"] + +# Apply action "Move 6 -> 11" +action: 179 + +# State 77 +# Apply action "Move 14 -> 2" +action: 362 + +# State 78 +# Apply action "Move 11 -> 15" +action: 303 + +# State 79 +# Apply action "Point 2" +action: 2 + +# State 80 +# Apply action "Move 19 -> 18" +action: 498 + +# State 81 +# Apply action "Move 1 -> 2" +action: 50 + +# State 82 +# Apply action "Move 7 -> 4" +action: 196 + +# State 83 +# Apply action "Move 2 -> 1" +action: 73 + +# State 84 +# Apply action "Move 10 -> 11" +action: 275 + +# State 85 +# Apply action "Move 22 -> 19" +action: 571 + +# State 86 +# .------W------. +# | | | +# | W----B----. | +# | | | | | +# | | .--.--B | | +# | | | | | | +# W-.-B W-.-. +# | | | | | | +# | | W--W--W | | +# | | | | | +# | B----W----. | +# | | | +# .------.------W +# +# Current player: B +# Turn number: 81 +# Men to deploy: 0 0 +# Num men: 9 4 +IsTerminal() = False +History() = [21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350, 179, 362, 303, 2, 498, 50, 196, 73, 275, 571] +HistoryString() = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350, 179, 362, 303, 2, 498, 50, 196, 73, 275, 571" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350, 179, 362, 303, 2, 498, 50, 196, 73, 275, 571" +InformationStateString(1) = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350, 179, 362, 303, 2, 
498, 50, 196, 73, 275, 571" +ObservationString(0) = ".------W------.\n| | |\n| W----B----. |\n| | | | |\n| | .--.--B | |\n| | | | | |\nW-.-B W-.-.\n| | | | | |\n| | W--W--W | |\n| | | | |\n| B----W----. |\n| | |\n.------.------W\n\nCurrent player: B\nTurn number: 81\nMen to deploy: 0 0\nNum men: 9 4\n" +ObservationString(1) = ".------W------.\n| | |\n| W----B----. |\n| | | | |\n| | .--.--B | |\n| | | | | |\nW-.-B W-.-.\n| | | | | |\n| | W--W--W | |\n| | | | |\n| B----W----. |\n| | |\n.------.------W\n\nCurrent player: B\nTurn number: 81\nMen to deploy: 0 0\nNum men: 9 4\n" +ObservationTensor(0): +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◉◯◯◯◯◯ ◯◯◯◉◯◯◯ ◯◯◯◯◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◯◯◉◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◉◯◯◯◉◯◯ ◯◯◉◯◯◯◯ ◯◉◯◯◯◉◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◉◯◯◯ ◯◉◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◉◯◯◯◯◯ ◯◯◯◉◯◯◯ ◯◯◯◯◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◯◯◉◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◉◯◯◯◉◯◯ ◯◯◉◯◯◯◯ ◯◉◯◯◯◉◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◉◯◯◯ ◯◉◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [125, 127, 223, 294, 298, 466] +StringLegalActions() = ["Move 4 -> 5", "Move 4 -> 7", "Move 8 -> 7", "Move 11 -> 6", "Move 11 -> 10", "Move 18 -> 10"] + +# Apply action "Move 18 -> 10" +action: 466 + +# State 87 +# Apply action "Move 9 -> 21" +action: 261 + +# State 88 +# Apply action "Move 10 -> 18" +action: 282 + +# State 89 +# Apply action "Move 3 -> 10" +action: 106 + +# State 90 +# Apply action "Move 4 -> 5" +action: 125 + +# State 91 +# Apply action "Move 19 -> 20" +action: 500 + +# State 92 +# Apply action "Move 5 -> 4" +action: 148 + +# State 93 +# Apply action "Move 16 -> 19" +action: 427 + +# State 94 +# Apply action "Move 4 -> 7" +action: 127 + +# State 95 +# .------W------. +# | | | +# | .----.----. | +# | | | | | +# | | .--B--B | | +# | | | | | | +# .-W-B W-.-. 
+# | | | | | | +# | | W--.--W | | +# | | | | | +# | B----W----W | +# | | | +# W------.------W +# +# Current player: W +# Turn number: 90 +# Men to deploy: 0 0 +# Num men: 9 4 +IsTerminal() = False +History() = [21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350, 179, 362, 303, 2, 498, 50, 196, 73, 275, 571, 466, 261, 282, 106, 125, 500, 148, 427, 127] +HistoryString() = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350, 179, 362, 303, 2, 498, 50, 196, 73, 275, 571, 466, 261, 282, 106, 125, 500, 148, 427, 127" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350, 179, 362, 303, 2, 498, 50, 196, 73, 275, 571, 466, 261, 282, 106, 125, 500, 148, 427, 127" +InformationStateString(1) = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350, 179, 362, 303, 2, 498, 50, 196, 73, 275, 571, 466, 261, 282, 106, 125, 500, 148, 427, 127" +ObservationString(0) = ".------W------.\n| | |\n| .----.----. |\n| | | | |\n| | .--B--B | |\n| | | | | |\n.-W-B W-.-.\n| | | | | |\n| | W--.--W | |\n| | | | |\n| B----W----W |\n| | |\nW------.------W\n\nCurrent player: W\nTurn number: 90\nMen to deploy: 0 0\nNum men: 9 4\n" +ObservationString(1) = ".------W------.\n| | |\n| .----.----. 
|\n| | | | |\n| | .--B--B | |\n| | | | | |\n.-W-B W-.-.\n| | | | | |\n| | W--.--W | |\n| | | | |\n| B----W----W |\n| | |\nW------.------W\n\nCurrent player: W\nTurn number: 90\nMen to deploy: 0 0\nNum men: 9 4\n" +ObservationTensor(0): +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◯ ◯◯◯◉◉◯◯ ◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◉◯◯◉◯◯ ◯◯◉◯◯◯◯ ◉◯◯◯◯◉◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◉◯◉◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◉◯◉◯ ◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◉◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◯ ◯◯◯◉◉◯◯ ◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◉◯◯◉◯◯ ◯◯◉◯◯◯◯ ◉◯◯◯◯◉◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◉◯◉◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◉◯◉◯ ◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◉◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [50, 52, 267, 273, 325, 400, 448, 496, 502, 517, 537, 550, 590, 598] +StringLegalActions() = ["Move 1 -> 2", "Move 1 -> 4", "Move 10 -> 3", "Move 10 -> 9", "Move 12 -> 13", "Move 15 -> 16", "Move 17 -> 16", "Move 19 -> 16", "Move 19 -> 22", "Move 20 -> 13", "Move 21 -> 9", "Move 21 -> 22", "Move 23 -> 14", "Move 23 -> 22"] + +# Apply action "Move 15 -> 16" +action: 400 + +# State 96 +# Apply action "Move 11 -> 6" +action: 294 + +# State 97 +# Apply action "Point 21" +action: 21 + +# State 98 +# Apply action "Move 1 -> 2" +action: 50 + +# State 99 +# Apply action "Move 7 -> 4" +action: 196 + +# State 100 +# Apply action "Move 23 -> 22" +action: 598 + +# State 101 +# Apply action "Point 8" +action: 8 + +# State 102 +# Apply action "Move 18 -> 8" +action: 464 + +# State 103 +# Apply action "Move 2 -> 14" +action: 86 + +# State 104 +# Apply action "Move 4 -> 18" +action: 138 + +# State 105 +# Apply action "Move 14 -> 2" +action: 362 + +# State 106 +# .------.------W +# | | | +# | .----.----. | +# | | | | | +# | | B--.--B | | +# | | | | | | +# .-W-. W-.-. +# | | | | | | +# | | .--W--W | | +# | | | | | +# | B----W----W | +# | | | +# .------W------. 
+# +# Current player: B +# Turn number: 99 +# Men to deploy: 0 0 +# Num men: 8 3 +IsTerminal() = False +History() = [21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350, 179, 362, 303, 2, 498, 50, 196, 73, 275, 571, 466, 261, 282, 106, 125, 500, 148, 427, 127, 400, 294, 21, 50, 196, 598, 8, 464, 86, 138, 362] +HistoryString() = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350, 179, 362, 303, 2, 498, 50, 196, 73, 275, 571, 466, 261, 282, 106, 125, 500, 148, 427, 127, 400, 294, 21, 50, 196, 598, 8, 464, 86, 138, 362" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350, 179, 362, 303, 2, 498, 50, 196, 73, 275, 571, 466, 261, 282, 106, 125, 500, 148, 427, 127, 400, 294, 21, 50, 196, 598, 8, 464, 86, 138, 362" +InformationStateString(1) = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350, 179, 362, 303, 2, 498, 50, 196, 73, 275, 571, 466, 261, 282, 106, 125, 500, 148, 427, 127, 400, 294, 21, 50, 196, 598, 8, 464, 86, 138, 362" +ObservationString(0) = ".------.------W\n| | |\n| .----.----. |\n| | | | |\n| | B--.--B | |\n| | | | | |\n.-W-. W-.-.\n| | | | | |\n| | .--W--W | |\n| | | | |\n| B----W----W |\n| | |\n.------W------.\n\nCurrent player: B\nTurn number: 99\nMen to deploy: 0 0\nNum men: 8 3\n" +ObservationString(1) = ".------.------W\n| | |\n| .----.----. |\n| | | | |\n| | B--.--B | |\n| | | | | |\n.-W-. 
W-.-.\n| | | | | |\n| | .--W--W | |\n| | | | |\n| B----W----W |\n| | |\n.------W------.\n\nCurrent player: B\nTurn number: 99\nMen to deploy: 0 0\nNum men: 8 3\n" +ObservationTensor(0): +◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◯ ◯◯◉◯◉◯◯ ◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◉◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◯◉◯◯◉◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◉◉◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◉◯◉◯ ◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◯ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◯ ◯◯◉◯◉◯◯ ◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◉◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◯◉◯◯◉◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◉◉◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◯◉◯◉◯ ◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [168, 169, 171, 172, 173, 175, 177, 179, 181, 182, 183, 189, 191, 216, 217, 219, 220, 221, 223, 225, 227, 229, 230, 231, 237, 239, 456, 457, 459, 460, 461, 463, 465, 467, 469, 470, 471, 477, 479] +StringLegalActions() = ["Move 6 -> 0", "Move 6 -> 1", "Move 6 -> 3", "Move 6 -> 4", "Move 6 -> 5", "Move 6 -> 7", "Move 6 -> 9", "Move 6 -> 11", "Move 6 -> 13", "Move 6 -> 14", "Move 6 -> 15", "Move 6 -> 21", "Move 6 -> 23", "Move 8 -> 0", "Move 8 -> 1", "Move 8 -> 3", "Move 8 -> 4", "Move 8 -> 5", "Move 8 -> 7", "Move 8 -> 9", "Move 8 -> 11", "Move 8 -> 13", "Move 8 -> 14", "Move 8 -> 15", "Move 8 -> 21", "Move 8 -> 23", "Move 18 -> 0", "Move 18 -> 1", "Move 18 -> 3", "Move 18 -> 4", "Move 18 -> 5", "Move 18 -> 7", "Move 18 -> 9", "Move 18 -> 11", "Move 18 -> 13", "Move 18 -> 14", "Move 18 -> 15", "Move 18 -> 21", "Move 18 -> 23"] + +# Apply action "Move 18 -> 7" +action: 463 + +# State 107 +# Apply action "Point 2" +action: 2 + +# State 108 +# Apply action "Move 16 -> 15" +action: 423 + +# State 109 +# Apply action "Move 7 -> 2" +action: 194 + +# State 110 +# Apply action "Move 19 -> 18" +action: 498 + +# State 111 +# Apply action "Move 8 -> 4" +action: 220 + +# State 112 +# Apply action "Move 15 -> 11" +action: 395 + +# State 113 +# Apply action "Move 6 -> 3" +action: 171 + +# State 114 +# Apply action "Move 22 -> 21" +action: 573 + +# State 115 +# Apply action "Move 2 -> 16" +action: 88 + +# State 116 +# .------.------. +# | | | +# | B----B----. | +# | | | | | +# | | .--.--. | | +# | | | | | | +# .-W-W W-.-. +# | | | | | | +# | | .--B--W | | +# | | | | | +# | W----.----W | +# | | | +# W------.------. 
+# +# Current player: W +# Turn number: 108 +# Men to deploy: 0 0 +# Num men: 7 3 +IsTerminal() = False +History() = [21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350, 179, 362, 303, 2, 498, 50, 196, 73, 275, 571, 466, 261, 282, 106, 125, 500, 148, 427, 127, 400, 294, 21, 50, 196, 598, 8, 464, 86, 138, 362, 463, 2, 423, 194, 498, 220, 395, 171, 573, 88] +HistoryString() = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350, 179, 362, 303, 2, 498, 50, 196, 73, 275, 571, 466, 261, 282, 106, 125, 500, 148, 427, 127, 400, 294, 21, 50, 196, 598, 8, 464, 86, 138, 362, 463, 2, 423, 194, 498, 220, 395, 171, 573, 88" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350, 179, 362, 303, 2, 498, 50, 196, 73, 275, 571, 466, 261, 282, 106, 125, 500, 148, 427, 127, 400, 294, 21, 50, 196, 598, 8, 464, 86, 138, 362, 463, 2, 423, 194, 498, 220, 395, 171, 573, 88" +InformationStateString(1) = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350, 179, 362, 303, 2, 498, 50, 196, 73, 275, 571, 466, 261, 282, 106, 125, 500, 148, 427, 127, 400, 294, 21, 50, 196, 598, 8, 464, 86, 138, 362, 463, 2, 423, 194, 498, 220, 395, 171, 573, 88" +ObservationString(0) = ".------.------.\n| | |\n| B----B----. |\n| | | | |\n| | .--.--. | |\n| | | | | |\n.-W-W W-.-.\n| | | | | |\n| | .--B--W | |\n| | | | |\n| W----.----W |\n| | |\nW------.------.\n\nCurrent player: W\nTurn number: 108\nMen to deploy: 0 0\nNum men: 7 3\n" +ObservationString(1) = ".------.------.\n| | |\n| B----B----. |\n| | | | |\n| | .--.--. 
| |\n| | | | | |\n.-W-W W-.-.\n| | | | | |\n| | .--B--W | |\n| | | | |\n| W----.----W |\n| | |\nW------.------.\n\nCurrent player: W\nTurn number: 108\nMen to deploy: 0 0\nNum men: 7 3\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◉◯◉◯◯◯ ◯◯◯◯◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◉◉◯◉◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◉◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯ ◯◯◯◉◯◯◯ ◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◉◯◉◯◯◯ ◯◯◯◯◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◉◉◯◉◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◉◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯ ◯◯◯◉◯◯◯ ◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [273, 294, 303, 320, 325, 475, 517, 523, 537, 550] +StringLegalActions() = ["Move 10 -> 9", "Move 11 -> 6", "Move 11 -> 15", "Move 12 -> 8", "Move 12 -> 13", "Move 18 -> 19", "Move 20 -> 13", "Move 20 -> 19", "Move 21 -> 9", "Move 21 -> 22"] + +# Apply action "Move 12 -> 8" +action: 320 + +# State 117 +# Apply action "Move 3 -> 2" +action: 98 + +# State 118 +# Apply action "Move 21 -> 22" +action: 550 + +# State 119 +# Apply action "Move 2 -> 5" +action: 77 + +# State 120 +# Apply action "Move 8 -> 12" +action: 228 + +# State 121 +# Apply action "Move 5 -> 8" +action: 152 + +# State 122 +# Apply action "Move 10 -> 9" +action: 273 + +# State 123 +# Apply action "Move 4 -> 0" +action: 120 + +# State 124 +# Apply action "Move 12 -> 13" +action: 325 + +# State 125 +# B------.------. +# | | | +# | .----.----. | +# | | | | | +# | | .--.--B | | +# | | | | | | +# W-.-W .-W-. +# | | | | | | +# | | .--B--W | | +# | | | | | +# | W----.----W | +# | | | +# .------W------. 
+# +# Current player: B +# Turn number: 117 +# Men to deploy: 0 0 +# Num men: 7 3 +IsTerminal() = False +History() = [21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350, 179, 362, 303, 2, 498, 50, 196, 73, 275, 571, 466, 261, 282, 106, 125, 500, 148, 427, 127, 400, 294, 21, 50, 196, 598, 8, 464, 86, 138, 362, 463, 2, 423, 194, 498, 220, 395, 171, 573, 88, 320, 98, 550, 77, 228, 152, 273, 120, 325] +HistoryString() = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350, 179, 362, 303, 2, 498, 50, 196, 73, 275, 571, 466, 261, 282, 106, 125, 500, 148, 427, 127, 400, 294, 21, 50, 196, 598, 8, 464, 86, 138, 362, 463, 2, 423, 194, 498, 220, 395, 171, 573, 88, 320, 98, 550, 77, 228, 152, 273, 120, 325" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350, 179, 362, 303, 2, 498, 50, 196, 73, 275, 571, 466, 261, 282, 106, 125, 500, 148, 427, 127, 400, 294, 21, 50, 196, 598, 8, 464, 86, 138, 362, 463, 2, 423, 194, 498, 220, 395, 171, 573, 88, 320, 98, 550, 77, 228, 152, 273, 120, 325" +InformationStateString(1) = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350, 179, 362, 303, 2, 498, 50, 196, 73, 275, 571, 466, 261, 282, 106, 125, 500, 148, 427, 127, 400, 294, 21, 50, 196, 598, 8, 464, 86, 138, 362, 463, 2, 423, 194, 498, 220, 395, 171, 573, 88, 320, 98, 550, 77, 228, 152, 273, 120, 325" +ObservationString(0) = "B------.------.\n| | |\n| .----.----. |\n| | | | |\n| | .--.--B | |\n| | | | | |\nW-.-W .-W-.\n| | | | | |\n| | .--B--W | |\n| | | | |\n| W----.----W |\n| | |\n.------W------.\n\nCurrent player: B\nTurn number: 117\nMen to deploy: 0 0\nNum men: 7 3\n" +ObservationString(1) = "B------.------.\n| | |\n| .----.----. 
|\n| | | | |\n| | .--.--B | |\n| | | | | |\nW-.-W .-W-.\n| | | | | |\n| | .--B--W | |\n| | | | |\n| W----.----W |\n| | |\n.------W------.\n\nCurrent player: B\nTurn number: 117\nMen to deploy: 0 0\nNum men: 7 3\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◯◯◉◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◉◯◉◯◯◉◯ ◯◯◯◯◯◯◯ ◯◉◯◯◉◯◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯ ◯◯◯◉◯◯◯ ◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◉◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◯◯◉◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◉◯◉◯◯◉◯ ◯◯◯◯◯◯◯ ◯◉◯◯◉◯◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯ ◯◯◯◉◯◯◯ ◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [25, 26, 27, 28, 29, 30, 31, 34, 36, 38, 39, 43, 45, 47, 217, 218, 219, 220, 221, 222, 223, 226, 228, 230, 231, 235, 237, 239, 409, 410, 411, 412, 413, 414, 415, 418, 420, 422, 423, 427, 429, 431] +StringLegalActions() = ["Move 0 -> 1", "Move 0 -> 2", "Move 0 -> 3", "Move 0 -> 4", "Move 0 -> 5", "Move 0 -> 6", "Move 0 -> 7", "Move 0 -> 10", "Move 0 -> 12", "Move 0 -> 14", "Move 0 -> 15", "Move 0 -> 19", "Move 0 -> 21", "Move 0 -> 23", "Move 8 -> 1", "Move 8 -> 2", "Move 8 -> 3", "Move 8 -> 4", "Move 8 -> 5", "Move 8 -> 6", "Move 8 -> 7", "Move 8 -> 10", "Move 8 -> 12", "Move 8 -> 14", "Move 8 -> 15", "Move 8 -> 19", "Move 8 -> 21", "Move 8 -> 23", "Move 16 -> 1", "Move 16 -> 2", "Move 16 -> 3", "Move 16 -> 4", "Move 16 -> 5", "Move 16 -> 6", "Move 16 -> 7", "Move 16 -> 10", "Move 16 -> 12", "Move 16 -> 14", "Move 16 -> 15", "Move 16 -> 19", "Move 16 -> 21", "Move 16 -> 23"] + +# Apply action "Move 0 -> 5" +action: 29 + +# State 126 +# Apply action "Move 9 -> 21" +action: 261 + +# State 127 +# Apply action "Move 5 -> 3" +action: 147 + +# State 128 +# Apply action "Move 22 -> 19" +action: 571 + +# State 129 +# Apply action "Point 16" +action: 16 + +# State 130 +# .------.------. +# | | | +# | B----.----. | +# | | | | | +# | | .--.--B | | +# | | | | | | +# .-.-W .-W-. +# | | | | | | +# | | .--.--W | | +# | | | | | +# | W----W----W | +# | | | +# W------.------. 
+# +# Current player: B +# Turn number: 121 +# Men to deploy: 0 0 +# Num men: 7 2 +IsTerminal() = True +History() = [21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350, 179, 362, 303, 2, 498, 50, 196, 73, 275, 571, 466, 261, 282, 106, 125, 500, 148, 427, 127, 400, 294, 21, 50, 196, 598, 8, 464, 86, 138, 362, 463, 2, 423, 194, 498, 220, 395, 171, 573, 88, 320, 98, 550, 77, 228, 152, 273, 120, 325, 29, 261, 147, 571, 16] +HistoryString() = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350, 179, 362, 303, 2, 498, 50, 196, 73, 275, 571, 466, 261, 282, 106, 125, 500, 148, 427, 127, 400, 294, 21, 50, 196, 598, 8, 464, 86, 138, 362, 463, 2, 423, 194, 498, 220, 395, 171, 573, 88, 320, 98, 550, 77, 228, 152, 273, 120, 325, 29, 261, 147, 571, 16" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350, 179, 362, 303, 2, 498, 50, 196, 73, 275, 571, 466, 261, 282, 106, 125, 500, 148, 427, 127, 400, 294, 21, 50, 196, 598, 8, 464, 86, 138, 362, 463, 2, 423, 194, 498, 220, 395, 171, 573, 88, 320, 98, 550, 77, 228, 152, 273, 120, 325, 29, 261, 147, 571, 16" +InformationStateString(1) = "21, 0, 2, 15, 11, 5, 7, 1, 10, 23, 4, 13, 20, 22, 14, 8, 9, 5, 5, 267, 571, 106, 15, 498, 198, 223, 550, 200, 303, 348, 395, 8, 325, 573, 475, 127, 148, 200, 498, 550, 127, 523, 356, 228, 517, 261, 33, 267, 356, 329, 200, 496, 250, 444, 517, 537, 52, 73, 127, 423, 23, 196, 400, 125, 383, 148, 425, 127, 303, 356, 590, 475, 383, 517, 400, 350, 179, 362, 303, 2, 498, 50, 196, 73, 275, 571, 466, 261, 282, 106, 125, 500, 148, 427, 127, 400, 294, 21, 50, 196, 598, 8, 464, 86, 138, 362, 463, 2, 423, 194, 498, 220, 395, 171, 573, 88, 320, 98, 550, 77, 228, 152, 273, 120, 325, 29, 261, 147, 571, 16" +ObservationString(0) = ".------.------.\n| | |\n| B----.----. |\n| | | | |\n| | .--.--B | |\n| | | | | |\n.-.-W .-W-.\n| | | | | |\n| | .--.--W | |\n| | | | |\n| W----W----W |\n| | |\nW------.------.\n\nCurrent player: B\nTurn number: 121\nMen to deploy: 0 0\nNum men: 7 2\n" +ObservationString(1) = ".------.------.\n| | |\n| B----.----. 
|\n| | | | |\n| | .--.--B | |\n| | | | | |\n.-.-W .-W-.\n| | | | | |\n| | .--.--W | |\n| | | | |\n| W----W----W |\n| | |\nW------.------.\n\nCurrent player: B\nTurn number: 121\nMen to deploy: 0 0\nNum men: 7 2\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯ ◯◯◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◯◯◉◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◉◯◯◉◯ ◯◯◯◯◯◯◯ ◉◉◯◯◉◯◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◉◯◉◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯ ◯◯◯◉◯◉◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◯◯◉◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◯◉◯◯◉◯ ◯◯◯◯◯◯◯ ◉◉◯◯◉◯◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◯◯◉◉◯◯◯ ◯◯◯◯◯◯◯ ◉◉◯◯◯◉◉ +◯◉◯◉◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◉◯◯ ◉◯◯◯◯◯◉ +◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◉◯◯◉ ◯◉◉◯◉◉◯ ◯◯◯◯◯◯◯ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/normal_form_extensive_game(game=first_sealed_auction(players=3,max_value=3)).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/normal_form_extensive_game(game=first_sealed_auction(players=3,max_value=3)).txt new file mode 100644 index 0000000..1d6bf5c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/normal_form_extensive_game(game=first_sealed_auction(players=3,max_value=3)).txt @@ -0,0 +1,139 @@ +game: normal_form_extensive_game(game=first_sealed_auction(players=3,max_value=3)) + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SIMULTANEOUS +GameType.information = Information.ONE_SHOT +GameType.long_name = "Normal-form First-Price Sealed-Bid Auction" +GameType.max_num_players = 3 +GameType.min_num_players = 3 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = False +GameType.provides_observation_tensor = False +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "normal_form_extensive_game" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 6 +PolicyTensorShape() = [6] +MaxChanceOutcomes() = 0 +GetParameters() = {} +NumPlayers() = 3 +MinUtility() = 0.074074 +MaxUtility() = 1.1111 +UtilitySum() = None +InformationStateTensorShape() = [1] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 1 +MaxGameLength() = 1 +ToString() = "normal_form_extensive_game()" + +# State 0 +# Terminal? false +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +InformationStateString(0) = "Observing player: 0. Non-terminal" +InformationStateString(1) = "Observing player: 1. Non-terminal" +InformationStateString(2) = "Observing player: 2. 
Non-terminal" +InformationStateTensor(0): ◯ +InformationStateTensor(1): ◯ +InformationStateTensor(2): ◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions(0) = [0, 1, 2, 3, 4, 5] +LegalActions(1) = [0, 1, 2, 3, 4, 5] +LegalActions(2) = [0, 1, 2, 3, 4, 5] +StringLegalActions(0) = ["p0 val 1 --- action = 0 +p0 val 2 --- action = 0 +p0 val 3 --- action = 0 +", "p0 val 1 --- action = 0 +p0 val 2 --- action = 1 +p0 val 3 --- action = 0 +", "p0 val 1 --- action = 0 +p0 val 2 --- action = 0 +p0 val 3 --- action = 1 +", "p0 val 1 --- action = 0 +p0 val 2 --- action = 1 +p0 val 3 --- action = 1 +", "p0 val 1 --- action = 0 +p0 val 2 --- action = 0 +p0 val 3 --- action = 2 +", "p0 val 1 --- action = 0 +p0 val 2 --- action = 1 +p0 val 3 --- action = 2 +"] +StringLegalActions(1) = ["p1 val 1 --- action = 0 +p1 val 2 --- action = 0 +p1 val 3 --- action = 0 +", "p1 val 1 --- action = 0 +p1 val 2 --- action = 1 +p1 val 3 --- action = 0 +", "p1 val 1 --- action = 0 +p1 val 2 --- action = 0 +p1 val 3 --- action = 1 +", "p1 val 1 --- action = 0 +p1 val 2 --- action = 1 +p1 val 3 --- action = 1 +", "p1 val 1 --- action = 0 +p1 val 2 --- action = 0 +p1 val 3 --- action = 2 +", "p1 val 1 --- action = 0 +p1 val 2 --- action = 1 +p1 val 3 --- action = 2 +"] +StringLegalActions(2) = ["p2 val 1 --- action = 0 +p2 val 2 --- action = 0 +p2 val 3 --- action = 0 +", "p2 val 1 --- action = 0 +p2 val 2 --- action = 1 +p2 val 3 --- action = 0 +", "p2 val 1 --- action = 0 +p2 val 2 --- action = 0 +p2 val 3 --- action = 1 +", "p2 val 1 --- action = 0 +p2 val 2 --- action = 1 +p2 val 3 --- action = 1 +", "p2 val 1 --- action = 0 +p2 val 2 --- action = 0 +p2 val 3 --- action = 2 +", "p2 val 1 --- action = 0 +p2 val 2 --- action = 1 +p2 val 3 --- action = 2 +"] + +# Apply joint action ["p0 val 1 --- action = 0 +p0 val 2 --- action = 0 +p0 val 3 --- action = 0 +", "p1 val 1 --- action = 0 +p1 val 2 --- action = 0 +p1 val 3 --- action = 0 +", "p2 val 1 --- action = 0 +p2 val 2 --- action = 0 +p2 val 3 --- action = 1 +"] +actions: [0, 0, 2] + +# State 1 +# Terminal? true +# History: 0, 0, 2 +# Returns: 0.444444,0.444444,1 +IsTerminal() = True +History() = [0, 0, 2] +HistoryString() = "0, 0, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "Observing player: 0. Terminal. History string: 0, 0, 2" +InformationStateString(1) = "Observing player: 1. Terminal. History string: 0, 0, 2" +InformationStateString(2) = "Observing player: 2. Terminal. History string: 0, 0, 2" +InformationStateTensor(0): ◉ +InformationStateTensor(1): ◉ +InformationStateTensor(2): ◉ +Rewards() = [0.444444, 0.444444, 1] +Returns() = [0.444444, 0.444444, 1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/oh_hell.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/oh_hell.txt new file mode 100644 index 0000000..f4ab9b1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/oh_hell.txt @@ -0,0 +1,636 @@ +game: oh_hell + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Oh Hell!" 
+GameType.max_num_players = 7 +GameType.min_num_players = 3 +GameType.parameter_specification = ["num_cards_per_suit", "num_suits", "num_tricks_fixed", "off_bid_penalty", "players", "points_per_trick"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = False +GameType.provides_observation_tensor = False +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "oh_hell" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 70 +PolicyTensorShape() = [70] +MaxChanceOutcomes() = 52 +GetParameters() = {num_cards_per_suit=13,num_suits=4,num_tricks_fixed=-1,off_bid_penalty=False,players=3,points_per_trick=1} +NumPlayers() = 3 +MinUtility() = 0.0 +MaxUtility() = 27.0 +UtilitySum() = None +InformationStateTensorShape() = [4704] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 4704 +MaxGameLength() = 108 +ToString() = "oh_hell()" + +# State 0 +# Phase: ChooseNumTricks +# Num Total Tricks: 0 +# Dealer: -3 +# Player: 0 +# C: +# D: +# S: +# H: +# +# Player: 1 +# C: +# D: +# S: +# H: +# +# Player: 2 +# C: +# D: +# S: +# H: +# +# +# +# Bids: -1 -1 -1 +# Tricks Won: 0 0 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "" +InformationStateString(1) = "" +InformationStateString(2) = "" +InformationStateTensor(0): zeros(4704) +InformationStateTensor(1): zeros(4704) +InformationStateTensor(2): zeros(4704) +ChanceOutcomes() = [(1,0.0588235), (2,0.0588235), (3,0.0588235), (4,0.0588235), (5,0.0588235), (6,0.0588235), (7,0.0588235), (8,0.0588235), (9,0.0588235), (10,0.0588235), (11,0.0588235), (12,0.0588235), (13,0.0588235), (14,0.0588235), (15,0.0588235), (16,0.0588235), (17,0.0588235)] +LegalActions() = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] +StringLegalActions() = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17"] + +# Apply action "2" +action: 2 + +# State 1 +# Phase: ChooseDealer +# Num Total Tricks: 2 +# Dealer: -3 +# Player: 0 +# C: +# D: +# S: +# H: +# +# Player: 1 +# C: +# D: +# S: +# H: +# +# Player: 2 +# C: +# D: +# S: +# H: +# +# +# +# Bids: -1 -1 -1 +# Tricks Won: 0 0 0 +IsTerminal() = False +History() = [2] +HistoryString() = "2" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "Num Total Tricks: 2\n" +InformationStateString(1) = "Num Total Tricks: 2\n" +InformationStateString(2) = "Num Total Tricks: 2\n" +InformationStateTensor(0): zeros(4704) +InformationStateTensor(1): zeros(4704) +InformationStateTensor(2): zeros(4704) +ChanceOutcomes() = [(0,0.333333), (1,0.333333), (2,0.333333)] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["0", "1", "2"] + +# Apply action "2" +action: 2 + +# State 2 +# Apply action "H6" +action: 19 + +# State 3 +# Apply action "C5" +action: 12 + +# State 4 +# Apply action "H9" +action: 31 + +# State 5 +# Apply action "C7" +action: 20 + +# State 6 +# Apply action "S8" +action: 26 + +# State 7 +# Apply action "C9" +action: 28 + +# State 8 +# Apply action "H2" +action: 3 + +# State 9 +# Phase: Bid +# Num Total Tricks: 2 +# Dealer: 2 +# Player: 0 +# C: 7 +# D: +# S: +# H: 6 +# +# Player: 1 +# C: 5 +# D: +# S: 8 +# H: +# +# Player: 2 +# C: 9 +# D: +# S: +# H: 9 +# +# Trump: H2 +# +# +# Bids: -1 -1 -1 +# Tricks Won: 0 0 0 +IsTerminal() = False 
+History() = [2, 2, 19, 12, 31, 20, 26, 28, 3] +HistoryString() = "2, 2, 19, 12, 31, 20, 26, 28, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "Num Total Tricks: 2\nDealer: 2\nNum Cards Dealt: 7\nTrump: H2\nPlayer: 0\n C: 7\n D: \n S: \n H: 6\n\n\nBids: -1 -1 -1 \nTricks Won: 0 0 0 \n" +InformationStateString(1) = "Num Total Tricks: 2\nDealer: 2\nNum Cards Dealt: 7\nTrump: H2\nPlayer: 1\n C: 5\n D: \n S: 8\n H: \n\n\nBids: -1 -1 -1 \nTricks Won: 0 0 0 \n" +InformationStateString(2) = "Num Total Tricks: 2\nDealer: 2\nNum Cards Dealt: 7\nTrump: H2\nPlayer: 2\n C: 9\n D: \n S: \n H: 9\n\n\nBids: -1 -1 -1 \nTricks Won: 0 0 0 \n" +InformationStateTensor(0): binvec(4704, 0x400011000000000000000018000000000001800000008000100002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(4704, 0x400011000000000000000800200000000080020000008000100002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(4704, 
0x400011000000000000000000090000000000009000008000100002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [52, 53, 54] +StringLegalActions() = ["0", "1", "2"] + +# Apply action "1" +action: 53 + +# State 10 +# Phase: Bid +# Num Total Tricks: 2 +# Dealer: 2 +# Player: 0 +# C: 7 +# D: +# S: +# H: 6 +# +# Player: 1 +# C: 5 +# D: +# S: 8 +# H: +# +# Player: 2 +# C: 9 +# D: +# S: +# H: 9 +# +# Trump: H2 +# +# +# Bids: 1 -1 -1 +# Tricks Won: 0 0 0 +IsTerminal() = False +History() = [2, 2, 19, 12, 31, 20, 26, 28, 3, 53] +HistoryString() = "2, 2, 19, 12, 31, 20, 26, 28, 3, 53" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "Num Total Tricks: 2\nDealer: 2\nNum Cards Dealt: 7\nTrump: H2\nPlayer: 0\n C: 7\n D: \n S: \n H: 6\n\n\nBids: 1 -1 -1 \nTricks Won: 0 0 0 \n" +InformationStateString(1) = "Num Total Tricks: 2\nDealer: 2\nNum Cards Dealt: 7\nTrump: H2\nPlayer: 1\n C: 5\n D: \n S: 8\n H: \n\n\nBids: 1 -1 -1 \nTricks Won: 0 0 0 \n" +InformationStateString(2) = "Num Total Tricks: 2\nDealer: 2\nNum Cards Dealt: 7\nTrump: H2\nPlayer: 2\n C: 9\n D: \n S: \n H: 9\n\n\nBids: 1 -1 -1 \nTricks Won: 0 0 0 \n" +InformationStateTensor(0): binvec(4704, 0x400011000000000000000018000000000001800000002000100002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): 
binvec(4704, 0x400011000000000000000800200000000080020000002000100002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(4704, 0x400011000000000000000000090000000000009000002000100002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [52, 53, 54] +StringLegalActions() = ["0", "1", "2"] + +# Apply action "2" +action: 54 + +# State 11 +# Phase: Bid +# Num Total Tricks: 2 +# Dealer: 2 +# Player: 0 +# C: 7 +# D: +# S: +# H: 6 +# +# Player: 1 +# C: 5 +# D: +# S: 8 +# H: +# +# Player: 2 +# C: 9 +# D: +# S: +# H: 9 +# +# Trump: H2 +# +# +# Bids: 1 2 -1 +# Tricks Won: 0 0 0 +IsTerminal() = False +History() = [2, 2, 19, 12, 31, 20, 26, 28, 3, 53, 54] +HistoryString() = "2, 2, 19, 12, 31, 20, 26, 28, 3, 53, 54" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "Num Total Tricks: 2\nDealer: 2\nNum Cards Dealt: 7\nTrump: H2\nPlayer: 0\n C: 7\n D: \n S: \n H: 6\n\n\nBids: 1 2 -1 \nTricks Won: 0 0 0 \n" +InformationStateString(1) = "Num Total Tricks: 2\nDealer: 2\nNum Cards Dealt: 7\nTrump: H2\nPlayer: 1\n C: 5\n D: \n S: 8\n H: \n\n\nBids: 1 2 -1 \nTricks Won: 0 0 0 \n" +InformationStateString(2) = "Num Total Tricks: 2\nDealer: 2\nNum Cards Dealt: 7\nTrump: H2\nPlayer: 2\n C: 9\n D: \n S: \n H: 9\n\n\nBids: 1 2 -1 \nTricks Won: 0 0 0 \n" 
+InformationStateTensor(0): binvec(4704, 0x400011000000000000000018000000000001800000002000020002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(4704, 0x400011000000000000000800200000000080020000002000020002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(4704, 
0x400011000000000000000000090000000000009000002000020002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [52, 53, 54] +StringLegalActions() = ["0", "1", "2"] + +# Apply action "1" +action: 53 + +# State 12 +# Phase: Play +# Num Total Tricks: 2 +# Dealer: 2 +# Player: 0 +# C: 7 +# D: +# S: +# H: 6 +# +# Player: 1 +# C: 5 +# D: +# S: 8 +# H: +# +# Player: 2 +# C: 9 +# D: +# S: +# H: 9 +# +# Trump: H2 +# +# +# Bids: 1 2 1 +# Tricks Won: 0 0 0 +IsTerminal() = False +History() = [2, 2, 19, 12, 31, 20, 26, 28, 3, 53, 54, 53] +HistoryString() = "2, 2, 19, 12, 31, 20, 26, 28, 3, 53, 54, 53" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "Num Total Tricks: 2\nDealer: 2\nNum Cards Dealt: 7\nTrump: H2\nPlayer: 0\n C: 7\n D: \n S: \n H: 6\n\n\nBids: 1 2 1 \nTricks Won: 0 0 0 \n" +InformationStateString(1) = "Num Total Tricks: 2\nDealer: 2\nNum Cards Dealt: 7\nTrump: H2\nPlayer: 1\n C: 5\n D: \n S: 8\n H: \n\n\nBids: 1 2 1 \nTricks Won: 0 0 0 \n" +InformationStateString(2) = "Num Total Tricks: 2\nDealer: 2\nNum Cards Dealt: 7\nTrump: H2\nPlayer: 2\n C: 9\n D: \n S: \n H: 9\n\n\nBids: 1 2 1 \nTricks Won: 0 0 0 \n" +InformationStateTensor(0): binvec(4704, 0x400011000000000000000018000000000001800000002000020000800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) 
+InformationStateTensor(1): binvec(4704, 0x400011000000000000000800200000000080020000002000020000800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(4704, 0x400011000000000000000000090000000000009000002000020000800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [19, 20] +StringLegalActions() = ["H6", "C7"] + +# Apply action "H6" +action: 19 + +# State 13 +# Phase: Play +# Num Total Tricks: 2 +# Dealer: 2 +# Player: 0 +# C: 7 +# D: +# S: +# H: +# +# Player: 1 +# C: 5 +# D: +# S: 8 +# H: +# +# Player: 2 +# C: 9 +# D: +# S: +# H: 9 +# +# Trump: H2 +# +# Tricks: +# 0 1 2 0 1 +# H6 +# +# Bids: 1 2 1 +# Tricks Won: 0 0 0 +IsTerminal() = False +History() = [2, 2, 19, 12, 31, 20, 26, 28, 3, 53, 54, 53, 19] +HistoryString() = "2, 2, 19, 12, 31, 20, 26, 28, 3, 53, 54, 53, 19" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "Num Total Tricks: 2\nDealer: 2\nNum Cards Dealt: 7\nTrump: H2\nPlayer: 0\n C: 7\n D: \n S: \n H: \n\nTricks:\n0 1 2 0 1 \nH6 \n\nBids: 1 2 1 \nTricks Won: 0 0 0 \n" +InformationStateString(1) = "Num Total Tricks: 2\nDealer: 2\nNum Cards Dealt: 7\nTrump: H2\nPlayer: 1\n C: 5\n D: \n S: 8\n H: \n\nTricks:\n0 1 2 0 1 \nH6 \n\nBids: 1 2 1 \nTricks Won: 0 0 0 \n" +InformationStateString(2) = "Num Total Tricks: 2\nDealer: 2\nNum 
Cards Dealt: 7\nTrump: H2\nPlayer: 2\n C: 9\n D: \n S: \n H: 9\n\nTricks:\n0 1 2 0 1 \nH6 \n\nBids: 1 2 1 \nTricks Won: 0 0 0 \n" +InformationStateTensor(0): binvec(4704, 0x400011000000000000000018000000000000800000002000020000800000000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(4704, 0x400011000000000000000800200000000080020000002000020000800000000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(4704, 
0x400011000000000000000000090000000000009000002000020000800000000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [12, 26] +StringLegalActions() = ["C5", "S8"] + +# Apply action "C5" +action: 12 + +# State 14 +# Phase: Play +# Num Total Tricks: 2 +# Dealer: 2 +# Player: 0 +# C: 7 +# D: +# S: +# H: +# +# Player: 1 +# C: +# D: +# S: 8 +# H: +# +# Player: 2 +# C: 9 +# D: +# S: +# H: 9 +# +# Trump: H2 +# +# Tricks: +# 0 1 2 0 1 +# H6 C5 +# +# Bids: 1 2 1 +# Tricks Won: 0 0 0 +IsTerminal() = False +History() = [2, 2, 19, 12, 31, 20, 26, 28, 3, 53, 54, 53, 19, 12] +HistoryString() = "2, 2, 19, 12, 31, 20, 26, 28, 3, 53, 54, 53, 19, 12" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "Num Total Tricks: 2\nDealer: 2\nNum Cards Dealt: 7\nTrump: H2\nPlayer: 0\n C: 7\n D: \n S: \n H: \n\nTricks:\n0 1 2 0 1 \nH6 C5 \n\nBids: 1 2 1 \nTricks Won: 0 0 0 \n" +InformationStateString(1) = "Num Total Tricks: 2\nDealer: 2\nNum Cards Dealt: 7\nTrump: H2\nPlayer: 1\n C: \n D: \n S: 8\n H: \n\nTricks:\n0 1 2 0 1 \nH6 C5 \n\nBids: 1 2 1 \nTricks Won: 0 0 0 \n" +InformationStateString(2) = "Num Total Tricks: 2\nDealer: 2\nNum Cards Dealt: 7\nTrump: H2\nPlayer: 2\n C: 9\n D: \n S: \n H: 9\n\nTricks:\n0 1 2 0 1 \nH6 C5 \n\nBids: 1 2 1 \nTricks Won: 0 0 0 \n" +InformationStateTensor(0): binvec(4704, 
0x400011000000000000000018000000000000800000002000020000800000000000000000000100000000000800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(4704, 0x400011000000000000000800200000000000020000002000020000800000000000000000000100000000000800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(4704, 
0x400011000000000000000000090000000000009000002000020000800000000000000000000100000000000800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [31] +StringLegalActions() = ["H9"] + +# Apply action "H9" +action: 31 + +# State 15 +# Phase: Play +# Num Total Tricks: 2 +# Dealer: 2 +# Player: 0 +# C: 7 +# D: +# S: +# H: +# +# Player: 1 +# C: +# D: +# S: 8 +# H: +# +# Player: 2 +# C: 9 +# D: +# S: +# H: +# +# Trump: H2 +# +# Tricks: +# 0 1 2 0 1 +# H6 C5 H9 +# +# Bids: 1 2 1 +# Tricks Won: 0 0 1 +IsTerminal() = False +History() = [2, 2, 19, 12, 31, 20, 26, 28, 3, 53, 54, 53, 19, 12, 31] +HistoryString() = "2, 2, 19, 12, 31, 20, 26, 28, 3, 53, 54, 53, 19, 12, 31" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "Num Total Tricks: 2\nDealer: 2\nNum Cards Dealt: 7\nTrump: H2\nPlayer: 0\n C: 7\n D: \n S: \n H: \n\nTricks:\n0 1 2 0 1 \nH6 C5 H9 \n\nBids: 1 2 1 \nTricks Won: 0 0 1 \n" +InformationStateString(1) = "Num Total Tricks: 2\nDealer: 2\nNum Cards Dealt: 7\nTrump: H2\nPlayer: 1\n C: \n D: \n S: 8\n H: \n\nTricks:\n0 1 2 0 1 \nH6 C5 H9 \n\nBids: 1 2 1 \nTricks Won: 0 0 1 \n" +InformationStateString(2) = "Num Total Tricks: 2\nDealer: 2\nNum Cards Dealt: 7\nTrump: H2\nPlayer: 2\n C: 9\n D: \n S: \n H: \n\nTricks:\n0 1 2 0 1 \nH6 C5 H9 \n\nBids: 1 2 1 \nTricks Won: 0 0 1 \n" +InformationStateTensor(0): binvec(4704, 
0x400011000000000000000018000000000000800000002000020000800000000000100000000100000000000800000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(4704, 0x400011000000000000000800200000000000020000002000020000800000000000100000000100000000000800000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(4704, 
0x400011000000000000000000090000000000008000002000020000800000000000100000000100000000000800000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [28] +StringLegalActions() = ["C9"] + +# Apply action "C9" +action: 28 + +# State 16 +# Phase: Play +# Num Total Tricks: 2 +# Dealer: 2 +# Player: 0 +# C: 7 +# D: +# S: +# H: +# +# Player: 1 +# C: +# D: +# S: 8 +# H: +# +# Player: 2 +# C: +# D: +# S: +# H: +# +# Trump: H2 +# +# Tricks: +# 0 1 2 0 1 +# H6 C5 H9 +# C9 +# +# Bids: 1 2 1 +# Tricks Won: 0 0 1 +IsTerminal() = False +History() = [2, 2, 19, 12, 31, 20, 26, 28, 3, 53, 54, 53, 19, 12, 31, 28] +HistoryString() = "2, 2, 19, 12, 31, 20, 26, 28, 3, 53, 54, 53, 19, 12, 31, 28" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "Num Total Tricks: 2\nDealer: 2\nNum Cards Dealt: 7\nTrump: H2\nPlayer: 0\n C: 7\n D: \n S: \n H: \n\nTricks:\n0 1 2 0 1 \nH6 C5 H9 \n C9 \n\nBids: 1 2 1 \nTricks Won: 0 0 1 \n" +InformationStateString(1) = "Num Total Tricks: 2\nDealer: 2\nNum Cards Dealt: 7\nTrump: H2\nPlayer: 1\n C: \n D: \n S: 8\n H: \n\nTricks:\n0 1 2 0 1 \nH6 C5 H9 \n C9 \n\nBids: 1 2 1 \nTricks Won: 0 0 1 \n" +InformationStateString(2) = "Num Total Tricks: 2\nDealer: 2\nNum Cards Dealt: 7\nTrump: H2\nPlayer: 2\n C: \n D: \n S: \n H: \n\nTricks:\n0 1 2 0 1 \nH6 C5 H9 \n C9 \n\nBids: 1 2 1 \nTricks Won: 0 0 1 \n" +InformationStateTensor(0): binvec(4704, 
0x400011000000000000000018000000000000800000002000020000800000000000100000000100000000000800000000000000001000000000000000000000000000000000000000000000000000000000000000080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(4704, 0x400011000000000000000800200000000000020000002000020000800000000000100000000100000000000800000000000000001000000000000000000000000000000000000000000000000000000000000000080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(4704, 
0x400011000000000000000000090000000000000000002000020000800000000000100000000100000000000800000000000000001000000000000000000000000000000000000000000000000000000000000000080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [20] +StringLegalActions() = ["C7"] + +# Apply action "C7" +action: 20 + +# State 17 +# Phase: Play +# Num Total Tricks: 2 +# Dealer: 2 +# Player: 0 +# C: +# D: +# S: +# H: +# +# Player: 1 +# C: +# D: +# S: 8 +# H: +# +# Player: 2 +# C: +# D: +# S: +# H: +# +# Trump: H2 +# +# Tricks: +# 0 1 2 0 1 +# H6 C5 H9 +# C9 C7 +# +# Bids: 1 2 1 +# Tricks Won: 0 0 1 +IsTerminal() = False +History() = [2, 2, 19, 12, 31, 20, 26, 28, 3, 53, 54, 53, 19, 12, 31, 28, 20] +HistoryString() = "2, 2, 19, 12, 31, 20, 26, 28, 3, 53, 54, 53, 19, 12, 31, 28, 20" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "Num Total Tricks: 2\nDealer: 2\nNum Cards Dealt: 7\nTrump: H2\nPlayer: 0\n C: \n D: \n S: \n H: \n\nTricks:\n0 1 2 0 1 \nH6 C5 H9 \n C9 C7 \n\nBids: 1 2 1 \nTricks Won: 0 0 1 \n" +InformationStateString(1) = "Num Total Tricks: 2\nDealer: 2\nNum Cards Dealt: 7\nTrump: H2\nPlayer: 1\n C: \n D: \n S: 8\n H: \n\nTricks:\n0 1 2 0 1 \nH6 C5 H9 \n C9 C7 \n\nBids: 1 2 1 \nTricks Won: 0 0 1 \n" +InformationStateString(2) = "Num Total Tricks: 2\nDealer: 2\nNum Cards Dealt: 7\nTrump: H2\nPlayer: 2\n C: \n D: \n S: \n H: \n\nTricks:\n0 1 2 0 1 \nH6 C5 H9 \n C9 C7 \n\nBids: 1 2 1 \nTricks Won: 0 0 1 \n" +InformationStateTensor(0): binvec(4704, 
0x400011000000000000000018000000000000000000002000020000800000000000100000000100000000000800000000000000001000000000000000000000000000000000000000000000000000000000000000080000000000800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(4704, 0x400011000000000000000800200000000000020000002000020000800000000000100000000100000000000800000000000000001000000000000000000000000000000000000000000000000000000000000000080000000000800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(4704, 
0x400011000000000000000000090000000000000000002000020000800000000000100000000100000000000800000000000000001000000000000000000000000000000000000000000000000000000000000000080000000000800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [26] +StringLegalActions() = ["S8"] + +# Apply action "S8" +action: 26 + +# State 18 +# Phase: GameOver +# Num Total Tricks: 2 +# Dealer: 2 +# Player: 0 +# C: 7 +# D: +# S: +# H: 6 +# +# Player: 1 +# C: 5 +# D: +# S: 8 +# H: +# +# Player: 2 +# C: 9 +# D: +# S: +# H: 9 +# +# Trump: H2 +# +# Tricks: +# 0 1 2 0 1 +# H6 C5 H9 +# C9 C7 S8 +# +# Bids: 1 2 1 +# Tricks Won: 0 0 2 +# Score: 0 0 2 +IsTerminal() = True +History() = [2, 2, 19, 12, 31, 20, 26, 28, 3, 53, 54, 53, 19, 12, 31, 28, 20, 26] +HistoryString() = "2, 2, 19, 12, 31, 20, 26, 28, 3, 53, 54, 53, 19, 12, 31, 28, 20, 26" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "Phase: GameOver\nNum Total Tricks: 2\nDealer: 2\nPlayer: 0\n C: 7\n D: \n S: \n H: 6\n\nPlayer: 1\n C: 5\n D: \n S: 8\n H: \n\nPlayer: 2\n C: 9\n D: \n S: \n H: 9\n\nTrump: H2\n\nTricks:\n0 1 2 0 1 \nH6 C5 H9 \n C9 C7 S8 \n\nBids: 1 2 1 \nTricks Won: 0 0 2 \nScore: 0 0 2 \n" +InformationStateString(1) = "Phase: GameOver\nNum Total Tricks: 2\nDealer: 2\nPlayer: 0\n C: 7\n D: \n S: \n H: 6\n\nPlayer: 1\n C: 5\n D: \n S: 8\n H: \n\nPlayer: 2\n C: 9\n D: \n S: \n H: 9\n\nTrump: H2\n\nTricks:\n0 1 2 0 1 \nH6 C5 H9 \n C9 C7 S8 \n\nBids: 1 2 1 \nTricks Won: 0 0 2 \nScore: 0 0 2 \n" +InformationStateString(2) = "Phase: GameOver\nNum Total Tricks: 2\nDealer: 2\nPlayer: 0\n C: 7\n D: \n S: \n H: 6\n\nPlayer: 1\n C: 5\n D: \n S: 8\n H: \n\nPlayer: 2\n C: 9\n D: \n S: \n H: 9\n\nTrump: H2\n\nTricks:\n0 1 2 0 1 \nH6 C5 H9 \n C9 C7 S8 \n\nBids: 1 2 1 \nTricks Won: 0 0 2 \nScore: 0 0 2 \n" +InformationStateTensor(0): zeros(4704) +InformationStateTensor(1): zeros(4704) +InformationStateTensor(2): zeros(4704) +Rewards() = [0, 0, 2] +Returns() = [0, 0, 2] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/oshi_zumo.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/oshi_zumo.txt new file mode 100644 index 0000000..579d0ec --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/oshi_zumo.txt @@ -0,0 +1,128 @@ +game: oshi_zumo(coins=10) + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SIMULTANEOUS +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Oshi 
Zumo" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["alesia", "coins", "horizon", "min_bid", "size"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "oshi_zumo" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 11 +PolicyTensorShape() = [11] +MaxChanceOutcomes() = 0 +GetParameters() = {alesia=False,coins=10,horizon=1000,min_bid=0,size=3} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [31] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 31 +MaxGameLength() = 1000 +ToString() = "oshi_zumo(coins=10)" + +# State 0 +# Coins: 10 10, Field: #...W...# +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = "Coins: 10 10, Field: #...W...#\n" +ObservationString(1) = "Coins: 10 10, Field: #...W...#\n" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] +LegalActions(1) = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] +StringLegalActions(0) = ["[P0]Bid: 0", "[P0]Bid: 1", "[P0]Bid: 2", "[P0]Bid: 3", "[P0]Bid: 4", "[P0]Bid: 5", "[P0]Bid: 6", "[P0]Bid: 7", "[P0]Bid: 8", "[P0]Bid: 9", "[P0]Bid: 10"] +StringLegalActions(1) = ["[P1]Bid: 0", "[P1]Bid: 1", "[P1]Bid: 2", "[P1]Bid: 3", "[P1]Bid: 4", "[P1]Bid: 5", "[P1]Bid: 6", "[P1]Bid: 7", "[P1]Bid: 8", "[P1]Bid: 9", "[P1]Bid: 10"] + +# Apply joint action ["[P0]Bid: 6", "[P1]Bid: 10"] +actions: [6, 10] + +# State 1 +# Coins: 4 0, Field: #..W....# +IsTerminal() = False +History() = [6, 10] +HistoryString() = "6, 10" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +InformationStateString(0) = "6, 10" +InformationStateString(1) = "6, 10" +ObservationString(0) = "Coins: 4 0, Field: #..W....#\n" +ObservationString(1) = "Coins: 4 0, Field: #..W....#\n" +ObservationTensor(0): ◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2, 3, 4] +LegalActions(1) = [0] +StringLegalActions(0) = ["[P0]Bid: 0", "[P0]Bid: 1", "[P0]Bid: 2", "[P0]Bid: 3", "[P0]Bid: 4"] +StringLegalActions(1) = ["[P1]Bid: 0"] + +# Apply joint action ["[P0]Bid: 1", "[P1]Bid: 0"] +actions: [1, 0] + +# State 2 +# Coins: 3 0, Field: #...W...# +IsTerminal() = False +History() = [6, 10, 1, 0] +HistoryString() = "6, 10, 1, 0" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +InformationStateString(0) = "6, 10, 1, 0" +InformationStateString(1) = "6, 10, 1, 0" +ObservationString(0) = "Coins: 3 0, Field: #...W...#\n" +ObservationString(1) = "Coins: 3 0, Field: #...W...#\n" +ObservationTensor(0): ◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ +ObservationTensor(1): ◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2, 3] +LegalActions(1) = [0] +StringLegalActions(0) = ["[P0]Bid: 0", "[P0]Bid: 1", "[P0]Bid: 2", "[P0]Bid: 3"] +StringLegalActions(1) = ["[P1]Bid: 0"] + +# Apply joint action ["[P0]Bid: 1", "[P1]Bid: 0"] 
+actions: [1, 0] + +# State 3 +# Apply joint action ["[P0]Bid: 1", "[P1]Bid: 0"] +actions: [1, 0] + +# State 4 +# Apply joint action ["[P0]Bid: 1", "[P1]Bid: 0"] +actions: [1, 0] + +# State 5 +# Coins: 0 0, Field: #......W# +IsTerminal() = True +History() = [6, 10, 1, 0, 1, 0, 1, 0, 1, 0] +HistoryString() = "6, 10, 1, 0, 1, 0, 1, 0, 1, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "6, 10, 1, 0, 1, 0, 1, 0, 1, 0" +InformationStateString(1) = "6, 10, 1, 0, 1, 0, 1, 0, 1, 0" +ObservationString(0) = "Coins: 0 0, Field: #......W#\n" +ObservationString(1) = "Coins: 0 0, Field: #......W#\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/othello.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/othello.txt new file mode 100644 index 0000000..6811cda --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/othello.txt @@ -0,0 +1,754 @@ +game: othello + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Othello" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "othello" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 65 +PolicyTensorShape() = [65] +MaxChanceOutcomes() = 0 +GetParameters() = {} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [3, 8, 8] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 192 +MaxGameLength() = 128 +ToString() = "othello()" + +# State 0 +# Black (x) to play: +# a b c d e f g h +# 1 - - - - - - - - 1 +# 2 - - - - - - - - 2 +# 3 - - - - - - - - 3 +# 4 - - - o x - - - 4 +# 5 - - - x o - - - 5 +# 6 - - - - - - - - 6 +# 7 - - - - - - - - 7 +# 8 - - - - - - - - 8 +# a b c d e f g h +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = "Black (x) to play:\n a b c d e f g h \n1 - - - - - - - - 1\n2 - - - - - - - - 2\n3 - - - - - - - - 3\n4 - - - o x - - - 4\n5 - - - x o - - - 5\n6 - - - - - - - - 6\n7 - - - - - - - - 7\n8 - - - - - - - - 8\n a b c d e f g h " +ObservationString(1) = "Black (x) to play:\n a b c d e f g h \n1 - - - - - - - - 1\n2 - - - - - - - - 2\n3 - - - - - - - - 3\n4 - - - o x - - - 4\n5 - - - x o - - - 5\n6 - - - - - - - - 6\n7 - - - - - - - - 7\n8 - - - - - - - - 8\n a b c d e f g h " +ObservationTensor(0): +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◯◯◉◉◉ ◯◯◯◯◉◯◯◯ ◯◯◯◉◯◯◯◯ +◉◉◉◯◯◉◉◉ ◯◯◯◉◯◯◯◯ ◯◯◯◯◉◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◯◯◉◉◉ ◯◯◯◉◯◯◯◯ ◯◯◯◯◉◯◯◯ +◉◉◉◯◯◉◉◉ ◯◯◯◯◉◯◯◯ ◯◯◯◉◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [19, 26, 37, 44] +StringLegalActions() = ["d3", "c4", "f5", "e6"] + +# Apply action "e6" +action: 44 + +# State 1 +# White (o) to play: +# a b c d e f g h +# 1 - - - - - - - - 1 +# 2 - - - - - - - - 2 +# 3 - - - - - - - - 3 +# 4 - - - o x - - - 4 +# 5 - - - x x - - - 5 +# 6 - - - - x - - - 6 +# 7 - - - - - - - - 7 +# 8 - - - - - - - - 8 +# a b c d e f g h +IsTerminal() = False +History() = [44] +HistoryString() = "44" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "44" +InformationStateString(1) = "44" +ObservationString(0) = "White (o) to play:\n a b c d e f g h \n1 - - - - - - - - 1\n2 - - - - - - - - 2\n3 - - - - - - - - 3\n4 - - - o x - - - 4\n5 - - - x x - - - 5\n6 - - - - x - - - 6\n7 - - - - - - - - 7\n8 - - - - - - - - 8\n a b c d e f g h " +ObservationString(1) = "White (o) to play:\n a b c d e f g h \n1 - - - - - - - - 1\n2 - - - - - - - - 2\n3 - - - - - - - - 3\n4 - - - o x - - - 4\n5 - - - x x - - - 5\n6 - - - - x - - - 6\n7 - - - - - - - - 7\n8 - - - - - - - - 8\n a b c d e f g h " +ObservationTensor(0): +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◯◯◉◉◉ ◯◯◯◯◉◯◯◯ ◯◯◯◉◯◯◯◯ +◉◉◉◯◯◉◉◉ ◯◯◯◉◉◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◯◉◉◉ ◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◯◯◉◉◉ ◯◯◯◉◯◯◯◯ ◯◯◯◯◉◯◯◯ +◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◉◉◯◯◯ +◉◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [29, 43, 45] +StringLegalActions() = ["f4", "d6", "f6"] + +# Apply action "d6" +action: 43 + +# State 2 +# Black (x) to play: +# a b c d e f g h +# 1 - - - - - - - - 1 +# 2 - - - - - - - - 2 +# 3 - - - - - - - - 3 +# 4 - - - o x - - - 4 +# 5 - - - o x - - - 5 +# 6 - - - o x - - - 6 +# 7 - - - - - - - - 7 +# 8 - - - - - - - - 8 +# a b c d e f g h +IsTerminal() = False +History() = [44, 43] +HistoryString() = "44, 43" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "44, 43" +InformationStateString(1) = "44, 43" +ObservationString(0) = "Black (x) to play:\n a b c d e f g h \n1 - - - - - - - - 1\n2 - - - - - - - - 2\n3 - - - - - - - - 3\n4 - - - o x - - - 4\n5 - - - o x - - - 5\n6 - - - o x - - - 6\n7 - - - - - - - - 7\n8 - - - - - - - - 8\n a b c d e f g h " +ObservationString(1) = "Black (x) to play:\n a b c d e f g h \n1 - - - - - - - - 1\n2 - - - - - - - - 2\n3 - - - - - - - - 3\n4 - - - o x - - - 4\n5 - - - o x - - - 5\n6 - - - o x - - - 6\n7 - - - - - - - - 7\n8 - - - - - - - - 8\n a b c d e f g h " +ObservationTensor(0): +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◯◯◉◉◉ ◯◯◯◯◉◯◯◯ ◯◯◯◉◯◯◯◯ +◉◉◉◯◯◉◉◉ ◯◯◯◯◉◯◯◯ ◯◯◯◉◯◯◯◯ +◉◉◉◯◯◉◉◉ ◯◯◯◯◉◯◯◯ ◯◯◯◉◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◯◯◉◉◉ ◯◯◯◉◯◯◯◯ ◯◯◯◯◉◯◯◯ +◉◉◉◯◯◉◉◉ ◯◯◯◉◯◯◯◯ ◯◯◯◯◉◯◯◯ +◉◉◉◯◯◉◉◉ ◯◯◯◉◯◯◯◯ ◯◯◯◯◉◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [18, 26, 34, 42, 50] +StringLegalActions() = ["c3", "c4", "c5", "c6", "c7"] + +# Apply action "c4" +action: 26 + +# State 3 +# White (o) to play: +# a b c d e f g h +# 1 - - - - - - - - 1 
+# 2 - - - - - - - - 2 +# 3 - - - - - - - - 3 +# 4 - - x x x - - - 4 +# 5 - - - x x - - - 5 +# 6 - - - o x - - - 6 +# 7 - - - - - - - - 7 +# 8 - - - - - - - - 8 +# a b c d e f g h +IsTerminal() = False +History() = [44, 43, 26] +HistoryString() = "44, 43, 26" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "44, 43, 26" +InformationStateString(1) = "44, 43, 26" +ObservationString(0) = "White (o) to play:\n a b c d e f g h \n1 - - - - - - - - 1\n2 - - - - - - - - 2\n3 - - - - - - - - 3\n4 - - x x x - - - 4\n5 - - - x x - - - 5\n6 - - - o x - - - 6\n7 - - - - - - - - 7\n8 - - - - - - - - 8\n a b c d e f g h " +ObservationString(1) = "White (o) to play:\n a b c d e f g h \n1 - - - - - - - - 1\n2 - - - - - - - - 2\n3 - - - - - - - - 3\n4 - - x x x - - - 4\n5 - - - x x - - - 5\n6 - - - o x - - - 6\n7 - - - - - - - - 7\n8 - - - - - - - - 8\n a b c d e f g h " +ObservationTensor(0): +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◯◯◯◉◉◉ ◯◯◉◉◉◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◯◯◉◉◉ ◯◯◯◉◉◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◯◯◉◉◉ ◯◯◯◯◉◯◯◯ ◯◯◯◉◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◯◯◯◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◯ +◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◉◉◯◯◯ +◉◉◉◯◯◉◉◉ ◯◯◯◉◯◯◯◯ ◯◯◯◯◉◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [19, 29, 45] +StringLegalActions() = ["d3", "f4", "f6"] + +# Apply action "f4" +action: 29 + +# State 4 +# Black (x) to play: +# a b c d e f g h +# 1 - - - - - - - - 1 +# 2 - - - - - - - - 2 +# 3 - - - - - - - - 3 +# 4 - - x x x o - - 4 +# 5 - - - x o - - - 5 +# 6 - - - o x - - - 6 +# 7 - - - - - - - - 7 +# 8 - - - - - - - - 8 +# a b c d e f g h +IsTerminal() = False +History() = [44, 43, 26, 29] +HistoryString() = "44, 43, 26, 29" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "44, 43, 26, 29" +InformationStateString(1) = "44, 43, 26, 29" +ObservationString(0) = "Black (x) to play:\n a b c d e f g h \n1 - - - - - - - - 1\n2 - - - - - - - - 2\n3 - - - - - - - - 3\n4 - - x x x o - - 4\n5 - - - x o - - - 5\n6 - - - o x - - - 6\n7 - - - - - - - - 7\n8 - - - - - - - - 8\n a b c d e f g h " +ObservationString(1) = "Black (x) to play:\n a b c d e f g h \n1 - - - - - - - - 1\n2 - - - - - - - - 2\n3 - - - - - - - - 3\n4 - - x x x o - - 4\n5 - - - x o - - - 5\n6 - - - o x - - - 6\n7 - - - - - - - - 7\n8 - - - - - - - - 8\n a b c d e f g h " +ObservationTensor(0): +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◯◯◯◯◉◉ ◯◯◉◉◉◯◯◯ ◯◯◯◯◯◉◯◯ +◉◉◉◯◯◉◉◉ ◯◯◯◉◯◯◯◯ ◯◯◯◯◉◯◯◯ +◉◉◉◯◯◉◉◉ ◯◯◯◯◉◯◯◯ ◯◯◯◉◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◯◯◯◯◉◉ ◯◯◯◯◯◉◯◯ ◯◯◉◉◉◯◯◯ +◉◉◉◯◯◉◉◉ ◯◯◯◯◉◯◯◯ ◯◯◯◉◯◯◯◯ +◉◉◉◯◯◉◉◉ ◯◯◯◉◯◯◯◯ ◯◯◯◯◉◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [30, 37, 42, 45, 51] +StringLegalActions() = ["g4", "f5", "c6", "f6", "d7"] + +# Apply action "f6" +action: 45 + +# State 5 +# White (o) to play: +# a b c d e f g h +# 1 - - - - - - - - 1 +# 2 - - - - - - - - 2 +# 3 - - - - - - - - 3 +# 4 - - x x x o - - 4 +# 5 - - - x x - - - 5 +# 6 - - - o x x - - 6 +# 7 - - - - - - - - 7 +# 8 - - - - - - - - 8 +# a b c d e f g h +IsTerminal() = False +History() = [44, 43, 26, 29, 45] 
+HistoryString() = "44, 43, 26, 29, 45" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "44, 43, 26, 29, 45" +InformationStateString(1) = "44, 43, 26, 29, 45" +ObservationString(0) = "White (o) to play:\n a b c d e f g h \n1 - - - - - - - - 1\n2 - - - - - - - - 2\n3 - - - - - - - - 3\n4 - - x x x o - - 4\n5 - - - x x - - - 5\n6 - - - o x x - - 6\n7 - - - - - - - - 7\n8 - - - - - - - - 8\n a b c d e f g h " +ObservationString(1) = "White (o) to play:\n a b c d e f g h \n1 - - - - - - - - 1\n2 - - - - - - - - 2\n3 - - - - - - - - 3\n4 - - x x x o - - 4\n5 - - - x x - - - 5\n6 - - - o x x - - 6\n7 - - - - - - - - 7\n8 - - - - - - - - 8\n a b c d e f g h " +ObservationTensor(0): +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◯◯◯◯◉◉ ◯◯◉◉◉◯◯◯ ◯◯◯◯◯◉◯◯ +◉◉◉◯◯◉◉◉ ◯◯◯◉◉◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◯◯◯◉◉ ◯◯◯◯◉◉◯◯ ◯◯◯◉◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◯◯◯◯◉◉ ◯◯◯◯◯◉◯◯ ◯◯◉◉◉◯◯◯ +◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◉◉◯◯◯ +◉◉◉◯◯◯◉◉ ◯◯◯◉◯◯◯◯ ◯◯◯◯◉◉◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [19, 25, 46] +StringLegalActions() = ["d3", "b4", "g6"] + +# Apply action "b4" +action: 25 + +# State 6 +# Apply action "b3" +action: 17 + +# State 7 +# Apply action "e7" +action: 52 + +# State 8 +# Apply action "f3" +action: 21 + +# State 9 +# Apply action "f2" +action: 13 + +# State 10 +# Apply action "f5" +action: 37 + +# State 11 +# Apply action "c6" +action: 42 + +# State 12 +# Apply action "b6" +action: 41 + +# State 13 +# Apply action "f7" +action: 53 + +# State 14 +# Apply action "c5" +action: 34 + +# State 15 +# Apply action "a2" +action: 8 + +# State 16 +# Apply action "a3" +action: 16 + +# State 17 +# Apply action "c7" +action: 50 + +# State 18 +# Apply action "a1" +action: 0 + +# State 19 +# Apply action "a7" +action: 48 + +# State 20 +# Black (x) to play: +# a b c d e f g h +# 1 x - - - - - - - 1 +# 2 x - - - - o - - 2 +# 3 x o - - - o - - 3 +# 4 - x o o o o - - 4 +# 5 - - o o o o - - 5 +# 6 - o o o o o - - 6 +# 7 o - o - o o - - 7 +# 8 - - - - - - - - 8 +# a b c d e f g h +IsTerminal() = False +History() = [44, 43, 26, 29, 45, 25, 17, 52, 21, 13, 37, 42, 41, 53, 34, 8, 16, 50, 0, 48] +HistoryString() = "44, 43, 26, 29, 45, 25, 17, 52, 21, 13, 37, 42, 41, 53, 34, 8, 16, 50, 0, 48" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "44, 43, 26, 29, 45, 25, 17, 52, 21, 13, 37, 42, 41, 53, 34, 8, 16, 50, 0, 48" +InformationStateString(1) = "44, 43, 26, 29, 45, 25, 17, 52, 21, 13, 37, 42, 41, 53, 34, 8, 16, 50, 0, 48" +ObservationString(0) = "Black (x) to play:\n a b c d e f g h \n1 x - - - - - - - 1\n2 x - - - - o - - 2\n3 x o - - - o - - 3\n4 - x o o o o - - 4\n5 - - o o o o - - 5\n6 - o o o o o - - 6\n7 o - o - o o - - 7\n8 - - - - - - - - 8\n a b c d e f g h " +ObservationString(1) = "Black (x) to play:\n a b c d e f g h \n1 x - - - - - - - 1\n2 x - - - - o - - 2\n3 x o - - - o - - 3\n4 - x o o o o - - 4\n5 - - o o o o - - 5\n6 - o o o o o - - 6\n7 o - o - o o - - 7\n8 - - - - - - - - 8\n a b c d e f g h " +ObservationTensor(0): +◯◉◉◉◉◉◉◉ ◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◉◉◉◉◯◉◉ ◉◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ +◯◯◉◉◉◯◉◉ ◉◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯ +◉◯◯◯◯◯◉◉ ◯◉◯◯◯◯◯◯ ◯◯◉◉◉◉◯◯ +◉◉◯◯◯◯◉◉ ◯◯◯◯◯◯◯◯ ◯◯◉◉◉◉◯◯ +◉◯◯◯◯◯◉◉ ◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯ +◯◉◯◉◯◯◉◉ ◯◯◯◯◯◯◯◯ ◉◯◉◯◉◉◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◉◉◉◉◉◉◉ 
◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯ +◯◉◉◉◉◯◉◉ ◯◯◯◯◯◉◯◯ ◉◯◯◯◯◯◯◯ +◯◯◉◉◉◯◉◉ ◯◉◯◯◯◉◯◯ ◉◯◯◯◯◯◯◯ +◉◯◯◯◯◯◉◉ ◯◯◉◉◉◉◯◯ ◯◉◯◯◯◯◯◯ +◉◉◯◯◯◯◉◉ ◯◯◉◉◉◉◯◯ ◯◯◯◯◯◯◯◯ +◉◯◯◯◯◯◉◉ ◯◉◉◉◉◉◯◯ ◯◯◯◯◯◯◯◯ +◯◉◯◉◯◯◉◉ ◉◯◉◯◉◉◯◯ ◯◯◯◯◯◯◯◯ +◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [9, 18, 30, 61, 62] +StringLegalActions() = ["b2", "c3", "g4", "f8", "g8"] + +# Apply action "f8" +action: 61 + +# State 21 +# White (o) to play: +# a b c d e f g h +# 1 x - - - - - - - 1 +# 2 x - - - - o - - 2 +# 3 x o - - - o - - 3 +# 4 - x o o o o - - 4 +# 5 - - x o o o - - 5 +# 6 - o o x o o - - 6 +# 7 o - o - x o - - 7 +# 8 - - - - - x - - 8 +# a b c d e f g h +IsTerminal() = False +History() = [44, 43, 26, 29, 45, 25, 17, 52, 21, 13, 37, 42, 41, 53, 34, 8, 16, 50, 0, 48, 61] +HistoryString() = "44, 43, 26, 29, 45, 25, 17, 52, 21, 13, 37, 42, 41, 53, 34, 8, 16, 50, 0, 48, 61" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "44, 43, 26, 29, 45, 25, 17, 52, 21, 13, 37, 42, 41, 53, 34, 8, 16, 50, 0, 48, 61" +InformationStateString(1) = "44, 43, 26, 29, 45, 25, 17, 52, 21, 13, 37, 42, 41, 53, 34, 8, 16, 50, 0, 48, 61" +ObservationString(0) = "White (o) to play:\n a b c d e f g h \n1 x - - - - - - - 1\n2 x - - - - o - - 2\n3 x o - - - o - - 3\n4 - x o o o o - - 4\n5 - - x o o o - - 5\n6 - o o x o o - - 6\n7 o - o - x o - - 7\n8 - - - - - x - - 8\n a b c d e f g h " +ObservationString(1) = "White (o) to play:\n a b c d e f g h \n1 x - - - - - - - 1\n2 x - - - - o - - 2\n3 x o - - - o - - 3\n4 - x o o o o - - 4\n5 - - x o o o - - 5\n6 - o o x o o - - 6\n7 o - o - x o - - 7\n8 - - - - - x - - 8\n a b c d e f g h " +ObservationTensor(0): +◯◉◉◉◉◉◉◉ ◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯ +◯◉◉◉◉◯◉◉ ◉◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ +◯◯◉◉◉◯◉◉ ◉◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯ +◉◯◯◯◯◯◉◉ ◯◉◯◯◯◯◯◯ ◯◯◉◉◉◉◯◯ +◉◉◯◯◯◯◉◉ ◯◯◉◯◯◯◯◯ ◯◯◯◉◉◉◯◯ +◉◯◯◯◯◯◉◉ ◯◯◯◉◯◯◯◯ ◯◉◉◯◉◉◯◯ +◯◉◯◉◯◯◉◉ ◯◯◯◯◉◯◯◯ ◉◯◉◯◯◉◯◯ +◉◉◉◉◉◯◉◉ ◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯ +◯◉◉◉◉◯◉◉ ◯◯◯◯◯◉◯◯ ◉◯◯◯◯◯◯◯ +◯◯◉◉◉◯◉◉ ◯◉◯◯◯◉◯◯ ◉◯◯◯◯◯◯◯ +◉◯◯◯◯◯◉◉ ◯◯◉◉◉◉◯◯ ◯◉◯◯◯◯◯◯ +◉◉◯◯◯◯◉◉ ◯◯◯◉◉◉◯◯ ◯◯◉◯◯◯◯◯ +◉◯◯◯◯◯◉◉ ◯◉◉◯◉◉◯◯ ◯◯◯◉◯◯◯◯ +◯◉◯◉◯◯◉◉ ◉◯◉◯◯◉◯◯ ◯◯◯◯◉◯◯◯ +◉◉◉◉◉◯◉◉ ◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [24, 33, 51, 59, 60] +StringLegalActions() = ["a4", "b5", "d7", "d8", "e8"] + +# Apply action "e8" +action: 60 + +# State 22 +# Apply action "d8" +action: 59 + +# State 23 +# Apply action "b5" +action: 33 + +# State 24 +# Apply action "c3" +action: 18 + +# State 25 +# Apply action "d7" +action: 51 + +# State 26 +# Apply action "c8" +action: 58 + +# State 27 +# Apply action "b2" +action: 9 + +# State 28 +# Apply action "c1" +action: 2 + +# State 29 +# Apply action "b7" +action: 49 + +# State 30 +# Apply action "g8" +action: 62 + +# State 31 +# Apply action "b1" +action: 1 + +# State 32 +# Apply action "a6" +action: 40 + +# State 33 +# Apply action "g7" +action: 54 + +# State 34 +# Apply action "a8" +action: 56 + +# State 35 +# Apply action "a5" +action: 32 + +# State 36 +# Apply action "g3" +action: 22 + +# State 37 +# Apply action "b8" +action: 57 + +# State 38 +# Apply action "g5" +action: 38 + +# State 39 +# Apply action "d1" +action: 3 + +# State 40 +# Black (x) to play: +# a b c d e f g h +# 1 x o o o - - - - 1 +# 2 x o - - - o - - 2 +# 3 x o o - - o x - 3 +# 4 - o x o o x - - 4 +# 5 o o o o x x x - 5 +# 6 x o x x x x - - 6 +# 7 x o o o x o o - 7 +# 8 x o x x x x x - 8 +# a b c d e f g h +IsTerminal() = False +History() = [44, 43, 26, 29, 45, 25, 17, 52, 21, 13, 37, 42, 41, 53, 34, 8, 16, 50, 0, 48, 61, 60, 59, 33, 
18, 51, 58, 9, 2, 49, 62, 1, 40, 54, 56, 32, 22, 57, 38, 3] +HistoryString() = "44, 43, 26, 29, 45, 25, 17, 52, 21, 13, 37, 42, 41, 53, 34, 8, 16, 50, 0, 48, 61, 60, 59, 33, 18, 51, 58, 9, 2, 49, 62, 1, 40, 54, 56, 32, 22, 57, 38, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "44, 43, 26, 29, 45, 25, 17, 52, 21, 13, 37, 42, 41, 53, 34, 8, 16, 50, 0, 48, 61, 60, 59, 33, 18, 51, 58, 9, 2, 49, 62, 1, 40, 54, 56, 32, 22, 57, 38, 3" +InformationStateString(1) = "44, 43, 26, 29, 45, 25, 17, 52, 21, 13, 37, 42, 41, 53, 34, 8, 16, 50, 0, 48, 61, 60, 59, 33, 18, 51, 58, 9, 2, 49, 62, 1, 40, 54, 56, 32, 22, 57, 38, 3" +ObservationString(0) = "Black (x) to play:\n a b c d e f g h \n1 x o o o - - - - 1\n2 x o - - - o - - 2\n3 x o o - - o x - 3\n4 - o x o o x - - 4\n5 o o o o x x x - 5\n6 x o x x x x - - 6\n7 x o o o x o o - 7\n8 x o x x x x x - 8\n a b c d e f g h " +ObservationString(1) = "Black (x) to play:\n a b c d e f g h \n1 x o o o - - - - 1\n2 x o - - - o - - 2\n3 x o o - - o x - 3\n4 - o x o o x - - 4\n5 o o o o x x x - 5\n6 x o x x x x - - 6\n7 x o o o x o o - 7\n8 x o x x x x x - 8\n a b c d e f g h " +ObservationTensor(0): +◯◯◯◯◉◉◉◉ ◉◯◯◯◯◯◯◯ ◯◉◉◉◯◯◯◯ +◯◯◉◉◉◯◉◉ ◉◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯ +◯◯◯◉◉◯◯◉ ◉◯◯◯◯◯◉◯ ◯◉◉◯◯◉◯◯ +◉◯◯◯◯◯◉◉ ◯◯◉◯◯◉◯◯ ◯◉◯◉◉◯◯◯ +◯◯◯◯◯◯◯◉ ◯◯◯◯◉◉◉◯ ◉◉◉◉◯◯◯◯ +◯◯◯◯◯◯◉◉ ◉◯◉◉◉◉◯◯ ◯◉◯◯◯◯◯◯ +◯◯◯◯◯◯◯◉ ◉◯◯◯◉◯◯◯ ◯◉◉◉◯◉◉◯ +◯◯◯◯◯◯◯◉ ◉◯◉◉◉◉◉◯ ◯◉◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◉◉◉◉ ◯◉◉◉◯◯◯◯ ◉◯◯◯◯◯◯◯ +◯◯◉◉◉◯◉◉ ◯◉◯◯◯◉◯◯ ◉◯◯◯◯◯◯◯ +◯◯◯◉◉◯◯◉ ◯◉◉◯◯◉◯◯ ◉◯◯◯◯◯◉◯ +◉◯◯◯◯◯◉◉ ◯◉◯◉◉◯◯◯ ◯◯◉◯◯◉◯◯ +◯◯◯◯◯◯◯◉ ◉◉◉◉◯◯◯◯ ◯◯◯◯◉◉◉◯ +◯◯◯◯◯◯◉◉ ◯◉◯◯◯◯◯◯ ◉◯◉◉◉◉◯◯ +◯◯◯◯◯◯◯◉ ◯◉◉◉◯◉◉◯ ◉◯◯◯◉◯◯◯ +◯◯◯◯◯◯◯◉ ◯◉◯◯◯◯◯◯ ◉◯◉◉◉◉◉◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [4, 5, 10, 14, 19, 20, 24, 46, 47, 55, 63] +StringLegalActions() = ["e1", "f1", "c2", "g2", "d3", "e3", "a4", "g6", "h6", "h7", "h8"] + +# Apply action "g2" +action: 14 + +# State 41 +# White (o) to play: +# a b c d e f g h +# 1 x o o o - - - - 1 +# 2 x o - - - o x - 2 +# 3 x o o - - x x - 3 +# 4 - o x o x x - - 4 +# 5 o o o x x x x - 5 +# 6 x o x x x x - - 6 +# 7 x o o o x o o - 7 +# 8 x o x x x x x - 8 +# a b c d e f g h +IsTerminal() = False +History() = [44, 43, 26, 29, 45, 25, 17, 52, 21, 13, 37, 42, 41, 53, 34, 8, 16, 50, 0, 48, 61, 60, 59, 33, 18, 51, 58, 9, 2, 49, 62, 1, 40, 54, 56, 32, 22, 57, 38, 3, 14] +HistoryString() = "44, 43, 26, 29, 45, 25, 17, 52, 21, 13, 37, 42, 41, 53, 34, 8, 16, 50, 0, 48, 61, 60, 59, 33, 18, 51, 58, 9, 2, 49, 62, 1, 40, 54, 56, 32, 22, 57, 38, 3, 14" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "44, 43, 26, 29, 45, 25, 17, 52, 21, 13, 37, 42, 41, 53, 34, 8, 16, 50, 0, 48, 61, 60, 59, 33, 18, 51, 58, 9, 2, 49, 62, 1, 40, 54, 56, 32, 22, 57, 38, 3, 14" +InformationStateString(1) = "44, 43, 26, 29, 45, 25, 17, 52, 21, 13, 37, 42, 41, 53, 34, 8, 16, 50, 0, 48, 61, 60, 59, 33, 18, 51, 58, 9, 2, 49, 62, 1, 40, 54, 56, 32, 22, 57, 38, 3, 14" +ObservationString(0) = "White (o) to play:\n a b c d e f g h \n1 x o o o - - - - 1\n2 x o - - - o x - 2\n3 x o o - - x x - 3\n4 - o x o x x - - 4\n5 o o o x x x x - 5\n6 x o x x x x - - 6\n7 x o o o x o o - 7\n8 x o x x x x x - 8\n a b c d e f g h " +ObservationString(1) = "White (o) to play:\n a b c d e f g h \n1 x o o o - - - - 1\n2 x o - - - o x - 2\n3 x o o - - x x - 3\n4 - o x o x x - - 4\n5 o o o x x x x - 5\n6 x o x x x x - - 6\n7 x o o o x o o - 7\n8 x o x x x x x - 8\n a b c d e f g h " +ObservationTensor(0): +◯◯◯◯◉◉◉◉ ◉◯◯◯◯◯◯◯ ◯◉◉◉◯◯◯◯ +◯◯◉◉◉◯◯◉ ◉◯◯◯◯◯◉◯ 
◯◉◯◯◯◉◯◯ +◯◯◯◉◉◯◯◉ ◉◯◯◯◯◉◉◯ ◯◉◉◯◯◯◯◯ +◉◯◯◯◯◯◉◉ ◯◯◉◯◉◉◯◯ ◯◉◯◉◯◯◯◯ +◯◯◯◯◯◯◯◉ ◯◯◯◉◉◉◉◯ ◉◉◉◯◯◯◯◯ +◯◯◯◯◯◯◉◉ ◉◯◉◉◉◉◯◯ ◯◉◯◯◯◯◯◯ +◯◯◯◯◯◯◯◉ ◉◯◯◯◉◯◯◯ ◯◉◉◉◯◉◉◯ +◯◯◯◯◯◯◯◉ ◉◯◉◉◉◉◉◯ ◯◉◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◉◉◉◉ ◯◉◉◉◯◯◯◯ ◉◯◯◯◯◯◯◯ +◯◯◉◉◉◯◯◉ ◯◉◯◯◯◉◯◯ ◉◯◯◯◯◯◉◯ +◯◯◯◉◉◯◯◉ ◯◉◉◯◯◯◯◯ ◉◯◯◯◯◉◉◯ +◉◯◯◯◯◯◉◉ ◯◉◯◉◯◯◯◯ ◯◯◉◯◉◉◯◯ +◯◯◯◯◯◯◯◉ ◉◉◉◯◯◯◯◯ ◯◯◯◉◉◉◉◯ +◯◯◯◯◯◯◉◉ ◯◉◯◯◯◯◯◯ ◉◯◉◉◉◉◯◯ +◯◯◯◯◯◯◯◉ ◯◉◉◉◯◉◉◯ ◉◯◯◯◉◯◯◯ +◯◯◯◯◯◯◯◉ ◯◉◯◯◯◯◯◯ ◉◯◉◉◉◉◉◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [7, 15, 19, 30, 31, 39, 46, 63] +StringLegalActions() = ["h1", "h2", "d3", "g4", "h4", "h5", "g6", "h8"] + +# Apply action "d3" +action: 19 + +# State 42 +# Apply action "e2" +action: 12 + +# State 43 +# Apply action "h8" +action: 63 + +# State 44 +# Apply action "e3" +action: 20 + +# State 45 +# Apply action "h4" +action: 31 + +# State 46 +# Apply action "h5" +action: 39 + +# State 47 +# Apply action "a4" +action: 24 + +# State 48 +# Apply action "h3" +action: 23 + +# State 49 +# Apply action "d2" +action: 11 + +# State 50 +# Apply action "g6" +action: 46 + +# State 51 +# Apply action "g4" +action: 30 + +# State 52 +# Apply action "c2" +action: 10 + +# State 53 +# Apply action "h2" +action: 15 + +# State 54 +# Apply action "h6" +action: 47 + +# State 55 +# Apply action "f1" +action: 5 + +# State 56 +# Apply action "g1" +action: 6 + +# State 57 +# Apply action "e1" +action: 4 + +# State 58 +# Apply action "h1" +action: 7 + +# State 59 +# Apply action "h7" +action: 55 + +# State 60 +# Terminal State: +# a b c d e f g h +# 1 x o o o o o x x 1 +# 2 x x x o o o x x 2 +# 3 x o o o x o o x 3 +# 4 o o x o o x o x 4 +# 5 o o x o x o x x 5 +# 6 x x o o o x o x 6 +# 7 x o o o o o o o 7 +# 8 x o o o o o o o 8 +# a b c d e f g h +IsTerminal() = True +History() = [44, 43, 26, 29, 45, 25, 17, 52, 21, 13, 37, 42, 41, 53, 34, 8, 16, 50, 0, 48, 61, 60, 59, 33, 18, 51, 58, 9, 2, 49, 62, 1, 40, 54, 56, 32, 22, 57, 38, 3, 14, 19, 12, 63, 20, 31, 39, 24, 23, 11, 46, 30, 10, 15, 47, 5, 6, 4, 7, 55] +HistoryString() = "44, 43, 26, 29, 45, 25, 17, 52, 21, 13, 37, 42, 41, 53, 34, 8, 16, 50, 0, 48, 61, 60, 59, 33, 18, 51, 58, 9, 2, 49, 62, 1, 40, 54, 56, 32, 22, 57, 38, 3, 14, 19, 12, 63, 20, 31, 39, 24, 23, 11, 46, 30, 10, 15, 47, 5, 6, 4, 7, 55" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "44, 43, 26, 29, 45, 25, 17, 52, 21, 13, 37, 42, 41, 53, 34, 8, 16, 50, 0, 48, 61, 60, 59, 33, 18, 51, 58, 9, 2, 49, 62, 1, 40, 54, 56, 32, 22, 57, 38, 3, 14, 19, 12, 63, 20, 31, 39, 24, 23, 11, 46, 30, 10, 15, 47, 5, 6, 4, 7, 55" +InformationStateString(1) = "44, 43, 26, 29, 45, 25, 17, 52, 21, 13, 37, 42, 41, 53, 34, 8, 16, 50, 0, 48, 61, 60, 59, 33, 18, 51, 58, 9, 2, 49, 62, 1, 40, 54, 56, 32, 22, 57, 38, 3, 14, 19, 12, 63, 20, 31, 39, 24, 23, 11, 46, 30, 10, 15, 47, 5, 6, 4, 7, 55" +ObservationString(0) = "Terminal State:\n a b c d e f g h \n1 x o o o o o x x 1\n2 x x x o o o x x 2\n3 x o o o x o o x 3\n4 o o x o o x o x 4\n5 o o x o x o x x 5\n6 x x o o o x o x 6\n7 x o o o o o o o 7\n8 x o o o o o o o 8\n a b c d e f g h " +ObservationString(1) = "Terminal State:\n a b c d e f g h \n1 x o o o o o x x 1\n2 x x x o o o x x 2\n3 x o o o x o o x 3\n4 o o x o o x o x 4\n5 o o x o x o x x 5\n6 x x o o o x o x 6\n7 x o o o o o o o 7\n8 x o o o o o o o 8\n a b c d e f g h " +ObservationTensor(0): +◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◉◉ ◯◉◉◉◉◉◯◯ +◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉ ◯◯◯◉◉◉◯◯ +◯◯◯◯◯◯◯◯ ◉◯◯◯◉◯◯◉ ◯◉◉◉◯◉◉◯ +◯◯◯◯◯◯◯◯ ◯◯◉◯◯◉◯◉ ◉◉◯◉◉◯◉◯ +◯◯◯◯◯◯◯◯ ◯◯◉◯◉◯◉◉ ◉◉◯◉◯◉◯◯ +◯◯◯◯◯◯◯◯ ◉◉◯◯◯◉◯◉ ◯◯◉◉◉◯◉◯ +◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯ 
◉◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯ ◉◯◯◯◯◯◉◉ +◯◯◯◯◯◯◯◯ ◯◯◯◉◉◉◯◯ ◉◉◉◯◯◯◉◉ +◯◯◯◯◯◯◯◯ ◯◉◉◉◯◉◉◯ ◉◯◯◯◉◯◯◉ +◯◯◯◯◯◯◯◯ ◉◉◯◉◉◯◉◯ ◯◯◉◯◯◉◯◉ +◯◯◯◯◯◯◯◯ ◉◉◯◉◯◉◯◯ ◯◯◉◯◉◯◉◉ +◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◯ ◉◉◯◯◯◉◯◉ +◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉◉ ◉◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉◉ ◉◯◯◯◯◯◯◯ +Rewards() = [-1, 1] +Returns() = [-1, 1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/oware.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/oware.txt new file mode 100644 index 0000000..574027a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/oware.txt @@ -0,0 +1,828 @@ +game: oware + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Oware" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["num_houses_per_player", "num_seeds_per_house"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "oware" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 6 +PolicyTensorShape() = [6] +MaxChanceOutcomes() = 0 +GetParameters() = {num_houses_per_player=6,num_seeds_per_house=4} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [14] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 14 +MaxGameLength() = 1000 +ToString() = "oware()" + +# State 0 +# Player 1 score = 0 +# f e d c b a +# 4 4 4 4 4 4 +# 4 4 4 4 4 4 +# A B C D E F +# Player 0 score = 0 [PLAYING] +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "0 | 0 0 | 4 4 4 4 4 4 4 4 4 4 4 4" +ObservationString(1) = "0 | 0 0 | 4 4 4 4 4 4 4 4 4 4 4 4" +ObservationTensor(0) = [0.08333, 0.08333, 0.08333, 0.08333, 0.08333, 0.08333, 0.08333, 0.08333, 0.08333, 0.08333, 0.08333, 0.08333, 0.0, 0.0] +ObservationTensor(1) = [0.08333, 0.08333, 0.08333, 0.08333, 0.08333, 0.08333, 0.08333, 0.08333, 0.08333, 0.08333, 0.08333, 0.08333, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["A", "B", "C", "D", "E", "F"] + +# Apply action "C" +action: 2 + +# State 1 +# Player 1 score = 0 [PLAYING] +# f e d c b a +# 4 4 4 4 4 5 +# 4 4 0 5 5 5 +# A B C D E F +# Player 0 score = 0 +IsTerminal() = False +History() = [2] +HistoryString() = "2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "1 | 0 0 | 4 4 0 5 5 5 5 4 4 4 4 4" +ObservationString(1) = "1 | 0 0 | 4 4 0 5 5 5 5 4 4 4 4 4" +ObservationTensor(0) = [0.08333, 0.08333, 0.0, 0.10417, 0.10417, 0.10417, 0.10417, 0.08333, 0.08333, 0.08333, 0.08333, 0.08333, 0.0, 0.0] +ObservationTensor(1) = [0.08333, 0.08333, 0.0, 0.10417, 0.10417, 0.10417, 0.10417, 0.08333, 0.08333, 0.08333, 0.08333, 0.08333, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["a", "b", "c", "d", "e", "f"] + +# Apply action "f" +action: 5 + +# State 2 +# Player 1 score = 0 +# f e d c b a +# 0 4 4 4 4 5 +# 5 5 1 6 5 5 +# A B C D E F +# Player 0 score = 0 [PLAYING] +IsTerminal() = False 
+History() = [2, 5] +HistoryString() = "2, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "0 | 0 0 | 5 5 1 6 5 5 5 4 4 4 4 0" +ObservationString(1) = "0 | 0 0 | 5 5 1 6 5 5 5 4 4 4 4 0" +ObservationTensor(0) = [0.10417, 0.10417, 0.02083, 0.125, 0.10417, 0.10417, 0.10417, 0.08333, 0.08333, 0.08333, 0.08333, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.10417, 0.10417, 0.02083, 0.125, 0.10417, 0.10417, 0.10417, 0.08333, 0.08333, 0.08333, 0.08333, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["A", "B", "C", "D", "E", "F"] + +# Apply action "C" +action: 2 + +# State 3 +# Player 1 score = 0 [PLAYING] +# f e d c b a +# 0 4 4 4 4 5 +# 5 5 0 7 5 5 +# A B C D E F +# Player 0 score = 0 +IsTerminal() = False +History() = [2, 5, 2] +HistoryString() = "2, 5, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "1 | 0 0 | 5 5 0 7 5 5 5 4 4 4 4 0" +ObservationString(1) = "1 | 0 0 | 5 5 0 7 5 5 5 4 4 4 4 0" +ObservationTensor(0) = [0.10417, 0.10417, 0.0, 0.14583, 0.10417, 0.10417, 0.10417, 0.08333, 0.08333, 0.08333, 0.08333, 0.0, 0.0, 0.0] +ObservationTensor(1) = [0.10417, 0.10417, 0.0, 0.14583, 0.10417, 0.10417, 0.10417, 0.08333, 0.08333, 0.08333, 0.08333, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4] +StringLegalActions() = ["a", "b", "c", "d", "e"] + +# Apply action "b" +action: 1 + +# State 4 +# Player 1 score = 0 +# f e d c b a +# 1 5 5 5 0 5 +# 5 5 0 7 5 5 +# A B C D E F +# Player 0 score = 0 [PLAYING] +IsTerminal() = False +History() = [2, 5, 2, 1] +HistoryString() = "2, 5, 2, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "0 | 0 0 | 5 5 0 7 5 5 5 0 5 5 5 1" +ObservationString(1) = "0 | 0 0 | 5 5 0 7 5 5 5 0 5 5 5 1" +ObservationTensor(0) = [0.10417, 0.10417, 0.0, 0.14583, 0.10417, 0.10417, 0.10417, 0.0, 0.10417, 0.10417, 0.10417, 0.02083, 0.0, 0.0] +ObservationTensor(1) = [0.10417, 0.10417, 0.0, 0.14583, 0.10417, 0.10417, 0.10417, 0.0, 0.10417, 0.10417, 0.10417, 0.02083, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 3, 4, 5] +StringLegalActions() = ["A", "B", "D", "E", "F"] + +# Apply action "B" +action: 1 + +# State 5 +# Player 1 score = 0 [PLAYING] +# f e d c b a +# 1 5 5 5 0 6 +# 5 0 1 8 6 6 +# A B C D E F +# Player 0 score = 0 +IsTerminal() = False +History() = [2, 5, 2, 1, 1] +HistoryString() = "2, 5, 2, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "1 | 0 0 | 5 0 1 8 6 6 6 0 5 5 5 1" +ObservationString(1) = "1 | 0 0 | 5 0 1 8 6 6 6 0 5 5 5 1" +ObservationTensor(0) = [0.10417, 0.0, 0.02083, 0.16667, 0.125, 0.125, 0.125, 0.0, 0.10417, 0.10417, 0.10417, 0.02083, 0.0, 0.0] +ObservationTensor(1) = [0.10417, 0.0, 0.02083, 0.16667, 0.125, 0.125, 0.125, 0.0, 0.10417, 0.10417, 0.10417, 0.02083, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 2, 3, 4, 5] +StringLegalActions() = ["a", "c", "d", "e", "f"] + +# Apply action "c" +action: 2 + +# State 6 +# Apply action "C" +action: 2 + +# State 7 +# Apply action "d" +action: 3 + +# State 8 +# Apply action "F" +action: 5 + +# State 9 +# Apply action "e" +action: 4 + +# State 10 +# Apply action "C" +action: 2 + +# State 11 +# Apply action "d" +action: 3 + +# State 12 +# Apply action "D" +action: 3 + +# State 13 +# Apply action "e" +action: 4 + +# State 14 +# Apply action "A" +action: 
0 + +# State 15 +# Apply action "b" +action: 1 + +# State 16 +# Apply action "D" +action: 3 + +# State 17 +# Apply action "a" +action: 0 + +# State 18 +# Apply action "C" +action: 2 + +# State 19 +# Apply action "c" +action: 2 + +# State 20 +# Player 1 score = 0 +# f e d c b a +# 9 4 5 0 1 0 +# 2 7 0 2 14 4 +# A B C D E F +# Player 0 score = 0 [PLAYING] +IsTerminal() = False +History() = [2, 5, 2, 1, 1, 2, 2, 3, 5, 4, 2, 3, 3, 4, 0, 1, 3, 0, 2, 2] +HistoryString() = "2, 5, 2, 1, 1, 2, 2, 3, 5, 4, 2, 3, 3, 4, 0, 1, 3, 0, 2, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "0 | 0 0 | 2 7 0 2 14 4 0 1 0 5 4 9" +ObservationString(1) = "0 | 0 0 | 2 7 0 2 14 4 0 1 0 5 4 9" +ObservationTensor(0) = [0.04167, 0.14583, 0.0, 0.04167, 0.29167, 0.08333, 0.0, 0.02083, 0.0, 0.10417, 0.08333, 0.1875, 0.0, 0.0] +ObservationTensor(1) = [0.04167, 0.14583, 0.0, 0.04167, 0.29167, 0.08333, 0.0, 0.02083, 0.0, 0.10417, 0.08333, 0.1875, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 3, 4, 5] +StringLegalActions() = ["A", "B", "D", "E", "F"] + +# Apply action "E" +action: 4 + +# State 21 +# Player 1 score = 0 [PLAYING] +# f e d c b a +# 10 5 6 1 0 0 +# 3 8 1 3 0 6 +# A B C D E F +# Player 0 score = 5 +IsTerminal() = False +History() = [2, 5, 2, 1, 1, 2, 2, 3, 5, 4, 2, 3, 3, 4, 0, 1, 3, 0, 2, 2, 4] +HistoryString() = "2, 5, 2, 1, 1, 2, 2, 3, 5, 4, 2, 3, 3, 4, 0, 1, 3, 0, 2, 2, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "1 | 5 0 | 3 8 1 3 0 6 0 0 1 6 5 10" +ObservationString(1) = "1 | 5 0 | 3 8 1 3 0 6 0 0 1 6 5 10" +ObservationTensor(0) = [0.0625, 0.16667, 0.02083, 0.0625, 0.0, 0.125, 0.0, 0.0, 0.02083, 0.125, 0.10417, 0.20833, 0.10417, 0.0] +ObservationTensor(1) = [0.0625, 0.16667, 0.02083, 0.0625, 0.0, 0.125, 0.0, 0.0, 0.02083, 0.125, 0.10417, 0.20833, 0.10417, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2, 3, 4, 5] +StringLegalActions() = ["c", "d", "e", "f"] + +# Apply action "c" +action: 2 + +# State 22 +# Apply action "C" +action: 2 + +# State 23 +# Apply action "e" +action: 4 + +# State 24 +# Apply action "D" +action: 3 + +# State 25 +# Apply action "d" +action: 3 + +# State 26 +# Apply action "A" +action: 0 + +# State 27 +# Apply action "e" +action: 4 + +# State 28 +# Apply action "F" +action: 5 + +# State 29 +# Apply action "b" +action: 1 + +# State 30 +# Apply action "A" +action: 0 + +# State 31 +# Apply action "e" +action: 4 + +# State 32 +# Apply action "E" +action: 4 + +# State 33 +# Apply action "a" +action: 0 + +# State 34 +# Apply action "F" +action: 5 + +# State 35 +# Apply action "b" +action: 1 + +# State 36 +# Apply action "B" +action: 1 + +# State 37 +# Apply action "e" +action: 4 + +# State 38 +# Apply action "A" +action: 0 + +# State 39 +# Apply action "d" +action: 3 + +# State 40 +# Player 1 score = 2 +# f e d c b a +# 18 1 0 6 1 2 +# 1 1 5 4 1 1 +# A B C D E F +# Player 0 score = 5 [PLAYING] +IsTerminal() = False +History() = [2, 5, 2, 1, 1, 2, 2, 3, 5, 4, 2, 3, 3, 4, 0, 1, 3, 0, 2, 2, 4, 2, 2, 4, 3, 3, 0, 4, 5, 1, 0, 4, 4, 0, 5, 1, 1, 4, 0, 3] +HistoryString() = "2, 5, 2, 1, 1, 2, 2, 3, 5, 4, 2, 3, 3, 4, 0, 1, 3, 0, 2, 2, 4, 2, 2, 4, 3, 3, 0, 4, 5, 1, 0, 4, 4, 0, 5, 1, 1, 4, 0, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "0 | 5 2 | 1 1 5 4 1 1 2 1 6 0 1 18" +ObservationString(1) = "0 | 5 2 | 1 1 5 4 1 1 2 1 6 0 1 18" +ObservationTensor(0) = [0.02083, 0.02083, 0.10417, 0.08333, 
0.02083, 0.02083, 0.04167, 0.02083, 0.125, 0.0, 0.02083, 0.375, 0.10417, 0.04167] +ObservationTensor(1) = [0.02083, 0.02083, 0.10417, 0.08333, 0.02083, 0.02083, 0.04167, 0.02083, 0.125, 0.0, 0.02083, 0.375, 0.10417, 0.04167] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["A", "B", "C", "D", "E", "F"] + +# Apply action "D" +action: 3 + +# State 41 +# Player 1 score = 2 [PLAYING] +# f e d c b a +# 18 1 0 6 0 0 +# 1 1 5 0 2 2 +# A B C D E F +# Player 0 score = 10 +IsTerminal() = False +History() = [2, 5, 2, 1, 1, 2, 2, 3, 5, 4, 2, 3, 3, 4, 0, 1, 3, 0, 2, 2, 4, 2, 2, 4, 3, 3, 0, 4, 5, 1, 0, 4, 4, 0, 5, 1, 1, 4, 0, 3, 3] +HistoryString() = "2, 5, 2, 1, 1, 2, 2, 3, 5, 4, 2, 3, 3, 4, 0, 1, 3, 0, 2, 2, 4, 2, 2, 4, 3, 3, 0, 4, 5, 1, 0, 4, 4, 0, 5, 1, 1, 4, 0, 3, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "1 | 10 2 | 1 1 5 0 2 2 0 0 6 0 1 18" +ObservationString(1) = "1 | 10 2 | 1 1 5 0 2 2 0 0 6 0 1 18" +ObservationTensor(0) = [0.02083, 0.02083, 0.10417, 0.0, 0.04167, 0.04167, 0.0, 0.0, 0.125, 0.0, 0.02083, 0.375, 0.20833, 0.04167] +ObservationTensor(1) = [0.02083, 0.02083, 0.10417, 0.0, 0.04167, 0.04167, 0.0, 0.0, 0.125, 0.0, 0.02083, 0.375, 0.20833, 0.04167] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2, 4, 5] +StringLegalActions() = ["c", "e", "f"] + +# Apply action "e" +action: 4 + +# State 42 +# Apply action "B" +action: 1 + +# State 43 +# Apply action "f" +action: 5 + +# State 44 +# Apply action "D" +action: 3 + +# State 45 +# Apply action "c" +action: 2 + +# State 46 +# Apply action "A" +action: 0 + +# State 47 +# Apply action "d" +action: 3 + +# State 48 +# Apply action "F" +action: 5 + +# State 49 +# Apply action "f" +action: 5 + +# State 50 +# Apply action "B" +action: 1 + +# State 51 +# Apply action "b" +action: 1 + +# State 52 +# Apply action "D" +action: 3 + +# State 53 +# Apply action "c" +action: 2 + +# State 54 +# Apply action "C" +action: 2 + +# State 55 +# Apply action "d" +action: 3 + +# State 56 +# Apply action "F" +action: 5 + +# State 57 +# Apply action "f" +action: 5 + +# State 58 +# Apply action "E" +action: 4 + +# State 59 +# Apply action "f" +action: 5 + +# State 60 +# Player 1 score = 10 +# f e d c b a +# 0 9 1 1 1 8 +# 0 2 0 1 0 1 +# A B C D E F +# Player 0 score = 14 [PLAYING] +IsTerminal() = False +History() = [2, 5, 2, 1, 1, 2, 2, 3, 5, 4, 2, 3, 3, 4, 0, 1, 3, 0, 2, 2, 4, 2, 2, 4, 3, 3, 0, 4, 5, 1, 0, 4, 4, 0, 5, 1, 1, 4, 0, 3, 3, 4, 1, 5, 3, 2, 0, 3, 5, 5, 1, 1, 3, 2, 2, 3, 5, 5, 4, 5] +HistoryString() = "2, 5, 2, 1, 1, 2, 2, 3, 5, 4, 2, 3, 3, 4, 0, 1, 3, 0, 2, 2, 4, 2, 2, 4, 3, 3, 0, 4, 5, 1, 0, 4, 4, 0, 5, 1, 1, 4, 0, 3, 3, 4, 1, 5, 3, 2, 0, 3, 5, 5, 1, 1, 3, 2, 2, 3, 5, 5, 4, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "0 | 14 10 | 0 2 0 1 0 1 8 1 1 1 9 0" +ObservationString(1) = "0 | 14 10 | 0 2 0 1 0 1 8 1 1 1 9 0" +ObservationTensor(0) = [0.0, 0.04167, 0.0, 0.02083, 0.0, 0.02083, 0.16667, 0.02083, 0.02083, 0.02083, 0.1875, 0.0, 0.29167, 0.20833] +ObservationTensor(1) = [0.0, 0.04167, 0.0, 0.02083, 0.0, 0.02083, 0.16667, 0.02083, 0.02083, 0.02083, 0.1875, 0.0, 0.29167, 0.20833] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 3, 5] +StringLegalActions() = ["B", "D", "F"] + +# Apply action "D" +action: 3 + +# State 61 +# Player 1 score = 10 [PLAYING] +# f e d c b a +# 0 9 1 1 1 8 +# 0 2 0 0 1 1 +# A B C D E F +# Player 0 score = 14 +IsTerminal() = False +History() = 
[2, 5, 2, 1, 1, 2, 2, 3, 5, 4, 2, 3, 3, 4, 0, 1, 3, 0, 2, 2, 4, 2, 2, 4, 3, 3, 0, 4, 5, 1, 0, 4, 4, 0, 5, 1, 1, 4, 0, 3, 3, 4, 1, 5, 3, 2, 0, 3, 5, 5, 1, 1, 3, 2, 2, 3, 5, 5, 4, 5, 3] +HistoryString() = "2, 5, 2, 1, 1, 2, 2, 3, 5, 4, 2, 3, 3, 4, 0, 1, 3, 0, 2, 2, 4, 2, 2, 4, 3, 3, 0, 4, 5, 1, 0, 4, 4, 0, 5, 1, 1, 4, 0, 3, 3, 4, 1, 5, 3, 2, 0, 3, 5, 5, 1, 1, 3, 2, 2, 3, 5, 5, 4, 5, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "1 | 14 10 | 0 2 0 0 1 1 8 1 1 1 9 0" +ObservationString(1) = "1 | 14 10 | 0 2 0 0 1 1 8 1 1 1 9 0" +ObservationTensor(0) = [0.0, 0.04167, 0.0, 0.0, 0.02083, 0.02083, 0.16667, 0.02083, 0.02083, 0.02083, 0.1875, 0.0, 0.29167, 0.20833] +ObservationTensor(1) = [0.0, 0.04167, 0.0, 0.0, 0.02083, 0.02083, 0.16667, 0.02083, 0.02083, 0.02083, 0.1875, 0.0, 0.29167, 0.20833] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4] +StringLegalActions() = ["a", "b", "c", "d", "e"] + +# Apply action "d" +action: 3 + +# State 62 +# Apply action "B" +action: 1 + +# State 63 +# Apply action "e" +action: 4 + +# State 64 +# Apply action "D" +action: 3 + +# State 65 +# Apply action "b" +action: 1 + +# State 66 +# Apply action "F" +action: 5 + +# State 67 +# Apply action "b" +action: 1 + +# State 68 +# Apply action "E" +action: 4 + +# State 69 +# Apply action "d" +action: 3 + +# State 70 +# Apply action "B" +action: 1 + +# State 71 +# Apply action "f" +action: 5 + +# State 72 +# Apply action "C" +action: 2 + +# State 73 +# Apply action "b" +action: 1 + +# State 74 +# Apply action "F" +action: 5 + +# State 75 +# Apply action "b" +action: 1 + +# State 76 +# Apply action "D" +action: 3 + +# State 77 +# Apply action "c" +action: 2 + +# State 78 +# Apply action "C" +action: 2 + +# State 79 +# Apply action "f" +action: 5 + +# State 80 +# Player 1 score = 14 +# f e d c b a +# 0 2 1 0 0 12 +# 0 1 0 2 2 0 +# A B C D E F +# Player 0 score = 14 [PLAYING] +IsTerminal() = False +History() = [2, 5, 2, 1, 1, 2, 2, 3, 5, 4, 2, 3, 3, 4, 0, 1, 3, 0, 2, 2, 4, 2, 2, 4, 3, 3, 0, 4, 5, 1, 0, 4, 4, 0, 5, 1, 1, 4, 0, 3, 3, 4, 1, 5, 3, 2, 0, 3, 5, 5, 1, 1, 3, 2, 2, 3, 5, 5, 4, 5, 3, 3, 1, 4, 3, 1, 5, 1, 4, 3, 1, 5, 2, 1, 5, 1, 3, 2, 2, 5] +HistoryString() = "2, 5, 2, 1, 1, 2, 2, 3, 5, 4, 2, 3, 3, 4, 0, 1, 3, 0, 2, 2, 4, 2, 2, 4, 3, 3, 0, 4, 5, 1, 0, 4, 4, 0, 5, 1, 1, 4, 0, 3, 3, 4, 1, 5, 3, 2, 0, 3, 5, 5, 1, 1, 3, 2, 2, 3, 5, 5, 4, 5, 3, 3, 1, 4, 3, 1, 5, 1, 4, 3, 1, 5, 2, 1, 5, 1, 3, 2, 2, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "0 | 14 14 | 0 1 0 2 2 0 12 0 0 1 2 0" +ObservationString(1) = "0 | 14 14 | 0 1 0 2 2 0 12 0 0 1 2 0" +ObservationTensor(0) = [0.0, 0.02083, 0.0, 0.04167, 0.04167, 0.0, 0.25, 0.0, 0.0, 0.02083, 0.04167, 0.0, 0.29167, 0.29167] +ObservationTensor(1) = [0.0, 0.02083, 0.0, 0.04167, 0.04167, 0.0, 0.25, 0.0, 0.0, 0.02083, 0.04167, 0.0, 0.29167, 0.29167] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 3, 4] +StringLegalActions() = ["B", "D", "E"] + +# Apply action "E" +action: 4 + +# State 81 +# Player 1 score = 14 [PLAYING] +# f e d c b a +# 0 2 1 0 0 13 +# 0 1 0 2 0 1 +# A B C D E F +# Player 0 score = 14 +IsTerminal() = False +History() = [2, 5, 2, 1, 1, 2, 2, 3, 5, 4, 2, 3, 3, 4, 0, 1, 3, 0, 2, 2, 4, 2, 2, 4, 3, 3, 0, 4, 5, 1, 0, 4, 4, 0, 5, 1, 1, 4, 0, 3, 3, 4, 1, 5, 3, 2, 0, 3, 5, 5, 1, 1, 3, 2, 2, 3, 5, 5, 4, 5, 3, 3, 1, 4, 3, 1, 5, 1, 4, 3, 1, 5, 2, 1, 5, 1, 3, 2, 2, 5, 4] +HistoryString() = "2, 5, 2, 1, 1, 2, 2, 3, 5, 4, 2, 3, 3, 4, 0, 
1, 3, 0, 2, 2, 4, 2, 2, 4, 3, 3, 0, 4, 5, 1, 0, 4, 4, 0, 5, 1, 1, 4, 0, 3, 3, 4, 1, 5, 3, 2, 0, 3, 5, 5, 1, 1, 3, 2, 2, 3, 5, 5, 4, 5, 3, 3, 1, 4, 3, 1, 5, 1, 4, 3, 1, 5, 2, 1, 5, 1, 3, 2, 2, 5, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "1 | 14 14 | 0 1 0 2 0 1 13 0 0 1 2 0" +ObservationString(1) = "1 | 14 14 | 0 1 0 2 0 1 13 0 0 1 2 0" +ObservationTensor(0) = [0.0, 0.02083, 0.0, 0.04167, 0.0, 0.02083, 0.27083, 0.0, 0.0, 0.02083, 0.04167, 0.0, 0.29167, 0.29167] +ObservationTensor(1) = [0.0, 0.02083, 0.0, 0.04167, 0.0, 0.02083, 0.27083, 0.0, 0.0, 0.02083, 0.04167, 0.0, 0.29167, 0.29167] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 3, 4] +StringLegalActions() = ["a", "d", "e"] + +# Apply action "e" +action: 4 + +# State 82 +# Apply action "D" +action: 3 + +# State 83 +# Apply action "d" +action: 3 + +# State 84 +# Apply action "B" +action: 1 + +# State 85 +# Apply action "a" +action: 0 + +# State 86 +# Apply action "E" +action: 4 + +# State 87 +# Apply action "a" +action: 0 + +# State 88 +# Apply action "C" +action: 2 + +# State 89 +# Apply action "b" +action: 1 + +# State 90 +# Apply action "F" +action: 5 + +# State 91 +# Apply action "a" +action: 0 + +# State 92 +# Apply action "D" +action: 3 + +# State 93 +# Apply action "e" +action: 4 + +# State 94 +# Apply action "F" +action: 5 + +# State 95 +# Apply action "f" +action: 5 + +# State 96 +# Apply action "B" +action: 1 + +# State 97 +# Apply action "b" +action: 1 + +# State 98 +# Apply action "A" +action: 0 + +# State 99 +# Apply action "a" +action: 0 + +# State 100 +# Player 1 score = 19 +# f e d c b a +# 0 0 1 5 1 0 +# 0 1 2 0 2 0 +# A B C D E F +# Player 0 score = 17 [PLAYING] +IsTerminal() = False +History() = [2, 5, 2, 1, 1, 2, 2, 3, 5, 4, 2, 3, 3, 4, 0, 1, 3, 0, 2, 2, 4, 2, 2, 4, 3, 3, 0, 4, 5, 1, 0, 4, 4, 0, 5, 1, 1, 4, 0, 3, 3, 4, 1, 5, 3, 2, 0, 3, 5, 5, 1, 1, 3, 2, 2, 3, 5, 5, 4, 5, 3, 3, 1, 4, 3, 1, 5, 1, 4, 3, 1, 5, 2, 1, 5, 1, 3, 2, 2, 5, 4, 4, 3, 3, 1, 0, 4, 0, 2, 1, 5, 0, 3, 4, 5, 5, 1, 1, 0, 0] +HistoryString() = "2, 5, 2, 1, 1, 2, 2, 3, 5, 4, 2, 3, 3, 4, 0, 1, 3, 0, 2, 2, 4, 2, 2, 4, 3, 3, 0, 4, 5, 1, 0, 4, 4, 0, 5, 1, 1, 4, 0, 3, 3, 4, 1, 5, 3, 2, 0, 3, 5, 5, 1, 1, 3, 2, 2, 3, 5, 5, 4, 5, 3, 3, 1, 4, 3, 1, 5, 1, 4, 3, 1, 5, 2, 1, 5, 1, 3, 2, 2, 5, 4, 4, 3, 3, 1, 0, 4, 0, 2, 1, 5, 0, 3, 4, 5, 5, 1, 1, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "0 | 17 19 | 0 1 2 0 2 0 0 1 5 1 0 0" +ObservationString(1) = "0 | 17 19 | 0 1 2 0 2 0 0 1 5 1 0 0" +ObservationTensor(0) = [0.0, 0.02083, 0.04167, 0.0, 0.04167, 0.0, 0.0, 0.02083, 0.10417, 0.02083, 0.0, 0.0, 0.35417, 0.39583] +ObservationTensor(1) = [0.0, 0.02083, 0.04167, 0.0, 0.04167, 0.0, 0.0, 0.02083, 0.10417, 0.02083, 0.0, 0.0, 0.35417, 0.39583] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 4] +StringLegalActions() = ["B", "C", "E"] + +# Apply action "E" +action: 4 + +# State 101 +# Player 1 score = 19 [PLAYING] +# f e d c b a +# 0 0 1 5 1 1 +# 0 1 2 0 0 1 +# A B C D E F +# Player 0 score = 17 +IsTerminal() = False +History() = [2, 5, 2, 1, 1, 2, 2, 3, 5, 4, 2, 3, 3, 4, 0, 1, 3, 0, 2, 2, 4, 2, 2, 4, 3, 3, 0, 4, 5, 1, 0, 4, 4, 0, 5, 1, 1, 4, 0, 3, 3, 4, 1, 5, 3, 2, 0, 3, 5, 5, 1, 1, 3, 2, 2, 3, 5, 5, 4, 5, 3, 3, 1, 4, 3, 1, 5, 1, 4, 3, 1, 5, 2, 1, 5, 1, 3, 2, 2, 5, 4, 4, 3, 3, 1, 0, 4, 0, 2, 1, 5, 0, 3, 4, 5, 5, 1, 1, 0, 0, 4] +HistoryString() = "2, 5, 2, 1, 1, 2, 2, 3, 5, 4, 2, 3, 3, 4, 0, 1, 3, 0, 2, 2, 4, 2, 2, 4, 3, 3, 
0, 4, 5, 1, 0, 4, 4, 0, 5, 1, 1, 4, 0, 3, 3, 4, 1, 5, 3, 2, 0, 3, 5, 5, 1, 1, 3, 2, 2, 3, 5, 5, 4, 5, 3, 3, 1, 4, 3, 1, 5, 1, 4, 3, 1, 5, 2, 1, 5, 1, 3, 2, 2, 5, 4, 4, 3, 3, 1, 0, 4, 0, 2, 1, 5, 0, 3, 4, 5, 5, 1, 1, 0, 0, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "1 | 17 19 | 0 1 2 0 0 1 1 1 5 1 0 0" +ObservationString(1) = "1 | 17 19 | 0 1 2 0 0 1 1 1 5 1 0 0" +ObservationTensor(0) = [0.0, 0.02083, 0.04167, 0.0, 0.0, 0.02083, 0.02083, 0.02083, 0.10417, 0.02083, 0.0, 0.0, 0.35417, 0.39583] +ObservationTensor(1) = [0.0, 0.02083, 0.04167, 0.0, 0.0, 0.02083, 0.02083, 0.02083, 0.10417, 0.02083, 0.0, 0.0, 0.35417, 0.39583] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["a", "b", "c", "d"] + +# Apply action "c" +action: 2 + +# State 102 +# Apply action "C" +action: 2 + +# State 103 +# Apply action "e" +action: 4 + +# State 104 +# Apply action "A" +action: 0 + +# State 105 +# Apply action "f" +action: 5 + +# State 106 +# Apply action "D" +action: 3 + +# State 107 +# Apply action "d" +action: 3 + +# State 108 +# Apply action "F" +action: 5 + +# State 109 +# Apply action "f" +action: 5 + +# State 110 +# [FINISHED] +# Player 1 score = 27 +# f e d c b a +# 0 0 0 0 0 0 +# 0 0 0 0 0 0 +# A B C D E F +# Player 0 score = 21 +IsTerminal() = True +History() = [2, 5, 2, 1, 1, 2, 2, 3, 5, 4, 2, 3, 3, 4, 0, 1, 3, 0, 2, 2, 4, 2, 2, 4, 3, 3, 0, 4, 5, 1, 0, 4, 4, 0, 5, 1, 1, 4, 0, 3, 3, 4, 1, 5, 3, 2, 0, 3, 5, 5, 1, 1, 3, 2, 2, 3, 5, 5, 4, 5, 3, 3, 1, 4, 3, 1, 5, 1, 4, 3, 1, 5, 2, 1, 5, 1, 3, 2, 2, 5, 4, 4, 3, 3, 1, 0, 4, 0, 2, 1, 5, 0, 3, 4, 5, 5, 1, 1, 0, 0, 4, 2, 2, 4, 0, 5, 3, 3, 5, 5] +HistoryString() = "2, 5, 2, 1, 1, 2, 2, 3, 5, 4, 2, 3, 3, 4, 0, 1, 3, 0, 2, 2, 4, 2, 2, 4, 3, 3, 0, 4, 5, 1, 0, 4, 4, 0, 5, 1, 1, 4, 0, 3, 3, 4, 1, 5, 3, 2, 0, 3, 5, 5, 1, 1, 3, 2, 2, 3, 5, 5, 4, 5, 3, 3, 1, 4, 3, 1, 5, 1, 4, 3, 1, 5, 2, 1, 5, 1, 3, 2, 2, 5, 4, 4, 3, 3, 1, 0, 4, 0, 2, 1, 5, 0, 3, 4, 5, 5, 1, 1, 0, 0, 4, 2, 2, 4, 0, 5, 3, 3, 5, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "0 | 21 27 | 0 0 0 0 0 0 0 0 0 0 0 0" +ObservationString(1) = "0 | 21 27 | 0 0 0 0 0 0 0 0 0 0 0 0" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4375, 0.5625] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4375, 0.5625] +Rewards() = [-1, 1] +Returns() = [-1, 1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/pathfinding.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/pathfinding.txt new file mode 100644 index 0000000..52700a9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/pathfinding.txt @@ -0,0 +1,2490 @@ +game: pathfinding + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SIMULTANEOUS +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Pathfinding" +GameType.max_num_players = 10 +GameType.min_num_players = 1 +GameType.parameter_specification = ["grid", "group_reward", "horizon", "players", "solve_reward", "step_reward"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.REWARDS 
+GameType.short_name = "pathfinding" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 5 +PolicyTensorShape() = [5] +MaxChanceOutcomes() = 1 +GetParameters() = {grid=A.*..**\n..*....\n....*a.\n,group_reward=100.0,horizon=1000,players=1,solve_reward=100.0,step_reward=-0.01} +NumPlayers() = 1 +MinUtility() = -10.0 +MaxUtility() = 200.0 +UtilitySum() = None +ObservationTensorShape() = [5, 3, 7] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 105 +MaxGameLength() = 1000 +ToString() = "pathfinding()" + +# State 0 +# ..*..** +# ..*.... +# ....*0. +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*....\n....*0.\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◉◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◯◉ +Rewards() = [0] +Returns() = [0] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Stay"] +actions: [0] + +# State 1 +# ..*..** +# ..*.... +# ....*0. +IsTerminal() = False +History() = [0] +HistoryString() = "0" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*....\n....*0.\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◉◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◯◉ +Rewards() = [-0.01] +Returns() = [-0.01] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Down"] +actions: [4] + +# State 2 +# ..*..** +# ..*.... +# ....*0. +IsTerminal() = False +History() = [0, 4] +HistoryString() = "0, 4" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*....\n....*0.\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◉◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◯◉ +Rewards() = [-0.01] +Returns() = [-0.02] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Stay"] +actions: [0] + +# State 3 +# Apply joint action ["Right"] +actions: [3] + +# State 4 +# Apply joint action ["Down"] +actions: [4] + +# State 5 +# Apply joint action ["Left"] +actions: [1] + +# State 6 +# Apply joint action ["Up"] +actions: [2] + +# State 7 +# Apply joint action ["Stay"] +actions: [0] + +# State 8 +# Apply joint action ["Up"] +actions: [2] + +# State 9 +# Apply joint action ["Up"] +actions: [2] + +# State 10 +# ..*..** +# ..*..0. +# ....*.. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*..0.\n....*..\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◯◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-0.1] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Stay"] +actions: [0] + +# State 11 +# Apply joint action ["Stay"] +actions: [0] + +# State 12 +# Apply joint action ["Left"] +actions: [1] + +# State 13 +# Apply joint action ["Left"] +actions: [1] + +# State 14 +# Apply joint action ["Stay"] +actions: [0] + +# State 15 +# Apply joint action ["Down"] +actions: [4] + +# State 16 +# Apply joint action ["Stay"] +actions: [0] + +# State 17 +# Apply joint action ["Down"] +actions: [4] + +# State 18 +# Apply joint action ["Left"] +actions: [1] + +# State 19 +# Apply joint action ["Left"] +actions: [1] + +# State 20 +# ..*..** +# ..*.... +# .0..*.. +IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*....\n.0..*..\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◉◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◯◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-0.2] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Down"] +actions: [4] + +# State 21 +# Apply joint action ["Right"] +actions: [3] + +# State 22 +# Apply joint action ["Down"] +actions: [4] + +# State 23 +# Apply joint action ["Stay"] +actions: [0] + +# State 24 +# Apply joint action ["Stay"] +actions: [0] + +# State 25 +# Apply joint action ["Up"] +actions: [2] + +# State 26 +# Apply joint action ["Right"] +actions: [3] + +# State 27 +# Apply joint action ["Up"] +actions: [2] + +# State 28 +# Apply joint action ["Stay"] +actions: [0] + +# State 29 +# Apply joint action ["Right"] +actions: [3] + +# State 30 +# ..*..** +# ..*.0.. +# ....*.. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*.0..\n....*..\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-0.3] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Left"] +actions: [1] + +# State 31 +# Apply joint action ["Stay"] +actions: [0] + +# State 32 +# Apply joint action ["Stay"] +actions: [0] + +# State 33 +# Apply joint action ["Right"] +actions: [3] + +# State 34 +# Apply joint action ["Down"] +actions: [4] + +# State 35 +# Apply joint action ["Left"] +actions: [1] + +# State 36 +# Apply joint action ["Stay"] +actions: [0] + +# State 37 +# Apply joint action ["Up"] +actions: [2] + +# State 38 +# Apply joint action ["Stay"] +actions: [0] + +# State 39 +# Apply joint action ["Stay"] +actions: [0] + +# State 40 +# ..*0.** +# ..*.... +# ....*.. +IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*0.**\n..*....\n....*..\n" +ObservationTensor(0): +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-0.4] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Right"] +actions: [3] + +# State 41 +# Apply joint action ["Stay"] +actions: [0] + +# State 42 +# Apply joint action ["Up"] +actions: [2] + +# State 43 +# Apply joint action ["Stay"] +actions: [0] + +# State 44 +# Apply joint action ["Down"] +actions: [4] + +# State 45 +# Apply joint action ["Down"] +actions: [4] + +# State 46 +# Apply joint action ["Stay"] +actions: [0] + +# State 47 +# Apply joint action ["Down"] +actions: [4] + +# State 48 +# Apply joint action ["Down"] +actions: [4] + +# State 49 +# Apply joint action ["Up"] +actions: [2] + +# State 50 +# ..*.0** +# ..*.... +# ....*.. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*.0**\n..*....\n....*..\n" +ObservationTensor(0): +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-0.5] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Right"] +actions: [3] + +# State 51 +# Apply joint action ["Left"] +actions: [1] + +# State 52 +# Apply joint action ["Up"] +actions: [2] + +# State 53 +# Apply joint action ["Left"] +actions: [1] + +# State 54 +# Apply joint action ["Stay"] +actions: [0] + +# State 55 +# Apply joint action ["Stay"] +actions: [0] + +# State 56 +# Apply joint action ["Right"] +actions: [3] + +# State 57 +# Apply joint action ["Up"] +actions: [2] + +# State 58 +# Apply joint action ["Right"] +actions: [3] + +# State 59 +# Apply joint action ["Down"] +actions: [4] + +# State 60 +# ..*..** +# ..*.0.. +# ....*.. +IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*.0..\n....*..\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-0.6] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Up"] +actions: [2] + +# State 61 +# Apply joint action ["Left"] +actions: [1] + +# State 62 +# Apply joint action ["Left"] +actions: [1] + +# State 63 +# Apply joint action ["Down"] +actions: [4] + +# State 64 +# Apply joint action ["Down"] +actions: [4] + +# State 65 +# Apply joint action ["Stay"] +actions: [0] + +# State 66 +# Apply joint action ["Up"] +actions: [2] + +# State 67 +# Apply joint action ["Stay"] +actions: [0] + +# State 68 +# Apply joint action ["Left"] +actions: [1] + +# State 69 +# Apply joint action ["Up"] +actions: [2] + +# State 70 +# ..*0.** +# ..*.... +# ....*.. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*0.**\n..*....\n....*..\n" +ObservationTensor(0): +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-0.7] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Stay"] +actions: [0] + +# State 71 +# Apply joint action ["Right"] +actions: [3] + +# State 72 +# Apply joint action ["Down"] +actions: [4] + +# State 73 +# Apply joint action ["Stay"] +actions: [0] + +# State 74 +# Apply joint action ["Left"] +actions: [1] + +# State 75 +# Apply joint action ["Right"] +actions: [3] + +# State 76 +# Apply joint action ["Stay"] +actions: [0] + +# State 77 +# Apply joint action ["Left"] +actions: [1] + +# State 78 +# Apply joint action ["Down"] +actions: [4] + +# State 79 +# Apply joint action ["Up"] +actions: [2] + +# State 80 +# ..*..** +# ..*0... +# ....*.. +IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*0...\n....*..\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◯◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-0.8] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Stay"] +actions: [0] + +# State 81 +# Apply joint action ["Left"] +actions: [1] + +# State 82 +# Apply joint action ["Stay"] +actions: [0] + +# State 83 +# Apply joint action ["Right"] +actions: [3] + +# State 84 +# Apply joint action ["Stay"] +actions: [0] + +# State 85 +# Apply joint action ["Right"] +actions: [3] + +# State 86 +# Apply joint action ["Down"] +actions: [4] + +# State 87 +# Apply joint action ["Down"] +actions: [4] + +# State 88 +# Apply joint action ["Stay"] +actions: [0] + +# State 89 +# Apply joint action ["Right"] +actions: [3] + +# State 90 +# ..*..** +# ..*.... 
+# ....*.0 +IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*....\n....*.0\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◯◉ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◯ +Rewards() = [-0.01] +Returns() = [-0.9] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Stay"] +actions: [0] + +# State 91 +# Apply joint action ["Left"] +actions: [1] + +# State 92 +# Apply joint action ["Left"] +actions: [1] + +# State 93 +# Apply joint action ["Down"] +actions: [4] + +# State 94 +# Apply joint action ["Up"] +actions: [2] + +# State 95 +# Apply joint action ["Down"] +actions: [4] + +# State 96 +# Apply joint action ["Stay"] +actions: [0] + +# State 97 +# Apply joint action ["Right"] +actions: [3] + +# State 98 +# Apply joint action ["Down"] +actions: [4] + +# State 99 +# Apply joint action ["Up"] +actions: [2] + +# State 100 +# ..*..** +# ..*...0 +# ....*.. +IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*...0\n....*..\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-1] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Down"] +actions: [4] + +# State 101 +# Apply joint action ["Stay"] +actions: [0] + +# State 102 +# Apply joint action ["Stay"] +actions: [0] + +# State 103 +# Apply joint action ["Right"] +actions: [3] + +# State 104 +# Apply joint action ["Right"] +actions: [3] + +# State 105 +# Apply joint action ["Down"] +actions: [4] + +# State 106 +# Apply joint action ["Down"] +actions: [4] + +# State 107 +# Apply joint action ["Up"] +actions: [2] + +# State 108 +# Apply joint action ["Right"] +actions: [3] + +# State 109 +# Apply joint action ["Up"] +actions: [2] + +# State 110 +# ..*..** +# ..*...0 +# ....*.. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*...0\n....*..\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-1.1] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Left"] +actions: [1] + +# State 111 +# Apply joint action ["Stay"] +actions: [0] + +# State 112 +# Apply joint action ["Right"] +actions: [3] + +# State 113 +# Apply joint action ["Up"] +actions: [2] + +# State 114 +# Apply joint action ["Right"] +actions: [3] + +# State 115 +# Apply joint action ["Stay"] +actions: [0] + +# State 116 +# Apply joint action ["Left"] +actions: [1] + +# State 117 +# Apply joint action ["Right"] +actions: [3] + +# State 118 +# Apply joint action ["Stay"] +actions: [0] + +# State 119 +# Apply joint action ["Stay"] +actions: [0] + +# State 120 +# ..*..** +# ..*...0 +# ....*.. +IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*...0\n....*..\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-1.2] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Down"] +actions: [4] + +# State 121 +# Apply joint action ["Stay"] +actions: [0] + +# State 122 +# Apply joint action ["Up"] +actions: [2] + +# State 123 +# Apply joint action ["Right"] +actions: [3] + +# State 124 +# Apply joint action ["Left"] +actions: [1] + +# State 125 +# Apply joint action ["Down"] +actions: [4] + +# State 126 +# Apply joint action ["Down"] +actions: [4] + +# State 127 +# Apply joint action ["Up"] +actions: [2] + +# State 128 +# Apply joint action ["Up"] +actions: [2] + +# State 129 +# Apply joint action ["Left"] +actions: [1] + +# State 
130 +# ..*..** +# ..*.0.. +# ....*.. +IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*.0..\n....*..\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-1.3] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Up"] +actions: [2] + +# State 131 +# Apply joint action ["Down"] +actions: [4] + +# State 132 +# Apply joint action ["Down"] +actions: [4] + +# State 133 +# Apply joint action ["Up"] +actions: [2] + +# State 134 +# Apply joint action ["Stay"] +actions: [0] + +# State 135 +# Apply joint action ["Right"] +actions: [3] + +# State 136 +# Apply joint action ["Left"] +actions: [1] + +# State 137 +# Apply joint action ["Up"] +actions: [2] + +# State 138 +# Apply joint action ["Down"] +actions: [4] + +# State 139 +# Apply joint action ["Right"] +actions: [3] + +# State 140 +# ..*..** +# ..*.0.. +# ....*.. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*.0..\n....*..\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-1.4] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Left"] +actions: [1] + +# State 141 +# Apply joint action ["Left"] +actions: [1] + +# State 142 +# Apply joint action ["Right"] +actions: [3] + +# State 143 +# Apply joint action ["Right"] +actions: [3] + +# State 144 +# Apply joint action ["Left"] +actions: [1] + +# State 145 +# Apply joint action ["Right"] +actions: [3] + +# State 146 +# Apply joint action ["Right"] +actions: [3] + +# State 147 +# Apply joint action ["Right"] +actions: [3] + +# State 148 +# Apply joint action ["Up"] +actions: [2] + +# State 149 +# Apply joint action ["Down"] +actions: [4] + +# State 150 +# ..*..** +# ..*.... 
+# ....*.0 +IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*....\n....*.0\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◯◉ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◯ +Rewards() = [-0.01] +Returns() = [-1.5] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Up"] +actions: [2] + +# State 151 +# Apply joint action ["Left"] +actions: [1] + +# State 152 +# Apply joint action ["Left"] +actions: [1] + +# State 153 +# Apply joint action ["Up"] +actions: [2] + +# State 154 +# Apply joint action ["Right"] +actions: [3] + +# State 155 +# Apply joint action ["Left"] +actions: [1] + +# State 156 +# Apply joint action ["Down"] +actions: [4] + +# State 157 +# Apply joint action ["Up"] +actions: [2] + +# State 158 +# Apply joint action ["Up"] +actions: [2] + +# State 159 +# Apply joint action ["Right"] +actions: [3] + +# State 160 +# ..*.0** +# ..*.... +# ....*.. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*.0**\n..*....\n....*..\n" +ObservationTensor(0): +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-1.6] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Left"] +actions: [1] + +# State 161 +# Apply joint action ["Down"] +actions: [4] + +# State 162 +# Apply joint action ["Left"] +actions: [1] + +# State 163 +# Apply joint action ["Right"] +actions: [3] + +# State 164 +# Apply joint action ["Stay"] +actions: [0] + +# State 165 +# Apply joint action ["Right"] +actions: [3] + +# State 166 +# Apply joint action ["Up"] +actions: [2] + +# State 167 +# Apply joint action ["Up"] +actions: [2] + +# State 168 +# Apply joint action ["Stay"] +actions: [0] + +# State 169 +# Apply joint action ["Down"] +actions: [4] + +# State 170 +# ..*..** +# ..*.... +# ....*0. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*....\n....*0.\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◉◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◯◉ +Rewards() = [-0.01] +Returns() = [-1.7] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Stay"] +actions: [0] + +# State 171 +# Apply joint action ["Left"] +actions: [1] + +# State 172 +# Apply joint action ["Left"] +actions: [1] + +# State 173 +# Apply joint action ["Stay"] +actions: [0] + +# State 174 +# Apply joint action ["Down"] +actions: [4] + +# State 175 +# Apply joint action ["Stay"] +actions: [0] + +# State 176 +# Apply joint action ["Left"] +actions: [1] + +# State 177 +# Apply joint action ["Up"] +actions: [2] + +# State 178 +# Apply joint action ["Up"] +actions: [2] + +# State 179 +# Apply joint action ["Right"] +actions: [3] + +# State 180 +# ..*..** +# ..*...0 +# ....*.. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*...0\n....*..\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-1.8] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Stay"] +actions: [0] + +# State 181 +# Apply joint action ["Down"] +actions: [4] + +# State 182 +# Apply joint action ["Left"] +actions: [1] + +# State 183 +# Apply joint action ["Left"] +actions: [1] + +# State 184 +# Apply joint action ["Stay"] +actions: [0] + +# State 185 +# Apply joint action ["Right"] +actions: [3] + +# State 186 +# Apply joint action ["Right"] +actions: [3] + +# State 187 +# Apply joint action ["Right"] +actions: [3] + +# State 188 +# Apply joint action ["Left"] +actions: [1] + +# State 189 +# Apply joint action ["Left"] +actions: [1] + +# State 190 +# ..*..** +# ..*.... +# ....*0. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*....\n....*0.\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◉◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◯◉ +Rewards() = [-0.01] +Returns() = [-1.9] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Down"] +actions: [4] + +# State 191 +# Apply joint action ["Stay"] +actions: [0] + +# State 192 +# Apply joint action ["Left"] +actions: [1] + +# State 193 +# Apply joint action ["Down"] +actions: [4] + +# State 194 +# Apply joint action ["Up"] +actions: [2] + +# State 195 +# Apply joint action ["Up"] +actions: [2] + +# State 196 +# Apply joint action ["Down"] +actions: [4] + +# State 197 +# Apply joint action ["Up"] +actions: [2] + +# State 198 +# Apply joint action ["Right"] +actions: [3] + +# State 199 +# Apply joint action ["Up"] +actions: [2] + +# State 200 +# ..*..** +# ..*...0 +# ....*.. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*...0\n....*..\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-2] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Left"] +actions: [1] + +# State 201 +# Apply joint action ["Stay"] +actions: [0] + +# State 202 +# Apply joint action ["Stay"] +actions: [0] + +# State 203 +# Apply joint action ["Up"] +actions: [2] + +# State 204 +# Apply joint action ["Stay"] +actions: [0] + +# State 205 +# Apply joint action ["Right"] +actions: [3] + +# State 206 +# Apply joint action ["Stay"] +actions: [0] + +# State 207 +# Apply joint action ["Stay"] +actions: [0] + +# State 208 +# Apply joint action ["Down"] +actions: [4] + +# State 209 +# Apply joint action ["Stay"] +actions: [0] + +# State 210 +# ..*..** +# ..*.... 
+# ....*.0 +IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*....\n....*.0\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◯◉ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◯ +Rewards() = [-0.01] +Returns() = [-2.1] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Stay"] +actions: [0] + +# State 211 +# Apply joint action ["Down"] +actions: [4] + +# State 212 +# Apply joint action ["Stay"] +actions: [0] + +# State 213 +# Apply joint action ["Left"] +actions: [1] + +# State 214 +# Apply joint action ["Left"] +actions: [1] + +# State 215 +# Apply joint action ["Up"] +actions: [2] + +# State 216 +# Apply joint action ["Stay"] +actions: [0] + +# State 217 +# Apply joint action ["Up"] +actions: [2] + +# State 218 +# Apply joint action ["Left"] +actions: [1] + +# State 219 +# Apply joint action ["Stay"] +actions: [0] + +# State 220 +# ..*..** +# ..*.0.. +# ....*.. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*.0..\n....*..\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-2.2] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Up"] +actions: [2] + +# State 221 +# Apply joint action ["Stay"] +actions: [0] + +# State 222 +# Apply joint action ["Right"] +actions: [3] + +# State 223 +# Apply joint action ["Up"] +actions: [2] + +# State 224 +# Apply joint action ["Up"] +actions: [2] + +# State 225 +# Apply joint action ["Right"] +actions: [3] + +# State 226 +# Apply joint action ["Right"] +actions: [3] + +# State 227 +# Apply joint action ["Stay"] +actions: [0] + +# State 228 +# Apply joint action ["Up"] +actions: [2] + +# State 229 +# Apply joint action ["Left"] +actions: [1] + +# State 230 +# ..*0.** +# ..*.... +# ....*.. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*0.**\n..*....\n....*..\n" +ObservationTensor(0): +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-2.3] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Right"] +actions: [3] + +# State 231 +# Apply joint action ["Down"] +actions: [4] + +# State 232 +# Apply joint action ["Left"] +actions: [1] + +# State 233 +# Apply joint action ["Right"] +actions: [3] + +# State 234 +# Apply joint action ["Down"] +actions: [4] + +# State 235 +# Apply joint action ["Right"] +actions: [3] + +# State 236 +# Apply joint action ["Left"] +actions: [1] + +# State 237 +# Apply joint action ["Down"] +actions: [4] + +# State 238 +# Apply joint action ["Down"] +actions: [4] + +# State 239 +# Apply joint action ["Stay"] +actions: [0] + +# State 240 +# ..*..** +# ..*.0.. +# ....*.. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*.0..\n....*..\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-2.4] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Stay"] +actions: [0] + +# State 241 +# Apply joint action ["Stay"] +actions: [0] + +# State 242 +# Apply joint action ["Up"] +actions: [2] + +# State 243 +# Apply joint action ["Right"] +actions: [3] + +# State 244 +# Apply joint action ["Right"] +actions: [3] + +# State 245 +# Apply joint action ["Down"] +actions: [4] + +# State 246 +# Apply joint action ["Up"] +actions: [2] + +# State 247 +# Apply joint action ["Up"] +actions: [2] + +# State 248 +# Apply joint action ["Up"] +actions: [2] + +# State 249 +# Apply joint action ["Left"] +actions: [1] + +# State 250 +# ..*0.** +# ..*.... +# ....*.. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*0.**\n..*....\n....*..\n" +ObservationTensor(0): +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-2.5] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Up"] +actions: [2] + +# State 251 +# Apply joint action ["Down"] +actions: [4] + +# State 252 +# Apply joint action ["Right"] +actions: [3] + +# State 253 +# Apply joint action ["Right"] +actions: [3] + +# State 254 +# Apply joint action ["Down"] +actions: [4] + +# State 255 +# Apply joint action ["Left"] +actions: [1] + +# State 256 +# Apply joint action ["Stay"] +actions: [0] + +# State 257 +# Apply joint action ["Up"] +actions: [2] + +# State 258 +# Apply joint action ["Down"] +actions: [4] + +# State 259 +# Apply joint action ["Left"] +actions: [1] + +# State 260 +# ..*..** +# ..*.... +# ....*0. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*....\n....*0.\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◉◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◯◉ +Rewards() = [-0.01] +Returns() = [-2.6] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Left"] +actions: [1] + +# State 261 +# Apply joint action ["Stay"] +actions: [0] + +# State 262 +# Apply joint action ["Down"] +actions: [4] + +# State 263 +# Apply joint action ["Up"] +actions: [2] + +# State 264 +# Apply joint action ["Stay"] +actions: [0] + +# State 265 +# Apply joint action ["Right"] +actions: [3] + +# State 266 +# Apply joint action ["Down"] +actions: [4] + +# State 267 +# Apply joint action ["Down"] +actions: [4] + +# State 268 +# Apply joint action ["Right"] +actions: [3] + +# State 269 +# Apply joint action ["Right"] +actions: [3] + +# State 270 +# ..*..** +# ..*.... 
+# ....*.0 +IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*....\n....*.0\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◯◉ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◯ +Rewards() = [-0.01] +Returns() = [-2.7] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Right"] +actions: [3] + +# State 271 +# Apply joint action ["Up"] +actions: [2] + +# State 272 +# Apply joint action ["Up"] +actions: [2] + +# State 273 +# Apply joint action ["Right"] +actions: [3] + +# State 274 +# Apply joint action ["Up"] +actions: [2] + +# State 275 +# Apply joint action ["Down"] +actions: [4] + +# State 276 +# Apply joint action ["Up"] +actions: [2] + +# State 277 +# Apply joint action ["Left"] +actions: [1] + +# State 278 +# Apply joint action ["Down"] +actions: [4] + +# State 279 +# Apply joint action ["Left"] +actions: [1] + +# State 280 +# ..*..** +# ..*.... +# ....*0. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*....\n....*0.\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◉◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◯◉ +Rewards() = [-0.01] +Returns() = [-2.8] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Down"] +actions: [4] + +# State 281 +# Apply joint action ["Stay"] +actions: [0] + +# State 282 +# Apply joint action ["Down"] +actions: [4] + +# State 283 +# Apply joint action ["Left"] +actions: [1] + +# State 284 +# Apply joint action ["Right"] +actions: [3] + +# State 285 +# Apply joint action ["Down"] +actions: [4] + +# State 286 +# Apply joint action ["Right"] +actions: [3] + +# State 287 +# Apply joint action ["Right"] +actions: [3] + +# State 288 +# Apply joint action ["Stay"] +actions: [0] + +# State 289 +# Apply joint action ["Up"] +actions: [2] + +# State 290 +# ..*..** +# ..*...0 +# ....*.. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1, 4, 0, 4, 1, 3, 4, 3, 3, 0, 2] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1, 4, 0, 4, 1, 3, 4, 3, 3, 0, 2" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*...0\n....*..\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-2.9] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Down"] +actions: [4] + +# State 291 +# Apply joint action ["Stay"] +actions: [0] + +# State 292 +# Apply joint action ["Stay"] +actions: [0] + +# State 293 +# Apply joint action ["Down"] +actions: [4] + +# State 294 +# Apply joint action ["Up"] +actions: [2] + +# State 295 +# Apply joint action ["Stay"] +actions: [0] + +# State 296 +# Apply joint action ["Left"] +actions: [1] + +# State 297 +# Apply joint action ["Right"] +actions: [3] + +# State 298 +# Apply joint action ["Left"] +actions: [1] + +# State 299 +# Apply joint action ["Left"] +actions: [1] + +# State 300 +# ..*..** +# ..*.0.. +# ....*.. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1, 4, 0, 4, 1, 3, 4, 3, 3, 0, 2, 4, 0, 0, 4, 2, 0, 1, 3, 1, 1] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1, 4, 0, 4, 1, 3, 4, 3, 3, 0, 2, 4, 0, 0, 4, 2, 0, 1, 3, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*.0..\n....*..\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◯◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-3] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Up"] +actions: [2] + +# State 301 +# Apply joint action ["Left"] +actions: [1] + +# State 302 +# Apply joint action ["Up"] +actions: [2] + +# State 303 +# Apply joint action ["Up"] +actions: [2] + +# State 304 +# Apply joint action ["Up"] +actions: [2] + +# State 305 +# Apply joint action ["Left"] +actions: [1] + +# State 306 +# Apply joint action ["Stay"] +actions: [0] + +# State 307 +# Apply joint action ["Down"] +actions: [4] + +# State 308 +# Apply joint action ["Stay"] +actions: [0] + +# State 309 +# Apply joint action ["Stay"] +actions: [0] + +# State 310 +# ..*..** +# ..*0... +# ....*.. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1, 4, 0, 4, 1, 3, 4, 3, 3, 0, 2, 4, 0, 0, 4, 2, 0, 1, 3, 1, 1, 2, 1, 2, 2, 2, 1, 0, 4, 0, 0] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1, 4, 0, 4, 1, 3, 4, 3, 3, 0, 2, 4, 0, 0, 4, 2, 0, 1, 3, 1, 1, 2, 1, 2, 2, 2, 1, 0, 4, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*0...\n....*..\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◯◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-3.1] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Stay"] +actions: [0] + +# State 311 +# Apply joint action ["Left"] +actions: [1] + +# State 312 +# Apply joint action ["Right"] +actions: [3] + +# State 313 +# Apply joint action ["Stay"] +actions: [0] + +# State 314 +# Apply joint action ["Stay"] +actions: [0] + +# State 315 +# Apply joint action ["Stay"] +actions: [0] + +# State 316 +# Apply joint action ["Stay"] +actions: [0] + +# State 317 +# Apply joint action ["Right"] +actions: [3] + +# State 318 +# Apply joint action ["Up"] +actions: [2] + +# State 319 +# Apply joint action ["Down"] +actions: [4] + +# State 320 +# ..*..** +# ..*.... +# ....*0. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1, 4, 0, 4, 1, 3, 4, 3, 3, 0, 2, 4, 0, 0, 4, 2, 0, 1, 3, 1, 1, 2, 1, 2, 2, 2, 1, 0, 4, 0, 0, 0, 1, 3, 0, 0, 0, 0, 3, 2, 4] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1, 4, 0, 4, 1, 3, 4, 3, 3, 0, 2, 4, 0, 0, 4, 2, 0, 1, 3, 1, 1, 2, 1, 2, 2, 2, 1, 0, 4, 0, 0, 0, 1, 3, 0, 0, 0, 0, 3, 2, 4" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*....\n....*0.\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◉◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◯◉ +Rewards() = [-0.01] +Returns() = [-3.2] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Left"] +actions: [1] + +# State 321 +# Apply joint action ["Stay"] +actions: [0] + +# State 322 +# Apply joint action ["Down"] +actions: [4] + +# State 323 +# Apply joint action ["Right"] +actions: [3] + +# State 324 +# Apply joint action ["Stay"] +actions: [0] + +# State 325 +# Apply joint action ["Down"] +actions: [4] + +# State 326 +# Apply joint action ["Left"] +actions: [1] + +# State 327 +# Apply joint action ["Right"] +actions: [3] + +# State 328 +# Apply joint action ["Right"] +actions: [3] + +# State 329 +# Apply joint action ["Up"] +actions: [2] + +# State 330 +# ..*..** +# ..*...0 +# ....*.. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1, 4, 0, 4, 1, 3, 4, 3, 3, 0, 2, 4, 0, 0, 4, 2, 0, 1, 3, 1, 1, 2, 1, 2, 2, 2, 1, 0, 4, 0, 0, 0, 1, 3, 0, 0, 0, 0, 3, 2, 4, 1, 0, 4, 3, 0, 4, 1, 3, 3, 2] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1, 4, 0, 4, 1, 3, 4, 3, 3, 0, 2, 4, 0, 0, 4, 2, 0, 1, 3, 1, 1, 2, 1, 2, 2, 2, 1, 0, 4, 0, 0, 0, 1, 3, 0, 0, 0, 0, 3, 2, 4, 1, 0, 4, 3, 0, 4, 1, 3, 3, 2" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*...0\n....*..\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-3.3] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Left"] +actions: [1] + +# State 331 +# Apply joint action ["Down"] +actions: [4] + +# State 332 +# Apply joint action ["Up"] +actions: [2] + +# State 333 +# Apply joint action ["Left"] +actions: [1] + +# State 334 +# Apply joint action ["Left"] +actions: [1] + +# State 335 +# Apply joint action ["Left"] +actions: [1] + +# State 336 +# Apply joint action ["Stay"] +actions: [0] + +# State 337 +# Apply joint action ["Left"] +actions: [1] + +# State 338 +# Apply joint action ["Left"] +actions: [1] + +# State 339 +# Apply joint action ["Up"] +actions: [2] + +# State 340 +# ..*0.** +# ..*.... +# ....*.. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1, 4, 0, 4, 1, 3, 4, 3, 3, 0, 2, 4, 0, 0, 4, 2, 0, 1, 3, 1, 1, 2, 1, 2, 2, 2, 1, 0, 4, 0, 0, 0, 1, 3, 0, 0, 0, 0, 3, 2, 4, 1, 0, 4, 3, 0, 4, 1, 3, 3, 2, 1, 4, 2, 1, 1, 1, 0, 1, 1, 2] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1, 4, 0, 4, 1, 3, 4, 3, 3, 0, 2, 4, 0, 0, 4, 2, 0, 1, 3, 1, 1, 2, 1, 2, 2, 2, 1, 0, 4, 0, 0, 0, 1, 3, 0, 0, 0, 0, 3, 2, 4, 1, 0, 4, 3, 0, 4, 1, 3, 3, 2, 1, 4, 2, 1, 1, 1, 0, 1, 1, 2" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*0.**\n..*....\n....*..\n" +ObservationTensor(0): +◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◯◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-3.4] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Up"] +actions: [2] + +# State 341 +# Apply joint action ["Stay"] +actions: [0] + +# State 342 +# Apply joint action ["Down"] +actions: [4] + +# State 343 +# Apply joint action ["Right"] +actions: [3] + +# State 344 +# Apply joint action ["Right"] +actions: [3] + +# State 345 +# Apply joint action ["Left"] +actions: [1] + +# State 346 +# Apply joint action ["Left"] +actions: [1] + +# State 347 +# Apply joint action ["Up"] +actions: [2] + +# State 348 +# Apply joint action ["Right"] +actions: [3] + +# State 349 +# Apply joint action ["Up"] +actions: [2] + +# State 350 +# ..*.0** +# ..*.... +# ....*.. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1, 4, 0, 4, 1, 3, 4, 3, 3, 0, 2, 4, 0, 0, 4, 2, 0, 1, 3, 1, 1, 2, 1, 2, 2, 2, 1, 0, 4, 0, 0, 0, 1, 3, 0, 0, 0, 0, 3, 2, 4, 1, 0, 4, 3, 0, 4, 1, 3, 3, 2, 1, 4, 2, 1, 1, 1, 0, 1, 1, 2, 2, 0, 4, 3, 3, 1, 1, 2, 3, 2] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1, 4, 0, 4, 1, 3, 4, 3, 3, 0, 2, 4, 0, 0, 4, 2, 0, 1, 3, 1, 1, 2, 1, 2, 2, 2, 1, 0, 4, 0, 0, 0, 1, 3, 0, 0, 0, 0, 3, 2, 4, 1, 0, 4, 3, 0, 4, 1, 3, 3, 2, 1, 4, 2, 1, 1, 1, 0, 1, 1, 2, 2, 0, 4, 3, 3, 1, 1, 2, 3, 2" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*.0**\n..*....\n....*..\n" +ObservationTensor(0): +◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◯◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-3.5] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Left"] +actions: [1] + +# State 351 +# Apply joint action ["Left"] +actions: [1] + +# State 352 +# Apply joint action ["Right"] +actions: [3] + +# State 353 +# Apply joint action ["Down"] +actions: [4] + +# State 354 +# Apply joint action ["Left"] +actions: [1] + +# State 355 +# Apply joint action ["Down"] +actions: [4] + +# State 356 +# Apply joint action ["Down"] +actions: [4] + +# State 357 +# Apply joint action ["Stay"] +actions: [0] + +# State 358 +# Apply joint action ["Left"] +actions: [1] + +# State 359 +# Apply joint action ["Down"] +actions: [4] + +# State 360 +# ..*..** +# ..*.... +# ..0.*.. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1, 4, 0, 4, 1, 3, 4, 3, 3, 0, 2, 4, 0, 0, 4, 2, 0, 1, 3, 1, 1, 2, 1, 2, 2, 2, 1, 0, 4, 0, 0, 0, 1, 3, 0, 0, 0, 0, 3, 2, 4, 1, 0, 4, 3, 0, 4, 1, 3, 3, 2, 1, 4, 2, 1, 1, 1, 0, 1, 1, 2, 2, 0, 4, 3, 3, 1, 1, 2, 3, 2, 1, 1, 3, 4, 1, 4, 4, 0, 1, 4] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1, 4, 0, 4, 1, 3, 4, 3, 3, 0, 2, 4, 0, 0, 4, 2, 0, 1, 3, 1, 1, 2, 1, 2, 2, 2, 1, 0, 4, 0, 0, 0, 1, 3, 0, 0, 0, 0, 3, 2, 4, 1, 0, 4, 3, 0, 4, 1, 3, 3, 2, 1, 4, 2, 1, 1, 1, 0, 1, 1, 2, 2, 0, 4, 3, 3, 1, 1, 2, 3, 2, 1, 1, 3, 4, 1, 4, 4, 0, 1, 4" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*....\n..0.*..\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◉◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◯◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-3.6] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Left"] +actions: [1] + +# State 361 +# Apply joint action ["Stay"] +actions: [0] + +# State 362 +# Apply joint action ["Stay"] +actions: [0] + +# State 363 +# Apply joint action ["Right"] +actions: [3] + +# State 364 +# Apply joint action ["Right"] +actions: [3] + +# State 365 +# Apply joint action ["Stay"] +actions: [0] + +# State 366 +# Apply joint action ["Left"] +actions: [1] + +# State 367 +# Apply joint action ["Stay"] +actions: [0] + +# State 368 +# Apply joint action ["Stay"] +actions: [0] + +# State 369 +# Apply joint action ["Stay"] +actions: [0] + +# State 370 +# ..*..** +# ..*.... +# ..0.*.. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1, 4, 0, 4, 1, 3, 4, 3, 3, 0, 2, 4, 0, 0, 4, 2, 0, 1, 3, 1, 1, 2, 1, 2, 2, 2, 1, 0, 4, 0, 0, 0, 1, 3, 0, 0, 0, 0, 3, 2, 4, 1, 0, 4, 3, 0, 4, 1, 3, 3, 2, 1, 4, 2, 1, 1, 1, 0, 1, 1, 2, 2, 0, 4, 3, 3, 1, 1, 2, 3, 2, 1, 1, 3, 4, 1, 4, 4, 0, 1, 4, 1, 0, 0, 3, 3, 0, 1, 0, 0, 0] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1, 4, 0, 4, 1, 3, 4, 3, 3, 0, 2, 4, 0, 0, 4, 2, 0, 1, 3, 1, 1, 2, 1, 2, 2, 2, 1, 0, 4, 0, 0, 0, 1, 3, 0, 0, 0, 0, 3, 2, 4, 1, 0, 4, 3, 0, 4, 1, 3, 3, 2, 1, 4, 2, 1, 1, 1, 0, 1, 1, 2, 2, 0, 4, 3, 3, 1, 1, 2, 3, 2, 1, 1, 3, 4, 1, 4, 4, 0, 1, 4, 1, 0, 0, 3, 3, 0, 1, 0, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*....\n..0.*..\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◉◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◯◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-3.7] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Up"] +actions: [2] + +# State 371 +# Apply joint action ["Up"] +actions: [2] + +# State 372 +# Apply joint action ["Up"] +actions: [2] + +# State 373 +# Apply joint action ["Left"] +actions: [1] + +# State 374 +# Apply joint action ["Stay"] +actions: [0] + +# State 375 +# Apply joint action ["Down"] +actions: [4] + +# State 376 +# Apply joint action ["Left"] +actions: [1] + +# State 377 +# Apply joint action ["Left"] +actions: [1] + +# State 378 +# Apply joint action ["Stay"] +actions: [0] + +# State 379 +# Apply joint action ["Right"] +actions: [3] + +# State 380 +# ..*..** +# ..*.... +# .0..*.. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1, 4, 0, 4, 1, 3, 4, 3, 3, 0, 2, 4, 0, 0, 4, 2, 0, 1, 3, 1, 1, 2, 1, 2, 2, 2, 1, 0, 4, 0, 0, 0, 1, 3, 0, 0, 0, 0, 3, 2, 4, 1, 0, 4, 3, 0, 4, 1, 3, 3, 2, 1, 4, 2, 1, 1, 1, 0, 1, 1, 2, 2, 0, 4, 3, 3, 1, 1, 2, 3, 2, 1, 1, 3, 4, 1, 4, 4, 0, 1, 4, 1, 0, 0, 3, 3, 0, 1, 0, 0, 0, 2, 2, 2, 1, 0, 4, 1, 1, 0, 3] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1, 4, 0, 4, 1, 3, 4, 3, 3, 0, 2, 4, 0, 0, 4, 2, 0, 1, 3, 1, 1, 2, 1, 2, 2, 2, 1, 0, 4, 0, 0, 0, 1, 3, 0, 0, 0, 0, 3, 2, 4, 1, 0, 4, 3, 0, 4, 1, 3, 3, 2, 1, 4, 2, 1, 1, 1, 0, 1, 1, 2, 2, 0, 4, 3, 3, 1, 1, 2, 3, 2, 1, 1, 3, 4, 1, 4, 4, 0, 1, 4, 1, 0, 0, 3, 3, 0, 1, 0, 0, 0, 2, 2, 2, 1, 0, 4, 1, 1, 0, 3" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*....\n.0..*..\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◉◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◯◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-3.8] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Stay"] +actions: [0] + +# State 381 +# Apply joint action ["Right"] +actions: [3] + +# State 382 +# Apply joint action ["Down"] +actions: [4] + +# State 383 +# Apply joint action ["Right"] +actions: [3] + +# State 384 +# Apply joint action ["Down"] +actions: [4] + +# State 385 +# Apply joint action ["Stay"] +actions: [0] + +# State 386 +# Apply joint action ["Left"] +actions: [1] + +# State 387 +# Apply joint action ["Up"] +actions: [2] + +# State 388 +# Apply joint action ["Left"] +actions: [1] + +# State 389 +# Apply joint action ["Right"] +actions: [3] + +# State 390 +# ..*..** +# ..*.... +# ..0.*.. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1, 4, 0, 4, 1, 3, 4, 3, 3, 0, 2, 4, 0, 0, 4, 2, 0, 1, 3, 1, 1, 2, 1, 2, 2, 2, 1, 0, 4, 0, 0, 0, 1, 3, 0, 0, 0, 0, 3, 2, 4, 1, 0, 4, 3, 0, 4, 1, 3, 3, 2, 1, 4, 2, 1, 1, 1, 0, 1, 1, 2, 2, 0, 4, 3, 3, 1, 1, 2, 3, 2, 1, 1, 3, 4, 1, 4, 4, 0, 1, 4, 1, 0, 0, 3, 3, 0, 1, 0, 0, 0, 2, 2, 2, 1, 0, 4, 1, 1, 0, 3, 0, 3, 4, 3, 4, 0, 1, 2, 1, 3] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1, 4, 0, 4, 1, 3, 4, 3, 3, 0, 2, 4, 0, 0, 4, 2, 0, 1, 3, 1, 1, 2, 1, 2, 2, 2, 1, 0, 4, 0, 0, 0, 1, 3, 0, 0, 0, 0, 3, 2, 4, 1, 0, 4, 3, 0, 4, 1, 3, 3, 2, 1, 4, 2, 1, 1, 1, 0, 1, 1, 2, 2, 0, 4, 3, 3, 1, 1, 2, 3, 2, 1, 1, 3, 4, 1, 4, 4, 0, 1, 4, 1, 0, 0, 3, 3, 0, 1, 0, 0, 0, 2, 2, 2, 1, 0, 4, 1, 1, 0, 3, 0, 3, 4, 3, 4, 0, 1, 2, 1, 3" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*....\n..0.*..\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◉◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◯◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-3.9] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Down"] +actions: [4] + +# State 391 +# Apply joint action ["Stay"] +actions: [0] + +# State 392 +# Apply joint action ["Right"] +actions: [3] + +# State 393 +# Apply joint action ["Stay"] +actions: [0] + +# State 394 +# Apply joint action ["Up"] +actions: [2] + +# State 395 +# Apply joint action ["Down"] +actions: [4] + +# State 396 +# Apply joint action ["Left"] +actions: [1] + +# State 397 +# Apply joint action ["Left"] +actions: [1] + +# State 398 +# Apply joint action ["Left"] +actions: [1] + +# State 399 +# Apply joint action ["Left"] +actions: [1] + +# State 400 +# ..*..** +# ..*.... +# 0...*.. 
+IsTerminal() = False +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1, 4, 0, 4, 1, 3, 4, 3, 3, 0, 2, 4, 0, 0, 4, 2, 0, 1, 3, 1, 1, 2, 1, 2, 2, 2, 1, 0, 4, 0, 0, 0, 1, 3, 0, 0, 0, 0, 3, 2, 4, 1, 0, 4, 3, 0, 4, 1, 3, 3, 2, 1, 4, 2, 1, 1, 1, 0, 1, 1, 2, 2, 0, 4, 3, 3, 1, 1, 2, 3, 2, 1, 1, 3, 4, 1, 4, 4, 0, 1, 4, 1, 0, 0, 3, 3, 0, 1, 0, 0, 0, 2, 2, 2, 1, 0, 4, 1, 1, 0, 3, 0, 3, 4, 3, 4, 0, 1, 2, 1, 3, 4, 0, 3, 0, 2, 4, 1, 1, 1, 1] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1, 4, 0, 4, 1, 3, 4, 3, 3, 0, 2, 4, 0, 0, 4, 2, 0, 1, 3, 1, 1, 2, 1, 2, 2, 2, 1, 0, 4, 0, 0, 0, 1, 3, 0, 0, 0, 0, 3, 2, 4, 1, 0, 4, 3, 0, 4, 1, 3, 3, 2, 1, 4, 2, 1, 1, 1, 0, 1, 1, 2, 2, 0, 4, 3, 3, 1, 1, 2, 3, 2, 1, 1, 3, 4, 1, 4, 4, 0, 1, 4, 1, 0, 0, 3, 3, 0, 1, 0, 0, 0, 2, 2, 2, 1, 0, 4, 1, 1, 0, 3, 0, 3, 4, 3, 4, 0, 1, 2, 1, 3, 4, 0, 3, 0, 2, 4, 1, 1, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "..*..**\n..*....\n0...*..\n" +ObservationTensor(0): +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◉◉◯◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◉◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◯◉◉◉◯◉◉ +Rewards() = [-0.01] +Returns() = [-4] +LegalActions(0) = [0, 1, 2, 3, 4] +StringLegalActions(0) = ["Stay", "Left", "Up", "Right", "Down"] + +# Apply joint action ["Up"] +actions: [2] + +# State 401 +# Apply joint action ["Stay"] +actions: [0] + +# State 402 +# Apply joint action ["Right"] +actions: [3] + +# State 403 +# Apply joint action ["Stay"] +actions: [0] + +# State 404 +# Apply joint action ["Up"] +actions: [2] + +# State 405 +# Apply joint action ["Left"] +actions: [1] + +# State 406 +# 0.*..** +# ..*.... +# ....*.. 
+IsTerminal() = True +History() = [0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1, 4, 0, 4, 1, 3, 4, 3, 3, 0, 2, 4, 0, 0, 4, 2, 0, 1, 3, 1, 1, 2, 1, 2, 2, 2, 1, 0, 4, 0, 0, 0, 1, 3, 0, 0, 0, 0, 3, 2, 4, 1, 0, 4, 3, 0, 4, 1, 3, 3, 2, 1, 4, 2, 1, 1, 1, 0, 1, 1, 2, 2, 0, 4, 3, 3, 1, 1, 2, 3, 2, 1, 1, 3, 4, 1, 4, 4, 0, 1, 4, 1, 0, 0, 3, 3, 0, 1, 0, 0, 0, 2, 2, 2, 1, 0, 4, 1, 1, 0, 3, 0, 3, 4, 3, 4, 0, 1, 2, 1, 3, 4, 0, 3, 0, 2, 4, 1, 1, 1, 1, 2, 0, 3, 0, 2, 1] +HistoryString() = "0, 4, 0, 3, 4, 1, 2, 0, 2, 2, 0, 0, 1, 1, 0, 4, 0, 4, 1, 1, 4, 3, 4, 0, 0, 2, 3, 2, 0, 3, 1, 0, 0, 3, 4, 1, 0, 2, 0, 0, 3, 0, 2, 0, 4, 4, 0, 4, 4, 2, 3, 1, 2, 1, 0, 0, 3, 2, 3, 4, 2, 1, 1, 4, 4, 0, 2, 0, 1, 2, 0, 3, 4, 0, 1, 3, 0, 1, 4, 2, 0, 1, 0, 3, 0, 3, 4, 4, 0, 3, 0, 1, 1, 4, 2, 4, 0, 3, 4, 2, 4, 0, 0, 3, 3, 4, 4, 2, 3, 2, 1, 0, 3, 2, 3, 0, 1, 3, 0, 0, 4, 0, 2, 3, 1, 4, 4, 2, 2, 1, 2, 4, 4, 2, 0, 3, 1, 2, 4, 3, 1, 1, 3, 3, 1, 3, 3, 3, 2, 4, 2, 1, 1, 2, 3, 1, 4, 2, 2, 3, 1, 4, 1, 3, 0, 3, 2, 2, 0, 4, 0, 1, 1, 0, 4, 0, 1, 2, 2, 3, 0, 4, 1, 1, 0, 3, 3, 3, 1, 1, 4, 0, 1, 4, 2, 2, 4, 2, 3, 2, 1, 0, 0, 2, 0, 3, 0, 0, 4, 0, 0, 4, 0, 1, 1, 2, 0, 2, 1, 0, 2, 0, 3, 2, 2, 3, 3, 0, 2, 1, 3, 4, 1, 3, 4, 3, 1, 4, 4, 0, 0, 0, 2, 3, 3, 4, 2, 2, 2, 1, 2, 4, 3, 3, 4, 1, 0, 2, 4, 1, 1, 0, 4, 2, 0, 3, 4, 4, 3, 3, 3, 2, 2, 3, 2, 4, 2, 1, 4, 1, 4, 0, 4, 1, 3, 4, 3, 3, 0, 2, 4, 0, 0, 4, 2, 0, 1, 3, 1, 1, 2, 1, 2, 2, 2, 1, 0, 4, 0, 0, 0, 1, 3, 0, 0, 0, 0, 3, 2, 4, 1, 0, 4, 3, 0, 4, 1, 3, 3, 2, 1, 4, 2, 1, 1, 1, 0, 1, 1, 2, 2, 0, 4, 3, 3, 1, 1, 2, 3, 2, 1, 1, 3, 4, 1, 4, 4, 0, 1, 4, 1, 0, 0, 3, 3, 0, 1, 0, 0, 0, 2, 2, 2, 1, 0, 4, 1, 1, 0, 3, 0, 3, 4, 3, 4, 0, 1, 2, 1, 3, 4, 0, 3, 0, 2, 4, 1, 1, 1, 1, 2, 0, 3, 0, 2, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "0.*..**\n..*....\n....*..\n" +ObservationTensor(0): +◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯ ◯◯◉◯◯◉◉ ◯◉◯◉◉◯◯ +◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯ ◉◉◯◉◉◉◉ +◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯ ◉◉◉◉◯◉◉ +Rewards() = [200] +Returns() = [195.95] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/pentago.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/pentago.txt new file mode 100644 index 0000000..4f03366 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/pentago.txt @@ -0,0 +1,488 @@ +game: pentago + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Pentago" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["ansi_color_output"] +GameType.provides_information_state_string = True 
+GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "pentago" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 288 +PolicyTensorShape() = [288] +MaxChanceOutcomes() = 0 +GetParameters() = {ansi_color_output=False} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [3, 6, 6] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 108 +MaxGameLength() = 36 +ToString() = "pentago()" + +# State 0 +# > t u < +# a b c d e f +# v 1 . . . . . . v +# s 2 . . . . . . v +# 3 . . . . . . +# 4 . . . . . . +# z 5 . . . . . . w +# ^ 6 . . . . . . ^ +# > y x < +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = " > t u <\n a b c d e f\nv 1 . . . . . . v\ns 2 . . . . . . v\n 3 . . . . . . \n 4 . . . . . . \nz 5 . . . . . . w\n^ 6 . . . . . . ^\n > y x <\n" +ObservationString(1) = " > t u <\n a b c d e f\nv 1 . . . . . . v\ns 2 . . . . . . v\n 3 . . . . . . \n 4 . . . . . . \nz 5 . . . . . . w\n^ 6 . . . . . . ^\n > y x <\n" +ObservationTensor(0): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287] +StringLegalActions() = ["a1s", "a1t", "a1u", "a1v", "a1w", "a1x", "a1y", "a1z", "b1s", "b1t", "b1u", "b1v", "b1w", "b1x", "b1y", "b1z", "c1s", "c1t", "c1u", "c1v", "c1w", "c1x", "c1y", "c1z", "d1s", "d1t", "d1u", "d1v", "d1w", "d1x", "d1y", "d1z", "e1s", "e1t", "e1u", "e1v", "e1w", "e1x", "e1y", "e1z", "f1s", "f1t", "f1u", "f1v", "f1w", "f1x", "f1y", "f1z", "a2s", "a2t", "a2u", "a2v", "a2w", "a2x", "a2y", "a2z", "b2s", "b2t", "b2u", "b2v", "b2w", "b2x", 
"b2y", "b2z", "c2s", "c2t", "c2u", "c2v", "c2w", "c2x", "c2y", "c2z", "d2s", "d2t", "d2u", "d2v", "d2w", "d2x", "d2y", "d2z", "e2s", "e2t", "e2u", "e2v", "e2w", "e2x", "e2y", "e2z", "f2s", "f2t", "f2u", "f2v", "f2w", "f2x", "f2y", "f2z", "a3s", "a3t", "a3u", "a3v", "a3w", "a3x", "a3y", "a3z", "b3s", "b3t", "b3u", "b3v", "b3w", "b3x", "b3y", "b3z", "c3s", "c3t", "c3u", "c3v", "c3w", "c3x", "c3y", "c3z", "d3s", "d3t", "d3u", "d3v", "d3w", "d3x", "d3y", "d3z", "e3s", "e3t", "e3u", "e3v", "e3w", "e3x", "e3y", "e3z", "f3s", "f3t", "f3u", "f3v", "f3w", "f3x", "f3y", "f3z", "a4s", "a4t", "a4u", "a4v", "a4w", "a4x", "a4y", "a4z", "b4s", "b4t", "b4u", "b4v", "b4w", "b4x", "b4y", "b4z", "c4s", "c4t", "c4u", "c4v", "c4w", "c4x", "c4y", "c4z", "d4s", "d4t", "d4u", "d4v", "d4w", "d4x", "d4y", "d4z", "e4s", "e4t", "e4u", "e4v", "e4w", "e4x", "e4y", "e4z", "f4s", "f4t", "f4u", "f4v", "f4w", "f4x", "f4y", "f4z", "a5s", "a5t", "a5u", "a5v", "a5w", "a5x", "a5y", "a5z", "b5s", "b5t", "b5u", "b5v", "b5w", "b5x", "b5y", "b5z", "c5s", "c5t", "c5u", "c5v", "c5w", "c5x", "c5y", "c5z", "d5s", "d5t", "d5u", "d5v", "d5w", "d5x", "d5y", "d5z", "e5s", "e5t", "e5u", "e5v", "e5w", "e5x", "e5y", "e5z", "f5s", "f5t", "f5u", "f5v", "f5w", "f5x", "f5y", "f5z", "a6s", "a6t", "a6u", "a6v", "a6w", "a6x", "a6y", "a6z", "b6s", "b6t", "b6u", "b6v", "b6w", "b6x", "b6y", "b6z", "c6s", "c6t", "c6u", "c6v", "c6w", "c6x", "c6y", "c6z", "d6s", "d6t", "d6u", "d6v", "d6w", "d6x", "d6y", "d6z", "e6s", "e6t", "e6u", "e6v", "e6w", "e6x", "e6y", "e6z", "f6s", "f6t", "f6u", "f6v", "f6w", "f6x", "f6y", "f6z"] + +# Apply action "c6s" +action: 256 + +# State 1 +# > t u < +# a b c d e f +# v 1 . . . . . . v +# s 2 . . . . . . v +# 3 . . . . . . +# 4 . . . . . . +# z 5 . . . . . . w +# ^ 6 . . O . . . ^ +# > y x < +IsTerminal() = False +History() = [256] +HistoryString() = "256" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "256" +InformationStateString(1) = "256" +ObservationString(0) = " > t u <\n a b c d e f\nv 1 . . . . . . v\ns 2 . . . . . . v\n 3 . . . . . . \n 4 . . . . . . \nz 5 . . . . . . w\n^ 6 . . O . . . ^\n > y x <\n" +ObservationString(1) = " > t u <\n a b c d e f\nv 1 . . . . . . v\ns 2 . . . . . . v\n 3 . . . . . . \n 4 . . . . . . \nz 5 . . . . . . w\n^ 6 . . O . . . 
^\n > y x <\n" +ObservationTensor(0): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◉◯◯◯ ◯◯◯◯◯◯ ◉◉◯◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◉◯◯◯ ◉◉◯◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287] +StringLegalActions() = ["a1s", "a1t", "a1u", "a1v", "a1w", "a1x", "a1y", "a1z", "b1s", "b1t", "b1u", "b1v", "b1w", "b1x", "b1y", "b1z", "c1s", "c1t", "c1u", "c1v", "c1w", "c1x", "c1y", "c1z", "d1s", "d1t", "d1u", "d1v", "d1w", "d1x", "d1y", "d1z", "e1s", "e1t", "e1u", "e1v", "e1w", "e1x", "e1y", "e1z", "f1s", "f1t", "f1u", "f1v", "f1w", "f1x", "f1y", "f1z", "a2s", "a2t", "a2u", "a2v", "a2w", "a2x", "a2y", "a2z", "b2s", "b2t", "b2u", "b2v", "b2w", "b2x", "b2y", "b2z", "c2s", "c2t", "c2u", "c2v", "c2w", "c2x", "c2y", "c2z", "d2s", "d2t", "d2u", "d2v", "d2w", "d2x", "d2y", "d2z", "e2s", "e2t", "e2u", "e2v", "e2w", "e2x", "e2y", "e2z", "f2s", "f2t", "f2u", "f2v", "f2w", "f2x", "f2y", "f2z", "a3s", "a3t", "a3u", "a3v", "a3w", "a3x", "a3y", "a3z", "b3s", "b3t", "b3u", "b3v", "b3w", "b3x", "b3y", "b3z", "c3s", "c3t", "c3u", "c3v", "c3w", "c3x", "c3y", "c3z", "d3s", "d3t", "d3u", "d3v", "d3w", "d3x", "d3y", "d3z", "e3s", "e3t", "e3u", "e3v", "e3w", "e3x", "e3y", "e3z", "f3s", "f3t", "f3u", "f3v", "f3w", "f3x", "f3y", "f3z", "a4s", "a4t", "a4u", "a4v", "a4w", "a4x", "a4y", "a4z", "b4s", "b4t", "b4u", "b4v", "b4w", "b4x", "b4y", "b4z", "c4s", "c4t", "c4u", "c4v", "c4w", "c4x", "c4y", "c4z", "d4s", "d4t", "d4u", "d4v", "d4w", "d4x", "d4y", "d4z", "e4s", "e4t", "e4u", "e4v", "e4w", "e4x", "e4y", "e4z", "f4s", "f4t", "f4u", "f4v", "f4w", "f4x", "f4y", "f4z", "a5s", "a5t", "a5u", "a5v", "a5w", "a5x", "a5y", "a5z", "b5s", "b5t", "b5u", "b5v", "b5w", "b5x", "b5y", "b5z", "c5s", "c5t", "c5u", "c5v", "c5w", "c5x", "c5y", "c5z", "d5s", "d5t", "d5u", "d5v", "d5w", "d5x", "d5y", "d5z", "e5s", "e5t", "e5u", "e5v", "e5w", "e5x", "e5y", "e5z", "f5s", "f5t", "f5u", "f5v", "f5w", "f5x", "f5y", "f5z", "a6s", "a6t", "a6u", "a6v", "a6w", "a6x", "a6y", "a6z", "b6s", "b6t", "b6u", "b6v", "b6w", "b6x", "b6y", "b6z", "d6s", "d6t", "d6u", "d6v", "d6w", "d6x", "d6y", "d6z", "e6s", 
"e6t", "e6u", "e6v", "e6w", "e6x", "e6y", "e6z", "f6s", "f6t", "f6u", "f6v", "f6w", "f6x", "f6y", "f6z"] + +# Apply action "b1u" +action: 10 + +# State 2 +# > t u < +# a b c d e f +# v 1 . @ . . . . v +# s 2 . . . . . . v +# 3 . . . . . . +# 4 . . . . . . +# z 5 . . . . . . w +# ^ 6 . . O . . . ^ +# > y x < +IsTerminal() = False +History() = [256, 10] +HistoryString() = "256, 10" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "256, 10" +InformationStateString(1) = "256, 10" +ObservationString(0) = " > t u <\n a b c d e f\nv 1 . @ . . . . v\ns 2 . . . . . . v\n 3 . . . . . . \n 4 . . . . . . \nz 5 . . . . . . w\n^ 6 . . O . . . ^\n > y x <\n" +ObservationString(1) = " > t u <\n a b c d e f\nv 1 . @ . . . . v\ns 2 . . . . . . v\n 3 . . . . . . \n 4 . . . . . . \nz 5 . . . . . . w\n^ 6 . . O . . . ^\n > y x <\n" +ObservationTensor(0): +◯◯◯◯◯◯ ◯◉◯◯◯◯ ◉◯◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◉◯◯◯ ◯◯◯◯◯◯ ◉◉◯◉◉◉ +ObservationTensor(1): +◯◉◯◯◯◯ ◯◯◯◯◯◯ ◉◯◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◉◯◯◯ ◉◉◯◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287] +StringLegalActions() = ["a1s", "a1t", "a1u", "a1v", "a1w", "a1x", "a1y", "a1z", "c1s", "c1t", "c1u", "c1v", "c1w", "c1x", "c1y", "c1z", "d1s", "d1t", "d1u", "d1v", "d1w", "d1x", "d1y", "d1z", "e1s", "e1t", "e1u", "e1v", "e1w", "e1x", "e1y", "e1z", "f1s", "f1t", "f1u", "f1v", "f1w", "f1x", "f1y", "f1z", "a2s", "a2t", "a2u", "a2v", "a2w", "a2x", "a2y", "a2z", "b2s", "b2t", "b2u", "b2v", "b2w", "b2x", "b2y", "b2z", "c2s", "c2t", "c2u", "c2v", "c2w", "c2x", "c2y", "c2z", "d2s", "d2t", "d2u", "d2v", "d2w", "d2x", "d2y", "d2z", "e2s", "e2t", "e2u", "e2v", "e2w", "e2x", "e2y", "e2z", "f2s", "f2t", "f2u", "f2v", "f2w", "f2x", "f2y", "f2z", "a3s", "a3t", "a3u", "a3v", "a3w", "a3x", "a3y", "a3z", "b3s", "b3t", "b3u", "b3v", "b3w", "b3x", "b3y", "b3z", "c3s", "c3t", "c3u", "c3v", "c3w", "c3x", "c3y", "c3z", "d3s", "d3t", "d3u", "d3v", "d3w", "d3x", "d3y", "d3z", "e3s", "e3t", "e3u", "e3v", "e3w", "e3x", "e3y", "e3z", "f3s", "f3t", "f3u", "f3v", "f3w", "f3x", "f3y", "f3z", "a4s", "a4t", "a4u", "a4v", "a4w", "a4x", "a4y", "a4z", "b4s", "b4t", 
"b4u", "b4v", "b4w", "b4x", "b4y", "b4z", "c4s", "c4t", "c4u", "c4v", "c4w", "c4x", "c4y", "c4z", "d4s", "d4t", "d4u", "d4v", "d4w", "d4x", "d4y", "d4z", "e4s", "e4t", "e4u", "e4v", "e4w", "e4x", "e4y", "e4z", "f4s", "f4t", "f4u", "f4v", "f4w", "f4x", "f4y", "f4z", "a5s", "a5t", "a5u", "a5v", "a5w", "a5x", "a5y", "a5z", "b5s", "b5t", "b5u", "b5v", "b5w", "b5x", "b5y", "b5z", "c5s", "c5t", "c5u", "c5v", "c5w", "c5x", "c5y", "c5z", "d5s", "d5t", "d5u", "d5v", "d5w", "d5x", "d5y", "d5z", "e5s", "e5t", "e5u", "e5v", "e5w", "e5x", "e5y", "e5z", "f5s", "f5t", "f5u", "f5v", "f5w", "f5x", "f5y", "f5z", "a6s", "a6t", "a6u", "a6v", "a6w", "a6x", "a6y", "a6z", "b6s", "b6t", "b6u", "b6v", "b6w", "b6x", "b6y", "b6z", "d6s", "d6t", "d6u", "d6v", "d6w", "d6x", "d6y", "d6z", "e6s", "e6t", "e6u", "e6v", "e6w", "e6x", "e6y", "e6z", "f6s", "f6t", "f6u", "f6v", "f6w", "f6x", "f6y", "f6z"] + +# Apply action "d6y" +action: 270 + +# State 3 +# > t u < +# a b c d e f +# v 1 . @ . . . . v +# s 2 . . . . . . v +# 3 . . . . . . +# 4 . . O . . . +# z 5 . . . . . . w +# ^ 6 . . . O . . ^ +# > y x < +IsTerminal() = False +History() = [256, 10, 270] +HistoryString() = "256, 10, 270" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "256, 10, 270" +InformationStateString(1) = "256, 10, 270" +ObservationString(0) = " > t u <\n a b c d e f\nv 1 . @ . . . . v\ns 2 . . . . . . v\n 3 . . . . . . \n 4 . . O . . . \nz 5 . . . . . . w\n^ 6 . . . O . . ^\n > y x <\n" +ObservationString(1) = " > t u <\n a b c d e f\nv 1 . @ . . . . v\ns 2 . . . . . . v\n 3 . . . . . . \n 4 . . O . . . \nz 5 . . . . . . w\n^ 6 . . . O . . ^\n > y x <\n" +ObservationTensor(0): +◯◯◯◯◯◯ ◯◉◯◯◯◯ ◉◯◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◉◯◯◯ ◯◯◯◯◯◯ ◉◉◯◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◉◯◯ ◯◯◯◯◯◯ ◉◉◉◯◉◉ +ObservationTensor(1): +◯◉◯◯◯◯ ◯◯◯◯◯◯ ◉◯◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◉◯◯◯ ◉◉◯◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◉◯◯ ◉◉◉◯◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287] +StringLegalActions() = ["a1s", "a1t", "a1u", "a1v", "a1w", "a1x", "a1y", "a1z", "c1s", "c1t", "c1u", "c1v", "c1w", "c1x", "c1y", "c1z", "d1s", "d1t", "d1u", "d1v", "d1w", "d1x", "d1y", "d1z", "e1s", "e1t", "e1u", "e1v", "e1w", "e1x", "e1y", "e1z", "f1s", "f1t", "f1u", "f1v", "f1w", 
"f1x", "f1y", "f1z", "a2s", "a2t", "a2u", "a2v", "a2w", "a2x", "a2y", "a2z", "b2s", "b2t", "b2u", "b2v", "b2w", "b2x", "b2y", "b2z", "c2s", "c2t", "c2u", "c2v", "c2w", "c2x", "c2y", "c2z", "d2s", "d2t", "d2u", "d2v", "d2w", "d2x", "d2y", "d2z", "e2s", "e2t", "e2u", "e2v", "e2w", "e2x", "e2y", "e2z", "f2s", "f2t", "f2u", "f2v", "f2w", "f2x", "f2y", "f2z", "a3s", "a3t", "a3u", "a3v", "a3w", "a3x", "a3y", "a3z", "b3s", "b3t", "b3u", "b3v", "b3w", "b3x", "b3y", "b3z", "c3s", "c3t", "c3u", "c3v", "c3w", "c3x", "c3y", "c3z", "d3s", "d3t", "d3u", "d3v", "d3w", "d3x", "d3y", "d3z", "e3s", "e3t", "e3u", "e3v", "e3w", "e3x", "e3y", "e3z", "f3s", "f3t", "f3u", "f3v", "f3w", "f3x", "f3y", "f3z", "a4s", "a4t", "a4u", "a4v", "a4w", "a4x", "a4y", "a4z", "b4s", "b4t", "b4u", "b4v", "b4w", "b4x", "b4y", "b4z", "d4s", "d4t", "d4u", "d4v", "d4w", "d4x", "d4y", "d4z", "e4s", "e4t", "e4u", "e4v", "e4w", "e4x", "e4y", "e4z", "f4s", "f4t", "f4u", "f4v", "f4w", "f4x", "f4y", "f4z", "a5s", "a5t", "a5u", "a5v", "a5w", "a5x", "a5y", "a5z", "b5s", "b5t", "b5u", "b5v", "b5w", "b5x", "b5y", "b5z", "c5s", "c5t", "c5u", "c5v", "c5w", "c5x", "c5y", "c5z", "d5s", "d5t", "d5u", "d5v", "d5w", "d5x", "d5y", "d5z", "e5s", "e5t", "e5u", "e5v", "e5w", "e5x", "e5y", "e5z", "f5s", "f5t", "f5u", "f5v", "f5w", "f5x", "f5y", "f5z", "a6s", "a6t", "a6u", "a6v", "a6w", "a6x", "a6y", "a6z", "b6s", "b6t", "b6u", "b6v", "b6w", "b6x", "b6y", "b6z", "c6s", "c6t", "c6u", "c6v", "c6w", "c6x", "c6y", "c6z", "e6s", "e6t", "e6u", "e6v", "e6w", "e6x", "e6y", "e6z", "f6s", "f6t", "f6u", "f6v", "f6w", "f6x", "f6y", "f6z"] + +# Apply action "b2s" +action: 56 + +# State 4 +# > t u < +# a b c d e f +# v 1 . . . . . . v +# s 2 @ @ . . . . v +# 3 . . . . . . +# 4 . . O . . . +# z 5 . . . . . . w +# ^ 6 . . . O . . ^ +# > y x < +IsTerminal() = False +History() = [256, 10, 270, 56] +HistoryString() = "256, 10, 270, 56" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "256, 10, 270, 56" +InformationStateString(1) = "256, 10, 270, 56" +ObservationString(0) = " > t u <\n a b c d e f\nv 1 . . . . . . v\ns 2 @ @ . . . . v\n 3 . . . . . . \n 4 . . O . . . \nz 5 . . . . . . w\n^ 6 . . . O . . ^\n > y x <\n" +ObservationString(1) = " > t u <\n a b c d e f\nv 1 . . . . . . v\ns 2 @ @ . . . . v\n 3 . . . . . . \n 4 . . O . . . \nz 5 . . . . . . w\n^ 6 . . . O . . 
^\n > y x <\n" +ObservationTensor(0): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◉◉◯◯◯◯ ◯◯◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◉◯◯◯ ◯◯◯◯◯◯ ◉◉◯◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◉◯◯ ◯◯◯◯◯◯ ◉◉◉◯◉◉ +ObservationTensor(1): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◉◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◉◯◯◯ ◉◉◯◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◉◯◯ ◉◉◉◯◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287] +StringLegalActions() = ["a1s", "a1t", "a1u", "a1v", "a1w", "a1x", "a1y", "a1z", "b1s", "b1t", "b1u", "b1v", "b1w", "b1x", "b1y", "b1z", "c1s", "c1t", "c1u", "c1v", "c1w", "c1x", "c1y", "c1z", "d1s", "d1t", "d1u", "d1v", "d1w", "d1x", "d1y", "d1z", "e1s", "e1t", "e1u", "e1v", "e1w", "e1x", "e1y", "e1z", "f1s", "f1t", "f1u", "f1v", "f1w", "f1x", "f1y", "f1z", "c2s", "c2t", "c2u", "c2v", "c2w", "c2x", "c2y", "c2z", "d2s", "d2t", "d2u", "d2v", "d2w", "d2x", "d2y", "d2z", "e2s", "e2t", "e2u", "e2v", "e2w", "e2x", "e2y", "e2z", "f2s", "f2t", "f2u", "f2v", "f2w", "f2x", "f2y", "f2z", "a3s", "a3t", "a3u", "a3v", "a3w", "a3x", "a3y", "a3z", "b3s", "b3t", "b3u", "b3v", "b3w", "b3x", "b3y", "b3z", "c3s", "c3t", "c3u", "c3v", "c3w", "c3x", "c3y", "c3z", "d3s", "d3t", "d3u", "d3v", "d3w", "d3x", "d3y", "d3z", "e3s", "e3t", "e3u", "e3v", "e3w", "e3x", "e3y", "e3z", "f3s", "f3t", "f3u", "f3v", "f3w", "f3x", "f3y", "f3z", "a4s", "a4t", "a4u", "a4v", "a4w", "a4x", "a4y", "a4z", "b4s", "b4t", "b4u", "b4v", "b4w", "b4x", "b4y", "b4z", "d4s", "d4t", "d4u", "d4v", "d4w", "d4x", "d4y", "d4z", "e4s", "e4t", "e4u", "e4v", "e4w", "e4x", "e4y", "e4z", "f4s", "f4t", "f4u", "f4v", "f4w", "f4x", "f4y", "f4z", "a5s", "a5t", "a5u", "a5v", "a5w", "a5x", "a5y", "a5z", "b5s", "b5t", "b5u", "b5v", "b5w", "b5x", "b5y", "b5z", "c5s", "c5t", "c5u", "c5v", "c5w", "c5x", "c5y", "c5z", "d5s", "d5t", "d5u", "d5v", "d5w", "d5x", "d5y", "d5z", "e5s", "e5t", "e5u", "e5v", "e5w", "e5x", "e5y", "e5z", "f5s", "f5t", "f5u", "f5v", "f5w", "f5x", "f5y", "f5z", "a6s", "a6t", "a6u", "a6v", "a6w", "a6x", "a6y", "a6z", "b6s", "b6t", "b6u", "b6v", "b6w", "b6x", "b6y", "b6z", "c6s", "c6t", "c6u", "c6v", "c6w", "c6x", "c6y", "c6z", "e6s", "e6t", "e6u", "e6v", "e6w", "e6x", "e6y", "e6z", "f6s", "f6t", "f6u", "f6v", "f6w", "f6x", "f6y", "f6z"] + +# Apply action "a3t" +action: 97 + +# State 5 +# > t u < +# a b c d e f +# v 1 O @ . . . . v +# s 2 . @ . . . . v +# 3 . . . . . . +# 4 . . O . . . +# z 5 . . . . . 
. w +# ^ 6 . . . O . . ^ +# > y x < +IsTerminal() = False +History() = [256, 10, 270, 56, 97] +HistoryString() = "256, 10, 270, 56, 97" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "256, 10, 270, 56, 97" +InformationStateString(1) = "256, 10, 270, 56, 97" +ObservationString(0) = " > t u <\n a b c d e f\nv 1 O @ . . . . v\ns 2 . @ . . . . v\n 3 . . . . . . \n 4 . . O . . . \nz 5 . . . . . . w\n^ 6 . . . O . . ^\n > y x <\n" +ObservationString(1) = " > t u <\n a b c d e f\nv 1 O @ . . . . v\ns 2 . @ . . . . v\n 3 . . . . . . \n 4 . . O . . . \nz 5 . . . . . . w\n^ 6 . . . O . . ^\n > y x <\n" +ObservationTensor(0): +◉◯◯◯◯◯ ◯◉◯◯◯◯ ◯◯◉◉◉◉ +◯◯◯◯◯◯ ◯◉◯◯◯◯ ◉◯◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◉◯◯◯ ◯◯◯◯◯◯ ◉◉◯◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◉◯◯ ◯◯◯◯◯◯ ◉◉◉◯◉◉ +ObservationTensor(1): +◯◉◯◯◯◯ ◉◯◯◯◯◯ ◯◯◉◉◉◉ +◯◉◯◯◯◯ ◯◯◯◯◯◯ ◉◯◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◉◯◯◯ ◉◉◯◉◉◉ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◉◉◉◉ +◯◯◯◯◯◯ ◯◯◯◉◯◯ ◉◉◉◯◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287] +StringLegalActions() = ["c1s", "c1t", "c1u", "c1v", "c1w", "c1x", "c1y", "c1z", "d1s", "d1t", "d1u", "d1v", "d1w", "d1x", "d1y", "d1z", "e1s", "e1t", "e1u", "e1v", "e1w", "e1x", "e1y", "e1z", "f1s", "f1t", "f1u", "f1v", "f1w", "f1x", "f1y", "f1z", "a2s", "a2t", "a2u", "a2v", "a2w", "a2x", "a2y", "a2z", "c2s", "c2t", "c2u", "c2v", "c2w", "c2x", "c2y", "c2z", "d2s", "d2t", "d2u", "d2v", "d2w", "d2x", "d2y", "d2z", "e2s", "e2t", "e2u", "e2v", "e2w", "e2x", "e2y", "e2z", "f2s", "f2t", "f2u", "f2v", "f2w", "f2x", "f2y", "f2z", "a3s", "a3t", "a3u", "a3v", "a3w", "a3x", "a3y", "a3z", "b3s", "b3t", "b3u", "b3v", "b3w", "b3x", "b3y", "b3z", "c3s", "c3t", "c3u", "c3v", "c3w", "c3x", "c3y", "c3z", "d3s", "d3t", "d3u", "d3v", "d3w", "d3x", "d3y", "d3z", "e3s", "e3t", "e3u", "e3v", "e3w", "e3x", "e3y", "e3z", "f3s", "f3t", "f3u", "f3v", "f3w", "f3x", "f3y", "f3z", "a4s", "a4t", "a4u", "a4v", "a4w", "a4x", "a4y", "a4z", "b4s", "b4t", "b4u", "b4v", "b4w", "b4x", "b4y", "b4z", "d4s", "d4t", "d4u", "d4v", "d4w", "d4x", "d4y", "d4z", "e4s", "e4t", "e4u", "e4v", "e4w", "e4x", "e4y", "e4z", "f4s", "f4t", "f4u", "f4v", "f4w", "f4x", "f4y", "f4z", "a5s", "a5t", "a5u", "a5v", "a5w", "a5x", "a5y", "a5z", "b5s", "b5t", "b5u", "b5v", "b5w", "b5x", "b5y", "b5z", "c5s", "c5t", "c5u", "c5v", "c5w", "c5x", "c5y", "c5z", "d5s", "d5t", "d5u", "d5v", "d5w", "d5x", "d5y", 
"d5z", "e5s", "e5t", "e5u", "e5v", "e5w", "e5x", "e5y", "e5z", "f5s", "f5t", "f5u", "f5v", "f5w", "f5x", "f5y", "f5z", "a6s", "a6t", "a6u", "a6v", "a6w", "a6x", "a6y", "a6z", "b6s", "b6t", "b6u", "b6v", "b6w", "b6x", "b6y", "b6z", "c6s", "c6t", "c6u", "c6v", "c6w", "c6x", "c6y", "c6z", "e6s", "e6t", "e6u", "e6v", "e6w", "e6x", "e6y", "e6z", "f6s", "f6t", "f6u", "f6v", "f6w", "f6x", "f6y", "f6z"] + +# Apply action "b6v" +action: 251 + +# State 6 +# Apply action "f2s" +action: 88 + +# State 7 +# Apply action "d3x" +action: 125 + +# State 8 +# Apply action "b1y" +action: 14 + +# State 9 +# Apply action "f1u" +action: 42 + +# State 10 +# Apply action "a1y" +action: 6 + +# State 11 +# Apply action "c4w" +action: 164 + +# State 12 +# Apply action "d5u" +action: 218 + +# State 13 +# Apply action "e2w" +action: 84 + +# State 14 +# Apply action "d1t" +action: 25 + +# State 15 +# Apply action "e1t" +action: 33 + +# State 16 +# Apply action "f4w" +action: 188 + +# State 17 +# Apply action "a1y" +action: 6 + +# State 18 +# Apply action "a2s" +action: 48 + +# State 19 +# Apply action "d6z" +action: 271 + +# State 20 +# > t u < +# a b c d e f +# v 1 O @ O O @ @ v +# s 2 . @ O O @ . v +# 3 @ O . @ . . +# 4 . @ @ O . O +# z 5 . . . . . O w +# ^ 6 O . . @ . . ^ +# > y x < +IsTerminal() = False +History() = [256, 10, 270, 56, 97, 251, 88, 125, 14, 42, 6, 164, 218, 84, 25, 33, 188, 6, 48, 271] +HistoryString() = "256, 10, 270, 56, 97, 251, 88, 125, 14, 42, 6, 164, 218, 84, 25, 33, 188, 6, 48, 271" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "256, 10, 270, 56, 97, 251, 88, 125, 14, 42, 6, 164, 218, 84, 25, 33, 188, 6, 48, 271" +InformationStateString(1) = "256, 10, 270, 56, 97, 251, 88, 125, 14, 42, 6, 164, 218, 84, 25, 33, 188, 6, 48, 271" +ObservationString(0) = " > t u <\n a b c d e f\nv 1 O @ O O @ @ v\ns 2 . @ O O @ . v\n 3 @ O . @ . . \n 4 . @ @ O . O \nz 5 . . . . . O w\n^ 6 O . . @ . . ^\n > y x <\n" +ObservationString(1) = " > t u <\n a b c d e f\nv 1 O @ O O @ @ v\ns 2 . @ O O @ . v\n 3 @ O . @ . . \n 4 . @ @ O . O \nz 5 . . . . . O w\n^ 6 O . . @ . . 
^\n > y x <\n" +ObservationTensor(0): +◉◯◉◉◯◯ ◯◉◯◯◉◉ ◯◯◯◯◯◯ +◯◯◉◉◯◯ ◯◉◯◯◉◯ ◉◯◯◯◯◉ +◯◉◯◯◯◯ ◉◯◯◉◯◯ ◯◯◉◯◉◉ +◯◯◯◉◯◉ ◯◉◉◯◯◯ ◉◯◯◯◉◯ +◯◯◯◯◯◉ ◯◯◯◯◯◯ ◉◉◉◉◉◯ +◉◯◯◯◯◯ ◯◯◯◉◯◯ ◯◉◉◯◉◉ +ObservationTensor(1): +◯◉◯◯◉◉ ◉◯◉◉◯◯ ◯◯◯◯◯◯ +◯◉◯◯◉◯ ◯◯◉◉◯◯ ◉◯◯◯◯◉ +◉◯◯◉◯◯ ◯◉◯◯◯◯ ◯◯◉◯◉◉ +◯◉◉◯◯◯ ◯◯◯◉◯◉ ◉◯◯◯◉◯ +◯◯◯◯◯◯ ◯◯◯◯◯◉ ◉◉◉◉◉◯ +◯◯◯◉◯◯ ◉◯◯◯◯◯ ◯◉◉◯◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [48, 49, 50, 51, 52, 53, 54, 55, 88, 89, 90, 91, 92, 93, 94, 95, 112, 113, 114, 115, 116, 117, 118, 119, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 176, 177, 178, 179, 180, 181, 182, 183, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287] +StringLegalActions() = ["a2s", "a2t", "a2u", "a2v", "a2w", "a2x", "a2y", "a2z", "f2s", "f2t", "f2u", "f2v", "f2w", "f2x", "f2y", "f2z", "c3s", "c3t", "c3u", "c3v", "c3w", "c3x", "c3y", "c3z", "e3s", "e3t", "e3u", "e3v", "e3w", "e3x", "e3y", "e3z", "f3s", "f3t", "f3u", "f3v", "f3w", "f3x", "f3y", "f3z", "a4s", "a4t", "a4u", "a4v", "a4w", "a4x", "a4y", "a4z", "e4s", "e4t", "e4u", "e4v", "e4w", "e4x", "e4y", "e4z", "a5s", "a5t", "a5u", "a5v", "a5w", "a5x", "a5y", "a5z", "b5s", "b5t", "b5u", "b5v", "b5w", "b5x", "b5y", "b5z", "c5s", "c5t", "c5u", "c5v", "c5w", "c5x", "c5y", "c5z", "d5s", "d5t", "d5u", "d5v", "d5w", "d5x", "d5y", "d5z", "e5s", "e5t", "e5u", "e5v", "e5w", "e5x", "e5y", "e5z", "b6s", "b6t", "b6u", "b6v", "b6w", "b6x", "b6y", "b6z", "c6s", "c6t", "c6u", "c6v", "c6w", "c6x", "c6y", "c6z", "e6s", "e6t", "e6u", "e6v", "e6w", "e6x", "e6y", "e6z", "f6s", "f6t", "f6u", "f6v", "f6w", "f6x", "f6y", "f6z"] + +# Apply action "f3u" +action: 138 + +# State 21 +# > t u < +# a b c d e f +# v 1 O @ O @ . O v +# s 2 . @ O @ @ . v +# 3 @ O . O O @ +# 4 . @ @ O . O +# z 5 . . . . . O w +# ^ 6 O . . @ . . ^ +# > y x < +IsTerminal() = False +History() = [256, 10, 270, 56, 97, 251, 88, 125, 14, 42, 6, 164, 218, 84, 25, 33, 188, 6, 48, 271, 138] +HistoryString() = "256, 10, 270, 56, 97, 251, 88, 125, 14, 42, 6, 164, 218, 84, 25, 33, 188, 6, 48, 271, 138" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "256, 10, 270, 56, 97, 251, 88, 125, 14, 42, 6, 164, 218, 84, 25, 33, 188, 6, 48, 271, 138" +InformationStateString(1) = "256, 10, 270, 56, 97, 251, 88, 125, 14, 42, 6, 164, 218, 84, 25, 33, 188, 6, 48, 271, 138" +ObservationString(0) = " > t u <\n a b c d e f\nv 1 O @ O @ . O v\ns 2 . @ O @ @ . v\n 3 @ O . O O @ \n 4 . @ @ O . O \nz 5 . . . . . O w\n^ 6 O . . @ . . ^\n > y x <\n" +ObservationString(1) = " > t u <\n a b c d e f\nv 1 O @ O @ . O v\ns 2 . @ O @ @ . v\n 3 @ O . O O @ \n 4 . @ @ O . O \nz 5 . . . . . O w\n^ 6 O . . @ . . 
^\n > y x <\n" +ObservationTensor(0): +◉◯◉◯◯◉ ◯◉◯◉◯◯ ◯◯◯◯◉◯ +◯◯◉◯◯◯ ◯◉◯◉◉◯ ◉◯◯◯◯◉ +◯◉◯◉◉◯ ◉◯◯◯◯◉ ◯◯◉◯◯◯ +◯◯◯◉◯◉ ◯◉◉◯◯◯ ◉◯◯◯◉◯ +◯◯◯◯◯◉ ◯◯◯◯◯◯ ◉◉◉◉◉◯ +◉◯◯◯◯◯ ◯◯◯◉◯◯ ◯◉◉◯◉◉ +ObservationTensor(1): +◯◉◯◉◯◯ ◉◯◉◯◯◉ ◯◯◯◯◉◯ +◯◉◯◉◉◯ ◯◯◉◯◯◯ ◉◯◯◯◯◉ +◉◯◯◯◯◉ ◯◉◯◉◉◯ ◯◯◉◯◯◯ +◯◉◉◯◯◯ ◯◯◯◉◯◉ ◉◯◯◯◉◯ +◯◯◯◯◯◯ ◯◯◯◯◯◉ ◉◉◉◉◉◯ +◯◯◯◉◯◯ ◉◯◯◯◯◯ ◯◉◉◯◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [32, 33, 34, 35, 36, 37, 38, 39, 48, 49, 50, 51, 52, 53, 54, 55, 88, 89, 90, 91, 92, 93, 94, 95, 112, 113, 114, 115, 116, 117, 118, 119, 144, 145, 146, 147, 148, 149, 150, 151, 176, 177, 178, 179, 180, 181, 182, 183, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287] +StringLegalActions() = ["e1s", "e1t", "e1u", "e1v", "e1w", "e1x", "e1y", "e1z", "a2s", "a2t", "a2u", "a2v", "a2w", "a2x", "a2y", "a2z", "f2s", "f2t", "f2u", "f2v", "f2w", "f2x", "f2y", "f2z", "c3s", "c3t", "c3u", "c3v", "c3w", "c3x", "c3y", "c3z", "a4s", "a4t", "a4u", "a4v", "a4w", "a4x", "a4y", "a4z", "e4s", "e4t", "e4u", "e4v", "e4w", "e4x", "e4y", "e4z", "a5s", "a5t", "a5u", "a5v", "a5w", "a5x", "a5y", "a5z", "b5s", "b5t", "b5u", "b5v", "b5w", "b5x", "b5y", "b5z", "c5s", "c5t", "c5u", "c5v", "c5w", "c5x", "c5y", "c5z", "d5s", "d5t", "d5u", "d5v", "d5w", "d5x", "d5y", "d5z", "e5s", "e5t", "e5u", "e5v", "e5w", "e5x", "e5y", "e5z", "b6s", "b6t", "b6u", "b6v", "b6w", "b6x", "b6y", "b6z", "c6s", "c6t", "c6u", "c6v", "c6w", "c6x", "c6y", "c6z", "e6s", "e6t", "e6u", "e6v", "e6w", "e6x", "e6y", "e6z", "f6s", "f6t", "f6u", "f6v", "f6w", "f6x", "f6y", "f6z"] + +# Apply action "b5w" +action: 204 + +# State 22 +# Apply action "b6x" +action: 253 + +# State 23 +# Apply action "c6y" +action: 262 + +# State 24 +# Apply action "a6y" +action: 246 + +# State 25 +# Apply action "c5u" +action: 210 + +# State 26 +# Apply action "e5s" +action: 224 + +# State 27 +# Apply action "c1u" +action: 18 + +# State 28 +# Apply action "e4u" +action: 178 + +# State 29 +# > t u < +# a b c d e f +# v 1 O O @ O @ @ v +# s 2 @ @ O O @ . v +# 3 O . @ @ . O +# 4 @ O O O O O +# z 5 . @ @ . O O w +# ^ 6 @ @ O @ . . ^ +# > y x < +IsTerminal() = True +History() = [256, 10, 270, 56, 97, 251, 88, 125, 14, 42, 6, 164, 218, 84, 25, 33, 188, 6, 48, 271, 138, 204, 253, 262, 246, 210, 224, 18, 178] +HistoryString() = "256, 10, 270, 56, 97, 251, 88, 125, 14, 42, 6, 164, 218, 84, 25, 33, 188, 6, 48, 271, 138, 204, 253, 262, 246, 210, 224, 18, 178" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "256, 10, 270, 56, 97, 251, 88, 125, 14, 42, 6, 164, 218, 84, 25, 33, 188, 6, 48, 271, 138, 204, 253, 262, 246, 210, 224, 18, 178" +InformationStateString(1) = "256, 10, 270, 56, 97, 251, 88, 125, 14, 42, 6, 164, 218, 84, 25, 33, 188, 6, 48, 271, 138, 204, 253, 262, 246, 210, 224, 18, 178" +ObservationString(0) = " > t u <\n a b c d e f\nv 1 O O @ O @ @ v\ns 2 @ @ O O @ . v\n 3 O . @ @ . O \n 4 @ O O O O O \nz 5 . @ @ . O O w\n^ 6 @ @ O @ . . ^\n > y x <\n" +ObservationString(1) = " > t u <\n a b c d e f\nv 1 O O @ O @ @ v\ns 2 @ @ O O @ . v\n 3 O . @ @ . O \n 4 @ O O O O O \nz 5 . @ @ . O O w\n^ 6 @ @ O @ . . 
^\n > y x <\n" +ObservationTensor(0): +◉◉◯◉◯◯ ◯◯◉◯◉◉ ◯◯◯◯◯◯ +◯◯◉◉◯◯ ◉◉◯◯◉◯ ◯◯◯◯◯◉ +◉◯◯◯◯◉ ◯◯◉◉◯◯ ◯◉◯◯◉◯ +◯◉◉◉◉◉ ◉◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◉◉ ◯◉◉◯◯◯ ◉◯◯◉◯◯ +◯◯◉◯◯◯ ◉◉◯◉◯◯ ◯◯◯◯◉◉ +ObservationTensor(1): +◯◯◉◯◉◉ ◉◉◯◉◯◯ ◯◯◯◯◯◯ +◉◉◯◯◉◯ ◯◯◉◉◯◯ ◯◯◯◯◯◉ +◯◯◉◉◯◯ ◉◯◯◯◯◉ ◯◉◯◯◉◯ +◉◯◯◯◯◯ ◯◉◉◉◉◉ ◯◯◯◯◯◯ +◯◉◉◯◯◯ ◯◯◯◯◉◉ ◉◯◯◉◯◯ +◉◉◯◉◯◯ ◯◯◉◯◯◯ ◯◯◯◯◉◉ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/phantom_go.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/phantom_go.txt new file mode 100644 index 0000000..91fda7f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/phantom_go.txt @@ -0,0 +1,3341 @@ +game: phantom_go + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Phantom Go" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["board_size", "handicap", "komi", "max_game_length"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "phantom_go" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 82 +PolicyTensorShape() = [82] +MaxChanceOutcomes() = 0 +GetParameters() = {board_size=9,handicap=0,komi=7.5,max_game_length=324} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [326] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 326 +MaxGameLength() = 324 +ToString() = "phantom_go()" + +# State 0 +# GoState(komi=7.5, to_play=B, history.size()=0, stones_count: w0 b0) +# +# 9 +++++++++ +# 8 +++++++++ +# 7 +++++++++ +# 6 +++++++++ +# 5 +++++++++ +# 4 +++++++++ +# 3 +++++++++ +# 2 +++++++++ +# 1 +++++++++ +# ABCDEFGHJ +# +# Observation white: +# 9 +++++++++ +# 8 +++++++++ +# 7 +++++++++ +# 6 +++++++++ +# 5 +++++++++ +# 4 +++++++++ +# 3 +++++++++ +# 2 +++++++++ +# 1 +++++++++ +# ABCDEFGHJ +# +# Observation black: +# 9 +++++++++ +# 8 +++++++++ +# 7 +++++++++ +# 6 +++++++++ +# 5 +++++++++ +# 4 +++++++++ +# 3 +++++++++ +# 2 +++++++++ +# 1 +++++++++ +# ABCDEFGHJ +# +# Previous move was valid +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 9 +++++++++\n 8 +++++++++\n 7 +++++++++\n 6 +++++++++\n 5 +++++++++\n 4 +++++++++\n 3 +++++++++\n 2 +++++++++\n 1 +++++++++\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationString(1) = " 9 +++++++++\n 8 +++++++++\n 7 +++++++++\n 6 +++++++++\n 5 +++++++++\n 4 +++++++++\n 3 +++++++++\n 2 +++++++++\n 1 +++++++++\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationTensor(0): binvec(326, 0xffffffffffffffffffff8000000000000000000000000000000000000000000000000000000000000) +ObservationTensor(1): binvec(326, 0xffffffffffffffffffff8000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 
72, 73, 74, 75, 76, 77, 78, 79, 80, 81] +StringLegalActions() = ["B a1", "B b1", "B c1", "B d1", "B e1", "B f1", "B g1", "B h1", "B j1", "B a2", "B b2", "B c2", "B d2", "B e2", "B f2", "B g2", "B h2", "B j2", "B a3", "B b3", "B c3", "B d3", "B e3", "B f3", "B g3", "B h3", "B j3", "B a4", "B b4", "B c4", "B d4", "B e4", "B f4", "B g4", "B h4", "B j4", "B a5", "B b5", "B c5", "B d5", "B e5", "B f5", "B g5", "B h5", "B j5", "B a6", "B b6", "B c6", "B d6", "B e6", "B f6", "B g6", "B h6", "B j6", "B a7", "B b7", "B c7", "B d7", "B e7", "B f7", "B g7", "B h7", "B j7", "B a8", "B b8", "B c8", "B d8", "B e8", "B f8", "B g8", "B h8", "B j8", "B a9", "B b9", "B c9", "B d9", "B e9", "B f9", "B g9", "B h9", "B j9", "B PASS"] + +# Apply action "B a4" +action: 27 + +# State 1 +# GoState(komi=7.5, to_play=W, history.size()=1, stones_count: w0 b1) +# +# 9 +++++++++ +# 8 +++++++++ +# 7 +++++++++ +# 6 +++++++++ +# 5 +++++++++ +# 4 X++++++++ +# 3 +++++++++ +# 2 +++++++++ +# 1 +++++++++ +# ABCDEFGHJ +# +# Observation white: +# 9 +++++++++ +# 8 +++++++++ +# 7 +++++++++ +# 6 +++++++++ +# 5 +++++++++ +# 4 +++++++++ +# 3 +++++++++ +# 2 +++++++++ +# 1 +++++++++ +# ABCDEFGHJ +# +# Observation black: +# 9 +++++++++ +# 8 +++++++++ +# 7 +++++++++ +# 6 +++++++++ +# 5 +++++++++ +# 4 X++++++++ +# 3 +++++++++ +# 2 +++++++++ +# 1 +++++++++ +# ABCDEFGHJ +# +# Previous move was valid +IsTerminal() = False +History() = [27] +HistoryString() = "27" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = " 9 +++++++++\n 8 +++++++++\n 7 +++++++++\n 6 +++++++++\n 5 +++++++++\n 4 X++++++++\n 3 +++++++++\n 2 +++++++++\n 1 +++++++++\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationString(1) = " 9 +++++++++\n 8 +++++++++\n 7 +++++++++\n 6 +++++++++\n 5 +++++++++\n 4 +++++++++\n 3 +++++++++\n 2 +++++++++\n 1 +++++++++\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationTensor(0): binvec(326, 0x2ffffffefffffffffffff80000000000000000000000000040000000000001ffffffffffffffffffff) +ObservationTensor(1): binvec(326, 0x2ffffffffffffffffffff80000000000000000000000000000000000000001ffffffffffffffffffff) +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81] +StringLegalActions() = ["W a1", "W b1", "W c1", "W d1", "W e1", "W f1", "W g1", "W h1", "W j1", "W a2", "W b2", "W c2", "W d2", "W e2", "W f2", "W g2", "W h2", "W j2", "W a3", "W b3", "W c3", "W d3", "W e3", "W f3", "W g3", "W h3", "W j3", "W a4", "W b4", "W c4", "W d4", "W e4", "W f4", "W g4", "W h4", "W j4", "W a5", "W b5", "W c5", "W d5", "W e5", "W f5", "W g5", "W h5", "W j5", "W a6", "W b6", "W c6", "W d6", "W e6", "W f6", "W g6", "W h6", "W j6", "W a7", "W b7", "W c7", "W d7", "W e7", "W f7", "W g7", "W h7", "W j7", "W a8", "W b8", "W c8", "W d8", "W e8", "W f8", "W g8", "W h8", "W j8", "W a9", "W b9", "W c9", "W d9", "W e9", "W f9", "W g9", "W h9", "W j9", "W PASS"] + +# Apply action "W e9" +action: 76 + +# State 2 +# GoState(komi=7.5, to_play=B, history.size()=2, stones_count: w1 b1) +# +# 9 ++++O++++ +# 8 +++++++++ +# 7 +++++++++ +# 6 +++++++++ +# 5 +++++++++ +# 4 X++++++++ +# 3 +++++++++ +# 2 +++++++++ +# 1 +++++++++ +# ABCDEFGHJ +# +# Observation white: +# 9 ++++O++++ +# 8 +++++++++ +# 7 +++++++++ +# 6 +++++++++ +# 5 
+++++++++ +# 4 +++++++++ +# 3 +++++++++ +# 2 +++++++++ +# 1 +++++++++ +# ABCDEFGHJ +# +# Observation black: +# 9 +++++++++ +# 8 +++++++++ +# 7 +++++++++ +# 6 +++++++++ +# 5 +++++++++ +# 4 X++++++++ +# 3 +++++++++ +# 2 +++++++++ +# 1 +++++++++ +# ABCDEFGHJ +# +# Previous move was valid +IsTerminal() = False +History() = [27, 76] +HistoryString() = "27, 76" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 9 +++++++++\n 8 +++++++++\n 7 +++++++++\n 6 +++++++++\n 5 +++++++++\n 4 X++++++++\n 3 +++++++++\n 2 +++++++++\n 1 +++++++++\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationString(1) = " 9 ++++O++++\n 8 +++++++++\n 7 +++++++++\n 6 +++++++++\n 5 +++++++++\n 4 +++++++++\n 3 +++++++++\n 2 +++++++++\n 1 +++++++++\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationTensor(0): binvec(326, 0x3ffffffefffffffffffff8000000000000000000000000004000000000000000000000000000000000) +ObservationTensor(1): binvec(326, 0x3fffffffffffffffffff78000000000000000000400000000000000000000000000000000000000000) +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81] +StringLegalActions() = ["B a1", "B b1", "B c1", "B d1", "B e1", "B f1", "B g1", "B h1", "B j1", "B a2", "B b2", "B c2", "B d2", "B e2", "B f2", "B g2", "B h2", "B j2", "B a3", "B b3", "B c3", "B d3", "B e3", "B f3", "B g3", "B h3", "B j3", "B b4", "B c4", "B d4", "B e4", "B f4", "B g4", "B h4", "B j4", "B a5", "B b5", "B c5", "B d5", "B e5", "B f5", "B g5", "B h5", "B j5", "B a6", "B b6", "B c6", "B d6", "B e6", "B f6", "B g6", "B h6", "B j6", "B a7", "B b7", "B c7", "B d7", "B e7", "B f7", "B g7", "B h7", "B j7", "B a8", "B b8", "B c8", "B d8", "B e8", "B f8", "B g8", "B h8", "B j8", "B a9", "B b9", "B c9", "B d9", "B e9", "B f9", "B g9", "B h9", "B j9", "B PASS"] + +# Apply action "B a6" +action: 45 + +# State 3 +# GoState(komi=7.5, to_play=W, history.size()=3, stones_count: w1 b2) +# +# 9 ++++O++++ +# 8 +++++++++ +# 7 +++++++++ +# 6 X++++++++ +# 5 +++++++++ +# 4 X++++++++ +# 3 +++++++++ +# 2 +++++++++ +# 1 +++++++++ +# ABCDEFGHJ +# +# Observation white: +# 9 ++++O++++ +# 8 +++++++++ +# 7 +++++++++ +# 6 +++++++++ +# 5 +++++++++ +# 4 +++++++++ +# 3 +++++++++ +# 2 +++++++++ +# 1 +++++++++ +# ABCDEFGHJ +# +# Observation black: +# 9 +++++++++ +# 8 +++++++++ +# 7 +++++++++ +# 6 X++++++++ +# 5 +++++++++ +# 4 X++++++++ +# 3 +++++++++ +# 2 +++++++++ +# 1 +++++++++ +# ABCDEFGHJ +# +# Previous move was valid +IsTerminal() = False +History() = [27, 76, 45] +HistoryString() = "27, 76, 45" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = " 9 +++++++++\n 8 +++++++++\n 7 +++++++++\n 6 X++++++++\n 5 +++++++++\n 4 X++++++++\n 3 +++++++++\n 2 +++++++++\n 1 +++++++++\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationString(1) = " 9 ++++O++++\n 8 +++++++++\n 7 +++++++++\n 6 +++++++++\n 5 +++++++++\n 4 +++++++++\n 3 +++++++++\n 2 +++++++++\n 1 +++++++++\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationTensor(0) = [2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(1) = [2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81] +StringLegalActions() = ["W a1", "W b1", "W c1", "W d1", "W e1", "W f1", "W g1", "W h1", "W j1", "W a2", "W b2", "W c2", "W d2", "W e2", 
"W f2", "W g2", "W h2", "W j2", "W a3", "W b3", "W c3", "W d3", "W e3", "W f3", "W g3", "W h3", "W j3", "W a4", "W b4", "W c4", "W d4", "W e4", "W f4", "W g4", "W h4", "W j4", "W a5", "W b5", "W c5", "W d5", "W e5", "W f5", "W g5", "W h5", "W j5", "W a6", "W b6", "W c6", "W d6", "W e6", "W f6", "W g6", "W h6", "W j6", "W a7", "W b7", "W c7", "W d7", "W e7", "W f7", "W g7", "W h7", "W j7", "W a8", "W b8", "W c8", "W d8", "W e8", "W f8", "W g8", "W h8", "W j8", "W a9", "W b9", "W c9", "W d9", "W f9", "W g9", "W h9", "W j9", "W PASS"] + +# Apply action "W f2" +action: 14 + +# State 4 +# GoState(komi=7.5, to_play=B, history.size()=4, stones_count: w2 b2) +# +# 9 ++++O++++ +# 8 +++++++++ +# 7 +++++++++ +# 6 X++++++++ +# 5 +++++++++ +# 4 X++++++++ +# 3 +++++++++ +# 2 +++++O+++ +# 1 +++++++++ +# ABCDEFGHJ +# +# Observation white: +# 9 ++++O++++ +# 8 +++++++++ +# 7 +++++++++ +# 6 +++++++++ +# 5 +++++++++ +# 4 +++++++++ +# 3 +++++++++ +# 2 +++++O+++ +# 1 +++++++++ +# ABCDEFGHJ +# +# Observation black: +# 9 +++++++++ +# 8 +++++++++ +# 7 +++++++++ +# 6 X++++++++ +# 5 +++++++++ +# 4 X++++++++ +# 3 +++++++++ +# 2 +++++++++ +# 1 +++++++++ +# ABCDEFGHJ +# +# Previous move was valid +IsTerminal() = False +History() = [27, 76, 45, 14] +HistoryString() = "27, 76, 45, 14" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 9 +++++++++\n 8 +++++++++\n 7 +++++++++\n 6 X++++++++\n 5 +++++++++\n 4 X++++++++\n 3 +++++++++\n 2 +++++++++\n 1 +++++++++\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationString(1) = " 9 ++++O++++\n 8 +++++++++\n 7 +++++++++\n 6 +++++++++\n 5 +++++++++\n 4 +++++++++\n 3 +++++++++\n 2 +++++O+++\n 1 +++++++++\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationTensor(0) = [2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81] +StringLegalActions() = ["B a1", "B b1", "B c1", "B d1", "B e1", "B f1", "B g1", "B h1", "B j1", "B a2", "B b2", "B c2", "B d2", "B e2", "B f2", "B g2", "B h2", "B j2", "B a3", "B b3", "B c3", "B d3", "B e3", "B f3", "B g3", "B h3", "B j3", "B b4", "B c4", "B d4", "B e4", "B f4", "B g4", "B h4", "B j4", "B a5", "B b5", "B c5", "B d5", "B e5", "B f5", "B g5", "B h5", "B j5", "B b6", "B c6", "B d6", "B e6", "B f6", "B g6", "B h6", "B j6", "B a7", "B b7", "B c7", "B d7", "B e7", "B f7", "B g7", "B h7", "B j7", "B a8", "B b8", "B c8", "B d8", "B e8", "B f8", "B g8", "B h8", "B j8", "B a9", "B b9", "B c9", "B d9", "B e9", "B f9", "B g9", "B h9", "B j9", "B PASS"] + +# Apply action "B d5" +action: 39 + +# State 5 +# GoState(komi=7.5, to_play=W, history.size()=5, stones_count: w2 b3) +# +# 9 ++++O++++ +# 8 +++++++++ +# 7 +++++++++ +# 6 X++++++++ +# 5 +++X+++++ +# 4 X++++++++ +# 3 +++++++++ +# 2 +++++O+++ +# 1 +++++++++ +# ABCDEFGHJ +# +# Observation white: +# 9 ++++O++++ +# 8 +++++++++ +# 7 +++++++++ +# 6 +++++++++ +# 5 +++++++++ +# 4 +++++++++ +# 3 +++++++++ +# 2 +++++O+++ +# 1 +++++++++ +# ABCDEFGHJ +# +# Observation black: +# 9 +++++++++ +# 8 +++++++++ +# 7 +++++++++ +# 6 X++++++++ +# 5 +++X+++++ +# 4 X++++++++ +# 3 +++++++++ +# 2 +++++++++ +# 1 +++++++++ +# ABCDEFGHJ +# +# Previous move was valid +IsTerminal() = False +History() = [27, 76, 45, 14, 39] +HistoryString() = "27, 76, 45, 14, 39" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = " 9 +++++++++\n 8 +++++++++\n 7 +++++++++\n 6 X++++++++\n 5 +++X+++++\n 4 X++++++++\n 3 +++++++++\n 2 +++++++++\n 1 +++++++++\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationString(1) = 
" 9 ++++O++++\n 8 +++++++++\n 7 +++++++++\n 6 +++++++++\n 5 +++++++++\n 4 +++++++++\n 3 +++++++++\n 2 +++++O+++\n 1 +++++++++\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationTensor(0) = [3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(1) = [3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 
6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81] +StringLegalActions() = ["W a1", "W b1", "W c1", "W d1", "W e1", "W f1", "W g1", "W h1", "W j1", "W a2", "W b2", "W c2", "W d2", "W e2", "W g2", "W h2", "W j2", "W a3", "W b3", "W c3", "W d3", "W e3", "W f3", "W g3", "W h3", "W j3", "W a4", "W b4", "W c4", "W d4", "W e4", "W f4", "W g4", "W h4", "W j4", "W a5", "W b5", "W c5", "W d5", "W e5", "W f5", "W g5", "W h5", "W j5", "W a6", "W b6", "W c6", "W d6", "W e6", "W f6", "W g6", "W h6", "W j6", "W a7", "W b7", "W c7", "W d7", "W e7", "W f7", "W g7", "W h7", "W j7", "W a8", "W b8", "W c8", "W d8", "W e8", "W f8", "W g8", "W h8", "W j8", "W a9", "W b9", "W c9", "W d9", "W f9", "W g9", "W h9", "W j9", "W PASS"] + +# Apply action "W b3" +action: 19 + +# State 6 +# Apply action "B e9" +action: 76 + +# State 7 +# Apply action "B d1" +action: 3 + +# State 8 +# Apply action "W b6" +action: 46 + +# State 9 +# Apply action "B c4" +action: 29 + +# State 10 +# Apply action "W a1" +action: 0 + +# State 11 +# Apply action "B e3" +action: 22 + +# State 12 +# Apply action "W e3" +action: 22 + +# State 13 +# Apply action "W g8" +action: 69 + +# State 14 +# Apply action "B a8" +action: 63 + +# State 15 +# Apply action "W e8" +action: 67 + +# State 16 +# Apply action "B g4" +action: 33 + +# State 17 +# Apply action "W f6" +action: 50 + +# State 18 +# Apply action "B PASS" +action: 81 + +# State 19 +# Apply action "W d3" +action: 21 + +# State 20 +# GoState(komi=7.5, to_play=B, history.size()=20, stones_count: w9 b8) +# +# 9 ++++O++++ +# 8 X+++O+O++ +# 7 +++++++++ +# 6 XO+++O+++ +# 5 +++X+++++ +# 4 X+X+++X++ +# 3 +O+OX++++ +# 2 +++++O+++ +# 1 O++X+++++ +# ABCDEFGHJ +# +# Observation white: +# 9 ++++O++++ +# 8 ++++O+O++ +# 7 +++++++++ +# 6 +O+++O+++ +# 5 +++++++++ +# 4 +++++++++ +# 3 +O+OX++++ +# 2 +++++O+++ +# 1 O++++++++ +# ABCDEFGHJ +# +# Observation black: +# 9 ++++O++++ +# 8 X++++++++ +# 7 +++++++++ +# 6 X++++++++ +# 5 +++X+++++ +# 4 X+X+++X++ +# 3 ++++X++++ +# 2 +++++++++ +# 1 +++X+++++ +# ABCDEFGHJ +# +# Previous move was valid +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 9 ++++O++++\n 8 X++++++++\n 7 +++++++++\n 6 X++++++++\n 5 +++X+++++\n 4 X+X+++X++\n 3 ++++X++++\n 2 +++++++++\n 1 +++X+++++\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationString(1) = " 9 ++++O++++\n 8 ++++O+O++\n 7 +++++++++\n 6 +O+++O+++\n 5 +++++++++\n 4 +++++++++\n 3 +O+OX++++\n 2 +++++O+++\n 1 O++++++++\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationTensor(0) = [8.0, 9.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [8.0, 9.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 28, 30, 31, 32, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81] +StringLegalActions() = ["B a1", "B b1", "B c1", "B e1", "B f1", "B g1", "B h1", "B j1", "B a2", "B b2", "B c2", "B d2", "B e2", "B f2", "B g2", "B h2", "B j2", "B a3", "B b3", "B c3", "B d3", "B f3", "B g3", "B h3", "B j3", "B b4", "B d4", "B e4", "B f4", "B h4", "B j4", "B a5", "B b5", "B c5", "B e5", "B f5", "B g5", "B h5", "B j5", "B b6", "B c6", "B d6", "B e6", "B f6", "B g6", "B h6", "B j6", "B a7", "B b7", "B c7", "B d7", "B e7", "B f7", "B g7", "B h7", "B j7", "B b8", "B c8", "B d8", 
"B e8", "B f8", "B g8", "B h8", "B j8", "B a9", "B b9", "B c9", "B d9", "B f9", "B g9", "B h9", "B j9", "B PASS"] + +# Apply action "B h4" +action: 34 + +# State 21 +# GoState(komi=7.5, to_play=W, history.size()=21, stones_count: w9 b9) +# +# 9 ++++O++++ +# 8 X+++O+O++ +# 7 +++++++++ +# 6 XO+++O+++ +# 5 +++X+++++ +# 4 X+X+++XX+ +# 3 +O+OX++++ +# 2 +++++O+++ +# 1 O++X+++++ +# ABCDEFGHJ +# +# Observation white: +# 9 ++++O++++ +# 8 ++++O+O++ +# 7 +++++++++ +# 6 +O+++O+++ +# 5 +++++++++ +# 4 +++++++++ +# 3 +O+OX++++ +# 2 +++++O+++ +# 1 O++++++++ +# ABCDEFGHJ +# +# Observation black: +# 9 ++++O++++ +# 8 X++++++++ +# 7 +++++++++ +# 6 X++++++++ +# 5 +++X+++++ +# 4 X+X+++XX+ +# 3 ++++X++++ +# 2 +++++++++ +# 1 +++X+++++ +# ABCDEFGHJ +# +# Previous move was valid +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = " 9 ++++O++++\n 8 X++++++++\n 7 +++++++++\n 6 X++++++++\n 5 +++X+++++\n 4 X+X+++XX+\n 3 ++++X++++\n 2 +++++++++\n 1 +++X+++++\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationString(1) = " 9 ++++O++++\n 8 ++++O+O++\n 7 +++++++++\n 6 +O+++O+++\n 5 +++++++++\n 4 +++++++++\n 3 +O+OX++++\n 2 +++++O+++\n 1 O++++++++\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationTensor(0) = [9.0, 9.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(1) = [9.0, 9.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 20, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47, 48, 49, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 68, 70, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81] +StringLegalActions() = ["W b1", "W c1", "W d1", "W e1", "W f1", "W g1", "W h1", "W j1", "W a2", "W b2", "W c2", "W d2", "W e2", "W g2", "W h2", "W j2", "W a3", "W c3", "W f3", "W g3", "W h3", "W j3", "W a4", "W b4", "W c4", "W d4", "W e4", "W f4", "W g4", "W h4", "W j4", "W a5", "W b5", "W c5", "W d5", "W e5", "W f5", "W g5", "W h5", "W j5", "W a6", "W c6", "W d6", "W e6", "W g6", "W h6", "W j6", "W a7", "W b7", "W c7", "W d7", "W e7", "W f7", "W g7", "W h7", "W j7", "W a8", "W b8", "W c8", "W d8", "W f8", "W h8", "W j8", "W a9", "W b9", "W c9", "W d9", "W f9", "W g9", "W h9", "W j9", "W PASS"] + +# Apply action "W a7" +action: 54 + +# State 22 +# Apply action "B a1" +action: 0 + +# State 23 +# Apply action "B g1" +action: 6 + +# State 24 +# Apply action "W c4" +action: 29 + +# State 25 +# Apply action "W a5" +action: 36 + +# State 26 +# Apply action "B b2" +action: 10 + +# State 27 +# Apply action "W h7" +action: 61 + +# State 28 +# Apply action "B e7" +action: 58 + +# State 29 +# Apply action "W f9" +action: 77 + +# State 30 +# Apply action "B f8" +action: 68 + +# State 31 +# Apply action "W g5" +action: 42 + +# State 32 +# Apply action "B e4" +action: 31 + +# State 33 +# Apply action "W d1" +action: 3 + +# State 34 +# Apply action "W d5" +action: 39 + +# State 35 +# Apply action "W h6" +action: 52 + +# State 36 +# Apply action "B c7" +action: 56 + +# State 37 +# Apply action "W d9" +action: 75 + +# State 38 +# Apply action "B g9" +action: 78 + +# State 39 +# GoState(komi=7.5, to_play=W, history.size()=39, stones_count: w16 b15) +# +# 9 +++OOOX++ +# 8 X+++OXO++ +# 7 O+X+X++O+ +# 6 +O+++O+O+ +# 5 O++X++O++ +# 4 X+X+X+XX+ +# 3 +O+OX++++ +# 2 +X+++O+++ +# 1 O++X++X++ +# ABCDEFGHJ +# +# Observation white: +# 9 +++OOO+++ +# 8 ++++O+O++ +# 7 O++++++O+ +# 6 +O+++O+O+ +# 5 O++X++O++ +# 4 ++X++++++ +# 3 +O+OX++++ +# 2 +++++O+++ +# 1 O++X+++++ +# ABCDEFGHJ +# +# Observation black: +# 9 ++++O+X++ +# 8 X++++X+++ +# 7 ++X+X++++ +# 6 +++++++++ +# 5 +++X+++++ +# 4 X+X+X+XX+ +# 3 ++++X++++ +# 2 
+X+++++++ +# 1 O++X++X++ +# ABCDEFGHJ +# +# Previous move was valid +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = " 9 ++++O+X++\n 8 X++++X+++\n 7 ++X+X++++\n 6 +++++++++\n 5 +++X+++++\n 4 X+X+X+XX+\n 3 ++++X++++\n 2 +X+++++++\n 1 O++X++X++\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationString(1) = " 9 +++OOO+++\n 8 ++++O+O++\n 7 O++++++O+\n 6 +O+++O+O+\n 5 O++X++O++\n 4 ++X++++++\n 3 +O+OX++++\n 2 +++++O+++\n 1 O++X+++++\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationTensor(0) = [15.0, 16.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(1) = [15.0, 16.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 20, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 37, 38, 40, 41, 43, 44, 45, 47, 48, 49, 51, 53, 55, 56, 57, 58, 59, 60, 62, 63, 64, 65, 66, 68, 70, 71, 72, 73, 74, 78, 79, 80, 81] +StringLegalActions() = ["W b1", "W c1", "W e1", "W f1", "W g1", "W h1", "W j1", "W a2", "W b2", "W c2", "W d2", "W e2", "W g2", "W h2", "W j2", "W a3", "W c3", "W f3", "W g3", "W h3", "W j3", "W a4", "W b4", "W d4", "W e4", "W f4", "W g4", "W h4", "W j4", "W b5", "W c5", "W e5", "W f5", "W h5", "W j5", "W a6", "W c6", "W d6", "W e6", "W g6", "W j6", "W b7", "W c7", "W d7", "W e7", "W f7", "W g7", "W j7", "W a8", "W b8", "W c8", "W d8", "W f8", "W h8", "W j8", "W a9", "W b9", "W c9", "W g9", "W h9", "W j9", "W PASS"] + +# Apply action "W g1" +action: 6 + +# State 40 +# Apply action "W f1" +action: 5 + +# State 41 +# Apply action "B d6" +action: 48 + +# State 42 +# Apply action "W c5" +action: 38 + +# State 43 +# GoState(komi=7.5, to_play=B, history.size()=43, stones_count: w18 b16) +# +# 9 +++OOOX++ +# 8 X+++OXO++ +# 7 O+X+X++O+ +# 6 +O+X+O+O+ +# 5 O+OX++O++ +# 4 X+X+X+XX+ +# 3 +O+OX++++ +# 2 +X+++O+++ +# 1 O++X+OX++ +# ABCDEFGHJ +# +# Observation white: +# 9 +++OOO+++ +# 8 ++++O+O++ +# 7 O++++++O+ +# 6 +O+++O+O+ +# 5 O+OX++O++ +# 4 ++X++++++ +# 3 +O+OX++++ +# 2 +++++O+++ +# 1 O++X+OX++ +# ABCDEFGHJ +# +# Observation black: +# 9 ++++O+X++ +# 8 X++++X+++ +# 7 ++X+X++++ +# 6 +++X+++++ +# 5 +++X+++++ +# 4 X+X+X+XX+ +# 3 ++++X++++ +# 2 +X+++++++ +# 1 O++X++X++ +# ABCDEFGHJ +# +# Previous move was valid +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 9 ++++O+X++\n 8 X++++X+++\n 7 ++X+X++++\n 6 +++X+++++\n 5 +++X+++++\n 4 X+X+X+XX+\n 3 ++++X++++\n 2 +X+++++++\n 1 O++X++X++\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationString(1) = " 9 +++OOO+++\n 8 ++++O+O++\n 7 O++++++O+\n 6 +O+++O+O+\n 5 O+OX++O++\n 4 ++X++++++\n 3 +O+OX++++\n 2 +++++O+++\n 1 O++X+OX++\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationTensor(0) = [16.0, 18.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [16.0, 18.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 4, 5, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 28, 30, 32, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 53, 54, 55, 57, 59, 60, 61, 62, 64, 65, 66, 67, 69, 70, 71, 72, 73, 74, 75, 77, 79, 80, 81] +StringLegalActions() = ["B b1", "B c1", "B e1", "B f1", "B h1", "B j1", "B a2", "B c2", "B d2", "B e2", "B f2", "B g2", "B h2", "B j2", "B a3", "B b3", "B c3", "B d3", "B f3", "B g3", "B h3", "B j3", "B b4", "B d4", "B f4", "B j4", "B a5", "B b5", "B c5", "B e5", "B f5", "B g5", "B h5", "B j5", "B a6", "B b6", "B c6", "B e6", "B f6", "B g6", "B h6", "B j6", "B a7", "B b7", "B d7", 
"B f7", "B g7", "B h7", "B j7", "B b8", "B c8", "B d8", "B e8", "B g8", "B h8", "B j8", "B a9", "B b9", "B c9", "B d9", "B f9", "B h9", "B j9", "B PASS"] + +# Apply action "B a5" +action: 36 + +# State 44 +# Apply action "B e5" +action: 40 + +# State 45 +# Apply action "W b9" +action: 73 + +# State 46 +# Apply action "B e1" +action: 4 + +# State 47 +# Apply action "W a8" +action: 63 + +# State 48 +# Apply action "W h8" +action: 70 + +# State 49 +# Apply action "B c9" +action: 74 + +# State 50 +# Apply action "W j6" +action: 53 + +# State 51 +# Apply action "B a3" +action: 18 + +# State 52 +# Apply action "W j7" +action: 62 + +# State 53 +# Apply action "B a9" +action: 72 + +# State 54 +# Apply action "W b2" +action: 10 + +# State 55 +# Apply action "W j1" +action: 8 + +# State 56 +# Apply action "B c6" +action: 47 + +# State 57 +# GoState(komi=7.5, to_play=W, history.size()=57, stones_count: w23 b22) +# +# 9 XOXOOOX++ +# 8 X+++OXOO+ +# 7 O+X+X++OO +# 6 +OXX+O+OO +# 5 O+OXX+O++ +# 4 X+X+X+XX+ +# 3 XO+OX++++ +# 2 +X+++O+++ +# 1 O++XXOX+O +# ABCDEFGHJ +# +# Observation white: +# 9 +O+OOO+++ +# 8 X+++O+OO+ +# 7 O++++++OO +# 6 +O+++O+OO +# 5 O+OX++O++ +# 4 ++X++++++ +# 3 +O+OX++++ +# 2 +X+++O+++ +# 1 O++X+OX+O +# ABCDEFGHJ +# +# Observation black: +# 9 X+X+O+X++ +# 8 X++++X+++ +# 7 ++X+X++++ +# 6 ++XX+++++ +# 5 O++XX++++ +# 4 X+X+X+XX+ +# 3 X+++X++++ +# 2 +X+++++++ +# 1 O++XX+X++ +# ABCDEFGHJ +# +# Previous move was valid +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = " 9 X+X+O+X++\n 8 X++++X+++\n 7 ++X+X++++\n 6 ++XX+++++\n 5 O++XX++++\n 4 X+X+X+XX+\n 3 X+++X++++\n 2 +X+++++++\n 1 O++XX+X++\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationString(1) = " 9 +O+OOO+++\n 8 X+++O+OO+\n 7 O++++++OO\n 6 +O+++O+OO\n 5 O+OX++O++\n 4 ++X++++++\n 3 +O+OX++++\n 2 +X+++O+++\n 1 O++X+OX+O\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationTensor(0) = [22.0, 23.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 
1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(1) = [22.0, 23.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 4, 7, 9, 11, 12, 13, 15, 16, 17, 18, 20, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 37, 40, 41, 43, 44, 45, 47, 48, 49, 51, 55, 56, 57, 58, 59, 60, 64, 65, 66, 68, 71, 72, 74, 78, 79, 80, 81] +StringLegalActions() = ["W b1", "W c1", "W e1", "W h1", "W a2", "W c2", "W d2", "W e2", "W g2", "W h2", "W j2", "W a3", "W c3", "W f3", "W g3", "W h3", "W j3", "W a4", "W b4", "W d4", "W e4", "W f4", "W g4", "W h4", "W j4", "W b5", "W e5", "W f5", "W h5", "W j5", "W a6", "W c6", "W d6", "W e6", "W g6", "W b7", "W c7", "W d7", "W e7", "W f7", "W g7", "W b8", "W c8", "W d8", "W f8", "W j8", "W a9", "W c9", "W g9", "W h9", "W j9", "W PASS"] + +# Apply action "W h5" +action: 43 + +# State 58 +# Apply action "B g5" +action: 42 + +# State 59 +# Apply action "B b5" +action: 37 + +# State 60 +# Apply action "W c5" +action: 38 + +# State 61 +# Apply action "W b8" +action: 64 + +# State 62 +# Apply action "B c3" +action: 20 + +# State 63 +# Apply action "W b1" +action: 1 + +# State 64 +# GoState(komi=7.5, to_play=B, history.size()=64, stones_count: w25 b22) +# +# 9 +OXOOOX++ +# 8 +O++OXOO+ +# 7 O+X+X++OO +# 6 +OXX+O+OO +# 5 OX+XX+OO+ +# 4 X+X+X+XX+ +# 3 XOXOX++++ +# 2 +X+++O+++ +# 1 OO+XXOX+O +# ABCDEFGHJ +# +# Observation white: +# 9 +O+OOO+++ +# 8 +O++O+OO+ +# 7 O++++++OO +# 6 +O+++O+OO +# 5 O++X++OO+ +# 4 ++X++++++ +# 3 
+O+OX++++ +# 2 +X+++O+++ +# 1 OO+X+OX+O +# ABCDEFGHJ +# +# Observation black: +# 9 ++X+O+X++ +# 8 +++++X+++ +# 7 ++X+X++++ +# 6 ++XX+++++ +# 5 OX+XX+O++ +# 4 X+X+X+XX+ +# 3 X+X+X++++ +# 2 +X+++++++ +# 1 O++XX+X++ +# ABCDEFGHJ +# +# Previous move was valid +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 9 ++X+O+X++\n 8 +++++X+++\n 7 ++X+X++++\n 6 ++XX+++++\n 5 OX+XX+O++\n 4 X+X+X+XX+\n 3 X+X+X++++\n 2 +X+++++++\n 1 O++XX+X++\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationString(1) = " 9 +O+OOO+++\n 8 +O++O+OO+\n 7 O++++++OO\n 6 +O+++O+OO\n 5 O++X++OO+\n 4 ++X++++++\n 3 +O+OX++++\n 2 +X+++O+++\n 1 OO+X+OX+O\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationTensor(0) = [22.0, 25.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [22.0, 25.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 5, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 19, 21, 23, 24, 25, 26, 28, 30, 32, 35, 38, 41, 43, 44, 45, 46, 49, 50, 51, 52, 53, 54, 55, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 72, 73, 75, 77, 79, 80, 81] +StringLegalActions() = ["B b1", "B c1", "B f1", "B h1", "B j1", "B a2", "B c2", "B d2", "B e2", "B f2", "B g2", "B h2", "B j2", "B b3", "B d3", "B f3", "B g3", "B h3", "B j3", "B b4", "B d4", "B f4", "B j4", "B c5", "B f5", "B h5", "B j5", "B a6", "B b6", "B e6", "B f6", "B g6", "B h6", "B j6", "B a7", "B b7", "B d7", "B f7", "B g7", "B h7", "B j7", "B a8", "B b8", "B c8", "B d8", "B e8", "B g8", "B h8", "B j8", "B a9", "B b9", "B d9", "B f9", "B h9", "B j9", "B PASS"] + +# Apply action "B f4" +action: 32 + +# State 65 +# Apply action "W e2" +action: 13 + +# State 66 +# Apply action "B h2" +action: 16 + +# State 67 +# Apply action "W a2" +action: 9 + +# State 68 +# Apply action "B d4" +action: 30 + +# State 69 +# Apply action "W c3" +action: 20 + +# State 70 +# Apply action "W d8" +action: 66 + +# State 71 +# Apply action "B c5" +action: 38 + +# State 72 +# Apply action "W g4" +action: 33 + +# State 73 +# Apply action "W c8" +action: 65 + +# State 74 +# Apply action "B j7" +action: 62 + +# State 75 +# Apply action "B a2" +action: 9 + +# State 76 +# Apply action "B a9" +action: 72 + +# State 77 +# GoState(komi=7.5, to_play=W, history.size()=77, stones_count: w29 b26) +# +# 9 XO+OOOX++ +# 8 +OOOOXOO+ +# 7 O+X+X++OO +# 6 +OXX+O+OO +# 5 OXXXX+OO+ +# 4 X+XXXXXX+ +# 3 XOXOX++++ +# 2 OX++OO+X+ +# 1 OO+XXOX+O +# ABCDEFGHJ +# +# Observation white: +# 9 +O+OOO+++ +# 8 +OOOO+OO+ +# 7 O++++++OO +# 6 +O+++O+OO +# 5 O++X++OO+ +# 4 ++X+++X++ +# 3 +OXOX++++ +# 2 OX++OO+++ +# 1 OO+X+OX+O +# ABCDEFGHJ +# +# Observation black: +# 9 X+++O+X++ +# 8 +++++X+++ +# 7 ++X+X+++O +# 6 ++XX+++++ +# 5 OXXXX+O++ +# 4 X+XXXXXX+ +# 3 X+X+X++++ +# 2 OX+++++X+ +# 1 O++XX+X++ +# ABCDEFGHJ +# +# Previous move was valid +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 
53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = " 9 X+++O+X++\n 8 +++++X+++\n 7 ++X+X+++O\n 6 ++XX+++++\n 5 OXXXX+O++\n 4 X+XXXXXX+\n 3 X+X+X++++\n 2 OX+++++X+\n 1 O++XX+X++\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationString(1) = " 9 +O+OOO+++\n 8 +OOOO+OO+\n 7 O++++++OO\n 6 +O+++O+OO\n 5 O++X++OO+\n 4 ++X+++X++\n 3 +OXOX++++\n 2 OX++OO+++\n 1 OO+X+OX+O\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationTensor(0) = [26.0, 29.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(1) = [26.0, 29.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2, 4, 7, 11, 12, 15, 16, 17, 18, 23, 24, 25, 26, 27, 28, 30, 31, 32, 34, 35, 37, 38, 40, 41, 44, 45, 47, 48, 49, 51, 55, 56, 57, 58, 59, 60, 63, 68, 71, 72, 74, 78, 79, 80, 81] +StringLegalActions() = ["W c1", "W e1", "W h1", "W c2", "W d2", "W g2", "W h2", "W j2", "W a3", "W f3", "W g3", "W h3", "W j3", "W a4", "W b4", "W d4", "W e4", "W f4", "W h4", "W j4", "W b5", "W c5", "W e5", "W f5", "W j5", "W a6", "W c6", "W d6", "W e6", "W g6", "W b7", "W c7", "W d7", "W e7", "W f7", "W g7", "W a8", "W f8", "W j8", "W a9", "W c9", "W g9", "W h9", "W j9", "W PASS"] + +# Apply action "W c7" +action: 56 + +# State 78 +# Apply action "W d6" +action: 48 + +# State 79 +# Apply action "W f4" +action: 32 + +# State 80 +# Apply action "W b5" +action: 37 + +# State 81 +# Apply action "W h1" +action: 7 + +# State 82 +# Apply action "B h8" +action: 70 + +# State 83 +# Apply action "B h6" +action: 52 + +# State 84 +# Apply action "B h5" +action: 43 + +# State 85 +# GoState(komi=7.5, to_play=B, history.size()=85, stones_count: w30 b26) +# +# 9 XO+OOOX++ +# 8 +OOOOXOO+ +# 7 O+X+X++OO +# 6 +OXX+O+OO +# 5 OXXXX+OO+ +# 4 X+XXXXXX+ +# 3 XOXOX++++ +# 2 OX++OO+X+ +# 1 OO+XXOXOO +# ABCDEFGHJ +# +# Observation white: +# 9 +O+OOO+++ +# 8 +OOOO+OO+ +# 7 O+X++++OO +# 6 +O+X+O+OO +# 5 OX+X++OO+ +# 4 ++X++XX++ +# 3 +OXOX++++ +# 2 OX++OO+++ +# 1 OO+X+OXOO +# ABCDEFGHJ +# +# Observation black: +# 9 X+++O+X++ +# 8 +++++X+O+ +# 7 ++X+X+++O +# 6 ++XX+++O+ +# 5 OXXXX+OO+ +# 4 X+XXXXXX+ +# 3 X+X+X++++ +# 2 OX+++++X+ +# 1 O++XX+X++ +# ABCDEFGHJ +# +# Previous move was observational +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 9 X+++O+X++\n 8 +++++X+O+\n 7 ++X+X+++O\n 6 ++XX+++O+\n 5 OXXXX+OO+\n 4 X+XXXXXX+\n 3 X+X+X++++\n 2 OX+++++X+\n 1 O++XX+X++\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationString(1) = " 9 +O+OOO+++\n 8 +OOOO+OO+\n 7 O+X++++OO\n 6 +O+X+O+OO\n 5 OX+X++OO+\n 4 ++X++XX++\n 3 +OXOX++++\n 2 OX++OO+++\n 1 OO+X+OXOO\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationTensor(0) = [26.0, 30.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 
0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [26.0, 30.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 5, 7, 8, 11, 12, 13, 14, 15, 17, 19, 21, 23, 24, 25, 26, 28, 35, 41, 44, 45, 46, 49, 50, 51, 53, 54, 55, 57, 59, 60, 61, 63, 64, 65, 66, 67, 69, 71, 73, 74, 75, 77, 79, 80, 81] +StringLegalActions() = ["B b1", "B c1", "B f1", "B h1", "B j1", "B c2", "B d2", "B e2", "B f2", "B g2", "B j2", "B b3", "B d3", "B f3", "B g3", "B h3", "B j3", "B b4", "B j4", "B f5", "B j5", "B a6", "B b6", "B e6", "B f6", "B g6", "B j6", "B a7", "B b7", "B d7", "B f7", "B g7", "B h7", "B a8", "B b8", "B c8", "B d8", "B e8", "B g8", "B j8", "B b9", "B c9", "B d9", "B f9", "B h9", "B j9", "B PASS"] + +# Apply action "B 
d2" +action: 12 + +# State 86 +# Apply action "W d4" +action: 30 + +# State 87 +# Apply action "W e5" +action: 40 + +# State 88 +# Apply action "W c1" +action: 2 + +# State 89 +# Apply action "B g8" +action: 69 + +# State 90 +# Apply action "B a8" +action: 63 + +# State 91 +# Apply action "B j6" +action: 53 + +# State 92 +# Apply action "B a8" +action: 63 + +# State 93 +# Apply action "B b4" +action: 28 + +# State 94 +# Apply action "W PASS" +action: 81 + +# State 95 +# Apply action "B f3" +action: 23 + +# State 96 +# Apply action "W j9" +action: 80 + +# State 97 +# Apply action "B d9" +action: 75 + +# State 98 +# Apply action "B a8" +action: 63 + +# State 99 +# Apply action "B a8" +action: 63 + +# State 100 +# GoState(komi=7.5, to_play=B, history.size()=100, stones_count: w30 b29) +# +# 9 XO+OOOX+O +# 8 +OOOOXOO+ +# 7 O+X+X++OO +# 6 +OXX+O+OO +# 5 OXXXX+OO+ +# 4 XXXXXXXX+ +# 3 X+X+XX+++ +# 2 OX+XOO+X+ +# 1 OOOXXOXOO +# ABCDEFGHJ +# +# Observation white: +# 9 +O+OOO++O +# 8 +OOOO+OO+ +# 7 O+X++++OO +# 6 +O+X+O+OO +# 5 OX+XX+OO+ +# 4 ++XX+XX++ +# 3 ++X+X++++ +# 2 OX++OO+++ +# 1 OOOX+OXOO +# ABCDEFGHJ +# +# Observation black: +# 9 X++OO+X++ +# 8 +++++XOO+ +# 7 ++X+X+++O +# 6 ++XX+++OO +# 5 OXXXX+OO+ +# 4 XXXXXXXX+ +# 3 X+X+XX+++ +# 2 OX+X+++X+ +# 1 O++XX+X++ +# ABCDEFGHJ +# +# Previous move was observational +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 9 X++OO+X++\n 8 +++++XOO+\n 7 ++X+X+++O\n 6 ++XX+++OO\n 5 OXXXX+OO+\n 4 XXXXXXXX+\n 3 X+X+XX+++\n 2 OX+X+++X+\n 1 O++XX+X++\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationString(1) = " 9 +O+OOO++O\n 8 +OOOO+OO+\n 7 O+X++++OO\n 6 +O+X+O+OO\n 5 OX+XX+OO+\n 4 ++XX+XX++\n 3 ++X+X++++\n 2 OX++OO+++\n 1 OOOX+OXOO\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationTensor(0) = [29.0, 30.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 
1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [29.0, 30.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 5, 7, 8, 11, 13, 14, 15, 17, 19, 21, 24, 25, 26, 35, 41, 44, 45, 46, 49, 50, 51, 54, 55, 57, 59, 60, 61, 63, 64, 65, 66, 67, 71, 73, 74, 77, 79, 80, 81] +StringLegalActions() = ["B b1", "B c1", "B f1", "B h1", "B j1", "B c2", "B e2", "B f2", "B g2", "B j2", "B b3", "B d3", "B g3", "B h3", "B j3", "B j4", "B f5", "B j5", "B a6", "B b6", "B e6", "B f6", "B g6", "B a7", "B b7", "B d7", "B f7", "B g7", "B h7", "B a8", "B b8", "B c8", "B d8", "B e8", "B j8", "B b9", "B c9", "B f9", "B h9", "B j9", "B PASS"] + +# Apply action "B f7" +action: 59 + +# State 101 +# GoState(komi=7.5, to_play=W, history.size()=101, stones_count: w30 b30) +# +# 9 XO+OOOX+O +# 8 +OOOOXOO+ +# 7 O+X+XX+OO +# 6 +OXX+O+OO +# 5 OXXXX+OO+ +# 4 XXXXXXXX+ +# 3 X+X+XX+++ +# 2 OX+XOO+X+ +# 1 OOOXXOXOO +# ABCDEFGHJ +# +# Observation white: +# 9 +O+OOO++O +# 8 +OOOO+OO+ +# 7 O+X++++OO +# 6 +O+X+O+OO +# 5 OX+XX+OO+ +# 4 ++XX+XX++ +# 3 ++X+X++++ +# 2 OX++OO+++ +# 1 OOOX+OXOO +# ABCDEFGHJ +# +# Observation black: +# 9 X++OO+X++ +# 8 +++++XOO+ +# 7 ++X+XX++O +# 6 ++XX+++OO +# 5 OXXXX+OO+ +# 4 XXXXXXXX+ +# 3 X+X+XX+++ +# 2 
OX+X+++X+ +# 1 O++XX+X++ +# ABCDEFGHJ +# +# Previous move was valid +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = " 9 X++OO+X++\n 8 +++++XOO+\n 7 ++X+XX++O\n 6 ++XX+++OO\n 5 OXXXX+OO+\n 4 XXXXXXXX+\n 3 X+X+XX+++\n 2 OX+X+++X+\n 1 O++XX+X++\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationString(1) = " 9 +O+OOO++O\n 8 +OOOO+OO+\n 7 O+X++++OO\n 6 +O+X+O+OO\n 5 OX+XX+OO+\n 4 ++XX+XX++\n 3 ++X+X++++\n 2 OX++OO+++\n 1 OOOX+OXOO\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationTensor(0) = [30.0, 30.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(1) = [30.0, 30.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [4, 11, 12, 15, 16, 17, 18, 19, 21, 23, 24, 25, 26, 27, 28, 31, 34, 35, 38, 41, 44, 45, 47, 49, 51, 55, 57, 58, 59, 60, 63, 68, 71, 72, 74, 78, 79, 81] +StringLegalActions() = ["W e1", "W c2", "W d2", "W g2", "W h2", "W j2", "W a3", "W b3", "W d3", "W f3", "W g3", "W h3", "W j3", "W a4", "W b4", "W e4", "W h4", "W j4", "W c5", "W f5", "W j5", "W a6", "W c6", "W e6", "W g6", "W b7", "W d7", "W e7", "W f7", "W g7", "W a8", "W f8", "W j8", "W a9", "W c9", "W g9", "W h9", "W PASS"] + +# Apply action "W e1" +action: 4 + +# State 102 +# Apply action "W f3" +action: 23 + +# State 103 +# Apply action "W f7" +action: 59 + +# State 104 +# Apply action "W h2" +action: 16 + +# State 105 +# Apply action "W b4" +action: 28 + +# State 106 +# Apply action "W e6" +action: 49 + +# State 107 +# Apply action "B b8" +action: 64 + +# State 108 +# Apply action "B f6" +action: 50 + +# State 109 +# Apply action "B e8" +action: 67 + +# State 110 +# Apply action "B e2" +action: 13 + +# State 111 +# Apply action "B c2" +action: 11 + +# State 112 +# Apply action "W g7" +action: 60 + +# State 113 +# Apply action "B b3" +action: 19 + +# State 114 +# Apply action "W d7" +action: 57 + +# State 115 +# Apply action "B a2" +action: 9 + +# State 116 +# Apply action "W b7" +action: 55 + +# State 117 +# Apply action "B h9" +action: 79 + +# State 118 +# Apply action "B g6" +action: 51 + +# State 119 +# GoState(komi=7.5, to_play=B, history.size()=119, stones_count: w30 b30) +# +# 9 XO+OOOX+O +# 8 +OOOO+OO+ +# 7 OOXO++OOO +# 6 +OXXOO+OO +# 5 OXXXX+OO+ +# 4 XXXXXXXX+ +# 3 XXX+XX+++ +# 2 XXXXOO+X+ +# 1 +++XXOXOO +# ABCDEFGHJ +# +# Observation white: +# 9 +O+OOO++O +# 8 +OOOO+OO+ +# 7 OOXO++OOO +# 6 +O+XOO+OO +# 5 OX+XX+OO+ +# 4 +XXX+XX++ +# 3 ++X+XX+++ +# 2 +X++OO+X+ +# 1 +++XXOXOO +# ABCDEFGHJ +# +# Observation black: +# 9 X++OO+X++ +# 8 +O++O+OO+ +# 7 ++X+++++O +# 6 ++XX+O+OO +# 5 OXXXX+OO+ +# 4 XXXXXXXX+ +# 3 XXX+XX+++ +# 2 XXXXO++X+ +# 1 +++XX+X++ +# ABCDEFGHJ +# +# Previous move was observational +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 
63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 9 X++OO+X++\n 8 +O++O+OO+\n 7 ++X+++++O\n 6 ++XX+O+OO\n 5 OXXXX+OO+\n 4 XXXXXXXX+\n 3 XXX+XX+++\n 2 XXXXO++X+\n 1 +++XX+X++\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationString(1) = " 9 +O+OOO++O\n 8 +OOOO+OO+\n 7 OOXO++OOO\n 6 +O+XOO+OO\n 5 OX+XX+OO+\n 4 +XXX+XX++\n 3 ++X+XX+++\n 2 +X++OO+X+\n 1 +++XXOXOO\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationTensor(0) = [30.0, 30.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [30.0, 30.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 
0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 5, 7, 8, 14, 15, 17, 21, 24, 25, 26, 35, 41, 44, 45, 46, 49, 51, 54, 55, 57, 58, 59, 60, 61, 63, 65, 66, 68, 71, 73, 74, 77, 79, 80, 81] +StringLegalActions() = ["B a1", "B b1", "B c1", "B f1", "B h1", "B j1", "B f2", "B g2", "B j2", "B d3", "B g3", "B h3", "B j3", "B j4", "B f5", "B j5", "B a6", "B b6", "B e6", "B g6", "B a7", "B b7", "B d7", "B e7", "B f7", "B g7", "B h7", "B a8", "B c8", "B d8", "B f8", "B j8", "B b9", "B c9", "B f9", "B h9", "B j9", "B PASS"] + +# Apply action "B PASS" +action: 81 + +# State 120 +# Apply action "W a1" +action: 0 + +# State 121 +# Apply action "B j8" +action: 71 + +# State 122 +# Apply action "B PASS" +action: 81 + +# State 123 +# GoState(komi=7.5, to_play=W, history.size()=123, stones_count: w31 b30) +# +# 9 XO+OOOX+O +# 8 +OOOO+OO+ +# 7 OOXO++OOO +# 6 +OXXOO+OO +# 5 OXXXX+OO+ +# 4 XXXXXXXX+ +# 3 XXX+XX+++ +# 2 XXXXOO+X+ +# 1 O++XXOXOO +# ABCDEFGHJ +# +# Observation white: +# 9 +O+OOO++O +# 8 +OOOO+OO+ +# 7 OOXO++OOO +# 6 +O+XOO+OO +# 5 OX+XX+OO+ +# 4 +XXX+XX++ +# 3 ++X+XX+++ +# 2 +X++OO+X+ +# 1 O++XXOXOO +# ABCDEFGHJ +# +# Observation black: +# 9 X++OO+X++ +# 8 +O++O+OO+ +# 7 ++X+++++O +# 6 ++XX+O+OO +# 5 OXXXX+OO+ +# 4 XXXXXXXX+ +# 3 XXX+XX+++ +# 2 XXXXO++X+ +# 1 +++XX+X++ +# ABCDEFGHJ +# +# Previous move was valid and was a pass +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = " 9 X++OO+X++\n 8 +O++O+OO+\n 7 ++X+++++O\n 6 ++XX+O+OO\n 5 OXXXX+OO+\n 4 XXXXXXXX+\n 3 XXX+XX+++\n 2 XXXXO++X+\n 1 +++XX+X++\n ABCDEFGHJ\nPrevious move was valid and was a pass\n" +ObservationString(1) = " 9 +O+OOO++O\n 8 +OOOO+OO+\n 7 OOXO++OOO\n 6 +O+XOO+OO\n 5 
OX+XX+OO+\n 4 +XXX+XX++\n 3 ++X+XX+++\n 2 +X++OO+X+\n 1 O++XXOXOO\n ABCDEFGHJ\nPrevious move was valid and was a pass\n" +ObservationTensor(0) = [30.0, 31.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(1) = [30.0, 31.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 9, 11, 12, 15, 17, 18, 19, 21, 24, 25, 26, 27, 31, 34, 
35, 38, 41, 44, 45, 47, 51, 58, 59, 63, 68, 71, 72, 74, 78, 79, 81] +StringLegalActions() = ["W b1", "W c1", "W a2", "W c2", "W d2", "W g2", "W j2", "W a3", "W b3", "W d3", "W g3", "W h3", "W j3", "W a4", "W e4", "W h4", "W j4", "W c5", "W f5", "W j5", "W a6", "W c6", "W g6", "W e7", "W f7", "W a8", "W f8", "W j8", "W a9", "W c9", "W g9", "W h9", "W PASS"] + +# Apply action "W a6" +action: 45 + +# State 124 +# Apply action "B f7" +action: 59 + +# State 125 +# Apply action "W j3" +action: 26 + +# State 126 +# Apply action "B c8" +action: 65 + +# State 127 +# Apply action "B b7" +action: 55 + +# State 128 +# Apply action "B j5" +action: 44 + +# State 129 +# Apply action "W j4" +action: 35 + +# State 130 +# Apply action "B g7" +action: 60 + +# State 131 +# Apply action "B j9" +action: 80 + +# State 132 +# Apply action "B g2" +action: 15 + +# State 133 +# Apply action "W e2" +action: 13 + +# State 134 +# GoState(komi=7.5, to_play=B, history.size()=134, stones_count: w32 b32) +# +# 9 XO+OOOX+O +# 8 +OOOO+OO+ +# 7 OOXO+XOOO +# 6 OOXXOO+OO +# 5 OXXXX+OO+ +# 4 XXXXXXXXO +# 3 XXX+XX++O +# 2 XXXXO+XX+ +# 1 O++XX+XOO +# ABCDEFGHJ +# +# Observation white: +# 9 +O+OOO++O +# 8 +OOOO+OO+ +# 7 OOXO++OOO +# 6 OO+XOO+OO +# 5 OX+XX+OO+ +# 4 +XXX+XX+O +# 3 ++X+XX++O +# 2 +X++O++X+ +# 1 O++XX+XOO +# ABCDEFGHJ +# +# Observation black: +# 9 X++OO+X+O +# 8 +OO+O+OO+ +# 7 +OX++XO+O +# 6 ++XX+O+OO +# 5 OXXXX+OO+ +# 4 XXXXXXXX+ +# 3 XXX+XX+++ +# 2 XXXX++XX+ +# 1 +++XX+X++ +# ABCDEFGHJ +# +# Previous move was valid +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 9 X++OO+X+O\n 8 +OO+O+OO+\n 7 +OX++XO+O\n 6 ++XX+O+OO\n 5 OXXXX+OO+\n 4 XXXXXXXX+\n 3 XXX+XX+++\n 2 XXXX++XX+\n 1 +++XX+X++\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationString(1) = " 9 +O+OOO++O\n 8 +OOOO+OO+\n 7 OOXO++OOO\n 6 OO+XOO+OO\n 5 OX+XX+OO+\n 4 +XXX+XX+O\n 3 ++X+XX++O\n 2 +X++O++X+\n 1 O++XX+XOO\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationTensor(0) = [32.0, 32.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [32.0, 32.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 5, 7, 8, 13, 14, 17, 21, 24, 25, 26, 35, 41, 44, 45, 46, 49, 51, 54, 57, 58, 61, 63, 66, 68, 71, 73, 74, 77, 79, 81] +StringLegalActions() = ["B a1", "B b1", "B c1", "B f1", "B h1", "B j1", "B e2", "B f2", "B j2", "B d3", "B g3", "B h3", "B j3", "B j4", "B f5", "B j5", "B a6", "B b6", "B e6", "B g6", "B a7", "B d7", "B e7", "B h7", "B a8", "B d8", "B f8", "B j8", "B b9", "B c9", "B f9", "B h9", "B PASS"] + +# Apply action "B h3" +action: 25 + +# State 135 +# Apply action "W a2" +action: 9 + +# State 136 +# Apply action "W g3" +action: 24 + +# State 137 +# Apply action "W a4" +action: 27 + +# State 138 +# Apply action "W c6" +action: 47 + +# State 139 +# 
Apply action "W g9" +action: 78 + +# State 140 +# Apply action "W h4" +action: 34 + +# State 141 +# GoState(komi=7.5, to_play=W, history.size()=141, stones_count: w32 b33) +# +# 9 XO+OOOX+O +# 8 +OOOO+OO+ +# 7 OOXO+XOOO +# 6 OOXXOO+OO +# 5 OXXXX+OO+ +# 4 XXXXXXXXO +# 3 XXX+XX+XO +# 2 XXXXO+XX+ +# 1 O++XX+XOO +# ABCDEFGHJ +# +# Observation white: +# 9 +O+OOOX+O +# 8 +OOOO+OO+ +# 7 OOXO++OOO +# 6 OOXXOO+OO +# 5 OX+XX+OO+ +# 4 XXXX+XXXO +# 3 ++X+XX++O +# 2 XX++O++X+ +# 1 O++XX+XOO +# ABCDEFGHJ +# +# Observation black: +# 9 X++OO+X+O +# 8 +OO+O+OO+ +# 7 +OX++XO+O +# 6 ++XX+O+OO +# 5 OXXXX+OO+ +# 4 XXXXXXXX+ +# 3 XXX+XX+X+ +# 2 XXXX++XX+ +# 1 +++XX+X++ +# ABCDEFGHJ +# +# Previous move was observational +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = " 9 X++OO+X+O\n 8 +OO+O+OO+\n 7 +OX++XO+O\n 6 ++XX+O+OO\n 5 OXXXX+OO+\n 4 XXXXXXXX+\n 3 XXX+XX+X+\n 2 XXXX++XX+\n 1 +++XX+X++\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationString(1) = " 9 +O+OOOX+O\n 8 +OOOO+OO+\n 7 OOXO++OOO\n 6 OOXXOO+OO\n 5 OX+XX+OO+\n 4 XXXX+XXXO\n 3 ++X+XX++O\n 2 XX++O++X+\n 1 O++XX+XOO\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationTensor(0) = [33.0, 32.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(1) = [33.0, 32.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 5, 11, 12, 14, 15, 17, 18, 19, 21, 24, 25, 31, 38, 41, 44, 51, 58, 59, 63, 68, 71, 72, 74, 79, 81] +StringLegalActions() = ["W b1", "W c1", "W f1", "W c2", "W d2", "W f2", "W g2", "W j2", "W a3", "W b3", "W d3", "W g3", "W h3", "W e4", "W c5", "W f5", "W j5", "W g6", "W e7", "W f7", "W a8", "W f8", "W j8", "W a9", "W c9", "W h9", "W PASS"] + +# Apply action "W f1" +action: 5 + +# State 142 +# Apply action "B f1" +action: 5 + +# State 143 +# Apply action "B a8" +action: 63 + +# State 144 +# Apply action "B e2" +action: 13 + +# State 145 +# Apply action "B h7" +action: 61 + +# State 146 +# Apply action "B f8" +action: 68 + +# State 147 +# Apply action "W c9" +action: 74 + +# State 148 +# Apply action "B j3" +action: 26 + +# State 149 +# Apply action "B g3" +action: 24 + +# State 150 +# Apply action "W b3" +action: 19 + +# State 151 +# Apply action "W g2" +action: 15 + +# State 152 +# Apply action "W g6" +action: 51 + +# State 153 +# Apply action "B f2" +action: 14 + +# State 154 +# Apply action "W j8" +action: 71 + +# State 155 +# Apply action "B h1" +action: 7 + +# State 156 +# GoState(komi=7.5, to_play=B, history.size()=156, stones_count: w34 b36) +# +# 9 XOOOOOX+O +# 8 +OOOOXOOO +# 7 OOXO+XOOO +# 6 OOXXOOOOO +# 5 OXXXX+OO+ +# 4 XXXXXXXXO +# 3 XXX+XXXXO +# 2 XXXX+XXX+ +# 1 O++XX+XOO +# ABCDEFGHJ +# +# Observation white: +# 9 +OOOOOX+O +# 8 +OOOO+OOO +# 7 OOXO++OOO +# 6 OOXXOOOOO 
+# 5 OX+XX+OO+ +# 4 XXXX+XXXO +# 3 +XX+XX++O +# 2 XX++++XX+ +# 1 O++XX+XOO +# ABCDEFGHJ +# +# Observation black: +# 9 X++OO+X+O +# 8 +OO+OXOO+ +# 7 +OX++XOOO +# 6 ++XX+O+OO +# 5 OXXXX+OO+ +# 4 XXXXXXXX+ +# 3 XXX+XXXXO +# 2 XXXX+XXX+ +# 1 +++XX+XO+ +# ABCDEFGHJ +# +# Previous move was observational +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 9 X++OO+X+O\n 8 +OO+OXOO+\n 7 +OX++XOOO\n 6 ++XX+O+OO\n 5 OXXXX+OO+\n 4 XXXXXXXX+\n 3 XXX+XXXXO\n 2 XXXX+XXX+\n 1 +++XX+XO+\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationString(1) = " 9 +OOOOOX+O\n 8 +OOOO+OOO\n 7 OOXO++OOO\n 6 OOXXOOOOO\n 5 OX+XX+OO+\n 4 XXXX+XXXO\n 3 +XX+XX++O\n 2 XX++++XX+\n 1 O++XX+XOO\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationTensor(0) = [36.0, 34.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [36.0, 34.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 5, 8, 13, 17, 21, 35, 41, 44, 45, 46, 49, 51, 54, 57, 58, 63, 66, 71, 73, 74, 77, 79, 81] +StringLegalActions() = ["B a1", "B b1", "B c1", "B f1", "B j1", "B e2", "B j2", "B d3", "B j4", "B f5", "B j5", "B a6", "B b6", "B e6", "B g6", "B a7", "B d7", "B e7", "B a8", "B d8", "B j8", "B b9", "B c9", "B f9", "B h9", "B PASS"] + +# Apply action "B j4" +action: 35 + +# State 157 +# Apply action "B j1" +action: 8 + +# State 158 +# Apply action "B j2" +action: 17 + +# State 159 +# Apply action "W c2" +action: 11 + +# State 160 +# Apply action "W h1" +action: 7 + +# State 161 +# Apply action "B g6" +action: 51 + +# State 162 +# Apply action "B c9" +action: 74 + +# State 163 +# Apply action "B j1" +action: 8 + +# State 164 +# Apply action "W j5" +action: 44 + +# State 165 +# Apply action "B j5" +action: 44 + +# State 166 +# Apply action "B d7" +action: 57 + +# State 167 +# Apply action "B j8" +action: 71 + +# State 168 +# Apply action "B h1" +action: 7 + +# State 169 +# Apply action "W d3" +action: 21 + +# State 170 +# GoState(komi=7.5, to_play=W, history.size()=170, stones_count: w33 b39) +# +# 9 XOOOOOX+O +# 8 +OOOOXOOO +# 7 OOXO+XOOO +# 6 OOXXOOOOO +# 5 OXXXX+OOO +# 4 XXXXXXXXO +# 3 XXX+XXXXO +# 2 XXXX+XXXX +# 1 O++XX+XXX +# ABCDEFGHJ +# +# Observation white: +# 9 +OOOOOX+O +# 8 +OOOO+OOO +# 7 OOXO++OOO +# 6 OOXXOOOOO +# 5 OX+XX+OOO +# 4 XXXX+XXXO +# 3 +XX+XX++O +# 2 XXX+++XX+ +# 1 O++XX+X++ +# ABCDEFGHJ +# +# Observation black: +# 9 X+OOO+X+O +# 8 +OO+OXOOO +# 7 +OXO+XOOO +# 6 ++XX+OOOO +# 5 OXXXX+OOO +# 4 XXXXXXXXO +# 3 XXX+XXXXO +# 2 XXXX+XXXX +# 1 +++XX+XXX +# ABCDEFGHJ +# +# Previous move was observational +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 
76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = " 9 X+OOO+X+O\n 8 +OO+OXOOO\n 7 +OXO+XOOO\n 6 ++XX+OOOO\n 5 OXXXX+OOO\n 4 XXXXXXXXO\n 3 XXX+XXXXO\n 2 XXXX+XXXX\n 1 +++XX+XXX\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationString(1) = " 9 +OOOOOX+O\n 8 +OOOO+OOO\n 7 OOXO++OOO\n 6 OOXXOOOOO\n 5 OX+XX+OOO\n 4 XXXX+XXXO\n 3 +XX+XX++O\n 2 XXX+++XX+\n 1 O++XX+X++\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationTensor(0) = [39.0, 33.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(1) = [39.0, 33.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 
0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 5, 7, 8, 12, 13, 14, 17, 18, 21, 24, 25, 31, 38, 41, 58, 59, 63, 68, 72, 79, 81] +StringLegalActions() = ["W b1", "W c1", "W f1", "W h1", "W j1", "W d2", "W e2", "W f2", "W j2", "W a3", "W d3", "W g3", "W h3", "W e4", "W c5", "W f5", "W e7", "W f7", "W a8", "W f8", "W a9", "W h9", "W PASS"] + +# Apply action "W j1" +action: 8 + +# State 171 +# Apply action "W f7" +action: 59 + +# State 172 +# Apply action "W PASS" +action: 81 + +# State 173 +# GoState(komi=7.5, to_play=B, history.size()=173, stones_count: w33 b39) +# +# 9 XOOOOOX+O +# 8 +OOOOXOOO +# 7 OOXO+XOOO +# 6 OOXXOOOOO +# 5 OXXXX+OOO +# 4 XXXXXXXXO +# 3 XXX+XXXXO +# 2 XXXX+XXXX +# 1 O++XX+XXX +# ABCDEFGHJ +# +# Observation white: +# 9 +OOOOOX+O +# 8 +OOOO+OOO +# 7 OOXO+XOOO +# 6 OOXXOOOOO +# 5 OX+XX+OOO +# 4 XXXX+XXXO +# 3 +XX+XX++O +# 2 XXX+++XX+ +# 1 O++XX+X+X +# ABCDEFGHJ +# +# Observation black: +# 9 X+OOO+X+O +# 8 +OO+OXOOO +# 7 +OXO+XOOO +# 6 ++XX+OOOO +# 5 OXXXX+OOO +# 4 XXXXXXXXO +# 3 XXX+XXXXO +# 2 XXXX+XXXX +# 1 +++XX+XXX +# ABCDEFGHJ +# +# Previous move was valid and was a pass +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 
36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 9 X+OOO+X+O\n 8 +OO+OXOOO\n 7 +OXO+XOOO\n 6 ++XX+OOOO\n 5 OXXXX+OOO\n 4 XXXXXXXXO\n 3 XXX+XXXXO\n 2 XXXX+XXXX\n 1 +++XX+XXX\n ABCDEFGHJ\nPrevious move was valid and was a pass\n" +ObservationString(1) = " 9 +OOOOOX+O\n 8 +OOOO+OOO\n 7 OOXO+XOOO\n 6 OOXXOOOOO\n 5 OX+XX+OOO\n 4 XXXX+XXXO\n 3 +XX+XX++O\n 2 XXX+++XX+\n 1 O++XX+X+X\n ABCDEFGHJ\nPrevious move was valid and was a pass\n" +ObservationTensor(0) = [39.0, 33.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [39.0, 33.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 
0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 5, 13, 21, 41, 45, 46, 49, 54, 58, 63, 66, 73, 77, 79, 81] +StringLegalActions() = ["B a1", "B b1", "B c1", "B f1", "B e2", "B d3", "B f5", "B a6", "B b6", "B e6", "B a7", "B e7", "B a8", "B d8", "B b9", "B f9", "B h9", "B PASS"] + +# Apply action "B a1" +action: 0 + +# State 174 +# Apply action "B b9" +action: 73 + +# State 175 +# Apply action "B f5" +action: 41 + +# State 176 +# Apply action "W c5" +action: 38 + +# State 177 +# Apply action "W e4" +action: 31 + +# State 178 +# Apply action "W h9" +action: 79 + +# State 179 +# Apply action "B e7" +action: 58 + +# State 180 +# Apply action "B f1" +action: 5 + +# State 181 +# Apply action "W c1" +action: 2 + +# State 182 +# Apply action "B h9" +action: 79 + +# State 183 +# Apply action "B a6" +action: 45 + +# State 184 +# Apply action "B PASS" +action: 81 + +# State 185 +# Apply action "W e2" +action: 13 + +# State 186 +# Apply action "W f2" +action: 14 + +# State 187 +# Apply action "W a9" +action: 72 + +# State 188 +# GoState(komi=7.5, to_play=W, history.size()=188, stones_count: w35 b40) +# +# 9 XOOOOO+OO +# 8 +OOOOXOOO +# 7 OOXO+XOOO +# 6 OOXXOOOOO +# 5 OXXXXXOOO +# 4 XXXXXXXXO +# 3 XXX+XXXXO +# 2 XXXX+XXXX +# 1 O+OXXXXXX +# ABCDEFGHJ +# +# Observation white: +# 9 XOOOOO+OO +# 8 +OOOO+OOO +# 7 OOXO+XOOO +# 6 OOXXOOOOO +# 5 OXXXX+OOO +# 4 XXXXXXXXO +# 3 +XX+XX++O +# 2 XXX++XXX+ +# 1 O+OXX+X+X +# ABCDEFGHJ +# +# Observation black: +# 9 XOOOO++OO +# 8 +OO+OXOOO +# 7 +OXO+XOOO +# 6 O+XX+OOOO +# 5 OXXXXXOOO +# 4 XXXXXXXXO +# 3 XXX+XXXXO +# 2 XXXX+XXXX +# 1 O++XXXXXX +# ABCDEFGHJ +# +# Previous move was observational +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 
80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = " 9 XOOOO++OO\n 8 +OO+OXOOO\n 7 +OXO+XOOO\n 6 O+XX+OOOO\n 5 OXXXXXOOO\n 4 XXXXXXXXO\n 3 XXX+XXXXO\n 2 XXXX+XXXX\n 1 O++XXXXXX\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationString(1) = " 9 XOOOOO+OO\n 8 +OOOO+OOO\n 7 OOXO+XOOO\n 6 OOXXOOOOO\n 5 OXXXX+OOO\n 4 XXXXXXXXO\n 3 +XX+XX++O\n 2 XXX++XXX+\n 1 O+OXX+X+X\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationTensor(0) = [40.0, 35.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(1) = [40.0, 35.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 5, 7, 12, 13, 17, 18, 21, 24, 25, 41, 58, 63, 68, 78, 81] +StringLegalActions() = ["W b1", "W f1", "W h1", "W d2", "W e2", "W j2", "W a3", "W d3", "W g3", "W h3", "W f5", "W e7", "W a8", "W f8", "W g9", "W PASS"] + +# Apply action "W j2" +action: 17 + +# State 189 +# Apply action "W g9" +action: 78 + +# State 190 +# Apply action "B d8" +action: 66 + +# State 191 +# Apply action "B a7" +action: 54 + +# State 192 +# GoState(komi=7.5, to_play=B, history.size()=192, stones_count: w36 b40) +# +# 9 XOOOOOOOO +# 8 +OOOOXOOO +# 7 OOXO+XOOO +# 6 OOXXOOOOO +# 5 OXXXXXOOO +# 4 XXXXXXXXO +# 3 XXX+XXXXO +# 2 XXXX+XXXX +# 1 O+OXXXXXX +# ABCDEFGHJ +# +# Observation white: +# 9 XOOOOOOOO +# 8 +OOOO+OOO +# 7 OOXO+XOOO +# 6 OOXXOOOOO +# 5 OXXXX+OOO +# 4 XXXXXXXXO +# 3 +XX+XX++O +# 2 XXX++XXXX +# 1 O+OXX+X+X +# ABCDEFGHJ +# +# Observation black: +# 9 XOOOO++OO +# 8 +OOOOXOOO +# 7 OOXO+XOOO +# 6 O+XX+OOOO +# 5 OXXXXXOOO +# 4 XXXXXXXXO +# 3 XXX+XXXXO +# 2 XXXX+XXXX +# 1 O++XXXXXX +# ABCDEFGHJ +# +# Previous move was observational +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 9 XOOOO++OO\n 8 +OOOOXOOO\n 7 OOXO+XOOO\n 6 O+XX+OOOO\n 5 OXXXXXOOO\n 4 XXXXXXXXO\n 3 XXX+XXXXO\n 2 XXXX+XXXX\n 1 O++XXXXXX\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationString(1) = " 9 XOOOOOOOO\n 8 +OOOO+OOO\n 7 OOXO+XOOO\n 6 
OOXXOOOOO\n 5 OXXXX+OOO\n 4 XXXXXXXXO\n 3 +XX+XX++O\n 2 XXX++XXXX\n 1 O+OXX+X+X\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationTensor(0) = [40.0, 36.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [40.0, 36.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 13, 21, 46, 49, 58, 63, 77, 78, 81] 
+StringLegalActions() = ["B b1", "B c1", "B e2", "B d3", "B b6", "B e6", "B e7", "B a8", "B f9", "B g9", "B PASS"] + +# Apply action "B b1" +action: 1 + +# State 193 +# Apply action "W d3" +action: 21 + +# State 194 +# Apply action "W b1" +action: 1 + +# State 195 +# Apply action "W f8" +action: 68 + +# State 196 +# Apply action "W f1" +action: 5 + +# State 197 +# Apply action "W d2" +action: 12 + +# State 198 +# Apply action "W d3" +action: 21 + +# State 199 +# Apply action "W c1" +action: 2 + +# State 200 +# Apply action "W PASS" +action: 81 + +# State 201 +# Apply action "B f9" +action: 77 + +# State 202 +# Apply action "B a8" +action: 63 + +# State 203 +# Apply action "B d3" +action: 21 + +# State 204 +# GoState(komi=7.5, to_play=W, history.size()=204, stones_count: w34 b42) +# +# 9 XOOOOOOOO +# 8 +OOOOXOOO +# 7 OOXO+XOOO +# 6 OOXXOOOOO +# 5 OXXXXXOOO +# 4 XXXXXXXXO +# 3 XXXXXXXXO +# 2 XXXX+XXXX +# 1 +X+XXXXXX +# ABCDEFGHJ +# +# Observation white: +# 9 XOOOOOOOO +# 8 +OOOOXOOO +# 7 OOXO+XOOO +# 6 OOXXOOOOO +# 5 OXXXX+OOO +# 4 XXXXXXXXO +# 3 +XX+XX++O +# 2 XXXX+XXXX +# 1 +X+XXXX+X +# ABCDEFGHJ +# +# Observation black: +# 9 XOOOOO+OO +# 8 +OOOOXOOO +# 7 OOXO+XOOO +# 6 O+XX+OOOO +# 5 OXXXXXOOO +# 4 XXXXXXXXO +# 3 XXXXXXXXO +# 2 XXXX+XXXX +# 1 +X+XXXXXX +# ABCDEFGHJ +# +# Previous move was valid +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54, 1, 21, 1, 68, 5, 12, 21, 2, 81, 77, 63, 21] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54, 1, 21, 1, 68, 5, 12, 21, 2, 81, 77, 63, 21" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = " 9 XOOOOO+OO\n 8 +OOOOXOOO\n 7 OOXO+XOOO\n 6 O+XX+OOOO\n 5 OXXXXXOOO\n 4 XXXXXXXXO\n 3 XXXXXXXXO\n 2 XXXX+XXXX\n 1 +X+XXXXXX\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationString(1) = " 9 XOOOOOOOO\n 8 +OOOOXOOO\n 7 OOXO+XOOO\n 6 OOXXOOOOO\n 5 OXXXX+OOO\n 4 XXXXXXXXO\n 3 +XX+XX++O\n 2 XXXX+XXXX\n 1 +X+XXXX+X\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationTensor(0) = [42.0, 34.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(1) = [42.0, 34.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 2, 7, 13, 18, 21, 24, 25, 41, 58, 63, 81] +StringLegalActions() = ["W a1", "W c1", "W h1", "W e2", "W a3", "W d3", "W g3", "W h3", "W f5", "W e7", "W a8", "W PASS"] + +# Apply action "W a1" +action: 0 + +# State 205 +# Apply action "W g3" +action: 24 + +# State 206 +# Apply action "W h1" +action: 7 + +# State 207 +# Apply action "W 
f5" +action: 41 + +# State 208 +# Apply action "W c1" +action: 2 + +# State 209 +# Apply action "W e7" +action: 58 + +# State 210 +# Apply action "B a1" +action: 0 + +# State 211 +# Apply action "W d3" +action: 21 + +# State 212 +# Apply action "W e2" +action: 13 + +# State 213 +# Apply action "W PASS" +action: 81 + +# State 214 +# Apply action "B a8" +action: 63 + +# State 215 +# Apply action "B e7" +action: 58 + +# State 216 +# Apply action "B e6" +action: 49 + +# State 217 +# Apply action "B PASS" +action: 81 + +# State 218 +# Apply action "W a3" +action: 18 + +# State 219 +# GoState(komi=7.5, to_play=W, history.size()=219, stones_count: w35 b41) +# +# 9 XOOOOOOOO +# 8 +OOOO+OOO +# 7 OOXOO+OOO +# 6 OOXXOOOOO +# 5 OXXXXXOOO +# 4 XXXXXXXXO +# 3 XXXXXXXXO +# 2 XXXX+XXXX +# 1 XX+XXXXXX +# ABCDEFGHJ +# +# Observation white: +# 9 XOOOOOOOO +# 8 +OOOO+OOO +# 7 OOXOO+OOO +# 6 OOXXOOOOO +# 5 OXXXXXOOO +# 4 XXXXXXXXO +# 3 XXXXXXX+O +# 2 XXXX+XXXX +# 1 +X+XXXXXX +# ABCDEFGHJ +# +# Observation black: +# 9 XOOOOO+OO +# 8 +OOOO+OOO +# 7 OOXOO+OOO +# 6 O+XXOOOOO +# 5 OXXXXXOOO +# 4 XXXXXXXXO +# 3 XXXXXXXXO +# 2 XXXX+XXXX +# 1 XX+XXXXXX +# ABCDEFGHJ +# +# Previous move was observational +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54, 1, 21, 1, 68, 5, 12, 21, 2, 81, 77, 63, 21, 0, 24, 7, 41, 2, 58, 0, 21, 13, 81, 63, 58, 49, 81, 18] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54, 1, 21, 1, 68, 5, 12, 21, 2, 81, 77, 63, 21, 0, 24, 7, 41, 2, 58, 0, 21, 13, 81, 63, 58, 49, 81, 18" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = " 9 XOOOOO+OO\n 8 +OOOO+OOO\n 7 OOXOO+OOO\n 6 O+XXOOOOO\n 5 OXXXXXOOO\n 4 XXXXXXXXO\n 3 XXXXXXXXO\n 2 XXXX+XXXX\n 1 XX+XXXXXX\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationString(1) = " 9 XOOOOOOOO\n 8 +OOOO+OOO\n 7 OOXOO+OOO\n 6 OOXXOOOOO\n 5 OXXXXXOOO\n 4 XXXXXXXXO\n 3 XXXXXXX+O\n 2 XXXX+XXXX\n 1 +X+XXXXXX\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationTensor(0) = [41.0, 35.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(1) = [41.0, 35.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 2, 13, 25, 59, 63, 68, 81] +StringLegalActions() = ["W a1", "W c1", "W e2", "W h3", "W f7", "W a8", "W f8", "W PASS"] + +# Apply action "W h3" +action: 25 + +# State 220 +# Apply action "W f7" +action: 59 + +# State 221 +# Apply action "B e2" +action: 13 + +# State 222 +# Apply action "W a8" +action: 63 + +# State 223 +# 
GoState(komi=7.5, to_play=B, history.size()=223, stones_count: w37 b41) +# +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OOXOOOOOO +# 6 OOXXOOOOO +# 5 OXXXXXOOO +# 4 XXXXXXXXO +# 3 XXXXXXXXO +# 2 XXXXXXXXX +# 1 XX+XXXXXX +# ABCDEFGHJ +# +# Observation white: +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OOXOOOOOO +# 6 OOXXOOOOO +# 5 OXXXXXOOO +# 4 XXXXXXXXO +# 3 XXXXXXXXO +# 2 XXXX+XXXX +# 1 +X+XXXXXX +# ABCDEFGHJ +# +# Observation black: +# 9 +OOOOO+OO +# 8 +OOOO+OOO +# 7 OOXOO+OOO +# 6 O+XXOOOOO +# 5 OXXXXXOOO +# 4 XXXXXXXXO +# 3 XXXXXXXXO +# 2 XXXXXXXXX +# 1 XX+XXXXXX +# ABCDEFGHJ +# +# Previous move was valid +# In previous move 1 stones were captured +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54, 1, 21, 1, 68, 5, 12, 21, 2, 81, 77, 63, 21, 0, 24, 7, 41, 2, 58, 0, 21, 13, 81, 63, 58, 49, 81, 18, 25, 59, 13, 63] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54, 1, 21, 1, 68, 5, 12, 21, 2, 81, 77, 63, 21, 0, 24, 7, 41, 2, 58, 0, 21, 13, 81, 63, 58, 49, 81, 18, 25, 59, 13, 63" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 9 +OOOOO+OO\n 8 +OOOO+OOO\n 7 OOXOO+OOO\n 6 O+XXOOOOO\n 5 OXXXXXOOO\n 4 XXXXXXXXO\n 3 XXXXXXXXO\n 2 XXXXXXXXX\n 1 XX+XXXXXX\n ABCDEFGHJ\nPrevious move was valid\nIn previous move 1 stones were captured\n" +ObservationString(1) = " 9 +OOOOOOOO\n 8 OOOOO+OOO\n 7 OOXOOOOOO\n 6 OOXXOOOOO\n 5 OXXXXXOOO\n 4 XXXXXXXXO\n 3 XXXXXXXXO\n 2 XXXX+XXXX\n 1 +X+XXXXXX\n ABCDEFGHJ\nPrevious move was valid\nIn previous move 1 stones were captured\n" +ObservationTensor(0) = [41.0, 37.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [41.0, 37.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2, 46, 59, 63, 68, 72, 78, 81] +StringLegalActions() = ["B c1", "B b6", "B f7", "B a8", "B f8", "B a9", "B g9", "B PASS"] + +# Apply action "B PASS" +action: 81 + +# State 224 +# Apply action "W c1" +action: 2 + +# State 225 +# Apply action "B f3" +action: 23 + +# State 226 +# Apply action "W h1" +action: 7 + +# State 227 +# Apply action "B j1" +action: 8 + +# State 228 +# Apply action "W b3" +action: 19 + +# State 229 +# Apply action "B b1" +action: 1 + +# State 230 +# Apply action "W a3" +action: 18 + +# State 231 +# Apply action "B h3" +action: 25 + +# State 232 +# Apply action "W g1" +action: 6 + +# State 233 +# Apply action "B g4" +action: 33 + +# State 234 +# Apply action "W g4" +action: 33 + +# State 235 +# Apply action "W b2" +action: 10 + +# State 236 +# Apply action 
"B c6" +action: 47 + +# State 237 +# GoState(komi=7.5, to_play=W, history.size()=237, stones_count: w43 b6) +# +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OO+OOOOOO +# 6 OOX+OOOOO +# 5 O+++++OOO +# 4 ++++++X+O +# 3 OO+++X+XO +# 2 +O+++++++ +# 1 +XO+++OOX +# ABCDEFGHJ +# +# Observation white: +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OO+OOOOOO +# 6 OO++OOOOO +# 5 O+++++OOO +# 4 ++++++X+O +# 3 OO++++++O +# 2 +O+++++++ +# 1 ++O+++OO+ +# ABCDEFGHJ +# +# Observation black: +# 9 +OOOOO+OO +# 8 +OOOO+OOO +# 7 OO+OO+OOO +# 6 O+X+OOOOO +# 5 O+++++OOO +# 4 ++++++X+O +# 3 +++++X+XO +# 2 +++++++++ +# 1 +X++++++X +# ABCDEFGHJ +# +# Previous move was valid +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54, 1, 21, 1, 68, 5, 12, 21, 2, 81, 77, 63, 21, 0, 24, 7, 41, 2, 58, 0, 21, 13, 81, 63, 58, 49, 81, 18, 25, 59, 13, 63, 81, 2, 23, 7, 8, 19, 1, 18, 25, 6, 33, 33, 10, 47] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54, 1, 21, 1, 68, 5, 12, 21, 2, 81, 77, 63, 21, 0, 24, 7, 41, 2, 58, 0, 21, 13, 81, 63, 58, 49, 81, 18, 25, 59, 13, 63, 81, 2, 23, 7, 8, 19, 1, 18, 25, 6, 33, 33, 10, 47" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = " 9 +OOOOO+OO\n 8 +OOOO+OOO\n 7 OO+OO+OOO\n 6 O+X+OOOOO\n 5 O+++++OOO\n 4 ++++++X+O\n 3 +++++X+XO\n 2 +++++++++\n 1 +X++++++X\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationString(1) = " 9 +OOOOOOOO\n 8 OOOOO+OOO\n 7 OO+OOOOOO\n 6 OO++OOOOO\n 5 O+++++OOO\n 4 ++++++X+O\n 3 OO++++++O\n 2 +O+++++++\n 1 ++O+++OO+\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationTensor(0) = [6.0, 43.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(1) = [6.0, 43.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 3, 4, 5, 8, 9, 11, 12, 13, 14, 15, 16, 17, 20, 21, 22, 23, 24, 25, 27, 28, 29, 30, 31, 32, 34, 37, 38, 39, 40, 41, 47, 48, 56, 68, 72, 81] +StringLegalActions() = ["W a1", "W b1", "W d1", "W e1", "W f1", "W j1", "W a2", "W c2", "W d2", "W e2", "W f2", "W g2", "W h2", "W j2", "W c3", "W d3", "W e3", "W f3", "W g3", "W h3", "W a4", "W b4", "W c4", "W d4", "W e4", "W f4", "W h4", "W b5", "W c5", "W d5", "W e5", "W f5", "W c6", "W d6", "W c7", "W f8", "W a9", "W PASS"] + +# Apply action "W e5" +action: 40 + +# State 238 +# Apply action "B e2" +action: 13 + +# State 239 +# Apply action "W b1" +action: 1 + +# State 240 +# Apply action "W e2" +action: 13 + +# State 241 +# Apply action "W d4" +action: 30 + +# State 242 +# Apply action "B c7" +action: 56 + +# State 243 +# 
Apply action "W f1" +action: 5 + +# State 244 +# Apply action "B a8" +action: 63 + +# State 245 +# GoState(komi=7.5, to_play=B, history.size()=245, stones_count: w46 b8) +# +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OOXOOOOOO +# 6 OOX+OOOOO +# 5 O+++O+OOO +# 4 +++O++X+O +# 3 OO+++X+XO +# 2 +O++X++++ +# 1 +XO++OOOX +# ABCDEFGHJ +# +# Observation white: +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OO+OOOOOO +# 6 OO++OOOOO +# 5 O+++O+OOO +# 4 +++O++X+O +# 3 OO++++++O +# 2 +O++X++++ +# 1 +XO++OOO+ +# ABCDEFGHJ +# +# Observation black: +# 9 +OOOOO+OO +# 8 OOOOO+OOO +# 7 OOXOO+OOO +# 6 O+X+OOOOO +# 5 O+++++OOO +# 4 ++++++X+O +# 3 +++++X+XO +# 2 ++++X++++ +# 1 +X++++++X +# ABCDEFGHJ +# +# Previous move was observational +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54, 1, 21, 1, 68, 5, 12, 21, 2, 81, 77, 63, 21, 0, 24, 7, 41, 2, 58, 0, 21, 13, 81, 63, 58, 49, 81, 18, 25, 59, 13, 63, 81, 2, 23, 7, 8, 19, 1, 18, 25, 6, 33, 33, 10, 47, 40, 13, 1, 13, 30, 56, 5, 63] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54, 1, 21, 1, 68, 5, 12, 21, 2, 81, 77, 63, 21, 0, 24, 7, 41, 2, 58, 0, 21, 13, 81, 63, 58, 49, 81, 18, 25, 59, 13, 63, 81, 2, 23, 7, 8, 19, 1, 18, 25, 6, 33, 33, 10, 47, 40, 13, 1, 13, 30, 56, 5, 63" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 9 +OOOOO+OO\n 8 OOOOO+OOO\n 7 OOXOO+OOO\n 6 O+X+OOOOO\n 5 O+++++OOO\n 4 ++++++X+O\n 3 +++++X+XO\n 2 ++++X++++\n 1 +X++++++X\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationString(1) = " 9 +OOOOOOOO\n 8 OOOOO+OOO\n 7 OO+OOOOOO\n 6 OO++OOOOO\n 5 O+++O+OOO\n 4 +++O++X+O\n 3 OO++++++O\n 2 +O++X++++\n 1 +XO++OOO+\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationTensor(0) = [8.0, 46.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [8.0, 46.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 27, 28, 29, 30, 31, 32, 34, 37, 38, 39, 40, 41, 46, 48, 59, 68, 72, 78, 81] +StringLegalActions() = ["B a1", "B c1", "B d1", "B e1", "B f1", "B g1", "B h1", "B a2", "B b2", "B c2", "B d2", "B f2", "B g2", "B h2", "B j2", "B a3", "B b3", "B c3", "B d3", "B e3", "B g3", "B a4", "B b4", "B c4", "B d4", "B e4", "B f4", "B h4", "B b5", "B c5", "B d5", "B e5", "B f5", "B b6", "B d6", "B f7", "B f8", "B a9", "B g9", "B PASS"] + +# Apply action "B c1" +action: 2 + +# State 246 +# Apply action "B f1" +action: 5 + +# State 247 +# Apply action "B c3" +action: 20 
+ +# State 248 +# Apply action "W j1" +action: 8 + +# State 249 +# Apply action "W b5" +action: 37 + +# State 250 +# Apply action "B f5" +action: 41 + +# State 251 +# Apply action "W h4" +action: 34 + +# State 252 +# Apply action "B a4" +action: 27 + +# State 253 +# Apply action "W b4" +action: 28 + +# State 254 +# Apply action "B f8" +action: 68 + +# State 255 +# Apply action "B e5" +action: 40 + +# State 256 +# Apply action "B c4" +action: 29 + +# State 257 +# Apply action "W d2" +action: 12 + +# State 258 +# Apply action "B f8" +action: 68 + +# State 259 +# Apply action "B b6" +action: 46 + +# State 260 +# GoState(komi=7.5, to_play=B, history.size()=260, stones_count: w50 b11) +# +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OOXOOOOOO +# 6 OOX+OOOOO +# 5 OO++OXOOO +# 4 +OXO++XOO +# 3 OOX++X+XO +# 2 +O+OX++++ +# 1 +XO++OOOX +# ABCDEFGHJ +# +# Observation white: +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OO+OOOOOO +# 6 OO++OOOOO +# 5 OO++O+OOO +# 4 +O+O++XOO +# 3 OO++++++O +# 2 +O+OX++++ +# 1 +XO++OOOX +# ABCDEFGHJ +# +# Observation black: +# 9 +OOOOO+OO +# 8 OOOOO+OOO +# 7 OOXOO+OOO +# 6 OOX+OOOOO +# 5 O+++OXOOO +# 4 ++X+++X+O +# 3 ++X++X+XO +# 2 ++++X++++ +# 1 +XO++O++X +# ABCDEFGHJ +# +# Previous move was observational +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54, 1, 21, 1, 68, 5, 12, 21, 2, 81, 77, 63, 21, 0, 24, 7, 41, 2, 58, 0, 21, 13, 81, 63, 58, 49, 81, 18, 25, 59, 13, 63, 81, 2, 23, 7, 8, 19, 1, 18, 25, 6, 33, 33, 10, 47, 40, 13, 1, 13, 30, 56, 5, 63, 2, 5, 20, 8, 37, 41, 34, 27, 28, 68, 40, 29, 12, 68, 46] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54, 1, 21, 1, 68, 5, 12, 21, 2, 81, 77, 63, 21, 0, 24, 7, 41, 2, 58, 0, 21, 13, 81, 63, 58, 49, 81, 18, 25, 59, 13, 63, 81, 2, 23, 7, 8, 19, 1, 18, 25, 6, 33, 33, 10, 47, 40, 13, 1, 13, 30, 56, 5, 63, 2, 5, 20, 8, 37, 41, 34, 27, 28, 68, 40, 29, 12, 68, 46" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 9 +OOOOO+OO\n 8 OOOOO+OOO\n 7 OOXOO+OOO\n 6 OOX+OOOOO\n 5 O+++OXOOO\n 4 ++X+++X+O\n 3 ++X++X+XO\n 2 ++++X++++\n 1 +XO++O++X\n ABCDEFGHJ\nPrevious move was observational\n" 
+ObservationString(1) = " 9 +OOOOOOOO\n 8 OOOOO+OOO\n 7 OO+OOOOOO\n 6 OO++OOOOO\n 5 OO++O+OOO\n 4 +O+O++XOO\n 3 OO++++++O\n 2 +O+OX++++\n 1 +XO++OOOX\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationTensor(0) = [11.0, 50.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [11.0, 50.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] 
+LegalActions() = [0, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 21, 22, 24, 27, 28, 30, 31, 32, 34, 37, 38, 39, 48, 59, 68, 72, 78, 81] +StringLegalActions() = ["B a1", "B d1", "B e1", "B g1", "B h1", "B a2", "B b2", "B c2", "B d2", "B f2", "B g2", "B h2", "B j2", "B a3", "B b3", "B d3", "B e3", "B g3", "B a4", "B b4", "B d4", "B e4", "B f4", "B h4", "B b5", "B c5", "B d5", "B d6", "B f7", "B f8", "B a9", "B g9", "B PASS"] + +# Apply action "B d1" +action: 3 + +# State 261 +# GoState(komi=7.5, to_play=W, history.size()=261, stones_count: w50 b12) +# +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OOXOOOOOO +# 6 OOX+OOOOO +# 5 OO++OXOOO +# 4 +OXO++XOO +# 3 OOX++X+XO +# 2 +O+OX++++ +# 1 +XOX+OOOX +# ABCDEFGHJ +# +# Observation white: +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OO+OOOOOO +# 6 OO++OOOOO +# 5 OO++O+OOO +# 4 +O+O++XOO +# 3 OO++++++O +# 2 +O+OX++++ +# 1 +XO++OOOX +# ABCDEFGHJ +# +# Observation black: +# 9 +OOOOO+OO +# 8 OOOOO+OOO +# 7 OOXOO+OOO +# 6 OOX+OOOOO +# 5 O+++OXOOO +# 4 ++X+++X+O +# 3 ++X++X+XO +# 2 ++++X++++ +# 1 +XOX+O++X +# ABCDEFGHJ +# +# Previous move was valid +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54, 1, 21, 1, 68, 5, 12, 21, 2, 81, 77, 63, 21, 0, 24, 7, 41, 2, 58, 0, 21, 13, 81, 63, 58, 49, 81, 18, 25, 59, 13, 63, 81, 2, 23, 7, 8, 19, 1, 18, 25, 6, 33, 33, 10, 47, 40, 13, 1, 13, 30, 56, 5, 63, 2, 5, 20, 8, 37, 41, 34, 27, 28, 68, 40, 29, 12, 68, 46, 3] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54, 1, 21, 1, 68, 5, 12, 21, 2, 81, 77, 63, 21, 0, 24, 7, 41, 2, 58, 0, 21, 13, 81, 63, 58, 49, 81, 18, 25, 59, 13, 63, 81, 2, 23, 7, 8, 19, 1, 18, 25, 6, 33, 33, 10, 47, 40, 13, 1, 13, 30, 56, 5, 63, 2, 5, 20, 8, 37, 41, 34, 27, 28, 68, 40, 29, 12, 68, 46, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = " 9 +OOOOO+OO\n 8 OOOOO+OOO\n 7 OOXOO+OOO\n 6 OOX+OOOOO\n 5 O+++OXOOO\n 4 ++X+++X+O\n 3 ++X++X+XO\n 2 ++++X++++\n 1 +XOX+O++X\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationString(1) = " 9 +OOOOOOOO\n 8 OOOOO+OOO\n 7 OO+OOOOOO\n 6 OO++OOOOO\n 5 OO++O+OOO\n 4 +O+O++XOO\n 3 OO++++++O\n 2 +O+OX++++\n 1 +XO++OOOX\n 
ABCDEFGHJ\nPrevious move was valid\n" +ObservationTensor(0) = [12.0, 50.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(1) = [12.0, 50.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 3, 4, 9, 11, 14, 15, 16, 17, 20, 21, 22, 23, 24, 25, 27, 29, 31, 32, 38, 39, 41, 47, 48, 56, 68, 72, 81] +StringLegalActions() = ["W a1", "W 
d1", "W e1", "W a2", "W c2", "W f2", "W g2", "W h2", "W j2", "W c3", "W d3", "W e3", "W f3", "W g3", "W h3", "W a4", "W c4", "W e4", "W f4", "W c5", "W d5", "W f5", "W c6", "W d6", "W c7", "W f8", "W a9", "W PASS"] + +# Apply action "W h3" +action: 25 + +# State 262 +# Apply action "W c5" +action: 38 + +# State 263 +# Apply action "B h4" +action: 34 + +# State 264 +# Apply action "B a3" +action: 18 + +# State 265 +# Apply action "B a1" +action: 0 + +# State 266 +# Apply action "W d6" +action: 48 + +# State 267 +# Apply action "B f7" +action: 59 + +# State 268 +# Apply action "B c6" +action: 47 + +# State 269 +# Apply action "W e1" +action: 4 + +# State 270 +# Apply action "B c2" +action: 11 + +# State 271 +# Apply action "W PASS" +action: 81 + +# State 272 +# Apply action "B g9" +action: 78 + +# State 273 +# Apply action "B b2" +action: 10 + +# State 274 +# Apply action "B e3" +action: 22 + +# State 275 +# Apply action "W a4" +action: 27 + +# State 276 +# GoState(komi=7.5, to_play=B, history.size()=276, stones_count: w54 b13) +# +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OO+OOOOOO +# 6 OOXOOOOOO +# 5 OOO+OXOOO +# 4 OOXO++XOO +# 3 OOX+XX+XO +# 2 +OXOX++++ +# 1 XXO+OOOOX +# ABCDEFGHJ +# +# Observation white: +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OO+OOOOOO +# 6 OO+OOOOOO +# 5 OOO+O+OOO +# 4 OO+O++XOO +# 3 OO+++++XO +# 2 +O+OX++++ +# 1 +XO+OOOOX +# ABCDEFGHJ +# +# Observation black: +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OO+OOOOOO +# 6 OOX+OOOOO +# 5 O+++OXOOO +# 4 ++X+++XOO +# 3 O+X+XX+XO +# 2 +OX+X++++ +# 1 XXO++O++X +# ABCDEFGHJ +# +# Previous move was valid +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54, 1, 21, 1, 68, 5, 12, 21, 2, 81, 77, 63, 21, 0, 24, 7, 41, 2, 58, 0, 21, 13, 81, 63, 58, 49, 81, 18, 25, 59, 13, 63, 81, 2, 23, 7, 8, 19, 1, 18, 25, 6, 33, 33, 10, 47, 40, 13, 1, 13, 30, 56, 5, 63, 2, 5, 20, 8, 37, 41, 34, 27, 28, 68, 40, 29, 12, 68, 46, 3, 25, 38, 34, 18, 0, 48, 59, 47, 4, 11, 81, 78, 10, 22, 27] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54, 1, 21, 1, 68, 5, 12, 21, 2, 81, 77, 63, 21, 0, 24, 7, 41, 2, 58, 0, 21, 13, 81, 63, 58, 49, 81, 18, 25, 59, 13, 63, 81, 2, 23, 7, 8, 19, 
1, 18, 25, 6, 33, 33, 10, 47, 40, 13, 1, 13, 30, 56, 5, 63, 2, 5, 20, 8, 37, 41, 34, 27, 28, 68, 40, 29, 12, 68, 46, 3, 25, 38, 34, 18, 0, 48, 59, 47, 4, 11, 81, 78, 10, 22, 27" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 9 +OOOOOOOO\n 8 OOOOO+OOO\n 7 OO+OOOOOO\n 6 OOX+OOOOO\n 5 O+++OXOOO\n 4 ++X+++XOO\n 3 O+X+XX+XO\n 2 +OX+X++++\n 1 XXO++O++X\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationString(1) = " 9 +OOOOOOOO\n 8 OOOOO+OOO\n 7 OO+OOOOOO\n 6 OO+OOOOOO\n 5 OOO+O+OOO\n 4 OO+O++XOO\n 3 OO+++++XO\n 2 +O+OX++++\n 1 +XO+OOOOX\n ABCDEFGHJ\nPrevious move was valid\n" +ObservationTensor(0) = [13.0, 54.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [13.0, 54.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [3, 4, 6, 7, 9, 12, 14, 15, 16, 17, 19, 21, 24, 27, 28, 30, 31, 32, 37, 38, 39, 48, 56, 68, 72, 81] +StringLegalActions() = ["B d1", "B e1", "B g1", "B h1", "B a2", "B d2", "B f2", "B g2", "B h2", "B j2", "B b3", "B d3", "B g3", "B a4", "B b4", "B d4", "B e4", "B f4", "B b5", "B c5", "B d5", "B d6", "B c7", "B f8", "B a9", "B PASS"] + +# Apply action "B f2" +action: 14 + +# State 277 +# Apply action "W d1" +action: 3 + +# State 278 +# Apply action "B g3" +action: 24 + +# State 279 +# Apply action "W d3" +action: 21 + +# State 280 +# Apply action "B PASS" +action: 81 + +# State 281 +# Apply action "W c4" +action: 29 + +# State 282 +# Apply action "B a9" +action: 72 + +# State 283 +# Apply action "B h1" +action: 7 + +# State 284 +# Apply action "B c7" +action: 56 + +# State 285 +# Apply action "B PASS" +action: 81 + +# State 286 +# Apply action "W f2" +action: 14 + +# State 287 +# GoState(komi=7.5, to_play=W, history.size()=287, stones_count: w57 b12) +# +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OO+OOOOOO +# 6 OOXOOOOOO +# 5 OOO+OXOOO +# 4 OOOO++XOO +# 3 OO+OXXXXO +# 2 +O+OXX+++ +# 1 XXOOOOOOX +# ABCDEFGHJ +# +# Observation white: +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OO+OOOOOO +# 6 OO+OOOOOO +# 5 OOO+O+OOO +# 4 OOOO++XOO +# 3 OO+O+++XO +# 2 +O+OXX+++ +# 1 +XOOOOOOX +# ABCDEFGHJ +# +# Observation black: +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OO+OOOOOO +# 6 OOX+OOOOO +# 5 O+++OXOOO +# 4 ++++++XOO +# 3 O+++XXXXO +# 2 +O++XX+++ +# 1 XXO++O+OX +# ABCDEFGHJ +# +# Previous move was observational +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54, 1, 21, 1, 68, 5, 12, 21, 2, 81, 77, 63, 21, 0, 24, 7, 41, 2, 58, 0, 21, 13, 81, 63, 58, 49, 81, 18, 25, 59, 13, 63, 81, 2, 23, 7, 8, 19, 1, 18, 25, 6, 33, 33, 10, 47, 40, 13, 1, 13, 30, 56, 5, 63, 2, 5, 20, 8, 37, 41, 34, 27, 28, 68, 40, 29, 12, 68, 46, 3, 25, 38, 34, 18, 0, 48, 59, 47, 4, 11, 81, 78, 10, 22, 27, 14, 3, 24, 21, 81, 29, 72, 7, 56, 81, 14] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 
55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54, 1, 21, 1, 68, 5, 12, 21, 2, 81, 77, 63, 21, 0, 24, 7, 41, 2, 58, 0, 21, 13, 81, 63, 58, 49, 81, 18, 25, 59, 13, 63, 81, 2, 23, 7, 8, 19, 1, 18, 25, 6, 33, 33, 10, 47, 40, 13, 1, 13, 30, 56, 5, 63, 2, 5, 20, 8, 37, 41, 34, 27, 28, 68, 40, 29, 12, 68, 46, 3, 25, 38, 34, 18, 0, 48, 59, 47, 4, 11, 81, 78, 10, 22, 27, 14, 3, 24, 21, 81, 29, 72, 7, 56, 81, 14" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = " 9 +OOOOOOOO\n 8 OOOOO+OOO\n 7 OO+OOOOOO\n 6 OOX+OOOOO\n 5 O+++OXOOO\n 4 ++++++XOO\n 3 O+++XXXXO\n 2 +O++XX+++\n 1 XXO++O+OX\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationString(1) = " 9 +OOOOOOOO\n 8 OOOOO+OOO\n 7 OO+OOOOOO\n 6 OO+OOOOOO\n 5 OOO+O+OOO\n 4 OOOO++XOO\n 3 OO+O+++XO\n 2 +O+OXX+++\n 1 +XOOOOOOX\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationTensor(0) = [12.0, 57.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(1) = [12.0, 57.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 9, 11, 15, 16, 17, 20, 22, 23, 24, 31, 32, 39, 41, 47, 56, 68, 72, 81] +StringLegalActions() = ["W a1", "W a2", "W c2", "W g2", "W h2", "W j2", "W c3", "W e3", "W f3", "W g3", "W e4", "W f4", "W d5", "W f5", "W c6", "W c7", "W f8", "W a9", "W PASS"] + +# Apply action "W g2" +action: 15 + +# State 288 +# Apply action "B b4" +action: 28 + +# State 289 +# Apply action "B a2" +action: 9 + +# State 290 +# Apply action "B c5" +action: 38 + +# State 291 +# GoState(komi=7.5, to_play=B, history.size()=291, stones_count: w58 b12) +# +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OO+OOOOOO +# 6 OOXOOOOOO +# 5 OOO+OXOOO +# 4 OOOO++XOO +# 3 OO+OXXXXO +# 2 +O+OXXO++ +# 1 XXOOOOOOX +# ABCDEFGHJ +# +# Observation white: +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OO+OOOOOO +# 6 OO+OOOOOO +# 5 OOO+O+OOO +# 4 OOOO++XOO +# 3 OO+O+++XO +# 2 +O+OXXO++ +# 1 +XOOOOOOX +# ABCDEFGHJ +# +# Observation black: +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OO+OOOOOO +# 6 OOX+OOOOO +# 5 O+O+OXOOO +# 4 +O++++XOO +# 3 O+++XXXXO +# 2 +O++XX+++ +# 1 XXO++O+OX +# ABCDEFGHJ +# +# Previous move was observational +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54, 1, 21, 1, 68, 5, 12, 21, 2, 81, 77, 63, 21, 0, 24, 7, 41, 2, 58, 0, 21, 13, 81, 63, 58, 49, 81, 18, 25, 59, 13, 63, 81, 2, 23, 7, 8, 19, 1, 18, 25, 6, 33, 33, 10, 47, 40, 13, 1, 13, 30, 56, 5, 63, 2, 5, 20, 8, 37, 41, 34, 27, 28, 68, 40, 29, 12, 68, 46, 3, 25, 38, 34, 18, 0, 48, 59, 47, 4, 11, 81, 78, 10, 22, 27, 14, 3, 24, 21, 81, 29, 72, 7, 56, 81, 14, 15, 28, 9, 38] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 
63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54, 1, 21, 1, 68, 5, 12, 21, 2, 81, 77, 63, 21, 0, 24, 7, 41, 2, 58, 0, 21, 13, 81, 63, 58, 49, 81, 18, 25, 59, 13, 63, 81, 2, 23, 7, 8, 19, 1, 18, 25, 6, 33, 33, 10, 47, 40, 13, 1, 13, 30, 56, 5, 63, 2, 5, 20, 8, 37, 41, 34, 27, 28, 68, 40, 29, 12, 68, 46, 3, 25, 38, 34, 18, 0, 48, 59, 47, 4, 11, 81, 78, 10, 22, 27, 14, 3, 24, 21, 81, 29, 72, 7, 56, 81, 14, 15, 28, 9, 38" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 9 +OOOOOOOO\n 8 OOOOO+OOO\n 7 OO+OOOOOO\n 6 OOX+OOOOO\n 5 O+O+OXOOO\n 4 +O++++XOO\n 3 O+++XXXXO\n 2 +O++XX+++\n 1 XXO++O+OX\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationString(1) = " 9 +OOOOOOOO\n 8 OOOOO+OOO\n 7 OO+OOOOOO\n 6 OO+OOOOOO\n 5 OOO+O+OOO\n 4 OOOO++XOO\n 3 OO+O+++XO\n 2 +O+OXXO++\n 1 +XOOOOOOX\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationTensor(0) = [12.0, 58.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [12.0, 58.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [3, 4, 6, 9, 11, 12, 15, 16, 17, 19, 20, 21, 27, 29, 30, 31, 32, 37, 39, 48, 56, 68, 72, 81] +StringLegalActions() = ["B d1", "B e1", "B g1", "B a2", "B c2", "B d2", "B g2", "B h2", "B j2", "B b3", "B c3", "B d3", "B a4", "B c4", "B d4", "B e4", "B f4", "B b5", "B d5", "B d6", "B c7", "B f8", "B a9", "B PASS"] + +# Apply action "B f4" +action: 32 + +# State 292 +# Apply action "W a2" +action: 9 + +# State 293 +# Apply action "B c7" +action: 56 + +# State 294 +# Apply action "B j2" +action: 17 + +# State 295 +# Apply action "W d5" +action: 39 + +# State 296 +# Apply action "B a9" +action: 72 + +# State 297 +# Apply action "B h2" +action: 16 + +# State 298 +# Apply action "W c7" +action: 56 + +# State 299 +# Apply action "B a9" +action: 72 + +# State 300 +# Apply action "B d3" +action: 21 + +# State 301 +# Apply action "B g1" +action: 6 + +# State 302 +# Apply action "B e1" +action: 4 + +# State 303 +# Apply action "B a4" +action: 27 + +# State 304 +# GoState(komi=7.5, to_play=B, history.size()=304, stones_count: w61 b12) +# +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OOOOOOOOO +# 6 OO+OOOOOO +# 5 OOOOOXOOO +# 4 OOOO+XXOO +# 3 OO+OXXXXO +# 2 OO+OXXOXX +# 1 ++OOOOOOX +# ABCDEFGHJ +# +# Observation white: +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OOOOOOOOO +# 6 OO+OOOOOO +# 5 OOOOO+OOO +# 4 OOOO++XOO +# 3 OO+O+++XO +# 2 OO+OXXO++ +# 1 ++OOOOOOX +# ABCDEFGHJ +# +# Observation black: +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OO+OOOOOO +# 6 OO++OOOOO +# 5 O+O+OXOOO +# 4 OO+++XXOO +# 3 O++OXXXXO +# 2 +O++XX+XX +# 1 ++O+OOOOX +# ABCDEFGHJ +# +# Previous move was observational +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54, 1, 21, 1, 68, 5, 12, 21, 2, 81, 77, 63, 21, 0, 24, 7, 41, 2, 58, 0, 21, 13, 81, 63, 58, 49, 81, 18, 25, 59, 13, 63, 81, 2, 23, 7, 8, 19, 1, 18, 25, 6, 33, 33, 10, 47, 40, 13, 1, 13, 30, 56, 
5, 63, 2, 5, 20, 8, 37, 41, 34, 27, 28, 68, 40, 29, 12, 68, 46, 3, 25, 38, 34, 18, 0, 48, 59, 47, 4, 11, 81, 78, 10, 22, 27, 14, 3, 24, 21, 81, 29, 72, 7, 56, 81, 14, 15, 28, 9, 38, 32, 9, 56, 17, 39, 72, 16, 56, 72, 21, 6, 4, 27] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54, 1, 21, 1, 68, 5, 12, 21, 2, 81, 77, 63, 21, 0, 24, 7, 41, 2, 58, 0, 21, 13, 81, 63, 58, 49, 81, 18, 25, 59, 13, 63, 81, 2, 23, 7, 8, 19, 1, 18, 25, 6, 33, 33, 10, 47, 40, 13, 1, 13, 30, 56, 5, 63, 2, 5, 20, 8, 37, 41, 34, 27, 28, 68, 40, 29, 12, 68, 46, 3, 25, 38, 34, 18, 0, 48, 59, 47, 4, 11, 81, 78, 10, 22, 27, 14, 3, 24, 21, 81, 29, 72, 7, 56, 81, 14, 15, 28, 9, 38, 32, 9, 56, 17, 39, 72, 16, 56, 72, 21, 6, 4, 27" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 9 +OOOOOOOO\n 8 OOOOO+OOO\n 7 OO+OOOOOO\n 6 OO++OOOOO\n 5 O+O+OXOOO\n 4 OO+++XXOO\n 3 O++OXXXXO\n 2 +O++XX+XX\n 1 ++O+OOOOX\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationString(1) = " 9 +OOOOOOOO\n 8 OOOOO+OOO\n 7 OOOOOOOOO\n 6 OO+OOOOOO\n 5 OOOOO+OOO\n 4 OOOO++XOO\n 3 OO+O+++XO\n 2 OO+OXXO++\n 1 ++OOOOOOX\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationTensor(0) = [12.0, 61.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] 
+ObservationTensor(1) = [12.0, 61.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 3, 9, 11, 12, 15, 19, 20, 29, 30, 31, 37, 39, 47, 48, 56, 68, 72, 81] +StringLegalActions() = ["B a1", "B b1", "B d1", "B a2", "B c2", "B d2", "B g2", "B b3", "B c3", "B c4", "B d4", "B e4", "B b5", "B d5", "B c6", "B d6", "B c7", "B f8", "B a9", "B PASS"] + +# Apply action "B d6" +action: 48 + +# State 305 +# Apply action "B c3" +action: 20 + +# State 306 +# Apply action "W b1" +action: 1 + +# State 307 +# Apply action "B d1" +action: 3 + +# State 308 +# Apply action "B b1" +action: 1 + +# State 309 +# Apply action "B c6" +action: 47 + +# State 310 +# Apply action "B b3" +action: 19 + +# State 311 +# Apply action "B b5" +action: 37 + +# State 312 +# Apply action "B d2" +action: 12 + +# State 313 +# Apply action "B g2" +action: 15 + +# State 314 +# Apply action "B e4" +action: 31 + +# State 315 +# GoState(komi=7.5, to_play=B, history.size()=315, stones_count: w62 b13) +# +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OOOOOOOOO +# 6 OO+OOOOOO +# 5 OOOOOXOOO +# 4 OOOO+XXOO +# 3 OOXOXXXXO +# 2 OO+OXXOXX +# 1 +OOOOOOOX +# ABCDEFGHJ +# +# Observation white: +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OOOOOOOOO +# 6 OO+OOOOOO +# 5 OOOOO+OOO +# 4 OOOO++XOO +# 3 OO+O+++XO +# 2 OO+OXXO++ +# 1 +OOOOOOOX +# ABCDEFGHJ +# +# Observation black: +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OO+OOOOOO +# 6 OO+OOOOOO +# 5 OOO+OXOOO +# 4 OO+++XXOO +# 3 OOXOXXXXO +# 2 +O+OXXOXX +# 1 +OOOOOOOX +# ABCDEFGHJ +# +# Previous move was observational +IsTerminal() = False +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 
63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54, 1, 21, 1, 68, 5, 12, 21, 2, 81, 77, 63, 21, 0, 24, 7, 41, 2, 58, 0, 21, 13, 81, 63, 58, 49, 81, 18, 25, 59, 13, 63, 81, 2, 23, 7, 8, 19, 1, 18, 25, 6, 33, 33, 10, 47, 40, 13, 1, 13, 30, 56, 5, 63, 2, 5, 20, 8, 37, 41, 34, 27, 28, 68, 40, 29, 12, 68, 46, 3, 25, 38, 34, 18, 0, 48, 59, 47, 4, 11, 81, 78, 10, 22, 27, 14, 3, 24, 21, 81, 29, 72, 7, 56, 81, 14, 15, 28, 9, 38, 32, 9, 56, 17, 39, 72, 16, 56, 72, 21, 6, 4, 27, 48, 20, 1, 3, 1, 47, 19, 37, 12, 15, 31] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54, 1, 21, 1, 68, 5, 12, 21, 2, 81, 77, 63, 21, 0, 24, 7, 41, 2, 58, 0, 21, 13, 81, 63, 58, 49, 81, 18, 25, 59, 13, 63, 81, 2, 23, 7, 8, 19, 1, 18, 25, 6, 33, 33, 10, 47, 40, 13, 1, 13, 30, 56, 5, 63, 2, 5, 20, 8, 37, 41, 34, 27, 28, 68, 40, 29, 12, 68, 46, 3, 25, 38, 34, 18, 0, 48, 59, 47, 4, 11, 81, 78, 10, 22, 27, 14, 3, 24, 21, 81, 29, 72, 7, 56, 81, 14, 15, 28, 9, 38, 32, 9, 56, 17, 39, 72, 16, 56, 72, 21, 6, 4, 27, 48, 20, 1, 3, 1, 47, 19, 37, 12, 15, 31" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = " 9 +OOOOOOOO\n 8 OOOOO+OOO\n 7 OO+OOOOOO\n 6 OO+OOOOOO\n 5 OOO+OXOOO\n 4 OO+++XXOO\n 3 OOXOXXXXO\n 2 +O+OXXOXX\n 1 +OOOOOOOX\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationString(1) = " 9 +OOOOOOOO\n 8 OOOOO+OOO\n 7 OOOOOOOOO\n 6 OO+OOOOOO\n 5 OOOOO+OOO\n 4 OOOO++XOO\n 3 OO+O+++XO\n 2 OO+OXXO++\n 1 +OOOOOOOX\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationTensor(0) = [13.0, 62.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [13.0, 62.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 9, 11, 29, 30, 31, 39, 47, 56, 68, 72, 81] +StringLegalActions() = ["B a1", "B a2", "B c2", "B c4", "B d4", "B e4", "B d5", "B c6", "B c7", "B f8", "B a9", "B PASS"] + +# Apply action "B PASS" +action: 81 + +# State 316 +# Apply action "W f4" +action: 32 + +# State 317 +# Apply action "W e3" +action: 22 + +# State 318 +# Apply action "W c2" +action: 11 + +# State 319 +# Apply action "B f8" +action: 68 + +# State 320 +# Apply action "B c2" +action: 11 + +# State 321 +# Apply action "B f8" +action: 68 + +# State 322 +# Apply action "B f8" +action: 68 + +# State 323 +# Apply action "B a1" +action: 0 + +# State 324 +# GoState(komi=7.5, to_play=B, history.size()=324, stones_count: w63 b12) +# +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OOOOOOOOO +# 6 OO+OOOOOO +# 5 OOOOOXOOO +# 4 OOOO+XXOO +# 3 OO+OXXXXO +# 2 OOOOXXOXX +# 1 +OOOOOOOX +# ABCDEFGHJ +# +# Observation white: +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OOOOOOOOO +# 6 OO+OOOOOO +# 5 OOOOO+OOO +# 4 OOOO+XXOO +# 3 OO+OX++XO +# 2 OOOOXXO++ +# 1 +OOOOOOOX +# ABCDEFGHJ +# +# Observation black: +# 9 +OOOOOOOO +# 8 OOOOO+OOO +# 7 OO+OOOOOO +# 6 OO+OOOOOO +# 5 OOO+OXOOO +# 4 OO+++XXOO +# 3 OO+OXXXXO +# 2 +OOOXXOXX +# 1 +OOOOOOOX +# ABCDEFGHJ +# +# Previous move was 
observational +IsTerminal() = True +History() = [27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54, 1, 21, 1, 68, 5, 12, 21, 2, 81, 77, 63, 21, 0, 24, 7, 41, 2, 58, 0, 21, 13, 81, 63, 58, 49, 81, 18, 25, 59, 13, 63, 81, 2, 23, 7, 8, 19, 1, 18, 25, 6, 33, 33, 10, 47, 40, 13, 1, 13, 30, 56, 5, 63, 2, 5, 20, 8, 37, 41, 34, 27, 28, 68, 40, 29, 12, 68, 46, 3, 25, 38, 34, 18, 0, 48, 59, 47, 4, 11, 81, 78, 10, 22, 27, 14, 3, 24, 21, 81, 29, 72, 7, 56, 81, 14, 15, 28, 9, 38, 32, 9, 56, 17, 39, 72, 16, 56, 72, 21, 6, 4, 27, 48, 20, 1, 3, 1, 47, 19, 37, 12, 15, 31, 81, 32, 22, 11, 68, 11, 68, 68, 0] +HistoryString() = "27, 76, 45, 14, 39, 19, 76, 3, 46, 29, 0, 22, 22, 69, 63, 67, 33, 50, 81, 21, 34, 54, 0, 6, 29, 36, 10, 61, 58, 77, 68, 42, 31, 3, 39, 52, 56, 75, 78, 6, 5, 48, 38, 36, 40, 73, 4, 63, 70, 74, 53, 18, 62, 72, 10, 8, 47, 43, 42, 37, 38, 64, 20, 1, 32, 13, 16, 9, 30, 20, 66, 38, 33, 65, 62, 9, 72, 56, 48, 32, 37, 7, 70, 52, 43, 12, 30, 40, 2, 69, 63, 53, 63, 28, 81, 23, 80, 75, 63, 63, 59, 4, 23, 59, 16, 28, 49, 64, 50, 67, 13, 11, 60, 19, 57, 9, 55, 79, 51, 81, 0, 71, 81, 45, 59, 26, 65, 55, 44, 35, 60, 80, 15, 13, 25, 9, 24, 27, 47, 78, 34, 5, 5, 63, 13, 61, 68, 74, 26, 24, 19, 15, 51, 14, 71, 7, 35, 8, 17, 11, 7, 51, 74, 8, 44, 44, 57, 71, 7, 21, 8, 59, 81, 0, 73, 41, 38, 31, 79, 58, 5, 2, 79, 45, 81, 13, 14, 72, 17, 78, 66, 54, 1, 21, 1, 68, 5, 12, 21, 2, 81, 77, 63, 21, 0, 24, 7, 41, 2, 58, 0, 21, 13, 81, 63, 58, 49, 81, 18, 25, 59, 13, 63, 81, 2, 23, 7, 8, 19, 1, 18, 25, 6, 33, 33, 10, 47, 40, 13, 1, 13, 30, 56, 5, 63, 2, 5, 20, 8, 37, 41, 34, 27, 28, 68, 40, 29, 12, 68, 46, 3, 25, 38, 34, 18, 0, 48, 59, 47, 4, 11, 81, 78, 10, 22, 27, 14, 3, 24, 21, 81, 29, 72, 7, 56, 81, 14, 15, 28, 9, 38, 32, 9, 56, 17, 39, 72, 16, 56, 72, 21, 6, 4, 27, 48, 20, 1, 3, 1, 47, 19, 37, 12, 15, 31, 81, 32, 22, 11, 68, 11, 68, 68, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = " 9 +OOOOOOOO\n 8 OOOOO+OOO\n 7 OO+OOOOOO\n 6 OO+OOOOOO\n 5 OOO+OXOOO\n 4 OO+++XXOO\n 3 OO+OXXXXO\n 2 +OOOXXOXX\n 1 +OOOOOOOX\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationString(1) = " 9 +OOOOOOOO\n 8 OOOOO+OOO\n 7 OOOOOOOOO\n 6 OO+OOOOOO\n 5 OOOOO+OOO\n 4 OOOO+XXOO\n 3 OO+OX++XO\n 2 OOOOXXO++\n 1 +OOOOOOOX\n ABCDEFGHJ\nPrevious move was observational\n" +ObservationTensor(0) = [12.0, 63.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 
0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [12.0, 63.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [-1, 1] +Returns() = [-1, 1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/phantom_ttt.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/phantom_ttt.txt new file mode 100644 index 0000000..48fa581 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/phantom_ttt.txt @@ -0,0 +1,245 @@ +game: phantom_ttt + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Phantom Tic Tac Toe" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["gameversion", "obstype"] 
+GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "phantom_ttt" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 9 +PolicyTensorShape() = [9] +MaxChanceOutcomes() = 0 +GetParameters() = {gameversion=classical,obstype=reveal-nothing} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = [1, 108] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 108 +ObservationTensorShape() = [27] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 27 +MaxGameLength() = 17 +ToString() = "phantom_ttt()" + +# State 0 +# ... +# ... +# ... +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "...\n...\n...\n" +InformationStateString(1) = "...\n...\n...\n" +InformationStateTensor(0): +◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): +◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "...\n...\n..." +ObservationString(1) = "...\n...\n..." +ObservationTensor(0): ◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8] +StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,2)", "x(1,0)", "x(1,1)", "x(1,2)", "x(2,0)", "x(2,1)", "x(2,2)"] + +# Apply action "x(2,0)" +action: 6 + +# State 1 +# ... +# ... +# x.. +IsTerminal() = False +History() = [6] +HistoryString() = "6" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "...\n...\nx..\n0,6 " +InformationStateString(1) = "...\n...\n...\n" +InformationStateTensor(0): +◉◉◉◉◉◉◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): +◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "...\n...\nx.." +ObservationString(1) = "...\n...\n..." +ObservationTensor(0): ◉◉◉◉◉◉◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(1): ◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8] +StringLegalActions() = ["o(0,0)", "o(0,1)", "o(0,2)", "o(1,0)", "o(1,1)", "o(1,2)", "o(2,0)", "o(2,1)", "o(2,2)"] + +# Apply action "o(2,1)" +action: 7 + +# State 2 +# ... +# ... +# xo. +IsTerminal() = False +History() = [6, 7] +HistoryString() = "6, 7" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "...\n...\nx..\n0,6 " +InformationStateString(1) = "...\n...\n.o.\n1,7 " +InformationStateTensor(0): +◉◉◉◉◉◉◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): +◉◉◉◉◉◉◉◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "...\n...\nx.." +ObservationString(1) = "...\n...\n.o." 
+ObservationTensor(0): ◉◉◉◉◉◉◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(1): ◉◉◉◉◉◉◉◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 7, 8] +StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,2)", "x(1,0)", "x(1,1)", "x(1,2)", "x(2,1)", "x(2,2)"] + +# Apply action "x(2,2)" +action: 8 + +# State 3 +# ... +# ... +# xox +IsTerminal() = False +History() = [6, 7, 8] +HistoryString() = "6, 7, 8" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "...\n...\nx.x\n0,6 0,8 " +InformationStateString(1) = "...\n...\n.o.\n1,7 " +InformationStateTensor(0): +◉◉◉◉◉◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): +◉◉◉◉◉◉◉◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "...\n...\nx.x" +ObservationString(1) = "...\n...\n.o." +ObservationTensor(0): ◉◉◉◉◉◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉ +ObservationTensor(1): ◉◉◉◉◉◉◉◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 8] +StringLegalActions() = ["o(0,0)", "o(0,1)", "o(0,2)", "o(1,0)", "o(1,1)", "o(1,2)", "o(2,0)", "o(2,2)"] + +# Apply action "o(2,2)" +action: 8 + +# State 4 +# ... +# ... +# xox +IsTerminal() = False +History() = [6, 7, 8, 8] +HistoryString() = "6, 7, 8, 8" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "...\n...\nx.x\n0,6 0,8 " +InformationStateString(1) = "...\n...\n.ox\n1,7 1,8 " +InformationStateTensor(0): +◉◉◉◉◉◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): +◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "...\n...\nx.x" +ObservationString(1) = "...\n...\n.ox" +ObservationTensor(0): ◉◉◉◉◉◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉ +ObservationTensor(1): ◉◉◉◉◉◉◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6] +StringLegalActions() = ["o(0,0)", "o(0,1)", "o(0,2)", "o(1,0)", "o(1,1)", "o(1,2)", "o(2,0)"] + +# Apply action "o(0,2)" +action: 2 + +# State 5 +# ..o +# ... 
+# xox +IsTerminal() = False +History() = [6, 7, 8, 8, 2] +HistoryString() = "6, 7, 8, 8, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "...\n...\nx.x\n0,6 0,8 " +InformationStateString(1) = "..o\n...\n.ox\n1,7 1,8 1,2 " +InformationStateTensor(0): +◉◉◉◉◉◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): +◉◉◯◉◉◉◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "...\n...\nx.x" +ObservationString(1) = "..o\n...\n.ox" +ObservationTensor(0): ◉◉◉◉◉◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉ +ObservationTensor(1): ◉◉◯◉◉◉◉◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 7] +StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,2)", "x(1,0)", "x(1,1)", "x(1,2)", "x(2,1)"] + +# Apply action "x(1,0)" +action: 3 + +# State 6 +# Apply action "o(2,0)" +action: 6 + +# State 7 +# Apply action "o(1,2)" +action: 5 + +# State 8 +# Apply action "x(2,1)" +action: 7 + +# State 9 +# Apply action "x(0,2)" +action: 2 + +# State 10 +# Apply action "x(0,0)" +action: 0 + +# State 11 +# x.o +# x.o +# xox +IsTerminal() = True +History() = [6, 7, 8, 8, 2, 3, 6, 5, 7, 2, 0] +HistoryString() = "6, 7, 8, 8, 2, 3, 6, 5, 7, 2, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "x.o\nx..\nxox\n0,6 0,8 0,3 0,7 0,2 0,0 " +InformationStateString(1) = "..o\n..o\nxox\n1,7 1,8 1,2 1,6 1,5 " +InformationStateTensor(0): +◯◉◯◯◉◉◯◯◯◯◯◉◯◯◯◯◉◯◉◯◯◉◯◯◉◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): +◉◉◯◉◉◯◯◯◯◯◯◉◯◯◉◯◉◯◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "x.o\nx..\nxox" +ObservationString(1) = "..o\n..o\nxox" +ObservationTensor(0): ◯◉◯◯◉◉◯◯◯◯◯◉◯◯◯◯◉◯◉◯◯◉◯◯◉◯◉ +ObservationTensor(1): ◉◉◯◉◉◯◯◯◯◯◯◉◯◯◉◯◉◯◯◯◯◯◯◯◉◯◉ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/phantom_ttt_ir.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/phantom_ttt_ir.txt new file mode 100644 index 0000000..a93bb04 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/phantom_ttt_ir.txt @@ -0,0 +1,187 @@ +game: phantom_ttt_ir + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Phantom Tic Tac Toe with Imperfect Recall" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["gameversion", "obstype"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = False +GameType.provides_observation_tensor = False +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "phantom_ttt_ir" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 9 +PolicyTensorShape() = [9] +MaxChanceOutcomes() = 0 +GetParameters() = {gameversion=classical,obstype=reveal-nothing} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +MaxGameLength() = 17 +ToString() = "phantom_ttt_ir()" + +# State 0 +# ... +# ... +# ... 
+IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "P0 ...\n...\n..." +InformationStateString(1) = "P1 ...\n...\n..." +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8] +StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,2)", "x(1,0)", "x(1,1)", "x(1,2)", "x(2,0)", "x(2,1)", "x(2,2)"] + +# Apply action "x(2,0)" +action: 6 + +# State 1 +# ... +# ... +# x.. +IsTerminal() = False +History() = [6] +HistoryString() = "6" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "P0 ...\n...\nx.." +InformationStateString(1) = "P1 ...\n...\n..." +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8] +StringLegalActions() = ["o(0,0)", "o(0,1)", "o(0,2)", "o(1,0)", "o(1,1)", "o(1,2)", "o(2,0)", "o(2,1)", "o(2,2)"] + +# Apply action "o(2,2)" +action: 8 + +# State 2 +# ... +# ... +# x.o +IsTerminal() = False +History() = [6, 8] +HistoryString() = "6, 8" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "P0 ...\n...\nx.." +InformationStateString(1) = "P1 ...\n...\n..o" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 7, 8] +StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,2)", "x(1,0)", "x(1,1)", "x(1,2)", "x(2,1)", "x(2,2)"] + +# Apply action "x(0,1)" +action: 1 + +# State 3 +# .x. +# ... +# x.o +IsTerminal() = False +History() = [6, 8, 1] +HistoryString() = "6, 8, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "P0 .x.\n...\nx.." +InformationStateString(1) = "P1 ...\n...\n..o" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7] +StringLegalActions() = ["o(0,0)", "o(0,1)", "o(0,2)", "o(1,0)", "o(1,1)", "o(1,2)", "o(2,0)", "o(2,1)"] + +# Apply action "o(0,1)" +action: 1 + +# State 4 +# .x. +# ... +# x.o +IsTerminal() = False +History() = [6, 8, 1, 1] +HistoryString() = "6, 8, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "P0 .x.\n...\nx.." +InformationStateString(1) = "P1 .x.\n...\n..o" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 2, 3, 4, 5, 6, 7] +StringLegalActions() = ["o(0,0)", "o(0,2)", "o(1,0)", "o(1,1)", "o(1,2)", "o(2,0)", "o(2,1)"] + +# Apply action "o(0,0)" +action: 0 + +# State 5 +# ox. +# ... +# x.o +IsTerminal() = False +History() = [6, 8, 1, 1, 0] +HistoryString() = "6, 8, 1, 1, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "P0 .x.\n...\nx.." +InformationStateString(1) = "P1 ox.\n...\n..o" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 2, 3, 4, 5, 7, 8] +StringLegalActions() = ["x(0,0)", "x(0,2)", "x(1,0)", "x(1,1)", "x(1,2)", "x(2,1)", "x(2,2)"] + +# Apply action "x(2,2)" +action: 8 + +# State 6 +# Apply action "x(1,2)" +action: 5 + +# State 7 +# Apply action "o(2,1)" +action: 7 + +# State 8 +# Apply action "x(1,0)" +action: 3 + +# State 9 +# Apply action "o(1,0)" +action: 3 + +# State 10 +# Apply action "o(1,2)" +action: 5 + +# State 11 +# Apply action "o(1,1)" +action: 4 + +# State 12 +# ox. 
+# xox +# xoo +IsTerminal() = True +History() = [6, 8, 1, 1, 0, 8, 5, 7, 3, 3, 5, 4] +HistoryString() = "6, 8, 1, 1, 0, 8, 5, 7, 3, 3, 5, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "P0 .x.\nx.x\nx.o" +InformationStateString(1) = "P1 ox.\nxox\n.oo" +Rewards() = [-1, 1] +Returns() = [-1, 1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/pig_3p.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/pig_3p.txt new file mode 100644 index 0000000..a50b51d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/pig_3p.txt @@ -0,0 +1,416 @@ +game: pig(players=3,horizon=10,winscore=10) + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Pig" +GameType.max_num_players = 10 +GameType.min_num_players = 2 +GameType.parameter_specification = ["diceoutcomes", "horizon", "piglet", "players", "winscore"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "pig" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 2 +PolicyTensorShape() = [2] +MaxChanceOutcomes() = 6 +GetParameters() = {diceoutcomes=6,horizon=10,piglet=False,players=3,winscore=10} +NumPlayers() = 3 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [4, 11] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 44 +MaxGameLength() = 10 +ToString() = "pig(horizon=10,players=3,winscore=10)" + +# State 0 +# Scores: 0 0 0, Turn total: 0 +# Current player: 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Scores: 0 0 0, Turn total: 0\nCurrent player: 0\n" +ObservationString(1) = "Scores: 0 0 0, Turn total: 0\nCurrent player: 0\n" +ObservationString(2) = "Scores: 0 0 0, Turn total: 0\nCurrent player: 0\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "stop" +action: 1 + +# State 1 +# Scores: 0 0 0, Turn total: 0 +# Current player: 1 +IsTerminal() = False +History() = [1] +HistoryString() = "1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "Scores: 0 0 0, Turn total: 0\nCurrent player: 1\n" +ObservationString(1) = "Scores: 0 0 0, Turn total: 0\nCurrent player: 1\n" +ObservationString(2) = "Scores: 0 0 0, Turn total: 0\nCurrent player: 1\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "roll" +action: 0 + +# State 2 +# Scores: 0 0 0, Turn total: 0 +# 
Current player: 1 (rolling) +IsTerminal() = False +History() = [1, 0] +HistoryString() = "1, 0" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "Scores: 0 0 0, Turn total: 0\nCurrent player: 1 (rolling)\n" +ObservationString(1) = "Scores: 0 0 0, Turn total: 0\nCurrent player: 1 (rolling)\n" +ObservationString(2) = "Scores: 0 0 0, Turn total: 0\nCurrent player: 1 (rolling)\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.166667), (1,0.166667), (2,0.166667), (3,0.166667), (4,0.166667), (5,0.166667)] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["Roll 1", "Roll 2", "Roll 3", "Roll 4", "Roll 5", "Roll 6"] + +# Apply action "Roll 4" +action: 3 + +# State 3 +# Scores: 0 0 0, Turn total: 4 +# Current player: 1 +IsTerminal() = False +History() = [1, 0, 3] +HistoryString() = "1, 0, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "Scores: 0 0 0, Turn total: 4\nCurrent player: 1\n" +ObservationString(1) = "Scores: 0 0 0, Turn total: 4\nCurrent player: 1\n" +ObservationString(2) = "Scores: 0 0 0, Turn total: 4\nCurrent player: 1\n" +ObservationTensor(0): ◯◯◯◯◉◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◯◯◯◯◉◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "stop" +action: 1 + +# State 4 +# Scores: 0 4 0, Turn total: 0 +# Current player: 2 +IsTerminal() = False +History() = [1, 0, 3, 1] +HistoryString() = "1, 0, 3, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "Scores: 0 4 0, Turn total: 0\nCurrent player: 2\n" +ObservationString(1) = "Scores: 0 4 0, Turn total: 0\nCurrent player: 2\n" +ObservationString(2) = "Scores: 0 4 0, Turn total: 0\nCurrent player: 2\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "roll" +action: 0 + +# State 5 +# Scores: 0 4 0, Turn total: 0 +# Current player: 2 (rolling) +IsTerminal() = False +History() = [1, 0, 3, 1, 0] +HistoryString() = "1, 0, 3, 1, 0" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "Scores: 0 4 0, Turn total: 0\nCurrent player: 2 (rolling)\n" +ObservationString(1) = "Scores: 0 4 0, Turn total: 0\nCurrent player: 2 (rolling)\n" +ObservationString(2) = "Scores: 0 4 0, Turn total: 0\nCurrent player: 2 (rolling)\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.166667), (1,0.166667), (2,0.166667), (3,0.166667), (4,0.166667), (5,0.166667)] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["Roll 1", "Roll 2", "Roll 3", "Roll 4", "Roll 5", "Roll 6"] + +# Apply action "Roll 3" 
+action: 2 + +# State 6 +# Scores: 0 4 0, Turn total: 3 +# Current player: 2 +IsTerminal() = False +History() = [1, 0, 3, 1, 0, 2] +HistoryString() = "1, 0, 3, 1, 0, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "Scores: 0 4 0, Turn total: 3\nCurrent player: 2\n" +ObservationString(1) = "Scores: 0 4 0, Turn total: 3\nCurrent player: 2\n" +ObservationString(2) = "Scores: 0 4 0, Turn total: 3\nCurrent player: 2\n" +ObservationTensor(0): ◯◯◯◉◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◉◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◯◯◯◉◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "stop" +action: 1 + +# State 7 +# Scores: 0 4 3, Turn total: 0 +# Current player: 0 +IsTerminal() = False +History() = [1, 0, 3, 1, 0, 2, 1] +HistoryString() = "1, 0, 3, 1, 0, 2, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Scores: 0 4 3, Turn total: 0\nCurrent player: 0\n" +ObservationString(1) = "Scores: 0 4 3, Turn total: 0\nCurrent player: 0\n" +ObservationString(2) = "Scores: 0 4 3, Turn total: 0\nCurrent player: 0\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "stop" +action: 1 + +# State 8 +# Scores: 0 4 3, Turn total: 0 +# Current player: 1 +IsTerminal() = False +History() = [1, 0, 3, 1, 0, 2, 1, 1] +HistoryString() = "1, 0, 3, 1, 0, 2, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "Scores: 0 4 3, Turn total: 0\nCurrent player: 1\n" +ObservationString(1) = "Scores: 0 4 3, Turn total: 0\nCurrent player: 1\n" +ObservationString(2) = "Scores: 0 4 3, Turn total: 0\nCurrent player: 1\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "stop" +action: 1 + +# State 9 +# Scores: 0 4 3, Turn total: 0 +# Current player: 2 +IsTerminal() = False +History() = [1, 0, 3, 1, 0, 2, 1, 1, 1] +HistoryString() = "1, 0, 3, 1, 0, 2, 1, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "Scores: 0 4 3, Turn total: 0\nCurrent player: 2\n" +ObservationString(1) = "Scores: 0 4 3, Turn total: 0\nCurrent player: 2\n" +ObservationString(2) = "Scores: 0 4 3, Turn total: 0\nCurrent player: 2\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "roll" +action: 0 + +# State 10 +# Apply action "Roll 4" +action: 3 + +# State 11 +# Apply action "stop" +action: 1 + +# State 12 +# Scores: 0 4 7, Turn total: 0 +# 
Current player: 0 +IsTerminal() = False +History() = [1, 0, 3, 1, 0, 2, 1, 1, 1, 0, 3, 1] +HistoryString() = "1, 0, 3, 1, 0, 2, 1, 1, 1, 0, 3, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Scores: 0 4 7, Turn total: 0\nCurrent player: 0\n" +ObservationString(1) = "Scores: 0 4 7, Turn total: 0\nCurrent player: 0\n" +ObservationString(2) = "Scores: 0 4 7, Turn total: 0\nCurrent player: 0\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉◯◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "roll" +action: 0 + +# State 13 +# Scores: 0 4 7, Turn total: 0 +# Current player: 0 (rolling) +IsTerminal() = True +History() = [1, 0, 3, 1, 0, 2, 1, 1, 1, 0, 3, 1, 0] +HistoryString() = "1, 0, 3, 1, 0, 2, 1, 1, 1, 0, 3, 1, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "Scores: 0 4 7, Turn total: 0\nCurrent player: 0 (rolling)\n" +ObservationString(1) = "Scores: 0 4 7, Turn total: 0\nCurrent player: 0 (rolling)\n" +ObservationString(2) = "Scores: 0 4 7, Turn total: 0\nCurrent player: 0 (rolling)\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉◯◯◯ +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/pig_4p.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/pig_4p.txt new file mode 100644 index 0000000..883394e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/pig_4p.txt @@ -0,0 +1,499 @@ +game: pig(players=4,horizon=10,winscore=8) + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Pig" +GameType.max_num_players = 10 +GameType.min_num_players = 2 +GameType.parameter_specification = ["diceoutcomes", "horizon", "piglet", "players", "winscore"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "pig" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 2 +PolicyTensorShape() = [2] +MaxChanceOutcomes() = 6 +GetParameters() = {diceoutcomes=6,horizon=10,piglet=False,players=4,winscore=8} +NumPlayers() = 4 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [5, 9] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 45 +MaxGameLength() = 10 +ToString() = "pig(horizon=10,players=4,winscore=8)" + +# State 0 +# Scores: 0 0 0 0, Turn total: 0 +# Current player: 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Scores: 0 0 0 0, Turn total: 0\nCurrent player: 0\n" +ObservationString(1) = "Scores: 0 0 0 0, Turn total: 0\nCurrent player: 0\n" +ObservationString(2) = 
"Scores: 0 0 0 0, Turn total: 0\nCurrent player: 0\n" +ObservationString(3) = "Scores: 0 0 0 0, Turn total: 0\nCurrent player: 0\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "roll" +action: 0 + +# State 1 +# Scores: 0 0 0 0, Turn total: 0 +# Current player: 0 (rolling) +IsTerminal() = False +History() = [0] +HistoryString() = "0" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "Scores: 0 0 0 0, Turn total: 0\nCurrent player: 0 (rolling)\n" +ObservationString(1) = "Scores: 0 0 0 0, Turn total: 0\nCurrent player: 0 (rolling)\n" +ObservationString(2) = "Scores: 0 0 0 0, Turn total: 0\nCurrent player: 0 (rolling)\n" +ObservationString(3) = "Scores: 0 0 0 0, Turn total: 0\nCurrent player: 0 (rolling)\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.166667), (1,0.166667), (2,0.166667), (3,0.166667), (4,0.166667), (5,0.166667)] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["Roll 1", "Roll 2", "Roll 3", "Roll 4", "Roll 5", "Roll 6"] + +# Apply action "Roll 2" +action: 1 + +# State 2 +# Scores: 0 0 0 0, Turn total: 2 +# Current player: 0 +IsTerminal() = False +History() = [0, 1] +HistoryString() = "0, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Scores: 0 0 0 0, Turn total: 2\nCurrent player: 0\n" +ObservationString(1) = "Scores: 0 0 0 0, Turn total: 2\nCurrent player: 0\n" +ObservationString(2) = "Scores: 0 0 0 0, Turn total: 2\nCurrent player: 0\n" +ObservationString(3) = "Scores: 0 0 0 0, Turn total: 2\nCurrent player: 0\n" +ObservationTensor(0): ◯◯◉◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◉◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◯◯◉◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◯◯◉◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "roll" +action: 0 + +# State 3 +# Scores: 0 0 0 0, Turn total: 2 +# Current player: 0 (rolling) +IsTerminal() = False +History() = [0, 1, 0] +HistoryString() = "0, 1, 0" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "Scores: 0 0 0 0, Turn total: 2\nCurrent player: 0 (rolling)\n" +ObservationString(1) = "Scores: 0 0 0 0, Turn total: 2\nCurrent player: 0 (rolling)\n" +ObservationString(2) = "Scores: 0 0 0 0, Turn total: 2\nCurrent player: 0 (rolling)\n" +ObservationString(3) = "Scores: 0 0 0 0, Turn total: 2\nCurrent player: 0 (rolling)\n" +ObservationTensor(0): ◯◯◉◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◉◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◯◯◉◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + 
◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◯◯◉◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.166667), (1,0.166667), (2,0.166667), (3,0.166667), (4,0.166667), (5,0.166667)] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["Roll 1", "Roll 2", "Roll 3", "Roll 4", "Roll 5", "Roll 6"] + +# Apply action "Roll 2" +action: 1 + +# State 4 +# Scores: 0 0 0 0, Turn total: 4 +# Current player: 0 +IsTerminal() = False +History() = [0, 1, 0, 1] +HistoryString() = "0, 1, 0, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Scores: 0 0 0 0, Turn total: 4\nCurrent player: 0\n" +ObservationString(1) = "Scores: 0 0 0 0, Turn total: 4\nCurrent player: 0\n" +ObservationString(2) = "Scores: 0 0 0 0, Turn total: 4\nCurrent player: 0\n" +ObservationString(3) = "Scores: 0 0 0 0, Turn total: 4\nCurrent player: 0\n" +ObservationTensor(0): ◯◯◯◯◉◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◯◯◯◯◉◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◯◯◯◯◉◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "stop" +action: 1 + +# State 5 +# Scores: 4 0 0 0, Turn total: 0 +# Current player: 1 +IsTerminal() = False +History() = [0, 1, 0, 1, 1] +HistoryString() = "0, 1, 0, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "Scores: 4 0 0 0, Turn total: 0\nCurrent player: 1\n" +ObservationString(1) = "Scores: 4 0 0 0, Turn total: 0\nCurrent player: 1\n" +ObservationString(2) = "Scores: 4 0 0 0, Turn total: 0\nCurrent player: 1\n" +ObservationString(3) = "Scores: 4 0 0 0, Turn total: 0\nCurrent player: 1\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "stop" +action: 1 + +# State 6 +# Scores: 4 0 0 0, Turn total: 0 +# Current player: 2 +IsTerminal() = False +History() = [0, 1, 0, 1, 1, 1] +HistoryString() = "0, 1, 0, 1, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "Scores: 4 0 0 0, Turn total: 0\nCurrent player: 2\n" +ObservationString(1) = "Scores: 4 0 0 0, Turn total: 0\nCurrent player: 2\n" +ObservationString(2) = "Scores: 4 0 0 0, Turn total: 0\nCurrent player: 2\n" +ObservationString(3) = "Scores: 4 0 0 0, Turn total: 0\nCurrent player: 2\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "roll" +action: 0 + +# State 7 +# Apply action "Roll 1" +action: 0 + +# State 8 +# Scores: 4 0 0 0, Turn total: 0 +# Current player: 3 
+IsTerminal() = False +History() = [0, 1, 0, 1, 1, 1, 0, 0] +HistoryString() = "0, 1, 0, 1, 1, 1, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +ObservationString(0) = "Scores: 4 0 0 0, Turn total: 0\nCurrent player: 3\n" +ObservationString(1) = "Scores: 4 0 0 0, Turn total: 0\nCurrent player: 3\n" +ObservationString(2) = "Scores: 4 0 0 0, Turn total: 0\nCurrent player: 3\n" +ObservationString(3) = "Scores: 4 0 0 0, Turn total: 0\nCurrent player: 3\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "stop" +action: 1 + +# State 9 +# Apply action "roll" +action: 0 + +# State 10 +# Apply action "Roll 1" +action: 0 + +# State 11 +# Scores: 4 0 0 0, Turn total: 0 +# Current player: 1 +IsTerminal() = False +History() = [0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0] +HistoryString() = "0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "Scores: 4 0 0 0, Turn total: 0\nCurrent player: 1\n" +ObservationString(1) = "Scores: 4 0 0 0, Turn total: 0\nCurrent player: 1\n" +ObservationString(2) = "Scores: 4 0 0 0, Turn total: 0\nCurrent player: 1\n" +ObservationString(3) = "Scores: 4 0 0 0, Turn total: 0\nCurrent player: 1\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "roll" +action: 0 + +# State 12 +# Apply action "Roll 4" +action: 3 + +# State 13 +# Scores: 4 0 0 0, Turn total: 4 +# Current player: 1 +IsTerminal() = False +History() = [0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 3] +HistoryString() = "0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "Scores: 4 0 0 0, Turn total: 4\nCurrent player: 1\n" +ObservationString(1) = "Scores: 4 0 0 0, Turn total: 4\nCurrent player: 1\n" +ObservationString(2) = "Scores: 4 0 0 0, Turn total: 4\nCurrent player: 1\n" +ObservationString(3) = "Scores: 4 0 0 0, Turn total: 4\nCurrent player: 1\n" +ObservationTensor(0): ◯◯◯◯◉◯◯◯◯ + ◯◯◯◯◉◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◯◯ + ◯◯◯◯◉◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◯◯◯◯◉◯◯◯◯ + ◯◯◯◯◉◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◯◯◯◯◉◯◯◯◯ + ◯◯◯◯◉◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "roll" +action: 0 + +# State 14 +# Apply action "Roll 4" +action: 3 + +# State 15 +# Apply action "stop" +action: 1 + +# State 16 +# Scores: 4 8 0 0, Turn total: 0 +# Current player: 2 +IsTerminal() = True +History() = [0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 3, 0, 3, 1] +HistoryString() = "0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 3, 0, 3, 
1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "Scores: 4 8 0 0, Turn total: 0\nCurrent player: 2\n" +ObservationString(1) = "Scores: 4 8 0 0, Turn total: 0\nCurrent player: 2\n" +ObservationString(2) = "Scores: 4 8 0 0, Turn total: 0\nCurrent player: 2\n" +ObservationString(3) = "Scores: 4 8 0 0, Turn total: 0\nCurrent player: 2\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯◉ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯◉ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯◉ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯◉ + ◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯ +Rewards() = [-0.333333, 1, -0.333333, -0.333333] +Returns() = [-0.333333, 1, -0.333333, -0.333333] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/pig_5p.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/pig_5p.txt new file mode 100644 index 0000000..8ea1567 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/pig_5p.txt @@ -0,0 +1,1048 @@ +game: pig(players=5,horizon=100,winscore=16) + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Pig" +GameType.max_num_players = 10 +GameType.min_num_players = 2 +GameType.parameter_specification = ["diceoutcomes", "horizon", "piglet", "players", "winscore"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "pig" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 2 +PolicyTensorShape() = [2] +MaxChanceOutcomes() = 6 +GetParameters() = {diceoutcomes=6,horizon=100,piglet=False,players=5,winscore=16} +NumPlayers() = 5 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [6, 17] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 102 +MaxGameLength() = 100 +ToString() = "pig(horizon=100,players=5,winscore=16)" + +# State 0 +# Scores: 0 0 0 0 0, Turn total: 0 +# Current player: 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Scores: 0 0 0 0 0, Turn total: 0\nCurrent player: 0\n" +ObservationString(1) = "Scores: 0 0 0 0 0, Turn total: 0\nCurrent player: 0\n" +ObservationString(2) = "Scores: 0 0 0 0 0, Turn total: 0\nCurrent player: 0\n" +ObservationString(3) = "Scores: 0 0 0 0 0, Turn total: 0\nCurrent player: 0\n" +ObservationString(4) = "Scores: 0 0 0 0 0, Turn total: 0\nCurrent player: 0\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(4): 
◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "stop" +action: 1 + +# State 1 +# Scores: 0 0 0 0 0, Turn total: 0 +# Current player: 1 +IsTerminal() = False +History() = [1] +HistoryString() = "1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "Scores: 0 0 0 0 0, Turn total: 0\nCurrent player: 1\n" +ObservationString(1) = "Scores: 0 0 0 0 0, Turn total: 0\nCurrent player: 1\n" +ObservationString(2) = "Scores: 0 0 0 0 0, Turn total: 0\nCurrent player: 1\n" +ObservationString(3) = "Scores: 0 0 0 0 0, Turn total: 0\nCurrent player: 1\n" +ObservationString(4) = "Scores: 0 0 0 0 0, Turn total: 0\nCurrent player: 1\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(4): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "roll" +action: 0 + +# State 2 +# Scores: 0 0 0 0 0, Turn total: 0 +# Current player: 1 (rolling) +IsTerminal() = False +History() = [1, 0] +HistoryString() = "1, 0" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "Scores: 0 0 0 0 0, Turn total: 0\nCurrent player: 1 (rolling)\n" +ObservationString(1) = "Scores: 0 0 0 0 0, Turn total: 0\nCurrent player: 1 (rolling)\n" +ObservationString(2) = "Scores: 0 0 0 0 0, Turn total: 0\nCurrent player: 1 (rolling)\n" +ObservationString(3) = "Scores: 0 0 0 0 0, Turn total: 0\nCurrent player: 1 (rolling)\n" +ObservationString(4) = "Scores: 0 0 0 0 0, Turn total: 0\nCurrent player: 1 (rolling)\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(4): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.166667), (1,0.166667), (2,0.166667), (3,0.166667), (4,0.166667), (5,0.166667)] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["Roll 1", "Roll 2", "Roll 3", "Roll 4", "Roll 5", "Roll 6"] + +# Apply action "Roll 2" +action: 1 + +# State 3 +# Scores: 0 0 0 0 0, Turn total: 2 +# Current player: 1 +IsTerminal() = False +History() = [1, 0, 1] +HistoryString() = "1, 0, 1" +IsChanceNode() = 
False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "Scores: 0 0 0 0 0, Turn total: 2\nCurrent player: 1\n" +ObservationString(1) = "Scores: 0 0 0 0 0, Turn total: 2\nCurrent player: 1\n" +ObservationString(2) = "Scores: 0 0 0 0 0, Turn total: 2\nCurrent player: 1\n" +ObservationString(3) = "Scores: 0 0 0 0 0, Turn total: 2\nCurrent player: 1\n" +ObservationString(4) = "Scores: 0 0 0 0 0, Turn total: 2\nCurrent player: 1\n" +ObservationTensor(0): ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(4): ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "stop" +action: 1 + +# State 4 +# Scores: 0 2 0 0 0, Turn total: 0 +# Current player: 2 +IsTerminal() = False +History() = [1, 0, 1, 1] +HistoryString() = "1, 0, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 2\n" +ObservationString(1) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 2\n" +ObservationString(2) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 2\n" +ObservationString(3) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 2\n" +ObservationString(4) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 2\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(4): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "stop" +action: 1 + +# State 5 +# Scores: 0 2 0 0 0, Turn total: 0 +# Current player: 3 +IsTerminal() = False +History() = [1, 0, 1, 1, 1] +HistoryString() = "1, 0, 1, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +ObservationString(0) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 3\n" +ObservationString(1) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 3\n" +ObservationString(2) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 3\n" +ObservationString(3) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 3\n" +ObservationString(4) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 3\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + 
◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(4): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "stop" +action: 1 + +# State 6 +# Scores: 0 2 0 0 0, Turn total: 0 +# Current player: 4 +IsTerminal() = False +History() = [1, 0, 1, 1, 1, 1] +HistoryString() = "1, 0, 1, 1, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 4 +ObservationString(0) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 4\n" +ObservationString(1) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 4\n" +ObservationString(2) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 4\n" +ObservationString(3) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 4\n" +ObservationString(4) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 4\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(4): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "roll" +action: 0 + +# State 7 +# Scores: 0 2 0 0 0, Turn total: 0 +# Current player: 4 (rolling) +IsTerminal() = False +History() = [1, 0, 1, 1, 1, 1, 0] +HistoryString() = "1, 0, 1, 1, 1, 1, 0" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 4 (rolling)\n" +ObservationString(1) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 4 (rolling)\n" +ObservationString(2) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 4 (rolling)\n" +ObservationString(3) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 4 (rolling)\n" +ObservationString(4) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 4 (rolling)\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(4): 
◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.166667), (1,0.166667), (2,0.166667), (3,0.166667), (4,0.166667), (5,0.166667)] +LegalActions() = [0, 1, 2, 3, 4, 5] +StringLegalActions() = ["Roll 1", "Roll 2", "Roll 3", "Roll 4", "Roll 5", "Roll 6"] + +# Apply action "Roll 2" +action: 1 + +# State 8 +# Scores: 0 2 0 0 0, Turn total: 2 +# Current player: 4 +IsTerminal() = False +History() = [1, 0, 1, 1, 1, 1, 0, 1] +HistoryString() = "1, 0, 1, 1, 1, 1, 0, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 4 +ObservationString(0) = "Scores: 0 2 0 0 0, Turn total: 2\nCurrent player: 4\n" +ObservationString(1) = "Scores: 0 2 0 0 0, Turn total: 2\nCurrent player: 4\n" +ObservationString(2) = "Scores: 0 2 0 0 0, Turn total: 2\nCurrent player: 4\n" +ObservationString(3) = "Scores: 0 2 0 0 0, Turn total: 2\nCurrent player: 4\n" +ObservationString(4) = "Scores: 0 2 0 0 0, Turn total: 2\nCurrent player: 4\n" +ObservationTensor(0): ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(4): ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "roll" +action: 0 + +# State 9 +# Apply action "Roll 3" +action: 2 + +# State 10 +# Scores: 0 2 0 0 0, Turn total: 5 +# Current player: 4 +IsTerminal() = False +History() = [1, 0, 1, 1, 1, 1, 0, 1, 0, 2] +HistoryString() = "1, 0, 1, 1, 1, 1, 0, 1, 0, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 4 +ObservationString(0) = "Scores: 0 2 0 0 0, Turn total: 5\nCurrent player: 4\n" +ObservationString(1) = "Scores: 0 2 0 0 0, Turn total: 5\nCurrent player: 4\n" +ObservationString(2) = "Scores: 0 2 0 0 0, Turn total: 5\nCurrent player: 4\n" +ObservationString(3) = "Scores: 0 2 0 0 0, Turn total: 5\nCurrent player: 4\n" +ObservationString(4) = "Scores: 0 2 0 0 0, Turn total: 5\nCurrent player: 4\n" +ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(4): ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "roll" +action: 0 + +# State 11 +# Apply action "Roll 1" +action: 0 + +# State 12 +# Scores: 0 2 0 
0 0, Turn total: 0 +# Current player: 0 +IsTerminal() = False +History() = [1, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0] +HistoryString() = "1, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 0\n" +ObservationString(1) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 0\n" +ObservationString(2) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 0\n" +ObservationString(3) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 0\n" +ObservationString(4) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 0\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(4): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "stop" +action: 1 + +# State 13 +# Scores: 0 2 0 0 0, Turn total: 0 +# Current player: 1 +IsTerminal() = False +History() = [1, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0, 1] +HistoryString() = "1, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 1\n" +ObservationString(1) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 1\n" +ObservationString(2) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 1\n" +ObservationString(3) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 1\n" +ObservationString(4) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 1\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(4): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "stop" +action: 1 + +# State 14 +# Scores: 0 2 0 0 0, Turn total: 0 +# Current player: 2 +IsTerminal() = False +History() = [1, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0, 1, 1] +HistoryString() = "1, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 2\n" +ObservationString(1) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 2\n" +ObservationString(2) = "Scores: 0 2 0 0 0, Turn total: 
0\nCurrent player: 2\n" +ObservationString(3) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 2\n" +ObservationString(4) = "Scores: 0 2 0 0 0, Turn total: 0\nCurrent player: 2\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(4): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "roll" +action: 0 + +# State 15 +# Apply action "Roll 6" +action: 5 + +# State 16 +# Scores: 0 2 0 0 0, Turn total: 6 +# Current player: 2 +IsTerminal() = False +History() = [1, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0, 1, 1, 0, 5] +HistoryString() = "1, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0, 1, 1, 0, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "Scores: 0 2 0 0 0, Turn total: 6\nCurrent player: 2\n" +ObservationString(1) = "Scores: 0 2 0 0 0, Turn total: 6\nCurrent player: 2\n" +ObservationString(2) = "Scores: 0 2 0 0 0, Turn total: 6\nCurrent player: 2\n" +ObservationString(3) = "Scores: 0 2 0 0 0, Turn total: 6\nCurrent player: 2\n" +ObservationString(4) = "Scores: 0 2 0 0 0, Turn total: 6\nCurrent player: 2\n" +ObservationTensor(0): ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(4): ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "roll" +action: 0 + +# State 17 +# Apply action "Roll 2" +action: 1 + +# State 18 +# Apply action "stop" +action: 1 + +# State 19 +# Scores: 0 2 8 0 0, Turn total: 0 +# Current player: 3 +IsTerminal() = False +History() = [1, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0, 1, 1, 0, 5, 0, 1, 1] +HistoryString() = "1, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0, 1, 1, 0, 5, 0, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +ObservationString(0) = "Scores: 0 2 8 0 0, Turn total: 0\nCurrent player: 3\n" +ObservationString(1) = "Scores: 0 2 8 0 0, Turn total: 0\nCurrent player: 3\n" +ObservationString(2) = "Scores: 0 2 8 0 0, Turn total: 0\nCurrent player: 3\n" +ObservationString(3) = "Scores: 0 2 8 0 0, Turn total: 0\nCurrent player: 3\n" +ObservationString(4) = "Scores: 0 2 8 0 0, Turn total: 0\nCurrent player: 3\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + 
◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(4): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "roll" +action: 0 + +# State 20 +# Apply action "Roll 4" +action: 3 + +# State 21 +# Scores: 0 2 8 0 0, Turn total: 4 +# Current player: 3 +IsTerminal() = False +History() = [1, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0, 1, 1, 0, 5, 0, 1, 1, 0, 3] +HistoryString() = "1, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0, 1, 1, 0, 5, 0, 1, 1, 0, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +ObservationString(0) = "Scores: 0 2 8 0 0, Turn total: 4\nCurrent player: 3\n" +ObservationString(1) = "Scores: 0 2 8 0 0, Turn total: 4\nCurrent player: 3\n" +ObservationString(2) = "Scores: 0 2 8 0 0, Turn total: 4\nCurrent player: 3\n" +ObservationString(3) = "Scores: 0 2 8 0 0, Turn total: 4\nCurrent player: 3\n" +ObservationString(4) = "Scores: 0 2 8 0 0, Turn total: 4\nCurrent player: 3\n" +ObservationTensor(0): ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(4): ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "stop" +action: 1 + +# State 22 +# Apply action "roll" +action: 0 + +# State 23 +# Apply action "Roll 3" +action: 2 + +# State 24 +# Apply action "roll" +action: 0 + +# State 25 +# Apply action "Roll 5" +action: 4 + +# State 26 +# Apply action "roll" +action: 0 + +# State 27 +# Apply action "Roll 3" +action: 2 + +# State 28 +# Apply action "stop" +action: 1 + +# State 29 +# Scores: 0 2 8 4 11, Turn total: 0 +# Current player: 0 +IsTerminal() = False +History() = [1, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0, 1, 1, 0, 5, 0, 1, 1, 0, 3, 1, 0, 2, 0, 4, 0, 2, 1] +HistoryString() = "1, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0, 1, 1, 0, 5, 0, 1, 1, 0, 3, 1, 0, 2, 0, 4, 0, 2, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "Scores: 0 2 8 4 11, Turn total: 0\nCurrent player: 0\n" +ObservationString(1) = "Scores: 0 2 8 4 11, Turn total: 0\nCurrent player: 0\n" +ObservationString(2) = "Scores: 0 2 8 4 11, Turn total: 0\nCurrent player: 0\n" +ObservationString(3) = "Scores: 0 2 8 4 11, Turn total: 0\nCurrent player: 0\n" +ObservationString(4) = "Scores: 0 2 8 4 11, Turn total: 
0\nCurrent player: 0\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(3): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(4): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +Rewards() = [0, 0, 0, 0, 0] +Returns() = [0, 0, 0, 0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["roll", "stop"] + +# Apply action "roll" +action: 0 + +# State 30 +# Apply action "Roll 5" +action: 4 + +# State 31 +# Apply action "stop" +action: 1 + +# State 32 +# Apply action "roll" +action: 0 + +# State 33 +# Apply action "Roll 6" +action: 5 + +# State 34 +# Apply action "roll" +action: 0 + +# State 35 +# Apply action "Roll 5" +action: 4 + +# State 36 +# Apply action "roll" +action: 0 + +# State 37 +# Apply action "Roll 4" +action: 3 + +# State 38 +# Apply action "stop" +action: 1 + +# State 39 +# Scores: 5 17 8 4 11, Turn total: 0 +# Current player: 2 +IsTerminal() = True +History() = [1, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0, 1, 1, 0, 5, 0, 1, 1, 0, 3, 1, 0, 2, 0, 4, 0, 2, 1, 0, 4, 1, 0, 5, 0, 4, 0, 3, 1] +HistoryString() = "1, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0, 1, 1, 0, 5, 0, 1, 1, 0, 3, 1, 0, 2, 0, 4, 0, 2, 1, 0, 4, 1, 0, 5, 0, 4, 0, 3, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "Scores: 5 17 8 4 11, Turn total: 0\nCurrent player: 2\n" +ObservationString(1) = "Scores: 5 17 8 4 11, Turn total: 0\nCurrent player: 2\n" +ObservationString(2) = "Scores: 5 17 8 4 11, Turn total: 0\nCurrent player: 2\n" +ObservationString(3) = "Scores: 5 17 8 4 11, Turn total: 0\nCurrent player: 2\n" +ObservationString(4) = "Scores: 5 17 8 4 11, Turn total: 0\nCurrent player: 2\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(2): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(3): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(4): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +Rewards() = [-0.25, 1, -0.25, -0.25, -0.25] +Returns() = [-0.25, 1, -0.25, -0.25, -0.25] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_block_dominoes.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_block_dominoes.txt new file mode 100644 index 0000000..e13ec64 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_block_dominoes.txt @@ -0,0 +1,564 @@ +game: python_block_dominoes + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION 
+GameType.long_name = "Python block dominoes" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = True +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "python_block_dominoes" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 154 +PolicyTensorShape() = [154] +MaxChanceOutcomes() = 28 +GetParameters() = {} +NumPlayers() = 2 +MinUtility() = -69.0 +MaxUtility() = 69.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = player: [2], hand: [7, 3], actions_history: [14, 5] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 93 +ObservationTensorShape() = player: [2], hand: [7, 3], last_action: [4], hand_sizes: [2] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 29 +MaxGameLength() = 28 +ToString() = "python_block_dominoes()" + +# State 0 +# hand0:[] hand1:[] history:[] +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.CHANCE +InformationStateString(0) = "p0 hand:[] history:[]" +InformationStateString(1) = "p1 hand:[] history:[]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).hand: ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ +InformationStateTensor(0).actions_history: ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).hand: ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ +InformationStateTensor(1).actions_history: ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +ObservationString(0) = "p0 hand:[]" +ObservationString(1) = "p1 hand:[]" +PublicObservationString() = "p0" +PrivateObservationString(0) = "p0 hand:[]" +PrivateObservationString(1) = "p1 hand:[]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).hand: ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ +ObservationTensor(0).last_action: ◯◯◯◯ +ObservationTensor(0).hand_sizes: ◯◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).hand: ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ +ObservationTensor(1).last_action: ◯◯◯◯ +ObservationTensor(1).hand_sizes: ◯◯ +ChanceOutcomes() = [(0,0.0357143), (1,0.0357143), (2,0.0357143), (3,0.0357143), (4,0.0357143), (5,0.0357143), (6,0.0357143), (7,0.0357143), (8,0.0357143), (9,0.0357143), (10,0.0357143), (11,0.0357143), (12,0.0357143), (13,0.0357143), (14,0.0357143), (15,0.0357143), (16,0.0357143), (17,0.0357143), (18,0.0357143), (19,0.0357143), (20,0.0357143), (21,0.0357143), (22,0.0357143), (23,0.0357143), (24,0.0357143), (25,0.0357143), (26,0.0357143), (27,0.0357143)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] +StringLegalActions() = ["Deal (0.0, 0.0)", "Deal (0.0, 1.0)", "Deal (0.0, 2.0)", "Deal (0.0, 3.0)", "Deal (0.0, 4.0)", "Deal (0.0, 5.0)", "Deal (0.0, 6.0)", "Deal (1.0, 1.0)", "Deal (1.0, 2.0)", "Deal (1.0, 3.0)", "Deal (1.0, 4.0)", "Deal (1.0, 5.0)", "Deal (1.0, 6.0)", "Deal (2.0, 2.0)", "Deal (2.0, 3.0)", "Deal (2.0, 4.0)", "Deal (2.0, 5.0)", "Deal (2.0, 6.0)", "Deal (3.0, 3.0)", "Deal (3.0, 4.0)", "Deal (3.0, 5.0)", "Deal (3.0, 6.0)", "Deal (4.0, 4.0)", "Deal (4.0, 5.0)", "Deal 
(4.0, 6.0)", "Deal (5.0, 5.0)", "Deal (5.0, 6.0)", "Deal (6.0, 6.0)"] + +# Apply action "Deal (0.0, 5.0)" +action: 5 + +# State 1 +# hand0:['(0.0, 5.0)'] hand1:[] history:[] +IsTerminal() = False +History() = [5] +HistoryString() = "5" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.CHANCE +InformationStateString(0) = "p0 hand:[(0.0, 5.0)] history:[]" +InformationStateString(1) = "p1 hand:[] history:[]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).hand = [0.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(0).actions_history: ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).hand: ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ +InformationStateTensor(1).actions_history: ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +ObservationString(0) = "p0 hand:[(0.0, 5.0)]" +ObservationString(1) = "p1 hand:[]" +PublicObservationString() = "p0" +PrivateObservationString(0) = "p0 hand:[(0.0, 5.0)]" +PrivateObservationString(1) = "p1 hand:[]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).hand = [0.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(0).last_action: ◯◯◯◯ +ObservationTensor(0).hand_sizes: ◉◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).hand: ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ +ObservationTensor(1).last_action: ◯◯◯◯ +ObservationTensor(1).hand_sizes: ◯◉ +ChanceOutcomes() = [(0,0.037037), (1,0.037037), (2,0.037037), (3,0.037037), (4,0.037037), (6,0.037037), (7,0.037037), (8,0.037037), (9,0.037037), (10,0.037037), (11,0.037037), (12,0.037037), (13,0.037037), (14,0.037037), (15,0.037037), (16,0.037037), (17,0.037037), (18,0.037037), (19,0.037037), (20,0.037037), (21,0.037037), (22,0.037037), (23,0.037037), (24,0.037037), (25,0.037037), (26,0.037037), (27,0.037037)] +LegalActions() = [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] +StringLegalActions() = ["Deal (0.0, 0.0)", "Deal (0.0, 1.0)", "Deal (0.0, 2.0)", "Deal (0.0, 3.0)", "Deal (0.0, 4.0)", "Deal (0.0, 6.0)", "Deal (1.0, 1.0)", "Deal (1.0, 2.0)", "Deal (1.0, 3.0)", "Deal (1.0, 4.0)", "Deal (1.0, 5.0)", "Deal (1.0, 6.0)", "Deal (2.0, 2.0)", "Deal (2.0, 3.0)", "Deal (2.0, 4.0)", "Deal (2.0, 5.0)", "Deal (2.0, 6.0)", "Deal (3.0, 3.0)", "Deal (3.0, 4.0)", "Deal (3.0, 5.0)", "Deal (3.0, 6.0)", "Deal (4.0, 4.0)", "Deal (4.0, 5.0)", "Deal (4.0, 6.0)", "Deal (5.0, 5.0)", "Deal (5.0, 6.0)", "Deal (6.0, 6.0)"] + +# Apply action "Deal (1.0, 4.0)" +action: 10 + +# State 2 +# Apply action "Deal (5.0, 6.0)" +action: 26 + +# State 3 +# Apply action "Deal (3.0, 4.0)" +action: 19 + +# State 4 +# Apply action "Deal (0.0, 0.0)" +action: 0 + +# State 5 +# Apply action "Deal (2.0, 5.0)" +action: 16 + +# State 6 +# Apply action "Deal (0.0, 4.0)" +action: 4 + +# State 7 +# Apply action "Deal (3.0, 5.0)" +action: 20 + +# State 8 +# Apply action "Deal (4.0, 5.0)" +action: 23 + +# State 9 +# Apply action "Deal (0.0, 1.0)" +action: 1 + +# State 10 +# Apply action "Deal (6.0, 6.0)" +action: 27 + +# State 11 +# Apply action "Deal (0.0, 6.0)" +action: 6 + +# State 12 +# Apply action "Deal (2.0, 2.0)" +action: 13 + +# State 13 +# Apply action "Deal (5.0, 5.0)" +action: 25 + +# State 14 +# hand0:['(0.0, 0.0)', '(0.0, 
4.0)', '(0.0, 5.0)', '(1.0, 4.0)', '(2.0, 5.0)', '(3.0, 4.0)', '(5.0, 6.0)'] hand1:['(0.0, 1.0)', '(0.0, 6.0)', '(2.0, 2.0)', '(3.0, 5.0)', '(4.0, 5.0)', '(5.0, 5.0)', '(6.0, 6.0)'] history:[] +IsTerminal() = False +History() = [5, 10, 26, 19, 0, 16, 4, 20, 23, 1, 27, 6, 13, 25] +HistoryString() = "5, 10, 26, 19, 0, 16, 4, 20, 23, 1, 27, 6, 13, 25" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "p0 hand:[(0.0, 0.0), (0.0, 4.0), (0.0, 5.0), (1.0, 4.0), (2.0, 5.0), (3.0, 4.0), (5.0, 6.0)] history:[]" +InformationStateString(1) = "p1 hand:[(0.0, 1.0), (0.0, 6.0), (2.0, 2.0), (3.0, 5.0), (4.0, 5.0), (5.0, 5.0), (6.0, 6.0)] history:[]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).hand = [0.0, 0.0, 1.0, 0.0, 4.0, 1.0, 0.0, 5.0, 1.0, 1.0, 4.0, 1.0, 2.0, 5.0, 1.0, 3.0, 4.0, 1.0, 5.0, 6.0, 1.0] +InformationStateTensor(0).actions_history: ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).hand = [0.0, 1.0, 1.0, 0.0, 6.0, 1.0, 2.0, 2.0, 1.0, 3.0, 5.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 6.0, 6.0, 1.0] +InformationStateTensor(1).actions_history: ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +ObservationString(0) = "p0 hand:[(0.0, 0.0), (0.0, 4.0), (0.0, 5.0), (1.0, 4.0), (2.0, 5.0), (3.0, 4.0), (5.0, 6.0)]" +ObservationString(1) = "p1 hand:[(0.0, 1.0), (0.0, 6.0), (2.0, 2.0), (3.0, 5.0), (4.0, 5.0), (5.0, 5.0), (6.0, 6.0)]" +PublicObservationString() = "p0" +PrivateObservationString(0) = "p0 hand:[(0.0, 0.0), (0.0, 4.0), (0.0, 5.0), (1.0, 4.0), (2.0, 5.0), (3.0, 4.0), (5.0, 6.0)]" +PrivateObservationString(1) = "p1 hand:[(0.0, 1.0), (0.0, 6.0), (2.0, 2.0), (3.0, 5.0), (4.0, 5.0), (5.0, 5.0), (6.0, 6.0)]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).hand = [0.0, 0.0, 1.0, 0.0, 4.0, 1.0, 0.0, 5.0, 1.0, 1.0, 4.0, 1.0, 2.0, 5.0, 1.0, 3.0, 4.0, 1.0, 5.0, 6.0, 1.0] +ObservationTensor(0).last_action: ◯◯◯◯ +ObservationTensor(0).hand_sizes = [7.0, 7.0] +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).hand = [0.0, 1.0, 1.0, 0.0, 6.0, 1.0, 2.0, 2.0, 1.0, 3.0, 5.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 6.0, 6.0, 1.0] +ObservationTensor(1).last_action: ◯◯◯◯ +ObservationTensor(1).hand_sizes = [7.0, 7.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 11, 14, 28, 45, 53, 72] +StringLegalActions() = ["p0 tile:(0.0, 0.0) pip:None", "p0 tile:(0.0, 4.0) pip:None", "p0 tile:(0.0, 5.0) pip:None", "p0 tile:(1.0, 4.0) pip:None", "p0 tile:(2.0, 5.0) pip:None", "p0 tile:(3.0, 4.0) pip:None", "p0 tile:(5.0, 6.0) pip:None"] + +# Apply action "p0 tile:(0.0, 0.0) pip:None" +action: 0 + +# State 15 +# hand0:['(0.0, 4.0)', '(0.0, 5.0)', '(1.0, 4.0)', '(2.0, 5.0)', '(3.0, 4.0)', '(5.0, 6.0)'] hand1:['(0.0, 1.0)', '(0.0, 6.0)', '(2.0, 2.0)', '(3.0, 5.0)', '(4.0, 5.0)', '(5.0, 5.0)', '(6.0, 6.0)'] history:['p0 tile:(0.0, 0.0) pip:None'] +IsTerminal() = False +History() = [5, 10, 26, 19, 0, 16, 4, 20, 23, 1, 27, 6, 13, 25, 0] +HistoryString() = "5, 10, 26, 19, 0, 16, 4, 20, 23, 1, 27, 6, 13, 25, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "p0 hand:[(0.0, 4.0), (0.0, 5.0), (1.0, 4.0), (2.0, 5.0), (3.0, 4.0), (5.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None]" +InformationStateString(1) = "p1 hand:[(0.0, 1.0), (0.0, 6.0), (2.0, 2.0), (3.0, 5.0), (4.0, 5.0), (5.0, 5.0), (6.0, 6.0)] 
history:[p0 tile:(0.0, 0.0) pip:None]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).hand = [0.0, 4.0, 1.0, 0.0, 5.0, 1.0, 1.0, 4.0, 1.0, 2.0, 5.0, 1.0, 3.0, 4.0, 1.0, 5.0, 6.0, 1.0, 0.0, 0.0, 0.0] +InformationStateTensor(0).actions_history: ◯◯◯◯◉ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).hand = [0.0, 1.0, 1.0, 0.0, 6.0, 1.0, 2.0, 2.0, 1.0, 3.0, 5.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 6.0, 6.0, 1.0] +InformationStateTensor(1).actions_history: ◯◯◯◯◉ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +ObservationString(0) = "p0 hand:[(0.0, 4.0), (0.0, 5.0), (1.0, 4.0), (2.0, 5.0), (3.0, 4.0), (5.0, 6.0)] last_action:p0 tile:(0.0, 0.0) pip:None" +ObservationString(1) = "p1 hand:[(0.0, 1.0), (0.0, 6.0), (2.0, 2.0), (3.0, 5.0), (4.0, 5.0), (5.0, 5.0), (6.0, 6.0)] last_action:p0 tile:(0.0, 0.0) pip:None" +PublicObservationString() = "p0 last_action:p0 tile:(0.0, 0.0) pip:None" +PrivateObservationString(0) = "p0 hand:[(0.0, 4.0), (0.0, 5.0), (1.0, 4.0), (2.0, 5.0), (3.0, 4.0), (5.0, 6.0)]" +PrivateObservationString(1) = "p1 hand:[(0.0, 1.0), (0.0, 6.0), (2.0, 2.0), (3.0, 5.0), (4.0, 5.0), (5.0, 5.0), (6.0, 6.0)]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).hand = [0.0, 4.0, 1.0, 0.0, 5.0, 1.0, 1.0, 4.0, 1.0, 2.0, 5.0, 1.0, 3.0, 4.0, 1.0, 5.0, 6.0, 1.0, 0.0, 0.0, 0.0] +ObservationTensor(0).last_action: ◯◯◯◯ +ObservationTensor(0).hand_sizes = [6.0, 7.0] +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).hand = [0.0, 1.0, 1.0, 0.0, 6.0, 1.0, 2.0, 2.0, 1.0, 3.0, 5.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 6.0, 6.0, 1.0] +ObservationTensor(1).last_action: ◯◯◯◯ +ObservationTensor(1).hand_sizes = [7.0, 6.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [80, 95] +StringLegalActions() = ["p1 tile:(0.0, 1.0) pip:0.0", "p1 tile:(0.0, 6.0) pip:0.0"] + +# Apply action "p1 tile:(0.0, 6.0) pip:0.0" +action: 95 + +# State 16 +# hand0:['(0.0, 4.0)', '(0.0, 5.0)', '(1.0, 4.0)', '(2.0, 5.0)', '(3.0, 4.0)', '(5.0, 6.0)'] hand1:['(0.0, 1.0)', '(2.0, 2.0)', '(3.0, 5.0)', '(4.0, 5.0)', '(5.0, 5.0)', '(6.0, 6.0)'] history:['p0 tile:(0.0, 0.0) pip:None', 'p1 tile:(0.0, 6.0) pip:0.0'] +IsTerminal() = False +History() = [5, 10, 26, 19, 0, 16, 4, 20, 23, 1, 27, 6, 13, 25, 0, 95] +HistoryString() = "5, 10, 26, 19, 0, 16, 4, 20, 23, 1, 27, 6, 13, 25, 0, 95" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "p0 hand:[(0.0, 4.0), (0.0, 5.0), (1.0, 4.0), (2.0, 5.0), (3.0, 4.0), (5.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0]" +InformationStateString(1) = "p1 hand:[(0.0, 1.0), (2.0, 2.0), (3.0, 5.0), (4.0, 5.0), (5.0, 5.0), (6.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).hand = [0.0, 4.0, 1.0, 0.0, 5.0, 1.0, 1.0, 4.0, 1.0, 2.0, 5.0, 1.0, 3.0, 4.0, 1.0, 5.0, 6.0, 1.0, 0.0, 0.0, 0.0] +InformationStateTensor(0).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).player: ◯◉ 
+InformationStateTensor(1).hand = [0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 3.0, 5.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationString(0) = "p0 hand:[(0.0, 4.0), (0.0, 5.0), (1.0, 4.0), (2.0, 5.0), (3.0, 4.0), (5.0, 6.0)] last_action:p1 tile:(0.0, 6.0) pip:0.0" +ObservationString(1) = "p1 hand:[(0.0, 1.0), (2.0, 2.0), (3.0, 5.0), (4.0, 5.0), (5.0, 5.0), (6.0, 6.0)] last_action:p1 tile:(0.0, 6.0) pip:0.0" +PublicObservationString() = "p0 last_action:p1 tile:(0.0, 6.0) pip:0.0" +PrivateObservationString(0) = "p0 hand:[(0.0, 4.0), (0.0, 5.0), (1.0, 4.0), (2.0, 5.0), (3.0, 4.0), (5.0, 6.0)]" +PrivateObservationString(1) = "p1 hand:[(0.0, 1.0), (2.0, 2.0), (3.0, 5.0), (4.0, 5.0), (5.0, 5.0), (6.0, 6.0)]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).hand = [0.0, 4.0, 1.0, 0.0, 5.0, 1.0, 1.0, 4.0, 1.0, 2.0, 5.0, 1.0, 3.0, 4.0, 1.0, 5.0, 6.0, 1.0, 0.0, 0.0, 0.0] +ObservationTensor(0).last_action = [0.0, 6.0, 0.0, 1.0] +ObservationTensor(0).hand_sizes = [6.0, 6.0] +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).hand = [0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 3.0, 5.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0] +ObservationTensor(1).last_action = [0.0, 6.0, 0.0, 1.0] +ObservationTensor(1).hand_sizes = [6.0, 6.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [12, 15, 74] +StringLegalActions() = ["p0 tile:(0.0, 4.0) pip:0.0", "p0 tile:(0.0, 5.0) pip:0.0", "p0 tile:(5.0, 6.0) pip:6.0"] + +# Apply action "p0 tile:(0.0, 5.0) pip:0.0" +action: 15 + +# State 17 +# hand0:['(0.0, 4.0)', '(1.0, 4.0)', '(2.0, 5.0)', '(3.0, 4.0)', '(5.0, 6.0)'] hand1:['(0.0, 1.0)', '(2.0, 2.0)', '(3.0, 5.0)', '(4.0, 5.0)', '(5.0, 5.0)', '(6.0, 6.0)'] history:['p0 tile:(0.0, 0.0) pip:None', 'p1 tile:(0.0, 6.0) pip:0.0', 'p0 tile:(0.0, 5.0) pip:0.0'] +IsTerminal() = False +History() = [5, 10, 26, 19, 0, 16, 4, 20, 23, 1, 27, 6, 13, 25, 0, 95, 15] +HistoryString() = "5, 10, 26, 19, 0, 16, 4, 20, 23, 1, 27, 6, 13, 25, 0, 95, 15" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "p0 hand:[(0.0, 4.0), (1.0, 4.0), (2.0, 5.0), (3.0, 4.0), (5.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p0 tile:(0.0, 5.0) pip:0.0]" +InformationStateString(1) = "p1 hand:[(0.0, 1.0), (2.0, 2.0), (3.0, 5.0), (4.0, 5.0), (5.0, 5.0), (6.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p0 tile:(0.0, 5.0) pip:0.0]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).hand = [0.0, 4.0, 1.0, 1.0, 4.0, 1.0, 2.0, 5.0, 1.0, 3.0, 4.0, 1.0, 5.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(0).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 5.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).hand = [0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 3.0, 5.0, 1.0, 4.0, 
5.0, 1.0, 5.0, 5.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 5.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationString(0) = "p0 hand:[(0.0, 4.0), (1.0, 4.0), (2.0, 5.0), (3.0, 4.0), (5.0, 6.0)] last_action:p0 tile:(0.0, 5.0) pip:0.0" +ObservationString(1) = "p1 hand:[(0.0, 1.0), (2.0, 2.0), (3.0, 5.0), (4.0, 5.0), (5.0, 5.0), (6.0, 6.0)] last_action:p0 tile:(0.0, 5.0) pip:0.0" +PublicObservationString() = "p0 last_action:p0 tile:(0.0, 5.0) pip:0.0" +PrivateObservationString(0) = "p0 hand:[(0.0, 4.0), (1.0, 4.0), (2.0, 5.0), (3.0, 4.0), (5.0, 6.0)]" +PrivateObservationString(1) = "p1 hand:[(0.0, 1.0), (2.0, 2.0), (3.0, 5.0), (4.0, 5.0), (5.0, 5.0), (6.0, 6.0)]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).hand = [0.0, 4.0, 1.0, 1.0, 4.0, 1.0, 2.0, 5.0, 1.0, 3.0, 4.0, 1.0, 5.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(0).last_action = [0.0, 5.0, 0.0, 0.0] +ObservationTensor(0).hand_sizes = [5.0, 6.0] +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).hand = [0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 3.0, 5.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0] +ObservationTensor(1).last_action = [0.0, 5.0, 0.0, 0.0] +ObservationTensor(1).hand_sizes = [6.0, 5.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [135, 143, 148, 153] +StringLegalActions() = ["p1 tile:(3.0, 5.0) pip:5.0", "p1 tile:(4.0, 5.0) pip:5.0", "p1 tile:(5.0, 5.0) pip:5.0", "p1 tile:(6.0, 6.0) pip:6.0"] + +# Apply action "p1 tile:(5.0, 5.0) pip:5.0" +action: 148 + +# State 18 +# hand0:['(0.0, 4.0)', '(1.0, 4.0)', '(2.0, 5.0)', '(3.0, 4.0)', '(5.0, 6.0)'] hand1:['(0.0, 1.0)', '(2.0, 2.0)', '(3.0, 5.0)', '(4.0, 5.0)', '(6.0, 6.0)'] history:['p0 tile:(0.0, 0.0) pip:None', 'p1 tile:(0.0, 6.0) pip:0.0', 'p0 tile:(0.0, 5.0) pip:0.0', 'p1 tile:(5.0, 5.0) pip:5.0'] +IsTerminal() = False +History() = [5, 10, 26, 19, 0, 16, 4, 20, 23, 1, 27, 6, 13, 25, 0, 95, 15, 148] +HistoryString() = "5, 10, 26, 19, 0, 16, 4, 20, 23, 1, 27, 6, 13, 25, 0, 95, 15, 148" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "p0 hand:[(0.0, 4.0), (1.0, 4.0), (2.0, 5.0), (3.0, 4.0), (5.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p0 tile:(0.0, 5.0) pip:0.0, p1 tile:(5.0, 5.0) pip:5.0]" +InformationStateString(1) = "p1 hand:[(0.0, 1.0), (2.0, 2.0), (3.0, 5.0), (4.0, 5.0), (6.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p0 tile:(0.0, 5.0) pip:0.0, p1 tile:(5.0, 5.0) pip:5.0]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).hand = [0.0, 4.0, 1.0, 1.0, 4.0, 1.0, 2.0, 5.0, 1.0, 3.0, 4.0, 1.0, 5.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(0).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 5.0, 0.0, 0.0, 1.0, 5.0, 5.0, 5.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).hand = [0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 3.0, 5.0, 1.0, 4.0, 
5.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 5.0, 0.0, 0.0, 1.0, 5.0, 5.0, 5.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationString(0) = "p0 hand:[(0.0, 4.0), (1.0, 4.0), (2.0, 5.0), (3.0, 4.0), (5.0, 6.0)] last_action:p1 tile:(5.0, 5.0) pip:5.0" +ObservationString(1) = "p1 hand:[(0.0, 1.0), (2.0, 2.0), (3.0, 5.0), (4.0, 5.0), (6.0, 6.0)] last_action:p1 tile:(5.0, 5.0) pip:5.0" +PublicObservationString() = "p0 last_action:p1 tile:(5.0, 5.0) pip:5.0" +PrivateObservationString(0) = "p0 hand:[(0.0, 4.0), (1.0, 4.0), (2.0, 5.0), (3.0, 4.0), (5.0, 6.0)]" +PrivateObservationString(1) = "p1 hand:[(0.0, 1.0), (2.0, 2.0), (3.0, 5.0), (4.0, 5.0), (6.0, 6.0)]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).hand = [0.0, 4.0, 1.0, 1.0, 4.0, 1.0, 2.0, 5.0, 1.0, 3.0, 4.0, 1.0, 5.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(0).last_action = [5.0, 5.0, 5.0, 1.0] +ObservationTensor(0).hand_sizes = [5.0, 5.0] +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).hand = [0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 3.0, 5.0, 1.0, 4.0, 5.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1).last_action = [5.0, 5.0, 5.0, 1.0] +ObservationTensor(1).hand_sizes = [5.0, 5.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [47, 73, 74] +StringLegalActions() = ["p0 tile:(2.0, 5.0) pip:5.0", "p0 tile:(5.0, 6.0) pip:5.0", "p0 tile:(5.0, 6.0) pip:6.0"] + +# Apply action "p0 tile:(2.0, 5.0) pip:5.0" +action: 47 + +# State 19 +# hand0:['(0.0, 4.0)', '(1.0, 4.0)', '(3.0, 4.0)', '(5.0, 6.0)'] hand1:['(0.0, 1.0)', '(2.0, 2.0)', '(3.0, 5.0)', '(4.0, 5.0)', '(6.0, 6.0)'] history:['p0 tile:(0.0, 0.0) pip:None', 'p1 tile:(0.0, 6.0) pip:0.0', 'p0 tile:(0.0, 5.0) pip:0.0', 'p1 tile:(5.0, 5.0) pip:5.0', 'p0 tile:(2.0, 5.0) pip:5.0'] +IsTerminal() = False +History() = [5, 10, 26, 19, 0, 16, 4, 20, 23, 1, 27, 6, 13, 25, 0, 95, 15, 148, 47] +HistoryString() = "5, 10, 26, 19, 0, 16, 4, 20, 23, 1, 27, 6, 13, 25, 0, 95, 15, 148, 47" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "p0 hand:[(0.0, 4.0), (1.0, 4.0), (3.0, 4.0), (5.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p0 tile:(0.0, 5.0) pip:0.0, p1 tile:(5.0, 5.0) pip:5.0, p0 tile:(2.0, 5.0) pip:5.0]" +InformationStateString(1) = "p1 hand:[(0.0, 1.0), (2.0, 2.0), (3.0, 5.0), (4.0, 5.0), (6.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p0 tile:(0.0, 5.0) pip:0.0, p1 tile:(5.0, 5.0) pip:5.0, p0 tile:(2.0, 5.0) pip:5.0]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).hand = [0.0, 4.0, 1.0, 1.0, 4.0, 1.0, 3.0, 4.0, 1.0, 5.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(0).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 5.0, 0.0, 0.0, 1.0, 5.0, 5.0, 5.0, 1.0, 1.0, 2.0, 5.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).hand = [0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 3.0, 5.0, 1.0, 
4.0, 5.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 5.0, 0.0, 0.0, 1.0, 5.0, 5.0, 5.0, 1.0, 1.0, 2.0, 5.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationString(0) = "p0 hand:[(0.0, 4.0), (1.0, 4.0), (3.0, 4.0), (5.0, 6.0)] last_action:p0 tile:(2.0, 5.0) pip:5.0" +ObservationString(1) = "p1 hand:[(0.0, 1.0), (2.0, 2.0), (3.0, 5.0), (4.0, 5.0), (6.0, 6.0)] last_action:p0 tile:(2.0, 5.0) pip:5.0" +PublicObservationString() = "p0 last_action:p0 tile:(2.0, 5.0) pip:5.0" +PrivateObservationString(0) = "p0 hand:[(0.0, 4.0), (1.0, 4.0), (3.0, 4.0), (5.0, 6.0)]" +PrivateObservationString(1) = "p1 hand:[(0.0, 1.0), (2.0, 2.0), (3.0, 5.0), (4.0, 5.0), (6.0, 6.0)]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).hand = [0.0, 4.0, 1.0, 1.0, 4.0, 1.0, 3.0, 4.0, 1.0, 5.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(0).last_action = [2.0, 5.0, 5.0, 0.0] +ObservationTensor(0).hand_sizes = [4.0, 5.0] +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).hand = [0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 3.0, 5.0, 1.0, 4.0, 5.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1).last_action = [2.0, 5.0, 5.0, 0.0] +ObservationTensor(1).hand_sizes = [5.0, 4.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [115, 153] +StringLegalActions() = ["p1 tile:(2.0, 2.0) pip:2.0", "p1 tile:(6.0, 6.0) pip:6.0"] + +# Apply action "p1 tile:(2.0, 2.0) pip:2.0" +action: 115 + +# State 20 +# Apply action "p0 tile:(5.0, 6.0) pip:6.0" +action: 74 + +# State 21 +# Apply action "p1 tile:(4.0, 5.0) pip:5.0" +action: 143 + +# State 22 +# Apply action "p0 tile:(3.0, 4.0) pip:4.0" +action: 55 + +# State 23 +# Apply action "p1 tile:(3.0, 5.0) pip:3.0" +action: 134 + +# State 24 +# hand0:['(0.0, 4.0)', '(1.0, 4.0)'] hand1:['(0.0, 1.0)', '(6.0, 6.0)'] history:['p0 tile:(0.0, 0.0) pip:None', 'p1 tile:(0.0, 6.0) pip:0.0', 'p0 tile:(0.0, 5.0) pip:0.0', 'p1 tile:(5.0, 5.0) pip:5.0', 'p0 tile:(2.0, 5.0) pip:5.0', 'p1 tile:(2.0, 2.0) pip:2.0', 'p0 tile:(5.0, 6.0) pip:6.0', 'p1 tile:(4.0, 5.0) pip:5.0', 'p0 tile:(3.0, 4.0) pip:4.0', 'p1 tile:(3.0, 5.0) pip:3.0'] +IsTerminal() = True +History() = [5, 10, 26, 19, 0, 16, 4, 20, 23, 1, 27, 6, 13, 25, 0, 95, 15, 148, 47, 115, 74, 143, 55, 134] +HistoryString() = "5, 10, 26, 19, 0, 16, 4, 20, 23, 1, 27, 6, 13, 25, 0, 95, 15, 148, 47, 115, 74, 143, 55, 134" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.TERMINAL +InformationStateString(0) = "p0 hand:[(0.0, 4.0), (1.0, 4.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p0 tile:(0.0, 5.0) pip:0.0, p1 tile:(5.0, 5.0) pip:5.0, p0 tile:(2.0, 5.0) pip:5.0, p1 tile:(2.0, 2.0) pip:2.0, p0 tile:(5.0, 6.0) pip:6.0, p1 tile:(4.0, 5.0) pip:5.0, p0 tile:(3.0, 4.0) pip:4.0, p1 tile:(3.0, 5.0) pip:3.0]" +InformationStateString(1) = "p1 hand:[(0.0, 1.0), (6.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p0 tile:(0.0, 5.0) pip:0.0, p1 tile:(5.0, 5.0) pip:5.0, p0 tile:(2.0, 5.0) pip:5.0, p1 tile:(2.0, 2.0) pip:2.0, p0 tile:(5.0, 6.0) pip:6.0, p1 tile:(4.0, 5.0) pip:5.0, p0 tile:(3.0, 4.0) pip:4.0, p1 tile:(3.0, 5.0) pip:3.0]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).hand = [0.0, 4.0, 1.0, 1.0, 
4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(0).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 5.0, 0.0, 0.0, 1.0, 5.0, 5.0, 5.0, 1.0, 1.0, 2.0, 5.0, 5.0, 0.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 5.0, 6.0, 6.0, 0.0, 1.0, 4.0, 5.0, 5.0, 1.0, 1.0, 3.0, 4.0, 4.0, 0.0, 1.0, 3.0, 5.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).hand = [0.0, 1.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 5.0, 0.0, 0.0, 1.0, 5.0, 5.0, 5.0, 1.0, 1.0, 2.0, 5.0, 5.0, 0.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 5.0, 6.0, 6.0, 0.0, 1.0, 4.0, 5.0, 5.0, 1.0, 1.0, 3.0, 4.0, 4.0, 0.0, 1.0, 3.0, 5.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationString(0) = "p0 hand:[(0.0, 4.0), (1.0, 4.0)] last_action:p1 tile:(3.0, 5.0) pip:3.0" +ObservationString(1) = "p1 hand:[(0.0, 1.0), (6.0, 6.0)] last_action:p1 tile:(3.0, 5.0) pip:3.0" +PublicObservationString() = "p0 last_action:p1 tile:(3.0, 5.0) pip:3.0" +PrivateObservationString(0) = "p0 hand:[(0.0, 4.0), (1.0, 4.0)]" +PrivateObservationString(1) = "p1 hand:[(0.0, 1.0), (6.0, 6.0)]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).hand = [0.0, 4.0, 1.0, 1.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(0).last_action = [3.0, 5.0, 3.0, 1.0] +ObservationTensor(0).hand_sizes = [2.0, 2.0] +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).hand = [0.0, 1.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1).last_action = [3.0, 5.0, 3.0, 1.0] +ObservationTensor(1).hand_sizes = [2.0, 2.0] +Rewards() = [13, -13] +Returns() = [13, -13] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_dynamic_routing.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_dynamic_routing.txt new file mode 100644 index 0000000..f675f0f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_dynamic_routing.txt @@ -0,0 +1,194 @@ +game: python_dynamic_routing + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SIMULTANEOUS +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Python Dynamic Routing Game" +GameType.max_num_players = 100 +GameType.min_num_players = 0 +GameType.parameter_specification = ["max_num_time_step", "players", "time_step_length"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = True +GameType.reward_model = RewardModel.REWARDS +GameType.short_name = "python_dynamic_routing" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 8 +PolicyTensorShape() = [8] +MaxChanceOutcomes() = 0 +GetParameters() = {max_num_time_step=10,players=-1,time_step_length=0.5} +NumPlayers() = 5 +MinUtility() = -11.0 +MaxUtility() = 0.0 +UtilitySum() = None +ObservationTensorShape() = [11, 6] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 66 
+MaxGameLength() = 10 +ToString() = "python_dynamic_routing(max_num_time_step=10,players=-1,time_step_length=0.5)" + +# State 0 +# Vehicle locations: ['O->A', 'O->A', 'O->A', 'O->A', 'O->A'], time: 0, waiting_time=[0, 0, 0, 0, 0]. +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = PlayerId.SIMULTANEOUS +InformationStateString(0) = "" +InformationStateString(1) = "" +InformationStateString(2) = "" +InformationStateString(3) = "" +InformationStateString(4) = "" +ObservationString(0) = "0: " +ObservationString(1) = "1: " +ObservationString(2) = "2: " +ObservationString(3) = "3: " +ObservationString(4) = "4: " +ObservationTensor(0) = [7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(2) = [7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(3) = [7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(4) = [7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0, 0, 0, 0] +Returns() = [-0, -0, -0, -0, -0] +LegalActions(0) = [1, 2] +LegalActions(1) = [1, 2] +LegalActions(2) = [1, 2] +LegalActions(3) = [1, 2] +LegalActions(4) = [1, 2] +StringLegalActions(0) = ["Vehicle 0 would like to move to A->B.", "Vehicle 0 would like to move to A->C."] +StringLegalActions(1) = ["Vehicle 1 would like to move to A->B.", "Vehicle 1 would like to move to A->C."] +StringLegalActions(2) = ["Vehicle 2 would like to move to A->B.", "Vehicle 2 would like to move to A->C."] +StringLegalActions(3) = ["Vehicle 3 would like to move to A->B.", "Vehicle 3 would like to move to A->C."] +StringLegalActions(4) = ["Vehicle 4 would like to move to A->B.", "Vehicle 4 would like to move to A->C."] + +# Apply joint action ["Vehicle 0 would like to move to A->B.", "Vehicle 1 would like to move to A->C.", "Vehicle 2 would like to move to A->B.", "Vehicle 3 would like to move to A->B.", "Vehicle 4 would like to move to A->B."] +actions: [1, 2, 1, 1, 1] + +# State 1 +# Vehicle locations: ['A->B', 'A->C', 'A->B', 'A->B', 'A->B'], time: 1, 
waiting_time=[2, 3, 2, 2, 2]. +IsTerminal() = False +History() = [1, 2, 1, 1, 1] +HistoryString() = "1, 2, 1, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = PlayerId.SIMULTANEOUS +InformationStateString(0) = "1, 2, 1, 1, 1" +InformationStateString(1) = "1, 2, 1, 1, 1" +InformationStateString(2) = "1, 2, 1, 1, 1" +InformationStateString(3) = "1, 2, 1, 1, 1" +InformationStateString(4) = "1, 2, 1, 1, 1" +ObservationString(0) = "0: 1, 2, 1, 1, 1" +ObservationString(1) = "1: 1, 2, 1, 1, 1" +ObservationString(2) = "2: 1, 2, 1, 1, 1" +ObservationString(3) = "3: 1, 2, 1, 1, 1" +ObservationString(4) = "4: 1, 2, 1, 1, 1" +ObservationTensor(0) = [7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(2) = [7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(3) = [7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(4) = [7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [-0.5, -0.5, -0.5, -0.5, -0.5] +Returns() = [-0.5, -0.5, -0.5, -0.5, -0.5] +LegalActions(0) = [0] +LegalActions(1) = [0] +LegalActions(2) = [0] +LegalActions(3) = [0] +LegalActions(4) = [0] +StringLegalActions(0) = ["Vehicle 0 reach a sink node or its destination."] +StringLegalActions(1) = ["Vehicle 1 reach a sink node or its destination."] +StringLegalActions(2) = ["Vehicle 2 reach a sink node or its destination."] +StringLegalActions(3) = ["Vehicle 3 reach a sink node or its destination."] +StringLegalActions(4) = ["Vehicle 4 reach a sink node or its destination."] + +# Apply joint action ["Vehicle 0 reach a sink node or its destination.", "Vehicle 1 reach a sink node or its destination.", "Vehicle 2 reach a sink node or its destination.", "Vehicle 3 reach a sink node or its destination.", "Vehicle 4 reach a sink node or its destination."] +actions: [0, 0, 0, 0, 0] + +# State 2 +# Vehicle locations: ['A->B', 'A->C', 'A->B', 'A->B', 'A->B'], time: 2, waiting_time=[1, 2, 1, 1, 1]. 
+IsTerminal() = False +History() = [1, 2, 1, 1, 1, 0, 0, 0, 0, 0] +HistoryString() = "1, 2, 1, 1, 1, 0, 0, 0, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = PlayerId.SIMULTANEOUS +InformationStateString(0) = "1, 2, 1, 1, 1, 0, 0, 0, 0, 0" +InformationStateString(1) = "1, 2, 1, 1, 1, 0, 0, 0, 0, 0" +InformationStateString(2) = "1, 2, 1, 1, 1, 0, 0, 0, 0, 0" +InformationStateString(3) = "1, 2, 1, 1, 1, 0, 0, 0, 0, 0" +InformationStateString(4) = "1, 2, 1, 1, 1, 0, 0, 0, 0, 0" +ObservationString(0) = "0: 1, 2, 1, 1, 1, 0, 0, 0, 0, 0" +ObservationString(1) = "1: 1, 2, 1, 1, 1, 0, 0, 0, 0, 0" +ObservationString(2) = "2: 1, 2, 1, 1, 1, 0, 0, 0, 0, 0" +ObservationString(3) = "3: 1, 2, 1, 1, 1, 0, 0, 0, 0, 0" +ObservationString(4) = "4: 1, 2, 1, 1, 1, 0, 0, 0, 0, 0" +ObservationTensor(0) = [7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(2) = [7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(3) = [7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(4) = [7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [-0.5, -0.5, -0.5, -0.5, -0.5] +Returns() = [-1, -1, -1, -1, -1] +LegalActions(0) = [0] +LegalActions(1) = [0] +LegalActions(2) = [0] +LegalActions(3) = [0] +LegalActions(4) = [0] +StringLegalActions(0) = ["Vehicle 0 reach a sink node or its destination."] +StringLegalActions(1) = ["Vehicle 1 reach a sink node or its destination."] +StringLegalActions(2) = ["Vehicle 2 reach a sink node or its destination."] +StringLegalActions(3) = ["Vehicle 3 reach a sink node or its destination."] +StringLegalActions(4) = ["Vehicle 4 reach a sink node or its destination."] + +# Apply joint action ["Vehicle 0 reach a sink node or its destination.", "Vehicle 1 reach a sink node or its destination.", "Vehicle 2 reach a sink node or its destination.", "Vehicle 3 reach a sink node or its destination.", "Vehicle 4 reach a sink node or its destination."] +actions: [0, 0, 0, 0, 0] + +# State 3 +# Apply joint action ["Vehicle 0 would like to move to B->C.", "Vehicle 1 reach a 
sink node or its destination.", "Vehicle 2 would like to move to B->D.", "Vehicle 3 would like to move to B->C.", "Vehicle 4 would like to move to B->D."] +actions: [3, 0, 4, 3, 4] + +# State 4 +# Apply joint action ["Vehicle 0 would like to move to C->D.", "Vehicle 1 would like to move to C->D.", "Vehicle 2 reach a sink node or its destination.", "Vehicle 3 would like to move to C->D.", "Vehicle 4 reach a sink node or its destination."] +actions: [5, 5, 0, 5, 0] + +# State 5 +# Apply joint action ["Vehicle 0 reach a sink node or its destination.", "Vehicle 1 reach a sink node or its destination.", "Vehicle 2 reach a sink node or its destination.", "Vehicle 3 reach a sink node or its destination.", "Vehicle 4 reach a sink node or its destination."] +actions: [0, 0, 0, 0, 0] + +# State 6 +# Apply joint action ["Vehicle 0 reach a sink node or its destination.", "Vehicle 1 reach a sink node or its destination.", "Vehicle 2 reach a sink node or its destination.", "Vehicle 3 reach a sink node or its destination.", "Vehicle 4 reach a sink node or its destination."] +actions: [0, 0, 0, 0, 0] + +# State 7 +# Apply joint action ["Vehicle 0 would like to move to D->E.", "Vehicle 1 would like to move to D->E.", "Vehicle 2 would like to move to D->E.", "Vehicle 3 would like to move to D->E.", "Vehicle 4 would like to move to D->E."] +actions: [6, 6, 6, 6, 6] + +# State 8 +# Vehicle locations: ['D->E', 'D->E', 'D->E', 'D->E', 'D->E'], time: 8, game finished., waiting_time=[0, 0, 0, 0, 0]. +IsTerminal() = True +History() = [1, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 3, 4, 5, 5, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6] +HistoryString() = "1, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 3, 4, 5, 5, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.TERMINAL +InformationStateString(0) = "1, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 3, 4, 5, 5, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6" +InformationStateString(1) = "1, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 3, 4, 5, 5, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6" +InformationStateString(2) = "1, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 3, 4, 5, 5, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6" +InformationStateString(3) = "1, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 3, 4, 5, 5, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6" +InformationStateString(4) = "1, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 3, 4, 5, 5, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6" +ObservationString(0) = "0: 1, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 3, 4, 5, 5, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6" +ObservationString(1) = "1: 1, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 3, 4, 5, 5, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6" +ObservationString(2) = "2: 1, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 3, 4, 5, 5, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6" +ObservationString(3) = "3: 1, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 3, 4, 5, 5, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6" +ObservationString(4) = "4: 1, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 3, 4, 5, 5, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6" +ObservationTensor(0) = [7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 4.0, 3.0, 2.0, 4.0, 3.0, 4.0, 
4.0, 5.0, 5.0, 4.0, 5.0, 4.0, 4.0, 5.0, 5.0, 4.0, 5.0, 4.0, 4.0, 5.0, 5.0, 4.0, 5.0, 4.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 4.0, 3.0, 2.0, 4.0, 3.0, 4.0, 4.0, 5.0, 5.0, 4.0, 5.0, 4.0, 4.0, 5.0, 5.0, 4.0, 5.0, 4.0, 4.0, 5.0, 5.0, 4.0, 5.0, 4.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(2) = [7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 4.0, 3.0, 2.0, 4.0, 3.0, 4.0, 4.0, 5.0, 5.0, 4.0, 5.0, 4.0, 4.0, 5.0, 5.0, 4.0, 5.0, 4.0, 4.0, 5.0, 5.0, 4.0, 5.0, 4.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(3) = [7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 4.0, 3.0, 2.0, 4.0, 3.0, 4.0, 4.0, 5.0, 5.0, 4.0, 5.0, 4.0, 4.0, 5.0, 5.0, 4.0, 5.0, 4.0, 4.0, 5.0, 5.0, 4.0, 5.0, 4.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(4) = [7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 4.0, 3.0, 2.0, 4.0, 3.0, 4.0, 4.0, 5.0, 5.0, 4.0, 5.0, 4.0, 4.0, 5.0, 5.0, 4.0, 5.0, 4.0, 4.0, 5.0, 5.0, 4.0, 5.0, 4.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0, 0, 0, 0] +Returns() = [-3.5, -3.5, -3.5, -3.5, -3.5] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_iterated_prisoners_dilemma.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_iterated_prisoners_dilemma.txt new file mode 100644 index 0000000..615b387 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_iterated_prisoners_dilemma.txt @@ -0,0 +1,159 @@ +game: python_iterated_prisoners_dilemma + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SIMULTANEOUS +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Python Iterated Prisoner's Dilemma" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["max_game_length", "termination_probability"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = False +GameType.provides_observation_tensor = False +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.REWARDS +GameType.short_name = "python_iterated_prisoners_dilemma" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 2 +PolicyTensorShape() = [2] +MaxChanceOutcomes() = 2 +GetParameters() = {max_game_length=9999,termination_probability=0.125} +NumPlayers() = 2 +MinUtility() = 0.0 +MaxUtility() = 9.999e+04 +UtilitySum() = None +MaxGameLength() = 9999 +ToString() = "python_iterated_prisoners_dilemma(max_game_length=9999,termination_probability=0.125)" + +# State 0 +# p0: p1: +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = PlayerId.SIMULTANEOUS +InformationStateString(0) = "us: op:" +InformationStateString(1) = "us: op:" +ObservationString(0) = "us: op:" 
+ObservationString(1) = "us: op:" +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1] +LegalActions(1) = [0, 1] +StringLegalActions(0) = ["COOPERATE", "DEFECT"] +StringLegalActions(1) = ["COOPERATE", "DEFECT"] + +# Apply joint action ["COOPERATE", "COOPERATE"] +actions: [0, 0] + +# State 1 +# p0:C p1:C +IsTerminal() = False +History() = [0, 0] +HistoryString() = "0, 0" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.CHANCE +InformationStateString(0) = "us:C op:C" +InformationStateString(1) = "us:C op:C" +ObservationString(0) = "us:C op:C" +ObservationString(1) = "us:C op:C" +ChanceOutcomes() = [(0,0.875), (1,0.125)] +LegalActions() = [0, 1] +StringLegalActions() = ["CONTINUE", "STOP"] + +# Apply action "CONTINUE" +action: 0 + +# State 2 +# p0:C p1:C +IsTerminal() = False +History() = [0, 0, 0] +HistoryString() = "0, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = PlayerId.SIMULTANEOUS +InformationStateString(0) = "us:C op:C" +InformationStateString(1) = "us:C op:C" +ObservationString(0) = "us:C op:C" +ObservationString(1) = "us:C op:C" +Rewards() = [5, 5] +Returns() = [5, 5] +LegalActions(0) = [0, 1] +LegalActions(1) = [0, 1] +StringLegalActions(0) = ["COOPERATE", "DEFECT"] +StringLegalActions(1) = ["COOPERATE", "DEFECT"] + +# Apply joint action ["COOPERATE", "DEFECT"] +actions: [0, 1] + +# State 3 +# p0:CC p1:CD +IsTerminal() = False +History() = [0, 0, 0, 0, 1] +HistoryString() = "0, 0, 0, 0, 1" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.CHANCE +InformationStateString(0) = "us:CC op:CD" +InformationStateString(1) = "us:CD op:CC" +ObservationString(0) = "us:CC op:CD" +ObservationString(1) = "us:CD op:CC" +ChanceOutcomes() = [(0,0.875), (1,0.125)] +LegalActions() = [0, 1] +StringLegalActions() = ["CONTINUE", "STOP"] + +# Apply action "CONTINUE" +action: 0 + +# State 4 +# p0:CC p1:CD +IsTerminal() = False +History() = [0, 0, 0, 0, 1, 0] +HistoryString() = "0, 0, 0, 0, 1, 0" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = PlayerId.SIMULTANEOUS +InformationStateString(0) = "us:CC op:CD" +InformationStateString(1) = "us:CD op:CC" +ObservationString(0) = "us:CC op:CD" +ObservationString(1) = "us:CD op:CC" +Rewards() = [0, 10] +Returns() = [5, 15] +LegalActions(0) = [0, 1] +LegalActions(1) = [0, 1] +StringLegalActions(0) = ["COOPERATE", "DEFECT"] +StringLegalActions(1) = ["COOPERATE", "DEFECT"] + +# Apply joint action ["DEFECT", "COOPERATE"] +actions: [1, 0] + +# State 5 +# Apply action "CONTINUE" +action: 0 + +# State 6 +# Apply joint action ["COOPERATE", "COOPERATE"] +actions: [0, 0] + +# State 7 +# Apply action "STOP" +action: 1 + +# State 8 +# p0:CCDC p1:CDCC +IsTerminal() = True +History() = [0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1] +HistoryString() = "0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.TERMINAL +InformationStateString(0) = "us:CCDC op:CDCC" +InformationStateString(1) = "us:CDCC op:CCDC" +ObservationString(0) = "us:CCDC op:CDCC" +ObservationString(1) = "us:CDCC op:CCDC" +Rewards() = [5, 5] +Returns() = [20, 20] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_iterated_prisoners_dilemma_turn_based.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_iterated_prisoners_dilemma_turn_based.txt new file mode 100644 index 0000000..ee624ed --- /dev/null +++ 
b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_iterated_prisoners_dilemma_turn_based.txt @@ -0,0 +1,175 @@ +game: turn_based_simultaneous_game(game=python_iterated_prisoners_dilemma()) + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Turn-based Python Iterated Prisoner's Dilemma" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["game"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = False +GameType.provides_observation_tensor = False +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.REWARDS +GameType.short_name = "turn_based_simultaneous_game" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 2 +PolicyTensorShape() = [2] +MaxChanceOutcomes() = 2 +GetParameters() = {game=python_iterated_prisoners_dilemma(max_game_length=9999,termination_probability=0.125)} +NumPlayers() = 2 +MinUtility() = 0.0 +MaxUtility() = 9.999e+04 +UtilitySum() = None +MaxGameLength() = 19998 +ToString() = "turn_based_simultaneous_game(game=python_iterated_prisoners_dilemma(max_game_length=9999,termination_probability=0.125))" + +# State 0 +# Partial joint action: +# p0: p1: +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["COOPERATE", "DEFECT"] + +# Apply action "COOPERATE" +action: 0 + +# State 1 +# Partial joint action: 0 +# p0: p1: +IsTerminal() = False +History() = [0] +HistoryString() = "0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["COOPERATE", "DEFECT"] + +# Apply action "DEFECT" +action: 1 + +# State 2 +# p0:C p1:D +IsTerminal() = False +History() = [0, 1] +HistoryString() = "0, 1" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ChanceOutcomes() = [(0,0.875), (1,0.125)] +LegalActions() = [0, 1] +StringLegalActions() = ["CONTINUE", "STOP"] + +# Apply action "CONTINUE" +action: 0 + +# State 3 +# Partial joint action: +# p0:C p1:D +IsTerminal() = False +History() = [0, 1, 0] +HistoryString() = "0, 1, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +Rewards() = [0, 10] +Returns() = [0, 10] +LegalActions() = [0, 1] +StringLegalActions() = ["COOPERATE", "DEFECT"] + +# Apply action "COOPERATE" +action: 0 + +# State 4 +# Partial joint action: 0 +# p0:C p1:D +IsTerminal() = False +History() = [0, 1, 0, 0] +HistoryString() = "0, 1, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +Rewards() = [0, 0] +Returns() = [0, 10] +LegalActions() = [0, 1] +StringLegalActions() = ["COOPERATE", "DEFECT"] + +# Apply action "COOPERATE" +action: 0 + +# State 5 +# p0:CC p1:DC +IsTerminal() = False +History() = [0, 1, 0, 0, 0] +HistoryString() = "0, 1, 0, 0, 0" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ChanceOutcomes() = [(0,0.875), (1,0.125)] +LegalActions() = [0, 1] +StringLegalActions() = ["CONTINUE", "STOP"] + +# Apply action "CONTINUE" +action: 0 + +# State 6 +# Partial joint action: +# p0:CC p1:DC +IsTerminal() = False +History() = [0, 1, 0, 0, 0, 0] 
+HistoryString() = "0, 1, 0, 0, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +Rewards() = [5, 5] +Returns() = [5, 15] +LegalActions() = [0, 1] +StringLegalActions() = ["COOPERATE", "DEFECT"] + +# Apply action "COOPERATE" +action: 0 + +# State 7 +# Partial joint action: 0 +# p0:CC p1:DC +IsTerminal() = False +History() = [0, 1, 0, 0, 0, 0, 0] +HistoryString() = "0, 1, 0, 0, 0, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +Rewards() = [0, 0] +Returns() = [5, 15] +LegalActions() = [0, 1] +StringLegalActions() = ["COOPERATE", "DEFECT"] + +# Apply action "COOPERATE" +action: 0 + +# State 8 +# Apply action "STOP" +action: 1 + +# State 9 +# p0:CCC p1:DCC +IsTerminal() = True +History() = [0, 1, 0, 0, 0, 0, 0, 0, 1] +HistoryString() = "0, 1, 0, 0, 0, 0, 0, 0, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +Rewards() = [5, 5] +Returns() = [10, 20] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_kuhn_poker.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_kuhn_poker.txt new file mode 100644 index 0000000..699b1d3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_kuhn_poker.txt @@ -0,0 +1,221 @@ +game: python_kuhn_poker + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Python Kuhn Poker" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = True +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "python_kuhn_poker" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 2 +PolicyTensorShape() = [2] +MaxChanceOutcomes() = 3 +GetParameters() = {} +NumPlayers() = 2 +MinUtility() = -2.0 +MaxUtility() = 2.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = player: [2], private_card: [3], betting: [3, 2] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 11 +ObservationTensorShape() = player: [2], private_card: [3], pot_contribution: [2] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 7 +MaxGameLength() = 3 +ToString() = "python_kuhn_poker()" + +# State 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.CHANCE +InformationStateString(0) = "p0" +InformationStateString(1) = "p1" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◯◯◯ +InformationStateTensor(0).betting: ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◯◯◯ +InformationStateTensor(1).betting: ◯◯ + ◯◯ + ◯◯ +ObservationString(0) = "p0 pot[1 1]" +ObservationString(1) = "p1 pot[1 1]" +PublicObservationString() = "p0 pot[1 1]" +PrivateObservationString(0) = "p0" +PrivateObservationString(1) = "p1" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◯◯◯ +ObservationTensor(0).pot_contribution: ◉◉ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◯◯◯ +ObservationTensor(1).pot_contribution: ◉◉ +ChanceOutcomes() = [(0,0.333333), 
(1,0.333333), (2,0.333333)] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["Deal:0", "Deal:1", "Deal:2"] + +# Apply action "Deal:1" +action: 1 + +# State 1 +# 1 +IsTerminal() = False +History() = [1] +HistoryString() = "1" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.CHANCE +InformationStateString(0) = "p0 card:1" +InformationStateString(1) = "p1" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◯◉◯ +InformationStateTensor(0).betting: ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◯◯◯ +InformationStateTensor(1).betting: ◯◯ + ◯◯ + ◯◯ +ObservationString(0) = "p0 card:1 pot[1 1]" +ObservationString(1) = "p1 pot[1 1]" +PublicObservationString() = "p0 pot[1 1]" +PrivateObservationString(0) = "p0 card:1" +PrivateObservationString(1) = "p1" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◯◉◯ +ObservationTensor(0).pot_contribution: ◉◉ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◯◯◯ +ObservationTensor(1).pot_contribution: ◉◉ +ChanceOutcomes() = [(0,0.5), (2,0.5)] +LegalActions() = [0, 2] +StringLegalActions() = ["Deal:0", "Deal:2"] + +# Apply action "Deal:2" +action: 2 + +# State 2 +# 12 +IsTerminal() = False +History() = [1, 2] +HistoryString() = "1, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "p0 card:1" +InformationStateString(1) = "p1 card:2" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◯◉◯ +InformationStateTensor(0).betting: ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◯◯◉ +InformationStateTensor(1).betting: ◯◯ + ◯◯ + ◯◯ +ObservationString(0) = "p0 card:1 pot[1 1]" +ObservationString(1) = "p1 card:2 pot[1 1]" +PublicObservationString() = "p0 pot[1 1]" +PrivateObservationString(0) = "p0 card:1" +PrivateObservationString(1) = "p1 card:2" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◯◉◯ +ObservationTensor(0).pot_contribution: ◉◉ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◯◯◉ +ObservationTensor(1).pot_contribution: ◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["Pass", "Bet"] + +# Apply action "Pass" +action: 0 + +# State 3 +# 12p +IsTerminal() = False +History() = [1, 2, 0] +HistoryString() = "1, 2, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "p0 card:1 p" +InformationStateString(1) = "p1 card:2 p" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◯◉◯ +InformationStateTensor(0).betting: ◉◯ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◯◯◉ +InformationStateTensor(1).betting: ◉◯ + ◯◯ + ◯◯ +ObservationString(0) = "p0 card:1 pot[1 1]" +ObservationString(1) = "p1 card:2 pot[1 1]" +PublicObservationString() = "p0 pot[1 1]" +PrivateObservationString(0) = "p0 card:1" +PrivateObservationString(1) = "p1 card:2" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◯◉◯ +ObservationTensor(0).pot_contribution: ◉◉ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◯◯◉ +ObservationTensor(1).pot_contribution: ◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["Pass", "Bet"] + +# Apply action "Pass" +action: 0 + +# State 4 +# 12pp +IsTerminal() = True +History() = [1, 2, 0, 0] +HistoryString() = "1, 2, 0, 0" 
+IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.TERMINAL +InformationStateString(0) = "p0 card:1 pp" +InformationStateString(1) = "p1 card:2 pp" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_card: ◯◉◯ +InformationStateTensor(0).betting: ◉◯ + ◉◯ + ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_card: ◯◯◉ +InformationStateTensor(1).betting: ◉◯ + ◉◯ + ◯◯ +ObservationString(0) = "p0 card:1 pot[1 1]" +ObservationString(1) = "p1 card:2 pot[1 1]" +PublicObservationString() = "p0 pot[1 1]" +PrivateObservationString(0) = "p0 card:1" +PrivateObservationString(1) = "p1 card:2" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_card: ◯◉◯ +ObservationTensor(0).pot_contribution: ◉◉ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_card: ◯◯◉ +ObservationTensor(1).pot_contribution: ◉◉ +Rewards() = [-1, 1] +Returns() = [-1, 1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_liars_poker.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_liars_poker.txt new file mode 100644 index 0000000..4dca732 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_liars_poker.txt @@ -0,0 +1,7654 @@ +game: python_liars_poker + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Python Liars Poker" +GameType.max_num_players = 10 +GameType.min_num_players = 2 +GameType.parameter_specification = ["hand_length", "num_digits", "players"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = False +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "python_liars_poker" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 201 +PolicyTensorShape() = [201] +MaxChanceOutcomes() = 100 +GetParameters() = {hand_length=10,num_digits=10,players=2} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = player: [2], private_hand: [10], rebid_state: [1], counts_state: [1], bid_history: [200, 2], challenge_history: [200, 2] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 814 +ObservationTensorShape() = player: [2], private_hand: [10], rebid_state: [1], counts_state: [1] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 14 +MaxGameLength() = 400 +ToString() = "python_liars_poker(hand_length=10,num_digits=10,players=2)" + +# State 0 +# Hands: [[], []], Bidder: -1, Current Player: PlayerId.CHANCE, Current Bid: None of None, Rebid: False +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.CHANCE +InformationStateString(0) = "p0 rebid:[0] counts:[0]" +InformationStateString(1) = "p1 rebid:[0] counts:[0]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_hand: ◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(0).rebid_state: ◯ +InformationStateTensor(0).counts_state: ◯ +InformationStateTensor(0).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ 
+ ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(0).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_hand: ◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1).rebid_state: ◯ +InformationStateTensor(1).counts_state: ◯ +InformationStateTensor(1).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + 
◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +ObservationString(0) = "p0 rebid:[0] counts:[0]" +ObservationString(1) = "p1 rebid:[0] counts:[0]" +PublicObservationString() = "p0 rebid:[0] counts:[0]" +PrivateObservationString(0) = "p0" +PrivateObservationString(1) = "p1" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand: ◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).rebid_state: ◯ +ObservationTensor(0).counts_state: ◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand: ◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).rebid_state: ◯ +ObservationTensor(1).counts_state: ◯ +ChanceOutcomes() = [(1,0.1), (2,0.1), (3,0.1), (4,0.1), (5,0.1), (6,0.1), (7,0.1), (8,0.1), (9,0.1), (0,0.1)] +LegalActions() = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] +StringLegalActions() = ["Deal: 1", "Deal: 2", "Deal: 3", "Deal: 4", "Deal: 5", "Deal: 6", "Deal: 7", "Deal: 8", "Deal: 9", "Deal: 0"] + +# Apply action "Deal: 2" +action: 2 + +# State 1 +# Hands: [[2], []], Bidder: -1, Current Player: PlayerId.CHANCE, Current Bid: None of None, Rebid: False +IsTerminal() = False +History() = [2] +HistoryString() = "2" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.CHANCE +InformationStateString(0) = "p0 rebid:[0] counts:[0]" +InformationStateString(1) = "p1 rebid:[0] counts:[0]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_hand: ◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(0).rebid_state: ◯ +InformationStateTensor(0).counts_state: ◯ +InformationStateTensor(0).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(0).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ 
+ ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_hand: ◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1).rebid_state: ◯ +InformationStateTensor(1).counts_state: ◯ +InformationStateTensor(1).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +ObservationString(0) = "p0 rebid:[0] counts:[0]" +ObservationString(1) = "p1 rebid:[0] counts:[0]" +PublicObservationString() = "p0 rebid:[0] counts:[0]" +PrivateObservationString(0) = "p0" +PrivateObservationString(1) = "p1" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand: ◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).rebid_state: ◯ +ObservationTensor(0).counts_state: ◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand: ◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).rebid_state: ◯ +ObservationTensor(1).counts_state: ◯ +ChanceOutcomes() = [(1,0.1), (2,0.1), (3,0.1), (4,0.1), (5,0.1), (6,0.1), (7,0.1), (8,0.1), (9,0.1), (0,0.1)] +LegalActions() = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] +StringLegalActions() = ["Deal: 1", "Deal: 2", "Deal: 3", "Deal: 
4", "Deal: 5", "Deal: 6", "Deal: 7", "Deal: 8", "Deal: 9", "Deal: 0"] + +# Apply action "Deal: 2" +action: 2 + +# State 2 +# Apply action "Deal: 4" +action: 4 + +# State 3 +# Apply action "Deal: 7" +action: 7 + +# State 4 +# Apply action "Deal: 9" +action: 9 + +# State 5 +# Apply action "Deal: 8" +action: 8 + +# State 6 +# Apply action "Deal: 2" +action: 2 + +# State 7 +# Apply action "Deal: 0" +action: 0 + +# State 8 +# Apply action "Deal: 8" +action: 8 + +# State 9 +# Apply action "Deal: 9" +action: 9 + +# State 10 +# Apply action "Deal: 2" +action: 2 + +# State 11 +# Apply action "Deal: 6" +action: 6 + +# State 12 +# Apply action "Deal: 0" +action: 0 + +# State 13 +# Apply action "Deal: 3" +action: 3 + +# State 14 +# Apply action "Deal: 9" +action: 9 + +# State 15 +# Apply action "Deal: 9" +action: 9 + +# State 16 +# Apply action "Deal: 7" +action: 7 + +# State 17 +# Apply action "Deal: 0" +action: 0 + +# State 18 +# Apply action "Deal: 6" +action: 6 + +# State 19 +# Apply action "Deal: 5" +action: 5 + +# State 20 +# Hands: [[2, 4, 9, 2, 8, 2, 0, 9, 7, 6], [2, 7, 8, 0, 9, 6, 3, 9, 0, 5]], Bidder: -1, Current Player: 0, Current Bid: None of None, Rebid: False +IsTerminal() = False +History() = [2, 2, 4, 7, 9, 8, 2, 0, 8, 9, 2, 6, 0, 3, 9, 9, 7, 0, 6, 5] +HistoryString() = "2, 2, 4, 7, 9, 8, 2, 0, 8, 9, 2, 6, 0, 3, 9, 9, 7, 0, 6, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "p0 hand:[2, 4, 9, 2, 8, 2, 0, 9, 7, 6] rebid:[0] counts:[0]" +InformationStateString(1) = "p1 hand:[2, 7, 8, 0, 9, 6, 3, 9, 0, 5] rebid:[0] counts:[0]" +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_hand = [2, 4, 9, 2, 8, 2, 0, 9, 7, 6] +InformationStateTensor(0).rebid_state: ◯ +InformationStateTensor(0).counts_state: ◯ +InformationStateTensor(0).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(0).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + 
◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_hand = [2, 7, 8, 0, 9, 6, 3, 9, 0, 5] +InformationStateTensor(1).rebid_state: ◯ +InformationStateTensor(1).counts_state: ◯ +InformationStateTensor(1).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +ObservationString(0) = "p0 hand:[2, 4, 9, 2, 8, 2, 0, 9, 7, 6] rebid:[0] counts:[0]" +ObservationString(1) = "p1 hand:[2, 7, 8, 0, 9, 6, 3, 9, 0, 5] rebid:[0] counts:[0]" +PublicObservationString() = "p0 rebid:[0] counts:[0]" +PrivateObservationString(0) = "p0 hand:[2, 4, 9, 2, 8, 2, 0, 9, 7, 6]" +PrivateObservationString(1) = "p1 hand:[2, 7, 8, 0, 9, 6, 3, 9, 0, 5]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand = [2, 4, 9, 2, 8, 2, 0, 9, 7, 6] +ObservationTensor(0).rebid_state: ◯ +ObservationTensor(0).counts_state: ◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand = [2, 7, 8, 0, 9, 6, 3, 9, 0, 5] +ObservationTensor(1).rebid_state: ◯ +ObservationTensor(1).counts_state: ◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200] +StringLegalActions() = ["Bid: 1 of 1", "Bid: 1 of 2", "Bid: 1 of 3", "Bid: 1 of 4", "Bid: 1 of 5", "Bid: 1 of 6", "Bid: 1 of 7", "Bid: 1 of 8", "Bid: 1 of 9", "Bid: 1 of 10", "Bid: 2 of 1", "Bid: 2 of 2", "Bid: 2 of 3", "Bid: 2 of 4", "Bid: 2 of 5", "Bid: 2 of 6", "Bid: 2 of 7", "Bid: 2 of 8", "Bid: 2 of 9", "Bid: 2 of 10", "Bid: 3 of 1", "Bid: 3 of 2", "Bid: 3 of 3", "Bid: 3 of 4", "Bid: 3 of 5", "Bid: 3 of 6", "Bid: 3 of 7", "Bid: 3 of 8", "Bid: 3 of 9", "Bid: 3 of 10", "Bid: 4 of 1", "Bid: 4 of 2", "Bid: 4 of 3", "Bid: 4 of 4", "Bid: 4 of 5", "Bid: 4 of 6", "Bid: 4 of 7", "Bid: 4 of 8", "Bid: 4 of 9", "Bid: 4 of 10", "Bid: 5 of 1", "Bid: 5 of 2", "Bid: 5 of 3", "Bid: 5 of 4", "Bid: 5 of 5", "Bid: 5 of 6", "Bid: 5 of 7", "Bid: 5 of 8", "Bid: 5 of 9", "Bid: 5 of 10", "Bid: 6 of 1", "Bid: 6 of 2", "Bid: 6 of 3", "Bid: 6 of 4", "Bid: 6 of 5", "Bid: 6 of 6", "Bid: 6 of 7", "Bid: 6 of 8", "Bid: 6 of 9", "Bid: 6 of 10", "Bid: 7 of 1", "Bid: 7 of 2", "Bid: 7 of 3", "Bid: 7 of 4", "Bid: 7 of 5", "Bid: 7 of 6", "Bid: 7 of 7", "Bid: 7 of 8", "Bid: 7 of 9", "Bid: 7 of 10", "Bid: 8 of 1", "Bid: 8 of 2", "Bid: 8 of 3", "Bid: 8 of 4", "Bid: 8 of 5", "Bid: 8 of 6", "Bid: 8 of 7", "Bid: 8 of 8", "Bid: 8 of 9", "Bid: 8 of 10", "Bid: 9 of 1", "Bid: 9 of 2", "Bid: 9 of 3", "Bid: 9 of 4", "Bid: 9 of 5", "Bid: 9 of 6", "Bid: 9 of 7", "Bid: 9 of 8", "Bid: 9 of 9", "Bid: 9 of 10", "Bid: 10 of 1", "Bid: 10 of 2", "Bid: 10 of 3", "Bid: 10 of 4", "Bid: 10 of 5", "Bid: 10 of 6", "Bid: 10 of 7", "Bid: 10 of 8", "Bid: 10 of 9", "Bid: 10 of 10", "Bid: 11 of 1", "Bid: 11 of 2", "Bid: 11 of 3", "Bid: 11 of 4", "Bid: 11 of 5", "Bid: 11 of 6", "Bid: 11 of 7", "Bid: 11 of 8", "Bid: 11 of 9", "Bid: 11 of 10", "Bid: 12 of 1", "Bid: 12 of 2", "Bid: 12 of 3", "Bid: 12 of 4", "Bid: 12 of 5", "Bid: 12 of 6", "Bid: 12 of 7", "Bid: 12 of 8", "Bid: 12 of 9", "Bid: 12 of 10", "Bid: 13 of 1", "Bid: 13 of 2", "Bid: 13 of 3", "Bid: 13 of 4", "Bid: 13 of 5", "Bid: 13 of 6", "Bid: 13 of 7", "Bid: 13 of 8", "Bid: 13 of 9", "Bid: 13 of 10", "Bid: 14 of 1", "Bid: 14 of 2", "Bid: 14 of 3", "Bid: 14 of 4", "Bid: 14 of 5", "Bid: 14 of 6", "Bid: 14 of 7", "Bid: 14 of 8", "Bid: 14 of 9", "Bid: 14 of 10", "Bid: 15 of 1", "Bid: 15 of 2", "Bid: 15 of 3", "Bid: 15 of 4", "Bid: 15 of 5", "Bid: 15 of 6", "Bid: 15 of 7", "Bid: 15 of 8", "Bid: 15 of 9", "Bid: 15 of 10", "Bid: 16 of 1", "Bid: 16 of 2", "Bid: 16 of 3", "Bid: 16 of 4", "Bid: 16 of 5", "Bid: 16 of 6", "Bid: 16 of 7", "Bid: 16 of 8", "Bid: 16 of 9", "Bid: 16 of 10", "Bid: 17 of 1", "Bid: 17 of 2", "Bid: 17 of 3", "Bid: 17 of 4", "Bid: 17 of 5", "Bid: 17 of 6", "Bid: 17 of 7", "Bid: 17 of 8", "Bid: 17 of 9", "Bid: 17 of 10", "Bid: 18 of 1", "Bid: 18 of 2", "Bid: 18 of 3", "Bid: 18 of 4", "Bid: 18 of 5", "Bid: 18 of 6", "Bid: 18 of 7", "Bid: 18 of 8", "Bid: 18 of 9", 
"Bid: 18 of 10", "Bid: 19 of 1", "Bid: 19 of 2", "Bid: 19 of 3", "Bid: 19 of 4", "Bid: 19 of 5", "Bid: 19 of 6", "Bid: 19 of 7", "Bid: 19 of 8", "Bid: 19 of 9", "Bid: 19 of 10", "Bid: 20 of 1", "Bid: 20 of 2", "Bid: 20 of 3", "Bid: 20 of 4", "Bid: 20 of 5", "Bid: 20 of 6", "Bid: 20 of 7", "Bid: 20 of 8", "Bid: 20 of 9", "Bid: 20 of 10"] + +# Apply action "Bid: 18 of 9" +action: 179 + +# State 21 +# Hands: [[2, 4, 9, 2, 8, 2, 0, 9, 7, 6], [2, 7, 8, 0, 9, 6, 3, 9, 0, 5]], Bidder: 0, Current Player: 1, Current Bid: 18 of 9, Rebid: False +IsTerminal() = False +History() = [2, 2, 4, 7, 9, 8, 2, 0, 8, 9, 2, 6, 0, 3, 9, 9, 7, 0, 6, 5, 179] +HistoryString() = "2, 2, 4, 7, 9, 8, 2, 0, 8, 9, 2, 6, 0, 3, 9, 9, 7, 0, 6, 5, 179" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "p0 hand:[2, 4, 9, 2, 8, 2, 0, 9, 7, 6] rebid:[0] counts:[0] b:178." +InformationStateString(1) = "p1 hand:[2, 7, 8, 0, 9, 6, 3, 9, 0, 5] rebid:[0] counts:[0] b:178." +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_hand = [2, 4, 9, 2, 8, 2, 0, 9, 7, 6] +InformationStateTensor(0).rebid_state: ◯ +InformationStateTensor(0).counts_state: ◯ +InformationStateTensor(0).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(0).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_hand = [2, 7, 8, 0, 9, 6, 3, 9, 0, 5] +InformationStateTensor(1).rebid_state: ◯ +InformationStateTensor(1).counts_state: ◯ +InformationStateTensor(1).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ 
+ ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +ObservationString(0) = "p0 hand:[2, 4, 9, 2, 8, 2, 0, 9, 7, 6] rebid:[0] counts:[0]" +ObservationString(1) = "p1 hand:[2, 7, 8, 0, 9, 6, 3, 9, 0, 5] rebid:[0] counts:[0]" +PublicObservationString() = "p0 rebid:[0] counts:[0]" +PrivateObservationString(0) = "p0 hand:[2, 4, 9, 2, 8, 2, 0, 9, 7, 6]" +PrivateObservationString(1) = "p1 hand:[2, 7, 8, 0, 9, 6, 3, 9, 0, 5]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand = [2, 4, 9, 2, 8, 2, 0, 9, 7, 6] +ObservationTensor(0).rebid_state: ◯ +ObservationTensor(0).counts_state: ◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand = [2, 7, 8, 0, 9, 6, 3, 9, 0, 5] +ObservationTensor(1).rebid_state: ◯ +ObservationTensor(1).counts_state: ◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200] +StringLegalActions() = ["Challenge", "Bid: 18 of 10", "Bid: 19 of 1", "Bid: 19 of 2", "Bid: 19 of 3", "Bid: 19 of 4", "Bid: 19 of 5", "Bid: 19 of 6", "Bid: 19 of 7", "Bid: 19 of 8", "Bid: 19 of 9", "Bid: 19 of 10", "Bid: 20 of 1", "Bid: 20 of 2", "Bid: 20 of 3", "Bid: 20 of 4", "Bid: 20 of 5", "Bid: 20 of 6", "Bid: 20 of 7", "Bid: 20 of 8", "Bid: 20 of 9", "Bid: 20 of 10"] + +# Apply action "Bid: 19 of 5" +action: 185 + +# State 22 +# Hands: [[2, 4, 9, 2, 8, 2, 0, 9, 7, 6], [2, 7, 8, 0, 9, 6, 3, 9, 0, 5]], Bidder: 1, Current Player: 0, Current Bid: 19 of 5, Rebid: False +IsTerminal() = False +History() = [2, 2, 4, 7, 9, 8, 2, 0, 8, 9, 2, 6, 0, 3, 9, 9, 7, 
0, 6, 5, 179, 185] +HistoryString() = "2, 2, 4, 7, 9, 8, 2, 0, 8, 9, 2, 6, 0, 3, 9, 9, 7, 0, 6, 5, 179, 185" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "p0 hand:[2, 4, 9, 2, 8, 2, 0, 9, 7, 6] rebid:[0] counts:[0] b:178. b:184." +InformationStateString(1) = "p1 hand:[2, 7, 8, 0, 9, 6, 3, 9, 0, 5] rebid:[0] counts:[0] b:178. b:184." +InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_hand = [2, 4, 9, 2, 8, 2, 0, 9, 7, 6] +InformationStateTensor(0).rebid_state: ◯ +InformationStateTensor(0).counts_state: ◯ +InformationStateTensor(0).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◉ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(0).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_hand = [2, 7, 8, 0, 9, 6, 3, 9, 0, 5] +InformationStateTensor(1).rebid_state: ◯ +InformationStateTensor(1).counts_state: ◯ +InformationStateTensor(1).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + 
◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◉ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +ObservationString(0) = "p0 hand:[2, 4, 9, 2, 8, 2, 0, 9, 7, 6] rebid:[0] counts:[0]" +ObservationString(1) = "p1 hand:[2, 7, 8, 0, 9, 6, 3, 9, 0, 5] rebid:[0] counts:[0]" +PublicObservationString() = "p0 rebid:[0] counts:[0]" +PrivateObservationString(0) = "p0 hand:[2, 4, 9, 2, 8, 2, 0, 9, 7, 6]" +PrivateObservationString(1) = "p1 hand:[2, 7, 8, 0, 9, 6, 3, 9, 0, 5]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand = [2, 4, 9, 2, 8, 2, 0, 9, 7, 6] +ObservationTensor(0).rebid_state: ◯ +ObservationTensor(0).counts_state: ◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand = [2, 7, 8, 0, 9, 6, 3, 9, 0, 5] +ObservationTensor(1).rebid_state: ◯ +ObservationTensor(1).counts_state: ◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200] +StringLegalActions() = ["Challenge", "Bid: 19 of 6", "Bid: 19 of 7", "Bid: 19 of 8", "Bid: 19 of 9", "Bid: 19 of 10", "Bid: 20 of 1", "Bid: 20 of 2", "Bid: 20 of 3", "Bid: 20 of 4", "Bid: 20 of 5", "Bid: 20 of 6", "Bid: 20 of 7", "Bid: 20 of 8", "Bid: 20 of 9", "Bid: 20 of 10"] + +# Apply action "Bid: 20 of 5" +action: 195 + +# State 23 +# Hands: [[2, 4, 9, 2, 8, 2, 0, 9, 7, 6], [2, 7, 8, 0, 9, 6, 3, 9, 0, 5]], Bidder: 0, Current Player: 1, Current Bid: 20 of 5, Rebid: False +IsTerminal() = False +History() = [2, 2, 4, 7, 9, 8, 2, 0, 8, 9, 2, 6, 0, 3, 9, 9, 7, 0, 6, 5, 179, 185, 195] +HistoryString() = "2, 2, 4, 7, 9, 8, 2, 0, 8, 9, 2, 6, 0, 3, 9, 9, 7, 0, 6, 5, 179, 185, 195" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "p0 hand:[2, 4, 9, 2, 8, 2, 0, 9, 7, 6] rebid:[0] counts:[0] b:178. b:184. b:194." +InformationStateString(1) = "p1 hand:[2, 7, 8, 0, 9, 6, 3, 9, 0, 5] rebid:[0] counts:[0] b:178. b:184. b:194." 
+InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_hand = [2, 4, 9, 2, 8, 2, 0, 9, 7, 6] +InformationStateTensor(0).rebid_state: ◯ +InformationStateTensor(0).counts_state: ◯ +InformationStateTensor(0).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◉ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(0).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_hand = [2, 7, 8, 0, 9, 6, 3, 9, 0, 5] +InformationStateTensor(1).rebid_state: ◯ +InformationStateTensor(1).counts_state: ◯ +InformationStateTensor(1).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◉ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ 
+InformationStateTensor(1).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +ObservationString(0) = "p0 hand:[2, 4, 9, 2, 8, 2, 0, 9, 7, 6] rebid:[0] counts:[0]" +ObservationString(1) = "p1 hand:[2, 7, 8, 0, 9, 6, 3, 9, 0, 5] rebid:[0] counts:[0]" +PublicObservationString() = "p0 rebid:[0] counts:[0]" +PrivateObservationString(0) = "p0 hand:[2, 4, 9, 2, 8, 2, 0, 9, 7, 6]" +PrivateObservationString(1) = "p1 hand:[2, 7, 8, 0, 9, 6, 3, 9, 0, 5]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand = [2, 4, 9, 2, 8, 2, 0, 9, 7, 6] +ObservationTensor(0).rebid_state: ◯ +ObservationTensor(0).counts_state: ◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand = [2, 7, 8, 0, 9, 6, 3, 9, 0, 5] +ObservationTensor(1).rebid_state: ◯ +ObservationTensor(1).counts_state: ◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 196, 197, 198, 199, 200] +StringLegalActions() = ["Challenge", "Bid: 20 of 6", "Bid: 20 of 7", "Bid: 20 of 8", "Bid: 20 of 9", "Bid: 20 of 10"] + +# Apply action "Bid: 20 of 7" +action: 197 + +# State 24 +# Hands: [[2, 4, 9, 2, 8, 2, 0, 9, 7, 6], [2, 7, 8, 0, 9, 6, 3, 9, 0, 5]], Bidder: 1, Current Player: 0, Current Bid: 20 of 7, Rebid: False +IsTerminal() = False +History() = [2, 2, 4, 7, 9, 8, 2, 0, 8, 9, 2, 6, 0, 3, 9, 9, 7, 0, 6, 5, 179, 185, 195, 197] +HistoryString() = "2, 2, 4, 7, 9, 8, 2, 0, 8, 9, 2, 6, 0, 3, 9, 9, 7, 0, 6, 5, 179, 185, 195, 197" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "p0 hand:[2, 4, 9, 2, 8, 2, 0, 9, 7, 6] rebid:[0] counts:[0] b:178. b:184. b:194. b:196." +InformationStateString(1) = "p1 hand:[2, 7, 8, 0, 9, 6, 3, 9, 0, 5] rebid:[0] counts:[0] b:178. b:184. b:194. b:196." 
+InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_hand = [2, 4, 9, 2, 8, 2, 0, 9, 7, 6] +InformationStateTensor(0).rebid_state: ◯ +InformationStateTensor(0).counts_state: ◯ +InformationStateTensor(0).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◉ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◉ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(0).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_hand = [2, 7, 8, 0, 9, 6, 3, 9, 0, 5] +InformationStateTensor(1).rebid_state: ◯ +InformationStateTensor(1).counts_state: ◯ +InformationStateTensor(1).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◉ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◉ + ◯◯ + ◯◯ + ◯◯ 
+InformationStateTensor(1).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +ObservationString(0) = "p0 hand:[2, 4, 9, 2, 8, 2, 0, 9, 7, 6] rebid:[0] counts:[0]" +ObservationString(1) = "p1 hand:[2, 7, 8, 0, 9, 6, 3, 9, 0, 5] rebid:[0] counts:[0]" +PublicObservationString() = "p0 rebid:[0] counts:[0]" +PrivateObservationString(0) = "p0 hand:[2, 4, 9, 2, 8, 2, 0, 9, 7, 6]" +PrivateObservationString(1) = "p1 hand:[2, 7, 8, 0, 9, 6, 3, 9, 0, 5]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand = [2, 4, 9, 2, 8, 2, 0, 9, 7, 6] +ObservationTensor(0).rebid_state: ◯ +ObservationTensor(0).counts_state: ◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand = [2, 7, 8, 0, 9, 6, 3, 9, 0, 5] +ObservationTensor(1).rebid_state: ◯ +ObservationTensor(1).counts_state: ◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 198, 199, 200] +StringLegalActions() = ["Challenge", "Bid: 20 of 8", "Bid: 20 of 9", "Bid: 20 of 10"] + +# Apply action "Bid: 20 of 8" +action: 198 + +# State 25 +# Hands: [[2, 4, 9, 2, 8, 2, 0, 9, 7, 6], [2, 7, 8, 0, 9, 6, 3, 9, 0, 5]], Bidder: 0, Current Player: 1, Current Bid: 20 of 8, Rebid: False +IsTerminal() = False +History() = [2, 2, 4, 7, 9, 8, 2, 0, 8, 9, 2, 6, 0, 3, 9, 9, 7, 0, 6, 5, 179, 185, 195, 197, 198] +HistoryString() = "2, 2, 4, 7, 9, 8, 2, 0, 8, 9, 2, 6, 0, 3, 9, 9, 7, 0, 6, 5, 179, 185, 195, 197, 198" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "p0 hand:[2, 4, 9, 2, 8, 2, 0, 9, 7, 6] rebid:[0] counts:[0] b:178. b:184. b:194. b:196. b:197." +InformationStateString(1) = "p1 hand:[2, 7, 8, 0, 9, 6, 3, 9, 0, 5] rebid:[0] counts:[0] b:178. b:184. b:194. b:196. b:197." 
+InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_hand = [2, 4, 9, 2, 8, 2, 0, 9, 7, 6] +InformationStateTensor(0).rebid_state: ◯ +InformationStateTensor(0).counts_state: ◯ +InformationStateTensor(0).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◉ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◉ + ◉◯ + ◯◯ + ◯◯ +InformationStateTensor(0).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_hand = [2, 7, 8, 0, 9, 6, 3, 9, 0, 5] +InformationStateTensor(1).rebid_state: ◯ +InformationStateTensor(1).counts_state: ◯ +InformationStateTensor(1).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◉ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◉ + ◉◯ + ◯◯ + ◯◯ 
+InformationStateTensor(1).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ +ObservationString(0) = "p0 hand:[2, 4, 9, 2, 8, 2, 0, 9, 7, 6] rebid:[0] counts:[0]" +ObservationString(1) = "p1 hand:[2, 7, 8, 0, 9, 6, 3, 9, 0, 5] rebid:[0] counts:[0]" +PublicObservationString() = "p0 rebid:[0] counts:[0]" +PrivateObservationString(0) = "p0 hand:[2, 4, 9, 2, 8, 2, 0, 9, 7, 6]" +PrivateObservationString(1) = "p1 hand:[2, 7, 8, 0, 9, 6, 3, 9, 0, 5]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand = [2, 4, 9, 2, 8, 2, 0, 9, 7, 6] +ObservationTensor(0).rebid_state: ◯ +ObservationTensor(0).counts_state: ◯ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand = [2, 7, 8, 0, 9, 6, 3, 9, 0, 5] +ObservationTensor(1).rebid_state: ◯ +ObservationTensor(1).counts_state: ◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 199, 200] +StringLegalActions() = ["Challenge", "Bid: 20 of 9", "Bid: 20 of 10"] + +# Apply action "Challenge" +action: 0 + +# State 26 +# Apply action "Challenge" +action: 0 + +# State 27 +# Hands: [[2, 4, 9, 2, 8, 2, 0, 9, 7, 6], [2, 7, 8, 0, 9, 6, 3, 9, 0, 5]], Bidder: 0, Current Player: PlayerId.TERMINAL, Current Bid: 20 of 8, Rebid: False +IsTerminal() = True +History() = [2, 2, 4, 7, 9, 8, 2, 0, 8, 9, 2, 6, 0, 3, 9, 9, 7, 0, 6, 5, 179, 185, 195, 197, 198, 0, 0] +HistoryString() = "2, 2, 4, 7, 9, 8, 2, 0, 8, 9, 2, 6, 0, 3, 9, 9, 7, 0, 6, 5, 179, 185, 195, 197, 198, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.TERMINAL +InformationStateString(0) = "p0 hand:[2, 4, 9, 2, 8, 2, 0, 9, 7, 6] rebid:[0] counts:[1] b:178. b:184. b:194. b:196. b:197. c:197." +InformationStateString(1) = "p1 hand:[2, 7, 8, 0, 9, 6, 3, 9, 0, 5] rebid:[0] counts:[1] b:178. b:184. b:194. b:196. b:197. c:197." 
+InformationStateTensor(0).player: ◉◯ +InformationStateTensor(0).private_hand = [2, 4, 9, 2, 8, 2, 0, 9, 7, 6] +InformationStateTensor(0).rebid_state: ◯ +InformationStateTensor(0).counts_state: ◉ +InformationStateTensor(0).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◉ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◉ + ◉◯ + ◯◯ + ◯◯ +InformationStateTensor(0).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◉ + ◯◯ + ◯◯ +InformationStateTensor(1).player: ◯◉ +InformationStateTensor(1).private_hand = [2, 7, 8, 0, 9, 6, 3, 9, 0, 5] +InformationStateTensor(1).rebid_state: ◯ +InformationStateTensor(1).counts_state: ◉ +InformationStateTensor(1).bid_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◉ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◯ + ◯◯ + ◯◉ + ◉◯ + ◯◯ + ◯◯ 
+InformationStateTensor(1).challenge_history: ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◯◯ + ◉◉ + ◯◯ + ◯◯ +ObservationString(0) = "p0 hand:[2, 4, 9, 2, 8, 2, 0, 9, 7, 6] rebid:[0] counts:[1]" +ObservationString(1) = "p1 hand:[2, 7, 8, 0, 9, 6, 3, 9, 0, 5] rebid:[0] counts:[1]" +PublicObservationString() = "p0 rebid:[0] counts:[1]" +PrivateObservationString(0) = "p0 hand:[2, 4, 9, 2, 8, 2, 0, 9, 7, 6]" +PrivateObservationString(1) = "p1 hand:[2, 7, 8, 0, 9, 6, 3, 9, 0, 5]" +ObservationTensor(0).player: ◉◯ +ObservationTensor(0).private_hand = [2, 4, 9, 2, 8, 2, 0, 9, 7, 6] +ObservationTensor(0).rebid_state: ◯ +ObservationTensor(0).counts_state: ◉ +ObservationTensor(1).player: ◯◉ +ObservationTensor(1).private_hand = [2, 7, 8, 0, 9, 6, 3, 9, 0, 5] +ObservationTensor(1).rebid_state: ◯ +ObservationTensor(1).counts_state: ◉ +Rewards() = [-1, 1] +Returns() = [-1, 1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_mfg_crowd_avoidance.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_mfg_crowd_avoidance.txt new file mode 100644 index 0000000..81c520f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_mfg_crowd_avoidance.txt @@ -0,0 +1,343 @@ +game: python_mfg_crowd_avoidance + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.MEAN_FIELD +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Python Mean Field Crowd Avoidance" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["coef_congestion", "coef_target", "congestion_matrix", "forbidden_states", "geometry", "horizon", "init_distrib", "players", "proba_noise", "size"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.REWARDS +GameType.short_name = "python_mfg_crowd_avoidance" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 5 +PolicyTensorShape() = [5] +MaxChanceOutcomes() = 49 +GetParameters() = {coef_congestion=0.0,coef_target=1.0,congestion_matrix=0 1 1 0,forbidden_states=[0|0;1|0;2|0;3|0;4|0;5|0;6|0;0|1;3|1;6|1;0|2;6|2;0|3;3|3;6|3;0|4;6|4;0|5;3|5;6|5;0|6;1|6;2|6;3|6;4|6;5|6;6|6],geometry=0,horizon=10,init_distrib=0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.4 0.4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.2 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.4 0.4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0,players=2,proba_noise=0.5,size=7} +NumPlayers() = 2 +MinUtility() = -inf +MaxUtility() = inf +UtilitySum() = None +ObservationTensorShape() = x: [7], y: [7], t: [11] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 25 +MaxGameLength() = 10 +ToString() = "python_mfg_crowd_avoidance(coef_congestion=0.0,coef_target=1.0,congestion_matrix=0 1 1 0,forbidden_states=[0|0;1|0;2|0;3|0;4|0;5|0;6|0;0|1;3|1;6|1;0|2;6|2;0|3;3|3;6|3;0|4;6|4;0|5;3|5;6|5;0|6;1|6;2|6;3|6;4|6;5|6;6|6],geometry=0,horizon=10,init_distrib=0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.4 0.4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.4 0.4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0,players=2,proba_noise=0.5,size=7)" + +# State 0 +# position_init_1 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.CHANCE +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = "position_init_1" +ObservationString(1) = "position_init_1" +ObservationTensor(0).x: ◯◯◯◯◯◯◯ +ObservationTensor(0).y: ◯◯◯◯◯◯◯ +ObservationTensor(0).t: ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).x: ◯◯◯◯◯◯◯ +ObservationTensor(1).y: ◯◯◯◯◯◯◯ +ObservationTensor(1).t: ◉◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(19,0.2), (32,0.4), (33,0.4)] +LegalActions() = [19, 32, 33] +StringLegalActions() = ["init_position=19", "init_position=32", "init_position=33"] + +# Apply action "init_position=32" +action: 32 + +# State 1 +# (pop=1, t=0, pos=[4 4]) +IsTerminal() = False +History() = [32] +HistoryString() = "32" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "32" +InformationStateString(1) = "32" +ObservationString(0) = "(pop=1, t=0, pos=[4 4])" +ObservationString(1) = "(pop=1, t=0, pos=[4 4])" +ObservationTensor(0).x: ◯◯◯◯◉◯◯ +ObservationTensor(0).y: ◯◯◯◯◉◯◯ +ObservationTensor(0).t: ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).x: ◯◯◯◯◉◯◯ +ObservationTensor(1).y: ◯◯◯◯◉◯◯ +ObservationTensor(1).t: ◉◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4] +StringLegalActions() = ["[0 0]", "[1 0]", "[0 1]", "[ 0 -1]", "[-1 0]"] + +# Apply action "[ 0 -1]" +action: 3 + +# State 2 +# (pop=1, t=0_a_mu, pos=[4 3]) +IsTerminal() = False +History() = [32, 3] +HistoryString() = "32, 3" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.CHANCE +InformationStateString(0) = "32, 3" +InformationStateString(1) = "32, 3" +ObservationString(0) = "(pop=1, t=0_a_mu, pos=[4 3])" +ObservationString(1) = "(pop=1, t=0_a_mu, pos=[4 3])" +ObservationTensor(0).x: ◯◯◯◯◉◯◯ +ObservationTensor(0).y: ◯◯◯◉◯◯◯ +ObservationTensor(0).t: ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).x: ◯◯◯◯◉◯◯ +ObservationTensor(1).y: ◯◯◯◉◯◯◯ +ObservationTensor(1).t: ◉◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.5), (1,0.125), (2,0.125), (3,0.125), (4,0.125)] +LegalActions() = [0, 1, 2, 3, 4] +StringLegalActions() = ["[0 0]", "[1 0]", "[0 1]", "[ 0 -1]", "[-1 0]"] + +# Apply action "[-1 0]" +action: 4 + +# State 3 +# (pop=1, t=1_a, 
pos=[3 3]) +IsTerminal() = False +History() = [32, 3, 4] +HistoryString() = "32, 3, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.MEAN_FIELD +InformationStateString(0) = "32, 3, 4" +InformationStateString(1) = "32, 3, 4" +ObservationString(0) = "(pop=1, t=1_a, pos=[3 3])" +ObservationString(1) = "(pop=1, t=1_a, pos=[3 3])" +ObservationTensor(0).x: ◯◯◯◉◯◯◯ +ObservationTensor(0).y: ◯◯◯◉◯◯◯ +ObservationTensor(0).t: ◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).x: ◯◯◯◉◯◯◯ +ObservationTensor(1).y: ◯◯◯◉◯◯◯ +ObservationTensor(1).t: ◯◉◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +DistributionSupport() = ['(pop=0, t=1_a, pos=[0 0])', '(pop=1, t=1_a, pos=[0 0])', '(pop=0, t=1_a, pos=[0 1])', '(pop=1, t=1_a, pos=[0 1])', '(pop=0, t=1_a, pos=[0 2])', '(pop=1, t=1_a, pos=[0 2])', '(pop=0, t=1_a, pos=[0 3])', '(pop=1, t=1_a, pos=[0 3])', '(pop=0, t=1_a, pos=[0 4])', '(pop=1, t=1_a, pos=[0 4])', '(pop=0, t=1_a, pos=[0 5])', '(pop=1, t=1_a, pos=[0 5])', '(pop=0, t=1_a, pos=[0 6])', '(pop=1, t=1_a, pos=[0 6])', '(pop=0, t=1_a, pos=[1 0])', '(pop=1, t=1_a, pos=[1 0])', '(pop=0, t=1_a, pos=[1 1])', '(pop=1, t=1_a, pos=[1 1])', '(pop=0, t=1_a, pos=[1 2])', '(pop=1, t=1_a, pos=[1 2])', '(pop=0, t=1_a, pos=[1 3])', '(pop=1, t=1_a, pos=[1 3])', '(pop=0, t=1_a, pos=[1 4])', '(pop=1, t=1_a, pos=[1 4])', '(pop=0, t=1_a, pos=[1 5])', '(pop=1, t=1_a, pos=[1 5])', '(pop=0, t=1_a, pos=[1 6])', '(pop=1, t=1_a, pos=[1 6])', '(pop=0, t=1_a, pos=[2 0])', '(pop=1, t=1_a, pos=[2 0])', '(pop=0, t=1_a, pos=[2 1])', '(pop=1, t=1_a, pos=[2 1])', '(pop=0, t=1_a, pos=[2 2])', '(pop=1, t=1_a, pos=[2 2])', '(pop=0, t=1_a, pos=[2 3])', '(pop=1, t=1_a, pos=[2 3])', '(pop=0, t=1_a, pos=[2 4])', '(pop=1, t=1_a, pos=[2 4])', '(pop=0, t=1_a, pos=[2 5])', '(pop=1, t=1_a, pos=[2 5])', '(pop=0, t=1_a, pos=[2 6])', '(pop=1, t=1_a, pos=[2 6])', '(pop=0, t=1_a, pos=[3 0])', '(pop=1, t=1_a, pos=[3 0])', '(pop=0, t=1_a, pos=[3 1])', '(pop=1, t=1_a, pos=[3 1])', '(pop=0, t=1_a, pos=[3 2])', '(pop=1, t=1_a, pos=[3 2])', '(pop=0, t=1_a, pos=[3 3])', '(pop=1, t=1_a, pos=[3 3])', '(pop=0, t=1_a, pos=[3 4])', '(pop=1, t=1_a, pos=[3 4])', '(pop=0, t=1_a, pos=[3 5])', '(pop=1, t=1_a, pos=[3 5])', '(pop=0, t=1_a, pos=[3 6])', '(pop=1, t=1_a, pos=[3 6])', '(pop=0, t=1_a, pos=[4 0])', '(pop=1, t=1_a, pos=[4 0])', '(pop=0, t=1_a, pos=[4 1])', '(pop=1, t=1_a, pos=[4 1])', '(pop=0, t=1_a, pos=[4 2])', '(pop=1, t=1_a, pos=[4 2])', '(pop=0, t=1_a, pos=[4 3])', '(pop=1, t=1_a, pos=[4 3])', '(pop=0, t=1_a, pos=[4 4])', '(pop=1, t=1_a, pos=[4 4])', '(pop=0, t=1_a, pos=[4 5])', '(pop=1, t=1_a, pos=[4 5])', '(pop=0, t=1_a, pos=[4 6])', '(pop=1, t=1_a, pos=[4 6])', '(pop=0, t=1_a, pos=[5 0])', '(pop=1, t=1_a, pos=[5 0])', '(pop=0, t=1_a, pos=[5 1])', '(pop=1, t=1_a, pos=[5 1])', '(pop=0, t=1_a, pos=[5 2])', '(pop=1, t=1_a, pos=[5 2])', '(pop=0, t=1_a, pos=[5 3])', '(pop=1, t=1_a, pos=[5 3])', '(pop=0, t=1_a, pos=[5 4])', '(pop=1, t=1_a, pos=[5 4])', '(pop=0, t=1_a, pos=[5 5])', '(pop=1, t=1_a, pos=[5 5])', '(pop=0, t=1_a, pos=[5 6])', '(pop=1, t=1_a, pos=[5 6])', '(pop=0, t=1_a, pos=[6 0])', '(pop=1, t=1_a, pos=[6 0])', '(pop=0, t=1_a, pos=[6 1])', '(pop=1, t=1_a, pos=[6 1])', '(pop=0, t=1_a, pos=[6 2])', '(pop=1, t=1_a, pos=[6 2])', '(pop=0, t=1_a, pos=[6 3])', '(pop=1, t=1_a, pos=[6 3])', '(pop=0, t=1_a, pos=[6 4])', '(pop=1, t=1_a, pos=[6 4])', '(pop=0, t=1_a, pos=[6 5])', '(pop=1, t=1_a, pos=[6 5])', '(pop=0, t=1_a, pos=[6 6])', '(pop=1, t=1_a, pos=[6 6])'] + +# Set mean field distribution to be uniform +action: update_distribution 
+ +# State 4 +# (pop=1, t=1, pos=[3 3]) +IsTerminal() = False +History() = [32, 3, 4] +HistoryString() = "32, 3, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "32, 3, 4" +InformationStateString(1) = "32, 3, 4" +ObservationString(0) = "(pop=1, t=1, pos=[3 3])" +ObservationString(1) = "(pop=1, t=1, pos=[3 3])" +ObservationTensor(0).x: ◯◯◯◉◯◯◯ +ObservationTensor(0).y: ◯◯◯◉◯◯◯ +ObservationTensor(0).t: ◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).x: ◯◯◯◉◯◯◯ +ObservationTensor(1).y: ◯◯◯◉◯◯◯ +ObservationTensor(1).t: ◯◉◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4] +StringLegalActions() = ["[0 0]", "[1 0]", "[0 1]", "[ 0 -1]", "[-1 0]"] + +# Apply action "[0 1]" +action: 2 + +# State 5 +# Apply action "[0 1]" +action: 2 + +# State 6 +# (pop=1, t=2_a, pos=[3 5]) +IsTerminal() = False +History() = [32, 3, 4, 2, 2] +HistoryString() = "32, 3, 4, 2, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.MEAN_FIELD +InformationStateString(0) = "32, 3, 4, 2, 2" +InformationStateString(1) = "32, 3, 4, 2, 2" +ObservationString(0) = "(pop=1, t=2_a, pos=[3 5])" +ObservationString(1) = "(pop=1, t=2_a, pos=[3 5])" +ObservationTensor(0).x: ◯◯◯◉◯◯◯ +ObservationTensor(0).y: ◯◯◯◯◯◉◯ +ObservationTensor(0).t: ◯◯◉◯◯◯◯◯◯◯◯ +ObservationTensor(1).x: ◯◯◯◉◯◯◯ +ObservationTensor(1).y: ◯◯◯◯◯◉◯ +ObservationTensor(1).t: ◯◯◉◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +DistributionSupport() = ['(pop=0, t=2_a, pos=[0 0])', '(pop=1, t=2_a, pos=[0 0])', '(pop=0, t=2_a, pos=[0 1])', '(pop=1, t=2_a, pos=[0 1])', '(pop=0, t=2_a, pos=[0 2])', '(pop=1, t=2_a, pos=[0 2])', '(pop=0, t=2_a, pos=[0 3])', '(pop=1, t=2_a, pos=[0 3])', '(pop=0, t=2_a, pos=[0 4])', '(pop=1, t=2_a, pos=[0 4])', '(pop=0, t=2_a, pos=[0 5])', '(pop=1, t=2_a, pos=[0 5])', '(pop=0, t=2_a, pos=[0 6])', '(pop=1, t=2_a, pos=[0 6])', '(pop=0, t=2_a, pos=[1 0])', '(pop=1, t=2_a, pos=[1 0])', '(pop=0, t=2_a, pos=[1 1])', '(pop=1, t=2_a, pos=[1 1])', '(pop=0, t=2_a, pos=[1 2])', '(pop=1, t=2_a, pos=[1 2])', '(pop=0, t=2_a, pos=[1 3])', '(pop=1, t=2_a, pos=[1 3])', '(pop=0, t=2_a, pos=[1 4])', '(pop=1, t=2_a, pos=[1 4])', '(pop=0, t=2_a, pos=[1 5])', '(pop=1, t=2_a, pos=[1 5])', '(pop=0, t=2_a, pos=[1 6])', '(pop=1, t=2_a, pos=[1 6])', '(pop=0, t=2_a, pos=[2 0])', '(pop=1, t=2_a, pos=[2 0])', '(pop=0, t=2_a, pos=[2 1])', '(pop=1, t=2_a, pos=[2 1])', '(pop=0, t=2_a, pos=[2 2])', '(pop=1, t=2_a, pos=[2 2])', '(pop=0, t=2_a, pos=[2 3])', '(pop=1, t=2_a, pos=[2 3])', '(pop=0, t=2_a, pos=[2 4])', '(pop=1, t=2_a, pos=[2 4])', '(pop=0, t=2_a, pos=[2 5])', '(pop=1, t=2_a, pos=[2 5])', '(pop=0, t=2_a, pos=[2 6])', '(pop=1, t=2_a, pos=[2 6])', '(pop=0, t=2_a, pos=[3 0])', '(pop=1, t=2_a, pos=[3 0])', '(pop=0, t=2_a, pos=[3 1])', '(pop=1, t=2_a, pos=[3 1])', '(pop=0, t=2_a, pos=[3 2])', '(pop=1, t=2_a, pos=[3 2])', '(pop=0, t=2_a, pos=[3 3])', '(pop=1, t=2_a, pos=[3 3])', '(pop=0, t=2_a, pos=[3 4])', '(pop=1, t=2_a, pos=[3 4])', '(pop=0, t=2_a, pos=[3 5])', '(pop=1, t=2_a, pos=[3 5])', '(pop=0, t=2_a, pos=[3 6])', '(pop=1, t=2_a, pos=[3 6])', '(pop=0, t=2_a, pos=[4 0])', '(pop=1, t=2_a, pos=[4 0])', '(pop=0, t=2_a, pos=[4 1])', '(pop=1, t=2_a, pos=[4 1])', '(pop=0, t=2_a, pos=[4 2])', '(pop=1, t=2_a, pos=[4 2])', '(pop=0, t=2_a, pos=[4 3])', '(pop=1, t=2_a, pos=[4 3])', '(pop=0, t=2_a, pos=[4 4])', '(pop=1, t=2_a, pos=[4 4])', '(pop=0, t=2_a, pos=[4 5])', '(pop=1, t=2_a, pos=[4 5])', '(pop=0, t=2_a, pos=[4 6])', '(pop=1, t=2_a, pos=[4 6])', '(pop=0, t=2_a, pos=[5 
0])', '(pop=1, t=2_a, pos=[5 0])', '(pop=0, t=2_a, pos=[5 1])', '(pop=1, t=2_a, pos=[5 1])', '(pop=0, t=2_a, pos=[5 2])', '(pop=1, t=2_a, pos=[5 2])', '(pop=0, t=2_a, pos=[5 3])', '(pop=1, t=2_a, pos=[5 3])', '(pop=0, t=2_a, pos=[5 4])', '(pop=1, t=2_a, pos=[5 4])', '(pop=0, t=2_a, pos=[5 5])', '(pop=1, t=2_a, pos=[5 5])', '(pop=0, t=2_a, pos=[5 6])', '(pop=1, t=2_a, pos=[5 6])', '(pop=0, t=2_a, pos=[6 0])', '(pop=1, t=2_a, pos=[6 0])', '(pop=0, t=2_a, pos=[6 1])', '(pop=1, t=2_a, pos=[6 1])', '(pop=0, t=2_a, pos=[6 2])', '(pop=1, t=2_a, pos=[6 2])', '(pop=0, t=2_a, pos=[6 3])', '(pop=1, t=2_a, pos=[6 3])', '(pop=0, t=2_a, pos=[6 4])', '(pop=1, t=2_a, pos=[6 4])', '(pop=0, t=2_a, pos=[6 5])', '(pop=1, t=2_a, pos=[6 5])', '(pop=0, t=2_a, pos=[6 6])', '(pop=1, t=2_a, pos=[6 6])'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 7 +# (pop=1, t=2, pos=[3 5]) +IsTerminal() = False +History() = [32, 3, 4, 2, 2] +HistoryString() = "32, 3, 4, 2, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "32, 3, 4, 2, 2" +InformationStateString(1) = "32, 3, 4, 2, 2" +ObservationString(0) = "(pop=1, t=2, pos=[3 5])" +ObservationString(1) = "(pop=1, t=2, pos=[3 5])" +ObservationTensor(0).x: ◯◯◯◉◯◯◯ +ObservationTensor(0).y: ◯◯◯◯◯◉◯ +ObservationTensor(0).t: ◯◯◉◯◯◯◯◯◯◯◯ +ObservationTensor(1).x: ◯◯◯◉◯◯◯ +ObservationTensor(1).y: ◯◯◯◯◯◉◯ +ObservationTensor(1).t: ◯◯◉◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4] +StringLegalActions() = ["[0 0]", "[1 0]", "[0 1]", "[ 0 -1]", "[-1 0]"] + +# Apply action "[0 1]" +action: 2 + +# State 8 +# Apply action "[ 0 -1]" +action: 3 + +# State 9 +# (pop=1, t=3_a, pos=[3 5]) +IsTerminal() = False +History() = [32, 3, 4, 2, 2, 2, 3] +HistoryString() = "32, 3, 4, 2, 2, 2, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.MEAN_FIELD +InformationStateString(0) = "32, 3, 4, 2, 2, 2, 3" +InformationStateString(1) = "32, 3, 4, 2, 2, 2, 3" +ObservationString(0) = "(pop=1, t=3_a, pos=[3 5])" +ObservationString(1) = "(pop=1, t=3_a, pos=[3 5])" +ObservationTensor(0).x: ◯◯◯◉◯◯◯ +ObservationTensor(0).y: ◯◯◯◯◯◉◯ +ObservationTensor(0).t: ◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(1).x: ◯◯◯◉◯◯◯ +ObservationTensor(1).y: ◯◯◯◯◯◉◯ +ObservationTensor(1).t: ◯◯◯◉◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +DistributionSupport() = ['(pop=0, t=3_a, pos=[0 0])', '(pop=1, t=3_a, pos=[0 0])', '(pop=0, t=3_a, pos=[0 1])', '(pop=1, t=3_a, pos=[0 1])', '(pop=0, t=3_a, pos=[0 2])', '(pop=1, t=3_a, pos=[0 2])', '(pop=0, t=3_a, pos=[0 3])', '(pop=1, t=3_a, pos=[0 3])', '(pop=0, t=3_a, pos=[0 4])', '(pop=1, t=3_a, pos=[0 4])', '(pop=0, t=3_a, pos=[0 5])', '(pop=1, t=3_a, pos=[0 5])', '(pop=0, t=3_a, pos=[0 6])', '(pop=1, t=3_a, pos=[0 6])', '(pop=0, t=3_a, pos=[1 0])', '(pop=1, t=3_a, pos=[1 0])', '(pop=0, t=3_a, pos=[1 1])', '(pop=1, t=3_a, pos=[1 1])', '(pop=0, t=3_a, pos=[1 2])', '(pop=1, t=3_a, pos=[1 2])', '(pop=0, t=3_a, pos=[1 3])', '(pop=1, t=3_a, pos=[1 3])', '(pop=0, t=3_a, pos=[1 4])', '(pop=1, t=3_a, pos=[1 4])', '(pop=0, t=3_a, pos=[1 5])', '(pop=1, t=3_a, pos=[1 5])', '(pop=0, t=3_a, pos=[1 6])', '(pop=1, t=3_a, pos=[1 6])', '(pop=0, t=3_a, pos=[2 0])', '(pop=1, t=3_a, pos=[2 0])', '(pop=0, t=3_a, pos=[2 1])', '(pop=1, t=3_a, pos=[2 1])', '(pop=0, t=3_a, pos=[2 2])', '(pop=1, t=3_a, pos=[2 2])', '(pop=0, t=3_a, pos=[2 3])', '(pop=1, t=3_a, pos=[2 3])', '(pop=0, t=3_a, pos=[2 4])', '(pop=1, t=3_a, pos=[2 4])', '(pop=0, t=3_a, pos=[2 5])', '(pop=1, 
t=3_a, pos=[2 5])', '(pop=0, t=3_a, pos=[2 6])', '(pop=1, t=3_a, pos=[2 6])', '(pop=0, t=3_a, pos=[3 0])', '(pop=1, t=3_a, pos=[3 0])', '(pop=0, t=3_a, pos=[3 1])', '(pop=1, t=3_a, pos=[3 1])', '(pop=0, t=3_a, pos=[3 2])', '(pop=1, t=3_a, pos=[3 2])', '(pop=0, t=3_a, pos=[3 3])', '(pop=1, t=3_a, pos=[3 3])', '(pop=0, t=3_a, pos=[3 4])', '(pop=1, t=3_a, pos=[3 4])', '(pop=0, t=3_a, pos=[3 5])', '(pop=1, t=3_a, pos=[3 5])', '(pop=0, t=3_a, pos=[3 6])', '(pop=1, t=3_a, pos=[3 6])', '(pop=0, t=3_a, pos=[4 0])', '(pop=1, t=3_a, pos=[4 0])', '(pop=0, t=3_a, pos=[4 1])', '(pop=1, t=3_a, pos=[4 1])', '(pop=0, t=3_a, pos=[4 2])', '(pop=1, t=3_a, pos=[4 2])', '(pop=0, t=3_a, pos=[4 3])', '(pop=1, t=3_a, pos=[4 3])', '(pop=0, t=3_a, pos=[4 4])', '(pop=1, t=3_a, pos=[4 4])', '(pop=0, t=3_a, pos=[4 5])', '(pop=1, t=3_a, pos=[4 5])', '(pop=0, t=3_a, pos=[4 6])', '(pop=1, t=3_a, pos=[4 6])', '(pop=0, t=3_a, pos=[5 0])', '(pop=1, t=3_a, pos=[5 0])', '(pop=0, t=3_a, pos=[5 1])', '(pop=1, t=3_a, pos=[5 1])', '(pop=0, t=3_a, pos=[5 2])', '(pop=1, t=3_a, pos=[5 2])', '(pop=0, t=3_a, pos=[5 3])', '(pop=1, t=3_a, pos=[5 3])', '(pop=0, t=3_a, pos=[5 4])', '(pop=1, t=3_a, pos=[5 4])', '(pop=0, t=3_a, pos=[5 5])', '(pop=1, t=3_a, pos=[5 5])', '(pop=0, t=3_a, pos=[5 6])', '(pop=1, t=3_a, pos=[5 6])', '(pop=0, t=3_a, pos=[6 0])', '(pop=1, t=3_a, pos=[6 0])', '(pop=0, t=3_a, pos=[6 1])', '(pop=1, t=3_a, pos=[6 1])', '(pop=0, t=3_a, pos=[6 2])', '(pop=1, t=3_a, pos=[6 2])', '(pop=0, t=3_a, pos=[6 3])', '(pop=1, t=3_a, pos=[6 3])', '(pop=0, t=3_a, pos=[6 4])', '(pop=1, t=3_a, pos=[6 4])', '(pop=0, t=3_a, pos=[6 5])', '(pop=1, t=3_a, pos=[6 5])', '(pop=0, t=3_a, pos=[6 6])', '(pop=1, t=3_a, pos=[6 6])'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 10 +# Apply action "[ 0 -1]" +action: 3 + +# State 11 +# Apply action "[ 0 -1]" +action: 3 + +# State 12 +# Set mean field distribution to be uniform +action: update_distribution + +# State 13 +# Apply action "[0 0]" +action: 0 + +# State 14 +# Apply action "[0 0]" +action: 0 + +# State 15 +# Set mean field distribution to be uniform +action: update_distribution + +# State 16 +# Apply action "[-1 0]" +action: 4 + +# State 17 +# Apply action "[0 1]" +action: 2 + +# State 18 +# Set mean field distribution to be uniform +action: update_distribution + +# State 19 +# Apply action "[0 0]" +action: 0 + +# State 20 +# Apply action "[ 0 -1]" +action: 3 + +# State 21 +# Set mean field distribution to be uniform +action: update_distribution + +# State 22 +# Apply action "[0 0]" +action: 0 + +# State 23 +# Apply action "[0 1]" +action: 2 + +# State 24 +# Set mean field distribution to be uniform +action: update_distribution + +# State 25 +# Apply action "[0 1]" +action: 2 + +# State 26 +# Apply action "[0 0]" +action: 0 + +# State 27 +# Set mean field distribution to be uniform +action: update_distribution + +# State 28 +# Apply action "[-1 0]" +action: 4 + +# State 29 +# Apply action "[0 0]" +action: 0 + +# State 30 +# (pop=1, t=10_a, pos=[1 5]) +IsTerminal() = True +History() = [32, 3, 4, 2, 2, 2, 3, 3, 3, 0, 0, 4, 2, 0, 3, 0, 2, 2, 0, 4, 0] +HistoryString() = "32, 3, 4, 2, 2, 2, 3, 3, 3, 0, 0, 4, 2, 0, 3, 0, 2, 2, 0, 4, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.TERMINAL +InformationStateString(0) = "32, 3, 4, 2, 2, 2, 3, 3, 3, 0, 0, 4, 2, 0, 3, 0, 2, 2, 0, 4, 0" +InformationStateString(1) = "32, 3, 4, 2, 2, 2, 3, 3, 3, 0, 0, 4, 2, 0, 3, 0, 2, 2, 0, 4, 0" +ObservationString(0) = "(pop=1, t=10_a, pos=[1 
5])" +ObservationString(1) = "(pop=1, t=10_a, pos=[1 5])" +ObservationTensor(0).x: ◯◉◯◯◯◯◯ +ObservationTensor(0).y: ◯◯◯◯◯◉◯ +ObservationTensor(0).t: ◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(1).x: ◯◉◯◯◯◯◯ +ObservationTensor(1).y: ◯◯◯◯◯◉◯ +ObservationTensor(1).t: ◯◯◯◯◯◯◯◯◯◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_mfg_crowd_modelling.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_mfg_crowd_modelling.txt new file mode 100644 index 0000000..441ef02 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_mfg_crowd_modelling.txt @@ -0,0 +1,289 @@ +game: python_mfg_crowd_modelling + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.MEAN_FIELD +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Python Mean Field Crowd Modelling" +GameType.max_num_players = 1 +GameType.min_num_players = 1 +GameType.parameter_specification = ["horizon", "size"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.REWARDS +GameType.short_name = "python_mfg_crowd_modelling" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 3 +PolicyTensorShape() = [3] +MaxChanceOutcomes() = 10 +GetParameters() = {horizon=10,size=10} +NumPlayers() = 1 +MinUtility() = -inf +MaxUtility() = inf +UtilitySum() = None +ObservationTensorShape() = x: [10], t: [11] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 21 +MaxGameLength() = 10 +ToString() = "python_mfg_crowd_modelling(horizon=10,size=10)" + +# State 0 +# initial +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.CHANCE +InformationStateString(0) = "" +ObservationString(0) = "initial" +ObservationTensor(0).x: ◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).t: ◉◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.1), (1,0.1), (2,0.1), (3,0.1), (4,0.1), (5,0.1), (6,0.1), (7,0.1), (8,0.1), (9,0.1)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +StringLegalActions() = ["init_state=0", "init_state=1", "init_state=2", "init_state=3", "init_state=4", "init_state=5", "init_state=6", "init_state=7", "init_state=8", "init_state=9"] + +# Apply action "init_state=5" +action: 5 + +# State 1 +# (5, 0) +IsTerminal() = False +History() = [5] +HistoryString() = "5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.DEFAULT_PLAYER_ID +InformationStateString(0) = "5" +ObservationString(0) = "(5, 0)" +ObservationTensor(0).x: ◯◯◯◯◯◉◯◯◯◯ +ObservationTensor(0).t: ◉◯◯◯◯◯◯◯◯◯◯ +Rewards() = [3.30259] +Returns() = [3.30259] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["-1", "0", "1"] + +# Apply action "0" +action: 1 + +# State 2 +# (5, 0)_a_mu +IsTerminal() = False +History() = [5, 1] +HistoryString() = "5, 1" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.CHANCE +InformationStateString(0) = "5, 1" +ObservationString(0) = "(5, 0)_a_mu" +ObservationTensor(0).x: ◯◯◯◯◯◉◯◯◯◯ +ObservationTensor(0).t: ◉◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.333333), (1,0.333333), (2,0.333333)] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["-1", "0", "1"] + +# Apply action "0" +action: 1 + +# State 3 +# 
(5, 1)_a +IsTerminal() = False +History() = [5, 1, 1] +HistoryString() = "5, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.MEAN_FIELD +InformationStateString(0) = "5, 1, 1" +ObservationString(0) = "(5, 1)_a" +ObservationTensor(0).x: ◯◯◯◯◯◉◯◯◯◯ +ObservationTensor(0).t: ◯◉◯◯◯◯◯◯◯◯◯ +Rewards() = [0] +Returns() = [3.30259] +DistributionSupport() = ['(0, 1)_a', '(1, 1)_a', '(2, 1)_a', '(3, 1)_a', '(4, 1)_a', '(5, 1)_a', '(6, 1)_a', '(7, 1)_a', '(8, 1)_a', '(9, 1)_a'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 4 +# (5, 1) +IsTerminal() = False +History() = [5, 1, 1] +HistoryString() = "5, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.DEFAULT_PLAYER_ID +InformationStateString(0) = "5, 1, 1" +ObservationString(0) = "(5, 1)" +ObservationTensor(0).x: ◯◯◯◯◯◉◯◯◯◯ +ObservationTensor(0).t: ◯◉◯◯◯◯◯◯◯◯◯ +Rewards() = [3.30259] +Returns() = [6.60517] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["-1", "0", "1"] + +# Apply action "-1" +action: 0 + +# State 5 +# Apply action "-1" +action: 0 + +# State 6 +# (3, 2)_a +IsTerminal() = False +History() = [5, 1, 1, 0, 0] +HistoryString() = "5, 1, 1, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.MEAN_FIELD +InformationStateString(0) = "5, 1, 1, 0, 0" +ObservationString(0) = "(3, 2)_a" +ObservationTensor(0).x: ◯◯◯◉◯◯◯◯◯◯ +ObservationTensor(0).t: ◯◯◉◯◯◯◯◯◯◯◯ +Rewards() = [0] +Returns() = [6.60517] +DistributionSupport() = ['(0, 2)_a', '(1, 2)_a', '(2, 2)_a', '(3, 2)_a', '(4, 2)_a', '(5, 2)_a', '(6, 2)_a', '(7, 2)_a', '(8, 2)_a', '(9, 2)_a'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 7 +# (3, 2) +IsTerminal() = False +History() = [5, 1, 1, 0, 0] +HistoryString() = "5, 1, 1, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.DEFAULT_PLAYER_ID +InformationStateString(0) = "5, 1, 1, 0, 0" +ObservationString(0) = "(3, 2)" +ObservationTensor(0).x: ◯◯◯◉◯◯◯◯◯◯ +ObservationTensor(0).t: ◯◯◉◯◯◯◯◯◯◯◯ +Rewards() = [2.80259] +Returns() = [9.40776] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["-1", "0", "1"] + +# Apply action "1" +action: 2 + +# State 8 +# Apply action "0" +action: 1 + +# State 9 +# (4, 3)_a +IsTerminal() = False +History() = [5, 1, 1, 0, 0, 2, 1] +HistoryString() = "5, 1, 1, 0, 0, 2, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.MEAN_FIELD +InformationStateString(0) = "5, 1, 1, 0, 0, 2, 1" +ObservationString(0) = "(4, 3)_a" +ObservationTensor(0).x: ◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(0).t: ◯◯◯◉◯◯◯◯◯◯◯ +Rewards() = [0] +Returns() = [9.40776] +DistributionSupport() = ['(0, 3)_a', '(1, 3)_a', '(2, 3)_a', '(3, 3)_a', '(4, 3)_a', '(5, 3)_a', '(6, 3)_a', '(7, 3)_a', '(8, 3)_a', '(9, 3)_a'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 10 +# Apply action "0" +action: 1 + +# State 11 +# Apply action "-1" +action: 0 + +# State 12 +# Set mean field distribution to be uniform +action: update_distribution + +# State 13 +# Apply action "-1" +action: 0 + +# State 14 +# Apply action "-1" +action: 0 + +# State 15 +# Set mean field distribution to be uniform +action: update_distribution + +# State 16 +# Apply action "1" +action: 2 + +# State 17 +# Apply action "1" +action: 2 + +# State 18 +# Set mean field distribution to be uniform +action: update_distribution + +# State 19 +# Apply action "0" +action: 1 + +# State 20 +# Apply action "-1" +action: 
0 + +# State 21 +# Set mean field distribution to be uniform +action: update_distribution + +# State 22 +# Apply action "0" +action: 1 + +# State 23 +# Apply action "0" +action: 1 + +# State 24 +# Set mean field distribution to be uniform +action: update_distribution + +# State 25 +# Apply action "-1" +action: 0 + +# State 26 +# Apply action "-1" +action: 0 + +# State 27 +# Set mean field distribution to be uniform +action: update_distribution + +# State 28 +# Apply action "-1" +action: 0 + +# State 29 +# Apply action "0" +action: 1 + +# State 30 +# (9, 10)_a +IsTerminal() = True +History() = [5, 1, 1, 0, 0, 2, 1, 1, 0, 0, 0, 2, 2, 1, 0, 1, 1, 0, 0, 0, 1] +HistoryString() = "5, 1, 1, 0, 0, 2, 1, 1, 0, 0, 0, 2, 2, 1, 0, 1, 1, 0, 0, 0, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.TERMINAL +InformationStateString(0) = "5, 1, 1, 0, 0, 2, 1, 1, 0, 0, 0, 2, 2, 1, 0, 1, 1, 0, 0, 0, 1" +ObservationString(0) = "(9, 10)_a" +ObservationTensor(0).x: ◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(0).t: ◯◯◯◯◯◯◯◯◯◯◉ +Rewards() = [0] +Returns() = [28.1259] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_mfg_dynamic_routing.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_mfg_dynamic_routing.txt new file mode 100644 index 0000000..87a305f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_mfg_dynamic_routing.txt @@ -0,0 +1,250 @@ +game: python_mfg_dynamic_routing + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.MEAN_FIELD +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Python Mean Field Routing Game" +GameType.max_num_players = 1 +GameType.min_num_players = 1 +GameType.parameter_specification = ["max_num_time_step", "players", "time_step_length"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = True +GameType.reward_model = RewardModel.REWARDS +GameType.short_name = "python_mfg_dynamic_routing" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 8 +PolicyTensorShape() = [8] +MaxChanceOutcomes() = 1 +GetParameters() = {max_num_time_step=10,players=-1,time_step_length=0.5} +NumPlayers() = 1 +MinUtility() = -11.0 +MaxUtility() = 0.0 +UtilitySum() = None +ObservationTensorShape() = location: [8], destination: [8], time: [11], waiting: [1] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 28 +MaxGameLength() = 10 +ToString() = "python_mfg_dynamic_routing(max_num_time_step=10,players=-1,time_step_length=0.5)" + +# State 0 +# Before initial chance node +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.CHANCE +InformationStateString(0) = "" +ObservationString(0) = "Before initial chance node" +ObservationTensor(0).location: ◉◯◯◯◯◯◯◯ +ObservationTensor(0).destination: ◉◯◯◯◯◯◯◯ +ObservationTensor(0).time: ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).waiting: ◯ +ChanceOutcomes() = [(0,1)] +LegalActions() = [0] +StringLegalActions() = ["Vehicle is assigned to population 0."] + +# Apply action "Vehicle is assigned to population 0." 
+action: 0 + +# State 1 +# Location=O->A, waiting_time=0, t=0, destination='D->E' +IsTerminal() = False +History() = [0] +HistoryString() = "0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.DEFAULT_PLAYER_ID +InformationStateString(0) = "0" +ObservationString(0) = "Location=O->A, waiting_time=0, t=0, destination='D->E'" +ObservationTensor(0).location: ◯◯◯◯◯◯◯◉ +ObservationTensor(0).destination: ◯◯◯◯◯◯◉◯ +ObservationTensor(0).time: ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).waiting: ◯ +Rewards() = [0] +Returns() = [0] +LegalActions() = [1, 2] +StringLegalActions() = ["Vehicle 0 would like to move to A->B.", "Vehicle 0 would like to move to A->C."] + +# Apply action "Vehicle 0 would like to move to A->C." +action: 2 + +# State 2 +# Location=A->C, waiting_time=-1, t=1_mean_field, destination='D->E' +IsTerminal() = False +History() = [0, 2] +HistoryString() = "0, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.MEAN_FIELD +InformationStateString(0) = "0, 2" +ObservationString(0) = "Location=A->C, waiting_time=-1, t=1_mean_field, destination='D->E'" +ObservationTensor(0).location: ◯◯◉◯◯◯◯◯ +ObservationTensor(0).destination: ◯◯◯◯◯◯◉◯ +ObservationTensor(0).time: ◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).waiting: ◯ +Rewards() = [0] +Returns() = [0] +DistributionSupport() = ["Location=A->C, waiting_time=-1, t=1_mean_field, destination='D->E'", "Location=A->C, waiting_time=0, t=1_mean_field, destination='D->E'", "Location=A->C, waiting_time=1, t=1_mean_field, destination='D->E'", "Location=A->C, waiting_time=2, t=1_mean_field, destination='D->E'", "Location=A->C, waiting_time=3, t=1_mean_field, destination='D->E'", "Location=A->C, waiting_time=4, t=1_mean_field, destination='D->E'", "Location=A->C, waiting_time=5, t=1_mean_field, destination='D->E'", "Location=A->C, waiting_time=6, t=1_mean_field, destination='D->E'", "Location=A->C, waiting_time=7, t=1_mean_field, destination='D->E'", "Location=A->C, waiting_time=8, t=1_mean_field, destination='D->E'", "Location=A->C, waiting_time=9, t=1_mean_field, destination='D->E'"] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 3 +# Location=A->C, waiting_time=3, t=1, destination='D->E' +IsTerminal() = False +History() = [0, 2] +HistoryString() = "0, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.DEFAULT_PLAYER_ID +InformationStateString(0) = "0, 2" +ObservationString(0) = "Location=A->C, waiting_time=3, t=1, destination='D->E'" +ObservationTensor(0).location: ◯◯◉◯◯◯◯◯ +ObservationTensor(0).destination: ◯◯◯◯◯◯◉◯ +ObservationTensor(0).time: ◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).waiting: ◉ +Rewards() = [0] +Returns() = [0] +LegalActions() = [0] +StringLegalActions() = ["Vehicle 0 reach a sink node or its destination."] + +# Apply action "Vehicle 0 reach a sink node or its destination." 
+action: 0 + +# State 4 +# Location=A->C, waiting_time=2, t=2_mean_field, destination='D->E' +IsTerminal() = False +History() = [0, 2, 0] +HistoryString() = "0, 2, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.MEAN_FIELD +InformationStateString(0) = "0, 2, 0" +ObservationString(0) = "Location=A->C, waiting_time=2, t=2_mean_field, destination='D->E'" +ObservationTensor(0).location: ◯◯◉◯◯◯◯◯ +ObservationTensor(0).destination: ◯◯◯◯◯◯◉◯ +ObservationTensor(0).time: ◯◯◉◯◯◯◯◯◯◯◯ +ObservationTensor(0).waiting: ◉ +Rewards() = [0] +Returns() = [0] +DistributionSupport() = ["Location=A->C, waiting_time=-1, t=2_mean_field, destination='D->E'", "Location=A->C, waiting_time=0, t=2_mean_field, destination='D->E'", "Location=A->C, waiting_time=1, t=2_mean_field, destination='D->E'", "Location=A->C, waiting_time=2, t=2_mean_field, destination='D->E'", "Location=A->C, waiting_time=3, t=2_mean_field, destination='D->E'", "Location=A->C, waiting_time=4, t=2_mean_field, destination='D->E'", "Location=A->C, waiting_time=5, t=2_mean_field, destination='D->E'", "Location=A->C, waiting_time=6, t=2_mean_field, destination='D->E'", "Location=A->C, waiting_time=7, t=2_mean_field, destination='D->E'", "Location=A->C, waiting_time=8, t=2_mean_field, destination='D->E'", "Location=A->C, waiting_time=9, t=2_mean_field, destination='D->E'"] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 5 +# Location=A->C, waiting_time=2, t=2, destination='D->E' +IsTerminal() = False +History() = [0, 2, 0] +HistoryString() = "0, 2, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.DEFAULT_PLAYER_ID +InformationStateString(0) = "0, 2, 0" +ObservationString(0) = "Location=A->C, waiting_time=2, t=2, destination='D->E'" +ObservationTensor(0).location: ◯◯◉◯◯◯◯◯ +ObservationTensor(0).destination: ◯◯◯◯◯◯◉◯ +ObservationTensor(0).time: ◯◯◉◯◯◯◯◯◯◯◯ +ObservationTensor(0).waiting: ◉ +Rewards() = [0] +Returns() = [0] +LegalActions() = [0] +StringLegalActions() = ["Vehicle 0 reach a sink node or its destination."] + +# Apply action "Vehicle 0 reach a sink node or its destination." 
+action: 0 + +# State 6 +# Location=A->C, waiting_time=1, t=3_mean_field, destination='D->E' +IsTerminal() = False +History() = [0, 2, 0, 0] +HistoryString() = "0, 2, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.MEAN_FIELD +InformationStateString(0) = "0, 2, 0, 0" +ObservationString(0) = "Location=A->C, waiting_time=1, t=3_mean_field, destination='D->E'" +ObservationTensor(0).location: ◯◯◉◯◯◯◯◯ +ObservationTensor(0).destination: ◯◯◯◯◯◯◉◯ +ObservationTensor(0).time: ◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(0).waiting: ◉ +Rewards() = [0] +Returns() = [0] +DistributionSupport() = ["Location=A->C, waiting_time=-1, t=3_mean_field, destination='D->E'", "Location=A->C, waiting_time=0, t=3_mean_field, destination='D->E'", "Location=A->C, waiting_time=1, t=3_mean_field, destination='D->E'", "Location=A->C, waiting_time=2, t=3_mean_field, destination='D->E'", "Location=A->C, waiting_time=3, t=3_mean_field, destination='D->E'", "Location=A->C, waiting_time=4, t=3_mean_field, destination='D->E'", "Location=A->C, waiting_time=5, t=3_mean_field, destination='D->E'", "Location=A->C, waiting_time=6, t=3_mean_field, destination='D->E'", "Location=A->C, waiting_time=7, t=3_mean_field, destination='D->E'", "Location=A->C, waiting_time=8, t=3_mean_field, destination='D->E'", "Location=A->C, waiting_time=9, t=3_mean_field, destination='D->E'"] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 7 +# Apply action "Vehicle 0 reach a sink node or its destination." +action: 0 + +# State 8 +# Set mean field distribution to be uniform +action: update_distribution + +# State 9 +# Apply action "Vehicle 0 would like to move to C->D." +action: 5 + +# State 10 +# Set mean field distribution to be uniform +action: update_distribution + +# State 11 +# Apply action "Vehicle 0 reach a sink node or its destination." +action: 0 + +# State 12 +# Set mean field distribution to be uniform +action: update_distribution + +# State 13 +# Apply action "Vehicle 0 reach a sink node or its destination." +action: 0 + +# State 14 +# Set mean field distribution to be uniform +action: update_distribution + +# State 15 +# Apply action "Vehicle 0 reach a sink node or its destination." +action: 0 + +# State 16 +# Set mean field distribution to be uniform +action: update_distribution + +# State 17 +# Apply action "Vehicle 0 would like to move to D->E." +action: 6 + +# State 18 +# Set mean field distribution to be uniform +action: update_distribution + +# State 19 +# Apply action "Vehicle 0 reach a sink node or its destination." 
+action: 0 + +# State 20 +# Arrived at D->E, with arrival time 8, t=10_mean_field +IsTerminal() = True +History() = [0, 2, 0, 0, 0, 5, 0, 0, 0, 6, 0] +HistoryString() = "0, 2, 0, 0, 0, 5, 0, 0, 0, 6, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.TERMINAL +InformationStateString(0) = "0, 2, 0, 0, 0, 5, 0, 0, 0, 6, 0" +ObservationString(0) = "Arrived at D->E, with arrival time 8, t=10_mean_field" +ObservationTensor(0).location: ◯◯◯◯◯◯◉◯ +ObservationTensor(0).destination: ◯◯◯◯◯◯◉◯ +ObservationTensor(0).time: ◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(0).waiting: ◯ +Rewards() = [-4] +Returns() = [-4] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_mfg_periodic_aversion.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_mfg_periodic_aversion.txt new file mode 100644 index 0000000..c98dbf1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_mfg_periodic_aversion.txt @@ -0,0 +1,440 @@ +game: python_mfg_periodic_aversion + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.MEAN_FIELD +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Mean-Field Periodic Aversion Game" +GameType.max_num_players = 1 +GameType.min_num_players = 1 +GameType.parameter_specification = ["coef_aversion", "dt", "horizon", "n_actions_per_side", "size", "volatility", "xmax", "xmin"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.REWARDS +GameType.short_name = "python_mfg_periodic_aversion" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 21 +PolicyTensorShape() = [21] +MaxChanceOutcomes() = 21 +GetParameters() = {coef_aversion=1.0,dt=0.01,horizon=20,n_actions_per_side=10,size=21,volatility=1.0,xmax=1.0,xmin=0.0} +NumPlayers() = 1 +MinUtility() = -inf +MaxUtility() = inf +UtilitySum() = 0.0 +ObservationTensorShape() = x: [21], t: [21] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 42 +MaxGameLength() = 20 +ToString() = "python_mfg_periodic_aversion(coef_aversion=1.0,dt=0.01,horizon=20,n_actions_per_side=10,size=21,volatility=1.0,xmax=1.0,xmin=0.0)" + +# State 0 +# initial +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.CHANCE +InformationStateString(0) = "" +ObservationString(0) = "initial" +ObservationTensor(0).x: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).t: ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.047619), (1,0.047619), (2,0.047619), (3,0.047619), (4,0.047619), (5,0.047619), (6,0.047619), (7,0.047619), (8,0.047619), (9,0.047619), (10,0.047619), (11,0.047619), (12,0.047619), (13,0.047619), (14,0.047619), (15,0.047619), (16,0.047619), (17,0.047619), (18,0.047619), (19,0.047619), (20,0.047619)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] +StringLegalActions() = ["-10", "-9", "-8", "-7", "-6", "-5", "-4", "-3", "-2", "-1", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + +# Apply action "5" +action: 15 + +# State 1 +# (15, 0) +IsTerminal() = False +History() = [15] +HistoryString() = "15" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 
PlayerId.DEFAULT_PLAYER_ID +InformationStateString(0) = "15" +ObservationString(0) = "(15, 0)" +ObservationTensor(0).x: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(0).t: ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [-0.216904] +Returns() = [-0.216904] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] +StringLegalActions() = ["-10", "-9", "-8", "-7", "-6", "-5", "-4", "-3", "-2", "-1", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + +# Apply action "-10" +action: 0 + +# State 2 +# (5, 0)_a_mu +IsTerminal() = False +History() = [15, 0] +HistoryString() = "15, 0" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.CHANCE +InformationStateString(0) = "15, 0" +ObservationString(0) = "(5, 0)_a_mu" +ObservationTensor(0).x: ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).t: ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,7.4336e-07), (1,7.99187e-06), (2,6.69151e-05), (3,0.000436341), (4,0.00221592), (5,0.00876415), (6,0.0269955), (7,0.0647588), (8,0.120985), (9,0.176033), (10,0.199471), (11,0.176033), (12,0.120985), (13,0.0647588), (14,0.0269955), (15,0.00876415), (16,0.00221592), (17,0.000436341), (18,6.69151e-05), (19,7.99187e-06), (20,7.4336e-07)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] +StringLegalActions() = ["-10", "-9", "-8", "-7", "-6", "-5", "-4", "-3", "-2", "-1", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + +# Apply action "10" +action: 20 + +# State 3 +# (15, 1)_a +IsTerminal() = False +History() = [15, 0, 20] +HistoryString() = "15, 0, 20" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.MEAN_FIELD +InformationStateString(0) = "15, 0, 20" +ObservationString(0) = "(15, 1)_a" +ObservationTensor(0).x: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(0).t: ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0] +Returns() = [0] +DistributionSupport() = ['(0, 1)_a', '(1, 1)_a', '(2, 1)_a', '(3, 1)_a', '(4, 1)_a', '(5, 1)_a', '(6, 1)_a', '(7, 1)_a', '(8, 1)_a', '(9, 1)_a', '(10, 1)_a', '(11, 1)_a', '(12, 1)_a', '(13, 1)_a', '(14, 1)_a', '(15, 1)_a', '(16, 1)_a', '(17, 1)_a', '(18, 1)_a', '(19, 1)_a', '(20, 1)_a'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 4 +# (15, 1) +IsTerminal() = False +History() = [15, 0, 20] +HistoryString() = "15, 0, 20" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.DEFAULT_PLAYER_ID +InformationStateString(0) = "15, 0, 20" +ObservationString(0) = "(15, 1)" +ObservationTensor(0).x: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(0).t: ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [-12.7169] +Returns() = [-12.7169] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] +StringLegalActions() = ["-10", "-9", "-8", "-7", "-6", "-5", "-4", "-3", "-2", "-1", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + +# Apply action "-1" +action: 9 + +# State 5 +# Apply action "6" +action: 16 + +# State 6 +# (20, 2)_a +IsTerminal() = False +History() = [15, 0, 20, 9, 16] +HistoryString() = "15, 0, 20, 9, 16" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.MEAN_FIELD +InformationStateString(0) = "15, 0, 20, 9, 16" +ObservationString(0) = "(20, 2)_a" +ObservationTensor(0).x: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(0).t: ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0] +Returns() = [0] +DistributionSupport() = ['(0, 2)_a', '(1, 2)_a', '(2, 2)_a', '(3, 2)_a', '(4, 2)_a', '(5, 2)_a', '(6, 2)_a', '(7, 2)_a', '(8, 2)_a', 
'(9, 2)_a', '(10, 2)_a', '(11, 2)_a', '(12, 2)_a', '(13, 2)_a', '(14, 2)_a', '(15, 2)_a', '(16, 2)_a', '(17, 2)_a', '(18, 2)_a', '(19, 2)_a', '(20, 2)_a'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 7 +# (20, 2) +IsTerminal() = False +History() = [15, 0, 20, 9, 16] +HistoryString() = "15, 0, 20, 9, 16" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.DEFAULT_PLAYER_ID +InformationStateString(0) = "15, 0, 20, 9, 16" +ObservationString(0) = "(20, 2)" +ObservationTensor(0).x: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(0).t: ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [-0.321904] +Returns() = [-0.321904] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] +StringLegalActions() = ["-10", "-9", "-8", "-7", "-6", "-5", "-4", "-3", "-2", "-1", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + +# Apply action "4" +action: 14 + +# State 8 +# Apply action "6" +action: 16 + +# State 9 +# (9, 3)_a +IsTerminal() = False +History() = [15, 0, 20, 9, 16, 14, 16] +HistoryString() = "15, 0, 20, 9, 16, 14, 16" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.MEAN_FIELD +InformationStateString(0) = "15, 0, 20, 9, 16, 14, 16" +ObservationString(0) = "(9, 3)_a" +ObservationTensor(0).x: ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).t: ◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0] +Returns() = [0] +DistributionSupport() = ['(0, 3)_a', '(1, 3)_a', '(2, 3)_a', '(3, 3)_a', '(4, 3)_a', '(5, 3)_a', '(6, 3)_a', '(7, 3)_a', '(8, 3)_a', '(9, 3)_a', '(10, 3)_a', '(11, 3)_a', '(12, 3)_a', '(13, 3)_a', '(14, 3)_a', '(15, 3)_a', '(16, 3)_a', '(17, 3)_a', '(18, 3)_a', '(19, 3)_a', '(20, 3)_a'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 10 +# Apply action "7" +action: 17 + +# State 11 +# Apply action "1" +action: 11 + +# State 12 +# Set mean field distribution to be uniform +action: update_distribution + +# State 13 +# Apply action "-7" +action: 3 + +# State 14 +# Apply action "-9" +action: 1 + +# State 15 +# Set mean field distribution to be uniform +action: update_distribution + +# State 16 +# Apply action "3" +action: 13 + +# State 17 +# Apply action "8" +action: 18 + +# State 18 +# Set mean field distribution to be uniform +action: update_distribution + +# State 19 +# Apply action "-3" +action: 7 + +# State 20 +# Apply action "-2" +action: 8 + +# State 21 +# Set mean field distribution to be uniform +action: update_distribution + +# State 22 +# Apply action "7" +action: 17 + +# State 23 +# Apply action "-3" +action: 7 + +# State 24 +# Set mean field distribution to be uniform +action: update_distribution + +# State 25 +# Apply action "5" +action: 15 + +# State 26 +# Apply action "-6" +action: 4 + +# State 27 +# Set mean field distribution to be uniform +action: update_distribution + +# State 28 +# Apply action "-4" +action: 6 + +# State 29 +# Apply action "-6" +action: 4 + +# State 30 +# Set mean field distribution to be uniform +action: update_distribution + +# State 31 +# (0, 10) +IsTerminal() = False +History() = [15, 0, 20, 9, 16, 14, 16, 17, 11, 3, 1, 13, 18, 7, 8, 17, 7, 15, 4, 6, 4] +HistoryString() = "15, 0, 20, 9, 16, 14, 16, 17, 11, 3, 1, 13, 18, 7, 8, 17, 7, 15, 4, 6, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.DEFAULT_PLAYER_ID +InformationStateString(0) = "15, 0, 20, 9, 16, 14, 16, 17, 11, 3, 1, 13, 18, 7, 8, 17, 7, 15, 4, 6, 4" +ObservationString(0) = "(0, 10)" +ObservationTensor(0).x: 
◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).t: ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +Rewards() = [-2.1969] +Returns() = [-2.1969] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] +StringLegalActions() = ["-10", "-9", "-8", "-7", "-6", "-5", "-4", "-3", "-2", "-1", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + +# Apply action "-4" +action: 6 + +# State 32 +# Apply action "0" +action: 10 + +# State 33 +# (17, 11)_a +IsTerminal() = False +History() = [15, 0, 20, 9, 16, 14, 16, 17, 11, 3, 1, 13, 18, 7, 8, 17, 7, 15, 4, 6, 4, 6, 10] +HistoryString() = "15, 0, 20, 9, 16, 14, 16, 17, 11, 3, 1, 13, 18, 7, 8, 17, 7, 15, 4, 6, 4, 6, 10" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.MEAN_FIELD +InformationStateString(0) = "15, 0, 20, 9, 16, 14, 16, 17, 11, 3, 1, 13, 18, 7, 8, 17, 7, 15, 4, 6, 4, 6, 10" +ObservationString(0) = "(17, 11)_a" +ObservationTensor(0).x: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯ +ObservationTensor(0).t: ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +Rewards() = [0] +Returns() = [0] +DistributionSupport() = ['(0, 11)_a', '(1, 11)_a', '(2, 11)_a', '(3, 11)_a', '(4, 11)_a', '(5, 11)_a', '(6, 11)_a', '(7, 11)_a', '(8, 11)_a', '(9, 11)_a', '(10, 11)_a', '(11, 11)_a', '(12, 11)_a', '(13, 11)_a', '(14, 11)_a', '(15, 11)_a', '(16, 11)_a', '(17, 11)_a', '(18, 11)_a', '(19, 11)_a', '(20, 11)_a'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 34 +# Apply action "6" +action: 16 + +# State 35 +# Apply action "1" +action: 11 + +# State 36 +# Set mean field distribution to be uniform +action: update_distribution + +# State 37 +# Apply action "10" +action: 20 + +# State 38 +# Apply action "10" +action: 20 + +# State 39 +# Set mean field distribution to be uniform +action: update_distribution + +# State 40 +# Apply action "9" +action: 19 + +# State 41 +# Apply action "8" +action: 18 + +# State 42 +# Set mean field distribution to be uniform +action: update_distribution + +# State 43 +# Apply action "-10" +action: 0 + +# State 44 +# Apply action "-2" +action: 8 + +# State 45 +# Set mean field distribution to be uniform +action: update_distribution + +# State 46 +# Apply action "7" +action: 17 + +# State 47 +# Apply action "4" +action: 14 + +# State 48 +# Set mean field distribution to be uniform +action: update_distribution + +# State 49 +# Apply action "-4" +action: 6 + +# State 50 +# Apply action "6" +action: 16 + +# State 51 +# Set mean field distribution to be uniform +action: update_distribution + +# State 52 +# Apply action "-6" +action: 4 + +# State 53 +# Apply action "0" +action: 10 + +# State 54 +# Set mean field distribution to be uniform +action: update_distribution + +# State 55 +# Apply action "-8" +action: 2 + +# State 56 +# Apply action "8" +action: 18 + +# State 57 +# Set mean field distribution to be uniform +action: update_distribution + +# State 58 +# Apply action "-7" +action: 3 + +# State 59 +# Apply action "10" +action: 20 + +# State 60 +# (17, 20)_a +IsTerminal() = True +History() = [15, 0, 20, 9, 16, 14, 16, 17, 11, 3, 1, 13, 18, 7, 8, 17, 7, 15, 4, 6, 4, 6, 10, 16, 11, 20, 20, 19, 18, 0, 8, 17, 14, 6, 16, 4, 10, 2, 18, 3, 20] +HistoryString() = "15, 0, 20, 9, 16, 14, 16, 17, 11, 3, 1, 13, 18, 7, 8, 17, 7, 15, 4, 6, 4, 6, 10, 16, 11, 20, 20, 19, 18, 0, 8, 17, 14, 6, 16, 4, 10, 2, 18, 3, 20" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.TERMINAL +InformationStateString(0) = "15, 0, 20, 9, 16, 14, 16, 17, 11, 3, 1, 13, 18, 7, 8, 17, 7, 15, 4, 6, 4, 6, 10, 16, 11, 
20, 20, 19, 18, 0, 8, 17, 14, 6, 16, 4, 10, 2, 18, 3, 20" +ObservationString(0) = "(17, 20)_a" +ObservationTensor(0).x: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯ +ObservationTensor(0).t: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +Rewards() = [0] +Returns() = [0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_mfg_predator_prey.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_mfg_predator_prey.txt new file mode 100644 index 0000000..cc5c4cd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_mfg_predator_prey.txt @@ -0,0 +1,388 @@ +game: python_mfg_predator_prey + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.MEAN_FIELD +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Python Mean Field Predator Prey" +GameType.max_num_players = 1000000000 +GameType.min_num_players = 1 +GameType.parameter_specification = ["congestion_coeff", "geometry", "horizon", "init_distrib", "noise_probability", "players", "reward_matrix", "size"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.REWARDS +GameType.short_name = "python_mfg_predator_prey" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 5 +PolicyTensorShape() = [5] +MaxChanceOutcomes() = 25 +GetParameters() = {congestion_coeff=1.0,geometry=0,horizon=10,init_distrib=1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0,noise_probability=0.8,players=3,reward_matrix=0 -1 1 1 0 -1 -1 1 0,size=5} +NumPlayers() = 3 +MinUtility() = -inf +MaxUtility() = inf +UtilitySum() = None +ObservationTensorShape() = x: [5], y: [5], t: [11] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 21 +MaxGameLength() = 10 +ToString() = "python_mfg_predator_prey(congestion_coeff=1.0,geometry=0,horizon=10,init_distrib=1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0,noise_probability=0.8,players=3,reward_matrix=0 -1 1 1 0 -1 -1 1 0,size=5)" + +# State 0 +# position_init_2 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.CHANCE +InformationStateString(0) = "" +InformationStateString(1) = "" +InformationStateString(2) = "" +ObservationString(0) = "position_init_2" +ObservationString(1) = "position_init_2" +ObservationString(2) = "position_init_2" +ObservationTensor(0).x: ◯◯◯◯◯ +ObservationTensor(0).y: ◯◯◯◯◯ +ObservationTensor(0).t: ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).x: ◯◯◯◯◯ +ObservationTensor(1).y: ◯◯◯◯◯ +ObservationTensor(1).t: ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2).x: ◯◯◯◯◯ +ObservationTensor(2).y: ◯◯◯◯◯ +ObservationTensor(2).t: ◉◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(20,1)] +LegalActions() = [20] +StringLegalActions() = ["init_position=20"] + +# Apply action "init_position=20" +action: 20 
+ +# State 1 +# (pop=2, t=0, pos=[0 4]) +IsTerminal() = False +History() = [20] +HistoryString() = "20" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "20" +InformationStateString(1) = "20" +InformationStateString(2) = "20" +ObservationString(0) = "(pop=2, t=0, pos=[0 4])" +ObservationString(1) = "(pop=2, t=0, pos=[0 4])" +ObservationString(2) = "(pop=2, t=0, pos=[0 4])" +ObservationTensor(0).x: ◉◯◯◯◯ +ObservationTensor(0).y: ◯◯◯◯◉ +ObservationTensor(0).t: ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).x: ◉◯◯◯◯ +ObservationTensor(1).y: ◯◯◯◯◉ +ObservationTensor(1).t: ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2).x: ◉◯◯◯◯ +ObservationTensor(2).y: ◯◯◯◯◉ +ObservationTensor(2).t: ◉◯◯◯◯◯◯◯◯◯◯ +Rewards() = [57.5646, 57.5646, 57.5646] +Returns() = [57.5646, 57.5646, 57.5646] +LegalActions() = [0, 1, 2, 3, 4] +StringLegalActions() = ["[0 0]", "[1 0]", "[0 1]", "[ 0 -1]", "[-1 0]"] + +# Apply action "[-1 0]" +action: 4 + +# State 2 +# (pop=2, t=0_a_mu, pos=[0 4]) +IsTerminal() = False +History() = [20, 4] +HistoryString() = "20, 4" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.CHANCE +InformationStateString(0) = "20, 4" +InformationStateString(1) = "20, 4" +InformationStateString(2) = "20, 4" +ObservationString(0) = "(pop=2, t=0_a_mu, pos=[0 4])" +ObservationString(1) = "(pop=2, t=0_a_mu, pos=[0 4])" +ObservationString(2) = "(pop=2, t=0_a_mu, pos=[0 4])" +ObservationTensor(0).x: ◉◯◯◯◯ +ObservationTensor(0).y: ◯◯◯◯◉ +ObservationTensor(0).t: ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).x: ◉◯◯◯◯ +ObservationTensor(1).y: ◯◯◯◯◉ +ObservationTensor(1).t: ◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2).x: ◉◯◯◯◯ +ObservationTensor(2).y: ◯◯◯◯◉ +ObservationTensor(2).t: ◉◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.2), (1,0.2), (2,0.2), (3,0.2), (4,0.2)] +LegalActions() = [0, 1, 2, 3, 4] +StringLegalActions() = ["[0 0]", "[1 0]", "[0 1]", "[ 0 -1]", "[-1 0]"] + +# Apply action "[0 0]" +action: 0 + +# State 3 +# (pop=2, t=1_a, pos=[0 4]) +IsTerminal() = False +History() = [20, 4, 0] +HistoryString() = "20, 4, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.MEAN_FIELD +InformationStateString(0) = "20, 4, 0" +InformationStateString(1) = "20, 4, 0" +InformationStateString(2) = "20, 4, 0" +ObservationString(0) = "(pop=2, t=1_a, pos=[0 4])" +ObservationString(1) = "(pop=2, t=1_a, pos=[0 4])" +ObservationString(2) = "(pop=2, t=1_a, pos=[0 4])" +ObservationTensor(0).x: ◉◯◯◯◯ +ObservationTensor(0).y: ◯◯◯◯◉ +ObservationTensor(0).t: ◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).x: ◉◯◯◯◯ +ObservationTensor(1).y: ◯◯◯◯◉ +ObservationTensor(1).t: ◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(2).x: ◉◯◯◯◯ +ObservationTensor(2).y: ◯◯◯◯◉ +ObservationTensor(2).t: ◯◉◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0] +Returns() = [57.5646, 57.5646, 57.5646] +DistributionSupport() = ['(pop=0, t=1_a, pos=[0 0])', '(pop=1, t=1_a, pos=[0 0])', '(pop=2, t=1_a, pos=[0 0])', '(pop=0, t=1_a, pos=[0 1])', '(pop=1, t=1_a, pos=[0 1])', '(pop=2, t=1_a, pos=[0 1])', '(pop=0, t=1_a, pos=[0 2])', '(pop=1, t=1_a, pos=[0 2])', '(pop=2, t=1_a, pos=[0 2])', '(pop=0, t=1_a, pos=[0 3])', '(pop=1, t=1_a, pos=[0 3])', '(pop=2, t=1_a, pos=[0 3])', '(pop=0, t=1_a, pos=[0 4])', '(pop=1, t=1_a, pos=[0 4])', '(pop=2, t=1_a, pos=[0 4])', '(pop=0, t=1_a, pos=[1 0])', '(pop=1, t=1_a, pos=[1 0])', '(pop=2, t=1_a, pos=[1 0])', '(pop=0, t=1_a, pos=[1 1])', '(pop=1, t=1_a, pos=[1 1])', '(pop=2, t=1_a, pos=[1 1])', '(pop=0, t=1_a, pos=[1 2])', '(pop=1, t=1_a, pos=[1 2])', '(pop=2, t=1_a, pos=[1 2])', '(pop=0, t=1_a, pos=[1 3])', '(pop=1, 
t=1_a, pos=[1 3])', '(pop=2, t=1_a, pos=[1 3])', '(pop=0, t=1_a, pos=[1 4])', '(pop=1, t=1_a, pos=[1 4])', '(pop=2, t=1_a, pos=[1 4])', '(pop=0, t=1_a, pos=[2 0])', '(pop=1, t=1_a, pos=[2 0])', '(pop=2, t=1_a, pos=[2 0])', '(pop=0, t=1_a, pos=[2 1])', '(pop=1, t=1_a, pos=[2 1])', '(pop=2, t=1_a, pos=[2 1])', '(pop=0, t=1_a, pos=[2 2])', '(pop=1, t=1_a, pos=[2 2])', '(pop=2, t=1_a, pos=[2 2])', '(pop=0, t=1_a, pos=[2 3])', '(pop=1, t=1_a, pos=[2 3])', '(pop=2, t=1_a, pos=[2 3])', '(pop=0, t=1_a, pos=[2 4])', '(pop=1, t=1_a, pos=[2 4])', '(pop=2, t=1_a, pos=[2 4])', '(pop=0, t=1_a, pos=[3 0])', '(pop=1, t=1_a, pos=[3 0])', '(pop=2, t=1_a, pos=[3 0])', '(pop=0, t=1_a, pos=[3 1])', '(pop=1, t=1_a, pos=[3 1])', '(pop=2, t=1_a, pos=[3 1])', '(pop=0, t=1_a, pos=[3 2])', '(pop=1, t=1_a, pos=[3 2])', '(pop=2, t=1_a, pos=[3 2])', '(pop=0, t=1_a, pos=[3 3])', '(pop=1, t=1_a, pos=[3 3])', '(pop=2, t=1_a, pos=[3 3])', '(pop=0, t=1_a, pos=[3 4])', '(pop=1, t=1_a, pos=[3 4])', '(pop=2, t=1_a, pos=[3 4])', '(pop=0, t=1_a, pos=[4 0])', '(pop=1, t=1_a, pos=[4 0])', '(pop=2, t=1_a, pos=[4 0])', '(pop=0, t=1_a, pos=[4 1])', '(pop=1, t=1_a, pos=[4 1])', '(pop=2, t=1_a, pos=[4 1])', '(pop=0, t=1_a, pos=[4 2])', '(pop=1, t=1_a, pos=[4 2])', '(pop=2, t=1_a, pos=[4 2])', '(pop=0, t=1_a, pos=[4 3])', '(pop=1, t=1_a, pos=[4 3])', '(pop=2, t=1_a, pos=[4 3])', '(pop=0, t=1_a, pos=[4 4])', '(pop=1, t=1_a, pos=[4 4])', '(pop=2, t=1_a, pos=[4 4])'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 4 +# (pop=2, t=1, pos=[0 4]) +IsTerminal() = False +History() = [20, 4, 0] +HistoryString() = "20, 4, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "20, 4, 0" +InformationStateString(1) = "20, 4, 0" +InformationStateString(2) = "20, 4, 0" +ObservationString(0) = "(pop=2, t=1, pos=[0 4])" +ObservationString(1) = "(pop=2, t=1, pos=[0 4])" +ObservationString(2) = "(pop=2, t=1, pos=[0 4])" +ObservationTensor(0).x: ◉◯◯◯◯ +ObservationTensor(0).y: ◯◯◯◯◉ +ObservationTensor(0).t: ◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).x: ◉◯◯◯◯ +ObservationTensor(1).y: ◯◯◯◯◉ +ObservationTensor(1).t: ◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(2).x: ◉◯◯◯◯ +ObservationTensor(2).y: ◯◯◯◯◉ +ObservationTensor(2).t: ◯◉◯◯◯◯◯◯◯◯◯ +Rewards() = [4.31749, 4.31749, 4.31749] +Returns() = [61.8821, 61.8821, 61.8821] +LegalActions() = [0, 1, 2, 3, 4] +StringLegalActions() = ["[0 0]", "[1 0]", "[0 1]", "[ 0 -1]", "[-1 0]"] + +# Apply action "[1 0]" +action: 1 + +# State 5 +# Apply action "[ 0 -1]" +action: 3 + +# State 6 +# (pop=2, t=2_a, pos=[1 3]) +IsTerminal() = False +History() = [20, 4, 0, 1, 3] +HistoryString() = "20, 4, 0, 1, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.MEAN_FIELD +InformationStateString(0) = "20, 4, 0, 1, 3" +InformationStateString(1) = "20, 4, 0, 1, 3" +InformationStateString(2) = "20, 4, 0, 1, 3" +ObservationString(0) = "(pop=2, t=2_a, pos=[1 3])" +ObservationString(1) = "(pop=2, t=2_a, pos=[1 3])" +ObservationString(2) = "(pop=2, t=2_a, pos=[1 3])" +ObservationTensor(0).x: ◯◉◯◯◯ +ObservationTensor(0).y: ◯◯◯◉◯ +ObservationTensor(0).t: ◯◯◉◯◯◯◯◯◯◯◯ +ObservationTensor(1).x: ◯◉◯◯◯ +ObservationTensor(1).y: ◯◯◯◉◯ +ObservationTensor(1).t: ◯◯◉◯◯◯◯◯◯◯◯ +ObservationTensor(2).x: ◯◉◯◯◯ +ObservationTensor(2).y: ◯◯◯◉◯ +ObservationTensor(2).t: ◯◯◉◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0] +Returns() = [61.8821, 61.8821, 61.8821] +DistributionSupport() = ['(pop=0, t=2_a, pos=[0 0])', '(pop=1, t=2_a, pos=[0 0])', '(pop=2, t=2_a, pos=[0 0])', '(pop=0, 
t=2_a, pos=[0 1])', '(pop=1, t=2_a, pos=[0 1])', '(pop=2, t=2_a, pos=[0 1])', '(pop=0, t=2_a, pos=[0 2])', '(pop=1, t=2_a, pos=[0 2])', '(pop=2, t=2_a, pos=[0 2])', '(pop=0, t=2_a, pos=[0 3])', '(pop=1, t=2_a, pos=[0 3])', '(pop=2, t=2_a, pos=[0 3])', '(pop=0, t=2_a, pos=[0 4])', '(pop=1, t=2_a, pos=[0 4])', '(pop=2, t=2_a, pos=[0 4])', '(pop=0, t=2_a, pos=[1 0])', '(pop=1, t=2_a, pos=[1 0])', '(pop=2, t=2_a, pos=[1 0])', '(pop=0, t=2_a, pos=[1 1])', '(pop=1, t=2_a, pos=[1 1])', '(pop=2, t=2_a, pos=[1 1])', '(pop=0, t=2_a, pos=[1 2])', '(pop=1, t=2_a, pos=[1 2])', '(pop=2, t=2_a, pos=[1 2])', '(pop=0, t=2_a, pos=[1 3])', '(pop=1, t=2_a, pos=[1 3])', '(pop=2, t=2_a, pos=[1 3])', '(pop=0, t=2_a, pos=[1 4])', '(pop=1, t=2_a, pos=[1 4])', '(pop=2, t=2_a, pos=[1 4])', '(pop=0, t=2_a, pos=[2 0])', '(pop=1, t=2_a, pos=[2 0])', '(pop=2, t=2_a, pos=[2 0])', '(pop=0, t=2_a, pos=[2 1])', '(pop=1, t=2_a, pos=[2 1])', '(pop=2, t=2_a, pos=[2 1])', '(pop=0, t=2_a, pos=[2 2])', '(pop=1, t=2_a, pos=[2 2])', '(pop=2, t=2_a, pos=[2 2])', '(pop=0, t=2_a, pos=[2 3])', '(pop=1, t=2_a, pos=[2 3])', '(pop=2, t=2_a, pos=[2 3])', '(pop=0, t=2_a, pos=[2 4])', '(pop=1, t=2_a, pos=[2 4])', '(pop=2, t=2_a, pos=[2 4])', '(pop=0, t=2_a, pos=[3 0])', '(pop=1, t=2_a, pos=[3 0])', '(pop=2, t=2_a, pos=[3 0])', '(pop=0, t=2_a, pos=[3 1])', '(pop=1, t=2_a, pos=[3 1])', '(pop=2, t=2_a, pos=[3 1])', '(pop=0, t=2_a, pos=[3 2])', '(pop=1, t=2_a, pos=[3 2])', '(pop=2, t=2_a, pos=[3 2])', '(pop=0, t=2_a, pos=[3 3])', '(pop=1, t=2_a, pos=[3 3])', '(pop=2, t=2_a, pos=[3 3])', '(pop=0, t=2_a, pos=[3 4])', '(pop=1, t=2_a, pos=[3 4])', '(pop=2, t=2_a, pos=[3 4])', '(pop=0, t=2_a, pos=[4 0])', '(pop=1, t=2_a, pos=[4 0])', '(pop=2, t=2_a, pos=[4 0])', '(pop=0, t=2_a, pos=[4 1])', '(pop=1, t=2_a, pos=[4 1])', '(pop=2, t=2_a, pos=[4 1])', '(pop=0, t=2_a, pos=[4 2])', '(pop=1, t=2_a, pos=[4 2])', '(pop=2, t=2_a, pos=[4 2])', '(pop=0, t=2_a, pos=[4 3])', '(pop=1, t=2_a, pos=[4 3])', '(pop=2, t=2_a, pos=[4 3])', '(pop=0, t=2_a, pos=[4 4])', '(pop=1, t=2_a, pos=[4 4])', '(pop=2, t=2_a, pos=[4 4])'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 7 +# (pop=2, t=2, pos=[1 3]) +IsTerminal() = False +History() = [20, 4, 0, 1, 3] +HistoryString() = "20, 4, 0, 1, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "20, 4, 0, 1, 3" +InformationStateString(1) = "20, 4, 0, 1, 3" +InformationStateString(2) = "20, 4, 0, 1, 3" +ObservationString(0) = "(pop=2, t=2, pos=[1 3])" +ObservationString(1) = "(pop=2, t=2, pos=[1 3])" +ObservationString(2) = "(pop=2, t=2, pos=[1 3])" +ObservationTensor(0).x: ◯◉◯◯◯ +ObservationTensor(0).y: ◯◯◯◉◯ +ObservationTensor(0).t: ◯◯◉◯◯◯◯◯◯◯◯ +ObservationTensor(1).x: ◯◉◯◯◯ +ObservationTensor(1).y: ◯◯◯◉◯ +ObservationTensor(1).t: ◯◯◉◯◯◯◯◯◯◯◯ +ObservationTensor(2).x: ◯◉◯◯◯ +ObservationTensor(2).y: ◯◯◯◉◯ +ObservationTensor(2).t: ◯◯◉◯◯◯◯◯◯◯◯ +Rewards() = [4.31749, 4.31749, 4.31749] +Returns() = [66.1996, 66.1996, 66.1996] +LegalActions() = [0, 1, 2, 3, 4] +StringLegalActions() = ["[0 0]", "[1 0]", "[0 1]", "[ 0 -1]", "[-1 0]"] + +# Apply action "[1 0]" +action: 1 + +# State 8 +# Apply action "[1 0]" +action: 1 + +# State 9 +# (pop=2, t=3_a, pos=[3 3]) +IsTerminal() = False +History() = [20, 4, 0, 1, 3, 1, 1] +HistoryString() = "20, 4, 0, 1, 3, 1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.MEAN_FIELD +InformationStateString(0) = "20, 4, 0, 1, 3, 1, 1" +InformationStateString(1) = "20, 4, 0, 1, 3, 1, 
1" +InformationStateString(2) = "20, 4, 0, 1, 3, 1, 1" +ObservationString(0) = "(pop=2, t=3_a, pos=[3 3])" +ObservationString(1) = "(pop=2, t=3_a, pos=[3 3])" +ObservationString(2) = "(pop=2, t=3_a, pos=[3 3])" +ObservationTensor(0).x: ◯◯◯◉◯ +ObservationTensor(0).y: ◯◯◯◉◯ +ObservationTensor(0).t: ◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(1).x: ◯◯◯◉◯ +ObservationTensor(1).y: ◯◯◯◉◯ +ObservationTensor(1).t: ◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(2).x: ◯◯◯◉◯ +ObservationTensor(2).y: ◯◯◯◉◯ +ObservationTensor(2).t: ◯◯◯◉◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0] +Returns() = [66.1996, 66.1996, 66.1996] +DistributionSupport() = ['(pop=0, t=3_a, pos=[0 0])', '(pop=1, t=3_a, pos=[0 0])', '(pop=2, t=3_a, pos=[0 0])', '(pop=0, t=3_a, pos=[0 1])', '(pop=1, t=3_a, pos=[0 1])', '(pop=2, t=3_a, pos=[0 1])', '(pop=0, t=3_a, pos=[0 2])', '(pop=1, t=3_a, pos=[0 2])', '(pop=2, t=3_a, pos=[0 2])', '(pop=0, t=3_a, pos=[0 3])', '(pop=1, t=3_a, pos=[0 3])', '(pop=2, t=3_a, pos=[0 3])', '(pop=0, t=3_a, pos=[0 4])', '(pop=1, t=3_a, pos=[0 4])', '(pop=2, t=3_a, pos=[0 4])', '(pop=0, t=3_a, pos=[1 0])', '(pop=1, t=3_a, pos=[1 0])', '(pop=2, t=3_a, pos=[1 0])', '(pop=0, t=3_a, pos=[1 1])', '(pop=1, t=3_a, pos=[1 1])', '(pop=2, t=3_a, pos=[1 1])', '(pop=0, t=3_a, pos=[1 2])', '(pop=1, t=3_a, pos=[1 2])', '(pop=2, t=3_a, pos=[1 2])', '(pop=0, t=3_a, pos=[1 3])', '(pop=1, t=3_a, pos=[1 3])', '(pop=2, t=3_a, pos=[1 3])', '(pop=0, t=3_a, pos=[1 4])', '(pop=1, t=3_a, pos=[1 4])', '(pop=2, t=3_a, pos=[1 4])', '(pop=0, t=3_a, pos=[2 0])', '(pop=1, t=3_a, pos=[2 0])', '(pop=2, t=3_a, pos=[2 0])', '(pop=0, t=3_a, pos=[2 1])', '(pop=1, t=3_a, pos=[2 1])', '(pop=2, t=3_a, pos=[2 1])', '(pop=0, t=3_a, pos=[2 2])', '(pop=1, t=3_a, pos=[2 2])', '(pop=2, t=3_a, pos=[2 2])', '(pop=0, t=3_a, pos=[2 3])', '(pop=1, t=3_a, pos=[2 3])', '(pop=2, t=3_a, pos=[2 3])', '(pop=0, t=3_a, pos=[2 4])', '(pop=1, t=3_a, pos=[2 4])', '(pop=2, t=3_a, pos=[2 4])', '(pop=0, t=3_a, pos=[3 0])', '(pop=1, t=3_a, pos=[3 0])', '(pop=2, t=3_a, pos=[3 0])', '(pop=0, t=3_a, pos=[3 1])', '(pop=1, t=3_a, pos=[3 1])', '(pop=2, t=3_a, pos=[3 1])', '(pop=0, t=3_a, pos=[3 2])', '(pop=1, t=3_a, pos=[3 2])', '(pop=2, t=3_a, pos=[3 2])', '(pop=0, t=3_a, pos=[3 3])', '(pop=1, t=3_a, pos=[3 3])', '(pop=2, t=3_a, pos=[3 3])', '(pop=0, t=3_a, pos=[3 4])', '(pop=1, t=3_a, pos=[3 4])', '(pop=2, t=3_a, pos=[3 4])', '(pop=0, t=3_a, pos=[4 0])', '(pop=1, t=3_a, pos=[4 0])', '(pop=2, t=3_a, pos=[4 0])', '(pop=0, t=3_a, pos=[4 1])', '(pop=1, t=3_a, pos=[4 1])', '(pop=2, t=3_a, pos=[4 1])', '(pop=0, t=3_a, pos=[4 2])', '(pop=1, t=3_a, pos=[4 2])', '(pop=2, t=3_a, pos=[4 2])', '(pop=0, t=3_a, pos=[4 3])', '(pop=1, t=3_a, pos=[4 3])', '(pop=2, t=3_a, pos=[4 3])', '(pop=0, t=3_a, pos=[4 4])', '(pop=1, t=3_a, pos=[4 4])', '(pop=2, t=3_a, pos=[4 4])'] + +# Set mean field distribution to be uniform +action: update_distribution + +# State 10 +# Apply action "[1 0]" +action: 1 + +# State 11 +# Apply action "[1 0]" +action: 1 + +# State 12 +# Set mean field distribution to be uniform +action: update_distribution + +# State 13 +# Apply action "[ 0 -1]" +action: 3 + +# State 14 +# Apply action "[1 0]" +action: 1 + +# State 15 +# Set mean field distribution to be uniform +action: update_distribution + +# State 16 +# Apply action "[1 0]" +action: 1 + +# State 17 +# Apply action "[-1 0]" +action: 4 + +# State 18 +# Set mean field distribution to be uniform +action: update_distribution + +# State 19 +# Apply action "[-1 0]" +action: 4 + +# State 20 +# Apply action "[0 1]" +action: 2 + +# State 21 +# Set mean field distribution 
to be uniform +action: update_distribution + +# State 22 +# Apply action "[1 0]" +action: 1 + +# State 23 +# Apply action "[1 0]" +action: 1 + +# State 24 +# Set mean field distribution to be uniform +action: update_distribution + +# State 25 +# Apply action "[0 0]" +action: 0 + +# State 26 +# Apply action "[0 1]" +action: 2 + +# State 27 +# Set mean field distribution to be uniform +action: update_distribution + +# State 28 +# Apply action "[ 0 -1]" +action: 3 + +# State 29 +# Apply action "[0 0]" +action: 0 + +# State 30 +# (pop=2, t=10_a, pos=[4 3]) +IsTerminal() = True +History() = [20, 4, 0, 1, 3, 1, 1, 1, 1, 3, 1, 1, 4, 4, 2, 1, 1, 0, 2, 3, 0] +HistoryString() = "20, 4, 0, 1, 3, 1, 1, 1, 1, 3, 1, 1, 4, 4, 2, 1, 1, 0, 2, 3, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.TERMINAL +InformationStateString(0) = "20, 4, 0, 1, 3, 1, 1, 1, 1, 3, 1, 1, 4, 4, 2, 1, 1, 0, 2, 3, 0" +InformationStateString(1) = "20, 4, 0, 1, 3, 1, 1, 1, 1, 3, 1, 1, 4, 4, 2, 1, 1, 0, 2, 3, 0" +InformationStateString(2) = "20, 4, 0, 1, 3, 1, 1, 1, 1, 3, 1, 1, 4, 4, 2, 1, 1, 0, 2, 3, 0" +ObservationString(0) = "(pop=2, t=10_a, pos=[4 3])" +ObservationString(1) = "(pop=2, t=10_a, pos=[4 3])" +ObservationString(2) = "(pop=2, t=10_a, pos=[4 3])" +ObservationTensor(0).x: ◯◯◯◯◉ +ObservationTensor(0).y: ◯◯◯◉◯ +ObservationTensor(0).t: ◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(1).x: ◯◯◯◯◉ +ObservationTensor(1).y: ◯◯◯◉◯ +ObservationTensor(1).t: ◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(2).x: ◯◯◯◯◉ +ObservationTensor(2).y: ◯◯◯◉◯ +ObservationTensor(2).t: ◯◯◯◯◯◯◯◯◯◯◉ +Rewards() = [0, 0, 0] +Returns() = [96.422, 96.422, 96.422] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_team_dominoes.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_team_dominoes.txt new file mode 100644 index 0000000..dd8d62f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_team_dominoes.txt @@ -0,0 +1,1585 @@ +game: python_team_dominoes + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Python Team Dominoes (4 players)" +GameType.max_num_players = 4 +GameType.min_num_players = 4 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = True +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "python_team_dominoes" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 308 +PolicyTensorShape() = [308] +MaxChanceOutcomes() = 28 +GetParameters() = {} +NumPlayers() = 4 +MinUtility() = -100.0 +MaxUtility() = 100.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = player: [4], hand: [7, 3], actions_history: [25, 5] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 150 +ObservationTensorShape() = player: [4], hand: [7, 3], last_action: [4], hand_sizes: [4] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 33 +MaxGameLength() = 28 +ToString() = "python_team_dominoes()" + +# State 0 +# hand0:[] +# hand1:[] +# hand2:[] +# hand3:[] +# +# board: [] +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.CHANCE 
+InformationStateString(0) = "p0 hand:[] history:[]" +InformationStateString(1) = "p1 hand:[] history:[]" +InformationStateString(2) = "p2 hand:[] history:[]" +InformationStateString(3) = "p3 hand:[] history:[]" +InformationStateTensor(0).player: ◉◯◯◯ +InformationStateTensor(0).hand: ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ +InformationStateTensor(0).actions_history: ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +InformationStateTensor(1).player: ◯◉◯◯ +InformationStateTensor(1).hand: ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ +InformationStateTensor(1).actions_history: ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +InformationStateTensor(2).player: ◯◯◉◯ +InformationStateTensor(2).hand: ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ +InformationStateTensor(2).actions_history: ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +InformationStateTensor(3).player: ◯◯◯◉ +InformationStateTensor(3).hand: ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ +InformationStateTensor(3).actions_history: ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +ObservationString(0) = "p0 hand:[]" +ObservationString(1) = "p1 hand:[]" +ObservationString(2) = "p2 hand:[]" +ObservationString(3) = "p3 hand:[]" +PublicObservationString() = "p0" +PrivateObservationString(0) = "p0 hand:[]" +PrivateObservationString(1) = "p1 hand:[]" +PrivateObservationString(2) = "p2 hand:[]" +PrivateObservationString(3) = "p3 hand:[]" +ObservationTensor(0).player: ◉◯◯◯ +ObservationTensor(0).hand: ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ +ObservationTensor(0).last_action: ◯◯◯◯ +ObservationTensor(0).hand_sizes: ◯◯◯◯ +ObservationTensor(1).player: ◯◉◯◯ +ObservationTensor(1).hand: ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ +ObservationTensor(1).last_action: ◯◯◯◯ +ObservationTensor(1).hand_sizes: ◯◯◯◯ +ObservationTensor(2).player: ◯◯◉◯ +ObservationTensor(2).hand: ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ +ObservationTensor(2).last_action: ◯◯◯◯ +ObservationTensor(2).hand_sizes: ◯◯◯◯ +ObservationTensor(3).player: ◯◯◯◉ +ObservationTensor(3).hand: ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ +ObservationTensor(3).last_action: ◯◯◯◯ +ObservationTensor(3).hand_sizes: ◯◯◯◯ +ChanceOutcomes() = [(0,0.0357143), (1,0.0357143), (2,0.0357143), (3,0.0357143), (4,0.0357143), (5,0.0357143), (6,0.0357143), (7,0.0357143), (8,0.0357143), (9,0.0357143), (10,0.0357143), (11,0.0357143), (12,0.0357143), (13,0.0357143), (14,0.0357143), (15,0.0357143), (16,0.0357143), (17,0.0357143), (18,0.0357143), (19,0.0357143), (20,0.0357143), (21,0.0357143), (22,0.0357143), (23,0.0357143), (24,0.0357143), (25,0.0357143), (26,0.0357143), (27,0.0357143)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] +StringLegalActions() = ["Deal (0.0, 0.0)", "Deal (0.0, 1.0)", "Deal (0.0, 2.0)", "Deal (0.0, 3.0)", "Deal (0.0, 4.0)", "Deal (0.0, 5.0)", "Deal (0.0, 6.0)", "Deal (1.0, 1.0)", "Deal (1.0, 2.0)", "Deal (1.0, 3.0)", "Deal (1.0, 4.0)", "Deal (1.0, 5.0)", 
"Deal (1.0, 6.0)", "Deal (2.0, 2.0)", "Deal (2.0, 3.0)", "Deal (2.0, 4.0)", "Deal (2.0, 5.0)", "Deal (2.0, 6.0)", "Deal (3.0, 3.0)", "Deal (3.0, 4.0)", "Deal (3.0, 5.0)", "Deal (3.0, 6.0)", "Deal (4.0, 4.0)", "Deal (4.0, 5.0)", "Deal (4.0, 6.0)", "Deal (5.0, 5.0)", "Deal (5.0, 6.0)", "Deal (6.0, 6.0)"] + +# Apply action "Deal (0.0, 2.0)" +action: 2 + +# State 1 +# hand0:['(0.0, 2.0)'] +# hand1:[] +# hand2:[] +# hand3:[] +# +# board: [] +IsTerminal() = False +History() = [2] +HistoryString() = "2" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.CHANCE +InformationStateString(0) = "p0 hand:[(0.0, 2.0)] history:[]" +InformationStateString(1) = "p1 hand:[] history:[]" +InformationStateString(2) = "p2 hand:[] history:[]" +InformationStateString(3) = "p3 hand:[] history:[]" +InformationStateTensor(0).player: ◉◯◯◯ +InformationStateTensor(0).hand = [0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(0).actions_history: ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +InformationStateTensor(1).player: ◯◉◯◯ +InformationStateTensor(1).hand: ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ +InformationStateTensor(1).actions_history: ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +InformationStateTensor(2).player: ◯◯◉◯ +InformationStateTensor(2).hand: ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ +InformationStateTensor(2).actions_history: ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +InformationStateTensor(3).player: ◯◯◯◉ +InformationStateTensor(3).hand: ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ +InformationStateTensor(3).actions_history: ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +ObservationString(0) = "p0 hand:[(0.0, 2.0)]" +ObservationString(1) = "p1 hand:[]" +ObservationString(2) = "p2 hand:[]" +ObservationString(3) = "p3 hand:[]" +PublicObservationString() = "p0" +PrivateObservationString(0) = "p0 hand:[(0.0, 2.0)]" +PrivateObservationString(1) = "p1 hand:[]" +PrivateObservationString(2) = "p2 hand:[]" +PrivateObservationString(3) = "p3 hand:[]" +ObservationTensor(0).player: ◉◯◯◯ +ObservationTensor(0).hand = [0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(0).last_action: ◯◯◯◯ +ObservationTensor(0).hand_sizes: ◉◯◯◯ +ObservationTensor(1).player: ◯◉◯◯ +ObservationTensor(1).hand: ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ +ObservationTensor(1).last_action: ◯◯◯◯ +ObservationTensor(1).hand_sizes: ◯◉◯◯ +ObservationTensor(2).player: ◯◯◉◯ +ObservationTensor(2).hand: ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ +ObservationTensor(2).last_action: ◯◯◯◯ +ObservationTensor(2).hand_sizes: ◯◯◯◯ +ObservationTensor(3).player: ◯◯◯◉ +ObservationTensor(3).hand: ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ +ObservationTensor(3).last_action: ◯◯◯◯ +ObservationTensor(3).hand_sizes: ◯◯◯◯ +ChanceOutcomes() = [(0,0.037037), (1,0.037037), (3,0.037037), (4,0.037037), 
(5,0.037037), (6,0.037037), (7,0.037037), (8,0.037037), (9,0.037037), (10,0.037037), (11,0.037037), (12,0.037037), (13,0.037037), (14,0.037037), (15,0.037037), (16,0.037037), (17,0.037037), (18,0.037037), (19,0.037037), (20,0.037037), (21,0.037037), (22,0.037037), (23,0.037037), (24,0.037037), (25,0.037037), (26,0.037037), (27,0.037037)] +LegalActions() = [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] +StringLegalActions() = ["Deal (0.0, 0.0)", "Deal (0.0, 1.0)", "Deal (0.0, 3.0)", "Deal (0.0, 4.0)", "Deal (0.0, 5.0)", "Deal (0.0, 6.0)", "Deal (1.0, 1.0)", "Deal (1.0, 2.0)", "Deal (1.0, 3.0)", "Deal (1.0, 4.0)", "Deal (1.0, 5.0)", "Deal (1.0, 6.0)", "Deal (2.0, 2.0)", "Deal (2.0, 3.0)", "Deal (2.0, 4.0)", "Deal (2.0, 5.0)", "Deal (2.0, 6.0)", "Deal (3.0, 3.0)", "Deal (3.0, 4.0)", "Deal (3.0, 5.0)", "Deal (3.0, 6.0)", "Deal (4.0, 4.0)", "Deal (4.0, 5.0)", "Deal (4.0, 6.0)", "Deal (5.0, 5.0)", "Deal (5.0, 6.0)", "Deal (6.0, 6.0)"] + +# Apply action "Deal (1.0, 6.0)" +action: 12 + +# State 2 +# Apply action "Deal (3.0, 4.0)" +action: 19 + +# State 3 +# Apply action "Deal (5.0, 6.0)" +action: 26 + +# State 4 +# Apply action "Deal (6.0, 6.0)" +action: 27 + +# State 5 +# Apply action "Deal (2.0, 4.0)" +action: 15 + +# State 6 +# Apply action "Deal (4.0, 6.0)" +action: 24 + +# State 7 +# Apply action "Deal (4.0, 5.0)" +action: 23 + +# State 8 +# Apply action "Deal (0.0, 5.0)" +action: 5 + +# State 9 +# Apply action "Deal (1.0, 1.0)" +action: 7 + +# State 10 +# Apply action "Deal (2.0, 6.0)" +action: 17 + +# State 11 +# Apply action "Deal (1.0, 5.0)" +action: 11 + +# State 12 +# Apply action "Deal (0.0, 0.0)" +action: 0 + +# State 13 +# Apply action "Deal (2.0, 2.0)" +action: 13 + +# State 14 +# Apply action "Deal (0.0, 3.0)" +action: 3 + +# State 15 +# Apply action "Deal (3.0, 3.0)" +action: 18 + +# State 16 +# Apply action "Deal (0.0, 1.0)" +action: 1 + +# State 17 +# Apply action "Deal (2.0, 5.0)" +action: 16 + +# State 18 +# Apply action "Deal (3.0, 6.0)" +action: 21 + +# State 19 +# Apply action "Deal (1.0, 3.0)" +action: 9 + +# State 20 +# Apply action "Deal (1.0, 4.0)" +action: 10 + +# State 21 +# Apply action "Deal (0.0, 6.0)" +action: 6 + +# State 22 +# Apply action "Deal (0.0, 4.0)" +action: 4 + +# State 23 +# Apply action "Deal (5.0, 5.0)" +action: 25 + +# State 24 +# Apply action "Deal (2.0, 3.0)" +action: 14 + +# State 25 +# Apply action "Deal (3.0, 5.0)" +action: 20 + +# State 26 +# Apply action "Deal (4.0, 4.0)" +action: 22 + +# State 27 +# Apply action "Deal (1.0, 2.0)" +action: 8 + +# State 28 +# hand0:['(0.0, 0.0)', '(0.0, 1.0)', '(0.0, 2.0)', '(0.0, 5.0)', '(1.0, 4.0)', '(2.0, 3.0)', '(6.0, 6.0)'] +# hand1:['(0.0, 6.0)', '(1.0, 1.0)', '(1.0, 6.0)', '(2.0, 2.0)', '(2.0, 4.0)', '(2.0, 5.0)', '(3.0, 5.0)'] +# hand2:['(0.0, 3.0)', '(0.0, 4.0)', '(2.0, 6.0)', '(3.0, 4.0)', '(3.0, 6.0)', '(4.0, 4.0)', '(4.0, 6.0)'] +# hand3:['(1.0, 2.0)', '(1.0, 3.0)', '(1.0, 5.0)', '(3.0, 3.0)', '(4.0, 5.0)', '(5.0, 5.0)', '(5.0, 6.0)'] +# +# board: [] +IsTerminal() = False +History() = [2, 12, 19, 26, 27, 15, 24, 23, 5, 7, 17, 11, 0, 13, 3, 18, 1, 16, 21, 9, 10, 6, 4, 25, 14, 20, 22, 8] +HistoryString() = "2, 12, 19, 26, 27, 15, 24, 23, 5, 7, 17, 11, 0, 13, 3, 18, 1, 16, 21, 9, 10, 6, 4, 25, 14, 20, 22, 8" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "p0 hand:[(0.0, 0.0), (0.0, 1.0), (0.0, 2.0), (0.0, 5.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)] history:[]" +InformationStateString(1) = "p1 
hand:[(0.0, 6.0), (1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 4.0), (2.0, 5.0), (3.0, 5.0)] history:[]" +InformationStateString(2) = "p2 hand:[(0.0, 3.0), (0.0, 4.0), (2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)] history:[]" +InformationStateString(3) = "p3 hand:[(1.0, 2.0), (1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0), (5.0, 6.0)] history:[]" +InformationStateTensor(0).player: ◉◯◯◯ +InformationStateTensor(0).hand = [0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 5.0, 1.0, 1.0, 4.0, 1.0, 2.0, 3.0, 1.0, 6.0, 6.0, 1.0] +InformationStateTensor(0).actions_history: ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +InformationStateTensor(1).player: ◯◉◯◯ +InformationStateTensor(1).hand = [0.0, 6.0, 1.0, 1.0, 1.0, 1.0, 1.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 4.0, 1.0, 2.0, 5.0, 1.0, 3.0, 5.0, 1.0] +InformationStateTensor(1).actions_history: ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +InformationStateTensor(2).player: ◯◯◉◯ +InformationStateTensor(2).hand = [0.0, 3.0, 1.0, 0.0, 4.0, 1.0, 2.0, 6.0, 1.0, 3.0, 4.0, 1.0, 3.0, 6.0, 1.0, 4.0, 4.0, 1.0, 4.0, 6.0, 1.0] +InformationStateTensor(2).actions_history: ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +InformationStateTensor(3).player: ◯◯◯◉ +InformationStateTensor(3).hand = [1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 5.0, 6.0, 1.0] +InformationStateTensor(3).actions_history: ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +ObservationString(0) = "p0 hand:[(0.0, 0.0), (0.0, 1.0), (0.0, 2.0), (0.0, 5.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)]" +ObservationString(1) = "p1 hand:[(0.0, 6.0), (1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 4.0), (2.0, 5.0), (3.0, 5.0)]" +ObservationString(2) = "p2 hand:[(0.0, 3.0), (0.0, 4.0), (2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)]" +ObservationString(3) = "p3 hand:[(1.0, 2.0), (1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0), (5.0, 6.0)]" +PublicObservationString() = "p0" +PrivateObservationString(0) = "p0 hand:[(0.0, 0.0), (0.0, 1.0), (0.0, 2.0), (0.0, 5.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)]" +PrivateObservationString(1) = "p1 hand:[(0.0, 6.0), (1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 4.0), (2.0, 5.0), (3.0, 5.0)]" +PrivateObservationString(2) = "p2 hand:[(0.0, 3.0), (0.0, 4.0), (2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)]" +PrivateObservationString(3) = "p3 hand:[(1.0, 2.0), (1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0), (5.0, 6.0)]" +ObservationTensor(0).player: ◉◯◯◯ +ObservationTensor(0).hand = [0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 5.0, 1.0, 1.0, 4.0, 1.0, 2.0, 3.0, 1.0, 6.0, 6.0, 1.0] +ObservationTensor(0).last_action: ◯◯◯◯ +ObservationTensor(0).hand_sizes = [7.0, 7.0, 0.0, 0.0] +ObservationTensor(1).player: ◯◉◯◯ +ObservationTensor(1).hand = [0.0, 6.0, 1.0, 1.0, 1.0, 1.0, 1.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 4.0, 1.0, 2.0, 5.0, 1.0, 3.0, 5.0, 1.0] +ObservationTensor(1).last_action: ◯◯◯◯ 
+ObservationTensor(1).hand_sizes = [7.0, 7.0, 0.0, 0.0] +ObservationTensor(2).player: ◯◯◉◯ +ObservationTensor(2).hand = [0.0, 3.0, 1.0, 0.0, 4.0, 1.0, 2.0, 6.0, 1.0, 3.0, 4.0, 1.0, 3.0, 6.0, 1.0, 4.0, 4.0, 1.0, 4.0, 6.0, 1.0] +ObservationTensor(2).last_action: ◯◯◯◯ +ObservationTensor(2).hand_sizes = [7.0, 7.0, 0.0, 0.0] +ObservationTensor(3).player: ◯◯◯◉ +ObservationTensor(3).hand = [1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 5.0, 6.0, 1.0] +ObservationTensor(3).last_action: ◯◯◯◯ +ObservationTensor(3).hand_sizes = [7.0, 7.0, 0.0, 0.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0, 2, 5, 14, 28, 39, 75] +StringLegalActions() = ["p0 tile:(0.0, 0.0) pip:None", "p0 tile:(0.0, 1.0) pip:None", "p0 tile:(0.0, 2.0) pip:None", "p0 tile:(0.0, 5.0) pip:None", "p0 tile:(1.0, 4.0) pip:None", "p0 tile:(2.0, 3.0) pip:None", "p0 tile:(6.0, 6.0) pip:None"] + +# Apply action "p0 tile:(0.0, 0.0) pip:None" +action: 0 + +# State 29 +# hand0:['(0.0, 1.0)', '(0.0, 2.0)', '(0.0, 5.0)', '(1.0, 4.0)', '(2.0, 3.0)', '(6.0, 6.0)'] +# hand1:['(0.0, 6.0)', '(1.0, 1.0)', '(1.0, 6.0)', '(2.0, 2.0)', '(2.0, 4.0)', '(2.0, 5.0)', '(3.0, 5.0)'] +# hand2:['(0.0, 3.0)', '(0.0, 4.0)', '(2.0, 6.0)', '(3.0, 4.0)', '(3.0, 6.0)', '(4.0, 4.0)', '(4.0, 6.0)'] +# hand3:['(1.0, 2.0)', '(1.0, 3.0)', '(1.0, 5.0)', '(3.0, 3.0)', '(4.0, 5.0)', '(5.0, 5.0)', '(5.0, 6.0)'] +# +# board: [(0.0, 0.0)] +IsTerminal() = False +History() = [2, 12, 19, 26, 27, 15, 24, 23, 5, 7, 17, 11, 0, 13, 3, 18, 1, 16, 21, 9, 10, 6, 4, 25, 14, 20, 22, 8, 0] +HistoryString() = "2, 12, 19, 26, 27, 15, 24, 23, 5, 7, 17, 11, 0, 13, 3, 18, 1, 16, 21, 9, 10, 6, 4, 25, 14, 20, 22, 8, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (0.0, 5.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None]" +InformationStateString(1) = "p1 hand:[(0.0, 6.0), (1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 4.0), (2.0, 5.0), (3.0, 5.0)] history:[p0 tile:(0.0, 0.0) pip:None]" +InformationStateString(2) = "p2 hand:[(0.0, 3.0), (0.0, 4.0), (2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None]" +InformationStateString(3) = "p3 hand:[(1.0, 2.0), (1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0), (5.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None]" +InformationStateTensor(0).player: ◉◯◯◯ +InformationStateTensor(0).hand = [0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 5.0, 1.0, 1.0, 4.0, 1.0, 2.0, 3.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0] +InformationStateTensor(0).actions_history: ◯◯◯◯◉ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +InformationStateTensor(1).player: ◯◉◯◯ +InformationStateTensor(1).hand = [0.0, 6.0, 1.0, 1.0, 1.0, 1.0, 1.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 4.0, 1.0, 2.0, 5.0, 1.0, 3.0, 5.0, 1.0] +InformationStateTensor(1).actions_history: ◯◯◯◯◉ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +InformationStateTensor(2).player: ◯◯◉◯ +InformationStateTensor(2).hand = [0.0, 3.0, 1.0, 0.0, 4.0, 1.0, 2.0, 6.0, 1.0, 3.0, 4.0, 1.0, 3.0, 6.0, 1.0, 4.0, 4.0, 1.0, 4.0, 6.0, 1.0] +InformationStateTensor(2).actions_history: ◯◯◯◯◉ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + 
◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +InformationStateTensor(3).player: ◯◯◯◉ +InformationStateTensor(3).hand = [1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 5.0, 6.0, 1.0] +InformationStateTensor(3).actions_history: ◯◯◯◯◉ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ + ◯◯◯◯◯ +ObservationString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (0.0, 5.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)] last_action:p0 tile:(0.0, 0.0) pip:None" +ObservationString(1) = "p1 hand:[(0.0, 6.0), (1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 4.0), (2.0, 5.0), (3.0, 5.0)] last_action:p0 tile:(0.0, 0.0) pip:None" +ObservationString(2) = "p2 hand:[(0.0, 3.0), (0.0, 4.0), (2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)] last_action:p0 tile:(0.0, 0.0) pip:None" +ObservationString(3) = "p3 hand:[(1.0, 2.0), (1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0), (5.0, 6.0)] last_action:p0 tile:(0.0, 0.0) pip:None" +PublicObservationString() = "p0 last_action:p0 tile:(0.0, 0.0) pip:None" +PrivateObservationString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (0.0, 5.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)]" +PrivateObservationString(1) = "p1 hand:[(0.0, 6.0), (1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 4.0), (2.0, 5.0), (3.0, 5.0)]" +PrivateObservationString(2) = "p2 hand:[(0.0, 3.0), (0.0, 4.0), (2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)]" +PrivateObservationString(3) = "p3 hand:[(1.0, 2.0), (1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0), (5.0, 6.0)]" +ObservationTensor(0).player: ◉◯◯◯ +ObservationTensor(0).hand = [0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 5.0, 1.0, 1.0, 4.0, 1.0, 2.0, 3.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0] +ObservationTensor(0).last_action: ◯◯◯◯ +ObservationTensor(0).hand_sizes = [6.0, 7.0, 0.0, 0.0] +ObservationTensor(1).player: ◯◉◯◯ +ObservationTensor(1).hand = [0.0, 6.0, 1.0, 1.0, 1.0, 1.0, 1.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 4.0, 1.0, 2.0, 5.0, 1.0, 3.0, 5.0, 1.0] +ObservationTensor(1).last_action: ◯◯◯◯ +ObservationTensor(1).hand_sizes = [7.0, 6.0, 0.0, 0.0] +ObservationTensor(2).player: ◯◯◉◯ +ObservationTensor(2).hand = [0.0, 3.0, 1.0, 0.0, 4.0, 1.0, 2.0, 6.0, 1.0, 3.0, 4.0, 1.0, 3.0, 6.0, 1.0, 4.0, 4.0, 1.0, 4.0, 6.0, 1.0] +ObservationTensor(2).last_action: ◯◯◯◯ +ObservationTensor(2).hand_sizes = [7.0, 7.0, 0.0, 0.0] +ObservationTensor(3).player: ◯◯◯◉ +ObservationTensor(3).hand = [1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 5.0, 6.0, 1.0] +ObservationTensor(3).last_action: ◯◯◯◯ +ObservationTensor(3).hand_sizes = [7.0, 7.0, 0.0, 0.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [95] +StringLegalActions() = ["p1 tile:(0.0, 6.0) pip:0.0"] + +# Apply action "p1 tile:(0.0, 6.0) pip:0.0" +action: 95 + +# State 30 +# hand0:['(0.0, 1.0)', '(0.0, 2.0)', '(0.0, 5.0)', '(1.0, 4.0)', '(2.0, 3.0)', '(6.0, 6.0)'] +# hand1:['(1.0, 1.0)', '(1.0, 6.0)', '(2.0, 2.0)', '(2.0, 4.0)', '(2.0, 5.0)', '(3.0, 5.0)'] +# hand2:['(0.0, 3.0)', '(0.0, 4.0)', '(2.0, 6.0)', '(3.0, 4.0)', '(3.0, 6.0)', '(4.0, 4.0)', '(4.0, 6.0)'] +# hand3:['(1.0, 2.0)', '(1.0, 3.0)', '(1.0, 5.0)', '(3.0, 3.0)', '(4.0, 5.0)', '(5.0, 5.0)', '(5.0, 6.0)'] +# +# board: [(6.0, 0.0), (0.0, 0.0)] +IsTerminal() = False +History() = [2, 12, 19, 26, 27, 15, 24, 23, 5, 7, 17, 11, 0, 13, 3, 18, 1, 
16, 21, 9, 10, 6, 4, 25, 14, 20, 22, 8, 0, 95] +HistoryString() = "2, 12, 19, 26, 27, 15, 24, 23, 5, 7, 17, 11, 0, 13, 3, 18, 1, 16, 21, 9, 10, 6, 4, 25, 14, 20, 22, 8, 0, 95" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (0.0, 5.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0]" +InformationStateString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 4.0), (2.0, 5.0), (3.0, 5.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0]" +InformationStateString(2) = "p2 hand:[(0.0, 3.0), (0.0, 4.0), (2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0]" +InformationStateString(3) = "p3 hand:[(1.0, 2.0), (1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0), (5.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0]" +InformationStateTensor(0).player: ◉◯◯◯ +InformationStateTensor(0).hand = [0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 5.0, 1.0, 1.0, 4.0, 1.0, 2.0, 3.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0] +InformationStateTensor(0).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).player: ◯◉◯◯ +InformationStateTensor(1).hand = [1.0, 1.0, 1.0, 1.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 4.0, 1.0, 2.0, 5.0, 1.0, 3.0, 5.0, 1.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(2).player: ◯◯◉◯ +InformationStateTensor(2).hand = [0.0, 3.0, 1.0, 0.0, 4.0, 1.0, 2.0, 6.0, 1.0, 3.0, 4.0, 1.0, 3.0, 6.0, 1.0, 4.0, 4.0, 1.0, 4.0, 6.0, 1.0] +InformationStateTensor(2).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] 
+InformationStateTensor(3).player: ◯◯◯◉ +InformationStateTensor(3).hand = [1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 5.0, 6.0, 1.0] +InformationStateTensor(3).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (0.0, 5.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)] last_action:p1 tile:(0.0, 6.0) pip:0.0" +ObservationString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 4.0), (2.0, 5.0), (3.0, 5.0)] last_action:p1 tile:(0.0, 6.0) pip:0.0" +ObservationString(2) = "p2 hand:[(0.0, 3.0), (0.0, 4.0), (2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)] last_action:p1 tile:(0.0, 6.0) pip:0.0" +ObservationString(3) = "p3 hand:[(1.0, 2.0), (1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0), (5.0, 6.0)] last_action:p1 tile:(0.0, 6.0) pip:0.0" +PublicObservationString() = "p0 last_action:p1 tile:(0.0, 6.0) pip:0.0" +PrivateObservationString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (0.0, 5.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)]" +PrivateObservationString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 4.0), (2.0, 5.0), (3.0, 5.0)]" +PrivateObservationString(2) = "p2 hand:[(0.0, 3.0), (0.0, 4.0), (2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)]" +PrivateObservationString(3) = "p3 hand:[(1.0, 2.0), (1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0), (5.0, 6.0)]" +ObservationTensor(0).player: ◉◯◯◯ +ObservationTensor(0).hand = [0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 5.0, 1.0, 1.0, 4.0, 1.0, 2.0, 3.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0] +ObservationTensor(0).last_action = [0.0, 6.0, 0.0, 1.0] +ObservationTensor(0).hand_sizes = [6.0, 6.0, 0.0, 0.0] +ObservationTensor(1).player: ◯◉◯◯ +ObservationTensor(1).hand = [1.0, 1.0, 1.0, 1.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 4.0, 1.0, 2.0, 5.0, 1.0, 3.0, 5.0, 1.0, 0.0, 0.0, 0.0] +ObservationTensor(1).last_action = [0.0, 6.0, 0.0, 1.0] +ObservationTensor(1).hand_sizes = [6.0, 6.0, 0.0, 0.0] +ObservationTensor(2).player: ◯◯◉◯ +ObservationTensor(2).hand = [0.0, 3.0, 1.0, 0.0, 4.0, 1.0, 2.0, 6.0, 1.0, 3.0, 4.0, 1.0, 3.0, 6.0, 1.0, 4.0, 4.0, 1.0, 4.0, 6.0, 1.0] +ObservationTensor(2).last_action = [0.0, 6.0, 0.0, 1.0] +ObservationTensor(2).hand_sizes = [7.0, 7.0, 0.0, 0.0] +ObservationTensor(3).player: ◯◯◯◉ +ObservationTensor(3).hand = [1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 5.0, 6.0, 1.0] +ObservationTensor(3).last_action = [0.0, 6.0, 0.0, 1.0] +ObservationTensor(3).hand_sizes = [7.0, 7.0, 0.0, 0.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [163, 166, 204, 215, 223] +StringLegalActions() = ["p2 tile:(0.0, 3.0) pip:0.0", "p2 tile:(0.0, 4.0) pip:0.0", "p2 tile:(2.0, 6.0) pip:6.0", "p2 tile:(3.0, 6.0) pip:6.0", "p2 tile:(4.0, 6.0) pip:6.0"] + +# Apply action "p2 tile:(0.0, 4.0) pip:0.0" +action: 166 + +# State 31 +# hand0:['(0.0, 1.0)', '(0.0, 2.0)', '(0.0, 5.0)', '(1.0, 4.0)', 
'(2.0, 3.0)', '(6.0, 6.0)'] +# hand1:['(1.0, 1.0)', '(1.0, 6.0)', '(2.0, 2.0)', '(2.0, 4.0)', '(2.0, 5.0)', '(3.0, 5.0)'] +# hand2:['(0.0, 3.0)', '(2.0, 6.0)', '(3.0, 4.0)', '(3.0, 6.0)', '(4.0, 4.0)', '(4.0, 6.0)'] +# hand3:['(1.0, 2.0)', '(1.0, 3.0)', '(1.0, 5.0)', '(3.0, 3.0)', '(4.0, 5.0)', '(5.0, 5.0)', '(5.0, 6.0)'] +# +# board: [(6.0, 0.0), (0.0, 0.0), (0.0, 4.0)] +IsTerminal() = False +History() = [2, 12, 19, 26, 27, 15, 24, 23, 5, 7, 17, 11, 0, 13, 3, 18, 1, 16, 21, 9, 10, 6, 4, 25, 14, 20, 22, 8, 0, 95, 166] +HistoryString() = "2, 12, 19, 26, 27, 15, 24, 23, 5, 7, 17, 11, 0, 13, 3, 18, 1, 16, 21, 9, 10, 6, 4, 25, 14, 20, 22, 8, 0, 95, 166" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (0.0, 5.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0]" +InformationStateString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 4.0), (2.0, 5.0), (3.0, 5.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0]" +InformationStateString(2) = "p2 hand:[(0.0, 3.0), (2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0]" +InformationStateString(3) = "p3 hand:[(1.0, 2.0), (1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0), (5.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0]" +InformationStateTensor(0).player: ◉◯◯◯ +InformationStateTensor(0).hand = [0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 5.0, 1.0, 1.0, 4.0, 1.0, 2.0, 3.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0] +InformationStateTensor(0).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).player: ◯◉◯◯ +InformationStateTensor(1).hand = [1.0, 1.0, 1.0, 1.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 4.0, 1.0, 2.0, 5.0, 1.0, 3.0, 5.0, 1.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(2).player: ◯◯◉◯ +InformationStateTensor(2).hand = [0.0, 3.0, 1.0, 2.0, 6.0, 1.0, 3.0, 4.0, 1.0, 3.0, 6.0, 1.0, 4.0, 4.0, 1.0, 4.0, 6.0, 1.0, 0.0, 0.0, 0.0] +InformationStateTensor(2).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 
4.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(3).player: ◯◯◯◉ +InformationStateTensor(3).hand = [1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 5.0, 6.0, 1.0] +InformationStateTensor(3).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (0.0, 5.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)] last_action:p2 tile:(0.0, 4.0) pip:0.0" +ObservationString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 4.0), (2.0, 5.0), (3.0, 5.0)] last_action:p2 tile:(0.0, 4.0) pip:0.0" +ObservationString(2) = "p2 hand:[(0.0, 3.0), (2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)] last_action:p2 tile:(0.0, 4.0) pip:0.0" +ObservationString(3) = "p3 hand:[(1.0, 2.0), (1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0), (5.0, 6.0)] last_action:p2 tile:(0.0, 4.0) pip:0.0" +PublicObservationString() = "p0 last_action:p2 tile:(0.0, 4.0) pip:0.0" +PrivateObservationString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (0.0, 5.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)]" +PrivateObservationString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 4.0), (2.0, 5.0), (3.0, 5.0)]" +PrivateObservationString(2) = "p2 hand:[(0.0, 3.0), (2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)]" +PrivateObservationString(3) = "p3 hand:[(1.0, 2.0), (1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0), (5.0, 6.0)]" +ObservationTensor(0).player: ◉◯◯◯ +ObservationTensor(0).hand = [0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 5.0, 1.0, 1.0, 4.0, 1.0, 2.0, 3.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0] +ObservationTensor(0).last_action = [0.0, 4.0, 0.0, 2.0] +ObservationTensor(0).hand_sizes = [6.0, 6.0, 0.0, 0.0] +ObservationTensor(1).player: ◯◉◯◯ +ObservationTensor(1).hand = [1.0, 1.0, 1.0, 1.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 4.0, 1.0, 2.0, 5.0, 1.0, 3.0, 5.0, 1.0, 0.0, 0.0, 0.0] +ObservationTensor(1).last_action = [0.0, 4.0, 0.0, 2.0] +ObservationTensor(1).hand_sizes = [6.0, 6.0, 0.0, 0.0] +ObservationTensor(2).player: ◯◯◉◯ +ObservationTensor(2).hand = [0.0, 3.0, 1.0, 2.0, 6.0, 1.0, 3.0, 4.0, 1.0, 3.0, 6.0, 1.0, 4.0, 4.0, 1.0, 4.0, 6.0, 1.0, 0.0, 0.0, 0.0] +ObservationTensor(2).last_action = [0.0, 4.0, 0.0, 2.0] +ObservationTensor(2).hand_sizes = [6.0, 7.0, 0.0, 0.0] +ObservationTensor(3).player: ◯◯◯◉ +ObservationTensor(3).hand = [1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 1.0, 5.0, 
5.0, 1.0, 5.0, 6.0, 1.0] +ObservationTensor(3).last_action = [0.0, 4.0, 0.0, 2.0] +ObservationTensor(3).hand_sizes = [7.0, 6.0, 0.0, 0.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [296, 305] +StringLegalActions() = ["p3 tile:(4.0, 5.0) pip:4.0", "p3 tile:(5.0, 6.0) pip:6.0"] + +# Apply action "p3 tile:(5.0, 6.0) pip:6.0" +action: 305 + +# State 32 +# hand0:['(0.0, 1.0)', '(0.0, 2.0)', '(0.0, 5.0)', '(1.0, 4.0)', '(2.0, 3.0)', '(6.0, 6.0)'] +# hand1:['(1.0, 1.0)', '(1.0, 6.0)', '(2.0, 2.0)', '(2.0, 4.0)', '(2.0, 5.0)', '(3.0, 5.0)'] +# hand2:['(0.0, 3.0)', '(2.0, 6.0)', '(3.0, 4.0)', '(3.0, 6.0)', '(4.0, 4.0)', '(4.0, 6.0)'] +# hand3:['(1.0, 2.0)', '(1.0, 3.0)', '(1.0, 5.0)', '(3.0, 3.0)', '(4.0, 5.0)', '(5.0, 5.0)'] +# +# board: [(5.0, 6.0), (6.0, 0.0), (0.0, 0.0), (0.0, 4.0)] +IsTerminal() = False +History() = [2, 12, 19, 26, 27, 15, 24, 23, 5, 7, 17, 11, 0, 13, 3, 18, 1, 16, 21, 9, 10, 6, 4, 25, 14, 20, 22, 8, 0, 95, 166, 305] +HistoryString() = "2, 12, 19, 26, 27, 15, 24, 23, 5, 7, 17, 11, 0, 13, 3, 18, 1, 16, 21, 9, 10, 6, 4, 25, 14, 20, 22, 8, 0, 95, 166, 305" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (0.0, 5.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0]" +InformationStateString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 4.0), (2.0, 5.0), (3.0, 5.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0]" +InformationStateString(2) = "p2 hand:[(0.0, 3.0), (2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0]" +InformationStateString(3) = "p3 hand:[(1.0, 2.0), (1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0]" +InformationStateTensor(0).player: ◉◯◯◯ +InformationStateTensor(0).hand = [0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 5.0, 1.0, 1.0, 4.0, 1.0, 2.0, 3.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0] +InformationStateTensor(0).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).player: ◯◉◯◯ +InformationStateTensor(1).hand = [1.0, 1.0, 1.0, 1.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 4.0, 1.0, 2.0, 5.0, 1.0, 3.0, 5.0, 1.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(2).player: ◯◯◉◯ +InformationStateTensor(2).hand = [0.0, 3.0, 1.0, 2.0, 6.0, 1.0, 3.0, 4.0, 1.0, 3.0, 6.0, 1.0, 4.0, 4.0, 1.0, 4.0, 6.0, 1.0, 0.0, 0.0, 0.0] +InformationStateTensor(2).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(3).player: ◯◯◯◉ +InformationStateTensor(3).hand = [1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 0.0, 0.0, 0.0] +InformationStateTensor(3).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (0.0, 5.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)] last_action:p3 tile:(5.0, 6.0) pip:6.0" +ObservationString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 4.0), (2.0, 5.0), (3.0, 5.0)] last_action:p3 tile:(5.0, 6.0) pip:6.0" +ObservationString(2) = "p2 hand:[(0.0, 3.0), (2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)] last_action:p3 tile:(5.0, 6.0) pip:6.0" +ObservationString(3) = "p3 hand:[(1.0, 2.0), (1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0)] last_action:p3 tile:(5.0, 6.0) pip:6.0" +PublicObservationString() = "p0 last_action:p3 tile:(5.0, 6.0) pip:6.0" +PrivateObservationString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (0.0, 5.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)]" +PrivateObservationString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 4.0), (2.0, 5.0), (3.0, 5.0)]" +PrivateObservationString(2) = "p2 hand:[(0.0, 3.0), (2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)]" +PrivateObservationString(3) = "p3 hand:[(1.0, 2.0), (1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0)]" +ObservationTensor(0).player: ◉◯◯◯ +ObservationTensor(0).hand = [0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 5.0, 1.0, 1.0, 4.0, 1.0, 2.0, 3.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0] +ObservationTensor(0).last_action = [5.0, 6.0, 6.0, 3.0] +ObservationTensor(0).hand_sizes = [6.0, 6.0, 0.0, 0.0] +ObservationTensor(1).player: ◯◉◯◯ +ObservationTensor(1).hand = [1.0, 1.0, 1.0, 1.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 4.0, 1.0, 2.0, 5.0, 1.0, 3.0, 5.0, 1.0, 0.0, 0.0, 0.0] 
+ObservationTensor(1).last_action = [5.0, 6.0, 6.0, 3.0] +ObservationTensor(1).hand_sizes = [6.0, 6.0, 0.0, 0.0] +ObservationTensor(2).player: ◯◯◉◯ +ObservationTensor(2).hand = [0.0, 3.0, 1.0, 2.0, 6.0, 1.0, 3.0, 4.0, 1.0, 3.0, 6.0, 1.0, 4.0, 4.0, 1.0, 4.0, 6.0, 1.0, 0.0, 0.0, 0.0] +ObservationTensor(2).last_action = [5.0, 6.0, 6.0, 3.0] +ObservationTensor(2).hand_sizes = [6.0, 6.0, 0.0, 0.0] +ObservationTensor(3).player: ◯◯◯◉ +ObservationTensor(3).hand = [1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 0.0, 0.0, 0.0] +ObservationTensor(3).last_action = [5.0, 6.0, 6.0, 3.0] +ObservationTensor(3).hand_sizes = [6.0, 6.0, 0.0, 0.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [16, 30] +StringLegalActions() = ["p0 tile:(0.0, 5.0) pip:5.0", "p0 tile:(1.0, 4.0) pip:4.0"] + +# Apply action "p0 tile:(0.0, 5.0) pip:5.0" +action: 16 + +# State 33 +# hand0:['(0.0, 1.0)', '(0.0, 2.0)', '(1.0, 4.0)', '(2.0, 3.0)', '(6.0, 6.0)'] +# hand1:['(1.0, 1.0)', '(1.0, 6.0)', '(2.0, 2.0)', '(2.0, 4.0)', '(2.0, 5.0)', '(3.0, 5.0)'] +# hand2:['(0.0, 3.0)', '(2.0, 6.0)', '(3.0, 4.0)', '(3.0, 6.0)', '(4.0, 4.0)', '(4.0, 6.0)'] +# hand3:['(1.0, 2.0)', '(1.0, 3.0)', '(1.0, 5.0)', '(3.0, 3.0)', '(4.0, 5.0)', '(5.0, 5.0)'] +# +# board: [(0.0, 5.0), (5.0, 6.0), (6.0, 0.0), (0.0, 0.0), (0.0, 4.0)] +IsTerminal() = False +History() = [2, 12, 19, 26, 27, 15, 24, 23, 5, 7, 17, 11, 0, 13, 3, 18, 1, 16, 21, 9, 10, 6, 4, 25, 14, 20, 22, 8, 0, 95, 166, 305, 16] +HistoryString() = "2, 12, 19, 26, 27, 15, 24, 23, 5, 7, 17, 11, 0, 13, 3, 18, 1, 16, 21, 9, 10, 6, 4, 25, 14, 20, 22, 8, 0, 95, 166, 305, 16" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0]" +InformationStateString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 4.0), (2.0, 5.0), (3.0, 5.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0]" +InformationStateString(2) = "p2 hand:[(0.0, 3.0), (2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0]" +InformationStateString(3) = "p3 hand:[(1.0, 2.0), (1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0]" +InformationStateTensor(0).player: ◉◯◯◯ +InformationStateTensor(0).hand = [0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 4.0, 1.0, 2.0, 3.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(0).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).player: ◯◉◯◯ +InformationStateTensor(1).hand = [1.0, 1.0, 1.0, 1.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 4.0, 1.0, 2.0, 5.0, 1.0, 3.0, 5.0, 1.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(2).player: ◯◯◉◯ +InformationStateTensor(2).hand = [0.0, 3.0, 1.0, 2.0, 6.0, 1.0, 3.0, 4.0, 1.0, 3.0, 6.0, 1.0, 4.0, 4.0, 1.0, 4.0, 6.0, 1.0, 0.0, 0.0, 0.0] +InformationStateTensor(2).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(3).player: ◯◯◯◉ +InformationStateTensor(3).hand = [1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 0.0, 0.0, 0.0] +InformationStateTensor(3).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)] last_action:p0 tile:(0.0, 5.0) pip:5.0" +ObservationString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 4.0), (2.0, 5.0), (3.0, 5.0)] last_action:p0 tile:(0.0, 5.0) pip:5.0" +ObservationString(2) = "p2 hand:[(0.0, 3.0), (2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)] last_action:p0 tile:(0.0, 5.0) pip:5.0" +ObservationString(3) = "p3 hand:[(1.0, 2.0), (1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0)] last_action:p0 tile:(0.0, 5.0) pip:5.0" +PublicObservationString() = "p0 last_action:p0 tile:(0.0, 5.0) pip:5.0" +PrivateObservationString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)]" +PrivateObservationString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 4.0), (2.0, 5.0), (3.0, 5.0)]" +PrivateObservationString(2) = "p2 hand:[(0.0, 3.0), (2.0, 6.0), (3.0, 
4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)]" +PrivateObservationString(3) = "p3 hand:[(1.0, 2.0), (1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0)]" +ObservationTensor(0).player: ◉◯◯◯ +ObservationTensor(0).hand = [0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 4.0, 1.0, 2.0, 3.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(0).last_action = [0.0, 5.0, 5.0, 0.0] +ObservationTensor(0).hand_sizes = [5.0, 6.0, 0.0, 0.0] +ObservationTensor(1).player: ◯◉◯◯ +ObservationTensor(1).hand = [1.0, 1.0, 1.0, 1.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 4.0, 1.0, 2.0, 5.0, 1.0, 3.0, 5.0, 1.0, 0.0, 0.0, 0.0] +ObservationTensor(1).last_action = [0.0, 5.0, 5.0, 0.0] +ObservationTensor(1).hand_sizes = [6.0, 5.0, 0.0, 0.0] +ObservationTensor(2).player: ◯◯◉◯ +ObservationTensor(2).hand = [0.0, 3.0, 1.0, 2.0, 6.0, 1.0, 3.0, 4.0, 1.0, 3.0, 6.0, 1.0, 4.0, 4.0, 1.0, 4.0, 6.0, 1.0, 0.0, 0.0, 0.0] +ObservationTensor(2).last_action = [0.0, 5.0, 5.0, 0.0] +ObservationTensor(2).hand_sizes = [6.0, 6.0, 0.0, 0.0] +ObservationTensor(3).player: ◯◯◯◉ +ObservationTensor(3).hand = [1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 0.0, 0.0, 0.0] +ObservationTensor(3).last_action = [0.0, 5.0, 5.0, 0.0] +ObservationTensor(3).hand_sizes = [6.0, 6.0, 0.0, 0.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [121] +StringLegalActions() = ["p1 tile:(2.0, 4.0) pip:4.0"] + +# Apply action "p1 tile:(2.0, 4.0) pip:4.0" +action: 121 + +# State 34 +# hand0:['(0.0, 1.0)', '(0.0, 2.0)', '(1.0, 4.0)', '(2.0, 3.0)', '(6.0, 6.0)'] +# hand1:['(1.0, 1.0)', '(1.0, 6.0)', '(2.0, 2.0)', '(2.0, 5.0)', '(3.0, 5.0)'] +# hand2:['(0.0, 3.0)', '(2.0, 6.0)', '(3.0, 4.0)', '(3.0, 6.0)', '(4.0, 4.0)', '(4.0, 6.0)'] +# hand3:['(1.0, 2.0)', '(1.0, 3.0)', '(1.0, 5.0)', '(3.0, 3.0)', '(4.0, 5.0)', '(5.0, 5.0)'] +# +# board: [(0.0, 5.0), (5.0, 6.0), (6.0, 0.0), (0.0, 0.0), (0.0, 4.0), (4.0, 2.0)] +IsTerminal() = False +History() = [2, 12, 19, 26, 27, 15, 24, 23, 5, 7, 17, 11, 0, 13, 3, 18, 1, 16, 21, 9, 10, 6, 4, 25, 14, 20, 22, 8, 0, 95, 166, 305, 16, 121] +HistoryString() = "2, 12, 19, 26, 27, 15, 24, 23, 5, 7, 17, 11, 0, 13, 3, 18, 1, 16, 21, 9, 10, 6, 4, 25, 14, 20, 22, 8, 0, 95, 166, 305, 16, 121" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0]" +InformationStateString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 5.0), (3.0, 5.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0]" +InformationStateString(2) = "p2 hand:[(0.0, 3.0), (2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0]" +InformationStateString(3) = "p3 hand:[(1.0, 2.0), (1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0]" +InformationStateTensor(0).player: ◉◯◯◯ +InformationStateTensor(0).hand = [0.0, 1.0, 1.0, 0.0, 2.0, 
1.0, 1.0, 4.0, 1.0, 2.0, 3.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(0).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).player: ◯◉◯◯ +InformationStateTensor(1).hand = [1.0, 1.0, 1.0, 1.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 5.0, 1.0, 3.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(2).player: ◯◯◉◯ +InformationStateTensor(2).hand = [0.0, 3.0, 1.0, 2.0, 6.0, 1.0, 3.0, 4.0, 1.0, 3.0, 6.0, 1.0, 4.0, 4.0, 1.0, 4.0, 6.0, 1.0, 0.0, 0.0, 0.0] +InformationStateTensor(2).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(3).player: ◯◯◯◉ +InformationStateTensor(3).hand = [1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 0.0, 0.0, 0.0] +InformationStateTensor(3).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)] last_action:p1 tile:(2.0, 4.0) pip:4.0" +ObservationString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 5.0), (3.0, 5.0)] last_action:p1 
tile:(2.0, 4.0) pip:4.0" +ObservationString(2) = "p2 hand:[(0.0, 3.0), (2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)] last_action:p1 tile:(2.0, 4.0) pip:4.0" +ObservationString(3) = "p3 hand:[(1.0, 2.0), (1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0)] last_action:p1 tile:(2.0, 4.0) pip:4.0" +PublicObservationString() = "p0 last_action:p1 tile:(2.0, 4.0) pip:4.0" +PrivateObservationString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)]" +PrivateObservationString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 5.0), (3.0, 5.0)]" +PrivateObservationString(2) = "p2 hand:[(0.0, 3.0), (2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)]" +PrivateObservationString(3) = "p3 hand:[(1.0, 2.0), (1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0)]" +ObservationTensor(0).player: ◉◯◯◯ +ObservationTensor(0).hand = [0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 4.0, 1.0, 2.0, 3.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(0).last_action = [2.0, 4.0, 4.0, 1.0] +ObservationTensor(0).hand_sizes = [5.0, 5.0, 0.0, 0.0] +ObservationTensor(1).player: ◯◉◯◯ +ObservationTensor(1).hand = [1.0, 1.0, 1.0, 1.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 5.0, 1.0, 3.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1).last_action = [2.0, 4.0, 4.0, 1.0] +ObservationTensor(1).hand_sizes = [5.0, 5.0, 0.0, 0.0] +ObservationTensor(2).player: ◯◯◉◯ +ObservationTensor(2).hand = [0.0, 3.0, 1.0, 2.0, 6.0, 1.0, 3.0, 4.0, 1.0, 3.0, 6.0, 1.0, 4.0, 4.0, 1.0, 4.0, 6.0, 1.0, 0.0, 0.0, 0.0] +ObservationTensor(2).last_action = [2.0, 4.0, 4.0, 1.0] +ObservationTensor(2).hand_sizes = [6.0, 6.0, 0.0, 0.0] +ObservationTensor(3).player: ◯◯◯◉ +ObservationTensor(3).hand = [1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 0.0, 0.0, 0.0] +ObservationTensor(3).last_action = [2.0, 4.0, 4.0, 1.0] +ObservationTensor(3).hand_sizes = [6.0, 6.0, 0.0, 0.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [163, 203] +StringLegalActions() = ["p2 tile:(0.0, 3.0) pip:0.0", "p2 tile:(2.0, 6.0) pip:2.0"] + +# Apply action "p2 tile:(0.0, 3.0) pip:0.0" +action: 163 + +# State 35 +# hand0:['(0.0, 1.0)', '(0.0, 2.0)', '(1.0, 4.0)', '(2.0, 3.0)', '(6.0, 6.0)'] +# hand1:['(1.0, 1.0)', '(1.0, 6.0)', '(2.0, 2.0)', '(2.0, 5.0)', '(3.0, 5.0)'] +# hand2:['(2.0, 6.0)', '(3.0, 4.0)', '(3.0, 6.0)', '(4.0, 4.0)', '(4.0, 6.0)'] +# hand3:['(1.0, 2.0)', '(1.0, 3.0)', '(1.0, 5.0)', '(3.0, 3.0)', '(4.0, 5.0)', '(5.0, 5.0)'] +# +# board: [(3.0, 0.0), (0.0, 5.0), (5.0, 6.0), (6.0, 0.0), (0.0, 0.0), (0.0, 4.0), (4.0, 2.0)] +IsTerminal() = False +History() = [2, 12, 19, 26, 27, 15, 24, 23, 5, 7, 17, 11, 0, 13, 3, 18, 1, 16, 21, 9, 10, 6, 4, 25, 14, 20, 22, 8, 0, 95, 166, 305, 16, 121, 163] +HistoryString() = "2, 12, 19, 26, 27, 15, 24, 23, 5, 7, 17, 11, 0, 13, 3, 18, 1, 16, 21, 9, 10, 6, 4, 25, 14, 20, 22, 8, 0, 95, 166, 305, 16, 121, 163" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0, p2 tile:(0.0, 3.0) pip:0.0]" +InformationStateString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 5.0), (3.0, 5.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 
tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0, p2 tile:(0.0, 3.0) pip:0.0]" +InformationStateString(2) = "p2 hand:[(2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0, p2 tile:(0.0, 3.0) pip:0.0]" +InformationStateString(3) = "p3 hand:[(1.0, 2.0), (1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0, p2 tile:(0.0, 3.0) pip:0.0]" +InformationStateTensor(0).player: ◉◯◯◯ +InformationStateTensor(0).hand = [0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 4.0, 1.0, 2.0, 3.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(0).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).player: ◯◉◯◯ +InformationStateTensor(1).hand = [1.0, 1.0, 1.0, 1.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 5.0, 1.0, 3.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(2).player: ◯◯◉◯ +InformationStateTensor(2).hand = [2.0, 6.0, 1.0, 3.0, 4.0, 1.0, 3.0, 6.0, 1.0, 4.0, 4.0, 1.0, 4.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(2).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(3).player: ◯◯◯◉ +InformationStateTensor(3).hand = [1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 0.0, 0.0, 0.0] +InformationStateTensor(3).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 
1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)] last_action:p2 tile:(0.0, 3.0) pip:0.0" +ObservationString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 5.0), (3.0, 5.0)] last_action:p2 tile:(0.0, 3.0) pip:0.0" +ObservationString(2) = "p2 hand:[(2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)] last_action:p2 tile:(0.0, 3.0) pip:0.0" +ObservationString(3) = "p3 hand:[(1.0, 2.0), (1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0)] last_action:p2 tile:(0.0, 3.0) pip:0.0" +PublicObservationString() = "p0 last_action:p2 tile:(0.0, 3.0) pip:0.0" +PrivateObservationString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)]" +PrivateObservationString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 5.0), (3.0, 5.0)]" +PrivateObservationString(2) = "p2 hand:[(2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)]" +PrivateObservationString(3) = "p3 hand:[(1.0, 2.0), (1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0)]" +ObservationTensor(0).player: ◉◯◯◯ +ObservationTensor(0).hand = [0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 4.0, 1.0, 2.0, 3.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(0).last_action = [0.0, 3.0, 0.0, 2.0] +ObservationTensor(0).hand_sizes = [5.0, 5.0, 0.0, 0.0] +ObservationTensor(1).player: ◯◉◯◯ +ObservationTensor(1).hand = [1.0, 1.0, 1.0, 1.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 5.0, 1.0, 3.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1).last_action = [0.0, 3.0, 0.0, 2.0] +ObservationTensor(1).hand_sizes = [5.0, 5.0, 0.0, 0.0] +ObservationTensor(2).player: ◯◯◉◯ +ObservationTensor(2).hand = [2.0, 6.0, 1.0, 3.0, 4.0, 1.0, 3.0, 6.0, 1.0, 4.0, 4.0, 1.0, 4.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(2).last_action = [0.0, 3.0, 0.0, 2.0] +ObservationTensor(2).hand_sizes = [5.0, 6.0, 0.0, 0.0] +ObservationTensor(3).player: ◯◯◯◉ +ObservationTensor(3).hand = [1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 0.0, 0.0, 0.0] +ObservationTensor(3).last_action = [0.0, 3.0, 0.0, 2.0] +ObservationTensor(3).hand_sizes = [6.0, 5.0, 0.0, 0.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [255, 258, 283] +StringLegalActions() = ["p3 tile:(1.0, 2.0) pip:2.0", "p3 tile:(1.0, 3.0) pip:3.0", "p3 tile:(3.0, 3.0) pip:3.0"] + +# Apply action "p3 tile:(1.0, 2.0) pip:2.0" +action: 255 + +# State 36 +# hand0:['(0.0, 1.0)', '(0.0, 2.0)', '(1.0, 4.0)', '(2.0, 3.0)', '(6.0, 6.0)'] +# hand1:['(1.0, 1.0)', '(1.0, 6.0)', '(2.0, 2.0)', '(2.0, 5.0)', '(3.0, 5.0)'] +# hand2:['(2.0, 6.0)', '(3.0, 4.0)', '(3.0, 6.0)', '(4.0, 4.0)', '(4.0, 6.0)'] +# hand3:['(1.0, 3.0)', '(1.0, 5.0)', '(3.0, 3.0)', '(4.0, 5.0)', '(5.0, 5.0)'] +# +# board: [(3.0, 0.0), (0.0, 5.0), (5.0, 6.0), (6.0, 0.0), (0.0, 0.0), (0.0, 4.0), (4.0, 2.0), (2.0, 1.0)] +IsTerminal() = False +History() = [2, 12, 19, 26, 27, 15, 24, 23, 5, 7, 17, 11, 0, 13, 3, 18, 1, 16, 21, 9, 10, 6, 4, 
25, 14, 20, 22, 8, 0, 95, 166, 305, 16, 121, 163, 255] +HistoryString() = "2, 12, 19, 26, 27, 15, 24, 23, 5, 7, 17, 11, 0, 13, 3, 18, 1, 16, 21, 9, 10, 6, 4, 25, 14, 20, 22, 8, 0, 95, 166, 305, 16, 121, 163, 255" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0, p2 tile:(0.0, 3.0) pip:0.0, p3 tile:(1.0, 2.0) pip:2.0]" +InformationStateString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 5.0), (3.0, 5.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0, p2 tile:(0.0, 3.0) pip:0.0, p3 tile:(1.0, 2.0) pip:2.0]" +InformationStateString(2) = "p2 hand:[(2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0, p2 tile:(0.0, 3.0) pip:0.0, p3 tile:(1.0, 2.0) pip:2.0]" +InformationStateString(3) = "p3 hand:[(1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0, p2 tile:(0.0, 3.0) pip:0.0, p3 tile:(1.0, 2.0) pip:2.0]" +InformationStateTensor(0).player: ◉◯◯◯ +InformationStateTensor(0).hand = [0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 4.0, 1.0, 2.0, 3.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(0).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).player: ◯◉◯◯ +InformationStateTensor(1).hand = [1.0, 1.0, 1.0, 1.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 5.0, 1.0, 3.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(2).player: ◯◯◉◯ +InformationStateTensor(2).hand = [2.0, 6.0, 1.0, 3.0, 4.0, 1.0, 3.0, 6.0, 1.0, 4.0, 4.0, 1.0, 4.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(2).actions_history = 
[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(3).player: ◯◯◯◉ +InformationStateTensor(3).hand = [1.0, 3.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(3).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)] last_action:p3 tile:(1.0, 2.0) pip:2.0" +ObservationString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 5.0), (3.0, 5.0)] last_action:p3 tile:(1.0, 2.0) pip:2.0" +ObservationString(2) = "p2 hand:[(2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)] last_action:p3 tile:(1.0, 2.0) pip:2.0" +ObservationString(3) = "p3 hand:[(1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0)] last_action:p3 tile:(1.0, 2.0) pip:2.0" +PublicObservationString() = "p0 last_action:p3 tile:(1.0, 2.0) pip:2.0" +PrivateObservationString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (1.0, 4.0), (2.0, 3.0), (6.0, 6.0)]" +PrivateObservationString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 5.0), (3.0, 5.0)]" +PrivateObservationString(2) = "p2 hand:[(2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)]" +PrivateObservationString(3) = "p3 hand:[(1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0)]" +ObservationTensor(0).player: ◉◯◯◯ +ObservationTensor(0).hand = [0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 4.0, 1.0, 2.0, 3.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(0).last_action = [1.0, 2.0, 2.0, 3.0] +ObservationTensor(0).hand_sizes = [5.0, 5.0, 0.0, 0.0] +ObservationTensor(1).player: ◯◉◯◯ +ObservationTensor(1).hand = [1.0, 1.0, 1.0, 1.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 5.0, 1.0, 3.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1).last_action = [1.0, 2.0, 2.0, 3.0] +ObservationTensor(1).hand_sizes = [5.0, 5.0, 0.0, 0.0] +ObservationTensor(2).player: ◯◯◉◯ +ObservationTensor(2).hand = [2.0, 6.0, 1.0, 3.0, 4.0, 1.0, 3.0, 6.0, 1.0, 4.0, 4.0, 1.0, 4.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(2).last_action = [1.0, 2.0, 2.0, 3.0] +ObservationTensor(2).hand_sizes = [5.0, 5.0, 0.0, 0.0] +ObservationTensor(3).player: ◯◯◯◉ +ObservationTensor(3).hand = [1.0, 3.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(3).last_action = [1.0, 
2.0, 2.0, 3.0] +ObservationTensor(3).hand_sizes = [5.0, 5.0, 0.0, 0.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [4, 29, 41] +StringLegalActions() = ["p0 tile:(0.0, 1.0) pip:1.0", "p0 tile:(1.0, 4.0) pip:1.0", "p0 tile:(2.0, 3.0) pip:3.0"] + +# Apply action "p0 tile:(1.0, 4.0) pip:1.0" +action: 29 + +# State 37 +# hand0:['(0.0, 1.0)', '(0.0, 2.0)', '(2.0, 3.0)', '(6.0, 6.0)'] +# hand1:['(1.0, 1.0)', '(1.0, 6.0)', '(2.0, 2.0)', '(2.0, 5.0)', '(3.0, 5.0)'] +# hand2:['(2.0, 6.0)', '(3.0, 4.0)', '(3.0, 6.0)', '(4.0, 4.0)', '(4.0, 6.0)'] +# hand3:['(1.0, 3.0)', '(1.0, 5.0)', '(3.0, 3.0)', '(4.0, 5.0)', '(5.0, 5.0)'] +# +# board: [(3.0, 0.0), (0.0, 5.0), (5.0, 6.0), (6.0, 0.0), (0.0, 0.0), (0.0, 4.0), (4.0, 2.0), (2.0, 1.0), (1.0, 4.0)] +IsTerminal() = False +History() = [2, 12, 19, 26, 27, 15, 24, 23, 5, 7, 17, 11, 0, 13, 3, 18, 1, 16, 21, 9, 10, 6, 4, 25, 14, 20, 22, 8, 0, 95, 166, 305, 16, 121, 163, 255, 29] +HistoryString() = "2, 12, 19, 26, 27, 15, 24, 23, 5, 7, 17, 11, 0, 13, 3, 18, 1, 16, 21, 9, 10, 6, 4, 25, 14, 20, 22, 8, 0, 95, 166, 305, 16, 121, 163, 255, 29" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (2.0, 3.0), (6.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0, p2 tile:(0.0, 3.0) pip:0.0, p3 tile:(1.0, 2.0) pip:2.0, p0 tile:(1.0, 4.0) pip:1.0]" +InformationStateString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 5.0), (3.0, 5.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0, p2 tile:(0.0, 3.0) pip:0.0, p3 tile:(1.0, 2.0) pip:2.0, p0 tile:(1.0, 4.0) pip:1.0]" +InformationStateString(2) = "p2 hand:[(2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0, p2 tile:(0.0, 3.0) pip:0.0, p3 tile:(1.0, 2.0) pip:2.0, p0 tile:(1.0, 4.0) pip:1.0]" +InformationStateString(3) = "p3 hand:[(1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0, p2 tile:(0.0, 3.0) pip:0.0, p3 tile:(1.0, 2.0) pip:2.0, p0 tile:(1.0, 4.0) pip:1.0]" +InformationStateTensor(0).player: ◉◯◯◯ +InformationStateTensor(0).hand = [0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 3.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(0).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 1.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).player: ◯◉◯◯ +InformationStateTensor(1).hand = [1.0, 
1.0, 1.0, 1.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 5.0, 1.0, 3.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 1.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(2).player: ◯◯◉◯ +InformationStateTensor(2).hand = [2.0, 6.0, 1.0, 3.0, 4.0, 1.0, 3.0, 6.0, 1.0, 4.0, 4.0, 1.0, 4.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(2).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 1.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(3).player: ◯◯◯◉ +InformationStateTensor(3).hand = [1.0, 3.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(3).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 1.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (2.0, 3.0), (6.0, 6.0)] last_action:p0 tile:(1.0, 4.0) pip:1.0" +ObservationString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 5.0), (3.0, 5.0)] last_action:p0 tile:(1.0, 4.0) pip:1.0" +ObservationString(2) = "p2 hand:[(2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)] last_action:p0 tile:(1.0, 4.0) pip:1.0" +ObservationString(3) = "p3 hand:[(1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0)] last_action:p0 tile:(1.0, 4.0) pip:1.0" +PublicObservationString() = "p0 last_action:p0 tile:(1.0, 4.0) pip:1.0" +PrivateObservationString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (2.0, 3.0), (6.0, 6.0)]" +PrivateObservationString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 5.0), (3.0, 5.0)]" +PrivateObservationString(2) = "p2 hand:[(2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)]" +PrivateObservationString(3) = "p3 hand:[(1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0)]" +ObservationTensor(0).player: ◉◯◯◯ +ObservationTensor(0).hand = [0.0, 1.0, 1.0, 0.0, 
2.0, 1.0, 2.0, 3.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(0).last_action = [1.0, 4.0, 1.0, 0.0] +ObservationTensor(0).hand_sizes = [4.0, 5.0, 0.0, 0.0] +ObservationTensor(1).player: ◯◉◯◯ +ObservationTensor(1).hand = [1.0, 1.0, 1.0, 1.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 5.0, 1.0, 3.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1).last_action = [1.0, 4.0, 1.0, 0.0] +ObservationTensor(1).hand_sizes = [5.0, 4.0, 0.0, 0.0] +ObservationTensor(2).player: ◯◯◉◯ +ObservationTensor(2).hand = [2.0, 6.0, 1.0, 3.0, 4.0, 1.0, 3.0, 6.0, 1.0, 4.0, 4.0, 1.0, 4.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(2).last_action = [1.0, 4.0, 1.0, 0.0] +ObservationTensor(2).hand_sizes = [5.0, 5.0, 0.0, 0.0] +ObservationTensor(3).player: ◯◯◯◉ +ObservationTensor(3).hand = [1.0, 3.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(3).last_action = [1.0, 4.0, 1.0, 0.0] +ObservationTensor(3).hand_sizes = [5.0, 5.0, 0.0, 0.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [134] +StringLegalActions() = ["p1 tile:(3.0, 5.0) pip:3.0"] + +# Apply action "p1 tile:(3.0, 5.0) pip:3.0" +action: 134 + +# State 38 +# hand0:['(0.0, 1.0)', '(0.0, 2.0)', '(2.0, 3.0)', '(6.0, 6.0)'] +# hand1:['(1.0, 1.0)', '(1.0, 6.0)', '(2.0, 2.0)', '(2.0, 5.0)'] +# hand2:['(2.0, 6.0)', '(3.0, 4.0)', '(3.0, 6.0)', '(4.0, 4.0)', '(4.0, 6.0)'] +# hand3:['(1.0, 3.0)', '(1.0, 5.0)', '(3.0, 3.0)', '(4.0, 5.0)', '(5.0, 5.0)'] +# +# board: [(5.0, 3.0), (3.0, 0.0), (0.0, 5.0), (5.0, 6.0), (6.0, 0.0), (0.0, 0.0), (0.0, 4.0), (4.0, 2.0), (2.0, 1.0), (1.0, 4.0)] +IsTerminal() = False +History() = [2, 12, 19, 26, 27, 15, 24, 23, 5, 7, 17, 11, 0, 13, 3, 18, 1, 16, 21, 9, 10, 6, 4, 25, 14, 20, 22, 8, 0, 95, 166, 305, 16, 121, 163, 255, 29, 134] +HistoryString() = "2, 12, 19, 26, 27, 15, 24, 23, 5, 7, 17, 11, 0, 13, 3, 18, 1, 16, 21, 9, 10, 6, 4, 25, 14, 20, 22, 8, 0, 95, 166, 305, 16, 121, 163, 255, 29, 134" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (2.0, 3.0), (6.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0, p2 tile:(0.0, 3.0) pip:0.0, p3 tile:(1.0, 2.0) pip:2.0, p0 tile:(1.0, 4.0) pip:1.0, p1 tile:(3.0, 5.0) pip:3.0]" +InformationStateString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 5.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0, p2 tile:(0.0, 3.0) pip:0.0, p3 tile:(1.0, 2.0) pip:2.0, p0 tile:(1.0, 4.0) pip:1.0, p1 tile:(3.0, 5.0) pip:3.0]" +InformationStateString(2) = "p2 hand:[(2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0, p2 tile:(0.0, 3.0) pip:0.0, p3 tile:(1.0, 2.0) pip:2.0, p0 tile:(1.0, 4.0) pip:1.0, p1 tile:(3.0, 5.0) pip:3.0]" +InformationStateString(3) = "p3 hand:[(1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0, p2 tile:(0.0, 3.0) pip:0.0, 
p3 tile:(1.0, 2.0) pip:2.0, p0 tile:(1.0, 4.0) pip:1.0, p1 tile:(3.0, 5.0) pip:3.0]" +InformationStateTensor(0).player: ◉◯◯◯ +InformationStateTensor(0).hand = [0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 3.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(0).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 1.0, 4.0, 1.0, 0.0, 1.0, 3.0, 5.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).player: ◯◉◯◯ +InformationStateTensor(1).hand = [1.0, 1.0, 1.0, 1.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 1.0, 4.0, 1.0, 0.0, 1.0, 3.0, 5.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(2).player: ◯◯◉◯ +InformationStateTensor(2).hand = [2.0, 6.0, 1.0, 3.0, 4.0, 1.0, 3.0, 6.0, 1.0, 4.0, 4.0, 1.0, 4.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(2).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 1.0, 4.0, 1.0, 0.0, 1.0, 3.0, 5.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(3).player: ◯◯◯◉ +InformationStateTensor(3).hand = [1.0, 3.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(3).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 1.0, 4.0, 1.0, 0.0, 1.0, 3.0, 5.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (2.0, 
3.0), (6.0, 6.0)] last_action:p1 tile:(3.0, 5.0) pip:3.0" +ObservationString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 5.0)] last_action:p1 tile:(3.0, 5.0) pip:3.0" +ObservationString(2) = "p2 hand:[(2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)] last_action:p1 tile:(3.0, 5.0) pip:3.0" +ObservationString(3) = "p3 hand:[(1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0)] last_action:p1 tile:(3.0, 5.0) pip:3.0" +PublicObservationString() = "p0 last_action:p1 tile:(3.0, 5.0) pip:3.0" +PrivateObservationString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (2.0, 3.0), (6.0, 6.0)]" +PrivateObservationString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 5.0)]" +PrivateObservationString(2) = "p2 hand:[(2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0), (4.0, 6.0)]" +PrivateObservationString(3) = "p3 hand:[(1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0)]" +ObservationTensor(0).player: ◉◯◯◯ +ObservationTensor(0).hand = [0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 3.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(0).last_action = [3.0, 5.0, 3.0, 1.0] +ObservationTensor(0).hand_sizes = [4.0, 4.0, 0.0, 0.0] +ObservationTensor(1).player: ◯◉◯◯ +ObservationTensor(1).hand = [1.0, 1.0, 1.0, 1.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1).last_action = [3.0, 5.0, 3.0, 1.0] +ObservationTensor(1).hand_sizes = [4.0, 4.0, 0.0, 0.0] +ObservationTensor(2).player: ◯◯◉◯ +ObservationTensor(2).hand = [2.0, 6.0, 1.0, 3.0, 4.0, 1.0, 3.0, 6.0, 1.0, 4.0, 4.0, 1.0, 4.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(2).last_action = [3.0, 5.0, 3.0, 1.0] +ObservationTensor(2).hand_sizes = [5.0, 5.0, 0.0, 0.0] +ObservationTensor(3).player: ◯◯◯◉ +ObservationTensor(3).hand = [1.0, 3.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(3).last_action = [3.0, 5.0, 3.0, 1.0] +ObservationTensor(3).hand_sizes = [5.0, 5.0, 0.0, 0.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [209, 217, 222] +StringLegalActions() = ["p2 tile:(3.0, 4.0) pip:4.0", "p2 tile:(4.0, 4.0) pip:4.0", "p2 tile:(4.0, 6.0) pip:4.0"] + +# Apply action "p2 tile:(4.0, 6.0) pip:4.0" +action: 222 + +# State 39 +# hand0:['(0.0, 1.0)', '(0.0, 2.0)', '(2.0, 3.0)', '(6.0, 6.0)'] +# hand1:['(1.0, 1.0)', '(1.0, 6.0)', '(2.0, 2.0)', '(2.0, 5.0)'] +# hand2:['(2.0, 6.0)', '(3.0, 4.0)', '(3.0, 6.0)', '(4.0, 4.0)'] +# hand3:['(1.0, 3.0)', '(1.0, 5.0)', '(3.0, 3.0)', '(4.0, 5.0)', '(5.0, 5.0)'] +# +# board: [(5.0, 3.0), (3.0, 0.0), (0.0, 5.0), (5.0, 6.0), (6.0, 0.0), (0.0, 0.0), (0.0, 4.0), (4.0, 2.0), (2.0, 1.0), (1.0, 4.0), (4.0, 6.0)] +IsTerminal() = False +History() = [2, 12, 19, 26, 27, 15, 24, 23, 5, 7, 17, 11, 0, 13, 3, 18, 1, 16, 21, 9, 10, 6, 4, 25, 14, 20, 22, 8, 0, 95, 166, 305, 16, 121, 163, 255, 29, 134, 222] +HistoryString() = "2, 12, 19, 26, 27, 15, 24, 23, 5, 7, 17, 11, 0, 13, 3, 18, 1, 16, 21, 9, 10, 6, 4, 25, 14, 20, 22, 8, 0, 95, 166, 305, 16, 121, 163, 255, 29, 134, 222" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (2.0, 3.0), (6.0, 6.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0, p2 tile:(0.0, 3.0) pip:0.0, p3 tile:(1.0, 2.0) pip:2.0, p0 tile:(1.0, 4.0) pip:1.0, p1 tile:(3.0, 5.0) pip:3.0, p2 
tile:(4.0, 6.0) pip:4.0]" +InformationStateString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 5.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0, p2 tile:(0.0, 3.0) pip:0.0, p3 tile:(1.0, 2.0) pip:2.0, p0 tile:(1.0, 4.0) pip:1.0, p1 tile:(3.0, 5.0) pip:3.0, p2 tile:(4.0, 6.0) pip:4.0]" +InformationStateString(2) = "p2 hand:[(2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0, p2 tile:(0.0, 3.0) pip:0.0, p3 tile:(1.0, 2.0) pip:2.0, p0 tile:(1.0, 4.0) pip:1.0, p1 tile:(3.0, 5.0) pip:3.0, p2 tile:(4.0, 6.0) pip:4.0]" +InformationStateString(3) = "p3 hand:[(1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0, p2 tile:(0.0, 3.0) pip:0.0, p3 tile:(1.0, 2.0) pip:2.0, p0 tile:(1.0, 4.0) pip:1.0, p1 tile:(3.0, 5.0) pip:3.0, p2 tile:(4.0, 6.0) pip:4.0]" +InformationStateTensor(0).player: ◉◯◯◯ +InformationStateTensor(0).hand = [0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 3.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(0).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 1.0, 4.0, 1.0, 0.0, 1.0, 3.0, 5.0, 3.0, 1.0, 1.0, 4.0, 6.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).player: ◯◉◯◯ +InformationStateTensor(1).hand = [1.0, 1.0, 1.0, 1.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 1.0, 4.0, 1.0, 0.0, 1.0, 3.0, 5.0, 3.0, 1.0, 1.0, 4.0, 6.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(2).player: ◯◯◉◯ +InformationStateTensor(2).hand = [2.0, 6.0, 1.0, 3.0, 4.0, 1.0, 3.0, 6.0, 1.0, 4.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(2).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 1.0, 4.0, 1.0, 0.0, 1.0, 3.0, 5.0, 3.0, 1.0, 1.0, 4.0, 6.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(3).player: ◯◯◯◉ +InformationStateTensor(3).hand = [1.0, 3.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(3).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 1.0, 4.0, 1.0, 0.0, 1.0, 3.0, 5.0, 3.0, 1.0, 1.0, 4.0, 6.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (2.0, 3.0), (6.0, 6.0)] last_action:p2 tile:(4.0, 6.0) pip:4.0" +ObservationString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 5.0)] last_action:p2 tile:(4.0, 6.0) pip:4.0" +ObservationString(2) = "p2 hand:[(2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0)] last_action:p2 tile:(4.0, 6.0) pip:4.0" +ObservationString(3) = "p3 hand:[(1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0)] last_action:p2 tile:(4.0, 6.0) pip:4.0" +PublicObservationString() = "p0 last_action:p2 tile:(4.0, 6.0) pip:4.0" +PrivateObservationString(0) = "p0 hand:[(0.0, 1.0), (0.0, 2.0), (2.0, 3.0), (6.0, 6.0)]" +PrivateObservationString(1) = "p1 hand:[(1.0, 1.0), (1.0, 6.0), (2.0, 2.0), (2.0, 5.0)]" +PrivateObservationString(2) = "p2 hand:[(2.0, 6.0), (3.0, 4.0), (3.0, 6.0), (4.0, 4.0)]" +PrivateObservationString(3) = "p3 hand:[(1.0, 3.0), (1.0, 5.0), (3.0, 3.0), (4.0, 5.0), (5.0, 5.0)]" +ObservationTensor(0).player: ◉◯◯◯ +ObservationTensor(0).hand = [0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 3.0, 1.0, 6.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(0).last_action = [4.0, 6.0, 4.0, 2.0] +ObservationTensor(0).hand_sizes = [4.0, 4.0, 0.0, 0.0] +ObservationTensor(1).player: ◯◉◯◯ +ObservationTensor(1).hand = [1.0, 1.0, 1.0, 1.0, 6.0, 1.0, 2.0, 2.0, 1.0, 2.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1).last_action = [4.0, 6.0, 4.0, 2.0] +ObservationTensor(1).hand_sizes = [4.0, 4.0, 0.0, 0.0] +ObservationTensor(2).player: ◯◯◉◯ +ObservationTensor(2).hand = [2.0, 6.0, 1.0, 3.0, 4.0, 1.0, 3.0, 6.0, 1.0, 4.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(2).last_action = [4.0, 6.0, 4.0, 2.0] +ObservationTensor(2).hand_sizes = [4.0, 5.0, 0.0, 0.0] +ObservationTensor(3).player: ◯◯◯◉ +ObservationTensor(3).hand = [1.0, 3.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 1.0, 4.0, 5.0, 1.0, 5.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(3).last_action = [4.0, 6.0, 4.0, 2.0] +ObservationTensor(3).hand_sizes = [5.0, 4.0, 0.0, 0.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [264, 297, 302] +StringLegalActions() = ["p3 tile:(1.0, 5.0) pip:5.0", "p3 tile:(4.0, 5.0) pip:5.0", "p3 tile:(5.0, 5.0) pip:5.0"] + +# Apply action "p3 tile:(4.0, 5.0) pip:5.0" +action: 297 + +# State 40 +# Apply action "p0 tile:(6.0, 6.0) pip:6.0" +action: 76 + +# State 41 +# Apply action "p1 
tile:(1.0, 6.0) pip:6.0" +action: 113 + +# State 42 +# Apply action "p2 tile:(3.0, 4.0) pip:4.0" +action: 209 + +# State 43 +# Apply action "p3 tile:(3.0, 3.0) pip:3.0" +action: 283 + +# State 44 +# Apply action "p0 tile:(0.0, 1.0) pip:1.0" +action: 4 + +# State 45 +# Apply action "p2 tile:(3.0, 6.0) pip:3.0" +action: 214 + +# State 46 +# Apply action "p0 tile:(0.0, 2.0) pip:0.0" +action: 6 + +# State 47 +# Apply action "p1 tile:(2.0, 5.0) pip:2.0" +action: 123 + +# State 48 +# Apply action "p2 tile:(2.0, 6.0) pip:6.0" +action: 204 + +# State 49 +# Apply action "p3 tile:(1.0, 5.0) pip:5.0" +action: 264 + +# State 50 +# Apply action "p0 tile:(2.0, 3.0) pip:2.0" +action: 40 + +# State 51 +# hand0:[] +# hand1:['(1.0, 1.0)', '(2.0, 2.0)'] +# hand2:['(4.0, 4.0)'] +# hand3:['(1.0, 3.0)', '(5.0, 5.0)'] +# +# board: [(3.0, 2.0), (2.0, 6.0), (6.0, 3.0), (3.0, 3.0), (3.0, 4.0), (4.0, 5.0), (5.0, 3.0), (3.0, 0.0), (0.0, 5.0), (5.0, 6.0), (6.0, 0.0), (0.0, 0.0), (0.0, 4.0), (4.0, 2.0), (2.0, 1.0), (1.0, 4.0), (4.0, 6.0), (6.0, 6.0), (6.0, 1.0), (1.0, 0.0), (0.0, 2.0), (2.0, 5.0), (5.0, 1.0)] +IsTerminal() = True +History() = [2, 12, 19, 26, 27, 15, 24, 23, 5, 7, 17, 11, 0, 13, 3, 18, 1, 16, 21, 9, 10, 6, 4, 25, 14, 20, 22, 8, 0, 95, 166, 305, 16, 121, 163, 255, 29, 134, 222, 297, 76, 113, 209, 283, 4, 214, 6, 123, 204, 264, 40] +HistoryString() = "2, 12, 19, 26, 27, 15, 24, 23, 5, 7, 17, 11, 0, 13, 3, 18, 1, 16, 21, 9, 10, 6, 4, 25, 14, 20, 22, 8, 0, 95, 166, 305, 16, 121, 163, 255, 29, 134, 222, 297, 76, 113, 209, 283, 4, 214, 6, 123, 204, 264, 40" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.TERMINAL +InformationStateString(0) = "p0 hand:[] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0, p2 tile:(0.0, 3.0) pip:0.0, p3 tile:(1.0, 2.0) pip:2.0, p0 tile:(1.0, 4.0) pip:1.0, p1 tile:(3.0, 5.0) pip:3.0, p2 tile:(4.0, 6.0) pip:4.0, p3 tile:(4.0, 5.0) pip:5.0, p0 tile:(6.0, 6.0) pip:6.0, p1 tile:(1.0, 6.0) pip:6.0, p2 tile:(3.0, 4.0) pip:4.0, p3 tile:(3.0, 3.0) pip:3.0, p0 tile:(0.0, 1.0) pip:1.0, p2 tile:(3.0, 6.0) pip:3.0, p0 tile:(0.0, 2.0) pip:0.0, p1 tile:(2.0, 5.0) pip:2.0, p2 tile:(2.0, 6.0) pip:6.0, p3 tile:(1.0, 5.0) pip:5.0, p0 tile:(2.0, 3.0) pip:2.0]" +InformationStateString(1) = "p1 hand:[(1.0, 1.0), (2.0, 2.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0, p2 tile:(0.0, 3.0) pip:0.0, p3 tile:(1.0, 2.0) pip:2.0, p0 tile:(1.0, 4.0) pip:1.0, p1 tile:(3.0, 5.0) pip:3.0, p2 tile:(4.0, 6.0) pip:4.0, p3 tile:(4.0, 5.0) pip:5.0, p0 tile:(6.0, 6.0) pip:6.0, p1 tile:(1.0, 6.0) pip:6.0, p2 tile:(3.0, 4.0) pip:4.0, p3 tile:(3.0, 3.0) pip:3.0, p0 tile:(0.0, 1.0) pip:1.0, p2 tile:(3.0, 6.0) pip:3.0, p0 tile:(0.0, 2.0) pip:0.0, p1 tile:(2.0, 5.0) pip:2.0, p2 tile:(2.0, 6.0) pip:6.0, p3 tile:(1.0, 5.0) pip:5.0, p0 tile:(2.0, 3.0) pip:2.0]" +InformationStateString(2) = "p2 hand:[(4.0, 4.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0, p2 tile:(0.0, 3.0) pip:0.0, p3 tile:(1.0, 2.0) pip:2.0, p0 tile:(1.0, 4.0) pip:1.0, p1 tile:(3.0, 5.0) pip:3.0, p2 tile:(4.0, 6.0) pip:4.0, p3 tile:(4.0, 5.0) pip:5.0, p0 tile:(6.0, 6.0) pip:6.0, p1 tile:(1.0, 6.0) pip:6.0, p2 tile:(3.0, 4.0) pip:4.0, p3 
tile:(3.0, 3.0) pip:3.0, p0 tile:(0.0, 1.0) pip:1.0, p2 tile:(3.0, 6.0) pip:3.0, p0 tile:(0.0, 2.0) pip:0.0, p1 tile:(2.0, 5.0) pip:2.0, p2 tile:(2.0, 6.0) pip:6.0, p3 tile:(1.0, 5.0) pip:5.0, p0 tile:(2.0, 3.0) pip:2.0]" +InformationStateString(3) = "p3 hand:[(1.0, 3.0), (5.0, 5.0)] history:[p0 tile:(0.0, 0.0) pip:None, p1 tile:(0.0, 6.0) pip:0.0, p2 tile:(0.0, 4.0) pip:0.0, p3 tile:(5.0, 6.0) pip:6.0, p0 tile:(0.0, 5.0) pip:5.0, p1 tile:(2.0, 4.0) pip:4.0, p2 tile:(0.0, 3.0) pip:0.0, p3 tile:(1.0, 2.0) pip:2.0, p0 tile:(1.0, 4.0) pip:1.0, p1 tile:(3.0, 5.0) pip:3.0, p2 tile:(4.0, 6.0) pip:4.0, p3 tile:(4.0, 5.0) pip:5.0, p0 tile:(6.0, 6.0) pip:6.0, p1 tile:(1.0, 6.0) pip:6.0, p2 tile:(3.0, 4.0) pip:4.0, p3 tile:(3.0, 3.0) pip:3.0, p0 tile:(0.0, 1.0) pip:1.0, p2 tile:(3.0, 6.0) pip:3.0, p0 tile:(0.0, 2.0) pip:0.0, p1 tile:(2.0, 5.0) pip:2.0, p2 tile:(2.0, 6.0) pip:6.0, p3 tile:(1.0, 5.0) pip:5.0, p0 tile:(2.0, 3.0) pip:2.0]" +InformationStateTensor(0).player: ◉◯◯◯ +InformationStateTensor(0).hand: ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ +InformationStateTensor(0).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 1.0, 4.0, 1.0, 0.0, 1.0, 3.0, 5.0, 3.0, 1.0, 1.0, 4.0, 6.0, 4.0, 2.0, 1.0, 4.0, 5.0, 5.0, 3.0, 1.0, 6.0, 6.0, 6.0, 0.0, 1.0, 1.0, 6.0, 6.0, 1.0, 1.0, 3.0, 4.0, 4.0, 2.0, 1.0, 3.0, 3.0, 3.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 6.0, 3.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 5.0, 2.0, 1.0, 1.0, 2.0, 6.0, 6.0, 2.0, 1.0, 1.0, 5.0, 5.0, 3.0, 1.0, 2.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).player: ◯◉◯◯ +InformationStateTensor(1).hand = [1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 1.0, 4.0, 1.0, 0.0, 1.0, 3.0, 5.0, 3.0, 1.0, 1.0, 4.0, 6.0, 4.0, 2.0, 1.0, 4.0, 5.0, 5.0, 3.0, 1.0, 6.0, 6.0, 6.0, 0.0, 1.0, 1.0, 6.0, 6.0, 1.0, 1.0, 3.0, 4.0, 4.0, 2.0, 1.0, 3.0, 3.0, 3.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 6.0, 3.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 5.0, 2.0, 1.0, 1.0, 2.0, 6.0, 6.0, 2.0, 1.0, 1.0, 5.0, 5.0, 3.0, 1.0, 2.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(2).player: ◯◯◉◯ +InformationStateTensor(2).hand = [4.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(2).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 1.0, 4.0, 1.0, 0.0, 1.0, 3.0, 5.0, 3.0, 1.0, 1.0, 4.0, 6.0, 4.0, 2.0, 1.0, 4.0, 5.0, 5.0, 3.0, 1.0, 6.0, 6.0, 6.0, 0.0, 1.0, 1.0, 6.0, 6.0, 1.0, 1.0, 3.0, 4.0, 4.0, 2.0, 1.0, 3.0, 3.0, 3.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 6.0, 3.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 5.0, 2.0, 1.0, 1.0, 2.0, 6.0, 6.0, 2.0, 1.0, 1.0, 5.0, 5.0, 3.0, 1.0, 2.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(3).player: ◯◯◯◉ +InformationStateTensor(3).hand = [1.0, 3.0, 1.0, 5.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(3).actions_history = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 6.0, 0.0, 1.0, 1.0, 0.0, 4.0, 0.0, 2.0, 1.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 5.0, 5.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 1.0, 4.0, 1.0, 0.0, 1.0, 3.0, 5.0, 3.0, 1.0, 1.0, 4.0, 6.0, 4.0, 2.0, 1.0, 4.0, 5.0, 5.0, 3.0, 1.0, 6.0, 6.0, 6.0, 0.0, 1.0, 1.0, 6.0, 6.0, 1.0, 1.0, 3.0, 4.0, 4.0, 2.0, 1.0, 3.0, 3.0, 3.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 6.0, 3.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 5.0, 2.0, 1.0, 1.0, 2.0, 6.0, 6.0, 2.0, 1.0, 1.0, 5.0, 5.0, 3.0, 1.0, 2.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationString(0) = "p0 hand:[] last_action:p0 tile:(2.0, 3.0) pip:2.0" +ObservationString(1) = "p1 hand:[(1.0, 1.0), (2.0, 2.0)] last_action:p0 tile:(2.0, 3.0) pip:2.0" +ObservationString(2) = "p2 hand:[(4.0, 4.0)] last_action:p0 tile:(2.0, 3.0) pip:2.0" +ObservationString(3) = "p3 hand:[(1.0, 3.0), (5.0, 5.0)] last_action:p0 tile:(2.0, 3.0) pip:2.0" +PublicObservationString() = "p0 last_action:p0 tile:(2.0, 3.0) pip:2.0" +PrivateObservationString(0) = "p0 hand:[]" +PrivateObservationString(1) = "p1 hand:[(1.0, 1.0), (2.0, 2.0)]" +PrivateObservationString(2) = "p2 hand:[(4.0, 4.0)]" +PrivateObservationString(3) = "p3 hand:[(1.0, 3.0), (5.0, 5.0)]" +ObservationTensor(0).player: ◉◯◯◯ +ObservationTensor(0).hand: ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ + ◯◯◯ +ObservationTensor(0).last_action = [2.0, 3.0, 2.0, 0.0] +ObservationTensor(0).hand_sizes = [0.0, 2.0, 0.0, 0.0] +ObservationTensor(1).player: ◯◉◯◯ +ObservationTensor(1).hand = [1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1).last_action = [2.0, 3.0, 2.0, 0.0] +ObservationTensor(1).hand_sizes = [2.0, 0.0, 0.0, 0.0] +ObservationTensor(2).player: ◯◯◉◯ +ObservationTensor(2).hand = [4.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(2).last_action = [2.0, 3.0, 2.0, 0.0] +ObservationTensor(2).hand_sizes = [1.0, 2.0, 0.0, 0.0] +ObservationTensor(3).player: ◯◯◯◉ +ObservationTensor(3).hand = [1.0, 3.0, 1.0, 5.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(3).last_action = [2.0, 3.0, 2.0, 0.0] +ObservationTensor(3).hand_sizes = [2.0, 1.0, 0.0, 0.0] +Rewards() = [20, -20, 20, -20] +Returns() = [20, -20, 20, -20] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_tic_tac_toe.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_tic_tac_toe.txt new file mode 100644 index 0000000..c97b9f7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/python_tic_tac_toe.txt @@ -0,0 +1,240 @@ +game: python_tic_tac_toe + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Python Tic-Tac-Toe" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = 
"python_tic_tac_toe" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 9 +PolicyTensorShape() = [9] +MaxChanceOutcomes() = 0 +GetParameters() = {} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [3, 3, 3] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 27 +MaxGameLength() = 9 +ToString() = "python_tic_tac_toe()" + +# State 0 +# ... +# ... +# ... +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = "...\n...\n..." +ObservationString(1) = "...\n...\n..." +ObservationTensor(0): +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +ObservationTensor(1): +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8] +StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,2)", "x(1,0)", "x(1,1)", "x(1,2)", "x(2,0)", "x(2,1)", "x(2,2)"] + +# Apply action "x(2,2)" +action: 8 + +# State 1 +# ... +# ... +# ..x +IsTerminal() = False +History() = [8] +HistoryString() = "8" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "8" +InformationStateString(1) = "8" +ObservationString(0) = "...\n...\n..x" +ObservationString(1) = "...\n...\n..x" +ObservationTensor(0): +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◯ ◯◯◯ ◯◯◉ +ObservationTensor(1): +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◯ ◯◯◯ ◯◯◉ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7] +StringLegalActions() = ["o(0,0)", "o(0,1)", "o(0,2)", "o(1,0)", "o(1,1)", "o(1,2)", "o(2,0)", "o(2,1)"] + +# Apply action "o(1,0)" +action: 3 + +# State 2 +# ... +# o.. +# ..x +IsTerminal() = False +History() = [8, 3] +HistoryString() = "8, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "8, 3" +InformationStateString(1) = "8, 3" +ObservationString(0) = "...\no..\n..x" +ObservationString(1) = "...\no..\n..x" +ObservationTensor(0): +◉◉◉ ◯◯◯ ◯◯◯ +◯◉◉ ◉◯◯ ◯◯◯ +◉◉◯ ◯◯◯ ◯◯◉ +ObservationTensor(1): +◉◉◉ ◯◯◯ ◯◯◯ +◯◉◉ ◉◯◯ ◯◯◯ +◉◉◯ ◯◯◯ ◯◯◉ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [0, 1, 2, 4, 5, 6, 7] +StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,2)", "x(1,1)", "x(1,2)", "x(2,0)", "x(2,1)"] + +# Apply action "x(2,0)" +action: 6 + +# State 3 +# ... +# o.. +# x.x +IsTerminal() = False +History() = [8, 3, 6] +HistoryString() = "8, 3, 6" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "8, 3, 6" +InformationStateString(1) = "8, 3, 6" +ObservationString(0) = "...\no..\nx.x" +ObservationString(1) = "...\no..\nx.x" +ObservationTensor(0): +◉◉◉ ◯◯◯ ◯◯◯ +◯◉◉ ◉◯◯ ◯◯◯ +◯◉◯ ◯◯◯ ◉◯◉ +ObservationTensor(1): +◉◉◉ ◯◯◯ ◯◯◯ +◯◉◉ ◉◯◯ ◯◯◯ +◯◉◯ ◯◯◯ ◉◯◉ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [0, 1, 2, 4, 5, 7] +StringLegalActions() = ["o(0,0)", "o(0,1)", "o(0,2)", "o(1,1)", "o(1,2)", "o(2,1)"] + +# Apply action "o(0,0)" +action: 0 + +# State 4 +# o.. +# o.. 
+# x.x +IsTerminal() = False +History() = [8, 3, 6, 0] +HistoryString() = "8, 3, 6, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "8, 3, 6, 0" +InformationStateString(1) = "8, 3, 6, 0" +ObservationString(0) = "o..\no..\nx.x" +ObservationString(1) = "o..\no..\nx.x" +ObservationTensor(0): +◯◉◉ ◉◯◯ ◯◯◯ +◯◉◉ ◉◯◯ ◯◯◯ +◯◉◯ ◯◯◯ ◉◯◉ +ObservationTensor(1): +◯◉◉ ◉◯◯ ◯◯◯ +◯◉◉ ◉◯◯ ◯◯◯ +◯◉◯ ◯◯◯ ◉◯◉ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [1, 2, 4, 5, 7] +StringLegalActions() = ["x(0,1)", "x(0,2)", "x(1,1)", "x(1,2)", "x(2,1)"] + +# Apply action "x(0,2)" +action: 2 + +# State 5 +# o.x +# o.. +# x.x +IsTerminal() = False +History() = [8, 3, 6, 0, 2] +HistoryString() = "8, 3, 6, 0, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "8, 3, 6, 0, 2" +InformationStateString(1) = "8, 3, 6, 0, 2" +ObservationString(0) = "o.x\no..\nx.x" +ObservationString(1) = "o.x\no..\nx.x" +ObservationTensor(0): +◯◉◯ ◉◯◯ ◯◯◉ +◯◉◉ ◉◯◯ ◯◯◯ +◯◉◯ ◯◯◯ ◉◯◉ +ObservationTensor(1): +◯◉◯ ◉◯◯ ◯◯◉ +◯◉◉ ◉◯◯ ◯◯◯ +◯◉◯ ◯◯◯ ◉◯◉ +Rewards() = [0, 0] +Returns() = [0, -0] +LegalActions() = [1, 4, 5, 7] +StringLegalActions() = ["o(0,1)", "o(1,1)", "o(1,2)", "o(2,1)"] + +# Apply action "o(0,1)" +action: 1 + +# State 6 +# Apply action "x(1,2)" +action: 5 + +# State 7 +# oox +# o.x +# x.x +IsTerminal() = True +History() = [8, 3, 6, 0, 2, 1, 5] +HistoryString() = "8, 3, 6, 0, 2, 1, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = PlayerId.TERMINAL +InformationStateString(0) = "8, 3, 6, 0, 2, 1, 5" +InformationStateString(1) = "8, 3, 6, 0, 2, 1, 5" +ObservationString(0) = "oox\no.x\nx.x" +ObservationString(1) = "oox\no.x\nx.x" +ObservationTensor(0): +◯◯◯ ◉◉◯ ◯◯◉ +◯◉◯ ◉◯◯ ◯◯◉ +◯◉◯ ◯◯◯ ◉◯◉ +ObservationTensor(1): +◯◯◯ ◉◉◯ ◯◯◉ +◯◉◯ ◉◯◯ ◯◯◉ +◯◉◯ ◯◯◯ ◉◯◉ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/quoridor(board_size=5).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/quoridor(board_size=5).txt new file mode 100644 index 0000000..9284464 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/quoridor(board_size=5).txt @@ -0,0 +1,1063 @@ +game: quoridor(board_size=5) + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Quoridor" +GameType.max_num_players = 4 +GameType.min_num_players = 2 +GameType.parameter_specification = ["ansi_color_output", "board_size", "players", "wall_count"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "quoridor" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 81 +PolicyTensorShape() = [81] +MaxChanceOutcomes() = 0 +GetParameters() = {ansi_color_output=False,board_size=5,players=2,wall_count=3} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [5, 9, 9] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 405 +MaxGameLength() = 100 +ToString() = "quoridor(board_size=5)" + +# State 0 +# Board size: 5, walls: 3, 3 +# a b c d e +# 1 . . @ . . 
1 +# +# 2 . . . . . 2 +# +# 3 . . . . . 3 +# +# 4 . . . . . 4 +# +# 5 . . 0 . . 5 +# a b c d e +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = "Board size: 5, walls: 3, 3\n a b c d e\n 1 . . @ . . 1\n \n 2 . . . . . 2\n \n 3 . . . . . 3\n \n 4 . . . . . 4\n \n 5 . . 0 . . 5\n a b c d e\n" +ObservationString(1) = "Board size: 5, walls: 3, 3\n a b c d e\n 1 . . @ . . 1\n \n 2 . . . . . 2\n \n 3 . . . . . 3\n \n 4 . . . . . 4\n \n 5 . . 0 . . 5\n a b c d e\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 3, 5, 7, 9, 11, 13, 15, 18, 19, 21, 22, 23, 25, 27, 29, 31, 33, 37, 39, 41, 43, 45, 47, 49, 51, 55, 57, 59, 61, 63, 65, 67, 69] +StringLegalActions() = ["a1v", "c4", "b1v", "c1v", "d1v", "a1h", "b1h", "c1h", "d1h", "b5", "a2v", "b2v", "d5", "c2v", "d2v", "a2h", "b2h", "c2h", "d2h", "a3v", "b3v", "c3v", "d3v", "a3h", "b3h", "c3h", "d3h", "a4v", "b4v", "c4v", "d4v", "a4h", "b4h", "c4h", "d4h"] + +# Apply action "a4h" +action: 63 + +# State 1 +# Board size: 5, walls: 2, 3 +# a b c d e +# 1 . . @ . . 1 +# +# 2 . . . . . 2 +# +# 3 . . . . . 3 +# +# 4 . . . . . 4 +# ---+--- +# 5 . . 0 . . 5 +# a b c d e +IsTerminal() = False +History() = [63] +HistoryString() = "63" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "63" +InformationStateString(1) = "63" +ObservationString(0) = "Board size: 5, walls: 2, 3\n a b c d e\n 1 . . @ . . 1\n \n 2 . . . . . 2\n \n 3 . . . . . 3\n \n 4 . . . . . 4\n ---+--- \n 5 . . 0 . . 5\n a b c d e\n" +ObservationString(1) = "Board size: 5, walls: 2, 3\n a b c d e\n 1 . . @ . . 1\n \n 2 . . . . . 2\n \n 3 . . . . . 3\n \n 4 . . . . . 4\n ---+--- \n 5 . . 0 . . 
5\n a b c d e\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 
2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 3, 5, 7, 9, 11, 13, 15, 18, 19, 21, 22, 23, 25, 27, 29, 31, 33, 37, 38, 39, 41, 43, 45, 47, 49, 51, 57, 59, 61, 67, 69] +StringLegalActions() = ["a1v", "b1v", "c1v", "d1v", "a1h", "b1h", "c1h", "d1h", "b1", "a2v", "b2v", "d1", "c2v", "d2v", "a2h", "b2h", "c2h", "d2h", "a3v", "c2", "b3v", "c3v", "d3v", "a3h", "b3h", "c3h", "d3h", "b4v", "c4v", "d4v", "c4h", "d4h"] + +# Apply action "c4v" +action: 59 + +# State 2 +# Board size: 5, walls: 2, 2 +# a b c d e +# 1 . . @ . . 1 +# +# 2 . . . . . 2 +# +# 3 . . . . . 3 +# +# 4 . . . | . . 4 +# ---+--- + +# 5 . . 0 | . . 5 +# a b c d e +IsTerminal() = False +History() = [63, 59] +HistoryString() = "63, 59" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "63, 59" +InformationStateString(1) = "63, 59" +ObservationString(0) = "Board size: 5, walls: 2, 2\n a b c d e\n 1 . . @ . . 1\n \n 2 . . . . . 2\n \n 3 . . . . . 3\n \n 4 . . . | . . 4\n ---+--- + \n 5 . . 0 | . . 5\n a b c d e\n" +ObservationString(1) = "Board size: 5, walls: 2, 2\n a b c d e\n 1 . . @ . . 1\n \n 2 . . . . . 2\n \n 3 . . . . . 3\n \n 4 . . . | . . 4\n ---+--- + \n 5 . . 0 | . . 
5\n a b c d e\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 
2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 3, 5, 7, 9, 11, 13, 15, 18, 19, 21, 23, 25, 27, 29, 31, 33, 37, 39, 43, 45, 47, 49, 51, 57, 61, 69] +StringLegalActions() = ["a1v", "c4", "b1v", "c1v", "d1v", "a1h", "b1h", "c1h", "d1h", "b5", "a2v", "b2v", "c2v", "d2v", "a2h", "b2h", "c2h", "d2h", "a3v", "b3v", "d3v", "a3h", "b3h", "c3h", "d3h", "b4v", "d4v", "d4h"] + +# Apply action "d1v" +action: 7 + +# State 3 +# Board size: 5, walls: 1, 2 +# a b c d e +# 1 . . @ . | . 1 +# + +# 2 . . . . | . 2 +# +# 3 . . . . . 3 +# +# 4 . . . | . . 4 +# ---+--- + +# 5 . . 0 | . . 5 +# a b c d e +IsTerminal() = False +History() = [63, 59, 7] +HistoryString() = "63, 59, 7" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "63, 59, 7" +InformationStateString(1) = "63, 59, 7" +ObservationString(0) = "Board size: 5, walls: 1, 2\n a b c d e\n 1 . . @ . | . 1\n + \n 2 . . . . | . 2\n \n 3 . . . . . 3\n \n 4 . . . | . . 4\n ---+--- + \n 5 . . 0 | . . 5\n a b c d e\n" +ObservationString(1) = "Board size: 5, walls: 1, 2\n a b c d e\n 1 . . @ . | . 1\n + \n 2 . . . . | . 2\n \n 3 . . . . . 3\n \n 4 . . . | . . 4\n ---+--- + \n 5 . . 0 | . . 5\n a b c d e\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 
2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 3, 5, 9, 11, 13, 18, 19, 21, 22, 23, 27, 29, 31, 33, 37, 38, 39, 43, 45, 47, 49, 51, 57, 61, 69] +StringLegalActions() = ["a1v", "b1v", "c1v", "a1h", "b1h", "c1h", "b1", "a2v", "b2v", "d1", "c2v", "a2h", "b2h", "c2h", "d2h", "a3v", "c2", "b3v", "d3v", "a3h", "b3h", "c3h", "d3h", "b4v", "d4v", "d4h"] + +# Apply action "c3h" +action: 49 + +# State 4 +# Board size: 5, walls: 1, 1 +# a b c d e +# 1 . . @ . | . 1 +# + +# 2 . . . . | . 2 +# +# 3 . . . . . 3 +# ---+--- +# 4 . . . | . . 4 +# ---+--- + +# 5 . . 0 | . . 5 +# a b c d e +IsTerminal() = False +History() = [63, 59, 7, 49] +HistoryString() = "63, 59, 7, 49" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "63, 59, 7, 49" +InformationStateString(1) = "63, 59, 7, 49" +ObservationString(0) = "Board size: 5, walls: 1, 1\n a b c d e\n 1 . . @ . | . 1\n + \n 2 . . . . | . 2\n \n 3 . . . . . 3\n ---+--- \n 4 . . . | . . 4\n ---+--- + \n 5 . . 0 | . . 5\n a b c d e\n" +ObservationString(1) = "Board size: 5, walls: 1, 1\n a b c d e\n 1 . . @ . | . 1\n + \n 2 . . . . | . 2\n \n 3 . . . . . 3\n ---+--- \n 4 . . . | . . 4\n ---+--- + \n 5 . . 
0 | . . 5\n a b c d e\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ ◉◉◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 3, 5, 9, 11, 13, 18, 19, 21, 23, 27, 29, 31, 33, 37, 43, 61, 69] +StringLegalActions() = ["a1v", "c4", "b1v", "c1v", "a1h", "b1h", "c1h", "b5", "a2v", "b2v", "c2v", "a2h", "b2h", "c2h", "d2h", "a3v", "d3v", "d4v", "d4h"] + +# Apply action "d4v" +action: 61 + +# State 5 +# Board size: 5, walls: 0, 1 +# a b c d e +# 1 . . @ . | . 1 +# + +# 2 . . . . | . 2 +# +# 3 . . . . . 3 +# ---+--- +# 4 . . . | . | . 4 +# ---+--- + + +# 5 . . 0 | . | . 5 +# a b c d e +IsTerminal() = False +History() = [63, 59, 7, 49, 61] +HistoryString() = "63, 59, 7, 49, 61" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "63, 59, 7, 49, 61" +InformationStateString(1) = "63, 59, 7, 49, 61" +ObservationString(0) = "Board size: 5, walls: 0, 1\n a b c d e\n 1 . . @ . | . 1\n + \n 2 . . . . | . 2\n \n 3 . . . . . 3\n ---+--- \n 4 . . . | . | . 4\n ---+--- + + \n 5 . . 0 | . | . 5\n a b c d e\n" +ObservationString(1) = "Board size: 5, walls: 0, 1\n a b c d e\n 1 . . @ . | . 1\n + \n 2 . . . . | . 2\n \n 3 . . . . . 3\n ---+--- \n 4 . . . | . | . 4\n ---+--- + + \n 5 . . 0 | . | . 
5\n a b c d e\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 3, 5, 9, 11, 13, 18, 19, 21, 22, 23, 27, 29, 31, 33, 37, 38] +StringLegalActions() = ["a1v", "b1v", "c1v", "a1h", "b1h", "c1h", "b1", "a2v", "b2v", "d1", "c2v", "a2h", "b2h", "c2h", "d2h", "a3v", "c2"] + +# Apply action "c2h" +action: 31 + +# State 6 +# Apply action "b5" +action: 18 + +# State 7 +# Apply action "b1" +action: 18 + +# State 8 +# Apply action "c5" +action: 22 + +# State 9 +# Apply action "a1" +action: 18 + +# State 10 +# Apply action "c4" +action: 2 + +# State 11 +# Apply action "b1" +action: 22 + +# State 12 +# Apply action "c5" +action: 38 + +# State 13 +# Apply action "c1" +action: 22 + +# State 14 +# Apply action "c4" +action: 2 + +# State 15 +# Apply action "b1" +action: 18 + +# State 16 +# Apply action "c5" +action: 38 + +# State 17 +# Apply action "a1" +action: 18 + +# State 18 +# Apply action "b5" +action: 18 + +# State 19 +# Apply action "a2" +action: 38 + +# State 20 +# Board size: 5, walls: 0, 0 +# a b c d e +# 1 . . . . | . 1 +# + +# 2 @ . . . | . 2 +# ---+--- +# 3 . . . . . 3 +# ---+--- +# 4 . . . | . | . 4 +# ---+--- + + +# 5 . 0 . | . | . 5 +# a b c d e +IsTerminal() = False +History() = [63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38] +HistoryString() = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38" +InformationStateString(1) = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38" +ObservationString(0) = "Board size: 5, walls: 0, 0\n a b c d e\n 1 . . . . | . 1\n + \n 2 @ . . . | . 2\n ---+--- \n 3 . . . . . 3\n ---+--- \n 4 . . . | . | . 4\n ---+--- + + \n 5 . 0 . | . | . 5\n a b c d e\n" +ObservationString(1) = "Board size: 5, walls: 0, 0\n a b c d e\n 1 . . . . | . 1\n + \n 2 @ . . . | . 2\n ---+--- \n 3 . . . . . 3\n ---+--- \n 4 . . . | . | . 4\n ---+--- + + \n 5 . 0 . | . | . 
5\n a b c d e\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [18, 22] +StringLegalActions() = ["a5", "c5"] + +# Apply action "a5" +action: 18 + +# State 21 +# Board size: 5, walls: 0, 0 +# a b c d e +# 1 . . . . | . 1 +# + +# 2 @ . . . | . 2 +# ---+--- +# 3 . . . . . 3 +# ---+--- +# 4 . . . | . | . 4 +# ---+--- + + +# 5 0 . . | . | . 5 +# a b c d e +IsTerminal() = False +History() = [63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18] +HistoryString() = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18" +InformationStateString(1) = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18" +ObservationString(0) = "Board size: 5, walls: 0, 0\n a b c d e\n 1 . . . . | . 1\n + \n 2 @ . . . | . 2\n ---+--- \n 3 . . . . . 3\n ---+--- \n 4 . . . | . | . 4\n ---+--- + + \n 5 0 . . | . | . 5\n a b c d e\n" +ObservationString(1) = "Board size: 5, walls: 0, 0\n a b c d e\n 1 . . . . | . 1\n + \n 2 @ . . . | . 2\n ---+--- \n 3 . . . . . 3\n ---+--- \n 4 . . . | . | . 4\n ---+--- + + \n 5 0 . . | . | . 
5\n a b c d e\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2, 22, 38] +StringLegalActions() = ["a1", "b2", "a3"] + +# Apply action "b2" +action: 22 + +# State 22 +# Apply action "b5" +action: 22 + +# State 23 +# Apply action "c2" +action: 22 + +# State 24 +# Apply action "a5" +action: 18 + +# State 25 +# Apply action "d2" +action: 22 + +# State 26 +# Apply action "b5" +action: 22 + +# State 27 +# Apply action "c2" +action: 18 + +# State 28 +# Apply action "c5" +action: 22 + +# State 29 +# Apply action "b2" +action: 18 + +# State 30 +# Apply action "b5" +action: 18 + +# State 31 +# Apply action "b3" +action: 38 + +# State 32 +# Apply action "a5" +action: 18 + +# State 33 +# Apply action "c3" +action: 22 + +# State 34 +# Apply action "b5" +action: 22 + +# State 35 +# Apply action "d3" +action: 22 + +# State 36 +# Apply action "c5" +action: 22 + +# State 37 +# Apply action "e3" +action: 22 + +# State 38 +# Apply action "b5" +action: 18 + +# State 39 +# Apply action "e4" +action: 38 + +# State 40 +# Board size: 5, walls: 0, 0 +# a b c d e +# 1 . . . . | . 1 +# + +# 2 . . . . | . 2 +# ---+--- +# 3 . . . . . 3 +# ---+--- +# 4 . . . | . | @ 4 +# ---+--- + + +# 5 . 0 . | . | . 5 +# a b c d e +IsTerminal() = False +History() = [63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38] +HistoryString() = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38" +InformationStateString(1) = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38" +ObservationString(0) = "Board size: 5, walls: 0, 0\n a b c d e\n 1 . . . . | . 1\n + \n 2 . . . . | . 2\n ---+--- \n 3 . . . . . 3\n ---+--- \n 4 . . . | . | @ 4\n ---+--- + + \n 5 . 0 . | . | . 5\n a b c d e\n" +ObservationString(1) = "Board size: 5, walls: 0, 0\n a b c d e\n 1 . . . . | . 1\n + \n 2 . . . . | . 2\n ---+--- \n 3 . . . . . 3\n ---+--- \n 4 . . . | . | @ 4\n ---+--- + + \n 5 . 0 . | . | . 
5\n a b c d e\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [18, 22] +StringLegalActions() = ["a5", "c5"] + +# Apply action "a5" +action: 18 + +# State 41 +# Board size: 5, walls: 0, 0 +# a b c d e +# 1 . . . . | . 1 +# + +# 2 . . . . | . 2 +# ---+--- +# 3 . . . . . 3 +# ---+--- +# 4 . . . | . | @ 4 +# ---+--- + + +# 5 0 . . | . | . 5 +# a b c d e +IsTerminal() = False +History() = [63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38, 18] +HistoryString() = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38, 18" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38, 18" +InformationStateString(1) = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38, 18" +ObservationString(0) = "Board size: 5, walls: 0, 0\n a b c d e\n 1 . . . . | . 1\n + \n 2 . . . . | . 2\n ---+--- \n 3 . . . . . 3\n ---+--- \n 4 . . . | . | @ 4\n ---+--- + + \n 5 0 . . | . | . 5\n a b c d e\n" +ObservationString(1) = "Board size: 5, walls: 0, 0\n a b c d e\n 1 . . . . | . 1\n + \n 2 . . . . | . 2\n ---+--- \n 3 . . . . . 3\n ---+--- \n 4 . . . | . | @ 4\n ---+--- + + \n 5 0 . . | . | . 
5\n a b c d e\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2, 38] +StringLegalActions() = ["e3", "e5"] + +# Apply action "e3" +action: 2 + +# State 42 +# Apply action "b5" +action: 22 + +# State 43 +# Apply action "e2" +action: 2 + +# State 44 +# Apply action "c5" +action: 22 + +# State 45 +# Apply action "e1" +action: 2 + +# State 46 +# Apply action "c4" +action: 2 + +# State 47 +# Apply action "e2" +action: 38 + +# State 48 +# Apply action "c5" +action: 38 + +# State 49 +# Apply action "e1" +action: 2 + +# State 50 +# Apply action "b5" +action: 18 + +# State 51 +# Apply action "e2" +action: 38 + +# State 52 +# Apply action "a5" +action: 18 + +# State 53 +# Apply action "e3" +action: 38 + +# State 54 +# Apply action "b5" +action: 22 + +# State 55 +# Apply action "d3" +action: 18 + +# State 56 +# Apply action "c5" +action: 22 + +# State 57 +# Apply action "e3" +action: 22 + +# State 58 +# Apply action "b5" +action: 18 + +# State 59 +# Apply action "d3" +action: 18 + +# State 60 +# Board size: 5, walls: 0, 0 +# a b c d e +# 1 . . . . | . 1 +# + +# 2 . . . . | . 2 +# ---+--- +# 3 . . . @ . 3 +# ---+--- +# 4 . . . | . | . 4 +# ---+--- + + +# 5 . 0 . | . | . 5 +# a b c d e +IsTerminal() = False +History() = [63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38, 18, 2, 22, 2, 22, 2, 2, 38, 38, 2, 18, 38, 18, 38, 22, 18, 22, 22, 18, 18] +HistoryString() = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38, 18, 2, 22, 2, 22, 2, 2, 38, 38, 2, 18, 38, 18, 38, 22, 18, 22, 22, 18, 18" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38, 18, 2, 22, 2, 22, 2, 2, 38, 38, 2, 18, 38, 18, 38, 22, 18, 22, 22, 18, 18" +InformationStateString(1) = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38, 18, 2, 22, 2, 22, 2, 2, 38, 38, 2, 18, 38, 18, 38, 22, 18, 22, 22, 18, 18" +ObservationString(0) = "Board size: 5, walls: 0, 0\n a b c d e\n 1 . . . . | . 1\n + \n 2 . . . . | . 2\n ---+--- \n 3 . . . @ . 3\n ---+--- \n 4 . . . | . | . 4\n ---+--- + + \n 5 . 0 . | . | . 
5\n a b c d e\n" +ObservationString(1) = "Board size: 5, walls: 0, 0\n a b c d e\n 1 . . . . | . 1\n + \n 2 . . . . | . 2\n ---+--- \n 3 . . . @ . 3\n ---+--- \n 4 . . . | . | . 4\n ---+--- + + \n 5 . 0 . | . | . 5\n a b c d e\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [18, 22] +StringLegalActions() = ["a5", "c5"] + +# Apply action "c5" +action: 22 + +# State 61 +# Board size: 5, walls: 0, 0 +# a b c d e +# 1 . . . . | . 1 +# + +# 2 . . . . | . 2 +# ---+--- +# 3 . . . @ . 3 +# ---+--- +# 4 . . . | . | . 4 +# ---+--- + + +# 5 . . 0 | . | . 5 +# a b c d e +IsTerminal() = False +History() = [63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38, 18, 2, 22, 2, 22, 2, 2, 38, 38, 2, 18, 38, 18, 38, 22, 18, 22, 22, 18, 18, 22] +HistoryString() = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38, 18, 2, 22, 2, 22, 2, 2, 38, 38, 2, 18, 38, 18, 38, 22, 18, 22, 22, 18, 18, 22" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38, 18, 2, 22, 2, 22, 2, 2, 38, 38, 2, 18, 38, 18, 38, 22, 18, 22, 22, 18, 18, 22" +InformationStateString(1) = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38, 18, 2, 22, 2, 22, 2, 2, 38, 38, 2, 18, 38, 18, 38, 22, 18, 22, 22, 18, 18, 22" +ObservationString(0) = "Board size: 5, walls: 0, 0\n a b c d e\n 1 . . . . | . 1\n + \n 2 . . . . | . 2\n ---+--- \n 3 . . . @ . 3\n ---+--- \n 4 . . . | . | . 4\n ---+--- + + \n 5 . . 0 | . | . 5\n a b c d e\n" +ObservationString(1) = "Board size: 5, walls: 0, 0\n a b c d e\n 1 . . . . | . 1\n + \n 2 . . . . | . 2\n ---+--- \n 3 . . . @ . 3\n ---+--- \n 4 . . . | . | . 4\n ---+--- + + \n 5 . . 0 | . | . 
5\n a b c d e\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [18, 22] +StringLegalActions() = ["c3", "e3"] + +# Apply action "c3" +action: 18 + +# State 62 +# Apply action "b5" +action: 18 + +# State 63 +# Apply action "b3" +action: 18 + +# State 64 +# Apply action "a5" +action: 18 + +# State 65 +# Apply action "a3" +action: 18 + +# State 66 +# Apply action "b5" +action: 22 + +# State 67 +# Apply action "a4" +action: 38 + +# State 68 +# Apply action "a5" +action: 18 + +# State 69 +# Apply action "a3" +action: 2 + +# State 70 +# Apply action "b5" +action: 22 + +# State 71 +# Apply action "b3" +action: 22 + +# State 72 +# Apply action "c5" +action: 22 + +# State 73 +# Apply action "b2" +action: 2 + +# State 74 +# Apply action "b5" +action: 18 + +# State 75 +# Apply action "c2" +action: 22 + +# State 76 +# Apply action "a5" +action: 18 + +# State 77 +# Apply action "c1" +action: 2 + +# State 78 +# Apply action "b5" +action: 22 + +# State 79 +# Apply action "b1" +action: 18 + +# State 80 +# Board size: 5, walls: 0, 0 +# a b c d e +# 1 . @ . . | . 1 +# + +# 2 . . . . | . 2 +# ---+--- +# 3 . . . . . 3 +# ---+--- +# 4 . . . | . | . 4 +# ---+--- + + +# 5 . 0 . | . | . 
5 +# a b c d e +IsTerminal() = False +History() = [63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38, 18, 2, 22, 2, 22, 2, 2, 38, 38, 2, 18, 38, 18, 38, 22, 18, 22, 22, 18, 18, 22, 18, 18, 18, 18, 18, 22, 38, 18, 2, 22, 22, 22, 2, 18, 22, 18, 2, 22, 18] +HistoryString() = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38, 18, 2, 22, 2, 22, 2, 2, 38, 38, 2, 18, 38, 18, 38, 22, 18, 22, 22, 18, 18, 22, 18, 18, 18, 18, 18, 22, 38, 18, 2, 22, 22, 22, 2, 18, 22, 18, 2, 22, 18" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38, 18, 2, 22, 2, 22, 2, 2, 38, 38, 2, 18, 38, 18, 38, 22, 18, 22, 22, 18, 18, 22, 18, 18, 18, 18, 18, 22, 38, 18, 2, 22, 22, 22, 2, 18, 22, 18, 2, 22, 18" +InformationStateString(1) = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38, 18, 2, 22, 2, 22, 2, 2, 38, 38, 2, 18, 38, 18, 38, 22, 18, 22, 22, 18, 18, 22, 18, 18, 18, 18, 18, 22, 38, 18, 2, 22, 22, 22, 2, 18, 22, 18, 2, 22, 18" +ObservationString(0) = "Board size: 5, walls: 0, 0\n a b c d e\n 1 . @ . . | . 1\n + \n 2 . . . . | . 2\n ---+--- \n 3 . . . . . 3\n ---+--- \n 4 . . . | . | . 4\n ---+--- + + \n 5 . 0 . | . | . 5\n a b c d e\n" +ObservationString(1) = "Board size: 5, walls: 0, 0\n a b c d e\n 1 . @ . . | . 1\n + \n 2 . . . . | . 2\n ---+--- \n 3 . . . . . 3\n ---+--- \n 4 . . . | . | . 4\n ---+--- + + \n 5 . 0 . | . | . 5\n a b c d e\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [18, 22] +StringLegalActions() = ["a5", "c5"] + +# Apply action "c5" +action: 22 + +# State 81 +# Board size: 5, walls: 0, 0 +# a b c d e +# 1 . @ . . | . 1 +# + +# 2 . . . . | . 2 +# ---+--- +# 3 . . . . . 3 +# ---+--- +# 4 . . . | . | . 4 +# ---+--- + + +# 5 . . 0 | . | . 
5 +# a b c d e +IsTerminal() = False +History() = [63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38, 18, 2, 22, 2, 22, 2, 2, 38, 38, 2, 18, 38, 18, 38, 22, 18, 22, 22, 18, 18, 22, 18, 18, 18, 18, 18, 22, 38, 18, 2, 22, 22, 22, 2, 18, 22, 18, 2, 22, 18, 22] +HistoryString() = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38, 18, 2, 22, 2, 22, 2, 2, 38, 38, 2, 18, 38, 18, 38, 22, 18, 22, 22, 18, 18, 22, 18, 18, 18, 18, 18, 22, 38, 18, 2, 22, 22, 22, 2, 18, 22, 18, 2, 22, 18, 22" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38, 18, 2, 22, 2, 22, 2, 2, 38, 38, 2, 18, 38, 18, 38, 22, 18, 22, 22, 18, 18, 22, 18, 18, 18, 18, 18, 22, 38, 18, 2, 22, 22, 22, 2, 18, 22, 18, 2, 22, 18, 22" +InformationStateString(1) = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38, 18, 2, 22, 2, 22, 2, 2, 38, 38, 2, 18, 38, 18, 38, 22, 18, 22, 22, 18, 18, 22, 18, 18, 18, 18, 18, 22, 38, 18, 2, 22, 22, 22, 2, 18, 22, 18, 2, 22, 18, 22" +ObservationString(0) = "Board size: 5, walls: 0, 0\n a b c d e\n 1 . @ . . | . 1\n + \n 2 . . . . | . 2\n ---+--- \n 3 . . . . . 3\n ---+--- \n 4 . . . | . | . 4\n ---+--- + + \n 5 . . 0 | . | . 5\n a b c d e\n" +ObservationString(1) = "Board size: 5, walls: 0, 0\n a b c d e\n 1 . @ . . | . 1\n + \n 2 . . . . | . 2\n ---+--- \n 3 . . . . . 3\n ---+--- \n 4 . . . | . | . 4\n ---+--- + + \n 5 . . 0 | . | . 
5\n a b c d e\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [18, 22, 38] +StringLegalActions() = ["a1", "c1", "b2"] + +# Apply action "a1" +action: 18 + +# State 82 +# Apply action "b5" +action: 18 + +# State 83 +# Apply action "a2" +action: 38 + +# State 84 +# Apply action "a5" +action: 18 + +# State 85 +# Apply action "a1" +action: 2 + +# State 86 +# Apply action "b5" +action: 22 + +# State 87 +# Apply action "a2" +action: 38 + +# State 88 +# Apply action "a5" +action: 18 + +# State 89 +# Apply action "b2" +action: 22 + +# State 90 +# Apply action "b5" +action: 22 + +# State 91 +# Apply action "a2" +action: 18 + +# State 92 +# Apply action "c5" +action: 22 + +# State 93 +# Apply action "a3" +action: 38 + +# State 94 +# Apply action "c4" +action: 2 + +# State 95 +# Apply action "a4" +action: 38 + +# State 96 +# Apply action "b4" +action: 18 + +# State 97 +# Apply action "a3" +action: 2 + +# State 98 +# Apply action "c4" +action: 22 + +# State 99 +# Apply action "b3" +action: 22 + +# State 100 +# Board size: 5, walls: 0, 0 +# a b c d e +# 1 . . . . | . 1 +# + +# 2 . . . . | . 2 +# ---+--- +# 3 . @ . . . 3 +# ---+--- +# 4 . . 0 | . | . 4 +# ---+--- + + +# 5 . . . | . | . 
5 +# a b c d e +IsTerminal() = True +History() = [63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38, 18, 2, 22, 2, 22, 2, 2, 38, 38, 2, 18, 38, 18, 38, 22, 18, 22, 22, 18, 18, 22, 18, 18, 18, 18, 18, 22, 38, 18, 2, 22, 22, 22, 2, 18, 22, 18, 2, 22, 18, 22, 18, 18, 38, 18, 2, 22, 38, 18, 22, 22, 18, 22, 38, 2, 38, 18, 2, 22, 22] +HistoryString() = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38, 18, 2, 22, 2, 22, 2, 2, 38, 38, 2, 18, 38, 18, 38, 22, 18, 22, 22, 18, 18, 22, 18, 18, 18, 18, 18, 22, 38, 18, 2, 22, 22, 22, 2, 18, 22, 18, 2, 22, 18, 22, 18, 18, 38, 18, 2, 22, 38, 18, 22, 22, 18, 22, 38, 2, 38, 18, 2, 22, 22" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38, 18, 2, 22, 2, 22, 2, 2, 38, 38, 2, 18, 38, 18, 38, 22, 18, 22, 22, 18, 18, 22, 18, 18, 18, 18, 18, 22, 38, 18, 2, 22, 22, 22, 2, 18, 22, 18, 2, 22, 18, 22, 18, 18, 38, 18, 2, 22, 38, 18, 22, 22, 18, 22, 38, 2, 38, 18, 2, 22, 22" +InformationStateString(1) = "63, 59, 7, 49, 61, 31, 18, 18, 22, 18, 2, 22, 38, 22, 2, 18, 38, 18, 18, 38, 18, 22, 22, 22, 18, 22, 22, 18, 22, 18, 18, 38, 18, 22, 22, 22, 22, 22, 18, 38, 18, 2, 22, 2, 22, 2, 2, 38, 38, 2, 18, 38, 18, 38, 22, 18, 22, 22, 18, 18, 22, 18, 18, 18, 18, 18, 22, 38, 18, 2, 22, 22, 22, 2, 18, 22, 18, 2, 22, 18, 22, 18, 18, 38, 18, 2, 22, 38, 18, 22, 22, 18, 22, 38, 2, 38, 18, 2, 22, 22" +ObservationString(0) = "Board size: 5, walls: 0, 0\n a b c d e\n 1 . . . . | . 1\n + \n 2 . . . . | . 2\n ---+--- \n 3 . @ . . . 3\n ---+--- \n 4 . . 0 | . | . 4\n ---+--- + + \n 5 . . . | . | . 5\n a b c d e\n" +ObservationString(1) = "Board size: 5, walls: 0, 0\n a b c d e\n 1 . . . . | . 1\n + \n 2 . . . . | . 2\n ---+--- \n 3 . @ . . . 3\n ---+--- \n 4 . . 0 | . | . 4\n ---+--- + + \n 5 . . . | . | . 
5\n a b c d e\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/quoridor(players=4).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/quoridor(players=4).txt new file mode 100644 index 0000000..7809d2b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/quoridor(players=4).txt @@ -0,0 +1,7321 @@ +game: quoridor(players=4) + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Quoridor" +GameType.max_num_players = 4 +GameType.min_num_players = 2 +GameType.parameter_specification = ["ansi_color_output", "board_size", "players", "wall_count"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "quoridor" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 289 +PolicyTensorShape() = [289] +MaxChanceOutcomes() = 0 +GetParameters() = {ansi_color_output=False,board_size=9,players=4,wall_count=10} +NumPlayers() = 4 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [9, 17, 17] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 2601 +MaxGameLength() = 324 +ToString() = "quoridor(players=4)" + +# State 0 +# Board size: 9, walls: 10, 10, 10, 10 +# a b c d e f g h i +# 1 . . . . @ . . . . 1 +# +# 2 . . . . . . . . . 2 +# +# 3 . . . . . . . . . 3 +# +# 4 . . . . . . . . . 4 +# +# 5 # . . . . . . . % 5 +# +# 6 . . . . . . . . . 6 +# +# 7 . . . . . . . . . 7 +# +# 8 . . . . . . . . . 8 +# +# 9 . . . . 0 . . . . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "" +InformationStateString(1) = "" +InformationStateString(2) = "" +InformationStateString(3) = "" +ObservationString(0) = "Board size: 9, walls: 10, 10, 10, 10\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n \n 2 . . . . . . . . . 2\n \n 3 . . . . . . . . . 3\n \n 4 . . . . . . . . . 4\n \n 5 # . . . . . . . % 5\n \n 6 . . . . . . . . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 
9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 10, 10, 10, 10\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n \n 2 . . . . . . . . . 2\n \n 3 . . . . . . . . . 3\n \n 4 . . . . . . . . . 4\n \n 5 # . . . . . . . % 5\n \n 6 . . . . . . . . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 10, 10, 10, 10\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n \n 2 . . . . . . . . . 2\n \n 3 . . . . . . . . . 3\n \n 4 . . . . . . . . . 4\n \n 5 # . . . . . . . % 5\n \n 6 . . . . . . . . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 10, 10, 10, 10\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n \n 2 . . . . . . . . . 2\n \n 3 . . . . . . . . . 3\n \n 4 . . . . . . . . . 4\n \n 5 # . . . . . . . % 5\n \n 6 . . . . . . . . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 
10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 
10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 
10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 
10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0] +ObservationTensor(2) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 
10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 
10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0] +ObservationTensor(3) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 
10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 
10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [1, 2, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 34, 35, 37, 38, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95, 97, 99, 103, 105, 107, 109, 111, 113, 115, 117, 119, 121, 123, 125, 127, 129, 131, 133, 137, 139, 141, 143, 145, 147, 149, 151, 153, 155, 157, 159, 161, 163, 165, 167, 171, 173, 175, 177, 179, 181, 183, 185, 187, 189, 191, 193, 195, 197, 199, 201, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235, 239, 241, 243, 245, 247, 249, 251, 253, 255, 257, 259, 261, 263, 265, 267, 269] +StringLegalActions() = ["a1v", "e8", "b1v", "c1v", "d1v", "e1v", "f1v", "g1v", "h1v", "a1h", "b1h", "c1h", "d1h", "e1h", "f1h", "g1h", "h1h", "d9", "a2v", "b2v", "f9", "c2v", "d2v", "e2v", "f2v", "g2v", "h2v", "a2h", "b2h", "c2h", "d2h", "e2h", "f2h", "g2h", "h2h", "a3v", "b3v", "c3v", "d3v", "e3v", "f3v", "g3v", "h3v", "a3h", "b3h", "c3h", "d3h", "e3h", "f3h", "g3h", "h3h", "a4v", "b4v", "c4v", "d4v", "e4v", "f4v", "g4v", "h4v", "a4h", "b4h", "c4h", "d4h", "e4h", "f4h", "g4h", "h4h", "a5v", "b5v", "c5v", "d5v", "e5v", "f5v", "g5v", "h5v", "a5h", "b5h", "c5h", "d5h", "e5h", "f5h", "g5h", "h5h", "a6v", "b6v", "c6v", "d6v", "e6v", "f6v", "g6v", "h6v", "a6h", "b6h", "c6h", "d6h", "e6h", "f6h", "g6h", "h6h", "a7v", "b7v", "c7v", "d7v", "e7v", "f7v", "g7v", "h7v", "a7h", "b7h", "c7h", "d7h", "e7h", "f7h", "g7h", "h7h", "a8v", "b8v", "c8v", "d8v", "e8v", "f8v", "g8v", "h8v", "a8h", "b8h", "c8h", "d8h", "e8h", "f8h", "g8h", "h8h"] + +# Apply action "c5v" +action: 141 + +# State 1 +# Board size: 9, walls: 9, 10, 10, 10 +# a b c d e f g h i +# 1 . . . . @ . . . . 1 +# +# 2 . . . . . . . . . 2 +# +# 3 . . . . . . . . . 3 +# +# 4 . . . . . . . . . 4 +# +# 5 # . . | . . . . . % 5 +# + +# 6 . . . | . . . . . . 6 +# +# 7 . . . . . . . . . 7 +# +# 8 . . . . . . . . . 8 +# +# 9 . . . . 0 . . . . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141] +HistoryString() = "141" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "141" +InformationStateString(1) = "141" +InformationStateString(2) = "141" +InformationStateString(3) = "141" +ObservationString(0) = "Board size: 9, walls: 9, 10, 10, 10\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n \n 2 . . . . . . . . . 2\n \n 3 . . . . . . . . . 3\n \n 4 . . . . . . . . . 4\n \n 5 # . . | . . . . . % 5\n + \n 6 . . . | . . . . . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 
9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 9, 10, 10, 10\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n \n 2 . . . . . . . . . 2\n \n 3 . . . . . . . . . 3\n \n 4 . . . . . . . . . 4\n \n 5 # . . | . . . . . % 5\n + \n 6 . . . | . . . . . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 9, 10, 10, 10\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n \n 2 . . . . . . . . . 2\n \n 3 . . . . . . . . . 3\n \n 4 . . . . . . . . . 4\n \n 5 # . . | . . . . . % 5\n + \n 6 . . . | . . . . . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 9, 10, 10, 10\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n \n 2 . . . . . . . . . 2\n \n 3 . . . . . . . . . 3\n \n 4 . . . . . . . . . 4\n \n 5 # . . | . . . . . % 5\n + \n 6 . . . | . . . . . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 
10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 
10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 
10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 
10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0] +ObservationTensor(2) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 
10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 
10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0] +ObservationTensor(3) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 
10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [1, 2, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 35, 37, 38, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 
61, 63, 65, 69, 70, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95, 97, 99, 103, 105, 109, 111, 113, 115, 117, 119, 121, 123, 125, 127, 129, 131, 133, 137, 139, 143, 145, 147, 149, 151, 153, 155, 159, 161, 163, 165, 167, 171, 173, 177, 179, 181, 183, 185, 187, 189, 191, 193, 195, 197, 199, 201, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235, 239, 241, 243, 245, 247, 249, 251, 253, 255, 257, 259, 261, 263, 265, 267, 269] +StringLegalActions() = ["a1v", "a4", "b1v", "c1v", "d1v", "e1v", "f1v", "g1v", "h1v", "a1h", "b1h", "c1h", "d1h", "e1h", "f1h", "g1h", "h1h", "a2v", "b2v", "b5", "c2v", "d2v", "e2v", "f2v", "g2v", "h2v", "a2h", "b2h", "c2h", "d2h", "e2h", "f2h", "g2h", "h2h", "a3v", "a6", "b3v", "c3v", "d3v", "e3v", "f3v", "g3v", "h3v", "a3h", "b3h", "c3h", "d3h", "e3h", "f3h", "g3h", "h3h", "a4v", "b4v", "d4v", "e4v", "f4v", "g4v", "h4v", "a4h", "b4h", "c4h", "d4h", "e4h", "f4h", "g4h", "h4h", "a5v", "b5v", "d5v", "e5v", "f5v", "g5v", "h5v", "a5h", "b5h", "d5h", "e5h", "f5h", "g5h", "h5h", "a6v", "b6v", "d6v", "e6v", "f6v", "g6v", "h6v", "a6h", "b6h", "c6h", "d6h", "e6h", "f6h", "g6h", "h6h", "a7v", "b7v", "c7v", "d7v", "e7v", "f7v", "g7v", "h7v", "a7h", "b7h", "c7h", "d7h", "e7h", "f7h", "g7h", "h7h", "a8v", "b8v", "c8v", "d8v", "e8v", "f8v", "g8v", "h8v", "a8h", "b8h", "c8h", "d8h", "e8h", "f8h", "g8h", "h8h"] + +# Apply action "g5v" +action: 149 + +# State 2 +# Board size: 9, walls: 9, 9, 10, 10 +# a b c d e f g h i +# 1 . . . . @ . . . . 1 +# +# 2 . . . . . . . . . 2 +# +# 3 . . . . . . . . . 3 +# +# 4 . . . . . . . . . 4 +# +# 5 # . . | . . . . | . % 5 +# + + +# 6 . . . | . . . . | . . 6 +# +# 7 . . . . . . . . . 7 +# +# 8 . . . . . . . . . 8 +# +# 9 . . . . 0 . . . . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149] +HistoryString() = "141, 149" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "141, 149" +InformationStateString(1) = "141, 149" +InformationStateString(2) = "141, 149" +InformationStateString(3) = "141, 149" +ObservationString(0) = "Board size: 9, walls: 9, 9, 10, 10\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n \n 2 . . . . . . . . . 2\n \n 3 . . . . . . . . . 3\n \n 4 . . . . . . . . . 4\n \n 5 # . . | . . . . | . % 5\n + + \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 9, 9, 10, 10\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n \n 2 . . . . . . . . . 2\n \n 3 . . . . . . . . . 3\n \n 4 . . . . . . . . . 4\n \n 5 # . . | . . . . | . % 5\n + + \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 9, 9, 10, 10\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n \n 2 . . . . . . . . . 2\n \n 3 . . . . . . . . . 3\n \n 4 . . . . . . . . . 4\n \n 5 # . . | . . . . | . % 5\n + + \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 9, 9, 10, 10\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n \n 2 . . . . . . . . . 2\n \n 3 . . . . . . . . . 3\n \n 4 . . . . . . . . . 4\n \n 5 # . . | . . . . | . % 5\n + + \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 
9\n a b c d e f g h i\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 
9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 
9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 
9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0] +ObservationTensor(2) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 
9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 
9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0] +ObservationTensor(3) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 
10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 
10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 34, 35, 37, 38, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 69, 70, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95, 97, 99, 103, 105, 109, 111, 113, 117, 119, 121, 123, 125, 127, 129, 131, 133, 137, 139, 143, 145, 147, 151, 153, 155, 159, 161, 163, 167, 171, 173, 177, 179, 181, 185, 187, 189, 191, 193, 195, 197, 199, 201, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235, 239, 241, 243, 245, 247, 249, 251, 253, 255, 257, 259, 261, 263, 265, 267, 269] +StringLegalActions() = ["a1v", "b1v", "c1v", "d1v", "e1v", "f1v", "g1v", "h1v", "a1h", "b1h", "c1h", "d1h", "e1h", "f1h", "g1h", "h1h", "d1", "a2v", "b2v", "f1", "c2v", "d2v", "e2v", "f2v", "g2v", "h2v", "a2h", "b2h", "c2h", "d2h", "e2h", "f2h", "g2h", "h2h", "a3v", "e2", "b3v", "c3v", "d3v", "e3v", "f3v", "g3v", "h3v", "a3h", "b3h", "c3h", "d3h", "e3h", "f3h", "g3h", "h3h", "a4v", "b4v", "d4v", "e4v", "f4v", "h4v", "a4h", "b4h", "c4h", "d4h", "e4h", "f4h", "g4h", "h4h", "a5v", "b5v", "d5v", "e5v", "f5v", "h5v", "a5h", "b5h", "d5h", "e5h", "f5h", "h5h", "a6v", "b6v", "d6v", "e6v", "f6v", "h6v", "a6h", "b6h", "c6h", "d6h", "e6h", "f6h", "g6h", "h6h", "a7v", "b7v", "c7v", "d7v", "e7v", "f7v", "g7v", "h7v", "a7h", "b7h", "c7h", "d7h", "e7h", "f7h", "g7h", "h7h", "a8v", "b8v", "c8v", "d8v", "e8v", "f8v", "g8v", "h8v", "a8h", "b8h", "c8h", "d8h", "e8h", "f8h", "g8h", "h8h"] + +# Apply action "e1h" +action: 25 + +# State 3 +# Board size: 9, walls: 9, 9, 9, 10 +# a b c d e f g h i +# 1 . . . . @ . . . . 1 +# ---+--- +# 2 . . . . . . . . . 2 +# +# 3 . . . . . . . . . 3 +# +# 4 . . . . . . . . . 4 +# +# 5 # . . | . . . . | . % 5 +# + + +# 6 . . . | . . . . | . . 6 +# +# 7 . . . . . . . . . 7 +# +# 8 . . . . . . . . . 8 +# +# 9 . . . . 0 . . . . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25] +HistoryString() = "141, 149, 25" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "141, 149, 25" +InformationStateString(1) = "141, 149, 25" +InformationStateString(2) = "141, 149, 25" +InformationStateString(3) = "141, 149, 25" +ObservationString(0) = "Board size: 9, walls: 9, 9, 9, 10\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . . . . . . . 2\n \n 3 . . . . . . . . . 
3\n \n 4 . . . . . . . . . 4\n \n 5 # . . | . . . . | . % 5\n + + \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 9, 9, 9, 10\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . . . . . . . 2\n \n 3 . . . . . . . . . 3\n \n 4 . . . . . . . . . 4\n \n 5 # . . | . . . . | . % 5\n + + \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 9, 9, 9, 10\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . . . . . . . 2\n \n 3 . . . . . . . . . 3\n \n 4 . . . . . . . . . 4\n \n 5 # . . | . . . . | . % 5\n + + \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 9, 9, 9, 10\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . . . . . . . 2\n \n 3 . . . . . . . . . 3\n \n 4 . . . . . . . . . 4\n \n 5 # . . | . . . . | . % 5\n + + \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 
9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 
10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 
9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0] +ObservationTensor(2) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 
9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 
9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0] +ObservationTensor(3) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 
9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 
10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [1, 2, 3, 5, 7, 11, 13, 15, 17, 19, 21, 29, 31, 34, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 69, 70, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95, 97, 99, 103, 105, 109, 111, 113, 117, 119, 121, 123, 125, 127, 129, 131, 133, 137, 139, 143, 145, 147, 151, 153, 155, 159, 161, 163, 167, 171, 173, 177, 179, 181, 185, 187, 189, 191, 193, 195, 197, 199, 201, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235, 239, 241, 243, 245, 247, 249, 251, 253, 255, 257, 259, 261, 263, 265, 267, 269] +StringLegalActions() = ["a1v", "i4", "b1v", "c1v", "d1v", "f1v", "g1v", "h1v", "a1h", "b1h", "c1h", "g1h", "h1h", "h5", "a2v", "b2v", "c2v", "d2v", "e2v", "f2v", "g2v", "h2v", "a2h", "b2h", "c2h", "d2h", "e2h", "f2h", "g2h", "h2h", "a3v", "i6", "b3v", "c3v", "d3v", "e3v", "f3v", "g3v", "h3v", "a3h", "b3h", "c3h", "d3h", "e3h", "f3h", "g3h", "h3h", "a4v", "b4v", "d4v", "e4v", "f4v", "h4v", "a4h", "b4h", "c4h", "d4h", "e4h", "f4h", "g4h", "h4h", "a5v", "b5v", "d5v", "e5v", "f5v", "h5v", "a5h", "b5h", "d5h", "e5h", "f5h", "h5h", "a6v", "b6v", "d6v", "e6v", "f6v", "h6v", "a6h", "b6h", "c6h", "d6h", "e6h", "f6h", "g6h", "h6h", "a7v", "b7v", "c7v", "d7v", "e7v", "f7v", "g7v", "h7v", "a7h", "b7h", "c7h", "d7h", "e7h", "f7h", "g7h", "h7h", "a8v", "b8v", "c8v", "d8v", "e8v", "f8v", "g8v", "h8v", "a8h", "b8h", "c8h", "d8h", "e8h", "f8h", "g8h", "h8h"] + +# Apply action "e4v" +action: 111 + +# State 4 +# Board size: 9, walls: 9, 9, 9, 9 +# a b c d e f g h i +# 1 . . . . @ . . . . 1 +# ---+--- +# 2 . . . . . . . . . 2 +# +# 3 . . . . . . . . . 3 +# +# 4 . . . . . | . . . . 4 +# + +# 5 # . . | . . | . . | . % 5 +# + + +# 6 . . . | . . . . | . . 6 +# +# 7 . . . . . . . . . 7 +# +# 8 . . . . . . . . . 8 +# +# 9 . . . . 0 . . . . 
9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111] +HistoryString() = "141, 149, 25, 111" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "141, 149, 25, 111" +InformationStateString(1) = "141, 149, 25, 111" +InformationStateString(2) = "141, 149, 25, 111" +InformationStateString(3) = "141, 149, 25, 111" +ObservationString(0) = "Board size: 9, walls: 9, 9, 9, 9\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . . . . . . . 2\n \n 3 . . . . . . . . . 3\n \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n + + \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 9, 9, 9, 9\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . . . . . . . 2\n \n 3 . . . . . . . . . 3\n \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n + + \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 9, 9, 9, 9\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . . . . . . . 2\n \n 3 . . . . . . . . . 3\n \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n + + \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 9, 9, 9, 9\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . . . . . . . 2\n \n 3 . . . . . . . . . 3\n \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n + + \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 
9\n a b c d e f g h i\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 
9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 
9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 
9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0] +ObservationTensor(2) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 
9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 
9.0, 9.0] +ObservationTensor(3) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 
9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [1, 2, 3, 5, 7, 11, 13, 15, 17, 19, 21, 29, 31, 34, 35, 37, 38, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 69, 71, 73, 75, 79, 81, 83, 85, 87, 89, 91, 93, 95, 97, 99, 103, 105, 109, 113, 117, 119, 121, 123, 125, 129, 131, 133, 137, 139, 143, 147, 151, 153, 155, 159, 161, 163, 167, 171, 173, 177, 179, 181, 185, 187, 189, 191, 193, 195, 197, 199, 201, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235, 239, 241, 243, 245, 247, 249, 251, 253, 255, 257, 259, 261, 263, 265, 267, 269] +StringLegalActions() = ["a1v", "e8", "b1v", "c1v", "d1v", "f1v", "g1v", "h1v", "a1h", "b1h", "c1h", "g1h", "h1h", "d9", "a2v", "b2v", "f9", "c2v", "d2v", "e2v", "f2v", "g2v", "h2v", "a2h", "b2h", "c2h", "d2h", "e2h", "f2h", "g2h", "h2h", "a3v", "b3v", "c3v", "d3v", "f3v", "g3v", "h3v", "a3h", "b3h", "c3h", "d3h", "e3h", "f3h", "g3h", "h3h", "a4v", "b4v", "d4v", "f4v", "h4v", "a4h", "b4h", "c4h", "d4h", "f4h", "g4h", "h4h", "a5v", "b5v", "d5v", "f5v", "h5v", "a5h", "b5h", "d5h", "e5h", "f5h", "h5h", "a6v", "b6v", "d6v", "e6v", "f6v", "h6v", "a6h", "b6h", "c6h", "d6h", "e6h", 
"f6h", "g6h", "h6h", "a7v", "b7v", "c7v", "d7v", "e7v", "f7v", "g7v", "h7v", "a7h", "b7h", "c7h", "d7h", "e7h", "f7h", "g7h", "h7h", "a8v", "b8v", "c8v", "d8v", "e8v", "f8v", "g8v", "h8v", "a8h", "b8h", "c8h", "d8h", "e8h", "f8h", "g8h", "h8h"] + +# Apply action "d2h" +action: 57 + +# State 5 +# Board size: 9, walls: 8, 9, 9, 9 +# a b c d e f g h i +# 1 . . . . @ . . . . 1 +# ---+--- +# 2 . . . . . . . . . 2 +# ---+--- +# 3 . . . . . . . . . 3 +# +# 4 . . . . . | . . . . 4 +# + +# 5 # . . | . . | . . | . % 5 +# + + +# 6 . . . | . . . . | . . 6 +# +# 7 . . . . . . . . . 7 +# +# 8 . . . . . . . . . 8 +# +# 9 . . . . 0 . . . . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57] +HistoryString() = "141, 149, 25, 111, 57" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "141, 149, 25, 111, 57" +InformationStateString(1) = "141, 149, 25, 111, 57" +InformationStateString(2) = "141, 149, 25, 111, 57" +InformationStateString(3) = "141, 149, 25, 111, 57" +ObservationString(0) = "Board size: 9, walls: 8, 9, 9, 9\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . . . . . . . 2\n ---+--- \n 3 . . . . . . . . . 3\n \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n + + \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 8, 9, 9, 9\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . . . . . . . 2\n ---+--- \n 3 . . . . . . . . . 3\n \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n + + \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 8, 9, 9, 9\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . . . . . . . 2\n ---+--- \n 3 . . . . . . . . . 3\n \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n + + \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 8, 9, 9, 9\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . . . . . . . 2\n ---+--- \n 3 . . . . . . . . . 3\n \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n + + \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 
9\n a b c d e f g h i\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 
9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 
8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 
9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0] +ObservationTensor(2) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 
9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 
9.0, 9.0] +ObservationTensor(3) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 
9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [1, 2, 3, 5, 7, 11, 13, 15, 17, 19, 21, 29, 31, 35, 37, 38, 39, 43, 45, 47, 49, 51, 53, 61, 63, 65, 69, 70, 71, 73, 75, 79, 81, 83, 85, 87, 89, 91, 93, 95, 97, 99, 103, 105, 109, 113, 117, 119, 121, 123, 125, 129, 131, 133, 137, 139, 143, 147, 151, 153, 155, 159, 161, 163, 167, 171, 173, 177, 179, 181, 185, 187, 189, 191, 193, 195, 197, 199, 201, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235, 239, 241, 243, 245, 247, 249, 251, 253, 255, 257, 259, 261, 263, 265, 267, 269] +StringLegalActions() = ["a1v", "a4", "b1v", "c1v", "d1v", "f1v", "g1v", "h1v", "a1h", "b1h", "c1h", "g1h", "h1h", "a2v", "b2v", "b5", "c2v", "e2v", "f2v", "g2v", "h2v", "a2h", "b2h", "f2h", "g2h", "h2h", "a3v", "a6", "b3v", "c3v", "d3v", "f3v", "g3v", "h3v", "a3h", "b3h", "c3h", "d3h", "e3h", "f3h", "g3h", "h3h", "a4v", "b4v", "d4v", "f4v", "h4v", "a4h", "b4h", "c4h", "d4h", "f4h", "g4h", "h4h", "a5v", "b5v", "d5v", "f5v", "h5v", "a5h", "b5h", "d5h", "e5h", "f5h", "h5h", "a6v", "b6v", "d6v", "e6v", "f6v", "h6v", "a6h", "b6h", "c6h", "d6h", "e6h", "f6h", "g6h", "h6h", "a7v", "b7v", "c7v", 
"d7v", "e7v", "f7v", "g7v", "h7v", "a7h", "b7h", "c7h", "d7h", "e7h", "f7h", "g7h", "h7h", "a8v", "b8v", "c8v", "d8v", "e8v", "f8v", "g8v", "h8v", "a8h", "b8h", "c8h", "d8h", "e8h", "f8h", "g8h", "h8h"] + +# Apply action "h5h" +action: 167 + +# State 6 +# Board size: 9, walls: 8, 8, 9, 9 +# a b c d e f g h i +# 1 . . . . @ . . . . 1 +# ---+--- +# 2 . . . . . . . . . 2 +# ---+--- +# 3 . . . . . . . . . 3 +# +# 4 . . . . . | . . . . 4 +# + +# 5 # . . | . . | . . | . % 5 +# + +---+--- +# 6 . . . | . . . . | . . 6 +# +# 7 . . . . . . . . . 7 +# +# 8 . . . . . . . . . 8 +# +# 9 . . . . 0 . . . . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167] +HistoryString() = "141, 149, 25, 111, 57, 167" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "141, 149, 25, 111, 57, 167" +InformationStateString(1) = "141, 149, 25, 111, 57, 167" +InformationStateString(2) = "141, 149, 25, 111, 57, 167" +InformationStateString(3) = "141, 149, 25, 111, 57, 167" +ObservationString(0) = "Board size: 9, walls: 8, 8, 9, 9\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . . . . . . . 2\n ---+--- \n 3 . . . . . . . . . 3\n \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n + +---+--- \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 8, 8, 9, 9\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . . . . . . . 2\n ---+--- \n 3 . . . . . . . . . 3\n \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n + +---+--- \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 8, 8, 9, 9\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . . . . . . . 2\n ---+--- \n 3 . . . . . . . . . 3\n \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n + +---+--- \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 8, 8, 9, 9\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . . . . . . . 2\n ---+--- \n 3 . . . . . . . . . 3\n \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n + +---+--- \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 
9\n a b c d e f g h i\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 
8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 
8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 
9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0] +ObservationTensor(2) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 
9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 
9.0, 9.0] +ObservationTensor(3) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 
8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [1, 3, 5, 7, 11, 13, 15, 17, 19, 21, 29, 31, 34, 35, 37, 38, 39, 43, 45, 47, 49, 51, 53, 61, 63, 65, 69, 71, 73, 75, 79, 81, 83, 85, 87, 89, 91, 93, 95, 97, 99, 103, 105, 109, 113, 117, 119, 121, 123, 125, 129, 131, 137, 139, 143, 147, 153, 155, 159, 161, 163, 171, 173, 177, 179, 181, 185, 187, 189, 191, 193, 195, 197, 199, 201, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235, 239, 241, 243, 245, 247, 249, 251, 253, 255, 257, 259, 261, 263, 265, 267, 269] +StringLegalActions() = ["a1v", "b1v", "c1v", "d1v", "f1v", "g1v", "h1v", "a1h", "b1h", "c1h", "g1h", "h1h", "d1", "a2v", "b2v", "f1", "c2v", "e2v", "f2v", "g2v", "h2v", "a2h", "b2h", "f2h", "g2h", "h2h", "a3v", "b3v", "c3v", "d3v", "f3v", "g3v", "h3v", "a3h", "b3h", "c3h", "d3h", "e3h", "f3h", "g3h", "h3h", "a4v", "b4v", "d4v", "f4v", "h4v", "a4h", "b4h", "c4h", "d4h", "f4h", "g4h", "a5v", "b5v", "d5v", "f5v", "a5h", "b5h", "d5h", "e5h", "f5h", "a6v", "b6v", "d6v", "e6v", "f6v", "h6v", "a6h", "b6h", "c6h", "d6h", "e6h", "f6h", "g6h", "h6h", "a7v", "b7v", "c7v", "d7v", "e7v", "f7v", "g7v", "h7v", "a7h", "b7h", 
"c7h", "d7h", "e7h", "f7h", "g7h", "h7h", "a8v", "b8v", "c8v", "d8v", "e8v", "f8v", "g8v", "h8v", "a8h", "b8h", "c8h", "d8h", "e8h", "f8h", "g8h", "h8h"] + +# Apply action "c2v" +action: 39 + +# State 7 +# Board size: 9, walls: 8, 8, 8, 9 +# a b c d e f g h i +# 1 . . . . @ . . . . 1 +# ---+--- +# 2 . . . | . . . . . . 2 +# +---+--- +# 3 . . . | . . . . . . 3 +# +# 4 . . . . . | . . . . 4 +# + +# 5 # . . | . . | . . | . % 5 +# + +---+--- +# 6 . . . | . . . . | . . 6 +# +# 7 . . . . . . . . . 7 +# +# 8 . . . . . . . . . 8 +# +# 9 . . . . 0 . . . . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39] +HistoryString() = "141, 149, 25, 111, 57, 167, 39" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39" +ObservationString(0) = "Board size: 9, walls: 8, 8, 8, 9\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . | . . . . . . 2\n +---+--- \n 3 . . . | . . . . . . 3\n \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n + +---+--- \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 8, 8, 8, 9\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . | . . . . . . 2\n +---+--- \n 3 . . . | . . . . . . 3\n \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n + +---+--- \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 8, 8, 8, 9\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . | . . . . . . 2\n +---+--- \n 3 . . . | . . . . . . 3\n \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n + +---+--- \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 8, 8, 8, 9\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . | . . . . . . 2\n +---+--- \n 3 . . . | . . . . . . 3\n \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n + +---+--- \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 
9\n a b c d e f g h i\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 
8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 
8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 
9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0] +ObservationTensor(2) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 
8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 
9.0, 9.0] +ObservationTensor(3) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 
8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [1, 2, 3, 7, 11, 13, 15, 17, 19, 21, 29, 31, 34, 35, 37, 43, 45, 47, 49, 51, 53, 61, 63, 65, 69, 71, 75, 79, 81, 83, 85, 87, 89, 91, 93, 95, 97, 99, 103, 105, 109, 113, 117, 119, 121, 123, 125, 129, 131, 137, 139, 143, 147, 153, 155, 159, 161, 163, 171, 173, 177, 179, 181, 185, 187, 189, 191, 193, 195, 197, 199, 201, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235, 239, 241, 243, 245, 247, 249, 251, 253, 255, 257, 259, 261, 263, 265, 267, 269] +StringLegalActions() = ["a1v", "i4", "b1v", "d1v", "f1v", "g1v", "h1v", "a1h", "b1h", "c1h", "g1h", "h1h", "h5", "a2v", "b2v", "e2v", "f2v", "g2v", "h2v", "a2h", "b2h", "f2h", "g2h", "h2h", "a3v", "b3v", "d3v", "f3v", "g3v", "h3v", "a3h", "b3h", "c3h", "d3h", "e3h", "f3h", "g3h", "h3h", "a4v", "b4v", "d4v", "f4v", "h4v", "a4h", "b4h", "c4h", "d4h", "f4h", "g4h", "a5v", "b5v", "d5v", "f5v", "a5h", "b5h", "d5h", "e5h", "f5h", "a6v", "b6v", "d6v", "e6v", "f6v", "h6v", "a6h", "b6h", "c6h", "d6h", "e6h", "f6h", "g6h", "h6h", "a7v", "b7v", "c7v", "d7v", "e7v", "f7v", "g7v", "h7v", "a7h", "b7h", "c7h", "d7h", "e7h", "f7h", 
"g7h", "h7h", "a8v", "b8v", "c8v", "d8v", "e8v", "f8v", "g8v", "h8v", "a8h", "b8h", "c8h", "d8h", "e8h", "f8h", "g8h", "h8h"] + +# Apply action "d5h" +action: 159 + +# State 8 +# Board size: 9, walls: 8, 8, 8, 8 +# a b c d e f g h i +# 1 . . . . @ . . . . 1 +# ---+--- +# 2 . . . | . . . . . . 2 +# +---+--- +# 3 . . . | . . . . . . 3 +# +# 4 . . . . . | . . . . 4 +# + +# 5 # . . | . . | . . | . % 5 +# +---+--- +---+--- +# 6 . . . | . . . . | . . 6 +# +# 7 . . . . . . . . . 7 +# +# 8 . . . . . . . . . 8 +# +# 9 . . . . 0 . . . . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159" +ObservationString(0) = "Board size: 9, walls: 8, 8, 8, 8\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . | . . . . . . 2\n +---+--- \n 3 . . . | . . . . . . 3\n \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n +---+--- +---+--- \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 8, 8, 8, 8\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . | . . . . . . 2\n +---+--- \n 3 . . . | . . . . . . 3\n \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n +---+--- +---+--- \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 8, 8, 8, 8\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . | . . . . . . 2\n +---+--- \n 3 . . . | . . . . . . 3\n \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n +---+--- +---+--- \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 8, 8, 8, 8\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . | . . . . . . 2\n +---+--- \n 3 . . . | . . . . . . 3\n \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n +---+--- +---+--- \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 
9\n a b c d e f g h i\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 
8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 
8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 
8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0] +ObservationTensor(2) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 
8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 
8.0, 8.0] +ObservationTensor(3) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 
8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [1, 2, 3, 7, 11, 13, 15, 17, 19, 21, 29, 31, 34, 35, 37, 38, 43, 45, 47, 49, 51, 53, 61, 63, 65, 69, 71, 75, 79, 81, 83, 85, 87, 89, 91, 93, 95, 97, 99, 103, 105, 109, 113, 117, 119, 121, 123, 125, 129, 131, 137, 139, 147, 153, 155, 163, 171, 173, 177, 179, 181, 185, 187, 189, 191, 193, 195, 197, 199, 201, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235, 239, 241, 243, 245, 247, 249, 251, 253, 255, 257, 259, 261, 263, 265, 267, 269] +StringLegalActions() = ["a1v", "e8", "b1v", "d1v", "f1v", "g1v", "h1v", "a1h", "b1h", "c1h", "g1h", "h1h", "d9", "a2v", "b2v", "f9", "e2v", "f2v", "g2v", "h2v", "a2h", "b2h", "f2h", "g2h", "h2h", "a3v", "b3v", "d3v", "f3v", "g3v", "h3v", "a3h", "b3h", "c3h", "d3h", "e3h", "f3h", "g3h", "h3h", "a4v", "b4v", "d4v", "f4v", "h4v", "a4h", "b4h", "c4h", "d4h", "f4h", "g4h", "a5v", "b5v", "f5v", "a5h", "b5h", "f5h", "a6v", "b6v", "d6v", "e6v", "f6v", "h6v", "a6h", "b6h", "c6h", "d6h", "e6h", "f6h", "g6h", "h6h", "a7v", "b7v", "c7v", "d7v", "e7v", "f7v", "g7v", "h7v", "a7h", "b7h", "c7h", "d7h", "e7h", "f7h", "g7h", "h7h", "a8v", "b8v", 
"c8v", "d8v", "e8v", "f8v", "g8v", "h8v", "a8h", "b8h", "c8h", "d8h", "e8h", "f8h", "g8h", "h8h"] + +# Apply action "b3h" +action: 87 + +# State 9 +# Board size: 9, walls: 7, 8, 8, 8 +# a b c d e f g h i +# 1 . . . . @ . . . . 1 +# ---+--- +# 2 . . . | . . . . . . 2 +# +---+--- +# 3 . . . | . . . . . . 3 +# ---+--- +# 4 . . . . . | . . . . 4 +# + +# 5 # . . | . . | . . | . % 5 +# +---+--- +---+--- +# 6 . . . | . . . . | . . 6 +# +# 7 . . . . . . . . . 7 +# +# 8 . . . . . . . . . 8 +# +# 9 . . . . 0 . . . . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87" +ObservationString(0) = "Board size: 9, walls: 7, 8, 8, 8\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . | . . . . . . 2\n +---+--- \n 3 . . . | . . . . . . 3\n ---+--- \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n +---+--- +---+--- \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 7, 8, 8, 8\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . | . . . . . . 2\n +---+--- \n 3 . . . | . . . . . . 3\n ---+--- \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n +---+--- +---+--- \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 7, 8, 8, 8\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . | . . . . . . 2\n +---+--- \n 3 . . . | . . . . . . 3\n ---+--- \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n +---+--- +---+--- \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 7, 8, 8, 8\n a b c d e f g h i\n 1 . . . . @ . . . . 1\n ---+--- \n 2 . . . | . . . . . . 2\n +---+--- \n 3 . . . | . . . . . . 3\n ---+--- \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n +---+--- +---+--- \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 
9\n a b c d e f g h i\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 
8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 
7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 
8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0] +ObservationTensor(2) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 
8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 
8.0, 8.0] +ObservationTensor(3) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 
8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [1, 2, 3, 7, 11, 13, 15, 17, 19, 21, 29, 31, 35, 37, 38, 43, 45, 47, 49, 51, 53, 61, 63, 65, 69, 70, 75, 79, 81, 83, 91, 93, 95, 97, 99, 103, 105, 109, 113, 117, 119, 121, 123, 125, 129, 131, 137, 139, 147, 153, 155, 163, 171, 173, 177, 179, 181, 185, 187, 189, 191, 193, 195, 197, 199, 201, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235, 239, 241, 243, 245, 247, 249, 251, 253, 255, 257, 259, 261, 263, 265, 267, 269] +StringLegalActions() = ["a1v", "a4", "b1v", "d1v", "f1v", "g1v", "h1v", "a1h", "b1h", "c1h", "g1h", "h1h", "a2v", "b2v", "b5", "e2v", "f2v", "g2v", "h2v", "a2h", "b2h", "f2h", "g2h", "h2h", "a3v", "a6", "d3v", "f3v", "g3v", "h3v", "d3h", "e3h", "f3h", "g3h", "h3h", "a4v", "b4v", "d4v", "f4v", "h4v", "a4h", "b4h", "c4h", "d4h", "f4h", "g4h", "a5v", "b5v", "f5v", "a5h", "b5h", "f5h", "a6v", "b6v", "d6v", "e6v", "f6v", "h6v", "a6h", "b6h", "c6h", "d6h", "e6h", "f6h", "g6h", "h6h", "a7v", "b7v", "c7v", "d7v", "e7v", "f7v", "g7v", "h7v", "a7h", "b7h", "c7h", "d7h", "e7h", "f7h", "g7h", "h7h", "a8v", "b8v", "c8v", "d8v", "e8v", "f8v", "g8v", "h8v", 
"a8h", "b8h", "c8h", "d8h", "e8h", "f8h", "g8h", "h8h"] + +# Apply action "f1v" +action: 11 + +# State 10 +# Board size: 9, walls: 7, 7, 8, 8 +# a b c d e f g h i +# 1 . . . . @ . | . . . 1 +# ---+---+ +# 2 . . . | . . . | . . . 2 +# +---+--- +# 3 . . . | . . . . . . 3 +# ---+--- +# 4 . . . . . | . . . . 4 +# + +# 5 # . . | . . | . . | . % 5 +# +---+--- +---+--- +# 6 . . . | . . . . | . . 6 +# +# 7 . . . . . . . . . 7 +# +# 8 . . . . . . . . . 8 +# +# 9 . . . . 0 . . . . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11" +ObservationString(0) = "Board size: 9, walls: 7, 7, 8, 8\n a b c d e f g h i\n 1 . . . . @ . | . . . 1\n ---+---+ \n 2 . . . | . . . | . . . 2\n +---+--- \n 3 . . . | . . . . . . 3\n ---+--- \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n +---+--- +---+--- \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 7, 7, 8, 8\n a b c d e f g h i\n 1 . . . . @ . | . . . 1\n ---+---+ \n 2 . . . | . . . | . . . 2\n +---+--- \n 3 . . . | . . . . . . 3\n ---+--- \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n +---+--- +---+--- \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 7, 7, 8, 8\n a b c d e f g h i\n 1 . . . . @ . | . . . 1\n ---+---+ \n 2 . . . | . . . | . . . 2\n +---+--- \n 3 . . . | . . . . . . 3\n ---+--- \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n +---+--- +---+--- \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 7, 7, 8, 8\n a b c d e f g h i\n 1 . . . . @ . | . . . 1\n ---+---+ \n 2 . . . | . . . | . . . 2\n +---+--- \n 3 . . . | . . . . . . 3\n ---+--- \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n +---+--- +---+--- \n 6 . . . | . . . . | . . 6\n \n 7 . . . . . . . . . 7\n \n 8 . . . . . . . . . 8\n \n 9 . . . . 0 . . . . 
9\n a b c d e f g h i\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 
7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 
7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 
8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0] +ObservationTensor(2) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 
8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 
8.0, 8.0] +ObservationTensor(3) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 
7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [1, 3, 13, 15, 17, 19, 21, 29, 31, 34, 35, 37, 38, 43, 47, 49, 51, 53, 61, 63, 65, 69, 75, 79, 81, 83, 91, 93, 95, 97, 99, 103, 105, 109, 113, 117, 119, 121, 123, 125, 129, 131, 137, 139, 147, 153, 155, 163, 171, 173, 177, 179, 181, 185, 187, 189, 191, 193, 195, 197, 199, 201, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235, 239, 241, 243, 245, 247, 249, 251, 253, 255, 257, 259, 261, 263, 265, 267, 269] +StringLegalActions() = ["a1v", "b1v", "g1v", "h1v", "a1h", "b1h", "c1h", "g1h", "h1h", "d1", "a2v", "b2v", "f1", "e2v", "g2v", "h2v", "a2h", "b2h", "f2h", "g2h", "h2h", "a3v", "d3v", "f3v", "g3v", "h3v", "d3h", "e3h", "f3h", "g3h", "h3h", "a4v", "b4v", "d4v", "f4v", "h4v", "a4h", "b4h", "c4h", "d4h", "f4h", "g4h", "a5v", "b5v", "f5v", "a5h", "b5h", "f5h", "a6v", "b6v", "d6v", "e6v", "f6v", "h6v", "a6h", "b6h", "c6h", "d6h", "e6h", "f6h", "g6h", "h6h", "a7v", "b7v", "c7v", "d7v", "e7v", "f7v", "g7v", "h7v", "a7h", "b7h", "c7h", "d7h", "e7h", "f7h", "g7h", "h7h", "a8v", "b8v", "c8v", "d8v", "e8v", "f8v", "g8v", "h8v", "a8h", "b8h", "c8h", "d8h", "e8h", "f8h", 
"g8h", "h8h"] + +# Apply action "c7v" +action: 209 + +# State 11 +# Board size: 9, walls: 7, 7, 7, 8 +# a b c d e f g h i +# 1 . . . . @ . | . . . 1 +# ---+---+ +# 2 . . . | . . . | . . . 2 +# +---+--- +# 3 . . . | . . . . . . 3 +# ---+--- +# 4 . . . . . | . . . . 4 +# + +# 5 # . . | . . | . . | . % 5 +# +---+--- +---+--- +# 6 . . . | . . . . | . . 6 +# +# 7 . . . | . . . . . . 7 +# + +# 8 . . . | . . . . . . 8 +# +# 9 . . . . 0 . . . . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209" +ObservationString(0) = "Board size: 9, walls: 7, 7, 7, 8\n a b c d e f g h i\n 1 . . . . @ . | . . . 1\n ---+---+ \n 2 . . . | . . . | . . . 2\n +---+--- \n 3 . . . | . . . . . . 3\n ---+--- \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n +---+--- +---+--- \n 6 . . . | . . . . | . . 6\n \n 7 . . . | . . . . . . 7\n + \n 8 . . . | . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 7, 7, 7, 8\n a b c d e f g h i\n 1 . . . . @ . | . . . 1\n ---+---+ \n 2 . . . | . . . | . . . 2\n +---+--- \n 3 . . . | . . . . . . 3\n ---+--- \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n +---+--- +---+--- \n 6 . . . | . . . . | . . 6\n \n 7 . . . | . . . . . . 7\n + \n 8 . . . | . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 7, 7, 7, 8\n a b c d e f g h i\n 1 . . . . @ . | . . . 1\n ---+---+ \n 2 . . . | . . . | . . . 2\n +---+--- \n 3 . . . | . . . . . . 3\n ---+--- \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n +---+--- +---+--- \n 6 . . . | . . . . | . . 6\n \n 7 . . . | . . . . . . 7\n + \n 8 . . . | . . . . . . 8\n \n 9 . . . . 0 . . . . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 7, 7, 7, 8\n a b c d e f g h i\n 1 . . . . @ . | . . . 1\n ---+---+ \n 2 . . . | . . . | . . . 2\n +---+--- \n 3 . . . | . . . . . . 3\n ---+--- \n 4 . . . . . | . . . . 4\n + \n 5 # . . | . . | . . | . % 5\n +---+--- +---+--- \n 6 . . . | . . . . | . . 6\n \n 7 . . . | . . . . . . 7\n + \n 8 . . . | . . . . . . 8\n \n 9 . . . . 0 . . . . 
9\n a b c d e f g h i\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 
7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 
7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 
8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0] +ObservationTensor(2) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 
7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 
8.0, 8.0] +ObservationTensor(3) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 
7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [1, 2, 3, 13, 15, 17, 19, 21, 29, 31, 34, 35, 37, 43, 47, 49, 51, 53, 61, 63, 65, 69, 75, 79, 81, 83, 91, 93, 95, 97, 99, 103, 105, 109, 113, 117, 119, 121, 123, 125, 129, 131, 137, 139, 147, 153, 155, 163, 171, 173, 177, 179, 181, 185, 187, 189, 191, 193, 195, 197, 199, 201, 205, 207, 211, 213, 215, 217, 219, 221, 223, 227, 229, 231, 233, 235, 239, 241, 245, 247, 249, 251, 253, 255, 257, 259, 261, 263, 265, 267, 269] +StringLegalActions() = ["a1v", "i4", "b1v", "g1v", "h1v", "a1h", "b1h", "c1h", "g1h", "h1h", "h5", "a2v", "b2v", "e2v", "g2v", "h2v", "a2h", "b2h", "f2h", "g2h", "h2h", "a3v", "d3v", "f3v", "g3v", "h3v", "d3h", "e3h", "f3h", "g3h", "h3h", "a4v", "b4v", "d4v", "f4v", "h4v", "a4h", "b4h", "c4h", "d4h", "f4h", "g4h", "a5v", "b5v", "f5v", "a5h", "b5h", "f5h", "a6v", "b6v", "d6v", "e6v", "f6v", "h6v", "a6h", "b6h", "c6h", "d6h", "e6h", "f6h", "g6h", "h6h", "a7v", "b7v", "d7v", "e7v", "f7v", "g7v", "h7v", "a7h", "b7h", "d7h", "e7h", "f7h", "g7h", "h7h", "a8v", "b8v", "d8v", "e8v", "f8v", "g8v", "h8v", "a8h", "b8h", "c8h", "d8h", "e8h", "f8h", "g8h", "h8h"] + +# Apply action "b6h" 
+action: 189 + +# State 12 +# Apply action "a7v" +action: 205 + +# State 13 +# Apply action "e7h" +action: 229 + +# State 14 +# Apply action "b8h" +action: 257 + +# State 15 +# Apply action "d3v" +action: 75 + +# State 16 +# Apply action "d6v" +action: 177 + +# State 17 +# Apply action "g3h" +action: 97 + +# State 18 +# Apply action "h8v" +action: 253 + +# State 19 +# Apply action "g7v" +action: 217 + +# State 20 +# Apply action "h3v" +action: 83 + +# State 21 +# Apply action "g8h" +action: 267 + +# State 22 +# Apply action "h7h" +action: 235 + +# State 23 +# Apply action "a4h" +action: 119 + +# State 24 +# Apply action "h2h" +action: 65 + +# State 25 +# Apply action "h1h" +action: 31 + +# State 26 +# Apply action "a2h" +action: 51 + +# State 27 +# Apply action "f4v" +action: 113 + +# State 28 +# Apply action "e3h" +action: 93 + +# State 29 +# Apply action "b7h" +action: 223 + +# State 30 +# Apply action "b1h" +action: 19 + +# State 31 +# Apply action "a5h" +action: 153 + +# State 32 +# Apply action "d8h" +action: 261 + +# State 33 +# Apply action "f6h" +action: 197 + +# State 34 +# Apply action "d1" +action: 34 + +# State 35 +# Apply action "h6h" +action: 201 + +# State 36 +# Apply action "f2h" +action: 61 + +# State 37 +# Apply action "g2v" +action: 47 + +# State 38 +# Apply action "e1" +action: 38 + +# State 39 +# Apply action "i4" +action: 2 + +# State 40 +# Board size: 9, walls: 0, 0, 2, 1 +# a b c d e f g h i +# 1 . . . . @ . | . . . 1 +# ---+--- ---+---+ ---+--- +# 2 . . . | . . . | . | . . 2 +# ---+--- +---+--- ---+---+---+--- +# 3 . . . | . | . . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . . | . | . | . . | % 4 +# ---+--- + + +# 5 # . . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | . . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . . . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . . 0 . . . | . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2" +ObservationString(0) = "Board size: 9, walls: 0, 0, 2, 1\n a b c d e f g h i\n 1 . . . . @ . | . . . 1\n ---+--- ---+---+ ---+--- \n 2 . . . | . . . | . | . . 2\n ---+--- +---+--- ---+---+---+--- \n 3 . . . | . | . . . | . | . 
3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | % 4\n ---+--- + + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . 0 . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 2, 1\n a b c d e f g h i\n 1 . . . . @ . | . . . 1\n ---+--- ---+---+ ---+--- \n 2 . . . | . . . | . | . . 2\n ---+--- +---+--- ---+---+---+--- \n 3 . . . | . | . . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | % 4\n ---+--- + + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . 0 . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 2, 1\n a b c d e f g h i\n 1 . . . . @ . | . . . 1\n ---+--- ---+---+ ---+--- \n 2 . . . | . . . | . | . . 2\n ---+--- +---+--- ---+---+---+--- \n 3 . . . | . | . . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | % 4\n ---+--- + + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . 0 . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 2, 1\n a b c d e f g h i\n 1 . . . . @ . | . . . 1\n ---+--- ---+---+ ---+--- \n 2 . . . | . . . | . | . . 2\n ---+--- +---+--- ---+---+---+--- \n 3 . . . | . | . . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | % 4\n ---+--- + + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . 0 . . . | . 
9\n a b c d e f g h i\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 
0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(2) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 
1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 
2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0] +ObservationTensor(3) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 
1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [34, 38] +StringLegalActions() = ["d9", "f9"] + +# Apply action "d9" +action: 34 + +# State 41 +# Board size: 9, walls: 0, 0, 2, 1 +# a b c d e f g h i +# 1 . . . . @ . | . . . 1 +# ---+--- ---+---+ ---+--- +# 2 . . . | . . . | . | . . 2 +# ---+--- +---+--- ---+---+---+--- +# 3 . . . | . | . . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . . | . | . | . . | % 4 +# ---+--- + + +# 5 # . . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | . . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . . . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . 0 . . . . | . 
9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34" +ObservationString(0) = "Board size: 9, walls: 0, 0, 2, 1\n a b c d e f g h i\n 1 . . . . @ . | . . . 1\n ---+--- ---+---+ ---+--- \n 2 . . . | . . . | . | . . 2\n ---+--- +---+--- ---+---+---+--- \n 3 . . . | . | . . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | % 4\n ---+--- + + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . 0 . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 2, 1\n a b c d e f g h i\n 1 . . . . @ . | . . . 1\n ---+--- ---+---+ ---+--- \n 2 . . . | . . . | . | . . 2\n ---+--- +---+--- ---+---+---+--- \n 3 . . . | . | . . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | % 4\n ---+--- + + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . 0 . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 2, 1\n a b c d e f g h i\n 1 . . . . @ . | . . . 1\n ---+--- ---+---+ ---+--- \n 2 . . . | . . . | . | . . 2\n ---+--- +---+--- ---+---+---+--- \n 3 . . . | . | . . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | % 4\n ---+--- + + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . 0 . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 2, 1\n a b c d e f g h i\n 1 . . . . @ . | . . . 1\n ---+--- ---+---+ ---+--- \n 2 . . . | . . . | . | . . 2\n ---+--- +---+--- ---+---+---+--- \n 3 . . . | . | . . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | % 4\n ---+--- + + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . 
| . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . 0 . . . . | . 9\n a b c d e f g h i\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(2) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 
2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(3) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 
0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [38] +StringLegalActions() = ["b5"] + +# Apply action "b5" +action: 38 + +# State 42 +# Board size: 9, walls: 0, 0, 2, 1 +# a b c d e f g h i +# 1 . . . . @ . | . . . 1 +# ---+--- ---+---+ ---+--- +# 2 . . . | . . . | . | . . 2 +# ---+--- +---+--- ---+---+---+--- +# 3 . . . | . | . . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . . | . | . | . . | % 4 +# ---+--- + + +# 5 . # . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | . . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . . . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . 0 . . . . | . 
9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38" +ObservationString(0) = "Board size: 9, walls: 0, 0, 2, 1\n a b c d e f g h i\n 1 . . . . @ . | . . . 1\n ---+--- ---+---+ ---+--- \n 2 . . . | . . . | . | . . 2\n ---+--- +---+--- ---+---+---+--- \n 3 . . . | . | . . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | % 4\n ---+--- + + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . 0 . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 2, 1\n a b c d e f g h i\n 1 . . . . @ . | . . . 1\n ---+--- ---+---+ ---+--- \n 2 . . . | . . . | . | . . 2\n ---+--- +---+--- ---+---+---+--- \n 3 . . . | . | . . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | % 4\n ---+--- + + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . 0 . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 2, 1\n a b c d e f g h i\n 1 . . . . @ . | . . . 1\n ---+--- ---+---+ ---+--- \n 2 . . . | . . . | . | . . 2\n ---+--- +---+--- ---+---+---+--- \n 3 . . . | . | . . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | % 4\n ---+--- + + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . 0 . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 2, 1\n a b c d e f g h i\n 1 . . . . @ . | . . . 1\n ---+--- ---+---+ ---+--- \n 2 . . . | . . . | . | . . 2\n ---+--- +---+--- ---+---+---+--- \n 3 . . . | . | . . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | % 4\n ---+--- + + \n 5 . # . | . . | . | . | . . 
5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . 0 . . . . | . 9\n a b c d e f g h i\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 
0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(2) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 
2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(3) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 
1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [34, 38, 43, 125] +StringLegalActions() = ["d1", "f1", "e2v", "d4h"] + +# Apply action "d1" +action: 34 + +# State 43 +# Board size: 9, walls: 0, 0, 2, 1 +# a b c d e f g h i +# 1 . . . @ . . | . . . 1 +# ---+--- ---+---+ ---+--- +# 2 . . . | . . . | . | . . 2 +# ---+--- +---+--- ---+---+---+--- +# 3 . . . | . | . . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . . | . | . | . . | % 4 +# ---+--- + + +# 5 . # . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | . . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . . . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . 0 . . . . | . 
9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34" +ObservationString(0) = "Board size: 9, walls: 0, 0, 2, 1\n a b c d e f g h i\n 1 . . . @ . . | . . . 1\n ---+--- ---+---+ ---+--- \n 2 . . . | . . . | . | . . 2\n ---+--- +---+--- ---+---+---+--- \n 3 . . . | . | . . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | % 4\n ---+--- + + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . 0 . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 2, 1\n a b c d e f g h i\n 1 . . . @ . . | . . . 1\n ---+--- ---+---+ ---+--- \n 2 . . . | . . . | . | . . 2\n ---+--- +---+--- ---+---+---+--- \n 3 . . . | . | . . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | % 4\n ---+--- + + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . 0 . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 2, 1\n a b c d e f g h i\n 1 . . . @ . . | . . . 1\n ---+--- ---+---+ ---+--- \n 2 . . . | . . . | . | . . 2\n ---+--- +---+--- ---+---+---+--- \n 3 . . . | . | . . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | % 4\n ---+--- + + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . 0 . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 2, 1\n a b c d e f g h i\n 1 . . . @ . . | . . . 1\n ---+--- ---+---+ ---+--- \n 2 . . . | . . . | . | . . 2\n ---+--- +---+--- ---+---+---+--- \n 3 . . . | . | . . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | % 4\n ---+--- + + \n 5 . # . | . . | . | . 
| . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . 0 . . . . | . 9\n a b c d e f g h i\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 
0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(1) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(2) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 
2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +ObservationTensor(3) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 
0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [2, 7, 43, 70, 125] +StringLegalActions() = ["i3", "d1v", "e2v", "i5", "d4h"] + +# Apply action "i3" +action: 2 + +# State 44 +# Apply action "e9" +action: 38 + +# State 45 +# Apply action "c5" +action: 38 + +# State 46 +# Apply action "d2" +action: 70 + +# State 47 +# Apply action "d4h" +action: 125 + +# State 48 +# Apply action "f9" +action: 38 + +# State 49 +# Apply action "b5" +action: 34 + +# State 50 +# Apply action "d1" +action: 2 + +# State 51 +# Apply action "i4" +action: 70 + +# State 52 +# Apply action "g9" +action: 38 + +# State 53 +# Apply action "a5" +action: 34 + +# State 54 +# Apply action "c1" +action: 34 + +# State 55 +# Apply action "i3" +action: 2 + +# State 56 +# Apply action "h9" +action: 38 + +# State 57 +# Apply action "b5" +action: 38 + +# State 58 +# Apply action "e2v" +action: 43 + +# State 59 +# Apply action "i4" +action: 70 
+ +# State 60 +# Apply action "g9" +action: 34 + +# State 61 +# Apply action "c5" +action: 38 + +# State 62 +# Apply action "b1" +action: 34 + +# State 63 +# Apply action "i3" +action: 2 + +# State 64 +# Apply action "f9" +action: 34 + +# State 65 +# Apply action "b5" +action: 34 + +# State 66 +# Apply action "d1v" +action: 7 + +# State 67 +# Apply action "i4" +action: 70 + +# State 68 +# Apply action "g9" +action: 38 + +# State 69 +# Apply action "c5" +action: 38 + +# State 70 +# Apply action "c1" +action: 38 + +# State 71 +# Apply action "i3" +action: 2 + +# State 72 +# Apply action "f9" +action: 34 + +# State 73 +# Apply action "c4" +action: 2 + +# State 74 +# Apply action "b1" +action: 34 + +# State 75 +# Apply action "i4" +action: 70 + +# State 76 +# Apply action "e9" +action: 34 + +# State 77 +# Apply action "b4" +action: 34 + +# State 78 +# Apply action "c1" +action: 38 + +# State 79 +# Apply action "i5" +action: 70 + +# State 80 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . @ . | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . . . | . | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | . | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . # . . | . | . | . . | . 4 +# ---+--- ---+---+ + +# 5 . . . | . . | . | . | . % 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | . . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . . . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . . 0 . . . | . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 
97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . @ . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . # . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . % 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . 0 . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . @ . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . # . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . % 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . 0 . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . @ . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . # . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . % 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . 0 . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . @ . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . # . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . % 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . 0 . . . | . 
9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [34, 38] +StringLegalActions() = ["d9", "f9"] + +# Apply action "f9" +action: 38 + +# State 81 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . @ . | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . . . | . | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | . | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . # . . | . | . | . . | . 4 +# ---+--- ---+---+ + +# 5 . . . | . . | . | . | . % 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | . . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . . . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . . . 0 . . | . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 
70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . @ . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . # . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . % 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . 0 . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . @ . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . # . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . % 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . 0 . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . @ . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . # . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . % 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . 0 . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . @ . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . # . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . % 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . 0 . . | . 
9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [34, 38] +StringLegalActions() = ["a4", "c4"] + +# Apply action "a4" +action: 34 + +# State 82 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . @ . | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . . . | . | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | . | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 # . . . | . | . | . . | . 4 +# ---+--- ---+---+ + +# 5 . . . | . . | . | . | . % 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | . . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . . . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . . . 0 . . | . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 
34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . @ . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 # . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . % 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . 0 . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . @ . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 # . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . % 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . 0 . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . @ . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 # . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . % 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . 0 . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . @ . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 # . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . % 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . 0 . . | . 
9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [34, 38] +StringLegalActions() = ["b1", "d1"] + +# Apply action "b1" +action: 34 + +# State 83 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . @ . . | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . . . | . | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | . | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 # . . . | . | . | . . | . 4 +# ---+--- ---+---+ + +# 5 . . . | . . | . | . | . % 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | . . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . . . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . . . 0 . . | . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 
201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . @ . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 # . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . % 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . 0 . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . @ . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 # . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . % 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . 0 . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . @ . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 # . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . % 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . 0 . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . @ . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 # . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . % 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . 0 . . | . 
9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [2, 34] +StringLegalActions() = ["i4", "h5"] + +# Apply action "h5" +action: 34 + +# State 84 +# Apply action "e9" +action: 34 + +# State 85 +# Apply action "b4" +action: 38 + +# State 86 +# Apply action "a1" +action: 34 + +# State 87 +# Apply action "h4" +action: 2 + +# State 88 +# Apply action "d9" +action: 34 + +# State 89 +# Apply action "c4" +action: 38 + +# State 90 +# Apply action "a2" +action: 70 + +# State 91 +# Apply action "h5" +action: 70 + +# State 92 +# Apply action "e9" +action: 38 + +# State 93 +# Apply action "d4" +action: 38 + +# State 94 +# Apply action "a1" +action: 2 + +# State 95 +# Apply action "h4" +action: 2 + +# State 96 +# Apply action "f9" +action: 38 + +# State 97 +# Apply action "d3" +action: 2 + +# State 98 +# Apply action "a2" +action: 70 + +# State 99 +# Apply action "g4" +action: 34 + +# State 100 +# Apply action "f8" +action: 2 + +# State 101 +# Apply action "d4" +action: 70 + +# State 102 +# Apply action "a1" +action: 2 + +# State 103 +# Apply action "g5" +action: 70 + +# State 104 +# Apply action "e8" +action: 34 + +# State 105 +# Apply action "d3" +action: 2 + +# State 106 +# Apply action "a2" +action: 70 + +# State 107 +# Apply action "g6" +action: 70 + +# State 108 +# Apply action "d8" +action: 34 + +# State 109 +# Apply action "d4" +action: 70 + +# State 110 +# Apply action "a1" +action: 2 + +# State 111 +# Apply action "f6" +action: 34 + +# State 112 +# Apply action "d7" +action: 2 + +# State 113 +# Apply action "d3" +action: 2 + +# State 114 +# Apply action "a2" +action: 70 + +# State 115 +# Apply action "f5" +action: 2 + +# State 116 +# Apply action "d8" +action: 70 + +# State 117 +# Apply action "d4" +action: 70 + +# State 118 +# Apply action "b2" +action: 38 + +# State 119 +# Apply action "f4" +action: 2 + +# State 120 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . . . | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . @ . | . | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | . | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . # | . | % | . . | . 4 +# ---+--- ---+---+ + +# 5 . . . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | . . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | 0 . . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . . . . . . | . 
9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . @ . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . # | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 
5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | 0 . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . @ . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . # | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | 0 . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . @ . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . # | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | 0 . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . @ . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . # | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | 0 . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 
9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [2, 38] +StringLegalActions() = ["d7", "e8"] + +# Apply action "e8" +action: 38 + +# State 121 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . . . | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . @ . | . | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | . | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . # | . | % | . . | . 4 +# ---+--- ---+---+ + +# 5 . . . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | . . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . 0 . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . . . . . . | . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38" 
+InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . @ . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . # | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . 0 . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . @ . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . # | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . 0 . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . @ . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . # | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . 0 . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . @ . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . # | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . 0 . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 
9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [2, 34] +StringLegalActions() = ["d3", "c4"] + +# Apply action "d3" +action: 2 + +# State 122 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . . . | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . @ . | . | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | # | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . . | . | % | . . | . 4 +# ---+--- ---+---+ + +# 5 . . . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | . . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . 0 . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . . . . . . | . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2" 
+InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . @ . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | # | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . 0 . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . @ . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | # | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . 0 . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . @ . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | # | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . 0 . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . @ . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | # | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . 0 . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 
9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [34, 38] +StringLegalActions() = ["a2", "c2"] + +# Apply action "a2" +action: 34 + +# State 123 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . . . | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 @ . . | . | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | # | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . . | . | % | . . | . 4 +# ---+--- ---+---+ + +# 5 . . . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | . . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . 0 . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . . . . . . | . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 
38, 2, 38, 2, 34" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 @ . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | # | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . 0 . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 @ . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | # | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . 0 . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 @ . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | # | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . 0 . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 @ . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | # | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . 0 . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 
9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [70] +StringLegalActions() = ["f5"] + +# Apply action "f5" +action: 70 + +# State 124 +# Apply action "f8" +action: 38 + +# State 125 +# Apply action "d4" +action: 70 + +# State 126 +# Apply action "b2" +action: 38 + +# State 127 +# Apply action "f6" +action: 70 + +# State 128 +# Apply action "f9" +action: 70 + +# State 129 +# Apply action "d3" +action: 2 + +# State 130 +# Apply action "a2" +action: 34 + +# State 131 +# Apply action "e6" +action: 34 + +# State 132 +# Apply action "f8" +action: 2 + +# State 133 +# Apply action "d4" +action: 70 + +# State 134 +# Apply action "b2" +action: 38 + +# State 135 +# Apply action "e7" +action: 70 + +# State 136 +# Apply action "f9" +action: 70 + +# State 137 +# Apply action "c4" +action: 34 + +# State 138 +# Apply action "c2" +action: 38 + +# State 139 +# Apply action "e6" +action: 2 + +# State 140 +# Apply action "g9" +action: 38 + +# State 141 +# Apply action "d4" +action: 38 + +# State 142 +# Apply action "b2" +action: 34 + +# State 143 +# Apply action "f6" +action: 38 + +# State 144 +# Apply action "f9" +action: 34 + +# State 145 +# Apply action "c4" +action: 34 + +# State 146 +# Apply action "c2" +action: 38 + +# State 147 +# Apply action "g6" +action: 38 + +# State 148 +# Apply action "g9" +action: 38 + +# State 149 +# Apply action "d4" +action: 38 + +# State 150 +# Apply action "b2" +action: 34 + +# State 151 +# Apply action "g5" +action: 2 + +# State 152 +# Apply action "f9" +action: 34 + +# State 153 +# Apply action "d3" +action: 2 + +# State 154 +# Apply action "c2" +action: 38 + +# State 155 +# Apply action "g6" +action: 70 + +# State 156 +# Apply action "f8" +action: 2 + +# State 157 +# Apply action "d4" +action: 70 + +# State 158 +# Apply action "b2" +action: 34 + +# State 159 +# Apply action "f6" +action: 34 + +# State 160 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . . . | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . @ . | . | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | . | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . # | . | . | . . | . 4 +# ---+--- ---+---+ + +# 5 . . . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . % . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | . . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . . 0 . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . . . . . . | . 
9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 
267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . @ . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . # | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . % . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . 0 . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . @ . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . # | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . % . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . 0 . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . @ . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . # | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . % . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . 0 . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . @ . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . # | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . % . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . 0 . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 
9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [34, 38, 70] +StringLegalActions() = ["e8", "g8", "f9"] + +# Apply action "g8" +action: 38 + +# State 161 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . . . | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . @ . | . | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | . | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . # | . | . | . . | . 4 +# ---+--- ---+---+ + +# 5 . . . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . % . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | . . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . . . 0 | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . . . . . . | . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 
39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . @ . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . # | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . % . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . 0 | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . @ . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . # | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . % . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . 0 | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . @ . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . # | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 
5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . % . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . 0 | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . @ . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . # | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . % . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . 0 | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [2, 34] +StringLegalActions() = ["d3", "c4"] + +# Apply action "c4" +action: 34 + +# State 162 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . . . | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . @ . | . | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | . | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . # . | . | . | . . | . 4 +# ---+--- ---+---+ + +# 5 . . . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . % . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | . . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . . . 0 | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . . . . . . | . 
9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 
229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . @ . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . # . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . % . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . 0 | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . @ . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . # . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . % . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . 0 | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . @ . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . # . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . % . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . 0 | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . @ . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . # . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . % . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . 0 | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 
9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [34, 38] +StringLegalActions() = ["a2", "c2"] + +# Apply action "a2" +action: 34 + +# State 163 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . . . | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 @ . . | . | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | . | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . # . | . | . | . . | . 4 +# ---+--- ---+---+ + +# 5 . . . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . % . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | . . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . . . 0 | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . . . . . . | . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34" +InformationStateString(1) = "141, 149, 25, 
111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 @ . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . # . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . % . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . 0 | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 @ . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . # . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . % . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . 0 | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 @ . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . # . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . 
| . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . % . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . 0 | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 @ . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . # . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . % . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . 0 | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [2, 34, 38] +StringLegalActions() = ["f5", "e6", "g6"] + +# Apply action "g6" +action: 38 + +# State 164 +# Apply action "f8" +action: 34 + +# State 165 +# Apply action "c5" +action: 70 + +# State 166 +# Apply action "b2" +action: 38 + +# State 167 +# Apply action "f6" +action: 34 + +# State 168 +# Apply action "f9" +action: 70 + +# State 169 +# Apply action "b5" +action: 34 + +# State 170 +# Apply action "a2" +action: 34 + +# State 171 +# Apply action "g6" +action: 38 + +# State 172 +# Apply action "e9" +action: 34 + +# State 173 +# Apply action "a5" +action: 34 + +# State 174 +# Apply action "a1" +action: 2 + +# State 175 +# Apply action "f6" +action: 34 + +# State 176 +# Apply action "d9" +action: 34 + +# State 177 +# Apply action "b5" +action: 38 + +# State 178 +# Apply action "b1" +action: 38 + +# State 179 +# Apply action "f5" +action: 2 + +# State 180 +# Apply action "c9" +action: 34 + +# State 181 +# Apply action "c5" +action: 38 + +# State 182 +# Apply action "c1" +action: 38 + +# State 183 +# Apply action "f6" +action: 70 + +# State 184 +# Apply action "d9" +action: 38 + +# State 185 +# Apply action "c6" +action: 70 + +# State 186 +# Apply action "b1" +action: 34 + +# State 187 +# Apply action "e6" +action: 34 + +# State 188 +# Apply action "e9" +action: 38 + +# State 189 +# Apply action "c5" +action: 2 + +# State 190 +# Apply action "c1" +action: 38 + +# State 191 +# Apply action "e7" +action: 70 + +# State 192 +# Apply action "d9" +action: 34 + +# State 193 +# Apply action "b5" +action: 34 + +# State 194 +# Apply action "d1" +action: 38 + +# State 195 +# Apply action "e6" +action: 2 + +# State 196 +# Apply action "c9" +action: 34 + +# State 197 +# Apply action "c5" +action: 38 + +# State 198 +# Apply action "c1" +action: 34 + +# State 199 +# Apply action "e7" +action: 70 + +# State 200 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . @ . | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . . . | . | . | . | . | . . 
2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | . | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . . | . | . | . . | . 4 +# ---+--- ---+---+ + +# 5 . . # | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | % . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . . . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . 0 . . . . . | . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 
38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . @ . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . # | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . 0 . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . @ . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . # | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . 0 . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . @ . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . # | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 
6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . 0 . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . @ . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . # | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . 0 . . . . . | . 9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [34, 38] +StringLegalActions() = ["b9", "d9"] + +# Apply action "d9" +action: 38 + +# State 201 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . @ . | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . . . | . | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | . | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . . | . | . | . . | . 4 +# ---+--- ---+---+ + +# 5 . . # | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | % . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . . . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . 0 . . . . | . 
9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 
113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . @ . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . # | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . 0 . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . @ . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . # | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . 0 . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . @ . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . # | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . 0 . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . @ . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . 
| . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . . # | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . 0 . . . . | . 9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [2, 34, 70] +StringLegalActions() = ["c4", "b5", "c6"] + +# Apply action "b5" +action: 34 + +# State 202 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . @ . | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . . . | . | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | . | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . . | . | . | . . | . 4 +# ---+--- ---+---+ + +# 5 . # . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | % . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . . . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . 0 . . . . | . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 
+InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . @ . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . 
| . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . 0 . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . @ . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . 0 . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . @ . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . 0 . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . @ . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . 0 . . . . | . 
9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [34, 38] +StringLegalActions() = ["b1", "d1"] + +# Apply action "d1" +action: 38 + +# State 203 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . . @ | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . . . | . | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | . | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . . | . | . | . . | . 4 +# ---+--- ---+---+ + +# 5 . # . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | % . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . . . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . 0 . . . . | . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 
70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . @ | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . 0 . . . . | . 
9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . @ | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . 0 . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . @ | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . 0 . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . @ | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . 0 . . . . | . 
9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [2, 38] +StringLegalActions() = ["e6", "f7"] + +# Apply action "f7" +action: 38 + +# State 204 +# Apply action "e9" +action: 38 + +# State 205 +# Apply action "c5" +action: 38 + +# State 206 +# Apply action "c1" +action: 34 + +# State 207 +# Apply action "e7" +action: 34 + +# State 208 +# Apply action "d9" +action: 34 + +# State 209 +# Apply action "b5" +action: 34 + +# State 210 +# Apply action "d1" +action: 38 + +# State 211 +# Apply action "f7" +action: 38 + +# State 212 +# Apply action "c9" +action: 34 + +# State 213 +# Apply action "a5" +action: 34 + +# State 214 +# Apply action "d2" +action: 70 + +# State 215 +# Apply action "e7" +action: 34 + +# State 216 +# Apply action "d9" +action: 38 + +# State 217 +# Apply action "b5" +action: 38 + +# State 218 +# Apply action "d1" +action: 2 + +# State 219 +# Apply action "e6" +action: 2 + +# State 220 +# Apply action "e9" +action: 38 + +# State 221 +# Apply action "c5" +action: 38 + +# State 222 +# Apply action "c1" +action: 34 + +# State 223 +# Apply action "f6" +action: 38 + +# State 224 +# Apply action "d9" +action: 34 + +# State 225 +# Apply action "c6" +action: 70 + +# State 226 +# Apply action "d1" +action: 38 + +# State 227 +# Apply action "f5" +action: 2 + +# State 228 +# Apply action "c9" +action: 34 + +# State 229 +# Apply action "c5" +action: 2 + +# State 230 +# Apply action "c1" +action: 34 + +# State 231 +# Apply action "f6" +action: 70 + +# State 232 +# Apply action "b9" +action: 34 + +# State 233 +# Apply action "b5" +action: 34 + +# State 234 +# Apply action "d1" +action: 38 + +# State 235 +# Apply action "f5" +action: 2 + +# State 236 +# Apply action "a9" +action: 34 + +# State 237 +# Apply action "a5" +action: 34 + +# State 238 +# Apply action "d2" +action: 70 + +# State 239 +# Apply action "f4" +action: 2 + +# State 240 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . . . | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . . . | @ | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | . | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . . | . | % | . . | . 4 +# ---+--- ---+---+ + +# 5 # . . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | . . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . . . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 0 . . . . . . . | . 
9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 
70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 0 . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 
8\n ---+--- ---+--- ---+---+ \n 9 0 . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 0 . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 0 . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [2, 38] +StringLegalActions() = ["a8", "b9"] + +# Apply action "a8" +action: 2 + +# State 241 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . . . | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . . . | @ | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | . | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . . | . | % | . . | . 4 +# ---+--- ---+---+ + +# 5 # . . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | . . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 0 | . . | . . . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . . . . . . | . 
9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 
70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 0 | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 0 | . . | . . . . 
| . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 0 | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 0 | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [38] +StringLegalActions() = ["b5"] + +# Apply action "b5" +action: 38 + +# State 242 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . . . | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . . . | @ | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | . | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . . | . | % | . . | . 4 +# ---+--- ---+---+ + +# 5 . # . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | . . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 0 | . . | . . . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . . . . . . | . 
9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 
2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 0 | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 
7\n +---+---+ ---+--- +---+--- \n 8 0 | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 0 | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 0 | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [2] +StringLegalActions() = ["d1"] + +# Apply action "d1" +action: 2 + +# State 243 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . . @ | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . . . | . | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | . | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . . | . | % | . . | . 4 +# ---+--- ---+---+ + +# 5 . # . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | . . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 0 | . . | . . . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . . . . . . | . 
9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 
70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . @ | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 0 | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . @ | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 
7\n +---+---+ ---+--- +---+--- \n 8 0 | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . @ | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 0 | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . @ | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | % | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 0 | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [70] +StringLegalActions() = ["f5"] + +# Apply action "f5" +action: 70 + +# State 244 +# Apply action "a7" +action: 2 + +# State 245 +# Apply action "a5" +action: 34 + +# State 246 +# Apply action "d2" +action: 70 + +# State 247 +# Apply action "f4" +action: 2 + +# State 248 +# Apply action "a6" +action: 2 + +# State 249 +# Apply action "b5" +action: 38 + +# State 250 +# Apply action "d1" +action: 2 + +# State 251 +# Apply action "f5" +action: 70 + +# State 252 +# Apply action "b6" +action: 38 + +# State 253 +# Apply action "a5" +action: 34 + +# State 254 +# Apply action "d2" +action: 70 + +# State 255 +# Apply action "f6" +action: 70 + +# State 256 +# Apply action "a6" +action: 34 + +# State 257 +# Apply action "b5" +action: 38 + +# State 258 +# Apply action "d1" +action: 2 + +# State 259 +# Apply action "e6" +action: 34 + +# State 260 +# Apply action "b6" +action: 38 + +# State 261 +# Apply action "a5" +action: 34 + +# State 262 +# Apply action "d2" +action: 70 + +# State 263 +# Apply action "e7" +action: 70 + +# State 264 +# Apply action "a6" +action: 34 + +# State 265 +# Apply action "b5" +action: 38 + +# State 266 +# Apply action "d1" +action: 2 + +# State 267 +# Apply action "f7" +action: 38 + +# State 268 +# Apply action "b6" +action: 38 + +# State 269 +# Apply action "c5" +action: 38 + +# State 270 +# Apply action "d2" +action: 70 + +# State 271 +# Apply action "e7" +action: 34 + +# State 272 +# Apply action "a6" +action: 34 + +# State 273 +# Apply action "b5" 
+action: 34 + +# State 274 +# Apply action "d1" +action: 2 + +# State 275 +# Apply action "e6" +action: 2 + +# State 276 +# Apply action "b6" +action: 38 + +# State 277 +# Apply action "a5" +action: 34 + +# State 278 +# Apply action "d2" +action: 70 + +# State 279 +# Apply action "e7" +action: 70 + +# State 280 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . . . | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . . . | @ | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | . | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . . | . | . | . . | . 4 +# ---+--- ---+---+ + +# 5 # . . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . 0 . | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | % . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . . . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . . . . . . | . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 
38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 
2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . 0 . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . 0 . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . 0 . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . 0 . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 
9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [34, 38] +StringLegalActions() = ["a6", "c6"] + +# Apply action "c6" +action: 38 + +# State 281 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . . . | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . . . | @ | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | . | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . . | . | . | . . | . 4 +# ---+--- ---+---+ + +# 5 # . . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . 0 | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | % . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . . . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . . . . . . | . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 
34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 
38, 34, 70, 70, 38" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . 0 | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . 0 | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . 0 | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . 0 | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 
8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [38] +StringLegalActions() = ["b5"] + +# Apply action "b5" +action: 38 + +# State 282 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . . . | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . . . | @ | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | . | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . . | . | . | . . | . 4 +# ---+--- ---+---+ + +# 5 . # . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . 0 | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | % . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . . . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . . . . . . | . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 
2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 
38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . 0 | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . 0 | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . 0 | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . 0 | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 
7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [2] +StringLegalActions() = ["d1"] + +# Apply action "d1" +action: 2 + +# State 283 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . . @ | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . . . | . | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | . | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . . | . | . | . . | . 4 +# ---+--- ---+---+ + +# 5 . # . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . 0 | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | % . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . . . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . . . . . . | . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 
2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 
34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . @ | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . 0 | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . @ | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . 0 | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . @ | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . 0 | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . @ | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . 0 | . | . . . 
| . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [2, 38] +StringLegalActions() = ["e6", "f7"] + +# Apply action "e6" +action: 2 + +# State 284 +# Apply action "b6" +action: 34 + +# State 285 +# Apply action "c5" +action: 38 + +# State 286 +# Apply action "d2" +action: 70 + +# State 287 +# Apply action "f6" +action: 38 + +# State 288 +# Apply action "c6" +action: 38 + +# State 289 +# Apply action "c4" +action: 2 + +# State 290 +# Apply action "d1" +action: 2 + +# State 291 +# Apply action "f5" +action: 2 + +# State 292 +# Apply action "c5" +action: 2 + +# State 293 +# Apply action "c6" +action: 70 + +# State 294 +# Apply action "c1" +action: 34 + +# State 295 +# Apply action "f6" +action: 70 + +# State 296 +# Apply action "c4" +action: 2 + +# State 297 +# Apply action "c5" +action: 2 + +# State 298 +# Apply action "d1" +action: 38 + +# State 299 +# Apply action "e6" +action: 34 + +# State 300 +# Apply action "d4" +action: 38 + +# State 301 +# Apply action "c4" +action: 2 + +# State 302 +# Apply action "c1" +action: 34 + +# State 303 +# Apply action "e7" +action: 70 + +# State 304 +# Apply action "d3" +action: 2 + +# State 305 +# Apply action "c5" +action: 70 + +# State 306 +# Apply action "b1" +action: 34 + +# State 307 +# Apply action "e6" +action: 2 + +# State 308 +# Apply action "d4" +action: 70 + +# State 309 +# Apply action "c4" +action: 2 + +# State 310 +# Apply action "c1" +action: 38 + +# State 311 +# Apply action "e7" +action: 70 + +# State 312 +# Apply action "d3" +action: 2 + +# State 313 +# Apply action "c5" +action: 70 + +# State 314 +# Apply action "d1" +action: 38 + +# State 315 +# Apply action "e6" +action: 2 + +# State 316 +# Apply action "d4" +action: 70 + +# State 317 +# Apply action "b5" +action: 34 + +# State 318 +# Apply action "d2" +action: 70 + +# State 319 +# Apply action "e7" +action: 70 + +# State 320 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . . . | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . . . | @ | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | . | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . 0 | . | . | . . | . 4 +# ---+--- ---+---+ + +# 5 . # . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | % . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . . . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . . . . . . | . 
9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 
34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 
70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . 0 | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . 0 | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . 0 | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | . | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . 0 | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 
9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [2, 34] +StringLegalActions() = ["d3", "c4"] + +# Apply action "d3" +action: 2 + +# State 321 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . . . | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . . . | @ | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | 0 | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . . | . | . | . . | . 4 +# ---+--- ---+---+ + +# 5 . # . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | % . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . . . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . . . . . . | . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70, 2] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 
70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70, 2" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70, 2" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 
70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70, 2" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70, 2" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | 0 | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | 0 | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | 0 | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 
4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | 0 | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 . # . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [34, 38] +StringLegalActions() = ["a5", "c5"] + +# Apply action "a5" +action: 34 + +# State 322 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . . . | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . . . | @ | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | 0 | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . . | . | . | . . | . 4 +# ---+--- ---+---+ + +# 5 # . . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | % . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . . . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . . . . . . | . 
9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70, 2, 34] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70, 2, 34" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 
38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70, 2, 34" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70, 2, 34" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70, 2, 34" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 
38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70, 2, 34" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | 0 | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | 0 | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | 0 | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . . | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | @ | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | 0 | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 
9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [2] +StringLegalActions() = ["d1"] + +# Apply action "d1" +action: 2 + +# State 323 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . . @ | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . . . | . | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | 0 | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . . | . | . | . . | . 4 +# ---+--- ---+---+ + +# 5 # . . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | . . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | % . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . . . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . . . . . . | . 9 +# a b c d e f g h i +IsTerminal() = False +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70, 2, 34, 2] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 
34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70, 2, 34, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70, 2, 34, 2" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70, 2, 34, 2" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 
38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70, 2, 34, 2" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70, 2, 34, 2" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . @ | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | 0 | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . @ | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | 0 | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . @ | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | 0 | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . 
. | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . @ | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | 0 | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | . . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | % . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [2, 38] +StringLegalActions() = ["e6", "f7"] + +# Apply action "e6" +action: 2 + +# State 324 +# Board size: 9, walls: 0, 0, 0, 0 +# a b c d e f g h i +# 1 . . . @ | . . | . . . 1 +# ---+--- +---+---+ ---+--- +# 2 . . . | . | . | . | . | . . 2 +# ---+--- +---+---+---+---+---+--- +# 3 . . . | 0 | . | . . | . | . 3 +# ---+--- +---+--- ---+---+ +# 4 . . . . | . | . | . . | . 4 +# ---+--- ---+---+ + +# 5 # . . | . . | . | . | . . 5 +# ---+--- +---+--- +---+--- +# 6 . . . | . | % . . | . . 6 +# ---+--- + ---+--- ---+--- +# 7 . | . . | . | . . . | . . 7 +# +---+---+ ---+--- +---+--- +# 8 . | . . | . . . . | . | . 8 +# ---+--- ---+--- ---+---+ +# 9 . . . . . . . . | . 
9 +# a b c d e f g h i +IsTerminal() = True +History() = [141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70, 2, 34, 2, 2] +HistoryString() = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70, 2, 34, 2, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 
38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70, 2, 34, 2, 2" +InformationStateString(1) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70, 2, 34, 2, 2" +InformationStateString(2) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70, 2, 34, 2, 2" +InformationStateString(3) = "141, 149, 25, 111, 57, 167, 39, 159, 87, 11, 209, 189, 205, 229, 257, 75, 177, 97, 253, 217, 83, 267, 235, 119, 65, 31, 51, 113, 93, 223, 19, 153, 261, 197, 34, 201, 61, 47, 38, 2, 34, 38, 34, 2, 38, 38, 70, 125, 38, 34, 2, 70, 38, 34, 34, 2, 38, 38, 43, 70, 34, 38, 34, 2, 34, 34, 7, 70, 38, 38, 38, 2, 34, 2, 34, 70, 34, 34, 38, 70, 38, 34, 34, 34, 34, 38, 34, 2, 34, 38, 70, 70, 38, 38, 2, 2, 38, 2, 70, 34, 2, 70, 2, 70, 34, 2, 70, 70, 34, 70, 2, 34, 2, 2, 70, 2, 70, 70, 38, 2, 38, 2, 34, 70, 38, 70, 38, 70, 70, 2, 34, 34, 2, 70, 38, 70, 70, 34, 38, 2, 
38, 38, 34, 38, 34, 34, 38, 38, 38, 38, 34, 2, 34, 2, 38, 70, 2, 70, 34, 34, 38, 34, 34, 38, 34, 70, 38, 34, 70, 34, 34, 38, 34, 34, 2, 34, 34, 38, 38, 2, 34, 38, 38, 70, 38, 70, 34, 34, 38, 2, 38, 70, 34, 34, 38, 2, 34, 38, 34, 70, 38, 34, 38, 38, 38, 38, 34, 34, 34, 34, 38, 38, 34, 34, 70, 34, 38, 38, 2, 2, 38, 38, 34, 38, 34, 70, 38, 2, 34, 2, 34, 70, 34, 34, 38, 2, 34, 34, 70, 2, 2, 38, 2, 70, 2, 34, 70, 2, 2, 38, 2, 70, 38, 34, 70, 70, 34, 38, 2, 34, 38, 34, 70, 70, 34, 38, 2, 38, 38, 38, 70, 34, 34, 34, 2, 2, 38, 34, 70, 70, 38, 38, 2, 2, 34, 38, 70, 38, 38, 2, 2, 2, 2, 70, 34, 70, 2, 2, 38, 34, 38, 2, 34, 70, 2, 70, 34, 2, 70, 2, 38, 70, 2, 70, 38, 2, 70, 34, 70, 70, 2, 34, 2, 2" +ObservationString(0) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . @ | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | 0 | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | % . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(1) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . @ | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | 0 | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | % . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(2) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . @ | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | 0 | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | % . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 9\n a b c d e f g h i\n" +ObservationString(3) = "Board size: 9, walls: 0, 0, 0, 0\n a b c d e f g h i\n 1 . . . @ | . . | . . . 1\n ---+--- +---+---+ ---+--- \n 2 . . . | . | . | . | . | . . 2\n ---+--- +---+---+---+---+---+--- \n 3 . . . | 0 | . | . . | . | . 3\n ---+--- +---+--- ---+---+ \n 4 . . . . | . | . | . . | . 4\n ---+--- ---+---+ + \n 5 # . . | . . | . | . | . . 5\n ---+--- +---+--- +---+--- \n 6 . . . | . | % . . | . . 6\n ---+--- + ---+--- ---+--- \n 7 . | . . | . | . . . | . . 7\n +---+---+ ---+--- +---+--- \n 8 . | . . | . . . . | . | . 8\n ---+--- ---+--- ---+---+ \n 9 . . . . . . . . | . 
9\n a b c d e f g h i\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(2): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(3): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◉◯◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◉◯◉◯◉◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◉◉◉◉◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯◉◯◉◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◉◉◉◉◯◯◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◯◉◯◯◉◉◉◯◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◉◉◉◯◯◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◉◯◯◯◯◯◯◯◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◉◯◉◉◉◯◯◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/rbc(board_size=4).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/rbc(board_size=4).txt new file mode 100644 index 0000000..dc7f578 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/rbc(board_size=4).txt @@ -0,0 +1,1940 @@ +game: rbc(board_size=4) + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Reconnaisance Blind Chess" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["board_size", "fen", "sense_size"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "rbc" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 4674 +PolicyTensorShape() = [4674] +MaxChanceOutcomes() = 0 +GetParameters() = {board_size=4,fen=r1kr/pppp/PPPP/R1KR w - - 0 1,sense_size=3} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = pieces_black: [9], pieces_white: [9], phase: [2], capture: [2], side_to_play: [2], illegal_move: [2], private_k_pieces: [4, 4], private_q_pieces: [4, 4], private_r_pieces: [4, 4], private_b_pieces: [4, 4], private_n_pieces: [4, 4], private_p_pieces: [4, 4], private_left_castling: [2], private_right_castling: [2], private_sense_K_pieces: [4, 4], private_sense_Q_pieces: [4, 4], private_sense_R_pieces: [4, 4], private_sense_B_pieces: [4, 4], private_sense_N_pieces: [4, 4], private_sense_P_pieces: [4, 4] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 222 +MaxGameLength() = 17695 +ToString() = "rbc(board_size=4)" + +# State 0 +# r1kr/pppp/PPPP/R1KR w - - 0 1 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "r1kr/pppp/4/4 - s - w -" +ObservationString(1) = "4/4/PPPP/R1KR - s - w -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◉◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◉◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ +ObservationTensor(0).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + 
◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◉◯ + ◯◯◉◯ + ◯◯◉◯ + ◯◯◉◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◉◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◉◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◉◯◯ + ◯◉◯◯ + ◯◉◯◯ + ◯◉◯◯ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense a2", "Sense b2"] + +# Apply action "Sense a2" +action: 2 + +# State 1 +# r1kr/pppp/PPPP/R1KR w - - 0 1 +IsTerminal() = False +History() = [2] +HistoryString() = "2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "r1kr/pppp/4/4 - m - w -" +ObservationString(1) = "r k1/ppp1/PPPP/R1KR - m - w -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◉◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◉◯ +ObservationTensor(0).phase: ◉◯ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ +ObservationTensor(0).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◉◯ + ◯◯◉◯ + ◯◯◉◯ + ◯◯◉◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◉◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◉◯ +ObservationTensor(1).phase: ◉◯ +ObservationTensor(1).capture: ◉◯ 
+ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◉◯◯ + ◯◉◯◯ + ◯◉◯◯ + ◯◉◯◯ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◉◯ + ◯◯◉◯ + ◯◯◉◯ + ◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 30, 89, 117, 673, 701, 714, 1197, 1257, 1285, 1298, 1841, 1882] +StringLegalActions() = ["pass", "a1b1", "a2a3", "a2b3", "b2b3", "b2c3", "b2a3", "c1b1", "c2c3", "c2d3", "c2b3", "d2d3", "d2c3"] + +# Apply action "a2b3" +action: 117 + +# State 2 +# r1kr/pPpp/1PPP/R1KR b - - 0 1 +IsTerminal() = False +History() = [2, 117] +HistoryString() = "2, 117" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "r1kr/p1pp/4/4 - s c b -" +ObservationString(1) = "4/1P2/1PPP/R1KR - s c b -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◉◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◉◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◯◉ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ +ObservationTensor(0).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◉◯ + ◯◯◯◯ + ◯◯◉◯ + ◯◯◉◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◉◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◉◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◯◉ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯ + ◯◉◉◯ + ◯◉◯◯ + ◯◉◯◯ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ 
+ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense a2", "Sense b2"] + +# Apply action "Sense b1" +action: 1 + +# State 3 +# r1kr/pPpp/1PPP/R1KR b - - 0 1 +IsTerminal() = False +History() = [2, 117, 1] +HistoryString() = "2, 117, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "r1kr/pPpp/1PPP/1 KR - m - b -" +ObservationString(1) = "4/1P2/1PPP/R1KR - m - b -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◉◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◉◯ +ObservationTensor(0).phase: ◉◯ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ +ObservationTensor(0).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◉◯ + ◯◯◯◯ + ◯◯◉◯ + ◯◯◉◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯ + ◯◉◉◯ + ◯◉◯◯ + ◯◉◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◉◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◉◯ +ObservationTensor(1).phase: ◉◯ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯ + ◯◉◉◯ + ◯◉◯◯ + ◯◉◯◯ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 30, 89, 117, 1197, 1225, 1257, 1285, 1298, 1841, 1882] +StringLegalActions() = ["pass", "a4b4", "a3a2", "a3b2", "c4b4", "c4b3", "c3c2", "c3d2", "c3b2", "d3d2", "d3c2"] + +# Apply action "a4b4" +action: 30 + +# State 4 +# 1rkr/pPpp/1PPP/R1KR w - - 1 2 +IsTerminal() = False +History() = [2, 117, 1, 30] +HistoryString() 
= "2, 117, 1, 30" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "1rkr/p1pp/4/4 - s - w -" +ObservationString(1) = "4/1P2/1PPP/R1KR - s - w -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◉◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◉◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◉ +ObservationTensor(0).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◉◯ + ◯◯◯◯ + ◯◯◉◯ + ◯◯◉◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◉◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◉◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯ + ◯◉◉◯ + ◯◉◯◯ + ◯◉◯◯ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense a2", "Sense b2"] + +# Apply action "Sense a1" +action: 0 + +# State 5 +# Apply action "a1b1" +action: 30 + +# State 6 +# 1rkr/pPpp/1PPP/1RKR b - - 2 2 +IsTerminal() = False +History() = [2, 117, 1, 30, 0, 30] +HistoryString() = "2, 117, 1, 30, 0, 30" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "1rkr/p1pp/4/4 - s - b -" +ObservationString(1) = "4/1P2/1PPP/1RKR - s - b -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◉◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◉◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯ + ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◉ +ObservationTensor(0).private_b_pieces: ◯◯◯◯ + 
◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◉◯ + ◯◯◯◯ + ◯◯◉◯ + ◯◯◉◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◉◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◉◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ + ◉◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯ + ◯◉◉◯ + ◯◉◯◯ + ◯◉◯◯ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense a2", "Sense b2"] + +# Apply action "Sense a1" +action: 0 + +# State 7 +# Apply action "b4b1" +action: 602 + +# State 8 +# Apply action "Sense a1" +action: 0 + +# State 9 +# Apply action "b1a1" +action: 613 + +# State 10 +# Apply action "Sense a2" +action: 2 + +# State 11 +# Apply action "c4b4" +action: 1197 + +# State 12 +# Apply action "Sense b1" +action: 1 + +# State 13 +# Apply action "c2d3" +action: 1285 + +# State 14 +# Apply action "Sense b1" +action: 1 + +# State 15 +# Apply action "d4c4" +action: 1781 + +# State 16 +# Apply action "Sense a2" +action: 2 + +# State 17 +# Apply action "d3d4q" +action: 1914 + +# State 18 +# Apply action "Sense a1" +action: 0 + +# State 19 +# Apply action "b4a4" +action: 613 + +# State 20 +# k1rQ/prp1/1P1P/R1KR w - - 1 6 +IsTerminal() = False +History() = [2, 117, 1, 30, 0, 30, 0, 602, 0, 613, 2, 1197, 1, 1285, 1, 1781, 2, 1914, 0, 613] +HistoryString() = "2, 117, 1, 30, 0, 30, 0, 602, 0, 613, 2, 1197, 1, 1285, 1, 1781, 2, 1914, 0, 613" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "k1r1/prp1/4/4 - s - w -" +ObservationString(1) = "3Q/4/1P1P/R1KR - s - w -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◉◯◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◉◯◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯ + ◯◯◉◯ + 
◯◯◯◉ + ◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◉◯ + ◯◯◯◯ + ◯◯◉◯ + ◯◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◉◯◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◉◯◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ +ObservationTensor(1).private_r_pieces: ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯ + ◯◉◯◯ + ◯◯◯◯ + ◯◉◯◯ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense a2", "Sense b2"] + +# Apply action "Sense a2" +action: 2 + +# State 21 +# Apply action "b2a3" +action: 714 + +# State 22 +# k1rQ/Prp1/3P/R1KR b - - 0 6 +IsTerminal() = False +History() = [2, 117, 1, 30, 0, 30, 0, 602, 0, 613, 2, 1197, 1, 1285, 1, 1781, 2, 1914, 0, 613, 2, 714] +HistoryString() = "2, 117, 1, 30, 0, 30, 0, 602, 0, 613, 2, 1197, 1, 1285, 1, 1781, 2, 1914, 0, 613, 2, 714" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "k1r1/1rp1/4/4 - s c b -" +ObservationString(1) = "3Q/P3/3P/R1KR - s c b -" +ObservationTensor(0).pieces_black: ◯◯◯◯◉◯◯◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◉◯◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◯◉ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯ + ◯◯◉◯ + ◯◯◯◉ + ◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◉◯ + ◯◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ 
+ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◉◯◯◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◉◯◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◯◉ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◉ +ObservationTensor(1).private_r_pieces: ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◉◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◉◯◯ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense a2", "Sense b2"] + +# Apply action "Sense b1" +action: 1 + +# State 23 +# Apply action "b3a3" +action: 686 + +# State 24 +# Apply action "Sense b2" +action: 3 + +# State 25 +# Apply action "a1a2" +action: 16 + +# State 26 +# Apply action "Sense b2" +action: 3 + +# State 27 +# Apply action "a3a2" +action: 89 + +# State 28 +# Apply action "Sense b2" +action: 3 + +# State 29 +# Apply action "d4a4" +action: 1998 + +# State 30 +# Apply action "Sense b2" +action: 3 + +# State 31 +# Apply action "a2b2" +action: 176 + +# State 32 +# Apply action "Sense b2" +action: 3 + +# State 33 +# Apply action "c4b3" +action: 1430 + +# State 34 +# Apply action "Sense b2" +action: 3 + +# State 35 +# Apply action "c3c2" +action: 1257 + +# State 36 +# Apply action "Sense b2" +action: 3 + +# State 37 +# Apply action "b3c3" +action: 760 + +# State 38 +# Apply action "Sense b2" +action: 3 + +# State 39 +# Apply action "c2d1n" +action: 1321 + +# State 40 +# k3/2Q1/1r1P/2Kn w - - 0 11 +IsTerminal() = False +History() = [2, 117, 1, 30, 0, 30, 0, 602, 0, 613, 2, 1197, 1, 1285, 1, 1781, 2, 1914, 0, 613, 2, 714, 1, 686, 3, 16, 3, 89, 3, 1998, 3, 176, 3, 1430, 3, 1257, 3, 760, 3, 1321] +HistoryString() = "2, 117, 1, 30, 0, 30, 0, 602, 0, 613, 2, 1197, 1, 1285, 1, 1781, 2, 1914, 0, 613, 2, 714, 1, 686, 3, 16, 3, 89, 3, 1998, 3, 176, 3, 1430, 3, 1257, 3, 760, 3, 1321" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "k3/4/1r2/3n - s c w -" +ObservationString(1) = "4/2Q1/3P/2K1 - s c w -" +ObservationTensor(0).pieces_black: ◯◯◯◉◯◯◯◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◉◯◯◯◯◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◯◉ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯ + ◯◉◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ 
+ObservationTensor(0).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◉◯◯◯◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◉◯◯◯◯◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◯◉ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◉◯ + ◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◉◯◯ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense a2", "Sense b2"] + +# Apply action "Sense b2" +action: 3 + +# State 41 +# Apply action "c1d1" +action: 1198 + +# State 42 +# k3/2Q1/1r1P/3K b - - 0 11 +IsTerminal() = False +History() = [2, 117, 1, 30, 0, 30, 0, 602, 0, 613, 2, 1197, 1, 1285, 1, 1781, 2, 1914, 0, 613, 2, 714, 1, 686, 3, 16, 3, 89, 3, 1998, 3, 176, 3, 1430, 3, 1257, 3, 760, 3, 1321, 3, 1198] +HistoryString() = "2, 117, 1, 30, 0, 30, 0, 602, 0, 613, 2, 1197, 1, 1285, 1, 1781, 2, 1914, 0, 613, 2, 714, 1, 686, 3, 16, 3, 89, 3, 1998, 3, 176, 3, 1430, 3, 1257, 3, 760, 3, 1321, 3, 1198" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "k3/4/1r2/4 - s c b -" +ObservationString(1) = "4/2Q1/3P/3K - s c b -" +ObservationTensor(0).pieces_black: ◯◯◉◯◯◯◯◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◉◯◯◯◯◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◯◉ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◉ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯ + ◯◉◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ 
+ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◉◯◯◯◯◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◉◯◯◯◯◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◯◉ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◉◯ + ◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◉◯◯ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense a2", "Sense b2"] + +# Apply action "Sense b2" +action: 3 + +# State 43 +# Apply action "a4a3" +action: 16 + +# State 44 +# Apply action "Sense a2" +action: 2 + +# State 45 +# Apply action "c3c1" +action: 1328 + +# State 46 +# Apply action "Sense a2" +action: 2 + +# State 47 +# Apply action "b2b3" +action: 745 + +# State 48 +# Apply action "Sense b2" +action: 3 + +# State 49 +# Apply action "d2c3" +action: 1882 + +# State 50 +# Apply action "Sense b2" +action: 3 + +# State 51 +# Apply action "b3b4" +action: 672 + +# State 52 +# Apply action "Sense b2" +action: 3 + +# State 53 +# Apply action "c1b1" +action: 1197 + +# State 54 +# Apply action "Sense b2" +action: 3 + +# State 55 +# Apply action "b4b1" +action: 602 + +# State 56 +# Apply action "Sense b1" +action: 1 + +# State 57 +# Apply action "d2c3" +action: 1882 + +# State 58 +# Apply action "Sense a1" +action: 0 + +# State 59 +# Apply action "b1a1" +action: 832 + +# State 60 +# 4/k3/3P/r2K w - - 1 16 +IsTerminal() = False +History() = [2, 117, 1, 30, 0, 30, 0, 602, 0, 613, 2, 1197, 1, 1285, 1, 1781, 2, 1914, 0, 613, 2, 714, 1, 686, 3, 16, 3, 89, 3, 1998, 3, 176, 3, 1430, 3, 1257, 3, 760, 3, 1321, 3, 1198, 3, 16, 2, 1328, 2, 745, 3, 1882, 3, 672, 3, 1197, 3, 602, 1, 1882, 0, 832] +HistoryString() = "2, 117, 1, 30, 0, 30, 0, 602, 0, 613, 2, 1197, 1, 1285, 1, 1781, 2, 1914, 0, 613, 2, 714, 1, 686, 3, 16, 3, 89, 3, 1998, 3, 176, 3, 1430, 3, 1257, 3, 760, 3, 1321, 3, 1198, 3, 16, 2, 1328, 2, 745, 3, 1882, 3, 672, 3, 1197, 3, 602, 1, 1882, 0, 832" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "4/k3/4/r3 - s - w -" +ObservationString(1) = "4/4/3P/3K - s - w -" +ObservationTensor(0).pieces_black: ◯◯◉◯◯◯◯◯◯ +ObservationTensor(0).pieces_white: ◯◯◉◯◯◯◯◯◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◉◯ + ◯◯◯◯ + 
◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◉◯◯◯◯◯◯ +ObservationTensor(1).pieces_white: ◯◯◉◯◯◯◯◯◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◉◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◉◯◯ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense a2", "Sense b2"] + +# Apply action "Sense b1" +action: 1 + +# State 61 +# Apply action "d1c2" +action: 1809 + +# State 62 +# 4/k3/2KP/r3 b - - 2 16 +IsTerminal() = False +History() = [2, 117, 1, 30, 0, 30, 0, 602, 0, 613, 2, 1197, 1, 1285, 1, 1781, 2, 1914, 0, 613, 2, 714, 1, 686, 3, 16, 3, 89, 3, 1998, 3, 176, 3, 1430, 3, 1257, 3, 760, 3, 1321, 3, 1198, 3, 16, 2, 1328, 2, 745, 3, 1882, 3, 672, 3, 1197, 3, 602, 1, 1882, 0, 832, 1, 1809] +HistoryString() = "2, 117, 1, 30, 0, 30, 0, 602, 0, 613, 2, 1197, 1, 1285, 1, 1781, 2, 1914, 0, 613, 2, 714, 1, 686, 3, 16, 3, 89, 3, 1998, 3, 176, 3, 1430, 3, 1257, 3, 760, 3, 1321, 3, 1198, 3, 16, 2, 1328, 2, 745, 3, 1882, 3, 672, 3, 1197, 3, 602, 1, 1882, 0, 832, 1, 1809" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "4/k3/4/r3 - s - b -" +ObservationString(1) = "4/4/2KP/4 - s - b -" +ObservationTensor(0).pieces_black: ◯◯◉◯◯◯◯◯◯ +ObservationTensor(0).pieces_white: ◯◯◉◯◯◯◯◯◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◉◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◉◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ 
+ObservationTensor(0).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◉◯◯◯◯◯◯ +ObservationTensor(1).pieces_white: ◯◯◉◯◯◯◯◯◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◉◯◯ + ◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◉◯◯ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense a2", "Sense b2"] + +# Apply action "Sense a2" +action: 2 + +# State 63 +# Apply action "a1b1" +action: 249 + +# State 64 +# Apply action "Sense b1" +action: 1 + +# State 65 +# Apply action "c2b2" +action: 1270 + +# State 66 +# Apply action "Sense b2" +action: 3 + +# State 67 +# Apply action "b1b3" +action: 817 + +# State 68 +# 4/k3/1r1P/4 w - - 0 18 +IsTerminal() = True +History() = [2, 117, 1, 30, 0, 30, 0, 602, 0, 613, 2, 1197, 1, 1285, 1, 1781, 2, 1914, 0, 613, 2, 714, 1, 686, 3, 16, 3, 89, 3, 1998, 3, 176, 3, 1430, 3, 1257, 3, 760, 3, 1321, 3, 1198, 3, 16, 2, 1328, 2, 745, 3, 1882, 3, 672, 3, 1197, 3, 602, 1, 1882, 0, 832, 1, 1809, 2, 249, 1, 1270, 3, 817] +HistoryString() = "2, 117, 1, 30, 0, 30, 0, 602, 0, 613, 2, 1197, 1, 1285, 1, 1781, 2, 1914, 0, 613, 2, 714, 1, 686, 3, 16, 3, 89, 3, 1998, 3, 176, 3, 1430, 3, 1257, 3, 760, 3, 1321, 3, 1198, 3, 16, 2, 1328, 2, 745, 3, 1882, 3, 672, 3, 1197, 3, 602, 1, 1882, 0, 832, 1, 1809, 2, 249, 1, 1270, 3, 817" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "4/k3/1r2/4 - s c w -" +ObservationString(1) = "4/4/3P/4 - s c w -" +ObservationTensor(0).pieces_black: ◯◯◉◯◯◯◯◯◯ +ObservationTensor(0).pieces_white: ◯◉◯◯◯◯◯◯◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◯◉ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◉◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯ + ◯◉◯◯ + ◯◯◯◯ + ◯◯◯◯ 
+ObservationTensor(0).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◉◯◯◯◯◯◯ +ObservationTensor(1).pieces_white: ◯◉◯◯◯◯◯◯◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◯◉ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◉◯◯ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ + ◯◯◯◯ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/rbc.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/rbc.txt new file mode 100644 index 0000000..2ec7475 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/rbc.txt @@ -0,0 +1,7658 @@ +game: rbc + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Reconnaisance Blind Chess" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["board_size", "fen", "sense_size"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "rbc" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 4674 +PolicyTensorShape() = [4674] +MaxChanceOutcomes() = 0 +GetParameters() = {board_size=8,fen=rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1,sense_size=3} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = pieces_black: [17], pieces_white: [17], phase: [2], capture: [2], side_to_play: [2], illegal_move: [2], private_k_pieces: [8, 8], private_q_pieces: [8, 8], private_r_pieces: [8, 8], private_b_pieces: [8, 8], private_n_pieces: [8, 8], private_p_pieces: [8, 8], 
private_left_castling: [2], private_right_castling: [2], private_sense_K_pieces: [8, 8], private_sense_Q_pieces: [8, 8], private_sense_R_pieces: [8, 8], private_sense_B_pieces: [8, 8], private_sense_N_pieces: [8, 8], private_sense_P_pieces: [8, 8] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 814 +MaxGameLength() = 17695 +ToString() = "rbc()" + +# State 0 +# rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "rnbqkbnr/pppppppp/8/8/8/8/8/8 KQ s - w -" +ObservationString(1) = "8/8/8/8/8/8/PPPPPPPP/RNBQKBNR KQ s - w -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ +ObservationTensor(0).private_left_castling: ◯◉ +ObservationTensor(0).private_right_castling: ◯◉ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + 
◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ +ObservationTensor(1).private_left_castling: ◯◉ +ObservationTensor(1).private_right_castling: ◯◉ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", "Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", "Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense a1" +action: 0 + +# State 1 +# rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1 +IsTerminal() = False +History() = [0] +HistoryString() = "0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "rnbqkbnr/pppppppp/8/8/8/8/8/8 KQ m - w -" +ObservationString(1) = "8/8/8/8/8/ 5/PPPPPPPP/RNBQKBNR KQ m - w -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(0).phase: ◉◯ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ +ObservationTensor(0).private_left_castling: ◯◉ +ObservationTensor(0).private_right_castling: ◯◉ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ 
+ ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(1).phase: ◉◯ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ +ObservationTensor(1).private_left_castling: ◯◉ +ObservationTensor(1).private_right_castling: ◯◉ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 89, 90, 117, 652, 656, 673, 674, 701, 714, 1257, 1258, 1285, 1298, 1841, 1842, 1869, 1882, 2425, 2426, 2453, 2466, 3009, 3010, 3037, 3050, 3572, 3576, 3593, 3594, 3621, 3634, 4177, 4178, 4218] +StringLegalActions() = ["pass", "a2a3", "a2a4", "a2b3", "b1a3", "b1c3", "b2b3", "b2b4", "b2c3", "b2a3", "c2c3", "c2c4", "c2d3", "c2b3", "d2d3", "d2d4", "d2e3", "d2c3", "e2e3", "e2e4", "e2f3", "e2d3", "f2f3", "f2f4", "f2g3", "f2e3", "g1f3", "g1h3", "g2g3", "g2g4", "g2h3", "g2f3", "h2h3", "h2h4", "h2g3"] + +# Apply action "b2b4" +action: 674 + +# State 2 +# rnbqkbnr/pppppppp/8/8/1P6/8/P1PPPPPP/RNBQKBNR b KQkq - 0 1 +IsTerminal() = False +History() = [0, 674] +HistoryString() = "0, 674" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "rnbqkbnr/pppppppp/8/8/8/8/8/8 KQ s - b -" +ObservationString(1) = "8/8/8/8/1P6/8/P1PPPPPP/RNBQKBNR KQ s - b -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + 
◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ +ObservationTensor(0).private_left_castling: ◯◉ +ObservationTensor(0).private_right_castling: ◯◉ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◉◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ +ObservationTensor(1).private_left_castling: ◯◉ +ObservationTensor(1).private_right_castling: ◯◉ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", "Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", "Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense b1" +action: 1 + +# State 3 +# rnbqkbnr/pppppppp/8/8/1P6/8/P1PPPPPP/RNBQKBNR b KQkq - 0 1 +IsTerminal() = False +History() = [0, 674, 1] +HistoryString() = "0, 674, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "rnbqkbnr/pppppppp/8/8/8/1 4/1 PP4/1NBQ4 KQ m - b -" +ObservationString(1) = "8/8/8/8/1P6/8/P1PPPPPP/RNBQKBNR KQ m - b -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(0).phase: ◉◯ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ +ObservationTensor(0).private_left_castling: ◯◉ +ObservationTensor(0).private_right_castling: ◯◉ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(1).phase: ◉◯ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ 
+ ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◉◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ +ObservationTensor(1).private_left_castling: ◯◉ +ObservationTensor(1).private_right_castling: ◯◉ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 89, 90, 117, 652, 656, 673, 674, 701, 714, 1257, 1258, 1285, 1298, 1841, 1842, 1869, 1882, 2425, 2426, 2453, 2466, 3009, 3010, 3037, 3050, 3572, 3576, 3593, 3594, 3621, 3634, 4177, 4178, 4218] +StringLegalActions() = ["pass", "a7a6", "a7a5", "a7b6", "b8a6", "b8c6", "b7b6", "b7b5", "b7c6", "b7a6", "c7c6", "c7c5", "c7d6", "c7b6", "d7d6", "d7d5", "d7e6", "d7c6", "e7e6", "e7e5", "e7f6", "e7d6", "f7f6", "f7f5", "f7g6", "f7e6", "g8f6", "g8h6", "g7g6", "g7g5", "g7h6", "g7f6", "h7h6", "h7h5", "h7g6"] + +# Apply action "c7c6" +action: 1257 + +# State 4 +# rnbqkbnr/pp1ppppp/2p5/8/1P6/8/P1PPPPPP/RNBQKBNR w KQkq - 0 2 +IsTerminal() = False +History() = [0, 674, 1, 1257] +HistoryString() = "0, 674, 1, 1257" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "rnbqkbnr/pp1ppppp/2p5/8/8/8/8/8 KQ s - w -" +ObservationString(1) = "8/8/8/8/1P6/8/P1PPPPPP/RNBQKBNR KQ s - w -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ +ObservationTensor(0).private_left_castling: ◯◉ +ObservationTensor(0).private_right_castling: ◯◉ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ 
+ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◉◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ +ObservationTensor(1).private_left_castling: ◯◉ +ObservationTensor(1).private_right_castling: ◯◉ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", "Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", "Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense e4" +action: 22 + +# State 5 +# Apply action "a2b3" +action: 117 + +# State 6 +# rnbqkbnr/pp1ppppp/2p5/8/1P6/8/P1PPPPPP/RNBQKBNR b KQkq - 0 2 +IsTerminal() = False +History() = [0, 674, 1, 1257, 22, 117] +HistoryString() = "0, 674, 1, 1257, 22, 117" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "rnbqkbnr/pp1ppppp/2p5/8/8/8/8/8 KQ s - b i" +ObservationString(1) = "8/8/8/8/1P6/8/P1PPPPPP/RNBQKBNR KQ 
s - b -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).illegal_move: ◯◉ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ +ObservationTensor(0).private_left_castling: ◯◉ +ObservationTensor(0).private_right_castling: ◯◉ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◉◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ +ObservationTensor(1).private_left_castling: ◯◉ +ObservationTensor(1).private_right_castling: ◯◉ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ 
+ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", "Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", "Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense d2" +action: 9 + +# State 7 +# Apply action "d8a5" +action: 1807 + +# State 8 +# Apply action "Sense a4" +action: 18 + +# State 9 +# Apply action "c1a3" +action: 1224 + +# State 10 +# Apply action "Sense e1" +action: 4 + +# State 11 +# Apply action "a5b4" +action: 263 + +# State 12 +# Apply action "Sense e3" +action: 16 + +# State 13 +# Apply action "d2d3" +action: 1841 + +# State 14 +# Apply action "Sense a4" +action: 18 + +# State 15 +# Apply action "e7e6" +action: 2425 + +# State 16 +# Apply action "Sense c6" +action: 32 + +# State 17 +# Apply action "a2b3" +action: 117 + +# State 18 +# Apply action "Sense d4" +action: 21 + +# State 19 +# Apply action "g8e7" +action: 3570 + +# State 20 +# rnb1kb1r/pp1pnppp/2p1p3/8/1q6/B2P4/P1P1PPPP/RN1QKBNR w KQkq - 1 6 +IsTerminal() = False +History() = [0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570] +HistoryString() = "0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "rnb1kb1r/pp1pnppp/2p1p3/8/1q6/8/8/8 KQ s - w -" +ObservationString(1) = "8/8/8/8/8/B2P4/P1P1PPPP/RN1QKBNR KQ s - w -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ +ObservationTensor(0).private_left_castling: ◯◉ +ObservationTensor(0).private_right_castling: ◯◉ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ 
+ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ +ObservationTensor(1).private_left_castling: ◯◉ +ObservationTensor(1).private_right_castling: ◯◉ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", "Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", "Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense e5" +action: 28 + +# State 21 +# Apply action "g1h3" +action: 3576 + +# State 22 +# rnb1kb1r/pp1pnppp/2p1p3/8/1q6/B2P3N/P1P1PPPP/RN1QKB1R b KQkq - 2 6 +IsTerminal() = False +History() = [0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576] +HistoryString() = "0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576" +IsChanceNode() = False 
+IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "rnb1kb1r/pp1pnppp/2p1p3/8/1q6/8/8/8 KQ s - b -" +ObservationString(1) = "8/8/8/8/8/B2P3N/P1P1PPPP/RN1QKB1R KQ s - b -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◉◯ +ObservationTensor(0).private_left_castling: ◯◉ +ObservationTensor(0).private_right_castling: ◯◉ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ +ObservationTensor(1).private_left_castling: ◯◉ +ObservationTensor(1).private_right_castling: ◯◉ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + 
◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", "Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", "Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense f1" +action: 5 + +# State 23 +# Apply action "h7h5" +action: 4178 + +# State 24 +# Apply action "Sense d5" +action: 27 + +# State 25 +# Apply action "g2g3" +action: 3593 + +# State 26 +# Apply action "Sense f3" +action: 17 + +# State 27 +# Apply action "d7d6" +action: 1841 + +# State 28 +# Apply action "Sense a5" +action: 24 + +# State 29 +# Apply action "d1d2" +action: 1768 + +# State 30 +# Apply action "Sense e6" +action: 34 + +# State 31 +# Apply action "a7a6" +action: 89 + +# State 32 +# Apply action "Sense f6" +action: 35 + +# State 33 +# Apply action "a3b2" +action: 204 + +# State 34 +# Apply action "Sense b5" +action: 25 + +# State 35 +# Apply action "b4d4" +action: 907 + +# State 36 +# Apply action "Sense c6" +action: 32 + +# State 37 +# Apply action "b2f6" +action: 704 + +# State 38 +# Apply action "Sense e5" +action: 28 + +# State 39 +# Apply action "f7f5" +action: 3010 + +# State 40 +# rnb1kb1r/1p2n1p1/p1ppp3/5p1p/3B4/3P2PN/P1PQPP1P/RN2KB1R w KQkq - 0 11 +IsTerminal() = False +History() = [0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010] +HistoryString() = "0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "rnb1kb1r/1p2n1p1/p1ppp3/5p1p/8/8/8/8 KQ s - w -" +ObservationString(1) = "8/8/8/8/3B4/3P2PN/P1PQPP1P/RN2KB1R KQ s - w -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + 
◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◉◯◯◯ +ObservationTensor(0).private_left_castling: ◯◉ +ObservationTensor(0).private_right_castling: ◯◉ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ +ObservationTensor(1).private_left_castling: ◯◉ +ObservationTensor(1).private_right_castling: ◯◉ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", "Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", 
"Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense f5" +action: 29 + +# State 41 +# Apply action "d2a5" +action: 1880 + +# State 42 +# rnb1kb1r/1p2n1p1/p1ppp3/Q4p1p/3B4/3P2PN/P1P1PP1P/RN2KB1R b KQkq - 1 11 +IsTerminal() = False +History() = [0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880] +HistoryString() = "0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "rnb1kb1r/1p2n1p1/p1ppp3/5p1p/8/8/8/8 KQ s - b -" +ObservationString(1) = "8/8/8/Q7/3B4/3P2PN/P1P1PP1P/RN2KB1R KQ s - b -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◉◯◯◯ +ObservationTensor(0).private_left_castling: ◯◉ +ObservationTensor(0).private_right_castling: ◯◉ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ 
+ ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ +ObservationTensor(1).private_left_castling: ◯◉ +ObservationTensor(1).private_right_castling: ◯◉ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", "Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", "Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense b3" +action: 13 + +# State 43 +# Apply action "b7b6" +action: 673 + +# State 44 +# Apply action "Sense d6" +action: 33 + +# State 45 +# Apply action "a5a7" +action: 309 + +# State 46 +# Apply action "Sense b5" +action: 25 + +# State 47 +# Apply action "g7h6" +action: 3621 + +# State 48 +# Apply action "Sense d2" +action: 9 + +# State 49 +# Apply action "a6b5" +action: 423 + +# State 50 +# Apply action "Sense e6" +action: 34 + +# State 51 +# Apply action "h5g4" +action: 4364 + +# State 52 +# Apply action "Sense c5" +action: 26 + +# State 53 +# Apply action "e2e4" +action: 2426 + +# State 54 +# Apply action "Sense a1" +action: 0 + +# State 55 +# Apply action "a8a6" +action: 17 + +# State 56 +# Apply action "Sense f3" +action: 17 + +# State 57 +# Apply action "b1d2" +action: 654 + +# State 58 +# Apply action "Sense a4" +action: 18 + +# State 59 +# Apply action "f5f4" +action: 3155 + +# State 60 +# 1nb1kb1r/4n1p1/rpppp3/1Q5p/3BPp2/3P2PN/P1PN1P1P/R3KB1R w KQk - 0 16 +IsTerminal() = False +History() = [0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155] +HistoryString() = "0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = 
"1nb1kb1r/4n1p1/rpppp3/7p/5p2/8/8/8 K s - w -" +ObservationString(1) = "8/8/8/1Q6/3BP3/3P2PN/P1PN1P1P/R3KB1R KQ s - w -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◉◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◯◉ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ +ObservationTensor(1).private_left_castling: ◯◉ +ObservationTensor(1).private_right_castling: ◯◉ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ 
+ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", "Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", "Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense f6" +action: 35 + +# State 61 +# Apply action "d4c3" +action: 2014 + +# State 62 +# 1nb1kb1r/4n1p1/rpppp3/1Q5p/4Pp2/2BP2PN/P1PN1P1P/R3KB1R b KQk - 1 16 +IsTerminal() = False +History() = [0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014] +HistoryString() = "0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "1nb1kb1r/4n1p1/rpppp3/7p/5p2/8/8/8 K s - b -" +ObservationString(1) = "8/8/8/1Q6/4P3/2BP2PN/P1PN1P1P/R3KB1R KQ s - b -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◉◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◯◉ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ 
+ ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ +ObservationTensor(1).private_left_castling: ◯◉ +ObservationTensor(1).private_right_castling: ◯◉ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", "Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", "Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense d2" +action: 9 + +# State 63 +# Apply action "h5h4" +action: 4323 + +# State 64 +# Apply action "Sense a3" +action: 12 + +# State 65 +# Apply action "b5b3" +action: 890 + +# State 66 +# Apply action "Sense a3" +action: 12 + +# State 67 +# Apply action "a6a2" +action: 165 + +# State 68 +# Apply action "Sense e5" +action: 28 + +# State 69 +# Apply action "b3e6" +action: 776 + +# State 70 +# Apply action "Sense a3" +action: 12 + +# State 71 +# Apply action "a2a7" +action: 449 + +# State 72 +# Apply action "Sense f6" +action: 35 + +# State 73 +# Apply action 
"e6h6" +action: 2733 + +# State 74 +# Apply action "Sense d4" +action: 21 + +# State 75 +# Apply action "c8h3" +action: 1216 + +# State 76 +# Apply action "Sense e5" +action: 28 + +# State 77 +# Apply action "h6f8" +action: 4509 + +# State 78 +# Apply action "Sense f1" +action: 5 + +# State 79 +# Apply action "h3g2" +action: 4510 + +# State 80 +# 1n2kb1r/r3n1Q1/1ppp4/8/4Pp1p/2BP2P1/2PN1PbP/R3KB1R w KQk - 1 21 +IsTerminal() = False +History() = [0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510] +HistoryString() = "0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "1n2kb1r/r3n3/1ppp4/8/5p1p/8/6b1/8 K s - w -" +ObservationString(1) = "8/6Q1/8/8/4P3/2BP2P1/2PN1P1P/R3KB1R KQ s - w -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◯◉ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◯◉ 
+ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ +ObservationTensor(1).private_left_castling: ◯◉ +ObservationTensor(1).private_right_castling: ◯◉ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", "Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", "Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense c5" +action: 26 + +# State 81 +# Apply action "g3f4" +action: 3707 + +# State 82 +# 1n2kb1r/r3n1Q1/1ppp4/8/4PP1p/2BP4/2PN1PbP/R3KB1R b KQk - 0 21 +IsTerminal() = False +History() = [0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707] +HistoryString() = "0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "1n2kb1r/r3n3/1ppp4/8/7p/8/6b1/8 K s c b -" +ObservationString(1) = "8/6Q1/8/8/4PP2/2BP4/2PN1P1P/R3KB1R KQ s c b -" 
+ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◯◉ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◯◉ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◯◉ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◉◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ +ObservationTensor(1).private_left_castling: ◯◉ +ObservationTensor(1).private_right_castling: ◯◉ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ 
+ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", "Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", "Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense d2" +action: 9 + +# State 83 +# Apply action "g2h1" +action: 3986 + +# State 84 +# Apply action "Sense b6" +action: 31 + +# State 85 +# Apply action "e4f5" +action: 2599 + +# State 86 +# Apply action "Sense a3" +action: 12 + +# State 87 +# Apply action "h8h6" +action: 4105 + +# State 88 +# Apply action "Sense c5" +action: 26 + +# State 89 +# Apply action "h2h3" +action: 4177 + +# State 90 +# Apply action "Sense a4" +action: 18 + +# State 91 +# Apply action "h1e4" +action: 4640 + +# State 92 +# Apply action "Sense c6" +action: 32 + +# State 93 +# Apply action "e1c1" +action: 2364 + +# State 94 +# Apply action "Sense b2" +action: 7 + +# State 95 +# Apply action "a7a6" +action: 89 + +# State 96 +# Apply action "Sense d5" +action: 27 + +# State 97 +# Apply action "g7h8" +action: 3986 + +# State 98 +# Apply action "Sense e4" +action: 22 + +# State 99 +# Apply action "b6a5" +action: 787 + +# State 100 +# 1n2kb1Q/4n3/rppp3r/8/4bP1p/2BP3P/2PN1P2/R3KB2 w Q - 2 26 +IsTerminal() = False +History() = [0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787] +HistoryString() = "0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "1n2kb2/4n3/rppp3r/8/4b2p/8/8/8 - s - w -" +ObservationString(1) = "7Q/8/8/8/5P2/2BP3P/2PN1P2/R3KB2 Q s - w i" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + 
◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).illegal_move: ◯◉ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(1).private_r_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(1).private_left_castling: ◯◉ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] 
+StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", "Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", "Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense c4" +action: 20 + +# State 101 +# Apply action "h3h4" +action: 4250 + +# State 102 +# 1n2kb1Q/4n3/rppp3r/8/4bP1p/2BP3P/2PN1P2/R3KB2 b Q - 2 26 +IsTerminal() = False +History() = [0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250] +HistoryString() = "0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "1n2kb2/4n3/rppp3r/8/4b2p/8/8/8 - s - b i" +ObservationString(1) = "7Q/8/8/8/5P2/2BP3P/2PN1P2/R3KB2 Q s - b -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).illegal_move: ◯◉ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ 
+ ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(1).private_r_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(1).private_left_castling: ◯◉ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", "Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", "Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense c3" +action: 14 + +# State 103 +# Apply action "a6a3" +action: 164 + +# State 104 +# Apply action "Sense c5" +action: 26 + +# State 105 +# Apply action "c3e5" +action: 1359 + +# State 106 +# Apply action "Sense f1" +action: 5 + +# State 107 +# Apply action "e4b1" +action: 2683 + +# State 108 +# Apply action "Sense a3" +action: 12 + +# State 109 +# Apply action "e5f6" +action: 2672 + +# State 110 +# Apply action "Sense f5" +action: 29 + +# State 111 +# Apply action "a3a7" +action: 377 + +# State 112 +# Apply action "Sense a5" +action: 24 + +# State 113 +# Apply action "h8a8" +action: 4622 + +# State 114 +# Apply action "Sense a1" +action: 0 + +# State 115 +# Apply action "d3e4" +action: 2175 + +# State 116 +# Apply action "Sense a3" +action: 12 + +# State 117 +# Apply action "c2c3" 
+action: 1257 + +# State 118 +# Apply action "Sense b2" +action: 7 + +# State 119 +# Apply action "h6h8" +action: 4248 + +# State 120 +# 1n2kQ1r/r3n3/1ppp1B2/8/4bP1p/2P4P/3N1P2/R3KB2 w Q - 1 31 +IsTerminal() = False +History() = [0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248] +HistoryString() = "0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "1n2k2r/r3n3/1ppp4/8/4b2p/8/8/8 - s - w -" +ObservationString(1) = "5Q2/8/5B2/8/5P2/2P4P/3N1P2/R3KB2 Q s - w -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ 
+ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(1).private_left_castling: ◯◉ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", "Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", "Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense b1" +action: 1 + +# State 121 +# Apply action "f6h4" +action: 3344 + +# State 122 +# 1n2kQ1r/r3n3/1ppp4/8/4bP1B/2P4P/3N1P2/R3KB2 b Q - 0 31 +IsTerminal() = False +History() = [0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248, 1, 3344] +HistoryString() = "0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 
18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248, 1, 3344" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "1n2k2r/r3n3/1ppp4/8/4b3/8/8/8 - s c b -" +ObservationString(1) = "5Q2/8/8/8/5P1B/2P4P/3N1P2/R3KB2 Q s c b -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◯◉ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◯◉ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(1).private_left_castling: ◯◉ +ObservationTensor(1).private_right_castling: ◉◯ 
+ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", "Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", "Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense e3" +action: 16 + +# State 123 +# Apply action "h8h2" +action: 4109 + +# State 124 +# Apply action "Sense b5" +action: 25 + +# State 125 +# Apply action "c1c2" +action: 1184 + +# State 126 +# Apply action "Sense e3" +action: 16 + +# State 127 +# Apply action "h4h5" +action: 4395 + +# State 128 +# Apply action "Sense b1" +action: 1 + +# State 129 +# Apply action "f8g8" +action: 3461 + +# State 130 +# Apply action "Sense c6" +action: 32 + +# State 131 +# Apply action "h5a5" +action: 4330 + +# State 132 +# Apply action "Sense b3" +action: 13 + +# State 133 +# Apply action "d1a1" +action: 1779 + +# State 134 +# Apply action "Sense d4" +action: 21 + +# State 135 +# Apply action "c6d5" +action: 1358 + +# State 136 +# Apply action "Sense e2" +action: 10 + +# State 137 +# Apply action "g8g1" +action: 4024 + +# State 138 +# Apply action "Sense c6" +action: 32 + +# State 139 +# Apply action "e4c2" +action: 2684 + +# State 140 +# 1n2k3/r3n3/1ppp4/r7/5P2/2P4P/2bN1P2/R3KBQ1 w Q - 5 36 +IsTerminal() = False +History() = [0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248, 1, 3344, 16, 4109, 25, 1184, 16, 4395, 1, 3461, 32, 4330, 13, 1779, 21, 1358, 10, 4024, 32, 2684] +HistoryString() = "0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 
3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248, 1, 3344, 16, 4109, 25, 1184, 16, 4395, 1, 3461, 32, 4330, 13, 1779, 21, 1358, 10, 4024, 32, 2684" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "1n2k3/r3n3/1ppp4/r7/8/8/2b5/8 - s - w -" +ObservationString(1) = "8/8/8/8/5P2/2P4P/3N1P2/R3KBQ1 Q s - w -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◉◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(1).private_left_castling: ◯◉ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + 
◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", "Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", "Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense b3" +action: 13 + +# State 141 +# Apply action "f1d3" +action: 2976 + +# State 142 +# 1n2k3/r3n3/1ppp4/r7/5P2/2PB3P/2bN1P2/R3K1Q1 b Q - 6 36 +IsTerminal() = False +History() = [0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248, 1, 3344, 16, 4109, 25, 1184, 16, 4395, 1, 3461, 32, 4330, 13, 1779, 21, 1358, 10, 4024, 32, 2684, 13, 2976] +HistoryString() = "0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248, 1, 3344, 16, 4109, 25, 1184, 16, 4395, 1, 3461, 32, 4330, 13, 1779, 21, 1358, 10, 4024, 32, 2684, 13, 2976" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "1n2k3/r3n3/1ppp4/r7/8/8/2b5/8 - s - b -" +ObservationString(1) = "8/8/8/8/5P2/2PB3P/3N1P2/R3K1Q1 Q s - b -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ 
+ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◉◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(1).private_left_castling: ◯◉ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", "Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", "Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense b2" +action: 7 + +# State 143 +# Apply action "c2d1" +action: 1650 + +# State 144 +# Apply action "Sense c4" +action: 20 + +# State 145 +# Apply action "a1a8" +action: 22 + +# State 146 +# Apply action "Sense d5" +action: 27 + +# State 147 +# Apply action "d1c2" +action: 2306 + +# State 148 +# Apply action "Sense d5" +action: 27 + +# State 149 +# Apply action "g1g7" +action: 3525 + +# State 150 +# Apply action "Sense c3" +action: 14 + +# State 151 +# Apply action "b6b5" +action: 746 + +# State 152 +# Apply action "Sense f2" +action: 11 + +# State 153 +# Apply action "d3e4" +action: 1942 + +# State 154 +# Apply action "Sense f5" +action: 29 + +# State 155 +# Apply action "c2f5" +action: 1666 + +# State 156 +# Apply action "Sense c2" +action: 8 + +# State 157 +# Apply action "g7b7" +action: 3967 + +# State 158 +# Apply action "Sense a4" +action: 18 + +# State 159 +# Apply action "e4f5" +action: 2686 + +# State 160 +# 1n2k3/r3Q3/2pp4/Rp3b2/5P2/2P4P/3N1P2/4K3 w - - 1 41 +IsTerminal() = False +History() = [0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248, 1, 3344, 16, 4109, 25, 1184, 16, 4395, 1, 3461, 32, 4330, 13, 1779, 21, 1358, 10, 4024, 32, 2684, 13, 2976, 7, 1650, 20, 22, 27, 2306, 27, 3525, 14, 746, 11, 1942, 29, 1666, 8, 3967, 18, 2686] +HistoryString() = "0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248, 1, 3344, 16, 4109, 25, 1184, 16, 4395, 1, 3461, 32, 4330, 13, 1779, 21, 1358, 10, 4024, 32, 2684, 13, 2976, 7, 1650, 20, 22, 27, 2306, 27, 3525, 14, 746, 11, 1942, 29, 1666, 8, 3967, 18, 2686" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "1n2k3/r7/2pp4/1p3b2/8/8/8/8 - s - w -" +ObservationString(1) = "8/4Q3/8/R7/5P2/2P4P/3N1P2/4K3 - s - w -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).illegal_move: ◉◯ 
+ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = 
[0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", "Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", "Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense b2" +action: 7 + +# State 161 +# Apply action "h3h4" +action: 4250 + +# State 162 +# 1n2k3/r3Q3/2pp4/Rp3b2/5P1P/2P5/3N1P2/4K3 b - - 0 41 +IsTerminal() = False +History() = [0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248, 1, 3344, 16, 4109, 25, 1184, 16, 4395, 1, 3461, 32, 4330, 13, 1779, 21, 1358, 10, 4024, 32, 2684, 13, 2976, 7, 1650, 20, 22, 27, 2306, 27, 3525, 14, 746, 11, 1942, 29, 1666, 8, 3967, 18, 2686, 7, 4250] +HistoryString() = "0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248, 1, 3344, 16, 4109, 25, 1184, 16, 4395, 1, 3461, 32, 4330, 13, 1779, 21, 1358, 10, 4024, 32, 2684, 13, 2976, 7, 1650, 20, 22, 27, 2306, 27, 3525, 14, 746, 11, 1942, 29, 1666, 8, 3967, 18, 2686, 7, 4250" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "1n2k3/r7/2pp4/1p3b2/8/8/8/8 - s - b -" +ObservationString(1) = "8/4Q3/8/R7/5P1P/2P5/3N1P2/4K3 - s - b -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◉◯◯ + 
◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", "Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", "Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense f4" +action: 23 + +# State 163 +# Apply action 
"c6c5" +action: 1330 + +# State 164 +# Apply action "Sense b5" +action: 25 + +# State 165 +# Apply action "f4e5" +action: 3196 + +# State 166 +# Apply action "Sense b1" +action: 1 + +# State 167 +# Apply action "a7a1" +action: 94 + +# State 168 +# Apply action "Sense e4" +action: 22 + +# State 169 +# Apply action "d2f3" +action: 1895 + +# State 170 +# Apply action "Sense c1" +action: 2 + +# State 171 +# Apply action "f5g4" +action: 3183 + +# State 172 +# Apply action "Sense a6" +action: 30 + +# State 173 +# Apply action "e7f7" +action: 2804 + +# State 174 +# Apply action "Sense d3" +action: 15 + +# State 175 +# Apply action "e8e7" +action: 2352 + +# State 176 +# Apply action "Sense f2" +action: 11 + +# State 177 +# Apply action "e1d1" +action: 2365 + +# State 178 +# Apply action "Sense c5" +action: 26 + +# State 179 +# Apply action "e7d8" +action: 2452 + +# State 180 +# 1n1k4/5Q2/3p4/rpp5/5PbP/2P2N2/5P2/3K4 w - - 6 46 +IsTerminal() = False +History() = [0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248, 1, 3344, 16, 4109, 25, 1184, 16, 4395, 1, 3461, 32, 4330, 13, 1779, 21, 1358, 10, 4024, 32, 2684, 13, 2976, 7, 1650, 20, 22, 27, 2306, 27, 3525, 14, 746, 11, 1942, 29, 1666, 8, 3967, 18, 2686, 7, 4250, 23, 1330, 25, 3196, 1, 94, 22, 1895, 2, 3183, 30, 2804, 15, 2352, 11, 2365, 26, 2452] +HistoryString() = "0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248, 1, 3344, 16, 4109, 25, 1184, 16, 4395, 1, 3461, 32, 4330, 13, 1779, 21, 1358, 10, 4024, 32, 2684, 13, 2976, 7, 1650, 20, 22, 27, 2306, 27, 3525, 14, 746, 11, 1942, 29, 1666, 8, 3967, 18, 2686, 7, 4250, 23, 1330, 25, 3196, 1, 94, 22, 1895, 2, 3183, 30, 2804, 15, 2352, 11, 2365, 26, 2452" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "1n1k4/8/3p4/rpp5/6b1/8/8/8 - s - w -" +ObservationString(1) = "8/5Q2/8/8/5P1P/2P2N2/5P2/3K4 - s - w -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ 
+ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", 
"Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", "Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense c5" +action: 26 + +# State 181 +# Apply action "f7f8" +action: 3374 + +# State 182 +# 1n1k1Q2/8/3p4/rpp5/5PbP/2P2N2/5P2/3K4 b - - 7 46 +IsTerminal() = False +History() = [0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248, 1, 3344, 16, 4109, 25, 1184, 16, 4395, 1, 3461, 32, 4330, 13, 1779, 21, 1358, 10, 4024, 32, 2684, 13, 2976, 7, 1650, 20, 22, 27, 2306, 27, 3525, 14, 746, 11, 1942, 29, 1666, 8, 3967, 18, 2686, 7, 4250, 23, 1330, 25, 3196, 1, 94, 22, 1895, 2, 3183, 30, 2804, 15, 2352, 11, 2365, 26, 2452, 26, 3374] +HistoryString() = "0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248, 1, 3344, 16, 4109, 25, 1184, 16, 4395, 1, 3461, 32, 4330, 13, 1779, 21, 1358, 10, 4024, 32, 2684, 13, 2976, 7, 1650, 20, 22, 27, 2306, 27, 3525, 14, 746, 11, 1942, 29, 1666, 8, 3967, 18, 2686, 7, 4250, 23, 1330, 25, 3196, 1, 94, 22, 1895, 2, 3183, 30, 2804, 15, 2352, 11, 2365, 26, 2452, 26, 3374" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "1n1k4/8/3p4/rpp5/6b1/8/8/8 - s - b -" +ObservationString(1) = "5Q2/8/8/8/5P1P/2P2N2/5P2/3K4 - s - b -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: 
◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", "Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", "Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense f3" +action: 17 + +# State 183 +# Apply action "b8c6" +action: 656 + +# State 184 +# Apply action "Sense d5" +action: 27 + +# State 185 +# Apply action "c3c4" +action: 1330 + +# State 186 +# Apply action "Sense f1" +action: 5 + +# State 
187 +# Apply action "a5a7" +action: 233 + +# State 188 +# Apply action "Sense e4" +action: 22 + +# State 189 +# Apply action "c4d5" +action: 1431 + +# State 190 +# Apply action "Sense f4" +action: 23 + +# State 191 +# Apply action "a7f7" +action: 107 + +# State 192 +# Apply action "Sense e4" +action: 22 + +# State 193 +# Apply action "f3g5" +action: 3138 + +# State 194 +# Apply action "Sense e4" +action: 22 + +# State 195 +# Apply action "g4d7" +action: 3837 + +# State 196 +# Apply action "Sense b1" +action: 1 + +# State 197 +# Apply action "h4h5" +action: 4323 + +# State 198 +# Apply action "Sense b4" +action: 19 + +# State 199 +# Apply action "f7e7" +action: 3022 + +# State 200 +# 3k1Q2/3br3/2np4/1pp3NP/2P2P2/8/5P2/3K4 w - - 1 51 +IsTerminal() = False +History() = [0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248, 1, 3344, 16, 4109, 25, 1184, 16, 4395, 1, 3461, 32, 4330, 13, 1779, 21, 1358, 10, 4024, 32, 2684, 13, 2976, 7, 1650, 20, 22, 27, 2306, 27, 3525, 14, 746, 11, 1942, 29, 1666, 8, 3967, 18, 2686, 7, 4250, 23, 1330, 25, 3196, 1, 94, 22, 1895, 2, 3183, 30, 2804, 15, 2352, 11, 2365, 26, 2452, 26, 3374, 17, 656, 27, 1330, 5, 233, 22, 1431, 23, 107, 22, 3138, 22, 3837, 1, 4323, 19, 3022] +HistoryString() = "0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248, 1, 3344, 16, 4109, 25, 1184, 16, 4395, 1, 3461, 32, 4330, 13, 1779, 21, 1358, 10, 4024, 32, 2684, 13, 2976, 7, 1650, 20, 22, 27, 2306, 27, 3525, 14, 746, 11, 1942, 29, 1666, 8, 3967, 18, 2686, 7, 4250, 23, 1330, 25, 3196, 1, 94, 22, 1895, 2, 3183, 30, 2804, 15, 2352, 11, 2365, 26, 2452, 26, 3374, 17, 656, 27, 1330, 5, 233, 22, 1431, 23, 107, 22, 3138, 22, 3837, 1, 4323, 19, 3022" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "3k4/3br3/2np4/1pp5/8/8/8/8 - s - w -" +ObservationString(1) = "5Q2/8/8/6NP/2P2P2/8/5P2/3K4 - s - w -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ 
+ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", 
"Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", "Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense c4" +action: 20 + +# State 201 +# Apply action "f8d6" +action: 3473 + +# State 202 +# 3k4/3bQ3/2np4/1pp3NP/2P2P2/8/5P2/3K4 b - - 0 51 +IsTerminal() = False +History() = [0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248, 1, 3344, 16, 4109, 25, 1184, 16, 4395, 1, 3461, 32, 4330, 13, 1779, 21, 1358, 10, 4024, 32, 2684, 13, 2976, 7, 1650, 20, 22, 27, 2306, 27, 3525, 14, 746, 11, 1942, 29, 1666, 8, 3967, 18, 2686, 7, 4250, 23, 1330, 25, 3196, 1, 94, 22, 1895, 2, 3183, 30, 2804, 15, 2352, 11, 2365, 26, 2452, 26, 3374, 17, 656, 27, 1330, 5, 233, 22, 1431, 23, 107, 22, 3138, 22, 3837, 1, 4323, 19, 3022, 20, 3473] +HistoryString() = "0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248, 1, 3344, 16, 4109, 25, 1184, 16, 4395, 1, 3461, 32, 4330, 13, 1779, 21, 1358, 10, 4024, 32, 2684, 13, 2976, 7, 1650, 20, 22, 27, 2306, 27, 3525, 14, 746, 11, 1942, 29, 1666, 8, 3967, 18, 2686, 7, 4250, 23, 1330, 25, 3196, 1, 94, 22, 1895, 2, 3183, 30, 2804, 15, 2352, 11, 2365, 26, 2452, 26, 3374, 17, 656, 27, 1330, 5, 233, 22, 1431, 23, 107, 22, 3138, 22, 3837, 1, 4323, 19, 3022, 20, 3473" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "3k4/3b4/2np4/1pp5/8/8/8/8 - s c b -" +ObservationString(1) = "8/4Q3/8/6NP/2P2P2/8/5P2/3K4 - s c b -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◯◉ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◉◯◯ 
+ ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◯◉ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◉◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", "Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", "Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense a2" +action: 6 + +# State 203 +# Apply action 
"d6d5" +action: 1914 + +# State 204 +# Apply action "Sense c5" +action: 26 + +# State 205 +# Apply action "c4d5" +action: 1431 + +# State 206 +# Apply action "Sense d1" +action: 3 + +# State 207 +# Apply action "c6a7" +action: 1379 + +# State 208 +# Apply action "Sense a4" +action: 18 + +# State 209 +# Apply action "d1c2" +action: 1809 + +# State 210 +# Apply action "Sense a5" +action: 24 + +# State 211 +# Apply action "b5a4" +action: 860 + +# State 212 +# Apply action "Sense f2" +action: 11 + +# State 213 +# Apply action "c2b2" +action: 1270 + +# State 214 +# Apply action "Sense a2" +action: 6 + +# State 215 +# Apply action "d7c6" +action: 1882 + +# State 216 +# Apply action "Sense d3" +action: 15 + +# State 217 +# Apply action "g5e6" +action: 3862 + +# State 218 +# Apply action "Sense a6" +action: 30 + +# State 219 +# Apply action "c6e4" +action: 1359 + +# State 220 +# 3k4/n3Q3/4N3/1ppb3P/5P2/8/1K3P2/8 w - - 0 56 +IsTerminal() = False +History() = [0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248, 1, 3344, 16, 4109, 25, 1184, 16, 4395, 1, 3461, 32, 4330, 13, 1779, 21, 1358, 10, 4024, 32, 2684, 13, 2976, 7, 1650, 20, 22, 27, 2306, 27, 3525, 14, 746, 11, 1942, 29, 1666, 8, 3967, 18, 2686, 7, 4250, 23, 1330, 25, 3196, 1, 94, 22, 1895, 2, 3183, 30, 2804, 15, 2352, 11, 2365, 26, 2452, 26, 3374, 17, 656, 27, 1330, 5, 233, 22, 1431, 23, 107, 22, 3138, 22, 3837, 1, 4323, 19, 3022, 20, 3473, 6, 1914, 26, 1431, 3, 1379, 18, 1809, 24, 860, 11, 1270, 6, 1882, 15, 3862, 30, 1359] +HistoryString() = "0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248, 1, 3344, 16, 4109, 25, 1184, 16, 4395, 1, 3461, 32, 4330, 13, 1779, 21, 1358, 10, 4024, 32, 2684, 13, 2976, 7, 1650, 20, 22, 27, 2306, 27, 3525, 14, 746, 11, 1942, 29, 1666, 8, 3967, 18, 2686, 7, 4250, 23, 1330, 25, 3196, 1, 94, 22, 1895, 2, 3183, 30, 2804, 15, 2352, 11, 2365, 26, 2452, 26, 3374, 17, 656, 27, 1330, 5, 233, 22, 1431, 23, 107, 22, 3138, 22, 3837, 1, 4323, 19, 3022, 20, 3473, 6, 1914, 26, 1431, 3, 1379, 18, 1809, 24, 860, 11, 1270, 6, 1882, 15, 3862, 30, 1359" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "3k4/n7/8/1ppb4/8/8/8/8 - s c w -" +ObservationString(1) = "8/4Q3/4N3/7P/5P2/8/1K3P2/8 - s c w -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◯◉ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).illegal_move: ◉◯ 
+ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◯◉ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◉◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = 
[0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", "Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", "Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense d3" +action: 15 + +# State 221 +# Apply action "b2a3" +action: 714 + +# State 222 +# 3k4/n3Q3/4N3/1ppb3P/5P2/K7/5P2/8 b - - 1 56 +IsTerminal() = False +History() = [0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248, 1, 3344, 16, 4109, 25, 1184, 16, 4395, 1, 3461, 32, 4330, 13, 1779, 21, 1358, 10, 4024, 32, 2684, 13, 2976, 7, 1650, 20, 22, 27, 2306, 27, 3525, 14, 746, 11, 1942, 29, 1666, 8, 3967, 18, 2686, 7, 4250, 23, 1330, 25, 3196, 1, 94, 22, 1895, 2, 3183, 30, 2804, 15, 2352, 11, 2365, 26, 2452, 26, 3374, 17, 656, 27, 1330, 5, 233, 22, 1431, 23, 107, 22, 3138, 22, 3837, 1, 4323, 19, 3022, 20, 3473, 6, 1914, 26, 1431, 3, 1379, 18, 1809, 24, 860, 11, 1270, 6, 1882, 15, 3862, 30, 1359, 15, 714] +HistoryString() = "0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248, 1, 3344, 16, 4109, 25, 1184, 16, 4395, 1, 3461, 32, 4330, 13, 1779, 21, 1358, 10, 4024, 32, 2684, 13, 2976, 7, 1650, 20, 22, 27, 2306, 27, 3525, 14, 746, 11, 1942, 29, 1666, 8, 3967, 18, 2686, 7, 4250, 23, 1330, 25, 3196, 1, 94, 22, 1895, 2, 3183, 30, 2804, 15, 2352, 11, 2365, 26, 2452, 26, 3374, 17, 656, 27, 1330, 5, 233, 22, 1431, 23, 107, 22, 3138, 22, 3837, 1, 4323, 19, 3022, 20, 3473, 6, 1914, 26, 1431, 3, 1379, 18, 1809, 24, 860, 11, 1270, 6, 1882, 15, 3862, 30, 1359, 15, 714" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "3k4/n7/8/1ppb4/8/8/8/8 - s - b -" +ObservationString(1) = "8/4Q3/4N3/7P/5P2/K7/5P2/8 - s - b -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◉◯ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ 
+ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◉◯ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◉◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◉◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", "Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", "Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense b5" +action: 25 + +# State 223 +# Apply action "c5c4" +action: 1403 + +# State 224 +# Apply action "Sense f6" +action: 35 + +# State 225 +# Apply action "e7h7" +action: 2806 + +# State 226 +# Apply action "Sense d1" +action: 3 + +# State 227 +# Apply action "b5a4" +action: 860 + +# State 228 +# Apply action "Sense b6" +action: 31 + +# State 229 +# Apply action "e6g7" +action: 2771 + +# State 230 +# Apply action "Sense f3" +action: 17 + +# State 231 +# Apply action "d5h1" +action: 2018 + +# State 232 +# Apply action "Sense b3" +action: 13 + +# State 233 +# Apply action "h7h8" +action: 4542 + +# State 234 +# Apply action "Sense b6" +action: 31 + +# State 235 +# Apply action "h1b7" +action: 4637 + +# State 236 +# Apply action "Sense b1" +action: 1 + +# State 237 +# Apply action "f2f3" +action: 3009 + +# State 238 +# Apply action "Sense e3" +action: 16 + +# State 239 +# Apply action "b7c8" +action: 715 + +# State 240 +# 2bk3Q/n5N1/8/1p5P/2p2P2/K4P2/8/8 w - - 1 61 +IsTerminal() = False +History() = [0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248, 1, 3344, 16, 4109, 25, 1184, 16, 4395, 1, 3461, 32, 4330, 13, 1779, 21, 1358, 10, 4024, 32, 2684, 13, 2976, 7, 1650, 20, 22, 27, 2306, 27, 3525, 14, 746, 11, 1942, 29, 1666, 8, 3967, 18, 2686, 7, 4250, 23, 1330, 25, 3196, 1, 94, 22, 1895, 2, 3183, 30, 2804, 15, 2352, 11, 2365, 26, 2452, 26, 3374, 17, 656, 27, 1330, 5, 233, 22, 1431, 23, 107, 22, 3138, 22, 3837, 1, 4323, 19, 3022, 20, 3473, 6, 1914, 26, 1431, 3, 1379, 18, 1809, 24, 860, 11, 1270, 6, 1882, 15, 3862, 30, 1359, 15, 714, 25, 1403, 35, 2806, 3, 860, 31, 2771, 17, 2018, 13, 4542, 31, 4637, 1, 3009, 16, 715] +HistoryString() = "0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248, 1, 3344, 16, 4109, 25, 1184, 16, 4395, 1, 3461, 32, 4330, 13, 1779, 21, 1358, 10, 4024, 32, 2684, 13, 2976, 7, 1650, 20, 22, 27, 2306, 27, 3525, 14, 746, 11, 1942, 29, 1666, 8, 3967, 18, 2686, 7, 4250, 23, 1330, 25, 3196, 1, 94, 22, 1895, 2, 3183, 30, 2804, 15, 2352, 11, 2365, 26, 2452, 
26, 3374, 17, 656, 27, 1330, 5, 233, 22, 1431, 23, 107, 22, 3138, 22, 3837, 1, 4323, 19, 3022, 20, 3473, 6, 1914, 26, 1431, 3, 1379, 18, 1809, 24, 860, 11, 1270, 6, 1882, 15, 3862, 30, 1359, 15, 714, 25, 1403, 35, 2806, 3, 860, 31, 2771, 17, 2018, 13, 4542, 31, 4637, 1, 3009, 16, 715" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "2bk4/n7/8/1p6/2p5/8/8/8 - s - w -" +ObservationString(1) = "7Q/6N1/8/7P/5P2/K4P2/8/8 - s - w -" +ObservationTensor(0).pieces_black: ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◉◯ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◉◯ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ +ObservationTensor(1).private_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ +ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + 
◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35] +StringLegalActions() = ["Sense a1", "Sense b1", "Sense c1", "Sense d1", "Sense e1", "Sense f1", "Sense a2", "Sense b2", "Sense c2", "Sense d2", "Sense e2", "Sense f2", "Sense a3", "Sense b3", "Sense c3", "Sense d3", "Sense e3", "Sense f3", "Sense a4", "Sense b4", "Sense c4", "Sense d4", "Sense e4", "Sense f4", "Sense a5", "Sense b5", "Sense c5", "Sense d5", "Sense e5", "Sense f5", "Sense a6", "Sense b6", "Sense c6", "Sense d6", "Sense e6", "Sense f6"] + +# Apply action "Sense c2" +action: 8 + +# State 241 +# Apply action "h8c8" +action: 4624 + +# State 242 +# 2bQ4/n5N1/8/1p5P/2p2P2/K4P2/8/8 b - - 0 61 +IsTerminal() = True +History() = [0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248, 1, 3344, 16, 4109, 25, 1184, 16, 4395, 1, 3461, 32, 4330, 13, 1779, 21, 1358, 10, 4024, 32, 2684, 13, 2976, 7, 1650, 20, 22, 27, 2306, 27, 3525, 14, 746, 11, 1942, 29, 1666, 8, 3967, 18, 2686, 7, 4250, 23, 1330, 25, 3196, 1, 94, 22, 1895, 2, 3183, 30, 2804, 15, 2352, 11, 2365, 26, 2452, 26, 3374, 17, 656, 27, 1330, 5, 233, 22, 1431, 23, 107, 22, 3138, 22, 3837, 1, 4323, 19, 3022, 20, 3473, 6, 1914, 26, 1431, 3, 1379, 18, 1809, 24, 860, 11, 1270, 6, 1882, 15, 3862, 30, 1359, 15, 714, 25, 1403, 35, 2806, 3, 860, 31, 2771, 17, 2018, 13, 4542, 31, 4637, 1, 3009, 16, 715, 8, 4624] +HistoryString() = "0, 674, 1, 1257, 22, 117, 9, 1807, 18, 1224, 4, 263, 16, 1841, 18, 2425, 32, 117, 21, 3570, 28, 3576, 5, 4178, 27, 3593, 17, 1841, 24, 1768, 34, 89, 35, 204, 25, 907, 32, 704, 28, 3010, 29, 1880, 13, 673, 33, 309, 25, 3621, 9, 423, 34, 4364, 26, 2426, 0, 17, 17, 654, 18, 3155, 35, 2014, 9, 4323, 12, 890, 12, 165, 28, 776, 12, 449, 35, 2733, 21, 1216, 28, 4509, 5, 4510, 26, 3707, 9, 3986, 31, 2599, 12, 4105, 26, 4177, 18, 4640, 32, 2364, 7, 89, 27, 3986, 22, 787, 20, 4250, 14, 164, 26, 1359, 5, 2683, 12, 2672, 29, 377, 24, 4622, 0, 2175, 12, 1257, 7, 4248, 1, 3344, 16, 4109, 25, 1184, 16, 4395, 1, 3461, 32, 4330, 13, 1779, 21, 1358, 10, 4024, 32, 2684, 13, 2976, 7, 1650, 20, 22, 27, 2306, 27, 3525, 14, 746, 11, 1942, 29, 1666, 8, 3967, 18, 2686, 7, 4250, 23, 1330, 25, 3196, 1, 94, 22, 1895, 2, 3183, 30, 2804, 15, 
2352, 11, 2365, 26, 2452, 26, 3374, 17, 656, 27, 1330, 5, 233, 22, 1431, 23, 107, 22, 3138, 22, 3837, 1, 4323, 19, 3022, 20, 3473, 6, 1914, 26, 1431, 3, 1379, 18, 1809, 24, 860, 11, 1270, 6, 1882, 15, 3862, 30, 1359, 15, 714, 25, 1403, 35, 2806, 3, 860, 31, 2771, 17, 2018, 13, 4542, 31, 4637, 1, 3009, 16, 715, 8, 4624" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "2b5/n7/8/1p6/2p5/8/8/8 - s c b -" +ObservationString(1) = "3Q4/6N1/8/7P/5P2/K4P2/8/8 - s c b -" +ObservationTensor(0).pieces_black: ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).pieces_white: ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(0).phase: ◯◉ +ObservationTensor(0).capture: ◯◉ +ObservationTensor(0).side_to_play: ◯◉ +ObservationTensor(0).illegal_move: ◉◯ +ObservationTensor(0).private_k_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_n_pieces: ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ + ◯◯◯◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_left_castling: ◉◯ +ObservationTensor(0).private_right_castling: ◉◯ +ObservationTensor(0).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(0).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_black: ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).pieces_white: ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1).phase: ◯◉ +ObservationTensor(1).capture: ◯◉ +ObservationTensor(1).side_to_play: ◯◉ +ObservationTensor(1).illegal_move: ◉◯ +ObservationTensor(1).private_k_pieces: ◯◯◉◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◉ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_r_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_b_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_n_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◉◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_p_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◉◉◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◉◯◯◯ +ObservationTensor(1).private_left_castling: ◉◯ +ObservationTensor(1).private_right_castling: ◉◯ 
+ObservationTensor(1).private_sense_K_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_Q_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_R_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_B_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_N_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +ObservationTensor(1).private_sense_P_pieces: ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ + ◯◯◯◯◯◯◯◯ +Rewards() = [-1, 1] +Returns() = [-1, 1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/repeated_game(stage_game=matrix_rps(),num_repetitions=10).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/repeated_game(stage_game=matrix_rps(),num_repetitions=10).txt new file mode 100644 index 0000000..e85b79c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/repeated_game(stage_game=matrix_rps(),num_repetitions=10).txt @@ -0,0 +1,179 @@ +game: repeated_game(stage_game=matrix_rps(),num_repetitions=10) + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SIMULTANEOUS +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Repeated Rock, Paper, Scissors" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["num_repetitions", "recall", "stage_game"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.REWARDS +GameType.short_name = "repeated_game" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 3 +PolicyTensorShape() = [3] +MaxChanceOutcomes() = 0 +GetParameters() = {num_repetitions=10,recall=1,stage_game=matrix_rps()} +NumPlayers() = 2 +MinUtility() = -10.0 +MaxUtility() = 10.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [6] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 6 +MaxGameLength() = 10 +ToString() = "repeated_game(num_repetitions=10,stage_game=matrix_rps())" + +# State 0 +# Total Returns: 0 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "" +ObservationString(1) = "" +ObservationTensor(0): ◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2] +LegalActions(1) = [0, 1, 2] +StringLegalActions(0) = ["Rock", "Paper", "Scissors"] +StringLegalActions(1) = ["Rock", "Paper", "Scissors"] + +# Apply joint action ["Rock", "Rock"] +actions: [0, 0] + +# State 1 +# Round 0: +# Actions: Rock Rock +# Rewards: 0 0 +# Total Returns: 0 0 +IsTerminal() = False +History() = [0, 0] +HistoryString() = "0, 0" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "Rock Rock " +ObservationString(1) = "Rock Rock " +ObservationTensor(0): ◉◯◯◉◯◯ +ObservationTensor(1): ◉◯◯◉◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2] +LegalActions(1) 
= [0, 1, 2] +StringLegalActions(0) = ["Rock", "Paper", "Scissors"] +StringLegalActions(1) = ["Rock", "Paper", "Scissors"] + +# Apply joint action ["Scissors", "Scissors"] +actions: [2, 2] + +# State 2 +# Round 0: +# Actions: Rock Rock +# Rewards: 0 0 +# Round 1: +# Actions: Scissors Scissors +# Rewards: 0 0 +# Total Returns: 0 0 +IsTerminal() = False +History() = [0, 0, 2, 2] +HistoryString() = "0, 0, 2, 2" +IsChanceNode() = False +IsSimultaneousNode() = True +CurrentPlayer() = -2 +ObservationString(0) = "Scissors Scissors " +ObservationString(1) = "Scissors Scissors " +ObservationTensor(0): ◯◯◉◯◯◉ +ObservationTensor(1): ◯◯◉◯◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions(0) = [0, 1, 2] +LegalActions(1) = [0, 1, 2] +StringLegalActions(0) = ["Rock", "Paper", "Scissors"] +StringLegalActions(1) = ["Rock", "Paper", "Scissors"] + +# Apply joint action ["Paper", "Rock"] +actions: [1, 0] + +# State 3 +# Apply joint action ["Paper", "Paper"] +actions: [1, 1] + +# State 4 +# Apply joint action ["Paper", "Paper"] +actions: [1, 1] + +# State 5 +# Apply joint action ["Rock", "Paper"] +actions: [0, 1] + +# State 6 +# Apply joint action ["Rock", "Scissors"] +actions: [0, 2] + +# State 7 +# Apply joint action ["Rock", "Scissors"] +actions: [0, 2] + +# State 8 +# Apply joint action ["Rock", "Rock"] +actions: [0, 0] + +# State 9 +# Apply joint action ["Rock", "Scissors"] +actions: [0, 2] + +# State 10 +# Round 0: +# Actions: Rock Rock +# Rewards: 0 0 +# Round 1: +# Actions: Scissors Scissors +# Rewards: 0 0 +# Round 2: +# Actions: Paper Rock +# Rewards: 1 -1 +# Round 3: +# Actions: Paper Paper +# Rewards: 0 0 +# Round 4: +# Actions: Paper Paper +# Rewards: 0 0 +# Round 5: +# Actions: Rock Paper +# Rewards: -1 1 +# Round 6: +# Actions: Rock Scissors +# Rewards: 1 -1 +# Round 7: +# Actions: Rock Scissors +# Rewards: 1 -1 +# Round 8: +# Actions: Rock Rock +# Rewards: 0 0 +# Round 9: +# Actions: Rock Scissors +# Rewards: 1 -1 +# Total Returns: 3 -3 +IsTerminal() = True +History() = [0, 0, 2, 2, 1, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 2, 0, 0, 0, 2] +HistoryString() = "0, 0, 2, 2, 1, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 2, 0, 0, 0, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "Rock Scissors " +ObservationString(1) = "Rock Scissors " +ObservationTensor(0): ◉◯◯◯◯◉ +ObservationTensor(1): ◉◯◯◯◯◉ +Rewards() = [1, -1] +Returns() = [3, -3] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/sheriff.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/sheriff.txt new file mode 100644 index 0000000..7af7a0a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/sheriff.txt @@ -0,0 +1,190 @@ +game: sheriff + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Sheriff" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["item_penalty", "item_value", "max_bribe", "max_items", "num_rounds", "sheriff_penalty"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = False +GameType.provides_observation_tensor = False +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "sheriff" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 10 
+PolicyTensorShape() = [10] +MaxChanceOutcomes() = 0 +GetParameters() = {item_penalty=2.0,item_value=1.0,max_bribe=3,max_items=3,num_rounds=4,sheriff_penalty=3.0} +NumPlayers() = 2 +MinUtility() = -6.0 +MaxUtility() = 6.0 +UtilitySum() = None +InformationStateTensorShape() = [33] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 33 +MaxGameLength() = 9 +ToString() = "sheriff()" + +# State 0 +# Initial game state (smuggler hasn't decided the number of illegal cargo items yet) +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "T=0 num_illegal_items:none" +InformationStateString(1) = "T=0 " +InformationStateTensor(0): ◉◯◉◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◉◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2, 3, 4, 5] +StringLegalActions() = ["PlaceIllegalItems(num=0)", "PlaceIllegalItems(num=1)", "PlaceIllegalItems(num=2)", "PlaceIllegalItems(num=3)"] + +# Apply action "PlaceIllegalItems(num=1)" +action: 3 + +# State 1 +# Num illegal items in cargo: 1 +# Bribes : [] +# Feedback: [] +IsTerminal() = False +History() = [3] +HistoryString() = "3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "T=1 num_illegal_items:1" +InformationStateString(1) = "T=1 " +InformationStateTensor(0): ◉◯◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◉◯◯◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [6, 7, 8, 9] +StringLegalActions() = ["Bribe(amount=0)", "Bribe(amount=1)", "Bribe(amount=2)", "Bribe(amount=3)"] + +# Apply action "Bribe(amount=3)" +action: 9 + +# State 2 +# Num illegal items in cargo: 1 +# Bribes : [3] +# Feedback: [] +IsTerminal() = False +History() = [3, 9] +HistoryString() = "3, 9" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "T=2 num_illegal_items:1/bribe:3" +InformationStateString(1) = "T=2 /bribe:3" +InformationStateTensor(0): ◯◉◉◯◯◯◉◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◉◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["InspectionFeedback(will_inspect=False)", "InspectionFeedback(will_inspect=True)"] + +# Apply action "InspectionFeedback(will_inspect=True)" +action: 1 + +# State 3 +# Num illegal items in cargo: 1 +# Bribes : [3] +# Feedback: [1] +IsTerminal() = False +History() = [3, 9, 1] +HistoryString() = "3, 9, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "T=3 num_illegal_items:1/bribe:3/feedback:1" +InformationStateString(1) = "T=3 /bribe:3/feedback:1" +InformationStateTensor(0): ◉◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◉◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [6, 7, 8, 9] +StringLegalActions() = ["Bribe(amount=0)", "Bribe(amount=1)", "Bribe(amount=2)", "Bribe(amount=3)"] + +# Apply action "Bribe(amount=3)" +action: 9 + +# State 4 +# Num illegal items in cargo: 1 +# Bribes : [3,3] +# Feedback: [1] +IsTerminal() = False +History() = [3, 9, 1, 9] +HistoryString() = "3, 9, 1, 9" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "T=4 num_illegal_items:1/bribe:3/feedback:1/bribe:3" +InformationStateString(1) = "T=4 /bribe:3/feedback:1/bribe:3" 
+InformationStateTensor(0): ◯◉◉◯◯◯◯◯◉◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◉◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["InspectionFeedback(will_inspect=False)", "InspectionFeedback(will_inspect=True)"] + +# Apply action "InspectionFeedback(will_inspect=False)" +action: 0 + +# State 5 +# Apply action "Bribe(amount=2)" +action: 8 + +# State 6 +# Num illegal items in cargo: 1 +# Bribes : [3,3,2] +# Feedback: [1,0] +IsTerminal() = False +History() = [3, 9, 1, 9, 0, 8] +HistoryString() = "3, 9, 1, 9, 0, 8" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "T=6 num_illegal_items:1/bribe:3/feedback:1/bribe:3/feedback:0/bribe:2" +InformationStateString(1) = "T=6 /bribe:3/feedback:1/bribe:3/feedback:0/bribe:2" +InformationStateTensor(0): ◯◉◉◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◉◯◯◉◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◉◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◉◯◯◉◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["InspectionFeedback(will_inspect=False)", "InspectionFeedback(will_inspect=True)"] + +# Apply action "InspectionFeedback(will_inspect=False)" +action: 0 + +# State 7 +# Apply action "Bribe(amount=3)" +action: 9 + +# State 8 +# Apply action "InspectionFeedback(will_inspect=True)" +action: 1 + +# State 9 +# Num illegal items in cargo: 1 +# Bribes : [3,3,2,3] +# Feedback: [1,0,0,1] +IsTerminal() = True +History() = [3, 9, 1, 9, 0, 8, 0, 9, 1] +HistoryString() = "3, 9, 1, 9, 0, 8, 0, 9, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "T=9 num_illegal_items:1/bribe:3/feedback:1/bribe:3/feedback:0/bribe:2/feedback:0/bribe:3/feedback:1" +InformationStateString(1) = "T=9 /bribe:3/feedback:1/bribe:3/feedback:0/bribe:2/feedback:0/bribe:3/feedback:1" +InformationStateTensor(0): ◯◯◉◯◯◯◯◯◯◯◉◯◯◉◯◯◉◯◯◯◯◉◉◯◯◉◯◉◯◯◯◉◯ +InformationStateTensor(1): ◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◉◉◯◯◉◯◉◯◯◯◉◯ +Rewards() = [-2, 2] +Returns() = [-2, 2] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/skat.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/skat.txt new file mode 100644 index 0000000..793a44d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/skat.txt @@ -0,0 +1,664 @@ +game: skat + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Skat" +GameType.max_num_players = 3 +GameType.min_num_players = 3 +GameType.parameter_specification = [] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "skat" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 39 +PolicyTensorShape() = [39] +MaxChanceOutcomes() = 32 +GetParameters() = {} +NumPlayers() = 3 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [299] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 299 +MaxGameLength() = 35 +ToString() = "skat()" + +# State 0 +# Phase: dealing +# Current Player: -1 +# Deck: 🃇 🃈 🃉 🃍 🃎 🃊 🃁 🃋 🂷 🂸 🂹 🂽 🂾 🂺 🂱 🂻 🂧 🂨 🂩 🂭 🂮 🂪 🂡 🂫 🃗 🃘 🃙 🃝 🃞 🃚 🃑 🃛 +# Player 0: +# Player 1: +# 
Player 2: +# Skat: +# +# Game Type: unknown/pass +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "No Observation" +ObservationString(1) = "No Observation" +ObservationString(2) = "No Observation" +ObservationTensor(0): zeros(299) +ObservationTensor(1): zeros(299) +ObservationTensor(2): zeros(299) +ChanceOutcomes() = [(0,0.03125), (1,0.03125), (2,0.03125), (3,0.03125), (4,0.03125), (5,0.03125), (6,0.03125), (7,0.03125), (8,0.03125), (9,0.03125), (10,0.03125), (11,0.03125), (12,0.03125), (13,0.03125), (14,0.03125), (15,0.03125), (16,0.03125), (17,0.03125), (18,0.03125), (19,0.03125), (20,0.03125), (21,0.03125), (22,0.03125), (23,0.03125), (24,0.03125), (25,0.03125), (26,0.03125), (27,0.03125), (28,0.03125), (29,0.03125), (30,0.03125), (31,0.03125)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31] +StringLegalActions() = ["D7", "D8", "D9", "DQ", "DK", "DT", "DA", "DJ", "H7", "H8", "H9", "HQ", "HK", "HT", "HA", "HJ", "S7", "S8", "S9", "SQ", "SK", "ST", "SA", "SJ", "C7", "C8", "C9", "CQ", "CK", "CT", "CA", "CJ"] + +# Apply action "DK" +action: 4 + +# State 1 +# Phase: dealing +# Current Player: -1 +# Deck: 🃇 🃈 🃉 🃍 🃊 🃁 🃋 🂷 🂸 🂹 🂽 🂾 🂺 🂱 🂻 🂧 🂨 🂩 🂭 🂮 🂪 🂡 🂫 🃗 🃘 🃙 🃝 🃞 🃚 🃑 🃛 +# Player 0: 🃎 +# Player 1: +# Player 2: +# Skat: +# +# Game Type: unknown/pass +IsTerminal() = False +History() = [4] +HistoryString() = "4" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "No Observation" +ObservationString(1) = "No Observation" +ObservationString(2) = "No Observation" +ObservationTensor(0): zeros(299) +ObservationTensor(1): zeros(299) +ObservationTensor(2): zeros(299) +ChanceOutcomes() = [(0,0.0322581), (1,0.0322581), (2,0.0322581), (3,0.0322581), (5,0.0322581), (6,0.0322581), (7,0.0322581), (8,0.0322581), (9,0.0322581), (10,0.0322581), (11,0.0322581), (12,0.0322581), (13,0.0322581), (14,0.0322581), (15,0.0322581), (16,0.0322581), (17,0.0322581), (18,0.0322581), (19,0.0322581), (20,0.0322581), (21,0.0322581), (22,0.0322581), (23,0.0322581), (24,0.0322581), (25,0.0322581), (26,0.0322581), (27,0.0322581), (28,0.0322581), (29,0.0322581), (30,0.0322581), (31,0.0322581)] +LegalActions() = [0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31] +StringLegalActions() = ["D7", "D8", "D9", "DQ", "DT", "DA", "DJ", "H7", "H8", "H9", "HQ", "HK", "HT", "HA", "HJ", "S7", "S8", "S9", "SQ", "SK", "ST", "SA", "SJ", "C7", "C8", "C9", "CQ", "CK", "CT", "CA", "CJ"] + +# Apply action "H7" +action: 8 + +# State 2 +# Apply action "HQ" +action: 11 + +# State 3 +# Apply action "SQ" +action: 19 + +# State 4 +# Apply action "C7" +action: 24 + +# State 5 +# Apply action "DJ" +action: 7 + +# State 6 +# Apply action "HT" +action: 13 + +# State 7 +# Apply action "S7" +action: 16 + +# State 8 +# Apply action "CK" +action: 28 + +# State 9 +# Apply action "C8" +action: 25 + +# State 10 +# Apply action "CA" +action: 30 + +# State 11 +# Apply action "CQ" +action: 27 + +# State 12 +# Apply action "H8" +action: 9 + +# State 13 +# Apply action "DT" +action: 5 + +# State 14 +# Apply action "ST" +action: 21 + +# State 15 +# Apply action "HK" +action: 12 + +# State 16 +# Apply action "CT" +action: 29 + +# State 17 +# Apply action "DA" +action: 6 + +# State 18 +# Apply action "D8" +action: 1 + +# State 19 +# Apply action "D7" +action: 0 + +# State 20 
+# Apply action "HJ" +action: 15 + +# State 21 +# Apply action "SJ" +action: 23 + +# State 22 +# Apply action "S9" +action: 18 + +# State 23 +# Apply action "C9" +action: 26 + +# State 24 +# Apply action "D9" +action: 2 + +# State 25 +# Apply action "SA" +action: 22 + +# State 26 +# Apply action "HA" +action: 14 + +# State 27 +# Apply action "DQ" +action: 3 + +# State 28 +# Apply action "H9" +action: 10 + +# State 29 +# Apply action "S8" +action: 17 + +# State 30 +# Apply action "SK" +action: 20 + +# State 31 +# Apply action "CJ" +action: 31 + +# State 32 +# Phase: bidding +# Current Player: 0 +# Deck: +# Player 0: 🃉 🃎 🃊 🂷 🂸 🂽 🂪 🂡 🃙 🃝 +# Player 1: 🃈 🃍 🃁 🃋 🂹 🂾 🂱 🂭 🃗 🃚 +# Player 2: 🃇 🂺 🂻 🂧 🂨 🂩 🂮 🂫 🃞 🃛 +# Skat: 🃘 🃑 +# +# Game Type: unknown/pass +IsTerminal() = False +History() = [4, 8, 11, 19, 24, 7, 13, 16, 28, 25, 30, 27, 9, 5, 21, 12, 29, 6, 1, 0, 15, 23, 18, 26, 2, 22, 14, 3, 10, 17, 20, 31] +HistoryString() = "4, 8, 11, 19, 24, 7, 13, 16, 28, 25, 30, 27, 9, 5, 21, 12, 29, 6, 1, 0, 15, 23, 18, 26, 2, 22, 14, 3, 10, 17, 20, 31" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "PlPos:0|Phase:bidding|Hand:🃉 🃎 🃊 🂷 🂸 🂽 🂪 🂡 🃙 🃝 |Bids:unknown/pass unknown/pass unknown/pass |SoloPl:-1|Skat:|Game:unknown/pass|CurrTrick(Leader:-1):" +ObservationString(1) = "PlPos:1|Phase:bidding|Hand:🃈 🃍 🃁 🃋 🂹 🂾 🂱 🂭 🃗 🃚 |Bids:unknown/pass unknown/pass unknown/pass |SoloPl:-1|Skat:|Game:unknown/pass|CurrTrick(Leader:-1):" +ObservationString(2) = "PlPos:2|Phase:bidding|Hand:🃇 🂺 🂻 🂧 🂨 🂩 🂮 🂫 🃞 🃛 |Bids:unknown/pass unknown/pass unknown/pass |SoloPl:-1|Skat:|Game:unknown/pass|CurrTrick(Leader:-1):" +ObservationTensor(0): binvec(299, 0x4859a00c6102040000000001000000000000000000000000000000000000000000000000000) +ObservationTensor(1): binvec(299, 0x28a654210902040000000001000000000000000000000000000000000000000000000000000) +ObservationTensor(2): binvec(299, 0x19000bd21302040000000001000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [32, 33, 34, 35, 36, 37, 38] +StringLegalActions() = ["unknown/pass", "diamonds", "hearts", "spades", "clubs", "grand", "null"] + +# Apply action "diamonds" +action: 33 + +# State 33 +# Phase: discarding cards +# Current Player: 0 +# Deck: +# Player 0: 🃉 🃎 🃊 🂷 🂸 🂽 🂪 🂡 🃘 🃙 🃝 🃑 +# Player 1: 🃈 🃍 🃁 🃋 🂹 🂾 🂱 🂭 🃗 🃚 +# Player 2: 🃇 🂺 🂻 🂧 🂨 🂩 🂮 🂫 🃞 🃛 +# Skat: +# +# Game Type: diamonds +IsTerminal() = False +History() = [4, 8, 11, 19, 24, 7, 13, 16, 28, 25, 30, 27, 9, 5, 21, 12, 29, 6, 1, 0, 15, 23, 18, 26, 2, 22, 14, 3, 10, 17, 20, 31, 33] +HistoryString() = "4, 8, 11, 19, 24, 7, 13, 16, 28, 25, 30, 27, 9, 5, 21, 12, 29, 6, 1, 0, 15, 23, 18, 26, 2, 22, 14, 3, 10, 17, 20, 31, 33" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "PlPos:0|Phase:discarding cards|Hand:🃉 🃎 🃊 🂷 🂸 🂽 🂪 🂡 🃘 🃙 🃝 🃑 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:|Game:diamonds|CurrTrick(Leader:-1):" +ObservationString(1) = "PlPos:1|Phase:discarding cards|Hand:🃈 🃍 🃁 🃋 🂹 🂾 🂱 🂭 🃗 🃚 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:|Game:diamonds|CurrTrick(Leader:-1):" +ObservationString(2) = "PlPos:2|Phase:discarding cards|Hand:🃇 🂺 🂻 🂧 🂨 🂩 🂮 🂫 🃞 🃛 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:|Game:diamonds|CurrTrick(Leader:-1):" +ObservationTensor(0): binvec(299, 0x4459a00ce482040800000000800000000000000000000000000000000000000000000000000) +ObservationTensor(1): binvec(299, 
0x24a654210882040800000000800000000000000000000000000000000000000000000000000) +ObservationTensor(2): binvec(299, 0x15000bd21282040800000000800000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [2, 4, 5, 8, 9, 11, 21, 22, 25, 26, 27, 30] +StringLegalActions() = ["D9", "DK", "DT", "H7", "H8", "HQ", "ST", "SA", "C8", "C9", "CQ", "CA"] + +# Apply action "D9" +action: 2 + +# State 34 +# Phase: discarding cards +# Current Player: 0 +# Deck: +# Player 0: 🃎 🃊 🂷 🂸 🂽 🂪 🂡 🃘 🃙 🃝 🃑 +# Player 1: 🃈 🃍 🃁 🃋 🂹 🂾 🂱 🂭 🃗 🃚 +# Player 2: 🃇 🂺 🂻 🂧 🂨 🂩 🂮 🂫 🃞 🃛 +# Skat: 🃉 +# +# Game Type: diamonds +IsTerminal() = False +History() = [4, 8, 11, 19, 24, 7, 13, 16, 28, 25, 30, 27, 9, 5, 21, 12, 29, 6, 1, 0, 15, 23, 18, 26, 2, 22, 14, 3, 10, 17, 20, 31, 33, 2] +HistoryString() = "4, 8, 11, 19, 24, 7, 13, 16, 28, 25, 30, 27, 9, 5, 21, 12, 29, 6, 1, 0, 15, 23, 18, 26, 2, 22, 14, 3, 10, 17, 20, 31, 33, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "PlPos:0|Phase:discarding cards|Hand:🃎 🃊 🂷 🂸 🂽 🂪 🂡 🃘 🃙 🃝 🃑 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:🃉 |Game:diamonds|CurrTrick(Leader:-1):" +ObservationString(1) = "PlPos:1|Phase:discarding cards|Hand:🃈 🃍 🃁 🃋 🂹 🂾 🂱 🂭 🃗 🃚 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:|Game:diamonds|CurrTrick(Leader:-1):" +ObservationString(2) = "PlPos:2|Phase:discarding cards|Hand:🃇 🂺 🂻 🂧 🂨 🂩 🂮 🂫 🃞 🃛 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:|Game:diamonds|CurrTrick(Leader:-1):" +ObservationTensor(0): binvec(299, 0x4419a00ce482040840000000800000000000000000000000000000000000000000000000000) +ObservationTensor(1): binvec(299, 0x24a654210882040800000000800000000000000000000000000000000000000000000000000) +ObservationTensor(2): binvec(299, 0x15000bd21282040800000000800000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [4, 5, 8, 9, 11, 21, 22, 25, 26, 27, 30] +StringLegalActions() = ["DK", "DT", "H7", "H8", "HQ", "ST", "SA", "C8", "C9", "CQ", "CA"] + +# Apply action "H8" +action: 9 + +# State 35 +# Apply action "CA" +action: 30 + +# State 36 +# Phase: playing +# Current Player: 1 +# Deck: +# Player 0: 🃎 🃊 🂷 🂽 🂪 🂡 🃘 🃙 🃝 +# Player 1: 🃈 🃍 🃁 🃋 🂹 🂾 🂱 🂭 🃗 🃚 +# Player 2: 🃇 🂺 🂻 🂧 🂨 🂩 🂮 🂫 🃞 🃛 +# Skat: 🃉 🂸 +# +# Last trick won by player -1 +# Solo Player: 0 +# Points (Solo / Team): (0 / 0) +# Current Trick: Leader: 0, 🃑 +# Game Type: diamonds +IsTerminal() = False +History() = [4, 8, 11, 19, 24, 7, 13, 16, 28, 25, 30, 27, 9, 5, 21, 12, 29, 6, 1, 0, 15, 23, 18, 26, 2, 22, 14, 3, 10, 17, 20, 31, 33, 2, 9, 30] +HistoryString() = "4, 8, 11, 19, 24, 7, 13, 16, 28, 25, 30, 27, 9, 5, 21, 12, 29, 6, 1, 0, 15, 23, 18, 26, 2, 22, 14, 3, 10, 17, 20, 31, 33, 2, 9, 30" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "PlPos:0|Phase:playing|Hand:🃎 🃊 🂷 🂽 🂪 🂡 🃘 🃙 🃝 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:🃉 🂸 |Game:diamonds|CurrTrick(Leader:0):🃑 " +ObservationString(1) = "PlPos:1|Phase:playing|Hand:🃈 🃍 🃁 🃋 🂹 🂾 🂱 🂭 🃗 🃚 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:|Game:diamonds|CurrTrick(Leader:0):🃑 " +ObservationString(2) = "PlPos:2|Phase:playing|Hand:🃇 🂺 🂻 🂧 🂨 🂩 🂮 🂫 🃞 🃛 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:|Game:diamonds|CurrTrick(Leader:0):🃑 " +ObservationTensor(0): binvec(299, 0x4219200ce082040840800000820000000100000000000000000000000000000000000000000) +ObservationTensor(1): binvec(299, 
0x22a654210882040800000000820000000100000000000000000000000000000000000000000) +ObservationTensor(2): binvec(299, 0x13000bd21282040800000000820000000100000000000000000000000000000000000000000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [24, 29] +StringLegalActions() = ["C7", "CT"] + +# Apply action "C7" +action: 24 + +# State 37 +# Phase: playing +# Current Player: 2 +# Deck: +# Player 0: 🃎 🃊 🂷 🂽 🂪 🂡 🃘 🃙 🃝 +# Player 1: 🃈 🃍 🃁 🃋 🂹 🂾 🂱 🂭 🃚 +# Player 2: 🃇 🂺 🂻 🂧 🂨 🂩 🂮 🂫 🃞 🃛 +# Skat: 🃉 🂸 +# +# Last trick won by player -1 +# Solo Player: 0 +# Points (Solo / Team): (0 / 0) +# Current Trick: Leader: 0, 🃑 🃗 +# Game Type: diamonds +IsTerminal() = False +History() = [4, 8, 11, 19, 24, 7, 13, 16, 28, 25, 30, 27, 9, 5, 21, 12, 29, 6, 1, 0, 15, 23, 18, 26, 2, 22, 14, 3, 10, 17, 20, 31, 33, 2, 9, 30, 24] +HistoryString() = "4, 8, 11, 19, 24, 7, 13, 16, 28, 25, 30, 27, 9, 5, 21, 12, 29, 6, 1, 0, 15, 23, 18, 26, 2, 22, 14, 3, 10, 17, 20, 31, 33, 2, 9, 30, 24" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "PlPos:0|Phase:playing|Hand:🃎 🃊 🂷 🂽 🂪 🂡 🃘 🃙 🃝 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:🃉 🂸 |Game:diamonds|CurrTrick(Leader:0):🃑 🃗 " +ObservationString(1) = "PlPos:1|Phase:playing|Hand:🃈 🃍 🃁 🃋 🂹 🂾 🂱 🂭 🃚 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:|Game:diamonds|CurrTrick(Leader:0):🃑 🃗 " +ObservationString(2) = "PlPos:2|Phase:playing|Hand:🃇 🂺 🂻 🂧 🂨 🂩 🂮 🂫 🃞 🃛 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:|Game:diamonds|CurrTrick(Leader:0):🃑 🃗 " +ObservationTensor(0): binvec(299, 0x4219200ce082040840800000820000000100000040000000000000000000000000000000000) +ObservationTensor(1): binvec(299, 0x22a654200882040800000000820000000100000040000000000000000000000000000000000) +ObservationTensor(2): binvec(299, 0x13000bd21282040800000000820000000100000040000000000000000000000000000000000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [28] +StringLegalActions() = ["CK"] + +# Apply action "CK" +action: 28 + +# State 38 +# Apply action "DT" +action: 5 + +# State 39 +# Phase: playing +# Current Player: 1 +# Deck: +# Player 0: 🃎 🂷 🂽 🂪 🂡 🃘 🃙 🃝 +# Player 1: 🃈 🃍 🃁 🃋 🂹 🂾 🂱 🂭 🃚 +# Player 2: 🃇 🂺 🂻 🂧 🂨 🂩 🂮 🂫 🃛 +# Skat: 🃉 🂸 +# +# Last trick won by player 0 +# Solo Player: 0 +# Points (Solo / Team): (15 / 0) +# Current Trick: Leader: 0, 🃊 +# Last Trick: Leader: 0, 🃑 🃗 🃞 +# Game Type: diamonds +IsTerminal() = False +History() = [4, 8, 11, 19, 24, 7, 13, 16, 28, 25, 30, 27, 9, 5, 21, 12, 29, 6, 1, 0, 15, 23, 18, 26, 2, 22, 14, 3, 10, 17, 20, 31, 33, 2, 9, 30, 24, 28, 5] +HistoryString() = "4, 8, 11, 19, 24, 7, 13, 16, 28, 25, 30, 27, 9, 5, 21, 12, 29, 6, 1, 0, 15, 23, 18, 26, 2, 22, 14, 3, 10, 17, 20, 31, 33, 2, 9, 30, 24, 28, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "PlPos:0|Phase:playing|Hand:🃎 🂷 🂽 🂪 🂡 🃘 🃙 🃝 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:🃉 🂸 |Game:diamonds|CurrTrick(Leader:0):🃊 |PrevTrick(Leader:0):🃑 🃗 🃞 " +ObservationString(1) = "PlPos:1|Phase:playing|Hand:🃈 🃍 🃁 🃋 🂹 🂾 🂱 🂭 🃚 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:|Game:diamonds|CurrTrick(Leader:0):🃊 |PrevTrick(Leader:0):🃑 🃗 🃞 " +ObservationString(2) = "PlPos:2|Phase:playing|Hand:🃇 🂺 🂻 🂧 🂨 🂩 🂮 🂫 🃛 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:|Game:diamonds|CurrTrick(Leader:0):🃊 |PrevTrick(Leader:0):🃑 🃗 🃞 " +ObservationTensor(0): binvec(299, 0x4211200ce082040840800000820200000000000000000000004000000020000008000000008) +ObservationTensor(1): 
binvec(299, 0x22a654200882040800000000820200000000000000000000004000000020000008000000008) +ObservationTensor(2): binvec(299, 0x13000bd20282040800000000820200000000000000000000004000000020000008000000008) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [1, 3, 6, 7] +StringLegalActions() = ["D8", "DQ", "DA", "DJ"] + +# Apply action "DJ" +action: 7 + +# State 40 +# Phase: playing +# Current Player: 2 +# Deck: +# Player 0: 🃎 🂷 🂽 🂪 🂡 🃘 🃙 🃝 +# Player 1: 🃈 🃍 🃁 🂹 🂾 🂱 🂭 🃚 +# Player 2: 🃇 🂺 🂻 🂧 🂨 🂩 🂮 🂫 🃛 +# Skat: 🃉 🂸 +# +# Last trick won by player 0 +# Solo Player: 0 +# Points (Solo / Team): (15 / 0) +# Current Trick: Leader: 0, 🃊 🃋 +# Last Trick: Leader: 0, 🃑 🃗 🃞 +# Game Type: diamonds +IsTerminal() = False +History() = [4, 8, 11, 19, 24, 7, 13, 16, 28, 25, 30, 27, 9, 5, 21, 12, 29, 6, 1, 0, 15, 23, 18, 26, 2, 22, 14, 3, 10, 17, 20, 31, 33, 2, 9, 30, 24, 28, 5, 7] +HistoryString() = "4, 8, 11, 19, 24, 7, 13, 16, 28, 25, 30, 27, 9, 5, 21, 12, 29, 6, 1, 0, 15, 23, 18, 26, 2, 22, 14, 3, 10, 17, 20, 31, 33, 2, 9, 30, 24, 28, 5, 7" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "PlPos:0|Phase:playing|Hand:🃎 🂷 🂽 🂪 🂡 🃘 🃙 🃝 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:🃉 🂸 |Game:diamonds|CurrTrick(Leader:0):🃊 🃋 |PrevTrick(Leader:0):🃑 🃗 🃞 " +ObservationString(1) = "PlPos:1|Phase:playing|Hand:🃈 🃍 🃁 🂹 🂾 🂱 🂭 🃚 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:|Game:diamonds|CurrTrick(Leader:0):🃊 🃋 |PrevTrick(Leader:0):🃑 🃗 🃞 " +ObservationString(2) = "PlPos:2|Phase:playing|Hand:🃇 🂺 🂻 🂧 🂨 🂩 🂮 🂫 🃛 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:|Game:diamonds|CurrTrick(Leader:0):🃊 🃋 |PrevTrick(Leader:0):🃑 🃗 🃞 " +ObservationTensor(0): binvec(299, 0x4211200ce082040840800000820200000000800000000000004000000020000008000000008) +ObservationTensor(1): binvec(299, 0x22a454200882040800000000820200000000800000000000004000000020000008000000008) +ObservationTensor(2): binvec(299, 0x13000bd20282040800000000820200000000800000000000004000000020000008000000008) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 15, 23, 31] +StringLegalActions() = ["D7", "HJ", "SJ", "CJ"] + +# Apply action "HJ" +action: 15 + +# State 41 +# Phase: playing +# Current Player: 2 +# Deck: +# Player 0: 🃎 🂷 🂽 🂪 🂡 🃘 🃙 🃝 +# Player 1: 🃈 🃍 🃁 🂹 🂾 🂱 🂭 🃚 +# Player 2: 🃇 🂺 🂧 🂨 🂩 🂮 🂫 🃛 +# Skat: 🃉 🂸 +# +# Last trick won by player 2 +# Solo Player: 0 +# Points (Solo / Team): (15 / 14) +# Current Trick: Leader: 2, +# Last Trick: Leader: 0, 🃊 🃋 🂻 +# Game Type: diamonds +IsTerminal() = False +History() = [4, 8, 11, 19, 24, 7, 13, 16, 28, 25, 30, 27, 9, 5, 21, 12, 29, 6, 1, 0, 15, 23, 18, 26, 2, 22, 14, 3, 10, 17, 20, 31, 33, 2, 9, 30, 24, 28, 5, 7, 15] +HistoryString() = "4, 8, 11, 19, 24, 7, 13, 16, 28, 25, 30, 27, 9, 5, 21, 12, 29, 6, 1, 0, 15, 23, 18, 26, 2, 22, 14, 3, 10, 17, 20, 31, 33, 2, 9, 30, 24, 28, 5, 7, 15" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "PlPos:0|Phase:playing|Hand:🃎 🂷 🂽 🂪 🂡 🃘 🃙 🃝 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:🃉 🂸 |Game:diamonds|CurrTrick(Leader:2):|PrevTrick(Leader:0):🃊 🃋 🂻 " +ObservationString(1) = "PlPos:1|Phase:playing|Hand:🃈 🃍 🃁 🂹 🂾 🂱 🂭 🃚 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:|Game:diamonds|CurrTrick(Leader:2):|PrevTrick(Leader:0):🃊 🃋 🂻 " +ObservationString(2) = "PlPos:2|Phase:playing|Hand:🃇 🂺 🂧 🂨 🂩 🂮 🂫 🃛 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:|Game:diamonds|CurrTrick(Leader:2):|PrevTrick(Leader:0):🃊 🃋 🂻 " 
+ObservationTensor(0): binvec(299, 0x4211200ce082040840800000808000000000000000000000004040000000100000000010000) +ObservationTensor(1): binvec(299, 0x22a454200882040800000000808000000000000000000000004040000000100000000010000) +ObservationTensor(2): binvec(299, 0x130009d20282040800000000808000000000000000000000004040000000100000000010000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 13, 16, 17, 18, 20, 23, 31] +StringLegalActions() = ["D7", "HT", "S7", "S8", "S9", "SK", "SJ", "CJ"] + +# Apply action "S8" +action: 17 + +# State 42 +# Apply action "ST" +action: 21 + +# State 43 +# Phase: playing +# Current Player: 1 +# Deck: +# Player 0: 🃎 🂷 🂽 🂡 🃘 🃙 🃝 +# Player 1: 🃈 🃍 🃁 🂹 🂾 🂱 🂭 🃚 +# Player 2: 🃇 🂺 🂧 🂩 🂮 🂫 🃛 +# Skat: 🃉 🂸 +# +# Last trick won by player 2 +# Solo Player: 0 +# Points (Solo / Team): (15 / 14) +# Current Trick: Leader: 2, 🂨 🂪 +# Last Trick: Leader: 0, 🃊 🃋 🂻 +# Game Type: diamonds +IsTerminal() = False +History() = [4, 8, 11, 19, 24, 7, 13, 16, 28, 25, 30, 27, 9, 5, 21, 12, 29, 6, 1, 0, 15, 23, 18, 26, 2, 22, 14, 3, 10, 17, 20, 31, 33, 2, 9, 30, 24, 28, 5, 7, 15, 17, 21] +HistoryString() = "4, 8, 11, 19, 24, 7, 13, 16, 28, 25, 30, 27, 9, 5, 21, 12, 29, 6, 1, 0, 15, 23, 18, 26, 2, 22, 14, 3, 10, 17, 20, 31, 33, 2, 9, 30, 24, 28, 5, 7, 15, 17, 21" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "PlPos:0|Phase:playing|Hand:🃎 🂷 🂽 🂡 🃘 🃙 🃝 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:🃉 🂸 |Game:diamonds|CurrTrick(Leader:2):🂨 🂪 |PrevTrick(Leader:0):🃊 🃋 🂻 " +ObservationString(1) = "PlPos:1|Phase:playing|Hand:🃈 🃍 🃁 🂹 🂾 🂱 🂭 🃚 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:|Game:diamonds|CurrTrick(Leader:2):🂨 🂪 |PrevTrick(Leader:0):🃊 🃋 🂻 " +ObservationString(2) = "PlPos:2|Phase:playing|Hand:🃇 🂺 🂧 🂩 🂮 🂫 🃛 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:|Game:diamonds|CurrTrick(Leader:2):🂨 🂪 |PrevTrick(Leader:0):🃊 🃋 🂻 " +ObservationTensor(0): binvec(299, 0x42112004e082040840800000808000200000000200000000004040000000100000000010000) +ObservationTensor(1): binvec(299, 0x22a454200882040800000000808000200000000200000000004040000000100000000010000) +ObservationTensor(2): binvec(299, 0x130009520282040800000000808000200000000200000000004040000000100000000010000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [19] +StringLegalActions() = ["SQ"] + +# Apply action "SQ" +action: 19 + +# State 44 +# Apply action "CQ" +action: 27 + +# State 45 +# Apply action "CT" +action: 29 + +# State 46 +# Apply action "S9" +action: 18 + +# State 47 +# Apply action "DQ" +action: 3 + +# State 48 +# Apply action "SJ" +action: 23 + +# State 49 +# Apply action "DK" +action: 4 + +# State 50 +# Apply action "HT" +action: 13 + +# State 51 +# Apply action "HQ" +action: 11 + +# State 52 +# Apply action "H9" +action: 10 + +# State 53 +# Apply action "SK" +action: 20 + +# State 54 +# Apply action "SA" +action: 22 + +# State 55 +# Apply action "HK" +action: 12 + +# State 56 +# Phase: playing +# Current Player: 0 +# Deck: +# Player 0: 🂷 🃘 🃙 +# Player 1: 🃈 🃁 🂱 +# Player 2: 🃇 🂧 🃛 +# Skat: 🃉 🂸 +# +# Last trick won by player 0 +# Solo Player: 0 +# Points (Solo / Team): (47 / 49) +# Current Trick: Leader: 0, +# Last Trick: Leader: 2, 🂮 🂡 🂾 +# Game Type: diamonds +IsTerminal() = False +History() = [4, 8, 11, 19, 24, 7, 13, 16, 28, 25, 30, 27, 9, 5, 21, 12, 29, 6, 1, 0, 15, 23, 18, 26, 2, 22, 14, 3, 10, 17, 20, 31, 33, 2, 9, 30, 24, 28, 5, 7, 15, 17, 21, 19, 27, 29, 18, 3, 23, 4, 13, 11, 10, 20, 22, 12] +HistoryString() = 
"4, 8, 11, 19, 24, 7, 13, 16, 28, 25, 30, 27, 9, 5, 21, 12, 29, 6, 1, 0, 15, 23, 18, 26, 2, 22, 14, 3, 10, 17, 20, 31, 33, 2, 9, 30, 24, 28, 5, 7, 15, 17, 21, 19, 27, 29, 18, 3, 23, 4, 13, 11, 10, 20, 22, 12" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "PlPos:0|Phase:playing|Hand:🂷 🃘 🃙 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:🃉 🂸 |Game:diamonds|CurrTrick(Leader:0):|PrevTrick(Leader:2):🂮 🂡 🂾 " +ObservationString(1) = "PlPos:1|Phase:playing|Hand:🃈 🃁 🂱 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:|Game:diamonds|CurrTrick(Leader:0):|PrevTrick(Leader:2):🂮 🂡 🂾 " +ObservationString(2) = "PlPos:2|Phase:playing|Hand:🃇 🂧 🃛 |Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:|Game:diamonds|CurrTrick(Leader:0):|PrevTrick(Leader:2):🂮 🂡 🂾 " +ObservationTensor(0): binvec(299, 0x42010000c082040840800000820000000000000000000000001000008000000020000080000) +ObservationTensor(1): binvec(299, 0x228404000082040800000000820000000000000000000000001000008000000020000080000) +ObservationTensor(2): binvec(299, 0x130001000282040800000000820000000000000000000000001000008000000020000080000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [8, 25, 26] +StringLegalActions() = ["H7", "C8", "C9"] + +# Apply action "C8" +action: 25 + +# State 57 +# Apply action "D8" +action: 1 + +# State 58 +# Apply action "S7" +action: 16 + +# State 59 +# Apply action "HA" +action: 14 + +# State 60 +# Apply action "CJ" +action: 31 + +# State 61 +# Apply action "H7" +action: 8 + +# State 62 +# Apply action "D7" +action: 0 + +# State 63 +# Apply action "C9" +action: 26 + +# State 64 +# Apply action "DA" +action: 6 + +# State 65 +# Phase: game over +# Current Player: 1 +# Deck: +# Player 0: +# Player 1: +# Player 2: +# Skat: 🃉 🂸 +# +# Last trick won by player 1 +# Solo Player: 0 +# Points (Solo / Team): (47 / 73) +# Current Trick: Leader: 2, 🃇 🃙 🃁 +# Last Trick: Leader: 2, 🃇 🃙 🃁 +# Game Type: diamonds +IsTerminal() = True +History() = [4, 8, 11, 19, 24, 7, 13, 16, 28, 25, 30, 27, 9, 5, 21, 12, 29, 6, 1, 0, 15, 23, 18, 26, 2, 22, 14, 3, 10, 17, 20, 31, 33, 2, 9, 30, 24, 28, 5, 7, 15, 17, 21, 19, 27, 29, 18, 3, 23, 4, 13, 11, 10, 20, 22, 12, 25, 1, 16, 14, 31, 8, 0, 26, 6] +HistoryString() = "4, 8, 11, 19, 24, 7, 13, 16, 28, 25, 30, 27, 9, 5, 21, 12, 29, 6, 1, 0, 15, 23, 18, 26, 2, 22, 14, 3, 10, 17, 20, 31, 33, 2, 9, 30, 24, 28, 5, 7, 15, 17, 21, 19, 27, 29, 18, 3, 23, 4, 13, 11, 10, 20, 22, 12, 25, 1, 16, 14, 31, 8, 0, 26, 6" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "PlPos:0|Phase:game over|Hand:|Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:🃉 🂸 |Game:diamonds|CurrTrick(Leader:-1):|PrevTrick(Leader:2):🃇 🃙 🃁 " +ObservationString(1) = "PlPos:1|Phase:game over|Hand:|Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:|Game:diamonds|CurrTrick(Leader:-1):|PrevTrick(Leader:2):🃇 🃙 🃁 " +ObservationString(2) = "PlPos:2|Phase:game over|Hand:|Bids:diamonds unknown/pass unknown/pass |SoloPl:0|Skat:|Game:diamonds|CurrTrick(Leader:-1):|PrevTrick(Leader:2):🃇 🃙 🃁 " +ObservationTensor(0): binvec(299, 0x400000000082040840800000800000000000000000000000001800000000000002002000000) +ObservationTensor(1): binvec(299, 0x200000000082040800000000800000000000000000000000001800000000000002002000000) +ObservationTensor(2): binvec(299, 0x100000000082040800000000800000000000000000000000001800000000000002002000000) +Rewards() = [-0.108333, 0.0541667, 0.0541667] +Returns() = [-0.108333, 0.0541667, 
0.0541667] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/solitaire.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/solitaire.txt new file mode 100644 index 0000000..8679dca --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/solitaire.txt @@ -0,0 +1,1044 @@ +game: solitaire + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Klondike Solitaire" +GameType.max_num_players = 1 +GameType.min_num_players = 1 +GameType.parameter_specification = ["depth_limit", "is_colored", "players"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.REWARDS +GameType.short_name = "solitaire" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 205 +PolicyTensorShape() = [205] +MaxChanceOutcomes() = 53 +GetParameters() = {depth_limit=150,is_colored=False,players=1} +NumPlayers() = 1 +MinUtility() = 0.0 +MaxUtility() = 3220.0 +UtilitySum() = None +ObservationTensorShape() = [1741] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 1741 +MaxGameLength() = 150 +ToString() = "solitaire()" + +# State 0 +# WASTE : 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 +# FOUNDATIONS : ♠ ♥ ♣ ♦ +# TABLEAUS : +# 🂠 +# 🂠 🂠 +# 🂠 🂠 🂠 +# 🂠 🂠 🂠 🂠 +# 🂠 🂠 🂠 🂠 🂠 +# 🂠 🂠 🂠 🂠 🂠 🂠 +# 🂠 🂠 🂠 🂠 🂠 🂠 🂠 +# TARGETS : ♠ ♥ ♣ ♦ +# SOURCES : +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "" +ObservationString(0) = "WASTE : 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 \nFOUNDATIONS : ♠ ♥ ♣ ♦ \nTABLEAUS : \n🂠 \n🂠 🂠 \n🂠 🂠 🂠 \n🂠 🂠 🂠 🂠 \n🂠 🂠 🂠 🂠 🂠 \n🂠 🂠 🂠 🂠 🂠 🂠 \n🂠 🂠 🂠 🂠 🂠 🂠 🂠 \nTARGETS : ♠ ♥ ♣ ♦ \nSOURCES : " +ObservationTensor(0): binvec(1741, 0x10004001000400100000000000000300000000000000700000000000000f00000000000001f00000000000003f00000000000007f0000000000000800000000000040000000000002000000000000100000000000008000000000000400000000000020000000000001000000000000080000000000004000000000000200000000000010000000000000800000000000040000000000002000000000000100000000000008000000000000400000000000020000000000001000000000000080000000000004000000000000200000000000010000000000000) +ChanceOutcomes() = [(1,0.0192308), (2,0.0192308), (3,0.0192308), (4,0.0192308), (5,0.0192308), (6,0.0192308), (7,0.0192308), (8,0.0192308), (9,0.0192308), (10,0.0192308), (11,0.0192308), (12,0.0192308), (13,0.0192308), (14,0.0192308), (15,0.0192308), (16,0.0192308), (17,0.0192308), (18,0.0192308), (19,0.0192308), (20,0.0192308), (21,0.0192308), (22,0.0192308), (23,0.0192308), (24,0.0192308), (25,0.0192308), (26,0.0192308), (27,0.0192308), (28,0.0192308), (29,0.0192308), (30,0.0192308), (31,0.0192308), (32,0.0192308), (33,0.0192308), (34,0.0192308), (35,0.0192308), (36,0.0192308), (37,0.0192308), (38,0.0192308), (39,0.0192308), (40,0.0192308), (41,0.0192308), (42,0.0192308), (43,0.0192308), (44,0.0192308), (45,0.0192308), (46,0.0192308), (47,0.0192308), (48,0.0192308), (49,0.0192308), (50,0.0192308), (51,0.0192308), (52,0.0192308)] +LegalActions() = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52] +StringLegalActions() = ["RevealA♠", "Reveal2♠", "Reveal3♠", "Reveal4♠", "Reveal5♠", "Reveal6♠", "Reveal7♠", "Reveal8♠", "Reveal9♠", "RevealT♠", "RevealJ♠", "RevealQ♠", "RevealK♠", "RevealA♥", "Reveal2♥", "Reveal3♥", "Reveal4♥", "Reveal5♥", "Reveal6♥", "Reveal7♥", "Reveal8♥", "Reveal9♥", "RevealT♥", "RevealJ♥", "RevealQ♥", "RevealK♥", "RevealA♣", "Reveal2♣", "Reveal3♣", "Reveal4♣", "Reveal5♣", "Reveal6♣", "Reveal7♣", "Reveal8♣", "Reveal9♣", "RevealT♣", "RevealJ♣", "RevealQ♣", "RevealK♣", "RevealA♦", "Reveal2♦", "Reveal3♦", "Reveal4♦", "Reveal5♦", "Reveal6♦", "Reveal7♦", "Reveal8♦", "Reveal9♦", "RevealT♦", "RevealJ♦", "RevealQ♦", "RevealK♦"] + +# Apply action "RevealT♥" +action: 23 + +# State 1 +# WASTE : 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 +# FOUNDATIONS : ♠ ♥ ♣ ♦ +# TABLEAUS : +# T♥ +# 🂠 🂠 +# 🂠 🂠 🂠 +# 🂠 🂠 🂠 🂠 +# 🂠 🂠 🂠 🂠 🂠 +# 🂠 🂠 🂠 🂠 🂠 🂠 +# 🂠 🂠 🂠 🂠 🂠 🂠 🂠 +# TARGETS : T♥ ♠ ♥ ♣ ♦ +# SOURCES : T♥ +IsTerminal() = False +History() = [23] +HistoryString() = "23" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "23" +ObservationString(0) = "WASTE : 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 🂠 \nFOUNDATIONS : ♠ ♥ ♣ ♦ \nTABLEAUS : \nT♥ \n🂠 🂠 \n🂠 🂠 🂠 \n🂠 🂠 🂠 🂠 \n🂠 🂠 🂠 🂠 🂠 \n🂠 🂠 🂠 🂠 🂠 🂠 \n🂠 🂠 🂠 🂠 🂠 🂠 🂠 \nTARGETS : T♥ ♠ ♥ ♣ ♦ \nSOURCES : T♥ " +ObservationTensor(0): binvec(1741, 0x10004001000400000000008000000300000000000000700000000000000f00000000000001f00000000000003f00000000000007f0000000000000800000000000040000000000002000000000000100000000000008000000000000400000000000020000000000001000000000000080000000000004000000000000200000000000010000000000000800000000000040000000000002000000000000100000000000008000000000000400000000000020000000000001000000000000080000000000004000000000000200000000000010000000000000) +ChanceOutcomes() = [(1,0.0196078), (2,0.0196078), (3,0.0196078), (4,0.0196078), (5,0.0196078), (6,0.0196078), (7,0.0196078), (8,0.0196078), (9,0.0196078), (10,0.0196078), (11,0.0196078), (12,0.0196078), (13,0.0196078), (14,0.0196078), (15,0.0196078), (16,0.0196078), (17,0.0196078), (18,0.0196078), (19,0.0196078), (20,0.0196078), (21,0.0196078), (22,0.0196078), (24,0.0196078), (25,0.0196078), (26,0.0196078), (27,0.0196078), (28,0.0196078), (29,0.0196078), (30,0.0196078), (31,0.0196078), (32,0.0196078), (33,0.0196078), (34,0.0196078), (35,0.0196078), (36,0.0196078), (37,0.0196078), (38,0.0196078), (39,0.0196078), (40,0.0196078), (41,0.0196078), (42,0.0196078), (43,0.0196078), (44,0.0196078), (45,0.0196078), (46,0.0196078), (47,0.0196078), (48,0.0196078), (49,0.0196078), (50,0.0196078), (51,0.0196078), (52,0.0196078)] +LegalActions() = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52] +StringLegalActions() = ["RevealA♠", "Reveal2♠", "Reveal3♠", "Reveal4♠", "Reveal5♠", "Reveal6♠", "Reveal7♠", "Reveal8♠", "Reveal9♠", "RevealT♠", "RevealJ♠", "RevealQ♠", "RevealK♠", "RevealA♥", "Reveal2♥", "Reveal3♥", "Reveal4♥", "Reveal5♥", "Reveal6♥", "Reveal7♥", "Reveal8♥", "Reveal9♥", "RevealJ♥", "RevealQ♥", "RevealK♥", "RevealA♣", "Reveal2♣", "Reveal3♣", "Reveal4♣", "Reveal5♣", "Reveal6♣", "Reveal7♣", "Reveal8♣", "Reveal9♣", "RevealT♣", "RevealJ♣", "RevealQ♣", "RevealK♣", "RevealA♦", "Reveal2♦", "Reveal3♦", "Reveal4♦", "Reveal5♦", "Reveal6♦", "Reveal7♦", "Reveal8♦", "Reveal9♦", "RevealT♦", "RevealJ♦", "RevealQ♦", 
"RevealK♦"] + +# Apply action "Reveal2♣" +action: 28 + +# State 2 +# Apply action "Reveal3♥" +action: 16 + +# State 3 +# Apply action "Reveal2♦" +action: 41 + +# State 4 +# Apply action "Reveal3♠" +action: 3 + +# State 5 +# Apply action "Reveal8♥" +action: 21 + +# State 6 +# Apply action "RevealA♦" +action: 40 + +# State 7 +# Apply action "Reveal7♠" +action: 7 + +# State 8 +# Apply action "Reveal5♦" +action: 44 + +# State 9 +# Apply action "Reveal7♦" +action: 46 + +# State 10 +# Apply action "Reveal9♠" +action: 9 + +# State 11 +# Apply action "Reveal9♦" +action: 48 + +# State 12 +# Apply action "RevealK♣" +action: 39 + +# State 13 +# Apply action "Reveal3♣" +action: 29 + +# State 14 +# Apply action "RevealJ♣" +action: 37 + +# State 15 +# Apply action "Reveal4♥" +action: 17 + +# State 16 +# Apply action "RevealJ♠" +action: 11 + +# State 17 +# Apply action "Reveal4♠" +action: 4 + +# State 18 +# Apply action "Reveal5♣" +action: 31 + +# State 19 +# Apply action "Reveal3♦" +action: 42 + +# State 20 +# Apply action "Reveal2♠" +action: 2 + +# State 21 +# Apply action "Reveal7♣" +action: 33 + +# State 22 +# Apply action "Reveal9♥" +action: 22 + +# State 23 +# Apply action "Reveal6♥" +action: 19 + +# State 24 +# Apply action "RevealQ♣" +action: 38 + +# State 25 +# Apply action "RevealK♦" +action: 52 + +# State 26 +# Apply action "Reveal5♥" +action: 18 + +# State 27 +# Apply action "RevealT♣" +action: 36 + +# State 28 +# Apply action "Reveal5♠" +action: 5 + +# State 29 +# Apply action "RevealJ♦" +action: 50 + +# State 30 +# Apply action "Reveal4♦" +action: 43 + +# State 31 +# WASTE : 7♠ 5♦ 7♦ 9♠ 9♦ K♣ 3♣ J♣ 4♥ J♠ 4♠ 5♣ 3♦ 2♠ 7♣ 9♥ 6♥ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ +# FOUNDATIONS : ♠ ♥ ♣ ♦ +# TABLEAUS : +# T♥ +# 🂠 2♣ +# 🂠 🂠 3♥ +# 🂠 🂠 🂠 2♦ +# 🂠 🂠 🂠 🂠 3♠ +# 🂠 🂠 🂠 🂠 🂠 8♥ +# 🂠 🂠 🂠 🂠 🂠 🂠 A♦ +# TARGETS : T♥ 2♣ 3♥ 2♦ 3♠ 8♥ A♦ ♠ ♥ ♣ ♦ +# SOURCES : T♥ 2♣ 3♥ 2♦ 3♠ 8♥ A♦ 7♠ 9♠ 3♣ J♠ 3♦ 9♥ K♦ 5♠ +IsTerminal() = False +History() = [23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43] +HistoryString() = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43" +ObservationString(0) = "WASTE : 7♠ 5♦ 7♦ 9♠ 9♦ K♣ 3♣ J♣ 4♥ J♠ 4♠ 5♣ 3♦ 2♠ 7♣ 9♥ 6♥ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ \nFOUNDATIONS : ♠ ♥ ♣ ♦ \nTABLEAUS : \nT♥ \n🂠 2♣ \n🂠 🂠 3♥ \n🂠 🂠 🂠 2♦ \n🂠 🂠 🂠 🂠 3♠ \n🂠 🂠 🂠 🂠 🂠 8♥ \n🂠 🂠 🂠 🂠 🂠 🂠 A♦ \nTARGETS : T♥ 2♣ 3♥ 2♦ 3♠ 8♥ A♦ ♠ ♥ ♣ ♦ \nSOURCES : T♥ 2♣ 3♥ 2♦ 3♠ 8♥ A♦ 7♠ 9♠ 3♣ J♠ 3♦ 9♥ K♦ 5♠ " +ObservationTensor(0): binvec(1741, 0x10004001000400000000008000000200000000800000600001000000000e00000000001001e08000000000003e00000400000007e0000000001000010000000000000000000000400000000000008000800000000000000000000008000000000080000000001000000000000000800000004000000000008000000000020000000000000000000200000000000000020010000000000000000000010000000000400000000000100000000000000000100000000000000002000040000000000000000080000200000000000000000000000080000000000200) +Rewards() = [0] +Returns() = [0] +LegalActions() = [58, 91, 105, 111, 121, 188] +StringLegalActions() = ["3♠ ← 2♦", "3♥ ← 2♣", "8♥ ← 7♠", "T♥ ← 9♠", "2♣ ← A♦", "♦ ← A♦"] + +# Apply action "♦ ← A♦" +action: 188 + +# State 32 +# Apply action "RevealK♠" +action: 13 + +# State 33 +# WASTE : 7♠ 5♦ 7♦ 9♠ 9♦ K♣ 3♣ J♣ 4♥ J♠ 4♠ 5♣ 3♦ 2♠ 7♣ 9♥ 6♥ Q♣ K♦ 5♥ 
T♣ 5♠ J♦ 4♦ +# FOUNDATIONS : ♠ ♥ ♣ A♦ +# TABLEAUS : +# T♥ +# 🂠 2♣ +# 🂠 🂠 3♥ +# 🂠 🂠 🂠 2♦ +# 🂠 🂠 🂠 🂠 3♠ +# 🂠 🂠 🂠 🂠 🂠 8♥ +# 🂠 🂠 🂠 🂠 🂠 K♠ +# TARGETS : T♥ 2♣ 3♥ 2♦ 3♠ 8♥ K♠ ♠ ♥ ♣ A♦ +# SOURCES : T♥ 2♣ 3♥ 2♦ 3♠ 8♥ K♠ A♦ 7♠ 9♠ 3♣ J♠ 3♦ 9♥ K♦ 5♠ +IsTerminal() = False +History() = [23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13] +HistoryString() = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13" +ObservationString(0) = "WASTE : 7♠ 5♦ 7♦ 9♠ 9♦ K♣ 3♣ J♣ 4♥ J♠ 4♠ 5♣ 3♦ 2♠ 7♣ 9♥ 6♥ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ \nFOUNDATIONS : ♠ ♥ ♣ A♦ \nTABLEAUS : \nT♥ \n🂠 2♣ \n🂠 🂠 3♥ \n🂠 🂠 🂠 2♦ \n🂠 🂠 🂠 🂠 3♠ \n🂠 🂠 🂠 🂠 🂠 8♥ \n🂠 🂠 🂠 🂠 🂠 K♠ \nTARGETS : T♥ 2♣ 3♥ 2♦ 3♠ 8♥ K♠ ♠ ♥ ♣ A♦ \nSOURCES : T♥ 2♣ 3♥ 2♦ 3♠ 8♥ K♠ A♦ 7♠ 9♠ 3♣ J♠ 3♦ 9♥ K♦ 5♠ " +ObservationTensor(0): binvec(1741, 0x10004001000200000000008000000200000000800000600001000000000e00000000001001e08000000000003e00000400000007c0008000000000010000000000000000000000400000000000008000800000000000000000000008000000000080000000001000000000000000800000004000000000008000000000020000000000000000000200000000000000020010000000000000000000010000000000400000000000100000000000000000100000000000000002000040000000000000000080000200000000000000000000000080000000000200) +Rewards() = [120] +Returns() = [120] +LegalActions() = [58, 91, 105, 111, 121, 196] +StringLegalActions() = ["3♠ ← 2♦", "3♥ ← 2♣", "8♥ ← 7♠", "T♥ ← 9♠", "2♣ ← A♦", "A♦ ← 2♦"] + +# Apply action "2♣ ← A♦" +action: 121 + +# State 34 +# WASTE : 7♠ 5♦ 7♦ 9♠ 9♦ K♣ 3♣ J♣ 4♥ J♠ 4♠ 5♣ 3♦ 2♠ 7♣ 9♥ 6♥ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ +# FOUNDATIONS : ♠ ♥ ♣ ♦ +# TABLEAUS : +# T♥ +# 🂠 2♣ A♦ +# 🂠 🂠 3♥ +# 🂠 🂠 🂠 2♦ +# 🂠 🂠 🂠 🂠 3♠ +# 🂠 🂠 🂠 🂠 🂠 8♥ +# 🂠 🂠 🂠 🂠 🂠 K♠ +# TARGETS : T♥ A♦ 3♥ 2♦ 3♠ 8♥ K♠ ♠ ♥ ♣ ♦ +# SOURCES : T♥ 2♣ A♦ 3♥ 2♦ 3♠ 8♥ K♠ 7♠ 9♠ 3♣ J♠ 3♦ 9♥ K♦ 5♠ +IsTerminal() = False +History() = [23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121] +HistoryString() = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121" +ObservationString(0) = "WASTE : 7♠ 5♦ 7♦ 9♠ 9♦ K♣ 3♣ J♣ 4♥ J♠ 4♠ 5♣ 3♦ 2♠ 7♣ 9♥ 6♥ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ \nFOUNDATIONS : ♠ ♥ ♣ ♦ \nTABLEAUS : \nT♥ \n🂠 2♣ A♦ \n🂠 🂠 3♥ \n🂠 🂠 🂠 2♦ \n🂠 🂠 🂠 🂠 3♠ \n🂠 🂠 🂠 🂠 🂠 8♥ \n🂠 🂠 🂠 🂠 🂠 K♠ \nTARGETS : T♥ A♦ 3♥ 2♦ 3♠ 8♥ K♠ ♠ ♥ ♣ ♦ \nSOURCES : T♥ 2♣ A♦ 3♥ 2♦ 3♠ 8♥ K♠ 7♠ 9♠ 3♣ J♠ 3♦ 9♥ K♦ 5♠ " +ObservationTensor(0): binvec(1741, 0x10004001000400000000008000000200000000800800600001000000000e00000000001001e08000000000003e00000400000007c0008000000000010000000000000000000000400000000000008000800000000000000000000008000000000080000000001000000000000000800000004000000000008000000000020000000000000000000200000000000000020010000000000000000000010000000000400000000000100000000000000000100000000000000002000040000000000000000080000200000000000000000000000080000000000200) +Rewards() = [-100] +Returns() = [20] +LegalActions() = [58, 91, 105, 111, 188] 
+StringLegalActions() = ["3♠ ← 2♦", "3♥ ← 2♣", "8♥ ← 7♠", "T♥ ← 9♠", "♦ ← A♦"] + +# Apply action "♦ ← A♦" +action: 188 + +# State 35 +# Apply action "3♠ ← 2♦" +action: 58 + +# State 36 +# Apply action "RevealA♣" +action: 27 + +# State 37 +# Apply action "3♥ ← 2♣" +action: 91 + +# State 38 +# Apply action "RevealA♠" +action: 1 + +# State 39 +# Apply action "♣ ← A♣" +action: 187 + +# State 40 +# Apply action "RevealT♠" +action: 10 + +# State 41 +# Apply action "A♦ ← 2♦" +action: 196 + +# State 42 +# Apply action "3♠ ← 2♦" +action: 58 + +# State 43 +# Apply action "T♠ ← 9♥" +action: 78 + +# State 44 +# Apply action "2♣ ← A♦" +action: 121 + +# State 45 +# WASTE : 7♠ 5♦ 7♦ 9♠ 9♦ K♣ 3♣ J♣ 4♥ J♠ 4♠ 5♣ 3♦ 2♠ 7♣ 6♥ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ +# FOUNDATIONS : ♠ ♥ A♣ ♦ +# TABLEAUS : +# T♥ +# A♠ +# 🂠 🂠 3♥ 2♣ A♦ +# 🂠 T♠ 9♥ +# 🂠 🂠 🂠 🂠 3♠ 2♦ +# 🂠 🂠 🂠 🂠 🂠 8♥ +# 🂠 🂠 🂠 🂠 🂠 K♠ +# TARGETS : T♥ A♠ A♦ 9♥ 2♦ 8♥ K♠ ♠ ♥ A♣ ♦ +# SOURCES : T♥ A♠ 3♥ 2♣ A♦ T♠ 9♥ 3♠ 2♦ 8♥ K♠ A♣ 7♠ 9♠ 3♣ J♠ 3♦ 6♥ 5♥ J♦ +IsTerminal() = False +History() = [23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121] +HistoryString() = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121" +ObservationString(0) = "WASTE : 7♠ 5♦ 7♦ 9♠ 9♦ K♣ 3♣ J♣ 4♥ J♠ 4♠ 5♣ 3♦ 2♠ 7♣ 6♥ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ \nFOUNDATIONS : ♠ ♥ A♣ ♦ \nTABLEAUS : \nT♥ \nA♠ \n🂠 🂠 3♥ 2♣ A♦ \n🂠 T♠ 9♥ \n🂠 🂠 🂠 🂠 3♠ 2♦ \n🂠 🂠 🂠 🂠 🂠 8♥ \n🂠 🂠 🂠 🂠 🂠 K♠ \nTARGETS : T♥ A♠ A♦ 9♥ 2♦ 8♥ K♠ ♠ ♥ A♣ ♦ \nSOURCES : T♥ A♠ 3♥ 2♣ A♦ T♠ 9♥ 3♠ 2♦ 8♥ K♠ A♣ 7♠ 9♠ 3♣ J♠ 3♦ 6♥ 5♥ J♦ " +ObservationTensor(0): binvec(1741, 0x10004000800400000000008000000004000000000000600001001001000800080080000001e08000000002003e00000400000007c0008000000000010000000000000000000000400000000000008000800000000000000000000008000000000080000000001000000000000000800000004000000000008000000000020000000000000000000200000000000000020010000000000000000000010000000002000000000000000002000000000000000040000800000000000000001000004000000000000000000000001000000000004000000000000000) +Rewards() = [-100] +Returns() = [200] +LegalActions() = [105, 111, 153, 154, 185, 188] +StringLegalActions() = ["8♥ ← 7♠", "T♥ ← 9♠", "2♦ ← A♠", "2♦ ← A♣", "♠ ← A♠", "♦ ← A♦"] + +# Apply action "2♦ ← A♣" +action: 154 + +# State 46 +# Apply action "T♥ ← 9♠" +action: 111 + +# State 47 +# Apply action "8♥ ← 7♠" +action: 105 + +# State 48 +# Apply action "♠ ← A♠" +action: 185 + +# State 49 +# Apply action "♣ ← A♣" +action: 187 + +# State 50 +# Apply action "9♠ ← 8♥" +action: 75 + +# State 51 +# Apply action "Reveal8♦" +action: 47 + +# State 52 +# Apply action "2♦ ← A♣" +action: 154 + +# State 53 +# Apply action "🂿 ← K♠" +action: 189 + +# State 54 +# Apply action "RevealA♥" +action: 14 + +# State 55 +# Apply action "♣ ← A♣" +action: 187 + +# State 56 +# Apply action "8♦ ← 7♠" +action: 171 + +# State 57 +# WASTE : 5♦ 7♦ 9♦ K♣ 3♣ J♣ 4♥ J♠ 4♠ 5♣ 3♦ 2♠ 7♣ 6♥ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ +# FOUNDATIONS : A♠ ♥ A♣ ♦ +# TABLEAUS : +# T♥ 9♠ 8♥ +# K♠ +# 🂠 🂠 3♥ 2♣ A♦ +# 🂠 T♠ 9♥ +# 🂠 🂠 🂠 🂠 3♠ 2♦ +# 🂠 🂠 🂠 🂠 8♦ 7♠ +# 🂠 🂠 🂠 🂠 A♥ +# TARGETS : 8♥ K♠ A♦ 9♥ 2♦ 7♠ A♥ A♠ ♥ A♣ ♦ 
+# SOURCES : T♥ 9♠ 8♥ K♠ 3♥ 2♣ A♦ T♠ 9♥ 3♠ 2♦ 8♦ 7♠ A♥ A♠ A♣ 5♦ K♣ 4♥ 5♣ 7♣ K♦ 5♠ +IsTerminal() = False +History() = [23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171] +HistoryString() = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171" +ObservationString(0) = "WASTE : 5♦ 7♦ 9♦ K♣ 3♣ J♣ 4♥ J♠ 4♠ 5♣ 3♦ 2♠ 7♣ 6♥ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ \nFOUNDATIONS : A♠ ♥ A♣ ♦ \nTABLEAUS : \nT♥ 9♠ 8♥ \nK♠ \n🂠 🂠 3♥ 2♣ A♦ \n🂠 T♠ 9♥ \n🂠 🂠 🂠 🂠 3♠ 2♦ \n🂠 🂠 🂠 🂠 8♦ 7♠ \n🂠 🂠 🂠 🂠 A♥ \nTARGETS : 8♥ K♠ A♦ 9♥ 2♦ 7♠ A♥ A♠ ♥ A♣ ♦ \nSOURCES : T♥ 9♠ 8♥ K♠ 3♥ 2♣ A♦ T♠ 9♥ 3♠ 2♦ 8♦ 7♠ A♥ A♠ A♣ 5♦ K♣ 4♥ 5♣ 7♣ K♦ 5♠ " +ObservationTensor(0): binvec(1741, 0x8004000800400000020028000000000004000000000600001001001000800080080000001e08000000002003c0100000000010780004000000000000000000008000000000000100000000000002000000000020000000000400000000000000200000001000000000002000000000008000000000000000000080000000000000008004000000000000000000004000000000800000000000000000800000000000000010000200000000000000000400001000000000000000000000000400000000001000000000000000000000000000000000000000000) +Rewards() = [0] +Returns() = [380] +LegalActions() = [105, 106, 153, 154, 186, 188] +StringLegalActions() = ["8♥ ← 7♠", "8♥ ← 7♣", "2♦ ← A♠", "2♦ ← A♣", "♥ ← A♥", "♦ ← A♦"] + +# Apply action "♦ ← A♦" +action: 188 + +# State 58 +# Apply action "2♦ ← A♣" +action: 154 + +# State 59 +# Apply action "♥ ← A♥" +action: 186 + +# State 60 +# Apply action "RevealT♦" +action: 49 + +# State 61 +# Apply action "2♣ ← A♦" +action: 121 + +# State 62 +# Apply action "8♥ ← 7♠" +action: 105 + +# State 63 +# Apply action "♦ ← A♦" +action: 188 + +# State 64 +# Apply action "8♦ ← 7♣" +action: 172 + +# State 65 +# Apply action "T♦ ← 9♠" +action: 177 + +# State 66 +# Apply action "2♣ ← A♥" +action: 120 + +# State 67 +# Apply action "♥ ← A♥" +action: 186 + +# State 68 +# WASTE : 5♦ 7♦ 9♦ K♣ 3♣ J♣ 4♥ J♠ 4♠ 5♣ 3♦ 2♠ 6♥ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ +# FOUNDATIONS : A♠ A♥ ♣ A♦ +# TABLEAUS : +# T♥ +# K♠ +# 🂠 🂠 3♥ 2♣ +# 🂠 T♠ 9♥ +# 🂠 🂠 🂠 🂠 3♠ 2♦ A♣ +# 🂠 🂠 🂠 🂠 8♦ 7♣ +# 🂠 🂠 🂠 T♦ 9♠ 8♥ 7♠ +# TARGETS : T♥ K♠ 2♣ 9♥ A♣ 7♣ 7♠ A♠ A♥ ♣ A♦ +# SOURCES : T♥ K♠ 3♥ 2♣ T♠ 9♥ 3♠ 2♦ A♣ 8♦ 7♣ T♦ 9♠ 8♥ 7♠ A♠ A♥ A♦ 5♦ K♣ 4♥ 5♣ 6♥ 5♥ J♦ +IsTerminal() = False +History() = [23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186] +HistoryString() = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186" +IsChanceNode() = False +IsSimultaneousNode() = 
False +CurrentPlayer() = 0 +InformationStateString(0) = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186" +ObservationString(0) = "WASTE : 5♦ 7♦ 9♦ K♣ 3♣ J♣ 4♥ J♠ 4♠ 5♣ 3♦ 2♠ 6♥ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ \nFOUNDATIONS : A♠ A♥ ♣ A♦ \nTABLEAUS : \nT♥ \nK♠ \n🂠 🂠 3♥ 2♣ \n🂠 T♠ 9♥ \n🂠 🂠 🂠 🂠 3♠ 2♦ A♣ \n🂠 🂠 🂠 🂠 8♦ 7♣ \n🂠 🂠 🂠 T♦ 9♠ 8♥ 7♠ \nTARGETS : T♥ K♠ 2♣ 9♥ A♣ 7♣ 7♠ A♠ A♥ ♣ A♦ \nSOURCES : T♥ K♠ 3♥ 2♣ T♠ 9♥ 3♠ 2♦ A♣ 8♦ 7♣ T♦ 9♠ 8♥ 7♠ A♠ A♥ A♦ 5♦ K♣ 4♥ 5♣ 6♥ 5♥ J♦ " +ObservationTensor(0): binvec(1741, 0x8002001000200000000008000000000004000000000600001001000000800080080000001e08000008002003c0000000040010700280080000008000000000008000000000000100000000000002000000000020000000000400000000000000200000001000000000002000000000008000000000000000000080000000000000008004000000000000000010000000000000000010000000000000000200004000000000000000008000020000000000000000000000008000000000020000000000000000000000000000000000000000000000000000000) +Rewards() = [100] +Returns() = [520] +LegalActions() = [69, 111, 120, 121, 135, 187] +StringLegalActions() = ["7♠ ← 6♥", "T♥ ← 9♠", "2♣ ← A♥", "2♣ ← A♦", "7♣ ← 6♥", "♣ ← A♣"] + +# Apply action "♣ ← A♣" +action: 187 + +# State 69 +# Apply action "2♣ ← A♦" +action: 121 + +# State 70 +# Apply action "T♥ ← 9♠" +action: 111 + +# State 71 +# Apply action "7♣ ← 6♥" +action: 135 + +# State 72 +# Apply action "7♠ ← 6♥" +action: 69 + +# State 73 +# Apply action "6♥ ← 5♣" +action: 100 + +# State 74 +# Apply action "♦ ← A♦" +action: 188 + +# State 75 +# Apply action "2♦ ← A♠" +action: 153 + +# State 76 +# Apply action "A♣ ← 2♣" +action: 195 + +# State 77 +# Apply action "T♦ ← 9♠" +action: 177 + +# State 78 +# WASTE : 5♦ 7♦ 9♦ K♣ 3♣ J♣ 4♥ J♠ 4♠ 3♦ 2♠ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ +# FOUNDATIONS : ♠ A♥ 2♣ A♦ +# TABLEAUS : +# T♥ +# K♠ +# 🂠 🂠 3♥ +# 🂠 T♠ 9♥ +# 🂠 🂠 🂠 🂠 3♠ 2♦ A♠ +# 🂠 🂠 🂠 🂠 8♦ 7♣ +# 🂠 🂠 🂠 T♦ 9♠ 8♥ 7♠ 6♥ 5♣ +# TARGETS : T♥ K♠ 3♥ 9♥ A♠ 7♣ 5♣ ♠ A♥ 2♣ A♦ +# SOURCES : T♥ K♠ 3♥ T♠ 9♥ 3♠ 2♦ A♠ 8♦ 7♣ T♦ 9♠ 8♥ 7♠ 6♥ 5♣ A♥ 2♣ A♦ 5♦ K♣ 4♥ 3♦ K♦ 5♠ +IsTerminal() = False +History() = [23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186, 187, 121, 111, 135, 69, 100, 188, 153, 195, 177] +HistoryString() = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186, 187, 121, 111, 135, 69, 100, 188, 153, 195, 177" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186, 187, 121, 111, 135, 69, 100, 188, 153, 195, 177" +ObservationString(0) = "WASTE : 5♦ 7♦ 9♦ K♣ 3♣ J♣ 4♥ J♠ 4♠ 3♦ 2♠ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ \nFOUNDATIONS : ♠ A♥ 2♣ A♦ \nTABLEAUS : \nT♥ \nK♠ \n🂠 🂠 3♥ \n🂠 T♠ 9♥ \n🂠 
🂠 🂠 🂠 3♠ 2♦ A♠ \n🂠 🂠 🂠 🂠 8♦ 7♣ \n🂠 🂠 🂠 T♦ 9♠ 8♥ 7♠ 6♥ 5♣ \nTARGETS : T♥ K♠ 3♥ 9♥ A♠ 7♣ 5♣ ♠ A♥ 2♣ A♦ \nSOURCES : T♥ K♠ 3♥ T♠ 9♥ 3♠ 2♦ A♠ 8♦ 7♣ T♦ 9♠ 8♥ 7♠ 6♥ 5♣ A♥ 2♣ A♦ 5♦ K♣ 4♥ 3♦ K♦ 5♠ " +ObservationTensor(0): binvec(1741, 0x10002000400200000000008000000000004000000000600001000000000800080080000001e28000000002003c0000000040010700280280200008000000000008000000000000100000000000002000000000020000000000400000000000000200000001000000000002000000000008000000000000000000000100080000000000000000000004000000000000000080001000000000000000002000008000000000000000000000002000000000008000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0] +Returns() = [650] +LegalActions() = [91, 111, 129, 135, 185] +StringLegalActions() = ["3♥ ← 2♣", "T♥ ← 9♠", "5♣ ← 4♥", "7♣ ← 6♥", "♠ ← A♠"] + +# Apply action "5♣ ← 4♥" +action: 129 + +# State 79 +# Apply action "7♣ ← 6♥" +action: 135 + +# State 80 +# Apply action "3♥ ← 2♠" +action: 90 + +# State 81 +# Apply action "♠ ← A♠" +action: 185 + +# State 82 +# Apply action "A♠ ← 2♠" +action: 193 + +# State 83 +# Apply action "T♥ ← 9♠" +action: 111 + +# State 84 +# Apply action "T♦ ← 9♠" +action: 177 + +# State 85 +# Apply action "3♥ ← 2♠" +action: 90 + +# State 86 +# Apply action "7♠ ← 6♥" +action: 69 + +# State 87 +# Apply action "2♠ ← A♦" +action: 55 + +# State 88 +# WASTE : 5♦ 7♦ 9♦ K♣ 3♣ J♣ J♠ 4♠ 3♦ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ +# FOUNDATIONS : A♠ A♥ 2♣ ♦ +# TABLEAUS : +# T♥ +# K♠ +# 🂠 🂠 3♥ 2♠ A♦ +# 🂠 T♠ 9♥ +# 🂠 🂠 🂠 🂠 3♠ 2♦ +# 🂠 🂠 🂠 🂠 8♦ 7♣ +# 🂠 🂠 🂠 T♦ 9♠ 8♥ 7♠ 6♥ 5♣ 4♥ +# TARGETS : T♥ K♠ A♦ 9♥ 2♦ 7♣ 4♥ A♠ A♥ 2♣ ♦ +# SOURCES : T♥ K♠ 3♥ 2♠ A♦ T♠ 9♥ 3♠ 2♦ 8♦ 7♣ T♦ 9♠ 8♥ 7♠ 6♥ 5♣ 4♥ A♠ A♥ 2♣ 5♦ K♣ J♠ Q♣ T♣ 4♦ +IsTerminal() = False +History() = [23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186, 187, 121, 111, 135, 69, 100, 188, 153, 195, 177, 129, 135, 90, 185, 193, 111, 177, 90, 69, 55] +HistoryString() = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186, 187, 121, 111, 135, 69, 100, 188, 153, 195, 177, 129, 135, 90, 185, 193, 111, 177, 90, 69, 55" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186, 187, 121, 111, 135, 69, 100, 188, 153, 195, 177, 129, 135, 90, 185, 193, 111, 177, 90, 69, 55" +ObservationString(0) = "WASTE : 5♦ 7♦ 9♦ K♣ 3♣ J♣ J♠ 4♠ 3♦ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ \nFOUNDATIONS : A♠ A♥ 2♣ ♦ \nTABLEAUS : \nT♥ \nK♠ \n🂠 🂠 3♥ 2♠ A♦ \n🂠 T♠ 9♥ \n🂠 🂠 🂠 🂠 3♠ 2♦ \n🂠 🂠 🂠 🂠 8♦ 7♣ \n🂠 🂠 🂠 T♦ 9♠ 8♥ 7♠ 6♥ 5♣ 4♥ \nTARGETS : T♥ K♠ A♦ 9♥ 2♦ 7♣ 4♥ A♠ A♥ 2♣ ♦ \nSOURCES : T♥ K♠ 3♥ 2♠ A♦ T♠ 9♥ 3♠ 2♦ 8♦ 7♣ T♦ 9♠ 8♥ 7♠ 6♥ 5♣ 4♥ A♠ A♥ 2♣ 5♦ K♣ J♠ Q♣ T♣ 4♦ " +ObservationTensor(0): binvec(1741, 
0x8002000400400000000008000000000004000000000604001000001000800080080000001e08000000002003c0000000040010700280a80200008000000000008000000000000100000000000002000000000020000000000400000000000000200000040000000000100000000000000000000002000000000001000000000000000020000400000000000000000800002000000000000000000000000800000000002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [-100] +Returns() = [690] +LegalActions() = [93, 111, 135, 153, 188] +StringLegalActions() = ["4♥ ← 3♠", "T♥ ← 9♠", "7♣ ← 6♥", "2♦ ← A♠", "♦ ← A♦"] + +# Apply action "2♦ ← A♠" +action: 153 + +# State 89 +# Apply action "♠ ← A♠" +action: 185 + +# State 90 +# Apply action "7♣ ← 6♥" +action: 135 + +# State 91 +# Apply action "4♥ ← 3♠" +action: 93 + +# State 92 +# Apply action "Reveal2♥" +action: 15 + +# State 93 +# Apply action "2♦ ← A♠" +action: 153 + +# State 94 +# Apply action "♦ ← A♦" +action: 188 + +# State 95 +# Apply action "2♥ ← A♠" +action: 87 + +# State 96 +# Apply action "7♠ ← 6♥" +action: 69 + +# State 97 +# Apply action "T♥ ← 9♠" +action: 111 + +# State 98 +# Apply action "2♦ ← A♠" +action: 153 + +# State 99 +# WASTE : 5♦ 7♦ 9♦ K♣ 3♣ J♣ J♠ 4♠ 3♦ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ +# FOUNDATIONS : ♠ A♥ 2♣ A♦ +# TABLEAUS : +# T♥ 9♠ 8♥ 7♠ 6♥ 5♣ 4♥ 3♠ 2♦ A♠ +# K♠ +# 🂠 🂠 3♥ 2♠ +# 🂠 T♠ 9♥ +# 🂠 🂠 🂠 2♥ +# 🂠 🂠 🂠 🂠 8♦ 7♣ +# 🂠 🂠 🂠 T♦ +# TARGETS : A♠ K♠ 2♠ 9♥ 2♥ 7♣ T♦ ♠ A♥ 2♣ A♦ +# SOURCES : T♥ 9♠ 8♥ 7♠ 6♥ 5♣ 4♥ 3♠ 2♦ A♠ K♠ 3♥ 2♠ T♠ 9♥ 2♥ 8♦ 7♣ T♦ A♥ 2♣ A♦ 5♦ K♣ J♠ Q♣ T♣ 4♦ +IsTerminal() = False +History() = [23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186, 187, 121, 111, 135, 69, 100, 188, 153, 195, 177, 129, 135, 90, 185, 193, 111, 177, 90, 69, 55, 153, 185, 135, 93, 15, 153, 188, 87, 69, 111, 153] +HistoryString() = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186, 187, 121, 111, 135, 69, 100, 188, 153, 195, 177, 129, 135, 90, 185, 193, 111, 177, 90, 69, 55, 153, 185, 135, 93, 15, 153, 188, 87, 69, 111, 153" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186, 187, 121, 111, 135, 69, 100, 188, 153, 195, 177, 129, 135, 90, 185, 193, 111, 177, 90, 69, 55, 153, 185, 135, 93, 15, 153, 188, 87, 69, 111, 153" +ObservationString(0) = "WASTE : 5♦ 7♦ 9♦ K♣ 3♣ J♣ J♠ 4♠ 3♦ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ \nFOUNDATIONS : ♠ A♥ 2♣ A♦ \nTABLEAUS : \nT♥ 9♠ 8♥ 7♠ 6♥ 5♣ 4♥ 3♠ 2♦ A♠ \nK♠ \n🂠 🂠 3♥ 2♠ \n🂠 T♠ 9♥ \n🂠 🂠 🂠 2♥ \n🂠 🂠 🂠 🂠 8♦ 7♣ \n🂠 🂠 🂠 T♦ \nTARGETS : A♠ K♠ 2♠ 9♥ 2♥ 7♣ T♦ ♠ A♥ 2♣ A♦ \nSOURCES : T♥ 9♠ 8♥ 7♠ 6♥ 5♣ 4♥ 3♠ 2♦ A♠ K♠ 3♥ 2♠ T♠ 9♥ 2♥ 8♦ 7♣ T♦ A♥ 2♣ A♦ 5♦ K♣ J♠ Q♣ T♣ 4♦ " +ObservationTensor(0): binvec(1741, 
0x100020004002000028a02a8080200000004000000000604001000000000800080080000001c00008000000003c0000000040010700000000000008000000000008000000000000100000000000002000000000020000000000400000000000000200000040000000000100000000000000000000002000000000001000000000000000020000400000000000000000800002000000000000000000000000800000000002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0] +Returns() = [710] +LegalActions() = [54, 55, 87, 135, 177, 185, 194] +StringLegalActions() = ["2♠ ← A♥", "2♠ ← A♦", "2♥ ← A♠", "7♣ ← 6♥", "T♦ ← 9♠", "♠ ← A♠", "A♥ ← 2♥"] + +# Apply action "2♥ ← A♠" +action: 87 + +# State 100 +# Apply action "A♦ ← 2♦" +action: 196 + +# State 101 +# Apply action "7♣ ← 6♥" +action: 135 + +# State 102 +# Apply action "3♠ ← 2♥" +action: 57 + +# State 103 +# Apply action "RevealQ♠" +action: 12 + +# State 104 +# Apply action "♠ ← A♠" +action: 185 + +# State 105 +# Apply action "2♠ ← A♥" +action: 54 + +# State 106 +# Apply action "7♠ ← 6♥" +action: 69 + +# State 107 +# Apply action "2♥ ← A♠" +action: 87 + +# State 108 +# Apply action "♠ ← A♠" +action: 185 + +# State 109 +# Apply action "2♥ ← A♠" +action: 87 + +# State 110 +# WASTE : 5♦ 7♦ 9♦ K♣ 3♣ J♣ J♠ 4♠ 3♦ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ +# FOUNDATIONS : ♠ ♥ 2♣ 2♦ +# TABLEAUS : +# T♥ 9♠ 8♥ 7♠ 6♥ 5♣ 4♥ 3♠ 2♥ A♠ +# K♠ +# 🂠 🂠 3♥ 2♠ A♥ +# 🂠 T♠ 9♥ +# 🂠 🂠 Q♠ +# 🂠 🂠 🂠 🂠 8♦ 7♣ +# 🂠 🂠 🂠 T♦ +# TARGETS : A♠ K♠ A♥ 9♥ Q♠ 7♣ T♦ ♠ ♥ 2♣ 2♦ +# SOURCES : T♥ 9♠ 8♥ 7♠ 6♥ 5♣ 4♥ 3♠ 2♥ A♠ K♠ 3♥ 2♠ A♥ T♠ 9♥ Q♠ 8♦ 7♣ T♦ 2♣ 2♦ 5♦ K♣ J♠ Q♣ T♣ 4♦ +IsTerminal() = False +History() = [23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186, 187, 121, 111, 135, 69, 100, 188, 153, 195, 177, 129, 135, 90, 185, 193, 111, 177, 90, 69, 55, 153, 185, 135, 93, 15, 153, 188, 87, 69, 111, 153, 87, 196, 135, 57, 12, 185, 54, 69, 87, 185, 87] +HistoryString() = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186, 187, 121, 111, 135, 69, 100, 188, 153, 195, 177, 129, 135, 90, 185, 193, 111, 177, 90, 69, 55, 153, 185, 135, 93, 15, 153, 188, 87, 69, 111, 153, 87, 196, 135, 57, 12, 185, 54, 69, 87, 185, 87" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186, 187, 121, 111, 135, 69, 100, 188, 153, 195, 177, 129, 135, 90, 185, 193, 111, 177, 90, 69, 55, 153, 185, 135, 93, 15, 153, 188, 87, 69, 111, 153, 87, 196, 135, 57, 12, 185, 54, 69, 87, 185, 87" +ObservationString(0) = "WASTE : 5♦ 7♦ 9♦ K♣ 3♣ J♣ J♠ 4♠ 3♦ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ \nFOUNDATIONS : ♠ ♥ 2♣ 2♦ \nTABLEAUS : \nT♥ 9♠ 8♥ 7♠ 6♥ 5♣ 4♥ 3♠ 2♥ A♠ \nK♠ \n🂠 🂠 3♥ 2♠ A♥ \n🂠 T♠ 9♥ \n🂠 🂠 Q♠ \n🂠 🂠 🂠 🂠 8♦ 7♣ \n🂠 🂠 🂠 T♦ \nTARGETS : A♠ K♠ A♥ 9♥ Q♠ 7♣ T♦ ♠ ♥ 2♣ 2♦ \nSOURCES : T♥ 9♠ 8♥ 7♠ 6♥ 5♣ 4♥ 3♠ 2♥ A♠ K♠ 3♥ 2♠ A♥ T♠ 9♥ Q♠ 8♦ 7♣ T♦ 2♣ 2♦ 5♦ K♣ J♠ Q♣ T♣ 4♦ " 
+ObservationTensor(0): binvec(1741, 0x100040004001000028a0aa8080000000004000000000604005000000000800080080000001800040000000003c0000000040010700000000000008000000000008000000000000100000000000002000000000020000000000400000000000000200000040000000000100000000000000000000002000000000001000000000000000020000400000000000000000800002000000000000000000000000800000000002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [-100] +Returns() = [720] +LegalActions() = [135, 177, 185, 186] +StringLegalActions() = ["7♣ ← 6♥", "T♦ ← 9♠", "♠ ← A♠", "♥ ← A♥"] + +# Apply action "T♦ ← 9♠" +action: 177 + +# State 111 +# Apply action "7♣ ← 6♥" +action: 135 + +# State 112 +# Apply action "7♠ ← 6♥" +action: 69 + +# State 113 +# Apply action "7♣ ← 6♥" +action: 135 + +# State 114 +# Apply action "♠ ← A♠" +action: 185 + +# State 115 +# Apply action "T♥ ← 9♠" +action: 111 + +# State 116 +# Apply action "2♥ ← A♠" +action: 87 + +# State 117 +# Apply action "7♠ ← 6♥" +action: 69 + +# State 118 +# Apply action "♥ ← A♥" +action: 186 + +# State 119 +# Apply action "T♦ ← 9♠" +action: 177 + +# State 120 +# WASTE : 5♦ 7♦ 9♦ K♣ 3♣ J♣ J♠ 4♠ 3♦ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ +# FOUNDATIONS : ♠ A♥ 2♣ 2♦ +# TABLEAUS : +# T♥ +# K♠ +# 🂠 🂠 3♥ 2♠ +# 🂠 T♠ 9♥ +# 🂠 🂠 Q♠ +# 🂠 🂠 🂠 🂠 8♦ 7♣ +# 🂠 🂠 🂠 T♦ 9♠ 8♥ 7♠ 6♥ 5♣ 4♥ 3♠ 2♥ A♠ +# TARGETS : T♥ K♠ 2♠ 9♥ Q♠ 7♣ A♠ ♠ A♥ 2♣ 2♦ +# SOURCES : T♥ K♠ 3♥ 2♠ T♠ 9♥ Q♠ 8♦ 7♣ T♦ 9♠ 8♥ 7♠ 6♥ 5♣ 4♥ 3♠ 2♥ A♠ A♥ 2♣ 2♦ 5♦ K♣ J♠ Q♣ T♣ 4♦ +IsTerminal() = False +History() = [23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186, 187, 121, 111, 135, 69, 100, 188, 153, 195, 177, 129, 135, 90, 185, 193, 111, 177, 90, 69, 55, 153, 185, 135, 93, 15, 153, 188, 87, 69, 111, 153, 87, 196, 135, 57, 12, 185, 54, 69, 87, 185, 87, 177, 135, 69, 135, 185, 111, 87, 69, 186, 177] +HistoryString() = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186, 187, 121, 111, 135, 69, 100, 188, 153, 195, 177, 129, 135, 90, 185, 193, 111, 177, 90, 69, 55, 153, 185, 135, 93, 15, 153, 188, 87, 69, 111, 153, 87, 196, 135, 57, 12, 185, 54, 69, 87, 185, 87, 177, 135, 69, 135, 185, 111, 87, 69, 186, 177" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186, 187, 121, 111, 135, 69, 100, 188, 153, 195, 177, 129, 135, 90, 185, 193, 111, 177, 90, 69, 55, 153, 185, 135, 93, 15, 153, 188, 87, 69, 111, 153, 87, 196, 135, 57, 12, 185, 54, 69, 87, 185, 87, 177, 135, 69, 135, 185, 111, 87, 69, 186, 177" +ObservationString(0) = "WASTE : 5♦ 7♦ 9♦ K♣ 3♣ J♣ J♠ 4♠ 3♦ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ \nFOUNDATIONS : ♠ A♥ 2♣ 2♦ \nTABLEAUS : \nT♥ \nK♠ \n🂠 🂠 3♥ 2♠ \n🂠 T♠ 9♥ \n🂠 🂠 Q♠ \n🂠 🂠 🂠 🂠 8♦ 7♣ \n🂠 🂠 🂠 T♦ 9♠ 8♥ 7♠ 6♥ 5♣ 4♥ 3♠ 2♥ A♠ \nTARGETS : T♥ K♠ 2♠ 9♥ Q♠ 7♣ A♠ ♠ A♥ 2♣ 2♦ \nSOURCES : T♥ K♠ 3♥ 2♠ T♠ 9♥ Q♠ 8♦ 
7♣ T♦ 9♠ 8♥ 7♠ 6♥ 5♣ 4♥ 3♠ 2♥ A♠ A♥ 2♣ 2♦ 5♦ K♣ J♠ Q♣ T♣ 4♦ " +ObservationTensor(0): binvec(1741, 0x10002000400100000000008000000000004000000000604001000000000800080080000001800040000000003c000000004001070a282a80200008000000000008000000000000100000000000002000000000020000000000400000000000000200000040000000000100000000000000000000002000000000001000000000000000020000400000000000000000800002000000000000000000000000800000000002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0] +Returns() = [820] +LegalActions() = [54, 111, 135, 185] +StringLegalActions() = ["2♠ ← A♥", "T♥ ← 9♠", "7♣ ← 6♥", "♠ ← A♠"] + +# Apply action "2♠ ← A♥" +action: 54 + +# State 121 +# Apply action "7♣ ← 6♥" +action: 135 + +# State 122 +# Apply action "♠ ← A♠" +action: 185 + +# State 123 +# Apply action "7♠ ← 6♥" +action: 69 + +# State 124 +# Apply action "2♥ ← A♠" +action: 87 + +# State 125 +# Apply action "♥ ← A♥" +action: 186 + +# State 126 +# Apply action "T♥ ← 9♠" +action: 111 + +# State 127 +# Apply action "T♦ ← 9♠" +action: 177 + +# State 128 +# Apply action "2♠ ← A♥" +action: 54 + +# State 129 +# Apply action "♠ ← A♠" +action: 185 + +# State 130 +# WASTE : 5♦ 7♦ 9♦ K♣ 3♣ J♣ J♠ 4♠ 3♦ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ +# FOUNDATIONS : A♠ ♥ 2♣ 2♦ +# TABLEAUS : +# T♥ +# K♠ +# 🂠 🂠 3♥ 2♠ A♥ +# 🂠 T♠ 9♥ +# 🂠 🂠 Q♠ +# 🂠 🂠 🂠 🂠 8♦ 7♣ +# 🂠 🂠 🂠 T♦ 9♠ 8♥ 7♠ 6♥ 5♣ 4♥ 3♠ 2♥ +# TARGETS : T♥ K♠ A♥ 9♥ Q♠ 7♣ 2♥ A♠ ♥ 2♣ 2♦ +# SOURCES : T♥ K♠ 3♥ 2♠ A♥ T♠ 9♥ Q♠ 8♦ 7♣ T♦ 9♠ 8♥ 7♠ 6♥ 5♣ 4♥ 3♠ 2♥ A♠ 2♣ 2♦ 5♦ K♣ J♠ Q♣ T♣ 4♦ +IsTerminal() = False +History() = [23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186, 187, 121, 111, 135, 69, 100, 188, 153, 195, 177, 129, 135, 90, 185, 193, 111, 177, 90, 69, 55, 153, 185, 135, 93, 15, 153, 188, 87, 69, 111, 153, 87, 196, 135, 57, 12, 185, 54, 69, 87, 185, 87, 177, 135, 69, 135, 185, 111, 87, 69, 186, 177, 54, 135, 185, 69, 87, 186, 111, 177, 54, 185] +HistoryString() = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186, 187, 121, 111, 135, 69, 100, 188, 153, 195, 177, 129, 135, 90, 185, 193, 111, 177, 90, 69, 55, 153, 185, 135, 93, 15, 153, 188, 87, 69, 111, 153, 87, 196, 135, 57, 12, 185, 54, 69, 87, 185, 87, 177, 135, 69, 135, 185, 111, 87, 69, 186, 177, 54, 135, 185, 69, 87, 186, 111, 177, 54, 185" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186, 187, 121, 111, 135, 69, 100, 188, 153, 195, 177, 129, 135, 90, 185, 193, 111, 177, 90, 69, 55, 153, 185, 135, 93, 15, 153, 188, 87, 69, 111, 153, 87, 196, 135, 57, 12, 185, 54, 69, 87, 185, 87, 177, 135, 69, 135, 185, 111, 87, 69, 186, 177, 54, 135, 185, 69, 87, 186, 111, 177, 54, 185" +ObservationString(0) = "WASTE : 5♦ 7♦ 9♦ K♣ 3♣ J♣ J♠ 4♠ 3♦ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ \nFOUNDATIONS : A♠ ♥ 
2♣ 2♦ \nTABLEAUS : \nT♥ \nK♠ \n🂠 🂠 3♥ 2♠ A♥ \n🂠 T♠ 9♥ \n🂠 🂠 Q♠ \n🂠 🂠 🂠 🂠 8♦ 7♣ \n🂠 🂠 🂠 T♦ 9♠ 8♥ 7♠ 6♥ 5♣ 4♥ 3♠ 2♥ \nTARGETS : T♥ K♠ A♥ 9♥ Q♠ 7♣ 2♥ A♠ ♥ 2♣ 2♦ \nSOURCES : T♥ K♠ 3♥ 2♠ A♥ T♠ 9♥ Q♠ 8♦ 7♣ T♦ 9♠ 8♥ 7♠ 6♥ 5♣ 4♥ 3♠ 2♥ A♠ 2♣ 2♦ 5♦ K♣ J♠ Q♣ T♣ 4♦ " +ObservationTensor(0): binvec(1741, 0x8004000400100000000008000000000004000000000604005000000000800080080000001800040000000003c0000000040010702282a80200008000000000008000000000000100000000000002000000000020000000000400000000000000200000040000000000100000000000000000000002000000000001000000000000000020000400000000000000000800002000000000000000000000000800000000002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [100] +Returns() = [820] +LegalActions() = [87, 111, 135, 186] +StringLegalActions() = ["2♥ ← A♠", "T♥ ← 9♠", "7♣ ← 6♥", "♥ ← A♥"] + +# Apply action "♥ ← A♥" +action: 186 + +# State 131 +# Apply action "A♠ ← 2♠" +action: 193 + +# State 132 +# Apply action "A♥ ← 2♥" +action: 194 + +# State 133 +# Apply action "3♠ ← 2♥" +action: 57 + +# State 134 +# Apply action "T♥ ← 9♠" +action: 111 + +# State 135 +# Apply action "T♦ ← 9♠" +action: 177 + +# State 136 +# Apply action "3♥ ← 2♠" +action: 90 + +# State 137 +# Apply action "2♥ ← A♠" +action: 87 + +# State 138 +# Apply action "♠ ← A♠" +action: 185 + +# State 139 +# Apply action "A♥ ← 2♥" +action: 194 + +# State 140 +# WASTE : 5♦ 7♦ 9♦ K♣ 3♣ J♣ J♠ 4♠ 3♦ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ +# FOUNDATIONS : A♠ 2♥ 2♣ 2♦ +# TABLEAUS : +# T♥ +# K♠ +# 🂠 🂠 3♥ 2♠ +# 🂠 T♠ 9♥ +# 🂠 🂠 Q♠ +# 🂠 🂠 🂠 🂠 8♦ 7♣ +# 🂠 🂠 🂠 T♦ 9♠ 8♥ 7♠ 6♥ 5♣ 4♥ 3♠ +# TARGETS : T♥ K♠ 2♠ 9♥ Q♠ 7♣ 3♠ A♠ 2♥ 2♣ 2♦ +# SOURCES : T♥ K♠ 3♥ 2♠ T♠ 9♥ Q♠ 8♦ 7♣ T♦ 9♠ 8♥ 7♠ 6♥ 5♣ 4♥ 3♠ A♠ 2♥ 2♣ 2♦ 5♦ K♣ J♠ Q♣ T♣ 4♦ +IsTerminal() = False +History() = [23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186, 187, 121, 111, 135, 69, 100, 188, 153, 195, 177, 129, 135, 90, 185, 193, 111, 177, 90, 69, 55, 153, 185, 135, 93, 15, 153, 188, 87, 69, 111, 153, 87, 196, 135, 57, 12, 185, 54, 69, 87, 185, 87, 177, 135, 69, 135, 185, 111, 87, 69, 186, 177, 54, 135, 185, 69, 87, 186, 111, 177, 54, 185, 186, 193, 194, 57, 111, 177, 90, 87, 185, 194] +HistoryString() = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186, 187, 121, 111, 135, 69, 100, 188, 153, 195, 177, 129, 135, 90, 185, 193, 111, 177, 90, 69, 55, 153, 185, 135, 93, 15, 153, 188, 87, 69, 111, 153, 87, 196, 135, 57, 12, 185, 54, 69, 87, 185, 87, 177, 135, 69, 135, 185, 111, 87, 69, 186, 177, 54, 135, 185, 69, 87, 186, 111, 177, 54, 185, 186, 193, 194, 57, 111, 177, 90, 87, 185, 194" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186, 187, 121, 111, 135, 69, 100, 188, 153, 195, 177, 129, 135, 90, 185, 193, 111, 177, 90, 69, 55, 153, 185, 
135, 93, 15, 153, 188, 87, 69, 111, 153, 87, 196, 135, 57, 12, 185, 54, 69, 87, 185, 87, 177, 135, 69, 135, 185, 111, 87, 69, 186, 177, 54, 135, 185, 69, 87, 186, 111, 177, 54, 185, 186, 193, 194, 57, 111, 177, 90, 87, 185, 194" +ObservationString(0) = "WASTE : 5♦ 7♦ 9♦ K♣ 3♣ J♣ J♠ 4♠ 3♦ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ \nFOUNDATIONS : A♠ 2♥ 2♣ 2♦ \nTABLEAUS : \nT♥ \nK♠ \n🂠 🂠 3♥ 2♠ \n🂠 T♠ 9♥ \n🂠 🂠 Q♠ \n🂠 🂠 🂠 🂠 8♦ 7♣ \n🂠 🂠 🂠 T♦ 9♠ 8♥ 7♠ 6♥ 5♣ 4♥ 3♠ \nTARGETS : T♥ K♠ 2♠ 9♥ Q♠ 7♣ 3♠ A♠ 2♥ 2♣ 2♦ \nSOURCES : T♥ K♠ 3♥ 2♠ T♠ 9♥ Q♠ 8♦ 7♣ T♦ 9♠ 8♥ 7♠ 6♥ 5♣ 4♥ 3♠ A♠ 2♥ 2♣ 2♦ 5♦ K♣ J♠ Q♣ T♣ 4♦ " +ObservationTensor(0): binvec(1741, 0x8001000400100000000008000000000004000000000604001000000000800080080000001800040000000003c0000000040010702280a80200008000000000008000000000000100000000000002000000000020000000000400000000000000200000040000000000100000000000000000000002000000000001000000000000000020000400000000000000000800002000000000000000000000000800000000002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [90] +Returns() = [1010] +LegalActions() = [57, 58, 111, 135, 193] +StringLegalActions() = ["3♠ ← 2♥", "3♠ ← 2♦", "T♥ ← 9♠", "7♣ ← 6♥", "A♠ ← 2♠"] + +# Apply action "T♥ ← 9♠" +action: 111 + +# State 141 +# Apply action "A♠ ← 2♠" +action: 193 + +# State 142 +# Apply action "3♥ ← 2♠" +action: 90 + +# State 143 +# Apply action "3♠ ← 2♥" +action: 57 + +# State 144 +# Apply action "A♥ ← 2♥" +action: 194 + +# State 145 +# Apply action "A♠ ← 2♠" +action: 193 + +# State 146 +# Apply action "2♥ ← 3♥" +action: 86 + +# State 147 +# Apply action "Reveal6♣" +action: 32 + +# State 148 +# Apply action "7♣ ← 6♥" +action: 135 + +# State 149 +# Apply action "6♣ ← 5♦" +action: 133 + +# State 150 +# WASTE : 7♦ 9♦ K♣ 3♣ J♣ J♠ 4♠ 3♦ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ +# FOUNDATIONS : 2♠ 3♥ 2♣ 2♦ +# TABLEAUS : +# T♥ 9♠ 8♥ 7♠ +# K♠ +# 🂠 6♣ 5♦ +# 🂠 T♠ 9♥ +# 🂠 🂠 Q♠ +# 🂠 🂠 🂠 🂠 8♦ 7♣ 6♥ 5♣ 4♥ 3♠ +# 🂠 🂠 🂠 T♦ +# TARGETS : 7♠ K♠ 5♦ 9♥ Q♠ 3♠ T♦ 2♠ 3♥ 2♣ 2♦ +# SOURCES : T♥ 9♠ 8♥ 7♠ K♠ 6♣ 5♦ T♠ 9♥ Q♠ 8♦ 7♣ 6♥ 5♣ 4♥ 3♠ T♦ 2♠ 3♥ 2♣ 2♦ 7♦ 3♣ 4♠ K♦ 5♠ +IsTerminal() = True +History() = [23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186, 187, 121, 111, 135, 69, 100, 188, 153, 195, 177, 129, 135, 90, 185, 193, 111, 177, 90, 69, 55, 153, 185, 135, 93, 15, 153, 188, 87, 69, 111, 153, 87, 196, 135, 57, 12, 185, 54, 69, 87, 185, 87, 177, 135, 69, 135, 185, 111, 87, 69, 186, 177, 54, 135, 185, 69, 87, 186, 111, 177, 54, 185, 186, 193, 194, 57, 111, 177, 90, 87, 185, 194, 111, 193, 90, 57, 194, 193, 86, 32, 135, 133] +HistoryString() = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186, 187, 121, 111, 135, 69, 100, 188, 153, 195, 177, 129, 135, 90, 185, 193, 111, 177, 90, 69, 55, 153, 185, 135, 93, 15, 153, 188, 87, 69, 111, 153, 87, 196, 135, 57, 12, 185, 54, 69, 87, 185, 87, 177, 135, 69, 135, 185, 111, 87, 69, 186, 177, 54, 135, 185, 69, 87, 186, 111, 177, 54, 185, 186, 193, 194, 57, 111, 177, 90, 87, 185, 194, 111, 193, 90, 57, 194, 193, 86, 32, 135, 133" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 
+InformationStateString(0) = "23, 28, 16, 41, 3, 21, 40, 7, 44, 46, 9, 48, 39, 29, 37, 17, 11, 4, 31, 42, 2, 33, 22, 19, 38, 52, 18, 36, 5, 50, 43, 188, 13, 121, 188, 58, 27, 91, 1, 187, 10, 196, 58, 78, 121, 154, 111, 105, 185, 187, 75, 47, 154, 189, 14, 187, 171, 188, 154, 186, 49, 121, 105, 188, 172, 177, 120, 186, 187, 121, 111, 135, 69, 100, 188, 153, 195, 177, 129, 135, 90, 185, 193, 111, 177, 90, 69, 55, 153, 185, 135, 93, 15, 153, 188, 87, 69, 111, 153, 87, 196, 135, 57, 12, 185, 54, 69, 87, 185, 87, 177, 135, 69, 135, 185, 111, 87, 69, 186, 177, 54, 135, 185, 69, 87, 186, 111, 177, 54, 185, 186, 193, 194, 57, 111, 177, 90, 87, 185, 194, 111, 193, 90, 57, 194, 193, 86, 32, 135, 133" +ObservationString(0) = "WASTE : 7♦ 9♦ K♣ 3♣ J♣ J♠ 4♠ 3♦ Q♣ K♦ 5♥ T♣ 5♠ J♦ 4♦ \nFOUNDATIONS : 2♠ 3♥ 2♣ 2♦ \nTABLEAUS : \nT♥ 9♠ 8♥ 7♠ \nK♠ \n🂠 6♣ 5♦ \n🂠 T♠ 9♥ \n🂠 🂠 Q♠ \n🂠 🂠 🂠 🂠 8♦ 7♣ 6♥ 5♣ 4♥ 3♠ \n🂠 🂠 🂠 T♦ \nTARGETS : 7♠ K♠ 5♦ 9♥ Q♠ 3♠ T♦ 2♠ 3♥ 2♣ 2♦ \nSOURCES : T♥ 9♠ 8♥ 7♠ K♠ 6♣ 5♦ T♠ 9♥ Q♠ 8♦ 7♣ 6♥ 5♣ 4♥ 3♠ T♦ 2♠ 3♥ 2♣ 2♦ 7♦ 3♣ 4♠ K♦ 5♠ " +ObservationTensor(0): binvec(1741, 0x40008004001000000a0028000000000004000000000400000000100100800080080000001800040000000003c1000500140010700000000000008000000000002000000000000040000000000400000000008000000000000004000000800000000002000000000000000000000040000000000020000000000000000400008000000000000000010000040000000000000000000000010000000000040000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [20] +Returns() = [1220] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/spades.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/spades.txt new file mode 100644 index 0000000..9e19675 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/spades.txt @@ -0,0 +1,1245 @@ +game: spades + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Partnership Spades" +GameType.max_num_players = 4 +GameType.min_num_players = 4 +GameType.parameter_specification = ["mercy_threshold", "num_tricks", "use_mercy_rule", "win_or_loss_bonus", "win_threshold"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "spades" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 66 +PolicyTensorShape() = [66] +MaxChanceOutcomes() = 52 +GetParameters() = {mercy_threshold=-350,num_tricks=2,use_mercy_rule=True,win_or_loss_bonus=200,win_threshold=500} +NumPlayers() = 4 +MinUtility() = -430.0 +MaxUtility() = 430.0 +UtilitySum() = None +ObservationTensorShape() = [578] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 578 +MaxGameLength() = 56 +ToString() = "spades()" + +# State 0 +# S +# H +# D +# C +# S S +# H H +# D D +# C C +# S +# H +# D +# C +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "S none\nH none\nD none\nC none\n" +ObservationString(1) = "S none\nH none\nD none\nC none\n" +ObservationString(2) = "S none\nH none\nD none\nC none\n" +ObservationString(3) = "S none\nH none\nD 
none\nC none\n" +ObservationTensor(0): zeros(578) +ObservationTensor(1): zeros(578) +ObservationTensor(2): zeros(578) +ObservationTensor(3): zeros(578) +ChanceOutcomes() = [(0,0.0192308), (1,0.0192308), (2,0.0192308), (3,0.0192308), (4,0.0192308), (5,0.0192308), (6,0.0192308), (7,0.0192308), (8,0.0192308), (9,0.0192308), (10,0.0192308), (11,0.0192308), (12,0.0192308), (13,0.0192308), (14,0.0192308), (15,0.0192308), (16,0.0192308), (17,0.0192308), (18,0.0192308), (19,0.0192308), (20,0.0192308), (21,0.0192308), (22,0.0192308), (23,0.0192308), (24,0.0192308), (25,0.0192308), (26,0.0192308), (27,0.0192308), (28,0.0192308), (29,0.0192308), (30,0.0192308), (31,0.0192308), (32,0.0192308), (33,0.0192308), (34,0.0192308), (35,0.0192308), (36,0.0192308), (37,0.0192308), (38,0.0192308), (39,0.0192308), (40,0.0192308), (41,0.0192308), (42,0.0192308), (43,0.0192308), (44,0.0192308), (45,0.0192308), (46,0.0192308), (47,0.0192308), (48,0.0192308), (49,0.0192308), (50,0.0192308), (51,0.0192308)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51] +StringLegalActions() = ["C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9", "CT", "CJ", "CQ", "CK", "CA", "D2", "D3", "D4", "D5", "D6", "D7", "D8", "D9", "DT", "DJ", "DQ", "DK", "DA", "H2", "H3", "H4", "H5", "H6", "H7", "H8", "H9", "HT", "HJ", "HQ", "HK", "HA", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "ST", "SJ", "SQ", "SK", "SA"] + +# Apply action "D8" +action: 19 + +# State 1 +# S +# H +# D 8 +# C +# S S +# H H +# D D +# C C +# S +# H +# D +# C +IsTerminal() = False +History() = [19] +HistoryString() = "19" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "S none\nH none\nD 8\nC none\n" +ObservationString(1) = "S none\nH none\nD none\nC none\n" +ObservationString(2) = "S none\nH none\nD none\nC none\n" +ObservationString(3) = "S none\nH none\nD none\nC none\n" +ObservationTensor(0): zeros(578) +ObservationTensor(1): zeros(578) +ObservationTensor(2): zeros(578) +ObservationTensor(3): zeros(578) +ChanceOutcomes() = [(0,0.0196078), (1,0.0196078), (2,0.0196078), (3,0.0196078), (4,0.0196078), (5,0.0196078), (6,0.0196078), (7,0.0196078), (8,0.0196078), (9,0.0196078), (10,0.0196078), (11,0.0196078), (12,0.0196078), (13,0.0196078), (14,0.0196078), (15,0.0196078), (16,0.0196078), (17,0.0196078), (18,0.0196078), (20,0.0196078), (21,0.0196078), (22,0.0196078), (23,0.0196078), (24,0.0196078), (25,0.0196078), (26,0.0196078), (27,0.0196078), (28,0.0196078), (29,0.0196078), (30,0.0196078), (31,0.0196078), (32,0.0196078), (33,0.0196078), (34,0.0196078), (35,0.0196078), (36,0.0196078), (37,0.0196078), (38,0.0196078), (39,0.0196078), (40,0.0196078), (41,0.0196078), (42,0.0196078), (43,0.0196078), (44,0.0196078), (45,0.0196078), (46,0.0196078), (47,0.0196078), (48,0.0196078), (49,0.0196078), (50,0.0196078), (51,0.0196078)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51] +StringLegalActions() = ["C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9", "CT", "CJ", "CQ", "CK", "CA", "D2", "D3", "D4", "D5", "D6", "D7", "D9", "DT", "DJ", "DQ", "DK", "DA", "H2", "H3", "H4", "H5", "H6", "H7", "H8", "H9", "HT", "HJ", "HQ", "HK", "HA", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "ST", "SJ", "SQ", "SK", "SA"] + +# 
Apply action "D7" +action: 18 + +# State 2 +# Apply action "SJ" +action: 48 + +# State 3 +# Apply action "S6" +action: 43 + +# State 4 +# Apply action "CA" +action: 12 + +# State 5 +# Apply action "H2" +action: 26 + +# State 6 +# Apply action "C2" +action: 0 + +# State 7 +# Apply action "C9" +action: 7 + +# State 8 +# Apply action "H4" +action: 28 + +# State 9 +# Apply action "C3" +action: 1 + +# State 10 +# Apply action "S3" +action: 40 + +# State 11 +# Apply action "DT" +action: 21 + +# State 12 +# Apply action "D5" +action: 16 + +# State 13 +# Apply action "ST" +action: 47 + +# State 14 +# Apply action "H5" +action: 29 + +# State 15 +# Apply action "HT" +action: 34 + +# State 16 +# Apply action "D9" +action: 20 + +# State 17 +# Apply action "SA" +action: 51 + +# State 18 +# Apply action "HQ" +action: 36 + +# State 19 +# Apply action "D3" +action: 14 + +# State 20 +# Apply action "SK" +action: 50 + +# State 21 +# Apply action "DA" +action: 25 + +# State 22 +# Apply action "C6" +action: 4 + +# State 23 +# Apply action "C7" +action: 5 + +# State 24 +# Apply action "S8" +action: 45 + +# State 25 +# Apply action "D4" +action: 15 + +# State 26 +# Apply action "HJ" +action: 35 + +# State 27 +# Apply action "S9" +action: 46 + +# State 28 +# Apply action "S2" +action: 39 + +# State 29 +# Apply action "S4" +action: 41 + +# State 30 +# Apply action "C4" +action: 2 + +# State 31 +# Apply action "CJ" +action: 9 + +# State 32 +# Apply action "D2" +action: 13 + +# State 33 +# Apply action "D6" +action: 17 + +# State 34 +# Apply action "S5" +action: 42 + +# State 35 +# Apply action "HA" +action: 38 + +# State 36 +# Apply action "HK" +action: 37 + +# State 37 +# Apply action "DK" +action: 24 + +# State 38 +# Apply action "S7" +action: 44 + +# State 39 +# Apply action "H7" +action: 31 + +# State 40 +# Apply action "CQ" +action: 10 + +# State 41 +# Apply action "DQ" +action: 23 + +# State 42 +# Apply action "SQ" +action: 49 + +# State 43 +# Apply action "H8" +action: 32 + +# State 44 +# Apply action "CT" +action: 8 + +# State 45 +# Apply action "C5" +action: 3 + +# State 46 +# Apply action "H3" +action: 27 + +# State 47 +# Apply action "DJ" +action: 22 + +# State 48 +# Apply action "H6" +action: 30 + +# State 49 +# Apply action "C8" +action: 6 + +# State 50 +# Apply action "CK" +action: 11 + +# State 51 +# Apply action "H9" +action: 33 + +# State 52 +# S K82 +# H K64 +# D 9852 +# C AQT +# S 96 S AT4 +# H AT987 H 2 +# D JT3 D AKQ764 +# C J97 C 853 +# S QJ753 +# H QJ53 +# D +# C K642 +IsTerminal() = False +History() = [19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33] +HistoryString() = "19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "S K82\nH K64\nD 9852\nC AQT\n" +ObservationString(1) = "S AT4\nH 2\nD AKQ764\nC 853\n" +ObservationString(2) = "S QJ753\nH QJ53\nD none\nC K642\n" +ObservationString(3) = "S 96\nH AT987\nD JT3\nC J97\n" +ObservationTensor(0): binvec(578, 0x20000000000000000ac980a05042000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationTensor(1): binvec(578, 
0x200000000000000520161e000411000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationTensor(2): binvec(578, 0x200000000000000a810001418a8c000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationTensor(3): binvec(578, 0x20000000000000005420601e2120000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65] +StringLegalActions() = ["Nil", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13"] + +# Apply action "2" +action: 54 + +# State 53 +# S K82 +# H K64 +# D 9852 +# C AQT +# S 96 S AT4 +# H AT987 H 2 +# D JT3 D AKQ764 +# C J97 C 853 +# S QJ753 +# H QJ53 +# D +# C K642 +# +# North East South West +# 2 +IsTerminal() = False +History() = [19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54] +HistoryString() = "19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "S K82\nH K64\nD 9852\nC AQT\n\nNorth East South West \n2 " +ObservationString(1) = "S AT4\nH 2\nD AKQ764\nC 853\n\nNorth East South West \n2 ?" +ObservationString(2) = "S QJ753\nH QJ53\nD none\nC K642\n\nNorth East South West \n2 " +ObservationString(3) = "S 96\nH AT987\nD JT3\nC J97\n\nNorth East South West \n2 " +ObservationTensor(0): binvec(578, 0x22000000000000000ac980a05042000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationTensor(1): binvec(578, 0x220000000000000520161e000411000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationTensor(2): binvec(578, 0x220000000000000a810001418a8c000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationTensor(3): binvec(578, 0x22000000000000005420601e2120000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65] +StringLegalActions() = ["Nil", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13"] + +# Apply action "12" +action: 64 + +# State 54 +# S K82 +# H K64 +# D 9852 +# C AQT +# S 96 S AT4 +# H AT987 H 2 +# D JT3 D AKQ764 +# C J97 C 853 +# S QJ753 +# H QJ53 +# D +# C K642 +# +# North East South West +# 2 12 +IsTerminal() = False +History() = [19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64] +HistoryString() = "19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64" +IsChanceNode() = False +IsSimultaneousNode() = False 
+CurrentPlayer() = 2 +ObservationString(0) = "S K82\nH K64\nD 9852\nC AQT\n\nNorth East South West \n2 12 " +ObservationString(1) = "S AT4\nH 2\nD AKQ764\nC 853\n\nNorth East South West \n2 12 " +ObservationString(2) = "S QJ753\nH QJ53\nD none\nC K642\n\nNorth East South West \n2 12 ?" +ObservationString(3) = "S 96\nH AT987\nD JT3\nC J97\n\nNorth East South West \n2 12 " +ObservationTensor(0): binvec(578, 0x22000002000000000ac980a05042000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationTensor(1): binvec(578, 0x220000020000000520161e000411000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationTensor(2): binvec(578, 0x220000020000000a810001418a8c000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationTensor(3): binvec(578, 0x22000002000000005420601e2120000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63] +StringLegalActions() = ["Nil", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11"] + +# Apply action "5" +action: 57 + +# State 55 +# S K82 +# H K64 +# D 9852 +# C AQT +# S 96 S AT4 +# H AT987 H 2 +# D JT3 D AKQ764 +# C J97 C 853 +# S QJ753 +# H QJ53 +# D +# C K642 +# +# North East South West +# 2 12 5 +IsTerminal() = False +History() = [19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57] +HistoryString() = "19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +ObservationString(0) = "S K82\nH K64\nD 9852\nC AQT\n\nNorth East South West \n2 12 5 " +ObservationString(1) = "S AT4\nH 2\nD AKQ764\nC 853\n\nNorth East South West \n2 12 5 " +ObservationString(2) = "S QJ753\nH QJ53\nD none\nC K642\n\nNorth East South West \n2 12 5 " +ObservationString(3) = "S 96\nH AT987\nD JT3\nC J97\n\nNorth East South West \n2 12 5 \n?" 
+ObservationTensor(0): binvec(578, 0x22000002040000000ac980a05042000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationTensor(1): binvec(578, 0x220000020400000520161e000411000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationTensor(2): binvec(578, 0x220000020400000a810001418a8c000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationTensor(3): binvec(578, 0x22000002040000005420601e2120000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [52, 53] +StringLegalActions() = ["Nil", "1"] + +# Apply action "1" +action: 53 + +# State 56 +# S K82 +# H K64 +# D 9852 +# C AQT +# S 96 S AT4 +# H AT987 H 2 +# D JT3 D AKQ764 +# C J97 C 853 +# S QJ753 +# H QJ53 +# D +# C K642 +# +# North East South West +# 2 12 5 1 +IsTerminal() = False +History() = [19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57, 53] +HistoryString() = "19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57, 53" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "S K82\nH K64\nD 9852\nC AQT\n\nNorth East South West \n2 12 5 1 " +ObservationString(1) = "S AT4\nH 2\nD AKQ764\nC 853\n\nNorth East South West \n2 12 5 1 " +ObservationString(2) = "S QJ753\nH QJ53\nD none\nC K642\n\nNorth East South West \n2 12 5 1 " +ObservationString(3) = "S 96\nH AT987\nD JT3\nC J97\n\nNorth East South West \n2 12 5 1 " +ObservationTensor(0): binvec(578, 0x12000002040100000ac980a05042000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationTensor(1): binvec(578, 0x120000020401000520161e000411000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationTensor(2): binvec(578, 0x120000020401000a810001418a8c000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +ObservationTensor(3): binvec(578, 0x12000002040100005420601e2120000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [8, 10, 12, 13, 16, 19, 20, 28, 30, 37] +StringLegalActions() = ["CT", "CQ", "CA", "D2", "D5", "D8", "D9", "H4", "H6", "HK"] + +# Apply action "D8" +action: 19 + +# State 57 +# S K82 +# H K64 +# D 952 +# C AQT +# S 96 S AT4 +# H AT987 H 2 +# D JT3 D AKQ764 +# C J97 C 853 +# S QJ753 +# H QJ53 +# D +# C K642 +# +# North East South West +# 2 12 5 1 +# +# N E S W N E S +# D8 +# +# Tricks taken: +# +# North East South West +# 0 0 0 0 +IsTerminal() = False +History() = [19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57, 53, 19] +HistoryString() = "19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 
40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57, 53, 19" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "S K82\nH K64\nD 952\nC AQT\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 \n\nTricks taken:\n\nNorth East South West\n0 0 0 0 \n" +ObservationString(1) = "S AT4\nH 2\nD AKQ764\nC 853\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 \n\nTricks taken:\n\nNorth East South West\n0 0 0 0 \n" +ObservationString(2) = "S QJ753\nH QJ53\nD none\nC K642\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 \n\nTricks taken:\n\nNorth East South West\n0 0 0 0 \n" +ObservationString(3) = "S 96\nH AT987\nD JT3\nC J97\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 \n\nTricks taken:\n\nNorth East South West\n0 0 0 0 \n" +ObservationTensor(0): binvec(578, 0x12000002040100000ac880a05042000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008004002001000) +ObservationTensor(1): binvec(578, 0x120000020401000520161e000411000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000008004002001000) +ObservationTensor(2): binvec(578, 0x120000020401000a810001418a8c000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000000000000008004002001000) +ObservationTensor(3): binvec(578, 0x12000002040100005420601e2120000000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000008004002001000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [15, 17, 18, 23, 24, 25] +StringLegalActions() = ["D4", "D6", "D7", "DQ", "DK", "DA"] + +# Apply action "D6" +action: 17 + +# State 58 +# S K82 +# H K64 +# D 952 +# C AQT +# S 96 S AT4 +# H AT987 H 2 +# D JT3 D AKQ74 +# C J97 C 853 +# S QJ753 +# H QJ53 +# D +# C K642 +# +# North East South West +# 2 12 5 1 +# +# N E S W N E S +# D8 D6 +# +# Tricks taken: +# +# North East South West +# 0 0 0 0 +IsTerminal() = False +History() = [19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57, 53, 19, 17] +HistoryString() = "19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57, 53, 19, 17" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "S K82\nH K64\nD 952\nC AQT\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 \n\nTricks taken:\n\nNorth East South West\n0 0 0 0 \n" +ObservationString(1) = "S AT4\nH 2\nD AKQ74\nC 853\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 \n\nTricks taken:\n\nNorth East South West\n0 0 0 0 \n" +ObservationString(2) = "S QJ753\nH QJ53\nD none\nC K642\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 \n\nTricks taken:\n\nNorth East South West\n0 0 0 0 \n" +ObservationString(3) = "S 96\nH AT987\nD JT3\nC J97\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 \n\nTricks taken:\n\nNorth East South West\n0 0 0 0 \n" +ObservationTensor(0): binvec(578, 
0x12000002040100000ac880a05042000010000000000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000008004002001000) +ObservationTensor(1): binvec(578, 0x120000020401000520121e000411000040000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000008004002001000) +ObservationTensor(2): binvec(578, 0x120000020401000a810001418a8c000000000000000000000000000000100000000000040000000000000000000000000000000000000000000000000000000000008004002001000) +ObservationTensor(3): binvec(578, 0x12000002040100005420601e2120000000000000000001000000000000400000000000000000000000000000000000000000000000000000000000000000000000008004002001000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0, 2, 4, 11, 27, 29, 35, 36, 40, 42, 44, 48, 49] +StringLegalActions() = ["C2", "C4", "C6", "CK", "H3", "H5", "HJ", "HQ", "S3", "S5", "S7", "SJ", "SQ"] + +# Apply action "S5" +action: 42 + +# State 59 +# S K82 +# H K64 +# D 952 +# C AQT +# S 96 S AT4 +# H AT987 H 2 +# D JT3 D AKQ74 +# C J97 C 853 +# S QJ73 +# H QJ53 +# D +# C K642 +# +# North East South West +# 2 12 5 1 +# +# N E S W N E S +# D8 D6 S5 +# +# Tricks taken: +# +# North East South West +# 0 0 0 0 +IsTerminal() = False +History() = [19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57, 53, 19, 17, 42] +HistoryString() = "19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57, 53, 19, 17, 42" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +ObservationString(0) = "S K82\nH K64\nD 952\nC AQT\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 \n\nTricks taken:\n\nNorth East South West\n0 0 0 0 \n" +ObservationString(1) = "S AT4\nH 2\nD AKQ74\nC 853\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 \n\nTricks taken:\n\nNorth East South West\n0 0 0 0 \n" +ObservationString(2) = "S QJ73\nH QJ53\nD none\nC K642\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 \n\nTricks taken:\n\nNorth East South West\n0 0 0 0 \n" +ObservationString(3) = "S 96\nH AT987\nD JT3\nC J97\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 \n\nTricks taken:\n\nNorth East South West\n0 0 0 0 \n" +ObservationTensor(0): binvec(578, 0x12000002040100000ac880a05042000010000000000004000000000000000000200000000000000000000000000000000000000000000000000000000000000000008004002001000) +ObservationTensor(1): binvec(578, 0x120000020401000520121e000411000040000000000000000002000000000000000000010000000000000000000000000000000000000000000000000000000000008004002001000) +ObservationTensor(2): binvec(578, 0x120000020401000a81000141888c000000000020000000000000000000100000000000040000000000000000000000000000000000000000000000000000000000008004002001000) +ObservationTensor(3): binvec(578, 0x12000002040100005420601e2120000000000000000001000000000000400000000000000000020000000000000000000000000000000000000000000000000000008004002001000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [14, 21, 22] +StringLegalActions() = ["D3", "DT", "DJ"] + +# Apply action "DJ" +action: 22 + +# State 60 +# S K82 +# H K64 +# D 952 +# C AQT +# S 96 S AT4 +# H AT987 H 2 +# D T3 D AKQ74 +# C J97 C 853 +# S QJ73 +# H QJ53 +# D +# C K642 
+# +# North East South West +# 2 12 5 1 +# +# N E S W N E S +# D8 D6 S5 DJ +# +# Tricks taken: +# +# North East South West +# 0 0 1 0 +IsTerminal() = False +History() = [19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57, 53, 19, 17, 42, 22] +HistoryString() = "19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57, 53, 19, 17, 42, 22" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "S K82\nH K64\nD 952\nC AQT\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n\nTricks taken:\n\nNorth East South West\n0 0 1 0 \n" +ObservationString(1) = "S AT4\nH 2\nD AKQ74\nC 853\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n\nTricks taken:\n\nNorth East South West\n0 0 1 0 \n" +ObservationString(2) = "S QJ73\nH QJ53\nD none\nC K642\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n\nTricks taken:\n\nNorth East South West\n0 0 1 0 \n" +ObservationString(3) = "S 96\nH AT987\nD T3\nC J97\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n\nTricks taken:\n\nNorth East South West\n0 0 1 0 \n" +ObservationTensor(0): binvec(578, 0x12000002040100000ac880a05042000000000000000000000000000000000000000000000000000000001000000000000400000000000000000020000000200000008004001001000) +ObservationTensor(1): binvec(578, 0x120000020401000520121e000411000000000000000000000000000000000000000000000000000000004000000000000000000200000002000000000001000000008004001001000) +ObservationTensor(2): binvec(578, 0x120000020401000a81000141888c000000000000000000000000000000000000000000000000000000000000002000000020000000000010000000000004000000008004001001000) +ObservationTensor(3): binvec(578, 0x12000002040100005420401e2120000000000000000000000000000000000000000000000000000000000200000000000100000000000040000000000000000002008004001001000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0, 2, 4, 11, 27, 29, 35, 36] +StringLegalActions() = ["C2", "C4", "C6", "CK", "H3", "H5", "HJ", "HQ"] + +# Apply action "HJ" +action: 35 + +# State 61 +# S K82 +# H K64 +# D 952 +# C AQT +# S 96 S AT4 +# H AT987 H 2 +# D T3 D AKQ74 +# C J97 C 853 +# S QJ73 +# H Q53 +# D +# C K642 +# +# North East South West +# 2 12 5 1 +# +# N E S W N E S +# D8 D6 S5 DJ +# HJ +# +# Tricks taken: +# +# North East South West +# 0 0 1 0 +IsTerminal() = False +History() = [19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57, 53, 19, 17, 42, 22, 35] +HistoryString() = "19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57, 53, 19, 17, 42, 22, 35" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +ObservationString(0) = "S K82\nH K64\nD 952\nC AQT\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ \n\nTricks taken:\n\nNorth East South West\n0 0 1 0 \n" +ObservationString(1) = "S AT4\nH 2\nD AKQ74\nC 853\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 
DJ \n HJ \n\nTricks taken:\n\nNorth East South West\n0 0 1 0 \n" +ObservationString(2) = "S QJ73\nH Q53\nD none\nC K642\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ \n\nTricks taken:\n\nNorth East South West\n0 0 1 0 \n" +ObservationString(3) = "S 96\nH AT987\nD T3\nC J97\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ \n\nTricks taken:\n\nNorth East South West\n0 0 1 0 \n" +ObservationTensor(0): binvec(578, 0x12000002040100000ac880a05042000000000000000000000000000000000010000000000000000000001000000000000400000000000000000020000000200000008004001001000) +ObservationTensor(1): binvec(578, 0x120000020401000520121e000411000000000000000000000100000000000000000000000000000000004000000000000000000200000002000000000001000000008004001001000) +ObservationTensor(2): binvec(578, 0x120000020401000a81000140888c000000001000000000000000000000000000000000000000000000000000002000000020000000000010000000000004000000008004001001000) +ObservationTensor(3): binvec(578, 0x12000002040100005420401e2120000000000000000000000000000000000000000000000001000000000200000000000100000000000040000000000000000002008004001001000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [31, 32, 33, 34, 38] +StringLegalActions() = ["H7", "H8", "H9", "HT", "HA"] + +# Apply action "H8" +action: 32 + +# State 62 +# S K82 +# H K64 +# D 952 +# C AQT +# S 96 S AT4 +# H AT97 H 2 +# D T3 D AKQ74 +# C J97 C 853 +# S QJ73 +# H Q53 +# D +# C K642 +# +# North East South West +# 2 12 5 1 +# +# N E S W N E S +# D8 D6 S5 DJ +# HJ H8 +# +# Tricks taken: +# +# North East South West +# 0 0 1 0 +IsTerminal() = False +History() = [19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57, 53, 19, 17, 42, 22, 35, 32] +HistoryString() = "19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57, 53, 19, 17, 42, 22, 35, 32" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "S K82\nH K64\nD 952\nC AQT\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 \n\nTricks taken:\n\nNorth East South West\n0 0 1 0 \n" +ObservationString(1) = "S AT4\nH 2\nD AKQ74\nC 853\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 \n\nTricks taken:\n\nNorth East South West\n0 0 1 0 \n" +ObservationString(2) = "S QJ73\nH Q53\nD none\nC K642\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 \n\nTricks taken:\n\nNorth East South West\n0 0 1 0 \n" +ObservationString(3) = "S 96\nH AT97\nD T3\nC J97\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 \n\nTricks taken:\n\nNorth East South West\n0 0 1 0 \n" +ObservationTensor(0): binvec(578, 0x12000002040100000ac880a05042000000000000000000000000000000000010000000000008000000001000000000000400000000000000000020000000200000008004001001000) +ObservationTensor(1): binvec(578, 0x120000020401000520121e000411000000000000000000000100000000000080000000000000000000004000000000000000000200000002000000000001000000008004001001000) +ObservationTensor(2): binvec(578, 0x120000020401000a81000140888c000000001000000000000800000000000000000000000000000000000000002000000020000000000010000000000004000000008004001001000) +ObservationTensor(3): 
binvec(578, 0x1200000204010000542040162120000000008000000000000000000000000000000000000001000000000200000000000100000000000040000000000000000002008004001001000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [28, 30, 37] +StringLegalActions() = ["H4", "H6", "HK"] + +# Apply action "HK" +action: 37 + +# State 63 +# S K82 +# H 64 +# D 952 +# C AQT +# S 96 S AT4 +# H AT97 H 2 +# D T3 D AKQ74 +# C J97 C 853 +# S QJ73 +# H Q53 +# D +# C K642 +# +# North East South West +# 2 12 5 1 +# +# N E S W N E S +# D8 D6 S5 DJ +# HJ H8 HK +# +# Tricks taken: +# +# North East South West +# 0 0 1 0 +IsTerminal() = False +History() = [19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57, 53, 19, 17, 42, 22, 35, 32, 37] +HistoryString() = "19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57, 53, 19, 17, 42, 22, 35, 32, 37" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "S K82\nH 64\nD 952\nC AQT\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 HK \n\nTricks taken:\n\nNorth East South West\n0 0 1 0 \n" +ObservationString(1) = "S AT4\nH 2\nD AKQ74\nC 853\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 HK \n\nTricks taken:\n\nNorth East South West\n0 0 1 0 \n" +ObservationString(2) = "S QJ73\nH Q53\nD none\nC K642\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 HK \n\nTricks taken:\n\nNorth East South West\n0 0 1 0 \n" +ObservationString(3) = "S 96\nH AT97\nD T3\nC J97\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 HK \n\nTricks taken:\n\nNorth East South West\n0 0 1 0 \n" +ObservationTensor(0): binvec(578, 0x12000002040100000ac880a01042000000000400000000000000000000000010000000000008000000001000000000000400000000000000000020000000200000008004001001000) +ObservationTensor(1): binvec(578, 0x120000020401000520121e000411000000000000000000000100000000000080000000000000400000004000000000000000000200000002000000000001000000008004001001000) +ObservationTensor(2): binvec(578, 0x120000020401000a81000140888c000000001000000000000800000000000004000000000000000000000000002000000020000000000010000000000004000000008004001001000) +ObservationTensor(3): binvec(578, 0x1200000204010000542040162120000000008000000000000040000000000000000000000001000000000200000000000100000000000040000000000000000002008004001001000) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [26] +StringLegalActions() = ["H2"] + +# Apply action "H2" +action: 26 + +# State 64 +# Apply action "D9" +action: 20 + +# State 65 +# Apply action "DK" +action: 24 + +# State 66 +# Apply action "HQ" +action: 36 + +# State 67 +# Apply action "DT" +action: 21 + +# State 68 +# Apply action "C8" +action: 6 + +# State 69 +# Apply action "C6" +action: 4 + +# State 70 +# Apply action "CJ" +action: 9 + +# State 71 +# Apply action "CT" +action: 8 + +# State 72 +# Apply action "H7" +action: 31 + +# State 73 +# Apply action "H6" +action: 30 + +# State 74 +# Apply action "ST" +action: 47 + +# State 75 +# Apply action "H3" +action: 27 + +# State 76 +# Apply action "C3" +action: 1 + +# State 77 +# Apply action "CK" +action: 11 + +# State 78 +# Apply action "C7" +action: 5 + +# 
State 79 +# Apply action "CA" +action: 12 + +# State 80 +# Apply action "CQ" +action: 10 + +# State 81 +# Apply action "C5" +action: 3 + +# State 82 +# Apply action "C2" +action: 0 + +# State 83 +# Apply action "C9" +action: 7 + +# State 84 +# Apply action "H4" +action: 28 + +# State 85 +# Apply action "S4" +action: 41 + +# State 86 +# Apply action "H5" +action: 29 + +# State 87 +# Apply action "H9" +action: 33 + +# State 88 +# Apply action "D4" +action: 15 + +# State 89 +# Apply action "SJ" +action: 48 + +# State 90 +# Apply action "D3" +action: 14 + +# State 91 +# Apply action "D2" +action: 13 + +# State 92 +# S K82 +# H +# D 5 +# C +# S 96 S A +# H AT H +# D D AQ7 +# C C +# S Q73 +# H +# D +# C 4 +# +# North East South West +# 2 12 5 1 +# +# N E S W N E S +# D8 D6 S5 DJ +# HJ H8 HK H2 +# D9 DK HQ DT +# C8 C6 CJ CT +# H7 H6 ST H3 +# C3 CK C7 CA +# CQ C5 C2 C9 +# H4 S4 H5 H9 +# D4 SJ D3 D2 +# +# Tricks taken: +# +# North East South West +# 3 3 2 1 +IsTerminal() = False +History() = [19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57, 53, 19, 17, 42, 22, 35, 32, 37, 26, 20, 24, 36, 21, 6, 4, 9, 8, 31, 30, 47, 27, 1, 11, 5, 12, 10, 3, 0, 7, 28, 41, 29, 33, 15, 48, 14, 13] +HistoryString() = "19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57, 53, 19, 17, 42, 22, 35, 32, 37, 26, 20, 24, 36, 21, 6, 4, 9, 8, 31, 30, 47, 27, 1, 11, 5, 12, 10, 3, 0, 7, 28, 41, 29, 33, 15, 48, 14, 13" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +ObservationString(0) = "S K82\nH none\nD 5\nC none\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 HK H2 \nD9 DK HQ DT \n C8 C6 CJ CT \n H7 H6 ST H3 \n C3 CK C7 CA \nCQ C5 C2 C9 \nH4 S4 H5 H9 \n D4 SJ D3 D2 \n\nTricks taken:\n\nNorth East South West\n3 3 2 1 \n" +ObservationString(1) = "S A\nH none\nD AQ7\nC none\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 HK H2 \nD9 DK HQ DT \n C8 C6 CJ CT \n H7 H6 ST H3 \n C3 CK C7 CA \nCQ C5 C2 C9 \nH4 S4 H5 H9 \n D4 SJ D3 D2 \n\nTricks taken:\n\nNorth East South West\n3 3 2 1 \n" +ObservationString(2) = "S Q73\nH none\nD none\nC 4\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 HK H2 \nD9 DK HQ DT \n C8 C6 CJ CT \n H7 H6 ST H3 \n C3 CK C7 CA \nCQ C5 C2 C9 \nH4 S4 H5 H9 \n D4 SJ D3 D2 \n\nTricks taken:\n\nNorth East South West\n3 3 2 1 \n" +ObservationString(3) = "S 96\nH AT\nD none\nC none\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 HK H2 \nD9 DK HQ DT \n C8 C6 CJ CT \n H7 H6 ST H3 \n C3 CK C7 CA \nCQ C5 C2 C9 \nH4 S4 H5 H9 \n D4 SJ D3 D2 \n\nTricks taken:\n\nNorth East South West\n3 3 2 1 \n" +ObservationTensor(0): binvec(578, 0x1200000204010000000800001042000000000000000000000000000000000000000000000000000000040000000000001000000000000000000000800020000000001000800800800) +ObservationTensor(1): binvec(578, 0x1200000204010000000214000001000000000000000000000000000000000000000000000000000000010000000000000000000008000200000000000040000000001000800800800) +ObservationTensor(2): binvec(578, 0x1200000204010002000000000884000000000000000000000000000000000000000000000000000000000000000080002000000000000400000000000010000000001000800800800) +ObservationTensor(3): binvec(578, 
0x1200000204010000000000022120000000000000000000000000000000000000000000000000000000020000000000004000000000000100000000000000000000081000800800800) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [2] +StringLegalActions() = ["C4"] + +# Apply action "C4" +action: 2 + +# State 93 +# S K82 +# H +# D 5 +# C +# S 96 S A +# H AT H +# D D AQ7 +# C C +# S Q73 +# H +# D +# C +# +# North East South West +# 2 12 5 1 +# +# N E S W N E S +# D8 D6 S5 DJ +# HJ H8 HK H2 +# D9 DK HQ DT +# C8 C6 CJ CT +# H7 H6 ST H3 +# C3 CK C7 CA +# CQ C5 C2 C9 +# H4 S4 H5 H9 +# D4 SJ D3 D2 +# C4 +# +# Tricks taken: +# +# North East South West +# 3 3 2 1 +IsTerminal() = False +History() = [19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57, 53, 19, 17, 42, 22, 35, 32, 37, 26, 20, 24, 36, 21, 6, 4, 9, 8, 31, 30, 47, 27, 1, 11, 5, 12, 10, 3, 0, 7, 28, 41, 29, 33, 15, 48, 14, 13, 2] +HistoryString() = "19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57, 53, 19, 17, 42, 22, 35, 32, 37, 26, 20, 24, 36, 21, 6, 4, 9, 8, 31, 30, 47, 27, 1, 11, 5, 12, 10, 3, 0, 7, 28, 41, 29, 33, 15, 48, 14, 13, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +ObservationString(0) = "S K82\nH none\nD 5\nC none\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 HK H2 \nD9 DK HQ DT \n C8 C6 CJ CT \n H7 H6 ST H3 \n C3 CK C7 CA \nCQ C5 C2 C9 \nH4 S4 H5 H9 \n D4 SJ D3 D2 \n C4 \n\nTricks taken:\n\nNorth East South West\n3 3 2 1 \n" +ObservationString(1) = "S A\nH none\nD AQ7\nC none\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 HK H2 \nD9 DK HQ DT \n C8 C6 CJ CT \n H7 H6 ST H3 \n C3 CK C7 CA \nCQ C5 C2 C9 \nH4 S4 H5 H9 \n D4 SJ D3 D2 \n C4 \n\nTricks taken:\n\nNorth East South West\n3 3 2 1 \n" +ObservationString(2) = "S Q73\nH none\nD none\nC none\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 HK H2 \nD9 DK HQ DT \n C8 C6 CJ CT \n H7 H6 ST H3 \n C3 CK C7 CA \nCQ C5 C2 C9 \nH4 S4 H5 H9 \n D4 SJ D3 D2 \n C4 \n\nTricks taken:\n\nNorth East South West\n3 3 2 1 \n" +ObservationString(3) = "S 96\nH AT\nD none\nC none\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 HK H2 \nD9 DK HQ DT \n C8 C6 CJ CT \n H7 H6 ST H3 \n C3 CK C7 CA \nCQ C5 C2 C9 \nH4 S4 H5 H9 \n D4 SJ D3 D2 \n C4 \n\nTricks taken:\n\nNorth East South West\n3 3 2 1 \n" +ObservationTensor(0): binvec(578, 0x1200000204010000000800001042000000000000000000000000002000000000000000000000000000040000000000001000000000000000000000800020000000001000800800800) +ObservationTensor(1): binvec(578, 0x1200000204010000000214000001000000000000020000000000000000000000000000000000000000010000000000000000000008000200000000000040000000001000800800800) +ObservationTensor(2): binvec(578, 0x1200000204010000000000000884200000000000000000000000000000000000000000000000000000000000000080002000000000000400000000000010000000001000800800800) +ObservationTensor(3): binvec(578, 0x1200000204010000000000022120000000000000000000000000000000000000000200000000000000020000000000004000000000000100000000000000000000081000800800800) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [34, 38, 43, 46] +StringLegalActions() = ["HT", "HA", "S6", "S9"] 
+ +# Apply action "HA" +action: 38 + +# State 94 +# S K82 +# H +# D 5 +# C +# S 96 S A +# H T H +# D D AQ7 +# C C +# S Q73 +# H +# D +# C +# +# North East South West +# 2 12 5 1 +# +# N E S W N E S +# D8 D6 S5 DJ +# HJ H8 HK H2 +# D9 DK HQ DT +# C8 C6 CJ CT +# H7 H6 ST H3 +# C3 CK C7 CA +# CQ C5 C2 C9 +# H4 S4 H5 H9 +# D4 SJ D3 D2 +# C4 HA +# +# Tricks taken: +# +# North East South West +# 3 3 2 1 +IsTerminal() = False +History() = [19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57, 53, 19, 17, 42, 22, 35, 32, 37, 26, 20, 24, 36, 21, 6, 4, 9, 8, 31, 30, 47, 27, 1, 11, 5, 12, 10, 3, 0, 7, 28, 41, 29, 33, 15, 48, 14, 13, 2, 38] +HistoryString() = "19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57, 53, 19, 17, 42, 22, 35, 32, 37, 26, 20, 24, 36, 21, 6, 4, 9, 8, 31, 30, 47, 27, 1, 11, 5, 12, 10, 3, 0, 7, 28, 41, 29, 33, 15, 48, 14, 13, 2, 38" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "S K82\nH none\nD 5\nC none\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 HK H2 \nD9 DK HQ DT \n C8 C6 CJ CT \n H7 H6 ST H3 \n C3 CK C7 CA \nCQ C5 C2 C9 \nH4 S4 H5 H9 \n D4 SJ D3 D2 \n C4 HA \n\nTricks taken:\n\nNorth East South West\n3 3 2 1 \n" +ObservationString(1) = "S A\nH none\nD AQ7\nC none\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 HK H2 \nD9 DK HQ DT \n C8 C6 CJ CT \n H7 H6 ST H3 \n C3 CK C7 CA \nCQ C5 C2 C9 \nH4 S4 H5 H9 \n D4 SJ D3 D2 \n C4 HA \n\nTricks taken:\n\nNorth East South West\n3 3 2 1 \n" +ObservationString(2) = "S Q73\nH none\nD none\nC none\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 HK H2 \nD9 DK HQ DT \n C8 C6 CJ CT \n H7 H6 ST H3 \n C3 CK C7 CA \nCQ C5 C2 C9 \nH4 S4 H5 H9 \n D4 SJ D3 D2 \n C4 HA \n\nTricks taken:\n\nNorth East South West\n3 3 2 1 \n" +ObservationString(3) = "S 96\nH T\nD none\nC none\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 HK H2 \nD9 DK HQ DT \n C8 C6 CJ CT \n H7 H6 ST H3 \n C3 CK C7 CA \nCQ C5 C2 C9 \nH4 S4 H5 H9 \n D4 SJ D3 D2 \n C4 HA \n\nTricks taken:\n\nNorth East South West\n3 3 2 1 \n" +ObservationTensor(0): binvec(578, 0x1200000204010000000800001042000000000000000000000000002000000000000000000000200000040000000000001000000000000000000000800020000000001000800800800) +ObservationTensor(1): binvec(578, 0x1200000204010000000214000001000000000000020000000000000000000002000000000000000000010000000000000000000008000200000000000040000000001000800800800) +ObservationTensor(2): binvec(578, 0x1200000204010000000000000884200000000000000000000020000000000000000000000000000000000000000080002000000000000400000000000010000000001000800800800) +ObservationTensor(3): binvec(578, 0x1200000204010000000000020120000000000200000000000000000000000000000200000000000000020000000000004000000000000100000000000000000000081000800800800) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [16, 39, 45, 50] +StringLegalActions() = ["D5", "S2", "S8", "SK"] + +# Apply action "S8" +action: 45 + +# State 95 +# S K2 +# H +# D 5 +# C +# S 96 S A +# H T H +# D D AQ7 +# C C +# S Q73 +# H +# D +# C +# +# North East South West +# 2 12 5 1 +# +# N E S W N E S +# D8 D6 S5 DJ +# HJ H8 HK H2 +# D9 
DK HQ DT +# C8 C6 CJ CT +# H7 H6 ST H3 +# C3 CK C7 CA +# CQ C5 C2 C9 +# H4 S4 H5 H9 +# D4 SJ D3 D2 +# C4 HA S8 +# +# Tricks taken: +# +# North East South West +# 3 3 2 1 +IsTerminal() = False +History() = [19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57, 53, 19, 17, 42, 22, 35, 32, 37, 26, 20, 24, 36, 21, 6, 4, 9, 8, 31, 30, 47, 27, 1, 11, 5, 12, 10, 3, 0, 7, 28, 41, 29, 33, 15, 48, 14, 13, 2, 38, 45] +HistoryString() = "19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57, 53, 19, 17, 42, 22, 35, 32, 37, 26, 20, 24, 36, 21, 6, 4, 9, 8, 31, 30, 47, 27, 1, 11, 5, 12, 10, 3, 0, 7, 28, 41, 29, 33, 15, 48, 14, 13, 2, 38, 45" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +ObservationString(0) = "S K2\nH none\nD 5\nC none\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 HK H2 \nD9 DK HQ DT \n C8 C6 CJ CT \n H7 H6 ST H3 \n C3 CK C7 CA \nCQ C5 C2 C9 \nH4 S4 H5 H9 \n D4 SJ D3 D2 \n C4 HA S8 \n\nTricks taken:\n\nNorth East South West\n3 3 2 1 \n" +ObservationString(1) = "S A\nH none\nD AQ7\nC none\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 HK H2 \nD9 DK HQ DT \n C8 C6 CJ CT \n H7 H6 ST H3 \n C3 CK C7 CA \nCQ C5 C2 C9 \nH4 S4 H5 H9 \n D4 SJ D3 D2 \n C4 HA S8 \n\nTricks taken:\n\nNorth East South West\n3 3 2 1 \n" +ObservationString(2) = "S Q73\nH none\nD none\nC none\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 HK H2 \nD9 DK HQ DT \n C8 C6 CJ CT \n H7 H6 ST H3 \n C3 CK C7 CA \nCQ C5 C2 C9 \nH4 S4 H5 H9 \n D4 SJ D3 D2 \n C4 HA S8 \n\nTricks taken:\n\nNorth East South West\n3 3 2 1 \n" +ObservationString(3) = "S 96\nH T\nD none\nC none\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 HK H2 \nD9 DK HQ DT \n C8 C6 CJ CT \n H7 H6 ST H3 \n C3 CK C7 CA \nCQ C5 C2 C9 \nH4 S4 H5 H9 \n D4 SJ D3 D2 \n C4 HA S8 \n\nTricks taken:\n\nNorth East South West\n3 3 2 1 \n" +ObservationTensor(0): binvec(578, 0x1200000204010000000800001002000000000004000000000000002000000000000000000000200000040000000000001000000000000000000000800020000000001000800800800) +ObservationTensor(1): binvec(578, 0x1200000204010000000214000001000000000000020000000000000000000002000000000000004000010000000000000000000008000200000000000040000000001000800800800) +ObservationTensor(2): binvec(578, 0x1200000204010000000000000884200000000000000000000020000000000000040000000000000000000000000080002000000000000400000000000010000000001000800800800) +ObservationTensor(3): binvec(578, 0x1200000204010000000000020120000000000200000000000000400000000000000200000000000000020000000000004000000000000100000000000000000000081000800800800) +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [18, 23, 25, 51] +StringLegalActions() = ["D7", "DQ", "DA", "SA"] + +# Apply action "DQ" +action: 23 + +# State 96 +# Apply action "D5" +action: 16 + +# State 97 +# Apply action "D7" +action: 18 + +# State 98 +# Apply action "SQ" +action: 49 + +# State 99 +# Apply action "HT" +action: 34 + +# State 100 +# Apply action "S3" +action: 40 + +# State 101 +# Apply action "S6" +action: 43 + +# State 102 +# Apply action "S2" +action: 39 + +# State 103 +# Apply action "SA" +action: 51 + +# State 104 +# Apply 
action "DA" +action: 25 + +# State 105 +# Apply action "S7" +action: 44 + +# State 106 +# Apply action "S9" +action: 46 + +# State 107 +# Apply action "SK" +action: 50 + +# State 108 +# S K82 +# H K64 +# D 9852 +# C AQT +# S 96 S AT4 +# H AT987 H 2 +# D JT3 D AKQ764 +# C J97 C 853 +# S QJ753 +# H QJ53 +# D +# C K642 +# +# North East South West +# 2 12 5 1 +# +# N E S W N E S +# D8 D6 S5 DJ +# HJ H8 HK H2 +# D9 DK HQ DT +# C8 C6 CJ CT +# H7 H6 ST H3 +# C3 CK C7 CA +# CQ C5 C2 C9 +# H4 S4 H5 H9 +# D4 SJ D3 D2 +# C4 HA S8 DQ +# D5 D7 SQ HT +# S3 S6 S2 SA +# DA S7 S9 SK +# +# Tricks taken: +# +# North East South West +# 5 4 3 1 +# +# Score: N/S 71 E/W -130 +IsTerminal() = True +History() = [19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57, 53, 19, 17, 42, 22, 35, 32, 37, 26, 20, 24, 36, 21, 6, 4, 9, 8, 31, 30, 47, 27, 1, 11, 5, 12, 10, 3, 0, 7, 28, 41, 29, 33, 15, 48, 14, 13, 2, 38, 45, 23, 16, 18, 49, 34, 40, 43, 39, 51, 25, 44, 46, 50] +HistoryString() = "19, 18, 48, 43, 12, 26, 0, 7, 28, 1, 40, 21, 16, 47, 29, 34, 20, 51, 36, 14, 50, 25, 4, 5, 45, 15, 35, 46, 39, 41, 2, 9, 13, 17, 42, 38, 37, 24, 44, 31, 10, 23, 49, 32, 8, 3, 27, 22, 30, 6, 11, 33, 54, 64, 57, 53, 19, 17, 42, 22, 35, 32, 37, 26, 20, 24, 36, 21, 6, 4, 9, 8, 31, 30, 47, 27, 1, 11, 5, 12, 10, 3, 0, 7, 28, 41, 29, 33, 15, 48, 14, 13, 2, 38, 45, 23, 16, 18, 49, 34, 40, 43, 39, 51, 25, 44, 46, 50" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = " S K82\n H K64\n D 9852\n C AQT\nS 96 S AT4\nH AT987 H 2\nD JT3 D AKQ764\nC J97 C 853\n S QJ753\n H QJ53\n D \n C K642\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 HK H2 \nD9 DK HQ DT \n C8 C6 CJ CT \n H7 H6 ST H3 \n C3 CK C7 CA \nCQ C5 C2 C9 \nH4 S4 H5 H9 \n D4 SJ D3 D2 \n C4 HA S8 DQ \nD5 D7 SQ HT \n S3 S6 S2 SA \n DA S7 S9 SK \n\nTricks taken:\n\nNorth East South West\n5 4 3 1 \n\nScore: N/S 71 E/W -130" +ObservationString(1) = " S K82\n H K64\n D 9852\n C AQT\nS 96 S AT4\nH AT987 H 2\nD JT3 D AKQ764\nC J97 C 853\n S QJ753\n H QJ53\n D \n C K642\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 HK H2 \nD9 DK HQ DT \n C8 C6 CJ CT \n H7 H6 ST H3 \n C3 CK C7 CA \nCQ C5 C2 C9 \nH4 S4 H5 H9 \n D4 SJ D3 D2 \n C4 HA S8 DQ \nD5 D7 SQ HT \n S3 S6 S2 SA \n DA S7 S9 SK \n\nTricks taken:\n\nNorth East South West\n5 4 3 1 \n\nScore: N/S 71 E/W -130" +ObservationString(2) = " S K82\n H K64\n D 9852\n C AQT\nS 96 S AT4\nH AT987 H 2\nD JT3 D AKQ764\nC J97 C 853\n S QJ753\n H QJ53\n D \n C K642\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 HK H2 \nD9 DK HQ DT \n C8 C6 CJ CT \n H7 H6 ST H3 \n C3 CK C7 CA \nCQ C5 C2 C9 \nH4 S4 H5 H9 \n D4 SJ D3 D2 \n C4 HA S8 DQ \nD5 D7 SQ HT \n S3 S6 S2 SA \n DA S7 S9 SK \n\nTricks taken:\n\nNorth East South West\n5 4 3 1 \n\nScore: N/S 71 E/W -130" +ObservationString(3) = " S K82\n H K64\n D 9852\n C AQT\nS 96 S AT4\nH AT987 H 2\nD JT3 D AKQ764\nC J97 C 853\n S QJ753\n H QJ53\n D \n C K642\n\nNorth East South West \n2 12 5 1 \n\nN E S W N E S\nD8 D6 S5 DJ \n HJ H8 HK H2 \nD9 DK HQ DT \n C8 C6 CJ CT \n H7 H6 ST H3 \n C3 CK C7 CA \nCQ C5 C2 C9 \nH4 S4 H5 H9 \n D4 SJ D3 D2 \n C4 HA S8 DQ \nD5 D7 SQ HT \n S3 S6 S2 SA \n DA S7 S9 SK \n\nTricks taken:\n\nNorth East South West\n5 4 3 1 \n\nScore: N/S 71 E/W -130" +ObservationTensor(0): binvec(578, 
0xa00000204010000000000000000000000000000000000000000000000000000000000000000000000000000000020000004000000000000000008000000000000200400400400800) +ObservationTensor(1): binvec(578, 0xa00000204010000000000000000000000000000000000000000000000000000000000000000000000000040000000000000000080000000000002000000000000020400400400800) +ObservationTensor(2): binvec(578, 0xa00000204010000000000000000000000000000000000000000000000000000000000000000000000000000000800000000000020000000000000200000040000000400400400800) +ObservationTensor(3): binvec(578, 0xa00000204010000000000000000000000000000000000000000000000000000000000000000000000000000000200000000000002000000400000000000000000800400400400800) +Rewards() = [71, -130, 71, -130] +Returns() = [71, -130, 71, -130] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/stones_and_gems.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/stones_and_gems.txt new file mode 100644 index 0000000..0ea43b1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/stones_and_gems.txt @@ -0,0 +1,754 @@ +game: stones_and_gems + +GameType.chance_mode = ChanceMode.SAMPLED_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Stones and Gems" +GameType.max_num_players = 1 +GameType.min_num_players = 1 +GameType.parameter_specification = ["blob_chance", "blob_max_percentage", "grid", "magic_wall_steps", "obs_show_ids", "rng_seed"] +GameType.provides_information_state_string = False +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.REWARDS +GameType.short_name = "stones_and_gems" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 5 +PolicyTensorShape() = [5] +MaxChanceOutcomes() = 1 +GetParameters() = {blob_chance=20,blob_max_percentage=0.16,grid=20|12|600|4\n19|19|19|19|19|19|19|19|19|19|19|19|19|19|19|19|19|19|19|19\n19|03|02|02|03|02|02|02|02|03|02|02|02|02|02|03|02|02|02|19\n19|02|00|02|02|02|02|02|02|01|02|02|02|02|02|02|02|02|02|19\n19|02|02|02|05|02|02|02|02|02|02|03|02|02|02|02|02|02|02|19\n19|18|18|18|18|18|18|18|18|18|18|18|18|18|02|02|02|03|02|19\n19|02|02|02|02|02|05|02|02|02|02|02|02|02|02|02|02|02|02|19\n19|02|02|03|02|02|02|02|02|02|02|05|02|02|03|02|02|01|01|19\n19|02|02|03|02|02|02|03|02|02|02|02|02|02|02|02|02|01|11|19\n19|02|02|02|02|02|18|18|18|18|18|18|18|18|18|18|18|18|18|19\n19|02|02|05|02|02|02|02|02|02|05|03|02|02|03|02|02|03|02|19\n19|02|02|02|02|02|02|02|02|02|02|02|02|02|03|02|02|02|02|07\n19|19|19|19|19|19|19|19|19|19|19|19|19|19|19|19|19|19|19|19,magic_wall_steps=140,obs_show_ids=False,rng_seed=0} +NumPlayers() = 1 +MinUtility() = 0.0 +MaxUtility() = 664.0 +UtilitySum() = None +ObservationTensorShape() = [31, 12, 20] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 7440 +MaxGameLength() = 600 +ToString() = "stones_and_gems()" + +# State 0 +# SSSSSSSSSSSSSSSSSSSS +# So..o....o.....o...S +# S.@...... .........S +# S...*......o.......S +# SHHHHHHHHHHHHH...o.S +# S.....*............S +# S..o.......*..o.. S +# S..o...o......... 
FS +# S.....HHHHHHHHHHHHHS +# S..*......*o..o..o.S +# S.............o....C +# SSSSSSSSSSSSSSSSSSSS +# time left: 600, gems required: 4, gems collectred: 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "SSSSSSSSSSSSSSSSSSSS\nSo..o....o.....o...S\nS.@...... .........S\nS...*......o.......S\nSHHHHHHHHHHHHH...o.S\nS.....*............S\nS..o.......*..o.. S\nS..o...o......... FS\nS.....HHHHHHHHHHHHHS\nS..*......*o..o..o.S\nS.............o....C\nSSSSSSSSSSSSSSSSSSSS\ntime left: 600, gems required: 4, gems collectred: 0" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◯◉◉◉◉◯◉◉◉◉◉◯◉◉◉◯ ◯◉◯◯◉◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◉◯◉◉◉◉◉◉◯◉◉◉◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◯◉◉◉◉◉◉◯◉◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◉◉◉◉◉◉◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯ ◯◉◉◯◉◉◉◉◉◉◉◯◉◉◯◉◉◯◯◯ ◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ ◯◉◉◯◉◉◉◯◉◉◉◉◉◉◉◉◉◯◯◯ ◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◯◉◉◉◉◉◉◯◯◉◉◯◉◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉◯◯◉◯◯ ◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉◉◉◉◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉◉◉◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +SerializeState() = "20,12,600,140,0,38,0,20,-1,1,4,0,0,0,0,240,0\n19,1,19,2,19,3,19,4,19,5,19,6,19,7,19,8,19,9,19,10,19,11,19,12,19,13,19,14,19,15,19,16,19,17,19,18,19,19,19,20\n19,21,3,22,2,23,2,24,3,25,2,26,2,27,2,28,2,29,3,30,2,31,2,32,2,33,2,34,2,35,3,36,2,37,2,38,2,39,19,40\n19,41,2,42,0,43,2,44,2,45,2,46,2,47,2,48,2,49,1,50,2,51,2,52,2,53,2,54,2,55,2,56,2,57,2,58,2,59,19,60\n19,61,2,62,2,63,2,64,5,65,2,66,2,67,2,68,2,69,2,70,2,71,3,72,2,73,2,74,2,75,2,76,2,77,2,78,2,79,19,80\n19,81,18,82,18,83,18,84,18,85,18,86,18,87,18,88,18,89,18,90,18,91,18,92,18,93,18,94,2,95,2,96,2,97,3,98,2,99,19,100\n19,101,2,102,2,103,2,104,2,105,2,106,5,107,2,108,2,109,2,110,2,111,2,112,2,113,2,114,2,115,2,116,2,117,2,118,2,119,19,120\n19,121,2,122,2,123,3,124,2,125,2,126,2,127,2,128,2,129,2,130,2,131,5,132,2,133,2,134,3,135,2,136,2,137,1,138,1,139,19,140\n19,141,2,142,2,143,3,144,2,145,2,146,2,147,3,148,2,149,2,150,2,151,2,152,2,153,2,154,2,155,2,156,2,157,1,158,11,159,19,160\n19,161,2,162,2,163,2,164,2,165,2,166,18,167,18,168,18,169,18,170,18,171,18,172,18,173,18,174,18,175,18,176,18,177,18,178,18,179,19,180\n19,181,2,182,2,183,5,184,2,185,2,186,2,187,2,188,2,189,2,190,5,191,3,192,2,193,2,194,3,195,2,196,2,197,3,198,2,199,19,200\n19,201,2,202,2,203,2,204,2,205,2,206,2,207,2,208,2,209,2,210,2,211,2,212,2,213,2,214,3,215,2,216,2,217,2,218,2,219,7,220\n19,221,19,222,19,223,19,224,19,225,19,226,19,227,19,228,19,229,19,230,19,231,19,232,19,233,19,234,19,235,19,236,19,237,19,238,19,239,19,240" +Rewards() = [0] +Returns() = [0] +LegalActions() = [0, 1, 2, 3, 4] +StringLegalActions() = ["none", "up", "right", "down", "left"] + +# Apply action "right" +action: 2 + +# State 1 +# chance node +IsTerminal() = False +History() = [2] +HistoryString() = "2" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "ChanceNode -- no observation" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +SerializeState() = "20,12,599,140,0,38,0,20,-1,1,4,0,0,0,0,241,-1\n19,1,19,2,19,3,19,4,19,5,19,6,19,7,19,8,19,9,19,10,19,11,19,12,19,13,19,14,19,15,19,16,19,17,19,18,19,19,19,20\n19,21,3,22,2,23,2,24,3,25,2,26,2,27,2,28,2,29,3,30,2,31,2,32,2,33,2,34,2,35,3,36,2,37,2,38,2,39,19,40\n19,41,2,42,1,241,0,43,2,45,2,46,2,47,2,48,2,49,1,50,2,51,2,52,2,53,2,54,2,55,2,56,2,57,2,58,2,59,19,60\n19,61,2,62,2,63,2,64,5,65,2,66,2,67,2,68,2,69,2,70,2,71,3,72,2,73,2,74,2,75,2,76,2,77,2,78,2,79,19,80\n19,81,18,82,18,83,18,84,18,85,18,86,18,87,18,88,18,89,18,90,18,91,18,92,18,93,18,94,2,95,2,96,2,97,3,98,2,99,19,100\n19,101,2,102,2,103,2,104,2,105,2,106,5,107,2,108,2,109,2,110,2,111,2,112,2,113,2,114,2,115,2,116,2,117,2,118,2,119,19,120\n19,121,2,122,2,123,3,124,2,125,2,126,2,127,2,128,2,129,2,130,2,131,5,132,2,133,2,134,3,135,2,136,2,137,1,138,1,139,19,140\n19,141,2,142,2,143,3,144,2,145,2,146,2,147,3,148,2,149,2,150,2,151,2,152,2,153,2,154,2,155,2,156,2,157,1,158,11,159,19,160\n19,161,2,162,2,163,2,164,2,165,2,166,18,167,18,168,18,169,18,170,18,171,18,172,18,173,18,174,18,175,18,176,18,177,18,178,18,179,19,180\n19,181,2,182,2,183,5,184,2,185,2,186,2,187,2,188,2,189,2,190,5,191,3,192,2,193,2,194,3,195,2,196,2,197,3,198,2,199,19,200\n19,201,2,202,2,203,2,204,2,205,2,206,2,207,2,208,2,209,2,210,2,211,2,212,2,213,2,214,3,215,2,216,2,217,2,218,2,219,7,220\n19,221,19,222,19,223,19,224,19,225,19,226,19,227,19,228,19,229,19,230,19,231,19,232,19,233,19,234,19,235,19,236,19,237,19,238,19,239,19,240" +ChanceOutcomes() = [(0,1)] +LegalActions() = [0] +StringLegalActions() = ["Chance outcome: 0"] + +# Apply action "Chance outcome: 0" +action: 0 + +# State 2 +# SSSSSSSSSSSSSSSSSSSS +# So..o.... .....o...S +# S. @.....o.........S +# S...*......o.......S +# SHHHHHHHHHHHHH...o.S +# S.....*............S +# S..o.......*..o.. S +# S..o...o.........F S +# S.....HHHHHHHHHHHHHS +# S..*......*o..o..o.S +# S.............o....C +# SSSSSSSSSSSSSSSSSSSS +# time left: 599, gems required: 4, gems collectred: 0 +IsTerminal() = False +History() = [2, 0] +HistoryString() = "2, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "SSSSSSSSSSSSSSSSSSSS\nSo..o.... .....o...S\nS. @.....o.........S\nS...*......o.......S\nSHHHHHHHHHHHHH...o.S\nS.....*............S\nS..o.......*..o.. 
S\nS..o...o.........F S\nS.....HHHHHHHHHHHHHS\nS..*......*o..o..o.S\nS.............o....C\nSSSSSSSSSSSSSSSSSSSS\ntime left: 599, gems required: 4, gems collectred: 0" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◯◉◉◉◉◯◉◉◉◉◉◯◉◉◉◯ ◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◉◉◉◉◉◯◉◉◉◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◯◉◉◉◉◉◉◯◉◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◉◉◉◉◉◉◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯ ◯◉◉◯◉◉◉◉◉◉◉◯◉◉◯◉◉◯◯◯ ◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◉◉◯◉◉◉◯◉◉◉◉◉◉◉◉◉◯◯◯ ◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◯◉◉◉◉◉◉◯◯◉◉◯◉◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉◯◯◉◯◯ ◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉◉◉◉◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉◉◉◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +SerializeState() = "20,12,599,140,0,38,0,20,5,1,4,0,0,0,0,243,0\n19,1,19,2,19,3,19,4,19,5,19,6,19,7,19,8,19,9,19,10,19,11,19,12,19,13,19,14,19,15,19,16,19,17,19,18,19,19,19,20\n19,21,3,22,2,23,2,24,3,25,2,26,2,27,2,28,2,29,1,242,2,31,2,32,2,33,2,34,2,35,3,36,2,37,2,38,2,39,19,40\n19,41,2,42,1,241,0,43,2,45,2,46,2,47,2,48,2,49,4,30,2,51,2,52,2,53,2,54,2,55,2,56,2,57,2,58,2,59,19,60\n19,61,2,62,2,63,2,64,5,65,2,66,2,67,2,68,2,69,2,70,2,71,3,72,2,73,2,74,2,75,2,76,2,77,2,78,2,79,19,80\n19,81,18,82,18,83,18,84,18,85,18,86,18,87,18,88,18,89,18,90,18,91,18,92,18,93,18,94,2,95,2,96,2,97,3,98,2,99,19,100\n19,101,2,102,2,103,2,104,2,105,2,106,5,107,2,108,2,109,2,110,2,111,2,112,2,113,2,114,2,115,2,116,2,117,2,118,2,119,19,120\n19,121,2,122,2,123,3,124,2,125,2,126,2,127,2,128,2,129,2,130,2,131,5,132,2,133,2,134,3,135,2,136,2,137,1,138,1,139,19,140\n19,141,2,142,2,143,3,144,2,145,2,146,2,147,3,148,2,149,2,150,2,151,2,152,2,153,2,154,2,155,2,156,2,157,11,159,1,243,19,160\n19,161,2,162,2,163,2,164,2,165,2,166,18,167,18,168,18,169,18,170,18,171,18,172,18,173,18,174,18,175,18,176,18,177,18,178,18,179,19,180\n19,181,2,182,2,183,5,184,2,185,2,186,2,187,2,188,2,189,2,190,5,191,3,192,2,193,2,194,3,195,2,196,2,197,3,198,2,199,19,200\n19,201,2,202,2,203,2,204,2,205,2,206,2,207,2,208,2,209,2,210,2,211,2,212,2,213,2,214,3,215,2,216,2,217,2,218,2,219,7,220\n19,221,19,222,19,223,19,224,19,225,19,226,19,227,19,228,19,229,19,230,19,231,19,232,19,233,19,234,19,235,19,236,19,237,19,238,19,239,19,240" +Rewards() = [0] +Returns() = [0] +LegalActions() = [0, 1, 2, 3, 4] +StringLegalActions() = ["none", "up", "right", "down", "left"] + +# Apply action "left" +action: 4 + +# State 3 +# chance node +IsTerminal() = False +History() = [2, 0, 4] +HistoryString() = "2, 0, 4" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +ObservationString(0) = "ChanceNode -- no observation" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +SerializeState() = "20,12,598,140,0,38,0,20,5,1,4,0,0,0,0,244,-1\n19,1,19,2,19,3,19,4,19,5,19,6,19,7,19,8,19,9,19,10,19,11,19,12,19,13,19,14,19,15,19,16,19,17,19,18,19,19,19,20\n19,21,3,22,2,23,2,24,3,25,2,26,2,27,2,28,2,29,1,242,2,31,2,32,2,33,2,34,2,35,3,36,2,37,2,38,2,39,19,40\n19,41,2,42,0,43,1,244,2,45,2,46,2,47,2,48,2,49,4,30,2,51,2,52,2,53,2,54,2,55,2,56,2,57,2,58,2,59,19,60\n19,61,2,62,2,63,2,64,5,65,2,66,2,67,2,68,2,69,2,70,2,71,3,72,2,73,2,74,2,75,2,76,2,77,2,78,2,79,19,80\n19,81,18,82,18,83,18,84,18,85,18,86,18,87,18,88,18,89,18,90,18,91,18,92,18,93,18,94,2,95,2,96,2,97,3,98,2,99,19,100\n19,101,2,102,2,103,2,104,2,105,2,106,5,107,2,108,2,109,2,110,2,111,2,112,2,113,2,114,2,115,2,116,2,117,2,118,2,119,19,120\n19,121,2,122,2,123,3,124,2,125,2,126,2,127,2,128,2,129,2,130,2,131,5,132,2,133,2,134,3,135,2,136,2,137,1,138,1,139,19,140\n19,141,2,142,2,143,3,144,2,145,2,146,2,147,3,148,2,149,2,150,2,151,2,152,2,153,2,154,2,155,2,156,2,157,11,159,1,243,19,160\n19,161,2,162,2,163,2,164,2,165,2,166,18,167,18,168,18,169,18,170,18,171,18,172,18,173,18,174,18,175,18,176,18,177,18,178,18,179,19,180\n19,181,2,182,2,183,5,184,2,185,2,186,2,187,2,188,2,189,2,190,5,191,3,192,2,193,2,194,3,195,2,196,2,197,3,198,2,199,19,200\n19,201,2,202,2,203,2,204,2,205,2,206,2,207,2,208,2,209,2,210,2,211,2,212,2,213,2,214,3,215,2,216,2,217,2,218,2,219,7,220\n19,221,19,222,19,223,19,224,19,225,19,226,19,227,19,228,19,229,19,230,19,231,19,232,19,233,19,234,19,235,19,236,19,237,19,238,19,239,19,240" +ChanceOutcomes() = [(0,1)] +LegalActions() = [0] +StringLegalActions() = ["Chance outcome: 0"] + +# Apply action "Chance outcome: 0" +action: 0 + +# State 4 +# SSSSSSSSSSSSSSSSSSSS +# So..o.... .....o...S +# S.@ .....o.........S +# S...*......o.......S +# SHHHHHHHHHHHHH...o.S +# S.....*............S +# S..o.......*..o.. S +# S..o...o.........F S +# S.....HHHHHHHHHHHHHS +# S..*......*o..o..o.S +# S.............o....C +# SSSSSSSSSSSSSSSSSSSS +# time left: 598, gems required: 4, gems collectred: 0 +IsTerminal() = False +History() = [2, 0, 4, 0] +HistoryString() = "2, 0, 4, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +ObservationString(0) = "SSSSSSSSSSSSSSSSSSSS\nSo..o.... .....o...S\nS.@ .....o.........S\nS...*......o.......S\nSHHHHHHHHHHHHH...o.S\nS.....*............S\nS..o.......*..o.. 
S\nS..o...o.........F S\nS.....HHHHHHHHHHHHHS\nS..*......*o..o..o.S\nS.............o....C\nSSSSSSSSSSSSSSSSSSSS\ntime left: 598, gems required: 4, gems collectred: 0" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◯◉◉◉◉◯◉◉◉◉◉◯◉◉◉◯ ◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◉◉◉◉◉◯◉◉◉◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◯◉◉◉◉◉◉◯◉◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◉◉◉◉◉◉◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯ ◯◉◉◯◉◉◉◉◉◉◉◯◉◉◯◉◉◯◯◯ ◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◉◉◯◉◉◉◯◉◉◉◉◉◉◉◉◉◯◯◯ ◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◯◉◉◉◉◉◉◯◯◉◉◯◉◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉◯◯◉◯◯ ◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉◉◉◉◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◉◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉◉◉◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +SerializeState() = "20,12,598,140,0,38,0,20,5,1,4,0,0,0,0,244,0\n19,1,19,2,19,3,19,4,19,5,19,6,19,7,19,8,19,9,19,10,19,11,19,12,19,13,19,14,19,15,19,16,19,17,19,18,19,19,19,20\n19,21,3,22,2,23,2,24,3,25,2,26,2,27,2,28,2,29,1,242,2,31,2,32,2,33,2,34,2,35,3,36,2,37,2,38,2,39,19,40\n19,41,2,42,0,43,1,244,2,45,2,46,2,47,2,48,2,49,3,30,2,51,2,52,2,53,2,54,2,55,2,56,2,57,2,58,2,59,19,60\n19,61,2,62,2,63,2,64,5,65,2,66,2,67,2,68,2,69,2,70,2,71,3,72,2,73,2,74,2,75,2,76,2,77,2,78,2,79,19,80\n19,81,18,82,18,83,18,84,18,85,18,86,18,87,18,88,18,89,18,90,18,91,18,92,18,93,18,94,2,95,2,96,2,97,3,98,2,99,19,100\n19,101,2,102,2,103,2,104,2,105,2,106,5,107,2,108,2,109,2,110,2,111,2,112,2,113,2,114,2,115,2,116,2,117,2,118,2,119,19,120\n19,121,2,122,2,123,3,124,2,125,2,126,2,127,2,128,2,129,2,130,2,131,5,132,2,133,2,134,3,135,2,136,2,137,1,138,1,139,19,140\n19,141,2,142,2,143,3,144,2,145,2,146,2,147,3,148,2,149,2,150,2,151,2,152,2,153,2,154,2,155,2,156,2,157,10,159,1,243,19,160\n19,161,2,162,2,163,2,164,2,165,2,166,18,167,18,168,18,169,18,170,18,171,18,172,18,173,18,174,18,175,18,176,18,177,18,178,18,179,19,180\n19,181,2,182,2,183,5,184,2,185,2,186,2,187,2,188,2,189,2,190,5,191,3,192,2,193,2,194,3,195,2,196,2,197,3,198,2,199,19,200\n19,201,2,202,2,203,2,204,2,205,2,206,2,207,2,208,2,209,2,210,2,211,2,212,2,213,2,214,3,215,2,216,2,217,2,218,2,219,7,220\n19,221,19,222,19,223,19,224,19,225,19,226,19,227,19,228,19,229,19,230,19,231,19,232,19,233,19,234,19,235,19,236,19,237,19,238,19,239,19,240" +Rewards() = [0] +Returns() = [0] +LegalActions() = [0, 1, 2, 3, 4] +StringLegalActions() = ["none", "up", "right", "down", "left"] + +# Apply action "none" +action: 0 + +# State 5 +# Apply action "Chance outcome: 0" +action: 0 + +# State 6 +# Apply action "left" +action: 4 + +# State 7 +# Apply action "Chance outcome: 0" +action: 0 + +# State 8 +# Apply action "down" +action: 3 + +# State 9 +# Apply action "Chance outcome: 0" +action: 0 + +# State 10 +# Apply action "none" +action: 0 + +# State 11 +# Apply action "Chance outcome: 0" +action: 0 + +# State 12 +# SSSSSSSSSSSSSSSSSSSS +# S ..o.... .....o...S +# SEE .....o.........S +# SEE.*......o.......S +# SEEHHHHHHHHHHH...o.S +# S.....*............S +# S..o.......*..o.. FS +# S..o...o......... 
S +# S.....HHHHHHHHHHHHHS +# S..*......*o..o..o.S +# S.............o....C +# SSSSSSSSSSSSSSSSSSSS +# time left: 594, gems required: 4, gems collectred: 0 +IsTerminal() = True +History() = [2, 0, 4, 0, 0, 0, 4, 0, 3, 0, 0, 0] +HistoryString() = "2, 0, 4, 0, 0, 0, 4, 0, 3, 0, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +ObservationString(0) = "SSSSSSSSSSSSSSSSSSSS\nS ..o.... .....o...S\nSEE .....o.........S\nSEE.*......o.......S\nSEEHHHHHHHHHHH...o.S\nS.....*............S\nS..o.......*..o.. FS\nS..o...o......... S\nS.....HHHHHHHHHHHHHS\nS..*......*o..o..o.S\nS.............o....C\nSSSSSSSSSSSSSSSSSSSS\ntime left: 594, gems required: 4, gems collectred: 0" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◉◉◯◉◉◉◉◯◉◉◉◉◉◯◉◉◉◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◉◉◉◉◉◯◉◉◉◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◉◯◉◉◉◉◉◉◯◉◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ ◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◉◉◉◉◉◉◉◉◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ ◯◉◉◯◉◉◉◉◉◉◉◯◉◉◯◉◉◯◯◯ ◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯ ◯◉◉◯◉◉◉◯◉◉◉◉◉◉◉◉◉◯◯◯ ◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◯◉◉◉◉◉◉◯◯◉◉◯◉◉◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉◯◯◉◯◯ ◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◉◉◉◉◉◉◉◉◯◉◉◉◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ 
◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◉◉◉◉◉◉◉◉◉◉◉◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◉◉◉◉◉◉◉◉◉◉◉◉◉◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉◉ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + 
+◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ + +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +SerializeState() = "20,12,594,140,0,38,0,20,5,1,4,0,0,0,0,255,0\n19,1,19,2,19,3,19,4,19,5,19,6,19,7,19,8,19,9,19,10,19,11,19,12,19,13,19,14,19,15,19,16,19,17,19,18,19,19,19,20\n19,21,1,248,2,23,2,24,3,25,2,26,2,27,2,28,2,29,1,242,2,31,2,32,2,33,2,34,2,35,3,36,2,37,2,38,2,39,19,40\n19,41,26,251,26,254,1,244,2,45,2,46,2,47,2,48,2,49,3,30,2,51,2,52,2,53,2,54,2,55,2,56,2,57,2,58,2,59,19,60\n19,61,26,250,26,252,2,64,5,65,2,66,2,67,2,68,2,69,2,70,2,71,3,72,2,73,2,74,2,75,2,76,2,77,2,78,2,79,19,80\n19,81,26,253,26,255,18,84,18,85,18,86,18,87,18,88,18,89,18,90,18,91,18,92,18,93,18,94,2,95,2,96,2,97,3,98,2,99,19,100\n19,101,2,102,2,103,2,104,2,105,2,106,5,107,2,108,2,109,2,110,2,111,2,112,2,113,2,114,2,115,2,116,2,117,2,118,2,119,19,120\n19,121,2,122,2,123,3,124,2,125,2,126,2,127,2,128,2,129,2,130,2,131,5,132,2,133,2,134,3,135,2,136,2,137,1,249,12,159,19,140\n19,141,2,142,2,143,3,144,2,145,2,146,2,147,3,148,2,149,2,150,2,151,2,152,2,153,2,154,2,155,2,156,2,157,1,245,1,243,19,160\n19,161,2,162,2,163,2,164,2,165,2,166,18,167,18,168,18,169,18,170,18,171,18,172,18,173,18,174,18,175,18,176,18,177,18,178,18,179,19,180\n19,181,2,182,2,183,5,184,2,185,2,186,2,187,2,188,2,189,2,190,5,191,3,192,2,193,2,194,3,195,2,196,2,197,3,198,2,199,19,200\n19,201,2,202,2,203,2,204,2,205,2,206,2,207,2,208,2,209,2,210,2,211,2,212,2,213,2,214,3,215,2,216,2,217,2,218,2,219,7,220\n19,221,19,222,19,223,19,224,19,225,19,226,19,227,19,228,19,229,19,230,19,231,19,232,19,233,19,234,19,235,19,236,19,237,19,238,19,239,19,240" +Rewards() = [0] +Returns() = [0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/tarok(players=3,rng_seed=0).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/tarok(players=3,rng_seed=0).txt new file mode 100644 index 0000000..7af725b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/tarok(players=3,rng_seed=0).txt @@ -0,0 +1,276 @@ +game: tarok(players=3,rng_seed=0) + +GameType.chance_mode 
= ChanceMode.SAMPLED_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Slovenian Tarok" +GameType.max_num_players = 4 +GameType.min_num_players = 3 +GameType.parameter_specification = ["players", "rng_seed"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = False +GameType.provides_observation_tensor = False +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "tarok" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 54 +PolicyTensorShape() = [54] +MaxChanceOutcomes() = 1 +GetParameters() = {players=3,rng_seed=0} +NumPlayers() = 3 +MinUtility() = -500.0 +MaxUtility() = 500.0 +UtilitySum() = None +MaxGameLength() = 65 +ToString() = "tarok(players=3,rng_seed=0)" + +# State 0 +# Game phase: Card dealing +# Selected contract: Not selected +# Current player: -1 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "" +InformationStateString(1) = "" +InformationStateString(2) = "" +SerializeState() = "" +ChanceOutcomes() = [(0,1)] +LegalActions() = [0] +StringLegalActions() = ["Deal"] + +# Apply action "Deal" +action: 0 + +# State 1 +# Game phase: Bidding +# Selected contract: Not selected +# Current player: 1 +# Player cards: 0,1,6,7,12,18,20,22,23,24,30,36,37,40,42,45 +IsTerminal() = False +History() = [0] +HistoryString() = "0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "9,10,15,17,19,21,28,32,35,39,43,46,48,49,51,53;" +InformationStateString(1) = "0,1,6,7,12,18,20,22,23,24,30,36,37,40,42,45;" +InformationStateString(2) = "2,3,4,5,13,14,16,25,26,29,31,34,38,41,44,50;" +SerializeState() = "-1937831252" +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 3, 4, 8, 9, 10, 11, 12] +StringLegalActions() = ["Pass", "Two", "One", "Beggar", "Solo without", "Open beggar", "Colour valat without", "Valat without"] + +# Apply action "Open beggar" +action: 10 + +# State 2 +# Game phase: Bidding +# Selected contract: Not selected +# Current player: 2 +# Player cards: 2,3,4,5,13,14,16,25,26,29,31,34,38,41,44,50 +IsTerminal() = False +History() = [0, 10] +HistoryString() = "0, 10" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "9,10,15,17,19,21,28,32,35,39,43,46,48,49,51,53;10," +InformationStateString(1) = "0,1,6,7,12,18,20,22,23,24,30,36,37,40,42,45;10," +InformationStateString(2) = "2,3,4,5,13,14,16,25,26,29,31,34,38,41,44,50;10," +SerializeState() = "-1937831252\n10" +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 11, 12] +StringLegalActions() = ["Pass", "Colour valat without", "Valat without"] + +# Apply action "Valat without" +action: 12 + +# State 3 +# Game phase: Bidding +# Selected contract: Not selected +# Current player: 0 +# Player cards: 9,10,15,17,19,21,28,32,35,39,43,46,48,49,51,53 +IsTerminal() = False +History() = [0, 10, 12] +HistoryString() = "0, 10, 12" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "9,10,15,17,19,21,28,32,35,39,43,46,48,49,51,53;10,12," +InformationStateString(1) = "0,1,6,7,12,18,20,22,23,24,30,36,37,40,42,45;10,12," +InformationStateString(2) = "2,3,4,5,13,14,16,25,26,29,31,34,38,41,44,50;10,12," 
+SerializeState() = "-1937831252\n10\n12" +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 12] +StringLegalActions() = ["Pass", "Valat without"] + +# Apply action "Valat without" +action: 12 + +# State 4 +# Game phase: Bidding +# Selected contract: Not selected +# Current player: 1 +# Player cards: 0,1,6,7,12,18,20,22,23,24,30,36,37,40,42,45 +IsTerminal() = False +History() = [0, 10, 12, 12] +HistoryString() = "0, 10, 12, 12" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "9,10,15,17,19,21,28,32,35,39,43,46,48,49,51,53;10,12,12," +InformationStateString(1) = "0,1,6,7,12,18,20,22,23,24,30,36,37,40,42,45;10,12,12," +InformationStateString(2) = "2,3,4,5,13,14,16,25,26,29,31,34,38,41,44,50;10,12,12," +SerializeState() = "-1937831252\n10\n12\n12" +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0] +StringLegalActions() = ["Pass"] + +# Apply action "Pass" +action: 0 + +# State 5 +# Game phase: Bidding +# Selected contract: Not selected +# Current player: 2 +# Player cards: 2,3,4,5,13,14,16,25,26,29,31,34,38,41,44,50 +IsTerminal() = False +History() = [0, 10, 12, 12, 0] +HistoryString() = "0, 10, 12, 12, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "9,10,15,17,19,21,28,32,35,39,43,46,48,49,51,53;10,12,12,0," +InformationStateString(1) = "0,1,6,7,12,18,20,22,23,24,30,36,37,40,42,45;10,12,12,0," +InformationStateString(2) = "2,3,4,5,13,14,16,25,26,29,31,34,38,41,44,50;10,12,12,0," +SerializeState() = "-1937831252\n10\n12\n12\n0" +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0] +StringLegalActions() = ["Pass"] + +# Apply action "Pass" +action: 0 + +# State 6 +# Game phase: Bidding +# Selected contract: Not selected +# Current player: 0 +# Player cards: 9,10,15,17,19,21,28,32,35,39,43,46,48,49,51,53 +IsTerminal() = False +History() = [0, 10, 12, 12, 0, 0] +HistoryString() = "0, 10, 12, 12, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "9,10,15,17,19,21,28,32,35,39,43,46,48,49,51,53;10,12,12,0,0," +InformationStateString(1) = "0,1,6,7,12,18,20,22,23,24,30,36,37,40,42,45;10,12,12,0,0," +InformationStateString(2) = "2,3,4,5,13,14,16,25,26,29,31,34,38,41,44,50;10,12,12,0,0," +SerializeState() = "-1937831252\n10\n12\n12\n0\n0" +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [12] +StringLegalActions() = ["Valat without"] + +# Apply action "Valat without" +action: 12 + +# State 7 +# Game phase: Tricks playing +# Selected contract: Valat without +# Current player: 0 +# Player cards: 9,10,15,17,19,21,28,32,35,39,43,46,48,49,51,53 +# Trick cards: +IsTerminal() = False +History() = [0, 10, 12, 12, 0, 0, 12] +HistoryString() = "0, 10, 12, 12, 0, 0, 12" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "9,10,15,17,19,21,28,32,35,39,43,46,48,49,51,53;10,12,12,0,0,12;" +InformationStateString(1) = "0,1,6,7,12,18,20,22,23,24,30,36,37,40,42,45;10,12,12,0,0,12;" +InformationStateString(2) = "2,3,4,5,13,14,16,25,26,29,31,34,38,41,44,50;10,12,12,0,0,12;" +SerializeState() = "-1937831252\n10\n12\n12\n0\n0\n12" +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [9, 10, 15, 17, 19, 21, 28, 32, 35, 39, 43, 46, 48, 49, 51, 53] +StringLegalActions() = ["X", "XI", "XVI", "XVIII", "XX", "Skis", "Queen of Hearts", "2 of Diamonds", "Knight of Diamonds", "8 of Spades", "Knight of Spades", "7 of Clubs", "9 of Clubs", 
"10 of Clubs", "Knight of Clubs", "King of Clubs"] + +# Apply action "King of Clubs" +action: 53 + +# State 8 +# Game phase: Tricks playing +# Selected contract: Valat without +# Current player: 1 +# Player cards: 0,1,6,7,12,18,20,22,23,24,30,36,37,40,42,45 +# Trick cards: 53 +IsTerminal() = False +History() = [0, 10, 12, 12, 0, 0, 12, 53] +HistoryString() = "0, 10, 12, 12, 0, 0, 12, 53" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "9,10,15,17,19,21,28,32,35,39,43,46,48,49,51,53;10,12,12,0,0,12;53," +InformationStateString(1) = "0,1,6,7,12,18,20,22,23,24,30,36,37,40,42,45;10,12,12,0,0,12;53," +InformationStateString(2) = "2,3,4,5,13,14,16,25,26,29,31,34,38,41,44,50;10,12,12,0,0,12;53," +SerializeState() = "-1937831252\n10\n12\n12\n0\n0\n12\n53" +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [0, 1, 6, 7, 12, 18, 20] +StringLegalActions() = ["Pagat", "II", "VII", "VIII", "XIII", "XIX", "Mond"] + +# Apply action "Pagat" +action: 0 + +# State 9 +# Game phase: Tricks playing +# Selected contract: Valat without +# Current player: 2 +# Player cards: 2,3,4,5,13,14,16,25,26,29,31,34,38,41,44,50 +# Trick cards: 53,0 +IsTerminal() = False +History() = [0, 10, 12, 12, 0, 0, 12, 53, 0] +HistoryString() = "0, 10, 12, 12, 0, 0, 12, 53, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "9,10,15,17,19,21,28,32,35,39,43,46,48,49,51,53;10,12,12,0,0,12;53,0," +InformationStateString(1) = "0,1,6,7,12,18,20,22,23,24,30,36,37,40,42,45;10,12,12,0,0,12;53,0," +InformationStateString(2) = "2,3,4,5,13,14,16,25,26,29,31,34,38,41,44,50;10,12,12,0,0,12;53,0," +SerializeState() = "-1937831252\n10\n12\n12\n0\n0\n12\n53\n0" +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [50] +StringLegalActions() = ["Jack of Clubs"] + +# Apply action "Jack of Clubs" +action: 50 + +# State 10 +# Game phase: Finished +# Selected contract: Valat without +# Current player: -4 +IsTerminal() = True +History() = [0, 10, 12, 12, 0, 0, 12, 53, 0, 50] +HistoryString() = "0, 10, 12, 12, 0, 0, 12, 53, 0, 50" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "9,10,15,17,19,21,28,32,35,39,43,46,48,49,51,53;10,12,12,0,0,12;53,0,50" +InformationStateString(1) = "0,1,6,7,12,18,20,22,23,24,30,36,37,40,42,45;10,12,12,0,0,12;53,0,50" +InformationStateString(2) = "2,3,4,5,13,14,16,25,26,29,31,34,38,41,44,50;10,12,12,0,0,12;53,0,50" +SerializeState() = "-1937831252\n10\n12\n12\n0\n0\n12\n53\n0\n50" +Rewards() = [-500, 0, 0] +Returns() = [-500, 0, 0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/tarok(players=4,rng_seed=0).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/tarok(players=4,rng_seed=0).txt new file mode 100644 index 0000000..a20d7ee --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/tarok(players=4,rng_seed=0).txt @@ -0,0 +1,336 @@ +game: tarok(players=4,rng_seed=0) + +GameType.chance_mode = ChanceMode.SAMPLED_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Slovenian Tarok" +GameType.max_num_players = 4 +GameType.min_num_players = 3 +GameType.parameter_specification = ["players", "rng_seed"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = False 
+GameType.provides_observation_tensor = False +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "tarok" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 54 +PolicyTensorShape() = [54] +MaxChanceOutcomes() = 1 +GetParameters() = {players=4,rng_seed=0} +NumPlayers() = 4 +MinUtility() = -500.0 +MaxUtility() = 500.0 +UtilitySum() = None +MaxGameLength() = 72 +ToString() = "tarok(players=4,rng_seed=0)" + +# State 0 +# Game phase: Card dealing +# Selected contract: Not selected +# Current player: -1 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "" +InformationStateString(1) = "" +InformationStateString(2) = "" +InformationStateString(3) = "" +SerializeState() = "" +ChanceOutcomes() = [(0,1)] +LegalActions() = [0] +StringLegalActions() = ["Deal"] + +# Apply action "Deal" +action: 0 + +# State 1 +# Game phase: Bidding +# Selected contract: Not selected +# Current player: 1 +# Player cards: 0,1,9,10,12,15,18,19,36,37,40,42 +IsTerminal() = False +History() = [0] +HistoryString() = "0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "17,21,28,32,35,39,43,46,48,49,51,53;" +InformationStateString(1) = "0,1,9,10,12,15,18,19,36,37,40,42;" +InformationStateString(2) = "6,7,16,20,22,23,24,30,34,38,41,45;" +InformationStateString(3) = "2,3,4,5,13,14,25,26,29,31,44,50;" +SerializeState() = "-1937831252" +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] +StringLegalActions() = ["Pass", "Two", "One", "Solo three", "Solo two", "Solo one", "Beggar", "Solo without", "Open beggar", "Colour valat without", "Valat without"] + +# Apply action "Solo three" +action: 5 + +# State 2 +# Game phase: Bidding +# Selected contract: Not selected +# Current player: 2 +# Player cards: 6,7,16,20,22,23,24,30,34,38,41,45 +IsTerminal() = False +History() = [0, 5] +HistoryString() = "0, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "17,21,28,32,35,39,43,46,48,49,51,53;5," +InformationStateString(1) = "0,1,9,10,12,15,18,19,36,37,40,42;5," +InformationStateString(2) = "6,7,16,20,22,23,24,30,34,38,41,45;5," +InformationStateString(3) = "2,3,4,5,13,14,25,26,29,31,44,50;5," +SerializeState() = "-1937831252\n5" +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0, 6, 7, 8, 9, 10, 11, 12] +StringLegalActions() = ["Pass", "Solo two", "Solo one", "Beggar", "Solo without", "Open beggar", "Colour valat without", "Valat without"] + +# Apply action "Pass" +action: 0 + +# State 3 +# Game phase: Bidding +# Selected contract: Not selected +# Current player: 3 +# Player cards: 2,3,4,5,13,14,25,26,29,31,44,50 +IsTerminal() = False +History() = [0, 5, 0] +HistoryString() = "0, 5, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "17,21,28,32,35,39,43,46,48,49,51,53;5,0," +InformationStateString(1) = "0,1,9,10,12,15,18,19,36,37,40,42;5,0," +InformationStateString(2) = "6,7,16,20,22,23,24,30,34,38,41,45;5,0," +InformationStateString(3) = "2,3,4,5,13,14,25,26,29,31,44,50;5,0," +SerializeState() = "-1937831252\n5\n0" +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0, 6, 7, 8, 9, 10, 11, 12] +StringLegalActions() = ["Pass", "Solo two", "Solo one", "Beggar", "Solo without", "Open 
beggar", "Colour valat without", "Valat without"] + +# Apply action "Solo one" +action: 7 + +# State 4 +# Game phase: Bidding +# Selected contract: Not selected +# Current player: 0 +# Player cards: 17,21,28,32,35,39,43,46,48,49,51,53 +IsTerminal() = False +History() = [0, 5, 0, 7] +HistoryString() = "0, 5, 0, 7" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "17,21,28,32,35,39,43,46,48,49,51,53;5,0,7," +InformationStateString(1) = "0,1,9,10,12,15,18,19,36,37,40,42;5,0,7," +InformationStateString(2) = "6,7,16,20,22,23,24,30,34,38,41,45;5,0,7," +InformationStateString(3) = "2,3,4,5,13,14,25,26,29,31,44,50;5,0,7," +SerializeState() = "-1937831252\n5\n0\n7" +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0, 7, 8, 9, 10, 11, 12] +StringLegalActions() = ["Pass", "Solo one", "Beggar", "Solo without", "Open beggar", "Colour valat without", "Valat without"] + +# Apply action "Valat without" +action: 12 + +# State 5 +# Game phase: Bidding +# Selected contract: Not selected +# Current player: 1 +# Player cards: 0,1,9,10,12,15,18,19,36,37,40,42 +IsTerminal() = False +History() = [0, 5, 0, 7, 12] +HistoryString() = "0, 5, 0, 7, 12" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "17,21,28,32,35,39,43,46,48,49,51,53;5,0,7,12," +InformationStateString(1) = "0,1,9,10,12,15,18,19,36,37,40,42;5,0,7,12," +InformationStateString(2) = "6,7,16,20,22,23,24,30,34,38,41,45;5,0,7,12," +InformationStateString(3) = "2,3,4,5,13,14,25,26,29,31,44,50;5,0,7,12," +SerializeState() = "-1937831252\n5\n0\n7\n12" +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0] +StringLegalActions() = ["Pass"] + +# Apply action "Pass" +action: 0 + +# State 6 +# Game phase: Bidding +# Selected contract: Not selected +# Current player: 3 +# Player cards: 2,3,4,5,13,14,25,26,29,31,44,50 +IsTerminal() = False +History() = [0, 5, 0, 7, 12, 0] +HistoryString() = "0, 5, 0, 7, 12, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "17,21,28,32,35,39,43,46,48,49,51,53;5,0,7,12,0," +InformationStateString(1) = "0,1,9,10,12,15,18,19,36,37,40,42;5,0,7,12,0," +InformationStateString(2) = "6,7,16,20,22,23,24,30,34,38,41,45;5,0,7,12,0," +InformationStateString(3) = "2,3,4,5,13,14,25,26,29,31,44,50;5,0,7,12,0," +SerializeState() = "-1937831252\n5\n0\n7\n12\n0" +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0] +StringLegalActions() = ["Pass"] + +# Apply action "Pass" +action: 0 + +# State 7 +# Game phase: Bidding +# Selected contract: Not selected +# Current player: 0 +# Player cards: 17,21,28,32,35,39,43,46,48,49,51,53 +IsTerminal() = False +History() = [0, 5, 0, 7, 12, 0, 0] +HistoryString() = "0, 5, 0, 7, 12, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "17,21,28,32,35,39,43,46,48,49,51,53;5,0,7,12,0,0," +InformationStateString(1) = "0,1,9,10,12,15,18,19,36,37,40,42;5,0,7,12,0,0," +InformationStateString(2) = "6,7,16,20,22,23,24,30,34,38,41,45;5,0,7,12,0,0," +InformationStateString(3) = "2,3,4,5,13,14,25,26,29,31,44,50;5,0,7,12,0,0," +SerializeState() = "-1937831252\n5\n0\n7\n12\n0\n0" +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [12] +StringLegalActions() = ["Valat without"] + +# Apply action "Valat without" +action: 12 + +# State 8 +# Game phase: Tricks playing +# Selected contract: Valat without +# Current player: 0 +# Player 
cards: 17,21,28,32,35,39,43,46,48,49,51,53 +# Trick cards: +IsTerminal() = False +History() = [0, 5, 0, 7, 12, 0, 0, 12] +HistoryString() = "0, 5, 0, 7, 12, 0, 0, 12" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "17,21,28,32,35,39,43,46,48,49,51,53;5,0,7,12,0,0,12;" +InformationStateString(1) = "0,1,9,10,12,15,18,19,36,37,40,42;5,0,7,12,0,0,12;" +InformationStateString(2) = "6,7,16,20,22,23,24,30,34,38,41,45;5,0,7,12,0,0,12;" +InformationStateString(3) = "2,3,4,5,13,14,25,26,29,31,44,50;5,0,7,12,0,0,12;" +SerializeState() = "-1937831252\n5\n0\n7\n12\n0\n0\n12" +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [17, 21, 28, 32, 35, 39, 43, 46, 48, 49, 51, 53] +StringLegalActions() = ["XVIII", "Skis", "Queen of Hearts", "2 of Diamonds", "Knight of Diamonds", "8 of Spades", "Knight of Spades", "7 of Clubs", "9 of Clubs", "10 of Clubs", "Knight of Clubs", "King of Clubs"] + +# Apply action "7 of Clubs" +action: 46 + +# State 9 +# Game phase: Tricks playing +# Selected contract: Valat without +# Current player: 1 +# Player cards: 0,1,9,10,12,15,18,19,36,37,40,42 +# Trick cards: 46 +IsTerminal() = False +History() = [0, 5, 0, 7, 12, 0, 0, 12, 46] +HistoryString() = "0, 5, 0, 7, 12, 0, 0, 12, 46" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "17,21,28,32,35,39,43,46,48,49,51,53;5,0,7,12,0,0,12;46," +InformationStateString(1) = "0,1,9,10,12,15,18,19,36,37,40,42;5,0,7,12,0,0,12;46," +InformationStateString(2) = "6,7,16,20,22,23,24,30,34,38,41,45;5,0,7,12,0,0,12;46," +InformationStateString(3) = "2,3,4,5,13,14,25,26,29,31,44,50;5,0,7,12,0,0,12;46," +SerializeState() = "-1937831252\n5\n0\n7\n12\n0\n0\n12\n46" +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0, 1, 9, 10, 12, 15, 18, 19] +StringLegalActions() = ["Pagat", "II", "X", "XI", "XIII", "XVI", "XIX", "XX"] + +# Apply action "X" +action: 9 + +# State 10 +# Game phase: Tricks playing +# Selected contract: Valat without +# Current player: 2 +# Player cards: 6,7,16,20,22,23,24,30,34,38,41,45 +# Trick cards: 46,9 +IsTerminal() = False +History() = [0, 5, 0, 7, 12, 0, 0, 12, 46, 9] +HistoryString() = "0, 5, 0, 7, 12, 0, 0, 12, 46, 9" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "17,21,28,32,35,39,43,46,48,49,51,53;5,0,7,12,0,0,12;46,9," +InformationStateString(1) = "0,1,9,10,12,15,18,19,36,37,40,42;5,0,7,12,0,0,12;46,9," +InformationStateString(2) = "6,7,16,20,22,23,24,30,34,38,41,45;5,0,7,12,0,0,12;46,9," +InformationStateString(3) = "2,3,4,5,13,14,25,26,29,31,44,50;5,0,7,12,0,0,12;46,9," +SerializeState() = "-1937831252\n5\n0\n7\n12\n0\n0\n12\n46\n9" +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [6, 7, 16, 20] +StringLegalActions() = ["VII", "VIII", "XVII", "Mond"] + +# Apply action "Mond" +action: 20 + +# State 11 +# Game phase: Tricks playing +# Selected contract: Valat without +# Current player: 3 +# Player cards: 2,3,4,5,13,14,25,26,29,31,44,50 +# Trick cards: 46,9,20 +IsTerminal() = False +History() = [0, 5, 0, 7, 12, 0, 0, 12, 46, 9, 20] +HistoryString() = "0, 5, 0, 7, 12, 0, 0, 12, 46, 9, 20" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "17,21,28,32,35,39,43,46,48,49,51,53;5,0,7,12,0,0,12;46,9,20," +InformationStateString(1) = "0,1,9,10,12,15,18,19,36,37,40,42;5,0,7,12,0,0,12;46,9,20," +InformationStateString(2) = 
"6,7,16,20,22,23,24,30,34,38,41,45;5,0,7,12,0,0,12;46,9,20," +InformationStateString(3) = "2,3,4,5,13,14,25,26,29,31,44,50;5,0,7,12,0,0,12;46,9,20," +SerializeState() = "-1937831252\n5\n0\n7\n12\n0\n0\n12\n46\n9\n20" +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [50] +StringLegalActions() = ["Jack of Clubs"] + +# Apply action "Jack of Clubs" +action: 50 + +# State 12 +# Game phase: Finished +# Selected contract: Valat without +# Current player: -4 +IsTerminal() = True +History() = [0, 5, 0, 7, 12, 0, 0, 12, 46, 9, 20, 50] +HistoryString() = "0, 5, 0, 7, 12, 0, 0, 12, 46, 9, 20, 50" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "17,21,28,32,35,39,43,46,48,49,51,53;5,0,7,12,0,0,12;46,9,20,50" +InformationStateString(1) = "0,1,9,10,12,15,18,19,36,37,40,42;5,0,7,12,0,0,12;46,9,20,50" +InformationStateString(2) = "6,7,16,20,22,23,24,30,34,38,41,45;5,0,7,12,0,0,12;46,9,20,50" +InformationStateString(3) = "2,3,4,5,13,14,25,26,29,31,44,50;5,0,7,12,0,0,12;46,9,20,50" +SerializeState() = "-1937831252\n5\n0\n7\n12\n0\n0\n12\n46\n9\n20\n50" +Rewards() = [-500, 0, 0, 0] +Returns() = [-500, 0, 0, 0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/tic_tac_toe.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/tic_tac_toe.txt new file mode 100644 index 0000000..d251d79 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/tic_tac_toe.txt @@ -0,0 +1,240 @@ +game: tic_tac_toe + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Tic Tac Toe" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "tic_tac_toe" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 9 +PolicyTensorShape() = [9] +MaxChanceOutcomes() = 0 +GetParameters() = {} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [3, 3, 3] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 27 +MaxGameLength() = 9 +ToString() = "tic_tac_toe()" + +# State 0 +# ... +# ... +# ... +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = "...\n...\n..." +ObservationString(1) = "...\n...\n..." +ObservationTensor(0): +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +ObservationTensor(1): +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8] +StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,2)", "x(1,0)", "x(1,1)", "x(1,2)", "x(2,0)", "x(2,1)", "x(2,2)"] + +# Apply action "x(2,2)" +action: 8 + +# State 1 +# ... +# ... 
+# ..x +IsTerminal() = False +History() = [8] +HistoryString() = "8" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "8" +InformationStateString(1) = "8" +ObservationString(0) = "...\n...\n..x" +ObservationString(1) = "...\n...\n..x" +ObservationTensor(0): +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◯ ◯◯◯ ◯◯◉ +ObservationTensor(1): +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◉ ◯◯◯ ◯◯◯ +◉◉◯ ◯◯◯ ◯◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7] +StringLegalActions() = ["o(0,0)", "o(0,1)", "o(0,2)", "o(1,0)", "o(1,1)", "o(1,2)", "o(2,0)", "o(2,1)"] + +# Apply action "o(1,0)" +action: 3 + +# State 2 +# ... +# o.. +# ..x +IsTerminal() = False +History() = [8, 3] +HistoryString() = "8, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "8, 3" +InformationStateString(1) = "8, 3" +ObservationString(0) = "...\no..\n..x" +ObservationString(1) = "...\no..\n..x" +ObservationTensor(0): +◉◉◉ ◯◯◯ ◯◯◯ +◯◉◉ ◉◯◯ ◯◯◯ +◉◉◯ ◯◯◯ ◯◯◉ +ObservationTensor(1): +◉◉◉ ◯◯◯ ◯◯◯ +◯◉◉ ◉◯◯ ◯◯◯ +◉◉◯ ◯◯◯ ◯◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 4, 5, 6, 7] +StringLegalActions() = ["x(0,0)", "x(0,1)", "x(0,2)", "x(1,1)", "x(1,2)", "x(2,0)", "x(2,1)"] + +# Apply action "x(2,0)" +action: 6 + +# State 3 +# ... +# o.. +# x.x +IsTerminal() = False +History() = [8, 3, 6] +HistoryString() = "8, 3, 6" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "8, 3, 6" +InformationStateString(1) = "8, 3, 6" +ObservationString(0) = "...\no..\nx.x" +ObservationString(1) = "...\no..\nx.x" +ObservationTensor(0): +◉◉◉ ◯◯◯ ◯◯◯ +◯◉◉ ◉◯◯ ◯◯◯ +◯◉◯ ◯◯◯ ◉◯◉ +ObservationTensor(1): +◉◉◉ ◯◯◯ ◯◯◯ +◯◉◉ ◉◯◯ ◯◯◯ +◯◉◯ ◯◯◯ ◉◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 4, 5, 7] +StringLegalActions() = ["o(0,0)", "o(0,1)", "o(0,2)", "o(1,1)", "o(1,2)", "o(2,1)"] + +# Apply action "o(0,0)" +action: 0 + +# State 4 +# o.. +# o.. +# x.x +IsTerminal() = False +History() = [8, 3, 6, 0] +HistoryString() = "8, 3, 6, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "8, 3, 6, 0" +InformationStateString(1) = "8, 3, 6, 0" +ObservationString(0) = "o..\no..\nx.x" +ObservationString(1) = "o..\no..\nx.x" +ObservationTensor(0): +◯◉◉ ◉◯◯ ◯◯◯ +◯◉◉ ◉◯◯ ◯◯◯ +◯◉◯ ◯◯◯ ◉◯◉ +ObservationTensor(1): +◯◉◉ ◉◯◯ ◯◯◯ +◯◉◉ ◉◯◯ ◯◯◯ +◯◉◯ ◯◯◯ ◉◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 4, 5, 7] +StringLegalActions() = ["x(0,1)", "x(0,2)", "x(1,1)", "x(1,2)", "x(2,1)"] + +# Apply action "x(0,2)" +action: 2 + +# State 5 +# o.x +# o.. 
+# x.x +IsTerminal() = False +History() = [8, 3, 6, 0, 2] +HistoryString() = "8, 3, 6, 0, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "8, 3, 6, 0, 2" +InformationStateString(1) = "8, 3, 6, 0, 2" +ObservationString(0) = "o.x\no..\nx.x" +ObservationString(1) = "o.x\no..\nx.x" +ObservationTensor(0): +◯◉◯ ◉◯◯ ◯◯◉ +◯◉◉ ◉◯◯ ◯◯◯ +◯◉◯ ◯◯◯ ◉◯◉ +ObservationTensor(1): +◯◉◯ ◉◯◯ ◯◯◉ +◯◉◉ ◉◯◯ ◯◯◯ +◯◉◯ ◯◯◯ ◉◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 4, 5, 7] +StringLegalActions() = ["o(0,1)", "o(1,1)", "o(1,2)", "o(2,1)"] + +# Apply action "o(0,1)" +action: 1 + +# State 6 +# Apply action "x(1,2)" +action: 5 + +# State 7 +# oox +# o.x +# x.x +IsTerminal() = True +History() = [8, 3, 6, 0, 2, 1, 5] +HistoryString() = "8, 3, 6, 0, 2, 1, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "8, 3, 6, 0, 2, 1, 5" +InformationStateString(1) = "8, 3, 6, 0, 2, 1, 5" +ObservationString(0) = "oox\no.x\nx.x" +ObservationString(1) = "oox\no.x\nx.x" +ObservationTensor(0): +◯◯◯ ◉◉◯ ◯◯◉ +◯◉◯ ◉◯◯ ◯◯◉ +◯◉◯ ◯◯◯ ◉◯◉ +ObservationTensor(1): +◯◯◯ ◉◉◯ ◯◯◉ +◯◉◯ ◉◯◯ ◯◯◉ +◯◉◯ ◯◯◯ ◉◯◉ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/tiny_bridge_2p(abstracted=true).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/tiny_bridge_2p(abstracted=true).txt new file mode 100644 index 0000000..2578c84 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/tiny_bridge_2p(abstracted=true).txt @@ -0,0 +1,171 @@ +game: tiny_bridge_2p(abstracted=true) + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Tiny Bridge (Uncontested)" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["abstracted"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "tiny_bridge_2p" +GameType.utility = Utility.IDENTICAL + +NumDistinctActions() = 7 +PolicyTensorShape() = [7] +MaxChanceOutcomes() = 28 +GetParameters() = {abstracted=True} +NumPlayers() = 2 +MinUtility() = -40.0 +MaxUtility() = 35.0 +UtilitySum() = None +InformationStateTensorShape() = [26] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 26 +ObservationTensorShape() = [19] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 19 +MaxGameLength() = 8 +ToString() = "tiny_bridge_2p(abstracted=True)" + +# State 0 +# W:?? E:?? +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "??" +InformationStateString(1) = "??" +InformationStateTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "??" +ObservationString(1) = "??" 
+ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.0357143), (1,0.0357143), (3,0.0357143), (6,0.0357143), (10,0.0357143), (15,0.0357143), (21,0.0357143), (2,0.0357143), (4,0.0357143), (7,0.0357143), (11,0.0357143), (16,0.0357143), (22,0.0357143), (5,0.0357143), (8,0.0357143), (12,0.0357143), (17,0.0357143), (23,0.0357143), (9,0.0357143), (13,0.0357143), (18,0.0357143), (24,0.0357143), (14,0.0357143), (19,0.0357143), (25,0.0357143), (20,0.0357143), (26,0.0357143), (27,0.0357143)] +LegalActions() = [0, 1, 3, 6, 10, 15, 21, 2, 4, 7, 11, 16, 22, 5, 8, 12, 17, 23, 9, 13, 18, 24, 14, 19, 25, 20, 26, 27] +StringLegalActions() = ["HQHJ", "HKHJ", "HAHJ", "SJHJ", "SQHJ", "SKHJ", "SAHJ", "HKHQ", "HAHQ", "SJHQ", "SQHQ", "SKHQ", "SAHQ", "HAHK", "SJHK", "SQHK", "SKHK", "SAHK", "SJHA", "SQHA", "SKHA", "SAHA", "SQSJ", "SKSJ", "SASJ", "SKSQ", "SASQ", "SASK"] + +# Apply action "SKHJ" +action: 15 + +# State 1 +# W:SKHJ E:?? +IsTerminal() = False +History() = [15] +HistoryString() = "15" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "SJHJ SJHK SJHQ SKHJ SKHK SKHQ SQHJ SQHK SQHQ" +InformationStateString(1) = "??" +InformationStateTensor(0): ◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "SJHJ SJHK SJHQ SKHJ SKHK SKHQ SQHJ SQHK SQHQ" +ObservationString(1) = "??" +ObservationTensor(0): ◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(2,0.0666667), (4,0.0666667), (7,0.0666667), (11,0.0666667), (22,0.0666667), (5,0.0666667), (8,0.0666667), (12,0.0666667), (23,0.0666667), (9,0.0666667), (13,0.0666667), (24,0.0666667), (14,0.0666667), (25,0.0666667), (26,0.0666667)] +LegalActions() = [2, 4, 7, 11, 22, 5, 8, 12, 23, 9, 13, 24, 14, 25, 26] +StringLegalActions() = ["HKHQ", "HAHQ", "SJHQ", "SQHQ", "SAHQ", "HAHK", "SJHK", "SQHK", "SAHK", "SJHA", "SQHA", "SAHA", "SQSJ", "SASJ", "SASQ"] + +# Apply action "SQSJ" +action: 14 + +# State 2 +# W:SKHJ E:SQSJ +IsTerminal() = False +History() = [15, 14] +HistoryString() = "15, 14" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "SJHJ SJHK SJHQ SKHJ SKHK SKHQ SQHJ SQHK SQHQ" +InformationStateString(1) = "SQSJ" +InformationStateTensor(0): ◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "SJHJ SJHK SJHQ SKHJ SKHK SKHQ SQHJ SQHK SQHQ" +ObservationString(1) = "SQSJ" +ObservationTensor(0): ◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6] +StringLegalActions() = ["Pass", "1H", "1S", "1NT", "2H", "2S", "2NT"] + +# Apply action "2S" +action: 5 + +# State 3 +# W:SKHJ E:SQSJ 2S +IsTerminal() = False +History() = [15, 14, 5] +HistoryString() = "15, 14, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "SJHJ SJHK SJHQ SKHJ SKHK SKHQ SQHJ SQHK SQHQ Us 2S" +InformationStateString(1) = "SQSJ Pd 2S" +InformationStateTensor(0): ◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯ +InformationStateTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationString(0) = "SJHJ SJHK SJHQ SKHJ SKHK SKHQ SQHJ SQHK SQHQ 2S:Us" +ObservationString(1) = "SQSJ 2S:Pd" +ObservationTensor(0): ◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 6] +StringLegalActions() = ["Pass", "2NT"] + +# Apply action 
"2NT" +action: 6 + +# State 4 +# W:SKHJ E:SQSJ 2S-2NT +IsTerminal() = False +History() = [15, 14, 5, 6] +HistoryString() = "15, 14, 5, 6" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "SJHJ SJHK SJHQ SKHJ SKHK SKHQ SQHJ SQHK SQHQ Us 2S-2NT" +InformationStateString(1) = "SQSJ Pd 2S-2NT" +InformationStateTensor(0): ◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉ +InformationStateTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◉◯ +ObservationString(0) = "SJHJ SJHK SJHQ SKHJ SKHK SKHQ SQHJ SQHK SQHQ 2NT:Pd" +ObservationString(1) = "SQSJ 2NT:Us" +ObservationTensor(0): ◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0] +StringLegalActions() = ["Pass"] + +# Apply action "Pass" +action: 0 + +# State 5 +# W:SKHJ E:SQSJ 2S-2NT-Pass +IsTerminal() = True +History() = [15, 14, 5, 6, 0] +HistoryString() = "15, 14, 5, 6, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "SJHJ SJHK SJHQ SKHJ SKHK SKHQ SQHJ SQHK SQHQ Us 2S-2NT-Pass" +InformationStateString(1) = "SQSJ Pd 2S-2NT-Pass" +InformationStateTensor(0): ◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◉ +InformationStateTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◉◉◯ +ObservationString(0) = "SJHJ SJHK SJHQ SKHJ SKHK SKHQ SQHJ SQHK SQHQ 2NT:Pd" +ObservationString(1) = "SQSJ 2NT:Us" +ObservationTensor(0): ◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯ +Rewards() = [-40, -40] +Returns() = [-40, -40] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/tiny_bridge_2p.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/tiny_bridge_2p.txt new file mode 100644 index 0000000..df9e99b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/tiny_bridge_2p.txt @@ -0,0 +1,171 @@ +game: tiny_bridge_2p + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Tiny Bridge (Uncontested)" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["abstracted"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "tiny_bridge_2p" +GameType.utility = Utility.IDENTICAL + +NumDistinctActions() = 7 +PolicyTensorShape() = [7] +MaxChanceOutcomes() = 28 +GetParameters() = {abstracted=False} +NumPlayers() = 2 +MinUtility() = -40.0 +MaxUtility() = 35.0 +UtilitySum() = None +InformationStateTensorShape() = [22] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 22 +ObservationTensorShape() = [15] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 15 +MaxGameLength() = 8 +ToString() = "tiny_bridge_2p()" + +# State 0 +# W:?? E:?? +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "??" +InformationStateString(1) = "??" +InformationStateTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "??" +ObservationString(1) = "??" 
+ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.0357143), (1,0.0357143), (3,0.0357143), (6,0.0357143), (10,0.0357143), (15,0.0357143), (21,0.0357143), (2,0.0357143), (4,0.0357143), (7,0.0357143), (11,0.0357143), (16,0.0357143), (22,0.0357143), (5,0.0357143), (8,0.0357143), (12,0.0357143), (17,0.0357143), (23,0.0357143), (9,0.0357143), (13,0.0357143), (18,0.0357143), (24,0.0357143), (14,0.0357143), (19,0.0357143), (25,0.0357143), (20,0.0357143), (26,0.0357143), (27,0.0357143)] +LegalActions() = [0, 1, 3, 6, 10, 15, 21, 2, 4, 7, 11, 16, 22, 5, 8, 12, 17, 23, 9, 13, 18, 24, 14, 19, 25, 20, 26, 27] +StringLegalActions() = ["HQHJ", "HKHJ", "HAHJ", "SJHJ", "SQHJ", "SKHJ", "SAHJ", "HKHQ", "HAHQ", "SJHQ", "SQHQ", "SKHQ", "SAHQ", "HAHK", "SJHK", "SQHK", "SKHK", "SAHK", "SJHA", "SQHA", "SKHA", "SAHA", "SQSJ", "SKSJ", "SASJ", "SKSQ", "SASQ", "SASK"] + +# Apply action "HAHK" +action: 5 + +# State 1 +# W:HAHK E:?? +IsTerminal() = False +History() = [5] +HistoryString() = "5" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "HAHK" +InformationStateString(1) = "??" +InformationStateTensor(0): ◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "HAHK" +ObservationString(1) = "??" +ObservationTensor(0): ◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.0666667), (6,0.0666667), (10,0.0666667), (15,0.0666667), (21,0.0666667), (7,0.0666667), (11,0.0666667), (16,0.0666667), (22,0.0666667), (14,0.0666667), (19,0.0666667), (25,0.0666667), (20,0.0666667), (26,0.0666667), (27,0.0666667)] +LegalActions() = [0, 6, 10, 15, 21, 7, 11, 16, 22, 14, 19, 25, 20, 26, 27] +StringLegalActions() = ["HQHJ", "SJHJ", "SQHJ", "SKHJ", "SAHJ", "SJHQ", "SQHQ", "SKHQ", "SAHQ", "SQSJ", "SKSJ", "SASJ", "SKSQ", "SASQ", "SASK"] + +# Apply action "SASJ" +action: 25 + +# State 2 +# W:HAHK E:SASJ +IsTerminal() = False +History() = [5, 25] +HistoryString() = "5, 25" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "HAHK" +InformationStateString(1) = "SASJ" +InformationStateTensor(0): ◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "HAHK" +ObservationString(1) = "SASJ" +ObservationTensor(0): ◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6] +StringLegalActions() = ["Pass", "1H", "1S", "1NT", "2H", "2S", "2NT"] + +# Apply action "2S" +action: 5 + +# State 3 +# W:HAHK E:SASJ 2S +IsTerminal() = False +History() = [5, 25, 5] +HistoryString() = "5, 25, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "HAHK Us 2S" +InformationStateString(1) = "SASJ Pd 2S" +InformationStateTensor(0): ◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯ +InformationStateTensor(1): ◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +ObservationString(0) = "HAHK 2S:Us" +ObservationString(1) = "SASJ 2S:Pd" +ObservationTensor(0): ◯◯◉◉◯◯◯◯◯◯◯◯◯◉◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◉◯◯◯◯◯◉◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 6] +StringLegalActions() = ["Pass", "2NT"] + +# Apply action "2NT" +action: 6 + +# State 4 +# W:HAHK E:SASJ 2S-2NT +IsTerminal() = False +History() = [5, 25, 5, 6] +HistoryString() = "5, 25, 5, 6" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "HAHK Us 2S-2NT" +InformationStateString(1) = "SASJ Pd 
2S-2NT" +InformationStateTensor(0): ◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉ +InformationStateTensor(1): ◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◉◯ +ObservationString(0) = "HAHK 2NT:Pd" +ObservationString(1) = "SASJ 2NT:Us" +ObservationTensor(0): ◯◯◉◉◯◯◯◯◯◯◯◯◯◯◉ +ObservationTensor(1): ◯◯◯◯◉◯◯◉◯◯◯◯◯◯◉ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0] +StringLegalActions() = ["Pass"] + +# Apply action "Pass" +action: 0 + +# State 5 +# W:HAHK E:SASJ 2S-2NT-Pass +IsTerminal() = True +History() = [5, 25, 5, 6, 0] +HistoryString() = "5, 25, 5, 6, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "HAHK Us 2S-2NT-Pass" +InformationStateString(1) = "SASJ Pd 2S-2NT-Pass" +InformationStateTensor(0): ◯◯◉◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◉ +InformationStateTensor(1): ◯◯◯◯◉◯◯◉◯◉◯◯◯◯◯◯◯◯◯◉◉◯ +ObservationString(0) = "HAHK 2NT:Pd" +ObservationString(1) = "SASJ 2NT:Us" +ObservationTensor(0): ◯◯◉◉◯◯◯◯◉◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◉◯◯◉◉◯◯◯◯◯◯ +Rewards() = [25.8333, 25.8333] +Returns() = [25.8333, 25.8333] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/tiny_bridge_4p.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/tiny_bridge_4p.txt new file mode 100644 index 0000000..945cd66 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/tiny_bridge_4p.txt @@ -0,0 +1,419 @@ +game: tiny_bridge_4p + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Tiny Bridge (Contested)" +GameType.max_num_players = 4 +GameType.min_num_players = 4 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "tiny_bridge_4p" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 9 +PolicyTensorShape() = [9] +MaxChanceOutcomes() = 28 +GetParameters() = {} +NumPlayers() = 4 +MinUtility() = -160.0 +MaxUtility() = 160.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = [84] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 84 +ObservationTensorShape() = [30] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 30 +MaxGameLength() = 57 +ToString() = "tiny_bridge_4p()" + +# State 0 +# W:?? N:?? E:?? S:?? +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "??" +InformationStateString(1) = "??" +InformationStateString(2) = "??" +InformationStateString(3) = "??" +InformationStateTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(2): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(3): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "??" +ObservationString(1) = "??" +ObservationString(2) = "??" +ObservationString(3) = "??" 
+ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +ObservationTensor(2): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(3): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.0357143), (1,0.0357143), (3,0.0357143), (6,0.0357143), (10,0.0357143), (15,0.0357143), (21,0.0357143), (2,0.0357143), (4,0.0357143), (7,0.0357143), (11,0.0357143), (16,0.0357143), (22,0.0357143), (5,0.0357143), (8,0.0357143), (12,0.0357143), (17,0.0357143), (23,0.0357143), (9,0.0357143), (13,0.0357143), (18,0.0357143), (24,0.0357143), (14,0.0357143), (19,0.0357143), (25,0.0357143), (20,0.0357143), (26,0.0357143), (27,0.0357143)] +LegalActions() = [0, 1, 3, 6, 10, 15, 21, 2, 4, 7, 11, 16, 22, 5, 8, 12, 17, 23, 9, 13, 18, 24, 14, 19, 25, 20, 26, 27] +StringLegalActions() = ["HQHJ", "HKHJ", "HAHJ", "SJHJ", "SQHJ", "SKHJ", "SAHJ", "HKHQ", "HAHQ", "SJHQ", "SQHQ", "SKHQ", "SAHQ", "HAHK", "SJHK", "SQHK", "SKHK", "SAHK", "SJHA", "SQHA", "SKHA", "SAHA", "SQSJ", "SKSJ", "SASJ", "SKSQ", "SASQ", "SASK"] + +# Apply action "SAHJ" +action: 21 + +# State 1 +# W:SAHJ N:?? E:?? S:?? +IsTerminal() = False +History() = [21] +HistoryString() = "21" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "SAHJ" +InformationStateString(1) = "??" +InformationStateString(2) = "??" +InformationStateString(3) = "??" +InformationStateTensor(0): ◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(2): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(3): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "SAHJ" +ObservationString(1) = "??" +ObservationString(2) = "??" +ObservationString(3) = "??" 
+ObservationTensor(0): ◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +ObservationTensor(2): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(3): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(2,0.0666667), (4,0.0666667), (7,0.0666667), (11,0.0666667), (16,0.0666667), (5,0.0666667), (8,0.0666667), (12,0.0666667), (17,0.0666667), (9,0.0666667), (13,0.0666667), (18,0.0666667), (14,0.0666667), (19,0.0666667), (20,0.0666667)] +LegalActions() = [2, 4, 7, 11, 16, 5, 8, 12, 17, 9, 13, 18, 14, 19, 20] +StringLegalActions() = ["HKHQ", "HAHQ", "SJHQ", "SQHQ", "SKHQ", "HAHK", "SJHK", "SQHK", "SKHK", "SJHA", "SQHA", "SKHA", "SQSJ", "SKSJ", "SKSQ"] + +# Apply action "HAHQ" +action: 4 + +# State 2 +# Apply action "SKHK" +action: 17 + +# State 3 +# Apply action "SQSJ" +action: 14 + +# State 4 +# W:SAHJ N:HAHQ E:SKHK S:SQSJ +IsTerminal() = False +History() = [21, 4, 17, 14] +HistoryString() = "21, 4, 17, 14" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "SAHJ" +InformationStateString(1) = "HAHQ" +InformationStateString(2) = "SKHK" +InformationStateString(3) = "SQSJ" +InformationStateTensor(0): ◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(2): ◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(3): ◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "SAHJ" +ObservationString(1) = "HAHQ" +ObservationString(2) = "SKHK" +ObservationString(3) = "SQSJ" +ObservationTensor(0): ◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +ObservationTensor(2): ◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(3): ◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6] +StringLegalActions() = ["Pass", "1H", "1S", "1NT", "2H", "2S", "2NT"] + +# Apply action "Pass" +action: 0 + +# State 5 +# W:SAHJ N:HAHQ E:SKHK S:SQSJ Pass +IsTerminal() = False +History() = [21, 4, 17, 14, 0] +HistoryString() = "21, 4, 17, 14, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "SAHJ Us Pass" +InformationStateString(1) = "HAHQ RH Pass" +InformationStateString(2) = "SKHK Pd Pass" +InformationStateString(3) = "SQSJ LH Pass" +InformationStateTensor(0): ◉◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(2): ◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(3): ◯◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "SAHJ" +ObservationString(1) = "HAHQ" +ObservationString(2) = "SKHK" +ObservationString(3) = "SQSJ" +ObservationTensor(0): ◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯ +ObservationTensor(2): ◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯ +ObservationTensor(3): ◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6] +StringLegalActions() = ["Pass", "1H", "1S", "1NT", "2H", "2S", "2NT"] + +# Apply action "2H" +action: 4 + +# State 6 
+# W:SAHJ N:HAHQ E:SKHK S:SQSJ Pass-2H +IsTerminal() = False +History() = [21, 4, 17, 14, 0, 4] +HistoryString() = "21, 4, 17, 14, 0, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "SAHJ Us Pass-2H" +InformationStateString(1) = "HAHQ RH Pass-2H" +InformationStateString(2) = "SKHK Pd Pass-2H" +InformationStateString(3) = "SQSJ LH Pass-2H" +InformationStateTensor(0): ◉◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(2): ◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(3): ◯◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "SAHJ 2H:LH" +ObservationString(1) = "HAHQ 2H:Us" +ObservationString(2) = "SKHK 2H:RH" +ObservationString(3) = "SQSJ 2H:Pd" +ObservationTensor(0): ◉◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯ +ObservationTensor(1): ◯◉◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◉◯◯ +ObservationTensor(2): ◯◯◉◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯ +ObservationTensor(3): ◯◯◯◯◉◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0, 5, 6, 7] +StringLegalActions() = ["Pass", "2S", "2NT", "Dbl"] + +# Apply action "2S" +action: 5 + +# State 7 +# W:SAHJ N:HAHQ E:SKHK S:SQSJ Pass-2H-2S +IsTerminal() = False +History() = [21, 4, 17, 14, 0, 4, 5] +HistoryString() = "21, 4, 17, 14, 0, 4, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "SAHJ Us Pass-2H-2S" +InformationStateString(1) = "HAHQ RH Pass-2H-2S" +InformationStateString(2) = "SKHK Pd Pass-2H-2S" +InformationStateString(3) = "SQSJ LH Pass-2H-2S" +InformationStateTensor(0): ◉◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(2): ◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(3): ◯◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "SAHJ 2S:Pd" +ObservationString(1) = "HAHQ 2S:LH" +ObservationString(2) = "SKHK 2S:Us" +ObservationString(3) = "SQSJ 2S:RH" +ObservationTensor(0): ◉◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯ +ObservationTensor(1): ◯◉◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉◯ +ObservationTensor(2): ◯◯◉◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯ +ObservationTensor(3): ◯◯◯◯◉◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0, 6, 7] +StringLegalActions() = ["Pass", "2NT", "Dbl"] + +# Apply action "2NT" +action: 6 + +# State 8 +# W:SAHJ N:HAHQ E:SKHK S:SQSJ Pass-2H-2S-2NT +IsTerminal() = False +History() = [21, 4, 17, 14, 0, 4, 5, 6] +HistoryString() = "21, 4, 17, 14, 0, 4, 5, 6" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "SAHJ Us Pass-2H-2S-2NT" +InformationStateString(1) = "HAHQ RH Pass-2H-2S-2NT" +InformationStateString(2) = "SKHK Pd Pass-2H-2S-2NT" +InformationStateString(3) = "SQSJ LH Pass-2H-2S-2NT" +InformationStateTensor(0): ◉◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉ +InformationStateTensor(1): ◯◉◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯ +InformationStateTensor(2): 
◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯ +InformationStateTensor(3): ◯◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯ +ObservationString(0) = "SAHJ 2NT:RH" +ObservationString(1) = "HAHQ 2NT:Pd" +ObservationString(2) = "SKHK 2NT:LH" +ObservationString(3) = "SQSJ 2NT:Us" +ObservationTensor(0): ◉◯◯◯◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉ +ObservationTensor(1): ◯◉◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉ +ObservationTensor(2): ◯◯◉◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉ +ObservationTensor(3): ◯◯◯◯◉◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0, 7] +StringLegalActions() = ["Pass", "Dbl"] + +# Apply action "Dbl" +action: 7 + +# State 9 +# W:SAHJ N:HAHQ E:SKHK S:SQSJ Pass-2H-2S-2NT-Dbl +IsTerminal() = False +History() = [21, 4, 17, 14, 0, 4, 5, 6, 7] +HistoryString() = "21, 4, 17, 14, 0, 4, 5, 6, 7" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "SAHJ Us Pass-2H-2S-2NT-Dbl" +InformationStateString(1) = "HAHQ RH Pass-2H-2S-2NT-Dbl" +InformationStateString(2) = "SKHK Pd Pass-2H-2S-2NT-Dbl" +InformationStateString(3) = "SQSJ LH Pass-2H-2S-2NT-Dbl" +InformationStateTensor(0): ◉◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◯◉ +InformationStateTensor(1): ◯◉◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯ +InformationStateTensor(2): ◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯ +InformationStateTensor(3): ◯◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯◉◯◯◯ +ObservationString(0) = "SAHJ 2NT:RH Dbl:Us" +ObservationString(1) = "HAHQ 2NT:Pd Dbl:RH" +ObservationString(2) = "SKHK 2NT:LH Dbl:Pd" +ObservationString(3) = "SQSJ 2NT:Us Dbl:LH" +ObservationTensor(0): ◉◯◯◯◯◯◯◉◯◯◯◉◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉ +ObservationTensor(1): ◯◉◯◉◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◉ +ObservationTensor(2): ◯◯◉◯◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉ +ObservationTensor(3): ◯◯◯◯◉◉◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0, 8] +StringLegalActions() = ["Pass", "RDbl"] + +# Apply action "RDbl" +action: 8 + +# State 10 +# W:SAHJ N:HAHQ E:SKHK S:SQSJ Pass-2H-2S-2NT-Dbl-RDbl +IsTerminal() = False +History() = [21, 4, 17, 14, 0, 4, 5, 6, 7, 8] +HistoryString() = "21, 4, 17, 14, 0, 4, 5, 6, 7, 8" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "SAHJ Us Pass-2H-2S-2NT-Dbl-RDbl" +InformationStateString(1) = "HAHQ RH Pass-2H-2S-2NT-Dbl-RDbl" +InformationStateString(2) = "SKHK Pd Pass-2H-2S-2NT-Dbl-RDbl" +InformationStateString(3) = "SQSJ LH Pass-2H-2S-2NT-Dbl-RDbl" +InformationStateTensor(0): ◉◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◉◯◯◯◯◯◉ +InformationStateTensor(1): ◯◉◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◉◯◯◯◯◯◉◯ +InformationStateTensor(2): ◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◉◯◯ +InformationStateTensor(3): ◯◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◉◯◉◯◯◯ +ObservationString(0) = "SAHJ 2NT:RH Dbl:Us RDbl:LH" +ObservationString(1) = "HAHQ 2NT:Pd Dbl:RH RDbl:Us" +ObservationString(2) = "SKHK 2NT:LH Dbl:Pd RDbl:RH" +ObservationString(3) = "SQSJ 2NT:Us Dbl:LH RDbl:Pd" +ObservationTensor(0): ◉◯◯◯◯◯◯◉◯◯◯◉◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◉ +ObservationTensor(1): ◯◉◯◉◯◯◯◯◯◯◉◯◯◯◯◉◉◯◯◯◯◯◯◉◯◯◯◯◯◉ +ObservationTensor(2): ◯◯◉◯◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◉ 
+ObservationTensor(3): ◯◯◯◯◉◉◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◉ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0] +StringLegalActions() = ["Pass"] + +# Apply action "Pass" +action: 0 + +# State 11 +# W:SAHJ N:HAHQ E:SKHK S:SQSJ Pass-2H-2S-2NT-Dbl-RDbl-Pass +IsTerminal() = False +History() = [21, 4, 17, 14, 0, 4, 5, 6, 7, 8, 0] +HistoryString() = "21, 4, 17, 14, 0, 4, 5, 6, 7, 8, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 3 +InformationStateString(0) = "SAHJ Us Pass-2H-2S-2NT-Dbl-RDbl-Pass" +InformationStateString(1) = "HAHQ RH Pass-2H-2S-2NT-Dbl-RDbl-Pass" +InformationStateString(2) = "SKHK Pd Pass-2H-2S-2NT-Dbl-RDbl-Pass" +InformationStateString(3) = "SQSJ LH Pass-2H-2S-2NT-Dbl-RDbl-Pass" +InformationStateTensor(0): ◉◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◉◯◯◯◯◯◉ +InformationStateTensor(1): ◯◉◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◉◯◯◯◯◯◉◯ +InformationStateTensor(2): ◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◉◯◯ +InformationStateTensor(3): ◯◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◉◯◉◯◯◯ +ObservationString(0) = "SAHJ 2NT:RH Dbl:Us RDbl:LH" +ObservationString(1) = "HAHQ 2NT:Pd Dbl:RH RDbl:Us" +ObservationString(2) = "SKHK 2NT:LH Dbl:Pd RDbl:RH" +ObservationString(3) = "SQSJ 2NT:Us Dbl:LH RDbl:Pd" +ObservationTensor(0): ◉◯◯◯◯◯◯◉◯◯◯◉◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◉ +ObservationTensor(1): ◯◉◯◉◯◯◯◯◯◯◉◯◯◯◯◉◉◯◯◯◯◯◯◉◯◯◯◯◯◉ +ObservationTensor(2): ◯◯◉◯◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◉ +ObservationTensor(3): ◯◯◯◯◉◉◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◉ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0] +StringLegalActions() = ["Pass"] + +# Apply action "Pass" +action: 0 + +# State 12 +# W:SAHJ N:HAHQ E:SKHK S:SQSJ Pass-2H-2S-2NT-Dbl-RDbl-Pass-Pass +IsTerminal() = False +History() = [21, 4, 17, 14, 0, 4, 5, 6, 7, 8, 0, 0] +HistoryString() = "21, 4, 17, 14, 0, 4, 5, 6, 7, 8, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "SAHJ Us Pass-2H-2S-2NT-Dbl-RDbl-Pass-Pass" +InformationStateString(1) = "HAHQ RH Pass-2H-2S-2NT-Dbl-RDbl-Pass-Pass" +InformationStateString(2) = "SKHK Pd Pass-2H-2S-2NT-Dbl-RDbl-Pass-Pass" +InformationStateString(3) = "SQSJ LH Pass-2H-2S-2NT-Dbl-RDbl-Pass-Pass" +InformationStateTensor(0): ◉◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◉◯◯◯◯◯◉ +InformationStateTensor(1): ◯◉◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◉◯◯◯◯◯◉◯ +InformationStateTensor(2): ◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◉◯◯ +InformationStateTensor(3): ◯◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◉◯◉◯◯◯ +ObservationString(0) = "SAHJ 2NT:RH Dbl:Us RDbl:LH" +ObservationString(1) = "HAHQ 2NT:Pd Dbl:RH RDbl:Us" +ObservationString(2) = "SKHK 2NT:LH Dbl:Pd RDbl:RH" +ObservationString(3) = "SQSJ 2NT:Us Dbl:LH RDbl:Pd" +ObservationTensor(0): ◉◯◯◯◯◯◯◉◯◯◯◉◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◉ +ObservationTensor(1): ◯◉◯◉◯◯◯◯◯◯◉◯◯◯◯◉◉◯◯◯◯◯◯◉◯◯◯◯◯◉ +ObservationTensor(2): ◯◯◉◯◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◉ +ObservationTensor(3): ◯◯◯◯◉◉◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◉ +Rewards() = [0, 0, 0, 0] +Returns() = [0, 0, 0, 0] +LegalActions() = [0] +StringLegalActions() = ["Pass"] + +# Apply action "Pass" +action: 0 + +# State 13 +# W:SAHJ N:HAHQ E:SKHK S:SQSJ Pass-2H-2S-2NT-Dbl-RDbl-Pass-Pass-Pass +IsTerminal() = True +History() = [21, 4, 17, 14, 0, 4, 5, 6, 7, 8, 0, 0, 0] +HistoryString() = 
"21, 4, 17, 14, 0, 4, 5, 6, 7, 8, 0, 0, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "SAHJ Us Pass-2H-2S-2NT-Dbl-RDbl-Pass-Pass-Pass" +InformationStateString(1) = "HAHQ RH Pass-2H-2S-2NT-Dbl-RDbl-Pass-Pass-Pass" +InformationStateString(2) = "SKHK Pd Pass-2H-2S-2NT-Dbl-RDbl-Pass-Pass-Pass" +InformationStateString(3) = "SQSJ LH Pass-2H-2S-2NT-Dbl-RDbl-Pass-Pass-Pass" +InformationStateTensor(0): ◉◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◉◯◯◯◯◯◉ +InformationStateTensor(1): ◯◉◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◉◯◯◯◯◯◉◯ +InformationStateTensor(2): ◯◯◉◯◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◉◯◯ +InformationStateTensor(3): ◯◯◯◯◉◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◉◯◉◯◯◯ +ObservationString(0) = "SAHJ 2NT:RH Dbl:Us RDbl:LH" +ObservationString(1) = "HAHQ 2NT:Pd Dbl:RH RDbl:Us" +ObservationString(2) = "SKHK 2NT:LH Dbl:Pd RDbl:RH" +ObservationString(3) = "SQSJ 2NT:Us Dbl:LH RDbl:Pd" +ObservationTensor(0): ◉◯◯◯◯◯◯◉◯◯◯◉◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◉ +ObservationTensor(1): ◯◉◯◉◯◯◯◯◯◯◉◯◯◯◯◉◉◯◯◯◯◯◯◉◯◯◯◯◯◉ +ObservationTensor(2): ◯◯◉◯◯◯◉◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◉ +ObservationTensor(3): ◯◯◯◯◉◉◯◯◉◯◯◯◯◉◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◉ +Rewards() = [80, -80, 80, -80] +Returns() = [80, -80, 80, -80] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/tiny_hanabi(2p2a2c_hard5) b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/tiny_hanabi(2p2a2c_hard5) new file mode 100644 index 0000000..409f8e6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/tiny_hanabi(2p2a2c_hard5) @@ -0,0 +1,146 @@ +game: tiny_hanabi(num_players=2,num_chance=2,num_actions=2,payoff=3;0;0;3;2;0;3;3;2;2;3;0;0;1;0;2) + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Tiny Hanabi" +GameType.max_num_players = 10 +GameType.min_num_players = 2 +GameType.parameter_specification = ["num_actions", "num_chance", "num_players", "payoff"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "tiny_hanabi" +GameType.utility = Utility.IDENTICAL + +NumDistinctActions() = 2 +PolicyTensorShape() = [2] +MaxChanceOutcomes() = 2 +GetParameters() = {num_actions=2,num_chance=2,num_players=2,payoff=3;0;0;3;2;0;3;3;2;2;3;0;0;1;0;2} +NumPlayers() = 2 +MinUtility() = 0.0 +MaxUtility() = 3.0 +UtilitySum() = None +InformationStateTensorShape() = [6] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 6 +ObservationTensorShape() = [6] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 6 +MaxGameLength() = 2 +ToString() = "tiny_hanabi(num_actions=2,num_chance=2,num_players=2,payoff=3;0;0;3;2;0;3;3;2;2;3;0;0;1;0;2)" + +# State 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "p0" +InformationStateString(1) = "p1" +InformationStateTensor(0): ◯◯◯◯◯◯ +InformationStateTensor(1): ◯◯◯◯◯◯ +ObservationString(0) = "p0" +ObservationString(1) = "p1" 
+ObservationTensor(0): ◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.5), (1,0.5)] +LegalActions() = [0, 1] +StringLegalActions() = ["d0", "d1"] + +# Apply action "d1" +action: 1 + +# State 1 +# p0:d1 +IsTerminal() = False +History() = [1] +HistoryString() = "1" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "p0:d1" +InformationStateString(1) = "p1" +InformationStateTensor(0): ◯◉◯◯◯◯ +InformationStateTensor(1): ◯◯◯◯◯◯ +ObservationString(0) = "p0:d1" +ObservationString(1) = "p1" +ObservationTensor(0): ◯◉◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.5), (1,0.5)] +LegalActions() = [0, 1] +StringLegalActions() = ["d0", "d1"] + +# Apply action "d1" +action: 1 + +# State 2 +# p0:d1 p1:d1 +IsTerminal() = False +History() = [1, 1] +HistoryString() = "1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "p0:d1" +InformationStateString(1) = "p1:d1" +InformationStateTensor(0): ◯◉◯◯◯◯ +InformationStateTensor(1): ◯◉◯◯◯◯ +ObservationString(0) = "p0:d1" +ObservationString(1) = "p1:d1" +ObservationTensor(0): ◯◉◯◯◯◯ +ObservationTensor(1): ◯◉◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["p0a0", "p0a1"] + +# Apply action "p0a0" +action: 0 + +# State 3 +# p0:d1 p1:d1 p0:a0 +IsTerminal() = False +History() = [1, 1, 0] +HistoryString() = "1, 1, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "p0:d1 p0:a0" +InformationStateString(1) = "p1:d1 p0:a0" +InformationStateTensor(0): ◯◉◉◯◯◯ +InformationStateTensor(1): ◯◉◉◯◯◯ +ObservationString(0) = "p0:d1 p0:a0" +ObservationString(1) = "p1:d1 p0:a0" +ObservationTensor(0): ◯◉◉◯◯◯ +ObservationTensor(1): ◯◉◉◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1] +StringLegalActions() = ["p1a0", "p1a1"] + +# Apply action "p1a1" +action: 1 + +# State 4 +# p0:d1 p1:d1 p0:a0 p1:a1 +IsTerminal() = True +History() = [1, 1, 0, 1] +HistoryString() = "1, 1, 0, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "p0:d1 p0:a0 p1:a1" +InformationStateString(1) = "p1:d1 p0:a0 p1:a1" +InformationStateTensor(0): ◯◉◉◯◯◉ +InformationStateTensor(1): ◯◉◉◯◯◉ +ObservationString(0) = "p0:d1 p0:a0 p1:a1" +ObservationString(1) = "p1:d1 p0:a0 p1:a1" +ObservationTensor(0): ◯◉◉◯◯◉ +ObservationTensor(1): ◯◉◉◯◯◉ +Rewards() = [1, 1] +Returns() = [1, 1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/tiny_hanabi.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/tiny_hanabi.txt new file mode 100644 index 0000000..ce066c7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/tiny_hanabi.txt @@ -0,0 +1,146 @@ +game: tiny_hanabi + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Tiny Hanabi" +GameType.max_num_players = 10 +GameType.min_num_players = 2 +GameType.parameter_specification = ["num_actions", "num_chance", "num_players", "payoff"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = 
"tiny_hanabi" +GameType.utility = Utility.IDENTICAL + +NumDistinctActions() = 3 +PolicyTensorShape() = [3] +MaxChanceOutcomes() = 2 +GetParameters() = {num_actions=3,num_chance=2,num_players=2,payoff=10;0;0;4;8;4;10;0;0;0;0;10;4;8;4;0;0;10;0;0;10;4;8;4;0;0;0;10;0;0;4;8;4;10;0;0} +NumPlayers() = 2 +MinUtility() = 0.0 +MaxUtility() = 10.0 +UtilitySum() = None +InformationStateTensorShape() = [8] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 8 +ObservationTensorShape() = [8] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 8 +MaxGameLength() = 2 +ToString() = "tiny_hanabi()" + +# State 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "p0" +InformationStateString(1) = "p1" +InformationStateTensor(0): ◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◯◯◯◯◯◯◯ +ObservationString(0) = "p0" +ObservationString(1) = "p1" +ObservationTensor(0): ◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.5), (1,0.5)] +LegalActions() = [0, 1] +StringLegalActions() = ["d0", "d1"] + +# Apply action "d1" +action: 1 + +# State 1 +# p0:d1 +IsTerminal() = False +History() = [1] +HistoryString() = "1" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "p0:d1" +InformationStateString(1) = "p1" +InformationStateTensor(0): ◯◉◯◯◯◯◯◯ +InformationStateTensor(1): ◯◯◯◯◯◯◯◯ +ObservationString(0) = "p0:d1" +ObservationString(1) = "p1" +ObservationTensor(0): ◯◉◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.5), (1,0.5)] +LegalActions() = [0, 1] +StringLegalActions() = ["d0", "d1"] + +# Apply action "d1" +action: 1 + +# State 2 +# p0:d1 p1:d1 +IsTerminal() = False +History() = [1, 1] +HistoryString() = "1, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "p0:d1" +InformationStateString(1) = "p1:d1" +InformationStateTensor(0): ◯◉◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◯◯◯◯◯ +ObservationString(0) = "p0:d1" +ObservationString(1) = "p1:d1" +ObservationTensor(0): ◯◉◯◯◯◯◯◯ +ObservationTensor(1): ◯◉◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["p0a0", "p0a1", "p0a2"] + +# Apply action "p0a2" +action: 2 + +# State 3 +# p0:d1 p1:d1 p0:a2 +IsTerminal() = False +History() = [1, 1, 2] +HistoryString() = "1, 1, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "p0:d1 p0:a2" +InformationStateString(1) = "p1:d1 p0:a2" +InformationStateTensor(0): ◯◉◯◯◉◯◯◯ +InformationStateTensor(1): ◯◉◯◯◉◯◯◯ +ObservationString(0) = "p0:d1 p0:a2" +ObservationString(1) = "p1:d1 p0:a2" +ObservationTensor(0): ◯◉◯◯◉◯◯◯ +ObservationTensor(1): ◯◉◯◯◉◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["p1a0", "p1a1", "p1a2"] + +# Apply action "p1a0" +action: 0 + +# State 4 +# p0:d1 p1:d1 p0:a2 p1:a0 +IsTerminal() = True +History() = [1, 1, 2, 0] +HistoryString() = "1, 1, 2, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "p0:d1 p0:a2 p1:a0" +InformationStateString(1) = "p1:d1 p0:a2 p1:a0" +InformationStateTensor(0): ◯◉◯◯◉◉◯◯ +InformationStateTensor(1): ◯◉◯◯◉◉◯◯ +ObservationString(0) = "p0:d1 p0:a2 p1:a0" +ObservationString(1) = "p1:d1 p0:a2 p1:a0" +ObservationTensor(0): ◯◉◯◯◉◉◯◯ +ObservationTensor(1): ◯◉◯◯◉◉◯◯ +Rewards() = [10, 10] +Returns() = [10, 10] diff 
--git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/trade_comm.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/trade_comm.txt new file mode 100644 index 0000000..74bf25f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/trade_comm.txt @@ -0,0 +1,187 @@ +game: trade_comm + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Trading and Communication" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["num_items"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "trade_comm" +GameType.utility = Utility.GENERAL_SUM + +NumDistinctActions() = 110 +PolicyTensorShape() = [110] +MaxChanceOutcomes() = 100 +GetParameters() = {num_items=10} +NumPlayers() = 2 +MinUtility() = 0.0 +MaxUtility() = 1.0 +UtilitySum() = None +InformationStateTensorShape() = [57] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 57 +ObservationTensorShape() = [57] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 57 +MaxGameLength() = 4 +ToString() = "trade_comm()" + +# State 0 +# Initial chance node +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "ChanceNode -- no observation" +InformationStateString(1) = "ChanceNode -- no observation" +InformationStateTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "ChanceNode -- no observation" +ObservationString(1) = "ChanceNode -- no observation" +ObservationTensor(0): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ChanceOutcomes() = [(0,0.01), (1,0.01), (2,0.01), (3,0.01), (4,0.01), (5,0.01), (6,0.01), (7,0.01), (8,0.01), (9,0.01), (10,0.01), (11,0.01), (12,0.01), (13,0.01), (14,0.01), (15,0.01), (16,0.01), (17,0.01), (18,0.01), (19,0.01), (20,0.01), (21,0.01), (22,0.01), (23,0.01), (24,0.01), (25,0.01), (26,0.01), (27,0.01), (28,0.01), (29,0.01), (30,0.01), (31,0.01), (32,0.01), (33,0.01), (34,0.01), (35,0.01), (36,0.01), (37,0.01), (38,0.01), (39,0.01), (40,0.01), (41,0.01), (42,0.01), (43,0.01), (44,0.01), (45,0.01), (46,0.01), (47,0.01), (48,0.01), (49,0.01), (50,0.01), (51,0.01), (52,0.01), (53,0.01), (54,0.01), (55,0.01), (56,0.01), (57,0.01), (58,0.01), (59,0.01), (60,0.01), (61,0.01), (62,0.01), (63,0.01), (64,0.01), (65,0.01), (66,0.01), (67,0.01), (68,0.01), (69,0.01), (70,0.01), (71,0.01), (72,0.01), (73,0.01), (74,0.01), (75,0.01), (76,0.01), (77,0.01), (78,0.01), (79,0.01), (80,0.01), (81,0.01), (82,0.01), (83,0.01), (84,0.01), (85,0.01), (86,0.01), (87,0.01), (88,0.01), (89,0.01), (90,0.01), (91,0.01), (92,0.01), (93,0.01), (94,0.01), (95,0.01), (96,0.01), (97,0.01), (98,0.01), (99,0.01)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 
39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99] +StringLegalActions() = ["Allocate 0 0", "Allocate 0 1", "Allocate 0 2", "Allocate 0 3", "Allocate 0 4", "Allocate 0 5", "Allocate 0 6", "Allocate 0 7", "Allocate 0 8", "Allocate 0 9", "Allocate 1 0", "Allocate 1 1", "Allocate 1 2", "Allocate 1 3", "Allocate 1 4", "Allocate 1 5", "Allocate 1 6", "Allocate 1 7", "Allocate 1 8", "Allocate 1 9", "Allocate 2 0", "Allocate 2 1", "Allocate 2 2", "Allocate 2 3", "Allocate 2 4", "Allocate 2 5", "Allocate 2 6", "Allocate 2 7", "Allocate 2 8", "Allocate 2 9", "Allocate 3 0", "Allocate 3 1", "Allocate 3 2", "Allocate 3 3", "Allocate 3 4", "Allocate 3 5", "Allocate 3 6", "Allocate 3 7", "Allocate 3 8", "Allocate 3 9", "Allocate 4 0", "Allocate 4 1", "Allocate 4 2", "Allocate 4 3", "Allocate 4 4", "Allocate 4 5", "Allocate 4 6", "Allocate 4 7", "Allocate 4 8", "Allocate 4 9", "Allocate 5 0", "Allocate 5 1", "Allocate 5 2", "Allocate 5 3", "Allocate 5 4", "Allocate 5 5", "Allocate 5 6", "Allocate 5 7", "Allocate 5 8", "Allocate 5 9", "Allocate 6 0", "Allocate 6 1", "Allocate 6 2", "Allocate 6 3", "Allocate 6 4", "Allocate 6 5", "Allocate 6 6", "Allocate 6 7", "Allocate 6 8", "Allocate 6 9", "Allocate 7 0", "Allocate 7 1", "Allocate 7 2", "Allocate 7 3", "Allocate 7 4", "Allocate 7 5", "Allocate 7 6", "Allocate 7 7", "Allocate 7 8", "Allocate 7 9", "Allocate 8 0", "Allocate 8 1", "Allocate 8 2", "Allocate 8 3", "Allocate 8 4", "Allocate 8 5", "Allocate 8 6", "Allocate 8 7", "Allocate 8 8", "Allocate 8 9", "Allocate 9 0", "Allocate 9 1", "Allocate 9 2", "Allocate 9 3", "Allocate 9 4", "Allocate 9 5", "Allocate 9 6", "Allocate 9 7", "Allocate 9 8", "Allocate 9 9"] + +# Apply action "Allocate 3 6" +action: 36 + +# State 1 +# Items: 3 6 +# Phase: comm +# Comm history: +# Trade history: +IsTerminal() = False +History() = [36] +HistoryString() = "36" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "Current turn: 0\nMy item: 3\nPhase: comm\nComm history: \nTrade history size: 0\n" +InformationStateString(1) = "Current turn: 0\nMy item: 6\nPhase: comm\nComm history: \nTrade history size: 0\n" +InformationStateTensor(0): ◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "Current turn: 0\nMy item: 3\nPhase: comm\nComm history: \nTrade history size: 0\n" +ObservationString(1) = "Current turn: 0\nMy item: 6\nPhase: comm\nComm history: \nTrade history size: 0\n" +ObservationTensor(0): ◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +StringLegalActions() = ["Utter 0", "Utter 1", "Utter 2", "Utter 3", "Utter 4", "Utter 5", "Utter 6", "Utter 7", "Utter 8", "Utter 9"] + +# Apply action "Utter 5" +action: 5 + +# State 2 +# Items: 3 6 +# Phase: comm +# Comm history: 5 +# Trade history: +IsTerminal() = False +History() = [36, 5] +HistoryString() = "36, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "Current turn: 1\nMy item: 3\nPhase: comm\nComm history: 5\nTrade history size: 0\n" +InformationStateString(1) = "Current turn: 
1\nMy item: 6\nPhase: comm\nComm history: 5\nTrade history size: 0\n" +InformationStateTensor(0): ◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "Current turn: 1\nMy item: 3\nPhase: comm\nComm history: 5\nTrade history size: 0\n" +ObservationString(1) = "Current turn: 1\nMy item: 6\nPhase: comm\nComm history: 5\nTrade history size: 0\n" +ObservationTensor(0): ◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +StringLegalActions() = ["Utter 0", "Utter 1", "Utter 2", "Utter 3", "Utter 4", "Utter 5", "Utter 6", "Utter 7", "Utter 8", "Utter 9"] + +# Apply action "Utter 9" +action: 9 + +# State 3 +# Items: 3 6 +# Phase: trade +# Comm history: 5 9 +# Trade history: +IsTerminal() = False +History() = [36, 5, 9] +HistoryString() = "36, 5, 9" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "Current turn: 0\nMy item: 3\nPhase: trade\nComm history: 5 9\nTrade history size: 0\n" +InformationStateString(1) = "Current turn: 0\nMy item: 6\nPhase: trade\nComm history: 5 9\nTrade history size: 0\n" +InformationStateTensor(0): ◉◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◉◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "Current turn: 0\nMy item: 3\nPhase: trade\nComm history: 5 9\nTrade history size: 0\n" +ObservationString(1) = "Current turn: 0\nMy item: 6\nPhase: trade\nComm history: 5 9\nTrade history size: 0\n" +ObservationTensor(0): ◉◯◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): ◉◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109] +StringLegalActions() = ["Trade 0:0", "Trade 0:1", "Trade 0:2", "Trade 0:3", "Trade 0:4", "Trade 0:5", "Trade 0:6", "Trade 0:7", "Trade 0:8", "Trade 0:9", "Trade 1:0", "Trade 1:1", "Trade 1:2", "Trade 1:3", "Trade 1:4", "Trade 1:5", "Trade 1:6", "Trade 1:7", "Trade 1:8", "Trade 1:9", "Trade 2:0", "Trade 2:1", "Trade 2:2", "Trade 2:3", "Trade 2:4", "Trade 2:5", "Trade 2:6", "Trade 2:7", "Trade 2:8", "Trade 2:9", "Trade 3:0", "Trade 3:1", "Trade 3:2", "Trade 3:3", "Trade 3:4", "Trade 3:5", "Trade 3:6", "Trade 3:7", "Trade 3:8", "Trade 3:9", "Trade 4:0", "Trade 4:1", "Trade 4:2", "Trade 4:3", "Trade 4:4", "Trade 4:5", "Trade 4:6", "Trade 4:7", "Trade 4:8", "Trade 4:9", "Trade 5:0", "Trade 5:1", "Trade 5:2", "Trade 5:3", "Trade 5:4", "Trade 5:5", "Trade 5:6", "Trade 5:7", "Trade 5:8", "Trade 5:9", "Trade 6:0", "Trade 6:1", "Trade 6:2", "Trade 6:3", "Trade 6:4", "Trade 6:5", "Trade 6:6", "Trade 6:7", "Trade 6:8", "Trade 6:9", "Trade 7:0", "Trade 7:1", "Trade 7:2", "Trade 7:3", "Trade 7:4", "Trade 7:5", "Trade 7:6", "Trade 7:7", "Trade 7:8", "Trade 7:9", "Trade 8:0", "Trade 8:1", "Trade 8:2", "Trade 8:3", "Trade 8:4", "Trade 8:5", "Trade 8:6", "Trade 8:7", "Trade 
8:8", "Trade 8:9", "Trade 9:0", "Trade 9:1", "Trade 9:2", "Trade 9:3", "Trade 9:4", "Trade 9:5", "Trade 9:6", "Trade 9:7", "Trade 9:8", "Trade 9:9"] + +# Apply action "Trade 8:5" +action: 95 + +# State 4 +# Items: 3 6 +# Phase: trade +# Comm history: 5 9 +# Trade history: 8:5 +IsTerminal() = False +History() = [36, 5, 9, 95] +HistoryString() = "36, 5, 9, 95" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "Current turn: 1\nMy item: 3\nPhase: trade\nComm history: 5 9\nTrade history size: 1\nObserver's trade offer: 8:5\n" +InformationStateString(1) = "Current turn: 1\nMy item: 6\nPhase: trade\nComm history: 5 9\nTrade history size: 1\n" +InformationStateTensor(0): ◯◉◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯ +InformationStateTensor(1): ◯◉◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "Current turn: 1\nMy item: 3\nPhase: trade\nComm history: 5 9\nTrade history size: 1\nObserver's trade offer: 8:5\n" +ObservationString(1) = "Current turn: 1\nMy item: 6\nPhase: trade\nComm history: 5 9\nTrade history size: 1\n" +ObservationTensor(0): ◯◉◯◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯ +ObservationTensor(1): ◯◉◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109] +StringLegalActions() = ["Trade 0:0", "Trade 0:1", "Trade 0:2", "Trade 0:3", "Trade 0:4", "Trade 0:5", "Trade 0:6", "Trade 0:7", "Trade 0:8", "Trade 0:9", "Trade 1:0", "Trade 1:1", "Trade 1:2", "Trade 1:3", "Trade 1:4", "Trade 1:5", "Trade 1:6", "Trade 1:7", "Trade 1:8", "Trade 1:9", "Trade 2:0", "Trade 2:1", "Trade 2:2", "Trade 2:3", "Trade 2:4", "Trade 2:5", "Trade 2:6", "Trade 2:7", "Trade 2:8", "Trade 2:9", "Trade 3:0", "Trade 3:1", "Trade 3:2", "Trade 3:3", "Trade 3:4", "Trade 3:5", "Trade 3:6", "Trade 3:7", "Trade 3:8", "Trade 3:9", "Trade 4:0", "Trade 4:1", "Trade 4:2", "Trade 4:3", "Trade 4:4", "Trade 4:5", "Trade 4:6", "Trade 4:7", "Trade 4:8", "Trade 4:9", "Trade 5:0", "Trade 5:1", "Trade 5:2", "Trade 5:3", "Trade 5:4", "Trade 5:5", "Trade 5:6", "Trade 5:7", "Trade 5:8", "Trade 5:9", "Trade 6:0", "Trade 6:1", "Trade 6:2", "Trade 6:3", "Trade 6:4", "Trade 6:5", "Trade 6:6", "Trade 6:7", "Trade 6:8", "Trade 6:9", "Trade 7:0", "Trade 7:1", "Trade 7:2", "Trade 7:3", "Trade 7:4", "Trade 7:5", "Trade 7:6", "Trade 7:7", "Trade 7:8", "Trade 7:9", "Trade 8:0", "Trade 8:1", "Trade 8:2", "Trade 8:3", "Trade 8:4", "Trade 8:5", "Trade 8:6", "Trade 8:7", "Trade 8:8", "Trade 8:9", "Trade 9:0", "Trade 9:1", "Trade 9:2", "Trade 9:3", "Trade 9:4", "Trade 9:5", "Trade 9:6", "Trade 9:7", "Trade 9:8", "Trade 9:9"] + +# Apply action "Trade 0:0" +action: 10 + +# State 5 +# Items: 3 6 +# Phase: trade +# Comm history: 5 9 +# Trade history: 8:5 0:0 +IsTerminal() = True +History() = [36, 5, 9, 95, 10] +HistoryString() = "36, 5, 9, 95, 10" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "Current turn: 0\nMy item: 3\nPhase: trade\nComm history: 5 9\nTrade history size: 2\nObserver's trade offer: 8:5\nOther players's trade offer: 
0:0\n" +InformationStateString(1) = "Current turn: 0\nMy item: 6\nPhase: trade\nComm history: 5 9\nTrade history size: 2\nObserver's trade offer: 0:0\nOther players's trade offer: 8:5\n" +InformationStateTensor(0): ◉◯◉◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯ +InformationStateTensor(1): ◉◯◉◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "Current turn: 0\nMy item: 3\nPhase: trade\nComm history: 5 9\nTrade history size: 2\nObserver's trade offer: 8:5\nOther players's trade offer: 0:0\n" +ObservationString(1) = "Current turn: 0\nMy item: 6\nPhase: trade\nComm history: 5 9\nTrade history size: 2\nObserver's trade offer: 0:0\nOther players's trade offer: 8:5\n" +ObservationTensor(0): ◉◯◉◉◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯ +ObservationTensor(1): ◉◯◉◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/twixt.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/twixt.txt new file mode 100644 index 0000000..2f1ec95 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/twixt.txt @@ -0,0 +1,702 @@ +game: twixt + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "TwixT" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["ansi_color_output", "board_size"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "twixt" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 64 +PolicyTensorShape() = [64] +MaxChanceOutcomes() = 0 +GetParameters() = {ansi_color_output=True,board_size=8} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [12, 8, 6] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 576 +MaxGameLength() = 61 +ToString() = "twixt()" + +# State 0 +# a b c d e f g h  +# +# 1  . . . . . . +# +# +# 2  . . . . . . . . +# +# +# 3  . . . . . . . . +# +# +# 4  . . . . . . . . +# +# +# 5  . . . . . . . . +# +# +# 6  . . . . . . . . +# +# +# 7  . . . . . . . . +# +# +# 8  . . . . . . +# +# +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = " a b c d e f g h \n \n 1  . . . . . . \n \n \n 2  . . . . . . . . \n \n \n 3  . . . . . . . . \n \n \n 4  . . . . . . . . \n \n \n 5  . . . . . . . . \n \n \n 6  . . . . . . . . \n \n \n 7  . . . . . . . . \n \n \n 8  . . . . . . \n \n\n" +InformationStateString(1) = " a b c d e f g h \n \n 1  . . . . . . \n \n \n 2  . . . . . . . . \n \n \n 3  . . . . . . . . \n \n \n 4  . . . . . . . . \n \n \n 5  . . . . . . . . \n \n \n 6  . . . . . . . . \n \n \n 7  . . . . . . . . \n \n \n 8  . . . . . . \n \n\n" +ObservationString(0) = " a b c d e f g h \n \n 1  . . . . . . \n \n \n 2  . . . . . . . . \n \n \n 3  . . . . . . . . \n \n \n 4  . . . . . . . . \n \n \n 5  . . . . . . . . \n \n \n 6  . . . . . . . . \n \n \n 7  . . . . . . . . \n \n \n 8  . . . . . . 
\n \n\n" +ObservationString(1) = " a b c d e f g h \n \n 1  . . . . . . \n \n \n 2  . . . . . . . . \n \n \n 3  . . . . . . . . \n \n \n 4  . . . . . . . . \n \n \n 5  . . . . . . . . \n \n \n 6  . . . . . . . . \n \n \n 7  . . . . . . . . \n \n \n 8  . . . . . . \n \n\n" +ObservationTensor(0): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55] +StringLegalActions() = ["xb8", "xb7", "xb6", "xb5", "xb4", "xb3", "xb2", "xb1", "xc8", "xc7", "xc6", "xc5", "xc4", "xc3", "xc2", "xc1", "xd8", "xd7", "xd6", "xd5", "xd4", "xd3", "xd2", "xd1", "xe8", "xe7", "xe6", "xe5", "xe4", "xe3", "xe2", "xe1", "xf8", "xf7", "xf6", "xf5", "xf4", "xf3", "xf2", "xf1", "xg8", "xg7", "xg6", "xg5", "xg4", "xg3", "xg2", "xg1"] + +# Apply action "xb2" +action: 14 + +# State 1 +# a b c d e f g h  +# +# 1  . . . . . . +# +# +# 2  . x . . . . . . +# +# +# 3  . . . . . . . . +# +# +# 4  . . . . . . . . +# +# +# 5  . . . . . . . . +# +# +# 6  . . . . . . . . +# +# +# 7  . . . . . . . . +# +# +# 8  . . . . . . +# +# +IsTerminal() = False +History() = [14] +HistoryString() = "14" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = " a b c d e f g h \n \n 1  . . . . . . \n \n \n 2  . x . . . . . . \n \n \n 3  . . . . . . . . \n \n \n 4  . . . . . . . . \n \n \n 5  . . . . . . . . \n \n \n 6  . . . . . . . . \n \n \n 7  . . . . . . . . \n \n \n 8  . . . . . . \n \n\n" +InformationStateString(1) = " a b c d e f g h \n \n 1  . . . . . . \n \n \n 2  . x . . . . . . \n \n \n 3  . . . . . . . . \n \n \n 4  . . . . . . . . \n \n \n 5  . . . . . . . . \n \n \n 6  . . . . . . . . \n \n \n 7  . . . . . . . . \n \n \n 8  . . . . . . \n \n\n" +ObservationString(0) = " a b c d e f g h \n \n 1  . . . . . . \n \n \n 2  . x . . . . . . \n \n \n 3  . . . . . . . . \n \n \n 4  . . . . . . . . \n \n \n 5  . . . . . . . . \n \n \n 6  . . . . . . . . \n \n \n 7  . . . . . . . . \n \n \n 8  . . . . . . 
\n \n\n" +ObservationString(1) = " a b c d e f g h \n \n 1  . . . . . . \n \n \n 2  . x . . . . . . \n \n \n 3  . . . . . . . . \n \n \n 4  . . . . . . . . \n \n \n 5  . . . . . . . . \n \n \n 6  . . . . . . . . \n \n \n 7  . . . . . . . . \n \n \n 8  . . . . . . \n \n\n" +ObservationTensor(0): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 13, 14, 17, 18, 19, 20, 21, 22, 25, 26, 27, 28, 29, 30, 33, 34, 35, 36, 37, 38, 41, 42, 43, 44, 45, 46, 49, 50, 51, 52, 53, 54, 57, 58, 59, 60, 61, 62] +StringLegalActions() = ["oa7", "oa6", "oa5", "oa4", "oa3", "oa2", "ob7", "ob6", "ob5", "ob4", "ob3", "ob2", "oc7", "oc6", "oc5", "oc4", "oc3", "oc2", "od7", "od6", "od5", "od4", "od3", "od2", "oe7", "oe6", "oe5", "oe4", "oe3", "oe2", "of7", "of6", "of5", "of4", "of3", "of2", "og7", "og6", "og5", "og4", "og3", "og2", "oh7", "oh6", "oh5", "oh4", "oh3", "oh2"] + +# Apply action "ob3" +action: 13 + +# State 2 +# a b c d e f g h  +# +# 1  . . . . . . +# +# +# 2  . x . . . . . . +# +# +# 3  . o . . . . . . +# +# +# 4  . . . . . . . . +# +# +# 5  . . . . . . . . +# +# +# 6  . . . . . . . . +# +# +# 7  . . . . . . . . +# +# +# 8  . . . . . . +# +# +IsTerminal() = False +History() = [14, 13] +HistoryString() = "14, 13" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = " a b c d e f g h \n \n 1  . . . . . . \n \n \n 2  . x . . . . . . \n \n \n 3  . o . . . . . . \n \n \n 4  . . . . . . . . \n \n \n 5  . . . . . . . . \n \n \n 6  . . . . . . . . \n \n \n 7  . . . . . . . . \n \n \n 8  . . . . . . \n \n\n" +InformationStateString(1) = " a b c d e f g h \n \n 1  . . . . . . \n \n \n 2  . x . . . . . . \n \n \n 3  . o . . . . . . \n \n \n 4  . . . . . . . . \n \n \n 5  . . . . . . . . \n \n \n 6  . . . . . . . . \n \n \n 7  . . . . . . . . \n \n \n 8  . . . . . . \n \n\n" +ObservationString(0) = " a b c d e f g h \n \n 1  . . . . . . \n \n \n 2  . x . . . . . . \n \n \n 3  . o . . . . . . \n \n \n 4  . . . . . . . . \n \n \n 5  . . . . . . . . \n \n \n 6  . . . . . . . . \n \n \n 7  . . . . . . . . \n \n \n 8  . . . . . . 
\n \n\n" +ObservationString(1) = " a b c d e f g h \n \n 1  . . . . . . \n \n \n 2  . x . . . . . . \n \n \n 3  . o . . . . . . \n \n \n 4  . . . . . . . . \n \n \n 5  . . . . . . . . \n \n \n 6  . . . . . . . . \n \n \n 7  . . . . . . . . \n \n \n 8  . . . . . . \n \n\n" +ObservationTensor(0): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [8, 9, 10, 11, 12, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55] +StringLegalActions() = ["xb8", "xb7", "xb6", "xb5", "xb4", "xb1", "xc8", "xc7", "xc6", "xc5", "xc4", "xc3", "xc2", "xc1", "xd8", "xd7", "xd6", "xd5", "xd4", "xd3", "xd2", "xd1", "xe8", "xe7", "xe6", "xe5", "xe4", "xe3", "xe2", "xe1", "xf8", "xf7", "xf6", "xf5", "xf4", "xf3", "xf2", "xf1", "xg8", "xg7", "xg6", "xg5", "xg4", "xg3", "xg2", "xg1"] + +# Apply action "xe7" +action: 33 + +# State 3 +# a b c d e f g h  +# +# 1  . . . . . . +# +# +# 2  . x . . . . . . +# +# +# 3  . o . . . . . . +# +# +# 4  . . . . . . . . +# +# +# 5  . . . . . . . . +# +# +# 6  . . . . . . . . +# +# +# 7  . . . . x . . . +# +# +# 8  . . . . . . +# +# +IsTerminal() = False +History() = [14, 13, 33] +HistoryString() = "14, 13, 33" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = " a b c d e f g h \n \n 1  . . . . . . \n \n \n 2  . x . . . . . . \n \n \n 3  . o . . . . . . \n \n \n 4  . . . . . . . . \n \n \n 5  . . . . . . . . \n \n \n 6  . . . . . . . . \n \n \n 7  . . . . x . . . \n \n \n 8  . . . . . . \n \n\n" +InformationStateString(1) = " a b c d e f g h \n \n 1  . . . . . . \n \n \n 2  . x . . . . . . \n \n \n 3  . o . . . . . . \n \n \n 4  . . . . . . . . \n \n \n 5  . . . . . . . . \n \n \n 6  . . . . . . . . \n \n \n 7  . . . . x . . . \n \n \n 8  . . . . . . \n \n\n" +ObservationString(0) = " a b c d e f g h \n \n 1  . . . . . . \n \n \n 2  . x . . . . . . \n \n \n 3  . o . . . . . . \n \n \n 4  . . . . . . . . \n \n \n 5  . . . . . . . . \n \n \n 6  . . . . . . . . \n \n \n 7  . . . . x . . . \n \n \n 8  . . . . . . 
\n \n\n" +ObservationString(1) = " a b c d e f g h \n \n 1  . . . . . . \n \n \n 2  . x . . . . . . \n \n \n 3  . o . . . . . . \n \n \n 4  . . . . . . . . \n \n \n 5  . . . . . . . . \n \n \n 6  . . . . . . . . \n \n \n 7  . . . . x . . . \n \n \n 8  . . . . . . \n \n\n" +ObservationTensor(0): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◉◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◉◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 17, 18, 19, 20, 21, 22, 25, 26, 27, 28, 29, 30, 34, 35, 36, 37, 38, 41, 42, 43, 44, 45, 46, 49, 50, 51, 52, 53, 54, 57, 58, 59, 60, 61, 62] +StringLegalActions() = ["oa7", "oa6", "oa5", "oa4", "oa3", "oa2", "ob7", "ob6", "ob5", "ob4", "oc7", "oc6", "oc5", "oc4", "oc3", "oc2", "od7", "od6", "od5", "od4", "od3", "od2", "oe6", "oe5", "oe4", "oe3", "oe2", "of7", "of6", "of5", "of4", "of3", "of2", "og7", "og6", "og5", "og4", "og3", "og2", "oh7", "oh6", "oh5", "oh4", "oh3", "oh2"] + +# Apply action "oa2" +action: 6 + +# State 4 +# a b c d e f g h  +# +# 1  . . . . . . +# +# +# 2  o x . . . . . . +# +# +# 3  . o . . . . . . +# +# +# 4  . . . . . . . . +# +# +# 5  . . . . . . . . +# +# +# 6  . . . . . . . . +# +# +# 7  . . . . x . . . +# +# +# 8  . . . . . . +# +# +IsTerminal() = False +History() = [14, 13, 33, 6] +HistoryString() = "14, 13, 33, 6" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = " a b c d e f g h \n \n 1  . . . . . . \n \n \n 2  o x . . . . . . \n \n \n 3  . o . . . . . . \n \n \n 4  . . . . . . . . \n \n \n 5  . . . . . . . . \n \n \n 6  . . . . . . . . \n \n \n 7  . . . . x . . . \n \n \n 8  . . . . . . \n \n\n" +InformationStateString(1) = " a b c d e f g h \n \n 1  . . . . . . \n \n \n 2  o x . . . . . . \n \n \n 3  . o . . . . . . \n \n \n 4  . . . . . . . . \n \n \n 5  . . . . . . . . \n \n \n 6  . . . . . . . . \n \n \n 7  . . . . x . . . \n \n \n 8  . . . . . . \n \n\n" +ObservationString(0) = " a b c d e f g h \n \n 1  . . . . . . \n \n \n 2  o x . . . . . . \n \n \n 3  . o . . . . . . \n \n \n 4  . . . . . . . . \n \n \n 5  . . . . . . . . \n \n \n 6  . . . . . . . . \n \n \n 7  . . . . x . . . \n \n \n 8  . . . . . . \n \n\n" +ObservationString(1) = " a b c d e f g h \n \n 1  . 
. . . . . \n \n \n 2  o x . . . . . . \n \n \n 3  . o . . . . . . \n \n \n 4  . . . . . . . . \n \n \n 5  . . . . . . . . \n \n \n 6  . . . . . . . . \n \n \n 7  . . . . x . . . \n \n \n 8  . . . . . . \n \n\n" +ObservationTensor(0): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◉◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◉◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [8, 9, 10, 11, 12, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55] +StringLegalActions() = ["xb8", "xb7", "xb6", "xb5", "xb4", "xb1", "xc8", "xc7", "xc6", "xc5", "xc4", "xc3", "xc2", "xc1", "xd8", "xd7", "xd6", "xd5", "xd4", "xd3", "xd2", "xd1", "xe8", "xe6", "xe5", "xe4", "xe3", "xe2", "xe1", "xf8", "xf7", "xf6", "xf5", "xf4", "xf3", "xf2", "xf1", "xg8", "xg7", "xg6", "xg5", "xg4", "xg3", "xg2", "xg1"] + +# Apply action "xd6" +action: 26 + +# State 5 +# a b c d e f g h  +# +# 1  . . . . . . +# +# +# 2  o x . . . . . . +# +# +# 3  . o . . . . . . +# +# +# 4  . . . . . . . . +# +# +# 5  . . . . . . . . +# +# +# 6  . . . x . . . . +# +# +# 7  . . . . x . . . +# +# +# 8  . . . . . . +# +# +IsTerminal() = False +History() = [14, 13, 33, 6, 26] +HistoryString() = "14, 13, 33, 6, 26" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = " a b c d e f g h \n \n 1  . . . . . . \n \n \n 2  o x . . . . . . \n \n \n 3  . o . . . . . . \n \n \n 4  . . . . . . . . \n \n \n 5  . . . . . . . . \n \n \n 6  . . . x . . . . \n \n \n 7  . . . . x . . . \n \n \n 8  . . . . . . \n \n\n" +InformationStateString(1) = " a b c d e f g h \n \n 1  . . . . . . \n \n \n 2  o x . . . . . . \n \n \n 3  . o . . . . . . \n \n \n 4  . . . . . . . . \n \n \n 5  . . . . . . . . \n \n \n 6  . . . x . . . . \n \n \n 7  . . . . x . . . \n \n \n 8  . . . . . . \n \n\n" +ObservationString(0) = " a b c d e f g h \n \n 1  . . . . . . \n \n \n 2  o x . . . . . . \n \n \n 3  . o . . . . . . \n \n \n 4  . . . . . . . . \n \n \n 5  . . . . . . . . \n \n \n 6  . . . x . . . . \n \n \n 7  . . . . x . . . \n \n \n 8  . . . . . . \n \n\n" +ObservationString(1) = " a b c d e f g h \n \n 1  . . . . . . \n \n \n 2  o x . . . . . . 
\n \n \n 3  . o . . . . . . \n \n \n 4  . . . . . . . . \n \n \n 5  . . . . . . . . \n \n \n 6  . . . x . . . . \n \n \n 7  . . . . x . . . \n \n \n 8  . . . . . . \n \n\n" +ObservationTensor(0): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◉◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◉◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◉◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◉◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 3, 4, 5, 9, 10, 11, 12, 17, 18, 19, 20, 21, 22, 25, 27, 28, 29, 30, 34, 35, 36, 37, 38, 41, 42, 43, 44, 45, 46, 49, 50, 51, 52, 53, 54, 57, 58, 59, 60, 61, 62] +StringLegalActions() = ["oa7", "oa6", "oa5", "oa4", "oa3", "ob7", "ob6", "ob5", "ob4", "oc7", "oc6", "oc5", "oc4", "oc3", "oc2", "od7", "od5", "od4", "od3", "od2", "oe6", "oe5", "oe4", "oe3", "oe2", "of7", "of6", "of5", "of4", "of3", "of2", "og7", "og6", "og5", "og4", "og3", "og2", "oh7", "oh6", "oh5", "oh4", "oh3", "oh2"] + +# Apply action "oh7" +action: 57 + +# State 6 +# Apply action "xc3" +action: 21 + +# State 7 +# Apply action "og7" +action: 49 + +# State 8 +# Apply action "xc5" +action: 19 + +# State 9 +# Apply action "of5" +action: 43 + +# State 10 +# Apply action "xe8" +action: 32 + +# State 11 +# Apply action "of3" +action: 45 + +# State 12 +# Apply action "xg2" +action: 54 + +# State 13 +# Apply action "oh6" +action: 58 + +# State 14 +# Apply action "xb1" +action: 15 + +# State 15 +# Apply action "of2" +action: 46 + +# State 16 +# Apply action "xc4" +action: 20 + +# State 17 +# Apply action "oe2" +action: 38 + +# State 18 +# Apply action "xf4" +action: 44 + +# State 19 +# Apply action "oc2" +action: 22 + +# State 20 +# a b c d e f g h  +# +# 1  x . . . . . +# | +# \ +# 2  o x| o . o o x . +# | \ | +# \ | / +# 3  . o| x . . o |. . +# \ / +# | | +# 4  . . x . . x . . +# | +# \ +# 5  . . x| . . o_ . . +# \ | \_ +# | \ \_ +# 6  . . . x . .| . o +# | \ +# \ | +# 7  . . . .| x . o o +# \ +# | +# 8  . . . x . . +# +# +IsTerminal() = False +History() = [14, 13, 33, 6, 26, 57, 21, 49, 19, 43, 32, 45, 54, 58, 15, 46, 20, 38, 44, 22] +HistoryString() = "14, 13, 33, 6, 26, 57, 21, 49, 19, 43, 32, 45, 54, 58, 15, 46, 20, 38, 44, 22" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = " a b c d e f g h \n \n 1  x . . . . . \n | \n \\ \n 2  o x| o . o o x . 
\n | \\ | \n \\ | / \n 3  . o| x . . o |. . \n \\ / \n | | \n 4  . . x . . x . . \n | \n \\ \n 5  . . x| . . o_ . . \n \\ | \\_ \n | \\ \\_ \n 6  . . . x . .| . o \n | \\ \n \\ | \n 7  . . . .| x . o o \n \\ \n | \n 8  . . . x . . \n \n\n" +InformationStateString(1) = " a b c d e f g h \n \n 1  x . . . . . \n | \n \\ \n 2  o x| o . o o x . \n | \\ | \n \\ | / \n 3  . o| x . . o |. . \n \\ / \n | | \n 4  . . x . . x . . \n | \n \\ \n 5  . . x| . . o_ . . \n \\ | \\_ \n | \\ \\_ \n 6  . . . x . .| . o \n | \\ \n \\ | \n 7  . . . .| x . o o \n \\ \n | \n 8  . . . x . . \n \n\n" +ObservationString(0) = " a b c d e f g h \n \n 1  x . . . . . \n | \n \\ \n 2  o x| o . o o x . \n | \\ | \n \\ | / \n 3  . o| x . . o |. . \n \\ / \n | | \n 4  . . x . . x . . \n | \n \\ \n 5  . . x| . . o_ . . \n \\ | \\_ \n | \\ \\_ \n 6  . . . x . .| . o \n | \\ \n \\ | \n 7  . . . .| x . o o \n \\ \n | \n 8  . . . x . . \n \n\n" +ObservationString(1) = " a b c d e f g h \n \n 1  x . . . . . \n | \n \\ \n 2  o x| o . o o x . \n | \\ | \n \\ | / \n 3  . o| x . . o |. . \n \\ / \n | | \n 4  . . x . . x . . \n | \n \\ \n 5  . . x| . . o_ . . \n \\ | \\_ \n | \\ \\_ \n 6  . . . x . .| . o \n | \\ \n \\ | \n 7  . . . .| x . o o \n \\ \n | \n 8  . . . x . . \n \n\n" +ObservationTensor(0): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◉◯◯ ◯◯◯◉◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◉◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◉◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◉◯◯ ◯◯◯◉◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◉◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◉◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [8, 9, 10, 11, 12, 16, 17, 18, 23, 24, 25, 27, 28, 29, 30, 31, 34, 35, 36, 37, 39, 40, 41, 42, 47, 48, 50, 51, 52, 53, 55] +StringLegalActions() = ["xb8", "xb7", "xb6", "xb5", "xb4", "xc8", "xc7", "xc6", "xc1", "xd8", "xd7", "xd5", "xd4", "xd3", "xd2", "xd1", "xe6", "xe5", "xe4", "xe3", "xe1", "xf8", "xf7", "xf6", "xf1", "xg8", "xg6", "xg5", "xg4", "xg3", "xg1"] + +# Apply action "xe6" +action: 34 + +# State 21 +# a b c d e f g h  +# +# 1  x . . . . . +# | +# \ +# 2  o x| o . o o x . +# | \ | +# \ | / +# 3  . o| x . . o |. . +# \ / +# | | +# 4  . . x . . x . . +# | | +# \ / +# 5  . . x| . . |o_ . . +# \ / | \_ +# | | \ \_ +# 6  . . . x x .| . o +# | \ +# \ | +# 7  . . . .| x . o o +# \ +# | +# 8  . . . x . . 
+# +# +IsTerminal() = False +History() = [14, 13, 33, 6, 26, 57, 21, 49, 19, 43, 32, 45, 54, 58, 15, 46, 20, 38, 44, 22, 34] +HistoryString() = "14, 13, 33, 6, 26, 57, 21, 49, 19, 43, 32, 45, 54, 58, 15, 46, 20, 38, 44, 22, 34" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = " a b c d e f g h \n \n 1  x . . . . . \n | \n \\ \n 2  o x| o . o o x . \n | \\ | \n \\ | / \n 3  . o| x . . o |. . \n \\ / \n | | \n 4  . . x . . x . . \n | | \n \\ / \n 5  . . x| . . |o_ . . \n \\ / | \\_ \n | | \\ \\_ \n 6  . . . x x .| . o \n | \\ \n \\ | \n 7  . . . .| x . o o \n \\ \n | \n 8  . . . x . . \n \n\n" +InformationStateString(1) = " a b c d e f g h \n \n 1  x . . . . . \n | \n \\ \n 2  o x| o . o o x . \n | \\ | \n \\ | / \n 3  . o| x . . o |. . \n \\ / \n | | \n 4  . . x . . x . . \n | | \n \\ / \n 5  . . x| . . |o_ . . \n \\ / | \\_ \n | | \\ \\_ \n 6  . . . x x .| . o \n | \\ \n \\ | \n 7  . . . .| x . o o \n \\ \n | \n 8  . . . x . . \n \n\n" +ObservationString(0) = " a b c d e f g h \n \n 1  x . . . . . \n | \n \\ \n 2  o x| o . o o x . \n | \\ | \n \\ | / \n 3  . o| x . . o |. . \n \\ / \n | | \n 4  . . x . . x . . \n | | \n \\ / \n 5  . . x| . . |o_ . . \n \\ / | \\_ \n | | \\ \\_ \n 6  . . . x x .| . o \n | \\ \n \\ | \n 7  . . . .| x . o o \n \\ \n | \n 8  . . . x . . \n \n\n" +ObservationString(1) = " a b c d e f g h \n \n 1  x . . . . . \n | \n \\ \n 2  o x| o . o o x . \n | \\ | \n \\ | / \n 3  . o| x . . o |. . \n \\ / \n | | \n 4  . . x . . x . . \n | | \n \\ / \n 5  . . x| . . |o_ . . \n \\ / | \\_ \n | | \\ \\_ \n 6  . . . x x .| . o \n | \\ \n \\ | \n 7  . . . .| x . o o \n \\ \n | \n 8  . . . x . . \n \n\n" +ObservationTensor(0): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◉◯◯ ◯◯◯◉◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◉◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◉◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◉◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◉◯◯ ◯◯◯◉◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◉◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◉◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◉◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◉◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 3, 4, 5, 9, 10, 11, 12, 17, 18, 25, 27, 28, 29, 30, 35, 36, 37, 41, 42, 50, 51, 52, 53, 59, 60, 61, 62] +StringLegalActions() = ["oa7", "oa6", "oa5", "oa4", "oa3", "ob7", "ob6", "ob5", "ob4", "oc7", "oc6", "od7", "od5", "od4", "od3", "od2", "oe5", "oe4", "oe3", "of7", "of6", "og6", "og5", "og4", "og3", "oh5", "oh4", "oh3", "oh2"] + +# Apply action "oa5" +action: 3 + +# State 22 +# 
Apply action "xd7" +action: 25 + +# State 23 +# Apply action "ob7" +action: 9 + +# State 24 +# Apply action "xb5" +action: 11 + +# State 25 +# Apply action "og3" +action: 53 + +# State 26 +# Apply action "xc7" +action: 17 + +# State 27 +# Apply action "oh3" +action: 61 + +# State 28 +# Apply action "xe3" +action: 37 + +# State 29 +# Apply action "oa7" +action: 1 + +# State 30 +# Apply action "xf7" +action: 41 + +# State 31 +# Apply action "og4" +action: 52 + +# State 32 +# Apply action "xg1" +action: 55 + +# State 33 +# Apply action "od5" +action: 27 + +# State 34 +# Apply action "xd2" +action: 30 + +# State 35 +# a b c d e f g h  +# +# 1  x_ . . . . x +# | \_ +# \ \_ +# 2  o x| o x o o _x . +# | \ | _/ | +# \ | / _/ / +# 3  . o| x |. _x o |o o +# | \ / _/ / | +# / |_/ | / +# 4  . |. x . . x |o . +# / | |/ | +# | \ /| \ +# 5  o x x| o . |o_ .| . +# | | | \ / | \_ \ +# \ \ \ | | \ \_| +# 6  .| .| .| x_ x .| . o +# \ \ \| \_ \ +# | | |\ \_ | +# 7  o o x_ x| x x o o +# \_ \ +# \_| +# 8  . . . x . . +# +# +# [x has won] +IsTerminal() = True +History() = [14, 13, 33, 6, 26, 57, 21, 49, 19, 43, 32, 45, 54, 58, 15, 46, 20, 38, 44, 22, 34, 3, 25, 9, 11, 53, 17, 61, 37, 1, 41, 52, 55, 27, 30] +HistoryString() = "14, 13, 33, 6, 26, 57, 21, 49, 19, 43, 32, 45, 54, 58, 15, 46, 20, 38, 44, 22, 34, 3, 25, 9, 11, 53, 17, 61, 37, 1, 41, 52, 55, 27, 30" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = " a b c d e f g h \n \n 1  x_ . . . . x \n | \\_ \n \\ \\_ \n 2  o x| o x o o _x . \n | \\ | _/ | \n \\ | / _/ / \n 3  . o| x |. _x o |o o \n | \\ / _/ / | \n / |_/ | / \n 4  . |. x . . x |o . \n / | |/ | \n | \\ /| \\ \n 5  o x x| o . |o_ .| . \n | | | \\ / | \\_ \\ \n \\ \\ \\ | | \\ \\_| \n 6  .| .| .| x_ x .| . o \n \\ \\ \\| \\_ \\ \n | | |\\ \\_ | \n 7  o o x_ x| x x o o \n \\_ \\ \n \\_| \n 8  . . . x . . \n \n\n[x has won]" +InformationStateString(1) = " a b c d e f g h \n \n 1  x_ . . . . x \n | \\_ \n \\ \\_ \n 2  o x| o x o o _x . \n | \\ | _/ | \n \\ | / _/ / \n 3  . o| x |. _x o |o o \n | \\ / _/ / | \n / |_/ | / \n 4  . |. x . . x |o . \n / | |/ | \n | \\ /| \\ \n 5  o x x| o . |o_ .| . \n | | | \\ / | \\_ \\ \n \\ \\ \\ | | \\ \\_| \n 6  .| .| .| x_ x .| . o \n \\ \\ \\| \\_ \\ \n | | |\\ \\_ | \n 7  o o x_ x| x x o o \n \\_ \\ \n \\_| \n 8  . . . x . . \n \n\n[x has won]" +ObservationString(0) = " a b c d e f g h \n \n 1  x_ . . . . x \n | \\_ \n \\ \\_ \n 2  o x| o x o o _x . \n | \\ | _/ | \n \\ | / _/ / \n 3  . o| x |. _x o |o o \n | \\ / _/ / | \n / |_/ | / \n 4  . |. x . . x |o . \n / | |/ | \n | \\ /| \\ \n 5  o x x| o . |o_ .| . \n | | | \\ / | \\_ \\ \n \\ \\ \\ | | \\ \\_| \n 6  .| .| .| x_ x .| . o \n \\ \\ \\| \\_ \\ \n | | |\\ \\_ | \n 7  o o x_ x| x x o o \n \\_ \\ \n \\_| \n 8  . . . x . . \n \n\n[x has won]" +ObservationString(1) = " a b c d e f g h \n \n 1  x_ . . . . x \n | \\_ \n \\ \\_ \n 2  o x| o x o o _x . \n | \\ | _/ | \n \\ | / _/ / \n 3  . o| x |. _x o |o o \n | \\ / _/ / | \n / |_/ | / \n 4  . |. x . . x |o . \n / | |/ | \n | \\ /| \\ \n 5  o x x| o . |o_ .| . \n | | | \\ / | \\_ \\ \n \\ \\ \\ | | \\ \\_| \n 6  .| .| .| x_ x .| . o \n \\ \\ \\| \\_ \\ \n | | |\\ \\_ | \n 7  o o x_ x| x x o o \n \\_ \\ \n \\_| \n 8  . . . x . . 
\n \n\n[x has won]" +ObservationTensor(0): +◯◯◯◯◯◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◉◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◉◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◯◯◯◯ ◯◯◯◉◯◯ ◯◯◯◯◯◯ ◯◯◯◉◯◯ ◯◯◯◉◯◯ ◉◯◯◯◯◯ +◯◯◯◯◯◯ ◯◉◯◯◉◯ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◯◯◯◯ ◉◉◯◯◯◯ ◯◯◯◉◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◉◯◯ ◯◯◯◯◯◯ ◯◯◉◯◯◯ ◯◯◉◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◉◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◉ ◯◯◯◉◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◉◯◯ ◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◉ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◉◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◉◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◯◯◯◯ ◯◯◯◉◯◯ ◯◯◯◯◯◯ ◯◯◯◉◯◯ ◯◯◯◉◯◯ ◉◯◯◯◯◯ +◯◯◯◯◯◯ ◯◉◯◯◉◯ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◉◯◯◯◯ ◉◉◯◯◯◯ ◯◯◯◉◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◉◯◯ ◯◯◯◯◯◯ ◯◯◉◯◯◯ ◯◯◉◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◉◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◯◉◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ +◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◉◯◯◯◯◉ ◯◯◯◉◯◯ ◯◯◯◯◯◯ ◯◯◯◯◯◯ ◯◯◯◉◯◯ ◯◯◯◯◯◯ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/ultimate_tic_tac_toe.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/ultimate_tic_tac_toe.txt new file mode 100644 index 0000000..a09f6ec --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/ultimate_tic_tac_toe.txt @@ -0,0 +1,678 @@ +game: ultimate_tic_tac_toe + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Ultimate Tic-Tac-Toe" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = [] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "ultimate_tic_tac_toe" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 9 +PolicyTensorShape() = [9] +MaxChanceOutcomes() = 0 +GetParameters() = {} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 +UtilitySum() = 0.0 +ObservationTensorShape() = [3, 9, 3, 3] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 243 +MaxGameLength() = 162 +ToString() = "ultimate_tic_tac_toe()" + +# State 0 +# ... ... ... +# ... ... ... +# ... ... ... +# +# ... ... ... +# ... ... ... +# ... ... ... +# +# ... ... ... +# ... ... ... +# ... ... ... +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = "... ... ...\n... ... ...\n... ... ...\n\n... ... ...\n... ... ...\n... ... ...\n\n... ... ...\n... ... ...\n... ... ...\n" +ObservationString(1) = "... ... ...\n... ... ...\n... ... ...\n\n... ... ...\n... ... ...\n... ... ...\n\n... ... ...\n... ... 
...\n... ... ...\n" +ObservationTensor(0) = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8] +StringLegalActions() = ["Choose local board 0", "Choose local board 1", "Choose local board 2", "Choose local board 3", "Choose local board 4", "Choose local board 5", "Choose local board 6", "Choose local board 7", "Choose local board 8"] + +# Apply action "Choose local board 7" +action: 7 + +# State 1 +# ... ... ... +# ... ... ... +# ... ... ... +# +# ... ... ... +# ... ... ... +# ... ... ... +# +# ... ... ... +# ... ... ... +# ... ... ... +IsTerminal() = False +History() = [7] +HistoryString() = "7" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "7" +InformationStateString(1) = "7" +ObservationString(0) = "... ... ...\n... ... ...\n... ... ...\n\n... ... ...\n... ... ...\n... ... ...\n\n... ... ...\n... ... ...\n... ... ...\n" +ObservationString(1) = "... ... ...\n... ... ...\n... ... ...\n\n... ... ...\n... ... ...\n... ... ...\n\n... ... ...\n... ... ...\n... ... 
...\n" +ObservationTensor(0) = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8] +StringLegalActions() = ["Local board 7: x(0,0)", "Local board 7: x(0,1)", "Local board 7: x(0,2)", "Local board 7: x(1,0)", "Local board 7: x(1,1)", "Local board 7: x(1,2)", "Local board 7: x(2,0)", "Local board 7: x(2,1)", "Local board 7: x(2,2)"] + +# Apply action "Local board 7: x(2,0)" +action: 6 + +# State 2 +# ... ... ... +# ... ... ... +# ... ... ... +# +# ... ... ... +# ... ... ... +# ... ... ... +# +# ... ... ... +# ... ... ... +# ... x.. ... +IsTerminal() = False +History() = [7, 6] +HistoryString() = "7, 6" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "7, 6" +InformationStateString(1) = "7, 6" +ObservationString(0) = "... ... ...\n... ... ...\n... ... ...\n\n... ... ...\n... ... ...\n... ... ...\n\n... ... ...\n... ... ...\n... x.. ...\n" +ObservationString(1) = "... ... ...\n... ... ...\n... ... ...\n\n... ... ...\n... ... ...\n... ... ...\n\n... ... ...\n... ... ...\n... x.. 
...\n" +ObservationTensor(0) = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8] +StringLegalActions() = ["Local board 6: o(0,0)", "Local board 6: o(0,1)", "Local board 6: o(0,2)", "Local board 6: o(1,0)", "Local board 6: o(1,1)", "Local board 6: o(1,2)", "Local board 6: o(2,0)", "Local board 6: o(2,1)", "Local board 6: o(2,2)"] + +# Apply action "Local board 6: o(1,0)" +action: 3 + +# State 3 +# ... ... ... +# ... ... ... +# ... ... ... +# +# ... ... ... +# ... ... ... +# ... ... ... +# +# ... ... ... +# o.. ... ... +# ... x.. ... +IsTerminal() = False +History() = [7, 6, 3] +HistoryString() = "7, 6, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "7, 6, 3" +InformationStateString(1) = "7, 6, 3" +ObservationString(0) = "... ... ...\n... ... ...\n... ... ...\n\n... ... ...\n... ... ...\n... ... ...\n\n... ... ...\no.. ... ...\n... x.. ...\n" +ObservationString(1) = "... ... ...\n... ... ...\n... ... ...\n\n... ... ...\n... ... ...\n... ... ...\n\n... ... ...\no.. ... ...\n... x.. 
...\n" +ObservationTensor(0) = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8] +StringLegalActions() = ["Local board 3: x(0,0)", "Local board 3: x(0,1)", "Local board 3: x(0,2)", "Local board 3: x(1,0)", "Local board 3: x(1,1)", "Local board 3: x(1,2)", "Local board 3: x(2,0)", "Local board 3: x(2,1)", "Local board 3: x(2,2)"] + +# Apply action "Local board 3: x(2,0)" +action: 6 + +# State 4 +# ... ... ... +# ... ... ... +# ... ... ... +# +# ... ... ... +# ... ... ... +# x.. ... ... +# +# ... ... ... +# o.. ... ... +# ... x.. ... +IsTerminal() = False +History() = [7, 6, 3, 6] +HistoryString() = "7, 6, 3, 6" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "7, 6, 3, 6" +InformationStateString(1) = "7, 6, 3, 6" +ObservationString(0) = "... ... ...\n... ... ...\n... ... ...\n\n... ... ...\n... ... ...\nx.. ... ...\n\n... ... ...\no.. ... ...\n... x.. ...\n" +ObservationString(1) = "... ... ...\n... ... ...\n... ... ...\n\n... ... ...\n... ... ...\nx.. ... ...\n\n... ... ...\no.. ... ...\n... 
x.. ...\n" +ObservationTensor(0) = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationTensor(1) = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 4, 5, 6, 7, 8] +StringLegalActions() = ["Local board 6: o(0,0)", "Local board 6: o(0,1)", "Local board 6: o(0,2)", "Local board 6: o(1,1)", "Local board 6: o(1,2)", "Local board 6: o(2,0)", "Local board 6: o(2,1)", "Local board 6: o(2,2)"] + +# Apply action "Local board 6: o(2,2)" +action: 8 + +# State 5 +# Apply action "Local board 8: x(2,2)" +action: 8 + +# State 6 +# ... ... ... +# ... ... ... +# ... ... ... +# +# ... ... ... +# ... ... ... +# x.. ... ... +# +# ... ... ... +# o.. ... ... +# ..o x.. ..x +IsTerminal() = False +History() = [7, 6, 3, 6, 8, 8] +HistoryString() = "7, 6, 3, 6, 8, 8" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "7, 6, 3, 6, 8, 8" +InformationStateString(1) = "7, 6, 3, 6, 8, 8" +ObservationString(0) = "... ... ...\n... ... ...\n... ... ...\n\n... ... ...\n... ... ...\nx.. ... ...\n\n... ... ...\no.. ... ...\n..o x.. ..x\n" +ObservationString(1) = "... ... ...\n... ... ...\n... ... ...\n\n... ... 
...\n... ... ...\nx.. ... ...\n\n... ... ...\no.. ... ...\n..o x.. ..x\n" +ObservationTensor(0) = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0] +ObservationTensor(1) = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7] +StringLegalActions() = ["Local board 8: o(0,0)", "Local board 8: o(0,1)", "Local board 8: o(0,2)", "Local board 8: o(1,0)", "Local board 8: o(1,1)", "Local board 8: o(1,2)", "Local board 8: o(2,0)", "Local board 8: o(2,1)"] + +# Apply action "Local board 8: o(0,2)" +action: 2 + +# State 7 +# Apply action "Local board 2: x(0,1)" +action: 1 + +# State 8 +# Apply action "Local board 1: o(1,0)" +action: 3 + +# State 9 +# Apply action "Local board 3: x(0,2)" +action: 2 + +# State 10 +# Apply action "Local board 2: o(0,2)" +action: 2 + +# State 11 +# Apply action "Local board 2: x(1,1)" +action: 4 + +# State 12 +# Apply action "Local board 4: o(2,0)" +action: 6 + +# State 13 +# Apply action "Local board 6: x(2,0)" +action: 6 + +# State 14 +# Apply action "Local board 6: o(0,2)" +action: 2 + +# State 15 +# Apply action "Local board 2: x(1,2)" +action: 5 + +# State 16 +# Apply action "Local board 5: o(2,0)" 
+action: 6 + +# State 17 +# Apply action "Local board 6: x(1,1)" +action: 4 + +# State 18 +# Apply action "Local board 4: o(2,2)" +action: 8 + +# State 19 +# ... ... .xo +# ... o.. .xx +# ... ... ... +# +# ..x ... ... +# ... ... ... +# x.. o.o o.. +# +# ..o ... ..o +# ox. ... ... +# x.o x.. ..x +IsTerminal() = False +History() = [7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8] +HistoryString() = "7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8" +InformationStateString(1) = "7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8" +ObservationString(0) = "... ... .xo\n... o.. .xx\n... ... ...\n\n..x ... ...\n... ... ...\nx.. o.o o..\n\n..o ... ..o\nox. ... ...\nx.o x.. ..x\n" +ObservationString(1) = "... ... .xo\n... o.. .xx\n... ... ...\n\n..x ... ...\n... ... ...\nx.. o.o o..\n\n..o ... ..o\nox. ... ...\nx.o x.. ..x\n" +ObservationTensor(0) = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0] +ObservationTensor(1) = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 
1, 3, 4, 5, 6, 7] +StringLegalActions() = ["Local board 8: x(0,0)", "Local board 8: x(0,1)", "Local board 8: x(1,0)", "Local board 8: x(1,1)", "Local board 8: x(1,2)", "Local board 8: x(2,0)", "Local board 8: x(2,1)"] + +# Apply action "Local board 8: x(0,0)" +action: 0 + +# State 20 +# Apply action "Local board 0: o(1,1)" +action: 4 + +# State 21 +# Apply action "Local board 4: x(1,2)" +action: 5 + +# State 22 +# ... ... .xo +# .o. o.. .xx +# ... ... ... +# +# ..x ... ... +# ... ..x ... +# x.. o.o o.. +# +# ..o ... x.o +# ox. ... ... +# x.o x.. ..x +IsTerminal() = False +History() = [7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8, 0, 4, 5] +HistoryString() = "7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8, 0, 4, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8, 0, 4, 5" +InformationStateString(1) = "7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8, 0, 4, 5" +ObservationString(0) = "... ... .xo\n.o. o.. .xx\n... ... ...\n\n..x ... ...\n... ..x ...\nx.. o.o o..\n\n..o ... x.o\nox. ... ...\nx.o x.. ..x\n" +ObservationString(1) = "... ... .xo\n.o. o.. .xx\n... ... ...\n\n..x ... ...\n... ..x ...\nx.. o.o o..\n\n..o ... x.o\nox. ... ...\nx.o x.. ..x\n" +ObservationTensor(0) = [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0] +ObservationTensor(1) = [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 7, 8] +StringLegalActions() = ["Local board 5: o(0,0)", "Local board 5: o(0,1)", "Local board 5: o(0,2)", "Local board 5: o(1,0)", "Local board 5: o(1,1)", "Local board 5: o(1,2)", "Local board 5: o(2,1)", "Local board 5: o(2,2)"] + +# Apply action "Local board 5: o(0,1)" +action: 1 + +# State 23 +# Apply action "Local board 1: x(2,1)" +action: 7 + +# State 24 +# Apply action "Local board 7: o(1,1)" +action: 4 + +# State 25 +# Apply action "Local board 4: x(1,0)" +action: 3 + +# State 26 +# Apply action "Local board 3: o(1,0)" +action: 3 + +# State 27 +# Apply action "Local board 3: x(0,0)" +action: 0 + +# State 28 +# Apply action "Local board 0: o(2,2)" +action: 8 + +# State 29 +# Apply action "Local board 8: x(2,0)" +action: 6 + +# State 30 +# Apply action "Local board 6: o(0,0)" +action: 0 + +# State 31 +# Apply action "Local board 0: x(0,1)" +action: 1 + +# State 32 +# Apply action "Local board 1: o(1,2)" +action: 5 + +# State 33 +# Apply action "Local board 5: x(0,2)" +action: 2 + +# State 34 +# Apply action "Local board 2: o(2,0)" +action: 6 + +# State 35 +# Apply action "Local board 6: x(1,2)" +action: 5 + +# State 36 +# Apply action "Local board 5: o(1,2)" +action: 5 + +# State 37 +# Apply action "Local board 5: x(2,1)" +action: 7 + +# State 38 +# Apply action "Local board 7: o(0,2)" +action: 2 + +# State 39 +# .x. ... .xo +# .o. o.o .xx +# ..o .x. o.. +# +# x.x ... .ox +# o.. x.x ..o +# x.. o.o ox. +# +# o.o ..o x.o +# oxx .o. ... +# x.o x.. x.x +IsTerminal() = False +History() = [7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8, 0, 4, 5, 1, 7, 4, 3, 3, 0, 8, 6, 0, 1, 5, 2, 6, 5, 5, 7, 2] +HistoryString() = "7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8, 0, 4, 5, 1, 7, 4, 3, 3, 0, 8, 6, 0, 1, 5, 2, 6, 5, 5, 7, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8, 0, 4, 5, 1, 7, 4, 3, 3, 0, 8, 6, 0, 1, 5, 2, 6, 5, 5, 7, 2" +InformationStateString(1) = "7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8, 0, 4, 5, 1, 7, 4, 3, 3, 0, 8, 6, 0, 1, 5, 2, 6, 5, 5, 7, 2" +ObservationString(0) = ".x. ... .xo\n.o. o.o .xx\n..o .x. o..\n\nx.x ... .ox\no.. x.x ..o\nx.. o.o ox.\n\no.o ..o x.o\noxx .o. ...\nx.o x.. x.x\n" +ObservationString(1) = ".x. ... .xo\n.o. o.o .xx\n..o .x. o..\n\nx.x ... .ox\no.. x.x ..o\nx.. o.o ox.\n\no.o ..o x.o\noxx .o. ...\nx.o x.. 
x.x\n" +ObservationTensor(0) = [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0] +ObservationTensor(1) = [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 3, 7, 8] +StringLegalActions() = ["Local board 2: x(0,0)", "Local board 2: x(1,0)", "Local board 2: x(2,1)", "Local board 2: x(2,2)"] + +# Apply action "Local board 2: x(0,0)" +action: 0 + +# State 40 +# Apply action "Local board 0: o(1,2)" +action: 5 + +# State 41 +# Apply action "Local board 5: x(0,0)" +action: 0 + +# State 42 +# .x. ... xxo +# .oo o.o .xx +# ..o .x. o.. +# +# x.x ... xox +# o.. x.x ..o +# x.. o.o ox. +# +# o.o ..o x.o +# oxx .o. ... +# x.o x.. 
x.x +IsTerminal() = False +History() = [7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8, 0, 4, 5, 1, 7, 4, 3, 3, 0, 8, 6, 0, 1, 5, 2, 6, 5, 5, 7, 2, 0, 5, 0] +HistoryString() = "7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8, 0, 4, 5, 1, 7, 4, 3, 3, 0, 8, 6, 0, 1, 5, 2, 6, 5, 5, 7, 2, 0, 5, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8, 0, 4, 5, 1, 7, 4, 3, 3, 0, 8, 6, 0, 1, 5, 2, 6, 5, 5, 7, 2, 0, 5, 0" +InformationStateString(1) = "7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8, 0, 4, 5, 1, 7, 4, 3, 3, 0, 8, 6, 0, 1, 5, 2, 6, 5, 5, 7, 2, 0, 5, 0" +ObservationString(0) = ".x. ... xxo\n.oo o.o .xx\n..o .x. o..\n\nx.x ... xox\no.. x.x ..o\nx.. o.o ox.\n\no.o ..o x.o\noxx .o. ...\nx.o x.. x.x\n" +ObservationString(1) = ".x. ... xxo\n.oo o.o .xx\n..o .x. o..\n\nx.x ... xox\no.. x.x ..o\nx.. o.o ox.\n\no.o ..o x.o\noxx .o. ...\nx.o x.. x.x\n" +ObservationTensor(0) = [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0] +ObservationTensor(1) = [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 2, 3, 6, 7] 
+StringLegalActions() = ["Local board 0: o(0,0)", "Local board 0: o(0,2)", "Local board 0: o(1,0)", "Local board 0: o(2,0)", "Local board 0: o(2,1)"] + +# Apply action "Local board 0: o(1,0)" +action: 3 + +# State 43 +# Apply action "Local board 3: x(0,1)" +action: 1 + +# State 44 +# Apply action "Local board 1: o(1,1)" +action: 4 + +# State 45 +# Apply action "Local board 4: x(1,1)" +action: 4 + +# State 46 +# Apply action "Choose local board 6" +action: 6 + +# State 47 +# Apply action "Local board 6: o(0,1)" +action: 1 + +# State 48 +# Apply action "Choose local board 7" +action: 7 + +# State 49 +# Apply action "Local board 7: x(1,0)" +action: 3 + +# State 50 +# Apply action "Choose local board 7" +action: 7 + +# State 51 +# Apply action "Local board 7: o(1,2)" +action: 5 + +# State 52 +# Apply action "Local board 5: x(2,2)" +action: 8 + +# State 53 +# Apply action "Local board 8: o(2,1)" +action: 7 + +# State 54 +# Apply action "Local board 7: x(2,2)" +action: 8 + +# State 55 +# Apply action "Local board 8: o(0,1)" +action: 1 + +# State 56 +# Apply action "Choose local board 5" +action: 5 + +# State 57 +# Apply action "Local board 5: x(1,0)" +action: 3 + +# State 58 +# Apply action "Choose local board 2" +action: 2 + +# State 59 +# Apply action "Local board 2: o(1,0)" +action: 3 + +# State 60 +# .x. ... xxo +# ooo ooo oxx +# ..o .x. o.. +# +# xxx ... xox +# o.. xxx o.o +# x.. o.o oxx +# +# ooo ..o xoo +# oxx xoo ... +# x.o x.x xox +IsTerminal() = False +History() = [7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8, 0, 4, 5, 1, 7, 4, 3, 3, 0, 8, 6, 0, 1, 5, 2, 6, 5, 5, 7, 2, 0, 5, 0, 3, 1, 4, 4, 6, 1, 7, 3, 7, 5, 8, 7, 8, 1, 5, 3, 2, 3] +HistoryString() = "7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8, 0, 4, 5, 1, 7, 4, 3, 3, 0, 8, 6, 0, 1, 5, 2, 6, 5, 5, 7, 2, 0, 5, 0, 3, 1, 4, 4, 6, 1, 7, 3, 7, 5, 8, 7, 8, 1, 5, 3, 2, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8, 0, 4, 5, 1, 7, 4, 3, 3, 0, 8, 6, 0, 1, 5, 2, 6, 5, 5, 7, 2, 0, 5, 0, 3, 1, 4, 4, 6, 1, 7, 3, 7, 5, 8, 7, 8, 1, 5, 3, 2, 3" +InformationStateString(1) = "7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8, 0, 4, 5, 1, 7, 4, 3, 3, 0, 8, 6, 0, 1, 5, 2, 6, 5, 5, 7, 2, 0, 5, 0, 3, 1, 4, 4, 6, 1, 7, 3, 7, 5, 8, 7, 8, 1, 5, 3, 2, 3" +ObservationString(0) = ".x. ... xxo\nooo ooo oxx\n..o .x. o..\n\nxxx ... xox\no.. xxx o.o\nx.. o.o oxx\n\nooo ..o xoo\noxx xoo ...\nx.o x.x xox\n" +ObservationString(1) = ".x. ... xxo\nooo ooo oxx\n..o .x. o..\n\nxxx ... xox\no.. xxx o.o\nx.. 
o.o oxx\n\nooo ..o xoo\noxx xoo ...\nx.o x.x xox\n" +ObservationTensor(0) = [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0] +ObservationTensor(1) = [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2, 5, 7, 8] +StringLegalActions() = ["Choose local board 2", "Choose local board 5", "Choose local board 7", "Choose local board 8"] + +# Apply action "Choose local board 7" +action: 7 + +# State 61 +# Apply action "Local board 7: x(0,0)" +action: 0 + +# State 62 +# .x. ... xxo +# ooo ooo oxx +# ..o .x. o.. +# +# xxx ... xox +# o.. xxx o.o +# x.. o.o oxx +# +# ooo o.o xoo +# oxx xoo ... 
+# x.o x.x xox +IsTerminal() = False +History() = [7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8, 0, 4, 5, 1, 7, 4, 3, 3, 0, 8, 6, 0, 1, 5, 2, 6, 5, 5, 7, 2, 0, 5, 0, 3, 1, 4, 4, 6, 1, 7, 3, 7, 5, 8, 7, 8, 1, 5, 3, 2, 3, 7, 0] +HistoryString() = "7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8, 0, 4, 5, 1, 7, 4, 3, 3, 0, 8, 6, 0, 1, 5, 2, 6, 5, 5, 7, 2, 0, 5, 0, 3, 1, 4, 4, 6, 1, 7, 3, 7, 5, 8, 7, 8, 1, 5, 3, 2, 3, 7, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8, 0, 4, 5, 1, 7, 4, 3, 3, 0, 8, 6, 0, 1, 5, 2, 6, 5, 5, 7, 2, 0, 5, 0, 3, 1, 4, 4, 6, 1, 7, 3, 7, 5, 8, 7, 8, 1, 5, 3, 2, 3, 7, 0" +InformationStateString(1) = "7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8, 0, 4, 5, 1, 7, 4, 3, 3, 0, 8, 6, 0, 1, 5, 2, 6, 5, 5, 7, 2, 0, 5, 0, 3, 1, 4, 4, 6, 1, 7, 3, 7, 5, 8, 7, 8, 1, 5, 3, 2, 3, 7, 0" +ObservationString(0) = ".x. ... xxo\nooo ooo oxx\n..o .x. o..\n\nxxx ... xox\no.. xxx o.o\nx.. o.o oxx\n\nooo o.o xoo\noxx xoo ...\nx.o x.x xox\n" +ObservationString(1) = ".x. ... xxo\nooo ooo oxx\n..o .x. o..\n\nxxx ... xox\no.. xxx o.o\nx.. o.o oxx\n\nooo o.o xoo\noxx xoo ...\nx.o x.x xox\n" +ObservationTensor(0) = [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0] +ObservationTensor(1) = [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 
0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [2, 5, 7, 8] +StringLegalActions() = ["Choose local board 2", "Choose local board 5", "Choose local board 7", "Choose local board 8"] + +# Apply action "Choose local board 2" +action: 2 + +# State 63 +# Apply action "Local board 2: o(2,1)" +action: 7 + +# State 64 +# Apply action "Local board 7: x(0,1)" +action: 1 + +# State 65 +# Apply action "Choose local board 8" +action: 8 + +# State 66 +# Apply action "Local board 8: o(1,1)" +action: 4 + +# State 67 +# Apply action "Choose local board 7" +action: 7 + +# State 68 +# Apply action "Local board 7: x(2,1)" +action: 7 + +# State 69 +# Apply action "Choose local board 5" +action: 5 + +# State 70 +# Apply action "Local board 5: o(1,1)" +action: 4 + +# State 71 +# .x. ... xxo +# ooo ooo oxx +# ..o .x. ox. +# +# xxx ... xox +# o.. xxx oxo +# x.. o.o oxx +# +# ooo oxo xoo +# oxx xoo .x. +# x.o xox xox +IsTerminal() = True +History() = [7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8, 0, 4, 5, 1, 7, 4, 3, 3, 0, 8, 6, 0, 1, 5, 2, 6, 5, 5, 7, 2, 0, 5, 0, 3, 1, 4, 4, 6, 1, 7, 3, 7, 5, 8, 7, 8, 1, 5, 3, 2, 3, 7, 0, 2, 7, 1, 8, 4, 7, 7, 5, 4] +HistoryString() = "7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8, 0, 4, 5, 1, 7, 4, 3, 3, 0, 8, 6, 0, 1, 5, 2, 6, 5, 5, 7, 2, 0, 5, 0, 3, 1, 4, 4, 6, 1, 7, 3, 7, 5, 8, 7, 8, 1, 5, 3, 2, 3, 7, 0, 2, 7, 1, 8, 4, 7, 7, 5, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8, 0, 4, 5, 1, 7, 4, 3, 3, 0, 8, 6, 0, 1, 5, 2, 6, 5, 5, 7, 2, 0, 5, 0, 3, 1, 4, 4, 6, 1, 7, 3, 7, 5, 8, 7, 8, 1, 5, 3, 2, 3, 7, 0, 2, 7, 1, 8, 4, 7, 7, 5, 4" +InformationStateString(1) = "7, 6, 3, 6, 8, 8, 2, 1, 3, 2, 2, 4, 6, 6, 2, 5, 6, 4, 8, 0, 4, 5, 1, 7, 4, 3, 3, 0, 8, 6, 0, 1, 5, 2, 6, 5, 5, 7, 2, 0, 5, 0, 3, 1, 4, 4, 6, 1, 7, 3, 7, 5, 8, 7, 8, 1, 5, 3, 2, 3, 7, 0, 2, 7, 1, 8, 4, 7, 7, 5, 4" +ObservationString(0) = ".x. ... xxo\nooo ooo oxx\n..o .x. ox.\n\nxxx ... xox\no.. xxx oxo\nx.. o.o oxx\n\nooo oxo xoo\noxx xoo .x.\nx.o xox xox\n" +ObservationString(1) = ".x. ... xxo\nooo ooo oxx\n..o .x. ox.\n\nxxx ... xox\no.. xxx oxo\nx.. 
o.o oxx\n\nooo oxo xoo\noxx xoo .x.\nx.o xox xox\n" +ObservationTensor(0) = [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0] +ObservationTensor(1) = [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0] +Rewards() = [0, 0] +Returns() = [0, 0] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/universal_poker(bettingAbstraction=fullgame).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/universal_poker(bettingAbstraction=fullgame).txt new file mode 100644 index 0000000..c032faf --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/universal_poker(bettingAbstraction=fullgame).txt @@ -0,0 +1,201 @@ +game: universal_poker(bettingAbstraction=fullgame) + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Universal Poker" +GameType.max_num_players = 10 +GameType.min_num_players = 2 +GameType.parameter_specification = ["betting", "bettingAbstraction", "blind", "boardCards", "firstPlayer", "handReaches", "maxRaises", "numBoardCards", "numHoleCards", "numPlayers", "numRanks", "numRounds", 
"numSuits", "potSize", "raiseSize", "stack"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "universal_poker" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 1201 +PolicyTensorShape() = [1201] +MaxChanceOutcomes() = 24 +GetParameters() = {betting=nolimit,bettingAbstraction=fullgame,blind=100 100,boardCards=,firstPlayer=1 1,handReaches=,maxRaises=,numBoardCards=0 1,numHoleCards=1,numPlayers=2,numRanks=6,numRounds=2,numSuits=4,potSize=0,stack=1200 1200} +NumPlayers() = 2 +MinUtility() = -1200.0 +MaxUtility() = 1200.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = [110] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 110 +ObservationTensorShape() = [52] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 52 +MaxGameLength() = 20 +ToString() = "universal_poker(bettingAbstraction=fullgame)" + +# State 0 +# BettingAbstraction: FULLGAME +# P0 Cards: +# P1 Cards: +# BoardCards +# PossibleCardsToDeal 7s7h7d7c6s6h6d6c5s5h5d5c4s4h4d4c3s3h3d3c2s2h2d2c +# Node type?: Chance node +# ] +# Round: 0 +# ACPC State: STATE:0::2c|2c +# Spent: [P0: 100 P1: 100 ] +# +# Action Sequence: +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "[Round 0][Player: -1][Pot: 200][Money: 1100 1100][Private: ][Public: ][Sequences: ]" +InformationStateString(1) = "[Round 0][Player: -1][Pot: 200][Money: 1100 1100][Private: ][Public: ][Sequences: ]" +InformationStateTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "[Round 0][Player: -1][Pot: 200][Money: 1100 1100][Private: ][Ante: 100 100]" +ObservationString(1) = "[Round 0][Player: -1][Pot: 200][Money: 1100 1100][Private: ][Ante: 100 100]" +ObservationTensor(0) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 100.0] +ObservationTensor(1) = [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 100.0] +ChanceOutcomes() = [(0,0.0416667), (1,0.0416667), (2,0.0416667), (3,0.0416667), (4,0.0416667), (5,0.0416667), (6,0.0416667), (7,0.0416667), (8,0.0416667), (9,0.0416667), (10,0.0416667), (11,0.0416667), (12,0.0416667), (13,0.0416667), (14,0.0416667), (15,0.0416667), (16,0.0416667), (17,0.0416667), (18,0.0416667), (19,0.0416667), (20,0.0416667), (21,0.0416667), (22,0.0416667), (23,0.0416667)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] +StringLegalActions() = ["player=-1 move=Deal(0)", "player=-1 move=Deal(1)", "player=-1 move=Deal(2)", "player=-1 move=Deal(3)", "player=-1 move=Deal(4)", "player=-1 move=Deal(5)", "player=-1 
move=Deal(6)", "player=-1 move=Deal(7)", "player=-1 move=Deal(8)", "player=-1 move=Deal(9)", "player=-1 move=Deal(10)", "player=-1 move=Deal(11)", "player=-1 move=Deal(12)", "player=-1 move=Deal(13)", "player=-1 move=Deal(14)", "player=-1 move=Deal(15)", "player=-1 move=Deal(16)", "player=-1 move=Deal(17)", "player=-1 move=Deal(18)", "player=-1 move=Deal(19)", "player=-1 move=Deal(20)", "player=-1 move=Deal(21)", "player=-1 move=Deal(22)", "player=-1 move=Deal(23)"] + +# Apply action "player=-1 move=Deal(15)" +action: 15 + +# State 1 +# BettingAbstraction: FULLGAME +# P0 Cards: 5s +# P1 Cards: +# BoardCards +# PossibleCardsToDeal 7s7h7d7c6s6h6d6c5h5d5c4s4h4d4c3s3h3d3c2s2h2d2c +# Node type?: Chance node +# ] +# Round: 0 +# ACPC State: STATE:0::5s|2c +# Spent: [P0: 100 P1: 100 ] +# +# Action Sequence: d +IsTerminal() = False +History() = [15] +HistoryString() = "15" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "[Round 0][Player: -1][Pot: 200][Money: 1100 1100][Private: 5s][Public: ][Sequences: ]" +InformationStateString(1) = "[Round 0][Player: -1][Pot: 200][Money: 1100 1100][Private: ][Public: ][Sequences: ]" +InformationStateTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "[Round 0][Player: -1][Pot: 200][Money: 1100 1100][Private: 5s][Ante: 100 100]" +ObservationString(1) = "[Round 0][Player: -1][Pot: 200][Money: 1100 1100][Private: ][Ante: 100 100]" +ObservationTensor(0) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 100.0] +ObservationTensor(1) = [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 100.0] +ChanceOutcomes() = [(0,0.0434783), (1,0.0434783), (2,0.0434783), (3,0.0434783), (4,0.0434783), (5,0.0434783), (6,0.0434783), (7,0.0434783), (8,0.0434783), (9,0.0434783), (10,0.0434783), (11,0.0434783), (12,0.0434783), (13,0.0434783), (14,0.0434783), (16,0.0434783), (17,0.0434783), (18,0.0434783), (19,0.0434783), (20,0.0434783), (21,0.0434783), (22,0.0434783), (23,0.0434783)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 21, 22, 23] +StringLegalActions() = ["player=-1 move=Deal(0)", "player=-1 move=Deal(1)", "player=-1 move=Deal(2)", "player=-1 move=Deal(3)", "player=-1 move=Deal(4)", "player=-1 move=Deal(5)", "player=-1 move=Deal(6)", "player=-1 move=Deal(7)", "player=-1 move=Deal(8)", "player=-1 move=Deal(9)", "player=-1 move=Deal(10)", "player=-1 move=Deal(11)", "player=-1 move=Deal(12)", "player=-1 move=Deal(13)", "player=-1 move=Deal(14)", "player=-1 move=Deal(16)", "player=-1 move=Deal(17)", "player=-1 move=Deal(18)", "player=-1 move=Deal(19)", "player=-1 move=Deal(20)", "player=-1 move=Deal(21)", "player=-1 move=Deal(22)", "player=-1 move=Deal(23)"] + +# Apply action "player=-1 move=Deal(21)" +action: 21 + +# State 2 +# BettingAbstraction: FULLGAME +# P0 Cards: 5s +# P1 Cards: 7d +# BoardCards +# Node type?: Player node for player 0 +# ] 
+# Round: 0 +# ACPC State: STATE:0::5s|7d +# Spent: [P0: 100 P1: 100 ] +# +# Action Sequence: dd +IsTerminal() = False +History() = [15, 21] +HistoryString() = "15, 21" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "[Round 0][Player: 0][Pot: 200][Money: 1100 1100][Private: 5s][Public: ][Sequences: ]" +InformationStateString(1) = "[Round 0][Player: 0][Pot: 200][Money: 1100 1100][Private: 7d][Public: ][Sequences: ]" +InformationStateTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "[Round 0][Player: 0][Pot: 200][Money: 1100 1100][Private: 5s][Ante: 100 100]" +ObservationString(1) = "[Round 0][Player: 0][Pot: 200][Money: 1100 1100][Private: 7d][Ante: 100 100]" +ObservationTensor(0) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 100.0] +ObservationTensor(1) = [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 100.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 
591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101, 1102, 1103, 1104, 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, 1115, 1116, 1117, 1118, 1119, 1120, 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1154, 1155, 1156, 1157, 1158, 1159, 1160, 1161, 1162, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198, 1199, 1200] +StringLegalActions() = ["player=0 move=Call", "player=0 move=Bet200", "player=0 move=Bet201", "player=0 move=Bet202", "player=0 move=Bet203", "player=0 move=Bet204", "player=0 move=Bet205", "player=0 move=Bet206", "player=0 move=Bet207", "player=0 move=Bet208", "player=0 move=Bet209", "player=0 
move=Bet210", "player=0 move=Bet211", "player=0 move=Bet212", "player=0 move=Bet213", "player=0 move=Bet214", "player=0 move=Bet215", "player=0 move=Bet216", "player=0 move=Bet217", "player=0 move=Bet218", "player=0 move=Bet219", "player=0 move=Bet220", "player=0 move=Bet221", "player=0 move=Bet222", "player=0 move=Bet223", "player=0 move=Bet224", "player=0 move=Bet225", "player=0 move=Bet226", "player=0 move=Bet227", "player=0 move=Bet228", "player=0 move=Bet229", "player=0 move=Bet230", "player=0 move=Bet231", "player=0 move=Bet232", "player=0 move=Bet233", "player=0 move=Bet234", "player=0 move=Bet235", "player=0 move=Bet236", "player=0 move=Bet237", "player=0 move=Bet238", "player=0 move=Bet239", "player=0 move=Bet240", "player=0 move=Bet241", "player=0 move=Bet242", "player=0 move=Bet243", "player=0 move=Bet244", "player=0 move=Bet245", "player=0 move=Bet246", "player=0 move=Bet247", "player=0 move=Bet248", "player=0 move=Bet249", "player=0 move=Bet250", "player=0 move=Bet251", "player=0 move=Bet252", "player=0 move=Bet253", "player=0 move=Bet254", "player=0 move=Bet255", "player=0 move=Bet256", "player=0 move=Bet257", "player=0 move=Bet258", "player=0 move=Bet259", "player=0 move=Bet260", "player=0 move=Bet261", "player=0 move=Bet262", "player=0 move=Bet263", "player=0 move=Bet264", "player=0 move=Bet265", "player=0 move=Bet266", "player=0 move=Bet267", "player=0 move=Bet268", "player=0 move=Bet269", "player=0 move=Bet270", "player=0 move=Bet271", "player=0 move=Bet272", "player=0 move=Bet273", "player=0 move=Bet274", "player=0 move=Bet275", "player=0 move=Bet276", "player=0 move=Bet277", "player=0 move=Bet278", "player=0 move=Bet279", "player=0 move=Bet280", "player=0 move=Bet281", "player=0 move=Bet282", "player=0 move=Bet283", "player=0 move=Bet284", "player=0 move=Bet285", "player=0 move=Bet286", "player=0 move=Bet287", "player=0 move=Bet288", "player=0 move=Bet289", "player=0 move=Bet290", "player=0 move=Bet291", "player=0 move=Bet292", "player=0 move=Bet293", "player=0 move=Bet294", "player=0 move=Bet295", "player=0 move=Bet296", "player=0 move=Bet297", "player=0 move=Bet298", "player=0 move=Bet299", "player=0 move=Bet300", "player=0 move=Bet301", "player=0 move=Bet302", "player=0 move=Bet303", "player=0 move=Bet304", "player=0 move=Bet305", "player=0 move=Bet306", "player=0 move=Bet307", "player=0 move=Bet308", "player=0 move=Bet309", "player=0 move=Bet310", "player=0 move=Bet311", "player=0 move=Bet312", "player=0 move=Bet313", "player=0 move=Bet314", "player=0 move=Bet315", "player=0 move=Bet316", "player=0 move=Bet317", "player=0 move=Bet318", "player=0 move=Bet319", "player=0 move=Bet320", "player=0 move=Bet321", "player=0 move=Bet322", "player=0 move=Bet323", "player=0 move=Bet324", "player=0 move=Bet325", "player=0 move=Bet326", "player=0 move=Bet327", "player=0 move=Bet328", "player=0 move=Bet329", "player=0 move=Bet330", "player=0 move=Bet331", "player=0 move=Bet332", "player=0 move=Bet333", "player=0 move=Bet334", "player=0 move=Bet335", "player=0 move=Bet336", "player=0 move=Bet337", "player=0 move=Bet338", "player=0 move=Bet339", "player=0 move=Bet340", "player=0 move=Bet341", "player=0 move=Bet342", "player=0 move=Bet343", "player=0 move=Bet344", "player=0 move=Bet345", "player=0 move=Bet346", "player=0 move=Bet347", "player=0 move=Bet348", "player=0 move=Bet349", "player=0 move=Bet350", "player=0 move=Bet351", "player=0 move=Bet352", "player=0 move=Bet353", "player=0 move=Bet354", "player=0 move=Bet355", "player=0 move=Bet356", "player=0 move=Bet357", "player=0 
move=Bet358", "player=0 move=Bet359", "player=0 move=Bet360", "player=0 move=Bet361", "player=0 move=Bet362", "player=0 move=Bet363", "player=0 move=Bet364", "player=0 move=Bet365", "player=0 move=Bet366", "player=0 move=Bet367", "player=0 move=Bet368", "player=0 move=Bet369", "player=0 move=Bet370", "player=0 move=Bet371", "player=0 move=Bet372", "player=0 move=Bet373", "player=0 move=Bet374", "player=0 move=Bet375", "player=0 move=Bet376", "player=0 move=Bet377", "player=0 move=Bet378", "player=0 move=Bet379", "player=0 move=Bet380", "player=0 move=Bet381", "player=0 move=Bet382", "player=0 move=Bet383", "player=0 move=Bet384", "player=0 move=Bet385", "player=0 move=Bet386", "player=0 move=Bet387", "player=0 move=Bet388", "player=0 move=Bet389", "player=0 move=Bet390", "player=0 move=Bet391", "player=0 move=Bet392", "player=0 move=Bet393", "player=0 move=Bet394", "player=0 move=Bet395", "player=0 move=Bet396", "player=0 move=Bet397", "player=0 move=Bet398", "player=0 move=Bet399", "player=0 move=Bet400", "player=0 move=Bet401", "player=0 move=Bet402", "player=0 move=Bet403", "player=0 move=Bet404", "player=0 move=Bet405", "player=0 move=Bet406", "player=0 move=Bet407", "player=0 move=Bet408", "player=0 move=Bet409", "player=0 move=Bet410", "player=0 move=Bet411", "player=0 move=Bet412", "player=0 move=Bet413", "player=0 move=Bet414", "player=0 move=Bet415", "player=0 move=Bet416", "player=0 move=Bet417", "player=0 move=Bet418", "player=0 move=Bet419", "player=0 move=Bet420", "player=0 move=Bet421", "player=0 move=Bet422", "player=0 move=Bet423", "player=0 move=Bet424", "player=0 move=Bet425", "player=0 move=Bet426", "player=0 move=Bet427", "player=0 move=Bet428", "player=0 move=Bet429", "player=0 move=Bet430", "player=0 move=Bet431", "player=0 move=Bet432", "player=0 move=Bet433", "player=0 move=Bet434", "player=0 move=Bet435", "player=0 move=Bet436", "player=0 move=Bet437", "player=0 move=Bet438", "player=0 move=Bet439", "player=0 move=Bet440", "player=0 move=Bet441", "player=0 move=Bet442", "player=0 move=Bet443", "player=0 move=Bet444", "player=0 move=Bet445", "player=0 move=Bet446", "player=0 move=Bet447", "player=0 move=Bet448", "player=0 move=Bet449", "player=0 move=Bet450", "player=0 move=Bet451", "player=0 move=Bet452", "player=0 move=Bet453", "player=0 move=Bet454", "player=0 move=Bet455", "player=0 move=Bet456", "player=0 move=Bet457", "player=0 move=Bet458", "player=0 move=Bet459", "player=0 move=Bet460", "player=0 move=Bet461", "player=0 move=Bet462", "player=0 move=Bet463", "player=0 move=Bet464", "player=0 move=Bet465", "player=0 move=Bet466", "player=0 move=Bet467", "player=0 move=Bet468", "player=0 move=Bet469", "player=0 move=Bet470", "player=0 move=Bet471", "player=0 move=Bet472", "player=0 move=Bet473", "player=0 move=Bet474", "player=0 move=Bet475", "player=0 move=Bet476", "player=0 move=Bet477", "player=0 move=Bet478", "player=0 move=Bet479", "player=0 move=Bet480", "player=0 move=Bet481", "player=0 move=Bet482", "player=0 move=Bet483", "player=0 move=Bet484", "player=0 move=Bet485", "player=0 move=Bet486", "player=0 move=Bet487", "player=0 move=Bet488", "player=0 move=Bet489", "player=0 move=Bet490", "player=0 move=Bet491", "player=0 move=Bet492", "player=0 move=Bet493", "player=0 move=Bet494", "player=0 move=Bet495", "player=0 move=Bet496", "player=0 move=Bet497", "player=0 move=Bet498", "player=0 move=Bet499", "player=0 move=Bet500", "player=0 move=Bet501", "player=0 move=Bet502", "player=0 move=Bet503", "player=0 move=Bet504", "player=0 move=Bet505", "player=0 
move=Bet506", "player=0 move=Bet507", "player=0 move=Bet508", "player=0 move=Bet509", "player=0 move=Bet510", "player=0 move=Bet511", "player=0 move=Bet512", "player=0 move=Bet513", "player=0 move=Bet514", "player=0 move=Bet515", "player=0 move=Bet516", "player=0 move=Bet517", "player=0 move=Bet518", "player=0 move=Bet519", "player=0 move=Bet520", "player=0 move=Bet521", "player=0 move=Bet522", "player=0 move=Bet523", "player=0 move=Bet524", "player=0 move=Bet525", "player=0 move=Bet526", "player=0 move=Bet527", "player=0 move=Bet528", "player=0 move=Bet529", "player=0 move=Bet530", "player=0 move=Bet531", "player=0 move=Bet532", "player=0 move=Bet533", "player=0 move=Bet534", "player=0 move=Bet535", "player=0 move=Bet536", "player=0 move=Bet537", "player=0 move=Bet538", "player=0 move=Bet539", "player=0 move=Bet540", "player=0 move=Bet541", "player=0 move=Bet542", "player=0 move=Bet543", "player=0 move=Bet544", "player=0 move=Bet545", "player=0 move=Bet546", "player=0 move=Bet547", "player=0 move=Bet548", "player=0 move=Bet549", "player=0 move=Bet550", "player=0 move=Bet551", "player=0 move=Bet552", "player=0 move=Bet553", "player=0 move=Bet554", "player=0 move=Bet555", "player=0 move=Bet556", "player=0 move=Bet557", "player=0 move=Bet558", "player=0 move=Bet559", "player=0 move=Bet560", "player=0 move=Bet561", "player=0 move=Bet562", "player=0 move=Bet563", "player=0 move=Bet564", "player=0 move=Bet565", "player=0 move=Bet566", "player=0 move=Bet567", "player=0 move=Bet568", "player=0 move=Bet569", "player=0 move=Bet570", "player=0 move=Bet571", "player=0 move=Bet572", "player=0 move=Bet573", "player=0 move=Bet574", "player=0 move=Bet575", "player=0 move=Bet576", "player=0 move=Bet577", "player=0 move=Bet578", "player=0 move=Bet579", "player=0 move=Bet580", "player=0 move=Bet581", "player=0 move=Bet582", "player=0 move=Bet583", "player=0 move=Bet584", "player=0 move=Bet585", "player=0 move=Bet586", "player=0 move=Bet587", "player=0 move=Bet588", "player=0 move=Bet589", "player=0 move=Bet590", "player=0 move=Bet591", "player=0 move=Bet592", "player=0 move=Bet593", "player=0 move=Bet594", "player=0 move=Bet595", "player=0 move=Bet596", "player=0 move=Bet597", "player=0 move=Bet598", "player=0 move=Bet599", "player=0 move=Bet600", "player=0 move=Bet601", "player=0 move=Bet602", "player=0 move=Bet603", "player=0 move=Bet604", "player=0 move=Bet605", "player=0 move=Bet606", "player=0 move=Bet607", "player=0 move=Bet608", "player=0 move=Bet609", "player=0 move=Bet610", "player=0 move=Bet611", "player=0 move=Bet612", "player=0 move=Bet613", "player=0 move=Bet614", "player=0 move=Bet615", "player=0 move=Bet616", "player=0 move=Bet617", "player=0 move=Bet618", "player=0 move=Bet619", "player=0 move=Bet620", "player=0 move=Bet621", "player=0 move=Bet622", "player=0 move=Bet623", "player=0 move=Bet624", "player=0 move=Bet625", "player=0 move=Bet626", "player=0 move=Bet627", "player=0 move=Bet628", "player=0 move=Bet629", "player=0 move=Bet630", "player=0 move=Bet631", "player=0 move=Bet632", "player=0 move=Bet633", "player=0 move=Bet634", "player=0 move=Bet635", "player=0 move=Bet636", "player=0 move=Bet637", "player=0 move=Bet638", "player=0 move=Bet639", "player=0 move=Bet640", "player=0 move=Bet641", "player=0 move=Bet642", "player=0 move=Bet643", "player=0 move=Bet644", "player=0 move=Bet645", "player=0 move=Bet646", "player=0 move=Bet647", "player=0 move=Bet648", "player=0 move=Bet649", "player=0 move=Bet650", "player=0 move=Bet651", "player=0 move=Bet652", "player=0 move=Bet653", "player=0 
move=Bet654", "player=0 move=Bet655", "player=0 move=Bet656", "player=0 move=Bet657", "player=0 move=Bet658", "player=0 move=Bet659", "player=0 move=Bet660", "player=0 move=Bet661", "player=0 move=Bet662", "player=0 move=Bet663", "player=0 move=Bet664", "player=0 move=Bet665", "player=0 move=Bet666", "player=0 move=Bet667", "player=0 move=Bet668", "player=0 move=Bet669", "player=0 move=Bet670", "player=0 move=Bet671", "player=0 move=Bet672", "player=0 move=Bet673", "player=0 move=Bet674", "player=0 move=Bet675", "player=0 move=Bet676", "player=0 move=Bet677", "player=0 move=Bet678", "player=0 move=Bet679", "player=0 move=Bet680", "player=0 move=Bet681", "player=0 move=Bet682", "player=0 move=Bet683", "player=0 move=Bet684", "player=0 move=Bet685", "player=0 move=Bet686", "player=0 move=Bet687", "player=0 move=Bet688", "player=0 move=Bet689", "player=0 move=Bet690", "player=0 move=Bet691", "player=0 move=Bet692", "player=0 move=Bet693", "player=0 move=Bet694", "player=0 move=Bet695", "player=0 move=Bet696", "player=0 move=Bet697", "player=0 move=Bet698", "player=0 move=Bet699", "player=0 move=Bet700", "player=0 move=Bet701", "player=0 move=Bet702", "player=0 move=Bet703", "player=0 move=Bet704", "player=0 move=Bet705", "player=0 move=Bet706", "player=0 move=Bet707", "player=0 move=Bet708", "player=0 move=Bet709", "player=0 move=Bet710", "player=0 move=Bet711", "player=0 move=Bet712", "player=0 move=Bet713", "player=0 move=Bet714", "player=0 move=Bet715", "player=0 move=Bet716", "player=0 move=Bet717", "player=0 move=Bet718", "player=0 move=Bet719", "player=0 move=Bet720", "player=0 move=Bet721", "player=0 move=Bet722", "player=0 move=Bet723", "player=0 move=Bet724", "player=0 move=Bet725", "player=0 move=Bet726", "player=0 move=Bet727", "player=0 move=Bet728", "player=0 move=Bet729", "player=0 move=Bet730", "player=0 move=Bet731", "player=0 move=Bet732", "player=0 move=Bet733", "player=0 move=Bet734", "player=0 move=Bet735", "player=0 move=Bet736", "player=0 move=Bet737", "player=0 move=Bet738", "player=0 move=Bet739", "player=0 move=Bet740", "player=0 move=Bet741", "player=0 move=Bet742", "player=0 move=Bet743", "player=0 move=Bet744", "player=0 move=Bet745", "player=0 move=Bet746", "player=0 move=Bet747", "player=0 move=Bet748", "player=0 move=Bet749", "player=0 move=Bet750", "player=0 move=Bet751", "player=0 move=Bet752", "player=0 move=Bet753", "player=0 move=Bet754", "player=0 move=Bet755", "player=0 move=Bet756", "player=0 move=Bet757", "player=0 move=Bet758", "player=0 move=Bet759", "player=0 move=Bet760", "player=0 move=Bet761", "player=0 move=Bet762", "player=0 move=Bet763", "player=0 move=Bet764", "player=0 move=Bet765", "player=0 move=Bet766", "player=0 move=Bet767", "player=0 move=Bet768", "player=0 move=Bet769", "player=0 move=Bet770", "player=0 move=Bet771", "player=0 move=Bet772", "player=0 move=Bet773", "player=0 move=Bet774", "player=0 move=Bet775", "player=0 move=Bet776", "player=0 move=Bet777", "player=0 move=Bet778", "player=0 move=Bet779", "player=0 move=Bet780", "player=0 move=Bet781", "player=0 move=Bet782", "player=0 move=Bet783", "player=0 move=Bet784", "player=0 move=Bet785", "player=0 move=Bet786", "player=0 move=Bet787", "player=0 move=Bet788", "player=0 move=Bet789", "player=0 move=Bet790", "player=0 move=Bet791", "player=0 move=Bet792", "player=0 move=Bet793", "player=0 move=Bet794", "player=0 move=Bet795", "player=0 move=Bet796", "player=0 move=Bet797", "player=0 move=Bet798", "player=0 move=Bet799", "player=0 move=Bet800", "player=0 move=Bet801", "player=0 
move=Bet802", "player=0 move=Bet803", "player=0 move=Bet804", "player=0 move=Bet805", "player=0 move=Bet806", "player=0 move=Bet807", "player=0 move=Bet808", "player=0 move=Bet809", "player=0 move=Bet810", "player=0 move=Bet811", "player=0 move=Bet812", "player=0 move=Bet813", "player=0 move=Bet814", "player=0 move=Bet815", "player=0 move=Bet816", "player=0 move=Bet817", "player=0 move=Bet818", "player=0 move=Bet819", "player=0 move=Bet820", "player=0 move=Bet821", "player=0 move=Bet822", "player=0 move=Bet823", "player=0 move=Bet824", "player=0 move=Bet825", "player=0 move=Bet826", "player=0 move=Bet827", "player=0 move=Bet828", "player=0 move=Bet829", "player=0 move=Bet830", "player=0 move=Bet831", "player=0 move=Bet832", "player=0 move=Bet833", "player=0 move=Bet834", "player=0 move=Bet835", "player=0 move=Bet836", "player=0 move=Bet837", "player=0 move=Bet838", "player=0 move=Bet839", "player=0 move=Bet840", "player=0 move=Bet841", "player=0 move=Bet842", "player=0 move=Bet843", "player=0 move=Bet844", "player=0 move=Bet845", "player=0 move=Bet846", "player=0 move=Bet847", "player=0 move=Bet848", "player=0 move=Bet849", "player=0 move=Bet850", "player=0 move=Bet851", "player=0 move=Bet852", "player=0 move=Bet853", "player=0 move=Bet854", "player=0 move=Bet855", "player=0 move=Bet856", "player=0 move=Bet857", "player=0 move=Bet858", "player=0 move=Bet859", "player=0 move=Bet860", "player=0 move=Bet861", "player=0 move=Bet862", "player=0 move=Bet863", "player=0 move=Bet864", "player=0 move=Bet865", "player=0 move=Bet866", "player=0 move=Bet867", "player=0 move=Bet868", "player=0 move=Bet869", "player=0 move=Bet870", "player=0 move=Bet871", "player=0 move=Bet872", "player=0 move=Bet873", "player=0 move=Bet874", "player=0 move=Bet875", "player=0 move=Bet876", "player=0 move=Bet877", "player=0 move=Bet878", "player=0 move=Bet879", "player=0 move=Bet880", "player=0 move=Bet881", "player=0 move=Bet882", "player=0 move=Bet883", "player=0 move=Bet884", "player=0 move=Bet885", "player=0 move=Bet886", "player=0 move=Bet887", "player=0 move=Bet888", "player=0 move=Bet889", "player=0 move=Bet890", "player=0 move=Bet891", "player=0 move=Bet892", "player=0 move=Bet893", "player=0 move=Bet894", "player=0 move=Bet895", "player=0 move=Bet896", "player=0 move=Bet897", "player=0 move=Bet898", "player=0 move=Bet899", "player=0 move=Bet900", "player=0 move=Bet901", "player=0 move=Bet902", "player=0 move=Bet903", "player=0 move=Bet904", "player=0 move=Bet905", "player=0 move=Bet906", "player=0 move=Bet907", "player=0 move=Bet908", "player=0 move=Bet909", "player=0 move=Bet910", "player=0 move=Bet911", "player=0 move=Bet912", "player=0 move=Bet913", "player=0 move=Bet914", "player=0 move=Bet915", "player=0 move=Bet916", "player=0 move=Bet917", "player=0 move=Bet918", "player=0 move=Bet919", "player=0 move=Bet920", "player=0 move=Bet921", "player=0 move=Bet922", "player=0 move=Bet923", "player=0 move=Bet924", "player=0 move=Bet925", "player=0 move=Bet926", "player=0 move=Bet927", "player=0 move=Bet928", "player=0 move=Bet929", "player=0 move=Bet930", "player=0 move=Bet931", "player=0 move=Bet932", "player=0 move=Bet933", "player=0 move=Bet934", "player=0 move=Bet935", "player=0 move=Bet936", "player=0 move=Bet937", "player=0 move=Bet938", "player=0 move=Bet939", "player=0 move=Bet940", "player=0 move=Bet941", "player=0 move=Bet942", "player=0 move=Bet943", "player=0 move=Bet944", "player=0 move=Bet945", "player=0 move=Bet946", "player=0 move=Bet947", "player=0 move=Bet948", "player=0 move=Bet949", "player=0 
move=Bet950", "player=0 move=Bet951", "player=0 move=Bet952", "player=0 move=Bet953", "player=0 move=Bet954", "player=0 move=Bet955", "player=0 move=Bet956", "player=0 move=Bet957", "player=0 move=Bet958", "player=0 move=Bet959", "player=0 move=Bet960", "player=0 move=Bet961", "player=0 move=Bet962", "player=0 move=Bet963", "player=0 move=Bet964", "player=0 move=Bet965", "player=0 move=Bet966", "player=0 move=Bet967", "player=0 move=Bet968", "player=0 move=Bet969", "player=0 move=Bet970", "player=0 move=Bet971", "player=0 move=Bet972", "player=0 move=Bet973", "player=0 move=Bet974", "player=0 move=Bet975", "player=0 move=Bet976", "player=0 move=Bet977", "player=0 move=Bet978", "player=0 move=Bet979", "player=0 move=Bet980", "player=0 move=Bet981", "player=0 move=Bet982", "player=0 move=Bet983", "player=0 move=Bet984", "player=0 move=Bet985", "player=0 move=Bet986", "player=0 move=Bet987", "player=0 move=Bet988", "player=0 move=Bet989", "player=0 move=Bet990", "player=0 move=Bet991", "player=0 move=Bet992", "player=0 move=Bet993", "player=0 move=Bet994", "player=0 move=Bet995", "player=0 move=Bet996", "player=0 move=Bet997", "player=0 move=Bet998", "player=0 move=Bet999", "player=0 move=Bet1000", "player=0 move=Bet1001", "player=0 move=Bet1002", "player=0 move=Bet1003", "player=0 move=Bet1004", "player=0 move=Bet1005", "player=0 move=Bet1006", "player=0 move=Bet1007", "player=0 move=Bet1008", "player=0 move=Bet1009", "player=0 move=Bet1010", "player=0 move=Bet1011", "player=0 move=Bet1012", "player=0 move=Bet1013", "player=0 move=Bet1014", "player=0 move=Bet1015", "player=0 move=Bet1016", "player=0 move=Bet1017", "player=0 move=Bet1018", "player=0 move=Bet1019", "player=0 move=Bet1020", "player=0 move=Bet1021", "player=0 move=Bet1022", "player=0 move=Bet1023", "player=0 move=Bet1024", "player=0 move=Bet1025", "player=0 move=Bet1026", "player=0 move=Bet1027", "player=0 move=Bet1028", "player=0 move=Bet1029", "player=0 move=Bet1030", "player=0 move=Bet1031", "player=0 move=Bet1032", "player=0 move=Bet1033", "player=0 move=Bet1034", "player=0 move=Bet1035", "player=0 move=Bet1036", "player=0 move=Bet1037", "player=0 move=Bet1038", "player=0 move=Bet1039", "player=0 move=Bet1040", "player=0 move=Bet1041", "player=0 move=Bet1042", "player=0 move=Bet1043", "player=0 move=Bet1044", "player=0 move=Bet1045", "player=0 move=Bet1046", "player=0 move=Bet1047", "player=0 move=Bet1048", "player=0 move=Bet1049", "player=0 move=Bet1050", "player=0 move=Bet1051", "player=0 move=Bet1052", "player=0 move=Bet1053", "player=0 move=Bet1054", "player=0 move=Bet1055", "player=0 move=Bet1056", "player=0 move=Bet1057", "player=0 move=Bet1058", "player=0 move=Bet1059", "player=0 move=Bet1060", "player=0 move=Bet1061", "player=0 move=Bet1062", "player=0 move=Bet1063", "player=0 move=Bet1064", "player=0 move=Bet1065", "player=0 move=Bet1066", "player=0 move=Bet1067", "player=0 move=Bet1068", "player=0 move=Bet1069", "player=0 move=Bet1070", "player=0 move=Bet1071", "player=0 move=Bet1072", "player=0 move=Bet1073", "player=0 move=Bet1074", "player=0 move=Bet1075", "player=0 move=Bet1076", "player=0 move=Bet1077", "player=0 move=Bet1078", "player=0 move=Bet1079", "player=0 move=Bet1080", "player=0 move=Bet1081", "player=0 move=Bet1082", "player=0 move=Bet1083", "player=0 move=Bet1084", "player=0 move=Bet1085", "player=0 move=Bet1086", "player=0 move=Bet1087", "player=0 move=Bet1088", "player=0 move=Bet1089", "player=0 move=Bet1090", "player=0 move=Bet1091", "player=0 move=Bet1092", "player=0 move=Bet1093", "player=0 
move=Bet1094", "player=0 move=Bet1095", "player=0 move=Bet1096", "player=0 move=Bet1097", "player=0 move=Bet1098", "player=0 move=Bet1099", "player=0 move=Bet1100", "player=0 move=Bet1101", "player=0 move=Bet1102", "player=0 move=Bet1103", "player=0 move=Bet1104", "player=0 move=Bet1105", "player=0 move=Bet1106", "player=0 move=Bet1107", "player=0 move=Bet1108", "player=0 move=Bet1109", "player=0 move=Bet1110", "player=0 move=Bet1111", "player=0 move=Bet1112", "player=0 move=Bet1113", "player=0 move=Bet1114", "player=0 move=Bet1115", "player=0 move=Bet1116", "player=0 move=Bet1117", "player=0 move=Bet1118", "player=0 move=Bet1119", "player=0 move=Bet1120", "player=0 move=Bet1121", "player=0 move=Bet1122", "player=0 move=Bet1123", "player=0 move=Bet1124", "player=0 move=Bet1125", "player=0 move=Bet1126", "player=0 move=Bet1127", "player=0 move=Bet1128", "player=0 move=Bet1129", "player=0 move=Bet1130", "player=0 move=Bet1131", "player=0 move=Bet1132", "player=0 move=Bet1133", "player=0 move=Bet1134", "player=0 move=Bet1135", "player=0 move=Bet1136", "player=0 move=Bet1137", "player=0 move=Bet1138", "player=0 move=Bet1139", "player=0 move=Bet1140", "player=0 move=Bet1141", "player=0 move=Bet1142", "player=0 move=Bet1143", "player=0 move=Bet1144", "player=0 move=Bet1145", "player=0 move=Bet1146", "player=0 move=Bet1147", "player=0 move=Bet1148", "player=0 move=Bet1149", "player=0 move=Bet1150", "player=0 move=Bet1151", "player=0 move=Bet1152", "player=0 move=Bet1153", "player=0 move=Bet1154", "player=0 move=Bet1155", "player=0 move=Bet1156", "player=0 move=Bet1157", "player=0 move=Bet1158", "player=0 move=Bet1159", "player=0 move=Bet1160", "player=0 move=Bet1161", "player=0 move=Bet1162", "player=0 move=Bet1163", "player=0 move=Bet1164", "player=0 move=Bet1165", "player=0 move=Bet1166", "player=0 move=Bet1167", "player=0 move=Bet1168", "player=0 move=Bet1169", "player=0 move=Bet1170", "player=0 move=Bet1171", "player=0 move=Bet1172", "player=0 move=Bet1173", "player=0 move=Bet1174", "player=0 move=Bet1175", "player=0 move=Bet1176", "player=0 move=Bet1177", "player=0 move=Bet1178", "player=0 move=Bet1179", "player=0 move=Bet1180", "player=0 move=Bet1181", "player=0 move=Bet1182", "player=0 move=Bet1183", "player=0 move=Bet1184", "player=0 move=Bet1185", "player=0 move=Bet1186", "player=0 move=Bet1187", "player=0 move=Bet1188", "player=0 move=Bet1189", "player=0 move=Bet1190", "player=0 move=Bet1191", "player=0 move=Bet1192", "player=0 move=Bet1193", "player=0 move=Bet1194", "player=0 move=Bet1195", "player=0 move=Bet1196", "player=0 move=Bet1197", "player=0 move=Bet1198", "player=0 move=Bet1199", "player=0 move=Bet1200"] + +# Apply action "player=0 move=Bet801" +action: 801 + +# State 3 +# BettingAbstraction: FULLGAME +# P0 Cards: 5s +# P1 Cards: 7d +# BoardCards +# Node type?: Player node for player 1 +# ] +# Round: 0 +# ACPC State: STATE:0:r801:5s|7d +# Spent: [P0: 801 P1: 100 ] +# +# Action Sequence: ddp +IsTerminal() = False +History() = [15, 21, 801] +HistoryString() = "15, 21, 801" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "[Round 0][Player: 1][Pot: 1602][Money: 399 1100][Private: 5s][Public: ][Sequences: r801]" +InformationStateString(1) = "[Round 0][Player: 1][Pot: 1602][Money: 399 1100][Private: 7d][Public: ][Sequences: r801]" +InformationStateTensor(0) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 801.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1) = [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 801.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationString(0) = "[Round 0][Player: 1][Pot: 1602][Money: 399 1100][Private: 5s][Ante: 801 100]" +ObservationString(1) = "[Round 0][Player: 1][Pot: 1602][Money: 399 1100][Private: 7d][Ante: 801 100]" +ObservationTensor(0) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 801.0, 100.0] +ObservationTensor(1) = [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 801.0, 100.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 1200] +StringLegalActions() = ["player=1 move=Fold", "player=1 move=Call", "player=1 move=Bet1200"] + +# Apply action "player=1 move=Fold" +action: 0 + +# State 4 +# BettingAbstraction: FULLGAME +# P0 Cards: 5s +# P1 Cards: 7d +# BoardCards +# P0 Reward: 100 +# P1 Reward: -100 +# Node type?: Terminal Node! 
+# ] +# Round: 0 +# ACPC State: STATE:0:r801f:5s|7d +# Spent: [P0: 801 P1: 100 ] +# +# Action Sequence: ddpf +IsTerminal() = True +History() = [15, 21, 801, 0] +HistoryString() = "15, 21, 801, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "[Round 0][Player: -4][Pot: 801][Money: 399 1100][Private: 5s][Public: ][Sequences: r801f]" +InformationStateString(1) = "[Round 0][Player: -4][Pot: 801][Money: 399 1100][Private: 7d][Public: ][Sequences: r801f]" +InformationStateTensor(0) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 801.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1) = [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 801.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationString(0) = "[Round 0][Player: -4][Pot: 801][Money: 399 1100][Private: 5s][Ante: 801 100]" +ObservationString(1) = "[Round 0][Player: -4][Pot: 801][Money: 399 1100][Private: 7d][Ante: 801 100]" +ObservationTensor(0) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 801.0, 100.0] +ObservationTensor(1) = [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 801.0, 100.0] +Rewards() = [100, -100] +Returns() = [100, -100] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/universal_poker.txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/universal_poker.txt new file mode 100644 index 0000000..331af46 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/universal_poker.txt @@ -0,0 +1,235 @@ +game: universal_poker + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "Universal Poker" +GameType.max_num_players = 10 +GameType.min_num_players = 2 +GameType.parameter_specification = ["betting", "bettingAbstraction", "blind", "boardCards", "firstPlayer", "handReaches", "maxRaises", "numBoardCards", "numHoleCards", "numPlayers", "numRanks", "numRounds", "numSuits", "potSize", "raiseSize", "stack"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True 
+GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "universal_poker" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 4 +PolicyTensorShape() = [4] +MaxChanceOutcomes() = 24 +GetParameters() = {betting=nolimit,bettingAbstraction=fcpa,blind=100 100,boardCards=,firstPlayer=1 1,handReaches=,maxRaises=,numBoardCards=0 1,numHoleCards=1,numPlayers=2,numRanks=6,numRounds=2,numSuits=4,potSize=0,stack=1200 1200} +NumPlayers() = 2 +MinUtility() = -1200.0 +MaxUtility() = 1200.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = [83] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 83 +ObservationTensorShape() = [52] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 52 +MaxGameLength() = 11 +ToString() = "universal_poker()" + +# State 0 +# BettingAbstration: FCPA +# P0 Cards: +# P1 Cards: +# BoardCards +# PossibleCardsToDeal 7s7h7d7c6s6h6d6c5s5h5d5c4s4h4d4c3s3h3d3c2s2h2d2c +# Node type?: Chance node +# PossibleActions (1): [ ACTION_DEAL ] +# Round: 0 +# ACPC State: STATE:0::2c|2c +# Spent: [P0: 100 P1: 100 ] +# +# Action Sequence: +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "[Round 0][Player: -1][Pot: 200][Money: 1100 1100][Private: ][Public: ][Sequences: ]" +InformationStateString(1) = "[Round 0][Player: -1][Pot: 200][Money: 1100 1100][Private: ][Public: ][Sequences: ]" +InformationStateTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "[Round 0][Player: -1][Pot: 200][Money: 1100 1100][Private: ][Ante: 100 100]" +ObservationString(1) = "[Round 0][Player: -1][Pot: 200][Money: 1100 1100][Private: ][Ante: 100 100]" +ObservationTensor(0) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 100.0] +ObservationTensor(1) = [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 100.0] +ChanceOutcomes() = [(0,0.0416667), (1,0.0416667), (2,0.0416667), (3,0.0416667), (4,0.0416667), (5,0.0416667), (6,0.0416667), (7,0.0416667), (8,0.0416667), (9,0.0416667), (10,0.0416667), (11,0.0416667), (12,0.0416667), (13,0.0416667), (14,0.0416667), (15,0.0416667), (16,0.0416667), (17,0.0416667), (18,0.0416667), (19,0.0416667), (20,0.0416667), (21,0.0416667), (22,0.0416667), (23,0.0416667)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] +StringLegalActions() = ["player=-1 move=Deal(0)", "player=-1 move=Deal(1)", "player=-1 move=Deal(2)", "player=-1 move=Deal(3)", "player=-1 move=Deal(4)", "player=-1 move=Deal(5)", "player=-1 move=Deal(6)", "player=-1 move=Deal(7)", "player=-1 move=Deal(8)", "player=-1 move=Deal(9)", "player=-1 move=Deal(10)", "player=-1 move=Deal(11)", "player=-1 move=Deal(12)", "player=-1 move=Deal(13)", "player=-1 
move=Deal(14)", "player=-1 move=Deal(15)", "player=-1 move=Deal(16)", "player=-1 move=Deal(17)", "player=-1 move=Deal(18)", "player=-1 move=Deal(19)", "player=-1 move=Deal(20)", "player=-1 move=Deal(21)", "player=-1 move=Deal(22)", "player=-1 move=Deal(23)"] + +# Apply action "player=-1 move=Deal(12)" +action: 12 + +# State 1 +# BettingAbstration: FCPA +# P0 Cards: 5c +# P1 Cards: +# BoardCards +# PossibleCardsToDeal 7s7h7d7c6s6h6d6c5s5h5d4s4h4d4c3s3h3d3c2s2h2d2c +# Node type?: Chance node +# PossibleActions (1): [ ACTION_DEAL ] +# Round: 0 +# ACPC State: STATE:0::5c|2c +# Spent: [P0: 100 P1: 100 ] +# +# Action Sequence: d +IsTerminal() = False +History() = [12] +HistoryString() = "12" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "[Round 0][Player: -1][Pot: 200][Money: 1100 1100][Private: 5c][Public: ][Sequences: ]" +InformationStateString(1) = "[Round 0][Player: -1][Pot: 200][Money: 1100 1100][Private: ][Public: ][Sequences: ]" +InformationStateTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "[Round 0][Player: -1][Pot: 200][Money: 1100 1100][Private: 5c][Ante: 100 100]" +ObservationString(1) = "[Round 0][Player: -1][Pot: 200][Money: 1100 1100][Private: ][Ante: 100 100]" +ObservationTensor(0) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 100.0] +ObservationTensor(1) = [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 100.0] +ChanceOutcomes() = [(0,0.0434783), (1,0.0434783), (2,0.0434783), (3,0.0434783), (4,0.0434783), (5,0.0434783), (6,0.0434783), (7,0.0434783), (8,0.0434783), (9,0.0434783), (10,0.0434783), (11,0.0434783), (13,0.0434783), (14,0.0434783), (15,0.0434783), (16,0.0434783), (17,0.0434783), (18,0.0434783), (19,0.0434783), (20,0.0434783), (21,0.0434783), (22,0.0434783), (23,0.0434783)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] +StringLegalActions() = ["player=-1 move=Deal(0)", "player=-1 move=Deal(1)", "player=-1 move=Deal(2)", "player=-1 move=Deal(3)", "player=-1 move=Deal(4)", "player=-1 move=Deal(5)", "player=-1 move=Deal(6)", "player=-1 move=Deal(7)", "player=-1 move=Deal(8)", "player=-1 move=Deal(9)", "player=-1 move=Deal(10)", "player=-1 move=Deal(11)", "player=-1 move=Deal(13)", "player=-1 move=Deal(14)", "player=-1 move=Deal(15)", "player=-1 move=Deal(16)", "player=-1 move=Deal(17)", "player=-1 move=Deal(18)", "player=-1 move=Deal(19)", "player=-1 move=Deal(20)", "player=-1 move=Deal(21)", "player=-1 move=Deal(22)", "player=-1 move=Deal(23)"] + +# Apply action "player=-1 move=Deal(13)" +action: 13 + +# State 2 +# BettingAbstration: FCPA +# P0 Cards: 5c +# P1 Cards: 5d +# BoardCards +# Node type?: Player node for player 0 +# PossibleActions (3): [ ACTION_CHECK_CALL ACTION_BET ACTION_ALL_IN ] +# Round: 0 +# ACPC State: STATE:0::5c|5d +# Spent: [P0: 100 P1: 100 ] +# +# Action Sequence: dd +IsTerminal() = False +History() = [12, 13] +HistoryString() = "12, 13" 
+IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "[Round 0][Player: 0][Pot: 200][Money: 1100 1100][Private: 5c][Public: ][Sequences: ]" +InformationStateString(1) = "[Round 0][Player: 0][Pot: 200][Money: 1100 1100][Private: 5d][Public: ][Sequences: ]" +InformationStateTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "[Round 0][Player: 0][Pot: 200][Money: 1100 1100][Private: 5c][Ante: 100 100]" +ObservationString(1) = "[Round 0][Player: 0][Pot: 200][Money: 1100 1100][Private: 5d][Ante: 100 100]" +ObservationTensor(0) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 100.0] +ObservationTensor(1) = [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 100.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 3] +StringLegalActions() = ["player=0 move=Call", "player=0 move=Bet", "player=0 move=AllIn"] + +# Apply action "player=0 move=Call" +action: 1 + +# State 3 +# BettingAbstration: FCPA +# P0 Cards: 5c +# P1 Cards: 5d +# BoardCards +# Node type?: Player node for player 1 +# PossibleActions (3): [ ACTION_CHECK_CALL ACTION_BET ACTION_ALL_IN ] +# Round: 0 +# ACPC State: STATE:0:c:5c|5d +# Spent: [P0: 100 P1: 100 ] +# +# Action Sequence: ddc +IsTerminal() = False +History() = [12, 13, 1] +HistoryString() = "12, 13, 1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "[Round 0][Player: 1][Pot: 200][Money: 1100 1100][Private: 5c][Public: ][Sequences: c]" +InformationStateString(1) = "[Round 0][Player: 1][Pot: 200][Money: 1100 1100][Private: 5d][Public: ][Sequences: c]" +InformationStateTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +InformationStateTensor(1): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ +ObservationString(0) = "[Round 0][Player: 1][Pot: 200][Money: 1100 1100][Private: 5c][Ante: 100 100]" +ObservationString(1) = "[Round 0][Player: 1][Pot: 200][Money: 1100 1100][Private: 5d][Ante: 100 100]" +ObservationTensor(0) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 100.0] +ObservationTensor(1) = [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 100.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [1, 2, 3] +StringLegalActions() = ["player=1 move=Call", "player=1 move=Bet", "player=1 move=AllIn"] + +# Apply action "player=1 move=Bet" +action: 2 + +# State 4 +# BettingAbstration: FCPA +# P0 Cards: 5c +# P1 Cards: 5d +# BoardCards +# Node type?: Player node for player 0 +# 
PossibleActions (4): [ ACTION_FOLD ACTION_CHECK_CALL ACTION_BET ACTION_ALL_IN ] +# Round: 0 +# ACPC State: STATE:0:cr300:5c|5d +# Spent: [P0: 100 P1: 300 ] +# +# Action Sequence: ddcp +IsTerminal() = False +History() = [12, 13, 1, 2] +HistoryString() = "12, 13, 1, 2" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "[Round 0][Player: 0][Pot: 600][Money: 1100 900][Private: 5c][Public: ][Sequences: cr300]" +InformationStateString(1) = "[Round 0][Player: 0][Pot: 600][Money: 1100 900][Private: 5d][Public: ][Sequences: cr300]" +InformationStateTensor(0) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 300.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1) = [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 300.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationString(0) = "[Round 0][Player: 0][Pot: 600][Money: 1100 900][Private: 5c][Ante: 100 300]" +ObservationString(1) = "[Round 0][Player: 0][Pot: 600][Money: 1100 900][Private: 5d][Ante: 100 300]" +ObservationTensor(0) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 300.0] +ObservationTensor(1) = [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 300.0] +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3] +StringLegalActions() = ["player=0 move=Fold", "player=0 move=Call", "player=0 move=Bet", "player=0 move=AllIn"] + +# Apply action "player=0 move=Fold" +action: 0 + +# State 5 +# BettingAbstration: FCPA +# P0 Cards: 5c +# P1 Cards: 5d +# BoardCards +# P0 Reward: -100 +# P1 Reward: 100 +# Node type?: Terminal Node! 
+# PossibleActions (0): [] +# Round: 0 +# ACPC State: STATE:0:cr300f:5c|5d +# Spent: [P0: 100 P1: 300 ] +# +# Action Sequence: ddcpf +IsTerminal() = True +History() = [12, 13, 1, 2, 0] +HistoryString() = "12, 13, 1, 2, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "[Round 0][Player: -4][Pot: 300][Money: 1100 900][Private: 5c][Public: ][Sequences: cr300f]" +InformationStateString(1) = "[Round 0][Player: -4][Pot: 300][Money: 1100 900][Private: 5d][Public: ][Sequences: cr300f]" +InformationStateTensor(0) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 300.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +InformationStateTensor(1) = [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 300.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationString(0) = "[Round 0][Player: -4][Pot: 300][Money: 1100 900][Private: 5c][Ante: 100 300]" +ObservationString(1) = "[Round 0][Player: -4][Pot: 300][Money: 1100 900][Private: 5d][Ante: 100 300]" +ObservationTensor(0) = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 300.0] +ObservationTensor(1) = [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 300.0] +Rewards() = [-100, 100] +Returns() = [-100, 100] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/y(board_size=9).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/y(board_size=9).txt new file mode 100644 index 0000000..aa4efbe --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/y(board_size=9).txt @@ -0,0 +1,644 @@ +game: y(board_size=9) + +GameType.chance_mode = ChanceMode.DETERMINISTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.PERFECT_INFORMATION +GameType.long_name = "Y Connection Game" +GameType.max_num_players = 2 +GameType.min_num_players = 2 +GameType.parameter_specification = ["ansi_color_output", "board_size"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = False +GameType.provides_observation_string = True +GameType.provides_observation_tensor = True +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "y" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 81 +PolicyTensorShape() = [81] +MaxChanceOutcomes() = 0 +GetParameters() = {ansi_color_output=False,board_size=9} +NumPlayers() = 2 +MinUtility() = -1.0 +MaxUtility() = 1.0 
+UtilitySum() = 0.0 +ObservationTensorShape() = [3, 9, 9] +ObservationTensorLayout() = TensorLayout.CHW +ObservationTensorSize() = 243 +MaxGameLength() = 45 +ToString() = "y(board_size=9)" + +# State 0 +# a b c d e f g h i +# 1 . . . . . . . . . +# 2 . . . . . . . . +# 3 . . . . . . . +# 4 . . . . . . +# 5 . . . . . +# 6 . . . . +# 7 . . . +# 8 . . +# 9 . +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "" +InformationStateString(1) = "" +ObservationString(0) = " a b c d e f g h i\n 1 . . . . . . . . .\n 2 . . . . . . . .\n 3 . . . . . . .\n 4 . . . . . .\n 5 . . . . .\n 6 . . . .\n 7 . . .\n 8 . .\n 9 .\n" +ObservationString(1) = " a b c d e f g h i\n 1 . . . . . . . . .\n 2 . . . . . . . .\n 3 . . . . . . .\n 4 . . . . . .\n 5 . . . . .\n 6 . . . .\n 7 . . .\n 8 . .\n 9 .\n" +ObservationTensor(0): +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 27, 28, 29, 30, 31, 32, 36, 37, 38, 39, 40, 45, 46, 47, 48, 54, 55, 56, 63, 64, 72] +StringLegalActions() = ["a1", "b1", "c1", "d1", "e1", "f1", "g1", "h1", "i1", "a2", "b2", "c2", "d2", "e2", "f2", "g2", "h2", "a3", "b3", "c3", "d3", "e3", "f3", "g3", "a4", "b4", "c4", "d4", "e4", "f4", "a5", "b5", "c5", "d5", "e5", "a6", "b6", "c6", "d6", "a7", "b7", "c7", "a8", "b8", "a9"] + +# Apply action "b1" +action: 1 + +# State 1 +# a b c d e f g h i +# 1 .[O]. . . . . . . +# 2 . . . . . . . . +# 3 . . . . . . . +# 4 . . . . . . +# 5 . . . . . +# 6 . . . . +# 7 . . . +# 8 . . +# 9 . +IsTerminal() = False +History() = [1] +HistoryString() = "1" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "1" +InformationStateString(1) = "1" +ObservationString(0) = " a b c d e f g h i\n 1 .[O]. . . . . . .\n 2 . . . . . . . .\n 3 . . . . . . .\n 4 . . . . . .\n 5 . . . . .\n 6 . . . .\n 7 . . .\n 8 . .\n 9 .\n" +ObservationString(1) = " a b c d e f g h i\n 1 .[O]. . . . . . .\n 2 . . . . . . . .\n 3 . . . . . . .\n 4 . . . . . .\n 5 . . . . .\n 6 . . . .\n 7 . . .\n 8 . 
.\n 9 .\n" +ObservationTensor(0): +◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◯◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯◯ ◉◯◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 27, 28, 29, 30, 31, 32, 36, 37, 38, 39, 40, 45, 46, 47, 48, 54, 55, 56, 63, 64, 72] +StringLegalActions() = ["a1", "c1", "d1", "e1", "f1", "g1", "h1", "i1", "a2", "b2", "c2", "d2", "e2", "f2", "g2", "h2", "a3", "b3", "c3", "d3", "e3", "f3", "g3", "a4", "b4", "c4", "d4", "e4", "f4", "a5", "b5", "c5", "d5", "e5", "a6", "b6", "c6", "d6", "a7", "b7", "c7", "a8", "b8", "a9"] + +# Apply action "f4" +action: 32 + +# State 2 +# a b c d e f g h i +# 1 . O . . . . . . . +# 2 . . . . . . . . +# 3 . . . . . . . +# 4 . . . . .[@] +# 5 . . . . . +# 6 . . . . +# 7 . . . +# 8 . . +# 9 . +IsTerminal() = False +History() = [1, 32] +HistoryString() = "1, 32" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "1, 32" +InformationStateString(1) = "1, 32" +ObservationString(0) = " a b c d e f g h i\n 1 . O . . . . . . .\n 2 . . . . . . . .\n 3 . . . . . . .\n 4 . . . . .[@]\n 5 . . . . .\n 6 . . . .\n 7 . . .\n 8 . .\n 9 .\n" +ObservationString(1) = " a b c d e f g h i\n 1 . O . . . . . . .\n 2 . . . . . . . .\n 3 . . . . . . .\n 4 . . . . .[@]\n 5 . . . . .\n 6 . . . .\n 7 . . .\n 8 . .\n 9 .\n" +ObservationTensor(0): +◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◯◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯ ◉◉◉◉◉◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯◯ ◉◯◉◉◉◉◉◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◯◯ +◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 27, 28, 29, 30, 31, 36, 37, 38, 39, 40, 45, 46, 47, 48, 54, 55, 56, 63, 64, 72] +StringLegalActions() = ["a1", "c1", "d1", "e1", "f1", "g1", "h1", "i1", "a2", "b2", "c2", "d2", "e2", "f2", "g2", "h2", "a3", "b3", "c3", "d3", "e3", "f3", "g3", "a4", "b4", "c4", "d4", "e4", "a5", "b5", "c5", "d5", "e5", "a6", "b6", "c6", "d6", "a7", "b7", "c7", "a8", "b8", "a9"] + +# Apply action "g1" +action: 6 + +# State 3 +# a b c d e f g h i +# 1 . O . . . .[O]. . +# 2 . . . . . . . . +# 3 . . . . . . . +# 4 . . . . . @ +# 5 . . . . . +# 6 . . . . +# 7 . . . +# 8 . . +# 9 . +IsTerminal() = False +History() = [1, 32, 6] +HistoryString() = "1, 32, 6" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "1, 32, 6" +InformationStateString(1) = "1, 32, 6" +ObservationString(0) = " a b c d e f g h i\n 1 . O . . . .[O]. .\n 2 . . . . . . . .\n 3 . . . . . . .\n 4 . 
. . . . @\n 5 . . . . .\n 6 . . . .\n 7 . . .\n 8 . .\n 9 .\n" +ObservationString(1) = " a b c d e f g h i\n 1 . O . . . .[O]. .\n 2 . . . . . . . .\n 3 . . . . . . .\n 4 . . . . . @\n 5 . . . . .\n 6 . . . .\n 7 . . .\n 8 . .\n 9 .\n" +ObservationTensor(0): +◯◉◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯◯ ◉◯◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯ ◉◉◉◉◉◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◉◯◯ ◉◯◉◉◉◉◯◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◯◯ +◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 27, 28, 29, 30, 31, 36, 37, 38, 39, 40, 45, 46, 47, 48, 54, 55, 56, 63, 64, 72] +StringLegalActions() = ["a1", "c1", "d1", "e1", "f1", "h1", "i1", "a2", "b2", "c2", "d2", "e2", "f2", "g2", "h2", "a3", "b3", "c3", "d3", "e3", "f3", "g3", "a4", "b4", "c4", "d4", "e4", "a5", "b5", "c5", "d5", "e5", "a6", "b6", "c6", "d6", "a7", "b7", "c7", "a8", "b8", "a9"] + +# Apply action "f1" +action: 5 + +# State 4 +# a b c d e f g h i +# 1 . O . . .[@]O . . +# 2 . . . . . . . . +# 3 . . . . . . . +# 4 . . . . . @ +# 5 . . . . . +# 6 . . . . +# 7 . . . +# 8 . . +# 9 . +IsTerminal() = False +History() = [1, 32, 6, 5] +HistoryString() = "1, 32, 6, 5" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "1, 32, 6, 5" +InformationStateString(1) = "1, 32, 6, 5" +ObservationString(0) = " a b c d e f g h i\n 1 . O . . .[@]O . .\n 2 . . . . . . . .\n 3 . . . . . . .\n 4 . . . . . @\n 5 . . . . .\n 6 . . . .\n 7 . . .\n 8 . .\n 9 .\n" +ObservationString(1) = " a b c d e f g h i\n 1 . O . . .[@]O . .\n 2 . . . . . . . .\n 3 . . . . . . .\n 4 . . . . . @\n 5 . . . . .\n 6 . . . .\n 7 . . .\n 8 . .\n 9 .\n" +ObservationTensor(0): +◯◉◯◯◯◯◉◯◯ ◯◯◯◯◯◉◯◯◯ ◉◯◉◉◉◯◯◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯ ◉◉◉◉◉◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◉◯◯◯ ◯◉◯◯◯◯◉◯◯ ◉◯◉◉◉◯◯◉◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◯◯ +◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 2, 3, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 27, 28, 29, 30, 31, 36, 37, 38, 39, 40, 45, 46, 47, 48, 54, 55, 56, 63, 64, 72] +StringLegalActions() = ["a1", "c1", "d1", "e1", "h1", "i1", "a2", "b2", "c2", "d2", "e2", "f2", "g2", "h2", "a3", "b3", "c3", "d3", "e3", "f3", "g3", "a4", "b4", "c4", "d4", "e4", "a5", "b5", "c5", "d5", "e5", "a6", "b6", "c6", "d6", "a7", "b7", "c7", "a8", "b8", "a9"] + +# Apply action "h1" +action: 7 + +# State 5 +# a b c d e f g h i +# 1 . O . . . @ O[O]. +# 2 . . . . . . . . +# 3 . . . . . . . +# 4 . . . . . @ +# 5 . . . . . +# 6 . . . . +# 7 . . . +# 8 . . +# 9 . 
+IsTerminal() = False +History() = [1, 32, 6, 5, 7] +HistoryString() = "1, 32, 6, 5, 7" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "1, 32, 6, 5, 7" +InformationStateString(1) = "1, 32, 6, 5, 7" +ObservationString(0) = " a b c d e f g h i\n 1 . O . . . @ O[O].\n 2 . . . . . . . .\n 3 . . . . . . .\n 4 . . . . . @\n 5 . . . . .\n 6 . . . .\n 7 . . .\n 8 . .\n 9 .\n" +ObservationString(1) = " a b c d e f g h i\n 1 . O . . . @ O[O].\n 2 . . . . . . . .\n 3 . . . . . . .\n 4 . . . . . @\n 5 . . . . .\n 6 . . . .\n 7 . . .\n 8 . .\n 9 .\n" +ObservationTensor(0): +◯◉◯◯◯◯◉◉◯ ◯◯◯◯◯◉◯◯◯ ◉◯◉◉◉◯◯◯◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◉◯◯◯ ◉◉◉◉◉◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◉◯◯◯ ◯◉◯◯◯◯◉◉◯ ◉◯◉◉◉◯◯◯◉ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◉◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◉◉◯◯ +◯◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◉◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◉◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◉◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 2, 3, 4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 27, 28, 29, 30, 31, 36, 37, 38, 39, 40, 45, 46, 47, 48, 54, 55, 56, 63, 64, 72] +StringLegalActions() = ["a1", "c1", "d1", "e1", "i1", "a2", "b2", "c2", "d2", "e2", "f2", "g2", "h2", "a3", "b3", "c3", "d3", "e3", "f3", "g3", "a4", "b4", "c4", "d4", "e4", "a5", "b5", "c5", "d5", "e5", "a6", "b6", "c6", "d6", "a7", "b7", "c7", "a8", "b8", "a9"] + +# Apply action "c2" +action: 11 + +# State 6 +# Apply action "a3" +action: 18 + +# State 7 +# Apply action "b8" +action: 64 + +# State 8 +# Apply action "g3" +action: 24 + +# State 9 +# Apply action "b5" +action: 37 + +# State 10 +# Apply action "d4" +action: 30 + +# State 11 +# Apply action "a2" +action: 9 + +# State 12 +# Apply action "e4" +action: 31 + +# State 13 +# Apply action "b6" +action: 46 + +# State 14 +# Apply action "a5" +action: 36 + +# State 15 +# Apply action "b7" +action: 55 + +# State 16 +# Apply action "b4" +action: 28 + +# State 17 +# Apply action "i1" +action: 8 + +# State 18 +# Apply action "e5" +action: 40 + +# State 19 +# Apply action "h2" +action: 16 + +# State 20 +# a b c d e f g h i +# 1 . O . . . @ O O @ +# 2 @ . @ . . . .[@] +# 3 O . . . . . O +# 4 . O . O O @ +# 5 O @ . . O +# 6 . @ . . +# 7 . @ . +# 8 . @ +# 9 . +IsTerminal() = False +History() = [1, 32, 6, 5, 7, 11, 18, 64, 24, 37, 30, 9, 31, 46, 36, 55, 28, 8, 40, 16] +HistoryString() = "1, 32, 6, 5, 7, 11, 18, 64, 24, 37, 30, 9, 31, 46, 36, 55, 28, 8, 40, 16" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "1, 32, 6, 5, 7, 11, 18, 64, 24, 37, 30, 9, 31, 46, 36, 55, 28, 8, 40, 16" +InformationStateString(1) = "1, 32, 6, 5, 7, 11, 18, 64, 24, 37, 30, 9, 31, 46, 36, 55, 28, 8, 40, 16" +ObservationString(0) = " a b c d e f g h i\n 1 . O . . . @ O O @\n 2 @ . @ . . . .[@]\n 3 O . . . . . O\n 4 . O . O O @\n 5 O @ . . O\n 6 . @ . .\n 7 . @ .\n 8 . @\n 9 .\n" +ObservationString(1) = " a b c d e f g h i\n 1 . O . . . @ O O @\n 2 @ . @ . . . .[@]\n 3 O . . . . . O\n 4 . O . O O @\n 5 O @ . . O\n 6 . @ . .\n 7 . @ .\n 8 . 
@\n 9 .\n" +ObservationTensor(0): +◯◉◯◯◯◯◉◉◯ ◯◯◯◯◯◉◯◯◉ ◉◯◉◉◉◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◉◯◉◯◯◯◯◉◯ ◯◉◯◉◉◉◉◯◯ +◉◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◯ +◯◉◯◉◉◯◯◯◯ ◯◯◯◯◯◉◯◯◯ ◉◯◉◯◯◯◯◯◯ +◉◯◯◯◉◯◯◯◯ ◯◉◯◯◯◯◯◯◯ ◯◯◉◉◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯◯ ◉◯◉◉◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯◯ ◉◯◉◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◉◯◯◉ ◯◉◯◯◯◯◉◉◯ ◉◯◉◉◉◯◯◯◯ +◉◯◉◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯ ◯◉◯◉◉◉◉◯◯ +◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◉◯◯ ◯◉◉◉◉◉◯◯◯ +◯◯◯◯◯◉◯◯◯ ◯◉◯◉◉◯◯◯◯ ◉◯◉◯◯◯◯◯◯ +◯◉◯◯◯◯◯◯◯ ◉◯◯◯◉◯◯◯◯ ◯◯◉◉◯◯◯◯◯ +◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◯◉◉◯◯◯◯◯ +◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◯◉◯◯◯◯◯◯ +◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 2, 3, 4, 10, 12, 13, 14, 15, 19, 20, 21, 22, 23, 27, 29, 38, 39, 45, 47, 48, 54, 56, 63, 72] +StringLegalActions() = ["a1", "c1", "d1", "e1", "b2", "d2", "e2", "f2", "g2", "b3", "c3", "d3", "e3", "f3", "a4", "c4", "c5", "d5", "a6", "c6", "d6", "a7", "c7", "a8", "a9"] + +# Apply action "e1" +action: 4 + +# State 21 +# a b c d e f g h i +# 1 . O . .[O]@ O O @ +# 2 @ . @ . . . . @ +# 3 O . . . . . O +# 4 . O . O O @ +# 5 O @ . . O +# 6 . @ . . +# 7 . @ . +# 8 . @ +# 9 . +IsTerminal() = False +History() = [1, 32, 6, 5, 7, 11, 18, 64, 24, 37, 30, 9, 31, 46, 36, 55, 28, 8, 40, 16, 4] +HistoryString() = "1, 32, 6, 5, 7, 11, 18, 64, 24, 37, 30, 9, 31, 46, 36, 55, 28, 8, 40, 16, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "1, 32, 6, 5, 7, 11, 18, 64, 24, 37, 30, 9, 31, 46, 36, 55, 28, 8, 40, 16, 4" +InformationStateString(1) = "1, 32, 6, 5, 7, 11, 18, 64, 24, 37, 30, 9, 31, 46, 36, 55, 28, 8, 40, 16, 4" +ObservationString(0) = " a b c d e f g h i\n 1 . O . .[O]@ O O @\n 2 @ . @ . . . . @\n 3 O . . . . . O\n 4 . O . O O @\n 5 O @ . . O\n 6 . @ . .\n 7 . @ .\n 8 . @\n 9 .\n" +ObservationString(1) = " a b c d e f g h i\n 1 . O . .[O]@ O O @\n 2 @ . @ . . . . @\n 3 O . . . . . O\n 4 . O . O O @\n 5 O @ . . O\n 6 . @ . .\n 7 . @ .\n 8 . 
@\n 9 .\n" +ObservationTensor(0): +◯◉◯◯◉◯◉◉◯ ◯◯◯◯◯◉◯◯◉ ◉◯◉◉◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◉◯◉◯◯◯◯◉◯ ◯◉◯◉◉◉◉◯◯ +◉◯◯◯◯◯◉◯◯ ◯◯◯◯◯◯◯◯◯ ◯◉◉◉◉◉◯◯◯ +◯◉◯◉◉◯◯◯◯ ◯◯◯◯◯◉◯◯◯ ◉◯◉◯◯◯◯◯◯ +◉◯◯◯◉◯◯◯◯ ◯◉◯◯◯◯◯◯◯ ◯◯◉◉◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯◯ ◉◯◉◉◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯◯ ◉◯◉◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◯◯◯◯◯◉◯◯◉ ◯◉◯◯◉◯◉◉◯ ◉◯◉◉◯◯◯◯◯ +◉◯◉◯◯◯◯◉◯ ◯◯◯◯◯◯◯◯◯ ◯◉◯◉◉◉◉◯◯ +◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◉◯◯ ◯◉◉◉◉◉◯◯◯ +◯◯◯◯◯◉◯◯◯ ◯◉◯◉◉◯◯◯◯ ◉◯◉◯◯◯◯◯◯ +◯◉◯◯◯◯◯◯◯ ◉◯◯◯◉◯◯◯◯ ◯◯◉◉◯◯◯◯◯ +◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◯◉◉◯◯◯◯◯ +◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◯◉◯◯◯◯◯◯ +◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [0, 2, 3, 10, 12, 13, 14, 15, 19, 20, 21, 22, 23, 27, 29, 38, 39, 45, 47, 48, 54, 56, 63, 72] +StringLegalActions() = ["a1", "c1", "d1", "b2", "d2", "e2", "f2", "g2", "b3", "c3", "d3", "e3", "f3", "a4", "c4", "c5", "d5", "a6", "c6", "d6", "a7", "c7", "a8", "a9"] + +# Apply action "e3" +action: 22 + +# State 22 +# Apply action "c4" +action: 29 + +# State 23 +# Apply action "a9" +action: 72 + +# State 24 +# Apply action "a8" +action: 63 + +# State 25 +# Apply action "d2" +action: 12 + +# State 26 +# Apply action "d3" +action: 21 + +# State 27 +# Apply action "g2" +action: 15 + +# State 28 +# Apply action "c5" +action: 38 + +# State 29 +# Apply action "a4" +action: 27 + +# State 30 +# Apply action "f2" +action: 14 + +# State 31 +# Apply action "a6" +action: 45 + +# State 32 +# Apply action "c1" +action: 2 + +# State 33 +# Apply action "a7" +action: 54 + +# State 34 +# Apply action "d1" +action: 3 + +# State 35 +# Apply action "c7" +action: 56 + +# State 36 +# Apply action "b2" +action: 10 + +# State 37 +# Apply action "b3" +action: 19 + +# State 38 +# Apply action "c6" +action: 47 + +# State 39 +# Apply action "a1" +action: 0 + +# State 40 +# a b c d e f g h i +# 1[@]O O O O @ O O @ +# 2 @ O @ @ . O @ @ +# 3 O @ . O @ . O +# 4 @ O O O O @ +# 5 O @ O . O +# 6 @ @ O . +# 7 @ @ @ +# 8 O @ +# 9 @ +IsTerminal() = False +History() = [1, 32, 6, 5, 7, 11, 18, 64, 24, 37, 30, 9, 31, 46, 36, 55, 28, 8, 40, 16, 4, 22, 29, 72, 63, 12, 21, 15, 38, 27, 14, 45, 2, 54, 3, 56, 10, 19, 47, 0] +HistoryString() = "1, 32, 6, 5, 7, 11, 18, 64, 24, 37, 30, 9, 31, 46, 36, 55, 28, 8, 40, 16, 4, 22, 29, 72, 63, 12, 21, 15, 38, 27, 14, 45, 2, 54, 3, 56, 10, 19, 47, 0" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "1, 32, 6, 5, 7, 11, 18, 64, 24, 37, 30, 9, 31, 46, 36, 55, 28, 8, 40, 16, 4, 22, 29, 72, 63, 12, 21, 15, 38, 27, 14, 45, 2, 54, 3, 56, 10, 19, 47, 0" +InformationStateString(1) = "1, 32, 6, 5, 7, 11, 18, 64, 24, 37, 30, 9, 31, 46, 36, 55, 28, 8, 40, 16, 4, 22, 29, 72, 63, 12, 21, 15, 38, 27, 14, 45, 2, 54, 3, 56, 10, 19, 47, 0" +ObservationString(0) = " a b c d e f g h i\n 1[@]O O O O @ O O @\n 2 @ O @ @ . O @ @\n 3 O @ . O @ . O\n 4 @ O O O O @\n 5 O @ O . O\n 6 @ @ O .\n 7 @ @ @\n 8 O @\n 9 @\n" +ObservationString(1) = " a b c d e f g h i\n 1[@]O O O O @ O O @\n 2 @ O @ @ . O @ @\n 3 O @ . O @ . O\n 4 @ O O O O @\n 5 O @ O . 
O\n 6 @ @ O .\n 7 @ @ @\n 8 O @\n 9 @\n" +ObservationTensor(0): +◯◉◉◉◉◯◉◉◯ ◉◯◯◯◯◉◯◯◉ ◯◯◯◯◯◯◯◯◯ +◯◉◯◯◯◉◯◯◯ ◉◯◉◉◯◯◉◉◯ ◯◯◯◯◉◯◯◯◯ +◉◯◯◉◯◯◉◯◯ ◯◉◯◯◉◯◯◯◯ ◯◯◉◯◯◉◯◯◯ +◯◉◉◉◉◯◯◯◯ ◉◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯ +◉◯◉◯◉◯◯◯◯ ◯◉◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯◯ +◯◯◉◯◯◯◯◯◯ ◉◉◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◉◯◯◯◯◉◯◯◉ ◯◉◉◉◉◯◉◉◯ ◯◯◯◯◯◯◯◯◯ +◉◯◉◉◯◯◉◉◯ ◯◉◯◯◯◉◯◯◯ ◯◯◯◯◉◯◯◯◯ +◯◉◯◯◉◯◯◯◯ ◉◯◯◉◯◯◉◯◯ ◯◯◉◯◯◉◯◯◯ +◉◯◯◯◯◉◯◯◯ ◯◉◉◉◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◉◯◯◯◯◯◯◯ ◉◯◉◯◉◯◯◯◯ ◯◯◯◉◯◯◯◯◯ +◉◉◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯◯ +◉◉◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◉◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +Rewards() = [0, 0] +Returns() = [0, 0] +LegalActions() = [13, 20, 23, 39, 48] +StringLegalActions() = ["e2", "c3", "f3", "d5", "d6"] + +# Apply action "f3" +action: 23 + +# State 41 +# a b c d e f g h i +# 1 @ O O O O @ O O @ +# 2 @ O @ @ . O @ @ +# 3 O @ . O @[O]O +# 4 @ O O O O @ +# 5 O @ O . O +# 6 @ @ O . +# 7 @ @ @ +# 8 O @ +# 9 @ +IsTerminal() = True +History() = [1, 32, 6, 5, 7, 11, 18, 64, 24, 37, 30, 9, 31, 46, 36, 55, 28, 8, 40, 16, 4, 22, 29, 72, 63, 12, 21, 15, 38, 27, 14, 45, 2, 54, 3, 56, 10, 19, 47, 0, 23] +HistoryString() = "1, 32, 6, 5, 7, 11, 18, 64, 24, 37, 30, 9, 31, 46, 36, 55, 28, 8, 40, 16, 4, 22, 29, 72, 63, 12, 21, 15, 38, 27, 14, 45, 2, 54, 3, 56, 10, 19, 47, 0, 23" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "1, 32, 6, 5, 7, 11, 18, 64, 24, 37, 30, 9, 31, 46, 36, 55, 28, 8, 40, 16, 4, 22, 29, 72, 63, 12, 21, 15, 38, 27, 14, 45, 2, 54, 3, 56, 10, 19, 47, 0, 23" +InformationStateString(1) = "1, 32, 6, 5, 7, 11, 18, 64, 24, 37, 30, 9, 31, 46, 36, 55, 28, 8, 40, 16, 4, 22, 29, 72, 63, 12, 21, 15, 38, 27, 14, 45, 2, 54, 3, 56, 10, 19, 47, 0, 23" +ObservationString(0) = " a b c d e f g h i\n 1 @ O O O O @ O O @\n 2 @ O @ @ . O @ @\n 3 O @ . O @[O]O\n 4 @ O O O O @\n 5 O @ O . O\n 6 @ @ O .\n 7 @ @ @\n 8 O @\n 9 @\n" +ObservationString(1) = " a b c d e f g h i\n 1 @ O O O O @ O O @\n 2 @ O @ @ . O @ @\n 3 O @ . O @[O]O\n 4 @ O O O O @\n 5 O @ O . O\n 6 @ @ O .\n 7 @ @ @\n 8 O @\n 9 @\n" +ObservationTensor(0): +◯◉◉◉◉◯◉◉◯ ◉◯◯◯◯◉◯◯◉ ◯◯◯◯◯◯◯◯◯ +◯◉◯◯◯◉◯◯◯ ◉◯◉◉◯◯◉◉◯ ◯◯◯◯◉◯◯◯◯ +◉◯◯◉◯◉◉◯◯ ◯◉◯◯◉◯◯◯◯ ◯◯◉◯◯◯◯◯◯ +◯◉◉◉◉◯◯◯◯ ◉◯◯◯◯◉◯◯◯ ◯◯◯◯◯◯◯◯◯ +◉◯◉◯◉◯◯◯◯ ◯◉◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯◯ +◯◯◉◯◯◯◯◯◯ ◉◉◯◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◉◉◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◉◯◯◯◯◯◯◯◯ ◯◉◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◯◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +ObservationTensor(1): +◉◯◯◯◯◉◯◯◉ ◯◉◉◉◉◯◉◉◯ ◯◯◯◯◯◯◯◯◯ +◉◯◉◉◯◯◉◉◯ ◯◉◯◯◯◉◯◯◯ ◯◯◯◯◉◯◯◯◯ +◯◉◯◯◉◯◯◯◯ ◉◯◯◉◯◉◉◯◯ ◯◯◉◯◯◯◯◯◯ +◉◯◯◯◯◉◯◯◯ ◯◉◉◉◉◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◉◯◯◯◯◯◯◯ ◉◯◉◯◉◯◯◯◯ ◯◯◯◉◯◯◯◯◯ +◉◉◯◯◯◯◯◯◯ ◯◯◉◯◯◯◯◯◯ ◯◯◯◉◯◯◯◯◯ +◉◉◉◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◯◉◯◯◯◯◯◯◯ ◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +◉◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ ◯◯◯◯◯◯◯◯◯ +Rewards() = [1, -1] +Returns() = [1, -1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/zerosum(game=oh_hell()).txt b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/zerosum(game=oh_hell()).txt new file mode 100644 index 0000000..d5ce0a0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/integration_tests/playthroughs/zerosum(game=oh_hell()).txt @@ -0,0 +1,786 @@ +game: zerosum(game=oh_hell()) + +GameType.chance_mode = ChanceMode.EXPLICIT_STOCHASTIC +GameType.dynamics = Dynamics.SEQUENTIAL +GameType.information = Information.IMPERFECT_INFORMATION +GameType.long_name = "ZeroSum Oh Hell!" 
+GameType.max_num_players = 7 +GameType.min_num_players = 3 +GameType.parameter_specification = ["num_cards_per_suit", "num_suits", "num_tricks_fixed", "off_bid_penalty", "players", "points_per_trick"] +GameType.provides_information_state_string = True +GameType.provides_information_state_tensor = True +GameType.provides_observation_string = False +GameType.provides_observation_tensor = False +GameType.provides_factored_observation_string = False +GameType.reward_model = RewardModel.TERMINAL +GameType.short_name = "zerosum" +GameType.utility = Utility.ZERO_SUM + +NumDistinctActions() = 70 +PolicyTensorShape() = [70] +MaxChanceOutcomes() = 52 +GetParameters() = {game=oh_hell()} +NumPlayers() = 3 +MinUtility() = -18.0 +MaxUtility() = 18.0 +UtilitySum() = 0.0 +InformationStateTensorShape() = [4704] +InformationStateTensorLayout() = TensorLayout.CHW +InformationStateTensorSize() = 4704 +MaxGameLength() = 108 +ToString() = "zerosum(game=oh_hell())" + +# State 0 +# Phase: ChooseNumTricks +# Num Total Tricks: 0 +# Dealer: -3 +# Player: 0 +# C: +# D: +# S: +# H: +# +# Player: 1 +# C: +# D: +# S: +# H: +# +# Player: 2 +# C: +# D: +# S: +# H: +# +# +# +# Bids: -1 -1 -1 +# Tricks Won: 0 0 0 +IsTerminal() = False +History() = [] +HistoryString() = "" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "" +InformationStateString(1) = "" +InformationStateString(2) = "" +InformationStateTensor(0): zeros(4704) +InformationStateTensor(1): zeros(4704) +InformationStateTensor(2): zeros(4704) +ChanceOutcomes() = [(1,0.0588235), (2,0.0588235), (3,0.0588235), (4,0.0588235), (5,0.0588235), (6,0.0588235), (7,0.0588235), (8,0.0588235), (9,0.0588235), (10,0.0588235), (11,0.0588235), (12,0.0588235), (13,0.0588235), (14,0.0588235), (15,0.0588235), (16,0.0588235), (17,0.0588235)] +LegalActions() = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] +StringLegalActions() = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17"] + +# Apply action "8" +action: 8 + +# State 1 +# Phase: ChooseDealer +# Num Total Tricks: 8 +# Dealer: -3 +# Player: 0 +# C: +# D: +# S: +# H: +# +# Player: 1 +# C: +# D: +# S: +# H: +# +# Player: 2 +# C: +# D: +# S: +# H: +# +# +# +# Bids: -1 -1 -1 +# Tricks Won: 0 0 0 +IsTerminal() = False +History() = [8] +HistoryString() = "8" +IsChanceNode() = True +IsSimultaneousNode() = False +CurrentPlayer() = -1 +InformationStateString(0) = "Num Total Tricks: 8\n" +InformationStateString(1) = "Num Total Tricks: 8\n" +InformationStateString(2) = "Num Total Tricks: 8\n" +InformationStateTensor(0): zeros(4704) +InformationStateTensor(1): zeros(4704) +InformationStateTensor(2): zeros(4704) +ChanceOutcomes() = [(0,0.333333), (1,0.333333), (2,0.333333)] +LegalActions() = [0, 1, 2] +StringLegalActions() = ["0", "1", "2"] + +# Apply action "2" +action: 2 + +# State 2 +# Apply action "D7" +action: 21 + +# State 3 +# Apply action "C4" +action: 8 + +# State 4 +# Apply action "D5" +action: 13 + +# State 5 +# Apply action "ST" +action: 34 + +# State 6 +# Apply action "D2" +action: 1 + +# State 7 +# Apply action "C3" +action: 4 + +# State 8 +# Apply action "H9" +action: 31 + +# State 9 +# Apply action "SQ" +action: 42 + +# State 10 +# Apply action "D8" +action: 25 + +# State 11 +# Apply action "D3" +action: 5 + +# State 12 +# Apply action "S6" +action: 18 + +# State 13 +# Apply action "CQ" +action: 40 + +# State 14 +# Apply action "H4" +action: 11 + +# State 15 +# Apply action "C8" +action: 24 + +# State 16 +# Apply 
action "C5" +action: 12 + +# State 17 +# Apply action "H5" +action: 15 + +# State 18 +# Apply action "HT" +action: 35 + +# State 19 +# Apply action "S3" +action: 6 + +# State 20 +# Apply action "C9" +action: 28 + +# State 21 +# Apply action "H2" +action: 3 + +# State 22 +# Apply action "C7" +action: 20 + +# State 23 +# Apply action "CA" +action: 48 + +# State 24 +# Apply action "SA" +action: 50 + +# State 25 +# Apply action "H8" +action: 27 + +# State 26 +# Apply action "H6" +action: 19 + +# State 27 +# Phase: Bid +# Num Total Tricks: 8 +# Dealer: 2 +# Player: 0 +# C: A9 +# D: 73 +# S: T +# H: 954 +# +# Player: 1 +# C: 84 +# D: 2 +# S: AQ6 +# H: T2 +# +# Player: 2 +# C: Q753 +# D: 85 +# S: 3 +# H: 8 +# +# Trump: H6 +# +# +# Bids: -1 -1 -1 +# Tricks Won: 0 0 0 +IsTerminal() = False +History() = [8, 2, 21, 8, 13, 34, 1, 4, 31, 42, 25, 5, 18, 40, 11, 24, 12, 15, 35, 6, 28, 3, 20, 48, 50, 27, 19] +HistoryString() = "8, 2, 21, 8, 13, 34, 1, 4, 31, 42, 25, 5, 18, 40, 11, 24, 12, 15, 35, 6, 28, 3, 20, 48, 50, 27, 19" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 0\n C: A9\n D: 73\n S: T\n H: 954\n\n\nBids: -1 -1 -1 \nTricks Won: 0 0 0 \n" +InformationStateString(1) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 1\n C: 84\n D: 2\n S: AQ6\n H: T2\n\n\nBids: -1 -1 -1 \nTricks Won: 0 0 0 \n" +InformationStateString(2) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 2\n C: Q753\n D: 85\n S: 3\n H: 8\n\n\nBids: -1 -1 -1 \nTricks Won: 0 0 0 \n" +InformationStateTensor(0): binvec(4704, 0x10010000100000000041104092000804110409200088000100002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(4704, 
0x10010000100000000508020801020250802080102028000100002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(4704, 0x100100001000000000a0c0850008000a0c0850008008000100002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [52, 53, 54, 55, 56, 57, 58, 59, 60] +StringLegalActions() = ["0", "1", "2", "3", "4", "5", "6", "7", "8"] + +# Apply action "7" +action: 59 + +# State 28 +# Phase: Bid +# Num Total Tricks: 8 +# Dealer: 2 +# Player: 0 +# C: A9 +# D: 73 +# S: T +# H: 954 +# +# Player: 1 +# C: 84 +# D: 2 +# S: AQ6 +# H: T2 +# +# Player: 2 +# C: Q753 +# D: 85 +# S: 3 +# H: 8 +# +# Trump: H6 +# +# +# Bids: 7 -1 -1 +# Tricks Won: 0 0 0 +IsTerminal() = False +History() = [8, 2, 21, 8, 13, 34, 1, 4, 31, 42, 25, 5, 18, 40, 11, 24, 12, 15, 35, 6, 28, 3, 20, 48, 50, 27, 19, 59] +HistoryString() = "8, 2, 21, 8, 13, 34, 1, 4, 31, 42, 25, 5, 18, 40, 11, 24, 12, 15, 35, 6, 28, 3, 20, 48, 50, 27, 19, 59" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 0\n C: A9\n D: 73\n S: T\n H: 954\n\n\nBids: 7 -1 -1 \nTricks Won: 0 0 0 \n" +InformationStateString(1) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 1\n C: 84\n D: 2\n S: AQ6\n H: T2\n\n\nBids: 7 -1 -1 
\nTricks Won: 0 0 0 \n" +InformationStateString(2) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 2\n C: Q753\n D: 85\n S: 3\n H: 8\n\n\nBids: 7 -1 -1 \nTricks Won: 0 0 0 \n" +InformationStateTensor(0): binvec(4704, 0x10010000100000000041104092000804110409200080080100002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(4704, 0x10010000100000000508020801020250802080102020080100002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(4704, 
0x100100001000000000a0c0850008000a0c0850008000080100002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [52, 53, 54, 55, 56, 57, 58, 59, 60] +StringLegalActions() = ["0", "1", "2", "3", "4", "5", "6", "7", "8"] + +# Apply action "8" +action: 60 + +# State 29 +# Phase: Bid +# Num Total Tricks: 8 +# Dealer: 2 +# Player: 0 +# C: A9 +# D: 73 +# S: T +# H: 954 +# +# Player: 1 +# C: 84 +# D: 2 +# S: AQ6 +# H: T2 +# +# Player: 2 +# C: Q753 +# D: 85 +# S: 3 +# H: 8 +# +# Trump: H6 +# +# +# Bids: 7 8 -1 +# Tricks Won: 0 0 0 +IsTerminal() = False +History() = [8, 2, 21, 8, 13, 34, 1, 4, 31, 42, 25, 5, 18, 40, 11, 24, 12, 15, 35, 6, 28, 3, 20, 48, 50, 27, 19, 59, 60] +HistoryString() = "8, 2, 21, 8, 13, 34, 1, 4, 31, 42, 25, 5, 18, 40, 11, 24, 12, 15, 35, 6, 28, 3, 20, 48, 50, 27, 19, 59, 60" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 0\n C: A9\n D: 73\n S: T\n H: 954\n\n\nBids: 7 8 -1 \nTricks Won: 0 0 0 \n" +InformationStateString(1) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 1\n C: 84\n D: 2\n S: AQ6\n H: T2\n\n\nBids: 7 8 -1 \nTricks Won: 0 0 0 \n" +InformationStateString(2) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 2\n C: Q753\n D: 85\n S: 3\n H: 8\n\n\nBids: 7 8 -1 \nTricks Won: 0 0 0 \n" +InformationStateTensor(0): binvec(4704, 
0x10010000100000000041104092000804110409200080080000802000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(4704, 0x10010000100000000508020801020250802080102020080000802000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(4704, 
0x100100001000000000a0c0850008000a0c0850008000080000802000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [52, 53, 54, 55, 56, 57, 58, 59, 60] +StringLegalActions() = ["0", "1", "2", "3", "4", "5", "6", "7", "8"] + +# Apply action "5" +action: 57 + +# State 30 +# Phase: Play +# Num Total Tricks: 8 +# Dealer: 2 +# Player: 0 +# C: A9 +# D: 73 +# S: T +# H: 954 +# +# Player: 1 +# C: 84 +# D: 2 +# S: AQ6 +# H: T2 +# +# Player: 2 +# C: Q753 +# D: 85 +# S: 3 +# H: 8 +# +# Trump: H6 +# +# +# Bids: 7 8 5 +# Tricks Won: 0 0 0 +IsTerminal() = False +History() = [8, 2, 21, 8, 13, 34, 1, 4, 31, 42, 25, 5, 18, 40, 11, 24, 12, 15, 35, 6, 28, 3, 20, 48, 50, 27, 19, 59, 60, 57] +HistoryString() = "8, 2, 21, 8, 13, 34, 1, 4, 31, 42, 25, 5, 18, 40, 11, 24, 12, 15, 35, 6, 28, 3, 20, 48, 50, 27, 19, 59, 60, 57" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 0\n C: A9\n D: 73\n S: T\n H: 954\n\n\nBids: 7 8 5 \nTricks Won: 0 0 0 \n" +InformationStateString(1) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 1\n C: 84\n D: 2\n S: AQ6\n H: T2\n\n\nBids: 7 8 5 \nTricks Won: 0 0 0 \n" +InformationStateString(2) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 2\n C: Q753\n D: 85\n S: 3\n H: 8\n\n\nBids: 7 8 5 \nTricks Won: 0 0 0 \n" +InformationStateTensor(0): binvec(4704, 
0x10010000100000000041104092000804110409200080080000800080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(4704, 0x10010000100000000508020801020250802080102020080000800080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(4704, 
0x100100001000000000a0c0850008000a0c0850008000080000800080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [5, 11, 15, 21, 28, 31, 34, 48] +StringLegalActions() = ["D3", "H4", "H5", "D7", "C9", "H9", "ST", "CA"] + +# Apply action "ST" +action: 34 + +# State 31 +# Phase: Play +# Num Total Tricks: 8 +# Dealer: 2 +# Player: 0 +# C: A9 +# D: 73 +# S: +# H: 954 +# +# Player: 1 +# C: 84 +# D: 2 +# S: AQ6 +# H: T2 +# +# Player: 2 +# C: Q753 +# D: 85 +# S: 3 +# H: 8 +# +# Trump: H6 +# +# Tricks: +# 0 1 2 0 1 +# ST +# +# Bids: 7 8 5 +# Tricks Won: 0 0 0 +IsTerminal() = False +History() = [8, 2, 21, 8, 13, 34, 1, 4, 31, 42, 25, 5, 18, 40, 11, 24, 12, 15, 35, 6, 28, 3, 20, 48, 50, 27, 19, 59, 60, 57, 34] +HistoryString() = "8, 2, 21, 8, 13, 34, 1, 4, 31, 42, 25, 5, 18, 40, 11, 24, 12, 15, 35, 6, 28, 3, 20, 48, 50, 27, 19, 59, 60, 57, 34" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 0\n C: A9\n D: 73\n S: \n H: 954\n\nTricks:\n0 1 2 0 1 \nST \n\nBids: 7 8 5 \nTricks Won: 0 0 0 \n" +InformationStateString(1) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 1\n C: 84\n D: 2\n S: AQ6\n H: T2\n\nTricks:\n0 1 2 0 1 \nST \n\nBids: 7 8 5 \nTricks Won: 0 0 0 \n" +InformationStateString(2) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 2\n C: Q753\n D: 85\n S: 3\n H: 8\n\nTricks:\n0 1 2 0 1 \nST \n\nBids: 7 8 5 \nTricks Won: 0 0 0 \n" +InformationStateTensor(0): binvec(4704, 
0x10010000100000000041104092000804110409000080080000800080000000000000000000000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(4704, 0x10010000100000000508020801020250802080102020080000800080000000000000000000000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(4704, 
0x100100001000000000a0c0850008000a0c0850008000080000800080000000000000000000000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [18, 42, 50] +StringLegalActions() = ["S6", "SQ", "SA"] + +# Apply action "SA" +action: 50 + +# State 32 +# Phase: Play +# Num Total Tricks: 8 +# Dealer: 2 +# Player: 0 +# C: A9 +# D: 73 +# S: +# H: 954 +# +# Player: 1 +# C: 84 +# D: 2 +# S: Q6 +# H: T2 +# +# Player: 2 +# C: Q753 +# D: 85 +# S: 3 +# H: 8 +# +# Trump: H6 +# +# Tricks: +# 0 1 2 0 1 +# ST SA +# +# Bids: 7 8 5 +# Tricks Won: 0 0 0 +IsTerminal() = False +History() = [8, 2, 21, 8, 13, 34, 1, 4, 31, 42, 25, 5, 18, 40, 11, 24, 12, 15, 35, 6, 28, 3, 20, 48, 50, 27, 19, 59, 60, 57, 34, 50] +HistoryString() = "8, 2, 21, 8, 13, 34, 1, 4, 31, 42, 25, 5, 18, 40, 11, 24, 12, 15, 35, 6, 28, 3, 20, 48, 50, 27, 19, 59, 60, 57, 34, 50" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 0\n C: A9\n D: 73\n S: \n H: 954\n\nTricks:\n0 1 2 0 1 \nST SA \n\nBids: 7 8 5 \nTricks Won: 0 0 0 \n" +InformationStateString(1) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 1\n C: 84\n D: 2\n S: Q6\n H: T2\n\nTricks:\n0 1 2 0 1 \nST SA \n\nBids: 7 8 5 \nTricks Won: 0 0 0 \n" +InformationStateString(2) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 2\n C: Q753\n D: 85\n S: 3\n H: 8\n\nTricks:\n0 1 2 0 1 \nST SA \n\nBids: 7 8 5 \nTricks Won: 0 0 0 \n" +InformationStateTensor(0): binvec(4704, 
0x10010000100000000041104092000804110409000080080000800080000000000000000000000020000000000000000200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(4704, 0x10010000100000000508020801020250802080102000080000800080000000000000000000000020000000000000000200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(4704, 
0x100100001000000000a0c0850008000a0c0850008000080000800080000000000000000000000020000000000000000200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [6] +StringLegalActions() = ["S3"] + +# Apply action "S3" +action: 6 + +# State 33 +# Phase: Play +# Num Total Tricks: 8 +# Dealer: 2 +# Player: 0 +# C: A9 +# D: 73 +# S: +# H: 954 +# +# Player: 1 +# C: 84 +# D: 2 +# S: Q6 +# H: T2 +# +# Player: 2 +# C: Q753 +# D: 85 +# S: +# H: 8 +# +# Trump: H6 +# +# Tricks: +# 0 1 2 0 1 +# ST SA S3 +# +# Bids: 7 8 5 +# Tricks Won: 0 1 0 +IsTerminal() = False +History() = [8, 2, 21, 8, 13, 34, 1, 4, 31, 42, 25, 5, 18, 40, 11, 24, 12, 15, 35, 6, 28, 3, 20, 48, 50, 27, 19, 59, 60, 57, 34, 50, 6] +HistoryString() = "8, 2, 21, 8, 13, 34, 1, 4, 31, 42, 25, 5, 18, 40, 11, 24, 12, 15, 35, 6, 28, 3, 20, 48, 50, 27, 19, 59, 60, 57, 34, 50, 6" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 1 +InformationStateString(0) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 0\n C: A9\n D: 73\n S: \n H: 954\n\nTricks:\n0 1 2 0 1 \nST SA S3 \n\nBids: 7 8 5 \nTricks Won: 0 1 0 \n" +InformationStateString(1) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 1\n C: 84\n D: 2\n S: Q6\n H: T2\n\nTricks:\n0 1 2 0 1 \nST SA S3 \n\nBids: 7 8 5 \nTricks Won: 0 1 0 \n" +InformationStateString(2) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 2\n C: Q753\n D: 85\n S: \n H: 8\n\nTricks:\n0 1 2 0 1 \nST SA S3 \n\nBids: 7 8 5 \nTricks Won: 0 1 0 \n" +InformationStateTensor(0): binvec(4704, 
0x10010000100000000041104092000804110409000080080000800080000002000000000000000020000000000000000202000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(4704, 0x10010000100000000508020801020250802080102000080000800080000002000000000000000020000000000000000202000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(4704, 
0x100100001000000000a0c085000800080c0850008000080000800080000002000000000000000020000000000000000202000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [1, 3, 8, 18, 24, 35, 42] +StringLegalActions() = ["D2", "H2", "C4", "S6", "C8", "HT", "SQ"] + +# Apply action "H2" +action: 3 + +# State 34 +# Phase: Play +# Num Total Tricks: 8 +# Dealer: 2 +# Player: 0 +# C: A9 +# D: 73 +# S: +# H: 954 +# +# Player: 1 +# C: 84 +# D: 2 +# S: Q6 +# H: T +# +# Player: 2 +# C: Q753 +# D: 85 +# S: +# H: 8 +# +# Trump: H6 +# +# Tricks: +# 0 1 2 0 1 +# ST SA S3 +# H2 +# +# Bids: 7 8 5 +# Tricks Won: 0 1 0 +IsTerminal() = False +History() = [8, 2, 21, 8, 13, 34, 1, 4, 31, 42, 25, 5, 18, 40, 11, 24, 12, 15, 35, 6, 28, 3, 20, 48, 50, 27, 19, 59, 60, 57, 34, 50, 6, 3] +HistoryString() = "8, 2, 21, 8, 13, 34, 1, 4, 31, 42, 25, 5, 18, 40, 11, 24, 12, 15, 35, 6, 28, 3, 20, 48, 50, 27, 19, 59, 60, 57, 34, 50, 6, 3" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 2 +InformationStateString(0) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 0\n C: A9\n D: 73\n S: \n H: 954\n\nTricks:\n0 1 2 0 1 \nST SA S3 \n H2 \n\nBids: 7 8 5 \nTricks Won: 0 1 0 \n" +InformationStateString(1) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 1\n C: 84\n D: 2\n S: Q6\n H: T\n\nTricks:\n0 1 2 0 1 \nST SA S3 \n H2 \n\nBids: 7 8 5 \nTricks Won: 0 1 0 \n" +InformationStateString(2) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 2\n C: Q753\n D: 85\n S: \n H: 8\n\nTricks:\n0 1 2 0 1 \nST SA S3 \n H2 \n\nBids: 7 8 5 \nTricks Won: 0 1 0 \n" +InformationStateTensor(0): binvec(4704, 
0x10010000100000000041104092000804110409000080080000800080000002000000000000000020000000000000000202000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(4704, 0x10010000100000000508020801020240802080102000080000800080000002000000000000000020000000000000000202000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(4704, 
0x100100001000000000a0c085000800080c0850008000080000800080000002000000000000000020000000000000000202000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [27] +StringLegalActions() = ["H8"] + +# Apply action "H8" +action: 27 + +# State 35 +# Phase: Play +# Num Total Tricks: 8 +# Dealer: 2 +# Player: 0 +# C: A9 +# D: 73 +# S: +# H: 954 +# +# Player: 1 +# C: 84 +# D: 2 +# S: Q6 +# H: T +# +# Player: 2 +# C: Q753 +# D: 85 +# S: +# H: +# +# Trump: H6 +# +# Tricks: +# 0 1 2 0 1 +# ST SA S3 +# H2 H8 +# +# Bids: 7 8 5 +# Tricks Won: 0 1 0 +IsTerminal() = False +History() = [8, 2, 21, 8, 13, 34, 1, 4, 31, 42, 25, 5, 18, 40, 11, 24, 12, 15, 35, 6, 28, 3, 20, 48, 50, 27, 19, 59, 60, 57, 34, 50, 6, 3, 27] +HistoryString() = "8, 2, 21, 8, 13, 34, 1, 4, 31, 42, 25, 5, 18, 40, 11, 24, 12, 15, 35, 6, 28, 3, 20, 48, 50, 27, 19, 59, 60, 57, 34, 50, 6, 3, 27" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = 0 +InformationStateString(0) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 0\n C: A9\n D: 73\n S: \n H: 954\n\nTricks:\n0 1 2 0 1 \nST SA S3 \n H2 H8 \n\nBids: 7 8 5 \nTricks Won: 0 1 0 \n" +InformationStateString(1) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 1\n C: 84\n D: 2\n S: Q6\n H: T\n\nTricks:\n0 1 2 0 1 \nST SA S3 \n H2 H8 \n\nBids: 7 8 5 \nTricks Won: 0 1 0 \n" +InformationStateString(2) = "Num Total Tricks: 8\nDealer: 2\nNum Cards Dealt: 25\nTrump: H6\nPlayer: 2\n C: Q753\n D: 85\n S: \n H: \n\nTricks:\n0 1 2 0 1 \nST SA S3 \n H2 H8 \n\nBids: 7 8 5 \nTricks Won: 0 1 0 \n" +InformationStateTensor(0): binvec(4704, 
0x10010000100000000041104092000804110409000080080000800080000002000000000000000020000000000000000202000000000000000000000000000000000000000000000000001000000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(1): binvec(4704, 0x10010000100000000508020801020240802080102000080000800080000002000000000000000020000000000000000202000000000000000000000000000000000000000000000000001000000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +InformationStateTensor(2): binvec(4704, 
0x100100001000000000a0c085000800080c0840008000080000800080000002000000000000000020000000000000000202000000000000000000000000000000000000000000000000001000000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000) +Rewards() = [0, 0, 0] +Returns() = [0, 0, 0] +LegalActions() = [11, 15, 31] +StringLegalActions() = ["H4", "H5", "H9"] + +# Apply action "H9" +action: 31 + +# State 36 +# Apply action "CA" +action: 48 + +# State 37 +# Apply action "C8" +action: 24 + +# State 38 +# Apply action "C7" +action: 20 + +# State 39 +# Apply action "C9" +action: 28 + +# State 40 +# Apply action "C4" +action: 8 + +# State 41 +# Apply action "C5" +action: 12 + +# State 42 +# Apply action "H5" +action: 15 + +# State 43 +# Apply action "HT" +action: 35 + +# State 44 +# Apply action "D8" +action: 25 + +# State 45 +# Apply action "S6" +action: 18 + +# State 46 +# Apply action "D5" +action: 13 + +# State 47 +# Apply action "H4" +action: 11 + +# State 48 +# Apply action "D3" +action: 5 + +# State 49 +# Apply action "D2" +action: 1 + +# State 50 +# Apply action "CQ" +action: 40 + +# State 51 +# Apply action "D7" +action: 21 + +# State 52 +# Apply action "SQ" +action: 42 + +# State 53 +# Apply action "C3" +action: 4 + +# State 54 +# Phase: GameOver +# Num Total Tricks: 8 +# Dealer: 2 +# Player: 0 +# C: A9 +# D: 73 +# S: T +# H: 954 +# +# Player: 1 +# C: 84 +# D: 2 +# S: AQ6 +# H: T2 +# +# Player: 2 +# C: Q753 +# D: 85 +# S: 3 +# H: 8 +# +# Trump: H6 +# +# Tricks: +# 0 1 2 0 1 +# ST SA S3 +# H2 H8 H9 +# CA C8 C7 +# C9 C4 C5 +# H5 HT D8 +# S6 D5 H4 +# D3 D2 CQ +# D7 SQ C3 +# +# Bids: 7 8 5 +# Tricks Won: 6 2 0 +# Score: 6 2 0 +IsTerminal() = True +History() = [8, 2, 21, 8, 13, 34, 1, 4, 31, 42, 25, 5, 18, 40, 11, 24, 12, 15, 35, 6, 28, 3, 20, 48, 50, 27, 19, 59, 60, 57, 34, 50, 6, 3, 27, 31, 48, 24, 20, 28, 8, 12, 15, 35, 25, 18, 13, 11, 5, 1, 40, 21, 42, 4] +HistoryString() = "8, 2, 21, 8, 13, 34, 1, 4, 31, 42, 25, 5, 18, 40, 11, 24, 12, 15, 35, 6, 28, 3, 20, 48, 50, 27, 19, 59, 60, 57, 34, 50, 6, 3, 27, 31, 48, 24, 20, 28, 8, 12, 15, 35, 25, 18, 13, 11, 5, 1, 40, 21, 42, 4" +IsChanceNode() = False +IsSimultaneousNode() = False +CurrentPlayer() = -4 +InformationStateString(0) = "Phase: GameOver\nNum Total Tricks: 8\nDealer: 2\nPlayer: 0\n C: A9\n D: 73\n S: T\n H: 954\n\nPlayer: 1\n C: 84\n D: 2\n S: AQ6\n H: T2\n\nPlayer: 2\n C: Q753\n D: 85\n S: 3\n H: 8\n\nTrump: H6\n\nTricks:\n0 1 2 0 1 \nST SA S3 \n H2 H8 H9 \nCA C8 C7 \nC9 C4 C5 \nH5 HT D8 \n S6 D5 H4 \nD3 D2 CQ \nD7 SQ C3 \n\nBids: 7 8 5 \nTricks Won: 6 2 0 \nScore: 6 2 0 \n" +InformationStateString(1) = "Phase: 
GameOver\nNum Total Tricks: 8\nDealer: 2\nPlayer: 0\n C: A9\n D: 73\n S: T\n H: 954\n\nPlayer: 1\n C: 84\n D: 2\n S: AQ6\n H: T2\n\nPlayer: 2\n C: Q753\n D: 85\n S: 3\n H: 8\n\nTrump: H6\n\nTricks:\n0 1 2 0 1 \nST SA S3 \n H2 H8 H9 \nCA C8 C7 \nC9 C4 C5 \nH5 HT D8 \n S6 D5 H4 \nD3 D2 CQ \nD7 SQ C3 \n\nBids: 7 8 5 \nTricks Won: 6 2 0 \nScore: 6 2 0 \n" +InformationStateString(2) = "Phase: GameOver\nNum Total Tricks: 8\nDealer: 2\nPlayer: 0\n C: A9\n D: 73\n S: T\n H: 954\n\nPlayer: 1\n C: 84\n D: 2\n S: AQ6\n H: T2\n\nPlayer: 2\n C: Q753\n D: 85\n S: 3\n H: 8\n\nTrump: H6\n\nTricks:\n0 1 2 0 1 \nST SA S3 \n H2 H8 H9 \nCA C8 C7 \nC9 C4 C5 \nH5 HT D8 \n S6 D5 H4 \nD3 D2 CQ \nD7 SQ C3 \n\nBids: 7 8 5 \nTricks Won: 6 2 0 \nScore: 6 2 0 \n" +InformationStateTensor(0): zeros(4704) +InformationStateTensor(1): zeros(4704) +InformationStateTensor(2): zeros(4704) +Rewards() = [3.33333, -0.666667, -2.66667] +Returns() = [3.33333, -0.666667, -2.66667] diff --git a/scenarios/bargaining/open_spiel/open_spiel/julia/CMakeLists.txt b/scenarios/bargaining/open_spiel/open_spiel/julia/CMakeLists.txt new file mode 100644 index 0000000..f0c1048 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/julia/CMakeLists.txt @@ -0,0 +1,18 @@ +find_package(JlCxx REQUIRED) + +add_library(spieljl SHARED wrapper/spieljl.cc ${OPEN_SPIEL_OBJECTS}) +target_link_libraries(spieljl PRIVATE JlCxx::cxxwrap_julia JlCxx::cxxwrap_julia_stl) +set_target_properties(spieljl PROPERTIES + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CXX_EXTENSIONS ON +) + +install(TARGETS spieljl + RUNTIME DESTINATION lib + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib +) + +add_test(NAME julia_test COMMAND julia --project=${CMAKE_CURRENT_SOURCE_DIR} + -e "using Pkg; Pkg.build(); Pkg.test()") diff --git a/scenarios/bargaining/open_spiel/open_spiel/julia/Project.toml b/scenarios/bargaining/open_spiel/open_spiel/julia/Project.toml new file mode 100644 index 0000000..56dc33b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/julia/Project.toml @@ -0,0 +1,19 @@ +name = "OpenSpiel" +uuid = "7e5d8dc8-489f-4be6-9273-f4c3010e6ec1" +authors = ["Jun Tian "] +version = "0.1.1" + +[deps] +CxxWrap = "1f15a43c-97ca-5a2a-ae31-89f07a497df4" + +[compat] +CxxWrap = "0.11.2" +julia = "1.3" + +[extras] +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["StatsBase", "Test", "Random"] diff --git a/scenarios/bargaining/open_spiel/open_spiel/julia/README.md b/scenarios/bargaining/open_spiel/open_spiel/julia/README.md new file mode 100644 index 0000000..0df3b8b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/julia/README.md @@ -0,0 +1,10 @@ +# Julia OpenSpiel + +For general usage, please refer +[OpenSpiel on Julia](https://openspiel.readthedocs.io/en/latest/julia.html). + +For developers, the basic idea of this Julia wrapper is that, a shared lib named +`libspieljl.so` is built with the help of +[CxxWrap.jl](https://github.com/JuliaInterop/CxxWrap.jl) and then it is wrapped +in the `OpenSpiel` module. 
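For orientation, here is a minimal usage sketch of the wrapped module (an editorial illustration, not part of the upstream README). It assumes the shared library has been built as described above and that `OpenSpiel.jl` is available in the active Julia environment; the game name and the random action choice are only illustrative and mirror the calls exercised in the test files further below.

```julia
using OpenSpiel

# Load a game through the wrapped C++ API and play out a random episode.
game = load_game("tic_tac_toe")
state = new_initial_state(game)
while !is_terminal(state)
    apply_action(state, rand(legal_actions(state)))  # pick any legal action
end
println(returns(state))  # per-player returns at the terminal state
```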
+ diff --git a/scenarios/bargaining/open_spiel/open_spiel/julia/deps/deps.jl b/scenarios/bargaining/open_spiel/open_spiel/julia/deps/deps.jl new file mode 100644 index 0000000..0aa6cae --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/julia/deps/deps.jl @@ -0,0 +1 @@ +const LIB_OPEN_SPIEL = "$(dirname(@__FILE__))/../../../build/julia/libspieljl" diff --git a/scenarios/bargaining/open_spiel/open_spiel/julia/src/OpenSpiel.jl b/scenarios/bargaining/open_spiel/open_spiel/julia/src/OpenSpiel.jl new file mode 100644 index 0000000..379ebe8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/julia/src/OpenSpiel.jl @@ -0,0 +1,32 @@ +module OpenSpiel + +include("$(@__DIR__)/../deps/deps.jl") + +using CxxWrap +import CxxWrap:argument_overloads +import Base: step, first, last + +struct PlayerAction + player::Int32 + action::Int64 +end + +@wrapmodule(LIB_OPEN_SPIEL) + +include("patch.jl") + +# export all +for n in names(@__MODULE__(); all=true) + if Base.isidentifier(n) && + !startswith(String(n), "_") && + n ∉ (Symbol(@__MODULE__()), :eval, :include) + @eval export $n + end +end + + +function __init__() + @initcxx +end + +end diff --git a/scenarios/bargaining/open_spiel/open_spiel/julia/src/patch.jl b/scenarios/bargaining/open_spiel/open_spiel/julia/src/patch.jl new file mode 100644 index 0000000..14ddc89 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/julia/src/patch.jl @@ -0,0 +1,68 @@ +Base.show(io::IO, g::CxxWrap.StdLib.SharedPtrAllocated{Game}) = print(io, to_string(g)) +Base.show(io::IO, s::CxxWrap.StdLib.UniquePtrAllocated{State}) = print(io, to_string(s)) +Base.show(io::IO, gp::Union{GameParameterAllocated, GameParameterDereferenced}) = print(io, to_repr_string(gp)) + +GameParameter(x::Int) = GameParameter(Ref(Int32(x))) + +Base.copy(s::CxxWrap.StdLib.UniquePtrAllocated{State}) = deepcopy(s) +Base.deepcopy(s::CxxWrap.StdLib.UniquePtrAllocated{State}) = clone(s) + +if Sys.KERNEL == :Linux + function apply_action(state, actions::AbstractVector{<:Number}) + A = StdVector{CxxLong}() + for a in actions + push!(A, a) + end + apply_actions(state, A) + end +elseif Sys.KERNEL == :Darwin + function apply_action(state, actions::AbstractVector{<:Number}) + A = StdVector{Int}() + for a in actions + push!(A, a) + end + apply_actions(state, A) + end +else + @error "unsupported system" +end + +function deserialize_game_and_state(s::CxxWrap.StdLib.StdStringAllocated) + game_and_state = _deserialize_game_and_state(s) + first(game_and_state), last(game_and_state) +end + +Base.values(m::StdMap) = [m[k] for k in keys(m)] + +function StdMap{K, V}(kw) where {K, V} + ps = StdMap{K, V}() + for (k, v) in kw + ps[convert(K, k)] = convert(V, v) + end + ps +end + +function Base.show(io::IO, ps::StdMapAllocated{K, V}) where {K, V} + println(io, "StdMap{$K,$V} with $(length(ps)) entries:") + for k in keys(ps) + println(io, " $k => $(ps[k])") + end +end + +function load_game(s::Union{String, CxxWrap.StdLib.StdStringAllocated}; kw...) + if length(kw) == 0 + _load_game(s) + else + ps = [StdString(string(k)) => v for (k,v) in kw] + _load_game(s, StdMap{StdString, GameParameter}(ps)) + end +end + +function load_game_as_turn_based(s::Union{String, CxxWrap.StdLib.StdStringAllocated}; kw...) 
+ if length(kw) == 0 + _load_game_as_turn_based(s) + else + ps = [StdString(string(k)) => v for (k,v) in kw] + _load_game_as_turn_based(s, StdMap{StdString, GameParameter}(ps)) + end +end diff --git a/scenarios/bargaining/open_spiel/open_spiel/julia/test/bots.jl b/scenarios/bargaining/open_spiel/open_spiel/julia/test/bots.jl new file mode 100644 index 0000000..d9492c0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/julia/test/bots.jl @@ -0,0 +1,88 @@ +@testset "bots" begin + +@testset "MCTSBot" begin + UCT_C = 2. + + init_bot(game, max_simulations, evaluator) = MCTSBot(game, evaluator, UCT_C, max_simulations, 5, true, 42, false, UCT, 0., 0.) + + @testset "can play tic_tac_toe" begin + game = load_game("tic_tac_toe") + max_simulations = 100 + evaluator = random_rollout_evaluator_factory(20, 42) + bot0 = init_bot(game, max_simulations, evaluator) + bot1 = init_bot(game, max_simulations, evaluator) + results = evaluate_bots(new_initial_state(game), [bot0, bot1], 42) + @test results[1] + results[2] == 0 + end + + @testset "can play single player" begin + game = load_game("catch") + max_simulations = 100 + evaluator = random_rollout_evaluator_factory(20, 42) + bot = init_bot(game, max_simulations, evaluator) + results = evaluate_bots(new_initial_state(game), [bot], 42) + @test results[] > 0 + end + + @testset "play three player stochastic games" begin + game = load_game("pig(players=3,winscore=20,horizon=30)") + max_simulations = 1000 + evaluator = random_rollout_evaluator_factory(20, 42) + bot0 = init_bot(game, max_simulations, evaluator) + bot1 = init_bot(game, max_simulations, evaluator) + bot2 = init_bot(game, max_simulations, evaluator) + results = evaluate_bots(new_initial_state(game), [bot0, bot1, bot2], 42) + @test sum(results) == 0 + end + + function get_action_by_str(state, action_str) + for action in legal_actions(state) + if action_str == action_to_string(state, current_player(state), action) + return action + end + end + @error "Illegal action: $action_str" + end + + function search_tic_tac_toe_state(initial_actions) + game = load_game("tic_tac_toe") + state = new_initial_state(game) + for action_str in split(initial_actions, " ") + apply_action(state, get_action_by_str(state, action_str)) + end + evaluator = random_rollout_evaluator_factory(20, 42) + bot = MCTSBot(game, evaluator, UCT_C, 10000, 10, true, 42, false, UCT, 0., 0.) 
+ mcts_search(bot, state), state + end + + @testset "solve draw" begin + root, state = search_tic_tac_toe_state("x(1,1) o(0,0) x(2,2)") + @test to_string(state) == "o..\n.x.\n..x" + @test get_outcome(root)[get_player(root)+1] == 0 + for c in get_children(root) + @test get_outcome(c)[get_player(c)+1] <= 0 + end + best = best_child(root)[] + @test get_outcome(best)[get_player(best)+1] == 0 + end + + @testset "solve loss" begin + root, state = search_tic_tac_toe_state("x(1,1) o(0,0) x(2,2) o(0,1) x(0,2)") + @test to_string(state) == "oox\n.x.\n..x" + @test get_outcome(root)[get_player(root)+1] == -1 + for c in get_children(root) + @test get_outcome(c)[get_player(c)+1] == -1 + end + end + + @testset "solve win" begin + root, state = search_tic_tac_toe_state("x(0,1) o(2,2)") + @test to_string(state) == ".x.\n...\n..o" + @test get_outcome(root)[get_player(root)+1] == 1 + best = best_child(root)[] + @test get_outcome(best)[get_player(best)+1] == 1 + @test action_to_string(state, get_player(best), get_action(best)) == "x(0,2)" + end +end + +end diff --git a/scenarios/bargaining/open_spiel/open_spiel/julia/test/cfr.jl b/scenarios/bargaining/open_spiel/open_spiel/julia/test/cfr.jl new file mode 100644 index 0000000..6500871 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/julia/test/cfr.jl @@ -0,0 +1,38 @@ +@testset "cfr" begin + +function test_nash_kuhn_poker(game, policy) + game_values = expected_returns(new_initial_state(game), policy, -1) + + # 1/18 is the Nash value. See https://en.wikipedia.org/wiki/Kuhn_poker + nash_value = 1.0 / 18.0 + eps = 1e-3 + @test length(game_values) == 2 + @test isapprox(game_values[1], -nash_value, atol=eps) + @test isapprox(game_values[2], nash_value, atol=eps) +end + +test_exploitability_kuhn_poker(game, policy) = @test exploitability(game, policy) <= 0.05 + +@testset "CFRSolver" begin + game = load_game("kuhn_poker") + solver = CFRSolver(game) + for _ in 1:300 + evaluate_and_update_policy(solver) + end + avg_policy = average_policy(solver) + test_nash_kuhn_poker(game, avg_policy) + test_exploitability_kuhn_poker(game, avg_policy) +end + +@testset "CFRPlusSolver" begin + game = load_game("kuhn_poker") + solver = CFRPlusSolver(game) + for _ in 1:200 + evaluate_and_update_policy(solver) + end + avg_policy = average_policy(solver) + test_nash_kuhn_poker(game, avg_policy) + test_exploitability_kuhn_poker(game, avg_policy) +end + +end diff --git a/scenarios/bargaining/open_spiel/open_spiel/julia/test/games_api.jl b/scenarios/bargaining/open_spiel/open_spiel/julia/test/games_api.jl new file mode 100644 index 0000000..93b53d1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/julia/test/games_api.jl @@ -0,0 +1,84 @@ +@testset "Games" begin + +@testset "registered game attributes" begin + games = Dict(short_name(game_info) => game_info for game_info in registered_games()) + @test dynamics(games["kuhn_poker"]) == SEQUENTIAL + @test chance_mode(games["kuhn_poker"]) == EXPLICIT_STOCHASTIC + @test information(games["kuhn_poker"]) == IMPERFECT_INFORMATION + @test utility(games["kuhn_poker"]) == ZERO_SUM + @test min_num_players(games["kuhn_poker"]) == 2 +end + +@testset "create game" begin + game = load_game("kuhn_poker") + game_info = get_type(game) + @test information(game_info) == IMPERFECT_INFORMATION + @test num_players(game) == 2 +end + +@testset "play kuhn_poker" begin + game = load_game("kuhn_poker") + state = new_initial_state(game) + @test is_chance_node(state) == true + @test is_initial_state(state) == true + @test chance_outcomes(state) == 
[0 => 1/3, 1 => 1/3, 2 => 1/3] + + apply_action(state, 1) + @test is_chance_node(state) == true + @test is_initial_state(state) == false + @test chance_outcomes(state) == [0 => 1/2, 2 => 1/2] + + apply_action(state, 2) + @test is_chance_node(state) == false + @test is_initial_state(state) == false + @test legal_actions(state) == [0, 1] + + @test length(full_history(state)) == 2 +end + +@testset "tic_tac_toe" begin + game = load_game("tic_tac_toe") + state = new_initial_state(game) + @test is_chance_node(state) == false + @test is_terminal(state) == false + @test is_initial_state(state) == true + @test legal_actions(state) == 0:8 +end + +@testset "GameParameter" begin + io = IOBuffer() + print(io, GameParameter(true)) + @test String(take!(io)) == "GameParameter(bool_value=True)" + print(io, GameParameter(false)) + @test String(take!(io)) == "GameParameter(bool_value=False)" + print(io, GameParameter("one")) + @test String(take!(io)) == "GameParameter(string_value='one')" + print(io, GameParameter(1)) + @test String(take!(io)) == "GameParameter(int_value=1)" + print(io, GameParameter(1.0)) + @test String(take!(io)) == "GameParameter(double_value=1)" + print(io, GameParameter(1.2)) + @test String(take!(io)) == "GameParameter(double_value=1.2)" +end + +@testset "simultaneous game history" begin + game = load_game("coop_box_pushing") + state = new_initial_state(game) + apply_action(state, 0) + state2 = new_initial_state(game) + apply_action(state2, fill(0, num_players(game))) + @test history(state) == history(state2) +end + +@testset "Matrixrame" begin + matrix_blotto = load_matrix_game("blotto") + @test num_rows(matrix_blotto) == 66 + @test num_cols(matrix_blotto) == 66 + + kuhn_game = load_game("kuhn_poker") + kuhn_matrix_game = extensive_to_matrix_game(kuhn_game) + @test num_rows(kuhn_matrix_game) == 64 + @test num_cols(kuhn_matrix_game) == 64 +end + +end diff --git a/scenarios/bargaining/open_spiel/open_spiel/julia/test/games_simulation.jl b/scenarios/bargaining/open_spiel/open_spiel/julia/test/games_simulation.jl new file mode 100644 index 0000000..612dfbd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/julia/test/games_simulation.jl @@ -0,0 +1,141 @@ +@testset "games simulation" begin + +MAX_ACTIONS_PER_GAME = 1000 + +SPIEL_GAMES_LIST = registered_games() + +SPIEL_LOADABLE_GAMES_LIST = [ + g for g in SPIEL_GAMES_LIST if default_loadable(g) +] + +@test length(SPIEL_LOADABLE_GAMES_LIST) >= 38 + +SPIEL_SIMULTANEOUS_GAMES_LIST = [ + g for g in SPIEL_LOADABLE_GAMES_LIST + if dynamics(g) == SIMULTANEOUS +] + +@test length(SPIEL_SIMULTANEOUS_GAMES_LIST) >= 14 + +SPIEL_MULTIPLAYER_GAMES_LIST = [ + (g, p) + for g in SPIEL_LOADABLE_GAMES_LIST + for p in max(min_num_players(g), 2) : min(max_num_players(g), 6) + if (max_num_players(g) > 2) && + (max_num_players(g) > min_num_players(g)) && + (short_name(g) != "tiny_hanabi") && # default payoff only works for 2p + (short_name(g) != "universal_poker") +] + +@test length(SPIEL_MULTIPLAYER_GAMES_LIST) >= 35 + +function apply_action_test_clone(state, action) + state_copy = copy(state) + @test string(state) == string(state_copy) + @test history(state) == history(state_copy) + + apply_action(state, action) + apply_action(state_copy, action) + + @test string(state) == string(state_copy) + @test history(state) == history(state_copy) +end + +function serialize_deserialize(game, state) + ser_str = serialize_game_and_state(game, state) + new_game, new_state = deserialize_game_and_state(ser_str) + @test string(game) == string(new_game) + @test 
string(state) == string(new_state) +end + +function simulate_game(game) + @info "simulating game $(short_name(get_type(game)))" + min_u, max_u = min_utility(game), max_utility(game) + @test min_u < max_u + + state = new_initial_state(game) + total_actions = 0 + + next_serialize_check = 1 + + while !is_terminal(state) && (total_actions <= MAX_ACTIONS_PER_GAME) + total_actions += 1 + + # Serialize/Deserialize is costly. Only do it every power of 2 actions. + if total_actions >= next_serialize_check + serialize_deserialize(game, state) + next_serialize_check *= 2 + end + + # The state can be of four different types: chance node, + # simultaneous node, decision node or mean field node. + if is_chance_node(state) + # Chance node: sample an outcome + outcomes = chance_outcomes(state) + @test length(outcomes) > 0 + action_list, prob_list = zip(outcomes...) + action = action_list[sample(weights(collect(prob_list)))] + apply_action(state, action) + elseif is_simultaneous_node(state) + chosen_actions = [ + rand(legal_actions(state, pid-1)) + for pid in 1:num_players(game) + ] # in julia, index starts with 1 + # Apply the joint action and test cloning states. + apply_action_test_clone(state, chosen_actions) + elseif is_mean_field_node(state) + num_states = length(distribution_support(state)) + update_distribution( + state, StdVector([1. / num_states for _ in 1:num_states])) + else + @test is_player_node(state) + # Decision node: sample action for the single current player + action = rand(legal_actions(state, current_player(state))) + # Apply action and test state cloning. + apply_action_test_clone(state, action) + end + end + + @test total_actions > 0 + + if is_terminal(state) + # Check there are no legal actions. + @test length(legal_actions(state)) == 0 + for player in 0:(num_players(game)-1) + @test length(legal_actions(state, player)) == 0 + end + + utilities = returns(state) + + for u in utilities + @test u >= min_utility(game) + @test u <= max_utility(game) + end + + @info "Simulation of game $game" total_actions utilities + else + @info "Simulation of game $game terminated after maximum number of actions $MAX_ACTIONS_PER_GAME" + end +end + +for game_info in SPIEL_LOADABLE_GAMES_LIST + game = load_game(short_name(game_info)) + @test num_players(game) >= min_num_players(game_info) + @test num_players(game) <= max_num_players(game_info) + simulate_game(game) +end + +for game_info in SPIEL_SIMULTANEOUS_GAMES_LIST + converted_game = load_game_as_turn_based(short_name(game_info)) + simulate_game(converted_game) +end + +for (game_info, n) in SPIEL_MULTIPLAYER_GAMES_LIST + game = load_game(short_name(game_info); players=GameParameter(n)) + simulate_game(game) +end + +simulate_game(load_game("breakthrough(rows=6,columns=6)")) +simulate_game(load_game("pig(players=2,winscore=15)")) + +end diff --git a/scenarios/bargaining/open_spiel/open_spiel/julia/test/runtests.jl b/scenarios/bargaining/open_spiel/open_spiel/julia/test/runtests.jl new file mode 100644 index 0000000..60c9b97 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/julia/test/runtests.jl @@ -0,0 +1,43 @@ +using OpenSpiel +using StatsBase +using Test +using CxxWrap +using Random + +function evaluate_bots( + state::Union{Ptr{Nothing}, CxxPtr{<:State}, CxxWrap.StdLib.UniquePtrAllocated{State}}, + bots::Vector{<:Bot}, + seed::Int) + + rng = MersenneTwister(seed) + + for bot in bots + restart_at(bot, state) + end + + while !is_terminal(state) + if is_chance_node(state) + outcomes_with_probs = chance_outcomes(state) + actions, probs = 
zip(outcomes_with_probs...) + action = actions[sample(rng, weights(collect(probs)))] + apply_action(state, action) + elseif is_simultaneous_node(state) + chosen_actions = [ + legal_actions(state, pid)[pid+1] ? step(bot, state) : INVALID_ACTION + for (pid, bot) in enumerate(bots) + ] # in julia, index starts with 1 + apply_action(state, chosen_actions) + else + apply_action(state, step(bots[current_player(state) + 1], state)) + end + end + returns(state) +end + +@testset "OpenSpiel.jl" begin + include("games_api.jl") + include("games_simulation.jl") + include("bots.jl") + include("cfr.jl") + include("trajector.jl") +end diff --git a/scenarios/bargaining/open_spiel/open_spiel/julia/test/trajector.jl b/scenarios/bargaining/open_spiel/open_spiel/julia/test/trajector.jl new file mode 100644 index 0000000..90249a9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/julia/test/trajector.jl @@ -0,0 +1,36 @@ +@testset "Trajectory" begin + +function get_states_to_indices(game) + state_index = StdMap{StdString, Cint}() + to_visit = [] + push!(to_visit, new_initial_state(game)) + i = 0 + while length(to_visit) != 0 + state = pop!(to_visit) + if (!is_chance_node(state)) && (!is_terminal(state)) + state_index[information_state_string(state)] = Cint(i) + end + i += 1 + for action in legal_actions(state) + push!(to_visit, child(state, action)) + end + end + state_index +end + +@testset "BatchedTrajectory" begin + for game_name in ["kuhn_poker", "leduc_poker", "liars_dice"] + game = load_game(game_name) + batch_size = 32 + states_to_inds = get_states_to_indices(game) + policies = StdVector([get_uniform_policy(game) for _ in 1:2]) + t = record_batched_trajectories(game, policies, states_to_inds, batch_size, false, 123, -1) + @test length(legal_actions(t)) == batch_size + @test length(actions(t)) == batch_size + @test length(player_policies(t)) == batch_size + @test length(player_ids(t)) == batch_size + @test length(next_is_terminal(t)) == batch_size + end +end + +end diff --git a/scenarios/bargaining/open_spiel/open_spiel/julia/wrapper/spieljl.cc b/scenarios/bargaining/open_spiel/open_spiel/julia/wrapper/spieljl.cc new file mode 100644 index 0000000..20a9726 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/julia/wrapper/spieljl.cc @@ -0,0 +1,797 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include // for std::nan + +#include "jlcxx/jlcxx.hpp" +#include "jlcxx/stl.hpp" +#include "open_spiel/algorithms/best_response.h" +#include "open_spiel/algorithms/cfr.h" +#include "open_spiel/algorithms/cfr_br.h" +#include "open_spiel/algorithms/evaluate_bots.h" +#include "open_spiel/algorithms/expected_returns.h" +#include "open_spiel/algorithms/matrix_game_utils.h" +#include "open_spiel/algorithms/mcts.h" +#include "open_spiel/algorithms/tabular_exploitability.h" +#include "open_spiel/algorithms/trajectories.h" +#include "open_spiel/game_transforms/turn_based_simultaneous_game.h" +#include "open_spiel/matrix_game.h" +#include "open_spiel/normal_form_game.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_bots.h" +#include "open_spiel/spiel_utils.h" + +namespace jlcxx { +template <> +struct SuperType { + typedef open_spiel::Game type; +}; +template <> +struct SuperType { + typedef open_spiel::SimMoveGame type; +}; +template <> +struct SuperType { + typedef open_spiel::NormalFormGame type; +}; + +template <> +struct SuperType { + typedef open_spiel::algorithms::Evaluator type; +}; + +template <> +struct SuperType { + typedef open_spiel::Policy type; +}; + +template <> +struct SuperType { + typedef open_spiel::Bot type; +}; + +template <> +struct SuperType { + typedef open_spiel::algorithms::CFRSolverBase type; +}; +template <> +struct SuperType { + typedef open_spiel::algorithms::CFRSolverBase type; +}; +template <> +struct SuperType { + typedef open_spiel::algorithms::CFRSolverBase type; +}; +} // namespace jlcxx + +template <> +struct jlcxx::IsMirroredType : std::true_type { +}; +template <> +struct jlcxx::IsMirroredType : std::true_type {}; +template <> +struct jlcxx::IsMirroredType : std::true_type { +}; +template <> +struct jlcxx::IsMirroredType + : std::true_type {}; +template <> +struct jlcxx::IsMirroredType + : std::true_type {}; +template <> +struct jlcxx::IsMirroredType : std::true_type {}; +template <> +struct jlcxx::IsMirroredType + : std::true_type {}; +template <> +struct jlcxx::IsMirroredType : std::true_type {}; +template <> +struct jlcxx::IsMirroredType + : std::true_type {}; + +template <> +struct jlcxx::IsMirroredType> + : std::true_type {}; + +template +struct jlcxx::julia_type_factory> { + static jl_datatype_t* julia_type() { + return (jl_datatype_t*)apply_type( + jlcxx::julia_type("Pair"), + jl_svec2(julia_base_type(), julia_base_type())); + } +}; + +JLCXX_MODULE define_julia_module(jlcxx::Module& mod) { + jlcxx::stl::apply_stl>(mod); + jlcxx::stl::apply_stl>>( + mod); + jlcxx::stl::apply_stl>(mod); + jlcxx::stl::apply_stl>>(mod); + jlcxx::stl::apply_stl>(mod); + jlcxx::stl::apply_stl>>(mod); + jlcxx::stl::apply_stl>(mod); + + mod.map_type("PlayerAction"); + jlcxx::stl::apply_stl>(mod); + + mod.add_bits("GameParameterStateType", + jlcxx::julia_type("CppEnum")); + mod.set_const("UNSET", open_spiel::GameParameter::Type::kUnset); + mod.set_const("INT", open_spiel::GameParameter::Type::kInt); + mod.set_const("DOUBLE", open_spiel::GameParameter::Type::kDouble); + mod.set_const("STRING", open_spiel::GameParameter::Type::kString); + mod.set_const("BOOL", open_spiel::GameParameter::Type::kBool); + + mod.add_type("GameParameter") + .constructor() + .constructor() + .constructor() + .constructor() + .constructor() + .constructor() + .constructor() + .constructor() + .constructor() + .constructor() + .method("is_mandatory", &open_spiel::GameParameter::is_mandatory) + .method("to_string", &open_spiel::GameParameter::ToString) + 
.method("to_repr_string", &open_spiel::GameParameter::ToReprString); + + // !!! not a good pratice to do so + mod.add_type, jlcxx::TypeVar<2>>>( + "StdMap", jlcxx::julia_type("AbstractDict", "Base")) + .apply, + std::unordered_map, + std::unordered_map, + std::unordered_map, + std::unordered_map>([](auto wrapped) { + typedef typename decltype(wrapped)::type WrappedT; + typedef typename WrappedT::key_type WrappedKey; + typedef typename WrappedT::mapped_type WrappedVal; + + wrapped.module().set_override_module(jl_base_module); + + wrapped.method("length", &WrappedT::size); + wrapped.method("getindex", + [](WrappedT& w, WrappedKey& k) { return w[k]; }); + wrapped.method("setindex!", [](WrappedT& w, WrappedVal& v, + WrappedKey& k) { return w[k] = v; }); + wrapped.method("keys", [](WrappedT ps) { + std::vector keys; + keys.reserve(ps.size()); + for (auto const& it : ps) { + keys.emplace_back(it.first); + } + return keys; + }); + + wrapped.module().unset_override_module(); + }); + + mod.add_bits("StateType", + jlcxx::julia_type("CppEnum")); + mod.set_const("TERMINAL_STATE", open_spiel::StateType::kTerminal); + mod.set_const("CHANCE_STATE", open_spiel::StateType::kChance); + mod.set_const("DECISION_STATE", open_spiel::StateType::kDecision); + + mod.add_bits("Dynamics", + jlcxx::julia_type("CppEnum")); + mod.set_const("SEQUENTIAL", open_spiel::GameType::Dynamics::kSequential); + mod.set_const("SIMULTANEOUS", open_spiel::GameType::Dynamics::kSimultaneous); + + mod.add_bits("ChanceMode", + jlcxx::julia_type("CppEnum")); + mod.set_const("DETERMINISTIC", + open_spiel::GameType::ChanceMode::kDeterministic); + mod.set_const("EXPLICIT_STOCHASTIC", + open_spiel::GameType::ChanceMode::kExplicitStochastic); + mod.set_const("SAMPLED_STOCHASTIC", + open_spiel::GameType::ChanceMode::kSampledStochastic); + + mod.add_bits("Information", + jlcxx::julia_type("CppEnum")); + mod.set_const("ONE_SHOT", open_spiel::GameType::Information::kOneShot); + mod.set_const("PERFECT_INFORMATION", + open_spiel::GameType::Information::kPerfectInformation); + mod.set_const("IMPERFECT_INFORMATION", + open_spiel::GameType::Information::kImperfectInformation); + + mod.add_bits("Utility", + jlcxx::julia_type("CppEnum")); + mod.set_const("ZERO_SUM", open_spiel::GameType::Utility::kZeroSum); + mod.set_const("CONSTANT_SUM", open_spiel::GameType::Utility::kConstantSum); + mod.set_const("GENERAL_SUM", open_spiel::GameType::Utility::kGeneralSum); + mod.set_const("IDENTICAL", open_spiel::GameType::Utility::kIdentical); + + mod.add_bits("RewardModel", + jlcxx::julia_type("CppEnum")); + mod.set_const("REWARDS", open_spiel::GameType::RewardModel::kRewards); + mod.set_const("TERMINAL", open_spiel::GameType::RewardModel::kTerminal); + + mod.add_type("GameType") + .method("short_name", + [](const open_spiel::GameType& gt) { return gt.short_name; }) + .method("long_name", + [](const open_spiel::GameType& gt) { return gt.long_name; }) + .method("dynamics", + [](const open_spiel::GameType& gt) { return gt.dynamics; }) + .method("chance_mode", + [](const open_spiel::GameType& gt) { return gt.chance_mode; }) + .method("information", + [](const open_spiel::GameType& gt) { return gt.information; }) + .method("utility", + [](const open_spiel::GameType& gt) { return gt.utility; }) + .method("reward_model", + [](const open_spiel::GameType& gt) { return gt.reward_model; }) + .method("max_num_players", + [](const open_spiel::GameType& gt) { return gt.max_num_players; }) + .method("min_num_players", + [](const open_spiel::GameType& gt) { return 
gt.min_num_players; }) + .method("default_loadable", + [](const open_spiel::GameType& gt) { + return gt.default_loadable; + }) + .method("provides_information_state_string", + [](const open_spiel::GameType& gt) { + return gt.provides_information_state_string; + }) + .method("provides_information_state_tensor", + [](const open_spiel::GameType& gt) { + return gt.provides_information_state_tensor; + }) + .method("provides_observation_string", + [](const open_spiel::GameType& gt) { + return gt.provides_observation_string; + }) + .method("provides_observation_tensor", + [](const open_spiel::GameType& gt) { + return gt.provides_observation_tensor; + }) + .method("provides_factored_observation_string", + [](const open_spiel::GameType& gt) { + return gt.provides_factored_observation_string; + }) + .method("parameter_specification", [](const open_spiel::GameType& gt) { + return gt.parameter_specification; + }); + + mod.add_bits("PlayerId", jlcxx::julia_type("CppEnum")); + mod.set_const("INVALID_PLAYER", open_spiel::kInvalidPlayer); + mod.set_const("TERMINAL_PLAYER", open_spiel::kTerminalPlayerId); + mod.set_const("CHANCE_PLAYER", open_spiel::kChancePlayerId); + mod.set_const("SIMULTANEOUS_PLAYER", open_spiel::kSimultaneousPlayerId); + + mod.set_const("INVALID_ACTION", open_spiel::kInvalidAction); + + mod.add_type("State") + .method("current_player", &open_spiel::State::CurrentPlayer) + .method("apply_action", &open_spiel::State::ApplyAction) + .method("legal_actions", + [](open_spiel::State& s) { return s.LegalActions(); }) + .method("legal_actions", + [](open_spiel::State& s, open_spiel::Player p) { + return s.LegalActions(p); + }) + .method("legal_actions_mask", + [](open_spiel::State& s) { return s.LegalActionsMask(); }) + .method("legal_actions_mask", + [](open_spiel::State& s, open_spiel::Player p) { + return s.LegalActionsMask(p); + }) + .method("action_to_string", + [](open_spiel::State& s, open_spiel::Player p, + open_spiel::Action a) { return s.ActionToString(p, a); }) + .method("action_to_string", + [](open_spiel::State& s, open_spiel::Action a) { + return s.ActionToString(a); + }) + .method("string_to_action", + [](open_spiel::State& s, open_spiel::Player p, + const std::string& action_str) { + return s.StringToAction(p, action_str); + }) + .method("string_to_action", + [](open_spiel::State& s, const std::string& action_str) { + return s.StringToAction(action_str); + }) + .method("to_string", &open_spiel::State::ToString) + .method("is_terminal", &open_spiel::State::IsTerminal) + .method("is_initial_state", &open_spiel::State::IsInitialState) + .method("rewards", &open_spiel::State::Rewards) + .method("returns", &open_spiel::State::Returns) + .method("player_reward", &open_spiel::State::PlayerReward) + .method("player_return", &open_spiel::State::PlayerReturn) + .method("is_chance_node", &open_spiel::State::IsChanceNode) + .method("is_simultaneous_node", &open_spiel::State::IsSimultaneousNode) + .method("is_mean_field_node", &open_spiel::State::IsMeanFieldNode) + .method("is_player_node", &open_spiel::State::IsPlayerNode) + .method("history", &open_spiel::State::History) + .method("history_str", &open_spiel::State::HistoryString) + .method("full_history", &open_spiel::State::FullHistory) + .method("information_state_string", + [](open_spiel::State& s, open_spiel::Player p) { + return s.InformationStateString(p); + }) + .method("information_state_string", + [](open_spiel::State& s) { return s.InformationStateString(); }) + .method("information_state_tensor", + [](open_spiel::State& s) { 
return s.InformationStateTensor(); }) + .method("information_state_tensor", + [](open_spiel::State& s, open_spiel::Player p) { + return s.InformationStateTensor(p); + }) + .method("information_state_as_normalized_vector", + [](open_spiel::State& s, open_spiel::Player p, + std::vector data) { + return s.InformationStateTensor(p, &data); + }) + .method("observation_string", + [](open_spiel::State& s) { return s.ObservationString(); }) + .method("observation_string", + [](open_spiel::State& s, open_spiel::Player p) { + return s.ObservationString(p); + }) + .method("observation_tensor", + [](open_spiel::State& s) { return s.ObservationTensor(); }) + .method("observation_tensor", + [](open_spiel::State& s, open_spiel::Player p) { + return s.ObservationTensor(p); + }) + .method("clone", &open_spiel::State::Clone) + .method("child", &open_spiel::State::Child) + .method("undo_action", &open_spiel::State::UndoAction) + .method("apply_actions", + [](open_spiel::State& s, std::vector data) { + return s.ApplyActions(data); + }) + .method("num_distinct_actions", &open_spiel::State::NumDistinctActions) + .method("num_players", &open_spiel::State::NumPlayers) + .method("chance_outcomes", &open_spiel::State::ChanceOutcomes) + .method("get_type", &open_spiel::State::GetType) + .method("serialize", &open_spiel::State::Serialize) + .method("distribution_support", &open_spiel::State::DistributionSupport) + .method("update_distribution", + [](open_spiel::State& s, std::vector distribution) { + return s.UpdateDistribution(distribution); + }); + + mod.add_type("Game") + .method("num_distinct_actions", &open_spiel::Game::NumDistinctActions) + .method("new_initial_state", + [](open_spiel::Game& g) { return g.NewInitialState(); }) + .method("new_initial_state_from_string", + [](open_spiel::Game& g, const std::string& s) { + return g.NewInitialState(s); + }) + .method("max_chance_outcomes", &open_spiel::Game::MaxChanceOutcomes) + .method("get_parameters", &open_spiel::Game::GetParameters) + .method("num_players", &open_spiel::Game::NumPlayers) + .method("min_utility", &open_spiel::Game::MinUtility) + .method("max_utility", &open_spiel::Game::MaxUtility) + .method("get_type", &open_spiel::Game::GetType) + .method("utility_sum", + [](open_spiel::Game& g) { + return g.UtilitySum().value_or(std::nan("")); + }) + .method("information_state_tensor_shape", + &open_spiel::Game::InformationStateTensorShape) + .method("information_state_tensor_size", + &open_spiel::Game::InformationStateTensorSize) + .method("observation_tensor_shape", + &open_spiel::Game::ObservationTensorShape) + .method("observation_tensor_size", + &open_spiel::Game::ObservationTensorSize) + .method("deserialize_state", &open_spiel::Game::DeserializeState) + .method("max_game_length", &open_spiel::Game::MaxGameLength) + .method("to_string", &open_spiel::Game::ToString); + + mod.add_type("SimMoveGame"); + mod.add_type("NormalFormGame"); + + mod.add_type( + "MatrixGame", jlcxx::julia_base_type()) + .constructor, std::vector, + std::vector, std::vector>() + .constructor, std::vector, + const std::vector>&, + const std::vector>&>() + .method("num_rows", &open_spiel::matrix_game::MatrixGame::NumRows) + .method("num_cols", &open_spiel::matrix_game::MatrixGame::NumCols) + .method("row_utility", &open_spiel::matrix_game::MatrixGame::RowUtility) + .method("col_utility", &open_spiel::matrix_game::MatrixGame::ColUtility) + .method("player_utility", + &open_spiel::matrix_game::MatrixGame::PlayerUtility) + .method("row_action_name", + 
&open_spiel::matrix_game::MatrixGame::RowActionName) + .method("col_action_name", + &open_spiel::matrix_game::MatrixGame::ColActionName); + + mod.method( + "create_matrix_game", + [](const std::string& a, const std::string& b, + const std::vector& c, const std::vector& d, + const std::vector>& e, + const std::vector>& f) { + return open_spiel::matrix_game::CreateMatrixGame(a, b, c, d, e, f); + }); + mod.method("create_matrix_game", + [](const std::vector>& a, + const std::vector>& b) { + return open_spiel::matrix_game::CreateMatrixGame(a, b); + }); + mod.method("_load_game", + [](const std::string& s) { return open_spiel::LoadGame(s); }); + mod.method("_load_game", + [](const std::string& s, const open_spiel::GameParameters& ps) { + return open_spiel::LoadGame(s, ps); + }); + mod.method("_load_game_as_turn_based", [](const std::string& s) { + return open_spiel::LoadGameAsTurnBased(s); + }); + mod.method("_load_game_as_turn_based", + [](const std::string& s, const open_spiel::GameParameters& ps) { + return open_spiel::LoadGameAsTurnBased(s, ps); + }); + mod.method("load_matrix_game", &open_spiel::algorithms::LoadMatrixGame); + mod.method("extensive_to_matrix_game", + &open_spiel::algorithms::ExtensiveToMatrixGame); + mod.method("registered_names", &open_spiel::GameRegisterer::RegisteredNames); + mod.method("registered_games", &open_spiel::GameRegisterer::RegisteredGames); + + mod.add_type("Bot") + .method("step", &open_spiel::Bot::Step) + .method("restart", &open_spiel::Bot::Restart) + .method("restart_at", &open_spiel::Bot::RestartAt) + .method("provides_force_action", &open_spiel::Bot::ProvidesForceAction) + .method("force_action", &open_spiel::Bot::ForceAction) + .method("provides_policy", &open_spiel::Bot::ProvidesPolicy) + .method("get_policy", &open_spiel::Bot::GetPolicy) + .method("step_with_policy", &open_spiel::Bot::StepWithPolicy); + + jlcxx::stl::apply_stl(mod); + + mod.add_type("Policy") + .method("get_state_policy_as_parallel_vectors", + [](open_spiel::Policy p, const open_spiel::State& state) { + return p.GetStatePolicyAsParallelVectors(state); + }) + .method("get_state_policy_as_parallel_vectors", + [](open_spiel::Policy p, const std::string state) { + return p.GetStatePolicyAsParallelVectors(state); + }) + .method("get_state_policy_as_map", + [](open_spiel::Policy p, const open_spiel::State& state) { + return p.GetStatePolicyAsMap(state); + }) + .method("get_state_policy_as_map", + [](open_spiel::Policy p, const std::string state) { + return p.GetStatePolicyAsMap(state); + }) + .method("get_state_policy", + [](open_spiel::Policy p, const open_spiel::State& state) { + return p.GetStatePolicy(state); + }) + .method("get_state_policy", + [](open_spiel::Policy p, const std::string state) { + return p.GetStatePolicy(state); + }); + + jlcxx::stl::apply_stl(mod); + + mod.add_type( + "TabularPolicy", jlcxx::julia_base_type()) + .constructor() + .constructor< + const std::unordered_map&>() + .method("get_state_policy", &open_spiel::TabularPolicy::GetStatePolicy) + .method("policy_table", + [](open_spiel::TabularPolicy p) { return p.PolicyTable(); }) + .method("get_state_policy", + [](open_spiel::TabularPolicy p, const open_spiel::State& state) { + return p.GetStatePolicy(state.InformationStateString()); + }) + .method("get_state_policy", + [](open_spiel::TabularPolicy p, const std::string& state) { + return p.GetStatePolicy(state); + }); + + jlcxx::stl::apply_stl(mod); + + mod.method("get_empty_tabular_policy", &open_spiel::GetEmptyTabularPolicy); + mod.method("get_uniform_policy", 
&open_spiel::GetUniformPolicy); + mod.method("get_random_policy", &open_spiel::GetRandomPolicy); + mod.method("get_first_action_policy", &open_spiel::GetFirstActionPolicy); + + // !!! Bots below are not exported directly in c++ + // !!! which makes it hard to dispatch overriden methods + // mod.method("make_uniform_random_bot", &open_spiel::MakeUniformRandomBot); + // mod.method("make_fixed_action_preference_bot", + // &open_spiel::MakeFixedActionPreferenceBot); mod.method("make_policy_bot", + // [](const open_spiel::Game& game, open_spiel::Player pid, int seed, + // open_spiel::Policy policy) { return open_spiel::MakePolicyBot(game, pid, + // seed, std::make_unique(policy)); }); + + // !!! just a workaround here + mod.add_type, + std::unique_ptr>>("GameStatePair") + .method("first", + [](std::pair, + std::unique_ptr>& p) { + return p.first; + }) + .method("last", [](std::pair, + std::unique_ptr>& p) { + return std::move(p.second); + }); + + mod.method("serialize_game_and_state", &open_spiel::SerializeGameAndState); + mod.method("_deserialize_game_and_state", + &open_spiel::DeserializeGameAndState); + + mod.add_type("Evaluator"); + + mod.add_type( + "RandomRolloutEvaluator", + jlcxx::julia_base_type()) + .constructor() + .method("evaluate", &open_spiel::algorithms::Evaluator::Evaluate) + .method("prior", &open_spiel::algorithms::Evaluator::Prior); + + mod.method("random_rollout_evaluator_factory", [](int rollouts, int seed) { + return std::shared_ptr( + new open_spiel::algorithms::RandomRolloutEvaluator(rollouts, seed)); + }); + + mod.add_bits( + "ChildSelectionPolicy", jlcxx::julia_type("CppEnum")); + mod.set_const("UCT", open_spiel::algorithms::ChildSelectionPolicy::UCT); + mod.set_const("PUCT", open_spiel::algorithms::ChildSelectionPolicy::PUCT); + + mod.add_type("SearchNode") + .constructor() + .method("UCTValue", &open_spiel::algorithms::SearchNode::UCTValue) + .method("PUCTValue", &open_spiel::algorithms::SearchNode::PUCTValue) + .method("compare_final", + &open_spiel::algorithms::SearchNode::CompareFinal) + .method("best_child", &open_spiel::algorithms::SearchNode::BestChild) + .method("to_string", &open_spiel::algorithms::SearchNode::ToString) + .method("children_str", &open_spiel::algorithms::SearchNode::ChildrenStr) + // TODO(author11): https://github.com/JuliaInterop/CxxWrap.jl/issues/90 + .method("get_action", + [](open_spiel::algorithms::SearchNode& sn) { return sn.action; }) + .method("get_prior", + [](open_spiel::algorithms::SearchNode& sn) { return sn.prior; }) + .method("get_player", + [](open_spiel::algorithms::SearchNode& sn) { return sn.player; }) + .method("get_explore_count", + [](open_spiel::algorithms::SearchNode& sn) { + return sn.explore_count; + }) + .method("get_total_reward", + [](open_spiel::algorithms::SearchNode& sn) { + return sn.total_reward; + }) + .method("get_outcome", + [](open_spiel::algorithms::SearchNode& sn) { return sn.outcome; }) + .method("set_action!", + [](open_spiel::algorithms::SearchNode& sn, + open_spiel::Action action) { sn.action = action; }) + .method("set_prior!", [](open_spiel::algorithms::SearchNode& sn, + double prior) { sn.prior = prior; }) + .method("set_player!", + [](open_spiel::algorithms::SearchNode& sn, + open_spiel::Player player) { sn.player = player; }) + .method("set_explore_count!", + [](open_spiel::algorithms::SearchNode& sn, int explore_count) { + sn.explore_count = explore_count; + }) + .method("set_total_reward!", + [](open_spiel::algorithms::SearchNode& sn, double total_reward) { + sn.total_reward = total_reward; + 
}) + .method("set_outcome!", + [](open_spiel::algorithms::SearchNode& sn, + std::vector outcome) { sn.outcome = outcome; }); + + jlcxx::stl::apply_stl(mod); + + mod.method("get_children", [](open_spiel::algorithms::SearchNode& sn) { + return sn.children; + }); + mod.method("set_children!", + [](open_spiel::algorithms::SearchNode& sn, + std::vector children) { + sn.children = children; + }); + + mod.add_type( + "MCTSBot", jlcxx::julia_base_type()) + .constructor, + double, int, int64_t, bool, int, bool, + open_spiel::algorithms::ChildSelectionPolicy, double, + double>() + .method("restart", &open_spiel::algorithms::MCTSBot::Restart) + .method("restart_at", &open_spiel::algorithms::MCTSBot::RestartAt) + .method("step", &open_spiel::algorithms::MCTSBot::Step) + .method("step_with_policy", + &open_spiel::algorithms::MCTSBot::StepWithPolicy) + .method("mcts_search", &open_spiel::algorithms::MCTSBot::MCTSearch); + + jlcxx::stl::apply_stl(mod); + + mod.add_type( + "TabularBestResponse") + .constructor() + .constructor< + const open_spiel::Game&, open_spiel::Player, + const std::unordered_map&>() + .method("best_response_action", + [](open_spiel::algorithms::TabularBestResponse& t, + const std::string& infostate) { + return t.BestResponseAction(infostate); + }) + .method( + "get_best_response_actions", + &open_spiel::algorithms::TabularBestResponse::GetBestResponseActions) + .method( + "get_best_response_policy", + &open_spiel::algorithms::TabularBestResponse::GetBestResponsePolicy) + .method("value", + [](open_spiel::algorithms::TabularBestResponse& t, + const std::string& history) { + return t.Value(history); + }) + .method("set_policy", + [](open_spiel::algorithms::TabularBestResponse& t, + const open_spiel::Policy* p) { return t.SetPolicy(p); }) + .method( + "set_policy", + [](open_spiel::algorithms::TabularBestResponse& t, + std::unordered_map& p) { + return t.SetPolicy(p); + }); + + mod.add_type("CFRSolverBase") + .method("evaluate_and_update_policy", + &open_spiel::algorithms::CFRSolver::EvaluateAndUpdatePolicy) + .method("current_policy", + &open_spiel::algorithms::CFRSolver::CurrentPolicy) + .method("average_policy", + &open_spiel::algorithms::CFRSolver::AveragePolicy); + + mod.add_type( + "CFRSolver", + jlcxx::julia_base_type()) + .constructor(); + + mod.add_type( + "CFRPlusSolver", + jlcxx::julia_base_type()) + .constructor(); + + mod.add_type( + "CFRBRSolver", + jlcxx::julia_base_type()) + .constructor() + .method("evaluate_and_update_policy", + &open_spiel::algorithms::CFRSolver::EvaluateAndUpdatePolicy); + + mod.add_type("TrajectoryRecorder") + .constructor&, int>(); + + mod.method("evaluate_bots", [](open_spiel::State* state, + const std::vector& bots, + int seed) { + return open_spiel::EvaluateBots(state, bots, seed); + }); + mod.method("exploitability", [](const open_spiel::Game& game, + const open_spiel::Policy& policy) { + return open_spiel::algorithms::Exploitability(game, policy); + }); + mod.method( + "exploitability", + [](const open_spiel::Game& game, + const std::unordered_map& + policy) { + return open_spiel::algorithms::Exploitability(game, policy); + }); + mod.method("nash_conv", [](const open_spiel::Game& game, + const open_spiel::Policy& policy) { + return open_spiel::algorithms::NashConv(game, policy); + }); + mod.method( + "nash_conv", + [](const open_spiel::Game& game, + const std::unordered_map& + policy) { + return open_spiel::algorithms::NashConv(game, policy); + }); + mod.method("convert_to_turn_based", &open_spiel::ConvertToTurnBased); + 
mod.method("expected_returns", + [](const open_spiel::State& state, + const std::vector policies, + int depth_limit) { + return open_spiel::algorithms::ExpectedReturns(state, policies, + depth_limit); + }); + mod.method("expected_returns", + [](const open_spiel::State& state, + const open_spiel::Policy& joint_policy, int depth_limit) { + return open_spiel::algorithms::ExpectedReturns( + state, joint_policy, depth_limit); + }); + + mod.add_type("BatchedTrajectory") + .constructor() + .method("observations", + [](open_spiel::algorithms::BatchedTrajectory bt) { + return bt.observations; + }) + .method("state_indices", + [](open_spiel::algorithms::BatchedTrajectory bt) { + return bt.state_indices; + }) + .method("legal_actions", + [](open_spiel::algorithms::BatchedTrajectory bt) { + return bt.legal_actions; + }) + .method("actions", + [](open_spiel::algorithms::BatchedTrajectory bt) { + return bt.actions; + }) + .method("player_policies", + [](open_spiel::algorithms::BatchedTrajectory bt) { + return bt.player_policies; + }) + .method("player_ids", + [](open_spiel::algorithms::BatchedTrajectory bt) { + return bt.player_ids; + }) + .method("rewards", + [](open_spiel::algorithms::BatchedTrajectory bt) { + return bt.rewards; + }) + .method( + "valid", + [](open_spiel::algorithms::BatchedTrajectory bt) { return bt.valid; }) + .method("next_is_terminal", + [](open_spiel::algorithms::BatchedTrajectory bt) { + return bt.next_is_terminal; + }) + .method("max_trajectory_length", + [](open_spiel::algorithms::BatchedTrajectory bt) { + return bt.max_trajectory_length; + }) + .method("resize_fields", + &open_spiel::algorithms::BatchedTrajectory::ResizeFields); + + mod.method("record_batched_trajectories", + [](const open_spiel::Game& game, + const std::vector& policies, + const std::unordered_map& state_to_index, + int batch_size, bool include_full_observations, int seed, + int max_unroll_length) { + return open_spiel::algorithms::RecordBatchedTrajectory( + game, policies, state_to_index, batch_size, + include_full_observations, seed, max_unroll_length); + }); +} // NOLINT(readability/fn_size) + diff --git a/scenarios/bargaining/open_spiel/open_spiel/libnop/CMakeLists.txt b/scenarios/bargaining/open_spiel/open_spiel/libnop/CMakeLists.txt new file mode 100644 index 0000000..a9b24bf --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/libnop/CMakeLists.txt @@ -0,0 +1,4 @@ +# An integration test to make sure that we can link with libnop. +add_executable(libnop_integration_test + libnop_integration_test.cc ${OPEN_SPIEL_OBJECTS}) +add_test(libnop_integration_test libnop_integration_test) diff --git a/scenarios/bargaining/open_spiel/open_spiel/libnop/libnop_integration_test.cc b/scenarios/bargaining/open_spiel/open_spiel/libnop/libnop_integration_test.cc new file mode 100644 index 0000000..7536a2f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/libnop/libnop_integration_test.cc @@ -0,0 +1,65 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include +#include +#include +#include + +namespace { + +// Libnop example taken from +// https://github.com/google/libnop/blob/master/README.md + +// Contrived template type with private members. +template +struct UserDefined { + public: + UserDefined() = default; + UserDefined(std::string label, std::vector vector) + : label_{std::move(label)}, vector_{std::move(vector)} {} + + const std::string label() const { return label_; } + const std::vector& vector() const { return vector_; } + + private: + std::string label_; + std::vector vector_; + + NOP_STRUCTURE(UserDefined, label_, vector_); +}; + +void TestSerialization() { + using Writer = nop::StreamWriter; + nop::Serializer serializer; + + serializer.Write(UserDefined{"ABC", {1, 2, 3, 4, 5}}); + + using ArrayType = std::array, 2>; + serializer.Write( + ArrayType{{{"ABC", {1, 2, 3, 4, 5}}, {"XYZ", {3.14, 2.72, 23.14}}}}); + + const std::string data = serializer.writer().stream().str(); + std::cout << "Wrote " << data.size() << " bytes." << std::endl; +} + +} // namespace + +int main(int argc, char** argv) { + TestSerialization(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/libtorch/.gitignore b/scenarios/bargaining/open_spiel/open_spiel/libtorch/.gitignore new file mode 100644 index 0000000..284164d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/libtorch/.gitignore @@ -0,0 +1 @@ +libtorch/ diff --git a/scenarios/bargaining/open_spiel/open_spiel/libtorch/CMakeLists.txt b/scenarios/bargaining/open_spiel/open_spiel/libtorch/CMakeLists.txt new file mode 100644 index 0000000..62d8eb1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/libtorch/CMakeLists.txt @@ -0,0 +1,9 @@ +# Now we can use #include "open_spiel/spiel.h" +include_directories(../..) + +# An integration test to make sure that we can link with torchlib. +add_executable(torch_integration_test + torch_integration_test.cc ${OPEN_SPIEL_OBJECTS}) +add_test(torch_integration_test torch_integration_test) + +target_link_libraries(torch_integration_test ${TORCH_LIBRARIES}) diff --git a/scenarios/bargaining/open_spiel/open_spiel/libtorch/torch_integration_test.cc b/scenarios/bargaining/open_spiel/open_spiel/libtorch/torch_integration_test.cc new file mode 100644 index 0000000..400c58c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/libtorch/torch_integration_test.cc @@ -0,0 +1,34 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Examples of how to use the C++ API: +// - https://github.com/pytorch/examples/tree/master/cpp +// - https://github.com/prabhuomkar/pytorch-cpp + +#include "open_spiel/spiel_utils.h" +#include "torch/torch.h" + +namespace { + +void TestMatrixMultiplication() { + at::Tensor mat = torch::rand({3, 3}); + at::Tensor identity = torch::ones({3, 3}); + at::Tensor multiplied = mat * identity; + int num_identical_elements = (mat == multiplied).sum().item().to(); + SPIEL_CHECK_EQ(num_identical_elements, 9); +} + +} // namespace + +int main() { TestMatrixMultiplication(); } diff --git a/scenarios/bargaining/open_spiel/open_spiel/matrix_game.cc b/scenarios/bargaining/open_spiel/open_spiel/matrix_game.cc new file mode 100644 index 0000000..cc73c3f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/matrix_game.cc @@ -0,0 +1,218 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/matrix_game.h" + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace matrix_game { +namespace { +// Check the utilities to see if the game is constant-sum or identical +// (cooperative). +GameType::Utility GetUtilityType(const std::vector& row_player_utils, + const std::vector& col_player_utils) { + double util_sum = 0; + // Assume both are true until proven otherwise. + bool constant_sum = true; + bool identical = true; + for (int i = 0; i < row_player_utils.size(); ++i) { + if (i == 0) { + util_sum = row_player_utils[i] + col_player_utils[i]; + } else { + if (constant_sum && + !Near(row_player_utils[i] + col_player_utils[i], util_sum)) { + constant_sum = false; + } + } + + if (identical && row_player_utils[i] != col_player_utils[i]) { + identical = false; + } + } + + if (constant_sum && Near(util_sum, 0.0)) { + return GameType::Utility::kZeroSum; + } else if (constant_sum) { + return GameType::Utility::kConstantSum; + } else if (identical) { + return GameType::Utility::kIdentical; + } else { + return GameType::Utility::kGeneralSum; + } +} + +absl::optional GetUtilitySum( + const std::vector& row_player_utils, + const std::vector& col_player_utils) { + double util_sum = 0; + bool constant_sum = true; + for (int i = 0; i < row_player_utils.size(); ++i) { + if (i == 0) { + util_sum = row_player_utils[i] + col_player_utils[i]; + } else { + if (constant_sum && + !Near(row_player_utils[i] + col_player_utils[i], util_sum)) { + constant_sum = false; + } + } + } + + if (constant_sum) { + return Near(util_sum, 0.0) ? 0 : util_sum; + } else { + return absl::nullopt; + } +} + +} // namespace + +MatrixState::MatrixState(std::shared_ptr game) + : NFGState(game), + matrix_game_(static_cast(game.get())) {} + +std::string MatrixState::ToString() const { + std::string result = ""; + absl::StrAppend(&result, "Terminal? ", IsTerminal() ? 
"true" : "false", "\n"); + if (IsTerminal()) { + absl::StrAppend(&result, "History: ", HistoryString(), "\n"); + absl::StrAppend(&result, "Returns: ", absl::StrJoin(Returns(), ","), "\n"); + } + absl::StrAppend(&result, "Row actions: "); + for (auto move : LegalActions(0)) { + absl::StrAppend(&result, ActionToString(0, move), " "); + } + absl::StrAppend(&result, "\nCol actions: "); + for (auto move : LegalActions(1)) { + absl::StrAppend(&result, ActionToString(1, move), " "); + } + absl::StrAppend(&result, "\nUtility matrix:\n"); + for (int r = 0; r < matrix_game_->NumRows(); r++) { + for (int c = 0; c < matrix_game_->NumCols(); c++) { + absl::StrAppend(&result, matrix_game_->RowUtility(r, c), ",", + matrix_game_->ColUtility(r, c), " "); + } + absl::StrAppend(&result, "\n"); + } + return result; +} + +std::unique_ptr MatrixGame::NewInitialState() const { + return std::unique_ptr(new MatrixState(shared_from_this())); +} + +std::vector FlattenMatrix( + const std::vector>& matrix_rows) { + std::vector utilities; + int total_size = 0; + int row_size = -1; + int i = 0; + + for (int r = 0; r < matrix_rows.size(); ++r) { + if (row_size < 0) { + row_size = matrix_rows[r].size(); + } + SPIEL_CHECK_GT(row_size, 0); + SPIEL_CHECK_EQ(row_size, matrix_rows[r].size()); + total_size += row_size; + utilities.resize(total_size, 0); + + for (int c = 0; c < matrix_rows[r].size(); ++c) { + utilities[i] = matrix_rows[r][c]; + ++i; + } + } + + return utilities; +} + +std::shared_ptr CreateMatrixGame( + const std::vector>& row_player_utils, + const std::vector>& col_player_utils) { + SPIEL_CHECK_GT(row_player_utils.size(), 0); + int num_rows = row_player_utils.size(); + int num_columns = row_player_utils[0].size(); + std::vector row_names(num_rows); + std::vector col_names(num_columns); + for (int i = 0; i < num_rows; ++i) { + row_names[i] = absl::StrCat("row", i); + } + for (int i = 0; i < num_columns; ++i) { + col_names[i] = absl::StrCat("col", i); + } + return CreateMatrixGame("short_name", "Long Name", row_names, col_names, + row_player_utils, col_player_utils); +} + +// Create a matrix game with the specified utilities and row/column names. +// Utilities must be in row-major form. +std::shared_ptr CreateMatrixGame( + const std::string& short_name, const std::string& long_name, + const std::vector& row_names, + const std::vector& col_names, + const std::vector>& row_player_utils, + const std::vector>& col_player_utils) { + int rows = row_names.size(); + int columns = col_names.size(); + std::vector flat_row_utils = FlattenMatrix(row_player_utils); + std::vector flat_col_utils = FlattenMatrix(col_player_utils); + SPIEL_CHECK_EQ(flat_row_utils.size(), rows * columns); + SPIEL_CHECK_EQ(flat_col_utils.size(), rows * columns); + return CreateMatrixGame(short_name, long_name, row_names, col_names, + flat_row_utils, flat_col_utils); +} + +std::shared_ptr CreateMatrixGame( + const std::string& short_name, const std::string& long_name, + const std::vector& row_names, + const std::vector& col_names, + const std::vector& flat_row_utils, + const std::vector& flat_col_utils) { + // Detect the utility type from the utilities. 
+ GameType::Utility utility = GetUtilityType(flat_row_utils, flat_col_utils); + + GameType game_type{ + /*short_name=*/short_name, + /*long_name=*/long_name, + GameType::Dynamics::kSimultaneous, + GameType::ChanceMode::kDeterministic, + GameType::Information::kOneShot, + utility, + GameType::RewardModel::kTerminal, + /*max_num_players=*/2, + /*min_num_players=*/2, + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/true, + /*provides_observation_tensor=*/true, + /*parameter_specification=*/{} // no parameters + }; + + return std::shared_ptr(new MatrixGame( + game_type, {}, row_names, col_names, flat_row_utils, flat_col_utils)); +} + +absl::optional MatrixGame::UtilitySum() const { + return GetUtilitySum(row_utilities_, col_utilities_); +} + +} // namespace matrix_game +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/matrix_game.h b/scenarios/bargaining/open_spiel/open_spiel/matrix_game.h new file mode 100644 index 0000000..60936f1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/matrix_game.h @@ -0,0 +1,259 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_MATRIX_GAME_H_ +#define OPEN_SPIEL_MATRIX_GAME_H_ + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/normal_form_game.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// A matrix game is an example of a 2-player normal-form game. + +namespace open_spiel { +namespace matrix_game { + +inline constexpr int kRowPlayer = 0; +inline constexpr int kColPlayer = 1; + +// Return a flattened version of these vector of rows. This simply scans each +// row in turn, appending each elements onto the end of a 1D vector. The rows +// must have the same size. 
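+// For example, {{1, 2}, {3, 4}} flattens to {1, 2, 3, 4}.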
+std::vector FlattenMatrix( + const std::vector>& matrix_rows); + +class MatrixGame : public NormalFormGame { + public: + MatrixGame(GameType game_type, GameParameters game_parameters, + std::vector row_action_names, + std::vector col_action_names, + std::vector row_utilities, + std::vector col_utilities) + : NormalFormGame(game_type, game_parameters), + row_action_names_(row_action_names), + col_action_names_(col_action_names), + row_utilities_(row_utilities), + col_utilities_(col_utilities) {} + + MatrixGame(GameType game_type, GameParameters game_parameters, + std::vector row_action_names, + std::vector col_action_names, + const std::vector> row_utilities, + const std::vector> col_utilities) + : NormalFormGame(game_type, game_parameters), + row_action_names_(row_action_names), + col_action_names_(col_action_names), + row_utilities_(FlattenMatrix(row_utilities)), + col_utilities_(FlattenMatrix(col_utilities)) {} + + // Implementation of Game interface + int NumDistinctActions() const override { + return std::max(NumRows(), NumCols()); + } + + std::unique_ptr NewInitialState() const override; + + int NumPlayers() const override { return 2; } + + double MinUtility() const override { + return std::min( + *std::min_element(begin(row_utilities_), end(row_utilities_)), + *std::min_element(begin(col_utilities_), end(col_utilities_))); + } + + double MaxUtility() const override { + return std::max( + *std::max_element(begin(row_utilities_), end(row_utilities_)), + *std::max_element(begin(col_utilities_), end(col_utilities_))); + } + + absl::optional UtilitySum() const override; + + std::string ActionToString(Player player, Action action) const override { + switch (player) { + case 0: { + SPIEL_CHECK_GE(action, 0); + SPIEL_CHECK_LT(action, row_action_names_.size()); + return row_action_names_[action]; + } + + case 1: { + SPIEL_CHECK_GE(action, 0); + SPIEL_CHECK_LT(action, col_action_names_.size()); + return col_action_names_[action]; + } + + default: + SpielFatalError("Unknown player"); + } + } + + // Methods for MatrixState to call. + int NumRows() const { return row_action_names_.size(); } + int NumCols() const { return col_action_names_.size(); } + double RowUtility(int row, int col) const { + return row_utilities_[Index(row, col)]; + } + double ColUtility(int row, int col) const { + return col_utilities_[Index(row, col)]; + } + double PlayerUtility(Player player, int row, int col) const { + SPIEL_CHECK_TRUE(player == Player{0} || player == Player{1}); + return (player == Player{0} ? row_utilities_[Index(row, col)] + : col_utilities_[Index(row, col)]); + } + const std::vector& RowUtilities() const { return row_utilities_; } + const std::vector& ColUtilities() const { return col_utilities_; } + const std::vector& PlayerUtilities( + const Player player) const { + SPIEL_CHECK_TRUE(player == Player{0} || player == Player{1}); + return (player == Player{0} ? 
row_utilities_ : col_utilities_); + } + const std::string& RowActionName(int row) const { + return row_action_names_[row]; + } + const std::string& ColActionName(int col) const { + return col_action_names_[col]; + } + + std::vector GetUtilities(const std::vector& joint_action) + const override { + int index = Index(joint_action[0], joint_action[1]); + return {row_utilities_[index], col_utilities_[index]}; + } + + double GetUtility(Player player, const std::vector& joint_action) + const override { + return PlayerUtility(player, joint_action[0], joint_action[1]); + } + + bool operator==(const Game& other_game) const override { + const auto& other = down_cast(other_game); + return (row_action_names_.size() == other.row_action_names_.size() && + col_action_names_.size() == other.col_action_names_.size() && + row_utilities_ == other.row_utilities_ && + col_utilities_ == other.col_utilities_); + } + + bool ApproxEqual(const Game& other_game, double tolerance) const { + const auto& other = down_cast(other_game); + return (row_action_names_.size() == other.row_action_names_.size() && + col_action_names_.size() == other.col_action_names_.size() && + AllNear(row_utilities_, other.row_utilities_, tolerance) && + AllNear(col_utilities_, other.col_utilities_, tolerance)); + } + + private: + int Index(int row, int col) const { return row * NumCols() + col; } + std::vector row_action_names_; + std::vector col_action_names_; + std::vector row_utilities_; + std::vector col_utilities_; +}; + +class MatrixState : public NFGState { + public: + explicit MatrixState(std::shared_ptr game); + explicit MatrixState(const MatrixState&) = default; + + std::vector LegalActions(Player player) const override { + if (IsTerminal()) return {}; + if (player == kSimultaneousPlayerId) { + return LegalFlatJointActions(); + } else { + std::vector moves(player == kRowPlayer ? matrix_game_->NumRows() + : matrix_game_->NumCols()); + std::iota(moves.begin(), moves.end(), 0); // fill with values 0...n-1 + return moves; + } + } + + std::string ToString() const override; + + std::string ActionToString(Player player, Action action_id) const override { + if (player == kSimultaneousPlayerId) + return FlatJointActionToString(action_id); + else if (player == kRowPlayer) + return matrix_game_->RowActionName(action_id); + else + return matrix_game_->ColActionName(action_id); + } + + bool IsTerminal() const override { return !joint_move_.empty(); } + + std::vector Returns() const override { + if (IsTerminal()) { + return {matrix_game_->RowUtility(joint_move_[0], joint_move_[1]), + matrix_game_->ColUtility(joint_move_[0], joint_move_[1])}; + } else { + return {0, 0}; + } + } + + std::unique_ptr Clone() const override { + return std::unique_ptr(new MatrixState(*this)); + } + + protected: + void DoApplyActions(const std::vector& moves) override { + SPIEL_CHECK_EQ(moves.size(), 2); + SPIEL_CHECK_GE(moves[kRowPlayer], 0); + SPIEL_CHECK_LT(moves[kRowPlayer], matrix_game_->NumRows()); + SPIEL_CHECK_GE(moves[kColPlayer], 0); + SPIEL_CHECK_LT(moves[kColPlayer], matrix_game_->NumCols()); + joint_move_ = moves; + } + + private: + std::vector joint_move_{}; // joint move that was chosen + const MatrixGame* matrix_game_; +}; + +// Create a matrix game with the specified utilities and row/column names. +// Utilities must be in row-major form. 
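+// For instance (illustrative only), a zero-sum Matching Pennies game could be
+// built with:
+//   CreateMatrixGame("matching_pennies", "Matching Pennies",
+//                    {"Heads", "Tails"}, {"Heads", "Tails"},
+//                    {{1, -1}, {-1, 1}}, {{-1, 1}, {1, -1}});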
+std::shared_ptr CreateMatrixGame( + const std::string& short_name, const std::string& long_name, + const std::vector& row_names, + const std::vector& col_names, + const std::vector>& row_player_utils, + const std::vector>& col_player_utils); + +// Create a matrix game with the specified utilities, with default names +// ("short_name", "Long Name", row player utilities, col player utilities). +// Utilities must be in row-major order. +std::shared_ptr CreateMatrixGame( + const std::string& short_name, const std::string& long_name, + const std::vector& row_names, + const std::vector& col_names, + const std::vector& flat_row_utils, + const std::vector& flat_col_utils); + +// Create a matrix game with the specified utilities, with default names +// ("short_name", "Long Name", row0, row1.., col0, col1, ...). +// Utilities must be in row-major form. +std::shared_ptr CreateMatrixGame( + const std::vector>& row_player_utils, + const std::vector>& col_player_utils); + +} // namespace matrix_game +} // namespace open_spiel + +#endif // OPEN_SPIEL_MATRIX_GAME_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/normal_form_game.h b/scenarios/bargaining/open_spiel/open_spiel/normal_form_game.h new file mode 100644 index 0000000..6818610 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/normal_form_game.h @@ -0,0 +1,154 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_NORMAL_FORM_GAME_H_ +#define OPEN_SPIEL_NORMAL_FORM_GAME_H_ + +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/simultaneous_move_game.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// This class describes an n-player normal-form game. A normal-form game is +// also known as a one-shot game or strategic-form game. Essentially, all +// players act simultaneously and the game ends after a single joint action +// taken by all players. E.g. a matrix game is an example of a 2-player normal +// form game. + +namespace open_spiel { + +class NFGState : public SimMoveState { + public: + NFGState(std::shared_ptr game) : SimMoveState(game) {} + + // There are no chance nodes in a normal-form game (there is only one state), + Player CurrentPlayer() const final { + return IsTerminal() ? kTerminalPlayerId : kSimultaneousPlayerId; + } + + // Since there's only one state, we can implement the representations here. + std::string InformationStateString(Player player) const override { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + std::string info_state = absl::StrCat("Observing player: ", player, ". "); + if (!IsTerminal()) { + absl::StrAppend(&info_state, "Non-terminal"); + } else { + absl::StrAppend(&info_state, + "Terminal. 
History string: ", HistoryString()); + } + return info_state; + } + + std::string ObservationString(Player player) const override { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + std::string obs_str; + if (!IsTerminal()) { + absl::StrAppend(&obs_str, "Non-terminal"); + } else { + absl::StrAppend(&obs_str, "Terminal. History string: ", HistoryString()); + } + return obs_str; + } + + std::string ToString() const override { + std::string result = "Normal form game default NFGState::ToString. "; + if (IsTerminal()) { + absl::StrAppend(&result, "Terminal, history: ", HistoryString(), + ", returns: ", absl::StrJoin(Returns(), ",")); + } else { + absl::StrAppend(&result, "Non-terminal"); + } + return result; + } + + void InformationStateTensor(Player player, + absl::Span values) const override { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + SPIEL_CHECK_EQ(values.size(), 1); + if (IsTerminal()) { + values[0] = 1; + } else { + values[0] = 0; + } + } + + void ObservationTensor(Player player, + absl::Span values) const override { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + SPIEL_CHECK_EQ(values.size(), 1); + if (IsTerminal()) { + values[0] = 1; + } else { + values[0] = 0; + } + } +}; + +class NormalFormGame : public SimMoveGame { + public: + // Game has one state. + std::vector InformationStateTensorShape() const override { + return {1}; + } + std::vector ObservationTensorShape() const override { return {1}; } + + // Game lasts one turn. + int MaxGameLength() const override { return 1; } + // There aren't chance nodes in these games. + int MaxChanceNodesInHistory() const override { return 0; } + + // Direct access to utility. This is just a default implementation, which is + // overridden in subclasses for faster access. + virtual std::vector GetUtilities( + const std::vector& joint_action) const { + std::unique_ptr state = NewInitialState(); + state->ApplyActions(joint_action); + return state->Returns(); + } + + virtual double GetUtility(Player player, + const std::vector& joint_action) const { + return GetUtilities(joint_action)[player]; + } + + absl::optional UtilitySum() const override { + if (game_type_.utility == GameType::Utility::kZeroSum) { + return 0.0; + } else if (game_type_.utility == GameType::Utility::kConstantSum) { + std::vector joint_action(NumPlayers(), 0); + std::vector utilities = GetUtilities(joint_action); + return std::accumulate(utilities.begin(), utilities.end(), 0.0); + } else { + return absl::nullopt; + } + } + + protected: + NormalFormGame(GameType game_type, GameParameters game_parameters) + : SimMoveGame(game_type, game_parameters) {} +}; + +} // namespace open_spiel + +#endif // OPEN_SPIEL_NORMAL_FORM_GAME_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/observer.cc b/scenarios/bargaining/open_spiel/open_spiel/observer.cc new file mode 100644 index 0000000..90dcf72 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/observer.cc @@ -0,0 +1,391 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/observer.h" + +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/memory/memory.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { + +SpanTensor ContiguousAllocator::Get(absl::string_view name, + const absl::InlinedVector& shape) { + const int size = absl::c_accumulate(shape, 1, std::multiplies()); + SPIEL_DCHECK_LE(offset_, data_.size()); + auto buffer = data_.subspan(offset_, size); + offset_ += size; + return SpanTensor(SpanTensorInfo(name, shape), buffer); +} + +namespace { + +class InformationStateObserver : public Observer { + public: + InformationStateObserver(const Game& game) + : Observer( + /*has_string=*/game.GetType().provides_information_state_string, + /*has_tensor=*/game.GetType().provides_information_state_tensor), + size_(has_tensor_ ? game.InformationStateTensorSize() : 0) { + if (has_tensor_) { + auto shape = game.InformationStateTensorShape(); + shape_.assign(shape.begin(), shape.end()); + } + } + void WriteTensor(const State& state, int player, + Allocator* allocator) const override { + auto tensor = allocator->Get("info_state", shape_); + state.InformationStateTensor(player, tensor.data()); + } + + std::string StringFrom(const State& state, int player) const override { + return state.InformationStateString(player); + } + + private: + absl::InlinedVector shape_; + int size_; +}; + +class DefaultObserver : public Observer { + public: + DefaultObserver(const Game& game) + : Observer(/*has_string=*/ + game.GetType().provides_observation_string, + /*has_tensor=*/game.GetType().provides_observation_tensor), + size_(has_tensor_ ? game.ObservationTensorSize() : 0) { + if (has_tensor_) { + auto shape = game.ObservationTensorShape(); + shape_.assign(shape.begin(), shape.end()); + } + } + + void WriteTensor(const State& state, int player, + Allocator* allocator) const override { + SPIEL_CHECK_TRUE(has_tensor_); + auto tensor = allocator->Get("observation", shape_); + state.ObservationTensor(player, tensor.data()); + } + + std::string StringFrom(const State& state, int player) const override { + return state.ObservationString(player); + } + + private: + absl::InlinedVector shape_; + int size_; +}; + +std::string PrivateInfoTypeToString(const PrivateInfoType& type) { + if (type == PrivateInfoType::kNone) return "kNone"; + if (type == PrivateInfoType::kSinglePlayer) return "kSinglePlayer"; + if (type == PrivateInfoType::kAllPlayers) return "kAllPlayers"; + SpielFatalError("Unknown PrivateInfoType!"); +} + +std::string IIGObservationTypeToString(const IIGObservationType& obs_type) { + return absl::StrCat( + "IIGObservationType", + "{perfect_recall=", obs_type.perfect_recall ? "true" : "false", + ", public_info=", obs_type.public_info ? "true" : "false", + ", private_info=", PrivateInfoTypeToString(obs_type.private_info), "}"); +} + +// A dummy class that provides private observations for games with perfect +// information. As these games have no private information, we return dummy +// values. 
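+// (an empty observation string and no tensor contents).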
+class NoPrivateObserver : public Observer { + public: + NoPrivateObserver(const Game& game) + : Observer(/*has_string=*/true, /*has_tensor=*/true) {} + void WriteTensor(const State& state, int player, + Allocator* allocator) const override {} + std::string StringFrom(const State& state, int player) const override { + return ""; + } +}; +} // namespace + +std::shared_ptr Game::MakeRegisteredObserver( + absl::optional iig_obs_type, + const ObservationParams& params) const { + auto iter = params.find("name"); + if (iter == params.end()) { + SpielFatalError("A 'name' parameter is expected to create a registered " + "observer"); + } + auto name = iter->second.string_value(); + return ObserverRegisterer::CreateByName(name, *this, iig_obs_type, params); +} + +std::shared_ptr Game::MakeBuiltInObserver( + absl::optional iig_obs_type) const { + if (!iig_obs_type) { + if (game_type_.provides_observation()) { + return absl::make_unique(*this); + } else { + return nullptr; + } + } + + const bool perfect_info_game = + game_type_.information == GameType::Information::kPerfectInformation; + + // Perfect information games can provide public information regardless + // of requested PrivateInfoType (as they have no private information). + if (perfect_info_game) { + // Handle the dummy case, where we do not use any public information. + // The game will just have empty private observations. + if (!iig_obs_type->public_info) + return absl::make_unique(*this); + if (game_type_.provides_information_state() && iig_obs_type->perfect_recall) + return absl::make_unique(*this); + if (game_type_.provides_observation() && !iig_obs_type->perfect_recall) + return absl::make_unique(*this); + } + + // TODO(author11) Reinstate this check + // SPIEL_CHECK_EQ(GetType().information, + // GameType::Information::kImperfectInformation); + if (iig_obs_type.value() == kDefaultObsType) { + if (game_type_.provides_observation()) + return absl::make_unique(*this); + } + if (iig_obs_type.value() == kInfoStateObsType) { + if (game_type_.provides_information_state()) + return absl::make_unique(*this); + } + return nullptr; +} + +std::shared_ptr Game::MakeObserver( + absl::optional iig_obs_type, + const ObservationParams& params) const { + // This implementation falls back to the original information state and + // observation methods in case of empty parameters and otherwise creates + // a registered observer based on its name. + // New games can register observers which can be selected by name, or override + // MakeObserver to return a game-specific observer. + if (params.empty()) { + return MakeBuiltInObserver(iig_obs_type); + } else { + return MakeRegisteredObserver(iig_obs_type, params); + } +} + +SpanTensor TrackingVectorAllocator::Get( + absl::string_view name, const absl::InlinedVector& shape) { + SPIEL_DCHECK_TRUE(IsNameUnique(name)); + + SpanTensorInfo info(name, shape); + tensors_info_.push_back(info); + + const int offset = data_.size(); + const int size = info.size(); + data_.resize(offset + size); + return SpanTensor(std::move(info), + absl::MakeSpan(data_).subspan(offset, size)); +} + +bool TrackingVectorAllocator::IsNameUnique(absl::string_view name) const { + for (const SpanTensorInfo& info : tensors_info_) { + if (info.name() == name) { + return false; + } + } + return true; +} + +Observation::Observation(const Game& game, std::shared_ptr observer) + : observer_(std::move(observer)) { + // Get an observation of the initial state to set up. 
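+  // The initial state is only used to discover the tensor names, shapes and
+  // total buffer size; later calls to SetFrom() reuse the same layout.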
+ if (HasTensor()) { + auto state = game.NewInitialState(); + TrackingVectorAllocator allocator; + observer_->WriteTensor(*state, /*player=*/0, &allocator); + buffer_ = allocator.data(); + tensors_info_ = allocator.tensors_info(); + } +} + +void Observation::SetFrom(const State& state, int player) { + ContiguousAllocator allocator(absl::MakeSpan(buffer_)); + observer_->WriteTensor(state, player, &allocator); +} + +std::vector Observation::tensors() { + std::vector result; + result.reserve(tensors_info_.size()); + int offset = 0; + for (const SpanTensorInfo& info : tensors_info_) { + const int size = info.size(); + result.emplace_back(info, absl::MakeSpan(buffer_).subspan(offset, size)); + offset += size; + } + return result; +} + +// We may in the future support multiple compression schemes. +// The Compress() method should select the most effective scheme adaptively. +enum CompressionScheme : char { + kCompressionNone, // We weren't able to compress the data. + kCompressionBinary // Data is binary (all elements zero or one). +}; +constexpr int kNumHeaderBytes = 1; + +// Binary compression. +struct BinaryCompress { + static constexpr int kBitsPerByte = 8; + + static std::string Compress(absl::Span buffer) { + const int num_bytes = (buffer.size() + kBitsPerByte - 1) / kBitsPerByte; + std::string str(num_bytes + kNumHeaderBytes, '\0'); + str[0] = kCompressionBinary; + + for (int i = 0; i < buffer.size(); ++i) { + if (buffer[i]) { + const int byte = i / kBitsPerByte; + const int bit = i % kBitsPerByte; + str[kNumHeaderBytes + byte] += (1 << bit); + } + } + return str; + } + + static void Decompress(absl::string_view compressed, + absl::Span buffer) { + const int num_bytes = (buffer.size() + kBitsPerByte - 1) / kBitsPerByte; + absl::c_fill(buffer, 0); + SPIEL_CHECK_EQ(compressed.size(), num_bytes + kNumHeaderBytes); + for (int byte = 0; byte < num_bytes; ++byte) { + for (int bit = 0; bit < kBitsPerByte; ++bit) { + if (compressed[kNumHeaderBytes + byte] & (1 << bit)) { + buffer[byte * kBitsPerByte + bit] = 1; + } + } + } + } +}; + +// No compression. +struct NoCompress { + static std::string Compress(absl::Span buffer) { + const int num_bytes = sizeof(float) * buffer.size(); + std::string str(num_bytes + 1, '\0'); + str[0] = kCompressionNone; + memcpy(&str[kNumHeaderBytes], &buffer[0], num_bytes); + return str; + } + + static void Decompress(absl::string_view compressed, + absl::Span buffer) { + const int num_bytes = sizeof(float) * buffer.size(); + SPIEL_CHECK_EQ(compressed.size(), num_bytes + kNumHeaderBytes); + memcpy(&buffer[0], &compressed[kNumHeaderBytes], num_bytes); + } +}; + +std::string Observation::Compress() const { + const bool data_is_binary = + absl::c_all_of(buffer_, [](float x) { return x == 0 || x == 1; }); + return data_is_binary ? 
BinaryCompress::Compress(buffer_) + : NoCompress::Compress(buffer_); +} + +void Observation::Decompress(absl::string_view compressed) { + SPIEL_CHECK_GT(compressed.size(), 0); + switch (compressed[0]) { + case kCompressionBinary: + return BinaryCompress::Decompress(compressed, absl::MakeSpan(buffer_)); + case kCompressionNone: + return NoCompress::Decompress(compressed, absl::MakeSpan(buffer_)); + default: + SpielFatalError(absl::StrCat("Unrecognized compression scheme in '", + compressed, "'")); + } +} + +bool IIGObservationType::operator==(const IIGObservationType& other) { + return public_info == other.public_info && + perfect_recall == other.perfect_recall && + private_info == other.private_info; +} + +ObserverRegisterer::ObserverRegisterer(const std::string& game_name, + const std::string& observer_name, + CreateFunc creator) { + RegisterObserver(game_name, observer_name, creator); +} + +void ObserverRegisterer::RegisterObserver(const std::string& game_name, + const std::string& observer_name, + CreateFunc creator) { + auto key = std::pair(game_name, observer_name); + if (observers().find(key) != observers().end()) { + SpielFatalError(absl::StrCat("Duplicate observer '", key.second, "'", + " for game '", key.first, "'")); + } + observers()[key] = creator; +} + +std::shared_ptr MakeSingleTensorObserver( + const Game& game, absl::optional iig_obs_type, + const GameParameters& params) { + return std::shared_ptr(game.MakeBuiltInObserver(iig_obs_type)); +} + +RegisterSingleTensorObserver::RegisterSingleTensorObserver( + const std::string& game_name) { + ObserverRegisterer single_tensor(game_name, "single_tensor", + MakeSingleTensorObserver); +} + +std::shared_ptr ObserverRegisterer::CreateByName( + const std::string& observer_name, + const Game& game, + absl::optional iig_obs_type, + const ObservationParams& params) { + auto key = std::pair(game.GetType().short_name, observer_name); + auto it = observers().find(key); + if (it == observers().end()) { + SpielFatalError(absl::StrCat("No observer '", key.second, "'", + " found for game '", key.first, "'")); + } + return it->second(game, iig_obs_type, params); +} + +std::vector TensorFromObserver(const State& state, + const Observer& observer) { + TrackingVectorAllocator allocator; + observer.WriteTensor(state, /*player=*/state.CurrentPlayer(), &allocator); + return std::move(allocator.data()); +} + +std::vector ObserverTensorShape(const State& state, + const Observer& observer) { + TrackingVectorAllocator allocator; + observer.WriteTensor(state, /*player=*/0, &allocator); + if (allocator.tensors_info().size() == 1) { + return allocator.tensors_info().front().vector_shape(); + } else { + return {static_cast(allocator.data().size())}; + } +} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/observer.h b/scenarios/bargaining/open_spiel/open_spiel/observer.h new file mode 100644 index 0000000..70f6555 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/observer.h @@ -0,0 +1,478 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_OBSERVER_H_ +#define OPEN_SPIEL_OBSERVER_H_ + +// This class is the primary method for getting observations from games. +// Each Game object has a MakeObserver() method which returns an Observer +// object given a specification of the required observation type. + +// To access observation from C++, first initialize an observer and observation +// for the game (one time only). +// +// auto observer = game->MakeObserver(iig_obs_type, params); +// Observation observation(*game, observer); +// +// Then for states in a trajectory, get a tensor observation using: +// +// observation.SetFrom(state, player); // populates observation.Tensor() +// +// The resultant tensor is accessible from observation.Tensor(). Note that +// the decomposition of the tensor into named pieces is not currently available +// through this API (it is available in Python). +// +// To obtain a string observation: +// +// std::string string_obs = observation.StringFrom(state, player); +// +// Access from Python follows a similar pattern, with the addition of support +// for accessing pieces of the observation tensor by name. See `observation.py` +// and `observation_test.py`. + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/base/attributes.h" +#include "open_spiel/abseil-cpp/absl/container/flat_hash_set.h" +#include "open_spiel/abseil-cpp/absl/container/inlined_vector.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { + +// Forward declarations +class Game; +class State; + +using ObservationParams = GameParameters; + +// Information about a multi-dimensional tensor span, eg name, shape, etc. +// TODO(author16) add types information. For now only floats are supported. +class SpanTensorInfo { + public: + using Shape = absl::InlinedVector; + + SpanTensorInfo(absl::string_view name, const Shape& shape) + : name_(name), shape_(shape) {} + + inline const std::string& name() const { return name_; } + inline const Shape& shape() const { return shape_; } + + // Convenience accessor for the shape as a plain vector of ints. + template + inline std::vector vector_shape() const { + return {shape_.begin(), shape_.end()}; + } + + // Number of floats in a tensor. + int32_t size() const { + return std::accumulate(shape_.begin(), shape_.end(), 1, + std::multiplies()); + } + + // Akin to numpy.ndarray.nbytes returns the memory footprint. + int32_t nbytes() const { return size() * sizeof(float); } + + std::string DebugString() const { + return absl::StrCat("SpanTensor(name='", name(), "', shape=(", + absl::StrJoin(shape_, ","), "), nbytes=", nbytes(), + ")"); + } + + private: + std::string name_; + Shape shape_; +}; + +// A tensor backed up by a data buffer *not* owned by SpanTensor. +// +// This is a view class that points to some externally owned data buffer +// and helps with accessing and modifying the data via its `at` methods. +// +// The class has the pointer semantics, akin to `std::unique_ptr` or a raw +// pointer, where `SpanTensor` just "points" to an array. +// In particular helper accessor methods like `data` or `at` are marked as const +// but still give access to mutable data. 
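+// For example, for a SpanTensor `t` with shape {3, 4}, `t.at(1, 2)` addresses
+// element 1 * 4 + 2 = 6 of the underlying flat buffer.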
+class SpanTensor { + public: + SpanTensor(SpanTensorInfo info, absl::Span data) + : info_(std::move(info)), data_(data) { + SPIEL_CHECK_EQ(info_.size(), data_.size()); + } + + const SpanTensorInfo& info() const { return info_; } + + absl::Span data() const { return data_; } + + std::string DebugString() const { return info_.DebugString(); } + + // Mutators of data. + float& at() const { + SPIEL_DCHECK_EQ(info_.shape().size(), 0); + return data_[0]; + } + + float& at(int idx) const { + SPIEL_DCHECK_EQ(info_.shape().size(), 1); + return data_[idx]; + } + + float& at(int idx1, int idx2) const { + SPIEL_DCHECK_EQ(info_.shape().size(), 2); + return data_[idx1 * info_.shape()[1] + idx2]; + } + + float& at(int idx1, int idx2, int idx3) const { + SPIEL_DCHECK_EQ(info_.shape().size(), 3); + return data_[(idx1 * info_.shape()[1] + idx2) * info_.shape()[2] + idx3]; + } + + float& at(int idx1, int idx2, int idx3, int idx4) const { + SPIEL_DCHECK_EQ(info_.shape().size(), 4); + return data_[((idx1 * info_.shape()[1] + idx2) * info_.shape()[2] + idx3) * + info_.shape()[3] + + idx4]; + } + + private: + SpanTensorInfo info_; + absl::Span data_; +}; + +// An Allocator is responsible for returning memory for an Observer. +class Allocator { + public: + // Returns zero-initialized memory into which the data should be written. + // `name` is the name of this piece of the tensor; the allocator may + // use it to label the tensor when accessed by the clients. + virtual SpanTensor Get(absl::string_view name, + const absl::InlinedVector& shape) = 0; + + virtual ~Allocator() = default; +}; + +// Allocates memory from a single block. This is intended for use when it +// is already known how much memory an observation consumes. The allocator +// owns a fixed-size block of memory and returns pieces of it in sequence. +class ContiguousAllocator : public Allocator { + public: + ContiguousAllocator(absl::Span data) : data_(data), offset_(0) { + absl::c_fill(data, 0); + } + SpanTensor Get(absl::string_view name, + const absl::InlinedVector& shape) override; + + private: + absl::Span data_; + int offset_; +}; + +// Allocates new memory for each allocation request and keeps track +// of tensor names and shapes. This is intended to use when it's not yet +// known how much memory an observation consumes. +class TrackingVectorAllocator : public Allocator { + public: + TrackingVectorAllocator() {} + + SpanTensor Get(absl::string_view name, + const absl::InlinedVector& shape) override; + + // Should only be called *after* all spans were created (via `Get`). + // A call to `Get` invalidates the previous result of `spans`. + std::vector tensors_info() const { return tensors_info_; } + + std::vector& data() { return data_; } + const std::vector& data() const { return data_; } + + private: + bool IsNameUnique(absl::string_view name) const; + + std::vector data_; + std::vector tensors_info_; + absl::flat_hash_set tensor_names_; +}; + +// Specification of which players' private information we get to see. +enum class PrivateInfoType { + kNone, // No private information. + kSinglePlayer, // Private information for the observing player only (i.e. + // the player passed to WriteTensor / StringFrom). + kAllPlayers // Private information for all players. +}; + +// Observation types for imperfect-information games. + +// The public / private observations factorize observations into their +// (mostly) non-overlapping public and private parts. They may overlap only for +// the start of the game and time. 
+// +// The public observations correspond to information that all the players know +// that all the players know, like upward-facing cards on a table. +// Perfect information games, like Chess, have only public observations. +// +// All games have non-empty public observations. The minimum public +// information is time: we assume that all the players can perceive absolute +// time (which can be accessed via the MoveNumber() method). The implemented +// games must be 1-timeable, a property that is trivially satisfied with all +// human-played board games, so you typically don't have to worry about this. +// (You'd have to knock players out / consider Einstein's time-relativistic +// effects to make non-timeable games.). +// +// The public observations are used to create a sequence of observations: +// a public observation history. Because of the sequential structure, when you +// return any non-empty public observation, you implicitly encode time as well +// within this sequence. +// +// Public observations are not required to be "common knowledge" observations. +// Example: In imperfect-info version of card game Goofspiel, players make +// bets with cards on their hand, and their imperfect information consists of +// not knowing exactly what cards the opponent currently holds, as the players +// only learn public information whether they have won/lost/draw the bet. +// However, when the player bets a card "5" and learns it drew the round, +// it can infer that the opponent must have also bet the card "5", just as the +// player did. In principle we could ask the game to make this inference +// automatically, and return observation "draw-5". We do not require this, as +// it is in general expensive to compute. Returning public observation "draw" +// is sufficient. + +// The private observations correspond to the part of the observation that +// is not public. In Poker, this would be the cards the player holds in his +// hand. Note that this does not imply that other players don't have access +// to this information. +// +// For example, consider there is a mirror behind an unaware player, betraying +// his hand in the reflection. Even if everyone was aware of the mirror, then +// this information still may not be public, because the players do not know +// for certain that everyone is aware of this. It would become public if and +// only if all the players were aware of the mirror, and they also knew that +// indeed everyone else knows about it too. Then this would effectively make +// it the same as if the player just placed his cards on the table for +// everyone to see. +// +// If there is no private observation available, the implementation should +// return an empty string. +struct IIGObservationType { + // If true, include public information in the observation. + bool public_info; + + // Whether the observation is perfect recall (identical to an info state). + // If true, the observation must be sufficient to reconstruct the complete + // history of actions and observations for the observing player. + bool perfect_recall; + + // Which players' private information to include in the observation. + PrivateInfoType private_info; + + bool operator==(const IIGObservationType&); +}; + +// Default observation type for imperfect information games. +// Corresponds to the ObservationTensor / ObservationString methods. 
+inline constexpr IIGObservationType kDefaultObsType{ + /*public_info*/true, + /*perfect_recall*/false, + /*private_info*/PrivateInfoType::kSinglePlayer}; + +// Default observation type for imperfect information games. +// Corresponds to the InformationStateTensor / InformationStateString methods. +inline constexpr IIGObservationType kInfoStateObsType{ + /*public_info*/true, + /*perfect_recall*/true, + /*private_info*/PrivateInfoType::kSinglePlayer}; + +// Incremental public observation, mainly used for imperfect information games. +inline constexpr IIGObservationType kPublicObsType{ + /*public_info*/true, + /*perfect_recall*/false, + /*private_info*/PrivateInfoType::kNone}; + +// Complete public observation, mainly used for imperfect information games. +inline constexpr IIGObservationType kPublicStateObsType{ + /*public_info*/true, + /*perfect_recall*/true, + /*private_info*/PrivateInfoType::kNone}; + +// Incremental private observation, mainly used for imperfect information games. +inline constexpr IIGObservationType kPrivateObsType{ + /*public_info*/false, + /*perfect_recall*/false, + /*private_info*/PrivateInfoType::kSinglePlayer}; + +// An Observer is something which can produce an observation of a State, +// e.g. a Tensor or collection of Tensors or a string. +// Observers are game-specific. They are created by a Game object, and +// may only be applied to a State class generated from the same Game instance. +class Observer { + public: + Observer(bool has_string, bool has_tensor) + : has_string_(has_string), has_tensor_(has_tensor) { + SPIEL_CHECK_TRUE(has_string || has_tensor); + } + + // Write a tensor observation to the memory returned by the Allocator. + virtual void WriteTensor(const State& state, int player, + Allocator* allocator) const = 0; + + // Return a string observation. For human-readability or for tabular + // algorithms on small games. + virtual std::string StringFrom(const State& state, int player) const = 0; + + // What observations do we support? + bool HasString() const { return has_string_; } + bool HasTensor() const { return has_tensor_; } + + virtual ~Observer() = default; + + protected: + // TODO(author11) Remove when all games support both types of observations. + bool has_string_; + bool has_tensor_; +}; + +// Holds an Observer and a vector for it to write values into. +class Observation { + public: + // Create + Observation(const Game& game, std::shared_ptr observer); + + // Gets the observation from the State and player and stores it in + // the internal tensor. + void SetFrom(const State& state, int player); + + // Describes the observation components. + const std::vector& tensors_info() const { + return tensors_info_; + } + + // Returns the component tensors of the observation. + std::vector tensors(); + + // Returns the string observation for the State and player. + std::string StringFrom(const State& state, int player) const { + return observer_->StringFrom(state, player); + } + + // Return compressed representation of the observations. This is useful for + // memory-intensive algorithms, e.g. that store large replay buffers. + // + // The first byte of the compressed data is reserved for the specific + // compression scheme. Note that currently there is only one supported, which + // requires bitwise observations. + // + // Note: Use compress and decompress on the same machine, or on systems + // with the same float memory layout (aka Endianness). 
+ // Different computer architectures may use different Endianness + // (https://en.wikipedia.org/wiki/Endianness) when storing floats. + // The compressed data is a raw memory representation of an array + // of floats. Passing it from, say, big-endian architecture + // to little-endian architecture may corrupt the original data. + // TODO(author16) address the note above and implement things in a platform + // independent way. + std::string Compress() const; + void Decompress(absl::string_view compressed); + + // What observations do we support? + // TODO(author11) Remove when all games support both types of observations. + bool HasString() const { return observer_->HasString(); } + bool HasTensor() const { return observer_->HasTensor(); } + + public: + // Deprecated methods. + + // Returns the internal buffer into which observations are written. + ABSL_DEPRECATED("Use `tensors()`. This method is unsafe.") + absl::Span Tensor() { return absl::MakeSpan(buffer_); } + + private: + std::shared_ptr observer_; + std::vector buffer_; + std::vector tensors_info_; +}; + +// Allows to register observers to a game. Usage: +// ObserverRegisterer unused_name(game_name, observer_name, creator); +// +// Once an observer is registered, it can be created by +// game.MakeObserver(iig_obs_type, observer_name) +class ObserverRegisterer { + public: + // Function type which creates an observer. The game and params argument + // cannot be assumed to exist beyond the scope of this call. + using CreateFunc = std::function( + const Game& game, absl::optional iig_obs_type, + const ObservationParams& params)>; + + ObserverRegisterer(const std::string& game_name, + const std::string& observer_name, + CreateFunc creator); + static void RegisterObserver(const std::string& game_name, + const std::string& observer_name, + CreateFunc creator); + + static std::shared_ptr CreateByName( + const std::string& observer_name, + const Game& game, + absl::optional iig_obs_type, + const ObservationParams& params); + + private: + // Returns a "global" map of registrations (i.e. an object that lives from + // initialization to the end of the program). Note that we do not just use + // a static data member, as we want the map to be initialized before first + // use. + static std::map, CreateFunc>& + observers() { + static std::map, CreateFunc> impl; + return impl; + } +}; + +// Registers an observer named "single_tensor" which falls back to +// state.observation_tensor or state.information_state_tensor (which generate a +// single tensor). +// +// Note that one cannot pass empty ObservationParams to +// game->MakeObserver(...) to achieve the same behavior in general: +// leduc, goofspiel and many other games will generate multiple tensors in that +// case. +// +// Use: +// RegisterSingleTensorObserver single_tensor(kGameType.short_name); +class RegisterSingleTensorObserver { + public: + RegisterSingleTensorObserver(const std::string& game_name); +}; + +// Pure function that creates a tensor from an observer. Slower than using an +// Observation, but threadsafe. This is useful when you cannot keep an +// Observation around to use multiple times. +ABSL_DEPRECATED("Use 'Observation::tensors()`.") +std::vector TensorFromObserver(const State& state, + const Observer& observer); + +// Pure function that gets the tensor shape from an observer. +// Any valid state may be supplied. 
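+// If the observer wrote more than one tensor, the returned shape is a single
+// flattened dimension covering all of them.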
+ABSL_DEPRECATED("Use 'Observation::tensors_info()`.") +std::vector ObserverTensorShape(const State& state, + const Observer& observer); + +} // namespace open_spiel + +#endif // OPEN_SPIEL_OBSERVER_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/policy.cc b/scenarios/bargaining/open_spiel/open_spiel/policy.cc new file mode 100644 index 0000000..e40fd70 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/policy.cc @@ -0,0 +1,605 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/policy.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/container/flat_hash_set.h" +#include "open_spiel/abseil-cpp/absl/container/node_hash_map.h" +#include "open_spiel/abseil-cpp/absl/strings/charconv.h" +#include "open_spiel/abseil-cpp/absl/strings/numbers.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { + +void SetProb(ActionsAndProbs* actions_and_probs, Action action, double prob) { + for (auto& iter : *actions_and_probs) { + if (iter.first == action) { + iter.second = prob; + return; + } + } + actions_and_probs->push_back({action, prob}); +} + +double GetProb(const ActionsAndProbs& action_and_probs, Action action) { + auto it = absl::c_find_if(action_and_probs, + [&action](const std::pair& p) { + return p.first == action; + }); + if (it == action_and_probs.end()) return -1.; + return it->second; +} + +Action GetAction(const ActionsAndProbs& action_and_probs) { + for (const auto& iter : action_and_probs) { + if (iter.second == 1.0) { + return iter.first; + } + } + return kInvalidAction; +} + +ActionsAndProbs ToDeterministicPolicy(const ActionsAndProbs& actions_and_probs, + Action action) { + ActionsAndProbs new_policy; + new_policy.reserve(actions_and_probs.size()); + for (const auto& iter : actions_and_probs) { + new_policy.push_back({iter.first, iter.first == action ? 
1.0 : 0.0}); + } + return new_policy; +} + +bool StatePoliciesEqual(const ActionsAndProbs& state_policy1, + const ActionsAndProbs& state_policy2, + double float_tolerance) { + if (state_policy1.size() != state_policy2.size()) { + return false; + } + + for (int i = 0; i < state_policy1.size(); ++i) { + if (state_policy1[i].first != state_policy2[i].first) { + return false; + } + + if (!Near(state_policy1[i].second, state_policy2[i].second, + float_tolerance)) { + return false; + } + } + + return true; +} + +ActionsAndProbs GetDeterministicPolicy(const std::vector& legal_actions, + Action action) { + ActionsAndProbs new_policy; + new_policy.reserve(legal_actions.size()); + for (Action legal_action : legal_actions) { + new_policy.push_back({legal_action, legal_action == action ? 1.0 : 0.0}); + } + return new_policy; +} + +ActionsAndProbs UniformStatePolicy(const std::vector& actions) { + ActionsAndProbs actions_and_probs; + absl::c_for_each(actions, [&actions_and_probs, &actions](Action a) { + actions_and_probs.push_back({a, 1. / static_cast(actions.size())}); + }); + return actions_and_probs; +} + +ActionsAndProbs UniformStatePolicy(const State& state) { + return UniformStatePolicy(state.LegalActions()); +} + +ActionsAndProbs UniformStatePolicy(const State& state, Player player) { + return UniformStatePolicy(state.LegalActions(player)); +} + +ActionsAndProbs FirstActionStatePolicy(const State& state) { + return FirstActionStatePolicy(state, state.CurrentPlayer()); +} + +ActionsAndProbs FirstActionStatePolicy(const State& state, Player player) { + ActionsAndProbs actions_and_probs; + std::vector legal_actions = state.LegalActions(player); + actions_and_probs.reserve(legal_actions.size()); + for (int i = 0; i < legal_actions.size(); ++i) { + actions_and_probs.push_back({legal_actions[i], i == 0 ? 1.0 : 0.0}); + } + return actions_and_probs; +} + +std::unique_ptr DeserializePolicy(const std::string& serialized, + std::string delimiter) { + // Class’s identity is the very first line, see Policy::Serialize + // for more info. + std::pair cls_and_content = + absl::StrSplit(serialized, absl::MaxSplits(':', 1)); + std::string class_identity = cls_and_content.first; + + if (class_identity == "TabularPolicy") { + return DeserializeTabularPolicy(serialized, delimiter); + } else if (class_identity == "UniformPolicy") { + return std::make_unique(); + } else { + SpielFatalError(absl::StrCat("Deserialization of ", class_identity, + " is not supported.")); + } +} + +TabularPolicy::TabularPolicy(const Game& game) + : TabularPolicy(GetRandomPolicy(game)) {} + +std::unique_ptr DeserializeTabularPolicy( + const std::string& serialized, std::string delimiter) { + // Class’s identity is the very first line, see Policy::Serialize + // for more info. 
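+  // After the "TabularPolicy:" prefix, the payload alternates info-state keys
+  // and "action=prob,action=prob,..." strings, separated by `delimiter`.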
+ std::pair cls_and_content = + absl::StrSplit(serialized, absl::MaxSplits(':', 1)); + SPIEL_CHECK_EQ(cls_and_content.first, "TabularPolicy"); + + std::unique_ptr res = std::make_unique(); + if (cls_and_content.second.empty()) return res; + + std::vector splits = + absl::StrSplit(cls_and_content.second, delimiter); + + // Insert the actual values + Action action; + double prob; + for (int i = 0; i < splits.size(); i += 2) { + std::vector policy_values = + absl::StrSplit(splits.at(i + 1), ','); + ActionsAndProbs res_policy; + res_policy.reserve(policy_values.size()); + + for (absl::string_view policy_value : policy_values) { + std::pair action_and_prob = + absl::StrSplit(policy_value, '='); + SPIEL_CHECK_TRUE(absl::SimpleAtoi(action_and_prob.first, &action)); + absl::from_chars( + action_and_prob.second.data(), + action_and_prob.second.data() + action_and_prob.second.size(), prob); + res_policy.push_back({action, prob}); + } + res->SetStatePolicy(std::string(splits.at(i)), res_policy); + } + return res; +} + +std::string TabularPolicy::ToString() const { + std::string str = ""; + for (const auto& infostate_and_policy : policy_table_) { + absl::StrAppend(&str, infostate_and_policy.first, ": "); + for (const auto& policy : infostate_and_policy.second) { + absl::StrAppend(&str, " ", policy.first, "=", policy.second); + } + absl::StrAppend(&str, "\n"); + } + return str; +} + +std::string TabularPolicy::ToStringSorted() const { + std::vector keys; + keys.reserve(policy_table_.size()); + + for (const auto& infostate_and_policy : policy_table_) { + keys.push_back(infostate_and_policy.first); + } + + std::sort(keys.begin(), keys.end()); + std::string str = ""; + for (const std::string& key : keys) { + absl::StrAppend(&str, key, ": "); + for (const auto& policy : policy_table_.at(key)) { + absl::StrAppend(&str, " ", policy.first, "=", policy.second); + } + absl::StrAppend(&str, "\n"); + } + + return str; +} + +PartialTabularPolicy::PartialTabularPolicy() + : TabularPolicy(), + fallback_policy_(std::make_shared()) {} + +PartialTabularPolicy::PartialTabularPolicy( + const std::unordered_map& table) + : TabularPolicy(table), + fallback_policy_(std::make_shared()) {} + +PartialTabularPolicy::PartialTabularPolicy( + const std::unordered_map& table, + std::shared_ptr fallback_policy) + : TabularPolicy(table), + fallback_policy_(fallback_policy) {} + +ActionsAndProbs PartialTabularPolicy::GetStatePolicy(const State& state) const { + auto iter = policy_table_.find(state.InformationStateString()); + if (iter == policy_table_.end()) { + return fallback_policy_->GetStatePolicy(state); + } else { + return iter->second; + } +} + +ActionsAndProbs PartialTabularPolicy::GetStatePolicy(const State& state, + Player player) const { + auto iter = policy_table_.find(state.InformationStateString(player)); + if (iter == policy_table_.end()) { + return fallback_policy_->GetStatePolicy(state); + } else { + return iter->second; + } +} + +ActionsAndProbs PartialTabularPolicy::GetStatePolicy( + const std::string& info_state) const { + auto iter = policy_table_.find(info_state); + if (iter == policy_table_.end()) { + return fallback_policy_->GetStatePolicy(info_state); + } else { + return iter->second; + } +} + +TabularPolicy GetEmptyTabularPolicy(const Game& game, + bool initialize_to_uniform, + Player player) { + std::unordered_map policy; + if (game.GetType().dynamics != GameType::Dynamics::kSequential) { + SpielFatalError("Game is not sequential."); + return TabularPolicy(policy); + } + std::list> to_visit; + 
to_visit.push_back(game.NewInitialState()); + while (!to_visit.empty()) { + std::unique_ptr state = std::move(to_visit.back()); + to_visit.pop_back(); + if (state->IsTerminal()) { + continue; + } + if (state->IsChanceNode()) { + for (const auto& outcome_and_prob : state->ChanceOutcomes()) { + to_visit.emplace_back(state->Child(outcome_and_prob.first)); + } + } else { + ActionsAndProbs infostate_policy; + std::vector legal_actions = state->LegalActions(); + const int num_legal_actions = legal_actions.size(); + SPIEL_CHECK_GT(num_legal_actions, 0.); + for (Action action : legal_actions) { + to_visit.push_back(state->Child(action)); + } + if (player < 0 || state->IsPlayerActing(player)) { + double action_probability = 1.; + if (initialize_to_uniform) { + action_probability = 1. / num_legal_actions; + } + ActionsAndProbs infostate_policy; + infostate_policy.reserve(num_legal_actions); + for (Action action : legal_actions) { + infostate_policy.push_back({action, action_probability}); + } + if (infostate_policy.empty()) { + SpielFatalError("State has zero legal actions."); + } + policy.insert({state->InformationStateString(), infostate_policy}); + } + } + } + return TabularPolicy(policy); +} + +TabularPolicy GetUniformPolicy(const Game& game) { + return GetEmptyTabularPolicy(game, /*initialize_to_uniform=*/true); +} + +template +TabularPolicy SamplePolicy( + const Game& game, int seed, RandomNumberDistribution& dist, Player player) { + std::mt19937 gen(seed); + TabularPolicy policy = GetEmptyTabularPolicy(game, false, player); + std::unordered_map& policy_table = + policy.PolicyTable(); + for (auto& kv : policy_table) { + ActionsAndProbs state_policy; + if (kv.second.empty()) { + SpielFatalError("State has zero legal actions."); + } + state_policy.reserve(kv.second.size()); + double sum = 0; + double prob; + for (const auto& action_and_prob : kv.second) { + // We multiply the original probability by a random number greater than + // 0. We then normalize. This has the effect of randomly permuting the + // policy but all illegal actions still have zero probability. + prob = dist(gen) * action_and_prob.second; + sum += prob; + state_policy.push_back({action_and_prob.first, prob}); + } + // We normalize the policy to ensure it sums to 1. + for (auto& action_and_prob : state_policy) { + action_and_prob.second /= sum; + } + // This is included as a sanity check. + double normalized_sum = 0; + for (auto& action_and_prob : state_policy) { + normalized_sum += action_and_prob.second; + } + SPIEL_CHECK_FLOAT_EQ(normalized_sum, 1.0); + kv.second = state_policy; + } + return policy; +} + +TabularPolicy GetRandomPolicy(const Game& game, int seed, Player player) { + std::uniform_real_distribution dist(0, 1); + return SamplePolicy(game, seed, dist, player); +} + +TabularPolicy GetFlatDirichletPolicy( + const Game& game, int seed, Player player) { + std::gamma_distribution dist(1.0, 1.0); + return SamplePolicy(game, seed, dist, player); +} + +TabularPolicy GetRandomDeterministicPolicy( + const Game& game, int seed, Player player) { + std::mt19937 gen(seed); + absl::node_hash_map> dists; + TabularPolicy policy = GetEmptyTabularPolicy(game, false, player); + std::unordered_map& policy_table = + policy.PolicyTable(); + for (auto& kv : policy_table) { + ActionsAndProbs state_policy; + + // Need to calculate how many legal actions there are. Illegal actions + // can appear in kv. 
+ int num_legal_actions = 0; + for (const auto& action_and_prob : kv.second) { + if (action_and_prob.second > 0) { + num_legal_actions += 1; + } + } + if (num_legal_actions == 0) { + SpielFatalError("State has zero legal actions."); + } + state_policy.reserve(num_legal_actions); + + // The distribution functions have are calculated over a fixed domain. If + // the number of legal a ctions has not been encountered before, we need to + // create a new distribution function. + if (dists.count(num_legal_actions) == 0) { + std::uniform_int_distribution dist(0, num_legal_actions - 1); + dists.insert({num_legal_actions, std::move(dist)}); + } + + const int action = dists[num_legal_actions](gen); + int legal_action_index = 0; + double prob = 0.0; + for (const auto& action_and_prob : kv.second) { + prob = 0.0; + if (action_and_prob.second > 0) { + if (legal_action_index == action) { + prob = 1.0; + } + legal_action_index += 1; + } + state_policy.push_back({action_and_prob.first, prob}); + } + + // This is included as a sanity check. + double normalized_sum = 0; + for (auto& action_and_prob : state_policy) { + normalized_sum += action_and_prob.second; + } + SPIEL_CHECK_FLOAT_EQ(normalized_sum, 1.0); + kv.second = state_policy; + } + return policy; +} + +TabularPolicy GetRandomDeterministicVisitPolicy( + const Game& game, int seed, Player player) { + std::mt19937 gen(seed); + absl::node_hash_map> dists; + std::unordered_map policy; + if (game.GetType().dynamics != GameType::Dynamics::kSequential) { + SpielFatalError("Game is not sequential."); + return TabularPolicy(policy); + } + const GameType::Information information = game.GetType().information; + std::list> to_visit; + to_visit.push_back(game.NewInitialState()); + while (!to_visit.empty()) { + std::unique_ptr state = std::move(to_visit.back()); + to_visit.pop_back(); + if (state->IsTerminal()) { + continue; + } else if (state->IsChanceNode()) { + for (const auto& outcome_and_prob : state->ChanceOutcomes()) { + to_visit.emplace_back(state->Child(outcome_and_prob.first)); + } + } else if (player < 0 || state->IsPlayerActing(player)) { + std::vector legal_actions = state->LegalActions(); + const int num_legal_actions = legal_actions.size(); + SPIEL_CHECK_GT(num_legal_actions, 0.); + if (dists.count(num_legal_actions) == 0) { + std::uniform_int_distribution dist(0, num_legal_actions - 1); + dists.insert({num_legal_actions, std::move(dist)}); + } + const int legal_action_index = dists[num_legal_actions](gen); + SPIEL_CHECK_GE(legal_action_index, 0); + SPIEL_CHECK_LT(legal_action_index, num_legal_actions); + const int action = legal_actions[legal_action_index]; + ActionsAndProbs infostate_policy; + infostate_policy.reserve(1); + infostate_policy.push_back({action, 1.0}); + policy.insert({state->InformationStateString(), infostate_policy}); + if (information == GameType::Information::kPerfectInformation) { + to_visit.push_back(state->Child(action)); + } else { + for (Action action : legal_actions) { + to_visit.push_back(state->Child(action)); + } + } + } else { + std::vector legal_actions = state->LegalActions(); + const int num_legal_actions = legal_actions.size(); + SPIEL_CHECK_GT(num_legal_actions, 0.); + for (Action action : legal_actions) { + to_visit.push_back(state->Child(action)); + } + } + } + return TabularPolicy(policy); +} + +TabularPolicy GetFirstActionPolicy(const Game& game) { + std::unordered_map policy; + if (game.GetType().dynamics != GameType::Dynamics::kSequential) { + SpielFatalError("Game is not sequential."); + return 
TabularPolicy(policy); + } + std::vector> to_visit; + to_visit.push_back(game.NewInitialState()); + while (!to_visit.empty()) { + std::unique_ptr state = std::move(to_visit.back()); + to_visit.pop_back(); + if (state->IsTerminal()) { + continue; + } + if (state->IsChanceNode()) { + for (const auto& outcome_and_prob : state->ChanceOutcomes()) { + to_visit.emplace_back(state->Child(outcome_and_prob.first)); + } + } else { + ActionsAndProbs infostate_policy; + std::vector legal_actions = state->LegalActions(); + const int num_legal_actions = legal_actions.size(); + SPIEL_CHECK_GT(num_legal_actions, 0.); + bool first_legal_action_found = false; + + infostate_policy.reserve(num_legal_actions); + for (Action action : legal_actions) { + to_visit.push_back(state->Child(action)); + if (!first_legal_action_found) { + first_legal_action_found = true; + infostate_policy.push_back({action, 1.}); + + } else { + infostate_policy.push_back({action, 0.}); + } + } + if (infostate_policy.empty()) { + SpielFatalError("State has zero legal actions."); + } + policy[state->InformationStateString()] = std::move(infostate_policy); + } + } + return TabularPolicy(policy); +} + +ActionsAndProbs PreferredActionPolicy::GetStatePolicy(const State& state, + Player player) const { + std::vector legal_actions = state.LegalActions(player); + for (Action action : preference_order_) { + if (absl::c_find(legal_actions, action) != legal_actions.end()) { + return GetDeterministicPolicy(legal_actions, action); + } + } + SpielFatalError("No preferred action found in the legal actions!"); +} + +TabularPolicy ToTabularPolicy(const Game& game, const Policy* policy) { + TabularPolicy tabular_policy; + std::vector> to_visit; + to_visit.push_back(game.NewInitialState()); + for (int idx = 0; idx < to_visit.size(); ++idx) { + const State* state = to_visit[idx].get(); + if (state->IsTerminal()) { + continue; + } + + if (!state->IsChanceNode()) { + std::vector players(game.NumPlayers()); + if (state->IsSimultaneousNode()) { + absl::c_iota(players, 0); + } else { + players = {state->CurrentPlayer()}; + } + + for (Player player : players) { + ActionsAndProbs state_policy = policy->GetStatePolicy(*state); + tabular_policy.SetStatePolicy(state->InformationStateString(player), + state_policy); + } + } + + for (Action action : state->LegalActions()) { + to_visit.push_back(state->Child(action)); + } + } + return tabular_policy; +} + +TabularPolicy GetPrefActionPolicy(const Game& game, + const std::vector& pref_actions) { + PreferredActionPolicy policy(pref_actions); + return ToTabularPolicy(game, &policy); +} + +std::string PrintPolicy(const ActionsAndProbs& policy) { + std::string policy_string; + for (auto [a, p] : policy) { + absl::StrAppend(&policy_string, absl::StrFormat("(%i, %f), ", a, p)); + } + return policy_string; +} + +TabularPolicy ToJointTabularPolicy(const std::vector& policies, + bool check_no_overlap) { + TabularPolicy joint_policy; + for (const TabularPolicy& policy : policies) { + if (check_no_overlap) { + for (const auto& key_and_val : policy.PolicyTable()) { + SPIEL_CHECK_TRUE(joint_policy.PolicyTable().find(key_and_val.first) == + joint_policy.PolicyTable().end()); + } + } + joint_policy.ImportPolicy(policy); + } + return joint_policy; +} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/policy.h b/scenarios/bargaining/open_spiel/open_spiel/policy.h new file mode 100644 index 0000000..f01c6c2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/policy.h @@ -0,0 +1,418 @@ +// 
Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_POLICY_H_ +#define OPEN_SPIEL_POLICY_H_ + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/serialization.h" + +namespace open_spiel { + +// Returns the probability for the specified action, or -1 if not found. +double GetProb(const ActionsAndProbs& action_and_probs, Action action); + +// Set an action probability for the specified action. +void SetProb(ActionsAndProbs* actions_and_probs, Action action, double prob); + +// Helper for deterministic policies: returns the single action if the policy +// is deterministic, otherwise returns kInvalidAction. +Action GetAction(const ActionsAndProbs& action_and_probs); + +// Returns a policy where every legal action has probability 1 / (number of +// legal actions) for the current player to play. The overloaded function is +// similar, and provided to support simultaneous move games. +ActionsAndProbs UniformStatePolicy(const State& state); +ActionsAndProbs UniformStatePolicy(const State& state, Player player); + +// Returns a policy where the zeroth action has probability 1. The overloaded +// function is similar, and provided to support simultaneous move games. +ActionsAndProbs FirstActionStatePolicy(const State& state); +ActionsAndProbs FirstActionStatePolicy(const State& state, Player player); + +// Return a new policy with all the same actions, but with probability 1 on the +// specified action, and 0 on the others. +ActionsAndProbs ToDeterministicPolicy(const ActionsAndProbs& actions_and_probs, + Action action); + +// Returns a policy with probability 1 on a specific action, and 0 on others. +ActionsAndProbs GetDeterministicPolicy(const std::vector& legal_actions, + Action action); + +// Check that two state policies are equal (within a float tolerance). Does an +// exact check, so the actions must be in the same order. +bool StatePoliciesEqual(const ActionsAndProbs& state_policy1, + const ActionsAndProbs& state_policy2, + double float_tolerance); + +// A general policy object. A policy is a mapping from states to list of +// (action, prob) pairs for all the legal actions at the state. +class Policy { + public: + virtual ~Policy() = default; + + // A convenience method for callers that want to use arrays. + virtual std::pair, std::vector> + GetStatePolicyAsParallelVectors(const State& state) const { + std::pair, std::vector> parray; + for (const auto& action_and_prob : GetStatePolicy(state)) { + parray.first.push_back(action_and_prob.first); + parray.second.push_back(action_and_prob.second); + } + return parray; + } + + // A convenience method for callers that want to use arrays. 
+ virtual std::pair, std::vector> + GetStatePolicyAsParallelVectors(const std::string& info_state) const { + std::pair, std::vector> parray; + for (const auto& action_and_prob : GetStatePolicy(info_state)) { + parray.first.push_back(action_and_prob.first); + parray.second.push_back(action_and_prob.second); + } + return parray; + } + + virtual std::unordered_map GetStatePolicyAsMap( + const State& state) const { + std::unordered_map pmap; + for (const auto& action_and_prob : GetStatePolicy(state)) { + pmap[action_and_prob.first] = action_and_prob.second; + } + return pmap; + } + + virtual std::unordered_map GetStatePolicyAsMap( + const std::string& info_state) const { + std::unordered_map pmap; + for (const auto& action_and_prob : GetStatePolicy(info_state)) { + pmap[action_and_prob.first] = action_and_prob.second; + } + return pmap; + } + + // Returns a list of (action, prob) pairs for the policy for the current + // player at this state. If the policy is not available at the state, returns + // an empty list. + virtual ActionsAndProbs GetStatePolicy(const State& state) const { + return GetStatePolicy(state, state.CurrentPlayer()); + } + + // Returns a list of (action, prob) pairs for the policy for the specified + // player at this state. If the policy is not available at the state, returns + // an empty list. + virtual ActionsAndProbs GetStatePolicy(const State& state, + Player player) const { + return GetStatePolicy(state.InformationStateString(player)); + } + + // Returns a list of (action, prob) pairs for the policy at this info state. + // If the policy is not available at the state, returns and empty list. + // It is sufficient for subclasses to override only this method, but not all + // forms of policies will be able to do so from just the information state. + virtual ActionsAndProbs GetStatePolicy(const std::string& info_state) const { + SpielFatalError("GetStatePolicy(const std::string&) unimplemented."); + } + + // Each override must write out the class’s identity followed by ":" as the + // very first thing so that the DeserializePolicy method can then call the + // Deserialize method for the correct subclass. See TabularPolicy and + // DeserializePolicy below for an example. The double_precision parameter + // indicates the number of decimal places in floating point numbers + // formatting, value -1 formats doubles with lossless, non-portable bitwise + // representation hex strings. + virtual std::string Serialize(int double_precision = -1, + std::string delimiter = "<~>") const { + SpielFatalError("Serialize(std::string delimiter) unimplemented."); + } +}; + +std::unique_ptr DeserializePolicy(const std::string& serialized, + std::string delimiter = "<~>"); + +// A tabular policy represented internally as a map. Note that this +// implementation is not directly compatible with the Python TabularPolicy +// implementation; the latter is implemented as a table of size +// [num_states, num_actions], while this is implemented as a map. It is +// non-trivial to convert between the two, but we have a function that does so +// in the open_spiel/python/policy.py file. +class TabularPolicy : public Policy { + public: + TabularPolicy() = default; + TabularPolicy(const Game& game); // Construct a uniform random policy. + TabularPolicy(const TabularPolicy& other) = default; + TabularPolicy(const std::unordered_map& table) + : policy_table_(table) {} + + // Converts a policy to a TabularPolicy. 
+ TabularPolicy(const Game& game, const Policy& policy) : TabularPolicy(game) { + for (auto& [infostate, is_policy] : policy_table_) { + is_policy = policy.GetStatePolicy(infostate); + } + } + + // Creates a new TabularPolicy from a deterministic policy encoded as a + // {info_state_str -> action} dict. The dummy_policy is used to initialize + // the initial mapping. + TabularPolicy(const TabularPolicy& dummy_policy, + const std::unordered_map& action_map) + : policy_table_(dummy_policy.policy_table_) { + for (const auto& entry : action_map) { + std::string info_state = entry.first; + Action action_taken = action_map.at(entry.first); + for (auto& action_and_prob : policy_table_[info_state]) { + action_and_prob.second = + (action_and_prob.first == action_taken ? 1.0 : 0.0); + } + } + } + + ActionsAndProbs GetStatePolicy(const std::string& info_state) const override { + auto iter = policy_table_.find(info_state); + if (iter == policy_table_.end()) { + return {}; + } else { + return iter->second; + } + } + + std::string Serialize(int double_precision = -1, + std::string delimiter = "<~>") const override { + SPIEL_CHECK_GE(double_precision, -1); + if (delimiter == "," || delimiter == "=") { + // The two delimiters are used for de/serialization of policy_table_ + SpielFatalError( + "Please select a different delimiter," + "invalid values are \",\" and \"=\"."); + } + std::string str = "TabularPolicy:"; + if (policy_table_.empty()) return str; + + for (auto const& [info_state, policy] : policy_table_) { + if (info_state.find(delimiter) != std::string::npos) { + SpielFatalError(absl::StrCat( + "Info state contains delimiter \"", delimiter, + "\", please fix the info state or select a different delimiter.")); + } + + std::string policy_str; + if (double_precision == -1) { + policy_str = + absl::StrJoin(policy, ",", + absl::PairFormatter(absl::AlphaNumFormatter(), "=", + HexDoubleFormatter())); + } else { + policy_str = absl::StrJoin( + policy, ",", + absl::PairFormatter(absl::AlphaNumFormatter(), "=", + SimpleDoubleFormatter(double_precision))); + } + absl::StrAppend(&str, info_state, delimiter, policy_str, delimiter); + } + // Remove the trailing delimiter + str.erase(str.length() - delimiter.length()); + return str; + } + + // Set (overwrite) all the state policies contained in another policy within + // this policy. Does not change other state policies not contained in this + // policy. + void ImportPolicy(const TabularPolicy& other_policy) { + for (const auto& [info_state, actions_and_probs] : + other_policy.policy_table_) { + SetStatePolicy(info_state, actions_and_probs); + } + } + + // Set the probability for action at the info state. If the info state is not + // in the policy, it is added. If the action is not in the info state policy, + // it is added. Otherwise it is modified. 
+ void SetProb(const std::string& info_state, Action action, double prob) { + auto iter = policy_table_.find(info_state); + if (iter == policy_table_.end()) { + auto iter_and_bool = policy_table_.insert({info_state, {}}); + iter = iter_and_bool.first; + } + open_spiel::SetProb(&(iter->second), action, prob); + } + + void SetStatePolicy(const std::string& info_state, + const ActionsAndProbs& state_policy) { + policy_table_[info_state] = state_policy; + } + + std::unordered_map& PolicyTable() { + return policy_table_; + } + + const std::unordered_map& PolicyTable() const { + return policy_table_; + } + + int size() const { return policy_table_.size(); } + + std::string ToString() const; + + // A ToString where the keys are sorted. + std::string ToStringSorted() const; + + protected: + std::unordered_map policy_table_; +}; + +// A partial tabular policy is one that is not entirely complete: only a subset +// of the full table is included. When called on state that is not in the table, +// a specific fallback policy is queried instead. +class PartialTabularPolicy : public TabularPolicy { + public: + // Creates an empty partial tabular policy with a uniform fallback policy. + PartialTabularPolicy(); + + // Creates a partial tabular policy with the specified table with a uniform + // fallback policy. + PartialTabularPolicy( + const std::unordered_map& table); + + // Creates a partial tabular policy with the specified table with the + // specified fallback policy. + PartialTabularPolicy( + const std::unordered_map& table, + std::shared_ptr fallback_policy); + + // These retrieval methods are all modified in the same way: they first check + // if the key is in the table. If so, they return the state policy from the + // table. Otherwise, they forward the call to the fallback policy. + ActionsAndProbs GetStatePolicy(const State& state) const override; + ActionsAndProbs GetStatePolicy(const State& state, + Player player) const override; + ActionsAndProbs GetStatePolicy(const std::string& info_state) const override; + + private: + std::shared_ptr fallback_policy_; +}; + +std::unique_ptr DeserializeTabularPolicy( + const std::string& serialized, std::string delimiter = "<~>"); + +// Chooses all legal actions with equal probability. This is equivalent to the +// tabular version, except that this works for large games. +class UniformPolicy : public Policy { + public: + ActionsAndProbs GetStatePolicy(const State& state, + Player player) const override { + if (state.IsSimultaneousNode()) { + return UniformStatePolicy(state, player); + } else { + SPIEL_CHECK_TRUE(state.IsPlayerActing(player)); + return UniformStatePolicy(state); + } + } + + std::string Serialize(int double_precision = -1, + std::string delimiter = "") const override { + return "UniformPolicy:"; + } +}; + +// Among all legal actions, choose the first action deterministically. +class FirstActionPolicy : public Policy { + public: + ActionsAndProbs GetStatePolicy(const State& state, + Player player) const override { + if (state.IsSimultaneousNode()) { + return FirstActionStatePolicy(state, player); + } else { + SPIEL_CHECK_TRUE(state.IsPlayerActing(player)); + return FirstActionStatePolicy(state); + } + } + + std::string Serialize(int double_precision = -1, + std::string delimiter = "") const override { + return "FirstActionPolicy:"; + } +}; + +// A deterministic policy with which takes legal actions in order of +// preference specified by pref_actions. The function will check-fail if none +// of the pref_action elements are legal for a state. 
+// +// For example, PreferredActionPolicy(leduc, {kRaise, kCall}) constructs a +// policy that always raises and only falls back to call if raise is not a legal +// action. If it is possible for nethier raise nor call to be valid actions in a +// state in leduc, the function will fail. +class PreferredActionPolicy : public Policy { + public: + PreferredActionPolicy(const std::vector& preference_order) + : preference_order_(preference_order) {} + + ActionsAndProbs GetStatePolicy(const State& state, + Player player) const override; + + std::string Serialize(int double_precision = -1, + std::string delimiter = "") const override { + SpielFatalError("Unimplemented."); + } + + private: + std::vector preference_order_; +}; + +// Takes any policy and returns a tabular policy by traversing the game and +// building a tabular policy for it. +TabularPolicy ToTabularPolicy(const Game& game, const Policy* policy); + +// Helper functions that generate policies for testing. +// The player parameter can be used to only generate policies for a single +// player. By default -1 will generate policies for all players. +TabularPolicy GetEmptyTabularPolicy(const Game& game, + bool initialize_to_uniform = false, + Player player = -1); +TabularPolicy GetUniformPolicy(const Game& game); +TabularPolicy GetRandomPolicy( + const Game& game, int seed = 0, Player player = -1); +TabularPolicy GetFlatDirichletPolicy( + const Game& game, int seed = 0, Player player = -1); +TabularPolicy GetRandomDeterministicPolicy( + const Game& game, int seed = 0, Player player = -1); +TabularPolicy GetFirstActionPolicy(const Game& game); + +// Returns a policy with only valid actions on states that are reachable. +// Actions with zero probability or states that are unreachable are not present. +TabularPolicy GetRandomDeterministicVisitPolicy( + const Game& game, int seed = 0, Player player = -1); + +// Returns a preferred action policy as a tabular policy. +TabularPolicy GetPrefActionPolicy(const Game& game, + const std::vector& pref_action); + +std::string PrintPolicy(const ActionsAndProbs& policy); + +// Takes many tabular policy and merges them into one. If check_no_overlap is +// set, then a check is done to ensure that there is no intersection among the +// policies (slow: involves iterating over each). +TabularPolicy ToJointTabularPolicy(const std::vector& policies, + bool check_no_overlap); + +} // namespace open_spiel + +#endif // OPEN_SPIEL_POLICY_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/CMakeLists.txt b/scenarios/bargaining/open_spiel/open_spiel/python/CMakeLists.txt new file mode 100644 index 0000000..46c8abf --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/CMakeLists.txt @@ -0,0 +1,351 @@ +if (OPEN_SPIEL_BUILDING_WHEEL) + # When building a wheel, need to use this. See: + # https://github.com/joerick/cibuildwheel/issues/639#issuecomment-817872369 + message(NOTICE "Building Wheel Detected. Finding Python Interpeter Development.Module") + find_package(Python3 COMPONENTS Interpreter Development.Module REQUIRED) + # find_package(Python3 COMPONENTS Interpreter Development) + unset(OPEN_SPIEL_PYTHONPATH) + if(DEFINED ENV{PYTHONPATH}) + set (OPEN_SPIEL_PYTHONPATH $ENV{PYTHONPATH}) + endif() +else() + message(NOTICE "Not building wheel. 
Finding Python normally...") + find_package(Python3 COMPONENTS Interpreter Development) + set(OPEN_SPIEL_PYTHONPATH ${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_CURRENT_SOURCE_DIR}/../..;) +endif() + +message(NOTICE "Python executable: ${Python3_EXECUTABLE}") +message(NOTICE "Python include dirs: ${Python3_INCLUDE_DIRS}") +message(NOTICE "Python library dirs: ${Python3_LIBRARY_DIRS}") +include_directories(SYSTEM ${Python3_INCLUDE_DIRS}) + +if(Python3_VERSION VERSION_LESS "3.8.0") + message(FATAL_ERROR + "Python found ${Python3_VERSION} < 3.8.0") +endif() + +# Detect the Python ML frameworks. +if (OPEN_SPIEL_ENABLE_JAX STREQUAL AUTO) + message(NOTICE "OPEN_SPIEL_ENABLE_JAX set to AUTO. Detecting Jax...") + execute_process(COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/../scripts/find_jax.sh ${Python3_EXECUTABLE} + RESULT_VARIABLE JAX_RET_VAL + OUTPUT_VARIABLE JAX_OUT + ERROR_VARIABLE JAX_ERR) + if (JAX_RET_VAL EQUAL 0) + message(NOTICE "Found, version: ${JAX_OUT}") + set(OPEN_SPIEL_ENABLE_JAX ON) + else() + message(NOTICE "Not found. Enable printing errors in python/CMakeLists.txt to see output.") + set(OPEN_SPIEL_ENABLE_JAX OFF) + # message(NOTICE "stdout: ${JAX_OUT}, stderr: ${JAX_ERR}") + endif() +endif() + +if (OPEN_SPIEL_ENABLE_PYTORCH STREQUAL AUTO) + message(NOTICE "OPEN_SPIEL_ENABLE_PYTORCH set to AUTO. Detecting PyTorch...") + execute_process(COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/../scripts/find_pytorch.sh ${Python3_EXECUTABLE} + RESULT_VARIABLE PYTORCH_RET_VAL + OUTPUT_VARIABLE PYTORCH_OUT + ERROR_VARIABLE PYTORCH_ERR) + if (PYTORCH_RET_VAL EQUAL 0) + message(NOTICE "Found, version: ${PYTORCH_OUT}") + set(OPEN_SPIEL_ENABLE_PYTORCH ON) + else() + message(NOTICE "Not found. Enable printing errors in python/CMakeLists.txt to see output.") + set(OPEN_SPIEL_ENABLE_PYTORCH OFF) + # message(NOTICE "stdout: ${PYTORCH_OUT}, stderr: ${PYTORCH_ERR}") + endif() +endif() + +# List of all Python bindings to add to pyspiel. 
+include_directories (../pybind11_abseil ../../pybind11/include) +set(PYTHON_BINDINGS ${PYTHON_BINDINGS} + pybind11/algorithms_corr_dist.cc + pybind11/algorithms_corr_dist.h + pybind11/algorithms_trajectories.cc + pybind11/algorithms_trajectories.h + pybind11/bots.cc + pybind11/bots.h + pybind11/evaluation_sco.cc + pybind11/evaluation_sco.h + pybind11/games_backgammon.cc + pybind11/games_backgammon.h + pybind11/games_bargaining.cc + pybind11/games_bargaining.h + pybind11/games_blackjack.cc + pybind11/games_blackjack.h + pybind11/games_bridge.cc + pybind11/games_bridge.h + pybind11/games_chess.cc + pybind11/games_chess.h + pybind11/games_colored_trails.cc + pybind11/games_colored_trails.h + pybind11/games_dots_and_boxes.cc + pybind11/games_dots_and_boxes.h + pybind11/games_euchre.cc + pybind11/games_euchre.h + pybind11/games_gin_rummy.cc + pybind11/games_gin_rummy.h + pybind11/games_kuhn_poker.cc + pybind11/games_kuhn_poker.h + pybind11/games_leduc_poker.cc + pybind11/games_leduc_poker.h + pybind11/games_negotiation.cc + pybind11/games_negotiation.h + pybind11/games_spades.cc + pybind11/games_spades.h + pybind11/games_tarok.cc + pybind11/games_tarok.h + pybind11/games_tic_tac_toe.cc + pybind11/games_tic_tac_toe.h + pybind11/games_tiny_bridge.cc + pybind11/games_tiny_bridge.h + pybind11/games_trade_comm.cc + pybind11/games_trade_comm.h + pybind11/game_transforms.cc + pybind11/game_transforms.h + pybind11/observer.cc + pybind11/observer.h + pybind11/policy.cc + pybind11/policy.h + pybind11/pybind11.h + pybind11/pyspiel.cc + pybind11/python_games.cc + pybind11/python_games.h + pybind11/python_policy.cc + pybind11/python_policy.h + pybind11/utils.cc + pybind11/utils.h + ) + +if (OPEN_SPIEL_BUILD_WITH_ACPC) + set(PYTHON_BINDINGS ${PYTHON_BINDINGS} + pybind11/games_universal_poker.cc + pybind11/games_universal_poker.h + ) +endif() + +# Optional pyspiel sub-modules, which can specify their python bindings. +if (OPEN_SPIEL_BUILD_WITH_GAMUT) + set (PYTHON_BINDINGS ${PYTHON_BINDINGS} + ../games/gamut/gamut_pybind11.h + ../games/gamut/gamut_pybind11.cc + ) +endif() +if (OPEN_SPIEL_BUILD_WITH_XINXIN) + set (PYTHON_BINDINGS ${PYTHON_BINDINGS} + ../bots/xinxin/xinxin_pybind11.h + ../bots/xinxin/xinxin_pybind11.cc + ) +endif() + +add_library(pyspiel MODULE ${PYTHON_BINDINGS} ${OPEN_SPIEL_OBJECTS}) +target_link_directories(pyspiel PUBLIC ${Python3_LIBRARY_DIRS}) + +# Without this, the binary is called `libpyspiel.so` +set_target_properties(pyspiel PROPERTIES PREFIX "") +if (WIN32) + # Python does not seem able to import the module without this change. + set_target_properties(pyspiel PROPERTIES SUFFIX ".pyd") +endif() + +if (OPEN_SPIEL_BUILD_WITH_XINXIN) + set(PYTHON_TESTS ${PYTHON_TESTS} ../bots/xinxin/xinxin_bot_test.py) +endif() +if (OPEN_SPIEL_BUILD_WITH_GAMUT) + set(PYTHON_TESTS ${PYTHON_TESTS} ../games/gamut/gamut_test.py) +endif() + +# Note: cvxopt does not yet support binary wheels for Python 3.11. +# It has been temporary removed from the python_extra_deps. +# As a result, several tests are disabled until a cvxopt wheel becomes +# available for Python 3.11. +# See https://github.com/cvxopt/cvxopt/issues/228 for discussion. + +# Python tests to run. Start with all the core tests here first, then +# conditionally add other tests based on what has been enabled/detected. 
+set(PYTHON_TESTS ${PYTHON_TESTS} + ../integration_tests/api_test.py + ../integration_tests/playthrough_test.py + algorithms/action_value_test.py + algorithms/action_value_vs_best_response_test.py + algorithms/async_mcts_test.py + algorithms/best_response_test.py + algorithms/boltzmann_tabular_qlearner_test.py + algorithms/cfr_br_test.py + algorithms/cfr_test.py + algorithms/discounted_cfr_test.py + algorithms/efr_test.py + algorithms/evaluate_bots_test.py + algorithms/expected_game_score_test.py + algorithms/external_sampling_mccfr_test.py + algorithms/fictitious_play_test.py + algorithms/gambit_test.py + algorithms/generate_playthrough_test.py + algorithms/get_all_states_test.py + algorithms/ismcts_agent_test.py + algorithms/mcts_agent_test.py + algorithms/mcts_test.py + algorithms/minimax_test.py + algorithms/nfg_utils_test.py + algorithms/noisy_policy_test.py + algorithms/outcome_sampling_mccfr_test.py + algorithms/policy_aggregator_joint_test.py + algorithms/policy_aggregator_test.py + algorithms/psro_v2/strategy_selectors_test.py + algorithms/projected_replicator_dynamics_test.py + algorithms/random_agent_test.py + algorithms/regret_matching_test.py + algorithms/tabular_qlearner_test.py + algorithms/sequence_form_utils_test.py + algorithms/wolf_phc_test.py + algorithms/mmd_dilated_test.py + coalitional_games/shapley_values_test.py + bots/bluechip_bridge_test.py + bots/bluechip_bridge_uncontested_bidding_test.py + bots/is_mcts_test.py + bots/uniform_random_test.py + egt/dynamics_test.py + egt/heuristic_payoff_table_test.py + egt/utils_test.py + environments/catch_test.py + environments/cliff_walking_test.py + games/block_dominoes_test.py + games/chat_game_test.py + games/chat_games/envs/base_envs/base_envs_test.py + games/data_test.py + games/dynamic_routing_test.py + games/dynamic_routing_utils_test.py + games/liars_poker_test.py + games/team_dominoes_test.py + games/tic_tac_toe_test.py + mfg/algorithms/best_response_value_test.py + mfg/algorithms/mirror_descent_test.py + mfg/algorithms/greedy_policy_test.py + mfg/algorithms/nash_conv_test.py + mfg/algorithms/policy_value_test.py + mfg/games/crowd_modelling_test.py + mfg/games/predator_prey_test.py + mfg/games/dynamic_routing_test.py + mfg/games/normal_form_game_test.py + tests/mfg_implementation_test/mfg_test.py + tests/bot_test.py + tests/game_transforms_test.py + tests/games_blackjack_test.py + tests/games_bridge_test.py + tests/games_bargaining_test.py + tests/games_chess_test.py + tests/games_euchre_test.py + tests/games_gin_rummy_test.py + tests/games_sim_test.py + tests/games_tic_tac_toe_test.py + tests/policy_test.py + tests/pyspiel_test.py + tests/rl_environment_test.py + tests/sampled_stochastic_games_test.py + tests/tensor_game_utils_test.py + utils/file_logger_test.py + utils/lru_cache_test.py + utils/spawn_test.py + voting/approval_test.py + voting/base_test.py + voting/borda_test.py + voting/copeland_test.py + voting/kemeny_young_test.py + voting/plurality_test.py + voting/ranked_pairs_test.py + voting/schulze_test.py + voting/soft_condorcet_optimization_test.py + voting/stv_test.py +) + +# Add Jax tests if it is enabled. +if (OPEN_SPIEL_ENABLE_JAX) + set (PYTHON_TESTS ${PYTHON_TESTS} + jax/dqn_jax_test.py + jax/nfsp_jax_test.py + jax/opponent_shaping_jax_test.py + jax/policy_gradient_jax_test.py + coalitional_games/least_core_lagrangian_test.py + mfg/algorithms/fictitious_play_test.py + ) +endif() + +# Add PyTorch tests if is enabled. 
+if (OPEN_SPIEL_ENABLE_PYTORCH) + set(PYTHON_TESTS ${PYTHON_TESTS} + pytorch/rcfr_pytorch_test.py + pytorch/dqn_pytorch_test.py + pytorch/deep_cfr_pytorch_test.py + pytorch/eva_pytorch_test.py + pytorch/losses/rl_losses_pytorch_test.py + pytorch/policy_gradient_pytorch_test.py + pytorch/ppo_pytorch_test.py + pytorch/neurd_pytorch_test.py + ) +endif() + +# Add miscellaneous Python tests if enabled. +# These require extra dependencies like cvxopt, nashpy, or matplotlib +if (OPEN_SPIEL_ENABLE_PYTHON_MISC) + set(PYTHON_TESTS ${PYTHON_TESTS} + algorithms/adidas_test.py + algorithms/double_oracle_test.py + algorithms/jpsro_test.py + algorithms/lp_solver_test.py + algorithms/nash_averaging_test.py + algorithms/mip_nash_test.py + algorithms/psro_v2/best_response_oracle_test.py + algorithms/response_graph_ucb_test.py + algorithms/sequence_form_lp_test.py + algorithms/stackelberg_lp_test.py + algorithms/tabular_multiagent_qlearner.py + algorithms/value_iteration_test.py + coalitional_games/least_core_lp_test.py + coalitional_games/wvg_test.py + egt/alpharank_test.py + egt/alpharank_visualizer_test.py + egt/visualization_test.py + games/kuhn_poker_test.py + voting/maximal_lotteries_test.py + tests/matrix_game_utils_test.py + ) +endif() + +# Tests that are excluded when running via the installed wheel. +# Some bundle data not shipped with the wheel (e.g. playthroughs) +set(WHEEL_EXCLUDED_PYTHON_TESTS + algorithms/response_graph_ucb_test.py + games/tic_tac_toe_test.py + ../integration_tests/playthrough_test.py) + +# Create a python test. +foreach(py_test_file IN LISTS PYTHON_TESTS) + if (NOT (OPEN_SPIEL_BUILDING_WHEEL AND ${py_test_file} IN_LIST WHEEL_EXCLUDED_PYTHON_TESTS)) + add_test(NAME python/${py_test_file} COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/${py_test_file}) + + # We need two elements in the python path: CURRENT_BINARY_DIR to pick up + # pyspiel.so, and CURRENT_SOURCE_DIR for the Python source files. We use + # CURRENT_SOURCE_DIR/../.. so that the Python module imports are of the form: + # from open_spiel.python import rl_environment. + set_property(TEST python/${py_test_file} + PROPERTY ENVIRONMENT + PYTHONPATH=${OPEN_SPIEL_PYTHONPATH} + TEST_SRCDIR=${CMAKE_CURRENT_SOURCE_DIR}/../..) + endif() +endforeach(py_test_file) + +# Additional tests (running examples as tests) +# We don't generate these automatically because we may want custom parameters. +if (OPEN_SPIEL_ENABLE_JAX AND NOT OPEN_SPIEL_BUILDING_WHEEL) + add_test(NAME python_examples_bridge_supervised_learning + COMMAND ${Python3_EXECUTABLE} + ${CMAKE_CURRENT_SOURCE_DIR}/examples/bridge_supervised_learning.py + --iterations 10 + --eval_every 5 + --data_path ${CMAKE_CURRENT_SOURCE_DIR}/examples/data/bridge) + set_property(TEST python_examples_bridge_supervised_learning + PROPERTY ENVIRONMENT + PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_CURRENT_SOURCE_DIR}/../..; + TEST_SRCDIR=${CMAKE_CURRENT_SOURCE_DIR}/../..) +endif() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/README.md b/scenarios/bargaining/open_spiel/open_spiel/python/README.md new file mode 100644 index 0000000..c0b289a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/README.md @@ -0,0 +1,8 @@ +# Open Spiel Python API + +This is a Python API for OpenSpiel. + +See `python/examples/example.py` for an example use and overview of the API, and +the main OpenSpiel installation instructions to see how to run this example. + +For the full API specification, please see `python/pybind11/pyspiel.cc`. 
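For quick reference, a minimal sketch of what using the Python API described in that README looks like once the `pyspiel` module built by this patch is importable. This is illustrative only and not part of the vendored sources; the game name and the random-rollout loop are assumptions for the example, though "bargaining" is among the games bound in this patch.

```python
import random
import pyspiel

# Load a registered game by its short name ("kuhn_poker" would also work).
game = pyspiel.load_game("bargaining")
state = game.new_initial_state()

while not state.is_terminal():
    if state.is_chance_node():
        # Chance nodes expose (action, probability) pairs; sample one accordingly.
        actions, probs = zip(*state.chance_outcomes())
        state.apply_action(random.choices(actions, weights=probs)[0])
    else:
        # Otherwise play a uniformly random legal action for the current player.
        state.apply_action(random.choice(state.legal_actions()))

print(state.returns())  # one terminal return per player
```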
diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/__init__.py new file mode 100644 index 0000000..7a8a868 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/__init__.py @@ -0,0 +1,22 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Open Spiel Python API.""" + +from typing import Dict, Union + +# A type provided for PyType hints. Added after the discussion in +# https://github.com/google-deepmind/open_spiel/issues/1224. +GameParameter = Union[int, float, str, bool, Dict[str, 'GameParameter']] + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/action_value.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/action_value.py new file mode 100644 index 0000000..7f5f189 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/action_value.py @@ -0,0 +1,312 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Q-values and reach probabilities computation.""" + +import collections +import numpy as np + +_CalculatorReturn = collections.namedtuple( + "_CalculatorReturn", + [ + # A list of size `num_players` of the root node value for each player. + "root_node_values", + # An array of shape `[len(info_states), game.num_distinct_actions()]` + # giving the value of each action. Will be zero for invalid actions. 
+ "action_values", + # The player's counterfactual reach probability of this infostate when + # playing, as a list of shape [num_info_states]. + "counterfactual_reach_probs", + # The reach probability of the current player at the infostates, as a + # list of shape [num_info_states]. + # This is the product of the current player probs along *one* trajectory + # leading to this info-state (this number should be the same along + # any trajectory leading to this info-state because of perfect recall). + "player_reach_probs", + # A list of `len(info_states)` `[game.num_distinct_actions()]` numpy + # array so that v[s_index][a] = \sum_{h \in x} cfr_reach(h) * Q(h, a) + "sum_cfr_reach_by_action_value", + ]) + + +class TreeWalkCalculator(object): + r"""Class to orchestrate the calculation. + + This performs a full history tree walk and computes several statistics, + available as attributes. + + Attributes: + weighted_action_values: A dictionary mapping (player,information state + string) to a dictionary mapping each action to a vector of the sum of + (reward * prob) reward taking that action for each player. To get the + action-values, one will need to normalize by `info_state_prob`. + info_state_prob: A dictionary mapping (player,information state string) to + the reach probability of this info_state. + info_state_player_prob: Same as info_state_prob for the player reach + probability. + info_state_cf_prob: Same as info_state_prob for the counterfactual reach + probability to get to that state, i.e. the sum over histories, of the + product of the opponents probabilities of actions leading to the history. + info_state_chance_prob: Same as above, for the chance probability to get + into that state. + info_state_cf_prob_by_q_sum: A dictionary mapping (player,information state + string) to a vector of shape `[num_actions]`, that store for each action + the cumulative \sum_{h \in x} cfr_reach(h) * Q(h, a) + root_values: The values at the root node [for player 0, for player 1]. + """ + + def __init__(self, game): + if not game.get_type().provides_information_state_string: + raise ValueError("Only game which provide the information_state_string " + "are supported, as this is being used in the key to " + "identify states.") + + self._game = game + self._num_players = game.num_players() + self._num_actions = game.num_distinct_actions() + + self.weighted_action_values = None + self.info_state_prob = None + self.info_state_player_prob = None + self.info_state_cf_prob = None + self.info_state_chance_prob = None + self.info_state_cf_prob_by_q_sum = None + self.root_values = None + + def _get_action_values(self, state, policies, reach_probabilities): + """Computes the value of the state given the policies for both players. + + Args: + state: The state to start analysis from. + policies: List of `policy.Policy` objects, one per player. + reach_probabilities: A numpy array of shape `[num_players + 1]`. + reach_probabilities[i] is the product of the player i action + probabilities along the current trajectory. Note that + reach_probabilities[-1] corresponds to the chance player. Initially, it + should be called with np.ones(self._num_players + 1) at the root node. + + Returns: + The value of the root state to each player. + + Side-effects - populates: + `self.weighted_action_values[(player, infostate)][action]`. + `self.info_state_prob[(player, infostate)]`. + `self.info_state_cf_prob[(player, infostate)]`. + `self.info_state_chance_prob[(player, infostate)]`. 
+ + We use `(player, infostate)` as a key in case the same infostate is shared + by multiple players, e.g. in a simultaneous-move game. + """ + if state.is_terminal(): + return np.array(state.returns()) + + current_player = state.current_player() + is_chance = state.is_chance_node() + + if not is_chance: + key = (current_player, state.information_state_string()) + reach_prob = np.prod(reach_probabilities) + + # We exclude both the current and the chance players. + opponent_probability = ( + np.prod(reach_probabilities[:current_player]) * + np.prod(reach_probabilities[current_player + 1:-1])) + self.info_state_cf_prob[key] += ( + reach_probabilities[-1] * opponent_probability) + self.info_state_prob[key] += reach_prob + self.info_state_chance_prob[key] += reach_probabilities[-1] + # Mind that we have "=" here and not "+=", because we just need to use + # the reach prob for the player for *any* of the histories leading to + # the current info_state (they are all equal because of perfect recall). + self.info_state_player_prob[key] = reach_probabilities[current_player] + + value = np.zeros(len(policies)) + if is_chance: + action_to_prob = dict(state.chance_outcomes()) + else: + action_to_prob = policies[current_player].action_probabilities(state) + for action in state.legal_actions(): + prob = action_to_prob.get(action, 0) + new_reach_probabilities = reach_probabilities.copy() + new_reach_probabilities[current_player] *= prob + + child = state.child(action) + child_value = self._get_action_values( + child, policies, reach_probabilities=new_reach_probabilities) + if not is_chance: + self.weighted_action_values[key][action] += child_value * reach_prob + self.info_state_cf_prob_by_q_sum[key][action] += ( + child_value[current_player] * opponent_probability * + reach_probabilities[-1]) + value += child_value * prob + return value + + def compute_all_states_action_values(self, policies): + """Computes action values per state for the player. + + The internal state is fully re-created when calling this method, thus it's + safe to use one object to perform several tree-walks using different + policies, and to extract the results using for example + `calculator.infor_state_prob` to take ownership of the dictionary. + + Args: + policies: List of `policy.Policy` objects, one per player. As the policy + will be accessed using `policies[i]`, it can also be a dictionary + mapping player_id to a `policy.Policy` object. + """ + assert len(policies) == self._num_players + + # Compute action values + self.weighted_action_values = collections.defaultdict( + lambda: collections.defaultdict(lambda: np.zeros(self._num_players))) + self.info_state_prob = collections.defaultdict(float) + self.info_state_player_prob = collections.defaultdict(float) + self.info_state_cf_prob = collections.defaultdict(float) + self.info_state_chance_prob = collections.defaultdict(float) + self.info_state_cf_prob_by_q_sum = collections.defaultdict( + lambda: np.zeros(self._num_actions)) + + self.root_values = self._get_action_values( + self._game.new_initial_state(), + policies, + reach_probabilities=np.ones(self._num_players + 1)) + + def _get_tabular_statistics(self, keys): + """Returns tabular numpy arrays of the resulting stastistics. + + Args: + keys: A list of the (player, info_state_str) keys to use to return the + tabular numpy array of results. 
+ """ + # Collect normalized action values for each information state + action_values = [] + cfrp = [] # Counterfactual reach probabilities + player_reach_probs = [] + sum_cfr_reach_by_action_value = [] + + for key in keys: + player = key[0] + av = self.weighted_action_values[key] + norm_prob = self.info_state_prob[key] + action_values.append([(av[a][player] / norm_prob) if + (a in av and norm_prob > 0) else 0 + for a in range(self._num_actions)]) + cfrp.append(self.info_state_cf_prob[key]) + player_reach_probs.append(self.info_state_player_prob[key]) + sum_cfr_reach_by_action_value.append( + self.info_state_cf_prob_by_q_sum[key]) + + # Return values + return _CalculatorReturn( + root_node_values=self.root_values, + action_values=action_values, + counterfactual_reach_probs=cfrp, + player_reach_probs=player_reach_probs, + sum_cfr_reach_by_action_value=sum_cfr_reach_by_action_value) + + def get_tabular_statistics(self, tabular_policy): + """Returns tabular numpy arrays of the resulting stastistics. + + This function should be called after `compute_all_states_action_values`. + Optionally, one can directly call the object to perform both actions. + + Args: + tabular_policy: A `policy.TabularPolicy` object, used to get the ordering + of the states in the tabular numpy array. + """ + keys = [] + for player_id, player_states in enumerate(tabular_policy.states_per_player): + keys += [(player_id, s) for s in player_states] + return self._get_tabular_statistics(keys) + + def __call__(self, policies, tabular_policy): + """Computes action values per state for the player. + + The internal state is fully re-created when calling this method, thus it's + safe to use one object to perform several tree-walks using different + policies, and to extract the results using for example + `calculator.infor_state_prob` to take ownership of the dictionary. + + Args: + policies: List of `policy.Policy` objects, one per player. + tabular_policy: A `policy.TabularPolicy` object, used to get the ordering + of the states in the tabular numpy array. + + Returns: + A `_CalculatorReturn` namedtuple. See its docstring for the details. + """ + self.compute_all_states_action_values(policies) + return self.get_tabular_statistics(tabular_policy) + + def get_root_node_values(self, policies): + """Gets root values only. + + This speeds up calculation in two ways: + + 1. It only searches nodes with positive probability. + 2. It does not populate a large dictionary of meta information. + + Args: + policies: List of `policy.Policy` objects, one per player. + + Returns: + A numpy array of shape [num_players] of the root value. + """ + return self._get_action_values_only( + self._game.new_initial_state(), + policies, + reach_probabilities=np.ones(self._num_players + 1)) + + def _get_action_values_only(self, state, policies, reach_probabilities): + """Computes the value of the state given the policies for both players. + + Args: + state: The state to start analysis from. + policies: List of `policy.Policy` objects, one per player. + reach_probabilities: A numpy array of shape `[num_players + 1]`. + reach_probabilities[i] is the product of the player i action + probabilities along the current trajectory. Note that + reach_probabilities[-1] corresponds to the chance player. Initially, it + should be called with np.ones(self._num_players + 1) at the root node. + + Returns: + A numpy array of shape [num_players] of the root value. 
+ """ + if state.is_terminal(): + return np.array(state.returns()) + + current_player = state.current_player() + is_chance = state.is_chance_node() + + value = np.zeros(len(policies)) + if is_chance: + action_to_prob = dict(state.chance_outcomes()) + else: + action_to_prob = policies[current_player].action_probabilities(state) + + for action in state.legal_actions(): + prob = action_to_prob.get(action, 0) + + # Do not follow tree down if there is zero probability. + if prob == 0.0: + continue + + new_reach_probabilities = reach_probabilities.copy() + new_reach_probabilities[current_player] *= prob + + child = state.child(action) + child_value = self._get_action_values_only( + child, policies, reach_probabilities=new_reach_probabilities) + value += child_value * prob + return value diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/action_value_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/action_value_test.py new file mode 100644 index 0000000..0fe9ee9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/action_value_test.py @@ -0,0 +1,115 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.algorithms.action_value.py.""" + +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np + +from open_spiel.python import policy +from open_spiel.python.algorithms import action_value +import pyspiel + + +class ActionValuesTest(parameterized.TestCase): + + @parameterized.parameters([["kuhn_poker", 2], ["kuhn_poker", 3], + ["leduc_poker", 2]]) + def test_runs_with_uniform_policies(self, game_name, num_players): + game = pyspiel.load_game(game_name, {"players": num_players}) + calc = action_value.TreeWalkCalculator(game) + uniform_policy = policy.TabularPolicy(game) + calc.compute_all_states_action_values([uniform_policy] * num_players) + + def test_kuhn_poker_always_pass_p0(self): + game = pyspiel.load_game("kuhn_poker") + calc = action_value.TreeWalkCalculator(game) + uniform_policy = policy.TabularPolicy(game) + always_pass_policy = policy.FirstActionPolicy(game).to_tabular() + returned_values = calc([always_pass_policy, uniform_policy], + always_pass_policy) + root_node_values = calc.get_root_node_values( + [always_pass_policy, uniform_policy]) + self.assertTrue( + np.allclose(root_node_values, returned_values.root_node_values)) + + # Action 0 == Pass. Action 1 == Bet + # Some values are 0 because the states are not reached, thus the expected + # value of that node is undefined. 
+ np.testing.assert_array_almost_equal( + np.asarray([ + # Player 0 states + [-1.0, -0.5], # '0' + [-1.0, -2.0], # '0pb' + [-0.5, 0.5], # '1' + [-1.0, 0.0], # '1pb' + [0.0, 1.5], # '2' + [-1.0, 2.0], # '2pb' + # Player 1 states + [0.0, 1.0], # '1p' + [0, 0], # Unreachable + [1.0, 1.0], # '2p' + [0, 0], # Unreachable + [-1.0, 1.0], # '0p' + [0, 0], # Unreachable + ]), returned_values.action_values) + + np.testing.assert_array_almost_equal( + np.asarray([ + # Player 0 states + 1 / 3, # '0' + 1 / 6, # '0pb' + 1 / 3, # '1' + 1 / 6, # '1pb' + 1 / 3, # '2' + 1 / 6, # '2pb' + # Player 1 states + 1 / 3, # '1p' + 0.0, # '1b': zero because player 0 always play pass + 1 / 3, # 2p' + 0.0, # '2b': zero because player 0 always play pass + 1 / 3, # '0p' + 0.0, # '0b': zero because player 0 always play pass + ]), + returned_values.counterfactual_reach_probs) + + # The reach probabilities are always one, even though we have player 0 + # who only plays pass, because the unreachable nodes for player 0 are + # terminal nodes: e.g. 'x x b b p' has a player 0 reach of 0, but it is + # a terminal node, thus it does not appear in the tabular policy + # states. + np.testing.assert_array_equal( + [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + returned_values.player_reach_probs) + + np.testing.assert_array_almost_equal( + np.asarray([ + np.array([-1/3, -1/6]), + np.array([-1/6, -1/3]), + np.array([-1/6, 1/6]), + np.array([-1/6, 0.]), + np.array([0., 0.5]), + np.array([-1/6, 1/3]), + np.array([0., 1/3]), + np.array([0., 0.]), + np.array([1/3, 1/3]), + np.array([0., 0.]), + np.array([-1/3, 1/3]), + np.array([0., 0.]) + ]), returned_values.sum_cfr_reach_by_action_value) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/action_value_vs_best_response.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/action_value_vs_best_response.py new file mode 100644 index 0000000..ce7b84e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/action_value_vs_best_response.py @@ -0,0 +1,156 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
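The test file above exercises the typical calling pattern for `TreeWalkCalculator`: construct it from a game, then call it with one policy per player plus a `TabularPolicy` that fixes the infostate ordering. A minimal standalone sketch along those lines, assuming the vendored `open_spiel` package builds and imports (for example via the `PYTHONPATH` set in the Dockerfile):

```python
# Minimal sketch: per-infostate action values under uniform play in Kuhn poker.
# Assumes pyspiel and the vendored open_spiel.python package import cleanly.
import numpy as np
import pyspiel

from open_spiel.python import policy
from open_spiel.python.algorithms import action_value

game = pyspiel.load_game("kuhn_poker")
calc = action_value.TreeWalkCalculator(game)
uniform = policy.TabularPolicy(game)  # uniform by default; also fixes state ordering

# Calling the object walks the game tree once and tabulates the statistics.
result = calc([uniform, uniform], uniform)
print(np.asarray(result.root_node_values))     # expected returns at the root
print(np.asarray(result.action_values).shape)  # (num_info_states, num_actions)
print(np.asarray(result.counterfactual_reach_probs))
```

The returned namedtuple carries the root values, per-infostate action values, and the counterfactual and player reach probabilities documented in `_CalculatorReturn`.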
+ +"""Compute the value of action given a policy vs a best responder.""" + +import collections + +from open_spiel.python import policy +from open_spiel.python.algorithms import action_value +from open_spiel.python.algorithms import get_all_states +from open_spiel.python.algorithms import policy_utils +import pyspiel + + +def _transitions(state, policies): + """Returns a list of (action, prob) pairs from the specified state.""" + if state.is_chance_node(): + return state.chance_outcomes() + else: + pl = state.current_player() + return list(policies[pl].action_probabilities(state).items()) + + +def _tuples_from_policy(policy_vector): + return [ + (action, probability) for action, probability in enumerate(policy_vector) + ] + + +_CalculatorReturn = collections.namedtuple( + "_CalculatorReturn", + [ + # The exploitability of the opponent strategy, i.e. the value of the + # best-responder player BR. + "exploitability", + # An array of shape `[len(info_states), game.num_distinct_actions()]` + # giving the value of each action vs the best response. + # Will be zero for invalid actions. + "values_vs_br", + # The player's counterfactual reach probability of this infostate when + # playing against the BR, as a list of shape [num_info_states]. + "counterfactual_reach_probs_vs_br", + # The reach probability of the current player at the infostates when + # playing against the BR, as list shape [num_info_states]. + # This is the product of the current player probs along *one* trajectory + # leading to this info-state (this number should be the same along + # any trajectory leading to this info-state because of perfect recall). + "player_reach_probs_vs_br", + ]) + + +class Calculator(object): + """Class to orchestrate the calculation.""" + + def __init__(self, game): + if game.num_players() != 2: + raise ValueError("Only supports 2-player games.") + self.game = game + self._num_players = game.num_players() + self._num_actions = game.num_distinct_actions() + + self._action_value_calculator = action_value.TreeWalkCalculator(game) + # best_responder[i] is a best response to the provided policy for player i. + # It is therefore a policy for player (1-i). + self._best_responder = {0: None, 1: None} + self._all_states = None + + def __call__(self, player, player_policy, info_states): + """Computes action values per state for the player. + + Args: + player: The id of the player (0 <= player < game.num_players()). This + player will play `player_policy`, while the opponent will play a best + response. + player_policy: A `policy.Policy` object. + info_states: A list of info state strings. + + Returns: + A `_CalculatorReturn` nametuple. See its docstring for the documentation. + """ + self.player = player + opponent = 1 - player + + def best_response_policy(state): + infostate = state.information_state_string(opponent) + action = best_response_actions[infostate] + return [(action, 1.0)] + + # If the policy is a TabularPolicy, we can directly copy the infostate + # strings & values from the class. This is significantly faster than having + # to create the infostate strings. + if isinstance(player_policy, policy.TabularPolicy): + tabular_policy = { + key: _tuples_from_policy(player_policy.policy_for_key(key)) + for key in player_policy.state_lookup + } + # Otherwise, we have to calculate all the infostate strings everytime. This + # is ~2x slower. + else: + # We cache these as they are expensive to compute & do not change. 
+ if self._all_states is None: + self._all_states = get_all_states.get_all_states( + self.game, + depth_limit=-1, + include_terminals=False, + include_chance_states=False) + self._state_to_information_state = { + state: self._all_states[state].information_state_string() + for state in self._all_states + } + tabular_policy = policy_utils.policy_to_dict( + player_policy, self.game, self._all_states, + self._state_to_information_state) + + # When constructed, TabularBestResponse does a lot of work; we can save that + # work by caching it. + if self._best_responder[player] is None: + self._best_responder[player] = pyspiel.TabularBestResponse( + self.game, opponent, tabular_policy) + else: + self._best_responder[player].set_policy(tabular_policy) + + # Computing the value at the root calculates best responses everywhere. + best_response_value = self._best_responder[player].value_from_state( + self.game.new_initial_state()) + best_response_actions = self._best_responder[ + player].get_best_response_actions() + + # Compute action values + self._action_value_calculator.compute_all_states_action_values({ + player: + player_policy, + opponent: + policy.tabular_policy_from_callable( + self.game, best_response_policy, [opponent]), + }) + obj = self._action_value_calculator._get_tabular_statistics( # pylint: disable=protected-access + ((player, s) for s in info_states)) + + # Return values + return _CalculatorReturn( + exploitability=best_response_value, + values_vs_br=obj.action_values, + counterfactual_reach_probs_vs_br=obj.counterfactual_reach_probs, + player_reach_probs_vs_br=obj.player_reach_probs) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/action_value_vs_best_response_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/action_value_vs_best_response_test.py new file mode 100644 index 0000000..4fbcc80 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/action_value_vs_best_response_test.py @@ -0,0 +1,79 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
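`Calculator.__call__` above pairs a fixed policy for one player with an exact best responder for the other, and reports both the exploitability and the per-infostate action values against that best response. A small sketch mirroring the Kuhn poker test that follows (the infostate strings are Kuhn poker's standard encodings):

```python
# Sketch: value of a uniform player-0 policy against a best-responding opponent.
import pyspiel

from open_spiel.python import policy
from open_spiel.python.algorithms import action_value_vs_best_response

game = pyspiel.load_game("kuhn_poker")
calc = action_value_vs_best_response.Calculator(game)

# Player 0 plays uniformly at random; player 1 plays an exact best response.
result = calc(0, policy.UniformRandomPolicy(game),
              ["0", "1", "2", "0pb", "1pb", "2pb"])

print(result.exploitability)                    # 15/36 for this matchup (see test below)
print(result.values_vs_br)                      # per-infostate action values vs the BR
print(result.counterfactual_reach_probs_vs_br)  # reach probs when facing the BR
```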
+ +"""Tests for open_spiel.python.algorithms.action_value_vs_best_response.py.""" + +from absl.testing import absltest + +import numpy as np + +from open_spiel.python import policy +from open_spiel.python.algorithms import action_value_vs_best_response +import pyspiel + + +class ActionValuesVsBestResponseTest(absltest.TestCase): + + def test_kuhn_poker_uniform(self): + game = pyspiel.load_game("kuhn_poker") + calc = action_value_vs_best_response.Calculator(game) + (expl, avvbr, cfrp, + player_reach_probs) = calc(0, policy.UniformRandomPolicy(game), + ["0", "1", "2", "0pb", "1pb", "2pb"]) + self.assertAlmostEqual(expl, 15 / 36) + np.testing.assert_allclose( + avvbr, + [ + [-1.5, -2.0], # 0 (better to pass) + [-0.5, -0.5], # 1 (same) + [0.5, 1.5], # 2 (better to bet) + [-1.0, -2.0], # 0pb - losing + [-1.0, 0.0], # 1pb - best response is bet always + [-1.0, 2.0], # 2pb - winning + ]) + np.testing.assert_allclose(cfrp, [1 / 3, 1 / 3, 1 / 3, 1 / 3, 1 / 3, 1 / 3]) + np.testing.assert_allclose([1, 1, 1, 1 / 2, 1 / 2, 1 / 2], + player_reach_probs) + + def test_kuhn_poker_always_pass_p0(self): + game = pyspiel.load_game("kuhn_poker") + calc = action_value_vs_best_response.Calculator(game) + (expl, avvbr, cfrp, player_reach_probs) = calc( + 0, policy.FirstActionPolicy(game), + ["0", "1", "2", "0pb", "1pb", "2pb"]) + self.assertAlmostEqual(expl, 1.) + np.testing.assert_allclose( + avvbr, + [ + # Opening bet. If we pass, we always lose (pass-pass with op's K, + # otherwise pass-bet-pass). + # If we bet, we always win (because op's best response is to pass, + # because this is an unreachable state and we break ties in favour + # of the lowest action). + [-1, 1], + [-1, 1], + [-1, 1], + # We pass, opp bets into us. This can be either J or Q (K will pass + # because of the tie-break rules). + # So we are guaranteed to be winning with Q or K. + [-1, -2], # 0pb + [-1, 2], # 1pb + [-1, 2], # 2pb + ]) + np.testing.assert_allclose(cfrp, [1 / 3, 1 / 3, 1 / 3, 1 / 6, 1 / 6, 1 / 3]) + np.testing.assert_allclose([1., 1., 1., 1., 1., 1.], player_reach_probs) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas.py new file mode 100644 index 0000000..0dfffd3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas.py @@ -0,0 +1,424 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Approximate the limiting logit equilbrium (Nash) of a large normal-form game. + +This is a python implementation of the Nash solver for normal-form games, +Average Deviation Incentive Descent with Adaptive Sampling (ADIDAS), from +"Sample-based Approximation of Nash in Large Many-player Games via Gradient +Descent" [Gemp et al, AAMAS 2022]. + +Link to paper: https://arxiv.org/abs/2106.01285. 
+ +The limiting logit equilibrium (LLE) was originally defined in "Quantal Response +Equilibria for Normal Form Games" [McKelvey & Palfrey, Games and Economic +Behavior 1995]. The LLE is a Nash equilibrium that is uniquely defined for +*almost* all games. +""" + +import itertools + +import time + +import numpy as np + +from open_spiel.python.algorithms.adidas_utils.helpers import misc +from open_spiel.python.algorithms.adidas_utils.helpers import simplex +from open_spiel.python.algorithms.adidas_utils.helpers.nonsymmetric import exploitability as nonsym_exp +from open_spiel.python.algorithms.adidas_utils.helpers.nonsymmetric import game_runner as nonsym_game_runner +from open_spiel.python.algorithms.adidas_utils.helpers.symmetric import exploitability as sym_exp +from open_spiel.python.algorithms.adidas_utils.helpers.symmetric import game_runner as sym_game_runner + + +class ADIDAS(object): + """Average Deviation Incentive Descent with Adaptive Sampling. + + Approximate the limiting logit equilibrium of a normal-form game. + + Attributes: + experiment_seed: int, seed for random number generator + random: numpy.random.RandomState object + results: dictionary of results populated upon completion of solver + """ + + def __init__(self, seed=0): + self.experiment_seed = seed + self.random = np.random.RandomState(self.experiment_seed) + + self.results = None + + def estimate_exploitability_sym(self, dist, num_eval_samples, num_ckpts, + num_players, game, policies): + """Estimate exploitability via monte carlo. + + Args: + dist: 1-d np.array, estimate of nash distribution + num_eval_samples: int, number of samples to estimate exploitability + num_ckpts: int, number of checkpoints (actions, policies, ...) + num_players: int, number of players + game: game with minimal functionality (see games/small.py) + policies: list mapping checkpoints to policies + Returns: + list of exploitabilities computed using [index] monte carlo samples + """ + pg_mean = np.zeros_like(dist) + exps_estimated = [] + for s in range(num_eval_samples): + base_profile = tuple([ + self.random.choice(num_ckpts, p=dist) for _ in range(num_players) + ]) + game_queries = sym_game_runner.construct_game_queries_for_exp( + base_profile, num_ckpts) + game_results = sym_game_runner.run_games_and_record_payoffs( + game_queries, game.get_payoffs_for_strategies, policies) + pg_s = np.zeros_like(dist) + for query, payoffs in game_results.items(): + pg_s[query[0]] = payoffs[0] + pg_mean = (pg_mean * float(s) + pg_s) / float(s + 1) + exps_estimated.append(pg_mean.max() - pg_mean.dot(dist)) + + return exps_estimated + + def estimate_exploitability_nonsym(self, dist, num_eval_samples, num_ckpts, + num_players, game, policies): + """Estimate exploitability via monte carlo. + + Args: + dist: list of 1-d np.arrays, estimate of nash distribution + num_eval_samples: int, number of samples to estimate exploitability + num_ckpts: int, number of checkpoints (actions, policies, ...) 
+ num_players: int, number of players + game: game with minimal functionality (see games/small.py) + policies: list mapping checkpoints to policies + Returns: + list of exploitabilities computed using [index] monte carlo samples + """ + pg_mean = [np.zeros_like(dist_i) for dist_i in dist] + exps_estimated = [] + for s in range(num_eval_samples): + base_profile = tuple([ + self.random.choice(num_ckpts[i], p=dist[i]) + for i in range(num_players) + ]) + game_queries = nonsym_game_runner.construct_game_queries_for_exp( + base_profile, num_ckpts) + game_results = nonsym_game_runner.run_games_and_record_payoffs( + game_queries, game.get_payoffs_for_strategies, policies) + for pi_query, payoffs in game_results.items(): + pi, query = pi_query + ai = query[pi] + pg_mean[pi][ai] += (payoffs[pi] - pg_mean[pi][ai]) / float(s + 1) + exp_is = [] + for i in range(num_players): + exp_is.append(pg_mean[i].max() - pg_mean[i].dot(dist[i])) + exps_estimated.append(np.mean(exp_is)) + + return exps_estimated + + def update_payoff_matrices(self, payoff_matrices, payoff_matrices_new, s): + """Update mean of payoff matrices. + + Args: + payoff_matrices: dictionary with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + **current mean + payoff_matrices_new: **new sample + s: int, sample number + Returns: + payoff_matrices with updated means + """ + if payoff_matrices: + for key in payoff_matrices_new: + new = payoff_matrices_new[key] + old = payoff_matrices[key] + payoff_matrices[key] += (new - old) / float(s + 1) + else: + payoff_matrices = payoff_matrices_new + + return payoff_matrices + + def construct_payoff_matrices_from_samples_sym( + self, game, dist, num_samples, policies, num_players, num_ckpts): + """Construct payoff matrices (approx. sym. polymatrix game) from samples. + + Args: + game: game with minimal functionality (see games/small.py) + dist: 1-d np.array, estimate of nash distribution + num_samples: int, `minibatch' size for stochastic gradient + policies: list mapping checkpoints to policies + num_players: int, number of players + num_ckpts: int, number of checkpoints (actions, policies, ...) + Returns: + payoff_matrices (2 x num_ckpts x num_ckpts array) to compute adidas grad + """ + payoff_matrices = np.zeros((2, num_ckpts, num_ckpts)) + for _ in range(num_samples): + base_profile = tuple([ + self.random.choice(num_ckpts, p=dist) for _ in range(num_players) + ]) + game_queries = sym_game_runner.construct_game_queries( + base_profile, num_ckpts) + game_results = sym_game_runner.run_games_and_record_payoffs( + game_queries, game.get_payoffs_for_strategies, policies) + payoff_matrices += sym_game_runner.form_payoff_matrices( + game_results, num_ckpts) / float(num_samples) + return payoff_matrices + + def construct_payoff_matrices_exactly_sym( + self, game, dist, num_players): + """Construct payoff matrices exactly (expected sym. polymatrix game). 
+ + Args: + game: game with minimal functionality (see games/small.py) + dist: 1-d np.array, estimate of nash distribution + num_players: int, number of players + Returns: + payoff_matrices (2 x A x A array) to compute adidas gradient + """ + sym_nash = [dist for _ in range(num_players)] + pt = game.payoff_tensor() + payoff_matrix_exp_0 = misc.pt_reduce(pt[0], sym_nash, [0, 1]) + payoff_matrix_exp_1 = misc.pt_reduce(pt[1], sym_nash, [0, 1]) + payoff_matrices = np.stack((payoff_matrix_exp_0, payoff_matrix_exp_1)) + return payoff_matrices + + def construct_payoff_matrices_from_samples_nonsym( + self, game, dist, num_samples, policies, num_players, num_ckpts): + """Construct payoff matrices (approx. nonsym. polymatrix) from samples. + + Args: + game: game with minimal functionality (see games/small.py) + dist: list of 1-d np.arrays, estimate of nash distribution + num_samples: int, `minibatch' size for stochastic gradient + policies: list mapping checkpoints to policies + num_players: int, number of players + num_ckpts: int, number of checkpoints (actions, policies, ...) + Returns: + payoff_matrices: dictionary with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + """ + payoff_matrices = None + for s in range(num_samples): + base_profile = tuple([ + self.random.choice(num_ckpts[i], p=dist[i]) + for i in range(num_players) + ]) + game_queries = nonsym_game_runner.construct_game_queries( + base_profile, num_ckpts) + game_results = nonsym_game_runner.run_games_and_record_payoffs( + game_queries, game.get_payoffs_for_strategies, policies) + payoff_matrices_new = nonsym_game_runner.form_payoff_matrices( + game_results, num_ckpts) + payoff_matrices = self.update_payoff_matrices(payoff_matrices, + payoff_matrices_new, + s) + return payoff_matrices + + def construct_payoff_matrices_exactly_nonsym( + self, game, dist, num_players): + """Construct payoff matrices exactly (expected nonsym. polymatrix game). + + Args: + game: game with minimal functionality (see games/small.py) + dist: list of 1-d np.arrays, estimate of nash distribution + num_players: int, number of players + Returns: + payoff_matrices: dictionary with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + """ + pt = game.payoff_tensor() + payoff_matrices = {} + for pi, pj in itertools.combinations(range(num_players), 2): + key = (pi, pj) + pt_i = misc.pt_reduce(pt[pi], dist, [pi, pj]) + pt_j = misc.pt_reduce(pt[pj], dist, [pi, pj]) + payoff_matrices[key] = np.stack((pt_i, pt_j), axis=0) + return payoff_matrices + + def approximate_nash(self, game, solver, sym, + num_iterations=10000, num_samples=1, + num_eval_samples=int(10e4), approx_eval=False, + exact_eval=False, avg_trajectory=False, + return_trajectory=False): + """Runs solver on game. 
+ + Args: + game: game with minimal functionality (see games/small.py) + solver: gradient solver (see utils/updates.py) + sym: bool, true if the game is symmetric across players + num_iterations: int, number of incremental updates + num_samples: int, `minibatch' size for stochastic gradient + num_eval_samples: int, number of samples to estimate exploitability + default = # of samples for P[|sample_payoff-true| > C/100] < ~5e-7% + where C = pt.max() - pt.min(); + P[|pt_grad|_inf <= C/100] > (1-5e-7)^num_actions + approx_eval: bool, whether to evaluate exploitability during + descent with stochastic samples + exact_eval: bool, whether to evaluate exploitability during + descent with exact expectation (req. full payoff tensor) + avg_trajectory: bool, whether to evaluate w.r.t. the average distribution + up to time t instead of the distribution at time t + return_trajectory: bool, whether to record all parameters (e.g., dist) + during learning and return them -- see solver code for details + Returns: + None -- dict of results stored in `results` attribute upon completion + (key=name of metric, value=[m_0, ..., m_{last_iter}]) + """ + num_players = game.num_players() + num_strats = game.num_strategies() + + if sym: + if len(set(num_strats)) != 1: + raise ValueError('Each player should have the same number of actions.') + num_strats = num_strats[0] + + params = solver.init_vars(num_strats, num_players) # dist = params[0] + if sym: + dist_avg = np.zeros_like(params[0]) + policies = list(range(num_strats)) + num_ckpts = len(policies) + form_payoffs_appx = self.construct_payoff_matrices_from_samples_sym + form_payoffs_exact = self.construct_payoff_matrices_exactly_sym + exp = sym_exp + estimate_exploitability = self.estimate_exploitability_sym + else: + dist_avg = [np.zeros_like(dist_i) for dist_i in params[0]] + policies = [list(range(num_strats_i)) for num_strats_i in num_strats] + num_ckpts = [len(policy_i) for policy_i in policies] + form_payoffs_appx = self.construct_payoff_matrices_from_samples_nonsym + form_payoffs_exact = self.construct_payoff_matrices_exactly_nonsym + exp = nonsym_exp + estimate_exploitability = self.estimate_exploitability_nonsym + + exps_exact = [] + exps_solver_exact = [] + exps_approx = [] + exps_solver_approx = [] + grad_norms = [] + + if return_trajectory: + params_traj = [] + + has_temp = False + if hasattr(solver, 'temperature') or hasattr(solver, 'p'): + has_temp = True + temperatures = [] + if hasattr(solver, 'temperature'): + temp_attr = 'temperature' + else: + temp_attr = 'p' + + early_exit = False + + start = time.time() + + # search for nash (sgd) + for t in range(num_iterations + 1): + dist = params[0] + if return_trajectory: + params_traj.append(params) + + if return_trajectory: + params_traj.append(params) + + if has_temp: + temperatures.append(getattr(solver, temp_attr)) + + if num_samples < np.inf: + payoff_matrices = form_payoffs_appx(game, dist, num_samples, + policies, num_players, num_ckpts) + else: + payoff_matrices = form_payoffs_exact(game, dist, num_players) + + grads, exp_sto, exp_solver_sto = solver.compute_gradients(params, + payoff_matrices) + + if sym: + grads_dist = grads[0] + grad_norms.append(simplex.grad_norm(dist, grads_dist)) + else: + grad_norm = 0. + grads_dist = grads[0] + for dist_i, grads_i in zip(dist, grads_dist[0]): + grad_norm += simplex.grad_norm(dist_i, grads_i)**2. 
+ grad_norm = np.sqrt(grad_norm) + grad_norms.append(grad_norm) + + if solver.has_aux: + solver.record_aux_errors(grads) + + if sym: + dist_avg += (dist - dist_avg) / float(t + 1) + else: + for i, dist_i in enumerate(dist): + dist_avg[i] += (dist_i - dist_avg[i]) / float(t + 1) + + if avg_trajectory: + dist_eval = dist_avg + else: + dist_eval = dist + + if approx_eval: + exps_approx.append(exp_sto) + exps_solver_approx.append(exp_solver_sto) + if exact_eval: + pt = game.payoff_tensor() + exps_exact.append(exp.unreg_exploitability(dist_eval, pt)) + exps_solver_exact.append(solver.exploitability(dist_eval, pt)) + + # skip the last update so to avoid computing the matching exploitability + # and gradient norm information outside the loop + if t < num_iterations: + params = solver.update(params, grads, t) + if misc.isnan(params): + print('Warning: NaN detected in params post-update. Exiting loop.') + early_exit = True + break + + end = time.time() + solve_runtime = end - start + start = end + + # evaluating exploitability (monte-carlo) + exp_estimated = estimate_exploitability(dist_eval, num_eval_samples, + num_ckpts, num_players, + game, policies) + + eval_runtime = time.time() - start + + results = {'exps_approx': exps_approx, + 'exps_solver_approx': exps_solver_approx, + 'exps_exact': exps_exact, + 'exps_solver_exact': exps_solver_exact, + 'exp_estimated': exp_estimated, + 'grad_norms': grad_norms, + 'dist': dist, + 'dist_avg': dist_avg, + 'solve_runtime': solve_runtime, + 'eval_runtime': eval_runtime, + 'early_exit': early_exit} + + if solver.has_aux: + results.update({'aux_errors': solver.aux_errors}) + + if return_trajectory: + results.update({'params_trajectory': params_traj}) + + if has_temp: + results.update({'temperatures': temperatures}) + + self.results = results diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_test.py new file mode 100644 index 0000000..c92dddf --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_test.py @@ -0,0 +1,75 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
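Both the Monte-Carlo estimators in `ADIDAS` above and the exact exploitability helpers reduce to the same quantity: how much one player could gain by deviating to a best response while everyone else keeps playing the candidate distribution. A tiny standalone sketch for a symmetric 2x2 game, with illustrative payoff numbers chosen only for this example:

```python
# Sketch: unregularized exploitability of a symmetric mixed strategy, by hand.
import numpy as np

# Row player's payoff matrix of a symmetric 2x2 game (illustrative values only).
payoff = np.array([[2.0, 0.0],
                   [3.0, 1.0]])
dist = np.array([0.5, 0.5])  # candidate symmetric profile

# Expected payoff of each pure deviation when the opponent plays `dist`.
deviation_payoffs = payoff.dot(dist)  # the payoff gradient ("nabla")
exploitability = deviation_payoffs.max() - deviation_payoffs.dot(dist)
print(exploitability)  # 0.5: deviating to action 1 gains half a point over dist
```

This is exactly the `pg_mean.max() - pg_mean.dot(dist)` pattern used in the exploitability estimates above; the regularized (Tsallis/Shannon) variants add an entropy bonus to the deviation payoff.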
+ +"""Tests for adidas.""" + +from absl.testing import absltest + +import numpy as np + +from open_spiel.python.algorithms import adidas + +from open_spiel.python.algorithms.adidas_utils.games.big import ElFarol +from open_spiel.python.algorithms.adidas_utils.games.small import MatrixGame +from open_spiel.python.algorithms.adidas_utils.solvers.symmetric import qre_anneal as qre_anneal_sym + + +class AdidasTest(absltest.TestCase): + + def test_adidas_on_prisoners_dilemma(self): + """Tests ADIDAS on a 2-player prisoner's dilemma game.""" + # pylint:disable=bad-whitespace + pt_r = np.array([[-1, -3], + [0, -2]]) + # pylint:enable=bad-whitespace + # shift tensor to ensure positivity required if run adidas w/ Tsallis entrpy + pt_r -= pt_r.min() + pt_c = pt_r.T # symmetric game + pt = np.stack((pt_r, pt_c), axis=0).astype(float) + pt /= pt.max() # arbitrary design choice to upper bound entries to 1 + game = MatrixGame(pt, seed=0) + # for games with more than 2 players, see adidas_utils/games/big.py + solver = qre_anneal_sym.Solver(temperature=100, + proj_grad=False, euclidean=True, + lrs=(1e-4, 1e-4), exp_thresh=0.01, + rnd_init=True, seed=0) + # note we set rnd_init to True which initializes adidas' initial + # approximation to nash to a random point on the simplex. if rnd_init is + # False, adidas is initialized to uniform which is the Nash equilibrium + # of the prisoner's dilemma, in which case adidas trivially solves this + # game in 0 iterations. + lle = adidas.ADIDAS(seed=0) + lle.approximate_nash(game, solver, sym=True, num_iterations=1, + num_samples=1, num_eval_samples=int(1e5), + approx_eval=True, exact_eval=True, + avg_trajectory=False) + self.assertLess(lle.results['exps_exact'][-1], 0.2) + + def test_adidas_on_elfarol(self): + """Test ADIDAS on a 10-player, symmetric El Farol bar game.""" + game = ElFarol(n=10, c=0.7) + solver = qre_anneal_sym.Solver(temperature=100, + proj_grad=False, euclidean=False, + lrs=(1e-4, 1e-2), exp_thresh=0.01, + seed=0) + lle = adidas.ADIDAS(seed=0) + lle.approximate_nash(game, solver, sym=True, num_iterations=1, + num_samples=np.inf, num_eval_samples=int(1e5), + approx_eval=True, exact_eval=True, + avg_trajectory=False) + self.assertLess(lle.results['exps_exact'][-1], 0.5) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/__init__.py new file mode 100644 index 0000000..a1223b9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/games/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/games/__init__.py new file mode 100644 index 0000000..a1223b9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/games/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/games/big.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/games/big.py new file mode 100644 index 0000000..d97c6a7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/games/big.py @@ -0,0 +1,132 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Big tensor games.""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np + +from open_spiel.python.algorithms.adidas_utils.helpers import misc + + +class TensorGame(object): + """Tensor Game.""" + + def __init__(self, pt, seed=None): + """Ctor. Inits payoff tensor (players x actions x ... np.array). + + Args: + pt: payoff tensor, np.array + seed: seed for random number generator, used if computing best responses + """ + if np.any(pt < 0.): + raise ValueError("Payoff tensor must contain non-negative values") + self.pt = pt + + self.seed = seed + self.random = np.random.RandomState(seed) + + def num_players(self): + return self.pt.shape[0] + + def num_strategies(self): + return self.pt.shape[1:] + + def payoff_tensor(self): + return self.pt + + def get_payoffs_for_strategies(self, policies): + """Return vector of payoffs for all players given list of strategies. + + Args: + policies: list of integers indexing strategies for each player + Returns: + np.array (length num players) of payoffs + """ + return self.pt[tuple([slice(None)] + policies)] + + def best_response(self, mixed_strategy, return_exp=False): + """Return best response and its superiority over the current strategy. + + Args: + mixed_strategy: np.ndarray (distribution over strategies) + return_exp: bool, whether to return how much best response exploits the + given mixed strategy (default is False) + Returns: + br: int, index of strategy (ties split randomly) + exp: u(br) - u(mixed_strategy) + """ + logging.warn("Assumes symmetric game! 
Returns br for player 0.") + gradient = misc.pt_reduce(self.pt[0], + [mixed_strategy] * self.num_players(), + [0]) + br = misc.argmax(self.random, gradient) + exp = gradient.max() - gradient.dot(mixed_strategy) + if return_exp: + return br, exp + else: + return br + + def best_population_response(self, dist, policies): + """Returns the best response to the current population of policies. + + Args: + dist: np.ndarray, distribution over policies + policies: list of integers indexing strategies for each player + Returns: + best response, exploitability tuple (see best_response) + """ + ns = self.num_strategies() + mixed_strat = np.zeros(ns) + for pure_strat, prob in zip(policies, dist): + mixed_strat[pure_strat] += prob + return self.best_response(mixed_strat) + + +class ElFarol(TensorGame): + """N-Player, 2-Action symmetric game with unique symmetric Nash.""" + + def __init__(self, n=2, c=0.5, B=0, S=1, G=2, seed=None): + """Ctor. Initializes payoff tensor (N x (2,) * N np.array). + + See Section 3.1, The El Farol Stage Game in + http://www.econ.ed.ac.uk/papers/id186_esedps.pdf + + action 0: go to bar + action 1: avoid bar + + Args: + n: int, number of players + c: float, threshold for `crowded' as a fraction of number of players + B: float, payoff for going to a crowded bar + S: float, payoff for staying at home + G: float, payoff for going to an uncrowded bar + seed: seed for random number generator, used if computing best responses + """ + assert G > S > B, "Game parameters must satisfy G > S > B." + pt = np.zeros((n,) + (2,) * n) + for idx in np.ndindex(pt.shape): + p = idx[0] + a = idx[1:] + a_i = a[p] + go_to_bar = (a_i < 1) + crowded = (n - 1 - sum(a) + a_i) >= (c * n) + if go_to_bar and not crowded: + pt[idx] = G + elif go_to_bar and crowded: + pt[idx] = B + else: + pt[idx] = S + super().__init__(pt, seed) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/games/gamut.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/games/gamut.py new file mode 100644 index 0000000..02c5c65 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/games/gamut.py @@ -0,0 +1,96 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""GAMUT games. + +See https://github.com/deepmind/open_spiel/tree/master/open_spiel/games/gamut +for details on how to build OpenSpiel with support for GAMUT. +""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np + +from open_spiel.python.egt.utils import game_payoffs_array +import pyspiel + + +class GAMUT(object): + """GAMUT Games.""" + + def __init__(self, config_list, java_path='', seed=None): + """Ctor. Inits payoff tensor (players x actions x ... np.array). 
+ + Args: + config_list: a list or strings alternating between gamut flags and values + see http://gamut.stanford.edu/userdoc.pdf for more information + e.g., config_list = ['-g', 'CovariantGame', '-players', '6', + '-normalize', '-min_payoff', '0', + '-max_payoff', '1', '-actions', '5', '-r', '0'] + java_path: string, java path + seed: random seed, some GAMUT games are randomly generated + """ + self.pt = None + self.config_list = config_list + + self.seed = seed + self.random = np.random.RandomState(seed) + + # parse interval for rho if supplied, e.g., '[-.2,1]' + if '-r' in config_list: + idx = next(i for i, s in enumerate(config_list) if s == '-r') + val = config_list[idx + 1] + if not val.isnumeric() and val[0] in '([' and val[-1] in ')]': + a, b = val.strip('[]()').split(',') + a = float(a) + b = float(b) + rho = self.random.rand() * (b - a) + a + config_list[idx + 1] = str(rho) + + if isinstance(seed, int): + self.config_list += ['-random_seed', str(seed)] + self.java_path = java_path + + if java_path: + generator = pyspiel.GamutGenerator( + java_path, + 'gamut/gamut_main_deploy.jar') + else: # use default java path as specified by pyspiel + generator = pyspiel.GamutGenerator( + 'gamut.jar') + self.game = generator.generate_game(config_list) + + def num_players(self): + return self.game.num_players() + + def num_strategies(self): + return [self.game.num_distinct_actions()] * self.num_players() + + def payoff_tensor(self): + if self.pt is None: + pt = np.asarray(game_payoffs_array(self.game)) + self.pt = pt - self.game.min_utility() + return self.pt + + def get_payoffs_for_strategies(self, policies): + """Return vector of payoffs for all players given list of strategies. + + Args: + policies: list of integers indexing strategies for each player + Returns: + np.array (length num players) of payoffs + """ + state = self.game.new_initial_state() + state.apply_actions(policies) + return np.asarray(state.returns()) - self.game.min_utility() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/games/pyspiel_tensor_game.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/games/pyspiel_tensor_game.py new file mode 100644 index 0000000..8090803 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/games/pyspiel_tensor_game.py @@ -0,0 +1,68 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Wrapper for loading pyspiel games as payoff tensors.""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np + +from open_spiel.python.egt.utils import game_payoffs_array +import pyspiel + + +class PyspielTensorGame(object): + """Matrix Game.""" + + def __init__(self, string_specifier='blotto(coins=10,fields=3,players=3)', + tensor_game=False, seed=None): + """Ctor. Inits payoff tensor (players x actions x ... 
np.array).""" + self.pt = None + self.string_specifier = string_specifier + self.tensor_game = tensor_game + + if tensor_game: + self.game = pyspiel.load_tensor_game(string_specifier) + else: + self.game = pyspiel.load_game(string_specifier) + + self.seed = seed # currently unused + + def num_players(self): + return self.game.num_players() + + def num_strategies(self): + return [self.game.num_distinct_actions()] * self.num_players() + + def payoff_tensor(self): + if self.pt is None: + if not self.tensor_game: + logging.info('reloading pyspiel game as tensor_game') + self.game = pyspiel.load_tensor_game(self.string_specifier) + self.tensor_game = True + pt = np.asarray(game_payoffs_array(self.game)) + self.pt = pt - self.game.min_utility() + return self.pt + + def get_payoffs_for_strategies(self, policies): + """Return vector of payoffs for all players given list of strategies. + + Args: + policies: list of integers indexing strategies for each player + Returns: + np.array (length num players) of payoffs + """ + state = self.game.new_initial_state() + state.apply_actions(policies) + return np.asarray(state.returns()) - self.game.min_utility() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/games/small.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/games/small.py new file mode 100644 index 0000000..15f8935 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/games/small.py @@ -0,0 +1,250 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Small matrix games.""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np + +from open_spiel.python.algorithms.adidas_utils.helpers import misc + + +class MatrixGame(object): + """Matrix Game.""" + + def __init__(self, pt, seed=None): + """Ctor. Inits payoff tensor (players x actions x ... np.array). + + Args: + pt: payoff tensor, np.array + seed: seed for random number generator, used if computing best responses + """ + if np.any(pt < 0.): + raise ValueError("Payoff tensor must contain non-negative values") + self.pt = pt + + self.seed = seed + self.random = np.random.RandomState(seed) + + def num_players(self): + return self.pt.shape[0] + + def num_strategies(self): + return self.pt.shape[1:] + + def payoff_tensor(self): + return self.pt + + def get_payoffs_for_strategies(self, policies): + """Return vector of payoffs for all players given list of strategies. + + Args: + policies: list of integers indexing strategies for each player + Returns: + np.array (length num players) of payoffs + """ + return self.pt[:, policies[0], policies[1]] + + def best_response(self, mixed_strategy, return_exp=False): + """Return best response and its superiority over the current strategy. 
+ + Args: + mixed_strategy: np.ndarray (distribution over strategies) + return_exp: bool, whether to return how much best response exploits the + given mixed strategy (default is False) + Returns: + br: int, index of strategy (ties split randomly) + exp: u(br) - u(mixed_strategy) + """ + logging.warn("Assumes symmetric game! Returns br for player 0.") + gradient = self.pt[0].dot(mixed_strategy) + br = misc.argmax(self.random, gradient) + exp = gradient.max() - gradient.dot(mixed_strategy) + if return_exp: + return br, exp + else: + return br + + def best_population_response(self, dist, policies): + """Returns the best response to the current population of policies. + + Args: + dist: np.ndarray, distribution over policies + policies: list of integers indexing strategies for each player + Returns: + best response, exploitability tuple (see best_response) + """ + ns = self.num_strategies() + mixed_strat = np.zeros(ns) + for pure_strat, prob in zip(policies, dist): + mixed_strat[pure_strat] += prob + return self.best_response(mixed_strat) + + +class BiasedGame(MatrixGame): + """2-Player, 3-Action symmetric game with biased stochastic best responses.""" + + def __init__(self, seed=None): + """Ctor. Initializes payoff tensor (2 x 3 x 3 np.array). + + Args: + seed: seed for random number generator, used if computing best responses + """ + # pylint:disable=bad-whitespace + pt_r = np.array([[0, 0, 0 ], + [1, -2, .5], + [-2, 1, -1]]) + 2. + # pylint:enable=bad-whitespace + pt_c = pt_r.T # symmetric game + pt = np.stack((pt_r, pt_c), axis=0).astype(float) + pt /= pt.max() # arbitrary design choice to upper bound entries to 1 + super().__init__(pt, seed) + + +class PrisonersDilemma(MatrixGame): + """2-Player, 2-Action symmetric prisoner's dilemma.""" + + def __init__(self, seed=None): + """Ctor. Initializes payoff tensor (2 x 2 x 2 np.array). + + Args: + seed: seed for random number generator, used if computing best responses + """ + # pylint:disable=bad-whitespace + pt_r = np.array([[-1, -3], + [0, -2]]) + # pylint:enable=bad-whitespace + # shift tensor to ensure positivity required for ATE + pt_r -= pt_r.min() + pt_c = pt_r.T # symmetric game + pt = np.stack((pt_r, pt_c), axis=0).astype(float) + pt /= pt.max() # arbitrary design choice to upper bound entries to 1 + super().__init__(pt, seed) + + +class RockPaperScissors(MatrixGame): + """2-Player, 3-Action symmetric RPS.""" + + def __init__(self, weights=None, seed=None): + """Ctor. Initializes payoff tensor (2 x 3 x 3 np.array). + + Args: + weights: list of weights (floats) for [rock, paper, scissors] + seed: seed for random number generator, used if computing best responses + """ + if weights is None: + weights = np.ones(3) + r, p, s = weights + # pylint:disable=bad-whitespace + pt_r = np.array([[0, -p, r], + [p, 0, -s], + [-r, s, 0]]) + # pylint:enable=bad-whitespace + # shift tensor to ensure positivity required for ATE + pt_r -= pt_r.min() + pt_c = pt_r.T # symmetric game + pt = np.stack((pt_r, pt_c), axis=0).astype(float) + super().__init__(pt, seed) + + +class SpiralGame(MatrixGame): + """2-Player, 3-Action symmetric game with spiral dynamics on simplex.""" + + def __init__(self, center=None, seed=None): + """Ctor. Initializes payoff tensor (2 x 3 x 3 np.array). + + Args: + center: center of cycle given in [x, y, z] Euclidean coordinates + seed: seed for random number generator, used if computing best responses + """ + if center is None: + center = np.ones(3) / 3. 
+ else: + if not ((np.sum(center) <= 1 + 1e-8) and np.all(center >= -1e-8)): + raise ValueError("center must lie on simplex") + self.center = center + center = center.reshape((3, 1)) + + # define coordinate frame for simplex; basis vectors on columns of transform + transform = np.array([[.5, -.5, 0], [-.5, -.5, 1], [1, 1, 1]]).T + transform /= np.linalg.norm(transform, axis=0) + transform_inv = np.linalg.inv(transform) + + # canonical cycle matrix in 2-d + cycle = 0.1 * np.array([[0, 1, 0], [1, 0, 0], [0, 0, 0]]) + + # payoff tensor maps euclidean to simplex frame, applies cycle, maps back + pt_r = transform.dot(cycle.dot(transform_inv)) + # subtracting off a column vector effectively offsets the vector field + # because [[c c c], ...] [[x], [y], [z]] = [c * (x + y + z), ...] = [c, ...] + pt_r -= pt_r.dot(center) + # shift tensor to ensure positivity required for ATE + if pt_r.min() < 0: + pt_r -= pt_r.min() + + pt_c = pt_r.T # symmetric game + pt = np.stack((pt_r, pt_c), axis=0).astype(float) + super().__init__(pt, seed) + + +class MatchingPennies(MatrixGame): + """2-Player, 2-Action non-symmetric matching pennies.""" + + def __init__(self, bias=1., seed=None): + """Ctor. Initializes payoff tensor (2 x 2 x 2 np.array). + + Args: + bias: float, rewards one action (bias) more than the other (1) + seed: seed for random number generator, used if computing best responses + """ + # pylint:disable=bad-whitespace + pt_r = np.array([[1, -1], + [-1, bias]]) + # pylint:enable=bad-whitespace + pt_c = (-pt_r).T # zero-sum game + pt = np.stack((pt_r, pt_c), axis=0).astype(float) + # shift tensor to ensure positivity required for ATE + pt -= pt.min() + pt /= pt.max() # arbitrary design choice to upper bound entries to 1 + super().__init__(pt, seed) + + +class Shapleys(MatrixGame): + """2-Player, 3-Action non-symmetric Shapleys game.""" + + def __init__(self, beta=1., seed=None): + """Ctor. Initializes payoff tensor (2 x 2 x 2 np.array). + + See Eqn 4 in https://arxiv.org/pdf/1308.4049.pdf. + + Args: + beta: float, modifies the game so that the utilities @ Nash are now + u_1(Nash) = (1 + beta) / 3 and u_2(Nash) = (1 - beta) / 3 + where Nash is the joint uniform distribution + seed: seed for random number generator, used if computing best responses + """ + # pylint:disable=bad-whitespace + pt_r = np.array([[1, 0, beta], + [beta, 1, 0], + [0, beta, 1]]) + pt_c = np.array([[-beta, 1, 0], + [0, -beta, 1], + [1, 0, -beta]]) + # pylint:enable=bad-whitespace + pt = np.stack((pt_r, pt_c), axis=0).astype(float) + # shift tensor to ensure positivity required for ATE + pt -= pt.min() + pt /= pt.max() # arbitrary design choice to upper bound entries to 1 + super().__init__(pt, seed) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/games/small_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/games/small_test.py new file mode 100644 index 0000000..1d08109 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/games/small_test.py @@ -0,0 +1,108 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.algorithms.adidas_utils.games.small.""" + +from absl import logging # pylint:disable=unused-import +from absl.testing import absltest +from absl.testing import parameterized + +import numpy as np + +from open_spiel.python.algorithms.adidas_utils.games import small +from open_spiel.python.algorithms.adidas_utils.helpers import simplex + + +class SmallTest(parameterized.TestCase): + + def test_biased_game(self, trials=100, atol=1e-5, rtol=1e-5, seed=1234): + """Test best responses to sampled opp. actions in BiasedGame are biased.""" + game = small.BiasedGame(seed) + random = np.random.RandomState(seed) + + successes = [] + for _ in range(trials): + dirichlet_alpha = np.ones(game.num_strategies()[0]) + dist = random.dirichlet(dirichlet_alpha) # mixed srategy + + sample_best_responses = np.argmax(game.payoff_tensor()[0], axis=0) + estimated_best_response = np.dot(sample_best_responses, dist) + + true_best_response = game.best_response(dist) + + successes += [not np.allclose(estimated_best_response, true_best_response, + rtol, atol)] + + perc = 100 * np.mean(successes) + logging.info('bias rate out of %d is %f', trials, perc) + self.assertGreaterEqual( + perc, 99., 'best responses should be biased more often') + + @staticmethod + def simp_to_euc(a, b, center): + r"""Transforms a point [a, b] on the simplex to Euclidean space. + + /\ ^ b + / \ | + /____\ --> a + + Args: + a: horizonal deviation from center + b: vertical deviation from center + center: center of ref frame given in [x, y, z] Euclidean coordinates + Returns: + 1-d np.array of len 3, i.e., np.array([x, y, z]) + """ + transform = np.array([[.5, -.5, 0], [-.5, -.5, 1], [1, 1, 1]]).T + transform /= np.linalg.norm(transform, axis=0) + return transform.dot(np.array([a, b, 0])) + center + + @parameterized.named_parameters( + ('up_down', 0., 0.1, 0., -0.1, -1.), + ('left_right', -0.1, 0., 0.1, 0., -1.), + ('up_left', 0., 0.1, -0.1, 0., 0.), + ('up_right', 0., 0.1, 0.1, 0., 0.), + ('down_left', 0., -0.1, -0.1, 0., 0.), + ('down_right', 0., -0.1, 0.1, 0., 0.), + ) + def test_spiral_game(self, dx_1, dy_1, dx_2, dy_2, expected_cos_sim, + trials=100, eps=0.1, seed=1234): + """Test that gradients on simplex rotate around SpiralGame's center.""" + random = np.random.RandomState(seed) + + successes = [] + for _ in range(trials): + dx, dy = eps * (random.rand(2) * 2 - 1) + center = self.simp_to_euc(dx, dy, np.ones(3) / 3.) 
+ game = small.SpiralGame(center, seed) + pt = game.payoff_tensor()[0] + + point_1 = self.simp_to_euc(dx_1, dy_1, game.center) + point_2 = self.simp_to_euc(dx_2, dy_2, game.center) + + grad_1 = simplex.project_grad(pt.dot(point_1)) + grad_2 = simplex.project_grad(pt.dot(point_2)) + norm = np.linalg.norm(grad_1) * np.linalg.norm(grad_2) + cos_sim = grad_1.dot(grad_2) / norm + + successes += [(np.abs(cos_sim - expected_cos_sim) < 1e-5)] + + perc = 100 * np.mean(successes) + logging.info('alignment success rate out of %d is %f', trials, perc) + self.assertGreaterEqual( + perc, 99., 'gradient field should exhibit cycles') + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/__init__.py new file mode 100644 index 0000000..a1223b9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/misc.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/misc.py new file mode 100644 index 0000000..2de7f47 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/misc.py @@ -0,0 +1,94 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Miscellaneous utils.""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np + + +def uniform_dist(x): + """Returns a uniform distribution with same shape as the given numpy array. + + Args: + x: numpy array + Returns: + constant numpy array of same shape as input x, sums to 1 + """ + return np.ones_like(x) / float(x.size) + + +def argmax(random, z): + """Returns argmax of flattened z with ties split randomly. + + Args: + random: Random number generator, e.g., np.random.RandomState() + z: np.array + Returns: + integer representing index of argmax + """ + inds = np.arange(z.size) + random.shuffle(inds) + z_shuffled = z[inds] + ind_max = np.argmax(z_shuffled) + return inds[ind_max] + + +def pt_reduce(payoff_tensor, strats, remove_players): + """Computes possible payoffs for remove_players with others' strats fixed. + + This is equivalent to the Jacobian of the payoff w.r.t. 
remove_players: + sum_{a...z} A_k * x_1a * ... * x_nz for player k. + Args: + payoff_tensor: a single player k's payoff tensor, i.e., + a num action x ... x num action (num player) np.array + strats: list of distributions over strategies for each player + remove_players: players to NOT sum over in expectation + Returns: + payoff tensor of shape: num_action x ... x num_action, + num_action for each player in remove_players + """ + result = np.copy(payoff_tensor) + result_dims = list(range(len(result.shape))) + other_player_idxs = list(result_dims) + for remove_player in remove_players: + other_player_idxs.remove(remove_player) + for other_player_idx in other_player_idxs: + new_result_dims = list(result_dims) + new_result_dims.remove(other_player_idx) + result = np.einsum(result, result_dims, strats[other_player_idx], + [other_player_idx], new_result_dims) + result_dims = new_result_dims + return result + + +def isnan(x): + """Checks for NaN's in nested objects.""" + if isinstance(x, float): + return np.isnan(x) + elif isinstance(x, int): + return np.isnan(x) + elif isinstance(x, np.ndarray): + return np.any(np.isnan(x)) + elif isinstance(x, list): + return np.any([isnan(xi) for xi in x]) + elif isinstance(x, tuple): + return np.any([isnan(xi) for xi in x]) + elif isinstance(x, dict): + return np.any([isnan(xi) for xi in x.values()]) + else: + typ = repr(type(x)) + err_string = 'type(x)={:s} not recognized when checking for NaN'.format(typ) + raise NotImplementedError(err_string) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/nonsymmetric/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/nonsymmetric/__init__.py new file mode 100644 index 0000000..a1223b9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/nonsymmetric/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/nonsymmetric/exploitability.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/nonsymmetric/exploitability.py new file mode 100644 index 0000000..03b799b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/nonsymmetric/exploitability.py @@ -0,0 +1,152 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Exploitability measurement utils for general (sym and non-sym) games.""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np +from scipy import special + +from open_spiel.python.algorithms.adidas_utils.helpers import misc + + +def unreg_exploitability(dist, payoff_tensor, aggregate=np.mean): + """Compute (avg, max) exploitability of dist for non-symmetric game. + + Args: + dist: list of 1-d np.arrays, current estimate of nash distribution + payoff_tensor: (n x A1 x ... x An) np.array, payoffs for each joint action + can also be list of (A1 x ... x An) np.arrays + aggregate: function to reduce individual exp_is to scalar, e.g., mean or max + Returns: + exploitability (float): avg_i payoff_i of best response_i - payoff_i of dist + """ + num_players = len(payoff_tensor) + + exp_i = [] + for i in range(num_players): + nabla_i = misc.pt_reduce(payoff_tensor[i], dist, [i]) + u_i_br = np.max(nabla_i) + u_i_dist = nabla_i.dot(dist[i]) + exp_i.append(u_i_br - u_i_dist) + + return aggregate(exp_i) + + +def ate_exploitability(dist, payoff_tensor, p=1, aggregate=np.mean): + """Compute Tsallis regularized exploitability of dist for non-symmetric game. + + Args: + dist: list of 1-d np.arrays, current estimate of nash distribution + payoff_tensor: (n x A1 x ... x An) np.array, payoffs for each joint action + assumed to be non-negative. can also be list of (A1 x ... x An) np.arrays + p: float in [0, 1], Tsallis entropy-regularization --> 0 as p --> 0 + aggregate: function to reduce individual exp_is to scalar, e.g., mean or max + Returns: + exploitability (float): avg_i payoff_i of best response_i - payoff_i of dist + """ + if np.min(payoff_tensor) < 0.: + raise ValueError('payoff tensor must be non-negative') + num_players = len(payoff_tensor) + + exp_i = [] + for i in range(num_players): + nabla_i = misc.pt_reduce(payoff_tensor[i], dist, [i]) + dist_i = dist[i] + if p > 0: + power = 1./p + s = np.linalg.norm(nabla_i, ord=power) + br_i = (nabla_i / np.linalg.norm(nabla_i, ord=power))**power + else: + power = np.inf + s = np.linalg.norm(nabla_i, ord=power) + br_i = np.zeros_like(dist_i) + maxima = (nabla_i == s) + br_i[maxima] = 1. / maxima.sum() + + u_i_br = nabla_i.dot(br_i) + s / (p + 1) * (1 - np.sum(br_i**(p + 1))) + u_i_dist = nabla_i.dot(dist_i) + s / (p + 1) * (1 - np.sum(dist_i**(p + 1))) + + exp_i.append(u_i_br - u_i_dist) + + return aggregate(exp_i) + + +def qre_exploitability(dist, payoff_tensor, temperature=0., aggregate=np.mean): + """Compute Shannon regularized exploitability of dist for non-symmetric game. + + Args: + dist: list of 1-d np.arrays, current estimate of nash distribution + payoff_tensor: (n x A1 x ... x An) np.array, payoffs for each joint action + assumed to be non-negative. can also be list of (A1 x ... x An) np.arrays + temperature: non-negative float + aggregate: function to reduce individual exp_is to scalar, e.g., mean or max + Returns: + exploitability (float): avg_i payoff_i of best response_i - payoff_i of dist + """ + num_players = len(payoff_tensor) + + exp_i = [] + for i in range(num_players): + nabla_i = misc.pt_reduce(payoff_tensor[i], dist, [i]) + dist_i = dist[i] + if temperature > 0: + br_i = special.softmax(nabla_i / temperature) + else: + br_i = np.zeros_like(dist_i) + maxima = (nabla_i == np.max(nabla_i)) + br_i[maxima] = 1. 
/ maxima.sum() + + u_i_br = nabla_i.dot(br_i) + temperature * special.entr(br_i).sum() + u_i_dist = nabla_i.dot(dist_i) + temperature * special.entr(dist_i).sum() + + exp_i.append(u_i_br - u_i_dist) + + return aggregate(exp_i) + + +def grad_norm_exploitability(dist, payoff_tensor, eta=None, temperature=0., + aggregate=np.mean): + """Compute (avg, max) exploitability of dist for non-symmetric game. + + Args: + dist: list of 1-d np.arrays, current estimate of nash distribution + payoff_tensor: (n x A1 x ... x An) np.array, payoffs for each joint action + can also be list of (A1 x ... x An) np.arrays + eta: step size for approximate best response (default 1 / (n * m)) + where n is # of players and m is # of actions (same for all players) + temperature: non-negative float + aggregate: function to reduce individual exp_is to scalar, e.g., mean or max + Returns: + exploitability (float): avg_i squared norm of projected-gradient_i + """ + num_players = len(payoff_tensor) + num_strategies = np.asarray([dist[i].size for i in range(num_players)]) + if eta is None: + eta = 1. / num_strategies + if not isinstance(eta, np.ndarray): + eta = np.ones(num_players, dtype=np.float32) * eta + + exp_i = [] + for i in range(num_players): + nabla_i = misc.pt_reduce(payoff_tensor[i], dist, [i]) + if temperature > 0.: + nabla_i -= temperature * (np.log(dist[i]) + 1) + m_i = dist[i].size + nabla_i_proj = nabla_i - 1. / m_i * np.sum(nabla_i) + nabla_i_sq_norm = np.inner(nabla_i_proj, nabla_i_proj) + exp_i.append(eta[i] * nabla_i_sq_norm) + + return aggregate(exp_i) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/nonsymmetric/exploitability_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/nonsymmetric/exploitability_test.py new file mode 100644 index 0000000..e477104 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/nonsymmetric/exploitability_test.py @@ -0,0 +1,282 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.algorithms.adidas_utils.helpers.nonsymmetric.exploitability. + +Computing the exploitability of a tsallis-entropy regularized game is more +involved, so we include a derivation here of an example test case using an +asymmetric prisoner's dilemma (see pd np.array below). Note that the +tsallis-entropy setting assumes non-negative payoffs so we add 3 to the array. +We assume p=1 for the tsallis entropy in this example. 
+ +dist = [(1/3, 2/3), (1/2, 1/2)] + +-- Player 1 -- +pt dist grad br payoff(br) payoff(dist) +[2 0] [1/2] = [1] --> [1/3] --> 5/3 --> 5/3 +[3 1] [1/2] [2] [2/3] + +s = sum(grad) = 3 + +tsallis-entr(br) = s / (p + 1) * (1 - br_1^2 - br_2^2) + = 3 / 2 * (1 - 1/9 - 4/9) = 2/3 + +tsallis-entr(dist) = s / (p + 1) * (1 - dist_1^2 - dist_2^2) + = 3 / 2 * (1 - 1/9 - 4/9) = 2/3 + +u_1(br_1) - u_1(dist) = 5/3 + 2/3 - 5/3 - 2/3 = 0 + +-- Player 2 -- +pt dist grad br payoff(br) payoff(dist) +[3 0] [1/3] = [1] --> [1/3] --> 5/3 --> 3/2 +[4 1] [2/3] [2] [2/3] + +s = sum(grad) = 3 + +tsallis-entr(br) = s / (p + 1) * (1 - br_1^2 - br_2^2) + = 3 / 2 * (1 - 1/9 - 4/9) = 2/3 + +tsallis-entr(dist) = s / (p + 1) * (1 - dist_1^2 - dist_2^2) + = 3 / 2 * (1 - 1/4 - 1/4) = 3/4 + +u_2(br_2) - u_2(dist) = 5/3 + 2/3 - 3/2 - 3/4 = 7 / 3 - 9 / 4 +""" + +from absl import logging # pylint:disable=unused-import +from absl.testing import absltest +from absl.testing import parameterized + +import numpy as np + +from open_spiel.python.algorithms.adidas_utils.helpers.nonsymmetric import exploitability + + +test_seed = 12345 + +# asymmetric prisoner's dilemma test case +# pylint:disable=bad-whitespace +pt_r = np.array([[2, 0], + [3, 1]]) +pt_c = np.array([[3, 4], + [0, 1]]) +# pylint:enable=bad-whitespace +pd = np.stack((pt_r, pt_c), axis=0) +pd_nash = [np.array([0, 1]), np.array([0, 1])] +pd_non_nash_1 = [np.array([1, 0]), np.array([1, 0])] +pd_non_nash_exp_1 = np.array([1., 1.]) +pd_non_nash_ate_exp_1 = np.array([9. / 5., 16. / 7.]) +pd_non_nash_2 = [np.array([1., 2.]) / 3., np.array([0.5, 0.5])] +pd_non_nash_exp_2 = np.array([1. / 3., 0.5]) +pd_non_nash_ate_exp_2 = np.array([0., 7. / 3. - 9. / 4.]) + +qre_br_1 = np.exp([1, 2]) / np.exp([1, 2]).sum() +qre_br_2 = np.copy(qre_br_1) +entr_br_1 = -np.sum(qre_br_1 * np.log(qre_br_1)) +entr_br_2 = -np.sum(qre_br_2 * np.log(qre_br_2)) +entr_non_nash_2_1 = -np.sum(pd_non_nash_2[0] * np.log(pd_non_nash_2[0])) +entr_non_nash_2_2 = -np.sum(pd_non_nash_2[1] * np.log(pd_non_nash_2[1])) +u_br_minus_non_nash_1 = (qre_br_1 - pd_non_nash_2[0]).dot([1, 2]) +u_br_minus_non_nash_2 = (qre_br_2 - pd_non_nash_2[1]).dot([1, 2]) +pd_non_nash_qre_exp_2_1 = u_br_minus_non_nash_1 + entr_br_1 - entr_non_nash_2_1 +pd_non_nash_qre_exp_2_2 = u_br_minus_non_nash_2 + entr_br_2 - entr_non_nash_2_2 +pd_non_nash_qre_exp_2 = np.array([pd_non_nash_qre_exp_2_1, + pd_non_nash_qre_exp_2_2]) + +# rock-paper-scissors test case (nonsymmetric should work for symmetric as well) +# pylint:disable=bad-whitespace +pt_r = np.array([[0, -1, 1], + [1, 0, -1], + [-1, 1, 0]]) +# pylint:enable=bad-whitespace +pt_r -= pt_r.min() +pt_c = pt_r.T +rps = np.stack((pt_r, pt_c), axis=0) +rps_nash = [np.ones(3) / 3., np.ones(3) / 3.] +rps_non_nash_1 = [np.array([1, 0, 0]), np.array([1, 0, 0])] +rps_non_nash_exp_1 = np.array([1., 1.]) +rps_non_nash_2 = [np.array([0, 1, 0]), np.array([0, 1, 0])] +rps_non_nash_exp_2 = np.array([1., 1.]) +rps_non_nash_3 = [np.array([0, 0, 1]), np.array([0, 0, 1])] +rps_non_nash_exp_3 = np.array([1., 1.]) + +# two-player game with different numbers of actions +# pylint:disable=bad-whitespace +pt_r = np.array([[2, 2], + [3, 0], + [0, 3]]) +pt_c = np.array([[2, 1, 0], + [3, 0, 1]]).T +# pylint:enable=bad-whitespace +rect = [pt_r, pt_c] +rect_unreg_nash = [np.array([0, 1, 0]), np.array([1, 0])] +rect_unreg_nash_ate_exp = np.array([4. 
/ 5., 0.]) +qre_br_1 = np.exp([2, 3, 0]) / np.exp([2, 3, 0]).sum() +qre_br_2 = np.exp([1, 0]) / np.exp([1, 0]).sum() +entr_br_1 = -np.sum(qre_br_1 * np.log(qre_br_1)) +entr_br_2 = -np.sum(qre_br_2 * np.log(qre_br_2)) +entr_non_nash_2_1 = 0. +entr_non_nash_2_2 = 0. +u_br_minus_dist_1 = (qre_br_1 - rect_unreg_nash[0]).dot([2, 3, 0]) +u_br_minus_dist_2 = (qre_br_2 - rect_unreg_nash[1]).dot([1, 0]) +rect_qre_exp_1 = u_br_minus_dist_1 + entr_br_1 - entr_non_nash_2_1 +rect_qre_exp_2 = u_br_minus_dist_2 + entr_br_2 - entr_non_nash_2_2 +rect_unreg_nash_qre_exp = np.array([rect_qre_exp_1, rect_qre_exp_2]) + + +class ExploitabilityTest(parameterized.TestCase): + + @parameterized.named_parameters( + ('PD_nash', pd, pd_nash), + ('RPS_nash', rps, rps_nash), + ('RECT_nash', rect, rect_unreg_nash), + ) + def test_unreg_exploitability_of_nash(self, payoff_tensor, nash): + exp = exploitability.unreg_exploitability(nash, payoff_tensor, np.max) + self.assertEqual(exp, 0., 'nash should have zero exploitability') + + @parameterized.named_parameters( + ('PD_non_nash_1', pd, pd_non_nash_1, pd_non_nash_exp_1), + ('PD_non_nash_2', pd, pd_non_nash_2, pd_non_nash_exp_2), + ('RPS_non_nash_1', rps, rps_non_nash_1, rps_non_nash_exp_1), + ('RPS_non_nash_2', rps, rps_non_nash_2, rps_non_nash_exp_2), + ('RPS_non_nash_3', rps, rps_non_nash_3, rps_non_nash_exp_3), + ) + def test_unreg_exploitability_of_non_nash(self, payoff_tensor, dist, exp): + no_op = lambda x: x + exp_pred = exploitability.unreg_exploitability(dist, payoff_tensor, no_op) + equal = np.allclose(exp_pred, exp) + msg = 'exploitability mismatch: pred={}, true={}'.format(exp_pred, exp) + self.assertTrue(equal, msg) + + @parameterized.named_parameters( + ('PD_rand', pd, test_seed), + ('RPS_rand', rps, test_seed), + ('RECT_rand', rect, test_seed), + ) + def test_unreg_exploitability_of_rand(self, payoff_tensor, seed=None): + trials = 100 + random = np.random.RandomState(seed) + num_strategies = payoff_tensor[0].shape + total_num_strategies = sum(num_strategies) + pseudo_dists = random.rand(trials, total_num_strategies) + exploitable = [] + for pseudo_dist in pseudo_dists: + # first split and normalize pseudo_dist into strat for each player + pseudo_dist_i = np.split(pseudo_dist, np.cumsum(num_strategies)[:-1]) + dist = [pdi / pdi.sum() for pdi in pseudo_dist_i] + exp = exploitability.unreg_exploitability(dist, payoff_tensor, np.max) + exploitable.append(exp > 0.) 
+ perc = 100 * np.mean(exploitable) + logging.info('rand strat exploitable rate out of %d is %f', trials, perc) + self.assertEqual(perc, 100., 'found rand strat that was nash') + + @parameterized.named_parameters( + ('RPS_nash_p=0', rps, rps_nash, 0.), + ('RPS_nash_p=0.1', rps, rps_nash, 0.1), + ('RPS_nash_p=1', rps, rps_nash, 1.), + ) + def test_ate_exploitability_of_nash(self, payoff_tensor, nash, p): + exp = exploitability.ate_exploitability(nash, payoff_tensor, p, np.max) + self.assertGreaterEqual(0., exp, + 'uniform nash should have zero exploitability') + + @parameterized.named_parameters( + ('PD_non_nash_p=0', pd, 0., pd_non_nash_1, pd_non_nash_exp_1), + ('PD_non_nash_p=1', pd, 1., pd_non_nash_2, pd_non_nash_ate_exp_2), + ('RECT_non_nash_p=0', rect, 1., rect_unreg_nash, rect_unreg_nash_ate_exp), + ) + def test_ate_exploitability_of_non_nash(self, payoff_tensor, p, dist, exp): + no_op = lambda x: x + exp_pred = exploitability.ate_exploitability(dist, payoff_tensor, p, no_op) + close = np.allclose(exp_pred, exp) + msg = 'exploitability mismatch: pred={}, true={}'.format(exp_pred, exp) + self.assertTrue(close, msg=msg) + + @parameterized.named_parameters( + ('RPS_rand_p=0', rps, 0., test_seed), + ('RPS_rand_p=0.1', rps, 0.1, test_seed), + ('RPS_rand_p=1', rps, 1., test_seed), + ('RECT_rand_p=1', rect, 1., test_seed), + ) + def test_ate_exploitability_of_rand(self, payoff_tensor, p, seed=None): + trials = 100 + random = np.random.RandomState(seed) + num_strategies = payoff_tensor[0].shape + total_num_strategies = sum(num_strategies) + pseudo_dists = random.rand(trials, total_num_strategies) + exploitable = [] + for pseudo_dist in pseudo_dists: + # first split and normalize pseudo_dist into strat for each player + pseudo_dist_i = np.split(pseudo_dist, np.cumsum(num_strategies)[:-1]) + dist = [pdi / pdi.sum() for pdi in pseudo_dist_i] + exp = exploitability.ate_exploitability(dist, payoff_tensor, p, np.max) + exploitable.append(exp > 0.) 
+ perc = 100 * np.mean(exploitable) + logging.info('rand strat exploitable rate out of %d is %f', trials, perc) + self.assertEqual(perc, 100., 'found rand strat that was nash') + + @parameterized.named_parameters( + ('RPS_nash_tau=0', rps, rps_nash, 0.), + ('RPS_nash_tau=0.1', rps, rps_nash, 0.1), + ('RPS_nash_tau=1', rps, rps_nash, 1.), + ) + def test_qre_exploitability_of_nash(self, payoff_tensor, nash, temperature): + exp = exploitability.qre_exploitability(nash, payoff_tensor, temperature, + np.max) + self.assertGreaterEqual(1e-10, exp, + 'uniform nash should have zero exploitability') + + @parameterized.named_parameters( + ('PD_non_nash_tau=0', pd, 0., pd_non_nash_1, pd_non_nash_exp_1), + ('PD_non_nash_tau=1', pd, 1., pd_non_nash_2, pd_non_nash_qre_exp_2), + ('RECT_non_nash_tau=1', rect, 1., rect_unreg_nash, + rect_unreg_nash_qre_exp), + ) + def test_qre_exploitability_of_non_nash(self, payoff_tensor, temperature, + dist, exp): + no_op = lambda x: x + exp_pred = exploitability.qre_exploitability(dist, payoff_tensor, + temperature, no_op) + close = np.allclose(exp_pred, exp) + msg = 'exploitability mismatch: pred={}, true={}'.format(exp_pred, exp) + self.assertTrue(close, msg=msg) + + @parameterized.named_parameters( + ('RPS_rand_tau=0', rps, 0., test_seed), + ('RPS_rand_tau=0.1', rps, 0.1, test_seed), + ('RPS_rand_tau=1', rps, 1., test_seed), + ('RECT_rand_tau=1', rect, 1., test_seed), + ) + def test_qre_exploitability_of_rand(self, payoff_tensor, temperature, + seed=None): + trials = 100 + random = np.random.RandomState(seed) + num_strategies = payoff_tensor[0].shape + total_num_strategies = sum(num_strategies) + pseudo_dists = random.rand(trials, total_num_strategies) + exploitable = [] + for pseudo_dist in pseudo_dists: + # first split and normalize pseudo_dist into strat for each player + pseudo_dist_i = np.split(pseudo_dist, np.cumsum(num_strategies)[:-1]) + dist = [pdi / pdi.sum() for pdi in pseudo_dist_i] + exp = exploitability.qre_exploitability(dist, payoff_tensor, temperature, + np.max) + exploitable.append(exp > 0.) + perc = 100 * np.mean(exploitable) + logging.info('rand strat exploitable rate out of %d is %f', trials, perc) + self.assertEqual(perc, 100., 'found rand strat that was nash') + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/nonsymmetric/game_runner.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/nonsymmetric/game_runner.py new file mode 100644 index 0000000..d88ed19 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/nonsymmetric/game_runner.py @@ -0,0 +1,132 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utils for computing gradient information: run games and record payoffs. 
+""" + +import itertools + +from absl import logging # pylint:disable=unused-import + +import numpy as np + + +def construct_game_queries(base_profile, num_checkpts): + """Constructs a list of checkpoint selection tuples to query value function. + + Each query tuple (key, query) where key = (pi, pj) and query is + (p1's selected checkpt, ..., p7's selected checkpt) fixes the players in the + game of diplomacy to be played. It may be necessary to play several games with + the same players to form an accurate estimate of the value or payoff for each + player as checkpts contain stochastic policies. + + Args: + base_profile: list of selected checkpts for each player, i.e., + a sample from the player strategy profile ([x_i ~ p(x_i)]) + num_checkpts: list of ints, number of strats (or ckpts) per player + Returns: + Set of query tuples containing a selected checkpoint index for each player. + """ + new_queries = set([]) + + num_players = len(base_profile) + for pi, pj in itertools.combinations(range(num_players), 2): + new_profile = list(base_profile) + for ai in range(num_checkpts[pi]): + new_profile[pi] = ai + for aj in range(num_checkpts[pj]): + new_profile[pj] = aj + query = tuple(new_profile) + pair = (pi, pj) + new_queries.update([(pair, query)]) + + return new_queries + + +def construct_game_queries_for_exp(base_profile, num_checkpts): + """Constructs a list of checkpoint selection tuples to query value function. + + Each query tuple (key, query) where key = (pi,) and query is + (p1's selected checkpt, ..., p7's selected checkpt) fixes the players in the + game of diplomacy to be played. It may be necessary to play several games with + the same players to form an accurate estimate of the value or payoff for each + player as checkpts contain stochastic policies. + + Args: + base_profile: list of selected checkpts for each player, i.e., + a sample from the player strategy profile ([x_i ~ p(x_i)]) + num_checkpts: list of ints, number of strats (or ckpts) per player + Returns: + Set of query tuples containing a selected checkpoint index for each player. + """ + new_queries = set([]) + + num_players = len(base_profile) + for pi in range(num_players): + new_profile = list(base_profile) + for ai in range(num_checkpts[pi]): + new_profile[pi] = ai + query = tuple(new_profile) + new_queries.update([(pi, query)]) + + return new_queries + + +def run_games_and_record_payoffs(game_queries, evaluate_game, ckpt_to_policy): + """Simulate games according to game queries and return results. + + Args: + game_queries: set of tuples containing indices specifying each players strat + key_query = (agent_tuple, profile_tuple) format + evaluate_game: callable function that takes a list of policies as argument + ckpt_to_policy: list of maps from strat (or checkpoint) to a policy, one + map for each player + Returns: + dictionary: key=key_query, value=np.array of payoffs (1 for each player) + """ + game_results = {} + for key_query in game_queries: + _, query = key_query + policies = [ckpt_to_policy[pi][ckpt_i] for pi, ckpt_i in enumerate(query)] + payoffs = evaluate_game(policies) + game_results.update({key_query: payoffs}) + return game_results + + +def form_payoff_matrices(game_results, num_checkpts): + """Packages dictionary of game results into a payoff tensor. 
+ + Args: + game_results: dictionary of payoffs for each game evaluated, keys are + (pair, profile) where pair is a tuple of the two agents played against + each other and profile indicates pure joint action played by all agents + num_checkpts: list of ints, number of strats (or ckpts) per player + Returns: + payoff_matrices: dict of np.arrays (2 x num_checkpts x num_checkpts) with + payoffs for two players. keys are pairs above with lowest index agent + first + """ + num_players = len(num_checkpts) + payoff_matrices = {} + for pi, pj in itertools.combinations(range(num_players), 2): + key = (pi, pj) + payoff_matrices[key] = np.zeros((2, num_checkpts[pi], num_checkpts[pj])) + for key_profile, payoffs in game_results.items(): + key, profile = key_profile + i, j = key + ai = profile[i] + aj = profile[j] + payoff_matrices[key][0, ai, aj] = payoffs[i] + payoff_matrices[key][1, ai, aj] = payoffs[j] + return payoff_matrices diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/nonsymmetric/updates.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/nonsymmetric/updates.py new file mode 100644 index 0000000..ddc8c24 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/nonsymmetric/updates.py @@ -0,0 +1,129 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Generic solver for non-symmetric games.""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np +from scipy import special + +from open_spiel.python.algorithms.adidas_utils.helpers import simplex +from open_spiel.python.algorithms.adidas_utils.helpers.nonsymmetric import exploitability + + +class Solver(object): + """Generic Solver.""" + + def __init__(self, proj_grad=True, euclidean=False, rnd_init=False, + seed=None): + """Ctor.""" + self.num_players = None + self.proj_grad = proj_grad + self.rnd_init = rnd_init + self.lrs = (None,) + self.has_aux = False + + self.euclidean = euclidean + if euclidean: + self.update = self.euc_descent_step + else: + self.update = self.mirror_descent_step + + self.seed = seed + self.random = np.random.RandomState(seed) + + def init_vars(self, num_strats, num_players): + """Initialize solver parameters.""" + self.num_players = num_players + if len(num_strats) != num_players: + raise ValueError('Must specify num strategies for each player') + init_dist = [] + for num_strats_i in num_strats: + if self.rnd_init: + init_dist_i = self.random.rand(num_strats_i) + else: + init_dist_i = np.ones(num_strats_i) + init_dist_i /= init_dist_i.sum() + init_dist.append(init_dist_i) + return (init_dist,) + + def compute_gradients(self, params, payoff_matrices): + """Compute and return gradients for all parameters. + + Args: + params: e.g., tuple of params (dist,) + payoff_matrices: dictionary with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. 
keys + are sorted and arrays should be indexed in the same order + Returns: + eg., tuple of gradients (grad_dist,) + """ + raise NotImplementedError('Should be implemented by specific solver.') + + def exploitability(self, params, payoff_tensor): + """Compute and return exploitability that solver is minimizing. + + Args: + params: e.g., tuple of params (dist,) + payoff_tensor: (n x A1 x ... x An) np.array, payoffs for each joint + action. can also be list of (A1 x ... x An) np.arrays + Returns: + float, exploitability of current dist + """ + return exploitability.unreg_exploitability(params, payoff_tensor) + + def euc_descent_step(self, params, grads, t, eps=0.): + """Projected gradient descent on exploitability using Euclidean projection. + + Args: + params: tuple of variables to be updated (dist,) + grads: tuple of variable gradients (grad_dist,) + t: int, solver iteration (unused) + eps: float > 0, force all probabilities >= eps / dim(dist) + Returns: + new_params: tuple of update params (new_dist,) + """ + del t + lr_dist = self.lrs[0] + new_params = [] + for dist_i, dist_grad_i in zip(params[0], grads[0]): + new_dist_i = dist_i - lr_dist * dist_grad_i + new_dist_i = simplex.euclidean_projection_onto_simplex(new_dist_i) + if eps > 0: + new_dist_i = simplex.project_to_interior(new_dist_i, eps) + new_params.append(new_dist_i) + return (new_params,) + + def mirror_descent_step(self, params, grads, t, eps=0.): + """Entropic mirror descent on exploitability. + + Args: + params: tuple of variables to be updated (dist - a list of np.arrays) + grads: tuple of variable gradients (grad_dist - a list of np.arrays) + t: int, solver iteration (unused) + eps: float > 0, force all probabilities >= eps / dim(dist) + Returns: + new_params: tuple of update params (new_dist) + """ + del t + lr_dist = self.lrs[0] + new_params = [] + for dist_i, dist_grad_i in zip(params[0], grads[0]): + new_dist_i = np.clip(dist_i, 0, np.inf) + new_dist_i = special.softmax(np.log(new_dist_i) - lr_dist * dist_grad_i) + if eps > 0: + new_dist_i = simplex.project_to_interior(new_dist_i, eps) + new_params.append(new_dist_i) + return (new_params,) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/simplex.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/simplex.py new file mode 100644 index 0000000..079cfcb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/simplex.py @@ -0,0 +1,111 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Treatment of iterates and gradients over the simplex.""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np + + +def grad_norm(dist, grad, eps=1e-8, simplex_tol=1e-9): + """Compute norm of gradient projected onto the tangent space of simplex. 
+ + *assumes context is gradient descent (not ascent) + + Args: + dist: np.array, distribution + grad: np.array, gradient (same shape as distribution) + eps: float, elements of dist in [eps, 1 - eps] are considered to be in the + interior of the simplex. gradients on border of simplex + simplex_tol: float, tolerance for checking if a point lies on the simplex, + sum(vec) <= 1 + simplex_tol and all(vec > -simplex_tol). should be smaller + than eps descent steps or points that are "leaving" simplex will be + mislabeled + Returns: + float, norm of projected gradient + """ + if simplex_tol >= eps: + raise ValueError("simplex_tol should be less than eps") + grad_proj = project_grad(grad) + g_norm = np.linalg.norm(grad_proj) + if g_norm > 0: + # take a gradient descent step in the direction grad_proj with len eps + # to determine if the update is "leaving" the simplex + dist -= eps * grad_proj / g_norm + if not ((np.sum(dist) <= 1 + simplex_tol) and np.all(dist >= -simplex_tol)): + g_norm = 0. + return g_norm + + +def project_grad(g): + """Project gradient onto tangent space of simplex.""" + return g - g.sum() / g.size + + +# Project to probability simplex +# Based on this paper: +# Projection onto the probability simplex: An efficient algorithm with a +# simple proof, and an application +# https://arxiv.org/pdf/1309.1541.pdf +def euclidean_projection_onto_simplex(y, eps=1e-3, subset=True): + """O(n log n) Euclidean projection of y onto the simplex. + + Args: + y: np.array + eps: float, ensure x remains at least eps / dim away from facets of simplex + subset: bool, whether to project onto a subset of the simplex defined by eps + Returns: + np.array, y projected onto the simplex + """ + if np.all(y >= 0.) and np.abs(np.sum(y) - 1.) < 1e-8: + return y + d = len(y) + u = sorted(y, reverse=True) + sum_uj = 0. + rho = 0. + for j in range(d): + sum_uj += u[j] + tj = (1. - sum_uj) / (j + 1.) + if u[j] + tj <= 0: + rho = j - 1 + sum_uj = sum_uj - u[j] + break + else: + rho = j + lam = (1. - sum_uj) / (rho + 1.) + x = np.array([max(y[i] + lam, 0.) for i in range(d)]) + if subset: + scale = 1. - eps * float(d + 1) / d + offset = eps / float(d) + x = scale * x + offset + x /= x.sum() + return x + + +def project_to_interior(x, eps): + """Project x onto interior of simplex. + + Args: + x: np.array of shape (dim,) + eps: float, ensure x remains at least eps / dim away from facets of simplex + Returns: + np.array, distribution x with min(x) >= eps / dim + """ + min_x = np.min(x) + d = len(x) + if min_x < eps / d: + t = (eps / d - min_x) / (1. / d - min_x) + x = x * (1 - t) + 1 / d * t + return x diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/simplex_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/simplex_test.py new file mode 100644 index 0000000..b9a15a4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/simplex_test.py @@ -0,0 +1,60 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.algorithms.adidas_utils.helpers.simplex.""" + +from absl.testing import absltest +from absl.testing import parameterized + +import numpy as np + +from open_spiel.python.algorithms.adidas_utils.helpers import simplex + + +class SimplexTest(parameterized.TestCase): + + @parameterized.named_parameters( + ('inside', np.array([.25, .75]), np.array([.25, .75])), + ('outside_1', np.ones(2), 0.5 * np.ones(2)), + ('outside_2', np.array([2., 0.]), np.array([1., 0.])), + ('outside_3', np.array([.25, .25]), np.array([.5, .5])), + ) + def test_euclidean_projection(self, vector, expected_projection): + projection = simplex.euclidean_projection_onto_simplex(vector, subset=False) + self.assertListEqual(list(projection), list(expected_projection), + msg='projection not accurate') + + @parameterized.named_parameters( + ('orth', np.array([.75, .75]), np.array([.0, .0])), + ('oblique', np.array([1., .5]), np.array([.25, -.25])), + ('tangent', np.array([.25, .25, -.5]), np.array([.25, .25, -.5])), + ) + def test_tangent_projection(self, vector, expected_projection): + projection = simplex.project_grad(vector) + self.assertListEqual(list(projection), list(expected_projection), + msg='projection not accurate') + + @parameterized.named_parameters( + ('orth_1', np.array([0.5, 0.5]), np.array([.75, .75]), 0.), + ('orth_2', np.array([1., 0.]), np.array([.75, .75]), 0.), + ('tangent_1', np.array([1., 0.]), np.array([-.5, .5]), 0.), + ('tangent_2', np.array([1., 0.]), np.array([1., -1.]), np.sqrt(2)), + ) + def test_grad_norm(self, dist, grad, expected_norm): + norm = simplex.grad_norm(dist, grad) + self.assertAlmostEqual(norm, expected_norm, msg='norm not accurate') + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/symmetric/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/symmetric/__init__.py new file mode 100644 index 0000000..a1223b9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/symmetric/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/symmetric/exploitability.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/symmetric/exploitability.py new file mode 100644 index 0000000..5badffc --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/symmetric/exploitability.py @@ -0,0 +1,127 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Exploitability measurement utils for symmetric games.""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np +from scipy import special + +from open_spiel.python.algorithms.adidas_utils.helpers import misc + + +def unreg_exploitability(dist, payoff_tensor): + """Compute exploitability of dist for symmetric game. + + Args: + dist: 1-d np.array, current estimate of nash distribution + payoff_tensor: (>=1 x A x ... x A) np.array, payoffs for each joint action + Returns: + exploitability (float): payoff of best response - payoff of dist + """ + num_players = payoff_tensor.shape[0] + nabla = misc.pt_reduce(payoff_tensor[0], [dist] * num_players, [0]) + + u_br = np.max(nabla) + u_dist = nabla.dot(dist) + + return u_br - u_dist + + +def ate_exploitability(dist, payoff_tensor, p=1): + """Compute Tsallis regularized exploitability of dist for symmetric game. + + Args: + dist: 1-d np.array, current estimate of nash distribution + payoff_tensor: (>=1 x A x ... x A) np.array, payoffs for each joint action + assumed to be non-negative + p: float in [0, 1], Tsallis entropy-regularization --> 0 as p --> 0 + Returns: + exploitability (float): payoff of best response - payoff of dist + """ + if payoff_tensor.min() < 0.: + raise ValueError('payoff tensor must be non-negative') + num_players = payoff_tensor.shape[0] + nabla = misc.pt_reduce(payoff_tensor[0], [dist] * num_players, [0]) + if p > 0: + power = 1./p + s = np.linalg.norm(nabla, ord=power) + br = (nabla / np.linalg.norm(nabla, ord=power))**power + else: + power = np.inf + s = np.linalg.norm(nabla, ord=power) + br = np.zeros_like(dist) + maxima = (nabla == s) + br[maxima] = 1. / maxima.sum() + + u_br = nabla.dot(br) + s / (p + 1) * (1 - np.sum(br**(p + 1))) + u_dist = nabla.dot(dist) + s / (p + 1) * (1 - np.sum(dist**(p + 1))) + + return u_br - u_dist + + +def qre_exploitability(dist, payoff_tensor, temperature=0.): + """Compute Shannon regularized exploitability of dist for symmetric game. + + Args: + dist: 1-d np.array, current estimate of nash distribution + payoff_tensor: (>=1 x A x ... x A) np.array, payoffs for each joint action + assumed to be non-negative + temperature: non-negative float + Returns: + exploitability (float): payoff of best response - payoff of dist + """ + num_players = payoff_tensor.shape[0] + nabla = misc.pt_reduce(payoff_tensor[0], [dist] * num_players, [0]) + if temperature > 0: + br = special.softmax(nabla / temperature) + else: + br = np.zeros_like(dist) + maxima = (nabla == np.max(nabla)) + br[maxima] = 1. / maxima.sum() + + u_br = nabla.dot(br) + temperature * special.entr(br).sum() + u_dist = nabla.dot(dist) + temperature * special.entr(dist).sum() + + return u_br - u_dist + + +def grad_norm_exploitability(dist, payoff_tensor, eta=None, temperature=0.): + """Compute (avg, max) exploitability of dist for non-symmetric game. + + Args: + dist: 1-d np.array, current estimate of nash distribution + payoff_tensor: (>=1 x A x ... 
x A) np.array, payoffs for each joint action + assumed to be non-negative + eta: step size for approximate best response (default 1 / (n * m)) + where n is # of players and m is # of actions (same for all players) + temperature: non-negative float + Returns: + exploitability (float): squared norm of projected-gradient + """ + + if eta is None: + eta = 1. / dist.size + + num_players = payoff_tensor.shape[0] + nabla = misc.pt_reduce(payoff_tensor[0], [dist] * num_players, [0]) + if temperature > 0: + nabla -= temperature * (np.log(dist) + 1) + + m = dist.size + nabla_proj = nabla - 1. / m * np.sum(nabla) + nabla_sq_norm = np.inner(nabla_proj, nabla_proj) + + return eta * nabla_sq_norm diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/symmetric/exploitability_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/symmetric/exploitability_test.py new file mode 100644 index 0000000..9a5cab7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/symmetric/exploitability_test.py @@ -0,0 +1,215 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.algorithms.adidas_utils.helpers.symmetric.exploitability. + +Computing the exploitability of a tsallis-entropy regularized game is more +involved, so we include a derivation here of an example test case using the +prisoner's dilemma (see pd np.array below). Note that the tsallis-entropy +setting assumes non-negative payoffs so we add 3 to the array. We assume p=1 +for the tsallis entropy in this example. + +pd dist grad br payoff(br) payoff(dist) +[2 0] [.5] = [1] --> [1/3] --> 5/3 --> 3/2 +[3 1] [.5] [2] [2/3] + +s = sum(grad) = 3 + +tsallis-entr(br) = s / (p + 1) * (1 - br_1^2 - br_2^2) + = 3 / 2 * (1 - 1/9 - 4/9) = 2/3 + +tsallis-entr(dist) = s / (p + 1) * (1 - dist_1^2 - dist_2^2) + = 3 / 2 * (1 - 1/4 - 1/4) = 3/4 + +u(br) - u(dist) = 5/3 + 2/3 - 3/2 - 3/4 = 7 / 3 - 9 / 4 +""" + +from absl import logging # pylint:disable=unused-import +from absl.testing import absltest +from absl.testing import parameterized + +import numpy as np + +from open_spiel.python.algorithms.adidas_utils.helpers.symmetric import exploitability + + +test_seed = 12345 + +# prisoner's dilemma test case +# pylint:disable=bad-whitespace +pt_r = np.array([[-1, -3], + [0, -2]]) +# pylint:enable=bad-whitespace +pt_r -= pt_r.min() +pt_c = pt_r.T +pd = np.stack((pt_r, pt_c), axis=0) +pd_nash = np.array([0, 1]) +pd_non_nash_1 = np.array([1, 0]) +pd_non_nash_exp_1 = 1. +pd_non_nash_ate_exp_1 = pd_non_nash_exp_1 +pd_non_nash_2 = np.array([0.5, 0.5]) +pd_non_nash_exp_2 = 0.5 +pd_non_nash_ate_exp_2 = 7. / 3. - 9. / 4. 
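+# 7/3 - 9/4 is the value of u(br) - u(dist) derived in the module docstring.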
+ +qre_br = np.exp([1, 2]) / np.exp([1, 2]).sum() +entr_br = -np.sum(qre_br * np.log(qre_br)) +entr_non_nash_2 = -np.sum(pd_non_nash_2 * np.log(pd_non_nash_2)) +u_br_minus_non_nash = (qre_br - pd_non_nash_2).dot([1, 2]) +pd_non_nash_qre_exp_2 = u_br_minus_non_nash + (entr_br - entr_non_nash_2) + +# rock-paper-scissors test case +# pylint:disable=bad-whitespace +pt_r = np.array([[0, -1, 1], + [1, 0, -1], + [-1, 1, 0]]) +# pylint:enable=bad-whitespace +pt_r -= pt_r.min() +pt_c = pt_r.T +rps = np.stack((pt_r, pt_c), axis=0) +rps_nash = np.ones(3) / 3. +rps_non_nash_1 = np.array([1, 0, 0]) +rps_non_nash_exp_1 = 1. +rps_non_nash_2 = np.array([0, 1, 0]) +rps_non_nash_exp_2 = 1. +rps_non_nash_3 = np.array([0, 0, 1]) +rps_non_nash_exp_3 = 1. + + +class ExploitabilityTest(parameterized.TestCase): + + @parameterized.named_parameters( + ('PD_nash', pd, pd_nash), + ('RPS_nash', rps, rps_nash), + ) + def test_unreg_exploitability_of_nash(self, payoff_tensor, nash): + # assumes symmetric games + exp = exploitability.unreg_exploitability(nash, payoff_tensor) + self.assertEqual(exp, 0., 'nash should have zero exploitability') + + @parameterized.named_parameters( + ('PD_non_nash_1', pd, pd_non_nash_1, pd_non_nash_exp_1), + ('PD_non_nash_2', pd, pd_non_nash_2, pd_non_nash_exp_2), + ('RPS_non_nash_1', rps, rps_non_nash_1, rps_non_nash_exp_1), + ('RPS_non_nash_2', rps, rps_non_nash_2, rps_non_nash_exp_2), + ('RPS_non_nash_3', rps, rps_non_nash_3, rps_non_nash_exp_3), + ) + def test_unreg_exploitability_of_non_nash(self, payoff_tensor, dist, exp): + # assumes symmetric games + exp_pred = exploitability.unreg_exploitability(dist, payoff_tensor) + self.assertEqual(exp_pred, exp, 'dist should have the given exploitability') + + @parameterized.named_parameters( + ('PD_rand', pd, test_seed), + ('RPS_rand', rps, test_seed), + ) + def test_unreg_exploitability_of_rand(self, payoff_tensor, seed=None): + trials = 100 + random = np.random.RandomState(seed) + num_strategies = payoff_tensor.shape[-1] + dists = random.rand(trials, num_strategies) + dists /= np.sum(dists, axis=1, keepdims=True) + exploitable = [] + for dist in dists: + exp = exploitability.unreg_exploitability(dist, payoff_tensor) + exploitable.append(exp > 0.) 
+ perc = 100 * np.mean(exploitable) + logging.info('rand strat exploitable rate out of %d is %f', trials, perc) + self.assertEqual(perc, 100., 'found rand strat that was nash') + + @parameterized.named_parameters( + ('RPS_nash_p=0', rps, rps_nash, 0.), + ('RPS_nash_p=0.1', rps, rps_nash, 0.1), + ('RPS_nash_p=1', rps, rps_nash, 1.), + ) + def test_ate_exploitability_of_nash(self, payoff_tensor, nash, p): + # assumes symmetric games + exp = exploitability.ate_exploitability(nash, payoff_tensor, p) + self.assertGreaterEqual(0., exp, + 'uniform nash should have zero exploitability') + + @parameterized.named_parameters( + ('PD_non_nash_p=0', pd, 0., pd_non_nash_1, pd_non_nash_exp_1), + ('PD_non_nash_p=1', pd, 1., pd_non_nash_2, pd_non_nash_ate_exp_2), + ) + def test_ate_exploitability_of_non_nash(self, payoff_tensor, p, dist, exp): + # assumes symmetric games + exp_pred = exploitability.ate_exploitability(dist, payoff_tensor, p) + self.assertAlmostEqual(exp_pred, exp, + msg='dist should have the given exploitability') + + @parameterized.named_parameters( + ('RPS_rand_p=0', rps, 0., test_seed), + ('RPS_rand_p=0.1', rps, 0.1, test_seed), + ('RPS_rand_p=1', rps, 1., test_seed), + ) + def test_ate_exploitability_of_rand(self, payoff_tensor, p, seed=None): + trials = 100 + random = np.random.RandomState(seed) + num_strategies = payoff_tensor.shape[-1] + dists = random.rand(trials, num_strategies) + dists /= np.sum(dists, axis=1, keepdims=True) + exploitable = [] + for dist in dists: + exp = exploitability.ate_exploitability(dist, payoff_tensor, p) + exploitable.append(exp > 0.) + perc = 100 * np.mean(exploitable) + logging.info('rand strat exploitable rate out of %d is %f', trials, perc) + self.assertEqual(perc, 100., 'found rand strat that was nash') + + @parameterized.named_parameters( + ('RPS_nash_tau=0', rps, rps_nash, 0.), + ('RPS_nash_tau=0.1', rps, rps_nash, 0.1), + ('RPS_nash_tau=1', rps, rps_nash, 1.), + ) + def test_qre_exploitability_of_nash(self, payoff_tensor, nash, temperature): + # assumes symmetric games + exp = exploitability.qre_exploitability(nash, payoff_tensor, temperature) + self.assertGreaterEqual(1e-10, exp, + 'uniform nash should have zero exploitability') + + @parameterized.named_parameters( + ('PD_non_nash_tau=0', pd, 0., pd_non_nash_1, pd_non_nash_exp_1), + ('PD_non_nash_tau=1', pd, 1., pd_non_nash_2, pd_non_nash_qre_exp_2), + ) + def test_qre_exploitability_of_non_nash(self, payoff_tensor, temperature, + dist, exp): + # assumes symmetric games + exp_pred = exploitability.qre_exploitability(dist, payoff_tensor, + temperature) + self.assertAlmostEqual(exp_pred, exp, + msg='dist should have the given exploitability') + + @parameterized.named_parameters( + ('RPS_rand_tau=0', rps, 0., test_seed), + ('RPS_rand_tau=0.1', rps, 0.1, test_seed), + ('RPS_rand_tau=1', rps, 1., test_seed), + ) + def test_qre_exploitability_of_rand(self, payoff_tensor, temperature, + seed=None): + trials = 100 + random = np.random.RandomState(seed) + num_strategies = payoff_tensor.shape[-1] + dists = random.rand(trials, num_strategies) + dists /= np.sum(dists, axis=1, keepdims=True) + exploitable = [] + for dist in dists: + exp = exploitability.qre_exploitability(dist, payoff_tensor, temperature) + exploitable.append(exp > 0.) 
+ perc = 100 * np.mean(exploitable) + logging.info('rand strat exploitable rate out of %d is %f', trials, perc) + self.assertEqual(perc, 100., 'found rand strat that was nash') + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/symmetric/game_runner.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/symmetric/game_runner.py new file mode 100644 index 0000000..7a8308d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/symmetric/game_runner.py @@ -0,0 +1,112 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utils for computing gradient information: run games and record payoffs. +""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np + + +def construct_game_queries(base_profile, num_checkpts): + """Constructs a list of checkpoint selection tuples to query value function. + + Each query tuple (p1's selected checkpt, ..., p7's selected checkpt) + fixes the players in the game of diplomacy to be played. It may be necessary + to play several games with the same players to form an accurate estimate of + the value or payoff for each player as checkpts contain stochastic policies. + + Args: + base_profile: list of selected checkpts for each player, i.e., + a sample from the player strategy profile ([x_i ~ p(x_i)]) + num_checkpts: number of checkpts available to each player + Returns: + Set of query tuples containing a selected checkpoint index for each player. + """ + new_queries = set([]) + + pi, pj = 0, 1 + new_profile = list(base_profile) + for ai in range(num_checkpts): + new_profile[pi] = ai + for aj in range(num_checkpts): + new_profile[pj] = aj + query = tuple(new_profile) + new_queries.update([query]) + + return new_queries + + +def construct_game_queries_for_exp(base_profile, num_checkpts): + """Constructs a list of checkpoint selection tuples to query value function. + + Each query tuple (p1's selected checkpt, ..., p7's selected checkpt) + fixes the players in the game of diplomacy to be played. It may be necessary + to play several games with the same players to form an accurate estimate of + the value or payoff for each player as checkpts contain stochastic policies. + + Args: + base_profile: list of selected checkpts for each player, i.e., + a sample from the player strategy profile ([x_i ~ p(x_i)]) + num_checkpts: number of checkpts available to each player + Returns: + Set of query tuples containing a selected checkpoint index for each player. + """ + new_queries = set([]) + + pi = 0 + new_profile = list(base_profile) + for ai in range(num_checkpts): + new_profile[pi] = ai + query = tuple(new_profile) + new_queries.update([query]) + + return new_queries + + +def run_games_and_record_payoffs(game_queries, evaluate_game, ckpt_to_policy): + """Simulate games according to game queries and return results. 
+ + Args: + game_queries: set of tuples containing indices specifying each players strat + evaluate_game: callable function that takes a list of policies as argument + ckpt_to_policy: maps a strat (or checkpoint) to a policy + Returns: + dictionary: key=query, value=np.array of payoffs (1 for each player) + """ + game_results = {} + for query in game_queries: + policies = [ckpt_to_policy[ckpt] for ckpt in query] + payoffs = evaluate_game(policies) + game_results.update({query: payoffs}) + return game_results + + +def form_payoff_matrices(game_results, num_checkpts): + """Packages dictionary of game results into a payoff tensor. + + Args: + game_results: dictionary of payoffs for each game evaluated + num_checkpts: int, number of strats (or ckpts) per player + Returns: + payoff_matrices: np.array (2 x num_checkpts x num_checkpts) with payoffs for + two players (assumes symmetric game and only info for 2 players is needed + for stochastic gradients) + """ + payoff_matrices = np.zeros((2, num_checkpts, num_checkpts)) + for profile, payoffs in game_results.items(): + i, j = profile[:2] + payoff_matrices[:, i, j] = payoffs[:2] + return payoff_matrices diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/symmetric/updates.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/symmetric/updates.py new file mode 100644 index 0000000..8620548 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/symmetric/updates.py @@ -0,0 +1,113 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Exploitability measurement utils.""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np +from scipy import special + +from open_spiel.python.algorithms.adidas_utils.helpers import simplex +from open_spiel.python.algorithms.adidas_utils.helpers.symmetric import exploitability + + +class Solver(object): + """Generic Solver.""" + + def __init__(self, proj_grad=True, euclidean=False, rnd_init=False, + seed=None): + """Ctor.""" + self.num_players = None + self.proj_grad = proj_grad + self.rnd_init = rnd_init + self.lrs = (None, None, None) + self.has_aux = False + + self.euclidean = euclidean + if euclidean: + self.update = self.euc_descent_step + else: + self.update = self.mirror_descent_step + + self.seed = seed + self.random = np.random.RandomState(seed) + + def init_vars(self, num_strats, num_players): + """Initialize solver parameters.""" + self.num_players = num_players + if self.rnd_init: + init_dist = self.random.rand(num_strats) + else: + init_dist = np.ones(num_strats) + init_dist /= init_dist.sum() + return (init_dist,) + + def compute_gradients(self, params, payoff_matrices): + """Compute and return gradients for all parameters. 
+ + Args: + params: e.g., tuple of params (dist,) + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + eg., tuple of gradients (grad_dist,) + """ + raise NotImplementedError("Should be implemented by specific solver.") + + def exploitability(self, params, payoff_matrices): + """Compute and return exploitability that solver is minimizing. + + Args: + params: e.g., tuple of params (dist,) + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + float, exploitability of current dist + """ + return exploitability.unreg_exploitability(params, payoff_matrices) + + def euc_descent_step(self, params, grads, t, eps=0.): + """Projected gradient descent on exploitability using Euclidean projection. + + Args: + params: tuple of variables to be updated (dist,) + grads: tuple of variable gradients (grad_dist,) + t: int, solver iteration + eps: float > 0, force all probabilities >= eps / dim(dist) + Returns: + new_params: tuple of update params (new_dist,) + """ + del t + new_dist = params[0] - self.lrs[0] * grads[0] + new_dist = simplex.euclidean_projection_onto_simplex(new_dist) + if eps > 0: + new_dist = simplex.project_to_interior(new_dist, eps) + return (new_dist,) + + def mirror_descent_step(self, params, grads, t, eps=0.): + """Entropic mirror descent on exploitability. + + Args: + params: tuple of variables to be updated (dist) + grads: tuple of variable gradients (grad_dist) + t: int, solver iteration + eps: float > 0, force all probabilities >= eps / dim(dist) + Returns: + new_params: tuple of update params (new_dist) + """ + del t + dist = np.clip(params[0], 0, np.inf) + new_dist = special.softmax(np.log(dist) - self.lrs[0] * grads[0]) + if eps > 0: + new_dist = simplex.project_to_interior(new_dist, eps) + return (new_dist,) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/symmetric/utils.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/symmetric/utils.py new file mode 100644 index 0000000..be2b7e8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/symmetric/utils.py @@ -0,0 +1,52 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Symmetric payoff tensor utils.""" + +import itertools +import math + +from absl import logging # pylint:disable=unused-import + +import numpy as np + + +def sym(pt): + """Symmetrize stack of payoff tensors (stacked along first dimension). + + A payoff tensor can be `symmetrized' by averaging over all possible + permutations of the players. This means permuting the axes corresponding to + the player strategies as well as the payoffs assigned to the players. E.g., + player A playing strategy 1 and player B playing strategy 3 is no different + from player A playing strategy 3 and player B playing strategy 1 in a + symmetric game. Note we permuted the strategies, but we must also permute the + payoffs. 
+ + Args: + pt: tensor of shape: (num_players,) + (num_strategies,) * num_players + Returns: + pt_sym: symmetrized payoff tensor of same shape + """ + num_players = len(pt.shape[1:]) + num_perms = math.factorial(num_players) + pt_sym = np.zeros_like(pt) + logging.info('Symmetrizing over {:d} permutations...'.format(num_perms)) + for i, perm_players in enumerate(itertools.permutations(range(num_players))): + if (i % (num_perms // 5)) == 0: + logging.info('\t{:d} / {:d}'.format(i, num_perms)) + perm_axes = tuple([pi + 1 for pi in perm_players]) + permuted_tensor = np.transpose(pt, (0,) + perm_axes)[list(perm_players)] + pt_sym += permuted_tensor / float(num_perms) + logging.info('\t{total:d} / {total:d}'.format(total=num_perms)) + return pt_sym diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/symmetric/utils_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/symmetric/utils_test.py new file mode 100644 index 0000000..8135251 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/helpers/symmetric/utils_test.py @@ -0,0 +1,70 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.algorithms.adidas_utils.helpers.symmetric.utils.""" + +from absl import logging # pylint:disable=unused-import +from absl.testing import absltest +from absl.testing import parameterized + +import numpy as np + +from open_spiel.python.algorithms.adidas_utils.helpers.symmetric import utils + + +class UtilsTest(parameterized.TestCase): + + def test_symmetrize_tensor(self, trials=100, seed=1234): + random = np.random.RandomState(seed) + + successes = [] + for _ in range(trials): + pt = random.rand(3, 2, 2, 2) + + pt_sym_man = np.zeros_like(pt) + for p in range(3): + for i in range(2): + for j in range(2): + for k in range(2): + if p == 0: + # read: if player 0 plays i and its two opponents play j and k + # this should return the same payoff as when + # player 1 plays i and its two opponents play j and k + # player 2 plays i and its two opponents play j and k + # solution is to add up all these payoffs and replace with avg + pt_sym_man[p, i, j, k] = (pt[0, i, j, k] + pt[0, i, k, j] + + pt[1, j, i, k] + pt[1, k, i, j] + + pt[2, j, k, i] + pt[2, k, j, i]) / 6. + elif p == 1: + # same rationale, but with player 1 playing j + pt_sym_man[p, i, j, k] = (pt[0, j, i, k] + pt[0, j, k, i] + + pt[1, i, j, k] + pt[1, k, j, i] + + pt[2, i, k, j] + pt[2, k, i, j]) / 6. + else: + # same rationale, but with player 2 playing k + pt_sym_man[p, i, j, k] = (pt[0, k, i, j] + pt[0, k, j, i] + + pt[1, i, k, j] + pt[1, j, k, i] + + pt[2, i, j, k] + pt[2, j, i, k]) / 6. 
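+      # pt_sym_man above is the hand-computed average over all 3! = 6 player
+      # permutations of the random 3-player payoff tensor; utils.sym below
+      # should reproduce it entry for entry.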
+ pt_sym = utils.sym(pt) + + successes += [np.allclose(pt_sym, pt_sym_man)] + + perc = 100 * np.mean(successes) + logging.info('symmetrizing success rate out of %d is %f', trials, perc) + self.assertGreaterEqual( + perc, 100., 'symmetrizing failed') + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/__init__.py new file mode 100644 index 0000000..a1223b9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/__init__.py new file mode 100644 index 0000000..a1223b9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/adam.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/adam.py new file mode 100644 index 0000000..36eaa9b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/adam.py @@ -0,0 +1,243 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Stochastic Gradient Descent (Adam) Approx. 
Nash Solver.""" + +from absl import logging # pylint:disable=unused-import + +import jax +import jax.numpy as jnp + +import numpy as np + +import optax + +from open_spiel.python.algorithms.adidas_utils.helpers import simplex +from open_spiel.python.algorithms.adidas_utils.helpers.nonsymmetric import exploitability as exp +from open_spiel.python.algorithms.adidas_utils.helpers.nonsymmetric import updates + + +class Solver(updates.Solver): + """Adam Solver.""" + + def __init__(self, temperature=0., proj_grad=True, euclidean=False, + lrs=(1e-1,), rnd_init=False, seed=None, **kwargs): + """Ctor.""" + del kwargs + super().__init__(proj_grad, euclidean, rnd_init, seed) + if temperature < 0.: + raise ValueError('temperature must be non-negative') + self.temperature = temperature + self.lrs = lrs + self.num_estimates = 2 + + if temperature > 0: + self.eps = np.exp(-1 / temperature) # ensure dist[i] >= eps / dim(dist) + else: + self.eps = 0. + self.update = lambda *args: self.descent_step(*args, eps=self.eps) + + self.opt = optax.adam(learning_rate=lrs[0]) + self.opt_state = self.opt.init(jnp.zeros(1)) + + def init_vars(self, num_strats, num_players): + """Initialize solver parameters.""" + self.num_players = num_players + if len(num_strats) != num_players: + raise ValueError('Must specify num strategies for each player') + init_dist = [] + for num_strats_i in num_strats: + if self.rnd_init: + init_dist_i = self.random.rand(num_strats_i) + else: + init_dist_i = np.ones(num_strats_i) + init_dist_i /= init_dist_i.sum() + init_dist_i = simplex.project_to_interior(init_dist_i, self.eps) + init_dist.append(init_dist_i) + + init_params = [ + jnp.array(dist_to_logits(init_dist_i)) for init_dist_i in init_dist + ] + + self.opt_state = self.opt.init(init_params) + + return (init_dist,) + + def descent_step(self, params, grads, t, eps=0.): + """Projected gradient descent on exploitability using Euclidean projection. + + Args: + params: tuple of variables to be updated (dist,) + grads: tuple of variable gradients (grad_dist,) + t: int, solver iteration (unused) + eps: float > 0, force all probabilities >= eps / dim(dist) + Returns: + new_params: tuple of update params (new_dist,) + """ + del t + del eps + + dist = params[0] + grads_dist = grads[0] + + dist_jnp = [jnp.array(dist_i) for dist_i in dist] + grads_dist_jnp = [jnp.array(grad_i) for grad_i in grads_dist] + + # map dist to logits and grads to grad_logits using jacobian + logits = [dist_to_logits(dist_i) for dist_i in params[0]] + grads_logits = [ + jax.jvp(dist_to_logits, [dist_i], [grads_i])[1] + for dist_i, grads_i in zip(dist_jnp, grads_dist_jnp) + ] + + opt_updates, self.opt_state = self.opt.update(grads_logits, + self.opt_state, + logits) + + new_logits = optax.apply_updates(logits, opt_updates) + + new_dist = [logits_to_dist(logits) for logits in new_logits] + new_dist = [np.array(dist_i) for dist_i in new_dist] + + return (new_dist,) + + def compute_gradients(self, params, payoff_matrices): + """Compute and return exploitability. + + Args: + params: tuple of params (dist,), see sgd.gradients + payoff_matrices: 2 dictionaries with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. 
keys + are sorted and arrays should be indexed in the same order + Returns: + float, exploitability of current dist + unregularized exploitability (stochastic estimate) + shannon regularized exploitability (stochastic estimate) + """ + return gradients(*params, payoff_matrices, self.num_players, + self.temperature, self.proj_grad) + + def exploitability(self, params, payoff_matrices): + """Compute and return exploitability. + + Args: + params: tuple of params (dist,), see sgd.gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + float, exploitability as avg squared norm of projected-gradient + """ + return exp.grad_norm_exploitability(params, payoff_matrices, eta=1., + temperature=self.temperature) + + +def logits_to_dist(logits): + logits_ext = jnp.append(logits, 0.) + payoff = jax.nn.softmax(logits_ext) + return payoff + + +def dist_to_logits(dist): + # dist[-1] = exp(logits[-1]) / Z = exp(0) / Z + z = 1 / dist[-1] + logits = jnp.log(dist[:-1] * z) + return logits + + +def gradients(dist, payoff_matrices, num_players, temperature=0., + proj_grad=True): + """Computes exploitablity gradient. + + Args: + dist: list of 1-d np.arrays, current estimate of nash distribution + payoff_matrices: 2 dictionaries with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + num_players: int, number of players, in case payoff_matrices is abbreviated + temperature: non-negative float, default 0. + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. (dist) as tuple + unregularized exploitability (stochastic estimate) + shannon regularized exploitability (stochastic estimate) + """ + # first compute projected gradients (for every player, for each sample a & b) + # if consulting paper https://arxiv.org/abs/2310.06689, code assumes eta_k = 1 + tau = temperature + + pgs = [] + for i in range(num_players): + + pg_i_a = np.zeros_like(dist[i]) + pg_i_b = np.zeros_like(dist[i]) + + for j in range(num_players): + if j == i: + continue + if i < j: + hess_i_ij_a = payoff_matrices[0][(i, j)][0] + hess_i_ij_b = payoff_matrices[1][(i, j)][0] + else: + hess_i_ij_a = payoff_matrices[0][(j, i)][1].T + hess_i_ij_b = payoff_matrices[1][(j, i)][1].T + + pg_i_a_est = simplex.project_grad(hess_i_ij_a.dot(dist[j])) + pg_i_b_est = simplex.project_grad(hess_i_ij_b.dot(dist[j])) + + pg_i_a += pg_i_a_est / float(num_players - 1) + pg_i_b += pg_i_b_est / float(num_players - 1) + + pgs.append((pg_i_a, pg_i_b)) + + # then construct unbiased stochastic gradient + grad_dist = [] + unreg_exp = [] + reg_exp = [] + + for i in range(num_players): + + grad_dist_i = np.zeros_like(dist[i]) + + for j in range(num_players): + pg_j_a = pgs[j][0] + pg_j_b = pgs[j][1] + if tau > 0.: + log_dist_safe = np.clip(np.log(dist[j]), -40, 0) + entr_grad_proj = simplex.project_grad(-tau * (log_dist_safe + 1)) + else: + entr_grad_proj = 0. + pg_j_a_entr = pg_j_a + entr_grad_proj + pg_j_b_entr = pg_j_b + entr_grad_proj + + if j == i: + if tau > 0.: + hess_j_ij_a = -tau * np.diag(1. / dist[j]) + else: + hess_j_ij_a = np.diag(np.zeros_like(dist[j])) + unreg_exp_i = np.dot(pg_j_a, pg_j_b) + reg_exp_i = np.dot(pg_j_a_entr, pg_j_b_entr) + unreg_exp.append(unreg_exp_i) + reg_exp.append(reg_exp_i) + elif i < j: + hess_j_ij_a = payoff_matrices[0][(i, j)][1] + else: + hess_j_ij_a = payoff_matrices[0][(j, i)][0].T + + grad_dist_i += 2. 
* hess_j_ij_a.dot(pg_j_b_entr) + + if proj_grad: + grad_dist_i = simplex.project_grad(grad_dist_i) + + grad_dist.append(grad_dist_i) + + return (grad_dist,), np.mean(unreg_exp), np.mean(reg_exp) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/adam_anneal.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/adam_anneal.py new file mode 100644 index 0000000..669eeea --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/adam_anneal.py @@ -0,0 +1,346 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Stochastic Gradient Descent (Adam) Approx. Nash Solver w/ Annealing.""" + +from absl import logging # pylint:disable=unused-import + +import jax +import jax.numpy as jnp + +import numpy as np + +import optax + +from scipy import special + +from open_spiel.python.algorithms.adidas_utils.helpers import simplex +from open_spiel.python.algorithms.adidas_utils.helpers.nonsymmetric import exploitability as exp + + +class Solver(object): + """Adam Solver with temperature annealing.""" + + def __init__(self, temperature=1., proj_grad=True, lrs=(1e-2, 1e-1), + exp_thresh=-1., rnd_init=False, seed=None, **kwargs): + """Ctor.""" + del kwargs + if temperature < 0.: + raise ValueError("temperature must be non-negative") + self.num_players = None + self.temperature = temperature + self.proj_grad = proj_grad + self.rnd_init = rnd_init + self.lrs = lrs + self.num_estimates = 2 + self.exp_thresh = exp_thresh + self.has_aux = True + self.aux_errors = [] + + self.update = self.descent_step + + self.opt = optax.adam(learning_rate=lrs[0]) + self.opt_state = self.opt.init(jnp.zeros(1)) + + self.seed = seed + self.random = np.random.RandomState(seed) + + def init_vars(self, num_strats, num_players): + """Initialize solver parameters.""" + self.num_players = num_players + if len(num_strats) != num_players: + raise ValueError("Must specify num strategies for each player") + + init_dist = [] + for num_strats_i in num_strats: + if self.rnd_init: + init_dist_i = self.random.rand(num_strats_i) + else: + init_dist_i = np.ones(num_strats_i) + init_dist_i /= init_dist_i.sum() + init_dist.append(init_dist_i) + + init_params = [ + jnp.array(dist_to_logits(init_dist_i)) for init_dist_i in init_dist + ] + + self.opt_state = self.opt.init(init_params) + + init_y = [np.zeros_like(dist_i) for dist_i in init_dist] + init_anneal_steps = 0 + + return (init_dist, init_y, init_anneal_steps) + + def record_aux_errors(self, grads): + """Record errors for the auxiliary variables.""" + grad_y = grads[1] + # call ravel in case use y to track entire payoff matrices in future + grad_y_flat = np.concatenate([np.ravel(g) for g in grad_y]) + self.aux_errors.append([np.linalg.norm(grad_y_flat)]) + + def compute_gradients(self, params, payoff_matrices): + """Compute and return gradients (and exploitabilities) for all parameters. 
+ + Args: + params: tuple of params (dist, y, anneal_steps), see gradients + payoff_matrices: 2 dictionaries with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + Returns: + tuple of gradients (grad_dist, grad_y, grad_anneal_steps), see gradients + unregularized exploitability (stochastic estimate) + shannon entropy regularized exploitability (stochastic estimate) + """ + return self.gradients(*params, payoff_matrices, self.num_players, + self.temperature, self.proj_grad) + + def exploitability(self, params, payoff_matrices): + """Compute and return shannon entropy regularized exploitability. + + Args: + params: tuple of params (dist, y), see qre.gradients + payoff_matrices: 2 dictionaries with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + Returns: + float, exploitability of current dist + """ + return exp.qre_exploitability(params, payoff_matrices, self.temperature) + + def gradients(self, dist: np.ndarray, y: np.ndarray, anneal_steps: int, + payoff_matrices, num_players, + temperature=0., proj_grad=True + ) -> tuple[tuple[list[np.ndarray], list[np.ndarray], int], + float, + float]: + """Computes exploitablity gradient and aux variable gradients. + + Args: + dist: list of 1-d np.arrays, current estimate of nash distribution + y: list 1-d np.arrays (same shape as dist), current est. of payoff + gradient + anneal_steps: int, elapsed num steps since last anneal + payoff_matrices: 2 dictionaries with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + num_players: int, number of players, in case payoff_matrices is + abbreviated + temperature: non-negative float, default 0. + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. (dist, y, anneal_steps) as tuple + unregularized exploitability (stochastic estimate) + shannon entropy regularized exploitability (stochastic estimate) + """ + + grad_dist = loss_gradients(dist, payoff_matrices, num_players, temperature, + proj_grad)[0][0] + + grad_y = [] + unreg_exp = [] + reg_exp = [] + for i in range(num_players): + + nabla_i = np.zeros_like(dist[i]) + for j in range(num_players): + if j == i: + continue + if i < j: + hess_i_ij = 0.5 * payoff_matrices[0][(i, j)][0] + hess_i_ij += 0.5 * payoff_matrices[1][(i, j)][0] + else: + hess_i_ij = 0.5 * payoff_matrices[0][(j, i)][1].T + hess_i_ij += 0.5 * payoff_matrices[1][(j, i)][1].T + + nabla_ij = hess_i_ij.dot(dist[j]) + nabla_i += nabla_ij / float(num_players - 1) + + grad_y.append(y[i] - nabla_i) + + if temperature >= 1e-3: + br_i = special.softmax(y[i] / temperature) + else: + power = np.inf + s_i = np.linalg.norm(y[i], ord=power) + br_i = np.zeros_like(dist[i]) + maxima_i = (y[i] == s_i) + br_i[maxima_i] = 1. 
/ maxima_i.sum() + + unreg_exp.append(np.max(y[i]) - y[i].dot(dist[i])) + + entr_br_i = temperature * special.entr(br_i).sum() + entr_dist_i = temperature * special.entr(dist[i]).sum() + + reg_exp.append(y[i].dot(br_i - dist[i]) + entr_br_i - entr_dist_i) + + unreg_exp_mean = np.mean(unreg_exp) + reg_exp_mean = np.mean(reg_exp) + + _, lr_y = self.lrs + if (reg_exp_mean < self.exp_thresh) and (anneal_steps >= 1 / lr_y): + self.temperature = np.clip(temperature / 2., 0., np.inf) + grad_anneal_steps = -anneal_steps + else: + grad_anneal_steps = 1 + + return (grad_dist, grad_y, grad_anneal_steps), unreg_exp_mean, reg_exp_mean + + def descent_step(self, params, grads, t, eps=0.): + """Gradient descent on exploitability wrt logits. + + Args: + params: tuple of variables to be updated (dist, y, anneal_steps) + grads: tuple of variable gradients (grad_dist, grad_y, grad_anneal_steps) + t: int, solver iteration + eps: float > 0, force all probabilities >= eps / dim(dist) (unused) + Returns: + new_params: tuple of update params (new_dist, new_y, new_anneal_steps) + """ + del eps + + dist = params[0] + grads_dist = grads[0] + + dist_jnp = [jnp.array(dist_i) for dist_i in dist] + grads_dist_jnp = [jnp.array(grad_i) for grad_i in grads_dist] + + # map dist to logits and grads to grad_logits using jacobian + logits = [dist_to_logits(dist_i) for dist_i in params[0]] + grads_logits = [ + jax.jvp(dist_to_logits, [dist_i], [grads_i])[1] + for dist_i, grads_i in zip(dist_jnp, grads_dist_jnp) + ] + + opt_updates, self.opt_state = self.opt.update(grads_logits, + self.opt_state, + logits) + + new_logits = optax.apply_updates(logits, opt_updates) + + new_dist = [logits_to_dist(logits) for logits in new_logits] + new_dist = [np.array(dist_i) for dist_i in new_dist] + + lr_y = self.lrs[1] + lr_y = np.clip(1 / float(t + 1), lr_y, np.inf) + new_y = [] + for y_i, y_grad_i in zip(params[1], grads[1]): + new_y_i = y_i - lr_y * y_grad_i + new_y_i = np.clip(new_y_i, 0., np.inf) + new_y.append(new_y_i) + + new_anneal_steps = params[2] + grads[2] + + return (new_dist, new_y, new_anneal_steps) + + +def logits_to_dist(logits): + logits_ext = jnp.append(logits, 0.) + payoff = jax.nn.softmax(logits_ext) + return payoff + + +def dist_to_logits(dist, eps=1e-8): + # dist[-1] = exp(logits[-1]) / Z = exp(0) / Z + z = 1 / jnp.clip(dist[-1], eps, 1.) + logits = jnp.log(jnp.clip(dist[:-1] * z, eps, np.inf)) + return logits + + +def loss_gradients(dist, payoff_matrices, num_players, temperature=0., + proj_grad=True): + """Computes exploitablity gradient. + + Args: + dist: list of 1-d np.arrays, current estimate of nash distribution + payoff_matrices: 2 dictionaries with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + num_players: int, number of players, in case payoff_matrices is abbreviated + temperature: non-negative float, default 0. + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. 
(dist) as tuple + unregularized exploitability (stochastic estimate) + shannon regularized exploitability (stochastic estimate) + """ + # first compute projected gradients (for every player, for each sample a & b) + # if consulting paper https://arxiv.org/abs/2310.06689, code assumes eta_k = 1 + tau = temperature + + pgs = [] + for i in range(num_players): + + pg_i_a = np.zeros_like(dist[i]) + pg_i_b = np.zeros_like(dist[i]) + + for j in range(num_players): + if j == i: + continue + if i < j: + hess_i_ij_a = payoff_matrices[0][(i, j)][0] + hess_i_ij_b = payoff_matrices[1][(i, j)][0] + else: + hess_i_ij_a = payoff_matrices[0][(j, i)][1].T + hess_i_ij_b = payoff_matrices[1][(j, i)][1].T + + pg_i_a_est = simplex.project_grad(hess_i_ij_a.dot(dist[j])) + pg_i_b_est = simplex.project_grad(hess_i_ij_b.dot(dist[j])) + + pg_i_a += pg_i_a_est / float(num_players - 1) + pg_i_b += pg_i_b_est / float(num_players - 1) + + pgs.append((pg_i_a, pg_i_b)) + + # then construct unbiased stochastic gradient + grad_dist = [] + unreg_exp = [] + reg_exp = [] + + for i in range(num_players): + + grad_dist_i = np.zeros_like(dist[i]) + + for j in range(num_players): + pg_j_a = pgs[j][0] + pg_j_b = pgs[j][1] + if tau > 0.: + log_dist_safe = np.clip(np.log(dist[j]), -40, 0) + entr_grad_proj = simplex.project_grad(-tau * (log_dist_safe + 1)) + else: + entr_grad_proj = 0. + pg_j_a_entr = pg_j_a + entr_grad_proj + pg_j_b_entr = pg_j_b + entr_grad_proj + + if j == i: + if tau > 0.: + hess_j_ij_a = -tau * np.diag(1. / dist[j]) + else: + hess_j_ij_a = np.diag(np.zeros_like(dist[j])) + unreg_exp_i = np.dot(pg_j_a, pg_j_b) + reg_exp_i = np.dot(pg_j_a_entr, pg_j_b_entr) + unreg_exp.append(unreg_exp_i) + reg_exp.append(reg_exp_i) + elif i < j: + hess_j_ij_a = payoff_matrices[0][(i, j)][1] + else: + hess_j_ij_a = payoff_matrices[0][(j, i)][0].T + + grad_dist_i += 2. * hess_j_ij_a.dot(pg_j_b_entr) + + if proj_grad: + grad_dist_i = simplex.project_grad(grad_dist_i) + + grad_dist.append(grad_dist_i) + + return (grad_dist,), np.mean(unreg_exp), np.mean(reg_exp) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/ate.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/ate.py new file mode 100644 index 0000000..f07f4c7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/ate.py @@ -0,0 +1,346 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
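+
+# Overview (sketch only; gradients() below is the authoritative computation):
+# given a nonnegative payoff-gradient estimate y_i (the solvers clip y to
+# [0, inf) and fall back to a uniform distribution when y_i is all zero), the
+# Tsallis-regularized best response used throughout this file is
+#     br_i = (y_i / ||y_i||_{1/p}) ** (1/p),
+# which equals y_i normalized to sum to one at p = 1 and concentrates on the
+# argmax entries of y_i (uniformly over ties) as p -> 0.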
+ +"""Adaptive Tsallis Entropy (ATE) Stochastic Approximate Nash Solver.""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np +from scipy import special + +from open_spiel.python.algorithms.adidas_utils.helpers import misc +from open_spiel.python.algorithms.adidas_utils.helpers import simplex +from open_spiel.python.algorithms.adidas_utils.helpers.nonsymmetric import exploitability as exp + + +class Solver(object): + """ATE Solver.""" + + def __init__(self, p=1., proj_grad=True, euclidean=False, cheap=False, + lrs=(1e-2, 1e-1), rnd_init=False, seed=None, **kwargs): + """Ctor.""" + del kwargs + if (p < 0.) or (p > 1.): + raise ValueError('p must be in [0, 1]') + self.num_players = None + self.p = p + self.proj_grad = proj_grad + self.cheap = cheap + self.rnd_init = rnd_init + self.lrs = lrs + self.has_aux = True + self.aux_errors = [] + + self.euclidean = euclidean + if euclidean: + self.update = self.euc_descent_step + else: + self.update = self.mirror_descent_step + + self.seed = seed + self.random = np.random.RandomState(seed) + + def init_vars(self, num_strats, num_players): + """Initialize solver parameters.""" + self.num_players = num_players + if len(num_strats) != num_players: + raise ValueError('Must specify num strategies for each player') + init_dist = [] + for num_strats_i in num_strats: + if self.rnd_init: + init_dist_i = self.random.rand(num_strats_i) + else: + init_dist_i = np.ones(num_strats_i) + init_dist_i /= init_dist_i.sum() + init_dist.append(init_dist_i) + init_y = [np.zeros_like(dist_i) for dist_i in init_dist] + return (init_dist, init_y) + + def record_aux_errors(self, grads): + """Record errors for the auxiliary variables.""" + grad_y = grads[1] + # call ravel in case use y to track entire payoff matrices in future + grad_y_flat = np.concatenate([np.ravel(g) for g in grad_y]) + self.aux_errors.append([np.linalg.norm(grad_y_flat)]) + + def compute_gradients(self, params, payoff_matrices): + """Compute and return gradients (and exploitabilities) for all parameters. + + Args: + params: tuple of params (dist, y), see ate.gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + tuple of gradients (grad_dist, grad_y), see ate.gradients + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + if self.cheap: + return cheap_gradients(self.random, *params, payoff_matrices, + self.num_players, self.p, self.proj_grad) + else: + return gradients(*params, payoff_matrices, self.num_players, self.p, + self.proj_grad) + + def exploitability(self, params, payoff_matrices): + """Compute and return tsallis entropy regularized exploitability. + + Args: + params: tuple of params (dist, y), see ate.gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + float, exploitability of current dist + """ + return exp.ate_exploitability(params, payoff_matrices, self.p) + + def euc_descent_step(self, params, grads, t): + """Projected gradient descent on exploitability using Euclidean projection. 
+ + Args: + params: tuple of variables to be updated (dist, y) + grads: tuple of variable gradients (grad_dist, grad_y) + t: int, solver iteration (unused) + Returns: + new_params: tuple of update params (new_dist, new_y) + """ + lr_dist, lr_y = self.lrs + new_dist = [] + for dist_i, dist_grad_i in zip(params[0], grads[0]): + new_dist_i = dist_i - lr_dist * dist_grad_i + new_dist_i = simplex.euclidean_projection_onto_simplex(new_dist_i) + new_dist.append(new_dist_i) + lr_y = np.clip(1 / float(t + 1), lr_y, np.inf) + new_y = [] + for y_i, y_grad_i in zip(params[1], grads[1]): + new_y_i = y_i - lr_y * y_grad_i + new_y_i = np.clip(new_y_i, 0., np.inf) + new_y.append(new_y_i) + return (new_dist, new_y) + + def mirror_descent_step(self, params, grads, t): + """Entropic mirror descent on exploitability. + + Args: + params: tuple of variables to be updated (dist, y) + grads: tuple of variable gradients (grad_dist, grad_y) + t: int, solver iteration (unused) + Returns: + new_params: tuple of update params (new_dist, new_y) + """ + lr_dist, lr_y = self.lrs + new_dist = [] + for dist_i, dist_grad_i in zip(params[0], grads[0]): + new_dist_i = np.log(np.clip(dist_i, 0., np.inf)) - lr_dist * dist_grad_i + new_dist_i = special.softmax(new_dist_i) + new_dist.append(new_dist_i) + lr_y = np.clip(1 / float(t + 1), lr_y, np.inf) + new_y = [] + for y_i, y_grad_i in zip(params[1], grads[1]): + new_y_i = y_i - lr_y * y_grad_i + new_y_i = np.clip(new_y_i, 0., np.inf) + new_y.append(new_y_i) + return (new_dist, new_y) + + +def gradients(dist, y, payoff_matrices, num_players, p=1, proj_grad=True): + """Computes exploitablity gradient and aux variable gradients. + + Args: + dist: list of 1-d np.arrays, current estimate of nash distribution + y: list 1-d np.arrays (same shape as dist), current est. of payoff gradient + payoff_matrices: dictionary with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + num_players: int, number of players, in case payoff_matrices is abbreviated + p: float in [0, 1], Tsallis entropy-regularization --> 0 as p --> 0 + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. (dist, y) as tuple + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + # first compute policy gradients and player effects (fx) + policy_gradient = [] + other_player_fx = [] + grad_y = [] + unreg_exp = [] + reg_exp = [] + for i in range(num_players): + + nabla_i = np.zeros_like(dist[i]) + for j in range(num_players): + if j == i: + continue + if i < j: + hess_i_ij = payoff_matrices[(i, j)][0] + else: + hess_i_ij = payoff_matrices[(j, i)][1].T + + nabla_ij = hess_i_ij.dot(dist[j]) + nabla_i += nabla_ij / float(num_players - 1) + + grad_y.append(y[i] - nabla_i) + + if p > 0: + power = 1. / float(p) + s_i = np.linalg.norm(y[i], ord=power) + if s_i == 0: + br_i = misc.uniform_dist(y[i]) + else: + br_i = (y[i] / s_i)**power + else: + power = np.inf + s_i = np.linalg.norm(y[i], ord=power) + br_i = np.zeros_like(dist[i]) + maxima_i = (y[i] == s_i) + br_i[maxima_i] = 1. 
/ maxima_i.sum() + + policy_gradient_i = nabla_i - s_i * dist[i]**p + policy_gradient.append(policy_gradient_i) + + unreg_exp.append(np.max(y[i]) - y[i].dot(dist[i])) + + br_i_inv_sparse = 1 - np.sum(br_i**(p + 1)) + dist_i_inv_sparse = 1 - np.sum(dist[i]**(p + 1)) + entr_br_i = s_i / (p + 1) * br_i_inv_sparse + entr_dist_i = s_i / (p + 1) * dist_i_inv_sparse + + reg_exp.append(y[i].dot(br_i - dist[i]) + entr_br_i - entr_dist_i) + + entr_br_vec_i = br_i_inv_sparse * br_i**(1 - p) + entr_dist_vec_i = dist_i_inv_sparse * dist[i]**(1 - p) + other_player_fx_i = (br_i - dist[i]) + 1 / (p + 1) * ( + entr_br_vec_i - entr_dist_vec_i) + other_player_fx.append(other_player_fx_i) + + # then construct exploitability gradient + grad_dist = [] + for i in range(num_players): + + grad_dist_i = -policy_gradient[i] + for j in range(num_players): + if j == i: + continue + if i < j: + hess_j_ij = payoff_matrices[(i, j)][1] + else: + hess_j_ij = payoff_matrices[(j, i)][0].T + + grad_dist_i += hess_j_ij.dot(other_player_fx[j]) + + if proj_grad: + grad_dist_i = simplex.project_grad(grad_dist_i) + + grad_dist.append(grad_dist_i) + + return (grad_dist, grad_y), np.mean(unreg_exp), np.mean(reg_exp) + + +def cheap_gradients(random, dist, y, payoff_matrices, num_players, p=1, + proj_grad=True): + """Computes exploitablity gradient and aux variable gradients with samples. + + This implementation takes payoff_matrices as input so technically uses O(d^2) + compute but only a single column of payoff_matrices is used to perform the + update so can be re-implemented in O(d) if needed. + + Args: + random: random number generator, np.random.RandomState(seed) + dist: list of 1-d np.arrays, current estimate of nash distribution + y: list 1-d np.arrays (same shape as dist), current est. of payoff gradient + payoff_matrices: dictionary with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + num_players: int, number of players, in case payoff_matrices is abbreviated + p: float in [0, 1], Tsallis entropy-regularization --> 0 as p --> 0 + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. (dist, y) as tuple + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + # first compute policy gradients and player effects (fx) + policy_gradient = [] + other_player_fx = [] + grad_y = [] + unreg_exp = [] + reg_exp = [] + for i in range(num_players): + + others = list(range(num_players)) + others.remove(i) + j = np.random.choice(others) + action_j = random.choice(dist[j].size, p=dist[j]) + if i < j: + hess_i_ij = payoff_matrices[(i, j)][0] + else: + hess_i_ij = payoff_matrices[(j, i)][1].T + nabla_i = hess_i_ij[:, action_j] + + grad_y.append(y[i] - nabla_i) + + if p > 0: + power = 1. / float(p) + s_i = np.linalg.norm(y[i], ord=power) + if s_i == 0: + br_i = misc.uniform_dist(y[i]) + else: + br_i = (y[i] / s_i)**power + else: + power = np.inf + s_i = np.linalg.norm(y[i], ord=power) + br_i = np.zeros_like(dist[i]) + maxima_i = (y[i] == s_i) + br_i[maxima_i] = 1. 
/ maxima_i.sum() + + policy_gradient_i = nabla_i - s_i * dist[i]**p + policy_gradient.append(policy_gradient_i) + + unreg_exp.append(np.max(y[i]) - y[i].dot(dist[i])) + + br_i_inv_sparse = 1 - np.sum(br_i**(p + 1)) + dist_i_inv_sparse = 1 - np.sum(dist[i]**(p + 1)) + entr_br_i = s_i / (p + 1) * br_i_inv_sparse + entr_dist_i = s_i / (p + 1) * dist_i_inv_sparse + + reg_exp.append(y[i].dot(br_i - dist[i]) + entr_br_i - entr_dist_i) + + entr_br_vec_i = br_i_inv_sparse * br_i**(1 - p) + entr_dist_vec_i = dist_i_inv_sparse * dist[i]**(1 - p) + other_player_fx_i = (br_i - dist[i]) + 1 / (p + 1) * ( + entr_br_vec_i - entr_dist_vec_i) + other_player_fx.append(other_player_fx_i) + + # then construct exploitability gradient + grad_dist = [] + for i in range(num_players): + + grad_dist_i = -policy_gradient[i] + for j in range(num_players): + if j == i: + continue + if i < j: + hess_j_ij = payoff_matrices[(i, j)][1] + else: + hess_j_ij = payoff_matrices[(j, i)][0].T + + action_u = random.choice(dist[j].size) # uniform, ~importance sampling + other_player_fx_j = dist[j].size * other_player_fx[j][action_u] + grad_dist_i += hess_j_ij[:, action_u] * other_player_fx_j + + if proj_grad: + grad_dist_i = simplex.project_grad(grad_dist_i) + + grad_dist.append(grad_dist_i) + + return (grad_dist, grad_y), np.mean(unreg_exp), np.mean(reg_exp) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/ate_anneal.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/ate_anneal.py new file mode 100644 index 0000000..40e4c00 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/ate_anneal.py @@ -0,0 +1,373 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Adaptive Tsallis Entropy (ATE) Stochastic Approximate Nash Solver.""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np +from scipy import special + +from open_spiel.python.algorithms.adidas_utils.helpers import misc +from open_spiel.python.algorithms.adidas_utils.helpers import simplex +from open_spiel.python.algorithms.adidas_utils.helpers.nonsymmetric import exploitability as exp + + +class Solver(object): + """ATE Solver with temperature annealing.""" + + def __init__(self, p=1., proj_grad=True, euclidean=False, cheap=False, + lrs=(1e-2, 1e-1), exp_thresh=-1., rnd_init=False, seed=None, + **kwargs): + """Ctor.""" + del kwargs + if (p < 0.) 
or (p > 1.): + raise ValueError('p must be in [0, 1]') + self.num_players = None + self.p_init = p + self.p = p + self.proj_grad = proj_grad + self.cheap = cheap + self.rnd_init = rnd_init + self.lrs = lrs + self.exp_thresh = exp_thresh + self.has_aux = True + self.aux_errors = [] + + self.euclidean = euclidean + if euclidean: + self.update = self.euc_descent_step + else: + self.update = self.mirror_descent_step + + self.seed = seed + self.random = np.random.RandomState(seed) + + def init_vars(self, num_strats, num_players): + """Initialize solver parameters.""" + self.num_players = num_players + if len(num_strats) != num_players: + raise ValueError('Must specify num strategies for each player') + init_dist = [] + for num_strats_i in num_strats: + if self.rnd_init: + init_dist_i = self.random.rand(num_strats_i) + else: + init_dist_i = np.ones(num_strats_i) + init_dist_i /= init_dist_i.sum() + init_dist.append(init_dist_i) + init_y = [np.zeros_like(dist_i) for dist_i in init_dist] + init_anneal_steps = 0 + return (init_dist, init_y, init_anneal_steps) + + def record_aux_errors(self, grads): + """Record errors for the auxiliary variables.""" + grad_y = grads[1] + # call ravel in case use y to track entire payoff matrices in future + grad_y_flat = np.concatenate([np.ravel(g) for g in grad_y]) + self.aux_errors.append([np.linalg.norm(grad_y_flat)]) + + def compute_gradients(self, params, payoff_matrices): + """Compute and return gradients (and exploitabilities) for all parameters. + + Args: + params: tuple of params (dist, y, anneal_steps), see gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + tuple of gradients (grad_dist, grad_y, grad_anneal_steps), see gradients + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + if self.cheap: + return self.cheap_gradients(self.random, *params, payoff_matrices, + self.num_players, self.p, self.proj_grad) + else: + return self.gradients(*params, payoff_matrices, self.num_players, self.p, + self.proj_grad) + + def exploitability(self, params, payoff_matrices): + """Compute and return tsallis entropy regularized exploitability. + + Args: + params: tuple of params (dist, y), see ate.gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + float, exploitability of current dist + """ + return exp.ate_exploitability(params, payoff_matrices, self.p) + + def euc_descent_step(self, params, grads, t): + """Projected gradient descent on exploitability using Euclidean projection. + + Args: + params: tuple of variables to be updated (dist, y, anneal_steps) + grads: tuple of variable gradients (grad_dist, grad_y, grad_anneal_steps) + t: int, solver iteration (unused) + Returns: + new_params: tuple of update params (new_dist, new_y, new_anneal_steps) + """ + lr_dist, lr_y = self.lrs + new_dist = [] + for dist_i, dist_grad_i in zip(params[0], grads[0]): + new_dist_i = dist_i - lr_dist * dist_grad_i + new_dist_i = simplex.euclidean_projection_onto_simplex(new_dist_i) + new_dist.append(new_dist_i) + lr_y = np.clip(1 / float(t + 1), lr_y, np.inf) + new_y = [] + for y_i, y_grad_i in zip(params[1], grads[1]): + new_y_i = y_i - lr_y * y_grad_i + new_y_i = np.clip(new_y_i, 0., np.inf) + new_y.append(new_y_i) + new_anneal_steps = params[2] + grads[2] + return (new_dist, new_y, new_anneal_steps) + + def mirror_descent_step(self, params, grads, t): + """Entropic mirror descent on exploitability. 
+ + Args: + params: tuple of variables to be updated (dist, y) + grads: tuple of variable gradients (grad_dist, grad_y) + t: int, solver iteration (unused) + Returns: + new_params: tuple of update params (new_dist, new_y) + """ + lr_dist, lr_y = self.lrs + new_dist = [] + for dist_i, dist_grad_i in zip(params[0], grads[0]): + new_dist_i = np.log(np.clip(dist_i, 0., np.inf)) - lr_dist * dist_grad_i + new_dist_i = special.softmax(new_dist_i) + new_dist.append(new_dist_i) + lr_y = np.clip(1 / float(t + 1), lr_y, np.inf) + new_y = [] + for y_i, y_grad_i in zip(params[1], grads[1]): + new_y_i = y_i - lr_y * y_grad_i + new_y_i = np.clip(new_y_i, 0., np.inf) + new_y.append(new_y_i) + new_anneal_steps = params[2] + grads[2] + return (new_dist, new_y, new_anneal_steps) + + def gradients(self, dist, y, anneal_steps, payoff_matrices, num_players, p=1, + proj_grad=True): + """Computes exploitablity gradient and aux variable gradients. + + Args: + dist: list of 1-d np.arrays, current estimate of nash distribution + y: list 1-d np.arrays (same shape as dist), current est. of payoff grad + anneal_steps: int, elapsed num steps since last anneal + payoff_matrices: dictionary with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + num_players: int, number of players, in case payoff_matrices is abbrev'd + p: float in [0, 1], Tsallis entropy-regularization --> 0 as p --> 0 + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. (dist, y) as tuple + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + # first compute policy gradients and player effects (fx) + policy_gradient = [] + other_player_fx = [] + grad_y = [] + unreg_exp = [] + reg_exp = [] + for i in range(num_players): + + nabla_i = np.zeros_like(dist[i]) + for j in range(num_players): + if j == i: + continue + if i < j: + hess_i_ij = payoff_matrices[(i, j)][0] + else: + hess_i_ij = payoff_matrices[(j, i)][1].T + + nabla_ij = hess_i_ij.dot(dist[j]) + nabla_i += nabla_ij / float(num_players - 1) + + grad_y.append(y[i] - nabla_i) + + if p > 1e-2: # encounter numerical under/overflow when power > 100. + power = 1. / float(p) + s_i = np.linalg.norm(y[i], ord=power) + if s_i == 0: + br_i = misc.uniform_dist(y[i]) + else: + br_i = (y[i] / s_i)**power + else: + power = np.inf + s_i = np.linalg.norm(y[i], ord=power) + br_i = np.zeros_like(dist[i]) + maxima_i = (y[i] == s_i) + br_i[maxima_i] = 1. 
/ maxima_i.sum() + + policy_gradient_i = nabla_i - s_i * dist[i]**p + policy_gradient.append(policy_gradient_i) + + unreg_exp.append(np.max(y[i]) - y[i].dot(dist[i])) + + br_i_inv_sparse = 1 - np.sum(br_i**(p + 1)) + dist_i_inv_sparse = 1 - np.sum(dist[i]**(p + 1)) + entr_br_i = s_i / (p + 1) * br_i_inv_sparse + entr_dist_i = s_i / (p + 1) * dist_i_inv_sparse + + reg_exp.append(y[i].dot(br_i - dist[i]) + entr_br_i - entr_dist_i) + + entr_br_vec_i = br_i_inv_sparse * br_i**(1 - p) + entr_dist_vec_i = dist_i_inv_sparse * dist[i]**(1 - p) + other_player_fx_i = (br_i - dist[i]) + 1 / (p + 1) * ( + entr_br_vec_i - entr_dist_vec_i) + other_player_fx.append(other_player_fx_i) + + # then construct exploitability gradient + grad_dist = [] + for i in range(num_players): + + grad_dist_i = -policy_gradient[i] + for j in range(num_players): + if j == i: + continue + if i < j: + hess_j_ij = payoff_matrices[(i, j)][1] + else: + hess_j_ij = payoff_matrices[(j, i)][0].T + + grad_dist_i += hess_j_ij.dot(other_player_fx[j]) + + if proj_grad: + grad_dist_i = simplex.project_grad(grad_dist_i) + + grad_dist.append(grad_dist_i) + + unreg_exp_mean = np.mean(unreg_exp) + reg_exp_mean = np.mean(reg_exp) + + _, lr_y = self.lrs + if (reg_exp_mean < self.exp_thresh) and (anneal_steps >= 1 / lr_y): + self.p = np.clip(p / 2., 0., 1.) + grad_anneal_steps = -anneal_steps + else: + grad_anneal_steps = 1 + + return (grad_dist, grad_y, grad_anneal_steps), unreg_exp_mean, reg_exp_mean + + def cheap_gradients(self, random, dist, y, anneal_steps, payoff_matrices, + num_players, p=1, proj_grad=True): + """Computes exploitablity gradient and aux variable gradients with samples. + + This implementation takes payoff_matrices as input so technically uses + O(d^2) compute but only a single column of payoff_matrices is used to + perform the update so can be re-implemented in O(d) if needed. + + Args: + random: random number generator, np.random.RandomState(seed) + dist: list of 1-d np.arrays, current estimate of nash distribution + y: list 1-d np.arrays (same shape as dist), current est. of payoff grad + anneal_steps: int, elapsed num steps since last anneal + payoff_matrices: dictionary with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + num_players: int, number of players, in case payoff_matrices is abbrev'd + p: float in [0, 1], Tsallis entropy-regularization --> 0 as p --> 0 + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. (dist, y) as tuple + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + # first compute policy gradients and player effects (fx) + policy_gradient = [] + other_player_fx = [] + grad_y = [] + unreg_exp = [] + reg_exp = [] + for i in range(num_players): + + others = list(range(num_players)) + others.remove(i) + j = np.random.choice(others) + action_j = random.choice(dist[j].size, p=dist[j]) + if i < j: + hess_i_ij = payoff_matrices[(i, j)][0] + else: + hess_i_ij = payoff_matrices[(j, i)][1].T + nabla_i = hess_i_ij[:, action_j] + + grad_y.append(y[i] - nabla_i) + + if p > 1e-2: # encounter numerical under/overflow when power > 100. + power = 1. 
/ float(p) + s_i = np.linalg.norm(y[i], ord=power) + if s_i == 0: + br_i = misc.uniform_dist(y[i]) + else: + br_i = (y[i] / s_i)**power + else: + power = np.inf + s_i = np.linalg.norm(y[i], ord=power) + br_i = np.zeros_like(dist[i]) + maxima_i = (y[i] == s_i) + br_i[maxima_i] = 1. / maxima_i.sum() + + policy_gradient_i = nabla_i - s_i * dist[i]**p + policy_gradient.append(policy_gradient_i) + + unreg_exp.append(np.max(y[i]) - y[i].dot(dist[i])) + + br_i_inv_sparse = 1 - np.sum(br_i**(p + 1)) + dist_i_inv_sparse = 1 - np.sum(dist[i]**(p + 1)) + entr_br_i = s_i / (p + 1) * br_i_inv_sparse + entr_dist_i = s_i / (p + 1) * dist_i_inv_sparse + + reg_exp.append(y[i].dot(br_i - dist[i]) + entr_br_i - entr_dist_i) + + entr_br_vec_i = br_i_inv_sparse * br_i**(1 - p) + entr_dist_vec_i = dist_i_inv_sparse * dist[i]**(1 - p) + other_player_fx_i = (br_i - dist[i]) + 1 / (p + 1) * ( + entr_br_vec_i - entr_dist_vec_i) + other_player_fx.append(other_player_fx_i) + + # then construct exploitability gradient + grad_dist = [] + for i in range(num_players): + + grad_dist_i = -policy_gradient[i] + for j in range(num_players): + if j == i: + continue + if i < j: + hess_j_ij = payoff_matrices[(i, j)][1] + else: + hess_j_ij = payoff_matrices[(j, i)][0].T + + action_u = random.choice(dist[j].size) # uniform, ~importance sampling + other_player_fx_j = dist[j].size * other_player_fx[j][action_u] + grad_dist_i += hess_j_ij[:, action_u] * other_player_fx_j + + if proj_grad: + grad_dist_i = simplex.project_grad(grad_dist_i) + + grad_dist.append(grad_dist_i) + + unreg_exp_mean = np.mean(unreg_exp) + reg_exp_mean = np.mean(reg_exp) + + _, lr_y = self.lrs + if (reg_exp_mean < self.exp_thresh) and (anneal_steps >= 1 / lr_y): + self.p = np.clip(p / 2., 0., 1.) + grad_anneal_steps = -anneal_steps + else: + grad_anneal_steps = 1 + + return (grad_dist, grad_y, grad_anneal_steps), unreg_exp_mean, reg_exp_mean diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/ate_poly.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/ate_poly.py new file mode 100644 index 0000000..19f4928 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/ate_poly.py @@ -0,0 +1,255 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
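+
+# Overview (sketch only; init_polymatrix() and gradients() below are
+# authoritative): instead of a single payoff-gradient vector per player, this
+# solver maintains y as a dict of pairwise bimatrix estimates, one per player
+# pair i < j. For example (hypothetical shapes), with num_strats = [2, 3, 4]
+# the initial structure is
+#   {(0, 1): zeros((2, 2, 3)), (0, 2): zeros((2, 2, 4)), (1, 2): zeros((2, 3, 4))},
+# where slice [0] estimates player i's payoff matrix against j and slice [1]
+# estimates player j's against i; exploitability gradients are then computed
+# from these running estimates rather than directly from the sampled payoffs.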
+ +"""Adaptive Tsallis Entropy (ATE) Stochastic Approximate Nash Solver.""" +import itertools + +from absl import logging # pylint:disable=unused-import + +import numpy as np +from scipy import special + +from open_spiel.python.algorithms.adidas_utils.helpers import misc +from open_spiel.python.algorithms.adidas_utils.helpers import simplex +from open_spiel.python.algorithms.adidas_utils.helpers.nonsymmetric import exploitability as exp + + +class Solver(object): + """ATE Solver that constructs a polymatrix approximation to the full game.""" + + def __init__(self, p=1., proj_grad=True, euclidean=False, cheap=False, + lrs=(1e-2, 1e-1), rnd_init=False, seed=None, **kwargs): + """Ctor.""" + del kwargs + if (p < 0.) or (p > 1.): + raise ValueError('p must be in [0, 1]') + self.num_strats = None + self.num_players = None + self.p = p + self.proj_grad = proj_grad + self.cheap = cheap + self.rnd_init = rnd_init + self.lrs = lrs + self.has_aux = True + self.aux_errors = [] + + self.euclidean = euclidean + if euclidean: + self.update = self.euc_descent_step + else: + self.update = self.mirror_descent_step + + self.seed = seed + self.random = np.random.RandomState(seed) + + def init_vars(self, num_strats, num_players): + """Initialize solver parameters.""" + self.num_strats = num_strats + self.num_players = num_players + if len(num_strats) != num_players: + raise ValueError('Must specify num strategies for each player') + init_dist = [] + for num_strats_i in num_strats: + if self.rnd_init: + init_dist_i = self.random.rand(num_strats_i) + else: + init_dist_i = np.ones(num_strats_i) + init_dist_i /= init_dist_i.sum() + init_dist.append(init_dist_i) + init_y = self.init_polymatrix(num_strats, num_players) + return (init_dist, init_y) + + def init_polymatrix(self, num_strats, num_players): + """Initialize all pairwise bimatrix games to zero and return as dict.""" + init_pm = dict() + for i, j in itertools.combinations(range(num_players), 2): + init_pm[(i, j)] = np.zeros((2, num_strats[i], num_strats[j])) # i < j + return init_pm + + def record_aux_errors(self, grads): + """Record errors for the auxiliary variables.""" + grad_y = grads[1] + # call ravel in case use y to track entire payoff matrices in future + grad_y_flat = np.concatenate([np.ravel(g) for g in grad_y.values()]) + self.aux_errors.append([np.linalg.norm(grad_y_flat)]) + + def compute_gradients(self, params, payoff_matrices): + """Compute and return gradients (and exploitabilities) for all parameters. + + Args: + params: tuple of params (dist, y), see ate.gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + tuple of gradients (grad_dist, grad_y), see ate.gradients + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + return self.gradients(*params, payoff_matrices, self.p, self.proj_grad) + + def exploitability(self, params, payoff_matrices): + """Compute and return tsallis entropy regularized exploitability. + + Args: + params: tuple of params (dist, y), see ate.gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + float, exploitability of current dist + """ + return exp.ate_exploitability(params, payoff_matrices, self.p) + + def euc_descent_step(self, params, grads, t): + """Projected gradient descent on exploitability using Euclidean projection. 
+ + Args: + params: tuple of variables to be updated (dist, y) + grads: tuple of variable gradients (grad_dist, grad_y) + t: int, solver iteration (unused) + Returns: + new_params: tuple of update params (new_dist, new_y) + """ + lr_dist, lr_y = self.lrs + new_dist = [] + for dist_i, dist_grad_i in zip(params[0], grads[0]): + new_dist_i = dist_i - lr_dist * dist_grad_i + new_dist_i = simplex.euclidean_projection_onto_simplex(new_dist_i) + new_dist.append(new_dist_i) + lr_y = np.clip(1 / float(t + 1), lr_y, np.inf) + new_y = dict() + for i, j in params[1]: + y_ij = params[1][(i, j)] + y_grad_ij = grads[1][(i, j)] + new_y_ij = y_ij - lr_y * y_grad_ij + new_y_ij = np.clip(new_y_ij, 0., np.inf) + new_y[(i, j)] = new_y_ij + return (new_dist, new_y) + + def mirror_descent_step(self, params, grads, t): + """Entropic mirror descent on exploitability. + + Args: + params: tuple of variables to be updated (dist, y) + grads: tuple of variable gradients (grad_dist, grad_y) + t: int, solver iteration (unused) + Returns: + new_params: tuple of update params (new_dist, new_y) + """ + lr_dist, lr_y = self.lrs + new_dist = [] + for dist_i, dist_grad_i in zip(params[0], grads[0]): + new_dist_i = np.log(np.clip(dist_i, 0., np.inf)) - lr_dist * dist_grad_i + new_dist_i = special.softmax(new_dist_i) + new_dist.append(new_dist_i) + lr_y = np.clip(1 / float(t + 1), lr_y, np.inf) + new_y = dict() + for i, j in params[1]: + y_ij = params[1][(i, j)] + y_grad_ij = grads[1][(i, j)] + new_y_ij = y_ij - lr_y * y_grad_ij + new_y_ij = np.clip(new_y_ij, 0., np.inf) + new_y[(i, j)] = new_y_ij + return (new_dist, new_y) + + def gradients(self, dist, y, payoff_matrices, p=1, proj_grad=True): + """Computes exploitablity gradient and aux variable gradients. + + Args: + dist: list of 1-d np.arrays, current estimate of nash distribution + y: dict of 2-d np.arrays, current est. of players (i, j)'s payoff matrix + payoff_matrices: dictionary with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + p: float in [0, 1], Tsallis entropy-regularization --> 0 as p --> 0 + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. (dist, y) as tuple + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + # first compute policy gradients and player effects (fx) + policy_gradient = [] + other_player_fx = [] + grad_y = self.init_polymatrix(self.num_strats, self.num_players) + unreg_exp = [] + reg_exp = [] + for i in range(self.num_players): + + nabla_i = np.zeros_like(dist[i]) + for j in range(self.num_players): + if j == i: + continue + if i < j: + hess_i_ij = payoff_matrices[(i, j)][0] + hess_i_ij_from_y = y[(i, j)][0] + grad_y[(i, j)][0] = hess_i_ij_from_y - hess_i_ij + else: + hess_i_ij = payoff_matrices[(j, i)][1].T + hess_i_ij_from_y = y[(j, i)][1].T + grad_y[(j, i)][1] = hess_i_ij_from_y.T - hess_i_ij.T + + nabla_ij = hess_i_ij_from_y.dot(dist[j]) + nabla_i += nabla_ij / float(self.num_players - 1) + + if p > 0: + power = 1. / float(p) + s_i = np.linalg.norm(nabla_i, ord=power) + if s_i == 0: + br_i = misc.uniform_dist(nabla_i) + else: + br_i = (nabla_i / s_i)**power + else: + power = np.inf + s_i = np.linalg.norm(nabla_i, ord=power) + br_i = np.zeros_like(dist[i]) + maxima_i = (nabla_i == s_i) + br_i[maxima_i] = 1. 
/ maxima_i.sum() + + policy_gradient_i = nabla_i - s_i * dist[i]**p + policy_gradient.append(policy_gradient_i) + + unreg_exp.append(np.max(nabla_i) - nabla_i.dot(dist[i])) + + br_i_inv_sparse = 1 - np.sum(br_i**(p + 1)) + dist_i_inv_sparse = 1 - np.sum(dist[i]**(p + 1)) + entr_br_i = s_i / (p + 1) * br_i_inv_sparse + entr_dist_i = s_i / (p + 1) * dist_i_inv_sparse + + reg_exp.append(nabla_i.dot(br_i - dist[i]) + entr_br_i - entr_dist_i) + + entr_br_vec_i = br_i_inv_sparse * br_i**(1 - p) + entr_dist_vec_i = dist_i_inv_sparse * dist[i]**(1 - p) + other_player_fx_i = (br_i - dist[i]) + 1 / (p + 1) * ( + entr_br_vec_i - entr_dist_vec_i) + other_player_fx.append(other_player_fx_i) + + # then construct exploitability gradient + grad_dist = [] + for i in range(self.num_players): + + grad_dist_i = -policy_gradient[i] + for j in range(self.num_players): + if j == i: + continue + if i < j: + hess_j_ij_from_y = y[(i, j)][1] + else: + hess_j_ij_from_y = y[(j, i)][0].T + + grad_dist_i += hess_j_ij_from_y.dot(other_player_fx[j]) + + if proj_grad: + grad_dist_i = simplex.project_grad(grad_dist_i) + + grad_dist.append(grad_dist_i) + + return (grad_dist, grad_y), np.mean(unreg_exp), np.mean(reg_exp) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/ate_regmatch.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/ate_regmatch.py new file mode 100644 index 0000000..207b0bd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/ate_regmatch.py @@ -0,0 +1,231 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Adaptive Tsallis Entropy (ATE) Stochastic Regret Matching Nash Solver.""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np + +from open_spiel.python.algorithms.adidas_utils.helpers import misc +from open_spiel.python.algorithms.adidas_utils.helpers.nonsymmetric import exploitability as exp + + +class Solver(object): + """ATE Exploitability Regret Matching Solver.""" + + def __init__(self, p=1., lrs=(1e-2,), optimism=True, discount=False, + rnd_init=False, seed=None, **kwargs): + """Ctor.""" + del kwargs + if (p < 0.) 
or (p > 1.): + raise ValueError('p must be in [0, 1]') + self.num_players = None + self.p = p + self.rnd_init = rnd_init + self.lrs = lrs + self.optimism = optimism + self.discount = discount + self.has_aux = True + self.aux_errors = [] + + self.seed = seed + self.random = np.random.RandomState(seed) + + def init_vars(self, num_strats, num_players): + """Initialize solver parameters.""" + self.num_players = num_players + if len(num_strats) != num_players: + raise ValueError('Must specify num strategies for each player') + init_dist = [] + for num_strats_i in num_strats: + if self.rnd_init: + init_dist_i = self.random.rand(num_strats_i) + else: + init_dist_i = np.ones(num_strats_i) + init_dist_i /= init_dist_i.sum() + init_dist.append(init_dist_i) + init_y = [np.zeros_like(dist_i) for dist_i in init_dist] + init_cumgrad = [np.zeros_like(dist_i) for dist_i in init_dist] + return (init_dist, init_y, init_cumgrad) + + def record_aux_errors(self, grads): + """Record errors for the auxiliary variables.""" + concat = [] + for grad in grads: + concat.extend([np.ravel(g) for g in grad]) + self.aux_errors.append([np.linalg.norm(np.concatenate(concat))]) + + def compute_gradients(self, params, payoff_matrices): + """Compute and return gradients (and exploitabilities) for all parameters. + + Args: + params: tuple of params (dist, y), see ate.gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + tuple of gradients (grad_dist, grad_y), see ate.gradients + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + return gradients(*params, payoff_matrices, self.num_players, self.p) + + def exploitability(self, dist, payoff_matrices): + """Compute and return tsallis entropy regularized exploitability. + + Args: + dist: tuple of list of player distributions (dist,) + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + float, exploitability of current dist + """ + return exp.ate_exploitability(dist, payoff_matrices, self.p) + + def update(self, params, grads, t): + """Projected gradient descent on exploitability using Euclidean projection. + + Args: + params: tuple of variables to be updated (dist, y, regret) + grads: tuple of variable gradients (grad_dist, grad_y, regret_delta) + t: int, solver iteration + Returns: + new_params: tuple of update params (new_dist, new_y, new_regret) + """ + dist, y, regret = params + _, y_grad, regret_delta = grads + + lr_y = np.clip(1 / float(t + 1), self.lrs[0], np.inf) + new_y = [] + for y_i, y_grad_i in zip(y, y_grad): + new_y_i = y_i - lr_y * y_grad_i + new_y_i = np.clip(new_y_i, 0., np.inf) + new_y.append(new_y_i) + + if self.discount: + gamma = t / float(t + 1) + else: + gamma = 1 + + new_dist = [] + new_regret = [] + for dist_i, regret_i, regret_delta_i in zip(dist, regret, regret_delta): + new_regret_i = gamma * regret_i + regret_delta_i + new_clipped_regrets_i = np.clip( + new_regret_i + self.optimism * regret_delta_i, 0., np.inf) + if np.sum(new_clipped_regrets_i) > 0: + new_dist_i = new_clipped_regrets_i / new_clipped_regrets_i.sum() + else: + new_dist_i = np.ones_like(dist_i) / dist_i.size + new_dist.append(new_dist_i) + new_regret.append(new_regret_i) + + return (new_dist, new_y, new_regret) + + +def gradients(dist, y, regret, payoff_matrices, num_players, p=1): + """Computes exploitablity gradient and aux variable gradients. 
+ + Args: + dist: list of 1-d np.arrays, current estimate of nash distribution + y: list 1-d np.arrays (same shape as dist), current est. of payoff gradient + regret: list of 1-d np.arrays (same shape as dist), exploitability regrets + payoff_matrices: dictionary with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + num_players: int, number of players, in case payoff_matrices is abbreviated + p: float in [0, 1], Tsallis entropy-regularization --> 0 as p --> 0 + Returns: + gradient of exploitability w.r.t. (dist, y) as tuple + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + del regret + + # first compute policy gradients and player effects (fx) + policy_gradient = [] + other_player_fx = [] + grad_y = [] + unreg_exp = [] + reg_exp = [] + for i in range(num_players): + + nabla_i = np.zeros_like(dist[i]) + for j in range(num_players): + if j == i: + continue + if i < j: + hess_i_ij = payoff_matrices[(i, j)][0] + else: + hess_i_ij = payoff_matrices[(j, i)][1].T + + nabla_ij = hess_i_ij.dot(dist[j]) + nabla_i += nabla_ij / float(num_players - 1) + + grad_y.append(y[i] - nabla_i) + y[i] = nabla_i # TODO(imgemp): overwriting temporarily to test something + + if p > 0: + power = 1. / float(p) + s_i = np.linalg.norm(y[i], ord=power) + if s_i == 0: + br_i = misc.uniform_dist(y[i]) + else: + br_i = (y[i] / s_i)**power + else: + power = np.inf + s_i = np.linalg.norm(y[i], ord=power) + br_i = np.zeros_like(dist[i]) + maxima_i = (y[i] == s_i) + br_i[maxima_i] = 1. / maxima_i.sum() + + policy_gradient_i = nabla_i - s_i * dist[i]**p + policy_gradient.append(policy_gradient_i) + + unreg_exp.append(np.max(y[i]) - y[i].dot(dist[i])) + + br_i_inv_sparse = 1 - np.sum(br_i**(p + 1)) + dist_i_inv_sparse = 1 - np.sum(dist[i]**(p + 1)) + entr_br_i = s_i / (p + 1) * br_i_inv_sparse + entr_dist_i = s_i / (p + 1) * dist_i_inv_sparse + + reg_exp.append(y[i].dot(br_i - dist[i]) + entr_br_i - entr_dist_i) + + entr_br_vec_i = br_i_inv_sparse * br_i**(1 - p) + entr_dist_vec_i = dist_i_inv_sparse * dist[i]**(1 - p) + other_player_fx_i = (br_i - dist[i]) + 1 / (p + 1) * ( + entr_br_vec_i - entr_dist_vec_i) + other_player_fx.append(other_player_fx_i) + + # then construct exploitability gradient + grad_dist = [] + regret_delta = [] + for i in range(num_players): + + grad_dist_i = -policy_gradient[i] + for j in range(num_players): + if j == i: + continue + if i < j: + hess_j_ij = payoff_matrices[(i, j)][1] + else: + hess_j_ij = payoff_matrices[(j, i)][0].T + + grad_dist_i += hess_j_ij.dot(other_player_fx[j]) + + regret_delta_i = -(grad_dist_i - grad_dist_i.dot(dist[i])) + # regret_delta_i = y[i] - y[i].dot(dist[i]) + + grad_dist.append(grad_dist_i) + regret_delta.append(regret_delta_i) + + return (grad_dist, grad_y, regret_delta), np.mean(unreg_exp), np.mean(reg_exp) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/ped.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/ped.py new file mode 100644 index 0000000..5b511b5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/ped.py @@ -0,0 +1,115 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the 
License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Population Exploitability Descent (PED) Stochastic Approx. Nash Solver.""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np + +from open_spiel.python.algorithms.adidas_utils.helpers import simplex +from open_spiel.python.algorithms.adidas_utils.helpers.nonsymmetric import updates + + +class Solver(updates.Solver): + """PED Solver.""" + + def __init__(self, proj_grad=True, euclidean=False, lrs=(1e-1,), + rnd_init=False, seed=None, **kwargs): + """Ctor.""" + del kwargs + super().__init__(proj_grad, euclidean, rnd_init, seed) + self.lrs = lrs + + def compute_gradients(self, params, payoff_matrices): + """Compute and return exploitability. + + Args: + params: tuple of params (dist,), see ped.gradients + payoff_matrices: dictionary with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + Returns: + float, exploitability of current dist + unregularized exploitability (stochastic estimate) + unregularized exploitability (stochastic estimate) *duplicate + """ + return gradients(*params, payoff_matrices, self.num_players, self.proj_grad) + + +def gradients(dist, payoff_matrices, num_players, proj_grad=True): + """Computes exploitablity gradient. + + Args: + dist: list of 1-d np.arrays, current estimate of nash distribution + payoff_matrices: dictionary with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + num_players: int, number of players, in case payoff_matrices is abbreviated + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. (dist) as tuple + unregularized exploitability (stochastic estimate) + unregularized exploitability (stochastic estimate) *duplicate + """ + # first compute best responses and payoff gradients + nabla = [] + br = [] + unreg_exp = [] + for i in range(num_players): + + nabla_i = np.zeros_like(dist[i]) + for j in range(num_players): + if j == i: + continue + if i < j: + hess_i_ij = payoff_matrices[(i, j)][0] + else: + hess_i_ij = payoff_matrices[(j, i)][1].T + + nabla_ij = hess_i_ij.dot(dist[j]) + nabla_i += nabla_ij / float(num_players - 1) + + nabla.append(nabla_i) + + power = np.inf + s_i = np.linalg.norm(nabla_i, ord=power) + br_i = np.zeros_like(nabla_i) + maxima_i = (nabla_i == s_i) + br_i[maxima_i] = 1. 
/ maxima_i.sum() + br.append(br_i) + + unreg_exp.append(np.max(nabla_i) - nabla_i.dot(dist[i])) + + # then construct exploitability gradient + grad_dist = [] + for i in range(num_players): + + grad_dist_i = -nabla[i] + for j in range(num_players): + if j == i: + continue + if i < j: + hess_j_ij = payoff_matrices[(i, j)][1] + else: + hess_j_ij = payoff_matrices[(j, i)][0].T + + grad_dist_i += hess_j_ij.dot(br[j] - dist[j]) + + if proj_grad: + grad_dist_i = simplex.project_grad(grad_dist_i) + + grad_dist.append(grad_dist_i) + + return (grad_dist,), np.mean(unreg_exp), np.mean(unreg_exp) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/pg.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/pg.py new file mode 100644 index 0000000..edf5ae7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/pg.py @@ -0,0 +1,102 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Policy Gradient (PG).""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np + +from open_spiel.python.algorithms.adidas_utils.helpers import simplex +from open_spiel.python.algorithms.adidas_utils.helpers.nonsymmetric import updates + + +class Solver(updates.Solver): + """PG Solver.""" + + def __init__(self, proj_grad=True, euclidean=False, lrs=(1e-1,), + rnd_init=False, seed=None, **kwargs): + """Ctor.""" + del kwargs + super().__init__(proj_grad, euclidean, rnd_init, seed) + self.lrs = lrs + + def compute_gradients(self, params, payoff_matrices): + """Compute and return gradients for all parameters. + + Args: + params: tuple of params (dist,), see pg.gradients + payoff_matrices: dictionary with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + Returns: + tuple of gradients (grad_dist,), see pg.gradients + unregularized exploitability (stochastic estimate) + unregularized exploitability (stochastic estimate) *duplicate + """ + return gradients(*params, payoff_matrices, self.num_players, self.proj_grad) + + def exploitability(self, params, payoff_matrices): + """Policy gradient does not minimize any exploitability so return NaN. + + Args: + params: tuple of params (dist,) + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + np.NaN + """ + return np.nan + + +def gradients(dist, payoff_matrices, num_players, proj_grad=True): + """Computes exploitablity gradient. + + Args: + dist: list of 1-d np.arrays, current estimate of nash distribution + payoff_matrices: dictionary with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. 
keys + are sorted and arrays should be indexed in the same order + num_players: int, number of players, in case payoff_matrices is abbreviated + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of payoff w.r.t. (dist) as tuple + unregularized exploitability (stochastic estimate) + unregularized exploitability (stochastic estimate) *duplicate + """ + # first compute best responses and payoff gradients + grad_dist = [] + unreg_exp = [] + for i in range(num_players): + + nabla_i = np.zeros_like(dist[i]) + # TODO(imgemp): decide if averaging over nablas provides best comparison + for j in range(num_players): + if j == i: + continue + if i < j: + hess_i_ij = payoff_matrices[(i, j)][0] + else: + hess_i_ij = payoff_matrices[(j, i)][1].T + + nabla_ij = hess_i_ij.dot(dist[j]) + nabla_i += nabla_ij / float(num_players - 1) + + grad_dist_i = -nabla_i + if proj_grad: + grad_dist_i = simplex.project_grad(grad_dist_i) + grad_dist.append(nabla_i) + + unreg_exp.append(np.max(nabla_i) - nabla_i.dot(dist[i])) + + return (grad_dist,), np.mean(unreg_exp), np.mean(unreg_exp) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/qre.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/qre.py new file mode 100644 index 0000000..aa322db --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/qre.py @@ -0,0 +1,351 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Quantal Response Equilibrium (QRE) Stochastic Approximate Nash Solver.""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np +from scipy import special + +from open_spiel.python.algorithms.adidas_utils.helpers import simplex +from open_spiel.python.algorithms.adidas_utils.helpers.nonsymmetric import exploitability as exp + + +class Solver(object): + """QRE Solver.""" + + def __init__(self, temperature=0., proj_grad=True, euclidean=False, + cheap=False, lrs=(1e-2, 1e-1), rnd_init=False, seed=None, + **kwargs): + """Ctor.""" + del kwargs + if temperature < 0.: + raise ValueError('temperature must be non-negative') + self.num_players = None + self.temperature = temperature + self.proj_grad = proj_grad + self.cheap = cheap + self.rnd_init = rnd_init + self.lrs = lrs + self.has_aux = True + self.aux_errors = [] + + self.euclidean = euclidean + if euclidean: + self.update = self.euc_descent_step + else: + self.update = self.mirror_descent_step + + self.seed = seed + self.random = np.random.RandomState(seed) + + def init_vars(self, num_strats, num_players): + """Initialize solver parameters.""" + self.num_players = num_players + if len(num_strats) != num_players: + raise ValueError('Must specify num strategies for each player') + init_dist = [] + for num_strats_i in num_strats: + if self.rnd_init: + init_dist_i = self.random.rand(num_strats_i) + else: + init_dist_i = np.ones(num_strats_i) + init_dist_i /= init_dist_i.sum() + init_dist.append(init_dist_i) + init_y = [np.zeros_like(dist_i) for dist_i in init_dist] + return (init_dist, init_y) + + def record_aux_errors(self, grads): + """Record errors for the auxiliary variables.""" + grad_y = grads[1] + # call ravel in case use y to track entire payoff matrices in future + grad_y_flat = np.concatenate([np.ravel(g) for g in grad_y]) + self.aux_errors.append([np.linalg.norm(grad_y_flat)]) + + def compute_gradients(self, params, payoff_matrices): + """Compute and return gradients (and exploitabilities) for all parameters. + + Args: + params: tuple of params (dist, y), see ate.gradients + payoff_matrices: dictionary with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + Returns: + tuple of gradients (grad_dist, grad_y), see ate.gradients + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + if self.cheap: + return cheap_gradients(self.random, *params, payoff_matrices, + self.num_players, self.temperature, self.proj_grad) + else: + return gradients(*params, payoff_matrices, self.num_players, + self.temperature, self.proj_grad) + + def exploitability(self, params, payoff_matrices): + """Compute and return tsallis entropy regularized exploitability. + + Args: + params: tuple of params (dist, y), see ate.gradients + payoff_matrices: dictionary with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + Returns: + float, exploitability of current dist + """ + return exp.qre_exploitability(params, payoff_matrices, self.temperature) + + def euc_descent_step(self, params, grads, t): + """Projected gradient descent on exploitability using Euclidean projection. 
+ + Args: + params: tuple of variables to be updated (dist, y) + grads: tuple of variable gradients (grad_dist, grad_y) + t: int, solver iteration (unused) + Returns: + new_params: tuple of update params (new_dist, new_y) + """ + lr_dist, lr_y = self.lrs + new_dist = [] + for dist_i, dist_grad_i in zip(params[0], grads[0]): + new_dist_i = dist_i - lr_dist * dist_grad_i + new_dist_i = simplex.euclidean_projection_onto_simplex(new_dist_i) + new_dist.append(new_dist_i) + lr_y = np.clip(1 / float(t + 1), lr_y, np.inf) + new_y = [] + for y_i, y_grad_i in zip(params[1], grads[1]): + new_y_i = y_i - lr_y * y_grad_i + new_y_i = np.clip(new_y_i, 0., np.inf) + new_y.append(new_y_i) + return (new_dist, new_y) + + def mirror_descent_step(self, params, grads, t): + """Entropic mirror descent on exploitability. + + Args: + params: tuple of variables to be updated (dist, y) + grads: tuple of variable gradients (grad_dist, grad_y) + t: int, solver iteration (unused) + Returns: + new_params: tuple of update params (new_dist, new_y) + """ + lr_dist, lr_y = self.lrs + new_dist = [] + for dist_i, dist_grad_i in zip(params[0], grads[0]): + new_dist_i = np.log(np.clip(dist_i, 0., np.inf)) - lr_dist * dist_grad_i + new_dist_i = special.softmax(new_dist_i) + new_dist.append(new_dist_i) + lr_y = np.clip(1 / float(t + 1), lr_y, np.inf) + new_y = [] + for y_i, y_grad_i in zip(params[1], grads[1]): + new_y_i = y_i - lr_y * y_grad_i + new_y_i = np.clip(new_y_i, 0., np.inf) + new_y.append(new_y_i) + return (new_dist, new_y) + + +def gradients(dist, y, payoff_matrices, num_players, temperature=0., + proj_grad=True): + """Computes exploitablity gradient and aux variable gradients. + + Args: + dist: list of 1-d np.arrays, current estimate of nash distribution + y: list 1-d np.arrays (same shape as dist), current est. of payoff gradient + payoff_matrices: dictionary with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + num_players: int, number of players, in case payoff_matrices is abbreviated + temperature: non-negative float, default 0. + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. (dist, y) as tuple + unregularized exploitability (stochastic estimate) + shannon regularized exploitability (stochastic estimate) + """ + # first compute policy gradients and player effects (fx) + policy_gradient = [] + other_player_fx = [] + grad_y = [] + unreg_exp = [] + reg_exp = [] + for i in range(num_players): + + nabla_i = np.zeros_like(dist[i]) + for j in range(num_players): + if j == i: + continue + if i < j: + hess_i_ij = payoff_matrices[(i, j)][0] + else: + hess_i_ij = payoff_matrices[(j, i)][1].T + + nabla_ij = hess_i_ij.dot(dist[j]) + nabla_i += nabla_ij / float(num_players - 1) + + grad_y.append(y[i] - nabla_i) + + if temperature > 0: + br_i = special.softmax(y[i] / temperature) + br_i_policy_gradient = nabla_i - temperature * (np.log(br_i) + 1) + else: + power = np.inf + s_i = np.linalg.norm(y[i], ord=power) + br_i = np.zeros_like(dist[i]) + maxima_i = (y[i] == s_i) + br_i[maxima_i] = 1. 
/ maxima_i.sum() + br_i_policy_gradient = np.zeros_like(br_i) + + policy_gradient_i = nabla_i + if temperature > 0: + policy_gradient_i -= temperature * (np.log(dist[i]) + 1) + policy_gradient.append(policy_gradient_i) + + unreg_exp.append(np.max(y[i]) - y[i].dot(dist[i])) + + entr_br_i = temperature * special.entr(br_i).sum() + entr_dist_i = temperature * special.entr(dist[i]).sum() + + reg_exp.append(y[i].dot(br_i - dist[i]) + entr_br_i - entr_dist_i) + + other_player_fx_i = (br_i - dist[i]) + if temperature > 0: + # much faster to avoid constructing br_i_mat and then computing + # br_i_mat.dot(br_policy_gradient) -- instead, expand out and only compute + # inner products + temp = (br_i_policy_gradient - br_i.dot(br_i_policy_gradient)) + other_player_fx_i += br_i / temperature * temp + other_player_fx.append(other_player_fx_i) + + # then construct exploitability gradient + grad_dist = [] + for i in range(num_players): + + grad_dist_i = -policy_gradient[i] + for j in range(num_players): + if j == i: + continue + if i < j: + hess_j_ij = payoff_matrices[(i, j)][1] + else: + hess_j_ij = payoff_matrices[(j, i)][0].T + + grad_dist_i += hess_j_ij.dot(other_player_fx[j]) + + if proj_grad: + grad_dist_i = simplex.project_grad(grad_dist_i) + + grad_dist.append(grad_dist_i) + + return (grad_dist, grad_y), np.mean(unreg_exp), np.mean(reg_exp) + + +def cheap_gradients(random, dist, y, payoff_matrices, num_players, + temperature=0., proj_grad=True): + """Computes exploitablity gradient and aux variable gradients with samples. + + This implementation takes payoff_matrices as input so technically uses O(d^2) + compute but only a single column of payoff_matrices is used to perform the + update so can be re-implemented in O(d) if needed. + + Args: + random: random number generator, np.random.RandomState(seed) + dist: list of 1-d np.arrays, current estimate of nash distribution + y: list 1-d np.arrays (same shape as dist), current est. of payoff gradient + payoff_matrices: dictionary with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + num_players: int, number of players, in case payoff_matrices is abbreviated + temperature: non-negative float, default 0. + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. (dist, y) as tuple + unregularized exploitability (stochastic estimate) + shannon regularized exploitability (stochastic estimate) + """ + # first compute policy gradients and player effects (fx) + policy_gradient = [] + other_player_fx = [] + grad_y = [] + unreg_exp = [] + reg_exp = [] + for i in range(num_players): + + others = list(range(num_players)) + others.remove(i) + j = np.random.choice(others) + action_j = random.choice(dist[j].size, p=dist[j]) + if i < j: + hess_i_ij = payoff_matrices[(i, j)][0] + else: + hess_i_ij = payoff_matrices[(j, i)][1].T + nabla_i = hess_i_ij[:, action_j] + + grad_y.append(y[i] - nabla_i) + + if temperature > 0: + br_i = special.softmax(y[i] / temperature) + br_i_policy_gradient = nabla_i - temperature * (np.log(br_i) + 1) + else: + power = np.inf + s_i = np.linalg.norm(y[i], ord=power) + br_i = np.zeros_like(dist[i]) + maxima_i = (y[i] == s_i) + br_i[maxima_i] = 1. 
/ maxima_i.sum() + br_i_policy_gradient = np.zeros_like(br_i) + + policy_gradient_i = nabla_i + if temperature > 0: + policy_gradient_i -= temperature * (np.log(dist[i]) + 1) + policy_gradient.append(policy_gradient_i) + + unreg_exp.append(np.max(y[i]) - y[i].dot(dist[i])) + + entr_br_i = temperature * special.entr(br_i).sum() + entr_dist_i = temperature * special.entr(dist[i]).sum() + + reg_exp.append(y[i].dot(br_i - dist[i]) + entr_br_i - entr_dist_i) + + other_player_fx_i = (br_i - dist[i]) + if temperature > 0: + # much faster to avoid constructing br_i_mat and then computing + # br_i_mat.dot(br_policy_gradient) -- instead, expand out and only compute + # inner products + temp = (br_i_policy_gradient - br_i.dot(br_i_policy_gradient)) + other_player_fx_i += br_i / temperature * temp + other_player_fx.append(other_player_fx_i) + + # then construct exploitability gradient + grad_dist = [] + for i in range(num_players): + + grad_dist_i = -policy_gradient[i] + for j in range(num_players): + if j == i: + continue + if i < j: + hess_j_ij = payoff_matrices[(i, j)][1] + else: + hess_j_ij = payoff_matrices[(j, i)][0].T + + action_u = random.choice(dist[j].size) # uniform, ~importance sampling + other_player_fx_j = dist[j].size * other_player_fx[j][action_u] + grad_dist_i += hess_j_ij[:, action_u] * other_player_fx_j + + if proj_grad: + grad_dist_i = simplex.project_grad(grad_dist_i) + + grad_dist.append(grad_dist_i) + + return (grad_dist, grad_y), np.mean(unreg_exp), np.mean(reg_exp) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/qre_anneal.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/qre_anneal.py new file mode 100644 index 0000000..e43ead4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/qre_anneal.py @@ -0,0 +1,376 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Quantal Response Equilibrium (QRE) Stochastic Approximate Nash Solver.""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np +from scipy import special + +from open_spiel.python.algorithms.adidas_utils.helpers import simplex +from open_spiel.python.algorithms.adidas_utils.helpers.nonsymmetric import exploitability as exp + + +class Solver(object): + """QRE Solver.""" + + def __init__(self, temperature=1., proj_grad=True, euclidean=False, + cheap=False, lrs=(1e-2, 1e-1), exp_thresh=-1., rnd_init=False, + seed=None, **kwargs): + """Ctor.""" + del kwargs + if temperature < 0.: + raise ValueError('temperature must be non-negative') + self.num_players = None + self.temperature = temperature + self.proj_grad = proj_grad + self.cheap = cheap + self.rnd_init = rnd_init + self.lrs = lrs + self.exp_thresh = exp_thresh + self.has_aux = True + self.aux_errors = [] + + self.euclidean = euclidean + if euclidean: + self.update = self.euc_descent_step + else: + self.update = self.mirror_descent_step + + self.seed = seed + self.random = np.random.RandomState(seed) + + def init_vars(self, num_strats, num_players): + """Initialize solver parameters.""" + self.num_players = num_players + if len(num_strats) != num_players: + raise ValueError('Must specify num strategies for each player') + init_dist = [] + for num_strats_i in num_strats: + if self.rnd_init: + init_dist_i = self.random.rand(num_strats_i) + else: + init_dist_i = np.ones(num_strats_i) + init_dist_i /= init_dist_i.sum() + init_dist.append(init_dist_i) + init_y = [np.zeros_like(dist_i) for dist_i in init_dist] + init_anneal_steps = 0 + return (init_dist, init_y, init_anneal_steps) + + def record_aux_errors(self, grads): + """Record errors for the auxiliary variables.""" + grad_y = grads[1] + # call ravel in case use y to track entire payoff matrices in future + grad_y_flat = np.concatenate([np.ravel(g) for g in grad_y]) + self.aux_errors.append([np.linalg.norm(grad_y_flat)]) + + def compute_gradients(self, params, payoff_matrices): + """Compute and return gradients (and exploitabilities) for all parameters. + + Args: + params: tuple of params (dist, y, anneal_steps), see gradients + payoff_matrices: dictionary with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + Returns: + tuple of gradients (grad_dist, grad_y, grad_anneal_steps), see gradients + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + if self.cheap: + return self.cheap_gradients(self.random, *params, payoff_matrices, + self.num_players, self.temperature, + self.proj_grad) + else: + return self.gradients(*params, payoff_matrices, self.num_players, + self.temperature, self.proj_grad) + + def exploitability(self, params, payoff_matrices): + """Compute and return tsallis entropy regularized exploitability. + + Args: + params: tuple of params (dist, y), see ate.gradients + payoff_matrices: dictionary with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + Returns: + float, exploitability of current dist + """ + return exp.qre_exploitability(params, payoff_matrices, self.temperature) + + def euc_descent_step(self, params, grads, t): + """Projected gradient descent on exploitability using Euclidean projection. 
+ + Args: + params: tuple of variables to be updated (dist, y, anneal_steps) + grads: tuple of variable gradients (grad_dist, grad_y, grad_anneal_steps) + t: int, solver iteration (unused) + Returns: + new_params: tuple of update params (new_dist, new_y, new_anneal_steps) + """ + lr_dist, lr_y = self.lrs + new_dist = [] + for dist_i, dist_grad_i in zip(params[0], grads[0]): + new_dist_i = dist_i - lr_dist * dist_grad_i + new_dist_i = simplex.euclidean_projection_onto_simplex(new_dist_i) + new_dist.append(new_dist_i) + lr_y = np.clip(1 / float(t + 1), lr_y, np.inf) + new_y = [] + for y_i, y_grad_i in zip(params[1], grads[1]): + new_y_i = y_i - lr_y * y_grad_i + new_y_i = np.clip(new_y_i, 0., np.inf) + new_y.append(new_y_i) + new_anneal_steps = params[2] + grads[2] + return (new_dist, new_y, new_anneal_steps) + + def mirror_descent_step(self, params, grads, t): + """Entropic mirror descent on exploitability. + + Args: + params: tuple of variables to be updated (dist, y, anneal_steps) + grads: tuple of variable gradients (grad_dist, grad_y, grad_anneal_steps) + t: int, solver iteration (unused) + Returns: + new_params: tuple of update params (new_dist, new_y, new_anneal_steps) + """ + lr_dist, lr_y = self.lrs + new_dist = [] + for dist_i, dist_grad_i in zip(params[0], grads[0]): + new_dist_i = np.log(np.clip(dist_i, 0., np.inf)) - lr_dist * dist_grad_i + new_dist_i = special.softmax(new_dist_i) + new_dist.append(new_dist_i) + lr_y = np.clip(1 / float(t + 1), lr_y, np.inf) + new_y = [] + for y_i, y_grad_i in zip(params[1], grads[1]): + new_y_i = y_i - lr_y * y_grad_i + new_y_i = np.clip(new_y_i, 0., np.inf) + new_y.append(new_y_i) + new_anneal_steps = params[2] + grads[2] + return (new_dist, new_y, new_anneal_steps) + + def gradients(self, dist, y, anneal_steps, payoff_matrices, num_players, + temperature=0., proj_grad=True): + """Computes exploitablity gradient and aux variable gradients. + + Args: + dist: list of 1-d np.arrays, current estimate of nash distribution + y: list 1-d np.arrays (same shape as dist), current est. of payoff + gradient + anneal_steps: int, elapsed num steps since last anneal + payoff_matrices: dictionary with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + num_players: int, number of players, in case payoff_matrices is + abbreviated + temperature: non-negative float, default 0. + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. (dist, y, anneal_steps) as tuple + unregularized exploitability (stochastic estimate) + shannon regularized exploitability (stochastic estimate) + """ + # first compute policy gradients and player effects (fx) + policy_gradient = [] + other_player_fx = [] + grad_y = [] + unreg_exp = [] + reg_exp = [] + for i in range(num_players): + + nabla_i = np.zeros_like(dist[i]) + for j in range(num_players): + if j == i: + continue + if i < j: + hess_i_ij = payoff_matrices[(i, j)][0] + else: + hess_i_ij = payoff_matrices[(j, i)][1].T + + nabla_ij = hess_i_ij.dot(dist[j]) + nabla_i += nabla_ij / float(num_players - 1) + + grad_y.append(y[i] - nabla_i) + + if temperature >= 1e-3: + br_i = special.softmax(y[i] / temperature) + br_i_policy_gradient = nabla_i - temperature * (np.log(br_i) + 1) + else: + power = np.inf + s_i = np.linalg.norm(y[i], ord=power) + br_i = np.zeros_like(dist[i]) + maxima_i = (y[i] == s_i) + br_i[maxima_i] = 1. 
/ maxima_i.sum() + br_i_policy_gradient = np.zeros_like(br_i) + + policy_gradient_i = nabla_i - temperature * (np.log(dist[i]) + 1) + policy_gradient.append(policy_gradient_i) + + unreg_exp.append(np.max(y[i]) - y[i].dot(dist[i])) + + entr_br_i = temperature * special.entr(br_i).sum() + entr_dist_i = temperature * special.entr(dist[i]).sum() + + reg_exp.append(y[i].dot(br_i - dist[i]) + entr_br_i - entr_dist_i) + + other_player_fx_i = (br_i - dist[i]) + if temperature >= 1e-3: + # much faster to avoid constructing br_i_mat and then computing + # br_i_mat.dot(br_policy_gradient) -- instead, expand out and only + # compute inner products + temp = (br_i_policy_gradient - br_i.dot(br_i_policy_gradient)) + other_player_fx_i += br_i / temperature * temp + other_player_fx.append(other_player_fx_i) + + # then construct exploitability gradient + grad_dist = [] + for i in range(num_players): + + grad_dist_i = -policy_gradient[i] + for j in range(num_players): + if j == i: + continue + if i < j: + hess_j_ij = payoff_matrices[(i, j)][1] + else: + hess_j_ij = payoff_matrices[(j, i)][0].T + + grad_dist_i += hess_j_ij.dot(other_player_fx[j]) + + if proj_grad: + grad_dist_i = simplex.project_grad(grad_dist_i) + + grad_dist.append(grad_dist_i) + + unreg_exp_mean = np.mean(unreg_exp) + reg_exp_mean = np.mean(reg_exp) + + _, lr_y = self.lrs + if (reg_exp_mean < self.exp_thresh) and (anneal_steps >= 1 / lr_y): + self.temperature = np.clip(temperature / 2., 0., np.inf) + grad_anneal_steps = -anneal_steps + else: + grad_anneal_steps = 1 + + return (grad_dist, grad_y, grad_anneal_steps), unreg_exp_mean, reg_exp_mean + + def cheap_gradients(self, random, dist, y, anneal_steps, payoff_matrices, + num_players, temperature=0., proj_grad=True): + """Computes exploitablity gradient and aux variable gradients with samples. + + This implementation takes payoff_matrices as input so technically uses + O(d^2) compute but only a single column of payoff_matrices is used to + perform the update so can be re-implemented in O(d) if needed. + + Args: + random: random number generator, np.random.RandomState(seed) + dist: list of 1-d np.arrays, current estimate of nash distribution + y: list 1-d np.arrays (same shape as dist), current est. of payoff + gradient + anneal_steps: int, elapsed num steps since last anneal + payoff_matrices: dictionary with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + num_players: int, number of players, in case payoff_matrices is + abbreviated + temperature: non-negative float, default 0. + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. 
(dist, y, anneal_steps) as tuple + unregularized exploitability (stochastic estimate) + shannon regularized exploitability (stochastic estimate) + """ + # first compute policy gradients and player effects (fx) + policy_gradient = [] + other_player_fx = [] + grad_y = [] + unreg_exp = [] + reg_exp = [] + for i in range(num_players): + + others = list(range(num_players)) + others.remove(i) + j = np.random.choice(others) + action_j = random.choice(dist[j].size, p=dist[j]) + if i < j: + hess_i_ij = payoff_matrices[(i, j)][0] + else: + hess_i_ij = payoff_matrices[(j, i)][1].T + nabla_i = hess_i_ij[:, action_j] + + grad_y.append(y[i] - nabla_i) + + if temperature >= 1e-3: + br_i = special.softmax(y[i] / temperature) + br_i_policy_gradient = nabla_i - temperature * (np.log(br_i) + 1) + else: + power = np.inf + s_i = np.linalg.norm(y[i], ord=power) + br_i = np.zeros_like(dist[i]) + maxima_i = (y[i] == s_i) + br_i[maxima_i] = 1. / maxima_i.sum() + br_i_policy_gradient = np.zeros_like(br_i) + + policy_gradient_i = nabla_i - temperature * (np.log(dist[i]) + 1) + policy_gradient.append(policy_gradient_i) + + unreg_exp.append(np.max(y[i]) - y[i].dot(dist[i])) + + entr_br_i = temperature * special.entr(br_i).sum() + entr_dist_i = temperature * special.entr(dist[i]).sum() + + reg_exp.append(y[i].dot(br_i - dist[i]) + entr_br_i - entr_dist_i) + + other_player_fx_i = (br_i - dist[i]) + if temperature >= 1e-3: + # much faster to avoid constructing br_i_mat and then computing + # br_i_mat.dot(br_policy_gradient) -- instead, expand out and only + # compute inner products + temp = (br_i_policy_gradient - br_i.dot(br_i_policy_gradient)) + other_player_fx_i += br_i / temperature * temp + other_player_fx.append(other_player_fx_i) + + # then construct exploitability gradient + grad_dist = [] + for i in range(num_players): + + grad_dist_i = -policy_gradient[i] + for j in range(num_players): + if j == i: + continue + if i < j: + hess_j_ij = payoff_matrices[(i, j)][1] + else: + hess_j_ij = payoff_matrices[(j, i)][0].T + + action_u = random.choice(dist[j].size) # uniform, ~importance sampling + other_player_fx_j = dist[j].size * other_player_fx[j][action_u] + grad_dist_i += hess_j_ij[:, action_u] * other_player_fx_j + + if proj_grad: + grad_dist_i = simplex.project_grad(grad_dist_i) + + grad_dist.append(grad_dist_i) + + unreg_exp_mean = np.mean(unreg_exp) + reg_exp_mean = np.mean(reg_exp) + + _, lr_y = self.lrs + if (reg_exp_mean < self.exp_thresh) and (anneal_steps >= 1 / lr_y): + self.temperature = np.clip(temperature / 2., 0., np.inf) + grad_anneal_steps = -anneal_steps + else: + grad_anneal_steps = 1 + + return (grad_dist, grad_y, grad_anneal_steps), unreg_exp_mean, reg_exp_mean diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/regmatch.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/regmatch.py new file mode 100644 index 0000000..c7f9d0a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/regmatch.py @@ -0,0 +1,171 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Regret Matching Approximate Nash Solver.""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np + + +class Solver(object): + """Regret-matching Solver.""" + + def __init__(self, optimism=True, discount=False, rnd_init=False, seed=None, + **kwargs): + """Ctor.""" + del kwargs + self.num_players = None + self.lrs = None + self.optimism = optimism + self.discount = discount + self.rnd_init = rnd_init + self.has_aux = True + self.aux_errors = [] + + self.seed = seed + self.random = np.random.RandomState(seed) + + def init_vars(self, num_strats, num_players): + """Initialize solver parameters.""" + self.num_players = num_players + if len(num_strats) != num_players: + raise ValueError('Must specify num strategies for each player') + init_dist = [] + for num_strats_i in num_strats: + if self.rnd_init: + init_dist_i = self.random.rand(num_strats_i) + else: + init_dist_i = np.ones(num_strats_i) + init_dist_i /= init_dist_i.sum() + init_dist.append(init_dist_i) + init_regret = [np.zeros_like(dist_i) for dist_i in init_dist] + return (init_dist, init_regret) + + def record_aux_errors(self, grads): + """Record errors for the auxiliary variables.""" + grad_regret = grads[1] + grad_regret_flat = np.concatenate(grad_regret) + self.aux_errors.append([np.linalg.norm(grad_regret_flat)]) + + def compute_gradients(self, params, payoff_matrices): + """Compute and return gradients (and exploitabilities) for all parameters. + + Args: + params: tuple of params (dist, regret), see regmatch.gradients + payoff_matrices: dictionary with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + Returns: + tuple of gradients (grad_dist, grad_regret), see ate.gradients + unregularized exploitability (stochastic estimate) + solver exploitability (stochastic estimate) - NaN + """ + return gradients(*params, payoff_matrices, self.num_players) + + def exploitability(self, params, payoff_matrices): + """Regret matching does not minimize any exploitability so return NaN. + + Args: + params: tuple of params (dist,) + payoff_matrices: dictionary with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + Returns: + np.NaN + """ + del params + del payoff_matrices + return np.nan + + def update(self, params, grads, t): + """Update cumulative regret and strategy (dist). 
+ + Args: + params: tuple of variables to be updated (dist, regret) + grads: tuple of variable gradients (grad_dist, grad_regret) + t: int, solver iteration (not used) + Returns: + new_params: tuple of update params (new_dist, new_regret) + """ + dist, regret = params + regret_delta = grads[1] + if self.discount: + gamma = t / float(t + 1) + else: + gamma = 1 + + new_dist = [] + new_regret = [] + for dist_i, regret_i, regret_delta_i in zip(dist, regret, regret_delta): + new_regret_i = gamma * regret_i + regret_delta_i + new_clipped_regrets_i = np.clip( + new_regret_i + self.optimism * regret_delta_i, 0., np.inf) + if np.sum(new_clipped_regrets_i) > 0: + new_dist_i = new_clipped_regrets_i / new_clipped_regrets_i.sum() + else: + new_dist_i = np.ones_like(dist_i) / dist_i.size + new_dist.append(new_dist_i) + new_regret.append(new_regret_i) + + new_params = (new_dist, new_regret) + return new_params + + +def gradients(dist, regret, payoff_matrices, num_players): + """Computes regret delta to be added to regret in update. + + Args: + dist: list of 1-d np.arrays, current estimate of nash distribution + regret: list of 1-d np.arrays (same as dist), current estimate of regrets + payoff_matrices: dictionary with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + num_players: int, number of players, in case payoff_matrices is abbreviated + Returns: + deltas w.r.t. (dist, regret) as tuple + unregularized exploitability (stochastic estimate) + solver exploitability (stochastic estimate) - NaN + """ + del regret + + # first compute best responses and payoff gradients + grad_dist = [] + grad_regret = [] + unreg_exp = [] + for i in range(num_players): + + nabla_i = np.zeros_like(dist[i]) + # TODO(imgemp): decide if averaging over nablas provides best comparison + for j in range(num_players): + if j == i: + continue + if i < j: + hess_i_ij = payoff_matrices[(i, j)][0] + else: + hess_i_ij = payoff_matrices[(j, i)][1].T + + nabla_ij = hess_i_ij.dot(dist[j]) + nabla_i += nabla_ij / float(num_players - 1) + + grad_dist_i = np.nan * np.ones_like(nabla_i) + grad_dist.append(grad_dist_i) + + utility_i = nabla_i.dot(dist[i]) + grad_regret_i = nabla_i - utility_i + grad_regret.append(grad_regret_i) + + unreg_exp.append(np.max(nabla_i) - nabla_i.dot(dist[i])) + + return (grad_dist, grad_regret), np.mean(unreg_exp), np.nan diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/sgd.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/sgd.py new file mode 100644 index 0000000..3bbc34c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/sgd.py @@ -0,0 +1,181 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Stochastic Gradient Descent (SGD) Approx. 
Nash Solver.""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np + +from open_spiel.python.algorithms.adidas_utils.helpers import simplex +from open_spiel.python.algorithms.adidas_utils.helpers.nonsymmetric import exploitability as exp +from open_spiel.python.algorithms.adidas_utils.helpers.nonsymmetric import updates + + +class Solver(updates.Solver): + """SGD Solver.""" + + def __init__(self, temperature=0., proj_grad=True, euclidean=False, + lrs=(1e-1,), rnd_init=False, seed=None, **kwargs): + """Ctor.""" + del kwargs + super().__init__(proj_grad, euclidean, rnd_init, seed) + if temperature < 0.: + raise ValueError('temperature must be non-negative') + self.temperature = temperature + self.lrs = lrs + self.num_estimates = 2 + + if temperature > 0: + self.eps = np.exp(-1 / temperature) # ensure dist[i] >= eps / dim(dist) + else: + self.eps = 0. + if euclidean: + self.update = lambda *args: self.euc_descent_step(*args, eps=self.eps) + else: + self.update = lambda *args: self.mirror_descent_step(*args, eps=self.eps) + + def init_vars(self, num_strats, num_players): + """Initialize solver parameters.""" + self.num_players = num_players + if len(num_strats) != num_players: + raise ValueError('Must specify num strategies for each player') + init_dist = [] + for num_strats_i in num_strats: + if self.rnd_init: + init_dist_i = self.random.rand(num_strats_i) + else: + init_dist_i = np.ones(num_strats_i) + init_dist_i /= init_dist_i.sum() + init_dist_i = simplex.project_to_interior(init_dist_i, self.eps) + init_dist.append(init_dist_i) + return (init_dist,) + + def compute_gradients(self, params, payoff_matrices): + """Compute and return exploitability. + + Args: + params: tuple of params (dist,), see sgd.gradients + payoff_matrices: 2 dictionaries with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + Returns: + float, exploitability of current dist + unregularized exploitability (stochastic estimate) + shannon regularized exploitability (stochastic estimate) + """ + return gradients(*params, payoff_matrices, self.num_players, + self.temperature, self.proj_grad) + + def exploitability(self, params, payoff_matrices): + """Compute and return exploitability. + + Args: + params: tuple of params (dist,), see sgd.gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + float, exploitability as avg squared norm of projected-gradient + """ + return exp.grad_norm_exploitability(params, payoff_matrices, eta=1., + temperature=self.temperature) + + +def gradients(dist, payoff_matrices, num_players, temperature=0., + proj_grad=True): + """Computes exploitablity gradient. + + Assumption: eta_k = 1 for all k + + Args: + dist: list of 1-d np.arrays, current estimate of nash distribution + payoff_matrices: 2 dictionaries with keys as tuples of agents (i, j) and + values of (2 x A x A) np.arrays, payoffs for each joint action. keys + are sorted and arrays should be indexed in the same order + num_players: int, number of players, in case payoff_matrices is abbreviated + temperature: non-negative float, default 0. + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. 
(dist) as tuple + unregularized exploitability (stochastic estimate) + shannon regularized exploitability (stochastic estimate) + """ + # first compute projected gradients (for every player, for each sample a & b) + tau = temperature + + pgs = [] + for i in range(num_players): + + pg_i_a = np.zeros_like(dist[i]) + pg_i_b = np.zeros_like(dist[i]) + + for j in range(num_players): + if j == i: + continue + if i < j: + hess_i_ij_a = payoff_matrices[0][(i, j)][0] + hess_i_ij_b = payoff_matrices[1][(i, j)][0] + else: + hess_i_ij_a = payoff_matrices[0][(j, i)][1].T + hess_i_ij_b = payoff_matrices[1][(j, i)][1].T + + pg_i_a_est = simplex.project_grad(hess_i_ij_a.dot(dist[j])) + pg_i_b_est = simplex.project_grad(hess_i_ij_b.dot(dist[j])) + + pg_i_a += pg_i_a_est / float(num_players - 1) + pg_i_b += pg_i_b_est / float(num_players - 1) + + pgs.append((pg_i_a, pg_i_b)) + + # then construct unbiased stochastic gradient + grad_dist = [] + unreg_exp = [] + reg_exp = [] + + for i in range(num_players): + + grad_dist_i = np.zeros_like(dist[i]) + + for j in range(num_players): + pg_j_a = pgs[j][0] + pg_j_b = pgs[j][1] + if tau > 0.: + log_dist_safe = np.clip(np.log(dist[j]), -40, 0) + entr_grad_proj = simplex.project_grad(-tau * (log_dist_safe + 1)) + else: + entr_grad_proj = 0. + pg_j_a_entr = pg_j_a + entr_grad_proj + pg_j_b_entr = pg_j_b + entr_grad_proj + + if j == i: + if tau > 0.: + hess_j_ij_a = -tau * np.diag(1. / dist[j]) + else: + hess_j_ij_a = np.diag(np.zeros_like(dist[j])) + unreg_exp_i = np.dot(pg_j_a, pg_j_b) + reg_exp_i = np.dot(pg_j_a_entr, pg_j_b_entr) + unreg_exp.append(unreg_exp_i) + reg_exp.append(reg_exp_i) + elif i < j: + hess_j_ij_a = payoff_matrices[0][(i, j)][1] + else: + hess_j_ij_a = payoff_matrices[0][(j, i)][0].T + + grad_dist_i += 2. * hess_j_ij_a.dot(pg_j_b_entr) + + if proj_grad: + grad_dist_i = simplex.project_grad(grad_dist_i) + + grad_dist.append(grad_dist_i) + + return (grad_dist,), np.mean(unreg_exp), np.mean(reg_exp) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/solvers_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/solvers_test.py new file mode 100644 index 0000000..8a56b11 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/nonsymmetric/solvers_test.py @@ -0,0 +1,121 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
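The nonsymmetric SGD gradient above keeps every stochastic estimate tangent to the probability simplex via `simplex.project_grad`. As an illustrative aside (not part of the patch itself), here is a minimal sketch of that projection, assuming it is the usual centering operation `g - mean(g)`; the actual helper lives in `adidas_utils/helpers/simplex.py`:

```python
import numpy as np

def project_grad_sketch(g):
    # Assumed behaviour of simplex.project_grad: drop the component along the
    # all-ones direction so that gradient steps preserve sum(dist) == 1.
    return g - np.mean(g)

dist = np.array([0.5, 0.3, 0.2])
g = np.array([1.0, -0.4, 0.7])
pg = project_grad_sketch(g)
print(np.isclose(pg.sum(), 0.0))                 # True: tangent to the simplex
print(np.isclose((dist - 0.1 * pg).sum(), 1.0))  # True: step stays on sum == 1
```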
+ +"""Tests for open_spiel.python.algorithms.adidas_utils.nonsymmetric.""" + +import itertools + +from absl import logging # pylint:disable=unused-import +from absl.testing import absltest +from absl.testing import parameterized + +import numpy as np + +from scipy.spatial import distance + +from open_spiel.python.algorithms.adidas_utils.helpers import misc + +from open_spiel.python.algorithms.adidas_utils.solvers.nonsymmetric import adam +from open_spiel.python.algorithms.adidas_utils.solvers.nonsymmetric import ate +from open_spiel.python.algorithms.adidas_utils.solvers.nonsymmetric import ped +from open_spiel.python.algorithms.adidas_utils.solvers.nonsymmetric import qre +from open_spiel.python.algorithms.adidas_utils.solvers.nonsymmetric import sgd + + +def numerical_gradient(fun, x, eps=np.sqrt(np.finfo(float).eps)): + fun_0 = fun(x) + num_grad = [np.zeros_like(xi) for xi in x] + x_plus_dx = [np.copy(xi) for xi in x] + for i, xi in enumerate(x): + for j, xij in enumerate(xi): + x_plus_dx[i][j] = xij + eps + num_grad[i][j] = (fun(x_plus_dx) - fun_0) / eps + x_plus_dx[i][j] = xij + return num_grad + + +def prep_params(dist, pt, num_params): + params = [dist] + if num_params > 1: + num_players = len(dist) + nabla = [misc.pt_reduce(pt[i], dist, [i]) for i in range(num_players)] + params += [nabla] # policy_gradient + return tuple(params) + + +class ExploitabilityDescentTest(parameterized.TestCase): + + @parameterized.named_parameters( + ("ATE_p=0.5", (ate, 0.5, False)), + ("ATE_p=0.1", (ate, 0.1, False)), + ("ATE_p=0", (ate, 0., False)), + ("PED", (ped, False)), + ("ATE_p=1", (ate, 1., False)), + ("QRE_t=0.0", (qre, 0.0, False)), + ("QRE_t=0.1", (qre, 0.1, False)), + ("SGD_t=0.0", (sgd, 0.0, False)), + ("SGD_t=0.1", (sgd, 0.1, False)), + ("ADAM_t=0.0", (adam, 0.0, False)), + ("ADAM_t=0.1", (adam, 0.1, False)), + ) + def test_exploitability_gradient_on_nonsymmetric_three_player_matrix_games( + self, solver_tuple, trials=100, max_num_strats=3, atol=1e-1, rtol=1e-1, + seed=1234): + num_players = 3 + solver = solver_tuple[0].Solver(*solver_tuple[1:]) + + if hasattr(solver, "num_estimates"): + num_estimates = solver.num_estimates + else: + num_estimates = 1 + + random = np.random.RandomState(seed) + + successes = [] + for _ in range(trials): + num_strats = random.randint(low=2, high=max_num_strats + 1, + size=num_players) + num_strats = tuple([int(ns) for ns in num_strats]) + payoff_tensor = random.rand(num_players, *num_strats) + + num_params = len(solver.init_vars(num_strats, num_players)) + dirichlet_alpha = [np.ones(num_strats_i) for num_strats_i in num_strats] + dist = [random.dirichlet(alpha_i) for alpha_i in dirichlet_alpha] + params = prep_params(dist, payoff_tensor, num_params) + + payoff_matrices = {} + for pi, pj in itertools.combinations(range(num_players), 2): + key = (pi, pj) + pt_i = misc.pt_reduce(payoff_tensor[pi], dist, [pi, pj]) + pt_j = misc.pt_reduce(payoff_tensor[pj], dist, [pi, pj]) + payoff_matrices[key] = np.stack((pt_i, pt_j), axis=0) + if num_estimates > 1: + payoff_matrices = [payoff_matrices] * num_estimates + grad = solver.compute_gradients(params, payoff_matrices)[0][0] + grad = np.concatenate(grad) / float(num_players) + + exp = lambda x: solver.exploitability(x, payoff_tensor) # pylint: disable=cell-var-from-loop + num_grad = np.concatenate(numerical_gradient(exp, dist)) + + successes += [np.logical_and(np.allclose(grad, num_grad, rtol, atol), + distance.cosine(grad, num_grad) <= atol)] + + perc = 100 * np.mean(successes) + logging.info("gradient accuracy 
success rate out of %d is %f", trials, perc) + self.assertGreaterEqual( + perc, 95., "exploitability gradient accuracy is too poor") + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/__init__.py new file mode 100644 index 0000000..a1223b9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/adam.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/adam.py new file mode 100644 index 0000000..343d80f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/adam.py @@ -0,0 +1,195 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Stochastic Gradient Descent (Adam) Approx. Nash Solver.""" + +from absl import logging # pylint:disable=unused-import + +import jax +import jax.numpy as jnp + +import numpy as np + +import optax + +from open_spiel.python.algorithms.adidas_utils.helpers import simplex +from open_spiel.python.algorithms.adidas_utils.helpers.symmetric import exploitability as exp +from open_spiel.python.algorithms.adidas_utils.helpers.symmetric import updates + + +class Solver(updates.Solver): + """Adam Solver.""" + + def __init__(self, temperature=0., proj_grad=True, euclidean=False, + lrs=(1e-1,), rnd_init=False, seed=None, **kwargs): + """Ctor.""" + del kwargs + super().__init__(proj_grad, euclidean, rnd_init, seed) + if temperature < 0.: + raise ValueError('temperature must be non-negative') + self.temperature = temperature + self.lrs = lrs + self.num_estimates = 2 + + if temperature > 0: + self.eps = np.exp(-1 / temperature) # ensure dist[i] >= eps / dim(dist) + else: + self.eps = 0. 
+ self.update = lambda *args: self.descent_step(*args, eps=self.eps) + + self.opt = optax.adam(learning_rate=lrs[0]) + self.opt_state = self.opt.init(jnp.zeros(1)) + + def init_vars(self, num_strats, num_players): + """Initialize solver parameters.""" + self.num_players = num_players + if self.rnd_init: + init_dist = self.random.rand(num_strats) + else: + init_dist = np.ones(num_strats) + init_dist /= init_dist.sum() + init_dist = simplex.project_to_interior(init_dist, self.eps) + + init_params = jnp.array(dist_to_logits(init_dist)) + + self.opt_state = self.opt.init(init_params) + + return (init_dist,) + + def descent_step(self, params, grads, t, eps=0.): + """Projected gradient descent on exploitability using Euclidean projection. + + Args: + params: tuple of variables to be updated (dist,) + grads: tuple of variable gradients (grad_dist,) + t: int, solver iteration (unused) + eps: float > 0, force all probabilities >= eps / dim(dist) + Returns: + new_params: tuple of update params (new_dist,) + """ + del t + del eps + + dist = params[0] + grads_dist = grads[0] + + dist_jnp = jnp.array(dist) + grads_dist_jnp = jnp.array(grads_dist) + + # map dist to logits and grads to grad_logits using jacobian + logits = dist_to_logits(dist) + grads_logits = jax.jvp(dist_to_logits, [dist_jnp], [grads_dist_jnp])[1] + + opt_updates, self.opt_state = self.opt.update(grads_logits, + self.opt_state, + logits) + + new_logits = optax.apply_updates(logits, opt_updates) + + new_dist = logits_to_dist(new_logits) + new_dist = np.array(new_dist) + + return (new_dist,) + + def compute_gradients(self, params, payoff_matrices): + """Compute and return exploitability. + + Args: + params: tuple of params (dist,), see sgd.gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + float, exploitability of current dist + unregularized exploitability (stochastic estimate) + shannon regularized exploitability (stochastic estimate) + """ + return gradients(*params, payoff_matrices, self.num_players, + self.temperature, self.proj_grad) + + def exploitability(self, params, payoff_matrices): + """Compute and return exploitability. + + Args: + params: tuple of params (dist,), see sgd.gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + float, exploitability as avg squared norm of projected-gradient + """ + return exp.grad_norm_exploitability(params, payoff_matrices, eta=1., + temperature=self.temperature) + + +def logits_to_dist(logits): + logits_ext = jnp.append(logits, 0.) + payoff = jax.nn.softmax(logits_ext) + return payoff + + +def dist_to_logits(dist, eps=1e-8): + # dist[-1] = exp(logits[-1]) / Z = exp(0) / Z + z = 1 / jnp.clip(dist[-1], eps, 1.) + logits = jnp.log(jnp.clip(dist[:-1] * z, eps, np.inf)) + return logits + + +def gradients(dist, payoff_matrices, num_players, temperature=0., + proj_grad=True): + """Computes exploitablity gradient. + + Args: + dist: 1-d np.array, current estimate of nash distribution + payoff_matrices: 2 (>=2 x A x A) np.arrays, payoffs for each joint action + num_players: int, number of players, in case payoff_matrices is abbreviated + temperature: non-negative float, default 0. + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. 
(dist) as tuple + unregularized exploitability (stochastic estimate) + shannon regularized exploitability (stochastic estimate) + """ + del num_players + # if consulting paper https://arxiv.org/abs/2310.06689, code assumes eta = 1 + tau = temperature + + a, b = 0, 1 # 2 samples needed for unbiased estimation + p_0, p_1 = 0, 1 # player 0 index, player 1 index + hess_0_01_a = payoff_matrices[a][p_0] + hess_1_01_a = payoff_matrices[a][p_1] + hess_0_01_b = payoff_matrices[b][p_0] + + pg_0_a = simplex.project_grad(hess_0_01_a.dot(dist)) + pg_0_b = simplex.project_grad(hess_0_01_b.dot(dist)) + + unreg_exp = np.dot(pg_0_a, pg_0_b) + + if tau > 0.: + log_dist_safe = np.clip(np.log(dist), -40, 0) + entr_grad_proj = simplex.project_grad(-tau * (log_dist_safe + 1)) + else: + entr_grad_proj = 0. + pg_0_a_entr = pg_0_a + entr_grad_proj + pg_0_b_entr = pg_0_b + entr_grad_proj + pg_0_entr = 0.5 * (pg_0_a_entr + pg_0_b_entr) + pg_1_b_entr = pg_0_b_entr + + reg_exp = np.dot(pg_0_a_entr, pg_0_b_entr) + + # then construct unbiased stochastic gradient + grad_dist = 2. * hess_1_01_a.dot(pg_1_b_entr) + if tau > 0.: + grad_dist += 2. * -tau * pg_0_entr / dist + + if proj_grad: + grad_dist = simplex.project_grad(grad_dist) + + return (grad_dist,), unreg_exp, reg_exp diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/adam_anneal.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/adam_anneal.py new file mode 100644 index 0000000..ec9e846 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/adam_anneal.py @@ -0,0 +1,261 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Stochastic Gradient Descent (Adam) Approx. 
Nash Solver w/ Annealing.""" + +from absl import logging # pylint:disable=unused-import + +import jax +import jax.numpy as jnp + +import numpy as np + +import optax + +from scipy import special + +from open_spiel.python.algorithms.adidas_utils.helpers import simplex +from open_spiel.python.algorithms.adidas_utils.helpers.symmetric import exploitability as exp + + +class Solver(object): + """Adam Solver with temperature annealing.""" + + def __init__(self, temperature=1., proj_grad=True, lrs=(1e-2, 1e-1), + exp_thresh=-1., rnd_init=False, seed=None, **kwargs): + """Ctor.""" + del kwargs + if temperature < 0.: + raise ValueError("temperature must be non-negative") + self.num_players = None + self.temperature = temperature + self.proj_grad = proj_grad + self.rnd_init = rnd_init + self.lrs = lrs + self.num_estimates = 2 + self.exp_thresh = exp_thresh + self.has_aux = True + self.aux_errors = [] + + self.update = self.descent_step + + self.opt = optax.adam(learning_rate=lrs[0]) + self.opt_state = self.opt.init(jnp.zeros(1)) + + self.seed = seed + self.random = np.random.RandomState(seed) + + def init_vars(self, num_strats, num_players): + """Initialize solver parameters.""" + self.num_players = num_players + if self.rnd_init: + init_dist = self.random.rand(num_strats) + else: + init_dist = np.ones(num_strats) + init_dist /= init_dist.sum() + init_y = np.zeros(num_strats) + init_anneal_steps = 0 + + init_params = jnp.array(dist_to_logits(init_dist)) + + self.opt_state = self.opt.init(init_params) + + return (init_dist, init_y, init_anneal_steps) + + def record_aux_errors(self, grads): + """Record errors for the auxiliary variables.""" + grad_y = grads[1] + self.aux_errors.append([np.linalg.norm(grad_y)]) + + def compute_gradients(self, params, payoff_matrices): + """Compute and return gradients (and exploitabilities) for all parameters. + + Args: + params: tuple of params (dist, y, anneal_steps), see gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + tuple of gradients (grad_dist, grad_y, grad_anneal_steps), see gradients + unregularized exploitability (stochastic estimate) + shannon entropy regularized exploitability (stochastic estimate) + """ + return self.gradients(*params, payoff_matrices, self.num_players, + self.temperature, self.proj_grad) + + def exploitability(self, params, payoff_matrices): + """Compute and return shannon entropy regularized exploitability. + + Args: + params: tuple of params (dist, y), see qre.gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + float, exploitability of current dist + """ + return exp.qre_exploitability(params, payoff_matrices, self.temperature) + + def gradients(self, dist: np.ndarray, y: np.ndarray, anneal_steps: int, + payoff_matrices, num_players, + temperature=0., proj_grad=True + ) -> tuple[tuple[np.ndarray, np.ndarray, int], float, float]: + """Computes exploitablity gradient and aux variable gradients. + + Args: + dist: 1-d np.array, current estimate of nash distribution + y: 1-d np.array (same shape as dist), current estimate of payoff gradient + anneal_steps: int, elapsed num steps since last anneal + payoff_matrices: 2 (>=2 x A x A) np.arrays, payoffs for each joint action + num_players: int, number of players, in case payoff_matrices is + abbreviated + temperature: non-negative float, default 0. + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. 
(dist, anneal_steps) as tuple + unregularized exploitability (stochastic estimate) + shannon entropy regularized exploitability (stochastic estimate) + """ + + grad_dist = loss_gradients(dist, payoff_matrices, num_players, temperature, + proj_grad)[0][0] + + a = 0 # 2 samples (a, b) needed for unbiased estimation + p_0 = 0 # player 0 index + nabla = payoff_matrices[a][p_0].dot(dist) + grad_y = y - nabla + + if temperature >= 1e-3: + br = special.softmax(y / temperature) + else: + power = np.inf + s = np.linalg.norm(y, ord=power) + br = np.zeros_like(dist) + maxima = (y == s) + br[maxima] = 1. / maxima.sum() + + unreg_exp = np.max(y) - y.dot(dist) + entr_br = temperature * special.entr(br).sum() + entr_dist = temperature * special.entr(dist).sum() + reg_exp = y.dot(br - dist) + entr_br - entr_dist + + if reg_exp < self.exp_thresh: + self.temperature = np.clip(temperature / 2., 0., np.inf) + grad_anneal_steps = -anneal_steps + else: + grad_anneal_steps = 1 + + return (grad_dist, grad_y, grad_anneal_steps), unreg_exp, reg_exp + + def descent_step(self, params, grads, t, eps=0.): + """Gradient descent on exploitability wrt logits. + + Args: + params: tuple of variables to be updated (dist, y, anneal_steps) + grads: tuple of variable gradients (grad_dist, grad_y, grad_anneal_steps) + t: int, solver iteration + eps: float > 0, force all probabilities >= eps / dim(dist) (unused) + Returns: + new_params: tuple of update params (new_dist, new_y, new_anneal_steps) + """ + del eps + + dist = params[0] + grads_dist = grads[0] + + dist_jnp = jnp.array(dist) + grads_dist_jnp = jnp.array(grads_dist) + + # map dist to logits and grads to grad_logits using jacobian + logits = dist_to_logits(dist) + grads_logits = jax.jvp(dist_to_logits, [dist_jnp], [grads_dist_jnp])[1] + + opt_updates, self.opt_state = self.opt.update(grads_logits, + self.opt_state, + logits) + + new_logits = optax.apply_updates(logits, opt_updates) + + new_dist = logits_to_dist(new_logits) + new_dist = np.array(new_dist) + + lr_y = self.lrs[1] + lr_y = np.clip(1 / float(t + 1), lr_y, np.inf) + new_y = params[1] - lr_y * grads[1] + + new_anneal_steps = params[2] + grads[2] + + return (new_dist, new_y, new_anneal_steps) + + +def logits_to_dist(logits): + logits_ext = jnp.append(logits, 0.) + payoff = jax.nn.softmax(logits_ext) + return payoff + + +def dist_to_logits(dist, eps=1e-8): + # dist[-1] = exp(logits[-1]) / Z = exp(0) / Z + z = 1 / jnp.clip(dist[-1], eps, 1.) + logits = jnp.log(jnp.clip(dist[:-1] * z, eps, np.inf)) + return logits + + +def loss_gradients(dist, payoff_matrices, num_players, temperature=0., + proj_grad=True): + """Computes exploitablity gradient. + + Args: + dist: 1-d np.array, current estimate of nash distribution + payoff_matrices: 2 (>=2 x A x A) np.arrays, payoffs for each joint action + num_players: int, number of players, in case payoff_matrices is abbreviated + temperature: non-negative float, default 0. + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. 
(dist) as tuple + unregularized exploitability (stochastic estimate) + shannon regularized exploitability (stochastic estimate) + """ + del num_players + # if consulting paper https://arxiv.org/abs/2310.06689, code assumes eta = 1 + tau = temperature + + a, b = 0, 1 # 2 samples needed for unbiased estimation + p_0, p_1 = 0, 1 # player 0 index, player 1 index + hess_0_01_a = payoff_matrices[a][p_0] + hess_1_01_a = payoff_matrices[a][p_1] + hess_0_01_b = payoff_matrices[b][p_0] + + pg_0_a = simplex.project_grad(hess_0_01_a.dot(dist)) + pg_0_b = simplex.project_grad(hess_0_01_b.dot(dist)) + + unreg_exp = np.dot(pg_0_a, pg_0_b) + + if tau > 0.: + log_dist_safe = np.clip(np.log(dist), -40, 0) + entr_grad_proj = simplex.project_grad(-tau * (log_dist_safe + 1)) + else: + entr_grad_proj = 0. + pg_0_a_entr = pg_0_a + entr_grad_proj + pg_0_b_entr = pg_0_b + entr_grad_proj + pg_0_entr = 0.5 * (pg_0_a_entr + pg_0_b_entr) + pg_1_b_entr = pg_0_b_entr + + reg_exp = np.dot(pg_0_a_entr, pg_0_b_entr) + + # then construct unbiased stochastic gradient + grad_dist = 2. * hess_1_01_a.dot(pg_1_b_entr) + if tau > 0.: + grad_dist += 2. * -tau * pg_0_entr / dist + + if proj_grad: + grad_dist = simplex.project_grad(grad_dist) + + return (grad_dist,), unreg_exp, reg_exp diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/ate.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/ate.py new file mode 100644 index 0000000..8f563b6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/ate.py @@ -0,0 +1,368 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Adaptive Tsallis Entropy (ATE) Stochastic Approximate Nash Solver.""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np +from scipy import special + +from open_spiel.python.algorithms.adidas_utils.helpers import misc +from open_spiel.python.algorithms.adidas_utils.helpers import simplex +from open_spiel.python.algorithms.adidas_utils.helpers.symmetric import exploitability as exp + + +class Solver(object): + """ATE Solver.""" + + def __init__(self, p=1., proj_grad=True, euclidean=False, cheap=False, + lrs=(1e-2, 1e-1), vr=True, rnd_init=False, seed=None, **kwargs): + """Ctor.""" + del kwargs + if (p < 0.) 
or (p > 1.): + raise ValueError("p must be in [0, 1]") + self.num_players = None + self.p = p + self.proj_grad = proj_grad + self.cheap = cheap + self.vr = vr + self.pm_vr = None + self.rnd_init = rnd_init + self.lrs = lrs + self.has_aux = True + self.aux_errors = [] + + self.euclidean = euclidean + if euclidean: + self.update = self.euc_descent_step + else: + self.update = self.mirror_descent_step + + self.seed = seed + self.random = np.random.RandomState(seed) + + def init_vars(self, num_strats, num_players): + """Initialize solver parameters.""" + self.num_players = num_players + if self.rnd_init: + init_dist = self.random.rand(num_strats) + else: + init_dist = np.ones(num_strats) + init_dist /= init_dist.sum() + init_y = np.zeros(num_strats) + if self.cheap and self.vr: + self.pm_vr = np.zeros((num_strats, num_strats)) + return (init_dist, init_y) + + def record_aux_errors(self, grads): + """Record errors for the auxiliary variables.""" + grad_y = grads[1] + self.aux_errors.append([np.linalg.norm(grad_y)]) + + def compute_gradients(self, params, payoff_matrices): + """Compute and return gradients (and exploitabilities) for all parameters. + + Args: + params: tuple of params (dist, y), see ate.gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + tuple of gradients (grad_dist, grad_y), see ate.gradients + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + if self.cheap and self.vr: + grads, pm_vr, exp_sto, exp_solver_sto = cheap_gradients_vr( + self.random, *params, payoff_matrices, self.num_players, self.pm_vr, + self.p, self.proj_grad,) + self.pm_vr = pm_vr + return grads, exp_sto, exp_solver_sto + elif self.cheap and not self.vr: + return cheap_gradients(self.random, *params, payoff_matrices, + self.num_players, self.p, self.proj_grad) + else: + return gradients(*params, payoff_matrices, self.num_players, self.p, + self.proj_grad) + + def exploitability(self, params, payoff_matrices): + """Compute and return tsallis entropy regularized exploitability. + + Args: + params: tuple of params (dist, y), see ate.gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + float, exploitability of current dist + """ + return exp.ate_exploitability(params, payoff_matrices, self.p) + + def euc_descent_step(self, params, grads, t): + """Projected gradient descent on exploitability using Euclidean projection. + + Args: + params: tuple of variables to be updated (dist, y) + grads: tuple of variable gradients (grad_dist, grad_y) + t: int, solver iteration (unused) + Returns: + new_params: tuple of update params (new_dist, new_y) + """ + lr_dist, lr_y = self.lrs + new_params = [params[0] - lr_dist * grads[0]] + lr_y = np.clip(1 / float(t + 1), lr_y, np.inf) + new_params += [params[1] - lr_y * grads[1]] + new_params = euc_project(*new_params) + return new_params + + def mirror_descent_step(self, params, grads, t): + """Entropic mirror descent on exploitability. 
+ + Args: + params: tuple of variables to be updated (dist, y) + grads: tuple of variable gradients (grad_dist, grad_y) + t: int, solver iteration (unused) + Returns: + new_params: tuple of update params (new_dist, new_y) + """ + lr_dist, lr_y = self.lrs + new_params = [np.log(np.clip(params[0], 0, np.inf)) - lr_dist * grads[0]] + lr_y = np.clip(1 / float(t + 1), lr_y, np.inf) + new_params += [params[1] - lr_y * grads[1]] + new_params = mirror_project(*new_params) + return new_params + + +def gradients(dist, y, payoff_matrices, num_players, p=1, proj_grad=True): + """Computes exploitablity gradient and aux variable gradients. + + Args: + dist: 1-d np.array, current estimate of nash distribution + y: 1-d np.array (same shape as dist), current estimate of payoff gradient + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + num_players: int, number of players, in case payoff_matrices is abbreviated + p: float in [0, 1], Tsallis entropy-regularization --> 0 as p --> 0 + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. (dist, y) as tuple + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + nabla = payoff_matrices[0].dot(dist) + if p > 0: + power = 1. / float(p) + s = np.linalg.norm(y, ord=power) + if s == 0: + br = misc.uniform_dist(y) + else: + br = (y / s)**power + else: + power = np.inf + s = np.linalg.norm(y, ord=power) + br = np.zeros_like(dist) + maxima = (y == s) + br[maxima] = 1. / maxima.sum() + + unreg_exp = np.max(y) - y.dot(dist) + br_inv_sparse = 1 - np.sum(br**(p + 1)) + dist_inv_sparse = 1 - np.sum(dist**(p + 1)) + entr_br = s / (p + 1) * br_inv_sparse + entr_dist = s / (p + 1) * dist_inv_sparse + reg_exp = y.dot(br - dist) + entr_br - entr_dist + + entr_br_vec = br_inv_sparse * br**(1 - p) + entr_dist_vec = dist_inv_sparse * dist**(1 - p) + + policy_gradient = nabla - s * dist**p + other_player_fx = (br - dist) + 1 / (p + 1) * (entr_br_vec - entr_dist_vec) + + other_player_fx_translated = payoff_matrices[1].dot(other_player_fx) + grad_dist = -policy_gradient + (num_players - 1) * other_player_fx_translated + if proj_grad: + grad_dist = simplex.project_grad(grad_dist) + grad_y = y - nabla + + return (grad_dist, grad_y), unreg_exp, reg_exp + + +def cheap_gradients(random, dist, y, payoff_matrices, num_players, p=1, + proj_grad=True): + """Computes exploitablity gradient and aux variable gradients with samples. + + This implementation takes payoff_matrices as input so technically uses O(d^2) + compute but only a single column of payoff_matrices is used to perform the + update so can be re-implemented in O(d) if needed. + + Args: + random: random number generator, np.random.RandomState(seed) + dist: 1-d np.array, current estimate of nash distribution + y: 1-d np.array (same shape as dist), current estimate of payoff gradient + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + num_players: int, number of players, in case payoff_matrices is abbreviated + p: float in [0, 1], Tsallis entropy-regularization --> 0 as p --> 0 + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. (dist, y) as tuple + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + action_1 = random.choice(dist.size, p=dist) + nabla = payoff_matrices[0][:, action_1] + if p > 0: + power = 1. 
/ float(p) + s = np.linalg.norm(y, ord=power) + if s == 0: + br = misc.uniform_dist(y) + else: + br = (y / s)**power + else: + power = np.inf + s = np.linalg.norm(y, ord=power) + br = np.zeros_like(dist) + maxima = (y == s) + br[maxima] = 1. / maxima.sum() + + unreg_exp = np.max(y) - y.dot(dist) + br_inv_sparse = 1 - np.sum(br**(p + 1)) + dist_inv_sparse = 1 - np.sum(dist**(p + 1)) + entr_br = s / (p + 1) * br_inv_sparse + entr_dist = s / (p + 1) * dist_inv_sparse + reg_exp = y.dot(br - dist) + entr_br - entr_dist + + entr_br_vec = br_inv_sparse * br**(1 - p) + entr_dist_vec = dist_inv_sparse * dist**(1 - p) + + policy_gradient = nabla - s * dist**p + other_player_fx = (br - dist) + 1 / (p + 1) * (entr_br_vec - entr_dist_vec) + + action_u = random.choice(dist.size) # uniform, ~importance sampling + other_player_fx = dist.size * other_player_fx[action_u] + other_player_fx_translated = payoff_matrices[1, :, action_u] * other_player_fx + grad_dist = -policy_gradient + (num_players - 1) * other_player_fx_translated + if proj_grad: + grad_dist = simplex.project_grad(grad_dist) + grad_y = y - nabla + + return (grad_dist, grad_y), unreg_exp, reg_exp + + +def cheap_gradients_vr(random, dist, y, payoff_matrices, num_players, pm_vr, + p=1, proj_grad=True, version=0): + """Computes exploitablity gradient and aux variable gradients with samples. + + This implementation takes payoff_matrices as input so technically uses O(d^2) + compute but only a single column of payoff_matrices is used to perform the + update so can be re-implemented in O(d) if needed. + + Args: + random: random number generator, np.random.RandomState(seed) + dist: 1-d np.array, current estimate of nash distribution + y: 1-d np.array (same shape as dist), current estimate of payoff gradient + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + num_players: int, number of players, in case payoff_matrices is abbreviated + pm_vr: approximate payoff_matrix for variance reduction + p: float in [0, 1], Tsallis entropy-regularization --> 0 as p --> 0 + proj_grad: bool, if True, projects dist gradient onto simplex + version: int, default 0, two options for variance reduction + Returns: + gradient of exploitability w.r.t. (dist, y) as tuple + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + if pm_vr is None: + raise ValueError("pm_vr must be np.array of shape (num_strats, num_strats)") + if (not isinstance(version, int)) or (version < 0) or (version > 1): + raise ValueError("version must be non-negative int < 2") + + action_1 = random.choice(dist.size, p=dist) + nabla = payoff_matrices[0][:, action_1] + if p > 0: + power = 1. / float(p) + s = np.linalg.norm(y, ord=power) + if s == 0: + br = misc.uniform_dist(y) + else: + br = (y / s)**power + else: + power = np.inf + s = np.linalg.norm(y, ord=power) + br = np.zeros_like(dist) + maxima = (y == s) + br[maxima] = 1. 
/ maxima.sum() + + unreg_exp = np.max(y) - y.dot(dist) + br_inv_sparse = 1 - np.sum(br**(p + 1)) + dist_inv_sparse = 1 - np.sum(dist**(p + 1)) + entr_br = s / (p + 1) * br_inv_sparse + entr_dist = s / (p + 1) * dist_inv_sparse + reg_exp = y.dot(br - dist) + entr_br - entr_dist + + entr_br_vec = br_inv_sparse * br**(1 - p) + entr_dist_vec = dist_inv_sparse * dist**(1 - p) + + policy_gradient = nabla - s * dist**p + other_player_fx = (br - dist) + 1 / (p + 1) * (entr_br_vec - entr_dist_vec) + + if version == 0: + other_player_fx_translated = pm_vr.dot(other_player_fx) + action_u = random.choice(dist.size) # uniform, ~importance sampling + other_player_fx = other_player_fx[action_u] + pm_mod = dist.size * (payoff_matrices[1, :, action_u] - pm_vr[:, action_u]) + other_player_fx_translated += pm_mod * other_player_fx + elif version == 1: + other_player_fx_translated = np.sum(pm_vr, axis=1) + action_u = random.choice(dist.size) # uniform, ~importance sampling + other_player_fx = other_player_fx[action_u] + pm_mod = dist.size * payoff_matrices[1, :, action_u] + r = dist.size * pm_vr[:, action_u] + other_player_fx_translated += pm_mod * other_player_fx - r + + grad_dist = -policy_gradient + (num_players - 1) * other_player_fx_translated + if proj_grad: + grad_dist = simplex.project_grad(grad_dist) + grad_y = y - nabla + + if version == 0: + pm_vr[:, action_u] = payoff_matrices[1, :, action_u] + elif version == 1: + pm_vr[:, action_u] = payoff_matrices[1, :, action_u] * other_player_fx + + return (grad_dist, grad_y), pm_vr, unreg_exp, reg_exp + + +def euc_project(dist, y): + """Project variables onto their feasible sets (euclidean proj for dist). + + Args: + dist: 1-d np.array, current estimate of nash distribution + y: 1-d np.array (same shape as dist), current estimate of payoff gradient + Returns: + projected variables (dist, y) as tuple + """ + dist = simplex.euclidean_projection_onto_simplex(dist) + y = np.clip(y, 0., np.inf) + + return dist, y + + +def mirror_project(dist, y): + """Project variables onto their feasible sets (softmax for dist). + + Args: + dist: 1-d np.array, current estimate of nash distribution + y: 1-d np.array (same shape as dist), current estimate of payoff gradient + Returns: + projected variables (dist, y) as tuple + """ + dist = special.softmax(dist) + y = np.clip(y, 0., np.inf) + + return dist, y diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/ate_anneal.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/ate_anneal.py new file mode 100644 index 0000000..736ef67 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/ate_anneal.py @@ -0,0 +1,384 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
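The ATE solvers above all build a soft best response `br = (y / s)**(1/p)` with `s = ||y||_{1/p}`, which interpolates between a payoff-proportional distribution at `p = 1` and a hard argmax as `p -> 0`. A standalone sketch of that step (illustrative only; it assumes a non-negative payoff-gradient estimate `y`, which the fractional powers require):

```python
import numpy as np

def tsallis_best_response(y, p):
    # Soft best response used by the ATE gradients: br = (y / s)**(1/p) with
    # s = ||y||_{1/p}; as p -> 0 this collapses to a uniform mix over argmax(y).
    if p > 0:
        power = 1.0 / p
        s = np.linalg.norm(y, ord=power)
        if s == 0:
            return np.ones_like(y) / y.size  # degenerate case: uniform
        return (y / s) ** power
    s = np.max(y)
    br = np.zeros_like(y)
    br[y == s] = 1.0 / np.sum(y == s)
    return br

y = np.array([0.2, 0.5, 0.3])
print(tsallis_best_response(y, p=1.0))  # proportional to y: [0.2 0.5 0.3]
print(tsallis_best_response(y, p=0.0))  # hard argmax:       [0. 1. 0.]
```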
+ +"""Adaptive Tsallis Entropy (ATE) Stochastic Approximate Nash Solver.""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np +from scipy import special + +from open_spiel.python.algorithms.adidas_utils.helpers import misc +from open_spiel.python.algorithms.adidas_utils.helpers import simplex +from open_spiel.python.algorithms.adidas_utils.helpers.symmetric import exploitability as exp + + +class Solver(object): + """ATE Solver.""" + + def __init__(self, p=1., proj_grad=True, euclidean=False, cheap=False, + lrs=(1e-2, 1e-1), exp_thresh=-1., vr=True, rnd_init=False, + seed=None, **kwargs): + """Ctor.""" + del kwargs + if (p < 0.) or (p > 1.): + raise ValueError("p must be in [0, 1]") + self.num_players = None + self.p_init = p + self.p = p + self.proj_grad = proj_grad + self.cheap = cheap + self.vr = vr + self.pm_vr = None + self.rnd_init = rnd_init + self.lrs = lrs + self.exp_thresh = exp_thresh + self.has_aux = True + self.aux_errors = [] + + self.euclidean = euclidean + if euclidean: + self.update = self.euc_descent_step + else: + self.update = self.mirror_descent_step + + self.seed = seed + self.random = np.random.RandomState(seed) + + def init_vars(self, num_strats, num_players): + """Initialize solver parameters.""" + self.num_players = num_players + if self.rnd_init: + init_dist = self.random.rand(num_strats) + else: + init_dist = np.ones(num_strats) + init_dist /= init_dist.sum() + init_y = np.zeros(num_strats) + init_anneal_steps = 0 + if self.cheap and self.vr: + self.pm_vr = np.zeros((num_strats, num_strats)) + return (init_dist, init_y, init_anneal_steps) + + def record_aux_errors(self, grads): + """Record errors for the auxiliary variables.""" + grad_y = grads[1] + self.aux_errors.append([np.linalg.norm(grad_y)]) + + def compute_gradients(self, params, payoff_matrices): + """Compute and return gradients (and exploitabilities) for all parameters. + + Args: + params: tuple of params (dist, y, anneal_steps), see gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + tuple of gradients (grad_dist, grad_y, grad_anneal_steps), see gradients + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + if self.cheap and self.vr: + grads, pm_vr, exp_sto, exp_solver_sto = self.cheap_gradients_vr( + self.random, *params, payoff_matrices, self.num_players, self.pm_vr, + self.p, self.proj_grad,) + self.pm_vr = pm_vr + return grads, exp_sto, exp_solver_sto + elif self.cheap and not self.vr: + return self.cheap_gradients(self.random, *params, payoff_matrices, + self.num_players, self.p, self.proj_grad) + else: + return self.gradients(*params, payoff_matrices, self.num_players, self.p, + self.proj_grad) + + def exploitability(self, params, payoff_matrices): + """Compute and return tsallis entropy regularized exploitability. + + Args: + params: tuple of params (dist, y), see ate.gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + float, exploitability of current dist + """ + return exp.ate_exploitability(params, payoff_matrices, self.p) + + def gradients(self, dist, y, anneal_steps, payoff_matrices, num_players, p=1, + proj_grad=True): + """Computes exploitablity gradient and aux variable gradients. 
+ + Args: + dist: 1-d np.array, current estimate of nash distribution + y: 1-d np.array (same shape as dist), current estimate of payoff gradient + anneal_steps: int, elapsed num steps since last anneal + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + num_players: int, number of players, in case payoff_matrices is + abbreviated + p: float in [0, 1], Tsallis entropy-regularization --> 0 as p --> 0 + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. (dist, y, anneal_steps) as tuple + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + nabla = payoff_matrices[0].dot(dist) + if p > 1e-2: # encounter numerical under/overflow when power > 100. + power = 1. / float(p) + s = np.linalg.norm(y, ord=power) + if s == 0: + br = misc.uniform_dist(y) + else: + br = (y / s)**power + else: + power = np.inf + s = np.linalg.norm(y, ord=power) + br = np.zeros_like(dist) + maxima = (y == s) + br[maxima] = 1. / maxima.sum() + + unreg_exp = np.max(y) - y.dot(dist) + br_inv_sparse = 1 - np.sum(br**(p + 1)) + dist_inv_sparse = 1 - np.sum(dist**(p + 1)) + entr_br = s / (p + 1) * br_inv_sparse + entr_dist = s / (p + 1) * dist_inv_sparse + reg_exp = y.dot(br - dist) + entr_br - entr_dist + + entr_br_vec = br_inv_sparse * br**(1 - p) + entr_dist_vec = dist_inv_sparse * dist**(1 - p) + + policy_gradient = nabla - s * dist**p + other_player_fx = (br - dist) + 1 / (p + 1) * (entr_br_vec - entr_dist_vec) + + other_player_fx_translated = payoff_matrices[1].dot(other_player_fx) + grad_dist = -policy_gradient + grad_dist += (num_players - 1) * other_player_fx_translated + if proj_grad: + grad_dist = simplex.project_grad(grad_dist) + grad_y = y - nabla + + _, lr_y = self.lrs + if (reg_exp < self.exp_thresh) and (anneal_steps >= 1 / lr_y): + self.p = np.clip(p / 2., 0., 1.) + grad_anneal_steps = -anneal_steps + else: + grad_anneal_steps = 1 + + return (grad_dist, grad_y, grad_anneal_steps), unreg_exp, reg_exp + + def cheap_gradients(self, random, dist, y, anneal_steps, payoff_matrices, + num_players, p=1, proj_grad=True): + """Computes exploitablity gradient and aux variable gradients with samples. + + This implementation takes payoff_matrices as input so technically uses + O(d^2) compute but only a single column of payoff_matrices is used to + perform the update so can be re-implemented in O(d) if needed. + + Args: + random: random number generator, np.random.RandomState(seed) + dist: 1-d np.array, current estimate of nash distribution + y: 1-d np.array (same shape as dist), current estimate of payoff gradient + anneal_steps: int, elapsed num steps since last anneal + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + num_players: int, number of players, in case payoff_matrices is abbrev'd + p: float in [0, 1], Tsallis entropy-regularization --> 0 as p --> 0 + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. (dist, y, anneal_steps) as tuple + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + del anneal_steps + action_1 = random.choice(dist.size, p=dist) + nabla = payoff_matrices[0][:, action_1] + if p > 0: + power = 1. 
/ float(p) + s = np.linalg.norm(y, ord=power) + if s == 0: + br = misc.uniform_dist(y) + else: + br = (y / s)**power + else: + power = np.inf + s = np.linalg.norm(y, ord=power) + br = np.zeros_like(dist) + maxima = (y == s) + br[maxima] = 1. / maxima.sum() + + unreg_exp = np.max(y) - y.dot(dist) + entr_br = s / (p + 1) * (1 - np.sum(br**(p + 1))) + entr_dist = s / (p + 1) * (1 - np.sum(dist**(p + 1))) + reg_exp = y.dot(br - dist) + entr_br - entr_dist + + entr_br_vec = (p + 1) / s * entr_br * br**(1 - p) + entr_dist_vec = (p + 1) / s * entr_dist * dist**(1 - p) + + policy_gradient = nabla - s * dist**p + other_player_fx = (br - dist) + 1 / (p + 1) * (entr_br_vec - entr_dist_vec) + + action_u = random.choice(dist.size) # uniform, ~importance sampling + other_player_fx = dist.size * other_player_fx[action_u] + other_player_fx_translat = payoff_matrices[1, :, action_u] * other_player_fx + grad_dist = -policy_gradient + (num_players - 1) * other_player_fx_translat + if proj_grad: + grad_dist = simplex.project_grad(grad_dist) + grad_y = y - nabla + + return (grad_dist, grad_y, None), unreg_exp, reg_exp + + def cheap_gradients_vr(self, random, dist, y, anneal_steps, payoff_matrices, + num_players, pm_vr, p=1, proj_grad=True, version=0): + """Computes exploitablity gradient and aux variable gradients with samples. + + This implementation takes payoff_matrices as input so technically uses + O(d^2) compute but only a single column of payoff_matrices is used to + perform the update so can be re-implemented in O(d) if needed. + + Args: + random: random number generator, np.random.RandomState(seed) + dist: 1-d np.array, current estimate of nash distribution + y: 1-d np.array (same shape as dist), current estimate of payoff gradient + anneal_steps: int, elapsed num steps since last anneal + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + num_players: int, number of players, in case payoff_matrices is abbrev'd + pm_vr: approximate payoff_matrix for variance reduction + p: float in [0, 1], Tsallis entropy-regularization --> 0 as p --> 0 + proj_grad: bool, if True, projects dist gradient onto simplex + version: int, default 0, two options for variance reduction + Returns: + gradient of exploitability w.r.t. (dist, y, anneal_steps) as tuple + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + del anneal_steps + if pm_vr is None: + raise ValueError("pm_vr must be np.array of shape (num_strats,) * 2") + if (not isinstance(version, int)) or (version < 0) or (version > 1): + raise ValueError("version must be non-negative int < 2") + + action_1 = random.choice(dist.size, p=dist) + nabla = payoff_matrices[0][:, action_1] + if p > 0: + power = 1. / float(p) + s = np.linalg.norm(y, ord=power) + if s == 0: + br = misc.uniform_dist(y) + else: + br = (y / s)**power + else: + power = np.inf + s = np.linalg.norm(y, ord=power) + br = np.zeros_like(dist) + maxima = (y == s) + br[maxima] = 1. 
/ maxima.sum() + + unreg_exp = np.max(y) - y.dot(dist) + entr_br = s / (p + 1) * (1 - np.sum(br**(p + 1))) + entr_dist = s / (p + 1) * (1 - np.sum(dist**(p + 1))) + reg_exp = y.dot(br - dist) + entr_br - entr_dist + + entr_br_vec = (p + 1) / s * entr_br * br**(1 - p) + entr_dist_vec = (p + 1) / s * entr_dist * dist**(1 - p) + + policy_gradient = nabla - s * dist**p + other_player_fx = (br - dist) + 1 / (p + 1) * (entr_br_vec - entr_dist_vec) + + if version == 0: + other_player_fx_translated = pm_vr.dot(other_player_fx) + action_u = random.choice(dist.size) # uniform, ~importance sampling + other_player_fx = other_player_fx[action_u] + m = dist.size + pm_mod = m * (payoff_matrices[1, :, action_u] - pm_vr[:, action_u]) + other_player_fx_translated += pm_mod * other_player_fx + elif version == 1: + other_player_fx_translated = np.sum(pm_vr, axis=1) + action_u = random.choice(dist.size) # uniform, ~importance sampling + other_player_fx = other_player_fx[action_u] + pm_mod = dist.size * payoff_matrices[1, :, action_u] + r = dist.size * pm_vr[:, action_u] + other_player_fx_translated += pm_mod * other_player_fx - r + + grad_dist = -policy_gradient + grad_dist += (num_players - 1) * other_player_fx_translated + if proj_grad: + grad_dist = simplex.project_grad(grad_dist) + grad_y = y - nabla + + if version == 0: + pm_vr[:, action_u] = payoff_matrices[1, :, action_u] + elif version == 1: + pm_vr[:, action_u] = payoff_matrices[1, :, action_u] * other_player_fx + + return (grad_dist, grad_y, None), pm_vr, unreg_exp, reg_exp + + def euc_descent_step(self, params, grads, t): + """Projected gradient descent on exploitability using Euclidean projection. + + Args: + params: tuple of variables to be updated (dist, y, anneal_steps) + grads: tuple of variable gradients (grad_dist, grad_y, grad_anneal_steps) + t: int, solver iteration + Returns: + new_params: tuple of update params (new_dist, new_y, new_anneal_steps) + """ + lr_dist, lr_y = self.lrs + new_params = [params[0] - lr_dist * grads[0]] + lr_y = np.clip(1 / float(t + 1), lr_y, np.inf) + new_params += [params[1] - lr_y * grads[1]] + new_params = euc_project(*new_params) + new_params += (params[2] + grads[2],) + return new_params + + def mirror_descent_step(self, params, grads, t): + """Entropic mirror descent on exploitability. + + Args: + params: tuple of variables to be updated (dist, y, anneal_steps) + grads: tuple of variable gradients (grad_dist, grad_y, grad_anneal_steps) + t: int, solver iteration + Returns: + new_params: tuple of update params (new_dist, new_y, new_anneal_steps) + """ + lr_dist, lr_y = self.lrs + new_params = [np.log(np.clip(params[0], 0, np.inf)) - lr_dist * grads[0]] + lr_y = np.clip(1 / float(t + 1), lr_y, np.inf) + new_params += [params[1] - lr_y * grads[1]] + new_params = mirror_project(*new_params) + new_params += (params[2] + grads[2],) + return new_params + + +def euc_project(dist, y): + """Project variables onto their feasible sets (euclidean proj for dist). + + Args: + dist: 1-d np.array, current estimate of nash distribution + y: 1-d np.array (same shape as dist), current estimate of payoff gradient + Returns: + projected variables (dist, y) as tuple + """ + dist = simplex.euclidean_projection_onto_simplex(dist) + y = np.clip(y, 0., np.inf) + + return dist, y + + +def mirror_project(dist, y): + """Project variables onto their feasible sets (softmax for dist). 
+ + Args: + dist: 1-d np.array, current estimate of nash distribution + y: 1-d np.array (same shape as dist), current estimate of payoff gradient + Returns: + projected variables (dist, y) as tuple + """ + dist = special.softmax(dist) + y = np.clip(y, 0., np.inf) + + return dist, y diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/ped.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/ped.py new file mode 100644 index 0000000..17151d9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/ped.py @@ -0,0 +1,76 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Population Exploitability Descent (PED) Stochastic Approx. Nash Solver.""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np + +from open_spiel.python.algorithms.adidas_utils.helpers import simplex +from open_spiel.python.algorithms.adidas_utils.helpers.symmetric import updates + + +class Solver(updates.Solver): + """PED Solver.""" + + def __init__(self, proj_grad=True, euclidean=False, lrs=(1e-1,), + rnd_init=False, seed=None, **kwargs): + """Ctor.""" + del kwargs + super().__init__(proj_grad, euclidean, rnd_init, seed) + self.lrs = lrs + + def compute_gradients(self, params, payoff_matrices): + """Compute and return exploitability. + + Args: + params: tuple of params (dist,), see ped.gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + float, exploitability of current dist + unregularized exploitability (stochastic estimate) + unregularized exploitability (stochastic estimate) *duplicate + """ + return gradients(*params, payoff_matrices, self.num_players, self.proj_grad) + + +def gradients(dist, payoff_matrices, num_players, proj_grad=True): + """Computes exploitablity gradient. + + Args: + dist: 1-d np.array, current estimate of nash distribution + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + num_players: int, number of players, in case payoff_matrices is abbreviated + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. (dist) as tuple + unregularized exploitability (stochastic estimate) + unregularized exploitability (stochastic estimate) *duplicate + """ + nabla = payoff_matrices[0].dot(dist) + + power = np.inf + s = np.linalg.norm(nabla, ord=power) + br = np.zeros_like(dist) + maxima = (nabla == s) + br[maxima] = 1. 
/ maxima.sum() + + unreg_exp = np.max(nabla) - nabla.dot(dist) + + grad_dist = -(nabla) + (num_players - 1) * payoff_matrices[1].dot(br - dist) + if proj_grad: + grad_dist = simplex.project_grad(grad_dist) + + return (grad_dist,), unreg_exp, unreg_exp diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/pg.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/pg.py new file mode 100644 index 0000000..852046a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/pg.py @@ -0,0 +1,80 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Policy Gradient (PG).""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np + +from open_spiel.python.algorithms.adidas_utils.helpers import simplex +from open_spiel.python.algorithms.adidas_utils.helpers.symmetric import updates + + +class Solver(updates.Solver): + """PG Solver.""" + + def __init__(self, proj_grad=True, euclidean=False, lrs=(1e-1,), + rnd_init=False, seed=None, **kwargs): + """Ctor.""" + del kwargs + super().__init__(proj_grad, euclidean, rnd_init, seed) + self.lrs = lrs + + def compute_gradients(self, params, payoff_matrices): + """Compute and return gradients for all parameters. + + Args: + params: tuple of params (dist,), see pg.gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + tuple of gradients (grad_dist,), see pg.gradients + unregularized exploitability (stochastic estimate) + unregularized exploitability (stochastic estimate) *duplicate + """ + return gradients(*params, payoff_matrices, self.proj_grad) + + def exploitability(self, params, payoff_matrices): + """Policy gradient does not minimize any exploitability so return NaN. + + Args: + params: tuple of params (dist,) + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + np.NaN + """ + return np.nan + + +def gradients(dist, payoff_matrices, proj_grad=True): + """Computes exploitablity gradient. + + Args: + dist: 1-d np.array, current estimate of nash distribution + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of payoff w.r.t. 
(dist) as tuple + unregularized exploitability (stochastic estimate) + unregularized exploitability (stochastic estimate) *duplicate + """ + nabla = payoff_matrices[0].dot(dist) + + unreg_exp = np.max(nabla) - nabla.dot(dist) + + grad_dist = -nabla + if proj_grad: + grad_dist = simplex.project_grad(grad_dist) + + return (grad_dist,), unreg_exp, unreg_exp diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/qre.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/qre.py new file mode 100644 index 0000000..4db1e26 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/qre.py @@ -0,0 +1,369 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Quantal Response Equilibrium (QRE) Stochastic Approximate Nash Solver.""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np +from scipy import special + +from open_spiel.python.algorithms.adidas_utils.helpers import simplex +from open_spiel.python.algorithms.adidas_utils.helpers.symmetric import exploitability as exp + + +class Solver(object): + """QRE Solver.""" + + def __init__(self, temperature=0., proj_grad=True, euclidean=False, + cheap=False, lrs=(1e-2, 1e-1), vr=True, rnd_init=False, + seed=None, **kwargs): + """Ctor.""" + del kwargs + if temperature < 0.: + raise ValueError("temperature must be non-negative") + self.num_players = None + self.temperature = temperature + self.proj_grad = proj_grad + self.cheap = cheap + self.vr = vr + self.pm_vr = None + self.rnd_init = rnd_init + self.lrs = lrs + self.has_aux = True + self.aux_errors = [] + + self.euclidean = euclidean + if euclidean: + self.update = self.euc_descent_step + else: + self.update = self.mirror_descent_step + + self.seed = seed + self.random = np.random.RandomState(seed) + + def init_vars(self, num_strats, num_players): + """Initialize solver parameters.""" + self.num_players = num_players + if self.rnd_init: + init_dist = self.random.rand(num_strats) + else: + init_dist = np.ones(num_strats) + init_dist /= init_dist.sum() + init_y = np.zeros(num_strats) + if self.cheap and self.vr: + self.pm_vr = np.zeros((num_strats, num_strats)) + return (init_dist, init_y) + + def record_aux_errors(self, grads): + """Record errors for the auxiliary variables.""" + grad_y = grads[1] + self.aux_errors.append([np.linalg.norm(grad_y)]) + + def compute_gradients(self, params, payoff_matrices): + """Compute and return gradients (and exploitabilities) for all parameters. 
+ + Args: + params: tuple of params (dist, y), see qre.gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + tuple of gradients (grad_dist, grad_y, grad_z), see qre.gradients + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + if self.cheap and self.vr: + grads, pm_vr, exp_sto, exp_solver_sto = cheap_gradients_vr( + self.random, *params, payoff_matrices, self.num_players, self.pm_vr, + self.temperature, self.proj_grad,) + self.pm_vr = pm_vr + return grads, exp_sto, exp_solver_sto + elif self.cheap and not self.vr: + return cheap_gradients(self.random, *params, payoff_matrices, + self.num_players, self.temperature, self.proj_grad) + else: + return gradients(*params, payoff_matrices, self.num_players, + self.temperature, self.proj_grad) + + def exploitability(self, params, payoff_matrices): + """Compute and return shannon entropy regularized exploitability. + + Args: + params: tuple of params (dist, y), see qre.gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + float, exploitability of current dist + """ + return exp.qre_exploitability(params, payoff_matrices, self.temperature) + + def euc_descent_step(self, params, grads, t): + """Projected gradient descent on exploitability using Euclidean projection. + + Args: + params: tuple of variables to be updated (dist, y) + grads: tuple of variable gradients (grad_dist, grad_y) + t: int, solver iteration + Returns: + new_params: tuple of update params (new_dist, new_y) + """ + lr_dist, lr_y = self.lrs + new_params = [params[0] - lr_dist * grads[0]] + lr_y = np.clip(1 / float(t + 1), lr_y, np.inf) + new_params += [params[1] - lr_y * grads[1]] + new_params = euc_project(*new_params) + return new_params + + def mirror_descent_step(self, params, grads, t): + """Entropic mirror descent on exploitability. + + Args: + params: tuple of variables to be updated (dist, y) + grads: tuple of variable gradients (grad_dist, grad_y) + t: int, solver iteration + Returns: + new_params: tuple of update params (new_dist, new_y) + """ + lr_dist, lr_y = self.lrs + new_params = [np.log(np.clip(params[0], 0, np.inf)) - lr_dist * grads[0]] + lr_y = np.clip(1 / float(t + 1), lr_y, np.inf) + new_params += [params[1] - lr_y * grads[1]] + new_params = mirror_project(*new_params) + return new_params + + +def gradients(dist, y, payoff_matrices, num_players, temperature=0., + proj_grad=True): + """Computes exploitablity gradient and aux variable gradients. + + Args: + dist: 1-d np.array, current estimate of nash distribution + y: 1-d np.array (same shape as dist), current estimate of payoff gradient + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + num_players: int, number of players, in case payoff_matrices is abbreviated + temperature: non-negative float, default 0. + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. (dist, y) as tuple + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + nabla = payoff_matrices[0].dot(dist) + if temperature > 0: + br = special.softmax(y / temperature) + br_policy_gradient = nabla - temperature * (np.log(br) + 1) + else: + power = np.inf + s = np.linalg.norm(y, ord=power) + br = np.zeros_like(dist) + maxima = (y == s) + br[maxima] = 1. 
/ maxima.sum() + br_policy_gradient = np.zeros_like(br) + + unreg_exp = np.max(y) - y.dot(dist) + entr_br = temperature * special.entr(br).sum() + entr_dist = temperature * special.entr(dist).sum() + reg_exp = y.dot(br - dist) + entr_br - entr_dist + + policy_gradient = nabla + if temperature > 0: + policy_gradient -= temperature * (np.log(dist) + 1) + other_player_fx = (br - dist) + if temperature > 0: + # much faster to avoid constructing br_mat and then computing + # br_mat.dot(br_policy_gradient) -- instead, expand out and only compute + # inner products + temp = (br_policy_gradient - br.dot(br_policy_gradient)) + other_player_fx += br / temperature * temp + + other_player_fx_translated = payoff_matrices[1].dot(other_player_fx) + grad_dist = -policy_gradient + (num_players - 1) * other_player_fx_translated + if proj_grad: + grad_dist = simplex.project_grad(grad_dist) + grad_y = y - nabla + + return (grad_dist, grad_y), unreg_exp, reg_exp + + +def cheap_gradients(random, dist, y, payoff_matrices, num_players, + temperature=0., proj_grad=True): + """Computes exploitablity gradient and aux variable gradients with samples. + + This implementation takes payoff_matrices as input so technically uses O(d^2) + compute but only a single column of payoff_matrices is used to perform the + update so can be re-implemented in O(d) if needed. + + Args: + random: random number generator, np.random.RandomState(seed) + dist: 1-d np.array, current estimate of nash distribution + y: 1-d np.array (same shape as dist), current estimate of payoff gradient + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + num_players: int, number of players, in case payoff_matrices is abbreviated + temperature: non-negative float, default 0. + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. (dist, y) as tuple + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + action_1 = random.choice(dist.size, p=dist) + nabla = payoff_matrices[0][:, action_1] + if temperature > 0: + br = special.softmax(y / temperature) + br_policy_gradient = nabla - temperature * (np.log(br) + 1) + else: + power = np.inf + s = np.linalg.norm(y, ord=power) + br = np.zeros_like(dist) + maxima = (y == s) + br[maxima] = 1. 
/ maxima.sum() + br_policy_gradient = np.zeros_like(br) + + unreg_exp = np.max(y) - y.dot(dist) + entr_br = temperature * special.entr(br).sum() + entr_dist = temperature * special.entr(dist).sum() + reg_exp = y.dot(br - dist) + entr_br - entr_dist + + policy_gradient = nabla + if temperature > 0: + policy_gradient -= temperature * (np.log(dist) + 1) + other_player_fx = (br - dist) + if temperature > 0: + # much faster to avoid constructing br_mat and then computing + # br_mat.dot(br_policy_gradient) -- instead, expand out and only compute + # inner products + temp = (br_policy_gradient - br.dot(br_policy_gradient)) + other_player_fx += br / temperature * temp + + action_u = random.choice(dist.size) # uniform, ~importance sampling + other_player_fx = dist.size * other_player_fx[action_u] + other_player_fx_translated = payoff_matrices[1, :, action_u] * other_player_fx + grad_dist = -policy_gradient + (num_players - 1) * other_player_fx_translated + if proj_grad: + grad_dist = simplex.project_grad(grad_dist) + grad_y = y - nabla + + return (grad_dist, grad_y), unreg_exp, reg_exp + + +def cheap_gradients_vr(random, dist, y, payoff_matrices, num_players, pm_vr, + temperature=0., proj_grad=True, version=0): + """Computes exploitablity gradient and aux variable gradients with samples. + + This implementation takes payoff_matrices as input so technically uses O(d^2) + compute but only a single column of payoff_matrices is used to perform the + update so can be re-implemented in O(d) if needed. + + Args: + random: random number generator, np.random.RandomState(seed) + dist: 1-d np.array, current estimate of nash distribution + y: 1-d np.array (same shape as dist), current estimate of payoff gradient + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + num_players: int, number of players, in case payoff_matrices is abbreviated + pm_vr: approximate payoff_matrix for variance reduction + temperature: non-negative float, default 0. + proj_grad: bool, if True, projects dist gradient onto simplex + version: int, default 0, two options for variance reduction + Returns: + gradient of exploitability w.r.t. (dist, y) as tuple + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + if pm_vr is None: + raise ValueError("pm_vr must be np.array of shape (num_strats, num_strats)") + if (not isinstance(version, int)) or (version < 0) or (version > 1): + raise ValueError("version must be non-negative int < 2") + + action_1 = random.choice(dist.size, p=dist) + nabla = payoff_matrices[0][:, action_1] + if temperature > 0: + br = special.softmax(y / temperature) + br_policy_gradient = nabla - temperature * (np.log(br) + 1) + else: + power = np.inf + s = np.linalg.norm(y, ord=power) + br = np.zeros_like(dist) + maxima = (y == s) + br[maxima] = 1. 
/ maxima.sum() + br_policy_gradient = np.zeros_like(br) + + unreg_exp = np.max(y) - y.dot(dist) + entr_br = temperature * special.entr(br).sum() + entr_dist = temperature * special.entr(dist).sum() + reg_exp = y.dot(br - dist) + entr_br - entr_dist + + policy_gradient = nabla + if temperature > 0: + policy_gradient -= temperature * (np.log(dist) + 1) + other_player_fx = (br - dist) + if temperature > 0: + # much faster to avoid constructing br_mat and then computing + # br_mat.dot(br_policy_gradient) -- instead, expand out and only compute + # inner products + temp = (br_policy_gradient - br.dot(br_policy_gradient)) + other_player_fx += br / temperature * temp + + if version == 0: + other_player_fx_translated = pm_vr.dot(other_player_fx) + action_u = random.choice(dist.size) # uniform, ~importance sampling + other_player_fx = other_player_fx[action_u] + pm_mod = dist.size * (payoff_matrices[1, :, action_u] - pm_vr[:, action_u]) + other_player_fx_translated += pm_mod * other_player_fx + elif version == 1: + other_player_fx_translated = np.sum(pm_vr, axis=1) + action_u = random.choice(dist.size) # uniform, ~importance sampling + other_player_fx = other_player_fx[action_u] + pm_mod = dist.size * payoff_matrices[1, :, action_u] + r = dist.size * pm_vr[:, action_u] + other_player_fx_translated += pm_mod * other_player_fx - r + + grad_dist = -policy_gradient + (num_players - 1) * other_player_fx_translated + if proj_grad: + grad_dist = simplex.project_grad(grad_dist) + grad_y = y - nabla + + if version == 0: + pm_vr[:, action_u] = payoff_matrices[1, :, action_u] + elif version == 1: + pm_vr[:, action_u] = payoff_matrices[1, :, action_u] * other_player_fx + + return (grad_dist, grad_y), pm_vr, unreg_exp, reg_exp + + +def euc_project(dist, y): + """Project variables onto their feasible sets (euclidean proj for dist). + + Args: + dist: 1-d np.array, current estimate of nash distribution + y: 1-d np.array (same shape as dist), current estimate of payoff gradient + Returns: + projected variables (dist, y) as tuple + """ + dist = simplex.euclidean_projection_onto_simplex(dist) + y = np.clip(y, 0., np.inf) + + return dist, y + + +def mirror_project(dist, y): + """Project variables onto their feasible sets (softmax for dist). + + Args: + dist: 1-d np.array, current estimate of nash distribution + y: 1-d np.array (same shape as dist), current estimate of payoff gradient + Returns: + projected variables (dist, y) as tuple + """ + dist = special.softmax(dist) + y = np.clip(y, 0., np.inf) + + return dist, y diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/qre_anneal.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/qre_anneal.py new file mode 100644 index 0000000..84899fb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/qre_anneal.py @@ -0,0 +1,390 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
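
The symmetric `pg.py` and `qre.py` solvers above (and the annealed variant that follows) share the same ADIDAS solver interface: `init_vars` builds the parameter tuple, `compute_gradients` returns `(grads, unregularized_exploitability, regularized_exploitability)`, and `update` is bound to either Euclidean or entropic mirror descent. The following is a minimal sketch, not part of the patch, of driving the QRE solver on a toy symmetric two-player game; the random payoff matrix, temperature, and iteration count are illustrative only, and it assumes the `open_spiel` Python package from this diff is importable.

```python
import numpy as np
from open_spiel.python.algorithms.adidas_utils.solvers.symmetric import qre

num_strats, num_players = 3, 2
rng = np.random.RandomState(0)
payoff_0 = rng.rand(num_strats, num_strats)
# Symmetric game: player 1's payoff matrix is the transpose of player 0's.
payoff_matrices = np.stack([payoff_0, payoff_0.T])

solver = qre.Solver(temperature=0.1, lrs=(1e-2, 1e-1), seed=0)
params = solver.init_vars(num_strats, num_players)  # (dist, y)
for t in range(1000):
    grads, unreg_exp, reg_exp = solver.compute_gradients(params, payoff_matrices)
    params = solver.update(params, grads, t)

dist = params[0]  # approximate entropy-regularized Nash distribution
print(dist, solver.exploitability(params, payoff_matrices))
```

With the default `euclidean=False`, `update` is entropic mirror descent, so the returned `dist` stays strictly positive and the `np.log(dist)` term in the gradient remains finite.
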
+ +"""Quantal Response Equilibrium (QRE) Stochastic Approximate Nash Solver.""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np +from scipy import special + +from open_spiel.python.algorithms.adidas_utils.helpers import simplex +from open_spiel.python.algorithms.adidas_utils.helpers.symmetric import exploitability as exp + + +class Solver(object): + """QRE Solver.""" + + def __init__(self, temperature=1., proj_grad=True, euclidean=False, + cheap=False, lrs=(1e-2, 1e-1), exp_thresh=-1., vr=True, + rnd_init=False, seed=None, **kwargs): + """Ctor.""" + del kwargs + if temperature < 0.: + raise ValueError("temperature must be non-negative") + self.num_players = None + self.temperature = temperature + self.proj_grad = proj_grad + self.cheap = cheap + self.vr = vr + self.pm_vr = None + self.rnd_init = rnd_init + self.lrs = lrs + self.exp_thresh = exp_thresh + self.has_aux = True + self.aux_errors = [] + + self.euclidean = euclidean + if euclidean: + self.update = self.euc_descent_step + else: + self.update = self.mirror_descent_step + + self.seed = seed + self.random = np.random.RandomState(seed) + + def init_vars(self, num_strats, num_players): + """Initialize solver parameters.""" + self.num_players = num_players + if self.rnd_init: + init_dist = self.random.rand(num_strats) + else: + init_dist = np.ones(num_strats) + init_dist /= init_dist.sum() + init_y = np.zeros(num_strats) + init_anneal_steps = 0 + if self.cheap and self.vr: + self.pm_vr = np.zeros((num_strats, num_strats)) + return (init_dist, init_y, init_anneal_steps) + + def record_aux_errors(self, grads): + """Record errors for the auxiliary variables.""" + grad_y = grads[1] + self.aux_errors.append([np.linalg.norm(grad_y)]) + + def compute_gradients(self, params, payoff_matrices): + """Compute and return gradients (and exploitabilities) for all parameters. + + Args: + params: tuple of params (dist, y, anneal_steps), see gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + tuple of gradients (grad_dist, grad_y, grad_anneal_steps), see gradients + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + if self.cheap and self.vr: + grads, pm_vr, exp_sto, exp_solver_sto = self.cheap_gradients_vr( + self.random, *params, payoff_matrices, self.num_players, self.pm_vr, + self.temperature, self.proj_grad,) + self.pm_vr = pm_vr + return grads, exp_sto, exp_solver_sto + elif self.cheap and not self.vr: + return self.cheap_gradients(self.random, *params, payoff_matrices, + self.num_players, self.temperature, + self.proj_grad) + else: + return self.gradients(*params, payoff_matrices, self.num_players, + self.temperature, self.proj_grad) + + def exploitability(self, params, payoff_matrices): + """Compute and return shannon entropy regularized exploitability. + + Args: + params: tuple of params (dist, y), see qre.gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + float, exploitability of current dist + """ + return exp.qre_exploitability(params, payoff_matrices, self.temperature) + + def gradients(self, dist, y, anneal_steps, payoff_matrices, num_players, + temperature=0., proj_grad=True): + """Computes exploitablity gradient and aux variable gradients. 
+ + Args: + dist: 1-d np.array, current estimate of nash distribution + y: 1-d np.array (same shape as dist), current estimate of payoff gradient + anneal_steps: int, elapsed num steps since last anneal + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + num_players: int, number of players, in case payoff_matrices is + abbreviated + temperature: non-negative float, default 0. + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. (dist, y, anneal_steps) as tuple + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + nabla = payoff_matrices[0].dot(dist) + if temperature >= 1e-3: + br = special.softmax(y / temperature) + log_br_safe = np.clip(np.log(br), -1e5, 0) + br_policy_gradient = nabla - temperature * (log_br_safe + 1) + else: + power = np.inf + s = np.linalg.norm(y, ord=power) + br = np.zeros_like(dist) + maxima = (y >= s) + br[maxima] = 1. / maxima.sum() + br_policy_gradient = np.zeros_like(br) + + unreg_exp = np.max(y) - y.dot(dist) + entr_br = temperature * special.entr(br).sum() + entr_dist = temperature * special.entr(dist).sum() + reg_exp = y.dot(br - dist) + entr_br - entr_dist + + policy_gradient = np.array(nabla) + if temperature > 0: + log_dist_safe = np.clip(np.log(dist), -1e5, 0) + policy_gradient -= temperature * (log_dist_safe + 1) + other_player_fx = (br - dist) + if temperature >= 1e-3: + # much faster to avoid constructing br_mat and then computing + # br_mat.dot(br_policy_gradient) -- instead, expand out and only compute + # inner products + temp = (br_policy_gradient - br.dot(br_policy_gradient)) + other_player_fx += br / temperature * temp + + other_player_fx_translated = payoff_matrices[1].dot(other_player_fx) + grad_dist = -policy_gradient + grad_dist += (num_players - 1) * other_player_fx_translated + if proj_grad: + grad_dist = simplex.project_grad(grad_dist) + grad_y = y - nabla + + _, lr_y = self.lrs + if (reg_exp < self.exp_thresh) and (anneal_steps >= 1 / lr_y): + self.temperature = np.clip(temperature / 2., 0., np.inf) + if self.temperature < 1e-3: + self.temperature = 0. + grad_anneal_steps = -anneal_steps + else: + grad_anneal_steps = 1 + + return (grad_dist, grad_y, grad_anneal_steps), unreg_exp, reg_exp + + def cheap_gradients(self, random, dist, y, anneal_steps, payoff_matrices, + num_players, temperature=0., proj_grad=True): + """Computes exploitablity gradient and aux variable gradients with samples. + + This implementation takes payoff_matrices as input so technically uses + O(d^2) compute but only a single column of payoff_matrices is used to + perform the update so can be re-implemented in O(d) if needed. + + Args: + random: random number generator, np.random.RandomState(seed) + dist: 1-d np.array, current estimate of nash distribution + y: 1-d np.array (same shape as dist), current estimate of payoff gradient + anneal_steps: int, elapsed num steps since last anneal + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + num_players: int, number of players, in case payoff_matrices is + abbreviated + temperature: non-negative float, default 0. + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. 
(dist, y, anneal_steps) as tuple + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + del anneal_steps + action_1 = random.choice(dist.size, p=dist) + nabla = payoff_matrices[0][:, action_1] + if temperature >= 1e-3: + br = special.softmax(y / temperature) + br_policy_gradient = nabla - temperature * (np.log(br) + 1) + else: + power = np.inf + s = np.linalg.norm(y, ord=power) + br = np.zeros_like(dist) + maxima = (y == s) + br[maxima] = 1. / maxima.sum() + br_policy_gradient = np.zeros_like(br) + + unreg_exp = np.max(y) - y.dot(dist) + entr_br = temperature * special.entr(br).sum() + entr_dist = temperature * special.entr(dist).sum() + reg_exp = y.dot(br - dist) + entr_br - entr_dist + + policy_gradient = nabla - temperature * (np.log(dist) + 1) + other_player_fx = (br - dist) + if temperature >= 1e-3: + # much faster to avoid constructing br_mat and then computing + # br_mat.dot(br_policy_gradient) -- instead, expand out and only compute + # inner products + temp = (br_policy_gradient - br.dot(br_policy_gradient)) + other_player_fx += br / temperature * temp + + action_u = random.choice(dist.size) # uniform, ~importance sampling + other_player_fx = dist.size * other_player_fx[action_u] + other_player_fx_translat = payoff_matrices[1, :, action_u] * other_player_fx + grad_dist = -policy_gradient + (num_players - 1) * other_player_fx_translat + if proj_grad: + grad_dist = simplex.project_grad(grad_dist) + grad_y = y - nabla + + return (grad_dist, grad_y, None), unreg_exp, reg_exp + + def cheap_gradients_vr(self, random, dist, y, anneal_steps, payoff_matrices, + num_players, pm_vr, temperature=0., proj_grad=True, + version=0): + """Computes exploitablity gradient and aux variable gradients with samples. + + This implementation takes payoff_matrices as input so technically uses + O(d^2) compute but only a single column of payoff_matrices is used to + perform the update so can be re-implemented in O(d) if needed. + + Args: + random: random number generator, np.random.RandomState(seed) + dist: 1-d np.array, current estimate of nash distribution + y: 1-d np.array (same shape as dist), current estimate of payoff gradient + anneal_steps: int, elapsed num steps since last anneal + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + num_players: int, number of players, in case payoff_matrices is + abbreviated + pm_vr: approximate payoff_matrix for variance reduction + temperature: non-negative float, default 0. + proj_grad: bool, if True, projects dist gradient onto simplex + version: int, default 0, two options for variance reduction + Returns: + gradient of exploitability w.r.t. (dist, y, anneal_steps) as tuple + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + del anneal_steps + if pm_vr is None: + raise ValueError("pm_vr must be np.array of shape (num_strats,) * 2") + if (not isinstance(version, int)) or (version < 0) or (version > 1): + raise ValueError("version must be non-negative int < 2") + + action_1 = random.choice(dist.size, p=dist) + nabla = payoff_matrices[0][:, action_1] + if temperature >= 1e-3: + br = special.softmax(y / temperature) + br_policy_gradient = nabla - temperature * (np.log(br) + 1) + else: + power = np.inf + s = np.linalg.norm(y, ord=power) + br = np.zeros_like(dist) + maxima = (y == s) + br[maxima] = 1. 
/ maxima.sum() + br_policy_gradient = np.zeros_like(br) + + unreg_exp = np.max(y) - y.dot(dist) + entr_br = temperature * special.entr(br).sum() + entr_dist = temperature * special.entr(dist).sum() + reg_exp = y.dot(br - dist) + entr_br - entr_dist + + policy_gradient = nabla - temperature * (np.log(dist) + 1) + other_player_fx = (br - dist) + if temperature >= 1e-3: + # much faster to avoid constructing br_mat and then computing + # br_mat.dot(br_policy_gradient) -- instead, expand out and only compute + # inner products + temp = (br_policy_gradient - br.dot(br_policy_gradient)) + other_player_fx += br / temperature * temp + + if version == 0: + other_player_fx_translated = pm_vr.dot(other_player_fx) + action_u = random.choice(dist.size) # uniform, ~importance sampling + other_player_fx = other_player_fx[action_u] + m = dist.size + pm_mod = m * (payoff_matrices[1, :, action_u] - pm_vr[:, action_u]) + other_player_fx_translated += pm_mod * other_player_fx + elif version == 1: + other_player_fx_translated = np.sum(pm_vr, axis=1) + action_u = random.choice(dist.size) # uniform, ~importance sampling + other_player_fx = other_player_fx[action_u] + pm_mod = dist.size * payoff_matrices[1, :, action_u] + r = dist.size * pm_vr[:, action_u] + other_player_fx_translated += pm_mod * other_player_fx - r + + grad_dist = -policy_gradient + grad_dist += (num_players - 1) * other_player_fx_translated + if proj_grad: + grad_dist = simplex.project_grad(grad_dist) + grad_y = y - nabla + + if version == 0: + pm_vr[:, action_u] = payoff_matrices[1, :, action_u] + elif version == 1: + pm_vr[:, action_u] = payoff_matrices[1, :, action_u] * other_player_fx + + return (grad_dist, grad_y, None), pm_vr, unreg_exp, reg_exp + + def euc_descent_step(self, params, grads, t): + """Projected gradient descent on exploitability using Euclidean projection. + + Args: + params: tuple of variables to be updated (dist, y, anneal_steps) + grads: tuple of variable gradients (grad_dist, grad_y, grad_anneal_steps) + t: int, solver iteration + Returns: + new_params: tuple of update params (new_dist, new_y, new_anneal_steps) + """ + lr_dist, lr_y = self.lrs + new_params = [params[0] - lr_dist * grads[0]] + lr_y = np.clip(1 / float(t + 1), lr_y, np.inf) + new_params += [params[1] - lr_y * grads[1]] + new_params = euc_project(*new_params) + new_params += (params[2] + grads[2],) + return new_params + + def mirror_descent_step(self, params, grads, t): + """Entropic mirror descent on exploitability. + + Args: + params: tuple of variables to be updated (dist, y, anneal_steps) + grads: tuple of variable gradients (grad_dist, grad_y, grad_anneal_steps) + t: int, solver iteration + Returns: + new_params: tuple of update params (new_dist, new_y, new_anneal_steps) + """ + lr_dist, lr_y = self.lrs + new_params = [np.log(np.clip(params[0], 0, np.inf)) - lr_dist * grads[0]] + lr_y = np.clip(1 / float(t + 1), lr_y, np.inf) + new_params += [params[1] - lr_y * grads[1]] + new_params = mirror_project(*new_params) + new_params += (params[2] + grads[2],) + return new_params + + +def euc_project(dist, y): + """Project variables onto their feasible sets (euclidean proj for dist). 
+ + Args: + dist: 1-d np.array, current estimate of nash distribution + y: 1-d np.array (same shape as dist), current estimate of payoff gradient + Returns: + projected variables (dist, y) as tuple + """ + dist = simplex.euclidean_projection_onto_simplex(dist) + y = np.clip(y, 0., np.inf) + + return dist, y + + +def mirror_project(dist, y): + """Project variables onto their feasible sets (softmax for dist). + + Args: + dist: 1-d np.array, current estimate of nash distribution + y: 1-d np.array (same shape as dist), current estimate of payoff gradient + Returns: + projected variables (dist, y) as tuple + """ + dist = special.softmax(dist) + y = np.clip(y, 0., np.inf) + + return dist, y diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/qre_anneal_noaux.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/qre_anneal_noaux.py new file mode 100644 index 0000000..d8a6235 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/qre_anneal_noaux.py @@ -0,0 +1,358 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Quantal Response Equilibrium (QRE) Stochastic Approximate Nash Solver.""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np +from scipy import special + +from open_spiel.python.algorithms.adidas_utils.helpers import simplex +from open_spiel.python.algorithms.adidas_utils.helpers.symmetric import exploitability as exp + + +class Solver(object): + """QRE Solver without auxiliary y variable.""" + + def __init__(self, temperature=1., proj_grad=True, euclidean=False, + cheap=False, lrs=(1e-2,), exp_thresh=-1., vr=True, + rnd_init=False, seed=None, **kwargs): + """Ctor.""" + del kwargs + if temperature < 0.: + raise ValueError("temperature must be non-negative") + self.num_players = None + self.temperature = temperature + self.proj_grad = proj_grad + self.cheap = cheap + self.vr = vr + self.pm_vr = None + self.rnd_init = rnd_init + self.lrs = lrs + self.exp_thresh = exp_thresh + self.has_aux = False + + self.euclidean = euclidean + if euclidean: + self.update = self.euc_descent_step + else: + self.update = self.mirror_descent_step + + self.seed = seed + self.random = np.random.RandomState(seed) + + def init_vars(self, num_strats, num_players): + """Initialize solver parameters.""" + self.num_players = num_players + if self.rnd_init: + init_dist = self.random.rand(num_strats) + else: + init_dist = np.ones(num_strats) + init_dist /= init_dist.sum() + init_anneal_steps = 0 + if self.cheap and self.vr: + self.pm_vr = np.zeros((num_strats, num_strats)) + return (init_dist, init_anneal_steps) + + def compute_gradients(self, params, payoff_matrices): + """Compute and return gradients (and exploitabilities) for all parameters. 
+ + Args: + params: tuple of params (dist, anneal_steps), see gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + tuple of gradients (grad_dist, grad_anneal_steps), see gradients + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + if self.cheap and self.vr: + grads, pm_vr, exp_sto, exp_solver_sto = self.cheap_gradients_vr( + self.random, *params, payoff_matrices, self.num_players, self.pm_vr, + self.temperature, self.proj_grad,) + self.pm_vr = pm_vr + return grads, exp_sto, exp_solver_sto + elif self.cheap and not self.vr: + return self.cheap_gradients(self.random, *params, payoff_matrices, + self.num_players, self.temperature, + self.proj_grad) + else: + return self.gradients(*params, payoff_matrices, self.num_players, + self.temperature, self.proj_grad) + + def exploitability(self, params, payoff_matrices): + """Compute and return shannon entropy regularized exploitability. + + Args: + params: tuple of params (dist, y), see qre.gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + float, exploitability of current dist + """ + return exp.qre_exploitability(params, payoff_matrices, self.temperature) + + def gradients(self, dist, anneal_steps, payoff_matrices, num_players, + temperature=0., proj_grad=True): + """Computes exploitablity gradient and aux variable gradients. + + Args: + dist: 1-d np.array, current estimate of nash distribution + anneal_steps: int, elapsed num steps since last anneal + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + num_players: int, number of players, in case payoff_matrices is + abbreviated + temperature: non-negative float, default 0. + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. (dist, anneal_steps) as tuple + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + y = nabla = payoff_matrices[0].dot(dist) + if temperature >= 1e-3: + br = special.softmax(y / temperature) + br_mat = (np.diag(br) - np.outer(br, br)) / temperature + log_br_safe = np.clip(np.log(br), -1e5, 0) + br_policy_gradient = nabla - temperature * (log_br_safe + 1) + else: + power = np.inf + s = np.linalg.norm(y, ord=power) + br = np.zeros_like(dist) + maxima = (y == s) + br[maxima] = 1. / maxima.sum() + br_mat = np.zeros((br.size, br.size)) + br_policy_gradient = np.zeros_like(br) + + unreg_exp = np.max(y) - y.dot(dist) + entr_br = temperature * special.entr(br).sum() + entr_dist = temperature * special.entr(dist).sum() + reg_exp = y.dot(br - dist) + entr_br - entr_dist + + policy_gradient = np.array(nabla) + if temperature > 0: + log_dist_safe = np.clip(np.log(dist), -1e5, 0) + policy_gradient -= temperature * (log_dist_safe + 1) + other_player_fx = (br - dist) + br_mat.dot(br_policy_gradient) + + other_player_fx_translated = payoff_matrices[1].dot(other_player_fx) + grad_dist = -policy_gradient + grad_dist += (num_players - 1) * other_player_fx_translated + if proj_grad: + grad_dist = simplex.project_grad(grad_dist) + + if reg_exp < self.exp_thresh: + self.temperature = np.clip(temperature / 2., 0., np.inf) + if self.temperature < 1e-3: + self.temperature = 0. 
+ grad_anneal_steps = -anneal_steps + else: + grad_anneal_steps = 1 + + return (grad_dist, grad_anneal_steps), unreg_exp, reg_exp + + def cheap_gradients(self, random, dist, anneal_steps, payoff_matrices, + num_players, temperature=0., proj_grad=True): + """Computes exploitablity gradient and aux variable gradients with samples. + + This implementation takes payoff_matrices as input so technically uses + O(d^2) compute but only a single column of payoff_matrices is used to + perform the update so can be re-implemented in O(d) if needed. + + Args: + random: random number generator, np.random.RandomState(seed) + dist: 1-d np.array, current estimate of nash distribution + anneal_steps: int, elapsed num steps since last anneal + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + num_players: int, number of players, in case payoff_matrices is + abbreviated + temperature: non-negative float, default 0. + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. (dist, anneal_steps) as tuple + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + del anneal_steps + action_1 = random.choice(dist.size, p=dist) + y = nabla = payoff_matrices[0][:, action_1] + if temperature >= 1e-3: + br = special.softmax(y / temperature) + br_mat = (np.diag(br) - np.outer(br, br)) / temperature + br_policy_gradient = nabla - temperature * (np.log(br) + 1) + else: + power = np.inf + s = np.linalg.norm(y, ord=power) + br = np.zeros_like(dist) + maxima = (y == s) + br[maxima] = 1. / maxima.sum() + br_mat = np.zeros((br.size, br.size)) + br_policy_gradient = np.zeros_like(br) + + unreg_exp = np.max(y) - y.dot(dist) + entr_br = temperature * special.entr(br).sum() + entr_dist = temperature * special.entr(dist).sum() + reg_exp = y.dot(br - dist) + entr_br - entr_dist + + policy_gradient = nabla - temperature * (np.log(dist) + 1) + other_player_fx = (br - dist) + br_mat.dot(br_policy_gradient) + + action_u = random.choice(dist.size) # uniform, ~importance sampling + other_player_fx = dist.size * other_player_fx[action_u] + other_player_fx_translat = payoff_matrices[1, :, action_u] * other_player_fx + grad_dist = -policy_gradient + (num_players - 1) * other_player_fx_translat + if proj_grad: + grad_dist = simplex.project_grad(grad_dist) + + return (grad_dist, None), unreg_exp, reg_exp + + def cheap_gradients_vr(self, random, dist, anneal_steps, payoff_matrices, + num_players, pm_vr, temperature=0., proj_grad=True, + version=0): + """Computes exploitablity gradient and aux variable gradients with samples. + + This implementation takes payoff_matrices as input so technically uses + O(d^2) compute but only a single column of payoff_matrices is used to + perform the update so can be re-implemented in O(d) if needed. + + Args: + random: random number generator, np.random.RandomState(seed) + dist: 1-d np.array, current estimate of nash distribution + anneal_steps: int, elapsed num steps since last anneal + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + num_players: int, number of players, in case payoff_matrices is + abbreviated + pm_vr: approximate payoff_matrix for variance reduction + temperature: non-negative float, default 0. + proj_grad: bool, if True, projects dist gradient onto simplex + version: int, default 0, two options for variance reduction + Returns: + gradient of exploitability w.r.t. 
(dist, anneal_steps) as tuple + unregularized exploitability (stochastic estimate) + tsallis regularized exploitability (stochastic estimate) + """ + del anneal_steps + if pm_vr is None: + raise ValueError("pm_vr must be np.array of shape (num_strats,) * 2") + if (not isinstance(version, int)) or (version < 0) or (version > 1): + raise ValueError("version must be non-negative int < 2") + + action_1 = random.choice(dist.size, p=dist) + y = nabla = payoff_matrices[0][:, action_1] + if temperature >= 1e-3: + br = special.softmax(y / temperature) + br_mat = (np.diag(br) - np.outer(br, br)) / temperature + br_policy_gradient = nabla - temperature * (np.log(br) + 1) + else: + power = np.inf + s = np.linalg.norm(y, ord=power) + br = np.zeros_like(dist) + maxima = (y == s) + br[maxima] = 1. / maxima.sum() + br_mat = np.zeros((br.size, br.size)) + br_policy_gradient = np.zeros_like(br) + + unreg_exp = np.max(y) - y.dot(dist) + entr_br = temperature * special.entr(br).sum() + entr_dist = temperature * special.entr(dist).sum() + reg_exp = y.dot(br - dist) + entr_br - entr_dist + + policy_gradient = nabla - temperature * (np.log(dist) + 1) + other_player_fx = (br - dist) + br_mat.dot(br_policy_gradient) + + if version == 0: + other_player_fx_translated = pm_vr.dot(other_player_fx) + action_u = random.choice(dist.size) # uniform, ~importance sampling + other_player_fx = other_player_fx[action_u] + m = dist.size + pm_mod = m * (payoff_matrices[1, :, action_u] - pm_vr[:, action_u]) + other_player_fx_translated += pm_mod * other_player_fx + elif version == 1: + other_player_fx_translated = np.sum(pm_vr, axis=1) + action_u = random.choice(dist.size) # uniform, ~importance sampling + other_player_fx = other_player_fx[action_u] + pm_mod = dist.size * payoff_matrices[1, :, action_u] + r = dist.size * pm_vr[:, action_u] + other_player_fx_translated += pm_mod * other_player_fx - r + + grad_dist = -policy_gradient + grad_dist += (num_players - 1) * other_player_fx_translated + if proj_grad: + grad_dist = simplex.project_grad(grad_dist) + + if version == 0: + pm_vr[:, action_u] = payoff_matrices[1, :, action_u] + elif version == 1: + pm_vr[:, action_u] = payoff_matrices[1, :, action_u] * other_player_fx + + return (grad_dist, None), pm_vr, unreg_exp, reg_exp + + def euc_descent_step(self, params, grads, t): + """Projected gradient descent on exploitability using Euclidean projection. + + Args: + params: tuple of variables to be updated (dist, anneal_steps) + grads: tuple of variable gradients (grad_dist, grad_anneal_steps) + t: int, solver iteration + Returns: + new_params: tuple of update params (new_dist, new_anneal_steps) + """ + del t + lr_dist = self.lrs[0] + new_params = [params[0] - lr_dist * grads[0]] + new_params = euc_project(*new_params) + new_params += (params[1] + grads[1],) + return new_params + + def mirror_descent_step(self, params, grads, t): + """Entropic mirror descent on exploitability. + + Args: + params: tuple of variables to be updated (dist, anneal_steps) + grads: tuple of variable gradients (grad_dist, grad_anneal_steps) + t: int, solver iteration + Returns: + new_params: tuple of update params (new_dist, new_anneal_steps) + """ + del t + lr_dist = self.lrs[0] + new_params = [np.log(np.clip(params[0], 0, np.inf)) - lr_dist * grads[0]] + new_params = mirror_project(*new_params) + new_params += (params[1] + grads[1],) + return new_params + + +def euc_project(dist): + """Project variables onto their feasible sets (euclidean proj for dist). 
+ + Args: + dist: 1-d np.array, current estimate of nash distribution + Returns: + projected variables (dist,) as tuple + """ + dist = simplex.euclidean_projection_onto_simplex(dist) + + return (dist,) + + +def mirror_project(dist): + """Project variables onto their feasible sets (softmax for dist). + + Args: + dist: 1-d np.array, current estimate of nash distribution + Returns: + projected variables (dist,) as tuple + """ + dist = special.softmax(dist) + + return (dist,) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/regmatch.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/regmatch.py new file mode 100644 index 0000000..5885f11 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/regmatch.py @@ -0,0 +1,132 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Regret Matching Approximate Nash Solver.""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np + + +class Solver(object): + """Regret-matching Solver.""" + + def __init__(self, optimism=True, discount=False, rnd_init=False, seed=None, + **kwargs): + """Ctor.""" + del kwargs + self.num_players = None + self.lrs = None + self.optimism = optimism + self.discount = discount + self.rnd_init = rnd_init + self.has_aux = True + self.aux_errors = [] + + self.seed = seed + self.random = np.random.RandomState(seed) + + def init_vars(self, num_strats, num_players): + """Initialize solver parameters.""" + self.num_players = num_players + if self.rnd_init: + init_dist = self.random.rand(num_strats) + else: + init_dist = np.ones(num_strats) + init_dist /= init_dist.sum() + init_regret = np.zeros(num_strats) + return (init_dist, init_regret) + + def record_aux_errors(self, grads): + """Record errors for the auxiliary variables.""" + grad_regret = grads[1] + self.aux_errors.append([np.linalg.norm(grad_regret)]) + + def compute_gradients(self, params, payoff_matrices): + """Compute and return gradients (and exploitabilities) for all parameters. + + Args: + params: tuple of params (dist, regret), see regmatch.gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + tuple of gradients (grad_dist, grad_regret), see ate.gradients + unregularized exploitability (stochastic estimate) + solver exploitability (stochastic estimate) - NaN + """ + return gradients(*params, payoff_matrices) + + def exploitability(self, params, payoff_matrices): + """Regret matching does not minimize any exploitability so return NaN. + + Args: + params: tuple of params (dist,) + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + np.NaN + """ + del params + del payoff_matrices + return np.nan + + def update(self, params, grads, t): + """Update cumulative regret and strategy (dist). 
+ + Args: + params: tuple of variables to be updated (dist, regret) + grads: tuple of variable gradients (grad_dist, grad_regret) + t: int, solver iteration (not used) + Returns: + new_params: tuple of update params (new_dist, new_regret) + """ + dist, regret = params + regret_delta = grads[1] + if self.discount: + gamma = t / float(t + 1) + else: + gamma = 1 + new_regret = gamma * regret + regret_delta + new_clipped_regrets = np.clip(new_regret + self.optimism * regret_delta, + 0., + np.inf) + if np.sum(new_clipped_regrets) > 0: + new_dist = new_clipped_regrets / new_clipped_regrets.sum() + else: + new_dist = np.ones_like(dist) / dist.size + new_params = (new_dist, new_regret) + return new_params + + +def gradients(dist, regret, payoff_matrices): + """Computes regret delta to be added to regret in update. + + Args: + dist: 1-d np.array, current estimate of nash distribution + regret: 1-d np.array (same shape as dist), current estimate of regrets + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + deltas w.r.t. (dist, regret) as tuple + unregularized exploitability (stochastic estimate) + solver exploitability (stochastic estimate) - NaN + """ + del regret + + nabla = payoff_matrices[0].dot(dist) + utility = nabla.dot(dist) + + grad_dist = np.nan * np.ones_like(dist) + grad_regret = nabla - utility + + unreg_exp = np.max(nabla) - nabla.dot(dist) + + return (grad_dist, grad_regret), unreg_exp, np.nan diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/sgd.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/sgd.py new file mode 100644 index 0000000..941bb50 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/sgd.py @@ -0,0 +1,138 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Stochastic Gradient Descent (SGD) Approx. Nash Solver.""" + +from absl import logging # pylint:disable=unused-import + +import numpy as np + +from open_spiel.python.algorithms.adidas_utils.helpers import simplex +from open_spiel.python.algorithms.adidas_utils.helpers.symmetric import exploitability as exp +from open_spiel.python.algorithms.adidas_utils.helpers.symmetric import updates + + +class Solver(updates.Solver): + """SGD Solver.""" + + def __init__(self, temperature=0., proj_grad=True, euclidean=False, + lrs=(1e-1,), rnd_init=False, seed=None, **kwargs): + """Ctor.""" + del kwargs + super().__init__(proj_grad, euclidean, rnd_init, seed) + if temperature < 0.: + raise ValueError('temperature must be non-negative') + self.temperature = temperature + self.lrs = lrs + self.num_estimates = 2 + + if temperature > 0: + self.eps = np.exp(-1 / temperature) # ensure dist[i] >= eps / dim(dist) + else: + self.eps = 0. 
+ if euclidean: + self.update = lambda *args: self.euc_descent_step(*args, eps=self.eps) + else: + self.update = lambda *args: self.mirror_descent_step(*args, eps=self.eps) + + def init_vars(self, num_strats, num_players): + """Initialize solver parameters.""" + self.num_players = num_players + if self.rnd_init: + init_dist = self.random.rand(num_strats) + else: + init_dist = np.ones(num_strats) + init_dist /= init_dist.sum() + init_dist = simplex.project_to_interior(init_dist, self.eps) + return (init_dist,) + + def compute_gradients(self, params, payoff_matrices): + """Compute and return exploitability. + + Args: + params: tuple of params (dist,), see sgd.gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + float, exploitability of current dist + unregularized exploitability (stochastic estimate) + shannon regularized exploitability (stochastic estimate) + """ + return gradients(*params, payoff_matrices, self.num_players, + self.temperature, self.proj_grad) + + def exploitability(self, params, payoff_matrices): + """Compute and return exploitability. + + Args: + params: tuple of params (dist,), see sgd.gradients + payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action + Returns: + float, exploitability as avg squared norm of projected-gradient + """ + return exp.grad_norm_exploitability(params, payoff_matrices, eta=1., + temperature=self.temperature) + + +def gradients(dist, payoff_matrices, num_players, temperature=0., + proj_grad=True): + """Computes exploitablity gradient. + + Assumption: eta_k = 1 for all k + + Args: + dist: 1-d np.array, current estimate of nash distribution + payoff_matrices: 2 (>=2 x A x A) np.arrays, payoffs for each joint action + num_players: int, number of players, in case payoff_matrices is abbreviated + temperature: non-negative float, default 0. + proj_grad: bool, if True, projects dist gradient onto simplex + Returns: + gradient of exploitability w.r.t. (dist) as tuple + unregularized exploitability (stochastic estimate) + shannon regularized exploitability (stochastic estimate) + """ + del num_players + tau = temperature + + a, b = 0, 1 # 2 samples needed for unbiased estimation + p_0, p_1 = 0, 1 # player 0 index, player 1 index + hess_0_01_a = payoff_matrices[a][p_0] + hess_1_01_a = payoff_matrices[a][p_1] + hess_0_01_b = payoff_matrices[b][p_0] + + pg_0_a = simplex.project_grad(hess_0_01_a.dot(dist)) + pg_0_b = simplex.project_grad(hess_0_01_b.dot(dist)) + + unreg_exp = np.dot(pg_0_a, pg_0_b) + + if tau > 0.: + log_dist_safe = np.clip(np.log(dist), -40, 0) + entr_grad_proj = simplex.project_grad(-tau * (log_dist_safe + 1)) + else: + entr_grad_proj = 0. + pg_0_a_entr = pg_0_a + entr_grad_proj + pg_0_b_entr = pg_0_b + entr_grad_proj + pg_0_entr = 0.5 * (pg_0_a_entr + pg_0_b_entr) + pg_1_b_entr = pg_0_b_entr + + reg_exp = np.dot(pg_0_a_entr, pg_0_b_entr) + + # then construct unbiased stochastic gradient + grad_dist = 2. * hess_1_01_a.dot(pg_1_b_entr) + if tau > 0.: + grad_dist += 2. 
* -tau * pg_0_entr / dist + + if proj_grad: + grad_dist = simplex.project_grad(grad_dist) + + return (grad_dist,), unreg_exp, reg_exp diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/solvers_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/solvers_test.py new file mode 100644 index 0000000..6c92a23 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/adidas_utils/solvers/symmetric/solvers_test.py @@ -0,0 +1,110 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.algorithms.adidas_utils.solvers.symmetric.""" + +from absl import logging # pylint:disable=unused-import +from absl.testing import absltest +from absl.testing import parameterized + +import numpy as np + +from scipy.spatial import distance + +from open_spiel.python.algorithms.adidas_utils.solvers.symmetric import adam +from open_spiel.python.algorithms.adidas_utils.solvers.symmetric import ate +from open_spiel.python.algorithms.adidas_utils.solvers.symmetric import ped +from open_spiel.python.algorithms.adidas_utils.solvers.symmetric import qre +from open_spiel.python.algorithms.adidas_utils.solvers.symmetric import sgd + + +def numerical_gradient(fun, x, eps=np.sqrt(np.finfo(float).eps)): + fun_0 = fun(x) + num_grad = np.zeros_like(x) + x_plus_dx = np.copy(x) + for i, xi in enumerate(x): + x_plus_dx[i] = xi + eps + num_grad[i] = (fun(x_plus_dx) - fun_0) / eps + x_plus_dx[i] = xi + return num_grad + + +def prep_params(dist, payoff_matrices, num_params, solver_tuple): + params = [dist] + if num_params > 1: + params += [payoff_matrices[0].dot(params[0])] # policy_gradient + if num_params > 2: + params += [np.linalg.norm(params[1], ord=solver_tuple[1])] + return tuple(params) + + +class ExploitabilityDescentTest(parameterized.TestCase): + + @parameterized.named_parameters( + ("ATE_p=1", (ate, 1., False)), + ("ATE_p=0.5", (ate, 0.5, False)), + ("ATE_p=0.1", (ate, 0.1, False)), + ("PED", (ped, False)), + ("QRE_t=0.0", (qre, 0.0, False)), + ("QRE_t=0.1", (qre, 0.1, False)), + ("SGD_t=0.0", (sgd, 0.0, False)), + ("SGD_t=0.1", (sgd, 0.1, False)), + ("ADAM_t=0.0", (adam, 0.0, False)), + ("ADAM_t=0.1", (adam, 0.1, False)), + ) + def test_exploitability_gradient_on_symmetric_two_player_matrix_games( + self, solver_tuple, trials=100, max_num_strats=2, atol=1e-1, rtol=1e-1, + seed=1234): + num_players = 2 + solver = solver_tuple[0].Solver(*solver_tuple[1:]) + + if hasattr(solver, "num_estimates"): + num_estimates = solver.num_estimates + else: + num_estimates = 1 + + random = np.random.RandomState(seed) + + successes = [] + for _ in range(trials): + num_strats = random.randint(low=2, high=max_num_strats + 1) + strat_dims = (num_strats,) * num_players + payoff_matrices = random.rand(num_players, *strat_dims) + payoff_matrices[1] = payoff_matrices[0].T + if num_estimates > 1: + payoff_matrices_grad = [payoff_matrices] * num_estimates + 
else: + payoff_matrices_grad = payoff_matrices + + num_params = len(solver.init_vars(num_strats, num_players)) + dirichlet_alpha = np.ones(num_strats) + dist = random.dirichlet(dirichlet_alpha) # mixed srategy + params = prep_params(dist, payoff_matrices, num_params, solver_tuple) + + grad = solver.compute_gradients(params, payoff_matrices_grad)[0][0] + + exp = lambda x: solver.exploitability(x, payoff_matrices) # pylint: disable=cell-var-from-loop + num_grad = numerical_gradient(exp, dist) + + successes += [np.logical_and(np.allclose(grad, num_grad, rtol, atol), + distance.cosine(grad, num_grad) <= atol)] + + perc = 100 * np.mean(successes) + logging.info("gradient accuracy success rate out of %d is %f", trials, perc) + self.assertGreaterEqual( + perc, 95., "exploitability gradient accuracy is too poor") + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/README.md b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/README.md new file mode 100644 index 0000000..48eb456 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/README.md @@ -0,0 +1,8 @@ +## Python AlphaZero + +This is a pure python implementation of the AlphaZero algorithm. + +Note: this version is based on Tensorflow V1 and is no longer maintained. + +For more information, please take a look at the +[full documentation](https://github.com/deepmind/open_spiel/blob/master/docs/alpha_zero.md). diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/alpha_zero.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/alpha_zero.py new file mode 100644 index 0000000..af58656 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/alpha_zero.py @@ -0,0 +1,559 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A basic AlphaZero implementation. + +This implements the AlphaZero training algorithm. 
It spawns N actors which feed +trajectories into a replay buffer which are consumed by a learner. The learner +generates new weights, saves a checkpoint, and tells the actors to update. There +are also M evaluators running games continuously against a standard MCTS+Solver, +though each at a different difficulty (ie number of simulations for MCTS). + +Due to the multi-process nature of this algorithm the logs are written to files, +one per process. The learner logs are also output to stdout. The checkpoints are +also written to the same directory. + +Links to relevant articles/papers: + https://deepmind.com/blog/article/alphago-zero-starting-scratch has an open + access link to the AlphaGo Zero nature paper. + https://deepmind.com/blog/article/alphazero-shedding-new-light-grand-games-chess-shogi-and-go + has an open access link to the AlphaZero science paper. +""" + +import collections +import datetime +import functools +import itertools +import json +import os +import random +import sys +import tempfile +import time +import traceback + +import numpy as np + +from open_spiel.python.algorithms import mcts +from open_spiel.python.algorithms.alpha_zero import evaluator as evaluator_lib +from open_spiel.python.algorithms.alpha_zero import model as model_lib +import pyspiel +from open_spiel.python.utils import data_logger +from open_spiel.python.utils import file_logger +from open_spiel.python.utils import spawn +from open_spiel.python.utils import stats + +# Time to wait for processes to join. +JOIN_WAIT_DELAY = 0.001 + + +class TrajectoryState(object): + """A particular point along a trajectory.""" + + def __init__(self, observation, current_player, legals_mask, action, policy, + value): + self.observation = observation + self.current_player = current_player + self.legals_mask = legals_mask + self.action = action + self.policy = policy + self.value = value + + +class Trajectory(object): + """A sequence of observations, actions and policies, and the outcomes.""" + + def __init__(self): + self.states = [] + self.returns = None + + def add(self, information_state, action, policy): + self.states.append((information_state, action, policy)) + + +class Buffer(object): + """A fixed size buffer that keeps the newest values.""" + + def __init__(self, max_size): + self.max_size = max_size + self.data = [] + self.total_seen = 0 # The number of items that have passed through. 
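
The `Buffer` constructor above (its `extend`, `sample`, and related methods continue just below) implements a fixed-size window over trajectories: `extend` drops the oldest entries beyond `max_size`, while `total_seen` keeps counting everything ever appended. A small illustrative sketch of those semantics, assuming the class exactly as defined in this file:

```python
# Uses the Buffer class defined in alpha_zero.py.
buf = Buffer(max_size=3)
buf.extend([1, 2, 3, 4, 5])    # only the newest max_size items are retained
assert buf.data == [3, 4, 5]
assert buf.total_seen == 5     # counts every item ever appended
assert len(buf) == 3 and bool(buf)
batch = buf.sample(2)          # uniform sample without replacement
```
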
+ + def __len__(self): + return len(self.data) + + def __bool__(self): + return bool(self.data) + + def append(self, val): + return self.extend([val]) + + def extend(self, batch): + batch = list(batch) + self.total_seen += len(batch) + self.data.extend(batch) + self.data[:-self.max_size] = [] + + def sample(self, count): + return random.sample(self.data, count) + + +class Config(collections.namedtuple( + "Config", [ + "game", + "path", + "learning_rate", + "weight_decay", + "train_batch_size", + "replay_buffer_size", + "replay_buffer_reuse", + "max_steps", + "checkpoint_freq", + "actors", + "evaluators", + "evaluation_window", + "eval_levels", + + "uct_c", + "max_simulations", + "policy_alpha", + "policy_epsilon", + "temperature", + "temperature_drop", + + "nn_model", + "nn_width", + "nn_depth", + "observation_shape", + "output_size", + + "quiet", + ])): + """A config for the model/experiment.""" + pass + + +def _init_model_from_config(config): + return model_lib.Model.build_model( + config.nn_model, + config.observation_shape, + config.output_size, + config.nn_width, + config.nn_depth, + config.weight_decay, + config.learning_rate, + config.path) + + +def watcher(fn): + """A decorator to fn/processes that gives a logger and logs exceptions.""" + @functools.wraps(fn) + def _watcher(*, config, num=None, **kwargs): + """Wrap the decorated function.""" + name = fn.__name__ + if num is not None: + name += "-" + str(num) + with file_logger.FileLogger(config.path, name, config.quiet) as logger: + print("{} started".format(name)) + logger.print("{} started".format(name)) + try: + return fn(config=config, logger=logger, **kwargs) + except Exception as e: + logger.print("\n".join([ + "", + " Exception caught ".center(60, "="), + traceback.format_exc(), + "=" * 60, + ])) + print("Exception caught in {}: {}".format(name, e)) + raise + finally: + logger.print("{} exiting".format(name)) + print("{} exiting".format(name)) + return _watcher + + +def _init_bot(config, game, evaluator_, evaluation): + """Initializes a bot.""" + noise = None if evaluation else (config.policy_epsilon, config.policy_alpha) + return mcts.MCTSBot( + game, + config.uct_c, + config.max_simulations, + evaluator_, + solve=False, + dirichlet_noise=noise, + child_selection_fn=mcts.SearchNode.puct_value, + verbose=False, + dont_return_chance_node=True) + + +def _play_game(logger, game_num, game, bots, temperature, temperature_drop): + """Play one game, return the trajectory.""" + trajectory = Trajectory() + actions = [] + state = game.new_initial_state() + random_state = np.random.RandomState() + logger.opt_print(" Starting game {} ".format(game_num).center(60, "-")) + logger.opt_print("Initial state:\n{}".format(state)) + while not state.is_terminal(): + if state.is_chance_node(): + # For chance nodes, rollout according to chance node's probability + # distribution + outcomes = state.chance_outcomes() + action_list, prob_list = zip(*outcomes) + action = random_state.choice(action_list, p=prob_list) + action_str = state.action_to_string(state.current_player(), action) + actions.append(action_str) + state.apply_action(action) + else: + root = bots[state.current_player()].mcts_search(state) + policy = np.zeros(game.num_distinct_actions()) + for c in root.children: + policy[c.action] = c.explore_count + policy = policy**(1 / temperature) + policy /= policy.sum() + if len(actions) >= temperature_drop: + action = root.best_child().action + else: + action = np.random.choice(len(policy), p=policy) + trajectory.states.append( + 
TrajectoryState(state.observation_tensor(), state.current_player(), + state.legal_actions_mask(), action, policy, + root.total_reward / root.explore_count)) + action_str = state.action_to_string(state.current_player(), action) + actions.append(action_str) + logger.opt_print("Player {} sampled action: {}".format( + state.current_player(), action_str)) + state.apply_action(action) + logger.opt_print("Next state:\n{}".format(state)) + + trajectory.returns = state.returns() + logger.print("Game {}: Returns: {}; Actions: {}".format( + game_num, " ".join(map(str, trajectory.returns)), " ".join(actions))) + return trajectory + + +def update_checkpoint(logger, queue, model, az_evaluator): + """Read the queue for a checkpoint to load, or an exit signal.""" + path = None + while True: # Get the last message, ignore intermediate ones. + try: + path = queue.get_nowait() + except spawn.Empty: + break + if path: + logger.print("Inference cache:", az_evaluator.cache_info()) + logger.print("Loading checkpoint", path) + model.load_checkpoint(path) + az_evaluator.clear_cache() + elif path is not None: # Empty string means stop this process. + return False + return True + + +@watcher +def actor(*, config, game, logger, queue): + """An actor process runner that generates games and returns trajectories.""" + logger.print("Initializing model") + model = _init_model_from_config(config) + logger.print("Initializing bots") + az_evaluator = evaluator_lib.AlphaZeroEvaluator(game, model) + bots = [ + _init_bot(config, game, az_evaluator, False), + _init_bot(config, game, az_evaluator, False), + ] + for game_num in itertools.count(): + if not update_checkpoint(logger, queue, model, az_evaluator): + return + queue.put(_play_game(logger, game_num, game, bots, config.temperature, + config.temperature_drop)) + + +@watcher +def evaluator(*, game, config, logger, queue): + """A process that plays the latest checkpoint vs standard MCTS.""" + results = Buffer(config.evaluation_window) + logger.print("Initializing model") + model = _init_model_from_config(config) + logger.print("Initializing bots") + az_evaluator = evaluator_lib.AlphaZeroEvaluator(game, model) + random_evaluator = mcts.RandomRolloutEvaluator() + + for game_num in itertools.count(): + if not update_checkpoint(logger, queue, model, az_evaluator): + return + + az_player = game_num % 2 + difficulty = (game_num // 2) % config.eval_levels + max_simulations = int(config.max_simulations * (10 ** (difficulty / 2))) + bots = [ + _init_bot(config, game, az_evaluator, True), + mcts.MCTSBot( + game, + config.uct_c, + max_simulations, + random_evaluator, + solve=True, + verbose=False, + dont_return_chance_node=True) + ] + if az_player == 1: + bots = list(reversed(bots)) + + trajectory = _play_game(logger, game_num, game, bots, temperature=1, + temperature_drop=0) + results.append(trajectory.returns[az_player]) + queue.put((difficulty, trajectory.returns[az_player])) + + logger.print("AZ: {}, MCTS: {}, AZ avg/{}: {:.3f}".format( + trajectory.returns[az_player], + trajectory.returns[1 - az_player], + len(results), np.mean(results.data))) + + +@watcher +def learner(*, game, config, actors, evaluators, broadcast_fn, logger): + """A learner that consumes the replay buffer and trains the network.""" + logger.also_to_stdout = True + replay_buffer = Buffer(config.replay_buffer_size) + learn_rate = config.replay_buffer_size // config.replay_buffer_reuse + logger.print("Initializing model") + model = _init_model_from_config(config) + logger.print("Model type: %s(%s, %s)" % 
(config.nn_model, config.nn_width, + config.nn_depth)) + logger.print("Model size:", model.num_trainable_variables, "variables") + save_path = model.save_checkpoint(0) + logger.print("Initial checkpoint:", save_path) + broadcast_fn(save_path) + + data_log = data_logger.DataLoggerJsonLines(config.path, "learner", True) + + stage_count = 7 + value_accuracies = [stats.BasicStats() for _ in range(stage_count)] + value_predictions = [stats.BasicStats() for _ in range(stage_count)] + game_lengths = stats.BasicStats() + game_lengths_hist = stats.HistogramNumbered(game.max_game_length() + 1) + outcomes = stats.HistogramNamed(["Player1", "Player2", "Draw"]) + evals = [Buffer(config.evaluation_window) for _ in range(config.eval_levels)] + total_trajectories = 0 + + def trajectory_generator(): + """Merge all the actor queues into a single generator.""" + while True: + found = 0 + for actor_process in actors: + try: + yield actor_process.queue.get_nowait() + except spawn.Empty: + pass + else: + found += 1 + if found == 0: + time.sleep(0.01) # 10ms + + def collect_trajectories(): + """Collects the trajectories from actors into the replay buffer.""" + num_trajectories = 0 + num_states = 0 + for trajectory in trajectory_generator(): + num_trajectories += 1 + num_states += len(trajectory.states) + game_lengths.add(len(trajectory.states)) + game_lengths_hist.add(len(trajectory.states)) + + p1_outcome = trajectory.returns[0] + if p1_outcome > 0: + outcomes.add(0) + elif p1_outcome < 0: + outcomes.add(1) + else: + outcomes.add(2) + + replay_buffer.extend( + model_lib.TrainInput( + s.observation, s.legals_mask, s.policy, p1_outcome) + for s in trajectory.states) + + for stage in range(stage_count): + # Scale for the length of the game + index = (len(trajectory.states) - 1) * stage // (stage_count - 1) + n = trajectory.states[index] + accurate = (n.value >= 0) == (trajectory.returns[n.current_player] >= 0) + value_accuracies[stage].add(1 if accurate else 0) + value_predictions[stage].add(abs(n.value)) + + if num_states >= learn_rate: + break + return num_trajectories, num_states + + def learn(step): + """Sample from the replay buffer, update weights and save a checkpoint.""" + losses = [] + for _ in range(len(replay_buffer) // config.train_batch_size): + data = replay_buffer.sample(config.train_batch_size) + losses.append(model.update(data)) + + # Always save a checkpoint, either for keeping or for loading the weights to + # the actors. It only allows numbers, so use -1 as "latest". + save_path = model.save_checkpoint( + step if step % config.checkpoint_freq == 0 else -1) + losses = sum(losses, model_lib.Losses(0, 0, 0)) / len(losses) + logger.print(losses) + logger.print("Checkpoint saved:", save_path) + return save_path, losses + + last_time = time.time() - 60 + for step in itertools.count(1): + for value_accuracy in value_accuracies: + value_accuracy.reset() + for value_prediction in value_predictions: + value_prediction.reset() + game_lengths.reset() + game_lengths_hist.reset() + outcomes.reset() + + num_trajectories, num_states = collect_trajectories() + total_trajectories += num_trajectories + now = time.time() + seconds = now - last_time + last_time = now + + logger.print("Step:", step) + logger.print( + ("Collected {:5} states from {:3} games, {:.1f} states/s. " + "{:.1f} states/(s*actor), game length: {:.1f}").format( + num_states, num_trajectories, num_states / seconds, + num_states / (config.actors * seconds), + num_states / num_trajectories)) + logger.print("Buffer size: {}. 
States seen: {}".format( + len(replay_buffer), replay_buffer.total_seen)) + + save_path, losses = learn(step) + + for eval_process in evaluators: + while True: + try: + difficulty, outcome = eval_process.queue.get_nowait() + evals[difficulty].append(outcome) + except spawn.Empty: + break + + batch_size_stats = stats.BasicStats() # Only makes sense in C++. + batch_size_stats.add(1) + data_log.write({ + "step": step, + "total_states": replay_buffer.total_seen, + "states_per_s": num_states / seconds, + "states_per_s_actor": num_states / (config.actors * seconds), + "total_trajectories": total_trajectories, + "trajectories_per_s": num_trajectories / seconds, + "queue_size": 0, # Only available in C++. + "game_length": game_lengths.as_dict, + "game_length_hist": game_lengths_hist.data, + "outcomes": outcomes.data, + "value_accuracy": [v.as_dict for v in value_accuracies], + "value_prediction": [v.as_dict for v in value_predictions], + "eval": { + "count": evals[0].total_seen, + "results": [sum(e.data) / len(e) if e else 0 for e in evals], + }, + "batch_size": batch_size_stats.as_dict, + "batch_size_hist": [0, 1], + "loss": { + "policy": float(losses.policy), + "value": float(losses.value), + "l2reg": float(losses.l2), + "sum": float(losses.total), + }, + "cache": { # Null stats because it's hard to report between processes. + "size": 0, + "max_size": 0, + "usage": 0, + "requests": 0, + "requests_per_s": 0, + "hits": 0, + "misses": 0, + "misses_per_s": 0, + "hit_rate": 0, + }, + }) + logger.print() + + if config.max_steps > 0 and step >= config.max_steps: + break + + broadcast_fn(save_path) + + +def alpha_zero(config: Config): + """Start all the worker processes for a full alphazero setup.""" + game = pyspiel.load_game(config.game) + config = config._replace( + observation_shape=game.observation_tensor_shape(), + output_size=game.num_distinct_actions()) + + print("Starting game", config.game) + if game.num_players() != 2: + sys.exit("AlphaZero can only handle 2-player games.") + game_type = game.get_type() + if game_type.reward_model != pyspiel.GameType.RewardModel.TERMINAL: + raise ValueError("Game must have terminal rewards.") + if game_type.dynamics != pyspiel.GameType.Dynamics.SEQUENTIAL: + raise ValueError("Game must have sequential turns.") + + path = config.path + if not path: + path = tempfile.mkdtemp(prefix="az-{}-{}-".format( + datetime.datetime.now().strftime("%Y-%m-%d-%H-%M"), config.game)) + config = config._replace(path=path) + + if not os.path.exists(path): + os.makedirs(path) + if not os.path.isdir(path): + sys.exit("{} isn't a directory".format(path)) + print("Writing logs and checkpoints to:", path) + print("Model type: %s(%s, %s)" % (config.nn_model, config.nn_width, + config.nn_depth)) + + with open(os.path.join(config.path, "config.json"), "w") as fp: + fp.write(json.dumps(config._asdict(), indent=2, sort_keys=True) + "\n") + + actors = [spawn.Process(actor, kwargs={"game": game, "config": config, + "num": i}) + for i in range(config.actors)] + evaluators = [spawn.Process(evaluator, kwargs={"game": game, "config": config, + "num": i}) + for i in range(config.evaluators)] + + def broadcast(msg): + for proc in actors + evaluators: + proc.queue.put(msg) + + try: + learner(game=game, config=config, actors=actors, # pylint: disable=missing-kwoa + evaluators=evaluators, broadcast_fn=broadcast) + except (KeyboardInterrupt, EOFError): + print("Caught a KeyboardInterrupt, stopping early.") + finally: + broadcast("") + # for actor processes to join we have to make sure that their q_in 
is empty, + # including backed up items + for proc in actors: + while proc.exitcode is None: + while not proc.queue.empty(): + proc.queue.get_nowait() + proc.join(JOIN_WAIT_DELAY) + for proc in evaluators: + proc.join() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/analysis.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/analysis.py new file mode 100644 index 0000000..05ef69d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/analysis.py @@ -0,0 +1,276 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Output the config and graphs for an experiment. + +This reads the config.json and learner.jsonl from an alpha zero experiment. +""" + +import datetime +import json +import math +import os + +from absl import app +from absl import flags + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + +from open_spiel.python.utils import gfile + +X_AXIS = { + "step": "step", + "time": "time_rel_h", + "states": "total_states", +} + +flags.DEFINE_string("path", None, + "Where to find config.json and learner.jsonl.") +flags.DEFINE_enum("x_axis", "step", list(X_AXIS.keys()), + "What should be on the x-axis.") + +flags.mark_flag_as_required("path") +FLAGS = flags.FLAGS + +MAX_WIDTH = int(os.getenv("COLUMNS", "200")) # Get your TTY width. 
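# --- Editor's note: illustrative usage only; not part of the vendored file. ---
# The flags above are the whole CLI surface of this analysis script. It expects
# an experiment directory (as written by alpha_zero.py) containing config.json
# and learner.jsonl; the path below is hypothetical:
#
#   python analysis.py --path /tmp/az-2024-01-01-00-00-tic_tac_toe --x_axis time
#
# where --x_axis is one of "step", "time" or "states" (the keys of X_AXIS above).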
+SMOOTHING_RATE = 10 +SUBSAMPLING_MAX = 200 + + +def print_columns(strings, max_width=MAX_WIDTH): + """Prints a list of strings in columns.""" + padding = 2 + shortest = min(len(s) for s in strings) + max_columns = max(1, math.floor((max_width - 1) / (shortest + 2 * padding))) + for cols in range(max_columns, 0, -1): + rows = math.ceil(len(strings) / cols) + chunks = [strings[i:i + rows] for i in range(0, len(strings), rows)] + col_widths = [max(len(s) for s in chunk) for chunk in chunks] + if sum(col_widths) + 2 * padding * len(col_widths) <= max_width: + break + for r in range(rows): + for c in range(cols): + i = r + c * rows + if i < len(strings): + print(" " * padding + strings[i].ljust(col_widths[c] + padding), end="") + print() + + +def load_jsonl_data(filename): + with gfile.Open(filename) as f: + return [json.loads(l) for l in f.readlines()] + + +def sub_sample(data, count): + return data[::(max(1, len(data) // count))] + + +def smooth(data, count): + for k in data.keys(): + if not isinstance(k, str) or not k.startswith("time_"): + data[k] = data[k].rolling(max(1, len(data) // count)).mean() + return data + + +def subselect(row, keys): + for key in keys: + row = row[key] + return row + + +def select(data, keys): + return [subselect(row, keys) for row in data] + + +def prepare(data, cols): + """Given the dataset and a list of columns return a small pandas dataframe.""" + for col in ["step", "total_states", "total_trajectories", "time_rel"]: + cols[col] = [col] + subdata = {key: select(data, col) for key, col in cols.items()} + # subdata = list(zip(*subdata)) # transpose + df = pd.DataFrame(subdata) + df = smooth(df, SMOOTHING_RATE) + df = sub_sample(df, SUBSAMPLING_MAX) + df["time_rel_h"] = df["time_rel"] / 3600 + df["zero"] = 0 + return df + + +def subplot(rows, cols, pos, *args, **kwargs): + ax = plt.subplot(rows, cols, pos, *args, **kwargs) + ax.tick_params(top=False, right=False) # Don't interfere with the titles. 
+ return ax + + +def plot_avg_stddev(ax, x, data, data_col): + """Plot stats produced by open_spiel::BasicStats::ToJson.""" + cols = ["avg", "std_dev", "min", "max"] + df = prepare(data, {v: data_col + [v] for v in cols}) + df.plot(ax=ax, x=x, y="avg", color="b") + plt.fill_between( + x=df[x], color="b", alpha=0.2, label="std dev", + y1=np.nanmax([df["min"], df["avg"] - df["std_dev"]], 0), + y2=np.nanmin([df["max"], df["avg"] + df["std_dev"]], 0)) + plt.fill_between( + x=df[x], color="b", alpha=0.2, label="min/max", + y1=df["min"], y2=df["max"]) + plot_zero(df, ax, x) + + +def plot_histogram_numbered(ax, x, data, data_col): + """Plot stats produced by open_spiel::HistogramNumbered::ToJson.""" + x_min, x_max = 0, data[-1][x] + y_min, y_max = 0, len(subselect(data, [0] + data_col)) + z_min, z_max = 0, 1 + z = np.array([subselect(row, data_col) for row in data], dtype=float) + z = np.concatenate((z, np.zeros((x_max, 1))), axis=1) # Don't cut off the top + # TODO(author7): smoothing + z = sub_sample(z, SUBSAMPLING_MAX).transpose() + p = np.percentile(z, 99) + if p > 0: + z /= p + z[z > 1] = 1 + ax.grid(False) + ax.imshow(z, cmap="Reds", vmin=z_min, vmax=z_max, + extent=[x_min, x_max, y_min, y_max + 1], + interpolation="nearest", origin="lower", aspect="auto") + + +def plot_histogram_named(ax, x, data, data_col, normalized=True): + """Plot stats produced by open_spiel::HistogramNamed::ToJson.""" + names = subselect(data, [0] + data_col + ["names"]) + df = prepare(data, {name: data_col + ["counts", i] + for i, name in enumerate(names)}) + if normalized: + total = sum(df[n] for n in names) + for n in names: + df[n] /= total + df.plot.area(ax=ax, x=x, y=names) + + +def plot_zero(df, ax, x): + df.plot(ax=ax, x=x, y="zero", label="", visible=False) + + +def plot_data(config, data): + """Plot a bunch of graphs from an alphazero experiment.""" + num_rows, num_cols = 3, 4 + x = X_AXIS[FLAGS.x_axis] + + fig = plt.figure(figsize=(num_cols * 7, num_rows * 6)) + fig.suptitle( + ("Game: {}, Model: {}({}, {}), training time: {}, training steps: {}, " + "states: {}, games: {}").format( + config["game"], config["nn_model"], config["nn_width"], + config["nn_depth"], + datetime.timedelta(seconds=int(data[-1]["time_rel"])), + int(data[-1]["step"]), int(data[-1]["total_states"]), + int(data[-1]["total_trajectories"]))) + + cols = ["value", "policy", "l2reg", "sum"] + df = prepare(data, {v: ["loss", v] for v in cols}) + ax = subplot(num_rows, num_cols, 1, title="Training loss") + for y in cols: + df.plot(ax=ax, x=x, y=y) + + cols = list(range(len(data[0]["value_accuracy"]))) + df = prepare(data, {i: ["value_accuracy", i, "avg"] for i in cols}) + ax = subplot(num_rows, num_cols, 2, # ylim=(0, 1.05), + title="MCTS value prediction accuracy") + for y in cols: + df.plot(ax=ax, x=x, y=y) + + cols = list(range(len(data[0]["value_prediction"]))) + df = prepare(data, {i: ["value_prediction", i, "avg"] for i in cols}) + ax = subplot(num_rows, num_cols, 3, # ylim=(0, 1.05), + title="MCTS absolute value prediction") + for y in cols: + df.plot(ax=ax, x=x, y=y) + + cols = list(range(len(data[0]["eval"]["results"]))) + df = prepare(data, {i: ["eval", "results", i] for i in cols}) + ax = subplot(num_rows, num_cols, 4, ylim=(-1, 1), + title="Evaluation returns vs MCTS+Solver with x10^(n/2) sims") + ax.axhline(y=0, color="black") + for y in cols: + df.plot(ax=ax, x=x, y=y) + + df = prepare(data, {"states_per_s": ["states_per_s"]}) + ax = subplot(num_rows, num_cols, 5, title="Speed of actor state/s") + df.plot(ax=ax, x=x, 
y="states_per_s") + plot_zero(df, ax, x) + + cols = ["requests_per_s", "misses_per_s"] + df = prepare(data, {v: ["cache", v] for v in cols}) + ax = subplot(num_rows, num_cols, 6, title="Cache requests/s") + for y in cols: + df.plot(ax=ax, x=x, y=y) + plot_zero(df, ax, x) + + cols = ["hit_rate", "usage"] + df = prepare(data, {v: ["cache", v] for v in cols}) + ax = subplot(num_rows, num_cols, 7, title="Cache usage and hit rate.", + ylim=(0, 1.05)) + for y in cols: + df.plot(ax=ax, x=x, y=y) + + ax = subplot(num_rows, num_cols, 8, title="Outcomes", ylim=(0, 1)) + plot_histogram_named(ax, x, data, ["outcomes"]) + + ax = subplot(num_rows, num_cols, 9, + title="Inference batch size + stddev + min/max") + plot_avg_stddev(ax, x, data, ["batch_size"]) + + ax = subplot(num_rows, num_cols, 10, title="Inference batch size") + plot_histogram_numbered(ax, x, data, ["batch_size_hist"]) + + ax = subplot(num_rows, num_cols, 11, title="Game length + stddev + min/max") + plot_avg_stddev(ax, x, data, ["game_length"]) + + ax = subplot(num_rows, num_cols, 12, title="Game length histogram") + plot_histogram_numbered(ax, x, data, ["game_length_hist"]) + + plt.show() + + +def main(argv): + if len(argv) > 1: + raise app.UsageError("Too many command-line arguments.") + + with gfile.Open(os.path.join(FLAGS.path, "config.json")) as f: + config = json.load(f) + data = load_jsonl_data(os.path.join(FLAGS.path, "learner.jsonl")) + + print("config:") + print_columns(sorted("{}: {}".format(k, v) for k, v in config.items())) + print() + print("data keys:") + print_columns(sorted(data[0].keys())) + print() + print("training time:", datetime.timedelta(seconds=int(data[-1]["time_rel"]))) + print("training steps: %d" % (data[-1]["step"])) + print("total states: %d" % (data[-1]["total_states"])) + print("total trajectories: %d" % (data[-1]["total_trajectories"])) + print() + + try: + plot_data(config, data) + except KeyboardInterrupt: + pass + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/evaluator.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/evaluator.py new file mode 100644 index 0000000..63063b6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/evaluator.py @@ -0,0 +1,70 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""An MCTS Evaluator for an AlphaZero model.""" + +import numpy as np + +from open_spiel.python.algorithms import mcts +import pyspiel +from open_spiel.python.utils import lru_cache + + +class AlphaZeroEvaluator(mcts.Evaluator): + """An AlphaZero MCTS Evaluator.""" + + def __init__(self, game, model, cache_size=2**16): + """An AlphaZero MCTS Evaluator.""" + if game.num_players() != 2: + raise ValueError("Game must be for two players.") + game_type = game.get_type() + if game_type.reward_model != pyspiel.GameType.RewardModel.TERMINAL: + raise ValueError("Game must have terminal rewards.") + if game_type.dynamics != pyspiel.GameType.Dynamics.SEQUENTIAL: + raise ValueError("Game must have sequential turns.") + + self._model = model + self._cache = lru_cache.LRUCache(cache_size) + + def cache_info(self): + return self._cache.info() + + def clear_cache(self): + self._cache.clear() + + def _inference(self, state): + # Make a singleton batch + obs = np.expand_dims(state.observation_tensor(), 0) + mask = np.expand_dims(state.legal_actions_mask(), 0) + + # ndarray isn't hashable + cache_key = obs.tobytes() + mask.tobytes() + + value, policy = self._cache.make( + cache_key, lambda: self._model.inference(obs, mask)) + + return value[0, 0], policy[0] # Unpack batch + + def evaluate(self, state): + """Returns a value for the given state.""" + value, _ = self._inference(state) + return np.array([value, -value]) + + def prior(self, state): + if state.is_chance_node(): + return state.chance_outcomes() + else: + # Returns the probabilities for all actions. + _, policy = self._inference(state) + return [(action, policy[action]) for action in state.legal_actions()] diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/evaluator_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/evaluator_test.py new file mode 100644 index 0000000..422f79d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/evaluator_test.py @@ -0,0 +1,107 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for open_spiel.python.algorithms.alpha_zero.evaluator.""" + +from absl.testing import absltest +import numpy as np + +from open_spiel.python.algorithms import mcts +from open_spiel.python.algorithms.alpha_zero import evaluator as evaluator_lib +from open_spiel.python.algorithms.alpha_zero import model as model_lib +import pyspiel + + +def build_model(game): + return model_lib.Model.build_model( + "mlp", game.observation_tensor_shape(), game.num_distinct_actions(), + nn_width=64, nn_depth=2, weight_decay=1e-4, learning_rate=0.01, path=None) + + +class EvaluatorTest(absltest.TestCase): + + def test_evaluator_caching(self): + game = pyspiel.load_game("tic_tac_toe") + model = build_model(game) + evaluator = evaluator_lib.AlphaZeroEvaluator(game, model) + + state = game.new_initial_state() + obs = state.observation_tensor() + act_mask = state.legal_actions_mask() + action = state.legal_actions()[0] + policy = np.zeros(len(act_mask), dtype=float) + policy[action] = 1 + train_inputs = [model_lib.TrainInput(obs, act_mask, policy, value=1)] + + value = evaluator.evaluate(state) + self.assertEqual(value[0], -value[1]) + value = value[0] + + value2 = evaluator.evaluate(state)[0] + self.assertEqual(value, value2) + + prior = evaluator.prior(state) + prior2 = evaluator.prior(state) + np.testing.assert_array_equal(prior, prior2) + + info = evaluator.cache_info() + self.assertEqual(info.misses, 1) + self.assertEqual(info.hits, 3) + + for _ in range(20): + model.update(train_inputs) + + # Still equal due to not clearing the cache + value3 = evaluator.evaluate(state)[0] + self.assertEqual(value, value3) + + info = evaluator.cache_info() + self.assertEqual(info.misses, 1) + self.assertEqual(info.hits, 4) + + evaluator.clear_cache() + + info = evaluator.cache_info() + self.assertEqual(info.misses, 0) + self.assertEqual(info.hits, 0) + + # Now they differ from before + value4 = evaluator.evaluate(state)[0] + value5 = evaluator.evaluate(state)[0] + self.assertNotEqual(value, value4) + self.assertEqual(value4, value5) + + info = evaluator.cache_info() + self.assertEqual(info.misses, 1) + self.assertEqual(info.hits, 1) + + value6 = evaluator.evaluate(game.new_initial_state())[0] + self.assertEqual(value4, value6) + + info = evaluator.cache_info() + self.assertEqual(info.misses, 1) + self.assertEqual(info.hits, 2) + + def test_works_with_mcts(self): + game = pyspiel.load_game("tic_tac_toe") + model = build_model(game) + evaluator = evaluator_lib.AlphaZeroEvaluator(game, model) + bot = mcts.MCTSBot( + game, 1., 20, evaluator, solve=False, dirichlet_noise=(0.25, 1.)) + root = bot.mcts_search(game.new_initial_state()) + self.assertEqual(root.explore_count, 20) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/export_model.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/export_model.py new file mode 100644 index 0000000..7a8e2b1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/export_model.py @@ -0,0 +1,57 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Export the model's Tensorflow graph as a protobuf.""" + +from absl import app +from absl import flags + +from open_spiel.python.algorithms.alpha_zero import model as model_lib +import pyspiel + +FLAGS = flags.FLAGS +flags.DEFINE_string("game", None, "Name of the game") +flags.DEFINE_string("path", None, "Directory to save graph") +flags.DEFINE_string("graph_def", None, "Filename for the graph") +flags.DEFINE_enum("nn_model", "resnet", model_lib.Model.valid_model_types, + "What type of model should be used?.") +flags.DEFINE_integer("nn_width", 2 ** 7, "How wide should the network be.") +flags.DEFINE_integer("nn_depth", 10, "How deep should the network be.") +flags.DEFINE_float("learning_rate", 0.0001, "Learning rate used for training") +flags.DEFINE_float("weight_decay", 0.0001, "L2 regularization strength.") +flags.DEFINE_bool("verbose", False, "Print information about the model.") +flags.mark_flag_as_required("game") +flags.mark_flag_as_required("path") +flags.mark_flag_as_required("graph_def") + + +def main(_): + game = pyspiel.load_game(FLAGS.game) + model = model_lib.Model.build_model( + FLAGS.nn_model, game.observation_tensor_shape(), + game.num_distinct_actions(), FLAGS.nn_width, FLAGS.nn_depth, + FLAGS.weight_decay, FLAGS.learning_rate, FLAGS.path) + model.write_graph(FLAGS.graph_def) + + if FLAGS.verbose: + print("Game:", FLAGS.game) + print("Model type: %s(%s, %s)" % (FLAGS.nn_model, FLAGS.nn_width, + FLAGS.nn_depth)) + print("Model size:", model.num_trainable_variables, "variables") + print("Variables:") + model.print_trainable_variables() + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/model.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/model.py new file mode 100644 index 0000000..aa2f102 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/model.py @@ -0,0 +1,366 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""An AlphaZero style model with a policy and value head.""" + +import collections +import functools +import os +from typing import Sequence +import warnings + +import numpy as np +import tensorflow.compat.v1 as tf + + +warnings.warn( + "Python AlphaZero has known issues when using Keras 3 and may be " + "removed in a future version unless fixed. See OpenSpiel github " + "issue #1206 for details." 
+) + + +def cascade(x, fns): + for fn in fns: + x = fn(x) + return x + +tfkl = tf.keras.layers +conv_2d = functools.partial(tfkl.Conv2D, padding="same") + + +def batch_norm(training, updates, name): + """A batch norm layer. + + Args: + training: A placeholder of whether this is done in training or not. + updates: A list to be extended with this layer's updates. + name: Name of the layer. + + Returns: + A function to apply to the previous layer. + """ + bn = tfkl.BatchNormalization(name=name) + def batch_norm_layer(x): + # This emits a warning that training is a placeholder instead of a concrete + # bool, but seems to work anyway. + applied = bn(x, training) + updates.extend(bn.updates) + return applied + return batch_norm_layer + + +def residual_layer(inputs, num_filters, kernel_size, training, updates, name): + return cascade(inputs, [ + conv_2d(num_filters, kernel_size, name=f"{name}_res_conv1"), + batch_norm(training, updates, f"{name}_res_batch_norm1"), + tfkl.Activation("relu"), + conv_2d(num_filters, kernel_size, name=f"{name}_res_conv2"), + batch_norm(training, updates, f"{name}_res_batch_norm2"), + lambda x: tfkl.add([x, inputs]), + tfkl.Activation("relu"), + ]) + + +class TrainInput(collections.namedtuple( + "TrainInput", "observation legals_mask policy value")): + """Inputs for training the Model.""" + + @staticmethod + def stack(train_inputs): + observation, legals_mask, policy, value = zip(*train_inputs) + return TrainInput( + np.array(observation, dtype=np.float32), + np.array(legals_mask, dtype=bool), + np.array(policy), + np.expand_dims(value, 1)) + + +class Losses(collections.namedtuple("Losses", "policy value l2")): + """Losses from a training step.""" + + @property + def total(self): + return self.policy + self.value + self.l2 + + def __str__(self): + return ("Losses(total: {:.3f}, policy: {:.3f}, value: {:.3f}, " + "l2: {:.3f})").format(self.total, self.policy, self.value, self.l2) + + def __add__(self, other): + return Losses(self.policy + other.policy, + self.value + other.value, + self.l2 + other.l2) + + def __truediv__(self, n): + return Losses(self.policy / n, self.value / n, self.l2 / n) + + +class Model(object): + """An AlphaZero style model with a policy and value head. + + This supports three types of models: mlp, conv2d and resnet. + + All models have a shared torso stack with two output heads: policy and value. + They have same meaning as in the AlphaGo Zero and AlphaZero papers. The resnet + model copies the one in that paper when set with width 256 and depth 20. The + conv2d model is the same as the resnet except uses a conv+batchnorm+relu + instead of the res blocks. The mlp model uses dense layers instead of conv, + and drops batch norm. + + Links to relevant articles/papers: + https://deepmind.com/blog/article/alphago-zero-starting-scratch has an open + access link to the AlphaGo Zero nature paper. + https://deepmind.com/blog/article/alphazero-shedding-new-light-grand-games-chess-shogi-and-go + has an open access link to the AlphaZero science paper. + + All are parameterized by their input (observation) shape and output size + (number of actions), though the conv2d and resnet might only work with games + that have spatial data (ie 3 non-batch dimensions, eg: connect four would + work, but not poker). + + The depth is the number of blocks in the torso, where the definition of a + block varies by model. For a resnet it's a resblock which is two conv2ds, + batch norms and relus, and an addition. For conv2d it's a conv2d, a batch norm + and a relu. 
For mlp it's a dense plus relu. + + The width is the number of filters for any conv2d and the number of hidden + units for any dense layer. + + Note that this uses an explicit graph so that it can be used for inference + and training from C++. It seems to also be 20%+ faster than using eager mode, + at least for the unit test. + """ + + valid_model_types = ["mlp", "conv2d", "resnet"] + + def __init__(self, session, saver, path): + """Init a model. Use build_model, from_checkpoint or from_graph instead.""" + self._session = session + self._saver = saver + self._path = path + + def get_var(name): + return self._session.graph.get_tensor_by_name(name + ":0") + + self._input = get_var("input") + self._legals_mask = get_var("legals_mask") + self._training = get_var("training") + self._value_out = get_var("value_out") + self._policy_softmax = get_var("policy_softmax") + self._policy_loss = get_var("policy_loss") + self._value_loss = get_var("value_loss") + self._l2_reg_loss = get_var("l2_reg_loss") + self._policy_targets = get_var("policy_targets") + self._value_targets = get_var("value_targets") + self._train = self._session.graph.get_operation_by_name("train") + + @classmethod + def build_model(cls, model_type, input_shape, output_size, nn_width, nn_depth, + weight_decay, learning_rate, path): + """Build a model with the specified params.""" + if model_type not in cls.valid_model_types: + raise ValueError(f"Invalid model type: {model_type}, " + f"expected one of: {cls.valid_model_types}") + while len(input_shape) < 3: + input_shape.append(1) + + # The order of creating the graph, init, saver, and session is important! + # https://stackoverflow.com/a/40788998 + g = tf.Graph() # Allow multiple independent models and graphs. + with g.as_default(): + cls._define_graph(model_type, input_shape, output_size, nn_width, + nn_depth, weight_decay, learning_rate) + init = tf.variables_initializer(tf.global_variables(), + name="init_all_vars_op") + with tf.device("/cpu:0"): # Saver only works on CPU. + saver = tf.train.Saver( + max_to_keep=10000, sharded=False, name="saver") + session = tf.Session(graph=g) + session.__enter__() + session.run(init) + return cls(session, saver, path) + + @classmethod + def from_checkpoint(cls, checkpoint, path=None): + """Load a model from a checkpoint.""" + model = cls.from_graph(checkpoint, path) + model.load_checkpoint(checkpoint) + return model + + @classmethod + def from_graph(cls, metagraph, path=None): + """Load only the model from a graph or checkpoint.""" + if not os.path.exists(metagraph): + metagraph += ".meta" + if not path: + path = os.path.dirname(metagraph) + g = tf.Graph() # Allow multiple independent models and graphs. 
+ with g.as_default(): + saver = tf.train.import_meta_graph(metagraph) + session = tf.Session(graph=g) + session.__enter__() + session.run("init_all_vars_op") + return cls(session, saver, path) + + def __del__(self): + if hasattr(self, "_session") and self._session: + self._session.close() + + @staticmethod + def _define_graph(model_type, input_shape, output_size, + nn_width, nn_depth, weight_decay, learning_rate): + """Define the model graph.""" + # Inference inputs + input_size = int(np.prod(input_shape)) + observations = tf.placeholder(tf.float32, [None, input_size], name="input") + legals_mask = tf.placeholder(tf.bool, [None, output_size], + name="legals_mask") + training = tf.placeholder(tf.bool, name="training") + + bn_updates = [] + + # Main torso of the network + if model_type == "mlp": + torso = observations # Ignore the input shape, treat it as a flat array. + for i in range(nn_depth): + torso = cascade(torso, [ + tfkl.Dense(nn_width, name=f"torso_{i}_dense"), + tfkl.Activation("relu"), + ]) + elif model_type == "conv2d": + torso = tfkl.Reshape(input_shape)(observations) + for i in range(nn_depth): + torso = cascade(torso, [ + conv_2d(nn_width, 3, name=f"torso_{i}_conv"), + batch_norm(training, bn_updates, f"torso_{i}_batch_norm"), + tfkl.Activation("relu"), + ]) + elif model_type == "resnet": + torso = cascade(observations, [ + tfkl.Reshape(input_shape), + conv_2d(nn_width, 3, name="torso_in_conv"), + batch_norm(training, bn_updates, "torso_in_batch_norm"), + tfkl.Activation("relu"), + ]) + for i in range(nn_depth): + torso = residual_layer(torso, nn_width, 3, training, bn_updates, + f"torso_{i}") + else: + raise ValueError("Unknown model type.") + + # The policy head + if model_type == "mlp": + policy_head = cascade(torso, [ + tfkl.Dense(nn_width, name="policy_dense"), + tfkl.Activation("relu"), + ]) + else: + policy_head = cascade(torso, [ + conv_2d(filters=2, kernel_size=1, name="policy_conv"), + batch_norm(training, bn_updates, "policy_batch_norm"), + tfkl.Activation("relu"), + tfkl.Flatten(), + ]) + policy_logits = tfkl.Dense(output_size, name="policy")(policy_head) + policy_logits = tf.where(legals_mask, policy_logits, + -1e32 * tf.ones_like(policy_logits)) + unused_policy_softmax = tf.identity(tfkl.Softmax()(policy_logits), + name="policy_softmax") + policy_targets = tf.placeholder( + shape=[None, output_size], dtype=tf.float32, name="policy_targets") + policy_loss = tf.reduce_mean( + tf.nn.softmax_cross_entropy_with_logits_v2( + logits=policy_logits, labels=policy_targets), + name="policy_loss") + + # The value head + if model_type == "mlp": + value_head = torso # Nothing specific before the shared value head. + else: + value_head = cascade(torso, [ + conv_2d(filters=1, kernel_size=1, name="value_conv"), + batch_norm(training, bn_updates, "value_batch_norm"), + tfkl.Activation("relu"), + tfkl.Flatten(), + ]) + value_out = cascade(value_head, [ + tfkl.Dense(nn_width, name="value_dense"), + tfkl.Activation("relu"), + tfkl.Dense(1, name="value"), + tfkl.Activation("tanh"), + ]) + # Need the identity to name the single value output from the dense layer. 
+ value_out = tf.identity(value_out, name="value_out") + value_targets = tf.placeholder( + shape=[None, 1], dtype=tf.float32, name="value_targets") + value_loss = tf.identity(tf.losses.mean_squared_error( + value_out, value_targets), name="value_loss") + + l2_reg_loss = tf.add_n([ + weight_decay * tf.nn.l2_loss(var) + for var in tf.trainable_variables() + if "/bias:" not in var.name + ], name="l2_reg_loss") + + total_loss = policy_loss + value_loss + l2_reg_loss + optimizer = tf.train.AdamOptimizer(learning_rate) + with tf.control_dependencies(bn_updates): + unused_train = optimizer.minimize(total_loss, name="train") + + @property + def num_trainable_variables(self): + return sum(np.prod(v.shape) for v in tf.trainable_variables()) + + def print_trainable_variables(self): + for v in tf.trainable_variables(): + print("{}: {}".format(v.name, v.shape)) + + def write_graph(self, filename): + full_path = os.path.join(self._path, filename) + tf.train.export_meta_graph( + graph_def=self._session.graph_def, saver_def=self._saver.saver_def, + filename=full_path, as_text=False) + return full_path + + def inference(self, observation, legals_mask): + return self._session.run( + [self._value_out, self._policy_softmax], + feed_dict={self._input: np.array(observation, dtype=np.float32), + self._legals_mask: np.array(legals_mask, dtype=bool), + self._training: False}) + + def update(self, train_inputs: Sequence[TrainInput]): + """Runs a training step.""" + batch = TrainInput.stack(train_inputs) + + # Run a training step and get the losses. + _, policy_loss, value_loss, l2_reg_loss = self._session.run( + [self._train, self._policy_loss, self._value_loss, self._l2_reg_loss], + feed_dict={self._input: batch.observation, + self._legals_mask: batch.legals_mask, + self._policy_targets: batch.policy, + self._value_targets: batch.value, + self._training: True}) + + return Losses(policy_loss, value_loss, l2_reg_loss) + + def save_checkpoint(self, step): + return self._saver.save( + self._session, + os.path.join(self._path, "checkpoint"), + global_step=step) + + def load_checkpoint(self, path): + return self._saver.restore(self._session, path) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/model_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/model_test.py new file mode 100644 index 0000000..dc5deaf --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/alpha_zero/model_test.py @@ -0,0 +1,125 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
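# --- Editor's note: a minimal sketch of driving the Model class defined above,
# using only the API shown in model.py (build_model, TrainInput, update,
# inference). It assumes a TF1-compatible environment (see the Keras 3 warning
# in model.py); the game and hyper-parameters are arbitrary, and the test file
# that follows exercises the same calls in more detail. Not part of the
# vendored file.

import numpy as np
import pyspiel
from open_spiel.python.algorithms.alpha_zero import model as model_lib

game = pyspiel.load_game("tic_tac_toe")
model = model_lib.Model.build_model(
    "mlp", game.observation_tensor_shape(), game.num_distinct_actions(),
    nn_width=64, nn_depth=2, weight_decay=1e-4, learning_rate=0.01, path=None)

state = game.new_initial_state()
obs = state.observation_tensor()
mask = state.legal_actions_mask()
policy = np.zeros(len(mask), dtype=float)
policy[state.legal_actions()[0]] = 1.0  # one-hot target for the first legal move

losses = model.update([model_lib.TrainInput(obs, mask, policy, value=1.0)])
value, policy_out = model.inference([obs], [mask])  # batched inputs and outputs
# With a real directory instead of path=None, model.save_checkpoint(step) would
# write a checkpoint there, as the learner in alpha_zero.py does.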
+ +"""Tests for open_spiel.python.algorithms.alpha_zero.model.""" + +from absl.testing import absltest +from absl.testing import parameterized + +import numpy as np + +from open_spiel.python.algorithms.alpha_zero import model as model_lib +import pyspiel + +solved = {} + + +def solve_game(state): + state_str = str(state) + if state_str in solved: + return solved[state_str].value + if state.is_terminal(): + return state.returns()[0] + + max_player = state.current_player() == 0 + obs = state.observation_tensor() + act_mask = np.array(state.legal_actions_mask()) + values = np.full(act_mask.shape, -2 if max_player else 2) + for action in state.legal_actions(): + values[action] = solve_game(state.child(action)) + value = values.max() if max_player else values.min() + best_actions = np.where((values == value) & act_mask) + policy = np.zeros_like(act_mask) + policy[best_actions[0][0]] = 1 # Choose the first for a deterministic policy. + solved[state_str] = model_lib.TrainInput(obs, act_mask, policy, value) + return value + + +def build_model(game, model_type): + return model_lib.Model.build_model( + model_type, game.observation_tensor_shape(), game.num_distinct_actions(), + nn_width=32, nn_depth=2, weight_decay=1e-4, learning_rate=0.01, path=None) + + +class ModelTest(parameterized.TestCase): + + @parameterized.parameters(model_lib.Model.valid_model_types) + def test_model_learns_simple(self, model_type): + game = pyspiel.load_game("tic_tac_toe") + model = build_model(game, model_type) + print("Num variables:", model.num_trainable_variables) + model.print_trainable_variables() + + train_inputs = [] + state = game.new_initial_state() + while not state.is_terminal(): + obs = state.observation_tensor() + act_mask = state.legal_actions_mask() + action = state.legal_actions()[0] + policy = np.zeros(len(act_mask), dtype=float) + policy[action] = 1 + train_inputs.append(model_lib.TrainInput(obs, act_mask, policy, value=1)) + state.apply_action(action) + value, policy = model.inference([obs], [act_mask]) + self.assertLen(policy, 1) + self.assertLen(value, 1) + self.assertLen(policy[0], game.num_distinct_actions()) + self.assertLen(value[0], 1) + + losses = [] + policy_loss_goal = 0.05 + value_loss_goal = 0.05 + for i in range(200): + loss = model.update(train_inputs) + print(i, loss) + losses.append(loss) + if loss.policy < policy_loss_goal and loss.value < value_loss_goal: + break + + self.assertGreater(losses[0].total, losses[-1].total) + self.assertGreater(losses[0].policy, losses[-1].policy) + self.assertGreater(losses[0].value, losses[-1].value) + self.assertLess(losses[-1].value, value_loss_goal) + self.assertLess(losses[-1].policy, policy_loss_goal) + + @parameterized.parameters(model_lib.Model.valid_model_types) + def test_model_learns_optimal(self, model_type): + game = pyspiel.load_game("tic_tac_toe") + solve_game(game.new_initial_state()) + + model = build_model(game, model_type) + print("Num variables:", model.num_trainable_variables) + model.print_trainable_variables() + + train_inputs = list(solved.values()) + print("states:", len(train_inputs)) + losses = [] + policy_loss_goal = 0.12 + value_loss_goal = 0.12 + for i in range(500): + loss = model.update(train_inputs) + print(i, loss) + losses.append(loss) + if loss.policy < policy_loss_goal and loss.value < value_loss_goal: + break + + self.assertGreater(losses[0].policy, losses[-1].policy) + self.assertGreater(losses[0].value, losses[-1].value) + self.assertGreater(losses[0].total, losses[-1].total) + self.assertLess(losses[-1].value, 
value_loss_goal)
+    self.assertLess(losses[-1].policy, policy_loss_goal)
+
+
+if __name__ == "__main__":
+  absltest.main()
diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/async_mcts.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/async_mcts.py
new file mode 100644
index 0000000..9143286
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/async_mcts.py
@@ -0,0 +1,765 @@
+# Copyright 2024 DeepMind Technologies Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""An asynchronous Monte Carlo Tree Search algorithm for game play.
+
+This implements asynchronous MCTS which runs evaluations asynchronously in
+parallel. It is a very simplified version of the algorithm described in:
+
+Schultz et al. '24. Mastering Board Games by External and Internal Planning
+with Language Models. https://arxiv.org/abs/2412.12119
+
+Unlike the paper which describes an entirely model-based approach, this
+implementation does use a game engine internally. Also, this version uses
+standard virtual losses (not the dynamic virtual counts described in the paper).
+However, it does implement the asynchronous calls to the evaluators in the same
+way as the algorithm in the paper.
+"""
+
+from __future__ import annotations
+import concurrent.futures
+import math
+import time
+from typing import Callable, Optional
+
+from absl import logging
+import numpy as np
+
+import pyspiel
+
+
+def robust_child_with_total_reward_tiebreaker(
+    root: SearchNode,
+) -> tuple[int, SearchNode]:
+  """Returns the best action and associated child node.
+
+  The child node with the most visits is chosen.
+  In case of a tie, the child with the highest total reward is chosen.
+
+
+  Args:
+    root: The root node of the search tree.
+
+  Returns:
+    A tuple containing the best action and the associated child node.
+  """
+  selection_criteria = lambda node: (node.explore_count, node.total_reward)
+  best_child = max(root.children, key=selection_criteria)
+  return best_child.action, best_child
+
+
+def robust_child(root: SearchNode) -> tuple[int, SearchNode]:
+  """Returns the best action and associated child node.
+
+  A child node with the most visits is chosen.
+
+  Args:
+    root: The root node of the search tree.
+
+  Returns:
+    A tuple containing the best action and the associated child node.
+  """
+  selection_criteria = lambda node: node.explore_count
+  best_child = max(root.children, key=selection_criteria)
+  return best_child.action, best_child
+
+
+def max_child(root: SearchNode) -> tuple[int, SearchNode]:
+  """Returns the best action and associated child node.
+
+  A child node with the highest expected reward is chosen.
+
+  Args:
+    root: The root node of the search tree.
+
+  Returns:
+    A tuple containing the best action and the associated child node.
+  """
+  selection_criteria = (
+      lambda node: node.total_reward / node.explore_count
+      if node.explore_count
+      else float("-inf")
+  )
+  best_child = max(root.children, key=selection_criteria)
+  return best_child.action, best_child
+
+
+def max_robust_child(
+    root: SearchNode, find_robust: bool = False
+) -> tuple[Optional[int], Optional[SearchNode]]:
+  """Returns the best action and associated child node.
+
+  A child node with the highest expected reward and most visits is chosen.
+  If no such child exists, increase the number of simulations.
+
+  Args:
+    root: The root node of the search tree.
+    find_robust: Whether to find a robust child node. E.g., if max compute is
+      reached and max robust is not found.
+
+  Returns:
+    A tuple containing the best action and the associated child node.
+  """
+  if find_robust:
+    best_action, best_child = robust_child(root)
+  else:
+    _, max_child_node = max_child(root)
+    _, robust_child_node = robust_child(root)
+    best_action, best_child = None, None
+    for child in root.children:
+      if child == max_child_node and child == robust_child_node:
+        best_action, best_child = child.action, child
+        break
+  return best_action, best_child
+
+
+def secure_child(
+    root: SearchNode, secure_c: float = 1.0
+) -> tuple[int, SearchNode]:
+  """Returns the best action and associated child node.
+
+  A child node with the most visits is chosen.
+
+  Args:
+    root: The root node of the search tree.
+    secure_c: The constant used to calculate lower uncertainty bound
+
+  Returns:
+    A tuple containing the best action and the associated child node.
+  """
+  selection_criteria = (
+      lambda node: node.total_reward / node.explore_count  # pylint: disable=g-long-ternary
+      - secure_c / math.sqrt(node.explore_count)
+      if node.explore_count
+      else float("-inf")
+  )
+  best_child = max(root.children, key=selection_criteria)
+  return best_child.action, best_child
+
+
+def max_robust_secure_child(
+    root: SearchNode, secure_c: float = 1.0, find_secure: bool = False
+) -> tuple[Optional[int], Optional[SearchNode]]:
+  """Returns the best action and associated child node.
+
+  A child node with the most visits is chosen.
+
+  Args:
+    root: The root node of the search tree.
+    secure_c: The constant used to calculate lower uncertainty bound.
+    find_secure: Whether to find a secure child node.
+
+  Returns:
+    A tuple containing the best action and the associated child node.
+  """
+  if find_secure:
+    best_action, best_child = secure_child(root, secure_c)
+  else:
+    best_action, best_child = max_robust_child(root)
+  return best_action, best_child
+
+
+class Evaluator:
+  """Abstract class representing an evaluation function for a game.
+
+  The evaluation function takes in an intermediate state in the game and returns
+  an evaluation of that state, which should correlate with chances of winning
+  the game. It returns the evaluation from all players' perspectives.
+  """
+
+  def prior_and_value(
+      self, state: pyspiel.State
+  ) -> tuple[list[tuple[int, float]], np.ndarray]:
+    """Returns a prior (list of (action, prior)) and values (np.array)."""
+    raise NotImplementedError
+
+
+class RandomRolloutEvaluator(Evaluator):
+  """A simple evaluator doing random rollouts.
+
+  This evaluator returns the average outcome of playing random actions from the
+  given state until the end of the game. n_rollouts is the number of random
+  outcomes to be considered.
+ """ + + def __init__( + self, + random_state: np.random.RandomState | None = None, + ): + self._random_state = random_state or np.random.RandomState() + + def prior_and_value( + self, state: pyspiel.State + ) -> tuple[list[tuple[int, float]], np.ndarray]: + """Returns evaluation on given state.""" + # prior + if state.is_chance_node(): + prior = state.chance_outcomes() + else: + legal_actions = state.legal_actions(state.current_player()) + prior = [(action, 1.0 / len(legal_actions)) for action in legal_actions] + # value + working_state = state.clone() + while not working_state.is_terminal(): + if working_state.is_chance_node(): + outcomes = working_state.chance_outcomes() + action_list, prob_list = zip(*outcomes) + action = self._random_state.choice(action_list, p=prob_list) + else: + action = self._random_state.choice(working_state.legal_actions()) + working_state.apply_action(action) + value = np.array(working_state.returns()) + return prior, value + + +class SearchNode(object): + """A node in the search tree. + + A SearchNode represents a state and possible continuations from it. Each child + represents a possible action, and the expected result from doing so. + + Attributes: + action: The action from the parent node's perspective. Not important for the + root node, as the actions that lead to it are in the past. + player: Which player made this action. + prior: A prior probability for how likely this action will be selected. + explore_count: How many times this node was explored. + total_reward: The sum of rewards of rollouts through this node, from the + parent node's perspective. The average reward of this node is + `total_reward / explore_count` + outcome: The rewards for all players if this is a terminal node or the + subtree has been proven, otherwise None. + children: A list of SearchNodes representing the possible actions from this + node, along with their expected rewards. + expanded: Whether this node has been expanded. + """ + + __slots__ = [ + "action", + "player", + "prior", + "explore_count", + "total_reward", + "outcome", + "children", + "expanded", + ] + + def __init__(self, action: int | None, player: int, prior: float): + self.action = action + self.player = player + self.prior = prior + self.explore_count = 0 + self.total_reward = 0.0 + self.outcome = None + self.children = [] + self.expanded = False + + def uct_value(self, parent_explore_count: int, uct_c: float) -> float: + """Returns the UCT value of child.""" + if self.outcome is not None: + return self.outcome[self.player] + + if self.explore_count == 0: + return float("inf") + + return self.total_reward / self.explore_count + uct_c * math.sqrt( + math.log(parent_explore_count) / self.explore_count + ) + + def puct_value(self, parent_explore_count: int, uct_c: float) -> float: + """Returns the PUCT value of child.""" + if self.outcome is not None: + return self.outcome[self.player] + + return ( + self.explore_count and self.total_reward / self.explore_count + ) + uct_c * self.prior * math.sqrt(parent_explore_count) / ( + self.explore_count + 1 + ) + + def sort_key(self): + """Returns the best action from this node, either proven or most visited. + + This ordering leads to choosing: + - Highest proven score > 0 over anything else, including a promising but + unproven action. + - A proven draw only if it has higher exploration than others that are + uncertain, or the others are losses. 
+ - Uncertain action with most exploration over loss of any difficulty + - Hardest loss if everything is a loss + - Highest expected reward if explore counts are equal (unlikely). + - Longest win, if multiple are proven (unlikely due to early stopping). + """ + return ( + 0 if self.outcome is None else self.outcome[self.player], + self.explore_count, + self.total_reward, + ) + + def best_child(self): + """Returns the best child in order of the sort key.""" + return max(self.children, key=SearchNode.sort_key) + + def children_str(self, state=None): + """Returns the string representation of this node's children. + + They are ordered based on the sort key, so order of being chosen to play. + + Args: + state: A `pyspiel.State` object, to be used to convert the action id into + a human readable format. If None, the action integer id is used. + """ + return "\n".join([ + c.to_str(state) + for c in reversed(sorted(self.children, key=SearchNode.sort_key)) + ]) + + def to_str(self, state=None): + """Returns the string representation of this node. + + Args: + state: A `pyspiel.State` object, to be used to convert the action id into + a human readable format. If None, the action integer id is used. + """ + action = ( + state.action_to_string(state.current_player(), self.action) + if state and self.action is not None + else str(self.action) + ) + return ( + "{:>6}: player: {}, prior: {:5.3f}, value: {:6.3f}, sims: {:5d}, " + "outcome: {}, {:3d} children" + ).format( + action, + self.player, + self.prior, + self.explore_count and self.total_reward / self.explore_count, + self.explore_count, + ( + "{:4.1f}".format(self.outcome[self.player]) + if self.outcome + else "none" + ), + len(self.children), + ) + + def __str__(self): + return self.to_str(None) + + +class MCTSBot(pyspiel.Bot): + """Bot that uses Monte-Carlo Tree Search algorithm.""" + + def __init__( + self, + game, + uct_c, + max_simulations, + evaluator, + solve=True, + random_state=None, + child_selection_fn=SearchNode.uct_value, + best_child_fn: Callable[ + ..., tuple[Optional[int], Optional[SearchNode]] + ] = robust_child_with_total_reward_tiebreaker, + dirichlet_noise=None, + verbose=False, + dont_return_chance_node=False, + virtual_loss: int = 10, # virtual loss for async MCTS + batch_size: int = 16, # batch size for asynchronous MCTS + secure_c: float = 1.0, # secure constant for secure child selection + simulations_multiplier: float = 1.0, + max_additional_simulation_rounds: int = 0, + timeout: float = 5.0, # timeout for asynchronous MCTS + ): + """Initializes a MCTS Search algorithm in the form of a bot. + + In multiplayer games, or non-zero-sum games, the players will play the + greedy strategy. + + Args: + game: A pyspiel.Game to play. + uct_c: The exploration constant for UCT. + max_simulations: How many iterations of MCTS to perform. Each simulation + will result in one call to the evaluator. Memory usage should grow + linearly with simulations * branching factor. How many nodes in the + search tree should be evaluated. This is correlated with memory size and + tree depth. + evaluator: A `Evaluator` object to use to evaluate a leaf node. + solve: Whether to back up solved states. + random_state: An optional numpy RandomState to make it deterministic. + child_selection_fn: A function to select the child in the descent phase. + The default is UCT. + best_child_fn: A function to select the best child in root node after tree + is built. The default is the child with the most visits. 
+ dirichlet_noise: A tuple of (epsilon, alpha) for adding dirichlet noise to + the policy at the root. This is from the alpha-zero paper. + verbose: Whether to print information about the search tree before + returning the action. Useful for confirming the search is working + sensibly. + dont_return_chance_node: If true, do not stop expanding at chance nodes. + Enabled for AlphaZero. + virtual_loss: the value to use for virtual loss in async MCTS. + batch_size: The batch size for asynchronous MCTS. + secure_c: The constant used to calculate lower uncertainty bound + simulations_multiplier: The multiplier for search budget. + max_additional_simulation_rounds: The maximum number of additional + simulation rounds. + timeout: The timeout for asynchronous MCTS. + + Raises: + ValueError: if the game type isn't supported. + """ + pyspiel.Bot.__init__(self) + # Check that the game satisfies the conditions for this MCTS implementation. + game_type = game.get_type() + if game_type.reward_model != pyspiel.GameType.RewardModel.TERMINAL: + raise ValueError("Game must have terminal rewards.") + if game_type.dynamics != pyspiel.GameType.Dynamics.SEQUENTIAL: + raise ValueError("Game must have sequential turns.") + + self._game = game + self.uct_c = uct_c + self.max_simulations = max_simulations + self._max_simulations = max_simulations + self.evaluator = evaluator + self.verbose = verbose + self.solve = solve + self.max_utility = game.max_utility() + self.min_utility = game.min_utility() + self._dirichlet_noise = dirichlet_noise + self._random_state = random_state or np.random.RandomState() + self._child_selection_fn = child_selection_fn + self._best_child_fn = best_child_fn + self.dont_return_chance_node = dont_return_chance_node + self._root = None + self.total_num_searches = 0 + self.total_search_time = 0 + # Async MCTS parameters + self.virtual_loss = virtual_loss + self.batch_size = batch_size + self.timeout = timeout + self.total_timeouts = 0 + self.total_eval_errors = 0 + # Child selection at root parameters + self._secure_c = secure_c + self._alternative_criteria = False + self._max_additional_simulation_rounds = max_additional_simulation_rounds + self._simulations_multiplier = simulations_multiplier + + def restart_at(self, state): + pass + + def get_root(self): + return self._root + + def _get_selection_function_arguments(self, root): + if self._best_child_fn is secure_child: + arguments = (root, self._secure_c) + elif self._best_child_fn is max_robust_child: + arguments = (root, self._alternative_criteria) + elif self._best_child_fn is max_robust_secure_child: + arguments = (root, self._secure_c, self._alternative_criteria) + else: + arguments = (root,) + + return arguments + + def step_with_policy(self, state): + """Returns bot's policy and action at given state.""" + t1 = time.time() + simulation_round = 0 + best_action = None + best_child = None + root = None + while ( + best_action is None + and simulation_round <= self._max_additional_simulation_rounds + ): + simulation_round += 1 + if simulation_round == self._max_additional_simulation_rounds: + self._alternative_criteria = True + root = self.mcts_search(state) + assert root is not None, "Root is None" + self._root = root + arguments = self._get_selection_function_arguments(root) + best_action, best_child = self._best_child_fn(*arguments) + # Determine the number of additional simulations. 
+ if best_action is None: + self.max_simulations = ( + int(self._simulations_multiplier * self._max_simulations) + ) + assert best_action is not None, "Best action is None" + assert best_child is not None, "Best child is None" + assert self._root is not None, "Root is None" + seconds = time.time() - t1 + self.total_search_time += seconds + if self.verbose: + print( + "Finished {} sims in {:.3f} secs, {:.1f} sims/s".format( + root.explore_count, seconds, root.explore_count / seconds + ) + ) + print("Root:") + print(root.to_str(state)) + print("Children:") + print(root.children_str(state)) + if best_child.children: + chosen_state = state.clone() + chosen_state.apply_action(best_action) + print("Children of chosen:") + print(best_child.children_str(chosen_state)) + policy = [ + (action, (1.0 if action == best_action else 0.0)) + for action in state.legal_actions(state.current_player()) + ] + # Rest max simulations to original value. + self.max_simulations = self._max_simulations + return policy, best_action + + def step(self, state): + return self.step_with_policy(state)[1] + + def _add_virtual_losses(self, node): + # Add virtual losses. This is applied to the nodes touched during the tree + # policy action selection of the simulation (downward pass). This + # discourages multiple threads from exploring the same node. + node.total_reward += (self.virtual_loss * self.min_utility) + node.explore_count += self.virtual_loss + + def _remove_virtual_losses(self, node): + # Remove virtual losses. This is applied to the nodes touched during the + # backpropagation phase (upward pass) to ensure that the fake losses are + # removed once this path down the tree is done being simulated. + node.total_reward -= (self.virtual_loss * self.min_utility) + node.explore_count -= self.virtual_loss + + def _choose_next_node(self, visit_path, working_state, current_node): + if working_state.is_chance_node(): + # For chance nodes, rollout according to chance node's probability + # distribution + outcomes = working_state.chance_outcomes() + action_list, prob_list = zip(*outcomes) + action = self._random_state.choice(action_list, p=prob_list) + chosen_child = next( + c for c in current_node.children if c.action == action + ) + else: + # Otherwise choose node with largest UCT value + chosen_child = max( + current_node.children, + key=lambda c: self._child_selection_fn( # pylint: disable=g-long-lambda + c, current_node.explore_count, self.uct_c + ), + ) + working_state.apply_action(chosen_child.action) + current_node = chosen_child + self._add_virtual_losses(current_node) + visit_path.append(current_node) + return current_node + + def _apply_tree_policy(self, root, state): + """Applies the UCT policy to play the game until reaching a leaf node. + + A leaf node is defined as a node that is terminal or has not been evaluated + yet. If it reaches a node that has been evaluated before but hasn't been + expanded, then expand its children and continue. + + Args: + root: The root node in the search tree. + state: The state of the game at the root node. + + Returns: + visit_path: A list of nodes descending from the root node to a leaf node. + working_state: The state of the game at the leaf node. 
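[Editor's note] The descent step in `_choose_next_node` above picks the child with the largest value under `self._child_selection_fn` (UCT by default). A minimal standalone sketch of that default score, mirroring `SearchNode.uct_value`; the function name and example numbers here are illustrative only:

```python
import math

def uct_score(total_reward, explore_count, parent_explore_count, uct_c):
    # Mean reward so far plus an exploration bonus; unvisited children are
    # always preferred, matching SearchNode.uct_value.
    if explore_count == 0:
        return float("inf")
    exploitation = total_reward / explore_count
    exploration = uct_c * math.sqrt(math.log(parent_explore_count) / explore_count)
    return exploitation + exploration

# Example: a child visited 10 times with total reward 6, parent visited 50 times.
print(uct_score(6.0, 10, 50, uct_c=math.sqrt(2)))
```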
+ """ + visit_path = [root] + working_state = state.clone() + current_node = root + self._add_virtual_losses(root) + unexplored_explore_count = self.virtual_loss + while ( + not working_state.is_terminal() + and current_node.explore_count > unexplored_explore_count + ) or (working_state.is_chance_node() and self.dont_return_chance_node): + if not current_node.children: + return visit_path, working_state, current_node + current_node = self._choose_next_node( + visit_path, working_state, current_node + ) + return visit_path, working_state, current_node + + def backpropagate(self, visit_path, returns): + while visit_path: + # For chance nodes, walk up the tree to find the decision-maker. + decision_node_idx = -1 + while visit_path[decision_node_idx].player == pyspiel.PlayerId.CHANCE: + decision_node_idx -= 1 + # Chance node targets are for the respective decision-maker. + target_return = returns[visit_path[decision_node_idx].player] + node = visit_path.pop() + node.total_reward += target_return + node.explore_count += 1 + self._remove_virtual_losses(node) + assert node.explore_count >= 1 + + def backpropagate_timeout(self, visit_path): + while visit_path: + # For chance nodes, walk up the tree to find the decision-maker. + node = visit_path.pop() + self._remove_virtual_losses(node) + + def expand(self, root, working_state, current_node, prior, value): + # For a new node, initialize its state, then choose a child as normal. + # prior, value = self.evaluator.prior_and_value(working_state) + if current_node is root and self._dirichlet_noise: + epsilon, alpha = self._dirichlet_noise + noise = self._random_state.dirichlet([alpha] * len(prior)) + prior = [ + (a, (1 - epsilon) * p + epsilon * n) + for (a, p), n in zip(prior, noise) + ] + # Reduce bias from move generation order. + self._random_state.shuffle(prior) + player = working_state.current_player() + current_node.children = [ + SearchNode(action, player, prob) for action, prob in prior + ] + current_node.expanded = True + + def evaluate( + self, working_state + ) -> tuple[list[tuple[int, float]], np.ndarray]: + if working_state.is_terminal(): + prior = [] + values = working_state.returns() + else: + prior, values = self.evaluator.prior_and_value(working_state) + return prior, values + + def handle_leaf(self, prior, value, arguments, timeout=False): + visit_path, working_state, node, root = arguments + if timeout: + self.backpropagate_timeout(visit_path) + return + assert node is not None + if not node.expanded: + self.expand(root, working_state, node, prior, value) + if working_state.is_terminal(): + visit_path[-1].outcome = working_state.returns() + self.backpropagate(visit_path, value) + + def async_mcts_search(self, state): + root = SearchNode(None, state.current_player(), 1) + # do one call up front, to ensure we have some children for the threads + # to spread over. 
+ self._add_virtual_losses(root) + working_state = state.clone() + prior, value = self.evaluate(working_state) + self.handle_leaf( + prior, value, ([root], working_state, root, root), timeout=False + ) + total_simulations = 1 + search_timeouts = 0 + with concurrent.futures.ThreadPoolExecutor( + max_workers=self.batch_size + ) as executor: + while total_simulations < self.max_simulations: + batch_timeouts = 0 + remaining_simulations = self.max_simulations - total_simulations + num_to_queue = min(self.batch_size, remaining_simulations) + futures = [] + arguments = [] + for _ in range(num_to_queue): + visit_path, working_state, node = self._apply_tree_policy(root, state) + arguments.append((visit_path, working_state, node, root)) + future = executor.submit(self.evaluate, working_state) + futures.append(future) + concurrent.futures.wait( + futures, + timeout=self.timeout, + return_when=concurrent.futures.ALL_COMPLETED, + ) + for i, future in enumerate(futures): + if future.done(): + prior, value = future.result() + self.handle_leaf(prior, value, arguments[i], timeout=False) + else: + batch_timeouts += 1 + search_timeouts += 1 + self.handle_leaf(None, None, arguments[i], timeout=True) + total_simulations += num_to_queue + if self.verbose: + logging.info("Timeouts for this search: %d", search_timeouts) + self.total_timeouts += search_timeouts + self.total_num_searches += 1 + if self.verbose: + logging.info( + "Average timeouts per search: %g\n" + + "Average eval errors per search: %g", + self.total_timeouts / self.total_num_searches, + self.total_eval_errors / self.total_num_searches) + return root + + def mcts_search(self, state): + """A vanilla Monte-Carlo Tree Search algorithm. + + This algorithm searches the game tree from the given state. + At the leaf, the evaluator is called if the game state is not terminal. + A total of max_simulations states are explored. + + At every node, the algorithm chooses the action with the highest PUCT value, + defined as: `Q/N + c * prior * sqrt(parent_N) / N`, where Q is the total + reward after the action, and N is the number of times the action was + explored in this position. The input parameter c controls the balance + between exploration and exploitation; higher values of c encourage + exploration of under-explored nodes. Unseen actions are always explored + first. + + At the end of the search, the chosen action is the action that has been + explored most often. This is the action that is returned. + + This implementation supports sequential n-player games, with or without + chance nodes. All players maximize their own reward and ignore the other + players' rewards. This corresponds to max^n for n-player games. It is the + norm for zero-sum games, but doesn't have any special handling for + non-zero-sum games. It doesn't have any special handling for imperfect + information games. + + The implementation also supports backing up solved states, i.e. MCTS-Solver. + The implementation is general in that it is based on a max^n backup (each + player greedily chooses their maximum among proven children values, or there + exists one child whose proven value is game.max_utility()), so it will work + for multiplayer, general-sum, and arbitrary payoff games (not just win/loss/ + draw games). Also chance nodes are considered proven only if all children + have the same value. 
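[Editor's note] As a concrete illustration of the search loop described above, a minimal usage sketch that mirrors the tests added later in this patch (it assumes `pyspiel` has been built and that this module is importable as `open_spiel.python.algorithms.async_mcts`):

```python
import math
import numpy as np
import pyspiel
from open_spiel.python.algorithms import async_mcts

game = pyspiel.load_game("tic_tac_toe")
rng = np.random.RandomState(42)
bot = async_mcts.MCTSBot(
    game,
    uct_c=math.sqrt(2),
    max_simulations=1000,
    evaluator=async_mcts.RandomRolloutEvaluator(random_state=rng),
    random_state=rng,
)
state = game.new_initial_state()
action = bot.step(state)  # runs the search and applies the best-child rule at the root
print(state.action_to_string(state.current_player(), action))
```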
+ + Some references: + - Sturtevant, An Analysis of UCT in Multi-Player Games, 2008, + https://web.cs.du.edu/~sturtevant/papers/multi-player_UCT.pdf + - Nijssen, Monte-Carlo Tree Search for Multi-Player Games, 2013, + https://project.dke.maastrichtuniversity.nl/games/files/phd/Nijssen_thesis.pdf + - Silver, AlphaGo Zero: Starting from scratch, 2017 + https://deepmind.com/blog/article/alphago-zero-starting-scratch + - Winands, Bjornsson, and Saito, "Monte-Carlo Tree Search Solver", 2008. + https://dke.maastrichtuniversity.nl/m.winands/documents/uctloa.pdf + + Arguments: + state: pyspiel.State object, state to search from + + Returns: + The most visited move from the root node. + """ + return self.async_mcts_search(state) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/async_mcts_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/async_mcts_test.py new file mode 100644 index 0000000..1224e37 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/async_mcts_test.py @@ -0,0 +1,201 @@ +# Copyright 2024 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +import random + +from absl.testing import absltest +import numpy as np + +from open_spiel.python.algorithms import async_mcts +from open_spiel.python.algorithms import evaluate_bots +import pyspiel + +UCT_C = math.sqrt(2) + + +def _get_action(state, action_str): + for action in state.legal_actions(): + if action_str == state.action_to_string(state.current_player(), action): + return action + raise ValueError("invalid action string: {}".format(action_str)) + + +def search_tic_tac_toe_state(initial_actions): + game = pyspiel.load_game("tic_tac_toe") + state = game.new_initial_state() + for action_str in initial_actions.split(" "): + state.apply_action(_get_action(state, action_str)) + rng = np.random.RandomState(42) + bot = async_mcts.MCTSBot( + game, + UCT_C, + max_simulations=10000, + solve=True, + random_state=rng, + evaluator=async_mcts.RandomRolloutEvaluator(random_state=rng), + ) + return bot.mcts_search(state), state + + +def make_node(action, player=0, prior=1, **kwargs): + node = async_mcts.SearchNode(action, player, prior) + for k, v in kwargs.items(): + setattr(node, k, v) + return node + + +class MctsBotTest(absltest.TestCase): + + def test_can_play_tic_tac_toe(self): + game = pyspiel.load_game("tic_tac_toe") + max_simulations = 100 + evaluator = async_mcts.RandomRolloutEvaluator() + bots = [ + async_mcts.MCTSBot(game, UCT_C, max_simulations, evaluator), + async_mcts.MCTSBot(game, UCT_C, max_simulations, evaluator), + ] + v = evaluate_bots.evaluate_bots(game.new_initial_state(), bots, np.random) + self.assertEqual(v[0] + v[1], 0) + + def test_can_play_both_sides(self): + game = pyspiel.load_game("tic_tac_toe") + bot = async_mcts.MCTSBot( + game, + UCT_C, + max_simulations=100, + evaluator=async_mcts.RandomRolloutEvaluator(), + ) + bots = [bot, bot] + v = evaluate_bots.evaluate_bots(game.new_initial_state(), bots, np.random) + 
self.assertEqual(v[0] + v[1], 0) + + def test_can_play_single_player(self): + game = pyspiel.load_game("catch") + max_simulations = 100 + evaluator = async_mcts.RandomRolloutEvaluator() + bots = [async_mcts.MCTSBot(game, UCT_C, max_simulations, evaluator)] + v = evaluate_bots.evaluate_bots(game.new_initial_state(), bots, np.random) + self.assertGreater(v[0], 0) + + def test_throws_on_simultaneous_game(self): + game = pyspiel.load_game("matrix_mp") + evaluator = async_mcts.RandomRolloutEvaluator() + with self.assertRaises(ValueError): + async_mcts.MCTSBot(game, UCT_C, max_simulations=100, evaluator=evaluator) + + def test_can_play_three_player_stochastic_games(self): + game = pyspiel.load_game("pig(players=3,winscore=20,horizon=30)") + max_simulations = 100 + evaluator = async_mcts.RandomRolloutEvaluator() + bots = [ + async_mcts.MCTSBot(game, UCT_C, max_simulations, evaluator), + async_mcts.MCTSBot(game, UCT_C, max_simulations, evaluator), + async_mcts.MCTSBot(game, UCT_C, max_simulations, evaluator), + ] + v = evaluate_bots.evaluate_bots(game.new_initial_state(), bots, np.random) + self.assertEqual(sum(v), 0) + + def assertBestChild(self, choice, children): + # If this causes flakiness, the key in `SearchNode.best_child` is bad. + random.shuffle(children) + root = make_node(-1, children=children) + self.assertEqual(root.best_child().action, choice) + + def test_choose_most_visited_when_not_solved(self): + self.assertBestChild( + 0, + [ + make_node(0, explore_count=50, total_reward=30), + make_node(1, explore_count=40, total_reward=40), + ], + ) + + def test_choose_win_over_most_visited(self): + self.assertBestChild( + 1, + [ + make_node(0, explore_count=50, total_reward=30), + make_node(1, explore_count=40, total_reward=40, outcome=[1]), + ], + ) + + def test_choose_best_over_good(self): + self.assertBestChild( + 1, + [ + make_node(0, explore_count=50, total_reward=30, outcome=[0.5]), + make_node(1, explore_count=40, total_reward=40, outcome=[0.8]), + ], + ) + + def test_choose_bad_over_worst(self): + self.assertBestChild( + 0, + [ + make_node(0, explore_count=50, total_reward=30, outcome=[-0.5]), + make_node(1, explore_count=40, total_reward=40, outcome=[-0.8]), + ], + ) + + def test_choose_positive_reward_over_promising(self): + self.assertBestChild( + 1, + [ + make_node(0, explore_count=50, total_reward=40), # more promising + make_node( + 1, explore_count=10, total_reward=1, outcome=[0.1] + ), # solved + ], + ) + + def test_choose_most_visited_over_loss(self): + self.assertBestChild( + 0, + [ + make_node(0, explore_count=50, total_reward=30), + make_node(1, explore_count=40, total_reward=40, outcome=[-1]), + ], + ) + + def test_choose_most_visited_over_draw(self): + self.assertBestChild( + 0, + [ + make_node(0, explore_count=50, total_reward=30), + make_node(1, explore_count=40, total_reward=40, outcome=[0]), + ], + ) + + def test_choose_uncertainty_over_most_visited_loss(self): + self.assertBestChild( + 1, + [ + make_node(0, explore_count=50, total_reward=30, outcome=[-1]), + make_node(1, explore_count=40, total_reward=40), + ], + ) + + def test_choose_slowest_loss(self): + self.assertBestChild( + 1, + [ + make_node(0, explore_count=50, total_reward=10, outcome=[-1]), + make_node(1, explore_count=60, total_reward=15, outcome=[-1]), + ], + ) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/best_response.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/best_response.py new file mode 100644 index 
0000000..751e4a0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/best_response.py @@ -0,0 +1,376 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Computes a Best-Response policy. + +The goal if this file is to be the main entry-point for BR APIs in Python. + +TODO(author2): Also include computation using the more efficient C++ +`TabularBestResponse` implementation. +""" + +import collections +import itertools + +import numpy as np + +from open_spiel.python import games # pylint:disable=unused-import +from open_spiel.python import policy as openspiel_policy +from open_spiel.python.algorithms import get_all_states +from open_spiel.python.algorithms import noisy_policy +from open_spiel.python.algorithms import policy_utils +import pyspiel + + +def _memoize_method(key_fn=lambda x: x): + """Memoize a single-arg instance method using an on-object cache.""" + + def memoizer(method): + cache_name = "cache_" + method.__name__ + + def wrap(self, arg): + key = key_fn(arg) + cache = vars(self).setdefault(cache_name, {}) + if key not in cache: + cache[key] = method(self, arg) + return cache[key] + + return wrap + + return memoizer + + +def compute_states_and_info_states_if_none(game, + all_states=None, + state_to_information_state=None): + """Returns all_states and/or state_to_information_state for the game. + + To recompute everything, pass in None for both all_states and + state_to_information_state. Otherwise, this function will use the passed in + values to reconstruct either of them. + + Args: + game: The open_spiel game. + all_states: The result of calling get_all_states.get_all_states. Cached for + improved performance. + state_to_information_state: A dict mapping state.history_str() to + state.information_state for every state in the game. Cached for improved + performance. + """ + if all_states is None: + all_states = get_all_states.get_all_states( + game, + depth_limit=-1, + include_terminals=False, + include_chance_states=False) + + if state_to_information_state is None: + state_to_information_state = { + state: all_states[state].information_state_string() + for state in all_states + } + + return all_states, state_to_information_state + + +class BestResponsePolicy(openspiel_policy.Policy): + """Computes the best response to a specified strategy.""" + + def __init__(self, + game, + player_id, + policy, + root_state=None, + cut_threshold=0.0): + """Initializes the best-response calculation. + + Args: + game: The game to analyze. + player_id: The player id of the best-responder. + policy: A `policy.Policy` object. + root_state: The state of the game at which to start analysis. If `None`, + the game root state is used. + cut_threshold: The probability to cut when calculating the value. + Increasing this value will trade off accuracy for speed. 
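[Editor's note] For orientation, a minimal usage sketch of this class against a uniform random policy, mirroring the Kuhn poker test added later in this patch (assumes `pyspiel` is available):

```python
import pyspiel
from open_spiel.python import policy
from open_spiel.python.algorithms import best_response

game = pyspiel.load_game("kuhn_poker")
uniform = policy.UniformRandomPolicy(game)
br = best_response.BestResponsePolicy(game, player_id=0, policy=uniform)

root = game.new_initial_state()
print(br.value(root))                  # best-response value for player 0 at the root
print(br.best_response_action("0pb"))  # action at one Kuhn information state (0 per the test)
```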
+ """ + self._num_players = game.num_players() + self._player_id = player_id + self._policy = policy + if root_state is None: + root_state = game.new_initial_state() + self._root_state = root_state + self.infosets = self.info_sets(root_state) + + self._cut_threshold = cut_threshold + + def info_sets(self, state): + """Returns a dict of infostatekey to list of (state, cf_probability).""" + infosets = collections.defaultdict(list) + for s, p in self.decision_nodes(state): + infosets[s.information_state_string(self._player_id)].append((s, p)) + return dict(infosets) + + def decision_nodes(self, parent_state): + """Yields a (state, cf_prob) pair for each descendant decision node.""" + if not parent_state.is_terminal(): + if (parent_state.current_player() == self._player_id or + parent_state.is_simultaneous_node()): + yield (parent_state, 1.0) + for action, p_action in self.transitions(parent_state): + for state, p_state in self.decision_nodes( + openspiel_policy.child(parent_state, action)): + yield (state, p_state * p_action) + + def joint_action_probabilities_counterfactual(self, state): + """Get list of action, probability tuples for simultaneous node. + + Counterfactual reach probabilities exclude the best-responder's actions, + the sum of the probabilities is equal to the number of actions of the + player _player_id. + Args: + state: the current state of the game. + + Returns: + list of action, probability tuples. An action is a tuple of individual + actions for each player of the game. + """ + actions_per_player, probs_per_player = ( + openspiel_policy.joint_action_probabilities_aux(state, self._policy)) + probs_per_player[self._player_id] = [ + 1.0 for _ in probs_per_player[self._player_id] + ] + return [(list(actions), np.prod(probs)) for actions, probs in zip( + itertools.product( + *actions_per_player), itertools.product(*probs_per_player))] + + def transitions(self, state): + """Returns a list of (action, cf_prob) pairs from the specified state.""" + if state.current_player() == self._player_id: + # Counterfactual reach probabilities exclude the best-responder's actions, + # hence return probability 1.0 for every action. 
+ return [(action, 1.0) for action in state.legal_actions()] + elif state.is_chance_node(): + return state.chance_outcomes() + elif state.is_simultaneous_node(): + return self.joint_action_probabilities_counterfactual(state) + else: + return list(self._policy.action_probabilities(state).items()) + + @_memoize_method(key_fn=lambda state: state.history_str()) + def value(self, state): + """Returns the value of the specified state to the best-responder.""" + if state.is_terminal(): + return state.player_return(self._player_id) + elif (state.current_player() == self._player_id or + state.is_simultaneous_node()): + action = self.best_response_action( + state.information_state_string(self._player_id)) + return self.q_value(state, action) + else: + return sum(p * self.q_value(state, a) + for a, p in self.transitions(state) + if p > self._cut_threshold) + + def q_value(self, state, action): + """Returns the value of the (state, action) to the best-responder.""" + if state.is_simultaneous_node(): + + def q_value_sim(sim_state, sim_actions): + child = sim_state.clone() + # change action of _player_id + sim_actions[self._player_id] = action + child.apply_actions(sim_actions) + return self.value(child) + + actions, probabilities = zip(*self.transitions(state)) + return sum(p * q_value_sim(state, a) + for a, p in zip(actions, probabilities / sum(probabilities)) + if p > self._cut_threshold) + else: + return self.value(state.child(action)) + + @_memoize_method() + def best_response_action(self, infostate): + """Returns the best response for this information state.""" + infoset = self.infosets[infostate] + # Get actions from the first (state, cf_prob) pair in the infoset list. + # Return the best action by counterfactual-reach-weighted state-value. + return max( + infoset[0][0].legal_actions(self._player_id), + key=lambda a: sum(cf_p * self.q_value(s, a) for s, cf_p in infoset)) + + def action_probabilities(self, state, player_id=None): + """Returns the policy for a player in a state. + + Args: + state: A `pyspiel.State` object. + player_id: Optional, the player id for whom we want an action. Optional + unless this is a simultaneous state at which multiple players can act. + + Returns: + A `dict` of `{action: probability}` for the specified player in the + supplied state. + """ + if player_id is None: + if state.is_simultaneous_node(): + player_id = self._player_id + else: + player_id = state.current_player() + return { + self.best_response_action(state.information_state_string(player_id)): 1 + } + + +class CPPBestResponsePolicy(openspiel_policy.Policy): + """Computes best response action_probabilities using open_spiel's C++ backend. + + May have better performance than best_response.py for large games. + """ + + def __init__(self, + game, + best_responder_id, + policy, + all_states=None, + state_to_information_state=None, + best_response_processor=None, + cut_threshold=0.0): + """Constructor. + + Args: + game: The game to analyze. + best_responder_id: The player id of the best-responder. + policy: A policy.Policy object representing the joint policy, taking a + state and returning a list of (action, probability) pairs. This could be + aggr_policy, for instance. + all_states: The result of calling get_all_states.get_all_states. Cached + for improved performance. + state_to_information_state: A dict mapping state.history_str to + state.information_state for every state in the game. Cached for improved + performance. 
+ best_response_processor: A TabularBestResponse object, used for processing + the best response actions. + cut_threshold: The probability to cut when calculating the value. + Increasing this value will trade off accuracy for speed. + """ + (self.all_states, self.state_to_information_state) = ( + compute_states_and_info_states_if_none(game, all_states, + state_to_information_state)) + + policy_to_dict = policy_utils.policy_to_dict( + policy, game, self.all_states, self.state_to_information_state) + + # pylint: disable=g-complex-comprehension + # Cache TabularBestResponse for players, due to their costly construction + # TODO(b/140426861): Use a single best-responder once the code supports + # multiple player ids. + if not best_response_processor: + best_response_processor = pyspiel.TabularBestResponse( + game, best_responder_id, policy_to_dict) + + self._policy = policy + self.game = game + self.best_responder_id = best_responder_id + self.tabular_best_response_map = ( + best_response_processor.get_best_response_actions()) + + self._cut_threshold = cut_threshold + + def decision_nodes(self, parent_state): + """Yields a (state, cf_prob) pair for each descendant decision node.""" + if not parent_state.is_terminal(): + if parent_state.current_player() == self.best_responder_id: + yield (parent_state, 1.0) + for action, p_action in self.transitions(parent_state): + for state, p_state in self.decision_nodes(parent_state.child(action)): + yield (state, p_state * p_action) + + def transitions(self, state): + """Returns a list of (action, cf_prob) pairs from the specified state.""" + if state.current_player() == self.best_responder_id: + # Counterfactual reach probabilities exclude the best-responder's actions, + # hence return probability 1.0 for every action. + return [(action, 1.0) for action in state.legal_actions()] + elif state.is_chance_node(): + return state.chance_outcomes() + else: + return list(self._policy.action_probabilities(state).items()) + + @_memoize_method(key_fn=lambda state: state.history_str()) + def value(self, state): + """Returns the value of the specified state to the best-responder.""" + if state.is_terminal(): + return state.player_return(self.best_responder_id) + elif state.current_player() == self.best_responder_id: + action = self.best_response_action( + state.information_state_string(self.best_responder_id)) + return self.q_value(state, action) + else: + return sum(p * self.q_value(state, a) + for a, p in self.transitions(state) + if p > self._cut_threshold) + + def q_value(self, state, action): + """Returns the value of the (state, action) to the best-responder.""" + return self.value(state.child(action)) + + @_memoize_method() + def best_response_action(self, infostate): + """Returns the best response for this information state.""" + action = self.tabular_best_response_map[infostate] + return action + + def action_probabilities(self, state, player_id=None): + """Returns the policy for a player in a state. + + Args: + state: A `pyspiel.State` object. + player_id: Optional, the player id for whom we want an action. Optional + unless this is a simultabeous state at which multiple players can act. + + Returns: + A `dict` of `{action: probability}` for the specified player in the + supplied state. + """ + # Send the best-response probabilities for the best-responder + if state.current_player() == self.best_responder_id: + probs = {action_id: 0. 
for action_id in state.legal_actions()} + info_state = self.state_to_information_state[state.history_str()] + probs[self.tabular_best_response_map[info_state]] = 1. + return probs + + # Send the default probabilities for all other players + return self._policy.action_probabilities(state, player_id) + + @property + def policy(self): + return self._policy + + def copy_with_noise(self, alpha=0.0, beta=0.0): + """Copies this policy and adds noise, making it a Noisy Best Response. + + The policy's new probabilities P' on each state s become + P'(s) = alpha * epsilon + (1-alpha) * P(s) + + With P the former policy's probabilities, and epsilon ~ Softmax(beta * + Uniform) + + Args: + alpha: First mixture component + beta: Softmax 1/temperature component + + Returns: + Noisy copy of best response. + """ + return noisy_policy.NoisyPolicy(self, alpha, beta, self.all_states) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/best_response_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/best_response_test.py new file mode 100644 index 0000000..33b3895 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/best_response_test.py @@ -0,0 +1,189 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.algorithms.best_response.""" + +from absl.testing import absltest +from absl.testing import parameterized + +import numpy as np + +from open_spiel.python import games # pylint:disable=unused-import +from open_spiel.python import policy +from open_spiel.python.algorithms import best_response +from open_spiel.python.algorithms import expected_game_score +from open_spiel.python.algorithms import get_all_states +import pyspiel + + +class BestResponseTest(parameterized.TestCase, absltest.TestCase): + + def test_best_response_is_a_policy(self): + game = pyspiel.load_game("kuhn_poker") + test_policy = policy.UniformRandomPolicy(game) + br = best_response.BestResponsePolicy(game, policy=test_policy, player_id=0) + expected_policy = { + "0": 1, # Bet in case opponent folds when winning + "1": 1, # Bet in case opponent folds when winning + "2": 0, # Both equally good (we return the lowest action) + # Some of these will never happen under the best-response policy, + # but we have computed best-response actions anyway. 
+ "0pb": 0, # Fold - we're losing + "1pb": 1, # Call - we're 50-50 + "2pb": 1, # Call - we've won + } + self.assertEqual( + expected_policy, + {key: br.best_response_action(key) for key in expected_policy.keys()}) + + @parameterized.parameters(["kuhn_poker", "leduc_poker"]) + def test_cpp_and_python_implementations_are_identical(self, game_name): + game = pyspiel.load_game(game_name) + + python_policy = policy.UniformRandomPolicy(game) + pyspiel_policy = pyspiel.UniformRandomPolicy(game) + + all_states = get_all_states.get_all_states( + game, + depth_limit=-1, + include_terminals=False, + include_chance_states=False, + to_string=lambda s: s.information_state_string()) + + for current_player in range(game.num_players()): + python_br = best_response.BestResponsePolicy(game, current_player, + python_policy) + cpp_br = pyspiel.TabularBestResponse( + game, current_player, pyspiel_policy).get_best_response_policy() + + for state in all_states.values(): + if state.current_player() != current_player: + continue + + self.assertEqual( + python_br.action_probabilities(state), { + a: prob + for a, prob in cpp_br.action_probabilities(state).items() + if prob != 0 + }) + + @parameterized.parameters(("kuhn_poker", 2)) + def test_cpp_and_python_best_response_are_identical(self, game_name, + num_players): + game = pyspiel.load_game(game_name, {"players": num_players}) + + test_policy = policy.TabularPolicy(game) + for i_player in range(num_players): + best_resp_py_backend = best_response.BestResponsePolicy( + game, i_player, test_policy) + best_resp_cpp_backend = best_response.CPPBestResponsePolicy( + game, i_player, test_policy) + for state in best_resp_cpp_backend.all_states.values(): + if i_player == state.current_player(): + py_dict = best_resp_py_backend.action_probabilities(state) + cpp_dict = best_resp_cpp_backend.action_probabilities(state) + + # We do check like this, because the actions associated to a 0. prob + # do not necessarily appear + for key, value in py_dict.items(): + self.assertEqual(value, cpp_dict.get(key, 0.)) + for key, value in cpp_dict.items(): + self.assertEqual(value, py_dict.get(key, 0.)) + + @parameterized.parameters(("kuhn_poker", 2), ("kuhn_poker", 3)) + def test_cpp_and_python_value_are_identical(self, game_name, num_players): + game = pyspiel.load_game(game_name, {"players": num_players}) + test_policy = policy.TabularPolicy(game) + root_state = game.new_initial_state() + for i_player in range(num_players): + best_resp_py_backend = best_response.BestResponsePolicy( + game, i_player, test_policy) + best_resp_cpp_backend = best_response.CPPBestResponsePolicy( + game, i_player, test_policy) + + value_py_backend = best_resp_py_backend.value(root_state) + value_cpp_backend = best_resp_cpp_backend.value(root_state) + + self.assertTrue(np.allclose(value_py_backend, value_cpp_backend)) + + def test_best_response_tic_tac_toe_value_is_consistent(self): + # This test was failing because of use of str(state) in the best response, + # which is imperfect recall. We now use state.history_str() throughout. + + # Chose a policy at random; not the uniform random policy. + game = pyspiel.load_game("tic_tac_toe") + pi = policy.TabularPolicy(game) + rng = np.random.RandomState(1234) + pi.action_probability_array[:] = rng.rand(*pi.legal_actions_mask.shape) + pi.action_probability_array *= pi.legal_actions_mask + pi.action_probability_array /= np.sum( + pi.action_probability_array, axis=1, keepdims=True) + + # Compute a best response and verify the best response value is consistent. 
+ br = best_response.BestResponsePolicy(game, 1, pi) + self.assertAlmostEqual( + expected_game_score.policy_value(game.new_initial_state(), [pi, br])[1], + br.value(game.new_initial_state())) + + def test_best_response_oshi_zumo_simultaneous_game(self): + """Test best response computation for simultaneous game.""" + game = pyspiel.load_game("oshi_zumo(horizon=5,coins=5)") + test_policy = policy.UniformRandomPolicy(game) + br = best_response.BestResponsePolicy(game, policy=test_policy, player_id=0) + expected_policy = { + "0, 0, 0, 3, 0, 2": 1, + "0, 0, 1, 4, 3, 1": 0, + "0, 0, 4, 1, 0, 2, 0, 2": 1, + "0, 1, 1, 0, 1, 4": 1, + "0, 1, 4, 1, 0, 0, 0, 1": 1, + "0, 2, 2, 2, 3, 0, 0, 0": 0, + "0, 5, 0, 0, 0, 0, 3, 0": 1 + } + self.assertEqual( + expected_policy, + {key: br.best_response_action(key) for key in expected_policy}) + self.assertAlmostEqual(br.value(game.new_initial_state()), 0.856471051954) + + def test_best_response_prisoner_dilemma_simultaneous_game(self): + """Test best response computation for simultaneous game.""" + game = pyspiel.load_game( + "python_iterated_prisoners_dilemma(max_game_length=5)") + test_policy = policy.UniformRandomPolicy(game) + br = best_response.BestResponsePolicy(game, policy=test_policy, player_id=0) + + # Best policy is always to defect; we verify this for a handful of states + self.assertEqual(br.best_response_action("us:CCCC op:CCCC"), 1) + self.assertEqual(br.best_response_action("us:DDDD op:CCCC"), 1) + self.assertEqual(br.best_response_action("us:CDCD op:DCDC"), 1) + self.assertEqual(br.best_response_action("us:CCCC op:DDDD"), 1) + + # Expected value per turn = 5.5 (avg of 1 and 10) + # Expected game length = sum(0.875**i for i in range(5)) = 3.896728515625 + # Game value = 5.5 * 3.896728515625 = 21.4320068359375 + self.assertAlmostEqual(br.value(game.new_initial_state()), 21.4320068359375) + + +class TabularBestResponseMDPTest(absltest.TestCase): + + def test_tabular_best_response_mdp(self): + # See pybind11/policy.cc for these functions. + game = pyspiel.load_game("kuhn_poker") + uniform_random_policy = pyspiel.UniformRandomPolicy(game) + tbr_mdp = pyspiel.TabularBestResponseMDP(game, uniform_random_policy) + tbr_info = tbr_mdp.nash_conv() + self.assertGreater(tbr_info.nash_conv, 0) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/boltzmann_tabular_qlearner.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/boltzmann_tabular_qlearner.py new file mode 100644 index 0000000..eaaa27b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/boltzmann_tabular_qlearner.py @@ -0,0 +1,90 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Boltzmann Q learning agent. + +This algorithm is a variation of Q learning that uses action selection +based on boltzmann probability interpretation of Q-values. 
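[Editor's note] The selection rule this module implements maps Q-values to a softmax (Boltzmann) distribution; a tiny standalone sketch of that mapping, with made-up Q-values (the agent itself does this in `_softmax` below):

```python
import numpy as np

def boltzmann_probs(q_values, temperature):
    # probs[i] is proportional to exp(q_values[i] / temperature),
    # matching the positive-temperature branch of _softmax.
    prefs = np.exp(np.asarray(q_values, dtype=float) / temperature)
    return prefs / prefs.sum()

print(boltzmann_probs([1.0, 0.0], temperature=0.5))  # strongly favours the first action
print(boltzmann_probs([1.0, 0.0], temperature=5.0))  # closer to uniform
```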
+ +For more details, see equation (2) page 2 in + https://arxiv.org/pdf/1109.1528.pdf +""" + +import numpy as np + +from open_spiel.python import rl_tools +from open_spiel.python.algorithms import tabular_qlearner + + +class BoltzmannQLearner(tabular_qlearner.QLearner): + """Tabular Boltzmann Q-Learning agent. + + See open_spiel/python/examples/tic_tac_toe_qlearner.py for an usage example. + + The tic_tac_toe example uses the standard Qlearner. Using the + BoltzmannQlearner is + identical and only differs in the initialization of the agents. + """ + + def __init__(self, + player_id, + num_actions, + step_size=0.1, + discount_factor=1.0, + temperature_schedule=rl_tools.ConstantSchedule(.5), + centralized=False): + super().__init__( + player_id, + num_actions, + step_size=step_size, + discount_factor=discount_factor, + epsilon_schedule=temperature_schedule, + centralized=centralized) + + def _softmax(self, info_state, legal_actions, temperature): + """Action selection based on boltzmann probability interpretation of Q-values. + + For more details, see equation (2) page 2 in + https://arxiv.org/pdf/1109.1528.pdf + + Args: + info_state: hashable representation of the information state. + legal_actions: list of actions at `info_state`. + temperature: temperature used for softmax. + + Returns: + A valid soft-max selected action and valid action probabilities. + """ + probs = np.zeros(self._num_actions) + + if temperature > 0.0: + probs += [ + np.exp((1 / temperature) * self._q_values[info_state][i]) + for i in range(self._num_actions) + ] + probs /= np.sum(probs) + else: + # Temperature = 0 causes normal greedy action selection + greedy_q = max([self._q_values[info_state][a] for a in legal_actions]) + greedy_actions = [ + a for a in legal_actions if self._q_values[info_state][a] == greedy_q + ] + + probs[greedy_actions] += 1 / len(greedy_actions) + + action = np.random.choice(range(self._num_actions), p=probs) + return action, probs + + def _get_action_probs(self, info_state, legal_actions, epsilon): + """Returns a selected action and the probabilities of legal actions.""" + return self._softmax(info_state, legal_actions, temperature=epsilon) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/boltzmann_tabular_qlearner_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/boltzmann_tabular_qlearner_test.py new file mode 100644 index 0000000..2557586 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/boltzmann_tabular_qlearner_test.py @@ -0,0 +1,67 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for open_spiel.python.algorithms.boltzmann_tabular_qlearner.""" + +from absl.testing import absltest +import numpy as np + +from open_spiel.python import rl_environment +from open_spiel.python.algorithms import boltzmann_tabular_qlearner +import pyspiel + +# Fixed seed to make test non stochastic. +SEED = 10000 + +# A simple two-action game encoded as an EFG game. 
Going left gets -1, going +# right gets a +1. +SIMPLE_EFG_DATA = """ + EFG 2 R "Simple single-agent problem" { "Player 1" } "" + p "ROOT" 1 1 "ROOT" { "L" "R" } 0 + t "L" 1 "Outcome L" { -1.0 } + t "R" 2 "Outcome R" { 1.0 } +""" + + +class BoltzmannQlearnerTest(absltest.TestCase): + + def test_simple_game(self): + game = pyspiel.load_efg_game(SIMPLE_EFG_DATA) + env = rl_environment.Environment(game=game) + + agent = boltzmann_tabular_qlearner.BoltzmannQLearner( + 0, game.num_distinct_actions()) + total_reward = 0 + + for _ in range(100): + total_eval_reward = 0 + for _ in range(1000): + time_step = env.reset() + while not time_step.last(): + agent_output = agent.step(time_step) + time_step = env.step([agent_output.action]) + total_reward += time_step.rewards[0] + agent.step(time_step) + self.assertGreaterEqual(total_reward, 75) + for _ in range(1000): + time_step = env.reset() + while not time_step.last(): + agent_output = agent.step(time_step, is_evaluation=True) + time_step = env.step([agent_output.action]) + total_eval_reward += time_step.rewards[0] + self.assertGreaterEqual(total_eval_reward, 250) + + +if __name__ == "__main__": + np.random.seed(SEED) + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/cfr.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/cfr.py new file mode 100644 index 0000000..b43b5ae --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/cfr.py @@ -0,0 +1,502 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Python implementation of the counterfactual regret minimization algorithm. + +One iteration of CFR consists of: +1) Compute current strategy from regrets (e.g. using Regret Matching). +2) Compute values using the current strategy +3) Compute regrets from these values + +The average policy is what converges to a Nash Equilibrium. +""" + +import collections +import attr +import numpy as np + +from open_spiel.python import policy +import pyspiel + + +@attr.s +class _InfoStateNode(object): + """An object wrapping values associated to an information state.""" + # The list of the legal actions. + legal_actions = attr.ib() + index_in_tabular_policy = attr.ib() + # Map from information states string representations and actions to the + # counterfactual regrets, accumulated over the policy iterations + cumulative_regret = attr.ib(factory=lambda: collections.defaultdict(float)) + # Same as above for the cumulative of the policy probabilities computed + # during the policy iterations + cumulative_policy = attr.ib(factory=lambda: collections.defaultdict(float)) + + +def _apply_regret_matching_plus_reset(info_state_nodes): + """Resets negative cumulative regrets to 0. + + Regret Matching+ corresponds to the following cumulative regrets update: + cumulative_regrets = max(cumulative_regrets + regrets, 0) + + This must be done at the level of the information set, and thus cannot be + done during the tree traversal (which is done on histories). 
It is thus + performed as an additional step. + + This function is a module level function to be reused by both CFRSolver and + CFRBRSolver. + + Args: + info_state_nodes: A dictionary {`info_state_str` -> `_InfoStateNode`}. + """ + for info_state_node in info_state_nodes.values(): + action_to_cum_regret = info_state_node.cumulative_regret + for action, cumulative_regret in action_to_cum_regret.items(): + if cumulative_regret < 0: + action_to_cum_regret[action] = 0 + + +def _update_current_policy(current_policy, info_state_nodes): + """Updates in place `current_policy` from the cumulative regrets. + + This function is a module level function to be reused by both CFRSolver and + CFRBRSolver. + + Args: + current_policy: A `policy.TabularPolicy` to be updated in-place. + info_state_nodes: A dictionary {`info_state_str` -> `_InfoStateNode`}. + """ + for info_state, info_state_node in info_state_nodes.items(): + state_policy = current_policy.policy_for_key(info_state) + + for action, value in _regret_matching( + info_state_node.cumulative_regret, + info_state_node.legal_actions).items(): + state_policy[action] = value + + +def _update_average_policy(average_policy, info_state_nodes): + """Updates in place `average_policy` to the average of all policies iterated. + + This function is a module level function to be reused by both CFRSolver and + CFRBRSolver. + + Args: + average_policy: A `policy.TabularPolicy` to be updated in-place. + info_state_nodes: A dictionary {`info_state_str` -> `_InfoStateNode`}. + """ + for info_state, info_state_node in info_state_nodes.items(): + info_state_policies_sum = info_state_node.cumulative_policy + state_policy = average_policy.policy_for_key(info_state) + probabilities_sum = sum(info_state_policies_sum.values()) + if probabilities_sum == 0: + num_actions = len(info_state_node.legal_actions) + for action in info_state_node.legal_actions: + state_policy[action] = 1 / num_actions + else: + for action, action_prob_sum in info_state_policies_sum.items(): + state_policy[action] = action_prob_sum / probabilities_sum + + +class _CFRSolverBase(object): + r"""A base class for both CFR and CFR-BR. + + The main iteration loop is implemented in `evaluate_and_update_policy`: + + ```python + game = pyspiel.load_game("game_name") + initial_state = game.new_initial_state() + + solver = Solver(game) + + for i in range(num_iterations): + solver.evaluate_and_update_policy() + solver.current_policy() # Access the current policy + solver.average_policy() # Access the average policy + ``` + """ + + def __init__(self, game, alternating_updates, linear_averaging, + regret_matching_plus): + # pyformat: disable + """Initializer. + + Args: + game: The `pyspiel.Game` to run on. + alternating_updates: If `True`, alternating updates are performed: for + each player, we compute and update the cumulative regrets and policies. + In that case, and when the policy is frozen during tree traversal, the + cache is reset after each update for one player. + Otherwise, the update is simultaneous. + linear_averaging: Whether to use linear averaging, i.e. 
+ cumulative_policy[info_state][action] += ( + iteration_number * reach_prob * action_prob) + + or not: + + cumulative_policy[info_state][action] += reach_prob * action_prob + regret_matching_plus: Whether to use Regret Matching+: + cumulative_regrets = max(cumulative_regrets + regrets, 0) + or simply regret matching: + cumulative_regrets = cumulative_regrets + regrets + """ + # pyformat: enable + assert game.get_type().dynamics == pyspiel.GameType.Dynamics.SEQUENTIAL, ( + "CFR requires sequential games. If you're trying to run it " + + "on a simultaneous (or normal-form) game, please first transform it " + + "using turn_based_simultaneous_game.") + + self._game = game + self._num_players = game.num_players() + self._root_node = self._game.new_initial_state() + + # This is for returning the current policy and average policy to a caller + self._current_policy = policy.TabularPolicy(game) + self._average_policy = self._current_policy.__copy__() + + self._info_state_nodes = {} + self._initialize_info_state_nodes(self._root_node) + + self._iteration = 0 # For possible linear-averaging. + self._linear_averaging = linear_averaging + self._alternating_updates = alternating_updates + self._regret_matching_plus = regret_matching_plus + + def _initialize_info_state_nodes(self, state): + """Initializes info_state_nodes. + + Create one _InfoStateNode per infoset. We could also initialize the node + when we try to access it and it does not exist. + + Args: + state: The current state in the tree walk. This should be the root node + when we call this function from a CFR solver. + """ + if state.is_terminal(): + return + + if state.is_chance_node(): + for action, unused_action_prob in state.chance_outcomes(): + self._initialize_info_state_nodes(state.child(action)) + return + + current_player = state.current_player() + info_state = state.information_state_string(current_player) + + info_state_node = self._info_state_nodes.get(info_state) + if info_state_node is None: + legal_actions = state.legal_actions(current_player) + info_state_node = _InfoStateNode( + legal_actions=legal_actions, + index_in_tabular_policy=self._current_policy.state_lookup[info_state]) + self._info_state_nodes[info_state] = info_state_node + + for action in info_state_node.legal_actions: + self._initialize_info_state_nodes(state.child(action)) + + def current_policy(self): + """Returns the current policy as a TabularPolicy. + + WARNING: The same object, updated in-place will be returned! You can copy + it (or its `action_probability_array` field). + + For CFR/CFR+, this policy does not necessarily have to converge. It + converges with high probability for CFR-BR. + """ + return self._current_policy + + def average_policy(self): + """Returns the average of all policies iterated. + + WARNING: The same object, updated in-place will be returned! You can copy + it (or its `action_probability_array` field). + + This average policy converges to a Nash policy as the number of iterations + increases. + + The policy is computed using the accumulated policy probabilities computed + using `evaluate_and_update_policy`. + + Returns: + A `policy.TabularPolicy` object (shared between calls) giving the (linear) + time averaged policy (weighted by player reach probabilities) for both + players. 
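[Editor's note] Both policies returned by this solver come from simple normalizations: the current policy from regret matching over cumulative regrets (the module-level `_regret_matching` helper further down), and the average policy from the normalized `cumulative_policy` sums. A tiny worked example of the regret-matching step, with made-up regret values:

```python
cumulative_regrets = {0: 2.0, 1: -1.0, 2: 1.0}
positive = {a: max(r, 0.0) for a, r in cumulative_regrets.items()}
total = sum(positive.values())
if total > 0:
    current_policy = {a: p / total for a, p in positive.items()}
else:
    # No positive regret: fall back to the uniform policy over legal actions.
    current_policy = {a: 1.0 / len(positive) for a in positive}
print(current_policy)  # {0: 0.666..., 1: 0.0, 2: 0.333...}
```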
+ """ + _update_average_policy(self._average_policy, self._info_state_nodes) + return self._average_policy + + def _compute_counterfactual_regret_for_player(self, state, policies, + reach_probabilities, player): + """Increments the cumulative regrets and policy for `player`. + + Args: + state: The initial game state to analyze from. + policies: A list of `num_players` callables taking as input an + `info_state_node` and returning a {action: prob} dictionary. For CFR, + this is simply returning the current policy, but this can be used in + the CFR-BR solver, to prevent code duplication. If None, + `_get_infostate_policy` is used. + reach_probabilities: The probability for each player of reaching `state` + as a numpy array [prob for player 0, for player 1,..., for chance]. + `player_reach_probabilities[player]` will work in all cases. + player: The 0-indexed player to update the values for. If `None`, the + update for all players will be performed. + + Returns: + The utility of `state` for all players, assuming all players follow the + current policy defined by `self.Policy`. + """ + if state.is_terminal(): + return np.asarray(state.returns()) + + if state.is_chance_node(): + state_value = 0.0 + for action, action_prob in state.chance_outcomes(): + assert action_prob > 0 + new_state = state.child(action) + new_reach_probabilities = reach_probabilities.copy() + new_reach_probabilities[-1] *= action_prob + state_value += (action_prob * + self._compute_counterfactual_regret_for_player( + new_state, policies, new_reach_probabilities, + player)) + return state_value + + current_player = state.current_player() + info_state = state.information_state_string(current_player) + + # No need to continue on this history branch as no update will be performed + # for any player. + # The value we return here is not used in practice. If the conditional + # statement is True, then the last taken action has probability 0 of + # occurring, so the returned value is not impacting the parent node value. + if all(reach_probabilities[:-1] == 0): + return np.zeros(self._num_players) + + state_value = np.zeros(self._num_players) + + # The utilities of the children states are computed recursively. As the + # regrets are added to the information state regrets for each state in that + # information state, the recursive call can only be made once per child + # state. Therefore, the utilities are cached. + children_utilities = {} + + info_state_node = self._info_state_nodes[info_state] + if policies is None: + info_state_policy = self._get_infostate_policy(info_state) + else: + info_state_policy = policies[current_player](info_state) + for action in state.legal_actions(): + action_prob = info_state_policy.get(action, 0.) + new_state = state.child(action) + new_reach_probabilities = reach_probabilities.copy() + new_reach_probabilities[current_player] *= action_prob + child_utility = self._compute_counterfactual_regret_for_player( + new_state, + policies=policies, + reach_probabilities=new_reach_probabilities, + player=player) + + state_value += action_prob * child_utility + children_utilities[action] = child_utility + + # If we are performing alternating updates, and the current player is not + # the current_player, we skip the cumulative values update. + # If we are performing simultaneous updates, we do update the cumulative + # values. 
+ simulatenous_updates = player is None + if not simulatenous_updates and current_player != player: + return state_value + + reach_prob = reach_probabilities[current_player] + counterfactual_reach_prob = ( + np.prod(reach_probabilities[:current_player]) * + np.prod(reach_probabilities[current_player + 1:])) + state_value_for_player = state_value[current_player] + + for action, action_prob in info_state_policy.items(): + cfr_regret = counterfactual_reach_prob * ( + children_utilities[action][current_player] - state_value_for_player) + + info_state_node.cumulative_regret[action] += cfr_regret + if self._linear_averaging: + info_state_node.cumulative_policy[ + action] += self._iteration * reach_prob * action_prob + else: + info_state_node.cumulative_policy[action] += reach_prob * action_prob + + return state_value + + def _get_infostate_policy(self, info_state_str): + """Returns an {action: prob} dictionary for the policy on `info_state`.""" + info_state_node = self._info_state_nodes[info_state_str] + prob_vec = self._current_policy.action_probability_array[ + info_state_node.index_in_tabular_policy] + return { + action: prob_vec[action] for action in info_state_node.legal_actions + } + + +def _regret_matching(cumulative_regrets, legal_actions): + """Returns an info state policy by applying regret-matching. + + Args: + cumulative_regrets: A {action: cumulative_regret} dictionary. + legal_actions: the list of legal actions at this state. + + Returns: + A dict of action -> prob for all legal actions. + """ + regrets = cumulative_regrets.values() + sum_positive_regrets = sum((regret for regret in regrets if regret > 0)) + + info_state_policy = {} + if sum_positive_regrets > 0: + for action in legal_actions: + positive_action_regret = max(0.0, cumulative_regrets[action]) + info_state_policy[action] = ( + positive_action_regret / sum_positive_regrets) + else: + for action in legal_actions: + info_state_policy[action] = 1.0 / len(legal_actions) + return info_state_policy + + +class _CFRSolver(_CFRSolverBase): + r"""Implements the Counterfactual Regret Minimization (CFR) algorithm. + + The algorithm computes an approximate Nash policy for 2 player zero-sum games. + + CFR can be view as a policy iteration algorithm. Importantly, the policies + themselves do not converge to a Nash policy, but their average does. + + The main iteration loop is implemented in `evaluate_and_update_policy`: + + ```python + game = pyspiel.load_game("game_name") + initial_state = game.new_initial_state() + + cfr_solver = CFRSolver(game) + + for i in range(num_iterations): + cfr.evaluate_and_update_policy() + ``` + + Once the policy has converged, the average policy (which converges to the Nash + policy) can be computed: + ```python + average_policy = cfr_solver.average_policy() + ``` + + # Policy and average policy + + policy(0) and average_policy(0) are not technically defined, but these + methods will return arbitrarily the uniform_policy. 
+ + Then, we are expected to have: + + ``` + for t in range(1, N): + cfr_solver.evaluate_and_update_policy() + policy(t) = RM or RM+ of cumulative regrets + avg_policy(t)(s, a) ~ \sum_{k=1}^t player_reach_prob(t)(s) * policy(k)(s, a) + + With Linear Averaging, the avg_policy is proportional to: + \sum_{k=1}^t k * player_reach_prob(t)(s) * policy(k)(s, a) + ``` + """ + + def evaluate_and_update_policy(self): + """Performs a single step of policy evaluation and policy improvement.""" + self._iteration += 1 + if self._alternating_updates: + for player in range(self._game.num_players()): + self._compute_counterfactual_regret_for_player( + self._root_node, + policies=None, + reach_probabilities=np.ones(self._game.num_players() + 1), + player=player) + if self._regret_matching_plus: + _apply_regret_matching_plus_reset(self._info_state_nodes) + _update_current_policy(self._current_policy, self._info_state_nodes) + else: + self._compute_counterfactual_regret_for_player( + self._root_node, + policies=None, + reach_probabilities=np.ones(self._game.num_players() + 1), + player=None) + if self._regret_matching_plus: + _apply_regret_matching_plus_reset(self._info_state_nodes) + _update_current_policy(self._current_policy, self._info_state_nodes) + + +class CFRPlusSolver(_CFRSolver): + """CFR+ implementation. + + The algorithm computes an approximate Nash policy for 2 player zero-sum games. + More generally, it should approach a no-regret set, which corresponds to the + set of coarse-correlated equilibria. See https://arxiv.org/abs/1305.0034 + + CFR can be view as a policy iteration algorithm. Importantly, the policies + themselves do not converge to a Nash policy, but their average does. + + See https://poker.cs.ualberta.ca/publications/2015-ijcai-cfrplus.pdf + + CFR+ is CFR with the following modifications: + - use Regret Matching+ instead of Regret Matching. + - use alternating updates instead of simultaneous updates. + - use linear averaging. + + Usage: + + ```python + game = pyspiel.load_game("game_name") + initial_state = game.new_initial_state() + + cfr_solver = CFRSolver(game) + + for i in range(num_iterations): + cfr.evaluate_and_update_policy() + ``` + + Once the policy has converged, the average policy (which converges to the Nash + policy) can be computed: + ```python + average_policy = cfr_solver.average_policy() + ``` + """ + + def __init__(self, game): + super(CFRPlusSolver, self).__init__( + game, + regret_matching_plus=True, + alternating_updates=True, + linear_averaging=True) + + +class CFRSolver(_CFRSolver): + """Implements the Counterfactual Regret Minimization (CFR) algorithm. + + See https://poker.cs.ualberta.ca/publications/NIPS07-cfr.pdf + + NOTE: We use alternating updates (which was not the case in the original + paper) because it has been proved to be far more efficient. + """ + + def __init__(self, game): + super(CFRSolver, self).__init__( + game, + regret_matching_plus=False, + alternating_updates=True, + linear_averaging=False) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/cfr_br.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/cfr_br.py new file mode 100644 index 0000000..0fd8433 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/cfr_br.py @@ -0,0 +1,134 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Python implementation of the CFR-BR algorithm.""" + +import numpy as np + +from open_spiel.python import policy +from open_spiel.python.algorithms import cfr +from open_spiel.python.algorithms import exploitability +import pyspiel + +# pylint: disable=protected-access +_CFRSolverBase = cfr._CFRSolverBase +_update_current_policy = cfr._update_current_policy +_apply_regret_matching_plus_reset = cfr._apply_regret_matching_plus_reset +# pylint: enable=protected-access + + +class CFRBRSolver(_CFRSolverBase): + """Implements the Counterfactual Regret Minimization (CFR-BR) algorithm. + + This is Counterfactual Regret Minimization against Best Response, from + Michael Johanson and al., 2012, Finding Optimal Abstract Strategies in + Extensive-Form Games, + https://poker.cs.ualberta.ca/publications/AAAI12-cfrbr.pdf). + + The algorithm + computes an approximate Nash policy for n-player zero-sum games, but the + implementation is currently restricted to 2-player. + + It uses an exact Best Response and full tree traversal. + + One iteration for a n-player game consists of the following: + + - Compute the BR of each player against the rest of the players. + - Then, for each player p sequentially (from player 0 to N-1): + - Compute the conterfactual reach probabilities and action values for player + p, playing against the set of the BR for all other players. + - Update the player `p` policy using these values. + + CFR-BR should converge with high probability (see the paper), but we can also + compute the time-averaged strategy. + + The implementation reuses the `action_values_vs_best_response` module and + thus uses TabularPolicies. This will run only for smallish games. + """ + + def __init__(self, game, linear_averaging=False, regret_matching_plus=False): + # pyformat: disable + """Initializer. + + Args: + game: The `pyspiel.Game` to run on. + linear_averaging: Whether to use linear averaging, i.e. + cumulative_policy[info_state][action] += ( + iteration_number * reach_prob * action_prob) + + or not: + + cumulative_policy[info_state][action] += reach_prob * action_prob + regret_matching_plus: Whether to use Regret Matching+: + cumulative_regrets = max(cumulative_regrets + regrets, 0) + or simply regret matching: + cumulative_regrets = cumulative_regrets + regrets + """ + # pyformat: enable + if game.num_players() != 2: + raise ValueError("Game {} does not have {} players.".format(game, 2)) + + assert game.get_type().dynamics == pyspiel.GameType.Dynamics.SEQUENTIAL, ( + "CFR requires sequential games. 
If you're trying to run it " + + "on a simultaneous (or normal-form) game, please first transform it " + + "using turn_based_simultaneous_game.") + + super(CFRBRSolver, self).__init__( + game, + alternating_updates=True, + linear_averaging=linear_averaging, + regret_matching_plus=regret_matching_plus) + + self._best_responses = {i: None for i in range(game.num_players())} + + def _compute_best_responses(self): + """Computes each player best-response against the pool of other players.""" + + def policy_fn(state): + key = state.information_state_string() + return self._get_infostate_policy(key) + + current_policy = policy.tabular_policy_from_callable(self._game, policy_fn) + + for player_id in range(self._game.num_players()): + self._best_responses[player_id] = exploitability.best_response( + self._game, current_policy, player_id) + + def evaluate_and_update_policy(self): + """Performs a single step of policy evaluation and policy improvement.""" + self._iteration += 1 + + self._compute_best_responses() + + for player in range(self._num_players): + # We do not use policies, to not have to call `state.information_state` + # several times (in here and within policy). + policies = [] + for p in range(self._num_players): + # pylint: disable=g-long-lambda + policies.append( + lambda infostate_str, p=p: + {self._best_responses[p]["best_response_action"][infostate_str]: 1}) + # pylint: enable=g-long-lambda + policies[player] = self._get_infostate_policy + + self._compute_counterfactual_regret_for_player( + state=self._root_node, + policies=policies, + reach_probabilities=np.ones(self._num_players + 1), + player=player) + + if self._regret_matching_plus: + _apply_regret_matching_plus_reset(self._info_state_nodes) + _update_current_policy(self._current_policy, self._info_state_nodes) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/cfr_br_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/cfr_br_test.py new file mode 100644 index 0000000..ae15aa7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/cfr_br_test.py @@ -0,0 +1,103 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for open_spiel.python.algorithms.cfr.""" + +import itertools + +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np + +from open_spiel.python import policy +from open_spiel.python.algorithms import cfr_br +from open_spiel.python.algorithms import expected_game_score +from open_spiel.python.algorithms import exploitability +import pyspiel + +_KUHN_GAME = pyspiel.load_game("kuhn_poker") +_LEDUC_GAME = pyspiel.load_game("leduc_poker") + +_KUHN_UNIFORM_POLICY = policy.TabularPolicy(_KUHN_GAME) +_LEDUC_UNIFORM_POLICY = policy.TabularPolicy(_LEDUC_GAME) +_EXPECTED_EXPLOITABILITIES_CFRBR_KUHN = [ + 0.9166666666666666, 0.33333333333333337, 0.3194444444444445, + 0.2604166666666667, 0.22666666666666674 +] +_EXPECTED_EXPLOITABILITIES_CFRBR_LEDUC = [ + 4.747222222222222, 4.006867283950617, 3.4090489231017034, + 2.8982539553095172, 2.5367193593344504 +] + + +class CFRBRTest(parameterized.TestCase, absltest.TestCase): + + @parameterized.parameters( + list(itertools.product([True, False], [True, False]))) + def test_policy_zero_is_uniform(self, linear_averaging, regret_matching_plus): + game = pyspiel.load_game("leduc_poker") + cfr_solver = cfr_br.CFRBRSolver( + game, + regret_matching_plus=regret_matching_plus, + linear_averaging=linear_averaging) + + np.testing.assert_array_equal( + _LEDUC_UNIFORM_POLICY.action_probability_array, + cfr_solver.current_policy().action_probability_array) + np.testing.assert_array_equal( + _LEDUC_UNIFORM_POLICY.action_probability_array, + cfr_solver.average_policy().action_probability_array) + + def test_policy_and_average_policy(self): + game = pyspiel.load_game("kuhn_poker") + cfrbr_solver = cfr_br.CFRBRSolver(game) + for _ in range(300): + cfrbr_solver.evaluate_and_update_policy() + average_policy = cfrbr_solver.average_policy() + average_policy_values = expected_game_score.policy_value( + game.new_initial_state(), [average_policy] * 2) + # 1/18 is the Nash value. See https://en.wikipedia.org/wiki/Kuhn_poker + np.testing.assert_allclose( + average_policy_values, [-1 / 18, 1 / 18], atol=1e-3) + + cfrbr_solver.current_policy() + + @parameterized.parameters([ + (_KUHN_GAME, pyspiel.CFRBRSolver, _EXPECTED_EXPLOITABILITIES_CFRBR_KUHN), + (_KUHN_GAME, cfr_br.CFRBRSolver, _EXPECTED_EXPLOITABILITIES_CFRBR_KUHN), + (_LEDUC_GAME, pyspiel.CFRBRSolver, + _EXPECTED_EXPLOITABILITIES_CFRBR_LEDUC), + (_LEDUC_GAME, cfr_br.CFRBRSolver, _EXPECTED_EXPLOITABILITIES_CFRBR_LEDUC), + ]) + def test_cpp_and_python_cfr_br(self, game, solver_cls, + expected_exploitability): + solver = solver_cls(game) + for step in range(5): + solver.evaluate_and_update_policy() + + # We do not compare the policy directly as we do not have an easy way to + # convert one to the other, so we use the exploitability as a proxy. 
+ avg_policy = solver.average_policy() + if solver_cls == pyspiel.CFRBRSolver: + exploitability_ = pyspiel.nash_conv(game, avg_policy) + else: + exploitability_ = exploitability.nash_conv(game, avg_policy) + + self.assertAlmostEqual( + expected_exploitability[step], exploitability_, places=10 + ) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/cfr_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/cfr_test.py new file mode 100644 index 0000000..5d881f1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/cfr_test.py @@ -0,0 +1,329 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.algorithms.cfr.""" + +import itertools + +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np + +from open_spiel.python import policy +from open_spiel.python.algorithms import cfr +from open_spiel.python.algorithms import expected_game_score +from open_spiel.python.algorithms import exploitability +import pyspiel + +_KUHN_GAME = pyspiel.load_game("kuhn_poker") +_LEDUC_GAME = pyspiel.load_game("leduc_poker") + +_KUHN_UNIFORM_POLICY = policy.TabularPolicy(_KUHN_GAME) +_LEDUC_UNIFORM_POLICY = policy.TabularPolicy(_LEDUC_GAME) + + +class ModuleLevelFunctionTest(absltest.TestCase): + + def test__update_current_policy(self): + game = pyspiel.load_game("kuhn_poker") + tabular_policy = policy.TabularPolicy(game) + + cumulative_regrets = np.arange(0, 12 * 2).reshape((12, 2)) + expected_policy = cumulative_regrets / np.sum( + cumulative_regrets, axis=-1, keepdims=True) + nodes_indices = { + u"0": 0, + u"0pb": 1, + u"1": 2, + u"1pb": 3, + u"2": 4, + u"2pb": 5, + u"1p": 6, + u"1b": 7, + u"2p": 8, + u"2b": 9, + u"0p": 10, + u"0b": 11, + } + # pylint: disable=g-complex-comprehension + info_state_nodes = { + key: cfr._InfoStateNode( + legal_actions=[0, 1], + index_in_tabular_policy=None, + cumulative_regret=dict(enumerate(cumulative_regrets[index])), + cumulative_policy=None) for key, index in nodes_indices.items() + } + # pylint: enable=g-complex-comprehension + + cfr._update_current_policy(tabular_policy, info_state_nodes) + + np.testing.assert_array_equal(expected_policy, + tabular_policy.action_probability_array) + + +class CFRTest(parameterized.TestCase, absltest.TestCase): + + @parameterized.parameters( + list(itertools.product([True, False], [True, False], [True, False]))) + def test_policy_zero_is_uniform(self, linear_averaging, regret_matching_plus, + alternating_updates): + # We use Leduc and not Kuhn, because Leduc has illegal actions and Kuhn does + # not. 
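+    # A freshly constructed solver has performed no updates yet, so both its
+    # current and its average policy should still be uniform random over the
+    # legal actions.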
+ game = pyspiel.load_game("leduc_poker") + cfr_solver = cfr._CFRSolver( + game, + regret_matching_plus=regret_matching_plus, + linear_averaging=linear_averaging, + alternating_updates=alternating_updates) + + np.testing.assert_array_equal( + _LEDUC_UNIFORM_POLICY.action_probability_array, + cfr_solver.current_policy().action_probability_array) + np.testing.assert_array_equal( + _LEDUC_UNIFORM_POLICY.action_probability_array, + cfr_solver.average_policy().action_probability_array) + + def test_cfr_kuhn_poker(self): + game = pyspiel.load_game("kuhn_poker") + cfr_solver = cfr.CFRSolver(game) + for _ in range(300): + cfr_solver.evaluate_and_update_policy() + average_policy = cfr_solver.average_policy() + average_policy_values = expected_game_score.policy_value( + game.new_initial_state(), [average_policy] * 2) + # 1/18 is the Nash value. See https://en.wikipedia.org/wiki/Kuhn_poker + np.testing.assert_allclose( + average_policy_values, [-1 / 18, 1 / 18], atol=1e-3) + + def test_cfr_plus_kuhn_poker(self): + game = pyspiel.load_game("kuhn_poker") + cfr_solver = cfr.CFRPlusSolver(game) + for _ in range(200): + cfr_solver.evaluate_and_update_policy() + average_policy = cfr_solver.average_policy() + average_policy_values = expected_game_score.policy_value( + game.new_initial_state(), [average_policy] * 2) + # 1/18 is the Nash value. See https://en.wikipedia.org/wiki/Kuhn_poker + np.testing.assert_allclose( + average_policy_values, [-1 / 18, 1 / 18], atol=1e-3) + + def test_cfr_plus_solver_best_response_mdp(self): + game = pyspiel.load_game("kuhn_poker") + cfr_solver = cfr.CFRPlusSolver(game) + for _ in range(200): + cfr_solver.evaluate_and_update_policy() + average_policy = cfr_solver.average_policy() + pyspiel_avg_policy = policy.python_policy_to_pyspiel_policy(average_policy) + br_computer = pyspiel.TabularBestResponseMDP(game, pyspiel_avg_policy) + br_info = br_computer.exploitability() + self.assertLessEqual(br_info.exploitability, 0.001) + + def test_cfr_cce_ce_dist_goofspiel(self): + """Copy of the TestCCEDistCFRGoofSpiel in corr_dist_test.cc.""" + game = pyspiel.load_game( + "turn_based_simultaneous_game(game=goofspiel(num_cards=3,points_order=" + "descending,returns_type=total_points))") + for num_iterations in [1, 10, 100]: + policies = [] + cfr_solver = cfr.CFRSolver(game) + for _ in range(num_iterations): + cfr_solver.evaluate_and_update_policy() + policies.append( + policy.python_policy_to_pyspiel_policy(cfr_solver.current_policy())) + mu = pyspiel.uniform_correlation_device(policies) + cce_dist_info = pyspiel.cce_dist(game, mu) + print("goofspiel, cce test num_iters: {}, cce_dist: {}, per player: {}" + .format(num_iterations, cce_dist_info.dist_value, + cce_dist_info.deviation_incentives)) + # Try converting one of the BR policies: + _ = policy.pyspiel_policy_to_python_policy( + game, cce_dist_info.best_response_policies[0]) + + # Assemble the same correlation device manually, just as an example for + # how to do non-uniform distributions of them and to test the python + # bindings for lists of tuples works properly + uniform_prob = 1.0 / len(policies) + mu2 = [(uniform_prob, policy) for policy in policies] + cce_dist_info2 = pyspiel.cce_dist(game, mu2) + self.assertAlmostEqual(cce_dist_info2.dist_value, + sum(cce_dist_info.deviation_incentives)) + # Test the CEDist function too, why not. Disable the exact one, as it + # takes too long for a test. 
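+      # The sampled variant below approximates the exact determinization by
+      # drawing 100 deterministic joint policies from the correlation device.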
+ # ce_dist_info = pyspiel.ce_dist(game, pyspiel.determinize_corr_dev(mu)) + ce_dist_info = pyspiel.ce_dist( + game, pyspiel.sampled_determinize_corr_dev(mu, 100)) + print("goofspiel, ce test num_iters: {}, ce_dist: {}, per player: {}" + .format(num_iterations, ce_dist_info.dist_value, + ce_dist_info.deviation_incentives)) + print("number of conditional best responses per player:") + for p in range(game.num_players()): + print(" player {}, num: {}".format( + p, len(ce_dist_info.conditional_best_response_policies[p]))) + + @parameterized.parameters( + list(itertools.product([True, False], [True, False], [True, False]))) + def test_cfr_kuhn_poker_runs_with_multiple_players(self, linear_averaging, + regret_matching_plus, + alternating_updates): + num_players = 3 + + game = pyspiel.load_game("kuhn_poker", {"players": num_players}) + cfr_solver = cfr._CFRSolver( + game, + regret_matching_plus=regret_matching_plus, + linear_averaging=linear_averaging, + alternating_updates=alternating_updates) + for _ in range(10): + cfr_solver.evaluate_and_update_policy() + average_policy = cfr_solver.average_policy() + average_policy_values = expected_game_score.policy_value( + game.new_initial_state(), [average_policy] * num_players) + del average_policy_values + + @parameterized.parameters(list(itertools.product([False, True]))) + def test_simultaneous_two_step_avg_1b_seq_in_kuhn_poker( + self, regret_matching_plus): + num_players = 2 + game = pyspiel.load_game("kuhn_poker", {"players": num_players}) + cfr_solver = cfr._CFRSolver( + game, + regret_matching_plus=regret_matching_plus, + linear_averaging=False, + alternating_updates=False) + + def check_avg_policy_is_uniform_random(): + avg_policy = cfr_solver.average_policy() + for player_info_states in avg_policy.states_per_player: + for info_state in player_info_states: + state_policy = avg_policy.policy_for_key(info_state) + np.testing.assert_allclose(state_policy, [1.0 / len(state_policy)] * + len(state_policy)) + + check_avg_policy_is_uniform_random() + + cfr_solver.evaluate_and_update_policy() + check_avg_policy_is_uniform_random() + + cfr_solver.evaluate_and_update_policy() + + # The acting player in 1b is player 1 and they have not acted before, so + # the probability this player plays to this information state is 1, and + # the sequence probability of any action is just the probability of that + # action given the information state. On the first iteration, this + # probability is 0.5 for both actions. On the second iteration, the + # current policy is [0, 1], so the average cumulants should be + # [0.5, 1.5]. Normalizing this gives the average policy. 
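+    # That is, the expected average policy at "1b" is [0.5, 1.5] / 2.0,
+    # i.e. [0.25, 0.75].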
+ normalization = 0.5 + 0.5 + 1 + np.testing.assert_allclose(cfr_solver.average_policy().policy_for_key("1b"), + [0.5 / normalization, (0.5 + 1) / normalization]) + + def test_policy(self): + game = pyspiel.load_game("kuhn_poker") + solver = cfr.CFRPlusSolver(game) + + tabular_policy = solver.current_policy() + self.assertLen(tabular_policy.state_lookup, 12) + for info_state_str in tabular_policy.state_lookup.keys(): + np.testing.assert_equal( + np.asarray([0.5, 0.5]), tabular_policy.policy_for_key(info_state_str)) + + @parameterized.parameters([ + (pyspiel.load_game("kuhn_poker"), pyspiel.CFRSolver, cfr.CFRSolver), + (pyspiel.load_game("leduc_poker"), pyspiel.CFRSolver, cfr.CFRSolver), + (pyspiel.load_game("kuhn_poker"), pyspiel.CFRPlusSolver, + cfr.CFRPlusSolver), + (pyspiel.load_game("leduc_poker"), pyspiel.CFRPlusSolver, + cfr.CFRPlusSolver), + ]) + def test_cpp_algorithms_identical_to_python_algorithm(self, game, cpp_class, + python_class): + cpp_solver = cpp_class(game) + python_solver = python_class(game) + + for _ in range(5): + cpp_solver.evaluate_and_update_policy() + python_solver.evaluate_and_update_policy() + + cpp_avg_policy = cpp_solver.average_policy() + python_avg_policy = python_solver.average_policy() + + # We do not compare the policy directly as we do not have an easy way to + # convert one to the other, so we use the exploitability as a proxy. + cpp_expl = pyspiel.nash_conv(game, cpp_avg_policy) + python_expl = exploitability.nash_conv(game, python_avg_policy) + self.assertAlmostEqual(cpp_expl, python_expl, places=10) + # Then we also check the CurrentPolicy, just to check it is giving the same + # results too + cpp_current_policy = cpp_solver.current_policy() + python_current_policy = python_solver.current_policy() + cpp_expl = pyspiel.nash_conv(game, cpp_current_policy) + python_expl = exploitability.nash_conv(game, python_current_policy) + self.assertAlmostEqual(cpp_expl, python_expl, places=10) + + +class CorrDistTest(absltest.TestCase): + """Test some of the correlation device distances functions in C++. + + These functions are analogues to NashConv for various forms of correlated + equilibria. + """ + + def test_cce_dist_kuhn_3p_cpp(self): + game = pyspiel.load_game("kuhn_poker(players=3)") + solver = pyspiel.CFRSolver(game) # C++ solver + strategies = [] + corr_dist_values = [] + for _ in range(10): + solver.evaluate_and_update_policy() + strategies.append(solver.tabular_current_policy()) + corr_dev = pyspiel.uniform_correlation_device(strategies) + cce_dist_info = pyspiel.cce_dist(game, corr_dev) + corr_dist_values.append(cce_dist_info.dist_value) + self.assertLess(corr_dist_values[-1], corr_dist_values[0]) + + def test_cce_dist_kuhn_3p(self): + game = pyspiel.load_game("kuhn_poker(players=3)") + solver = cfr._CFRSolver(game, + regret_matching_plus=False, + linear_averaging=False, + alternating_updates=True) + strategies = [] + corr_dist_values = [] + for _ in range(10): + solver.evaluate_and_update_policy() + # Convert the policy to a pyspiel.TabularPolicy, needed by the CorrDist + # functions on the C++ side. 
+ strategies.append(policy.python_policy_to_pyspiel_policy( + solver.current_policy())) + corr_dev = pyspiel.uniform_correlation_device(strategies) + cce_dist_info = pyspiel.cce_dist(game, corr_dev) + corr_dist_values.append(cce_dist_info.dist_value) + self.assertLess(corr_dist_values[-1], corr_dist_values[0]) + + def test_cce_dist_sheriff_cpp(self): + game = pyspiel.load_game("sheriff") + solver = pyspiel.CFRSolver(game) # C++ solver + strategies = [] + corr_dist_values = [] + for _ in range(3): + solver.evaluate_and_update_policy() + strategies.append(solver.tabular_current_policy()) + corr_dev = pyspiel.uniform_correlation_device(strategies) + cce_dist_info = pyspiel.cce_dist(game, corr_dev) + corr_dist_values.append(cce_dist_info.dist_value) + self.assertLess(corr_dist_values[-1], corr_dist_values[0]) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/discounted_cfr.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/discounted_cfr.py new file mode 100644 index 0000000..29a64a2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/discounted_cfr.py @@ -0,0 +1,235 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Discounted CFR and Linear CFR algorithms. + +This implements Discounted CFR and Linear CFR, from Noam Brown and Tuomas +Sandholm, 2019, "Solving Imperfect-Information Games via Discounted Regret +Minimization". +See https://arxiv.org/abs/1809.04040. + +Linear CFR (LCFR), is identical to CFR, except on iteration `t` the updates to +the regrets and average strategies are given weight `t`. (Equivalently, one +could multiply the accumulated regret by t / (t+1) on each iteration.) + +Discounted CFR(alpha, beta, gamma) is defined by, at iteration `t`: +- multiplying the positive accumulated regrets by (t^alpha / (t^alpha + 1)) +- multiplying the negative accumulated regrets by (t^beta / (t^beta + 1)) +- multiplying the contribution to the average strategy by t^gamma + +WARNING: This was contributed on Github, and the OpenSpiel team is not aware it +has been verified we can reproduce the paper results. +""" + +import numpy as np + +from open_spiel.python.algorithms import cfr + +_InfoStateNode = cfr._InfoStateNode # pylint: disable=protected-access + + +class _DCFRSolver(cfr._CFRSolver): # pylint: disable=protected-access + """Discounted CFR.""" + + def __init__(self, game, alternating_updates, linear_averaging, + regret_matching_plus, alpha, beta, gamma): + super(_DCFRSolver, self).__init__(game, alternating_updates, + linear_averaging, regret_matching_plus) + self.alpha = alpha + self.beta = beta + self.gamma = gamma + + # We build a list of the nodes for all players, which will be updated + # within `evaluate_and_update_policy`. 
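+    # evaluate_and_update_policy walks these per-player lists to discount the
+    # accumulated regrets: with the DCFRSolver defaults (alpha=3/2, beta=0,
+    # gamma=2), iteration t multiplies positive regrets by t**1.5 / (t**1.5 + 1)
+    # and negative regrets by 1/2 (since t**0 == 1).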
+ self._player_nodes = [[] for _ in range(self._num_players)] + for info_state in self._info_state_nodes.values(): + self._player_nodes[info_state.player].append(info_state) + + def _initialize_info_state_nodes(self, state): + """Initializes info_state_nodes. + + We override the parent function, to add the current player information + at the given node. This is used because we want to do updates for all nodes + for a specific player. + + Args: + state: The current state in the tree walk. This should be the root node + when we call this function from a CFR solver. + """ + if state.is_terminal(): + return + + if state.is_chance_node(): + for action, unused_action_prob in state.chance_outcomes(): + self._initialize_info_state_nodes(state.child(action)) + return + + current_player = state.current_player() + info_state = state.information_state_string(current_player) + + info_state_node = self._info_state_nodes.get(info_state) + if info_state_node is None: + legal_actions = state.legal_actions(current_player) + info_state_node = _InfoStateNode( + legal_actions=legal_actions, + index_in_tabular_policy=self._current_policy.state_lookup[info_state]) + info_state_node.player = current_player + self._info_state_nodes[info_state] = info_state_node + + for action in info_state_node.legal_actions: + self._initialize_info_state_nodes(state.child(action)) + + def _compute_counterfactual_regret_for_player(self, state, policies, + reach_probabilities, player): + """Increments the cumulative regrets and policy for `player`. + + Args: + state: The initial game state to analyze from. + policies: Unused. To be compatible with the `_CFRSolver` signature. + reach_probabilities: The probability for each player of reaching `state` + as a numpy array [prob for player 0, for player 1,..., for chance]. + `player_reach_probabilities[player]` will work in all cases. + player: The 0-indexed player to update the values for. If `None`, the + update for all players will be performed. + + Returns: + The utility of `state` for all players, assuming all players follow the + current policy defined by `self.Policy`. + """ + if state.is_terminal(): + return np.asarray(state.returns()) + + if state.is_chance_node(): + state_value = 0.0 + for action, action_prob in state.chance_outcomes(): + assert action_prob > 0 + new_state = state.child(action) + new_reach_probabilities = reach_probabilities.copy() + new_reach_probabilities[-1] *= action_prob + state_value += action_prob * self._compute_counterfactual_regret_for_player( + new_state, policies, new_reach_probabilities, player) + return state_value + + current_player = state.current_player() + info_state = state.information_state_string(current_player) + + # No need to continue on this history branch as no update will be performed + # for any player. + # The value we return here is not used in practice. If the conditional + # statement is True, then the last taken action has probability 0 of + # occurring, so the returned value is not impacting the parent node value. + if all(reach_probabilities[:-1] == 0): + return np.zeros(self._num_players) + + state_value = np.zeros(self._num_players) + + # The utilities of the children states are computed recursively. As the + # regrets are added to the information state regrets for each state in that + # information state, the recursive call can only be made once per child + # state. Therefore, the utilities are cached. 
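+    # This traversal mirrors cfr._CFRSolver._compute_counterfactual_regret_for_player;
+    # the only behavioural difference appears further below, where the cumulative
+    # policy increment is weighted by self._iteration**self.gamma (the regret
+    # discounting itself is applied in evaluate_and_update_policy).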
+ children_utilities = {} + + info_state_node = self._info_state_nodes[info_state] + if policies is None: + info_state_policy = self._get_infostate_policy(info_state) + else: + info_state_policy = policies[current_player](info_state) + for action in state.legal_actions(): + action_prob = info_state_policy.get(action, 0.) + new_state = state.child(action) + new_reach_probabilities = reach_probabilities.copy() + new_reach_probabilities[current_player] *= action_prob + child_utility = self._compute_counterfactual_regret_for_player( + new_state, + policies=policies, + reach_probabilities=new_reach_probabilities, + player=player) + + state_value += action_prob * child_utility + children_utilities[action] = child_utility + + # If we are performing alternating updates, and the current player is not + # the current_player, we skip the cumulative values update. + # If we are performing simultaneous updates, we do update the cumulative + # values. + simulatenous_updates = player is None + if not simulatenous_updates and current_player != player: + return state_value + + reach_prob = reach_probabilities[current_player] + counterfactual_reach_prob = ( + np.prod(reach_probabilities[:current_player]) * + np.prod(reach_probabilities[current_player + 1:])) + state_value_for_player = state_value[current_player] + + for action, action_prob in info_state_policy.items(): + cfr_regret = counterfactual_reach_prob * ( + children_utilities[action][current_player] - state_value_for_player) + + info_state_node = self._info_state_nodes[info_state] + info_state_node.cumulative_regret[action] += cfr_regret + if self._linear_averaging: + info_state_node.cumulative_policy[action] += ( + reach_prob * action_prob * (self._iteration**self.gamma)) + else: + info_state_node.cumulative_policy[action] += reach_prob * action_prob + + return state_value + + def evaluate_and_update_policy(self): + """Performs a single step of policy evaluation and policy improvement.""" + self._iteration += 1 + if self._alternating_updates: + for current_player in range(self._game.num_players()): + self._compute_counterfactual_regret_for_player( + self._root_node, + policies=None, + reach_probabilities=np.ones(self._game.num_players() + 1), + player=current_player) + for info_state in self._player_nodes[current_player]: + for action in info_state.cumulative_regret.keys(): + if info_state.cumulative_regret[action] >= 0: + info_state.cumulative_regret[action] *= ( + self._iteration**self.alpha / + (self._iteration**self.alpha + 1)) + else: + info_state.cumulative_regret[action] *= ( + self._iteration**self.beta / (self._iteration**self.beta + 1)) + cfr._update_current_policy(self._current_policy, self._info_state_nodes) # pylint: disable=protected-access + + +class DCFRSolver(_DCFRSolver): + + def __init__(self, game, alpha=3 / 2, beta=0, gamma=2): + super(DCFRSolver, self).__init__( + game, + regret_matching_plus=False, + alternating_updates=True, + linear_averaging=True, + alpha=alpha, + beta=beta, + gamma=gamma) + + +class LCFRSolver(_DCFRSolver): + + def __init__(self, game): + super(LCFRSolver, self).__init__( + game, + regret_matching_plus=False, + alternating_updates=True, + linear_averaging=True, + alpha=1, + beta=1, + gamma=1) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/discounted_cfr_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/discounted_cfr_test.py new file mode 100644 index 0000000..d52c114 --- /dev/null +++ 
b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/discounted_cfr_test.py @@ -0,0 +1,48 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.algorithms.discounted_cfr.""" + +from absl.testing import absltest +import numpy as np + +from open_spiel.python.algorithms import discounted_cfr +from open_spiel.python.algorithms import expected_game_score +import pyspiel + + +class DiscountedCfrTest(absltest.TestCase): + + def test_discounted_cfr_on_kuhn(self): + game = pyspiel.load_game("kuhn_poker") + solver = discounted_cfr.DCFRSolver(game) + for _ in range(300): + solver.evaluate_and_update_policy() + average_policy = solver.average_policy() + average_policy_values = expected_game_score.policy_value( + game.new_initial_state(), [average_policy] * 2) + # 1/18 is the Nash value. See https://en.wikipedia.org/wiki/Kuhn_poker + np.testing.assert_allclose( + average_policy_values, [-1 / 18, 1 / 18], atol=1e-3) + + def test_discounted_cfr_runs_against_leduc(self): + game = pyspiel.load_game("leduc_poker") + solver = discounted_cfr.DCFRSolver(game) + for _ in range(10): + solver.evaluate_and_update_policy() + solver.average_policy() + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/double_oracle.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/double_oracle.py new file mode 100644 index 0000000..addc117 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/double_oracle.py @@ -0,0 +1,189 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Double Oracle algorithm. + +Solves two-player zero-sum games, for more information see: +McMahan et al. (2003). Planning in the presence of cost functions controlled by + an adversary. In Proceedings of the 20th International Conference on Machine + Learning (ICML-03) (pp. 536-543). 
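+
+The solver repeatedly solves the restricted matrix game spanned by the
+strategies found so far with an LP, adds each player's best response against
+that restricted solution to the strategy sets, and stops once no new strategy
+is added, the estimated value of the game falls below the tolerance, or the
+maximum number of iterations is reached.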
+""" + +import numpy as np + +from open_spiel.python.algorithms import lp_solver +from open_spiel.python.egt import utils +import pyspiel + + +def lens(lists): + """Returns the sizes of lists in a list.""" + return list(map(len, lists)) + + +def solve_subgame(subgame_payoffs): + """Solves the subgame using OpenSpiel's LP solver.""" + p0_sol, p1_sol, _, _ = lp_solver.solve_zero_sum_matrix_game( + pyspiel.create_matrix_game(*subgame_payoffs)) + p0_sol, p1_sol = np.asarray(p0_sol), np.asarray(p1_sol) + return [p0_sol / p0_sol.sum(), p1_sol / p1_sol.sum()] + + +class DoubleOracleSolver(object): + """Double Oracle solver.""" + + def __init__(self, game, enforce_symmetry=False): + """Initializes the Double Oracle solver. + + Args: + game: pyspiel.MatrixGame (zero-sum). + enforce_symmetry: If True, enforces symmetry in the strategies appended by + each player, by using the first player's best response for the second + player as well; also asserts the game is symmetric and that players are + seeded with identical initial_strategies, default: False. + """ + assert isinstance(game, pyspiel.MatrixGame) + assert game.get_type().utility == pyspiel.GameType.Utility.ZERO_SUM + # convert matrix game to numpy.ndarray of shape [2,rows,columns] + self.payoffs = utils.game_payoffs_array(game) + self.subgame_strategies = [[], []] + self.enforce_symmetry = enforce_symmetry + if self.enforce_symmetry: + assert utils.is_symmetric_matrix_game(self.payoffs), ( + "enforce_symmetry is True, but payoffs are asymmetric!") + + def subgame_payoffs(self): + # Select payoffs from the full game according to the subgame strategies. + assert all(lens(self.subgame_strategies)), "Need > 0 strategies per player." + subgame_payoffs = np.copy(self.payoffs) + for player, indices in enumerate(self.subgame_strategies): + subgame_payoffs = np.take(subgame_payoffs, indices, axis=player + 1) + return subgame_payoffs + + def oracle(self, subgame_solution): + """Computes the best responses. + + Args: + subgame_solution: List of subgame solution policies. + + Returns: + best_response: For both players from the original set of pure strategies. + best_response_utility: Corresponding utility for both players. + """ + assert lens(subgame_solution) == lens(self.subgame_strategies), ( + f"{lens(subgame_solution)} != {lens(self.subgame_strategies)}") + best_response = [None, None] + best_response_utility = [None, None] + n_best_responders = 1 if self.enforce_symmetry else 2 + for player in range(n_best_responders): + opponent = 1 - player + # collect relevant payoff entries + payoffs = np.take( + self.payoffs[player], + self.subgame_strategies[opponent], + axis=opponent) + # transpose to move player to leading dimension + payoffs = np.transpose(payoffs, [player, opponent]) + avg_payoffs = (payoffs @ subgame_solution[opponent]).squeeze() + best_response[player] = np.argmax(avg_payoffs) + best_response_utility[player] = avg_payoffs[best_response[player]] + + if self.enforce_symmetry: + best_response[1] = best_response[0] + best_response_utility[1] = best_response_utility[0] + + return best_response, best_response_utility + + def step(self): + """Performs one iteration.""" + subgame_payoffs = self.subgame_payoffs() + subgame_solution = solve_subgame(subgame_payoffs) + best_response, best_response_utility = self.oracle(subgame_solution) + + # Add best responses to the subgame strategies (if not included yet). 
+ self.subgame_strategies = [ + sorted(set(strategies + [br])) + for strategies, br in zip(self.subgame_strategies, best_response) + ] + return best_response, best_response_utility + + def solve_yield(self, + initial_strategies, + max_steps, + tolerance, + verbose, + yield_subgame=False): + """Solves game using Double Oracle, yielding intermediate results. + + Args: + initial_strategies: List of pure strategies for both players, optional. + max_steps: Maximum number of iterations, default: 20. + tolerance: Stop if the estimated value of the game is below the tolerance. + verbose: If False, no warning is shown, default: True. + yield_subgame: If True, yields the subgame on each iteration. Otherwise, + yields the final results only, default: False. + + Yields: + solution: Policies for both players. + iteration: The number of iterations performed. + value: Estimated value of the game. + """ + if self.enforce_symmetry and initial_strategies: + assert np.array_equal(initial_strategies[0], initial_strategies[1]), ( + f"Players must use same initial_strategies as symmetry is enforced." + f"\ninitial_strategies[0]: {initial_strategies[0]}, " + f"\ninitial_strategies[1]: {initial_strategies[1]}") + + self.subgame_strategies = (initial_strategies if initial_strategies + else [[0], [0]]) + iteration = 0 + while iteration < max_steps: + if yield_subgame: + yield None, iteration, None, self.subgame_payoffs() + iteration += 1 + last_subgame_size = lens(self.subgame_strategies) + _, best_response_utility = self.step() + value = sum(best_response_utility) + if abs(value) < tolerance: + if verbose: + print("Last iteration={}; value below tolerance {} < {}." + .format(iteration, value, tolerance)) + break + if lens(self.subgame_strategies) == last_subgame_size: + if verbose: + print( + "Last iteration={}; no strategies added, increase tolerance={} or check subgame solver." + .format(iteration, tolerance)) + break + + # Compute subgame solution and return solution in original strategy space. + subgame_solution = solve_subgame(self.subgame_payoffs()) + solution = [np.zeros(k) for k in self.payoffs.shape[1:]] + for p in range(2): + solution[p][self.subgame_strategies[p]] = subgame_solution[p].squeeze() + + yield solution, iteration, value, self.subgame_payoffs() + + def solve(self, + initial_strategies=None, + max_steps=20, + tolerance=5e-5, + verbose=True): + """Solves the game using Double Oracle, returning the final solution.""" + solution, iteration, value = None, None, None + generator = self.solve_yield(initial_strategies, max_steps, tolerance, + verbose, yield_subgame=False) + for solution, iteration, value, _ in generator: + pass + return solution, iteration, value diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/double_oracle_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/double_oracle_test.py new file mode 100644 index 0000000..f743d28 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/double_oracle_test.py @@ -0,0 +1,58 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.algorithms.double_oracle.""" + +from absl.testing import absltest +import numpy as np + +from open_spiel.python.algorithms import double_oracle +import pyspiel + + +class DoubleOracleTest(absltest.TestCase): + + def test_rock_paper_scissors(self): + game = pyspiel.load_matrix_game("matrix_rps") + solver = double_oracle.DoubleOracleSolver(game) + solution, iteration, value = solver.solve(initial_strategies=[[0], [0]]) + np.testing.assert_allclose(solution[0], np.ones(3)/3.) + np.testing.assert_allclose(solution[1], np.ones(3)/3.) + self.assertEqual(iteration, 3) + self.assertAlmostEqual(value, 0.0) + + def test_single_step(self): + game = pyspiel.load_matrix_game("matrix_rps") + solver = double_oracle.DoubleOracleSolver(game) + solver.subgame_strategies = [[0], [0]] + best_response, best_response_utility = solver.step() + self.assertListEqual(best_response, [1, 1]) + self.assertListEqual(best_response_utility, [1.0, 1.0]) + + def test_kuhn_poker(self): + game = pyspiel.extensive_to_matrix_game(pyspiel.load_game("kuhn_poker")) + solver = double_oracle.DoubleOracleSolver(game) + solution, iteration, value = solver.solve(initial_strategies=[[0], [0]]) + + # check if solution is Nash + exp_utilty = solution[0] @ solver.payoffs @ solution[1] + self.assertAlmostEqual(max(solver.payoffs[0] @ solution[1]), exp_utilty[0]) + self.assertAlmostEqual(max(solution[0] @ solver.payoffs[1]), exp_utilty[1]) + + self.assertEqual(iteration, 8) + self.assertAlmostEqual(value, 0.0) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/efr.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/efr.py new file mode 100644 index 0000000..91eed73 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/efr.py @@ -0,0 +1,1352 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# Modified: 2023 James Flynn +# Original: +# https://github.com/deepmind/open_spiel/blob/master/open_spiel/python/algorithms/cfr.py + +"""Python implementation of the extensive-form regret minimization algorithm. + +See: "Efficient Deviation Types and Learning + for Hindsight Rationality in Extensive-Form Games", + Morrill et al. 2021b, + https://arxiv.org/abs/2102.06973 + +One iteration of EFR consists of: +1) Compute current strategy from regrets (e.g. using Regret Matching). +2) Compute values using the current strategy +3) Compute regrets from these values + +The average policy converges to a Nash Equilibrium +rather than the current policy. +""" + +import collections +import copy + +import attr +import numpy as np +from scipy import linalg + +from open_spiel.python import policy +import pyspiel + + +@attr.s +class _InfoStateNode(object): + """An object wrapping values associated to an information state.""" + + # The list of the legal actions. 
+ legal_actions = attr.ib() + index_in_tabular_policy = attr.ib() + # The newly availible deviations + the old ones + relizable_deviations = attr.ib() + # Player -> state -> action -> prob + current_history_probs = attr.ib() + + # An array representing the preceeding actions played + # upto this information state. + history = attr.ib() + + cumulative_regret = attr.ib(factory=lambda: collections.defaultdict(float)) + # The sum of all prior iteration's policies + cumulative_policy = attr.ib(factory=lambda: collections.defaultdict(float)) + + # A dictionary mapping each deviation to their "y values" + # for the current iteration. + y_values = attr.ib(factory=lambda: collections.defaultdict(float)) + + +class _EFRSolverBase(object): + """The base EFR solver class. + + The main iteration loop is implemented in `evaluate_and_update_policy`: + ```python + game = pyspiel.load_game("game_name") + initial_state = game.new_initial_state() + solver = Solver(game) + for i in range(num_iterations): + solver.evaluate_and_update_policy() + solver.current_policy() # Access the current policy + solver.average_policy() # Access the average policy + ``` + """ + + def __init__(self, game, deviation_gen): + """Initializer. + + Args: + game: The `pyspiel.Game` to run on. + deviation_gen: a function that accepts (num_actions : int, + history, prior_legal_actions) + and returns a list containing`LocalDeviationWithTimeSelection` objects + of the realisable deviations of a described type + (e.g blind causal deviations) and given the information state described + by the function parameters. + """ + # pyformat: enable + assert game.get_type().dynamics == pyspiel.GameType.Dynamics.SEQUENTIAL, ( + "EFR requires sequential games. If you're trying to run it " + + "on a simultaneous (or normal-form) game, please first transform it " + + "using turn_based_simultaneous_game." + ) + + self._game = game + self._num_players = game.num_players() + self._root_node = self._game.new_initial_state() + + # This is for returning the current policy and average policy to a caller + self._current_policy = policy.TabularPolicy(game) + self._average_policy = self._current_policy.__copy__() + self._deviation_gen = deviation_gen + + self._info_state_nodes = {} + hist = {player: [] for player in range(self._num_players)} + empty_path_indices = [[] for _ in range(self._num_players)] + + self._initialize_info_state_nodes(self._root_node, hist, empty_path_indices) + + self._iteration = 1 # For possible linear-averaging. + + def return_cumulative_regret(self): + """Returns a dictionary mapping. + + The mapping is from every information state to its associated regret + (accumulated over all iterations). + """ + return { + list(self._info_state_nodes.keys())[i]: list( + self._info_state_nodes.values() + )[i].cumulative_regret + for i in range(len(self._info_state_nodes.keys())) + } + + def current_policy(self): + """Returns the current policy as a TabularPolicy. + + WARNING: The same object, updated in-place will be returned! You can copy + it (or its `action_probability_array` field). + + For EFR, this policy does not necessarily have to converge. + """ + return self._current_policy + + def average_policy(self): + """Returns the average of all policies iterated. + + WARNING: The same object, updated in-place will be returned! You can copy it + (or its `action_probability_array` field). + + This average policy converges to a equilibrium policy as the number + of iterations increases (equilibrium type depends on learning + deviations used). 
+ + The policy is computed using the accumulated policy probabilities computed + using `evaluate_and_update_policy`. + + Returns: + A `policy.TabularPolicy` object (shared between calls) giving the (linear) + time averaged policy (weighted by player reach probabilities) for all + players. + """ + _update_average_policy(self._average_policy, self._info_state_nodes) + return self._average_policy + + def _initialize_info_state_nodes(self, state, history, path_indices): + """Initializes info_state_nodes. + + Create one _InfoStateNode per infoset. We could also initialize the node + when we try to access it and it does not exist. + + Generates all deviations that are realisable at this state and stores + the history and preceeding state policy information to create memory states + and calculate the memory reach probability for each deviation. + + Args: + state: The current state in the tree traversal. This should be the + root node when we call this function from the EFR solver. + history: an arrays of the preceeding actions taken prior to the state + for each player. + path_indices: a 3d array [player number]x[preceeding state]x[legal actions + for state, index of the policy for this state in TabularPolicy]. + """ + if state.is_terminal(): + return + + if state.is_chance_node(): + for action, unused_action_prob in state.chance_outcomes(): + self._initialize_info_state_nodes( + state.child(action), history, path_indices + ) + return + + current_player = state.current_player() + info_state = state.information_state_string(current_player) + info_state_node = self._info_state_nodes.get(info_state) + if info_state_node is None: + legal_actions = state.legal_actions(current_player) + info_state_node = _InfoStateNode( + legal_actions=legal_actions, + index_in_tabular_policy=self._current_policy.state_lookup[info_state], + relizable_deviations=None, + history=history[current_player].copy(), + current_history_probs=copy.deepcopy(path_indices[current_player]), + ) + prior_possible_actions = [] + for i in range(len(info_state_node.current_history_probs)): + prior_possible_actions.append( + info_state_node.current_history_probs[i][0] + ) + prior_possible_actions.append(info_state_node.legal_actions) + + info_state_node.relizable_deviations = self._deviation_gen( + len(info_state_node.legal_actions), + info_state_node.history, + prior_possible_actions, + ) + self._info_state_nodes[info_state] = info_state_node + + legal_actions = state.legal_actions(current_player) + + for action in info_state_node.legal_actions: + new_path_indices = copy.deepcopy(path_indices) + new_path_indices[current_player].append( + [legal_actions, info_state_node.index_in_tabular_policy] + ) + new_history = copy.deepcopy(history) + new_history[current_player].append(action) + assert len(new_history[current_player]) == len( + new_path_indices[current_player] + ) + + self._initialize_info_state_nodes( + state.child(action), new_history, new_path_indices + ) + + def _update_current_policy(self, state, current_policy): + """Updated the current policy. + + Updated in order so that memory reach probs are defined wrt to the new + strategy. + + Note that the function is called recursively (first call should + be the root). + + Additionally, to update the strategy for a given state we require + the (t+1)th strategy for all prior states. + + Args: + state: the state of which to update the strategy. + current_policy: the (t+1)th strategy that is being recursively computed, + see the function description for more detail. 
+ """ + + if state.is_terminal(): + return + elif not state.is_chance_node(): + current_player = state.current_player() + info_state = state.information_state_string(current_player) + info_state_node = self._info_state_nodes[info_state] + deviations = info_state_node.relizable_deviations + for devation in range(len(deviations)): + mem_reach_probs = create_probs_from_index( + info_state_node.current_history_probs, current_policy + ) + deviation_reach_prob = deviations[ + devation + ].player_deviation_reach_probability(mem_reach_probs) + y_increment = ( + max(0, info_state_node.cumulative_regret[devation]) + * deviation_reach_prob + ) + info_state_node.y_values[deviations[devation]] = ( + info_state_node.y_values[deviations[devation]] + y_increment + ) + + state_policy = current_policy.policy_for_key(info_state) + for action, value in self._regret_matching(info_state_node).items(): + state_policy[action] = value + + for action in info_state_node.legal_actions: + new_state = state.child(action) + self._update_current_policy(new_state, current_policy) + else: + for action, _ in state.chance_outcomes(): + new_state = state.child(action) + self._update_current_policy(new_state, current_policy) + + # Path to state probability ignores chance probabilty as this is stored as + # new_reach_probabilities[-1] + def _compute_cumulative_immediate_regret_for_player( + self, state, policies, reach_probabilities, player + ): + """Increments the immediate regrets and policy. + + Increments are done for `player` of all realisable deviations at this state. + + Args: + state: The initial game state to analyze from. + policies: A list of `num_players` callables taking as input an + `info_state_node` and returning a {action: prob} dictionary. + reach_probabilities: The probability for each player of reaching `state` + as a numpy array [prob for player 0, for player 1,..., for chance]. + `reach_probabilities[player]` will work in all cases. + player: The 0-indexed player to update the values for. If `None`, the + update for all players will be performed. + + Returns: + The utility of `state` for all players, assuming all players follow the + current policy defined by `self.Policy`. + """ + if state.is_terminal(): + return np.asarray(state.returns()) + + if state.is_chance_node(): + state_value = 0.0 + for action, action_prob in state.chance_outcomes(): + assert action_prob > 0 + new_state = state.child(action) + new_reach_probabilities = reach_probabilities.copy() + new_reach_probabilities[-1] *= action_prob + + state_value += ( + action_prob + * self._compute_cumulative_immediate_regret_for_player( + new_state, policies, new_reach_probabilities, player + ) + ) + return state_value + + current_player = state.current_player() + info_state = state.information_state_string(current_player) + + # No need to continue on this history branch as no update will be performed + # for any player. + # The value we return here is not used in practice. If the conditional + # statement is True, then the last taken action has probability 0 of + # occurring, so the returned value is not impacting the parent node value. + if all(reach_probabilities[:-1] == 0): + return np.zeros(self._num_players) + + state_value = np.zeros(self._num_players) + + # The utilities of the children states are computed recursively. As the + # regrets are added to the information state regrets for each state in that + # information state, the recursive call can only be made once per child + # state. Therefore, the utilities are cached. 
+ children_utilities = {} + + info_state_node = self._info_state_nodes[info_state] + # Reset y values + info_state_node.y_values = collections.defaultdict(float) + if policies is None: + info_state_policy = self._get_infostate_policy(info_state) + else: + info_state_policy = policies[current_player](info_state) + + reach_prob = reach_probabilities[current_player] + for action in state.legal_actions(): + action_prob = info_state_policy.get(action, 0.0) + info_state_node.cumulative_policy[action] = ( + info_state_node.cumulative_policy[action] + action_prob * reach_prob + ) + new_state = state.child(action) + new_reach_probabilities = reach_probabilities.copy() + assert action_prob <= 1 + new_reach_probabilities[current_player] *= action_prob + child_utility = self._compute_cumulative_immediate_regret_for_player( + new_state, + policies=policies, + reach_probabilities=new_reach_probabilities, + player=player, + ) + + state_value += action_prob * child_utility + children_utilities[action] = child_utility + + counterfactual_reach_prob = np.prod( + reach_probabilities[:current_player] + ) * np.prod(reach_probabilities[current_player + 1 :]) + + state_value_for_player = state_value[current_player] + deviations = info_state_node.relizable_deviations + for deviation_index in range(len(deviations)): + deviation = deviations[deviation_index] + deviation_strategy = deviation.deviate( + strat_dict_to_array(self._get_infostate_policy(info_state)) + ) + + player_child_utilities = np.array(list(children_utilities.values()))[ + :, current_player + ] + devation_cf_value = np.inner( + np.transpose(deviation_strategy), player_child_utilities + ) + + memory_reach_probs = create_probs_from_index( + info_state_node.current_history_probs, self.current_policy() + ) + player_current_memory_reach_prob = ( + deviation.player_deviation_reach_probability(memory_reach_probs) + ) + + deviation_regret = player_current_memory_reach_prob * ( + (devation_cf_value * counterfactual_reach_prob) + - (counterfactual_reach_prob * state_value_for_player) + ) + + info_state_node.cumulative_regret[deviation_index] += deviation_regret + return state_value + + def _get_infostate_policy(self, info_state_str): + """Returns an {action: prob} dictionary for the policy on `info_state`.""" + info_state_node = self._info_state_nodes[info_state_str] + prob_vec = self._current_policy.action_probability_array[ + info_state_node.index_in_tabular_policy + ] + return { + action: prob_vec[action] for action in info_state_node.legal_actions + } + + +class _EFRSolver(_EFRSolverBase): + + def evaluate_and_update_policy(self): + """Performs a single step of policy evaluation and policy improvement.""" + self._compute_cumulative_immediate_regret_for_player( + self._root_node, + policies=None, + reach_probabilities=np.ones(self._game.num_players() + 1), + player=None, + ) + self._update_current_policy(self._root_node, self._current_policy) + self._iteration += 1 + + +class EFRSolver(_EFRSolver): + """Implements the EFR algorithm with several deviation types. + + See: https://arxiv.org/abs/2102.06973 + """ + + def __init__(self, game, deviations_name): + """Initializer. + + Args: + game: The `pyspiel.Game` to run on. + deviations_name: the name of the deviation type to use for + accumulating regrets and calculating the strategy at the next timestep. 
+ + Deviation types implemented are "blind action", "informed action", + "blind cf", "informed counterfactual", "blind partial sequence", + "counterfactual partial sequence", "casual partial sequence", + "twice informed partial sequence", "single target behavioural". + + See "Efficient Deviation Types and Learning for Hindsight Rationality in + Extensive-Form Games" by D. Morrill et al. 2021b + for the full definition of each type. + """ + + # external_only = True leads to a shortcut in the computation of the next + # timesteps strategy from the regrets + external_only = False + deviation_sets = None # pylint: disable=unused-variable + + if deviations_name in {"blind action"}: + deviation_sets = return_blind_action + external_only = True + elif deviations_name in {"informed action"}: + deviation_sets = return_informed_action + elif deviations_name in {"blind cf", "blind counterfactual"}: + deviation_sets = return_blind_cf + external_only = True + elif deviations_name in {"informed cf", "informed counterfactual"}: + deviation_sets = return_informed_cf + elif deviations_name in {"bps", "blind partial sequence"}: + deviation_sets = return_blind_partial_sequence + external_only = True + elif deviations_name in { + "cfps", + "cf partial sequence", + "counterfactual partial sequence", + }: + deviation_sets = return_cf_partial_sequence + elif deviations_name in {"csps", "casual partial sequence"}: + deviation_sets = return_cs_partial_sequence + elif deviations_name in {"tips", "twice informed partial sequence"}: + deviation_sets = return_twice_informed_partial_sequence + elif deviations_name in {"bhv", "single target behavioural", "behavioural"}: + deviation_sets = return_behavourial + else: + raise ValueError( + "Unsupported Deviation Set Passed As Constructor" + " Argument" + ) + super(EFRSolver, self).__init__(game, deviation_sets) + self._external_only = external_only + + def _regret_matching(self, info_set_node): + """Returns an info state policy. + + The info state policy returned is the one obtained by applying + regret-matching function over all deviations and time selection functions. + + Args: + info_set_node: the info state node to compute the policy for. + + Returns: + A dict of action -> prob for all legal actions of the + info_set_node. + """ + legal_actions = info_set_node.legal_actions + num_actions = len(legal_actions) + info_state_policy = None + z = sum(info_set_node.y_values.values()) + + # The fixed point solution can be directly obtained through the + # weighted regret matrix if only external deviations are used. + if self._external_only and z > 0: + weighted_deviation_matrix = np.zeros((num_actions, num_actions)) + for dev in list(info_set_node.y_values.keys()): + weighted_deviation_matrix += ( + info_set_node.y_values[dev] / z + ) * dev.return_transform_matrix() + new_strategy = weighted_deviation_matrix[:, 0] + info_state_policy = dict(zip(legal_actions, new_strategy)) + + # Full regret matching by finding the least squares solution to the + # fixed point of the EFR regret matching function. + # Last row of matrix and the column entry minimises the solution + # towards a strategy. 
+ elif z > 0: + weighted_deviation_matrix = -np.eye(num_actions) + + for dev in list(info_set_node.y_values.keys()): + weighted_deviation_matrix += ( + info_set_node.y_values[dev] / z + ) * dev.return_transform_matrix() + + normalisation_row = np.ones(num_actions) + weighted_deviation_matrix = np.vstack( + [weighted_deviation_matrix, normalisation_row] + ) + b = np.zeros(num_actions + 1) + b[num_actions] = 1 + b = np.reshape(b, (num_actions + 1, 1)) + + strategy = linalg.lstsq(weighted_deviation_matrix, b)[0] + + # Adopt same clipping strategy as paper author's code. + np.clip(strategy, a_min=0, a_max=1, out=strategy) + strategy = strategy / np.sum(strategy) + + info_state_policy = dict(zip(legal_actions, strategy[:, 0])) + # Use a uniform strategy as sum of all regrets is negative. + else: + unif_policy_value = 1.0 / num_actions + info_state_policy = { + legal_actions[index]: unif_policy_value + for index in range(num_actions) + } + return info_state_policy + + +def _update_average_policy(average_policy, info_state_nodes): + """Updates in place `average_policy` to the average of all policies iterated. + + This function is a module level function to be reused by both CFRSolver and + CFRBRSolver. + + Args: + average_policy: A `policy.TabularPolicy` to be updated in-place. + info_state_nodes: A dictionary {`info_state_str` -> `_InfoStateNode`}. + """ + for info_state, info_state_node in info_state_nodes.items(): + info_state_policies_sum = info_state_node.cumulative_policy + state_policy = average_policy.policy_for_key(info_state) + probabilities_sum = sum(info_state_policies_sum.values()) + if probabilities_sum == 0: + num_actions = len(info_state_node.legal_actions) + for action in info_state_node.legal_actions: + state_policy[action] = 1 / num_actions + else: + for action, action_prob_sum in info_state_policies_sum.items(): + state_policy[action] = action_prob_sum / probabilities_sum + + +def strat_dict_to_array(strategy_dictionary): + """A helper function to convert the strategy dictionary mapping. + + Conversion applies action -> prob value to an array. + + Args: + strategy_dictionary: a dictionary action -> prob value. + + Returns: + strategy_array: an array with the ith action's value at the i-1th index. + """ + actions = list(strategy_dictionary.keys()) + strategy_array = np.zeros((len(actions), 1)) + for action in range(len(actions)): + strategy_array[action][0] = strategy_dictionary[actions[action]] + return strategy_array + + +def array_to_strat_dict(strategy_array, legal_actions): + """A helper function to convert a strategy. + + Converts a strategy array to an action -> prob value dictionary. + + Args: + strategy_array: an array with the ith action's value at the i-1th index. + legal_actions: the list of all legal actions at the current state. + + Returns: + strategy_dictionary: a dictionary action -> prob value. + """ + return dict(zip(legal_actions, strategy_array)) + + +def create_probs_from_index(indices, current_policy): + path_to_state = [] + if indices is None or not indices: + return [] + for index in indices: + strat_dict = array_to_strat_dict( + current_policy.action_probability_array[index[1]], index[0] + ) + path_to_state.append(strat_dict) + return path_to_state + + +# Deviation set definitions +def return_blind_action(num_actions, history, _): + """Returns an array of all Blind Action deviations. + + Returns an array of all Blind Action deviations. with respect to an with + respect to an information set. 
+ + Args: + num_actions: the integer of all actions that can be taken at that + information set. + history: an array containing the prior actions played by the `player` to + reach the information set. + + Returns: + an array of LocalDeviationWithTimeSelection objects that represent all + Blind Action deviations that are realizable at the information set. + """ + memory_weights = [np.full(len(history), 1)] + prior_actions_in_memory = history + return return_all_external_deviations( + num_actions, memory_weights, prior_actions_in_memory + ) + + +def return_informed_action(num_actions, history, _): + """Returns an array of all Informed Action deviations. + + Returns an array of all Informed Action deviations with respect to an + information set. + + Args: + num_actions: the integer of all actions that can be taken at that + information set. + history: an array containing the prior actions played by the `player` to + reach the information set. + + Returns: + an array of LocalDeviationWithTimeSelection objects that represent all + Informed Action deviations that are realizable at the information set. + """ + memory_weights = [np.full(len(history), 1)] + prior_actions_in_memory = history + return return_all_non_identity_internal_deviations( + num_actions, memory_weights, prior_actions_in_memory + ) + + +def return_blind_cf(num_actions, history, _): + """Returns an array of all Blind Counterfactual deviations. + + Returns an array of all Blind Counterfactual deviations with respect to an + information set. + + Note: EFR using only Blind Counterfactual deviations is equivalent + to vanilla Counterfactual Regret Minimisation (CFR). + Args: + num_actions: the integer of all actions that can be taken at that + information set. + history: an array containing the prior actions played by the `player` to + reach the information set. + + Returns: + an array of LocalDeviationWithTimeSelection objects that represent all + Blind CF deviations that are realizable at the information set. + """ + memory_weights = [None] + prior_actions_in_memory = np.zeros(len(history)) + return return_all_external_deviations( + num_actions, memory_weights, prior_actions_in_memory + ) + + +def return_informed_cf(num_actions, history, _): + """Returns an array of all Informed Counterfactual deviations. + + Returns an array of all Informed Counterfactual deviations with respect with + respect to an information set. + + Args: + num_actions: the integer of all actions that can be taken at that + information set. + history: an array containing the prior actions played by the `player` to + reach the information set. + + Returns: + an array of LocalDeviationWithTimeSelection objects that represent all + Informed CF deviations that are realizable at the information set. + """ + memory_weights = [None] + prior_actions_in_memory = np.zeros(len(history)) + return return_all_non_identity_internal_deviations( + num_actions, memory_weights, prior_actions_in_memory + ) + + +def return_blind_partial_sequence(num_actions, history, _): + """Returns an array of all Blind Partial Sequence deviations (BPS). + + Returns an array of all Blind Partial Sequence deviations (BPS) with respect + to an information set. + + Args: + num_actions: the integer of all actions that can be taken at that + information set. + history: an array containing the prior actions played by the `player` to + reach the information set. 
+ + Returns: + an array of LocalDeviationWithTimeSelection objects that represent all + BPS deviations that are realizable at the information set. + """ + prior_actions_in_memory = history + memory_weights = [None] + if history: + memory_weights.append(np.ones(len(history))) + for i in range(len(history)): + possible_memory_weight = np.zeros(len(history)) + possible_memory_weight[0:i] = np.full(i, 1.0) + memory_weights.append(possible_memory_weight) + return return_all_external_deviations( + num_actions, memory_weights, prior_actions_in_memory + ) + + +def return_cf_partial_sequence(num_actions, history, _): + """Returns an array of all Counterfactual Partial Sequence deviations (CFPS). + + Returns an array of all Counterfactual Partial Sequence deviations (CFPS) + with respect to an information set. + + Args: + num_actions: the integer of all actions that can be taken at that + information set. + history: an array containing the prior actions played by the `player` to + reach the information set. + + Returns: + an array of LocalDeviationWithTimeSelection objects that represent + all CFPS deviations that are realizable at the information set. + """ + prior_actions_in_memory = history + memory_weights = [None] + if history: + memory_weights.append(np.ones(len(history))) + for i in range(len(history)): + possible_memory_weight = np.zeros(len(history)) + possible_memory_weight[0:i] = np.full(i, 1.0) + memory_weights.append(possible_memory_weight) + return return_all_non_identity_internal_deviations( + num_actions, memory_weights, prior_actions_in_memory + ) + + +def return_cs_partial_sequence(num_actions, history, prior_legal_actions): + """Returns an array of all Casual Partial Sequence deviations. + + Returns an array of all Casual Partial Sequence deviations with respect to + an information set. + + Args: + num_actions: the integer of all actions that can be taken at that + information set + history: an array containing the prior actions played by the `player` to + reach the information set. + prior_legal_actions: a 2d array containing the legal actions for each + preceeding state. + + Returns: + an array of LocalDeviationWithTimeSelection objects that represent all + Casual Partial Sequence deviations that are realizable at the + information set. + """ + prior_actions_in_memory = history + external_memory_weights = [] + + for i in range(len(history)): + possible_memory_weight = np.zeros(len(history)) + possible_memory_weight[0:i] = np.full(i, 1.0) + external_memory_weights.append(possible_memory_weight) + + external = return_all_external_modified_deviations( + num_actions, + external_memory_weights, + prior_legal_actions, + prior_actions_in_memory, + ) + internal = return_blind_action(num_actions, history, None) + + cf_ext = return_informed_cf(num_actions, history, None) + cf_int = return_blind_cf(num_actions, history, None) + + return np.concatenate((external, internal, cf_ext, cf_int)) + + +def return_cs_partial_sequence_orginal( + num_actions, history, prior_legal_actions +): + """Returns an array of all Casual Partial Sequence deviations. + + Returns an array of all Casual Partial Sequence deviations with respect to + an information set. + + Args: + num_actions: the integer of all actions that can be taken at that + information set + history: an array containing the prior actions played by the `player` to + reach the information set. + prior_legal_actions: a 2d array containing the legal actions for each + preceeding state. 
+ + Returns: + an array of LocalDeviationWithTimeSelection objects that represent all + Casual Partial Sequence deviations that are realizable at the + information set. + """ + prior_actions_in_memory = history + external_memory_weights = [] + + for i in range(len(history)): + possible_memory_weight = np.zeros(len(history)) + possible_memory_weight[0:i] = np.full(i, 1.0) + external_memory_weights.append(possible_memory_weight) + + external = return_all_external_modified_deviations( + num_actions, + external_memory_weights, + prior_legal_actions, + prior_actions_in_memory, + ) + internal = return_informed_action(num_actions, history, None) + + cf_ext = return_informed_cf(num_actions, history, None) + return np.concatenate((external, internal, cf_ext)) + + +def return_twice_informed_partial_sequence( + num_actions, history, prior_legal_actions +): + """Returns an array of all Twice Informed Partial Sequence (TIPS) deviations. + + Returns an array of all Twice Informed Partial Sequence (TIPS) deviations + with respect to an information set. + + Args: + num_actions: the integer of all actions that can be taken at that + information set + history: an array containing the prior actions played by the `player` to + reach the information set. + prior_legal_actions: a 2d array containing the legal actions for each + preceeding state. + + Returns: + an array of LocalDeviationWithTimeSelection objects that represent + all TIPS deviations that are realizable at theinformation set. + """ + prior_actions_in_memory = history + memory_weights = [] + + for i in range(len(history)): + possible_memory_weight = np.zeros(len(history)) + possible_memory_weight[0:i] = np.full(i, 1.0) + memory_weights.append(possible_memory_weight) + + internal = return_all_internal_modified_deviations( + num_actions, memory_weights, prior_legal_actions, prior_actions_in_memory + ) + + cf_int = return_informed_cf(num_actions, history, None) + return np.concatenate((internal, cf_int)) + + +def generate_all_action_permutations(current_stem, remaining_actions): + """Return a List of all possible game continuations. + + Return a List of all possible game continuations playing on from the + current stem and with playing from the set of remaining actions. + `current_stem` = "" generates all possible playthroughs from the current + information state. + + Args: + current_stem: the prior sequence of actions to be completed by the + remaining actions + remaining_actions: a 2d array of [subsequent states]x[possible actions] + + Returns: + An array with each element being the current stem joined with a possible + permuation of remaining actions + """ + if not remaining_actions: + return [np.array(current_stem)] + else: + next_actions = remaining_actions[0] + permutations = [] + for action in next_actions: + next_stem = current_stem.copy() + next_stem.append(action) + next_remaining_actions = remaining_actions[1:] + prev_permutations = generate_all_action_permutations( + next_stem, next_remaining_actions + ) + for i in prev_permutations: + permutations.append(i) + return permutations + + +def return_behavourial(num_actions, history, prior_legal_actions): + """Returns an array of all single target behavioural deviations. + + The target behavioural deviations are with respect to an information set. + + Args: + num_actions: the integer of all actions that can be taken at that + information set + history: an array containing the prior actions played by the `player` to + reach the information set. 
+ prior_legal_actions: a 2d array containing the legal actions for each + preceeding state. + + Returns: + an array of LocalDeviationWithTimeSelection objects that represent + all (single target) behaviourial deviations that are realizable at the + information set. + """ + deviations = [] + if not history: + internal = return_all_non_identity_internal_deviations( + num_actions, [None], history + ) + for i in internal: + deviations.append(i) + else: + for deviation_info in range(len(history)): + prior_possible_memory_actions = generate_all_action_permutations( + [], prior_legal_actions[: deviation_info + 1] + ) + memory_weights = np.concatenate( + (np.ones(deviation_info), np.zeros(len(history) - deviation_info)) + ) + for prior_memory_actions in prior_possible_memory_actions: + prior_memory_actions = np.concatenate(( + prior_memory_actions, + np.zeros(len(history) - len(prior_memory_actions)), + )) + for _ in range(len(history) - len(prior_memory_actions)): + prior_memory_actions.append(0) + prior_memory_actions_cp = prior_memory_actions.copy() + internal = return_all_non_identity_internal_deviations( + num_actions, [memory_weights], prior_memory_actions_cp + ) + for i in internal: + deviations.append(i) + + return deviations + + +class LocalDeviationWithTimeSelection: + """Comprised of a swap transformation. + + Comprised of a swap transformation that will be applied at the + current information state, a memory weighting which describes + the actions that are remembered and the memory action history + (prior_memory_actions) that is remembered. + Note that the "memory action history" might not equal the history in + the case of some deviation types (e.g tips deviations). + """ + + # The swap transformation that will be compared to the unmodified strategy. + # The transformation is applied at the memory state. + local_swap_transform = attr.ib() + + # Which actions have been forgotten (0) or remembered (1) according + # to the memory state. + prior_actions_weight = attr.ib() + + # Which actions have been take according to the memory state + prior_memory_actions = attr.ib() + + use_unmodified_history = attr.ib() + + def __init__( + self, + target, + source, + num_actions, + prior_actions_weight, + prior_memory_actions, + is_external, + use_unmodified_history=True, + ): + """Represents a swap transformation (either external and internal). + + Represents a swap transformation (either external and internal) for a given + memory state. + + Args: + target: the action that will be played when the deviation is triggered. + source: the action that will trigger the target action when suggested + (used only by internal deviations, i.e is_external = False). + num_actions: the number of actions that can be played for this + information state. + prior_actions_weight: an array (the length of the game history) + of the information state actions have been forgotten (0) + or remembered (1) wrt to the memory state. + This is represented numerically for possible experimentation with + "partially forgotten" actions (i.e in the range (0,1)). + prior_memory_actions: the preceeding actions upto the the information + state (which the LocalDeviationWithTimeSelection is defined with respect + to). + is_external: a boolean use to determine whether this is an + internal or external deviation. + use_unmodified_history: a boolean used to indicate whether the provided + memory_actions are the same as the information state it was derived + from. 
+ """ + self.local_swap_transform = LocalSwapTransform( + target, source, num_actions, is_external=is_external + ) + self.prior_actions_weight = prior_actions_weight + self.prior_memory_actions = prior_memory_actions + self.use_unmodified_history = use_unmodified_history + + # If a pure strategy, a pure strategy will be returned (aka function works + # for both actions and strategies as input). + def deviate(self, strategy): + """Returns a strategy array. + + Returns the strategy array given by deviating according to the + 'self.local_swap_transform.matrix_transform' matrix. + + Args: + strategy: the strategy array to deviate from. + + Returns: + the matrix product of the the matrix_transform and the provided strategy. + """ + return self.local_swap_transform.deviate(strategy) + + def return_transform_matrix(self): + """Returns a matrix_transform. + + Returns the matrix_transform of the associated `LocalSwapTransform` object. + """ + return self.local_swap_transform.matrix_transform + + def player_deviation_reach_probability( + self, prior_possible_action_probabilities + ): + """Calculate the probability of reaching the current memory state. + + Calculate the probability of reaching the current memory state + provided the player played from the start of the game to this state. + This is assuming that they play with their current strategy with the + deviation applied. + Args: + prior_possible_action_probabilities: a 2d array of length [player's + history]x[number of actions at that state]. These are the current + strategies of the player, from start to end of their history. + + Returns: + The reach probability of the current memory state. + """ + if ( + self.prior_actions_weight is None + or self.prior_memory_actions is None + or prior_possible_action_probabilities is None + ): + return 1.0 + + memory_action_probabilities = np.ones(len(self.prior_actions_weight)) + # Reconstruct memory probabilities from history provided to the deviation + # to reach info set and the current memory probs. 
+ memory_weightings = self.prior_actions_weight.copy() + if self.use_unmodified_history: + for state in range(len(self.prior_memory_actions)): + if self.prior_actions_weight[state] != 0: + memory_action_probabilities[state] = ( + prior_possible_action_probabilities[state][ + self.prior_memory_actions[state] + ] + ) + else: + memory_action_probabilities[state] = 1 + memory_weightings[state] = 1 + + path_probability = np.multiply( + memory_weightings, memory_action_probabilities + ) + memory_reach_probability = np.prod(path_probability) + return memory_reach_probability + + def __eq__(self, other): + return self.local_swap_transform == other.local_swap_transform + + def __hash__(self): + return hash(self.local_swap_transform) + + +def return_all_non_identity_internal_deviations( + num_actions, possible_prior_weights, prior_memory_actions +): + """Returns all non-identity internal deviations.""" + deviations = [] + for prior_actions_weight in possible_prior_weights: + for target in range(num_actions): + for source in range(num_actions): + if source != target: + deviations.append( + LocalDeviationWithTimeSelection( + target, + source, + num_actions, + prior_actions_weight, + prior_memory_actions, + False, + ) + ) + return deviations + + +def return_all_internal_modified_deviations( + num_actions, + possible_prior_weights, + possible_prior_memory_actions, + prior_memory_actions, +): + """Returns all internal deviations with modified memory actions.""" + deviations = [] + for prior_actions_weight in possible_prior_weights: + try: + modification_index = np.where(prior_actions_weight == 0)[0][0] + except IndexError: + modification_index = 0 + if modification_index == len(prior_memory_actions): + for target in range(num_actions): + for source in range(num_actions): + if source != target: + deviations.append( + LocalDeviationWithTimeSelection( + target, + source, + num_actions, + prior_actions_weight, + prior_memory_actions, + False, + ) + ) + else: + previous_action = prior_memory_actions[modification_index] + for alt_action in possible_prior_memory_actions[modification_index]: + prior_memory_actions[modification_index] = alt_action + for target in range(num_actions): + for source in range(num_actions): + if source != target: + deviations.append( + LocalDeviationWithTimeSelection( + target, + source, + num_actions, + prior_actions_weight, + prior_memory_actions.copy(), + False, + ) + ) + prior_memory_actions[modification_index] = previous_action + return deviations + + +def return_all_external_deviations( + num_actions, possible_prior_weights, prior_memory_actions +): + """Returns all external deviations.""" + deviations = [] + for prior_actions_weight in possible_prior_weights: + for target in range(num_actions): + deviations.append( + LocalDeviationWithTimeSelection( + target, + target, + num_actions, + prior_actions_weight, + prior_memory_actions, + True, + ) + ) + return deviations + + +# Modify last action as required +def return_all_external_modified_deviations( + num_actions, + possible_prior_weights, + possible_prior_memory_actions, + prior_memory_actions, +): + """Returns all external deviations with modified memory actions.""" + deviations = [] + for prior_actions_weight in possible_prior_weights: + try: + modification_index = np.where(prior_actions_weight == 0)[0][0] + except IndexError: + modification_index = 0 + if modification_index == len(prior_memory_actions): + for target in range(num_actions): + deviations.append( + LocalDeviationWithTimeSelection( + target, + target, + 
num_actions, + prior_actions_weight, + prior_memory_actions, + True, + ) + ) + else: + previous_action = prior_memory_actions[modification_index] + for alt_action in possible_prior_memory_actions[modification_index]: + prior_memory_actions[modification_index] = alt_action + for target in range(num_actions): + deviations.append( + LocalDeviationWithTimeSelection( + target, + target, + num_actions, + prior_actions_weight, + prior_memory_actions.copy(), + True, + ) + ) + prior_memory_actions[modification_index] = previous_action + return deviations + + +def return_identity_deviation( + num_actions, possible_prior_weights, prior_memory_actions +): + deviations = [] + for prior_actions_weight in possible_prior_weights: + deviations.append( + LocalDeviationWithTimeSelection( + 0, 0, num_actions, prior_actions_weight, prior_memory_actions, False + ) + ) + return deviations + + +# A swap transformation given by the matrix_transform for an information state. +# Of actions_num size. +class LocalSwapTransform: + """Represents a swap transformation (both external and internal). + + Represents a swap transformation (both external and internal) + for an information state for a certain number of actions. + """ + + source_action = attr.ib() + target_action = attr.ib() + matrix_transform = attr.ib() + actions_num = attr.ib() + is_external = attr.ib() + + def __init__(self, target, source, actions_num, is_external=True): + """Creates a matrix transformation. + + Creates the matrix transformation describing the swap transformation + and initalises variables. + + Args: + target: the action that will be played when the deviation is triggered. + source: the action that triggers a swap to the target action + (used only by internal deviations, i.e is_external = False) + actions_num: the number of actions that can be played for this + information state. + is_external: determine whether to create an internal or external + deviation. + """ + self.source_action = source + self.target_action = target + self.actions_num = actions_num + if is_external: + self.source_action = None + self.matrix_transform = np.zeros((actions_num, actions_num)) + self.matrix_transform[target] = np.ones(actions_num) + else: + self.matrix_transform = np.eye(actions_num) + self.matrix_transform[target][source] = 1 + self.matrix_transform[source][source] = 0 + + def __repr__(self) -> str: + return ( + "Swapping from Action: " + + str(self.source_action) + + " to Action: " + + str(self.target_action) + ) + + def __eq__(self, other) -> bool: + return ( + self.source_action == other.source_action + and self.target_action == other.target_action + and self.actions_num == other.actions_num + ) + + def __hash__(self): + return hash( + f"{str(self.source_action)} {str(self.target_action)} " + f" {str(self.actions_num)} {str(self.is_external)}" + ) + + def deviate(self, strategy): + """Returns a strategy array. + + Returns the strategy array given by deviating according to + 'self.matrix_transform' matrix. + + Args: + strategy: the strategy array to deviate from. + + Returns: + the matrix product of the the matrix_transform and the provided strategy. 
+ """ + return np.matmul(self.matrix_transform, strategy) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/efr_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/efr_test.py new file mode 100644 index 0000000..087714d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/efr_test.py @@ -0,0 +1,124 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.algorithms.efr.""" + +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np + +from open_spiel.python import policy +from open_spiel.python.algorithms import efr +from open_spiel.python.algorithms import expected_game_score +import pyspiel + + +class EFRTest(parameterized.TestCase, absltest.TestCase): + + def setUp(self): + super().setUp() + self.kuhn_game = pyspiel.load_game("kuhn_poker") + self.leduc_game = pyspiel.load_game("leduc_poker") + self.kuhn_3p_game = pyspiel.load_game("kuhn_poker(players=3)") + self.sheriff_game = pyspiel.load_game("sheriff") + + self.kuhn_uniform_policy = policy.TabularPolicy(self.kuhn_game) + self.leduc_uniform_policy = policy.TabularPolicy(self.leduc_game) + + @parameterized.parameters([ + "blind action", + "informed action", + "blind cf", + "informed cf", + "bps", + "cfps", + "csps", + "tips", + "bhv", + ]) + def test_policy_zero_is_uniform(self, deviations_name): + # We use Leduc and not Kuhn, because Leduc has illegal actions and Kuhn does + # not. + cfr_solver = efr.EFRSolver( + game=self.leduc_game, deviations_name=deviations_name + ) + np.testing.assert_array_equal( + self.leduc_uniform_policy.action_probability_array, + cfr_solver.current_policy().action_probability_array, + ) + np.testing.assert_array_equal( + self.leduc_uniform_policy.action_probability_array, + cfr_solver.average_policy().action_probability_array, + ) + + @parameterized.parameters( + ["blind cf", "informed cf", "bps", "cfps", "csps", "tips", "bhv"] + ) + def test_efr_kuhn_poker(self, deviations_name): + efr_solver = efr.EFRSolver( + game=self.kuhn_game, deviations_name=deviations_name + ) + for _ in range(300): + efr_solver.evaluate_and_update_policy() + average_policy = efr_solver.average_policy() + average_policy_values = expected_game_score.policy_value( + self.kuhn_game.new_initial_state(), [average_policy] * 2 + ) + # 1/18 is the Nash value. See https://en.wikipedia.org/wiki/Kuhn_poker + np.testing.assert_allclose( + average_policy_values, [-1 / 18, 1 / 18], atol=1e-3 + ) + + @parameterized.parameters( + ["blind cf", "informed cf", "bps", "cfps", "csps", "tips", "bhv"] + ) + def test_efr_kuhn_poker_3p(self, deviations_name): + efr_solver = efr.EFRSolver( + game=self.kuhn_3p_game, deviations_name=deviations_name + ) + strategies = [] + corr_dist_values = [] + for _ in range(10): + efr_solver.evaluate_and_update_policy() + # Convert the policy to a pyspiel.TabularPolicy, needed by the CorrDist + # functions on the C++ side. 
+ strategies.append( + policy.python_policy_to_pyspiel_policy(efr_solver.current_policy()) + ) + corr_dev = pyspiel.uniform_correlation_device(strategies) + cce_dist_info = pyspiel.cce_dist(self.kuhn_3p_game, corr_dev) + corr_dist_values.append(cce_dist_info.dist_value) + self.assertLess(corr_dist_values[-1], corr_dist_values[0]) + + @absltest.skip("Too long for a unit test") + @parameterized.parameters(["blind cf", "bps", "tips"]) + def test_efr_cce_dist_sheriff(self, deviations_name): + efr_solver = efr.EFRSolver( + game=self.sheriff_game, deviations_name=deviations_name + ) + strategies = [] + corr_dist_values = [] + for _ in range(5): + efr_solver.evaluate_and_update_policy() + strategies.append( + policy.python_policy_to_pyspiel_policy(efr_solver.current_policy()) + ) + corr_dev = pyspiel.uniform_correlation_device(strategies) + cce_dist_info = pyspiel.cce_dist(self.sheriff_game, corr_dev) + corr_dist_values.append(cce_dist_info.dist_value) + self.assertLess(corr_dist_values[-1], corr_dist_values[0]) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/evaluate_bots.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/evaluate_bots.py new file mode 100644 index 0000000..77972c6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/evaluate_bots.py @@ -0,0 +1,45 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Play bots against each other.""" + +import pyspiel + + +def evaluate_bots(state, bots, rng): + """Plays bots against each other, returns terminal utility for each bot.""" + for bot in bots: + bot.restart_at(state) + while not state.is_terminal(): + if state.is_chance_node(): + outcomes, probs = zip(*state.chance_outcomes()) + action = rng.choice(outcomes, p=probs) + for bot in bots: + bot.inform_action(state, pyspiel.PlayerId.CHANCE, action) + state.apply_action(action) + elif state.is_simultaneous_node(): + joint_actions = [ + bot.step(state) + if state.legal_actions(player_id) else pyspiel.INVALID_ACTION + for player_id, bot in enumerate(bots) + ] + state.apply_actions(joint_actions) + else: + current_player = state.current_player() + action = bots[current_player].step(state) + for i, bot in enumerate(bots): + if i != current_player: + bot.inform_action(state, current_player, action) + state.apply_action(action) + return state.returns() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/evaluate_bots_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/evaluate_bots_test.py new file mode 100644 index 0000000..f92ff6b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/evaluate_bots_test.py @@ -0,0 +1,67 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.algorithms.evaluate_bots.""" + +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np + +from open_spiel.python import policy +from open_spiel.python.algorithms import evaluate_bots +from open_spiel.python.bots import uniform_random +from open_spiel.python.bots.policy import PolicyBot +import pyspiel + + +GAME = pyspiel.load_game("kuhn_poker") + + +def policy_bots(): + random_policy = policy.UniformRandomPolicy(GAME) + + py_bot = PolicyBot(0, np.random.RandomState(4321), random_policy) + cpp_bot = pyspiel.make_policy_bot( + GAME, 1, 1234, + policy.python_policy_to_pyspiel_policy(random_policy.to_tabular())) + + return [py_bot, cpp_bot] + + +class EvaluateBotsTest(parameterized.TestCase): + + @parameterized.parameters([([ + pyspiel.make_uniform_random_bot(0, 1234), + uniform_random.UniformRandomBot(1, np.random.RandomState(4321)) + ],), (policy_bots(),)]) + def test_cpp_vs_python(self, bots): + results = np.array([ + evaluate_bots.evaluate_bots(GAME.new_initial_state(), bots, np.random) + for _ in range(10000) + ]) + average_results = np.mean(results, axis=0) + np.testing.assert_allclose(average_results, [0.125, -0.125], atol=0.1) + + def test_random_vs_stateful(self): + game = pyspiel.load_game("tic_tac_toe") + bots = [ + pyspiel.make_stateful_random_bot(game, 0, 1234), + uniform_random.UniformRandomBot(1, np.random.RandomState(4321)) + ] + for _ in range(1000): + evaluate_bots.evaluate_bots(game.new_initial_state(), bots, np.random) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/expected_game_score.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/expected_game_score.py new file mode 100644 index 0000000..fc329ff --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/expected_game_score.py @@ -0,0 +1,58 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Computes the value of a given policy.""" + +from typing import List, Union + +import numpy as np + +from open_spiel.python import policy + + +def _transitions(state, policies): + """Returns iterator over (action, prob) from the given state.""" + if state.is_chance_node(): + return state.chance_outcomes() + elif state.is_simultaneous_node(): + return policy.joint_action_probabilities(state, policies) + else: + player = state.current_player() + return policies[player].action_probabilities(state).items() + + +def policy_value(state, + policies: Union[List[policy.Policy], policy.Policy], + probability_threshold: float = 0): + """Returns the expected values for the state for players following `policies`. + + Computes the expected value of the`state` for each player, assuming player `i` + follows the policy given in `policies[i]`. + + Args: + state: A `pyspiel.State`. + policies: A `list` of `policy.Policy` objects, one per player for sequential + games, one policy for simulatenous games. + probability_threshold: only sum over entries with prob greater than this + (default: 0). + + Returns: + A `numpy.array` containing the expected value for each player. + """ + if state.is_terminal(): + return np.array(state.returns()) + else: + return sum(prob * policy_value(policy.child(state, action), policies) + for action, prob in _transitions(state, policies) + if prob > probability_threshold) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/expected_game_score_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/expected_game_score_test.py new file mode 100644 index 0000000..92fa296 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/expected_game_score_test.py @@ -0,0 +1,45 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for open_spiel.python.algorithms.policy_value.""" + +from absl.testing import absltest +import numpy as np + +from open_spiel.python import games # pylint: disable=unused-import +from open_spiel.python import policy +from open_spiel.python.algorithms import expected_game_score +import pyspiel + + +class PolicyValueTest(absltest.TestCase): + + def test_expected_game_score_uniform_random_kuhn_poker(self): + game = pyspiel.load_game("kuhn_poker") + uniform_policy = policy.UniformRandomPolicy(game) + uniform_policy_values = expected_game_score.policy_value( + game.new_initial_state(), [uniform_policy] * 2) + self.assertTrue(np.allclose(uniform_policy_values, [1 / 8, -1 / 8])) + + def test_expected_game_score_uniform_random_iterated_prisoner_dilemma(self): + game = pyspiel.load_game( + "python_iterated_prisoners_dilemma(max_game_length=6)") + pi = policy.UniformRandomPolicy(game) + values = expected_game_score.policy_value(game.new_initial_state(), pi) + # 4*(1-0.875**6)/0.125 = 17.6385498 + np.testing.assert_allclose(values, [17.6385498, 17.6385498]) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/exploitability.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/exploitability.py new file mode 100644 index 0000000..4be4b1e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/exploitability.py @@ -0,0 +1,207 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Compute the exploitability of a bot / strategy in a 2p sequential game. + +This computes the value that a policy achieves against a worst-case opponent. +The policy applies to both player 1 and player 2, and hence we have a 2-player +symmetric zero-sum game, so the game value is zero for both players, and hence +value-vs-best-response is equal to exploitability. + +We construct information sets, each consisting of a list of (state, probability) +pairs where probability is a counterfactual reach probability, i.e. the +probability that the state would be reached if the best responder (the current +player) played to reach it. This is the product of the probabilities of the +necessary chance events and opponent action choices required to reach the node. + +These probabilities give us the correct weighting for possible states of the +world when considering our best response for a particular information set. + +The values we calculate are values of being in the specific state. Unlike in a +CFR algorithm, they are not weighted by reach probabilities. These values +take into account the whole state, so they may depend on information which is +unknown to the best-responding player. 
+""" + +import collections + +import numpy as np + +from open_spiel.python import policy as policy_lib +from open_spiel.python.algorithms import best_response as pyspiel_best_response +import pyspiel + + +def _state_values(state, num_players, policy): + """Value of a state for every player given a policy.""" + if state.is_terminal(): + return np.array(state.returns()) + else: + if state.is_simultaneous_node(): + p_action = tuple(policy_lib.joint_action_probabilities(state, policy)) + + else: + p_action = ( + state.chance_outcomes() + if state.is_chance_node() + else policy.action_probabilities(state).items() + ) + return sum( + prob + * _state_values(policy_lib.child(state, action), num_players, policy) + for action, prob in p_action + ) + + +def best_response(game, policy, player_id): + """Returns information about the specified player's best response. + + Given a game and a policy for every player, computes for a single player their + best unilateral strategy. Returns the value improvement that player would + get, the action they should take in each information state, and the value + of each state when following their unilateral policy. + + Args: + game: An open_spiel game, e.g. kuhn_poker + policy: A `policy.Policy` object. This policy should depend only on the + information state available to the current player, but this is not + enforced. + player_id: The integer id of a player in the game for whom the best response + will be computed. + + Returns: + A dictionary of values, with keys: + best_response_action: The best unilateral strategy for `player_id` as a + map from infostatekey to action_id. + best_response_state_value: The value obtained for `player_id` when + unilaterally switching strategy, for each state. + best_response_value: The value obtained for `player_id` when unilaterally + switching strategy. + info_sets: A dict of info sets, mapping info state key to a list of + `(state, counterfactual_reach_prob)` pairs. + nash_conv: `best_response_value - on_policy_value` + on_policy_value: The value for `player_id` when all players follow the + policy + on_policy_values: The value for each player when all players follow the + policy + """ + root_state = game.new_initial_state() + br = pyspiel_best_response.BestResponsePolicy(game, player_id, policy, + root_state) + on_policy_values = _state_values(root_state, game.num_players(), policy) + best_response_value = br.value(root_state) + + # Get best response action for unvisited states + for infostate in set(br.infosets) - set(br.cache_best_response_action): + br.best_response_action(infostate) + + return { + "best_response_action": br.cache_best_response_action, + "best_response_state_value": br.cache_value, + "best_response_value": best_response_value, + "info_sets": br.infosets, + "nash_conv": best_response_value - on_policy_values[player_id], + "on_policy_value": on_policy_values[player_id], + "on_policy_values": on_policy_values, + } + + +def exploitability(game, policy): + """Returns the exploitability of the policy in the game. + + This is implemented only for 2 players constant-sum games, and is equivalent + to NashConv / num_players in that case. Prefer using `nash_conv`. + + Args: + game: An open_spiel game, e.g. kuhn_poker + policy: A `policy.Policy` object. This policy should depend only on the + information state available to the current player, but this is not + enforced. + + Returns: + The value that this policy achieves when playing against the worst-case + non-cheating opponent, averaged across both starting positions. 
It has a + minimum of zero (assuming the supplied policy is non-cheating) and + this bound is achievable in a 2p game. + + Raises: + ValueError if the game is not a two-player constant-sum turn-based game. + """ + if game.num_players() != 2: + raise ValueError("Game must be a 2-player game") + game_info = game.get_type() + if game_info.dynamics != pyspiel.GameType.Dynamics.SEQUENTIAL: + raise ValueError("The game must be turn-based, not {}".format( + game_info.dynamics)) + if game_info.utility not in (pyspiel.GameType.Utility.ZERO_SUM, + pyspiel.GameType.Utility.CONSTANT_SUM): + raise ValueError("The game must be constant- or zero-sum, not {}".format( + game_info.utility)) + root_state = game.new_initial_state() + nash_conv_value = ( + sum( + pyspiel_best_response.CPPBestResponsePolicy( + game, best_responder, policy).value(root_state) + for best_responder in range(game.num_players())) - game.utility_sum()) + return nash_conv_value / game.num_players() + + +_NashConvReturn = collections.namedtuple("_NashConvReturn", + ["nash_conv", "player_improvements"]) + + +def nash_conv(game, policy, return_only_nash_conv=True, use_cpp_br=False): + r"""Returns a measure of closeness to Nash for a policy in the game. + + See https://arxiv.org/pdf/1711.00832.pdf for the NashConv definition. + + Args: + game: An open_spiel game, e.g. kuhn_poker + policy: A `policy.Policy` object. This policy should depend only on the + information state available to the current player, but this is not + enforced. + return_only_nash_conv: Whether to only return the NashConv value, or a + namedtuple containing additional statistics. Prefer using `False`, as we + hope to change the default to that value. + use_cpp_br: if True, compute the best response in c++ + + Returns: + Returns a object with the following attributes: + - player_improvements: A `[num_players]` numpy array of the improvement + for players (i.e. value_player_p_versus_BR - value_player_p). + - nash_conv: The sum over all players of the improvements in value that each + player could obtain by unilaterally changing their strategy, i.e. + sum(player_improvements). + """ + root_state = game.new_initial_state() + if use_cpp_br: + best_response_values = np.array([ + pyspiel_best_response.CPPBestResponsePolicy( + game, best_responder, policy).value(root_state) + for best_responder in range(game.num_players()) + ]) + else: + best_response_values = np.array([ + pyspiel_best_response.BestResponsePolicy( + game, best_responder, policy).value(root_state) + for best_responder in range(game.num_players()) + ]) + on_policy_values = _state_values(root_state, game.num_players(), policy) + player_improvements = best_response_values - on_policy_values + nash_conv_ = sum(player_improvements) + if return_only_nash_conv: + return nash_conv_ + else: + return _NashConvReturn( + nash_conv=nash_conv_, player_improvements=player_improvements) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/exploitability_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/exploitability_test.py new file mode 100644 index 0000000..169994c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/exploitability_test.py @@ -0,0 +1,156 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.algorithms.exploitability.""" + +from absl.testing import absltest +from absl.testing import parameterized + +from open_spiel.python import policy +from open_spiel.python.algorithms import exploitability +from open_spiel.python.algorithms import policy_utils +from open_spiel.python.games import data +import pyspiel + + +class ExploitabilityTest(parameterized.TestCase): + + def test_exploitability_on_kuhn_poker_uniform_random(self): + # NashConv of uniform random test_policy from (found on Google books): + # https://link.springer.com/chapter/10.1007/978-3-319-75931-9_5 + game = pyspiel.load_game("kuhn_poker") + test_policy = policy.UniformRandomPolicy(game) + expected_nash_conv = 11 / 12 + self.assertAlmostEqual( + exploitability.exploitability(game, test_policy), + expected_nash_conv / 2) + + def test_kuhn_poker_uniform_random_best_response_pid0(self): + game = pyspiel.load_game("kuhn_poker") + test_policy = policy.UniformRandomPolicy(game) + results = exploitability.best_response(game, test_policy, player_id=0) + self.assertEqual( + results["best_response_action"], + { + "0": 1, # Bet in case opponent folds when winning + "1": 1, # Bet in case opponent folds when winning + "2": 0, # Both equally good (we return the lowest action) + # Some of these will never happen under the best-response policy, + # but we have computed best-response actions anyway. 
+ "0pb": 0, # Fold - we're losing + "1pb": 1, # Call - we're 50-50 + "2pb": 1, # Call - we've won + }) + self.assertGreater(results["nash_conv"], 0.1) + + def test_kuhn_poker_uniform_random_best_response_pid1(self): + game = pyspiel.load_game("kuhn_poker") + test_policy = policy.UniformRandomPolicy(game) + results = exploitability.best_response(game, test_policy, player_id=1) + self.assertEqual( + results["best_response_action"], + { + # Bet is always best + "0p": 1, + "1p": 1, + "2p": 1, + # Call unless we know we're beaten + "0b": 0, + "1b": 1, + "2b": 1, + }) + self.assertGreater(results["nash_conv"], 0.1) + + def test_kuhn_poker_uniform_random(self): + # NashConv of uniform random test_policy from (found on Google books): + # https://link.springer.com/chapter/10.1007/978-3-319-75931-9_5 + game = pyspiel.load_game("kuhn_poker") + test_policy = policy.UniformRandomPolicy(game) + self.assertAlmostEqual(exploitability.nash_conv(game, test_policy), 11 / 12) + + def test_kuhn_poker_always_fold(self): + game = pyspiel.load_game("kuhn_poker") + test_policy = policy.FirstActionPolicy(game) + self.assertAlmostEqual(exploitability.nash_conv(game, test_policy), 2) + + def test_kuhn_poker_optimal(self): + game = pyspiel.load_game("kuhn_poker") + test_policy = data.kuhn_nash_equilibrium(alpha=0.2) + self.assertAlmostEqual(exploitability.nash_conv(game, test_policy), 0) + + def test_leduc_poker_uniform_random(self): + # NashConv taken from independent implementations + game = pyspiel.load_game("leduc_poker") + test_policy = policy.UniformRandomPolicy(game) + self.assertAlmostEqual( + exploitability.nash_conv(game, test_policy), 4.747222222222222) + + def test_leduc_poker_always_fold(self): + game = pyspiel.load_game("leduc_poker") + test_policy = policy.FirstActionPolicy(game) + self.assertAlmostEqual(exploitability.nash_conv(game, test_policy), 2) + + # Values for uniform policies taken from + # https://link.springer.com/chapter/10.1007/978-3-319-75931-9_5 + # (including multiplayer games below). However, the value for Leduc against + # the uniform test_policy is wrong in the paper. This has been independently + # verified in a number of independent code bases. The 4.7472 value is correct. + # Value for AlwaysFold is trivial: if you + # always fold, you win 0 chips, but if you switch to AlwaysBet, you win 1 + # chip everytime if playing against a player who always folds. + @parameterized.parameters( + ("kuhn_poker", policy.UniformRandomPolicy, 0.9166666666666666), + ("kuhn_poker", policy.FirstActionPolicy, 2.), + ("kuhn_poker", lambda _: data.kuhn_nash_equilibrium(alpha=0.2), 0.), + ("leduc_poker", policy.FirstActionPolicy, 2.), + ("leduc_poker", policy.UniformRandomPolicy, 4.7472222222222), + ) + def test_2p_nash_conv(self, game_name, policy_func, expected): + game = pyspiel.load_game(game_name) + self.assertAlmostEqual( + exploitability.nash_conv(game, policy_func(game)), expected) + + @parameterized.parameters(3, 4) + def test_kuhn_poker_uniform_random_nash_conv(self, num_players): + game = pyspiel.load_game("kuhn_poker", {"players": num_players}) + test_policy = policy.UniformRandomPolicy(game) + self.assertGreater(exploitability.nash_conv(game, test_policy), 2) + + @parameterized.parameters(("kuhn_poker", 2), ("kuhn_poker", 3), + ("kuhn_poker", 4)) + def test_python_same_as_cpp_for_multiplayer_uniform_random_nash_conv( + self, game_name, num_players): + game = pyspiel.load_game(game_name, {"players": num_players}) + + # TabularPolicy defaults to being a uniform random policy. 
+ test_policy = policy.TabularPolicy(game) + python_nash_conv = exploitability.nash_conv(game, test_policy) + cpp_nash_conv = pyspiel.nash_conv( + game, policy_utils.policy_to_dict(test_policy, game)) + self.assertAlmostEqual(python_nash_conv, cpp_nash_conv) + + def test_cpp_python_cfr_kuhn(self): + game = pyspiel.load_game("kuhn_poker") + solver = pyspiel.CFRSolver(game) + for _ in range(100): + solver.evaluate_and_update_policy() + pyspiel_average_policy = solver.tabular_average_policy() + cpp_nash_conv = pyspiel.nash_conv(game, pyspiel_average_policy) + python_policy = policy.pyspiel_policy_to_python_policy( + game, pyspiel_average_policy) + python_nash_conv = exploitability.nash_conv(game, python_policy) + self.assertAlmostEqual(python_nash_conv, cpp_nash_conv) + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/external_sampling_mccfr.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/external_sampling_mccfr.py new file mode 100644 index 0000000..83bc698 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/external_sampling_mccfr.py @@ -0,0 +1,167 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Python implementation for Monte Carlo Counterfactual Regret Minimization.""" + +import enum +import numpy as np +from open_spiel.python.algorithms import mccfr +import pyspiel + + +class AverageType(enum.Enum): + SIMPLE = 0 + FULL = 1 + + +class ExternalSamplingSolver(mccfr.MCCFRSolverBase): + """An implementation of external sampling MCCFR.""" + + def __init__(self, game, average_type=AverageType.SIMPLE): + super().__init__(game) + # How to average the strategy. The 'simple' type does the averaging for + # player i + 1 mod num_players on player i's regret update pass; in two + # players this corresponds to the standard implementation (updating the + # average policy at opponent nodes). In n>2 players, this can be a problem + # for several reasons: first, it does not compute the estimate as described + # by the (unbiased) stochastically-weighted averaging in chapter 4 of + # Lanctot 2013 commonly used in MCCFR because the denominator (important + # sampling correction) should include all the other sampled players as well + # so the sample reach no longer cancels with reach of the player updating + # their average policy. Second, if one player assigns zero probability to an + # action (leading to a subtree), the average policy of a different player in + # that subtree is no longer updated. Hence, the full averaging does not + # update the average policy in the regret passes but does a separate pass to + # update the average policy. Nevertheless, we set the simple type as the + # default because it is faster, seems to work better empirically, and it + # matches what was done in Pluribus (Brown and Sandholm. Superhuman AI for + # multiplayer poker. Science, 11, 2019). 
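+    # In short: SIMPLE folds the averaging into the regret pass (see
+    # _update_regrets), whereas FULL runs the separate _full_update_average
+    # traversal in iteration(). A minimal usage sketch, mirroring the tests
+    # later in this change (num_iterations is illustrative):
+    #   solver = ExternalSamplingSolver(game, AverageType.FULL)
+    #   for _ in range(num_iterations):
+    #     solver.iteration()
+    #   avg_policy = solver.average_policy()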
+ self._average_type = average_type + + assert game.get_type().dynamics == pyspiel.GameType.Dynamics.SEQUENTIAL, ( + "MCCFR requires sequential games. If you're trying to run it " + + 'on a simultaneous (or normal-form) game, please first transform it ' + + 'using turn_based_simultaneous_game.') + + def iteration(self): + """Performs one iteration of external sampling. + + An iteration consists of one episode for each player as the update + player. + """ + for player in range(self._num_players): + self._update_regrets(self._game.new_initial_state(), player) + if self._average_type == AverageType.FULL: + reach_probs = np.ones(self._num_players, dtype=np.float64) + self._full_update_average(self._game.new_initial_state(), reach_probs) + + def _full_update_average(self, state, reach_probs): + """Performs a full update average. + + Args: + state: the open spiel state to run from + reach_probs: array containing the probability of reaching the state + from the players point of view + """ + if state.is_terminal(): + return + if state.is_chance_node(): + for action in state.legal_actions(): + self._full_update_average(state.child(action), reach_probs) + return + + # If all the probs are zero, no need to keep going. + sum_reach_probs = np.sum(reach_probs) + if sum_reach_probs == 0: + return + + cur_player = state.current_player() + info_state_key = state.information_state_string(cur_player) + legal_actions = state.legal_actions() + num_legal_actions = len(legal_actions) + + infostate_info = self._lookup_infostate_info(info_state_key, + num_legal_actions) + policy = self._regret_matching(infostate_info[mccfr.REGRET_INDEX], + num_legal_actions) + + for action_idx in range(num_legal_actions): + new_reach_probs = np.copy(reach_probs) + new_reach_probs[cur_player] *= policy[action_idx] + self._full_update_average( + state.child(legal_actions[action_idx]), new_reach_probs) + + # Now update the cumulative policy + for action_idx in range(num_legal_actions): + self._add_avstrat(info_state_key, action_idx, + reach_probs[cur_player] * policy[action_idx]) + + def _update_regrets(self, state, player): + """Runs an episode of external sampling. 
+ + Args: + state: the open spiel state to run from + player: the player to update regrets for + + Returns: + value: is the value of the state in the game + obtained as the weighted average of the values + of the children + """ + if state.is_terminal(): + return state.player_return(player) + + if state.is_chance_node(): + outcomes, probs = zip(*state.chance_outcomes()) + outcome = np.random.choice(outcomes, p=probs) + return self._update_regrets(state.child(outcome), player) + + cur_player = state.current_player() + info_state_key = state.information_state_string(cur_player) + legal_actions = state.legal_actions() + num_legal_actions = len(legal_actions) + + infostate_info = self._lookup_infostate_info(info_state_key, + num_legal_actions) + policy = self._regret_matching(infostate_info[mccfr.REGRET_INDEX], + num_legal_actions) + + value = 0 + child_values = np.zeros(num_legal_actions, dtype=np.float64) + if cur_player != player: + # Sample at opponent node + action_idx = np.random.choice(np.arange(num_legal_actions), p=policy) + value = self._update_regrets( + state.child(legal_actions[action_idx]), player) + else: + # Walk over all actions at my node + for action_idx in range(num_legal_actions): + child_values[action_idx] = self._update_regrets( + state.child(legal_actions[action_idx]), player) + value += policy[action_idx] * child_values[action_idx] + + if cur_player == player: + # Update regrets. + for action_idx in range(num_legal_actions): + self._add_regret(info_state_key, action_idx, + child_values[action_idx] - value) + # Simple average does averaging on the opponent node. To do this in a game + # with more than two players, we only update the player + 1 mod num_players, + # which reduces to the standard rule in 2 players. + if self._average_type == AverageType.SIMPLE and cur_player == ( + player + 1) % self._num_players: + for action_idx in range(num_legal_actions): + self._add_avstrat(info_state_key, action_idx, policy[action_idx]) + + return value diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/external_sampling_mccfr_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/external_sampling_mccfr_test.py new file mode 100644 index 0000000..b229ed8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/external_sampling_mccfr_test.py @@ -0,0 +1,114 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for open_spiel.python.algorithms.cfr.""" + +from absl.testing import absltest +import numpy as np +from open_spiel.python.algorithms import exploitability +from open_spiel.python.algorithms import external_sampling_mccfr +import pyspiel + +SEED = 39823987 + + +class ExternalSamplingMCCFRTest(absltest.TestCase): + + def test_external_sampling_leduc_2p_simple(self): + np.random.seed(SEED) + game = pyspiel.load_game("leduc_poker") + es_solver = external_sampling_mccfr.ExternalSamplingSolver( + game, external_sampling_mccfr.AverageType.SIMPLE) + for _ in range(10): + es_solver.iteration() + conv = exploitability.nash_conv(game, es_solver.average_policy()) + print("Leduc2P, conv = {}".format(conv)) + self.assertLess(conv, 5) + # ensure that to_tabular() works on the returned policy and + # the tabular policy is equivalent + tabular_policy = es_solver.average_policy().to_tabular() + conv2 = exploitability.nash_conv(game, tabular_policy) + self.assertEqual(conv, conv2) + + def test_external_sampling_leduc_2p_full(self): + np.random.seed(SEED) + game = pyspiel.load_game("leduc_poker") + es_solver = external_sampling_mccfr.ExternalSamplingSolver( + game, external_sampling_mccfr.AverageType.FULL) + for _ in range(10): + es_solver.iteration() + conv = exploitability.nash_conv(game, es_solver.average_policy()) + print("Leduc2P, conv = {}".format(conv)) + self.assertLess(conv, 5) + + def test_external_sampling_kuhn_2p_simple(self): + np.random.seed(SEED) + game = pyspiel.load_game("kuhn_poker") + es_solver = external_sampling_mccfr.ExternalSamplingSolver( + game, external_sampling_mccfr.AverageType.SIMPLE) + for _ in range(10): + es_solver.iteration() + conv = exploitability.nash_conv(game, es_solver.average_policy()) + print("Kuhn2P, conv = {}".format(conv)) + self.assertLess(conv, 1) + + def test_external_sampling_kuhn_2p_full(self): + np.random.seed(SEED) + game = pyspiel.load_game("kuhn_poker") + es_solver = external_sampling_mccfr.ExternalSamplingSolver( + game, external_sampling_mccfr.AverageType.FULL) + for _ in range(10): + es_solver.iteration() + conv = exploitability.nash_conv(game, es_solver.average_policy()) + print("Kuhn2P, conv = {}".format(conv)) + self.assertLess(conv, 1) + + # Liar's dice takes too long, so disable this test. Leave code for reference. 
+ # pylint: disable=g-unreachable-test-method + def disabled_test_external_sampling_liars_dice_2p_simple(self): + np.random.seed(SEED) + game = pyspiel.load_game("liars_dice") + es_solver = external_sampling_mccfr.ExternalSamplingSolver( + game, external_sampling_mccfr.AverageType.SIMPLE) + for _ in range(1): + es_solver.iteration() + conv = exploitability.nash_conv(game, es_solver.average_policy()) + print("Liar's dice, conv = {}".format(conv)) + self.assertLess(conv, 2) + + def test_external_sampling_kuhn_3p_simple(self): + np.random.seed(SEED) + game = pyspiel.load_game("kuhn_poker", {"players": 3}) + es_solver = external_sampling_mccfr.ExternalSamplingSolver( + game, external_sampling_mccfr.AverageType.SIMPLE) + for _ in range(10): + es_solver.iteration() + conv = exploitability.nash_conv(game, es_solver.average_policy()) + print("Kuhn3P, conv = {}".format(conv)) + self.assertLess(conv, 2) + + def test_external_sampling_kuhn_3p_full(self): + np.random.seed(SEED) + game = pyspiel.load_game("kuhn_poker", {"players": 3}) + es_solver = external_sampling_mccfr.ExternalSamplingSolver( + game, external_sampling_mccfr.AverageType.FULL) + for _ in range(10): + es_solver.iteration() + conv = exploitability.nash_conv(game, es_solver.average_policy()) + print("Kuhn3P, conv = {}".format(conv)) + self.assertLess(conv, 2) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/fictitious_play.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/fictitious_play.py new file mode 100644 index 0000000..6be313d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/fictitious_play.py @@ -0,0 +1,332 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Implementations of classical fictitious play. + +See https://en.wikipedia.org/wiki/Fictitious_play. +""" + +import itertools + +import numpy as np + +from open_spiel.python import policy +from open_spiel.python.algorithms import exploitability + + +def _uniform_policy(state): + legal_actions = state.legal_actions() + return [(action, 1.0 / len(legal_actions)) for action in legal_actions] + + +def _callable_tabular_policy(tabular_policy): + """Turns a tabular policy into a callable. + + Args: + tabular_policy: A dictionary mapping information state key to a dictionary + of action probabilities (action -> prob). + + Returns: + A function `state` -> list of (action, prob) + """ + + def wrap(state): + infostate_key = state.information_state_string(state.current_player()) + assert infostate_key in tabular_policy + ap_list = [] + for action in state.legal_actions(): + assert action in tabular_policy[infostate_key] + ap_list.append((action, tabular_policy[infostate_key][action])) + return ap_list + + return wrap + + +class JointPolicy(policy.Policy): + """A policy for all players in the game.""" + + def __init__(self, game, policies): + """Initializes a joint policy from a table of callables. 
+ + Args: + game: The game being played. + policies: A dictionary mapping player number to a function `state` -> + list of (action, prob). + """ + super().__init__(game, list(range(game.num_players()))) + self.policies = policies + + def action_probabilities(self, state, player_id=None): + return dict(self.policies[player_id or state.current_player()](state)) + + +def _full_best_response_policy(br_infoset_dict): + """Turns a dictionary of best response action selections into a full policy. + + Args: + br_infoset_dict: A dictionary mapping information state to a best response + action. + + Returns: + A function `state` -> list of (action, prob) + """ + + def wrap(state): + infostate_key = state.information_state_string(state.current_player()) + br_action = br_infoset_dict[infostate_key] + ap_list = [] + for action in state.legal_actions(): + ap_list.append((action, 1.0 if action == br_action else 0.0)) + return ap_list + + return wrap + + +def _policy_dict_at_state(callable_policy, state): + """Turns a policy function into a dictionary at a specific state. + + Args: + callable_policy: A function from `state` -> lis of (action, prob), + state: the specific state to extract the policy from. + + Returns: + A dictionary of action -> prob at this state. + """ + + infostate_policy_list = callable_policy(state) + infostate_policy = {} + for ap in infostate_policy_list: + infostate_policy[ap[0]] = ap[1] + return infostate_policy + + +class XFPSolver(object): + """An implementation of extensive-form fictitious play (XFP). + + XFP is Algorithm 1 in (Heinrich, Lanctot, and Silver, 2015, "Fictitious + Self-Play in Extensive-Form Games"). Refer to the paper for details: + http://mlanctot.info/files/papers/icml15-fsp.pdf. + """ + + def __init__(self, game, save_oracles=False): + """Initialize the XFP solver. + + Arguments: + game: the open_spiel game object. + save_oracles: a boolean, indicating whether or not to save all the BR + policies along the way (including the initial uniform policy). This + could take up some space, and is only used when generating the meta-game + for analysis. + """ + + self._game = game + self._num_players = self._game.num_players() + + # A set of callables that take in a state and return a list of + # (action, probability) tuples. + self._oracles = [] if save_oracles else None + + # A set of callables that take in a state and return a list of + # (action, probability) tuples. + self._policies = [] + for _ in range(self._num_players): + self._policies.append(_uniform_policy) + if save_oracles: + self._oracles.append([_uniform_policy]) + + self._best_responses = [None] * self._num_players + self._iterations = 0 + self._delta_tolerance = 1e-10 + self._average_policy_tables = [] + + def average_policy_tables(self): + """Returns a dictionary of information state -> dict of action -> prob. + + This is a joint policy (policy for all players). + """ + return self._average_policy_tables + + def average_policy(self): + """Returns the current average joint policy (policy for all players).""" + return JointPolicy(self._game, self._policies) + + def iteration(self): + self._iterations += 1 + self.compute_best_responses() + self.update_average_policies() + + def compute_best_responses(self): + """Updates self._oracles to hold best responses for each player.""" + for i in range(self._num_players): + # Compute a best response policy to pi_{-i}. + # First, construct pi_{-i}. 
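+      # The full joint average policy is passed to exploitability.best_response,
+      # which only lets player i deviate; the remaining players' components of
+      # the joint policy therefore serve as pi_{-i}.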
+ joint_policy = self.average_policy() + br_info = exploitability.best_response(self._game, + joint_policy.to_tabular(), i) + full_br_policy = _full_best_response_policy( + br_info["best_response_action"]) + self._best_responses[i] = full_br_policy + if self._oracles is not None: + self._oracles[i].append(full_br_policy) + + def update_average_policies(self): + """Update the average policies given the newly computed best response.""" + + br_reach_probs = np.ones(self._num_players) + avg_reach_probs = np.ones(self._num_players) + self._average_policy_tables = [{} for _ in range(self._num_players)] + self._recursively_update_average_policies(self._game.new_initial_state(), + avg_reach_probs, br_reach_probs) + for i in range(self._num_players): + self._policies[i] = _callable_tabular_policy( + self._average_policy_tables[i]) + + def _recursively_update_average_policies(self, state, avg_reach_probs, + br_reach_probs): + """Recursive implementation of the average strategy update.""" + + if state.is_terminal(): + return + elif state.is_chance_node(): + for action, _ in state.chance_outcomes(): + new_state = state.clone() + new_state.apply_action(action) + self._recursively_update_average_policies(new_state, avg_reach_probs, + br_reach_probs) + else: + player = state.current_player() + avg_policy = _policy_dict_at_state(self._policies[player], state) + br_policy = _policy_dict_at_state(self._best_responses[player], state) + legal_actions = state.legal_actions() + infostate_key = state.information_state_string(player) + # First traverse the subtrees. + for action in legal_actions: + assert action in br_policy + assert action in avg_policy + new_state = state.clone() + new_state.apply_action(action) + new_avg_reach = np.copy(avg_reach_probs) + new_avg_reach[player] *= avg_policy[action] + new_br_reach = np.copy(br_reach_probs) + new_br_reach[player] *= br_policy[action] + self._recursively_update_average_policies(new_state, new_avg_reach, + new_br_reach) + # Now, do the updates. + if infostate_key not in self._average_policy_tables[player]: + alpha = 1 / (self._iterations + 1) + self._average_policy_tables[player][infostate_key] = {} + pr_sum = 0.0 + for action in legal_actions: + pr = ( + avg_policy[action] + (alpha * br_reach_probs[player] * + (br_policy[action] - avg_policy[action])) / + ((1.0 - alpha) * avg_reach_probs[player] + + alpha * br_reach_probs[player])) + self._average_policy_tables[player][infostate_key][action] = pr + pr_sum += pr + assert (1.0 - self._delta_tolerance <= pr_sum <= + 1.0 + self._delta_tolerance) + + def sample_episode(self, state, policies): + """Samples an episode according to the policies, starting from state. + + Args: + state: Pyspiel state representing the current state. + policies: List of policy representing the policy executed by each player. + + Returns: + The result of the call to returns() of the final state in the episode. + Meant to be a win/loss integer. 
+ """ + + if state.is_terminal(): + return np.array(state.returns(), dtype=np.float32) + elif state.is_chance_node(): + outcomes = [] + probs = [] + for action, prob in state.chance_outcomes(): + outcomes.append(action) + probs.append(prob) + outcome = np.random.choice(outcomes, p=probs) + state.apply_action(outcome) + return self.sample_episode(state, policies) + else: + player = state.current_player() + state_policy = _policy_dict_at_state(policies[player], state) + actions = [] + probs = [] + for action in state_policy: + actions.append(action) + probs.append(state_policy[action]) + action = np.random.choice(actions, p=probs) + state.apply_action(action) + return self.sample_episode(state, policies) + + def sample_episodes(self, policies, num): + """Samples episodes and averages their returns. + + Args: + policies: A list of policies representing the policies executed by each + player. + num: Number of episodes to execute to estimate average return of policies. + + Returns: + Average episode return over num episodes. + """ + + totals = np.zeros(self._num_players) + for _ in range(num): + totals += self.sample_episode(self._game.new_initial_state(), policies) + return totals / num + + def get_empirical_metagame(self, sims_per_entry, seed=None): + """Gets a meta-game tensor of utilities from episode samples. + + The tensor is a cross-table of all the saved oracles and initial uniform + policy. + + Args: + sims_per_entry: number of simulations (episodes) to perform per entry in + the tables, i.e. each is a crude Monte Carlo estimate + seed: the seed to set for random sampling, for reproducibility + + Returns: + the K^n (KxKx...K, with dimension n) meta-game tensor where n is the + number of players and K is the number of strategies (one more than the + number of iterations of fictitious play since the initial uniform + policy is included). + """ + + if seed is not None: + np.random.seed(seed=seed) + assert self._oracles is not None + num_strategies = len(self._oracles[0]) + # Each metagame will be (num_strategies)^self._num_players. + # There are self._num_player metagames, one per player. + meta_games = [] + for _ in range(self._num_players): + shape = [num_strategies] * self._num_players + meta_game = np.ndarray(shape=shape, dtype=np.float32) + meta_games.append(meta_game) + for coord in itertools.product( + range(num_strategies), repeat=self._num_players): + policies = [] + for i in range(self._num_players): + iteration = coord[i] + policies.append(self._oracles[i][iteration]) + utility_estimates = self.sample_episodes(policies, sims_per_entry) + for i in range(self._num_players): + meta_games[i][coord] = utility_estimates[i] + return meta_games diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/fictitious_play_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/fictitious_play_test.py new file mode 100644 index 0000000..16342f0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/fictitious_play_test.py @@ -0,0 +1,118 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.algorithms.cfr.""" + +from absl.testing import absltest +import numpy as np +from open_spiel.python import policy +from open_spiel.python.algorithms import expected_game_score +from open_spiel.python.algorithms import exploitability +from open_spiel.python.algorithms import fictitious_play +import pyspiel + + +class FictitiousPlayTest(absltest.TestCase): + + def test_xfp(self): + game = pyspiel.load_game("kuhn_poker") + xfp_solver = fictitious_play.XFPSolver(game) + for _ in range(100): + xfp_solver.iteration() + average_policies = xfp_solver.average_policy_tables() + tabular_policy = policy.TabularPolicy(game) + for player_id in range(2): + for info_state, state_policy in average_policies[player_id].items(): + policy_to_update = tabular_policy.policy_for_key(info_state) + for action, probability in state_policy.items(): + policy_to_update[action] = probability + average_policy_values = expected_game_score.policy_value( + game.new_initial_state(), [tabular_policy, tabular_policy]) + print("Kuhn 2P average values after 10 iterations") + print("P0: {}".format(average_policy_values[0])) + print("P1: {}".format(average_policy_values[1])) + self.assertIsNotNone(average_policy_values) + self.assertTrue( + np.allclose(average_policy_values, [-1 / 18, 1 / 18], atol=1e-3)) + + def test_meta_game_kuhn2p(self): + print("Kuhn 2p") + game = pyspiel.load_game("kuhn_poker") + xfp_solver = fictitious_play.XFPSolver(game, save_oracles=True) + for _ in range(3): + xfp_solver.iteration() + meta_games = xfp_solver.get_empirical_metagame(10, seed=1) + self.assertIsNotNone(meta_games) + # Metagame utility matrices for each player + for i in range(2): + print("player {}: \n{}".format(i + 1, meta_games[i])) + + def test_meta_game_kuhn3p(self): + print("Kuhn 3p") + game = pyspiel.load_game("kuhn_poker", {"players": 3}) + xfp_solver = fictitious_play.XFPSolver(game, save_oracles=True) + for _ in range(3): + xfp_solver.iteration() + meta_games = xfp_solver.get_empirical_metagame(10, seed=3) + self.assertIsNotNone(meta_games) + # Metagame utility tensors for each player + for i in range(3): + print("player {}: \n{}".format(i + 1, meta_games[i])) + + def test_meta_game_kuhn4p(self): + print("Kuhn 4p") + game = pyspiel.load_game("kuhn_poker", {"players": 4}) + xfp_solver = fictitious_play.XFPSolver(game, save_oracles=True) + for _ in range(3): + xfp_solver.iteration() + meta_games = xfp_solver.get_empirical_metagame(10, seed=1) + self.assertIsNotNone(meta_games) + # Metagame utility tensors for each player + for i in range(4): + print("player {}: \n{}".format(i + 1, meta_games[i])) + + def test_meta_game_leduc2p(self): + print("Leduc 2p") + game = pyspiel.load_game("leduc_poker") + xfp_solver = fictitious_play.XFPSolver(game, save_oracles=True) + for _ in range(3): + xfp_solver.iteration() + meta_games = xfp_solver.get_empirical_metagame(10, seed=86487) + self.assertIsNotNone(meta_games) + # Metagame utility matrices for each player + for i in range(2): + print("player {}: \n{}".format(i + 1, meta_games[i])) + + def test_matching_pennies_3p(self): + game = pyspiel.load_game_as_turn_based("matching_pennies_3p") + xfp_solver = fictitious_play.XFPSolver(game) + for i in range(1000): + xfp_solver.iteration() + if i % 10 == 0: + conv = exploitability.nash_conv(game, xfp_solver.average_policy()) + print("FP in Matching Pennies 3p. 
Iter: {}, NashConv: {}".format( + i, conv)) + + def test_shapleys_game(self): + game = pyspiel.load_game_as_turn_based("matrix_shapleys_game") + xfp_solver = fictitious_play.XFPSolver(game) + for i in range(1000): + xfp_solver.iteration() + if i % 10 == 0: + conv = exploitability.nash_conv(game, xfp_solver.average_policy()) + print("FP in Shapley's Game. Iter: {}, NashConv: {}".format(i, conv)) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/gambit.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/gambit.py new file mode 100644 index 0000000..9163cf2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/gambit.py @@ -0,0 +1,102 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Export game trees in gambit format. + +An exporter for the .efg format used by Gambit: +http://www.gambit-project.org/gambit14/formats.html + +See `examples/gambit_example.py` for an example of usage. + +""" + +import collections +import functools + + +def quote(x): + return f"\"{x}\"" + + +def export_gambit(game): + """Builds gambit representation of the game tree. + + Args: + game: A `pyspiel.Game` object. + + Returns: + string: Gambit tree + """ + players = " ".join([f"\"Pl{i}\"" for i in range(game.num_players())]) + ret = f"EFG 2 R {quote(game)} {{ {players} }} \n" + + terminal_idx = 1 + chance_idx = 1 + + # We will keep separate infoset idx per each player. + # Note that gambit infosets start at 1, but we start them here at 0 because + # they get incremented when accessed from infoset_tables below. 
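+  # Concretely, each infoset_tables[player] below is a defaultdict whose
+  # factory is infoset_next_id(player): the first lookup of a new information
+  # state string allocates the next 1-based id for that player, and later
+  # lookups of the same string reuse it.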
+ infoset_idx = [0] * game.num_players() + + def infoset_next_id(player): + nonlocal infoset_idx + infoset_idx[player] += 1 + return infoset_idx[player] + + infoset_tables = [ + collections.defaultdict(functools.partial(infoset_next_id, player)) + for player in range(game.num_players()) + ] + + def build_tree(state, depth): + nonlocal ret, terminal_idx, chance_idx, infoset_tables + + ret += " " * depth # add nice spacing + state_str = str(state) + if len(state_str) > 10: + state_str = "" + + if state.is_terminal(): + utils = " ".join(map(str, state.returns())) + ret += f"t {quote(state_str)} {terminal_idx} \"\" {{ {utils} }}\n" + terminal_idx += 1 + return + + if state.is_chance_node(): + ret += f"c {quote(state_str)} {chance_idx} \"\" {{ " + for action, prob in state.chance_outcomes(): + action_str = state.action_to_string(state.current_player(), action) + ret += f"{quote(action_str)} {prob:.16f} " + ret += " } 0\n" + chance_idx += 1 + + else: # player node + player = state.current_player() + gambit_player = player + 1 # cannot be indexed from 0 + infoset = state.information_state_string() + infoset_idx = infoset_tables[player][infoset] + + ret += f"p {quote(state_str)} {gambit_player} {infoset_idx} \"\" {{ " + for action in state.legal_actions(): + action_str = state.action_to_string(state.current_player(), action) + ret += f"{quote(action_str)} " + ret += " } 0\n" + + for action in state.legal_actions(): + child = state.child(action) + build_tree(child, depth + 1) + + build_tree(game.new_initial_state(), 0) + return ret diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/gambit_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/gambit_test.py new file mode 100644 index 0000000..65594ee --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/gambit_test.py @@ -0,0 +1,85 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Test that gambit export can be imported back.""" + +import collections +import tempfile + +from absl import app +from absl.testing import absltest + +from open_spiel.python.algorithms.gambit import export_gambit +import pyspiel + + +class GambitTest(absltest.TestCase): + + def test_gambit_export_can_be_imported(self): + game_list = [ + "kuhn_poker", + "kuhn_poker(players=3)", + ] + for game_name in game_list: + game_orig = pyspiel.load_game(game_name) + gbt = export_gambit(game_orig) + f = tempfile.NamedTemporaryFile("w", delete=False) + f.write(gbt) + f.flush() + game_efg = pyspiel.load_game("efg_game(filename=%s)" % f.name) + f.close() + + self._infoset_table_orig = collections.defaultdict(lambda: []) + self._infoset_table_efg = collections.defaultdict(lambda: []) + self._recursive_check(game_orig.new_initial_state(), + game_efg.new_initial_state()) + + self._check_infoset_isomorphism(self._infoset_table_orig, + self._infoset_table_efg) + + def _recursive_check(self, g, h): + self.assertEqual(g.current_player(), h.current_player()) + self.assertEqual(g.is_chance_node(), h.is_chance_node()) + self.assertEqual(g.is_terminal(), h.is_terminal()) + if g.is_terminal(): + self.assertEqual(g.returns(), h.returns()) + return + + if g.is_chance_node(): + self.assertEqual(g.chance_outcomes(), h.chance_outcomes()) + else: + self.assertEqual(g.legal_actions(), h.legal_actions()) + self._infoset_table_orig[g.information_state_string()].append(g.history()) + self._infoset_table_efg[h.information_state_string()].append(h.history()) + + for a, b in zip(g.legal_actions(), h.legal_actions()): + self._recursive_check(g.child(a), h.child(b)) + + def _check_infoset_isomorphism(self, a, b): + a_prime = [] + b_prime = [] + for vs in a.values(): + a_prime.append(sorted([str(v) for v in vs])) + for vs in b.values(): + b_prime.append(sorted([str(v) for v in vs])) + self.assertCountEqual(a_prime, b_prime) + + +def main(_): + absltest.main() + + +if __name__ == "__main__": + # Necessary to run main via app.run for internal tests. + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/generate_playthrough.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/generate_playthrough.py new file mode 100644 index 0000000..b0c220e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/generate_playthrough.py @@ -0,0 +1,557 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Functions to manipulate game playthoughs. + +Used by examples/playthrough.py and tests/playthrough_test.py. + +Note that not all states are fully represented in the playthrough. +See the logic in ShouldDisplayStateTracker for details. 
+""" + +import collections +import os +import re +from typing import Optional + +from absl import flags +import numpy as np + +from open_spiel.python import games # pylint: disable=unused-import +from open_spiel.python.mfg import games as mfgs # pylint: disable=unused-import +from open_spiel.python.observation import make_observation +import pyspiel + +_USE_ACTION_IDS = flags.DEFINE_bool( + "playthough_use_action_ids", default=True, + help="Whether to use action names or ids when regenerating playthroughs") + +# Precision can differ depending on the system and context where the playthrough +# is generated versus where they are re-generated for testing purposes. To +# ensure that tests don't fail due to precision, we set the tolarance +# accordingly. +_FLOAT_DECIMAL_PLACES = 6 + + +def _escape(x): + """Returns a newline-free backslash-escaped version of the given string.""" + x = x.replace("\\", R"\\") + x = x.replace("\n", R"\n") + return x + + +def _format_value(v): + """Format a single value.""" + if v == 0: + return "◯" + elif v == 1: + return "◉" + else: + return ValueError("Values must all be 0 or 1") + + +def _format_vec(vec): + """Returns a readable format for a vector.""" + full_fmt = "".join(_format_value(v) for v in vec) + short_fmt = None + max_len = 250 + vec2int = lambda vec: int("".join("1" if b else "0" for b in vec), 2) + if len(vec) > max_len: + if all(v == 0 for v in vec): + short_fmt = f"zeros({len(vec)})" + elif all(v in (0, 1) for v in vec): + sz = (len(vec) + 15) // 16 + # To reconstruct the original vector: + # binvec = lambda n, x: [int(x) for x in f"{x:0>{n}b}"] + short_fmt = f"binvec({len(vec)}, 0x{vec2int(vec):0>{sz}x})" + if short_fmt and len(short_fmt) < len(full_fmt): + return short_fmt + else: + return full_fmt + + +def _format_matrix(mat): + return np.char.array([_format_vec(row) for row in mat]) + + +def _format_float(x): + return ("{:." 
+ str(_FLOAT_DECIMAL_PLACES) + "g}").format(x) + + +def _format_float_vector(v): + return "[" + ", ".join([_format_float(x) for x in v]) + "]" + + +def _format_chance_outcomes(chance_outcomes): + return "[" + ", ".join(["({},{})".format(outcome, _format_float(prob)) + for (outcome, prob) in chance_outcomes]) + "]" + + +def _format_tensor(tensor, tensor_name, max_cols=120): + """Formats a tensor in an easy-to-view format as a list of lines.""" + if ((not tensor.shape) or (tensor.shape == (0,)) or (len(tensor.shape) > 3) or + not np.logical_or(tensor == 0, tensor == 1).all()): + vec = ", ".join(str(round(v, 5)) for v in tensor.ravel()) + return ["{} = [{}]".format(tensor_name, vec)] + elif len(tensor.shape) == 1: + return ["{}: {}".format(tensor_name, _format_vec(tensor))] + elif len(tensor.shape) == 2: + if len(tensor_name) + tensor.shape[1] + 2 < max_cols: + lines = ["{}: {}".format(tensor_name, _format_vec(tensor[0]))] + prefix = " " * (len(tensor_name) + 2) + else: + lines = ["{}:".format(tensor_name), _format_vec(tensor[0])] + prefix = "" + for row in tensor[1:]: + lines.append(prefix + _format_vec(row)) + return lines + elif len(tensor.shape) == 3: + lines = ["{}:".format(tensor_name)] + rows = [] + for m in tensor: + formatted_matrix = _format_matrix(m) + if (not rows) or (len(rows[-1][0] + formatted_matrix[0]) + 2 > max_cols): + rows.append(formatted_matrix) + else: + rows[-1] = rows[-1] + " " + formatted_matrix + for i, big_row in enumerate(rows): + if i > 0: + lines.append("") + for row in big_row: + lines.append("".join(row)) + return lines + + +def playthrough(game_string, + action_sequence, + alsologtostdout=False, + observation_params_string=None, + seed: Optional[int] = None): + """Returns a playthrough of the specified game as a single text. + + Actions are selected uniformly at random, including chance actions. + + Args: + game_string: string, e.g. 'markov_soccer', with possible optional params, + e.g. 'go(komi=4.5,board_size=19)'. + action_sequence: A (possibly partial) list of action choices to make. + alsologtostdout: Whether to also print the trace to stdout. This can be + useful when an error occurs, to still be able to get context information. + observation_params_string: Optional observation parameters for constructing + an observer. + seed: A(n optional) seed to initialize the random number generator from. 
+ """ + lines = playthrough_lines(game_string, alsologtostdout, action_sequence, + observation_params_string, seed) + return "\n".join(lines) + "\n" + + +def format_shapes(d): + """Returns a string representing the shapes of a dict of tensors.""" + if len(d) == 1: + return str(list(d[min(d)].shape)) + else: + return ", ".join(f"{key}: {list(value.shape)}" for key, value in d.items()) + + +def _format_params(d, as_game=False): + """Format a collection of params.""" + + def fmt(val): + if isinstance(val, dict): + return _format_params(val, as_game=True) + else: + return _escape(str(val)) + + if as_game: + return d["name"] + "(" + ",".join( + "{}={}".format(key, fmt(value)) + for key, value in sorted(d.items()) + if key != "name") + ")" + else: + return "{" + ",".join( + "{}={}".format(key, fmt(value)) + for key, value in sorted(d.items())) + "}" + + +class ShouldDisplayStateTracker: + """Determines whether a state is interesting enough to display.""" + + def __init__(self): + self.states_by_player = collections.defaultdict(int) + + def __call__(self, state) -> bool: + """Returns True if a state is sufficiently interesting to display.""" + player = state.current_player() + count = self.states_by_player[player] + self.states_by_player[player] += 1 + if count == 0: + # Always display the first state for a player + return True + elif player == -1: + # For chance moves, display the first two only + return count < 2 + else: + # For regular player moves, display the first three and selected others + return (count < 3) or (count % 10 == 0) + + +def playthrough_lines(game_string, alsologtostdout=False, action_sequence=None, + observation_params_string=None, + seed: Optional[int] = None): + """Returns a playthrough of the specified game as a list of lines. + + Actions are selected uniformly at random, including chance actions. + + Args: + game_string: string, e.g. 'markov_soccer' or 'kuhn_poker(players=4)'. + alsologtostdout: Whether to also print the trace to stdout. This can be + useful when an error occurs, to still be able to get context information. + action_sequence: A (possibly partial) list of action choices to make. + observation_params_string: Optional observation parameters for constructing + an observer. + seed: A(n optional) seed to initialize the random number generator from. + """ + should_display_state_fn = ShouldDisplayStateTracker() + lines = [] + action_sequence = action_sequence or [] + should_display = True + + def add_line(v, force=False): + if force or should_display: + if alsologtostdout: + print(v) + lines.append(v) + + game = pyspiel.load_game(game_string) + add_line("game: {}".format(game_string)) + if observation_params_string: + add_line("observation_params: {}".format(observation_params_string)) + if seed is None: + seed = np.random.randint(2**32 - 1) + game_type = game.get_type() + + observation_params = ( + pyspiel.game_parameters_from_string(observation_params_string) + if observation_params_string + else None + ) + default_observation = make_observation( + game, + imperfect_information_observation_type=None, + params=observation_params, + ) + + infostate_observation = make_observation( + game, pyspiel.IIGObservationType(perfect_recall=True) + ) + + public_observation = None + private_observation = None + + # Instantiate factored observations only for imperfect information games, + # as it would yield unncessarily redundant information for perfect info games. 
+ # The default observation is the same as the public observation, while private + # observations are always empty. + if game_type.information == game_type.Information.IMPERFECT_INFORMATION: + public_observation = make_observation( + game, + pyspiel.IIGObservationType( + public_info=True, + perfect_recall=False, + private_info=pyspiel.PrivateInfoType.NONE, + ), + ) + private_observation = make_observation( + game, + pyspiel.IIGObservationType( + public_info=False, + perfect_recall=False, + private_info=pyspiel.PrivateInfoType.SINGLE_PLAYER, + ), + ) + + add_line("") + add_line("GameType.chance_mode = {}".format(game_type.chance_mode)) + add_line("GameType.dynamics = {}".format(game_type.dynamics)) + add_line("GameType.information = {}".format(game_type.information)) + add_line("GameType.long_name = {}".format('"{}"'.format(game_type.long_name))) + add_line("GameType.max_num_players = {}".format(game_type.max_num_players)) + add_line("GameType.min_num_players = {}".format(game_type.min_num_players)) + add_line("GameType.parameter_specification = {}".format("[{}]".format( + ", ".join('"{}"'.format(param) + for param in sorted(game_type.parameter_specification))))) + add_line("GameType.provides_information_state_string = {}".format( + game_type.provides_information_state_string)) + add_line("GameType.provides_information_state_tensor = {}".format( + game_type.provides_information_state_tensor)) + add_line("GameType.provides_observation_string = {}".format( + game_type.provides_observation_string)) + add_line("GameType.provides_observation_tensor = {}".format( + game_type.provides_observation_tensor)) + add_line("GameType.provides_factored_observation_string = {}".format( + game_type.provides_factored_observation_string)) + add_line("GameType.reward_model = {}".format(game_type.reward_model)) + add_line("GameType.short_name = {}".format('"{}"'.format( + game_type.short_name))) + add_line("GameType.utility = {}".format(game_type.utility)) + + add_line("") + add_line("NumDistinctActions() = {}".format(game.num_distinct_actions())) + add_line("PolicyTensorShape() = {}".format(game.policy_tensor_shape())) + add_line("MaxChanceOutcomes() = {}".format(game.max_chance_outcomes())) + add_line("GetParameters() = {}".format(_format_params(game.get_parameters()))) + add_line("NumPlayers() = {}".format(game.num_players())) + add_line("MinUtility() = {:.5}".format(game.min_utility())) + add_line("MaxUtility() = {:.5}".format(game.max_utility())) + add_line("UtilitySum() = {}".format(game.utility_sum())) + if infostate_observation and infostate_observation.tensor is not None: + add_line("InformationStateTensorShape() = {}".format( + format_shapes(infostate_observation.dict))) + add_line("InformationStateTensorLayout() = {}".format( + game.information_state_tensor_layout())) + add_line("InformationStateTensorSize() = {}".format( + len(infostate_observation.tensor))) + if default_observation and default_observation.tensor is not None: + add_line("ObservationTensorShape() = {}".format( + format_shapes(default_observation.dict))) + add_line("ObservationTensorLayout() = {}".format( + game.observation_tensor_layout())) + add_line("ObservationTensorSize() = {}".format( + len(default_observation.tensor))) + add_line("MaxGameLength() = {}".format(game.max_game_length())) + add_line('ToString() = "{}"'.format(str(game))) + + players = list(range(game.num_players())) + # Arbitrarily pick the last possible initial states (for all games + # but multi-population MFGs, there will be a single initial state). 
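+  # The loop below prints the per-state trace and then advances the state by
+  # applying either the scripted choice from action_sequence (when one is
+  # provided for this step) or an action drawn from the legal actions via rng;
+  # mean-field and simultaneous nodes are handled by dedicated branches.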
+ state = game.new_initial_states()[-1] + state_idx = 0 + rng = np.random.RandomState(seed) + + while True: + should_display = should_display_state_fn(state) + add_line("", force=True) + add_line("# State {}".format(state_idx), force=True) + for line in str(state).splitlines(): + add_line("# {}".format(line).rstrip()) + add_line("IsTerminal() = {}".format(state.is_terminal())) + add_line("History() = {}".format([int(a) for a in state.history()])) + add_line('HistoryString() = "{}"'.format(state.history_str())) + add_line("IsChanceNode() = {}".format(state.is_chance_node())) + add_line("IsSimultaneousNode() = {}".format(state.is_simultaneous_node())) + add_line("CurrentPlayer() = {}".format(state.current_player())) + if infostate_observation: + for player in players: + s = infostate_observation.string_from(state, player) + if s is not None: + add_line(f'InformationStateString({player}) = "{_escape(s)}"') + if infostate_observation and infostate_observation.tensor is not None: + for player in players: + infostate_observation.set_from(state, player) + for name, tensor in infostate_observation.dict.items(): + label = f"InformationStateTensor({player})" + label += f".{name}" if name != "info_state" else "" + for line in _format_tensor(tensor, label): + add_line(line) + if default_observation: + for player in players: + s = default_observation.string_from(state, player) + if s is not None: + add_line(f'ObservationString({player}) = "{_escape(s)}"') + if public_observation: + s = public_observation.string_from(state, 0) + if s is not None: + add_line('PublicObservationString() = "{}"'.format(_escape(s))) + for player in players: + s = private_observation.string_from(state, player) + if s is not None: + add_line(f'PrivateObservationString({player}) = "{_escape(s)}"') + if default_observation and default_observation.tensor is not None: + for player in players: + default_observation.set_from(state, player) + for name, tensor in default_observation.dict.items(): + label = f"ObservationTensor({player})" + label += f".{name}" if name != "observation" else "" + for line in _format_tensor(tensor, label): + add_line(line) + if game_type.chance_mode == pyspiel.GameType.ChanceMode.SAMPLED_STOCHASTIC: + add_line('SerializeState() = "{}"'.format(_escape(state.serialize()))) + if not state.is_chance_node(): + add_line("Rewards() = {}".format(_format_float_vector(state.rewards()))) + add_line("Returns() = {}".format(_format_float_vector(state.returns()))) + if state.is_terminal(): + break + if state.is_chance_node(): + add_line("ChanceOutcomes() = {}".format( + _format_chance_outcomes(state.chance_outcomes()))) + if state.is_mean_field_node(): + add_line("DistributionSupport() = {}".format( + state.distribution_support())) + num_states = len(state.distribution_support()) + state.update_distribution( + [1. 
/ num_states] * num_states if num_states else []) + if state_idx < len(action_sequence): + assert action_sequence[state_idx] == "update_distribution", ( + f"Unexpected action at MFG node: {action_sequence[state_idx]}, " + f"state: {state}, action_sequence: {action_sequence}") + add_line("") + add_line("# Set mean field distribution to be uniform", force=True) + add_line("action: update_distribution", force=True) + elif state.is_simultaneous_node(): + for player in players: + add_line("LegalActions({}) = [{}]".format( + player, ", ".join(str(x) for x in state.legal_actions(player)))) + for player in players: + add_line("StringLegalActions({}) = [{}]".format( + player, ", ".join('"{}"'.format(state.action_to_string(player, x)) + for x in state.legal_actions(player)))) + if state_idx < len(action_sequence): + actions = action_sequence[state_idx] + for i, a in enumerate(actions): + if isinstance(a, str): + actions[i] = state.string_to_action(i, a) + else: + actions = [] + for pl in players: + legal_actions = state.legal_actions(pl) + actions.append(0 if not legal_actions else rng.choice(legal_actions)) + add_line("") + add_line("# Apply joint action [{}]".format( + format(", ".join( + '"{}"'.format(state.action_to_string(player, action)) + for player, action in enumerate(actions)))), force=True) + add_line("actions: [{}]".format(", ".join( + str(action) for action in actions)), force=True) + state.apply_actions(actions) + else: + add_line("LegalActions() = [{}]".format(", ".join( + str(x) for x in state.legal_actions()))) + add_line("StringLegalActions() = [{}]".format(", ".join( + '"{}"'.format(state.action_to_string(state.current_player(), x)) + for x in state.legal_actions()))) + if state_idx < len(action_sequence): + action = action_sequence[state_idx] + if isinstance(action, str): + action = state.string_to_action(state.current_player(), action) + else: + action = rng.choice(state.legal_actions()) + add_line("") + add_line('# Apply action "{}"'.format( + state.action_to_string(state.current_player(), action)), force=True) + add_line("action: {}".format(action), force=True) + state.apply_action(action) + state_idx += 1 + return lines + + +def content_lines(lines): + """Return lines with content.""" + return [line for line in lines if line and line[0] == "#"] + + +def _playthrough_params(lines): + """Returns the playthrough parameters from a playthrough record. + + Args: + lines: The playthrough as a list of lines. + + Returns: + A `dict` with entries: + game_string: string, e.g. 'markov_soccer'. + action_sequence: a list of action choices made in the playthrough. + Suitable for passing to playthrough to re-generate the playthrough. + + Raises: + ValueError if the playthrough is not valid. 
+ """ + params = {"action_sequence": []} + use_action_ids = _USE_ACTION_IDS.value + for line in lines: + match_game = re.fullmatch(r"game: (.*)", line) + match_observation_params = re.fullmatch(r"observation_params: (.*)", line) + match_update_distribution = (line == "action: update_distribution") + if use_action_ids: + match_action = re.fullmatch(r"action: (.*)", line) + match_actions = re.fullmatch(r"actions: \[(.*)\]", line) + else: + match_action = re.fullmatch(r'# Apply action "(.*)"', line) + match_actions = re.fullmatch(r"# Apply joint action \[(.*)\]", line) + if match_game: + params["game_string"] = match_game.group(1) + elif match_observation_params: + params["observation_params_string"] = match_observation_params.group(1) + elif match_update_distribution: + params["action_sequence"].append("update_distribution") + elif match_action: + matched = match_action.group(1) + if use_action_ids: + params["action_sequence"].append(int(matched)) + else: + params["action_sequence"].append(matched) + elif match_actions: + if use_action_ids: + params["action_sequence"].append( + [int(x) for x in match_actions.group(1).split(", ")]) + else: + params["action_sequence"].append( + [x[1:-1] for x in match_actions.group(1).split(", ")]) + if "game_string" in params: + return params + raise ValueError("Could not find params") + + +def _read_playthrough(filename): + """Returns the content and the parsed arguments of a playthrough file.""" + with open(filename, "r", encoding="utf-8") as f: + original = f.read() + kwargs = _playthrough_params(original.splitlines()) + return original, kwargs + + +def replay(filename): + """Re-runs the playthrough in the specified file. Returns (original, new).""" + original, kwargs = _read_playthrough(filename) + return (original, playthrough(**kwargs)) + + +def update_path(path, shard_index=0, num_shards=1): + """Regenerates all playthroughs in the path.""" + if os.path.isfile(path): + file_list = [path] + else: + file_list = sorted(os.listdir(path)) + for filename in file_list[shard_index::num_shards]: + try: + original, kwargs = _read_playthrough(os.path.join(path, filename)) + try: + pyspiel.load_game(kwargs["game_string"]) + except pyspiel.SpielError as e: + if "Unknown game" in str(e): + print(f"\x1b[0J[Skipped] Skipping game {filename} as ", + f"{kwargs['game_string']} is not available.") + continue + else: + raise + new = playthrough(**kwargs) + if original == new: + print(f"\x1b[0J {filename}", end="\r") + else: + with open(os.path.join(path, filename), "w") as f: + f.write(new) + print(f"\x1b[0JUpdated {filename}") + except Exception as e: # pylint: disable=broad-except + print(f"\x1b[0J{filename} failed: {e}") + raise diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/generate_playthrough_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/generate_playthrough_test.py new file mode 100644 index 0000000..f3b373f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/generate_playthrough_test.py @@ -0,0 +1,74 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Tests for open_spiel.python.algorithms.playthrough.""" + +from absl.testing import absltest +import numpy as np + +from open_spiel.python.algorithms import generate_playthrough + + +class PlaythroughTest(absltest.TestCase): + + def test_runs(self): + result = generate_playthrough.playthrough( + "tic_tac_toe", action_sequence=[0, 1, 2, 3, 4, 5, 6, 7, 8]) + self.assertNotEmpty(result) + + def test_format_tensor_1d(self): + lines = generate_playthrough._format_tensor(np.array((1, 0, 1, 1)), "x") + self.assertEqual(lines, ["x: ◉◯◉◉"]) + + def test_format_tensor_2d(self): + lines = generate_playthrough._format_tensor(np.array(((1, 0), (1, 1))), "x") + self.assertEqual(lines, [ + "x: ◉◯", + " ◉◉", + ]) + + def test_format_tensor_3d(self): + lines = [] + tensor = np.array(( + ((1, 0), (1, 1)), + ((0, 0), (1, 0)), + ((0, 1), (1, 0)), + )) + lines = generate_playthrough._format_tensor(tensor, "x") + self.assertEqual(lines, [ + "x:", + "◉◯ ◯◯ ◯◉", + "◉◉ ◉◯ ◉◯", + ]) + + def test_format_tensor_3d_linewrap(self): + tensor = np.array(( + ((1, 0), (1, 1)), + ((0, 0), (1, 0)), + ((0, 1), (1, 0)), + )) + lines = generate_playthrough._format_tensor(tensor, "x", max_cols=9) + self.assertEqual(lines, [ + "x:", + "◉◯ ◯◯", + "◉◉ ◉◯", + "", + "◯◉", + "◉◯", + ]) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/get_all_states.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/get_all_states.py new file mode 100644 index 0000000..0e450f9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/get_all_states.py @@ -0,0 +1,142 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Example algorithm to get all states from a game. + +The algorithm does not support mean field games where the game evolution depends +on the mean field distribution. +""" + +import itertools + +from open_spiel.python import games # pylint:disable=unused-import +import pyspiel + + +def _get_subgames_states(state, all_states, depth_limit, depth, + include_terminals, include_chance_states, + include_mean_field_states, to_string, + stop_if_encountered): + """Extract non-chance states for a subgame into the all_states dict.""" + if state.is_terminal(): + if include_terminals: + # Include if not already present and then terminate recursion. 
+ state_str = to_string(state) + if state_str not in all_states: + all_states[state_str] = state.clone() + return + + if depth > depth_limit >= 0: + return + is_mean_field = state.current_player() == pyspiel.PlayerId.MEAN_FIELD + if (state.is_chance_node() and + include_chance_states) or (is_mean_field and + include_mean_field_states) or not ( + state.is_chance_node() or is_mean_field): + # Add only if not already present + state_str = to_string(state) + if state_str not in all_states: + all_states[state_str] = state.clone() + else: + # We already saw this one. Stop the recursion if the flag is set + if stop_if_encountered: + return + + if is_mean_field: + support = state.distribution_support() + state_for_search = state.clone() + support_length = len(support) + # update with a dummy distribution + state_for_search.update_distribution( + [1.0 / support_length for _ in range(support_length)]) + _get_subgames_states(state_for_search, all_states, depth_limit, depth + 1, + include_terminals, include_chance_states, + include_mean_field_states, to_string, + stop_if_encountered) + elif state.is_simultaneous_node(): + joint_legal_actions = [ + state.legal_actions(player) + for player in range(state.get_game().num_players()) + ] + for joint_actions in itertools.product(*joint_legal_actions): + state_for_search = state.clone() + state_for_search.apply_actions(list(joint_actions)) + _get_subgames_states(state_for_search, all_states, depth_limit, depth + 1, + include_terminals, include_chance_states, + include_mean_field_states, to_string, + stop_if_encountered) + else: + for action in state.legal_actions(): + state_for_search = state.child(action) + _get_subgames_states(state_for_search, all_states, depth_limit, depth + 1, + include_terminals, include_chance_states, + include_mean_field_states, to_string, + stop_if_encountered) + + +def get_all_states(game, + depth_limit=-1, + include_terminals=True, + include_chance_states=False, + include_mean_field_states=False, + to_string=lambda s: s.history_str(), + stop_if_encountered=True): + """Gets all states in the game, indexed by their string representation. + + For small games only! Useful for methods that solve the games explicitly, + i.e. value iteration. Use this default implementation with caution as it does + a recursive tree walk of the game and could easily fill up memory for larger + games or games with long horizons. + + Currently only works for sequential games. + + Arguments: + game: The game to analyze, as returned by `load_game`. + depth_limit: How deeply to analyze the game tree. Negative means no limit, 0 + means root-only, etc. + include_terminals: If True, include terminal states. + include_chance_states: If True, include chance node states. + include_mean_field_states: If True, include mean field node states. + to_string: The serialization function. We expect this to be + `lambda s: s.history_str()` as this enforces perfect recall, but for + historical reasons, using `str` is also supported, but the goal is to + remove this argument. + stop_if_encountered: if this is set, do not keep recursively adding states + if this state is already in the list. This allows support for games that + have cycles. + + Returns: + A `dict` with `to_string(state)` keys and `pyspiel.State` values containing + all states encountered traversing the game tree up to the specified depth. + """ + root_states = game.new_initial_states() + all_states = dict() + + for root in root_states: + # Then, do a recursive tree walk to fill up the map. 
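For orientation, here is a minimal usage sketch of `get_all_states` (it mirrors the `get_all_states_test.py` added later in this diff; the game name and the expected state count are taken from that test, everything else is illustrative):

```python
import pyspiel
from open_spiel.python.algorithms import get_all_states

# Enumerate every state of a small game, keyed by its history string
# (perfect-recall keying, as recommended in the docstring above).
game = pyspiel.load_game("tic_tac_toe")
states = get_all_states.get_all_states(
    game,
    depth_limit=-1,              # no depth limit
    include_terminals=True,
    include_chance_states=False,
    to_string=lambda s: s.history_str())
print(len(states))  # the accompanying test expects 549946 distinct histories
```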
+ _get_subgames_states( + state=root, + all_states=all_states, + depth_limit=depth_limit, + depth=0, + include_terminals=include_terminals, + include_chance_states=include_chance_states, + include_mean_field_states=include_mean_field_states, + to_string=to_string, + stop_if_encountered=stop_if_encountered) + + if not all_states: + raise ValueError("GetSubgameStates returned 0 states!") + + return all_states diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/get_all_states_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/get_all_states_test.py new file mode 100644 index 0000000..1792ea6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/get_all_states_test.py @@ -0,0 +1,72 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.algorithms.get_all_states.""" + +from absl.testing import absltest + +from open_spiel.python.algorithms import get_all_states +import pyspiel + + +class GetAllStatesTest(absltest.TestCase): + + def test_tic_tac_toe_number_histories(self): + game = pyspiel.load_game("tic_tac_toe") + states = get_all_states.get_all_states( + game, + depth_limit=-1, + include_terminals=True, + include_chance_states=False, + to_string=lambda s: s.history_str()) + self.assertLen(states, 549946) + states = get_all_states.get_all_states( + game, + depth_limit=-1, + include_terminals=True, + include_chance_states=False, + to_string=str) + self.assertLen(states, 5478) + + def test_simultaneous_python_game_get_all_state(self): + game = pyspiel.load_game( + "python_iterated_prisoners_dilemma(max_game_length=6)") + states = get_all_states.get_all_states( + game, + depth_limit=-1, + include_terminals=True, + include_chance_states=False, + to_string=lambda s: s.history_str()) + self.assertLen(states, 10921) + states = get_all_states.get_all_states( + game, + depth_limit=-1, + include_terminals=True, + include_chance_states=False, + to_string=str) + self.assertLen(states, 5461) + + def test_simultaneous_game_get_all_state(self): + game = game = pyspiel.load_game("goofspiel", {"num_cards": 3}) + states = get_all_states.get_all_states( + game, + depth_limit=-1, + include_terminals=True, + include_chance_states=False, + to_string=lambda s: s.history_str()) + self.assertLen(states, 273) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/ismcts.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/ismcts.py new file mode 100644 index 0000000..bb86aad --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/ismcts.py @@ -0,0 +1,350 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""An implementation of Information Set Monte Carlo Tree Search (IS-MCTS). + +See Cowling, Powley, and Whitehouse 2011. +https://ieeexplore.ieee.org/document/6203567 +""" + +import copy +import enum +import numpy as np +import pyspiel + +UNLIMITED_NUM_WORLD_SAMPLES = -1 +UNEXPANDED_VISIT_COUNT = -1 +TIE_TOLERANCE = 1e-5 + + +class ISMCTSFinalPolicyType(enum.Enum): + """A enumeration class for final ISMCTS policy type.""" + NORMALIZED_VISITED_COUNT = 1 + MAX_VISIT_COUNT = 2 + MAX_VALUE = 3 + + +class ChildSelectionPolicy(enum.Enum): + """A enumeration class for children selection in ISMCTS.""" + UCT = 1 + PUCT = 2 + + +class ChildInfo(object): + """Child node information for the search tree.""" + + def __init__(self, visits, return_sum, prior): + self.visits = visits + self.return_sum = return_sum + self.prior = prior + + def value(self): + return self.return_sum / self.visits + + +class ISMCTSNode(object): + """Node data structure for the search tree.""" + + def __init__(self): + self.child_info = {} + self.total_visits = 0 + self.prior_map = {} + + +class ISMCTSBot(pyspiel.Bot): + """Adapted from the C++ implementation.""" + + def __init__(self, + game, + evaluator, + uct_c, + max_simulations, + max_world_samples=UNLIMITED_NUM_WORLD_SAMPLES, + random_state=None, + final_policy_type=ISMCTSFinalPolicyType.MAX_VISIT_COUNT, + use_observation_string=False, + allow_inconsistent_action_sets=False, + child_selection_policy=ChildSelectionPolicy.PUCT): + + pyspiel.Bot.__init__(self) + self._game = game + self._evaluator = evaluator + self._uct_c = uct_c + self._max_simulations = max_simulations + self._max_world_samples = max_world_samples + self._final_policy_type = final_policy_type + self._use_observation_string = use_observation_string + self._allow_inconsistent_action_sets = allow_inconsistent_action_sets + self._nodes = {} + self._node_pool = [] + self._root_samples = [] + self._random_state = random_state or np.random.RandomState() + self._child_selection_policy = child_selection_policy + self._resampler_cb = None + + def random_number(self): + return self._random_state.uniform() + + def reset(self): + self._nodes = {} + self._node_pool = [] + self._root_samples = [] + + def get_state_key(self, state): + if self._use_observation_string: + return state.current_player(), state.observation_string() + else: + return state.current_player(), state.information_state_string() + + def run_search(self, state): + self.reset() + assert state.get_game().get_type( + ).dynamics == pyspiel.GameType.Dynamics.SEQUENTIAL + assert state.get_game().get_type( + ).information == pyspiel.GameType.Information.IMPERFECT_INFORMATION + + legal_actions = state.legal_actions() + if len(legal_actions) == 1: + return [(legal_actions[0], 1.0)] + + self._root_node = self.create_new_node(state) + + assert self._root_node + + root_infostate_key = self.get_state_key(state) + + for _ in range(self._max_simulations): + # how to sample a pyspiel.state from another pyspiel.state? 
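+      # Determinization step: sample_root_state() draws (or, when
+      # max_world_samples is finite, reuses a cached) complete world state
+      # consistent with the searching player's information state, so the tree
+      # statistics below aggregate over many possible hidden worlds.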
+ sampled_root_state = self.sample_root_state(state) + assert root_infostate_key == self.get_state_key(sampled_root_state) + assert sampled_root_state + self.run_simulation(sampled_root_state) + + if self._allow_inconsistent_action_sets: # when this happens? + legal_actions = state.legal_actions() + temp_node = self.filter_illegals(self._root_node, legal_actions) + assert temp_node.total_visits > 0 + return self.get_final_policy(state, temp_node) + else: + return self.get_final_policy(state, self._root_node) + + def step(self, state): + action_list, prob_list = zip(*self.run_search(state)) + return self._random_state.choice(action_list, p=prob_list) + + def get_policy(self, state): + return self.run_search(state) + + def step_with_policy(self, state): + policy = self.get_policy(state) + action_list, prob_list = zip(*policy) + sampled_action = self._random_state.choice(action_list, p=prob_list) + return policy, sampled_action + + def get_final_policy(self, state, node): + assert node + if ( + self._final_policy_type + == ISMCTSFinalPolicyType.NORMALIZED_VISITED_COUNT + ): + assert node.total_visits > 0 + total_visits = node.total_visits + policy = [ + (action, child.visits / total_visits) + for action, child in node.child_info.items() + ] + elif self._final_policy_type == ISMCTSFinalPolicyType.MAX_VISIT_COUNT: + assert node.total_visits > 0 + max_visits = -float('inf') + count = 0 + for action, child in node.child_info.items(): + if child.visits == max_visits: + count += 1 + elif child.visits > max_visits: + max_visits = child.visits + count = 1 + policy = [(action, 1. / count if child.visits == max_visits else 0.0) + for action, child in node.child_info.items()] + elif self._final_policy_type == ISMCTSFinalPolicyType.MAX_VALUE: + assert node.total_visits > 0 + max_value = -float('inf') + count = 0 + for action, child in node.child_info.items(): + if child.value() == max_value: + count += 1 + elif child.value() > max_value: + max_value = child.value() + count = 1 + policy = [(action, 1. / count if child.value() == max_value else 0.0) + for action, child in node.child_info.items()] + + policy_size = len(policy) + legal_actions = state.legal_actions() + if policy_size < len(legal_actions): # do we really need this step? 
+ for action in legal_actions: + if action not in node.child_info: + policy.append((action, 0.0)) + return policy + + def sample_root_state(self, state): + if self._max_world_samples == UNLIMITED_NUM_WORLD_SAMPLES: + return self.resample_from_infostate(state) + elif len(self._root_samples) < self._max_world_samples: + self._root_samples.append(self.resample_from_infostate(state)) + return self._root_samples[-1].clone() + elif len(self._root_samples) == self._max_world_samples: + idx = self._random_state.randint(len(self._root_samples)) + return self._root_samples[idx].clone() + else: + raise pyspiel.SpielError( + 'Case not handled (badly set max_world_samples..?)') + + def resample_from_infostate(self, state): + if self._resampler_cb: + return self._resampler_cb(state, state.current_player()) + else: + return state.resample_from_infostate( + state.current_player(), pyspiel.UniformProbabilitySampler(0., 1.)) + + def create_new_node(self, state): + infostate_key = self.get_state_key(state) + self._node_pool.append(ISMCTSNode()) + node = self._node_pool[-1] + self._nodes[infostate_key] = node + node.total_visits = UNEXPANDED_VISIT_COUNT + return node + + def set_resampler(self, cb): + self._resampler_cb = cb + + def lookup_node(self, state): + if self.get_state_key(state) in self._nodes: + return self._nodes[self.get_state_key(state)] + return None + + def lookup_or_create_node(self, state): + node = self.lookup_node(state) + if node: + return node + return self.create_new_node(state) + + def filter_illegals(self, node, legal_actions): + new_node = copy.deepcopy(node) + for action, child in node.child_info.items(): + if action not in legal_actions: + new_node.total_visits -= child.visits + del new_node.child_info[action] + return new_node + + def expand_if_necessary(self, node, action): + if action not in node.child_info: + node.child_info[action] = ChildInfo(0.0, 0.0, node.prior_map[action]) + + def select_action_tree_policy(self, node, legal_actions): + if self._allow_inconsistent_action_sets: + temp_node = self.filter_illegals(node, legal_actions) + if temp_node.total_visits == 0: + action = legal_actions[self._random_state.randint( + len(legal_actions))] # prior? + self.expand_if_necessary(node, action) + return action + else: + return self.select_action(temp_node) + else: + return self.select_action(node) + + def _action_value(self, node, child): + assert child.visits > 0 + action_value = child.value() + if self._child_selection_policy == ChildSelectionPolicy.UCT: + action_value += self._uct_c * np.sqrt( + np.log(node.total_visits) / child.visits + ) + elif self._child_selection_policy == ChildSelectionPolicy.PUCT: + action_value += ( + self._uct_c + * child.prior + * np.sqrt(node.total_visits) + / (1 + child.visits) + ) + else: + raise pyspiel.SpielError('Child selection policy unrecognized.') + return action_value + + def _select_candidate_actions(self, node): + candidates = [] + + max_action_value = max( + [self._action_value(node, child) for child in node.child_info.values()] + ) + + # Select all the actions within the tolerance of the best action. 
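For reference, the two child-selection scores computed by `_action_value` above are, with $N$ the parent visit count, $n(a)$ and $Q(a)$ a child's visit count and mean return, $P(a)$ its prior, and $c$ the exploration constant `uct_c`:

$$
\text{UCT}(a) = Q(a) + c \sqrt{\frac{\ln N}{n(a)}},
\qquad
\text{PUCT}(a) = Q(a) + c \, P(a) \, \frac{\sqrt{N}}{1 + n(a)}.
$$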
+ for action, child in node.child_info.items(): + if self._action_value(node, child) > max_action_value - TIE_TOLERANCE: + candidates.append(action) + + return candidates + + def select_action(self, node): + candidates = self._select_candidate_actions(node) + assert len(candidates) >= 1 + return candidates[self._random_state.randint(len(candidates))] + + def check_expand(self, node, legal_actions): + if not self._allow_inconsistent_action_sets and len( + node.child_info) == len(legal_actions): + return pyspiel.INVALID_ACTION + legal_actions_copy = copy.deepcopy(legal_actions) + self._random_state.shuffle(legal_actions_copy) + for action in legal_actions_copy: + if action not in node.child_info: + return action + return pyspiel.INVALID_ACTION + + def run_simulation(self, state): + if state.is_terminal(): + return state.returns() + elif state.is_chance_node(): + action_list, prob_list = zip(*state.chance_outcomes()) + chance_action = self._random_state.choice(action_list, p=prob_list) + state.apply_action(chance_action) + return self.run_simulation(state) + legal_actions = state.legal_actions() + cur_player = state.current_player() + node = self.lookup_or_create_node(state) + + assert node + + if node.total_visits == UNEXPANDED_VISIT_COUNT: + node.total_visits = 0 + for action, prob in self._evaluator.prior(state): + node.prior_map[action] = prob + return self._evaluator.evaluate(state) + else: + chosen_action = self.check_expand( + node, legal_actions) # add one children at a time? + if chosen_action != pyspiel.INVALID_ACTION: + # check if all actions have been expanded, if not, select one? + # if yes, ucb? + self.expand_if_necessary(node, chosen_action) + else: + chosen_action = self.select_action_tree_policy(node, legal_actions) + + assert chosen_action != pyspiel.INVALID_ACTION + + node.total_visits += 1 + node.child_info[chosen_action].visits += 1 + state.apply_action(chosen_action) + returns = self.run_simulation(state) + node.child_info[chosen_action].return_sum += returns[cur_player] + return returns diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/ismcts_agent_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/ismcts_agent_test.py new file mode 100644 index 0000000..e3053ac --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/ismcts_agent_test.py @@ -0,0 +1,60 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
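For orientation, a minimal usage sketch of the `ISMCTSBot` defined above, following the game loop used in `ismcts_test.py` later in this diff; the parameter values are illustrative, not prescribed:

```python
import numpy as np
import pyspiel
from open_spiel.python.algorithms import ismcts
from open_spiel.python.algorithms import mcts

# Play one game of Kuhn poker with IS-MCTS choosing every decision;
# chance nodes are sampled from their declared outcome distribution.
game = pyspiel.load_game("kuhn_poker")
bot = ismcts.ISMCTSBot(
    game=game,
    evaluator=mcts.RandomRolloutEvaluator(),
    uct_c=4.0,
    max_simulations=100)

state = game.new_initial_state()
while not state.is_terminal():
  if state.is_chance_node():
    actions, probs = zip(*state.chance_outcomes())
    state.apply_action(np.random.choice(actions, p=probs))
  else:
    state.apply_action(bot.step(state))
print(state.returns())
```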
+"""Test the IS-MCTS Agent.""" + +from absl.testing import absltest +from absl.testing import parameterized +from open_spiel.python import rl_environment +from open_spiel.python.algorithms import ismcts +from open_spiel.python.algorithms import mcts +from open_spiel.python.algorithms import mcts_agent + + +class MCTSAgentTest(parameterized.TestCase): + + @parameterized.named_parameters( + dict(testcase_name="tic_tac_toe", game_string="kuhn_poker"), + dict(testcase_name="leduc_poker", game_string="leduc_poker"), + ) + def test_self_play_episode(self, game_string: str): + env = rl_environment.Environment(game_string, include_full_state=True) + num_players = env.num_players + num_actions = env.action_spec()["num_actions"] + + # Create the MCTS bot. Both agents can share the same bot in this case since + # there is no state kept between searches. See mcts.py for more info about + # the arguments. + ismcts_bot = ismcts.ISMCTSBot( + game=env.game, + uct_c=1.5, + max_simulations=100, + evaluator=mcts.RandomRolloutEvaluator()) + + agents = [ + mcts_agent.MCTSAgent( + player_id=idx, num_actions=num_actions, mcts_bot=ismcts_bot) + for idx in range(num_players) + ] + + time_step = env.reset() + while not time_step.last(): + player_id = time_step.observations["current_player"] + agent_output = agents[player_id].step(time_step) + time_step = env.step([agent_output.action]) + for agent in agents: + agent.step(time_step) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/ismcts_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/ismcts_test.py new file mode 100644 index 0000000..d02b0f8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/ismcts_test.py @@ -0,0 +1,101 @@ +# Copyright 2025 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np +from open_spiel.python.algorithms import ismcts +from open_spiel.python.algorithms import mcts +import pyspiel + + +NOLIMIT_GAME_STRING = ( + "universal_poker(betting=nolimit,bettingAbstraction=fullgame,blind=50 100," + "firstPlayer=1 1,numBoardCards=0 3 1 1,numHoleCards=2,numPlayers=2," + "numRanks=13,numRounds=2,numSuits=4,stack=20000 20000)" +) + + +class IsmctsTest(parameterized.TestCase): + + def test_action_candidates_selection(self): + ismcts_bot = ismcts.ISMCTSBot( + game=None, + uct_c=1.0, + evaluator=None, + max_simulations=10, + ) + + # Test that the tie tolerance is respected. 
+ node = ismcts.ISMCTSNode() + node.child_info = { + 0: ismcts.ChildInfo(visits=1, return_sum=7.0, prior=1.0), + 1: ismcts.ChildInfo( + visits=1, return_sum=7.0 - ismcts.TIE_TOLERANCE / 2.0, prior=1.0 + ), + 2: ismcts.ChildInfo( + visits=1, return_sum=7.0 - ismcts.TIE_TOLERANCE * 2.0, prior=1.0 + ), + } + node.total_visits = 4 + self.assertAlmostEqual( + ismcts_bot._action_value(node, node.child_info[0]).item(), 8.0 + ) + candidates = ismcts_bot._select_candidate_actions(node) + self.assertLen(candidates, 2) + + # Child 0 and 1 are selected because they are within the tie tolerance. + self.assertIn(0, candidates) + self.assertIn(1, candidates) + + # Child 2 is not selected because it is outside the tie tolerance. + self.assertNotIn(2, candidates) + + def play_game(self, game: pyspiel.Game, ismcts_bot: ismcts.ISMCTSBot): + state = game.new_initial_state() + while not state.is_terminal(): + if state.is_chance_node(): + outcomes = state.chance_outcomes() + action_list, prob_list = zip(*outcomes) + action = np.random.choice(action_list, p=prob_list) + state.apply_action(action) + else: + action = ismcts_bot.step(state) + state.apply_action(action) + + def test_play_kuhn_poker(self): + game = pyspiel.load_game("kuhn_poker") + ismcts_bot = ismcts.ISMCTSBot( + game=game, + uct_c=4.0, + evaluator=mcts.RandomRolloutEvaluator(), + max_simulations=10, + ) + self.play_game(game, ismcts_bot) + + @absltest.skip("Skipping. This one does not work.") + def test_play_universal_poker(self): + if "universal_poker" in pyspiel.registered_names(): + game = pyspiel.load_game(NOLIMIT_GAME_STRING) + ismcts_bot = ismcts.ISMCTSBot( + game=game, + uct_c=4.0, + evaluator=mcts.RandomRolloutEvaluator(), + max_simulations=10, + ) + self.play_game(game, ismcts_bot) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/jpsro.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/jpsro.py new file mode 100644 index 0000000..0158b76 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/jpsro.py @@ -0,0 +1,1555 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Joint Policy-Space Response Oracles. + +An implementation of JSPRO, described in https://arxiv.org/abs/2106.09435. 
+ +Bibtex / Cite: + +``` +@misc{marris2021multiagent, + title={Multi-Agent Training beyond Zero-Sum with Correlated Equilibrium + Meta-Solvers}, + author={Luke Marris and Paul Muller and Marc Lanctot and Karl Tuyls and + Thore Graepel}, + year={2021}, + eprint={2106.09435}, + archivePrefix={arXiv}, + primaryClass={cs.MA} +} +``` +""" + +import itertools +import string + +from absl import logging + +import cvxpy as cp +import numpy as np +import scipy as sp + +from open_spiel.python import policy +from open_spiel.python.algorithms import projected_replicator_dynamics +from open_spiel.python.egt import alpharank as alpharank_lib +import pyspiel + + +DEFAULT_ECOS_SOLVER_KWARGS = dict( + solver="ECOS", + max_iters=100000000, + abstol=1e-7, + reltol=1e-7, + feastol=1e-7, + abstol_inacc=1e-7, + reltol_inacc=1e-7, + feastol_inacc=1e-7, + verbose=False, +) +DEFAULT_OSQP_SOLVER_KWARGS = dict( + solver="OSQP", + max_iter=1000000000, + eps_abs=1e-8, + eps_rel=1e-8, + eps_prim_inf=1e-8, + eps_dual_inf=1e-8, + polish_refine_iter=100, + check_termination=1000, + sigma=1e-7, # Default 1e-6 + delta=1e-7, # Default 1e-06 + verbose=False, +) +DEFAULT_CVXOPT_SOLVER_KWARGS = dict( + solver="CVXOPT", + maxiters=200000, + abstol=5e-8, + reltol=5e-8, + feastol=5e-8, + refinement=10, + verbose=False, +) +INIT_POLICIES = ( + "uniform", # Unopinionated but slower to evaluate. + "random_deterministic", # Faster to evaluate but requires samples. +) +UPDATE_PLAYERS_STRATEGY = ( + "all", + "cycle", + "random", +) +BRS = ( + "cce", + "ce", +) +BR_SELECTIONS = ( + "all", # All policies. + "all_novel", # All novel policies. + "random", # Random. + "random_novel", # Random novel BR (one that has not be considered before). + "largest_gap", # The BR with the largest gap. +) +META_SOLVERS = ( + "uni", # Uniform. + "undominated_uni", # Uniform over undominated strategies. + "rj", # Random joint. + "undominated_rj", # Random joint. + "rd", # Random dirichlet. + "undominated_rd", # Random dirichlet. + "prd", # Prodected replicator dynamics. + "alpharank", # Alpha-Rank + "mgce", # Maximum gini CE. + "min_epsilon_mgce", # Min Epsilon Maximum gini CE. + "approx_mgce", # Approximate Maximum gini CE. + "rmwce", # Random maximum welfare CE. + "mwce", # Maximum welfare CE. + "rvce", # Random vertex CE. + "mgcce", # Maximum gini CCE. + "min_epsilon_mgcce", # Min Epsilon Maximum gini CCE. + "approx_mgcce", # Approximate Maximum gini CE. + "rmwcce", # Random maximum welfare CCE. + "mwcce", # Maximum welfare CCE. + "rvcce", # Random vertex CCE. +) +LOG_STRING = """ +Iteration {iteration: 6d} +=== ({game}) +Player {player} +BRs {brs} +Num Policies {num_policies} +Unique Policies {unique} +--- ({train_meta_solver}) +Train Value {train_value} +Train Gap {train_gap} +--- ({eval_meta_solver}) +Eval Value {eval_value} +Eval Gap {eval_gap} +""" +DIST_TOL = 1e-8 +GAP_TOL = 1e-8 +RETURN_TOL = 1e-12 + + +## Meta Solvers. + + +# Helper Functions - Dominated strategy elimination. 
+def _eliminate_dominated_payoff( + payoff, epsilon, action_labels=None, action_repeats=None, weakly=False): + """Eliminate epsilon dominated strategies.""" + num_players = payoff.shape[0] + eliminated = True + if action_labels is None: + action_labels = [np.arange(na, dtype=np.int32) for na in payoff.shape[1:]] + if action_repeats is not None: + action_repeats = [ar for ar in action_repeats] + while eliminated: + eliminated = False + for p in range(num_players): + if epsilon > 0.0: + continue + num_actions = payoff.shape[1:] + if num_actions[p] <= 1: + continue + for a in range(num_actions[p]): + index = [slice(None) for _ in range(num_players)] + index[p] = slice(a, a+1) + if weakly: + diff = payoff[p] <= payoff[p][tuple(index)] + else: + diff = payoff[p] < payoff[p][tuple(index)] + axis = tuple(range(p)) + tuple(range(p+1, num_players)) + less = np.all(diff, axis=axis) + less[a] = False # Action cannot eliminate itself. + if np.any(less): + nonzero = np.nonzero(less) + payoff = np.delete(payoff, nonzero, axis=p+1) + action_labels[p] = np.delete(action_labels[p], nonzero) + if action_repeats is not None: + action_repeats[p] = np.delete(action_repeats[p], nonzero) + eliminated = True + break + return payoff, action_labels, action_repeats + + +def _reconstruct_dist(eliminated_dist, action_labels, num_actions): + """Returns reconstructed dist from eliminated_dist and action_labels. + + Redundant dist elements are given values 0. + + Args: + eliminated_dist: Array of shape [A0E, A1E, ...]. + action_labels: List of length N and shapes [[A0E], [A1E], ...]. + num_actions: List of length N and values [A0, A1, ...]. + + Returns: + reconstructed_dist: Array of shape [A0, A1, ...]. + """ + reconstructed_payoff = np.zeros(num_actions) + reconstructed_payoff[np.ix_(*action_labels)] = eliminated_dist + return reconstructed_payoff + + +def _eliminate_dominated_decorator(func): + """Wrap eliminate dominated.""" + def wrapper(payoff, per_player_repeats, *args, eliminate_dominated=True, + **kwargs): + epsilon = getattr(kwargs, "epsilon", 0.0) + if not eliminate_dominated: + return func(payoff, *args, **kwargs) + num_actions = payoff.shape[1:] + (eliminated_payoff, action_labels, eliminated_action_repeats) = ( + _eliminate_dominated_payoff( + payoff, epsilon, action_repeats=per_player_repeats + ) + ) + eliminated_dist, meta = func( + eliminated_payoff, eliminated_action_repeats, *args, **kwargs) + meta["eliminated_dominated_dist"] = eliminated_dist + meta["eliminated_dominated_payoff"] = eliminated_payoff + dist = _reconstruct_dist( + eliminated_dist, action_labels, num_actions) + return dist, meta + return wrapper + + +# Optimization. +def _try_two_solvers(func, *args, **kwargs): + try: + logging.debug("Trying CVXOPT.", flush=True) + kwargs_ = {"solver_kwargs": DEFAULT_CVXOPT_SOLVER_KWARGS, **kwargs} + res = func(*args, **kwargs_) + except: # pylint: disable=bare-except + logging.debug("CVXOPT failed. Trying OSQP.", flush=True) + kwargs_ = {"solver_kwargs": DEFAULT_OSQP_SOLVER_KWARGS, **kwargs} + res = func(*args, **kwargs_) + return res + + +# Helper Functions - CCEs. 
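A small worked example of the elimination helper above (illustrative only; it calls the module-private `_eliminate_dominated_payoff` on a prisoner's-dilemma-style payoff in which action 0 is strictly dominated for both players):

```python
import numpy as np

# payoff[player, a0, a1]: action 1 strictly dominates action 0 for each player.
payoff = np.array([
    [[3., 0.], [5., 1.]],   # player 0's payoffs
    [[3., 5.], [0., 1.]],   # player 1's payoffs
])
reduced, labels, _ = _eliminate_dominated_payoff(payoff, epsilon=0.0)
print(reduced.shape)  # (2, 1, 1): one surviving action per player
print(labels)         # only action 1 survives for each player
```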
+def _indices(p, a, num_players): + return [a if p_ == p else slice(None) for p_ in range(num_players)] + + +def _sparse_indices_generator(player, action, num_actions): + indices = [(action,) if p == player else range(na) + for p, na in enumerate(num_actions)] + return itertools.product(*indices) + + +def _partition_by_player(val, p_vec, num_players): + """Partitions a value by the players vector.""" + parts = [] + for p in range(num_players): + inds = p_vec == p + if inds.size > 0: + parts.append(val[inds]) + else: + parts.append(None) + return parts + + +def _cce_constraints(payoff, epsilons, remove_null=True, zero_tolerance=1e-8): + """Returns the coarse correlated constraints. + + Args: + payoff: A [NUM_PLAYER, NUM_ACT_0, NUM_ACT_1, ...] shape payoff tensor. + epsilons: Per player floats corresponding to the epsilon. + remove_null: Remove null rows of the constraint matrix. + zero_tolerance: Zero out elements with small value. + + Returns: + a_mat: The gain matrix for deviting to an action or shape [SUM(A), PROD(A)]. + meta: Dictionary containing meta information. + """ + num_players = payoff.shape[0] + num_actions = payoff.shape[1:] + num_dists = int(np.prod(num_actions)) + + cor_cons = int(np.sum(num_actions)) + + a_mat = np.zeros([cor_cons] + list(num_actions)) + p_vec = np.zeros([cor_cons], dtype=np.int32) + i_vec = np.zeros([cor_cons], dtype=np.int32) + con = 0 + for p in range(num_players): + for a1 in range(num_actions[p]): + a1_inds = tuple(_indices(p, a1, num_players)) + for a0 in range(num_actions[p]): + a0_inds = tuple(_indices(p, a0, num_players)) + a_mat[con][a0_inds] += payoff[p][a1_inds] + a_mat[con] -= payoff[p] + a_mat[con] -= epsilons[p] + + p_vec[con] = p + i_vec[con] = a0 + + con += 1 + + a_mat = np.reshape(a_mat, [cor_cons, num_dists]) + a_mat[np.abs(a_mat) < zero_tolerance] = 0.0 + if remove_null: + null_cons = np.any(a_mat != 0.0, axis=-1) + redundant_cons = np.max(a_mat, axis=1) >= 0 + nonzero_mask = null_cons & redundant_cons + a_mat = a_mat[nonzero_mask, :].copy() + p_vec = p_vec[nonzero_mask].copy() + i_vec = i_vec[nonzero_mask].copy() + + meta = dict( + p_vec=p_vec, + i_vec=i_vec, + epsilons=epsilons, + ) + + return a_mat, meta + + +def _ace_constraints(payoff, epsilons, remove_null=True, zero_tolerance=0.0): + """Returns sparse alternate ce constraints Ax - epsilon <= 0. + + Args: + payoff: Dense payoff tensor. + epsilons: Scalar epsilon approximation. + remove_null: Whether to remove null row constraints. + zero_tolerance: Smallest absolute value. + + Returns: + a_csr: Sparse gain matrix from switching from one action to another. + e_vec: Epsilon vector. + meta: Dictionary containing meta information. 
+ """ + num_players = payoff.shape[0] + num_actions = payoff.shape[1:] + num_dists = int(np.prod(num_actions)) + + num_cons = 0 + for p in range(num_players): + num_cons += num_actions[p] * (num_actions[p] - 1) + + a_dok = sp.sparse.dok_matrix((num_cons, num_dists)) + e_vec = np.zeros([num_cons]) + p_vec = np.zeros([num_cons], dtype=np.int32) + i_vec = np.zeros([num_cons, 2], dtype=np.int32) + + num_null_cons = None + num_redundant_cons = None + num_removed_cons = None + + if num_cons > 0: + con = 0 + for p in range(num_players): + generator = itertools.permutations(range(num_actions[p]), 2) + for a0, a1 in generator: + a0_inds = _sparse_indices_generator(p, a0, num_actions) + a1_inds = _sparse_indices_generator(p, a1, num_actions) + + for a0_ind, a1_ind in zip(a0_inds, a1_inds): + a0_ind_flat = np.ravel_multi_index(a0_ind, num_actions) + val = payoff[p][a1_ind] - payoff[p][a0_ind] + if abs(val) > zero_tolerance: + a_dok[con, a0_ind_flat] = val + + e_vec[con] = epsilons[p] + p_vec[con] = p + i_vec[con] = [a0, a1] + con += 1 + + a_csr = a_dok.tocsr() + if remove_null: + null_cons = np.logical_or( + a_csr.max(axis=1).todense() != 0.0, + a_csr.min(axis=1).todense() != 0.0) + null_cons = np.ravel(null_cons) + redundant_cons = np.ravel(a_csr.max(axis=1).todense()) >= e_vec + nonzero_mask = null_cons & redundant_cons + a_csr = a_csr[nonzero_mask, :] + e_vec = e_vec[nonzero_mask].copy() + p_vec = p_vec[nonzero_mask].copy() + i_vec = i_vec[nonzero_mask].copy() + num_null_cons = np.sum(~null_cons) + num_redundant_cons = np.sum(~redundant_cons) + num_removed_cons = np.sum(~nonzero_mask) + + else: + a_csr = a_dok.tocsr() + + meta = dict( + p_vec=p_vec, + i_vec=i_vec, + epsilons=epsilons, + num_null_cons=num_null_cons, + num_redundant_cons=num_redundant_cons, + num_removed_cons=num_removed_cons, + ) + + return a_csr, e_vec, meta + + +def _get_repeat_factor(action_repeats): + """Returns the repeat factors for the game.""" + num_players = len(action_repeats) + out_labels = string.ascii_lowercase[:len(action_repeats)] + in_labels = ",".join(out_labels) + repeat_factor = np.ravel(np.einsum( + "{}->{}".format(in_labels, out_labels), *action_repeats)) + indiv_repeat_factors = [] + for player in range(num_players): + action_repeats_ = [ + np.ones_like(ar) if player == p else ar + for p, ar in enumerate(action_repeats)] + indiv_repeat_factor = np.ravel(np.einsum( + "{}->{}".format(in_labels, out_labels), *action_repeats_)) + indiv_repeat_factors.append(indiv_repeat_factor) + return repeat_factor, indiv_repeat_factors + + +# Solvers. +def _linear( + payoff, + a_mat, + e_vec, + action_repeats=None, + solver_kwargs=None, + cost=None): + """Returns linear solution. + + This is a linear program. + + Args: + payoff: A [NUM_PLAYER, NUM_ACT_0, NUM_ACT_1, ...] shape payoff tensor. + a_mat: Constaint matrix. + e_vec: Epsilon vector. + action_repeats: List of action repeat counts. + solver_kwargs: Solver kwargs. + cost: Cost function of same shape as payoff. + + Returns: + An epsilon-correlated equilibrium. + """ + num_players = payoff.shape[0] + num_actions = payoff.shape[1:] + num_dists = int(np.prod(num_actions)) + + if solver_kwargs is None: + solver_kwargs = DEFAULT_ECOS_SOLVER_KWARGS + + if a_mat.shape[0] > 0: + # Variables. + x = cp.Variable(num_dists, nonneg=True) + + # Classifier. + epsilon_dists = cp.matmul(a_mat, x) - e_vec + + # Constraints. + dist_eq_con = cp.sum(x) == 1 + cor_lb_con = epsilon_dists <= 0 + + # Objective. 
+ if cost is None: + player_totals = [ + cp.sum(cp.multiply(payoff[p].flat, x)) for p in range(num_players)] + reward = cp.sum(player_totals) + else: + reward = cp.sum(cp.multiply(cost.flat, x)) + obj = cp.Maximize(reward) + + prob = cp.Problem(obj, [ + dist_eq_con, + cor_lb_con, + ]) + + # Solve. + prob.solve(**solver_kwargs) + status = prob.status + + # Distribution. + dist = np.reshape(x.value, num_actions) + + # Other. + val = reward.value + else: + if action_repeats is not None: + repeat_factor, _ = _get_repeat_factor(action_repeats) + x = repeat_factor / np.sum(repeat_factor) + else: + x = np.ones([num_dists]) / num_dists + val = 0.0 # Fix me. + dist = np.reshape(x, num_actions) + status = None + + meta = dict( + x=x, + a_mat=a_mat, + val=val, + status=status, + payoff=payoff, + consistent=True, + unique=False, + ) + + return dist, meta + + +def _qp_cce( + payoff, + a_mats, + e_vecs, + assume_full_support=False, + action_repeats=None, + solver_kwargs=None, + min_epsilon=False): + """Returns the correlated equilibrium with maximum Gini impurity. + + Args: + payoff: A [NUM_PLAYER, NUM_ACT_0, NUM_ACT_1, ...] shape payoff tensor. + a_mats: A [NUM_CON, PROD(A)] shape gain tensor. + e_vecs: Epsilon vector. + assume_full_support: Whether to ignore beta values. + action_repeats: Vector of action repeats for each player. + solver_kwargs: Additional kwargs for solver. + min_epsilon: Whether to minimize epsilon. + + Returns: + An epsilon-correlated equilibrium. + """ + num_players = payoff.shape[0] + num_actions = payoff.shape[1:] + num_dists = int(np.prod(num_actions)) + + if solver_kwargs is None: + solver_kwargs = DEFAULT_OSQP_SOLVER_KWARGS + + epsilon = None + nonzero_cons = [a_mat.shape[0] > 0 for a_mat in a_mats if a_mat is not None] + if any(nonzero_cons): + x = cp.Variable(num_dists, nonneg=(not assume_full_support)) + if min_epsilon: + epsilon = cp.Variable(nonpos=True) + e_vecs = [epsilon] * num_players + + if action_repeats is not None: + repeat_factor, _ = _get_repeat_factor(action_repeats) + x_repeated = cp.multiply(x, repeat_factor) + dist_eq_con = cp.sum(x_repeated) == 1 + cor_lb_cons = [ + cp.matmul(a_mat, cp.multiply(x, repeat_factor)) <= e_vec + for a_mat, e_vec in + zip(a_mats, e_vecs) if a_mat.size > 0] + eye = sp.sparse.diags(repeat_factor) + else: + repeat_factor = 1 + x_repeated = x + dist_eq_con = cp.sum(x_repeated) == 1 + cor_lb_cons = [ + cp.matmul(a_mat, x) <= e_vec for a_mat, e_vec in + zip(a_mats, e_vecs) if a_mat.size > 0] + eye = sp.sparse.eye(num_dists) + + # This is more memory efficient than using cp.sum_squares. 
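+    # The objective below is the Gini impurity 1 - sum_a p(a)^2 of the joint
+    # distribution (evaluated on the repeat-expanded strategy space when
+    # action_repeats is given); maximizing it selects the most mixed
+    # coarse correlated equilibrium satisfying the gain constraints above.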
+ cost = 1 - cp.quad_form(x, eye) + if min_epsilon: + cost -= cp.multiply(2, epsilon) + + obj = cp.Maximize(cost) + prob = cp.Problem(obj, [dist_eq_con] + cor_lb_cons) + cost_value = prob.solve(**solver_kwargs) + status = prob.status + alphas = [cor_lb_con.dual_value for cor_lb_con in cor_lb_cons] + lamb = dist_eq_con.dual_value + + val = cost.value + x = x_repeated.value + dist = np.reshape(x, num_actions) + else: + cost_value = 0.0 + val = 1 - 1 / num_dists + if action_repeats is not None: + repeat_factor, _ = _get_repeat_factor(action_repeats) + x = repeat_factor / np.sum(repeat_factor) + else: + x = np.ones([num_dists]) / num_dists + dist = np.reshape(x, num_actions) + status = None + alphas = [np.zeros([])] + lamb = None + + meta = dict( + x=x, + a_mats=a_mats, + status=status, + cost=cost_value, + val=val, + alphas=alphas, + lamb=lamb, + unique=True, + min_epsilon=None if epsilon is None else epsilon.value, + ) + return dist, meta + + +def _qp_ce( + payoff, + a_mats, + e_vecs, + assume_full_support=False, + action_repeats=None, + solver_kwargs=None, + min_epsilon=False): + """Returns the correlated equilibrium with maximum Gini impurity. + + Args: + payoff: A [NUM_PLAYER, NUM_ACT_0, NUM_ACT_1, ...] shape payoff tensor. + a_mats: A [NUM_CON, PROD(A)] shape gain tensor. + e_vecs: Epsilon vector. + assume_full_support: Whether to ignore beta values. + action_repeats: Vector of action repeats for each player. + solver_kwargs: Additional kwargs for solver. + min_epsilon: Whether to minimize epsilon. + + Returns: + An epsilon-correlated equilibrium. + """ + num_players = payoff.shape[0] + num_actions = payoff.shape[1:] + num_dists = int(np.prod(num_actions)) + + if solver_kwargs is None: + solver_kwargs = DEFAULT_OSQP_SOLVER_KWARGS + + epsilon = None + nonzero_cons = [a_mat.shape[0] > 0 for a_mat in a_mats if a_mat is not None] + if any(nonzero_cons): + x = cp.Variable(num_dists, nonneg=(not assume_full_support)) + if min_epsilon: + epsilon = cp.Variable(nonpos=True) + e_vecs = [epsilon] * num_players + + if action_repeats is not None: + repeat_factor, indiv_repeat_factors = _get_repeat_factor( + action_repeats) + x_repeated = cp.multiply(x, repeat_factor) + dist_eq_con = cp.sum(x_repeated) == 1 + cor_lb_cons = [ + cp.matmul(a_mat, cp.multiply(x, rf)) <= e_vec for a_mat, e_vec, rf in + zip(a_mats, e_vecs, indiv_repeat_factors) if a_mat.size > 0] + eye = sp.sparse.diags(repeat_factor) + else: + repeat_factor = 1 + x_repeated = x + dist_eq_con = cp.sum(x_repeated) == 1 + cor_lb_cons = [ + cp.matmul(a_mat, x) <= e_vec for a_mat, e_vec in + zip(a_mats, e_vecs) if a_mat.size > 0] + eye = sp.sparse.eye(num_dists) + + # This is more memory efficient than using cp.sum_squares. 
+ cost = 1 - cp.quad_form(x, eye) + if min_epsilon: + cost -= cp.multiply(2, epsilon) + + obj = cp.Maximize(cost) + prob = cp.Problem(obj, [dist_eq_con] + cor_lb_cons) + cost_value = prob.solve(**solver_kwargs) + status = prob.status + alphas = [cor_lb_con.dual_value for cor_lb_con in cor_lb_cons] + lamb = dist_eq_con.dual_value + + val = cost.value + x = x_repeated.value + dist = np.reshape(x, num_actions) + else: + cost_value = 0.0 + val = 1 - 1 / num_dists + if action_repeats is not None: + repeat_factor, indiv_repeat_factors = _get_repeat_factor( + action_repeats) + x = repeat_factor / np.sum(repeat_factor) + else: + x = np.ones([num_dists]) / num_dists + dist = np.reshape(x, num_actions) + status = None + alphas = [np.zeros([])] + lamb = None + + meta = dict( + x=x, + a_mats=a_mats, + status=status, + cost=cost_value, + val=val, + alphas=alphas, + lamb=lamb, + unique=True, + min_epsilon=None if epsilon is None else epsilon.value, + ) + return dist, meta + + +def _expand_meta_game(meta_game, per_player_repeats): + num_players = meta_game.shape[0] + for player in range(num_players): + meta_game = np.repeat(meta_game, per_player_repeats[player], axis=player+1) + return meta_game + + +def _unexpand_meta_dist(meta_dist, per_player_repeats): + num_players = len(meta_dist.shape) + for player in range(num_players): + meta_dist = np.add.reduceat( + meta_dist, [0] + np.cumsum(per_player_repeats[player]).tolist()[:-1], + axis=player) + return meta_dist + + +# Meta-solvers - Baselines. +def _uni(meta_game, per_player_repeats, ignore_repeats=False): + """Uniform.""" + if ignore_repeats: + num_policies = meta_game.shape[1:] + num_dists = np.prod(num_policies) + meta_dist = np.full(num_policies, 1./num_dists) + else: + outs = [ppr / np.sum(ppr) for ppr in per_player_repeats] + labels = string.ascii_lowercase[:len(outs)] + comma_labels = ",".join(labels) + meta_dist = np.einsum("{}->{}".format(comma_labels, labels), *outs) + return meta_dist, dict() + + +@_eliminate_dominated_decorator +def _undominated_uni(meta_game, per_player_repeats, ignore_repeats=False): + """Undominated uniform.""" + return _uni(meta_game, per_player_repeats, ignore_repeats=ignore_repeats) + + +def _rj(meta_game, per_player_repeats, ignore_repeats=False): + """Random joint.""" + ignore_repeats = True + pvals, _ = _uni( + meta_game, per_player_repeats, ignore_repeats=ignore_repeats) + meta_dist = np.reshape( + np.random.multinomial(1, pvals.flat), pvals.shape).astype(np.float64) + return meta_dist, dict() + + +@_eliminate_dominated_decorator +def _undominated_rj(meta_game, per_player_repeats, ignore_repeats=False): + """Undominated random joint.""" + return _rj(meta_game, per_player_repeats, ignore_repeats=ignore_repeats) + + +def _rd(meta_game, per_player_repeats, ignore_repeats=False): + """Random dirichlet.""" + ignore_repeats = True + if ignore_repeats: + num_policies = meta_game.shape[1:] + alpha = np.ones(num_policies) + else: + outs = [ppr for ppr in per_player_repeats] + labels = string.ascii_lowercase[:len(outs)] + comma_labels = ",".join(labels) + alpha = np.einsum("{}->{}".format(comma_labels, labels), *outs) + meta_dist = np.reshape( + np.random.dirichlet(alpha.flat), alpha.shape).astype(np.float64) + return meta_dist, dict() + + +@_eliminate_dominated_decorator +def _undominated_rd(meta_game, per_player_repeats, ignore_repeats=False): + """Undominated random dirichlet.""" + return _rd(meta_game, per_player_repeats, ignore_repeats=ignore_repeats) + + +def _prd(meta_game, per_player_repeats, ignore_repeats=False): + 
"""Projected replicator dynamics.""" + if not ignore_repeats: + meta_game = _expand_meta_game(meta_game, per_player_repeats) + meta_dist = projected_replicator_dynamics.projected_replicator_dynamics( + meta_game) + labels = string.ascii_lowercase[:len(meta_dist)] + comma_labels = ",".join(labels) + meta_dist = np.einsum("{}->{}".format(comma_labels, labels), *meta_dist) + meta_dist[meta_dist < DIST_TOL] = 0.0 + meta_dist /= np.sum(meta_dist) + meta_dist = _unexpand_meta_dist(meta_dist, per_player_repeats) + return meta_dist, dict() + + +@_eliminate_dominated_decorator +def _alpharank(meta_game, per_player_repeats, ignore_repeats=False): + """AlphaRank.""" + if not ignore_repeats: + meta_game = _expand_meta_game(meta_game, per_player_repeats) + meta_dist = alpharank_lib.sweep_pi_vs_epsilon([mg for mg in meta_game]) + meta_dist[meta_dist < DIST_TOL] = 0.0 + meta_dist /= np.sum(meta_dist) + meta_dist = np.reshape(meta_dist, meta_game.shape[1:]) + if not ignore_repeats: + meta_dist = _unexpand_meta_dist(meta_dist, per_player_repeats) + return meta_dist, dict() + + +# Meta-solvers - CEs. +@_eliminate_dominated_decorator +def _mgce(meta_game, per_player_repeats, ignore_repeats=False): + """Maximum Gini CE.""" + a_mat, e_vec, meta = _ace_constraints( + meta_game, [0.0] * len(per_player_repeats), remove_null=True, + zero_tolerance=1e-8) + a_mats = _partition_by_player( + a_mat, meta["p_vec"], len(per_player_repeats)) + e_vecs = _partition_by_player( + e_vec, meta["p_vec"], len(per_player_repeats)) + dist, _ = _try_two_solvers( + _qp_ce, + meta_game, a_mats, e_vecs, + action_repeats=(None if ignore_repeats else per_player_repeats)) + return dist, dict() + + +@_eliminate_dominated_decorator +def _min_epsilon_mgce(meta_game, per_player_repeats, ignore_repeats=False): + """Min Epsilon Maximum Gini CE.""" + a_mat, e_vec, meta = _ace_constraints( + meta_game, [0.0] * len(per_player_repeats), remove_null=True, + zero_tolerance=1e-8) + a_mats = _partition_by_player( + a_mat, meta["p_vec"], len(per_player_repeats)) + e_vecs = _partition_by_player( + e_vec, meta["p_vec"], len(per_player_repeats)) + dist, _ = _try_two_solvers( + _qp_ce, + meta_game, a_mats, e_vecs, + action_repeats=(None if ignore_repeats else per_player_repeats), + min_epsilon=True) + return dist, dict() + + +@_eliminate_dominated_decorator +def _approx_mgce(meta_game, per_player_repeats, ignore_repeats=False, + epsilon=0.01): + """Approximate Maximum Gini CE.""" + a_mat, e_vec, meta = _ace_constraints( + meta_game, [0.0] * len(per_player_repeats), remove_null=True, + zero_tolerance=1e-8) + max_ab = 0.0 + if a_mat.size: + max_ab = np.max(a_mat.mean(axis=1)) + a_mat, e_vec, meta = _ace_constraints( + meta_game, [epsilon * max_ab] * len(per_player_repeats), remove_null=True, + zero_tolerance=1e-8) + a_mats = _partition_by_player( + a_mat, meta["p_vec"], len(per_player_repeats)) + e_vecs = _partition_by_player( + e_vec, meta["p_vec"], len(per_player_repeats)) + dist, _ = _try_two_solvers( + _qp_ce, + meta_game, a_mats, e_vecs, + action_repeats=(None if ignore_repeats else per_player_repeats)) + return dist, dict() + + +@_eliminate_dominated_decorator +def _rmwce(meta_game, per_player_repeats, ignore_repeats=False): + """Random maximum welfare CE.""" + del ignore_repeats + num_players = len(per_player_repeats) + cost = np.ravel(np.sum(meta_game, axis=0)) + cost += np.ravel(np.random.normal(size=meta_game.shape[1:])) * 1e-6 + a_mat, e_vec, _ = _ace_constraints( + meta_game, [0.0] * num_players, remove_null=True, + zero_tolerance=1e-8) + x, _ = 
_linear(meta_game, a_mat, e_vec, cost=cost) + dist = np.reshape(x, meta_game.shape[1:]) + return dist, dict() + + +@_eliminate_dominated_decorator +def _mwce(meta_game, per_player_repeats, ignore_repeats=False): + """Maximum welfare CE.""" + del ignore_repeats + num_players = len(per_player_repeats) + cost = np.ravel(np.sum(meta_game, axis=0)) + a_mat, e_vec, _ = _ace_constraints( + meta_game, [0.0] * num_players, remove_null=True, + zero_tolerance=1e-8) + x, _ = _linear(meta_game, a_mat, e_vec, cost=cost) + dist = np.reshape(x, meta_game.shape[1:]) + return dist, dict() + + +@_eliminate_dominated_decorator +def _rvce(meta_game, per_player_repeats, ignore_repeats=False): + """Random vertex CE.""" + del ignore_repeats + num_players = len(per_player_repeats) + cost = np.ravel(np.random.normal(size=meta_game.shape[1:])) + a_mat, e_vec, _ = _ace_constraints( + meta_game, [0.0] * num_players, remove_null=True, + zero_tolerance=1e-8) + x, _ = _linear(meta_game, a_mat, e_vec, cost=cost) + dist = np.reshape(x, meta_game.shape[1:]) + return dist, dict() + + +# Meta-solvers - CCEs. +def _mgcce(meta_game, per_player_repeats, ignore_repeats=False): + """Maximum Gini CCE.""" + a_mat, meta = _cce_constraints( + meta_game, [0.0] * len(per_player_repeats), remove_null=True, + zero_tolerance=1e-8) + a_mats = _partition_by_player( + a_mat, meta["p_vec"], len(per_player_repeats)) + dist, _ = _try_two_solvers( + _qp_cce, + meta_game, a_mats, [0.0] * len(per_player_repeats), + action_repeats=(None if ignore_repeats else per_player_repeats)) + return dist, dict() + + +def _min_epsilon_mgcce(meta_game, per_player_repeats, ignore_repeats=False): + """Min Epsilon Maximum Gini CCE.""" + a_mat, meta = _cce_constraints( + meta_game, [0.0] * len(per_player_repeats), remove_null=True, + zero_tolerance=1e-8) + a_mats = _partition_by_player( + a_mat, meta["p_vec"], len(per_player_repeats)) + dist, _ = _try_two_solvers( + _qp_cce, + meta_game, a_mats, [0.0] * len(per_player_repeats), + action_repeats=(None if ignore_repeats else per_player_repeats), + min_epsilon=True) + return dist, dict() + + +def _approx_mgcce(meta_game, per_player_repeats, ignore_repeats=False, + epsilon=0.01): + """Maximum Gini CCE.""" + a_mat, meta = _cce_constraints( + meta_game, [0.0] * len(per_player_repeats), remove_null=True, + zero_tolerance=1e-8) + max_ab = 0.0 + if a_mat.size: + max_ab = np.max(a_mat.mean(axis=1)) + a_mat, meta = _cce_constraints( + meta_game, [epsilon * max_ab] * len(per_player_repeats), remove_null=True, + zero_tolerance=1e-8) + a_mats = _partition_by_player( + a_mat, meta["p_vec"], len(per_player_repeats)) + dist, _ = _try_two_solvers( + _qp_cce, + meta_game, a_mats, [0.0] * len(per_player_repeats), + action_repeats=(None if ignore_repeats else per_player_repeats)) + return dist, dict() + + +def _rmwcce(meta_game, per_player_repeats, ignore_repeats=False): + """Random maximum welfare CCE.""" + del ignore_repeats + num_players = len(per_player_repeats) + cost = np.ravel(np.sum(meta_game, axis=0)) + cost += np.ravel(np.random.normal(size=meta_game.shape[1:])) * 1e-6 + a_mat, _ = _cce_constraints( + meta_game, [0.0] * num_players, remove_null=True, + zero_tolerance=1e-8) + e_vec = np.zeros([a_mat.shape[0]]) + x, _ = _linear(meta_game, a_mat, e_vec, cost=cost) + dist = np.reshape(x, meta_game.shape[1:]) + return dist, dict() + + +def _mwcce(meta_game, per_player_repeats, ignore_repeats=False): + """Maximum welfare CCE.""" + del ignore_repeats + num_players = len(per_player_repeats) + cost = np.ravel(np.sum(meta_game, axis=0)) 
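+  # Objective: social welfare, i.e. the sum of all players' payoffs for each
+  # joint action (player axis 0 of meta_game summed out, then flattened).
+  # Unlike _rmwcce above, no random tie-breaking noise is added here, so ties
+  # between welfare-maximal distributions are left to the underlying solver.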
+ a_mat, _ = _cce_constraints( + meta_game, [0.0] * num_players, remove_null=True, + zero_tolerance=1e-8) + e_vec = np.zeros([a_mat.shape[0]]) + x, _ = _linear(meta_game, a_mat, e_vec, cost=cost) + dist = np.reshape(x, meta_game.shape[1:]) + return dist, dict() + + +def _rvcce(meta_game, per_player_repeats, ignore_repeats=False): + """Random vertex CCE.""" + del ignore_repeats + num_players = len(per_player_repeats) + cost = np.ravel(np.random.normal(size=meta_game.shape[1:])) + a_mat, _ = _cce_constraints( + meta_game, [0.0] * num_players, remove_null=True, + zero_tolerance=1e-8) + e_vec = np.zeros([a_mat.shape[0]]) + x, _ = _linear(meta_game, a_mat, e_vec, cost=cost) + dist = np.reshape(x, meta_game.shape[1:]) + return dist, dict() + + +# Flags to functions. +FLAG_TO_FUNC = dict( + uni=_uni, + undominated_uni=_undominated_uni, + rj=_rj, + undominated_rj=_undominated_rj, + rd=_rd, + undominated_rd=_undominated_rd, + prd=_prd, + alpharank=_alpharank, + mgce=_mgce, + min_epsilon_mgce=_min_epsilon_mgce, + approx_mgce=_approx_mgce, + rmwce=_rmwce, + mwce=_mwce, + rvce=_rvce, + mgcce=_mgcce, + min_epsilon_mgcce=_min_epsilon_mgcce, + approx_mgcce=_approx_mgcce, + rmwcce=_rmwcce, + mwcce=_mwcce, + rvcce=_rvcce, +) + + +## PSRO Functions. + + +def initialize_policy(game, player, policy_init): + """Returns initial policy.""" + if policy_init == "uniform": + new_policy = policy.TabularPolicy(game, players=(player,)) + + elif policy_init == "random_deterministic": + new_policy = policy.TabularPolicy(game, players=(player,)) + for i in range(new_policy.action_probability_array.shape[0]): + new_policy.action_probability_array[i] = np.random.multinomial( + 1, new_policy.action_probability_array[i]).astype(np.float64) + + else: + raise ValueError( + "policy_init must be a valid initialization strategy: %s. " + "Received: %s" % (INIT_POLICIES, policy_init)) + + return new_policy + + +def add_new_policies( + per_player_new_policies, + per_player_gaps, + per_player_repeats, + per_player_policies, + joint_policies, + joint_returns, + game, + br_selection): + """Adds novel policies from new policies.""" + num_players = len(per_player_new_policies) + per_player_num_novel_policies = [0 for _ in range(num_players)] + + # Update policies and policy counts. + for player in range(num_players): + new_policies = per_player_new_policies[player] + new_gaps = per_player_gaps[player] + + repeat_policies = [] + repeat_gaps = [] + repeat_ids = [] + novel_policies = [] + novel_gaps = [] + for new_policy, new_gap in zip(new_policies, new_gaps): + for policy_id, policy_ in enumerate(per_player_policies[player]): + if np.all( # New policy is not novel. + new_policy.action_probability_array == + policy_.action_probability_array): # pytype: disable=attribute-error # py39-upgrade + logging.debug("Player %d's new policy is not novel.", player) + repeat_policies.append(new_policy) + repeat_gaps.append(new_gap) + repeat_ids.append(policy_id) + break + else: # New policy is novel. 
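+        # for/else: reached only when the inner loop over existing policies
+        # finished without `break`, i.e. no stored policy matched the new
+        # policy's action_probability_array exactly.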
+ logging.debug("Player %d's new policy is novel.", player) + novel_policies.append(new_policy) + novel_gaps.append(new_gap) + + add_novel_policies = [] + add_repeat_ids = [] + if (novel_policies or repeat_policies): + if br_selection == "all": + add_novel_policies.extend(novel_policies) + add_repeat_ids.extend(repeat_ids) + elif br_selection == "all_novel": + add_novel_policies.extend(novel_policies) + elif br_selection == "random": + index = np.random.randint(0, len(repeat_policies) + len(novel_policies)) + if index < len(novel_policies): + add_novel_policies.append(novel_policies[index]) + else: + add_repeat_ids.append(repeat_ids[index - len(novel_policies)]) + elif br_selection == "random_novel": + if novel_policies: + index = np.random.randint(0, len(novel_policies)) + add_novel_policies.append(novel_policies[index]) + else: # Fall back on random. + index = np.random.randint(0, len(repeat_policies)) + add_repeat_ids.append(repeat_ids[index]) + elif br_selection == "largest_gap": + if novel_policies: + index = np.argmax(novel_gaps) + if novel_gaps[index] == 0.0: # Fall back to random when zero. + index = np.random.randint(0, len(novel_policies)) + add_novel_policies.append(novel_policies[index]) + else: # Fall back on random. + index = np.random.randint(0, len(repeat_policies)) + add_repeat_ids.append(repeat_ids[index]) + else: + raise ValueError("Unrecognized br_selection method: %s" + % br_selection) + + for add_repeat_id in add_repeat_ids: + per_player_repeats[player][add_repeat_id] += 1 + + for add_novel_policy in add_novel_policies: + per_player_policies[player].append(add_novel_policy) # Add new policy. + per_player_repeats[player].append(1) # Add new count. + per_player_num_novel_policies[player] += 1 + + # Add new joint policies. + for pids in itertools.product(*[ + range(len(policies)) for policies in per_player_policies]): + if pids in joint_policies: + continue + logging.debug("Evaluating novel joint policy: %s.", pids) + policies = [ + policies[pid] for pid, policies in zip(pids, per_player_policies)] + policies = tuple(map(policy.python_policy_to_pyspiel_policy, policies)) + pyspiel_tabular_policy = pyspiel.to_joint_tabular_policy(policies, True) + joint_policies[pids] = pyspiel_tabular_policy + joint_returns[pids] = [ + 0.0 if abs(er) < RETURN_TOL else er + for er in pyspiel.expected_returns( + game.new_initial_state(), pyspiel_tabular_policy, -1, True)] + + return per_player_num_novel_policies + + +def add_meta_game( + meta_games, + per_player_policies, + joint_returns): + """Returns a meta-game tensor.""" + per_player_num_policies = [ + len(policies) for policies in per_player_policies] + shape = [len(per_player_num_policies)] + per_player_num_policies + meta_game = np.zeros(shape) + for pids in itertools.product(*[ + range(np_) for np_ in per_player_num_policies]): + meta_game[(slice(None),) + pids] = joint_returns[pids] + meta_games.append(meta_game) + return meta_games + + +def add_meta_dist( + meta_dists, meta_values, meta_solver, meta_game, per_player_repeats, + ignore_repeats): + """Returns meta_dist.""" + num_players = meta_game.shape[0] + meta_solver_func = FLAG_TO_FUNC[meta_solver] + meta_dist, _ = meta_solver_func( + meta_game, per_player_repeats, ignore_repeats=ignore_repeats) + # Clean dist. 
+ meta_dist = meta_dist.astype(np.float64) + meta_dist[meta_dist < DIST_TOL] = 0.0 + meta_dist[meta_dist > 1.0] = 1.0 + meta_dist /= np.sum(meta_dist) + meta_dist[meta_dist > 1.0] = 1.0 + meta_dists.append(meta_dist) + meta_value = np.sum( + meta_dist * meta_game, axis=tuple(range(1, num_players + 1))) + meta_values.append(meta_value) + return meta_dist + + +def find_best_response( + game, + meta_dist, + meta_game, + iteration, + joint_policies, + target_equilibrium, + update_players_strategy, + action_value_tolerance, +): + """Returns new best response policies.""" + num_players = meta_game.shape[0] + per_player_num_policies = meta_dist.shape[:] + + # Player update strategy. + if update_players_strategy == "all": + players = list(range(num_players)) + elif update_players_strategy == "cycle": + players = [iteration % num_players] + elif update_players_strategy == "random": + players = [np.random.randint(0, num_players)] + else: + raise ValueError( + "update_players_strategy must be a valid player update strategy: " + "%s. Received: %s" % (UPDATE_PLAYERS_STRATEGY, update_players_strategy)) + + # Find best response. + per_player_new_policies = [] + per_player_deviation_incentives = [] + + if target_equilibrium == "cce": + for player in range(num_players): + if player in players: + joint_policy_ids = itertools.product(*[ + (np_-1,) if p_ == player else range(np_) for p_, np_ + in enumerate(per_player_num_policies)]) + joint_policies_slice = [ + joint_policies[jpid] for jpid in joint_policy_ids] + meta_dist_slice = np.sum(meta_dist, axis=player) + meta_dist_slice[meta_dist_slice < DIST_TOL] = 0.0 + meta_dist_slice[meta_dist_slice > 1.0] = 1.0 + meta_dist_slice /= np.sum(meta_dist_slice) + meta_dist_slice = meta_dist_slice.flat + + mu = [(p, mp) for mp, p in zip(joint_policies_slice, meta_dist_slice) + if p > 0] + info = pyspiel.cce_dist( + game, + mu, + player, + prob_cut_threshold=0.0, + action_value_tolerance=action_value_tolerance) + + new_policy = policy.pyspiel_policy_to_python_policy( + game, info.best_response_policies[0], players=(player,)) + on_policy_value = np.sum(meta_game[player] * meta_dist) + deviation_incentive = max( + info.best_response_values[0] - on_policy_value, 0) + if deviation_incentive < GAP_TOL: + deviation_incentive = 0.0 + + per_player_new_policies.append([new_policy]) + per_player_deviation_incentives.append([deviation_incentive]) + else: + per_player_new_policies.append([]) + per_player_deviation_incentives.append([]) + + elif target_equilibrium == "ce": + for player in range(num_players): + if player in players: + per_player_new_policies.append([]) + per_player_deviation_incentives.append([]) + + for pid in range(per_player_num_policies[player]): + joint_policy_ids = itertools.product(*[ + (pid,) if p_ == player else range(np_) for p_, np_ + in enumerate(per_player_num_policies)]) + joint_policies_slice = [ + joint_policies[jpid] for jpid in joint_policy_ids] + inds = tuple((pid,) if player == p_ else slice(None) + for p_ in range(num_players)) + meta_dist_slice = np.ravel(meta_dist[inds]).copy() + meta_dist_slice[meta_dist_slice < DIST_TOL] = 0.0 + meta_dist_slice[meta_dist_slice > 1.0] = 1.0 + meta_dist_slice_sum = np.sum(meta_dist_slice) + + if meta_dist_slice_sum > 0.0: + meta_dist_slice /= meta_dist_slice_sum + mu = [(p, mp) for mp, p in + zip(joint_policies_slice, meta_dist_slice) + if p > 0] + info = pyspiel.cce_dist( + game, + mu, + player, + prob_cut_threshold=0.0, + action_value_tolerance=action_value_tolerance) + + new_policy = 
policy.pyspiel_policy_to_python_policy( + game, info.best_response_policies[0], players=(player,)) + on_policy_value = np.sum( + np.ravel(meta_game[player][inds]) * meta_dist_slice) + deviation_incentive = max( + info.best_response_values[0] - on_policy_value, 0) + if deviation_incentive < GAP_TOL: + deviation_incentive = 0.0 + + per_player_new_policies[-1].append(new_policy) + per_player_deviation_incentives[-1].append( + meta_dist_slice_sum * deviation_incentive) + + else: + per_player_new_policies.append([]) + per_player_deviation_incentives.append([]) + + else: + raise ValueError( + "target_equilibrium must be a valid best response strategy: %s. " + "Received: %s" % (BRS, target_equilibrium)) + + return per_player_new_policies, per_player_deviation_incentives + + +## Main Loop. + + +def initialize(game, train_meta_solver, eval_meta_solver, policy_init, + ignore_repeats, br_selection): + """Return initialized data structures.""" + num_players = game.num_players() + + # Initialize. + iteration = 0 + per_player_repeats = [[] for _ in range(num_players)] + per_player_policies = [[] for _ in range(num_players)] + joint_policies = {} # Eg. (1, 0): Joint policy. + joint_returns = {} + meta_games = [] + train_meta_dists = [] + eval_meta_dists = [] + train_meta_values = [] + eval_meta_values = [] + train_meta_gaps = [] + eval_meta_gaps = [] + + # Initialize policies. + per_player_new_policies = [ + [initialize_policy(game, player, policy_init)] + for player in range(num_players)] + per_player_gaps_train = [[1.0] for _ in range(num_players)] + per_player_num_novel_policies = add_new_policies( + per_player_new_policies, per_player_gaps_train, per_player_repeats, + per_player_policies, joint_policies, joint_returns, game, br_selection) + del per_player_num_novel_policies + add_meta_game( + meta_games, + per_player_policies, + joint_returns) + add_meta_dist( + train_meta_dists, train_meta_values, train_meta_solver, + meta_games[-1], per_player_repeats, ignore_repeats) + add_meta_dist( + eval_meta_dists, eval_meta_values, eval_meta_solver, + meta_games[-1], per_player_repeats, ignore_repeats) + + return ( + iteration, + per_player_repeats, + per_player_policies, + joint_policies, + joint_returns, + meta_games, + train_meta_dists, + eval_meta_dists, + train_meta_values, + eval_meta_values, + train_meta_gaps, + eval_meta_gaps) + + +def initialize_callback_( + iteration, + per_player_repeats, + per_player_policies, + joint_policies, + joint_returns, + meta_games, + train_meta_dists, + eval_meta_dists, + train_meta_values, + eval_meta_values, + train_meta_gaps, + eval_meta_gaps, + game): + """Callback which allows initializing from checkpoint.""" + del game + checkpoint = None + return ( + iteration, + per_player_repeats, + per_player_policies, + joint_policies, + joint_returns, + meta_games, + train_meta_dists, + eval_meta_dists, + train_meta_values, + eval_meta_values, + train_meta_gaps, + eval_meta_gaps, + checkpoint) + + +def callback_( + iteration, + per_player_repeats, + per_player_policies, + joint_policies, + joint_returns, + meta_games, + train_meta_dists, + eval_meta_dists, + train_meta_values, + eval_meta_values, + train_meta_gaps, + eval_meta_gaps, + kwargs, + checkpoint): + """Callback for updating checkpoint.""" + del iteration, per_player_repeats, per_player_policies, joint_policies + del joint_returns, meta_games, train_meta_dists, eval_meta_dists + del train_meta_values, eval_meta_values, train_meta_gaps, eval_meta_gaps + del kwargs + return checkpoint + + +def run_loop(game, + 
game_name, + seed=0, + iterations=40, + policy_init="uniform", + update_players_strategy="all", + target_equilibrium="cce", + br_selection="largest_gap", + train_meta_solver="mgcce", + eval_meta_solver="mwcce", + ignore_repeats=False, + initialize_callback=None, + action_value_tolerance=-1.0, + callback=None): + """Runs JPSRO.""" + if initialize_callback is None: + initialize_callback = initialize_callback_ + if callback is None: + callback = callback_ + kwargs = dict( + game=game, + game_name=game_name, + seed=seed, + iterations=iterations, + policy_init=policy_init, + update_players_strategy=update_players_strategy, + target_equilibrium=target_equilibrium, + br_selection=br_selection, + train_meta_solver=train_meta_solver, + eval_meta_solver=eval_meta_solver, + ignore_repeats=ignore_repeats, + ) + + # Set seed. + np.random.seed(seed) + + # Some statistics. + num_players = game.num_players() # Look in the game. + + # Initialize. + values = initialize(game, train_meta_solver, eval_meta_solver, policy_init, + ignore_repeats, br_selection) + + # Initialize Callback. + (iteration, + per_player_repeats, + per_player_policies, + joint_policies, + joint_returns, + meta_games, + train_meta_dists, + eval_meta_dists, + train_meta_values, + eval_meta_values, + train_meta_gaps, + eval_meta_gaps, + checkpoint) = initialize_callback(*values, game) + + # Run JPSRO. + while iteration <= iterations: + logging.debug("Beginning JPSRO iteration %03d", iteration) + per_player_new_policies, per_player_gaps_train = find_best_response( + game, + train_meta_dists[-1], + meta_games[-1], + iteration, + joint_policies, + target_equilibrium, + update_players_strategy, + action_value_tolerance, + ) + train_meta_gaps.append([sum(gaps) for gaps in per_player_gaps_train]) + _, per_player_gaps_eval = find_best_response( + game, + eval_meta_dists[-1], + meta_games[-1], + iteration, + joint_policies, + target_equilibrium, + update_players_strategy, + action_value_tolerance, + ) + eval_meta_gaps.append([sum(gaps) for gaps in per_player_gaps_eval]) + per_player_num_novel_policies = add_new_policies( + per_player_new_policies, per_player_gaps_train, per_player_repeats, + per_player_policies, joint_policies, joint_returns, game, br_selection) + del per_player_num_novel_policies + add_meta_game( + meta_games, + per_player_policies, + joint_returns) + add_meta_dist( + train_meta_dists, train_meta_values, train_meta_solver, + meta_games[-1], per_player_repeats, ignore_repeats) + add_meta_dist( + eval_meta_dists, eval_meta_values, eval_meta_solver, + meta_games[-1], per_player_repeats, ignore_repeats) + + # Stats. + per_player_num_policies = train_meta_dists[-1].shape[:] + log_string = LOG_STRING.format( + iteration=iteration, + game=game_name, + player=("{: 12d}" * num_players).format(*list(range(num_players))), + brs="", + num_policies=("{: 12d}" * num_players).format(*[ + sum(ppr) for ppr in per_player_repeats]), + unique=("{: 12d}" * num_players).format(*per_player_num_policies), + train_meta_solver=train_meta_solver, + train_value=("{: 12g}" * num_players).format(*train_meta_values[-1]), + train_gap=("{: 12g}" * num_players).format(*train_meta_gaps[-1]), + eval_meta_solver=eval_meta_solver, + eval_value=("{: 12g}" * num_players).format(*eval_meta_values[-1]), + eval_gap=("{: 12g}" * num_players).format(*eval_meta_gaps[-1]), + ) + logging.info(log_string) + + # Increment. + iteration += 1 + + # Callback. 
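+    # The user-supplied callback sees the full JPSRO state each iteration and
+    # may return an updated checkpoint; the default callback_ simply passes
+    # the checkpoint through unchanged.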
+ checkpoint = callback( + iteration, + per_player_repeats, + per_player_policies, + joint_policies, + joint_returns, + meta_games, + train_meta_dists, + eval_meta_dists, + train_meta_values, + eval_meta_values, + train_meta_gaps, + eval_meta_gaps, + kwargs, + checkpoint) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/jpsro_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/jpsro_test.py new file mode 100644 index 0000000..36814c4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/jpsro_test.py @@ -0,0 +1,99 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.algorithms.jpsro.""" + +import itertools + +from absl.testing import absltest +from absl.testing import parameterized + +from open_spiel.python.algorithms import jpsro +import pyspiel + +GAMES = ( + "sheriff_2p_gabriele", +) +SWEEP_KWARGS = [ + dict( # pylint: disable=g-complex-comprehension + game_name=game, + iterations=iterations, + policy_init=policy_init, + update_players_strategy=update_players_strategy, + target_equilibrium=target_equilibrium, + br_selection=br_selection, + train_meta_solver=train_meta_solver, + eval_meta_solver=eval_meta_solver, + ignore_repeats=ignore_repeats, + ) for ( + iterations, + game, + policy_init, + update_players_strategy, + target_equilibrium, + br_selection, + train_meta_solver, + eval_meta_solver, + ignore_repeats) in itertools.product( + [2], + GAMES, + jpsro.INIT_POLICIES, + jpsro.UPDATE_PLAYERS_STRATEGY, + jpsro.BRS, + jpsro.BR_SELECTIONS, + jpsro.META_SOLVERS, + ["mwcce"], + [True, False]) +] +TEST_COUNT_LIMIT = 100 + +interval = len(SWEEP_KWARGS) // TEST_COUNT_LIMIT +interval = interval if interval % 2 != 0 else interval + 1 # Odd interval. 
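+# Subsample the sweep so roughly TEST_COUNT_LIMIT parameter combinations run.
+# The stride is forced to be odd because ignore_repeats ([True, False]) is the
+# fastest-varying factor in the itertools.product above; an even stride would
+# always select the same ignore_repeats value.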
+SWEEP_KWARGS = SWEEP_KWARGS[::interval] + + +def get_game(game_name): + """Returns the game.""" + if game_name == "kuhn_poker_3p": + game_name = "kuhn_poker" + game_kwargs = {"players": int(3)} + elif game_name == "trade_comm_2p_2i": + game_name = "trade_comm" + game_kwargs = {"num_items": int(2)} + elif game_name == "sheriff_2p_gabriele": + game_name = "sheriff" + game_kwargs = { + "item_penalty": float(1.0), + "item_value": float(5.0), + "max_bribe": int(2), + "max_items": int(10), + "num_rounds": int(2), + "sheriff_penalty": float(1.0), + } + + else: + raise ValueError("Unrecognised game: %s" % game_name) + return pyspiel.load_game_as_turn_based(game_name, game_kwargs) + + +class JPSROTest(parameterized.TestCase, absltest.TestCase): + + @parameterized.parameters(*SWEEP_KWARGS) + def test_jpsro_cce(self, **kwargs): + game = get_game(kwargs["game_name"]) + jpsro.run_loop(game=game, **kwargs) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/lp_solver.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/lp_solver.py new file mode 100644 index 0000000..afec1b1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/lp_solver.py @@ -0,0 +1,541 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""LP Solver for two-player zero-sum games.""" + +import cvxopt +import numpy as np +from open_spiel.python.egt import utils +import pyspiel + +# Constants that determine the type of objective (max vs. min) and type of +# constraints (<=, >=, =). +OBJ_MAX = 1 +OBJ_MIN = 2 +CONS_TYPE_LEQ = 3 +CONS_TYPE_GEQ = 4 +CONS_TYPE_EQ = 5 + +# Constants that determine the type of dominance to find. +DOMINANCE_STRICT = 1 +DOMINANCE_VERY_WEAK = 2 +DOMINANCE_WEAK = 3 + + +class _Variable(object): + """A variable in an LP.""" + + def __init__(self, vid, lb=None, ub=None): + """Creates a variable in a linear program. + + Args: + vid: (integer) the variable id (should be unique for each variable) + lb: the lower bound on the variable's value (None means no lower bound) + ub: the upper bound on the variable's valie (None means no upper bound) + """ + self.vid = vid + self.lb = lb + self.ub = ub + + +class _Constraint(object): + """A constraint in an LP.""" + + def __init__(self, cid, ctype): + """Creates a constraint in a linear program. 
+ + Args: + cid: (integer) the constraint id (should be unique for each constraint) + ctype: the constraint type (CONS_TYPE_{LEQ, GEQ, EQ}) + """ + self.cid = cid + self.ctype = ctype + self.coeffs = {} # var label -> value + self.rhs = None + + +class LinearProgram(object): + """A object used to provide a user-friendly API for building LPs.""" + + def __init__(self, objective): + assert objective == OBJ_MIN or objective == OBJ_MAX + self._valid_constraint_types = [CONS_TYPE_EQ, CONS_TYPE_LEQ, CONS_TYPE_GEQ] + self._objective = objective + self._obj_coeffs = {} # var label -> value + self._vars = {} # var label -> var + self._cons = {} # cons label -> constraint + self._var_list = [] + self._leq_cons_list = [] + self._eq_cons_list = [] + + def add_or_reuse_variable(self, label, lb=None, ub=None): + """Adds a variable to this LP, or reuses one if the label exists. + + If the variable already exists, simply checks that the upper and lower + bounds are the same as previously specified. + + Args: + label: a label to assign to this constraint + lb: a lower-bound value for this variable + ub: an upper-bound value for this variable + """ + var = self._vars.get(label) + if var is not None: + # Do not re-add, but ensure it's the same + assert var.lb == lb and var.ub == ub + return + var = _Variable(len(self._var_list), lb, ub) + self._vars[label] = var + self._var_list.append(var) + + def add_or_reuse_constraint(self, label, ctype): + """Adds a constraint to this LP, or reuses one if the label exists. + + If the constraint is already present, simply checks it's the same type as + previously specified. + + Args: + label: a label to assign to this constraint + ctype: the constraint type (in CONS_TYPE_{LEQ,GEQ,EQ}) + """ + assert ctype in self._valid_constraint_types + cons = self._cons.get(label) + if cons is not None: + # Do not re-add, but ensure it's the same type + assert cons.ctype == ctype + return + if ctype == CONS_TYPE_LEQ or ctype == CONS_TYPE_GEQ: + cons = _Constraint(len(self._leq_cons_list), ctype) + self._cons[label] = cons + self._leq_cons_list.append(cons) + elif ctype == CONS_TYPE_EQ: + cons = _Constraint(len(self._eq_cons_list), ctype) + self._cons[label] = cons + self._eq_cons_list.append(cons) + else: + assert False, "Unknown constraint type" + + def set_obj_coeff(self, var_label, coeff): + """Sets a coefficient of a variable in the objective.""" + self._obj_coeffs[var_label] = coeff + + def set_cons_coeff(self, cons_label, var_label, coeff): + """Sets a coefficient of a constraint in the LP.""" + self._cons[cons_label].coeffs[var_label] = coeff + + def add_to_cons_coeff(self, cons_label, var_label, add_coeff): + """Sets a coefficient of a constraint in the LP.""" + val = self._cons[cons_label].coeffs.get(var_label) + if val is None: + val = 0 + self._cons[cons_label].coeffs[var_label] = val + add_coeff + + def set_cons_rhs(self, cons_label, value): + """Sets the right-hand side of a constraint.""" + self._cons[cons_label].rhs = value + + def get_var_id(self, label): + var = self._vars.get(label) + assert var is not None + return var.vid + + def get_num_cons(self): + return len(self._leq_cons_list), len(self._eq_cons_list) + + def solve(self, solver=None): + """Solves the LP. + + Args: + solver: the solver to use ('blas', 'lapack', 'glpk'). Defaults to None, + which then uses the cvxopt internal default. + + Returns: + The solution as a dict of var label -> value, one for each variable. 
+ """ + # From http://cvxopt.org/userguide/coneprog.html#linear-programming, + # CVXOPT uses the formulation: + # minimize: c^t x + # s.t. Gx <= h + # Ax = b + # + # Here: + # - x is the vector the variables + # - c is the vector of objective coefficients + # - G is the matrix of LEQ (and GEQ) constraint coefficients + # - h is the vector or right-hand side values of the LEQ/GEQ constraints + # - A is the matrix of equality constraint coefficients + # - b is the vector of right-hand side values of the equality constraints + # + # This function builds these sparse matrices from the information it has + # gathered, flipping signs where necessary, and adding equality constraints + # for the upper and lower bounds of variables. It then calls the cvxopt + # solver and maps back the values. + num_vars = len(self._var_list) + num_eq_cons = len(self._eq_cons_list) + num_leq_cons = len(self._leq_cons_list) + for var in self._var_list: + if var.lb is not None: + num_leq_cons += 1 + if var.ub is not None: + num_leq_cons += 1 + # Make the matrices (some need to be dense). + c = cvxopt.matrix([0.0] * num_vars) + h = cvxopt.matrix([0.0] * num_leq_cons) + g_mat = cvxopt.spmatrix([], [], [], (num_leq_cons, num_vars)) + a_mat = None + b = None + if num_eq_cons > 0: + a_mat = cvxopt.spmatrix([], [], [], (num_eq_cons, num_vars)) + b = cvxopt.matrix([0.0] * num_eq_cons) + # Objective coefficients: c + for var_label in self._obj_coeffs: + value = self._obj_coeffs[var_label] + vid = self._vars[var_label].vid + if self._objective == OBJ_MAX: + c[vid] = -value # negate the value because it's a max + else: + c[vid] = value # min objective matches cvxopt + # Inequality constraints: G, h + row = 0 + for cons in self._leq_cons_list: + # If it's >= then need to negate all coeffs and the rhs + if cons.rhs is not None: + h[row] = cons.rhs if cons.ctype == CONS_TYPE_LEQ else -cons.rhs + for var_label in cons.coeffs: + value = cons.coeffs[var_label] + vid = self._vars[var_label].vid + g_mat[(row, vid)] = value if cons.ctype == CONS_TYPE_LEQ else -value + row += 1 + # Inequality constraints: variables upper and lower bounds + for var in self._var_list: + if var.lb is not None: # x_i >= lb has to be -x_i <= -lb + g_mat[(row, var.vid)] = -1.0 + h[row] = -var.lb + row += 1 + if var.ub is not None: # x_i <= ub + g_mat[(row, var.vid)] = 1.0 + h[row] = var.ub + row += 1 + # Equality constraints: A, b + if num_eq_cons > 0: + row = 0 + for cons in self._eq_cons_list: + b[row] = cons.rhs if cons.rhs is not None else 0.0 + for var_label in cons.coeffs: + value = cons.coeffs[var_label] + vid = self._vars[var_label].vid + a_mat[(row, vid)] = value + row += 1 + # Solve! + if num_eq_cons > 0: + sol = cvxopt.solvers.lp(c, g_mat, h, a_mat, b, solver=solver) + else: + sol = cvxopt.solvers.lp(c, g_mat, h, solver=solver) + return sol["x"] + + +def solve_zero_sum_matrix_game(game): + """Solves a matrix game by using linear programming. + + Args: + game: a pyspiel MatrixGame + + Returns: + A 4-tuple containing: + - p0_sol (array-like): probability distribution over row actions + - p1_sol (array-like): probability distribution over column actions + - p0_sol_value, expected value to the first player + - p1_sol_value, expected value to the second player + """ + + # Solving a game for player i (e.g. row player) requires finding a mixed + # policy over player i's pure strategies (actions) such that a value of the + # mixed strategy against every opponent pure strategy is maximized. 
+ # + # For more detail, please refer to Sec 4.1 of Shoham & Leyton-Brown, 2009: + # Multiagent Systems: Algorithmic, Game-Theoretic, and Logical Foundations + # http://www.masfoundations.org/mas.pdf + # + # For the row player the LP looks like: + # max V + # st. sigma_a1 \dot col_0 >= V + # sigma_a2 \dot col_1 >= V + # . + # . + # sigma_am \cot col_n >= V + # for all i, sigma_ai >= 0 + # sigma \dot 1 = 1 + assert isinstance(game, pyspiel.MatrixGame) + assert game.get_type().information == pyspiel.GameType.Information.ONE_SHOT + assert game.get_type().utility == pyspiel.GameType.Utility.ZERO_SUM + num_rows = game.num_rows() + num_cols = game.num_cols() + cvxopt.solvers.options["show_progress"] = False + + # First, do the row player (player 0). + lp0 = LinearProgram(OBJ_MAX) + for r in range(num_rows): # one var per action / pure strategy + lp0.add_or_reuse_variable(r, lb=0) + lp0.add_or_reuse_variable(num_rows) # V + lp0.set_obj_coeff(num_rows, 1.0) # max V + for c in range(num_cols): + lp0.add_or_reuse_constraint(c, CONS_TYPE_GEQ) + for r in range(num_rows): + lp0.set_cons_coeff(c, r, game.player_utility(0, r, c)) + lp0.set_cons_coeff(c, num_rows, -1.0) # -V >= 0 + lp0.add_or_reuse_constraint(num_cols + 1, CONS_TYPE_EQ) + lp0.set_cons_rhs(num_cols + 1, 1.0) + for r in range(num_rows): + lp0.set_cons_coeff(num_cols + 1, r, 1.0) + sol = lp0.solve() + p0_sol = sol[:-1] + p0_sol_val = sol[-1] + + # Now, the column player (player 1). + lp1 = LinearProgram(OBJ_MAX) + for c in range(num_cols): # one var per action / pure strategy + lp1.add_or_reuse_variable(c, lb=0) + lp1.add_or_reuse_variable(num_cols) # V + lp1.set_obj_coeff(num_cols, 1) # max V + for r in range(num_rows): + lp1.add_or_reuse_constraint(r, CONS_TYPE_GEQ) + for c in range(num_cols): + lp1.set_cons_coeff(r, c, game.player_utility(1, r, c)) + lp1.set_cons_coeff(r, num_cols, -1.0) # -V >= 0 + lp1.add_or_reuse_constraint(num_rows + 1, CONS_TYPE_EQ) + lp1.set_cons_rhs(num_rows + 1, 1.0) + for c in range(num_cols): + lp1.set_cons_coeff(num_rows + 1, c, 1.0) + sol = lp1.solve() + p1_sol = sol[:-1] + p1_sol_val = sol[-1] + + return p0_sol, p1_sol, p0_sol_val, p1_sol_val + + +def is_dominated(action, + game_or_payoffs, + player, + mode=DOMINANCE_STRICT, + tol=1e-7, + return_mixture=False): + """Determines whether a pure strategy is dominated by any mixture strategies. + + Args: + action: index of an action for `player` + game_or_payoffs: either a pyspiel matrix- or normal-form game, or a payoff + tensor for `player` with ndim == number of players + player: index of the player (an integer) + mode: dominance criterion: strict, weak, or very weak + tol: tolerance + return_mixture: whether to return the dominating strategy if one exists + + Returns: + If `return_mixture`: + a dominating mixture strategy if one exists, or `None`. + the strategy is provided as a 1D numpy array of mixture weights. + Otherwise: True if a dominating strategy exists, False otherwise. 
+ """ + # For more detail, please refer to Sec 4.5.2 of Shoham & Leyton-Brown, 2009: + # Multiagent Systems: Algorithmic, Game-Theoretic, and Logical Foundations + # http://www.masfoundations.org/mas.pdf + assert mode in (DOMINANCE_STRICT, DOMINANCE_VERY_WEAK, DOMINANCE_WEAK) + payoffs = ( + utils.game_payoffs_array(game_or_payoffs)[player] + if isinstance(game_or_payoffs, pyspiel.NormalFormGame) + else np.asarray(game_or_payoffs, dtype=np.float64) + ) + + # Reshape payoffs so rows correspond to `player` and cols to the joint action + # of all other players + payoffs = np.moveaxis(payoffs, player, 0) + payoffs = payoffs.reshape((payoffs.shape[0], -1)) + num_rows, num_cols = payoffs.shape + + cvxopt.solvers.options["show_progress"] = False + cvxopt.solvers.options["maxtol"] = tol + cvxopt.solvers.options["feastol"] = tol + lp = LinearProgram(OBJ_MAX) + + # One var for every row probability, fixed to 0 if inactive + for r in range(num_rows): + if r == action: + lp.add_or_reuse_variable(r, lb=0, ub=0) + else: + lp.add_or_reuse_variable(r, lb=0) + + # For the strict LP we normalize the payoffs to be strictly positive + if mode == DOMINANCE_STRICT: + to_subtract = payoffs.min() - 1 + else: + to_subtract = 0 + # For non-strict LPs the probabilities must sum to 1 + lp.add_or_reuse_constraint(num_cols, CONS_TYPE_EQ) + lp.set_cons_rhs(num_cols, 1) + for r in range(num_rows): + if r != action: + lp.set_cons_coeff(num_cols, r, 1) + + # The main dominance constraint + for c in range(num_cols): + lp.add_or_reuse_constraint(c, CONS_TYPE_GEQ) + lp.set_cons_rhs(c, payoffs[action, c] - to_subtract) + for r in range(num_rows): + if r != action: + lp.set_cons_coeff(c, r, payoffs[r, c] - to_subtract) + + if mode == DOMINANCE_STRICT: + # Minimize sum of probabilities + for r in range(num_rows): + if r != action: + lp.set_obj_coeff(r, -1) + mixture = lp.solve() + if mixture is not None and np.sum(mixture) < 1 - tol: + mixture = np.squeeze(mixture, 1) / np.sum(mixture) + else: + mixture = None + + if mode == DOMINANCE_VERY_WEAK: + # Check feasibility + mixture = lp.solve() + if mixture is not None: + mixture = np.squeeze(mixture, 1) + + if mode == DOMINANCE_WEAK: + # Check feasibility and whether there's any advantage + for r in range(num_rows): + lp.set_obj_coeff(r, payoffs[r].sum()) + mixture = lp.solve() + if mixture is not None: + mixture = np.squeeze(mixture, 1) + if (np.dot(mixture, payoffs) - payoffs[action]).sum() <= tol: + mixture = None + + return mixture if return_mixture else (mixture is not None) + + +def _pure_dominated_from_advantages(advantages, mode, tol=1e-7): + if mode == DOMINANCE_STRICT: + return (advantages > tol).all(1) + if mode == DOMINANCE_WEAK: + return (advantages >= -tol).all(1) & (advantages.sum(1) > tol) + if mode == DOMINANCE_VERY_WEAK: + return (advantages >= -tol).all(1) + + +def iterated_dominance(game_or_payoffs, mode, tol=1e-7): + """Reduces a strategy space using iterated dominance. + + See: http://www.smallparty.com/yoram/classes/principles/nash.pdf + + Args: + game_or_payoffs: either a pyspiel matrix- or normal-form game, or a payoff + tensor of dimension `num_players` + 1. First dimension is the player, + followed by the actions of all players, e.g. a 3x3 game (2 players) has + dimension [2,3,3]. + mode: DOMINANCE_STRICT, DOMINANCE_WEAK, or DOMINANCE_VERY_WEAK + tol: tolerance + + Returns: + A tuple (`reduced_game`, `live_actions`). + * if `game_or_payoffs` is an instance of `pyspiel.MatrixGame`, so is + `reduced_game`; otherwise `reduced_game` is a payoff tensor. 
+ * `live_actions` is a tuple of length `num_players`, where + `live_actions[player]` is a boolean vector of shape `num_actions`; + `live_actions[player][action]` is `True` if `action` wasn't dominated for + `player`. + """ + payoffs = ( + utils.game_payoffs_array(game_or_payoffs) + if isinstance(game_or_payoffs, pyspiel.NormalFormGame) + else np.asarray(game_or_payoffs, dtype=np.float64) + ) + live_actions = [ + np.ones(num_actions, bool) for num_actions in payoffs.shape[1:] + ] + progress = True + while progress: + progress = False + # trying faster method first + for method in ("pure", "mixed"): + if progress: + continue + for player, live in enumerate(live_actions): + if live.sum() == 1: + # one action is dominant + continue + + # discarding all dominated opponent actions + payoffs_live = payoffs[player] + for opponent in range(payoffs.shape[0]): + if opponent != player: + payoffs_live = payoffs_live.compress(live_actions[opponent], + opponent) + + # reshaping to (player_actions, joint_opponent_actions) + payoffs_live = np.moveaxis(payoffs_live, player, 0) + payoffs_live = payoffs_live.reshape((payoffs_live.shape[0], -1)) + + for action in range(live.size): + if not live[action]: + continue + if method == "pure": + # mark all actions that `action` dominates + advantage = payoffs_live[action] - payoffs_live + dominated = _pure_dominated_from_advantages(advantage, mode, tol) + dominated[action] = False + dominated &= live + if dominated.any(): + progress = True + live &= ~dominated + if live.sum() == 1: + break + if method == "mixed": + # test if `action` is dominated by a mixed policy + mixture = is_dominated( + live[:action].sum(), + payoffs_live[live], + 0, + mode, + tol, + return_mixture=True) + if mixture is None: + continue + # if it is, mark any other actions dominated by that policy + progress = True + advantage = mixture.dot(payoffs_live[live]) - payoffs_live[live] + dominated = _pure_dominated_from_advantages(advantage, mode, tol) + dominated[mixture > tol] = False + assert dominated[live[:action].sum()] + live.put(live.nonzero()[0], ~dominated) + if live.sum() == 1: + break + + for player, live in enumerate(live_actions): + payoffs = payoffs.compress(live, player + 1) + + if isinstance(game_or_payoffs, pyspiel.MatrixGame): + return pyspiel.MatrixGame(game_or_payoffs.get_type(), + game_or_payoffs.get_parameters(), [ + game_or_payoffs.row_action_name(action) + for action in live_actions[0].nonzero()[0] + ], [ + game_or_payoffs.col_action_name(action) + for action in live_actions[1].nonzero()[0] + ], *payoffs), live_actions + else: + return payoffs, live_actions + + +# TODO(author5): add a function for sequential games using sequence-form LPs. diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/lp_solver_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/lp_solver_test.py new file mode 100644 index 0000000..7498412 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/lp_solver_test.py @@ -0,0 +1,238 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for LP solvers.""" + +from absl.testing import absltest +import numpy as np + +from open_spiel.python.algorithms import lp_solver +import pyspiel + + +class LPSolversTest(absltest.TestCase): + + def test_rock_paper_scissors(self): + p0_sol, p1_sol, p0_sol_val, p1_sol_val = ( + lp_solver.solve_zero_sum_matrix_game( + pyspiel.create_matrix_game( + [[0.0, -1.0, 1.0], [1.0, 0.0, -1.0], [-1.0, 1.0, 0.0]], + [[0.0, 1.0, -1.0], [-1.0, 0.0, 1.0], [1.0, -1.0, 0.0]]))) + self.assertLen(p0_sol, 3) + self.assertLen(p1_sol, 3) + for i in range(3): + self.assertAlmostEqual(p0_sol[i], 1.0 / 3.0) + self.assertAlmostEqual(p1_sol[i], 1.0 / 3.0) + self.assertAlmostEqual(p0_sol_val, 0.0) + self.assertAlmostEqual(p1_sol_val, 0.0) + + def test_biased_rock_paper_scissors(self): + # See sec 6.2 of Bosansky et al. 2016. Algorithms for Computing Strategies + # in Two-Player Simultaneous Move Games + # http://mlanctot.info/files/papers/aij-2psimmove.pdf + p0_sol, p1_sol, p0_sol_val, p1_sol_val = ( + lp_solver.solve_zero_sum_matrix_game( + pyspiel.create_matrix_game( + [[0.0, -0.25, 0.5], [0.25, 0.0, -0.05], [-0.5, 0.05, 0.0]], + [[0.0, 0.25, -0.5], [-0.25, 0.0, 0.05], [0.5, -0.05, 0.0]]))) + self.assertLen(p0_sol, 3) + self.assertLen(p1_sol, 3) + self.assertAlmostEqual(p0_sol[0], 1.0 / 16.0, places=4) + self.assertAlmostEqual(p1_sol[0], 1.0 / 16.0, places=4) + self.assertAlmostEqual(p0_sol[1], 10.0 / 16.0, places=4) + self.assertAlmostEqual(p1_sol[1], 10.0 / 16.0, places=4) + self.assertAlmostEqual(p0_sol[2], 5.0 / 16.0, places=4) + self.assertAlmostEqual(p1_sol[2], 5.0 / 16.0, places=4) + self.assertAlmostEqual(p0_sol_val, 0.0) + self.assertAlmostEqual(p1_sol_val, 0.0) + + def test_asymmetric_pure_nonzero_val(self): + # c0 c1 c2 + # r0 | 2, -2 | 1, -1 | 5, -5 + # r1 |-3, 3 | -4, 4 | -2, 2 + # + # Pure eq (r0,c1) for a value of (1, -1) + # 2nd row is dominated, and then second player chooses 2nd col. 
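+    # With r1 eliminated, the column player minimizes the row player's payoff
+    # over (2, 1, 5) and so plays c1, giving the game value (1, -1).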
+ p0_sol, p1_sol, p0_sol_val, p1_sol_val = ( + lp_solver.solve_zero_sum_matrix_game( + pyspiel.create_matrix_game([[2.0, 1.0, 5.0], [-3.0, -4.0, -2.0]], + [[-2.0, -1.0, -5.0], [3.0, 4.0, 2.0]]))) + self.assertLen(p0_sol, 2) + self.assertLen(p1_sol, 3) + self.assertAlmostEqual(p0_sol[0], 1.0) + self.assertAlmostEqual(p0_sol[1], 0.0) + self.assertAlmostEqual(p1_sol[0], 0.0) + self.assertAlmostEqual(p1_sol[1], 1.0) + self.assertAlmostEqual(p0_sol_val, 1.0) + self.assertAlmostEqual(p1_sol_val, -1.0) + + def test_solve_blotto(self): + blotto_matrix_game = pyspiel.load_matrix_game("blotto") + p0_sol, p1_sol, p0_sol_val, p1_sol_val = ( + lp_solver.solve_zero_sum_matrix_game(blotto_matrix_game)) + self.assertLen(p0_sol, blotto_matrix_game.num_rows()) + self.assertLen(p1_sol, blotto_matrix_game.num_cols()) + # Symmetric game, must be zero + self.assertAlmostEqual(p0_sol_val, 0.0) + self.assertAlmostEqual(p1_sol_val, 0.0) + + def _assert_dominated(self, *args, **kwargs): + self.assertTrue(lp_solver.is_dominated(*args, **kwargs)) + + def _assert_undominated(self, *args, **kwargs): + self.assertFalse(lp_solver.is_dominated(*args, **kwargs)) + + def test_dominance(self): + self._assert_undominated(0, [[1., 1.], [2., 0.], [0., 2.]], 0, + lp_solver.DOMINANCE_STRICT) + self._assert_undominated(0, [[1., 1.], [2., 0.], [0., 2.]], 0, + lp_solver.DOMINANCE_WEAK) + self._assert_dominated(0, [[1., 1.], [2.1, 0.], [0., 2.]], 0, + lp_solver.DOMINANCE_STRICT) + + self._assert_undominated(0, [[1., 1., 1.], [2., 0., 1.], [0., 2., 2.]], 0, + lp_solver.DOMINANCE_STRICT) + self._assert_dominated(0, [[1., 1., 1.], [2., 0., 1.], [0., 2., 2.]], 0, + lp_solver.DOMINANCE_WEAK) + self._assert_dominated(0, [[1., 1., 1.], [2., 0., 1.], [0., 2., 2.]], 0, + lp_solver.DOMINANCE_VERY_WEAK) + + self._assert_dominated(0, [[1., 1., 1.], [2.1, 0., 1.], [0., 2., 2.]], 0, + lp_solver.DOMINANCE_STRICT) + self._assert_dominated(0, [[1., 1., 1.], [2.1, 0., 1.], [0., 2., 2.]], 0, + lp_solver.DOMINANCE_WEAK) + self._assert_dominated(0, [[1., 1., 1.], [2.1, 0., 1.], [0., 2., 2.]], 0, + lp_solver.DOMINANCE_VERY_WEAK) + + self._assert_undominated(0, [[1., 1., 1.], [2., 0., 2.], [0., 2., 0.]], 0, + lp_solver.DOMINANCE_STRICT) + self._assert_undominated(0, [[1., 1., 1.], [2., 0., 2.], [0., 2., 0.]], 0, + lp_solver.DOMINANCE_WEAK) + self._assert_dominated(0, [[1., 1., 1.], [2., 0., 2.], [0., 2., 0.]], 0, + lp_solver.DOMINANCE_VERY_WEAK) + + self._assert_undominated(0, [[1., 1.1, 1.], [2., 0., 2.], [0., 2., 0.]], 0, + lp_solver.DOMINANCE_STRICT) + self._assert_undominated(0, [[1., 1.1, 1.], [2., 0., 2.], [0., 2., 0.]], 0, + lp_solver.DOMINANCE_WEAK) + self._assert_undominated(0, [[1., 1.1, 1.], [2., 0., 2.], [0., 2., 0.]], 0, + lp_solver.DOMINANCE_VERY_WEAK) + + def test_dominance_3player(self): + self._assert_undominated(0, + [[[1., 1., 1.], [2., 0., 1.], [0., 2., 2.]]] * 3, + 1, lp_solver.DOMINANCE_STRICT) + self._assert_dominated(0, [[[1., 1., 1.], [2., 0., 1.], [0., 2., 2.]]] * 3, + 1, lp_solver.DOMINANCE_WEAK) + self._assert_dominated(0, [[[1., 1., 1.], [2., 0., 1.], [0., 2., 2.]]] * 3, + 1, lp_solver.DOMINANCE_VERY_WEAK) + + def test_dominance_prisoners_dilemma(self): + self._assert_dominated(0, pyspiel.load_matrix_game("matrix_pd"), 1, + lp_solver.DOMINANCE_STRICT) + self._assert_undominated(1, pyspiel.load_matrix_game("matrix_pd"), 1, + lp_solver.DOMINANCE_VERY_WEAK) + + def test_dominance_mixture(self): + mixture = lp_solver.is_dominated( + 0, [[1., 1., 1.], [2., 0., 1.], [0., 2., 2.]], + 0, + lp_solver.DOMINANCE_WEAK, + 
return_mixture=True) + self.assertAlmostEqual(mixture[0], 0) + self.assertAlmostEqual(mixture[1], 0.5) + self.assertAlmostEqual(mixture[2], 0.5) + + def _checked_iterated_dominance(self, *args, **kwargs): + reduced_game, live_actions = lp_solver.iterated_dominance(*args, **kwargs) + if isinstance(reduced_game, pyspiel.MatrixGame): + payoffs_shape = [2, reduced_game.num_rows(), reduced_game.num_cols()] + else: + payoffs_shape = list(reduced_game.shape) + self.assertLen(live_actions, payoffs_shape[0]) + self.assertListEqual(payoffs_shape[1:], [ + np.sum(live_actions_for_player) + for live_actions_for_player in live_actions + ]) + return reduced_game, live_actions + + def test_iterated_dominance_prisoners_dilemma(self): + # find the strictly dominant (D, D) strategy + pd = pyspiel.load_matrix_game("matrix_pd") + pd_dom, pd_live = self._checked_iterated_dominance( + pd, lp_solver.DOMINANCE_STRICT) + self.assertEqual(pd_dom.num_rows(), 1) + self.assertEqual(pd_dom.num_cols(), 1) + self.assertEqual(pd_dom.row_action_name(0), "Defect") + self.assertEqual(pd_dom.col_action_name(0), "Defect") + self.assertListEqual(pd_live[0].tolist(), [False, True]) + self.assertListEqual(pd_live[1].tolist(), [False, True]) + + def test_iterated_dominance_auction(self): + # find a strategy through iterated dominance that's not strictly dominant + auction = pyspiel.extensive_to_matrix_game( + pyspiel.load_game("first_sealed_auction(max_value=3)")) + auction_dom, auction_live = self._checked_iterated_dominance( + auction, lp_solver.DOMINANCE_STRICT) + # there's just one non-dominated action + self.assertEqual(auction_dom.num_rows(), 1) + self.assertEqual(auction_dom.num_cols(), 1) + best_action = [ + auction.row_action_name(row) for row in range(auction.num_rows()) + ].index(auction_dom.row_action_name(0)) + self.assertTrue(auction_live[0][best_action]) + # other actions are all weakly but not all strictly dominated + self.assertNotIn(False, [ + lp_solver.is_dominated(action, auction, 0, lp_solver.DOMINANCE_WEAK) + for action in range(6) + if action != best_action + ]) + self.assertIn(False, [ + lp_solver.is_dominated(action, auction, 0, lp_solver.DOMINANCE_STRICT) + for action in range(6) + if action != best_action + ]) + + def test_iterated_dominance_ordering(self): + for _ in range(100): + game = np.random.randint(5, size=(2, 3, 3)) + unused_reduced_strict, live_strict = self._checked_iterated_dominance( + game, lp_solver.DOMINANCE_STRICT) + unused_reduced_weak, live_weak = self._checked_iterated_dominance( + game, lp_solver.DOMINANCE_WEAK) + unused_reduced_vweak, live_vweak = self._checked_iterated_dominance( + game, lp_solver.DOMINANCE_VERY_WEAK) + for player in range(2): + self.assertTrue((live_strict[player] >= live_weak[player]).all()) + self.assertTrue((live_strict[player] >= live_vweak[player]).all()) + self.assertIn(True, live_vweak[player]) + + def test_iterated_dominance_strict_invariance(self): + for _ in range(100): + game = np.random.randint(5, size=(3, 2, 2, 3)) + unused_reduced, live = self._checked_iterated_dominance( + game, lp_solver.DOMINANCE_STRICT) + perms = [np.random.permutation(size) for size in game.shape] + game_perm = game[tuple(np.meshgrid( + *perms, indexing="ij"))].transpose([0] + list(1 + perms[0])) + unused_reduced_perm, live_perm = self._checked_iterated_dominance( + game_perm, lp_solver.DOMINANCE_STRICT) + for player in range(3): + perm_player = perms[0][player] + self.assertListEqual(live_perm[player].tolist(), + live[perm_player][perms[1 + perm_player]].tolist()) + + +if 
__name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/matrix_nash.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/matrix_nash.py new file mode 100644 index 0000000..f20ae6d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/matrix_nash.py @@ -0,0 +1,142 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Find Nash equilibria for constant- or general-sum 2-player games. + +Non-matrix games are handled by computing the normal (bimatrix) form. +The algorithms used are: +* direct computation of pure equilibria. +* linear programming to find equilibria for constant-sum games. +* iterated dominance to reduce the action space. +* reverse search vertex enumeration (if using lrsnash) to find all general-sum + equilibria. +* support enumeration (if using nashpy) to find all general-sum equilibria. +* Lemke-Howson enumeration (if using nashpy) to find one general-sum + equilibrium. +The general-sum mixed-equilibrium algorithms are likely to work well for tens of +actions, but less likely to scale beyond that. +""" + +import fractions +import os +import subprocess +import tempfile +import warnings + +import nashpy +import numpy as np + + +@np.vectorize +def to_fraction_str(x, lrsnash_max_denom): + return str(fractions.Fraction(x).limit_denominator(lrsnash_max_denom)) + + +def lrs_solve(row_payoffs, col_payoffs, lrsnash_max_denom, lrsnash_path): + """Find all Nash equilibria using the lrsnash solver. + + `lrsnash` uses reverse search vertex enumeration on rational polytopes. + For more info, see: http://cgm.cs.mcgill.ca/~avis/C/lrslib/USERGUIDE.html#nash + + Args: + row_payoffs: payoffs for row player + col_payoffs: payoffs for column player + lrsnash_max_denom: maximum denominator + lrsnash_path: path for temporary files + + Yields: + (row_mixture, col_mixture), numpy vectors of float64s. 
+ """ + num_rows, num_cols = row_payoffs.shape + game_file, game_file_path = tempfile.mkstemp() + try: + game_file = os.fdopen(game_file, "w") + + # write dimensions + game_file.write("%d %d\n\n" % (num_rows, num_cols)) + + # write row-player payoff matrix as fractions + for row in range(num_rows): + game_file.write( + " ".join(to_fraction_str(row_payoffs[row], lrsnash_max_denom)) + "\n") + game_file.write("\n") + + # write col-player payoff matrix as fractions + for row in range(num_rows): + game_file.write( + " ".join(to_fraction_str(col_payoffs[row], lrsnash_max_denom)) + "\n") + game_file.write("\n") + game_file.close() + lrs = subprocess.Popen([lrsnash_path or "lrsnash", "-s", game_file_path], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + col_mixtures = [] + for line in lrs.stdout: + if len(line) <= 1 or line[:1] == b"*": + continue + line = np.asarray( + [fractions.Fraction(x) for x in line.decode().split()], + dtype=np.float64, + ) + if line[0] == 2: # col-player + col_mixtures.append(line[1:-1]) + else: # row-player + row_mixture = line[1:-1] + # row-mixture forms a Nash with every col-mixture listed directly above + for col_mixture in col_mixtures: + yield (row_mixture, col_mixture) + col_mixtures = [] + finally: + os.remove(game_file_path) + + +def lemke_howson_solve(row_payoffs, col_payoffs): + """Find Nash equilibria using the Lemke-Howson algorithm. + + The algorithm is not guaranteed to find all equilibria. Also it can yield + wrong answers if the game is degenerate (but raises warnings in that case). + Args: + row_payoffs: payoffs for row player + col_payoffs: payoffs for column player + Yields: + (row_mixture, col_mixture), numpy vectors of float64s. + """ + + showwarning = warnings.showwarning + warned_degenerate = [False] + + def showwarning_check_degenerate(message, *args, **kwargs): + if "Your game could be degenerate." in str(message): + warned_degenerate[0] = True + showwarning(message, *args, **kwargs) + + try: + warnings.showwarning = showwarning_check_degenerate + for row_mixture, col_mixture in nashpy.Game( + row_payoffs, col_payoffs).lemke_howson_enumeration(): + if warned_degenerate[0]: + # attempt to discard obviously-wrong results + if (row_mixture.shape != row_payoffs.shape[:1] or + col_mixture.shape != row_payoffs.shape[1:]): + warnings.warn("Discarding ill-shaped solution.") + continue + if (not np.isfinite(row_mixture).all() or + not np.isfinite(col_mixture).all()): + warnings.warn("Discarding non-finite solution.") + continue + yield row_mixture, col_mixture + finally: + warnings.showwarning = showwarning diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mccfr.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mccfr.py new file mode 100644 index 0000000..bc253ac --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mccfr.py @@ -0,0 +1,131 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Python base module for the implementations of Monte Carlo Counterfactual Regret Minimization.""" + +import numpy as np +from open_spiel.python import policy + +REGRET_INDEX = 0 +AVG_POLICY_INDEX = 1 + + +class AveragePolicy(policy.Policy): + """A policy object representing the average policy for MCCFR algorithms.""" + + def __init__(self, game, player_ids, infostates): + # Do not create a copy of the dictionary + # but work on the same object + super().__init__(game, player_ids) + self._infostates = infostates + + def action_probabilities(self, state, player_id=None): + """Returns the MCCFR average policy for a player in a state. + + If the policy is not defined for the provided state, a uniform + random policy is returned. + + Args: + state: A `pyspiel.State` object. + player_id: Optional, the player id for which we want an action. Optional + unless this is a simultaneous state at which multiple players can act. + + Returns: + A `dict` of `{action: probability}` for the specified player in the + supplied state. If the policy is defined for the state, this + will contain the average MCCFR strategy defined for that state. + Otherwise, it will contain all legal actions, each with the same + probability, equal to 1 / num_legal_actions. + """ + if player_id is None: + player_id = state.current_player() + legal_actions = state.legal_actions() + info_state_key = state.information_state_string(player_id) + retrieved_infostate = self._infostates.get(info_state_key, None) + if retrieved_infostate is None: + return {a: 1 / len(legal_actions) for a in legal_actions} + avstrat = ( + retrieved_infostate[AVG_POLICY_INDEX] / + retrieved_infostate[AVG_POLICY_INDEX].sum()) + return {legal_actions[i]: avstrat[i] for i in range(len(legal_actions))} + + +class MCCFRSolverBase(object): + """A base class for both outcome MCCFR and external MCCFR.""" + + def __init__(self, game): + self._game = game + self._infostates = {} # infostate keys -> [regrets, avg strat] + self._num_players = game.num_players() + + def _lookup_infostate_info(self, info_state_key, num_legal_actions): + """Looks up an information set table for the given key. + + Args: + info_state_key: information state key (string identifier). + num_legal_actions: number of legal actions at this information state. + + Returns: + A list of: + - the average regrets as a numpy array of shape [num_legal_actions] + - the average strategy as a numpy array of shape + [num_legal_actions]. + The average is weighted using `my_reach` + """ + retrieved_infostate = self._infostates.get(info_state_key, None) + if retrieved_infostate is not None: + return retrieved_infostate + + # Start with a small amount of regret and total accumulation, to give a + # uniform policy: this will get erased fast. + self._infostates[info_state_key] = [ + np.ones(num_legal_actions, dtype=np.float64) / 1e6, + np.ones(num_legal_actions, dtype=np.float64) / 1e6, + ] + return self._infostates[info_state_key] + + def _add_regret(self, info_state_key, action_idx, amount): + self._infostates[info_state_key][REGRET_INDEX][action_idx] += amount + + def _add_avstrat(self, info_state_key, action_idx, amount): + self._infostates[info_state_key][AVG_POLICY_INDEX][action_idx] += amount + + def average_policy(self): + """Computes the average policy, containing the policy for all players. + + Returns: + An average policy instance that should only be used during + the lifetime of solver object. 
+ """ + return AveragePolicy(self._game, list(range(self._num_players)), + self._infostates) + + def _regret_matching(self, regrets, num_legal_actions): + """Applies regret matching to get a policy. + + Args: + regrets: numpy array of regrets for each action. + num_legal_actions: number of legal actions at this state. + + Returns: + numpy array of the policy indexed by the index of legal action in the + list. + """ + positive_regrets = np.maximum(regrets, + np.zeros(num_legal_actions, dtype=np.float64)) + sum_pos_regret = positive_regrets.sum() + if sum_pos_regret <= 0: + return np.ones(num_legal_actions, dtype=np.float64) / num_legal_actions + else: + return positive_regrets / sum_pos_regret diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mcts.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mcts.py new file mode 100644 index 0000000..070967a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mcts.py @@ -0,0 +1,450 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Monte-Carlo Tree Search algorithm for game play.""" + +import math +import time + +import numpy as np + +import pyspiel + + +class Evaluator(object): + """Abstract class representing an evaluation function for a game. + + The evaluation function takes in an intermediate state in the game and returns + an evaluation of that state, which should correlate with chances of winning + the game. It returns the evaluation from all player's perspectives. + """ + + def evaluate(self, state): + """Returns evaluation on given state.""" + raise NotImplementedError + + def prior(self, state): + """Returns a probability for each legal action in the given state.""" + raise NotImplementedError + + +class RandomRolloutEvaluator(Evaluator): + """A simple evaluator doing random rollouts. + + This evaluator returns the average outcome of playing random actions from the + given state until the end of the game. n_rollouts is the number of random + outcomes to be considered. 
+ """ + + def __init__(self, n_rollouts=1, random_state=None): + self.n_rollouts = n_rollouts + self._random_state = random_state or np.random.RandomState() + + def evaluate(self, state): + """Returns evaluation on given state.""" + result = None + for _ in range(self.n_rollouts): + working_state = state.clone() + while not working_state.is_terminal(): + if working_state.is_chance_node(): + outcomes = working_state.chance_outcomes() + action_list, prob_list = zip(*outcomes) + action = self._random_state.choice(action_list, p=prob_list) + else: + action = self._random_state.choice(working_state.legal_actions()) + working_state.apply_action(action) + returns = np.array(working_state.returns()) + result = returns if result is None else result + returns + + return result / self.n_rollouts + + def prior(self, state): + """Returns equal probability for all actions.""" + if state.is_chance_node(): + return state.chance_outcomes() + else: + legal_actions = state.legal_actions(state.current_player()) + return [(action, 1.0 / len(legal_actions)) for action in legal_actions] + + +class SearchNode(object): + """A node in the search tree. + + A SearchNode represents a state and possible continuations from it. Each child + represents a possible action, and the expected result from doing so. + + Attributes: + action: The action from the parent node's perspective. Not important for the + root node, as the actions that lead to it are in the past. + player: Which player made this action. + prior: A prior probability for how likely this action will be selected. + explore_count: How many times this node was explored. + total_reward: The sum of rewards of rollouts through this node, from the + parent node's perspective. The average reward of this node is + `total_reward / explore_count` + outcome: The rewards for all players if this is a terminal node or the + subtree has been proven, otherwise None. + children: A list of SearchNodes representing the possible actions from this + node, along with their expected rewards. + """ + __slots__ = [ + "action", + "player", + "prior", + "explore_count", + "total_reward", + "outcome", + "children", + ] + + def __init__(self, action, player, prior): + self.action = action + self.player = player + self.prior = prior + self.explore_count = 0 + self.total_reward = 0.0 + self.outcome = None + self.children = [] + + def uct_value(self, parent_explore_count, uct_c): + """Returns the UCT value of child.""" + if self.outcome is not None: + return self.outcome[self.player] + + if self.explore_count == 0: + return float("inf") + + return self.total_reward / self.explore_count + uct_c * math.sqrt( + math.log(parent_explore_count) / self.explore_count) + + def puct_value(self, parent_explore_count, uct_c): + """Returns the PUCT value of child.""" + if self.outcome is not None: + return self.outcome[self.player] + + return ((self.explore_count and self.total_reward / self.explore_count) + + uct_c * self.prior * math.sqrt(parent_explore_count) / + (self.explore_count + 1)) + + def sort_key(self): + """Returns the best action from this node, either proven or most visited. + + This ordering leads to choosing: + - Highest proven score > 0 over anything else, including a promising but + unproven action. + - A proven draw only if it has higher exploration than others that are + uncertain, or the others are losses. + - Uncertain action with most exploration over loss of any difficulty + - Hardest loss if everything is a loss + - Highest expected reward if explore counts are equal (unlikely). 
+ - Longest win, if multiple are proven (unlikely due to early stopping). + """ + return (0 if self.outcome is None else self.outcome[self.player], + self.explore_count, self.total_reward) + + def best_child(self): + """Returns the best child in order of the sort key.""" + return max(self.children, key=SearchNode.sort_key) + + def children_str(self, state=None): + """Returns the string representation of this node's children. + + They are ordered based on the sort key, so order of being chosen to play. + + Args: + state: A `pyspiel.State` object, to be used to convert the action id into + a human readable format. If None, the action integer id is used. + """ + return "\n".join([ + c.to_str(state) + for c in reversed(sorted(self.children, key=SearchNode.sort_key)) + ]) + + def to_str(self, state=None): + """Returns the string representation of this node. + + Args: + state: A `pyspiel.State` object, to be used to convert the action id into + a human readable format. If None, the action integer id is used. + """ + action = ( + state.action_to_string(state.current_player(), self.action) + if state and self.action is not None else str(self.action)) + return ("{:>6}: player: {}, prior: {:5.3f}, value: {:6.3f}, sims: {:5d}, " + "outcome: {}, {:3d} children").format( + action, self.player, self.prior, self.explore_count and + self.total_reward / self.explore_count, self.explore_count, + ("{:4.1f}".format(self.outcome[self.player]) + if self.outcome else "none"), len(self.children)) + + def __str__(self): + return self.to_str(None) + + +class MCTSBot(pyspiel.Bot): + """Bot that uses Monte-Carlo Tree Search algorithm.""" + + def __init__(self, + game, + uct_c, + max_simulations, + evaluator, + solve=True, + random_state=None, + child_selection_fn=SearchNode.uct_value, + dirichlet_noise=None, + verbose=False, + dont_return_chance_node=False): + """Initializes a MCTS Search algorithm in the form of a bot. + + In multiplayer games, or non-zero-sum games, the players will play the + greedy strategy. + + Args: + game: A pyspiel.Game to play. + uct_c: The exploration constant for UCT. + max_simulations: How many iterations of MCTS to perform. Each simulation + will result in one call to the evaluator. Memory usage should grow + linearly with simulations * branching factor. How many nodes in the + search tree should be evaluated. This is correlated with memory size and + tree depth. + evaluator: A `Evaluator` object to use to evaluate a leaf node. + solve: Whether to back up solved states. + random_state: An optional numpy RandomState to make it deterministic. + child_selection_fn: A function to select the child in the descent phase. + The default is UCT. + dirichlet_noise: A tuple of (epsilon, alpha) for adding dirichlet noise to + the policy at the root. This is from the alpha-zero paper. + verbose: Whether to print information about the search tree before + returning the action. Useful for confirming the search is working + sensibly. + dont_return_chance_node: If true, do not stop expanding at chance nodes. + Enabled for AlphaZero. + + Raises: + ValueError: if the game type isn't supported. + """ + pyspiel.Bot.__init__(self) + # Check that the game satisfies the conditions for this MCTS implemention. 
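For intuition about the `uct_c` parameter documented above: `SearchNode.uct_value` is the empirical mean reward plus an exploration bonus that shrinks with visit count. A small standalone check (function and numbers are illustrative):

```python
import math

def uct(total_reward, explore_count, parent_explore_count, uct_c):
    if explore_count == 0:
        return float("inf")  # unvisited children are always tried first
    mean = total_reward / explore_count
    bonus = uct_c * math.sqrt(math.log(parent_explore_count) / explore_count)
    return mean + bonus

# A less-visited child receives a larger bonus and can outscore a child with a
# higher empirical mean, which is what drives exploration.
print(uct(35.0, 50, 100, math.sqrt(2)))  # ~1.13 (mean 0.70, small bonus)
print(uct(2.5, 5, 100, math.sqrt(2)))    # ~1.86 (mean 0.50, large bonus)
```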
+ game_type = game.get_type() + if game_type.reward_model != pyspiel.GameType.RewardModel.TERMINAL: + raise ValueError("Game must have terminal rewards.") + if game_type.dynamics != pyspiel.GameType.Dynamics.SEQUENTIAL: + raise ValueError("Game must have sequential turns.") + + self._game = game + self.uct_c = uct_c + self.max_simulations = max_simulations + self.evaluator = evaluator + self.verbose = verbose + self.solve = solve + self.max_utility = game.max_utility() + self._dirichlet_noise = dirichlet_noise + self._random_state = random_state or np.random.RandomState() + self._child_selection_fn = child_selection_fn + self.dont_return_chance_node = dont_return_chance_node + + def restart_at(self, state): + pass + + def step_with_policy(self, state): + """Returns bot's policy and action at given state.""" + t1 = time.time() + root = self.mcts_search(state) + + best = root.best_child() + + if self.verbose: + seconds = time.time() - t1 + print("Finished {} sims in {:.3f} secs, {:.1f} sims/s".format( + root.explore_count, seconds, root.explore_count / seconds)) + print("Root:") + print(root.to_str(state)) + print("Children:") + print(root.children_str(state)) + if best.children: + chosen_state = state.clone() + chosen_state.apply_action(best.action) + print("Children of chosen:") + print(best.children_str(chosen_state)) + + mcts_action = best.action + + policy = [(action, (1.0 if action == mcts_action else 0.0)) + for action in state.legal_actions(state.current_player())] + + return policy, mcts_action + + def step(self, state): + return self.step_with_policy(state)[1] + + def _apply_tree_policy(self, root, state): + """Applies the UCT policy to play the game until reaching a leaf node. + + A leaf node is defined as a node that is terminal or has not been evaluated + yet. If it reaches a node that has been evaluated before but hasn't been + expanded, then expand it's children and continue. + + Args: + root: The root node in the search tree. + state: The state of the game at the root node. + + Returns: + visit_path: A list of nodes descending from the root node to a leaf node. + working_state: The state of the game at the leaf node. + """ + visit_path = [root] + working_state = state.clone() + current_node = root + while (not working_state.is_terminal() and + current_node.explore_count > 0) or ( + working_state.is_chance_node() and self.dont_return_chance_node): + if not current_node.children: + # For a new node, initialize its state, then choose a child as normal. + legal_actions = self.evaluator.prior(working_state) + if current_node is root and self._dirichlet_noise: + epsilon, alpha = self._dirichlet_noise + noise = self._random_state.dirichlet([alpha] * len(legal_actions)) + legal_actions = [(a, (1 - epsilon) * p + epsilon * n) + for (a, p), n in zip(legal_actions, noise)] + # Reduce bias from move generation order. 
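The root-noise branch a few lines below follows the AlphaZero recipe described in the constructor docstring: evaluator priors are blended with a Dirichlet sample using weight `epsilon`. A tiny numeric sketch of that mixing step (values are arbitrary):

```python
import numpy as np

rng = np.random.RandomState(0)

priors = np.array([0.5, 0.3, 0.2])  # evaluator priors at the root
epsilon, alpha = 0.25, 0.3          # illustrative noise parameters
noise = rng.dirichlet([alpha] * len(priors))

mixed = (1 - epsilon) * priors + epsilon * noise
print(mixed, mixed.sum())  # still a probability distribution (sums to 1)
```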
+ self._random_state.shuffle(legal_actions) + player = working_state.current_player() + current_node.children = [ + SearchNode(action, player, prior) for action, prior in legal_actions + ] + + if working_state.is_chance_node(): + # For chance nodes, rollout according to chance node's probability + # distribution + outcomes = working_state.chance_outcomes() + action_list, prob_list = zip(*outcomes) + action = self._random_state.choice(action_list, p=prob_list) + chosen_child = next( + c for c in current_node.children if c.action == action) + else: + # Otherwise choose node with largest UCT value + chosen_child = max( + current_node.children, + key=lambda c: self._child_selection_fn( # pylint: disable=g-long-lambda + c, current_node.explore_count, self.uct_c)) + + working_state.apply_action(chosen_child.action) + current_node = chosen_child + visit_path.append(current_node) + + return visit_path, working_state + + def mcts_search(self, state): + """A vanilla Monte-Carlo Tree Search algorithm. + + This algorithm searches the game tree from the given state. + At the leaf, the evaluator is called if the game state is not terminal. + A total of max_simulations states are explored. + + At every node, the algorithm chooses the action with the highest PUCT value, + defined as: `Q/N + c * prior * sqrt(parent_N) / N`, where Q is the total + reward after the action, and N is the number of times the action was + explored in this position. The input parameter c controls the balance + between exploration and exploitation; higher values of c encourage + exploration of under-explored nodes. Unseen actions are always explored + first. + + At the end of the search, the chosen action is the action that has been + explored most often. This is the action that is returned. + + This implementation supports sequential n-player games, with or without + chance nodes. All players maximize their own reward and ignore the other + players' rewards. This corresponds to max^n for n-player games. It is the + norm for zero-sum games, but doesn't have any special handling for + non-zero-sum games. It doesn't have any special handling for imperfect + information games. + + The implementation also supports backing up solved states, i.e. MCTS-Solver. + The implementation is general in that it is based on a max^n backup (each + player greedily chooses their maximum among proven children values, or there + exists one child whose proven value is game.max_utility()), so it will work + for multiplayer, general-sum, and arbitrary payoff games (not just win/loss/ + draw games). Also chance nodes are considered proven only if all children + have the same value. + + Some references: + - Sturtevant, An Analysis of UCT in Multi-Player Games, 2008, + https://web.cs.du.edu/~sturtevant/papers/multi-player_UCT.pdf + - Nijssen, Monte-Carlo Tree Search for Multi-Player Games, 2013, + https://project.dke.maastrichtuniversity.nl/games/files/phd/Nijssen_thesis.pdf + - Silver, AlphaGo Zero: Starting from scratch, 2017 + https://deepmind.com/blog/article/alphago-zero-starting-scratch + - Winands, Bjornsson, and Saito, "Monte-Carlo Tree Search Solver", 2008. + https://dke.maastrichtuniversity.nl/m.winands/documents/uctloa.pdf + + Arguments: + state: pyspiel.State object, state to search from + + Returns: + The most visited move from the root node. 
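A hedged end-to-end usage sketch for the bot described above, in the spirit of the tests added later in this diff (parameter values are arbitrary):

```python
import math

import numpy as np
import pyspiel
from open_spiel.python.algorithms import mcts

game = pyspiel.load_game("tic_tac_toe")
rng = np.random.RandomState(42)
bot = mcts.MCTSBot(
    game,
    uct_c=math.sqrt(2),
    max_simulations=200,
    evaluator=mcts.RandomRolloutEvaluator(n_rollouts=5, random_state=rng),
    random_state=rng,
)

state = game.new_initial_state()
action = bot.step(state)
print(state.action_to_string(state.current_player(), action))
```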
+ """ + root = SearchNode(None, state.current_player(), 1) + for _ in range(self.max_simulations): + visit_path, working_state = self._apply_tree_policy(root, state) + if working_state.is_terminal(): + returns = working_state.returns() + visit_path[-1].outcome = returns + solved = self.solve + else: + returns = self.evaluator.evaluate(working_state) + solved = False + + while visit_path: + # For chance nodes, walk up the tree to find the decision-maker. + decision_node_idx = -1 + while visit_path[decision_node_idx].player == pyspiel.PlayerId.CHANCE: + decision_node_idx -= 1 + # Chance node targets are for the respective decision-maker. + target_return = returns[visit_path[decision_node_idx].player] + node = visit_path.pop() + node.total_reward += target_return + node.explore_count += 1 + + if solved and node.children: + player = node.children[0].player + if player == pyspiel.PlayerId.CHANCE: + # Only back up chance nodes if all have the same outcome. + # An alternative would be to back up the weighted average of + # outcomes if all children are solved, but that is less clear. + outcome = node.children[0].outcome + if (outcome is not None and + all(np.array_equal(c.outcome, outcome) for c in node.children)): + node.outcome = outcome + else: + solved = False + else: + # If any have max utility (won?), or all children are solved, + # choose the one best for the player choosing. + best = None + all_solved = True + for child in node.children: + if child.outcome is None: + all_solved = False + elif best is None or child.outcome[player] > best.outcome[player]: + best = child + if (best is not None and + (all_solved or best.outcome[player] == self.max_utility)): + node.outcome = best.outcome + else: + solved = False + if root.outcome is not None: + break + + return root diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mcts_agent.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mcts_agent.py new file mode 100644 index 0000000..e3e6912 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mcts_agent.py @@ -0,0 +1,49 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""An RL agent wrapper for the MCTS bot.""" + +import numpy as np +from open_spiel.python import rl_agent +import pyspiel + + +class MCTSAgent(rl_agent.AbstractAgent): + """MCTS agent class. + + Important note: this agent requires the environment to provide the full state + in its TimeStep objects. Hence, the environment must be created with the + use_full_state flag set to True, and the state must be serializable. + """ + + def __init__(self, player_id, num_actions, mcts_bot, name="mcts_agent"): + assert num_actions > 0 + self._player_id = player_id + self._mcts_bot = mcts_bot + self._num_actions = num_actions + + def step(self, time_step, is_evaluation=False): + # If it is the end of the episode, don't select an action. 
+ if time_step.last(): + return + + assert "serialized_state" in time_step.observations + _, state = pyspiel.deserialize_game_and_state( + time_step.observations["serialized_state"]) + + # Call the MCTS bot's step to get the action. + probs = np.zeros(self._num_actions) + action = self._mcts_bot.step(state) + probs[action] = 1.0 + + return rl_agent.StepOutput(action=action, probs=probs) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mcts_agent_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mcts_agent_test.py new file mode 100644 index 0000000..bb45735 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mcts_agent_test.py @@ -0,0 +1,50 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Test the MCTS Agent.""" + +from absl.testing import absltest +from open_spiel.python import rl_environment +from open_spiel.python.algorithms import mcts +from open_spiel.python.algorithms import mcts_agent + + +class MCTSAgentTest(absltest.TestCase): + + def test_tic_tac_toe_episode(self): + env = rl_environment.Environment("tic_tac_toe", include_full_state=True) + num_players = env.num_players + num_actions = env.action_spec()["num_actions"] + + # Create the MCTS bot. Both agents can share the same bot in this case since + # there is no state kept between searches. See mcts.py for more info about + # the arguments. + mcts_bot = mcts.MCTSBot(env.game, 1.5, 100, mcts.RandomRolloutEvaluator()) + + agents = [ + mcts_agent.MCTSAgent(player_id=idx, num_actions=num_actions, + mcts_bot=mcts_bot) + for idx in range(num_players) + ] + + time_step = env.reset() + while not time_step.last(): + player_id = time_step.observations["current_player"] + agent_output = agents[player_id].step(time_step) + time_step = env.step([agent_output.action]) + for agent in agents: + agent.step(time_step) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mcts_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mcts_test.py new file mode 100644 index 0000000..381f59a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mcts_test.py @@ -0,0 +1,216 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for open_spiel.python.algorithms.mcts.""" + +import math +import random + +from absl.testing import absltest +import numpy as np + +from open_spiel.python.algorithms import evaluate_bots +from open_spiel.python.algorithms import mcts +import pyspiel + +UCT_C = math.sqrt(2) + + +def _get_action(state, action_str): + for action in state.legal_actions(): + if action_str == state.action_to_string(state.current_player(), action): + return action + raise ValueError("invalid action string: {}".format(action_str)) + + +def search_tic_tac_toe_state(initial_actions): + game = pyspiel.load_game("tic_tac_toe") + state = game.new_initial_state() + for action_str in initial_actions.split(" "): + state.apply_action(_get_action(state, action_str)) + rng = np.random.RandomState(42) + bot = mcts.MCTSBot( + game, + UCT_C, + max_simulations=10000, + solve=True, + random_state=rng, + evaluator=mcts.RandomRolloutEvaluator(n_rollouts=20, random_state=rng)) + return bot.mcts_search(state), state + + +def make_node(action, player=0, prior=1, **kwargs): + node = mcts.SearchNode(action, player, prior) + for k, v in kwargs.items(): + setattr(node, k, v) + return node + + +class MctsBotTest(absltest.TestCase): + + def assertTTTStateStr(self, state, expected): + expected = expected.replace(" ", "").strip() + self.assertEqual(str(state), expected) + + def test_can_play_tic_tac_toe(self): + game = pyspiel.load_game("tic_tac_toe") + max_simulations = 100 + evaluator = mcts.RandomRolloutEvaluator(n_rollouts=20) + bots = [ + mcts.MCTSBot(game, UCT_C, max_simulations, evaluator), + mcts.MCTSBot(game, UCT_C, max_simulations, evaluator), + ] + v = evaluate_bots.evaluate_bots(game.new_initial_state(), bots, np.random) + self.assertEqual(v[0] + v[1], 0) + + def test_can_play_both_sides(self): + game = pyspiel.load_game("tic_tac_toe") + bot = mcts.MCTSBot(game, UCT_C, max_simulations=100, + evaluator=mcts.RandomRolloutEvaluator(n_rollouts=20)) + bots = [bot, bot] + v = evaluate_bots.evaluate_bots(game.new_initial_state(), bots, np.random) + self.assertEqual(v[0] + v[1], 0) + + def test_can_play_single_player(self): + game = pyspiel.load_game("catch") + max_simulations = 100 + evaluator = mcts.RandomRolloutEvaluator(n_rollouts=20) + bots = [mcts.MCTSBot(game, UCT_C, max_simulations, evaluator)] + v = evaluate_bots.evaluate_bots(game.new_initial_state(), bots, np.random) + self.assertGreater(v[0], 0) + + def test_throws_on_simultaneous_game(self): + game = pyspiel.load_game("matrix_mp") + evaluator = mcts.RandomRolloutEvaluator(n_rollouts=20) + with self.assertRaises(ValueError): + mcts.MCTSBot(game, UCT_C, max_simulations=100, evaluator=evaluator) + + def test_can_play_three_player_stochastic_games(self): + game = pyspiel.load_game("pig(players=3,winscore=20,horizon=30)") + max_simulations = 100 + evaluator = mcts.RandomRolloutEvaluator(n_rollouts=5) + bots = [ + mcts.MCTSBot(game, UCT_C, max_simulations, evaluator), + mcts.MCTSBot(game, UCT_C, max_simulations, evaluator), + mcts.MCTSBot(game, UCT_C, max_simulations, evaluator), + ] + v = evaluate_bots.evaluate_bots(game.new_initial_state(), bots, np.random) + self.assertEqual(sum(v), 0) + + def test_solve_draw(self): + root, state = search_tic_tac_toe_state("x(1,1) o(0,0) x(2,2)") + self.assertTTTStateStr(state, """ + o.. + .x. + ..x + """) + self.assertEqual(root.outcome[root.player], 0) + for c in root.children: + self.assertLessEqual(c.outcome[c.player], 0) # No winning moves. 
+ + best = root.best_child() + self.assertEqual(best.outcome[best.player], 0) + self.assertIn( + state.action_to_string(best.player, best.action), + ("o(0,2)", "o(2,0)")) # All others lose. + + def test_solve_loss(self): + root, state = search_tic_tac_toe_state("x(1,1) o(0,0) x(2,2) o(0,1) x(0,2)") + self.assertTTTStateStr(state, """ + oox + .x. + ..x + """) + self.assertEqual(root.outcome[root.player], -1) + for c in root.children: + self.assertEqual(c.outcome[c.player], -1) # All losses. + + def test_solve_win(self): + root, state = search_tic_tac_toe_state("x(0,1) o(2,2)") + self.assertTTTStateStr(state, """ + .x. + ... + ..o + """) + self.assertEqual(root.outcome[root.player], 1) + best = root.best_child() + self.assertEqual(best.outcome[best.player], 1) + self.assertEqual(state.action_to_string(best.player, best.action), "x(0,2)") + + def assertBestChild(self, choice, children): + # If this causes flakiness, the key in `SearchNode.best_child` is bad. + random.shuffle(children) + root = make_node(-1, children=children) + self.assertEqual(root.best_child().action, choice) + + def test_choose_most_visited_when_not_solved(self): + self.assertBestChild(0, [ + make_node(0, explore_count=50, total_reward=30), + make_node(1, explore_count=40, total_reward=40), + ]) + + def test_choose_win_over_most_visited(self): + self.assertBestChild(1, [ + make_node(0, explore_count=50, total_reward=30), + make_node(1, explore_count=40, total_reward=40, outcome=[1]), + ]) + + def test_choose_best_over_good(self): + self.assertBestChild(1, [ + make_node(0, explore_count=50, total_reward=30, outcome=[0.5]), + make_node(1, explore_count=40, total_reward=40, outcome=[0.8]), + ]) + + def test_choose_bad_over_worst(self): + self.assertBestChild(0, [ + make_node(0, explore_count=50, total_reward=30, outcome=[-0.5]), + make_node(1, explore_count=40, total_reward=40, outcome=[-0.8]), + ]) + + def test_choose_positive_reward_over_promising(self): + self.assertBestChild( + 1, + [ + make_node(0, explore_count=50, total_reward=40), # more promising + make_node(1, explore_count=10, total_reward=1, outcome=[0.1 + ]), # solved + ]) + + def test_choose_most_visited_over_loss(self): + self.assertBestChild(0, [ + make_node(0, explore_count=50, total_reward=30), + make_node(1, explore_count=40, total_reward=40, outcome=[-1]), + ]) + + def test_choose_most_visited_over_draw(self): + self.assertBestChild(0, [ + make_node(0, explore_count=50, total_reward=30), + make_node(1, explore_count=40, total_reward=40, outcome=[0]), + ]) + + def test_choose_uncertainty_over_most_visited_loss(self): + self.assertBestChild(1, [ + make_node(0, explore_count=50, total_reward=30, outcome=[-1]), + make_node(1, explore_count=40, total_reward=40), + ]) + + def test_choose_slowest_loss(self): + self.assertBestChild(1, [ + make_node(0, explore_count=50, total_reward=10, outcome=[-1]), + make_node(1, explore_count=60, total_reward=15, outcome=[-1]), + ]) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/minimax.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/minimax.py new file mode 100644 index 0000000..d0ebd9e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/minimax.py @@ -0,0 +1,206 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Implements the min-max algorithm with alpha-beta pruning. + +Solves perfect play for deterministic, 2-players, perfect-information 0-sum +games. + +See for example https://en.wikipedia.org/wiki/Alpha-beta_pruning +""" + +import pyspiel + + +def _alpha_beta(state, depth, alpha, beta, value_function, + maximizing_player_id): + """An alpha-beta algorithm. + + Implements a min-max algorithm with alpha-beta pruning. + See for example https://en.wikipedia.org/wiki/Alpha-beta_pruning + + Arguments: + state: The current state node of the game. + depth: The maximum depth for the min/max search. + alpha: best value that the MAX player can guarantee (if the value is <= than + alpha, the MAX player will avoid it). + beta: the best value that the MIN currently can guarantee (if the value is + >= than beta, the MIN player will avoid it). + value_function: An optional function mapping a Spiel `State` to a numerical + value, to be used as the value of the maximizing player for a node when we + reach `maximum_depth` and the node is not terminal. + maximizing_player_id: The id of the MAX player. The other player is assumed + to be MIN. + + Returns: + A tuple of the optimal value of the sub-game starting in state + (given alpha/beta) and the move that achieved it. + + Raises: + NotImplementedError: If we reach the maximum depth. Given we have no value + function for a non-terminal node, we cannot break early. + """ + if state.is_terminal(): + return state.player_return(maximizing_player_id), None + + if depth == 0 and value_function is None: + raise NotImplementedError( + "We assume we can walk the full depth of the tree. " + "Try increasing the maximum_depth or provide a value_function.") + if depth == 0: + return value_function(state), None + + player = state.current_player() + best_action = -1 + if player == maximizing_player_id: + value = -float("inf") + for action in state.legal_actions(): + child_state = state.clone() + child_state.apply_action(action) + child_value, _ = _alpha_beta(child_state, depth - 1, alpha, beta, + value_function, maximizing_player_id) + if child_value > value: + value = child_value + best_action = action + alpha = max(alpha, value) + if alpha >= beta: + break # beta cut-off + return value, best_action + else: + value = float("inf") + for action in state.legal_actions(): + child_state = state.clone() + child_state.apply_action(action) + child_value, _ = _alpha_beta(child_state, depth - 1, alpha, beta, + value_function, maximizing_player_id) + if child_value < value: + value = child_value + best_action = action + beta = min(beta, value) + if alpha >= beta: + break # alpha cut-off + return value, best_action + + +def alpha_beta_search(game, + state=None, + value_function=None, + maximum_depth=30, + maximizing_player_id=None): + """Solves deterministic, 2-players, perfect-information 0-sum game. + + For small games only! Please use keyword arguments for optional arguments. + + Arguments: + game: The game to analyze, as returned by `load_game`. + state: The state to run from, as returned by `game.new_initial_state()`. 
If + none is specified, then the initial state is assumed. + value_function: An optional function mapping a Spiel `State` to a numerical + value, to be used as the value of the maximizing player for a node when we + reach `maximum_depth` and the node is not terminal. + maximum_depth: The maximum depth to search over. When this depth is reached, + an exception will be raised. + maximizing_player_id: The id of the MAX player. The other player is assumed + to be MIN. The default (None) will suppose the player at the root to be + the MAX player. + + Returns: + A tuple containing the value of the game for the maximizing player when + both player play optimally, and the action that achieves this value. + """ + game_info = game.get_type() + + if game.num_players() != 2: + raise ValueError("Game must be a 2-player game") + if game_info.chance_mode != pyspiel.GameType.ChanceMode.DETERMINISTIC: + raise ValueError("The game must be a Deterministic one, not {}".format( + game.chance_mode)) + if game_info.information != pyspiel.GameType.Information.PERFECT_INFORMATION: + raise ValueError( + "The game must be a perfect information one, not {}".format( + game.information)) + if game_info.dynamics != pyspiel.GameType.Dynamics.SEQUENTIAL: + raise ValueError("The game must be turn-based, not {}".format( + game.dynamics)) + if game_info.utility != pyspiel.GameType.Utility.ZERO_SUM: + raise ValueError("The game must be 0-sum, not {}".format(game.utility)) + + if state is None: + state = game.new_initial_state() + if maximizing_player_id is None: + maximizing_player_id = state.current_player() + return _alpha_beta( + state.clone(), + maximum_depth, + alpha=-float("inf"), + beta=float("inf"), + value_function=value_function, + maximizing_player_id=maximizing_player_id) + + +def expectiminimax(state, depth, value_function, maximizing_player_id): + """Runs expectiminimax until the specified depth. + + See https://en.wikipedia.org/wiki/Expectiminimax for details. + + Arguments: + state: The state to start the search from. + depth: The depth of the search (not counting chance nodes). + value_function: A value function, taking in a state and returning a value, + in terms of the maximizing_player_id. + maximizing_player_id: The player running the search (current player at root + of the search tree). + + Returns: + A tuple (value, best_action) representing the value to the maximizing player + and the best action that achieves that value. None is returned as the best + action at chance nodes, the depth limit, and terminals. 
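A short usage sketch for `alpha_beta_search`, mirroring the test file that follows; tic-tac-toe is small enough to solve exactly, and its value under perfect play is a draw:

```python
import pyspiel
from open_spiel.python.algorithms import minimax

game = pyspiel.load_game("tic_tac_toe")
value, best_action = minimax.alpha_beta_search(game)
print(value)  # 0.0 -- perfect play is a draw

# With a depth cutoff, a value_function must be supplied for non-terminal leaves.
value, _ = minimax.alpha_beta_search(
    game, value_function=lambda state: 0.0, maximum_depth=2)
```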
+ """ + if state.is_terminal(): + return state.player_return(maximizing_player_id), None + + if depth == 0: + return value_function(state), None + + if state.is_chance_node(): + value = 0 + for outcome, prob in state.chance_outcomes(): + child_state = state.clone() + child_state.apply_action(outcome) + child_value, _ = expectiminimax(child_state, depth, value_function, + maximizing_player_id) + value += prob * child_value + return value, None + elif state.current_player() == maximizing_player_id: + value = -float("inf") + for action in state.legal_actions(): + child_state = state.clone() + child_state.apply_action(action) + child_value, _ = expectiminimax(child_state, depth - 1, value_function, + maximizing_player_id) + if child_value > value: + value = child_value + best_action = action + return value, best_action + else: + value = float("inf") + for action in state.legal_actions(): + child_state = state.clone() + child_state.apply_action(action) + child_value, _ = expectiminimax(child_state, depth - 1, value_function, + maximizing_player_id) + if child_value < value: + value = child_value + best_action = action + return value, best_action diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/minimax_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/minimax_test.py new file mode 100644 index 0000000..a51ff39 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/minimax_test.py @@ -0,0 +1,69 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.algorithms.minimax.""" + +from absl.testing import absltest + +from open_spiel.python.algorithms import minimax +import pyspiel + + +class MinimaxTest(absltest.TestCase): + + def test_compute_game_value(self): + tic_tac_toe = pyspiel.load_game("tic_tac_toe") + + game_score, _ = minimax.alpha_beta_search(tic_tac_toe) + self.assertEqual(0., game_score) + + def test_compute_game_value_with_evaluation_function(self): + # We only check it runs + tic_tac_toe = pyspiel.load_game("tic_tac_toe") + + game_score, _ = minimax.alpha_beta_search( + tic_tac_toe, value_function=lambda x: 0, maximum_depth=1) + self.assertEqual(0., game_score) + + def test_win(self): + tic_tac_toe = pyspiel.load_game("tic_tac_toe") + state = tic_tac_toe.new_initial_state() + + # Construct: + # .o. + # .x. + # ... + state.apply_action(4) + state.apply_action(1) + game_score, _ = minimax.alpha_beta_search(tic_tac_toe, state=state) + self.assertEqual(1., game_score) + + def test_loss(self): + tic_tac_toe = pyspiel.load_game("tic_tac_toe") + state = tic_tac_toe.new_initial_state() + + # Construct: + # ... 
+ # xox + # ..o + state.apply_action(5) + state.apply_action(4) + state.apply_action(3) + state.apply_action(8) + game_score, _ = minimax.alpha_beta_search(tic_tac_toe, state=state) + self.assertEqual(-1., game_score) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mip_nash.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mip_nash.py new file mode 100644 index 0000000..0b857c9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mip_nash.py @@ -0,0 +1,147 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""MIP-Nash. + +Based on the first formulation of + https://dl.acm.org/doi/10.5555/1619410.1619413. +Compute optimal Nash equilibrium of two-player general-sum games +by solving a mixed-integer programming problem. +""" + + +import cvxpy as cp +import numpy as np +from open_spiel.python.algorithms.projected_replicator_dynamics import _simplex_projection +from open_spiel.python.egt.utils import game_payoffs_array + + +def mip_nash(game, objective, solver='GLPK_MI'): + """Solves for the optimal Nash for two-player general-sum games. + + Using mixed-integer programming: + min f(x_0, x_1, p_mat) + s.t. + (u_0, u_1 are Nash payoffs variables of player 0 and 1) + p_mat[0] * x_1 <= u_0 + x_0^T*p_mat[1] <= u_1 + (if a pure strategy is in the support then its payoff is Nash payoff) + u_0 - p_mat[0] * x_1 <= u_max_0 * b_0 + u_1 - x_0^T*p_mat[1] <= u_max_1 * b_1 + (if a pure strategy is not in the support its probability mass is 0) + x_0 <= 1 - b_0 + x_1 <= 1 - b_1 + (probability constraints) + x_0 >= 0 + 1^T * x_0 = 1 + x_1 >= 0 + 1^T * x_1 = 1 + for all n, b_0[n] in {0, 1}, + for all m, b_1[m] in {0, 1}, + u_max_0, u_max_1 are the maximum payoff differences of player 0 and 1. + Note: this formulation is a basic one that may only work well + for simple objective function or low-dimensional inputs. + GLPK_MI solver only handles linear objective. + To handle nonlinear and high-dimensional cases, + it is recommended to use advance solvers such as GUROBI, + or use a piecewise linear approximation of the objective. 
+ Args: + game: a pyspiel matrix game object + objective: a string representing the objective (e.g., MAX_SOCIAL_WELFARE) + solver: the mixed-integer solver used by cvxpy + + Returns: + optimal Nash (x_0, x_1) + """ + + p_mat = game_payoffs_array(game) + if len(p_mat) != 2: + raise ValueError('MIP-Nash only works for two players.') + + assert len(p_mat) == 2 + assert p_mat[0].shape == p_mat[1].shape + + (m_0, m_1) = p_mat[0].shape + + u_max_0 = np.max(p_mat[0]) - np.min(p_mat[0]) + u_max_1 = np.max(p_mat[1]) - np.min(p_mat[1]) + + x_0 = cp.Variable(m_0) + x_1 = cp.Variable(m_1) + u_0 = cp.Variable(1) + u_1 = cp.Variable(1) + b_0 = cp.Variable(m_0, boolean=True) + b_1 = cp.Variable(m_1, boolean=True) + + u_m = p_mat[0] @ x_1 + u_n = x_0 @ p_mat[1] + + # probabilities constraints + constraints = [x_0 >= 0, x_1 >= 0, cp.sum(x_0) == 1, cp.sum(x_1) == 1] + # support constraints + constraints.extend([u_m <= u_0, u_0 - u_m <= u_max_0 * b_0, x_0 <= 1 - b_0]) + constraints.extend([u_n <= u_1, u_1 - u_n <= u_max_1 * b_1, x_1 <= 1 - b_1]) + + variables = { + 'x_0': x_0, + 'x_1': x_1, + 'u_0': u_0, + 'u_1': u_1, + 'b_0': b_0, + 'b_1': b_1, + 'p_mat': p_mat, + } + + obj = TWO_PLAYER_OBJECTIVE[objective](variables) + prob = cp.Problem(obj, constraints) + prob.solve(solver=solver) + + return _simplex_projection(x_0.value.reshape(-1)), _simplex_projection( + x_1.value.reshape(-1) + ) + + +def max_social_welfare_two_player(variables): + """Max social welfare objective.""" + return cp.Maximize(variables['u_0'] + variables['u_1']) + + +def min_social_welfare_two_player(variables): + """Min social welfare objective.""" + return cp.Minimize(variables['u_0'] + variables['u_1']) + + +def max_support_two_player(variables): + """Max support objective.""" + return cp.Minimize(cp.sum(variables['b_0']) + cp.sum(variables['b_1'])) + + +def min_support_two_player(variables): + """Min support objective.""" + return cp.Maximize(cp.sum(variables['b_0']) + cp.sum(variables['b_1'])) + + +def max_gini_two_player(variables): + """Max gini objective.""" + return cp.Minimize( + cp.sum(cp.square(variables['x_0'])) + cp.sum(cp.square(variables['x_1'])) + ) + + +TWO_PLAYER_OBJECTIVE = { + 'MAX_SOCIAL_WELFARE': max_social_welfare_two_player, + 'MIN_SOCIAL_WELFARE': min_social_welfare_two_player, + 'MAX_SUPPORT': max_support_two_player, + 'MIN_SUPPORT': min_support_two_player, + 'MAX_GINI': max_gini_two_player, +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mip_nash_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mip_nash_test.py new file mode 100644 index 0000000..84036f9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mip_nash_test.py @@ -0,0 +1,52 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
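One way to read the support constraints in `mip_nash` above: `b_0[n] = 0` forces action `n` to earn exactly the equilibrium payoff `u_0` (it is in the support), while `b_0[n] = 1` forces `x_0[n] = 0` (no probability mass); `u_max_0` plays the role of the big-M constant. A hedged usage sketch, assuming cvxpy with a mixed-integer solver such as GLPK_MI is available:

```python
import pyspiel
from open_spiel.python.algorithms.mip_nash import mip_nash

# Stag hunt: maximizing social welfare selects the (stag, stag) equilibrium.
game = pyspiel.create_matrix_game(
    [[10.0, 1.0], [8.0, 5.0]], [[10.0, 8.0], [1.0, 5.0]])
x0, x1 = mip_nash(game, objective="MAX_SOCIAL_WELFARE")
print(x0, x1)  # approximately [1. 0.] [1. 0.]
```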
+"""Tests for open_spiel.python.algorithms.mip_nash.""" + +from absl.testing import absltest +import numpy as np + +from open_spiel.python.algorithms.mip_nash import mip_nash +import pyspiel + + +class MIPNash(absltest.TestCase): + + def test_simple_games(self): + # prisoners' dilemma + pd_game = pyspiel.create_matrix_game( + [[-2.0, -10.0], [0.0, -5.0]], [[-2.0, 0.0], [-10.0, -5.0]] + ) + + pd_eq = (np.array([0, 1]), np.array([0, 1])) + + computed_eq = mip_nash(pd_game, objective="MAX_SOCIAL_WELFARE") + with self.subTest("pd"): + np.testing.assert_array_almost_equal(computed_eq[0], pd_eq[0]) + np.testing.assert_array_almost_equal(computed_eq[1], pd_eq[1]) + + # stag hunt + sh_game = pyspiel.create_matrix_game( + [[10.0, 1.0], [8.0, 5.0]], [[10.0, 8.0], [1.0, 5.0]] + ) + + sh_eq = (np.array([1, 0]), np.array([1, 0])) + + computed_eq = mip_nash(sh_game, objective="MAX_SOCIAL_WELFARE") + with self.subTest("sh"): + np.testing.assert_array_almost_equal(computed_eq[0], sh_eq[0]) + np.testing.assert_array_almost_equal(computed_eq[1], sh_eq[1]) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mmd_dilated.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mmd_dilated.py new file mode 100644 index 0000000..f716092 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mmd_dilated.py @@ -0,0 +1,408 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +r"""Python implementation of the magnetic mirror descent (MMD) algorithm. + +The algorithm operated over the sequence-from with dilated entropy. + +See https://arxiv.org/abs/2206.05825. + +One iteration of MMD consists of: +1) Compute gradients of dilated entropy + and payoffs for current sequence form policies. +2) Compute behavioural form policy starting from the bottom + of the tree and updating gradients of parent nodes along the way. +3) Convert behavioural form policy to equivalent sequence form policy. + +The last sequence form policy converges linearly (exponentially fast) +to a \alpha-reduced normal-form QRE. +""" + +import copy +import warnings +import numpy as np +from scipy import stats as scipy_stats + +from open_spiel.python import policy +from open_spiel.python.algorithms import sequence_form_utils as utils +import pyspiel + + +def neg_entropy(probs): + return -scipy_stats.entropy(probs) + + +def softmax(x): + unnormalized = np.exp(x - np.max(x)) + return unnormalized / np.sum(unnormalized) + + +def divergence(x, y, psi_x, psi_y, grad_psi_y): + """Compute Bregman divergence between x and y, B_psi(x;y). + + Args: + x: Numpy array. + y: Numpy array. + psi_x: Value of psi evaluated at x. + psi_y: Value of psi evaluated at y. + grad_psi_y: Gradient of psi evaluated at y. + + Returns: + Scalar. + """ + return psi_x - psi_y - np.dot(grad_psi_y, x - y) + + +def dilated_dgf_divergence(mmd_1, mmd_2): + """Bregman divergence between two MMDDilatedEnt objects. 
+ + The value is equivalent to a sum of two Bregman divergences + over the sequence form, one for each player. + + Args: + mmd_1: MMDDilatedEnt Object + mmd_2: MMDDilatedEnt Object + + Returns: + Scalar. + """ + + dgf_values = [mmd_1.dgf_eval(), mmd_2.dgf_eval()] + dgf_grads = mmd_2.dgf_grads() + div = 0 + for player in range(2): + div += divergence(mmd_1.sequences[player], mmd_2.sequences[player], + dgf_values[0][player], dgf_values[1][player], + dgf_grads[player]) + return div + + +class MMDDilatedEnt(object): + r"""Implements Magnetic Mirror Descent (MMD) with Dilated Entropy. + + The implementation uses the sequence form representation. + + The policies converge to a \alpha-reduced normal form QRE of a + two-player zero-sum extensive-form game. If \alpha is set + to zero then the method is equivalent to mirror descent ascent + over the sequence form with dilated entropy and the policies + will converge on average to a nash equilibrium with + the appropriate stepsize schedule (or approximate equilirbrium + for fixed stepsize). + + The main iteration loop is implemented in `update_sequences`: + + ```python + game = pyspiel.load_game("game_name") + mmd = MMDDilatedEnt(game, alpha=0.1) + for i in range(num_iterations): + mmd.update_sequences() + ``` + The gap in the regularized game (i.e. 2x exploitability) converges + to zero and can be computed: + + ```python + gap = mmd.get_gap() + ``` + The policy (i.e. behavioural form policy) can be retrieved: + ```python + policies = mmd.get_policies() + ``` + + The average sequences and policies can be retrieved: + + ```python + avg_sequences = mmd.get_avg_sequences() + avg_policies = mmd.get_avg_policies() + ``` + + """ + + def __init__(self, game, alpha, stepsize=None): + """Initialize the solver object. + + Args: + game: a zeros-um spiel game with two players. + alpha: weight of dilated entropy regularization. If alpha > 0 MMD + will converge to an alpha-QRE. If alpha = 0 mmd will converge to + Nash on average. + stepsize: MMD stepsize. Will be set automatically if None. + """ + assert game.num_players() == 2 + assert game.get_type().utility == pyspiel.GameType.Utility.ZERO_SUM + assert game.get_type().dynamics == pyspiel.GameType.Dynamics.SEQUENTIAL + assert (game.get_type().chance_mode + == pyspiel.GameType.ChanceMode.DETERMINISTIC or + game.get_type().chance_mode + == pyspiel.GameType.ChanceMode.EXPLICIT_STOCHASTIC) + assert alpha >= 0 + + npl = game.num_players() + self.empy_state_action_keys = [ + f"***EMPTY_INFOSET_ACTION_P{player}***" for player in range(npl) + ] + self.empty_infoset_keys = [ + f"***EMPTY_INFOSET_P{player}***" for player in range(npl) + ] + + self.game = game + self.alpha = float(alpha) + (self.infosets, self.infoset_actions_to_seq, self.infoset_action_maps, + self.infoset_parent_map, self.payoff_mat, + self.infoset_actions_children) = utils.construct_vars(game) + self.payoff_mat = -self.payoff_mat[0] # payoff_mat is for min player + + if stepsize is not None: + self.stepsize = stepsize + else: + self.stepsize = self.alpha / (np.max(np.abs(self.payoff_mat))**2) + + if self.stepsize == 0.: + warnings.warn("MMD stepsize is 0, probably because alpha = 0.") + + self.sequences = utils.uniform_random_seq(game, self.infoset_actions_to_seq) + self.avg_sequences = copy.deepcopy(self.sequences) + self.iteration_count = 1 + + def get_parent_seq(self, player, infostate): + """Looks up the parent sequence value for a given infostate. + + Args: + player: player number, either 0 or 1. + infostate: infostate id string. 
+ + Returns: + Scalar. + """ + parent_isa_key = self.infoset_parent_map[player][infostate] + seq_id = self.infoset_actions_to_seq[player][parent_isa_key] + parent_seq = self.sequences[player][seq_id] + return parent_seq + + def get_infostate_seq(self, player, infostate): + """Gets vector of sequence form values corresponding to a given infostate. + + Args: + player: player number, either 0 or 1. + infostate: infostate id string. + + Returns: + Numpy array. + """ + seq_idx = [ + self.infoset_actions_to_seq[player][isa_key] + for isa_key in self.infoset_action_maps[player][infostate] + ] + seqs = np.array([self.sequences[player][idx] for idx in seq_idx]) + return seqs + + def dgf_eval(self): + """Computes the value of dilated entropy for current sequences. + + Returns: + List of values, one for each player. + """ + dgf_value = [0., 0.] + + for player in range(2): + for infostate in self.infosets[player]: + + if utils.is_root(infostate, player): + continue + + parent_seq = self.get_parent_seq(player, infostate) + if parent_seq > 0: + children_seq = self.get_infostate_seq(player, infostate) + dgf_value[player] += parent_seq * neg_entropy( + children_seq / parent_seq) + + return dgf_value + + def dgf_grads(self): + """Computes gradients of dilated entropy for each player and current seqs. + + Returns: + A list of numpy arrays. + """ + grads = [np.zeros(len(self.sequences[0])), np.zeros(len(self.sequences[1]))] + for player in range(2): + for infostate in self.infosets[player]: + + # infostates contain empty sequence for root variable + if utils.is_root(infostate, player): + continue + + parent_seq = self.get_parent_seq(player, infostate) + if parent_seq > 0: + + for isa_key in self.infoset_action_maps[player][infostate]: + # compute infostate term + seq_idx = self.infoset_actions_to_seq[player][isa_key] + seq = self.sequences[player][seq_idx] + grads[player][seq_idx] += np.log(seq / parent_seq) + 1 + + # compute terms from children if there are any + num_children = len(self.infoset_actions_children[player].get( + isa_key, [])) + grads[player][seq_idx] -= num_children + return grads + + def update_sequences(self): + """Performs one step of MMD.""" + self.iteration_count += 1 + psi_grads = self.dgf_grads() + # pylint: disable=invalid-unary-operand-type + grads = [ + (self.stepsize * self.payoff_mat @ self.sequences[1] - psi_grads[0]) / + ((1 + self.stepsize * self.alpha)), + (-self.stepsize * self.payoff_mat.T @ self.sequences[0] - psi_grads[1]) + / (1 + self.stepsize * self.alpha) + ] + + new_policy = policy.TabularPolicy(self.game) + for player in range(2): + self._update_state_sequences(self.empty_infoset_keys[player], + grads[player], player, new_policy) + + self.sequences = utils.policy_to_sequence( + self.game, new_policy, self.infoset_actions_to_seq + ) + self.update_avg_sequences() + + def _update_state_sequences(self, infostate, g, player, pol): + """Update the state sequences.""" + + isa_keys = self.infoset_action_maps[player][infostate] + seq_idx = [ + self.infoset_actions_to_seq[player][isa_key] for isa_key in isa_keys + ] + + for isa_key, isa_idx in zip(isa_keys, seq_idx): + + # update children first if there are any + children = self.infoset_actions_children[player].get(isa_key, []) + for child in children: + self._update_state_sequences(child, g, player, pol) + # update gradient + child_isa_keys = self.infoset_action_maps[player][child] + child_seq_idx = [ + self.infoset_actions_to_seq[player][child_isa_key] + for child_isa_key in child_isa_keys + ] + g_child = np.array([g[idx] for 
idx in child_seq_idx]) + + actions_child = [ + utils.get_action_from_key(child_isa_key) + for child_isa_key in child_isa_keys + ] + policy_child = pol.policy_for_key(child)[:] + policy_child = np.array([policy_child[a] for a in actions_child]) + g[isa_idx] += np.dot(g_child, policy_child) + g[isa_idx] += neg_entropy(policy_child) + + # no update needed for empty sequence + if utils.is_root(infostate, player): + return + + state_policy = pol.policy_for_key(infostate) + g_infostate = np.array([g[idx] for idx in seq_idx]) + actions = [utils.get_action_from_key(isa_key) for isa_key in isa_keys] + new_state_policy = softmax(-g_infostate) + for action, pr in zip(actions, new_state_policy): + state_policy[action] = pr + + def get_gap(self): + """Computes saddle point gap of the regularized game. + + The gap measures convergence to the alpha-QRE. + + Returns: + Scalar. + """ + assert self.alpha > 0, "gap cannot be computed for alpha = 0" + grads = [(self.payoff_mat @ self.sequences[1]) / (self.alpha), + (-self.payoff_mat.T @ self.sequences[0]) / (self.alpha)] + dgf_values = self.dgf_eval() + + br_policy = policy.TabularPolicy(self.game) + for player in range(2): + self._update_state_sequences(self.empty_infoset_keys[player], + grads[player], player, br_policy) + + br_sequences = utils.policy_to_sequence( + self.game, br_policy, self.infoset_actions_to_seq + ) + curr_sequences = copy.deepcopy(self.sequences) + self.sequences = br_sequences + br_dgf_values = self.dgf_eval() + self.sequences = curr_sequences + + # gap of sequences (x,y) + # d(x) + max_y' x.T A y'-d(y') + d(y) - min_x' d(x') + x'.T Ay + + gap = 0 + gap += curr_sequences[0].T @ self.payoff_mat @ br_sequences[1] + gap += self.alpha * (dgf_values[1] - br_dgf_values[1]) + gap += self.alpha * (dgf_values[0] - br_dgf_values[0]) + gap += -br_sequences[0].T @ self.payoff_mat @ curr_sequences[1] + return gap + + def update_avg_sequences(self): + for player in range(2): + self.avg_sequences[player] = self.avg_sequences[player] * ( + self.iteration_count - 1) + self.sequences[player] + self.avg_sequences[ + player] = self.avg_sequences[player] / self.iteration_count + + def current_sequences(self): + """Retrieves the current sequences. + + Returns: + the current sequences for each player as list of numpy arrays. + """ + return self.sequences + + def get_avg_sequences(self): + """Retrieves the average sequences. + + Returns: + the average sequences for each player as list of numpy arrays. + """ + return self.avg_sequences + + def get_policies(self): + """Convert current sequences to equivalent behavioural form policies. + + Returns: + spiel TabularPolicy Object. + """ + return utils.sequence_to_policy( + self.sequences, + self.game, + self.infoset_actions_to_seq, + self.infoset_action_maps, + ) + + def get_avg_policies(self): + """Convert average sequences to equivalent behavioural form policies. + + Returns: + spiel TabularPolicy Object. 
+ """ + return utils.sequence_to_policy( + self.avg_sequences, + self.game, + self.infoset_actions_to_seq, + self.infoset_action_maps, + ) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mmd_dilated_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mmd_dilated_test.py new file mode 100644 index 0000000..bcb085c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/mmd_dilated_test.py @@ -0,0 +1,152 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.mmd_dilated.py.""" +import copy + +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np +from open_spiel.python.algorithms import mmd_dilated +import pyspiel + + +_DATA = [ + { + 'game': + pyspiel.load_game('kuhn_poker'), + 'inverse_alpha': + 10, + 'gambit_qre_sol': [ + np.array([ + 1., 0.75364232, 0.64695966, 0.10668266, 0.24635768, 0.70309809, + 0.25609184, 0.44700625, 0.29690191, 0.47546799, 0.01290797, + 0.46256001, 0.52453201 + ]), + np.array([ + 1., 0.63415944, 0.36584056, 0.41154828, 0.58845172, 0.28438486, + 0.71561514, 0.0620185, 0.9379815, 0.65005434, 0.34994566, + 0.79722767, 0.20277233 + ]) + ] + }, + { + 'game': + pyspiel.load_game('dark_hex(board_size=2,gameversion=adh)'), + 'inverse_alpha': + 2, + 'gambit_qre_sol': [ + np.array([ + 1., 0.1997415, 0.0630504, 0.0320848, 0.0309656, 0.0320848, + 0.0309656, 0.0696913, 0.0669998, 0.0334999, 0.0334999, + 0.0334999, 0.0334999, 0.0377519, 0.0252985, 0.0252985, + 0.0252985, 0.0347624, 0.0347624, 0.0349289, 0.0349289, 0.0273, + 0.0273, 0.0396998, 0.0273, 0.3002587, 0.0832425, 0.0414444, + 0.0417981, 0.0414444, 0.0417981, 0.0983483, 0.1186679, + 0.0423458, 0.0408967, 0.0423458, 0.0408967, 0.0397914, + 0.0397914, 0.0585569, 0.0397914, 0.047948, 0.047948, 0.0707199, + 0.047948, 0.3002587, 0.1186679, 0.0707199, 0.047948, 0.047948, + 0.047948, 0.0983483, 0.0832425, 0.0408967, 0.0408967, 0.0423458, + 0.0585569, 0.0397914, 0.0397914, 0.0397914, 0.0423458, + 0.0417981, 0.0417981, 0.0414444, 0.0414444, 0.1997415, + 0.0669998, 0.0396998, 0.0273, 0.0273, 0.0273, 0.0696913, + 0.0630504, 0.0309656, 0.0309656, 0.0320848, 0.0334999, + 0.0334999, 0.0334999, 0.0349289, 0.0349289, 0.0347624, + 0.0347624, 0.0320848, 0.0334999, 0.0252985, 0.0252985, + 0.0377519, 0.0252985 + ]), + np.array([ + 1., 0.22738648, 0.07434555, 0.0790954, 0.03965962, 0.03943577, + 0.07394554, 0.03468592, 0.03925961, 0.03965962, 0.03468592, + 0.27261352, 0.10172918, 0.06014879, 0.04158039, 0.08865251, + 0.08223183, 0.04230736, 0.03992446, 0.04171322, 0.0405186, + 0.27261352, 0.08223183, 0.0405186, 0.04171322, 0.08865251, + 0.03437272, 0.05427979, 0.10172918, 0.04158039, 0.06014879, + 0.22738648, 0.08605167, 0.0346029, 0.05144877, 0.08678769, + 0.03319034, 0.05359735, 0.05454711, 0.04462109, 0.0421666, + 0.05454711, 0.08678769, 0.0421666, 0.04462109, 0.08605167, + 0.04355502, 0.04249665, 0.05083895, 0.11106131, 0.05083895, + 0.06022236, 0.11071326, 
0.05083895, 0.05987431, 0.03992446, + 0.04230736, 0.04249665, 0.04355502, 0.05359735, 0.03319034, + 0.05144877, 0.0346029, 0.05427979, 0.03437272, 0.11071326, + 0.05987431, 0.05083895, 0.11106131, 0.06022236, 0.05083895, + 0.05083895, 0.07394554, 0.0790954, 0.03943577, 0.03965962, + 0.07434555, 0.03468592, 0.03965962, 0.03925961, 0.03468592 + ]) + ] + }, +] + + +class MMDDilatedTest(parameterized.TestCase): + + @parameterized.parameters(*_DATA) + def test_solution_fixed_point(self, game, inverse_alpha, gambit_qre_sol): + # Check if a QRE solution is a fixed point of MMD + mmd = mmd_dilated.MMDDilatedEnt(game, 1. / inverse_alpha) + mmd.sequences = copy.deepcopy(gambit_qre_sol) + mmd.update_sequences() + np.testing.assert_allclose( + mmd.current_sequences()[0], gambit_qre_sol[0], rtol=1e-6) + np.testing.assert_allclose( + mmd.current_sequences()[1], gambit_qre_sol[1], rtol=1e-6) + + @parameterized.parameters(*_DATA) + def test_gap(self, game, inverse_alpha, gambit_qre_sol): + mmd = mmd_dilated.MMDDilatedEnt(game, 1. / inverse_alpha) + mmd.sequences = copy.deepcopy(gambit_qre_sol) + np.testing.assert_allclose(mmd.get_gap(), 0., atol=1e-6) + + @parameterized.parameters((0.), (0.5), (1.), (1.5)) + def test_rps_update(self, alpha): + game = pyspiel.load_game_as_turn_based('matrix_rps') + start_sequences = [ + np.array([1, 0.2, 0.2, 0.6]), + np.array([1, 0.5, 0.2, 0.3]) + ] + mmd = mmd_dilated.MMDDilatedEnt(game, alpha) + mmd.sequences = copy.deepcopy(start_sequences) + + mmd.update_sequences() + updated_sequences = copy.deepcopy(start_sequences) + # manually perform update for p1 + updated_sequences[0][1:] = updated_sequences[0][1:] * np.exp( + mmd.stepsize * -mmd.payoff_mat[1:, 1:] @ start_sequences[1][1:]) + updated_sequences[0][1:] = updated_sequences[0][1:]**( + 1. / (1 + mmd.stepsize * alpha)) + updated_sequences[0][1:] = updated_sequences[0][1:] / np.sum( + updated_sequences[0][1:]) + np.testing.assert_allclose(mmd.current_sequences()[0], updated_sequences[0]) + + # manually perform update for p2 + updated_sequences[1][1:] = updated_sequences[1][1:] * np.exp( + mmd.stepsize * mmd.payoff_mat[1:, 1:].T @ start_sequences[0][1:]) + updated_sequences[1][1:] = updated_sequences[1][1:]**( + 1. / (1 + mmd.stepsize * alpha)) + updated_sequences[1][1:] = updated_sequences[1][1:] / np.sum( + updated_sequences[1][1:]) + np.testing.assert_allclose(mmd.current_sequences()[1], updated_sequences[1]) + + if alpha > 0: + # gap cannot be computed for a value of alpha = 0 + # check that uniform random has a gap of zero + mmd.sequences = [ + np.array([1, 0.33333333, 0.33333333, 0.33333333]), + np.array([1, 0.33333333, 0.33333333, 0.33333333]) + ] + np.testing.assert_allclose(mmd.get_gap(), 0.) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/nash_averaging.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/nash_averaging.py new file mode 100644 index 0000000..357563d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/nash_averaging.py @@ -0,0 +1,164 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
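A minimal usage sketch for the `MMDDilatedEnt` solver added above, assuming `pyspiel` is importable; the game choice (Kuhn poker), the temperature `0.1`, and the iteration budget are arbitrary illustrative values. The gap returned by `get_gap()` is the convergence measure documented in the class.

```
import pyspiel
from open_spiel.python.algorithms import mmd_dilated

game = pyspiel.load_game("kuhn_poker")
# A positive regularization temperature targets the alpha-QRE and makes get_gap() well defined.
solver = mmd_dilated.MMDDilatedEnt(game, 0.1)
for i in range(200):
    solver.update_sequences()
    if (i + 1) % 50 == 0:
        print("iteration", i + 1, "gap", solver.get_gap())
# Behavioural-form policies recovered from the current and averaged sequences.
current_policy = solver.get_policies()
average_policy = solver.get_avg_policies()
```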
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Nash averaging. + +Based on https://arxiv.org/abs/1806.02643. An axiomatic strategy evaluation +metric for Agent-vs-Agent or Agent-vs-Task two-player zero-sum games. +""" + +import cvxpy as cp +import numpy as np + +from open_spiel.python.egt.utils import game_payoffs_array + + +def _max_entropy_symmetric_nash(p_mat, eps=1e-9): + """Solves for the maxent symmetric nash for symmetric 2P zero-sum games. + + Using convex programming: + min p^Tlog(p) + s.t. + p_mat.dot(p) <= 0, since game value must be 0 + p >= 0 + 1^T * p = 1 + + Args: + p_mat: an N*N anti-symmetric payoff matrix for the row player + eps: minimum probability threshold + + Returns: + p*: a maxent symmetric nash + """ + assert np.array_equal(p_mat, -p_mat.T) and eps >= 0 and eps <= 0.5 + n = len(p_mat) + x = cp.Variable(shape=n) + obj = cp.Maximize(cp.sum(cp.entr(x))) + constraints = [p_mat @ x <= 0, x >= eps * np.ones(n)] + constraints.append(cp.sum(x) == 1) + prob = cp.Problem(obj, constraints) + prob.solve() + return x.value.reshape((-1, 1)) + + +def _max_entropy_symmetric_nash_avt(p_mat, num_agents, num_tasks, eps=1e-9): + """Solves for the maxent symmetric nash for symmetric 2P zero-sum games. + + This covers the agent-vs-task cases. + + Using convex programming: + min x^Tlog(x) + y^Tlog(y) + s.t. + x >= 0 + 1^T * x = 1 + y >= 0 + 1^T * y = 1 + forall s, such that s has exactly one unit mass on an agent strategy + and one unit mass on a task strategy, + s^T*p_mat*z <= 0, where z = [x, y], since game-value is 0. + + Args: + p_mat: an N*N anti-symmetric payoff matrix for the row player + num_agents: number of agents + num_tasks: number of tasks + eps: minimum probability threshold + + Returns: + (x*, y*): a maxent symmetric nash + """ + assert np.array_equal(p_mat, -p_mat.T) and eps >= 0 and eps <= 0.5 + n = len(p_mat) + assert n == num_agents + num_tasks + x = cp.Variable(shape=num_agents) + y = cp.Variable(shape=num_tasks) + z = cp.hstack([x, y]) + obj = cp.Maximize(cp.sum(cp.entr(z))) + constraints = [ + x >= eps * np.ones(num_agents), + cp.sum(x) == 1, + y >= eps * np.ones(num_tasks), + cp.sum(y) == 1, + ] + + dev_payoffs = p_mat @ z + for a_idx in range(num_agents): + for t_idx in range(num_tasks): + pure_strategy = np.zeros(n) + pure_strategy[a_idx] = 1 + pure_strategy[num_agents + t_idx] = 1 + pure_strategy = pure_strategy.reshape((1, -1)) + constraints.append(pure_strategy @ dev_payoffs <= 0) + + prob = cp.Problem(obj, constraints) + prob.solve() + return x.value.reshape((-1, 1)), y.value.reshape((-1, 1)) + + +def nash_averaging_avt_matrix(s_mat, eps=0.0): + """Apply the agent-vs-task Nash Averaging from Appendix D, from a matrix. + + Args: + s_mat: The S matrix from the paper, representing m rows (agents) and n + columns (tasks), with scores for the agent on the task. Note that the + values need not be normalized, but will be normalized across tasks before + being processed. + eps: minimum probability threshold. 
+ + Returns: + maxent_nash: nash mixture for row player and column player + nash_avg_score: the expected payoff under maxent_nash + """ + m, n = s_mat.shape + min_payoffs = np.min(s_mat, axis=0) + max_payoffs = np.max(s_mat, axis=0) + std_p_mat = (s_mat - min_payoffs) / (max_payoffs - min_payoffs) + a_mat = np.block([ + [np.zeros(shape=(m, m)), std_p_mat], + [-std_p_mat.T, np.zeros(shape=(n, n))], + ]) + pa_sol, pe_sol = _max_entropy_symmetric_nash_avt( + a_mat, num_agents=m, num_tasks=n, eps=eps) + pa, pe = np.asarray(pa_sol), np.asarray(pe_sol) + return (pa, pe), (std_p_mat.dot(pe), -std_p_mat.T.dot(pa)) + + +def nash_averaging(game, eps=0.0, a_v_a=True): + """Nash averaging, see https://arxiv.org/abs/1806.02643. + + Args: + game: a pyspiel game + eps: minimum probability mass for maxent nash + a_v_a: whether it is Agent-vs-Agent or Agent-vs-Task + + Returns: + maxent_nash: nash mixture for row player and column player + nash_avg_score: the expected payoff under maxent_nash + """ + + p_mat = game_payoffs_array(game) + if len(p_mat) != 2: + raise ValueError("Nash Averaging works only for two players.") + if np.max(np.abs(p_mat[0] + p_mat[1])) > 0: + raise ValueError("Must be zero-sum") + if a_v_a: + if not np.array_equal(p_mat[0], -p_mat[0].T): + raise ValueError( + "AvA only works for symmetric two-player zero-sum games.") + maxent_nash = np.array(_max_entropy_symmetric_nash(p_mat[0], eps=eps)) + return maxent_nash, p_mat[0].dot(maxent_nash) + + # For AvT, see appendix D of the paper. + # Here assumes the row player represents agents and the column player + # represents tasks. + # game does not have to be symmetric + return nash_averaging_avt_matrix(p_mat[0], eps=eps) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/nash_averaging_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/nash_averaging_test.py new file mode 100644 index 0000000..020de53 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/nash_averaging_test.py @@ -0,0 +1,122 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
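A short sketch of calling the `nash_averaging` routine above in the Agent-vs-Agent case, using the same rock-paper-scissors payoffs that appear in the test below; `cvxpy` must be installed for the underlying convex program, and the Agent-vs-Task variant is reached by passing `a_v_a=False`.

```
import numpy as np
import pyspiel
from open_spiel.python.algorithms.nash_averaging import nash_averaging

# Antisymmetric row-player payoffs for rock-paper-scissors (AvA case).
rps = np.array([[0., -1., 1.], [1., 0., -1.], [-1., 1., 0.]])
game = pyspiel.create_matrix_game(rps, -rps)
maxent_nash, nash_avg_value = nash_averaging(game)
print(maxent_nash.reshape(-1))     # expected close to [1/3, 1/3, 1/3]
print(nash_avg_value.reshape(-1))  # expected close to [0, 0, 0]
```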
+ +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np + +from open_spiel.python.algorithms.nash_averaging import nash_averaging +import pyspiel + +# transitive game test case +game_trans = pyspiel.create_matrix_game( + [[0.0, -1.0, -1.0], [1.0, 0.0, -1.0], [1.0, 1.0, 0.0]], + [[0.0, 1.0, 1.0], [-1.0, 0.0, 1.0], [-1.0, -1.0, 0.0]]) + +eq_trans = np.asarray([0., 0., 1.]) +value_trans = np.asarray([-1., -1., 0.]) + +# rock-paper-scissors test case +game_rps = pyspiel.create_matrix_game( + [[0.0, -1.0, 1.0], [1.0, 0.0, -1.0], [-1.0, 1.0, 0.0]], + [[0.0, 1.0, -1.0], [-1.0, 0.0, 1.0], [1.0, -1.0, 0.0]]) +eq_rps = np.asarray([1 / 3, 1 / 3, 1 / 3]) +value_rps = np.asarray([0., 0., 0.]) + +# game with one dominated strategy (AvA case) +p_mat0 = np.asarray([[0.0, 234., 34., -270.], [-234., 0., -38., -464.], + [-34., 38., 0., -270.], [270., 464., 270., 0.]]) +game0 = pyspiel.create_matrix_game(p_mat0, -p_mat0) +dominated_idxs0 = [0, 1, 2] + + +# game with one dominated strategy (AvT case) +p_mat1 = np.asarray([ + [0.0, 0.0, 0.0], + [1.0, 10.0, 100.0], + [2.0, 20.0, 200.0], + [3.0, 30.0, 300.0], +]) +game1 = pyspiel.create_matrix_game(p_mat1, -p_mat1) +dominated_idxs1 = [0, 1, 2] + + +# game with one multiple dominant strategy (AvT case) +p_mat2 = np.asarray([ + [0.0, 0.0, 0.0], + [1.0, 10.0, 100.0], + [2.0, 20.0, 200.0], + [3.0, 30.0, 300.0], + [3.0, 30.0, 300.0], +]) +game2 = pyspiel.create_matrix_game(p_mat2, -p_mat2) +dom_idxs2 = [3, 4] + + +class NashAveragingTest(parameterized.TestCase): + + @parameterized.named_parameters( + ("transitive_game", game_trans, eq_trans, value_trans), + ("rps_game", game_rps, eq_rps, value_rps), + ) + def test_simple_games(self, game, eq, value): + + maxent_nash, nash_avg_value = nash_averaging(game) + with self.subTest("probability"): + np.testing.assert_array_almost_equal( + eq, maxent_nash.reshape(-1), decimal=5 + ) + + with self.subTest("value"): + np.testing.assert_array_almost_equal( + value, nash_avg_value.reshape(-1), decimal=5 + ) + + @parameterized.named_parameters( + ("game0", game0, dominated_idxs0),) + def test_ava_games_with_dominated_strategy(self, game, dominated_idxs): + maxent_nash, _ = nash_averaging(game) + with self.subTest("dominated strategies have zero Nash probs"): + for idx in dominated_idxs: + self.assertAlmostEqual(maxent_nash[idx].item(), 0.0, delta=1e-5) + + @parameterized.named_parameters( + ("game1", game1, dominated_idxs1), + ) + def test_avt_games_with_dominated_strategy(self, game, dominated_idxs): + (agent_strategy, _), _ = nash_averaging(game, a_v_a=False) + with self.subTest("dominated strategies have zero Nash probs"): + for idx in dominated_idxs: + self.assertAlmostEqual(agent_strategy[idx].item(), 0.0, delta=1e-5) + + @parameterized.named_parameters( + ("game2", game2, dom_idxs2), + ) + def test_avt_games_with_multiple_dominant_strategies(self, game, dom_idxs): + (agent_strategy, _), (agent_values, _) = nash_averaging(game, a_v_a=False) + with self.subTest("dominant strategies have equal Nash probs"): + for idx in dom_idxs: + self.assertAlmostEqual( + agent_strategy[idx].item(), 1 / len(dom_idxs2), delta=1e-4 + ) + + with self.subTest("dominant strategies have equal Nash values"): + values = [agent_values[idx] for idx in dom_idxs] + self.assertAlmostEqual( + np.abs(np.max(values) - np.min(values)), 0.0, delta=1e-5 + ) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/nfg_utils.py 
b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/nfg_utils.py new file mode 100644 index 0000000..b972ec3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/nfg_utils.py @@ -0,0 +1,82 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Some helpers for normal-form games.""" + +import collections +import numpy as np + + +class StrategyAverager(object): + """A helper class for averaging strategies for players.""" + + def __init__(self, num_players, action_space_shapes, window_size=None): + """Initialize the average strategy helper object. + + Args: + num_players (int): the number of players in the game, + action_space_shapes: an vector of n integers, where each element + represents the size of player i's actions space, + window_size (int or None): if None, computes the players' average + strategies over the entire sequence, otherwise computes the average + strategy over a finite-sized window of the k last entries. + """ + self._num_players = num_players + self._action_space_shapes = action_space_shapes + self._window_size = window_size + self._num = 0 + if self._window_size is None: + self._sum_meta_strategies = [ + np.zeros(action_space_shapes[p]) for p in range(num_players) + ] + else: + self._window = collections.deque(maxlen=self._window_size) + + def append(self, meta_strategies): + """Append the meta-strategies to the averaged sequence. + + Args: + meta_strategies: a list of strategies, one per player. + """ + if self._window_size is None: + for p in range(self._num_players): + self._sum_meta_strategies[p] += meta_strategies[p] + else: + self._window.append(meta_strategies) + self._num += 1 + + def average_strategies(self): + """Return each player's average strategy. + + Returns: + The averaged strategies, as a list containing one strategy per player. + """ + + if self._window_size is None: + avg_meta_strategies = [ + np.copy(x) for x in self._sum_meta_strategies + ] + num_strategies = self._num + else: + avg_meta_strategies = [ + np.zeros(self._action_space_shapes[p]) + for p in range(self._num_players) + ] + for i in range(len(self._window)): + for p in range(self._num_players): + avg_meta_strategies[p] += self._window[i][p] + num_strategies = len(self._window) + for p in range(self._num_players): + avg_meta_strategies[p] /= num_strategies + return avg_meta_strategies diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/nfg_utils_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/nfg_utils_test.py new file mode 100644 index 0000000..ae2c151 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/nfg_utils_test.py @@ -0,0 +1,58 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
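A small sketch contrasting full and windowed averaging with the `StrategyAverager` helper defined above; the two-action strategies and the window size are arbitrary illustrative inputs.

```
import numpy as np
from open_spiel.python.algorithms import nfg_utils

full = nfg_utils.StrategyAverager(num_players=2, action_space_shapes=[2, 2])
windowed = nfg_utils.StrategyAverager(2, [2, 2], window_size=2)
history = [
    [np.array([1.0, 0.0]), np.array([1.0, 0.0])],
    [np.array([0.0, 1.0]), np.array([0.0, 1.0])],
    [np.array([0.0, 1.0]), np.array([0.0, 1.0])],
]
for strategies in history:
    full.append(strategies)
    windowed.append(strategies)
print(full.average_strategies()[0])      # ~[1/3, 2/3]: average over all appends
print(windowed.average_strategies()[0])  # ~[0, 1]: average over the last 2 only
```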
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from absl.testing import absltest + +import numpy as np +from open_spiel.python.algorithms import nfg_utils + + +class NfgUtilsTest(absltest.TestCase): + + def test_strategy_averager_len_smaller_than_window(self): + averager = nfg_utils.StrategyAverager(2, [2, 2], window_size=50) + averager.append([np.array([1.0, 0.0]), np.array([0.0, 1.0])]) + averager.append([np.array([0.0, 1.0]), np.array([1.0, 0.0])]) + avg_strategies = averager.average_strategies() + self.assertLen(avg_strategies, 2) + self.assertAlmostEqual(avg_strategies[0][0], 0.5) + self.assertAlmostEqual(avg_strategies[0][1], 0.5) + self.assertAlmostEqual(avg_strategies[1][0], 0.5) + self.assertAlmostEqual(avg_strategies[1][1], 0.5) + + def test_strategy_averager(self): + first_action_strat = np.array([1.0, 0.0]) + second_action_strat = np.array([0.0, 1.0]) + averager_full = nfg_utils.StrategyAverager(2, [2, 2]) + averager_window5 = nfg_utils.StrategyAverager(2, [2, 2], window_size=5) + averager_window6 = nfg_utils.StrategyAverager(2, [2, 2], window_size=6) + for _ in range(5): + averager_full.append([first_action_strat, first_action_strat]) + averager_window5.append([first_action_strat, first_action_strat]) + averager_window6.append([first_action_strat, first_action_strat]) + for _ in range(5): + averager_full.append([second_action_strat, second_action_strat]) + averager_window5.append([second_action_strat, second_action_strat]) + averager_window6.append([second_action_strat, second_action_strat]) + avg_full = averager_full.average_strategies() + avg_window5 = averager_window5.average_strategies() + avg_window6 = averager_window6.average_strategies() + self.assertAlmostEqual(avg_full[0][1], 0.5) + self.assertAlmostEqual(avg_window5[0][1], 5.0 / 5.0) + self.assertAlmostEqual(avg_window6[0][1], 5.0 / 6.0) + + +if __name__ == '__main__': + absltest.main() + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/noisy_policy.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/noisy_policy.py new file mode 100644 index 0000000..b6f72e1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/noisy_policy.py @@ -0,0 +1,136 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Perturbates any policy with tabular-saved, fixed noise. 
+ +The policy's probabilities P' on each state s are computed as + +P'(s) = alpha * epsilon + (1-alpha) * P(s), + +with P the former policy's probabilities, and epsilon ~ Softmax(beta * +Uniform) +""" + +import numpy as np +from open_spiel.python import policy as openspiel_policy + + +class NoisyPolicy(openspiel_policy.Policy): + """Pyspiel Best Response with added noise. + + This policy's probabilities P' on each `player_id` state s is computed as + P'(s) = alpha * epsilon + (1-alpha) * P(s), + + with P the former policy's probabilities, and epsilon ~ Softmax(beta * + Uniform) + """ + + def __init__(self, policy, player_id=None, alpha=0.1, beta=1.0): + """Initializes the noisy policy. + + Note that this noise only affects `player_id`. + + Args: + policy: Any OpenSpiel `policy.Policy` object. + player_id: The player id, the policy of whom will be made noisy. If `None` + noise will be added to the policies for all players. + alpha: Mixing noise factor. + beta: Softmax inverse temperature factor. + """ + self._policy = policy + self.game = policy.game + self.game_type = self.game.get_type() + + self.player_id = player_id + + self._noise_dict = {} + self._alpha = alpha + self._beta = beta + + def _state_key(self, state, player): + """Returns the key to use to look up this (state, player) pair.""" + if self.game_type.provides_information_state_string: + if player is None: + return state.information_state_string() + else: + return state.information_state_string(player) + elif self.game_type.provides_observation_string: + if player is None: + return state.observation_string() + else: + return state.observation_string(player) + else: + return str(state) + + def get_or_create_noise(self, state, player_id=None): + """Get noisy policy or create it and return it. + + Args: + state: the state to which the policy will be applied. + player_id: the player id that will apply the noisy policy. Default to + current_player. Should be defined in the case of simultaneous games. + + Returns: + noise_action_probs: The noisy probability distribution on the set of legal + actions. + """ + if player_id is None: + player_id = state.current_player() + info_state = self._state_key(state, player_id) + if info_state not in self._noise_dict: + action_ids = state.legal_actions(player_id) + noise = self._beta * np.random.normal(size=len(action_ids)) + noise = np.exp(noise - noise.max()) + noise /= np.sum(noise) + + self._noise_dict[info_state] = { + action_ids[i]: noise[i] for i in range(len(noise)) + } + return self._noise_dict[info_state] + + def mix_probs(self, probs, noise_probs): + return { + i: (1 - self._alpha) * probs[i] + self._alpha * noise_probs[i] + for i in probs + } + + @property + def policy(self): + return self._policy + + def action_probabilities(self, state, player_id=None): + """Returns the policy for a player in a state. + + Args: + state: A `pyspiel.State` object. + player_id: Optional, the player id for whom we want an action. Optional + unless this is a simultabeous state at which multiple players can act. + + Returns: + A `dict` of `{action: probability}` for the specified player in the + supplied state. + """ + + # If self._player_id is None, or if self.player_id == current_player, add + # noise. 
+ if ((self.player_id is None) or + (state.current_player() == self.player_id) or + (player_id == self.player_id)): + noise_probs = self.get_or_create_noise(state, player_id) + probs = self._policy.action_probabilities(state, player_id) + probs = self.mix_probs(probs, noise_probs) + return probs + + # Send the default probabilities for all other players + return self._policy.action_probabilities(state, player_id) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/noisy_policy_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/noisy_policy_test.py new file mode 100644 index 0000000..9a1a2df --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/noisy_policy_test.py @@ -0,0 +1,88 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.algorithms.noisy_policy.""" + +from absl.testing import absltest +from absl.testing import parameterized + +from open_spiel.python import games # pylint:disable=unused-import +from open_spiel.python import policy as openspiel_policy +from open_spiel.python.algorithms import get_all_states +from open_spiel.python.algorithms import noisy_policy +import pyspiel + + +class NoisyPolicyTest(parameterized.TestCase, absltest.TestCase): + + @parameterized.parameters(["kuhn_poker", "leduc_poker"]) + def test_cpp_and_python_implementations_are_identical(self, game_name): + game = pyspiel.load_game(game_name) + + policy = openspiel_policy.UniformRandomPolicy(game) + + all_states = get_all_states.get_all_states( + game, + depth_limit=-1, + include_terminals=False, + include_chance_states=False, + to_string=lambda s: s.information_state_string()) + + for current_player in range(game.num_players()): + noise = noisy_policy.NoisyPolicy( + policy, player_id=current_player, alpha=0.5, beta=10.) + for state in all_states.values(): + if state.current_player() < 0: + continue + + if state.current_player() != current_player: + self.assertEqual( + policy.action_probabilities(state), + noise.action_probabilities(state)) + else: + self.assertNotEqual( + policy.action_probabilities(state), + noise.action_probabilities(state)) + + @parameterized.parameters(["python_iterated_prisoners_dilemma"]) + def test_simultaneous_game_noisy_policy(self, game_name): + game = pyspiel.load_game(game_name) + + policy = openspiel_policy.UniformRandomPolicy(game) + + all_states = get_all_states.get_all_states( + game, + depth_limit=10, + include_terminals=False, + include_chance_states=False, + to_string=lambda s: s.history_str()) + + for current_player in range(game.num_players()): + noise = noisy_policy.NoisyPolicy( + policy, player_id=current_player, alpha=0.5, beta=10.) 
+ for state in all_states.values(): + if state.current_player() == pyspiel.PlayerId.SIMULTANEOUS: + for player_id in range(game.num_players()): + if player_id != current_player: + self.assertEqual( + policy.action_probabilities(state, player_id), + noise.action_probabilities(state, player_id)) + else: + self.assertNotEqual( + policy.action_probabilities(state, player_id), + noise.action_probabilities(state, player_id)) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/outcome_sampling_mccfr.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/outcome_sampling_mccfr.py new file mode 100644 index 0000000..018ee06 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/outcome_sampling_mccfr.py @@ -0,0 +1,144 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Python implementation for Monte Carlo Counterfactual Regret Minimization.""" + +import numpy as np +from open_spiel.python.algorithms import mccfr +import pyspiel + + +class OutcomeSamplingSolver(mccfr.MCCFRSolverBase): + """An implementation of outcome sampling MCCFR.""" + + def __init__(self, game): + super().__init__(game) + # This is the epsilon exploration factor. When sampling episodes, the + # updating player will sampling according to expl * uniform + (1 - expl) * + # current_policy. + self._expl = 0.6 + + assert game.get_type().dynamics == pyspiel.GameType.Dynamics.SEQUENTIAL, ( + "MCCFR requires sequential games. If you're trying to run it " + + "on a simultaneous (or normal-form) game, please first transform it " + + "using turn_based_simultaneous_game.") + + def iteration(self): + """Performs one iteration of outcome sampling. + + An iteration consists of one episode for each player as the update + player. + """ + for update_player in range(self._num_players): + state = self._game.new_initial_state() + self._episode( + state, update_player, my_reach=1.0, opp_reach=1.0, sample_reach=1.0) + + def _baseline(self, state, info_state, aidx): # pylint: disable=unused-argument + # Default to vanilla outcome sampling + return 0 + + def _baseline_corrected_child_value(self, state, info_state, sampled_aidx, + aidx, child_value, sample_prob): + # Applies Eq. 9 of Schmid et al. '19 + baseline = self._baseline(state, info_state, aidx) + if aidx == sampled_aidx: + return baseline + (child_value - baseline) / sample_prob + else: + return baseline + + def _episode(self, state, update_player, my_reach, opp_reach, sample_reach): + """Runs an episode of outcome sampling. + + Args: + state: the open spiel state to run from (will be modified in-place). 
+ update_player: the player to update regrets for (the other players + update average strategies) + my_reach: reach probability of the update player + opp_reach: reach probability of all the opponents (including chance) + sample_reach: reach probability of the sampling (behavior) policy + + Returns: + util is a real value representing the utility of the update player + """ + if state.is_terminal(): + return state.player_return(update_player) + + if state.is_chance_node(): + outcomes, probs = zip(*state.chance_outcomes()) + aidx = np.random.choice(range(len(outcomes)), p=probs) + state.apply_action(outcomes[aidx]) + return self._episode(state, update_player, my_reach, + probs[aidx] * opp_reach, probs[aidx] * sample_reach) + + cur_player = state.current_player() + info_state_key = state.information_state_string(cur_player) + legal_actions = state.legal_actions() + num_legal_actions = len(legal_actions) + infostate_info = self._lookup_infostate_info(info_state_key, + num_legal_actions) + policy = self._regret_matching(infostate_info[mccfr.REGRET_INDEX], + num_legal_actions) + if cur_player == update_player: + uniform_policy = ( + np.ones(num_legal_actions, dtype=np.float64) / num_legal_actions) + sample_policy = self._expl * uniform_policy + (1.0 - self._expl) * policy + else: + sample_policy = policy + sampled_aidx = np.random.choice(range(num_legal_actions), p=sample_policy) + state.apply_action(legal_actions[sampled_aidx]) + if cur_player == update_player: + new_my_reach = my_reach * policy[sampled_aidx] + new_opp_reach = opp_reach + else: + new_my_reach = my_reach + new_opp_reach = opp_reach * policy[sampled_aidx] + new_sample_reach = sample_reach * sample_policy[sampled_aidx] + child_value = self._episode(state, update_player, new_my_reach, + new_opp_reach, new_sample_reach) + + # Compute each of the child estimated values. + child_values = np.zeros(num_legal_actions, dtype=np.float64) + for aidx in range(num_legal_actions): + child_values[aidx] = self._baseline_corrected_child_value( + state, infostate_info, sampled_aidx, aidx, child_value, + sample_policy[aidx]) + value_estimate = 0 + for aidx in range(num_legal_actions): + value_estimate += policy[aidx] * child_values[aidx] + + # Update regrets and avg strategies + if cur_player == update_player: + # Estimate for the counterfactual value of the policy. + cf_value = value_estimate * opp_reach / sample_reach + + # Update regrets. + # + # Note: different from Chapter 4 of Lanctot '13 thesis, the utilities + # coming back from the recursion are already multiplied by the players' + # tail reaches and divided by the sample tail reach. So when adding + # regrets to the table, we need only multiply by the opponent reach and + # divide by the sample reach to this point. + for aidx in range(num_legal_actions): + # Estimate for the counterfactual value of the policy replaced by always + # choosing sampled_aidx at this information state. 
+ cf_action_value = child_values[aidx] * opp_reach / sample_reach + self._add_regret(info_state_key, aidx, cf_action_value - cf_value) + + # update the average policy + for aidx in range(num_legal_actions): + increment = my_reach * policy[aidx] / sample_reach + self._add_avstrat(info_state_key, aidx, increment) + + return value_estimate diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/outcome_sampling_mccfr_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/outcome_sampling_mccfr_test.py new file mode 100644 index 0000000..ebf1ade --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/outcome_sampling_mccfr_test.py @@ -0,0 +1,72 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.algorithms.cfr.""" + +from absl.testing import absltest +import numpy as np +from open_spiel.python.algorithms import exploitability +from open_spiel.python.algorithms import outcome_sampling_mccfr +import pyspiel + +# Convergence results change depending on +# the seed specified for running the tests. +# For this reason, test thresholds have been adapted +# taking the maximum Nash exploitability value obtained +# from multiple runs. 
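A numerical sketch of the vanilla (zero-baseline) child-value correction used in `_episode` above: only the sampled action receives the importance-weighted return `child_value / sample_prob`, which keeps the per-action estimate unbiased under the sampling policy. The payoffs and sampling probabilities below are arbitrary.

```
import numpy as np

sample_policy = np.array([0.25, 0.75])      # sampling (behaviour) policy
true_child_values = np.array([1.0, -2.0])   # hypothetical per-action returns

rng = np.random.default_rng(0)
n = 100_000
estimates = np.zeros(2)
for _ in range(n):
    a = rng.choice(2, p=sample_policy)
    # Zero baseline: the sampled action gets value / sample_prob, the rest get 0.
    estimates[a] += true_child_values[a] / sample_policy[a]
print(estimates / n)  # converges to true_child_values as n grows
```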
+# For more details see https://github.com/deepmind/open_spiel/pull/458 +SEED = 39823987 + + +class OutcomeSamplingMCCFRTest(absltest.TestCase): + + def test_outcome_sampling_leduc_2p(self): + np.random.seed(SEED) + game = pyspiel.load_game("leduc_poker") + os_solver = outcome_sampling_mccfr.OutcomeSamplingSolver(game) + for _ in range(10000): + os_solver.iteration() + conv = exploitability.nash_conv(game, os_solver.average_policy()) + print("Leduc2P, conv = {}".format(conv)) + + self.assertLess(conv, 3.07) + + def test_outcome_sampling_kuhn_2p(self): + np.random.seed(SEED) + game = pyspiel.load_game("kuhn_poker") + os_solver = outcome_sampling_mccfr.OutcomeSamplingSolver(game) + for _ in range(10000): + os_solver.iteration() + conv = exploitability.nash_conv(game, os_solver.average_policy()) + print("Kuhn2P, conv = {}".format(conv)) + self.assertLess(conv, 0.17) + # ensure that to_tabular() works on the returned policy + # and the tabular policy is equivalent + tabular_policy = os_solver.average_policy().to_tabular() + conv2 = exploitability.nash_conv(game, tabular_policy) + self.assertEqual(conv, conv2) + + def test_outcome_sampling_kuhn_3p(self): + np.random.seed(SEED) + game = pyspiel.load_game("kuhn_poker", {"players": 3}) + os_solver = outcome_sampling_mccfr.OutcomeSamplingSolver(game) + for _ in range(10000): + os_solver.iteration() + conv = exploitability.nash_conv(game, os_solver.average_policy()) + print("Kuhn3P, conv = {}".format(conv)) + self.assertLess(conv, 0.22) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/policy_aggregator.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/policy_aggregator.py new file mode 100644 index 0000000..cb8953f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/policy_aggregator.py @@ -0,0 +1,265 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Policy aggregator. + +Turns a weighted sum of N policies into a realization-equivalent single +policy by sweeping over the state space. +""" + +import copy +import itertools +from open_spiel.python import policy + + +class PolicyFunction(policy.Policy): + """A callable policy class.""" + + def __init__(self, pids, policies, game): + """Construct a policy function. + + Arguments: + pids: spiel player id of players these policies belong to. + policies: a list of dictionaries of keys (stringified binary observations) + to a list of probabilities for each move uid (between 0 and max_moves - + 1). + game: OpenSpiel game. 
+ """ + super().__init__(game, pids) + self._policies = policies + self._game_type = game.get_type() + + def _state_key(self, state, player_id=None): + """Returns the key to use to look up this (state, player_id) pair.""" + if self._game_type.provides_information_state_string: + if player_id is None: + return state.information_state_string() + else: + return state.information_state_string(player_id) + elif self._game_type.provides_observation_tensor: + if player_id is None: + return state.observation_tensor() + else: + return state.observation_tensor(player_id) + else: + return str(state) + + @property + def policy(self): + return self._policies + + def action_probabilities(self, state, player_id=None): + """Returns the policy for a player in a state. + + Args: + state: A `pyspiel.State` object. + player_id: Optional, the player id for whom we want an action. Optional + unless this is a simultaneous state at which multiple players can act. + + Returns: + A `dict` of `{action: probability}` for the specified player in the + supplied state. + """ + state_key = self._state_key(state, player_id=player_id) + if state.is_simultaneous_node(): + # for simultaneous node, assume player id must be provided + assert player_id >= 0 + return self._policies[player_id][state_key] + if player_id is None: + player_id = state.current_player() + return self._policies[player_id][state_key] + + +class PolicyPool(object): + """Transforms a list of list of policies (One list per player) to callable.""" + + def __init__(self, policies): + """Transforms a list of list of policies (One list per player) to callable. + + Args: + policies: List of list of policies. + """ + self._policies = policies + + def __call__(self, state, player): + return [ + a.action_probabilities(state, player_id=player) + for a in self._policies[player] + ] + + +class PolicyAggregator(object): + """Main aggregator object.""" + + def __init__(self, game, epsilon=1e-40): + self._game = game + self._game_type = game.get_type() + self._num_players = self._game.num_players() + self._policy_pool = None + self._weights = None + self._policy = {} + self._epsilon = epsilon + + def _state_key(self, state, player_id=None): + """Returns the key to use to look up this (state, player) pair.""" + # TODO(somidshafiei): fuse this with the identical PolicyFunction._state_key + if self._game_type.provides_information_state_string: + if player_id is None: + return state.information_state_string() + else: + return state.information_state_string(player_id) + elif self._game_type.provides_observation_string: + if player_id is None: + return state.observation_string() + else: + return state.observation_string(player_id) + else: + return str(state) + + def aggregate(self, pids, policies, weights): + """Aggregate the list of policies for each player. + + Arguments: + pids: the spiel player ids of the players the strategies belong to. + policies: List of list of policies (One list per player) + weights: the list of weights to attach to each policy. + + Returns: + A PolicyFunction, a callable object representing the policy. + """ + aggr_policies = [] + + for pid in pids: + aggr_policies.append(self._sub_aggregate(pid, policies, weights)) + return PolicyFunction(pids, aggr_policies, self._game) + + def _sub_aggregate(self, pid, policies, weights): + """Aggregate the list of policies for one player. + + Arguments: + pid: the spiel player id of the player the strategies belong to. 
+ policies: List of list of policies (One list per player) + weights: the list of weights to attach to each policy. + + Returns: + A PolicyFunction, a callable object representing the policy. + """ + self._policy_pool = PolicyPool(policies) + # ipdb.set_trace() + + assert self._policy_pool is not None + self._weights = weights + # string of state -> probs list + self._policy = {} + + state = self._game.new_initial_state() + my_reaches = weights[:] + self._rec_aggregate(pid, state, my_reaches) + + # Now normalize + for key in self._policy: + actions, probabilities = zip(*self._policy[key].items()) + # Add some small proba mass to avoid divide by zero, which happens for + # games with low reach probabilities for certain states (keys) + new_probs = [prob + self._epsilon for prob in probabilities] + denom = sum(new_probs) + for i in range(len(actions)): + self._policy[key][actions[i]] = new_probs[i] / denom + return self._policy + + def _rec_aggregate(self, pid, state, my_reaches): + """Recursively traverse game tree to compute aggregate policy.""" + + if state.is_terminal(): + return + elif state.is_simultaneous_node(): + + policies = self._policy_pool(state, pid) + state_key = self._state_key(state, pid) + self._policy[state_key] = {} + used_moves = state.legal_actions(pid) + + for uid in used_moves: + new_reaches = copy.deepcopy(my_reaches) + for i in range(len(policies)): + # compute the new reach for each policy for this action + new_reaches[pid][i] *= policies[i].get(uid, 0) + # add reach * prob(a) for this policy to the computed policy + if uid in self._policy[state_key].keys(): + self._policy[state_key][uid] += new_reaches[pid][i] + else: + self._policy[state_key][uid] = new_reaches[pid][i] + + num_players = self._game.num_players() + all_other_used_moves = [] + for player in range(num_players): + if player != pid: + all_other_used_moves.append(state.legal_actions(player)) + + other_joint_actions = itertools.product(*all_other_used_moves) + + # enumerate every possible other-agent actions for next-state + for other_joint_action in other_joint_actions: + for uid in used_moves: + new_reaches = copy.deepcopy(my_reaches) + for i in range(len(policies)): + # compute the new reach for each policy for this action + new_reaches[pid][i] *= policies[i].get(uid, 0) + + joint_action = list( + other_joint_action[:pid] + (uid,) + other_joint_action[pid:] + ) + new_state = state.clone() + new_state.apply_actions(joint_action) + self._rec_aggregate(pid, new_state, new_reaches) + return + + elif state.is_chance_node(): + # do not factor in opponent reaches + outcomes, _ = zip(*state.chance_outcomes()) + for i in range(0, len(outcomes)): + outcome = outcomes[i] + new_state = state.clone() + new_state.apply_action(outcome) + self._rec_aggregate(pid, new_state, my_reaches) + return + else: + turn_player = state.current_player() + + state_key = self._state_key(state, turn_player) + legal_policies = self._policy_pool(state, turn_player) + if pid == turn_player: + # update the current node + # will need the observation to query the policies + if state_key not in self._policy: + self._policy[state_key] = {} + + used_moves = state.legal_actions(turn_player) + + for uid in used_moves: + new_reaches = copy.deepcopy(my_reaches) + if pid == turn_player: + for i in range(len(legal_policies)): + # compute the new reach for each policy for this action + new_reaches[turn_player][i] *= legal_policies[i].get(uid, 0) + # add reach * prob(a) for this policy to the computed policy + if uid in 
self._policy[state_key].keys(): + self._policy[state_key][uid] += new_reaches[turn_player][i] + else: + self._policy[state_key][uid] = new_reaches[turn_player][i] + + # recurse + new_state = state.clone() + new_state.apply_action(uid) + self._rec_aggregate(pid, new_state, new_reaches) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/policy_aggregator_joint.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/policy_aggregator_joint.py new file mode 100644 index 0000000..f76dc98 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/policy_aggregator_joint.py @@ -0,0 +1,250 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Policy aggregator. + +A joint policy is a list of `num_players` policies. +This files enables to compute mixtures of such joint-policies to get a new +policy. +""" + +import copy +import itertools +from open_spiel.python import policy + + +def _aggregate_at_state(joint_policies, state, player): + """Returns {action: prob} for `player` in `state` for all joint policies. + + Args: + joint_policies: List of joint policies. + state: Openspiel State + player: Current Player + + Returns: + {action: prob} for `player` in `state` for all joint policies. + """ + return [ + joint_policy[player].action_probabilities(state, player_id=player) + for joint_policy in joint_policies + ] + + +class _DictPolicy(policy.Policy): + """A callable policy class.""" + + def __init__(self, game, policies_as_dict): + """Constructs a policy function. + + Arguments: + game: OpenSpiel game. + policies_as_dict: A list of `num_players` policy objects {action: prob}. + """ + self._game = game + self._game_type = game.get_type() + self._policies_as_dict = policies_as_dict + + def _state_key(self, state, player_id=None): + """Returns the key to use to look up this (state, player_id) pair.""" + if self._game_type.provides_information_state_string: + if player_id is None: + return state.information_state_string() + else: + return state.information_state_string(player_id) + elif self._game_type.provides_observation_string: + if player_id is None: + return state.observation_string() + else: + return state.observation_string(player_id) + else: + return str(state) + + @property + def policies(self): + return self._policies_as_dict + + def action_probabilities(self, state, player_id=None): + """Returns the policy for a player in a state. + + Args: + state: A `pyspiel.State` object. + player_id: Optional, the player id for whom we want an action. Optional + unless this is a simultaneous state at which multiple players can act. + + Returns: + A `dict` of `{action: probability}` for the specified player in the + supplied state. 
+ """ + state_key = self._state_key(state, player_id=player_id) + if player_id is None: + player_id = state.current_player() + return self._policies_as_dict[player_id][state_key] + + +class JointPolicyAggregator(object): + """Main aggregator object.""" + + def __init__(self, game, epsilon=1e-40): + self._game = game + self._game_type = game.get_type() + self._num_players = self._game.num_players() + self._joint_policies = None + self._policy = {} # A Dict from info-state to {action: prob} + self._epsilon = epsilon + + def _state_key(self, state, player_id=None): + """Returns the key to use to look up this (state, player) pair.""" + if self._game_type.provides_information_state_string: + if player_id is None: + return state.information_state_string() + else: + return state.information_state_string(player_id) + elif self._game_type.provides_observation_string: + if player_id is None: + return state.observation() + else: + return state.observation(player_id) + else: + return str(state) + + def aggregate(self, pids, joint_policies, weights): + r"""Computes the weighted-mixture of the joint policies. + + Let P of shape [num_players] be the joint policy, and W some weights. + Let N be the number of policies (i.e. len(policies)). + We return the policy P' such that for all state `s`: + + P[s] ~ \sum_{i=0}^{N-1} (policies[i][player(s)](s) * weights[i] * + reach_prob(s, policies[i])) + + Arguments: + pids: Spiel player ids of the players the strategies belong to. + joint_policies: List of list of policies (One list per joint strategy) + weights: List of weights to attach to each joint strategy. + + Returns: + A _DictPolicy, a callable object representing the policy. + """ + aggr_policies = [] + self._joint_policies = joint_policies + + # To do(pmuller): We should be able to do a single recursion. + for pid in pids: + aggr_policies.append(self._sub_aggregate(pid, weights)) + return _DictPolicy(self._game, aggr_policies) + + def _sub_aggregate(self, pid, weights): + """Aggregate the list of policies for one player. + + Arguments: + pid: Spiel player id of the player the strategies belong to. + weights: List of weights to attach to each joint strategy. + + Returns: + A _DictPolicy, a callable object representing the policy. 
+ """ + + # string of state -> probs list + self._policy = {} + + state = self._game.new_initial_state() + self._rec_aggregate(pid, state, copy.deepcopy(weights)) + + # Now normalize + for key in self._policy: + actions, probabilities = zip(*self._policy[key].items()) + new_probs = [prob + self._epsilon for prob in probabilities] + denom = sum(new_probs) + for i in range(len(actions)): + self._policy[key][actions[i]] = new_probs[i] / denom + return self._policy + + def _rec_aggregate(self, pid, state, my_reaches): + """Recursively traverse game tree to compute aggregate policy.""" + if state.is_terminal(): + return + + if state.is_simultaneous_node(): + policies = _aggregate_at_state(self._joint_policies, state, pid) + state_key = self._state_key(state, pid) + + self._policy[state_key] = {} + used_moves = state.legal_actions(pid) + + for uid in used_moves: + new_reaches = copy.deepcopy(my_reaches) + for i in range(len(policies)): + # compute the new reach for each policy for this action + new_reaches[i] *= policies[i].get(uid, 0) + # add reach * prob(a) for this policy to the computed policy + if uid in self._policy[state_key].keys(): + self._policy[state_key][uid] += new_reaches[i] + else: + self._policy[state_key][uid] = new_reaches[i] + + num_players = self._game.num_players() + all_other_used_moves = [] + for player in range(num_players): + if player != pid: + all_other_used_moves.append(state.legal_actions(player)) + + other_joint_actions = itertools.product(*all_other_used_moves) + + # enumerate every possible other-agent actions for next-state + for other_joint_action in other_joint_actions: + for uid in used_moves: + new_reaches = copy.deepcopy(my_reaches) + for i in range(len(policies)): + # compute the new reach for each policy for this action + new_reaches[i] *= policies[i].get(uid, 0) + + joint_action = list( + other_joint_action[:pid] + (uid,) + other_joint_action[pid:] + ) + new_state = state.clone() + new_state.apply_actions(joint_action) + self._rec_aggregate(pid, new_state, new_reaches) + return + + if state.is_chance_node(): + for action in state.legal_actions(): + new_state = state.child(action) + self._rec_aggregate(pid, new_state, my_reaches) + return + + current_player = state.current_player() + + state_key = self._state_key(state, current_player) + action_probabilities_list = _aggregate_at_state(self._joint_policies, state, + current_player) + if pid == current_player: + # update the current node + # will need the observation to query the policies + if state_key not in self._policy: + self._policy[state_key] = {} + + for action in state.legal_actions(): + new_reaches = copy.deepcopy(my_reaches) + if pid == current_player: + for idx, state_action_probs in enumerate(action_probabilities_list): + # compute the new reach for each policy for this action + new_reaches[idx] *= state_action_probs.get(action, 0) + # add reach * prob(a) for this policy to the computed policy + if action in self._policy[state_key].keys(): + self._policy[state_key][action] += new_reaches[idx] + else: + self._policy[state_key][action] = new_reaches[idx] + + # recurse + self._rec_aggregate(pid, state.child(action), new_reaches) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/policy_aggregator_joint_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/policy_aggregator_joint_test.py new file mode 100644 index 0000000..c2db128 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/policy_aggregator_joint_test.py @@ -0,0 +1,63 @@ 
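A minimal sketch of the `JointPolicyAggregator` defined above, mixing two joint policies of Kuhn poker with arbitrary weights 0.75/0.25; `UniformRandomPolicy` and `FirstActionPolicy` come from `open_spiel.python.policy`, and the printed distribution should roughly follow the weighted-mixture formula in `aggregate`'s docstring.

```
import pyspiel
from open_spiel.python import policy
from open_spiel.python.algorithms import policy_aggregator_joint

game = pyspiel.load_game("kuhn_poker")
joint_policies = [
    [policy.UniformRandomPolicy(game), policy.UniformRandomPolicy(game)],
    [policy.FirstActionPolicy(game), policy.FirstActionPolicy(game)],
]
aggregator = policy_aggregator_joint.JointPolicyAggregator(game)
mixture = aggregator.aggregate([0, 1], joint_policies, [0.75, 0.25])

state = game.new_initial_state()
while state.is_chance_node():
    state.apply_action(state.legal_actions()[0])
# Roughly 0.75 * uniform + 0.25 * first-action at this infostate,
# up to the epsilon smoothing applied during normalisation.
print(mixture.action_probabilities(state))
```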
+# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.algorithms.policy_aggregator_joint.""" + +from absl.testing import absltest +from absl.testing import parameterized + +import numpy as np + +from open_spiel.python import policy +from open_spiel.python import rl_environment +from open_spiel.python.algorithms import policy_aggregator_joint + + +class JointPolicyAggregatorTest(parameterized.TestCase): + + @parameterized.named_parameters( + { + "testcase_name": "kuhn_poker", + "game_name": "kuhn_poker" + }, { + "testcase_name": "leduc_poker", + "game_name": "leduc_poker" + }) + def test_policy_aggregation_random(self, game_name): + env = rl_environment.Environment(game_name) + num_players = 2 + num_joint_policies = 4 + + joint_policies = [[ + policy.UniformRandomPolicy(env.game) for _ in range(num_players) + ] for _ in range(num_joint_policies)] + probabilities = np.ones(len(joint_policies)) + probabilities /= np.sum(probabilities) + + pol_ag = policy_aggregator_joint.JointPolicyAggregator(env.game) + aggr_policy = pol_ag.aggregate([0, 1], joint_policies, probabilities) + + self.assertLen(aggr_policy.policies, num_players) + for player in range(num_players): + player_policy = aggr_policy.policies[player] + self.assertNotEmpty(player_policy) + for state_action_probs in player_policy.values(): + probs = list(state_action_probs.values()) + expected_prob = 1. / len(probs) + for prob in probs: + self.assertAlmostEqual(expected_prob, prob, places=10) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/policy_aggregator_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/policy_aggregator_test.py new file mode 100644 index 0000000..32c1768 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/policy_aggregator_test.py @@ -0,0 +1,115 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
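
> Editorial aside, not part of the patch: the `JointPolicyAggregator` added above turns a weighted mixture of joint policies into one average policy per player, which is what PSRO-style meta-strategy evaluation needs. A minimal usage sketch follows, assuming the vendored OpenSpiel build is importable; the game (`kuhn_poker`) and the two equally weighted uniform joint policies are illustrative choices mirroring the unit test below.

```python
import numpy as np

from open_spiel.python import policy
from open_spiel.python import rl_environment
from open_spiel.python.algorithms import policy_aggregator_joint

env = rl_environment.Environment("kuhn_poker")
num_players = 2

# Two joint policies (one policy per player in each), mixed with equal weight.
joint_policies = [
    [policy.UniformRandomPolicy(env.game) for _ in range(num_players)]
    for _ in range(2)
]
weights = np.ones(2) / 2

aggregator = policy_aggregator_joint.JointPolicyAggregator(env.game)
aggr_policy = aggregator.aggregate([0, 1], joint_policies, weights)

# aggr_policy.policies[p] maps info-state strings to {action: prob} dicts.
for player in range(num_players):
    print(player, len(aggr_policy.policies[player]))
```
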
+ +"""Tests for open_spiel.python.algorithms.policy_aggregator.""" + +import unittest +from absl.testing import parameterized + +import numpy as np + +from open_spiel.python import policy +from open_spiel.python import rl_environment +from open_spiel.python.algorithms import policy_aggregator +import pyspiel + + +class PolicyAggregatorTest(parameterized.TestCase): + + @parameterized.named_parameters( + { + "testcase_name": "kuhn_poker", + "game_name": "kuhn_poker" + }, { + "testcase_name": "leduc_poker", + "game_name": "leduc_poker" + }) + def test_policy_aggregation_random(self, game_name): + env = rl_environment.Environment(game_name) + + policies = [[policy.UniformRandomPolicy(env.game) + for _ in range(2)] + for _ in range(2)] + probabilities = [ + list(np.ones(len(policies)) / len(policies)) for _ in range(2) + ] + + pol_ag = policy_aggregator.PolicyAggregator(env.game) + aggr_policy = pol_ag.aggregate([0], policies, probabilities) + + for item in aggr_policy.policy[0].items(): + _, probs = zip(*item[1].items()) + const_probs = tuple([probs[0]] * len(probs)) + self.assertEqual(probs, const_probs) + + @parameterized.named_parameters( + { + "testcase_name": "kuhn_poker", + "game_name": "kuhn_poker" + }, { + "testcase_name": "leduc_poker", + "game_name": "leduc_poker" + }) + def test_policy_aggregation_tabular_randinit(self, game_name): + env = rl_environment.Environment(game_name) + + mother_policy = policy.TabularPolicy(env.game).copy_with_noise( + 1, 10, np.random.RandomState(0)) + policies = [[mother_policy.__copy__() for _ in range(2)] for _ in range(2)] + probabilities = [ + list(np.ones(len(policies)) / len(policies)) for _ in range(2) + ] + + pol_ag = policy_aggregator.PolicyAggregator(env.game) + aggr_policy = pol_ag.aggregate([0], policies, probabilities) + + for state, value in aggr_policy.policy[0].items(): + polici = mother_policy.policy_for_key(state) + + value_normal = { + action: probability + for action, probability in enumerate(polici) + if probability > 0 + } + for key in value_normal.keys(): + self.assertAlmostEqual(value[key], value_normal[key], 8) + + @parameterized.named_parameters({ + "testcase_name": "tic_tac_toe", + "game_name": "tic_tac_toe", + }) + def test_policy_aggregation_variadic(self, game_name): + game = pyspiel.load_game(game_name) + + uniform_policy = policy.UniformRandomPolicy(game) + first_action_policy = policy.FirstActionPolicy(game) + + pol_ag = policy_aggregator.PolicyAggregator(game) + + weights0 = [1.0, 0.0] + player0 = pol_ag.aggregate( + list(range(game.num_players())), + [[uniform_policy, first_action_policy]] + [[uniform_policy]] * + (game.num_players() - 1), + [weights0] + [[1.0]] * (game.num_players() - 1)) + state = game.new_initial_state() + action_prob = player0.action_probabilities(state) + for action in action_prob: + if action_prob[action] > 0: + self.assertAlmostEqual(action_prob[action], + 1. / len(state.legal_actions())) + + +if __name__ == "__main__": + unittest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/policy_utils.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/policy_utils.py new file mode 100644 index 0000000..fecc06a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/policy_utils.py @@ -0,0 +1,101 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Adds useful functions for working with dictionaries representing policies.""" + +from open_spiel.python.algorithms import get_all_states + + +def policy_to_dict(player_policy, + game, + all_states=None, + state_to_information_state=None): + """Converts a Policy instance into a tabular policy represented as a dict. + + This is compatible with the C++ TabularExploitability code (i.e. + pyspiel.exploitability, pyspiel.TabularBestResponse, etc.). + + While you do not have to pass the all_states and state_to_information_state + arguments, creating them outside of this funciton will speed your code up + dramatically. + + Args: + player_policy: The policy you want to convert to a dict. + game: The game the policy is for. + all_states: The result of calling get_all_states.get_all_states. Can be + cached for improved performance. + state_to_information_state: A dict mapping str(state) to + state.information_state for every state in the game. Can be cached for + improved performance. + + Returns: + A dictionary version of player_policy that can be passed to the C++ + TabularBestResponse, Exploitability, and BestResponse functions/classes. + """ + if all_states is None: + all_states = get_all_states.get_all_states( + game, + depth_limit=-1, + include_terminals=False, + include_chance_states=False) + state_to_information_state = { + state: all_states[state].information_state_string() + for state in all_states + } + tabular_policy = dict() + for state in all_states: + information_state = state_to_information_state[state] + tabular_policy[information_state] = list( + player_policy.action_probabilities(all_states[state]).items()) + return tabular_policy + + +def get_best_response_actions_as_string(best_response_actions): + """Turns a dict into a bytestring compatible with C++. + + i.e. the bytestring can be copy-pasted as the brace initialization for a + {std::unordered_,std::,absl::flat_hash_}map. + + Args: + best_response_actions: A dict mapping bytes to ints. + + Returns: + A bytestring that can be copy-pasted to brace-initialize a C++ + std::map. + """ + best_response_keys = sorted(best_response_actions.keys()) + best_response_strings = [ + "%s: %i" % (k, best_response_actions[k]) for k in best_response_keys + ] + return "{%s}" % (", ".join(best_response_strings)) + + +def tabular_policy_to_cpp_map(policy): + """Turns a policy into a C++ compatible bytestring for brace-initializing. + + Args: + policy: A dict representing a tabular policy. The keys are infostate + bytestrings. + + Returns: + A bytestring that can be copy-pasted to brace-initialize a C++ + std::map. 
+ """ + cpp_entries = [] + policy_keys = sorted(policy.keys()) + for key in policy_keys: + tuple_strs = ["{%i, %s}" % (p[0], p[1].astype(str)) for p in policy[key]] + value = "{" + ", ".join(tuple_strs) + "}" + cpp_entries.append('{"%s", %s}' % (key, value)) + return "{%s}" % (",\n".join(cpp_entries)) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/projected_replicator_dynamics.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/projected_replicator_dynamics.py new file mode 100644 index 0000000..8065e72 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/projected_replicator_dynamics.py @@ -0,0 +1,202 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Projected Replicator Dynamics Algorithm. + +This is an N-player implementation of the Projected Replicator Dynamics +algorithm described in Lanctot et al., 2017: https://arxiv.org/abs/1711.00832. +""" + +import numpy as np + +from open_spiel.python.algorithms import nfg_utils + + +def _partial_multi_dot(player_payoff_tensor, strategies, index_avoided): + """Computes a generalized dot product avoiding one dimension. + + This is used to directly get the expected return of a given action, given + other players' strategies, for the player indexed by index_avoided. + Note that the numpy.dot function is used to compute this product, as it ended + up being (Slightly) faster in performance tests than np.tensordot. Using the + reduce function proved slower for both np.dot and np.tensordot. + + Args: + player_payoff_tensor: payoff tensor for player[index_avoided], of dimension + (dim(vector[0]), dim(vector[1]), ..., dim(vector[-1])). + strategies: Meta strategy probabilities for each player. + index_avoided: Player for which we do not compute the dot product. + + Returns: + Vector of expected returns for each action of player [the player indexed by + index_avoided]. + """ + new_axis_order = [index_avoided] + [ + i for i in range(len(strategies)) if (i != index_avoided) + ] + accumulator = np.transpose(player_payoff_tensor, new_axis_order) + for i in range(len(strategies) - 1, -1, -1): + if i != index_avoided: + accumulator = np.dot(accumulator, strategies[i]) + return accumulator + + +def _project_distribution(updated_strategy, gamma): + """Projects the distribution in updated_x to have minimal probabilities. + + Minimal probabilities are set as gamma, and the probabilities are then + renormalized to sum to 1. + + Args: + updated_strategy: New distribution value after being updated by update rule. + gamma: minimal probability value when divided by number of actions. + + Returns: + Projected distribution. + """ + # Epsilon approximation of L2-norm projection onto the Delta_gamma space. 
+ updated_strategy[updated_strategy < gamma] = gamma + updated_strategy = updated_strategy / np.sum(updated_strategy) + return updated_strategy + + +def _approx_simplex_projection(updated_strategy, gamma=0.0): + """Approximately projects the distribution in updated_x to have minimal probabilities. + + Minimal probabilities are set as gamma, and the probabilities are then + renormalized to sum to 1. + + Args: + updated_strategy: New distribution value after being updated by update rule. + gamma: minimal probability value when divided by number of actions. + + Returns: + Projected distribution. + """ + # Epsilon approximation of L2-norm projection onto the Delta_gamma space. + updated_strategy[updated_strategy < gamma] = gamma + updated_strategy = updated_strategy / np.sum(updated_strategy) + return updated_strategy + + +def _simplex_projection(updated_strategy, gamma=0.0): + """Project updated_strategy on the closest point in L2-norm on gamma-simplex. + + Based on: https://eng.ucmerced.edu/people/wwang5/papers/SimplexProj.pdf + + Args: + updated_strategy: New distribution value after being updated by update rule. + gamma: minimal probability value when divided by number of actions. + + Returns: + Projected distribution + + Algorithm description: + It aims to find a scalar lam to be substracted by each dimension of v + with the restriction that the resulted quantity should lie in [gamma, 1] + until the resulted vector summed up to 1 + Example: [0.4, 0.7, 0.6], 0.2 -- > find lam=0.25 + --> [max(0.4-0.25, 0.2), max(0.7-0.25, 0.2), max(0.6-0.25, 0.2)] + --> [0.2, 0.45, 0.35] + """ + + n = len(updated_strategy) + idx = np.arange(1, n + 1) + u = np.sort(updated_strategy)[::-1] + u_tmp = (1 - np.cumsum(u) - (n - idx) * gamma) / idx + rho = np.searchsorted(u + u_tmp <= gamma, True) + return np.maximum(updated_strategy + u_tmp[rho - 1], gamma) + + +def _projected_replicator_dynamics_step(payoff_tensors, strategies, dt, gamma, + use_approx=False): + """Does one step of the projected replicator dynamics algorithm. + + Args: + payoff_tensors: List of payoff tensors for each player. + strategies: List of the strategies used by each player. + dt: Update amplitude term. + gamma: Minimum exploratory probability term. + use_approx: use approximate simplex projection. + + Returns: + A list of updated strategies for each player. + """ + + # TODO(author4): Investigate whether this update could be fully vectorized. + new_strategies = [] + for player in range(len(payoff_tensors)): + current_payoff_tensor = payoff_tensors[player] + current_strategy = strategies[player] + + values_per_strategy = _partial_multi_dot(current_payoff_tensor, strategies, + player) + average_return = np.dot(values_per_strategy, current_strategy) + delta = current_strategy * (values_per_strategy - average_return) + + updated_strategy = current_strategy + dt * delta + updated_strategy = ( + _approx_simplex_projection(updated_strategy, gamma) if use_approx + else _simplex_projection(updated_strategy, gamma)) + new_strategies.append(updated_strategy) + return new_strategies + + +def projected_replicator_dynamics(payoff_tensors, + prd_initial_strategies=None, + prd_iterations=int(1e5), + prd_dt=1e-3, + prd_gamma=1e-6, + average_over_last_n_strategies=None, + use_approx=False, + **unused_kwargs): + """The Projected Replicator Dynamics algorithm. + + Args: + payoff_tensors: List of payoff tensors for each player. + prd_initial_strategies: Initial list of the strategies used by each player, + if any. 
Could be used to speed up the search by providing a good initial + solution. + prd_iterations: Number of algorithmic steps to take before returning an + answer. + prd_dt: Update amplitude term. + prd_gamma: Minimum exploratory probability term. + average_over_last_n_strategies: Running average window size for average + policy computation. If None, use the whole trajectory. + use_approx: use the approximate simplex projection. + **unused_kwargs: Convenient way of exposing an API compatible with other + methods with possibly different arguments. + + Returns: + PRD-computed strategies. + """ + number_players = len(payoff_tensors) + # Number of actions available to each player. + action_space_shapes = payoff_tensors[0].shape + + # If no initial starting position is given, start with uniform probabilities. + new_strategies = prd_initial_strategies or [ + np.ones(action_space_shapes[k]) / action_space_shapes[k] + for k in range(number_players) + ] + + averager = nfg_utils.StrategyAverager(number_players, action_space_shapes, + average_over_last_n_strategies) + averager.append(new_strategies) + + for _ in range(prd_iterations): + new_strategies = _projected_replicator_dynamics_step( + payoff_tensors, new_strategies, prd_dt, prd_gamma, use_approx) + averager.append(new_strategies) + return averager.average_strategies() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/projected_replicator_dynamics_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/projected_replicator_dynamics_test.py new file mode 100644 index 0000000..5e21274 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/projected_replicator_dynamics_test.py @@ -0,0 +1,59 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for open_spiel.python.algorithms.projected_replicator_dynamics.""" + +from absl.testing import absltest +import numpy as np + +from open_spiel.python.algorithms import projected_replicator_dynamics + + +class ProjectedReplicatorDynamicsTest(absltest.TestCase): + + def test_two_players(self): + test_a = np.array([[2, 1, 0], [0, -1, -2]]) + test_b = np.array([[2, 1, 0], [0, -1, -2]]) + + strategies = projected_replicator_dynamics.projected_replicator_dynamics( + [test_a, test_b], + prd_initial_strategies=None, + prd_iterations=50000, + prd_dt=1e-3, + prd_gamma=1e-8, + average_over_last_n_strategies=10) + + self.assertLen(strategies, 2, "Wrong strategy length.") + self.assertGreater(strategies[0][0], 0.999, + "Projected Replicator Dynamics failed in trivial case.") + + def test_three_players(self): + test_a = np.array([[[2, 1, 0], [1, 0, -1]], [[1, 0, -1], [0, -1, -2]]]) + test_b = np.array([[[2, 1, 0], [1, 0, -1]], [[1, 0, -1], [0, -1, -2]]]) + test_c = np.array([[[2, 1, 0], [1, 0, -1]], [[1, 0, -1], [0, -1, -2]]]) + + strategies = projected_replicator_dynamics.projected_replicator_dynamics( + [test_a, test_b, test_c], + prd_initial_strategies=None, + prd_iterations=50000, + prd_dt=1e-3, + prd_gamma=1e-6, + average_over_last_n_strategies=10) + self.assertLen(strategies, 3, "Wrong strategy length.") + self.assertGreater(strategies[0][0], 0.999, + "Projected Replicator Dynamics failed in trivial case.") + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/abstract_meta_trainer.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/abstract_meta_trainer.py new file mode 100644 index 0000000..25c9e5a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/abstract_meta_trainer.py @@ -0,0 +1,250 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Abstract class for meta trainers (Generalized PSRO, RNR, ...) 
+ +Meta-algorithm with modular behaviour, allowing implementation of PSRO, RNR, and +other variations. +""" + +import numpy as np +from open_spiel.python.algorithms.psro_v2 import meta_strategies +from open_spiel.python.algorithms.psro_v2 import strategy_selectors +from open_spiel.python.algorithms.psro_v2 import utils + +_DEFAULT_STRATEGY_SELECTION_METHOD = "probabilistic" +_DEFAULT_META_STRATEGY_METHOD = "prd" + + +def _process_string_or_callable(string_or_callable, dictionary): + """Process a callable or a string representing a callable. + + Args: + string_or_callable: Either a string or a callable + dictionary: Dictionary of shape {string_reference: callable} + + Returns: + string_or_callable if string_or_callable is a callable ; otherwise, + dictionary[string_or_callable] + + Raises: + NotImplementedError: If string_or_callable is of the wrong type, or has an + unexpected value (Not present in dictionary). + """ + if callable(string_or_callable): + return string_or_callable + + try: + return dictionary[string_or_callable] + except KeyError as e: + raise NotImplementedError("Input type / value not supported. Accepted types" + ": string, callable. Acceptable string values : " + "{}. Input provided : {}".format( + list(dictionary.keys()), + string_or_callable)) from e + + +def sample_episode(state, policies): + """Samples an episode using policies, starting from state. + + Args: + state: Pyspiel state representing the current state. + policies: List of policy representing the policy executed by each player. + + Returns: + The result of the call to returns() of the final state in the episode. + Meant to be a win/loss integer. + """ + if state.is_terminal(): + return np.array(state.returns(), dtype=np.float32) + + if state.is_simultaneous_node(): + actions = [None] * state.num_players() + for player in range(state.num_players()): + state_policy = policies[player](state, player) + outcomes, probs = zip(*state_policy.items()) + actions[player] = utils.random_choice(outcomes, probs) + state.apply_actions(actions) + return sample_episode(state, policies) + + if state.is_chance_node(): + outcomes, probs = zip(*state.chance_outcomes()) + else: + player = state.current_player() + state_policy = policies[player](state) + outcomes, probs = zip(*state_policy.items()) + + state.apply_action(utils.random_choice(outcomes, probs)) + return sample_episode(state, policies) + + +class AbstractMetaTrainer(object): + """Abstract class implementing meta trainers. + + If a trainer is something that computes a best response to given environment & + agents, a meta trainer will compute which best responses to compute (Against + what, how, etc) + This class can support PBT, Hyperparameter Evolution, etc. + """ + + # pylint:disable=dangerous-default-value + def __init__(self, + game, + oracle, + initial_policies=None, + meta_strategy_method=_DEFAULT_META_STRATEGY_METHOD, + training_strategy_selector=_DEFAULT_STRATEGY_SELECTION_METHOD, + symmetric_game=False, + number_policies_selected=1, + **kwargs): + """Abstract Initialization for meta trainers. + + Args: + game: A pyspiel game object. + oracle: An oracle object, from an implementation of the AbstractOracle + class. + initial_policies: A list of initial policies, to set up a default for + training. Resorts to tabular policies if not set. + meta_strategy_method: String, or callable taking a MetaTrainer object and + returning a list of meta strategies (One list entry per player). + String value can be: + - "uniform": Uniform distribution on policies. 
+ - "nash": Taking nash distribution. Only works for 2 player, 0-sum + games. + - "prd": Projected Replicator Dynamics, as described in Lanctot et + Al. + training_strategy_selector: A callable or a string. If a callable, takes + as arguments: - An instance of `PSROSolver`, - a + `number_policies_selected` integer. and returning a list of + `num_players` lists of selected policies to train from. + When a string, supported values are: + - "top_k_probabilites": selects the first + 'number_policies_selected' policies with highest selection + probabilities. + - "probabilistic": randomly selects 'number_policies_selected' + with probabilities determined by the meta strategies. + - "exhaustive": selects every policy of every player. + - "rectified": only selects strategies that have nonzero chance of + being selected. + - "uniform": randomly selects 'number_policies_selected' policies + with uniform probabilities. + symmetric_game: Whether to consider the current game as symmetric (True) + game or not (False). + number_policies_selected: Maximum number of new policies to train for each + player at each PSRO iteration. + **kwargs: kwargs for meta strategy computation and training strategy + selection + """ + self._iterations = 0 + self._game = game + self._oracle = oracle + self._num_players = self._game.num_players() + + self.symmetric_game = symmetric_game + self._game_num_players = self._num_players + self._num_players = 1 if symmetric_game else self._num_players + + self._number_policies_selected = number_policies_selected + + meta_strategy_method = _process_string_or_callable( + meta_strategy_method, meta_strategies.META_STRATEGY_METHODS) + print("Using {} as strategy method.".format(meta_strategy_method)) + + self._training_strategy_selector = _process_string_or_callable( + training_strategy_selector, + strategy_selectors.TRAINING_STRATEGY_SELECTORS) + print("Using {} as training strategy selector.".format( + self._training_strategy_selector)) + + self._meta_strategy_method = meta_strategy_method + self._kwargs = kwargs + + self._initialize_policy(initial_policies) + self._initialize_game_state() + self.update_meta_strategies() + + def _initialize_policy(self, initial_policies): + return NotImplementedError( + "initialize_policy not implemented. Initial policies passed as" + " arguments : {}".format(initial_policies)) + + def _initialize_game_state(self): + return NotImplementedError("initialize_game_state not implemented.") + + def iteration(self, seed=None): + """Main trainer loop. + + Args: + seed: Seed for random BR noise generation. + """ + self._iterations += 1 + self.update_agents() # Generate new, Best Response agents via oracle. + self.update_empirical_gamestate(seed=seed) # Update gamestate matrix. + self.update_meta_strategies() # Compute meta strategy (e.g. Nash) + + def update_meta_strategies(self): + self._meta_strategy_probabilities = self._meta_strategy_method(self) + if self.symmetric_game: + self._meta_strategy_probabilities = [self._meta_strategy_probabilities[0]] + + def update_agents(self): + return NotImplementedError("update_agents not implemented.") + + def update_empirical_gamestate(self, seed=None): + return NotImplementedError("update_empirical_gamestate not implemented." + " Seed passed as argument : {}".format(seed)) + + def sample_episodes(self, policies, num_episodes): + """Samples episodes and averages their returns. + + Args: + policies: A list of policies representing the policies executed by each + player. 
+ num_episodes: Number of episodes to execute to estimate average return of + policies. + + Returns: + Average episode return over num episodes. + """ + totals = np.zeros(self._num_players) + for _ in range(num_episodes): + totals += sample_episode(self._game.new_initial_state(), + policies).reshape(-1) + return totals / num_episodes + + def get_meta_strategies(self): + """Returns the Nash Equilibrium distribution on meta game matrix.""" + meta_strategy_probabilities = self._meta_strategy_probabilities + if self.symmetric_game: + meta_strategy_probabilities = (self._game_num_players * + meta_strategy_probabilities) + return [np.copy(a) for a in meta_strategy_probabilities] + + def get_meta_game(self): + """Returns the meta game matrix.""" + meta_games = self._meta_games + return [np.copy(a) for a in meta_games] + + def get_policies(self): + """Returns the players' policies.""" + policies = self._policies + if self.symmetric_game: + # Notice that the following line returns N references to the same policy + # This might not be correct for certain applications. + # E.g., a DQN BR oracle with player_id information + policies = self._game_num_players * policies + return policies + + def get_kwargs(self): + return self._kwargs diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/best_response_oracle.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/best_response_oracle.py new file mode 100644 index 0000000..154c360 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/best_response_oracle.py @@ -0,0 +1,163 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""An Oracle for Exact Best Responses. + +This class computes the best responses against sets of policies. +""" + +from open_spiel.python import policy as openspiel_policy +from open_spiel.python.algorithms import best_response +from open_spiel.python.algorithms import policy_utils +from open_spiel.python.algorithms.psro_v2 import optimization_oracle +from open_spiel.python.algorithms.psro_v2 import utils +import pyspiel + + +class BestResponseOracle(optimization_oracle.AbstractOracle): + """Oracle using exact best responses to compute BR to policies.""" + + def __init__(self, + best_response_backend='cpp', + game=None, + all_states=None, + state_to_information_state=None, + prob_cut_threshold=-1.0, + action_value_tolerance=-1.0, + **kwargs): + """Init function for the RLOracle. + + Args: + best_response_backend: A string (either 'cpp' or 'py'), specifying the + best response backend to use (C++ or python, respectively). The cpp + backend should be preferred, generally, as it is significantly faster. + game: The game on which the optimization process takes place. + all_states: The result of calling get_all_states.get_all_states. Cached + for improved performance. + state_to_information_state: A dict mapping str(state) to + state.information_state for every state in the game. Cached for improved + performance. 
+ prob_cut_threshold: For cpp backend, a partially computed best-response + can be computed when using a prob_cut_threshold >= 0. + action_value_tolerance: For cpp backend, the max-entropy best-response + policy is computed if a non-negative `action_value_tolerance` is used. + **kwargs: kwargs + """ + super(BestResponseOracle, self).__init__(**kwargs) + self.best_response_backend = best_response_backend + if self.best_response_backend == 'cpp': + # Should compute all_states and state_to_information_state only once in + # the program, as caching them speeds up TabularBestResponse tremendously. + self.all_states, self.state_to_information_state = ( + utils.compute_states_and_info_states_if_none( + game, all_states, state_to_information_state)) + + policy = openspiel_policy.UniformRandomPolicy(game) + + policy_to_dict = policy_utils.policy_to_dict( + policy, game, self.all_states, self.state_to_information_state) + + # pylint: disable=g-complex-comprehension + # Cache TabularBestResponse for players, due to their costly construction + # TODO(b/140426861): Use a single best-responder once the code supports + # multiple player ids. + self.best_response_processors = [ + pyspiel.TabularBestResponse(game, best_responder_id, policy_to_dict, + prob_cut_threshold, + action_value_tolerance) + for best_responder_id in range(game.num_players()) + ] + self.best_responders = [ + best_response.CPPBestResponsePolicy( + game, i_player, policy, self.all_states, + self.state_to_information_state, + self.best_response_processors[i_player] + ) + for i_player in range(game.num_players()) + ] + # pylint: enable=g-complex-comprehension + + def __call__(self, + game, + training_parameters, + strategy_sampler=utils.sample_strategy, + using_joint_strategies=False, + **oracle_specific_execution_kwargs): + """Call method for oracle, returns best responses for training_parameters. + + Args: + game: The game on which the optimization process takes place. + training_parameters: List of list of dicts: one list per player, one dict + per selected agent in the pool for each player, + each dictionary containing the following fields: + - policy: the policy from which to start training. + - total_policies: A list of all policy.Policy strategies used for + training, including the one for the current player. Either + marginalized or joint strategies are accepted. + - current_player: Integer representing the current player. + - probabilities_of_playing_policies: A list of arrays representing, per + player, the probabilities of playing each policy in total_policies for + the same player. + strategy_sampler: Callable that samples strategies from `total_policies` + using `probabilities_of_playing_policies`. It only samples one joint + "action" for all players. Implemented to be able to take into account + joint probabilities of action. + using_joint_strategies: Whether the meta-strategies sent are joint (True) + or marginalized. + **oracle_specific_execution_kwargs: Other set of arguments, for + compatibility purposes. Can for example represent whether to Rectify + Training or not. + + Returns: + A list of list of OpenSpiel Policy objects representing the expected + best response, following the same structure as training_parameters. 
+ """ + new_policies = [] + for player_parameters in training_parameters: + player_policies = [] + for params in player_parameters: + current_player = params['current_player'] + total_policies = params['total_policies'] + probabilities_of_playing_policies = params[ + 'probabilities_of_playing_policies'] + if using_joint_strategies: + aggr_policy = utils.aggregate_joint_policies( + game, utils.marginal_to_joint(total_policies), + probabilities_of_playing_policies.reshape(-1)) + else: + aggr_policy = utils.aggregate_policies( + game, total_policies, probabilities_of_playing_policies) + + # This takes as input an aggregate policy, and computes a best response + # for current_player at the applicable information states by recursing + # through the game tree. At information states involving other players + # or chance, the aggr_policy is used to compute the expected value, such + # that a best response for current_player can be computed. + if self.best_response_backend == 'py': + best_resp = best_response.BestResponsePolicy(game, current_player, + aggr_policy) + else: + self.best_response_processors[current_player].set_policy( + policy_utils.policy_to_dict(aggr_policy, game, self.all_states, + self.state_to_information_state)) + + self.best_responders[current_player] = ( + best_response.CPPBestResponsePolicy( + game, current_player, aggr_policy, self.all_states, + self.state_to_information_state, + self.best_response_processors[current_player])) + best_resp = self.best_responders[current_player] + player_policies.append(best_resp) + new_policies.append(player_policies) + return new_policies diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/best_response_oracle_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/best_response_oracle_test.py new file mode 100644 index 0000000..3e458e0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/best_response_oracle_test.py @@ -0,0 +1,76 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for open_spiel.python.algorithms.psro_v2.best_response_oracle.""" + +from absl.testing import absltest +from absl.testing import parameterized + +from open_spiel.python import policy +from open_spiel.python.algorithms import best_response +from open_spiel.python.algorithms.psro_v2 import best_response_oracle +import pyspiel + + +class BestResponseOracleTest(parameterized.TestCase, absltest.TestCase): + + @parameterized.parameters(("kuhn_poker", 2), ("kuhn_poker", 3), + ("leduc_poker", 2)) + def test_cpp_python_best_response_oracle(self, game_name, num_players): + # Tests that these best responses interface well with Best Response Oracle + game = pyspiel.load_game(game_name, {"players": num_players}) + all_states, _ = best_response.compute_states_and_info_states_if_none( + game, all_states=None, state_to_information_state=None) + + current_best = [ + [policy.TabularPolicy(game).__copy__()] for _ in range(num_players) + ] + probabilities_of_playing_policies = [[1.] for _ in range(num_players)] + + # Construct the python oracle + py_oracle = best_response_oracle.BestResponseOracle( + best_response_backend="py") + + # Construct the cpp oracle. Note that in this regime, BestResponseOracle + # uses base_policy to construct and cache TabularBestResponse internally. + cpp_oracle = best_response_oracle.BestResponseOracle( + game=game, best_response_backend="cpp") + + # Prepare the computation of the best responses with each backend + # pylint:disable=g-complex-comprehension + training_params = [[{ + "total_policies": current_best, + "current_player": i, + "probabilities_of_playing_policies": probabilities_of_playing_policies + }] for i in range(num_players)] + # pylint:enable=g-complex-comprehension + + py_best_rep = py_oracle(game, training_params) + + cpp_best_rep = cpp_oracle(game, training_params) + + # Compare the policies + for state in all_states.values(): + i_player = state.current_player() + py_dict = py_best_rep[i_player][0].action_probabilities(state) + cpp_dict = cpp_best_rep[i_player][0].action_probabilities(state) + + for action in py_dict.keys(): + self.assertEqual(py_dict.get(action, 0.0), cpp_dict.get(action, 0.0)) + for action in cpp_dict.keys(): + self.assertEqual(py_dict.get(action, 0.0), cpp_dict.get(action, 0.0)) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/meta_strategies.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/meta_strategies.py new file mode 100644 index 0000000..666a87d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/meta_strategies.py @@ -0,0 +1,201 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Meta-strategy solvers for PSRO.""" + +import numpy as np + +from open_spiel.python.algorithms import lp_solver +from open_spiel.python.algorithms import projected_replicator_dynamics +from open_spiel.python.algorithms import regret_matching +import pyspiel + + +EPSILON_MIN_POSITIVE_PROBA = 1e-8 + + +def uniform_strategy(solver, return_joint=False): + """Returns a Random Uniform distribution on policies. + + Args: + solver: GenPSROSolver instance. + return_joint: If true, only returns marginals. Otherwise marginals as well + as joint probabilities. + + Returns: + uniform distribution on strategies. + """ + policies = solver.get_policies() + policy_lengths = [len(pol) for pol in policies] + result = [np.ones(pol_len) / pol_len for pol_len in policy_lengths] + if not return_joint: + return result + else: + joint_strategies = get_joint_strategy_from_marginals(result) + return result, joint_strategies + + +def softmax_on_range(number_policies): + x = np.array(list(range(number_policies))) + x = np.exp(x-x.max()) + x /= np.sum(x) + return x + + +def uniform_biased_strategy(solver, return_joint=False): + """Returns a Biased Random Uniform distribution on policies. + + The uniform distribution is biased to prioritize playing against more recent + policies (Policies that were appended to the policy list later in training) + instead of older ones. + + Args: + solver: GenPSROSolver instance. + return_joint: If true, only returns marginals. Otherwise marginals as well + as joint probabilities. + + Returns: + uniform distribution on strategies. + """ + policies = solver.get_policies() + if not isinstance(policies[0], list): + policies = [policies] + policy_lengths = [len(pol) for pol in policies] + result = [softmax_on_range(pol_len) for pol_len in policy_lengths] + if not return_joint: + return result + else: + joint_strategies = get_joint_strategy_from_marginals(result) + return result, joint_strategies + + +def renormalize(probabilities): + """Replaces all negative entries with zeroes and normalizes the result. + + Args: + probabilities: probability vector to renormalize. Has to be one-dimensional. + + Returns: + Renormalized probabilities. + """ + probabilities[probabilities < 0] = 0 + probabilities = probabilities / np.sum(probabilities) + return probabilities + + +def get_joint_strategy_from_marginals(probabilities): + """Returns a joint strategy tensor from a list of marginals. + + Args: + probabilities: list of probabilities. + + Returns: + A flat joint strategy from a list of marginals. + """ + res = np.ones((1,), dtype=np.float64) + for prob in probabilities: + res = res[..., None] @ np.asarray(prob).reshape((1,) * res.ndim + (-1,)) + return res.reshape(-1) + + +def nash_strategy(solver, return_joint=False): + """Returns nash distribution on meta game matrix. + + This method only works for two player zero-sum games. + + Args: + solver: GenPSROSolver instance. + return_joint: If true, only returns marginals. Otherwise marginals as well + as joint probabilities. + + Returns: + Nash distribution on strategies. 
+ """ + meta_games = solver.get_meta_game() + if not isinstance(meta_games, list): + meta_games = [meta_games, -meta_games] + meta_games = [x.tolist() for x in meta_games] + if len(meta_games) != 2: + raise NotImplementedError( + "nash_strategy solver works only for 2p zero-sum" + "games, but was invoked for a {} player game".format(len(meta_games))) + nash_prob_1, nash_prob_2, _, _ = ( + lp_solver.solve_zero_sum_matrix_game( + pyspiel.create_matrix_game(*meta_games))) + result = [ + renormalize(np.array(nash_prob_1).reshape(-1)), + renormalize(np.array(nash_prob_2).reshape(-1)) + ] + + if not return_joint: + return result + else: + joint_strategies = get_joint_strategy_from_marginals(result) + return result, joint_strategies + + +def prd_strategy(solver, return_joint=False): + """Computes Projected Replicator Dynamics strategies. + + Args: + solver: GenPSROSolver instance. + return_joint: If true, only returns marginals. Otherwise marginals as well + as joint probabilities. + + Returns: + PRD-computed strategies. + """ + meta_games = solver.get_meta_game() + if not isinstance(meta_games, list): + meta_games = [meta_games, -meta_games] + kwargs = solver.get_kwargs() + result = projected_replicator_dynamics.projected_replicator_dynamics( + meta_games, **kwargs) + if not return_joint: + return result + else: + joint_strategies = get_joint_strategy_from_marginals(result) + return result, joint_strategies + + +def rm_strategy(solver, return_joint=False): + """Computes regret-matching strategies. + + Args: + solver: GenPSROSolver instance. + return_joint: If true, only returns marginals. Otherwise marginals as well + as joint probabilities. + + Returns: + PRD-computed strategies. + """ + meta_games = solver.get_meta_game() + if not isinstance(meta_games, list): + meta_games = [meta_games, -meta_games] + kwargs = solver.get_kwargs() + result = regret_matching.regret_matching(meta_games, **kwargs) + if not return_joint: + return result + else: + joint_strategies = get_joint_strategy_from_marginals(result) + return result, joint_strategies + + +META_STRATEGY_METHODS = { + "uniform_biased": uniform_biased_strategy, + "uniform": uniform_strategy, + "nash": nash_strategy, + "prd": prd_strategy, + "rm": rm_strategy, +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/optimization_oracle.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/optimization_oracle.py new file mode 100644 index 0000000..069dbc3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/optimization_oracle.py @@ -0,0 +1,141 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Class of Optimization Oracles generating best response against opponents. + +Oracles are as defined in (Lanctot et Al., 2017, +https://arxiv.org/pdf/1711.00832.pdf ), functions generating a best response +against a probabilistic mixture of opponents. 
This class implements the abstract +class of oracles, and a simple oracle using Evolutionary Strategy as +optimization method. +""" + +import numpy as np + + +def strategy_sampler_fun(total_policies, probabilities_of_playing_policies): + """Samples strategies according to distribution over them. + + Args: + total_policies: List of lists of policies for each player. + probabilities_of_playing_policies: List of numpy arrays representing the + probability of playing a strategy. + + Returns: + One sampled joint strategy. + """ + policies_selected = [] + for k in range(len(total_policies)): + selected_opponent = np.random.choice( + total_policies[k], + 1, + p=probabilities_of_playing_policies[k]).reshape(-1)[0] + policies_selected.append(selected_opponent) + return policies_selected + + +class AbstractOracle(object): + """The abstract class representing oracles, a hidden optimization process.""" + + def __init__(self, + number_policies_sampled=100, + **oracle_specific_kwargs): + """Initialization method for oracle. + + Args: + number_policies_sampled: Number of different opponent policies sampled + during evaluation of policy. + **oracle_specific_kwargs: Oracle specific args, compatibility + purpose. Since oracles can vary so much in their implementation, no + specific argument constraint is put on this function. + """ + self._number_policies_sampled = number_policies_sampled + self._kwargs = oracle_specific_kwargs + + def set_iteration_numbers(self, number_policies_sampled): + """Changes the number of iterations used for computing episode returns. + + Args: + number_policies_sampled: Number of different opponent policies sampled + during evaluation of policy. + """ + self._number_policies_sampled = number_policies_sampled + + def __call__(self, game, policy, total_policies, current_player, + probabilities_of_playing_policies, + **oracle_specific_execution_kwargs): + """Call method for oracle, returns best response against a set of policies. + + Args: + game: The game on which the optimization process takes place. + policy: The current policy, in policy.Policy, from which we wish to start + optimizing. + total_policies: A list of all policy.Policy strategies used for training, + including the one for the current player. + current_player: Integer representing the current player. + probabilities_of_playing_policies: A list of arrays representing, per + player, the probabilities of playing each policy in total_policies for + the same player. + **oracle_specific_execution_kwargs: Other set of arguments, for + compatibility purposes. Can for example represent whether to Rectify + Training or not. + """ + raise NotImplementedError("Calling Abstract class method.") + + def sample_episode(self, game, policies_selected): + raise NotImplementedError("Calling Abstract class method.") + + def evaluate_policy(self, game, pol, total_policies, current_player, + probabilities_of_playing_policies, + strategy_sampler=strategy_sampler_fun, + **oracle_specific_execution_kwargs): + """Evaluates a specific policy against a nash mixture of policies. + + Args: + game: The game on which the optimization process takes place. + pol: The current policy, in policy.Policy, from which we wish to start + optimizing. + total_policies: A list of all policy.Policy strategies used for training, + including the one for the current player. + current_player: Integer representing the current player. 
+ probabilities_of_playing_policies: A list of arrays representing, per + player, the probabilities of playing each policy in total_policies for + the same player. + strategy_sampler: callable sampling strategy. + **oracle_specific_execution_kwargs: Other set of arguments, for + compatibility purposes. Can for example represent whether to Rectify + Training or not. + + Returns: + Average return for policy when played against policies_played_against. + """ + del oracle_specific_execution_kwargs # Unused. + + totals = 0 + count = 0 + for _ in range(self._number_policies_sampled): + policies_selected = strategy_sampler(total_policies, + probabilities_of_playing_policies) + policies_selected[current_player] = pol + + new_return = self.sample_episode( + game, + policies_selected)[current_player] + + totals += new_return + count += 1 + + # Avoid the 0 / 0 case. + return totals / max(1, count) + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/psro_v2.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/psro_v2.py new file mode 100644 index 0000000..7ae2fa1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/psro_v2.py @@ -0,0 +1,515 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Modular implementations of the PSRO meta algorithm. + +Allows the use of Restricted Nash Response, Nash Response, Uniform Response, +and other modular matchmaking selection components users can add. + +This version works for N player, general sum games. + +One iteration of the algorithm consists of: + +1) Computing the selection probability vector (or meta-strategy) for current +strategies of each player, given their payoff. +2) [optional] Generating a mask over joint policies that restricts which policy +to train against, ie. rectify the set of policies trained against. (This +operation is designated by "rectify" in the code) +3) From every strategy used, generating a new best response strategy against the +meta-strategy-weighted, potentially rectified, mixture of strategies using an +oracle. +4) Updating meta game matrix with new game results. + +""" + +import itertools + +import numpy as np + +from open_spiel.python import policy +from open_spiel.python.algorithms.psro_v2 import abstract_meta_trainer +from open_spiel.python.algorithms.psro_v2 import strategy_selectors +from open_spiel.python.algorithms.psro_v2 import utils + + +TRAIN_TARGET_SELECTORS = { + "": None, + "rectified": strategy_selectors.rectified_selector, +} + + +class PSROSolver(abstract_meta_trainer.AbstractMetaTrainer): + """A general implementation PSRO. + + PSRO is the algorithm described in (Lanctot et Al., 2017, + https://arxiv.org/pdf/1711.00832.pdf ). + + Subsequent work regarding PSRO's matchmaking and training has been performed + by David Balduzzi, who introduced Restricted Nash Response (RNR), Nash + Response (NR) and Uniform Response (UR). 
+ RNR is Algorithm 4 in (Balduzzi, 2019, "Open-ended Learning in Symmetric + Zero-sum Games"). NR, Nash response, is algorithm 3. + Balduzzi et Al., 2019, https://arxiv.org/pdf/1901.08106.pdf + + This implementation allows one to modularly choose different meta strategy + computation methods, or other user-written ones. + """ + + def __init__(self, + game, + oracle, + sims_per_entry, + initial_policies=None, + rectifier="", + training_strategy_selector=None, + meta_strategy_method="alpharank", + sample_from_marginals=False, + number_policies_selected=1, + n_noisy_copies=0, + alpha_noise=0.0, + beta_noise=0.0, + **kwargs): + """Initialize the PSRO solver. + + Arguments: + game: The open_spiel game object. + oracle: Callable that takes as input: - game - policy - policies played - + array representing the probability of playing policy i - other kwargs + and returns a new best response. + sims_per_entry: Number of simulations to run to estimate each element of + the game outcome matrix. + initial_policies: A list of initial policies for each player, from which + the optimization process will start. + rectifier: A string indicating the rectifying method. Can be : + - "" or None: Train against potentially all strategies. + - "rectified": Train only against strategies beaten by current + strategy. + training_strategy_selector: Callable taking (PSROSolver, + 'number_policies_selected') and returning a list of list of selected + strategies to train from - this usually means copying weights and + rectifying with respect to the selected strategy's performance (One list + entry per player), or string selecting pre-implemented methods. + String value can be: + - "top_k_probabilites": selects the first + 'number_policies_selected' policies with highest selection + probabilities. + - "probabilistic": randomly selects 'number_policies_selected' + with probabilities determined by the meta strategies. + - "exhaustive": selects every policy of every player. + - "rectified": only selects strategies that have nonzero chance of + being selected. + - "uniform": randomly selects 'number_policies_selected' + policies with uniform probabilities. + meta_strategy_method: String or callable taking a GenPSROSolver object and + returning two lists ; one list of meta strategies (One list entry per + player), and one list of joint strategies. + String value can be: + - alpharank: AlphaRank distribution on policies. + - "uniform": Uniform distribution on policies. + - "nash": Taking nash distribution. Only works for 2 player, 0-sum + games. + - "prd": Projected Replicator Dynamics, as described in Lanctot et + Al. + sample_from_marginals: A boolean, specifying whether to sample from + marginal (True) or joint (False) meta-strategy distributions. + number_policies_selected: Number of policies to return for each player. + + n_noisy_copies: Number of noisy copies of each agent after training. 0 to + ignore this. + alpha_noise: lower bound on alpha noise value (Mixture amplitude.) + beta_noise: lower bound on beta noise value (Softmax temperature.) + **kwargs: kwargs for meta strategy computation and training strategy + selection. 
+ """ + self._sims_per_entry = sims_per_entry + print("Using {} sims per entry.".format(sims_per_entry)) + + self._rectifier = TRAIN_TARGET_SELECTORS.get( + rectifier, None) + self._rectify_training = self._rectifier + print("Rectifier : {}".format(rectifier)) + + self._meta_strategy_probabilities = np.array([]) + self._non_marginalized_probabilities = np.array([]) + + print("Perturbating oracle outputs : {}".format(n_noisy_copies > 0)) + self._n_noisy_copies = n_noisy_copies + self._alpha_noise = alpha_noise + self._beta_noise = beta_noise + + self._policies = [] # A list of size `num_players` of lists containing the + # strategies of each player. + self._new_policies = [] + + # Alpharank is a special case here, as it's not supported by the abstract + # meta trainer api, so has to be passed as a function instead of a string. + if not meta_strategy_method or meta_strategy_method == "alpharank": + meta_strategy_method = utils.alpharank_strategy + + print("Sampling from marginals : {}".format(sample_from_marginals)) + self.sample_from_marginals = sample_from_marginals + + super(PSROSolver, self).__init__( + game, + oracle, + initial_policies, + meta_strategy_method, + training_strategy_selector, + number_policies_selected=number_policies_selected, + **kwargs) + + def _initialize_policy(self, initial_policies): + if self.symmetric_game: + self._policies = [[]] + # Notice that the following line returns N references to the same policy + # This might not be correct for certain applications. + # E.g., a DQN BR oracle with player_id information + self._new_policies = [ + ( + [initial_policies[0]] + if initial_policies + else [policy.UniformRandomPolicy(self._game)] + ) + ] + else: + self._policies = [[] for _ in range(self._num_players)] + self._new_policies = [ + ( + [initial_policies[k]] + if initial_policies + else [policy.UniformRandomPolicy(self._game)] + ) + for k in range(self._num_players) + ] + + def _initialize_game_state(self): + effective_payoff_size = self._game_num_players + self._meta_games = [ + np.array(utils.empty_list_generator(effective_payoff_size)) + for _ in range(effective_payoff_size) + ] + self.update_empirical_gamestate(seed=None) + + def get_joint_policy_ids(self): + """Returns a list of integers enumerating all joint meta strategies.""" + return utils.get_strategy_profile_ids(self._meta_games) + + def get_joint_policies_from_id_list(self, selected_policy_ids): + """Returns a list of joint policies from a list of integer IDs. + + Args: + selected_policy_ids: A list of integer IDs corresponding to the + meta-strategies, with duplicate entries allowed. + + Returns: + selected_joint_policies: A list, with each element being a joint policy + instance (i.e., a list of policies, one per player). + """ + policies = self.get_policies() + + selected_joint_policies = utils.get_joint_policies_from_id_list( + self._meta_games, policies, selected_policy_ids) + return selected_joint_policies + + def update_meta_strategies(self): + """Recomputes the current meta strategy of each player. + + Given new payoff tables, we call self._meta_strategy_method to update the + meta-probabilities. + """ + if self.symmetric_game: + # Notice that the following line returns N references to the same policy + # This might not be correct for certain applications. 
+ # E.g., a DQN BR oracle with player_id information + self._policies = self._policies * self._game_num_players + + self._meta_strategy_probabilities, self._non_marginalized_probabilities = ( + self._meta_strategy_method(solver=self, return_joint=True)) + + if self.symmetric_game: + self._policies = [self._policies[0]] + self._meta_strategy_probabilities = [self._meta_strategy_probabilities[0]] + + def get_policies_and_strategies(self): + """Returns current policy sampler, policies and meta-strategies of the game. + + If strategies are rectified, we automatically switch to returning joint + strategies. + + Returns: + sample_strategy: A strategy sampling function + total_policies: A list of list of policies, one list per player. + probabilities_of_playing_policies: the meta strategies, either joint or + marginalized. + """ + sample_strategy = utils.sample_strategy_marginal + probabilities_of_playing_policies = self.get_meta_strategies() + if self._rectify_training or not self.sample_from_marginals: + sample_strategy = utils.sample_strategy_joint + probabilities_of_playing_policies = self._non_marginalized_probabilities + + total_policies = self.get_policies() + return sample_strategy, total_policies, probabilities_of_playing_policies + + def _restrict_target_training(self, + current_player, + ind, + total_policies, + probabilities_of_playing_policies, + restrict_target_training_bool, + epsilon=1e-12): + """Rectifies training. + + Args: + current_player: the current player. + ind: Current strategy index of the player. + total_policies: all policies available to all players. + probabilities_of_playing_policies: meta strategies. + restrict_target_training_bool: Boolean specifying whether to restrict + training. If False, standard meta strategies are returned. Otherwise, + restricted joint strategies are returned. + epsilon: threshold below which we consider 0 sum of probabilities. + + Returns: + Probabilities of playing each joint strategy (If rectifying) / probability + of each player playing each strategy (Otherwise - marginal probabilities) + """ + true_shape = tuple([len(a) for a in total_policies]) + if not restrict_target_training_bool: + return probabilities_of_playing_policies + else: + kept_probas = self._rectifier( + self, current_player, ind) + # Ensure probabilities_of_playing_policies has same shape as kept_probas. + probability = probabilities_of_playing_policies.reshape(true_shape) + probability = probability * kept_probas + prob_sum = np.sum(probability) + + # If the rectified probabilities are too low / 0, we play against the + # non-rectified probabilities. + if prob_sum <= epsilon: + probability = probabilities_of_playing_policies + else: + probability /= prob_sum + + return probability + + def update_agents(self): + """Updates policies for each player at the same time by calling the oracle. + + The resulting policies are appended to self._new_policies. + """ + + used_policies, used_indexes = self._training_strategy_selector( + self, self._number_policies_selected) + + (sample_strategy, + total_policies, + probabilities_of_playing_policies) = self.get_policies_and_strategies() + + # Contains the training parameters of all trained oracles. + # This is a list (Size num_players) of list (Size num_new_policies[player]), + # each dict containing the needed information to train a new best response. 
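+ # Illustrative example (not in the upstream comment) of the structure built
+ # below, for a 2-player game with one selected policy per player:
+ #   [[{"policy": <p0 policy>, "total_policies": [...], "current_player": 0,
+ #      "probabilities_of_playing_policies": [...]}],
+ #    [{"policy": <p1 policy>, ..., "current_player": 1, ...}]]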
+ training_parameters = [[] for _ in range(self._num_players)] + + for current_player in range(self._num_players): + if self.sample_from_marginals: + currently_used_policies = used_policies[current_player] + current_indexes = used_indexes[current_player] + else: + currently_used_policies = [ + joint_policy[current_player] for joint_policy in used_policies + ] + current_indexes = used_indexes[current_player] + + for i in range(len(currently_used_policies)): + pol = currently_used_policies[i] + ind = current_indexes[i] + + new_probabilities = self._restrict_target_training( + current_player, ind, total_policies, + probabilities_of_playing_policies, + self._rectify_training) + + new_parameter = { + "policy": pol, + "total_policies": total_policies, + "current_player": current_player, + "probabilities_of_playing_policies": new_probabilities + } + training_parameters[current_player].append(new_parameter) + + if self.symmetric_game: + # Notice that the following line returns N references to the same policy + # This might not be correct for certain applications. + # E.g., a DQN BR oracle with player_id information + self._policies = self._game_num_players * self._policies + self._num_players = self._game_num_players + training_parameters = [training_parameters[0]] + + # List of List of new policies (One list per player) + self._new_policies = self._oracle( + self._game, + training_parameters, + strategy_sampler=sample_strategy, + using_joint_strategies=self._rectify_training or + not self.sample_from_marginals) + + if self.symmetric_game: + # In a symmetric game, only one population is kept. The below lines + # therefore make PSRO consider only the first player during training, + # since both players are identical. + self._policies = [self._policies[0]] + self._num_players = 1 + + def update_empirical_gamestate(self, seed=None): + """Given new agents in _new_policies, update meta_games through simulations. + + Args: + seed: Seed for environment generation. + + Returns: + Meta game payoff matrix. + """ + if seed is not None: + np.random.seed(seed=seed) + assert self._oracle is not None + + if self.symmetric_game: + # Switch to considering the game as a symmetric game where players have + # the same policies & new policies. This allows the empirical gamestate + # update to function normally. + # Notice that the following line returns N references to the same policy + # This might not be correct for certain applications. + # E.g., a DQN BR oracle with player_id information + self._policies = self._game_num_players * self._policies + self._new_policies = self._game_num_players * self._new_policies + self._num_players = self._game_num_players + + # Concatenate both lists. + updated_policies = [ + self._policies[k] + self._new_policies[k] + for k in range(self._num_players) + ] + + # Each metagame will be (num_strategies)^self._num_players. + # There are self._num_player metagames, one per player. + total_number_policies = [ + len(updated_policies[k]) for k in range(self._num_players) + ] + number_older_policies = [ + len(self._policies[k]) for k in range(self._num_players) + ] + number_new_policies = [ + len(self._new_policies[k]) for k in range(self._num_players) + ] + + # Initializing the matrix with nans to recognize unestimated states. + meta_games = [ + np.full(tuple(total_number_policies), np.nan) + for k in range(self._num_players) + ] + + # Filling the matrix with already-known values. 
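+ # The slice below selects, along every player axis, the leading block of
+ # size len(self._policies[k]) (the previously estimated strategies), so the
+ # old payoff tensors are copied into the corner of the enlarged tensors and
+ # only entries involving at least one new policy are simulated further down.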
+ older_policies_slice = tuple( + [slice(len(self._policies[k])) for k in range(self._num_players)]) + for k in range(self._num_players): + meta_games[k][older_policies_slice] = self._meta_games[k] + + # Filling the matrix for newly added policies. + for current_player in range(self._num_players): + # Only iterate over new policies for current player ; compute on every + # policy for the other players. + range_iterators = [ + range(total_number_policies[k]) for k in range(current_player) + ] + [range(number_new_policies[current_player])] + [ + range(total_number_policies[k]) + for k in range(current_player + 1, self._num_players) + ] + for current_index in itertools.product(*range_iterators): + used_index = list(current_index) + used_index[current_player] += number_older_policies[current_player] + if np.isnan(meta_games[current_player][tuple(used_index)]): + estimated_policies = [ + updated_policies[k][current_index[k]] + for k in range(current_player) + ] + [ + self._new_policies[current_player][current_index[current_player]] + ] + [ + updated_policies[k][current_index[k]] + for k in range(current_player + 1, self._num_players) + ] + + if self.symmetric_game: + # TODO(author4): This update uses ~2**(n_players-1) * sims_per_entry + # samples to estimate each payoff table entry. This should be + # brought to sims_per_entry to coincide with expected behavior. + + utility_estimates = self.sample_episodes(estimated_policies, + self._sims_per_entry) + + player_permutations = list(itertools.permutations(list(range( + self._num_players)))) + for permutation in player_permutations: + used_tuple = tuple([used_index[i] for i in permutation]) + for player in range(self._num_players): + if np.isnan(meta_games[player][used_tuple]): + meta_games[player][used_tuple] = 0.0 + meta_games[player][used_tuple] += utility_estimates[ + permutation[player]] / len(player_permutations) + else: + utility_estimates = self.sample_episodes(estimated_policies, + self._sims_per_entry) + for k in range(self._num_players): + meta_games[k][tuple(used_index)] = utility_estimates[k] + + if self.symmetric_game: + # Make PSRO consider that we only have one population again, as we + # consider that we are in a symmetric game (No difference between players) + self._policies = [self._policies[0]] + self._new_policies = [self._new_policies[0]] + updated_policies = [updated_policies[0]] + self._num_players = 1 + + self._meta_games = meta_games + self._policies = updated_policies + return meta_games + + def get_meta_game(self): + """Returns the meta game matrix.""" + return self._meta_games + + @property + def meta_games(self): + return self._meta_games + + def get_policies(self): + """Returns a list, each element being a list of each player's policies.""" + policies = self._policies + if self.symmetric_game: + # For compatibility reasons, return list of expected length. + # Notice that the following line returns N references to the same policy + # This might not be correct for certain applications. 
+ # E.g., a DQN BR oracle with player_id information + policies = self._game_num_players * self._policies + return policies + + def get_and_update_non_marginalized_meta_strategies(self, update=True): + """Returns the Nash Equilibrium distribution on meta game matrix.""" + if update: + self.update_meta_strategies() + return self._non_marginalized_probabilities + + def get_strategy_computation_and_selection_kwargs(self): + return self._strategy_computation_and_selection_kwargs diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/rl_oracle.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/rl_oracle.py new file mode 100644 index 0000000..3033b41 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/rl_oracle.py @@ -0,0 +1,289 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""An Oracle for any RL algorithm. + +An Oracle for any RL algorithm following the OpenSpiel Policy API. +""" + +import numpy as np + +from open_spiel.python.algorithms.psro_v2 import optimization_oracle +from open_spiel.python.algorithms.psro_v2 import utils + + +def update_episodes_per_oracles(episodes_per_oracle, played_policies_indexes): + """Updates the current episode count per policy. + + Args: + episodes_per_oracle: List of list of number of episodes played per policy. + One list per player. + played_policies_indexes: List with structure (player_index, policy_index) of + played policies whose count needs updating. + + Returns: + Updated count. + """ + for player_index, policy_index in played_policies_indexes: + episodes_per_oracle[player_index][policy_index] += 1 + return episodes_per_oracle + + +def freeze_all(policies_per_player): + """Freezes all policies within policy_per_player. + + Args: + policies_per_player: List of list of number of policies. + """ + for policies in policies_per_player: + for pol in policies: + pol.freeze() + + +def random_count_weighted_choice(count_weight): + """Returns a randomly sampled index i with P ~ 1 / (count_weight[i] + 1). + + Allows random sampling to prioritize indexes that haven't been sampled as many + times as others. + + Args: + count_weight: A list of counts to sample an index from. + + Returns: + Randomly-sampled index. + """ + indexes = list(range(len(count_weight))) + p = np.array([1 / (weight + 1) for weight in count_weight]) + p /= np.sum(p) + chosen_index = np.random.choice(indexes, p=p) + return chosen_index + + +class RLOracle(optimization_oracle.AbstractOracle): + """Oracle handling Approximate Best Responses computation.""" + + def __init__(self, + env, + best_response_class, + best_response_kwargs, + number_training_episodes=1e3, + self_play_proportion=0.0, + **kwargs): + """Init function for the RLOracle. + + Args: + env: rl_environment instance. + best_response_class: class of the best response. + best_response_kwargs: kwargs of the best response. 
+ number_training_episodes: (Minimal) number of training episodes to run + each best response through. May be higher for some policies. + self_play_proportion: Float, between 0 and 1. Defines the probability that + a non-currently-training player will actually play (one of) its + currently training strategy (Which will be trained as well). + **kwargs: kwargs + """ + self._env = env + + self._best_response_class = best_response_class + self._best_response_kwargs = best_response_kwargs + + self._self_play_proportion = self_play_proportion + self._number_training_episodes = number_training_episodes + + super(RLOracle, self).__init__(**kwargs) + + def sample_episode(self, unused_time_step, agents, is_evaluation=False): + time_step = self._env.reset() + cumulative_rewards = 0.0 + while not time_step.last(): + if time_step.is_simultaneous_move(): + action_list = [] + for agent in agents: + output = agent.step(time_step, is_evaluation=is_evaluation) + action_list.append(output.action) + time_step = self._env.step(action_list) + cumulative_rewards += np.array(time_step.rewards) + else: + player_id = time_step.observations["current_player"] + + # is_evaluation is a boolean that, when False, lets policies train. The + # setting of PSRO requires that all policies be static aside from those + # being trained by the oracle. is_evaluation could be used to prevent + # policies from training, yet we have opted for adding frozen attributes + # that prevents policies from training, for all values of is_evaluation. + # Since all policies returned by the oracle are frozen before being + # returned, only currently-trained policies can effectively learn. + agent_output = agents[player_id].step( + time_step, is_evaluation=is_evaluation) + action_list = [agent_output.action] + time_step = self._env.step(action_list) + cumulative_rewards += np.array(time_step.rewards) + + if not is_evaluation: + for agent in agents: + agent.step(time_step) + + return cumulative_rewards + + def _has_terminated(self, episodes_per_oracle): + # The oracle has terminated when all policies have at least trained for + # self._number_training_episodes. Given the stochastic nature of our + # training, some policies may have more training episodes than that value. + return np.all( + episodes_per_oracle.reshape(-1) > self._number_training_episodes) + + def sample_policies_for_episode(self, new_policies, training_parameters, + episodes_per_oracle, strategy_sampler): + """Randomly samples a set of policies to run during the next episode. + + Note : sampling is biased to select players & strategies that haven't + trained as much as the others. + + Args: + new_policies: The currently training policies, list of list, one per + player. + training_parameters: List of list of training parameters dictionaries, one + list per player, one dictionary per training policy. + episodes_per_oracle: List of list of integers, computing the number of + episodes trained on by each policy. Used to weight the strategy + sampling. + strategy_sampler: Sampling function that samples a joint strategy given + probabilities. + + Returns: + Sampled list of policies (One policy per player), index of currently + training policies in the list. + """ + num_players = len(training_parameters) + + # Prioritizing players that haven't had as much training as the others. + episodes_per_player = [sum(episodes) for episodes in episodes_per_oracle] + chosen_player = random_count_weighted_choice(episodes_per_player) + + # Uniformly choose among the sampled player. 
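+ # (That is, uniformly among the chosen player's currently-training policies;
+ # there can be several when number_policies_selected > 1.)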
+ agent_chosen_ind = np.random.randint( + 0, len(training_parameters[chosen_player])) + agent_chosen_dict = training_parameters[chosen_player][agent_chosen_ind] + new_policy = new_policies[chosen_player][agent_chosen_ind] + + # Sample other players' policies. + total_policies = agent_chosen_dict["total_policies"] + probabilities_of_playing_policies = agent_chosen_dict[ + "probabilities_of_playing_policies"] + episode_policies = strategy_sampler(total_policies, + probabilities_of_playing_policies) + + live_agents_player_index = [(chosen_player, agent_chosen_ind)] + + for player in range(num_players): + if player == chosen_player: + episode_policies[player] = new_policy + assert not new_policy.is_frozen() + else: + # Sample a bernoulli with parameter 'self_play_proportion' to determine + # whether we do self-play with 'player'. + if np.random.binomial(1, self._self_play_proportion): + # If we are indeed doing self-play on that round, sample among the + # trained strategies of current_player, with priority given to less- + # selected agents. + agent_index = random_count_weighted_choice( + episodes_per_oracle[player]) + self_play_agent = new_policies[player][agent_index] + episode_policies[player] = self_play_agent + live_agents_player_index.append((player, agent_index)) + else: + assert episode_policies[player].is_frozen() + + return episode_policies, live_agents_player_index + + def _rollout(self, game, agents, **oracle_specific_execution_kwargs): + self.sample_episode(None, agents, is_evaluation=False) + + def generate_new_policies(self, training_parameters): + """Generates new policies to be trained into best responses. + + Args: + training_parameters: list of list of training parameter dictionaries, one + list per player. + + Returns: + List of list of the new policies, following the same structure as + training_parameters. + """ + new_policies = [] + for player in range(len(training_parameters)): + player_parameters = training_parameters[player] + new_pols = [] + for param in player_parameters: + current_pol = param["policy"] + if isinstance(current_pol, self._best_response_class): + new_pol = current_pol.copy_with_noise(self._kwargs.get("sigma", 0.0)) + else: + new_pol = self._best_response_class(self._env, player, + **self._best_response_kwargs) + new_pol.unfreeze() + new_pols.append(new_pol) + new_policies.append(new_pols) + return new_policies + + def __call__(self, + game, + training_parameters, + strategy_sampler=utils.sample_strategy, + **oracle_specific_execution_kwargs): + """Call method for oracle, returns best responses against a set of policies. + + Args: + game: The game on which the optimization process takes place. + training_parameters: A list of list of dictionaries (One list per player), + each dictionary containing the following fields : + - policy : the policy from which to start training. + - total_policies: A list of all policy.Policy strategies used for + training, including the one for the current player. + - current_player: Integer representing the current player. + - probabilities_of_playing_policies: A list of arrays representing, per + player, the probabilities of playing each policy in total_policies for + the same player. + strategy_sampler: Callable that samples strategies from total_policies + using probabilities_of_playing_policies. It only samples one joint + set of policies for all players. 
Implemented to be able to take into + account joint probabilities of action (For Alpharank) + **oracle_specific_execution_kwargs: Other set of arguments, for + compatibility purposes. Can for example represent whether to Rectify + Training or not. + + Returns: + A list of list, one for each member of training_parameters, of (epsilon) + best responses. + """ + episodes_per_oracle = [[0 + for _ in range(len(player_params))] + for player_params in training_parameters] + episodes_per_oracle = np.array(episodes_per_oracle) + + new_policies = self.generate_new_policies(training_parameters) + + # TODO(author4): Look into multithreading. + while not self._has_terminated(episodes_per_oracle): + agents, indexes = self.sample_policies_for_episode( + new_policies, training_parameters, episodes_per_oracle, + strategy_sampler) + self._rollout(game, agents, **oracle_specific_execution_kwargs) + episodes_per_oracle = update_episodes_per_oracles(episodes_per_oracle, + indexes) + # Freeze the new policies to keep their weights static. This allows us to + # later not have to make the distinction between static and training + # policies in training iterations. + freeze_all(new_policies) + return new_policies diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/rl_policy.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/rl_policy.py new file mode 100644 index 0000000..d53cc17 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/rl_policy.py @@ -0,0 +1,162 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""DQN as a policy. + +Treating RL Oracles as policies allows us to streamline their use with tabular +policies and other policies in OpenSpiel, and freely mix populations using +different types of oracles. +""" + +from open_spiel.python import policy +from open_spiel.python import rl_environment +from open_spiel.python.jax import dqn +from open_spiel.python.jax import policy_gradient + +# pylint: disable=protected-access + + +def rl_policy_factory(rl_class): + """Transforms an RL Agent into an OpenSpiel policy. + + Args: + rl_class: An OpenSpiel class inheriting from 'rl_agent.AbstractAgent' such + as policy_gradient.PolicyGradient or dqn.DQN. + + Returns: + An RLPolicy class that wraps around an instance of rl_class to transform it + into a policy. + """ + + class RLPolicy(policy.Policy): + """A 'policy.Policy' wrapper around an 'rl_agent.AbstractAgent' instance.""" + + def __init__(self, env, player_id, **kwargs): + """Constructs an RL Policy. + + Args: + env: An OpenSpiel RL Environment instance. + player_id: The ID of the DQN policy's player. + **kwargs: Various kwargs used to initialize rl_class. 
+ """ + game = env.game + + super(RLPolicy, self).__init__(game, player_id) + self._policy = rl_class(**{"player_id": player_id, **kwargs}) + + self._frozen = False + self._rl_class = rl_class + self._env = env + self._obs = { + "info_state": [None] * self.game.num_players(), + "legal_actions": [None] * self.game.num_players() + } + + def get_time_step(self): + time_step = self._env.get_time_step() + return time_step + + def action_probabilities(self, state, player_id=None): + cur_player = state.current_player() + legal_actions = state.legal_actions(cur_player) + + step_type = rl_environment.StepType.LAST if state.is_terminal( + ) else rl_environment.StepType.MID + + self._obs["current_player"] = cur_player + self._obs["info_state"][cur_player] = ( + state.information_state_tensor(cur_player)) + self._obs["legal_actions"][cur_player] = legal_actions + + # pylint: disable=protected-access + rewards = state.rewards() + if rewards: + time_step = rl_environment.TimeStep( + observations=self._obs, rewards=rewards, + discounts=self._env._discounts, step_type=step_type) + else: + rewards = [0] * self._num_players + time_step = rl_environment.TimeStep( + observations=self._obs, rewards=rewards, + discounts=self._env._discounts, + step_type=rl_environment.StepType.FIRST) + # pylint: enable=protected-access + + p = self._policy.step(time_step, is_evaluation=True).probs + prob_dict = {action: p[action] for action in legal_actions} + return prob_dict + + def step(self, time_step, is_evaluation=False): + # The _frozen attribute freezes the weights of the current policy. This + # effect is achieved by considering that we always are evaluating when the + # current policy's weights are frozen. For more details, see the freeze() + # method. + is_evaluation = (is_evaluation) or (self._frozen) + return self._policy.step(time_step, is_evaluation) + + def freeze(self): + """This method freezes the policy's weights. + + The weight freezing effect is implemented by preventing any training to + take place through calls to the step function. The weights are therefore + not effectively frozen, and unconventional calls may trigger weights + training. + + The weight-freezing effect is especially needed in PSRO, where all + policies that aren't being trained by the oracle must be static. Freezing + trained policies permitted us not to change how 'step' was called when + introducing self-play (By not changing 'is_evaluation' depending on the + current player). 
+ """ + self._frozen = True + + def unfreeze(self): + self._frozen = False + + def is_frozen(self): + return self._frozen + + def get_weights(self): + return self._policy.get_weights() + + def copy_with_noise(self, sigma=0.0): + copied_object = RLPolicy.__new__(RLPolicy) + super(RLPolicy, copied_object).__init__(self.game, self.player_ids) + setattr(copied_object, "_rl_class", self._rl_class) + setattr(copied_object, "_obs", self._obs) + if hasattr(self._policy, "copy_with_noise"): + setattr( + copied_object, "_policy", self._policy.copy_with_noise(sigma=sigma) + ) + else: + # Make a new one from scratch + _ = self._policy._kwargs.pop("self", None) + setattr( + copied_object, + "_policy", + self._policy.__class__(**self._policy._kwargs), + ) + setattr(copied_object, "_env", self._env) + copied_object.unfreeze() + + return copied_object + + return RLPolicy + + +# Generating policy classes for Policy Gradient and DQN +# pylint: disable=invalid-name +PGPolicy = rl_policy_factory(policy_gradient.PolicyGradient) +DQNPolicy = rl_policy_factory(dqn.DQN) +# pylint: enable=invalid-name diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/strategy_selectors.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/strategy_selectors.py new file mode 100644 index 0000000..775ee3c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/strategy_selectors.py @@ -0,0 +1,486 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Strategy selectors repository.""" + +import numpy as np + +DEFAULT_STRATEGY_SELECTION_METHOD = "probabilistic" + +# Constant, specifying the threshold below which probabilities are considered 0. +EPSILON_MIN_POSITIVE_PROBA = 1e-8 + + +def exhaustive(solver, number_policies_selected=1): + """Returns every player's policies. + + Args: + solver: A GenPSROSolver instance. + number_policies_selected: Number of policies to return for each player. + (Compatibility argument) + + Returns: + used_policies : List of size 'num_players' of lists of size + min('number_policies_selected', num_policies') containing selected + policies. + used_policies_indexes: List of lists of the same shape as used_policies, + containing the list indexes of selected policies. + """ + del number_policies_selected + policies = solver.get_policies() + indexes = [list(range(len(pol))) for pol in policies] + return policies, indexes + + +# Factory function for more complex filters. +def filter_function_factory(filter_function): + """Returns a function filtering players' strategies wrt. + + 'filter_function'. + + This function is used to select which strategy to start training from. As + such, and in the Rectified Nash Response logic, filter_function expects a + certain set of arguments: + - player_policies: The list of policies for the current player. + - player: The current player id. + - effective_number_selected: The effective number of policies to select. 
+ - solver: In case the above arguments weren't enough, the solver instance so + the filter_function can have more complex behavior. + And returns the selected policies and policy indexes for the current player. + + Args: + filter_function: A filter function following the specifications above, used + to filter which strategy to start training from for each player. + + Returns: + A filter function on all players. + """ + + def filter_policies(solver, number_policies_selected=1): + """Filters each player's policies according to 'filter_function'. + + Args: + solver: The PSRO solver. + number_policies_selected: The expected number of policies to select. If + there are fewer policies than 'number_policies_selected', behavior will + saturate at num_policies. + + Returns: + used_policies : List of length 'num_players' of lists of length + min('number_policies_selected', num_policies') containing selected + policies. + used_policies_indexes: List of lists of the same shape as used_policies, + containing the list indexes of selected policies. + + """ + policies = solver.get_policies() + num_players = len(policies) + meta_strategy_probabilities = solver.get_meta_strategies() + + used_policies = [] + used_policy_indexes = [] + for player in range(num_players): + player_policies = policies[player] + current_selection_probabilities = meta_strategy_probabilities[player] + effective_number = min(number_policies_selected, len(player_policies)) + + used_policy, used_policy_index = filter_function( + player_policies, current_selection_probabilities, player, + effective_number, solver) + used_policies.append(used_policy) + used_policy_indexes.append(used_policy_index) + return used_policies, used_policy_indexes + + # Return the created function. + return filter_policies + + +def rectified_filter(player_policies, selection_probabilities, player, + effective_number_to_select, solver): + """Returns every strategy with nonzero selection probability. + + Args: + player_policies: A list of policies for the current player. + selection_probabilities: Selection probabilities for 'player_policies'. + player: Player id. + effective_number_to_select: Effective number of policies to select. + solver: PSRO solver instance if kwargs needed. + + Returns: + selected_policies : List of size 'effective_number_to_select' + containing selected policies. + selected_indexes: List of the same shape as selected_policies, + containing the list indexes of selected policies. + """ + del effective_number_to_select, solver, player + selected_indexes = [ + i for i in range(len(player_policies)) + if selection_probabilities[i] > EPSILON_MIN_POSITIVE_PROBA + ] + selected_policies = [player_policies[i] for i in selected_indexes] + + return selected_policies, selected_indexes + + +def probabilistic_filter(player_policies, selection_probabilities, player, + effective_number_to_select, solver): + """Returns every strategy with nonzero selection probability. + + Args: + player_policies: A list of policies for the current player. + selection_probabilities: Selection probabilities for 'player_policies'. + player: Player id. + effective_number_to_select: Effective number of policies to select. + solver: PSRO solver instance if kwargs needed. + + Returns: + selected_policies : List of size 'effective_number_to_select' + containing selected policies. + selected_indexes: List of the same shape as selected_policies, + containing the list indexes of selected policies. 
+ """ + del solver, player + selected_indexes = list( + np.random.choice( + list(range(len(player_policies))), + effective_number_to_select, + replace=False, + p=selection_probabilities)) + selected_policies = [player_policies[i] for i in selected_indexes] + return selected_policies, selected_indexes + + +def top_k_probabilities_filter(player_policies, selection_probabilities, player, + effective_number_to_select, solver): + """Returns top 'effective_number_to_select' highest probability policies. + + Args: + player_policies: A list of policies for the current player. + selection_probabilities: Selection probabilities for 'player_policies'. + player: Player id. + effective_number_to_select: Effective number of policies to select. + solver: PSRO solver instance if kwargs needed. + + Returns: + selected_policies : List of size 'effective_number_to_select' + containing selected policies. + selected_indexes: List of the same shape as selected_policies, + containing the list indexes of selected policies. + """ + del player, solver + selected_indexes = [ + index for _, index in sorted( + zip(selection_probabilities, list(range(len(player_policies)))), + key=lambda pair: pair[0]) + ][:effective_number_to_select] + + selected_policies = [player_policies[i] for i in selected_indexes] + return selected_policies, selected_indexes + + +def uniform_filter(player_policies, selection_probabilities, player, + effective_number_to_select, solver): + """Returns 'effective_number_to_select' uniform-randomly selected policies. + + Args: + player_policies: A list of policies for the current player. + selection_probabilities: Selection probabilities for 'player_policies'. + player: Player id. + effective_number_to_select: Effective number of policies to select. + solver: PSRO solver instance if kwargs needed. + + Returns: + selected_policies : List of size 'effective_number_to_select' + containing selected policies. + selected_indexes: List of the same shape as selected_policies, + containing the list indexes of selected policies. + """ + del solver, selection_probabilities, player + selected_indexes = list( + np.random.choice( + list(range(len(player_policies))), + effective_number_to_select, + replace=False, + p=np.ones(len(player_policies)) / len(player_policies))) + selected_policies = [player_policies[i] for i in selected_indexes] + return selected_policies, selected_indexes + + +def functional_probabilistic_filter(player_policies, selection_probabilities, + player, effective_number_to_select, solver): + """Returns effective_number_to_select randomly selected policies by function. + + Args: + player_policies: A list of policies for the current player. + selection_probabilities: Selection probabilities for 'player_policies'. + player: Player id. + effective_number_to_select: Effective number of policies to select. + solver: PSRO solver instance if kwargs needed. + + Returns: + selected_policies : List of size 'effective_number_to_select' + containing selected policies. + selected_indexes: List of the same shape as selected_policies, + containing the list indexes of selected policies. + """ + kwargs = solver.get_kwargs() + # By default, use meta strategies. 
+ probability_computation_function = kwargs.get( + "selection_probability_function") or (lambda x: x.get_meta_strategies()) + + selection_probabilities = probability_computation_function(solver)[player] + selected_indexes = list( + np.random.choice( + list(range(len(player_policies))), + effective_number_to_select, + replace=False, + p=selection_probabilities)) + selected_policies = [player_policies[i] for i in selected_indexes] + return selected_policies, selected_indexes + + +# Introducing aliases: +uniform = filter_function_factory(uniform_filter) +rectified = filter_function_factory(rectified_filter) +probabilistic = filter_function_factory(probabilistic_filter) +top_k_probabilities = filter_function_factory(top_k_probabilities_filter) +functional_probabilistic = filter_function_factory( + functional_probabilistic_filter) + +# pylint:disable=pointless-string-statement +# --- +"""Selectors below are used to rectify probabilities. +""" +# --- +# pylint:enable=pointless-string-statement + + +def get_current_and_average_payoffs(ps2ro_trainer, current_player, + current_strategy): + """Returns the current player's and average players' payoffs. + + These payoffs are returned when current_player's strategy's index is + 'current_strategy'. + + Args: + ps2ro_trainer: A ps2ro object. + current_player: Integer, current player index. + current_strategy: Integer, current player's strategy index. + + Returns: + Payoff tensor for current player, Average payoff tensor over all players. + """ + # Get the vector of payoffs associated with current_player's strategy ind + meta_games = ps2ro_trainer.meta_games + current_payoff = meta_games[current_player] + current_payoff = np.take( + current_payoff, current_strategy, axis=current_player) + + # Get average per-player payoff matrix. + average_payoffs = np.mean(meta_games, axis=0) + average_payoffs = np.take( + average_payoffs, current_strategy, axis=current_player) + return current_payoff, average_payoffs + + +def rectified_selector(ps2ro_trainer, current_player, current_strategy): + current_payoff, average_payoffs = get_current_and_average_payoffs( + ps2ro_trainer, current_player, current_strategy) + + # Rectified Nash condition : select those strategies where we do better + # than others. + res = current_payoff >= average_payoffs + return np.expand_dims(res, axis=current_player) + + +# pylint:disable=pointless-string-statement +# --- +"""When using joint strategies, use the selectors below. +""" +# --- +# pylint:enable=pointless-string-statement + + +def empty_list_generator(number_dimensions): + result = [] + for _ in range(number_dimensions - 1): + result = [result] + return result + + +def get_indices_from_non_marginalized(policies): + """Get a list of lists of indices from joint policies. + + These are the ones used for training strategy selector. + + Args: + policies: a list of joint policies. + + Returns: + A list of lists of indices. + """ + num_players = len(policies[0]) + num_strategies = len(policies) + return [list(range(num_strategies)) for _ in range(num_players)] + + +# In case we want to select strategies to train based on +# non-marginalized probabilities. +def rectified_non_marginalized(solver): + """Returns every strategy with nonzero selection probability. + + Args: + solver: A GenPSROSolver instance. 
+ """ + used_policies = [] + policies = solver.get_policies() + num_players = len(policies) + meta_strategy_probabilities = ( + solver.get_and_update_non_marginalized_meta_strategies(update=False)) + for k in range(num_players): + current_policies = policies[k] + current_probabilities = meta_strategy_probabilities[k] + current_policies = [ + current_policies[i] + for i in range(len(current_policies)) + if current_probabilities[i] > EPSILON_MIN_POSITIVE_PROBA + ] + used_policies.append(current_policies) + return used_policies, get_indices_from_non_marginalized(used_policies) + + +def exhaustive_non_marginalized(solver): + """Returns every player's policies. + + Args: + solver: A GenPSROSolver instance. + """ + used_policies = solver.get_policies() + return used_policies, get_indices_from_non_marginalized(used_policies) + + +def probabilistic_non_marginalized(solver): + """Returns [kwargs] policies randomly, proportionally with selection probas. + + Args: + solver: A GenPSROSolver instance. + """ + kwargs = solver.get_kwargs() + # By default, select only 1 new policy to optimize from. + number_policies_to_select = kwargs.get("number_policies_selected") or 1 + + # Get integer IDs and probabilities of meta-strategies + ids = solver.get_joint_policy_ids() + joint_strategy_probabilities = ( + solver.get_and_update_non_marginalized_meta_strategies(update=False)) + + effective_number = min(number_policies_to_select, len(ids)) + selected_policy_ids = list( + np.random.choice( + ids, effective_number, replace=False, p=joint_strategy_probabilities)) + used_policies = solver.get_joint_policies_from_id_list(selected_policy_ids) + return used_policies, get_indices_from_non_marginalized(used_policies) + + +def top_k_probabilites_non_marginalized(solver): + """Returns [kwargs] policies with highest selection probabilities. + + Args: + solver: A GenPSROSolver instance. + """ + kwargs = solver.get_kwargs() + # By default, select only 1 new policy to optimize from. + number_policies_to_select = kwargs.get("number_policies_selected") or 1 + + ids = solver.get_joint_policy_ids() + + effective_number = min(number_policies_to_select, len(ids)) + joint_strategy_probabilities = ( + solver.get_and_update_non_marginalized_meta_strategies(update=False)) + + sorted_list = sorted( + zip(joint_strategy_probabilities, ids), + reverse=True, + key=lambda pair: pair[0]) + selected_policy_ids = [id_selected for _, id_selected in sorted_list + ][:effective_number] + + used_policies = solver.get_joint_policies_from_id_list(selected_policy_ids) + return used_policies, get_indices_from_non_marginalized(used_policies) + + +def uniform_non_marginalized(solver): + """Returns [kwargs] randomly selected policies (Uniform probability). + + Args: + solver: A GenPSROSolver instance. + """ + kwargs = solver.get_kwargs() + # By default, select only 1 new policy to optimize from. 
+ number_policies_to_select = kwargs.get("number_policies_selected") or 1 + + ids = solver.get_joint_policy_ids() + + effective_number = min(number_policies_to_select, len(ids)) + selected_policy_ids = list( + np.random.choice( + ids, effective_number, replace=False, p=np.ones(len(ids)) / len(ids))) + used_policies = solver.get_joint_policies_from_id_list(selected_policy_ids) + return used_policies, get_indices_from_non_marginalized(used_policies) + + +def compressed_lambda(x): + return x.get_and_update_non_marginalized_meta_strategies(update=False) + + +def functional_probabilistic_non_marginalized(solver): + """Returns [kwargs] randomly selected policies with generated probabilities. + + Args: + solver: A GenPSROSolver instance. + """ + kwargs = solver.get_kwargs() + # By default, select only 1 new policy to optimize from. + number_policies_to_select = kwargs.get("number_policies_selected") or 1 + # By default, use meta strategies. + probability_computation_function = kwargs.get( + "selection_probability_function") or compressed_lambda + + ids = solver.get_joint_policy_ids() + joint_strategy_probabilities = probability_computation_function(solver) + + effective_number = min(number_policies_to_select, len(ids)) + selected_policies = list( + np.random.choice( + ids, effective_number, replace=False, p=joint_strategy_probabilities)) + used_policies = solver.get_joint_policies_from_id_list(selected_policies) + return used_policies, get_indices_from_non_marginalized(used_policies) + + +TRAINING_STRATEGY_SELECTORS = { + "functional_probabilistic": functional_probabilistic, + "top_k_probabilities": top_k_probabilities, + "probabilistic": probabilistic, + "exhaustive": exhaustive, + "rectified": rectified, + "uniform": uniform, + "functional_probabilistic_non_marginalized": ( + functional_probabilistic_non_marginalized + ), + "top_k_probabilites_non_marginalized": top_k_probabilites_non_marginalized, + "probabilistic_non_marginalized": probabilistic_non_marginalized, + "exhaustive_non_marginalized": exhaustive_non_marginalized, + "rectified_non_marginalized": rectified_non_marginalized, + "uniform_non_marginalized": uniform_non_marginalized, +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/strategy_selectors_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/strategy_selectors_test.py new file mode 100644 index 0000000..d05d46c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/strategy_selectors_test.py @@ -0,0 +1,195 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for open_spiel.python.algorithms.psro_v2.strategy_selectors.""" + +from absl.testing import absltest +import numpy as np +from open_spiel.python.algorithms.psro_v2 import strategy_selectors + + +class FakeSolver(object): + + def __init__(self, strategies, policies): + self.strategies = strategies + self.policies = policies + + def get_policies(self): + return self.policies + + def get_meta_strategies(self): + return self.strategies + + +def equal_to_transposition_lists(a, b): + return [set(x) for x in a] == [set(x) for x in b] + + +EPSILON_MIN_POSITIVE_PROBA = 1e-8 + + +def rectified_alias(solver, number_policies_to_select): + """Returns every strategy with nonzero selection probability. + + Args: + solver: A GenPSROSolver instance. + number_policies_to_select: Number policies to select + + Returns: + used_policies: A list, each element a list of the policies used per player. + """ + del number_policies_to_select + + used_policies = [] + used_policy_indexes = [] + + policies = solver.get_policies() + num_players = len(policies) + meta_strategy_probabilities = solver.get_meta_strategies() + + for k in range(num_players): + current_policies = policies[k] + current_probabilities = meta_strategy_probabilities[k] + + current_indexes = [ + i for i in range(len(current_policies)) + if current_probabilities[i] > EPSILON_MIN_POSITIVE_PROBA + ] + current_policies = [ + current_policies[i] + for i in current_indexes + ] + + used_policy_indexes.append(current_indexes) + used_policies.append(current_policies) + return used_policies, used_policy_indexes + + +def probabilistic_alias(solver, number_policies_to_select): + """Returns [kwargs] policies randomly, proportionally with selection probas. + + Args: + solver: A GenPSROSolver instance. + number_policies_to_select: Number policies to select + """ + policies = solver.get_policies() + num_players = len(policies) + meta_strategy_probabilities = solver.get_meta_strategies() + + print(policies, meta_strategy_probabilities) + used_policies = [] + used_policy_indexes = [] + for k in range(num_players): + current_policies = policies[k] + current_selection_probabilities = meta_strategy_probabilities[k] + effective_number = min(number_policies_to_select, len(current_policies)) + + selected_indexes = list( + np.random.choice( + list(range(len(current_policies))), + effective_number, + replace=False, + p=current_selection_probabilities)) + selected_policies = [current_policies[i] for i in selected_indexes] + used_policies.append(selected_policies) + used_policy_indexes.append(selected_indexes) + return used_policies, used_policy_indexes + + +def top_k_probabilities_alias(solver, number_policies_to_select): + """Returns [kwargs] policies with highest selection probabilities. + + Args: + solver: A GenPSROSolver instance. 
+ number_policies_to_select: Number policies to select + """ + policies = solver.get_policies() + num_players = len(policies) + meta_strategy_probabilities = solver.get_meta_strategies() + + used_policies = [] + used_policy_indexes = [] + for k in range(num_players): + current_policies = policies[k] + current_selection_probabilities = meta_strategy_probabilities[k] + effective_number = min(number_policies_to_select, len(current_policies)) + + # pylint: disable=g-complex-comprehension + selected_indexes = [ + index for _, index in sorted( + zip(current_selection_probabilities, + list(range(len(current_policies)))), + key=lambda pair: pair[0]) + ][:effective_number] + + selected_policies = [current_policies[i] for i in selected_indexes] + used_policies.append(selected_policies) + used_policy_indexes.append(selected_indexes) + return used_policies, used_policy_indexes + + +class StrategySelectorsTest(absltest.TestCase): + + def test_vital(self): + n_tests = 1000 + number_strategies = 50 + number_players = 3 + for i in range(n_tests): + probabilities = np.random.uniform(size=(number_players, + number_strategies)) + probabilities /= np.sum(probabilities, axis=1).reshape(-1, 1) + probabilities = list(probabilities) + + policies = [list(range(number_strategies)) for _ in range(number_players)] + + solver = FakeSolver(probabilities, policies) + + # To see how rectified reacts to 0 probabilities. + probabilities[0][0] = 0 + probabilities[-1][-1] = 0 + a, b = strategy_selectors.rectified(solver, 1) + c, d = rectified_alias(solver, 1) + + self.assertEqual(a, c, "Rectified failed.") + self.assertEqual(b, d, "Rectified failed.") + + a, b = strategy_selectors.top_k_probabilities(solver, 3) + c, d = top_k_probabilities_alias(solver, 3) + + self.assertEqual(a, c, "Top k failed.") + self.assertEqual(b, d, "Top k failed.") + + n_nonzero_policies = 2 + probabilities = [np.zeros(number_strategies) for _ in range( + number_players)] + + for player in range(number_players): + for _ in range(n_nonzero_policies): + i = np.random.randint(0, high=number_strategies) + while probabilities[player][i] > 1e-12: + i = np.random.randint(0, high=number_strategies) + probabilities[player][i] = 1.0 / n_nonzero_policies + probabilities[player] /= np.sum(probabilities[player]) + + solver = FakeSolver(probabilities, policies) + a, b = strategy_selectors.probabilistic(solver, n_nonzero_policies) + c, d = probabilistic_alias(solver, n_nonzero_policies) + + self.assertTrue(equal_to_transposition_lists(a, c), + "Probabilistic failed.") + self.assertTrue(equal_to_transposition_lists(b, d), + "Probabilistic failed.") + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/utils.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/utils.py new file mode 100644 index 0000000..1d81f2d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/psro_v2/utils.py @@ -0,0 +1,417 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Various general utility functions.""" + +import random +import numpy as np + +from open_spiel.python.algorithms import get_all_states +from open_spiel.python.algorithms import policy_aggregator +from open_spiel.python.algorithms import policy_aggregator_joint +from open_spiel.python.egt import alpharank +from open_spiel.python.egt import utils as alpharank_utils + + +def empty_list_generator(number_dimensions): + result = [] + for _ in range(number_dimensions - 1): + result = [result] + return result + + +def random_choice(outcomes, probabilities): + """Samples from discrete probability distribution. + + `numpy.choice` does not seem optimized for repeated calls, this code + had higher performance. + + Args: + outcomes: List of categorical outcomes. + probabilities: Discrete probability distribtuion as list of floats. + + Returns: + Entry of `outcomes` sampled according to the distribution. + """ + cumsum = np.cumsum(probabilities) + return outcomes[np.searchsorted(cumsum/cumsum[-1], random.random())] + + +def sample_strategy(total_policies, + probabilities_of_playing_policies, + probs_are_marginal=True): + """Samples strategies given probabilities. + + Uses independent sampling if probs_are_marginal, and joint sampling otherwise. + + Args: + total_policies: if probs_are_marginal, this is a list, each element a list + of each player's policies. If not, this is a list of joint policies. In + both cases the policy orders must match that of + probabilities_of_playing_policies. + probabilities_of_playing_policies: if probs_are_marginal, this is a list, + with the k-th element also a list specifying the play probabilities of the + k-th player's policies. If not, this is a list of play probabilities of + the joint policies specified by total_policies. + probs_are_marginal: a boolean indicating if player-wise marginal + probabilities are provided in probabilities_of_playing_policies. If False, + then play_probabilities is assumed to specify joint distribution. + + Returns: + sampled_policies: A list specifying a single sampled joint strategy. + """ + + if probs_are_marginal: + return sample_strategy_marginal(total_policies, + probabilities_of_playing_policies) + else: + return sample_strategy_joint(total_policies, + probabilities_of_playing_policies) + + +def sample_strategy_marginal(total_policies, probabilities_of_playing_policies): + """Samples strategies given marginal probabilities. + + Uses independent sampling if probs_are_marginal, and joint sampling otherwise. + + Args: + total_policies: A list, each element a list of each player's policies. + probabilities_of_playing_policies: This is a list, with the k-th element + also a list specifying the play probabilities of the k-th player's + policies. + + Returns: + sampled_policies: A list specifying a single sampled joint strategy. 
+ """ + + num_players = len(total_policies) + sampled_policies = [] + for k in range(num_players): + current_policies = total_policies[k] + current_probabilities = probabilities_of_playing_policies[k] + sampled_policy_k = random_choice(current_policies, current_probabilities) + sampled_policies.append(sampled_policy_k) + return sampled_policies + + +def sample_random_tensor_index(probabilities_of_index_tensor): + shape = probabilities_of_index_tensor.shape + reshaped_probas = probabilities_of_index_tensor.reshape(-1) + + strat_list = list(range(len(reshaped_probas))) + chosen_index = random_choice(strat_list, reshaped_probas) + return np.unravel_index(chosen_index, shape) + + +def sample_strategy_joint(total_policies, probabilities_of_playing_policies): + """Samples strategies given joint probabilities. + + Uses independent sampling if probs_are_marginal, and joint sampling otherwise. + + Args: + total_policies: A list, each element a list of each player's policies. + probabilities_of_playing_policies: This is a list of play probabilities of + the joint policies specified by total_policies. + + Returns: + sampled_policies: A list specifying a single sampled joint strategy. + """ + + sampled_index = sample_random_tensor_index(probabilities_of_playing_policies) + sampled_policies = [] + for player in range(len(sampled_index)): + ind = sampled_index[player] + sampled_policies.append(total_policies[player][ind]) + return sampled_policies + + +def softmax(x): + return np.exp(x) / np.sum(np.exp(x)) + + +def round_maintain_sum(x): + """Returns element-wise rounded version y of a vector x, with sum(x)==sum(y). + + E.g., if x = array([3.37625333, 2.27920304, 4.34454364]), note sum(x) == 10. + However, naively doing y = np.round(x) yields sum(y) == 9. In this function, + however, the rounded counterpart y will have sum(y) == 10. + + Args: + x: a vector. + """ + y = np.floor(x) + sum_diff = round(sum(x)) - sum(y) # Difference of original vs. floored sum + indices = np.argsort(y - x)[:int(sum_diff)] # Indices with highest difference + y[indices] += 1 # Add the missing mass to the elements with the most missing + return y + + +def get_alpharank_marginals(payoff_tables, pi): + """Returns marginal strategy rankings for each player given joint rankings pi. + + Args: + payoff_tables: List of meta-game payoff tables for a K-player game, where + each table has dim [n_strategies_player_1 x ... x n_strategies_player_K]. + These payoff tables may be asymmetric. + pi: The vector of joint rankings as computed by alpharank. Each element i + corresponds to a unique integer ID representing a given strategy profile, + with profile_to_id mappings provided by + alpharank_utils.get_id_from_strat_profile(). + + Returns: + pi_marginals: List of np.arrays of player-wise marginal strategy masses, + where the k-th player's np.array has shape [n_strategies_player_k]. 
+ """ + num_populations = len(payoff_tables) + + if num_populations == 1: + return pi + else: + num_strats_per_population = alpharank_utils.get_num_strats_per_population( + payoff_tables, payoffs_are_hpt_format=False) + num_profiles = alpharank_utils.get_num_profiles(num_strats_per_population) + pi_marginals = [np.zeros(n) for n in num_strats_per_population] + for i_strat in range(num_profiles): + strat_profile = ( + alpharank_utils.get_strat_profile_from_id(num_strats_per_population, + i_strat)) + for i_player in range(num_populations): + pi_marginals[i_player][strat_profile[i_player]] += pi[i_strat] + return pi_marginals + + +def remove_epsilon_negative_probs(probs, epsilon=1e-9): + """Removes negative probabilities that occur due to precision errors.""" + if len(probs[probs < 0]) > 0: # pylint: disable=g-explicit-length-test + # Ensures these negative probabilities aren't large in magnitude, as that is + # unexpected and likely not due to numerical precision issues + print("Probabilities received were: {}".format(probs[probs < 0])) + assert np.all(np.min(probs[probs < 0]) > -1.*epsilon), ( + "Negative Probabilities received were: {}".format(probs[probs < 0])) + + probs[probs < 0] = 0 + probs = probs / np.sum(probs) + return probs + + +def get_joint_strategy_from_marginals(probabilities): + """Returns a joint strategy tensor from a list of marginals. + + Args: + probabilities: list of list of probabilities, one for each player. + + Returns: + A joint strategy from a list of marginals. + """ + probas = [] + for i in range(len(probabilities)): + probas_shapes = [1] * len(probabilities) + probas_shapes[i] = -1 + probas.append(np.array(probabilities[i]).reshape(probas_shapes)) + return np.prod(probas) + + +def alpharank_strategy(solver, return_joint=False, **unused_kwargs): + """Returns AlphaRank distribution on meta game matrix. + + This method works for general games. + + Args: + solver: GenPSROSolver instance. + return_joint: a boolean specifying whether to return player-wise + marginals. + + Returns: + marginals: a list, specifying for each player the alpharank marginal + distributions on their strategies. + joint_distr: a list, specifying the joint alpharank distributions for all + strategy profiles. + """ + meta_games = solver.get_meta_game() + meta_games = [np.asarray(x) for x in meta_games] + + if solver.symmetric_game: + meta_games = [meta_games[0]] + + # Get alpharank distribution via alpha-sweep + joint_distr = alpharank.sweep_pi_vs_epsilon( + meta_games) + joint_distr = remove_epsilon_negative_probs(joint_distr) + + marginals = 2 * [joint_distr] + joint_distr = get_joint_strategy_from_marginals(marginals) + if return_joint: + return marginals, joint_distr + else: + return joint_distr + + else: + joint_distr = alpharank.sweep_pi_vs_epsilon(meta_games) + joint_distr = remove_epsilon_negative_probs(joint_distr) + + if return_joint: + marginals = get_alpharank_marginals(meta_games, joint_distr) + return marginals, joint_distr + else: + return joint_distr + + +def get_strategy_profile_ids(payoff_tables): + num_strats_per_population = ( + alpharank_utils.get_num_strats_per_population( + payoff_tables, payoffs_are_hpt_format=False)) + return range(alpharank_utils.get_num_profiles(num_strats_per_population)) + + +def get_joint_policies_from_id_list(payoff_tables, policies, profile_id_list): + """Returns a list of joint policies, given a list of integer IDs. + + Args: + payoff_tables: List of payoff tables, one per player. + policies: A list of policies, one per player. 
+ profile_id_list: list of integer IDs, each corresponding to a joint policy. + These integers correspond to those in get_strategy_profile_ids(). + + Returns: + selected_joint_policies: A list, with each element being a joint policy + instance (i.e., a list of policies, one per player). + """ + num_strats_per_population = ( + alpharank_utils.get_num_strats_per_population( + payoff_tables, payoffs_are_hpt_format=False)) + np.testing.assert_array_equal(num_strats_per_population, + [len(p) for p in policies]) + num_players = len(policies) + + selected_joint_policies = [] + for profile_id in profile_id_list: + # Compute the profile associated with the integer profile_id + policy_profile = alpharank_utils.get_strat_profile_from_id( + num_strats_per_population, profile_id) + # Append the joint policy corresponding to policy_profile + selected_joint_policies.append( + [policies[k][policy_profile[k]] for k in range(num_players)]) + return selected_joint_policies + + +def compute_states_and_info_states_if_none(game, + all_states=None, + state_to_information_state=None): + """Returns all_states and/or state_to_information_state for the game. + + To recompute everything, pass in None for both all_states and + state_to_information_state. Otherwise, this function will use the passed in + values to reconstruct either of them. + + Args: + game: The open_spiel game. + all_states: The result of calling get_all_states.get_all_states. Cached for + improved performance. + state_to_information_state: A dict mapping str(state) to + state.information_state for every state in the game. Cached for improved + performance. + """ + if all_states is None: + all_states = get_all_states.get_all_states( + game, + depth_limit=-1, + include_terminals=False, + include_chance_states=False) + + if state_to_information_state is None: + state_to_information_state = { + state: all_states[state].information_state_string() + for state in all_states + } + + return all_states, state_to_information_state + + +def aggregate_policies(game, total_policies, probabilities_of_playing_policies): + """Aggregate the players' policies. + + Specifically, returns a single callable policy object that is + realization-equivalent to playing total_policies with + probabilities_of_playing_policies. I.e., aggr_policy is a joint policy that + can be called at any information state [via + action_probabilities(state, player_id)]. + + Args: + game: The open_spiel game. + total_policies: A list of list of all policy.Policy strategies used for + training, where the n-th entry of the main list is a list of policies + available to the n-th player. + probabilities_of_playing_policies: A list of arrays representing, per + player, the probabilities of playing each policy in total_policies for the + same player. + + Returns: + A callable object representing the policy. + """ + aggregator = policy_aggregator.PolicyAggregator(game) + + return aggregator.aggregate( + range(len(probabilities_of_playing_policies)), total_policies, + probabilities_of_playing_policies) + + +def marginal_to_joint(policies): + """Marginal policies to joint policies. + + Args: + policies: List of list of policies, one list per player. + + Returns: + Joint policies in the right order (np.reshape compatible). 
+ """ + shape = tuple([len(a) for a in policies]) + num_players = len(shape) + total_length = np.prod(shape) + indexes = np.array(list(range(total_length))) + joint_indexes = np.unravel_index(indexes, shape) + + joint_policies = [] + for joint_index in zip(*joint_indexes): + joint_policies.append([ + policies[player][joint_index[player]] for player in range(num_players) + ]) + return joint_policies + + +def aggregate_joint_policies(game, total_policies, + probabilities_of_playing_policies): + """Aggregate the players' joint policies. + + Specifically, returns a single callable policy object that is + realization-equivalent to playing total_policies with + probabilities_of_playing_policies. I.e., aggr_policy is a joint policy that + can be called at any information state [via + action_probabilities(state, player_id)]. + + Args: + game: The open_spiel game. + total_policies: A list of list of all policy.Policy strategies used for + training, where the n-th entry of the main list is a list of policies, one + entry for each player. + probabilities_of_playing_policies: A list of floats representing the + probabilities of playing each joint strategy in total_policies. + + Returns: + A callable object representing the policy. + """ + aggregator = policy_aggregator_joint.JointPolicyAggregator(game) + + return aggregator.aggregate( + range(len(total_policies[0])), total_policies, + probabilities_of_playing_policies) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/random_agent.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/random_agent.py new file mode 100644 index 0000000..9077534 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/random_agent.py @@ -0,0 +1,40 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""RL agent following an uniform distribution over legal actions.""" + +import numpy as np +from open_spiel.python import rl_agent + + +class RandomAgent(rl_agent.AbstractAgent): + """Random agent class.""" + + def __init__(self, player_id, num_actions, name="random_agent"): + assert num_actions > 0 + self._player_id = player_id + self._num_actions = num_actions + + def step(self, time_step, is_evaluation=False): + # If it is the end of the episode, don't select an action. + if time_step.last(): + return + + # Pick a random legal action. 
+ cur_legal_actions = time_step.observations["legal_actions"][self._player_id] + action = np.random.choice(cur_legal_actions) + probs = np.zeros(self._num_actions) + probs[cur_legal_actions] = 1.0 / len(cur_legal_actions) + + return rl_agent.StepOutput(action=action, probs=probs) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/random_agent_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/random_agent_test.py new file mode 100644 index 0000000..9ecd1aa --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/random_agent_test.py @@ -0,0 +1,49 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.algorithms.random_agent.""" + +from absl.testing import absltest +import numpy as np +from open_spiel.python import rl_environment +from open_spiel.python.algorithms import random_agent + + +class RandomAgentTest(absltest.TestCase): + + def test_step(self): + agent = random_agent.RandomAgent(player_id=0, num_actions=10) + + legal_actions = [0, 2, 3, 5] + time_step = rl_environment.TimeStep( + observations={ + "info_state": [[0], [1]], + "legal_actions": [legal_actions, []], + "current_player": 0 + }, + rewards=None, + discounts=None, + step_type=None) + agent_output = agent.step(time_step) + + self.assertIn(agent_output.action, legal_actions) + self.assertAlmostEqual(sum(agent_output.probs), 1.0) + self.assertEqual( + len([x for x in agent_output.probs if x > 0]), len(legal_actions)) + self.assertTrue( + np.allclose(agent_output.probs[legal_actions], [.25] * 4, atol=1e-5)) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/regret_matching.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/regret_matching.py new file mode 100644 index 0000000..17a628e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/regret_matching.py @@ -0,0 +1,144 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Regret-Matching Algorithm. 
+ +This is an N-player implementation of the regret-matching algorithm described in +Hart & Mas-Colell 2000: +https://onlinelibrary.wiley.com/doi/abs/10.1111/1468-0262.00153 +""" + +import numpy as np +from open_spiel.python.algorithms import nfg_utils + + +# Start with initial regrets of 1 / denom +INITIAL_REGRET_DENOM = 1e6 + + +def _partial_multi_dot(player_payoff_tensor, strategies, index_avoided): + """Computes a generalized dot product avoiding one dimension. + + This is used to directly get the expected return of a given action, given + other players' strategies, for the player indexed by index_avoided. + Note that the numpy.dot function is used to compute this product, as it ended + up being (Slightly) faster in performance tests than np.tensordot. Using the + reduce function proved slower for both np.dot and np.tensordot. + + Args: + player_payoff_tensor: payoff tensor for player[index_avoided], of dimension + (dim(vector[0]), dim(vector[1]), ..., dim(vector[-1])). + strategies: Meta strategy probabilities for each player. + index_avoided: Player for which we do not compute the dot product. + + Returns: + Vector of expected returns for each action of player [the player indexed by + index_avoided]. + """ + new_axis_order = [index_avoided] + [ + i for i in range(len(strategies)) if (i != index_avoided) + ] + accumulator = np.transpose(player_payoff_tensor, new_axis_order) + for i in range(len(strategies) - 1, -1, -1): + if i != index_avoided: + accumulator = np.dot(accumulator, strategies[i]) + return accumulator + + +def _regret_matching_step(payoff_tensors, strategies, regrets, gamma): + """Does one step of the projected replicator dynamics algorithm. + + Args: + payoff_tensors: List of payoff tensors for each player. + strategies: List of the strategies used by each player. + regrets: List of cumulative regrets used by each player. + gamma: Minimum exploratory probability term. + + Returns: + A list of updated strategies for each player. + """ + + # TODO(author4): Investigate whether this update could be fully vectorized. + new_strategies = [] + for player in range(len(payoff_tensors)): + current_payoff_tensor = payoff_tensors[player] + current_strategy = strategies[player] + + values_per_strategy = _partial_multi_dot(current_payoff_tensor, strategies, + player) + average_return = np.dot(values_per_strategy, current_strategy) + regrets[player] += values_per_strategy - average_return + + updated_strategy = regrets[player].copy() + updated_strategy[updated_strategy < 0] = 0.0 + sum_regret = updated_strategy.sum() + uniform_strategy = np.ones(len(updated_strategy)) / len(updated_strategy) + + if sum_regret > 0: + updated_strategy /= sum_regret + updated_strategy = gamma * uniform_strategy + (1 - + gamma) * updated_strategy + else: + updated_strategy = uniform_strategy + + new_strategies.append(updated_strategy) + return new_strategies + + +def regret_matching(payoff_tensors, + initial_strategies=None, + iterations=int(1e5), + gamma=1e-6, + average_over_last_n_strategies=None, + **unused_kwargs): + """Runs regret-matching for the stated number of iterations. + + Args: + payoff_tensors: List of payoff tensors for each player. + initial_strategies: Initial list of the strategies used by each player, if + any. Could be used to speed up the search by providing a good initial + solution. + iterations: Number of algorithmic steps to take before returning an answer. + gamma: Minimum exploratory probability term. 
+ average_over_last_n_strategies: Running average window size for average + policy computation. If None, use the whole trajectory. + **unused_kwargs: Convenient way of exposing an API compatible with other + methods with possibly different arguments. + + Returns: + RM-computed strategies. + """ + number_players = len(payoff_tensors) + # Number of actions available to each player. + action_space_shapes = payoff_tensors[0].shape + + # If no initial starting position is given, start with uniform probabilities. + new_strategies = initial_strategies or [ + np.ones(action_space_shapes[k]) / action_space_shapes[k] + for k in range(number_players) + ] + + regrets = [ + np.ones(action_space_shapes[k]) / INITIAL_REGRET_DENOM + for k in range(number_players) + ] + + averager = nfg_utils.StrategyAverager(number_players, action_space_shapes, + average_over_last_n_strategies) + averager.append(new_strategies) + + for _ in range(iterations): + new_strategies = _regret_matching_step(payoff_tensors, new_strategies, + regrets, gamma) + averager.append(new_strategies) + return averager.average_strategies() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/regret_matching_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/regret_matching_test.py new file mode 100644 index 0000000..dcad780 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/regret_matching_test.py @@ -0,0 +1,86 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
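Before the tests that follow, it may help to see the update that `_regret_matching_step` applies written out for the two-player case. The sketch below is an illustrative NumPy-only re-derivation, not the vendored code: the helper name `one_step`, the matching-pennies payoffs, and the expectation that the running averages end up near the uniform equilibrium are assumptions of this example.

```python
import numpy as np


def one_step(payoffs, strategies, regrets, gamma=1e-6):
    """One positive-regret-matching update for a two-player matrix game."""
    new_strategies = []
    for k in range(2):
        # Expected value of each of player k's actions against the opponent's mix.
        values = payoffs[0] @ strategies[1] if k == 0 else payoffs[1].T @ strategies[0]
        # Accumulate regret relative to the value of the current mixed strategy.
        regrets[k] += values - values @ strategies[k]
        positive = np.clip(regrets[k], 0.0, None)
        if positive.sum() > 0:
            mix = positive / positive.sum()
            mix = gamma * np.full_like(mix, 1.0 / len(mix)) + (1 - gamma) * mix
        else:
            mix = np.full_like(positive, 1.0 / len(positive))
        new_strategies.append(mix)
    return new_strategies


# Matching pennies: the unique equilibrium mixes 50/50 for both players.
a0 = np.array([[1.0, -1.0], [-1.0, 1.0]])
payoffs = [a0, -a0]
strategies = [np.array([0.9, 0.1]), np.array([0.2, 0.8])]
regrets = [np.ones(2) / 1e6, np.ones(2) / 1e6]
averages = [np.zeros(2), np.zeros(2)]
for t in range(1, 20001):
    strategies = one_step(payoffs, strategies, regrets)
    for k in range(2):
        averages[k] += (strategies[k] - averages[k]) / t
print(averages)  # both running averages should end up near [0.5, 0.5]
```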
+"""Tests for open_spiel.python.algorithms.regret_matching.""" + +from absl.testing import absltest +import numpy as np + +from open_spiel.python.algorithms import regret_matching +from open_spiel.python.egt.utils import game_payoffs_array +import pyspiel + + +class RegretMatchingTest(absltest.TestCase): + + def test_two_players(self): + test_a = np.array([[2, 1, 0], [0, -1, -2]]) + test_b = np.array([[2, 1, 0], [0, -1, -2]]) + + strategies = regret_matching.regret_matching( + [test_a, test_b], + initial_strategies=None, + iterations=50000, + prd_gamma=1e-8, + average_over_last_n_strategies=10) + + self.assertLen(strategies, 2, "Wrong strategy length.") + self.assertGreater(strategies[0][0], 0.999, + "Regret matching failed in trivial case.") + + def test_three_players(self): + test_a = np.array([[[2, 1, 0], [1, 0, -1]], [[1, 0, -1], [0, -1, -2]]]) + test_b = np.array([[[2, 1, 0], [1, 0, -1]], [[1, 0, -1], [0, -1, -2]]]) + test_c = np.array([[[2, 1, 0], [1, 0, -1]], [[1, 0, -1], [0, -1, -2]]]) + + strategies = regret_matching.regret_matching( + [test_a, test_b, test_c], + initial_strategies=None, + iterations=50000, + gamma=1e-6, + average_over_last_n_strategies=10) + self.assertLen(strategies, 3, "Wrong strategy length.") + self.assertGreater(strategies[0][0], 0.999, + "Regret matching failed in trivial case.") + + def test_rps(self): + game = pyspiel.load_game("matrix_rps") + payoffs_array = game_payoffs_array(game) + strategies = regret_matching.regret_matching( + [payoffs_array[0], payoffs_array[1]], + initial_strategies=[ + np.array([0.1, 0.4, 0.5]), + np.array([0.9, 0.1, 0.01]) + ], + iterations=50000, + gamma=1e-6) + self.assertLen(strategies, 2, "Wrong strategy length.") + # places=1 corresponds to an absolute difference of < 0.001 + self.assertAlmostEqual(strategies[0][0], 1 / 3., places=2) + self.assertAlmostEqual(strategies[0][1], 1 / 3., places=2) + self.assertAlmostEqual(strategies[0][2], 1 / 3., places=2) + + def test_biased_rps(self): + game = pyspiel.load_game("matrix_brps") + payoffs_array = game_payoffs_array(game) + strategies = regret_matching.regret_matching( + [payoffs_array[0], payoffs_array[1]], iterations=50000, gamma=1e-8) + self.assertLen(strategies, 2, "Wrong strategy length.") + # places=1 corresponds to an absolute difference of < 0.01 + self.assertAlmostEqual(strategies[0][0], 1 / 16., places=1) + self.assertAlmostEqual(strategies[0][1], 10 / 16., places=1) + self.assertAlmostEqual(strategies[0][2], 5 / 16., places=1) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/response_graph_ucb.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/response_graph_ucb.py new file mode 100644 index 0000000..cbafefe --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/response_graph_ucb.py @@ -0,0 +1,648 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Implements ResponseGraphUCB algorithm from the below paper. + + "Multiagent Evaluation under Incomplete Information" (Rowland et al., 2019) + See https://arxiv.org/abs/1909.09849 for details. +""" + +import copy +import functools +import itertools +import operator +import random + +import matplotlib.pyplot as plt +import networkx as nx +import numpy as np +import scipy.stats + + +class ResponseGraphUCB(object): + """ResponseGraphUCB sampler class.""" + + def __init__(self, + game, + exploration_strategy='uniform-exhaustive', + confidence_method='ucb-standard', + delta=0.01, + ucb_eps=0, + per_payoff_confidence=True, + time_dependent_delta=False): + """Initializes ResponseGraphUCB instance. + + Assumes that all payoffs fall in the interval [0,1]. + + Args: + game: an instance of the BernoulliGameSampler class. + exploration_strategy: string specifying the exploration strategy. + confidence_method: string specifying the confidence method. + delta: float specifying the UCB delta parameter. + ucb_eps: float specifying the UCB epsilon parameter. + per_payoff_confidence: bool specifying whether confidence level applies + on a per-payoff basis, or to all payoffs simultaneously. + time_dependent_delta: bool specifying whether the confidence parameter + varies with the number of interactions so that a union bound holds. + """ + self.exploration_strategy = exploration_strategy + self.confidence_method = confidence_method + self.ucb_eps = ucb_eps + self.G = game # pylint: disable=invalid-name + self.per_payoff_confidence = per_payoff_confidence + self.time_dependent_delta = time_dependent_delta + if self.per_payoff_confidence: + self._delta = delta + else: + self._delta = delta / ( + self.G.n_players * + functools.reduce(operator.mul, self.G.strategy_spaces, 1)) + + # Compute the graph + self.V = list( # pylint: disable=invalid-name + itertools.product(*[range(smax) for smax in self.G.strategy_spaces])) + self.E = [] # pylint: disable=invalid-name + for v in self.V: + adj_strats = [ + list(range(v[k] + 1, self.G.strategy_spaces[k])) + for k in range(self.G.n_players) + ] + for k in range(self.G.n_players): + for new_s in adj_strats[k]: + second_vertex = list(v) + second_vertex[k] = new_s + second_vertex = tuple(second_vertex) + self.E.append((v, second_vertex)) + self.count_history = {v: [] for v in self.V} + self.total_interactions = 0 + + def delta(self, k, s): + """Returns the confidence parameter for a given player and profile.""" + if not self.time_dependent_delta: + return self._delta + else: + return self._delta * (6 / (np.pi**2 * self.count[k][s] **2)) + + def initialise_mean_and_count(self): + """Initializes means and counts for all response graph profiles.""" + self.mu = [ + np.zeros(tuple(self.G.strategy_spaces)) for _ in range(self.G.n_players) + ] + self.count = [ + np.zeros(tuple(self.G.strategy_spaces)) for _ in range(self.G.n_players) + ] + + def update_mean_and_count(self, strat_profile, game_outcome): + """Updates means and counts for strat_profile given game_outcome.""" + self.total_interactions += 1 + for k in range(self.G.n_players): + self.mu[k][strat_profile] *= self.count[k][strat_profile] + self.mu[k][strat_profile] += game_outcome[k] + self.count[k][strat_profile] += 1 + self.mu[k][strat_profile] /= self.count[k][strat_profile] + + for s in self.V: + self.count_history[s].append(self.count[0][s] / + float(self.total_interactions)) + + def _find_focal_coord(self, s1, s2): + num_deviations = tuple(s1[l] != s2[l] for l in range(len(s1))) + assert np.sum(num_deviations) == 
1, ('Invalid profile pair s1, s2: ({},{}).' + 'Exactly one player should' + 'deviate!'.format(s1, s2)) + return np.argmax(num_deviations) + + def _initialise_queue_uniform(self): + self.remaining_edges = copy.deepcopy(self.E) + + def _add_to_queue_uniform(self, edges_removed): + """Adds edge to sampling queue using uniform sampling.""" + for e in edges_removed: + self.remaining_edges.remove(e) + self.profile_queue.append( + random.choice(random.choice(self.remaining_edges))) + + def _initialise_queue_uniform_exhaustive(self): + self.edge_order = copy.deepcopy(self.E) + random.shuffle(self.edge_order) + + def _add_to_queue_uniform_exhaustive(self, edges_removed): + """Adds edge to sampling queue using uniform-exhausitive sampling.""" + for e in edges_removed: + self.edge_order.remove(e) + self.profile_queue.append(random.choice(self.edge_order[0])) + + def _initialise_queue_valence_weighted(self): + self.vertex_valences = { + v: np.sum(self.G.strategy_spaces) - self.G.n_players for v in self.V + } + self.sum_valences = sum(self.vertex_valences.values()) + + def _add_to_queue_valence_weighted(self, edges_removed): + """Adds edge to sampling queue using valence-weighted sampling.""" + # Deal with removed edges + for e in edges_removed: + for s in e: + self.vertex_valences[s] -= 1 + self.sum_valences -= 1 + + # Calculate probabilities + probs = np.array([self.vertex_valences[v]**2 for v in self.V]) + probs = probs / np.sum(probs) + s_ix = np.random.choice(np.arange(len(self.V)), p=probs) + self.profile_queue.append(self.V[s_ix]) + + def _initialise_queue_count_weighted(self): + # Keep track of which vertices have non-zero valence in graph + self.vertex_valences = { + v: np.sum(self.G.strategy_spaces) - self.G.n_players for v in self.V + } + self.sum_valences = sum(self.vertex_valences.values()) + + def _add_to_queue_count_weighted(self, edges_removed): + """Adds edge to sampling queue using count-weighted sampling.""" + # Update vertex valences + for e in edges_removed: + for s in e: + self.vertex_valences[s] -= 1 + self.sum_valences -= 1 + # Check counts + eligible_vertices = { + v: self.count[0][v] for v in self.V if self.vertex_valences[v] != 0 + } + strat = min(eligible_vertices, key=eligible_vertices.get) + self.profile_queue.append(strat) + + def initialise_queue(self): + """Initializes sampling queue.""" + self.edges_remaining = copy.deepcopy(self.E) + if self.exploration_strategy == 'uniform': + self._initialise_queue_uniform() + elif self.exploration_strategy == 'uniform-exhaustive': + self._initialise_queue_uniform_exhaustive() + elif self.exploration_strategy == 'valence-weighted': + self._initialise_queue_valence_weighted() + elif self.exploration_strategy == 'count-weighted': + self._initialise_queue_count_weighted() + else: + raise ValueError('Did not recognise exploration strategy: {}'.format( + self.exploration_strategy)) + + self.profile_queue = [] + + def add_to_queue(self, removed): + """Update the sampling queue and the list of resolved edges. + + Args: + removed: the list of edges resolved in the previous round, which should be + removed from the sampling list in subsequent rounds. 
+ """ + if self.exploration_strategy == 'uniform': + self._add_to_queue_uniform(removed) + elif self.exploration_strategy == 'uniform-exhaustive': + self._add_to_queue_uniform_exhaustive(removed) + elif self.exploration_strategy == 'valence-weighted': + self._add_to_queue_valence_weighted(removed) + elif self.exploration_strategy == 'count-weighted': + self._add_to_queue_count_weighted(removed) + else: + raise ValueError('Did not recognise exploration strategy: {}'.format( + self.exploration_strategy)) + + def evaluate_strategy_profile(self, yield_outcomes=False): + """Evaluates a strategy profile on the sampling queue. + + Specifically, this: + 1. Removes a strategy profile from the queue. + 2. Evaluates it. + 3. Updates internal statistics. + 4. Adjusts list of strategy profiles whose statistics have been updated + since last confidence bound check. + + Args: + yield_outcomes: set True to yield the outcomes as well. + + Yields: + s: profile evaluated. + game_outcome: outcomes (player payoffs) for profile s. + """ + if self.profile_queue: + s = self.profile_queue.pop(0) + if s not in self.active_strategy_profiles: + self.active_strategy_profiles.append(s) + game_outcome = self.G.observe_result(s) + if yield_outcomes: + yield s, game_outcome + self.update_mean_and_count(s, game_outcome) + + def _ucb_standard_factor(self, s, k): + return np.sqrt(np.log(2 / self.delta(k, s)) / (2 * self.count[k][s])) + + def _bernoulli_upper(self, p, n, delta): + """Returns upper confidence bound for proportion p successes of n trials. + + Uses exact Clopper-Pearson interval. + + Args: + p: proportion of successes. + n: number of trials. + delta: confidence parameter. + """ + if p > 1 - 1e-6: + return 1. + else: + upper = scipy.stats.beta.ppf(1. - delta / 2, p * n + 1, n - p * n) + return upper + + def _bernoulli_lower(self, p, n, delta): + """Returns lower confidence bound for proportion p successes of n trials. + + Uses exact Clopper-Pearson interval. + + Args: + p: proportion of successes. + n: number of trials. + delta: confidence parameter. + """ + if p < 1e-6: + return 0. 
+ else: + lower = scipy.stats.beta.ppf(delta / 2, p * n, n - p * n + 1) + return lower + + def _ucb(self, s, k): + """Returns k-th player's payoff upper-confidence-bound given profile s.""" + if self.confidence_method == 'ucb-standard': + ucb_factor = self._ucb_standard_factor(s, k) + return self.mu[k][s] + ucb_factor + elif self.confidence_method == 'ucb-standard-relaxed': + ucb_factor = self._ucb_standard_factor(s, k) - self.ucb_eps + return self.mu[k][s] + ucb_factor + elif self.confidence_method == 'clopper-pearson-ucb': + return self._bernoulli_upper(self.mu[k][s], self.count[k][s], + self.delta(k, s)) + elif self.confidence_method == 'clopper-pearson-ucb-relaxed': + return self._bernoulli_upper(self.mu[k][s], self.count[k][s], + self.delta(k, s)) - self.ucb_eps + else: + raise ValueError('Did not recognise confidence method {}'.format( + self.confidence_method)) + + def _lcb(self, s, k): + """Returns k-th player's payoff lower-confidence-bound given profile s.""" + if self.confidence_method == 'ucb-standard': + ucb_factor = self._ucb_standard_factor(s, k) + return self.mu[k][s] - ucb_factor + elif self.confidence_method == 'ucb-standard-relaxed': + ucb_factor = self._ucb_standard_factor(s, k) + self.ucb_eps + return self.mu[k][s] - ucb_factor + elif self.confidence_method == 'clopper-pearson-ucb': + return self._bernoulli_lower(self.mu[k][s], self.count[k][s], + self.delta(k, s)) + elif self.confidence_method == 'clopper-pearson-ucb-relaxed': + return self._bernoulli_lower(self.mu[k][s], self.count[k][s], + self.delta(k, s)) + self.ucb_eps + else: + raise ValueError('Did not recognise confidence method {}'.format( + self.confidence_method)) + + def ucb_check(self, e): + """Conducts a UCB check on response graph edge e. + + Specifically, given edge e connecting two strategy profiles s1 and s2, this: + 1. Determines the dominating strategy. + 2. Checks whether the payoff_UCB(worse_strategy) is less than + the payoff_LCB of the better strategy; if this is true, the confidence + intervals are disjoint, and the edge e is considered 'resolved'. + + Args: + e: response graph edge. + + Returns: + A bool indicating whether the edge is resolved, + and also a tuple specifying the worse and better strategies. 
+ """ + + s1, s2 = e + k = self._find_focal_coord(s1, s2) + if self.mu[k][s1] > self.mu[k][s2]: + better_strat = s1 + worse_strat = s2 + else: + better_strat = s2 + worse_strat = s1 + + ucb = self._ucb(worse_strat, k) + lcb = self._lcb(better_strat, k) + + return (ucb < lcb), (worse_strat, better_strat) + + def check_confidence(self): + """Returns the edges that are 'resolved' given a confidence bound check.""" + edges_to_check = [] + + for e in self.edges_remaining: + for s in self.active_strategy_profiles: + if s in e: + if e not in edges_to_check: + edges_to_check.append(e) + + edges_removed = [] + for e in edges_to_check: + removed, ordered_edge = self.ucb_check(e) + if removed: + edges_removed.append(e) + self.edges_remaining.remove(e) + self.directed_edges.append(ordered_edge) + + self.active_strategy_profiles = [] + + return edges_removed + + def real_edge_direction(self, e): + s1, s2 = e + k = self._find_focal_coord(s1, s2) + if self.G.means[k][s1] > self.G.means[k][s2]: + return (s2, s1) + else: + return (s1, s2) + + def construct_real_graph(self): + directed_edges = [] + for e in self.E: + ordered_edge = self.real_edge_direction(e) + directed_edges.append(ordered_edge) + + return self._construct_digraph(directed_edges) + + def compute_graph(self): + for e in self.E: + s1, s2 = e[0], e[1] + k = self._find_focal_coord(s1, s2) + if self.mu[k][s1] > self.mu[k][s2]: + directed_edge = (s2, s1) + else: + directed_edge = (s1, s2) + if directed_edge not in self.directed_edges: + self.directed_edges.append(directed_edge) + + def forced_exploration(self): + for v in self.V: + game_outcome = self.G.observe_result(v) + self.update_mean_and_count(v, game_outcome) + + def run(self, verbose=True, max_total_iterations=50000): + """Runs the ResponseGraphUCB algorithm.""" + self.verbose = verbose + + # Upper bounds on number of evaluations + self.max_total_iterations = max_total_iterations + + self.initialise_mean_and_count() + self.directed_edges = [] + self.active_strategy_profiles = [] + self.initialise_queue() + + # Forced initial exploration + self.forced_exploration() + + # Keep evaluating nodes until check method declares that we're finished + iterations = 0 + edges_resolved_this_round = [] + while self.total_interactions < max_total_iterations: + # Add nodes to queue + self.add_to_queue(removed=edges_resolved_this_round) + + # Evaluate the nodes and log results + for v, _ in self.evaluate_strategy_profile(): + if verbose: + print(v) + + # Recompute confidence bounds, eliminate, stop etc. 
+ edges_resolved_this_round = self.check_confidence() + + if not self.edges_remaining: + break + iterations += 1 + + # Fill in missing edges if max iters reached without resolving all edges + self.compute_graph() + + # Compute objects to be returned + if verbose: + total_steps = self.compute_total_steps() + print('\nTotal steps taken = {}'.format(total_steps)) + results = {} + results['interactions'] = int(np.sum(self.count[0])) + graph = self._construct_digraph(self.directed_edges) + results['graph'] = graph + return results + + def compute_total_steps(self): + return int(np.sum(self.count[0])) + + def _construct_digraph(self, edges): + graph = nx.DiGraph() + graph.add_nodes_from(self.V) + for e in edges: + graph.add_edge(e[0], e[1]) + return graph + + def _plot_errorbars_2x2x2(self, x, y, xerr, yerr, fmt): + """Plots ResponseGraph with error bars for a 2-player 2x2 game.""" + + # plt.errorbar does not accept list of colors, so plot twice + for i_strat in [0, 1]: + if xerr[i_strat] is None: + plt.errorbar( + x=x[i_strat], + y=y[i_strat], + yerr=np.reshape(yerr[:, i_strat], (2, 1)), + markerfacecolor='b', + ecolor='b', + fmt=fmt, + zorder=0) + elif yerr[i_strat] is None: + plt.errorbar( + x=x[i_strat], + y=y[i_strat], + xerr=np.reshape(xerr[:, i_strat], (2, 1)), + markerfacecolor='b', + ecolor='b', + fmt=fmt, + zorder=0) + else: + raise ValueError() + + def visualise_2x2x2(self, real_values, graph): + """Plots summary of ResponseGraphUCB for a 2-player 2x2 game.""" + _, axes = plt.subplots(3, 3, figsize=(10, 10), + gridspec_kw={'width_ratios': [1, 2, 1], + 'height_ratios': [1, 2, 1]}) + axes[0, 0].axis('off') + axes[0, 2].axis('off') + axes[2, 0].axis('off') + axes[2, 2].axis('off') + + # (0,0) vs. (0,1) + plt.sca(axes[0, 1]) + s1 = (0, 0) + s2 = (0, 1) + self._plot_errorbars_2x2x2( + x=[0, 1], + y=[self.mu[1][s1], self.mu[1][s2]], + xerr=[None, None], + yerr=np.array([[self.mu[1][s1] - self._lcb(s1, 1), + self.mu[1][s2] - self._lcb(s2, 1)], + [self._ucb(s1, 1) - self.mu[1][s1], + self._ucb(s2, 1) - self.mu[1][s2]]]), + fmt='o') + plt.scatter([0, 1], [real_values[1, 0, 0], real_values[1, 0, 1]], + color='red', + zorder=1) + plt.tick_params(axis='both', which='major', labelsize=14) + plt.tick_params(axis='both', which='minor', labelsize=14) + plt.xticks([]) + plt.yticks([0, 0.5, 1]) + plt.gca().set_yticklabels(['0', '', '1']) + plt.gca().yaxis.set_ticks_position('left') + plt.gca().grid(True) + plt.ylim(0, 1) + + # (0,0) vs. (1,0) + plt.sca(axes[1, 0]) + s1 = (1, 0) + s2 = (0, 0) + self._plot_errorbars_2x2x2( + x=[self.mu[0][s1], self.mu[0][s2]], + y=[0, 1], + xerr=np.array([[self.mu[0][s1] - self._lcb(s1, 0), + self.mu[0][s2] - self._lcb(s2, 0)], + [self._ucb(s1, 0) - self.mu[0][s1], + self._ucb(s2, 0) - self.mu[0][s2]]]), + yerr=[None, None], + fmt='o') + plt.scatter([real_values[0, 1, 0], real_values[0, 0, 0]], [0, 1], + color='red', + zorder=1) + plt.tick_params(axis='both', which='major', labelsize=14) + plt.tick_params(axis='both', which='minor', labelsize=14) + plt.xticks([0, 0.5, 1]) + plt.gca().set_xticklabels(['0', '', '1']) + plt.gca().xaxis.set_ticks_position('bottom') + plt.gca().grid(True) + plt.yticks([]) + plt.xlim(0, 1) + + # (0,1) vs. 
(1,1) + plt.sca(axes[1, 2]) + s1 = (1, 1) + s2 = (0, 1) + self._plot_errorbars_2x2x2( + x=[self.mu[0][s1], self.mu[0][s2]], + y=[0, 1], + xerr=np.array([[self.mu[0][s1] - self._lcb(s1, 0), + self.mu[0][s2] - self._lcb(s2, 0)], + [self._ucb(s1, 0) - self.mu[0][s1], + self._ucb(s2, 0) - self.mu[0][s2]]]), + yerr=[None, None], + fmt='o') + plt.scatter([real_values[0, 1, 1], real_values[0, 0, 1]], [0, 1], + color='red', + zorder=1) + plt.tick_params(axis='both', which='major', labelsize=14) + plt.tick_params(axis='both', which='minor', labelsize=14) + plt.xticks([0, 0.5, 1]) + plt.gca().set_xticklabels(['0', '', '1']) + plt.gca().xaxis.set_ticks_position('top') + plt.yticks([]) + plt.gca().grid(True) + plt.xlim(0, 1) + + # (1,0) vs. (1,1) + plt.sca(axes[2, 1]) + s1 = (1, 0) + s2 = (1, 1) + self._plot_errorbars_2x2x2( + x=[0, 1], + y=[self.mu[1][s1], self.mu[1][s2]], + xerr=[None, None], + yerr=np.array([[self.mu[1][s1] - self._lcb(s1, 1), + self.mu[1][s2] - self._lcb(s2, 1)], + [self._ucb(s1, 1) - self.mu[1][s1], + self._ucb(s2, 1) - self.mu[1][s2]]]), + fmt='o') + plt.scatter([0, 1], [real_values[1, 1, 0], real_values[1, 1, 1]], + color='red', + zorder=1) + plt.tick_params(axis='both', which='major', labelsize=14) + plt.tick_params(axis='both', which='minor', labelsize=14) + plt.xticks([]) + plt.yticks([0, 0.5, 1]) + plt.gca().set_yticklabels(['0', '', '1']) + plt.gca().yaxis.set_ticks_position('right') + plt.gca().grid(True) + plt.ylim(0, 1) + self.plot_graph(graph, subplot=True, axes=axes) # Chart in the middle + + def plot_graph(self, graph, subplot=False, axes=None): + """Plots the response graph.""" + if subplot: + plt.sca(axes[1, 1]) + axes[1, 1].axis('off') + else: + plt.figure(figsize=(5, 5)) + if len(graph.nodes) == 4: + pos = {(0, 0): [0, 1], (0, 1): [1, 1], (1, 0): [0, 0], (1, 1): [1, 0]} + else: + pos = nx.circular_layout(graph) + nx.draw_networkx_nodes( + graph, pos, node_size=1800, node_color='w', edgecolors='k') + nx.draw_networkx_edges( + graph, + pos, + node_size=1800, + edge_color='k', + arrowstyle='->', + arrowsize=10, + width=3) + nx.draw_networkx_labels(self.G, pos, {x: x for x in self.V}, font_size=14) + + def visualise_count_history(self, figsize=(5, 2)): + """Plots the sampling count history for each strategy profile.""" + plt.figure(figsize=figsize) + data = [] + labels = [] + for v in self.V: + print(v) + labels.append(v) + data.append(self.count_history[v]) + pal = plt.get_cmap('Dark2').colors + plt.stackplot( + np.arange(1, self.total_interactions + 1), + np.array(data), + labels=labels, + colors=pal) + plt.ylim(top=1, bottom=0) + plt.xlabel('Interactions') + plt.ylabel('Proportions') + + # Shrink current axis + ax = plt.gca() + ax.xaxis.set_ticks_position('bottom') + ax.yaxis.set_ticks_position('left') + box = ax.get_position() + ax.set_position([box.x0, box.y0, box.width * 0.67, box.height]) + plt.xlim(1, self.total_interactions) + plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), ncol=1) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/response_graph_ucb_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/response_graph_ucb_test.py new file mode 100644 index 0000000..9fa2a7f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/response_graph_ucb_test.py @@ -0,0 +1,78 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.algorithms.response_graph_ucb.""" + +import itertools + +from absl.testing import absltest + +# pylint: disable=g-import-not-at-top +import matplotlib +matplotlib.use('agg') # switch backend for testing + +import numpy as np + +from open_spiel.python.algorithms import response_graph_ucb +from open_spiel.python.algorithms import response_graph_ucb_utils + + +class ResponseGraphUcbTest(absltest.TestCase): + + def get_example_2x2_payoffs(self): + mean_payoffs = np.random.uniform(-1, 1, size=(2, 2, 2)) + mean_payoffs[0, :, :] = np.asarray([[0.5, 0.85], [0.15, 0.5]]) + mean_payoffs[1, :, :] = 1 - mean_payoffs[0, :, :] + return mean_payoffs + + def test_sampler(self): + mean_payoffs = self.get_example_2x2_payoffs() + game = response_graph_ucb_utils.BernoulliGameSampler( + [2, 2], mean_payoffs, payoff_bounds=[-1., 1.]) + game.p_max = mean_payoffs + game.means = mean_payoffs + + # Parameters to run + sampling_methods = [ + 'uniform-exhaustive', 'uniform', 'valence-weighted', 'count-weighted' + ] + conf_methods = [ + 'ucb-standard', 'ucb-standard-relaxed', 'clopper-pearson-ucb', + 'clopper-pearson-ucb-relaxed' + ] + per_payoff_confidence = [True, False] + time_dependent_delta = [True, False] + + methods = list(itertools.product(sampling_methods, + conf_methods, + per_payoff_confidence, + time_dependent_delta)) + max_total_interactions = 50 + + for m in methods: + r_ucb = response_graph_ucb.ResponseGraphUCB( + game, + exploration_strategy=m[0], + confidence_method=m[1], + delta=0.1, + ucb_eps=1e-1, + per_payoff_confidence=m[2], + time_dependent_delta=m[3]) + _ = r_ucb.run(max_total_iterations=max_total_interactions) + + def test_soccer_data_import(self): + response_graph_ucb_utils.get_soccer_data() + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/response_graph_ucb_utils.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/response_graph_ucb_utils.py new file mode 100644 index 0000000..ee76980 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/response_graph_ucb_utils.py @@ -0,0 +1,261 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
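The test above builds a Bernoulli game sampler and sweeps every combination of exploration strategy and confidence method; a pared-down run of a single configuration, assuming `open_spiel`, NumPy, SciPy, networkx, and matplotlib are importable, could look like the following. The 2x2 mean-payoff matrix, the iteration budget, and the choice of `payoff_bounds=[0., 1.]` (so the rescaling is the identity and the means act directly as Bernoulli success probabilities) are assumptions of this sketch, not values from the vendored test.

```python
import numpy as np

from open_spiel.python.algorithms import response_graph_ucb
from open_spiel.python.algorithms import response_graph_ucb_utils

# Mean payoffs for a 2x2 two-player game, already expressed in [0, 1].
mean_payoffs = np.zeros((2, 2, 2))
mean_payoffs[0] = [[0.5, 0.85], [0.15, 0.5]]
mean_payoffs[1] = 1.0 - mean_payoffs[0]

# With payoff_bounds=[0., 1.] no rescaling occurs, so the means are used
# directly as the probabilities of observing a payoff of 1.
game = response_graph_ucb_utils.BernoulliGameSampler(
    [2, 2], mean_payoffs, payoff_bounds=[0.0, 1.0])

sampler = response_graph_ucb.ResponseGraphUCB(
    game,
    exploration_strategy='uniform-exhaustive',
    confidence_method='ucb-standard',
    delta=0.1)
results = sampler.run(verbose=False, max_total_iterations=2000)
print(results['interactions'])           # simulated interactions consumed
print(sorted(results['graph'].edges()))  # estimated response-graph directions
```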
+ +"""Utility functions for ResponseGraphUCB.""" + +import itertools + +import matplotlib.pyplot as plt +import numpy as np +import scipy.stats + +from open_spiel.python.algorithms import fictitious_play +from open_spiel.python.egt import utils as egt_utils +import pyspiel +from open_spiel.python.utils import file_utils + + +def get_method_tuple_acronym(method_tuple): + """Returns pretty acronym for specified ResponseGraphUCB method tuple.""" + if isinstance(method_tuple, tuple): + acronyms = [get_method_acronym(m) for m in method_tuple] + return ', '.join(acronyms) + else: + return get_method_acronym(method_tuple) + + +def get_method_tuple_linespecs(method): + """Gets plot linespecs for the specified ResponseGraphUCB method.""" + sampling_strats = [ + 'uniform-exhaustive', 'uniform', 'valence-weighted', 'count-weighted' + ] + conf_methods = ['ucb-standard', 'clopper-pearson-ucb'] + method_to_id_map = dict( + (m, i) + for i, m in enumerate(itertools.product(sampling_strats, conf_methods))) + + # Create palette + num_colors = len(method_to_id_map.keys()) + colors = plt.get_cmap('Set1', num_colors).colors + + # Spec out the linestyle + base_method = (method[0], method[1].replace('-relaxed', '') + ) # Method name without -relaxed suffix + linespecs = { + 'color': colors[method_to_id_map[base_method]] + } # Use base method for color (ignoring relaxed vs non-relaxed) + if 'relaxed' in method[1]: # Use actual method for linestyle + linespecs['linestyle'] = 'dashed' + else: + linespecs['linestyle'] = 'solid' + + return linespecs + + +def get_method_acronym(method): + """Gets pretty acronym for specified ResponseGraphUCB method.""" + if method == 'uniform-exhaustive': + return r'$\mathcal{S}$: UE' + elif method == 'uniform': + return r'$\mathcal{S}$: U' + elif method == 'valence-weighted': + return r'$\mathcal{S}$: VW' + elif method == 'count-weighted': + return r'$\mathcal{S}$: CW' + elif method == 'ucb-standard': + return r'$\mathcal{C}(\delta)$: UCB' + elif method == 'ucb-standard-relaxed': + return r'$\mathcal{C}(\delta)$: R-UCB' + elif method == 'clopper-pearson-ucb': + return r'$\mathcal{C}(\delta)$: CP-UCB' + elif method == 'clopper-pearson-ucb-relaxed': + return r'$\mathcal{C}(\delta)$: R-CP-UCB' + elif method == 'fixedbudget-uniform': + return r'$\mathcal{S}$: U, $\mathcal{C}(\delta)$: FB' + else: + raise ValueError('Unknown sampler method: {}!'.format(method)) + + +def digraph_edge_hamming_dist(g1, g2): + """Returns number of directed edge mismatches between digraphs g1 and g2.""" + dist = 0 + for e1 in g1.edges: + if e1 not in g2.edges: + dist += 1 + return dist + + +class BernoulliGameSampler(object): + """A sampler for a game with Bernoulli-distributed payoffs.""" + + def __init__(self, strategy_spaces, means, payoff_bounds): + """Initializes the Bernoulli game sampler. + + Payoffs are automatically scaled to lie between 0 and 1. + + Args: + strategy_spaces: a list of sizes of player strategy spaces. + means: 1+num_players dimensional array of mean payoffs. + payoff_bounds: min/max observable value of payoffs, necessary since one + may seek Bernoulli-sampling for games with different payoff ranges. + """ + self.strategy_spaces = strategy_spaces + self.n_players = len(strategy_spaces) + self.raw_means = means + self.payoff_bounds = payoff_bounds + self.means = self.rescale_payoff(means) + + # Specific to the Bernoulli case. 
Probas in [0,1], proportional to payoffs + self.p_max = self.means + + def rescale_payoff(self, payoff): + """Rescales payoffs to be in [0,1].""" + # Assumes payoffs are bounded between [-payoff_bound, payoff_bound] + return (payoff - self.payoff_bounds[0]) / ( + self.payoff_bounds[1] - self.payoff_bounds[0]) + + def observe_result(self, strat_profile): + """Returns empirical payoffs for each agent.""" + outcomes = np.zeros(self.n_players) + for k in range(self.n_players): + # compute Bernoulli probabilities + outcomes[k] = np.random.choice( + [1, 0], + p=[self.p_max[k][strat_profile], 1. - self.p_max[k][strat_profile]]) + return outcomes + + +class ZeroSumBernoulliGameSampler(BernoulliGameSampler): + """A sampler for a zero-sum game with Bernoulli-distributed payoffs.""" + + def __init__(self, strategy_spaces, means, payoff_bounds): + super(ZeroSumBernoulliGameSampler, self).__init__(strategy_spaces, means, + payoff_bounds) + # Verify the game is zero-sum + assert np.allclose(np.sum(self.means, axis=0), 1.) + + def observe_result(self, strat_profile): + outcomes = np.zeros(self.n_players) + win_ix = np.random.choice( + self.n_players, p=self.means[(slice(None),) + strat_profile]) + outcomes[win_ix] = 1. + return outcomes + + +def get_payoffs_bernoulli_game(size=(2, 2, 2)): + """Gets randomly-generated zero-sum symmetric two-player game.""" + too_close = True + while too_close: + M = np.random.uniform(-1, 1, size=size) # pylint: disable=invalid-name + M[0, :, :] = 0.5 * (M[0, :, :] - M[0, :, :].T) + M[1, :, :] = -M[0, :, :] + if np.abs(M[0, 0, 1]) < 0.1: + too_close = True + else: + too_close = False + return M + + +def get_soccer_data(): + """Returns the payoffs and strategy labels for MuJoCo soccer experiments.""" + payoff_file = file_utils.find_file( + 'open_spiel/data/paper_data/response_graph_ucb/soccer.txt', 2) + payoffs = np.loadtxt(payoff_file) + return payoffs + + +def get_kuhn_poker_data(num_players=4, iterations=3): + """Returns the kuhn poker data for the number of players specified.""" + game = pyspiel.load_game('kuhn_poker', {'players': num_players}) + xfp_solver = fictitious_play.XFPSolver(game, save_oracles=True) + for _ in range(iterations): + xfp_solver.iteration() + + # Results are seed-dependent, so show some interesting cases + if num_players == 2: + meta_games = xfp_solver.get_empirical_metagame(100, seed=1) + elif num_players == 3: + meta_games = xfp_solver.get_empirical_metagame(100, seed=5) + elif num_players == 4: + meta_games = xfp_solver.get_empirical_metagame(100, seed=2) + + # Metagame utility matrices for each player + payoff_tables = [] + for i in range(num_players): + payoff_tables.append(meta_games[i]) + return payoff_tables + + +def get_game_for_sampler(game_name): + """Returns pre-processed game data for ResponseGraphUCB examples.""" + # pylint: disable=invalid-name + if game_name == 'bernoulli': + M = get_payoffs_bernoulli_game() + strategy_spaces = [2, 2] + G = ZeroSumBernoulliGameSampler( + strategy_spaces, means=M, payoff_bounds=[-1., 1.]) + elif game_name == 'soccer': + M = get_soccer_data() + M = M * 2. 
- 1 # Convert to zero-sum + strategy_spaces = np.shape(M) + M = np.asarray([M, M.T]) + G = ZeroSumBernoulliGameSampler(strategy_spaces, means=M, + payoff_bounds=[np.min(M), np.max(M)]) + elif game_name in ['kuhn_poker_2p', 'kuhn_poker_3p', 'kuhn_poker_4p']: + if '2p' in game_name: + num_players = 2 + elif '3p' in game_name: + num_players = 3 + elif '4p' in game_name: + num_players = 4 + M = get_kuhn_poker_data(num_players, iterations=2) # pylint: disable=invalid-name + strategy_spaces = egt_utils.get_num_strats_per_population(M, False) + G = BernoulliGameSampler( + strategy_spaces, means=M, payoff_bounds=[np.min(M), np.max(M)]) + else: + raise ValueError('Game', game_name, 'not implemented!') + # pylint: enable=invalid-name + return G + + +def plot_timeseries(ax, id_ax, data, xticks, xlabel='', ylabel='', + label='', logx=False, logy=False, zorder=10, + linespecs=None): + """Plots timeseries data with error bars.""" + if logx: + ax[id_ax].set_xscale('log') + if logy: + ax[id_ax].set_yscale('log') + if linespecs: + kwargs = {'color': linespecs['color']} + else: + kwargs = {} + + # Seaborn's bootstrapped confidence intervals were used in the original paper + se = scipy.stats.sem(data, axis=0) + ax[id_ax].fill_between(xticks, data.mean(0)-se, data.mean(0)+se, + zorder=zorder, alpha=0.2, **kwargs) + ax[id_ax].plot(xticks, data.mean(0), label=label, zorder=zorder, **kwargs) + + # There may be multiple lines on the current axis, some from previous calls to + # plot_timeseries, so reference just the latest + if linespecs: + ax[id_ax].get_lines()[-1].set_dashes([5, 5]) + ax[id_ax].get_lines()[-1].set_linestyle(linespecs['linestyle']) + + ax[id_ax].set(xlabel=xlabel, ylabel=ylabel) + ax[id_ax].set_axisbelow(True) + ax[id_ax].grid(True) + for _, spine in ax[id_ax].spines.items(): + spine.set_zorder(-1) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/sample_some_states.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/sample_some_states.py new file mode 100644 index 0000000..f25f59b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/sample_some_states.py @@ -0,0 +1,89 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Example algorithm to sample some states from a game.""" + +import random +import pyspiel + + +def sample_some_states( + game, + max_states=100, + make_distribution_fn=lambda states: [1 / len(states)] * len(states)): + """Samples some states in the game. + + This can be run for large games, in contrast to `get_all_states`. It is useful + for tests that need to check a predicate only on a subset of the game, since + generating the whole game is infeasible. + + Currently only works for sequential games. For simultaneous games and mean + field games it returns only the initial state. + + The algorithm maintains a list of states and repeatedly picks a random state + from the list to expand until enough states have been sampled. 
+ + Arguments: + game: The game to analyze, as returned by `load_game`. + max_states: The maximum number of states to return. Negative means no limit. + make_distribution_fn: Function that takes a list of states and returns a + corresponding distribution (as a list of floats). Only used for mean field + games. + + Returns: + A `list` of `pyspiel.State`. + """ + if game.get_type().dynamics in [ + pyspiel.GameType.Dynamics.SIMULTANEOUS, + pyspiel.GameType.Dynamics.MEAN_FIELD + ]: + return [game.new_initial_state()] + states = [] + unexplored_actions = [] + indexes_with_unexplored_actions = set() + + def add_state(state): + states.append(state) + if state.is_terminal(): + unexplored_actions.append(None) + else: + indexes_with_unexplored_actions.add(len(states) - 1) + unexplored_actions.append(set(state.legal_actions())) + + def expand_random_state(): + index = random.choice(list(indexes_with_unexplored_actions)) + state = states[index] + if state.is_mean_field_node(): + child = state.clone() + child.update_distribution( + make_distribution_fn(child.distribution_support())) + indexes_with_unexplored_actions.remove(index) + return child + else: + actions = unexplored_actions[index] + assert actions, f"Empty actions for state {state}" + action = random.choice(list(actions)) + actions.remove(action) + if not actions: + indexes_with_unexplored_actions.remove(index) + return state.child(action) + + add_state(game.new_initial_state()) + while (len(states) < max_states) and indexes_with_unexplored_actions: + add_state(expand_random_state()) + + if not states: + raise ValueError("get_some_states sampled 0 states!") + + return states diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/sample_some_states_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/sample_some_states_test.py new file mode 100644 index 0000000..00b078b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/sample_some_states_test.py @@ -0,0 +1,51 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
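For reviewers of this hunk: `sample_some_states` (added above) is useful for spot-checking a predicate on a few states of a large game without enumerating the whole tree. A minimal usage sketch, not part of the diff; the game name and call signature follow the tests below:

```
import pyspiel
from open_spiel.python.algorithms import sample_some_states

# Load a tiny game as turn-based and sample a handful of states from it.
game = pyspiel.load_game_as_turn_based("matrix_mp")
states = sample_some_states.sample_some_states(game, max_states=5)
for state in states:
    # Each entry is a pyspiel.State somewhere in the tree (possibly terminal).
    print(str(state), state.is_terminal())
```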
+ +"""Tests for open_spiel.python.algorithms.sample_some_states.""" + +from absl.testing import absltest + +from open_spiel.python.algorithms import sample_some_states +import pyspiel + + +class SampleSomeStatesTest(absltest.TestCase): + + def test_sampling_in_simple_games(self): + matrix_mp_num_states = 1 + 2 + 4 + game = pyspiel.load_game_as_turn_based("matrix_mp") + for n in range(1, matrix_mp_num_states + 1): + states = sample_some_states.sample_some_states(game, max_states=n) + self.assertLen(states, n) + + states = sample_some_states.sample_some_states(game, max_states=1) + self.assertLen(states, 1) + + states = sample_some_states.sample_some_states( + game, max_states=matrix_mp_num_states + 1) + self.assertLen(states, matrix_mp_num_states) + + coordinated_mp_num_states = 1 + 2 + 4 + 8 + game = pyspiel.load_game_as_turn_based("coordinated_mp") + for n in range(1, coordinated_mp_num_states + 1): + states = sample_some_states.sample_some_states(game, max_states=n) + self.assertLen(states, n) + + states = sample_some_states.sample_some_states( + game, max_states=coordinated_mp_num_states + 1) + self.assertLen(states, coordinated_mp_num_states) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/sequence_form_lp.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/sequence_form_lp.py new file mode 100644 index 0000000..76efe2e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/sequence_form_lp.py @@ -0,0 +1,257 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""An implementation of sequence-form linear programming. + +This is a classic algorithm for solving two-player zero-sum games with imperfect +information. For a general introduction to the concepts, see Sec 5.2.3 of +Shoham & Leyton-Brown '09, Multiagent Systems: Algorithmic, Game-Theoretic, and +Logical Foundations http://www.masfoundations.org/mas.pdf. + +In this implementation, we follow closely the construction in Koller, Megiddo, +and von Stengel, Fast Algorithms for Finding Randomized Strategies in Game Trees +http://theory.stanford.edu/~megiddo/pdf/stoc94.pdf. Specifically, we construct +and solve equations (8) and (9) from this paper. +""" + +from open_spiel.python import policy +from open_spiel.python.algorithms import lp_solver +import pyspiel + +_DELIMITER = " -=- " +_EMPTY_INFOSET_KEYS = ["***EMPTY_INFOSET_P0***", "***EMPTY_INFOSET_P1***"] +_EMPTY_INFOSET_ACTION_KEYS = [ + "***EMPTY_INFOSET_ACTION_P0***", "***EMPTY_INFOSET_ACTION_P1***" +] + + +def _construct_lps(state, infosets, infoset_actions, infoset_action_maps, + chance_reach, lps, parent_is_keys, parent_isa_keys): + """Build the linear programs recursively from this state. + + Args: + state: an open spiel state (root of the game tree) + infosets: a list of dicts, one per player, that maps infostate to an id. The + dicts are filled by this function and should initially only contain root + values. 
+ infoset_actions: a list of dicts, one per player, that maps a string of + (infostate, action) pair to an id. The dicts are filled by this function + and should inirially only contain the root values + infoset_action_maps: a list of dicts, one per player, that maps each + info_state to a list of (infostate, action) string + chance_reach: the contribution of chance's reach probability (should start + at 1). + lps: a list of linear programs, one per player. The first one will be + constructred as in Eq (8) of Koller, Megiddo and von Stengel. The second + lp is Eq (9). Initially these should contain only the root-level + constraints and variables. + parent_is_keys: a list of parent information state keys for this state + parent_isa_keys: a list of parent (infostate, action) keys + """ + if state.is_terminal(): + returns = state.returns() + # Left-most term of: -Ay + E^t p >= 0 + lps[0].add_or_reuse_constraint(parent_isa_keys[0], lp_solver.CONS_TYPE_GEQ) + lps[0].add_to_cons_coeff(parent_isa_keys[0], parent_isa_keys[1], + -1.0 * returns[0] * chance_reach) + # Right-most term of: -Ay + E^t p >= 0 + lps[0].set_cons_coeff(parent_isa_keys[0], parent_is_keys[0], 1.0) + # Left-most term of: x^t (-A) - q^t F <= 0 + lps[1].add_or_reuse_constraint(parent_isa_keys[1], lp_solver.CONS_TYPE_LEQ) + lps[1].add_to_cons_coeff(parent_isa_keys[1], parent_isa_keys[0], + -1.0 * returns[0] * chance_reach) + # Right-most term of: x^t (-A) - q^t F <= 0 + lps[1].set_cons_coeff(parent_isa_keys[1], parent_is_keys[1], -1.0) + return + + if state.is_chance_node(): + for action, prob in state.chance_outcomes(): + new_state = state.child(action) + _construct_lps(new_state, infosets, infoset_actions, infoset_action_maps, + prob * chance_reach, lps, parent_is_keys, parent_isa_keys) + return + + player = state.current_player() + info_state = state.information_state_string(player) + legal_actions = state.legal_actions(player) + + # p and q variables, inequality constraints, and part of equality constraints + if player == 0: + # p + lps[0].add_or_reuse_variable(info_state) + # -Ay + E^t p >= 0 + lps[0].add_or_reuse_constraint(parent_isa_keys[0], lp_solver.CONS_TYPE_GEQ) + lps[0].set_cons_coeff(parent_isa_keys[0], parent_is_keys[0], 1.0) + lps[0].set_cons_coeff(parent_isa_keys[0], info_state, -1.0) + # x^t E^t = e^t + lps[1].add_or_reuse_constraint(info_state, lp_solver.CONS_TYPE_EQ) + lps[1].set_cons_coeff(info_state, parent_isa_keys[0], -1.0) + else: + # q + lps[1].add_or_reuse_variable(info_state) + # x^t (-A) - q^t F <= 0 + lps[1].add_or_reuse_constraint(parent_isa_keys[1], lp_solver.CONS_TYPE_LEQ) + lps[1].set_cons_coeff(parent_isa_keys[1], parent_is_keys[1], -1.0) + lps[1].set_cons_coeff(parent_isa_keys[1], info_state, 1.0) + # -Fy = -f + lps[0].add_or_reuse_constraint(info_state, lp_solver.CONS_TYPE_EQ) + lps[0].set_cons_coeff(info_state, parent_isa_keys[1], -1.0) + + # Add to the infostate maps + if info_state not in infosets[player]: + infosets[player][info_state] = len(infosets[player]) + if info_state not in infoset_action_maps[player]: + infoset_action_maps[player][info_state] = [] + + new_parent_is_keys = parent_is_keys[:] + new_parent_is_keys[player] = info_state + + for action in legal_actions: + isa_key = info_state + _DELIMITER + str(action) + if isa_key not in infoset_actions[player]: + infoset_actions[player][isa_key] = len(infoset_actions[player]) + if isa_key not in infoset_action_maps[player][info_state]: + infoset_action_maps[player][info_state].append(isa_key) + + # x and y variables, and finish equality 
constraints coeff + if player == 0: + lps[1].add_or_reuse_variable(isa_key, lb=0) # x + lps[1].set_cons_coeff(info_state, isa_key, 1.0) # x^t E^t = e^t + else: + lps[0].add_or_reuse_variable(isa_key, lb=0) # y + lps[0].set_cons_coeff(info_state, isa_key, 1.0) # -Fy = -f + + new_parent_isa_keys = parent_isa_keys[:] + new_parent_isa_keys[player] = isa_key + + new_state = state.child(action) + _construct_lps(new_state, infosets, infoset_actions, infoset_action_maps, + chance_reach, lps, new_parent_is_keys, new_parent_isa_keys) + + +def solve_zero_sum_game(game, solver=None): + """Solve the two-player zero-sum game using sequence-form LPs. + + Args: + game: the spiel game tp solve (must be zero-sum, sequential, and have chance + mode of deterministic or explicit stochastic). + solver: a specific solver to use, sent to cvxopt (i.e. 'lapack', 'blas', + 'glpk'). A value of None uses cvxopt's default solver. + + Returns: + A 4-tuple containing: + - player 0 value + - player 1 value + - player 0 policy: a policy.TabularPolicy for player 0 + - player 1 policy: a policy.TabularPolicy for player 1 + """ + assert game.num_players() == 2 + assert game.get_type().utility == pyspiel.GameType.Utility.ZERO_SUM + assert game.get_type().dynamics == pyspiel.GameType.Dynamics.SEQUENTIAL + assert ( + game.get_type().chance_mode == pyspiel.GameType.ChanceMode.DETERMINISTIC + or game.get_type().chance_mode == + pyspiel.GameType.ChanceMode.EXPLICIT_STOCHASTIC) + # There are several import matrices and vectors that form the LPs that + # are built by this function: + # + # A is expected payoff to p1 of each (infoset0,action0) + (infoset1,action1) + # belong to p1 and p2 respectively, which lead to a terminal state. It has + # dimensions (infoset-actions0) x (infoset-actions1) + # E,F are p1 / p2's strategy matrices (infosets) x (infoset-actions) + # e,f are infosets+ x 1 column vector of (1 0 0 ... 0) + # p,q are unconstrained variables each with infosets x 1. + # x,y are realization plans of size infoset-actions + # + # In each of the computations above there is a special "root infoset" and + # "root infoset-action" denote \emptyset. So the values are actually equal to + # number of infosets + 1 and infoset-actions + 1. + # + # Equation (8) is min_{y,p} e^T p + # + # s.t. -Ay + E^t p >= 0 + # -Fy = -f + # y >= 0 + # + # Equation (9) is max_{x,q} -q^T f + # + # s.t. x^t(-A) - q^t F <= 0 + # x^t E^t = e^t + # x >= 0 + # + # So, the first LP has: + # - |y| + |p| variables (infoset-actions1 + infosets0) + # - infoset-actions0 inequality constraints (other than var lower-bounds) + # - infosets1 equality constraints + # + # And the second LP has: + # - |x| + |q| variables (infoset-actions0 + infosets1) + # - infoset-actions1 inequality constraints (other than var lower-bounds) + # - infosets0 equality constraints + infosets = [{_EMPTY_INFOSET_KEYS[0]: 0}, {_EMPTY_INFOSET_KEYS[1]: 0}] + infoset_actions = [{ + _EMPTY_INFOSET_ACTION_KEYS[0]: 0 + }, { + _EMPTY_INFOSET_ACTION_KEYS[1]: 0 + }] + infoset_action_maps = [{}, {}] + lps = [ + lp_solver.LinearProgram(lp_solver.OBJ_MIN), # Eq. (8) + lp_solver.LinearProgram(lp_solver.OBJ_MAX) # Eq. (9) + ] + # Root-level variables and constraints. 
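+  # The root sequences are the leading entries of the realization plans x and
+  # y; the equality constraints added just below pin them to 1 (the leading
+  # entries of e and f), while the root p and q variables carry the
+  # objective terms e^T p and -q^T f.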
+ lps[0].add_or_reuse_variable(_EMPTY_INFOSET_ACTION_KEYS[1], lb=0) # y root + lps[0].add_or_reuse_variable(_EMPTY_INFOSET_KEYS[0]) # p root + lps[1].add_or_reuse_variable(_EMPTY_INFOSET_ACTION_KEYS[0], lb=0) # x root + lps[1].add_or_reuse_variable(_EMPTY_INFOSET_KEYS[1]) # q root + # objective coefficients + lps[0].set_obj_coeff(_EMPTY_INFOSET_KEYS[0], 1.0) # e^t p + lps[1].set_obj_coeff(_EMPTY_INFOSET_KEYS[1], -1.0) # -q^t f + # y_root = 1 (-Fy = -f) + lps[0].add_or_reuse_constraint(_EMPTY_INFOSET_KEYS[1], lp_solver.CONS_TYPE_EQ) + lps[0].set_cons_coeff(_EMPTY_INFOSET_KEYS[1], _EMPTY_INFOSET_ACTION_KEYS[1], + -1.0) + lps[0].set_cons_rhs(_EMPTY_INFOSET_KEYS[1], -1.0) + # x_root = 1 (x^t E^t = e^t) + lps[1].add_or_reuse_constraint(_EMPTY_INFOSET_KEYS[0], lp_solver.CONS_TYPE_EQ) + lps[1].set_cons_coeff(_EMPTY_INFOSET_KEYS[0], _EMPTY_INFOSET_ACTION_KEYS[0], + 1.0) + lps[1].set_cons_rhs(_EMPTY_INFOSET_KEYS[0], 1.0) + _construct_lps(game.new_initial_state(), infosets, infoset_actions, + infoset_action_maps, 1.0, lps, _EMPTY_INFOSET_KEYS[:], + _EMPTY_INFOSET_ACTION_KEYS[:]) + # Solve the programs. + solutions = [lps[0].solve(solver=solver), lps[1].solve(solver=solver)] + # Extract the policies (convert from realization plan to behavioral form). + policies = [policy.TabularPolicy(game), policy.TabularPolicy(game)] + for i in range(2): + for info_state in infoset_action_maps[i]: + total_weight = 0 + num_actions = 0 + for isa_key in infoset_action_maps[i][info_state]: + total_weight += solutions[1 - i][lps[1 - i].get_var_id(isa_key)] + num_actions += 1 + unif_pr = 1.0 / num_actions + state_policy = policies[i].policy_for_key(info_state) + for isa_key in infoset_action_maps[i][info_state]: + # The 1 - i here is due to Eq (8) yielding a solution for player 1 and + # Eq (9) a solution for player 0. + rel_weight = solutions[1 - i][lps[1 - i].get_var_id(isa_key)] + _, action_str = isa_key.split(_DELIMITER) + action = int(action_str) + pr_action = rel_weight / total_weight if total_weight > 0 else unif_pr + state_policy[action] = pr_action + return (solutions[0][lps[0].get_var_id(_EMPTY_INFOSET_KEYS[0])], + solutions[1][lps[1].get_var_id(_EMPTY_INFOSET_KEYS[1])], policies[0], + policies[1]) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/sequence_form_lp_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/sequence_form_lp_test.py new file mode 100644 index 0000000..f2d0a2f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/sequence_form_lp_test.py @@ -0,0 +1,105 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
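As a quick sanity check of the sequence-form LP added above, it can be run end-to-end on Kuhn poker, whose value of -1/18 for player 0 is also asserted by the tests below. A minimal sketch, not part of the diff:

```
import pyspiel
from open_spiel.python.algorithms import sequence_form_lp

game = pyspiel.load_game("kuhn_poker")
val0, val1, pi0, pi1 = sequence_form_lp.solve_zero_sum_game(game)
# Expected: val0 ~ -1/18 and val1 ~ +1/18; pi0 and pi1 are TabularPolicy objects.
print(val0, val1)
```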
+ +"""Tests for LP solvers.""" + +from absl.testing import absltest + +from open_spiel.python import policy +from open_spiel.python.algorithms import exploitability +from open_spiel.python.algorithms import sequence_form_lp +import pyspiel + + +class SFLPTest(absltest.TestCase): + + def test_rock_paper_scissors(self): + game = pyspiel.load_game_as_turn_based("matrix_rps") + val1, val2, _, _ = sequence_form_lp.solve_zero_sum_game(game) + self.assertAlmostEqual(val1, 0) + self.assertAlmostEqual(val2, 0) + + def test_kuhn_poker(self): + game = pyspiel.load_game("kuhn_poker") + val1, val2, _, _ = sequence_form_lp.solve_zero_sum_game(game) + # value from Kuhn 1950 or https://en.wikipedia.org/wiki/Kuhn_poker + self.assertAlmostEqual(val1, -1 / 18) + self.assertAlmostEqual(val2, +1 / 18) + + def test_kuhn_poker_efg(self): + game = pyspiel.load_efg_game(pyspiel.get_kuhn_poker_efg_data()) + val1, val2, _, _ = sequence_form_lp.solve_zero_sum_game(game) + # value from Kuhn 1950 or https://en.wikipedia.org/wiki/Kuhn_poker + self.assertAlmostEqual(val1, -1 / 18) + self.assertAlmostEqual(val2, +1 / 18) + + def test_leduc_poker(self): + game = pyspiel.load_game("leduc_poker") + val1, val2, _, _ = sequence_form_lp.solve_zero_sum_game(game) + # values obtained from Appendix E.2 of Lanctot et al. 2017, A Unified + # Game-Theoretic Approach to Multiagent Reinforcement Learning. + # https://arxiv.org/abs/1711.00832 + self.assertAlmostEqual(val1, -0.085606424078, places=6) + self.assertAlmostEqual(val2, 0.085606424078, places=6) + + def test_iigoofspiel4(self): + game = pyspiel.load_game_as_turn_based("goofspiel", { + "imp_info": True, + "num_cards": 4, + "points_order": "descending", + }) + val1, val2, _, _ = sequence_form_lp.solve_zero_sum_game(game) + # symmetric game, should be 0 + self.assertAlmostEqual(val1, 0) + self.assertAlmostEqual(val2, 0) + + def test_exploitablity(self): + # exploitability test for a player's / joint policies + # loading the game from Kuhn 1950 or + # https://en.wikipedia.org/wiki/Kuhn_poker + game = pyspiel.load_game("kuhn_poker") + # solving the game as the linear programme + (_, _, pi1, pi2) = sequence_form_lp.solve_zero_sum_game(game) + + # the way to do it is to merge the policies to get the joint policy + # of the game + merged_policy = policy.merge_tabular_policies([pi1, pi2], game) + expl_pi = exploitability.exploitability(game, merged_policy) + self.assertAlmostEqual(0.0, expl_pi) + + # TODO(author5): currently does not work because TTT's information state is + # not perfect recall. Enable this test when fixed. + # def test_tictactoe(self): + # game = pyspiel.load_game("tic_tac_toe") + # val1, val2 = sequence_form_lp.solve_zero_sum_game(game) + # self.assertAlmostEqual(val1, 0) + # self.assertAlmostEqual(val2, 0) + + # This test takes too long for non-glpk solvers, and glpk solver is not + # supported within google's internal cvxopt import. When solving via glpk, + # (locally, outside of google's testing framework), the test takes >300 + # seconds, so it is disabled by default, but still left here for reference. 
+ # Note, value is taken from an independent implementation but also found in + # Neller & Lanctot 2013, An Introduction to Counterfactual Regret Minimization + # http://modelai.gettysburg.edu/2013/cfr/cfr.pdf + # + # def test_liars_dice(self): + # game = pyspiel.load_game("liars_dice") + # val1, val2 = sequence_form_lp.solve_zero_sum_game(game, solver="glpk") + # self.assertAlmostEqual(val1, -0.027131782945736) + # self.assertAlmostEqual(val2, 0.027131782945736) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/sequence_form_utils.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/sequence_form_utils.py new file mode 100644 index 0000000..1fd11de --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/sequence_form_utils.py @@ -0,0 +1,389 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Useful sequence form functions used in the MMD implementation.""" + +import numpy as np +from open_spiel.python import policy + + +_DELIMITER = " -=- " + + +def get_isa_key(info_state, action): + return info_state + _DELIMITER + str(action) + + +def get_action_from_key(isa_key): + _, action_str = isa_key.split(_DELIMITER) + return int(action_str) + + +def get_infostate_from_key(isa_key, player): + assert not is_root(isa_key, player), "Cannot use this method for root nodes." + infostate, _ = isa_key.split(_DELIMITER) + return infostate + + +def is_root(key, player): + empty_is_key = f"***EMPTY_INFOSET_P{player}***" + empty_isa_key = f"***EMPTY_INFOSET_ACTION_P{player}***" + return True if key in [empty_is_key, empty_isa_key] else False + + +def construct_vars(game): + """Construct useful sequence from variables from game. + + Args: + game: The spiel game to solve (must be zero-sum, sequential, and have + chance node of deterministic or explicit stochastic). + + Returns: + An 8 tuple of sequence form variables from _construct_vars by + recursively + traversing the game tree. 
+ + """ + + initial_state = game.new_initial_state() + npl = game.num_players() + + empty_is_keys = [f"***EMPTY_INFOSET_P{player}***" for player in range(npl)] + empty_isa_keys = [ + f"***EMPTY_INFOSET_ACTION_P{player}***" for player in range(npl) + ] + + # initialize variables + infosets = [{empty_is_keys[p]: 0} for p in range(npl)] + infoset_actions_to_seq = [{empty_isa_keys[p]: 0} for p in range(npl)] + infoset_action_maps = [ + {empty_is_keys[p]: [empty_isa_keys[p]]} for p in range(npl) + ] + + # infoset_action_maps = [{}, {}] + payoff_dict = dict() + + infoset_parent_map = [{empty_isa_keys[p]: None} for p in range(npl)] + infoset_actions_children = [{empty_isa_keys[p]: []} for p in range(npl)] + + _construct_vars(initial_state, infosets, infoset_actions_to_seq, + infoset_action_maps, infoset_parent_map, 1.0, + empty_is_keys[:], empty_isa_keys[:], + payoff_dict, infoset_actions_children) + + payoff_mat = _construct_numpy_vars(payoff_dict, infoset_actions_to_seq) + return (infosets, infoset_actions_to_seq, + infoset_action_maps, infoset_parent_map, + payoff_mat, infoset_actions_children) + + +def uniform_random_seq(game, infoset_actions_to_seq): + """Generate uniform random sequence. + + The sequence generated is equivalent to a uniform random tabular policy. + + Args: + game: the spiel game to solve (must be zero-sum, sequential, and have + chance mode of deterministic or explicit stochastic). + infoset_actions_to_seq: a list of dicts, one per player, that maps a + string of (infostate, action) pair to an id. + + Returns: + A list of NumPy arrays, one for each player. + """ + policies = policy.TabularPolicy(game) + initial_state = game.new_initial_state() + sequences = [ + np.ones(len(infoset_actions_to_seq[i])) for i in range(game.num_players()) + ] + _policy_to_sequence( + initial_state, + policies, + sequences, + infoset_actions_to_seq, + [1 for _ in range(game.num_players())], + ) + return sequences + + +def _construct_vars(state, infosets, infoset_actions_to_seq, + infoset_action_maps, infoset_parent_map, chance_reach, + parent_is_keys, parent_isa_keys, payoff_dict, + infoset_actions_children): + """Recursively builds maps and the sequence form payoff matrix. + + Args: + state: pyspiel (OpenSpiel) state + infosets: a list of dicts, one per player, that maps infostate to an id. + The dicts are filled by this function and should initially only + contain root values. + infoset_actions_to_seq: a list of dicts, one per player, that maps a + string of (infostate, action) pair to an id. The dicts are filled by + this function and should initially only contain the root values. + infoset_action_maps: a list of dicts, one per player, that maps each + info_state to a list of (infostate, action) string. + infoset_parent_map: a list of dicts, one per player, that maps each + info_state to an (infostate, action) string. + chance_reach: the contribution of chance's reach probability (should + start at 1). + parent_is_keys: a list of parent information state keys for this state + parent_isa_keys: a list of parent (infostate, action) keys + payoff_dict: a dict that maps sequences of players' (infostate, action) + tuples, e.g., ((infostate, action), ...) to the chance weighted reward. 
+ infoset_actions_children: a list of dicts, one for each player, mapping + (infostate, action) keys to reachable infostates for each player + """ + + if state.is_terminal(): + returns = state.returns() + idx = tuple(parent_isa_keys_i for parent_isa_keys_i in parent_isa_keys) + payoff_dict.setdefault(idx, 0) + payoff_dict[idx] += np.asarray(returns) * chance_reach + return + + if state.is_chance_node(): + for action, prob in state.chance_outcomes(): + new_state = state.child(action) + _construct_vars(new_state, infosets, infoset_actions_to_seq, + infoset_action_maps, infoset_parent_map, + prob * chance_reach, parent_is_keys, parent_isa_keys, + payoff_dict, infoset_actions_children) + return + + player = state.current_player() + info_state = state.information_state_string(player) + legal_actions = state.legal_actions(player) + + # Add to the infostate maps + if info_state not in infosets[player]: + infosets[player][info_state] = len(infosets[player]) + if info_state not in infoset_action_maps[player]: + infoset_action_maps[player][info_state] = [] + + # Add to infoset to parent infoset action map + if info_state not in infoset_parent_map[player]: + infoset_parent_map[player][info_state] = parent_isa_keys[player] + + # add as child to parent + if parent_isa_keys[player] in infoset_actions_children[player]: + if info_state not in infoset_actions_children[player][ + parent_isa_keys[player]]: + infoset_actions_children[player][parent_isa_keys[player]].append( + info_state) + else: + infoset_actions_children[player][parent_isa_keys[player]] = [info_state] + + new_parent_is_keys = parent_is_keys[:] + new_parent_is_keys[player] = info_state + + for action in legal_actions: + isa_key = get_isa_key(info_state, action) + if isa_key not in infoset_actions_to_seq[player]: + infoset_actions_to_seq[player][isa_key] = len( + infoset_actions_to_seq[player]) + if isa_key not in infoset_action_maps[player][info_state]: + infoset_action_maps[player][info_state].append(isa_key) + + new_parent_isa_keys = parent_isa_keys[:] + new_parent_isa_keys[player] = isa_key + new_state = state.child(action) + _construct_vars(new_state, infosets, infoset_actions_to_seq, + infoset_action_maps, infoset_parent_map, chance_reach, + new_parent_is_keys, new_parent_isa_keys, payoff_dict, + infoset_actions_children) + + +def _construct_numpy_vars(payoff_dict, infoset_actions_to_seq): + """Convert sequence form payoff dict to numpy array. + + Args: + payoff_dict: a dict that maps sequences of players' (infostate, action) + tuples, e.g., ((infostate, action), ...) to the chance weighted reward. + infoset_actions_to_seq: a list of dicts, one per player, that maps a + string of (infostate, action) pair to an id. + + Returns: + A numpy array corresponding to the chance weighted rewards + i.e. the sequence form payoff tensor. + + """ + npl = len(infoset_actions_to_seq) + pls = range(npl) # player list + sequence_sizes = tuple(len(infoset_actions_to_seq[i]) for i in pls) + payoff_tensor = np.zeros((npl,) + sequence_sizes) + for player_isa_seqs, payoffs in payoff_dict.items(): + idx = tuple(infoset_actions_to_seq[i][player_isa_seqs[i]] for i in pls) + payoff_tensor[(slice(None),) + idx] = np.asarray(payoffs) + return payoff_tensor + + +def construct_constraint_vars(infoset_parent_map, infoset_actions_to_seq, + infoset_action_maps): + """Construct useful sequence form variables from game. + + Args: + infoset_parent_map: a list of dicts, one per player, that maps each + info_state to an (infostate, action) string. 
+ infoset_actions_to_seq: a list of dicts, one per player, that maps a + string of (infostate, action) pair to an id. + infoset_action_maps: a list of dicts, one per player, that maps each + info_state to a list of (infostate, action) string. + + Returns: + A dict mapping player to a tuple containing a numpy array of coefficients, + each of dimension # of player sequences, as well as a sparse vector + containing the constants, i.e., dict[player] = (A, b) as in Ax = b. + """ + npl = len(infoset_actions_to_seq) + constraint_dict = {} + + for player in range(npl): + num_seqs = len(infoset_actions_to_seq[player].values()) + + root_con = np.zeros(num_seqs) + root_con[0] = 1.0 + constraints = [root_con] + + for info_state in infoset_action_maps[player]: + if is_root(info_state, player): + continue + + parent_isa_key = infoset_parent_map[player][info_state] + parent_seq_id = infoset_actions_to_seq[player][parent_isa_key] + + # seq ids for children + children_isa_keys = infoset_action_maps[player][info_state] + children_seq_ids = [ + infoset_actions_to_seq[player][isa_key] + for isa_key in children_isa_keys + ] + + constraint = np.zeros(num_seqs) + constraint[parent_seq_id] = -1.0 + constraint[children_seq_ids] = 1.0 + constraints.append(constraint) + + constant = np.zeros(len(constraints)) + constant[0] = 1.0 + constraint_dict[player] = (np.stack(constraints), constant) + + return constraint_dict + + +def sequence_to_policy(sequences, game, infoset_actions_to_seq, + infoset_action_maps): + """Convert sequence form policies to the realization-equivalent tabular ones. + + Args: + sequences: list of two sequence form policies, one for each player. + game: a spiel game with two players. + infoset_actions_to_seq: a list of dicts, one per player, that maps a + string of (infostate, action) pair to an id. + infoset_action_maps: a list of dicts, one per player, that maps each + info_state to a list of (infostate, action) string. + + Returns: + A TabularPolicy object. + """ + + policies = policy.TabularPolicy(game) + for player in range(2): + for info_state in infoset_action_maps[player]: + if is_root(info_state, player): + continue + + state_policy = policies.policy_for_key(info_state) + total_weight = 0 + num_actions = 0 + + for isa_key in infoset_action_maps[player][info_state]: + total_weight += sequences[player][infoset_actions_to_seq[player] + [isa_key]] + num_actions += 1 + + unif_pr = 1.0 / num_actions + for isa_key in infoset_action_maps[player][info_state]: + rel_weight = sequences[player][infoset_actions_to_seq[player][isa_key]] + _, action_str = isa_key.split(_DELIMITER) + action = int(action_str) + pr_action = rel_weight / total_weight if total_weight > 0 else unif_pr + state_policy[action] = pr_action + return policies + + +def policy_to_sequence(game, policies, infoset_actions_to_seq): + """Converts a TabularPolicy object for a two-player game. + + The converted policy is its realization-equivalent sequence form one. + + Args: + game: a two-player open spiel game. + policies: a TabularPolicy object. + infoset_actions_to_seq: a list of dicts, one per player, that maps a + string of (infostate, action) pair to an id. + + Returns: + A list of numpy arrays, one for each player. 
+ """ + initial_state = game.new_initial_state() + sequences = [ + np.ones(len(infoset_actions_to_seq[0])), + np.ones(len(infoset_actions_to_seq[1])) + ] + _policy_to_sequence(initial_state, policies, sequences, + infoset_actions_to_seq, [1, 1]) + return sequences + + +def _policy_to_sequence(state, policies, sequences, infoset_actions_to_seq, + parent_seq_val): + """Converts a TabularPolicy object to its equivalent sequence form. + + This method modifies the sequences inplace and should not be called directly. + + Args: + state: an openspiel state. + policies: a TabularPolicy object. + sequences: list of numpy arrays to be modified. + infoset_actions_to_seq: a list of dicts, one per player, that maps a + string of (infostate, action) pair to an id. + parent_seq_val: list of parent sequence values, this method should be + called with initial value of [1,1]. + """ + + if state.is_terminal(): + return + + if state.is_chance_node(): + for action, _ in state.chance_outcomes(): + new_state = state.child(action) + _policy_to_sequence(new_state, policies, sequences, + infoset_actions_to_seq, parent_seq_val) + return + + player = state.current_player() + info_state = state.information_state_string(player) + legal_actions = state.legal_actions(player) + state_policy = policies.policy_for_key(info_state) + for action in legal_actions: + isa_key = get_isa_key(info_state, action) + # update sequence form + sequences[player][infoset_actions_to_seq[player] + [isa_key]] = parent_seq_val[player] * state_policy[action] + new_parent_seq_val = parent_seq_val[:] + new_parent_seq_val[player] = sequences[player][ + infoset_actions_to_seq[player][isa_key]] + new_state = state.child(action) + _policy_to_sequence(new_state, policies, sequences, infoset_actions_to_seq, + new_parent_seq_val) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/sequence_form_utils_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/sequence_form_utils_test.py new file mode 100644 index 0000000..72c55b7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/sequence_form_utils_test.py @@ -0,0 +1,149 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
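To see how the utilities above fit together, the round-trip below mirrors what the tests that follow exercise: build the sequence-form variables, map a tabular policy to realization plans, recover the policy, and read off player 0's value from the payoff tensor. A minimal sketch, not part of the diff:

```
import pyspiel
from open_spiel.python import policy
from open_spiel.python.algorithms import sequence_form_utils

game = pyspiel.load_game("kuhn_poker")
(_, infoset_actions_to_seq, infoset_action_maps, _, payoff_mat,
 _) = sequence_form_utils.construct_vars(game)

uniform = policy.TabularPolicy(game)
sequences = sequence_form_utils.policy_to_sequence(
    game, uniform, infoset_actions_to_seq)
recovered = sequence_form_utils.sequence_to_policy(
    sequences, game, infoset_actions_to_seq, infoset_action_maps)

# Player 0's expected value under the uniform profile, x^T A y in sequence form.
value0 = sequences[0] @ payoff_mat[0] @ sequences[1]
```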
+ +"""Tests several sequence form utilities.""" + +from absl.testing import absltest +from absl.testing import parameterized + +import numpy as np + +from open_spiel.python import policy +from open_spiel.python.algorithms import cfr +from open_spiel.python.algorithms import expected_game_score as egs +from open_spiel.python.algorithms import sequence_form_utils +import pyspiel + +_KUHN_GAME = pyspiel.load_game('kuhn_poker') +_LEDUC_GAME = pyspiel.load_game('leduc_poker') + + +class SequenceFormTest(parameterized.TestCase): + + @parameterized.parameters( + { + 'game': _KUHN_GAME, + 'cfr_iters': 100 + }, + { + 'game': _LEDUC_GAME, + 'cfr_iters': 10 + }, + ) + def test_sequence_to_policy(self, game, cfr_iters): + + cfr_solver = cfr.CFRSolver(game) + + for _ in range(cfr_iters): + cfr_solver.evaluate_and_update_policy() + + (_, infoset_actions_to_seq, infoset_action_maps, _, _, + _) = sequence_form_utils.construct_vars(game) + + policies = cfr_solver.average_policy() + sequences = sequence_form_utils.policy_to_sequence(game, policies, + infoset_actions_to_seq) + converted_policies = sequence_form_utils.sequence_to_policy( + sequences, game, infoset_actions_to_seq, infoset_action_maps) + np.testing.assert_allclose( + policies.action_probability_array, + converted_policies.action_probability_array, + rtol=1e-10) + + @parameterized.parameters( + { + 'game': _KUHN_GAME, + 'cfr_iters': 100 + }, + { + 'game': _LEDUC_GAME, + 'cfr_iters': 10 + }, + ) + def test_sequence_payoff(self, game, cfr_iters): + (_, infoset_actions_to_seq, _, _, payoff_mat, + _) = sequence_form_utils.construct_vars(game) + + uniform_policies = policy.TabularPolicy(game) + uniform_value = egs.policy_value( + game.new_initial_state(), [uniform_policies, uniform_policies] + ) + sequences = sequence_form_utils.policy_to_sequence(game, uniform_policies, + infoset_actions_to_seq) + min_mat = -payoff_mat[0] + np.testing.assert_allclose( + uniform_value[0], + -sequences[0].T @ min_mat @ sequences[1], + rtol=1e-10) + + # use cfr iterations to construct new policy + cfr_solver = cfr.CFRSolver(game) + for _ in range(cfr_iters): + cfr_solver.evaluate_and_update_policy() + + policies = cfr_solver.average_policy() + cfr_value = egs.policy_value(game.new_initial_state(), [policies, policies]) + sequences = sequence_form_utils.policy_to_sequence(game, policies, + infoset_actions_to_seq) + np.testing.assert_allclose( + cfr_value[0], -sequences[0].T @ min_mat @ sequences[1], rtol=1e-10) + + @parameterized.parameters( + { + 'game': _KUHN_GAME, + 'seed': 12345 + }, + { + 'game': _LEDUC_GAME, + 'seed': 12345 + }, + ) + def test_sequence_tangent_projection(self, game, seed, step_size=1e-2): + (_, infoset_actions_to_seq, infoset_action_maps, infoset_parent_map, _, + _) = sequence_form_utils.construct_vars(game) + + uniform_policies = policy.TabularPolicy(game) + sequences = sequence_form_utils.policy_to_sequence(game, uniform_policies, + infoset_actions_to_seq) + + constraints = sequence_form_utils.construct_constraint_vars( + infoset_parent_map, infoset_actions_to_seq, infoset_action_maps) + + rnd = np.random.RandomState(seed) + + con_errs = [] + for p, (con_mat, b) in constraints.items(): + seq_p = sequences[p] + # generate random sequence-form "gradient" direction + grad = rnd.randn(len(seq_p)) + grad = grad / np.linalg.norm(grad) + # construct tangent projection from sequence-form constraints + pinv_con_mat = np.linalg.pinv(con_mat) + proj = np.eye(con_mat.shape[1]) - pinv_con_mat.dot(con_mat) + # project gradient onto tangent space + grad_proj = 
proj.dot(grad) + # take gradient step (should remain on treeplex w/ small enough step_size) + seq_p_new = seq_p + step_size * grad_proj + # measure constraint violation error after gradient step (should be ~0) + con_err = np.linalg.norm(con_mat.dot(seq_p_new) - b) + con_errs.append(con_err) + + np.testing.assert_allclose( + con_errs, + np.zeros(len(con_errs)), + atol=1e-10) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/stackelberg_lp.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/stackelberg_lp.py new file mode 100644 index 0000000..e8861bb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/stackelberg_lp.py @@ -0,0 +1,85 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Solving strong Stackelberg equilibrium based on linear programming. + +Based on [1] "Computing the Optimal Strategy to Commit to", Conitzer & Sandholm, +EC'06 +""" + +import cvxpy as cp +import numpy as np + +from open_spiel.python.algorithms.projected_replicator_dynamics import _simplex_projection +from open_spiel.python.egt.utils import game_payoffs_array + + +def solve_stackelberg(game, is_first_leader=True): + """Solves the optimal mixed strategty to commit to for the leader. + + Args: + game: a pyspiel game, + is_first_leader: if true, then player 0 is the leader, o.w. player 1 is + the leader. + + Returns: + (player0 strategy, player1 strategy, player0 payoff, player1 payoff) at an + SSE. 
+ """ + p_mat = game_payoffs_array(game) + assert len(p_mat) == 2 + if is_first_leader: + leader_payoff, follower_payoff = p_mat[0], p_mat[1] + else: + leader_payoff, follower_payoff = p_mat[1].T, p_mat[0].T + + num_leader_strategies, num_follower_strategies = leader_payoff.shape + + leader_eq_value = -float("inf") + follower_eq_value = None + leader_eq_strategy = None + follower_eq_strategy = None + + for t in range(num_follower_strategies): + p_s = cp.Variable(num_leader_strategies, nonneg=True) + constraints = [p_s <= 1, cp.sum(p_s) == 1] + for t_ in range(num_follower_strategies): + if t_ == t: + continue + constraints.append( + p_s @ follower_payoff[:, t_] <= p_s @ follower_payoff[:, t] + ) + prob = cp.Problem(cp.Maximize(p_s @ leader_payoff[:, t]), constraints) + prob.solve() + p_s_value = p_s.value + if p_s_value is None: + continue + leader_strategy = _simplex_projection(p_s.value.reshape(-1)).reshape(-1, 1) + leader_value = leader_strategy.T.dot(leader_payoff)[0, t] + if leader_value > leader_eq_value: + leader_eq_strategy = leader_strategy + follower_eq_strategy = t + leader_eq_value = leader_value + follower_eq_value = leader_strategy.T.dot(follower_payoff)[0, t] + + assert leader_eq_strategy is not None, p_mat + if is_first_leader: + return ( + leader_eq_strategy.reshape(-1), + np.identity(num_follower_strategies)[follower_eq_strategy], + leader_eq_value, + follower_eq_value, + ) + else: + return (np.identity(num_follower_strategies)[follower_eq_strategy], + leader_eq_strategy.reshape(-1), follower_eq_value, leader_eq_value) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/stackelberg_lp_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/stackelberg_lp_test.py new file mode 100644 index 0000000..1fdfbd3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/stackelberg_lp_test.py @@ -0,0 +1,70 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from absl.testing import absltest +from absl.testing import parameterized +import nashpy as nash +import numpy as np + +from open_spiel.python.algorithms.stackelberg_lp import solve_stackelberg +from open_spiel.python.egt.utils import game_payoffs_array +import pyspiel + +# Numerical tolerance for tests. 
+EPS = 1e-6 + +# game instances based on Conitzer & Sandholm'06 paper +game0 = pyspiel.create_matrix_game([[2, 4], [1, 3]], [[1, 0], [0, 1]]) +commit_strategy0 = np.array([0.5, 0.5]) +commit_value0 = 3.5 + +game1 = pyspiel.create_matrix_game([[2, 0, 0], [1, 0, 0]], + [[0, 2, 5], [0, -1, -4]]) +commit_strategy1 = np.array([1 / 3, 2 / 3]) +commit_value1 = 4 / 3 + +# a game with dominated strategy +game2 = pyspiel.create_matrix_game([[3, 9], [9, 1]], [[0, 0], [1, 8]]) +commit_strategy2 = np.array([1.0, 0.0]) +commit_value2 = 9.0 + + +class StackelbergLPTest(parameterized.TestCase): + + @parameterized.named_parameters( + ("game0", game0, commit_strategy0, commit_value0), + ("game1", game1, commit_strategy1, commit_value1), + ("game2", game2, commit_strategy2, commit_value2), + ) + def test_simple_games(self, game, commit_strategy, commit_value): + leader_eq_strategy, _, leader_eq_value, _ = solve_stackelberg(game) + + with self.subTest("optimal commitment"): + np.testing.assert_array_almost_equal( + commit_strategy, leader_eq_strategy, decimal=5 + ) + self.assertAlmostEqual(commit_value, leader_eq_value, delta=1e-5) + + with self.subTest("Leader-payoff in SSE no less than in NE"): + p_mat = game_payoffs_array(game) + nashpy_game = nash.Game(p_mat[0], p_mat[1]) + for eq in nashpy_game.support_enumeration(): + leader_nash_value = eq[0].reshape(1, + -1).dot(p_mat[0]).dot(eq[1].reshape( + -1, 1)) + self.assertGreaterEqual(leader_eq_value - leader_nash_value, -EPS) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tabular_multiagent_qlearner.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tabular_multiagent_qlearner.py new file mode 100644 index 0000000..51e9fb2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tabular_multiagent_qlearner.py @@ -0,0 +1,286 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tabular Multiagent Q-learning agent. + +Currently implementations include: +Nash-Q: https://www.jmlr.org/papers/volume4/hu03a/hu03a.pdf +Correlated-Q: https://www.aaai.org/Papers/ICML/2003/ICML03-034.pdf, where both +CE-Q and CCE-Q are supported. +Asymmetric-Q: https://ieeexplore.ieee.org/document/1241094 +""" + +import abc +import collections +import itertools +import nashpy as nash +import numpy as np + +from open_spiel.python import rl_agent +from open_spiel.python import rl_tools +from open_spiel.python.algorithms.jpsro import _mgcce +from open_spiel.python.algorithms.stackelberg_lp import solve_stackelberg +import pyspiel + + +def valuedict(): + return collections.defaultdict(float) + + +class JointActionSolver: + + @abc.abstractmethod + def __call__(self, payoffs_array): + """Find a joint action mixture and values for the current one-step game. + + Args: + payoffs_array: a `numpy.ndarray` of utilities of a game. 
+ + Returns: + res_mixtures: a list of mixed strategies for each agent + res_values: a list of expected utilities for each agent + """ + + +class TwoPlayerNashSolver(JointActionSolver): + """A joint action solver solving for Nash for two-player games. + + Uses python.algorithms.matrix_nash.lemke_howson_solve + """ + + def __call__(self, payoffs_array): + assert len(payoffs_array) == 2 + + row_payoffs, col_payoffs = payoffs_array[0], payoffs_array[1] + a0, a1 = payoffs_array.shape[1:] + + nashpy_game = nash.Game(row_payoffs, col_payoffs) + + best_value = float("-inf") + res_mixtures, res_values = None, None + + for (row_mixture, col_mixture) in nashpy_game.support_enumeration(): + # TO-DO: handle the case where the LH solver gave ineligible answer + if np.sum(np.isnan(row_mixture)) or np.sum(np.isnan(col_mixture)): + continue + row_mixture_, col_mixture_ = row_mixture.reshape( + (-1, 1)), col_mixture.reshape((-1, 1)) + row_value, col_value = ( + row_mixture_.T.dot(row_payoffs).dot(col_mixture_)).item(), ( + row_mixture_.T.dot(col_payoffs).dot(col_mixture_)).item() + # Currently using maximizing social welfare for equilibrium selection + if row_value + col_value > best_value: + best_value = row_value + col_value + res_mixtures = [row_mixture, col_mixture] + res_values = [row_value, col_value] + + # If no plauisble nash found, use uniform mixed strategies + if not res_mixtures: + res_mixtures = [np.ones(a0) / a0, np.ones(a1) / a1] + row_mixture_, col_mixture_ = res_mixtures[0].reshape( + (-1, 1)), res_mixtures[1].reshape((-1, 1)) + res_values = [(row_mixture_.T.dot(row_payoffs).dot(col_mixture_)).item(), + (row_mixture_.T.dot(col_payoffs).dot(col_mixture_)).item()] + + return res_mixtures, res_values + + +class CorrelatedEqSolver(JointActionSolver): + """A joint action solver solving for correlated equilibrium. + + Uses python.algorithms.jspro._mgce and _mgcce for solving (coarse) correlated + equilibrium. + """ + + def __init__(self, is_cce=False): + self._is_cce = is_cce + + def __call__(self, payoffs_array): + num_players = len(payoffs_array) + assert num_players > 0 + num_strategies_per_player = payoffs_array.shape[1:] + mixture, _ = ( + _mgcce( # pylint: disable=g-long-ternary + payoffs_array, + [np.ones([ns], dtype=np.int32) for ns in num_strategies_per_player], + ignore_repeats=True) + if self._is_cce else _mgcce( + payoffs_array, + [np.ones([ns], dtype=np.int32) for ns in num_strategies_per_player], + ignore_repeats=True)) + mixtures, values = [], [] + for n in range(num_players): + values.append(np.sum(payoffs_array[n] * mixture)) + mixtures.append( + np.sum( + mixture, + axis=tuple([n_ for n_ in range(num_players) if n_ != n]))) + return mixtures, values + + +class StackelbergEqSolver(JointActionSolver): + """A joint action solver solving for Stackelverg equilibrium. + + Uses python.algorithms.stackelberg_lp.py. 
+ """ + + def __init__(self, is_first_leader=True): + self._is_first_leader = is_first_leader + + def __call__(self, payoffs_array): + assert len(payoffs_array) == 2 + game = pyspiel.create_matrix_game(payoffs_array[0], payoffs_array[1]) + try: + player0_strategy, player1_strategy, player0_value, player1_value = solve_stackelberg( + game, self._is_first_leader) + return [player0_strategy, + player1_strategy], [player0_value, player1_value] + except: # pylint: disable=bare-except + # if the game matrix is degenerated and cannot solve for an SSE, + # return uniform strategy + num_player0_strategies, num_player1_strategies = payoffs_array[0].shape + player0_strategy, player1_strategy = np.ones( + num_player0_strategies) / num_player0_strategies, np.ones( + num_player1_strategies) / num_player1_strategies + player0_value, player1_value = player0_strategy.reshape(1, -1).dot( + payoffs_array[0]).dot(player1_strategy.reshape( + -1, 1)), player0_strategy.reshape(1, -1).dot( + payoffs_array[1]).dot(player1_strategy.reshape(-1, 1)) + return [player0_strategy, + player1_strategy], [player0_value, player1_value] + + +class MultiagentQLearner(rl_agent.AbstractAgent): + """A multiagent joint action learner.""" + + def __init__(self, + player_id, + num_players, + num_actions, + joint_action_solver, + step_size=0.1, + epsilon_schedule=rl_tools.ConstantSchedule(0.2), + discount_factor=1.0): + """Initialize the Multiagent joint-action Q-Learning agent. + + The joint_action_solver solves for one-step matrix game defined by Q-tables. + + Args: + player_id: the player id this agent will play as, + num_players: the number of players in the game, + num_actions: the number of distinct actions in the game, + joint_action_solver: the joint action solver class to use to solve the + one-step matrix games + step_size: learning rate for Q-learning, + epsilon_schedule: exploration parameter, + discount_factor: the discount factor as in Q-learning. + """ + self._player_id = player_id + self._num_players = num_players + self._num_actions = num_actions + self._joint_action_solver = joint_action_solver + self._step_size = step_size + self._epsilon_schedule = epsilon_schedule + self._epsilon = epsilon_schedule.value + self._discount_factor = discount_factor + self._q_values = [ + collections.defaultdict(valuedict) for _ in range(num_players) + ] + self._prev_info_state = None + + def _get_payoffs_array(self, info_state): + payoffs_array = np.zeros((self._num_players,) + tuple(self._num_actions)) + for joint_action in itertools.product( + *[range(dim) for dim in self._num_actions]): + for n in range(self._num_players): + payoffs_array[ + (n,) + joint_action] = self._q_values[n][info_state][joint_action] + return payoffs_array + + def _epsilon_greedy(self, info_state, legal_actions, epsilon): + """Returns a valid epsilon-greedy action and valid action probs. + + If the agent has not been to `info_state`, a valid random action is chosen. + Args: + info_state: hashable representation of the information state. + legal_actions: list of actions at `info_state`. + epsilon: float, prob of taking an exploratory action. + + Returns: + A valid epsilon-greedy action and valid action probabilities. 
+ """ + probs = np.zeros(self._num_actions[self._player_id]) + + state_probs, _ = self._joint_action_solver( + self._get_payoffs_array(info_state)) + + probs[legal_actions[self._player_id]] = ( + epsilon / len(legal_actions[self._player_id])) + probs += (1 - epsilon) * state_probs[self._player_id] + action = np.random.choice( + range(self._num_actions[self._player_id]), p=probs) + return action, probs + + def step(self, time_step, actions=None, is_evaluation=False): + """Returns the action to be taken and updates the Q-values if needed. + + Args: + time_step: an instance of rl_environment.TimeStep, + actions: list of actions taken by all agents from the previous step, + is_evaluation: bool, whether this is a training or evaluation call, + + Returns: + A `rl_agent.StepOutput` containing the action probs and chosen action. + """ + info_state = str(time_step.observations["info_state"]) + legal_actions = time_step.observations["legal_actions"] + + # Prevent undefined errors if this agent never plays until terminal step + action, probs = None, None + + # Act step: don't act at terminal states. + if not time_step.last(): + epsilon = 0.0 if is_evaluation else self._epsilon + # select according to the joint action solver + action, probs = self._epsilon_greedy( + info_state, legal_actions, epsilon=epsilon) + + # Learn step: don't learn during evaluation or at first agent steps. + actions = tuple(actions) + + if self._prev_info_state and not is_evaluation: + _, next_state_values = ( + self._joint_action_solver(self._get_payoffs_array(info_state))) + # update Q values for every agent + for n in range(self._num_players): + target = time_step.rewards[n] + if not time_step.last(): # Q values are zero for terminal. + target += self._discount_factor * next_state_values[n] + + prev_q_value = self._q_values[n][self._prev_info_state][actions] + + self._q_values[n][self._prev_info_state][actions] += ( + self._step_size * (target - prev_q_value)) + + # Decay epsilon, if necessary. + self._epsilon = self._epsilon_schedule.step() + + if time_step.last(): # prepare for the next episode. + self._prev_info_state = None + return + + # Don't mess up with the state during evaluation. + if not is_evaluation: + self._prev_info_state = info_state + + return rl_agent.StepOutput(action=action, probs=probs) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tabular_multiagent_qlearner_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tabular_multiagent_qlearner_test.py new file mode 100644 index 0000000..f146248 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tabular_multiagent_qlearner_test.py @@ -0,0 +1,167 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
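The joint-action learner above can be run in self-play on matrix rock-paper-scissors; the sketch below mirrors `test_rps_run` from the tests that follow, where both learners' strategies approach the uniform 1/3 mixture. Not part of the diff:

```
from open_spiel.python import rl_environment
from open_spiel.python.algorithms.tabular_multiagent_qlearner import (
    MultiagentQLearner, TwoPlayerNashSolver)

env = rl_environment.Environment("matrix_rps")
n_actions = env.game.num_distinct_actions()
learners = [MultiagentQLearner(i, 2, [n_actions] * 2, TwoPlayerNashSolver())
            for i in range(2)]

for _ in range(1000):
    time_step = env.reset()
    actions = [None, None]
    actions = [learners[0].step(time_step, actions).action,
               learners[1].step(time_step, actions).action]
    time_step = env.step(actions)
    # A final step on the terminal time step lets both agents update their Q-values.
    learners[0].step(time_step, actions)
    learners[1].step(time_step, actions)
```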
+"""Tests for open_spiel.python.algorithms.tabular_multiagent_qlearner.""" + +from absl.testing import absltest +import numpy as np + +from open_spiel.python import rl_environment +from open_spiel.python.algorithms.tabular_multiagent_qlearner import CorrelatedEqSolver +from open_spiel.python.algorithms.tabular_multiagent_qlearner import MultiagentQLearner +from open_spiel.python.algorithms.tabular_multiagent_qlearner import StackelbergEqSolver +from open_spiel.python.algorithms.tabular_multiagent_qlearner import TwoPlayerNashSolver +from open_spiel.python.algorithms.tabular_qlearner import QLearner +from open_spiel.python.egt.utils import game_payoffs_array +import pyspiel + +SEED = 18763511 + + +class MultiagentQTest(absltest.TestCase): + + def test_simple_pathfinding_run(self): + env = rl_environment.Environment( + "pathfinding", grid="B.A\n...\na.b", players=2, step_reward=-1.) + + with self.subTest("nash_q"): + qlearner = QLearner(0, env.game.num_distinct_actions()) + nashqlearner = MultiagentQLearner(1, 2, + [env.game.num_distinct_actions()] * 2, + TwoPlayerNashSolver()) + + time_step = env.reset() + actions = [None, None] + step_cnt = 0 + + while not time_step.last(): + actions = [ + qlearner.step(time_step).action, + nashqlearner.step(time_step, actions).action + ] + time_step = env.step(actions) + step_cnt += 1 + self.assertLess(step_cnt, 500) + + with self.subTest("ce_q"): + qlearner = QLearner(0, env.game.num_distinct_actions()) + ceqlearner = MultiagentQLearner(1, 2, + [env.game.num_distinct_actions()] * 2, + CorrelatedEqSolver(is_cce=False)) + + time_step = env.reset() + actions = [None, None] + step_cnt = 0 + + while not time_step.last(): + actions = [ + qlearner.step(time_step).action, + ceqlearner.step(time_step, actions).action + ] + time_step = env.step(actions) + step_cnt += 1 + + self.assertLess(step_cnt, 500) + + with self.subTest("cce_q"): + qlearner = QLearner(0, env.game.num_distinct_actions()) + cceqlearner = MultiagentQLearner(1, 2, + [env.game.num_distinct_actions()] * 2, + CorrelatedEqSolver(is_cce=True)) + + time_step = env.reset() + actions = [None, None] + step_cnt = 0 + + while not time_step.last(): + actions = [ + qlearner.step(time_step).action, + cceqlearner.step(time_step, actions).action + ] + time_step = env.step(actions) + step_cnt += 1 + + self.assertLess(step_cnt, 500) + + with self.subTest("asym_q"): + qlearner = QLearner(0, env.game.num_distinct_actions()) + asymqlearner = MultiagentQLearner(1, 2, + [env.game.num_distinct_actions()] * 2, + StackelbergEqSolver()) + + time_step = env.reset() + actions = [None, None] + step_cnt = 0 + + while not time_step.last(): + actions = [ + qlearner.step(time_step).action, + asymqlearner.step(time_step, actions).action + ] + time_step = env.step(actions) + step_cnt += 1 + + self.assertLess(step_cnt, 500) + + def test_rps_run(self): + env = rl_environment.Environment("matrix_rps") + nashqlearner0 = MultiagentQLearner(0, 2, + [env.game.num_distinct_actions()] * 2, + TwoPlayerNashSolver()) + + nashqlearner1 = MultiagentQLearner(1, 2, + [env.game.num_distinct_actions()] * 2, + TwoPlayerNashSolver()) + + for _ in range(1000): + time_step = env.reset() + actions = [None, None] + actions = [ + nashqlearner0.step(time_step, actions).action, + nashqlearner1.step(time_step, actions).action + ] + time_step = env.step(actions) + nashqlearner0.step(time_step, actions) + nashqlearner1.step(time_step, actions) + + with self.subTest("correct_rps_strategy"): + time_step = env.reset() + actions = [None, None] + learner0_strategy, 
learner1_strategy = nashqlearner0.step( + time_step, actions).probs, nashqlearner1.step(time_step, + actions).probs + np.testing.assert_array_almost_equal( + np.asarray([1 / 3, 1 / 3, 1 / 3]), + learner0_strategy.reshape(-1), + decimal=4) + np.testing.assert_array_almost_equal( + np.asarray([1 / 3, 1 / 3, 1 / 3]), + learner1_strategy.reshape(-1), + decimal=4) + + with self.subTest("correct_rps_value"): + time_step = env.reset() + ground_truth_values = game_payoffs_array( + pyspiel.load_matrix_game("matrix_rps")) + info_state = str(time_step.observations["info_state"]) + learner0_values, learner1_values = nashqlearner0._get_payoffs_array( + info_state), nashqlearner1._get_payoffs_array(info_state) + np.testing.assert_array_almost_equal( + ground_truth_values, learner0_values, decimal=4) + np.testing.assert_array_almost_equal( + ground_truth_values, learner1_values, decimal=4) + + +if __name__ == "__main__": + np.random.seed(SEED) + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tabular_qlearner.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tabular_qlearner.py new file mode 100644 index 0000000..525248b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tabular_qlearner.py @@ -0,0 +1,164 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tabular Q-learning agent. + +Note: supports single-player games only. +""" + +import collections + +import numpy as np + +from open_spiel.python import rl_agent +from open_spiel.python import rl_tools +import pyspiel + + +def valuedict(): + # The default factory is called without arguments to produce a new value when + # a key is not present, in __getitem__ only. This value is added to the dict, + # so modifying it will modify the dict. + return collections.defaultdict(float) + + +class QLearner(rl_agent.AbstractAgent): + """Tabular Q-Learning agent. + + See open_spiel/python/examples/tic_tac_toe_qlearner.py for an usage example. + """ + + def __init__( + self, + player_id, + num_actions, + step_size=0.1, + epsilon_schedule=rl_tools.ConstantSchedule(0.2), + discount_factor=1.0, + centralized=False, + info_state_to_string_override=None, + ): + """Initialize the Q-Learning agent.""" + self._player_id = player_id + self._num_actions = num_actions + self._step_size = step_size + self._epsilon_schedule = epsilon_schedule + self._epsilon = epsilon_schedule.value + self._discount_factor = discount_factor + self._centralized = centralized + self._q_values = collections.defaultdict(valuedict) + self._prev_info_state = None + self._last_loss_value = None + self._prev_action = None + self._info_state_to_string_override = info_state_to_string_override + + def _epsilon_greedy(self, info_state, legal_actions, epsilon): + """Returns a valid epsilon-greedy action and valid action probs. + + If the agent has not been to `info_state`, a valid random action is chosen. 
+ + Args: + info_state: hashable representation of the information state. + legal_actions: list of actions at `info_state`. + epsilon: float, prob of taking an exploratory action. + + Returns: + A valid epsilon-greedy action and valid action probabilities. + """ + probs = np.zeros(self._num_actions) + greedy_q = max([self._q_values[info_state][a] for a in legal_actions]) + greedy_actions = [ + a for a in legal_actions if self._q_values[info_state][a] == greedy_q + ] + probs[legal_actions] = epsilon / len(legal_actions) + probs[greedy_actions] += (1 - epsilon) / len(greedy_actions) + action = np.random.choice(range(self._num_actions), p=probs) + return action, probs + + def _get_action_probs(self, info_state, legal_actions, epsilon): + """Returns a selected action and the probabilities of legal actions. + + To be overwritten by subclasses that implement other action selection + methods. + + Args: + info_state: hashable representation of the information state. + legal_actions: list of actions at `info_state`. + epsilon: float: current value of the epsilon schedule or 0 in case + evaluation. QLearner uses it as the exploration parameter in + epsilon-greedy, but subclasses are free to interpret in different ways + (e.g. as temperature in softmax). + """ + return self._epsilon_greedy(info_state, legal_actions, epsilon) + + def step(self, time_step, is_evaluation=False): + """Returns the action to be taken and updates the Q-values if needed. + + Args: + time_step: an instance of rl_environment.TimeStep. + is_evaluation: bool, whether this is a training or evaluation call. + + Returns: + A `rl_agent.StepOutput` containing the action probs and chosen action. + """ + if self._centralized: + info_state = str(time_step.observations["info_state"]) + else: + info_state = str(time_step.observations["info_state"][self._player_id]) + + if self._info_state_to_string_override is not None: + _, state = pyspiel.deserialize_game_and_state( + time_step.observations["serialized_state"] + ) + info_state = self._info_state_to_string_override(state) + + legal_actions = time_step.observations["legal_actions"][self._player_id] + + # Prevent undefined errors if this agent never plays until terminal step + action, probs = None, None + + # Act step: don't act at terminal states. + if not time_step.last(): + epsilon = 0.0 if is_evaluation else self._epsilon + action, probs = self._get_action_probs(info_state, legal_actions, epsilon) + + # Learn step: don't learn during evaluation or at first agent steps. + if self._prev_info_state and not is_evaluation: + target = time_step.rewards[self._player_id] + if not time_step.last(): # Q values are zero for terminal. + target += self._discount_factor * max( + [self._q_values[info_state][a] for a in legal_actions]) + + assert self._prev_action is not None + prev_q_value = self._q_values[self._prev_info_state][self._prev_action] + self._last_loss_value = target - prev_q_value + self._q_values[self._prev_info_state][self._prev_action] += ( + self._step_size * self._last_loss_value) + + # Decay epsilon, if necessary. + self._epsilon = self._epsilon_schedule.step() + + if time_step.last(): # prepare for the next episode. + self._prev_info_state = None + return + + # Don't mess up with the state during evaluation. 
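+ # Only track the previous state/action when training, so that evaluation + # rollouts never feed into the next learning update.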
+ if not is_evaluation: + self._prev_info_state = info_state + self._prev_action = action + return rl_agent.StepOutput(action=action, probs=probs) + + @property + def loss(self): + return self._last_loss_value diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tabular_qlearner_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tabular_qlearner_test.py new file mode 100644 index 0000000..6e8627e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tabular_qlearner_test.py @@ -0,0 +1,118 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tabular Q-learning agent tests.""" + +from absl.testing import absltest +import numpy as np + +from open_spiel.python import rl_environment +from open_spiel.python import rl_tools +from open_spiel.python.algorithms import tabular_qlearner +import pyspiel +from open_spiel.python.utils import stats + +# Fixed seed to make test non stochastic. +SEED = 10000 + +# A simple two-action game encoded as an EFG game. Going left gets -1, going +# right gets a +1. +SIMPLE_EFG_DATA = """ + EFG 2 R "Simple single-agent problem" { "Player 1" } "" + p "ROOT" 1 1 "ROOT" { "L" "R" } 0 + t "L" 1 "Outcome L" { -1.0 } + t "R" 2 "Outcome R" { 1.0 } +""" + + +def blackjack_info_state_to_string(state): + if state.is_terminal(): + return str(state) + else: + return ( + "Terminal? 
False\n" + f"Dealer visible card: {state.dealers_visible_card()}\n" + f"Player sum: {state.get_best_player_total(0)}\n" + ) + + +class QlearnerTest(absltest.TestCase): + + def test_simple_game(self): + game = pyspiel.load_efg_game(SIMPLE_EFG_DATA) + env = rl_environment.Environment(game=game) + + agent = tabular_qlearner.QLearner(0, game.num_distinct_actions()) + total_reward = 0 + + for _ in range(100): + total_eval_reward = 0 + for _ in range(1000): + time_step = env.reset() + while not time_step.last(): + agent_output = agent.step(time_step) + time_step = env.step([agent_output.action]) + total_reward += time_step.rewards[0] + agent.step(time_step) + self.assertGreaterEqual(total_reward, 75) + for _ in range(1000): + time_step = env.reset() + while not time_step.last(): + agent_output = agent.step(time_step, is_evaluation=True) + time_step = env.step([agent_output.action]) + total_eval_reward += time_step.rewards[0] + self.assertGreaterEqual(total_eval_reward, 250) + + @absltest.skip("Example when using different info state string.") + def test_blackjack(self): + game = pyspiel.load_game("blackjack") + env = rl_environment.Environment(game=game, include_full_state=True) + + agent = tabular_qlearner.QLearner( + 0, + game.num_distinct_actions(), + info_state_to_string_override=blackjack_info_state_to_string, + epsilon_schedule=rl_tools.ConstantSchedule(0.1), + ) + + train_return_swa = stats.SlidingWindowAccumulator(100000) + eval_return_swa = stats.SlidingWindowAccumulator(100000) + + for i in range(1000): + for _ in range(1000): + episode_return = 0 + time_step = env.reset() + while not time_step.last(): + agent_output = agent.step(time_step) + time_step = env.step([agent_output.action]) + episode_return += time_step.rewards[0] + agent.step(time_step) + train_return_swa.add(episode_return) + for _ in range(1000): + episode_return = 0 + time_step = env.reset() + while not time_step.last(): + agent_output = agent.step(time_step, is_evaluation=True) + time_step = env.step([agent_output.action]) + episode_return += time_step.rewards[0] + eval_return_swa.add(episode_return) + print( + f"epoch {i}, avg train return: {train_return_swa.mean()}, " + + f"avg eval reward: {eval_return_swa.mean()}" + ) + + +if __name__ == "__main__": + np.random.seed(SEED) + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/README.md b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/README.md new file mode 100644 index 0000000..ed54069 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/README.md @@ -0,0 +1,3 @@ +This directory contains old implementations of old algorithms based on +Tensorflow V1. As of OpenSpiel 1.6, they are no longer maintained, but still +provided here for reference. diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/alpha_zero_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/alpha_zero_example.py new file mode 100644 index 0000000..2a1b476 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/alpha_zero_example.py @@ -0,0 +1,98 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Starting point for playing with the AlphaZero algorithm.""" + +from absl import app +from absl import flags + +from open_spiel.python.algorithms.alpha_zero import alpha_zero +from open_spiel.python.algorithms.alpha_zero import model as model_lib +from open_spiel.python.utils import spawn + +flags.DEFINE_string("game", "connect_four", "Name of the game.") +flags.DEFINE_integer("uct_c", 2, "UCT's exploration constant.") +flags.DEFINE_integer("max_simulations", 300, "How many simulations to run.") +flags.DEFINE_integer("train_batch_size", 2 ** 10, "Batch size for learning.") +flags.DEFINE_integer("replay_buffer_size", 2 ** 16, + "How many states to store in the replay buffer.") +flags.DEFINE_integer("replay_buffer_reuse", 3, + "How many times to learn from each state.") +flags.DEFINE_float("learning_rate", 0.001, "Learning rate.") +flags.DEFINE_float("weight_decay", 0.0001, "L2 regularization strength.") +flags.DEFINE_float("policy_epsilon", 0.25, "What noise epsilon to use.") +flags.DEFINE_float("policy_alpha", 1, "What dirichlet noise alpha to use.") +flags.DEFINE_float("temperature", 1, + "Temperature for final move selection.") +flags.DEFINE_integer("temperature_drop", 10, # Less than AZ due to short games. + "Drop the temperature to 0 after this many moves.") +flags.DEFINE_enum("nn_model", "resnet", model_lib.Model.valid_model_types, + "What type of model should be used?.") +flags.DEFINE_integer("nn_width", 2 ** 7, "How wide should the network be.") +flags.DEFINE_integer("nn_depth", 10, "How deep should the network be.") +flags.DEFINE_string("path", None, "Where to save checkpoints.") +flags.DEFINE_integer("checkpoint_freq", 100, "Save a checkpoint every N steps.") +flags.DEFINE_integer("actors", 2, "How many actors to run.") +flags.DEFINE_integer("evaluators", 1, "How many evaluators to run.") +flags.DEFINE_integer("evaluation_window", 100, + "How many games to average results over.") +flags.DEFINE_integer( + "eval_levels", 7, + ("Play evaluation games vs MCTS+Solver, with max_simulations*10^(n/2)" + " simulations for n in range(eval_levels). 
Default of 7 means " + "running mcts with up to 1000 times more simulations.")) +flags.DEFINE_integer("max_steps", 0, "How many learn steps before exiting.") +flags.DEFINE_bool("quiet", True, "Don't show the moves as they're played.") +flags.DEFINE_bool("verbose", False, "Show the MCTS stats of possible moves.") + +FLAGS = flags.FLAGS + + +def main(unused_argv): + config = alpha_zero.Config( + game=FLAGS.game, + path=FLAGS.path, + learning_rate=FLAGS.learning_rate, + weight_decay=FLAGS.weight_decay, + train_batch_size=FLAGS.train_batch_size, + replay_buffer_size=FLAGS.replay_buffer_size, + replay_buffer_reuse=FLAGS.replay_buffer_reuse, + max_steps=FLAGS.max_steps, + checkpoint_freq=FLAGS.checkpoint_freq, + + actors=FLAGS.actors, + evaluators=FLAGS.evaluators, + uct_c=FLAGS.uct_c, + max_simulations=FLAGS.max_simulations, + policy_alpha=FLAGS.policy_alpha, + policy_epsilon=FLAGS.policy_epsilon, + temperature=FLAGS.temperature, + temperature_drop=FLAGS.temperature_drop, + evaluation_window=FLAGS.evaluation_window, + eval_levels=FLAGS.eval_levels, + + nn_model=FLAGS.nn_model, + nn_width=FLAGS.nn_width, + nn_depth=FLAGS.nn_depth, + observation_shape=None, + output_size=None, + + quiet=FLAGS.quiet, + ) + alpha_zero.alpha_zero(config) + + +if __name__ == "__main__": + with spawn.main_handler(): + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/deep_cfr.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/deep_cfr.py new file mode 100644 index 0000000..b2b03fa --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/deep_cfr.py @@ -0,0 +1,444 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Implements Deep CFR Algorithm. + +See https://arxiv.org/abs/1811.00164. + +The algorithm defines an `advantage` and `strategy` networks that compute +advantages used to do regret matching across information sets and to approximate +the strategy profiles of the game. To train these networks a reservoir buffer +(other data structures may be used) memory is used to accumulate samples to +train the networks. +""" + +import collections +import random +import numpy as np +import tensorflow.compat.v1 as tf + +from open_spiel.python import policy +from open_spiel.python import simple_nets +import pyspiel + +# Temporarily Disable TF2 behavior until we update the code. +tf.disable_v2_behavior() + +AdvantageMemory = collections.namedtuple( + "AdvantageMemory", "info_state iteration advantage action") + +StrategyMemory = collections.namedtuple( + "StrategyMemory", "info_state iteration strategy_action_probs") + + +# TODO(author3) Refactor into data structures lib. +class ReservoirBuffer(object): + """Allows uniform sampling over a stream of data. + + This class supports the storage of arbitrary elements, such as observation + tensors, integer actions, etc. + + See https://en.wikipedia.org/wiki/Reservoir_sampling for more details. 
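+ + Once the buffer is full, each element added so far remains stored with + equal probability, namely reservoir_buffer_capacity / (number of add() calls).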
+ """ + + def __init__(self, reservoir_buffer_capacity): + self._reservoir_buffer_capacity = reservoir_buffer_capacity + self._data = [] + self._add_calls = 0 + + def add(self, element): + """Potentially adds `element` to the reservoir buffer. + + Args: + element: data to be added to the reservoir buffer. + """ + if len(self._data) < self._reservoir_buffer_capacity: + self._data.append(element) + else: + idx = np.random.randint(0, self._add_calls + 1) + if idx < self._reservoir_buffer_capacity: + self._data[idx] = element + self._add_calls += 1 + + def sample(self, num_samples): + """Returns `num_samples` uniformly sampled from the buffer. + + Args: + num_samples: `int`, number of samples to draw. + + Returns: + An iterable over `num_samples` random elements of the buffer. + + Raises: + ValueError: If there are less than `num_samples` elements in the buffer + """ + if len(self._data) < num_samples: + raise ValueError("{} elements could not be sampled from size {}".format( + num_samples, len(self._data))) + return random.sample(self._data, num_samples) + + def clear(self): + self._data = [] + self._add_calls = 0 + + def __len__(self): + return len(self._data) + + def __iter__(self): + return iter(self._data) + + +class DeepCFRSolver(policy.Policy): + """Implements a solver for the Deep CFR Algorithm. + + See https://arxiv.org/abs/1811.00164. + + Define all networks and sampling buffers/memories. Derive losses & learning + steps. Initialize the game state and algorithmic variables. + + Note: batch sizes default to `None` implying that training over the full + dataset in memory is done by default. To sample from the memories you + may set these values to something less than the full capacity of the + memory. + """ + + def __init__(self, + session, + game, + policy_network_layers=(256, 256), + advantage_network_layers=(128, 128), + num_iterations: int = 100, + num_traversals: int = 20, + learning_rate: float = 1e-4, + batch_size_advantage=None, + batch_size_strategy=None, + memory_capacity: int = int(1e6), + policy_network_train_steps: int = 1, + advantage_network_train_steps: int = 1, + reinitialize_advantage_networks: bool = True): + """Initialize the Deep CFR algorithm. + + Args: + session: (tf.Session) TensorFlow session. + game: Open Spiel game. + policy_network_layers: (list[int]) Layer sizes of strategy net MLP. + advantage_network_layers: (list[int]) Layer sizes of advantage net MLP. + num_iterations: Number of iterations. + num_traversals: Number of traversals per iteration. + learning_rate: Learning rate. + batch_size_advantage: (int or None) Batch size to sample from advantage + memories. + batch_size_strategy: (int or None) Batch size to sample from strategy + memories. + memory_capacity: Number of samples that can be stored in memory. + policy_network_train_steps: Number of policy network training steps (per + iteration). + advantage_network_train_steps: Number of advantage network training steps + (per iteration). + reinitialize_advantage_networks: Whether to re-initialize the + advantage network before training on each iteration. + """ + all_players = list(range(game.num_players())) + super(DeepCFRSolver, self).__init__(game, all_players) + self._game = game + if game.get_type().dynamics == pyspiel.GameType.Dynamics.SIMULTANEOUS: + # `_traverse_game_tree` does not take into account this option. 
+ raise ValueError("Simulatenous games are not supported.") + self._session = session + self._batch_size_advantage = batch_size_advantage + self._batch_size_strategy = batch_size_strategy + self._policy_network_train_steps = policy_network_train_steps + self._advantage_network_train_steps = advantage_network_train_steps + self._num_players = game.num_players() + self._root_node = self._game.new_initial_state() + # TODO(author6) Allow embedding size (and network) to be specified. + self._embedding_size = len(self._root_node.information_state_tensor(0)) + self._num_iterations = num_iterations + self._num_traversals = num_traversals + self._reinitialize_advantage_networks = reinitialize_advantage_networks + self._num_actions = game.num_distinct_actions() + self._iteration = 1 + self._environment_steps = 0 + + # Create required TensorFlow placeholders to perform the Q-network updates. + self._info_state_ph = tf.placeholder( + shape=[None, self._embedding_size], + dtype=tf.float32, + name="info_state_ph") + self._info_state_action_ph = tf.placeholder( + shape=[None, self._embedding_size + 1], + dtype=tf.float32, + name="info_state_action_ph") + self._action_probs_ph = tf.placeholder( + shape=[None, self._num_actions], + dtype=tf.float32, + name="action_probs_ph") + self._iter_ph = tf.placeholder( + shape=[None, 1], dtype=tf.float32, name="iter_ph") + self._advantage_ph = [] + for p in range(self._num_players): + self._advantage_ph.append( + tf.placeholder( + shape=[None, self._num_actions], + dtype=tf.float32, + name="advantage_ph_" + str(p))) + + # Define strategy network, loss & memory. + self._strategy_memories = ReservoirBuffer(memory_capacity) + self._policy_network = simple_nets.MLP(self._embedding_size, + list(policy_network_layers), + self._num_actions) + action_logits = self._policy_network(self._info_state_ph) + # Illegal actions are handled in the traversal code where expected payoff + # and sampled regret is computed from the advantage networks. + self._action_probs = tf.nn.softmax(action_logits) + self._loss_policy = tf.reduce_mean( + tf.losses.mean_squared_error( + labels=tf.math.sqrt(self._iter_ph) * self._action_probs_ph, + predictions=tf.math.sqrt(self._iter_ph) * self._action_probs)) + self._optimizer_policy = tf.train.AdamOptimizer(learning_rate=learning_rate) + self._learn_step_policy = self._optimizer_policy.minimize(self._loss_policy) + + # Define advantage network, loss & memory. 
(One per player) + self._advantage_memories = [ + ReservoirBuffer(memory_capacity) for _ in range(self._num_players) + ] + self._advantage_networks = [ + simple_nets.MLP(self._embedding_size, list(advantage_network_layers), + self._num_actions) for _ in range(self._num_players) + ] + self._advantage_outputs = [ + self._advantage_networks[i](self._info_state_ph) + for i in range(self._num_players) + ] + self._loss_advantages = [] + self._optimizer_advantages = [] + self._learn_step_advantages = [] + for p in range(self._num_players): + self._loss_advantages.append( + tf.reduce_mean( + tf.losses.mean_squared_error( + labels=tf.math.sqrt(self._iter_ph) * self._advantage_ph[p], + predictions=tf.math.sqrt(self._iter_ph) * + self._advantage_outputs[p]))) + self._optimizer_advantages.append( + tf.train.AdamOptimizer(learning_rate=learning_rate)) + self._learn_step_advantages.append(self._optimizer_advantages[p].minimize( + self._loss_advantages[p])) + + @property + def advantage_buffers(self): + return self._advantage_memories + + @property + def strategy_buffer(self): + return self._strategy_memories + + def clear_advantage_buffers(self): + for p in range(self._num_players): + self._advantage_memories[p].clear() + + def reinitialize_advantage_networks(self): + for p in range(self._num_players): + self.reinitialize_advantage_network(p) + + def reinitialize_advantage_network(self, player): + self._session.run( + tf.group(*[ + var.initializer + for var in self._advantage_networks[player].variables + ])) + + def solve(self): + """Solution logic for Deep CFR.""" + advantage_losses = collections.defaultdict(list) + for _ in range(self._num_iterations): + for p in range(self._num_players): + for _ in range(self._num_traversals): + self._traverse_game_tree(self._root_node, p) + if self._reinitialize_advantage_networks: + # Re-initialize advantage network for player and train from scratch. + self.reinitialize_advantage_network(p) + advantage_losses[p].append(self._learn_advantage_network(p)) + self._iteration += 1 + # Train policy network. + policy_loss = self._learn_strategy_network() + return self._policy_network, advantage_losses, policy_loss + + def get_environment_steps(self): + return self._environment_steps + + def _traverse_game_tree(self, state, player): + """Performs a traversal of the game tree. + + Over a traversal the advantage and strategy memories are populated with + computed advantage values and matched regrets respectively. + Args: + state: Current OpenSpiel game state. + player: (int) Player index for this traversal. + Returns: + Recursively returns expected payoffs for each action. + """ + self._environment_steps += 1 + expected_payoff = collections.defaultdict(float) + if state.is_terminal(): + # Terminal state get returns. + return state.returns()[player] + elif state.is_chance_node(): + # If this is a chance node, sample an action + chance_outcome, chance_proba = zip(*state.chance_outcomes()) + action = np.random.choice(chance_outcome, p=chance_proba) + return self._traverse_game_tree(state.child(action), player) + elif state.current_player() == player: + sampled_regret = collections.defaultdict(float) + # Update the policy over the info set & actions via regret matching. 
+ _, strategy = self._sample_action_from_advantage(state, player) + for action in state.legal_actions(): + expected_payoff[action] = self._traverse_game_tree( + state.child(action), player) + cfv = 0 + for a_ in state.legal_actions(): + cfv += strategy[a_] * expected_payoff[a_] + for action in state.legal_actions(): + sampled_regret[action] = expected_payoff[action] + sampled_regret[action] -= cfv + sampled_regret_arr = [0] * self._num_actions + for action in sampled_regret: + sampled_regret_arr[action] = sampled_regret[action] + self._advantage_memories[player].add( + AdvantageMemory(state.information_state_tensor(), self._iteration, + sampled_regret_arr, action)) + return cfv + else: + other_player = state.current_player() + _, strategy = self._sample_action_from_advantage(state, other_player) + # Recompute distribution dor numerical errors. + probs = np.array(strategy) + probs /= probs.sum() + sampled_action = np.random.choice(range(self._num_actions), p=probs) + self._strategy_memories.add( + StrategyMemory( + state.information_state_tensor(other_player), self._iteration, + strategy)) + return self._traverse_game_tree(state.child(sampled_action), player) + + def _sample_action_from_advantage(self, state, player): + """Returns an info state policy by applying regret-matching. + + Args: + state: Current OpenSpiel game state. + player: (int) Player index over which to compute regrets. + Returns: + 1. (list) Advantage values for info state actions indexed by action. + 2. (list) Matched regrets, prob for actions indexed by action. + """ + info_state = state.information_state_tensor(player) + legal_actions = state.legal_actions(player) + advantages_full = self._session.run( + self._advantage_outputs[player], + feed_dict={self._info_state_ph: np.expand_dims(info_state, axis=0)})[0] + advantages = [max(0., advantage) for advantage in advantages_full] + cumulative_regret = np.sum([advantages[action] for action in legal_actions]) + matched_regrets = np.array([0.] * self._num_actions) + + if cumulative_regret > 0.: + for action in legal_actions: + matched_regrets[action] = advantages[action] / cumulative_regret + else: + matched_regrets[max(legal_actions, key=lambda a: advantages_full[a])] = 1 + + return advantages, matched_regrets + + def action_probabilities(self, state, player_id=None): + """Returns action probabilities dict for a single batch.""" + del player_id # unused + cur_player = state.current_player() + legal_actions = state.legal_actions(cur_player) + info_state_vector = np.array(state.information_state_tensor()) + if len(info_state_vector.shape) == 1: + info_state_vector = np.expand_dims(info_state_vector, axis=0) + probs = self._session.run( + self._action_probs, feed_dict={self._info_state_ph: info_state_vector}) + return {action: probs[0][action] for action in legal_actions} + + def _learn_advantage_network(self, player): + """Compute the loss on sampled transitions and perform a Q-network update. + + If there are not enough elements in the buffer, no loss is computed and + `None` is returned instead. + + Args: + player: (int) player index. + Returns: + The average loss over the advantage network. 
+ """ + for _ in range(self._advantage_network_train_steps): + if self._batch_size_advantage: + if self._batch_size_advantage > len(self._advantage_memories[player]): + ## Skip if there aren't enough samples + return None + samples = self._advantage_memories[player].sample( + self._batch_size_advantage) + else: + samples = self._advantage_memories[player] + info_states = [] + advantages = [] + iterations = [] + for s in samples: + info_states.append(s.info_state) + advantages.append(s.advantage) + iterations.append([s.iteration]) + # Ensure some samples have been gathered. + if not info_states: + return None + + loss_advantages, _ = self._session.run( + [self._loss_advantages[player], self._learn_step_advantages[player]], + feed_dict={ + self._info_state_ph: np.array(info_states), + self._advantage_ph[player]: np.array(advantages), + self._iter_ph: np.array(iterations), + }) + return loss_advantages + + def _learn_strategy_network(self): + """Compute the loss over the strategy network. + + Returns: + The average loss obtained on this batch of transitions or `None`. + """ + for _ in range(self._policy_network_train_steps): + if self._batch_size_strategy: + if self._batch_size_strategy > len(self._strategy_memories): + ## Skip if there aren't enough samples + return None + samples = self._strategy_memories.sample(self._batch_size_strategy) + else: + samples = self._strategy_memories + info_states = [] + action_probs = [] + iterations = [] + for s in samples: + info_states.append(s.info_state) + action_probs.append(s.strategy_action_probs) + iterations.append([s.iteration]) + + loss_strategy, _ = self._session.run( + [self._loss_policy, self._learn_step_policy], + feed_dict={ + self._info_state_ph: np.array(info_states), + self._action_probs_ph: np.array(np.squeeze(action_probs)), + self._iter_ph: np.array(iterations), + }) + return loss_strategy diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/deep_cfr_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/deep_cfr_example.py new file mode 100644 index 0000000..57816af --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/deep_cfr_example.py @@ -0,0 +1,83 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Python Deep CFR example.""" + +from absl import app +from absl import flags +from absl import logging + +import tensorflow.compat.v1 as tf + +from open_spiel.python import policy +from open_spiel.python.algorithms import deep_cfr +from open_spiel.python.algorithms import expected_game_score +from open_spiel.python.algorithms import exploitability +import pyspiel + +# Temporarily disable TF2 behavior until we update the code. 
+tf.disable_v2_behavior() + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("num_iterations", 400, "Number of iterations") +flags.DEFINE_integer("num_traversals", 40, "Number of traversals/games") +flags.DEFINE_string("game_name", "kuhn_poker", "Name of the game") + + +def main(unused_argv): + logging.info("Loading %s", FLAGS.game_name) + game = pyspiel.load_game(FLAGS.game_name) + with tf.Session() as sess: + deep_cfr_solver = deep_cfr.DeepCFRSolver( + sess, + game, + policy_network_layers=(16,), + advantage_network_layers=(16,), + num_iterations=FLAGS.num_iterations, + num_traversals=FLAGS.num_traversals, + learning_rate=1e-3, + batch_size_advantage=128, + batch_size_strategy=1024, + memory_capacity=1e7, + policy_network_train_steps=400, + advantage_network_train_steps=20, + reinitialize_advantage_networks=False) + sess.run(tf.global_variables_initializer()) + _, advantage_losses, policy_loss = deep_cfr_solver.solve() + for player, losses in advantage_losses.items(): + logging.info("Advantage for player %d: %s", player, + losses[:2] + ["..."] + losses[-2:]) + logging.info("Advantage Buffer Size for player %s: '%s'", player, + len(deep_cfr_solver.advantage_buffers[player])) + logging.info("Strategy Buffer Size: '%s'", + len(deep_cfr_solver.strategy_buffer)) + logging.info("Final policy loss: '%s'", policy_loss) + + average_policy = policy.tabular_policy_from_callable( + game, deep_cfr_solver.action_probabilities) + + conv = exploitability.nash_conv(game, average_policy) + logging.info("Deep CFR in '%s' - NashConv: %s", FLAGS.game_name, conv) + + average_policy_values = expected_game_score.policy_value( + game.new_initial_state(), [average_policy] * 2) + print("Computed player 0 value: {}".format(average_policy_values[0])) + print("Expected player 0 value: {}".format(-1 / 18)) + print("Computed player 1 value: {}".format(average_policy_values[1])) + print("Expected player 1 value: {}".format(1 / 18)) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/deep_cfr_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/deep_cfr_test.py new file mode 100644 index 0000000..c0a39fb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/deep_cfr_test.py @@ -0,0 +1,73 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from absl.testing import parameterized +import tensorflow.compat.v1 as tf + +from open_spiel.python import policy +from open_spiel.python.algorithms import deep_cfr +from open_spiel.python.algorithms import exploitability +import pyspiel + +# Temporarily disable TF2 behavior until we update the code. 
+tf.disable_v2_behavior() + + +class DeepCFRTest(parameterized.TestCase): + + @parameterized.parameters('leduc_poker', 'kuhn_poker', 'liars_dice') + def test_deep_cfr_runs(self, game_name): + game = pyspiel.load_game(game_name) + with tf.Session() as sess: + deep_cfr_solver = deep_cfr.DeepCFRSolver( + sess, + game, + policy_network_layers=(8, 4), + advantage_network_layers=(4, 2), + num_iterations=2, + num_traversals=2, + learning_rate=1e-3, + batch_size_advantage=None, + batch_size_strategy=None, + memory_capacity=1e7) + sess.run(tf.global_variables_initializer()) + deep_cfr_solver.solve() + + def test_matching_pennies_3p(self): + # We don't expect Deep CFR to necessarily converge on 3-player games but + # it's nonetheless interesting to see this result. + game = pyspiel.load_game_as_turn_based('matching_pennies_3p') + with tf.Session() as sess: + deep_cfr_solver = deep_cfr.DeepCFRSolver( + sess, + game, + policy_network_layers=(16, 8), + advantage_network_layers=(32, 16), + num_iterations=2, + num_traversals=2, + learning_rate=1e-3, + batch_size_advantage=None, + batch_size_strategy=None, + memory_capacity=1e7) + sess.run(tf.global_variables_initializer()) + deep_cfr_solver.solve() + conv = exploitability.nash_conv( + game, + policy.tabular_policy_from_callable( + game, deep_cfr_solver.action_probabilities)) + print('Deep CFR in Matching Pennies 3p. NashConv: {}'.format(conv)) + + +if __name__ == '__main__': + tf.test.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/deep_cfr_tf2.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/deep_cfr_tf2.py new file mode 100644 index 0000000..5ea4670 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/deep_cfr_tf2.py @@ -0,0 +1,742 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Implements Deep CFR Algorithm. + +See https://arxiv.org/abs/1811.00164. + +The algorithm defines an `advantage` and `strategy` networks that compute +advantages used to do regret matching across information sets and to approximate +the strategy profiles of the game. To train these networks a reservoir buffer +(other data structures may be used) memory is used to accumulate samples to +train the networks. + +This implementation uses skip connections as described in the paper if two +consecutive layers of the advantage or policy network have the same number +of units, except for the last connection. Before the last hidden layer +a layer normalization is applied. +""" + +import collections +import contextlib +import os +import random +import warnings + +import numpy as np +import tensorflow as tf + +from open_spiel.python import policy +import pyspiel + + +warnings.warn( + 'Deep CFR TF2 has known issues when using Keras 3 and may be removed ' + 'in a future version unless fixed. See OpenSpiel github issue #1208 ' + 'for details.' 
+) + + +# The size of the shuffle buffer used to reshuffle part of the data each +# epoch within one training iteration +ADVANTAGE_TRAIN_SHUFFLE_SIZE = 100000 +STRATEGY_TRAIN_SHUFFLE_SIZE = 1000000 + + +# TODO(author3) Refactor into data structures lib. +class ReservoirBuffer(object): + """Allows uniform sampling over a stream of data. + + This class supports the storage of arbitrary elements, such as observation + tensors, integer actions, etc. + + See https://en.wikipedia.org/wiki/Reservoir_sampling for more details. + """ + + def __init__(self, reservoir_buffer_capacity): + self._reservoir_buffer_capacity = reservoir_buffer_capacity + self._data = [] + self._add_calls = 0 + + def add(self, element): + """Potentially adds `element` to the reservoir buffer. + + Args: + element: data to be added to the reservoir buffer. + """ + if len(self._data) < self._reservoir_buffer_capacity: + self._data.append(element) + else: + idx = np.random.randint(0, self._add_calls + 1) + if idx < self._reservoir_buffer_capacity: + self._data[idx] = element + self._add_calls += 1 + + def sample(self, num_samples): + """Returns `num_samples` uniformly sampled from the buffer. + + Args: + num_samples: `int`, number of samples to draw. + + Returns: + An iterable over `num_samples` random elements of the buffer. + + Raises: + ValueError: If there are less than `num_samples` elements in the buffer + """ + if len(self._data) < num_samples: + raise ValueError('{} elements could not be sampled from size {}'.format( + num_samples, len(self._data))) + return random.sample(self._data, num_samples) + + def clear(self): + self._data = [] + self._add_calls = 0 + + def __len__(self): + return len(self._data) + + def __iter__(self): + return iter(self._data) + + @property + def data(self): + return self._data + + def shuffle_data(self): + random.shuffle(self._data) + + +class SkipDense(tf.keras.layers.Layer): + """Dense Layer with skip connection.""" + + def __init__(self, units, **kwargs): + super().__init__(**kwargs) + self.hidden = tf.keras.layers.Dense(units, kernel_initializer='he_normal') + + def call(self, x): + return self.hidden(x) + x + + +class PolicyNetwork(tf.keras.Model): + """Implements the policy network as an MLP. + + Implements the policy network as a MLP with skip connections in adjacent + layers with the same number of units, except for the last hidden connection + where a layer normalization is applied. + """ + + def __init__(self, + input_size, + policy_network_layers, + num_actions, + activation='leakyrelu', + **kwargs): + super().__init__(**kwargs) + self._input_size = input_size + self._num_actions = num_actions + if activation == 'leakyrelu': + self.activation = tf.keras.layers.LeakyReLU(alpha=0.2) + elif activation == 'relu': + self.activation = tf.keras.layers.ReLU() + else: + self.activation = activation + + self.softmax = tf.keras.layers.Softmax() + + self.hidden = [] + prevunits = 0 + for units in policy_network_layers[:-1]: + if prevunits == units: + self.hidden.append(SkipDense(units)) + else: + self.hidden.append( + tf.keras.layers.Dense(units, kernel_initializer='he_normal')) + prevunits = units + self.normalization = tf.keras.layers.LayerNormalization() + self.lastlayer = tf.keras.layers.Dense( + policy_network_layers[-1], kernel_initializer='he_normal') + + self.out_layer = tf.keras.layers.Dense(num_actions) + + @tf.function + def call(self, inputs): + """Applies Policy Network. 
+ + Args: + inputs: Tuple representing (info_state, legal_action_mask) + + Returns: + Action probabilities + """ + x, mask = inputs + for layer in self.hidden: + x = layer(x) + x = self.activation(x) + + x = self.normalization(x) + x = self.lastlayer(x) + x = self.activation(x) + x = self.out_layer(x) + x = tf.where(mask == 1, x, -10e20) + x = self.softmax(x) + return x + + +class AdvantageNetwork(tf.keras.Model): + """Implements the advantage network as an MLP. + + Implements the advantage network as an MLP with skip connections in + adjacent layers with the same number of units, except for the last hidden + connection where a layer normalization is applied. + """ + + def __init__(self, + input_size, + adv_network_layers, + num_actions, + activation='leakyrelu', + **kwargs): + super().__init__(**kwargs) + self._input_size = input_size + self._num_actions = num_actions + if activation == 'leakyrelu': + self.activation = tf.keras.layers.LeakyReLU(alpha=0.2) + elif activation == 'relu': + self.activation = tf.keras.layers.ReLU() + else: + self.activation = activation + + self.hidden = [] + prevunits = 0 + for units in adv_network_layers[:-1]: + if prevunits == units: + self.hidden.append(SkipDense(units)) + else: + self.hidden.append( + tf.keras.layers.Dense(units, kernel_initializer='he_normal')) + prevunits = units + self.normalization = tf.keras.layers.LayerNormalization() + self.lastlayer = tf.keras.layers.Dense( + adv_network_layers[-1], kernel_initializer='he_normal') + + self.out_layer = tf.keras.layers.Dense(num_actions) + + @tf.function + def call(self, inputs): + """Applies Policy Network. + + Args: + inputs: Tuple representing (info_state, legal_action_mask) + + Returns: + Cumulative regret for each info_state action + """ + x, mask = inputs + for layer in self.hidden: + x = layer(x) + x = self.activation(x) + + x = self.normalization(x) + x = self.lastlayer(x) + x = self.activation(x) + x = self.out_layer(x) + x = mask * x + + return x + + +class DeepCFRSolver(policy.Policy): + """Implements a solver for the Deep CFR Algorithm. + + See https://arxiv.org/abs/1811.00164. + + Define all networks and sampling buffers/memories. Derive losses & learning + steps. Initialize the game state and algorithmic variables. + """ + + def __init__(self, + game, + policy_network_layers=(256, 256), + advantage_network_layers=(128, 128), + num_iterations: int = 100, + num_traversals: int = 100, + learning_rate: float = 1e-3, + batch_size_advantage: int = 2048, + batch_size_strategy: int = 2048, + memory_capacity: int = int(1e6), + policy_network_train_steps: int = 5000, + advantage_network_train_steps: int = 750, + reinitialize_advantage_networks: bool = True, + save_advantage_networks: str = None, + save_strategy_memories: str = None, + infer_device='cpu', + train_device='cpu'): + """Initialize the Deep CFR algorithm. + + Args: + game: Open Spiel game. + policy_network_layers: (list[int]) Layer sizes of strategy net MLP. + advantage_network_layers: (list[int]) Layer sizes of advantage net MLP. + num_iterations: Number of iterations. + num_traversals: Number of traversals per iteration. + learning_rate: Learning rate. + batch_size_advantage: (int) Batch size to sample from advantage memories. + batch_size_strategy: (int) Batch size to sample from strategy memories. + memory_capacity: Number of samples that can be stored in memory. + policy_network_train_steps: Number of policy network training steps (one + policy training iteration at the end). 
+ advantage_network_train_steps: Number of advantage network training steps + (per iteration). + reinitialize_advantage_networks: Whether to re-initialize the advantage + network before training on each iteration. + save_advantage_networks: If provided, all advantage network iterations + are saved in the given folder. This can be useful to implement SD-CFR + https://arxiv.org/abs/1901.07621 + save_strategy_memories: saves the collected strategy memories as a + tfrecords file in the given location. This is not affected by + memory_capacity. All memories are saved to disk and not kept in memory + infer_device: device used for TF-operations in the traversal branch. + Format is anything accepted by tf.device + train_device: device used for TF-operations in the NN training steps. + Format is anything accepted by tf.device + """ + all_players = list(range(game.num_players())) + super(DeepCFRSolver, self).__init__(game, all_players) + self._game = game + if game.get_type().dynamics == pyspiel.GameType.Dynamics.SIMULTANEOUS: + # `_traverse_game_tree` does not take into account this option. + raise ValueError('Simultaneous games are not supported.') + self._batch_size_advantage = batch_size_advantage + self._batch_size_strategy = batch_size_strategy + self._policy_network_train_steps = policy_network_train_steps + self._advantage_network_train_steps = advantage_network_train_steps + self._policy_network_layers = policy_network_layers + self._advantage_network_layers = advantage_network_layers + self._num_players = game.num_players() + self._root_node = self._game.new_initial_state() + self._embedding_size = len(self._root_node.information_state_tensor(0)) + self._num_iterations = num_iterations + self._num_traversals = num_traversals + self._reinitialize_advantage_networks = reinitialize_advantage_networks + self._num_actions = game.num_distinct_actions() + self._iteration = 1 + self._learning_rate = learning_rate + self._save_advantage_networks = save_advantage_networks + self._save_strategy_memories = save_strategy_memories + self._infer_device = infer_device + self._train_device = train_device + self._memories_tfrecordpath = None + self._memories_tfrecordfile = None + + # Initialize file save locations + if self._save_advantage_networks: + os.makedirs(self._save_advantage_networks, exist_ok=True) + + if self._save_strategy_memories: + if os.path.isdir(self._save_strategy_memories): + self._memories_tfrecordpath = os.path.join( + self._save_strategy_memories, 'strategy_memories.tfrecord') + else: + os.makedirs( + os.path.split(self._save_strategy_memories)[0], exist_ok=True) + self._memories_tfrecordpath = self._save_strategy_memories + + # Initialize policy network, loss, optimizer + self._reinitialize_policy_network() + + # Initialize advantage networks, losses, optimizers + self._adv_networks = [] + self._adv_networks_train = [] + self._loss_advantages = [] + self._optimizer_advantages = [] + self._advantage_train_step = [] + for player in range(self._num_players): + self._adv_networks.append( + AdvantageNetwork(self._embedding_size, self._advantage_network_layers, + self._num_actions)) + with tf.device(self._train_device): + self._adv_networks_train.append( + AdvantageNetwork(self._embedding_size, + self._advantage_network_layers, self._num_actions)) + self._loss_advantages.append(tf.keras.losses.MeanSquaredError()) + self._optimizer_advantages.append( + tf.keras.optimizers.Adam(learning_rate=learning_rate)) + self._advantage_train_step.append( + self._get_advantage_train_graph(player)) + + 
self._create_memories(memory_capacity) + + def _reinitialize_policy_network(self): + """Reinitialize policy network and optimizer for training.""" + with tf.device(self._train_device): + self._policy_network = PolicyNetwork(self._embedding_size, + self._policy_network_layers, + self._num_actions) + self._optimizer_policy = tf.keras.optimizers.Adam( + learning_rate=self._learning_rate) + self._loss_policy = tf.keras.losses.MeanSquaredError() + + def _reinitialize_advantage_network(self, player): + """Reinitialize player's advantage network and optimizer for training.""" + with tf.device(self._train_device): + self._adv_networks_train[player] = AdvantageNetwork( + self._embedding_size, self._advantage_network_layers, + self._num_actions) + self._optimizer_advantages[player] = tf.keras.optimizers.Adam( + learning_rate=self._learning_rate) + self._advantage_train_step[player] = ( + self._get_advantage_train_graph(player)) + + @property + def advantage_buffers(self): + return self._advantage_memories + + @property + def strategy_buffer(self): + return self._strategy_memories + + def clear_advantage_buffers(self): + for p in range(self._num_players): + self._advantage_memories[p].clear() + + def _create_memories(self, memory_capacity): + """Create memory buffers and associated feature descriptions.""" + self._strategy_memories = ReservoirBuffer(memory_capacity) + self._advantage_memories = [ + ReservoirBuffer(memory_capacity) for _ in range(self._num_players) + ] + self._strategy_feature_description = { + 'info_state': tf.io.FixedLenFeature([self._embedding_size], tf.float32), + 'action_probs': tf.io.FixedLenFeature([self._num_actions], tf.float32), + 'iteration': tf.io.FixedLenFeature([1], tf.float32), + 'legal_actions': tf.io.FixedLenFeature([self._num_actions], tf.float32) + } + self._advantage_feature_description = { + 'info_state': tf.io.FixedLenFeature([self._embedding_size], tf.float32), + 'iteration': tf.io.FixedLenFeature([1], tf.float32), + 'samp_regret': tf.io.FixedLenFeature([self._num_actions], tf.float32), + 'legal_actions': tf.io.FixedLenFeature([self._num_actions], tf.float32) + } + + def solve(self): + """Solution logic for Deep CFR.""" + advantage_losses = collections.defaultdict(list) + with tf.device(self._infer_device): + with contextlib.ExitStack() as stack: + if self._save_strategy_memories: + self._memories_tfrecordfile = stack.enter_context( + tf.io.TFRecordWriter(self._memories_tfrecordpath)) + for _ in range(self._num_iterations): + for p in range(self._num_players): + for _ in range(self._num_traversals): + self._traverse_game_tree(self._root_node, p) + if self._reinitialize_advantage_networks: + # Re-initialize advantage network for p and train from scratch. + self._reinitialize_advantage_network(p) + advantage_losses[p].append(self._learn_advantage_network(p)) + if self._save_advantage_networks: + os.makedirs(self._save_advantage_networks, exist_ok=True) + self._adv_networks[p].save( + os.path.join(self._save_advantage_networks, + f'advnet_p{p}_it{self._iteration:04}')) + self._iteration += 1 + # Train policy network. 
+ policy_loss = self._learn_strategy_network() + return self._policy_network, advantage_losses, policy_loss + + def save_policy_network(self, outputfolder): + """Saves the policy network to the given folder.""" + os.makedirs(outputfolder, exist_ok=True) + self._policy_network.save(outputfolder) + + def train_policy_network_from_file(self, + tfrecordpath, + iteration=None, + batch_size_strategy=None, + policy_network_train_steps=None, + reinitialize_policy_network=True): + """Trains the policy network from a previously stored tfrecords-file.""" + self._memories_tfrecordpath = tfrecordpath + if iteration: + self._iteration = iteration + if batch_size_strategy: + self._batch_size_strategy = batch_size_strategy + if policy_network_train_steps: + self._policy_network_train_steps = policy_network_train_steps + if reinitialize_policy_network: + self._reinitialize_policy_network() + policy_loss = self._learn_strategy_network() + return policy_loss + + def _add_to_strategy_memory(self, info_state, iteration, + strategy_action_probs, legal_actions_mask): + # pylint: disable=g-doc-args + """Adds the given strategy data to the memory. + + Uses either a tfrecordsfile on disk if provided, or a reservoir buffer. + """ + serialized_example = self._serialize_strategy_memory( + info_state, iteration, strategy_action_probs, legal_actions_mask) + if self._save_strategy_memories: + self._memories_tfrecordfile.write(serialized_example) + else: + self._strategy_memories.add(serialized_example) + + def _serialize_strategy_memory(self, info_state, iteration, + strategy_action_probs, legal_actions_mask): + """Create serialized example to store a strategy entry.""" + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'info_state': + tf.train.Feature( + float_list=tf.train.FloatList(value=info_state)), + 'action_probs': + tf.train.Feature( + float_list=tf.train.FloatList( + value=strategy_action_probs)), + 'iteration': + tf.train.Feature( + float_list=tf.train.FloatList(value=[iteration])), + 'legal_actions': + tf.train.Feature( + float_list=tf.train.FloatList(value=legal_actions_mask)) + })) + return example.SerializeToString() + + def _deserialize_strategy_memory(self, serialized): + """Deserializes a batch of strategy examples for the train step.""" + tups = tf.io.parse_example(serialized, self._strategy_feature_description) + return (tups['info_state'], tups['action_probs'], tups['iteration'], + tups['legal_actions']) + + def _serialize_advantage_memory(self, info_state, iteration, samp_regret, + legal_actions_mask): + """Create serialized example to store an advantage entry.""" + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'info_state': + tf.train.Feature( + float_list=tf.train.FloatList(value=info_state)), + 'iteration': + tf.train.Feature( + float_list=tf.train.FloatList(value=[iteration])), + 'samp_regret': + tf.train.Feature( + float_list=tf.train.FloatList(value=samp_regret)), + 'legal_actions': + tf.train.Feature( + float_list=tf.train.FloatList(value=legal_actions_mask)) + })) + return example.SerializeToString() + + def _deserialize_advantage_memory(self, serialized): + """Deserializes a batch of advantage examples for the train step.""" + tups = tf.io.parse_example(serialized, self._advantage_feature_description) + return (tups['info_state'], tups['samp_regret'], tups['iteration'], + tups['legal_actions']) + + def _traverse_game_tree(self, state, player): + """Performs a traversal of the game tree using external sampling. 
+ + Over a traversal the advantage and strategy memories are populated with + computed advantage values and matched regrets respectively. + + Args: + state: Current OpenSpiel game state. + player: (int) Player index for this traversal. + + Returns: + Recursively returns expected payoffs for each action. + """ + if state.is_terminal(): + # Terminal state get returns. + return state.returns()[player] + elif state.is_chance_node(): + # If this is a chance node, sample an action + chance_outcome, chance_proba = zip(*state.chance_outcomes()) + action = np.random.choice(chance_outcome, p=chance_proba) + return self._traverse_game_tree(state.child(action), player) + elif state.current_player() == player: + # Update the policy over the info set & actions via regret matching. + _, strategy = self._sample_action_from_advantage(state, player) + exp_payoff = 0 * strategy + for action in state.legal_actions(): + exp_payoff[action] = self._traverse_game_tree( + state.child(action), player) + ev = np.sum(exp_payoff * strategy) + samp_regret = (exp_payoff - ev) * state.legal_actions_mask(player) + self._advantage_memories[player].add( + self._serialize_advantage_memory(state.information_state_tensor(), + self._iteration, samp_regret, + state.legal_actions_mask(player))) + return ev + else: + other_player = state.current_player() + _, strategy = self._sample_action_from_advantage(state, other_player) + # Recompute distribution for numerical errors. + probs = strategy + probs /= probs.sum() + sampled_action = np.random.choice(range(self._num_actions), p=probs) + self._add_to_strategy_memory( + state.information_state_tensor(other_player), self._iteration, + strategy, state.legal_actions_mask(other_player)) + return self._traverse_game_tree(state.child(sampled_action), player) + + @tf.function + def _get_matched_regrets(self, info_state, legal_actions_mask, player): + """TF-Graph to calculate regret matching.""" + advs = self._adv_networks[player]( + (tf.expand_dims(info_state, axis=0), legal_actions_mask), + training=False)[0] + advantages = tf.maximum(advs, 0) + summed_regret = tf.reduce_sum(advantages) + if summed_regret > 0: + matched_regrets = advantages / summed_regret + else: + matched_regrets = tf.one_hot( + tf.argmax(tf.where(legal_actions_mask == 1, advs, -10e20)), + self._num_actions) + return advantages, matched_regrets + + def _sample_action_from_advantage(self, state, player): + """Returns an info state policy by applying regret-matching. + + Args: + state: Current OpenSpiel game state. + player: (int) Player index over which to compute regrets. + + Returns: + 1. (np-array) Advantage values for info state actions indexed by action. + 2. (np-array) Matched regrets, prob for actions indexed by action. 
+ """ + info_state = tf.constant( + state.information_state_tensor(player), dtype=tf.float32) + legal_actions_mask = tf.constant( + state.legal_actions_mask(player), dtype=tf.float32) + advantages, matched_regrets = self._get_matched_regrets( + info_state, legal_actions_mask, player) + return advantages.numpy(), matched_regrets.numpy() + + def action_probabilities(self, state, player_id=None): + """Returns action probabilities dict for a single batch.""" + del player_id # unused + cur_player = state.current_player() + legal_actions = state.legal_actions(cur_player) + legal_actions_mask = tf.constant( + state.legal_actions_mask(cur_player), dtype=tf.float32) + info_state_vector = tf.constant( + state.information_state_tensor(), dtype=tf.float32) + if len(info_state_vector.shape) == 1: + info_state_vector = tf.expand_dims(info_state_vector, axis=0) + probs = self._policy_network((info_state_vector, legal_actions_mask), + training=False) + probs = probs.numpy() + return {action: probs[0][action] for action in legal_actions} + + def _get_advantage_dataset(self, player): + """Returns the collected regrets for the given player as a dataset.""" + self._advantage_memories[player].shuffle_data() + data = tf.data.Dataset.from_tensor_slices( + self._advantage_memories[player].data) + data = data.shuffle(ADVANTAGE_TRAIN_SHUFFLE_SIZE) + data = data.repeat() + data = data.batch(self._batch_size_advantage) + data = data.map(self._deserialize_advantage_memory) + data = data.prefetch(tf.data.experimental.AUTOTUNE) + return data + + def _get_advantage_train_graph(self, player): + """Return TF-Graph to perform advantage network train step.""" + @tf.function + def train_step(info_states, advantages, iterations, masks, iteration): + model = self._adv_networks_train[player] + with tf.GradientTape() as tape: + preds = model((info_states, masks), training=True) + main_loss = self._loss_advantages[player]( + advantages, preds, sample_weight=iterations * 2 / iteration) + loss = tf.add_n([main_loss], model.losses) + gradients = tape.gradient(loss, model.trainable_variables) + self._optimizer_advantages[player].apply_gradients( + zip(gradients, model.trainable_variables)) + return main_loss + + return train_step + + def _learn_advantage_network(self, player): + """Compute the loss on sampled transitions and perform a Q-network update. + + If there are not enough elements in the buffer, no loss is computed and + `None` is returned instead. + + Args: + player: (int) player index. + + Returns: + The average loss over the advantage network of the last batch. 
+ """ + + with tf.device(self._train_device): + tfit = tf.constant(self._iteration, dtype=tf.float32) + data = self._get_advantage_dataset(player) + for d in data.take(self._advantage_network_train_steps): + main_loss = self._advantage_train_step[player](*d, tfit) + + self._adv_networks[player].set_weights( + self._adv_networks_train[player].get_weights()) + return main_loss + + def _get_strategy_dataset(self): + """Returns the collected strategy memories as a dataset.""" + if self._memories_tfrecordpath: + data = tf.data.TFRecordDataset(self._memories_tfrecordpath) + else: + self._strategy_memories.shuffle_data() + data = tf.data.Dataset.from_tensor_slices(self._strategy_memories.data) + data = data.shuffle(STRATEGY_TRAIN_SHUFFLE_SIZE) + data = data.repeat() + data = data.batch(self._batch_size_strategy) + data = data.map(self._deserialize_strategy_memory) + data = data.prefetch(tf.data.experimental.AUTOTUNE) + return data + + def _learn_strategy_network(self): + """Compute the loss over the strategy network. + + Returns: + The average loss obtained on the last training batch of transitions + or `None`. + """ + + @tf.function + def train_step(info_states, action_probs, iterations, masks): + model = self._policy_network + with tf.GradientTape() as tape: + preds = model((info_states, masks), training=True) + main_loss = self._loss_policy( + action_probs, preds, sample_weight=iterations * 2 / self._iteration) + loss = tf.add_n([main_loss], model.losses) + gradients = tape.gradient(loss, model.trainable_variables) + self._optimizer_policy.apply_gradients( + zip(gradients, model.trainable_variables)) + return main_loss + + with tf.device(self._train_device): + data = self._get_strategy_dataset() + for d in data.take(self._policy_network_train_steps): + main_loss = train_step(*d) + + return main_loss diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/deep_cfr_tf2_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/deep_cfr_tf2_test.py new file mode 100644 index 0000000..3f9e6a4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/deep_cfr_tf2_test.py @@ -0,0 +1,64 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
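+"""Tests for the TF2 implementation of Deep CFR."""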
+ +from absl.testing import parameterized +import tensorflow as tf + +from open_spiel.python import policy +from open_spiel.python.algorithms import deep_cfr_tf2 +from open_spiel.python.algorithms import exploitability +import pyspiel + + +class DeepCFRTest(parameterized.TestCase): + + @parameterized.parameters('leduc_poker', 'kuhn_poker', 'liars_dice') + def test_deep_cfr_runs(self, game_name): + game = pyspiel.load_game(game_name) + deep_cfr_solver = deep_cfr_tf2.DeepCFRSolver( + game, + policy_network_layers=(8, 4), + advantage_network_layers=(4, 2), + num_iterations=2, + num_traversals=2, + learning_rate=1e-3, + batch_size_advantage=8, + batch_size_strategy=8, + memory_capacity=1e7) + deep_cfr_solver.solve() + + def test_matching_pennies_3p(self): + # We don't expect Deep CFR to necessarily converge on 3-player games but + # it's nonetheless interesting to see this result. + game = pyspiel.load_game_as_turn_based('matching_pennies_3p') + deep_cfr_solver = deep_cfr_tf2.DeepCFRSolver( + game, + policy_network_layers=(16, 8), + advantage_network_layers=(32, 16), + num_iterations=2, + num_traversals=2, + learning_rate=1e-3, + batch_size_advantage=8, + batch_size_strategy=8, + memory_capacity=1e7) + deep_cfr_solver.solve() + conv = exploitability.nash_conv( + game, + policy.tabular_policy_from_callable( + game, deep_cfr_solver.action_probabilities)) + print('Deep CFR in Matching Pennies 3p. NashConv: {}'.format(conv)) + + +if __name__ == '__main__': + tf.test.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/dqn.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/dqn.py new file mode 100644 index 0000000..1ff5345 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/dqn.py @@ -0,0 +1,464 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""DQN agent implemented in TensorFlow.""" + +import collections +import os +from absl import logging +import numpy as np +import tensorflow.compat.v1 as tf + +from open_spiel.python import rl_agent +from open_spiel.python import simple_nets +from open_spiel.python.utils.replay_buffer import ReplayBuffer + +# Temporarily disable TF2 behavior until code is updated. +tf.disable_v2_behavior() + +Transition = collections.namedtuple( + "Transition", + "info_state action reward next_info_state is_final_step legal_actions_mask") + +ILLEGAL_ACTION_LOGITS_PENALTY = -1e9 + + +class DQN(rl_agent.AbstractAgent): + """DQN Agent implementation in TensorFlow. + + See open_spiel/python/examples/breakthrough_dqn.py for an usage example. 
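+
+ The agent keeps an online Q-network and a target Q-network that is synced
+ every `update_target_network_every` training steps, and it acts
+ epsilon-greedily over the legal actions. A minimal usage sketch (it assumes
+ an `rl_environment.Environment` called `env`; the pattern mirrors
+ dqn_test.py):
+
+   with tf.Session() as sess:
+     agent = DQN(sess, player_id=0,
+                 state_representation_size=env.observation_spec()["info_state"][0],
+                 num_actions=env.action_spec()["num_actions"])
+     sess.run(tf.global_variables_initializer())
+     time_step = env.reset()
+     while not time_step.last():
+       step_output = agent.step(time_step)
+       time_step = env.step([step_output.action])
+     agent.step(time_step)  # Final step so the terminal transition is stored.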
+ """ + + def __init__(self, + session, + player_id, + state_representation_size, + num_actions, + hidden_layers_sizes=128, + replay_buffer_capacity=10000, + batch_size=128, + replay_buffer_class=ReplayBuffer, + learning_rate=0.01, + update_target_network_every=1000, + learn_every=10, + discount_factor=1.0, + min_buffer_size_to_learn=1000, + epsilon_start=1.0, + epsilon_end=0.1, + epsilon_decay_duration=int(1e6), + optimizer_str="sgd", + loss_str="mse"): + """Initialize the DQN agent.""" + + # This call to locals() is used to store every argument used to initialize + # the class instance, so it can be copied with no hyperparameter change. + self._kwargs = locals() + + self.player_id = player_id + self._session = session + self._num_actions = num_actions + if isinstance(hidden_layers_sizes, int): + hidden_layers_sizes = [hidden_layers_sizes] + self._layer_sizes = hidden_layers_sizes + self._batch_size = batch_size + self._update_target_network_every = update_target_network_every + self._learn_every = learn_every + self._min_buffer_size_to_learn = min_buffer_size_to_learn + self._discount_factor = discount_factor + + self._epsilon_start = epsilon_start + self._epsilon_end = epsilon_end + self._epsilon_decay_duration = epsilon_decay_duration + + # TODO(author6) Allow for optional replay buffer config. + if not isinstance(replay_buffer_capacity, int): + raise ValueError("Replay buffer capacity not an integer.") + self._replay_buffer = replay_buffer_class(replay_buffer_capacity) + self._prev_timestep = None + self._prev_action = None + + # Step counter to keep track of learning, eps decay and target network. + self._step_counter = 0 + + # Keep track of the last training loss achieved in an update step. + self._last_loss_value = None + + # Create required TensorFlow placeholders to perform the Q-network updates. + self._info_state_ph = tf.placeholder( + shape=[None, state_representation_size], + dtype=tf.float32, + name="info_state_ph") + self._action_ph = tf.placeholder( + shape=[None], dtype=tf.int32, name="action_ph") + self._reward_ph = tf.placeholder( + shape=[None], dtype=tf.float32, name="reward_ph") + self._is_final_step_ph = tf.placeholder( + shape=[None], dtype=tf.float32, name="is_final_step_ph") + self._next_info_state_ph = tf.placeholder( + shape=[None, state_representation_size], + dtype=tf.float32, + name="next_info_state_ph") + self._legal_actions_mask_ph = tf.placeholder( + shape=[None, num_actions], + dtype=tf.float32, + name="legal_actions_mask_ph") + + self._q_network = simple_nets.MLP(state_representation_size, + self._layer_sizes, num_actions) + self._q_values = self._q_network(self._info_state_ph) + + self._target_q_network = simple_nets.MLP(state_representation_size, + self._layer_sizes, num_actions) + self._target_q_values = self._target_q_network(self._next_info_state_ph) + + # Stop gradient to prevent updates to the target network while learning + self._target_q_values = tf.stop_gradient(self._target_q_values) + + self._update_target_network = self._create_target_network_update_op( + self._q_network, self._target_q_network) + + # Create the loss operations. + # Sum a large negative constant to illegal action logits before taking the + # max. This prevents illegal action values from being considered as target. 
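+ # Q-learning target: reward + (1 - is_final) * discount * max over the
+ # legal next-state Q-values from the (gradient-stopped) target network.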
+ illegal_actions = 1 - self._legal_actions_mask_ph + illegal_logits = illegal_actions * ILLEGAL_ACTION_LOGITS_PENALTY + max_next_q = tf.reduce_max( + tf.math.add(tf.stop_gradient(self._target_q_values), illegal_logits), + axis=-1) + target = ( + self._reward_ph + + (1 - self._is_final_step_ph) * self._discount_factor * max_next_q) + + action_indices = tf.stack( + [tf.range(tf.shape(self._q_values)[0]), self._action_ph], axis=-1) + predictions = tf.gather_nd(self._q_values, action_indices) + + self._savers = [("q_network", tf.train.Saver(self._q_network.variables)), + ("target_q_network", + tf.train.Saver(self._target_q_network.variables))] + + if loss_str == "mse": + loss_class = tf.losses.mean_squared_error + elif loss_str == "huber": + loss_class = tf.losses.huber_loss + else: + raise ValueError("Not implemented, choose from 'mse', 'huber'.") + + self._loss = tf.reduce_mean( + loss_class(labels=target, predictions=predictions)) + + if optimizer_str == "adam": + self._optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) + elif optimizer_str == "sgd": + self._optimizer = tf.train.GradientDescentOptimizer( + learning_rate=learning_rate) + else: + raise ValueError("Not implemented, choose from 'adam' and 'sgd'.") + + self._learn_step = self._optimizer.minimize(self._loss) + self._initialize() + + def get_step_counter(self): + return self._step_counter + + def step(self, time_step, is_evaluation=False, add_transition_record=True): + """Returns the action to be taken and updates the Q-network if needed. + + Args: + time_step: an instance of rl_environment.TimeStep. + is_evaluation: bool, whether this is a training or evaluation call. + add_transition_record: Whether to add to the replay buffer on this step. + + Returns: + A `rl_agent.StepOutput` containing the action probs and chosen action. + """ + + # Act step: don't act at terminal info states or if its not our turn. + if (not time_step.last()) and ( + time_step.is_simultaneous_move() or + self.player_id == time_step.current_player()): + info_state = time_step.observations["info_state"][self.player_id] + legal_actions = time_step.observations["legal_actions"][self.player_id] + epsilon = self._get_epsilon(is_evaluation) + action, probs = self._epsilon_greedy(info_state, legal_actions, epsilon) + else: + action = None + probs = [] + + # Don't mess up with the state during evaluation. + if not is_evaluation: + self._step_counter += 1 + + if self._step_counter % self._learn_every == 0: + self._last_loss_value = self.learn() + + if self._step_counter % self._update_target_network_every == 0: + self._session.run(self._update_target_network) + + if self._prev_timestep and add_transition_record: + # We may omit record adding here if it's done elsewhere. + self.add_transition(self._prev_timestep, self._prev_action, time_step) + + if time_step.last(): # prepare for the next episode. + self._prev_timestep = None + self._prev_action = None + return + else: + self._prev_timestep = time_step + self._prev_action = action + + return rl_agent.StepOutput(action=action, probs=probs) + + def add_transition(self, prev_time_step, prev_action, time_step): + """Adds the new transition using `time_step` to the replay buffer. + + Adds the transition from `self._prev_timestep` to `time_step` by + `self._prev_action`. + + Args: + prev_time_step: prev ts, an instance of rl_environment.TimeStep. + prev_action: int, action taken at `prev_time_step`. + time_step: current ts, an instance of rl_environment.TimeStep. 
+ """ + assert prev_time_step is not None + legal_actions = (time_step.observations["legal_actions"][self.player_id]) + legal_actions_mask = np.zeros(self._num_actions) + legal_actions_mask[legal_actions] = 1.0 + transition = Transition( + info_state=( + prev_time_step.observations["info_state"][self.player_id][:]), + action=prev_action, + reward=time_step.rewards[self.player_id], + next_info_state=time_step.observations["info_state"][self.player_id][:], + is_final_step=float(time_step.last()), + legal_actions_mask=legal_actions_mask) + self._replay_buffer.add(transition) + + def _create_target_network_update_op(self, q_network, target_q_network): + """Create TF ops copying the params of the Q-network to the target network. + + Args: + q_network: A q-network object that implements provides the `variables` + property representing the TF variable list. + target_q_network: A target q-net object that provides the `variables` + property representing the TF variable list. + + Returns: + A `tf.Operation` that updates the variables of the target. + """ + self._variables = q_network.variables[:] + self._target_variables = target_q_network.variables[:] + assert self._variables + assert len(self._variables) == len(self._target_variables) + return tf.group([ + tf.assign(target_v, v) + for (target_v, v) in zip(self._target_variables, self._variables) + ]) + + def _epsilon_greedy(self, info_state, legal_actions, epsilon): + """Returns a valid epsilon-greedy action and valid action probs. + + Action probabilities are given by a softmax over legal q-values. + + Args: + info_state: hashable representation of the information state. + legal_actions: list of legal actions at `info_state`. + epsilon: float, probability of taking an exploratory action. + + Returns: + A valid epsilon-greedy action and valid action probabilities. + """ + probs = np.zeros(self._num_actions) + if np.random.rand() < epsilon: + action = np.random.choice(legal_actions) + probs[legal_actions] = 1.0 / len(legal_actions) + else: + info_state = np.reshape(info_state, [1, -1]) + q_values = self._session.run( + self._q_values, feed_dict={self._info_state_ph: info_state})[0] + legal_q_values = q_values[legal_actions] + action = legal_actions[np.argmax(legal_q_values)] + probs[action] = 1.0 + return action, probs + + def _get_epsilon(self, is_evaluation, power=1.0): + """Returns the evaluation or decayed epsilon value.""" + if is_evaluation: + return 0.0 + decay_steps = min(self._step_counter, self._epsilon_decay_duration) + decayed_epsilon = ( + self._epsilon_end + (self._epsilon_start - self._epsilon_end) * + (1 - decay_steps / self._epsilon_decay_duration)**power) + return decayed_epsilon + + def learn(self): + """Compute the loss on sampled transitions and perform a Q-network update. + + If there are not enough elements in the buffer, no loss is computed and + `None` is returned instead. + + Returns: + The average loss obtained on this batch of transitions or `None`. 
+ """ + + if (len(self._replay_buffer) < self._batch_size or + len(self._replay_buffer) < self._min_buffer_size_to_learn): + return None + + transitions = self._replay_buffer.sample(self._batch_size) + info_states = [t.info_state for t in transitions] + actions = [t.action for t in transitions] + rewards = [t.reward for t in transitions] + next_info_states = [t.next_info_state for t in transitions] + are_final_steps = [t.is_final_step for t in transitions] + legal_actions_mask = [t.legal_actions_mask for t in transitions] + loss, _ = self._session.run( + [self._loss, self._learn_step], + feed_dict={ + self._info_state_ph: info_states, + self._action_ph: actions, + self._reward_ph: rewards, + self._is_final_step_ph: are_final_steps, + self._next_info_state_ph: next_info_states, + self._legal_actions_mask_ph: legal_actions_mask, + }) + return loss + + def _full_checkpoint_name(self, checkpoint_dir, name): + checkpoint_filename = "_".join([name, "pid" + str(self.player_id)]) + return os.path.join(checkpoint_dir, checkpoint_filename) + + def _latest_checkpoint_filename(self, name): + checkpoint_filename = "_".join([name, "pid" + str(self.player_id)]) + return checkpoint_filename + "_latest" + + def save(self, checkpoint_dir): + """Saves the q network and the target q-network. + + Note that this does not save the experience replay buffers and should + only be used to restore the agent's policy, not resume training. + + Args: + checkpoint_dir: directory where checkpoints will be saved. + """ + for name, saver in self._savers: + path = saver.save( + self._session, + self._full_checkpoint_name(checkpoint_dir, name), + latest_filename=self._latest_checkpoint_filename(name)) + logging.info("Saved to path: %s", path) + + def has_checkpoint(self, checkpoint_dir): + for name, _ in self._savers: + if tf.train.latest_checkpoint( + self._full_checkpoint_name(checkpoint_dir, name), + os.path.join(checkpoint_dir, + self._latest_checkpoint_filename(name))) is None: + return False + return True + + def restore(self, checkpoint_dir): + """Restores the q network and the target q-network. + + Note that this does not restore the experience replay buffers and should + only be used to restore the agent's policy, not resume training. + + Args: + checkpoint_dir: directory from which checkpoints will be restored. 
+ """ + for name, saver in self._savers: + full_checkpoint_dir = self._full_checkpoint_name(checkpoint_dir, name) + logging.info("Restoring checkpoint: %s", full_checkpoint_dir) + saver.restore(self._session, full_checkpoint_dir) + + @property + def q_values(self): + return self._q_values + + @property + def replay_buffer(self): + return self._replay_buffer + + @property + def info_state_ph(self): + return self._info_state_ph + + @property + def loss(self): + return self._last_loss_value + + @property + def prev_timestep(self): + return self._prev_timestep + + @property + def prev_action(self): + return self._prev_action + + @property + def step_counter(self): + return self._step_counter + + def _initialize(self): + initialization_weights = tf.group( + *[var.initializer for var in self._variables]) + initialization_target_weights = tf.group( + *[var.initializer for var in self._target_variables]) + initialization_opt = tf.group( + *[var.initializer for var in self._optimizer.variables()]) + + self._session.run( + tf.group(*[ + initialization_weights, initialization_target_weights, + initialization_opt, + ])) + + def get_weights(self): + variables = [self._session.run(self._q_network.variables)] + variables.append(self._session.run(self._target_q_network.variables)) + return variables + + def copy_with_noise(self, sigma=0.0, copy_weights=True): + """Copies the object and perturbates it with noise. + + Args: + sigma: gaussian dropout variance term : Multiplicative noise following + (1+sigma*epsilon), epsilon standard gaussian variable, multiplies each + model weight. sigma=0 means no perturbation. + copy_weights: Boolean determining whether to copy model weights (True) or + just model hyperparameters. + + Returns: + Perturbated copy of the model. + """ + _ = self._kwargs.pop("self", None) + copied_object = DQN(**self._kwargs) + + q_network = getattr(copied_object, "_q_network") + target_q_network = getattr(copied_object, "_target_q_network") + + if copy_weights: + copy_weights = tf.group(*[ + va.assign(vb * (1 + sigma * tf.random.normal(vb.shape))) + for va, vb in zip(q_network.variables, self._q_network.variables) + ]) + self._session.run(copy_weights) + + copy_target_weights = tf.group(*[ + va.assign(vb * (1 + sigma * tf.random.normal(vb.shape))) + for va, vb in zip(target_q_network.variables, + self._target_q_network.variables) + ]) + self._session.run(copy_target_weights) + return copied_object diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/dqn_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/dqn_test.py new file mode 100644 index 0000000..0d414a5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/dqn_test.py @@ -0,0 +1,133 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from absl.testing import absltest +import tensorflow.compat.v1 as tf + +from open_spiel.python import rl_environment +from open_spiel.python.algorithms import dqn +import pyspiel + +# Temporarily disable TF2 behavior until code is updated. +tf.disable_v2_behavior() + +# A simple two-action game encoded as an EFG game. Going left gets -1, going +# right gets a +1. +SIMPLE_EFG_DATA = """ + EFG 2 R "Simple single-agent problem" { "Player 1" } "" + p "ROOT" 1 1 "ROOT" { "L" "R" } 0 + t "L" 1 "Outcome L" { -1.0 } + t "R" 2 "Outcome R" { 1.0 } +""" + + +class DQNTest(tf.test.TestCase): + + def test_simple_game(self): + game = pyspiel.load_efg_game(SIMPLE_EFG_DATA) + env = rl_environment.Environment(game=game) + with self.session() as sess: + agent = dqn.DQN(sess, 0, + state_representation_size= + game.information_state_tensor_shape()[0], + num_actions=game.num_distinct_actions(), + hidden_layers_sizes=[16], + replay_buffer_capacity=100, + batch_size=5, + epsilon_start=0.02, + epsilon_end=0.01) + total_reward = 0 + sess.run(tf.global_variables_initializer()) + + for _ in range(100): + time_step = env.reset() + while not time_step.last(): + agent_output = agent.step(time_step) + time_step = env.step([agent_output.action]) + total_reward += time_step.rewards[0] + agent.step(time_step) + self.assertGreaterEqual(total_reward, 75) + + def test_run_tic_tac_toe(self): + env = rl_environment.Environment("tic_tac_toe") + state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + with self.session() as sess: + agents = [ + dqn.DQN( # pylint: disable=g-complex-comprehension + sess, + player_id, + state_representation_size=state_size, + num_actions=num_actions, + hidden_layers_sizes=[16], + replay_buffer_capacity=10, + batch_size=5) for player_id in [0, 1] + ] + sess.run(tf.global_variables_initializer()) + time_step = env.reset() + while not time_step.last(): + current_player = time_step.observations["current_player"] + current_agent = agents[current_player] + agent_output = current_agent.step(time_step) + time_step = env.step([agent_output.action]) + + for agent in agents: + agent.step(time_step) + + @absltest.skip("Causing a segmentation fault on wheel tests") + def test_run_hanabi(self): + # Hanabi is an optional game, so check we have it before running the test. + game = "hanabi" + if game not in pyspiel.registered_names(): + return + + num_players = 3 + env_configs = { + "players": num_players, + "max_life_tokens": 1, + "colors": 2, + "ranks": 3, + "hand_size": 2, + "max_information_tokens": 3, + "discount": 0. 
+ } + env = rl_environment.Environment(game, **env_configs) + state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + with self.session() as sess: + agents = [ + dqn.DQN( # pylint: disable=g-complex-comprehension + sess, + player_id, + state_representation_size=state_size, + num_actions=num_actions, + hidden_layers_sizes=[16], + replay_buffer_capacity=10, + batch_size=5) for player_id in range(num_players) + ] + sess.run(tf.global_variables_initializer()) + time_step = env.reset() + while not time_step.last(): + current_player = time_step.observations["current_player"] + agent_output = [agent.step(time_step) for agent in agents] + time_step = env.step([agent_output[current_player].action]) + + for agent in agents: + agent.step(time_step) + + +if __name__ == "__main__": + tf.test.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/eva.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/eva.py new file mode 100644 index 0000000..a4c6590 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/eva.py @@ -0,0 +1,420 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Implements an Ephemeral Value Adjustment Agent. + +See https://arxiv.org/abs/1810.08163. + +The algorithm queries trajectories from a replay buffer based on similarities +to embedding representations and uses a parametric model to compute values for +counterfactual state-action pairs when integrating across those trajectories. +Finally, a weighted average between the parametric (DQN in this case) and the +non-parametric model is used to compute the policy. +""" + +import collections +import copy +import numpy as np +import tensorflow.compat.v1 as tf + +from open_spiel.python import rl_agent +from open_spiel.python import simple_nets +from open_spiel.python.algorithms import dqn + +# Temporarily disable TF2 behavior until we update the code. +tf.disable_v2_behavior() + +MEM_KEY_NAME = "embedding" + +ValueBufferElement = collections.namedtuple("ValueElement", "embedding value") + +ReplayBufferElement = collections.namedtuple( + "ReplayElement", "embedding info_state action reward next_info_state " + "is_final_step legal_actions_mask") + + +# TODO(author3) Refactor into data structures lib. +class QueryableFixedSizeRingBuffer(dqn.ReplayBuffer): + """ReplayBuffer of fixed size with a FIFO replacement policy. + + Stored transitions can be sampled uniformly. This extends the DQN replay + buffer by allowing the contents to be fetched by L2 proximity to a query + value. + + The underlying datastructure is a ring buffer, allowing 0(1) adding and + sampling. + """ + + def knn(self, key, key_name, k, trajectory_len=1): + """Computes top-k neighbours based on L2 distance. + + Args: + key: (np.array) key value to query memory. + key_name: (str) attribute name of key in memory elements. + k: (int) number of neighbours to fetch. 
+ trajectory_len: (int) length of trajectory to fetch from replay buffer. + + Returns: + List of tuples (L2 negative distance, BufferElement) sorted in increasing + order by the negative L2 distqances from the key. + """ + distances = [(np.linalg.norm(getattr(sample, key_name) - key, 2, + axis=0), sample) for sample in self._data] + return sorted(distances, key=lambda v: -v[0])[:k] + + +class EVAAgent(object): + """Implements a solver for Ephemeral VAlue Adjustment. + + See https://arxiv.org/abs/1810.08163. + + Define all networks and sampling buffers/memories. Derive losses & learning + steps. Initialize the game state and algorithmic variables. + """ + + def __init__(self, + session, + game, + player_id, + state_size, + num_actions, + embedding_network_layers=(128,), + embedding_size=16, + dqn_hidden_layers=(128, 128), + batch_size=16, + trajectory_len=10, + num_neighbours=5, + learning_rate=1e-4, + mixing_parameter=0.9, + memory_capacity=int(1e6), + discount_factor=1.0, + update_target_network_every=1000, + epsilon_start=1.0, + epsilon_end=0.1, + epsilon_decay_duration=int(1e4), + embedding_as_parametric_input=False): + """Initialize the Ephemeral VAlue Adjustment algorithm. + + Args: + session: (tf.Session) TensorFlow session. + game: (rl_environment.Environment) Open Spiel game. + player_id: (int) Player id for this player. + state_size: (int) Size of info state vector. + num_actions: (int) number of actions. + embedding_network_layers: (list[int]) Layer sizes of strategy net MLP. + embedding_size: (int) Size of memory embeddings. + dqn_hidden_layers: (list(int)) MLP layer sizes of DQN network. + batch_size: (int) Size of batches for DQN learning steps. + trajectory_len: (int) Length of trajectories from replay buffer. + num_neighbours: (int) Number of neighbours to fetch from replay buffer. + learning_rate: (float) Learning rate. + mixing_parameter: (float) Value mixing parameter between 0 and 1. + memory_capacity: Number af samples that can be stored in memory. + discount_factor: (float) Discount factor for Q-Learning. + update_target_network_every: How often to update DQN target network. + epsilon_start: (float) Starting epsilon-greedy value. + epsilon_end: (float) Final epsilon-greedy value. + epsilon_decay_duration: (float) Number of steps over which epsilon decays. + embedding_as_parametric_input: (bool) Whether we use embeddings as input + to the parametric model. + """ + assert (mixing_parameter >= 0 and mixing_parameter <= 1) + self._game = game + self._session = session + self.player_id = player_id + self._env = game + self._num_actions = num_actions + self._info_state_size = state_size + self._embedding_size = embedding_size + self._lambda = mixing_parameter + self._trajectory_len = trajectory_len + self._num_neighbours = num_neighbours + self._discount = discount_factor + self._epsilon_start = epsilon_start + self._epsilon_end = epsilon_end + self._epsilon_decay_duration = epsilon_decay_duration + self._last_time_step = None + self._last_action = None + self._embedding_as_parametric_input = embedding_as_parametric_input + + # Create required TensorFlow placeholders to perform the Q-network updates. + self._info_state_ph = tf.placeholder( + shape=[None, self._info_state_size], + dtype=tf.float32, + name="info_state_ph") + self._embedding_network = simple_nets.MLP(self._info_state_size, + list(embedding_network_layers), + embedding_size) + self._embedding = self._embedding_network(self._info_state_ph) + + # The DQN agent requires this be an integer. 
+ if not isinstance(memory_capacity, int): + raise ValueError("Memory capacity not an integer.") + + # Initialize the parametric & non-parametric Q-networks. + self._agent = dqn.DQN( + session, + player_id, + state_representation_size=self._info_state_size, + num_actions=self._num_actions, + hidden_layers_sizes=list(dqn_hidden_layers), + replay_buffer_capacity=memory_capacity, + replay_buffer_class=QueryableFixedSizeRingBuffer, + batch_size=batch_size, + learning_rate=learning_rate, + update_target_network_every=update_target_network_every, + learn_every=batch_size, + discount_factor=1.0, + epsilon_start=1.0, + epsilon_end=0.1, + epsilon_decay_duration=int(1e6)) + # Initialize Value Buffers - Fetch Replay buffers from agents. + self._value_buffer = QueryableFixedSizeRingBuffer(memory_capacity) + self._replay_buffer = self._agent.replay_buffer + + # Initialize non-parametric & EVA Q-values. + self._v_np = collections.defaultdict(float) + self._q_np = collections.defaultdict(lambda: [0] * self._num_actions) + self._q_eva = collections.defaultdict(lambda: [0] * self._num_actions) + + @property + def env(self): + return self._env + + @property + def loss(self): + return self._agent.loss + + def _add_transition_value(self, infostate_embedding, value): + """Adds the embedding and value to the ValueBuffer. + + Args: + infostate_embedding: (np.array) embeddig vector. + value: (float) Value associated with state embeding. + """ + transition = ValueBufferElement(embedding=infostate_embedding, value=value) + self._value_buffer.add(transition) + + def _add_transition_replay(self, infostate_embedding, time_step): + """Adds the new transition using `time_step` to the replay buffer. + + Adds the transition from `self._prev_timestep` to `time_step` by + `self._prev_action`. + + Args: + infostate_embedding: embeddig vector. + time_step: an instance of rl_environment.TimeStep. + """ + prev_timestep = self._last_time_step + assert prev_timestep is not None + legal_actions = ( + prev_timestep.observations["legal_actions"][self.player_id]) + legal_actions_mask = np.zeros(self._num_actions) + legal_actions_mask[legal_actions] = 1.0 + reward = time_step.rewards[self.player_id] if time_step.rewards else 0.0 + transition = ReplayBufferElement( + embedding=infostate_embedding, + info_state=(prev_timestep.observations["info_state"][self.player_id]), + action=self._last_action, + reward=reward, + next_info_state=time_step.observations["info_state"][self.player_id], + is_final_step=float(time_step.last()), + legal_actions_mask=legal_actions_mask) + self._replay_buffer.add(transition) + + def step(self, time_step, is_evaluation=False): + """Returns the action to be taken and updates the value functions. + + Args: + time_step: an instance of rl_environment.TimeStep. + is_evaluation: bool, whether this is a training or evaluation call. + + Returns: + A `rl_agent.StepOutput` containing the action probs and chosen action. + """ + # Act step: don't act at terminal info states. + if not time_step.last(): + info_state = time_step.observations["info_state"][self.player_id] + legal_actions = time_step.observations["legal_actions"][self.player_id] + epsilon = self._get_epsilon(self._agent.step_counter, is_evaluation) + + # Sample an action from EVA via epsilon greedy policy. + action, probs = self._epsilon_greedy(self._q_eva[tuple(info_state)], + legal_actions, epsilon) + + # Update Step: Only with transitions and not when evaluating. 
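+ # Embed the previous info state, query nearest neighbours from the value and
+ # replay buffers, take a parametric (DQN) step, and mix the parametric and
+ # non-parametric estimates into Q_eva.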
+ if (not is_evaluation and self._last_time_step is not None): + info_state = self._last_time_step.observations["info_state"][ + self.player_id] + legal_actions = self._last_time_step.observations["legal_actions"][ + self.player_id] + epsilon = self._get_epsilon(self._agent.step_counter, is_evaluation) + + # Get embedding. + infostate_embedding = self._session.run( + self._embedding, + feed_dict={self._info_state_ph: np.expand_dims(info_state, + axis=0)})[0] + + neighbours_value = self._value_buffer.knn(infostate_embedding, + MEM_KEY_NAME, + self._num_neighbours, 1) + # collect trace values of knn from L (value buffer) .. Q_np(s_k) + neighbours_replay = self._replay_buffer.knn(infostate_embedding, + MEM_KEY_NAME, + self._num_neighbours, + self._trajectory_len) + + # Take a step with the parametric model and get q-values. Use embedding as + # input to the parametric meodel. + # TODO(author6) Recompute embeddings for buffers on learning steps. + if self._embedding_as_parametric_input: + last_time_step_copy = copy.deepcopy(self._last_time_step) + last_time_step_copy.observations["info_state"][ + self.player_id] = infostate_embedding + self._agent.step(last_time_step_copy, add_transition_record=False) + else: + self._agent.step(self._last_time_step, add_transition_record=False) + q_values = self._session.run( + self._agent.q_values, + feed_dict={ + self._agent.info_state_ph: np.expand_dims(info_state, axis=0) + })[0] + # Update EVA: Q_eva = lambda q_theta(s_t) + (1-lambda) sum(Q_np(s_k, .))/K + for a in legal_actions: + q_theta = q_values[a] + self._q_eva[tuple(info_state)][a] = ( + self._lambda * q_theta + (1 - self._lambda) * + sum([elem[1].value + for elem in neighbours_value]) / self._num_neighbours) + + # Append (e,s,a,r,s') to Replay Buffer + self._add_transition_replay(infostate_embedding, time_step) + + # update Q_np with Traces using TCP + self._trajectory_centric_planning(neighbours_replay) + + # Append Q_np(s, a) to Value Buffer + self._add_transition_value( + infostate_embedding, self._q_np[tuple(info_state)][self._last_action]) + + # Prepare for the next episode. + if time_step.last(): + self._last_time_step = None + self._last_action = None + return + + self._last_time_step = time_step + self._last_action = action + return rl_agent.StepOutput(action=action, probs=probs) + + def _trajectory_centric_planning(self, trajectories): + """Performs trajectory centric planning. + + Uses trajectories from the replay buffer to update the non-parametric values + while supplying counter-factual values with the parametric model. + + Args: + trajectories: Current OpenSpiel game state. + """ + # Calculate non-parametric values over the trajectories. + # Iterate backward through trajectories + for t in range(len(trajectories) - 1, 0, -1): + elem = trajectories[t][1] + s_tp1 = tuple(elem.next_info_state) + s_t = tuple(elem.info_state) + a_t = elem.action + r_t = elem.reward + legal_actions = elem.legal_actions_mask + if t < len(trajectories) - 1: + for action in range(len(legal_actions)): + if not legal_actions[action]: + continue + if action == elem.action: + self._q_np[s_t][a_t] = (r_t + self._discount * self._v_np[s_tp1]) + else: + q_values_parametric = self._session.run( + self._agent.q_values, + feed_dict={ + self._agent.info_state_ph: + np.expand_dims(elem.info_state, axis=0) + }) + self._q_np[s_t][a_t] = q_values_parametric[0][action] + + # Set V(s_t) + if t == len(trajectories) - 1: + # Sample from the parametric model. 
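+ # The last state of the trajectory has no stored successor, so V(s_t) is
+ # bootstrapped from the parametric Q-network rather than from the trace.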
+ q_values_parametric = self._session.run( + self._agent.q_values, + feed_dict={ + self._agent.info_state_ph: + np.expand_dims(elem.info_state, axis=0) + }) + self._v_np[s_t] = np.max(q_values_parametric) + else: + self._v_np[s_t] = max(self._q_np[s_t]) + + def _epsilon_greedy(self, q_values, legal_actions, epsilon): + """Returns a valid epsilon-greedy action and valid action probs. + + Action probabilities are given by a softmax over legal q-values. + + Args: + q_values: list of Q-values by action. + legal_actions: list of legal actions at `info_state`. + epsilon: float, probability of taking an exploratory action. + + Returns: + A valid epsilon-greedy action and valid action probabilities. + """ + probs = np.zeros(self._num_actions) + q_values = np.array(q_values) + if np.random.rand() < epsilon: + action = np.random.choice(legal_actions) + probs[legal_actions] = 1.0 / len(legal_actions) + else: + legal_q_values = q_values[legal_actions] + action = legal_actions[np.argmax(legal_q_values)] + # Reduce max_q for numerical stability. Result is the same. + max_q = np.max(legal_q_values) + e_x = np.exp(legal_q_values - max_q) + probs[legal_actions] = e_x / e_x.sum(axis=0) + return action, probs + + def _get_epsilon(self, step_counter, is_evaluation): + """Returns the evaluation or decayed epsilon value.""" + if is_evaluation: + return 0.0 + decay_steps = min(step_counter, self._epsilon_decay_duration) + decayed_epsilon = ( + self._epsilon_end + (self._epsilon_start - self._epsilon_end) * + (1 - decay_steps / self._epsilon_decay_duration)) + return decayed_epsilon + + def action_probabilities(self, state): + """Returns action probabilites dict for a single batch.""" + # TODO(author3, author6): Refactor this to expect pre-normalized form. + if hasattr(state, "information_state_tensor"): + state_rep = tuple(state.information_state_tensor(self.player_id)) + elif hasattr(state, "observation_tensor"): + state_rep = tuple(state.observation_tensor(self.player_id)) + else: + raise AttributeError("Unable to extract normalized state vector.") + legal_actions = state.legal_actions(self.player_id) + if legal_actions: + _, probs = self._epsilon_greedy( + self._q_eva[state_rep], legal_actions, epsilon=0.0) + return {a: probs[a] for a in range(self._num_actions)} + else: + raise ValueError("Node has no legal actions to take.") diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/eva_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/eva_test.py new file mode 100644 index 0000000..300864a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/eva_test.py @@ -0,0 +1,104 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from absl.testing import parameterized +import tensorflow.compat.v1 as tf + +from open_spiel.python import rl_environment +from open_spiel.python.algorithms import eva + +# Temporarily disable TF2 behavior until we update the code. 
+tf.disable_v2_behavior() + + +class EVATest(parameterized.TestCase): + + @parameterized.parameters("tic_tac_toe", "kuhn_poker", "liars_dice") + def test_run_games(self, game): + env = rl_environment.Environment(game) + num_players = env.num_players + eva_agents = [] + num_actions = env.action_spec()["num_actions"] + state_size = env.observation_spec()["info_state"][0] + with tf.Session() as sess: + for player in range(num_players): + eva_agents.append( + eva.EVAAgent( + sess, + env, + player, + state_size, + num_actions, + embedding_network_layers=(64, 32), + embedding_size=12, + learning_rate=1e-4, + mixing_parameter=0.5, + memory_capacity=int(1e6), + discount_factor=1.0, + epsilon_start=1.0, + epsilon_end=0.1, + epsilon_decay_duration=int(1e6))) + sess.run(tf.global_variables_initializer()) + time_step = env.reset() + while not time_step.last(): + current_player = time_step.observations["current_player"] + current_agent = eva_agents[current_player] + # 1. Step the agent. + # 2. Step the Environment. + agent_output = current_agent.step(time_step) + time_step = env.step([agent_output.action]) + for agent in eva_agents: + agent.step(time_step) + + +class QueryableFixedSizeRingBufferTest(tf.test.TestCase): + + def test_replay_buffer_add(self): + replay_buffer = eva.QueryableFixedSizeRingBuffer(replay_buffer_capacity=10) + self.assertEmpty(replay_buffer) + replay_buffer.add("entry1") + self.assertLen(replay_buffer, 1) + replay_buffer.add("entry2") + self.assertLen(replay_buffer, 2) + + self.assertIn("entry1", replay_buffer) + self.assertIn("entry2", replay_buffer) + + def test_replay_buffer_max_capacity(self): + replay_buffer = eva.QueryableFixedSizeRingBuffer(replay_buffer_capacity=2) + replay_buffer.add("entry1") + replay_buffer.add("entry2") + replay_buffer.add("entry3") + self.assertLen(replay_buffer, 2) + + self.assertIn("entry2", replay_buffer) + self.assertIn("entry3", replay_buffer) + + def test_replay_buffer_sample(self): + replay_buffer = eva.QueryableFixedSizeRingBuffer(replay_buffer_capacity=3) + replay_buffer.add("entry1") + replay_buffer.add("entry2") + replay_buffer.add("entry3") + + samples = replay_buffer.sample(3) + + self.assertIn("entry1", samples) + self.assertIn("entry2", samples) + self.assertIn("entry3", samples) + + # TODO(author6) Test knn query. + + +if __name__ == "__main__": + tf.test.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/exploitability_descent.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/exploitability_descent.py new file mode 100644 index 0000000..5395d54 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/exploitability_descent.py @@ -0,0 +1,154 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Implementation of exploitability descent. 
+ +See "Computing Approximate Equilibria in Sequential Adversarial Games by +Exploitability Descent" https://arxiv.org/abs/1903.05614 + +The exploitability descent algorithm solves a game by repeatedly performing +the following update: + +1. Construct a (deterministic) best response to our current strategy +2. Compute the value of every action in every state when playing our current + strategy vs the best response. +3. Update our current strategy to do better vs the current best response + by performing a policy-gradient update. + +This module provides a function that returns a loss for network training, and +a Solver class that uses this loss in a tabular Exploitability Descent. + +The code can be used either for a tabular exploitability descent algorithm, +as demonstrated by exploitability_descent_test, or for a neural network policy, +as in ../examples/exploitability_descent.py. + +Additionally, for a minibatch version of the algorithm (which samples +uniformly across all states in the game to generate a minibatch), see the +minibatch_loss method. +""" + +import numpy as np +import tensorflow.compat.v1 as tf + +from open_spiel.python import policy +from open_spiel.python.algorithms import action_value_vs_best_response +from open_spiel.python.algorithms import masked_softmax + +# Temporary disabling of v2 behavior until code is updated. +tf.disable_v2_behavior() + +_NUM_PLAYERS = 2 + + +def _create_policy_evaluator(tabular_policy, q_value_calculator): + """Creates a function to evaluate our policy.""" + + def evaluate_policy(policy_values): + """Evaluates a tabular policy; intended to be used as a tf.py_function.""" + tabular_policy.action_probability_array = policy_values + evaluations = [ + q_value_calculator(player, tabular_policy, + tabular_policy.states_per_player[player]) + for player in range(_NUM_PLAYERS) + ] + nash_conv = np.float64(sum([evaluations[p].exploitability for p in [0, 1]])) + + q_values = np.concatenate( + [np.array(evaluations[p].values_vs_br, np.float64) for p in [0, 1]]) + cf_reach_probabilities = np.concatenate([ + np.array(evaluations[p].counterfactual_reach_probs_vs_br, np.float64) + for p in [0, 1] + ]) + return nash_conv, q_values, cf_reach_probabilities + + return evaluate_policy + + +class LossCalculator(object): + """Computes the exploitability descent loss for a two-player game.""" + + def __init__(self, game): + """Initializes a loss calculation for the given game.""" + if game.num_players() != _NUM_PLAYERS: + raise ValueError("Game {} does not have {} players.".format( + game, _NUM_PLAYERS)) + self.tabular_policy = policy.TabularPolicy(game) + self.q_value_calculator = action_value_vs_best_response.Calculator(game) + + def masked_softmax(self, logits): + """Safe masked softmax.""" + return masked_softmax.tf_masked_softmax( + logits, self.tabular_policy.legal_actions_mask) + + def loss(self, policy_values): + """Returns the exploitability descent loss given a policy.""" + + evaluate_policy = _create_policy_evaluator(self.tabular_policy, + self.q_value_calculator) + nash_conv, q_values, cf_reach_probabilities = tf.py_func( + evaluate_policy, [policy_values], [tf.float64, tf.float64, tf.float64]) + baseline = tf.reduce_sum(policy_values * q_values, axis=-1, keepdims=True) + advantage = q_values - tf.stop_gradient(baseline) + loss_per_state = -tf.reduce_sum(policy_values * advantage, axis=-1) + return nash_conv, tf.reduce_sum(loss_per_state * cf_reach_probabilities) + + def minibatch_loss(self, policy_values, q_values, indices): + """Returns the 
exploitability descent loss given a policy for a subset.""" + + evaluate_policy = _create_policy_evaluator(self.tabular_policy, + self.q_value_calculator) + nash_conv, real_q_values, cf_reach_probabilities = tf.py_func( + evaluate_policy, [policy_values], [tf.float64, tf.float64, tf.float64]) + baseline = tf.reduce_sum(policy_values * q_values, axis=-1, keepdims=True) + advantage = q_values - baseline + + # We now select a minibatch from the data to propagate our loss on. + policy_values = tf.gather(policy_values, indices) + advantage = tf.gather(advantage, indices) + cf_reach_probabilities = tf.gather(cf_reach_probabilities, indices) + + # The rest is the same as before. + loss_per_state = -tf.reduce_sum( + policy_values * tf.stop_gradient(advantage), axis=-1) + q_value_loss = tf.reduce_mean((q_values - real_q_values)**2, axis=1) + q_value_loss = tf.gather(q_value_loss, indices) + q_value_loss = tf.reduce_sum(q_value_loss * cf_reach_probabilities) + policy_loss = tf.reduce_sum(loss_per_state * cf_reach_probabilities) + return nash_conv, q_value_loss, policy_loss + + +class Solver(object): + """Solves a two-player game using exploitability descent.""" + + def __init__(self, game): + """Initializes a solver for the given game.""" + self._loss_calculator = LossCalculator(game) + self._logits = tf.Variable( + np.ones_like( + self._loss_calculator.tabular_policy.action_probability_array, + dtype=np.float64), + name="logits", + use_resource=True) + self._tabular_policy = self._loss_calculator.masked_softmax(self._logits) + self._nash_conv, self._loss = self._loss_calculator.loss( + self._tabular_policy) + self._learning_rate = tf.placeholder(tf.float64, (), name="learning_rate") + self._optimizer = tf.train.GradientDescentOptimizer(self._learning_rate) + self._optimizer_step = self._optimizer.minimize(self._loss) + + def step(self, session, learning_rate): + """Takes a single exploitability descent step.""" + _, nash_conv = session.run([self._optimizer_step, self._nash_conv], + feed_dict={self._learning_rate: learning_rate}) + return nash_conv diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/exploitability_descent_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/exploitability_descent_test.py new file mode 100644 index 0000000..de2a962 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/exploitability_descent_test.py @@ -0,0 +1,57 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import tensorflow.compat.v1 as tf + +from open_spiel.python.algorithms import exploitability_descent +import pyspiel + +# Temporary disabling of v2 behavior until code is updated. 
+tf.disable_v2_behavior() + + +class ExploitabilityDescentTest(tf.test.TestCase): + + def test_solver_kuhn_poker_first_10_steps_convergence(self): + solver = exploitability_descent.Solver(pyspiel.load_game("kuhn_poker")) + with self.session() as session: + session.run(tf.global_variables_initializer()) + nash_conv = [solver.step(session, learning_rate=1.0) for _ in range(11)] + + # Nash conv is 2x exploitability. Values taken from test run, not + # independently verified. + np.testing.assert_allclose(nash_conv, [ + 0.91666666666666652, 0.67893004801213452, 0.48109148836354743, + 0.40061420923255808, 0.36617242161468722, 0.33676996443499557, + 0.30925081512398128, 0.28827843035940964, 0.26830042206858751, + 0.24418597846799289, 0.22168699344791482 + ]) + + def test_solver_leduc_poker_first_10_steps_convergence(self): + solver = exploitability_descent.Solver(pyspiel.load_game("leduc_poker")) + with self.session() as session: + session.run(tf.global_variables_initializer()) + nash_conv = [solver.step(session, learning_rate=1.0) for _ in range(11)] + + # Nash conv is 2x exploitability. Values taken from test run, not + # independently verified. + np.testing.assert_allclose(nash_conv, [ + 4.7472224, 4.3147216, 3.9900389, 3.7576618, 3.5771275, 3.4414644, + 3.3272073, 3.1898201, 3.1089299, 3.0108435, 2.8992782 + ]) + + +if __name__ == "__main__": + tf.test.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/kuhn_policy_gradient_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/kuhn_policy_gradient_example.py new file mode 100644 index 0000000..f373195 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/kuhn_policy_gradient_example.py @@ -0,0 +1,107 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
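
As the in-test comments note, NashConv is twice the exploitability for these two-player games, and the first value in the Kuhn trajectory (0.9167) is the NashConv of the uniform random policy the solver starts from, since its logits are initialized to ones. Assuming the vendored tree is importable, this can be cross-checked with the same `nash_conv` helper the NFSP example later in this diff uses:

```python
import pyspiel
from open_spiel.python import policy
from open_spiel.python.algorithms import exploitability

game = pyspiel.load_game("kuhn_poker")
uniform = policy.TabularPolicy(game)  # TabularPolicy starts uniform over legal actions
print(exploitability.nash_conv(game, uniform))  # should be ~0.9167, the first test value
```
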
+ +"""Policy gradient agents trained and evaluated on Kuhn Poker.""" + +from absl import app +from absl import flags +from absl import logging +import tensorflow.compat.v1 as tf + +from open_spiel.python import policy +from open_spiel.python import rl_environment +from open_spiel.python.algorithms import exploitability +from open_spiel.python.algorithms import policy_gradient + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("num_episodes", int(1e6), "Number of train episodes.") +flags.DEFINE_integer("eval_every", int(1e4), "Eval agents every x episodes.") +flags.DEFINE_enum("loss_str", "rpg", ["a2c", "rpg", "qpg", "rm"], + "PG loss to use.") + + +class PolicyGradientPolicies(policy.Policy): + """Joint policy to be evaluated.""" + + def __init__(self, env, nfsp_policies): + game = env.game + player_ids = [0, 1] + super(PolicyGradientPolicies, self).__init__(game, player_ids) + self._policies = nfsp_policies + self._obs = {"info_state": [None, None], "legal_actions": [None, None]} + + def action_probabilities(self, state, player_id=None): + cur_player = state.current_player() + legal_actions = state.legal_actions(cur_player) + + self._obs["current_player"] = cur_player + self._obs["info_state"][cur_player] = ( + state.information_state_tensor(cur_player)) + self._obs["legal_actions"][cur_player] = legal_actions + + info_state = rl_environment.TimeStep( + observations=self._obs, rewards=None, discounts=None, step_type=None) + + p = self._policies[cur_player].step(info_state, is_evaluation=True).probs + prob_dict = {action: p[action] for action in legal_actions} + return prob_dict + + +def main(_): + game = "kuhn_poker" + num_players = 2 + + env_configs = {"players": num_players} + env = rl_environment.Environment(game, **env_configs) + info_state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + with tf.Session() as sess: + # pylint: disable=g-complex-comprehension + agents = [ + policy_gradient.PolicyGradient( + sess, + idx, + info_state_size, + num_actions, + loss_str=FLAGS.loss_str, + hidden_layers_sizes=(128,)) for idx in range(num_players) + ] + expl_policies_avg = PolicyGradientPolicies(env, agents) + + sess.run(tf.global_variables_initializer()) + for ep in range(FLAGS.num_episodes): + + if (ep + 1) % FLAGS.eval_every == 0: + losses = [agent.loss for agent in agents] + expl = exploitability.exploitability(env.game, expl_policies_avg) + msg = "-" * 80 + "\n" + msg += "{}: {}\n{}\n".format(ep + 1, expl, losses) + logging.info("%s", msg) + + time_step = env.reset() + while not time_step.last(): + player_id = time_step.observations["current_player"] + agent_output = agents[player_id].step(time_step) + action_list = [agent_output.action] + time_step = env.step(action_list) + + # Episode is over, step all agents with final info state. + for agent in agents: + agent.step(time_step) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/leduc_nfsp_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/leduc_nfsp_example.py new file mode 100644 index 0000000..82969cd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/leduc_nfsp_example.py @@ -0,0 +1,186 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""NFSP agents trained on Leduc Poker.""" + +from absl import app +from absl import flags +from absl import logging +import tensorflow.compat.v1 as tf + +from open_spiel.python import policy +from open_spiel.python import rl_environment +from open_spiel.python.algorithms import exploitability +from open_spiel.python.algorithms import nfsp + +FLAGS = flags.FLAGS + +flags.DEFINE_string("game_name", "leduc_poker", + "Name of the game.") +flags.DEFINE_integer("num_players", 2, + "Number of players.") +flags.DEFINE_integer("num_train_episodes", int(20e6), + "Number of training episodes.") +flags.DEFINE_integer("eval_every", 10000, + "Episode frequency at which the agents are evaluated.") +flags.DEFINE_list("hidden_layers_sizes", [ + 128, +], "Number of hidden units in the avg-net and Q-net.") +flags.DEFINE_integer("replay_buffer_capacity", int(2e5), + "Size of the replay buffer.") +flags.DEFINE_integer("reservoir_buffer_capacity", int(2e6), + "Size of the reservoir buffer.") +flags.DEFINE_integer("min_buffer_size_to_learn", 1000, + "Number of samples in buffer before learning begins.") +flags.DEFINE_float("anticipatory_param", 0.1, + "Prob of using the rl best response as episode policy.") +flags.DEFINE_integer("batch_size", 128, + "Number of transitions to sample at each learning step.") +flags.DEFINE_integer("learn_every", 64, + "Number of steps between learning updates.") +flags.DEFINE_float("rl_learning_rate", 0.01, + "Learning rate for inner rl agent.") +flags.DEFINE_float("sl_learning_rate", 0.01, + "Learning rate for avg-policy sl network.") +flags.DEFINE_string("optimizer_str", "sgd", + "Optimizer, choose from 'adam', 'sgd'.") +flags.DEFINE_string("loss_str", "mse", + "Loss function, choose from 'mse', 'huber'.") +flags.DEFINE_integer("update_target_network_every", 19200, + "Number of steps between DQN target network updates.") +flags.DEFINE_float("discount_factor", 1.0, + "Discount factor for future rewards.") +flags.DEFINE_integer("epsilon_decay_duration", int(20e6), + "Number of game steps over which epsilon is decayed.") +flags.DEFINE_float("epsilon_start", 0.06, + "Starting exploration parameter.") +flags.DEFINE_float("epsilon_end", 0.001, + "Final exploration parameter.") +flags.DEFINE_string("evaluation_metric", "nash_conv", + "Choose from 'exploitability', 'nash_conv'.") +flags.DEFINE_bool("use_checkpoints", True, "Save/load neural network weights.") +flags.DEFINE_string("checkpoint_dir", "/tmp/nfsp_test", + "Directory to save/load the agent.") + + +class NFSPPolicies(policy.Policy): + """Joint policy to be evaluated.""" + + def __init__(self, env, nfsp_policies, mode): + game = env.game + player_ids = list(range(FLAGS.num_players)) + super(NFSPPolicies, self).__init__(game, player_ids) + self._policies = nfsp_policies + self._mode = mode + self._obs = { + "info_state": [None] * FLAGS.num_players, + "legal_actions": [None] * FLAGS.num_players + } + + def action_probabilities(self, state, player_id=None): + cur_player = state.current_player() + legal_actions = state.legal_actions(cur_player) + + self._obs["current_player"] = cur_player + 
self._obs["info_state"][cur_player] = ( + state.information_state_tensor(cur_player)) + self._obs["legal_actions"][cur_player] = legal_actions + + info_state = rl_environment.TimeStep( + observations=self._obs, rewards=None, discounts=None, step_type=None) + + with self._policies[cur_player].temp_mode_as(self._mode): + p = self._policies[cur_player].step(info_state, is_evaluation=True).probs + prob_dict = {action: p[action] for action in legal_actions} + return prob_dict + + +def main(unused_argv): + logging.info("Loading %s", FLAGS.game_name) + game = FLAGS.game_name + num_players = FLAGS.num_players + + env_configs = {"players": num_players} + env = rl_environment.Environment(game, **env_configs) + info_state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + hidden_layers_sizes = [int(l) for l in FLAGS.hidden_layers_sizes] + kwargs = { + "replay_buffer_capacity": FLAGS.replay_buffer_capacity, + "reservoir_buffer_capacity": FLAGS.reservoir_buffer_capacity, + "min_buffer_size_to_learn": FLAGS.min_buffer_size_to_learn, + "anticipatory_param": FLAGS.anticipatory_param, + "batch_size": FLAGS.batch_size, + "learn_every": FLAGS.learn_every, + "rl_learning_rate": FLAGS.rl_learning_rate, + "sl_learning_rate": FLAGS.sl_learning_rate, + "optimizer_str": FLAGS.optimizer_str, + "loss_str": FLAGS.loss_str, + "update_target_network_every": FLAGS.update_target_network_every, + "discount_factor": FLAGS.discount_factor, + "epsilon_decay_duration": FLAGS.epsilon_decay_duration, + "epsilon_start": FLAGS.epsilon_start, + "epsilon_end": FLAGS.epsilon_end, + } + + with tf.Session() as sess: + # pylint: disable=g-complex-comprehension + agents = [ + nfsp.NFSP(sess, idx, info_state_size, num_actions, hidden_layers_sizes, + **kwargs) for idx in range(num_players) + ] + joint_avg_policy = NFSPPolicies(env, agents, nfsp.MODE.average_policy) + + sess.run(tf.global_variables_initializer()) + + if FLAGS.use_checkpoints: + for agent in agents: + if agent.has_checkpoint(FLAGS.checkpoint_dir): + agent.restore(FLAGS.checkpoint_dir) + + for ep in range(FLAGS.num_train_episodes): + if (ep + 1) % FLAGS.eval_every == 0: + losses = [agent.loss for agent in agents] + logging.info("Losses: %s", losses) + if FLAGS.evaluation_metric == "exploitability": + # Avg exploitability is implemented only for 2 players constant-sum + # games, use nash_conv otherwise. + expl = exploitability.exploitability(env.game, joint_avg_policy) + logging.info("[%s] Exploitability AVG %s", ep + 1, expl) + elif FLAGS.evaluation_metric == "nash_conv": + nash_conv = exploitability.nash_conv(env.game, joint_avg_policy) + logging.info("[%s] NashConv %s", ep + 1, nash_conv) + else: + raise ValueError(" ".join(("Invalid evaluation metric, choose from", + "'exploitability', 'nash_conv'."))) + if FLAGS.use_checkpoints: + for agent in agents: + agent.save(FLAGS.checkpoint_dir) + logging.info("_____________________________________________") + + time_step = env.reset() + while not time_step.last(): + player_id = time_step.observations["current_player"] + agent_output = agents[player_id].step(time_step) + action_list = [agent_output.action] + time_step = env.step(action_list) + + # Episode is over, step all agents with final info state. 
+ for agent in agents: + agent.step(time_step) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/lewis_signaling_dqn_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/lewis_signaling_dqn_example.py new file mode 100644 index 0000000..1eb0d6d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/lewis_signaling_dqn_example.py @@ -0,0 +1,265 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""DQN example on Lewis Signaling Game.""" + +import copy +from absl import app +from absl import flags +import numpy as np +import tensorflow.compat.v1 as tf + +from open_spiel.python import rl_environment +from open_spiel.python.algorithms import dqn + +FLAGS = flags.FLAGS + +# Env parameters +flags.DEFINE_integer("num_states", 3, "Number of states and actions") +flags.DEFINE_integer("num_messages", 3, "Number of messages") +flags.DEFINE_string("payoffs", "1, 0, 0, 0, 1, 0, 0, 0, 1", + "Payoffs to use ('random' for random [0, 1) payoffs)") + +# Alg parameters +flags.DEFINE_integer("num_episodes", 50000, "Number of train episodes") +flags.DEFINE_float("step_size", 0.1, "Step size for updates") +flags.DEFINE_float("eps_init", 1.0, "Initial value of epsilon") +flags.DEFINE_float("eps_final", 0.0, "Final value of epsilon") +flags.DEFINE_integer("eps_decay_steps", 49000, + "Number of episodes to decay epsilon") +flags.DEFINE_integer("replay_buffer_capacity", int(1e4), + "Size of replay buffer") + +# Misc parameters +flags.DEFINE_integer("num_runs", 10, "Number of repetitions") +flags.DEFINE_integer("log_interval", 100, + "Number of episodes between each logging") +flags.DEFINE_bool("plot", False, "Set to plot the graphs") + + +def main(_): + game = "lewis_signaling" + num_players = 2 + + num_states = FLAGS.num_states + num_messages = FLAGS.num_messages + if FLAGS.payoffs == "random": + payoffs = np.random.random((num_states, num_states)) + payoffs_str = ",".join([str(x) for x in payoffs.flatten()]) + elif FLAGS.payoffs == "climbing": + # This is a particular payoff matrix that is hard for decentralized + # algorithms. Introduced in C. Claus and C. Boutilier, "The dynamics of + # reinforcement learning in cooperative multiagent systems", 1998, for + # simultaneous action games, but it is difficult even in the case of + # signaling games. + payoffs = np.array([[11, -30, 0], [-30, 7, 6], [0, 0, 5]]) / 30 + payoffs_str = ",".join([str(x) for x in payoffs.flatten()]) + else: + payoffs_str = FLAGS.payoffs + try: + payoffs_list = [float(x) for x in payoffs_str.split(",")] + payoffs = np.array(payoffs_list).reshape((num_states, num_states)) + except ValueError: + raise ValueError( + "There should be {} (states * actions) elements in payoff. 
" + "Found {} elements" + .format(num_states * num_states, len(payoffs_list))) from None + + env_configs = { + "num_states": num_states, + "num_messages": num_messages, + "payoffs": payoffs_str + } + + env = rl_environment.Environment(game, **env_configs) + state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + replay_buffer_capacity = FLAGS.replay_buffer_capacity + + # Results to store + num_runs = FLAGS.num_runs + training_episodes = FLAGS.num_episodes + log_interval = FLAGS.log_interval + rewards = np.zeros((num_runs, training_episodes // log_interval)) + opts = np.zeros((num_runs, training_episodes // log_interval)) + converge_point = np.zeros((num_states, num_states)) + percent_opt = 0 + + # Repeat the experiment num_runs times + for i in range(num_runs): + with tf.Session() as sess: + # pylint: disable=g-complex-comprehension + agents = [ + dqn.DQN( + sess, + player_id=idx, + state_representation_size=state_size, + num_actions=num_actions, + learning_rate=FLAGS.step_size, + replay_buffer_capacity=replay_buffer_capacity, + epsilon_start=FLAGS.eps_init, + epsilon_end=FLAGS.eps_final, + epsilon_decay_duration=FLAGS.eps_decay_steps * 2) + for idx in range(num_players) + ] + + # 1. Train the agents + for cur_episode in range(training_episodes): + time_step = env.reset() + # Find cur_state for logging. See lewis_signaling.cc for info_state + # details + cur_state = time_step.observations["info_state"][0][3:].index(1) + while not time_step.last(): + player_id = time_step.observations["current_player"] + agent_output = agents[player_id].step(time_step) + time_step = env.step([agent_output.action]) + + # Episode is over, step all agents with final info state. + for agent in agents: + agent.step(time_step) + + # Store rewards + reward = time_step.rewards[0] + max_reward = payoffs[cur_state].max() + cur_idx = (i, cur_episode // log_interval) + rewards[cur_idx] += reward / log_interval + opts[cur_idx] += np.isclose(reward, max_reward) / log_interval + + base_info_state0 = [1.0, 0.0, 0.0] + [0.0] * num_states + base_info_state1 = [0.0, 1.0, 0.0] + [0.0] * num_states + + for s in range(num_states): + info_state0 = copy.deepcopy(base_info_state0) + info_state0[3 + s] = 1.0 + # pylint: disable=protected-access + m, _ = agents[0]._epsilon_greedy(info_state0, np.arange(num_messages), + 0) + info_state1 = copy.deepcopy(base_info_state1) + info_state1[3 + m] = 1.0 + a, _ = agents[1]._epsilon_greedy(info_state1, np.arange(num_states), 0) + converge_point[s, a] += 1 + best_act = payoffs[s].argmax() + percent_opt += int(a == best_act) / num_runs / num_states + + if FLAGS.plot: + # pylint: disable=g-import-not-at-top + import matplotlib as mpl + import matplotlib.pyplot as plt + from scipy import stats + + params = { + "font.size": 13, + "axes.labelsize": 13, + "xtick.labelsize": 13, + "ytick.labelsize": 13, + } + mpl.rcParams.update(params) + + def init_fig(): + fig, ax = plt.subplots(1, 1) + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + return fig, ax + + def plot_scalars(scalars, + repetition_axis=0, + scalar_labels=None, + title=None, + ax_labels=None): + """Plots scalar on ax by filling 1 standard error. 
+ + Args: + scalars: List of scalars to plot (mean taken over repetition + axis) + repetition_axis: Axis to take the mean over + scalar_labels: Labels for the scalars (for legend) + title: Figure title + ax_labels: Labels for x and y axis (list of 2 strings) + """ + if not all([len(s.shape) == 2 for s in scalars]): + raise ValueError("Only 2D arrays supported for plotting") + + if scalar_labels is None: + scalar_labels = [None] * len(scalars) + + if len(scalars) != len(scalar_labels): + raise ValueError( + "Wrong number of scalar labels, expected {} but received {}".format( + len(scalars), len(scalar_labels))) + + _, plot_axis = init_fig() + for i, scalar in enumerate(scalars): + xs = np.arange(scalar.shape[1 - repetition_axis]) * FLAGS.log_interval + mean = scalar.mean(axis=repetition_axis) + sem = stats.sem(scalar, axis=repetition_axis) + plot_axis.plot(xs, mean, label=scalar_labels[i]) + plot_axis.fill_between(xs, mean - sem, mean + sem, alpha=0.5) + + if title is not None: + plot_axis.set_title(title) + if ax_labels is not None: + plot_axis.set_xlabel(ax_labels[0]) + plot_axis.set_ylabel(ax_labels[1]) + + def plot_confusion_matrix(cm, cmap=plt.cm.Blues, title=None): + """Plot the confusion matrix. + + Args: + cm (np.ndarray): Confusion matrix to plot + cmap: Color map to be used in matplotlib's imshow + title: Figure title + + Returns: + Figure and axis on which the confusion matrix is plotted. + """ + fig, ax = plt.subplots() + ax.imshow(cm, interpolation="nearest", cmap=cmap) + ax.set_xticks([]) + ax.set_yticks([]) + ax.set_xlabel("Receiver's action", fontsize=14) + ax.set_ylabel("Sender's state", fontsize=14) + # Loop over data dimensions and create text annotations. + fmt = "d" + thresh = cm.max() / 2. + for i in range(cm.shape[0]): + for j in range(cm.shape[1]): + ax.text( + j, + i, + format(cm[i, j], fmt), + ha="center", + va="center", + color="white" if cm[i, j] > thresh else "black") + fig.tight_layout() + if title is not None: + ax.set_title(title) + return fig, ax + + plot_scalars([rewards], + title="Reward graph (DQN)", + ax_labels=["Episodes", "Reward per episode"]) + plot_scalars([opts], + title="Percentage of optimal actions (DQN)", + ax_labels=["Episodes", "% optimal actions"]) + + plot_confusion_matrix( + converge_point.astype(int), title="Final policy (DQN)") + + plt.show() + + return percent_opt + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/masked_softmax.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/masked_softmax.py new file mode 100644 index 0000000..63c171b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/masked_softmax.py @@ -0,0 +1,58 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Various masked_softmax implementations, both in numpy and tensorflow.""" + +import numpy as np +import tensorflow.compat.v1 as tf + +# Temporarily disable TF2 behavior until the code is updated. 
+tf.disable_v2_behavior() + + +def tf_masked_softmax(logits, legal_actions_mask): + """Returns the softmax over the valid actions defined by `legal_actions_mask`. + + Args: + logits: A tensor [..., num_actions] (e.g. [num_actions] or [B, num_actions]) + representing the logits to mask. + legal_actions_mask: The legal action mask, same shape as logits. 1 means + it's a legal action, 0 means it's illegal. If can be a tensorflow or numpy + tensor. + """ + # This will raise a warning as we are taking the log of 0, which sets the 0 + # values to -inf. However, this is fine, as we then apply tf.exp, which sets + # tf.exp(-inf) to 0. e.g. if we have logits [5, 3, 1], with legal_mask + # [0, 1, 1], then masked_logits == [-inf, 3, 1], so we subtract the max to + # get [-inf, 0, -2], and apply tf.exp to get [0, 1, e^-2]. + legal_actions_mask = tf.cast(legal_actions_mask, dtype=logits.dtype) + masked_logits = logits + tf.log(legal_actions_mask) + max_logit = tf.reduce_max(masked_logits, axis=-1, keepdims=True) + exp_logit = tf.exp(masked_logits - max_logit) + return exp_logit / tf.reduce_sum(exp_logit, axis=-1, keepdims=True) + + +def np_masked_softmax(logits, legal_actions_mask): + """Returns the softmax over the valid actions defined by `legal_actions_mask`. + + Args: + logits: A tensor [..., num_actions] (e.g. [num_actions] or [B, num_actions]) + representing the logits to mask. + legal_actions_mask: The legal action mask, same shape as logits. 1 means + it's a legal action, 0 means it's illegal. + """ + masked_logits = logits + np.log(legal_actions_mask) + max_logit = np.amax(masked_logits, axis=-1, keepdims=True) + exp_logit = np.exp(masked_logits - max_logit) + return exp_logit / np.sum(exp_logit, axis=-1, keepdims=True) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/masked_softmax_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/masked_softmax_test.py new file mode 100644 index 0000000..be22adb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/masked_softmax_test.py @@ -0,0 +1,162 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math + +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np +import tensorflow.compat.v1 as tf + +from open_spiel.python.algorithms import masked_softmax + +# Temporarily disable TF2 behavior until the code is updated. 
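
The worked example in the `tf_masked_softmax` comment above (logits [5, 3, 1], mask [0, 1, 1]) can be reproduced directly with the NumPy variant; a short sketch, using the same import path as the test:

```python
import numpy as np
from open_spiel.python.algorithms import masked_softmax  # same import path as the test

logits = np.array([5.0, 3.0, 1.0])
mask = np.array([0.0, 1.0, 1.0])  # the first action is illegal
probs = masked_softmax.np_masked_softmax(logits, mask)
# masked logits -> [-inf, 3, 1]; minus max -> [-inf, 0, -2]; exp -> [0, 1, e^-2]
# probs is approximately [0.0, 0.881, 0.119] (numpy warns about log(0); the
# result is still correct, as the module comment explains).
```
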
+tf.disable_v2_behavior() + + +exp = math.exp # For shorter lines + +_BATCH_INPUTS = np.asarray([ + [1.0, 2.0, 3.0], + [4.0, 5.0, 6.0], + [7.0, 8.0, 9.0], + [10.0, 11.0, 12.0], + [13.0, 14.0, 15.0], + [16.0, 17.0, 18.0], +]) +_BATCH_MASK = np.asarray([ + [1.0, 1.0, 1.0], + [1.0, 0.0, 1.0], + [0.0, 1.0, 1.0], + [0.0, 0.0, 1.0], + [0.0, 1.0, 0.0], + [1.0, 0.0, 0.0], +]) +total_row_0 = exp(1) + exp(2) + exp(3) +total_row_1 = exp(4) + exp(6) +total_row_2 = exp(8) + exp(9) +# pyformat: disable +_BATCH_EXPECTED = np.asarray([ + [exp(1) / total_row_0, exp(2) / total_row_0, exp(3) / total_row_0], + [exp(4) / total_row_1, 0, exp(6) / total_row_1], + [0, exp(8) / total_row_2, exp(9) / total_row_2], + [0, 0, 1], + [0, 1, 0], + [1, 0, 0], +]) +# pyformat: enable + +# The following provides a 2-batch set of time-sequence policies. +# [B, T, num_actions] = 2, 3, 3 +_B_T_LOGITS = np.asarray([[ + [1.0, 2.0, 3.0], + [4.0, 5.0, 6.0], + [7.0, 8.0, 9.0], +], [ + [10.0, 11.0, 12.0], + [13.0, 14.0, 15.0], + [16.0, 17.0, 18.0], +]]) +_B_T_MASK = np.asarray([[ + [1.0, 1.0, 1.0], + [1.0, 0.0, 1.0], + [0.0, 1.0, 1.0], +], [ + [0.0, 0.0, 1.0], + [0.0, 1.0, 0.0], + [1.0, 0.0, 0.0], +]]) +_B_T_EXPECTED = np.asarray([[ + [exp(1) / total_row_0, + exp(2) / total_row_0, + exp(3) / total_row_0], + [exp(4) / total_row_1, 0, exp(6) / total_row_1], + [0, exp(8) / total_row_2, exp(9) / total_row_2], +], [ + [0, 0, 1], + [0, 1, 0], + [1, 0, 0], +]]) +array = np.asarray +# We test over all the above examples. +_ALL_TESTS_INPUTS = [ + # Non-batch inputs + (array([1., 1.]), array([1., 1.]), array([.5, .5])), + (array([1., 1.]), array([0., 1.]), array([0., 1.])), + (array([1., 1.]), array([1., 0.]), array([1., 0.])), + (array([1., 1., 1]), array([1., 1., 0]), array([.5, .5, 0.])), + # Batch-inputs + (_BATCH_INPUTS, _BATCH_MASK, _BATCH_EXPECTED), + # Batch-time inputs + (_B_T_LOGITS, _B_T_MASK, _B_T_EXPECTED), +] + + +class MaskedSoftmaxTest(parameterized.TestCase, absltest.TestCase): + + @parameterized.parameters(_ALL_TESTS_INPUTS) + def test_np_masked_softmax(self, logits, legal_actions, expected): + np.testing.assert_array_almost_equal( + expected, masked_softmax.np_masked_softmax(logits, legal_actions)) + + @parameterized.parameters(_ALL_TESTS_INPUTS) + def test_tf_masked_softmax(self, np_logits, np_legal_actions, expected): + logits = tf.Variable(np_logits, tf.float32) + mask = tf.Variable(np_legal_actions, tf.float32) + + policy = masked_softmax.tf_masked_softmax(logits, mask) + + with tf.Session() as sess: + sess.run(tf.global_variables_initializer()) + np_policy = sess.run(policy) + + np.testing.assert_array_almost_equal(expected, np_policy) + + def test_masked_softmax_on_all_invalid_moves(self): + # If all actions are illegal, the behavior is undefined (it can be nan + # or can be 0. We add this test to document this behavior and know if we + # change it. + np_logits = np.asarray([[ + [1.0, 1.0, 1.0], + [0.0, 0.0, 0.0], + [0.0, 0.0, 0.0], + ]]) + logits = tf.Variable(np_logits, tf.float32) + np_mask = np.asarray([[ + [1.0, 1.0, 1.0], + [1.0, 0.0, 1.0], + [0.0, 0.0, 0.0], + ]]) + mask = tf.Variable(np_mask, tf.float32) + + expected = np.asarray([[ + [1 / 3, 1 / 3, 1 / 3], + [1 / 2, 0.0, 1 / 2], + [np.nan, np.nan, np.nan], + ]]) + + policy = masked_softmax.tf_masked_softmax(logits, mask) + + with tf.Session() as sess: + sess.run(tf.global_variables_initializer()) + np_policy = sess.run(policy) + np.testing.assert_array_almost_equal(expected, np_policy) + + # Numpy behaves similarly. 
+ np.testing.assert_array_almost_equal( + expected, masked_softmax.np_masked_softmax(np_logits, np_mask)) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/neurd.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/neurd.py new file mode 100644 index 0000000..8f80092 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/neurd.py @@ -0,0 +1,281 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Neural Replicator Dynamics [Omidshafiei et al, 2019]. + +A policy gradient-like extension to replicator dynamics and the hedge algorithm +that incorporates function approximation. + +# References + +Shayegan Omidshafiei, Daniel Hennes, Dustin Morrill, Remi Munos, + Julien Perolat, Marc Lanctot, Audrunas Gruslys, Jean-Baptiste Lespiau, + Karl Tuyls. Neural Replicator Dynamics. https://arxiv.org/abs/1906.00190. + 2019. +""" + +import numpy as np +import tensorflow.compat.v1 as tf + +from open_spiel.python.algorithms import rcfr + +# Temporarily disable TF2 behavior while the code is not updated. +tf.disable_v2_behavior() + + +def thresholded(logits, regrets, threshold=2.0): + """Zeros out `regrets` where `logits` are too negative or too large.""" + can_decrease = tf.cast(tf.greater(logits, -threshold), tf.float32) + can_increase = tf.cast(tf.less(logits, threshold), tf.float32) + regrets_negative = tf.minimum(regrets, 0.0) + regrets_positive = tf.maximum(regrets, 0.0) + return can_decrease * regrets_negative + can_increase * regrets_positive + + +@tf.function +def train(model, + data, + batch_size, + step_size=1.0, + threshold=2.0, + random_shuffle_size=None, + autoencoder_loss=None): + """Train NeuRD `model` on `data`.""" + if random_shuffle_size is None: + random_shuffle_size = 10 * batch_size + data = data.shuffle(random_shuffle_size) + data = data.batch(batch_size) + data = data.repeat(1) + + for x, regrets in data: + with tf.GradientTape() as tape: + output = model(x, training=True) + logits = output[:, :1] + logits = logits - tf.reduce_mean(logits, keepdims=True) + + regrets = tf.stop_gradient( + thresholded(logits, regrets, threshold=threshold)) + utility = tf.reduce_mean(logits * regrets) + + if autoencoder_loss is not None: + utility = utility - autoencoder_loss(x, output[:, 1:]) + + grad = tape.gradient(utility, model.trainable_variables) + + for i, var in enumerate(model.trainable_variables): + var.assign_add(step_size * grad[i]) + + +class DeepNeurdModel(object): + """A flexible deep feedforward NeuRD model class. + + Properties: + layers: The `tf.keras.Layer` layers describing this model. + trainable_variables: The trainable `tf.Variable`s in this model's `layers`. + losses: This model's layer specific losses (e.g. regularizers). 
+ """ + + def __init__(self, + game, + num_hidden_units, + num_hidden_layers=1, + num_hidden_factors=0, + hidden_activation=tf.nn.relu, + use_skip_connections=False, + regularizer=None, + autoencode=False): + """Creates a new `DeepNeurdModel. + + Args: + game: The OpenSpiel game being solved. + num_hidden_units: The number of units in each hidden layer. + num_hidden_layers: The number of hidden layers. Defaults to 1. + num_hidden_factors: The number of hidden factors or the matrix rank of the + layer. If greater than zero, hidden layers will be split into two + separate linear transformations, the first with + `num_hidden_factors`-columns and the second with + `num_hidden_units`-columns. The result is that the logical hidden layer + is a rank-`num_hidden_units` matrix instead of a rank-`num_hidden_units` + matrix. When `num_hidden_units < num_hidden_units`, this is effectively + implements weight sharing. Defaults to 0. + hidden_activation: The activation function to apply over hidden layers. + Defaults to `tf.nn.relu`. + use_skip_connections: Whether or not to apply skip connections (layer + output = layer(x) + x) on hidden layers. Zero padding or truncation is + used to match the number of columns on layer inputs and outputs. + regularizer: A regularizer to apply to each layer. Defaults to `None`. + autoencode: Whether or not to output a reconstruction of the inputs upon + being called. Defaults to `False`. + """ + + self._autoencode = autoencode + self._use_skip_connections = use_skip_connections + self._hidden_are_factored = num_hidden_factors > 0 + + self.layers = [] + for _ in range(num_hidden_layers): + if self._hidden_are_factored: + self.layers.append( + tf.keras.layers.Dense( + num_hidden_factors, + use_bias=True, + kernel_regularizer=regularizer)) + + self.layers.append( + tf.keras.layers.Dense( + num_hidden_units, + use_bias=True, + activation=hidden_activation, + kernel_regularizer=regularizer)) + + self.layers.append( + tf.keras.layers.Dense( + 1 + self._autoencode * rcfr.num_features(game), + use_bias=True, + kernel_regularizer=regularizer)) + + # Construct variables for all layers by exercising the network. + x = tf.zeros([1, rcfr.num_features(game)]) + for layer in self.layers: + x = layer(x) + + self.trainable_variables = sum( + [layer.trainable_variables for layer in self.layers], []) + self.losses = sum([layer.losses for layer in self.layers], []) + + def __call__(self, x, training=False): + """Evaluates this model on x. + + Args: + x: Model input. + training: Whether or not this is being called during training. If + `training` and the constructor argument `autoencode` was `True`, then + the output will contain the estimated regrets concatenated with a + reconstruction of the input, otherwise only regrets will be returned. + Defaults to `False`. + + Returns: + The `tf.Tensor` resulting from evaluating this model on `x`. If + `training` and the constructor argument `autoencode` was `True`, then + it will contain the estimated regrets concatenated with a + reconstruction of the input, otherwise only regrets will be returned. + """ + y = rcfr.feedforward_evaluate( + layers=self.layers, + x=x, + use_skip_connections=self._use_skip_connections, + hidden_are_factored=self._hidden_are_factored) + return y if training else y[:, :1] + + +class CounterfactualNeurdSolver(object): + """All-actions, strong NeuRD on counterfactual regrets. + + No regularization bonus is applied, so the current policy likely will not + converge. 
The average policy profile is updated and stored in a full + game-size table and may converge to an approximate Nash equilibrium in + two-player, zero-sum games. + """ + + def __init__(self, game, models, session=None): + """Creates a new `CounterfactualNeurdSolver`. + + Args: + game: An OpenSpiel `Game`. + models: Current policy models (optimizable array-like -> `tf.Tensor` + callables) for both players. + session: A TensorFlow `Session` to convert sequence weights from + `tf.Tensor`s produced by `models` to `np.array`s. If `None`, it is + assumed that eager mode is enabled. Defaults to `None`. + """ + self._game = game + self._models = models + self._root_wrapper = rcfr.RootStateWrapper(game.new_initial_state()) + self._session = session + + self._cumulative_seq_probs = [ + np.zeros(n) for n in self._root_wrapper.num_player_sequences + ] + + def _sequence_weights(self, player=None): + """Returns exponentiated weights for each sequence as an `np.array`.""" + if player is None: + return [ + self._sequence_weights(player) + for player in range(self._game.num_players()) + ] + else: + tensor = tf.squeeze(self._models[player]( + self._root_wrapper.sequence_features[player])) + tensor = tensor - tf.reduce_max(tensor, keepdims=True) + tensor = tf.math.exp(tensor) + return tensor.numpy() if self._session is None else self._session(tensor) + + def current_policy(self): + """Returns the current policy profile. + + Returns: + A `dict>` that maps info state + strings to `Action`-probability pairs describing each player's policy. + """ + return self._root_wrapper.sequence_weights_to_tabular_profile( + self._sequence_weights()) + + def average_policy(self): + """Returns the average of all policies iterated. + + The policy is computed using the accumulated policy probabilities computed + using `evaluate_and_update_policy`. + + Returns: + A `dict>` that maps info state + strings to (Action, probability) pairs describing each player's policy. + """ + return self._root_wrapper.sequence_weights_to_tabular_profile( + self._cumulative_seq_probs) + + def _previous_player(self, player): + """The previous player in the turn ordering.""" + return player - 1 if player > 0 else self._game.num_players() - 1 + + def _average_policy_update_player(self, regret_player): + """The player for whom the average policy should be updated.""" + return self._previous_player(regret_player) + + def evaluate_and_update_policy(self, train_fn): + """Performs a single step of policy evaluation and policy improvement. + + Args: + train_fn: A (model, `tf.data.Dataset`) function that trains the given + regression model to accurately reproduce the x to y mapping given x-y + data. 
+ """ + sequence_weights = self._sequence_weights() + player_seq_features = self._root_wrapper.sequence_features + for regret_player in range(self._game.num_players()): + seq_prob_player = self._average_policy_update_player(regret_player) + + regrets, seq_probs = ( + self._root_wrapper.counterfactual_regrets_and_reach_weights( + regret_player, seq_prob_player, *sequence_weights)) + + self._cumulative_seq_probs[seq_prob_player] += seq_probs + + targets = tf.expand_dims(regrets.astype('float32'), axis=1) + data = tf.data.Dataset.from_tensor_slices( + (player_seq_features[regret_player], targets)) + + regret_player_model = self._models[regret_player] + train_fn(regret_player_model, data) + sequence_weights[regret_player] = self._sequence_weights(regret_player) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/neurd_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/neurd_example.py new file mode 100644 index 0000000..c646f07 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/neurd_example.py @@ -0,0 +1,96 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Example use of the NeuRD algorithm on Kuhn Poker. + +This NeuRD implementation does not use an entropy bonus to ensure that the +current joint policy approaches an equilibrium in zero-sum games, but it +tracks the exact tabular average so that the average policy approaches an +equilibrium (assuming the policy networks train well). +""" + +from absl import app +from absl import flags +import tensorflow.compat.v1 as tf + +from open_spiel.python.algorithms import neurd +import pyspiel + +tf.enable_eager_execution() + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("iterations", 1000, "Number of iterations") +flags.DEFINE_string("game", "kuhn_poker", "Name of the game") +flags.DEFINE_integer("players", 2, "Number of players") +flags.DEFINE_integer("print_freq", 100, "How often to print the exploitability") +flags.DEFINE_integer("num_hidden_layers", 1, + "The number of hidden layers in the policy model.") +flags.DEFINE_integer("num_hidden_units", 13, + "The number of hidden layers in the policy model.") +flags.DEFINE_integer( + "num_hidden_factors", 8, + "The number of factors in each hidden layer in the policy model.") +flags.DEFINE_boolean( + "use_skip_connections", True, + "Whether or not to use skip connections in the policy model.") +flags.DEFINE_integer("batch_size", 100, "The policy model training batch size.") +flags.DEFINE_float( + "threshold", 2., + "Logits of the policy model will be discouraged from growing beyond " + "`threshold`.") +flags.DEFINE_float("step_size", 1.0, "Policy model step size.") +flags.DEFINE_boolean( + "autoencode", False, + "Whether or not to augment the policy model with outputs that attempt to " + "reproduce the model inputs. 
The policy model is updated online so " + "training with the reproduction error as an auxiliary task helps to keep " + "the model stable in the absence of an entropy bonus.") + + +def main(_): + game = pyspiel.load_game(FLAGS.game, {"players": FLAGS.players}) + + models = [] + for _ in range(game.num_players()): + models.append( + neurd.DeepNeurdModel( + game, + num_hidden_layers=FLAGS.num_hidden_layers, + num_hidden_units=FLAGS.num_hidden_units, + num_hidden_factors=FLAGS.num_hidden_factors, + use_skip_connections=FLAGS.use_skip_connections, + autoencode=FLAGS.autoencode)) + + solver = neurd.CounterfactualNeurdSolver(game, models) + + def _train(model, data): + neurd.train( + model, + data, + batch_size=FLAGS.batch_size, + step_size=FLAGS.step_size, + threshold=FLAGS.threshold, + autoencoder_loss=(tf.compat.v1.losses.huber_loss + if FLAGS.autoencode else None)) + + for i in range(FLAGS.iterations): + solver.evaluate_and_update_policy(_train) + if i % FLAGS.print_freq == 0: + conv = pyspiel.exploitability(game, solver.average_policy()) + print("Iteration {} exploitability {}".format(i, conv)) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/neurd_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/neurd_test.py new file mode 100644 index 0000000..89fa4f5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/neurd_test.py @@ -0,0 +1,72 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from absl.testing import absltest + +import tensorflow.compat.v1 as tf + +from open_spiel.python.algorithms import neurd +import pyspiel + +# Temporarily disable TF2 behavior while the code is not updated. 
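
The `threshold` flag above feeds `neurd.thresholded` (defined earlier in this diff), which zeroes any regret that would push a logit further past plus or minus `threshold`. A minimal NumPy rendering of that rule, with made-up numbers, to show the behaviour:

```python
import numpy as np

def np_thresholded(logits, regrets, threshold=2.0):
    """NumPy rendering of neurd.thresholded (illustrative only)."""
    can_decrease = (logits > -threshold).astype(float)
    can_increase = (logits < threshold).astype(float)
    return (can_decrease * np.minimum(regrets, 0.0) +
            can_increase * np.maximum(regrets, 0.0))

logits = np.array([-3.0, 0.0, 3.0])
regrets = np.array([-1.0, -1.0, 1.0])
print(np_thresholded(logits, regrets))  # [ 0. -1.  0.]: updates that would push an
                                        # already-saturated logit further out are dropped
```
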
+tf.disable_v2_behavior() + +tf.enable_eager_execution() + +_GAME = pyspiel.load_game('kuhn_poker') + + +def _new_model(): + return neurd.DeepNeurdModel( + _GAME, + num_hidden_layers=1, + num_hidden_units=13, + num_hidden_factors=1, + use_skip_connections=True, + autoencode=True) + + +class NeurdTest(tf.test.TestCase): + + def setUp(self): + super(NeurdTest, self).setUp() + tf.set_random_seed(42) + + def test_neurd(self): + num_iterations = 2 + models = [_new_model() for _ in range(_GAME.num_players())] + + solver = neurd.CounterfactualNeurdSolver(_GAME, models) + + average_policy = solver.average_policy() + self.assertGreater(pyspiel.nash_conv(_GAME, average_policy), 0.91) + + @tf.function + def _train(model, data): + neurd.train( + model=model, + data=data, + batch_size=12, + step_size=10.0, + autoencoder_loss=tf.losses.huber_loss) + + for _ in range(num_iterations): + solver.evaluate_and_update_policy(_train) + + average_policy = solver.average_policy() + self.assertLess(pyspiel.nash_conv(_GAME, average_policy), 0.91) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/nfsp.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/nfsp.py new file mode 100644 index 0000000..dedde62 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/nfsp.py @@ -0,0 +1,378 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Neural Fictitious Self-Play (NFSP) agent implemented in TensorFlow. + +See the paper https://arxiv.org/abs/1603.01121 for more details. +""" + +import collections +import contextlib +import enum +import os +import random +from absl import logging +import numpy as np +import tensorflow.compat.v1 as tf + +from open_spiel.python import rl_agent +from open_spiel.python import simple_nets +from open_spiel.python.algorithms.tf import dqn + +# Temporarily disable TF2 behavior until code is updated. +tf.disable_v2_behavior() + +Transition = collections.namedtuple( + "Transition", "info_state action_probs legal_actions_mask") + +MODE = enum.Enum("mode", "best_response average_policy") + + +class NFSP(rl_agent.AbstractAgent): + """NFSP Agent implementation in TensorFlow. + + See open_spiel/python/examples/kuhn_nfsp.py for an usage example. 
+ """ + + def __init__(self, + session, + player_id, + state_representation_size, + num_actions, + hidden_layers_sizes, + reservoir_buffer_capacity, + anticipatory_param, + batch_size=128, + rl_learning_rate=0.01, + sl_learning_rate=0.01, + min_buffer_size_to_learn=1000, + learn_every=64, + optimizer_str="sgd", + **kwargs): + """Initialize the `NFSP` agent.""" + self.player_id = player_id + self._session = session + self._num_actions = num_actions + self._layer_sizes = hidden_layers_sizes + self._batch_size = batch_size + self._learn_every = learn_every + self._anticipatory_param = anticipatory_param + self._min_buffer_size_to_learn = min_buffer_size_to_learn + + self._reservoir_buffer = ReservoirBuffer(reservoir_buffer_capacity) + self._prev_timestep = None + self._prev_action = None + + # Step counter to keep track of learning. + self._step_counter = 0 + + # Inner RL agent + kwargs.update({ + "batch_size": batch_size, + "learning_rate": rl_learning_rate, + "learn_every": learn_every, + "min_buffer_size_to_learn": min_buffer_size_to_learn, + "optimizer_str": optimizer_str, + }) + self._rl_agent = dqn.DQN(session, player_id, state_representation_size, + num_actions, hidden_layers_sizes, **kwargs) + + # Keep track of the last training loss achieved in an update step. + self._last_rl_loss_value = lambda: self._rl_agent.loss + self._last_sl_loss_value = None + + # Placeholders. + self._info_state_ph = tf.placeholder( + shape=[None, state_representation_size], + dtype=tf.float32, + name="info_state_ph") + + self._action_probs_ph = tf.placeholder( + shape=[None, num_actions], dtype=tf.float32, name="action_probs_ph") + + self._legal_actions_mask_ph = tf.placeholder( + shape=[None, num_actions], + dtype=tf.float32, + name="legal_actions_mask_ph") + + # Average policy network. + self._avg_network = simple_nets.MLP(state_representation_size, + self._layer_sizes, num_actions) + self._avg_policy = self._avg_network(self._info_state_ph) + self._avg_policy_probs = tf.nn.softmax(self._avg_policy) + + self._savers = [ + ("q_network", tf.train.Saver(self._rl_agent._q_network.variables)), + ("avg_network", tf.train.Saver(self._avg_network.variables)) + ] + + # Loss + self._loss = tf.reduce_mean( + tf.nn.softmax_cross_entropy_with_logits_v2( + labels=tf.stop_gradient(self._action_probs_ph), + logits=self._avg_policy)) + + if optimizer_str == "adam": + optimizer = tf.train.AdamOptimizer(learning_rate=sl_learning_rate) + elif optimizer_str == "sgd": + optimizer = tf.train.GradientDescentOptimizer( + learning_rate=sl_learning_rate) + else: + raise ValueError("Not implemented. 
Choose from ['adam', 'sgd'].") + + self._learn_step = optimizer.minimize(self._loss) + self._sample_episode_policy() + + @contextlib.contextmanager + def temp_mode_as(self, mode): + """Context manager to temporarily overwrite the mode.""" + previous_mode = self._mode + self._mode = mode + yield + self._mode = previous_mode + + def get_step_counter(self): + return self._step_counter + + def _sample_episode_policy(self): + if np.random.rand() < self._anticipatory_param: + self._mode = MODE.best_response + else: + self._mode = MODE.average_policy + + def _act(self, info_state, legal_actions): + info_state = np.reshape(info_state, [1, -1]) + action_values, action_probs = self._session.run( + [self._avg_policy, self._avg_policy_probs], + feed_dict={self._info_state_ph: info_state}) + + self._last_action_values = action_values[0] + # Remove illegal actions, normalize probs + probs = np.zeros(self._num_actions) + probs[legal_actions] = action_probs[0][legal_actions] + probs_sum = sum(probs) + if probs_sum > 0: + probs /= probs_sum + else: + # If all probabilities are zero, use uniform distribution over legal actions + probs = np.zeros(self._num_actions) + probs[legal_actions] = 1.0 / len(legal_actions) + action = np.random.choice(len(probs), p=probs) + return action, probs + + @property + def mode(self): + return self._mode + + @property + def loss(self): + return (self._last_sl_loss_value, self._last_rl_loss_value()) + + def step(self, time_step, is_evaluation=False): + """Returns the action to be taken and updates the Q-networks if needed. + + Args: + time_step: an instance of rl_environment.TimeStep. + is_evaluation: bool, whether this is a training or evaluation call. + + Returns: + A `rl_agent.StepOutput` containing the action probs and chosen action. + """ + if self._mode == MODE.best_response: + agent_output = self._rl_agent.step(time_step, is_evaluation) + if not is_evaluation and not time_step.last(): + self._add_transition(time_step, agent_output) + + elif self._mode == MODE.average_policy: + # Act step: don't act at terminal info states. + if not time_step.last(): + info_state = time_step.observations["info_state"][self.player_id] + legal_actions = time_step.observations["legal_actions"][self.player_id] + action, probs = self._act(info_state, legal_actions) + agent_output = rl_agent.StepOutput(action=action, probs=probs) + + if self._prev_timestep and not is_evaluation: + self._rl_agent.add_transition(self._prev_timestep, self._prev_action, + time_step) + else: + raise ValueError("Invalid mode ({})".format(self._mode)) + + if not is_evaluation: + self._step_counter += 1 + + if self._step_counter % self._learn_every == 0: + self._last_sl_loss_value = self._learn() + # If learn step not triggered by rl policy, learn. + if self._mode == MODE.average_policy: + self._rl_agent.learn() + + # Prepare for the next episode. + if time_step.last(): + self._sample_episode_policy() + self._prev_timestep = None + self._prev_action = None + return + else: + self._prev_timestep = time_step + self._prev_action = agent_output.action + + return agent_output + + def _add_transition(self, time_step, agent_output): + """Adds the new transition using `time_step` to the reservoir buffer. + + Transitions are in the form (time_step, agent_output.probs, legal_mask). + + Args: + time_step: an instance of rl_environment.TimeStep. + agent_output: an instance of rl_agent.StepOutput. 
+ """ + legal_actions = time_step.observations["legal_actions"][self.player_id] + legal_actions_mask = np.zeros(self._num_actions) + legal_actions_mask[legal_actions] = 1.0 + transition = Transition( + info_state=(time_step.observations["info_state"][self.player_id][:]), + action_probs=agent_output.probs, + legal_actions_mask=legal_actions_mask) + self._reservoir_buffer.add(transition) + + def _learn(self): + """Compute the loss on sampled transitions and perform a avg-network update. + + If there are not enough elements in the buffer, no loss is computed and + `None` is returned instead. + + Returns: + The average loss obtained on this batch of transitions or `None`. + """ + if (len(self._reservoir_buffer) < self._batch_size or + len(self._reservoir_buffer) < self._min_buffer_size_to_learn): + return None + + transitions = self._reservoir_buffer.sample(self._batch_size) + info_states = [t.info_state for t in transitions] + action_probs = [t.action_probs for t in transitions] + legal_actions_mask = [t.legal_actions_mask for t in transitions] + + loss, _ = self._session.run( + [self._loss, self._learn_step], + feed_dict={ + self._info_state_ph: info_states, + self._action_probs_ph: action_probs, + self._legal_actions_mask_ph: legal_actions_mask, + }) + return loss + + def _full_checkpoint_name(self, checkpoint_dir, name): + checkpoint_filename = "_".join([name, "pid" + str(self.player_id)]) + return os.path.join(checkpoint_dir, checkpoint_filename) + + def _latest_checkpoint_filename(self, name): + checkpoint_filename = "_".join([name, "pid" + str(self.player_id)]) + return checkpoint_filename + "_latest" + + def save(self, checkpoint_dir): + """Saves the average policy network and the inner RL agent's q-network. + + Note that this does not save the experience replay buffers and should + only be used to restore the agent's policy, not resume training. + + Args: + checkpoint_dir: directory where checkpoints will be saved. + """ + for name, saver in self._savers: + path = saver.save( + self._session, + self._full_checkpoint_name(checkpoint_dir, name), + latest_filename=self._latest_checkpoint_filename(name)) + logging.info("Saved to path: %s", path) + + def has_checkpoint(self, checkpoint_dir): + for name, _ in self._savers: + if tf.train.latest_checkpoint( + self._full_checkpoint_name(checkpoint_dir, name), + os.path.join(checkpoint_dir, + self._latest_checkpoint_filename(name))) is None: + return False + return True + + def restore(self, checkpoint_dir): + """Restores the average policy network and the inner RL agent's q-network. + + Note that this does not restore the experience replay buffers and should + only be used to restore the agent's policy, not resume training. + + Args: + checkpoint_dir: directory from which checkpoints will be restored. + """ + for name, saver in self._savers: + full_checkpoint_dir = self._full_checkpoint_name(checkpoint_dir, name) + logging.info("Restoring checkpoint: %s", full_checkpoint_dir) + saver.restore(self._session, full_checkpoint_dir) + + +class ReservoirBuffer(object): + """Allows uniform sampling over a stream of data. + + This class supports the storage of arbitrary elements, such as observation + tensors, integer actions, etc. + + See https://en.wikipedia.org/wiki/Reservoir_sampling for more details. + """ + + def __init__(self, reservoir_buffer_capacity): + self._reservoir_buffer_capacity = reservoir_buffer_capacity + self._data = [] + self._add_calls = 0 + + def add(self, element): + """Potentially adds `element` to the reservoir buffer. 
+ + Args: + element: data to be added to the reservoir buffer. + """ + if len(self._data) < self._reservoir_buffer_capacity: + self._data.append(element) + else: + idx = np.random.randint(0, self._add_calls + 1) + if idx < self._reservoir_buffer_capacity: + self._data[idx] = element + self._add_calls += 1 + + def sample(self, num_samples): + """Returns `num_samples` uniformly sampled from the buffer. + + Args: + num_samples: `int`, number of samples to draw. + + Returns: + An iterable over `num_samples` random elements of the buffer. + + Raises: + ValueError: If there are less than `num_samples` elements in the buffer + """ + if len(self._data) < num_samples: + raise ValueError("{} elements could not be sampled from size {}".format( + num_samples, len(self._data))) + return random.sample(self._data, num_samples) + + def clear(self): + self._data = [] + self._add_calls = 0 + + def __len__(self): + return len(self._data) + + def __iter__(self): + return iter(self._data) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/nfsp_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/nfsp_test.py new file mode 100644 index 0000000..baddbf3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/nfsp_test.py @@ -0,0 +1,91 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import tensorflow.compat.v1 as tf + +from open_spiel.python import rl_environment +from open_spiel.python.algorithms import nfsp + +# Temporarily disable TF2 behavior until code is updated. 
+tf.disable_v2_behavior() + + +class NFSPTest(tf.test.TestCase): + + def test_run_kuhn(self): + env = rl_environment.Environment("kuhn_poker") + state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + with self.session() as sess: + agents = [ + nfsp.NFSP( # pylint: disable=g-complex-comprehension + sess, + player_id, + state_representation_size=state_size, + num_actions=num_actions, + hidden_layers_sizes=[16], + reservoir_buffer_capacity=10, + anticipatory_param=0.1) for player_id in [0, 1] + ] + sess.run(tf.global_variables_initializer()) + + for unused_ep in range(10): + time_step = env.reset() + while not time_step.last(): + current_player = time_step.observations["current_player"] + current_agent = agents[current_player] + agent_output = current_agent.step(time_step) + time_step = env.step([agent_output.action]) + + for agent in agents: + agent.step(time_step) + + +class ReservoirBufferTest(tf.test.TestCase): + + def test_reservoir_buffer_add(self): + reservoir_buffer = nfsp.ReservoirBuffer(reservoir_buffer_capacity=10) + self.assertEmpty(reservoir_buffer) + reservoir_buffer.add("entry1") + self.assertLen(reservoir_buffer, 1) + reservoir_buffer.add("entry2") + self.assertLen(reservoir_buffer, 2) + + self.assertIn("entry1", reservoir_buffer) + self.assertIn("entry2", reservoir_buffer) + + def test_reservoir_buffer_max_capacity(self): + reservoir_buffer = nfsp.ReservoirBuffer(reservoir_buffer_capacity=2) + reservoir_buffer.add("entry1") + reservoir_buffer.add("entry2") + reservoir_buffer.add("entry3") + + self.assertLen(reservoir_buffer, 2) + + def test_reservoir_buffer_sample(self): + replay_buffer = nfsp.ReservoirBuffer(reservoir_buffer_capacity=3) + replay_buffer.add("entry1") + replay_buffer.add("entry2") + replay_buffer.add("entry3") + + samples = replay_buffer.sample(3) + + self.assertIn("entry1", samples) + self.assertIn("entry2", samples) + self.assertIn("entry3", samples) + + +if __name__ == "__main__": + tf.test.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/policy_gradient.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/policy_gradient.py new file mode 100644 index 0000000..8f51b7d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/policy_gradient.py @@ -0,0 +1,556 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3. +r"""Policy Gradient based agents implemented in TensorFlow. + +This class is composed of three policy gradient (PG) algorithms: + +- Q-based Policy Gradient (QPG): an "all-actions" advantage actor-critic +algorithm differing from A2C in that all action values are used to estimate the +policy gradient (as opposed to only using the action taken into account): + + baseline = \sum_a pi_a * Q_a + loss = - \sum_a pi_a * (Q_a - baseline) + +where (Q_a - baseline) is the usual advantage. QPG is also known as Mean +Actor-Critic (https://arxiv.org/abs/1709.00503). 
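+
+In these formulas, pi_a denotes the current policy's probability of action a
+and Q_a its estimated action value; the baseline is therefore the policy's
+expected value at the state.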
+ + +- Regret policy gradient (RPG): a PG algorithm inspired by counterfactual regret +minimization (CFR). Unlike standard actor-critic methods (e.g. A2C), the loss is +defined purely in terms of thresholded regrets as follows: + + baseline = \sum_a pi_a * Q_a + loss = regret = \sum_a relu(Q_a - baseline) + +where gradients only flow through the action value (Q_a) part and are blocked on +the baseline part (which is trained separately by usual MSE loss). +The lack of negative sign in the front of the loss represents a switch from +gradient ascent on the score to descent on the loss. + + +- Regret Matching Policy Gradient (RMPG): inspired by regret-matching, the +policy gradient is by weighted by the thresholded regret: + + baseline = \sum_a pi_a * Q_a + loss = - \sum_a pi_a * relu(Q_a - baseline) + + +These algorithms were published in NeurIPS 2018. Paper title: "Actor-Critic +Policy Optimization in Partially Observable Multiagent Environment", the paper +is available at: https://arxiv.org/abs/1810.09026. + +- Advantage Actor Critic (A2C): The popular advantage actor critic (A2C) +algorithm. The algorithm uses the baseline (Value function) as a control variate +to reduce variance of the policy gradient. The loss is only computed for the +actions actually taken in the episode as opposed to a loss computed for all +actions in the variants above. + + advantages = returns - baseline + loss = -log(pi_a) * advantages + +The algorithm can be found in the textbook: +https://incompleteideas.net/book/RLbook2018.pdf under the chapter on +`Policy Gradients`. + +See open_spiel/python/algorithms/losses/rl_losses_test.py for an example of the +loss computation. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import collections +import os +from absl import logging +import numpy as np +import tensorflow.compat.v1 as tf + +from open_spiel.python import rl_agent +from open_spiel.python import simple_nets +from open_spiel.python.algorithms.losses import rl_losses + +# Temporarily disable TF2 behavior until we update the code. +tf.disable_v2_behavior() + +Transition = collections.namedtuple( + "Transition", "info_state action reward discount legal_actions_mask") + + +class PolicyGradient(rl_agent.AbstractAgent): + """RPG Agent implementation in TensorFlow. + + See open_spiel/python/examples/single_agent_catch.py for an usage example. + """ + + def __init__(self, + session, + player_id, + info_state_size, + num_actions, + loss_str="a2c", + loss_class=None, + hidden_layers_sizes=(128,), + batch_size=16, + critic_learning_rate=0.01, + pi_learning_rate=0.001, + entropy_cost=0.01, + num_critic_before_pi=8, + additional_discount_factor=1.0, + max_global_gradient_norm=None, + optimizer_str="sgd"): + """Initialize the PolicyGradient agent. + + Args: + session: Tensorflow session. + player_id: int, player identifier. Usually its position in the game. + info_state_size: int, info_state vector size. + num_actions: int, number of actions per info state. + loss_str: string or None. If string, must be one of ["rpg", "qpg", "rm", + "a2c"] and defined in `_get_loss_class`. If None, a loss class must be + passed through `loss_class`. Defaults to "a2c". + loss_class: Class or None. If Class, it must define the policy gradient + loss. If None a loss class in a string format must be passed through + `loss_str`. Defaults to None. + hidden_layers_sizes: iterable, defines the neural network layers. 
Defaults + to (128,), which produces a NN: [INPUT] -> [128] -> ReLU -> [OUTPUT]. + batch_size: int, batch size to use for Q and Pi learning. Defaults to 128. + critic_learning_rate: float, learning rate used for Critic (Q or V). + Defaults to 0.01. + pi_learning_rate: float, learning rate used for Pi. Defaults to 0.001. + entropy_cost: float, entropy cost used to multiply the entropy loss. Can + be set to None to skip entropy computation. Defaults to 0.01. + num_critic_before_pi: int, number of Critic (Q or V) updates before each + Pi update. Defaults to 8 (every 8th critic learning step, Pi also + learns). + additional_discount_factor: float, additional discount to compute returns. + Defaults to 1.0, in which case, no extra discount is applied. None that + users must provide *only one of* `loss_str` or `loss_class`. + max_global_gradient_norm: float or None, maximum global norm of a gradient + to which the gradient is shrunk if its value is larger. Defaults to + None. + optimizer_str: String defining which optimizer to use. Supported values + are {sgd, adam}. Defaults to sgd + """ + assert bool(loss_str) ^ bool(loss_class), "Please provide only one option." + self._kwargs = locals() + loss_class = loss_class if loss_class else self._get_loss_class(loss_str) + self._loss_class = loss_class + + self.player_id = player_id + self._session = session + self._num_actions = num_actions + self._layer_sizes = hidden_layers_sizes + self._batch_size = batch_size + self._extra_discount = additional_discount_factor + self._num_critic_before_pi = num_critic_before_pi + + self._episode_data = [] + self._dataset = collections.defaultdict(list) + self._prev_time_step = None + self._prev_action = None + + # Step counters + self._step_counter = 0 + self._episode_counter = 0 + self._num_learn_steps = 0 + + # Keep track of the last training loss achieved in an update step. + self._last_loss_value = None + + # Placeholders + self._info_state_ph = tf.placeholder( + shape=[None, info_state_size], dtype=tf.float32, name="info_state_ph") + self._action_ph = tf.placeholder( + shape=[None], dtype=tf.int32, name="action_ph") + self._return_ph = tf.placeholder( + shape=[None], dtype=tf.float32, name="return_ph") + + # Network + # activate final as we plug logit and qvalue heads afterwards. + self._net_torso = simple_nets.MLPTorso(info_state_size, self._layer_sizes) + torso_out = self._net_torso(self._info_state_ph) + torso_out_size = self._layer_sizes[-1] + self._policy_logits_layer = simple_nets.Linear( + torso_out_size, + self._num_actions, + activate_relu=False, + name="policy_head") + # Do not remove policy_logits_network. Even if it's not used directly here, + # other code outside this file refers to it. 
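+    # `policy_logits_network` chains the shared torso and the policy-logits
+    # head into a single callable. Separate baseline (V) or Q-value heads are
+    # attached to the same torso below, depending on the chosen loss.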
+ self.policy_logits_network = simple_nets.Sequential( + [self._net_torso, self._policy_logits_layer]) + self._policy_logits = self._policy_logits_layer(torso_out) + self._policy_probs = tf.nn.softmax(self._policy_logits) + + self._savers = [] + + # Add baseline (V) head for A2C (or Q-head for QPG / RPG / RMPG) + if loss_class.__name__ == "BatchA2CLoss": + self._baseline_layer = simple_nets.Linear( + torso_out_size, 1, activate_relu=False, name="baseline") + self._baseline = tf.squeeze(self._baseline_layer(torso_out), axis=1) + else: + self._q_values_layer = simple_nets.Linear( + torso_out_size, + self._num_actions, + activate_relu=False, + name="q_values_head") + self._q_values = self._q_values_layer(torso_out) + + # Critic loss + # Baseline loss in case of A2C + if loss_class.__name__ == "BatchA2CLoss": + self._critic_loss = tf.reduce_mean( + tf.losses.mean_squared_error( + labels=self._return_ph, predictions=self._baseline)) + else: + # Q-loss otherwise. + action_indices = tf.stack( + [tf.range(tf.shape(self._q_values)[0]), self._action_ph], axis=-1) + value_predictions = tf.gather_nd(self._q_values, action_indices) + self._critic_loss = tf.reduce_mean( + tf.losses.mean_squared_error( + labels=self._return_ph, predictions=value_predictions)) + if optimizer_str == "adam": + self._critic_optimizer = tf.train.AdamOptimizer( + learning_rate=critic_learning_rate) + elif optimizer_str == "sgd": + self._critic_optimizer = tf.train.GradientDescentOptimizer( + learning_rate=critic_learning_rate) + else: + raise ValueError("Not implemented, choose from 'adam' and 'sgd'.") + + def minimize_with_clipping(optimizer, loss): + grads_and_vars = optimizer.compute_gradients(loss) + if max_global_gradient_norm is not None: + grads, variables = zip(*grads_and_vars) + grads, _ = tf.clip_by_global_norm(grads, max_global_gradient_norm) + grads_and_vars = list(zip(grads, variables)) + + return optimizer.apply_gradients(grads_and_vars) + + self._critic_learn_step = minimize_with_clipping(self._critic_optimizer, + self._critic_loss) + + # Pi loss + pg_class = loss_class(entropy_cost=entropy_cost) + if loss_class.__name__ == "BatchA2CLoss": + self._pi_loss = pg_class.loss( + policy_logits=self._policy_logits, + baseline=self._baseline, + actions=self._action_ph, + returns=self._return_ph) + else: + self._pi_loss = pg_class.loss( + policy_logits=self._policy_logits, action_values=self._q_values) + if optimizer_str == "adam": + self._pi_optimizer = tf.train.AdamOptimizer( + learning_rate=pi_learning_rate) + elif optimizer_str == "sgd": + self._pi_optimizer = tf.train.GradientDescentOptimizer( + learning_rate=pi_learning_rate) + + self._pi_learn_step = minimize_with_clipping(self._pi_optimizer, + self._pi_loss) + self._loss_str = loss_str + self._initialize() + + def _get_loss_class(self, loss_str): + if loss_str == "rpg": + return rl_losses.BatchRPGLoss + elif loss_str == "qpg": + return rl_losses.BatchQPGLoss + elif loss_str == "rm": + return rl_losses.BatchRMLoss + elif loss_str == "a2c": + return rl_losses.BatchA2CLoss + + def _act(self, info_state, legal_actions): + # Make a singleton batch for NN compatibility: [1, info_state_size] + info_state = np.reshape(info_state, [1, -1]) + policy_probs = self._session.run( + self._policy_probs, feed_dict={self._info_state_ph: info_state}) + + # Remove illegal actions, re-normalize probs + probs = np.zeros(self._num_actions) + probs[legal_actions] = policy_probs[0][legal_actions] + if sum(probs) != 0: + probs /= sum(probs) + else: + probs[legal_actions] = 1 / 
len(legal_actions) + action = np.random.choice(len(probs), p=probs) + return action, probs + + def step(self, time_step, is_evaluation=False): + """Returns the action to be taken and updates the network if needed. + + Args: + time_step: an instance of rl_environment.TimeStep. + is_evaluation: bool, whether this is a training or evaluation call. + Defaults to False. + + Returns: + A `rl_agent.StepOutput` containing the action probs and chosen action. + """ + # Act step: don't act at terminal info states or if its not our turn. + if (not time_step.last()) and ( + time_step.is_simultaneous_move() or + self.player_id == time_step.current_player()): + info_state = time_step.observations["info_state"][self.player_id] + legal_actions = time_step.observations["legal_actions"][self.player_id] + action, probs = self._act(info_state, legal_actions) + else: + action = None + probs = [] + + if not is_evaluation: + self._step_counter += 1 + + # Add data points to current episode buffer. + if self._prev_time_step: + self._add_transition(time_step) + + # Episode done, add to dataset and maybe learn. + if time_step.last(): + self._add_episode_data_to_dataset() + self._episode_counter += 1 + + if len(self._dataset["returns"]) >= self._batch_size: + self._critic_update() + self._num_learn_steps += 1 + if self._num_learn_steps % self._num_critic_before_pi == 0: + self._pi_update() + self._dataset = collections.defaultdict(list) + + self._prev_time_step = None + self._prev_action = None + return + else: + self._prev_time_step = time_step + self._prev_action = action + + return rl_agent.StepOutput(action=action, probs=probs) + + def _full_checkpoint_name(self, checkpoint_dir, name): + checkpoint_filename = "_".join( + [self._loss_str, name, "pid" + str(self.player_id)]) + return os.path.join(checkpoint_dir, checkpoint_filename) + + def _latest_checkpoint_filename(self, name): + checkpoint_filename = "_".join( + [self._loss_str, name, "pid" + str(self.player_id)]) + return checkpoint_filename + "_latest" + + def save(self, checkpoint_dir): + for name, saver in self._savers: + path = saver.save( + self._session, + self._full_checkpoint_name(checkpoint_dir, name), + latest_filename=self._latest_checkpoint_filename(name)) + logging.info("saved to path: %s", path) + + def has_checkpoint(self, checkpoint_dir): + for name, _ in self._savers: + if tf.train.latest_checkpoint( + self._full_checkpoint_name(checkpoint_dir, name), + os.path.join(checkpoint_dir, + self._latest_checkpoint_filename(name))) is None: + return False + return True + + def restore(self, checkpoint_dir): + for name, saver in self._savers: + full_checkpoint_dir = self._full_checkpoint_name(checkpoint_dir, name) + logging.info("Restoring checkpoint: %s", full_checkpoint_dir) + saver.restore(self._session, full_checkpoint_dir) + + @property + def loss(self): + return (self._last_critic_loss_value, self._last_pi_loss_value) + + def _add_episode_data_to_dataset(self): + """Add episode data to the buffer.""" + info_states = [data.info_state for data in self._episode_data] + rewards = [data.reward for data in self._episode_data] + discount = [data.discount for data in self._episode_data] + actions = [data.action for data in self._episode_data] + + # Calculate returns + returns = np.array(rewards) + for idx in reversed(range(len(rewards[:-1]))): + returns[idx] = ( + rewards[idx] + + discount[idx] * returns[idx + 1] * self._extra_discount) + + # Add flattened data points to dataset + self._dataset["actions"].extend(actions) + 
self._dataset["returns"].extend(returns) + self._dataset["info_states"].extend(info_states) + self._episode_data = [] + + def _add_transition(self, time_step): + """Adds intra-episode transition to the `_episode_data` buffer. + + Adds the transition from `self._prev_time_step` to `time_step`. + + Args: + time_step: an instance of rl_environment.TimeStep. + """ + assert self._prev_time_step is not None + legal_actions = ( + self._prev_time_step.observations["legal_actions"][self.player_id]) + legal_actions_mask = np.zeros(self._num_actions) + legal_actions_mask[legal_actions] = 1.0 + transition = Transition( + info_state=( + self._prev_time_step.observations["info_state"][self.player_id][:]), + action=self._prev_action, + reward=time_step.rewards[self.player_id], + discount=time_step.discounts[self.player_id], + legal_actions_mask=legal_actions_mask) + + self._episode_data.append(transition) + + def _critic_update(self): + """Compute the Critic loss on sampled transitions & perform a critic update. + + Returns: + The average Critic loss obtained on this batch. + """ + # TODO(author3): illegal action handling. + critic_loss, _ = self._session.run( + [self._critic_loss, self._critic_learn_step], + feed_dict={ + self._info_state_ph: self._dataset["info_states"], + self._action_ph: self._dataset["actions"], + self._return_ph: self._dataset["returns"], + }) + self._last_critic_loss_value = critic_loss + return critic_loss + + def _pi_update(self): + """Compute the Pi loss on sampled transitions and perform a Pi update. + + Returns: + The average Pi loss obtained on this batch. + """ + # TODO(author3): illegal action handling. + pi_loss, _ = self._session.run( + [self._pi_loss, self._pi_learn_step], + feed_dict={ + self._info_state_ph: self._dataset["info_states"], + self._action_ph: self._dataset["actions"], + self._return_ph: self._dataset["returns"], + }) + self._last_pi_loss_value = pi_loss + return pi_loss + + def get_weights(self): + variables = [self._session.run(self._net_torso.variables)] + variables.append(self._session.run(self._policy_logits_layer.variables)) + if self._loss_class.__name__ == "BatchA2CLoss": + variables.append(self._session.run(self._baseline_layer.variables)) + else: + variables.append(self._session.run(self._q_values_layer.variables)) + return variables + + def _initialize(self): + initialization_torso = tf.group( + *[var.initializer for var in self._net_torso.variables]) + initialization_logit = tf.group( + *[var.initializer for var in self._policy_logits_layer.variables]) + if self._loss_class.__name__ == "BatchA2CLoss": + initialization_baseline_or_q_val = tf.group( + *[var.initializer for var in self._baseline_layer.variables]) + else: + initialization_baseline_or_q_val = tf.group( + *[var.initializer for var in self._q_values_layer.variables]) + initialization_crit_opt = tf.group( + *[var.initializer for var in self._critic_optimizer.variables()]) + initialization_pi_opt = tf.group( + *[var.initializer for var in self._pi_optimizer.variables()]) + + self._session.run( + tf.group(*[ + initialization_torso, initialization_logit, + initialization_baseline_or_q_val, initialization_crit_opt, + initialization_pi_opt + ])) + self._savers = [("torso", tf.train.Saver(self._net_torso.variables)), + ("policy_head", + tf.train.Saver(self._policy_logits_layer.variables))] + if self._loss_class.__name__ == "BatchA2CLoss": + self._savers.append( + ("baseline", tf.train.Saver(self._baseline_layer.variables))) + else: + self._savers.append( + ("q_head", 
tf.train.Saver(self._q_values_layer.variables))) + + def copy_with_noise(self, sigma=0.0, copy_weights=True): + """Copies the object and perturbates its network's weights with noise. + + Args: + sigma: gaussian dropout variance term : Multiplicative noise following + (1+sigma*epsilon), epsilon standard gaussian variable, multiplies each + model weight. sigma=0 means no perturbation. + copy_weights: Boolean determining whether to copy model weights (True) or + just model hyperparameters. + + Returns: + Perturbated copy of the model. + """ + _ = self._kwargs.pop("self", None) + copied_object = PolicyGradient(**self._kwargs) + + net_torso = getattr(copied_object, "_net_torso") + policy_logits_layer = getattr(copied_object, "_policy_logits_layer") + if hasattr(copied_object, "_q_values_layer"): + q_values_layer = getattr(copied_object, "_q_values_layer") + if hasattr(copied_object, "_baseline_layer"): + baseline_layer = getattr(copied_object, "_baseline_layer") + + if copy_weights: + copy_mlp_weights = tf.group(*[ + va.assign(vb * (1 + sigma * tf.random.normal(vb.shape))) + for va, vb in zip(net_torso.variables, self._net_torso.variables) + ]) + self._session.run(copy_mlp_weights) + + copy_logit_weights = tf.group(*[ + va.assign(vb * (1 + sigma * tf.random.normal(vb.shape))) + for va, vb in zip(policy_logits_layer.variables, + self._policy_logits_layer.variables) + ]) + self._session.run(copy_logit_weights) + if hasattr(copied_object, "_q_values_layer"): + copy_q_value_weights = tf.group(*[ + va.assign(vb * (1 + sigma * tf.random.normal(vb.shape))) for va, vb + in zip(q_values_layer.variables, self._q_values_layer.variables) + ]) + self._session.run(copy_q_value_weights) + if hasattr(copied_object, "_baseline_layer"): + copy_baseline_weights = tf.group(*[ + va.assign(vb * (1 + sigma * tf.random.normal(vb.shape))) for va, vb + in zip(baseline_layer.variables, self._baseline_layer.variables) + ]) + self._session.run(copy_baseline_weights) + + for var in getattr(copied_object, "_critic_optimizer").variables(): + self._session.run(var.initializer) + for var in getattr(copied_object, "_pi_optimizer").variables(): + self._session.run(var.initializer) + + return copied_object diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/policy_gradient_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/policy_gradient_test.py new file mode 100644 index 0000000..ebd6abd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/policy_gradient_test.py @@ -0,0 +1,143 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
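+
+# Smoke tests for the TF1 policy gradient agents: each loss variant ("rpg",
+# "qpg", "rm", "a2c") is run for a couple of episodes on small poker games,
+# and the loss_str / loss_class construction paths are checked for equivalence.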
+ +import itertools + +from absl.testing import absltest +from absl.testing import parameterized +import tensorflow.compat.v1 as tf + +from open_spiel.python import rl_environment +from open_spiel.python.algorithms import policy_gradient +from open_spiel.python.algorithms.losses import rl_losses +import pyspiel + + +class PolicyGradientTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + itertools.product(("rpg", "qpg", "rm", "a2c"), + ("kuhn_poker", "leduc_poker"))) + def test_run_game(self, loss_str, game_name): + env = rl_environment.Environment(game_name) + info_state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + with self.session() as sess: + agents = [ + policy_gradient.PolicyGradient( # pylint: disable=g-complex-comprehension + sess, + player_id=player_id, + info_state_size=info_state_size, + num_actions=num_actions, + loss_str=loss_str, + hidden_layers_sizes=[8, 8], + batch_size=16, + entropy_cost=0.001, + critic_learning_rate=0.01, + pi_learning_rate=0.01, + num_critic_before_pi=4) for player_id in [0, 1] + ] + sess.run(tf.global_variables_initializer()) + + for _ in range(2): + time_step = env.reset() + while not time_step.last(): + current_player = time_step.observations["current_player"] + current_agent = agents[current_player] + agent_output = current_agent.step(time_step) + time_step = env.step([agent_output.action]) + + for agent in agents: + agent.step(time_step) + + @absltest.skip("Causing a segmentation fault on wheel tests") + def test_run_hanabi(self): + # Hanabi is an optional game, so check we have it before running the test. + game = "hanabi" + if game not in pyspiel.registered_names(): + return + + num_players = 3 + env_configs = { + "players": num_players, + "max_life_tokens": 1, + "colors": 2, + "ranks": 3, + "hand_size": 2, + "max_information_tokens": 3, + "discount": 0. 
+ } + env = rl_environment.Environment(game, **env_configs) + info_state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + with self.session() as sess: + agents = [ + policy_gradient.PolicyGradient( # pylint: disable=g-complex-comprehension + sess, + player_id=player_id, + info_state_size=info_state_size, + num_actions=num_actions, + hidden_layers_sizes=[8, 8], + batch_size=16, + entropy_cost=0.001, + critic_learning_rate=0.01, + pi_learning_rate=0.01, + num_critic_before_pi=4) for player_id in range(num_players) + ] + sess.run(tf.global_variables_initializer()) + time_step = env.reset() + while not time_step.last(): + current_player = time_step.observations["current_player"] + agent_output = [agent.step(time_step) for agent in agents] + time_step = env.step([agent_output[current_player].action]) + + for agent in agents: + agent.step(time_step) + + def test_loss_modes(self): + loss_dict = { + "qpg": rl_losses.BatchQPGLoss, + "rpg": rl_losses.BatchRPGLoss, + "rm": rl_losses.BatchRMLoss, + "a2c": rl_losses.BatchA2CLoss, + } + with self.session() as sess: + for loss_str, loss_class in loss_dict.items(): + agent_by_str = policy_gradient.PolicyGradient( + sess, + player_id=0, + info_state_size=32, + num_actions=2, + loss_str=loss_str, + loss_class=None) + agent_by_class = policy_gradient.PolicyGradient( + sess, + player_id=0, + info_state_size=32, + num_actions=2, + loss_str=None, + loss_class=loss_class) + + self.assertEqual(agent_by_str._pi_loss.shape, + agent_by_class._pi_loss.shape) + self.assertEqual(agent_by_str._pi_loss.dtype, + agent_by_class._pi_loss.dtype) + self.assertEqual(agent_by_str._pi_loss.op.type, + agent_by_class._pi_loss.op.type) + + +if __name__ == "__main__": + tf.test.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/rcfr.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/rcfr.py new file mode 100644 index 0000000..9bae781 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/rcfr.py @@ -0,0 +1,884 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Regression counterfactual regret minimization (RCFR) [Waugh et al., 2015; Morrill, 2016]. + +In contrast to (tabular) counterfactual regret minimization (CFR) +[Zinkevich et al., 2007], RCFR replaces the table of regrets that generate the +current policy profile with a profile of regression models. The average +policy is still tracked exactly with a full game-size table. The exploitability +of the average policy in zero-sum games decreases as the model accuracy and +the number of iterations increase [Waugh et al., 2015; Morrill, 2016]. As long +as the regression model errors decrease across iterations, the average policy +converges toward a Nash equilibrium in zero-sum games. + +# References + +Dustin Morrill. Using Regret Estimation to Solve Games Compactly. + M.Sc. 
thesis, Computing Science Department, University of Alberta, + Apr 1, 2016, Edmonton Alberta, Canada. +Kevin Waugh, Dustin Morrill, J. Andrew Bagnell, and Michael Bowling. + Solving Games with Functional Regret Estimation. At the Twenty-Ninth AAAI + Conference on Artificial Intelligence, January 25-29, 2015, Austin Texas, + USA. Pages 2138-2145. +Martin Zinkevich, Michael Johanson, Michael Bowling, and Carmelo Piccione. + Regret Minimization in Games with Incomplete Information. + At Advances in Neural Information Processing Systems 20 (NeurIPS). 2007. +""" + +import warnings + +import numpy as np +import tensorflow.compat.v1 as tf + + +# Temporarily disable TF2 behavior while the code is not updated. +tf.disable_v2_behavior() + + +warnings.warn( + "RCFR has known issues when using Keras 3 and may be removed in a " + "future version unless fixed. See OpenSpiel github issue #1207 for " + "details." +) + + +def tensor_to_matrix(tensor): + """Converts `tensor` to a matrix (a rank-2 tensor) or raises an exception. + + Args: + tensor: The tensor to convert. + + Returns: + A TensorFlow matrix (rank-2 `tf.Tensor`). + Raises: + ValueError: If `tensor` cannot be trivially converted to a matrix, i.e. + `tensor` has a rank > 2. + """ + tensor = tf.convert_to_tensor(tensor) + rank = tensor.shape.rank + if rank > 2: + raise ValueError( + ("Tensor {} cannot be converted into a matrix as it is rank " + "{} > 2.").format(tensor, rank)) + elif rank < 2: + num_columns = 1 if rank == 0 else tensor.shape[0].value + tensor = tf.reshape(tensor, [1, num_columns]) + return tensor + + +def with_one_hot_action_features(state_features, legal_actions, + num_distinct_actions): + """Constructs features for each sequence by extending state features. + + Sequences features are constructed by concatenating one-hot features + indicating each action to the information state features and stacking them. + + Args: + state_features: The features for the information state alone. Must be a + `tf.Tensor` with a rank less than or equal to (if batched) 2. + legal_actions: The list of legal actions in this state. Determines the + number of rows in the returned feature matrix. + num_distinct_actions: The number of globally distinct actions in the game. + Determines the length of the action feature vector concatenated onto the + state features. + + Returns: + A `tf.Tensor` feature matrix with one row for each sequence and # state + features plus `num_distinct_actions`-columns. + + Raises: + ValueError: If `state_features` has a rank > 2. + """ + state_features = tensor_to_matrix(state_features) + with_action_features = [] + for action in legal_actions: + action_features = tf.one_hot([action], num_distinct_actions) + action_features = tf.tile(action_features, [tf.shape(state_features)[0], 1]) + all_features = tf.concat([state_features, action_features], axis=1) + with_action_features.append(all_features) + return tf.concat(with_action_features, axis=0) + + +def sequence_features(state, num_distinct_actions): + """The sequence features at `state`. + + Features are constructed by concatenating `state`'s normalized feature + vector with one-hot vectors indicating each action (see + `with_one_hot_action_features`). + + Args: + state: An OpenSpiel `State`. + num_distinct_actions: The number of globally distinct actions in `state`'s + game. + + Returns: + A `tf.Tensor` feature matrix with one row for each sequence. 
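+    Each row concatenates the state's information-state tensor with a one-hot
+    encoding of one legal action, so there are `len(state.legal_actions())`
+    rows and (information-state length + `num_distinct_actions`) columns.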
+ """ + return with_one_hot_action_features(state.information_state_tensor(), + state.legal_actions(), + num_distinct_actions) + + +def num_features(game): + """Returns the number of features returned by `sequence_features`. + + Args: + game: An OpenSpiel `Game`. + """ + return game.information_state_tensor_size() + game.num_distinct_actions() + + +class RootStateWrapper(object): + """Analyzes the subgame at a given root state. + + It enumerates features for each player sequence, creates a mapping between + information states to sequence index offsets, and caches terminal values + in a dictionary with history string keys. + + Properties: + root: An OpenSpiel `State`. + sequence_features: A `list` of sequence feature matrices, one for each + player. This list uses depth-first, information state-major ordering, so + sequences are grouped by information state. I.e. the first legal action + in the first state has index 0, the second action in the same information + state has index 1, the third action will have index 3, and so on. + Sequences in the next information state descendant of the first action + will begin indexing its sequences at the number of legal actions in the + ancestor information state. + num_player_sequences: The number of sequences for each player. + info_state_to_sequence_idx: A `dict` mapping each information state string + to the `sequence_features` index of the first sequence in the + corresponding information state. + terminal_values: A `dict` mapping history strings to terminal values for + each player. + """ + + def __init__(self, state): + self.root = state + self._num_distinct_actions = len(state.legal_actions_mask(0)) + + self.sequence_features = [[] for _ in range(state.num_players())] + self.num_player_sequences = [0] * state.num_players() + self.info_state_to_sequence_idx = {} + self.terminal_values = {} + self._walk_descendants(state) + self.sequence_features = [ + tf.concat(rows, axis=0) for rows in self.sequence_features + ] + + def _walk_descendants(self, state): + """Records information about `state` and its descendants.""" + if state.is_terminal(): + self.terminal_values[state.history_str()] = np.array(state.returns()) + return + + elif state.is_chance_node(): + for action, _ in state.chance_outcomes(): + self._walk_descendants(state.child(action)) + return + + player = state.current_player() + info_state = state.information_state_string(player) + actions = state.legal_actions() + + if info_state not in self.info_state_to_sequence_idx: + n = self.num_player_sequences[player] + self.info_state_to_sequence_idx[info_state] = n + self.sequence_features[player].append( + sequence_features(state, self._num_distinct_actions)) + self.num_player_sequences[player] += len(actions) + + for action in actions: + self._walk_descendants(state.child(action)) + + def sequence_weights_to_policy(self, sequence_weights, state): + """Returns a behavioral policy at `state` from sequence weights. + + Args: + sequence_weights: An array of non-negative weights, one for each of + `state.current_player()`'s sequences in `state`'s game. + state: An OpenSpiel `State` that represents an information state in an + alternating-move game. + + Returns: + A `np.array` probability distribution representing the policy in + `state` encoded by `sequence_weights`. Weights corresponding to actions + in `state` are normalized by their sum. + + Raises: + ValueError: If there are too few sequence weights at `state`. 
+ """ + info_state = state.information_state_string() + sequence_offset = self.info_state_to_sequence_idx[info_state] + actions = state.legal_actions() + + sequence_idx_end = sequence_offset + len(actions) + weights = sequence_weights[sequence_offset:sequence_idx_end] + + if len(weights) < len(actions): + raise ValueError( + ("Invalid policy: Policy {player} at sequence offset " + "{sequence_offset} has only {policy_len} elements but there " + "are {num_actions} legal actions.").format( + player=state.current_player(), + sequence_offset=sequence_offset, + policy_len=len(weights), + num_actions=len(actions))) + return normalized_by_sum(weights) + + def sequence_weights_to_policy_fn(self, player_sequence_weights): + """Returns a policy function based on sequence weights for each player. + + Args: + player_sequence_weights: A list of weight arrays, one for each player. + Each array should have a weight for each of that player's sequences in + `state`'s game. + + Returns: + A `State` -> `np.array` function. The output of this function is + a probability distribution that represents the policy at the given + `State` encoded by `player_sequence_weights` according to + `sequence_weights_to_policy`. + """ + + def policy_fn(state): + player = state.current_player() + return self.sequence_weights_to_policy(player_sequence_weights[player], + state) + + return policy_fn + + def sequence_weights_to_tabular_profile(self, player_sequence_weights): + """Returns the tabular profile-form of `player_sequence_weights`.""" + return sequence_weights_to_tabular_profile( + self.root, self.sequence_weights_to_policy_fn(player_sequence_weights)) + + def counterfactual_regrets_and_reach_weights(self, regret_player, + reach_weight_player, + *sequence_weights): + """Returns counterfactual regrets and reach weights as a tuple. + + Args: + regret_player: The player for whom counterfactual regrets are computed. + reach_weight_player: The player for whom reach weights are computed. + *sequence_weights: A list of non-negative sequence weights for each player + determining the policy profile. Behavioral policies are generated by + normalizing sequence weights corresponding to actions in each + information state by their sum. + + Returns: + The counterfactual regrets and reach weights as an `np.array`-`np.array` + tuple. + + Raises: + ValueError: If there are too few sequence weights at any information state + for any player. + """ + num_players = len(sequence_weights) + regrets = np.zeros(self.num_player_sequences[regret_player]) + reach_weights = np.zeros(self.num_player_sequences[reach_weight_player]) + + def _walk_descendants(state, reach_probabilities, chance_reach_probability): + """Compute `state`'s counterfactual regrets and reach weights. + + Args: + state: An OpenSpiel `State`. + reach_probabilities: The probability that each player plays to reach + `state`'s history. + chance_reach_probability: The probability that all chance outcomes in + `state`'s history occur. + + Returns: + The counterfactual value of `state`'s history. + Raises: + ValueError if there are too few sequence weights at any information + state for any player. 
+ """ + + if state.is_terminal(): + player_reach = ( + np.prod(reach_probabilities[:regret_player]) * + np.prod(reach_probabilities[regret_player + 1:])) + + counterfactual_reach_prob = player_reach * chance_reach_probability + u = self.terminal_values[state.history_str()] + return u[regret_player] * counterfactual_reach_prob + + elif state.is_chance_node(): + v = 0.0 + for action, action_prob in state.chance_outcomes(): + v += _walk_descendants( + state.child(action), reach_probabilities, + chance_reach_probability * action_prob) + return v + + player = state.current_player() + info_state = state.information_state_string(player) + sequence_idx_offset = self.info_state_to_sequence_idx[info_state] + actions = state.legal_actions(player) + + sequence_idx_end = sequence_idx_offset + len(actions) + my_sequence_weights = sequence_weights[player][ + sequence_idx_offset:sequence_idx_end] + + if len(my_sequence_weights) < len(actions): + raise ValueError( + ("Invalid policy: Policy {player} at sequence offset " + "{sequence_idx_offset} has only {policy_len} elements but there " + "are {num_actions} legal actions.").format( + player=player, + sequence_idx_offset=sequence_idx_offset, + policy_len=len(my_sequence_weights), + num_actions=len(actions))) + + policy = normalized_by_sum(my_sequence_weights) + action_values = np.zeros(len(actions)) + state_value = 0.0 + + is_reach_weight_player_node = player == reach_weight_player + is_regret_player_node = player == regret_player + + reach_prob = reach_probabilities[player] + for action_idx, action in enumerate(actions): + action_prob = policy[action_idx] + next_reach_prob = reach_prob * action_prob + + if is_reach_weight_player_node: + reach_weight_player_plays_down_this_line = next_reach_prob > 0 + if not reach_weight_player_plays_down_this_line: + continue + sequence_idx = sequence_idx_offset + action_idx + reach_weights[sequence_idx] += next_reach_prob + + reach_probabilities[player] = next_reach_prob + + action_value = _walk_descendants( + state.child(action), reach_probabilities, chance_reach_probability) + + if is_regret_player_node: + state_value = state_value + action_prob * action_value + else: + state_value = state_value + action_value + action_values[action_idx] = action_value + + reach_probabilities[player] = reach_prob + + if is_regret_player_node: + regrets[sequence_idx_offset:sequence_idx_end] += ( + action_values - state_value) + return state_value + + # End of _walk_descendants + + _walk_descendants(self.root, np.ones(num_players), 1.0) + return regrets, reach_weights + + +def normalized_by_sum(v, axis=0, mutate=False): + """Divides each element of `v` along `axis` by the sum of `v` along `axis`. + + Assumes `v` is non-negative. Sets of `v` elements along `axis` that sum to + zero are normalized to `1 / v.shape[axis]` (a uniform distribution). + + Args: + v: Non-negative array of values. + axis: An integer axis. + mutate: Whether or not to store the result in `v`. + + Returns: + The normalized array. + """ + v = np.asarray(v) + denominator = v.sum(axis=axis, keepdims=True) + denominator_is_zero = denominator == 0 + + # Every element of `denominator_is_zero` that is true corresponds to a + # set of elements in `v` along `axis` that are all zero. By setting these + # denominators to `v.shape[axis]` and adding 1 to each of the corresponding + # elements in `v`, these elements are normalized to `1 / v.shape[axis]` + # (a uniform distribution). 
+ denominator += v.shape[axis] * denominator_is_zero + if mutate: + v += denominator_is_zero + v /= denominator + else: + v = (v + denominator_is_zero) / denominator + return v + + +def relu(v): + """Returns the element-wise maximum between `v` and 0.""" + return np.maximum(v, 0) + + +def _descendant_states(state, depth_limit, depth, include_terminals, + include_chance_states): + """Recursive descendant state generator. + + Decision states are always yielded. + + Args: + state: The current state. + depth_limit: The descendant depth limit. Zero will ensure only + `initial_state` is generated and negative numbers specify the absence of a + limit. + depth: The current descendant depth. + include_terminals: Whether or not to include terminal states. + include_chance_states: Whether or not to include chance states. + + Yields: + `State`, a state that is `initial_state` or one of its descendants. + """ + if state.is_terminal(): + if include_terminals: + yield state + return + + if depth > depth_limit >= 0: + return + + if not state.is_chance_node() or include_chance_states: + yield state + + for action in state.legal_actions(): + state_for_search = state.child(action) + for substate in _descendant_states(state_for_search, depth_limit, depth + 1, + include_terminals, + include_chance_states): + yield substate + + +def all_states(initial_state, + depth_limit=-1, + include_terminals=False, + include_chance_states=False): + """Generates states from `initial_state`. + + Generates the set of states that includes only the `initial_state` and its + descendants that satisfy the inclusion criteria specified by the remaining + parameters. Decision states are always included. + + Args: + initial_state: The initial state from which to generate states. + depth_limit: The descendant depth limit. Zero will ensure only + `initial_state` is generated and negative numbers specify the absence of a + limit. Defaults to no limit. + include_terminals: Whether or not to include terminal states. Defaults to + `False`. + include_chance_states: Whether or not to include chance states. Defaults to + `False`. + + Returns: + A generator that yields the `initial_state` and its descendants that + satisfy the inclusion criteria specified by the remaining parameters. + """ + return _descendant_states( + state=initial_state, + depth_limit=depth_limit, + depth=0, + include_terminals=include_terminals, + include_chance_states=include_chance_states) + + +def sequence_weights_to_tabular_profile(root, policy_fn): + """Returns the `dict` of `list`s of action-prob pairs-form of `policy_fn`.""" + tabular_policy = {} + players = range(root.num_players()) + for state in all_states(root): + for player in players: + legal_actions = state.legal_actions(player) + if len(legal_actions) < 1: + continue + info_state = state.information_state_string(player) + if info_state in tabular_policy: + continue + my_policy = policy_fn(state) + tabular_policy[info_state] = list(zip(legal_actions, my_policy)) + return tabular_policy + + +@tf.function +def feedforward_evaluate(layers, + x, + use_skip_connections=False, + hidden_are_factored=False): + """Evaluates `layers` as a feedforward neural network on `x`. + + Args: + layers: The neural network layers (`tf.Tensor` -> `tf.Tensor` callables). + x: The array-like input to evaluate. Must be trivially convertible to a + matrix (tensor rank <= 2). + use_skip_connections: Whether or not to use skip connections between layers. 
+ If the layer input has too few features to be added to the layer output, + then the end of input is padded with zeros. If it has too many features, + then the input is truncated. + hidden_are_factored: Whether or not hidden logical layers are factored into + two separate linear transformations stored as adjacent elements of + `layers`. + + Returns: + The `tf.Tensor` evaluation result. + + Raises: + ValueError: If `x` has a rank greater than 2. + """ + x = tensor_to_matrix(x) + i = 0 + while i < len(layers) - 1: + y = layers[i](x) + i += 1 + if hidden_are_factored: + y = layers[i](y) + i += 1 + if use_skip_connections: + my_num_features = x.shape[1].value + padding = y.shape[1].value - my_num_features + if padding > 0: + zeros = tf.zeros([tf.shape(x)[0], padding]) + x = tf.concat([x, zeros], axis=1) + elif padding < 0: + x = tf.strided_slice(x, [0, 0], [tf.shape(x)[0], y.shape[1].value]) + y = x + y + x = y + return layers[-1](x) + + +class DeepRcfrModel(object): + """A flexible deep feedforward RCFR model class. + + Properties: + layers: The `tf.keras.Layer` layers describing this model. + trainable_variables: The trainable `tf.Variable`s in this model's `layers`. + losses: This model's layer specific losses (e.g. regularizers). + """ + + def __init__(self, + game, + num_hidden_units, + num_hidden_layers=1, + num_hidden_factors=0, + hidden_activation=tf.nn.relu, + use_skip_connections=False, + regularizer=None): + """Creates a new `DeepRcfrModel. + + Args: + game: The OpenSpiel game being solved. + num_hidden_units: The number of units in each hidden layer. + num_hidden_layers: The number of hidden layers. Defaults to 1. + num_hidden_factors: The number of hidden factors or the matrix rank of the + layer. If greater than zero, hidden layers will be split into two + separate linear transformations, the first with + `num_hidden_factors`-columns and the second with + `num_hidden_units`-columns. The result is that the logical hidden layer + is a rank-`num_hidden_units` matrix instead of a rank-`num_hidden_units` + matrix. When `num_hidden_units < num_hidden_units`, this is effectively + implements weight sharing. Defaults to 0. + hidden_activation: The activation function to apply over hidden layers. + Defaults to `tf.nn.relu`. + use_skip_connections: Whether or not to apply skip connections (layer + output = layer(x) + x) on hidden layers. Zero padding or truncation is + used to match the number of columns on layer inputs and outputs. + regularizer: A regularizer to apply to each layer. Defaults to `None`. + """ + self._use_skip_connections = use_skip_connections + self._hidden_are_factored = num_hidden_factors > 0 + + self.layers = [] + for _ in range(num_hidden_layers): + if self._hidden_are_factored: + self.layers.append( + tf.keras.layers.Dense( + num_hidden_factors, + use_bias=True, + kernel_regularizer=regularizer)) + + self.layers.append( + tf.keras.layers.Dense( + num_hidden_units, + use_bias=True, + activation=hidden_activation, + kernel_regularizer=regularizer)) + + self.layers.append( + tf.keras.layers.Dense(1, use_bias=True, kernel_regularizer=regularizer)) + + # Construct variables for all layers by exercising the network. 
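+    # Keras layers create their weights lazily on first call, so one forward
+    # pass with a dummy zero input materializes every variable before
+    # `trainable_variables` and `losses` are collected below.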
+ x = tf.zeros([1, num_features(game)]) + for layer in self.layers: + x = layer(x) + + self.trainable_variables = sum( + [layer.trainable_variables for layer in self.layers], []) + self.losses = sum([layer.losses for layer in self.layers], []) + + def __call__(self, x): + """Evaluates this model on `x`.""" + return feedforward_evaluate( + layers=self.layers, + x=x, + use_skip_connections=self._use_skip_connections, + hidden_are_factored=self._hidden_are_factored) + + +class _RcfrSolver(object): + """An abstract RCFR solver class. + + Requires that subclasses implement `evaluate_and_update_policy`. + """ + + def __init__(self, game, models, truncate_negative=False, session=None): + """Creates a new `_RcfrSolver`. + + Args: + game: An OpenSpiel `Game`. + models: Current policy models (optimizable array-like -> `tf.Tensor` + callables) for both players. + truncate_negative: Whether or not to truncate negative (approximate) + cumulative regrets to zero to implement RCFR+. Defaults to `False`. + session: A TensorFlow `Session` to convert sequence weights from + `tf.Tensor`s produced by `models` to `np.array`s. If `None`, it is + assumed that eager mode is enabled. Defaults to `None`. + """ + self._game = game + self._models = models + self._truncate_negative = truncate_negative + self._root_wrapper = RootStateWrapper(game.new_initial_state()) + self._session = session + + self._cumulative_seq_probs = [ + np.zeros(n) for n in self._root_wrapper.num_player_sequences + ] + + def _sequence_weights(self, player=None): + """Returns regret-like weights for each sequence as an `np.array`. + + Negative weights are truncated to zero. + + Args: + player: The player to compute weights for, or both if `player` is `None`. + Defaults to `None`. + """ + if player is None: + return [ + self._sequence_weights(player) + for player in range(self._game.num_players()) + ] + else: + tensor = tf.nn.relu( + tf.squeeze(self._models[player]( + self._root_wrapper.sequence_features[player]))) + return tensor.numpy() if self._session is None else self._session(tensor) + + def evaluate_and_update_policy(self, train_fn): + """Performs a single step of policy evaluation and policy improvement. + + Args: + train_fn: A (model, `tf.data.Dataset`) function that trains the given + regression model to accurately reproduce the x to y mapping given x-y + data. + + Raises: + NotImplementedError: If not overridden by child class. + """ + raise NotImplementedError() + + def current_policy(self): + """Returns the current policy profile. + + Returns: + A `dict>` that maps info state + strings to `Action`-probability pairs describing each player's policy. + """ + return self._root_wrapper.sequence_weights_to_tabular_profile( + self._sequence_weights()) + + def average_policy(self): + """Returns the average of all policies iterated. + + This average policy converges toward a Nash policy as the number of + iterations increases as long as the regret prediction error decreases + continually [Morrill, 2016]. + + The policy is computed using the accumulated policy probabilities computed + using `evaluate_and_update_policy`. + + Returns: + A `dict>` that maps info state + strings to (Action, probability) pairs describing each player's policy. 
+ """ + return self._root_wrapper.sequence_weights_to_tabular_profile( + self._cumulative_seq_probs) + + def _previous_player(self, player): + """The previous player in the turn ordering.""" + return player - 1 if player > 0 else self._game.num_players() - 1 + + def _average_policy_update_player(self, regret_player): + """The player for whom the average policy should be updated.""" + return self._previous_player(regret_player) + + +class RcfrSolver(_RcfrSolver): + """RCFR with an effectively infinite regret data buffer. + + Exact or bootstrapped cumulative regrets are stored as if an infinitely + large data buffer. The average strategy is updated and stored in a full + game-size table. Reproduces the RCFR versions used in experiments by + Waugh et al. [2015] and Morrill [2016] except that this class does not + restrict the user to regression tree models. + """ + + def __init__(self, + game, + models, + bootstrap=None, + truncate_negative=False, + session=None): + self._bootstrap = bootstrap + super(RcfrSolver, self).__init__( + game, models, truncate_negative=truncate_negative, session=session) + + self._regret_targets = [ + np.zeros(n) for n in self._root_wrapper.num_player_sequences + ] + + def evaluate_and_update_policy(self, train_fn): + """Performs a single step of policy evaluation and policy improvement. + + Args: + train_fn: A (model, `tf.data.Dataset`) function that trains the given + regression model to accurately reproduce the x to y mapping given x-y + data. + """ + sequence_weights = self._sequence_weights() + player_seq_features = self._root_wrapper.sequence_features + for regret_player in range(self._game.num_players()): + seq_prob_player = self._average_policy_update_player(regret_player) + + regrets, seq_probs = ( + self._root_wrapper.counterfactual_regrets_and_reach_weights( + regret_player, seq_prob_player, *sequence_weights)) + + if self._bootstrap: + self._regret_targets[regret_player][:] = sequence_weights[regret_player] + if self._truncate_negative: + regrets = np.maximum(-relu(self._regret_targets[regret_player]), + regrets) + + self._regret_targets[regret_player] += regrets + self._cumulative_seq_probs[seq_prob_player] += seq_probs + + targets = tf.expand_dims(self._regret_targets[regret_player], axis=1) + data = tf.data.Dataset.from_tensor_slices( + (player_seq_features[regret_player], targets)) + + regret_player_model = self._models[regret_player] + train_fn(regret_player_model, data) + sequence_weights[regret_player] = self._sequence_weights(regret_player) + + +class ReservoirBuffer(object): + """A generic reservoir buffer data structure. + + After every insertion, its contents represents a `size`-size uniform + random sample from the stream of candidates that have been encountered. 
+ """ + + def __init__(self, size): + self.size = size + self.num_elements = 0 + self._buffer = np.full([size], None, dtype=object) + self._num_candidates = 0 + + @property + def buffer(self): + return self._buffer[:self.num_elements] + + def insert(self, candidate): + """Consider this `candidate` for inclusion in this sampling buffer.""" + self._num_candidates += 1 + if self.num_elements < self.size: + self._buffer[self.num_elements] = candidate + self.num_elements += 1 + return + idx = np.random.choice(self._num_candidates) + if idx < self.size: + self._buffer[idx] = candidate + + def insert_all(self, candidates): + """Consider all `candidates` for inclusion in this sampling buffer.""" + for candidate in candidates: + self.insert(candidate) + + def num_available_spaces(self): + """The number of freely available spaces in this buffer.""" + return self.size - self.num_elements + + +class ReservoirRcfrSolver(_RcfrSolver): + """RCFR with a reservoir buffer for storing regret data. + + The average strategy is updated and stored in a full game-size table. + """ + + def __init__(self, + game, + models, + buffer_size, + truncate_negative=False, + session=None): + self._buffer_size = buffer_size + super(ReservoirRcfrSolver, self).__init__( + game, models, truncate_negative=truncate_negative, session=session) + self._reservoirs = [ + ReservoirBuffer(self._buffer_size) for _ in range(game.num_players()) + ] + + def evaluate_and_update_policy(self, train_fn): + """Performs a single step of policy evaluation and policy improvement. + + Args: + train_fn: A (model, `tf.data.Dataset`) function that trains the given + regression model to accurately reproduce the x to y mapping given x-y + data. + """ + sequence_weights = self._sequence_weights() + player_seq_features = self._root_wrapper.sequence_features + for regret_player in range(self._game.num_players()): + seq_prob_player = self._average_policy_update_player(regret_player) + + regrets, seq_probs = ( + self._root_wrapper.counterfactual_regrets_and_reach_weights( + regret_player, seq_prob_player, *sequence_weights)) + + if self._truncate_negative: + regrets = np.maximum(-relu(sequence_weights[regret_player]), regrets) + + next_data = list( + zip(player_seq_features[regret_player], tf.expand_dims(regrets, 1))) + + self._reservoirs[regret_player].insert_all(next_data) + + self._cumulative_seq_probs[seq_prob_player] += seq_probs + + my_buffer = tuple( + tf.stack(a) for a in zip(*self._reservoirs[regret_player].buffer)) + + data = tf.data.Dataset.from_tensor_slices(my_buffer) + + regret_player_model = self._models[regret_player] + train_fn(regret_player_model, data) + sequence_weights[regret_player] = self._sequence_weights(regret_player) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/rcfr_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/rcfr_example.py new file mode 100644 index 0000000..7a146cd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/rcfr_example.py @@ -0,0 +1,118 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Example use of the RCFR algorithm on Kuhn Poker.""" + +from absl import app +from absl import flags +import tensorflow.compat.v1 as tf + +from open_spiel.python.algorithms import rcfr +import pyspiel + +tf.enable_eager_execution() + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("iterations", 100, "Number of iterations") +flags.DEFINE_string("game", "kuhn_poker", "Name of the game") +flags.DEFINE_integer("players", 2, "Number of players") +flags.DEFINE_integer("print_freq", 10, "How often to print the exploitability") +flags.DEFINE_boolean("bootstrap", False, + "Whether or not to use bootstrap targets") +flags.DEFINE_boolean("truncate_negative", False, + "Whether or not to truncate negative targets to zero") +flags.DEFINE_integer( + "buffer_size", -1, + "A reservoir buffer size. A non-positive size implies an effectively " + "infinite buffer.") +flags.DEFINE_integer("num_hidden_layers", 1, + "The number of hidden layers in the regret model.") +flags.DEFINE_integer("num_hidden_units", 13, + "The number of hidden layers in the regret model.") +flags.DEFINE_integer( + "num_hidden_factors", 8, + "The number of factors in each hidden layer in the regret model.") +flags.DEFINE_boolean( + "use_skip_connections", True, + "Whether or not to use skip connections in the regret model.") +flags.DEFINE_integer( + "num_epochs", 200, + "The number of epochs to run between each iterations to update the regret " + "models.") +flags.DEFINE_integer("batch_size", 100, "The regret model training batch size.") +flags.DEFINE_float("step_size", 0.01, "The ADAM (AMSGrad) optimizer step size.") + + +def main(_): + game = pyspiel.load_game(FLAGS.game, {"players": FLAGS.players}) + + models = [] + for _ in range(game.num_players()): + models.append( + rcfr.DeepRcfrModel( + game, + num_hidden_layers=FLAGS.num_hidden_layers, + num_hidden_units=FLAGS.num_hidden_units, + num_hidden_factors=FLAGS.num_hidden_factors, + use_skip_connections=FLAGS.use_skip_connections)) + + if FLAGS.buffer_size > 0: + solver = rcfr.ReservoirRcfrSolver( + game, + models, + FLAGS.buffer_size, + truncate_negative=FLAGS.truncate_negative) + else: + solver = rcfr.RcfrSolver( + game, + models, + truncate_negative=FLAGS.truncate_negative, + bootstrap=FLAGS.bootstrap) + + def _train_fn(model, data): + """Train `model` on `data`.""" + data = data.shuffle(FLAGS.batch_size * 10) + data = data.batch(FLAGS.batch_size) + data = data.repeat(FLAGS.num_epochs) + + optimizer = tf.keras.optimizers.Adam( + learning_rate=FLAGS.step_size, amsgrad=True + ) + + @tf.function + def _train(): + for x, y in data: + with tf.GradientTape() as tape: + loss = tf.losses.huber_loss(y, model(x), delta=0.01) + optimizer.apply_gradients( + zip( + tape.gradient(loss, model.trainable_variables), + model.trainable_variables, + ) + ) + + _train() + + # End of _train_fn + + for i in range(FLAGS.iterations): + solver.evaluate_and_update_policy(_train_fn) + if i % FLAGS.print_freq == 0: + conv = pyspiel.exploitability(game, solver.average_policy()) + print("Iteration {} exploitability {}".format(i, conv)) + + +if __name__ == "__main__": + app.run(main) diff --git 
a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/rcfr_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/rcfr_test.py new file mode 100644 index 0000000..29ce159 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/rcfr_test.py @@ -0,0 +1,601 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools + +from absl.testing import absltest +from absl.testing import parameterized + +import numpy as np +# Note: this import needs to come before Tensorflow to fix a malloc error. +import pyspiel # pylint: disable=g-bad-import-order +import tensorflow.compat.v1 as tf + +from open_spiel.python.algorithms import rcfr + +# Temporarily disable TF2 behavior while the code is not updated. +tf.disable_v2_behavior() + +tf.enable_eager_execution() + +_GAME = pyspiel.load_game('kuhn_poker') +_BOOLEANS = [False, True] + + +def _new_model(): + return rcfr.DeepRcfrModel( + _GAME, + num_hidden_layers=1, + num_hidden_units=13, + use_skip_connections=True) + + +class RcfrTest(parameterized.TestCase, tf.test.TestCase): + + def setUp(self): + super(RcfrTest, self).setUp() + tf.random.set_random_seed(42) + + def test_with_one_hot_action_features_single_state_vector(self): + information_state_features = [1., 2., 3.] 
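+ # Each legal action contributes one row: the info-state features with that
+ # action's one-hot encoding (length `num_distinct_actions`) appended.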
+ features = rcfr.with_one_hot_action_features( + information_state_features, + legal_actions=[0, 1], + num_distinct_actions=3) + self.assertAllEqual([ + [1., 2., 3., 1., 0., 0.], + [1., 2., 3., 0., 1., 0.], + ], features) + + features = rcfr.with_one_hot_action_features( + information_state_features, + legal_actions=[1, 2], + num_distinct_actions=3) + self.assertAllEqual([ + [1., 2., 3., 0., 1., 0.], + [1., 2., 3., 0., 0., 1.], + ], features) + + def test_with_one_hot_action_features_batch(self): + info_state_features = [[1., 2., 3.], [4., 5., 6.]] + features = rcfr.with_one_hot_action_features( + info_state_features, legal_actions=[0, 1], num_distinct_actions=3) + + self.assertAllEqual([ + [1., 2., 3., 1., 0., 0.], + [4., 5., 6., 1., 0., 0.], + [1., 2., 3., 0., 1., 0.], + [4., 5., 6., 0., 1., 0.], + ], features) + + features = rcfr.with_one_hot_action_features( + info_state_features, legal_actions=[1, 2], num_distinct_actions=3) + + self.assertAllEqual([ + [1., 2., 3., 0., 1., 0.], + [4., 5., 6., 0., 1., 0.], + [1., 2., 3., 0., 0., 1.], + [4., 5., 6., 0., 0., 1.], + ], features) + + def test_with_one_hot_action_features_error(self): + info_state_features = tf.ones([1, 1, 1]) + + with self.assertRaises(ValueError): + rcfr.with_one_hot_action_features( + info_state_features, legal_actions=[0, 1], num_distinct_actions=3) + + def test_sequence_features(self): + state = _GAME.new_initial_state() + while state.is_chance_node(): + state.apply_action(state.legal_actions()[0]) + assert len(state.legal_actions()) == 2 + features = rcfr.sequence_features(state, 3) + + x = state.information_state_tensor() + self.assertAllEqual([x + [1, 0, 0], x + [0, 1, 0]], features) + + def test_num_features(self): + assert rcfr.num_features(_GAME) == 13 + + def test_root_state_wrapper_num_sequences(self): + root_state_wrapper = rcfr.RootStateWrapper(_GAME.new_initial_state()) + assert root_state_wrapper.num_player_sequences[0] == 12 + assert root_state_wrapper.num_player_sequences[1] == 12 + + def test_root_state_wrapper_sequence_indices(self): + root_state_wrapper = rcfr.RootStateWrapper(_GAME.new_initial_state()) + self.assertAllEqual( + { + # Info state string -> initial sequence index map for player 1. + '0': 0, + '0pb': 2, + '1': 4, + '1pb': 6, + '2': 8, + '2pb': 10, + # Info state string -> initial sequence index map for player 2. 
+ '1p': 0, + '1b': 2, + '2p': 4, + '2b': 6, + '0p': 8, + '0b': 10, + }, + root_state_wrapper.info_state_to_sequence_idx) + + def test_root_state_wrapper_sequence_features(self): + root_state_wrapper = rcfr.RootStateWrapper(_GAME.new_initial_state()) + + p1_info_state_features = [ + [1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.], + [1., 0., 1., 0., 0., 1., 0., 0., 1., 0., 0.], + [1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.], + [1., 0., 0., 1., 0., 1., 0., 0., 1., 0., 0.], + [1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.], + [1., 0., 0., 0., 1., 1., 0., 0., 1., 0., 0.], + ] + p2_info_state_features = [ + [0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0.], + [0., 1., 0., 1., 0., 0., 1., 0., 0., 0., 0.], + [0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 0.], + [0., 1., 0., 0., 1., 0., 1., 0., 0., 0., 0.], + [0., 1., 1., 0., 0., 1., 0., 0., 0., 0., 0.], + [0., 1., 1., 0., 0., 0., 1., 0., 0., 0., 0.], + ] + action_features = [[1., 0.], [0., 1.]] + expected_p1_sequence_features = [ + p1_info_state_features[0] + action_features[0], + p1_info_state_features[0] + action_features[1], + p1_info_state_features[1] + action_features[0], + p1_info_state_features[1] + action_features[1], + p1_info_state_features[2] + action_features[0], + p1_info_state_features[2] + action_features[1], + p1_info_state_features[3] + action_features[0], + p1_info_state_features[3] + action_features[1], + p1_info_state_features[4] + action_features[0], + p1_info_state_features[4] + action_features[1], + p1_info_state_features[5] + action_features[0], + p1_info_state_features[5] + action_features[1], + ] + expected_p2_sequence_features = [ + p2_info_state_features[0] + action_features[0], + p2_info_state_features[0] + action_features[1], + p2_info_state_features[1] + action_features[0], + p2_info_state_features[1] + action_features[1], + p2_info_state_features[2] + action_features[0], + p2_info_state_features[2] + action_features[1], + p2_info_state_features[3] + action_features[0], + p2_info_state_features[3] + action_features[1], + p2_info_state_features[4] + action_features[0], + p2_info_state_features[4] + action_features[1], + p2_info_state_features[5] + action_features[0], + p2_info_state_features[5] + action_features[1], + ] + expected_sequence_features = [ + expected_p1_sequence_features, expected_p2_sequence_features + ] + + self.assertAllEqual(expected_sequence_features, + root_state_wrapper.sequence_features) + + def test_root_state_wrapper_sequence_terminal_values(self): + root_state_wrapper = rcfr.RootStateWrapper(_GAME.new_initial_state()) + + expected_terminal_values = {} + no_call_histories_p1_win = [ + '2, 0, 0, 0', '2, 0, 1, 0', '0, 1, 1, 0', '1, 2, 1, 0', '1, 0, 1, 0', + '1, 0, 0, 0', '2, 1, 1, 0', '2, 1, 0, 0', '0, 2, 1, 0' + ] + for h in no_call_histories_p1_win: + expected_terminal_values[h] = [1., -1.] + + no_call_histories_p2_win = [ + '0, 2, 0, 1, 0', '0, 1, 0, 0', '0, 1, 0, 1, 0', '0, 2, 0, 0', + '1, 2, 0, 0', '2, 0, 0, 1, 0', '1, 2, 0, 1, 0', '2, 1, 0, 1, 0', + '1, 0, 0, 1, 0' + ] + for h in no_call_histories_p2_win: + expected_terminal_values[h] = [-1., 1.] + + call_histories_p1_win = [ + '1, 0, 1, 1', '2, 1, 1, 1', '2, 1, 0, 1, 1', '2, 0, 0, 1, 1', + '1, 0, 0, 1, 1', '2, 0, 1, 1' + ] + for h in call_histories_p1_win: + expected_terminal_values[h] = [2., -2.] + + call_histories_p2_win = [ + '0, 2, 0, 1, 1', '0, 1, 0, 1, 1', '0, 1, 1, 1', '1, 2, 1, 1', + '1, 2, 0, 1, 1', '0, 2, 1, 1' + ] + for h in call_histories_p2_win: + expected_terminal_values[h] = [-2., 2.] 
+ + self.assertAllEqual( + expected_terminal_values, + {k: v.tolist() for k, v in root_state_wrapper.terminal_values.items()}) + + def test_normalized_by_sum(self): + self.assertAllClose( + rcfr.normalized_by_sum([1., 2., 3., 4.]), [0.1, 0.2, 0.3, 0.4]) + + def test_counterfactual_regrets_and_reach_weights_value_error(self): + root = rcfr.RootStateWrapper(_GAME.new_initial_state()) + + # Initialize arbitrary weights to generate an arbitrary profile. + sequence_weights1_with_a_missing_sequence = [ + 0.4967141530112327, + 0.0, + 0.6476885381006925, + 1.5230298564080254, + 0.0, + 0.0, + 1.5792128155073915, + 0.7674347291529088, + 0.0, + 0.5425600435859647, + 0.0, + # 0.0, + ] + # Ensure this player's policy is fully mixed so that each of player 1's + # information states are reached. + sequence_weights2 = [ + 0.24196227156603412, + 0.1, + 0.1, + 0.1, + 0.1, + 0.3142473325952739, + 0.1, + 0.1, + 1.465648768921554, + 0.1, + 0.06752820468792384, + 0.1, + ] + + with self.assertRaises(ValueError): + root.counterfactual_regrets_and_reach_weights( + 0, 1, sequence_weights1_with_a_missing_sequence, sequence_weights2) + + def test_counterfactual_regrets_and_reach_weights(self): + root = rcfr.RootStateWrapper(_GAME.new_initial_state()) + + # Initialize arbitrary weights to generate an arbitrary profile. + sequence_weights1 = [ + 0.4967141530112327, + 0.0, + 0.6476885381006925, + 1.5230298564080254, + 0.0, + 0.0, + 1.5792128155073915, + 0.7674347291529088, + 0.0, + 0.5425600435859647, + 0.0, + 0.0, + ] + sequence_weights2 = [ + 0.24196227156603412, + 0.0, + 0.0, + 0.0, + 0.0, + 0.3142473325952739, + 0.0, + 0.0, + 1.465648768921554, + 0.0, + 0.06752820468792384, + 0.0, + ] + + # These expected regrets and sequence weights were computed for the given + # sequence weights. 
+ expected_regrets_given_sequence_weights = [ + 0., + 0.283604, + 0.116937, + -0.049729, + -0.06892, + 0.06892, + 0.054506, + -0.112161, + -0.083333, + 0., + 0., + 0., + ] + expected_reach_weights_given_sequence_weights = [ + 2., + 0., + 1., + 1., + 0., + 2., + 1., + 1., + 2., + 0., + 2., + 0., + ] + + regrets, weights = root.counterfactual_regrets_and_reach_weights( + 0, 1, sequence_weights1, sequence_weights2) + + self.assertAllClose(regrets, expected_regrets_given_sequence_weights) + self.assertAllClose(weights, expected_reach_weights_given_sequence_weights) + + def test_all_states(self): + states = rcfr.all_states( + _GAME.new_initial_state(), + depth_limit=-1, + include_terminals=False, + include_chance_states=False) + self.assertLen(list(states), 24) + + states = rcfr.all_states( + _GAME.new_initial_state(), + depth_limit=-1, + include_terminals=True, + include_chance_states=False) + self.assertLen(list(states), 54) + + states = rcfr.all_states( + _GAME.new_initial_state(), + depth_limit=-1, + include_terminals=False, + include_chance_states=True) + self.assertLen(list(states), 28) + + states = rcfr.all_states( + _GAME.new_initial_state(), + depth_limit=-1, + include_terminals=True, + include_chance_states=True) + self.assertLen(list(states), 58) + + def test_sequence_weights_to_tabular_profile(self): + root = rcfr.RootStateWrapper(_GAME.new_initial_state()) + + def policy_fn(state): + """Generates a policy profile by treating sequence indices as weights.""" + info_state = state.information_state_string() + sequence_offset = root.info_state_to_sequence_idx[info_state] + num_actions = len(state.legal_actions()) + return rcfr.normalized_by_sum( + list(range(sequence_offset, sequence_offset + num_actions))) + + profile = rcfr.sequence_weights_to_tabular_profile(root.root, policy_fn) + + expected_profile = { + # Player 1 + '0': [(0, 0.), (1, 1.)], # Sequences 0 and 1 (sums to 1) + '0pb': [(0, 0.4), (1, 0.6)], # Sequences 2 and 3 (sums to 5) + # Sequences 4 and 5 (sums to 9) + '1': [(0, 0.44444444444444442), (1, 0.55555555555555558)], + # Sequences 6 and 7 (sums to 13) + '1pb': [(0, 0.46153846153846156), (1, 0.53846153846153844)], + # Sequences 8 and 9 (sums to 17) + '2': [(0, 0.47058823529411764), (1, 0.52941176470588236)], + # Sequences 10 and 11 (sums to 21) + '2pb': [(0, 0.47619047619047616), (1, 0.52380952380952384)], + + # Player 2 + '1p': [(0, 0.), (1, 1.)], # Sequences 0 and 1 (sums to 1) + '1b': [(0, 0.4), (1, 0.6)], # Sequences 2 and 3 (sums to 5) + # Sequences 4 and 5 (sums to 9) + '2p': [(0, 0.44444444444444442), (1, 0.55555555555555558)], + # Sequences 6 and 7 (sums to 13) + '2b': [(0, 0.46153846153846156), (1, 0.53846153846153844)], + # Sequences 8 and 9 (sums to 17) + '0p': [(0, 0.47058823529411764), (1, 0.52941176470588236)], + # Sequences 10 and 11 (sums to 21) + '0b': [(0, 0.47619047619047616), (1, 0.52380952380952384)], + } + self.assertAllClose(profile, expected_profile) + + def test_cfr(self): + root = rcfr.RootStateWrapper(_GAME.new_initial_state()) + num_half_iterations = 6 + + cumulative_regrets = [np.zeros(n) for n in root.num_player_sequences] + cumulative_reach_weights = [np.zeros(n) for n in root.num_player_sequences] + + average_profile = root.sequence_weights_to_tabular_profile( + cumulative_reach_weights) + self.assertGreater(pyspiel.nash_conv(_GAME, average_profile), 0.91) + + regret_player = 0 + for _ in range(num_half_iterations): + reach_weights_player = 1 if regret_player == 0 else 0 + + regrets, reach = root.counterfactual_regrets_and_reach_weights( + 
regret_player, reach_weights_player, *rcfr.relu(cumulative_regrets)) + + cumulative_regrets[regret_player] += regrets + cumulative_reach_weights[reach_weights_player] += reach + + regret_player = reach_weights_player + + average_profile = root.sequence_weights_to_tabular_profile( + cumulative_reach_weights) + self.assertLess(pyspiel.nash_conv(_GAME, average_profile), 0.27) + + def test_rcfr_functions(self): + models = [_new_model() for _ in range(_GAME.num_players())] + root = rcfr.RootStateWrapper(_GAME.new_initial_state()) + + num_half_iterations = 4 + num_epochs = 100 + + cumulative_regrets = [np.zeros(n) for n in root.num_player_sequences] + cumulative_reach_weights = [np.zeros(n) for n in root.num_player_sequences] + + average_profile = root.sequence_weights_to_tabular_profile( + cumulative_reach_weights) + self.assertGreater(pyspiel.nash_conv(_GAME, average_profile), 0.91) + + regret_player = 0 + sequence_weights = [ + model(root.sequence_features[player]).numpy() + for player, model in enumerate(models) + ] + + for _ in range(num_half_iterations): + reach_weights_player = 1 if regret_player == 0 else 0 + + sequence_weights[reach_weights_player] = models[reach_weights_player]( + root.sequence_features[reach_weights_player]).numpy() + + regrets, seq_probs = root.counterfactual_regrets_and_reach_weights( + regret_player, reach_weights_player, *sequence_weights) + + cumulative_regrets[regret_player] += regrets + cumulative_reach_weights[reach_weights_player] += seq_probs + + data = tf.data.Dataset.from_tensor_slices( + (root.sequence_features[regret_player], + tf.expand_dims(cumulative_regrets[regret_player], axis=1))) + data = data.shuffle(12) + data = data.batch(12) + data = data.repeat(num_epochs) + + optimizer = tf.keras.optimizers.Adam(learning_rate=0.005, amsgrad=True) + + model = models[regret_player] + for x, y in data: + with tf.GradientTape() as tape: + loss = tf.losses.huber_loss(y, model(x)) + optimizer.apply_gradients( + zip( + tape.gradient(loss, model.trainable_variables), + model.trainable_variables, + ) + ) + + regret_player = reach_weights_player + + average_profile = root.sequence_weights_to_tabular_profile( + cumulative_reach_weights) + + self.assertLess(pyspiel.nash_conv(_GAME, average_profile), 0.91) + + @parameterized.parameters(list(itertools.product(_BOOLEANS, _BOOLEANS))) + def test_rcfr(self, bootstrap, truncate_negative): + num_epochs = 100 + num_iterations = 2 + models = [_new_model() for _ in range(_GAME.num_players())] + + patient = rcfr.RcfrSolver( + _GAME, models, bootstrap=bootstrap, truncate_negative=truncate_negative) + + def _train(model, data): + data = data.shuffle(12) + data = data.batch(12) + data = data.repeat(num_epochs) + + optimizer = tf.keras.optimizers.Adam(learning_rate=0.005, amsgrad=True) + + for x, y in data: + with tf.GradientTape() as tape: + loss = tf.losses.huber_loss(y, model(x)) + optimizer.apply_gradients( + zip( + tape.gradient(loss, model.trainable_variables), + model.trainable_variables, + ) + ) + + average_policy = patient.average_policy() + self.assertGreater(pyspiel.nash_conv(_GAME, average_policy), 0.91) + + for _ in range(num_iterations): + patient.evaluate_and_update_policy(_train) + + average_policy = patient.average_policy() + self.assertLess(pyspiel.nash_conv(_GAME, average_policy), 0.91) + + def test_reservior_buffer_insert(self): + buffer_size = 10 + patient = rcfr.ReservoirBuffer(buffer_size) + + x_buffer = [] + for i in range(buffer_size): + patient.insert(i) + x_buffer.append(i) + assert patient.num_elements == 
len(x_buffer) + self.assertAllEqual(x_buffer, patient.buffer) + + assert patient.num_available_spaces() == 0 + + for i in range(buffer_size): + patient.insert(buffer_size + i) + assert patient.num_elements == buffer_size + + def test_reservior_buffer_insert_all(self): + buffer_size = 10 + patient = rcfr.ReservoirBuffer(buffer_size) + + x_buffer = list(range(buffer_size)) + patient.insert_all(x_buffer) + assert patient.num_elements == buffer_size + self.assertAllEqual(x_buffer, patient.buffer) + + assert patient.num_available_spaces() == 0 + + x_buffer = list(range(buffer_size, 2 * buffer_size)) + patient.insert_all(x_buffer) + assert patient.num_elements == buffer_size + + def test_rcfr_with_buffer(self): + buffer_size = 12 + num_epochs = 100 + num_iterations = 2 + models = [_new_model() for _ in range(_GAME.num_players())] + + patient = rcfr.ReservoirRcfrSolver(_GAME, models, buffer_size=buffer_size) + + def _train(model, data): + data = data.shuffle(12) + data = data.batch(12) + data = data.repeat(num_epochs) + + optimizer = tf.keras.optimizers.Adam(learning_rate=0.005, amsgrad=True) + + for x, y in data: + with tf.GradientTape() as tape: + loss = tf.losses.huber_loss(y, model(x)) + optimizer.apply_gradients( + zip( + tape.gradient(loss, model.trainable_variables), + model.trainable_variables, + ) + ) + + average_policy = patient.average_policy() + self.assertGreater(pyspiel.nash_conv(_GAME, average_policy), 0.91) + + for _ in range(num_iterations): + patient.evaluate_and_update_policy(_train) + + average_policy = patient.average_policy() + self.assertLess(pyspiel.nash_conv(_GAME, average_policy), 0.91) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/rl_losses.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/rl_losses.py new file mode 100644 index 0000000..69dc2e1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/rl_losses.py @@ -0,0 +1,249 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Reinforcement learning loss functions. + +All the loss functions implemented here compute the loss for the policy (actor). +The critic loss functions are typically regression loss are omitted for their +simplicity. + +For the batch QPG, RM and RPG loss, please refer to the paper: +https://papers.nips.cc/paper/7602-actor-critic-policy-optimization-in-partially-observable-multiagent-environments.pdf + +The BatchA2C loss uses code from the `TRFL` library: +https://github.com/deepmind/trfl/blob/master/trfl/discrete_policy_gradient_ops.py +""" + +import tensorflow.compat.v1 as tf + +# Temporarily disable v2 behavior until code is updated. 
+tf.disable_v2_behavior() + + +def _assert_rank_and_shape_compatibility(tensors, rank): + if not tensors: + raise ValueError("List of tensors cannot be empty") + + union_of_shapes = tf.TensorShape(None) + for tensor in tensors: + tensor_shape = tensor.get_shape() + tensor_shape.assert_has_rank(rank) + union_of_shapes = union_of_shapes.merge_with(tensor_shape) + + +def compute_baseline(policy, action_values): + # V = pi * Q, backprop through pi but not Q. + return tf.reduce_sum( + tf.multiply(policy, tf.stop_gradient(action_values)), axis=1) + + +def compute_regrets(policy_logits, action_values): + """Compute regrets using pi and Q.""" + # Compute regret. + policy = tf.nn.softmax(policy_logits, axis=1) + # Avoid computing gradients for action_values. + action_values = tf.stop_gradient(action_values) + + baseline = compute_baseline(policy, action_values) + + regrets = tf.reduce_sum( + tf.nn.relu(action_values - tf.expand_dims(baseline, 1)), axis=1) + + return regrets + + +def compute_advantages(policy_logits, action_values, use_relu=False): + """Compute advantages using pi and Q.""" + # Compute advantage. + policy = tf.nn.softmax(policy_logits, axis=1) + # Avoid computing gradients for action_values. + action_values = tf.stop_gradient(action_values) + + baseline = compute_baseline(policy, action_values) + + advantages = action_values - tf.expand_dims(baseline, 1) + if use_relu: + advantages = tf.nn.relu(advantages) + + # Compute advantage weighted by policy. + policy_advantages = -tf.multiply(policy, tf.stop_gradient(advantages)) + return tf.reduce_sum(policy_advantages, axis=1) + + +def compute_a2c_loss(policy_logits, actions, advantages): + cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( + labels=actions, logits=policy_logits) + advantages = tf.stop_gradient(advantages) + advantages.get_shape().assert_is_compatible_with(cross_entropy.get_shape()) + return tf.multiply(cross_entropy, advantages) + + +def compute_entropy(policy_logits): + return tf.reduce_sum( + -tf.nn.softmax(policy_logits) * tf.nn.log_softmax(policy_logits), axis=-1) + + +def compute_entropy_loss(policy_logits): + """Compute an entropy loss. + + We want a value that we can minimize along with other losses, and where + minimizing means driving the policy towards a uniform distribution over + the actions. We thus scale it by negative one so that it can be simply + added to other losses (and so it can be considered a bonus for having + entropy). + + Args: + policy_logits: the policy logits. + + Returns: + entropy loss (negative entropy). + """ + entropy = compute_entropy(policy_logits) + scale = tf.constant(-1.0, dtype=tf.float32) + entropy_loss = tf.multiply(scale, entropy, name="entropy_loss") + return entropy_loss + + +class BatchQPGLoss(object): + """Defines the batch QPG loss op.""" + + def __init__(self, entropy_cost=None, name="batch_qpg_loss"): + self._entropy_cost = entropy_cost + self._name = name + + def loss(self, policy_logits, action_values): + """Constructs a TF graph that computes the QPG loss for batches. + + Args: + policy_logits: `B x A` tensor corresponding to policy logits. + action_values: `B x A` tensor corresponding to Q-values. + + Returns: + loss: A 0-D `float` tensor corresponding the loss. 
+ """ + _assert_rank_and_shape_compatibility([policy_logits, action_values], 2) + advantages = compute_advantages(policy_logits, action_values) + _assert_rank_and_shape_compatibility([advantages], 1) + total_adv = tf.reduce_mean(advantages, axis=0) + + total_loss = total_adv + if self._entropy_cost: + entropy_loss = tf.reduce_mean(compute_entropy_loss(policy_logits)) + scaled_entropy_loss = tf.multiply( + float(self._entropy_cost), entropy_loss, name="scaled_entropy_loss") + total_loss = tf.add( + total_loss, scaled_entropy_loss, name="total_loss_with_entropy") + + return total_loss + + +class BatchRMLoss(object): + """Defines the batch RM loss op.""" + + def __init__(self, entropy_cost=None, name="batch_rm_loss"): + self._entropy_cost = entropy_cost + self._name = name + + def loss(self, policy_logits, action_values): + """Constructs a TF graph that computes the RM loss for batches. + + Args: + policy_logits: `B x A` tensor corresponding to policy logits. + action_values: `B x A` tensor corresponding to Q-values. + + Returns: + loss: A 0-D `float` tensor corresponding the loss. + """ + _assert_rank_and_shape_compatibility([policy_logits, action_values], 2) + advantages = compute_advantages(policy_logits, action_values, use_relu=True) + _assert_rank_and_shape_compatibility([advantages], 1) + total_adv = tf.reduce_mean(advantages, axis=0) + + total_loss = total_adv + if self._entropy_cost: + entropy_loss = tf.reduce_mean(compute_entropy_loss(policy_logits)) + scaled_entropy_loss = tf.multiply( + float(self._entropy_cost), entropy_loss, name="scaled_entropy_loss") + total_loss = tf.add( + total_loss, scaled_entropy_loss, name="total_loss_with_entropy") + + return total_loss + + +class BatchRPGLoss(object): + """Defines the batch RPG loss op.""" + + def __init__(self, entropy_cost=None, name="batch_rpg_loss"): + self._entropy_cost = entropy_cost + self._name = name + + def loss(self, policy_logits, action_values): + """Constructs a TF graph that computes the RPG loss for batches. + + Args: + policy_logits: `B x A` tensor corresponding to policy logits. + action_values: `B x A` tensor corresponding to Q-values. + + Returns: + loss: A 0-D `float` tensor corresponding the loss. + """ + _assert_rank_and_shape_compatibility([policy_logits, action_values], 2) + regrets = compute_regrets(policy_logits, action_values) + _assert_rank_and_shape_compatibility([regrets], 1) + total_regret = tf.reduce_mean(regrets, axis=0) + + total_loss = total_regret + if self._entropy_cost: + entropy_loss = tf.reduce_mean(compute_entropy_loss(policy_logits)) + scaled_entropy_loss = tf.multiply( + float(self._entropy_cost), entropy_loss, name="scaled_entropy_loss") + total_loss = tf.add( + total_loss, scaled_entropy_loss, name="total_loss_with_entropy") + + return total_loss + + +class BatchA2CLoss(object): + """Defines the batch A2C loss op.""" + + def __init__(self, entropy_cost=None, name="batch_a2c_loss"): + self._entropy_cost = entropy_cost + self._name = name + + def loss(self, policy_logits, baseline, actions, returns): + """Constructs a TF graph that computes the A2C loss for batches. + + Args: + policy_logits: `B x A` tensor corresponding to policy logits. + baseline: `B` tensor corresponding to baseline (V-values). + actions: `B` tensor corresponding to actions taken. + returns: `B` tensor corresponds to returns accumulated. + + Returns: + loss: A 0-D `float` tensor corresponding the loss. 
+ """ + _assert_rank_and_shape_compatibility([policy_logits], 2) + _assert_rank_and_shape_compatibility([baseline, actions, returns], 1) + advantages = returns - baseline + + policy_loss = compute_a2c_loss(policy_logits, actions, advantages) + total_loss = tf.reduce_mean(policy_loss, axis=0) + if self._entropy_cost: + entropy_loss = tf.reduce_mean(compute_entropy_loss(policy_logits)) + scaled_entropy_loss = tf.multiply( + float(self._entropy_cost), entropy_loss, name="scaled_entropy_loss") + total_loss = tf.add( + total_loss, scaled_entropy_loss, name="total_loss_with_entropy") + + return total_loss diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/rl_losses_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/rl_losses_test.py new file mode 100644 index 0000000..5390949 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/rl_losses_test.py @@ -0,0 +1,106 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from absl.testing import parameterized +import numpy as np +import tensorflow.compat.v1 as tf + +from open_spiel.python.algorithms.losses import rl_losses + +# Temporarily disable v2 behavior until code is updated. +tf.disable_v2_behavior() + + +class RLLossesTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.named_parameters(('no_entropy_cost', 0.), + ('with_entropy_cost', 1.)) + def test_batch_qpg_loss_with_entropy_cost(self, entropy_cost): + batch_qpg_loss = rl_losses.BatchQPGLoss(entropy_cost=entropy_cost) + q_values = tf.constant([[0., -1., 1.], [1., -1., 0]], dtype=tf.float32) + policy_logits = tf.constant([[1., 1., 1.], [1., 1., 4.]], dtype=tf.float32) + total_loss = batch_qpg_loss.loss(policy_logits, q_values) + # Compute expected quantities. + expected_policy_entropy_loss = -1 * (1.0986 + 0.3665) / 2 + # baseline = \sum_a pi_a * Q_a = 0. + # -\sum_a pi_a * (Q_a - baseline) + expected_policy_loss = (0.0 + 0.0) / 2 + expected_total_loss = ( + expected_policy_loss + entropy_cost * expected_policy_entropy_loss) + with self.session() as sess: + np.testing.assert_allclose( + sess.run(total_loss), expected_total_loss, atol=1e-4) + + @parameterized.named_parameters(('no_entropy_cost', 0.), + ('with_entropy_cost', 1.)) + def test_batch_rm_loss_with_entropy_cost(self, entropy_cost): + batch_rpg_loss = rl_losses.BatchRMLoss(entropy_cost=entropy_cost) + q_values = tf.constant([[0., -1., 1.], [1., -1., 0]], dtype=tf.float32) + policy_logits = tf.constant([[1., 1., 1.], [1., 1., 4.]], dtype=tf.float32) + total_loss = batch_rpg_loss.loss(policy_logits, q_values) + # Compute expected quantities. + expected_policy_entropy_loss = -(1.0986 + 0.3665) / 2 + # baseline = \sum_a pi_a * Q_a = 0. + # -\sum_a pi_a * relu(Q_a - baseline) + # negative sign as it's a loss term and loss needs to be minimized. 
+ expected_policy_loss = -(.3333 + .0452) / 2 + expected_total_loss = ( + expected_policy_loss + entropy_cost * expected_policy_entropy_loss) + with self.session() as sess: + np.testing.assert_allclose( + sess.run(total_loss), expected_total_loss, atol=1e-3) + + @parameterized.named_parameters(('no_entropy_cost', 0.), + ('with_entropy_cost', 1.)) + def test_batch_rpg_loss_with_entropy_cost(self, entropy_cost): + batch_rpg_loss = rl_losses.BatchRPGLoss(entropy_cost=entropy_cost) + q_values = tf.constant([[0., -1., 1.], [1., -1., 0]], dtype=tf.float32) + policy_logits = tf.constant([[1., 1., 1.], [1., 1., 4.]], dtype=tf.float32) + total_loss = batch_rpg_loss.loss(policy_logits, q_values) + # Compute expected quantities. + expected_policy_entropy_loss = -1 * (1.0986 + 0.3665) / 2 + # baseline = \sum_a pi_a * Q_a = 0. + # \sum_a relu(Q_a - baseline) + expected_policy_loss = (1.0 + 1.0) / 2 + expected_total_loss = ( + expected_policy_loss + entropy_cost * expected_policy_entropy_loss) + with self.session() as sess: + np.testing.assert_allclose( + sess.run(total_loss), expected_total_loss, atol=1e-4) + + @parameterized.named_parameters(('no_entropy_cost', 0.), + ('with_entropy_cost', 1.)) + def test_batch_a2c_loss_with_entropy_cost(self, entropy_cost): + batch_a2c_loss = rl_losses.BatchA2CLoss(entropy_cost=entropy_cost) + policy_logits = tf.constant([[1., 1., 1.], [1., 1., 4.]], dtype=tf.float32) + baseline = tf.constant([1. / 3, 0.5], dtype=tf.float32) + actions = tf.constant([1, 2], dtype=tf.int32) + returns = tf.constant([0., 1.], dtype=tf.float32) + total_loss = batch_a2c_loss.loss(policy_logits, baseline, actions, returns) + # Compute expected quantities. + # advantages = returns - baseline = [-1./3, 0.5] + # cross_entropy = [-log(e^1./3 * e^1), -log(e^4/(e^4+ e + e))] + # = [1.0986, 0.09492] + # policy_loss = cross_entropy * advantages = [-0.3662, 0.04746] + expected_policy_entropy_loss = -1 * (1.0986 + 0.3665) / 2 + expected_policy_loss = (-0.3662 + 0.04746) / 2 + expected_total_loss = ( + expected_policy_loss + entropy_cost * expected_policy_entropy_loss) + with self.session() as sess: + np.testing.assert_allclose( + sess.run(total_loss), expected_total_loss, atol=1e-4) + + +if __name__ == '__main__': + tf.test.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/single_agent_catch_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/single_agent_catch_example.py new file mode 100644 index 0000000..0ec3c69 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/single_agent_catch_example.py @@ -0,0 +1,122 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Python spiel example.""" + +import logging +from absl import app +from absl import flags + +import tensorflow.compat.v1 as tf +from open_spiel.python.algorithms import dqn +from open_spiel.python.algorithms import eva +from open_spiel.python.algorithms import policy_gradient +from open_spiel.python.environments import catch + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("num_episodes", int(1e5), "Number of train episodes.") +flags.DEFINE_integer("eval_every", int(1e3), + "How often to evaluate the policy.") +flags.DEFINE_enum("algorithm", "dqn", ["dqn", "rpg", "qpg", "rm", "eva", "a2c"], + "Algorithms to run.") + + +def _eval_agent(env, agent, num_episodes): + """Evaluates `agent` for `num_episodes`.""" + rewards = 0.0 + for _ in range(num_episodes): + time_step = env.reset() + episode_reward = 0 + while not time_step.last(): + agent_output = agent.step(time_step, is_evaluation=True) + time_step = env.step([agent_output.action]) + episode_reward += time_step.rewards[0] + rewards += episode_reward + return rewards / num_episodes + + +def main_loop(unused_arg): + """Trains a DQN agent in the catch environment.""" + env = catch.Environment() + info_state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + train_episodes = FLAGS.num_episodes + + with tf.Session() as sess: + if FLAGS.algorithm in {"rpg", "qpg", "rm", "a2c"}: + agent = policy_gradient.PolicyGradient( + sess, + player_id=0, + info_state_size=info_state_size, + num_actions=num_actions, + loss_str=FLAGS.algorithm, + hidden_layers_sizes=[128, 128], + batch_size=128, + entropy_cost=0.01, + critic_learning_rate=0.1, + pi_learning_rate=0.1, + num_critic_before_pi=3) + elif FLAGS.algorithm == "dqn": + agent = dqn.DQN( + sess, + player_id=0, + state_representation_size=info_state_size, + num_actions=num_actions, + learning_rate=0.1, + replay_buffer_capacity=10000, + hidden_layers_sizes=[32, 32], + epsilon_decay_duration=2000, # 10% total data + update_target_network_every=250) + elif FLAGS.algorithm == "eva": + agent = eva.EVAAgent( + sess, + env, + player_id=0, + state_size=info_state_size, + num_actions=num_actions, + learning_rate=1e-3, + trajectory_len=2, + num_neighbours=2, + mixing_parameter=0.95, + memory_capacity=10000, + dqn_hidden_layers=[32, 32], + epsilon_decay_duration=2000, # 10% total data + update_target_network_every=250) + else: + raise ValueError("Algorithm not implemented!") + + sess.run(tf.global_variables_initializer()) + + # Train agent + for ep in range(train_episodes): + time_step = env.reset() + while not time_step.last(): + agent_output = agent.step(time_step) + action_list = [agent_output.action] + time_step = env.step(action_list) + # Episode is over, step agent with final info state. 
+ agent.step(time_step) + + if ep and ep % FLAGS.eval_every == 0: + logging.info("-" * 80) + logging.info("Episode %s", ep) + logging.info("Loss: %s", agent.loss) + avg_return = _eval_agent(env, agent, 100) + logging.info("Avg return: %s", avg_return) + + +if __name__ == "__main__": + app.run(main_loop) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/skat_dqn_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/skat_dqn_example.py new file mode 100644 index 0000000..0cac605 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/skat_dqn_example.py @@ -0,0 +1,149 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""DQN agents trained on Skat by independent Q-learning.""" + +import os +import random + +from absl import app +from absl import flags +from absl import logging +import numpy as np +import tensorflow.compat.v1 as tf + +from open_spiel.python import rl_environment +from open_spiel.python.algorithms import dqn +from open_spiel.python.algorithms import random_agent + +FLAGS = flags.FLAGS + +# Training parameters +flags.DEFINE_string("checkpoint_dir", "/tmp/skat_dqn/", + "Directory to save/load the agent.") +flags.DEFINE_integer("num_train_episodes", int(1e6), + "Number of training episodes.") +flags.DEFINE_integer( + "eval_every", 1000, + "Episode frequency at which the DQN agents are evaluated.") +flags.DEFINE_integer( + "num_eval_games", 1000, + "How many games to play during each evaluation.") + +# DQN model hyper-parameters +flags.DEFINE_list("hidden_layers_sizes", [64, 64], + "Number of hidden units in the Q-Network MLP.") +flags.DEFINE_integer("replay_buffer_capacity", int(1e5), + "Size of the replay buffer.") +flags.DEFINE_integer("batch_size", 32, + "Number of transitions to sample at each learning step.") +flags.DEFINE_bool("randomize_positions", True, + "Randomize the position of each agent before every game.") + + +def eval_against_random_bots(env, trained_agents, random_agents, num_episodes): + """Evaluates `trained_agents` against `random_agents` for `num_episodes`.""" + num_players = len(trained_agents) + sum_episode_rewards = np.zeros(num_players) + for player_pos in range(num_players): + for _ in range(num_episodes): + cur_agents = random_agents[:] + if FLAGS.randomize_positions: + eval_player_pos = random.randrange(num_players) + else: + eval_player_pos = player_pos + cur_agents[eval_player_pos] = trained_agents[player_pos] + cur_agents[eval_player_pos].player_id = eval_player_pos + time_step = env.reset() + episode_rewards = 0 + while not time_step.last(): + player_id = time_step.observations["current_player"] + agent_output = cur_agents[player_id].step( + time_step, is_evaluation=True) + action_list = [agent_output.action] + time_step = env.step(action_list) + episode_rewards += time_step.rewards[eval_player_pos] + sum_episode_rewards[player_pos] += episode_rewards + return sum_episode_rewards / num_episodes + + +def main(_): + game = 
"skat" + num_players = 3 + + env_configs = {} + env = rl_environment.Environment(game, **env_configs) + observation_tensor_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + # random agents for evaluation + random_agents = [ + random_agent.RandomAgent(player_id=idx, num_actions=num_actions) + for idx in range(num_players) + ] + + with tf.Session() as sess: + summaries_dir = os.path.join(FLAGS.checkpoint_dir, "random_eval") + summary_writer = tf.summary.FileWriter( + summaries_dir, tf.get_default_graph()) + hidden_layers_sizes = [int(l) for l in FLAGS.hidden_layers_sizes] + # pylint: disable=g-complex-comprehension + agents = [ + dqn.DQN( + session=sess, + player_id=idx, + state_representation_size=observation_tensor_size, + num_actions=num_actions, + hidden_layers_sizes=hidden_layers_sizes, + replay_buffer_capacity=FLAGS.replay_buffer_capacity, + batch_size=FLAGS.batch_size) for idx in range(num_players) + ] + saver = tf.train.Saver() + sess.run(tf.global_variables_initializer()) + + for ep in range(FLAGS.num_train_episodes): + if (ep + 1) % FLAGS.eval_every == 0: + r_mean = eval_against_random_bots(env, agents, random_agents, + FLAGS.num_eval_games) + logging.info("[%s] Mean episode rewards %s", ep + 1, r_mean) + for i in range(num_players): + summary = tf.Summary() + summary.value.add(tag="mean_reward/random_{}".format(i), + simple_value=r_mean[i]) + summary_writer.add_summary(summary, ep) + summary_writer.flush() + saver.save(sess, FLAGS.checkpoint_dir, ep) + + time_step = env.reset() + # Randomize position. + if FLAGS.randomize_positions: + positions = random.sample(range(len(agents)), len(agents)) + while not time_step.last(): + player_id = time_step.observations["current_player"] + if FLAGS.randomize_positions: + position = positions[player_id] + agents[position].player_id = player_id + else: + position = player_id + agent_output = agents[position].step(time_step) + action_list = [agent_output.action] + time_step = env.step(action_list) + + # Episode is over, step all agents with final info state. + for agent in agents: + agent.step(time_step) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/tic_tac_toe_alpha_zero_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/tic_tac_toe_alpha_zero_example.py new file mode 100644 index 0000000..885beb6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/tf/tic_tac_toe_alpha_zero_example.py @@ -0,0 +1,68 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Simple AlphaZero tic tac toe example. + +Take a look at the log-learner.txt in the output directory. + +If you want more control, check out `alpha_zero.py`. 
+""" + +from absl import app +from absl import flags + +from open_spiel.python.algorithms.alpha_zero import alpha_zero +from open_spiel.python.utils import spawn + +flags.DEFINE_string("path", None, "Where to save checkpoints.") +FLAGS = flags.FLAGS + + +def main(unused_argv): + config = alpha_zero.Config( + game="tic_tac_toe", + path=FLAGS.path, + learning_rate=0.01, + weight_decay=1e-4, + train_batch_size=128, + replay_buffer_size=2**14, + replay_buffer_reuse=4, + max_steps=25, + checkpoint_freq=25, + + actors=4, + evaluators=4, + uct_c=1, + max_simulations=20, + policy_alpha=0.25, + policy_epsilon=1, + temperature=1, + temperature_drop=4, + evaluation_window=50, + eval_levels=7, + + nn_model="resnet", + nn_width=128, + nn_depth=2, + observation_shape=None, + output_size=None, + + quiet=True, + ) + alpha_zero.alpha_zero(config) + + +if __name__ == "__main__": + with spawn.main_handler(): + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/value_iteration.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/value_iteration.py new file mode 100644 index 0000000..bc5ba99 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/value_iteration.py @@ -0,0 +1,159 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Value iteration algorithm for solving a game.""" + +from open_spiel.python.algorithms import get_all_states +from open_spiel.python.algorithms import lp_solver +import pyspiel + + +def _get_future_states(possibilities, state, reach=1.0): + """Does a lookahead over chance nodes to all next states after (s,a). + + Also works if there are no chance nodes (i.e. base case). + + Arguments: + possibilities: an empty list, that will be filled with (str(next_state), + transition probability) pairs for all possible next states + state: the state following some s.apply_action(a), can be a chance node + reach: chance reach probability of getting to this point from (s,a) + Returns: nothing. 
+ """ + if not state.is_chance_node() or state.is_terminal(): + # Base case + possibilities.append((str(state), reach)) + else: + assert state.is_chance_node() + for outcome, prob in state.chance_outcomes(): + next_state = state.child(outcome) + _get_future_states(possibilities, next_state, reach * prob) + + +def _add_transition(transitions, key, state): + """Adds action transitions from given state.""" + + if state.is_simultaneous_node(): + for p0action in state.legal_actions(0): + for p1action in state.legal_actions(1): + next_state = state.clone() + next_state.apply_actions([p0action, p1action]) + possibilities = [] + _get_future_states(possibilities, next_state) + transitions[(key, p0action, p1action)] = possibilities + else: + for action in state.legal_actions(): + next_state = state.child(action) + possibilities = [] + _get_future_states(possibilities, next_state) + transitions[(key, action)] = possibilities + + +def _initialize_maps(states, values, transitions): + """Initialize the value and transition maps.""" + for key, state in states.items(): + if state.is_terminal(): + values[key] = state.player_return(0) + else: + values[key] = 0 + _add_transition(transitions, key, state) + + +def value_iteration(game, depth_limit, threshold, cyclic_game=False): + """Solves for the optimal value function of a game. + + For small games only! Solves the game using value iteration, + with the maximum error for the value function less than threshold. + This algorithm works for sequential 1-player games or 2-player zero-sum + games, with or without chance nodes. + + Arguments: + game: The game to analyze, as returned by `load_game`. + depth_limit: How deeply to analyze the game tree. Negative means no limit, 0 + means root-only, etc. + threshold: Maximum error for state values.. + cyclic_game: set to True if the game has cycles (from state A we can get to + state B, and from state B we can get back to state A). + + Returns: + A `dict` with string keys and float values, mapping string encoding of + states to the values of those states. + """ + assert game.num_players() in (1, + 2), ("Game must be a 1-player or 2-player game") + if game.num_players() == 2: + assert game.get_type().utility == pyspiel.GameType.Utility.ZERO_SUM, ( + "2-player games must be zero sum games") + + # Must be perfect information or one-shot (not imperfect information). + assert (game.get_type().information == pyspiel.GameType.Information.ONE_SHOT + or game.get_type().information == + pyspiel.GameType.Information.PERFECT_INFORMATION) + + # We expect Value Iteration to be used with perfect information games, in + # which `str` is assumed to display the state of the game. + states = get_all_states.get_all_states( + game, + depth_limit, + True, + False, + to_string=str, + stop_if_encountered=cyclic_game) + values = {} + transitions = {} + + _initialize_maps(states, values, transitions) + error = threshold + 1 # A value larger than threshold + min_utility = game.min_utility() + while error > threshold: + error = 0 + for key, state in states.items(): + if state.is_terminal(): + continue + elif state.is_simultaneous_node(): + # Simultaneous node. Assemble a matrix game from the child utilities. + # and solve it using a matrix game solver. 
+ p0_utils = [] # row player + p1_utils = [] # col player + row = 0 + for p0action in state.legal_actions(0): + # new row + p0_utils.append([]) + p1_utils.append([]) + for p1action in state.legal_actions(1): + # loop from left-to-right of columns + next_states = transitions[(key, p0action, p1action)] + joint_q_value = sum( + p * values[next_state] for next_state, p in next_states) + p0_utils[row].append(joint_q_value) + p1_utils[row].append(-joint_q_value) + row += 1 + stage_game = pyspiel.create_matrix_game(p0_utils, p1_utils) + solution = lp_solver.solve_zero_sum_matrix_game(stage_game) + value = solution[2] + else: + # Regular decision node + player = state.current_player() + value = min_utility if player == 0 else -min_utility + for action in state.legal_actions(): + next_states = transitions[(key, action)] + q_value = sum(p * values[next_state] for next_state, p in next_states) + if player == 0: + value = max(value, q_value) + else: + value = min(value, q_value) + error = max(abs(values[key] - value), error) + values[key] = value + + return values diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/value_iteration_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/value_iteration_test.py new file mode 100644 index 0000000..371aee1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/value_iteration_test.py @@ -0,0 +1,83 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.algorithms.get_all_states.""" + +from absl.testing import absltest + +from open_spiel.python.algorithms import value_iteration +import pyspiel + + +class ValueIterationTest(absltest.TestCase): + + def test_solve_tic_tac_toe(self): + game = pyspiel.load_game("tic_tac_toe") + values = value_iteration.value_iteration( + game, depth_limit=-1, threshold=0.01) + + initial_state = "...\n...\n..." + cross_win_state = "...\n...\n.ox" + naught_win_state = "x..\noo.\nxx." + self.assertEqual(values[initial_state], 0) + self.assertEqual(values[cross_win_state], 1) + self.assertEqual(values[naught_win_state], -1) + + def test_solve_small_goofspiel(self): + # TODO(author5): This test fails with num_cards = 4 with a new version of + # LAPACK (3.10.0), which is used by cvxopt. Might be a bug or bad assumption + # about the handling of numerical error. Look into this. 
+ game = pyspiel.load_game("goofspiel", {"num_cards": 3}) + values = value_iteration.value_iteration( + game, depth_limit=-1, threshold=1e-6) + + initial_state = game.new_initial_state() + assert initial_state.is_chance_node() + root_value = 0 + for action, action_prob in initial_state.chance_outcomes(): + next_state = initial_state.child(action) + root_value += action_prob * values[str(next_state)] + + # Symmetric game: value is 0 + self.assertAlmostEqual(root_value, 0) + + def test_solve_small_oshi_zumo(self): + # Oshi-Zumo(5, 2, 0) + game = pyspiel.load_game("oshi_zumo", {"coins": 5, "size": 2}) + values = value_iteration.value_iteration( + game, depth_limit=-1, threshold=1e-6, cyclic_game=True) + + initial_state = game.new_initial_state() + # Symmetric game: value is 0 + self.assertAlmostEqual(values[str(initial_state)], 0) + + # Oshi-Zumo(5, 2, 1) + game = pyspiel.load_game("oshi_zumo", {"coins": 5, "size": 2, "min_bid": 1}) + values = value_iteration.value_iteration( + game, depth_limit=-1, threshold=1e-6, cyclic_game=False) + + initial_state = game.new_initial_state() + # Symmetric game: value is 0 + self.assertAlmostEqual(values[str(initial_state)], 0) + + def test_solve_small_pig(self): + game = pyspiel.load_game("pig", {"winscore": 20}) + values = value_iteration.value_iteration( + game, depth_limit=-1, threshold=1e-6, cyclic_game=True) + initial_state = game.new_initial_state() + print("Value of Pig(20): ", values[str(initial_state)]) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/wolf_phc.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/wolf_phc.py new file mode 100644 index 0000000..ecdd04e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/wolf_phc.py @@ -0,0 +1,231 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""WoLF policy-hill climbing agent. + +Based on: https://www.sciencedirect.com/science/article/pii/S0004370202001212 +""" + +import collections +import numpy as np + +from open_spiel.python import rl_agent +from open_spiel.python import rl_tools +from open_spiel.python.algorithms.projected_replicator_dynamics import _simplex_projection + + +def valuedict(): + return collections.defaultdict(float) + + +class WoLFSchedule(rl_tools.ValueSchedule): + """Schedule rules described in the WoLF paper. + + at step t the step size is (t0 / (t + t1)) + """ + + def __init__(self, t0, t1): + super(WoLFSchedule, self).__init__() + self._t0 = t0 + self._t1 = t1 + self._step_taken = 0 + + def step(self): + value = (self._t0 / (self._step_taken + self._t1)) + self._step_taken += 1 + return value + + @property + def value(self): + return self._t0 / (self._step_taken + self._t1) + + +class WoLFPHC(rl_agent.AbstractAgent): + """WoLF policy-hill climbing agent agent. + + + Based on win or learn fast principle. 
+ Based on: + https://www.sciencedirect.com/science/article/pii/S0004370202001212 + """ + + def __init__(self, + player_id, + num_actions, + step_size=WoLFSchedule(10000, 1000000), + epsilon_schedule=rl_tools.ConstantSchedule(0.2), + delta_w=WoLFSchedule(1, 20000), + delta_l=WoLFSchedule(2, 20000), + discount_factor=1.0): + """Initialize the WoLF-PHC agent.""" + self._player_id = player_id + self._num_actions = num_actions + self._step_size = step_size + self._epsilon_schedule = epsilon_schedule + self._epsilon = epsilon_schedule.value + self._discount_factor = discount_factor + self._delta_w = delta_w + self._delta_l = delta_l + self._cur_policy = collections.defaultdict(valuedict) + self._avg_policy = collections.defaultdict(valuedict) + self._q_values = collections.defaultdict(valuedict) + self._state_counters = valuedict() + self._prev_info_state = None + self._last_loss_value = None + self._cur_delta_value = self._delta_l.value + + def _hill_climbing(self, info_state, legal_actions): + """Does the hill-climbing update. + + Args: + info_state: hashable representation of the information state. + legal_actions: list of actions at `info_state`. + """ + + greedy_q = max( + [self._q_values[info_state][action] for action in legal_actions]) + greedy_actions = [ + action for action in legal_actions + if self._q_values[info_state][action] == greedy_q + ] + if len(greedy_actions) == len(legal_actions): + return + + deltas = { # pylint: disable=g-complex-comprehension + action: + min(self._cur_policy[info_state][action], + self._cur_delta_value / (len(legal_actions) - len(greedy_actions))) + for action in legal_actions + } + + delta_greedy = sum([ + deltas[action] + for action in legal_actions + if action not in greedy_actions + ]) / len(greedy_actions) + + deltas = { + action: + -deltas[action] if action not in greedy_actions else delta_greedy + for action in legal_actions + } + new_policy = np.array([ + self._cur_policy[info_state][action] + deltas[action] + for action in legal_actions + ]) + new_policy = _simplex_projection(new_policy) + for i in range(len(legal_actions)): + self._cur_policy[info_state][legal_actions[i]] = new_policy[i] + + def _get_action_probs(self, info_state, legal_actions, epsilon): + """Returns a selected action and the probabilities of legal actions. + + To be overwritten by subclasses that implement other action selection + methods. + Args: + info_state: hashable representation of the information state. + legal_actions: list of actions at `info_state`. + epsilon: float: current value of the epsilon schedule or 0 in case + evaluation. QLearner uses it as the exploration parameter in + epsilon-greedy, but subclasses are free to interpret in different ways + (e.g. as temperature in softmax). + """ + if info_state not in self._cur_policy: + for action in legal_actions: + self._cur_policy[info_state][action] = 1. / len(legal_actions) + self._avg_policy[info_state][action] = 1. / len(legal_actions) + + probs = np.zeros(self._num_actions) + for action in legal_actions: + probs[action] = ((1-epsilon) * self._cur_policy[info_state][action] + + epsilon * 1.0 / len(legal_actions)) + action = np.random.choice(range(self._num_actions), p=probs) + return action, probs + + def step(self, time_step, is_evaluation=False): + """Returns the action to be taken and updates the Q-values if needed. + + Args: + time_step: an instance of rl_environment.TimeStep. + is_evaluation: bool, whether this is a training or evaluation call. 
+ + Returns: + A `rl_agent.StepOutput` containing the action probs and chosen action. + """ + + info_state = str(time_step.observations["info_state"][self._player_id]) + legal_actions = time_step.observations["legal_actions"][self._player_id] + + # Prevent undefined errors if this agent never plays until terminal step + action, probs = None, None + + # Act step: don't act at terminal states. + if not time_step.last(): + epsilon = 0.0 if is_evaluation else self._epsilon + action, probs = self._get_action_probs(info_state, legal_actions, epsilon) + + # Learn step: don't learn during evaluation or at first agent steps. + if self._prev_info_state and not is_evaluation: + target = time_step.rewards[self._player_id] + if not time_step.last(): # Q values are zero for terminal. + target += self._discount_factor * max( + [self._q_values[info_state][a] for a in legal_actions]) + + prev_q_value = self._q_values[self._prev_info_state][self._prev_action] + self._last_loss_value = target - prev_q_value + self._q_values[self._prev_info_state][self._prev_action] += ( + self._step_size.value * self._last_loss_value) + + self._state_counters[info_state] += 1 + for action_ in legal_actions: + self._avg_policy[info_state][action_] = ( + self._avg_policy[info_state][action_] + + 1 / self._state_counters[info_state] * ( + self._cur_policy[info_state][action_] - + self._avg_policy[info_state][action_])) + + assert self._delta_l.value > self._delta_w.value + cur_policy_value = sum([ + self._cur_policy[info_state][action] * + self._q_values[info_state][action] for action in legal_actions + ]) + avg_policy_value = sum([ + self._avg_policy[info_state][action] * + self._q_values[info_state][action] for action in legal_actions + ]) + if cur_policy_value > avg_policy_value: + self._cur_delta_value = self._delta_w.value + else: + self._cur_delta_value = self._delta_l.value + + if not time_step.last(): + self._hill_climbing(info_state, legal_actions) + + # Decay epsilon, if necessary. + self._epsilon = self._epsilon_schedule.step() + self._delta_l.step() + self._delta_w.step() + self._step_size.step() + else: # prepare for the next episode. + self._prev_info_state = None + return + + # Don't mess up with the state during evaluation. + if not is_evaluation: + self._prev_info_state = info_state + self._prev_action = action + return rl_agent.StepOutput(action=action, probs=probs) + + @property + def loss(self): + return self._last_loss_value diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/wolf_phc_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/wolf_phc_test.py new file mode 100644 index 0000000..a623690 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/algorithms/wolf_phc_test.py @@ -0,0 +1,76 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for open_spiel.python.algorithms.tabular_multiagent_qlearner.""" + +from absl.testing import absltest +import numpy as np + +from open_spiel.python import rl_environment +from open_spiel.python.algorithms.tabular_qlearner import QLearner +from open_spiel.python.algorithms.wolf_phc import WoLFPHC + +SEED = 18763511 + + +class WoLFTest(absltest.TestCase): + + def test_simple_pathfinding_run(self): + env = rl_environment.Environment( + "pathfinding", grid="B.A\n...\na.b", players=2, step_reward=-1.) + + with self.subTest("wolf_phc"): + qlearner = QLearner(0, env.game.num_distinct_actions()) + wolflearner = WoLFPHC(1, env.game.num_distinct_actions()) + time_step = env.reset() + step_cnt = 0 + + while not time_step.last(): + actions = [ + qlearner.step(time_step).action, + wolflearner.step(time_step).action + ] + time_step = env.step(actions) + step_cnt += 1 + + self.assertLess(step_cnt, 500) + + def test_rps_run(self): + env = rl_environment.Environment("matrix_rps") + wolf0 = WoLFPHC(0, env.game.num_distinct_actions()) + wolf1 = WoLFPHC(1, env.game.num_distinct_actions()) + + for _ in range(1000): + time_step = env.reset() + actions = [wolf0.step(time_step).action, wolf1.step(time_step).action] + time_step = env.step(actions) + wolf0.step(time_step) + wolf1.step(time_step) + + with self.subTest("correct_rps_strategy"): + time_step = env.reset() + learner0_strategy, learner1_strategy = wolf0.step( + time_step).probs, wolf1.step(time_step).probs + np.testing.assert_array_almost_equal( + np.asarray([1 / 3, 1 / 3, 1 / 3]), + learner0_strategy.reshape(-1), + decimal=4) + np.testing.assert_array_almost_equal( + np.asarray([1 / 3, 1 / 3, 1 / 3]), + learner1_strategy.reshape(-1), + decimal=4) + + +if __name__ == "__main__": + np.random.seed(SEED) + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/bots/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/bots/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/bots/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/bots/bluechip_bridge.py b/scenarios/bargaining/open_spiel/open_spiel/python/bots/bluechip_bridge.py new file mode 100644 index 0000000..6ee90eb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/bots/bluechip_bridge.py @@ -0,0 +1,349 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Wraps third-party bridge bots to make them usable in OpenSpiel. + +This code enables OpenSpiel interoperation for bots which implement the BlueChip +bridge protocol. This is widely used, e.g. in the World computer bridge +championships. For a rough outline of the protocol, see: +http://www.bluechipbridge.co.uk/protocol.htm + +No formal specification is available. This implementation has been verified +to work correctly with WBridge5. + +This bot controls a single player in the full game of bridge, including both the +bidding and play phase. It chooses its actions by invoking an external bot which +plays the full game of bridge. This means that each time the bot is asked for an +action, it sends up to three actions (one for each other player) to the external +bridge bot, and obtains an action in return. +""" + +import re +import pyspiel + +# Example session: +# +# Recv: Connecting "WBridge5" as ANYPL using protocol version 18 +# Send: WEST ("WBridge5") seated +# Recv: WEST ready for teams +# Send: Teams: N/S "silent" E/W "bidders" +# Recv: WEST ready to start +# Send: Start of board +# Recv: WEST ready for deal +# Send: Board number 8. Dealer WEST. Neither vulnerable. +# Recv: WEST ready for cards +# Send: WEST's cards: S A T 9 5. H K 6 5. D Q J 8 7 6. C 7. +# Recv: WEST PASSES +# Recv: WEST ready for NORTH's bid +# Send: EAST PASSES +# Recv: WEST ready for EAST's bid +# Send: EAST bids 1C +# Recv: WEST ready for SOUTH's bid + +# The game we support +GAME_STR = "bridge(use_double_dummy_result=False)" + +# Template regular expressions for messages we receive +_CONNECT = 'Connecting "(?P.*)" as ANYPL using protocol version 18' +_PLAYER_ACTION = ("(?PNORTH|SOUTH|EAST|WEST) " + "((?PPASSES)|(?PDOUBLES)|(?PREDOUBLES)|bids " + "(?P[^ ]*)|(plays (?P[23456789tjqka][cdhs])))" + "(?P Alert.)?") +_READY_FOR_OTHER = ("{seat} ready for " + "(((?P[^']*)'s ((bid)|(card to trick \\d+)))" + "|(?Pdummy))") + +# Templates for fixed messages we receive +_READY_FOR_TEAMS = "{seat} ready for teams" +_READY_TO_START = "{seat} ready to start" +_READY_FOR_DEAL = "{seat} ready for deal" +_READY_FOR_CARDS = "{seat} ready for cards" +_READY_FOR_BID = "{seat} ready for {other}'s bid" + +# Templates for messages we send +_SEATED = '{seat} ("{client_name}") seated' +_TEAMS = 'Teams: N/S "north-south" E/W "east-west"' +_START_BOARD = "start of board" +_DEAL = "Board number {board}. Dealer NORTH. Neither vulnerable." +_CARDS = "{seat}'s cards: {hand}" +_OTHER_PLAYER_ACTION = "{player} {action}" +_PLAYER_TO_LEAD = "{seat} to lead" +_DUMMY_CARDS = "Dummy's cards: {}" + +# BlueChip bridge protocol message constants +_SEATS = ["NORTH", "EAST", "SOUTH", "WEST"] +_TRUMP_SUIT = ["C", "D", "H", "S", "NT"] +_NUMBER_TRUMP_SUITS = len(_TRUMP_SUIT) +_SUIT = _TRUMP_SUIT[:4] +_NUMBER_SUITS = len(_SUIT) +_RANKS = ["2", "3", "4", "5", "6", "7", "8", "9", "T", "J", "Q", "K", "A"] +_LSUIT = [x.lower() for x in _SUIT] +_LRANKS = [x.lower() for x in _RANKS] + +# OpenSpiel action ids +_ACTION_PASS = 52 +_ACTION_DBL = 53 +_ACTION_RDBL = 54 +_ACTION_BID = 55 # First bid, i.e. 
1C + + +def _bid_to_action(action_str): + """Returns an OpenSpiel action id (an integer) from a BlueChip bid string.""" + level = int(action_str[0]) + trumps = _TRUMP_SUIT.index(action_str[1:]) + return _ACTION_BID + (level - 1) * _NUMBER_TRUMP_SUITS + trumps + + +def _play_to_action(action_str): + """Returns an OpenSpiel action id (an integer) from a BlueChip card string.""" + rank = _LRANKS.index(action_str[0]) + suit = _LSUIT.index(action_str[1]) + return rank * _NUMBER_SUITS + suit + + +def _action_to_string(action): + """Converts OpenSpiel action id (an integer) to a BlueChip action string. + + Args: + action: an integer action id corresponding to a bid. + + Returns: + A string in BlueChip format, e.g. 'PASSES' or 'bids 1H', or 'plays ck'. + """ + if action == _ACTION_PASS: + return "PASSES" + elif action == _ACTION_DBL: + return "DOUBLES" + elif action == _ACTION_RDBL: + return "REDOUBLES" + elif action >= _ACTION_BID: + level = str((action - _ACTION_BID) // _NUMBER_TRUMP_SUITS + 1) + trumps = _TRUMP_SUIT[(action - _ACTION_BID) % _NUMBER_TRUMP_SUITS] + return "bids " + level + trumps + else: + rank = action // _NUMBER_SUITS + suit = action % _NUMBER_SUITS + return "plays " + _LRANKS[rank] + _LSUIT[suit] + + +def _expect_regex(controller, regex): + """Reads a line from the controller, parses it using the regular expression.""" + line = controller.read_line() + match = re.match(regex, line) + if not match: + raise ValueError("Received '{}' which does not match regex '{}'".format( + line, regex)) + return match.groupdict() + + +def _expect(controller, expected): + """Reads a line from the controller, checks it matches expected line exactly.""" + line = controller.read_line() + if expected != line: + raise ValueError("Received '{}' but expected '{}'".format(line, expected)) + + +def _hand_string(cards): + """Returns the hand of the to-play player in the state in BlueChip format.""" + if len(cards) != 13: + raise ValueError("Must have 13 cards") + suits = [[] for _ in range(4)] + for card in reversed(sorted(cards)): + suit = card % 4 + rank = card // 4 + suits[suit].append(_RANKS[rank]) + for i in range(4): + if suits[i]: + suits[i] = _TRUMP_SUIT[i] + " " + " ".join(suits[i]) + "." + else: + suits[i] = _TRUMP_SUIT[i] + " -." + return " ".join(suits) + + +def _connect(controller, seat): + """Performs the initial handshake with a BlueChip bot.""" + client_name = _expect_regex(controller, _CONNECT)["client_name"] + controller.send_line(_SEATED.format(seat=seat, client_name=client_name)) + _expect(controller, _READY_FOR_TEAMS.format(seat=seat)) + controller.send_line(_TEAMS) + _expect(controller, _READY_TO_START.format(seat=seat)) + + +def _new_deal(controller, seat, hand, board): + """Informs a BlueChip bots that there is a new deal.""" + controller.send_line(_START_BOARD) + _expect(controller, _READY_FOR_DEAL.format(seat=seat)) + controller.send_line(_DEAL.format(board=board)) + _expect(controller, _READY_FOR_CARDS.format(seat=seat)) + controller.send_line(_CARDS.format(seat=seat, hand=hand)) + + +class BlueChipBridgeBot(pyspiel.Bot): + """An OpenSpiel bot, wrapping a BlueChip bridge bot implementation.""" + + def __init__(self, game, player_id, controller_factory): + """Initializes an OpenSpiel `Bot` wrapping a BlueChip-compatible bot. + + Args: + game: The OpenSpiel game object, should be an instance of + `bridge(use_double_dummy_result=false)`. + player_id: The id of the player the bot will act as, 0 = North (dealer), 1 + = East, 2 = South, 3 = West. 
+ controller_factory: Callable that returns new BlueChip controllers which + must support methods `read_line` and `send_line`, and `terminate`. + """ + pyspiel.Bot.__init__(self) + if str(game) != GAME_STR: + raise ValueError(f"BlueChipBridgeBot invoked with {game}") + self._game = game + self._player_id = player_id + self._controller_factory = controller_factory + self._seat = _SEATS[player_id] + self._num_actions = 52 + self.dummy = None + self.is_play_phase = False + self.cards_played = 0 + self._board = 0 + self._state = self._game.new_initial_state() + self._controller = None + + def player_id(self): + return self._player_id + + def restart(self): + """Indicates that we are starting a new episode.""" + # If we already have a fresh state, there is nothing to do. + if not self._state.history(): + return + self._num_actions = 52 + self.dummy = None + self.is_play_phase = False + self.cards_played = 0 + # We didn't see the end of the episode, so the external bot will still + # be expecting it. If we can autoplay other people's actions to the end + # (e.g. everyone passes or players play their last card), then do that. + if not self._state.is_terminal(): + state = self._state.clone() + while (not state.is_terminal() + and state.current_player() != self._player_id): + legal_actions = state.legal_actions() + if _ACTION_PASS in legal_actions: + state.apply(_ACTION_PASS) + elif len(legal_actions) == 1: + state.apply_action(legal_actions[0]) + if state.is_terminal(): + self.inform_state(state) + # Otherwise, we will have to restart the external bot, because + # the protocol makes no provision for this case. + if not self._state.is_terminal(): + self._controller.terminate() + self._controller = None + self._state = self._game.new_initial_state() + + def _update_for_state(self): + """Called for all non-chance nodes, whether or not we have to act.""" + # Get the actions in the game so far. + actions = self._state.history() + self.is_play_phase = (not self._state.is_terminal() and + max(self._state.legal_actions()) < 52) + self.cards_played = sum(1 if a < 52 else 0 for a in actions) - 52 + + # If this is the first time we've seen the deal, send our hand. + if len(actions) == 52: + self._board += 1 + _new_deal(self._controller, self._seat, + _hand_string(actions[self._player_id:52:4]), self._board) + + # Send actions since last `step` call. + for other_player_action in actions[self._num_actions:]: + other = _expect_regex(self._controller, + _READY_FOR_OTHER.format(seat=self._seat)) + other_player = other["other"] + if other_player == "Dummy": + other_player = _SEATS[self.dummy] + self._controller.send_line( + _OTHER_PLAYER_ACTION.format( + player=other_player, + action=_action_to_string(other_player_action))) + self._num_actions = len(actions) + + # If the opening lead has just been made, give the dummy. + if self.is_play_phase and self.cards_played == 1: + self.dummy = self._state.current_player() ^ 2 + if self._player_id != self.dummy: + other = _expect_regex(self._controller, + _READY_FOR_OTHER.format(seat=self._seat)) + dummy_cards = _hand_string(actions[self.dummy:52:4]) + self._controller.send_line(_DUMMY_CARDS.format(dummy_cards)) + + # If the episode is terminal, send (fake) timing info. + if self._state.is_terminal(): + self._controller.send_line( + "Timing - N/S : this board [1:15], total [0:11:23]. 
" + "E/W : this board [1:18], total [0:10:23]" + ) + self.dummy = None + self.is_play_phase = False + self.cards_played = 0 + + def inform_action(self, state, player, action): + del player, action + self.inform_state(state) + + def inform_state(self, state): + # Connect if we need to. + if self._controller is None: + self._controller = self._controller_factory() + _connect(self._controller, self._seat) + + full_history = state.history() + known_history = self._state.history() + if full_history[:len(known_history)] != known_history: + raise ValueError( + "Supplied state is inconsistent with bot's internal state\n" + f"Supplied state:\n{state}\n" + f"Internal state:\n{self._state}\n") + for action in full_history[len(known_history):]: + self._state.apply_action(action) + if not self._state.is_chance_node(): + self._update_for_state() + + def step(self, state): + """Returns an action for the given state.""" + # Bring the external bot up-to-date. + self.inform_state(state) + + # If we're on a new trick, tell the bot it is its turn. + if self.is_play_phase and self.cards_played % 4 == 0: + self._controller.send_line(_PLAYER_TO_LEAD.format(seat=self._seat)) + + # Get our action from the bot. + our_action = _expect_regex(self._controller, _PLAYER_ACTION) + self._num_actions += 1 + if our_action["pass"]: + return _ACTION_PASS + elif our_action["dbl"]: + return _ACTION_DBL + elif our_action["rdbl"]: + return _ACTION_RDBL + elif our_action["bid"]: + return _bid_to_action(our_action["bid"]) + elif our_action["play"]: + return _play_to_action(our_action["play"]) + + def terminate(self): + self._controller.terminate() + self._controller = None + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/bots/bluechip_bridge_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/bots/bluechip_bridge_test.py new file mode 100644 index 0000000..4262da0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/bots/bluechip_bridge_test.py @@ -0,0 +1,317 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.bots.bluechip_bridge_uncontested_bidding.""" + +from absl.testing import absltest +from open_spiel.python.bots import bluechip_bridge +import pyspiel + + +class BluechipBridgeWrapperTest(absltest.TestCase): + + def test_complete_deal_east(self): + # Plays a complete deal, with the mock external bot playing East. 
+ # The deal is as follows: + # + # Vul: None + # S AKJ8 + # H 4 + # D JT9532 + # C 32 + # S 3 S Q9 + # H KQJ8762 H AT5 + # D K4 D A87 + # C KQ4 C AJT96 + # S T76542 + # H 93 + # D Q6 + # C 875 + # + # West North East South + # Pass 1N Pass + # 2D Pass 2H Pass + # 3S Dbl 4C Pass + # 4D Pass 4N Pass + # 5D Pass 6H Pass + # Pass Pass + # + # N E S W N E S + # S7 S3 SK S9 + # DJ D8 D6 DK + # H2 H4 HT H9 + # H5 H3 H6 C3 + # C4 C2 CT C5 + # C6 C7 CQ D2 + # CK D3 CJ C8 + # D4 D5 DA DQ + # C9 S2 H7 S8 + # HK SJ HA S4 + # CA S5 H8 D9 + # HQ DT D7 S6 + # HJ SA SQ ST + # + # Declarer tricks: 12 + + game = pyspiel.load_game('bridge(use_double_dummy_result=false)') + mock_client = absltest.mock.Mock( + **{ + 'read_line.side_effect': [ + 'Connecting "WBridge5" as ANYPL using protocol version 18', + 'EAST ready for teams', + 'EAST ready to start', + 'EAST ready for deal', + 'EAST ready for cards', + "EAST ready for NORTH's bid", + 'EAST bids 1NT', + "EAST ready for SOUTH's bid", + "EAST ready for WEST's bid", + "EAST ready for NORTH's bid", + 'EAST bids 2H', + "EAST ready for SOUTH's bid", + "EAST ready for WEST's bid", + "EAST ready for NORTH's bid", + 'EAST bids 4C Alert.', + "EAST ready for SOUTH's bid", + "EAST ready for WEST's bid", + "EAST ready for NORTH's bid", + 'EAST bids 4NT', + "EAST ready for SOUTH's bid", + "EAST ready for WEST's bid", + "EAST ready for NORTH's bid", + 'EAST bids 6H', + "EAST ready for SOUTH's bid", + "EAST ready for WEST's bid", + "EAST ready for NORTH's bid", + "EAST ready for SOUTH's card to trick 1", + 'EAST ready for dummy', + 'WEST plays 3s', + "EAST ready for NORTH's card to trick 1", + 'EAST plays 9s', + "EAST ready for NORTH's card to trick 2", + 'EAST plays 8d', + "EAST ready for SOUTH's card to trick 2", + 'WEST plays kd', + 'WEST plays 2h', + "EAST ready for NORTH's card to trick 3", + 'EAST plays th', + "EAST ready for SOUTH's card to trick 3", + 'EAST plays 5h', + "EAST ready for SOUTH's card to trick 4", + 'WEST plays 6h', + "EAST ready for NORTH's card to trick 4", + 'WEST plays 4c', + "EAST ready for NORTH's card to trick 5", + 'EAST plays tc', + "EAST ready for SOUTH's card to trick 5", + 'EAST plays 6c', + "EAST ready for SOUTH's card to trick 6", + 'WEST plays qc', + "EAST ready for NORTH's card to trick 6", + 'WEST plays kc', + "EAST ready for NORTH's card to trick 7", + 'EAST plays jc', + "EAST ready for SOUTH's card to trick 7", + 'WEST plays 4d', + "EAST ready for NORTH's card to trick 8", + 'EAST plays ad', + "EAST ready for SOUTH's card to trick 8", + 'EAST plays 9c', + "EAST ready for SOUTH's card to trick 9", + 'WEST plays 7h', + "EAST ready for NORTH's card to trick 9", + 'WEST plays kh', + "EAST ready for NORTH's card to trick 10", + 'EAST plays ah', + "EAST ready for SOUTH's card to trick 10", + 'EAST plays ac', + "EAST ready for SOUTH's card to trick 11", + 'WEST plays 8h', + "EAST ready for NORTH's card to trick 11", + 'WEST plays qh', + "EAST ready for NORTH's card to trick 12", + 'EAST plays 7d', + "EAST ready for SOUTH's card to trick 12", + 'WEST plays jh', + "EAST ready for NORTH's card to trick 13", + 'EAST plays qs', + ] + }) + bot = bluechip_bridge.BlueChipBridgeBot(game, 1, lambda: mock_client) + state = game.new_initial_state() + history = [ + 33, 25, 3, 44, 47, 28, 23, 46, 1, 43, 30, 26, 29, 48, 24, 42, 13, 21, + 17, 8, 5, 34, 6, 7, 37, 49, 11, 38, 51, 32, 20, 9, 0, 14, 35, 22, 10, + 50, 15, 45, 39, 16, 12, 18, 27, 31, 41, 40, 4, 36, 19, 2, 52, 59, 52, + 61, 52, 62, 52, 68, 53, 70, 52, 71, 52, 74, 52, 76, 52, 82, 52, 52, 52, + 
23, 7, 47, 31, 37, 25, 17, 45, 2, 10, 34, 30, 14, 6, 18, 4, 8, 0, 32, + 12, 16, 20, 40, 1, 44, 5, 36, 24, 9, 13, 49, 41, 28, 3, 22, 27, 46, 39, + 50, 11, 48, 15, 26, 29, 42, 33, 21, 19, 38, 51, 43, 35 + ] + + # Check the bot provides the expected actions + for action in history: + if state.current_player() == 1: + bot_action = bot.step(state) + self.assertEqual(action, bot_action) + state.apply_action(action) + + # Check the session went as expected; send_line calls are us sending + # data to the (mock) external bot. + mock_client.assert_has_calls([ + absltest.mock.call.read_line(), + absltest.mock.call.send_line('EAST ("WBridge5") seated'), + absltest.mock.call.read_line(), + absltest.mock.call.send_line( + 'Teams: N/S "north-south" E/W "east-west"'), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('start of board'), + absltest.mock.call.read_line(), + absltest.mock.call.send_line( + 'Board number 1. Dealer NORTH. Neither vulnerable.'), + absltest.mock.call.read_line(), + absltest.mock.call.send_line( + "EAST's cards: C A J T 9 6. D A 8 7. H A T 5. S Q 9."), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('NORTH PASSES'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('SOUTH PASSES'), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('WEST bids 2D'), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('NORTH PASSES'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('SOUTH PASSES'), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('WEST bids 3S'), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('NORTH DOUBLES'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('SOUTH PASSES'), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('WEST bids 4D'), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('NORTH PASSES'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('SOUTH PASSES'), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('WEST bids 5D'), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('NORTH PASSES'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('SOUTH PASSES'), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('WEST PASSES'), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('NORTH PASSES'), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('SOUTH plays 7s'), + absltest.mock.call.read_line(), + absltest.mock.call.send_line( + "Dummy's cards: C K Q 4. D K 4. H K Q J 8 7 6 2. 
S 3."), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('NORTH plays ks'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('NORTH plays jd'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('SOUTH plays 6d'), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('EAST to lead'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('NORTH plays 4h'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('SOUTH plays 9h'), + absltest.mock.call.send_line('EAST to lead'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('SOUTH plays 3h'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('NORTH plays 3c'), + absltest.mock.call.send_line('EAST to lead'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('NORTH plays 2c'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('SOUTH plays 5c'), + absltest.mock.call.send_line('EAST to lead'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('SOUTH plays 7c'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('NORTH plays 2d'), + absltest.mock.call.send_line('EAST to lead'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('NORTH plays 3d'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('SOUTH plays 8c'), + absltest.mock.call.send_line('EAST to lead'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('NORTH plays 5d'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('SOUTH plays qd'), + absltest.mock.call.send_line('EAST to lead'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('SOUTH plays 2s'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('NORTH plays 8s'), + absltest.mock.call.send_line('EAST to lead'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('NORTH plays js'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('SOUTH plays 4s'), + absltest.mock.call.send_line('EAST to lead'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('SOUTH plays 5s'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('NORTH plays 9d'), + absltest.mock.call.send_line('EAST to lead'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('NORTH plays td'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('SOUTH plays 6s'), + absltest.mock.call.send_line('EAST to lead'), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('NORTH plays as'), + absltest.mock.call.read_line(), + ]) + + +if __name__ == '__main__': + absltest.main() diff --git 
a/scenarios/bargaining/open_spiel/open_spiel/python/bots/bluechip_bridge_uncontested_bidding.py b/scenarios/bargaining/open_spiel/open_spiel/python/bots/bluechip_bridge_uncontested_bidding.py new file mode 100644 index 0000000..4a90f7b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/bots/bluechip_bridge_uncontested_bidding.py @@ -0,0 +1,268 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Wraps third-party bridge bots to make them usable in OpenSpiel. + +This code enables OpenSpiel interoperation for bots which implement the BlueChip +bridge protocol. This is widely used, e.g. in the World computer bridge +championships. For a rough outline of the protocol, see: +http://www.bluechipbridge.co.uk/protocol.htm + +No formal specification is available. This implementation has been verified +to work correctly with WBridge5. + +This bot controls a single player in the game of uncontested bridge bidding. It +chooses its actions by invoking an external bot which plays the full game of +bridge. This means that each time the bot is asked for an action, it sends up to +three actions (forced passes from both opponents, plus partner's most recent +action) to the external bridge bot, and obtains an action in return. + +Since we are restricting ourselves to the uncontested bidding game, we have +no support for Doubling, Redoubling, or the play of the cards. +""" + +import re + +import pyspiel + +# Example session: +# +# Recv: Connecting "WBridge5" as ANYPL using protocol version 18 +# Send: WEST ("WBridge5") seated +# Recv: WEST ready for teams +# Send: Teams: N/S "silent" E/W "bidders" +# Recv: WEST ready to start +# Send: Start of board +# Recv: WEST ready for deal +# Send: Board number 8. Dealer WEST. Neither vulnerable. +# Recv: WEST ready for cards +# Send: WEST's cards: S A T 9 5. H K 6 5. D Q J 8 7 6. C 7. +# Recv: WEST PASSES +# Recv: WEST ready for NORTH's bid +# Send: EAST PASSES +# Recv: WEST ready for EAST's bid +# Send: EAST bids 1C +# Recv: WEST ready for SOUTH's bid + +# Template regular expressions for messages we receive +_CONNECT = 'Connecting "(?P.*)" as ANYPL using protocol version 18' +_SELF_BID_OR_PASS = "{seat} ((?PPASSES)|bids (?P[^ ]*))( Alert.)?" + +# Templates for fixed messages we receive +_READY_FOR_TEAMS = "{seat} ready for teams" +_READY_TO_START = "{seat} ready to start" +_READY_FOR_DEAL = "{seat} ready for deal" +_READY_FOR_CARDS = "{seat} ready for cards" +_READY_FOR_BID = "{seat} ready for {other}'s bid" + +# Templates for messages we send +_SEATED = '{seat} ("{client_name}") seated' +_TEAMS = 'Teams: N/S "opponents" E/W "bidders"' +_START_BOARD = "start of board" +# The board number is arbitrary, but "8" is consistent with the dealer and +# vulnerability we want (in the standard numbering). See Law 2: +# http://web2.acbl.org/documentlibrary/play/Laws-of-Duplicate-Bridge.pdf +_DEAL = "Board number 8. Dealer WEST. Neither vulnerable." 
+_CARDS = "{seat}'s cards: {hand}" +_OTHER_PLAYER_PASS = "{player} PASSES" +_OTHER_PLAYER_BID = "{player} bids {bid}" + +# BlueChip bridge protocol message constants +_SEATS = ["WEST", "EAST"] +_OPPONENTS = ["NORTH", "SOUTH"] +_TRUMP_SUIT = ["C", "D", "H", "S", "NT"] +_NUMBER_TRUMP_SUITS = len(_TRUMP_SUIT) +_RANKS = ["2", "3", "4", "5", "6", "7", "8", "9", "T", "J", "Q", "K", "A"] + +# OpenSpiel constants +_PASS_ACTION = 0 + + +def _string_to_action(call_str): + """Converts a BlueChip bid string to an OpenSpiel action id (an integer). + + Args: + call_str: string representing a bid in the BlueChip format, i.e. "[level] + (as a digit) + [trump suit (S, H, D, C or NT)]", e.g. "1C". + + Returns: + An integer action id - see `bridge_uncontested_bidding.cc`, functions + `Denomination` and `Level`. + 0 is reserved for Pass, so bids are in order from 1 upwards: 1 = 1C, + 2 = 1D, etc. + """ + level = int(call_str[0]) + trumps = _TRUMP_SUIT.index(call_str[1:]) + return (level - 1) * _NUMBER_TRUMP_SUITS + trumps + 1 + + +def _action_to_string(action): + """Converts OpenSpiel action id (an integer) to a BlueChip bid string. + + Args: + action: an integer action id corresponding to a bid. + + Returns: + A string in BlueChip format. + + Inverse of `_string_to_action`. See documentation there. + """ + level = str((action - 1) // _NUMBER_TRUMP_SUITS + 1) + trumps = _TRUMP_SUIT[(action - 1) % _NUMBER_TRUMP_SUITS] + return level + trumps + + +def _expect_regex(client, regex): + """Reads a line from the client, parses it using the regular expression.""" + line = client.read_line() + match = re.match(regex, line) + if not match: + raise ValueError("Received '{}' which does not match regex '{}'".format( + line, regex)) + return match.groupdict() + + +def _expect(client, expected): + """Reads a line from the client, checks it matches expected line exactly.""" + line = client.read_line() + if expected != line: + raise ValueError("Received '{}' but expected '{}'".format(line, expected)) + + +def _hand_string(state_vec): + """Returns the hand of the to-play player in the state in BlueChip format.""" + # See UncontestedBiddingState::InformationStateTensor + # The first 52 elements are whether or not we hold the given card (cards + # ordered suit-by-suit, in ascending order of rank). + suits = [] + for suit in reversed(range(4)): + cards = [] + for rank in reversed(range(13)): + if state_vec[rank * 4 + suit]: + cards.append(_RANKS[rank]) + suits.append(_TRUMP_SUIT[suit] + " " + (" ".join(cards) if cards else "-") + + ".") + return " ".join(suits) + + +def _actions(state_vec): + """Returns the player actions that have been taken in the game so far.""" + # See UncontestedBiddingState::InformationStateTensor + # The first 52 elements are the cards held, then two elements for each + # possible action, specifying which of the two players has taken it (if + # either player has). Then two elements specifying which player we are. 
+ actions = state_vec[52:-2] + return [index // 2 for index, value in enumerate(actions) if value] + + +def _connect(client, seat, state_vec): + """Performs the initial handshake with a BlueChip bot.""" + client.start() + client_name = _expect_regex(client, _CONNECT)["client_name"] + client.send_line(_SEATED.format(seat=seat, client_name=client_name)) + _expect(client, _READY_FOR_TEAMS.format(seat=seat)) + client.send_line(_TEAMS) + _expect(client, _READY_TO_START.format(seat=seat)) + client.send_line(_START_BOARD) + _expect(client, _READY_FOR_DEAL.format(seat=seat)) + client.send_line(_DEAL) + _expect(client, _READY_FOR_CARDS.format(seat=seat)) + client.send_line(_CARDS.format(seat=seat, hand=_hand_string(state_vec))) + + +class BlueChipBridgeBot(pyspiel.Bot): + """An OpenSpiel bot, wrapping a BlueChip bridge bot implementation.""" + + def __init__(self, game, player_id, client): + """Initializes an OpenSpiel `Bot` wrapping a BlueChip-compatible bot. + + Args: + game: The OpenSpiel game object, should be an instance of + bridge_uncontested_bidding, without forced actions. + player_id: The id of the player the bot will act as, 0 = West (dealer), 1 + = East. + client: The BlueChip bot; must support methods `start`, `read_line`, and + `send_line`. + """ + pyspiel.Bot.__init__(self) + self._game = game + self._player_id = player_id + self._client = client + self._seat = _SEATS[player_id] + self._partner = _SEATS[1 - player_id] + self._left_hand_opponent = _OPPONENTS[player_id] + self._right_hand_opponent = _OPPONENTS[1 - player_id] + self._connected = False + + def player_id(self): + return self._player_id + + def restart(self): + """Indicates that the next step may be from a non-sequential state.""" + self._connected = False + + def restart_at(self, state): + """Indicates that the next step may be from a non-sequential state.""" + self._connected = False + + def step(self, state): + """Returns the action and policy for the bot in this state.""" + state_vec = state.information_state_tensor(self.player_id()) + + # Connect if necessary. + if not self._connected: + _connect(self._client, self._seat, state_vec) + self._connected = True + + # Get the actions in the game so far. + actions = _actions(state_vec) + + # Unless this is the first or second action in the game, our + # left-hand-opponent will have passed since our last turn. + if len(actions) > 1: + _expect( + self._client, + _READY_FOR_BID.format( + seat=self._seat, other=self._left_hand_opponent)) + self._client.send_line( + _OTHER_PLAYER_PASS.format(player=self._left_hand_opponent)) + + # Unless there aren't any prior actions, our partner will have bid + # or passed since our last turn, and so we need to send partner's action + # to the bot. + if actions: + _expect(self._client, + _READY_FOR_BID.format(seat=self._seat, other=self._partner)) + if actions[-1] == _PASS_ACTION: + self._client.send_line(_OTHER_PLAYER_PASS.format(player=self._partner)) + else: + self._client.send_line( + _OTHER_PLAYER_BID.format( + player=self._partner, bid=_action_to_string(actions[-1]))) + + # Unless there aren't any prior actions, our right-hand-opponent will have + # passed since our last turn. + if actions: + _expect( + self._client, + _READY_FOR_BID.format( + seat=self._seat, other=self._right_hand_opponent)) + self._client.send_line( + _OTHER_PLAYER_PASS.format(player=self._right_hand_opponent)) + + # Get our action from the bot. 
+ our_action = _expect_regex(self._client, + _SELF_BID_OR_PASS.format(seat=self._seat)) + action = 0 if our_action["pass"] else _string_to_action(our_action["bid"]) + return (action, 1.0), action diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/bots/bluechip_bridge_uncontested_bidding_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/bots/bluechip_bridge_uncontested_bidding_test.py new file mode 100644 index 0000000..31c668d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/bots/bluechip_bridge_uncontested_bidding_test.py @@ -0,0 +1,205 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.bots.bluechip_bridge_uncontested_bidding.""" + +import itertools + +from absl.testing import absltest +from open_spiel.python.bots import bluechip_bridge_uncontested_bidding +import pyspiel + +_BID_1D = bluechip_bridge_uncontested_bidding._string_to_action("1D") +_BID_1H = bluechip_bridge_uncontested_bidding._string_to_action("1H") +_BID_2H = bluechip_bridge_uncontested_bidding._string_to_action("2H") + + +class BluechipBridgeWrapperTest(absltest.TestCase): + + def test_complete_session_east(self): + game = pyspiel.load_game("bridge_uncontested_bidding") + mock_client = absltest.mock.Mock( + **{ + "read_line.side_effect": [ + 'Connecting "WBridge5" as ANYPL using protocol version 18', + "EAST ready for teams", + "EAST ready to start", + "EAST ready for deal", + "EAST ready for cards", + "EAST ready for WEST's bid", + "EAST ready for NORTH's bid", + "EAST bids 1H", + "EAST ready for SOUTH's bid", + "EAST ready for WEST's bid", + "EAST ready for NORTH's bid", + "EAST PASSES", + ] + }) + bot = bluechip_bridge_uncontested_bidding.BlueChipBridgeBot( + game, 1, mock_client) + state = game.deserialize_state("A86.J543.K642.A3 J.KQ962.T953.J96") + state.apply_action(_BID_1D) + policy, action = bot.step(state) + self.assertEqual(action, _BID_1H) + self.assertEqual(policy, (_BID_1H, 1.0)) + state.apply_action(action) + state.apply_action(_BID_2H) + policy, action = bot.step(state) + self.assertEqual(action, bluechip_bridge_uncontested_bidding._PASS_ACTION) + self.assertEqual(policy, + (bluechip_bridge_uncontested_bidding._PASS_ACTION, 1.0)) + # Finished - now check that the game state is correct. + self.assertEqual(str(state), "A86.J543.K642.A3 J.KQ962.T953.J96 1D-1H-2H") + # Check that we received the expected messages. + mock_client.assert_has_calls([ + absltest.mock.call.start(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('EAST ("WBridge5") seated'), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('Teams: N/S "opponents" E/W "bidders"'), + absltest.mock.call.read_line(), + absltest.mock.call.send_line("start of board"), + absltest.mock.call.read_line(), + absltest.mock.call.send_line( + "Board number 8. Dealer WEST. Neither vulnerable."), + absltest.mock.call.read_line(), + absltest.mock.call.send_line( + "EAST's cards: S J. H K Q 9 6 2. D T 9 5 3. 
C J 9 6."), + absltest.mock.call.read_line(), + absltest.mock.call.send_line("WEST bids 1D"), + absltest.mock.call.read_line(), + absltest.mock.call.send_line("NORTH PASSES"), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line("SOUTH PASSES"), + absltest.mock.call.read_line(), + absltest.mock.call.send_line("WEST bids 2H"), + absltest.mock.call.read_line(), + absltest.mock.call.send_line("NORTH PASSES"), + ]) + + def test_complete_session_west(self): + game = pyspiel.load_game("bridge_uncontested_bidding") + mock_client = absltest.mock.Mock( + **{ + "read_line.side_effect": [ + 'Connecting "WBridge5" as ANYPL using protocol version 18', + "WEST ready for teams", + "WEST ready to start", + "WEST ready for deal", + "WEST ready for cards", + "WEST bids 1D Alert.", + "WEST ready for NORTH's bid", + "WEST ready for EAST's bid", + "WEST ready for SOUTH's bid", + "WEST bids 2H", + "WEST ready for NORTH's bid", + "WEST ready for EAST's bid", + "WEST ready for SOUTH's bid", + ] + }) + bot = bluechip_bridge_uncontested_bidding.BlueChipBridgeBot( + game, 0, mock_client) + state = game.deserialize_state("A86.J543.K642.A3 J.KQ962.T953.J96") + policy, action = bot.step(state) + self.assertEqual(action, _BID_1D) + self.assertEqual(policy, (_BID_1D, 1.0)) + state.apply_action(action) + state.apply_action(_BID_1H) + policy, action = bot.step(state) + self.assertEqual(action, _BID_2H) + self.assertEqual(policy, (_BID_2H, 1.0)) + state.apply_action(action) + # Finished - now check that the game state is correct. + self.assertEqual(str(state), "A86.J543.K642.A3 J.KQ962.T953.J96 1D-1H-2H") + # Check that we received the expected messages. + mock_client.assert_has_calls([ + absltest.mock.call.start(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('WEST ("WBridge5") seated'), + absltest.mock.call.read_line(), + absltest.mock.call.send_line('Teams: N/S "opponents" E/W "bidders"'), + absltest.mock.call.read_line(), + absltest.mock.call.send_line("start of board"), + absltest.mock.call.read_line(), + absltest.mock.call.send_line( + "Board number 8. Dealer WEST. Neither vulnerable."), + absltest.mock.call.read_line(), + absltest.mock.call.send_line( + "WEST's cards: S A 8 6. H J 5 4 3. D K 6 4 2. 
C A 3."), + absltest.mock.call.read_line(), + absltest.mock.call.read_line(), + absltest.mock.call.send_line("NORTH PASSES"), + absltest.mock.call.read_line(), + absltest.mock.call.send_line("EAST bids 1H"), + absltest.mock.call.read_line(), + absltest.mock.call.send_line("SOUTH PASSES"), + absltest.mock.call.read_line(), + ]) + + def test_invalid_fixed_message(self): + game = pyspiel.load_game("bridge_uncontested_bidding") + mock_client = absltest.mock.Mock( + **{ + "read_line.side_effect": [ + 'Connecting "WBridge5" as ANYPL using protocol version 18', + "WEST ready for cards", + ] + }) + bot = bluechip_bridge_uncontested_bidding.BlueChipBridgeBot( + game, 0, mock_client) + state = game.deserialize_state("A86.J543.K642.A3 J.KQ962.T953.J96") + with self.assertRaisesRegex( + ValueError, + "Received 'WEST ready for cards' but expected 'WEST ready for teams'"): + bot.step(state) + + def test_invalid_variable_message(self): + game = pyspiel.load_game("bridge_uncontested_bidding") + mock_client = absltest.mock.Mock( + **{ + "read_line.side_effect": [ + 'Connecting "WBridge5" as ANYPL using protocol version 18', + "WEST ready for teams", + "WEST ready to start", + "WEST ready for deal", + "WEST ready for cards", + "NORTH bids 1S", + ] + }) + bot = bluechip_bridge_uncontested_bidding.BlueChipBridgeBot( + game, 0, mock_client) + state = game.deserialize_state("A86.J543.K642.A3 J.KQ962.T953.J96") + with self.assertRaisesRegex( + ValueError, + "Received 'NORTH bids 1S' which does not match regex 'WEST"): + bot.step(state) + + def test_string_to_action_to_string_roundtrip(self): + for level, trump in itertools.product( + range(1, 8), bluechip_bridge_uncontested_bidding._TRUMP_SUIT): + bid = str(level) + trump + action = bluechip_bridge_uncontested_bidding._string_to_action(bid) + self.assertEqual( + bid, bluechip_bridge_uncontested_bidding._action_to_string(action)) + + def test_action_to_string_to_action_roundtrip(self): + for action in range(1, 36): + bid = bluechip_bridge_uncontested_bidding._action_to_string(action) + self.assertEqual( + action, bluechip_bridge_uncontested_bidding._string_to_action(bid)) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/bots/gtp.py b/scenarios/bargaining/open_spiel/open_spiel/python/bots/gtp.py new file mode 100644 index 0000000..c6bcc42 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/bots/gtp.py @@ -0,0 +1,156 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A bot that uses an external agent over the Go Text Protocol.""" + +import subprocess +import time + +import pyspiel + + +class CommandError(Exception): + """An error message returned from the GTP bot.""" + + +class GTPBot(pyspiel.Bot): + """A bot that uses an external agent over GTP to get the action to play. + + The Go Text Protocol, GTP, is a text based protocol for communication with + computer Go programs (https://www.lysator.liu.se/~gunnar/gtp/). 
It has also + been adopted by agents in other games including Hex and Havannah. If you need + to configure your agent in some specific way (eg time/resource limits), you + can use `gtp_cmd` to send raw commands to it. + """ + + def __init__(self, game, exec_path, player_colors=("b", "w"), + suppress_stderr=True): + """Create a Bot that runs an external binary using GTP. + + Args: + game: A Game object to pull the configuration (boardsize) + exec_path: A string or list to be passed to popen to launch the binary. + player_colors: A list or tuple of names to be passed to gtp's `play` + command to tell it which player made the move. + suppress_stderr: Whether to suppress stderr from the binary. + """ + pyspiel.Bot.__init__(self) + self._process = subprocess.Popen( + exec_path, bufsize=0, stdin=subprocess.PIPE, stdout=subprocess.PIPE, + stderr=(subprocess.DEVNULL if suppress_stderr else None)) + + self._game = game + params = game.get_parameters() + if "board_size" in params: + self.gtp_cmd("boardsize", str(params["board_size"])) + + if len(player_colors) != game.num_players(): + raise ValueError( + ("player_colors has the wrong number of players for this game. Got " + "{}, expected {}").format(len(player_colors), game.num_players())) + self._player_colors = player_colors + + def __del__(self): + self.close() + + def close(self): + """Tell the game to quit and wait for it to do so, killing eventually.""" + # We support closing an already closed instance, as __del__ will be called + # when the object is deleted, thus closing a potentially already closed obj. + # The hasattr is in case Popen raises and self._process doesn't exist. + if hasattr(self, "_process") and self._process is not None: + if self.running: + try: + self.gtp_cmd("quit") + except (CommandError, IOError): + pass + self._process.stdin.close() + self._process.stdout.close() + _shutdown_proc(self._process, 3) + self._process = None + + def gtp_cmd(self, *args): + """Send commands directly to the game, and get the response as a string.""" + cmd = " ".join([str(a) for a in args]).encode() + self._process.stdin.write(cmd + b"\n") + response = "" + while True: + line = self._process.stdout.readline().decode() + if not line: + raise IOError("Engine closed the connection.") + if line == "\n": + if response: + break # A blank line signifies end of response. + else: + continue # Ignore leading newlines, possibly left from prev response. 
+ response += line + if response.startswith("="): + return response[1:].strip().lower() + else: + raise CommandError(response[1:].strip()) + + def inform_action(self, state, player_id, action): + """Let the bot know of the other agent's actions.""" + self.gtp_cmd("play", self._player_colors[player_id], + state.action_to_string(action)) + + def step(self, state): + """Returns the selected action and steps the internal state forward.""" + return state.string_to_action(self.gtp_cmd( + "genmove", self._player_colors[state.current_player()])) + + def restart(self): + self.gtp_cmd("clear_board") + + def restart_at(self, state): + self.restart() + new_state = self._game.new_initial_state() + for action in state.history(): + self.inform_action(new_state, new_state.current_player(), + new_state.action_to_string(action)) + new_state.apply_action(action) + + @property + def name(self): + """The name reported by the agent.""" + return self.gtp_cmd("name") + + @property + def version(self): + """The version reported by the agent.""" + return self.gtp_cmd("version") + + @property + def running(self): + """Whether the agent binary is still running.""" + # poll returns None if it's running, otherwise the exit code. + return self._process and (self._process.poll() is None) + + @property + def pid(self): + """The pid of the agent binary.""" + return self._process.pid if self.running else None + + +def _shutdown_proc(p, timeout): + """Waits for a proc to shut down; terminates or kills it after `timeout`.""" + freq = 10 # how often to check per second + for _ in range(1 + timeout * freq): + p.terminate() + ret = p.poll() + if ret is not None: + return ret + time.sleep(1 / freq) + p.kill() + return p.wait() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/bots/human.py b/scenarios/bargaining/open_spiel/open_spiel/python/bots/human.py new file mode 100644 index 0000000..a44ff2d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/bots/human.py @@ -0,0 +1,87 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A bot that asks the user which action to play.""" + +import math +import os + +import pyspiel + +_MAX_WIDTH = int(os.getenv("COLUMNS", "80")) # Get your TTY width. + + +def _print_columns(strings): + """Prints a list of strings in columns.""" + padding = 2 + longest = max(len(s) for s in strings) + max_columns = math.floor((_MAX_WIDTH - 1) / (longest + 2 * padding)) + rows = math.ceil(len(strings) / max_columns) + columns = math.ceil(len(strings) / rows) # Might not fill all max_columns. 
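+  # Entries are laid out column-major (flat index i = r + c * rows), so
+  # consecutive strings read down a column rather than across a row.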
+ for r in range(rows): + for c in range(columns): + i = r + c * rows + if i < len(strings): + print(" " * padding + strings[i].ljust(longest + padding), end="") + print() + + +class HumanBot(pyspiel.Bot): + """Asks the user which action to play.""" + + def step_with_policy(self, state): + """Returns the stochastic policy and selected action in the given state.""" + legal_actions = state.legal_actions(state.current_player()) + if not legal_actions: + return [], pyspiel.INVALID_ACTION + p = 1 / len(legal_actions) + policy = [(action, p) for action in legal_actions] + + action_map = { + state.action_to_string(state.current_player(), action): action + for action in legal_actions + } + + while True: + action_str = input("Choose an action (empty to print legal actions): ") + + if not action_str: + print("Legal actions(s):") + longest_num = max(len(str(action)) for action in legal_actions) + _print_columns([ + "{}: {}".format(str(action).rjust(longest_num), action_str) + for action_str, action in sorted(action_map.items()) + ]) + continue + + if action_str in action_map: + return policy, action_map[action_str] + + try: + action = int(action_str) + except ValueError: + print("Could not parse the action:", action_str) + continue + + if action in legal_actions: + return policy, action + + print("Illegal action selected:", action_str) + + def step(self, state): + return self.step_with_policy(state)[1] + + def restart_at(self, state): + pass + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/bots/is_mcts_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/bots/is_mcts_test.py new file mode 100644 index 0000000..4d75cc1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/bots/is_mcts_test.py @@ -0,0 +1,66 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Unit test for Information Set MCTS bot. + +This test mimics the basic C++ tests in algorithms/is_mcts_test.cc. 
+""" +# pylint: disable=g-unreachable-test-method + +from absl.testing import absltest +import numpy as np +from open_spiel.python.algorithms import evaluate_bots +import pyspiel + +SEED = 12983641 + + +class ISMCTSBotTest(absltest.TestCase): + + def ismcts_play_game(self, game): + evaluator = pyspiel.RandomRolloutEvaluator(1, SEED) + for final_policy_type in [ + pyspiel.ISMCTSFinalPolicyType.NORMALIZED_VISIT_COUNT, + pyspiel.ISMCTSFinalPolicyType.MAX_VISIT_COUNT, + pyspiel.ISMCTSFinalPolicyType.MAX_VALUE + ]: + bot = pyspiel.ISMCTSBot(SEED, evaluator, 5.0, 1000, -1, final_policy_type, + False, False) + bots = [bot] * game.num_players() + evaluate_bots.evaluate_bots(game.new_initial_state(), bots, np.random) + bot = pyspiel.ISMCTSBot(SEED, evaluator, 5.0, 1000, 10, final_policy_type, + False, False) + bots = [bot] * game.num_players() + evaluate_bots.evaluate_bots(game.new_initial_state(), bots, np.random) + bot = pyspiel.ISMCTSBot(SEED, evaluator, 5.0, 1000, 10, final_policy_type, + True, True) + bots = [bot] * game.num_players() + evaluate_bots.evaluate_bots(game.new_initial_state(), bots, np.random) + + def test_basic_sim_kuhn(self): + game = pyspiel.load_game("kuhn_poker") + self.ismcts_play_game(game) + game = pyspiel.load_game("kuhn_poker(players=3)") + self.ismcts_play_game(game) + + def test_basic_sim_leduc(self): + game = pyspiel.load_game("leduc_poker") + self.ismcts_play_game(game) + game = pyspiel.load_game("leduc_poker(players=3)") + self.ismcts_play_game(game) + + +if __name__ == "__main__": + np.random.seed(SEED) + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/bots/policy.py b/scenarios/bargaining/open_spiel/open_spiel/python/bots/policy.py new file mode 100644 index 0000000..a060d52 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/bots/policy.py @@ -0,0 +1,71 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A bot that samples from legal actions based on a policy.""" + +import pyspiel + + +class PolicyBot(pyspiel.Bot): + """Samples an action from action probabilities based on a policy. + + This bot plays actions as specified by the underlying Policy. Problems may + occur if the policy assigns non-zero probability to invalid actions, or if the + policy is not complete, or if probabilities don't sum to 1. + """ + + def __init__(self, player_id, rng, policy): + """Initializes a policy bot. + + Args: + player_id: The integer id of the player for this bot, e.g. `0` if acting + as the first player. + rng: A random number generator supporting a `choice` method, e.g. + `np.random` + policy: A policy to get action distributions + """ + pyspiel.Bot.__init__(self) + self._player_id = player_id + self._rng = rng + self._policy = policy + + def player_id(self): + return self._player_id + + def restart_at(self, state): + pass + + def step_with_policy(self, state): + """Returns the stochastic policy and selected action in the given state. 
+ + Args: + state: The current state of the game. + + Returns: + A `(policy, action)` pair, where policy is a `list` of + `(action, probability)` pairs for each legal action, with + `probability` defined by the policy action probabilities. + The `action` is sampled from the distribution, + or `pyspiel.INVALID_ACTION` if there are no actions available. + """ + policy = self._policy.action_probabilities(state, self._player_id) + action_list = list(policy.keys()) + if not any(action_list): + return [], pyspiel.INVALID_ACTION + + action = self._rng.choice(action_list, p=list(policy.values())) + return list(policy.items()), action + + def step(self, state): + return self.step_with_policy(state)[1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/bots/scenarios.py b/scenarios/bargaining/open_spiel/open_spiel/python/bots/scenarios.py new file mode 100644 index 0000000..b96ea56 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/bots/scenarios.py @@ -0,0 +1,114 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Provides tools to evaluate bots against specific scenarios.""" + +import dataclasses +from typing import Text, List +from absl import logging + + +@dataclasses.dataclass +class Scenario(object): + name: Text + init_actions: List[Text] + expected_action_str: Text + expected_prob: float + player_id: int + + +CATCH_SCENARIOS = [ + Scenario("Ball in column 1, chooses left.", [ + "Initialized ball to 0", "LEFT", "STAY", "STAY", "STAY", "STAY", "STAY", + "STAY", "STAY" + ], "LEFT", 1., 0), + Scenario("Ball in column 2, chooses left.", [ + "Initialized ball to 1", "STAY", "STAY", "STAY", "STAY", "STAY", "STAY", + "STAY", "STAY" + ], "LEFT", 1., 0), + Scenario("Ball in column 3, chooses left.", [ + "Initialized ball to 2", "RIGHT", "STAY", "STAY", "STAY", "STAY", + "STAY", "STAY", "STAY" + ], "LEFT", 1., 0), +] + +SCENARIOS = { + "catch": CATCH_SCENARIOS, +} + + +def get_default_scenarios(game_name): + """Loads the default scenarios for a given game. + + Args: + game_name: The game to load scenarios for. + + Returns: + A List[Scenario] detailing the scenarios for that game. + """ + return SCENARIOS[game_name] + + +def play_bot_in_scenarios(game, bots, scenarios=None): + """Plays a bot against a number of scenarios. + + Args: + game: The game the bot is playing. + bots: A list of length game.num_players() of pyspiel.Bots (or equivalent). + Must implement the apply_action and step methods. + scenarios: The scenarios we evaluate the bot in. A List[Scenario]. + + Returns: + A dict mapping scenarios to their scores (with an additional "mean_score" + field containing the mean score across all scenarios). + The average score across all scenarios. 
+ """ + if scenarios is None: + scenarios = get_default_scenarios(game.get_type().short_name) + + results = [] + total_score = 0 + for scenario in scenarios: + state = game.new_initial_state() + bot = bots[scenario.player_id] + bot.restart() + for action_str in scenario.init_actions: + action = state.string_to_action(action_str) + if state.current_player() == scenario.player_id: + bot.force_action(state, action) + state.apply_action(action) + actions_and_probs, _ = bot.step(state) + expected_action = state.string_to_action(scenario.expected_action_str) + for action, prob in actions_and_probs: + if action == expected_action: + actual_prob = prob + break + score = 1 - abs(actual_prob - scenario.expected_prob) + results.append((scenario.name, score, scenario.expected_action_str, + scenario.expected_prob, actual_prob)) + total_score += score + + if scenarios: + total_score /= len(scenarios) + logging.info("Average score across all scenarios: %.4f.", total_score) + results_dict = {} + for name, score, expected_action, expected_prob, actual_prob in results: + logging.info("************************************************************") + logging.info("Scenario: '%s'. Score: %.4f.", name, score) + logging.info("Expected action %s with probability %.4f but assigned %.4f.", + expected_action, expected_prob, actual_prob) + logging.info("***************************") + results_dict["scenario_score: " + name] = score + results_dict["mean_score"] = total_score + return results_dict diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/bots/uniform_random.py b/scenarios/bargaining/open_spiel/open_spiel/python/bots/uniform_random.py new file mode 100644 index 0000000..a17e141 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/bots/uniform_random.py @@ -0,0 +1,67 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A bot that chooses uniformly at random from legal actions.""" + +import pyspiel + + +class UniformRandomBot(pyspiel.Bot): + """Chooses uniformly at random from the available legal actions.""" + + def __init__(self, player_id, rng): + """Initializes a uniform-random bot. + + Args: + player_id: The integer id of the player for this bot, e.g. `0` if acting + as the first player. + rng: A random number generator supporting a `choice` method, e.g. + `np.random` + """ + pyspiel.Bot.__init__(self) + self._player_id = player_id + self._rng = rng + + def restart_at(self, state): + pass + + def player_id(self): + return self._player_id + + def provides_policy(self): + return True + + def step_with_policy(self, state): + """Returns the stochastic policy and selected action in the given state. + + Args: + state: The current state of the game. 
+ + Returns: + A `(policy, action)` pair, where policy is a `list` of + `(action, probability)` pairs for each legal action, with + `probability = 1/num_actions` + The `action` is selected uniformly at random from the legal actions, + or `pyspiel.INVALID_ACTION` if there are no legal actions available. + """ + legal_actions = state.legal_actions(self._player_id) + if not legal_actions: + return [], pyspiel.INVALID_ACTION + p = 1 / len(legal_actions) + policy = [(action, p) for action in legal_actions] + action = self._rng.choice(legal_actions) + return policy, action + + def step(self, state): + return self.step_with_policy(state)[1] diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/bots/uniform_random_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/bots/uniform_random_test.py new file mode 100644 index 0000000..15905c5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/bots/uniform_random_test.py @@ -0,0 +1,62 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Unit test for uniform random bot.""" + +import random + +from absl.testing import absltest +from open_spiel.python.bots import uniform_random +import pyspiel + + +class BotTest(absltest.TestCase): + + def test_policy_is_uniform(self): + game = pyspiel.load_game("leduc_poker") + bots = [ + uniform_random.UniformRandomBot(0, random), + uniform_random.UniformRandomBot(1, random) + ] + + # deal each player a card + state = game.new_initial_state() + state.apply_action(2) + state.apply_action(4) + + # p0 starts: uniform from [check, bet] + policy, _ = bots[0].step_with_policy(state) + self.assertCountEqual(policy, [(1, 0.5), (2, 0.5)]) + + # Afte p0 bets, p1 chooses from [fold, call, raise] + state.apply_action(2) + policy, _ = bots[1].step_with_policy(state) + self.assertCountEqual(policy, [(0, 1 / 3), (1, 1 / 3), (2, 1 / 3)]) + + def test_no_legal_actions(self): + game = pyspiel.load_game("kuhn_poker") + bot = uniform_random.UniformRandomBot(0, random) + state = game.new_initial_state() + state.apply_action(2) # deal + state.apply_action(1) # deal + state.apply_action(1) # bet + state.apply_action(0) # fold + bot.restart_at(state) + policy, action = bot.step_with_policy(state) + self.assertEqual(policy, []) + self.assertEqual(action, pyspiel.INVALID_ACTION) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/__init__.py new file mode 100644 index 0000000..df17722 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/basic_games.py b/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/basic_games.py new file mode 100644 index 0000000..25e2dd5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/basic_games.py @@ -0,0 +1,70 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Some basic coalitional games. + +Many of these are taken from examples in the "Computational Aspects of +Cooperative Game Theory" text book by Georgios Chalkiadakis, Edith Elkind, and +Michael Wooldridge. +""" + +from typing import Dict, Tuple + +import numpy as np + +from open_spiel.python.coalitional_games import coalitional_game + + +class IceCreamGame(coalitional_game.CoalitionalGame): + """Example 2.2 from CACGT book by Chalkiadakis, Elkind, and Wooldridge.""" + + def __init__(self): + super().__init__(num_players=3) + + def coalition_value(self, coalition: np.ndarray) -> float: + """Encodes the payoffs.""" + # indices ordered as C M P + if coalition.sum() < 2: + return 0.0 + elif coalition[0] == 1 and coalition[1] == 1 and coalition[2] == 0: + # {C, M} + return 500.0 + elif coalition[0] == 1 and coalition[1] == 0 and coalition[2] == 1: + # {C, P} + return 500.0 + elif coalition[0] == 0 and coalition[1] == 1 and coalition[2] == 1: + # {M, P} + return 750.0 + elif coalition.sum() == 3: + return 1000.0 + else: + raise RuntimeError("Invalid coalition") + + +class TabularGame(coalitional_game.CoalitionalGame): + """A game represented by a table of values.""" + + def __init__(self, table: Dict[Tuple[int, ...], float]): + super().__init__(num_players=-1) # set num players to -1 for now + for key in table: + if self._num_players < 0: + self._num_players = len(key) + else: + assert len(key) == self._num_players + assert self._num_players >= 1 + self._table = table + + def coalition_value(self, coalition: np.ndarray) -> float: + return self._table[tuple(coalition)] + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/coalitional_game.py b/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/coalitional_game.py new file mode 100644 index 0000000..0dca8b2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/coalitional_game.py @@ -0,0 +1,51 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Coalitional Games in Open Spiel.""" + +import abc +import numpy as np + + +class CoalitionalGame(abc.ABC): + """An abstract class for computing the value of a coalition.""" + + def __init__(self, num_players: int): + self._num_players = num_players + + @abc.abstractmethod + def coalition_value(self, coalition: np.ndarray) -> float: + """Returns the value of the coalition (the characteristic function). + + Args: + coalition: an array of size num_players of ones (indicating player is + included) and zeroes (the player is excluded). + """ + + def coalition_values(self, coalitions: np.ndarray) -> np.ndarray: + """Returns the values of a batch of coalitions. + + Override to provide faster versions depending on the game. + + Args: + coalitions: batch_size by num_players array of coalitions. + """ + batch_size = coalitions.shape[0] + return np.asarray( + [self.coalition_value(coalitions[i]) for i in range(batch_size)] + ) + + def num_players(self) -> int: + """Returns the number of players.""" + return self._num_players diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/deon_larson20_games.py b/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/deon_larson20_games.py new file mode 100644 index 0000000..5039cfa --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/deon_larson20_games.py @@ -0,0 +1,408 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Games from D'Eon and Larson '2020. + +Testing Axioms Against Human Reward Divisions in Cooperative Games. 
+https://www.ifaamas.org/Proceedings/aamas2020/pdfs/p312.pdf +""" + +from open_spiel.python.coalitional_games import basic_games + + +SHAPLEY_VALUES = { + # Experiment 1 + "1-Worse-Solo": [25, 25, 10], + "1-Worse-Both": [25, 25, 10], + "1-Worse-Pair": [25, 25, 10], + "1-Better-Solo": [30, 15, 15], + "1-Better-Both": [30, 15, 15], + "1-Better-Pair": [30, 15, 15], + "Distinct-Solo": [30, 20, 10], + "Distinct-Both": [30, 20, 10], + "Distinct-Pair": [30, 20, 10], + "Additive": [30, 20, 10], + # Experiment 2 + "1-Worse-Zeros2": [25, 25, 10], + "1-Worse-Zeros5": [25, 25, 10], + "1-Worse-Zeros10": [25, 25, 10], + "1-Worse-Sum30": [25, 25, 10], + "1-Worse-Sum45": [25, 25, 10], + "1-Worse-Sum60": [25, 25, 10], + "1-Better-Zeros2": [30, 15, 15], + "1-Better-Zeros5": [30, 15, 15], + "1-Better-Zeros10": [30, 15, 15], + "1-Better-Sum30": [30, 15, 15], + "1-Better-Sum45": [30, 15, 15], + "1-Better-Sum60": [30, 15, 15], + "1-Null-Zeros": [40, 20, 0], + "1-Null-Sum40": [40, 20, 0], + "1-Null-Sum50": [40, 20, 0], + "1-Null-Sum60": [40, 20, 0], +} + + +def make_game(name: str) -> basic_games.TabularGame: + """Returns a game from D'Eon and Larson '2020. + + Testing Axioms Against Human Reward Divisions in Cooperative Games. + https://www.ifaamas.org/Proceedings/aamas2020/pdfs/p312.pdf + + Args: + name: the name of the game, as in Table 1 of the paper. + + Raises: + RuntimeError: when the name of the game is not known. + """ + + if name == "1-Worse-Solo": + # A B C AB AC BC + # 40 40 10 60 60 60 + return basic_games.TabularGame({ + (0, 0, 0): 0.0, + (1, 0, 0): 40.0, + (0, 1, 0): 40.0, + (0, 0, 1): 10.0, + (1, 1, 0): 60.0, + (1, 0, 1): 60.0, + (0, 1, 1): 60.0, + (1, 1, 1): 60.0, + }) + elif name == "1-Worse-Both": + # A B C AB AC BC + # 15 15 0 45 30 30 + return basic_games.TabularGame({ + (0, 0, 0): 0.0, + (1, 0, 0): 15.0, + (0, 1, 0): 15.0, + (0, 0, 1): 0.0, + (1, 1, 0): 45.0, + (1, 0, 1): 30.0, + (0, 1, 1): 30.0, + (1, 1, 1): 60.0, + }) + elif name == "1-Worse-Pair": + # A B C AB AC BC + # 0 0 0 45 15 15 + return basic_games.TabularGame({ + (0, 0, 0): 0.0, + (1, 0, 0): 0.0, + (0, 1, 0): 0.0, + (0, 0, 1): 0.0, + (1, 1, 0): 45.0, + (1, 0, 1): 15.0, + (0, 1, 1): 15.0, + (1, 1, 1): 60.0, + }) + elif name == "1-Better-Solo": + # A B C AB AC BC + # 40 10 10 60 60 60 + return basic_games.TabularGame({ + (0, 0, 0): 0.0, + (1, 0, 0): 40.0, + (0, 1, 0): 10.0, + (0, 0, 1): 10.0, + (1, 1, 0): 60.0, + (1, 0, 1): 60.0, + (0, 1, 1): 60.0, + (1, 1, 1): 60.0, + }) + elif name == "1-Better-Both": + # A B C AB AC BC + # 15 0 0 45 45 30 + return basic_games.TabularGame({ + (0, 0, 0): 0.0, + (1, 0, 0): 15.0, + (0, 1, 0): 0.0, + (0, 0, 1): 0.0, + (1, 1, 0): 45.0, + (1, 0, 1): 45.0, + (0, 1, 1): 30.0, + (1, 1, 1): 60.0, + }) + elif name == "1-Better-Pair": + # A B C AB AC BC + # 0 0 0 45 45 15 + return basic_games.TabularGame({ + (0, 0, 0): 0.0, + (1, 0, 0): 0.0, + (0, 1, 0): 0.0, + (0, 0, 1): 0.0, + (1, 1, 0): 45.0, + (1, 0, 1): 45.0, + (0, 1, 1): 15.0, + (1, 1, 1): 60.0, + }) + elif name == "Distinct-Solo": + # A B C AB AC BC + # 40 20 0 60 60 60 + return basic_games.TabularGame({ + (0, 0, 0): 0.0, + (1, 0, 0): 40.0, + (0, 1, 0): 20.0, + (0, 0, 1): 0.0, + (1, 1, 0): 60.0, + (1, 0, 1): 60.0, + (0, 1, 1): 60.0, + (1, 1, 1): 60.0, + }) + elif name == "Distinct-Both": + # A B C AB AC BC + # 20 10 0 60 50 40 + return basic_games.TabularGame({ + (0, 0, 0): 0.0, + (1, 0, 0): 20.0, + (0, 1, 0): 10.0, + (0, 0, 1): 0.0, + (1, 1, 0): 60.0, + (1, 0, 1): 50.0, + (0, 1, 1): 40.0, + (1, 1, 1): 60.0, + }) + elif name == "Distinct-Pair": + # A 
B C AB AC BC + # 0 0 0 60 40 20 + return basic_games.TabularGame({ + (0, 0, 0): 0.0, + (1, 0, 0): 0.0, + (0, 1, 0): 0.0, + (0, 0, 1): 0.0, + (1, 1, 0): 60.0, + (1, 0, 1): 40.0, + (0, 1, 1): 20.0, + (1, 1, 1): 60.0, + }) + elif name == "Additive": + # A B C AB AC BC + # 30 20 10 50 40 30 + return basic_games.TabularGame({ + (0, 0, 0): 0.0, + (1, 0, 0): 30.0, + (0, 1, 0): 20.0, + (0, 0, 1): 10.0, + (1, 1, 0): 50.0, + (1, 0, 1): 40.0, + (0, 1, 1): 30.0, + (1, 1, 1): 60.0, + }) + elif name == "1-Worse-Zeros2": + # A B C AB AC BC + # 2 0 0 40 10 12 + return basic_games.TabularGame({ + (0, 0, 0): 0.0, + (1, 0, 0): 2.0, + (0, 1, 0): 0.0, + (0, 0, 1): 0.0, + (1, 1, 0): 40.0, + (1, 0, 1): 10.0, + (0, 1, 1): 12.0, + (1, 1, 1): 60.0, + }) + elif name == "1-Worse-Zeros5": + # A B C AB AC BC + # 5 0 0 40 10 15 + return basic_games.TabularGame({ + (0, 0, 0): 0.0, + (1, 0, 0): 5.0, + (0, 1, 0): 0.0, + (0, 0, 1): 0.0, + (1, 1, 0): 40.0, + (1, 0, 1): 10.0, + (0, 1, 1): 15.0, + (1, 1, 1): 60.0, + }) + elif name == "1-Worse-Zeros10": + # A B C AB AC BC + # 10 0 0 40 10 20 + return basic_games.TabularGame({ + (0, 0, 0): 0.0, + (1, 0, 0): 10.0, + (0, 1, 0): 0.0, + (0, 0, 1): 0.0, + (1, 1, 0): 40.0, + (1, 0, 1): 10.0, + (0, 1, 1): 20.0, + (1, 1, 1): 60.0, + }) + elif name == "1-Worse-Sum30": + # A B C AB AC BC + # 20 5 5 60 30 45 + return basic_games.TabularGame({ + (0, 0, 0): 0.0, + (1, 0, 0): 20.0, + (0, 1, 0): 5.0, + (0, 0, 1): 5.0, + (1, 1, 0): 60.0, + (1, 0, 1): 30.0, + (0, 1, 1): 45.0, + (1, 1, 1): 60.0, + }) + elif name == "1-Worse-Sum45": + # A B C AB AC BC + # 25 10 10 60 30 45 + return basic_games.TabularGame({ + (0, 0, 0): 0.0, + (1, 0, 0): 25.0, + (0, 1, 0): 10.0, + (0, 0, 1): 10.0, + (1, 1, 0): 60.0, + (1, 0, 1): 30.0, + (0, 1, 1): 45.0, + (1, 1, 1): 60.0, + }) + elif name == "1-Worse-Sum60": + # A B C AB AC BC + # 30 15 15 60 30 45 + return basic_games.TabularGame({ + (0, 0, 0): 0.0, + (1, 0, 0): 30.0, + (0, 1, 0): 15.0, + (0, 0, 1): 15.0, + (1, 1, 0): 60.0, + (1, 0, 1): 30.0, + (0, 1, 1): 45.0, + (1, 1, 1): 60.0, + }) + elif name == "1-Better-Zeros2": + # A B C AB AC BC + # 2 2 0 38 40 10 + return basic_games.TabularGame({ + (0, 0, 0): 0.0, + (1, 0, 0): 2.0, + (0, 1, 0): 2.0, + (0, 0, 1): 0.0, + (1, 1, 0): 38.0, + (1, 0, 1): 40.0, + (0, 1, 1): 10.0, + (1, 1, 1): 60.0, + }) + elif name == "1-Better-Zeros5": + # A B C AB AC BC + # 5 5 0 35 40 10 + return basic_games.TabularGame({ + (0, 0, 0): 0.0, + (1, 0, 0): 5.0, + (0, 1, 0): 5.0, + (0, 0, 1): 0.0, + (1, 1, 0): 35.0, + (1, 0, 1): 40.0, + (0, 1, 1): 10.0, + (1, 1, 1): 60.0, + }) + elif name == "1-Better-Zeros10": + # A B C AB AC BC + # 10 10 0 30 40 10 + return basic_games.TabularGame({ + (0, 0, 0): 0.0, + (1, 0, 0): 10.0, + (0, 1, 0): 10.0, + (0, 0, 1): 0.0, + (1, 1, 0): 30.0, + (1, 0, 1): 40.0, + (0, 1, 1): 10.0, + (1, 1, 1): 60.0, + }) + elif name == "1-Better-Sum30": + # A B C AB AC BC + # 15 15 0 45 60 30 + return basic_games.TabularGame({ + (0, 0, 0): 0.0, + (1, 0, 0): 15.0, + (0, 1, 0): 15.0, + (0, 0, 1): 0.0, + (1, 1, 0): 45.0, + (1, 0, 1): 60.0, + (0, 1, 1): 30.0, + (1, 1, 1): 60.0, + }) + elif name == "1-Better-Sum45": + # A B C AB AC BC + # 20 20 5 45 60 30 + return basic_games.TabularGame({ + (0, 0, 0): 0.0, + (1, 0, 0): 20.0, + (0, 1, 0): 20.0, + (0, 0, 1): 5.0, + (1, 1, 0): 45.0, + (1, 0, 1): 60.0, + (0, 1, 1): 30.0, + (1, 1, 1): 60.0, + }) + elif name == "1-Better-Sum60": + # A B C AB AC BC + # 25 25 10 45 60 30 + return basic_games.TabularGame({ + (0, 0, 0): 0.0, + (1, 0, 0): 25.0, + (0, 1, 0): 25.0, + (0, 0, 1): 10.0, + (1, 1, 0): 
45.0, + (1, 0, 1): 60.0, + (0, 1, 1): 30.0, + (1, 1, 1): 60.0, + }) + elif name == "1-Null-Zeros": + # A B C AB AC BC + # 20 0 0 60 20 0 + return basic_games.TabularGame({ + (0, 0, 0): 0.0, + (1, 0, 0): 20.0, + (0, 1, 0): 0.0, + (0, 0, 1): 0.0, + (1, 1, 0): 60.0, + (1, 0, 1): 20.0, + (0, 1, 1): 0.0, + (1, 1, 1): 60.0, + }) + elif name == "1-Null-Sum40": + # A B C AB AC BC + # 30 10 0 60 30 10 + return basic_games.TabularGame({ + (0, 0, 0): 0.0, + (1, 0, 0): 30.0, + (0, 1, 0): 10.0, + (0, 0, 1): 0.0, + (1, 1, 0): 60.0, + (1, 0, 1): 30.0, + (0, 1, 1): 10.0, + (1, 1, 1): 60.0, + }) + elif name == "1-Null-Sum50": + # A B C AB AC BC + # 35 15 0 60 35 15 + return basic_games.TabularGame({ + (0, 0, 0): 0.0, + (1, 0, 0): 35.0, + (0, 1, 0): 15.0, + (0, 0, 1): 0.0, + (1, 1, 0): 60.0, + (1, 0, 1): 35.0, + (0, 1, 1): 15.0, + (1, 1, 1): 60.0, + }) + elif name == "1-Null-Sum60": + # A B C AB AC BC + # 40 20 0 60 40 20 + return basic_games.TabularGame({ + (0, 0, 0): 0.0, + (1, 0, 0): 40.0, + (0, 1, 0): 20.0, + (0, 0, 1): 0.0, + (1, 1, 0): 60.0, + (1, 0, 1): 40.0, + (0, 1, 1): 20.0, + (1, 1, 1): 60.0, + }) + else: + raise RuntimeError("unknown game") diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/least_core_lagrangian.py b/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/least_core_lagrangian.py new file mode 100644 index 0000000..705cbe8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/least_core_lagrangian.py @@ -0,0 +1,664 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Methods to compute the core based on stochastic saddle point programming. 
+ +This file contains methods to compute the core using a Lagrangian formulation +referred to in Gemp et al AAMAS '24: +"Approximating the Core via Iterative Coalition Sampling" + +TODO: +- add a link to arXiv when it's live +- add the least core algorithm to the algorithms.md also when that link is live +""" + +import dataclasses +import functools +import itertools +import time +from typing import Any, Dict, Tuple + +from absl import logging +import jax +import jax.numpy as jnp +import numpy as np +import optax + +from open_spiel.python.coalitional_games import coalitional_game + + +@dataclasses.dataclass(frozen=True) +class LeastCoreValue: + payoff: np.ndarray + lcv: float + duration: float + meta: Dict[Any, Any] + + +def compute_least_core_value( + cvc: coalitional_game.CoalitionalGame, alg_config +) -> LeastCoreValue: + """Computes the least core value of a game.""" + opt_primal = optax.adam(learning_rate=alg_config.init.lr_primal) + opt_dual = optax.adam(learning_rate=alg_config.init.lr_dual) + evaluation_iterations = alg_config.eval.evaluation_iterations + evaluate_every = 2 * alg_config.solve.n_iter # do not evaluate + cl = CoreLagrangian(cvc, opt_primal, opt_dual) + payoffs, epsilons, _, duration = cl.solve( + evaluation_iterations=evaluation_iterations, + evaluate_every=evaluate_every, + **alg_config.solve, + ) + lcvs = np.full(payoffs.shape[0], np.inf) + payoff = payoffs[-1] + lcv = np.inf + for i in range(payoffs.shape[0]): + payoff = payoffs[i] + epsilon = epsilons[i] + max_violation = payoff_evaluation( + cvc, payoff, epsilon, evaluation_iterations) + lcv = epsilon + max_violation + lcvs[i] = lcv + meta = dict(payoffs=payoffs, epsilons=epsilons, lcvs=lcvs) + return LeastCoreValue(payoff, lcv, duration, meta) + + +def payoff_evaluation( + cv_calc: coalitional_game.CoalitionalGame, + payoffs: np.ndarray, + epsilon: float, + batch_size: int, + max_exponent: int = 13, +) -> float: + """Evaluate deficit over a set of random coalitions. + + Args: + cv_calc: the game to work on + payoffs: the payoff vector to test + epsilon: desired approximation of the epsilon-core + batch_size: number of random coalitions to sample + max_exponent: examine at maxixum 2**max_exponent constraints in one batch + default 13, assume 2**13 ~ 10k coalitions is mem limit for single batch + + Returns: + Expected loss, relu(deficit), over random batch of coalitions + """ + max_batch = 2**max_exponent + num_players = cv_calc.num_players() + violation = 0. 
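+  # With a budget of at least 2**num_players, enumerate every coalition
+  # exactly (in chunks of at most 2**max_exponent rows); otherwise the
+  # maximum violation is estimated from uniformly sampled coalitions.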
+ if batch_size >= 2**num_players: + num_suffix_repeats = min(max_exponent, num_players) + num_prefix_repeats = max(0, num_players - num_suffix_repeats) + zo = [0, 1] + suffix = np.array(list(itertools.product(zo, repeat=num_suffix_repeats))) + prefixes = itertools.product(zo, repeat=num_prefix_repeats) + for prefix in prefixes: + if prefix: + prefix_rep = np.repeat([prefix], suffix.shape[0], axis=0) + coalitions = np.concatenate([prefix_rep, suffix], axis=1) + else: + coalitions = suffix + batch_contributions = cv_calc.coalition_values(coalitions) + batch_payouts = np.dot(coalitions, payoffs) + batch_deficits = batch_contributions - batch_payouts - epsilon + batch_deficits = np.clip(batch_deficits, 0., np.inf) + violation = max(violation, np.max(batch_deficits)) + else: + q, r = divmod(batch_size, max_batch) + num_loops = q + (r > 0) + for _ in range(num_loops): + coalitions = np.random.randint(2, size=(max_batch, num_players)) + batch_contributions = cv_calc.coalition_values(coalitions) + batch_payouts = np.dot(coalitions, payoffs) + batch_deficits = batch_contributions - batch_payouts - epsilon + batch_deficits = np.clip(batch_deficits, 0., np.inf) + violation = max(violation, np.max(batch_deficits)) + return float(violation) + + +class CoreSolver(): + """Find an epsilon-core.""" + + def __init__(self, + cvc: coalitional_game.CoalitionalGame): + self.cvc = cvc + self.num_players = cvc.num_players() + # we assume grand_coalition is optimal coalition + grand_coalition = np.full(cvc.num_players(), 1, dtype=np.int32) + self.grand_coalition_value = cvc.coalition_value(grand_coalition) + + self.payoffs = None + self.losses = None + self.max_deficits = None + self.evaluation_losses = None + + def logits_to_payoff(self, logits): + logits_ext = jnp.append(logits, 0.) + payoff = jax.nn.softmax(logits_ext) + payoff *= self.grand_coalition_value + return payoff + + def loss_deficit(self, current_payoff, coalitions, coalition_values, epsilon): + """Compute Mean Loss and Max Deficit.""" + coalition_payment = jnp.dot(coalitions, current_payoff) + deficit = coalition_values - epsilon - coalition_payment + coalition_size = jnp.sum(coalitions, axis=1) + weight = 1.0 / jnp.clip(coalition_size, 1, self.num_players) + losses = 0.5 * weight * jax.nn.relu(deficit) ** 2.0 + return jnp.mean(losses, axis=0), jnp.max(jax.nn.relu(deficit)) + + +class CoreOptimization(CoreSolver): + """Find an epsilon-core via optimization.""" + + def __init__(self, + cvc: coalitional_game.CoalitionalGame, + opt, + epsilon): + super().__init__(cvc) + self.opt = opt + self.epsilon = epsilon + + @functools.partial(jax.jit, static_argnums=[0]) + def loss(self, params, data): + """Compute Loss.""" + current_payoff = params + coalitions, coalition_values = data + + return self.loss_deficit(current_payoff, coalitions, coalition_values, + self.epsilon) + + @functools.partial(jax.jit, static_argnums=[0]) + def update_step(self, params, data, opt_state): + """GD update step.""" + + # data = (coalitions, coalition_values) + + # Convert losses into pure functions. + loss_fn = lambda p: self.loss(p, data)[0] + + # Compute saddle-point gradients. 
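+    # (This plain-optimization variant has no dual player; the actual
+    # primal-dual saddle-point update lives in CoreLagrangian.update_step.)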
+ grads_fn = jax.grad(loss_fn, argnums=0) + grads = grads_fn(params) + + updates, opt_state = self.opt.update(grads, opt_state, params) + + params = optax.apply_updates(params, updates) + + params = jnp.clip(params, 0, np.inf) + scale = self.grand_coalition_value / jnp.sum(params) + params = params * scale + + return params, opt_state + + def solve(self, n_iter: int, batch_size: int = 100, + save_every: int = 2, + evaluate_every: int = 2, evaluation_iterations: int = 100, + seed: int = 0 + ) -> Tuple[np.ndarray, np.ndarray, float]: + """Find a least-core via Lagrange multipliers. + + Additional optimization metrics are stored as class variables: + self.payoffs + self.losses + self.max_deficits + self.evaluation_losses + self.duration + + Args: + n_iter: number of iterations + batch_size: number of constraints to evaluate at each step + save_every: int, how often to record optimization variables + evaluate_every: int, how often to evaluate the max constraint violation + evaluation_iterations: int, how many constraints to measure violations + for, if number if less than number of coalitions a batch of constraints + is sampled randomly. otherwise, all constraints are evaluated + seed: int, for sampling minibatches of constraints + + Returns: + payoffs over training + max deficit over training + runtime duration (sec) + """ + + qe, re = divmod(n_iter, evaluate_every) + num_eval = qe + (re > 0) + qs, rs = divmod(n_iter, save_every) + num_save = qs + (rs > 0) + + max_violations = np.empty(num_eval, dtype=np.float32) + losses = np.empty(num_save, dtype=np.float32) + max_deficits = np.empty(num_save, dtype=np.float32) + payoffs = np.empty((num_save, self.num_players), dtype=np.float32) + + scale = self.grand_coalition_value / self.num_players + grand_coalition = np.full(self.num_players, 1, dtype=np.int32) + current_payoff = jnp.array(grand_coalition * scale) + params = current_payoff + + opt_state = self.opt.init(params) + + logging.debug('Uniform payoff %s', current_payoff) + + rng = jax.random.PRNGKey(seed) + + start = time.time() + for iter_id in range(n_iter): + if batch_size < 2**self.num_players: + rng, key = jax.random.split(rng, 2) + coalitions = jax.random.randint(key, + shape=(batch_size, self.num_players), + minval=0, + maxval=2, + dtype=jnp.int32) + else: + prod_space = itertools.product([0, 1], repeat=self.num_players) + coalitions = np.stack(list(prod_space)) + coalition_values = self.cvc.coalition_values(np.array(coalitions)) + + data = (coalitions, coalition_values) + loss, max_deficit = self.loss(params, data) + params, opt_state = self.update_step(params, data, opt_state) + + # Done updating, save if needed + if iter_id % save_every == 0: + logging.debug('Saving...') + idx = iter_id // save_every + losses[idx] = loss + max_deficits[idx] = max_deficit + current_payoff = params + payoffs[idx] = current_payoff + logging.debug('Loss was %f, Max deficit was %f, New payoff %s', + loss, max_deficit, current_payoff) + + # Done updating, evaluate if needed + if (evaluate_every < n_iter) and (iter_id % evaluate_every == 0): + logging.debug('Evaluating...') + estimated_loss = payoff_evaluation( + self.cvc, + current_payoff, + self.epsilon, + evaluation_iterations, + ) + max_violations[iter_id // evaluate_every] = estimated_loss + logging.debug('Estimated loss %f', estimated_loss) + end = time.time() + duration = end - start + + self.payoffs = np.array(payoffs) + self.losses = np.array(losses) + self.max_deficits = np.array(max_deficits) + self.max_violations = np.array(max_violations) + 
self.duration = duration + + return (np.array(payoffs), + np.array(max_deficits), + duration) + + +class CoreOptimizationLogits(CoreSolver): + """Find an epsilon-core via optimization over logits.""" + + def __init__(self, + cvc: coalitional_game.CoalitionalGame, + opt, + epsilon): + super().__init__(cvc) + self.opt = opt + self.epsilon = epsilon + + @functools.partial(jax.jit, static_argnums=[0]) + def loss(self, params, data): + """Compute Loss.""" + current_payoff = self.logits_to_payoff(params) + coalitions, coalition_values = data + + return self.loss_deficit(current_payoff, coalitions, coalition_values, + self.epsilon) + + @functools.partial(jax.jit, static_argnums=[0]) + def update_step(self, params, data, opt_state): + """GD update step.""" + + # data = (coalitions, coalition_values) + + # Convert losses into pure functions. + loss_fn = lambda p: self.loss(p, data)[0] + + # Compute saddle-point gradients. + grads_fn = jax.grad(loss_fn, argnums=0) + grads = grads_fn(params) + + updates, opt_state = self.opt.update(grads, opt_state, params) + + params = optax.apply_updates(params, updates) + + return params, opt_state + + def solve(self, n_iter: int, batch_size: int = 100, + save_every: int = 2, + evaluate_every: int = 2, evaluation_iterations: int = 100, + seed: int = 0 + ) -> Tuple[np.ndarray, np.ndarray, float]: + """Find a least-core via Lagrange multipliers. + + Additional optimization metrics are stored as class variables: + self.payoffs + self.losses + self.max_deficits + self.evaluation_losses + self.duration + + Args: + n_iter: number of iterations + batch_size: number of constraints to evaluate at each step + save_every: int, how often to record optimization variables + evaluate_every: int, how often to evaluate the max constraint violation + evaluation_iterations: int, how many constraints to measure violations + for, if number if less than number of coalitions a batch of constraints + is sampled randomly. 
otherwise, all constraints are evaluated + seed: int, for sampling minibatches of constraints + + Returns: + payoffs over training + max deficit over training + runtime duration (sec) + """ + + qe, re = divmod(n_iter, evaluate_every) + num_eval = qe + (re > 0) + qs, rs = divmod(n_iter, save_every) + num_save = qs + (rs > 0) + + max_violations = np.empty(num_eval, dtype=np.float32) + losses = np.empty(num_save, dtype=np.float32) + max_deficits = np.empty(num_save, dtype=np.float32) + payoffs = np.empty((num_save, self.num_players), dtype=np.float32) + + current_logits = jnp.zeros(self.num_players - 1, dtype=jnp.float32) + current_payoff = np.asarray(self.logits_to_payoff(current_logits)) + params = current_logits + + opt_state = self.opt.init(params) + + logging.debug('Uniform payoff %s', current_payoff) + + rng = jax.random.PRNGKey(seed) + + start = time.time() + for iter_id in range(n_iter): + if batch_size < 2**self.num_players: + rng, key = jax.random.split(rng, 2) + coalitions = jax.random.randint(key, + shape=(batch_size, self.num_players), + minval=0, + maxval=2, + dtype=jnp.int32) + else: + prod_space = itertools.product([0, 1], repeat=self.num_players) + coalitions = np.stack(list(prod_space)) + coalition_values = self.cvc.coalition_values(np.array(coalitions)) + + data = (coalitions, coalition_values) + loss, max_deficit = self.loss(params, data) + params, opt_state = self.update_step(params, data, opt_state) + + # Done updating, save if needed + if iter_id % save_every == 0: + logging.debug('Saving...') + idx = iter_id // save_every + losses[idx] = loss + max_deficits[idx] = max_deficit + current_logits = params + current_payoff = np.asarray(self.logits_to_payoff(current_logits)) + payoffs[idx] = current_payoff + logging.debug('Loss was %f, Max deficit was %f, New payoff %s', + loss, max_deficit, current_payoff) + + # Done updating, evaluate if needed + if (evaluate_every < n_iter) and (iter_id % evaluate_every == 0): + logging.debug('Evaluating...') + estimated_loss = payoff_evaluation( + self.cvc, + current_payoff, + self.epsilon, + evaluation_iterations, + ) + max_violations[iter_id // evaluate_every] = estimated_loss + logging.debug('Estimated loss %f', estimated_loss) + end = time.time() + duration = end - start + self.payoffs = np.array(payoffs) + self.losses = np.array(losses) + self.max_deficits = np.array(max_deficits) + self.max_violations = np.array(max_violations) + self.duration = duration + + return (np.array(payoffs), + np.array(max_deficits), + duration) + + +class CoreLagrangian(CoreSolver): + """Find a least-core via Lagrange multipliers.""" + + def __init__(self, + cvc: coalitional_game.CoalitionalGame, + opt_primal, + opt_dual): + super().__init__(cvc) + self.opt_primal = opt_primal + self.opt_dual = opt_dual + + current_logits_keys = ['current_logits' for _ in range(self.num_players)] + keys_primal = {'current_logits': current_logits_keys, + 'epsilon': 'epsilon'} + keys_dual = {'mu': 'mu'} + self.keys = (keys_primal, keys_dual) + self.nonnegative_keys = ('epsilon', 'mu') + + self.epsilons = None + self.mus = None + self.lagrangians = None + + @functools.partial(jax.jit, static_argnums=[0]) + def lagrangian(self, primal, dual, data): + """Compute Lagrangian.""" + current_logits, epsilon = primal['current_logits'], primal['epsilon'] + mu = dual['mu'] + coalitions, coalition_values, gamma_adj = data + + current_payoff = self.logits_to_payoff(current_logits) + mean_loss, max_deficit = self.loss_deficit(current_payoff, + coalitions, + coalition_values, + epsilon) 
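+    # Least-core Lagrangian: minimize epsilon while the multiplier mu prices
+    # the mean (size-weighted, squared) core-constraint violation, i.e.
+    #   L = epsilon + mu * (mean_loss - gamma_adj).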
+ lagrangian = epsilon + mu * (mean_loss - gamma_adj) + lagrangian = jnp.sum(lagrangian) # just for converting (1,) array to scalar + return lagrangian, (mean_loss, max_deficit) + + @functools.partial(jax.jit, static_argnums=[0]) + def update_step(self, params, data, opt_state): + """SimGD update step.""" + + # data = (coalitions, coalition_values, gamma_adj) + params_primal, params_dual = params + opt_state_primal, opt_state_dual = opt_state + + # Convert losses into pure functions. + loss_primal_fn = lambda p, d: self.lagrangian(p, d, data)[0] + loss_dual_fn = lambda p, d: -self.lagrangian(p, d, data)[0] + + # Compute saddle-point gradients. + grads_primal_fn = jax.grad(loss_primal_fn, argnums=0) + grads_primal = grads_primal_fn(params_primal, params_dual) + grads_dual_fn = jax.grad(loss_dual_fn, argnums=1) + grads_dual = grads_dual_fn(params_primal, params_dual) + + updates_primal, opt_state_primal = self.opt_primal.update(grads_primal, + opt_state_primal, + params_primal) + updates_dual, opt_state_dual = self.opt_dual.update(grads_dual, + opt_state_dual, + params_dual) + + params_primal = optax.apply_updates(params_primal, updates_primal) + params_dual = optax.apply_updates(params_dual, updates_dual) + + params = (params_primal, params_dual) + opt_state = (opt_state_primal, opt_state_dual) + + clip = ( + lambda x, k: jnp.clip(x, 0, np.inf) if k in self.nonnegative_keys else x + ) + params = jax.tree_util.tree_map(clip, params, self.keys) + + return params, opt_state + + def solve(self, n_iter: int, batch_size: int = 100, gamma: float = 1e-2, + mu_init: float = 1000., + save_every: int = 2, + evaluate_every: int = 2, evaluation_iterations: int = 100, + seed: int = 0, + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, float]: + """Find a least-core via Lagrange multipliers. + + Additional optimization metrics are stored as class variables: + self.payoffs + self.epsilons + self.mus + self.lagrangians + self.losses + self.max_deficits + self.evaluation_losses + self.duration + + Args: + n_iter: number of iterations + batch_size: number of constraints to evaluate at each step + gamma: float, slack allowed in core constraints + mu_init: float, initialize the lagrange multiplier to this value + save_every: int, how often to record optimization variables + evaluate_every: int, how often to evaluate the max constraint violation + evaluation_iterations: int, how many constraints to measure violations + for, if number if less than number of coalitions a batch of constraints + is sampled randomly. 
otherwise, all constraints are evaluated + seed: int, for sampling minibatches of constraints + + Returns: + payoffs over training + epsilon over training + max deficit over training + runtime duration (sec) + """ + + qe, re = divmod(n_iter, evaluate_every) + num_eval = qe + (re > 0) + qs, rs = divmod(n_iter, save_every) + num_save = qs + (rs > 0) + + max_violations = np.empty(num_eval, dtype=np.float32) + lagrangians = np.empty(num_save, dtype=np.float32) + losses = np.empty(num_save, dtype=np.float32) + max_deficits = np.empty(num_save, dtype=np.float32) + epsilons = np.empty(num_save, dtype=np.float32) + payoffs = np.empty((num_save, self.num_players), dtype=np.float32) + mus = np.empty(num_save, dtype=np.float32) + + current_logits = jnp.zeros(self.num_players - 1, dtype=jnp.float32) + epsilon = self.grand_coalition_value * jnp.ones(1, dtype=jnp.float32) + mu = jnp.ones(1, dtype=jnp.float32) * mu_init + + params_primal = {'current_logits': current_logits, + 'epsilon': epsilon} + params_dual = {'mu': mu} + params = (params_primal, params_dual) + + opt_state_primal = self.opt_primal.init(params_primal) + opt_state_dual = self.opt_dual.init(params_dual) + opt_state = (opt_state_primal, opt_state_dual) + + current_payoff = np.asarray(self.logits_to_payoff(current_logits)) + logging.debug('Uniform payoff %s', current_payoff) + + if self.num_players < 30: + gamma_adj = gamma**2.0 / (2**self.num_players - 1) + else: + # Set arbitrary value if the above would result in a too tiny number. + gamma_adj = 1e-6 + + rng = jax.random.PRNGKey(seed) + + start = time.time() + for iter_id in range(n_iter): + if batch_size < 2**self.num_players: + rng, key = jax.random.split(rng, 2) + coalitions = jax.random.randint(key, + shape=(batch_size, self.num_players), + minval=0, + maxval=2, + dtype=jnp.int32) + else: + prod_space = itertools.product([0, 1], repeat=self.num_players) + coalitions = np.stack(list(prod_space)) + coalition_values = self.cvc.coalition_values(np.array(coalitions)) + + data = (coalitions, coalition_values, gamma_adj) + lagrangian, (loss, max_deficit) = self.lagrangian(*params, data) + params, opt_state = self.update_step(params, data, opt_state) + + params_primal, params_dual = params + + # Done updating, save if needed + if iter_id % save_every == 0: + logging.debug('Saving...') + idx = iter_id // save_every + lagrangians[idx] = lagrangian + losses[idx] = loss + max_deficits[idx] = max_deficit + epsilons[idx] = params_primal['epsilon'].item() + mus[idx] = params_dual['mu'].item() + current_payoff = np.asarray(self.logits_to_payoff( + params_primal['current_logits'])) + payoffs[idx] = current_payoff + logging.debug('Loss was %f, Max deficit was %f, New payoff %s', + loss, max_deficit, current_payoff) + + # Done updating, evaluate if needed + if (evaluate_every < n_iter) and (iter_id % evaluate_every == 0): + logging.debug('Evaluating...') + estimated_loss = payoff_evaluation( + self.cvc, + current_payoff, + params_primal['epsilon'].item(), + evaluation_iterations, + ) + max_violations[iter_id // evaluate_every] = estimated_loss + logging.debug('Estimated loss %f', estimated_loss) + end = time.time() + duration = end - start + + self.payoffs = np.array(payoffs) + self.epsilons = np.array(epsilons) + self.mus = np.array(mus) + self.lagrangians = np.array(lagrangians) + self.losses = np.array(losses) + self.max_deficits = np.array(max_deficits) + self.max_violations = np.array(max_violations) + self.duration = duration + + return (np.array(payoffs), + np.array(epsilons), + 
np.array(max_deficits), + duration) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/least_core_lagrangian_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/least_core_lagrangian_test.py new file mode 100644 index 0000000..c2b3f7b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/least_core_lagrangian_test.py @@ -0,0 +1,70 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for least-core lagrangian calculations.""" + +from absl.testing import absltest +from ml_collections import config_dict as configdict +import numpy as np + +from open_spiel.python.coalitional_games import basic_games +from open_spiel.python.coalitional_games import least_core_lagrangian + + +SEED = 817346817 + + +def get_alg_config(): + """Get configuration for botched trades experiment.""" + alg_config = configdict.ConfigDict() + + alg_config.init = configdict.ConfigDict() + alg_config.init.lr_primal = 1e-2 + alg_config.init.lr_dual = 1e-2 + + alg_config.solve = configdict.ConfigDict() + alg_config.solve.batch_size = 2**3 + alg_config.solve.mu_init = 1000 + alg_config.solve.gamma = 1e-8 + alg_config.solve.n_iter = 110_000 + alg_config.solve.seed = 0 + alg_config.solve.save_every = 10_000 + + alg_config.eval = configdict.ConfigDict() + alg_config.eval.evaluation_iterations = 2**3 + + return alg_config + + +class LeastCoreLagrangianTest(absltest.TestCase): + + def setUp(self): + super().setUp() + np.random.seed(SEED) + self.config = get_alg_config() + + def test_ice_cream_example_full_lagrangian(self): + """Solve the least core Lagrangian.""" + game = basic_games.IceCreamGame() + least_core_value = least_core_lagrangian.compute_least_core_value( + game, self.config) + imputation = least_core_value.payoff + epsilon = least_core_value.lcv + self.assertAlmostEqual(imputation.sum(), 1000.0, places=3) + self.assertGreater(imputation.all(), -1e-10) + self.assertLess(epsilon, 1e-6) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/least_core_lp.py b/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/least_core_lp.py new file mode 100644 index 0000000..80b8321 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/least_core_lp.py @@ -0,0 +1,103 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
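+
+# The least-core linear program solved in this module is, informally:
+#   min_{x, e}  e
+#   s.t.  sum_i x_i = v(N)
+#         sum_{i in S} x_i + e >= v(S)  for every coalition S,
+#         x_i >= 0,
+# where v is the characteristic function; e can be negative when the core
+# is non-empty.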
+ +"""Methods to compute the core based on linear programming. + +This file contains methods to compute the core using LPs referred to in +Yan & Procaccia '21: https://ojs.aaai.org/index.php/AAAI/article/view/16721 +""" + +import itertools + +from typing import Any, Callable, List, Tuple + +import cvxpy as cp +import numpy as np + +from open_spiel.python.coalitional_games import coalitional_game + + +ConstraintsSamplingFuncType = Callable[ + [coalitional_game.CoalitionalGame, cp.Variable, cp.Variable, List[Any]], + Any, +] + + +def add_all_constraints( + game: coalitional_game.CoalitionalGame, + x: cp.Variable, + e: cp.Variable, + constraints: List[Any]): + # \sum x_i + e >= v(S), for all subsets S \subseteq N + for c in itertools.product([0, 1], repeat=game.num_players()): + coalition = np.asarray(c) + val_coalition = game.coalition_value(coalition) + constraints.append(x @ coalition + e >= val_coalition) + + +def make_uniform_sampling_constraints_function( + num: int, +) -> ConstraintsSamplingFuncType: + """Simple uniform constraint sampler (with replacement).""" + + def func(game: coalitional_game.CoalitionalGame, + x: cp.Variable, e: cp.Variable, constraints: List[Any]): + for _ in range(num): + coalition = np.random.randint(2, size=game.num_players()) + val_coalition = game.coalition_value(coalition) + constraints.append(x @ coalition + e >= val_coalition) + return func + + +def solve_least_core_lp( + game: coalitional_game.CoalitionalGame, + constraint_function: ConstraintsSamplingFuncType, +) -> Tuple[np.ndarray, float]: + """Solve the LP described in Yan & Procaccia, equation (1). + + This LP enumerates all (exponentially many!) possible coalitions, with one + constraint per coalition. Will not scale to games with too many players. + + Args: + game: the game the LP solves. + constraint_function: function that adds the constraints + + Returns: + solution: an array with num_players entries, + epsilon: the lowest epsilon. + """ + # TODO(author5): handle errors gracefully. E.g. if solving the LP fails. + + num_players = game.num_players() + val_gc = game.coalition_value(np.ones(game.num_players())) + + # min e + # indices 0 - n-1 correspond to x_i, index n corresponds to e + x = cp.Variable(num_players, nonneg=True) + e = cp.Variable() # note: epsilon can be negative when the core is non-empty! + + objective = cp.Minimize(e) + constraints = [] + + # \sum_{i in N} x_i = v(N) + constraints.append(x @ np.ones(num_players) == val_gc) + + # Add the constraints + constraint_function(game, x, e, constraints) + + prob = cp.Problem(objective, constraints) + _ = prob.solve(solver=cp.SCS, eps=1e-6) + # The optimal value for x is stored in `x.value`. + + return x.value, e.value diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/least_core_lp_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/least_core_lp_test.py new file mode 100644 index 0000000..ec197c5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/least_core_lp_test.py @@ -0,0 +1,50 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from absl.testing import absltest +import numpy as np +from open_spiel.python.coalitional_games import basic_games +from open_spiel.python.coalitional_games import least_core_lp + + +SEED = 817346817 + + +class LeastCoreLPTest(absltest.TestCase): + + def setUp(self): + super().setUp() + np.random.seed(SEED) + + def test_ice_cream_example_full_lp(self): + """Solve the full LP.""" + game = basic_games.IceCreamGame() + imputation, epsilon = least_core_lp.solve_least_core_lp( + game, least_core_lp.add_all_constraints) + self.assertAlmostEqual(imputation.sum(), 1000.0, delta=1e-5) + self.assertGreater(imputation.all(), 0.0) + self.assertLess(epsilon, 1e-6) + + def test_ice_cream_example_uniform_sample_lp(self): + """Solve the LP with 20 uniformly sampled constraints.""" + game = basic_games.IceCreamGame() + cons_func = least_core_lp.make_uniform_sampling_constraints_function(20) + imputation, epsilon = least_core_lp.solve_least_core_lp(game, cons_func) + self.assertAlmostEqual(imputation.sum(), 1000.0, delta=1e-5) + self.assertGreater(imputation.all(), 0.0) + self.assertLess(epsilon, 1e-6) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/shapley_values.py b/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/shapley_values.py new file mode 100644 index 0000000..517b474 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/shapley_values.py @@ -0,0 +1,87 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Functions to compute Shapley values and their approximations.""" + +import itertools +import numpy as np +from open_spiel.python.coalitional_games import coalitional_game + + +def compute_shapley_values( + game: coalitional_game.CoalitionalGame, +) -> np.ndarray: + """Compute the Shapley values exactly. + + Uses Eq (2) of Mitchell et al. "Sampling Permutations for Shapley Value + Estimation". https://people.math.sc.edu/cooper/shapley.pdf + + Args: + game: the game to compute Shapley values for. + + Returns: + shapley_values: a numpy array of Shapley values per player. 
+ """ + + shapley_values_sum = np.zeros(game.num_players(), dtype=float) + coalition = np.zeros(game.num_players(), dtype=int) + empty_coalition_value = game.coalition_value(coalition) + num_perms = 0 + for perm_tup in itertools.permutations(range(game.num_players())): + perm = list(perm_tup) + value_with = empty_coalition_value + coalition.fill(0) + for idx in range(game.num_players()): + value_without = value_with # re-use the one computed from the last iter + i = perm[idx] + coalition[i] = 1 + value_with = game.coalition_value(coalition) + shapley_values_sum[i] += value_with - value_without + num_perms += 1 + return shapley_values_sum / num_perms + + +def compute_approximate_shapley_values( + game: coalitional_game.CoalitionalGame, + num_samples: int, +) -> np.ndarray: + """Compute the Shapley values using Monte Carlo estimation. + + Specifically, applies the implementation described in Section 2.3 of Mitchell + et al. "Sampling Permutations for Shapley Value Estimation". + https://people.math.sc.edu/cooper/shapley.pdf + + Args: + game: the game to compute Shapley values for. + num_samples: number of permutations to sample + + Returns: + shapley_values: a numpy array of Shapley values per player. + """ + + shapley_values_sum = np.zeros(game.num_players(), dtype=float) + coalition = np.zeros(game.num_players(), dtype=int) + empty_coalition_value = game.coalition_value(coalition) + for _ in range(num_samples): + perm = np.random.permutation(game.num_players()) + value_with = empty_coalition_value + coalition.fill(0) + for idx in range(game.num_players()): + value_without = value_with # re-use the one computed from the last iter + i = perm[idx] + coalition[i] = 1 + value_with = game.coalition_value(coalition) + shapley_values_sum[i] += value_with - value_without + return shapley_values_sum / num_samples + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/shapley_values_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/shapley_values_test.py new file mode 100644 index 0000000..9dbe324 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/shapley_values_test.py @@ -0,0 +1,55 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for Shapley value calculations.""" + + +from absl.testing import absltest +import numpy as np +from open_spiel.python.coalitional_games import basic_games +from open_spiel.python.coalitional_games import deon_larson20_games +from open_spiel.python.coalitional_games import shapley_values + + +SEED = 23856711 + + +class ShapleyValuesTest(absltest.TestCase): + + def setUp(self): + super().setUp() + np.random.seed(SEED) + + def test_ice_cream_game(self): + """Example 2.11 from CACGT book by Chalkiadakis, Elkind, and Wooldridge.""" + game = basic_games.IceCreamGame() + svals = shapley_values.compute_shapley_values(game) + self.assertAlmostEqual(svals[0], 250.0) + + def test_ice_cream_game_approximate(self): + """Monte Carlo sampling version of Shapley value computation.""" + game = basic_games.IceCreamGame() + svals = shapley_values.compute_approximate_shapley_values(game, 1000) + self.assertAlmostEqual(svals[0]/1000.0, 0.250, places=2) + + def test_deon_larson20_games(self): + for name, values in deon_larson20_games.SHAPLEY_VALUES.items(): + values_arr = np.asarray(values) + game = deon_larson20_games.make_game(name) + svals = shapley_values.compute_shapley_values(game) + self.assertTrue(np.allclose(svals, values_arr)) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/util.py b/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/util.py new file mode 100644 index 0000000..fe035e3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/util.py @@ -0,0 +1,43 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Some general utility functions for coalitional games.""" + +import itertools +import numpy as np +from open_spiel.python.coalitional_games import coalitional_game + + +def compute_payoff_epsilon( + game: coalitional_game.CoalitionalGame, + p: np.ndarray +) -> float: + """For a payoff vector p, get max_e s.t. p dot c + e >= V(c). + + Warning! Enumerates all coalitions. + + Args: + game: the game to enumerate. + p: the payoff vector. + + Returns: + the value max_e s.t. p dot c + e >= V(C) for all subsets C subseteq N. + """ + epsilon = 0 + for c in itertools.product([0, 1], repeat=game.num_players()): + coalition = np.asarray(c) + val_c = game.coalition_values(coalition) + payoffs_to_coalition = np.inner(p, coalition) + epsilon = max(epsilon, val_c - payoffs_to_coalition) + return epsilon diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/wvg.py b/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/wvg.py new file mode 100644 index 0000000..012423b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/wvg.py @@ -0,0 +1,47 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Weighted Voting Games. + +A weighted voting game is a game where every player i has a weight w_i, and +there is a fixed quota q, the characteristic function for coalition c is: + + v(c) = 1 if sum_{i in c} w_i > q, + 0 otherwise. + +For more detail, see Chapter 4 of "Computational Aspects of Cooperative +Game Theory" text book by Georgios Chalkiadakis, Edith Elkind, and Michael +Wooldridge. +""" + +import numpy as np +from open_spiel.python.coalitional_games import coalitional_game + + +class WeightedVotingGame(coalitional_game.CoalitionalGame): + """Weighted Voting Game.""" + + def __init__(self, weights: np.ndarray, quota: float): + super().__init__(num_players=len(weights)) + assert len(weights.shape) == 1 + self._weights = weights + self._quota = quota + + def coalition_value(self, coalition: np.ndarray) -> float: + assert len(coalition) == self._num_players + total_weight = np.inner(coalition, self._weights) + if total_weight > self._quota: + return 1.0 + else: + return 0.0 diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/wvg_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/wvg_test.py new file mode 100644 index 0000000..bbe217b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/coalitional_games/wvg_test.py @@ -0,0 +1,61 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from absl.testing import absltest +import numpy as np +from open_spiel.python.coalitional_games import least_core_lp +from open_spiel.python.coalitional_games import shapley_values +from open_spiel.python.coalitional_games import wvg + + +SEED = 2093777 + + +class WeightedVotingGamesTest(absltest.TestCase): + + def setUp(self): + super().setUp() + np.random.seed(SEED) + + def test_basic_wvg_equal_weights(self): + # Equal weights. + game = wvg.WeightedVotingGame(weights=np.asarray([10]*4), quota=35.0) + svals = shapley_values.compute_shapley_values(game) + self.assertTrue(np.allclose(svals, np.asarray([0.25, 0.25, 0.25, 0.25]))) + lc_imputation, epsilon = least_core_lp.solve_least_core_lp( + game, least_core_lp.add_all_constraints) + self.assertTrue(np.allclose(lc_imputation, + np.asarray([0.25, 0.25, 0.25, 0.25]))) + self.assertAlmostEqual(epsilon, 0.0) + + def test_basic_wvg_unequal_weights(self): + # Example 2.3 of the CACGT book by by Chalkiadakis, Elkind, and Wooldridge. 
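    # Editor's note (illustrative reasoning, not part of the upstream test):
    # with weights [40, 22, 30, 9] and quota 51, every pair drawn from the
    # first three players already wins (62, 70, and 52 all exceed 51), while
    # the weight-9 player never turns a losing coalition into a winning one
    # (9, 31, 39, and 49 all stay at or below 51). Player 4 is therefore a
    # dummy, so its Shapley value and least-core payoff are 0, and the three
    # remaining symmetric players split the unit value equally, giving the
    # [1/3, 1/3, 1/3, 0] checked below.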
+ game = wvg.WeightedVotingGame(weights=np.asarray([40.0, 22.0, 30.0, 9.0]), + quota=51.0) + svals = shapley_values.compute_shapley_values(game) + self.assertTrue(np.allclose(svals, np.asarray([1.0/3, 1.0/3, 1.0/3, 0]))) + lc_imputation, epsilon = least_core_lp.solve_least_core_lp( + game, least_core_lp.add_all_constraints) + print(lc_imputation) # prints [0.33, 0.33, 0.33, 0] + print(epsilon) # prints 0.33 + np.testing.assert_array_almost_equal( + lc_imputation, + np.asarray([1.0 / 3, 1.0 / 3, 1.0 / 3, 0]), + decimal=4, + ) + self.assertAlmostEqual(epsilon, 1.0/3.0, delta=1e-4) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/egt/README.md b/scenarios/bargaining/open_spiel/open_spiel/python/egt/README.md new file mode 100644 index 0000000..e097e64 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/egt/README.md @@ -0,0 +1,20 @@ +# Open Spiel Evolutionary Game Theory (EGT) Toolbox + +This is a library for conducting Evolutionary Game Theory (EGT) analysis of +games. + +## A Breakdown of the code + +The following code implements Alpha-Rank, a multi-agent evaluation algorithm +detailed in `α-Rank: Multi-Agent Evaluation by Evolution (2019)`, available at: +https://www.nature.com/articles/s41598-019-45619-9. + +* `alpharank.py`: core implementation +* `alpharank_visualizer.py`: Alpha-Rank plotting tools + +The following are utility scripts: + +* `heuristic_payoff_table.py`: defines a class for storing heuristic payoff + tables for games (e.g., as detailed in `A Generalised Method for Empirical + Game Theoretic Analysis` (Tuyls et al., 2018)) +* `utils.py`: helper functions diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/egt/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/egt/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/egt/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/egt/alpharank.py b/scenarios/bargaining/open_spiel/open_spiel/python/egt/alpharank.py new file mode 100644 index 0000000..1dc78c0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/egt/alpharank.py @@ -0,0 +1,845 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Implementation of Alpha-Rank for general games. 
+ +Namely, computes fixation probabilities, Markov chain, and associated +stationary distribution given a population size and payoff matrix involving +n-strategy interactions. + +All equations and variable names correspond to the following paper: + https://arxiv.org/abs/1903.01373 + +""" + +import numpy as np +import scipy.linalg as la + +from open_spiel.python.egt import alpharank_visualizer +from open_spiel.python.egt import utils + + +def _get_payoff(payoff_table_k, payoffs_are_hpt_format, strat_profile, k=None): + """Gets the payoff of the k-th agent in a single or multi-population game. + + Namely, accepts the payoff table of the k-th agent (which can be matrix or + HPT format), the index k of the agent of interest (so its payoff can be looked + up in case of an HPT format payoff table), and the pure strategy profile. + + For multipopulation games, we currently only support games where the k-th + agent's payoff is a function of the HPT distribution (a vector + indicating the number of players playing each strategy), as opposed to the + strategy profile (a vector indicating the strategy of each player). This is + due to the nature of the PayoffTable class, which currently only tracks + distributions in the first k columns (rather than profiles). + + Args: + payoff_table_k: The k-th agent's payoff table, in matrix or HPT format. + payoffs_are_hpt_format: Boolean indicating whether payoff_table_k is a + _PayoffTableInterface object (AKA Heuristic Payoff Table or HPT) or a + numpy array. True indicates HPT format, False indicates numpy array. + strat_profile: The pure strategy profile. + k: The index of the agent of interest. Only used for HPT case, and only >0 + for a multi-population game. + + Returns: + The k-th agent's payoff. + """ + + if payoffs_are_hpt_format: + # All games are supported when using HPTs + assert k is not None + + # Compute HPT distribution (vector of # of players per strategy) + distribution = payoff_table_k.get_distribution_from_profile(strat_profile) + # Lookup the payoff profile (HPT row) corresponding to the distribution + payoff_profile = payoff_table_k[tuple(distribution)] + # Return the payoff corresponding to the k-th agent's strategy + return payoff_profile[strat_profile[k]] + else: + # Only 2 player symmetric/asymmetric games supported using matrix payoffs + return payoff_table_k[tuple(strat_profile)] + + +def _get_singlepop_2player_fitness(payoff_table, payoffs_are_hpt_format, m, + my_popsize, my_strat, opponent_strat, + use_local_selection_model): + """Gets a target agent fitness given a finite population of competitors. + + Note that this is only applicable to 2-player symmetric games. + Namely, gets fitness of an agent i playing my_strat in underlying population + of (my_popsize agents playing my_strat) and (m-my_popsize agents playing + opponent_strat). + + Args: + payoff_table: A payoff table. + payoffs_are_hpt_format: Boolean indicating whether payoff_table is a + _PayoffTableInterface object (AKA Heuristic Payoff Table or HPT), or a + numpy array. True indicates HPT format, False indicates numpy array. + m: The total number of agents in the population. + my_popsize: The number of agents in the population playing my strategy. + my_strat: Index of my strategy. + opponent_strat: Index of the opposing strategy. + use_local_selection_model: Enable local evolutionary selection model, which + considers fitness against the current opponent only, rather than the + global population state. + + Returns: + The fitness of agent i. 
+ """ + + if use_local_selection_model: + fitness = payoff_table[tuple([my_strat, opponent_strat])] + else: + fitness = ((my_popsize-1)/(m-1)* + _get_payoff(payoff_table, payoffs_are_hpt_format, + strat_profile=[my_strat, my_strat], k=0) + + (m-my_popsize)/(m-1)* + _get_payoff(payoff_table, payoffs_are_hpt_format, + strat_profile=[my_strat, opponent_strat], k=0)) + return fitness + + +def _get_rho_sr(payoff_table, + payoffs_are_hpt_format, + m, + r, + s, + alpha, + game_is_constant_sum, + use_local_selection_model, + payoff_sum=None): + """Gets fixation probability of rogue strategy r in population playing s. + + Args: + payoff_table: A payoff table. + payoffs_are_hpt_format: Boolean indicating whether payoff_table is a + _PayoffTableInterface object (AKA Heuristic Payoff Table or HPT), or a + numpy array. True indicates HPT format, False indicates numpy array. + m: The total number of agents in the population. + r: Rogue strategy r. + s: Population strategy s. + alpha: Fermi distribution temperature parameter. + game_is_constant_sum: Boolean indicating if the game is constant sum. + use_local_selection_model: Enable local evolutionary selection model, which + considers fitness against the current opponent only, rather than the + global population state. + payoff_sum: The payoff sum if the game is constant sum, or None otherwise. + + Returns: + The fixation probability. + """ + + if use_local_selection_model or game_is_constant_sum: + payoff_rs = _get_payoff( + payoff_table, payoffs_are_hpt_format, strat_profile=[r, s], k=0) + if use_local_selection_model: + # Row plays s, column plays r + payoff_sr = _get_payoff( + payoff_table, payoffs_are_hpt_format, strat_profile=[s, r], k=0) + u = alpha * (payoff_rs - payoff_sr) + else: + assert payoff_sum is not None + u = alpha * m / (m - 1) * (payoff_rs - payoff_sum / 2) + + if np.isclose(u, 0, atol=1e-14): + # To avoid divide by 0, use first-order approximation when u is near 0 + result = 1 / m + else: + result = (1 - np.exp(-u)) / (1 - np.exp(-m * u)) + else: + assert payoff_sum is None + summed = 0 + for l in range(1, m): + t_mult = 1. + for p_r in range(1, l + 1): + # Probabilities of strategy r decreasing/increasing + p_s = m - p_r + # Fitness of agent playing r against rest of current population + f_ri = _get_singlepop_2player_fitness( + payoff_table, + payoffs_are_hpt_format, + m, + my_popsize=p_r, + my_strat=r, + opponent_strat=s, + use_local_selection_model=use_local_selection_model) + # Fitness of agent playing s against rest of current population + f_sj = _get_singlepop_2player_fitness( + payoff_table, + payoffs_are_hpt_format, + m, + my_popsize=p_s, + my_strat=s, + opponent_strat=r, + use_local_selection_model=use_local_selection_model) + t_mult *= np.exp(-alpha * (f_ri - f_sj)) + summed += t_mult + result = (1 + summed)**(-1) + return result + + +def _get_rho_sr_multipop(payoff_table_k, + payoffs_are_hpt_format, + k, + m, + r, + s, + alpha, + use_fast_compute=True): + """Gets fixation probability for multi-population games. + + Specifically, considers the fitnesses of two strategy profiles r and s given + the payoff table of the k-th population. Profile s is the current profile and + r is a mutant profile. Profiles r and s are identical except for the k-th + element, which corresponds to the deviation of the k-th population's + monomorphic strategy from s[k] to r[k]. + + Args: + payoff_table_k: The k-th population's payoff table. 
+ payoffs_are_hpt_format: Boolean indicating whether payoff_table_k is a + _PayoffTableInterface object (AKA Heuristic Payoff Table or HPT), or numpy + array. True indicates HPT format, False indicates numpy array. + k: Index of the k-th population. + m: Total number of agents in the k-th population. + r: Strategy profile containing mutant strategy r for population k. + s: Current strategy profile. + alpha: Fermi distribution temperature parameter. + use_fast_compute: Boolean indicating whether closed-form computation should + be used. + + Returns: + Probability of strategy r fixating in population k. + """ + # Fitnesses are not dependent on population sizes for multipopulation case, so + # can be computed outside the loops + # Fitness of population k agent given strategy profile r + f_r = _get_payoff(payoff_table_k, payoffs_are_hpt_format, r, k) + # Fitness of population k agent given strategy profile s + f_s = _get_payoff(payoff_table_k, payoffs_are_hpt_format, s, k) + + if use_fast_compute: + u = alpha * (f_r - f_s) + if np.isclose(u, 0, atol=1e-14): + # To avoid divide by 0, use first-order approximation when u is near 0 + result = 1 / m + else: + result = (1 - np.exp(-u)) / (1 - np.exp(-m * u)) + else: + summed = 0 + for l in range(1, m): + t_mult = 1. + for p_r in range(1, l + 1): # pylint: disable= unused-variable + t_mult *= np.exp(-alpha * (f_r - f_s)) + summed += t_mult + result = (1 + summed)**(-1) + + return result + + +def _get_singlepop_transition_matrix(payoff_table, + payoffs_are_hpt_format, + m, + alpha, + game_is_constant_sum, + use_local_selection_model, + payoff_sum, + use_inf_alpha=False, + inf_alpha_eps=0.1): + """Gets the Markov transition matrix for a single-population game. + + Args: + payoff_table: A payoff table. + payoffs_are_hpt_format: Boolean indicating whether payoff_table is a + _PayoffTableInterface object (AKA Heuristic Payoff Table or HPT), or a + numpy array. True indicates HPT format, False indicates numpy array. + m: Total number of agents in the k-th population. + alpha: Fermi distribution temperature parameter. + game_is_constant_sum: Boolean indicating if the game is constant sum. + use_local_selection_model: Enable local evolutionary selection model, which + considers fitness against the current opponent only, rather than the + global population state. + payoff_sum: The payoff sum if the game is constant sum, or None otherwise. + use_inf_alpha: Use infinite-alpha alpharank model. + inf_alpha_eps: Noise term (epsilon) used in infinite-alpha alpharank model. + + Returns: + Markov transition matrix. + """ + + num_strats_per_population = utils.get_num_strats_per_population( + [payoff_table], payoffs_are_hpt_format) + num_strats = num_strats_per_population[0] + + c = np.zeros((num_strats, num_strats)) + rhos = np.zeros((num_strats, num_strats)) + + # r and s are, respectively, the column and row strategy profiles + for s in range(num_strats): # Current strategy + for r in range(num_strats): # Next strategy + if s != r: # Compute off-diagonal fixation probabilities + if use_inf_alpha: + eta = 1. 
/ (num_strats - 1) + # Payoff of r when played against s + payoff_rs = _get_payoff( + payoff_table, payoffs_are_hpt_format, strat_profile=[r, s], k=0) + # Payoff of s when played against r + payoff_sr = _get_payoff( + payoff_table, payoffs_are_hpt_format, strat_profile=[s, r], k=0) + if np.isclose(payoff_rs, payoff_sr, atol=1e-14): + c[s, r] = eta * 0.5 + elif payoff_rs > payoff_sr: + # Transition to r since its payoff is higher than s, but remove some + # small amount of mass, inf_alpha_eps, to keep the chain irreducible + c[s, r] = eta * (1 - inf_alpha_eps) + else: + # Transition with very small probability + c[s, r] = eta * inf_alpha_eps + else: + rhos[s, r] = _get_rho_sr(payoff_table, payoffs_are_hpt_format, m, r, + s, alpha, game_is_constant_sum, + use_local_selection_model, payoff_sum) + eta = 1. / (num_strats - 1) + c[s, r] = eta * rhos[s, r] + # Fixation probability of competing only against one's own strategy is 1 + # rhos[s,s] = 1. # Commented as self-fixations are not interesting (for now) + c[s, s] = 1 - sum(c[s, :]) # Diagonals + + return c, rhos + + +def _get_multipop_transition_matrix(payoff_tables, + payoffs_are_hpt_format, + m, + alpha, + use_inf_alpha=False, + inf_alpha_eps=0.1): + """Gets Markov transition matrix for multipopulation games.""" + + num_strats_per_population = utils.get_num_strats_per_population( + payoff_tables, payoffs_are_hpt_format) + num_profiles = utils.get_num_profiles(num_strats_per_population) + + eta = 1. / (np.sum(num_strats_per_population - 1)) + + c = np.zeros((num_profiles, num_profiles)) + rhos = np.zeros((num_profiles, num_profiles)) + + for id_row_profile in range(num_profiles): + row_profile = utils.get_strat_profile_from_id(num_strats_per_population, + id_row_profile) + + next_profile_gen = utils.get_valid_next_profiles(num_strats_per_population, + row_profile) + + for index_population_that_changed, col_profile in next_profile_gen: + id_col_profile = utils.get_id_from_strat_profile( + num_strats_per_population, col_profile) + if use_inf_alpha: + payoff_col = _get_payoff( + payoff_tables[index_population_that_changed], + payoffs_are_hpt_format, + col_profile, + k=index_population_that_changed) + payoff_row = _get_payoff( + payoff_tables[index_population_that_changed], + payoffs_are_hpt_format, + row_profile, + k=index_population_that_changed) + if np.isclose(payoff_col, payoff_row, atol=1e-14): + c[id_row_profile, id_col_profile] = eta * 0.5 + elif payoff_col > payoff_row: + # Transition to col strategy since its payoff is higher than row + # strategy, but remove some small amount of mass, inf_alpha_eps, to + # keep the chain irreducible + c[id_row_profile, id_col_profile] = eta * (1 - inf_alpha_eps) + else: + # Transition with very small probability + c[id_row_profile, id_col_profile] = eta * inf_alpha_eps + else: + rhos[id_row_profile, id_col_profile] = _get_rho_sr_multipop( + payoff_table_k=payoff_tables[index_population_that_changed], + payoffs_are_hpt_format=payoffs_are_hpt_format, + k=index_population_that_changed, + m=m, + r=col_profile, + s=row_profile, + alpha=alpha) + c[id_row_profile, + id_col_profile] = eta * rhos[id_row_profile, id_col_profile] + # Special case of self-transition + c[id_row_profile, id_row_profile] = 1 - sum(c[id_row_profile, :]) + + return c, rhos + + +def _get_stationary_distr(c): + """Gets stationary distribution of transition matrix c.""" + + eigenvals, left_eigenvecs, _ = la.eig(c, left=True, right=True) + + mask = abs(eigenvals - 1.) 
< 1e-10 + left_eigenvecs = left_eigenvecs[:, mask] + num_stationary_eigenvecs = np.shape(left_eigenvecs)[1] + if num_stationary_eigenvecs != 1: + raise ValueError('Expected 1 stationary distribution, but found %d' % + num_stationary_eigenvecs) + left_eigenvecs *= 1. / sum(left_eigenvecs) + + return left_eigenvecs.real.flatten() + + +def print_results(payoff_tables, + payoffs_are_hpt_format, + rhos=None, + rho_m=None, + c=None, + pi=None): + """Prints the finite-population analysis results.""" + + print('Payoff tables:\n') + if payoffs_are_hpt_format: + for payoff_table in payoff_tables: + print(payoff_table()) + else: + print(payoff_tables) + if rho_m is not None: + print('\nNeutral fixation probability (rho_m):\n', rho_m) + if rhos is not None and rho_m is not None: + print('\nFixation probability matrix (rho_{r,s}/rho_m):\n', + np.around(rhos / rho_m, decimals=2)) + if c is not None: + print('\nMarkov transition matrix (c):\n', np.around(c, decimals=2)) + if pi is not None: + print('\nStationary distribution (pi):\n', pi) + + +def sweep_pi_vs_epsilon(payoff_tables, + strat_labels=None, + warm_start_epsilon=None, + visualize=False, + return_epsilon=False, + min_iters=10, + max_iters=100, + min_epsilon=1e-14, + num_strats_to_label=10, + legend_sort_clusters=False): + """Computes infinite-alpha distribution for a range of perturbations. + + The range of response graph perturbations is defined in epsilon_list. + + Note that min_iters and max_iters is necessary as it may sometimes appear the + stationary distribution has converged for a game in the first few iterations, + where in reality a sufficiently smaller epsilon is needed for the distribution + to first diverge, then reconverge. This behavior is dependent on both the + payoff structure and bounds, so the parameters min_iters and max_iters can be + used to fine-tune this. + + Args: + payoff_tables: List of game payoff tables, one for each agent identity. + Each payoff_table may be either a numpy array, or a + _PayoffTableInterface object. + strat_labels: Human-readable strategy labels. See get_strat_profile_labels() + in utils.py for formatting details. + warm_start_epsilon: Initial value of epsilon to use. + visualize: Plot the sweep results. + return_epsilon: Whether to return the final epsilon used. + min_iters: the minimum number of sweep iterations. + max_iters: the maximum number of sweep iterations. + min_epsilon: the minimum value of epsilon to be tested, at which point the + sweep terminates (if not converged already). + num_strats_to_label: Number of strats to label in legend + legend_sort_clusters: If true, strategies in the same cluster are sorted in + the legend according to orderings for earlier alpha values. Primarily for + visualization purposes! Rankings for lower alpha values should be + interpreted carefully. + + Returns: + pi: AlphaRank stationary distribution. + epsilon: The AlphaRank transition matrix noise level resulting from sweep. 
+ """ + payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables) + num_populations = len(payoff_tables) + num_strats_per_population = utils.get_num_strats_per_population( + payoff_tables, payoffs_are_hpt_format) + + if num_populations == 1: + num_profiles = num_strats_per_population[0] + else: + num_profiles = utils.get_num_profiles(num_strats_per_population) + + assert (strat_labels is None or isinstance(strat_labels, dict) + or (len(strat_labels) == num_profiles)) + + pi_list = np.empty((num_profiles, 0)) + pi, alpha, m = None, None, None # Unused in infinite-alpha regime + epsilon_list = [] + epsilon_pi_hist = {} + num_iters = 0 + + epsilon_mult_factor = 0.5 + alpharank_succeeded_once = False + + if warm_start_epsilon is not None: + epsilon = warm_start_epsilon + else: + epsilon = 0.5 + + while True: + try: + pi_prev = pi + _, _, pi, _, _ = compute(payoff_tables, m=m, alpha=alpha, + use_inf_alpha=True, inf_alpha_eps=epsilon) + epsilon_pi_hist[epsilon] = pi + # Stop when pi converges + if num_iters > min_iters and np.allclose(pi, pi_prev): + break + + epsilon *= epsilon_mult_factor + num_iters += 1 + alpharank_succeeded_once = True + assert num_iters < max_iters, ('Alpharank stationary distr. not found' + 'after {} iterations of pi_vs_epsilon' + 'sweep'.format(num_iters)) + + except ValueError as _: + print('Error: ', _, epsilon, min_epsilon) + # Case where epsilon has been decreased beyond desirable limits but no + # distribution found. + assert epsilon >= min_epsilon, ('AlphaRank stationary distr. not found &' + 'epsilon < min_epsilon.') + # Case where epsilon >= min_epsilon, but still small enough that it causes + # causes exceptions due to precision issues. So increase it. + epsilon /= epsilon_mult_factor + + # Case where alpharank_succeeded_once (i.e., epsilon_list and pi_list have + # at least one entry), and a) has not converged yet and b) failed on this + # instance due to epsilon being too small. I.e., the rate of decreasing + # of epsilon is too high. + if alpharank_succeeded_once: + epsilon_mult_factor = (epsilon_mult_factor+1.)/2. + epsilon *= epsilon_mult_factor + + epsilon_list, pi_list = zip(*[(epsilon, epsilon_pi_hist[epsilon]) + for epsilon in sorted(epsilon_pi_hist.keys(), + reverse=True)]) + pi_list = np.asarray(pi_list) + + if visualize: + if strat_labels is None: + strat_labels = utils.get_strat_profile_labels(payoff_tables, + payoffs_are_hpt_format) + alpharank_visualizer.plot_pi_vs_alpha( + pi_list.T, + epsilon_list, + num_populations, + num_strats_per_population, + strat_labels, + num_strats_to_label=num_strats_to_label, + legend_sort_clusters=legend_sort_clusters, + xlabel=r'Infinite-AlphaRank Noise $\epsilon$') + + if return_epsilon: + return pi_list[-1], epsilon_list[-1] + else: + return pi_list[-1] + + +def sweep_pi_vs_alpha(payoff_tables, + strat_labels=None, + warm_start_alpha=None, + visualize=False, + return_alpha=False, + m=50, + rtol=1e-5, + atol=1e-8, + num_strats_to_label=10, + legend_sort_clusters=False): + """Computes stationary distribution, pi, for range of selection intensities. + + The range of selection intensities is defined in alpha_list and corresponds + to the temperature of the Fermi selection function. + + Args: + payoff_tables: List of game payoff tables, one for each agent identity. Each + payoff_table may be either a numpy array, or a _PayoffTableInterface + object. + strat_labels: Human-readable strategy labels. See get_strat_profile_labels() + in utils.py for formatting details. 
+ warm_start_alpha: Initial value of alpha to use. + visualize: Plot the sweep results. + return_alpha: Whether to return the final alpha used. + m: AlphaRank population size. + rtol: The relative tolerance parameter for np.allclose calls. + atol: The absolute tolerance parameter for np.allclose calls. + num_strats_to_label: Number of strats to label in legend + legend_sort_clusters: If true, strategies in the same cluster are sorted in + the legend according to orderings for earlier alpha values. Primarily for + visualization purposes! Rankings for lower alpha values should be + interpreted carefully. + + Returns: + pi: AlphaRank stationary distribution. + alpha: The AlphaRank selection-intensity level resulting from sweep. + """ + + payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables) + num_populations = len(payoff_tables) + num_strats_per_population = utils.get_num_strats_per_population( + payoff_tables, payoffs_are_hpt_format) + + if num_populations == 1: + num_profiles = num_strats_per_population[0] + else: + num_profiles = utils.get_num_profiles(num_strats_per_population) + + assert (strat_labels is None or isinstance(strat_labels, dict) + or (len(strat_labels) == num_profiles)) + + pi_list = np.empty((num_profiles, 0)) + alpha_list = [] + num_iters = 0 + alpha_mult_factor = 2. + + if warm_start_alpha is not None: + alpha = warm_start_alpha + alpharank_succeeded_once = False + else: + alpha = 1e-4 # Reasonable default for most games, can be user-overridden + + while 1: + try: + _, _, pi, _, _ = compute(payoff_tables, alpha=alpha, m=m) + pi_list = np.append(pi_list, np.reshape(pi, (-1, 1)), axis=1) + alpha_list.append(alpha) + # Stop when pi converges + if num_iters > 0 and np.allclose(pi, pi_list[:, num_iters - 1], rtol, + atol): + break + alpha *= alpha_mult_factor + num_iters += 1 + alpharank_succeeded_once = True + except ValueError as _: + if warm_start_alpha is not None and not alpharank_succeeded_once: + # When warm_start_alpha is used, there's a chance that + # the initial warm_start_alpha is too large and causes exceptions due to + # the Markov transition matrix being reducible. So keep decreasing until + # a single success occurs. + alpha /= 2 + elif not np.allclose(pi_list[:, -1], pi_list[:, -2], rtol, atol): + # Sweep stopped due to multiple stationary distributions, but pi had + # not converged due to the alpha scaling being too large. + alpha /= alpha_mult_factor + alpha_mult_factor = (alpha_mult_factor + 1.) / 2. + alpha *= alpha_mult_factor + else: + break + + if visualize: + if strat_labels is None: + strat_labels = utils.get_strat_profile_labels(payoff_tables, + payoffs_are_hpt_format) + alpharank_visualizer.plot_pi_vs_alpha( + pi_list.T, + alpha_list, + num_populations, + num_strats_per_population, + strat_labels, + num_strats_to_label=num_strats_to_label, + legend_sort_clusters=legend_sort_clusters) + + if return_alpha: + return pi, alpha + else: + return pi + + +def compute_and_report_alpharank(payoff_tables, + m=50, + alpha=100, + verbose=False, + num_top_strats_to_print=8): + """Computes and visualizes Alpha-Rank outputs. + + Args: + payoff_tables: List of game payoff tables, one for each agent identity. Each + payoff_table may be either a numpy array, or a _PayoffTableInterface + object. + m: Finite population size. + alpha: Fermi distribution temperature parameter. + verbose: Set to True to print intermediate results. + num_top_strats_to_print: Number of top strategies to print. + + Returns: + pi: AlphaRank stationary distribution/rankings. 
+ """ + payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables) + rhos, rho_m, pi, _, _ = compute(payoff_tables, m=m, alpha=alpha) + strat_labels = utils.get_strat_profile_labels(payoff_tables, + payoffs_are_hpt_format) + + if verbose: + print_results(payoff_tables, payoffs_are_hpt_format, pi=pi) + + utils.print_rankings_table( + payoff_tables, + pi, + strat_labels, + num_top_strats_to_print=num_top_strats_to_print) + m_network_plotter = alpharank_visualizer.NetworkPlot( + payoff_tables, rhos, rho_m, pi, strat_labels, num_top_profiles=8) + m_network_plotter.compute_and_draw_network() + return pi + + +def compute(payoff_tables, + m=50, + alpha=100, + use_local_selection_model=True, + verbose=False, + use_inf_alpha=False, + inf_alpha_eps=0.01): + """Computes the finite population stationary statistics. + + Args: + payoff_tables: List of game payoff tables, one for each agent identity. Each + payoff_table may be either a numpy array, or a _PayoffTableInterface + object. + m: Finite population size. + alpha: Fermi distribution temperature parameter. + use_local_selection_model: Enable local evolutionary selection model, which + considers fitness against the current opponent only, rather than the + global population state. + verbose: Set to True to print intermediate results. + use_inf_alpha: Use infinite-alpha alpharank model. + inf_alpha_eps: Noise term to use in infinite-alpha alpharank model. + + Returns: + rhos: Matrix of strategy-to-strategy fixation probabilities. + rho_m: Neutral fixation probability. + pi: Finite population stationary distribution. + num_strats: Number of available strategies. + """ + payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables) + + num_populations = len(payoff_tables) + + num_strats_per_population = utils.get_num_strats_per_population( + payoff_tables, payoffs_are_hpt_format) + + # Handles the trivial case of Markov chain with one state + if np.array_equal(num_strats_per_population, + np.ones(len(num_strats_per_population))): + rhos = np.asarray([[1]]) + rho_m = 1. / m if not use_inf_alpha else 1 + num_profiles = 1 + pi = np.asarray([1.]) + return rhos, rho_m, pi, num_profiles, num_strats_per_population + + if verbose: + print('Constructing c matrix') + print('num_strats_per_population:', num_strats_per_population) + + if num_populations == 1: + # User fast closed-form analysis for constant-sum single-population games + game_is_constant_sum, payoff_sum = utils.check_is_constant_sum( + payoff_tables[0], payoffs_are_hpt_format) + if verbose: + print('game_is_constant_sum:', game_is_constant_sum, 'payoff sum: ', + payoff_sum) + # Single-population/symmetric game just uses the first player's payoffs + c, rhos = _get_singlepop_transition_matrix( + payoff_tables[0], + payoffs_are_hpt_format, + m, + alpha, + game_is_constant_sum, + use_local_selection_model, + payoff_sum, + use_inf_alpha=use_inf_alpha, + inf_alpha_eps=inf_alpha_eps) + num_profiles = num_strats_per_population[0] + else: + c, rhos = _get_multipop_transition_matrix( + payoff_tables, + payoffs_are_hpt_format, + m, + alpha, + use_inf_alpha=use_inf_alpha, + inf_alpha_eps=inf_alpha_eps) + num_profiles = utils.get_num_profiles(num_strats_per_population) + + pi = _get_stationary_distr(c) + + rho_m = 1. 
/ m if not use_inf_alpha else 1 # Neutral fixation probability + if verbose: + print_results(payoff_tables, payoffs_are_hpt_format, rhos, rho_m, c, pi) + + return rhos, rho_m, pi, num_profiles, num_strats_per_population + + +def suggest_alpha(payoff_tables, tol=.1): + """Suggests an alpha for use in alpha-rank. + + The suggested alpha is approximately the smallest possible alpha such that + the ranking has 'settled out'. It is calculated as + -ln(tol)/min_gap_between_payoffs. + + The logic behind this settling out is that the fixation probabilities can be + expanded as a series, and the relative size of each term in this series + changes with alpha. As alpha gets larger and larger, one of the terms in + this series comes to dominate, and this causes the ranking to settle + down. Just how fast this domination happens is easy to calculate, and this + function uses it to estimate the alpha by which the ranking has settled. + + You can find further discussion at the PR: + + https://github.com/deepmind/open_spiel/pull/403 + + Args: + payoff_tables: List of game payoff tables, one for each agent identity. Each + payoff_table may be either a numpy array, or a _PayoffTableInterface + object. + tol: the desired gap between the first and second terms in the fixation + probability expansion. A smaller tolerance leads to a larger alpha, and + a 'more settled out' ranking. + + Returns: + A suggested alpha. + """ + payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables) + + num_strats_per_population = utils.get_num_strats_per_population( + payoff_tables, payoffs_are_hpt_format) + num_profiles = utils.get_num_profiles(num_strats_per_population) + + gap = np.inf + for id_row_profile in range(num_profiles): + row_profile = utils.get_strat_profile_from_id(num_strats_per_population, + id_row_profile) + + next_profile_gen = utils.get_valid_next_profiles(num_strats_per_population, + row_profile) + + for index_population_that_changed, col_profile in next_profile_gen: + payoff_table_k = payoff_tables[index_population_that_changed] + f_r = _get_payoff(payoff_table_k, payoffs_are_hpt_format, col_profile, + index_population_that_changed) + f_s = _get_payoff(payoff_table_k, payoffs_are_hpt_format, row_profile, + index_population_that_changed) + if f_r > f_s: + gap = min(gap, f_r - f_s) + + return -np.log(tol)/gap + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/egt/alpharank_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/egt/alpharank_test.py new file mode 100644 index 0000000..282852b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/egt/alpharank_test.py @@ -0,0 +1,97 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
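A minimal end-to-end sketch (editorial addition, combining the calls exercised by the test below) of the single-population Alpha-Rank pipeline: build payoff tables for a symmetric matrix game, then call `compute` to obtain the fixation probabilities and the stationary distribution.

```python
import numpy as np

from open_spiel.python.egt import alpharank
from open_spiel.python.egt import utils
import pyspiel

# Single-population payoff table for symmetric rock-paper-scissors.
game = pyspiel.load_matrix_game("matrix_rps")
payoff_tables = utils.game_payoffs_array(game)
_, payoff_tables = utils.is_symmetric_matrix_game(payoff_tables)

# Fixation probabilities, neutral fixation probability, and the stationary
# distribution pi (the Alpha-Rank ranking itself) for population size m and
# selection intensity alpha.
rhos, rho_m, pi, num_profiles, _ = alpharank.compute(
    payoff_tables, m=20, alpha=0.1)
print(np.round(pi, 3))  # symmetric RPS: each strategy gets mass ~1/3
```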
+ +"""Tests for open_spiel.python.egt.alpharank.""" + +from absl.testing import absltest + +# pylint: disable=g-import-not-at-top +import matplotlib +matplotlib.use("agg") # switch backend for testing + +import numpy as np + +from open_spiel.python.egt import alpharank +from open_spiel.python.egt import heuristic_payoff_table +from open_spiel.python.egt import utils +import pyspiel + + +class AlphaRankTest(absltest.TestCase): + + def test_stationary_distribution(self): + """Tests stationary distribution using payoffs from Han et al., 2013.""" + r = 1. + t = 2. + p = 0. + s = -1. + delta = 4. + eps = 0.25 + payoff_tables = [ + np.asarray([[r - eps / 2., r - eps, 0, s + delta - eps, r - eps], + [r, r, s, s, s], [0, t, p, p, p], [t - delta, t, p, p, p], + [r, t, p, p, p]]) + ] + + m = 20 + alpha = 0.1 + expected_pi = np.asarray( + [0.40966787, 0.07959841, 0.20506998, 0.08505983, 0.2206039]) + + # Test payoffs in matrix format + _, _, pi_matrix, _, _ = alpharank.compute( + payoff_tables, m=m, alpha=alpha, use_local_selection_model=False) + np.testing.assert_array_almost_equal(pi_matrix, expected_pi, decimal=4) + + # Test payoffs in HPT format + hpts = [heuristic_payoff_table.from_matrix_game(payoff_tables[0])] + _, _, pi_hpts, _, _ = alpharank.compute( + hpts, m=m, alpha=alpha, use_local_selection_model=False) + np.testing.assert_array_almost_equal(pi_hpts, expected_pi, decimal=4) + + def test_constant_sum_transition_matrix(self): + """Tests closed-form transition matrix computation for constant-sum case.""" + + game = pyspiel.load_matrix_game("matrix_rps") + payoff_tables = utils.game_payoffs_array(game) + + # Checks if the game is symmetric and runs single-population analysis if so + _, payoff_tables = utils.is_symmetric_matrix_game(payoff_tables) + payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables) + + m = 20 + alpha = 0.1 + + # Case 1) General-sum game computation (slower) + game_is_constant_sum = False + use_local_selection_model = False + payoff_sum = None + c1, rhos1 = alpharank._get_singlepop_transition_matrix( + payoff_tables[0], payoffs_are_hpt_format, m, alpha, + game_is_constant_sum, use_local_selection_model, payoff_sum) + + # Case 2) Constant-sum closed-form computation (faster) + game_is_constant_sum, payoff_sum = utils.check_is_constant_sum( + payoff_tables[0], payoffs_are_hpt_format) + c2, rhos2 = alpharank._get_singlepop_transition_matrix( + payoff_tables[0], payoffs_are_hpt_format, m, alpha, + game_is_constant_sum, use_local_selection_model, payoff_sum) + + # Ensure both cases match + np.testing.assert_array_almost_equal(c1, c2) + np.testing.assert_array_almost_equal(rhos1, rhos2) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/egt/alpharank_visualizer.py b/scenarios/bargaining/open_spiel/open_spiel/python/egt/alpharank_visualizer.py new file mode 100644 index 0000000..1a2271d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/egt/alpharank_visualizer.py @@ -0,0 +1,497 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Various visualization tools for Alpha-Rank. + +All equations and variable names correspond to the following paper: + https://arxiv.org/abs/1903.01373 + +""" + +from absl import logging + +try: + from matplotlib import patches # pylint: disable=g-import-not-at-top + import matplotlib.patheffects as PathEffects # pylint: disable=g-import-not-at-top + import matplotlib.pyplot as plt # pylint: disable=g-import-not-at-top +except ImportError as e: + logging.info("If your tests failed with the error 'ImportError: No module " + "named functools_lru_cache', this is a known bug in matplotlib " + "and there is a workaround (run sudo apt install " + "python-backports.functools-lru-cache. See: " + "https://github.com/matplotlib/matplotlib/issues/9344.") + raise e + +import networkx as nx # pylint: disable=g-import-not-at-top +import numpy as np + +from open_spiel.python.egt import utils + + +class NetworkPlot(object): + """A class for visualizing the Alpha-Rank interaction network.""" + + def __init__(self, + payoff_tables, + rhos, + rho_m, + pi, + state_labels, + num_top_profiles=None): + """Initializes a network plotting object. + + Args: + payoff_tables: List of game payoff tables, one for each agent identity. + Each payoff_table may be either a 2D numpy array, or a + _PayoffTableInterface object. + rhos: Fixation probabilities. + rho_m: Neutral fixation probability. + pi: Stationary distribution of fixation Markov chain defined by rhos. + state_labels: Labels corresponding to Markov states. For the + single-population case, state_labels should be a list of pure strategy + names. For the multi-population case, it + should be a dict with (key,value) pairs: (population + index,list of strategy names) + num_top_profiles: Set to (int) to show only the graph nodes corresponding + to the top k elements of stationary distribution, or None to show all. + """ + self.fig = plt.figure(figsize=(10, 10)) + self.num_populations = len(payoff_tables) + payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables) + self.num_strats_per_population = ( + utils.get_num_strats_per_population(payoff_tables, + payoffs_are_hpt_format)) + self.rhos = rhos + self.rho_m = rho_m + self.pi = pi + self.num_profiles = len(pi) + self.state_labels = state_labels + self.first_run = True + self.num_top_profiles = num_top_profiles + + if self.num_top_profiles: + # More than total number of strats requested for plotting + if self.num_top_profiles > self.num_profiles: + self.num_top_profiles = self.num_profiles + # Skip the bottom num_profiles-k stationary strategies. + self.nodes_to_skip = list(self.pi.argsort()[:self.num_profiles - + self.num_top_profiles]) + else: + self.nodes_to_skip = [] + + self._reset_cycle_counter() + + def _reset_cycle_counter(self): + self.i_cycle_to_show = -1 + + def _draw_network(self): + """Draws the NetworkX object representing the underlying graph.""" + plt.clf() + + if self.num_populations == 1: + node_sizes = 5000 + node_border_width = 1. + else: + node_sizes = 15000 + node_border_width = 3. 
+ + vmin, vmax = 0, np.max(self.pi) + 0.1 + + nx.draw_networkx_nodes( + self.g, + self.pos, + node_size=node_sizes, + node_color=self.node_colors, + edgecolors="k", + cmap=plt.cm.Blues, + vmin=vmin, + vmax=vmax, + linewidths=node_border_width) + + nx.draw_networkx_edges( + self.g, + self.pos, + node_size=node_sizes, + arrowstyle="->", + arrowsize=10, + edge_color=self.edge_colors, + edge_cmap=plt.cm.Blues, + width=5) + + nx.draw_networkx_edge_labels(self.g, self.pos, edge_labels=self.edge_labels) + + if self.num_populations > 1: + subnode_separation = 0.1 + subgraph = nx.Graph() + for i_population in range(self.num_populations): + subgraph.add_node(i_population) + + for i_strat_profile in self.g: + x, y = self.pos[i_strat_profile] + if self.num_populations == 1: + node_text = "$\\pi_{" + self.state_labels[i_strat_profile] + "}=$" + node_text += str(np.round(self.pi[i_strat_profile], decimals=2)) + else: + node_text = "" # No text for multi-population case as plot gets messy + txt = plt.text( + x, + y, + node_text, + horizontalalignment="center", + verticalalignment="center", + fontsize=12) + txt.set_path_effects( + [PathEffects.withStroke(linewidth=3, foreground="w")]) + + if self.num_populations > 1: + sub_pos = nx.circular_layout(subgraph) + subnode_labels = dict() + strat_profile = utils.get_strat_profile_from_id( + self.num_strats_per_population, i_strat_profile) + for i_population in subgraph.nodes(): + i_strat = strat_profile[i_population] + subnode_labels[i_population] = "$s^{" + str(i_population + 1) + "}=" + subnode_labels[i_population] += ( + self.state_labels[i_population][i_strat] + "$") + # Adjust the node positions generated by NetworkX's circular_layout(), + # such that the node for the 1st strategy starts on the left. + sub_pos[i_population] = (-sub_pos[i_population] * subnode_separation + + self.pos[i_strat_profile]) + nx.draw( + subgraph, + pos=sub_pos, + with_labels=True, + width=0., + node_color="w", + labels=subnode_labels, + node_size=2500) + + def compute_and_draw_network(self): + """Computes the various node/edge connections of the graph and draws it.""" + + if np.max(self.rhos) < self.rho_m: + print("All node-to-node fixation probabilities (not including self-cycles" + " are lower than neutral. Thus, no graph will be drawn.") + return + + self.g = nx.MultiDiGraph() + self.edge_labels = {} + self.edge_alphas = [] + rho_max = np.max(self.rhos / self.rho_m) + rho_m_alpha = 0.1 # Transparency of neutral selection edges + + for i in range(self.num_profiles): + for j in range(self.num_profiles): + # Do not draw edge if any node involved is skipped + if j not in self.nodes_to_skip and i not in self.nodes_to_skip: + rate = self.rhos[i][j] / self.rho_m + # Draws edges when fixation from one strategy to another occurs (i.e., + # rate > 1), or with fixation equal to neutral selection probability + # (i.e., rate == 1). This is consistent with visualizations used in + # finite-population literature. + if rate > 1: + # Compute alphas. Clip needed due to numerical precision. + alpha = np.clip(rho_m_alpha + (1 - rho_m_alpha) * rate / rho_max, + None, 1.) 
+ self.g.add_edge(i, j, weight=alpha, label="{:.01f}".format(rate)) + self.edge_alphas.append(alpha) + elif np.isclose(rate, 1): + alpha = rho_m_alpha + self.g.add_edge(i, j, weight=alpha, label="{:.01f}".format(rate)) + self.edge_alphas.append(alpha) + # Label edges for non-self-loops with sufficient flowrate + if i != j and rate > 1: + edge_string = "$" + str(np.round(rate, decimals=2)) + "\\rho_m$" + else: + edge_string = "" + self.edge_labels[(i, j)] = edge_string + + # MultiDiGraph nodes are not ordered, so order the node colors accordingly + self.node_colors = [self.pi[node] for node in self.g.nodes()] + + self.cycles = list(nx.simple_cycles(self.g)) + self.num_cycles = len(self.cycles) + + # Color the edges of cycles if user requested it + if self.i_cycle_to_show >= 0: + all_cycle_edges = [ + zip(nodes, (nodes[1:] + nodes[:1])) for nodes in self.cycles + ] + cur_cycle_edges = all_cycle_edges[self.i_cycle_to_show] + self.edge_colors = [] + for u, v in self.g.edges(): + if (u, v) in cur_cycle_edges: + self.edge_colors.append([1., 0., 0.]) + else: + self.edge_colors.append([1. - self.g[u][v][0]["weight"]] * 3) + else: + self.edge_colors = [ + [1. - self.g[u][v][0]["weight"]] * 3 for u, v in self.g.edges() + ] + self.edge_alphas = [self.g[u][v][0]["weight"] for u, v in self.g.edges()] + + ax = plt.gca() + + # Centered circular pose + self.pos = nx.layout.circular_layout(self.g) + all_x = [node_pos[0] for node, node_pos in self.pos.items()] + all_y = [node_pos[1] for node, node_pos in self.pos.items()] + min_x = np.min(all_x) + max_x = np.max(all_x) + min_y = np.min(all_y) + max_y = np.max(all_y) + for _, node_pos in self.pos.items(): + node_pos[0] -= (max_x + min_x) / 2 + node_pos[1] -= (max_y + min_y) / 2 + + # Rendering + self._draw_network() + if self.first_run: + ax.autoscale_view() + ax.set_axis_off() + ax.set_aspect("equal") + plt.ylim(-1.3, 1.3) + plt.xlim(-1.3, 1.3) + if self.first_run: + self.first_run = False + plt.axis("off") + plt.show() + + +def _draw_pie(ax, + ratios, + colors, + x_center=0, + y_center=0, + size=100, + clip_on=True, + zorder=0): + """Plots a pie chart. + + Args: + ax: plot axis. + ratios: list indicating size of each pie slice, with elements summing to 1. + colors: list indicating color of each pie slice. + x_center: x coordinate of pie center. + y_center: y coordinate of pie center. + size: pie size. + clip_on: control clipping of pie (e.g., to show it when it's out of axis). + zorder: plot z order (e.g., to show pie on top of other plot elements). + """ + xy = [] + start = 0. + for ratio in ratios: + x = [0] + np.cos( + np.linspace(2 * np.pi * start, 2 * np.pi * + (start + ratio), 30)).tolist() + y = [0] + np.sin( + np.linspace(2 * np.pi * start, 2 * np.pi * + (start + ratio), 30)).tolist() + xy.append(list(zip(x, y))) + start += ratio + + for i, xyi in enumerate(xy): + ax.scatter([x_center], [y_center], + marker=xyi, + s=size, + facecolor=colors[i], + edgecolors="none", + clip_on=clip_on, + zorder=zorder) + + +def generate_sorted_masses_strats(pi_list, curr_alpha_idx, strats_to_go): + """Generates a sorted list of (mass, strats) tuples. + + Args: + pi_list: List of stationary distributions, pi + curr_alpha_idx: Index in alpha_list for which to start clustering + strats_to_go: List of strategies that still need to be ordered + + Returns: + Sorted list of (mass, strats) tuples. 
+ """ + if curr_alpha_idx > 0: + sorted_masses_strats = list() + masses_to_strats = utils.cluster_strats(pi_list[curr_alpha_idx, + strats_to_go]) + + for mass, strats in sorted(masses_to_strats.items(), reverse=True): + if len(strats) > 1: + to_append = generate_sorted_masses_strats(pi_list, curr_alpha_idx - 1, + strats) + + to_append = [(mass, [strats_to_go[s] + for s in strats_list]) + for (mass, strats_list) in to_append] + + sorted_masses_strats.extend(to_append) + else: + sorted_masses_strats.append((mass, [ + strats_to_go[strats[0]], + ])) + + return sorted_masses_strats + else: + to_return = sorted( + utils.cluster_strats(pi_list[curr_alpha_idx, strats_to_go]).items(), + reverse=True) + to_return = [(mass, [strats_to_go[s] + for s in strats_list]) + for (mass, strats_list) in to_return] + return to_return + + +def plot_pi_vs_alpha(pi_list, + alpha_list, + num_populations, + num_strats_per_population, + strat_labels, + num_strats_to_label, + plot_semilogx=True, + xlabel=r"Ranking-intensity $\alpha$", + ylabel=r"Strategy mass in stationary distribution $\pi$", + legend_sort_clusters=False): + """Plots stationary distributions, pi, against selection intensities, alpha. + + Args: + pi_list: List of stationary distributions, pi. + alpha_list: List of selection intensities, alpha. + num_populations: The number of populations. + num_strats_per_population: List of the number of strategies per population. + strat_labels: Human-readable strategy labels. + num_strats_to_label: The number of top strategies to label in the legend. + plot_semilogx: Boolean set to enable/disable semilogx plot. + xlabel: Plot xlabel. + ylabel: Plot ylabel. + legend_sort_clusters: If true, strategies in the same cluster are sorted in + the legend according to orderings for earlier alpha values. Primarily for + visualization purposes! Rankings for lower alpha values should be + interpreted carefully. + """ + + # Cluster strategies for which the stationary distribution has similar masses + masses_to_strats = utils.cluster_strats(pi_list[-1, :]) + + # Set colors + num_strat_profiles = np.shape(pi_list)[1] + num_strats_to_label = min(num_strats_to_label, num_strat_profiles) + cmap = plt.get_cmap("Paired") + colors = [cmap(i) for i in np.linspace(0, 1, num_strat_profiles)] + + # Plots stationary distribution vs. alpha series + plt.figure(facecolor="w") + axes = plt.gca() + + legend_line_objects = [] + legend_labels = [] + + rank = 1 + num_strats_printed = 0 + add_legend_entries = True + + if legend_sort_clusters: + sorted_masses_strats = generate_sorted_masses_strats( + pi_list, pi_list.shape[0] - 1, range(pi_list.shape[1])) + else: + sorted_masses_strats = sorted(masses_to_strats.items(), reverse=True) + + for mass, strats in sorted_masses_strats: + for profile_id in strats: + if num_populations == 1: + strat_profile = profile_id + else: + strat_profile = utils.get_strat_profile_from_id( + num_strats_per_population, profile_id) + + if plot_semilogx: + series = plt.semilogx( + alpha_list, + pi_list[:, profile_id], + color=colors[profile_id], + linewidth=2) + else: + series = plt.plot( + alpha_list, + pi_list[:, profile_id], + color=colors[profile_id], + linewidth=2) + + if add_legend_entries: + if num_strats_printed >= num_strats_to_label: + # Placeholder blank series for remaining entries + series = plt.semilogx(np.nan, np.nan, "-", color="none") + label = "..." 
+ add_legend_entries = False + else: + label = utils.get_label_from_strat_profile(num_populations, + strat_profile, + strat_labels) + legend_labels.append(label) + legend_line_objects.append(series[0]) + num_strats_printed += 1 + rank += 1 + + # Plots pie charts on far right of figure to indicate clusters of strategies + # with identical rank + for mass, strats in iter(masses_to_strats.items()): + _draw_pie( + axes, + ratios=[1 / len(strats)] * len(strats), + colors=[colors[i] for i in strats], + x_center=alpha_list[-1], + y_center=mass, + size=200, + clip_on=False, + zorder=10) + + # Axes ymax set slightly above highest stationary distribution mass + max_mass = np.amax(pi_list) + axes_y_max = np.ceil( + 10. * max_mass) / 10 # Round upward to nearest first decimal + axes_y_max = np.clip(axes_y_max, 0., 1.) + + # Plots a rectangle highlighting the rankings on the far right of the figure + box_x_min = alpha_list[-1] * 0.7 + box_y_min = np.min(pi_list[-1, :]) - 0.05 * axes_y_max + width = 0.7 * alpha_list[-1] + height = np.max(pi_list[-1, :]) - np.min( + pi_list[-1, :]) + 0.05 * axes_y_max * 2 + axes.add_patch( + patches.Rectangle((box_x_min, box_y_min), + width, + height, + edgecolor="b", + facecolor=(1, 0, 0, 0), + clip_on=False, + linewidth=5, + zorder=20)) + + # Plot formatting + axes.set_xlim(np.min(alpha_list), np.max(alpha_list)) + axes.set_ylim([0.0, axes_y_max]) + axes.set_xlabel(xlabel) + axes.set_ylabel(ylabel) + axes.set_axisbelow(True) # Axes appear below data series in terms of zorder + + # Legend on the right side of the current axis + box = axes.get_position() + axes.set_position([box.x0, box.y0, box.width * 0.8, box.height]) + axes.legend( + legend_line_objects, + legend_labels, + loc="center left", + bbox_to_anchor=(1.05, 0.5)) + plt.grid() + plt.show() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/egt/alpharank_visualizer_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/egt/alpharank_visualizer_test.py new file mode 100644 index 0000000..e62f7d3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/egt/alpharank_visualizer_test.py @@ -0,0 +1,69 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
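Before the tests, a standalone sketch of how `plot_pi_vs_alpha` is typically driven, mirroring the synthetic alpha history the test below constructs; the log-spaced grid and the `matrix_rps` game are illustrative choices:

```python
# Sweep the ranking-intensity alpha and plot each profile's stationary mass.
import numpy as np
import pyspiel

from open_spiel.python.egt import alpharank
from open_spiel.python.egt import alpharank_visualizer
from open_spiel.python.egt import utils

game = pyspiel.load_matrix_game("matrix_rps")
payoff_tables = utils.game_payoffs_array(game)
_, payoff_tables = utils.is_symmetric_matrix_game(payoff_tables)
strat_labels = utils.get_strat_profile_labels(
    payoff_tables, utils.check_payoffs_are_hpt(payoff_tables))

alpha_list = list(np.logspace(-2, 2, num=20))
pi_list = []
for alpha in alpha_list:
  _, _, pi, _, num_strats_per_population = alpharank.compute(
      payoff_tables, alpha=alpha)
  pi_list.append(pi)

alpharank_visualizer.plot_pi_vs_alpha(
    np.stack(pi_list),  # shape [num_alphas, num_profiles]
    alpha_list,
    num_populations=len(payoff_tables),
    num_strats_per_population=num_strats_per_population,
    strat_labels=strat_labels,
    num_strats_to_label=3)
```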
+ +"""Tests for open_spiel.python.egt.alpharank_visualizer.""" + +from absl.testing import absltest + +# pylint: disable=g-import-not-at-top +import matplotlib +matplotlib.use("agg") # switch backend for testing + +import mock +import numpy as np + +from open_spiel.python.egt import alpharank +from open_spiel.python.egt import alpharank_visualizer +from open_spiel.python.egt import utils +import pyspiel + + +class AlpharankVisualizerTest(absltest.TestCase): + + @mock.patch("%s.alpharank_visualizer.plt" % __name__) + def test_plot_pi_vs_alpha(self, mock_plt): + # Construct game + game = pyspiel.load_matrix_game("matrix_rps") + payoff_tables = utils.game_payoffs_array(game) + _, payoff_tables = utils.is_symmetric_matrix_game(payoff_tables) + payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables) + + # Compute alpharank + alpha = 1e2 + _, _, pi, num_profiles, num_strats_per_population = ( + alpharank.compute(payoff_tables, alpha=alpha)) + strat_labels = utils.get_strat_profile_labels(payoff_tables, + payoffs_are_hpt_format) + num_populations = len(payoff_tables) + + # Construct synthetic pi-vs-alpha history + pi_list = np.empty((num_profiles, 0)) + alpha_list = [] + for _ in range(2): + pi_list = np.append(pi_list, np.reshape(pi, (-1, 1)), axis=1) + alpha_list.append(alpha) + + # Test plotting code (via pyplot mocking to prevent plot pop-up) + alpharank_visualizer.plot_pi_vs_alpha( + pi_list.T, + alpha_list, + num_populations, + num_strats_per_population, + strat_labels, + num_strats_to_label=0) + self.assertTrue(mock_plt.show.called) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/egt/dynamics.py b/scenarios/bargaining/open_spiel/open_spiel/python/egt/dynamics.py new file mode 100644 index 0000000..6558a85 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/egt/dynamics.py @@ -0,0 +1,186 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Continuous-time population dynamics.""" + +import numpy as np + + +def replicator(state, fitness): + """Continuous-time replicator dynamics. + + This is the standard form of the continuous-time replicator dynamics also + known as selection dynamics. + + For more details, see equation (5) page 9 in + https://jair.org/index.php/jair/article/view/10952 + + Args: + state: Probability distribution as an `np.array(shape=num_strategies)`. + fitness: Fitness vector as an `np.array(shape=num_strategies)`. + + Returns: + Time derivative of the population state. + """ + avg_fitness = state.dot(fitness) + return state * (fitness - avg_fitness) + + +def boltzmannq(state, fitness, temperature=1.): + """Selection-mutation dynamics modeling Q-learning with Boltzmann exploration. + + For more details, see equation (10) page 15 in + https://jair.org/index.php/jair/article/view/10952 + + Args: + state: Probability distribution as an `np.array(shape=num_strategies)`. + fitness: Fitness vector as an `np.array(shape=num_strategies)`. 
+ temperature: A scalar parameter determining the rate of exploration. + + Returns: + Time derivative of the population state. + """ + exploitation = (1. / temperature) * replicator(state, fitness) + exploration = (np.log(state) - state.dot(np.log(state).transpose())) + return exploitation - state * exploration + + +def qpg(state, fitness): + """Q-based policy gradient dynamics (QPG). + + For more details, see equation (12) on page 18 in + https://arxiv.org/pdf/1810.09026.pdf + + Args: + state: Probability distribution as an `np.array(shape=num_strategies)`. + fitness: Fitness vector as an `np.array(shape=num_strategies)`. + + Returns: + Time derivative of the population state. + """ + regret = fitness - state.dot(fitness) + return state * (state * regret - np.sum(state**2 * regret)) + + +class SinglePopulationDynamics(object): + """Continuous-time single population dynamics. + + Attributes: + payoff_matrix: The payoff matrix as an `numpy.ndarray` of shape `[2, k_1, + k_2]`, where `k_1` is the number of strategies of the first player and + `k_2` for the second player. The game is assumed to be symmetric. + dynamics: A callback function that returns the time-derivative of the + population state. + """ + + def __init__(self, payoff_matrix, dynamics): + """Initializes the single-population dynamics.""" + assert payoff_matrix.ndim == 3 + assert payoff_matrix.shape[0] == 2 + assert np.allclose(payoff_matrix[0], payoff_matrix[1].T) + self.payoff_matrix = payoff_matrix[0] + self.dynamics = dynamics + + def __call__(self, state=None, time=None): + """Time derivative of the population state. + + Args: + state: Probability distribution as list or + `numpy.ndarray(shape=num_strategies)`. + time: Time is ignored (time-invariant dynamics). Including the argument in + the function signature supports numerical integration via e.g. + `scipy.integrate.odeint` which requires that the callback function has + at least two arguments (state and time). + + Returns: + Time derivative of the population state as + `numpy.ndarray(shape=num_strategies)`. + """ + state = np.array(state) + assert state.ndim == 1 + assert state.shape[0] == self.payoff_matrix.shape[0] + # (Ax')' = xA' + fitness = np.matmul(state, self.payoff_matrix.T) + return self.dynamics(state, fitness) + + +class MultiPopulationDynamics(object): + """Continuous-time multi-population dynamics. + + Attributes: + payoff_tensor: The payoff tensor as an numpy.ndarray of size `[n, k0, k1, + k2, ...]`, where n is the number of players and `k0` is the number of + strategies of the first player, `k1` of the second player and so forth. + dynamics: List of callback functions for the time-derivative of the + population states, where `dynamics[i]` computes the time-derivative of the + i-th player's population state. If at construction, only a single callback + function is provided, the same function is used for all populations. + """ + + def __init__(self, payoff_tensor, dynamics): + """Initializes the multi-population dynamics.""" + if isinstance(dynamics, list) or isinstance(dynamics, tuple): + assert payoff_tensor.shape[0] == len(dynamics) + else: + dynamics = [dynamics] * payoff_tensor.shape[0] + self.payoff_tensor = payoff_tensor + self.dynamics = dynamics + + def __call__(self, state, time=None): + """Time derivative of the population states. + + Args: + state: Combined population state for all populations as a list or flat + `numpy.ndarray` (ndim=1). Probability distributions are concatenated in + order of the players. 
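As the `__call__` docstrings note, these dynamics objects take `(state, time)` so they can be handed directly to a numerical integrator. A minimal sketch, assuming `scipy` is available (it is not a dependency introduced by this patch):

```python
# Integrate single-population replicator dynamics for rock-paper-scissors.
import numpy as np
from scipy.integrate import odeint

import pyspiel
from open_spiel.python.egt import dynamics
from open_spiel.python.egt.utils import game_payoffs_array

payoff_matrix = game_payoffs_array(pyspiel.load_matrix_game("matrix_rps"))
dyn = dynamics.SinglePopulationDynamics(payoff_matrix, dynamics.replicator)

x0 = np.array([0.6, 0.3, 0.1])       # initial population mixture
ts = np.linspace(0., 20., 1000)      # integration horizon
traj = odeint(dyn, x0, ts)           # odeint calls dyn(state, time)
print(dynamics.time_average(traj)[-1])  # long-run average mixture
```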
+ time: Time is ignored (time-invariant dynamics). Including the argument in + the function signature supports numerical integration via e.g. + `scipy.integrate.odeint` which requires that the callback function has + at least two arguments (state and time). + + Returns: + Time derivative of the combined population state as `numpy.ndarray`. + """ + state = np.array(state) + n = self.payoff_tensor.shape[0] # number of players + ks = self.payoff_tensor.shape[1:] # number of strategies for each player + assert state.shape[0] == sum(ks) + + states = np.split(state, np.cumsum(ks)[:-1]) + dstates = [None] * n + for i in range(n): + # move i-th population to front + fitness = np.moveaxis(self.payoff_tensor[i], i, 0) + # marginalize out all other populations + for i_ in set(range(n)) - {i}: + fitness = np.tensordot(states[i_], fitness, axes=[0, 1]) + dstates[i] = self.dynamics[i](states[i], fitness) + + return np.concatenate(dstates) + + +def time_average(traj): + """Time-averaged population state trajectory. + + Args: + traj: Trajectory as `numpy.ndarray`. Time is along the first dimension, + types/strategies along the second. + + Returns: + Time-averaged trajectory. + """ + n = traj.shape[0] + sum_traj = np.cumsum(traj, axis=0) + norm = 1. / np.arange(1, n + 1) + return sum_traj * norm[:, np.newaxis] diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/egt/dynamics_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/egt/dynamics_test.py new file mode 100644 index 0000000..56d9221 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/egt/dynamics_test.py @@ -0,0 +1,141 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.egt.dynamics.""" + +import math +from absl.testing import absltest +from absl.testing import parameterized + +import numpy as np + +from open_spiel.python.egt import dynamics +from open_spiel.python.egt.utils import game_payoffs_array +import pyspiel + + +def _sum_j_x_j_ln_x_j_over_x_i(x): + r"""Computes \sum_j x_j ln(x_j / x_i).""" + # By having a = x.reshape([1, -1]) and b = x.reshape([-1, 1]), we can use + # broadcasting and have: + # (a / b)[i, j] = x_j / x_i + # thus giving: + # \sum_j x_j * log(x_j/ x_i) = sum(a * ln (a/b), axis=1) + + a = x.reshape([1, -1]) + b = x.reshape([-1, 1]) + + return np.sum(a * np.log(np.divide(a, b)), axis=1) + + +def _q_learning_dynamics(composition, payoff, temperature): + r"""An equivalent implementation of `dynamics.boltzmannq`.""" + return 1 / temperature * dynamics.replicator(composition, payoff) + ( + composition * _sum_j_x_j_ln_x_j_over_x_i(composition)) + + +class _InternalTest(absltest.TestCase): + + def test__sum_j_x_j_ln_x_j_over_x_i(self): + # This tests a sub-function of `_q_learning_dynamics` to ensure its + # internals are correct. + x = np.asarray([1., 2., 3.]) + + # We use 2 different formula to check we have the correct result. 
+ expected = [sum([x_j * math.log(x_j / x_i) for x_j in x]) for x_i in x] + + log = math.log + expected_0 = 1. * log(1 / 1.) + 2 * log(2 / 1.) + 3 * log(3 / 1.) + expected_1 = 1. * log(1 / 2.) + 2 * log(2 / 2.) + 3 * log(3 / 2.) + expected_2 = 1. * log(1 / 3.) + 2 * log(2 / 3.) + 3 * log(3 / 3.) + + expected_2 = np.asarray([expected_0, expected_1, expected_2]) + np.testing.assert_array_equal(expected, expected_2) + + np.testing.assert_array_almost_equal(expected, + _sum_j_x_j_ln_x_j_over_x_i(x)) + + +class DynamicsTest(parameterized.TestCase): + + def test_boltzmann_q(self): + + x = np.asarray([1 / 2, 1 / 2]) + payoff = np.asarray([[1, 0], [0, 1]], dtype=np.float32) + temperature = 1 + + np.testing.assert_array_equal( + dynamics.boltzmannq(x, payoff, temperature), + _q_learning_dynamics(x, payoff, temperature)) + + def test_rd_rps_pure_fixed_points(self): + game = pyspiel.load_matrix_game('matrix_rps') + payoff_matrix = game_payoffs_array(game) + rd = dynamics.replicator + dyn = dynamics.SinglePopulationDynamics(payoff_matrix, rd) + x = np.eye(3) + np.testing.assert_allclose(dyn(x[0]), np.zeros((3,))) + np.testing.assert_allclose(dyn(x[1]), np.zeros((3,))) + np.testing.assert_allclose(dyn(x[2]), np.zeros((3,))) + + @parameterized.parameters(dynamics.replicator, dynamics.boltzmannq, + dynamics.qpg) + def test_dynamics_rps_mixed_fixed_point(self, func): + game = pyspiel.load_matrix_game('matrix_rps') + payoff_matrix = game_payoffs_array(game) + dyn = dynamics.SinglePopulationDynamics(payoff_matrix, func) + x = np.ones(shape=(3,)) / 3. + np.testing.assert_allclose(dyn(x), np.zeros((3,)), atol=1e-15) + + def test_multi_population_rps(self): + game = pyspiel.load_matrix_game('matrix_rps') + payoff_matrix = game_payoffs_array(game) + rd = dynamics.replicator + dyn = dynamics.MultiPopulationDynamics(payoff_matrix, [rd] * 2) + x = np.concatenate([np.ones(k) / float(k) for k in payoff_matrix.shape[1:]]) + np.testing.assert_allclose(dyn(x), np.zeros((6,)), atol=1e-15) + + def test_multi_population_three_populations(self): + payoff_matrix = np.arange(3 * 2 * 3 * 4).reshape(3, 2, 3, 4) + rd = dynamics.replicator + dyn = dynamics.MultiPopulationDynamics(payoff_matrix, [rd] * 3) + x = np.concatenate([np.ones(k) / float(k) for k in payoff_matrix.shape[1:]]) + self.assertEqual(dyn(x).shape, (9,)) + + def test_multi_population_four_populations(self): + payoff_matrix = np.zeros((4, 2, 2, 2, 2)) + payoff_matrix[:, 0, 0, 0, 0] = np.ones((4,)) + rd = dynamics.replicator + dyn = dynamics.MultiPopulationDynamics(payoff_matrix, [rd] * 4) + x = np.concatenate([np.ones(k) / float(k) for k in payoff_matrix.shape[1:]]) + avg_fitness = 1. / float(2**4) # if all players play uniform random + dx = dyn(x) + np.testing.assert_allclose(dx[::2], np.ones((4,)) * avg_fitness / 2.) + np.testing.assert_allclose(dx[1::2], np.ones((4,)) * (-avg_fitness) / 2.) + + def test_time_average(self): + n, k = 10, 3 + traj = np.ones(shape=(n, k)) + time_avg = dynamics.time_average(traj) + np.testing.assert_allclose(time_avg, np.ones(shape=(n, k))) + + traj[1::2] = -1. * traj[1::2] + time_avg = dynamics.time_average(traj) + np.testing.assert_allclose(time_avg[-1], np.zeros(shape=(k,))) + np.testing.assert_allclose(time_avg[-2], + 1. / (n - 1.) 
* np.ones(shape=(k,))) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/egt/examples/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/egt/examples/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/egt/examples/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/egt/examples/alpharank_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/egt/examples/alpharank_example.py new file mode 100644 index 0000000..beff68c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/egt/examples/alpharank_example.py @@ -0,0 +1,72 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Example running AlphaRank on OpenSpiel games. 
+ + AlphaRank output variable names corresponds to the following paper: + https://arxiv.org/abs/1903.01373 +""" + +from absl import app + +from open_spiel.python.algorithms import fictitious_play +from open_spiel.python.egt import alpharank +from open_spiel.python.egt import alpharank_visualizer +from open_spiel.python.egt import utils +import pyspiel + + +def get_kuhn_poker_data(num_players=3): + """Returns the kuhn poker data for the number of players specified.""" + game = pyspiel.load_game('kuhn_poker', {'players': num_players}) + xfp_solver = fictitious_play.XFPSolver(game, save_oracles=True) + for _ in range(3): + xfp_solver.iteration() + + # Results are seed-dependent, so show some interesting cases + if num_players == 2: + meta_games = xfp_solver.get_empirical_metagame(100, seed=1) + elif num_players == 3: + meta_games = xfp_solver.get_empirical_metagame(100, seed=5) + elif num_players == 4: + meta_games = xfp_solver.get_empirical_metagame(100, seed=2) + + # Metagame utility matrices for each player + payoff_tables = [] + for i in range(num_players): + payoff_tables.append(meta_games[i]) + return payoff_tables + + +def main(unused_arg): + # Construct meta-game payoff tables + payoff_tables = get_kuhn_poker_data() + payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables) + strat_labels = utils.get_strat_profile_labels(payoff_tables, + payoffs_are_hpt_format) + + # Run AlphaRank + rhos, rho_m, pi, _, _ = alpharank.compute(payoff_tables, alpha=1e2) + + # Report & plot results + alpharank.print_results( + payoff_tables, payoffs_are_hpt_format, rhos=rhos, rho_m=rho_m, pi=pi) + utils.print_rankings_table(payoff_tables, pi, strat_labels) + m_network_plotter = alpharank_visualizer.NetworkPlot( + payoff_tables, rhos, rho_m, pi, strat_labels, num_top_profiles=8) + m_network_plotter.compute_and_draw_network() + + +if __name__ == '__main__': + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/egt/heuristic_payoff_table.py b/scenarios/bargaining/open_spiel/open_spiel/python/egt/heuristic_payoff_table.py new file mode 100644 index 0000000..4e3e8b8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/egt/heuristic_payoff_table.py @@ -0,0 +1,551 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""An object to store the heuristic payoff table for a game.""" + +import abc +import collections +import math + +import numpy as np + +from open_spiel.python.egt import utils + + +def _inc_average(count, average, value): + """Computes the incremental average, `a_n = ((n - 1)a_{n-1} + v_n) / n`.""" + count += 1 + average = ((count - 1) * average + value) / count + return (count, average) + + +def from_match_results(df, consider_agents): + """Builds a heuristic payoff table from average win probabilities. + + Args: + df: a Pandas dataframe of match results. Must contain a column "agents" + consisting of tuples of agent names, and a column "scores" consisting of + the score for each agent in the match. 
+ consider_agents: a list of agent names. Will only consider matches in which + exclusively these agents appeared. + + Returns: + A PayoffTable object. + + Raises: + ValueError: if dataframe is empty, or columns 'agents' and 'scores' not + specified, or games have zero players. + """ + if df.empty: + raise ValueError("Please provide a non-empty dataframe.") + if "agents" not in df.columns: + raise ValueError("Dataframe must contain a column 'agents'.") + if "scores" not in df.columns: + raise ValueError("Dataframe must contain a column 'scores'.") + + num_strategies = len(consider_agents) + num_players = len(df["agents"][0]) + + if num_players == 0: + raise ValueError("Games must have > 0 players.") + + count_per_distribution = {} + win_prob_per_distribution = {} + + for i, row in df.iterrows(): + print("Parsing row {} / {} ...".format(i, len(df)), end="\r") + agents = row["agents"] + scores = row["scores"] + assert len(agents) == len(scores) == num_players + + if not set(agents).issubset(set(consider_agents)): + # Ignore agents outside those we are supposed to consider. + continue + elif len(set(agents)) == 1: + # Special case of self-play: deal with separately. + continue + + # Find winner(s): In each match one must determine a winning strategy. One + # way of doing this is to average over the returns for each strategy and + # then say that the one with the greatest returns is the winner. + + # Get unique score per agent by averaging. + count_per_agent = collections.defaultdict(int) + average_score_per_agent = collections.defaultdict(int) + for agent, score in zip(agents, scores): + count_per_agent[agent], average_score_per_agent[agent] = _inc_average( + count_per_agent[agent], average_score_per_agent[agent], score) + + winner_score = max(average_score_per_agent.values()) + winner_agents = [ + k for k, v in average_score_per_agent.items() if v == winner_score + ] + winner_strategy_idxs = [ + consider_agents.index(winner) for winner in winner_agents + ] + + # Select the winner as the one maximizing the selected statistics. + win_probabilities = np.zeros(num_strategies) + for winner_strategy_idx in winner_strategy_idxs: + win_probabilities[winner_strategy_idx] = 1 / len(winner_strategy_idxs) + + distribution = np.zeros(num_strategies) + for agent, count in count_per_agent.items(): + strategy_idx = consider_agents.index(agent) + distribution[strategy_idx] = count + + distribution = tuple(distribution) + + if distribution not in count_per_distribution: + count_per_distribution[distribution] = 1 + win_prob_per_distribution[distribution] = win_probabilities + continue + (count_per_distribution[distribution], + win_prob_per_distribution[distribution]) = _inc_average( + count_per_distribution[distribution], + win_prob_per_distribution[distribution], win_probabilities) + + # Populate self-play case (strategy both wins and loses). + for idx, agent in enumerate(consider_agents): + distribution = np.zeros(num_strategies) + distribution[idx] = num_players + distribution = tuple(distribution) + win_prob = np.zeros(num_strategies) + win_prob[idx] = 0.5 + win_prob_per_distribution[distribution] = win_prob + + # Create empty (nan) payoff table. + table = PayoffTable(num_players, num_strategies) + + # Populate with win probabilities. + for distribution, payoff in win_prob_per_distribution.items(): + table[distribution] = payoff + + return table + + +def from_matrix_game(matrix_game): + """Returns a PayOffTable given a symmetric 2-player matrix game. 
+ + Args: + matrix_game: The payoff matrix corresponding to a 2-player symmetric game. + """ + + if not isinstance(matrix_game, np.ndarray): + raise ValueError("The matrix game should be a numpy array, not a {}".format( + type(matrix_game))) + num_strats_per_population = ( + utils.get_num_strats_per_population( + payoff_tables=[matrix_game], payoffs_are_hpt_format=False)) + assert len(num_strats_per_population) == 2 + assert num_strats_per_population[0] == num_strats_per_population[1] + num_strategies = num_strats_per_population[0] + + num_profiles = utils.get_num_profiles(num_strats_per_population) + table = PayoffTable(num_players=2, num_strategies=num_strategies) + + # Construct the HPT by filling in the corresponding payoffs for each profile + for id_profile in range(num_profiles): + strat_profile = utils.get_strat_profile_from_id(num_strats_per_population, + id_profile) + distribution = table.get_distribution_from_profile(strat_profile) + # For symmetric matrix games, multiple strategy profiles correspond to the + # same distribution and payoffs. Thus, ensure the table entry has not + # already been filled by a previous strategy profile. + if table.item_is_uninitialized(tuple(distribution)): + payoffs = np.zeros(num_strategies) + payoffs[strat_profile[0]] = matrix_game[strat_profile[0], + strat_profile[1]] + payoffs[strat_profile[1]] = matrix_game[strat_profile[1], + strat_profile[0]] + table[tuple(distribution)] = payoffs + + return table + + +def from_heuristic_payoff_table(hpt): + """Returns a `PayoffTable` instance from a numpy 2D HPT.""" + [num_rows, num_columns] = hpt.shape + assert num_columns % 2 == 0 + num_strategies = int(num_columns / 2) + num_players = np.sum(hpt[0, :num_strategies]) + obj = PayoffTable(num_players, num_strategies, initialize_payoff_table=False) + + # pylint: disable=protected-access + for row in hpt: + payoff_row = np.array(row[num_strategies:]) + obj._payoff_table[tuple(row[:num_strategies])] = payoff_row + + assert len(obj._payoff_table) == num_rows + # pylint: enable=protected-access + return obj + + +def _compute_win_probability_from_elo(rating_1, rating_2): + """Computes the win probability of 1 vs 2 based on the provided Elo ratings. + + Args: + rating_1: The Elo rating of player 1. + rating_2: The Elo rating of player 2. + + Returns: + The win probability of player 1, when playing against 2. + """ + m = max(rating_1, rating_2) # We subtract the max for numerical stability. + + m1 = 10**((rating_1 - m) / 400) + m2 = 10**((rating_2 - m) / 400) + + return m1 / (m1 + m2) + + +def from_elo_scores(elo_ratings, num_agents=2): + """Computes the Elo win probability payoff matrix `X` from the Elo scores. + + Args: + elo_ratings: The elo scores vector of length [num_strategies]. + num_agents: The number of agents. Only 2 agents are supported for now. + + Returns: + The HPT associated to the Elo win probability payoff matrix `X`. The score + for a given agent is given by its win probability given its Elo score. + + Raises: + ValueError: If `num_agents != 2`. + """ + if num_agents != 2: + raise ValueError("Only 2 agents are supported, because we need to compute " + "the win probability and that can only be computed with " + "2 players.") + num_strategies = len(elo_ratings) + + hpt_rows = [] + + possible_teams = utils.distribute(num_agents, num_strategies, normalize=False) + + for distribution_row in possible_teams: + payoff_row = np.zeros([num_strategies]) + non_zero_index = np.nonzero(distribution_row)[0] # Why [0]? 
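A small usage check for `from_elo_scores`, using the same ratings as the unit test added later in this patch:

```python
# With ratings [800, 400, 400], strategy 0 beats either other strategy with
# probability 10**2 / (10**2 + 10**1) ~= 0.909; equally rated strategies
# split the win probability 0.5 / 0.5.
from open_spiel.python.egt import heuristic_payoff_table

hpt = heuristic_payoff_table.from_elo_scores([800, 400, 400])
print(hpt())  # rows: [counts per strategy | win probability per strategy]
```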
+ assert len(non_zero_index.shape) == 1 + + if len(non_zero_index) > 1: + index_first_player, index_second_player = non_zero_index + prob = _compute_win_probability_from_elo(elo_ratings[index_first_player], + elo_ratings[index_second_player]) + payoff_row[index_first_player] = prob + payoff_row[index_second_player] = 1 - prob + elif len(non_zero_index) == 1: + payoff_row[non_zero_index[0]] = 0.5 + else: + assert False, "Impossible case, we have at least one strategy used." + + hpt_rows.append(np.hstack([distribution_row, payoff_row])) + + return NumpyPayoffTable(np.vstack(hpt_rows)) + + +class _PayoffTableInterface(metaclass=abc.ABCMeta): + """An interface for the PayoffTable classes.""" + + @abc.abstractmethod + def __call__(self): + """Returns a view of the table as a np.array.""" + + @property + @abc.abstractmethod + def num_strategies(self): + pass + + @property + @abc.abstractmethod + def num_players(self): + pass + + @property + @abc.abstractmethod + def num_rows(self): + pass + + def expected_payoff(self, strategy): + """The expected payoff of each pure strategy against the mixed strategy. + + We define the expected payoff of a strategy A as the expected payoff of + that strategy over the space of 2 randomly sampled + + The mixed strategy is equivalently the composition of an infinitely large + population. To find the expected payoff, we: + 1. Compute the probabilities of sampling each player distribution in the + heuristic payoff table from the population. + 2. Compute the expected payoff of pure strategy against the mixed + strategy by averaging over the payoff rows with these probabilities. + + For each pure strategy we must normalize by the probability that it appeared + in the player distribution at all; otherwise we would be undercounting. + + For more details, see https://arxiv.org/pdf/1803.06376.pdf. + + Args: + strategy: an `np.array(shape=self._num_strategies)` of probabilities. + + Returns: + An `np.array(shape=self._num_strategies)` of payoffs for pure strategies. + + Raises: + ValueError: if the provided strategy probabilities do not define a valid + distribution over `self._num_strategies` strategies. + """ + if strategy.shape != (self.num_strategies,): + raise ValueError("The strategy probabilities should be of shape " + "({},), not {}".format(self.num_strategies, + strategy.shape)) + if np.around(np.sum(strategy), decimals=3) != 1.0: + raise ValueError("The strategy probabilities should sum to 1.") + if not all([p >= 0 for p in strategy]): + raise ValueError("The strategy probabilities should all be >= 0.") + + distributions = self._distributions.astype(int) + if not np.all(np.isclose(self._distributions, distributions, 1e-10)): + raise ValueError("Conversion to integers for distributions failed.") + + # Multinomial coefficients (one per distribution). + coefficients = _multinomial_coefficients(distributions) + # Probabilities of sampling each distribution given population composition. + probabilities = _row_probabilities(coefficients, distributions, strategy) + + return _expected_payoff(probabilities, self._payoffs, strategy, + self._num_players) + + @property + def _payoffs(self): + """Returns an np.array containing the payoffs.""" + return self()[:, self.num_strategies:] + + @property + def _distributions(self): + """Returns an np.array containing the distribution over pure strategies.""" + return self()[:, :self.num_strategies] + + +class NumpyPayoffTable(object): + """An object wrapping a Numpy array heuristic payoff table for a metagame. 
+ + NOTE: We assume the number of players to be equal to the number of + replicators. + + """ + + def __init__(self, payoff_table, writeable=False): + """Initializes an immutable payoff table. + + Let p be the number of players, k be the number of strategies. Then, there + are Combinations(p + k - 1, k - 1) distinct configurations for the + strategies of the p players. + + The payoff table is of shape [(p + k - 1)! / (p! * (k - 1)!), 2 * k]. + + The first k columns encode the number of players playing each strategies. + + The second k columns encode the average payoff of each strategy in that + game. + + Args: + payoff_table: A numpy heuristic payoff table, which is assumed to be + correctly constructed. + writeable: Whether the numpy array payoff_table should be writeable. See + https://docs.scipy.org/doc/numpy-1.15.1/reference/generated/numpy.ndarray.flags.html. + However, locking a base object does not lock any views that already + reference it, + """ + self._writeable = writeable + self._payoff_table = payoff_table + + [self._num_rows, num_columns] = self._payoff_table.shape + assert num_columns % 2 == 0 + self._num_strategies = int(num_columns / 2) + self._num_players = np.sum(self._payoff_table[0, :self._num_strategies]) + + def __call__(self): + """Returns a view of the table as a np.array. + + The mutability of the object is controlled by `writeable`. + """ + if self._writeable: + return self._payoff_table + else: + return np.copy(self._payoff_table) + + @property + def writeable(self): + return self._writeable + + @writeable.setter + def writeable(self, writeable): + self._writeable = writeable + + @property + def num_strategies(self): + return self._num_strategies + + @property + def num_players(self): + return self._num_players + + @property + def num_rows(self): + return self._num_rows + + +class PayoffTable(_PayoffTableInterface): + """A mutable object to store the heuristic payoff table for a metagame.""" + + def __init__(self, num_players, num_strategies, initialize_payoff_table=True): + """A heuristic payoff table encodes payoffs from various strategy profiles. + + See `NumpyPayoffTable` for the description of the heuristic payoff table. + + Internally, this is represented as an OrderedDict {distribution: payoff}. + + Args: + num_players: The number of players in the game. + num_strategies: The number of strategies an individual could play. + initialize_payoff_table: If `True`, nan entries will be created for all + rows. If `False`, no rows are created at all. + """ + super(PayoffTable, self).__init__() + self.is_hpt = True + self._num_players = num_players + self._num_strategies = num_strategies + self._payoff_table = collections.OrderedDict() + + if initialize_payoff_table: + # Populate empty (nan) payoff table. 
+ player_distributions = utils.distribute(self._num_players, + self._num_strategies) + for d in player_distributions: + self._payoff_table[d] = np.full(self._num_strategies, np.nan) + + def __call__(self): + """Returns a view of the table as a np.array.""" + return np.concatenate((self._distributions, self._payoffs), axis=1) + + @property + def _payoffs(self): + """Returns an np.array containing the payoffs.""" + return np.array(list(self._payoff_table.values())) + + @property + def _distributions(self): + """Returns an np.array containing the distribution over pure strategies.""" + return np.array(list(self._payoff_table)) + + @property + def num_strategies(self): + return self._num_strategies + + @property + def num_players(self): + return self._num_players + + @property + def num_rows(self): + return len(self._payoff_table) + + def __setitem__(self, distribution, payoff): + assert distribution in self._payoff_table + assert len(payoff) == self._num_strategies + self._payoff_table[distribution] = payoff + + def __getitem__(self, distribution): + """Returns the payoff profile for a given strategy distribution. + + Args: + distribution: strategy profile tuple. + + Returns: + Payoff profile for the corresponding strategy distribution. + """ + return self._payoff_table[distribution] + + def item_is_uninitialized(self, distribution): + return np.isnan(np.sum(self._payoff_table[distribution])) + + def get_distribution_from_profile(self, strat_profile): + distribution = [0] * self.num_strategies + for s in strat_profile: + distribution[s] += 1 + return distribution + + +# The following provides utility functions to compute the expected payoff of +# a given strategy profile. +# See https://arxiv.org/pdf/1803.06376.pdf, page 3, left column. +# +# Usage: +# +# coefficients = _multinomial_coefficients(distributions, strategies): +# row_probabilities = _row_probabilities(coefficients, distributions, strategy) +# expected_payoff = _expected_payoff(row_probabilities, payoffs, composition, +# num_players) +# +# +def _multinomial_coefficients(distributions): + """Returns the multinomial coefficients. + + Args: + distributions: The distributions table [num_rows, num_strategies]. + """ + v_factorial = np.vectorize(math.factorial) + # Multinomial coefficients (one per distribution Ni). + # ( P ) + # ( Ni1, Ni1, ... Nik ) + coefficients = ( + v_factorial(np.sum(distributions, axis=1)) / + np.prod(v_factorial(distributions), axis=1)) + + return coefficients + + +def _row_probabilities(coefficients, distributions, strategy): + """Returns the row probabilities [num_rows]. + + Args: + coefficients: The multinomial coefficients [num_rows]. + distributions: The distributions table [num_rows, num_strategies]. + strategy: The strategy array [num_strategies]. + """ + row_probabilities = coefficients * np.prod( + np.power(strategy, distributions), axis=1) + return row_probabilities + + +def _expected_payoff(row_probabilities, payoffs, strategy, num_players): + # pylint: disable=g-doc-args + r"""Returns the expected payoff. + + Computes (with p=num_players): + + r_j = \sum_i row_probabilities[i] * payoffs[i, j] / (1 - (1-strategy[j])^p) + """ + # pylint: enable=g-doc-args + [num_rows] = row_probabilities.shape + [num_rows_2, num_strategies] = payoffs.shape + [num_strategies_2] = strategy.shape + assert num_rows == num_rows_2 + assert num_strategies == num_strategies_2 + + # One per pure strategy. + numerators = np.dot(np.transpose(payoffs), row_probabilities) + # One per pure strategy. 
+ denominators = 1 - np.power(1 - strategy, num_players) + return numerators / denominators diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/egt/heuristic_payoff_table_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/egt/heuristic_payoff_table_test.py new file mode 100644 index 0000000..48e84b0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/egt/heuristic_payoff_table_test.py @@ -0,0 +1,154 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for the heuristic_payoff_table library.""" + +from absl import logging +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np + +from open_spiel.python.egt import heuristic_payoff_table +from open_spiel.python.egt import utils +import pyspiel + + +class ModuleLevelTest(absltest.TestCase): + + def test__multinomial_coefficients(self): + distributions = np.asarray([ + [2, 0], + [1, 1], + [1, 0], + ]) + coefficients = heuristic_payoff_table._multinomial_coefficients( + distributions) + + np.testing.assert_array_equal([1., 2., 1.], coefficients) + + distributions = np.asarray([ + [3, 0], + [2, 1], + [1, 2], + [0, 3], + ]) + coefficients = heuristic_payoff_table._multinomial_coefficients( + distributions) + np.testing.assert_array_equal([1., 3., 3., 1.], coefficients) + + distributions = np.asarray([ + [2, 0, 0], + [0, 2, 0], + [0, 0, 2], + [1, 1, 0], + [1, 0, 1], + [0, 1, 1], + ]) + coefficients = heuristic_payoff_table._multinomial_coefficients( + distributions) + np.testing.assert_array_equal([1., 1., 1., 2., 2., 2.], coefficients) + + +class PayoffTableTest(parameterized.TestCase): + + @parameterized.parameters( + (5, 2), + (2, 2), + ) + def test_construction(self, num_players, num_strategies): + logging.info("Testing payoff table construction.") + table = heuristic_payoff_table.PayoffTable(num_players, num_strategies) + num_rows = utils.n_choose_k(num_players + num_strategies - 1, num_players) + distributions = np.array( + list(utils.distribute(num_players, num_strategies))) + payoffs = np.full([int(num_rows), num_strategies], np.nan) + np.testing.assert_array_equal( + np.concatenate([distributions, payoffs], axis=1), table()) + + def test_from_heuristic_payoff_table(self): + team_compositions = np.asarray([ + [2, 0], + [1, 1], + [0, 2], + ]) + payoffs = np.asarray([ + [1, 2], + [3, 4], + [5, 6], + ]) + hpt = np.hstack([team_compositions, payoffs]) + + table = heuristic_payoff_table.from_heuristic_payoff_table(hpt) + np.testing.assert_array_equal(team_compositions, table._distributions) + np.testing.assert_array_equal(payoffs, table._payoffs) + self.assertEqual(3, table.num_rows) + + distributions = np.asarray([ + [2, 0, 0], + [0, 2, 0], + [0, 0, 2], + [1, 1, 0], + [1, 0, 1], + [0, 1, 1], + ]) + shape = distributions.shape + payoffs = np.reshape(np.arange(np.prod(shape)), shape) + + hpt = np.hstack([distributions, payoffs]) + table = heuristic_payoff_table.from_heuristic_payoff_table(hpt) + 
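Putting `from_matrix_game` and `expected_payoff` together, a short sketch mirroring the `test_expected_payoff` case below (the mixed strategy is the same illustrative one used there):

```python
# Build an HPT from the symmetric RPS payoff matrix, then evaluate each pure
# strategy against a mixed population (0.7 rock, 0.2 paper, 0.1 scissors).
import numpy as np
import pyspiel

from open_spiel.python.egt import heuristic_payoff_table
from open_spiel.python.egt import utils

game = pyspiel.load_matrix_game("matrix_rps")
payoff_tables = utils.game_payoffs_array(game)
table = heuristic_payoff_table.from_matrix_game(payoff_tables[0])

strategy = np.array([0.7, 0.2, 0.1])
print(table.expected_payoff(strategy))  # one entry per pure strategy
```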
np.testing.assert_array_equal(distributions, table._distributions) + np.testing.assert_array_equal(payoffs, table._payoffs) + self.assertEqual(distributions.shape[0], table.num_rows) + + @parameterized.parameters(("matrix_rps",)) + def test_from_matrix_game(self, game): + game = pyspiel.load_matrix_game(game) + payoff_tables = utils.game_payoffs_array(game) + logging.info("Testing payoff table construction for matrix game.") + table = heuristic_payoff_table.from_matrix_game(payoff_tables[0]) + print(table()) + + @parameterized.parameters((np.array([0.7, 0.2, 0.1]),)) + def test_expected_payoff(self, strategy): + logging.info("Testing expected payoff for matrix game.") + game = pyspiel.load_matrix_game("matrix_rps") + payoff_tables = utils.game_payoffs_array(game) + table = heuristic_payoff_table.from_matrix_game(payoff_tables[0]) + expected_payoff = table.expected_payoff(strategy) + print(expected_payoff) + assert len(expected_payoff) == table._num_strategies + + def test_from_elo_scores(self): + elo_scores = [800, 400, 400] + elo_1 = 10**(800 / 400) + elo_2 = 10**(400 / 400) # This is also the associated value for player 3. + expected = np.asarray([ + [2, 0, 0, 1 / 2, 0, 0], + [0, 2, 0, 0, 1 / 2, 0], + [0, 0, 2, 0, 0, 1 / 2], + [1, 1, 0, elo_1 / (elo_1 + elo_2), elo_2 / (elo_1 + elo_2), 0], + [1, 0, 1, elo_1 / (elo_1 + elo_2), 0, elo_2 / (elo_1 + elo_2)], + [0, 1, 1, 0, 1 / 2, 1 / 2], + ]) + + htp = heuristic_payoff_table.from_elo_scores(elo_scores) + + np.testing.assert_array_almost_equal( + utils.sort_rows_lexicographically(expected), + utils.sort_rows_lexicographically(htp()), + verbose=True) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/egt/utils.py b/scenarios/bargaining/open_spiel/open_spiel/python/egt/utils.py new file mode 100644 index 0000000..849fc39 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/egt/utils.py @@ -0,0 +1,499 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utils for evolutionary game theoretic analysis of games.""" + +import itertools +import math + +import numpy as np + +import pyspiel + + +def n_choose_k(n, k): + """Returns the combination choose k among n items.""" + f = math.factorial + return int(f(n) / f(k) / f(n - k)) + + +def grid_simplex(step=.1, boundary=False): + """Generator for regular 'lattice' on the 2-simplex. + + Args: + step: Defines spacing along one dimension. + boundary: Include points on the boundary/face of the simplex. + + Yields: + Next point on the grid. + """ + eps = 1e-8 + start = 0. if boundary else step + stop = 1. + eps if boundary else 1. - step + eps + for a in np.arange(start, stop, step, dtype=np.double): + for b in np.arange(start, stop - a, step, dtype=np.double): + yield [a, b, 1. - a - b] + + +def sample_from_simplex(n, dim=3, vmin=0.): + """Samples random points from a k-simplex. + + See Donald B. Rubin (1981) "The Bayesian Bootstrap", page 131. 
+ + Args: + n: Number of points that are sampled. + dim: Dimension of the points to be sampled, e.g. dim=3 samples points from + the 2-simplex. + vmin: Minimum value of any coordinate of the resulting points, e.g. set + vmin>0. to exclude points on the faces of the simplex. + + Returns: + `ndarray(shape=(k, dim))` of uniformly random points on the (num-1)-simplex. + """ + assert vmin >= 0. + p = np.random.rand(n, dim - 1) + p = np.sort(p, axis=1) + p = np.hstack((np.zeros((n, 1)), p, np.ones((n, 1)))) + return (p[:, 1:] - p[:, 0:-1]) * (1 - 2 * vmin) + vmin + + +def game_payoffs_array(game): + """Returns a `numpy.ndarray` of utilities for a game. + + NOTE: if the game is not a MatrixGame or a TensorGame then this may be costly. + + Args: + game: A game. + + Returns: + `numpy.ndarray` of dimension `num_players` + 1. + First dimension is the player, followed by the actions of all players, e.g. + a 3x3 game (2 players) has dimension [2,3,3]. + """ + if isinstance(game, pyspiel.MatrixGame): + return np.stack([game.row_utilities(), game.col_utilities()]) + + if not isinstance(game, pyspiel.TensorGame): + game = pyspiel.extensive_to_tensor_game(game) + return np.stack( + [game.player_utilities(player) for player in range(game.num_players())]) + + +def distribute(num_items, num_slots, normalize=False): + """Yields all ways of distributing `num_items` items over `num_slots` slots. + + We assume that the ordering of the slots doesn't matter. + + Args: + num_items: The number of items to distribute. + num_slots: The number of slots. + normalize: Normalizes the yielded tuple to contain floats in [0, 1] summing + to 1. + + Yields: + A tuple T containing `num_slots` positive integers such that + `np.sum(T) == num_items` if `normalize == False` or `np.sum(T) == 1` if + `normalize == True'. + """ + normalization = 1 + if normalize: + normalization = num_items + # This is just the standard "bars and stars" problem. + # See https://stackoverflow.com/questions/28965734/general-bars-and-stars. + for c in itertools.combinations( + range(num_items + num_slots - 1), num_slots - 1): + # The combinations give you the indices of the internal bars. + # pylint: disable=g-complex-comprehension + yield tuple((b - a - 1) / normalization + for (a, b) in zip([ + -1, + ] + list(c), + list(c) + [num_items + num_slots - 1])) + + +def assert_is_1d_numpy_array(array): + if not isinstance(array, np.ndarray): + raise ValueError("The argument must be a numpy array, not a {}.".format( + type(array))) + + if len(array.shape) != 1: + raise ValueError( + "The argument must be 1-dimensional, not of shape {}.".format( + array.shape)) + + +def assert_probabilities(array): + if not all([item >= 0 for item in array]): + raise ValueError("The vector must have all elements >= 0 items, not" + "{}".format(array)) + sum_ = np.sum(array) + if not np.isclose(1, sum_): + raise ValueError( + "The sum of the probabilities must be 1, not {}".format(sum_)) + + +def sort_rows_lexicographically(array): + """Returns a numpy array with lexicographic-ordered rows. + + This function can be used to check that 2 Heuristic Payoff Tables are equal, + by normalizing them using a fixed ordering of the rows. + + Args: + array: The 2D numpy array to sort by rows. + """ + return np.array(sorted(array.tolist())) + + +def get_valid_next_profiles(num_strats_per_population, cur_profile): + """Generates monomorphic strategy profile transitions given cur_profile. 
+ + Given a current strategy profile, cur_profile, this generates all follow-up + profiles that involve only a single other population changing its current + monomorphic strategy to some other monomorphic strategy. Note that + self-transitions from cur_profile to cur_profile are not included here, as + they are a special case in our Markov chain. + + Args: + num_strats_per_population: List of strategy sizes for each population. + cur_profile: Current strategy profile. + + Yields: + The next valid strategy profile transition. + """ + num_populations = len(num_strats_per_population) + + for i_population_to_change in range(num_populations): + for new_strat in range(num_strats_per_population[i_population_to_change]): + # Ensure a transition will actually happen + if new_strat != cur_profile[i_population_to_change]: + next_profile = cur_profile.copy() + next_profile[i_population_to_change] = new_strat + yield i_population_to_change, next_profile + + +def get_num_strats_per_population(payoff_tables, payoffs_are_hpt_format): + """Returns a [num_populations] array of the num. + + of strategies per population. + + E.g., for a 3 population game, this returns + [num_strats_population1, num_strats_population2, num_strats_population3] + + Args: + payoff_tables: List of game payoff tables, one for each agent identity. Each + payoff_table may be either a 2D numpy array, or a _PayoffTableInterface + object. + payoffs_are_hpt_format: True indicates HPT format (i.e. + _PayoffTableInterface object, False indicates 2D numpy array. + """ + + if payoffs_are_hpt_format: + return np.asarray( + [payoff_table.num_strategies for payoff_table in payoff_tables]) + else: + # Non-HPT payoffs are matrices, so can directly return the payoff size + return np.asarray(np.shape(payoff_tables[0])) + + +def get_num_profiles(num_strats_per_population): + """Returns the total number of pure strategy profiles. + + Args: + num_strats_per_population: A list of size `num_populations` of the number of + strategies per population. + + Returns: + The total number of pure strategy profiles. + """ + return np.prod(num_strats_per_population) + + +def get_strat_profile_labels(payoff_tables, payoffs_are_hpt_format): + """Returns strategy labels corresponding to a payoff_table. + + Namely, for games where strategies have no human-understandable labels + available, this function returns a labels object corresponding to the + strategy profiles. + + Examples: + Generated labels for a single-population game with 3 strategies: + ['0','1','2']. + Generated labels for a 3-population game with 2 strategies per population: + {0: ['0','1'], 1: ['0','1'], 2: ['0','1']} + + Args: + payoff_tables: List of game payoff tables, one for each agent identity. Each + payoff_table may be either a 2D numpy array, or a _PayoffTableInterface + object. + payoffs_are_hpt_format: Boolean indicating whether each payoff table in + payoff_tables is a 2D numpy array, or a _PayoffTableInterface object (AKA + Heuristic Payoff Table or HPT). True indicates HPT format, False indicates + 2D numpy array. + + Returns: + Strategy labels. 
+ """ + + num_populations = len(payoff_tables) + + if num_populations == 1: + num_strats_per_population = get_num_strats_per_population( + payoff_tables, payoffs_are_hpt_format) + labels = [str(x) for x in range(num_strats_per_population[0])] + else: + num_strats_per_population = get_num_strats_per_population( + payoff_tables, payoffs_are_hpt_format) + labels = dict() + label_text = [] + # Construct a list of strategy labels for each population + for num_strats in num_strats_per_population: + label_text.append([str(i_strat) for i_strat in range(num_strats)]) + population_ids = range(num_populations) + labels = dict(zip(population_ids, label_text)) + + return labels + + +def get_strat_profile_from_id(num_strats_per_population, profile_id): + """Returns the strategy profile corresponding to a requested strategy ID. + + This is the inverse of the function get_id_from_strat_profile(). See that + function for the indexing mechanism. + + Args: + num_strats_per_population: List of strategy sizes for each population. + profile_id: Integer ID of desired strategy profile, in + {0,...,get_num_profiles-1}. + + Returns: + The strategy profile whose ID was looked up. + """ + + num_populations = len(num_strats_per_population) + strat_profile = np.zeros(num_populations, dtype=np.int32) + + for i_population in range(num_populations - 1, -1, -1): + strat_profile[i_population] = ( + profile_id % num_strats_per_population[i_population]) + profile_id = profile_id // num_strats_per_population[i_population] + + return strat_profile + + +def get_label_from_strat_profile(num_populations, strat_profile, strat_labels): + """Returns a human-readable label corresponding to the strategy profile. + + E.g., for Rock-Paper-Scissors, strategies 0,1,2 have labels "R","P","S". + For strat_profile (1,2,0,1), this returns "(P,S,R,P)". If strat_profile is a + single strategy (e.g., 0) this returns just its label (e.g., "R"). + + Args: + num_populations: Number of populations. + strat_profile: Strategy profile of interest. + strat_labels: Strategy labels. + + Returns: + Human-readable label string. + """ + if num_populations == 1: + return strat_labels[strat_profile] + else: + label = "(" + for i_population, i_strat in enumerate(strat_profile): + label += strat_labels[i_population][i_strat] + if i_population < len(strat_profile) - 1: + label += "," + label += ")" + return label + + +def get_id_from_strat_profile(num_strats_per_population, strat_profile): + """Returns a unique integer ID representing the requested strategy profile. + + Map any `strat_profile` (there are `np.prod(num_strats_per_population)` such + profiles) to {0,..., num_strat_profiles - 1}. + + The mapping is done using a usual counting strategy: With + num_strats_per_population = [a1, ..., a_n] + strat_profile = [b1, ..., b_n] + + we have + + id = b_1 + a1 * (b2 + a_2 * (b3 + a_3 *...)) + + + This is helpful for querying the element of our finite-population Markov + transition matrix that corresponds to a transition between a specific pair of + strategy profiles. + + Args: + num_strats_per_population: List of strategy sizes for each population. + strat_profile: The strategy profile (list of integers corresponding to the + strategy of each agent) whose ID is requested. + + Returns: + Unique ID of strat_profile. 
+ """ + + if len(strat_profile) == 1: + return strat_profile[0] + + return strat_profile[-1] + (num_strats_per_population[-1] * + get_id_from_strat_profile( + num_strats_per_population[:-1], + strat_profile[:-1])) + + +def compute_payoff(row_profile, col_profile, row_payoff_table): + """Returns row's expected payoff in a bimatrix game. + + Args: + row_profile: Row's strategy profile. + col_profile: Column's strategy profile. + row_payoff_table: Row's payoff table. + """ + + return np.dot(np.dot(row_profile.T, row_payoff_table), col_profile) + + +def check_is_constant_sum(payoff_table, payoffs_are_hpt_format): + """Checks if single-population matrix game is constant-sum. + + Args: + payoff_table: Either a 2D numpy array, or a _PayoffTableInterface object. + payoffs_are_hpt_format: Boolean indicating whether payoff table is a + _PayoffTableInterface object (AKA Heuristic Payoff Table or HPT), or a 2D + numpy array. True indicates HPT, and False indicates numpy array. + + Returns: + is_constant_sum: Boolean, True if constant-sum game. + payoff_sum: Payoff sum if game is constant-sum, or None if not. + """ + + if payoffs_are_hpt_format: + payoff_sum_table = np.asarray(payoff_table._payoffs).sum(axis=1) # pylint: disable=protected-access + is_constant_sum = np.isclose( + payoff_sum_table, payoff_sum_table[0], atol=1e-14).all() + payoff_sum = payoff_sum_table[0] if is_constant_sum else None + else: + payoff_sum_table = payoff_table + payoff_table.T + is_constant_sum = np.isclose( + payoff_sum_table, payoff_sum_table[0, 0], atol=1e-14).all() + payoff_sum = payoff_sum_table[0, 0] if is_constant_sum else None + return is_constant_sum, payoff_sum + + +def cluster_strats(pi, matching_decimals=4): + """Clusters strategies using stationary distribution (pi) masses. + + Args: + pi: stationary distribution. + matching_decimals: the number of stationary distribution decimals that + should match for strategies to be considered in the same cluster. + + Returns: + Dictionary that maps unique stationary distribution masses to strategies. + """ + + rounded_masses = pi.round(decimals=matching_decimals) + masses_to_strats = {} + for i in np.unique(rounded_masses): + masses_to_strats[i] = np.where(rounded_masses == i)[0] + return masses_to_strats + + +def print_rankings_table(payoff_tables, + pi, + strat_labels, + num_top_strats_to_print=8): + """Prints nicely-formatted table of strategy rankings. + + Args: + payoff_tables: List of game payoff tables, one for each agent identity. Each + payoff_table may be either a 2D numpy array, or a _PayoffTableInterface + object. + pi: Finite-population Markov chain stationary distribution. + strat_labels: Strategy labels. + num_top_strats_to_print: Number of top strategies to print. + """ + + num_populations = len(payoff_tables) + payoffs_are_hpt_format = check_payoffs_are_hpt(payoff_tables) + num_strats_per_population = get_num_strats_per_population( + payoff_tables, payoffs_are_hpt_format) + + # More than total number of strats requested for printing, compute top and + # use an extra row to indicate additional strategies not shown. + row_for_lowrank_strats = True + if num_top_strats_to_print >= len(pi): + num_top_strats_to_print = len(pi) + row_for_lowrank_strats = False + + # Cluster strategies according to stationary distr. 
(in case of tied ranks) + masses_to_strats = cluster_strats(pi) + + def print_3col(col1, col2, col3): + print("%-12s %-12s %-12s" % (col1, col2, col3)) + + print_3col("Agent", "Rank", "Score") + print_3col("-----", "----", "-----") + + rank = 1 + num_strats_printed = 0 + # Print a table of strategy rankings from highest to lowest mass + for _, strats in sorted(masses_to_strats.items(), reverse=True): + for strat in strats: + if num_strats_printed >= num_top_strats_to_print: + break + rounded_pi = np.round(pi[strat], decimals=2) + if num_populations == 1: + strat_profile = strat + else: + strat_profile = get_strat_profile_from_id(num_strats_per_population, + strat) + label = get_label_from_strat_profile(num_populations, strat_profile, + strat_labels) + print_3col(label, str(rank), str(np.abs(rounded_pi))) + num_strats_printed += 1 + rank += 1 + if num_strats_printed >= num_top_strats_to_print: + break + + # Ellipses to signify additional low-rank strategies are not printed + if row_for_lowrank_strats: + print_3col("...", "...", "...") + + +def is_symmetric_matrix_game(payoff_tables): + """Checks if payoff_tables corresponds to a symmetric matrix game.""" + payoffs_are_hpt_format = check_payoffs_are_hpt(payoff_tables) + + if len(payoff_tables) == 2: + if payoffs_are_hpt_format and np.array_equal(payoff_tables[0](), + payoff_tables[1]()): + return True, [payoff_tables[0]] + elif ~payoffs_are_hpt_format and np.array_equal(payoff_tables[0], + payoff_tables[1].T): + return True, [payoff_tables[0]] + return False, payoff_tables + + +def check_payoffs_are_hpt(payoff_tables): + """Returns True if payoffs are in HPT format.""" + if isinstance(payoff_tables[0], np.ndarray): + return False + elif hasattr(payoff_tables[0], "is_hpt") and payoff_tables[0].is_hpt: + return True + else: + raise TypeError("payoff_tables should be a list of payoff matrices/hpts.") diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/egt/utils_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/egt/utils_test.py new file mode 100644 index 0000000..90beaeb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/egt/utils_test.py @@ -0,0 +1,195 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.egt.utils.""" + +import itertools +from absl.testing import absltest + +from absl.testing import parameterized +import numpy as np + +from open_spiel.python.egt import utils +import pyspiel + + +def _generate_prob_profiles(num_items, num_slots): + """Another implementation of `distribution` for test purposes. + + This function is the original implementation from Karl. jblespiau@ find it + useful to add it here as: 1) an additional test of our function 2) a check + that the initial code is correct too. + + Args: + num_items: The number of items to distribute. + num_slots: The number of slots. + + Returns: + A numpy array of shape [num_distributions, num_slots]. 
+ """ + if num_slots == 1: + return np.array([num_items]) + + num_rows = utils.n_choose_k(num_items + num_slots - 1, num_items) + distributions = np.empty([num_rows, num_slots]) + + ind = 0 + for i in range(0, num_items + 1): + n_tmp = num_items - i + k_tmp = num_slots - 1 + distributions_tmp = _generate_prob_profiles(n_tmp, k_tmp) + distributions[ind:ind + + np.shape(distributions_tmp)[0], :] = np.column_stack( + (np.array((np.ones(np.shape(distributions_tmp)[0]) * i)), + distributions_tmp)) + ind = ind + np.shape(distributions_tmp)[0] + + return distributions + + +class UtilsTest(parameterized.TestCase): + + @parameterized.parameters( + (5, 3, False), + (2, 2, True), + ) + def test_distribution(self, num_items, num_slots, normalize): + distribution = list(utils.distribute(num_items, num_slots, normalize)) + # Correct length. + # See https://en.wikipedia.org/wiki/Stars_and_bars_%28combinatorics%29. + self.assertLen(distribution, + utils.n_choose_k(num_items + num_slots - 1, num_items)) + # No duplicates. + self.assertLen(distribution, len(set(distribution))) + sum_distribution = num_items if not normalize else 1 + for d in distribution: + self.assertTrue(sum_distribution, sum(d)) + self.assertTrue((np.asarray(d) >= 0).all()) + + @parameterized.parameters( + (5, 3), + (2, 2), + (3, 3), + (10, 5), + ) + def test_distribution_equivalent_implementation(self, num_items, num_slots): + dist_list = list(utils.distribute(num_items, num_slots, normalize=False)) + distribution = np.vstack(dist_list) + + other_implementation = _generate_prob_profiles(num_items, num_slots) + np.testing.assert_array_equal( + utils.sort_rows_lexicographically(distribution), + utils.sort_rows_lexicographically(other_implementation)) + + def test_sort_rows_lexicographically(self): + array = np.asarray([ + [1, 1, 0], + [1, 2, 0], + [3, 1, 0], + [0, 0, 4], + ]) + expected = np.asarray([ + [0, 0, 4], + [1, 1, 0], + [1, 2, 0], + [3, 1, 0], + ]) + + np.testing.assert_equal(expected, utils.sort_rows_lexicographically(array)) + + def test_id_profile_mapping(self): + """Tests forward and backward mapping of pure strategy profiles to IDs.""" + + num_strats_per_population = np.asarray([4, 4, 4, 9]) + num_pure_profiles = np.prod(num_strats_per_population) + + strat_ranges = [ + range(num_strats) for num_strats in num_strats_per_population + ] + + id_list = [] + for strat_profile in itertools.product(strat_ranges[0], strat_ranges[1], + strat_ranges[2], strat_ranges[3]): + profile_id = utils.get_id_from_strat_profile(num_strats_per_population, + strat_profile) + id_list.append(profile_id) + + # Tests backward mapping (ID-to-profile lookup) + strat_profile_from_id = utils.get_strat_profile_from_id( + num_strats_per_population, profile_id) + np.testing.assert_array_equal(strat_profile, strat_profile_from_id) + + # Tests forward mapping (profile-to-ID lookup) + np.testing.assert_array_equal(id_list, range(num_pure_profiles)) + + def test_get_valid_next_profiles(self): + """Tests next-profile generator.""" + + num_strats_per_population = np.asarray([4, 5, 9, 7]) + cur_profile = np.asarray([1, 1, 2, 1]) + next_profiles = utils.get_valid_next_profiles(num_strats_per_population, + cur_profile) + + num_next_profiles = 0 + for _, _ in next_profiles: + num_next_profiles += 1 + + expected = (num_strats_per_population - 1).sum() + np.testing.assert_equal(expected, num_next_profiles) + + def test_constant_sum_checker(self): + """Tests if verification of constant-sum game is correct.""" + + game = pyspiel.load_matrix_game("matrix_rps") + 
payoff_tables = utils.game_payoffs_array(game) + payoffs_are_hpt_format = utils.check_payoffs_are_hpt(payoff_tables) + game_is_constant_sum, payoff_sum = utils.check_is_constant_sum( + payoff_tables[0], payoffs_are_hpt_format) + self.assertTrue(game_is_constant_sum) + self.assertEqual(payoff_sum, 0.) + + def test_game_payoffs_array_rps(self): + """Test `game_payoffs_array` for rock-paper-scissors.""" + game = pyspiel.load_matrix_game("matrix_rps") + payoff_matrix = np.empty(shape=(2, 3, 3)) + payoff_row = np.array([[0., -1., 1.], [1., 0., -1.], [-1., 1., 0.]]) + payoff_matrix[0] = payoff_row + payoff_matrix[1] = -1. * payoff_row + np.testing.assert_allclose(utils.game_payoffs_array(game), payoff_matrix) + + def test_game_payoffs_array_pd(self): + """Test `game_payoffs_array` for prisoners' dilemma.""" + game = pyspiel.load_matrix_game("matrix_pd") + payoff_matrix = np.empty(shape=(2, 2, 2)) + payoff_row = np.array([[5., 0.], [10., 1.]]) + payoff_matrix[0] = payoff_row + payoff_matrix[1] = payoff_row.T + np.testing.assert_allclose(utils.game_payoffs_array(game), payoff_matrix) + + @parameterized.parameters( + (100, 2, 0.), + (100, 3, 0.), + (100, 4, 0.), + (100, 2, 0.05), + ) + def test_sample_from_simplex(self, n, dim, vmin): + """Test `sample_from_simplex`.""" + x = utils.sample_from_simplex(n, dim=dim, vmin=vmin) + np.testing.assert_allclose(np.sum(x, axis=1), np.ones(n)) + self.assertTrue(np.all(x <= 1. - vmin)) + self.assertTrue(np.all(x >= vmin)) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/egt/visualization.py b/scenarios/bargaining/open_spiel/open_spiel/python/egt/visualization.py new file mode 100644 index 0000000..d05de20 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/egt/visualization.py @@ -0,0 +1,609 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Visualization for single/multi-population dynamics in normal-form games. + + Example: + + game = pyspiel.load_game("matrix_pd") + payoff_tensor = utils.game_payoffs_array(game) + dyn = dynamics.MultiPopulationDynamics(payoff_tensor, dynamics.replicator) + + ax = plt.subplot(projection="2x2") + ax.quiver(dyn) +""" + +from absl import logging + +# pylint: disable=g-import-not-at-top +try: + from matplotlib import axes + from matplotlib import projections + from matplotlib import transforms + from matplotlib import font_manager + from matplotlib import rcParams + from matplotlib.text import Text + from matplotlib.path import Path + from matplotlib.patches import PathPatch + from matplotlib.patches import FancyArrowPatch + from matplotlib.collections import LineCollection + import matplotlib.cm + import matplotlib.colors +except ImportError as e: + logging.info("If your tests failed with the error 'ImportError: No module " + "named functools_lru_cache', this is a known bug in matplotlib " + "and there is a workaround (run sudo apt install " + "python-backports.functools-lru-cache. 
See: " + "https://github.com/matplotlib/matplotlib/issues/9344.") + raise ImportError(str(e)) from e + +import numpy as np + +from open_spiel.python.egt import utils + + +def _eval_dynamics_2x2_grid(dynamics, num_points): + """Evaluates dynamics on a 2-D mesh-grid. + + Args: + dynamics: Population dynamics of type `dynamics.MultiPopulationDynamics`. + num_points: Number of points along each dimension of the grid. + + Returns: + Mesh-grid (x, y) and corresponding derivatives of the first action for + player 1 and 2 (u, v). + """ + assert dynamics.payoff_tensor.shape == (2, 2, 2) + + x = np.linspace(0., 1., num_points + 2)[1:-1] + x, y = np.meshgrid(x, x) + u = np.empty(x.shape) + v = np.empty(x.shape) + + for i in range(num_points): + for j in range(num_points): + row_state = np.array([x[i, j], 1. - x[i, j]]) + col_state = np.array([y[i, j], 1. - y[i, j]]) + state = np.concatenate((row_state, col_state)) + dstate = dynamics(state) + u[i][j] = dstate[0] + v[i][j] = dstate[2] + return x, y, u, v + + +def _rk12_step(func, y0, dt): + """Improved Euler-Integration step to integrate dynamics. + + Args: + func: Function handle to time derivative. + y0: Current state. + dt: Integration step. + + Returns: + Next state. + """ + dy = func(y0) + y_ = y0 + dt * dy + return y0 + dt / 2. * (dy + func(y_)) + + +class Dynamics2x2Axes(axes.Axes): + """Axes for 2x2 game dynamics. + + This class provides plotting functions for dynamics in two-player 2x2 games. + + Attributes: + name: Used for projection keyword when creating a new axes. + """ + name = "2x2" + + def cla(self): + """Clear the current axes.""" + super(Dynamics2x2Axes, self).cla() + self.set_aspect("equal") + self.set_xlim(0, 1) + self.set_ylim(0, 1) + + def quiver(self, + dynamics, + num_points=9, + normalize=False, + pivot="middle", + **kwargs): + """Visualizes the dynamics as a directional field plot. + + Args: + dynamics: Population dynamics of type `dynamics.MultiPopulationDynamics`. + num_points: Number of points along each dimension of the plot. + normalize: Normalize each arrow to unit-length. + pivot: In `{"tail", "middle", "tip"}`, optional, default: "middle". The + part of the arrow that is anchored to the X, Y grid. The arrow rotates + about this point. + **kwargs: Additional keyword arguments passed on to `Axes.quiver`. + + Returns: + The `quiver.Quiver` object created by calling `Axes.quiver`. + """ + x, y, u, v = _eval_dynamics_2x2_grid(dynamics, num_points) + + if normalize: + norm = np.sqrt(u**2 + v**2) + u = np.divide(u, norm, out=np.zeros_like(u), where=norm != 0) + v = np.divide(v, norm, out=np.zeros_like(v), where=norm != 0) + + return super(Dynamics2x2Axes, self).quiver( + x, y, u, v, pivot=pivot, **kwargs) + + def streamplot(self, + dynamics, + num_points=50, + linewidth=None, + color=None, + **kwargs): + """Visualizes the dynamics as a streamline plot. + + Args: + dynamics: Population dynamics of type `dynamics.MultiPopulationDynamics`. + num_points: Number of points along each dimension of the plot. + linewidth: In `{None, float, "velocity"}`, optional, default: None. If + `linewidth="velocity"`, line width is scaled by the velocity of the + dynamics. Defaults to `rcParams` if `linewidth=None`. + color: In `{None, string, (r,g,b), (r,g,b,a), "velocity"}`, default: None. + If `color="velocity"`, velocity of dynamics is used to color the + streamlines. Defaults to `rcParams` if `color=None`. + **kwargs: Additional keyword arguments passed on to `Axes.streamplot`. 
+ + Returns: + The `streamplot.StreamplotSet` created by calling `Axes.streamplot`. + """ + + x, y, u, v = _eval_dynamics_2x2_grid(dynamics, num_points) + + if linewidth == "velocity" or color == "velocity": + vel = np.sqrt(u**2 + v**2) + vel = vel - np.min(vel) + vel = vel / np.max(vel) + + if linewidth == "velocity": + linewidth = 3. * vel + + if color == "velocity": + color = vel + + return super(Dynamics2x2Axes, self).streamplot( + x, y, u, v, minlength=0.1, linewidth=linewidth, color=color, **kwargs) + + +projections.register_projection(Dynamics2x2Axes) + + +class SimplexTransform(transforms.Transform): + """Affine transform to project the 2-simplex to 2D Cartesian space.""" + input_dims = 3 + output_dims = 2 + + _MATRIX = np.array([[0., 0.], [1., 0.], [0.5, np.sqrt(3) / 2.]]) + + def transform_affine(self, values): + return np.matmul(values, SimplexTransform._MATRIX) + + +class SimplexStreamMask(object): + """Mask of regular discrete cells to track trajectories/streamlines. + + Also see `matplotlib.streamplot.StreamMask`. + """ + + def __init__(self, density=1.): + self._n = int(30. * density) + self._mask = np.zeros([self._n + 1] * 2 + [2], dtype=bool) + self.shape = self._mask.shape + + def index(self, point): + """Computes index given a point on the simplex.""" + point = np.array(point) + idx = np.floor(point[:2] * self._n).astype(int) + x, y = point[:2] * self._n - idx + z = int(x + y > 1) + return tuple(idx.tolist() + [z]) + + def point(self, index): + """Computes point on the simplex given an index.""" + p = np.empty((3,)) + p[0] = (index[0] + (1 + index[2]) / 3.) / float(self._n) + p[1] = (index[1] + (1 + index[2]) / 3.) / float(self._n) + p[2] = 1. - p[0] - p[1] + return p if p[2] > 0. else None + + def __getitem__(self, point): + return self._mask.__getitem__(self.index(point)) + + def __setitem__(self, point, val): + return self._mask.__setitem__(self.index(point), val) + + +class Dynamics3x3Axes(axes.Axes): + """Axes for 3x3 game dynamics. + + This class provides plotting functions for dynamics in symmetric 3x3 games. + + Attributes: + name: Used for projection keyword when creating a new axes. + """ + name = "3x3" + _VERTICES = [[1, 0, 0], [0, 1, 0], [0, 0, 1]] + + def __init__(self, fig, rect, *args, **kwargs): + self._simplex_transform = SimplexTransform() + self._labels = None + super(axes.Axes, self).__init__(fig, rect, *args, **kwargs) + + def cla(self): + """Clear the current axes.""" + super(axes.Axes, self).cla() + self.set_aspect("equal") + self.get_xaxis().set_visible(False) + self.get_yaxis().set_visible(False) + self.patch.set_visible(False) + self.set_frame_on(False) + + # draw invisiple vertices to set x/y limits of plot + self.scatter(Dynamics3x3Axes._VERTICES, alpha=0.) 
+ self.margins(0.15) + + self.bgpatch = self._create_bgpatch( + facecolor=rcParams["axes.facecolor"], + edgecolor=rcParams["axes.edgecolor"], + linewidth=rcParams["axes.linewidth"], + zorder=-1) + self.add_artist(self.bgpatch) + + if rcParams["axes.grid"]: + self.grid = self._create_grid( + color=rcParams["grid.color"], + alpha=rcParams["grid.alpha"], + linestyle=rcParams["grid.linestyle"], + linewidth=rcParams["grid.linewidth"], + zorder=0) + self.add_collection(self.grid) + + self.ticks, self.tick_labels = self._create_ticks( + color=rcParams["xtick.color"], zorder=0) + self.add_collection(self.ticks) + for label in self.tick_labels: + self.add_artist(label) + + def _create_bgpatch(self, **kwargs): + codes = [Path.MOVETO] + [Path.LINETO] * 2 + [Path.CLOSEPOLY] + vertices = self._VERTICES + [self._VERTICES[0]] + vertices = self._simplex_transform.transform(np.array(vertices)) + return PathPatch(Path(vertices, codes), **kwargs) + + def _create_grid(self, step=0.2, **kwargs): + x = np.arange(step, 1., step) + n = x.shape[0] + line_start, line_end = np.zeros((n, 3)), np.zeros((n, 3)) + line_start[:, 0] = line_end[::-1, 1] = x + line_start[:, 2] = line_end[::-1, 0] = 1. - x + segs = np.zeros((3 * n, 2, 2)) + for i, perm in enumerate([(0, 2, 1), (1, 0, 2), (2, 1, 0)]): + start = self._simplex_transform.transform(line_start[:, perm]) + end = self._simplex_transform.transform(line_end[:, perm]) + segs[i * n:(i + 1) * n, 0, :], segs[i * n:(i + 1) * n, 1, :] = start, end + line_segments = LineCollection(segs, **kwargs) + return line_segments + + def _create_ticks(self, step=0.2, tick_length=0.025, **kwargs): + x = np.arange(step, 1., step) + n = x.shape[0] + + tick_start, tick_end = np.zeros((n, 3)), np.zeros((n, 3)) + tick_start[:, 0] = x + tick_start[:, 2] = 1. - x + tick_end[:, 0] = x + tick_end[:, 2] = 1. 
- x + tick_length + tick_end[:, 1] = -tick_length + + tick_labels = [] + ha = ["center", "left", "right"] + va = ["top", "bottom", "center"] + rot = [-60, 60, 0] + + segs = np.zeros((n * 3, 2, 2)) + for i, perm in enumerate([(0, 2, 1), (1, 0, 2), (2, 1, 0)]): + start = self._simplex_transform.transform(tick_start[:, perm]) + end = self._simplex_transform.transform(tick_end[:, perm]) + segs[i * n:(i + 1) * n, 0, :], segs[i * n:(i + 1) * n, 1, :] = start, end + + for j, x_ in enumerate(x): + tick_labels.append( + Text( + end[j, 0], + end[j, 1], + "{0:.1f}".format(x_), + horizontalalignment=ha[i], + verticalalignment=va[i], + rotation=rot[i], + color=kwargs["color"], + fontsize=rcParams["xtick.labelsize"])) + line_segments = LineCollection(segs, **kwargs) + return line_segments, tick_labels + + def _create_labels(self, labels, padding): + artists = [] + aligns = ["top", "top", "bottom"] + for label, pos, align in zip(labels, self._VERTICES, aligns): + x, y = self._simplex_transform.transform(pos) + labelpad = padding if align == "bottom" else -padding + label = Text( + x=x, + y=y + labelpad, + text=label, + fontproperties=font_manager.FontProperties( + size=rcParams["axes.labelsize"], + weight=rcParams["axes.labelweight"]), + color=rcParams["axes.labelcolor"], + verticalalignment=align, + horizontalalignment="center") + artists.append(label) + return artists + + def get_labels(self): + return self._labels + + def set_labels(self, labels, padding=0.02): + assert len(labels) == 3 + if self._labels is None: + self._labels = self._create_labels(labels, padding) + for label in self._labels: + self.add_artist(label) + else: + for artist, label in zip(self._labels, labels): + artist.set_text(label) + + labels = property(get_labels, set_labels) + + def can_zoom(self): + return False + + def can_pan(self): + return False + + def plot(self, points, **kwargs): + """Creates a line plot. + + Args: + points: Points in policy space. + **kwargs: Additional keyword arguments passed on to `Axes.plot`. + + Returns: + The line plot. + """ + points = np.array(points) + assert points.shape[1] == 3 + points = self._simplex_transform.transform(points) + return super(Dynamics3x3Axes, self).plot(points[:, 0], points[:, 1], + **kwargs) + + def scatter(self, points, **kwargs): + """Creates a scatter plot. + + Args: + points: Points in policy space. + **kwargs: Additional keyword arguments passed on to `Axes.scatter`. + + Returns: + The scatter plot. + """ + points = np.array(points) + assert points.shape[1] == 3 + points = self._simplex_transform.transform(points) + return super(Dynamics3x3Axes, self).scatter(points[:, 0], points[:, 1], + **kwargs) + + def quiver(self, + dynamics, + step=0.05, + boundary=False, + normalize=False, + pivot="middle", + **kwargs): + """Visualizes the dynamics as a directional field plot. + + Args: + dynamics: Population dynamics of type `dynamics.SinglePopulationDynamics`. + step: Distance between arrows along one dimension. + boundary: Include arrows on the boundary/face of the simplex. + normalize: Normalize each arrow to unit-length. + pivot: In `{"tail", "middle", "tip"}`, optional, default: "middle". The + part of the arrow that is anchored to the X, Y grid. The arrow rotates + about this point. + **kwargs: Additional keyword arguments passed on to `Axes.quiver`. + + Returns: + The `quiver.Quiver` object created by calling `Axes.quiver`. 
+ """ + x = np.array([x for x in utils.grid_simplex(step=step, boundary=boundary)]) + dx = np.apply_along_axis(dynamics, 1, x) + + p = self._simplex_transform.transform(x) + v = self._simplex_transform.transform(dx) + + x, y = p[:, 0], p[:, 1] + u, v = v[:, 0], v[:, 1] + + if normalize: + norm = np.sqrt(u**2 + v**2) + u, v = u / norm, v / norm + + if "pivot" not in kwargs: + kwargs["pivot"] = "middle" + + return super(Dynamics3x3Axes, self).quiver(x, y, u, v, **kwargs) + + def _linecollection(self, points, linewidth, color): + points = self._simplex_transform.transform(points).reshape(-1, 1, 2) + segments = np.concatenate([points[:-1], points[1:]], axis=1) + lc = LineCollection(segments, linewidths=linewidth, color=color) + return lc + + def _integrate(self, x, func, mask, dt, min_dist=0.01): + cells = [] + trajectory = [x] + x_ = x + for dt in [dt, -dt]: + while not mask[x]: + cell = mask.index(x) + cells.append(cell) + while mask.index(x) == cell: + # integrate up to cell boundary + if np.sqrt(np.sum((x_ - x)**2)) > min_dist: + x_ = x + if dt > 0: + trajectory.append(x) + else: + trajectory.insert(0, x) + + x = _rk12_step(func, x, dt=dt) + + if dt > 0: + mask[trajectory[-1]] = True + else: + mask[trajectory[0]] = True + + # restore to integrate backwards + if dt > 0. and len(cells): + trajectory.append(_rk12_step(func, x, dt=-dt)) + mask[mask.point(cells[0])] = False + x = trajectory[0] + x_ = x + else: + trajectory.insert(0, _rk12_step(func, x, dt=-dt)) + return (np.array(trajectory), cells) if len(trajectory) > 2 else None + + def streamplot(self, + dynamics, + initial_points=None, + dt=0.01, + density=1., + min_length=0.4, + linewidth=None, + color="k", + **kwargs): + """Visualizes the dynamics as a streamline plot. + + Mimics the visuals of `Axes.streamplot` for simplex plots. + + Args: + dynamics: Population dynamics of type `dynamics.SinglePopulationDynamics`. + initial_points: Starting points for streamlines + dt: Integration step. + density: Controls the density of streamlines in the plot. + min_length: Streamlines with length < min_length will be discarded. + linewidth: In `{None, float, "velocity"}`, optional, default: None. If + `linewidth="velocity"`, line width is scaled by the velocity of the + dynamics. Defaults to `rcParams` if `linewidth=None`. + color: In `{None, string, (r,g,b), (r,g,b,a), "velocity"}`, default: None. + If `color="velocity"`, velocity of dynamics is used to color the + streamlines. Defaults to `rcParams` if `color=None`. + **kwargs: Additional keyword arguments passed on to `Axes.streamplot`. + + Returns: + The `SimplexStreamMask`. + """ + mask = SimplexStreamMask(density=density) + trajectories = [] + + if initial_points is None: + eps = 0.1 + initial_points = np.array([[1. - eps, eps / 2., eps / 2.], + [eps / 2., 1. - eps, eps / 2.], + [eps / 2., eps / 2., 1. 
- eps]]) + initial_points = np.vstack( + (initial_points, utils.sample_from_simplex(100))) + # TODO(author10): add heuristic for initial points + + else: + initial_points = np.array(initial_points) + assert initial_points.ndim == 2 + assert initial_points.shape[1] == 3 + + # generate trajectories + for p in initial_points: + # center initial point on grid cell + p = mask.point(mask.index(p)) + res = self._integrate(p, dynamics, mask, dt=dt) + if res is not None: + t, cells = res # pylint: disable=unpacking-non-sequence + cum_len = np.cumsum( + np.sqrt( + np.diff(t[:, 0])**2 + np.diff(t[:, 1])**2 + + np.diff(t[:, 2])**2)) + if cum_len[-1] < min_length: + for cell in cells: + mask[mask.point(cell)] = False + continue + trajectories.append(t) + + lc_color = arrow_color = color + lc_linewidth = linewidth + + if linewidth == "velocity" or color == "velocity": + vel_max = 0 + vel_min = float("inf") + velocities = [] + for t in trajectories: + dx = np.apply_along_axis(dynamics, 1, t) + vel = np.sqrt(np.sum(dx**2, axis=1)) + vel_max = max(np.max(vel), vel_max) + vel_min = min(np.min(vel), vel_min) + velocities.append(vel) + + # add trajectories to plot + for i, t in enumerate(trajectories): + cum_len = np.cumsum( + np.sqrt( + np.diff(t[:, 0])**2 + np.diff(t[:, 1])**2 + np.diff(t[:, 2])**2)) + mid_idx = np.searchsorted(cum_len, cum_len[-1] / 2.) + + if linewidth == "velocity" or color == "velocity": + vel = (velocities[i] - vel_min) / vel_max + + if linewidth == "velocity": + lc_linewidth = 3. * vel + 0.5 + + if color == "velocity": + cmap = matplotlib.cm.get_cmap(rcParams["image.cmap"]) + lc_color = cmap(vel) + arrow_color = cmap(vel[mid_idx]) + + lc = self._linecollection(t, linewidth=lc_linewidth, color=lc_color) + self.add_collection(lc) + + # add arrow centered on trajectory + arrow_tail = self._simplex_transform.transform(t[mid_idx - 1]) + arrow_head = self._simplex_transform.transform(t[mid_idx]) + arrow_kw = dict(arrowstyle="-|>", mutation_scale=10 * 1.) + arrow_patch = FancyArrowPatch( + arrow_tail, + arrow_head, + linewidth=None, + color=arrow_color, + zorder=3, + **arrow_kw) + self.add_patch(arrow_patch) + return mask + + +projections.register_projection(Dynamics3x3Axes) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/egt/visualization_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/egt/visualization_test.py new file mode 100644 index 0000000..0c13fe9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/egt/visualization_test.py @@ -0,0 +1,111 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
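For quick reference while reviewing, here is a minimal, hypothetical usage sketch of the simplex visualization module added above (not part of the diff; it assumes matplotlib and the built-in `matrix_rps` game are available, mirroring what `visualization_test.py` exercises):

```
import matplotlib.pyplot as plt
import pyspiel

from open_spiel.python.egt import dynamics
from open_spiel.python.egt import utils
from open_spiel.python.egt import visualization  # registers the "2x2"/"3x3" projections

# Single-population replicator dynamics for Rock-Paper-Scissors.
game = pyspiel.load_matrix_game("matrix_rps")
payoff_tensor = utils.game_payoffs_array(game)
dyn = dynamics.SinglePopulationDynamics(payoff_tensor, dynamics.replicator)

fig = plt.figure(figsize=(4, 4))
ax = fig.add_subplot(111, projection="3x3")
ax.quiver(dyn)                        # directional field on the 2-simplex
ax.streamplot(dyn, color="velocity")  # streamlines colored by speed
ax.set_labels(["R", "P", "S"])
plt.show()
```

Note that importing `visualization` is what registers the `"2x2"` and `"3x3"` projections with matplotlib, so the import is needed even though the module name is never referenced directly.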
+ +"""Tests for open_spiel.python.egt.visualization.""" + +from absl import logging +from absl.testing import absltest + +# pylint: disable=g-import-not-at-top +try: + from matplotlib.figure import Figure + from matplotlib.quiver import Quiver + from matplotlib.streamplot import StreamplotSet +except ImportError as e: + logging.info("If your tests failed with the error 'ImportError: No module " + "named functools_lru_cache', this is a known bug in matplotlib " + "and there is a workaround (run sudo apt install " + "python-backports.functools-lru-cache. See: " + "https://github.com/matplotlib/matplotlib/issues/9344.") + raise e + +import numpy as np + +from open_spiel.python.egt import dynamics +from open_spiel.python.egt import utils +from open_spiel.python.egt import visualization +import pyspiel + + +def _build_dynamics2x2(): + """Build multi-population dynamics.""" + game = pyspiel.load_game("matrix_pd") + payoff_tensor = utils.game_payoffs_array(game) + return dynamics.MultiPopulationDynamics(payoff_tensor, dynamics.replicator) + + +def _build_dynamics3x3(): + """Build single-population dynamics.""" + game = pyspiel.load_game("matrix_rps") + payoff_tensor = utils.game_payoffs_array(game) + return dynamics.SinglePopulationDynamics(payoff_tensor, dynamics.replicator) + + +def _identity_dynamics(x): + """Returns same input as output.""" + return x + + +class VisualizationTest(absltest.TestCase): + + def test_meshgrid(self): + n = 10 + payoff_tensor = np.ones(shape=(2, 2, 2)) + identity = lambda x, f: x + allzero = lambda x, f: np.zeros(x.shape) + dyn = dynamics.MultiPopulationDynamics(payoff_tensor, (identity, allzero)) + x, y, u, v = visualization._eval_dynamics_2x2_grid(dyn, n) + np.testing.assert_allclose(x, u) + np.testing.assert_allclose(v, np.zeros(shape=(n, n))) + + dyn = dynamics.MultiPopulationDynamics(payoff_tensor, (allzero, identity)) + x, y, u, v = visualization._eval_dynamics_2x2_grid(dyn, n) + np.testing.assert_allclose(u, np.zeros(shape=(n, n))) + np.testing.assert_allclose(y, v) + + def test_quiver2x2(self): + """Test 2x2 quiver plot.""" + dyn = _build_dynamics2x2() + fig = Figure(figsize=(4, 4)) + ax = fig.add_subplot(111, projection="2x2") + res = ax.quiver(dyn) + self.assertIsInstance(res, Quiver) + + def test_streamplot2x2(self): + """Test 2x2 quiver plot.""" + dyn = _build_dynamics2x2() + fig = Figure(figsize=(4, 4)) + ax = fig.add_subplot(111, projection="2x2") + res = ax.streamplot(dyn) + self.assertIsInstance(res, StreamplotSet) + + def test_quiver3x3(self): + """Test 3x3 quiver plot.""" + dyn = _build_dynamics3x3() + fig = Figure(figsize=(4, 4)) + ax = fig.add_subplot(111, projection="3x3") + res = ax.quiver(dyn) + self.assertIsInstance(res, Quiver) + + def test_streamplot3x3(self): + """Test 3x3 quiver plot.""" + dyn = _build_dynamics3x3() + fig = Figure(figsize=(4, 4)) + ax = fig.add_subplot(111, projection="3x3") + res = ax.streamplot(dyn) + self.assertIsInstance(res, visualization.SimplexStreamMask) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/environments/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/environments/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/environments/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/environments/catch.py b/scenarios/bargaining/open_spiel/open_spiel/python/environments/catch.py new file mode 100644 index 0000000..0367271 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/environments/catch.py @@ -0,0 +1,183 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Catch reinforcement learning environment.""" + +import collections +import numpy as np + +from open_spiel.python import rl_environment + +# Actions +NOOP = 0 +LEFT = 1 +RIGHT = 2 + +_Point = collections.namedtuple("Point", ["x", "y"]) + + +class Environment(object): + """A catch reinforcement learning environment. + + The implementation considers illegal actions: trying to move the paddle in the + wall direction when next to a wall will incur in an invalid action and an + error will be purposely raised. + """ + + def __init__(self, discount=1.0, width=5, height=10, seed=None): + self._rng = np.random.RandomState(seed) + self._width = width + self._height = height + self._should_reset = True + self._num_actions = 3 + + # Discount returned at non-initial steps. + self._discounts = [discount] * self.num_players + + def reset(self): + """Resets the environment.""" + self._should_reset = False + self._ball_pos = _Point(x=self._rng.randint(0, self._width - 1), y=0) + self._paddle_pos = _Point( + x=self._rng.randint(0, self._width - 1), y=self._height - 1) + + legal_actions = [NOOP] + if self._paddle_pos.x > 0: + legal_actions.append(LEFT) + if self._paddle_pos.x < self._width - 1: + legal_actions.append(RIGHT) + + observations = { + "info_state": [self._get_observation()], + "legal_actions": [legal_actions], + "current_player": 0, + } + + return rl_environment.TimeStep( + observations=observations, + rewards=None, + discounts=None, + step_type=rl_environment.StepType.FIRST) + + def step(self, actions): + """Updates the environment according to `actions` and returns a `TimeStep`. + + Args: + actions: A singleton list with an integer, or an integer, representing the + action the agent took. + + Returns: + A `rl_environment.TimeStep` namedtuple containing: + observation: singleton list of dicts containing player observations, + each corresponding to `observation_spec()`. + reward: singleton list containing the reward at this timestep, or None + if step_type is `rl_environment.StepType.FIRST`. + discount: singleton list containing the discount in the range [0, 1], or + None if step_type is `rl_environment.StepType.FIRST`. + step_type: A `rl_environment.StepType` value. 
+ """ + if self._should_reset: + return self.reset() + + if isinstance(actions, list): + action = actions[0] + elif isinstance(actions, int): + action = actions + else: + raise ValueError("Action not supported.", actions) + + # Update paddle position + x, y = self._paddle_pos.x, self._paddle_pos.y + if action == LEFT: + x -= 1 + elif action == RIGHT: + x += 1 + elif action != NOOP: + raise ValueError("unrecognized action ", action) + + assert 0 <= x < self._width, ( + "Illegal action detected ({}), new state: ({},{})".format(action, x, y)) + self._paddle_pos = _Point(x, y) + + # Update ball position + x, y = self._ball_pos.x, self._ball_pos.y + if y == self._height - 1: + done = True + reward = 1.0 if x == self._paddle_pos.x else -1.0 + else: + done = False + y += 1 + reward = 0.0 + self._ball_pos = _Point(x, y) + + # Return observation + step_type = ( + rl_environment.StepType.LAST if done else rl_environment.StepType.MID) + self._should_reset = step_type == rl_environment.StepType.LAST + + legal_actions = [NOOP] + if self._paddle_pos.x > 0: + legal_actions.append(LEFT) + if self._paddle_pos.x < self._width - 1: + legal_actions.append(RIGHT) + + observations = { + "info_state": [self._get_observation()], + "legal_actions": [legal_actions], + "current_player": 0, + } + + return rl_environment.TimeStep( + observations=observations, + rewards=[reward], + discounts=self._discounts, + step_type=step_type) + + def _get_observation(self): + board = np.zeros((self._height, self._width), dtype=np.float32) + board[self._ball_pos.y, self._ball_pos.x] = 1.0 + board[self._paddle_pos.y, self._paddle_pos.x] = 1.0 + return board.flatten() + + def observation_spec(self): + """Defines the observation provided by the environment. + + Each dict member will contain its expected structure and shape. + + Returns: + A specification dict describing the observation fields and shapes. + """ + return dict( + info_state=tuple([self._height * self._width]), + legal_actions=(self._num_actions,), + current_player=(), + ) + + def action_spec(self): + """Defines action specifications. + + Specifications include action boundaries and their data type. + + Returns: + A specification dict containing action properties. + """ + return dict(num_actions=self._num_actions, min=0, max=2, dtype=int) + + @property + def num_players(self): + return 1 + + @property + def is_turn_based(self): + return False diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/environments/catch_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/environments/catch_test.py new file mode 100644 index 0000000..233fa99 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/environments/catch_test.py @@ -0,0 +1,85 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
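As a reviewer aid, a minimal, hypothetical episode loop for the Catch environment defined above (not part of the diff; the random policy is a stand-in for a learner):

```
import random

from open_spiel.python import rl_environment
from open_spiel.python.environments import catch

env = catch.Environment(height=10, width=5, seed=42)
time_step = env.reset()
episode_return = 0.0
while time_step.step_type != rl_environment.StepType.LAST:
  legal_actions = time_step.observations["legal_actions"][0]
  action = random.choice(legal_actions)  # stand-in for a learned policy
  time_step = env.step([action])
  episode_return += time_step.rewards[0]
print(episode_return)  # +1.0 if the paddle caught the ball, -1.0 otherwise
```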
+ +"""Tests for open_spiel.python.environment.catch.""" + +import random +from absl.testing import absltest + +from open_spiel.python import rl_environment +from open_spiel.python.environments import catch + + +def _select_random_legal_action(time_step): + cur_legal_actions = time_step.observations["legal_actions"][0] + action = random.choice(cur_legal_actions) + return action + + +class CatchEnvTest(absltest.TestCase): + + def test_obs_spec(self): + env = catch.Environment() + obs_specs = env.observation_spec() + self.assertLen(obs_specs, 3) + self.assertCountEqual(obs_specs.keys(), + ["current_player", "info_state", "legal_actions"]) + + def test_action_spec(self): + env = catch.Environment() + action_spec = env.action_spec() + self.assertLen(action_spec, 4) + self.assertCountEqual(action_spec.keys(), + ["dtype", "max", "min", "num_actions"]) + self.assertEqual(action_spec["num_actions"], 3) + self.assertEqual(action_spec["dtype"], int) + + def test_action_interfaces(self): + env = catch.Environment(height=2) + time_step = env.reset() + + # Singleton list works + action_list = [0] + time_step = env.step(action_list) + self.assertEqual(time_step.step_type, rl_environment.StepType.MID) + + # Integer works + action_int = 0 + time_step = env.step(action_int) + self.assertEqual(time_step.step_type, rl_environment.StepType.LAST) + + def test_many_runs(self): + random.seed(123) + for _ in range(20): + height = random.randint(2, 10) + env = catch.Environment(height=height) + + time_step = env.reset() + self.assertEqual(time_step.step_type, rl_environment.StepType.FIRST) + self.assertIsNone(time_step.rewards) + + action_int = _select_random_legal_action(time_step) + time_step = env.step(action_int) + self.assertEqual(time_step.step_type, rl_environment.StepType.MID) + self.assertEqual(time_step.rewards, [0]) + + for _ in range(1, height): + action_int = _select_random_legal_action(time_step) + time_step = env.step(action_int) + self.assertEqual(time_step.step_type, rl_environment.StepType.LAST) + self.assertIn(time_step.rewards[0], [-1, 0, 1]) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/environments/cliff_walking.py b/scenarios/bargaining/open_spiel/open_spiel/python/environments/cliff_walking.py new file mode 100644 index 0000000..7f18e5b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/environments/cliff_walking.py @@ -0,0 +1,195 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A cliff walking single agent reinforcement learning environment.""" + +import numpy as np + +from open_spiel.python import rl_environment + +# Actions +RIGHT, UP, LEFT, DOWN = range(4) + + +class Environment(object): + r"""A cliff walking reinforcement learning environment. + + This is a deterministic environment that can be used to test RL algorithms. 
+ Note there are *no illegal moves* in this environment--if the agent is on the + edge of the cliff and takes an action which would yield an invalid position, + the action is ignored (as if there were walls surrounding the cliff). + + Cliff example for height=3 and width=5: + + | | | | | | + | | | | | | + | S | x | x | x | G | + + where `S` is always the starting position, `G` is always the goal and `x` + represents the zone of high negative reward to be avoided. For this instance, + the optimum policy is depicted as follows: + + | | | | | | + |-->|-->|-->|-->|\|/| + |/|\| x | x | x | G | + + yielding a reward of -6 (minus 1 per time step). + + See pages 132 of Rich Sutton's book for details: + http://www.incompleteideas.net/book/bookdraft2018mar21.pdf + """ + + def __init__(self, height=4, width=8, discount=1.0, max_t=100): + if height < 2 or width < 3: + raise ValueError("height must be >= 2 and width >= 3.") + self._height = height + self._width = width + self._legal_actions = [RIGHT, UP, LEFT, DOWN] + self._should_reset = True + self._max_t = max_t + + # Discount returned at non-initial steps. + self._discounts = [discount] * self.num_players + + def reset(self): + """Resets the environment.""" + self._should_reset = False + self._time_counter = 0 + self._state = np.array([self._height - 1, 0]) + + observations = { + "info_state": [self._state.copy()], + "legal_actions": [self._legal_actions], + "current_player": 0, + } + + return rl_environment.TimeStep( + observations=observations, + rewards=None, + discounts=None, + step_type=rl_environment.StepType.FIRST) + + def step(self, actions): + """Updates the environment according to `actions` and returns a `TimeStep`. + + Args: + actions: A singleton list with an integer, or an integer, representing the + action the agent took. + + Returns: + A `rl_environment.TimeStep` namedtuple containing: + observation: singleton list of dicts containing player observations, + each corresponding to `observation_spec()`. + reward: singleton list containing the reward at this timestep, or None + if step_type is `rl_environment.StepType.FIRST`. + discount: singleton list containing the discount in the range [0, 1], or + None if step_type is `rl_environment.StepType.FIRST`. + step_type: A `rl_environment.StepType` value. 
+ """ + if self._should_reset: + return self.reset() + self._time_counter += 1 + + if isinstance(actions, list): + action = actions[0] + elif isinstance(actions, int): + action = actions + else: + raise ValueError("Action not supported.", actions) + + dx = 0 + dy = 0 + if action == LEFT: + dx -= 1 + elif action == RIGHT: + dx += 1 + + if action == UP: + dy -= 1 + elif action == DOWN: + dy += 1 + + self._state += np.array([dy, dx]) + self._state = self._state.clip(0, [self._height - 1, self._width - 1]) + + done = self._is_pit(self._state) or self._is_goal(self._state) + done = done or self._time_counter >= self._max_t + # Return observation + step_type = ( + rl_environment.StepType.LAST if done else rl_environment.StepType.MID) + self._should_reset = step_type == rl_environment.StepType.LAST + + observations = { + "info_state": [self._state.copy()], + "legal_actions": [self._legal_actions], + "current_player": 0, + } + + return rl_environment.TimeStep( + observations=observations, + rewards=[self._get_reward(self._state)], + discounts=self._discounts, + step_type=step_type) + + def _is_goal(self, pos): + """Check if position is bottom right corner of grid.""" + return pos[0] == self._height - 1 and pos[1] == self._width - 1 + + def _is_pit(self, pos): + """Check if position is in bottom row between start and goal.""" + return (pos[1] > 0 and pos[1] < self._width - 1 and + pos[0] == self._height - 1) + + def _get_reward(self, pos): + if self._is_pit(pos): + return -100.0 + else: + return -1.0 + + def observation_spec(self): + """Defines the observation provided by the environment. + + Each dict member will contain its expected structure and shape. + + Returns: + A specification dict describing the observation fields and shapes. + """ + return dict( + info_state=tuple([2]), + legal_actions=(len(self._legal_actions),), + current_player=(), + ) + + def action_spec(self): + """Defines action specifications. + + Specifications include action boundaries and their data type. + + Returns: + A specification dict containing action properties. + """ + return dict( + num_actions=len(self._legal_actions), + min=min(self._legal_actions), + max=max(self._legal_actions), + dtype=int, + ) + + @property + def num_players(self): + return 1 + + @property + def is_turn_based(self): + return False diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/environments/cliff_walking_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/environments/cliff_walking_test.py new file mode 100644 index 0000000..827e160 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/environments/cliff_walking_test.py @@ -0,0 +1,92 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
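A similar hypothetical sketch for the cliff-walking environment above, replaying the optimal policy described in its module docstring (not part of the diff):

```
from open_spiel.python.environments import cliff_walking

width = 5
env = cliff_walking.Environment(height=3, width=width)
env.reset()

# Optimal route from the docstring: up, along the row above the cliff, then down.
actions = ([cliff_walking.UP]
           + [cliff_walking.RIGHT] * (width - 1)
           + [cliff_walking.DOWN])
episode_return = 0.0
for action in actions:
  time_step = env.step(action)
  episode_return += time_step.rewards[0]
print(episode_return)  # -6.0 for width=5: one -1 reward per step, no cliff penalty
```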
+ +"""Tests for open_spiel.python.environment.cliff_walking.""" + +import random +from absl.testing import absltest +from open_spiel.python import rl_environment +from open_spiel.python.environments import cliff_walking + + +def _select_random_legal_action(time_step): + cur_legal_actions = time_step.observations["legal_actions"][0] + action = random.choice(cur_legal_actions) + return action + + +class CliffWalkingEnvTest(absltest.TestCase): + + def test_obs_spec(self): + env = cliff_walking.Environment() + obs_specs = env.observation_spec() + self.assertLen(obs_specs, 3) + self.assertCountEqual(obs_specs.keys(), + ["current_player", "info_state", "legal_actions"]) + self.assertEqual(obs_specs["info_state"], (2,)) + + def test_action_spec(self): + env = cliff_walking.Environment() + action_spec = env.action_spec() + self.assertLen(action_spec, 4) + self.assertCountEqual(action_spec.keys(), + ["dtype", "max", "min", "num_actions"]) + self.assertEqual(action_spec["num_actions"], 4) + self.assertEqual(action_spec["dtype"], int) + + def test_action_interfaces(self): + env = cliff_walking.Environment() + time_step = env.reset() + + # Singleton list works + action_list = [cliff_walking.UP] + time_step = env.step(action_list) + self.assertEqual(time_step.step_type, rl_environment.StepType.MID) + + # Integer works + action_int = cliff_walking.UP + time_step = env.step(action_int) + self.assertEqual(time_step.step_type, rl_environment.StepType.MID) + + def test_many_runs(self): + random.seed(1234) + for _ in range(30): + height = random.randint(3, 10) + width = random.randint(3, 10) + env = cliff_walking.Environment(height=height, width=width) + + time_step = env.reset() + self.assertEqual(time_step.step_type, rl_environment.StepType.FIRST) + self.assertIsNone(time_step.rewards) + + action_int = cliff_walking.UP + time_step = env.step(action_int) + self.assertEqual(time_step.step_type, rl_environment.StepType.MID) + self.assertEqual(time_step.rewards, [-1.0]) + + action_int = cliff_walking.RIGHT + for _ in range(1, width): + time_step = env.step(action_int) + self.assertEqual(time_step.step_type, rl_environment.StepType.MID) + self.assertEqual(time_step.rewards, [-1.0]) + + action_int = cliff_walking.DOWN + time_step = env.step(action_int) + + self.assertEqual(time_step.step_type, rl_environment.StepType.LAST) + self.assertEqual(time_step.rewards, [-1.0]) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/environments/iterated_matrix_game.py b/scenarios/bargaining/open_spiel/open_spiel/python/environments/iterated_matrix_game.py new file mode 100644 index 0000000..012b487 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/environments/iterated_matrix_game.py @@ -0,0 +1,186 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module implements a generic environment for iterated normal form games. + +It does so wuth automatic vectorization. 
Along with the environment, it also +provides pre-defined factory functions for common games such as the iterated +prisoners dilemma and the iterated matching pennies. +""" + +import numpy as np +from pyspiel import PlayerId + +from open_spiel.python.rl_environment import Environment +from open_spiel.python.rl_environment import StepType +from open_spiel.python.rl_environment import TimeStep + + +class IteratedMatrixGame(Environment): + """Environment for iterated normal form games. + + Supports automatic vectorization. + """ + + def __init__( + self, + payoff_matrix: np.ndarray, + iterations: int, + batch_size=1, + include_remaining_iterations=True, + ): + # pylint: disable=super-init-not-called + self._payoff_matrix = np.array(payoff_matrix, dtype=np.float32) + self._iterations = iterations + self._num_players = payoff_matrix.ndim - 1 + self._batch_size = batch_size + self._include_remaining_iterations = include_remaining_iterations + self._t = 0 + self._actions = np.arange( + np.prod(self.action_spec()['num_actions']) + ).reshape(*[payoff_matrix.shape[p] for p in range(self._num_players)]) + + def one_hot(self, x, n): + return np.eye(n)[x] + + @property + def num_players(self): + return self._num_players + + def observation_spec(self): + info_state_spec, legal_actions_spec = [], [] + for i in range(self._num_players): + num_actions = np.prod(self._payoff_matrix.shape[:-1]) + 1 + if self._include_remaining_iterations: + num_actions += 1 + info_state_spec.append([num_actions]) + legal_actions_spec.append(self._payoff_matrix.shape[i]) + return { + 'info_state': tuple(info_state_spec), + 'legal_actions': tuple(legal_actions_spec), + 'current_player': (), + } + + def action_spec(self): + num_actions, mins, maxs = [], [], [] + for i in range(self._num_players): + num_actions.append(self._payoff_matrix.shape[i]) + mins.append(0) + maxs.append(self._payoff_matrix.shape[i] - 1) + + return { + 'num_actions': tuple(num_actions), + 'min': tuple(mins), + 'max': tuple(maxs), + 'dtype': int, + } + + def step(self, actions: np.ndarray): + if actions.ndim == 1: + actions = actions[None, :] + payoffs = self._payoff_matrix[tuple(actions.T)] + s1 = self.one_hot( + self._actions[tuple(actions.T)] + 1, n=np.max(self._actions) + 2 + ) + s2 = self.one_hot( + self._actions[tuple(actions[..., ::-1].T)] + 1, + n=np.max(self._actions) + 2, + ) + rewards = [ + np.squeeze(p) + for p in np.split( + payoffs, indices_or_sections=self._num_players, axis=1 + ) + ] + discounts = [np.ones_like(r) for r in rewards] + if self._t == self._iterations - 1: + step_type = StepType.LAST + else: + step_type = StepType.MID + self._t += 1 + remaining_iters = float((self._iterations - self._t)) / self._iterations + + info_state = [s1, s2] + if self._include_remaining_iterations: + info_state = np.concatenate( + [ + info_state, + np.full((self._batch_size, 1), fill_value=remaining_iters), + ], + axis=-1, + ) + + legal_actions = self._get_legal_actions() + return TimeStep( + observations={ + 'info_state': info_state, + 'legal_actions': legal_actions, + 'batch_size': actions.shape[0], + 'current_player': PlayerId.SIMULTANEOUS, + }, + rewards=rewards, + discounts=discounts, + step_type=step_type, + ) + + def _get_legal_actions(self): + legal_actions = [] + for p in range(self.num_players): + actions = np.arange(self.action_spec()['num_actions'][p]) + legal_actions.append([actions] * self._batch_size) + return np.array(legal_actions) + + def reset(self): + self._t = 0 + info_state = np.zeros(( + self.num_players, + self._batch_size, + 
*self.observation_spec()['info_state'][0], + )) + info_state[..., 0] = 1.0 + if self._include_remaining_iterations: + info_state[..., -1] = 1.0 + rewards = np.squeeze(np.zeros((self.num_players, self._batch_size))) + discounts = np.squeeze(np.ones((self.num_players, self._batch_size))) + return TimeStep( + observations={ + 'info_state': [ + np.squeeze(s).astype(np.float32) for s in info_state + ], + 'legal_actions': self._get_legal_actions(), + 'batch_size': self._batch_size, + 'current_player': PlayerId.SIMULTANEOUS, + }, + rewards=[np.squeeze(a).astype(np.float32) for a in rewards], + discounts=[np.squeeze(a).astype(np.float32) for a in discounts], + step_type=StepType.FIRST, + ) + + +def IteratedPrisonersDilemma(iterations: int, batch_size=1): + return IteratedMatrixGame( + payoff_matrix=np.array([[[-1, -1], [-3, 0]], [[0, -3], [-2, -2]]]), + iterations=iterations, + batch_size=batch_size, + include_remaining_iterations=False, + ) + + +def IteratedMatchingPennies(iterations: int, batch_size=1): + return IteratedMatrixGame( + payoff_matrix=np.array([[[1, -1], [-1, 1]], [[-1, 1], [1, -1]]]), + iterations=iterations, + batch_size=batch_size, + include_remaining_iterations=False, + ) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/benchmark_games.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/benchmark_games.py new file mode 100644 index 0000000..4335700 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/benchmark_games.py @@ -0,0 +1,146 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Benchmark performance of games by counting the number of rollouts.""" + +import random +import time + +from absl import app +from absl import flags +from absl import logging +import pandas as pd + +from open_spiel.python import games # pylint: disable=unused-import +from open_spiel.python.mfg import games as mfg_games # pylint: disable=unused-import +import pyspiel + +FLAGS = flags.FLAGS + +flags.DEFINE_string( + "games", "*", "Benchmark only specific games (semicolon separated). 
" + "Use * to benchmark all (loadable) games.") +flags.DEFINE_float("time_limit", 10., "Time limit per game (in seconds).") +flags.DEFINE_integer("give_up_after", 100, + "Give up rollout when the history length is exceeded.") +flags.DEFINE_bool( + "if_simultaneous_convert_to_turn_based", False, + "If True, load any simultaneous game as turn based for the benchmark.") + + +def _rollout_until_timeout(game_name, + time_limit, + give_up_after, + if_simultaneous_convert_to_turn_based=False): + """Run rollouts on the specified game until the time limit. + + Args: + game_name: str + time_limit: In number of seconds + give_up_after: Cuts off trajectories longer than specified + if_simultaneous_convert_to_turn_based: if the game is simultaneous and this + boolean is true, then the game is loaded as a turn based game. + + Returns: + A dict of collected statistics. + """ + game = pyspiel.load_game(game_name) + if game.get_type().dynamics == pyspiel.GameType.Dynamics.MEAN_FIELD: + raise NotImplementedError( + "Benchmark on mean field games is not available yet.") + if (game.get_type().dynamics == pyspiel.GameType.Dynamics.SIMULTANEOUS and + if_simultaneous_convert_to_turn_based): + game = pyspiel.convert_to_turn_based(game) + is_time_out = lambda t: time.time() - t > time_limit + num_rollouts = 0 + num_giveups = 0 + num_moves = 0 + start = time.time() + while not is_time_out(start): + state = game.new_initial_state() + while not state.is_terminal(): + if len(state.history()) > give_up_after: + num_giveups += 1 + break + if state.is_simultaneous_node(): + + def random_choice(actions): + if actions: + return random.choice(actions) + return 0 + + actions = [ + random_choice(state.legal_actions(i)) + for i in range(state.num_players()) + ] + state.apply_actions(actions) + else: + action = random.choice(state.legal_actions(state.current_player())) + state.apply_action(action) + num_moves += 1 + num_rollouts += 1 + time_elapsed = time.time() - start + return dict( + game_name=game_name, + ms_per_rollouts=time_elapsed / num_rollouts * 1000, + ms_per_moves=time_elapsed / num_moves * 1000, + giveups_per_rollout=num_giveups / num_rollouts, + time_elapsed=time_elapsed + ) + + +def main(_): + if FLAGS.games == "*": + games_list = [ + game.short_name + for game in pyspiel.registered_games() + if game.default_loadable + ] + else: + games_list = FLAGS.games.split(";") + + logging.info("Running benchmark for %s games.", len(games_list)) + logging.info("This will take approximately %d seconds.", + len(games_list) * FLAGS.time_limit) + + game_stats = [] + for game_name in games_list: + logging.info("Running benchmark on %s", game_name) + game_stats.append( + _rollout_until_timeout(game_name, FLAGS.time_limit, FLAGS.give_up_after, + FLAGS.if_simultaneous_convert_to_turn_based)) + + with pd.option_context("display.max_rows", None, + "display.max_columns", None, + "display.width", 200): + df = pd.DataFrame(game_stats) + # Use nice header names. 
+ df.rename(columns={ + "game_name": "Game", + "ms_per_rollouts": "msec/rollout", + "ms_per_moves": "msec/move", + "giveups_per_rollout": "Give ups/rollouts", + "time_elapsed": "Time elapsed [sec]" + }, inplace=True) + + print("---") + print("Results for following benchmark configuration:") + print("time_limit =", FLAGS.time_limit) + print("give_up_after =", FLAGS.give_up_after) + print("---") + print(df) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/breakthrough_dqn.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/breakthrough_dqn.py new file mode 100644 index 0000000..8d63950 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/breakthrough_dqn.py @@ -0,0 +1,130 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""DQN agents trained on Breakthrough by independent Q-learning.""" + +from absl import app +from absl import flags +from absl import logging +import numpy as np + +from open_spiel.python import rl_environment +from open_spiel.python.algorithms import random_agent +from open_spiel.python.jax import dqn + +FLAGS = flags.FLAGS + +# Training parameters +flags.DEFINE_string("checkpoint_dir", "/tmp/dqn_test", + "Directory to save/load the agent models.") +flags.DEFINE_integer( + "save_every", int(1e4), + "Episode frequency at which the DQN agent models are saved.") +flags.DEFINE_integer("num_train_episodes", int(1e6), + "Number of training episodes.") +flags.DEFINE_integer( + "eval_every", 1000, + "Episode frequency at which the DQN agents are evaluated.") + +# DQN model hyper-parameters +flags.DEFINE_list("hidden_layers_sizes", [64, 64], + "Number of hidden units in the Q-Network MLP.") +flags.DEFINE_integer("replay_buffer_capacity", int(1e5), + "Size of the replay buffer.") +flags.DEFINE_integer("batch_size", 32, + "Number of transitions to sample at each learning step.") + + +def eval_against_random_bots(env, trained_agents, random_agents, num_episodes): + """Evaluates `trained_agents` against `random_agents` for `num_episodes`.""" + num_players = len(trained_agents) + sum_episode_rewards = np.zeros(num_players) + for player_pos in range(num_players): + cur_agents = random_agents[:] + cur_agents[player_pos] = trained_agents[player_pos] + for _ in range(num_episodes): + time_step = env.reset() + episode_rewards = 0 + while not time_step.last(): + player_id = time_step.observations["current_player"] + if env.is_turn_based: + agent_output = cur_agents[player_id].step( + time_step, is_evaluation=True) + action_list = [agent_output.action] + else: + agents_output = [ + agent.step(time_step, is_evaluation=True) for agent in cur_agents + ] + action_list = [agent_output.action for agent_output in agents_output] + time_step = env.step(action_list) + episode_rewards += time_step.rewards[player_pos] + sum_episode_rewards[player_pos] += episode_rewards + return sum_episode_rewards / num_episodes + + +def main(_): + game = 
"breakthrough" + num_players = 2 + + env_configs = {"columns": 5, "rows": 5} + env = rl_environment.Environment(game, **env_configs) + info_state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + # random agents for evaluation + random_agents = [ + random_agent.RandomAgent(player_id=idx, num_actions=num_actions) + for idx in range(num_players) + ] + + hidden_layers_sizes = [int(l) for l in FLAGS.hidden_layers_sizes] + # pylint: disable=g-complex-comprehension + agents = [ + dqn.DQN( + player_id=idx, + state_representation_size=info_state_size, + num_actions=num_actions, + hidden_layers_sizes=hidden_layers_sizes, + replay_buffer_capacity=FLAGS.replay_buffer_capacity, + batch_size=FLAGS.batch_size, + ) + for idx in range(num_players) + ] + + for ep in range(FLAGS.num_train_episodes): + if (ep + 1) % FLAGS.eval_every == 0: + r_mean = eval_against_random_bots(env, agents, random_agents, 1000) + logging.info("[%s] Mean episode rewards %s", ep + 1, r_mean) + if (ep + 1) % FLAGS.save_every == 0: + for agent in agents: + agent.save(FLAGS.checkpoint_dir) + + time_step = env.reset() + while not time_step.last(): + player_id = time_step.observations["current_player"] + if env.is_turn_based: + agent_output = agents[player_id].step(time_step) + action_list = [agent_output.action] + else: + agents_output = [agent.step(time_step) for agent in agents] + action_list = [agent_output.action for agent_output in agents_output] + time_step = env.step(action_list) + + # Episode is over, step all agents with final info state. + for agent in agents: + agent.step(time_step) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/bridge_supervised_learning.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/bridge_supervised_learning.py new file mode 100644 index 0000000..c9fe87e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/bridge_supervised_learning.py @@ -0,0 +1,226 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Train a policy net on bridge bidding based on a dataset of trajectories. 
+ +Suitable data for training, generated by WBridge5, may be downloaded from: +https://console.cloud.google.com/storage/browser/openspiel-data/bridge +""" + +import os +import pickle +from typing import Any, Tuple + +from absl import app +from absl import flags + +import haiku as hk +import jax +from jax import numpy as jnp +import numpy as np +import optax + +import pyspiel + +OptState = Any +Params = Any + +FLAGS = flags.FLAGS +GAME = pyspiel.load_game('bridge(use_double_dummy_result=false)') +NUM_ACTIONS = 38 +MIN_ACTION = 52 +NUM_CARDS = 52 +NUM_PLAYERS = 4 +TOP_K_ACTIONS = 5 # How many alternative actions to display + +flags.DEFINE_integer('iterations', 100000, 'Number of iterations') +flags.DEFINE_string('data_path', None, 'Location for data') +flags.DEFINE_integer('eval_every', 10000, 'How often to evaluate the policy') +flags.DEFINE_integer('num_examples', 3, + 'How many examples to print per evaluation') +flags.DEFINE_integer('train_batch', 128, 'Batch size for training step') +flags.DEFINE_integer('eval_batch', 10000, 'Batch size when evaluating') +flags.DEFINE_integer('rng_seed', 42, 'Seed for initial network weights') +flags.DEFINE_string('save_path', None, 'Location for saved networks') + + +def _no_play_trajectory(line: str): + """Returns the deal and bidding actions only given a text trajectory.""" + actions = [int(x) for x in line.split(' ')] + # Usually a trajectory is NUM_CARDS chance events for the deal, plus one + # action for every bid of the auction, plus NUM_CARDS actions for the play + # phase. Exceptionally, if all NUM_PLAYERS players Pass, there is no play + # phase and the trajectory is just of length NUM_CARDS + NUM_PLAYERS. + if len(actions) == NUM_CARDS + NUM_PLAYERS: + return tuple(actions) + else: + return tuple(actions[:-NUM_CARDS]) + + +def make_dataset(file: str): + """Creates dataset as a generator of single examples.""" + all_trajectories = [_no_play_trajectory(line) for line in open(file)] + while True: + np.random.shuffle(all_trajectories) + for trajectory in all_trajectories: + action_index = np.random.randint(52, len(trajectory)) + state = GAME.new_initial_state() + for action in trajectory[:action_index]: + state.apply_action(action) + yield (state.observation_tensor(), trajectory[action_index] - MIN_ACTION) + + +def batch(dataset, batch_size: int): + """Creates a batched dataset from a one-at-a-time dataset.""" + observations = np.zeros([batch_size] + GAME.observation_tensor_shape(), + np.float32) + labels = np.zeros(batch_size, dtype=np.int32) + while True: + for batch_index in range(batch_size): + observations[batch_index], labels[batch_index] = next(dataset) + yield observations, labels + + +def one_hot(x, k): + """Returns a one-hot encoding of `x` of size `k`.""" + return jnp.array(x[..., jnp.newaxis] == jnp.arange(k), dtype=np.float32) + + +def net_fn(x): + """Haiku module for our network.""" + net = hk.Sequential([ + hk.Linear(1024), + jax.nn.relu, + hk.Linear(1024), + jax.nn.relu, + hk.Linear(1024), + jax.nn.relu, + hk.Linear(1024), + jax.nn.relu, + hk.Linear(NUM_ACTIONS), + jax.nn.log_softmax, + ]) + return net(x) + + +def main(argv): + if len(argv) > 1: + raise app.UsageError('Too many command-line arguments.') + + # Make the network. + net = hk.without_apply_rng(hk.transform(net_fn)) + + # Make the optimiser. 
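  # Worked note on the `loss` defined below: with batch size B it computes
  #   -mean(one_hot(targets) * log_probs)
  # where the mean runs over both the batch and the NUM_ACTIONS logits.  The
  # one-hot mask keeps only the target logit, so this equals the usual mean
  # cross-entropy, -1/B * sum_i log p(target_i), scaled by a constant factor
  # of 1/NUM_ACTIONS; the scaling only changes the gradient magnitude.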
+ opt = optax.adam(1e-4) + + @jax.jit + def loss( + params: Params, + inputs: np.ndarray, + targets: np.ndarray, + ) -> jax.Array: + """Cross-entropy loss.""" + assert targets.dtype == np.int32 + log_probs = net.apply(params, inputs) + return -jnp.mean(one_hot(targets, NUM_ACTIONS) * log_probs) + + @jax.jit + def accuracy( + params: Params, + inputs: np.ndarray, + targets: np.ndarray, + ) -> jax.Array: + """Classification accuracy.""" + predictions = net.apply(params, inputs) + return jnp.mean(jnp.argmax(predictions, axis=-1) == targets) + + @jax.jit + def update( + params: Params, + opt_state: OptState, + inputs: np.ndarray, + targets: np.ndarray, + ) -> Tuple[Params, OptState]: + """Learning rule (stochastic gradient descent).""" + _, gradient = jax.value_and_grad(loss)(params, inputs, targets) + updates, opt_state = opt.update(gradient, opt_state) + new_params = optax.apply_updates(params, updates) + return new_params, opt_state + + def output_samples(params: Params, max_samples: int): + """Output some cases where the policy disagrees with the dataset action.""" + if max_samples == 0: + return + count = 0 + with open(os.path.join(FLAGS.data_path, 'test.txt')) as f: + lines = list(f) + np.random.shuffle(lines) + for line in lines: + state = GAME.new_initial_state() + actions = _no_play_trajectory(line) + for action in actions: + if not state.is_chance_node(): + observation = np.array(state.observation_tensor(), np.float32) + policy = np.exp(net.apply(params, observation)) + probs_actions = [(p, a + MIN_ACTION) for a, p in enumerate(policy)] + pred = max(probs_actions)[1] + if pred != action: + print(state) + for p, a in reversed(sorted(probs_actions)[-TOP_K_ACTIONS:]): + print('{:7} {:.2f}'.format(state.action_to_string(a), p)) + print('Ground truth {}\n'.format(state.action_to_string(action))) + count += 1 + break + state.apply_action(action) + if count >= max_samples: + return + + # Make datasets. + if FLAGS.data_path is None: + raise app.UsageError( + 'Please generate your own supervised training data or download from ' + 'https://console.cloud.google.com/storage/browser/openspiel-data/bridge' + ' and supply the local location as --data_path') + train = batch( + make_dataset(os.path.join(FLAGS.data_path, 'train.txt')), + FLAGS.train_batch) + test = batch( + make_dataset(os.path.join(FLAGS.data_path, 'test.txt')), FLAGS.eval_batch) + + # Initialize network and optimiser. + rng = jax.random.PRNGKey(FLAGS.rng_seed) # seed used for network weights + inputs, unused_targets = next(train) + params = net.init(rng, inputs) + opt_state = opt.init(params) + + # Train/eval loop. + for step in range(FLAGS.iterations): + # Do SGD on a batch of training examples. + inputs, targets = next(train) + params, opt_state = update(params, opt_state, inputs, targets) + + # Periodically evaluate classification accuracy on the test set. 
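    # Note: the snapshots written below are named params-<step>.pkl, whereas
    # bridge_wb5.py (added elsewhere in this change) loads a fixed file name,
    # params-snapshot.pkl, from --params_path; a chosen checkpoint therefore
    # has to be copied or renamed before it can be used there.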
+ if (1 + step) % FLAGS.eval_every == 0: + inputs, targets = next(test) + test_accuracy = accuracy(params, inputs, targets) + print(f'After {1+step} steps, test accuracy: {test_accuracy}.') + if FLAGS.save_path: + filename = os.path.join(FLAGS.save_path, f'params-{1 + step}.pkl') + with open(filename, 'wb') as pkl_file: + pickle.dump(params, pkl_file) + output_samples(params, FLAGS.num_examples) + + +if __name__ == '__main__': + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/bridge_uncontested_bidding_bluechip.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/bridge_uncontested_bidding_bluechip.py new file mode 100644 index 0000000..7f94eb2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/bridge_uncontested_bidding_bluechip.py @@ -0,0 +1,118 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +r"""Two BlueChip bridge bots bid with each other. + +The bot_cmd FLAG should contain a command-line to launch an external bot, e.g. +`Wbridge5 Autoconnect {port}`. + +""" +# pylint: enable=line-too-long + +import re +import socket +import subprocess + +from absl import app +from absl import flags +import numpy as np + +from open_spiel.python.bots import bluechip_bridge_uncontested_bidding +import pyspiel + +FLAGS = flags.FLAGS +flags.DEFINE_float("timeout_secs", 60, "Seconds to wait for bot to respond") +flags.DEFINE_integer("rng_seed", 1234, "Seed to use to generate hands") +flags.DEFINE_integer("num_deals", 10, "How many deals to play") +flags.DEFINE_string( + "bot_cmd", None, + "Command to launch the external bot; must include {port} which will be " + "replaced by the port number to attach to.") + + +def _run_once(state, bots): + """Plays bots with each other, returns terminal utility for each player.""" + for bot in bots: + bot.restart_at(state) + while not state.is_terminal(): + if state.is_chance_node(): + outcomes, probs = zip(*state.chance_outcomes()) + state.apply_action(np.random.choice(outcomes, p=probs)) + else: + state.apply_action(bots[state.current_player()].step(state)[1]) + return state + + +def main(argv): + if len(argv) > 1: + raise app.UsageError("Too many command-line arguments.") + game = pyspiel.load_game("bridge_uncontested_bidding", { + "relative_scoring": True, + "rng_seed": FLAGS.rng_seed, + }) + bots = [ + bluechip_bridge_uncontested_bidding.BlueChipBridgeBot( + game, 0, _WBridge5Client(FLAGS.bot_cmd)), + bluechip_bridge_uncontested_bidding.BlueChipBridgeBot( + game, 1, _WBridge5Client(FLAGS.bot_cmd)), + ] + results = [] + + for i_deal in range(FLAGS.num_deals): + state = _run_once(game.new_initial_state(), bots) + print("Deal #{}; final state:\n{}".format(i_deal, state)) + results.append(state.returns()) + + stats = np.array(results) + mean = np.mean(stats, axis=0) + stderr = np.std(stats, axis=0, ddof=1) / np.sqrt(FLAGS.num_deals) + print(u"Absolute score: {:+.1f}\u00b1{:.1f}".format(mean[0], stderr[0])) + print(u"Relative 
score: {:+.1f}\u00b1{:.1f}".format(mean[1], stderr[1])) + + +class _WBridge5Client(object): + """Manages the connection to a WBridge5 bot.""" + + def __init__(self, command): + self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.sock.bind(("", 0)) + self.port = self.sock.getsockname()[1] + self.sock.listen(1) + self.process = None + self.command = command.format(port=self.port) + + def start(self): + if self.process is not None: + self.process.kill() + self.process = subprocess.Popen(self.command.split(" ")) + self.conn, self.addr = self.sock.accept() + + def read_line(self): + line = "" + while True: + self.conn.settimeout(FLAGS.timeout_secs) + data = self.conn.recv(1024) + if not data: + raise EOFError("Connection closed") + line += data.decode("ascii") + if line.endswith("\n"): + return re.sub(r"\s+", " ", line).strip() + + def send_line(self, line): + self.conn.send((line + "\r\n").encode("ascii")) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/bridge_wb5.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/bridge_wb5.py new file mode 100644 index 0000000..287c481 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/bridge_wb5.py @@ -0,0 +1,181 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +r"""Two BlueChip bridge bots agains simplest open_spiel (take the first possible action). + +The bot_cmd FLAG should contain a command-line to launch an external bot, e.g. +`Wbridge5 Autoconnect {port}`. + +""" +# pylint: enable=line-too-long + +import os +import pickle +import re +import socket +import subprocess +import time + +from absl import app +from absl import flags +import haiku as hk +import jax +import numpy as np + +from open_spiel.python.bots import bluechip_bridge +import pyspiel + +FLAGS = flags.FLAGS +flags.DEFINE_float("timeout_secs", 60, "Seconds to wait for bot to respond") +flags.DEFINE_integer("rng_seed", 1234, "Seed to use to generate hands") +flags.DEFINE_integer("num_deals", 10, "How many deals to play") +flags.DEFINE_integer("sleep", 0, "How many seconds to wait before next action") +flags.DEFINE_string("params_path", ".", + "directory path for trained model params-snapshot.pkl") +flags.DEFINE_string( + "bot_cmd", None, + "Command to launch the external bot; must include {port} which will be " + "replaced by the port number to attach to.") + +# Make the network. 
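# The constants below mirror bridge_supervised_learning.py: the policy network
# outputs log-probabilities over the 38 bidding calls, indexed 0..37, while in
# the bridge game itself those calls are actions 52..89 (the first 52 actions
# are the cards).  ai_action() therefore shifts the network's argmax by
# MIN_ACTION, e.g. network index 0 maps to game action 52 and index 37 to 89.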
+NUM_ACTIONS = 38 +MIN_ACTION = 52 + + +def net_fn(x): + """Haiku module for our network.""" + net = hk.Sequential([ + hk.Linear(1024), + jax.nn.relu, + hk.Linear(1024), + jax.nn.relu, + hk.Linear(1024), + jax.nn.relu, + hk.Linear(1024), + jax.nn.relu, + hk.Linear(NUM_ACTIONS), + jax.nn.log_softmax, + ]) + return net(x) + + +def load_model(): + net = hk.without_apply_rng(hk.transform(net_fn)) + params = pickle.load( + open(os.path.join(FLAGS.params_path, "params-snapshot.pkl"), "rb")) + return net, params + + +def ai_action(state, net, params): + observation = np.array(state.observation_tensor(), np.float32) + policy = np.exp(net.apply(params, observation)) + probs_actions = [(p, a + MIN_ACTION) for a, p in enumerate(policy)] + pred = max(probs_actions)[1] + return pred + + +def _run_once(state, bots, net, params): + """Plays bots with each other, returns terminal utility for each player.""" + for bot in bots: + bot.restart() + while not state.is_terminal(): + if state.is_chance_node(): + outcomes, probs = zip(*state.chance_outcomes()) + state.apply_action(np.random.choice(outcomes, p=probs)) + else: + if FLAGS.sleep: + time.sleep(FLAGS.sleep) # wait for the human to see how it goes + if state.current_player() % 2 == 1: + # Have simplest play for now + action = state.legal_actions()[0] + if action > 51: + # TODO(ed2k) extend beyond just bidding + action = ai_action(state, net, params) + state.apply_action(action) + else: + result = bots[state.current_player() // 2].step(state) + state.apply_action(result) + return state + + +def main(argv): + if len(argv) > 1: + raise app.UsageError("Too many command-line arguments.") + game = pyspiel.load_game("bridge(use_double_dummy_result=false)") + net, params = load_model() + bots = [ + bluechip_bridge.BlueChipBridgeBot(game, 0, controller_factory), + bluechip_bridge.BlueChipBridgeBot(game, 2, controller_factory) + ] + + results = [] + + for i_deal in range(FLAGS.num_deals): + state = _run_once(game.new_initial_state(), bots, net, params) + print("Deal #{}; final state:\n{}".format(i_deal, state)) + results.append(state.returns()) + + stats = np.array(results) + mean = np.mean(stats, axis=0) + stderr = np.std(stats, axis=0, ddof=1) / np.sqrt(FLAGS.num_deals) + print(u"Absolute score: {:+.1f}\u00b1{:.1f}".format(mean[0], stderr[0])) + print(u"Relative score: {:+.1f}\u00b1{:.1f}".format(mean[1], stderr[1])) + + +def controller_factory(): + """Implements bluechip_bridge.BlueChipBridgeBot.""" + client = _WBridge5Client(FLAGS.bot_cmd) + client.start() + return client + + +class _WBridge5Client(object): + """Manages the connection to a WBridge5 bot.""" + + def __init__(self, command): + self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.sock.bind(("", 0)) + self.port = self.sock.getsockname()[1] + self.sock.listen(1) + self.process = None + self.command = command.format(port=self.port) + + def start(self): + if self.process is not None: + self.process.kill() + self.process = subprocess.Popen(self.command.split(" ")) + self.conn, self.addr = self.sock.accept() + + def read_line(self): + line = "" + while True: + self.conn.settimeout(FLAGS.timeout_secs) + data = self.conn.recv(1024) + if not data: + raise EOFError("Connection closed") + line += data.decode("ascii") + if line.endswith("\n"): + return re.sub(r"\s+", " ", line).strip() + + def send_line(self, line): + self.conn.send((line + "\r\n").encode("ascii")) + + def terminate(self): + self.process.kill() + self.process = None + + +if __name__ == "__main__": + app.run(main) diff --git 
a/scenarios/bargaining/open_spiel/open_spiel/python/examples/catch_jax_policy_gradient.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/catch_jax_policy_gradient.py new file mode 100644 index 0000000..665680c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/catch_jax_policy_gradient.py @@ -0,0 +1,85 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Example use of JAX policy gradient implementatiom on catch environment.""" + +import logging +from absl import app +from absl import flags + +from open_spiel.python.environments import catch +from open_spiel.python.jax import policy_gradient + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("num_episodes", int(1e5), "Number of train episodes.") +flags.DEFINE_integer("eval_every", int(1e3), + "'How often to evaluate the policy.") +flags.DEFINE_enum("algorithm", "a2c", ["rpg", "qpg", "rm", "a2c"], + "Algorithms to run.") + + +def _eval_agent(env, agent, num_episodes): + """Evaluates `agent` for `num_episodes`.""" + rewards = 0.0 + for _ in range(num_episodes): + time_step = env.reset() + episode_reward = 0 + while not time_step.last(): + agent_output = agent.step(time_step, is_evaluation=True) + time_step = env.step([agent_output.action]) + episode_reward += time_step.rewards[0] + rewards += episode_reward + return rewards / num_episodes + + +def main_loop(unused_arg): + """Trains a Policy Gradient agent in the catch environment.""" + env = catch.Environment() + info_state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + train_episodes = FLAGS.num_episodes + + agent = policy_gradient.PolicyGradient( + player_id=0, + info_state_size=info_state_size, + num_actions=num_actions, + loss_str=FLAGS.algorithm, + hidden_layers_sizes=[128, 128], + lambda_=1.0, + entropy_cost=0.01, + critic_learning_rate=0.1, + pi_learning_rate=0.1, + num_critic_before_pi=3) + + # Train agent + for ep in range(train_episodes): + time_step = env.reset() + while not time_step.last(): + agent_output = agent.step(time_step) + action_list = [agent_output.action] + time_step = env.step(action_list) + # Episode is over, step agent with final info state. 
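    # (This terminal call lets the agent see the episode's final reward and
    # close out its trajectory; no new action is applied to the environment.)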
+ agent.step(time_step) + + if ep and ep % FLAGS.eval_every == 0: + logging.info("-" * 80) + logging.info("Episode %s", ep) + logging.info("Loss: %s", agent.loss) + avg_return = _eval_agent(env, agent, 100) + logging.info("Avg return: %s", avg_return) + + +if __name__ == "__main__": + app.run(main_loop) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/catch_pytorch_policy_gradient.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/catch_pytorch_policy_gradient.py new file mode 100644 index 0000000..3d206b7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/catch_pytorch_policy_gradient.py @@ -0,0 +1,86 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Python spiel example.""" + +import logging +from absl import app +from absl import flags + +from open_spiel.python.environments import catch +from open_spiel.python.pytorch import policy_gradient + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("num_episodes", int(1e5), "Number of train episodes.") +flags.DEFINE_integer("eval_every", int(1e3), + "'How often to evaluate the policy.") +flags.DEFINE_enum("algorithm", "a2c", ["rpg", "qpg", "rm", "a2c"], + "Algorithms to run.") + + +def _eval_agent(env, agent, num_episodes): + """Evaluates `agent` for `num_episodes`.""" + rewards = 0.0 + for _ in range(num_episodes): + time_step = env.reset() + episode_reward = 0 + while not time_step.last(): + agent_output = agent.step(time_step, is_evaluation=True) + time_step = env.step([agent_output.action]) + episode_reward += time_step.rewards[0] + rewards += episode_reward + return rewards / num_episodes + + +def main_loop(unused_arg): + """Trains a Policy Gradient agent in the catch environment.""" + env = catch.Environment() + info_state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + train_episodes = FLAGS.num_episodes + + agent = policy_gradient.PolicyGradient( + player_id=0, + info_state_size=info_state_size, + num_actions=num_actions, + loss_str=FLAGS.algorithm, + hidden_layers_sizes=[128, 128], + batch_size=128, + entropy_cost=0.01, + critic_learning_rate=0.1, + pi_learning_rate=0.1, + num_critic_before_pi=3) + + # Train agent + for ep in range(train_episodes): + time_step = env.reset() + while not time_step.last(): + agent_output = agent.step(time_step) + action_list = [agent_output.action] + time_step = env.step(action_list) + # Episode is over, step agent with final info state. 
+ agent.step(time_step) + + if ep and ep % FLAGS.eval_every == 0: + logging.info("-" * 80) + logging.info("Episode %s", ep) + logging.info("Loss: %s", agent.loss) + avg_return = _eval_agent(env, agent, 100) + logging.info("Avg return: %s", avg_return) + + +if __name__ == "__main__": + app.run(main_loop) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/cfr_cpp_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/cfr_cpp_example.py new file mode 100644 index 0000000..bfc81f2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/cfr_cpp_example.py @@ -0,0 +1,75 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Example use of the CFR algorithm on Kuhn Poker.""" + +import pickle +import sys +from absl import app +from absl import flags + +import pyspiel + +FLAGS = flags.FLAGS + +flags.DEFINE_enum("solver", "cfr", ["cfr", "cfrplus", "cfrbr"], "CFR solver") +flags.DEFINE_integer("iterations", 20, "Number of iterations") +flags.DEFINE_string("game", "kuhn_poker", "Name of the game") +flags.DEFINE_integer("players", 2, "Number of players") + + +def main(_): + game = pyspiel.load_game( + FLAGS.game, + {"players": FLAGS.players}, + ) + + solver = None + if FLAGS.solver == "cfr": + solver = pyspiel.CFRSolver(game) + elif FLAGS.solver == "cfrplus": + solver = pyspiel.CFRPlusSolver(game) + elif FLAGS.solver == "cfrbr": + solver = pyspiel.CFRBRSolver(game) + else: + print("Unknown solver") + sys.exit(0) + + for i in range(int(FLAGS.iterations / 2)): + solver.evaluate_and_update_policy() + print("Iteration {} exploitability: {:.6f}".format( + i, pyspiel.exploitability(game, solver.average_policy()))) + + filename = "/tmp/{}_solver.pickle".format(FLAGS.solver) + print("Persisting the model...") + with open(filename, "wb") as file: + pickle.dump(solver, file, pickle.HIGHEST_PROTOCOL) + + print("Loading the model...") + with open(filename, "rb") as file: + loaded_solver = pickle.load(file) + print("Exploitability of the loaded model: {:.6f}".format( + pyspiel.exploitability(game, loaded_solver.average_policy()))) + + for i in range(int(FLAGS.iterations / 2)): + loaded_solver.evaluate_and_update_policy() + tabular_policy = loaded_solver.tabular_average_policy() + print(f"Tabular policy length: {len(tabular_policy)}") + print("Iteration {} exploitability: {:.6f}".format( + int(FLAGS.iterations / 2) + i, + pyspiel.exploitability(game, loaded_solver.average_policy()))) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/cfr_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/cfr_example.py new file mode 100644 index 0000000..662b53b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/cfr_example.py @@ -0,0 +1,44 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance 
with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Example use of the CFR algorithm on Kuhn Poker.""" + +from absl import app +from absl import flags + +from open_spiel.python.algorithms import cfr +from open_spiel.python.algorithms import exploitability +import pyspiel + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("iterations", 100, "Number of iterations") +flags.DEFINE_string("game", "kuhn_poker", "Name of the game") +flags.DEFINE_integer("players", 2, "Number of players") +flags.DEFINE_integer("print_freq", 10, "How often to print the exploitability") + + +def main(_): + game = pyspiel.load_game(FLAGS.game, {"players": FLAGS.players}) + cfr_solver = cfr.CFRSolver(game) + + for i in range(FLAGS.iterations): + cfr_solver.evaluate_and_update_policy() + if i % FLAGS.print_freq == 0: + conv = exploitability.exploitability(game, cfr_solver.average_policy()) + print("Iteration {} exploitability {}".format(i, conv)) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/chat_game_cfr_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/chat_game_cfr_example.py new file mode 100644 index 0000000..a3c70e2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/chat_game_cfr_example.py @@ -0,0 +1,544 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Counterfactual regret minimization (CFR) experiment. + +Runs OpenSpiel CFR on a chat game. 
+""" + +import dataclasses +import enum + +from typing import Callable, Union + +from absl import app +from absl import flags +from absl import logging + +import ml_collections + +import numpy as np + +from open_spiel.python import policy as pyspiel_policy +from open_spiel.python.algorithms import expected_game_score + +from open_spiel.python.games import chat_game # pylint: disable=unused-import +from open_spiel.python.games.chat_games import chat_game_base + +from open_spiel.python.games.chat_games.configs import config_debate +from open_spiel.python.games.chat_games.configs import config_schedule_meeting_w_dow +from open_spiel.python.games.chat_games.configs import config_schedule_meeting_w_tone +from open_spiel.python.games.chat_games.configs import config_trade_fruit_w_tone + +from open_spiel.python.games.chat_games.envs.comm_substrates import schedules + +from open_spiel.python.games.chat_games.utils import test_utils as chat_test_utils + +import pyspiel + + +_SAVE_PATH = flags.DEFINE_string("save_path", + default="", + help="path for writing results") + +LLM_TYPE = chat_test_utils.TestLLM.MOCK + + +class Domain(enum.StrEnum): + TRADE_FRUIT_W_TONE = enum.auto() + DEBATE_W_STYLE = enum.auto() + SCHEDULE_MEETING_W_DOW = enum.auto() + SCHEDULE_MEETING_W_TONE = enum.auto() + + +def new_debate_scenario_config( + config: ml_collections.config_dict.ConfigDict, + game_id: int, +) -> ml_collections.config_dict.ConfigDict: + """Creates a new debate scenario config with a new topic. + + Arguments: + config: the original debate scenario config dict (this should contain + examples for generating new scenarios) + game_id: int, will index into set of 20 debate topics found in + https://www.englishclub.com/speaking/agreeing-disagreeing-topics.php + Returns: + new_config: debate config with redefined debate topic + """ + # https://www.englishclub.com/speaking/agreeing-disagreeing-topics.php + topics = ["Breakfast is the most important meal of the day.", + "Swimming in the ocean is better than swimming in a public pool.", + "Alcohol should be illegal.", + "Children should provide room and board for their aging parents.", + "Studying grammar is more important than practising conversation " + + "skills.", + "Television is the leading cause of violence in todays society.", + "Dogs make better companions than cats.", + "Smoking should be permitted in public places.", + "Females are better students than males.", + "A parent shouldn't pierce a babys ears.", + "Women should be allowed to go topless in public.", + "Lawyers should make a higher salary than nurses.", + "Everyone should plan their own funeral.", + "Reading English is more difficult than writing English.", + "Summer is the best season of the year.", + "Children under 13 should not be allowed to babysit.", + "High school students should wear uniforms.", + "21 should be the legal driving age around the world.", + "Rock and Roll is the best kind of music.", + "The government should pay for post secondary education."] + + topic = topics[game_id] + config.game.given_private_info["topic"] = [topic, topic] + + return config + + +def same_scenario_config( + config: ml_collections.config_dict.ConfigDict, + game_id: int, +) -> ml_collections.config_dict.ConfigDict: + """Dummy function for games that don't need any config modification. 
+ + Arguments: + config: the original game scenario config dict (this should contain + examples for generating new scenarios) + game_id: int, unused + Returns: + new_config: original game config + """ + del game_id + + return config + + +def get_config_debate(config: ml_collections.config_dict.ConfigDict): + """Get config for imitation dataset construction of debates.""" + + config.config_rnd = config_debate.get_config() + config.new_config = new_debate_scenario_config + + return config + + +def get_config_trade_fruit_w_tone( + config: ml_collections.config_dict.ConfigDict, +): + """Get config for imitation dataset construction of trading fruit.""" + + config.config_rnd = config_trade_fruit_w_tone.get_config() + config.new_config = same_scenario_config + + return config + + +def get_config_schedule_meeting_w_dow( + config: ml_collections.config_dict.ConfigDict, +): + """Get config for imitation dataset construction of meeting scheduling dow.""" + + config.config_rnd = config_schedule_meeting_w_dow.get_config() + config.new_config = same_scenario_config + + return config + + +def get_config_schedule_meeting_w_tone( + config: ml_collections.config_dict.ConfigDict, +): + """Get config for imitation dataset construction of meeting scheduling dow.""" + + config.config_rnd = config_schedule_meeting_w_tone.get_config() + config.new_config = same_scenario_config + + return config + + +def get_config(): + """Get configuration for imitation dataset construction.""" + config = ml_collections.config_dict.ConfigDict() + + config.game_string = "chat_game" + config.game_id = 0 + config.seed = 34239871 + config.num_demos = 10 + config.num_iters = 4 + config.domain = Domain.SCHEDULE_MEETING_W_TONE + + if config.domain == Domain.DEBATE_W_STYLE: + config = get_config_debate(config) + elif config.domain == Domain.TRADE_FRUIT_W_TONE: + config = get_config_trade_fruit_w_tone(config) + elif config.domain == Domain.SCHEDULE_MEETING_W_DOW: + config = get_config_schedule_meeting_w_dow(config) + config.substrate = schedules + elif config.domain == Domain.SCHEDULE_MEETING_W_TONE: + config = get_config_schedule_meeting_w_tone(config) + else: + raise ValueError("Unknown domain: %s" % config.domain) + + return config + + +@dataclasses.dataclass(frozen=True) +class InfoStateRecord: + observation: str | np.ndarray + observation_str: str + probabilities: list[float] + actions: list[int] + prev_message: str + prev_speaker: int + prev_action_strs: list[str] + + +@dataclasses.dataclass(frozen=False) +class GameStats: + num_states: int = 0 + num_chance_nodes: int = 0 + num_decision_nodes: int = 0 + num_simultaneous_nodes: int = 0 + num_terminals: int = 0 + info_state_dict: dict[str, InfoStateRecord] = dataclasses.field( + default_factory=dict) + + +@dataclasses.dataclass(frozen=True) +class EqRecord: + nash_conv: float + payoffs_eq_vs_bg_any: list[float] + payoffs_any: list[float] + payoffs_eq: list[float] + + +def record_info_state_data( + state: pyspiel.State, + policy: pyspiel.Policy, + observer: Union[None, chat_game_base.ChatGameObserverBase] = None, + vectorize: Union[None, Callable[[str, int], np.ndarray]] = None, +) -> InfoStateRecord: + """Return observation and equilibrium strategy for a given state+policy.""" + pi = policy.action_probabilities(state) + action_list = list(pi.keys()) + prob_list = list(pi.values()) + if observer is not None: + info_str = observer.string_from(state, player=state.current_player()) + if vectorize is not None: + info = vectorize(info_str, 768) + else: + info = info_str + else: + info = 
info_str = str(state) + prev_msg = "" + prev_speaker = -1 + prev_action_strs = [] + if state.played_actions: + prev_action = state.played_actions[-1] + prev_msg = state.dialogue[-1] + prev_speaker = state.speakers[-1] + prev_speaker = int(prev_speaker) + prev_action_dict = state.unravel_flat_action_to_dict(prev_speaker, + prev_action) + action_keys = state.prompt_actions.keys() + prev_action_strs = [prev_action_dict["action"][key] for key in action_keys] + sample = InfoStateRecord(info, info_str, prob_list, action_list, + prev_msg, prev_speaker, prev_action_strs) + return sample + + +# traverses game tree and records game stats like info states. +def traverse_game_tree( + game: pyspiel.Game, + state: pyspiel.State, + game_stats: GameStats, + policy: pyspiel.Policy, + observer: Union[None, chat_game_base.ChatGameObserverBase] = None, + vectorize: Union[None, Callable[[str, int], np.ndarray]] = None, +): + """Traverse the game tree and record GameStats in place. + + Args: + game: pyspiel.Game + state: initial pyspiel.State + game_stats: empty GameStats object + policy: pyspiel Policy + observer: pyspiel Observer + vectorize: method to vectorize a string + """ + if state.is_terminal(): + game_stats.num_terminals += 1 + elif state.is_chance_node(): + game_stats.num_chance_nodes += 1 + for outcome in state.legal_actions(): + child = state.child(outcome) + traverse_game_tree(game, child, game_stats, policy, observer, vectorize) + elif state.is_simultaneous_node(): + game_stats.num_simultaneous_nodes += 1 + # TODO(imgemp): need to implement recording data for simultaneous + # Using joint actions for convenience. Can use legal_actions(player) to + # and state.apply_actions when walking over individual players + for joint_action in state.legal_actions(): + child = state.child(joint_action) + traverse_game_tree(game, child, game_stats, policy, observer, vectorize) + else: + game_stats.num_decision_nodes += 1 + if game.get_type().provides_information_state_string: + sample = record_info_state_data(state, policy, observer, vectorize) + game_stats.info_state_dict[ + state.information_state_string()] = sample + for outcome in state.legal_actions(): + child = state.child(outcome) + traverse_game_tree(game, child, game_stats, policy, observer, vectorize) + + +class ImitationDatasetConstructor(): + """Construct a dataset of (observation, CFR strategy) for imitation.""" + + def __init__(self, save_path, config): + self.save_path = save_path + self.game_string = config.game_string + self.game_id = config.game_id + self.seed = config.seed + self.num_demos = config.num_demos + self.num_iters = config.num_iters + self.domain = config.domain.value + self.config_rnd = config.config_rnd + self.new_config = config.new_config + + self._rnd = np.random.RandomState(self.seed) + + self.reporting = ImitationDatasetConstructorReporting( + save_path=self.save_path, + experiment_name="imitation_dataset_construction", + game_string=self.game_string, + game_id=self.game_id, + seed=self.seed, + num_demos=self.num_demos, + num_iters=self.num_iters, + domain=self.domain) + + def sample_to_dict( + self, + info_state_string: str, + sample: InfoStateRecord, + eq_record: EqRecord): + """Constructs a dict mapping named keys to values in arguments.""" + + sample_dict = {} + sample_dict["info_state_string"] = info_state_string + sample_dict["observation"] = sample.observation + sample_dict["observation_str"] = sample.observation_str + sample_dict["probabilities"] = sample.probabilities + sample_dict["actions"] = sample.actions + 
sample_dict["prev_message"] = sample.prev_message + sample_dict["prev_speaker"] = sample.prev_speaker + sample_dict["prev_action_strs"] = sample.prev_action_strs + sample_dict["nash_conv"] = eq_record.nash_conv + sample_dict["payoffs_eq_vs_bg_any"] = eq_record.payoffs_eq_vs_bg_any + sample_dict["payoffs_any"] = eq_record.payoffs_any + sample_dict["payoffs_eq"] = eq_record.payoffs_eq + return sample_dict + + def eval_vs_any(self, game: pyspiel.Game, eq: pyspiel.Policy + ) -> EqRecord: + """Evaluates the equilibrium against a background 'any' policy. + + Arguments: + game: pyspiel.Game + eq: pyspiel.Policy equilibrium policy (e.g., result of CFR) + Returns: + EqRecord containing + ne_conv: float, sum of gains from each player best responding to eq + payoffs_eq_vs_bg_any: list of floats, payoffs for each player when + playing their side of equilibrium against background agents that all + play 'any' + payoff_any: list of floats, payoffs for each player when everyone plays + 'any' policy + payoff_eq: list of floats, payoffs for each player when everyone plays + equilibrium policy + """ + ne_conv = pyspiel.nash_conv(game, eq) + + # construct pyspiel.Policy to play "any" tone (null strategy) + # the action set is assumed to be (msg_receiver, prompt_action) + # and "any" is assumed to be the last action in the prompt_action_list + num_players = game.num_players() + num_prompt_actions = game.num_distinct_actions() // num_players + payoffs_eq_vs_bg_any = [] + one_hot_any = [0.0 for _ in range(game.num_distinct_actions())] + for i in range(num_players): + idx = i * num_prompt_actions + (num_prompt_actions - 1) + one_hot_any[idx] = 1 / float(num_players) + policy_any = dict(zip(range(len(one_hot_any)), one_hot_any)) + + def callable_policy(state): + del state + return policy_any # pylint:disable=cell-var-from-loop + + # compute expected payoffs for each player playing eq against "any" bg strat + for i in range(num_players): + policies = [] + for j in range(num_players): + if i == j: + # grab player i's side of avg_policy (eq_i) + eq_i = pyspiel_policy.pyspiel_policy_to_python_policy(game, + eq, + players=[i]) + policies.append(eq_i) + else: + # setting player j policy to "any" + p_j = pyspiel_policy.tabular_policy_from_callable(game, + callable_policy, + players=[j]) + policies.append(p_j) + state = game.new_initial_state() + payoff_array = expected_game_score.policy_value(state, policies) + payoffs_eq_vs_bg_any.append(payoff_array[i]) + + # compute expected payoffs when everyone plays "any" strategy + policies = [] + for j in range(num_players): + p_j = pyspiel_policy.tabular_policy_from_callable(game, + callable_policy, + players=[j]) + policies.append(p_j) + state = game.new_initial_state() + payoffs_any = expected_game_score.policy_value(state, policies) + + # compute expected payoffs when everyone plays eq strategy + policies = [] + for j in range(num_players): + # grab player j's side of avg_policy (eq_j) + p_j = pyspiel_policy.pyspiel_policy_to_python_policy(game, + eq, + players=[j]) + policies.append(p_j) + state = game.new_initial_state() + payoffs_eq = expected_game_score.policy_value(state, policies) + + eq_record = EqRecord(ne_conv, + payoffs_eq_vs_bg_any, + payoffs_any, + payoffs_eq) + + return eq_record + + def construct_dataset(self): + """Construct a dataset of (observation, optimal strategy) for imitation.""" + + logging.info("Building vectorizer") + vectorizer = chat_test_utils.MockVectorizer() + vectorize = vectorizer.vectorize + + for demo in range(self.num_demos): + 
logging.info("Creating new config for demo %d", demo) + + config = self.new_config(self.config_rnd, self.game_id) + + game = pyspiel.load_game(self.game_string, config.params.to_dict()) + + seed = self._rnd.randint(42, 12345 + 1) + game.load_chat_game(llm_type=LLM_TYPE, + vectorize=vectorize, + seed=seed, + **config.game) + + game_cached = pyspiel.convert_to_cached_tree(game) + + logging.info("Constructing CFR solver") + cfr_solver = pyspiel.CFRSolver(game_cached) + + logging.info("Evaluating and Updating CFR policy") + for i in range(self.num_iters): + logging.info("CFR iteration %d", i) + cfr_solver.evaluate_and_update_policy() + + logging.info("Averaging CFR policy") + average_policy = cfr_solver.tabular_average_policy() + + eq_record = self.eval_vs_any(game_cached, average_policy) + logging.info("NashConv: %f", eq_record.nash_conv) + logging.info("Payoffs vs background any policy: %s", + eq_record.payoffs_eq_vs_bg_any) + logging.info("Payoffs using any policy: %s", eq_record.payoffs_any) + logging.info("Payoffs using eq policy: %s", eq_record.payoffs_eq) + + logging.info("Building info_state -> observation vectorizer") + observer = game.make_py_observer() + vectorizer = chat_test_utils.MockVectorizer() + vectorize = vectorizer.vectorize + + logging.info("Traversing game tree and storing imitation policy") + game_stats = GameStats() + state = game.new_initial_state() + traverse_game_tree(game, state, game_stats, average_policy, + observer=observer, vectorize=vectorize) + h = f = "*" * 50 + for info_state_string in game_stats.info_state_dict: + logging.info("%s\nInfo state string:\n%s\n%s", h, info_state_string, f) + sample = game_stats.info_state_dict[info_state_string] + results = self.sample_to_dict(info_state_string, sample, eq_record) + self.reporting.report(demo, results) + + logging.info("Number of info states (length of policy): %d", + len(average_policy)) + + +class ImitationDatasetConstructorReporting(object): + """Utilities for logging an experiment run.""" + + def __init__( + self, + save_path: str, + experiment_name: str, + game_string: str, + game_id: int, + seed: int, + num_demos: int, + num_iters: int, + domain: str, + ): + self.save_path = save_path + self.experiment_name = experiment_name + self.game_string = game_string + self.game_id = game_id + self.seed = seed + self.num_demos = num_demos + self.num_iters = num_iters + self.domain = domain + + config_dict_params = {} + config_dict_params["experiment_name"] = self.experiment_name + config_dict_params["game_string"] = self.game_string + config_dict_params["seed"] = self.seed + config_dict_params["num_demos"] = self.num_demos + config_dict_params["num_iters"] = self.num_iters + config_dict_params["domain"] = self.domain + + print("Config parameters:\n{:}".format(config_dict_params)) + + def report(self, demo: int, results): + """Report the exploitability.""" + print("CFR statistics ({:d}):\n{:}".format(demo, results)) + + +def main(_): + logging.set_verbosity(logging.ERROR) # silence internal game logging + save_path = _SAVE_PATH.value + config = get_config() + im = ImitationDatasetConstructor(save_path, config) + im.construct_dataset() + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/chat_game_psro_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/chat_game_psro_example.py new file mode 100644 index 0000000..7d771ed --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/chat_game_psro_example.py @@ -0,0 
+1,415 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Prompt-Space Response-Oracle (PSRO) experiment. + +Runs PSRO exploring the space of `tones` with which to construct messages. Only +works with `tones` for now. +""" + +import enum +import itertools +import math + +from absl import app +from absl import flags +from absl import logging + +import ml_collections + +import nashpy +import numpy as np + +from open_spiel.python.games import chat_game # pylint: disable=unused-import +from open_spiel.python.games.chat_games.configs import config_schedule_meeting_w_tone +from open_spiel.python.games.chat_games.configs import config_trade_fruit_w_tone +from open_spiel.python.games.chat_games.envs.utils import text +from open_spiel.python.games.chat_games.utils import test_utils as chat_test_utils + +import pyspiel + + +_SAVE_PATH = flags.DEFINE_string("save_path", + default="", + help="path for writing results") + +LLM_TYPE = chat_test_utils.TestLLM.MOCK + + +class Domain(enum.StrEnum): + TRADE_FRUIT_W_TONE = enum.auto() + SCHEDULE_MEETING_W_TONE = enum.auto() + + +def get_config(): + """Get configuration for imitation dataset construction.""" + config = ml_collections.config_dict.ConfigDict() + + config.game_string = "chat_game" + config.seed = 34239871 + config.num_iters = 4 + config.num_trials = 10 + config.num_candidates = 2 + config.domain = Domain.SCHEDULE_MEETING_W_TONE + + if config.domain == Domain.TRADE_FRUIT_W_TONE: + config.env_config = config_trade_fruit_w_tone.get_config() + elif config.domain == Domain.SCHEDULE_MEETING_W_TONE: + config.env_config = config_schedule_meeting_w_tone.get_config() + else: + raise ValueError("Unknown domain: %s" % config.domain) + + return config + + +def sym(pt): + """Symmetrize stack of payoff tensors (stacked along first dimension). + + A payoff tensor can be `symmetrized' by averaging over all possible + permutations of the players. This means permuting the axes corresponding to + the player strategies as well as the payoffs assigned to the players. E.g., + player A playing strategy 1 and player B playing strategy 3 is no different + from player A playing strategy 3 and player B playing strategy 1 in a + symmetric game. Note we permuted the strategies, but we must also permute the + payoffs. 
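+
+  For example, with two players and payoff matrices (A, B) = (pt[0], pt[1]),
+  symmetrizing gives pt_sym[0] = (A + B^T) / 2 and pt_sym[1] = pt_sym[0]^T.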
+ + Args: + pt: tensor of shape: (num_players,) + (num_strategies,) * num_players + Returns: + pt_sym: symmetrized payoff tensor of same shape + """ + num_players = len(pt.shape[1:]) + num_perms = math.factorial(num_players) + pt_sym = np.zeros_like(pt) + for _, perm_players in enumerate(itertools.permutations(range(num_players))): + perm_axes = tuple([pi + 1 for pi in perm_players]) + permuted_tensor = np.transpose(pt, (0,) + perm_axes)[list(perm_players)] + pt_sym += permuted_tensor / float(num_perms) + return pt_sym + + +def random_policy(rnd, state): + # all actions are legal for now + rnd_action = tuple([rnd.choice(a) for a in state.num_actions]) + return np.ravel_multi_index(rnd_action, state.num_actions) + + +def fixed_prompt_policy(rnd, state, prompt_action_dict): + # all actions are legal for now + action = [rnd.choice(a) for a in state.num_actions] + for prompt_key, prompt_action in prompt_action_dict.items(): + prompt_key_idx = 1 + state.header.action_keys.index(prompt_key) + prompt_val_idx = state.prompt_actions[prompt_key].index(prompt_action) + action[prompt_key_idx] = prompt_val_idx + action = tuple(action) + return np.ravel_multi_index(action, state.num_actions) + + +def mixed_prompt_policy(rnd, state, prompt_keys, mixture): + # all actions are legal for now + action = [rnd.choice(a) for a in state.num_actions] + for prompt_key in prompt_keys: + prompt_key_idx = 1 + state.header.action_keys.index(prompt_key) + actions = state.prompt_actions[prompt_key] + num_actions = len(actions) + prompt_val_idx = rnd.choice(num_actions, p=mixture) + action[prompt_key_idx] = prompt_val_idx + action = tuple(action) + return np.ravel_multi_index(action, state.num_actions) + + +def build_player_policy(policies): + def player_policy(player_id, state): + return policies[player_id](state) + return player_policy + + +def simulate_dialogue(game, policy): + """Simulate a dialogue and returns payoffs for each player.""" + + state = game.new_initial_state() + + while not state.is_terminal(): + if state.is_chance_node(): + # Chance node: sample an outcome + outcomes = state.chance_outcomes() + action_list, prob_list = zip(*outcomes) + action = np.random.choice(action_list, p=prob_list) + state.apply_action(action) + else: + # Decision node: sample action for the single current player + action = policy(state.current_player(), state) + state.apply_action(action) + + # Game is now done. 
Print utilities for each player + returns = state.returns() + + return returns + + +def estimate_payoff_tensor(game, rnd, num_trials=5): + """Simulate a batch of dialogues and returns payoffs for each player.""" + + num_players = game.num_players() + num_actions = len(game.given_prompt_actions["tone"]) + payoff_tensor = np.zeros( + (num_trials, num_players) + (num_actions,) * num_players + ) + + joint_actions = list(itertools.product(range(num_actions), + repeat=num_players)) + + for trial in range(num_trials): + for joint_action_idx in joint_actions: + policies = [] + for _, tone_idx in zip(range(num_players), joint_action_idx): + fixed_tone = {"tone": game.given_prompt_actions["tone"][tone_idx]} + policy = lambda state: fixed_prompt_policy(rnd, state, fixed_tone) # pylint:disable=cell-var-from-loop + policies.append(policy) + player_policy = build_player_policy(policies) + + returns = simulate_dialogue(game, player_policy) + + pt_index = (trial, slice(None)) + joint_action_idx + + payoff_tensor[pt_index] = returns + + return payoff_tensor + + +def score_candidate_responses(game_str, config, load_dict, rnd, + background_policies, candidates, + player_ids=(0,), num_trials=5): + """Simulate a batch of dialogues and returns payoffs for each player.""" + + num_players = config.params["num_players"] + + num_candidates = len(candidates) + + config.game.given_prompt_actions["tone"] += candidates + num_actions = len(config.game.given_prompt_actions["tone"]) + config.params["num_distinct_actions"] = num_players * num_actions + + game = pyspiel.load_game(game_str, config.params.to_dict()) + + game.load_chat_game(**load_dict, **config.game) + + payoffs = np.zeros((num_trials, len(player_ids), num_candidates)) + + for player_id in player_ids: + for trial in range(num_trials): + for candidate_idx in range(num_candidates): + policies = [] + for i in range(num_players): + if player_id == i: + fixed_tone = {"tone": candidates[candidate_idx]} + policy = lambda state: fixed_prompt_policy(rnd, state, fixed_tone) # pylint:disable=cell-var-from-loop + policies.append(policy) + else: + policies.append(background_policies[i]) + player_policy = build_player_policy(policies) + + returns = simulate_dialogue(game, player_policy) + + payoffs[trial, player_id, candidate_idx] = returns[player_id] + + # undo changes to config (is this inplace?) 
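+  # Note: `+=` above mutates the caller's config in place, so the truncation
+  # below restores the original tone list and num_distinct_actions.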
+ config.game.given_prompt_actions["tone"] = config.game.given_prompt_actions[ + "tone" + ][:-num_candidates] + num_tones = len(config.game.given_prompt_actions["tone"]) + config.params["num_distinct_actions"] = num_players * num_tones + + return payoffs, candidates + + +def compute_sym_eq(pt): + game = nashpy.Game(pt[0], pt[1]) + p1_traj, p2_traj = game.asymmetric_replicator_dynamics() + p1_strat = np.mean(p1_traj, axis=0) + p2_strat = np.mean(p2_traj, axis=0) + return 0.5 * p1_strat + 0.5 * p2_strat + + +class PSRO(): + """Run prompt-space response oracle algorithm on chat game.""" + + def __init__(self, save_path, config): + self.save_path = save_path + self.game_string = config.game_string + self.seed = config.seed + self.num_iters = config.num_iters + self.num_trials = config.num_trials + self.num_candidates = config.num_candidates + self.domain = config.domain.value + self.config = config.env_config + + self.rnd = np.random.RandomState(self.seed) + + self.num_players = self.config.params["num_players"] + + self.game = pyspiel.load_game(self.game_string, + self.config.params.to_dict()) + + vectorizer = chat_test_utils.MockVectorizer() + vectorize = vectorizer.vectorize + + self.load_dict = {"llm_type": LLM_TYPE, + "vectorize": vectorize, + "seed": self.seed} + + self.game.load_chat_game(**self.load_dict, **self.config.game) + + self.reporting = PSROReporting( + save_path=self.save_path, + experiment_name="psro", + game_string=self.game_string, + seed=self.seed, + num_iters=self.num_iters, + num_trials=self.num_trials, + num_candidates=self.num_candidates, + domain=self.domain, + base_candidates=list(self.config.game.given_prompt_actions["tone"])) + + def run(self): + """Evaluate an imitation-learned policy.""" + + for psro_iter in range(self.num_iters): + + pt = estimate_payoff_tensor(self.game, + self.rnd, + num_trials=self.num_trials) + pt = pt.mean(axis=0) # mean over trials + pt = sym(pt) # symmetrize the pt + + # compute eq + sub_eq = compute_sym_eq(pt) # assume symmetric ne + + # generate num_candidate tones + actions = self.config.game.given_prompt_actions["tone"] + candidates = self.game.generate_prompts("tone", + actions, + self.num_candidates, + text.retrieve_alpha_block) + new_actions = actions + candidates + new_num_actions = len(new_actions) + + eq = np.zeros(new_num_actions) / float(new_num_actions) + eq[:pt.shape[1]] = sub_eq + + background_policies = [] + for _ in range(self.num_players): + bg_policy = lambda state: mixed_prompt_policy(self.rnd, + state, + ["tone"], + eq) # pylint:disable=cell-var-from-loop + background_policies.append(bg_policy) + + scores, candidates = score_candidate_responses( + self.game_string, + self.config, + self.load_dict, + self.rnd, + background_policies, + candidates, + player_ids=(0,), + num_trials=self.num_trials) + + mean_scores = np.mean(scores, axis=0)[0] # only need player 0's scores + br_idx = np.argmax(mean_scores) + br = candidates[br_idx] + + self.config.game.given_prompt_actions["tone"] += [br] + new_num_tones = len(self.config.game.given_prompt_actions["tone"]) + self.num_players = self.config.params["num_players"] + new_num_distinct_actions = self.num_players * new_num_tones + self.config.params["num_distinct_actions"] = new_num_distinct_actions + + self.game = pyspiel.load_game(self.game_string, + self.config.params.to_dict()) + + self.game.load_chat_game(**self.load_dict, **self.config.game) + + self.reporting.report(psro_iter, + pt, + br, + mean_scores, + candidates, + sub_eq) + + +class PSROReporting(object): + 
"""Utilities for logging an experiment run.""" + + def __init__(self, + save_path: str, + experiment_name: str, + game_string: str, + seed: int, + num_iters: int, + num_trials: int, + num_candidates: int, + domain: str, + base_candidates: list[str]): + self.save_path = save_path + self.experiment_name = experiment_name + self.game_string = game_string + self.seed = seed + self.num_iters = num_iters + self.num_trials = num_trials + self.num_candidates = num_candidates + self.domain = domain + self.base_candidates = base_candidates + + config_dict_params = {} + config_dict_params["experiment_name"] = self.experiment_name + config_dict_params["game_string"] = self.game_string + config_dict_params["seed"] = self.seed + config_dict_params["num_iters"] = self.num_iters + config_dict_params["num_trials"] = self.num_trials + config_dict_params["num_candidates"] = self.num_candidates + config_dict_params["domain"] = self.domain + config_dict_params["base_candidates"] = self.base_candidates + + print("Config parameters:\n{:}".format(config_dict_params)) + + def report(self, + psro_iter: int, + payoff_tensor: np.ndarray, + br: str, + mean_scores: np.ndarray, + candidates: np.ndarray, + eq: np.ndarray): + """Report the psro statistics.""" + psro_stats_dict = {} + psro_stats_dict["psro_iter"] = psro_iter + psro_stats_dict["payoff_tensor"] = payoff_tensor + psro_stats_dict["br"] = br + psro_stats_dict["mean_scores"] = mean_scores + psro_stats_dict["candidates"] = candidates + psro_stats_dict["eq"] = eq + + print("PSRO statistics ({:d}):\n{:}".format(psro_iter, psro_stats_dict)) + + +def main(_): + logging.set_verbosity(logging.ERROR) # silence internal game logging + save_path = _SAVE_PATH.value + config = get_config() + psro = PSRO(save_path, config) + psro.run() + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/data/bridge/test.txt b/scenarios/bargaining/open_spiel/open_spiel/python/examples/data/bridge/test.txt new file mode 100644 index 0000000..1548999 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/data/bridge/test.txt @@ -0,0 +1,20 @@ +27 46 36 43 18 22 0 20 24 2 40 41 28 16 21 10 42 32 48 47 13 17 3 5 12 25 34 8 29 38 23 4 30 26 35 19 9 44 7 51 14 1 45 15 49 33 11 50 39 31 6 37 52 52 58 59 52 61 52 62 52 64 52 52 52 39 31 3 47 41 13 1 45 21 5 49 17 27 16 7 51 50 18 2 6 15 12 22 23 48 4 24 32 0 20 28 44 25 34 37 9 43 14 26 11 10 30 46 36 33 40 8 29 38 35 19 42 +33 6 20 31 29 38 24 25 30 50 8 48 37 51 32 44 18 41 11 4 35 0 47 17 39 34 10 3 46 16 13 21 26 9 40 49 45 36 23 14 15 2 19 42 1 5 28 12 43 27 22 7 52 57 52 60 53 52 63 53 52 52 52 42 18 2 10 14 26 34 22 51 11 3 15 0 28 44 35 45 5 13 25 37 9 23 17 40 48 39 16 1 41 19 21 8 12 43 36 33 6 20 49 4 29 27 24 38 32 7 30 31 46 50 47 +8 50 0 38 10 21 3 39 14 18 24 34 9 1 44 35 4 48 22 17 23 30 51 43 42 28 27 32 25 40 47 29 11 15 6 31 41 36 2 33 20 46 7 49 16 5 26 19 12 37 13 45 52 56 53 58 52 60 52 71 52 72 52 76 52 52 52 51 19 23 15 7 31 11 28 49 9 5 13 45 25 1 3 32 4 48 0 40 44 17 8 38 10 50 22 36 24 35 12 46 2 34 14 18 26 29 42 43 16 21 27 30 6 33 20 39 41 37 47 +30 32 10 35 50 45 21 7 1 42 39 43 0 16 40 20 36 15 22 44 26 6 4 51 47 46 25 14 29 5 34 11 49 31 37 9 41 13 24 8 28 17 48 23 33 18 3 19 38 2 27 12 56 57 52 63 52 52 52 0 32 48 8 3 51 47 15 44 28 16 4 14 50 2 10 49 5 37 9 36 31 24 20 46 22 12 26 13 25 19 1 43 41 17 27 7 33 45 39 40 23 29 6 11 30 18 21 35 38 42 34 +5 21 49 50 8 6 15 13 40 14 44 51 19 27 18 9 2 41 38 31 3 37 28 23 46 11 47 0 
48 1 17 10 25 22 29 20 24 36 30 7 16 4 26 42 43 39 12 45 34 35 32 33 52 52 52 56 60 61 65 52 52 52 41 49 13 5 12 20 40 4 8 36 44 0 18 10 34 6 3 27 47 51 45 25 1 17 9 24 21 29 43 11 15 7 19 35 28 23 38 50 2 14 42 46 22 26 16 37 32 33 30 31 48 39 +46 31 25 23 30 7 42 9 18 47 12 15 13 48 2 44 45 27 24 10 20 49 4 50 32 17 29 33 36 0 39 22 28 37 40 51 43 11 8 5 1 34 35 41 6 19 14 3 26 21 16 38 52 58 52 70 52 71 52 72 52 73 52 52 52 39 3 43 47 11 35 51 6 44 20 0 16 5 1 37 29 48 4 9 28 34 2 50 18 10 46 7 14 19 8 23 26 41 13 21 25 33 45 49 12 17 24 22 30 27 40 15 32 31 42 38 36 +39 11 42 27 15 41 31 32 24 1 13 47 33 28 48 16 4 20 22 0 5 35 17 36 34 19 50 44 29 14 30 43 6 51 10 25 8 46 37 21 7 40 38 49 18 45 26 9 12 23 3 2 52 58 72 73 52 52 52 48 0 12 20 13 9 5 45 11 31 43 7 47 15 19 3 27 39 51 17 28 10 16 4 40 37 32 8 1 22 49 29 44 24 14 26 36 33 41 30 21 6 46 38 25 18 35 42 23 50 2 34 +7 43 8 3 46 9 34 39 40 17 23 26 5 49 0 47 31 15 10 27 12 45 14 33 50 37 28 48 18 13 2 25 42 1 30 21 51 32 38 41 4 36 24 44 20 35 22 16 29 11 19 6 59 66 52 52 52 28 44 20 32 21 29 37 2 1 0 25 5 6 42 13 10 36 8 48 4 3 7 35 23 11 19 39 31 16 40 17 24 15 14 27 51 50 9 22 26 43 30 47 12 41 18 49 34 45 38 33 46 +17 45 33 3 48 38 49 23 41 24 21 4 31 22 18 40 5 36 19 11 16 34 13 42 35 25 43 1 27 12 47 15 32 0 7 8 20 39 9 51 29 6 10 28 37 30 46 50 44 26 14 2 56 52 57 58 60 52 63 52 65 52 76 52 52 52 39 43 51 27 1 5 25 33 49 11 17 45 10 42 37 6 16 12 9 8 14 2 41 22 20 0 13 4 18 50 29 26 32 36 21 28 46 40 44 30 7 15 35 24 48 34 19 3 31 38 47 23 +51 41 28 13 45 38 44 15 16 22 32 37 47 46 23 36 19 25 50 11 24 31 6 3 5 49 9 8 18 4 20 17 26 43 42 0 2 40 21 7 35 27 34 29 48 39 10 1 14 12 33 30 55 53 57 52 62 52 72 52 52 52 15 47 27 23 2 22 42 30 50 1 14 38 44 0 24 4 10 3 18 46 43 6 7 19 28 8 16 40 31 9 11 35 48 12 32 36 51 39 21 37 5 41 33 13 49 34 17 45 20 29 26 25 +50 10 18 28 12 45 27 48 20 1 47 31 36 16 40 49 19 15 24 35 46 38 7 33 30 2 37 5 41 42 51 6 44 29 39 22 32 8 26 14 34 25 17 21 3 9 0 43 23 11 4 13 55 52 58 52 60 52 65 52 68 52 73 52 52 52 48 12 16 0 28 32 8 4 3 15 51 31 17 5 41 45 9 37 49 19 50 10 18 6 23 11 39 43 33 20 1 27 47 35 36 25 24 13 44 2 46 38 26 14 34 42 7 22 40 21 30 29 +10 35 49 20 15 18 34 42 32 24 31 22 47 28 43 0 25 7 14 12 45 5 17 13 48 46 27 4 36 3 16 41 38 44 29 6 26 51 23 21 1 39 11 8 37 50 19 40 33 30 2 9 56 53 63 52 52 52 20 48 24 16 15 35 43 21 17 9 45 5 1 7 29 13 44 27 0 32 11 4 47 51 28 49 40 36 8 10 18 23 19 12 25 39 50 2 6 26 46 14 22 38 30 34 42 33 41 37 3 31 +42 35 8 14 21 45 38 1 30 19 48 23 32 20 43 49 2 40 6 44 5 17 47 16 27 34 33 36 22 10 12 9 26 24 13 37 25 29 46 4 39 51 0 3 11 41 15 50 7 31 18 28 52 52 55 52 52 59 52 69 52 52 52 0 4 32 40 17 33 37 5 1 21 41 13 45 8 9 25 29 18 49 2 16 22 24 48 47 3 11 19 43 23 7 51 20 12 28 27 44 26 10 6 36 39 31 15 14 30 34 38 46 50 42 35 +39 5 18 36 51 32 15 21 30 13 35 44 48 9 11 2 10 4 31 1 47 28 49 43 6 37 19 20 26 24 12 33 45 7 16 17 22 40 23 46 38 0 29 14 27 42 25 41 50 8 3 34 60 52 61 52 62 52 63 52 68 52 71 52 74 52 76 52 83 52 52 52 43 47 7 3 48 0 16 20 45 13 29 17 50 42 18 14 6 4 19 2 12 36 27 8 10 24 31 34 25 33 39 5 38 28 23 46 49 21 22 9 11 1 51 37 26 32 35 44 15 41 30 40 +10 7 12 19 24 28 15 3 23 5 50 47 35 27 4 22 1 49 48 11 39 9 38 34 40 0 36 43 45 42 41 29 14 51 13 33 17 44 37 46 32 26 21 30 6 16 20 18 2 25 8 31 52 56 60 63 52 68 52 73 52 52 52 40 0 8 3 11 35 51 15 7 4 47 23 43 39 27 12 34 14 42 50 38 46 2 26 33 1 5 37 21 29 45 49 16 36 31 24 18 6 28 20 22 10 44 13 30 32 9 48 19 17 25 41 +36 2 5 45 50 4 30 42 8 16 17 47 31 37 49 41 
20 29 33 6 10 19 44 7 51 0 21 18 32 24 48 1 23 26 38 14 40 43 9 46 11 28 15 39 3 34 22 12 13 27 35 25 58 52 61 62 65 52 67 52 70 52 71 52 52 52 12 8 0 48 49 1 13 29 17 25 3 37 2 30 42 50 20 4 44 41 39 51 19 15 40 16 35 45 47 11 27 21 22 6 10 26 34 38 46 23 14 32 24 9 5 18 36 28 33 7 31 43 +8 47 51 3 6 27 42 17 21 34 16 19 22 39 32 13 29 31 25 10 26 23 46 45 33 15 9 20 18 14 43 35 12 30 36 48 2 11 40 41 28 7 44 0 5 4 49 38 50 1 24 37 62 63 64 52 67 68 69 53 52 73 53 52 52 52 46 10 22 14 44 48 8 4 3 21 11 43 40 0 12 15 7 51 19 28 36 20 5 27 1 49 13 29 42 38 2 30 9 17 33 31 34 16 35 6 45 18 39 25 47 24 37 26 23 32 41 50 +12 26 23 7 33 18 9 40 36 24 17 22 45 20 43 51 19 39 29 3 47 27 15 4 5 32 31 38 48 6 28 16 35 50 0 2 8 37 46 21 49 42 34 44 30 1 25 13 11 10 14 41 56 62 52 67 53 52 73 52 52 52 2 30 50 14 6 34 38 35 48 20 0 16 47 27 23 3 11 39 43 51 44 8 24 28 40 12 32 15 9 13 45 1 19 10 31 7 17 21 33 37 42 46 22 5 29 41 49 18 36 26 25 4 +1 35 11 19 27 23 22 42 2 14 21 5 12 18 7 47 31 3 40 8 26 33 0 39 13 34 48 28 36 9 4 43 16 17 24 41 38 32 10 45 37 20 29 15 49 46 6 50 30 51 44 25 52 52 65 68 52 73 52 52 52 36 20 40 8 48 28 16 32 29 45 49 9 30 14 6 42 15 31 51 11 3 7 43 27 39 12 23 4 50 2 18 10 19 1 35 0 46 22 5 26 17 21 25 37 13 33 24 41 47 38 34 44 +38 7 28 14 23 49 9 40 10 13 22 17 42 47 21 30 3 27 16 18 25 11 51 1 50 36 8 19 46 34 24 29 32 39 15 12 43 6 26 31 33 41 37 0 2 20 5 44 35 4 48 45 52 52 52 52 diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/data/bridge/train.txt b/scenarios/bargaining/open_spiel/open_spiel/python/examples/data/bridge/train.txt new file mode 100644 index 0000000..621f16f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/data/bridge/train.txt @@ -0,0 +1,20 @@ +6 16 35 2 10 9 40 36 33 0 11 3 26 32 17 8 30 20 48 29 1 51 45 22 41 27 44 7 24 23 14 15 12 18 4 49 13 21 19 31 43 46 28 25 42 38 5 39 34 50 47 37 52 55 52 58 52 59 52 52 62 63 53 52 67 52 52 53 52 52 52 50 14 22 6 21 5 37 41 12 20 40 8 48 36 24 0 44 2 10 16 42 46 4 3 27 11 31 43 1 9 45 49 25 33 18 17 32 28 7 13 38 19 15 26 23 35 39 34 30 51 47 29 +41 35 38 46 4 49 2 13 31 37 40 7 20 10 43 33 12 47 34 51 25 17 48 14 15 45 39 26 19 16 9 28 8 29 18 6 30 27 42 5 36 0 50 23 24 32 22 44 3 11 21 1 52 56 57 64 52 52 52 30 10 50 6 42 46 20 0 1 25 45 21 11 39 51 19 33 41 49 9 37 40 13 4 17 2 5 8 29 18 14 3 32 48 28 12 38 26 15 27 34 7 31 35 22 23 24 16 43 44 36 47 +18 47 49 31 42 22 24 50 21 12 51 14 13 0 11 36 32 45 10 34 39 5 19 27 16 33 9 30 46 38 3 26 2 29 44 41 40 43 48 6 28 37 17 15 7 4 25 20 1 23 35 8 52 52 58 67 52 52 53 52 52 52 39 43 51 15 48 8 16 0 44 20 28 4 24 36 40 12 7 47 19 27 38 10 6 42 1 5 49 41 35 31 13 23 3 14 21 29 50 2 22 11 26 18 33 9 34 46 37 17 32 45 25 30 +47 38 20 13 40 39 32 43 25 29 8 28 24 37 7 45 31 16 51 4 19 41 30 21 15 23 17 14 9 36 34 44 12 27 5 2 26 22 42 35 0 11 48 33 6 49 50 18 1 10 3 46 52 52 59 52 52 52 2 6 38 42 8 4 40 16 15 23 51 35 48 28 0 36 20 44 12 22 21 1 37 5 10 50 14 26 32 13 24 29 7 43 47 11 9 49 17 33 39 3 18 19 41 30 45 25 46 31 27 34 +7 45 49 42 24 48 8 2 32 46 9 38 17 26 41 30 1 4 29 18 6 47 21 34 20 43 50 37 0 51 3 23 11 36 16 39 22 27 40 10 13 33 28 35 5 25 15 19 14 44 31 12 52 60 61 52 52 64 52 66 52 67 52 72 52 52 52 49 37 17 25 21 30 1 33 2 6 26 50 41 34 5 45 10 14 46 9 44 16 12 20 4 8 38 0 42 22 36 3 19 7 47 15 48 28 23 24 27 31 35 11 18 32 51 40 39 13 43 29 +22 20 17 49 45 28 51 3 46 11 44 33 2 12 41 34 27 15 42 8 4 18 7 5 43 31 19 39 16 9 26 23 37 24 29 14 38 30 25 0 13 6 40 32 48 47 50 10 36 1 35 21 59 52 
60 52 62 52 84 52 52 52 9 17 49 13 5 37 1 25 4 20 40 0 44 8 16 12 29 21 45 18 36 24 7 32 48 28 19 3 2 6 42 10 41 33 27 11 50 14 22 30 26 34 46 15 38 31 35 23 43 47 51 39 +2 37 40 41 9 36 13 39 17 18 28 5 7 27 30 34 20 26 25 12 22 46 49 50 6 11 35 32 10 19 38 0 42 1 44 47 14 31 21 16 43 45 51 24 15 4 3 23 48 29 8 33 62 52 52 52 36 8 0 48 20 4 40 12 38 50 10 18 39 43 19 3 2 46 30 34 27 35 47 7 5 9 29 49 44 16 17 26 45 13 33 22 15 11 51 23 28 32 14 1 6 37 21 24 42 31 25 41 +5 37 32 17 50 9 8 7 43 28 19 26 29 30 40 44 2 46 23 36 15 47 33 38 35 45 21 1 24 18 10 13 25 16 4 31 11 14 39 20 51 48 0 27 41 22 6 49 34 42 12 3 58 62 52 52 52 39 3 51 47 24 16 12 36 26 2 46 10 14 6 38 50 25 9 21 49 7 15 22 23 42 19 20 34 28 0 44 5 1 29 37 33 48 4 13 41 45 8 17 11 18 32 27 35 30 40 31 43 +48 12 37 8 15 3 5 10 50 29 35 21 20 38 25 11 41 17 9 32 45 16 44 42 1 14 51 4 26 18 39 7 28 22 19 49 13 30 33 6 0 23 24 2 40 36 27 31 34 43 47 46 55 52 63 52 66 52 68 52 74 52 76 52 84 52 52 52 14 5 42 50 15 3 35 7 51 11 26 43 44 4 0 12 47 31 1 23 39 21 41 29 19 8 13 18 27 10 45 22 24 32 48 16 40 36 9 2 20 17 25 6 28 30 33 49 34 38 37 46 +51 19 16 11 23 49 47 9 31 15 50 32 8 30 43 5 22 3 10 14 36 35 7 24 34 21 44 20 13 46 18 37 2 48 26 29 42 40 17 4 39 28 25 27 12 1 38 45 41 33 6 0 52 56 57 61 67 52 72 52 52 52 45 13 21 17 27 39 19 7 2 30 50 14 43 11 23 3 47 0 31 15 38 9 22 46 48 16 4 8 49 25 5 41 28 44 20 12 6 24 34 35 36 40 18 32 10 29 42 1 51 33 26 37 +6 44 47 43 29 23 0 18 22 10 32 5 1 26 24 37 46 35 4 51 9 17 20 31 15 14 2 11 12 45 34 39 30 36 40 50 27 49 41 25 38 21 3 28 48 7 13 42 8 33 16 19 52 52 52 58 52 66 52 73 52 52 52 48 36 0 28 6 10 34 42 50 22 14 2 51 27 7 3 5 1 45 13 44 4 18 8 26 16 11 46 19 15 23 47 41 25 9 49 17 20 37 29 31 12 35 24 33 40 43 38 39 30 21 32 +11 12 33 36 16 44 27 38 21 14 18 29 10 39 8 32 6 23 47 15 26 13 49 51 20 37 43 22 2 35 48 24 25 30 3 1 19 40 42 0 46 17 4 5 9 41 31 50 28 34 45 7 52 52 58 52 52 52 36 16 12 4 0 28 40 48 8 24 20 44 41 49 1 9 42 38 2 14 18 50 6 30 5 21 37 45 47 7 11 23 31 15 19 35 34 33 22 46 25 13 27 29 3 51 10 39 32 26 17 43 +1 47 50 40 4 6 43 33 27 26 49 13 18 23 35 21 15 7 14 36 0 9 39 12 45 2 38 25 16 17 44 46 3 8 34 31 29 10 24 11 22 28 32 42 20 19 37 48 30 41 5 51 52 52 59 53 52 52 52 13 29 41 49 39 11 3 47 10 38 42 18 46 22 2 50 14 21 30 6 1 17 37 25 5 33 45 9 15 7 43 51 31 27 19 35 34 36 0 26 32 40 4 8 48 16 28 24 12 20 23 44 +25 34 3 33 19 29 10 20 27 6 0 38 35 31 15 18 16 44 48 36 39 45 50 40 9 47 8 43 11 12 49 14 22 21 41 23 1 4 13 51 30 37 46 32 28 26 2 5 17 42 7 24 52 56 59 53 63 52 52 52 26 46 14 22 0 32 16 4 33 1 21 41 48 24 28 12 3 23 39 47 45 49 5 9 8 20 11 44 25 29 13 18 31 7 43 19 51 27 6 15 38 30 34 50 2 36 17 42 37 10 40 35 +14 49 29 31 37 51 35 9 42 11 7 25 30 33 23 2 44 20 1 19 38 46 43 17 27 0 47 39 45 32 16 5 6 10 28 8 41 34 22 21 36 40 15 26 50 18 4 48 13 24 3 12 57 52 58 52 67 52 68 52 72 53 73 52 52 52 5 37 49 1 33 29 9 41 27 51 7 19 32 4 48 36 12 44 20 16 45 11 23 17 43 31 13 0 47 39 6 18 15 8 14 24 3 21 30 10 35 2 38 40 28 25 42 34 22 26 50 46 +8 15 48 44 46 3 34 35 2 16 33 10 5 21 28 6 36 17 49 29 38 22 25 19 12 31 41 4 9 32 20 50 30 39 51 42 37 0 40 27 13 24 18 1 43 7 14 26 45 11 23 47 52 52 59 62 66 52 67 52 69 52 52 52 10 46 22 14 8 16 40 44 50 2 21 18 47 43 15 51 49 29 45 17 20 4 36 0 5 3 41 1 25 6 9 7 33 26 37 11 13 31 34 19 12 24 48 42 28 27 30 32 39 23 35 38 +32 26 23 15 4 10 2 46 35 50 19 14 48 22 45 7 6 37 39 43 5 16 30 13 41 20 29 17 49 0 34 47 28 33 8 24 40 38 18 1 51 3 42 25 44 21 31 36 9 11 27 12 60 52 61 52 65 52 68 52 70 
52 72 52 73 52 52 52 17 41 21 29 48 0 8 12 44 16 2 24 6 10 42 46 15 51 11 19 5 33 45 1 18 14 35 38 49 37 34 13 40 20 30 36 32 22 31 43 25 9 26 27 23 7 4 3 39 47 28 50 +51 21 47 45 1 23 42 50 48 35 0 19 34 22 32 13 36 28 24 25 5 2 7 4 30 39 10 44 12 3 26 6 38 11 16 46 43 29 40 31 17 8 15 33 41 14 20 49 37 18 27 9 59 52 60 61 52 52 65 52 69 52 52 52 29 7 45 1 49 5 21 0 33 41 28 15 30 22 10 46 31 43 3 47 32 4 12 8 16 44 48 11 36 2 20 9 34 14 42 50 6 38 18 26 17 23 24 25 19 51 35 27 37 39 40 13 +44 22 40 33 13 32 42 30 19 12 6 37 48 4 11 29 26 51 15 25 41 2 31 1 0 18 20 14 9 8 3 34 43 23 21 24 17 49 46 27 50 10 16 7 35 39 36 45 38 28 5 47 59 52 52 61 52 52 52 48 4 20 24 44 8 16 1 33 17 49 21 32 40 29 0 7 19 39 15 51 3 27 35 23 11 47 43 34 38 2 42 31 14 26 12 36 37 41 28 9 10 5 45 25 13 18 6 30 50 22 46 +22 10 33 40 47 16 20 50 7 5 1 36 34 8 31 45 2 38 37 35 44 12 21 27 30 14 29 4 39 32 23 28 51 46 25 48 41 0 49 13 42 18 3 15 11 24 43 26 6 9 17 19 52 52 52 52 diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/data/hearts/test.txt b/scenarios/bargaining/open_spiel/open_spiel/python/examples/data/hearts/test.txt new file mode 100644 index 0000000..c1c809a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/data/hearts/test.txt @@ -0,0 +1,5 @@ +1 37 12 31 39 30 16 3 20 43 40 47 49 17 35 6 33 7 24 46 22 45 27 11 4 38 14 1 0 19 2 51 36 5 29 50 42 28 13 10 8 9 34 18 44 21 26 25 32 41 23 15 48 43 37 38 27 29 26 1 50 18 39 33 48 0 48 40 51 39 43 47 50 10 42 30 38 8 28 24 46 19 35 31 49 14 6 22 45 1 41 37 29 7 23 15 44 2 26 18 9 25 4 21 13 27 36 33 34 11 32 17 16 3 20 5 12 +3 45 22 5 12 0 27 26 14 29 4 19 42 51 31 9 23 18 28 40 41 35 50 33 43 8 20 48 46 15 16 38 39 32 37 34 13 1 36 44 47 2 30 7 10 25 17 6 21 3 49 24 11 51 3 18 49 36 50 48 40 26 13 12 46 0 48 44 51 31 19 47 35 43 15 27 7 41 29 37 13 21 49 17 9 1 40 33 42 24 18 8 20 12 14 36 28 2 30 6 10 26 46 39 50 32 16 38 23 45 22 5 11 25 4 34 3 +1 42 15 31 2 20 8 26 30 36 51 17 43 33 14 19 7 27 46 38 28 10 35 6 44 3 16 45 49 48 50 24 25 29 4 41 9 13 5 0 39 37 18 1 34 11 21 32 47 40 12 23 22 48 36 10 51 50 14 45 0 26 44 28 34 0 44 48 32 35 51 47 27 31 43 11 15 7 3 46 23 24 39 40 36 34 10 14 30 42 18 38 26 37 5 1 9 33 21 41 25 6 2 28 16 17 49 29 12 22 20 8 50 19 45 13 4 +0 49 16 21 13 6 32 35 38 4 9 30 22 46 10 48 41 7 45 43 11 3 36 15 28 2 8 29 24 26 19 1 5 31 27 14 0 17 37 18 12 33 42 39 23 20 51 47 44 50 34 25 40 0 4 8 48 15 11 7 51 27 39 23 31 1 41 33 9 13 49 45 29 3 19 47 5 25 44 17 37 16 30 40 20 38 6 34 18 22 2 10 14 28 26 36 21 42 35 24 50 46 32 43 12 +0 5 43 29 33 17 45 51 6 22 42 2 34 8 15 48 20 27 21 14 49 31 11 35 0 10 1 40 4 26 46 13 50 19 23 7 36 28 44 30 41 39 24 37 16 25 9 38 12 3 18 47 32 0 28 24 48 35 41 31 23 7 50 39 15 27 11 51 33 13 49 25 21 34 26 18 14 36 8 44 40 9 37 6 17 2 32 22 46 42 38 20 10 1 29 16 5 30 12 19 45 47 4 3 43 diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/data/hearts/train.txt b/scenarios/bargaining/open_spiel/open_spiel/python/examples/data/hearts/train.txt new file mode 100644 index 0000000..0a790f4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/data/hearts/train.txt @@ -0,0 +1,20 @@ +2 29 46 7 28 3 41 4 51 42 2 10 49 25 20 37 22 40 27 43 44 6 1 9 32 38 24 31 15 12 23 21 35 34 19 50 45 5 13 39 33 14 8 30 47 18 0 48 36 17 26 16 11 3 17 12 19 20 26 21 50 10 49 32 22 0 48 44 40 43 51 50 27 33 29 1 9 45 5 41 37 35 25 23 7 20 42 32 16 22 30 26 18 39 19 21 46 12 36 38 24 15 34 49 3 11 14 2 31 4 28 10 8 47 6 13 17 
+0 8 23 26 2 18 5 48 28 29 20 32 21 34 40 9 19 7 1 24 3 50 43 36 15 49 16 45 22 47 38 35 30 25 4 33 37 46 0 12 27 17 10 42 44 6 41 31 14 39 13 11 51 0 48 28 8 9 21 49 41 29 13 33 37 19 7 23 35 45 51 25 5 12 44 47 20 27 39 43 31 1 42 14 17 6 10 26 2 11 3 50 38 36 30 18 16 32 22 46 4 24 15 34 40 +1 37 34 15 8 4 49 27 46 36 11 9 14 35 51 21 48 10 5 29 26 19 43 40 1 13 23 42 45 30 6 0 17 22 18 12 32 33 2 16 41 25 24 47 3 28 7 50 20 39 38 31 44 35 28 4 43 7 6 16 0 42 48 44 32 0 48 28 40 39 51 47 3 35 43 45 19 15 46 25 23 24 12 20 44 36 4 50 16 37 49 29 17 5 21 41 33 26 22 2 6 14 10 38 31 18 9 42 30 1 13 11 27 32 34 7 8 +1 8 28 51 20 49 43 15 42 34 36 14 27 44 21 19 12 35 30 6 7 25 18 22 48 2 40 24 29 17 26 16 41 1 46 32 50 23 3 9 11 45 38 10 4 13 31 33 0 47 37 5 39 47 45 25 28 46 38 51 9 6 0 50 42 0 40 32 48 51 35 47 19 39 23 43 15 30 14 6 50 44 36 16 20 8 37 28 12 33 29 49 25 1 45 5 41 21 46 9 17 3 38 27 2 4 42 26 24 10 11 34 18 13 31 22 7 +1 11 35 10 31 36 41 25 48 23 18 6 7 15 40 51 45 37 19 33 32 29 0 47 4 17 2 39 20 44 22 30 21 42 13 1 8 50 28 24 12 16 46 5 9 3 26 38 14 34 43 49 27 11 50 42 43 28 18 30 10 6 7 48 14 0 28 32 48 23 35 51 31 47 27 15 19 43 45 7 11 24 12 44 40 16 13 49 20 8 36 41 39 37 50 33 21 29 46 25 9 14 2 38 30 18 10 34 26 3 42 5 4 17 22 1 6 +0 9 35 49 25 3 50 29 12 23 4 19 20 17 1 33 14 36 46 0 51 2 37 13 15 38 32 47 45 30 39 31 18 24 28 11 16 7 5 48 42 26 10 41 27 44 40 43 6 8 22 34 21 0 20 44 40 24 4 48 16 47 27 7 39 31 15 23 35 5 49 25 9 43 51 3 50 45 17 37 41 21 2 1 33 34 6 30 10 29 42 8 46 13 18 38 22 19 14 26 32 11 12 36 28 +1 46 37 24 22 39 18 16 8 9 30 7 21 41 50 4 5 11 40 28 20 27 47 25 3 10 34 26 15 31 6 49 19 48 43 1 35 13 17 14 38 29 45 32 42 51 33 44 23 0 36 12 2 0 46 10 30 18 6 1 44 32 38 22 2 0 28 44 48 51 47 7 35 39 43 49 23 40 4 20 13 45 25 21 29 37 24 5 9 36 16 32 41 33 12 1 31 17 18 42 38 10 6 8 22 2 50 30 19 46 26 3 27 34 14 15 11 +2 7 12 16 23 11 28 2 18 46 22 40 47 51 33 9 31 49 50 19 14 15 17 1 3 4 8 30 42 48 0 39 21 29 25 20 44 37 34 32 35 5 13 43 45 41 27 36 38 10 6 26 24 51 46 10 17 13 28 43 20 26 47 45 38 0 40 44 48 43 47 39 35 25 1 21 29 5 45 9 17 27 19 23 15 33 51 13 49 11 50 46 3 7 38 16 31 18 26 22 10 4 12 36 28 32 24 20 8 2 14 41 6 42 37 34 30 +2 20 25 44 10 42 24 18 4 23 34 46 19 11 17 5 38 15 49 30 14 48 13 7 35 36 51 1 28 27 32 47 3 21 50 43 33 12 16 2 26 22 37 6 41 8 31 29 39 45 40 9 0 45 48 22 51 32 50 47 43 9 4 0 38 0 48 32 36 7 51 47 31 39 43 49 45 12 24 44 28 22 50 42 38 26 9 34 6 16 2 35 8 4 46 19 20 21 37 29 41 14 27 40 30 5 33 23 25 10 15 17 18 1 3 11 13 +2 3 41 15 45 23 22 12 48 49 43 11 18 13 6 2 20 30 25 19 14 37 47 17 31 50 46 35 24 38 32 42 9 1 44 36 33 16 28 10 40 34 0 5 8 39 51 26 7 29 27 21 4 50 34 30 51 43 32 12 26 2 45 48 24 0 36 40 16 51 39 47 35 43 23 27 19 7 3 48 15 34 18 26 46 45 21 9 29 28 50 20 12 24 42 8 38 41 17 33 49 1 25 5 32 6 10 14 2 4 37 44 30 22 11 31 13 +2 2 31 44 17 45 34 19 24 10 21 43 20 39 18 6 11 47 46 30 42 12 25 4 23 35 9 15 51 48 7 22 38 27 40 0 33 32 50 13 26 8 37 1 36 14 16 29 3 5 28 49 41 47 12 8 50 34 18 0 30 22 51 42 38 0 40 44 36 47 23 39 51 31 43 11 35 49 33 5 37 19 3 27 7 45 25 29 41 22 46 6 26 9 1 17 48 18 14 42 12 28 8 24 32 30 38 4 34 16 13 20 10 50 2 21 15 +2 43 14 4 15 48 47 34 31 32 36 5 18 1 42 24 33 44 50 12 30 16 39 8 45 9 0 7 46 21 29 26 37 10 17 41 2 13 40 35 6 11 38 28 19 3 22 25 27 51 20 49 23 43 48 16 47 40 36 24 8 4 46 30 6 0 48 40 44 43 47 3 39 36 32 20 28 45 13 29 49 16 37 24 22 11 30 7 31 33 1 17 41 34 2 10 14 5 18 21 6 8 50 12 27 26 23 51 46 42 25 19 4 38 35 15 9 +1 23 
11 45 21 31 20 7 5 46 15 4 17 51 25 16 50 37 36 19 12 29 1 44 22 24 18 40 32 48 41 2 39 26 33 42 30 34 38 8 6 9 3 47 27 49 0 43 10 14 13 28 35 51 31 37 0 38 18 43 40 28 17 50 30 0 40 48 36 23 51 47 43 15 19 39 50 21 49 25 45 34 31 18 22 30 20 42 10 44 32 24 41 16 28 46 37 35 9 11 7 6 26 33 2 14 1 38 12 8 27 29 13 4 5 17 3 +3 21 45 12 2 22 20 28 5 14 6 44 13 19 11 24 49 41 4 17 16 29 51 31 32 50 48 18 35 36 0 40 23 46 15 10 7 47 43 34 39 25 38 1 8 33 9 30 26 37 27 42 3 36 46 22 51 43 0 28 42 34 49 13 32 0 48 44 36 27 31 39 51 47 15 49 35 43 11 17 23 19 28 40 7 50 6 18 46 14 42 30 26 20 12 16 41 4 32 8 37 10 2 33 38 45 1 5 29 9 13 22 25 21 34 24 3 +0 34 4 11 20 25 39 31 38 32 43 44 10 18 14 36 42 13 0 3 35 28 37 5 49 47 16 29 41 21 23 45 50 24 1 40 22 15 8 7 26 48 9 33 19 51 17 27 30 12 6 2 46 0 44 20 48 51 43 31 19 13 17 5 49 10 18 14 2 34 6 40 30 15 39 11 35 37 33 41 25 50 47 8 36 46 12 16 45 42 32 4 29 38 28 9 7 26 24 1 3 22 21 23 27 +2 17 18 4 45 24 30 6 44 51 7 10 28 23 41 39 22 12 37 46 0 38 36 26 29 42 32 40 50 20 16 49 48 8 19 14 27 34 25 3 2 15 31 43 47 11 1 21 13 9 5 35 33 51 20 42 36 32 16 43 40 46 44 22 2 0 40 44 20 31 51 47 43 49 33 17 5 4 16 24 19 46 30 42 50 48 12 25 3 13 9 41 21 7 39 27 23 35 45 15 37 14 29 38 2 8 22 26 36 32 34 18 10 28 11 1 6 +2 29 12 2 51 23 20 46 6 27 38 33 22 31 15 8 14 21 35 43 19 50 3 24 28 1 0 48 5 17 25 4 40 13 34 47 10 36 41 18 16 11 42 30 9 26 44 45 7 37 49 32 39 1 36 50 49 44 38 47 48 32 51 7 40 0 36 44 48 47 51 43 39 42 2 38 26 35 50 19 31 40 24 28 32 25 45 49 37 22 29 34 30 12 8 16 27 14 11 20 46 33 9 21 41 15 18 10 23 17 7 1 5 13 3 4 6 +3 44 45 24 34 10 40 9 12 19 47 17 18 51 1 38 43 36 22 39 28 8 0 2 48 14 13 33 31 46 30 23 16 50 3 26 11 25 37 4 27 21 35 20 49 5 29 7 41 32 42 6 15 51 50 10 47 13 40 33 17 9 43 49 28 0 28 48 44 51 47 35 43 15 19 3 39 23 31 46 42 16 8 9 24 38 34 14 30 49 41 25 1 6 18 40 22 17 2 50 13 45 26 27 21 37 4 10 5 33 20 12 36 29 7 11 32 +0 19 43 11 26 40 21 42 16 44 15 51 25 48 50 17 27 23 29 41 33 0 36 39 47 9 6 5 20 3 30 8 32 28 31 4 49 34 22 46 7 10 45 1 2 14 37 18 38 24 13 35 12 0 36 8 32 29 41 33 9 39 27 23 31 11 47 3 43 26 34 30 18 48 50 4 20 14 6 46 38 17 25 44 45 22 42 2 10 5 49 40 37 16 28 21 1 24 13 51 12 19 15 35 7 +2 38 49 29 50 51 10 21 34 24 31 42 33 28 20 5 11 39 12 47 37 9 32 18 40 15 43 25 22 36 1 2 16 45 17 23 48 4 0 8 30 41 6 35 27 3 44 46 13 14 26 7 19 51 41 4 43 44 12 47 46 2 48 40 30 0 8 44 36 43 47 31 23 39 10 35 27 14 6 18 50 37 45 49 41 48 4 16 28 30 42 34 38 29 33 9 17 22 2 26 5 1 25 13 46 21 12 24 40 7 11 3 32 19 15 20 51 diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/deep_cfr_jax.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/deep_cfr_jax.py new file mode 100644 index 0000000..16104ca --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/deep_cfr_jax.py @@ -0,0 +1,73 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
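+#
+# Example invocation (illustrative, using the absl flags defined below):
+#   python deep_cfr_jax.py --game_name=leduc_poker --num_iterations=100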
+ +"""Python Deep CFR example.""" + +from absl import app +from absl import flags +from absl import logging + +from open_spiel.python import policy +from open_spiel.python.algorithms import expected_game_score +from open_spiel.python.algorithms import exploitability +from open_spiel.python.jax import deep_cfr +import pyspiel + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("num_iterations", 100, "Number of iterations") +flags.DEFINE_integer("num_traversals", 1500, "Number of traversals/games") +flags.DEFINE_string("game_name", "leduc_poker", "Name of the game") + + +def main(unused_argv): + logging.info("Loading %s", FLAGS.game_name) + game = pyspiel.load_game(FLAGS.game_name) + deep_cfr_solver = deep_cfr.DeepCFRSolver( + game, + policy_network_layers=(64, 64, 64), + advantage_network_layers=(64, 64, 64), + num_iterations=FLAGS.num_iterations, + num_traversals=FLAGS.num_traversals, + learning_rate=1e-3, + batch_size_advantage=2048, + batch_size_strategy=2048, + memory_capacity=1e7, + policy_network_train_steps=5000, + advantage_network_train_steps=750, + reinitialize_advantage_networks=True) + _, advantage_losses, policy_loss = deep_cfr_solver.solve() + for player, losses in advantage_losses.items(): + logging.info("Advantage for player %d: %s", player, + losses[:2] + ["..."] + losses[-2:]) + logging.info("Advantage Buffer Size for player %s: '%s'", player, + len(deep_cfr_solver.advantage_buffers[player])) + logging.info("Strategy Buffer Size: '%s'", + len(deep_cfr_solver.strategy_buffer)) + logging.info("Final policy loss: '%s'", policy_loss) + + average_policy = policy.tabular_policy_from_callable( + game, deep_cfr_solver.action_probabilities) + + conv = exploitability.nash_conv(game, average_policy) + logging.info("Deep CFR in '%s' - NashConv: %s", FLAGS.game_name, conv) + + average_policy_values = expected_game_score.policy_value( + game.new_initial_state(), [average_policy] * 2) + print("Computed player 0 value: {}".format(average_policy_values[0])) + print("Computed player 1 value: {}".format(average_policy_values[1])) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/deep_cfr_pytorch.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/deep_cfr_pytorch.py new file mode 100644 index 0000000..1104e2c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/deep_cfr_pytorch.py @@ -0,0 +1,73 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Python Deep CFR example.""" + +from absl import app +from absl import flags +from absl import logging + +from open_spiel.python import policy +from open_spiel.python.algorithms import expected_game_score +import pyspiel +from open_spiel.python.pytorch import deep_cfr + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("num_iterations", 400, "Number of iterations") +flags.DEFINE_integer("num_traversals", 40, "Number of traversals/games") +flags.DEFINE_string("game_name", "kuhn_poker", "Name of the game") + + +def main(unused_argv): + logging.info("Loading %s", FLAGS.game_name) + game = pyspiel.load_game(FLAGS.game_name) + + deep_cfr_solver = deep_cfr.DeepCFRSolver( + game, + policy_network_layers=(32, 32), + advantage_network_layers=(16, 16), + num_iterations=FLAGS.num_iterations, + num_traversals=FLAGS.num_traversals, + learning_rate=1e-3, + batch_size_advantage=None, + batch_size_strategy=None, + memory_capacity=int(1e7)) + + _, advantage_losses, policy_loss = deep_cfr_solver.solve() + for player, losses in advantage_losses.items(): + logging.info("Advantage for player %d: %s", player, + losses[:2] + ["..."] + losses[-2:]) + logging.info("Advantage Buffer Size for player %s: '%s'", player, + len(deep_cfr_solver.advantage_buffers[player])) + logging.info("Strategy Buffer Size: '%s'", + len(deep_cfr_solver.strategy_buffer)) + logging.info("Final policy loss: '%s'", policy_loss) + + average_policy = policy.tabular_policy_from_callable( + game, deep_cfr_solver.action_probabilities) + pyspiel_policy = policy.python_policy_to_pyspiel_policy(average_policy) + conv = pyspiel.nash_conv(game, pyspiel_policy) + logging.info("Deep CFR in '%s' - NashConv: %s", FLAGS.game_name, conv) + + average_policy_values = expected_game_score.policy_value( + game.new_initial_state(), [average_policy] * 2) + logging.info("Computed player 0 value: %.2f (expected: %.2f).", + average_policy_values[0], -1 / 18) + logging.info("Computed player 1 value: %.2f (expected: %.2f).", + average_policy_values[1], 1 / 18) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/deep_cfr_tf2.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/deep_cfr_tf2.py new file mode 100644 index 0000000..ce98652 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/deep_cfr_tf2.py @@ -0,0 +1,75 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Python Deep CFR example.""" + +from absl import app +from absl import flags +from absl import logging + +from open_spiel.python import policy +from open_spiel.python.algorithms import deep_cfr_tf2 +from open_spiel.python.algorithms import expected_game_score +from open_spiel.python.algorithms import exploitability +import pyspiel + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("num_iterations", 100, "Number of iterations") +flags.DEFINE_integer("num_traversals", 150, "Number of traversals/games") +flags.DEFINE_string("game_name", "leduc_poker", "Name of the game") + + +def main(unused_argv): + logging.info("Loading %s", FLAGS.game_name) + game = pyspiel.load_game(FLAGS.game_name) + deep_cfr_solver = deep_cfr_tf2.DeepCFRSolver( + game, + policy_network_layers=(64, 64, 64, 64), + advantage_network_layers=(64, 64, 64, 64), + num_iterations=FLAGS.num_iterations, + num_traversals=FLAGS.num_traversals, + learning_rate=1e-3, + batch_size_advantage=2048, + batch_size_strategy=2048, + memory_capacity=1e6, + policy_network_train_steps=5000, + advantage_network_train_steps=500, + reinitialize_advantage_networks=True, + infer_device="cpu", + train_device="cpu") + _, advantage_losses, policy_loss = deep_cfr_solver.solve() + for player, losses in advantage_losses.items(): + logging.info("Advantage for player %d: %s", player, + losses[:2] + ["..."] + losses[-2:]) + logging.info("Advantage Buffer Size for player %s: '%s'", player, + len(deep_cfr_solver.advantage_buffers[player])) + logging.info("Strategy Buffer Size: '%s'", + len(deep_cfr_solver.strategy_buffer)) + logging.info("Final policy loss: '%s'", policy_loss) + + average_policy = policy.tabular_policy_from_callable( + game, deep_cfr_solver.action_probabilities) + + conv = exploitability.nash_conv(game, average_policy) + logging.info("Deep CFR in '%s' - NashConv: %s", FLAGS.game_name, conv) + + average_policy_values = expected_game_score.policy_value( + game.new_initial_state(), [average_policy] * 2) + print("Computed player 0 value: {}".format(average_policy_values[0])) + print("Computed player 1 value: {}".format(average_policy_values[1])) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/discounted_cfr.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/discounted_cfr.py new file mode 100644 index 0000000..2050a5b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/discounted_cfr.py @@ -0,0 +1,48 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Example use of the CFR algorithm on Kuhn Poker.""" + +from absl import app +from absl import flags + +from open_spiel.python.algorithms import discounted_cfr +from open_spiel.python.algorithms import exploitability +import pyspiel + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("iterations", 500, "Number of iterations") +flags.DEFINE_string( + "game", + "turn_based_simultaneous_game(game=goofspiel(imp_info=True,num_cards=4,players=2,points_order=descending))", + "Name of the game") +flags.DEFINE_integer("players", 2, "Number of players") +flags.DEFINE_integer("print_freq", 10, "How often to print the exploitability") + + +def main(_): + game = pyspiel.load_game(FLAGS.game) + discounted_cfr_solver = discounted_cfr.DCFRSolver(game) + + for i in range(FLAGS.iterations): + discounted_cfr_solver.evaluate_and_update_policy() + if i % FLAGS.print_freq == 0: + conv = exploitability.exploitability( + game, discounted_cfr_solver.average_policy()) + print("Iteration {} exploitability {}".format(i, conv)) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/dots_and_boxes_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/dots_and_boxes_example.py new file mode 100644 index 0000000..4968aa4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/dots_and_boxes_example.py @@ -0,0 +1,96 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Contributed by Wannes Meert, Giuseppe Marra, and Pieter Robberechts +# for the KU Leuven course Machine Learning: Project. 
+ + +"""Python spiel example.""" + +from absl import app +from absl import flags +import numpy as np + +from open_spiel.python.bots import human +from open_spiel.python.bots import uniform_random +import pyspiel + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("seed", 12761381, "The seed to use for the RNG.") + +# Supported types of players: "random", "human" +flags.DEFINE_string("player0", "random", "Type of the agent for player 0.") +flags.DEFINE_string("player1", "random", "Type of the agent for player 1.") + + +def LoadAgent(agent_type, player_id, rng): + """Return a bot based on the agent type.""" + if agent_type == "random": + return uniform_random.UniformRandomBot(player_id, rng) + elif agent_type == "human": + return human.HumanBot() + else: + raise RuntimeError("Unrecognized agent type: {}".format(agent_type)) + + +def main(_): + rng = np.random.RandomState(FLAGS.seed) + games_list = pyspiel.registered_names() + assert "dots_and_boxes" in games_list + + game_string = "dots_and_boxes(num_rows=2,num_cols=2)" + print("Creating game: {}".format(game_string)) + game = pyspiel.load_game(game_string) + + agents = [ + LoadAgent(FLAGS.player0, 0, rng), + LoadAgent(FLAGS.player1, 1, rng), + ] + + state = game.new_initial_state() + + # Print the initial state + print("INITIAL STATE") + print(str(state)) + + while not state.is_terminal(): + current_player = state.current_player() + # Decision node: sample action for the single current player + legal_actions = state.legal_actions() + for action in legal_actions: + print( + "Legal action: {} ({})".format( + state.action_to_string(current_player, action), action + ) + ) + action = agents[current_player].step(state) + action_string = state.action_to_string(current_player, action) + print("Player ", current_player, ", chose action: ", action_string) + state.apply_action(action) + + print("") + print("NEXT STATE:") + print(str(state)) + if not state.is_terminal(): + print(str(state.observation_tensor())) + + # Game is now done. Print utilities for each player + returns = state.returns() + for pid in range(game.num_players()): + print("Utility for player {} is {}".format(pid, returns[pid])) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/eva.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/eva.py new file mode 100644 index 0000000..70f9f06 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/eva.py @@ -0,0 +1,90 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Ephemeral Value Adjustment example: https://arxiv.org/abs/1810.08163.""" + +from absl import app +from absl import flags +from absl import logging + +import tensorflow.compat.v1 as tf + +from open_spiel.python import policy +from open_spiel.python import rl_environment +from open_spiel.python.algorithms import eva +from open_spiel.python.algorithms import exploitability +import pyspiel + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("num_episodes", 1000, "Number of iterations") +flags.DEFINE_string("game_name", "kuhn_poker", "Name of the game") + + +class JointPolicy(policy.Policy): + """Joint policy to be evaluated.""" + + def __init__(self, agents): + self._agents = agents + + def action_probabilities(self, state, player_id=None): + cur_player = state.current_player() + return self._agents[cur_player].action_probabilities(state) + + +def main(unused_argv): + logging.info("Loading %s", FLAGS.game_name) + env = rl_environment.Environment(FLAGS.game_name) + num_players = env.num_players + num_actions = env.action_spec()["num_actions"] + state_size = env.observation_spec()["info_state"][0] + eva_agents = [] + with tf.Session() as sess: + for player in range(num_players): + eva_agents.append( + eva.EVAAgent( + sess, + env, + player, + state_size, + num_actions, + embedding_network_layers=(64, 32), + embedding_size=12, + learning_rate=1e-4, + mixing_parameter=0.5, + memory_capacity=int(1e6), + discount_factor=1.0, + epsilon_start=1.0, + epsilon_end=0.1, + epsilon_decay_duration=int(1e6))) + sess.run(tf.global_variables_initializer()) + for _ in range(FLAGS.num_episodes): + time_step = env.reset() + while not time_step.last(): + current_player = time_step.observations["current_player"] + current_agent = eva_agents[current_player] + step_out = current_agent.step(time_step) + time_step = env.step([step_out.action]) + + for agent in eva_agents: + agent.step(time_step) + + game = pyspiel.load_game(FLAGS.game_name) + joint_policy = JointPolicy(eva_agents) + conv = exploitability.nash_conv(game, joint_policy) + logging.info("EVA in '%s' - NashConv: %s", FLAGS.game_name, conv) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/example.py new file mode 100644 index 0000000..2be92ff --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/example.py @@ -0,0 +1,89 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Python spiel example.""" + +import random +from absl import app +from absl import flags +import numpy as np + +from open_spiel.python import games # pylint: disable=unused-import +import pyspiel + +FLAGS = flags.FLAGS + +# Game strings can just contain the name or the name followed by parameters +# and arguments, e.g. 
"breakthrough(rows=6,columns=6)" +flags.DEFINE_string("game_string", "tic_tac_toe", "Game string") + + +def main(_): + games_list = pyspiel.registered_games() + print("Registered games:") + print(games_list) + + action_string = None + + print("Creating game: " + FLAGS.game_string) + game = pyspiel.load_game(FLAGS.game_string) + + # Create the initial state + state = game.new_initial_state() + + # Print the initial state + print(str(state)) + + while not state.is_terminal(): + # The state can be three different types: chance node, + # simultaneous node, or decision node + if state.is_chance_node(): + # Chance node: sample an outcome + outcomes = state.chance_outcomes() + num_actions = len(outcomes) + print("Chance node, got " + str(num_actions) + " outcomes") + action_list, prob_list = zip(*outcomes) + action = np.random.choice(action_list, p=prob_list) + print("Sampled outcome: ", + state.action_to_string(state.current_player(), action)) + state.apply_action(action) + elif state.is_simultaneous_node(): + # Simultaneous node: sample actions for all players. + random_choice = lambda a: np.random.choice(a) if a else [0] + chosen_actions = [ + random_choice(state.legal_actions(pid)) + for pid in range(game.num_players()) + ] + print("Chosen actions: ", [ + state.action_to_string(pid, action) + for pid, action in enumerate(chosen_actions) + ]) + state.apply_actions(chosen_actions) + else: + # Decision node: sample action for the single current player + action = random.choice(state.legal_actions(state.current_player())) + action_string = state.action_to_string(state.current_player(), action) + print("Player ", state.current_player(), ", randomly sampled action: ", + action_string) + state.apply_action(action) + print(str(state)) + + # Game is now done. Print utilities for each player + returns = state.returns() + for pid in range(game.num_players()): + print("Utility for player {} is {}".format(pid, returns[pid])) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/exploitability_descent.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/exploitability_descent.py new file mode 100644 index 0000000..75cdce8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/exploitability_descent.py @@ -0,0 +1,104 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Python Exploitability Descent example. + +This example uses a neural network to approximate the policy. 
For a simple +tabular example, see the unit tests for the exploitability_descent algorithm: + +``` + solver = exploitability_descent.Solver(game) + with tf.Session() as session: + for step in range(num_steps): + nash_conv = solver.Step(session, learning_rate) +``` + +""" + +import time + +from absl import app +from absl import flags +from absl import logging + +import numpy as np +import tensorflow.compat.v1 as tf + +from open_spiel.python.algorithms import exploitability_descent +import pyspiel + +# Temporarily disable TF2 until we update the code. +tf.disable_v2_behavior() + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("num_steps", 10, "Number of iterations") +flags.DEFINE_string("game_name", "kuhn_poker", "Name of the game") +flags.DEFINE_integer("print_freq", 100, "Log progress every this many steps") +flags.DEFINE_float("init_lr", 0.1, "The initial learning rate") +flags.DEFINE_float("regularizer_scale", 0.001, + "Scale for L2 regularization of NN weights") +flags.DEFINE_integer("num_hidden", 64, "Hidden units.") +flags.DEFINE_integer("num_layers", 1, "Hidden layers.") + + +def main(argv): + del argv + + # Create the game to use, and a loss calculator for it + logging.info("Loading %s", FLAGS.game_name) + game = pyspiel.load_game(FLAGS.game_name) + loss_calculator = exploitability_descent.LossCalculator(game) + + # Build the network + num_hidden = FLAGS.num_hidden + num_layers = FLAGS.num_layers + layer = tf.constant(loss_calculator.tabular_policy.state_in, tf.float64) + for _ in range(num_layers): + regularizer = (tf.keras.regularizers.l2(l=FLAGS.regularizer_scale)) + layer = tf.layers.dense( + layer, num_hidden, activation=tf.nn.relu, + kernel_regularizer=regularizer) + regularizer = (tf.keras.regularizers.l2(l=FLAGS.regularizer_scale)) + layer = tf.layers.dense( + layer, game.num_distinct_actions(), kernel_regularizer=regularizer) + tabular_policy = loss_calculator.masked_softmax(layer) + + # Build the loss - exploitability descent loss plus regularizer loss + nash_conv, loss = loss_calculator.loss(tabular_policy) + loss += tf.losses.get_regularization_loss() + + # Use a simple gradient descent optimizer + learning_rate = tf.placeholder(tf.float64, (), name="learning_rate") + optimizer = tf.train.GradientDescentOptimizer(learning_rate) + optimizer_step = optimizer.minimize(loss) + + # Training loop + with tf.train.MonitoredTrainingSession() as sess: + for step in range(FLAGS.num_steps): + t0 = time.time() + nash_conv_value, _ = sess.run( + [nash_conv, optimizer_step], + feed_dict={ + learning_rate: FLAGS.init_lr / np.sqrt(1 + step), + }) + t1 = time.time() + # Optionally log our progress + if step % FLAGS.print_freq == 0: + logging.info("step=%d nash_conv=%g time per step=%.4f", step, + nash_conv_value, t1 - t0) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/fictitious_play_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/fictitious_play_example.py new file mode 100644 index 0000000..7774196 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/fictitious_play_example.py @@ -0,0 +1,45 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Python XFP example.""" + +import sys +from absl import app +from absl import flags + +from open_spiel.python.algorithms import exploitability +from open_spiel.python.algorithms import fictitious_play +import pyspiel + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("iterations", 100, "Number of iterations") +flags.DEFINE_string("game", "leduc_poker", "Name of the game") +flags.DEFINE_integer("players", 2, "Number of players") +flags.DEFINE_integer("print_freq", 10, "How often to print the exploitability") + + +def main(_): + game = pyspiel.load_game(FLAGS.game, {"players": FLAGS.players}) + xfp_solver = fictitious_play.XFPSolver(game) + for i in range(FLAGS.iterations): + xfp_solver.iteration() + conv = exploitability.exploitability(game, xfp_solver.average_policy()) + if i % FLAGS.print_freq == 0: + print("Iteration: {} Conv: {}".format(i, conv)) + sys.stdout.flush() + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/gambit_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/gambit_example.py new file mode 100644 index 0000000..70f508c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/gambit_example.py @@ -0,0 +1,53 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Export game in gambit .efg format.""" + +from absl import app +from absl import flags +from absl import logging + +from open_spiel.python.algorithms.gambit import export_gambit +import pyspiel + +FLAGS = flags.FLAGS +flags.DEFINE_string("game", "kuhn_poker", "Name of the game") +flags.DEFINE_string("out", "/tmp/gametree.efg", "Name of output file, e.g., " + "[*.efg].") +flags.DEFINE_boolean("print", False, "Print the tree to stdout " + "instead of saving to file.") + + +def main(argv): + del argv + + game = pyspiel.load_game(FLAGS.game) + game_type = game.get_type() + + if game_type.dynamics == pyspiel.GameType.Dynamics.SIMULTANEOUS: + logging.warn("%s is not turn-based. 
Trying to reload game as turn-based.", + FLAGS.game) + game = pyspiel.load_game_as_turn_based(FLAGS.game) + + gametree = export_gambit(game) # use default decorators + if FLAGS.print: + print(gametree) + else: + with open(FLAGS.out, "w") as f: + f.write(gametree) + logging.info("Game tree for %s saved to file: %s", FLAGS.game, FLAGS.out) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/game_tree_traversal_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/game_tree_traversal_example.py new file mode 100644 index 0000000..3746fa2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/game_tree_traversal_example.py @@ -0,0 +1,88 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Example to traverse an entire game tree.""" + +from absl import app +from absl import flags + +from open_spiel.python import games # pylint: disable=unused-import +import pyspiel + +_GAME_STRING = flags.DEFINE_string( + "game_string", "tic_tac_toe", "Name of the game" +) + + +class GameStats: + num_states: int = 0 + num_chance_nodes: int = 0 + num_decision_nodes: int = 0 + num_simultaneous_nodes: int = 0 + num_terminals: int = 0 + info_state_dict: dict[str, list[int]] = {} + + def __str__(self): + return (f"Number of states {self.num_states} \n" + + f"Number of chance nodes {self.num_chance_nodes} \n" + + f"Number of decision nodes {self.num_decision_nodes} \n" + + f"Number of simultaneous nodes {self.num_simultaneous_nodes} \n" + + f"Number of terminals {self.num_terminals} \n") + + +def traverse_game_tree(game: pyspiel.Game, + state: pyspiel.State, + game_stats: GameStats): + """Traverses the game tree, collecting information about the game.""" + + if state.is_terminal(): + game_stats.num_terminals += 1 + elif state.is_chance_node(): + game_stats.num_chance_nodes += 1 + for outcome in state.legal_actions(): + child = state.child(outcome) + traverse_game_tree(game, child, game_stats) + elif state.is_simultaneous_node(): + game_stats.num_simultaneous_nodes += 1 + # Using joint actions for convenience. 
Can use legal_actions(player) to + # and state.apply_actions when walking over individual players + for joint_action in state.legal_actions(): + child = state.child(joint_action) + traverse_game_tree(game, child, game_stats) + else: + game_stats.num_decision_nodes += 1 + legal_actions = state.legal_actions() + if game.get_type().provides_information_state_string: + game_stats.info_state_dict[ + state.information_state_string()] = legal_actions + for action in state.legal_actions(): + # print(f"Decision node: \n {state}") + # print(f"Taking action {action} ({state.action_to_string(action)}") + child = state.child(action) + traverse_game_tree(game, child, game_stats) + + +def main(_): + game = pyspiel.load_game(_GAME_STRING.value) + game_stats = GameStats() + state = game.new_initial_state() + traverse_game_tree(game, state, game_stats) + print(game_stats) + # for info_state_string in game_stats.info_state_dict: + # print(info_state_string) + # # print(game_stats.info_state_dict[info_state_string]) # legal actions + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/get_all_states.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/get_all_states.py new file mode 100644 index 0000000..c9614c5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/get_all_states.py @@ -0,0 +1,63 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Python spiel example to get all the states in the game.""" + +from absl import app +from absl import flags + +# pylint: disable=unused-import +from open_spiel.python import games +from open_spiel.python.algorithms import get_all_states +from open_spiel.python.mfg import games as mfg_games +import pyspiel + +FLAGS = flags.FLAGS + +flags.DEFINE_string("game", "tic_tac_toe", "Name of the game") +flags.DEFINE_integer("players", None, "Number of players") +flags.DEFINE_integer("depth_limit", -1, "Depth limit to stop at") +flags.DEFINE_bool("include_terminals", True, "Include terminal states?") +flags.DEFINE_bool("include_chance_states", True, "Include chance states?") + + +def main(_): + games_list = pyspiel.registered_games() + print("Registered games:") + for game in games_list: + print(" ", game.short_name) + print() + + print("Creating game:", FLAGS.game) + params = {} + if FLAGS.players is not None: + params["players"] = FLAGS.players + game = pyspiel.load_game(FLAGS.game, params) + + print("Getting all states; depth_limit = {}".format(FLAGS.depth_limit)) + all_states = get_all_states.get_all_states(game, FLAGS.depth_limit, + FLAGS.include_terminals, + FLAGS.include_chance_states) + + count = 0 + for state in all_states: + print(state) + count += 1 + + print() + print("Total: {} states.".format(count)) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/hearts_supervised_learning.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/hearts_supervised_learning.py new file mode 100644 index 0000000..ef1e1dc --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/hearts_supervised_learning.py @@ -0,0 +1,237 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Train a policy net on Hearts actions based given a dataset of trajectories. + +Trajectories from the Hearts bot Xinxin can be generated using +open_spiel/bots/xinxin/xinxin_game_generator.cc. 
+""" + +import os +import pickle +from typing import Any, Tuple + +from absl import app +from absl import flags + +import haiku as hk +import jax +from jax import numpy as jnp +import numpy as np +import optax + +import pyspiel + +OptState = Any +Params = Any + +FLAGS = flags.FLAGS +GAME = pyspiel.load_game('hearts') +NUM_CARDS = 52 +NUM_ACTIONS = NUM_CARDS +NUM_PLAYERS = 4 +TOP_K_ACTIONS = 5 # How many alternative actions to display +DEFAULT_LAYER_SIZES = [1024, 1024, 1024, 1024] + +flags.DEFINE_integer('iterations', 100000, 'Number of iterations') +flags.DEFINE_string('data_path', None, 'Location for data') +flags.DEFINE_integer('eval_every', 10000, 'How often to evaluate the policy') +flags.DEFINE_integer('num_examples', 3, + 'How many examples to print per evaluation') +flags.DEFINE_integer('train_batch', 128, 'Batch size for training step') +flags.DEFINE_integer('eval_batch', 10000, 'Batch size when evaluating') +flags.DEFINE_float('step_size', 1e-4, 'Step size for training') +flags.DEFINE_list('hidden_layer_sizes', None, + 'Number of hidden units and layers in the network') +flags.DEFINE_integer('rng_seed', 42, 'Seed for initial network weights') +flags.DEFINE_string('save_path', None, 'Location for saved networks') +flags.DEFINE_string('checkpoint_file', None, + 'Provides weights and optimzer state to resume training') + + +def _trajectory(line: str): + """Returns parsed action trajectory.""" + actions = [int(x) for x in line.split(' ')] + return tuple(actions) + + +def make_dataset(file: str): + """Creates dataset as a generator of single examples.""" + lines = [line for line in open(file)] + while True: + np.random.shuffle(lines) + for line in lines: + trajectory = _trajectory(line) + # skip pass_dir and deal actions + action_index = np.random.randint(NUM_CARDS + 1, len(trajectory)) + state = GAME.new_initial_state() + for action in trajectory[:action_index]: + state.apply_action(action) + yield (state.information_state_tensor(), trajectory[action_index]) + + +def batch(dataset, batch_size: int): + """Creates a batched dataset from a one-at-a-time dataset.""" + observations = np.zeros([batch_size] + GAME.information_state_tensor_shape(), + np.float32) + labels = np.zeros(batch_size, dtype=np.int32) + while True: + for batch_index in range(batch_size): + observations[batch_index], labels[batch_index] = next(dataset) + yield observations, labels + + +def one_hot(x, k): + """Returns a one-hot encoding of `x` of size `k`.""" + return jnp.array(x[..., jnp.newaxis] == jnp.arange(k), dtype=np.float32) + + +def net_fn(x): + """Haiku module for our network.""" + layers = [] + for layer_size in FLAGS.hidden_layer_sizes: + layers.append(hk.Linear(int(layer_size))) + layers.append(jax.nn.relu) + layers.append(hk.Linear(NUM_ACTIONS)) + layers.append(jax.nn.log_softmax) + net = hk.Sequential(layers) + return net(x) + + +def main(argv): + if len(argv) > 1: + raise app.UsageError('Too many command-line arguments.') + + if FLAGS.hidden_layer_sizes is None: + # Cannot pass default arguments as lists due to style requirements, so we + # override it here if they are not set. + FLAGS.hidden_layer_sizes = DEFAULT_LAYER_SIZES + + # Make the network. + net = hk.without_apply_rng(hk.transform(net_fn)) + + # Make the optimiser. 
+ opt = optax.adam(FLAGS.step_size) + + @jax.jit + def loss( + params: Params, + inputs: np.ndarray, + targets: np.ndarray, + ) -> jax.Array: + """Cross-entropy loss.""" + assert targets.dtype == np.int32 + log_probs = net.apply(params, inputs) + return -jnp.mean(one_hot(targets, NUM_ACTIONS) * log_probs) + + @jax.jit + def accuracy( + params: Params, + inputs: np.ndarray, + targets: np.ndarray, + ) -> jax.Array: + """Classification accuracy.""" + predictions = net.apply(params, inputs) + return jnp.mean(jnp.argmax(predictions, axis=-1) == targets) + + @jax.jit + def update( + params: Params, + opt_state: OptState, + inputs: np.ndarray, + targets: np.ndarray, + ) -> Tuple[Params, OptState]: + """Learning rule (stochastic gradient descent).""" + _, gradient = jax.value_and_grad(loss)(params, inputs, targets) + updates, opt_state = opt.update(gradient, opt_state) + new_params = optax.apply_updates(params, updates) + return new_params, opt_state + + def output_samples(params: Params, max_samples: int): + """Output some cases where the policy disagrees with the dataset action.""" + if max_samples == 0: + return + count = 0 + with open(os.path.join(FLAGS.data_path, 'test.txt')) as f: + lines = list(f) + np.random.shuffle(lines) + for line in lines: + state = GAME.new_initial_state() + actions = _trajectory(line) + for action in actions: + if not state.is_chance_node(): + observation = np.array(state.information_state_tensor(), np.float32) + policy = np.exp(net.apply(params, observation)) + probs_actions = [(p, a) for a, p in enumerate(policy)] + pred = max(probs_actions)[1] + if pred != action: + print(state) + for p, a in reversed(sorted(probs_actions)[-TOP_K_ACTIONS:]): + print('{:7} {:.2f}'.format(state.action_to_string(a), p)) + print('Ground truth {}\n'.format(state.action_to_string(action))) + count += 1 + break + state.apply_action(action) + if count >= max_samples: + return + + # Store what we need to rebuild the Haiku net. + if FLAGS.save_path: + filename = os.path.join(FLAGS.save_path, 'layers.txt') + with open(filename, 'w') as layer_def_file: + for s in FLAGS.hidden_layer_sizes: + layer_def_file.write(f'{s} ') + layer_def_file.write('\n') + + # Make datasets. + if FLAGS.data_path is None: + raise app.UsageError( + 'Please generate your own supervised training data and supply the local' + 'location as --data_path') + train = batch( + make_dataset(os.path.join(FLAGS.data_path, 'train.txt')), + FLAGS.train_batch) + test = batch( + make_dataset(os.path.join(FLAGS.data_path, 'test.txt')), FLAGS.eval_batch) + + # Initialize network and optimiser. + if FLAGS.checkpoint_file: + with open(FLAGS.checkpoint_file, 'rb') as pkl_file: + params, opt_state = pickle.load(pkl_file) + else: + rng = jax.random.PRNGKey(FLAGS.rng_seed) # seed used for network weights + inputs, unused_targets = next(train) + params = net.init(rng, inputs) + opt_state = opt.init(params) + + # Train/eval loop. + for step in range(FLAGS.iterations): + # Do SGD on a batch of training examples. + inputs, targets = next(train) + params, opt_state = update(params, opt_state, inputs, targets) + + # Periodically evaluate classification accuracy on the test set. 
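+    # Every eval_every steps: report accuracy on one test batch, checkpoint
+    # (params, opt_state) when save_path is set, and print example disagreements.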
+ if (1 + step) % FLAGS.eval_every == 0: + inputs, targets = next(test) + test_accuracy = accuracy(params, inputs, targets) + print(f'After {1+step} steps, test accuracy: {test_accuracy}.') + if FLAGS.save_path: + filename = os.path.join(FLAGS.save_path, f'checkpoint-{1 + step}.pkl') + with open(filename, 'wb') as pkl_file: + pickle.dump((params, opt_state), pkl_file) + output_samples(params, FLAGS.num_examples) + + +if __name__ == '__main__': + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/independent_tabular_qlearning.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/independent_tabular_qlearning.py new file mode 100644 index 0000000..604b195 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/independent_tabular_qlearning.py @@ -0,0 +1,112 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tabular Q-Learner self-play example. + +Two Q-Learning agents are trained by playing against each other. +""" + +import sys +from absl import app +from absl import flags +import numpy as np + +from open_spiel.python import rl_environment +from open_spiel.python import rl_tools +from open_spiel.python.algorithms import tabular_qlearner + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("num_train_episodes", int(1e6), + "Number of training episodes.") +flags.DEFINE_integer("num_eval_episodes", int(1e4), + "Number of episodes to use during each evaluation.") +flags.DEFINE_integer("eval_freq", int(1e4), + "The frequency (in episodes) to run evaluation.") +flags.DEFINE_string( + "epsilon_schedule", None, + "Epsilon schedule: e.g. 
'linear,init,final,num_steps' or " + "'constant,0.2'") +flags.DEFINE_string("game", "tic_tac_toe", "Game to load.") + + +def eval_agents(env, agents, num_episodes): + """Evaluate the agents, returning a numpy array of average returns.""" + rewards = np.array([0] * env.num_players, dtype=np.float64) + for _ in range(num_episodes): + time_step = env.reset() + while not time_step.last(): + player_id = time_step.observations["current_player"] + agent_output = agents[player_id].step(time_step, is_evaluation=True) + time_step = env.step([agent_output.action]) + for i in range(env.num_players): + rewards[i] += time_step.rewards[i] + rewards /= num_episodes + return rewards + + +def create_epsilon_schedule(sched_str): + """Creates an epsilon schedule from the string as desribed in the flags.""" + values = FLAGS.epsilon_schedule.split(",") + if values[0] == "linear": + assert len(values) == 4 + return rl_tools.LinearSchedule( + float(values[1]), float(values[2]), int(values[3])) + elif values[0] == "constant": + assert len(values) == 2 + return rl_tools.ConstantSchedule(float(values[1])) + else: + print("Unrecognized schedule string: {}".format(sched_str)) + sys.exit() + + +def main(_): + env = rl_environment.Environment(FLAGS.game) + num_players = env.num_players + num_actions = env.action_spec()["num_actions"] + + agents = [] + if FLAGS.epsilon_schedule is not None: + for idx in range(num_players): + agents.append( + tabular_qlearner.QLearner( + player_id=idx, + num_actions=num_actions, + epsilon_schedule=create_epsilon_schedule(FLAGS.epsilon_schedule))) + else: + agents = [ + tabular_qlearner.QLearner(player_id=idx, num_actions=num_actions) + for idx in range(num_players) + ] + + # 1. Train the agents + training_episodes = FLAGS.num_train_episodes + for cur_episode in range(training_episodes): + if cur_episode % int(FLAGS.eval_freq) == 0: + avg_rewards = eval_agents(env, agents, FLAGS.num_eval_episodes) + print("Training episodes: {}, Avg rewards: {}".format( + cur_episode, avg_rewards)) + time_step = env.reset() + while not time_step.last(): + player_id = time_step.observations["current_player"] + agent_output = agents[player_id].step(time_step) + time_step = env.step([agent_output.action]) + + # Episode is over, step all agents with final info state. + for agent in agents: + agent.step(time_step) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/is_mcts_exploitability.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/is_mcts_exploitability.py new file mode 100644 index 0000000..54e0093 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/is_mcts_exploitability.py @@ -0,0 +1,92 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
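+# Sweeps simulation budgets, UCT constants, and final-policy types, reporting
+# the exploitability of the tabular policy extracted from each configuration.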
+ +"""Exploitability of a policy from IS-MCTS search run at each info state.""" + +from absl import app +from absl import flags + +from open_spiel.python import policy +from open_spiel.python.algorithms import exploitability +import pyspiel + +FLAGS = flags.FLAGS +flags.DEFINE_string("game", "kuhn_poker", "Name of the game") + +SEED = 129846127 + + +def construct_is_mcts_policy(game, state, tabular_policy, bot, searched): + """Constructs a tabular policy from independent bot calls. + + Args: + game: an OpenSpiel game, + state: an OpenSpiel state to start the tree walk from, + tabular_policy: a policy.TabularPolicy for this game, + bot: the bot to get the policy from at each state + searched: a dictionary of information states already search (empty to begin) + """ + + if state.is_terminal(): + return + elif state.is_chance_node(): + outcomes = state.legal_actions() + for outcome in outcomes: + new_state = state.clone() + new_state.apply_action(outcome) + construct_is_mcts_policy(game, new_state, tabular_policy, bot, searched) + else: + infostate_key = state.information_state_string() + if infostate_key not in searched: + searched[infostate_key] = True + infostate_policy = bot.get_policy(state) + tabular_state_policy = tabular_policy.policy_for_key(infostate_key) + for action, prob in infostate_policy: + tabular_state_policy[action] = prob + for action in state.legal_actions(): + new_state = state.clone() + new_state.apply_action(action) + construct_is_mcts_policy(game, new_state, tabular_policy, bot, searched) + + +def main(_): + game = pyspiel.load_game(FLAGS.game) + evaluator = pyspiel.RandomRolloutEvaluator(1, SEED) + min_expl = game.max_utility() - game.min_utility() + + print("{:>5} {:>10} {:>50} {:>20}".format( + "max_sims", "uct_c", "final_policy_type", "exploitability")) + for max_simulations in [10, 100, 1000, 10000]: + for uct_c in [0.2, 0.5, 1.0, 2.0, 4.0]: # These values are for Kuhn. + for final_policy_type in [ + pyspiel.ISMCTSFinalPolicyType.NORMALIZED_VISIT_COUNT, + pyspiel.ISMCTSFinalPolicyType.MAX_VISIT_COUNT, + pyspiel.ISMCTSFinalPolicyType.MAX_VALUE + ]: + tabular_policy = policy.TabularPolicy(game) + bot = pyspiel.ISMCTSBot(SEED, evaluator, uct_c, max_simulations, -1, + final_policy_type, False, False) + searched = {} + construct_is_mcts_policy(game, game.new_initial_state(), tabular_policy, + bot, searched) + expl = exploitability.exploitability(game, tabular_policy) + print("{:>5} {:>10} {:>50} {:>20}".format(max_simulations, uct_c, + str(final_policy_type), expl)) + if expl < min_expl: + min_expl = expl + print("Min expl: {}".format(min_expl)) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/jpsro.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/jpsro.py new file mode 100644 index 0000000..1b0e868 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/jpsro.py @@ -0,0 +1,250 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Joint Policy-Space Response Oracles. + +An implementation of JSPRO, described in https://arxiv.org/abs/2106.09435. + +Bibtex / Cite: + +``` +@misc{marris2021multiagent, + title={Multi-Agent Training beyond Zero-Sum with Correlated Equilibrium + Meta-Solvers}, + author={Luke Marris and Paul Muller and Marc Lanctot and Karl Tuyls and + Thore Graepel}, + year={2021}, + eprint={2106.09435}, + archivePrefix={arXiv}, + primaryClass={cs.MA} +} +``` +""" + +from absl import app +from absl import flags + +from open_spiel.python.algorithms import jpsro +import pyspiel + + +GAMES = ( + "kuhn_poker_2p", + "kuhn_poker_3p", + "kuhn_poker_4p", + "leduc_poker_2p", + "leduc_poker_3p", + "leduc_poker_4p", + "trade_comm_2p_2i", + "trade_comm_2p_3i", + "trade_comm_2p_4i", + "trade_comm_2p_5i", + "tiny_bridge_2p", + "tiny_bridge_4p", + "sheriff_2p_1r", + "sheriff_2p_2r", + "sheriff_2p_3r", + "sheriff_2p_gabriele", + "goofspiel_2p_3c_total", + "goofspiel_2p_4c_total", + "goofspiel_2p_5c_total", + "goofspiel_2p_5c_total", + "goofspiel_2p_5c_dsc_total", + "goofspiel_2p_5c_dsc_pt_diff", +) + +FLAGS = flags.FLAGS + +# Game. +flags.DEFINE_string( + "game", "kuhn_poker_3p", + "Game and settings name.") + +# JPSRO - General. +flags.DEFINE_integer( + "iterations", 40, + "Number of JPSRO iterations.", + lower_bound=0) +flags.DEFINE_integer( + "seed", 1, + "Pseduo random number generator seed.") +flags.DEFINE_enum( + "policy_init", "uniform", jpsro.INIT_POLICIES, + "Initial policy sampling strategy.") +flags.DEFINE_enum( + "update_players_strategy", "all", jpsro.UPDATE_PLAYERS_STRATEGY, + "Which player's policies to update at each iteration.") + +# JPSRO - Best Response. +flags.DEFINE_enum( + "target_equilibrium", "cce", jpsro.BRS, + "The target equilibrium, either ce or cce.") +flags.DEFINE_enum( + "br_selection", "largest_gap", jpsro.BR_SELECTIONS, + "The best response operator. Primarily used with CE target equilibrium.") + +# JPSRO - Meta-Solver. +flags.DEFINE_enum( + "train_meta_solver", "mgcce", jpsro.META_SOLVERS, + "Meta-solver to use for training.") +flags.DEFINE_enum( + "eval_meta_solver", "mwcce", jpsro.META_SOLVERS, + "Meta-solver to use for evaluation.") +flags.DEFINE_bool( + "ignore_repeats", False, + "Whether to ignore policy repeats when calculating meta distribution. " + "This is relevant for some meta-solvers (such as Maximum Gini) that will " + "spread weight over repeats. This may or may not be a desireable property " + "depending on how one wishes to search the game space. A uniform " + "meta-solver requires this to be False.") +flags.DEFINE_float( + "action_value_tolerance", -1.0, + "If non-negative, use max-entropy best-responses with specified tolerance " + "on action-value. 
If negative, the best-response operator will return a " + "best-response policy that deterministically chooses the first action with " + "maximum action-value in each state.") + + +def get_game(game_name): + """Returns the game.""" + + if game_name == "kuhn_poker_2p": + game_name = "kuhn_poker" + game_kwargs = {"players": int(2)} + elif game_name == "kuhn_poker_3p": + game_name = "kuhn_poker" + game_kwargs = {"players": int(3)} + elif game_name == "kuhn_poker_4p": + game_name = "kuhn_poker" + game_kwargs = {"players": int(4)} + + elif game_name == "leduc_poker_2p": + game_name = "leduc_poker" + game_kwargs = {"players": int(2)} + elif game_name == "leduc_poker_3p": + game_name = "leduc_poker" + game_kwargs = {"players": int(3)} + elif game_name == "leduc_poker_4p": + game_name = "leduc_poker" + game_kwargs = {"players": int(4)} + + elif game_name == "trade_comm_2p_2i": + game_name = "trade_comm" + game_kwargs = {"num_items": int(2)} + elif game_name == "trade_comm_2p_3i": + game_name = "trade_comm" + game_kwargs = {"num_items": int(3)} + elif game_name == "trade_comm_2p_4i": + game_name = "trade_comm" + game_kwargs = {"num_items": int(4)} + elif game_name == "trade_comm_2p_5i": + game_name = "trade_comm" + game_kwargs = {"num_items": int(5)} + + elif game_name == "tiny_bridge_2p": + game_name = "tiny_bridge_2p" + game_kwargs = {} + elif game_name == "tiny_bridge_4p": + game_name = "tiny_bridge_4p" + game_kwargs = {} # Too big game. + + elif game_name == "sheriff_2p_1r": + game_name = "sheriff" + game_kwargs = {"num_rounds": int(1)} + elif game_name == "sheriff_2p_2r": + game_name = "sheriff" + game_kwargs = {"num_rounds": int(2)} + elif game_name == "sheriff_2p_3r": + game_name = "sheriff" + game_kwargs = {"num_rounds": int(3)} + elif game_name == "sheriff_2p_gabriele": + game_name = "sheriff" + game_kwargs = { + "item_penalty": float(1.0), + "item_value": float(5.0), + "max_bribe": int(2), + "max_items": int(10), + "num_rounds": int(2), + "sheriff_penalty": float(1.0), + } + + elif game_name == "goofspiel_2p_3c_total": + game_name = "goofspiel" + game_kwargs = { + "players": int(2), + "returns_type": "total_points", + "num_cards": int(3)} + elif game_name == "goofspiel_2p_4c_total": + game_name = "goofspiel" + game_kwargs = { + "players": int(2), + "returns_type": "total_points", + "num_cards": int(4)} + elif game_name == "goofspiel_2p_5c_total": + game_name = "goofspiel" + game_kwargs = { + "imp_info": True, + "egocentric": True, + "players": int(2), + "returns_type": "total_points", + "num_cards": int(5) + } + elif game_name == "goofspiel_2p_5c_dsc_total": + game_name = "goofspiel" + game_kwargs = { + "imp_info": True, + "egocentric": True, + "points_order": "descending", + "players": int(2), + "returns_type": "total_points", + "num_cards": int(5) + } + elif game_name == "goofspiel_2p_5c_dsc_pt_diff": + game_name = "goofspiel" + game_kwargs = { + "imp_info": True, + "egocentric": True, + "points_order": "descending", + "players": int(2), + "returns_type": "point_difference", + "num_cards": int(5) + } + + else: + raise ValueError("Unrecognised game: %s" % game_name) + + return pyspiel.load_game_as_turn_based(game_name, game_kwargs) + + +def main(argv): + if len(argv) > 1: + raise app.UsageError("Too many command-line arguments.") + game = get_game(FLAGS.game) + jpsro.run_loop( + game=game, + game_name=FLAGS.game, + seed=FLAGS.seed, + iterations=FLAGS.iterations, + policy_init=FLAGS.policy_init, + update_players_strategy=FLAGS.update_players_strategy, + 
target_equilibrium=FLAGS.target_equilibrium, + br_selection=FLAGS.br_selection, + train_meta_solver=FLAGS.train_meta_solver, + eval_meta_solver=FLAGS.eval_meta_solver, + action_value_tolerance=FLAGS.action_value_tolerance, + ignore_repeats=FLAGS.ignore_repeats) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/kuhn_poker_cfr.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/kuhn_poker_cfr.py new file mode 100644 index 0000000..456ae42 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/kuhn_poker_cfr.py @@ -0,0 +1,42 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Example use of the CFR algorithm on Kuhn Poker.""" + +from absl import app + +from open_spiel.python.algorithms import cfr +from open_spiel.python.algorithms import expected_game_score +import pyspiel + + +def main(_): + game = pyspiel.load_game("kuhn_poker") + + cfr_solver = cfr.CFRSolver(game) + iterations = 1000 + + for i in range(iterations): + cfr_value = cfr_solver.evaluate_and_update_policy() + print("Game util at iteration {}: {}".format(i, cfr_value)) + + average_policy = cfr_solver.average_policy() + average_policy_values = expected_game_score.policy_value( + game.new_initial_state(), [average_policy] * 2) + print("Computed player 0 value: {}".format(average_policy_values[0])) + print("Expected player 0 value: {}".format(-1 / 18)) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/lewis_signaling_qlearner.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/lewis_signaling_qlearner.py new file mode 100644 index 0000000..9f54d95 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/lewis_signaling_qlearner.py @@ -0,0 +1,311 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
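+# Trains sender/receiver Q-learners over many runs (optionally centralized) and
+# can plot rewards, optimal-action rates, and the converged policy matrix.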
+ +"""Tabular Q-Learning on Lewis Signaling Game.""" + +import copy +from absl import app +from absl import flags +import numpy as np + +from open_spiel.python import rl_environment +from open_spiel.python import rl_tools +from open_spiel.python.algorithms import tabular_qlearner + +FLAGS = flags.FLAGS + +# Env parameters +flags.DEFINE_integer("num_states", 3, "Number of states and actions") +flags.DEFINE_integer("num_messages", 3, "Number of messages") +flags.DEFINE_string("payoffs", "1, 0, 0, 0, 1, 0, 0, 0, 1", + "Payoffs to use ('random' for random [0, 1) payoffs)") + +# Alg parameters +flags.DEFINE_bool("centralized", False, "Set to use centralized learning") +flags.DEFINE_integer("num_episodes", 2000, "Number of train episodes") +flags.DEFINE_float("step_size", 0.1, "Step size for updates") +flags.DEFINE_float("eps_init", 1.0, "Initial value of epsilon") +flags.DEFINE_float("eps_final", 0.0, "Final value of epsilon") +flags.DEFINE_integer("eps_decay_steps", 1900, + "Number of episodes to decay epsilon") + +# Misc paramters +flags.DEFINE_integer("num_runs", 100, "Number of repetitions") +flags.DEFINE_integer("log_interval", 10, + "Number of episodes between each logging") +flags.DEFINE_bool("plot", False, "Set to plot the graphs") +flags.DEFINE_bool("compare", False, + "Set to compare centralized vs decentralized") + + +def run_experiment(num_players, env, payoffs, centralized): + """Run the experiments.""" + num_states = FLAGS.num_states + num_messages = FLAGS.num_messages + num_actions = env.action_spec()["num_actions"] + + # Results to store + num_runs = FLAGS.num_runs + training_episodes = FLAGS.num_episodes + log_interval = FLAGS.log_interval + rewards = np.zeros((num_runs, training_episodes // log_interval)) + opts = np.zeros((num_runs, training_episodes // log_interval)) + converge_point = np.zeros((num_states, num_states)) + percent_opt = 0 + + # Repeat the experiment num_runs times + for i in range(num_runs): + eps_schedule = rl_tools.LinearSchedule( + FLAGS.eps_init, FLAGS.eps_final, FLAGS.eps_decay_steps * + 2) # *2 since there are 2 agent steps per episode + + agents = [ + # pylint: disable=g-complex-comprehension + tabular_qlearner.QLearner( + player_id=idx, + num_actions=num_actions, + step_size=FLAGS.step_size, + epsilon_schedule=eps_schedule, + centralized=centralized) for idx in range(num_players) + ] + + # 1. Train the agents + for cur_episode in range(training_episodes): + time_step = env.reset() + # Find cur_state for logging. See lewis_signaling.cc for info_state + # details. + cur_state = time_step.observations["info_state"][0][3:].index(1) + while not time_step.last(): + player_id = time_step.observations["current_player"] + agent_output = agents[player_id].step(time_step) + time_step = env.step([agent_output.action]) + + # Episode is over, step all agents with final info state. 
+ for agent in agents: + agent.step(time_step) + + # Store rewards + reward = time_step.rewards[0] + max_reward = payoffs[cur_state].max() + cur_idx = (i, cur_episode // log_interval) + rewards[cur_idx] += reward / log_interval + opts[cur_idx] += np.isclose(reward, max_reward) / log_interval + + base_info_state0 = [1.0, 0.0, 0.0] + [0.0] * num_states + base_info_state1 = [0.0, 1.0, 0.0] + [0.0] * num_states + if centralized: + base_info_state0 = [base_info_state0, base_info_state0.copy()] + base_info_state1 = [base_info_state1, base_info_state1.copy()] + + for s in range(num_states): + info_state0 = copy.deepcopy(base_info_state0) + if centralized: + info_state0[0][3 + s] = 1.0 + else: + info_state0[3 + s] = 1.0 + # pylint: disable=protected-access + m, _ = agents[0]._epsilon_greedy( + str(info_state0), np.arange(num_messages), 0) + info_state1 = copy.deepcopy(base_info_state1) + if centralized: + info_state1[0][3 + s] = 1.0 + info_state1[1][3 + m] = 1.0 + else: + info_state1[3 + m] = 1.0 + a, _ = agents[1]._epsilon_greedy( + str(info_state1), np.arange(num_states), 0) + converge_point[s, a] += 1 + best_act = payoffs[s].argmax() + percent_opt += int(a == best_act) / num_runs / num_states + return rewards, opts, converge_point, percent_opt + + +def main(_): + game = "lewis_signaling" + num_players = 2 + + num_states = FLAGS.num_states + num_messages = FLAGS.num_messages + if FLAGS.payoffs == "random": + payoffs = np.random.random((num_states, num_states)) + payoffs_str = ",".join([str(x) for x in payoffs.flatten()]) + elif FLAGS.payoffs == "climbing": + # This is a particular payoff matrix that is hard for decentralized + # algorithms. Introduced in C. Claus and C. Boutilier, "The dynamics of + # reinforcement learning in cooperative multiagent systems", 1998, for + # simultaneous action games, but it is difficult even in the case of + # signaling games. + payoffs = np.array([[11, -30, 0], [-30, 7, 6], [0, 0, 5]]) / 30 + payoffs_str = ",".join([str(x) for x in payoffs.flatten()]) + else: + payoffs_str = FLAGS.payoffs + try: + payoffs_list = [float(x) for x in payoffs_str.split(",")] + payoffs = np.array(payoffs_list).reshape((num_states, num_states)) + except ValueError: + raise ValueError( + "There should be {} (states * actions) elements in payoff. 
Found {} elements" + .format(num_states * num_states, len(payoffs_list))) from None + + env_configs = { + "num_states": num_states, + "num_messages": num_messages, + "payoffs": payoffs_str + } + + env = rl_environment.Environment(game, **env_configs) + + if FLAGS.compare: + rewards_list = [] + opts_list = [] + converge_point_list = [] + percent_opt_list = [] + for centralized in [True, False]: + rewards, opts, converge_point, percent_opt = run_experiment( + num_players, env, payoffs, centralized) + rewards_list += [rewards] + opts_list += [opts] + converge_point_list += [converge_point] + percent_opt_list += [percent_opt] + else: + rewards, opts, converge_point, percent_opt = run_experiment( + num_players, env, payoffs, FLAGS.centralized) + rewards_list = [rewards] + opts_list = [opts] + converge_point_list = [converge_point] + percent_opt_list = [percent_opt] + + if FLAGS.plot: + # pylint: disable=g-import-not-at-top + import matplotlib as mpl + import matplotlib.pyplot as plt + from scipy import stats + + params = { + "font.size": 12, + "axes.labelsize": 12, + "xtick.labelsize": 11, + "ytick.labelsize": 11, + } + mpl.rcParams.update(params) + + def init_fig(): + fig, ax = plt.subplots(1, 1) + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + return fig, ax + + def plot_scalars(scalars, + repetition_axis=0, + scalar_labels=None, + title=None, + ax_labels=None): + """Plots scalar on ax by filling 1 standard error. + + Args: + scalars: List of scalars to plot (mean taken over repetition + axis) + repetition_axis: Axis to take the mean over + scalar_labels: Labels for the scalars (for legend) + title: Figure title + ax_labels: Labels for x and y axis (list of 2 strings) + """ + if not all([len(s.shape) == 2 for s in scalars]): + raise ValueError("Only 2D arrays supported for plotting") + + if scalar_labels is None: + scalar_labels = [None] * len(scalars) + + if len(scalars) != len(scalar_labels): + raise ValueError( + "Wrong number of scalar labels, expected {} but received {}".format( + len(scalars), len(scalar_labels))) + + _, plot_axis = init_fig() + for i, scalar in enumerate(scalars): + xs = np.arange(scalar.shape[1 - repetition_axis]) * FLAGS.log_interval + mean = scalar.mean(axis=repetition_axis) + sem = stats.sem(scalar, axis=repetition_axis) + plot_axis.plot(xs, mean, label=scalar_labels[i]) + plot_axis.fill_between(xs, mean - sem, mean + sem, alpha=0.5) + + if title is not None: + plot_axis.set_title(title) + if ax_labels is not None: + plot_axis.set_xlabel(ax_labels[0]) + plot_axis.set_ylabel(ax_labels[1]) + + def plot_confusion_matrix(cm, cmap=plt.cm.Blues, title=None): + """Plots the confusion matrix. + + Args: + cm (np.ndarray): Confusion matrix to plot + cmap: Color map to be used in matplotlib's imshow + title: Figure title + + Returns: + Figure and axis on which the confusion matrix is plotted + """ + fig, ax = plt.subplots() + ax.imshow(cm, interpolation="nearest", cmap=cmap) + ax.set_xticks([]) + ax.set_yticks([]) + ax.set_xlabel("Receiver's action", fontsize=14) + ax.set_ylabel("Sender's state", fontsize=14) + # Loop over data dimensions and create text annotations. + fmt = "d" + thresh = cm.max() / 2. 
+ for i in range(cm.shape[0]): + for j in range(cm.shape[1]): + ax.text( + j, + i, + format(cm[i, j], fmt), + ha="center", + va="center", + color="white" if cm[i, j] > thresh else "black") + fig.tight_layout() + if title is not None: + ax.set_title(title) + return fig, ax + + if FLAGS.compare: + labels = ["Centralized", "Decentralized"] + else: + labels = ["Centralized"] if FLAGS.centralized else ["Decentralized"] + plot_scalars( + rewards_list, + scalar_labels=labels, + title="Reward graph (Tabular Q-Learning)", + ax_labels=["Episodes", "Reward per episode"]) + plt.legend() + plot_scalars( + opts_list, + scalar_labels=labels, + title="Percentage of optimal actions (Tabular Q-Learning)", + ax_labels=["Episodes", "% optimal actions"]) + plt.legend() + + for i, cp in enumerate(converge_point_list): + plot_confusion_matrix( + cp.astype(int), + title="Final policy (Tabular {})".format(labels[i])) + + plt.show() + + return percent_opt_list + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/lp_solve_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/lp_solve_example.py new file mode 100644 index 0000000..244f605 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/lp_solve_example.py @@ -0,0 +1,40 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Solving matrix games with LP solver.""" + +from absl import app +from open_spiel.python.algorithms import lp_solver +import pyspiel + + +def main(_): + # lp_solver.solve_zero_sum_matrix_game(pyspiel.load_matrix_game("matrix_mp")) + # lp_solver.solve_zero_sum_matrix_game(pyspiel.load_matrix_game("matrix_rps")) + p0_sol, p1_sol, p0_sol_val, p1_sol_val = lp_solver.solve_zero_sum_matrix_game( + pyspiel.create_matrix_game( + [[0.0, -0.25, 0.5], [0.25, 0.0, -0.05], [-0.5, 0.05, 0.0]], + [[0.0, 0.25, -0.5], [-0.25, 0.0, 0.05], [0.5, -0.05, 0.0]])) + print("p0 val = {}, policy = {}".format(p0_sol_val, p0_sol)) + print("p1 val = {}, policy = {}".format(p1_sol_val, p1_sol)) + + payoff_matrix = [[1., 1., 1.], [2., 0., 1.], [0., 2., 2.]] + mixture = lp_solver.is_dominated( + 0, payoff_matrix, 0, lp_solver.DOMINANCE_WEAK, return_mixture=True) + print("mixture strategy : {}".format(mixture)) + print("payoff vector : {}".format(mixture.dot(payoff_matrix))) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/marl_nashq_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/marl_nashq_example.py new file mode 100644 index 0000000..58f2af4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/marl_nashq_example.py @@ -0,0 +1,69 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Python example of multiagent Nash Q-learners.""" + +import enum +import logging +from absl import app + +from open_spiel.python import rl_environment +from open_spiel.python.algorithms.tabular_multiagent_qlearner import MAQLearner +from open_spiel.python.algorithms.tabular_multiagent_qlearner import TwoPlayerNashSolver +from open_spiel.python.algorithms.tabular_qlearner import QLearner + + +class Action(enum.IntEnum): + STAY = 0 + LEFT = 1 + UP = 2 + RIGHT = 3 + DOWN = 4 + + +def print_iteration(actions, state): + """Print actions and state.""" + logging.info("Action taken by agent 0: %s", Action(actions[0]).name) + logging.info("Action taken by agent 1: %s", Action(actions[1]).name) + logging.info("Board state:\n %s", state) + logging.info("-" * 80) + + +def marl_path_finding_example(_): + """Example usage of multiagent Nash Q-learner. + + Based on https://www.jmlr.org/papers/volume4/hu03a/hu03a.pdf + """ + + logging.info("Creating the Grid Game") + env = rl_environment.Environment( + "pathfinding", grid="B.A\n...\na.b", players=2, step_reward=-1.) + + qlearner = QLearner(0, env.game.num_distinct_actions()) + nashqlearner = MAQLearner(1, 2, [env.game.num_distinct_actions()] * 2, + TwoPlayerNashSolver()) + + time_step = env.reset() + actions = [None, None] + + while not time_step.last(): + actions = [ + qlearner.step(time_step).action, + nashqlearner.step(time_step, actions).action + ] + time_step = env.step(actions) + print_iteration(actions, env.get_state) + + +if __name__ == "__main__": + app.run(marl_path_finding_example) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/matrix_game_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/matrix_game_example.py new file mode 100644 index 0000000..b780cfe --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/matrix_game_example.py @@ -0,0 +1,126 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
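+# Demonstrates several ways to construct matrix games (including matching
+# pennies), then plays one simultaneous round with randomly sampled actions.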
+ +"""Python spiel example.""" + +import random + +from absl import app +import numpy as np + +import pyspiel +from open_spiel.python.utils import file_utils + + +def _manually_create_game(): + """Creates the game manually from the spiel building blocks.""" + game_type = pyspiel.GameType( + "matching_pennies", + "Matching Pennies", + pyspiel.GameType.Dynamics.SIMULTANEOUS, + pyspiel.GameType.ChanceMode.DETERMINISTIC, + pyspiel.GameType.Information.ONE_SHOT, + pyspiel.GameType.Utility.ZERO_SUM, + pyspiel.GameType.RewardModel.TERMINAL, + 2, # max num players + 2, # min_num_players + True, # provides_information_state + True, # provides_information_state_tensor + False, # provides_observation + False, # provides_observation_tensor + dict() # parameter_specification + ) + game = pyspiel.MatrixGame( + game_type, + {}, # game_parameters + ["Heads", "Tails"], # row_action_names + ["Heads", "Tails"], # col_action_names + [[-1, 1], [1, -1]], # row player utilities + [[1, -1], [-1, 1]] # col player utilities + ) + return game + + +def _easy_create_game(): + """Uses the helper function to create the same game as above.""" + return pyspiel.create_matrix_game("matching_pennies", "Matching Pennies", + ["Heads", "Tails"], ["Heads", "Tails"], + [[-1, 1], [1, -1]], [[1, -1], [-1, 1]]) + + +def _even_easier_create_game(): + """Leave out the names too, if you prefer.""" + return pyspiel.create_matrix_game([[-1, 1], [1, -1]], [[1, -1], [-1, 1]]) + + +def _import_data_create_game(): + """Creates a game via imported payoff data.""" + payoff_file = file_utils.find_file( + "open_spiel/data/paper_data/response_graph_ucb/soccer.txt", 2) + payoffs = np.loadtxt(payoff_file)*2-1 + return pyspiel.create_matrix_game(payoffs, payoffs.T) + + +def main(_): + games_list = pyspiel.registered_games() + print("Registered games:") + print(games_list) + + # Load a two-player normal-form game as a two-player matrix game. + blotto_matrix_game = pyspiel.load_matrix_game("blotto") + print("Number of rows in 2-player Blotto with default settings is {}".format( + blotto_matrix_game.num_rows())) + + # Several ways to load/create the same game of matching pennies. + print("Creating matrix game...") + game = pyspiel.load_matrix_game("matrix_mp") + game = _manually_create_game() + game = _import_data_create_game() + game = _easy_create_game() + game = _even_easier_create_game() + + # Quick test: inspect top-left utility values: + print("Values for joint action ({},{}) is {},{}".format( + game.row_action_name(0), game.col_action_name(0), + game.player_utility(0, 0, 0), game.player_utility(1, 0, 0))) + + state = game.new_initial_state() + + # Print the initial state + print("State:") + print(str(state)) + + assert state.is_simultaneous_node() + + # Simultaneous node: sample actions for all players. + chosen_actions = [ + random.choice(state.legal_actions(pid)) + for pid in range(game.num_players()) + ] + print("Chosen actions: ", [ + state.action_to_string(pid, action) + for pid, action in enumerate(chosen_actions) + ]) + state.apply_actions(chosen_actions) + + assert state.is_terminal() + + # Game is now done. 
Print utilities for each player + returns = state.returns() + for pid in range(game.num_players()): + print("Utility for player {} is {}".format(pid, returns[pid])) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/matrix_nash_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/matrix_nash_example.py new file mode 100644 index 0000000..535c3d1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/matrix_nash_example.py @@ -0,0 +1,183 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Find Nash equilibria for constant- or general-sum 2-player games. + +Non-matrix games are handled by computing the normal (bimatrix) form. + +The algorithms used are: +* direct computation of pure equilibria. +* linear programming to find equilibria for constant-sum games. +* iterated dominance to reduce the action space. +* reverse search vertex enumeration (if using lrsnash) to find all general-sum + equilibria. +* support enumeration (if using nashpy) to find all general-sum equilibria. +* Lemke-Howson enumeration (if using nashpy) to find one general-sum + equilibrium. + +The general-sum mixed-equilibrium algorithms are likely to work well for tens of +actions, but less likely to scale beyond that. + + +Example usage: +``` +matrix_nash_example --game kuhn_poker +``` +""" + + +import itertools + +from absl import app +from absl import flags +import nashpy +import numpy as np + +from open_spiel.python.algorithms import lp_solver +from open_spiel.python.algorithms import matrix_nash +from open_spiel.python.egt import utils +import pyspiel + +FLAGS = flags.FLAGS + +flags.DEFINE_string("game", "first_sealed_auction(max_value=6)", + "Game (short name plus optional parameters).") +flags.DEFINE_float("tol", 1e-7, "Tolerance for determining dominance.") +flags.DEFINE_enum( + "mode", "all", ["all", "pure", "one"], "Whether to find all extreme " + "equilibria, all pure equilibria, or just one equilibrium.") +flags.DEFINE_enum( + "solver", "nashpy", ["nashpy", "lrsnash", "linear"], + "Solver to use for finding mixed equilibria. 
(lrsnash needs to" + " be installed separately to work.)") +flags.DEFINE_string("lrsnash_path", None, + "Full path to lrsnash solver (searches PATH by default).") +flags.DEFINE_integer( + "lrsnash_max_denom", 1000, "Maximum denominator to use " + "when converting payoffs to rationals for lrsnash solver.") + + +def main(_): + game = pyspiel.load_game(FLAGS.game) + print("loaded game") + + # convert game to matrix form if it isn't already a matrix game + if not isinstance(game, pyspiel.MatrixGame): + game = pyspiel.extensive_to_matrix_game(game) + num_rows, num_cols = game.num_rows(), game.num_cols() + print("converted to matrix form with shape (%d, %d)" % (num_rows, num_cols)) + + # use iterated dominance to reduce the space unless the solver is LP (fast) + if FLAGS.solver != "linear": + if FLAGS.mode == "all": + game, _ = lp_solver.iterated_dominance( + game, tol=FLAGS.tol, mode=lp_solver.DOMINANCE_STRICT) + num_rows, num_cols = game.num_rows(), game.num_cols() + print("discarded strictly dominated actions yielding shape (%d, %d)" % + (num_rows, num_cols)) + if FLAGS.mode == "one": + game, _ = lp_solver.iterated_dominance( + game, tol=FLAGS.tol, mode=lp_solver.DOMINANCE_VERY_WEAK) + num_rows, num_cols = game.num_rows(), game.num_cols() + print("discarded very weakly dominated actions yielding shape (%d, %d)" % + (num_rows, num_cols)) + + # game is now finalized + num_rows, num_cols = game.num_rows(), game.num_cols() + row_actions = [game.row_action_name(row) for row in range(num_rows)] + col_actions = [game.col_action_name(col) for col in range(num_cols)] + row_payoffs, col_payoffs = utils.game_payoffs_array(game) + pure_nash = list( + zip(*((row_payoffs >= row_payoffs.max(0, keepdims=True) - FLAGS.tol) + & (col_payoffs >= col_payoffs.max(1, keepdims=True) - FLAGS.tol) + ).nonzero())) + if pure_nash: + print("found %d pure equilibria" % len(pure_nash)) + if FLAGS.mode == "pure": + if not pure_nash: + print("found no pure equilibria") + return + print("pure equilibria:") + for row, col in pure_nash: + print("payoffs %f, %f:" % (row_payoffs[row, col], col_payoffs[row, col])) + print("row action:") + print(row_actions[row]) + print("col action:") + print(col_actions[col]) + print("") + return + if FLAGS.mode == "one" and pure_nash: + print("pure equilibrium:") + row, col = pure_nash[0] + print("payoffs %f, %f:" % (row_payoffs[row, col], col_payoffs[row, col])) + print("row action:") + print(row_actions[row]) + print("col action:") + print(col_actions[col]) + print("") + return + for row, action in enumerate(row_actions): + print("row action %s:" % row) + print(action) + print("--") + for col, action in enumerate(col_actions): + print("col action %s:" % col) + print(action) + print("--") + if num_rows == 1 or num_cols == 1: + equilibria = itertools.product(np.eye(num_rows), np.eye(num_cols)) + elif FLAGS.solver == "linear": + if FLAGS.mode != "one" or (row_payoffs + col_payoffs).max() > ( + row_payoffs + col_payoffs).min() + FLAGS.tol: + raise ValueError("can't use linear solver for non-constant-sum game or " + "for finding all optima!") + print("using linear solver") + + def gen(): + p0_sol, p1_sol, _, _ = lp_solver.solve_zero_sum_matrix_game( + pyspiel.create_matrix_game(row_payoffs - col_payoffs, + col_payoffs - row_payoffs)) + yield (np.squeeze(p0_sol, 1), np.squeeze(p1_sol, 1)) + + equilibria = gen() + elif FLAGS.solver == "lrsnash": + print("using lrsnash solver") + equilibria = matrix_nash.lrs_solve(row_payoffs, col_payoffs, + FLAGS.lrsnash_max_denom, + FLAGS.lrsnash_path) + elif 
FLAGS.solver == "nashpy": + if FLAGS.mode == "all": + print("using nashpy vertex enumeration") + equilibria = nashpy.Game(row_payoffs, col_payoffs).vertex_enumeration() + else: + print("using nashpy Lemke-Howson solver") + equilibria = matrix_nash.lemke_howson_solve(row_payoffs, col_payoffs) + print("equilibria:" if FLAGS.mode == "all" else "an equilibrium:") + equilibria = iter(equilibria) + # check that there's at least one equilibrium + try: + equilibria = itertools.chain([next(equilibria)], equilibria) + except StopIteration: + print("not found!") + for row_mixture, col_mixture in equilibria: + print("payoffs %f, %f for %s, %s" % + (row_mixture.dot(row_payoffs.dot(col_mixture)), + row_mixture.dot( + col_payoffs.dot(col_mixture)), row_mixture, col_mixture)) + if FLAGS.mode == "one": + return + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/mccfr_cpp_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/mccfr_cpp_example.py new file mode 100644 index 0000000..4dae3c2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/mccfr_cpp_example.py @@ -0,0 +1,92 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Example use of the C++ MCCFR algorithms on Kuhn Poker. + +This examples calls the underlying C++ implementations via the Python bindings. +Note that there are some pure Python implementations of some of these algorithms +in python/algorithms as well. +""" + +import pickle +from absl import app +from absl import flags + +import pyspiel + +FLAGS = flags.FLAGS + +flags.DEFINE_enum( + "sampling", + "external", + ["external", "outcome"], + "Sampling for the MCCFR solver", +) +flags.DEFINE_integer("iterations", 50, "Number of iterations") +flags.DEFINE_string("game", "kuhn_poker", "Name of the game") +flags.DEFINE_integer("players", 2, "Number of players") + +MODEL_FILE_NAME = "{}_sampling_mccfr_solver.pickle" + + +def run_iterations(game, solver, start_iteration=0): + """Run iterations of MCCFR.""" + for i in range(int(FLAGS.iterations / 2)): + solver.run_iteration() + policy = solver.average_policy() + exploitability = pyspiel.exploitability(game, policy) + + # We also compute NashConv to highlight an important API feature: + # when using Monte Carlo sampling, the policy + # may not have a table entry for every info state. 
+ # Therefore, when calling nash_conv, ensure the third argument, + # "use_state_get_policy" is set to True + # See https://github.com/deepmind/open_spiel/issues/500 + nash_conv = pyspiel.nash_conv(game, policy, True) + + print("Iteration {} nashconv: {:.6f} exploitability: {:.6f}".format( + start_iteration + i, nash_conv, exploitability)) + + +def main(_): + game = pyspiel.load_game( + FLAGS.game, + {"players": FLAGS.players}, + ) + + if FLAGS.sampling == "external": + solver = pyspiel.ExternalSamplingMCCFRSolver( + game, + avg_type=pyspiel.MCCFRAverageType.FULL, + ) + elif FLAGS.sampling == "outcome": + solver = pyspiel.OutcomeSamplingMCCFRSolver(game) + + run_iterations(game, solver) + + print("Persisting the model...") + with open(MODEL_FILE_NAME.format(FLAGS.sampling), "wb") as file: + pickle.dump(solver, file, pickle.HIGHEST_PROTOCOL) + + print("Loading the model...") + with open(MODEL_FILE_NAME.format(FLAGS.sampling), "rb") as file: + loaded_solver = pickle.load(file) + print("Exploitability of the loaded model: {:.6f}".format( + pyspiel.exploitability(game, loaded_solver.average_policy()))) + + run_iterations(game, solver, start_iteration=int(FLAGS.iterations / 2)) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/mccfr_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/mccfr_example.py new file mode 100644 index 0000000..43802dd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/mccfr_example.py @@ -0,0 +1,55 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
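+# NOTE: exploitability.nash_conv(game, policy) used below reports NashConv,
+# the sum over players of how much each could gain by unilaterally switching
+# to a best response against the current average policy; it approaches 0 as
+# MCCFR converges to a Nash equilibrium. A minimal sketch, assuming the
+# OpenSpiel Python algorithms are importable:
+#
+#   import pyspiel
+#   from open_spiel.python.algorithms import exploitability
+#   from open_spiel.python.algorithms import outcome_sampling_mccfr
+#   game = pyspiel.load_game("kuhn_poker")
+#   solver = outcome_sampling_mccfr.OutcomeSamplingSolver(game)
+#   for _ in range(1000):
+#     solver.iteration()
+#   print(exploitability.nash_conv(game, solver.average_policy()))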
+ +"""Example use of the MCCFR algorithm on Kuhn Poker.""" + +from absl import app +from absl import flags + +from open_spiel.python.algorithms import exploitability +from open_spiel.python.algorithms import external_sampling_mccfr as external_mccfr +from open_spiel.python.algorithms import outcome_sampling_mccfr as outcome_mccfr +import pyspiel + +FLAGS = flags.FLAGS + +flags.DEFINE_enum( + "sampling", + "outcome", + ["external", "outcome"], + "Sampling for the MCCFR solver", +) +flags.DEFINE_integer("iterations", 10000, "Number of iterations") +flags.DEFINE_string("game", "kuhn_poker", "Name of the game") +flags.DEFINE_integer("players", 2, "Number of players") +flags.DEFINE_integer("print_freq", 1000, + "How often to print the exploitability") + + +def main(_): + game = pyspiel.load_game(FLAGS.game, {"players": FLAGS.players}) + if FLAGS.sampling == "external": + cfr_solver = external_mccfr.ExternalSamplingSolver( + game, external_mccfr.AverageType.SIMPLE) + else: + cfr_solver = outcome_mccfr.OutcomeSamplingSolver(game) + for i in range(FLAGS.iterations): + cfr_solver.iteration() + if i % FLAGS.print_freq == 0: + conv = exploitability.nash_conv(game, cfr_solver.average_policy()) + print("Iteration {} exploitability {}".format(i, conv)) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/mcts.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/mcts.py new file mode 100644 index 0000000..77d25ab --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/mcts.py @@ -0,0 +1,198 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""MCTS example.""" + +import collections +import random +import sys + +from absl import app +from absl import flags +import numpy as np + +from open_spiel.python.algorithms import mcts +from open_spiel.python.bots import gtp +from open_spiel.python.bots import human +from open_spiel.python.bots import uniform_random +import pyspiel + +_KNOWN_PLAYERS = [ + # A generic Monte Carlo Tree Search agent. + "mcts", + + # A generic random agent. + "random", + + # You'll be asked to provide the moves. + "human", + + # Run an external program that speaks the Go Text Protocol. + # Requires the gtp_path flag. 
+ "gtp", +] + +flags.DEFINE_string("game", "tic_tac_toe", "Name of the game.") +flags.DEFINE_enum("player1", "mcts", _KNOWN_PLAYERS, "Who controls player 1.") +flags.DEFINE_enum("player2", "random", _KNOWN_PLAYERS, "Who controls player 2.") +flags.DEFINE_string("gtp_path", None, "Where to find a binary for gtp.") +flags.DEFINE_multi_string("gtp_cmd", [], "GTP commands to run at init.") +flags.DEFINE_integer("uct_c", 2, "UCT's exploration constant.") +flags.DEFINE_integer("rollout_count", 1, "How many rollouts to do.") +flags.DEFINE_integer("max_simulations", 1000, "How many simulations to run.") +flags.DEFINE_integer("num_games", 1, "How many games to play.") +flags.DEFINE_integer("seed", None, "Seed for the random number generator.") +flags.DEFINE_bool("random_first", False, "Play the first move randomly.") +flags.DEFINE_bool("solve", True, "Whether to use MCTS-Solver.") +flags.DEFINE_bool("quiet", False, "Don't show the moves as they're played.") +flags.DEFINE_bool("verbose", False, "Show the MCTS stats of possible moves.") + +FLAGS = flags.FLAGS + + +def _opt_print(*args, **kwargs): + if not FLAGS.quiet: + print(*args, **kwargs) + + +def _init_bot(bot_type, game, player_id): + """Initializes a bot by type.""" + rng = np.random.RandomState(FLAGS.seed) + if bot_type == "mcts": + evaluator = mcts.RandomRolloutEvaluator(FLAGS.rollout_count, rng) + return mcts.MCTSBot( + game, + FLAGS.uct_c, + FLAGS.max_simulations, + evaluator, + random_state=rng, + solve=FLAGS.solve, + verbose=FLAGS.verbose) + if bot_type == "random": + return uniform_random.UniformRandomBot(player_id, rng) + if bot_type == "human": + return human.HumanBot() + if bot_type == "gtp": + bot = gtp.GTPBot(game, FLAGS.gtp_path) + for cmd in FLAGS.gtp_cmd: + bot.gtp_cmd(cmd) + return bot + raise ValueError("Invalid bot type: %s" % bot_type) + + +def _get_action(state, action_str): + for action in state.legal_actions(): + if action_str == state.action_to_string(state.current_player(), action): + return action + return None + + +def _play_game(game, bots, initial_actions): + """Plays one game.""" + state = game.new_initial_state() + _opt_print("Initial state:\n{}".format(state)) + + history = [] + + if FLAGS.random_first: + assert not initial_actions + initial_actions = [state.action_to_string( + state.current_player(), random.choice(state.legal_actions()))] + + for action_str in initial_actions: + action = _get_action(state, action_str) + if action is None: + sys.exit("Invalid action: {}".format(action_str)) + + history.append(action_str) + for bot in bots: + bot.inform_action(state, state.current_player(), action) + state.apply_action(action) + _opt_print("Forced action", action_str) + _opt_print("Next state:\n{}".format(state)) + + while not state.is_terminal(): + current_player = state.current_player() + # The state can be three different types: chance node, + # simultaneous node, or decision node + if state.is_chance_node(): + # Chance node: sample an outcome + outcomes = state.chance_outcomes() + num_actions = len(outcomes) + _opt_print("Chance node, got " + str(num_actions) + " outcomes") + action_list, prob_list = zip(*outcomes) + action = np.random.choice(action_list, p=prob_list) + action_str = state.action_to_string(current_player, action) + _opt_print("Sampled action: ", action_str) + elif state.is_simultaneous_node(): + raise ValueError("Game cannot have simultaneous nodes.") + else: + # Decision node: sample action for the single current player + bot = bots[current_player] + action = bot.step(state) + action_str = 
state.action_to_string(current_player, action) + _opt_print("Player {} sampled action: {}".format(current_player, + action_str)) + + for i, bot in enumerate(bots): + if i != current_player: + bot.inform_action(state, current_player, action) + history.append(action_str) + state.apply_action(action) + + _opt_print("Next state:\n{}".format(state)) + + # Game is now done. Print return for each player + returns = state.returns() + print("Returns:", " ".join(map(str, returns)), ", Game actions:", + " ".join(history)) + + for bot in bots: + bot.restart() + + return returns, history + + +def main(argv): + game = pyspiel.load_game(FLAGS.game) + if game.num_players() > 2: + sys.exit("This game requires more players than the example can handle.") + bots = [ + _init_bot(FLAGS.player1, game, 0), + _init_bot(FLAGS.player2, game, 1), + ] + histories = collections.defaultdict(int) + overall_returns = [0, 0] + overall_wins = [0, 0] + game_num = 0 + try: + for game_num in range(FLAGS.num_games): + returns, history = _play_game(game, bots, argv[1:]) + histories[" ".join(history)] += 1 + for i, v in enumerate(returns): + overall_returns[i] += v + if v > 0: + overall_wins[i] += 1 + except (KeyboardInterrupt, EOFError): + game_num -= 1 + print("Caught a KeyboardInterrupt, stopping early.") + print("Number of games played:", game_num + 1) + print("Number of distinct games played:", len(histories)) + print("Players:", FLAGS.player1, FLAGS.player2) + print("Overall wins", overall_wins) + print("Overall returns", overall_returns) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/matrix_games/evaluation.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/matrix_games/evaluation.py new file mode 100644 index 0000000..2b03005 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/matrix_games/evaluation.py @@ -0,0 +1,112 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Evaluation.""" + +from absl import flags +import jax +import jax.numpy as jnp +import numpy as np + +FLAGS = flags.FLAGS + + +@jax.jit +def compute_best_response_strategy(utility): + actions_count = utility.shape[-1] + opponent_action = jnp.argmin(utility, axis=-1) + opponent_strategy = jax.nn.one_hot(opponent_action, actions_count) + return opponent_strategy + + +@jax.jit +def compute_values_against_best_response(strategy, payoff): + utility = jnp.matmul(strategy, payoff) + br_strategy = compute_best_response_strategy(utility) + return jnp.matmul(payoff, jnp.transpose(br_strategy)) + + +def evaluate_against_best_response(agent, payoff_batch, steps_count): + """Evaluation against best response agent. + + Args: + agent: Agent model. + payoff_batch: Payoff matrix. + steps_count: Number of steps. 
+ """ + current_policy = agent.initial_policy() + values = jax.vmap(compute_values_against_best_response)(current_policy, + payoff_batch) + for step in range(steps_count): + current_policy = agent.next_policy(values) + values = jax.vmap(compute_values_against_best_response)(current_policy, + payoff_batch) + values = jnp.transpose(values, [0, 1, 2]) + value = jnp.matmul(current_policy, values) + + for i in range(value.shape[0]): + print(step, np.mean(np.asarray(value[i]))) + + +def compute_regrets(payoff_batch, strategy_x, strategy_y): + values_y = -jnp.matmul(strategy_x, payoff_batch) + values_x = jnp.transpose( + jnp.matmul(payoff_batch, jnp.transpose(strategy_y, [0, 2, 1])), [0, 2, 1]) + value_x = jnp.matmul( + jnp.matmul(strategy_x, payoff_batch), + jnp.transpose(strategy_y, [0, 2, 1])) + value_y = -value_x + regrets_x = values_x - value_x + regrets_y = values_y - value_y + return regrets_x, regrets_y + + +def evaluate_in_selfplay(agent_x, agent_y, payoff_batch, steps_count): + """Evalute in selfplay. + + Args: + agent_x: First agent. + agent_y: Second agent. + payoff_batch: Payoff matrix. + steps_count: Number of steps. + """ + payoff_batch_size = payoff_batch.shape[0] + + regret_sum_x = np.zeros(shape=[payoff_batch_size, 1, FLAGS.num_actions]) + regret_sum_y = np.zeros(shape=[payoff_batch_size, 1, FLAGS.num_actions]) + strategy_x = agent_x.initial_policy() + strategy_y = agent_y.initial_policy() + + regrets_x, regrets_y = compute_regrets(payoff_batch, strategy_x, strategy_y) + regret_sum_x += regrets_x + regret_sum_y += regrets_y + for s in range(steps_count): + values_y = -jnp.matmul(strategy_x, payoff_batch) + values_x = jnp.transpose( + jnp.matmul(payoff_batch, jnp.transpose(strategy_y, [0, 2, 1])), + [0, 2, 1]) + + values_x = jnp.transpose(values_x, [0, 2, 1]) + values_y = jnp.transpose(values_y, [0, 2, 1]) + strategy_x = agent_x.next_policy(values_x) + strategy_y = agent_y.next_policy(values_y) + + regrets_x, regrets_y = compute_regrets(payoff_batch, strategy_x, strategy_y) + regret_sum_x += regrets_x + regret_sum_y += regrets_y + print( + jnp.mean( + jnp.max( + jnp.concatenate([regret_sum_x, regret_sum_y], axis=2), + axis=[1, 2]) / (s + 1))) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/matrix_games/main.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/matrix_games/main.py new file mode 100644 index 0000000..5831ce0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/matrix_games/main.py @@ -0,0 +1,95 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
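+# NOTE: this driver builds a batch of (optionally perturbed) rock-paper-scissors
+# payoff matrices, trains a pair of meta-regret agents and a pair of plain
+# regret-matching agents on them, and evaluates each pair in self-play. The
+# dataset and agents read batch_size / num_actions from the absl flags defined
+# below, so the module is intended to be run as a script, e.g. (module path
+# assumed from this file's location in the tree):
+#
+#   python -m open_spiel.python.examples.meta_cfr.matrix_games.main \
+#     --evaluation_steps=1000 --num_batches=1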
+ +"""Main file to train and evaluate meta-regret and regret matching agents.""" + +from absl import app +from absl import flags +import numpy as np + +from open_spiel.python.examples.meta_cfr.matrix_games import evaluation +from open_spiel.python.examples.meta_cfr.matrix_games import matrix_dataset +from open_spiel.python.examples.meta_cfr.matrix_games import meta_selfplay_agent +from open_spiel.python.examples.meta_cfr.matrix_games import regret_matching_agent + + +FLAGS = flags.FLAGS +flags.DEFINE_integer("batch_size", 1, "Batch size.") +flags.DEFINE_integer("evaluation_steps", 1000, "Number of evaluation steps.") +flags.DEFINE_integer("num_batches", 1, + "Number of batches to train a meta optimizer.") +flags.DEFINE_integer("repeats", 10, + "Number of training each batch in meta learning.") +flags.DEFINE_integer("seed", 10, "random seed.") +flags.DEFINE_integer("min_val", 0, + "minimum value for randomizing a payoff matrix.") +flags.DEFINE_integer("max_val", 10, + "maximum value for randomizing a payoff matrix.") +flags.DEFINE_integer("num_actions", 3, "Number of actions an agent can take.") +flags.DEFINE_bool("single_problem", False, + "If the matrix dataset generates only a single matrix.") + + +def selfplay_main(argv): + """Self play.""" + del argv + np.random.seed(FLAGS.seed) + # rock-paper-scissor + base_matrix = np.array([[[0, -1, 1], [1, 0, -1], [-1, 1, 0]]] * + FLAGS.batch_size) + dataset = matrix_dataset.Dataset( + base_matrix=base_matrix, + num_training_batches=FLAGS.num_batches, + minval=FLAGS.min_val, + maxval=FLAGS.max_val) + data_loader = dataset.get_training_batch() + eval_payoff_batch = dataset.get_eval_batch() + + mr_agent = meta_selfplay_agent.MetaSelfplayAgent( + repeats=FLAGS.repeats, + training_epochs=FLAGS.evaluation_steps, + data_loader=data_loader) + mr_agent.train() + + mr_agent2 = meta_selfplay_agent.MetaSelfplayAgent( + repeats=FLAGS.repeats, + training_epochs=FLAGS.evaluation_steps, + data_loader=data_loader) + mr_agent2.train() + + rm_agent = regret_matching_agent.RegretMatchingAgent( + num_actions=FLAGS.num_actions, data_loader=data_loader) + rm_agent.train() + + rm_agent2 = regret_matching_agent.RegretMatchingAgent( + num_actions=FLAGS.num_actions, data_loader=data_loader) + rm_agent2.train() + + print("Regret matching") + evaluation.evaluate_in_selfplay( + agent_x=rm_agent, + agent_y=rm_agent2, + payoff_batch=eval_payoff_batch, + steps_count=FLAGS.evaluation_steps) + + print("Meta regret matching") + evaluation.evaluate_in_selfplay( + agent_x=mr_agent, + agent_y=mr_agent2, + payoff_batch=eval_payoff_batch, + steps_count=FLAGS.evaluation_steps) + + +if __name__ == "__main__": + app.run(selfplay_main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/matrix_games/matrix_dataset.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/matrix_games/matrix_dataset.py new file mode 100644 index 0000000..872d01d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/matrix_games/matrix_dataset.py @@ -0,0 +1,55 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Dataset for structured payoff matrices.""" + +from absl import flags +import numpy as np + +FLAGS = flags.FLAGS + + +class Dataset: + """Dataset class.""" + + def __init__(self, base_matrix, num_training_batches, minval, maxval): + self._base_matrix = base_matrix + self._num_training_batches = num_training_batches + self._minval, self._maxval = minval, maxval + # to overfit + self._new_matrix = np.copy(self._base_matrix) + + def get_training_batch(self): + """Get training data.""" + while True: + if not FLAGS.single_problem: + random_vec = np.random.randint( + low=self._minval, high=self._maxval, size=FLAGS.batch_size) + self._new_matrix = np.copy(self._base_matrix) + for i in range(FLAGS.batch_size): + self._new_matrix[self._new_matrix > 0] += random_vec[i] + self._new_matrix[self._new_matrix < 0] -= random_vec[i] + yield self._new_matrix + + def get_eval_batch(self): + """Get eval dataset.""" + + if not FLAGS.single_problem: + random_vec = np.random.randint( + low=self._minval, high=self._maxval, size=FLAGS.batch_size) + self._new_matrix = np.copy(self._base_matrix) + for i in range(FLAGS.batch_size): + self._new_matrix[self._new_matrix > 0] += random_vec[i] + self._new_matrix[self._new_matrix < 0] -= random_vec[i] + return self._new_matrix diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/matrix_games/meta_selfplay_agent.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/matrix_games/meta_selfplay_agent.py new file mode 100644 index 0000000..2c6385f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/matrix_games/meta_selfplay_agent.py @@ -0,0 +1,132 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Meta-regret matching with self-play agents.""" +from typing import List + +from absl import flags +import haiku as hk +import jax +import jax.numpy as jnp +import numpy as np +import optax + +from open_spiel.python.examples.meta_cfr.matrix_games import utils + +FLAGS = flags.FLAGS + + +def opponent_best_response_strategy(utility): + opponent_action = jnp.argmin(utility, axis=-1) + opponent_strategy = jax.nn.one_hot(opponent_action, FLAGS.num_actions) + return opponent_strategy + + +def _mlp_forwards(mlp_hidden_sizes: List[int]) -> hk.Transformed: + """Returns a haiku transformation of the MLP model to be used in optimizer. + + Args: + mlp_hidden_sizes: List containing size of linear layers. + + Returns: + Haiku transformation of the RNN network. 
+ """ + def forward_fn(inputs): + mlp = hk.nets.MLP(mlp_hidden_sizes, activation=jax.nn.relu, name="mlp") + return mlp(inputs) + return hk.transform(forward_fn) + + +class OptimizerModel: + """Optimizer model.""" + + def __init__(self, learning_rate): + self.learning_rate = learning_rate + + self.model = _mlp_forwards([64, 16, FLAGS.num_actions]) + + self._net_init = self.model.init + self.net_apply = self.model.apply + + self.opt_update, self.net_params, self.opt_state = None, None, None + + def lr_scheduler(self, init_value): + schedule_fn = optax.polynomial_schedule( + init_value=init_value, end_value=0.05, power=1., transition_steps=50) + return schedule_fn + + def get_optimizer_model(self): + schedule_fn = self.lr_scheduler(self.learning_rate) + opt_init, self.opt_update = optax.chain( + optax.scale_by_adam(), optax.scale_by_schedule(schedule_fn), + optax.scale(-self.learning_rate)) + rng = jax.random.PRNGKey(10) + dummy_input = np.random.normal( + loc=0, scale=10., size=(FLAGS.batch_size, 1, FLAGS.num_actions)) + self.net_params = self._net_init(rng, dummy_input) + self.opt_state = opt_init(self.net_params) + + +class MetaSelfplayAgent: + """Meta player.""" + + def __init__(self, repeats, training_epochs, data_loader): + self.repeats = repeats + self.training_epochs = training_epochs + self.net_apply = None + self.net_params = None + self.regret_sum = None + self.step = 0 + self.data_loader = data_loader + + def train(self): + self.training_optimizer() + self.regret_sum = jnp.zeros(shape=[FLAGS.batch_size, 1, FLAGS.num_actions]) + + def initial_policy(self): + x = self.net_apply(self.net_params, None, self.regret_sum) + self.last_policy = jax.nn.softmax(x) + self.step += 1 + return self.last_policy + + def next_policy(self, last_values): + value = jnp.matmul(self.last_policy, last_values) + curren_regret = jnp.transpose(last_values, [0, 2, 1]) - value + self.regret_sum += curren_regret + + x = self.net_apply(self.net_params, None, self.regret_sum / (self.step + 1)) + self.last_policy = jax.nn.softmax(x) + self.step += 1 + return self.last_policy + + def training_optimizer(self): + """Training optimizer.""" + + optimizer = OptimizerModel(0.01) + optimizer.get_optimizer_model() + + for _ in range(FLAGS.num_batches): + batch_payoff = next(self.data_loader) + # for _ in range(self.repeats): + grads = jax.grad( + utils.meta_loss, + has_aux=False)(optimizer.net_params, optimizer.net_apply, + batch_payoff, self.training_epochs) + + updates, optimizer.opt_state = optimizer.opt_update( + grads, optimizer.opt_state) + optimizer.net_params = optax.apply_updates(optimizer.net_params, updates) + + self.net_apply = optimizer.net_apply + self.net_params = optimizer.net_params diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/matrix_games/regret_matching_agent.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/matrix_games/regret_matching_agent.py new file mode 100644 index 0000000..d5f2432 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/matrix_games/regret_matching_agent.py @@ -0,0 +1,60 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Regret matching.""" +from absl import flags +import jax +import jax.numpy as jnp +import numpy as np + +FLAGS = flags.FLAGS + + +class RegretMatchingAgent: + """Regret matching agent.""" + + def __init__(self, num_actions, data_loader): + self.num_actions = num_actions + # self.regret_sum = jax.numpy.array(np.zeros(self.num_actions)) + self.regret_sum = jax.numpy.array( + np.zeros(shape=[FLAGS.batch_size, 1, self.num_actions])) + self.data_loader = data_loader + + def train(self): + pass + + def initial_policy(self): + self.last_policy = self.regret_matching_policy(self.regret_sum) + return self.last_policy + + def next_policy(self, last_values): + value = jnp.matmul(self.last_policy, last_values) + last_values = jnp.transpose(last_values, [0, 2, 1]) + current_regrets = last_values - value + self.regret_sum += current_regrets + self.last_policy = self.regret_matching_policy(self.regret_sum) + return self.last_policy + + def regret_matching_policy(self, regret_sum): + """Regret matching policy.""" + + strategy = np.copy(regret_sum) + strategy[strategy < 0] = 0 + strategy_sum = np.sum(strategy, axis=-1) + for i in range(FLAGS.batch_size): + if strategy_sum[i] > 0: + strategy[i] /= strategy_sum[i] + else: + strategy[i] = np.repeat(1 / self.num_actions, self.num_actions) + return strategy diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/matrix_games/rnn_meta_selfplay_agent.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/matrix_games/rnn_meta_selfplay_agent.py new file mode 100644 index 0000000..4261067 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/matrix_games/rnn_meta_selfplay_agent.py @@ -0,0 +1,185 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
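+# NOTE: RNN variant of the meta-regret agent. meta_loss below unrolls `steps`
+# rounds of self-play: the time-averaged regret sums are mapped through the
+# learned LSTM/MLP network, softmaxed into policies, and the maximum
+# instantaneous regret of each round is accumulated as the loss that the outer
+# Adam-style optimizer minimizes. The local fori_loop helper appears to pass
+# the original total_loss (rather than the running val) into body_fun on every
+# iteration, so only the final round's regret reaches the returned loss;
+# compare with the jax.lax.scan accumulation in matrix_games/utils.py.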
+ +"""RNN meta-regret matching with self-play agents.""" + +from typing import List + +from absl import flags +import haiku as hk +import jax +import jax.numpy as jnp +import numpy as np +import optax + +from open_spiel.python.examples.meta_cfr.matrix_games.rnn_model import RNNModel + +FLAGS = flags.FLAGS + + +def _make_network(lstm_hidden_sizes: List[int], + mlp_hidden_sizes: List[int], + output_dim: int) -> hk.RNNCore: + """set up the network.""" + + layers = [] + for k, hidden_size in enumerate(lstm_hidden_sizes): + layers += [hk.LSTM(hidden_size, name=f'lstm_layer_{k}'), jax.nn.relu] + layers += [hk.nets.MLP(mlp_hidden_sizes + [output_dim], name='mlp')] + return RNNModel(layers) + + +def _make_forwards(lstm_hidden_sizes: List[int], mlp_hidden_sizes: List[int], + output_dim: int, batch_size: int) -> hk.Transformed: + + """Forward pass.""" + + def forward_fn(inputs): + rnn = _make_network(lstm_hidden_sizes, mlp_hidden_sizes, output_dim) + initial_state = rnn.initial_state(batch_size=batch_size) + outputs, _ = hk.dynamic_unroll(rnn, inputs, initial_state, time_major=False) + return outputs + + network = hk.transform(forward_fn) + return network + + +def meta_loss(opt_params, net_apply, payoff, steps, rng): + """Meta loss function.""" + + regret_sum_x = np.zeros(shape=[FLAGS.batch_size, 1, FLAGS.num_actions]) + regret_sum_y = np.zeros(shape=[FLAGS.batch_size, 1, FLAGS.num_actions]) + total_loss = 0 + + @jax.jit + def body_fun(s, total_loss): + nonlocal regret_sum_x + nonlocal regret_sum_y + x = net_apply(opt_params, rng, regret_sum_x / (s + 1)) + y = net_apply(opt_params, rng, regret_sum_y / (s + 1)) + + strategy_x = jax.nn.softmax(x) + strategy_y = jnp.transpose(jax.nn.softmax(y), [0, 2, 1]) + + values_x = jnp.matmul(payoff, strategy_y) + values_y = -jnp.matmul(strategy_x, payoff) + + value_x = jnp.matmul(jnp.matmul(strategy_x, payoff), strategy_y) + value_y = -value_x + + curren_regret_x = values_x - value_x + curren_regret_y = values_y - value_y + curren_regret_x = jnp.transpose(curren_regret_x, [0, 2, 1]) + + regret_sum_x += curren_regret_x + regret_sum_y += curren_regret_y + + current_loss = jnp.max( + jax.numpy.concatenate([curren_regret_x, curren_regret_y], axis=2), + axis=[1, 2]) + total_loss += current_loss + return total_loss + def fori_loop(lower, steps, body_fun, total_loss): + val = total_loss + for i in range(lower, steps): + val = body_fun(i, total_loss) + return val + total_loss = fori_loop(0, steps, body_fun, total_loss) + return jnp.mean(total_loss) + + +class OptimizerModel: + """Optimizer model.""" + + def __init__(self, learning_rate): + self.learning_rate = learning_rate + self.model = _make_forwards( + lstm_hidden_sizes=[20], + mlp_hidden_sizes=[], + output_dim=3, + batch_size=FLAGS.batch_size) + self.net_apply = self.model.apply + self.net_init = self.model.init + self.opt_update, self.net_params, self.opt_state = None, None, None + + def lr_scheduler(self, init_value): + schedule_fn = optax.polynomial_schedule( + init_value=init_value, end_value=0.05, power=1., transition_steps=50) + return schedule_fn + + def get_optimizer_model(self): + schedule_fn = self.lr_scheduler(self.learning_rate) + opt_init, self.opt_update = optax.chain( + optax.scale_by_adam(), optax.scale_by_schedule(schedule_fn), + optax.scale(-self.learning_rate)) + rng = jax.random.PRNGKey(10) + dummy_input = np.random.normal( + loc=0, scale=10., size=(FLAGS.batch_size, 1, FLAGS.num_actions)) + self.net_params = self.net_init(rng, dummy_input) + self.opt_state = opt_init(self.net_params) + + +class 
MetaSelfplayAgent: + """Meta player agent.""" + + def __init__(self, repeats, training_epochs, data_loader): + self.repeats = repeats + self.training_epochs = training_epochs + self.net_apply = None + self.net_params = None + self.regret_sum = None + self.step = 0 + self.data_loader = data_loader + self._rng = hk.PRNGSequence(10) + + def train(self): + self.training_optimizer() + self.regret_sum = jnp.zeros(shape=[FLAGS.batch_size, 1, FLAGS.num_actions]) + + def initial_policy(self): + x = self.net_apply(self.net_params, next(self._rng), self.regret_sum) + self.last_policy = jax.nn.softmax(x) + self.step += 1 + return self.last_policy + + def next_policy(self, last_values): + value = jnp.matmul(self.last_policy, last_values) + curren_regret = jnp.transpose(last_values, [0, 2, 1]) - value + self.regret_sum += curren_regret + + x = self.net_apply(self.net_params, next(self._rng), + self.regret_sum / (self.step + 1)) + self.last_policy = jax.nn.softmax(x) + self.step += 1 + return self.last_policy + + def training_optimizer(self): + """Train optimizer.""" + + optimizer = OptimizerModel(0.01) + optimizer.get_optimizer_model() + for _ in range(FLAGS.num_batches): + batch_payoff = next(self.data_loader) + for _ in range(self.repeats): + grads = jax.grad( + meta_loss, has_aux=False)(optimizer.net_params, optimizer.net_apply, + batch_payoff, self.training_epochs, + next(self._rng)) + + updates, optimizer.opt_state = optimizer.opt_update( + grads, optimizer.opt_state) + optimizer.net_params = optax.apply_updates(optimizer.net_params, + updates) + self.net_apply = optimizer.net_apply + self.net_params = optimizer.net_params diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/matrix_games/rnn_model.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/matrix_games/rnn_model.py new file mode 100644 index 0000000..ea5ef20 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/matrix_games/rnn_model.py @@ -0,0 +1,50 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
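+# NOTE: RNNModel wraps a mixed stack of layers as a single hk.RNNCore: layers
+# that are themselves hk.RNNCore instances (e.g. hk.LSTM) get a slot in the
+# recurrent state tuple, while plain callables (jax.nn.relu, the final MLP)
+# are applied statelessly and their state slot stays None. _make_network in
+# rnn_meta_selfplay_agent.py relies on this to interleave LSTM layers with an
+# MLP head.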
+ +"""RNN model.""" + +from typing import Callable, List, Union, Optional + +import haiku as hk +import jax.numpy as jnp + + +class RNNModel(hk.RNNCore): + """RNN model.""" + + def __init__(self, + layers: List[Union[hk.Module, Callable[[jnp.ndarray], + jnp.ndarray]]], + name: Optional[str] = 'RNN'): + super().__init__(name=name) + self._layers = layers + + def __call__(self, inputs, prev_state): + x = inputs + curr_state = [None] * len(prev_state) + for k, layer in enumerate(self._layers): + if isinstance(layer, hk.RNNCore): + x, curr_state[k] = layer(x, prev_state[k]) + else: + x = layer(x) + return x, tuple(curr_state) + + def initial_state(self, batch_size: Optional[int]): + layerwise_init_state = [] + for layer in self._layers: + if isinstance(layer, hk.RNNCore): + layerwise_init_state.append(layer.initial_state(batch_size)) + else: + layerwise_init_state.append(None) + return tuple(layerwise_init_state) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/matrix_games/utils.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/matrix_games/utils.py new file mode 100644 index 0000000..53c2d87 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/matrix_games/utils.py @@ -0,0 +1,82 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utility functions for meta learning for regret minimization.""" + +from absl import flags +import jax +import jax.numpy as jnp +import numpy as np + +FLAGS = flags.FLAGS + + +def meta_loss(opt_params, net_apply, payoff, steps): + + """Returns the meta learning loss value. + + Args: + opt_params: Optimizer parameters. + net_apply: Apply function. + payoff: Payoff matrix. + steps: Number of steps. + + Returns: + Accumulated loss value over number of steps. 
+ + """ + regret_sum_x = np.zeros(shape=[FLAGS.batch_size, 1, FLAGS.num_actions]) + regret_sum_y = np.zeros(shape=[FLAGS.batch_size, 1, FLAGS.num_actions]) + total_loss = 0 + step = 0 + + @jax.jit + def scan_body(carry, x): + nonlocal regret_sum_x + nonlocal regret_sum_y + regret_sum_x, regret_sum_y, current_step, total_loss = carry + x = net_apply(opt_params, None, regret_sum_x / (current_step + 1)) + y = net_apply(opt_params, None, regret_sum_y / (current_step + 1)) + + strategy_x = jax.nn.softmax(x) + strategy_y = jnp.transpose(jax.nn.softmax(y), [0, 2, 1]) + + values_x = jnp.matmul(payoff, strategy_y) # val_x = payoff * st_y + values_y = -jnp.matmul(strategy_x, payoff) # val_y = -1 * payoff * st_x + + value_x = jnp.matmul(jnp.matmul(strategy_x, payoff), strategy_y) + value_y = -value_x + + curren_regret_x = values_x - value_x + curren_regret_y = values_y - value_y + curren_regret_x = jnp.transpose(curren_regret_x, [0, 2, 1]) + + regret_sum_x += curren_regret_x + regret_sum_y += curren_regret_y + + current_loss = jnp.mean(jnp.max( + jax.numpy.concatenate([curren_regret_x, curren_regret_y], axis=2), + axis=[1, 2]), axis=-1) + total_loss += current_loss + current_step += 1 + return (regret_sum_x, regret_sum_y, current_step, total_loss), None + + (regret_sum_x, regret_sum_y, step, total_loss), _ = jax.lax.scan( + scan_body, + (regret_sum_x, regret_sum_y, step, total_loss), + None, + length=steps, + ) + + return total_loss diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/cfr.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/cfr.py new file mode 100644 index 0000000..1920cfb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/cfr.py @@ -0,0 +1,482 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Counterfactual Regret Minimization.""" + +import copy +import enum +from typing import List, Tuple + +from open_spiel.python.examples.meta_cfr.sequential_games.typing import GameTree +from open_spiel.python.examples.meta_cfr.sequential_games.typing import HistoryNode +from open_spiel.python.examples.meta_cfr.sequential_games.typing import InfostateMapping +from open_spiel.python.examples.meta_cfr.sequential_games.typing import InfostateNode + + +class Players(enum.IntEnum): + CHANCE_PLAYER = 0 + PLAYER_1 = 1 + PLAYER_2 = 2 + + +def compute_reach_probabilities( + history_tree_node: HistoryNode, + all_infostates_map: List[InfostateMapping]) -> None: + """Computes reach probabilities for game tree information states. + + This function initializes counterfactual_reach_prob and player_reach_prob for + all information states in the game tree, and then these values will be + calculated in compute_reach_probability_dfs. + + Args: + history_tree_node: Game tree HistoryTreeNode which is the root of the game + tree. 
+ all_infostates_map: List of dictionaries (mapping from information state + string representation to information state object) for each players + (including chance player). This list will be empty when this function is + called fot the first time and it'll be population during DFS tree + traversal. + """ + + for infostate in (list(all_infostates_map[Players.PLAYER_1].values()) + + list(all_infostates_map[Players.PLAYER_2].values())): + infostate.counterfactual_reach_prob = 0. + infostate.player_reach_prob = 0. + compute_reach_probability_dfs(history_tree_node, all_infostates_map) + + +def compute_reach_probability_dfs( + history_tree_node: HistoryNode, + all_infostates_map: List[InfostateMapping]) -> None: + """Calculate reach probability values in dfs tree. + + This function is initially called by compute_reach_probabilities and it + computes reach probabilities for all information state nodes in the tree by + traversing the tree using DFS. + + Args: + history_tree_node: Game tree HistoryTreeNode which is the root of the game + tree. + all_infostates_map: List of dictionaries (mapping from information state + string representation to information state object) for each players + (including chance player). This list will be empty when this function is + called fot the first time and it'll be population during DFS tree + traversal. + """ + + world_state = history_tree_node.world_state + infostate_p1 = all_infostates_map[Players.PLAYER_1][ + world_state.get_infostate_string(Players.PLAYER_1)] + infostate_p2 = all_infostates_map[Players.PLAYER_2][ + world_state.get_infostate_string(Players.PLAYER_2)] + infostate_p1.counterfactual_reach_prob += history_tree_node.reach_probs[ + 0] * history_tree_node.reach_probs[Players.PLAYER_2] + infostate_p2.counterfactual_reach_prob += history_tree_node.reach_probs[ + 0] * history_tree_node.reach_probs[Players.PLAYER_1] + + if infostate_p1.player_reach_prob != 0.: + assert (infostate_p1.player_reach_prob == history_tree_node.reach_probs[ + Players.PLAYER_1]) + + if infostate_p2.player_reach_prob != 0.: + assert (infostate_p2.player_reach_prob == history_tree_node.reach_probs[ + Players.PLAYER_2]) + + infostate_p1.player_reach_prob = history_tree_node.reach_probs[ + Players.PLAYER_1] + infostate_p2.player_reach_prob = history_tree_node.reach_probs[ + Players.PLAYER_2] + + policy_p1 = infostate_p1.policy + policy_p2 = infostate_p2.policy + policy_chance = world_state.chance_policy + actions_chance, actions_p1, actions_p2 = world_state.get_actions() + for action_chance in actions_chance: + for action_p1 in actions_p1: + for action_p2 in actions_p2: + history_tree_node.action_probs[( + action_chance, action_p1, action_p2)] = policy_chance[ + action_chance] * policy_p1[action_p1] * policy_p2[action_p2] + child_node = history_tree_node.get_child( + (action_chance, action_p1, action_p2)) + child_node.reach_probs[ + Players.CHANCE_PLAYER] = history_tree_node.reach_probs[ + Players.CHANCE_PLAYER] * policy_chance[action_chance] + child_node.reach_probs[ + Players.PLAYER_1] = history_tree_node.reach_probs[ + Players.PLAYER_1] * policy_p1[action_p1] + child_node.reach_probs[ + Players.PLAYER_2] = history_tree_node.reach_probs[ + Players.PLAYER_2] * policy_p2[action_p2] + compute_reach_probability_dfs(child_node, all_infostates_map) + + +def _get_opponent(player: int) -> int: + return -1 * player + 3 + + +def compute_best_response_values(infostate: InfostateNode) -> float: + """Returns best response value for an infostate. + + Args: + infostate: Information state. 
+ + Returns: + Best response value, which is the maximum action value chosen among all + actions values of possible actions from infostate. If information state is a + terminal node in the game tree, this value is calculated from history nodes + reach probability for player and opponent, and game utility of terminal + node. If infostate is not terminal, this value will be calculated in a + recursive way. + """ + if infostate.is_terminal(): + terminal_utility = 0 + for history_node in infostate.history_nodes: + terminal_utility += history_node.reach_probs[ + 0] * history_node.reach_probs[_get_opponent( + infostate.player)] * history_node.world_state.get_utility( + infostate.player) + return terminal_utility + action_values = {action: 0 for action in infostate.get_actions()} + infostate_actions = infostate.get_actions() + for action in infostate_actions: + action_values[action] = 0 + for child in infostate.children[action].values(): + action_values[action] += compute_best_response_values(child) + return max(action_values.values()) + + +def compute_best_response_policy(infostate: InfostateNode) -> float: + """Calculate best response policy and returns best response value of infostate. + + Args: + infostate: Information state. + + Returns: + Best response value similar to what compute_best_response_values returns. + """ + if infostate.is_terminal(): + terminal_utility = 0 + for history_node in infostate.history_nodes: + terminal_utility += history_node.reach_probs[ + 0] * history_node.reach_probs[_get_opponent( + infostate.player)] * history_node.world_state.get_utility( + infostate.player) + return terminal_utility + action_values = {action: 0 for action in infostate.get_actions()} + infostate_actions = infostate.get_actions() + for action in infostate_actions: + action_values[action] = 0 + for child in infostate.children[action].values(): + action_values[action] += compute_best_response_policy(child) + + infostate.policy = {action: 0 for action in infostate.get_actions()} + max_action_value = max(action_values.values()) + for action in infostate_actions: + if action_values[action] == max_action_value: + infostate.policy[action] = 1 + break + return max_action_value + + +def compute_counterfactual_values(infostate: InfostateNode) -> float: + """Returns cfr value for an infostate. + + Args: + infostate: Information state. + + Returns: + Counterfactual value for infostate. This value is calculated from action + value and policy of all legal actions of infostate information state. + """ + if infostate.is_terminal(): + terminal_utility = 0 + for history_node in infostate.history_nodes: + terminal_utility += history_node.reach_probs[ + 0] * history_node.reach_probs[_get_opponent( + infostate.player)] * history_node.world_state.get_utility( + infostate.player) + return terminal_utility + infostate_actions = infostate.get_actions() + action_values = {action: 0 for action in infostate_actions} + for action in infostate_actions: + for child in infostate.children[action].values(): + action_values[action] += compute_counterfactual_values(child) + infostate.counterfactual_action_values = action_values + counterfactual_value = 0 + for action in infostate_actions: + counterfactual_value += infostate.policy[action] * action_values[action] + infostate.counterfactual_value = counterfactual_value + return counterfactual_value + + +def update_regrets(infostates: List[InfostateNode]) -> None: + """Updates regret value for each infostate in infostates. 
+ + Args: + infostates: List of information states + """ + for infostate in infostates: + for action in infostate.get_actions(): + current_regret = infostate.counterfactual_action_values[ + action] - infostate.counterfactual_value + infostate.regret[action] += current_regret + + +def compute_next_policy(infostates: List[InfostateNode], + cfr_plus: bool = False) -> None: + """Computes policy of next iteration for each infostate in infostates. + + Args: + infostates: List of information states. + cfr_plus: A flag which specifies if we update policy according to CFR or + CFR-plus algorithm. True if we use CFR-plus, otherwise we use CFR. + """ + for infostate in infostates: + infostate_actions = infostate.get_actions() + if cfr_plus: + for action in infostate_actions: + infostate.regret[action] = max(infostate.regret[action], 0.0) + + positive_regret_sum = 0 + for action in infostate_actions: + if infostate.regret[action] > 0: + positive_regret_sum += infostate.regret[action] + + actions_count = len(infostate_actions) + next_policy = {a: 1.0 / actions_count for a in infostate_actions} + + if positive_regret_sum > 0: + for action in infostate_actions: + next_policy[action] = max(infostate.regret[action], + 0) / positive_regret_sum + infostate.policy = next_policy + + +def cumulate_average_policy(infostates: List[InfostateNode], + weight: int = 1) -> None: + """Cumulates policy values of each infostate in infostates. + + For each infostate, we update average policy and the sum of weighted average + policy. + + Args: + infostates: List of information states. + weight: The weight we use to update policy and sum of weighted average + policy. For CFR algorithm, weight is 1. + """ + for infostate in infostates: + for action in infostate.get_actions(): + infostate.average_policy[ + action] += infostate.player_reach_prob * infostate.policy[ + action] * weight + infostate.average_policy_weight_sum += infostate.player_reach_prob * weight + + +def normalize_average_policy(infostates) -> None: + """Updates infostate policy by normalizing average policy. + + Args: + infostates: List of information states that their policies will be updated. + """ + for infostate in infostates: + for action in infostate.get_actions(): + infostate.policy[action] = infostate.average_policy[ + action] / infostate.average_policy_weight_sum + + +def best_response_counterfactual_regret_minimization_iteration( + history_tree_node: HistoryNode, + infostate_nodes: List[InfostateNode], + all_infostates_map: List[InfostateMapping]) -> None: + """Calculates CFRBR values. + + Args: + history_tree_node: Game tree HistoryTreeNode which is the root of the game + tree. + infostate_nodes: List of all information state nodes. + all_infostates_map: List of dictionaries (mapping from information state + string representation to information state object) for each players + (including chance player). This list will be empty when this function is + called fot the first time and it'll be population during DFS tree + traversal. 
+ """ + compute_next_policy(list(all_infostates_map[Players.PLAYER_1].values())) + + compute_reach_probabilities(history_tree_node, all_infostates_map) + cumulate_average_policy(list(all_infostates_map[Players.PLAYER_1].values())) + + compute_best_response_policy(infostate_nodes[Players.PLAYER_2]) + compute_reach_probabilities(history_tree_node, all_infostates_map) + compute_counterfactual_values(infostate_nodes[Players.PLAYER_1]) + + update_regrets(list(all_infostates_map[Players.PLAYER_1].values())) + + +def counterfactual_regret_minimization_iteration( + cfr_game_tree: GameTree, + alternating_updates: bool, + cfr_plus: bool, + weight: int = 1) -> None: + """Performs one iteration of CFR or CFR-plus. + + Args: + cfr_game_tree: Game tree for an imperfect information game. This game tree + is game tree of an openspiel game. + alternating_updates: Boolean flag to do alternative update for players + policies or not. If True, alternative updates will be performed (meaning + we first calculate average policy, counterfactual values, regrets and next + policy for player 1 first and then calculate all of these for player 2), + otherwise both players average policies, counterfactual values and regrets + will be updated right after each other (meaning, for example we calculate + next_policy of player 1, and then next policy of player 2. Then, we + calculate average policy for player 1 and then average policy for player + 2, and so on). + cfr_plus: Boolean flag indicating if we perform CFR algorithm or CFR-plus. + If True, we perform CFR-plus algorithm, otherwise we perform CFR + algorithm. + weight: The weight we use to update policy and sum of weighted average + policy. + """ + if alternating_updates: + compute_reach_probabilities(cfr_game_tree.first_history_node, + cfr_game_tree.all_infostates_map) + cumulate_average_policy( + list(cfr_game_tree.all_infostates_map[Players.PLAYER_1].values()), + weight) + compute_counterfactual_values( + cfr_game_tree.infostate_nodes[Players.PLAYER_1]) + update_regrets( + list(cfr_game_tree.all_infostates_map[Players.PLAYER_1].values())) + compute_next_policy( + list(cfr_game_tree.all_infostates_map[Players.PLAYER_1].values()), + cfr_plus) + + compute_reach_probabilities(cfr_game_tree.first_history_node, + cfr_game_tree.all_infostates_map) + cumulate_average_policy( + list(cfr_game_tree.all_infostates_map[Players.PLAYER_2].values()), + weight) + compute_counterfactual_values( + cfr_game_tree.infostate_nodes[Players.PLAYER_2]) + update_regrets( + list(cfr_game_tree.all_infostates_map[Players.PLAYER_2].values())) + compute_next_policy( + list(cfr_game_tree.all_infostates_map[Players.PLAYER_2].values()), + cfr_plus) + else: + compute_next_policy( + list(cfr_game_tree.all_infostates_map[Players.PLAYER_1].values()), + cfr_plus) + compute_next_policy( + list(cfr_game_tree.all_infostates_map[Players.PLAYER_2].values()), + cfr_plus) + + compute_reach_probabilities(cfr_game_tree.first_history_node, + cfr_game_tree.all_infostates_map) + cumulate_average_policy( + list(cfr_game_tree.all_infostates_map[Players.PLAYER_1].values()), + weight) + cumulate_average_policy( + list(cfr_game_tree.all_infostates_map[Players.PLAYER_2].values()), + weight) + + compute_counterfactual_values( + cfr_game_tree.infostate_nodes[Players.PLAYER_1]) + compute_counterfactual_values( + cfr_game_tree.infostate_nodes[Players.PLAYER_2]) + + update_regrets( + list(cfr_game_tree.all_infostates_map[Players.PLAYER_1].values())) + update_regrets( + 
list(cfr_game_tree.all_infostates_map[Players.PLAYER_2].values())) + + +def compute_cfr_plus_values(cfr_game_tree: GameTree, + steps: int) -> Tuple[List[float], List[float]]: + """Performs CFR-plus algorithm for a given number of steps. + + Args: + cfr_game_tree: Game tree for an imperfect information game. This game tree + is game tree of an openspiel game. + steps: Number of CFR-plus steps. + + Returns: + best_response_values_p1: List of best response values for player 1. The + length of this list is equal to the number of steps. + best_response_values_p2: List of best response values for player 2. The + length of this list is equal to the number of steps. + """ + best_response_values_p1 = [] + best_response_values_p2 = [] + for i in range(steps): + counterfactual_regret_minimization_iteration( + cfr_game_tree=cfr_game_tree, + alternating_updates=True, + cfr_plus=True, + weight=i + 1) + + game_tree_copy = copy.deepcopy(cfr_game_tree) + normalize_average_policy( + game_tree_copy.all_infostates_map[Players.PLAYER_1].values()) + normalize_average_policy( + game_tree_copy.all_infostates_map[Players.PLAYER_2].values()) + compute_reach_probabilities(game_tree_copy.first_history_node, + game_tree_copy.all_infostates_map) + + best_response_values_p1.append( + compute_best_response_values( + game_tree_copy.infostate_nodes[Players.PLAYER_1])) + best_response_values_p2.append( + compute_best_response_values( + game_tree_copy.infostate_nodes[Players.PLAYER_2])) + + return best_response_values_p1, best_response_values_p2 + + +def compute_cfr_values(cfr_game_tree: GameTree, + steps: int) -> Tuple[List[float], List[float]]: + """Performs CFR algorithm for a given number of steps. + + Args: + cfr_game_tree: Game tree for an imperfect information game. This game tree + is game tree of an openspiel game. + steps: Number of CFR-plus steps. + + Returns: + best_response_values_p1: List of best response values for player 1. The + length of this list is equal to the number of steps. + best_response_values_p2: List of best response values for player 2. The + length of this list is equal to the number of steps. + """ + best_response_values_p1 = [] + best_response_values_p2 = [] + for _ in range(steps): + counterfactual_regret_minimization_iteration( + cfr_game_tree=cfr_game_tree, alternating_updates=False, cfr_plus=False) + + normalize_average_policy( + cfr_game_tree.all_infostates_map[Players.PLAYER_1].values()) + normalize_average_policy( + cfr_game_tree.all_infostates_map[Players.PLAYER_2].values()) + compute_reach_probabilities(cfr_game_tree.first_history_node, + cfr_game_tree.all_infostates_map) + best_response_values_p1.append( + compute_best_response_values( + cfr_game_tree.infostate_nodes[Players.PLAYER_1])) + best_response_values_p2.append( + compute_best_response_values( + cfr_game_tree.infostate_nodes[Players.PLAYER_2])) + + return best_response_values_p1, best_response_values_p2 diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/cfr_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/cfr_test.py new file mode 100644 index 0000000..2d57c06 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/cfr_test.py @@ -0,0 +1,86 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests counterfactual regret minimization.""" + +from absl.testing import absltest +from absl.testing import parameterized + +from open_spiel.python.examples.meta_cfr.sequential_games import cfr +from open_spiel.python.examples.meta_cfr.sequential_games import game_tree_utils as trees +from open_spiel.python.examples.meta_cfr.sequential_games import openspiel_api + + +def _uniform_policy(size): + if size > 0: + return [1./size]*size + return [] + + +class CfrTest(parameterized.TestCase): + + @parameterized.named_parameters(('kuhn_poker_test', 'kuhn_poker'), + ('leduc_poker_test', 'leduc_poker')) + def test_zero_policy_is_uniform(self, game): + config = {'players': 2} + cfr_game_tree = trees.build_game_tree( + openspiel_api.WorldState( + game_name=game, config=config, perturbation=False)) + cfr.compute_cfr_values(cfr_game_tree, 1) + infostates_p1 = list(cfr_game_tree.all_infostates_map[1].values()) + infostates_p2 = list(cfr_game_tree.all_infostates_map[2].values()) + with self.subTest('player_1_initial_policy'): + for i in range(len(infostates_p1)): + self.assertListEqual( + list(infostates_p1[i].policy.values()), + _uniform_policy(len(infostates_p1[i].policy.values()))) + with self.subTest('player_2_initial_policy'): + for i in range(len(infostates_p2)): + self.assertListEqual( + list(infostates_p2[i].policy.values()), + _uniform_policy(len(infostates_p2[i].policy.values()))) + + def test_cfr_leduc_poker(self): + config = {'players': 2} + exploitability_error = 0.2 + cfr_game_tree = trees.build_game_tree( + openspiel_api.WorldState( + game_name='leduc_poker', config=config, perturbation=False)) + best_response_value_p1, best_response_value_p2 = cfr.compute_cfr_values( + cfr_game_tree, 20) + last_best_response_value_player_1 = best_response_value_p1[-1] + last_best_response_value_player_2 = best_response_value_p2[-1] + exploitability = (last_best_response_value_player_1 + + last_best_response_value_player_2) / 2 + # Exploitability values are computed using OpenSpiel cfr + self.assertLessEqual(exploitability, 0.59 + exploitability_error) + + def test_cfr_kuhn_poker(self): + config = {'players': 2} + exploitability_error = 0.2 + cfr_game_tree = trees.build_game_tree( + openspiel_api.WorldState( + game_name='kuhn_poker', config=config, perturbation=False)) + best_response_value_p1, best_response_value_p2 = cfr.compute_cfr_values( + cfr_game_tree, 20) + last_best_response_value_player_1 = best_response_value_p1[-1] + last_best_response_value_player_2 = best_response_value_p2[-1] + exploitability = (last_best_response_value_player_1 + + last_best_response_value_player_2) / 2 + # Exploitability values are computed using OpenSpiel cfr + self.assertLessEqual(exploitability, 0.06 + exploitability_error) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/dataset_generator.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/dataset_generator.py new file mode 100644 index 0000000..429b30b --- /dev/null +++ 
b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/dataset_generator.py @@ -0,0 +1,39 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Dataset generation for meta-CFR algorithm.""" + +from typing import List, Tuple + +import numpy as np + +from open_spiel.python.examples.meta_cfr.sequential_games.typing import InfostateNode + + +class Dataset: + """Dataset class to generate data for training meta-CFR model.""" + + def __init__(self, train_dataset: List[Tuple[List[List[float]], + InfostateNode]], + batch_size: int): + self._train_dataset = np.array(train_dataset, dtype=object) + self._size = self._train_dataset.shape[0] + self._batch_size = batch_size + + def get_batch(self): + while True: + np.random.shuffle(self._train_dataset) + idx_sample = np.random.choice(self._size, self._batch_size) + next_batch = self._train_dataset[idx_sample, :] + yield next_batch diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/evaluation.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/evaluation.py new file mode 100644 index 0000000..b6c25ec --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/evaluation.py @@ -0,0 +1,22 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Evaluation of a CFR best response agent given the world state.""" + +from absl import flags +FLAGS = flags.FLAGS + + +def CFRBREvaluation(agent, world_state): + return agent.next_policy(world_state) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/game_tree_utils.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/game_tree_utils.py new file mode 100644 index 0000000..a8ba2ac --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/game_tree_utils.py @@ -0,0 +1,216 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Game tree structure for imperfect information games.""" + +import copy +from typing import Any, Dict, List, Text, Tuple + +from open_spiel.python.examples.meta_cfr.sequential_games import cfr +from open_spiel.python.examples.meta_cfr.sequential_games import openspiel_api + + +class HistoryTreeNode: + """Tree node to build game tree in cfr and do DFS traversal on game tree. + + Attributes: + world_state: Current world state representation. + reach_probs: Reach probability of tree node for each player. We consider + reach probability for chance player, player 1 and player 2. + action_probs: Probability of actions taken by each player. We consider + actions taken by chance player, player 1 and player 2. Keys of this + dictionary are tuples of (action_chance, action_player_1, + action_player_2). + children: A dictionary from a taken action from this node to the + HistoryTreeNode of the child we derive in the game tree by taking an + action. + """ + + def __init__(self, world_state: openspiel_api.WorldState): + self.world_state = world_state + self.reach_probs = [1.0, 1.0, 1.0] + self.action_probs = {} + self._value_p1 = 0 + self.children = {} + + def add_child(self, child_world_state: 'HistoryTreeNode', + actions: Tuple[int, int, int]) -> None: + """Adds the child world state to dictionary of children of this node.""" + self.children[actions] = child_world_state + + def get_child(self, actions: Tuple[int, int, int]) -> 'HistoryTreeNode': + """Returns a child world state that can be derived from an action.""" + return self.children[actions] + + +class InfoState: + """Information state class. + + Attributes: + history_nodes: History of game as players play. + player: Index of current player. + infostate_string: String representation of current information state. + world_state: Current game world state. + children: Children nodes of information states. The keys are actions, and + values are dictionaries from information state string to information state + node. + counterfactual_reach_prob: Counterfactual values of reach probability for + the current information state. + player_reach_prob: Reach probability of information state for the acting + player. + counterfactual_action_values: Counterfactual values for each action in this + information state. This is a dictionary from action to counterfactual + value of this action in this information state. + counterfactual_value: Counterfactual value of this information state. + regret: Regret of each action for all player's actions in this information + state. + policy: Policy of player in this information state. + average_policy: Average policy for all player's actions in this information + state. + average_policy_weight_sum: Sum of weighted average policy. This is used to + normalize average policy and derive policy in this information state.
+ """ + + def __init__(self, world_state: openspiel_api.WorldState, player: int, + infostate_string: Text): + self.history_nodes = [] + self.player = player + self.infostate_string = infostate_string + self.world_state = world_state + self._actions = world_state.get_actions() + self.children = {a: {} for a in self._actions[player]} + self.counterfactual_reach_prob = 0. + self.player_reach_prob = 0. + self.counterfactual_action_values = {} + self.counterfactual_value = 0 + self.regret = {a: 0. for a in self._actions[player]} + + actions_count = len(self._actions[player]) + self.policy = { + a: 1.0 / actions_count for a in world_state.get_actions()[player] + } + + self.average_policy = {a: 0. for a in self._actions[player]} + self.average_policy_weight_sum = 0. + + def add_history_node(self, history_node: HistoryTreeNode) -> None: + """Updates history nodes with a given(last) history node.""" + self.history_nodes.append(history_node) + + def add_child_infostate(self, action: int, + infostate_child: Any) -> None: + """Adds child infostate derived from taking an action to self.children.""" + self.children[action][infostate_child.infostate_string] = infostate_child + + def get_actions(self) -> List[int]: + """Returns legal actions in current information state for current player.""" + return self.history_nodes[0].world_state.get_actions()[self.player] + + def is_terminal(self) -> bool: + """Returns True if information state is terminal, False otherwise.""" + return self.history_nodes[0].world_state.is_terminal() + + +class GameTree: + """Game tree class to build for CFR-based algorithms. + + Attributes: + first_history_node: Root node of game tree. + infostate_nodes: List of information state nodes for each player (including + chance player). + all_infostates_map: List of dictionaries (mapping from information state + string representation to information state object) for each players + (including chance player). + """ + + def __init__(self, first_history_node: HistoryTreeNode, + infostate_nodes: List[InfoState], + all_infostates_map: List[Dict[str, InfoState]]): + self.first_history_node = first_history_node + self.infostate_nodes = infostate_nodes + self.all_infostates_map = all_infostates_map + + +def build_tree_dfs( + world_state: openspiel_api.WorldState, + all_infostates_map: List[Dict[str, InfoState]] +) -> Tuple[HistoryTreeNode, List[InfoState]]: + """Builds the game tree by DFS traversal. + + Args: + world_state: An openspiel game world state representation that will be the + root of game tree. + all_infostates_map: List of dictionaries (mapping from information state + string representation to information state object) for each players + (including chance player). This list will be empty when this function is + called and it'll be population during DFS tree traversal. + + Returns: + tree_node: Root of the game tree built in DFS traversal. + infostate_nodes: List of information state (root) tree node for each player + (including chance player). 
+ """ + tree_node = HistoryTreeNode(world_state) + + infostate_nodes = [ + InfoState(world_state, 1, world_state.get_infostate_string(1)), + InfoState(world_state, 1, world_state.get_infostate_string(1)), + InfoState(world_state, 2, world_state.get_infostate_string(2)) + ] + for p in [cfr.Players.PLAYER_1, cfr.Players.PLAYER_2]: + infostate_string = world_state.get_infostate_string(p) + if infostate_string not in all_infostates_map[p]: + all_infostates_map[p][infostate_string] = InfoState( + world_state, p, infostate_string) + + infostate = all_infostates_map[p][infostate_string] + infostate.add_history_node(tree_node) + + infostate_nodes[p] = infostate + actions = world_state.get_actions() + actions_chance, actions_p1, actions_p2 = actions + + for action_chance in actions_chance: + for action_p1 in actions_p1: + for action_p2 in actions_p2: + child_state = copy.deepcopy(world_state) + child_state.apply_actions((action_chance, action_p1, action_p2)) + child_tree_node, child_infostates = build_tree_dfs( + child_state, all_infostates_map) + + tree_node.add_child(child_tree_node, + (action_chance, action_p1, action_p2)) + infostate_nodes[1].add_child_infostate(action_p1, child_infostates[1]) + infostate_nodes[2].add_child_infostate(action_p2, child_infostates[2]) + + return tree_node, infostate_nodes + + +def build_game_tree(world_state: openspiel_api.WorldState) -> GameTree: + """Builds game tree for CFR-based algorithms. + + Args: + world_state: An openspiel game world state representation that will be the + root of game tree. + + Returns: + Calls GameTree function which returns the following: + tree_node: Root of the game tree built in DFS traversal. + infostate_nodes: List of information state (root) tree node for each player + (including chance player). + """ + all_infostates_map = [{}, {}, {}] + first_history_node, infostate_nodes = build_tree_dfs(world_state, + all_infostates_map) + return GameTree(first_history_node, infostate_nodes, all_infostates_map) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/main.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/main.py new file mode 100644 index 0000000..a61cafe --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/main.py @@ -0,0 +1,90 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Main file to train and evaluate meta-cfr agent, cfr and cfr-plus.""" + +from typing import Sequence + +from absl import app +from absl import flags +import numpy as np + +from open_spiel.python.examples.meta_cfr.sequential_games import cfr +from open_spiel.python.examples.meta_cfr.sequential_games import evaluation +from open_spiel.python.examples.meta_cfr.sequential_games import game_tree_utils +from open_spiel.python.examples.meta_cfr.sequential_games import meta_learning +from open_spiel.python.examples.meta_cfr.sequential_games import openspiel_api + + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("random_seed_size", 30, "Number of random seeds to use.") + + +def main(argv: Sequence[str]) -> None: + del argv + config = {"players": FLAGS.players} + random_seeds_eval = np.random.choice( + np.array(list(range(1000))), size=FLAGS.random_seed_size, replace=False) + + # Train a meta-cfr agent + meta_cfr_agent = meta_learning.MetaCFRRegretAgent( + training_epochs=1, + meta_learner_training_epochs=FLAGS.meta_learner_training_epochs, + game_name=FLAGS.game, + game_config=config, + perturbation=FLAGS.perturbation, + seed=FLAGS.random_seed, + model_type=FLAGS.model_type, + best_response=True) + meta_cfr_agent.train() + + cfr_vals = np.zeros((FLAGS.meta_learner_training_epochs,)) + cfr_plus_vals = np.zeros((FLAGS.meta_learner_training_epochs,)) + + for seed in list(random_seeds_eval): + + # Evaluate a meta-cfr agent + world_state = openspiel_api.WorldState( + FLAGS.game, config, perturbation=True, random_seed=seed) + meta_cfr_vals = evaluation.CFRBREvaluation(meta_cfr_agent, world_state) + + # Evaluate a cfr plus agent + game_tree = game_tree_utils.build_game_tree( + openspiel_api.WorldState( + FLAGS.game, + config, + perturbation=FLAGS.perturbation, + random_seed=seed)) + _, cfr_plus_vals = cfr.compute_cfr_plus_values( + game_tree, FLAGS.meta_learner_training_epochs) + + # Evaluate a cfr agent + game_tree = game_tree_utils.build_game_tree( + openspiel_api.WorldState( + FLAGS.game, + config, + perturbation=FLAGS.perturbation, + random_seed=seed)) + _, cfr_vals = cfr.compute_cfr_values( + game_tree, FLAGS.meta_learner_training_epochs) + + print("Evaluation seed:", random_seeds_eval) + print("Meta_cfr agent:", meta_cfr_vals) + print("cfr_plus agent:", cfr_plus_vals) + print("cfr agent:", cfr_vals) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/meta_learning.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/meta_learning.py new file mode 100644 index 0000000..3268e72 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/meta_learning.py @@ -0,0 +1,454 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Meta learning algorithm.""" + +import os +from typing import Dict, List, Any + +from absl import flags +from absl import logging +import haiku as hk +import jax +import jax.numpy as jnp +import numpy as np +import optax + +from open_spiel.python.examples.meta_cfr.sequential_games import cfr +from open_spiel.python.examples.meta_cfr.sequential_games import dataset_generator +from open_spiel.python.examples.meta_cfr.sequential_games import game_tree_utils +from open_spiel.python.examples.meta_cfr.sequential_games import models +from open_spiel.python.examples.meta_cfr.sequential_games import openspiel_api +from open_spiel.python.examples.meta_cfr.sequential_games import typing +from open_spiel.python.examples.meta_cfr.sequential_games import utils + + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("batch_size", 250, "Batch size.") +flags.DEFINE_integer("num_batches", 1, "Number of batches.") +flags.DEFINE_integer("meta_learner_training_epochs", 1, + "Number of meta_learner_training_epochs") +flags.DEFINE_integer("num_tasks", 1, "Number tasks to train meta learner.") +flags.DEFINE_integer("random_seed", 2, "Random seed.") +flags.DEFINE_integer("checkpoint_interval", 50, + "Checkpoint every checkpoint_interval.") +flags.DEFINE_string("game", "leduc_poker", "Name of the game") +flags.DEFINE_integer("players", 2, "Number of players") +flags.DEFINE_bool("perturbation", True, "Random perturbation of the game.") +flags.DEFINE_bool( + "use_infostate_representation", True, + "Use infostate representation as extra input to meta network.") +flags.DEFINE_float("init_lr", 0.2, "Initial learning rate") +flags.DEFINE_string("lstm_sizes", "64", "Size of lstm layers.") +flags.DEFINE_string("mlp_sizes", "20, 20", "Size of mlp layers.") +flags.DEFINE_string("model_type", "MLP", "Model type.") + + +os.environ["XLA_PYTHON_CLIENT_PREALLOCATE"] = "false" +os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"] = "1.5" + + +def append_counterfactual_values( + infostates: List[typing.InfostateNode], + counterfactual_values: Dict[str, List[List[float]]]): + for infostate in infostates: + counterfactual_values[infostate.infostate_string].append([ + infostate.counterfactual_action_values[a] + for a in infostate.get_actions() + ]) + + +def compute_next_policy_invariants( + infostates: typing.InfostateMapping, all_actions: List[int], + infostate_map: typing.InfostateMapping +) -> tuple[Dict[str, jnp.ndarray], Dict[str, List[int]]]: + """Computes information needed to calculate next policy. + + This function computes one hot encodings of infostates and returns mappings + from infostate strings to one hot representations of infostates as well as + illegal actions. + + Args: + infostates: List of infostate mappings. + all_actions: List of actions. + infostate_map: Mapping from infostate string to infostate. 
+ + Returns: + Returns mappings of infostate strings to one hot representation for + infostates and illegal actions + """ + one_hot_representations = {} + illegal_actions = {} + + for (infostate_str, infostate) in infostates.items(): + if infostate.is_terminal(): + continue + + legal_actions = infostate.get_actions() + + if len(legal_actions) == 1: + infostate.policy[infostate.get_actions()[0]] = 1 + continue + infostate_str_one_hot = jax.nn.one_hot(infostate_map[infostate_str], + len(infostates)) + one_hot_representations[infostate_str] = infostate_str_one_hot + illegal_actions[infostate_str] = [ + i for i, a in enumerate(all_actions) if a not in legal_actions + ] + return one_hot_representations, illegal_actions + + +def compute_next_policy(infostates: typing.InfostateMapping, + net_apply: typing.ApplyFn, net_params: typing.Params, + epoch: int, all_actions: List[int], + one_hot_representations: Dict[str, jnp.ndarray], + illegal_actions: Dict[str, + List[int]], key: hk.PRNGSequence): + """Computes next step policy from output of the model. + + Args: + infostates: List of infostate mappings. + net_apply: Apply function. + net_params: Model params. + epoch: epoch. + all_actions: List of actions. + one_hot_representations: Dictionary from infostate string to infostate. + illegal_actions: Dictionary from infostate string to the list of illegal + actions. + key: Haiku Pseudo random number generator. + """ + + infostate_lst = [] + input_lst = [] + illegal_action_lst = [] + + batched_net_output = [] + for (infostate_str, infostate) in infostates.items(): + if infostate.is_terminal(): + continue + + legal_actions = infostate.get_actions() + if len(legal_actions) == 1: + infostate.policy[infostate.get_actions()[0]] = 1 + continue + regret_vec = np.array([ + infostate.regret[a] / + (epoch + 1) if a in infostate.get_actions() else 0 + for a in all_actions + ]) + if FLAGS.use_infostate_representation: + one_hot_representation = one_hot_representations[infostate_str] + net_input = jnp.concatenate([regret_vec, one_hot_representation]) + else: + net_input = regret_vec + input_lst.append(net_input) + infostate_lst.append(infostate) + illegal_action_lst.append(illegal_actions[infostate_str]) + batched_inputs, output_mappings, relevant_illegal_actions = ( + utils.get_batched_input( + input_lst, infostate_lst, illegal_action_lst, FLAGS.batch_size + ) + ) + idx = 0 + + for _ in range(int(len(batched_inputs) / FLAGS.batch_size)): + batched_input, output_mapping, relevant_illegal_action = batched_inputs[ + idx:idx + FLAGS.batch_size], output_mappings[ + idx:idx + + FLAGS.batch_size], relevant_illegal_actions[idx:idx + + FLAGS.batch_size] + idx += FLAGS.batch_size + + batched_input_jnp = jnp.array( + np.expand_dims(np.array(batched_input), axis=1)) + batched_net_output = utils.get_network_output_batched( # pytype: disable=wrong-arg-types # jnp-type + net_apply, net_params, + batched_input_jnp, + relevant_illegal_action, key) + for i, infostate in enumerate(output_mapping): + net_output = jnp.squeeze(batched_net_output[i]) + for ai, action in enumerate(infostate.get_actions()): + infostate.policy[action] = float(net_output[ai]) + + +def cfr_br_meta_data( + history_tree_node: typing.HistoryNode, + infostate_nodes: List[typing.InfostateNode], + all_infostates_map: List[typing.InfostateMapping], epochs: int, + net_apply: typing.ApplyFn, net_params: typing.Params, + all_actions: List[int], infostate_map: typing.InfostateMapping, + key: hk.PRNGSequence +) -> tuple[Dict[str, jnp.ndarray], Dict[str, jnp.ndarray], 
List[float]]: + """Collects counterfactual values for both players and best response for player_2. + + Args: + history_tree_node: Game tree HistoryTreeNode which is the root of the game + tree. + infostate_nodes: Infostates. + all_infostates_map: List of mappings from infostate strings to infostates. + epochs: Number of epochs. + net_apply: Apply function. + net_params: Network parameters. + all_actions: List of all actions. + infostate_map: A mapping from infostate strings to infostates. + key: Haiku pseudo random number generator. + + Returns: + Returns counterfactual values for player_1, counterfactual values for + player_2 and best response values for player_2. + """ + counterfactual_values_player1 = { + infostate.infostate_string: [] + for infostate in list(all_infostates_map[1].values()) + } + counterfactual_values_player2 = { + infostate.infostate_string: [] + for infostate in list(all_infostates_map[2].values()) + } + + non_terminal_infostates_map_player1 = utils.filter_terminal_infostates( + all_infostates_map[1] + ) + one_hot_representations_player1, illegal_actions_player1 = ( + compute_next_policy_invariants( + non_terminal_infostates_map_player1, all_actions, infostate_map + ) + ) + player_2_last_best_response_values = [] + for epoch in range(epochs): + compute_next_policy(non_terminal_infostates_map_player1, net_apply, + net_params, epoch, all_actions, + one_hot_representations_player1, + illegal_actions_player1, key) + + cfr.compute_reach_probabilities(history_tree_node, all_infostates_map) + cfr.cumulate_average_policy(list(all_infostates_map[1].values())) + cfr.compute_best_response_policy(infostate_nodes[2]) + cfr.compute_reach_probabilities(history_tree_node, all_infostates_map) + cfr.compute_counterfactual_values(infostate_nodes[1]) + cfr.update_regrets(list(all_infostates_map[1].values())) + append_counterfactual_values( + list(all_infostates_map[1].values()), counterfactual_values_player1) + cfr.normalize_average_policy(all_infostates_map[1].values()) + cfr.compute_reach_probabilities(history_tree_node, all_infostates_map) + player_2_last_best_response_values.append( + float(cfr.compute_best_response_values(infostate_nodes[2])) + ) + + logging.info( + "Epoch %d: player_2 best response value is %f", + epoch, + player_2_last_best_response_values[-1], + ) + + return ( # pytype: disable=bad-return-type # jax-ndarray + counterfactual_values_player1, + counterfactual_values_player2, + player_2_last_best_response_values, + ) + + +class MetaCFRRegretAgent: + """Meta regret minimizer agent. + + Attributes: + training_epochs: Number of training epochs. + meta_learner_training_epochs: Number of epochs for meta learner. + game_name: Name of the game. + game_config: Game configuration. + perturbation: Binary variable to specify perturbation. + seed: Random seed. + model_type: Type of NN model for meta learner. + best_response: Binary variable to specify if using best response. + optimizer: Optimizer model. 
+ """ + + def __init__(self, + training_epochs, + meta_learner_training_epochs, + game_name, + game_config, + perturbation, + seed, + model_type="MLP", + best_response=True): + self._training_epochs = training_epochs + self._meta_learner_training_epochs = meta_learner_training_epochs + self._game_name = game_name + self._model_type = model_type + self._perturbation = perturbation + self._game_config = game_config + self._best_response = best_response + self._seed = seed + self._rng = hk.PRNGSequence(100) + self._world_state = openspiel_api.WorldState(self._game_name, + self._game_config, + self._perturbation, + self._seed) + self._all_actions = self._world_state.get_distinct_actions() + self._num_infostates, self._infostate_map = self.get_num_infostates() + self._step = 0 + + def get_num_infostates(self): + """Returns number of infostates and infostate mapping. + + Returns: + Returns sum of number of infostates for both players and a mapping from + infostate string to infostates. + """ + all_infostates_map = [{}, {}, {}] + _, _ = game_tree_utils.build_tree_dfs( + self._world_state, all_infostates_map) + non_terminal_infostates_map_player1 = utils.filter_terminal_infostates( + all_infostates_map[1]) + non_terminal_infostates_map_player2 = utils.filter_terminal_infostates( + all_infostates_map[2]) + if self._best_response: + infostate_map = { + infostate_str: infostate_node + for (infostate_node, infostate_str + ) in enumerate(list(non_terminal_infostates_map_player1.keys())) + } + return len(non_terminal_infostates_map_player1), infostate_map + nont_terminal_infostates_map_both_players = list( + non_terminal_infostates_map_player1.keys()) + list( + non_terminal_infostates_map_player2.keys()) + infostate_map = { + infostate_str: infostate_node + for (infostate_node, infostate_str + ) in enumerate(nont_terminal_infostates_map_both_players) + } + return len(non_terminal_infostates_map_player1) + len( + non_terminal_infostates_map_player2), infostate_map + + def train(self): + self.training_optimizer() + + def next_policy(self, world_state: openspiel_api.WorldState): + """Computes best reponses for the next step of cfr. + + Args: + world_state: Current state of the world. + + Returns: + Returns best response values for player_2. + + """ + all_infostates_map = [{}, {}, {}] + first_history_node, infostate_nodes = game_tree_utils.build_tree_dfs( + world_state, all_infostates_map) + + _, _, player_2_best_response_values = cfr_br_meta_data( # pytype: disable=wrong-arg-types + history_tree_node=first_history_node, + infostate_nodes=infostate_nodes, + all_infostates_map=all_infostates_map, + epochs=self._meta_learner_training_epochs, + net_apply=self.optimizer.net_apply, # pytype: disable=attribute-error + net_params=self.optimizer.net_params, # pytype: disable=attribute-error + all_actions=self._all_actions, + infostate_map=self._infostate_map, + key=self._rng) + return player_2_best_response_values + + def optimize_infoset(self, cfvalues: Any, infoset: List[typing.InfostateNode], + infostate_map: typing.InfostateMapping, + rng: hk.PRNGSequence): + """Apply updates to optimizer state. + + Args: + cfvalues: Counterfactual values. + infoset: Infostates. + infostate_map: Mapping from infostate string to infostate. + rng: Next random seed. 
+ """ + grads = jax.grad( + utils.meta_loss, has_aux=False)(self.optimizer.net_params, cfvalues, # pytype: disable=attribute-error + self.optimizer.net_apply, # pytype: disable=attribute-error + self._meta_learner_training_epochs, + len(self._all_actions), infoset, + infostate_map, FLAGS.batch_size, + next(rng), + FLAGS.use_infostate_representation) + updates, self.optimizer.opt_state = self.optimizer.opt_update( # pytype: disable=attribute-error + grads, self.optimizer.opt_state) # pytype: disable=attribute-error + + self.optimizer.net_params = optax.apply_updates(self.optimizer.net_params, # pytype: disable=attribute-error + updates) + + def training_optimizer(self): + """Train an optimizer for meta learner.""" + + self.optimizer = models.OptimizerModel( + mlp_sizes=FLAGS.mlp_sizes, + lstm_sizes=FLAGS.lstm_sizes, + initial_learning_rate=FLAGS.init_lr, + batch_size=FLAGS.batch_size, + num_actions=len(self._all_actions), + num_infostates=self._num_infostates, + model_type=self._model_type, + use_infostate_representation=FLAGS.use_infostate_representation) + self.optimizer.initialize_optimizer_model() + + while self._step < FLAGS.num_tasks: + if self._perturbation: + self._seed = np.random.choice(np.array(list(range(100)))) + self._world_state = openspiel_api.WorldState( + self._game_name, + self._game_config, + perturbation=self._perturbation, + random_seed=self._seed) + + for epoch in range(self._training_epochs): + logging.info("Training epoch %d", epoch) + all_infostates_map = [{}, {}, {}] + first_history_node, infostate_nodes = game_tree_utils.build_tree_dfs( + self._world_state, all_infostates_map) + cfr_values_player1, cfr_values_player2, _ = cfr_br_meta_data( # pytype: disable=wrong-arg-types + history_tree_node=first_history_node, + infostate_nodes=infostate_nodes, + all_infostates_map=all_infostates_map, + epochs=self._meta_learner_training_epochs, + net_apply=self.optimizer.net_apply, + net_params=self.optimizer.net_params, + all_actions=self._all_actions, + infostate_map=self._infostate_map, + key=self._rng) + + train_dataset = [] + cfvalues_per_player = [ + cfr_values_player1, cfr_values_player2 + ] + # for CFRBR we consider player 0. 
+ player_ix = 0 + infosets = [ + infoset for infoset in all_infostates_map[player_ix + 1].values() + if len(infoset.get_actions()) >= 2 + ] + for infoset in infosets: + cfvalues = cfvalues_per_player[player_ix][infoset.infostate_string] + train_dataset.append((cfvalues, infoset)) + + dataset = dataset_generator.Dataset(train_dataset, FLAGS.batch_size) # pytype: disable=wrong-arg-types # jax-ndarray + data_loader = dataset.get_batch() + for _ in range(FLAGS.num_batches): + batch = next(data_loader) + cfvalues, infoset = zip(*batch) + cfvalues = np.array(list(cfvalues), dtype=object) + cfvalues = utils.mask(cfvalues, infoset, len(self._all_actions), + FLAGS.batch_size) + self.optimize_infoset(cfvalues, infoset, self._infostate_map, # pytype: disable=wrong-arg-types + self._rng) + logging.info("Game: %d", self._step) + self._step += 1 diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/meta_learning_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/meta_learning_test.py new file mode 100644 index 0000000..54d7303 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/meta_learning_test.py @@ -0,0 +1,125 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for meta CFR Algorithm.""" + +from absl import flags +from absl.testing import absltest +from absl.testing import parameterized +import haiku as hk +import jax +import mock +import numpy as np +import optax + +from open_spiel.python.examples.meta_cfr.sequential_games import meta_learning +from open_spiel.python.examples.meta_cfr.sequential_games import models +from open_spiel.python.examples.meta_cfr.sequential_games import openspiel_api + +FLAGS = flags.FLAGS + + +def meta_cfr_agent(game_name='kuhn_poker'): + return meta_learning.MetaCFRRegretAgent( + training_epochs=1, + meta_learner_training_epochs=1, + game_name=game_name, + game_config={'players': 2}, + perturbation=False, + seed=0, + model_type='MLP', + best_response=True) + + +class MetaLearningTest(parameterized.TestCase): + + def setup_optimizer(self, num_actions, num_infostates): + if FLAGS.use_infostate_representation: + dummy_input = np.zeros( + shape=[FLAGS.batch_size, 1, num_actions + num_infostates]) + else: + dummy_input = np.zeros(shape=[FLAGS.batch_size, 1, num_actions]) + + def mlp_forward(dummy_input): + mlp = hk.nets.MLP([10, num_actions]) + return mlp(dummy_input) + forward = hk.transform(mlp_forward) + + rng_seq = jax.random.PRNGKey(10) + params = forward.init(rng_seq, dummy_input) + lr_scheduler_fn = optax.polynomial_schedule( + init_value=0.2, end_value=0.0001, power=1., transition_steps=100) + opt_init, opt_update = optax.chain( + optax.scale_by_adam(), optax.scale_by_schedule(lr_scheduler_fn), + optax.scale(-0.2)) + net_apply = forward.apply + opt_state = opt_init(params) + return params, net_apply, opt_state, opt_update + + @parameterized.named_parameters(('kuhn_poker_game', 'kuhn_poker'), + ('leduc_poker_game', 'leduc_poker')) + def test_worldstate_initialization(self, game_name): + self._world_state = openspiel_api.WorldState( + game_name, {'players': 2}, perturbation=False, random_seed=0) + self._all_actions = self._world_state.get_distinct_actions() + self.assertNotEmpty(self._all_actions, + 'Number of distinct actions should be greater that 0.') + + @parameterized.named_parameters(('kuhn_poker_game', 'kuhn_poker'), + ('leduc_poker_game', 'leduc_poker')) + def test_meta_cfr_agent_initialization(self, game_name): + with mock.patch.object(meta_learning.MetaCFRRegretAgent, + 'get_num_infostates') as mock_get_num_infostates: + mock_get_num_infostates.return_value = (mock.MagicMock(), + mock.MagicMock()) + meta_learning.MetaCFRRegretAgent( + training_epochs=1, + meta_learner_training_epochs=1, + game_name=game_name, + game_config={'players': 2}, + perturbation=False, + seed=0, + model_type='MLP', + best_response=True) + mock_get_num_infostates.assert_called_once_with() + + @parameterized.named_parameters(('kuhn_poker_game', 'kuhn_poker'), + ('leduc_poker_game', 'leduc_poker')) + def test_meta_learning_training(self, game_name): + agent = meta_learning.MetaCFRRegretAgent( + training_epochs=1, + meta_learner_training_epochs=1, + game_name=game_name, + game_config={'players': 2}, + perturbation=False, + seed=0, + model_type=models.ModelType.MLP.value, + best_response=True) + num_infostates, _ = agent.get_num_infostates() + num_actions = len(agent._all_actions) + params, net_apply, opt_state, opt_update = self.setup_optimizer( + num_actions, num_infostates) + agent.training_optimizer() + agent.optimizer.net_apply = net_apply + agent.optimizer.opt_state = opt_state + agent.optimizer.net_params = params + agent.optimizer.opt_update = opt_update + + world_state = openspiel_api.WorldState( + game_name, 
{'players': 2}, perturbation=False, random_seed=0) + best_response_val_player_2 = agent.next_policy(world_state) + self.assertGreater(best_response_val_player_2[-1], 0) + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/models.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/models.py new file mode 100644 index 0000000..75e69f5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/models.py @@ -0,0 +1,197 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Model definitions for optimizer network.""" + +import enum +from typing import Any, Callable, List, Optional, Union + +import haiku as hk +import jax +import jax.numpy as jnp +import numpy as np +import optax + + +class ModelType(enum.Enum): + MLP = "MLP" + RNN = "RNN" + + +def _mlp_forwards(mlp_hidden_sizes: List[int]) -> hk.Transformed: + """Returns a haiku transformation of the MLP model to be used in optimizer. + + Args: + mlp_hidden_sizes: List containing size of linear layers. + + Returns: + Haiku transformation of the RNN network. + """ + def forward_fn(inputs): + mlp = hk.nets.MLP(mlp_hidden_sizes, activation=jax.nn.relu, name="mlp") + return mlp(inputs) + return hk.transform(forward_fn) + + +def _make_rnn_network(lstm_hidden_sizes: List[int], + mlp_hidden_sizes: List[int]) -> hk.RNNCore: + """Returns the RNN network. + + Args: + lstm_hidden_sizes: List containing size of lstm layers. + mlp_hidden_sizes: List containing size of linear layers. + + Returns: + Returns an instance of RNN model. + """ + layers = [] + for k, hidden_size in enumerate(lstm_hidden_sizes): + layers += [hk.LSTM(hidden_size, name=f"lstm_layer_{k}"), jax.nn.relu] + layers += [hk.nets.MLP(mlp_hidden_sizes, name="mlp")] + return RNNModel(layers) + + +def _rnn_forwards(lstm_hidden_sizes: List[int], mlp_hidden_sizes: List[int], + batch_size: int) -> hk.Transformed: + """Returns a haiku transformation of the RNN model to be used in optimizer. + + Args: + lstm_hidden_sizes: List containing size of lstm layers. + mlp_hidden_sizes: List containing size of linear layers. + batch_size: Batch size. + + Returns: + Haiku transformation of the RNN network. 
+ """ + def forward_fn(inputs): + rnn = _make_rnn_network(lstm_hidden_sizes, mlp_hidden_sizes) + initial_state = rnn.initial_state(batch_size=batch_size) + outputs, _ = hk.dynamic_unroll(rnn, inputs, initial_state, time_major=False) + return outputs + + return hk.transform(forward_fn) + + +class RNNModel(hk.RNNCore): + """RNN model.""" + + def __init__(self, + layers: List[Union[hk.Module, Callable[[jnp.ndarray], + jnp.ndarray]]], + name: Optional[str] = None): + super().__init__(name=name) + self._layers = layers + + def __call__(self, inputs, prev_state): + x = inputs + curr_state = [None] * len(prev_state) + for k, layer in enumerate(self._layers): + if isinstance(layer, hk.RNNCore): + x, curr_state[k] = layer(x, prev_state[k]) + else: + x = layer(x) + return x, tuple(curr_state) + + def initial_state(self, batch_size: Optional[int]) -> Any: + layerwise_init_state = [] + for layer in self._layers: + if isinstance(layer, hk.RNNCore): + layerwise_init_state.append(layer.initial_state(batch_size)) + else: + layerwise_init_state.append(None) + return tuple(layerwise_init_state) + + +class OptimizerModel: + """Optimizer model in l2l paradigm to learn update rules of regret minimizers. + + Attributes: + mlp_sizes: Size of mlp layers. This is a string, containing sequence of + numbers, each number indicate size of a linear layer. + lstm_sizes: Size of lstm layers. This is a string, containing sequence of + numbers, each number indicate size of an lstm layer. + initial_learning_rate: Initial value of learning rate used in learning + rate scheduler. + batch_size: Batch size. + num_actions: Number of possible actions. + num_infostates: Total number of information states. + model_type: Type of model. For now it can be either MLP or RNN. + use_infostate_representation: Boolean value to indicate if we use + information state information as part of model input or not. + rng: Jax pseudo random number generator. + model: Neural network model we want to optimize. + opt_update: Optax optimizer update function. + net_params: Network parameters. + opt_state: Optax optimizer state. + net_apply: Network apply function. + """ + + def __init__(self, + mlp_sizes: str, + lstm_sizes: str, + initial_learning_rate: float, + batch_size: int, + num_actions: int, + num_infostates: int, + model_type: str = "MLP", + use_infostate_representation: bool = True): + self.num_actions = num_actions + self.num_infostates = num_infostates + self.initial_learning_rate = initial_learning_rate + self.batch_size = batch_size + self.use_infostate_representation = use_infostate_representation + self.rng = jax.random.PRNGKey(10) + + mlp_sizes_list = [ + int(mlp_size.strip()) for mlp_size in mlp_sizes.split(",") + ] + mlp_sizes_list.append(self.num_actions) + lstm_sizes_list = [ + int(lstm_size.strip()) for lstm_size in lstm_sizes.split(",") + ] + + if model_type == ModelType.MLP.value: + self.model = _mlp_forwards(mlp_sizes_list) + elif model_type == ModelType.RNN.value: + self.model = _rnn_forwards(lstm_sizes_list, mlp_sizes_list, + self.batch_size) + else: + raise ValueError( + f"{model_type} is not a valid model, model_type should be MLP or RNN." 
+ ) + + self.net_apply = self.model.apply + self._net_init = self.model.init + self.opt_update, self.net_params, self.opt_state = None, None, None + + def lr_scheduler(self, init_value: float) -> optax.Schedule: + schedule_fn = optax.polynomial_schedule( + init_value=init_value, end_value=0.0001, power=1., transition_steps=100) + return schedule_fn + + def initialize_optimizer_model(self): + """Initializes the optax optimizer and neural network model.""" + lr_scheduler_fn = self.lr_scheduler(self.initial_learning_rate) + opt_init, self.opt_update = optax.chain( + optax.scale_by_adam(), optax.scale_by_schedule(lr_scheduler_fn), + optax.scale(-self.initial_learning_rate)) + + input_size = self.num_actions + if self.use_infostate_representation: + input_size += self.num_infostates + + dummy_input = np.zeros(shape=[self.batch_size, 1, input_size]) + + self.net_params = self._net_init(self.rng, dummy_input) + self.opt_state = opt_init(self.net_params) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/openspiel_api.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/openspiel_api.py new file mode 100644 index 0000000..81e17b2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/openspiel_api.py @@ -0,0 +1,108 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""OpenSpiel API.""" + +import random +from typing import Any, List, Text, Tuple, Dict + +from open_spiel.python.examples.meta_cfr.sequential_games import world_representation +import pyspiel + + +class WorldState(world_representation.WorldState): + """World state representation for openspiel games. + + This class implements world_representation class for openspiel games. + + Attributes: + game_name: Name of openspiel game we want to initialize. + config: Config containing game parameters to initialize the game. + state: Initial state of an openspeil game. + chance_policy: The policy of the chance node in the game tree. 
+ """ + + def __init__(self, game_name: str, config: Dict[str, Any], + perturbation: bool, random_seed: int = 100): + self._perturbation = perturbation + self._history = [] + self._random_seed = random_seed + self.game_name = game_name + self.config = config + self._game = pyspiel.load_game(self.game_name, self.config) + if str(self._game.get_type().dynamics) == "Dynamics.SIMULTANEOUS": + self._game = pyspiel.convert_to_turn_based(self._game) + # initial_state + self.state = self._game.new_initial_state() + self.chance_policy = self.get_chance_policy() + random.seed(self._random_seed) + + def get_distinct_actions(self) -> List[int]: + """See base class.""" + return list(range(self._game.num_distinct_actions())) + + def is_terminal(self) -> bool: + """See base class.""" + return self.state.is_terminal() + + def get_actions(self) -> List[Any]: + """See base class.""" + if self.is_terminal(): + return [[], [], []] + actions = [[0], [0], [0]] + if self.state.is_chance_node(): + legal_actions = [ + action for (action, prob) in self.state.chance_outcomes() + ] + else: + legal_actions = self.state.legal_actions() + actions[self.state.current_player() + 1] = legal_actions + return actions + + def get_infostate_string(self, player: int) -> Text: + """See base class.""" + infostate = self.state.information_state_string(player - 1) + return str(len(self._history)) + "|" + str(infostate) + + def apply_actions(self, actions: Tuple[int, int, int]) -> None: + """See base class.""" + self.state.apply_action(actions[self.state.current_player() + 1]) + self.chance_policy = self.get_chance_policy() + self._history.append(actions) + + def get_utility(self, player: int) -> float: + """See base class.""" + assert self.is_terminal() + return float(self.state.returns()[player - 1]) + + def get_chance_policy(self) -> Dict[int, float]: + """See base class.""" + if self.is_terminal(): + return {} + + if not self.state.is_chance_node(): + return {0: 1} + + chance_policy = { + action: prob for (action, prob) in self.state.chance_outcomes() + } + + if self._perturbation: + probs = [random.random() for _ in self.state.chance_outcomes()] + chance_policy = { + action: probs[i] / sum(probs) + for i, (action, prob) in enumerate(self.state.chance_outcomes()) + } + + return chance_policy diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/typing.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/typing.py new file mode 100644 index 0000000..57349b1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/typing.py @@ -0,0 +1,31 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Typing definitions.""" + +from typing import Any, Dict, Callable +import jax.numpy as jnp +import optax +from open_spiel.python.examples.meta_cfr.sequential_games import game_tree_utils + +PyTree = Any +Params = PyTree +ApplyFn = Callable[..., jnp.ndarray] +OptState = optax.OptState + +GameTree = game_tree_utils.GameTree +InfostateNode = game_tree_utils.InfoState +InfostateMapping = Dict[str, InfostateNode] +HistoryNode = game_tree_utils.HistoryTreeNode + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/utils.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/utils.py new file mode 100644 index 0000000..c2d8738 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/utils.py @@ -0,0 +1,217 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utility functions for meta-cfr algorithm.""" + +import functools +from typing import List +import haiku as hk +import jax +import jax.numpy as jnp +import numpy as np + +from open_spiel.python.examples.meta_cfr.sequential_games.typing import ApplyFn +from open_spiel.python.examples.meta_cfr.sequential_games.typing import InfostateMapping +from open_spiel.python.examples.meta_cfr.sequential_games.typing import InfostateNode +from open_spiel.python.examples.meta_cfr.sequential_games.typing import Params + + +def get_batched_input(input_list: List[jax.Array], + infostate_list: List[InfostateNode], + illegal_action_list: List[List[int]], batch_size: int): + """Returns list of function arguments extended to be consistent with batch size. + + Args: + input_list: List of DeviceArrays. + infostate_list: List of information state nodes. + illegal_action_list: List of List of illegal actions. Each internal list + contains illegal actions in each information state. + batch_size: Batch size. + + Returns: + input_list, infostate_list, and illegal_action_list with a size consistent + with batch size (the size of returned arrays are multipliers of batch size). + """ + items_to_sample = batch_size * (int(len(input_list) / batch_size) + + 1) - len(input_list) + idx_sample = np.random.choice(len(input_list), items_to_sample) + input_zip = np.array( + list(zip(input_list, infostate_list, illegal_action_list)), + dtype=object) + input_lst_sample = input_zip[idx_sample] + input_sample, infostate_sample, illegal_action_sample = zip(*input_lst_sample) + + input_list.extend(list(input_sample)) + infostate_list.extend(list(infostate_sample)) + illegal_action_list.extend(list(illegal_action_sample)) + return input_list, infostate_list, illegal_action_list + + +def mask(cfvalues: np.ndarray, infoset: List[InfostateNode], num_actions: int, + batch_size: int) -> np.ndarray: + """Returns counterfactual values of legal actions and put 0 for illegal ones. + + Args: + cfvalues: Numpy array of counterfactual values. + infoset: List of information states. 
+ num_actions: Number of possible actions to take. + batch_size: Batch size. + + Returns: + Masked counterfactual values. The counterfactual values of legal actions are + kept as passed to this function and for illegal actions, we consider 0 + counterfactual value. + """ + legal_actions = [[infoset[i].world_state.state.legal_actions()] * + cfvalues.shape[1] for i in range(batch_size)] + + masked_cfvalues = np.zeros(shape=[batch_size, cfvalues.shape[1], num_actions]) + for i in range(cfvalues.shape[0]): + for j in range(cfvalues.shape[1]): + np.put(masked_cfvalues[i][j], legal_actions[i][j], cfvalues[i][j]) + + return np.stack(masked_cfvalues) + + +def filter_terminal_infostates(infostates_map: InfostateMapping): + """Filter out terminal infostate_node values.""" + return { + infostate_string: infostate_node + for infostate_string, infostate_node in infostates_map.items() + if not infostate_node.is_terminal() + } + + +def get_network_output(net_apply: ApplyFn, net_params: Params, + net_input: np.ndarray, illegal_actions: List[int], + key: hk.PRNGSequence) -> jax.Array: + """Returns policy generated as output of model. + + Args: + net_apply: Haiku apply function. + net_params: Haiku network parameters. + net_input: Input of the model. + illegal_actions: List of illegal actions we use to mask the model output. + key: Pseudo random number. + + Returns: + Policy generated by model. Model output is filtered to mask illegal actions. + """ + net_output = jax.jit(net_apply)(net_params, key, net_input) + + if illegal_actions: + net_output = jnp.delete(net_output, np.array(illegal_actions)) + + return jax.nn.softmax(net_output) + + +def get_network_output_batched( + net_apply: ApplyFn, net_params: Params, net_input: np.ndarray, + all_illegal_actions: List[List[int]], + key: hk.PRNGSequence) -> List[jax.Array]: + """Returns policy of batched input generated as output of model. + + Args: + net_apply: Haiku apply function. + net_params: Haiku network parameters. + net_input: Input of the model. + all_illegal_actions: Nested list of illegal actions we use to mask the model + output. Length of outer list is equal to the batch size. + key: Pseudo random number. + + Returns: + List of policies generated by model. Model output is filtered to mask + illegal actions. Length of the returned list is equal to batch size. + """ + net_output_batched = net_apply(net_params, next(key), net_input) + + batch_policies = [] + for i, illegal_actions in enumerate(all_illegal_actions): + net_output = net_output_batched[i] + if illegal_actions: + net_output = jnp.expand_dims( + jnp.delete(net_output, jnp.array(illegal_actions)), axis=0) + + batch_policies.append(jax.nn.softmax(net_output)) + return batch_policies + + +@functools.partial(jax.jit, static_argnums=(2, 3, 4, 5, 7, 9)) +def meta_loss(net_params: Params, cfvalues: np.ndarray, + net_apply: ApplyFn, steps: int, num_all_actions: int, + infosets: List[InfostateNode], + infostate_map: InfostateMapping, + batch_size: int, + key: hk.PRNGSequence, + use_infostate_representation: bool = True) -> float: + """Meta learning loss function. + + Args: + net_params: Network parameters. + cfvalues: Counterfactual values. + net_apply: Haiku apply function. + steps: Number of unrolling steps. + num_all_actions: Number of actions. + infosets: List of information states. + infostate_map: Mapping from information state string to information state + node. + batch_size: Batch size. + key: Pseudo random number. 
+ use_infostate_representation: Boolean value indicating if information state + representation is used as part of input. + + Returns: + Mean meta learning loss value. + """ + regret_sum = np.zeros(shape=[batch_size, 1, num_all_actions]) + total_loss = 0 + step = 0 + infostate_str_one_hot = jnp.expand_dims( + jnp.array([ + jax.nn.one_hot(infostate_map[infoset.infostate_string], + len(infostate_map)) for infoset in infosets + ]), + axis=1) + + def scan_body(carry, x): + del x # Unused + regret_sum, current_step, total_loss = carry + average_regret = regret_sum / (current_step + 1) + + if use_infostate_representation: + net_input = jnp.concatenate((average_regret, infostate_str_one_hot), + axis=-1) + else: + net_input = average_regret + next_step_x = jax.jit(net_apply)(net_params, key, net_input) + strategy = jax.nn.softmax(next_step_x) + + value = jnp.matmul( + jnp.array(cfvalues), jnp.transpose(strategy, axes=[0, 2, 1])) + curren_regret = jnp.array(cfvalues) - value + regret_sum += jnp.expand_dims(jnp.mean(curren_regret, axis=1), axis=1) + current_loss = jnp.mean( + jnp.max( + jax.numpy.concatenate( + [regret_sum, + jnp.zeros(shape=[batch_size, 1, 1])], + axis=-1), + axis=-1)) + total_loss += current_loss + current_step += 1 + return (regret_sum, current_step, total_loss), None + + (regret_sum, step, total_loss), _ = jax.lax.scan( + scan_body, (regret_sum, step, total_loss), None, length=steps) + return total_loss diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/world_representation.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/world_representation.py new file mode 100644 index 0000000..5925dbf --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/meta_cfr/sequential_games/world_representation.py @@ -0,0 +1,89 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""API for world state representation.""" + +import abc +from typing import Any, List, Text, Tuple + + +class WorldState(abc.ABC): + """Base class for world state representation. + + We can implement this class for world state representations in both + sequential and matrix games. + + Attributes: + chance_policy: Policy of the chance node in the game tree. + """ + + def __init__(self): + self.chance_policy = {0: 1.0} + self._history = [] + + @abc.abstractmethod + def get_distinct_actions(self) -> List[int]: + """Returns all possible distinct actions in the game.""" + pass + + @abc.abstractmethod + def is_terminal(self) -> bool: + """Returns if the current state of the game is a terminal or not.""" + pass + + @abc.abstractmethod + def get_actions(self) -> List[Any]: + """Returns the list of legal actions from the current state of the game.""" + pass + + @abc.abstractmethod + def get_infostate_string(self, player: int) -> Text: + """Returns the string form of infostate representation of a given player. + + Args: + player: Index of player. 
+ + Returns: + The string representation of the infostate of player. + """ + + pass + + @abc.abstractmethod + def apply_actions(self, actions: Tuple[int, int, int]) -> None: + """Applies the current player's action to change state of the world. + + At each timestep of the game, the state of the world is changing by the + current player's action. At the same time, we should update self._history + with actions, by appending actions to self._history. + + Args: + actions: List of actions for chance node, player 1 and player 2. + + """ + pass + + @abc.abstractmethod + def get_utility(self, player: int) -> float: + """Returns player's utility when the game reaches to a terminal state. + + Args: + player: Index of player. + + Returns: + Utility that player receives when we reach a terminal state in the game. + """ + pass + + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/mmd_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/mmd_example.py new file mode 100644 index 0000000..2f646e6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/mmd_example.py @@ -0,0 +1,44 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Example of MMD with dilated entropy to solve for QRE in Leduc Poker.""" + +from absl import app +from absl import flags + +from open_spiel.python.algorithms import mmd_dilated +import pyspiel + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("iterations", 100, "Number of iterations") +flags.DEFINE_float( + "alpha", 0.05, "QRE parameter, larger value amounts to more regularization") +flags.DEFINE_string("game", "leduc_poker", "Name of the game") +flags.DEFINE_integer("print_freq", 10, "How often to print the gap") + + +def main(_): + game = pyspiel.load_game(FLAGS.game) + mmd = mmd_dilated.MMDDilatedEnt(game, FLAGS.alpha) + + for i in range(FLAGS.iterations): + mmd.update_sequences() + if i % FLAGS.print_freq == 0: + conv = mmd.get_gap() + print("Iteration {} gap {}".format(i, conv)) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/mmd_matrix_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/mmd_matrix_example.py new file mode 100644 index 0000000..8fed7b4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/mmd_matrix_example.py @@ -0,0 +1,54 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
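The example above runs MMD for a fixed number of iterations and prints the gap periodically. As a small convenience sketch (not part of the vendored example; `solve_qre` and its default arguments are hypothetical), the same calls can be wrapped in a loop that stops once the QRE gap falls below a tolerance:

```python
from open_spiel.python.algorithms import mmd_dilated
import pyspiel


def solve_qre(game_name="kuhn_poker", alpha=0.05, tol=1e-3, max_iters=10000):
  """Iterates MMD with dilated entropy until the QRE gap drops below tol."""
  game = pyspiel.load_game(game_name)
  mmd = mmd_dilated.MMDDilatedEnt(game, alpha)
  gap = float("inf")
  for _ in range(max_iters):
    mmd.update_sequences()
    gap = mmd.get_gap()
    if gap < tol:
      break
  return mmd, gap


if __name__ == "__main__":
  _, final_gap = solve_qre()
  print("Final QRE gap:", final_gap)
```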
+ +"""Example: using MMD with dilated entropy to solve for QRE in a matrix Game.""" + +from absl import app +from absl import flags + +from open_spiel.python.algorithms import mmd_dilated +import pyspiel + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("iterations", 1000, "Number of iterations") +flags.DEFINE_float( + "alpha", 0.1, "QRE parameter, larger value amounts to more regularization") +flags.DEFINE_integer("print_freq", 100, "How often to print the gap") + +# create pyspiel perturbed RPS matrix game + +game = pyspiel.create_matrix_game([[0, -1, 3], [1, 0, -3], [-3, 3, 0]], + [[0, 1, -3], [-1, 0, 3], [3, -3, 0]]) + +game = pyspiel.convert_to_turn_based(game) + + +def main(_): + mmd = mmd_dilated.MMDDilatedEnt(game, FLAGS.alpha) + for i in range(FLAGS.iterations): + mmd.update_sequences() + if i % FLAGS.print_freq == 0: + conv = mmd.get_gap() + print("Iteration {} gap {}".format(i, conv)) + + # Extract policies for both players + print(mmd.get_policies().action_probability_array) + # Note the sequence form and behavioural-form coincide + # for a normal-form game (sequence form has extra root value of 1) + print(mmd.current_sequences()) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/mmd_nash_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/mmd_nash_example.py new file mode 100644 index 0000000..8ef7851 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/mmd_nash_example.py @@ -0,0 +1,44 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Example: MMD with dilated entropy to compute approx. 
Nash in Kuhn poker.""" + +from absl import app +from absl import flags + +from open_spiel.python.algorithms import exploitability +from open_spiel.python.algorithms import mmd_dilated +import pyspiel + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("iterations", 1000, "Number of iterations") +flags.DEFINE_string("game", "kuhn_poker", "Name of the game") +flags.DEFINE_integer("print_freq", 100, "How often to print the exploitability") + + +def main(_): + game = pyspiel.load_game(FLAGS.game) + # need to manually set stepsize if alpha = 0 + mmd = mmd_dilated.MMDDilatedEnt(game, alpha=0, stepsize=1) + + for i in range(FLAGS.iterations): + mmd.update_sequences() + if i % FLAGS.print_freq == 0: + conv = exploitability.exploitability(game, mmd.get_avg_policies()) + print("Iteration {} exploitability {}".format(i, conv)) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/nego_nbs_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/nego_nbs_example.py new file mode 100644 index 0000000..b37c062 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/nego_nbs_example.py @@ -0,0 +1,302 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""An example that computes the Nash bargaining score from negotiations. + +This uses the bargaining game that was introduced in: + +[1] Lewis et al., Deal or no deal? End-to-end learning of negotiation + dialogues, 2017. https://arxiv.org/abs/1706.05125 +[2] David DeVault, Johnathan Mell, and Jonathan Gratch. + 2015. Toward Natural Turn-taking in a Virtual Human Negotiation Agent + +It computes the empirical Nash bargaining score (NBS) from three sources: + - Human play + - IS-MCTS in self-play + - A theoretical maximum NBS if the players had full information and can see + each other's utilities and then maximize their NBS. + +These are all run on a data set extracted from the Lewis et al. '17 data set: +https://github.com/facebookresearch/end-to-end-negotiator/blob/master/src/data/negotiate/data.txt + +This example is inspired by the paper (Iwasa and Fujita, "Prediction of Nash +Bargaining Solution in Negotiation Dialogue", 2018). +""" + +from absl import app +from absl import flags +import numpy as np + +from open_spiel.python import games # pylint: disable=unused-import +import pyspiel + +FLAGS = flags.FLAGS + +flags.DEFINE_string("data_file", None, "Lewis et al. 
'17 data set file") +flags.DEFINE_string("instances_file", "/tmp/instances.txt", + "Filename for the temp instances database file.") + + +class Instance(object): + """An instance of a bargaining problem.""" + + def __init__(self, pool, p1values, p2values): + self.pool = np.array(pool) + self.p1values = np.array(p1values) + self.p2values = np.array(p2values) + assert 5 <= sum(pool) <= 7 + assert np.dot(pool, p1values) == 10 + assert np.dot(pool, p2values) == 10 + + def __str__(self): + return (",".join([str(x) for x in self.pool]) + " " + + ",".join([str(x) for x in self.p1values]) + " " + + ",".join([str(x) for x in self.p2values])) + + +class Negotiation(object): + """An instance of a bargaining game.""" + + def __init__(self, instance, outcome, rewards): + self.instance = instance + self.outcome = outcome + self.rewards = rewards + + def __str__(self): + return (str(self.instance) + " " + str(self.outcome) + " " + + str(self.rewards)) + + +def dialogue_matches_prev_line(line1, line2): + """Checks if the dialogue matches the previous line's.""" + parts1 = line1.split(" ") + parts2 = line2.split(" ") + for i in range(6, min(len(parts1), len(parts2))): + if parts1[i] == "YOU:" or parts1[i] == "THEM:": + if parts1[i] == "YOU:" and parts2[i] != "THEM:": + return False + if parts1[i] == "THEM:" and parts2[i] != "YOU:": + return False + elif parts1[i] != parts2[i]: + return False + if parts1[i] == "": + break + return True + + +# pylint: disable=line-too-long +def parse_dataset(filename): + """Parse the Lewis et al. '17 data file.""" + # book, hat, ball + # Example format + # 1 0 4 2 1 2 YOU: i would like 4 hats and you can have the rest . THEM: deal YOU: item0=0 item1=4 item2=0 reward=8 agree 1 4 4 1 1 2 + # 1 4 4 1 1 2 THEM: i would like 4 hats and you can have the rest . YOU: deal THEM: item0=1 item1=0 item2=1 reward=6 agree 1 0 4 2 1 2 + # 1 6 3 0 2 2 YOU: you can have all the hats if i get the book and basketballs . 
THEM: item0=1 item1=3 item2=2 reward=10 disagree 1 2 3 2 2 1 + # 1 10 3 0 1 0 YOU: hi i would like the book and ball and you can have the hats THEM: i can give you either the book or the ball YOU: ill take the book THEM: ok i will take the hats and ball YOU: deal THEM: item0=1 item1=0 item2=0 reward=10 agree 1 2 3 2 1 2 + # 1 2 3 2 1 2 THEM: hi i would like the book and ball and you can have the hats YOU: i can give you either the book or the ball THEM: ill take the book YOU: ok i will take the hats and ball THEM: deal YOU: item0=0 item1=3 item2=1 reward=8 agree 1 10 3 0 1 0 + contents = pyspiel.read_contents_from_file(filename, "r") + lines = contents.split("\n") + cur_nego = None + negotiations = [] + instances = [] + + for line_no in range(len(lines)): + line = lines[line_no] + if line: + parts = line.split(" ") + # parse the line to add a new negotiation + pool = [int(parts[0]), int(parts[2]), int(parts[4])] + my_values = [int(parts[1]), int(parts[3]), int(parts[5])] + pool2 = [int(parts[-6]), int(parts[-4]), int(parts[-2])] + other_values = [int(parts[-5]), int(parts[-3]), int(parts[-1])] + assert pool == pool2 + rewards = [0, 0] + add_nego = False + outcome_str = parts[-7] # this will be "agree" or "disagree" + if parts[6] == "YOU:": + player_id = 0 + instance = Instance(pool, my_values, other_values) + elif parts[6] == "THEM:": + player_id = 1 + instance = Instance(pool, other_values, my_values) + else: + assert False, parts[6] + outcome = False + my_reward = 0 + instances.append(instance) + if "disconnect" in line: + continue + # sometimes there is a "no agreement" in the rewards section + if (outcome_str == "disagree" or + (parts[-9] + " " + parts[-8]) == "reward=no agreement" or + parts[-8] == "reward=disconnect"): + # do not parse the reward, but must still parse the next line + add_nego = False + elif outcome_str == "agree": + outcome = True + reward_parts = parts[-8].split("=") + assert len(reward_parts) == 2, f"reward parts str: {parts[-8]}" + assert reward_parts[0] == "reward" + my_reward = int(reward_parts[1]) + else: + assert False, f"Bad outcome: {outcome_str}" + if cur_nego is None: + rewards[player_id] = my_reward + if player_id == 0: + cur_nego = Negotiation(instance, outcome, rewards) + else: + cur_nego = Negotiation(instance, outcome, rewards) + else: + # There are some in the data set that are incomplete (i.e. are missing the second perspective). + # We should not count these. 
+ if dialogue_matches_prev_line(line, lines[line_no - 1]): + assert list(cur_nego.instance.pool) == pool + if player_id == 1: + assert list(cur_nego.instance.p2values) == my_values + assert list(cur_nego.instance.p1values) == other_values + elif player_id == 0: + assert list(cur_nego.instance.p1values) == my_values + assert list(cur_nego.instance.p2values) == other_values + cur_nego.rewards[player_id] = my_reward + add_nego = True + else: + # not matching, treat as new negotiation + rewards[player_id] = my_reward + if player_id == 0: + cur_nego = Negotiation(instance, outcome, rewards) + else: + cur_nego = Negotiation(instance, outcome, rewards) + add_nego = False + if add_nego or outcome_str == "disagree": + negotiations.append(cur_nego) + print(str(cur_nego)) + print(len(negotiations)) + cur_nego = None + if outcome_str != "disagree": + # same instance was added twice, so remove the last one + instances.pop() + return instances, negotiations + + +def write_instances_file(negotiations, filename): + contents = "" + for nego in negotiations: + contents += str(nego.instance) + "\n" + pyspiel.write_contents_to_file(filename, "w", contents) + + +def compute_nbs_from_simulations(game, num_games, bots): + """Compute empirical NBS from simulations.""" + avg_returns = np.zeros(game.num_players()) + for _ in range(num_games): + state = game.new_initial_state() + while not state.is_terminal(): + if state.is_chance_node(): + # Chance node: sample an outcome + outcomes = state.chance_outcomes() + action_list, prob_list = zip(*outcomes) + action = np.random.choice(action_list, p=prob_list) + state.apply_action(action) + else: + player = state.current_player() + action = bots[player].step(state) + state.apply_action(action) + returns = np.asarray(state.returns()) + avg_returns += returns + avg_returns /= num_games + return np.prod(avg_returns) + + +class MaxBot(object): + """Finds the single (deterministic) trade offer that maximizes the NBS.""" + + def __init__(self): + pass + + def step(self, state): + """Returns the NBS-maximizing action. + + If i'm player 0, then search over all possible moves, assume player 2 + takes the agree action, and choose the action that maximizes the NBS + Player 1 just always agrees. + + Args: + state: the OpenSpiel state to act from. 
+ """ + player = state.current_player() + if player == 1: + return state.agree_action() + max_nbs = -1 + max_action = -1 + for action in state.legal_actions(): + state_clone = state.clone() + state_clone.apply_action(action) + state_clone.apply_action(state.agree_action()) + returns = state_clone.returns() + nbs = np.prod(returns) + if nbs > max_nbs: + max_nbs = nbs + max_action = action + assert max_action >= 0 + return max_action + + +def main(_): + assert FLAGS.data_file is not None + _, negotiations = parse_dataset(FLAGS.data_file) + + print(f"Writing instances database: {FLAGS.instances_file}") + write_instances_file(negotiations, FLAGS.instances_file) + + # Human averages + NBS + human_rewards = np.zeros(2, dtype=np.float64) + avg_human_nbs = 0 + for neg in negotiations: + human_rewards += neg.rewards + human_rewards /= len(negotiations) + avg_human_nbs += np.prod(human_rewards) + print(f"Average human rewards: {human_rewards}") + print(f"Average human NBS: {avg_human_nbs}") + + game = pyspiel.load_game("bargaining", + {"instances_file": FLAGS.instances_file}) + + # Max bot + bots = [MaxBot(), MaxBot()] + avg_max_nbs = compute_nbs_from_simulations(game, 6796, bots) + print(f"Average max NBS: {avg_max_nbs}") + + # Uniform random NBS + bots = [ + pyspiel.make_uniform_random_bot(0, np.random.randint(0, 1000000)), + pyspiel.make_uniform_random_bot(1, np.random.randint(0, 1000000)), + ] + avg_uniform_nbs = compute_nbs_from_simulations(game, 6796, bots) + print(f"Average uniform NBS: {avg_uniform_nbs}") + + # IS-MCTS NBS + evaluator = pyspiel.RandomRolloutEvaluator(1, np.random.randint(0, 1000000)) + bots = [ + pyspiel.ISMCTSBot( + np.random.randint(0, 1000000), evaluator, 10.0, 1000, -1, + pyspiel.ISMCTSFinalPolicyType.MAX_VISIT_COUNT, False, False), + pyspiel.ISMCTSBot( + np.random.randint(0, 1000000), evaluator, 10.0, 1000, -1, + pyspiel.ISMCTSFinalPolicyType.MAX_VISIT_COUNT, False, False) + ] + avg_ismcts_nbs = compute_nbs_from_simulations(game, 6796, bots) + print(f"Average IS-MCTS NBS: {avg_ismcts_nbs}") + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/nfg_writer_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/nfg_writer_example.py new file mode 100644 index 0000000..31b4156 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/nfg_writer_example.py @@ -0,0 +1,42 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Python nfg_writer example.""" + +from absl import app +from absl import flags + +import pyspiel + +FLAGS = flags.FLAGS + +flags.DEFINE_string("game", "matrix_rps", "Name of the game") +flags.DEFINE_string("outfile", None, "File to send the output to.") + + +def main(_): + game = pyspiel.load_game(FLAGS.game) + nfg_text = pyspiel.game_to_nfg_string(game) + + if FLAGS.outfile is None: + print(nfg_text) + else: + print("Exporting to {}".format(FLAGS.outfile)) + outfile = open(FLAGS.outfile, "w") + outfile.write(nfg_text) + outfile.close() + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/nfsp.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/nfsp.py new file mode 100644 index 0000000..b4c5fbb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/nfsp.py @@ -0,0 +1,125 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""NFSP agents trained on Kuhn Poker.""" + +from absl import app +from absl import flags +from absl import logging + +from open_spiel.python import policy +from open_spiel.python import rl_environment +from open_spiel.python.algorithms import exploitability +from open_spiel.python.jax import nfsp + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("num_train_episodes", int(3e6), + "Number of training episodes.") +flags.DEFINE_integer("eval_every", 10000, + "Episode frequency at which the agents are evaluated.") +flags.DEFINE_list("hidden_layers_sizes", [ + 128, +], "Number of hidden units in the avg-net and Q-net.") +flags.DEFINE_integer("replay_buffer_capacity", int(2e5), + "Size of the replay buffer.") +flags.DEFINE_integer("reservoir_buffer_capacity", int(2e6), + "Size of the reservoir buffer.") +flags.DEFINE_float("anticipatory_param", 0.1, + "Prob of using the rl best response as episode policy.") + + +class NFSPPolicies(policy.Policy): + """Joint policy to be evaluated.""" + + def __init__(self, env, nfsp_policies, mode): + game = env.game + player_ids = [0, 1] + super(NFSPPolicies, self).__init__(game, player_ids) + self._policies = nfsp_policies + self._mode = mode + self._obs = {"info_state": [None, None], "legal_actions": [None, None]} + + def action_probabilities(self, state, player_id=None): + cur_player = state.current_player() + legal_actions = state.legal_actions(cur_player) + + self._obs["current_player"] = cur_player + self._obs["info_state"][cur_player] = ( + state.information_state_tensor(cur_player)) + self._obs["legal_actions"][cur_player] = legal_actions + + info_state = rl_environment.TimeStep( + observations=self._obs, rewards=None, discounts=None, step_type=None) + + with self._policies[cur_player].temp_mode_as(self._mode): + p = self._policies[cur_player].step(info_state, is_evaluation=True).probs + prob_dict = {action: p[action] for action in legal_actions} + return prob_dict + + +def main(unused_argv): + game = "kuhn_poker" + num_players = 2 + + env_configs = {"players": num_players} + env = 
rl_environment.Environment(game, **env_configs) + info_state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + hidden_layers_sizes = [int(l) for l in FLAGS.hidden_layers_sizes] + kwargs = { + "replay_buffer_capacity": FLAGS.replay_buffer_capacity, + "epsilon_decay_duration": FLAGS.num_train_episodes, + "epsilon_start": 0.06, + "epsilon_end": 0.001, + } + + # pylint: disable=g-complex-comprehension + agents = [ + nfsp.NFSP( + idx, + info_state_size, + num_actions, + hidden_layers_sizes, + FLAGS.reservoir_buffer_capacity, + FLAGS.anticipatory_param, + **kwargs + ) + for idx in range(num_players) + ] + expl_policies_avg = NFSPPolicies(env, agents, nfsp.MODE.average_policy) + + for ep in range(FLAGS.num_train_episodes): + if (ep + 1) % FLAGS.eval_every == 0: + losses = [agent.loss for agent in agents] + logging.info("Losses: %s", losses) + expl = exploitability.exploitability(env.game, expl_policies_avg) + logging.info("[%s] Exploitability AVG %s", ep + 1, expl) + logging.info("_____________________________________________") + + time_step = env.reset() + while not time_step.last(): + player_id = time_step.observations["current_player"] + agent_output = agents[player_id].step(time_step) + action_list = [agent_output.action] + time_step = env.step(action_list) + + # Episode is over, step all agents with final info state. + for agent in agents: + agent.step(time_step) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/opponent_shaping/lola_iterated_matrix_games_jax.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/opponent_shaping/lola_iterated_matrix_games_jax.py new file mode 100644 index 0000000..e234ee1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/opponent_shaping/lola_iterated_matrix_games_jax.py @@ -0,0 +1,380 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Example that trains two agents using either LOLA or LOLA-DiCE. + +An example that trains using LOLA (Foerster et al., 2017) or LOLA-DiCE +(Foerster et al., 2018) on iterated matrix games. Hyperparameters are +taken from the paper and https://github.com/alexis-jacq/LOLA_DiCE. 
+""" +import itertools +import os +import typing +from typing import List +from typing import Tuple +import warnings + +from absl import app +from absl import flags +import distrax +import haiku as hk +import jax +import jax.numpy as jnp +import numpy as np +import wandb + +from open_spiel.python.environments.iterated_matrix_game import IteratedMatchingPennies +from open_spiel.python.environments.iterated_matrix_game import IteratedPrisonersDilemma +from open_spiel.python.jax.opponent_shaping import OpponentShapingAgent +from open_spiel.python.rl_environment import Environment +from open_spiel.python.rl_environment import TimeStep + +warnings.simplefilter('ignore', FutureWarning) + +FLAGS = flags.FLAGS +flags.DEFINE_string('exp_name', 'dice_1step_pytorchparams', 'Experiment name.') +flags.DEFINE_integer('seed', 42, 'Random seed.') +flags.DEFINE_string('game', 'ipd', 'Name of the game.') +flags.DEFINE_integer('epochs', 200, 'Number of training iterations.') +flags.DEFINE_integer('batch_size', 1024, 'Number of episodes in a batch.') +flags.DEFINE_integer( + 'critic_mini_batches', 1, 'Number of minibatches for critic.' +) +flags.DEFINE_integer('game_iterations', 150, 'Number of iterated plays.') +flags.DEFINE_float('policy_lr', 0.2, 'Policy learning rate.') +flags.DEFINE_float('opp_policy_lr', 0.3, 'Policy learning rate.') +flags.DEFINE_float('critic_lr', 0.1, 'Critic learning rate.') +flags.DEFINE_string('correction_type', 'lola', 'Either "lola", "dice" or None.') +flags.DEFINE_integer( + 'n_lookaheads', 2, 'Number of lookaheads for LOLA correction.' +) +flags.DEFINE_float( + 'correction_max_grad_norm', + None, + 'Maximum gradient norm of LOLA correction.', +) +flags.DEFINE_float('discount', 0.96, 'Discount factor.') +flags.DEFINE_integer( + 'policy_update_interval', + 1, + 'Number of critic updates per before policy is updated.', +) +flags.DEFINE_integer('eval_batch_size', 1024, 'Random seed.') +flags.DEFINE_bool( + 'use_jit', False, 'If true, JAX jit compilation will be enabled.' +) +flags.DEFINE_bool( + 'use_opponent_modelling', + True, + 'If false, ground truth opponent weights are used.', +) +flags.DEFINE_integer( + 'opp_policy_mini_batches', 8, 'Number of minibatches for opponent policy.' +) +flags.DEFINE_float( + 'opponent_model_learning_rate', 0.3, 'Learning rate for opponent model.' +) +flags.DEFINE_bool('debug', False, 'If true, debug mode is enabled.') + + +def get_action_probs( + agent: OpponentShapingAgent, game: str +) -> List[typing.Dict[str, typing.Any]]: + """Returns the probability of cooperation and a string repr for each state. + + Args: + agent: The agent. + game: The name of the game. + + Returns: + A list of dictionaries, each containing the probability of cooperation + and a string representation + """ + actions = ['C', 'D'] if game == 'ipd' else ['H', 'T'] + states = ['s0'] + [''.join(s) for s in itertools.product(actions, repeat=2)] + params = agent.train_state.policy_params[agent.player_id] + action_probs = [] + for i, state_str in enumerate(states): + state = np.eye(len(states))[i] + prob = agent.policy_network.apply(params, state).prob(0) + action = actions[0] + action_probs.append( + {'prob': prob.item(), 'name': f'P({action}|{state_str})'} + ) + return action_probs + + +def log_epoch_data(epoch: int, agents: List[OpponentShapingAgent], eval_batch): + """Logs data to wandb and prints it to the console. + + Args: + epoch: The current epoch. + agents: A list of agents. + eval_batch: A batch of episodes. 
+ """ + logs = {} + for agent in agents: + avg_step_reward = np.mean( + [ts.rewards[agent.player_id] for ts in eval_batch] + ) + probs = get_action_probs(agent, game=FLAGS.game) + for info in probs: + logs[f'agent_{agent.player_id}/{info["name"]}'] = info['prob'] + probs = ', '.join([f'{info["name"]}: {info["prob"]:.2f}' for info in probs]) + metrics = agent.metrics() + logs.update({ + f'agent_{agent.player_id}/avg_step_reward': avg_step_reward, + **{ + f'agent_{agent.player_id}/{k}': v.item() for k, v in metrics.items() + }, + }) + print( + f'[epoch {epoch}] Agent {agent.player_id}: {avg_step_reward:.2f} |' + f' {probs}' + ) + wandb.log(logs) + + +def collect_batch( + env: Environment, agents: List[OpponentShapingAgent], eval_mode: bool +) -> List[TimeStep]: + """Collects one episode. + + Args: + env: The environment. + agents: A list of opponent shaping agents. + eval_mode: If true, the agents will be run in evaluation mode. + + Returns: + A list of time steps. + """ + episode = [] + time_step = env.reset() + episode.append(time_step) + while not time_step.last(): + actions = [] + for agent in agents: + action, _ = agent.step(time_step, is_evaluation=eval_mode) + if action is not None: + action = action.squeeze() + actions.append(action) + time_step = env.step(np.stack(actions, axis=1)) + time_step.observations['actions'] = actions + episode.append(time_step) + + for agent in agents: + agent.step(time_step, is_evaluation=eval_mode) + return episode + + +def make_agent( + key: jax.random.PRNGKey, + player_id: int, + env: Environment, + networks: Tuple[hk.Transformed, hk.Transformed], +) -> OpponentShapingAgent: + """Creates an opponent shaping agent. + + Args: + key: A random seed key. + player_id: The id of the player. + env: The environment. + networks: A tuple of policy and critic networks transformed by + hk.transform. + + Returns: + An opponent shaping agent instance. + """ + policy_network, critic_network = networks + return OpponentShapingAgent( + player_id=player_id, + opponent_ids=[1 - player_id], + seed=key, + info_state_size=env.observation_spec()['info_state'][player_id], + num_actions=env.action_spec()['num_actions'][player_id], + policy=policy_network, + critic=critic_network, + batch_size=FLAGS.batch_size, + num_critic_mini_batches=FLAGS.critic_mini_batches, + pi_learning_rate=FLAGS.policy_lr, + opp_policy_learning_rate=FLAGS.opp_policy_lr, + num_opponent_updates=FLAGS.opp_policy_mini_batches, + critic_learning_rate=FLAGS.critic_lr, + opponent_model_learning_rate=FLAGS.opponent_model_learning_rate, + policy_update_interval=FLAGS.policy_update_interval, + discount=FLAGS.discount, + critic_discount=0, # Predict the imm. reward (for iterated matrix games) + correction_type=FLAGS.correction_type, + clip_grad_norm=FLAGS.correction_max_grad_norm, + use_jit=FLAGS.use_jit, + n_lookaheads=FLAGS.n_lookaheads, + env=env, + ) + + +def make_agent_networks( + num_states: int, num_actions: int +) -> Tuple[hk.Transformed, hk.Transformed]: + """Creates action weights for each state-action pair and values for each state. + + Args: + num_states: The number of distinct states. + num_actions: The number of distinct actions. + + Returns: + A tuple of policy and critic networks transformed by hk.transform. 
+ """ + + def policy(obs): + theta = hk.get_parameter( + 'theta', + init=hk.initializers.Constant(0), + shape=(num_states, num_actions), + ) + logits = jnp.select(obs, theta) + logits = jnp.nan_to_num(logits) + return distrax.Categorical(logits=logits) + + def value_fn(obs): + w = hk.get_parameter( + 'w', [num_states], init=jnp.zeros + ) # @pylint: disable=invalid-name + return w[jnp.argmax(obs, axis=-1)].reshape(*obs.shape[:-1], 1) + + return hk.without_apply_rng(hk.transform(policy)), hk.without_apply_rng( + hk.transform(value_fn) + ) + + +def make_env(game: str, iterations: int, batch_size: int) -> Environment: + """Creates an environment. + + The environment is either iterated prisoners dilemma or iterated matching + pennies. + + Args: + game: The game to play. Either 'ipd' or 'imp'. + iterations: The number of iterations to play. + batch_size: The batch size. + + Returns: + An environment instance. + """ + if game == 'ipd': + env = IteratedPrisonersDilemma(iterations=iterations, batch_size=batch_size) + elif game == 'imp': + env = IteratedMatchingPennies(iterations=iterations, batch_size=batch_size) + else: + raise ValueError(f'Unknown game: {game}') + return env + + +def setup_agents( + env: Environment, rng: hk.PRNGSequence +) -> List[OpponentShapingAgent]: + """Creates an opponent shaping agent for each player in the environment. + + Args: + env: The environment. + rng: A random seed key. + + Returns: + A list of opponent shaping agents. + """ + agents = [] + num_actions = env.action_spec()['num_actions'] + info_state_shape = env.observation_spec()['info_state'] + for player_id in range(env.num_players): + networks = make_agent_networks( + num_states=info_state_shape[player_id][0], + num_actions=num_actions[player_id], + ) + agent = make_agent( + key=next(rng), player_id=player_id, env=env, networks=networks + ) + agents.append(agent) + return agents + + +def update_weights(agents: List[OpponentShapingAgent]): + """Updates the weights of the opponent models. + + Args: + agents: A list of opponent shaping agents. + + Returns: + None + """ + agent: OpponentShapingAgent + for agent in agents: + for opp in [a for a in agents if a.player_id != agent.player_id]: + agent.update_params(state=opp.train_state, player_id=opp.player_id) + + +def main(_): + """Main function. 
Runs the experiment.""" + if FLAGS.exp_name is None: + FLAGS.exp_name = f'{FLAGS.game}_{FLAGS.seed}' + if not FLAGS.debug: + wandb.login(key=os.environ.get('WANDB_API_KEY', None)) + wandb.init( + project='open-spiel-opponent-modelling', + group=FLAGS.exp_name, + config={ + 'game': FLAGS.game, + 'seed': FLAGS.seed, + 'epochs': FLAGS.epochs, + 'batch_size': FLAGS.batch_size, + 'critic_mini_batches': FLAGS.critic_mini_batches, + 'game_iterations': FLAGS.game_iterations, + 'policy_lr': FLAGS.policy_lr, + 'opp_policy_lr': FLAGS.opp_policy_lr, + 'critic_lr': FLAGS.critic_lr, + 'correction_type': FLAGS.correction_type, + 'n_lookaheads': FLAGS.n_lookaheads, + 'correction_max_grad_norm': FLAGS.correction_max_grad_norm, + 'discount': FLAGS.discount, + 'policy_update_interval': FLAGS.policy_update_interval, + 'use_opponent_modelling': FLAGS.use_opponent_modelling, + 'opp_policy_mini_batches': FLAGS.opp_policy_mini_batches, + 'opponent_model_learning_rate': FLAGS.opponent_model_learning_rate, + }, + mode='disabled' if FLAGS.debug else 'online', + ) + + rng = hk.PRNGSequence(key_or_seed=FLAGS.seed) + env = make_env( + iterations=FLAGS.game_iterations, + batch_size=FLAGS.batch_size, + game=FLAGS.game, + ) + agents = setup_agents(env=env, rng=rng) + + if not FLAGS.use_opponent_modelling: + update_weights(agents) + + batch = collect_batch(env=env, agents=agents, eval_mode=True) + log_epoch_data(epoch=0, agents=agents, eval_batch=batch) + for epoch in range(1, FLAGS.epochs + 1): + batch = collect_batch(env=env, agents=agents, eval_mode=False) + if not FLAGS.use_opponent_modelling: + update_weights(agents) + log_epoch_data(epoch=epoch, agents=agents, eval_batch=batch) + print('#' * 100) + + wandb.finish() + + +if __name__ == '__main__': + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/opponent_shaping/requirements.txt b/scenarios/bargaining/open_spiel/open_spiel/python/examples/opponent_shaping/requirements.txt new file mode 100644 index 0000000..cee1e00 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/opponent_shaping/requirements.txt @@ -0,0 +1,13 @@ +wandb +distrax +optax +dm-haiku +rlax +open_spiel +jax + +# If you need cuda support, uncomment the following line. You might need change +# the cuda version depending on your nvidia-driver version and you might need +# to upgrade jax afterwards. + +# jax[cuda12] -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/play_scenarios.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/play_scenarios.py new file mode 100644 index 0000000..47fc027 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/play_scenarios.py @@ -0,0 +1,42 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
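In `make_agent_networks` of `lola_iterated_matrix_games_jax.py` above, the policy is just a table of logits; `jnp.select` with a one-hot observation picks out the row belonging to the current state. A small stand-alone illustration (not part of the diff; the numbers are invented):

```python
import jax.numpy as jnp

theta = jnp.array([[0.5, -0.5],
                   [2.0, 0.0],
                   [-1.0, 1.0]])  # (num_states=3, num_actions=2) logit table
obs = jnp.eye(3)[1]               # one-hot observation for state 1

# Mirrors the policy() body above: select the row of theta for the hot state.
logits = jnp.nan_to_num(jnp.select(obs, theta))
print(logits)                     # [2. 0.]
```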
+ +"""Plays a uniform random bot against the default scenarios for that game.""" + +import random +from absl import app +from absl import flags + +from open_spiel.python.bots import scenarios +from open_spiel.python.bots import uniform_random +import pyspiel + +FLAGS = flags.FLAGS +flags.DEFINE_string("game_name", "catch", "Game to play scenarios for.") + + +def main(argv): + del argv + game = pyspiel.load_game(FLAGS.game_name) + + # TODO(author1): Add support for bots from neural networks. + bots = [ + uniform_random.UniformRandomBot(i, random) + for i in range(game.num_players()) + ] + scenarios.play_bot_in_scenarios(game, bots) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/play_tarok_game.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/play_tarok_game.py new file mode 100644 index 0000000..ccd0215 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/play_tarok_game.py @@ -0,0 +1,68 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Plays a round of Tarok with actions from user input.""" + +import pyspiel + + +def play_tarok_game(): + game = pyspiel.load_game("tarok(players=3)") + state = game.new_initial_state() + while not state.is_terminal(): + print_info(game, state) + state.apply_action(int(input("Enter action: "))) + print("-" * 70, "\n") + print(state.current_game_phase()) + print("Players' scores: {}".format(state.rewards())) + + +def print_info(unused_game, state): + """Print information about the game state.""" + print("Game phase: {}".format(state.current_game_phase())) + print("Selected contract: {}".format(state.selected_contract())) + print("Current player: {}".format(state.current_player())) + player_cards = state.player_cards(state.current_player()) + action_names = [state.card_action_to_string(a) for a in player_cards] + print("\nPlayer cards: {}".format( + list(zip(action_names, player_cards)))) + + if state.current_game_phase() == pyspiel.TarokGamePhase.TALON_EXCHANGE: + print_talon_exchange_info(state) + elif state.current_game_phase() == pyspiel.TarokGamePhase.TRICKS_PLAYING: + print_tricks_playing_info(state) + else: + print() + + legal_actions = state.legal_actions() + action_names = [state.action_to_string(a) for a in state.legal_actions()] + print("Legal actions: {}\n".format( + list(zip(action_names, legal_actions)))) + + +def print_talon_exchange_info(state): + talon = [[state.card_action_to_string(x) for x in talon_set] + for talon_set in state.talon_sets()] + print("\nTalon: {}\n".format(talon)) + + +def print_tricks_playing_info(state): + trick_cards = state.trick_cards() + action_names = [state.card_action_to_string(a) for a in trick_cards] + print("\nTrick cards: {}\n".format( + list(zip(action_names, trick_cards)))) + + +if __name__ == "__main__": + play_tarok_game() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/play_via_console_example.py 
b/scenarios/bargaining/open_spiel/open_spiel/python/examples/play_via_console_example.py new file mode 100644 index 0000000..02dacfb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/play_via_console_example.py @@ -0,0 +1,78 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Example to traverse an entire game tree.""" + +from absl import app +from absl import flags + +import numpy as np +from open_spiel.python import games # pylint: disable=unused-import +from open_spiel.python.bots import human +from open_spiel.python.bots import uniform_random +import pyspiel + +_GAME_STRING = flags.DEFINE_string( + "game_string", "tic_tac_toe", "Name of the game" +) +_PLAYER0_TYPE = flags.DEFINE_string( + "player0_type", "human", "Player 0 type (human or uniform)" +) +_PLAYER1_TYPE = flags.DEFINE_string( + "player1_type", "uniform", "Player 1 type (human or uniform)" +) + + +def load_bot(bot_type: str, pid: int) -> pyspiel.Bot: + if bot_type == "human": + return human.HumanBot() + elif bot_type == "uniform": + return uniform_random.UniformRandomBot(pid, np.random) + + +def play_game(state: pyspiel.State, + bots: list[pyspiel.Bot]): + """Play the game via console.""" + + while not state.is_terminal(): + print(f"State: \n{state}\n") + if state.is_chance_node(): + outcomes = state.chance_outcomes() + action_list, prob_list = zip(*outcomes) + outcome = np.random.choice(action_list, p=prob_list) + print(f"Chance chose: {outcome} ({state.action_to_string(outcome)})") + state.apply_action(outcome) + else: + player = state.current_player() + action = bots[player].step(state) + print(f"Chose action: {action} ({state.action_to_string(action)})") + state.apply_action(action) + + print("\n-=- Game over -=-\n") + print(f"Terminal state:\n{state}") + print(f"Returns: {state.returns()}") + return + + +def main(_): + game = pyspiel.load_game(_GAME_STRING.value) + state = game.new_initial_state() + bots = [] + bots.append(load_bot(_PLAYER0_TYPE.value, 0)) + bots.append(load_bot(_PLAYER1_TYPE.value, 1)) + play_game(state, bots) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/playthrough.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/playthrough.py new file mode 100644 index 0000000..e807efb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/playthrough.py @@ -0,0 +1,72 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Play a game, selecting random moves, and save what we see. + +This can be used to check by hand the behaviour of a game, and also +as the basis for test cases. + +Example usage: +``` +playthrough --game kuhn_poker --params players=3 +``` +""" + +from absl import app +from absl import flags +from absl import logging + +from open_spiel.python.algorithms import generate_playthrough + +FLAGS = flags.FLAGS + +flags.DEFINE_string( + "game", "kuhn_poker", "Name of the game, with optional parameters, e.g. " + "'kuhn_poker' or 'go(komi=4.5,board_size=19)'.") +flags.DEFINE_string("output_file", None, "Where to write the data to.") +flags.DEFINE_list("actions", None, + "A (possibly partial) list of action choices to make.") + +flags.DEFINE_string("update_path", None, + "If set, regenerates all playthroughs in the path.") +flags.DEFINE_bool( + "alsologtostdout", False, + "If True, the trace will be written to std-out while it " + "is being constructed (in addition to the usual behavior).") +flags.DEFINE_integer("shard", 0, "The shard to update.") +flags.DEFINE_integer("num_shards", 1, "How many shards to use for updates.") + + +def main(unused_argv): + if FLAGS.update_path: + generate_playthrough.update_path(FLAGS.update_path, FLAGS.shard, + FLAGS.num_shards) + else: + if not FLAGS.game: + raise ValueError("Must specify game") + actions = FLAGS.actions + if actions is not None: + actions = [int(x) for x in actions] + text = generate_playthrough.playthrough( + FLAGS.game, actions, alsologtostdout=FLAGS.alsologtostdout) + if FLAGS.output_file: + with open(FLAGS.output_file, "w") as f: + f.write(text) + else: + logging.info(text) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/poker_fcpa_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/poker_fcpa_example.py new file mode 100644 index 0000000..c568552 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/poker_fcpa_example.py @@ -0,0 +1,114 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
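`playthrough.py` above drives `generate_playthrough` through absl flags. A minimal programmatic sketch (not part of the diff) that calls the same function directly with the arguments the script forwards:

```python
from open_spiel.python.algorithms import generate_playthrough

# Same call as in playthrough.py: game string, optional list of forced
# actions (None here), and whether to echo the trace while it is built.
text = generate_playthrough.playthrough(
    "kuhn_poker", None, alsologtostdout=False)
print(text)
```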
+ +"""Python spiel example.""" + +from absl import app +from absl import flags +import numpy as np + +from open_spiel.python.bots import human +from open_spiel.python.bots import uniform_random +import pyspiel + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("seed", 12761381, "The seed to use for the RNG.") + +# Supported types of players: "random", "human", "check_call", "fold" +flags.DEFINE_string("player0", "random", "Type of the agent for player 0.") +flags.DEFINE_string("player1", "random", "Type of the agent for player 1.") + + +def LoadAgent(agent_type, game, player_id, rng): + """Return a bot based on the agent type.""" + if agent_type == "random": + return uniform_random.UniformRandomBot(player_id, rng) + elif agent_type == "human": + return human.HumanBot() + elif agent_type == "check_call": + policy = pyspiel.PreferredActionPolicy([1, 0]) + return pyspiel.make_policy_bot(game, player_id, FLAGS.seed, policy) + elif agent_type == "fold": + policy = pyspiel.PreferredActionPolicy([0, 1]) + return pyspiel.make_policy_bot(game, player_id, FLAGS.seed, policy) + else: + raise RuntimeError("Unrecognized agent type: {}".format(agent_type)) + + +def main(_): + rng = np.random.RandomState(FLAGS.seed) + + # Make sure poker is compiled into the library, as it requires an optional + # dependency: the ACPC poker code. To ensure it is compiled in, prepend both + # the install.sh and build commands with OPEN_SPIEL_BUILD_WITH_ACPC=ON. + # See here: + # https://github.com/deepmind/open_spiel/blob/master/docs/install.md#configuration-conditional-dependencies + # for more details on optional dependencies. + games_list = pyspiel.registered_names() + assert "universal_poker" in games_list + + fcpa_game_string = pyspiel.hunl_game_string("fcpa") + print("Creating game: {}".format(fcpa_game_string)) + game = pyspiel.load_game(fcpa_game_string) + + agents = [ + LoadAgent(FLAGS.player0, game, 0, rng), + LoadAgent(FLAGS.player1, game, 1, rng) + ] + + state = game.new_initial_state() + + # Print the initial state + print("INITIAL STATE") + print(str(state)) + + while not state.is_terminal(): + # The state can be three different types: chance node, + # simultaneous node, or decision node + current_player = state.current_player() + if state.is_chance_node(): + # Chance node: sample an outcome + outcomes = state.chance_outcomes() + num_actions = len(outcomes) + print("Chance node with " + str(num_actions) + " outcomes") + action_list, prob_list = zip(*outcomes) + action = rng.choice(action_list, p=prob_list) + print("Sampled outcome: ", + state.action_to_string(state.current_player(), action)) + state.apply_action(action) + else: + # Decision node: sample action for the single current player + legal_actions = state.legal_actions() + for action in legal_actions: + print("Legal action: {} ({})".format( + state.action_to_string(current_player, action), action)) + action = agents[current_player].step(state) + action_string = state.action_to_string(current_player, action) + print("Player ", current_player, ", chose action: ", + action_string) + state.apply_action(action) + + print("") + print("NEXT STATE:") + print(str(state)) + + # Game is now done. 
Print utilities for each player + returns = state.returns() + for pid in range(game.num_players()): + print("Utility for player {} is {}".format(pid, returns[pid])) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/policy_aggregator_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/policy_aggregator_example.py new file mode 100644 index 0000000..c5b003b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/policy_aggregator_example.py @@ -0,0 +1,90 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Example for policy_aggregator_example. + +Example. +""" + +from absl import app +from absl import flags +import numpy as np + +from open_spiel.python import policy +from open_spiel.python import rl_environment +from open_spiel.python.algorithms import exploitability +from open_spiel.python.algorithms import policy_aggregator + +FLAGS = flags.FLAGS +flags.DEFINE_string("game_name", "kuhn_poker", "Game name") + + +class TestPolicy(policy.Policy): + + def __init__(self, action_int): + self._action_int = action_int + + def action_probabilities(self, state, player_id=None): + return {self._action_int: 1.0} + + +def main(unused_argv): + env = rl_environment.Environment(FLAGS.game_name) + + policies = [[ # pylint: disable=g-complex-comprehension + policy.TabularPolicy(env.game).copy_with_noise(alpha=float(i), beta=1.0) + for i in range(2) + ] for _ in range(2)] + + probabilities = [ + list(np.ones(len(policies[i])) / len(policies[i])) for i in range(2) + ] + + pol_ag = policy_aggregator.PolicyAggregator(env.game) + aggr_policies = pol_ag.aggregate([0, 1], policies, probabilities) + + exploitabilities = exploitability.nash_conv(env.game, aggr_policies) + print("Exploitability : {}".format(exploitabilities)) + + print(policies[0][0].action_probability_array) + print(policies[0][1].action_probability_array) + print(aggr_policies.policy) + + print("\nCopy Example") + + mother_policy = policy.TabularPolicy(env.game).copy_with_noise(1, 10) + policies = [[mother_policy.__copy__() for _ in range(2)] for _ in range(2)] + probabilities = [ + list(np.ones(len(policies)) / len(policies)) for _ in range(2) + ] + + pol_ag = policy_aggregator.PolicyAggregator(env.game) + aggr_policy = pol_ag.aggregate([0], policies, probabilities) + + for state, value in aggr_policy.policy[0].items(): + polici = mother_policy.policy_for_key(state) + + value_normal = { + action: probability + for action, probability in enumerate(polici) + if probability > 0 + } + for key in value.keys(): + print("State : {}. Key : {}. Aggregated : {}. Real : {}. 
Passed : {}" + .format(state, key, value[key], value_normal[key], + np.abs(value[key] - value_normal[key]) < 1e-8)) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/ppo_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/ppo_example.py new file mode 100644 index 0000000..54674cf --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/ppo_example.py @@ -0,0 +1,268 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""An example of use of PPO. + +Note: code adapted (with permission) from +https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ppo.py and +https://github.com/vwxyzjn/ppo-implementation-details/blob/main/ppo_atari.py +""" + +# pylint: disable=g-importing-member +import collections +from datetime import datetime +import logging +import os +import random +import sys +import time +from absl import app +from absl import flags +import numpy as np +import pandas as pd +import torch +from torch.utils.tensorboard import SummaryWriter + +import pyspiel +from open_spiel.python.pytorch.ppo import PPO +from open_spiel.python.pytorch.ppo import PPOAgent +from open_spiel.python.pytorch.ppo import PPOAtariAgent +from open_spiel.python.rl_environment import ChanceEventSampler +from open_spiel.python.rl_environment import Environment +from open_spiel.python.rl_environment import ObservationType +from open_spiel.python.vector_env import SyncVectorEnv + + +FLAGS = flags.FLAGS + +flags.DEFINE_string("exp_name", + os.path.basename(__file__).rstrip(".py"), + "the name of this experiment") +flags.DEFINE_string("game_name", "atari", "the id of the OpenSpiel game") +flags.DEFINE_float("learning_rate", 2.5e-4, + "the learning rate of the optimizer") +flags.DEFINE_integer("seed", 1, "seed of the experiment") +flags.DEFINE_integer("total_timesteps", 10_000_000, + "total timesteps of the experiments") +flags.DEFINE_integer("eval_every", 10, "evaluate the policy every N updates") +flags.DEFINE_bool("torch_deterministic", True, + "if toggled, `torch.backends.cudnn.deterministic=False`") +flags.DEFINE_bool("cuda", True, "if toggled, cuda will be enabled by default") + +# Atari specific arguments +flags.DEFINE_string("gym_id", "BreakoutNoFrameskip-v4", + "the id of the environment") +flags.DEFINE_bool( + "capture_video", False, + "whether to capture videos of the agent performances (check out `videos` folder)" +) + +# Algorithm specific arguments +flags.DEFINE_integer("num_envs", 8, "the number of parallel game environments") +flags.DEFINE_integer( + "num_steps", 128, + "the number of steps to run in each environment per policy rollout") +flags.DEFINE_bool( + "anneal_lr", True, + "Toggle learning rate annealing for policy and value networks") +flags.DEFINE_bool("gae", True, "Use GAE for advantage computation") +flags.DEFINE_float("gamma", 0.99, "the discount factor gamma") +flags.DEFINE_float("gae_lambda", 0.95, + "the lambda for the general advantage 
estimation") +flags.DEFINE_integer("num_minibatches", 4, "the number of mini-batches") +flags.DEFINE_integer("update_epochs", 4, "the K epochs to update the policy") +flags.DEFINE_bool("norm_adv", True, "Toggles advantages normalization") +flags.DEFINE_float("clip_coef", 0.1, "the surrogate clipping coefficient") +flags.DEFINE_bool( + "clip_vloss", True, + "Toggles whether or not to use a clipped loss for the value function, as per the paper" +) +flags.DEFINE_float("ent_coef", 0.01, "coefficient of the entropy") +flags.DEFINE_float("vf_coef", 0.5, "coefficient of the value function") +flags.DEFINE_float("max_grad_norm", 0.5, + "the maximum norm for the gradient clipping") +flags.DEFINE_float("target_kl", None, "the target KL divergence threshold") + + +def setup_logging(): + root = logging.getLogger() + root.setLevel(logging.DEBUG) + + handler = logging.StreamHandler(sys.stdout) + handler.setLevel(logging.DEBUG) + formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s") + handler.setFormatter(formatter) + root.addHandler(handler) + + +def make_single_atari_env(gym_id, + seed, + idx, + capture_video, + run_name, + use_episodic_life_env=True): + """Make the single-agent Atari environment.""" + + def gen_env(): + game = pyspiel.load_game( + "atari", { + "gym_id": gym_id, + "seed": seed, + "idx": idx, + "capture_video": capture_video, + "run_name": run_name, + "use_episodic_life_env": use_episodic_life_env + }) + return Environment( + game, + chance_event_sampler=ChanceEventSampler(seed=seed), + observation_type=ObservationType.OBSERVATION) + + return gen_env + + +def make_single_env(game_name, seed): + + def gen_env(): + game = pyspiel.load_game(game_name) + return Environment(game, chance_event_sampler=ChanceEventSampler(seed=seed)) + + return gen_env + + +def main(_): + setup_logging() + + batch_size = int(FLAGS.num_envs * FLAGS.num_steps) + + if FLAGS.game_name == "atari": + # pylint: disable=unused-import + # pylint: disable=g-import-not-at-top + import open_spiel.python.games.atari + + current_day = datetime.now().strftime("%d") + current_month_text = datetime.now().strftime("%h") + run_name = f"{FLAGS.game_name}__{FLAGS.exp_name}__" + if FLAGS.game_name == "atari": + run_name += f"{FLAGS.gym_id}__" + run_name += f"{FLAGS.seed}__{current_month_text}__{current_day}__{int(time.time())}" + + writer = SummaryWriter(f"runs/{run_name}") + writer.add_text( + "hyperparameters", + "|param|value|\n|-|-|\n%s" % + ("\n".join([f"|{key}|{value}|" for key, value in vars(FLAGS).items()])), + ) + + random.seed(FLAGS.seed) + np.random.seed(FLAGS.seed) + torch.manual_seed(FLAGS.seed) + torch.backends.cudnn.deterministic = FLAGS.torch_deterministic + + device = torch.device( + "cuda" if torch.cuda.is_available() and FLAGS.cuda else "cpu") + logging.info("Using device: %s", str(device)) + + if FLAGS.game_name == "atari": + envs = SyncVectorEnv([ + make_single_atari_env(FLAGS.gym_id, FLAGS.seed + i, i, False, + run_name)() for i in range(FLAGS.num_envs) + ]) + agent_fn = PPOAtariAgent + else: + envs = SyncVectorEnv([ + make_single_env(FLAGS.game_name, FLAGS.seed + i)() + for i in range(FLAGS.num_envs) + ]) + agent_fn = PPOAgent + + game = envs.envs[0]._game # pylint: disable=protected-access + info_state_shape = game.observation_tensor_shape() + + num_updates = FLAGS.total_timesteps // batch_size + agent = PPO( + input_shape=info_state_shape, + num_actions=game.num_distinct_actions(), + num_players=game.num_players(), + player_id=0, + num_envs=FLAGS.num_envs, + 
steps_per_batch=FLAGS.num_steps, + num_minibatches=FLAGS.num_minibatches, + update_epochs=FLAGS.update_epochs, + learning_rate=FLAGS.learning_rate, + gae=FLAGS.gae, + gamma=FLAGS.gamma, + gae_lambda=FLAGS.gae_lambda, + normalize_advantages=FLAGS.norm_adv, + clip_coef=FLAGS.clip_coef, + clip_vloss=FLAGS.clip_vloss, + entropy_coef=FLAGS.ent_coef, + value_coef=FLAGS.vf_coef, + max_grad_norm=FLAGS.max_grad_norm, + target_kl=FLAGS.target_kl, + device=device, + writer=writer, + agent_fn=agent_fn, + ) + + n_reward_window = 50 + recent_rewards = collections.deque(maxlen=n_reward_window) + time_step = envs.reset() + for update in range(num_updates): + for _ in range(FLAGS.num_steps): + agent_output = agent.step(time_step) + time_step, reward, done, unreset_time_steps = envs.step( + agent_output, reset_if_done=True) + + if FLAGS.game_name == "atari": + # Get around the fact that + # stable_baselines3.common.atari_wrappers.EpisodicLifeEnv will modify + # rewards at the LIFE and not GAME level by only counting + # rewards of finished episodes + for ts in unreset_time_steps: + info = ts.observations.get("info") + if info and "episode" in info: + real_reward = info["episode"]["r"] + writer.add_scalar("charts/player_0_training_returns", real_reward, + agent.total_steps_done) + recent_rewards.append(real_reward) + else: + for ts in unreset_time_steps: + if ts.last(): + real_reward = ts.rewards[0] + writer.add_scalar("charts/player_0_training_returns", real_reward, + agent.total_steps_done) + recent_rewards.append(real_reward) + + agent.post_step(reward, done) + + if FLAGS.anneal_lr: + agent.anneal_learning_rate(update, num_updates) + + agent.learn(time_step) + + if update % FLAGS.eval_every == 0: + logging.info("-" * 80) + logging.info("Step %s", agent.total_steps_done) + logging.info("Summary of past %i rewards\n %s", + n_reward_window, + pd.Series(recent_rewards).describe()) + + writer.close() + logging.info("All done. Have a pleasant day :)") + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/psro_v2_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/psro_v2_example.py new file mode 100644 index 0000000..10d8a14 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/psro_v2_example.py @@ -0,0 +1,309 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Example running PSRO on OpenSpiel Sequential games. + +To reproduce results from (Muller et al., "A Generalized Training Approach for +Multiagent Learning", ICLR 2020; https://arxiv.org/abs/1909.12823), run this +script with: + - `game_name` in ['kuhn_poker', 'leduc_poker'] + - `n_players` in [2, 3, 4, 5] + - `meta_strategy_method` in ['alpharank', 'uniform', 'nash', 'prd'] + - `rectifier` in ['', 'rectified'] + +The other parameters keeping their default values. 
+""" + +import time + +from absl import app +from absl import flags +import numpy as np + +# pylint: disable=g-bad-import-order +import pyspiel + +from open_spiel.python import policy +from open_spiel.python import rl_environment +from open_spiel.python.algorithms import exploitability +from open_spiel.python.algorithms import get_all_states +from open_spiel.python.algorithms import policy_aggregator +from open_spiel.python.algorithms.psro_v2 import best_response_oracle +from open_spiel.python.algorithms.psro_v2 import psro_v2 +from open_spiel.python.algorithms.psro_v2 import rl_oracle +from open_spiel.python.algorithms.psro_v2 import rl_policy +from open_spiel.python.algorithms.psro_v2 import strategy_selectors + + +FLAGS = flags.FLAGS + +# Game-related +flags.DEFINE_string("game_name", "kuhn_poker", "Game name.") +flags.DEFINE_integer("n_players", 2, "The number of players.") + +# PSRO related +flags.DEFINE_string("meta_strategy_method", "alpharank", + "Name of meta strategy computation method.") +flags.DEFINE_integer("number_policies_selected", 1, + "Number of new strategies trained at each PSRO iteration.") +flags.DEFINE_integer("sims_per_entry", 1000, + ("Number of simulations to run to estimate each element" + "of the game outcome matrix.")) + +flags.DEFINE_integer("gpsro_iterations", 100, + "Number of training steps for GPSRO.") +flags.DEFINE_bool("symmetric_game", False, "Whether to consider the current " + "game as a symmetric game.") + +# Rectify options +flags.DEFINE_string("rectifier", "", + "Which rectifier to use. Choices are '' " + "(No filtering), 'rectified' for rectified.") +flags.DEFINE_string("training_strategy_selector", "probabilistic", + "Which strategy selector to use. Choices are " + " - 'top_k_probabilities': select top " + "`number_policies_selected` strategies. " + " - 'probabilistic': Randomly samples " + "`number_policies_selected` strategies with probability " + "equal to their selection probabilities. " + " - 'uniform': Uniformly sample `number_policies_selected` " + "strategies. " + " - 'rectified': Select every non-zero-selection-" + "probability strategy available to each player.") + +# General (RL) agent parameters +flags.DEFINE_string("oracle_type", "BR", "Choices are DQN, PG (Policy " + "Gradient) or BR (exact Best Response)") +flags.DEFINE_integer("number_training_episodes", int(1e4), "Number training " + "episodes per RL policy. 
Used for PG and DQN") +flags.DEFINE_float("self_play_proportion", 0.0, "Self play proportion") +flags.DEFINE_integer("hidden_layer_size", 256, "Hidden layer size") +flags.DEFINE_integer("batch_size", 32, "Batch size") +flags.DEFINE_float("sigma", 0.0, "Policy copy noise (Gaussian Dropout term).") +flags.DEFINE_string("optimizer_str", "adam", "'adam' or 'sgd'") + +# Policy Gradient Oracle related +flags.DEFINE_string("loss_str", "qpg", "Name of loss used for BR training.") +flags.DEFINE_integer("num_q_before_pi", 8, "# critic updates before Pi update") +flags.DEFINE_integer("n_hidden_layers", 4, "# of hidden layers") +flags.DEFINE_float("entropy_cost", 0.001, "Self play proportion") +flags.DEFINE_float("critic_learning_rate", 1e-2, "Critic learning rate") +flags.DEFINE_float("pi_learning_rate", 1e-3, "Policy learning rate.") + +# DQN +flags.DEFINE_float("dqn_learning_rate", 1e-2, "DQN learning rate.") +flags.DEFINE_integer("update_target_network_every", 1000, "Update target " + "network every [X] steps") +flags.DEFINE_integer("learn_every", 10, "Learn every [X] steps.") + +# General +flags.DEFINE_integer("seed", 1, "Seed.") +flags.DEFINE_bool("local_launch", False, "Launch locally or not.") +flags.DEFINE_bool("verbose", True, "Enables verbose printing and profiling.") + + +def init_pg_responder(env): + """Initializes the Policy Gradient-based responder and agents.""" + info_state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + agent_class = rl_policy.PGPolicy + + agent_kwargs = { + "info_state_size": info_state_size, + "num_actions": num_actions, + "loss_str": FLAGS.loss_str, + "loss_class": False, + "hidden_layers_sizes": [FLAGS.hidden_layer_size] * FLAGS.n_hidden_layers, + "entropy_cost": FLAGS.entropy_cost, + "critic_learning_rate": FLAGS.critic_learning_rate, + "pi_learning_rate": FLAGS.pi_learning_rate, + "num_critic_before_pi": FLAGS.num_q_before_pi, + "optimizer_str": FLAGS.optimizer_str + } + oracle = rl_oracle.RLOracle( + env, + agent_class, + agent_kwargs, + number_training_episodes=FLAGS.number_training_episodes, + self_play_proportion=FLAGS.self_play_proportion, + sigma=FLAGS.sigma) + + agents = [ + agent_class( # pylint: disable=g-complex-comprehension + env, + player_id, + **agent_kwargs) + for player_id in range(FLAGS.n_players) + ] + for agent in agents: + agent.freeze() + return oracle, agents + + +def init_br_responder(env): + """Initializes the tabular best-response based responder and agents.""" + random_policy = policy.TabularPolicy(env.game) + oracle = best_response_oracle.BestResponseOracle( + game=env.game, policy=random_policy) + agents = [random_policy.__copy__() for _ in range(FLAGS.n_players)] + return oracle, agents + + +def init_dqn_responder(env): + """Initializes the Policy Gradient-based responder and agents.""" + state_representation_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + agent_class = rl_policy.DQNPolicy + agent_kwargs = { + "state_representation_size": state_representation_size, + "num_actions": num_actions, + "hidden_layers_sizes": [FLAGS.hidden_layer_size] * FLAGS.n_hidden_layers, + "batch_size": FLAGS.batch_size, + "learning_rate": FLAGS.dqn_learning_rate, + "update_target_network_every": FLAGS.update_target_network_every, + "learn_every": FLAGS.learn_every, + "optimizer_str": FLAGS.optimizer_str + } + oracle = rl_oracle.RLOracle( + env, + agent_class, + agent_kwargs, + number_training_episodes=FLAGS.number_training_episodes, + 
self_play_proportion=FLAGS.self_play_proportion, + sigma=FLAGS.sigma) + + agents = [ + agent_class( # pylint: disable=g-complex-comprehension + env, + player_id, + **agent_kwargs) + for player_id in range(FLAGS.n_players) + ] + for agent in agents: + agent.freeze() + return oracle, agents + + +def print_policy_analysis(policies, game, verbose=False): + """Function printing policy diversity within game's known policies. + + Warning : only works with deterministic policies. + Args: + policies: List of list of policies (One list per game player) + game: OpenSpiel game object. + verbose: Whether to print policy diversity information. (True : print) + + Returns: + List of list of unique policies (One list per player) + """ + states_dict = get_all_states.get_all_states(game, np.inf, False, False) + unique_policies = [] + for player in range(len(policies)): + cur_policies = policies[player] + cur_set = set() + for pol in cur_policies: + cur_str = "" + for state_str in states_dict: + if states_dict[state_str].current_player() == player: + pol_action_dict = pol(states_dict[state_str]) + max_prob = max(list(pol_action_dict.values())) + max_prob_actions = [ + a for a in pol_action_dict if pol_action_dict[a] == max_prob + ] + cur_str += "__" + state_str + for a in max_prob_actions: + cur_str += "-" + str(a) + cur_set.add(cur_str) + unique_policies.append(cur_set) + if verbose: + print("\n=====================================\nPolicy Diversity :") + for player, cur_set in enumerate(unique_policies): + print("Player {} : {} unique policies.".format(player, len(cur_set))) + print("") + return unique_policies + + +def gpsro_looper(env, oracle, agents): + """Initializes and executes the GPSRO training loop.""" + sample_from_marginals = True # TODO(somidshafiei) set False for alpharank + training_strategy_selector = (FLAGS.training_strategy_selector or + strategy_selectors.probabilistic) + + g_psro_solver = psro_v2.PSROSolver( + env.game, + oracle, + initial_policies=agents, + training_strategy_selector=training_strategy_selector, + rectifier=FLAGS.rectifier, + sims_per_entry=FLAGS.sims_per_entry, + number_policies_selected=FLAGS.number_policies_selected, + meta_strategy_method=FLAGS.meta_strategy_method, + prd_iterations=50000, + prd_gamma=1e-10, + sample_from_marginals=sample_from_marginals, + symmetric_game=FLAGS.symmetric_game) + + start_time = time.time() + for gpsro_iteration in range(FLAGS.gpsro_iterations): + if FLAGS.verbose: + print("Iteration : {}".format(gpsro_iteration)) + print("Time so far: {}".format(time.time() - start_time)) + g_psro_solver.iteration() + meta_game = g_psro_solver.get_meta_game() + meta_probabilities = g_psro_solver.get_meta_strategies() + policies = g_psro_solver.get_policies() + + if FLAGS.verbose: + print("Meta game : {}".format(meta_game)) + print("Probabilities : {}".format(meta_probabilities)) + + # The following lines only work for sequential games for the moment. 
+ if env.game.get_type().dynamics == pyspiel.GameType.Dynamics.SEQUENTIAL: + aggregator = policy_aggregator.PolicyAggregator(env.game) + aggr_policies = aggregator.aggregate( + range(FLAGS.n_players), policies, meta_probabilities) + + exploitabilities, expl_per_player = exploitability.nash_conv( + env.game, aggr_policies, return_only_nash_conv=False) + + _ = print_policy_analysis(policies, env.game, FLAGS.verbose) + if FLAGS.verbose: + print("Exploitabilities : {}".format(exploitabilities)) + print("Exploitabilities per player : {}".format(expl_per_player)) + + +def main(argv): + if len(argv) > 1: + raise app.UsageError("Too many command-line arguments.") + + np.random.seed(FLAGS.seed) + + game = pyspiel.load_game_as_turn_based(FLAGS.game_name, + {"players": FLAGS.n_players}) + env = rl_environment.Environment(game) + + # Initialize oracle and agents + if FLAGS.oracle_type == "DQN": + oracle, agents = init_dqn_responder(env) + elif FLAGS.oracle_type == "PG": + oracle, agents = init_pg_responder(env) + elif FLAGS.oracle_type == "BR": + oracle, agents = init_br_responder(env) + gpsro_looper(env, oracle, agents) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/query_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/query_example.py new file mode 100644 index 0000000..a8de87d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/query_example.py @@ -0,0 +1,55 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
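For reference, one of the flag combinations named in the psro_v2_example.py docstring can also be driven from Python. This is a sketch only: it assumes `open_spiel.python.examples` is importable in this image and that the optional deep-learning dependencies pulled in by the RL oracles are available (the exact best-response oracle selected here avoids most of them):
```
# Run a small GPSRO loop on Kuhn poker with the exact best-response oracle.
# Flag names are the ones defined in psro_v2_example.py above.
from absl import app
from open_spiel.python.examples import psro_v2_example

argv = [
    "psro_v2_example",
    "--game_name=kuhn_poker",
    "--n_players=2",
    "--meta_strategy_method=nash",
    "--oracle_type=BR",
    "--gpsro_iterations=3",
]
app.run(psro_v2_example.main, argv=argv)
```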
+ +"""Game-specific query example.""" + +from absl import app +from absl import flags + +import pyspiel + +FLAGS = flags.FLAGS + +flags.DEFINE_string("game", "negotiation", "Name of the game") + + +def main(_): + print("Creating game: " + FLAGS.game) + game = pyspiel.load_game(FLAGS.game) + + state = game.new_initial_state() + + print(str(state)) + + # Need to apply the first chance node for items and utilities to be generated + state.apply_action(0) + + print("Item pool: {}".format(state.item_pool())) + print("Player 0 utils: {}".format(state.agent_utils(0))) + print("Player 1 utils: {}".format(state.agent_utils(1))) + + state = game.new_initial_state() + + print(str(state)) + + # Need to apply the first chance node for items and utilities to be generated + state.apply_action(0) + + print("Item pool: {}".format(state.item_pool())) + print("Player 0 utils: {}".format(state.agent_utils(0))) + print("Player 1 utils: {}".format(state.agent_utils(1))) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/response_graph_ucb_2x2_game.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/response_graph_ucb_2x2_game.py new file mode 100644 index 0000000..8805bfd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/response_graph_ucb_2x2_game.py @@ -0,0 +1,65 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
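The query pattern above (apply the first chance action, then inspect the state) carries over to other negotiation-style games. A hedged sketch for OpenSpiel's `bargaining` game, which this repository's scenario builds on; note that `item_pool()` and `agent_utils()` are negotiation-specific bindings and are not assumed to exist here:
```
# Load the bargaining game, resolve the instance-selection chance node, and
# print the resulting state and the proposer's legal actions.
import pyspiel

game = pyspiel.load_game("bargaining")
state = game.new_initial_state()
state.apply_action(state.legal_actions()[0])  # first chance outcome
print(state)
print("Legal actions:", state.legal_actions())
```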
+ +"""Example of ResponseGraphUCB run on a 2x2 game.""" + +from absl import app +import matplotlib.pyplot as plt +import numpy as np + +from open_spiel.python.algorithms import response_graph_ucb +from open_spiel.python.algorithms import response_graph_ucb_utils + + +def get_example_2x2_payoffs(): + mean_payoffs = np.random.uniform(-1, 1, size=(2, 2, 2)) + mean_payoffs[0, :, :] = np.asarray([[0.5, 0.85], [0.15, 0.5]]) + mean_payoffs[1, :, :] = 1 - mean_payoffs[0, :, :] + return mean_payoffs + + +def main(argv): + if len(argv) > 1: + raise app.UsageError('Too many command-line arguments.') + + mean_payoffs = get_example_2x2_payoffs() + game = response_graph_ucb_utils.BernoulliGameSampler( + [2, 2], mean_payoffs, payoff_bounds=[-1., 1.]) + game.p_max = mean_payoffs + game.means = mean_payoffs + print('Game means:\n', game.means) + + exploration_strategy = 'uniform-exhaustive' + confidence_method = 'ucb-standard' + r_ucb = response_graph_ucb.ResponseGraphUCB( + game, + exploration_strategy=exploration_strategy, + confidence_method=confidence_method, + delta=0.1) + results = r_ucb.run() + + # Plotting + print('Number of total samples: {}'.format(np.sum(r_ucb.count[0]))) + r_ucb.visualise_2x2x2(real_values=game.means, graph=results['graph']) + r_ucb.visualise_count_history(figsize=(5, 3)) + plt.gca().xaxis.label.set_fontsize(15) + plt.gca().yaxis.label.set_fontsize(15) + + # Compare to ground truth graph + real_graph = r_ucb.construct_real_graph() + r_ucb.plot_graph(real_graph) + plt.show() + +if __name__ == '__main__': + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/response_graph_ucb_sample_complexity.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/response_graph_ucb_sample_complexity.py new file mode 100644 index 0000000..f08d8ca --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/response_graph_ucb_sample_complexity.py @@ -0,0 +1,137 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
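The 2x2 construction above generalizes to larger strategy sets; the payoff tensor is indexed as (player, strategy of player 0, strategy of player 1). A sketch under that assumption, reusing the same sampler and solver classes with plotting omitted:
```
# ResponseGraphUCB on a randomly generated 3x3 Bernoulli game.
import numpy as np
from open_spiel.python.algorithms import response_graph_ucb
from open_spiel.python.algorithms import response_graph_ucb_utils

mean_payoffs = np.random.uniform(0.1, 0.9, size=(2, 3, 3))
mean_payoffs[1, :, :] = 1 - mean_payoffs[0, :, :]  # constant-sum in expectation
game = response_graph_ucb_utils.BernoulliGameSampler(
    [3, 3], mean_payoffs, payoff_bounds=[-1., 1.])
game.p_max = mean_payoffs
game.means = mean_payoffs
r_ucb = response_graph_ucb.ResponseGraphUCB(
    game,
    exploration_strategy='uniform-exhaustive',
    confidence_method='ucb-standard',
    delta=0.1)
results = r_ucb.run()
print('Samples used:', np.sum(r_ucb.count[0]))
```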
+ +"""Example computing ResponseGraphUCB sample complexity results.""" + +import itertools + +from absl import app +from absl import flags +import matplotlib.pyplot as plt +import numpy as np + +from open_spiel.python.algorithms import response_graph_ucb +from open_spiel.python.algorithms import response_graph_ucb_utils as utils + +FLAGS = flags.FLAGS + +flags.DEFINE_string('game_name', 'soccer', 'Name of the game.') + + +def main(argv): + if len(argv) > 1: + raise app.UsageError('Too many command-line arguments.') + + # Parameters to run + deltas = [0.01, 0.025, 0.05, 0.1, 0.25, 0.5] + sampling_methods = [ + 'uniform-exhaustive', 'uniform', 'valence-weighted', 'count-weighted' + ] + conf_methods = [ + 'ucb-standard', 'ucb-standard-relaxed', 'clopper-pearson-ucb', + 'clopper-pearson-ucb-relaxed' + ] + + methods = list(itertools.product(sampling_methods, conf_methods)) + mean_counts = {m: [[] for _ in range(len(deltas))] for m in methods} + edge_errs = {m: [[] for _ in range(len(deltas))] for m in methods} + + if FLAGS.game_name == 'bernoulli': + max_total_interactions = 50000 + repetitions = 20 + elif FLAGS.game_name == 'soccer': + max_total_interactions = 100000 + repetitions = 5 + elif FLAGS.game_name == 'kuhn_poker_3p': + max_total_interactions = 100000 + repetitions = 5 + else: + raise ValueError( + 'game_name must be "bernoulli", "soccer", or "kuhn_poker_3p".') + + for r in range(repetitions): + print('Iteration {}'.format(r + 1)) + G = utils.get_game_for_sampler(FLAGS.game_name) # pylint: disable=invalid-name + + for m in methods: + print(' Method: {}'.format(m)) + for ix, d in enumerate(deltas): + print(' Delta: {}'.format(d)) + r_ucb = response_graph_ucb.ResponseGraphUCB( + G, + exploration_strategy=m[0], + confidence_method=m[1], + delta=d, + ucb_eps=1e-1) + results = r_ucb.run(max_total_iterations=max_total_interactions) + + # Updated + mean_counts[m][ix].append(results['interactions']) + real_graph = r_ucb.construct_real_graph() + edge_errs[m][ix].append( + utils.digraph_edge_hamming_dist(real_graph, results['graph'])) + + # Plotting + _, axes = plt.subplots(1, 2, figsize=(10, 4)) + max_mean_count = 0 + for m in methods: + utils.plot_timeseries( + axes, + id_ax=0, + data=np.asarray(mean_counts[m]).T, + xticks=deltas, + xlabel=r'$\delta$', + ylabel='Interactions required', + label=utils.get_method_tuple_acronym(m), + logx=True, + logy=True, + linespecs=utils.get_method_tuple_linespecs(m)) + if np.max(mean_counts[m]) > max_mean_count: + max_mean_count = np.max(mean_counts[m]) + plt.xlim(left=np.min(deltas), right=np.max(deltas)) + plt.ylim(top=max_mean_count * 1.05) + + max_error = 0 + for m in methods: + utils.plot_timeseries( + axes, + id_ax=1, + data=np.asarray(edge_errs[m]).T, + xticks=deltas, + xlabel=r'$\delta$', + ylabel='Response graph errors', + label=utils.get_method_tuple_acronym(m), + logx=True, + logy=False, + linespecs=utils.get_method_tuple_linespecs(m)) + if np.max(edge_errs[m]) > max_error: + max_error = np.max(edge_errs[m]) + plt.xlim(left=np.min(deltas), right=np.max(deltas)) + plt.ylim(bottom=0, top=max_error*1.05) + + # Shared legend + plt.figure(figsize=(1, 6)) + plt.figlegend( + *axes[0].get_legend_handles_labels(), + loc='center right', + bbox_to_anchor=(0.8, 0.5), + bbox_transform=plt.gcf().transFigure, + ncol=1, + handlelength=1.7) + plt.tight_layout() + plt.show() + + +if __name__ == '__main__': + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/rl_example.py 
b/scenarios/bargaining/open_spiel/open_spiel/python/examples/rl_example.py new file mode 100644 index 0000000..9c8b865 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/rl_example.py @@ -0,0 +1,77 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Python spiel example.""" + +import logging +from absl import app +from absl import flags +import numpy as np + +from open_spiel.python import rl_environment + +FLAGS = flags.FLAGS + +flags.DEFINE_string("game", "tic_tac_toe", "Name of the game") +flags.DEFINE_integer("num_players", None, "Number of players") + + +def select_actions(observations, cur_player): + cur_legal_actions = observations["legal_actions"][cur_player] + actions = [np.random.choice(cur_legal_actions)] + return actions + + +def print_iteration(time_step, actions, player_id): + """Print TimeStep information.""" + obs = time_step.observations + logging.info("Player: %s", player_id) + if time_step.step_type.first(): + logging.info("Info state: %s, - - %s", obs["info_state"][player_id], + time_step.step_type) + else: + logging.info("Info state: %s, %s %s %s", obs["info_state"][player_id], + time_step.rewards[player_id], time_step.discounts[player_id], + time_step.step_type) + logging.info("Action taken: %s", actions) + logging.info("-" * 80) + + +def turn_based_example(unused_arg): + """Example usage of the RL environment for turn-based games.""" + # `rl_main_loop.py` contains more details and simultaneous move examples. + logging.info("Registered games: %s", rl_environment.registered_games()) + logging.info("Creating game %s", FLAGS.game) + + env_configs = {"players": FLAGS.num_players} if FLAGS.num_players else {} + env = rl_environment.Environment(FLAGS.game, **env_configs) + + logging.info("Env specs: %s", env.observation_spec()) + logging.info("Action specs: %s", env.action_spec()) + + time_step = env.reset() + + while not time_step.step_type.last(): + pid = time_step.observations["current_player"] + actions = select_actions(time_step.observations, pid) + print_iteration(time_step, actions, pid) + time_step = env.step(actions) + + # Print final state of end game. + for pid in range(env.num_players): + print_iteration(time_step, actions, pid) + + +if __name__ == "__main__": + app.run(turn_based_example) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/rl_main_loop.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/rl_main_loop.py new file mode 100644 index 0000000..1921d01 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/rl_main_loop.py @@ -0,0 +1,90 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Python spiel example.""" + +import logging +from absl import app +from absl import flags + +from open_spiel.python import rl_environment +from open_spiel.python.algorithms import random_agent + +FLAGS = flags.FLAGS + +flags.DEFINE_string("game", "kuhn_poker", "Name of the game.") +flags.DEFINE_integer("num_players", 2, "Number of players.") +flags.DEFINE_integer("num_episodes", 2, "Number of episodes.") + + +def print_iteration(time_step, player_id, action=None): + """Print TimeStep information.""" + obs = time_step.observations + logging.info("Player: %s", player_id) + if time_step.first(): + logging.info("Info state: %s, - - %s", obs["info_state"][player_id], + time_step.step_type) + else: + logging.info("Info state: %s, %s %s %s", obs["info_state"][player_id], + time_step.rewards[player_id], time_step.discounts[player_id], + time_step.step_type) + if action is not None: + logging.info("Action taken: %s", action) + logging.info("-" * 80) + + +def main_loop(unused_arg): + """RL main loop example.""" + logging.info("Registered games: %s", rl_environment.registered_games()) + logging.info("Creating game %s", FLAGS.game) + + env_configs = {"players": FLAGS.num_players} if FLAGS.num_players else {} + env = rl_environment.Environment(FLAGS.game, **env_configs) + num_actions = env.action_spec()["num_actions"] + + agents = [ + random_agent.RandomAgent(player_id=i, num_actions=num_actions) + for i in range(FLAGS.num_players) + ] + + logging.info("Env specs: %s", env.observation_spec()) + logging.info("Action specs: %s", env.action_spec()) + + for cur_episode in range(FLAGS.num_episodes): + logging.info("Starting episode %s", cur_episode) + time_step = env.reset() + while not time_step.last(): + pid = time_step.observations["current_player"] + + if env.is_turn_based: + agent_output = agents[pid].step(time_step) + action_list = [agent_output.action] + else: + agents_output = [agent.step(time_step) for agent in agents] + action_list = [agent_output.action for agent_output in agents_output] + + print_iteration(time_step, pid, action_list) + time_step = env.step(action_list) + + # Episode is over, step all agents with final state. + for agent in agents: + agent.step(time_step) + + # Print final state of end game. + for pid in range(env.num_players): + print_iteration(time_step, pid) + + +if __name__ == "__main__": + app.run(main_loop) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/rl_response.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/rl_response.py new file mode 100644 index 0000000..1c41b15 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/rl_response.py @@ -0,0 +1,262 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""RL agents trained against fixed policy/bot as approximate responses. + +This can be used to try to find exploits in policies or bots, as described in +Timbers et al. '20 (https://arxiv.org/abs/2004.09677), but only using RL +directly rather than RL+Search. +""" + +from absl import app +from absl import flags +import numpy as np + +from open_spiel.python import rl_agent +from open_spiel.python import rl_environment +from open_spiel.python import rl_tools +from open_spiel.python.algorithms import random_agent +from open_spiel.python.algorithms import tabular_qlearner +from open_spiel.python.jax import dqn + +FLAGS = flags.FLAGS + +# Training parameters +flags.DEFINE_string("checkpoint_dir", "/tmp/dqn_test", + "Directory to save/load the agent models.") +flags.DEFINE_integer( + "save_every", int(1e4), + "Episode frequency at which the DQN agent models are saved.") +flags.DEFINE_integer("num_train_episodes", int(1e6), + "Number of training episodes.") +flags.DEFINE_integer( + "eval_every", 1000, + "Episode frequency at which the DQN agents are evaluated.") +flags.DEFINE_integer("eval_episodes", 1000, + "How many episodes to run per eval.") + +# DQN model hyper-parameters +flags.DEFINE_list("hidden_layers_sizes", [64, 64, 64], + "Number of hidden units in the Q-Network MLP.") +flags.DEFINE_integer("replay_buffer_capacity", int(1e5), + "Size of the replay buffer.") +flags.DEFINE_integer("batch_size", 32, + "Number of transitions to sample at each learning step.") + + +# Main algorithm parameters +flags.DEFINE_integer("seed", 0, "Seed to use for everything") +flags.DEFINE_integer("window_size", 30, "Size of window for rolling average") +flags.DEFINE_integer("num_players", 2, "Numebr of players") +flags.DEFINE_string("game", "leduc_poker", "Game string") +flags.DEFINE_string("exploitee", "random", "Exploitee (random | first)") +flags.DEFINE_string("learner", "qlearning", "Learner (qlearning | dqn)") + + +def eval_against_fixed_bots(env, trained_agents, fixed_agents, num_episodes): + """Evaluates `trained_agents` against `random_agents` for `num_episodes`.""" + num_players = len(fixed_agents) + sum_episode_rewards = np.zeros(num_players) + for player_pos in range(num_players): + cur_agents = fixed_agents[:] + cur_agents[player_pos] = trained_agents[player_pos] + for _ in range(num_episodes): + time_step = env.reset() + episode_rewards = 0 + turn_num = 0 + while not time_step.last(): + turn_num += 1 + player_id = time_step.observations["current_player"] + if env.is_turn_based: + agent_output = cur_agents[player_id].step( + time_step, is_evaluation=True) + action_list = [agent_output.action] + else: + agents_output = [ + agent.step(time_step, is_evaluation=True) for agent in cur_agents + ] + action_list = [agent_output.action for agent_output in agents_output] + time_step = env.step(action_list) + episode_rewards += time_step.rewards[player_pos] + sum_episode_rewards[player_pos] += episode_rewards + return sum_episode_rewards / num_episodes + + +def create_training_agents( + num_players, num_actions, info_state_size, hidden_layers_sizes +): + """Create the agents we want to use for 
learning.""" + if FLAGS.learner == "qlearning": + # pylint: disable=g-complex-comprehension + return [ + tabular_qlearner.QLearner( + player_id=idx, + num_actions=num_actions, + # step_size=0.02, + step_size=0.1, + # epsilon_schedule=rl_tools.ConstantSchedule(0.5), + epsilon_schedule=rl_tools.LinearSchedule(0.5, 0.2, 1000000), + discount_factor=0.99) for idx in range(num_players) + ] + elif FLAGS.learner == "dqn": + # pylint: disable=g-complex-comprehension + return [ + dqn.DQN( + player_id=idx, + state_representation_size=info_state_size, + num_actions=num_actions, + discount_factor=0.99, + epsilon_start=0.5, + epsilon_end=0.1, + hidden_layers_sizes=hidden_layers_sizes, + replay_buffer_capacity=FLAGS.replay_buffer_capacity, + batch_size=FLAGS.batch_size) for idx in range(num_players) + ] + else: + raise RuntimeError("Unknown learner") + + +class FirstActionAgent(rl_agent.AbstractAgent): + """An example agent class.""" + + def __init__(self, player_id, num_actions, name="first_action_agent"): + assert num_actions > 0 + self._player_id = player_id + self._num_actions = num_actions + + def step(self, time_step, is_evaluation=False): + # If it is the end of the episode, don't select an action. + if time_step.last(): + return + + # Pick the first legal action. + cur_legal_actions = time_step.observations["legal_actions"][self._player_id] + action = cur_legal_actions[0] + probs = np.zeros(self._num_actions) + probs[action] = 1.0 + + return rl_agent.StepOutput(action=action, probs=probs) + + +class RollingAverage(object): + """Class to store a rolling average.""" + + def __init__(self, size=100): + self._size = size + self._values = np.array([0] * self._size, dtype=np.float64) + self._index = 0 + self._total_additions = 0 + + def add(self, value): + self._values[self._index] = value + self._total_additions += 1 + self._index = (self._index + 1) % self._size + + def mean(self): + n = min(self._size, self._total_additions) + if n == 0: + return 0 + return self._values.sum() / n + + +def main(_): + np.random.seed(FLAGS.seed) + + num_players = FLAGS.num_players + + env = rl_environment.Environment(FLAGS.game, include_full_state=True) + info_state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + # Exploitee agents + if FLAGS.exploitee == "first": + exploitee_agents = [ + FirstActionAgent(idx, num_actions) for idx in range(num_players) + ] + elif FLAGS.exploitee == "random": + exploitee_agents = [ + random_agent.RandomAgent(player_id=idx, num_actions=num_actions) + # FirstActionAgent(player_id=idx, num_actions=num_actions) + for idx in range(num_players) + ] + else: + raise RuntimeError("Unknown exploitee") + + rolling_averager = RollingAverage(FLAGS.window_size) + rolling_averager_p0 = RollingAverage(FLAGS.window_size) + rolling_averager_p1 = RollingAverage(FLAGS.window_size) + rolling_value = 0 + total_value = 0 + total_value_n = 0 + + hidden_layers_sizes = [int(l) for l in FLAGS.hidden_layers_sizes] + # pylint: disable=g-complex-comprehension + learning_agents = create_training_agents( + num_players, num_actions, info_state_size, hidden_layers_sizes + ) + + print("Starting...") + + for ep in range(FLAGS.num_train_episodes): + if (ep + 1) % FLAGS.eval_every == 0: + r_mean = eval_against_fixed_bots( + env, learning_agents, exploitee_agents, FLAGS.eval_episodes + ) + value = r_mean[0] + r_mean[1] + rolling_averager.add(value) + rolling_averager_p0.add(r_mean[0]) + rolling_averager_p1.add(r_mean[1]) + rolling_value = rolling_averager.mean() + 
rolling_value_p0 = rolling_averager_p0.mean() + rolling_value_p1 = rolling_averager_p1.mean() + total_value += value + total_value_n += 1 + avg_value = total_value / total_value_n + print( + ( + "[{}] Mean episode rewards {}, value: {}, " + + "rval: {} (p0/p1: {} / {}), aval: {}" + ).format( + ep + 1, + r_mean, + value, + rolling_value, + rolling_value_p0, + rolling_value_p1, + avg_value, + ) + ) + + agents_round1 = [learning_agents[0], exploitee_agents[1]] + agents_round2 = [exploitee_agents[0], learning_agents[1]] + + for agents in [agents_round1, agents_round2]: + time_step = env.reset() + while not time_step.last(): + player_id = time_step.observations["current_player"] + if env.is_turn_based: + agent_output = agents[player_id].step(time_step) + action_list = [agent_output.action] + else: + agents_output = [agent.step(time_step) for agent in agents] + action_list = [agent_output.action for agent_output in agents_output] + time_step = env.step(action_list) + + # Episode is over, step all agents with final info state. + for agent in agents: + agent.step(time_step) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/roshambo_population_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/roshambo_population_example.py new file mode 100644 index 0000000..cb22654 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/roshambo_population_example.py @@ -0,0 +1,233 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Simple example of using the Roshambo population. + +Note: the Roshambo bots are an optional dependency and excluded by default. +To enable Roshambo bots, set OPEN_SPIEL_BUILD_WITH_ROSHAMBO to ON when building. +See +https://github.com/deepmind/open_spiel/blob/master/docs/install.md#configuring-conditional-dependencies +for details. +""" + +import re +from absl import app +from absl import flags +import numpy as np + +from open_spiel.python import games # pylint: disable=unused-import +from open_spiel.python import rl_agent +from open_spiel.python import rl_environment +import pyspiel + +FLAGS = flags.FLAGS + +# See open_spiel/data/paper_data/pbe_rrps for the bot table from the RRPS paper: +# https://arxiv.org/abs/2303.03196 +flags.DEFINE_string("bot_table_file", None, + "The file containing the bot entries.") + +flags.DEFINE_integer("player0_pop_id", 0, "Population member ID for player 0") +flags.DEFINE_integer("player1_pop_id", 1, "Population member ID for player 1") +flags.DEFINE_integer("seed", 0, "Seed to use for RNG") +flags.DEFINE_integer("env_recall", 1, + "Number of recent steps to include in observation") + + +class BotAgent(rl_agent.AbstractAgent): + """Agent class that wraps a bot. + + Note, the environment must include the OpenSpiel state in its observations, + which means it must have been created with use_full_state=True. 
+ """ + + def __init__(self, num_actions, bot, name="bot_agent"): + assert num_actions > 0 + self._bot = bot + self._num_actions = num_actions + + def restart(self): + self._bot.restart() + + def step(self, time_step, is_evaluation=False): + # If it is the end of the episode, don't select an action. + if time_step.last(): + return + + _, state = pyspiel.deserialize_game_and_state( + time_step.observations["serialized_state"]) + + action = self._bot.step(state) + probs = np.zeros(self._num_actions) + probs[action] = 1.0 + + return rl_agent.StepOutput(action=action, probs=probs) + + +def eval_agents(env, agents, num_players, num_episodes): + """Evaluate the agent.""" + sum_episode_rewards = np.zeros(num_players) + for ep in range(num_episodes): + for agent in agents: + # Bots need to be restarted at the start of the episode. + if hasattr(agent, "restart"): + agent.restart() + time_step = env.reset() + episode_rewards = np.zeros(num_players) + while not time_step.last(): + agents_output = [ + agent.step(time_step, is_evaluation=True) for agent in agents + ] + action_list = [agent_output.action for agent_output in agents_output] + time_step = env.step(action_list) + episode_rewards += time_step.rewards + sum_episode_rewards += episode_rewards + print(f"Finished episode {ep}, " + + f"avg returns: {sum_episode_rewards / num_episodes}") + + return sum_episode_rewards / num_episodes + + +def print_roshambo_bot_names_and_ids(roshambo_bot_names): + print("Roshambo bot population:") + for i in range(len(roshambo_bot_names)): + print(f"{i}: {roshambo_bot_names[i]}") + + +def create_roshambo_bot_agent(player_id, num_actions, bot_names, pop_id): + name = bot_names[pop_id] + # Creates an OpenSpiel bot with the default number of throws + # (pyspiel.ROSHAMBO_NUM_THROWS). To create one for a different number of + # throws per episode, add the number as the third argument here. 
+ bot = pyspiel.make_roshambo_bot(player_id, name) + return BotAgent(num_actions, bot, name=name) + + +def analyze_bot_table(filename): + """Do some analysis on the payoff cross-table.""" + print(f"Opening bot table file: {filename}") + bot_table_file = open(filename, "r") + table = np.zeros(shape=(pyspiel.ROSHAMBO_NUM_BOTS, + pyspiel.ROSHAMBO_NUM_BOTS), dtype=np.float64) + print("Parsing file...") + values = {} + bot_names_map = {} + for line in bot_table_file: + line = line.strip() + # ('driftbot', 'driftbot', -0.571) + myre = re.compile(r"\'(.*)\', \'(.*)\', (.*)\)") + match_obj = myre.search(line) + row_agent, col_agent, value = match_obj.groups() + values[f"{row_agent},{col_agent}"] = value + bot_names_map[row_agent] = True + bot_names_list = list(bot_names_map.keys()) + bot_names_list.sort() + print(len(bot_names_list)) + assert len(bot_names_list) == pyspiel.ROSHAMBO_NUM_BOTS + print(bot_names_list) + for i in range(pyspiel.ROSHAMBO_NUM_BOTS): + for j in range(pyspiel.ROSHAMBO_NUM_BOTS): + key = f"{bot_names_list[i]},{bot_names_list[j]}" + assert key in values + table[i][j] = float(values[key]) + print("Population returns:") + pop_returns = np.zeros(pyspiel.ROSHAMBO_NUM_BOTS) + pop_aggregate = np.zeros(pyspiel.ROSHAMBO_NUM_BOTS) + for i in range(pyspiel.ROSHAMBO_NUM_BOTS): + pop_eval = 0 + for j in range(pyspiel.ROSHAMBO_NUM_BOTS): + pop_eval += table[i][j] + pop_eval /= pyspiel.ROSHAMBO_NUM_BOTS + # print(f" {bot_names_list[i]}: {pop_eval}") + pop_returns[i] = pop_eval + pop_aggregate[i] += pop_eval + print(f" {pop_eval},") + print("Population exploitabilities: ") + pop_expls = np.zeros(pyspiel.ROSHAMBO_NUM_BOTS) + avg_pop_expl = 0 + for i in range(pyspiel.ROSHAMBO_NUM_BOTS): + pop_expl = -float(pyspiel.ROSHAMBO_NUM_THROWS) + for j in range(pyspiel.ROSHAMBO_NUM_BOTS): + pop_expl = max(pop_expl, -table[i][j]) + avg_pop_expl += pop_expl + pop_expls[i] = pop_expl + pop_aggregate[i] -= pop_expl + print(f" {pop_expl},") + avg_pop_expl /= pyspiel.ROSHAMBO_NUM_BOTS + print(f"Avg within-pop expl: {avg_pop_expl}") + print("Aggregate: ") + indices = np.argsort(pop_aggregate) + for i in range(pyspiel.ROSHAMBO_NUM_BOTS): + idx = indices[pyspiel.ROSHAMBO_NUM_BOTS - i - 1] + print(f" {i+1} & \\textsc{{{bot_names_list[idx]}}} & " + + f" ${pop_returns[idx]:0.3f}$ " + + f"& ${pop_expls[idx]:0.3f}$ & ${pop_aggregate[idx]:0.3f}$ \\\\") + print("Dominance:") + for i in range(pyspiel.ROSHAMBO_NUM_BOTS): + for j in range(pyspiel.ROSHAMBO_NUM_BOTS): + if np.all(np.greater(table[i], table[j])): + print(f"{bot_names_list[i]} dominates {bot_names_list[j]}") + + +def main(_): + np.random.seed(FLAGS.seed) + + if FLAGS.bot_table_file is not None: + analyze_bot_table(FLAGS.bot_table_file) + return + + # Note that the include_full_state variable has to be enabled because the + # BotAgent needs access to the full state. 
+ env = rl_environment.Environment( + "repeated_game(stage_game=matrix_rps(),num_repetitions=" + + f"{pyspiel.ROSHAMBO_NUM_THROWS}," + + f"recall={FLAGS.env_recall})", + include_full_state=True) + num_players = 2 + num_actions = env.action_spec()["num_actions"] + # Learning agents might need this: + # info_state_size = env.observation_spec()["info_state"][0] + + print("Loading population...") + pop_size = pyspiel.ROSHAMBO_NUM_BOTS + print(f"Population size: {pop_size}") + roshambo_bot_names = pyspiel.roshambo_bot_names() + roshambo_bot_names.sort() + print_roshambo_bot_names_and_ids(roshambo_bot_names) + + bot_id = 0 + roshambo_bot_ids = {} + for name in roshambo_bot_names: + roshambo_bot_ids[name] = bot_id + bot_id += 1 + + # Create two bot agents + agents = [ + create_roshambo_bot_agent(0, num_actions, roshambo_bot_names, + FLAGS.player0_pop_id), + create_roshambo_bot_agent(1, num_actions, roshambo_bot_names, + FLAGS.player1_pop_id) + ] + + print("Starting eval run.") + print(f"Player 0 is (pop_id {FLAGS.player0_pop_id}: " + + f"{roshambo_bot_names[FLAGS.player0_pop_id]})") + print(f"Player 1 is (pop_id {FLAGS.player1_pop_id}: " + + f"{roshambo_bot_names[FLAGS.player1_pop_id]})") + avg_eval_returns = eval_agents(env, agents, num_players, 100) + print(avg_eval_returns) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/rrps_poprl/README.md b/scenarios/bargaining/open_spiel/open_spiel/python/examples/rrps_poprl/README.md new file mode 100644 index 0000000..38c1306 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/rrps_poprl/README.md @@ -0,0 +1,18 @@ +This code implements the PopRL algorithm described in Lanctot et al. +[Population-based Evaluation in Repeated Rock-Paper-Scissors as a Benchmark for +Multiagent Reinforcement Learning](https://openreview.net/forum?id=gQnJ7ODIAx) + +The implementation of IMPALA is an online agent version of the IMPALA example in +the Haiku codebase. It has been modified to add prediction labels, which get +stored in the environment. + +Checkpointing is not working for technical reasons (some nontrivial parts are +needed to handle Haiku functions / models). It needs to be fixed if this is to +run for long periods of time or in interactive mode. + +This implementation is NOT designed for scale. + +The code is provided as-is. It's a direct conversion of the code used for the +paper but it has not been extensively tested after the transformation. The basic +tests work and the transformation was straight-forward. However, if you run into +any trouble, please contact lanctot@google.com. diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/rrps_poprl/impala.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/rrps_poprl/impala.py new file mode 100644 index 0000000..c5d8784 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/rrps_poprl/impala.py @@ -0,0 +1,510 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""IMPALA agent implemented in JAX. + +This is a basic IMPALA agent adapted from the example in the Haiku project: +https://github.com/deepmind/dm-haiku/tree/main/examples/impala +""" + +import collections +import functools +from typing import Any, Callable, Dict, Optional, Tuple + +import chex +import haiku as hk +import jax +from jax.example_libraries import optimizers +import jax.numpy as jnp +import numpy as np +import optax +import rlax +import tree + +from open_spiel.python import rl_agent +from open_spiel.python.examples.rrps_poprl import rl_environment + + +AgentOutput = collections.namedtuple( + "AgentOutput", ["policy_logits", "values", "action", "prediction_logits"] +) +NetOutput = collections.namedtuple( + "NetOutput", ["policy_logits", "value", "prediction_logits"] +) +Transition = collections.namedtuple( + "Transition", ["timestep", "agent_out", "agent_state"] +) +NetFactory = Callable[[int], hk.RNNCore] +Nest = Any + + +# The IMPALA paper sums losses, rather than taking the mean. +# We wrap rlax to do so as well. +def policy_gradient_loss(logits, *args): + """rlax.policy_gradient_loss, but with sum(loss) and [T, B, ...] inputs.""" + # jax.experimental.host_callback.id_print(logits.shape) + # print(logits.shape) + mean_per_batch = jax.vmap(rlax.policy_gradient_loss, in_axes=1)(logits, *args) + total_loss_per_batch = mean_per_batch * logits.shape[0] + return jnp.sum(total_loss_per_batch) + + +def entropy_loss(logits, *args): + """rlax.entropy_loss, but with sum(loss) and [T, B, ...] inputs.""" + mean_per_batch = jax.vmap(rlax.entropy_loss, in_axes=1)(logits, *args) + total_loss_per_batch = mean_per_batch * logits.shape[0] + return jnp.sum(total_loss_per_batch) + + +def mean_pred_loss_without_batch( + logits_t: chex.Array, + labels: chex.Array, +) -> chex.Array: + """Mean prediction loss without batch dimension.""" + chex.assert_rank([logits_t, labels], [2, 1]) + chex.assert_type([logits_t, labels], [float, int]) + labels_one_hot = jax.nn.one_hot(labels, logits_t.shape[-1]) + softmax_xent = -jnp.sum(labels_one_hot * jax.nn.log_softmax(logits_t)) + softmax_xent /= labels.shape[0] + return softmax_xent + + +def prediction_loss(logits, labels): + # print(logits.shape) -> [T, B, num_preds] + # print(labels.shape) -> [T, B] + mean_per_batch = jax.vmap(mean_pred_loss_without_batch, in_axes=1)( + logits, labels + ) + total_loss_per_batch = mean_per_batch * logits.shape[0] + return jnp.sum(total_loss_per_batch) + + +def _preprocess_none(t) -> np.ndarray: + if t is None: + return np.array(0.0, dtype=np.float32) + else: + return np.asarray(t) + + +def preprocess_step( + timestep: rl_environment.TimeStep, num_players +) -> rl_environment.TimeStep: + # TODO(author5): fix for our time steps (should be multiple discounts) + if timestep.discounts is None: + timestep = timestep._replace(discounts=[1.0] * num_players) + if timestep.rewards is None: + timestep = timestep._replace(rewards=[0.0] * num_players) + # print(timestep) + return tree.map_structure(_preprocess_none, timestep) + + +# dm_env: return TimeStep(StepType.FIRST, None, None, observation) +# OpenSpiel: "observations", "rewards", "discounts", "step_type" +def restart(dummy_obs, num_players): + all_obs = { + "info_state": [dummy_obs.copy() for i in range(num_players)], + "legal_actions": [np.zeros(3)], + "prediction_label": 0, + } + return rl_environment.TimeStep( + all_obs, [0.0], None, rl_environment.StepType.FIRST + ) + + 
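+# BasicRNN below maps each player's info_state through an MLP torso, then an
+# LSTM core wrapped in hk.ResetCore (so the recurrent state is reset whenever
+# a FIRST step is seen), and finally through three linear heads: policy
+# logits, a scalar value estimate, and prediction logits used by PopRL to
+# predict which population member the opponent is.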
+class BasicRNN(hk.RNNCore): + """A simple recurrent neural network.""" + + def __init__( + self, + player_id, + num_actions, + hidden_layer_sizes, + num_predictions, + name=None, + ): + super().__init__(name=name) + self._player_id = player_id + self._num_actions = num_actions + self._num_predictions = num_predictions + self._hidden_layer_sizes = hidden_layer_sizes + if isinstance(hidden_layer_sizes, int): + self._hidden_layer_sizes = [hidden_layer_sizes] + elif isinstance(hidden_layer_sizes, tuple): + self._hidden_layer_sizes = list(hidden_layer_sizes) + self._core = hk.ResetCore(hk.LSTM(256)) + + def initial_state(self, batch_size): + return self._core.initial_state(batch_size) + + def __call__(self, x: rl_environment.TimeStep, state): + x = jax.tree_util.tree_map(lambda t: t[None, ...], x) + return self.unroll(x, state) + + def unroll(self, x, state): + modules = [hk.Flatten()] + for hsize in self._hidden_layer_sizes: + modules.append(hk.Linear(hsize)) + modules.append(jax.nn.relu) + torso_net = hk.Sequential(modules) + torso_output = hk.BatchApply(torso_net)( + x.observations["info_state"][self._player_id] + ) + should_reset = jnp.equal(x.step_type, int(rl_environment.StepType.FIRST)) + core_input = (torso_output, should_reset) + core_output, state = hk.dynamic_unroll(self._core, core_input, state) + policy_logits = hk.Linear(self._num_actions)(core_output) + prediction_logits = hk.Linear(self._num_predictions)(core_output) + value = hk.Linear(1)(core_output) + value = jnp.squeeze(value, axis=-1) + return ( + NetOutput( + policy_logits=policy_logits, + value=value, + prediction_logits=prediction_logits, + ), + state, + ) + # torso_output = torso_net(x.observations["info_state"][self._player_id]) + # policy_logits = hk.Linear(self._num_actions)(torso_output) + # prediction_logits = hk.Linear(self._num_predictions)(torso_output) + # value = hk.Linear(1)(torso_output) + # value = jnp.squeeze(value, axis=-1) + # return NetOutput(policy_logits=policy_logits, + # value=value, + # prediction_logits=prediction_logits), state + + +class IMPALA(rl_agent.AbstractAgent): + """IMPALA agent implementation in JAX.""" + + def __init__( + self, + player_id, + state_representation_size, + num_actions, + num_players, + unroll_len, + net_factory: NetFactory, + rng_key, + max_abs_reward, + learning_rate=0.0001, + entropy=0.01, + discount_factor=0.99, + hidden_layers_sizes=128, + batch_size=16, + num_predictions=10, + prediction_weight=0.01, + max_global_gradient_norm=None, + ): + self._player_id = player_id + self._state_representation_size = state_representation_size + self._num_actions = num_actions + self._num_players = num_players + self._unroll_len = unroll_len + self._rng_key = rng_key + self._max_abs_reward = max_abs_reward + self._learning_rate = learning_rate + self._batch_size = batch_size + self._discount_factor = discount_factor + self._entropy = entropy + self._hidden_layer_sizes = hidden_layers_sizes + self._num_predictions = num_predictions + self._prediction_weight = prediction_weight + self._dummy_obs = np.zeros( + shape=state_representation_size, dtype=np.float32 + ) + + # pylint: disable=too-many-function-args + net_factory = functools.partial( + net_factory, + player_id, + num_actions, + hidden_layers_sizes, + num_predictions, + ) + # Instantiate two hk.transforms() - one for getting the initial state of the + # agent, another for actually initializing and running the agent. 
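+    # hk.without_apply_rng is used because neither transformed function draws
+    # random numbers in its apply step; action sampling happens later, in
+    # internal_step, with an explicit PRNG key.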
+ _, self._initial_state_apply_fn = hk.without_apply_rng( + hk.transform(lambda batch_size: net_factory().initial_state(batch_size)) + ) + + self._init_fn, self._apply_fn = hk.without_apply_rng( + hk.transform(lambda obs, state: net_factory().unroll(obs, state)) + ) + + # Learner components + # self._opt = optax.rmsprop(5e-3, decay=0.99, eps=1e-7) + self._opt = optax.rmsprop(self._learning_rate, decay=0.99, eps=1e-7) + # self._opt = optax.sgd(self._learning_rate) + + # Prepare parameters and initial state + self._rng_key, subkey = jax.random.split(self._rng_key) + init_params = self.initial_params(subkey) + self._frame_count_and_params = (0, jax.device_get(init_params)) + (_, params) = self._frame_count_and_params + self._opt_state = self._opt.init(params) + self._rng_key, _ = jax.random.split(self._rng_key) + self._agent_state = self._agent_state = self.initial_state(None) + self._traj = [] + self._batch = [] + self._last_policy = None + self._last_predictions = None + + @functools.partial(jax.jit, static_argnums=0) + def initial_params(self, rng_key): + """Initializes the agent params given the RNG key.""" + + dummy_inputs = jax.tree_util.tree_map( + lambda t: np.zeros(t.shape, t.dtype), self._dummy_obs + ) + dummy_inputs = preprocess_step( + restart(dummy_inputs, self._num_players), self._num_players + ) + # Add time and batch dimensions + dummy_inputs = jax.tree_util.tree_map( + lambda t: t[None, None, ...], dummy_inputs + ) + # print(dummy_inputs) + return self._init_fn(rng_key, dummy_inputs, self.initial_state(1)) + + @functools.partial(jax.jit, static_argnums=(0, 1)) + def initial_state(self, batch_size: Optional[int]): + """Returns agent initial state.""" + # We expect that generating the initial_state does not require parameters. + return self._initial_state_apply_fn(None, batch_size) + + @functools.partial(jax.jit, static_argnums=(0,)) + def internal_step( + self, + rng_key, + params: hk.Params, + timestep: rl_environment.TimeStep, + state: Nest, + ) -> Tuple[AgentOutput, Nest]: + """For a given single-step, unbatched timestep, output the chosen action.""" + # Pad timestep, state to be [T, B, ...] and [B, ...] respectively. + # print("calling internal_step") + timestep = jax.tree_util.tree_map(lambda t: t[None, None, ...], timestep) + state = jax.tree_util.tree_map(lambda t: t[None, ...], state) + net_out, next_state = self._apply_fn(params, timestep, state) + # print(timestep) + # Remove the padding from above. + net_out = jax.tree_util.tree_map( + lambda t: jnp.squeeze(t, axis=(0, 1)), net_out + ) + next_state = jax.tree_util.tree_map( + lambda t: jnp.squeeze(t, axis=0), next_state + ) + # Sample an action and return. + action = hk.multinomial(rng_key, net_out.policy_logits, num_samples=1) + action = jnp.squeeze(action, axis=-1) + return ( + AgentOutput( + net_out.policy_logits, + net_out.value, + action, + net_out.prediction_logits, + ), + next_state, + ) + + def unroll( + self, + params: hk.Params, + trajectory: rl_environment.TimeStep, + state: Nest, + ) -> AgentOutput: + """Unroll the agent along trajectory.""" + net_out, _ = self._apply_fn(params, trajectory, state) + return AgentOutput( + net_out.policy_logits, + net_out.value, + action=[], + prediction_logits=net_out.prediction_logits, + ) + + def _loss( + self, + theta: hk.Params, + trajectories: Transition, + ) -> Tuple[jnp.ndarray, Dict[str, jnp.ndarray]]: + """Compute vtrace-based actor-critic loss.""" + # All the individual components are vectorized to be [T, B, ...] 
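+    # The total loss assembled below combines the V-trace policy-gradient
+    # loss, a baseline (value) loss, an entropy bonus, and, when
+    # prediction_weight > 0, a cross-entropy loss on the opponent-identity
+    # predictions (the PopRL auxiliary task).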
+ # print(trajectories) + # Transition(timestep=TimeStep(observations={ + # 'current_player': array([[-2, -2, -2, -2, -2], + # [-2, -2, -2, -2, -2], + # [-2, -2, -2, -2, -2], + # [-2, -2, -2, -2, -2], + # Since prediction_label is a scalar, it ends up being [T, B] + + initial_state = jax.tree_util.tree_map( + lambda t: t[0], trajectories.agent_state + ) + learner_outputs = self.unroll(theta, trajectories.timestep, initial_state) + v_t = learner_outputs.values[1:] + # Remove bootstrap timestep from non-timesteps. + _, actor_out, _ = jax.tree_util.tree_map(lambda t: t[:-1], trajectories) + learner_outputs = jax.tree_util.tree_map(lambda t: t[:-1], learner_outputs) + v_tm1 = learner_outputs.values + + # Get the discount, reward, step_type from the *next* timestep. + timestep = jax.tree_util.tree_map(lambda t: t[1:], trajectories.timestep) + discounts = timestep.discounts[self._player_id] * self._discount_factor + rewards = timestep.rewards[self._player_id] + if self._max_abs_reward > 0: + rewards = jnp.clip(rewards, -self._max_abs_reward, self._max_abs_reward) + + # The step is uninteresting if we transitioned LAST -> FIRST. + # timestep corresponds to the *next* time step, so we filter for FIRST. + mask = jnp.not_equal(timestep.step_type, int(rl_environment.StepType.FIRST)) + mask = mask.astype(jnp.float32) + + rhos = rlax.categorical_importance_sampling_ratios( + learner_outputs.policy_logits, actor_out.policy_logits, actor_out.action + ) + # vmap vtrace_td_error_and_advantage to take/return [T, B, ...]. + vtrace_td_error_and_advantage = jax.vmap( + rlax.vtrace_td_error_and_advantage, in_axes=1, out_axes=1 + ) + + vtrace_returns = vtrace_td_error_and_advantage( + v_tm1, v_t, rewards, discounts, rhos + ) + pg_advs = vtrace_returns.pg_advantage + # print(learner_outputs.policy_logits.shape) + # jax.experimental.host_callback.id_print(learner_outputs.policy_logits.shape) + pg_loss = policy_gradient_loss( + learner_outputs.policy_logits, actor_out.action, pg_advs, mask + ) + + baseline_loss = 0.5 * jnp.sum(jnp.square(vtrace_returns.errors) * mask) + ent_loss = entropy_loss(learner_outputs.policy_logits, mask) + + pred_loss = 0 + if self._prediction_weight > 0: + pred_loss = prediction_loss( + learner_outputs.prediction_logits, + trajectories.timestep.observations["prediction_label"][:-1], + ) + + total_loss = pg_loss + total_loss += 0.5 * baseline_loss + total_loss += self._entropy * ent_loss + total_loss += self._prediction_weight * pred_loss + + logs = {} + logs["PG_loss"] = pg_loss + logs["baseline_loss"] = baseline_loss + logs["entropy_loss"] = ent_loss + logs["prediction_loss"] = pred_loss + logs["total_loss"] = total_loss + return total_loss, logs + + @functools.partial(jax.jit, static_argnums=0) + def update(self, params, opt_state, batch: Transition): + """The actual update function.""" + (_, logs), grads = jax.value_and_grad(self._loss, has_aux=True)( + params, batch + ) + + grad_norm_unclipped = optimizers.l2_norm(grads) + updates, updated_opt_state = self._opt.update(grads, opt_state) + params = optax.apply_updates(params, updates) + weight_norm = optimizers.l2_norm(params) + logs.update({ + "grad_norm_unclipped": grad_norm_unclipped, + "weight_norm": weight_norm, + }) + return params, updated_opt_state, logs + + def _learning_step(self): + # print("Learning!!!") + # Prepare for consumption, then put batch onto device. 
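+    # self._batch holds batch_size trajectories of unroll_len steps each;
+    # stacking them along axis=1 yields [T, B, ...] arrays, matching the
+    # layout the loss expects.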
+ # stacked_batch = jax.tree_multimap(lambda *xs: np.stack(xs, axis=1), + # *self._batch) + stacked_batch = jax.tree_util.tree_map( + lambda *xs: np.stack(xs, axis=1), *self._batch + ) + # self._device_q.put(jax.device_put(stacked_batch)) + jax.device_put(stacked_batch) + num_frames, params = self._frame_count_and_params + params, self._opt_state, _ = self.update( + params, self._opt_state, stacked_batch + ) + self._frame_count_and_params = (num_frames + 1, params) + self._batch = [] + + def last_policy(self): + return self._last_policy + + def last_predictions(self): + return self._last_predictions + + def step(self, time_step, is_evaluation=False): + # Hack to run with environments that include the serialized state: simply + # remove it. + if "serialized_state" in time_step.observations: + del time_step.observations["serialized_state"] + # OpenSpiel time steps have lists of floats. First convert to numpy. + for p in range(self._num_players): + time_step.observations["info_state"][p] = np.asarray( + time_step.observations["info_state"][p] + ) + + # print(time_step) + # TODO(author5): the arrays need to be the same shape, so when the + # legal actions are empty vs full, this is a problem. + # Fix later. for now, replace with a constant + time_step.observations["legal_actions"] = [np.ones(3)] + + agent_state = self._agent_state + (_, params) = self._frame_count_and_params + jax.device_put(params) + time_step = preprocess_step(time_step, self._num_players) + self._rng_key, subkey = jax.random.split(self._rng_key) + agent_out, next_state = self.internal_step( + subkey, params, time_step, agent_state + ) + + self._last_policy = jax.nn.softmax(agent_out.policy_logits).copy() + self._last_predictions = jax.nn.softmax(agent_out.prediction_logits).copy() + + transition = Transition( + timestep=time_step, agent_out=agent_out, agent_state=agent_state + ) + self._agent_state = next_state + + # Do not add to trajectory or check for learning during evaluation. + if not is_evaluation: + self._traj.append(transition) + # Check for learning step. + if len(self._traj) >= self._unroll_len: + trajectory = jax.device_get(self._traj) + # trajectory = jax.tree_multimap(lambda *xs: np.stack(xs), *trajectory) + trajectory = jax.tree_util.tree_map( + lambda *xs: np.stack(xs), *trajectory + ) + self._batch.append(trajectory) + self._traj = self._traj[-1:] + if len(self._batch) >= self._batch_size: + self._learning_step() + + if time_step.last(): + return None + + assert 0 <= agent_out.action < self._num_actions + # TODO(author5): get probs from the policy in internal_step + probs = np.zeros(self._num_actions, dtype=np.float32) + probs[agent_out.action] = 1.0 + return rl_agent.StepOutput(action=agent_out.action, probs=probs) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/rrps_poprl/impala_jax_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/rrps_poprl/impala_jax_test.py new file mode 100644 index 0000000..40de0a3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/rrps_poprl/impala_jax_test.py @@ -0,0 +1,184 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from absl.testing import absltest +import jax +import numpy as np + +from open_spiel.python import rl_agent +from open_spiel.python.examples.rrps_poprl import impala +from open_spiel.python.examples.rrps_poprl import rl_environment +import pyspiel + + +# A simple two-action game encoded as an EFG game. Going left gets -1, going +# right gets a +1. +SIMPLE_EFG_DATA = """ + EFG 2 R "Simple single-agent problem" { "Player 1" } "" + p "ROOT" 1 1 "ROOT" { "L" "R" } 0 + t "L" 1 "Outcome L" { -1.0 } + t "R" 2 "Outcome R" { 1.0 } +""" + + +class FixedSequenceAgent(rl_agent.AbstractAgent): + """An example agent class.""" + + def __init__( + self, player_id, num_actions, sequence, name="fixed_sequence_agent" + ): + assert num_actions > 0 + self._player_id = player_id + self._num_actions = num_actions + self._sequence = sequence + self._seq_idx = 0 + + def step(self, time_step, is_evaluation=False): + # If it is the end of the episode, don't select an action. + if time_step.last(): + return + + probs = np.zeros(self._num_actions) + action = self._sequence[self._seq_idx] + self._seq_idx += 1 + if self._seq_idx >= len(self._sequence): + self._seq_idx = 0 + probs[action] = 1.0 + + return rl_agent.StepOutput(action=action, probs=probs) + + +class IMPALATest(absltest.TestCase): + + def test_simple_game(self): + game = pyspiel.load_efg_game(SIMPLE_EFG_DATA) + env = rl_environment.Environment(game=game) + max_abs_reward = max( + abs(env.game.min_utility()), abs(env.game.max_utility()) + ) + agent = impala.IMPALA( + 0, + state_representation_size=game.information_state_tensor_shape()[0], + num_actions=game.num_distinct_actions(), + num_players=game.num_players(), + unroll_len=20, + net_factory=impala.BasicRNN, + rng_key=jax.random.PRNGKey(42), + max_abs_reward=max_abs_reward, + learning_rate=5e-3, + hidden_layers_sizes=[16], + batch_size=5, + ) + + total_reward = 0 + for _ in range(1000): + time_step = env.reset() + while not time_step.last(): + agent_output = agent.step(time_step) + time_step = env.step([agent_output.action]) + total_reward += time_step.rewards[0] + agent.step(time_step) + print(total_reward) + self.assertGreaterEqual(total_reward, 500) + + @absltest.skip("Takes too long to run, but does approach 1.") + def test_catch(self): + env = rl_environment.Environment("catch") + state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + num_players = env.num_players + max_abs_reward = max( + abs(env.game.min_utility()), abs(env.game.max_utility()) + ) + agent = impala.IMPALA( + 0, + state_representation_size=state_size, + num_actions=num_actions, + num_players=num_players, + unroll_len=20, + net_factory=impala.BasicRNN, + rng_key=jax.random.PRNGKey(42), + max_abs_reward=max_abs_reward, + learning_rate=5e-3, + hidden_layers_sizes=[16], + batch_size=2, + ) + + window_sum = 0 + window_width = 50 + window_tick = 0 + for ep in range(10000): + episode_reward = 0 + time_step = env.reset() + while not time_step.last(): + agent_output = agent.step(time_step) + time_step = env.step([agent_output.action]) + episode_reward += time_step.rewards[0] + # 
print(f"Total reward: {total_reward}") + # avg_rew = total_reward / (ep + 1) + agent.step(time_step) + window_sum += episode_reward + window_tick += 1 + if window_tick >= window_width: + avg_window_reward = window_sum / window_width + window_tick = 0 + window_sum = 0 + print(f"Ep {ep}, avg window rew: {avg_window_reward}") + + @absltest.skip("Takes too long to run, but does approach 1000.") + def test_run_rps(self): + env = rl_environment.Environment( + f"repeated_game(stage_game=matrix_rps(),num_repetitions={pyspiel.ROSHAMBO_NUM_THROWS},recall=1)" + ) + state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + num_players = env.num_players + max_abs_reward = max( + abs(env.game.min_utility()), abs(env.game.max_utility()) + ) + agents = [ + impala.IMPALA( # pylint: disable=g-complex-comprehension + player_id, + state_representation_size=state_size, + num_actions=num_actions, + num_players=num_players, + unroll_len=20, + net_factory=impala.BasicRNN, + rng_key=jax.random.PRNGKey(seed), + max_abs_reward=max_abs_reward, + entropy=0.001, + learning_rate=0.001, + hidden_layers_sizes=[64, 32], + prediction_weight=0, + discount_factor=0.9, + batch_size=16, + ) + for (player_id, seed) in [(0, 238576517), (1, 738328671)] + ] + agents[0] = FixedSequenceAgent(0, num_actions, [0, 1, 2, 2, 1, 0]) + for ep in range(1000): + time_step = env.reset() + total_rewards = np.zeros(2, dtype=np.float32) + while not time_step.last(): + agent_outputs = [agents[0].step(time_step), agents[1].step(time_step)] + actions = [agent_outputs[0].action, agent_outputs[1].action] + time_step = env.step(actions) + total_rewards += np.array(time_step.rewards) + for agent in agents: + agent.step(time_step) + print(f"{ep} {total_rewards}") + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/rrps_poprl/poprl_main.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/rrps_poprl/poprl_main.py new file mode 100644 index 0000000..4a036ca --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/rrps_poprl/poprl_main.py @@ -0,0 +1,954 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Populatiron RL (PopRL) algorithm for repeated rock-paper-scissors. + +For details, see Lanctot et al. 
2023 Population-based Evaluation in Repeated
+Rock-Paper-Scissors as a Benchmark for Multiagent Reinforcement Learning
+https://openreview.net/forum?id=gQnJ7ODIAx
+"""
+
+import copy
+import sys
+import time
+
+from absl import app
+from absl import flags
+import jax
+import numpy as np
+
+from open_spiel.python import rl_agent
+from open_spiel.python import rl_tools
+from open_spiel.python.algorithms import random_agent
+from open_spiel.python.algorithms import tabular_qlearner
+from open_spiel.python.examples.rrps_poprl import impala
+from open_spiel.python.examples.rrps_poprl import rl_environment
+from open_spiel.python.jax import boltzmann_dqn
+from open_spiel.python.jax import dqn
+from open_spiel.python.jax import policy_gradient
+import pyspiel
+
+FLAGS = flags.FLAGS
+
+# Training parameters
+flags.DEFINE_string(
+    "checkpoint_dir",
+    "/tmp/dqn_test",
+    "Directory to save/load the agent models.",
+)
+flags.DEFINE_integer(
+    "save_every",
+    int(1e4),
+    "Episode frequency at which the DQN agent models are saved.",
+)
+flags.DEFINE_integer(
+    "num_train_episodes", int(1e6), "Number of training episodes."
+)
+flags.DEFINE_integer(
+    "eval_every",
+    100,
+    "Episode frequency at which the DQN agents are evaluated.",
+)
+flags.DEFINE_integer("eval_episodes", 1, "How many episodes to run per eval.")
+
+# DQN model hyper-parameters
+flags.DEFINE_list(
+    "hidden_layers_sizes",
+    [256, 128],
+    "Number of hidden units in the Q-Network MLP.",
+)
+flags.DEFINE_integer(
+    "replay_buffer_capacity", int(1e5), "Size of the replay buffer."
+)
+flags.DEFINE_integer(
+    "batch_size", 16, "Number of transitions to sample at each learning step."
+)
+flags.DEFINE_float("learning_rate", 0.01, "Learning rate.")
+flags.DEFINE_float("eta", 0.2, "BDQN eta param")
+
+# policy_gradient params
+flags.DEFINE_float("critic_learning_rate", 0.001, "Critic learning rate.")
+flags.DEFINE_float("pi_learning_rate", 0.0001, "Pi learning rate.")
+flags.DEFINE_float("entropy_cost", 0.001, "Entropy cost.")
+flags.DEFINE_float("lambda_", 1.0, "PG lambda.")
+flags.DEFINE_integer("num_critic_before_pi", 8, "Critic steps per pi update.")
+
+# impala params
+flags.DEFINE_integer("unroll_length", 20, "Unroll length.")
+flags.DEFINE_float(
+    "prediction_weight", 0, "Weight to put on the prediction losses."
+)
+
+# Main algorithm parameters
+flags.DEFINE_integer("seed", 0, "Seed to use for everything")
+flags.DEFINE_integer("window_size", 50, "Size of window for rolling average")
+flags.DEFINE_integer("num_players", 2, "Number of players")
+flags.DEFINE_string("game", "leduc_poker", "Game string")
+flags.DEFINE_string("exploitee", "random", "Exploitee (random | first)")
+flags.DEFINE_string("learner", "impala", "Learner (qlearning | dqn)")
+
+flags.DEFINE_integer("cp_freq", 10000, "Checkpoint save frequency.")
+flags.DEFINE_string("cp_dir", None, "Checkpoint directory")
+
+# Testing against specific members
+flags.DEFINE_integer("pop_only", -1, "Create a population of only this bot.")
+
+# Generalization. How many agents to leave out of the training population and
+# use for testing?
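+# A value of 0 means no bots are held out: the full population is used for
+# both training and testing.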
+flags.DEFINE_integer("leave_out_set_size", 0, "Cross-validation test size.") + +# Environment recall +flags.DEFINE_integer("env_recall", 1, "How many timesteps back define obs?") + +flags.DEFINE_string("pred_logs_dir", None, "Directory to save prediction logs.") + +# Interactive mode +flags.DEFINE_string("interactive_mode", None, 'Bot id or "human".') + +flags.DEFINE_float("rm_epsilon", 0.1, "Exploration for regret-matching.") + +# Population RL +flags.DEFINE_float("prob_selfplay", 0.2, "Probability that we meet ourself") +flags.DEFINE_string("eval_checkpoint", None, "Evaluate a checkpoint") + +# Set this to something specific for testing. List of IDs +# Set back to None to use full population. +FIXED_POPULATION = None + + +class State: + + def __init__(self): + self.np_rng_state = None + self.learning_agents = None + self.ep = None + self.rolling_averager = None + self.expl_rolling_averagers = None + + +## This does not work, unfortunately. Not sure why. Simple pickle does not work +## because one of the haiku transforms is not serializable. There seems to be +## some nontrivial logic to use checkpointing when working with Haiku. +## Below is my attempt at applying a fix I found based on this thread: +## https://github.com/google-deepmind/dm-haiku/issues/18 but it didn't work. +class Checkpoint(object): + """A class for saving the state of the agent (and model).""" + + def __init__(self, checkpoint_dir): + self.checkpoint_dir = checkpoint_dir + self.state = State() + + def restore_or_save(self): + assert False, "Not implemented yet." + # filename = os.path.join(self.checkpoint_dir, "tree.pkl") + # if os.path.exists(filename): + # self.state = self.restore() + # else: + # # pickle.dump(self.state, filename) # Pickles to any file (even /cns). + # self.save() + + def restore(self): + assert False, "Not implemented yet." + # print("Restoring checkpoint") + # with open(os.path.join(self.checkpoint_dir, "tree.pkl"), "rb") as f: + # tree_struct = pickle.load(f) + # leaves, treedef = jax.tree_util.tree_flatten(tree_struct) + # with open(os.path.join(self.checkpoint_dir, "arrays.npy"), "rb") as f: + # flat_state = [np.load(f, allow_pickle=False) for _ in leaves] + # return jax.tree_util.tree_unflatten(treedef, flat_state) + + def save(self): + assert False, "Not implemented yet." + # print("Saving checkpoint") + # # filename = os.path.join(self.checkpoint_dir, "checkpoint.pkl") + # # pickle.dump(self.state, filename) # Pickles to any file (even /cns). 
+ # with open(os.path.join(self.checkpoint_dir, "arrays.npy"), "wb") as f: + # for x in jax.tree_util.tree_leaves(self.state): + # np.save(f, x, allow_pickle=False) + # tree_struct = jax.tree_util.tree_map(lambda t: 0, self.state) + # with open(os.path.join(self.checkpoint_dir, "tree.pkl"), "wb") as f: + # pickle.dump(tree_struct, f) + + +class PredictionLogger(object): + """A prediction logger.""" + + def __init__(self, log_dir): + self._log_dir = log_dir + self._enabled = self._log_dir is not None + self._logs = {} + self._cur_log = "" + self._freq = 1000 + self._last_log = 0 + + def new_log(self, training_episodes): + if not self._enabled: + return + if training_episodes - self._last_log >= self._freq: + self._cur_log = "" + self._cur_step = 0 + + def log(self, training_episodes, pop_idx, predictions): + if not self._enabled: + return + if training_episodes - self._last_log >= self._freq: + line = f"{training_episodes} {pop_idx} {self._cur_step}" + for i in range(len(predictions)): + line = line + f" {predictions[i]}" + self._cur_log += line + "\n" + self._cur_step += 1 + + def end_log(self, training_episodes, pop_idx): + if not self._enabled: + return + if training_episodes - self._last_log >= self._freq: + key = f"{training_episodes}.{pop_idx}" + self._logs[key] = self._cur_log + + def update_training_episodes(self, training_episodes): + if not self._enabled: + return + if training_episodes - self._last_log >= self._freq: + self._last_log = training_episodes + + +def last_predictions(agent): + if hasattr(agent, "last_predictions"): + return agent.last_predictions() + else: + return np.zeros(pyspiel.ROSHAMBO_NUM_BOTS) + + +def eval_agent( + env, + num_players, + num_actions, + bot_names, + learning_agent, + prediction_logger, + num_training_episodes, +): + """Evaluate the agent.""" + sum_episode_rewards = np.zeros(num_players) + pop_expl = np.zeros(pyspiel.ROSHAMBO_NUM_BOTS) + for pop_idx in range(len(bot_names)): + bot_id = pop_idx + bot_name = bot_names[bot_id] + bot = pyspiel.make_roshambo_bot(0, bot_name) + pop_agent = BotAgent(num_actions, bot, name=bot_name) + + if hasattr(learning_agent, "restart"): + learning_agent.restart() + + agents = [pop_agent, learning_agent] + env.set_prediction_label(pop_idx) + + time_step = env.reset() + episode_rewards = np.zeros(num_players) + turn_num = 0 + prediction_logger.new_log(num_training_episodes) + + while not time_step.last(): + turn_num += 1 + player_id = time_step.observations["current_player"] + if env.is_turn_based: + agent_output = agents[player_id].step(time_step, is_evaluation=True) + action_list = [agent_output.action] + else: + agents_output = [ + agent.step(time_step, is_evaluation=True) for agent in agents + ] + action_list = [agent_output.action for agent_output in agents_output] + prediction_logger.log( + num_training_episodes, pop_idx, last_predictions(learning_agent) + ) + time_step = env.step(action_list) + episode_rewards += time_step.rewards + pop_expl[pop_idx] = episode_rewards[0] + sum_episode_rewards += episode_rewards + prediction_logger.end_log(num_training_episodes, pop_idx) + prediction_logger.update_training_episodes(num_training_episodes) + return sum_episode_rewards / len(bot_names), pop_expl + + +class HumanAgent(rl_agent.AbstractAgent): + """Agent class that wraps a bot. + + Note, the environment must include the OpenSpiel state. 
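+
+  Console input: "R" plays rock, "P" plays paper, "S" plays scissors, and
+  "q" restarts the interactive episode.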
+ """ + + def __init__(self, num_actions, name="human_agent"): + assert num_actions > 0 + self._num_actions = num_actions + + def step(self, time_step, is_evaluation=False): + action = 5 + while action > 2: + value_str = input("Choose an action: ") + if value_str == "R": + action = 0 + if value_str == "P": + action = 1 + if value_str == "S": + action = 2 + if value_str == "q": + action = -1 + probs = np.zeros(self._num_actions) + if action >= 0: + probs[action] = 1.0 + return rl_agent.StepOutput(action=action, probs=probs) + + +def pretty_top10_preds_str(predictions, indices, max_weight=1.01): + """Pretty string representation of the top 10 predictions.""" + + top_10_preds = "" + sum_weight = 0 + for i in range(10): + pred_idx = indices[42 - i] + weight = predictions[pred_idx] + bar_width = int(weight / 0.01) + bar_str = "#" * bar_width + top_10_preds += f" {pred_idx:2d}: {weight:.5f} {bar_str}\n" + sum_weight += weight + if sum_weight > max_weight: + break + return top_10_preds + + +def interactive_episode( + env, num_players, num_actions, bot_names, learning_agent +): + """Interactive Episode.""" + print("Starting interactive episode!") + actions_str = ["R", "P", "S"] + actions_seq = ["", ""] + + if FLAGS.interactive_mode == "human": + pop_agent = HumanAgent(num_actions) + pop_idx = -1 + else: + test_pop_ids = [int(FLAGS.interactive_mode)] + pop_agent, pop_idx = sample_bot_agent(bot_names, test_pop_ids, num_actions) + print(f"Sampled bot {pop_idx} ({bot_names[pop_idx]})") + + agents = [pop_agent, learning_agent] + + time_step = env.reset() + episode_rewards = np.zeros(num_players) + turn_num = 0 + + while not time_step.last(): + player_id = time_step.observations["current_player"] + if env.is_turn_based: + agent_output = agents[player_id].step(time_step, is_evaluation=True) + action_list = [agent_output.action] + else: + agents_output = [ + agent.step(time_step, is_evaluation=True) for agent in agents + ] + action_list = [agent_output.action for agent_output in agents_output] + if action_list[0] == -1: + # Restart episode. + print("Restarting episode.") + interactive_episode( + env, num_players, num_actions, bot_names, learning_agent + ) + return + action_list_str = [actions_str[int(x)] for x in action_list] + actions_seq[0] += action_list_str[0] + actions_seq[1] += action_list_str[1] + predictions = last_predictions(learning_agent) + indices = np.argsort(predictions) + top_10_preds = pretty_top10_preds_str(predictions, indices, max_weight=0.75) + time_step = env.step(action_list) + episode_rewards += time_step.rewards + print( + f"Turn {turn_num}, Prev actions: {action_list_str}, " + + f"Rewards: {time_step.rewards}, Returns: {episode_rewards} \n" + + f"Action Seq [0]: {actions_seq[0]} \n" + + f"Action Seq [1]: {actions_seq[1]}" + ) + print(f"Top 10 predictions: \n{top_10_preds}") + if FLAGS.interactive_mode != "human": + # Prompt to continue. + input("Press any key:") + turn_num += 1 + + +class ConstantActionAgent(rl_agent.AbstractAgent): + """An example agent class.""" + + def __init__( + self, player_id, num_actions, action_idx, name="constant_action_agent" + ): + assert num_actions > 0 + self._player_id = player_id + self._num_actions = num_actions + self._action_idx = action_idx + + def step(self, time_step, is_evaluation=False): + # If it is the end of the episode, don't select an action. 
+ if time_step.last(): + return + + cur_legal_actions = time_step.observations["legal_actions"][self._player_id] + action = cur_legal_actions[self._action_idx] + probs = np.zeros(self._num_actions) + probs[action] = 1.0 + return rl_agent.StepOutput(action=action, probs=probs) + + +class RegretMatchingAgent(rl_agent.AbstractAgent): + """TODO(author5): finish this agent.""" + + def __init__( + self, + player_id, + num_actions, + epsilon, + constant_observation=None, + name="regret_matching_agent", + ): + assert num_actions > 0 + self._player_id = player_id + self._num_actions = num_actions + self._regrets = {} + self._prev_info_state = None + self._prev_action = None + self._prev_sample_policy = None + self._prev_rm_policy = None + self._epsilon = epsilon + self._prev_legal_actions = None + self._constant_observation = constant_observation + + def _get_info_state_key(self, info_state): + return ( + self._constant_observation + if self._constant_observation is not None + else info_state + ) + + def _get_rm_policy(self, uniform_policy, info_state, legal_actions): + info_state_key = self._get_info_state_key(info_state) + regrets = self._regrets.get(info_state_key, None) + if regrets is None: + regrets = np.zeros(self._num_actions, dtype=np.float64) + regrets[legal_actions] = 0.000001 + self._regrets[info_state_key] = regrets + rm_policy = regrets.copy() + rm_policy[rm_policy < 0] = 0.0 + denom = rm_policy.sum() + if denom <= 0: + rm_policy = uniform_policy + else: + rm_policy /= denom + return rm_policy + + def _get_action_probs(self, info_state, legal_actions, epsilon): + uniform_policy = np.zeros(self._num_actions, dtype=np.float64) + uniform_policy[legal_actions] = 1.0 / len(legal_actions) + rm_policy = self._get_rm_policy(uniform_policy, info_state, legal_actions) + sample_policy = epsilon * uniform_policy + (1 - epsilon) * rm_policy + # print(sample_policy) + action = np.random.choice(np.arange(self._num_actions), p=sample_policy) + return action, sample_policy, rm_policy + + def step(self, time_step, is_evaluation=False): + legal_actions = time_step.observations["legal_actions"][self._player_id] + info_state = str(time_step.observations["info_state"][self._player_id]) + info_state_key = self._get_info_state_key(info_state) + sampled_action, probs = None, None + + if not time_step.last(): + epsilon = 0.0 if is_evaluation else self._epsilon + sampled_action, sample_policy, rm_policy = self._get_action_probs( + info_state, legal_actions, epsilon + ) + + # Learn step: don't learn during evaluation or at first agent steps. + if self._prev_sample_policy is not None and not is_evaluation: + reward = time_step.rewards[self._player_id] + values = np.zeros(self._num_actions, dtype=np.float64) + values[self._prev_action] = ( + reward / self._prev_sample_policy[self._prev_action] + ) + exp_value = np.dot(values, self._prev_rm_policy) + for action in legal_actions: + self._regrets[self._prev_info_state_key][action] += ( + values[action] - exp_value + ) + + if time_step.last(): # prepare for the next episode. + self._prev_sample_policy = None + return + + if not is_evaluation: + self._prev_info_state_key = info_state_key + self._prev_action = sampled_action + self._prev_sample_policy = sample_policy + self._prev_rm_policy = rm_policy + self._prev_legal_actions = legal_actions + self._prev_info_state = info_state + + return rl_agent.StepOutput(action=sampled_action, probs=probs) + + +class BotAgent(rl_agent.AbstractAgent): + """Agent class that wraps a bot. 
+ + Note, the environment must include the OpenSpiel state. + """ + + def __init__(self, num_actions, bot, name="bot_agent"): + assert num_actions > 0 + self._bot = bot + self._num_actions = num_actions + + def restart(self): + self._bot.restart() + + def step(self, time_step, is_evaluation=False): + # If it is the end of the episode, don't select an action. + if time_step.last(): + return + + _, state = pyspiel.deserialize_game_and_state( + time_step.observations["serialized_state"] + ) + + action = self._bot.step(state) + probs = np.zeros(self._num_actions) + probs[action] = 1.0 + + return rl_agent.StepOutput(action=action, probs=probs) + + +def create_training_agent( + agent_type, + num_actions, + info_state_size, + hidden_layers_sizes, + max_abs_reward, + rng_seed, + player_id, +): + """Create training agent.""" + if agent_type == "dqn": + return dqn.DQN( + player_id=player_id, + state_representation_size=info_state_size, + num_actions=num_actions, + discount_factor=0.99, + epsilon_start=1.0, + epsilon_end=0.1, + hidden_layers_sizes=hidden_layers_sizes, + learning_rate=FLAGS.learning_rate, + replay_buffer_capacity=FLAGS.replay_buffer_capacity, + batch_size=FLAGS.batch_size, + ) + elif agent_type == "bdqn": + return boltzmann_dqn.BoltzmannDQN( + player_id=player_id, + state_representation_size=info_state_size, + num_actions=num_actions, + discount_factor=0.99, + epsilon_start=1.0, + epsilon_end=0.1, + hidden_layers_sizes=hidden_layers_sizes, + learning_rate=FLAGS.learning_rate, + replay_buffer_capacity=FLAGS.replay_buffer_capacity, + batch_size=FLAGS.batch_size, + eta=FLAGS.eta, + seed=FLAGS.seed, + ) + elif agent_type == "qlearning": + return tabular_qlearner.QLearner( + player_id=player_id, + num_actions=num_actions, + step_size=FLAGS.learning_rate, + epsilon_schedule=rl_tools.LinearSchedule(0.5, 0.2, 1000000), + discount_factor=0.99, + ) + elif agent_type == "a2c": + return policy_gradient.PolicyGradient( + player_id, + info_state_size, + num_actions, + loss_str="a2c", + critic_learning_rate=FLAGS.critic_learning_rate, + pi_learning_rate=FLAGS.pi_learning_rate, + entropy_cost=FLAGS.entropy_cost, + num_critic_before_pi=FLAGS.num_critic_before_pi, + lambda_=FLAGS.lambda_, + additional_discount_factor=0.99, + hidden_layers_sizes=hidden_layers_sizes, + ) + elif agent_type == "impala": + return impala.IMPALA( # pylint: disable=g-complex-comprehension + player_id=player_id, + state_representation_size=info_state_size, + num_actions=num_actions, + num_players=2, + unroll_len=FLAGS.unroll_length, + net_factory=impala.BasicRNN, + rng_key=jax.random.PRNGKey(rng_seed), + max_abs_reward=max_abs_reward, + learning_rate=FLAGS.pi_learning_rate, + entropy=FLAGS.entropy_cost, + hidden_layers_sizes=hidden_layers_sizes, + num_predictions=pyspiel.ROSHAMBO_NUM_BOTS + 1, + prediction_weight=FLAGS.prediction_weight, + batch_size=FLAGS.batch_size, + ) + elif agent_type == "rm": + return RegretMatchingAgent( + player_id=player_id, num_actions=num_actions, epsilon=FLAGS.rm_epsilon + ) + elif agent_type == "rock": + return ConstantActionAgent(player_id, num_actions, 0) + elif agent_type == "paper": + return ConstantActionAgent(player_id, num_actions, 1) + elif agent_type == "scissors": + return ConstantActionAgent(player_id, num_actions, 2) + elif agent_type == "uniform": + return random_agent.RandomAgent(player_id, num_actions) + else: + assert False + + +def sample_bot_agent(pid, bot_names, population_ids, num_actions): + idx = np.random.randint(0, len(population_ids)) + bot_id = population_ids[idx] + name = 
bot_names[bot_id] + bot = pyspiel.make_roshambo_bot(pid, name) + return BotAgent(num_actions, bot, name=name), bot_id + + +class RollingAverage(object): + """Class to store a rolling average.""" + + def __init__(self, size=100): + self._size = size + self._values = np.array([0] * self._size, dtype=np.float64) + self._index = 0 + self._total_additions = 0 + + def add(self, value): + self._values[self._index] = value + self._total_additions += 1 + self._index = (self._index + 1) % self._size + + def mean(self): + n = min(self._size, self._total_additions) + if n == 0: + return 0 + return self._values.sum() / n + + +def train_test_split(roshambo_bot_ids): + """Create a train/test split for the roshambo bots.""" + + if FIXED_POPULATION is not None: + training_ids = FIXED_POPULATION[:] + testing_ids = FIXED_POPULATION[:] + elif FLAGS.pop_only >= 0: + # If the pop_only flag is set, make a population of just that member + assert FLAGS.pop_only < len(roshambo_bot_ids) + training_ids = [FLAGS.pop_only] + testing_ids = [FLAGS.pop_only] + else: + # Otherwise, do the train/test split + bot_ids_copy = roshambo_bot_ids.copy() + training_ids = list(bot_ids_copy.values()) + testing_ids = [] + if FLAGS.leave_out_set_size == 0: + testing_ids = training_ids[:] + else: + while len(testing_ids) < FLAGS.leave_out_set_size: + idx = np.random.randint(0, len(training_ids)) + testing_ids.append(training_ids[idx]) + training_ids.pop(idx) + return training_ids, testing_ids + + +def print_roshambo_bot_names_and_ids(roshambo_bot_names): + for i, name in enumerate(roshambo_bot_names): + print(f"{i}: {name}") + + +class AgentBot(pyspiel.Bot): + """An agent that wraps a bot.""" + + def __init__(self, agent): + pyspiel.Bot.__init__(self) + self._agent = agent + self._env = rl_environment.Environment( + "repeated_game(stage_game=matrix_rps(),num_repetitions=" + + f"{pyspiel.ROSHAMBO_NUM_THROWS}," + + f"recall={FLAGS.env_recall})", + include_full_state=True, + ) + + def step(self, state): + self._env.set_state(state) + time_step = self._env.get_time_step() + agent_output = self._agent.step(time_step, is_evaluation=True) + return agent_output.action + + +def eval_checkpoint(roshambo_bot_names, prediction_logger): + """Evaluate a checkpoint.""" + + print("Starting eval checkpoint") + print("Loading checkpoint") + checkpoint = Checkpoint(FLAGS.eval_checkpoint) + checkpoint.restore_or_save() + assert checkpoint.state.learning_agents is not None + print("Checkpoint loaded") + greenberg_bot = pyspiel.make_roshambo_bot(1, "greenberg") + greenberg_agent = BotAgent(3, greenberg_bot, name="greenberg_agent") + print("Starting eval for agent...") + env = rl_environment.Environment( + "repeated_game(stage_game=matrix_rps(),num_repetitions=" + + f"{pyspiel.ROSHAMBO_NUM_THROWS}," + + f"recall={FLAGS.env_recall})", + include_full_state=True, + ) + sum_eval_returns = np.zeros(pyspiel.ROSHAMBO_NUM_BOTS) + for j in range(50): + print(f"Eval checkpoint, j={j}") + _, pop_expl = eval_agent( + env, + 2, + 3, + roshambo_bot_names, + # checkpoint.state.learning_agents[1], + greenberg_agent, + prediction_logger, + 0, + ) + eval_returns = (-1) * pop_expl + sum_eval_returns += eval_returns + avg_eval_returns = sum_eval_returns / (j + 1) + pop_return = avg_eval_returns.sum() / pyspiel.ROSHAMBO_NUM_BOTS + wp_expl = avg_eval_returns.min() * (-1) + print(f"Pop return: {pop_return}, WP expl: {wp_expl}") + print(avg_eval_returns) + + +def main(_): + np.random.seed(FLAGS.seed) + + envs = [None, None] + envs[0] = rl_environment.Environment( + 
"repeated_game(stage_game=matrix_rps(),num_repetitions=" + + f"{pyspiel.ROSHAMBO_NUM_THROWS}," + + f"recall={FLAGS.env_recall})", + include_full_state=True, + ) + envs[1] = rl_environment.Environment( + "repeated_game(stage_game=matrix_rps(),num_repetitions=" + + f"{pyspiel.ROSHAMBO_NUM_THROWS}," + + f"recall={FLAGS.env_recall})", + include_full_state=True, + ) + num_players = 2 + max_abs_reward = max( + abs(envs[0].game.min_utility()), abs(envs[0].game.max_utility()) + ) + + info_state_size = envs[0].observation_spec()["info_state"][0] + num_actions = envs[0].action_spec()["num_actions"] + + print("Loading population...") + pop_size = pyspiel.ROSHAMBO_NUM_BOTS + print(f"Population size: {pop_size}") + roshambo_bot_names = pyspiel.roshambo_bot_names() + roshambo_bot_names.sort() + print_roshambo_bot_names_and_ids(roshambo_bot_names) + + bot_id = 0 + roshambo_bot_ids = {} + for name in roshambo_bot_names: + roshambo_bot_ids[name] = bot_id + bot_id += 1 + + print(f"Leave out set size: {FLAGS.leave_out_set_size}") + train_pop_ids, test_pop_ids = train_test_split(roshambo_bot_ids) + print(f"Training ids: {train_pop_ids}") + print(f"Test pop ids: {test_pop_ids}") + + if FLAGS.eval_checkpoint is not None: + prediction_logger = PredictionLogger(FLAGS.pred_logs_dir) + eval_checkpoint(roshambo_bot_names, prediction_logger) + return + + rolling_averager = RollingAverage(FLAGS.window_size) + expl_rolling_averagers = [] + for _ in range(pyspiel.ROSHAMBO_NUM_BOTS): + expl_rolling_averagers.append(RollingAverage(FLAGS.window_size)) + + print("Looking for checkpoint.") + if FLAGS.cp_dir is None: + print("cp_dir is None, disabling checkpointing.") + # checkpoint = phoenix.Checkpoint() + checkpoint = None + else: + print(f"Looking for checkpoint in {FLAGS.cp_dir}") + checkpoint = Checkpoint(FLAGS.cp_dir) + checkpoint.restore_or_save() + print(f"Checkpoint loaded. ep = {checkpoint.state.ep}") + + if FLAGS.interactive_mode is not None: + # Must restore an agent from a checkpoint + assert checkpoint.state.ep is not None + assert checkpoint.state.learning_agents is not None + interactive_episode( + envs[0], + num_players, + num_actions, + roshambo_bot_names, + checkpoint.state.learning_agent, + ) + + ep = None + if checkpoint is not None: + ep = checkpoint.state.ep + if checkpoint.state.rolling_averager is not None: + rolling_averager = checkpoint.state.rolling_averager + if checkpoint.state.expl_rolling_averagers is not None: + expl_rolling_averagers = checkpoint.state.expl_rolling_averagers + if checkpoint.state.np_rng_state is not None: + print("Restoring numpy random state") + np.random.set_state(checkpoint.state.np_rng_state) + + if ep is None: + ep = 0 + prediction_logger = PredictionLogger(FLAGS.pred_logs_dir) + if FLAGS.pred_logs_dir is not None: + pass # TODO(author5): Add back in (make full director) + + hidden_layers_sizes = [int(l) for l in FLAGS.hidden_layers_sizes] + # pylint: disable=g-complex-comprehension + if checkpoint is None or checkpoint.state.learning_agents is None: + learning_agents = [ + create_training_agent( + FLAGS.learner, + num_actions, + info_state_size, + hidden_layers_sizes, + max_abs_reward, + np.random.randint(100000000), + player_id, + ) + for player_id in [0, 1] + ] + else: + learning_agents = checkpoint.state.learning_agents + + print(f"Starting at ep {ep}.") + total_train_time = 0 + + print("Starting training loop...") + while ep < FLAGS.num_train_episodes: + # Checkpoint save. 
+ if checkpoint is not None and ep > 0 and ep % FLAGS.cp_freq == 0: + print("") + print(f"Saving checkpoint at ep {ep}...") + checkpoint.state.ep = ep + checkpoint.state.np_rng_state = np.random.get_state() + checkpoint.state.learning_agents = learning_agents + checkpoint.state.rolling_averager = rolling_averager + checkpoint.state.expl_rolling_averagers = expl_rolling_averagers + checkpoint.save() + print("Done saving checkpoint.") + + if (ep + 1) % FLAGS.eval_every == 0: + print("") + eps_per_sec = (ep + 1) / total_train_time + print(f"Starting eval at ep {ep}. Avg train eps per sec: {eps_per_sec}") + start_time_eval = time.time() + eval_returns, pop_expl = eval_agent( + envs[0], + num_players, + num_actions, + roshambo_bot_names, + learning_agents[1], + prediction_logger, + ep + 1, + ) + value = eval_returns[1] + rolling_averager.add(value) + max_pop_exp = -1000 + for i in range(pyspiel.ROSHAMBO_NUM_BOTS): + expl_rolling_averagers[i].add(pop_expl[i]) + max_pop_exp = max(max_pop_exp, expl_rolling_averagers[i].mean()) + r_mean = rolling_averager.mean() + end_time_eval = time.time() + print(f"Time for eval: {end_time_eval - start_time_eval}") + data = { + "episodes": ep + 1, + "value": value, + "swa_value": r_mean, + "expl_swa_value": max_pop_exp, + "agg_score_swa": r_mean - max_pop_exp, + "eps_per_sec": eps_per_sec, + } + print(data) + sys.stdout.flush() + + ep_start_time = time.time() + for learner_pid in range(2): + agents = [None, None] + agents[learner_pid] = learning_agents[learner_pid] + env = envs[learner_pid] + assert env is not None + # print(f"Learner pid: {learner_pid}") + roll = np.random.uniform() + + if roll < FLAGS.prob_selfplay: + agents[1 - learner_pid] = learning_agents[1 - learner_pid] + env.set_prediction_label(pyspiel.ROSHAMBO_NUM_BOTS) + else: + pop_agent, pop_idx = sample_bot_agent( + 1 - learner_pid, roshambo_bot_names, train_pop_ids, num_actions + ) + agents[1 - learner_pid] = pop_agent + env.set_prediction_label(pop_idx) + + time_step = env.reset() + while not time_step.last(): + time_step2 = copy.deepcopy(time_step) + player_id = time_step.observations["current_player"] + agents_output = [agents[0].step(time_step), agents[1].step(time_step2)] + action_list = [agent_output.action for agent_output in agents_output] + time_step = env.step(action_list) + + # Episode is over, step all agents with final info state. + time_step2 = copy.deepcopy(time_step) + assert agents[0] is not None + assert agents[1] is not None + agents[0].step(time_step) + agents[1].step(time_step2) + + ep_end_time = time.time() + total_train_time += ep_end_time - ep_start_time + ep += 1 + print(".", end="") + sys.stdout.flush() + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/rrps_poprl/rl_environment.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/rrps_poprl/rl_environment.py new file mode 100644 index 0000000..fc103dc --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/rrps_poprl/rl_environment.py @@ -0,0 +1,513 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Reinforcement Learning (RL) Environment for Open Spiel.
+
+This module wraps the Open Spiel Python interface, providing an RL-friendly
+API. It covers both turn-based and simultaneous move games. Interactions
+between agents and the underlying game occur mostly through the `reset` and
+`step` methods, which return a `TimeStep` structure (see its docstrings for
+more info).
+
+The following example illustrates the interaction dynamics. Consider a 2-player
+Kuhn Poker (turn-based game). Agents have access to the `observations` (a dict)
+field from `TimeStep`, containing the following members:
+ * `info_state`: list containing the game information state for each player. The
+   size of the list always corresponds to the number of players. E.g.:
+   [[0, 1, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0]].
+ * `legal_actions`: list containing legal action ID lists (one for each player).
+   E.g.: [[0, 1], [0]], which corresponds to actions 0 and 1 being valid for
+   player 0 (the 1st player) and action 0 being valid for player 1 (2nd player).
+ * `current_player`: zero-based integer representing the player to make a move.
+
+At each `step` call, the environment expects a singleton list with the action
+(as it's a turn-based game), e.g.: [1]. This (zero-based) action must correspond
+to the player specified at `current_player`. The game (which is at a decision
+node) will process the action and take as many steps as necessary to cover
+chance nodes, halting at a new decision or final node. Finally, a new
+`TimeStep` is returned to the agent.
+
+Simultaneous-move games follow analogous dynamics. The only difference is that
+the environment expects a list of actions, one per player. Note the
+`current_player` field is "irrelevant" here, admitting a constant value defined
+in spiel.h, which defaults to -2 (module level constant
+`SIMULTANEOUS_PLAYER_ID`).
+
+See open_spiel/python/examples/rl_example.py for example usages.
+"""
+
+import collections
+import enum
+
+from absl import logging
+import numpy as np
+
+import pyspiel
+
+SIMULTANEOUS_PLAYER_ID = pyspiel.PlayerId.SIMULTANEOUS
+
+
+class TimeStep(
+    collections.namedtuple(
+        "TimeStep", ["observations", "rewards", "discounts", "step_type"]
+    )
+):
+  """Returned with every call to `step` and `reset`.
+
+  A `TimeStep` contains the data emitted by a game at each step of interaction.
+  A `TimeStep` holds an `observation` (list of dicts, one per player),
+  associated lists of `rewards`, `discounts` and a `step_type`.
+
+  The first `TimeStep` in a sequence will have `StepType.FIRST`. The final
+  `TimeStep` will have `StepType.LAST`. All other `TimeStep`s in a sequence will
+  have `StepType.MID`.
+
+  Attributes:
+    observations: a list of dicts containing observations per player.
+    rewards: A list of scalars (one per player), or `None` if `step_type` is
+      `StepType.FIRST`, i.e. at the start of a sequence.
+    discounts: A list of discount values in the range `[0, 1]` (one per player),
+      or `None` if `step_type` is `StepType.FIRST`.
+    step_type: A `StepType` enum value.
+ """ + + __slots__ = () + + def first(self): + return self.step_type == StepType.FIRST + + def mid(self): + return self.step_type == StepType.MID + + def last(self): + return self.step_type == StepType.LAST + + def is_simultaneous_move(self): + return self.observations["current_player"] == SIMULTANEOUS_PLAYER_ID + + def current_player(self): + return self.observations["current_player"] + + +class StepType(enum.IntEnum): + """Defines the status of a `TimeStep` within a sequence.""" + + FIRST = 0 # Denotes the first `TimeStep` in a sequence. + MID = 1 # Denotes any `TimeStep` in a sequence that is not FIRST or LAST. + LAST = 2 # Denotes the last `TimeStep` in a sequence. + + def first(self): + return self is StepType.FIRST + + def mid(self): + return self is StepType.MID + + def last(self): + return self is StepType.LAST + + +# Global pyspiel members +def registered_games(): + return pyspiel.registered_games() + + +class ChanceEventSampler(object): + """Default sampler for external chance events.""" + + def __init__(self, seed=None): + self.seed(seed) + + def seed(self, seed=None): + self._rng = np.random.RandomState(seed) + + def __call__(self, state): + """Sample a chance event in the given state.""" + actions, probs = zip(*state.chance_outcomes()) + return self._rng.choice(actions, p=probs) + + +class ObservationType(enum.Enum): + """Defines what kind of observation to use.""" + + OBSERVATION = 0 # Use observation_tensor + INFORMATION_STATE = 1 # Use information_state_tensor + + +class Environment(object): + """Open Spiel reinforcement learning environment class.""" + + def __init__( + self, + game, + discount=1.0, + chance_event_sampler=None, + observation_type=None, + include_full_state=False, + mfg_distribution=None, + mfg_population=None, + enable_legality_check=False, + **kwargs, + ): + """Constructor. + + Args: + game: [string, pyspiel.Game] Open Spiel game name or game instance. + discount: float, discount used in non-initial steps. Defaults to 1.0. + chance_event_sampler: optional object with `sample_external_events` method + to sample chance events. + observation_type: what kind of observation to use. If not specified, will + default to INFORMATION_STATE unless the game doesn't provide it. + include_full_state: whether or not to include the full serialized + OpenSpiel state in the observations (sometimes useful for debugging). + mfg_distribution: the distribution over states if the game is a mean field + game. + mfg_population: The Mean Field Game population to consider. + enable_legality_check: Check the legality of the move before stepping. + **kwargs: dict, additional settings passed to the Open Spiel game. + """ + self._chance_event_sampler = chance_event_sampler or ChanceEventSampler() + self._include_full_state = include_full_state + self._mfg_distribution = mfg_distribution + self._mfg_population = mfg_population + self._enable_legality_check = enable_legality_check + self._prediction_label = 0 + + if isinstance(game, str): + if kwargs: + game_settings = {key: val for (key, val) in kwargs.items()} + logging.info("Using game settings: %s", game_settings) + self._game = pyspiel.load_game(game, game_settings) + else: + logging.info("Using game string: %s", game) + self._game = pyspiel.load_game(game) + else: # pyspiel.Game or API-compatible object. 
+ logging.info("Using game instance: %s", game.get_type().short_name) + self._game = game + + self._num_players = self._game.num_players() + self._state = None + self._should_reset = True + + # Discount returned at non-initial steps. + self._discounts = [discount] * self._num_players + + # Determine what observation type to use. + if observation_type is None: + if self._game.get_type().provides_information_state_tensor: + observation_type = ObservationType.INFORMATION_STATE + else: + observation_type = ObservationType.OBSERVATION + + # Check the requested observation type is supported. + if observation_type == ObservationType.OBSERVATION: + if not self._game.get_type().provides_observation_tensor: + raise ValueError(f"observation_tensor not supported by {game}") + elif observation_type == ObservationType.INFORMATION_STATE: + if not self._game.get_type().provides_information_state_tensor: + raise ValueError(f"information_state_tensor not supported by {game}") + self._use_observation = observation_type == ObservationType.OBSERVATION + + if self._game.get_type().dynamics == pyspiel.GameType.Dynamics.MEAN_FIELD: + assert mfg_distribution is not None + assert mfg_population is not None + assert 0 <= mfg_population < self._num_players + + def seed(self, seed=None): + self._chance_event_sampler.seed(seed) + + def set_prediction_label(self, label): + self._prediction_label = label + + def get_time_step(self): + """Returns a `TimeStep` without updating the environment. + + Returns: + A `TimeStep` namedtuple containing: + observation: list of dicts containing one observations per player, each + corresponding to `observation_spec()`. + reward: list of rewards at this timestep, or None if step_type is + `StepType.FIRST`. + discount: list of discounts in the range [0, 1], or None if step_type is + `StepType.FIRST`. + step_type: A `StepType` value. + """ + observations = { + "info_state": [], + "legal_actions": [], + "current_player": [], + "serialized_state": [], + "prediction_label": [], + } + rewards = [] + step_type = StepType.LAST if self._state.is_terminal() else StepType.MID + self._should_reset = step_type == StepType.LAST + + cur_rewards = self._state.rewards() + for player_id in range(self.num_players): + rewards.append(cur_rewards[player_id]) + observations["info_state"].append( + self._state.observation_tensor(player_id) + if self._use_observation + else self._state.information_state_tensor(player_id) + ) + + observations["legal_actions"].append(self._state.legal_actions(player_id)) + observations["current_player"] = self._state.current_player() + discounts = self._discounts + if step_type == StepType.LAST: + # When the game is in a terminal state set the discount to 0. 
+ discounts = [0.0 for _ in discounts] + + if self._include_full_state: + observations["serialized_state"] = pyspiel.serialize_game_and_state( + self._game, self._state + ) + + # For gym environments + if hasattr(self._state, "last_info"): + observations["info"] = self._state.last_info + observations["prediction_label"] = self._prediction_label + + return TimeStep( + observations=observations, + rewards=rewards, + discounts=discounts, + step_type=step_type, + ) + + def _check_legality(self, actions): + if self.is_turn_based: + legal_actions = self._state.legal_actions() + if actions[0] not in legal_actions: + raise RuntimeError(f"step() called on illegal action {actions[0]}") + else: + for p in range(len(actions)): + legal_actions = self._state.legal_actions(p) + if legal_actions and actions[p] not in legal_actions: + raise RuntimeError( + f"step() by player {p} called on illegal " + + f"action: {actions[p]}" + ) + + def step(self, actions): + """Updates the environment according to `actions` and returns a `TimeStep`. + + If the environment returned a `TimeStep` with `StepType.LAST` at the + previous step, this call to `step` will start a new sequence and `actions` + will be ignored. + + This method will also start a new sequence if called after the environment + has been constructed and `reset` has not been called. Again, in this case + `actions` will be ignored. + + Args: + actions: a list containing one action per player, following specifications + defined in `action_spec()`. + + Returns: + A `TimeStep` namedtuple containing: + observation: list of dicts containing one observations per player, each + corresponding to `observation_spec()`. + reward: list of rewards at this timestep, or None if step_type is + `StepType.FIRST`. + discount: list of discounts in the range [0, 1], or None if step_type is + `StepType.FIRST`. + step_type: A `StepType` value. + """ + assert ( + len(actions) == self.num_actions_per_step + ), "Invalid number of actions! Expected {}".format( + self.num_actions_per_step + ) + if self._should_reset: + return self.reset() + + if self._enable_legality_check: + self._check_legality(actions) + + if self.is_turn_based: + self._state.apply_action(actions[0]) + else: + self._state.apply_actions(actions) + self._sample_external_events() + + return self.get_time_step() + + def reset(self): + """Starts a new sequence and returns the first `TimeStep` of this sequence. + + Returns: + A `TimeStep` namedtuple containing: + observations: list of dicts containing one observations per player, each + corresponding to `observation_spec()`. + rewards: list of rewards at this timestep, or None if step_type is + `StepType.FIRST`. + discounts: list of discounts in the range [0, 1], or None if step_type + is `StepType.FIRST`. + step_type: A `StepType` value. 
+ """ + self._should_reset = False + if ( + self._game.get_type().dynamics == pyspiel.GameType.Dynamics.MEAN_FIELD + and self._num_players > 1 + ): + self._state = self._game.new_initial_state_for_population( + self._mfg_population + ) + else: + self._state = self._game.new_initial_state() + self._sample_external_events() + + observations = { + "info_state": [], + "legal_actions": [], + "current_player": [], + "serialized_state": [], + "prediction_label": self._prediction_label, + } + for player_id in range(self.num_players): + observations["info_state"].append( + self._state.observation_tensor(player_id) + if self._use_observation + else self._state.information_state_tensor(player_id) + ) + observations["legal_actions"].append(self._state.legal_actions(player_id)) + observations["current_player"] = self._state.current_player() + + if self._include_full_state: + observations["serialized_state"] = pyspiel.serialize_game_and_state( + self._game, self._state + ) + + return TimeStep( + observations=observations, + rewards=None, + discounts=None, + step_type=StepType.FIRST, + ) + + def _sample_external_events(self): + """Sample chance events until we get to a decision node.""" + while self._state.is_chance_node() or ( + self._state.current_player() == pyspiel.PlayerId.MEAN_FIELD + ): + if self._state.is_chance_node(): + outcome = self._chance_event_sampler(self._state) + self._state.apply_action(outcome) + if self._state.current_player() == pyspiel.PlayerId.MEAN_FIELD: + dist_to_register = self._state.distribution_support() + dist = [ + self._mfg_distribution.value_str(str_state, default_value=0.0) + for str_state in dist_to_register + ] + self._state.update_distribution(dist) + + def observation_spec(self): + """Defines the observation per player provided by the environment. + + Each dict member will contain its expected structure and shape. E.g.: for + Kuhn Poker {"info_state": (6,), "legal_actions": (2,), "current_player": (), + "serialized_state": ()} + + Returns: + A specification dict describing the observation fields and shapes. + """ + return dict( + info_state=tuple([ + self._game.observation_tensor_size() + if self._use_observation + else self._game.information_state_tensor_size() + ]), + legal_actions=(self._game.num_distinct_actions(),), + current_player=(), + serialized_state=(), + ) + + def action_spec(self): + """Defines per player action specifications. + + Specifications include action boundaries and their data type. + E.g.: for Kuhn Poker {"num_actions": 2, "min": 0, "max":1, "dtype": int} + + Returns: + A specification dict containing per player action properties. + """ + return dict( + num_actions=self._game.num_distinct_actions(), + min=0, + max=self._game.num_distinct_actions() - 1, + dtype=int, + ) + + # Environment properties + @property + def use_observation(self): + """Returns whether the environment is using the game's observation. + + If false, it is using the game's information state. + """ + return self._use_observation + + # Game properties + @property + def name(self): + return self._game.get_type().short_name + + @property + def num_players(self): + return self._game.num_players() + + @property + def num_actions_per_step(self): + return 1 if self.is_turn_based else self.num_players + + # New RL calls for more advanced use cases (e.g. search + RL). 
+ @property + def is_turn_based(self): + return ( + self._game.get_type().dynamics == pyspiel.GameType.Dynamics.SEQUENTIAL + ) or ( + self._game.get_type().dynamics == pyspiel.GameType.Dynamics.MEAN_FIELD + ) + + @property + def max_game_length(self): + return self._game.max_game_length() + + @property + def is_chance_node(self): + return self._state.is_chance_node() + + @property + def game(self): + return self._game + + def set_state(self, new_state): + """Updates the game state.""" + assert ( + new_state.get_game() == self.game + ), "State must have been created by the same game." + self._state = new_state + + @property + def get_state(self): + return self._state + + @property + def mfg_distribution(self): + return self._mfg_distribution + + def update_mfg_distribution(self, mfg_distribution): + """Updates the distribution over the states of the mean field game.""" + assert ( + self._game.get_type().dynamics == pyspiel.GameType.Dynamics.MEAN_FIELD + ) + self._mfg_distribution = mfg_distribution diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/single_agent_cliff_walking.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/single_agent_cliff_walking.py new file mode 100644 index 0000000..ea1bd47 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/single_agent_cliff_walking.py @@ -0,0 +1,73 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Python spiel example.""" + +import logging +from absl import app +from absl import flags + +from open_spiel.python.algorithms import tabular_qlearner +from open_spiel.python.environments import cliff_walking + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("num_episodes", int(5e2), "Number of train episodes.") + + +def eval_agent(env, agent, num_episodes): + """Evaluates `agent` for `num_episodes`.""" + rewards = 0.0 + for _ in range(num_episodes): + time_step = env.reset() + episode_reward = 0 + while not time_step.last(): + agent_output = agent.step(time_step, is_evaluation=True) + time_step = env.step([agent_output.action]) + episode_reward += time_step.rewards[0] + rewards += episode_reward + return rewards / num_episodes + + +def main_loop(unused_arg): + """Trains a tabular qlearner agent in the cliff walking environment.""" + env = cliff_walking.Environment(width=5, height=3) + num_actions = env.action_spec()["num_actions"] + + train_episodes = FLAGS.num_episodes + eval_interval = 50 + + agent = tabular_qlearner.QLearner( + player_id=0, step_size=0.05, num_actions=num_actions) + + # Train the agent + for ep in range(train_episodes): + time_step = env.reset() + while not time_step.last(): + agent_output = agent.step(time_step) + action_list = [agent_output.action] + time_step = env.step(action_list) + # Episode is over, step agent with final info state. 
+ agent.step(time_step) + + if ep and ep % eval_interval == 0: + logging.info("-" * 80) + logging.info("Episode %s", ep) + logging.info("Last loss: %s", agent.loss) + avg_return = eval_agent(env, agent, 100) + logging.info("Avg return: %s", avg_return) + + +if __name__ == "__main__": + app.run(main_loop) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/tic_tac_toe_dqn_vs_tabular.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/tic_tac_toe_dqn_vs_tabular.py new file mode 100644 index 0000000..0dc8e9d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/tic_tac_toe_dqn_vs_tabular.py @@ -0,0 +1,170 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""DQN agent vs Tabular Q-Learning agents trained on Tic Tac Toe. + +The two agents are trained by playing against each other. Then, the game +can be played against the DQN agent from the command line. +""" + +import logging +import sys + +from absl import app +from absl import flags +import numpy as np + +from open_spiel.python import rl_environment +from open_spiel.python.algorithms import random_agent +from open_spiel.python.algorithms import tabular_qlearner +from open_spiel.python.jax import dqn + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("num_episodes", int(5e4), "Number of train episodes.") +flags.DEFINE_boolean( + "interactive_play", True, + "Whether to run an interactive play with the agent after training.") + + +def pretty_board(time_step): + """Returns the board in `time_step` in a human readable format.""" + info_state = time_step.observations["info_state"][0] + x_locations = np.nonzero(info_state[9:18])[0] + o_locations = np.nonzero(info_state[18:])[0] + board = np.full(3 * 3, ".") + board[x_locations] = "X" + board[o_locations] = "0" + board = np.reshape(board, (3, 3)) + return board + + +def command_line_action(time_step): + """Gets a valid action from the user on the command line.""" + current_player = time_step.observations["current_player"] + legal_actions = time_step.observations["legal_actions"][current_player] + action = -1 + while action not in legal_actions: + print("Choose an action from {}:".format(legal_actions)) + sys.stdout.flush() + action_str = input() + try: + action = int(action_str) + except ValueError: + continue + return action + + +def eval_against_random_bots(env, trained_agents, random_agents, num_episodes): + """Evaluates `trained_agents` against `random_agents` for `num_episodes`.""" + num_players = len(trained_agents) + sum_episode_rewards = np.zeros(num_players) + for player_pos in range(num_players): + cur_agents = random_agents[:] + cur_agents[player_pos] = trained_agents[player_pos] + for _ in range(num_episodes): + time_step = env.reset() + episode_rewards = 0 + while not time_step.last(): + player_id = time_step.observations["current_player"] + agent_output = cur_agents[player_id].step(time_step, is_evaluation=True) + action_list = [agent_output.action] + time_step = env.step(action_list) + 
episode_rewards += time_step.rewards[player_pos] + sum_episode_rewards[player_pos] += episode_rewards + return sum_episode_rewards / num_episodes + + +def main(_): + game = "tic_tac_toe" + num_players = 2 + env = rl_environment.Environment(game) + state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + hidden_layers_sizes = [32, 32] + replay_buffer_capacity = int(1e4) + train_episodes = FLAGS.num_episodes + loss_report_interval = 1000 + + dqn_agent = dqn.DQN( + player_id=0, + state_representation_size=state_size, + num_actions=num_actions, + hidden_layers_sizes=hidden_layers_sizes, + replay_buffer_capacity=replay_buffer_capacity, + ) + tabular_q_agent = tabular_qlearner.QLearner( + player_id=1, num_actions=num_actions + ) + agents = [dqn_agent, tabular_q_agent] + + # Train agent + for ep in range(train_episodes): + if ep and ep % loss_report_interval == 0: + logging.info("[%s/%s] DQN loss: %s", ep, train_episodes, agents[0].loss) + time_step = env.reset() + while not time_step.last(): + player_id = time_step.observations["current_player"] + agent_output = agents[player_id].step(time_step) + action_list = [agent_output.action] + time_step = env.step(action_list) + + # Episode is over, step all agents with final info state. + for agent in agents: + agent.step(time_step) + + # Evaluate against random agent + random_agents = [ + random_agent.RandomAgent(player_id=idx, num_actions=num_actions) + for idx in range(num_players) + ] + r_mean = eval_against_random_bots(env, agents, random_agents, 1000) + logging.info("Mean episode rewards: %s", r_mean) + + if not FLAGS.interactive_play: + return + + # Play from the command line against the trained DQN agent. + human_player = 1 + while True: + logging.info("You are playing as %s", "X" if human_player else "0") + time_step = env.reset() + while not time_step.last(): + player_id = time_step.observations["current_player"] + if player_id == human_player: + agent_out = agents[human_player].step(time_step, is_evaluation=True) + logging.info("\n%s", agent_out.probs.reshape((3, 3))) + logging.info("\n%s", pretty_board(time_step)) + action = command_line_action(time_step) + else: + agent_out = agents[1 - human_player].step(time_step, is_evaluation=True) + action = agent_out.action + time_step = env.step([action]) + + logging.info("\n%s", pretty_board(time_step)) + + logging.info("End of game!") + if time_step.rewards[human_player] > 0: + logging.info("You win") + elif time_step.rewards[human_player] < 0: + logging.info("You lose") + else: + logging.info("Draw") + # Switch order of players + human_player = 1 - human_player + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/tic_tac_toe_qlearner.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/tic_tac_toe_qlearner.py new file mode 100644 index 0000000..e31aba9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/tic_tac_toe_qlearner.py @@ -0,0 +1,159 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tabular Q-Learner example on Tic Tac Toe. + +Two Q-Learning agents are trained by playing against each other. Then, the game +can be played against the agents from the command line. + +After about 10**5 training episodes, the agents reach a good policy: win rate +against random opponents is around 99% for player 0 and 92% for player 1. +""" + +import logging +import sys +from absl import app +from absl import flags +import numpy as np + +from open_spiel.python import rl_environment +from open_spiel.python.algorithms import random_agent +from open_spiel.python.algorithms import tabular_qlearner + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("num_episodes", int(5e4), "Number of train episodes.") +flags.DEFINE_boolean( + "interactive_play", + True, + "Whether to run an interactive play with the agent after training.", +) + + +def pretty_board(time_step): + """Returns the board in `time_step` in a human readable format.""" + info_state = time_step.observations["info_state"][0] + x_locations = np.nonzero(info_state[9:18])[0] + o_locations = np.nonzero(info_state[18:])[0] + board = np.full(3 * 3, ".") + board[x_locations] = "X" + board[o_locations] = "0" + board = np.reshape(board, (3, 3)) + return board + + +def command_line_action(time_step): + """Gets a valid action from the user on the command line.""" + current_player = time_step.observations["current_player"] + legal_actions = time_step.observations["legal_actions"][current_player] + action = -1 + while action not in legal_actions: + print("Choose an action from {}:".format(legal_actions)) + sys.stdout.flush() + action_str = input() + try: + action = int(action_str) + except ValueError: + continue + return action + + +def eval_against_random_bots(env, trained_agents, random_agents, num_episodes): + """Evaluates `trained_agents` against `random_agents` for `num_episodes`.""" + wins = np.zeros(2) + for player_pos in range(2): + if player_pos == 0: + cur_agents = [trained_agents[0], random_agents[1]] + else: + cur_agents = [random_agents[0], trained_agents[1]] + for _ in range(num_episodes): + time_step = env.reset() + while not time_step.last(): + player_id = time_step.observations["current_player"] + agent_output = cur_agents[player_id].step(time_step, is_evaluation=True) + time_step = env.step([agent_output.action]) + if time_step.rewards[player_pos] > 0: + wins[player_pos] += 1 + return wins / num_episodes + + +def main(_): + game = "tic_tac_toe" + num_players = 2 + + env = rl_environment.Environment(game) + num_actions = env.action_spec()["num_actions"] + + agents = [ + tabular_qlearner.QLearner(player_id=idx, num_actions=num_actions) + for idx in range(num_players) + ] + + # random agents for evaluation + random_agents = [ + random_agent.RandomAgent(player_id=idx, num_actions=num_actions) + for idx in range(num_players) + ] + + # 1. Train the agents + training_episodes = FLAGS.num_episodes + for cur_episode in range(training_episodes): + if cur_episode % int(1e4) == 0: + win_rates = eval_against_random_bots(env, agents, random_agents, 1000) + logging.info("Starting episode %s, win_rates %s", cur_episode, win_rates) + time_step = env.reset() + while not time_step.last(): + player_id = time_step.observations["current_player"] + agent_output = agents[player_id].step(time_step) + time_step = env.step([agent_output.action]) + + # Episode is over, step all agents with final info state. 
+ for agent in agents: + agent.step(time_step) + + if not FLAGS.interactive_play: + return + + # 2. Play from the command line against the trained agent. + human_player = 1 + while True: + logging.info("You are playing as %s", "O" if human_player else "X") + time_step = env.reset() + while not time_step.last(): + player_id = time_step.observations["current_player"] + if player_id == human_player: + agent_out = agents[human_player].step(time_step, is_evaluation=True) + logging.info("\n%s", agent_out.probs.reshape((3, 3))) + logging.info("\n%s", pretty_board(time_step)) + action = command_line_action(time_step) + else: + agent_out = agents[1 - human_player].step(time_step, is_evaluation=True) + action = agent_out.action + time_step = env.step([action]) + + logging.info("\n%s", pretty_board(time_step)) + + logging.info("End of game!") + if time_step.rewards[human_player] > 0: + logging.info("You win") + elif time_step.rewards[human_player] < 0: + logging.info("You lose") + else: + logging.info("Draw") + # Switch order of players + human_player = 1 - human_player + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/treeviz_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/treeviz_example.py new file mode 100644 index 0000000..bb7b990 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/treeviz_example.py @@ -0,0 +1,89 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Game tree visualization example.""" + +from absl import app +from absl import flags +from absl import logging + +import pyspiel +from open_spiel.python.visualizations import treeviz + +FLAGS = flags.FLAGS +flags.DEFINE_string("game", "kuhn_poker", "Name of the game") +flags.DEFINE_string("out", "/tmp/gametree.png", "Name of output file, e.g., " + "[*.png|*.pdf].") +flags.DEFINE_enum("prog", "dot", ["dot", "neato", "circo"], "Graphviz layout.") +flags.DEFINE_boolean("group_infosets", False, "Whether to group infosets.") +flags.DEFINE_boolean("group_terminal", False, + "Whether to group terminal nodes.") +flags.DEFINE_boolean("group_pubsets", False, "Whether to group public states.") +flags.DEFINE_string("target_pubset", "*", + "Limit grouping of public states only to specified state.") +flags.DEFINE_boolean("verbose", False, "Whether to print verbose output.") + + +def _zero_sum_node_decorator(state): + """Custom node decorator that only shows the return of the first player.""" + attrs = treeviz.default_node_decorator(state) # get default attributes + if state.is_terminal(): + attrs["label"] = str(int(state.returns()[0])) + return attrs + + +def main(argv): + del argv + + game = pyspiel.load_game(FLAGS.game) + game_type = game.get_type() + + if game_type.dynamics == pyspiel.GameType.Dynamics.SIMULTANEOUS: + logging.warn("%s is not turn-based. 
Trying to reload game as turn-based.", + FLAGS.game) + game = pyspiel.load_game_as_turn_based(FLAGS.game) + game_type = game.get_type() + + if game_type.dynamics != pyspiel.GameType.Dynamics.SEQUENTIAL: + raise ValueError("Game must be sequential, not {}".format( + game_type.dynamics)) + + if (game_type.utility == pyspiel.GameType.Utility.ZERO_SUM and + game.num_players() == 2): + logging.info("Game is zero-sum: only showing first-player's returns.") + gametree = treeviz.GameTree( + game, + node_decorator=_zero_sum_node_decorator, + group_infosets=FLAGS.group_infosets, + group_terminal=FLAGS.group_terminal, + group_pubsets=FLAGS.group_pubsets, + target_pubset=FLAGS.target_pubset) + else: + # use default decorators + gametree = treeviz.GameTree( + game, + group_infosets=FLAGS.group_infosets, + group_terminal=FLAGS.group_terminal, + group_pubsets=FLAGS.group_pubsets, + target_pubset=FLAGS.target_pubset) + + if FLAGS.verbose: + logging.info("Game tree:\n%s", gametree.to_string()) + + gametree.draw(FLAGS.out, prog=FLAGS.prog) + logging.info("Game tree saved to file: %s", FLAGS.out) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/twenty_forty_eight_td_n_tuple_network.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/twenty_forty_eight_td_n_tuple_network.py new file mode 100644 index 0000000..466cc76 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/twenty_forty_eight_td_n_tuple_network.py @@ -0,0 +1,169 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""TD Learning with N-Tuple Networks for 2048.""" + +from absl import app +from absl import flags +from absl import logging +import numpy as np +import pyspiel + +FLAGS = flags.FLAGS +flags.DEFINE_string("game", "2048", "Name of the game.") +flags.DEFINE_integer("num_train_episodes", 15000, + "Number of training episodes.") +flags.DEFINE_integer("eval_every", 1000, + "Episode frequency at which the agent is evaluated.") +flags.DEFINE_float("alpha", 0.02, "Learning rate") + + +class NTupleNetwork: + """An N-tuple Network class. + + N-Tuple Networks are an effective way of reducing the storage requirement for + evaluating and learning state values. This is accomplished by defining a + collection of N-Tuples that represent various segments in a game's + ObservationTensor. + + The value of a given state is defined as the sum of values of each N-Tuple, + which are stored in a look up table. The policy of the agent is to chose an + action that maximises the value of the after-state. After each episode, all + the states that were reached in that episode is used for updating the state + values using Temporal Difference Learning. + + References: + [1] Szubert, Marcin and Wojciech Jaśkowski. "Temporal difference learning of + n-tuple networks for the game 2048." Computational Intelligence and Games + (CIG), 2014 IEEE Conference on. IEEE, 2014. 
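+
+  Concretely, in this implementation, writing enc_i(s) for the indices obtained
+  by taking log2 of the tile values covered by the i-th tuple in state s (with
+  0 for an empty cell), the value estimate is
+
+    V(s) = sum_i LUT_i[enc_i(s)],
+
+  where LUT_i is the i-th tuple's slice of the look-up table. The greedy policy
+  picks the action whose after-state maximises reward + V(after-state), and
+  `learn` sweeps the recorded after-states backwards, nudging each value toward
+  a TD(0)-style target of the next reward plus the next after-state's value
+  (step size `alpha` per tuple).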
+ """ + + def __init__(self, n_tuple_size, max_tuple_index, n_tuples): + for tuples in n_tuples: + if len(tuples) != n_tuple_size: + raise ValueError("n_tuple_size does not match size of tuples") + n_tuple_network_size = len(n_tuples) + look_up_table_shape = (n_tuple_network_size,) + ( + max_tuple_index, + ) * n_tuple_size + + self.n_tuples = n_tuples + self.look_up_table = np.zeros(look_up_table_shape) + + def learn(self, states): + target = 0 + while states: + state = states.pop() + error = target - self.value(state) + target = state.rewards()[0] + self.update(state, FLAGS.alpha * error) + + def update(self, state, adjust): + v = 0 + for idx, n_tuple in enumerate(self.n_tuples): + v += self.update_tuple(idx, n_tuple, state, adjust) + return v + + def update_tuple(self, idx, n_tuple, state, adjust): + observation_tensor = state.observation_tensor(0) + index = (idx,) + tuple( + [ + 0 + if observation_tensor[tile] == 0 + else int(np.log2(observation_tensor[tile])) + for tile in n_tuple + ] + ) + self.look_up_table[index] += adjust + return self.look_up_table[index] + + def evaluator(self, state, action): + working_state = state.clone() + working_state.apply_action(action) + return working_state.rewards()[0] + self.value(working_state) + + def value(self, state): + """Returns the value of this state.""" + + observation_tensor = state.observation_tensor(0) + v = 0 + for idx, n_tuple in enumerate(self.n_tuples): + lookup_tuple_index = [ + 0 + if observation_tensor[tile] == 0 + else int(np.log2(observation_tensor[tile])) + for tile in n_tuple + ] + lookup_index = (idx,) + tuple(lookup_tuple_index) + v += self.look_up_table[lookup_index] + return v + + +def main(_): + n_tuple_network = NTupleNetwork( + 6, + 15, + [ + [0, 1, 2, 3, 4, 5], + [4, 5, 6, 7, 8, 9], + [0, 1, 2, 4, 5, 6], + [4, 5, 6, 8, 9, 10], + ], + ) + game = pyspiel.load_game(FLAGS.game) + sum_rewards = 0 + largest_tile = 0 + max_score = 0 + for ep in range(FLAGS.num_train_episodes): + state = game.new_initial_state() + states_in_episode = [] + while not state.is_terminal(): + if state.is_chance_node(): + outcomes = state.chance_outcomes() + action_list, prob_list = zip(*outcomes) + action = np.random.choice(action_list, p=prob_list) + state.apply_action(action) + else: + legal_actions = state.legal_actions(state.current_player()) + # pylint: disable=cell-var-from-loop + best_action = max( + legal_actions, + key=lambda action: n_tuple_network.evaluator(state, action), + ) + state.apply_action(best_action) + states_in_episode.append(state.clone()) + + sum_rewards += state.returns()[0] + largest_tile_from_episode = max(state.observation_tensor(0)) + if largest_tile_from_episode > largest_tile: + largest_tile = largest_tile_from_episode + if state.returns()[0] > max_score: + max_score = state.returns()[0] + + n_tuple_network.learn(states_in_episode) + + if (ep + 1) % FLAGS.eval_every == 0: + logging.info( + "[%s] Average Score: %s, Max Score: %s, Largest Tile Reached: %s", + ep + 1, + int(sum_rewards / FLAGS.eval_every), + int(max_score), + int(largest_tile), + ) + sum_rewards = 0 + largest_tile = 0 + max_score = 0 + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/uniform_policy_exploitability.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/uniform_policy_exploitability.py new file mode 100644 index 0000000..b9c179c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/uniform_policy_exploitability.py @@ -0,0 +1,36 @@ +# 
Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Compute exploitability of a uniform policy.""" + +from absl import app +from absl import flags + +from open_spiel.python import policy +from open_spiel.python.algorithms import exploitability +import pyspiel + +FLAGS = flags.FLAGS + +flags.DEFINE_string("game", "kuhn_poker", "Name of the game") + + +def main(_): + game = pyspiel.load_game(FLAGS.game) + expl = exploitability.exploitability(game, policy.UniformRandomPolicy(game)) + print("Exploitability: {}".format(expl)) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/universal_poker_cfr_cpp_load_from_acpc_gamedef_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/universal_poker_cfr_cpp_load_from_acpc_gamedef_example.py new file mode 100644 index 0000000..0123c01 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/universal_poker_cfr_cpp_load_from_acpc_gamedef_example.py @@ -0,0 +1,94 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Example use of the CFR algorithm on Kuhn Poker.""" + +import pickle +import sys +from absl import app +from absl import flags + +import pyspiel + +universal_poker = pyspiel.universal_poker + +FLAGS = flags.FLAGS + +flags.DEFINE_enum("solver", "cfr", ["cfr", "cfrplus", "cfrbr"], "CFR solver") +_ITERATIONS = flags.DEFINE_integer("iterations", 100, "Number of iterations") + +CUSTOM_LIMIT_HOLDEM_ACPC_GAMEDEF = """\ +GAMEDEF +limit +numPlayers = 2 +numRounds = 1 +blind = 2 4 +raiseSize = 4 4 8 +firstPlayer = 1 +maxRaises = 2 2 2 +numSuits = 2 +numRanks = 5 +numHoleCards = 1 +numBoardCards = 0 2 1 +stack = 20 +END GAMEDEF +""" + + +def main(_): + game = universal_poker.load_universal_poker_from_acpc_gamedef( + CUSTOM_LIMIT_HOLDEM_ACPC_GAMEDEF + ) + + solver = None + if FLAGS.solver == "cfr": + solver = pyspiel.CFRSolver(game) + elif FLAGS.solver == "cfrplus": + solver = pyspiel.CFRPlusSolver(game) + elif FLAGS.solver == "cfrbr": + solver = pyspiel.CFRBRSolver(game) + else: + print("Unknown solver") + sys.exit(0) + + for i in range(int(_ITERATIONS.value / 2)): + solver.evaluate_and_update_policy() + print("Iteration {} exploitability: {:.6f}".format( + i, pyspiel.exploitability(game, solver.average_policy()))) + + filename = "/tmp/{}_solver.pickle".format(FLAGS.solver) + print("Persisting the model...") + with open(filename, "wb") as file: + pickle.dump(solver, file, pickle.HIGHEST_PROTOCOL) + + print("Loading the model...") + with open(filename, "rb") as file: + loaded_solver = pickle.load(file) + print("Exploitability of the loaded model: {:.6f}".format( + pyspiel.exploitability(game, loaded_solver.average_policy()))) + + for i in range(int(_ITERATIONS.value / 2)): + loaded_solver.evaluate_and_update_policy() + tabular_policy = loaded_solver.tabular_average_policy() + print(f"Tabular policy length: {len(tabular_policy)}") + print( + "Iteration {} exploitability: {:.6f}".format( + int(_ITERATIONS.value / 2) + i, + pyspiel.exploitability(game, loaded_solver.average_policy()), + ) + ) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/examples/value_iteration.py b/scenarios/bargaining/open_spiel/open_spiel/python/examples/value_iteration.py new file mode 100644 index 0000000..c1b5c47 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/examples/value_iteration.py @@ -0,0 +1,59 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Python spiel example to use value iteration to solve a game.""" + +from absl import app +from absl import flags + +from open_spiel.python.algorithms import value_iteration +import pyspiel + +FLAGS = flags.FLAGS +flags.DEFINE_string("game", "tic_tac_toe", "Name of the game") + + +def play_tic_tac_toe(): + """Solves tic tac toe.""" + game = pyspiel.load_game("tic_tac_toe") + + print("Solving the game; depth_limit = {}".format(-1)) + values = value_iteration.value_iteration(game, -1, 0.01) + + for state, value in values.items(): + print("") + print(str(state)) + print("Value = {}".format(value)) + + initial_state = "...\n...\n..." + cross_win_state = "...\n...\n.ox" + naught_win_state = "x..\noo.\nxx." + + assert values[initial_state] == 0, "State should be drawn: \n" + initial_state + assert values[cross_win_state] == 1, ("State should be won by player 0: \n" + + cross_win_state) + assert values[naught_win_state] == -1, ( + "State should be won by player 1: \n" + cross_win_state) + + +def main(argv): + del argv + if FLAGS.game == "tic_tac_toe": + play_tic_tac_toe() + else: + raise NotImplementedError("This example only works for Tic-Tac-Toe.") + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/README.md b/scenarios/bargaining/open_spiel/open_spiel/python/games/README.md new file mode 100644 index 0000000..d4acb67 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/README.md @@ -0,0 +1,49 @@ +# Python Games + +This directory contains games implemented in Python. The majority of OpenSpiel +games are in C++, which is significantly faster, but Python may still be +suitable for prototyping or for small games. + +It is possible to run C++ algorithms on Python implemented games, This is likely +to have good performance if the algorithm simply extracts a game tree and then +works with that (e.g. CFR algorithms). It is likely to have poor performance if +the algorithm relies on processing and updating states as it goes, e.g. MCTS. + +Suggested games to use as a basis for your own implementations: + +* [kuhn_poker](https://github.com/deepmind/open_spiel/blob/master/open_spiel/python/games/kuhn_poker.py) + for imperfect information games with chance +* [tic_tac_toe](https://github.com/deepmind/open_spiel/blob/master/open_spiel/python/games/tic_tac_toe.py) + for perfect information games without chance +* [iterated_prisoners_dilemma](https://github.com/deepmind/open_spiel/blob/master/open_spiel/python/games/iterated_prisoners_dilemma.py) + for games with simultaneous moves + +### Implementation Notes + +The Python game implementation sticks quite closely to the C++ one. The main +differences are as follows: + +* Observations should be supported entirely through the Observation API, entry + point the `make_py_observer` method on the game class. See + [kuhn_poker](https://github.com/deepmind/open_spiel/blob/master/open_spiel/python/games/kuhn_poker.py) + for a complete example. + +* Parameter handling is significantly simplified. Default parameters are + provided in the GameType; the parameters supplied to the constructor will + have had default parameters applied. The C++ GameParameter type is not used. + See + [iterated_prisoners_dilemma](https://github.com/deepmind/open_spiel/blob/master/open_spiel/python/games/iterated_prisoners_dilemma.py) + for a very simple example. + +* `_legal_actions` only needs to handle the case where the game is in progress + and it is that player's turn. 
Cases which require special handling in C++ + games, such as terminal states, chance nodes, not the player's turn, are + instead handled in the Python/C++ layer. + +* `_action_to_string` always receives the correct player as an argument, + unlike the C++ version where it may be omitted, and hence this case must be + handled by the game implementor + +* `_apply_action` and `_apply_actions` correspond to `DoApplyAction` and + `DoApplyActions`; the C++ history will be updated after the relevant one of + these functions is called. diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/__init__.py new file mode 100644 index 0000000..33945e0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/__init__.py @@ -0,0 +1,36 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Games implemented in Python. + +These games are registered as they are imported. It's perfectly possible to +import just a single game if you prefer. There is no need to add new games here, +so long as they register themselves and you import them when wanting to use +them. However, adding them here will make them available for playthroughs and +for automated API testing. + +Registration looks like this: +``` +pyspiel.register_game(_GAME_TYPE, KuhnPokerGame) +``` +""" + +from open_spiel.python.games import block_dominoes +from open_spiel.python.games import chat_game +from open_spiel.python.games import dynamic_routing +from open_spiel.python.games import iterated_prisoners_dilemma +from open_spiel.python.games import kuhn_poker +from open_spiel.python.games import liars_poker +from open_spiel.python.games import team_dominoes +from open_spiel.python.games import tic_tac_toe diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/atari.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/atari.py new file mode 100644 index 0000000..c195d8c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/atari.py @@ -0,0 +1,241 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""OpenSpiel support for the Atari Learning Environment (ALE). + +Originally introduced in (Bellemare et al., 2013): +https://arxiv.org/abs/1207.4708. 
+ +Uses environment wrappers from OpenAI Gym (https://gym.openai.com/) and Stable +Baselines 3 (https://jmlr.org/papers/v22/20-1364.html) to convert observations +into a suitable format for training. +""" + +# pylint: disable=g-importing-member +from math import prod +import gym +import numpy as np +from stable_baselines3.common.atari_wrappers import ClipRewardEnv +from stable_baselines3.common.atari_wrappers import EpisodicLifeEnv +from stable_baselines3.common.atari_wrappers import FireResetEnv +from stable_baselines3.common.atari_wrappers import MaxAndSkipEnv +import pyspiel + + +_NUM_PLAYERS = 1 +_GAME_TYPE = pyspiel.GameType( + short_name='atari', + long_name='atari', + dynamics=pyspiel.GameType.Dynamics.SEQUENTIAL, + chance_mode=pyspiel.GameType.ChanceMode.SAMPLED_STOCHASTIC, + information=pyspiel.GameType.Information.PERFECT_INFORMATION, + utility=pyspiel.GameType.Utility.ZERO_SUM, + reward_model=pyspiel.GameType.RewardModel.REWARDS, + max_num_players=_NUM_PLAYERS, + min_num_players=_NUM_PLAYERS, + provides_information_state_string=False, + provides_information_state_tensor=False, + provides_observation_string=True, + provides_observation_tensor=True, + parameter_specification={ + 'gym_id': 'ALE/Breakout-v5', + 'seed': 1, + 'idx': 0, + 'capture_video': False, + 'run_name': 'default', + 'use_episodic_life_env': True + }) +_GAME_INFO = pyspiel.GameInfo( + num_distinct_actions=4, + max_chance_outcomes=0, + num_players=_NUM_PLAYERS, + min_utility=-1.0, + max_utility=1.0, + utility_sum=0.0, + max_game_length=2000) + + +# NOTE: We include this wrapper by hand because the default wrapper +# threw errors (see modified lines). +class NoopResetEnv(gym.Wrapper): + """Sample initial states by taking random number of no-ops on reset. + + No-op is assumed to be action 0. :param env: the environment to wrap :param + noop_max: the maximum value of no-ops to run + """ + + def __init__(self, env: gym.Env, noop_max: int = 30): + gym.Wrapper.__init__(self, env) + self.noop_max = noop_max + self.override_num_noops = None + self.noop_action = 0 + assert env.unwrapped.get_action_meanings()[0] == 'NOOP' + + def reset(self, **kwargs) -> np.ndarray: + self.env.reset(**kwargs) + if self.override_num_noops is not None: + noops = self.override_num_noops + else: + #### MODIFIED LINES: note method is named integers now ### + noops = self.unwrapped.np_random.integers(1, self.noop_max + 1) + ### END MODIFIED LINES ### + assert noops > 0 + obs = np.zeros(0) + for _ in range(noops): + obs, _, done, _ = self.env.step(self.noop_action) + if done: + obs = self.env.reset(**kwargs) + return obs + + +class AtariGame(pyspiel.Game): + """An OpenSpiel wrapper for the OpenAI Gym Atari games.""" + + def __init__(self, params=None): + super().__init__(_GAME_TYPE, _GAME_INFO, params or dict()) + self.gym_id = params.get('gym_id', 'BreakoutNoFrameskip-v4') + self.seed = params.get('seed', 1) + self.idx = params.get('idx', 0) + self.capture_video = params.get('capture_video', False) + self.run_name = params.get('run_name', 'default') + self.use_episodic_life_env = params.get('use_episodic_life_env', True) + + env = gym.make(self.gym_id) + env = gym.wrappers.RecordEpisodeStatistics(env) + if self.capture_video and self.idx == 0: + env = gym.wrappers.RecordVideo(env, f'videos/{self.run_name}') + + # Apply the standard set of wrappers from CleanRL's PPO implementation. 
+ # These wrappers have been tested on Breakout; different games may + # benefit from different wrappers (e.g., Space Invaders might benefit + # from frameskip=3 instead of 4; see https://arxiv.org/abs/1312.5602). + env = NoopResetEnv(env, noop_max=30) + env = MaxAndSkipEnv(env, skip=4) + if self.use_episodic_life_env: + env = EpisodicLifeEnv(env) + if 'FIRE' in env.unwrapped.get_action_meanings(): + env = FireResetEnv(env) + env = ClipRewardEnv(env) + env = gym.wrappers.ResizeObservation(env, (84, 84)) + env = gym.wrappers.GrayScaleObservation(env) + env = gym.wrappers.FrameStack(env, 4) + env.seed(self.seed) + env.action_space.seed(self.seed) + env.observation_space.seed(self.seed) + self.observation_shape = env.reset().shape + self.env = env + + def observation_tensor_shape(self): + return self.observation_shape + + def new_initial_state(self): + """Returns a state corresponding to the start of a game.""" + return AtariState(self) + + def make_py_observer(self, iig_obs_type=None, params=None): + """Returns an object used for observing game state.""" + if params is None: + params = dict() + + params['observation_shape'] = self.observation_shape + return AtariObserver( + iig_obs_type or pyspiel.IIGObservationType(perfect_recall=False), + params) + + +class AtariState(pyspiel.State): + """A python version of the Atari Game state.""" + + def __init__(self, game): + """Constructor; should only be called by Game.new_initial_state.""" + super().__init__(game) + self._is_terminal = False + self.tracked_rewards = 0 + self.env = game.env + self.observation = self.env.reset() + self.last_reward = None + self.last_info = dict() + + def current_player(self): + """Returns id of the next player to move, or TERMINAL if game is over.""" + return pyspiel.PlayerId.TERMINAL if self._is_terminal else 0 + + def _legal_actions(self, player): + """Returns a list of legal actions, sorted in ascending order.""" + return list(range(self.env.action_space.n)) + + def _apply_action(self, action): + """Applies the specified action to the state.""" + observation, reward, done, info = self.env.step(action) + self.last_info = info + self.last_reward = reward + self.tracked_rewards += reward + if done: + self._is_terminal = True + self.observation = observation # Store this for later + + def _action_to_string(self, player, action): + return self.env.get_action_meanings()[action] + + def is_terminal(self): + """Returns True if the game is over.""" + return self._is_terminal + + def rewards(self): + return [self.last_reward] + + def returns(self): + """Total reward for each player over the course of the game so far.""" + return [self.tracked_rewards] + + def __str__(self): + """String for debug purposes. No particular semantics are required.""" + return 'DEBUG' + + +class AtariObserver: + """Observer, conforming to the PyObserver interface (see observation.py).""" + + # pylint: disable=unused-argument + def __init__(self, iig_obs_type, params): + """Initializes an empty observation tensor.""" + # Determine which observation pieces we want to include. + pieces = [] + pieces.append(('observation', prod(params['observation_shape']), + params['observation_shape'])) + + # Build the single flat tensor. + total_size = sum(size for name, size, shape in pieces) + self.tensor = np.zeros((total_size), np.float32) + + # Build the named & reshaped views of the bits of the flat tensor. 
+ self.dict = {} + index = 0 + for name, size, shape in pieces: + self.dict[name] = self.tensor[index:index + size].reshape(shape) + index += size + + def set_from(self, state, player): + """Updates `tensor` and `dict` to reflect `state` from PoV of `player`.""" + self.tensor.fill(0) + if 'observation' in self.dict: + self.dict['observation'][:] = state.observation + + def string_from(self, state, player): + """Observation of `state` from the PoV of `player`, as a string.""" + pieces = [] + return ' '.join(str(p) for p in pieces) + + +# Register the game with the OpenSpiel library +pyspiel.register_game(_GAME_TYPE, AtariGame) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/block_dominoes.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/block_dominoes.py new file mode 100644 index 0000000..1b2462b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/block_dominoes.py @@ -0,0 +1,368 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Block Dominoes implemented in Python. + +https://en.wikipedia.org/wiki/Dominoes#Blocking_game +""" + +import copy +import itertools + +import numpy as np + +import pyspiel + +_NUM_PLAYERS = 2 +_PIPS = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0] +_DECK = list(itertools.combinations_with_replacement(_PIPS, 2)) +_EDGES = [None, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0] + + +class Action: + """Represent player possible action.""" + + def __init__(self, player, tile, edge): + self.player = player + self.tile = tile + self.edge = edge + + def __str__(self): + return f"p{self.player} tile:{self.tile} pip:{self.edge}" + + def __repr__(self): + return self.__str__() + + +def create_possible_actions(): + actions = [] + for player in range(_NUM_PLAYERS): + for tile in _DECK: + for edge in _EDGES: + if edge in tile or edge is None: # can we play tile on edge? + actions.append(Action(player, tile, edge)) + return actions + + +_ACTIONS = create_possible_actions() +_ACTIONS_STR = [str(action) for action in _ACTIONS] + +_HAND_SIZE = 7 + +_MAX_GAME_LENGTH = 28 + +_GAME_TYPE = pyspiel.GameType( + short_name="python_block_dominoes", + long_name="Python block dominoes", + dynamics=pyspiel.GameType.Dynamics.SEQUENTIAL, + chance_mode=pyspiel.GameType.ChanceMode.EXPLICIT_STOCHASTIC, + information=pyspiel.GameType.Information.IMPERFECT_INFORMATION, + utility=pyspiel.GameType.Utility.ZERO_SUM, + reward_model=pyspiel.GameType.RewardModel.TERMINAL, + max_num_players=_NUM_PLAYERS, + min_num_players=_NUM_PLAYERS, + provides_information_state_string=True, + provides_information_state_tensor=True, + provides_observation_string=True, + provides_observation_tensor=True, + provides_factored_observation_string=True, +) +_GAME_INFO = pyspiel.GameInfo( + num_distinct_actions=len(_ACTIONS), + max_chance_outcomes=len(_DECK), + # first player hand: (6,6) (6,5) (5,5) (6,4) (4,5) (6,3) (4,4) + # second player hand is empty. can be reduced. 
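+    # i.e. the losing player can be caught holding at most
+    # 12 + 11 + 10 + 10 + 9 + 9 + 8 = 69 pips, hence the +/-69 bounds below.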
+    min_utility=-69,
+    max_utility=69,
+    num_players=_NUM_PLAYERS,
+    # deal: 14 chance nodes + play: 14 player nodes
+    max_game_length=_MAX_GAME_LENGTH,
+    utility_sum=0.0,
+)
+
+
+class BlockDominoesGame(pyspiel.Game):
+  """A Python version of Block Dominoes."""
+
+  def __init__(self, params=None):
+    super().__init__(_GAME_TYPE, _GAME_INFO, params or dict())
+
+  def new_initial_state(self):
+    """Returns a state corresponding to the start of a game."""
+    return BlockDominoesState(self)
+
+  def make_py_observer(self, iig_obs_type=None, params=None):
+    """Returns an object used for observing game state."""
+    return BlockDominoesObserver(
+        iig_obs_type or pyspiel.IIGObservationType(perfect_recall=False), params
+    )
+
+
+class BlockDominoesState(pyspiel.State):
+  """A python version of the Block Dominoes state."""
+
+  def __init__(self, game):
+    """Constructor; should only be called by Game.new_initial_state."""
+    super().__init__(game)
+    self.actions_history = []
+    self.open_edges = []
+    self.hands = [[], []]
+    self.deck = copy.deepcopy(_DECK)
+    self._game_over = False
+    self._next_player = pyspiel.PlayerId.CHANCE
+
+  # OpenSpiel (PySpiel) API functions are below. This is the standard set that
+  # should be implemented by every sequential-move game with chance.
+
+  def current_player(self):
+    """Returns id of the next player to move, or TERMINAL if game is over."""
+    if self._game_over:
+      return pyspiel.PlayerId.TERMINAL
+    if len(self.deck) > 14:
+      return pyspiel.PlayerId.CHANCE
+    return self._next_player
+
+  def _legal_actions(self, player):
+    """Returns a list of legal actions, sorted in ascending order."""
+    assert player >= 0
+    assert player == self._next_player
+    return self.get_legal_actions(player)
+
+  def get_legal_actions(self, player):
+    """Returns a list of legal actions."""
+    assert player >= 0
+
+    actions = []
+    hand = self.hands[player]
+
+    # first move, no open edges
+    if not self.open_edges:
+      for tile in hand:
+        actions.append(Action(player, tile, None))
+    else:
+      for tile in hand:
+        if tile[0] in self.open_edges:
+          actions.append(Action(player, tile, tile[0]))
+        if tile[0] != tile[1] and tile[1] in self.open_edges:
+          actions.append(Action(player, tile, tile[1]))
+
+    actions_idx = [_ACTIONS_STR.index(str(action)) for action in actions]
+    actions_idx.sort()
+    return actions_idx
+
+  def chance_outcomes(self):
+    """Returns the possible chance outcomes and their probabilities."""
+    assert self.is_chance_node()
+    p = 1.0 / len(self.deck)
+    return [(_DECK.index(i), p) for i in self.deck]
+
+  def _apply_action(self, action):
+    """Applies the specified action to the state."""
+    if self.is_chance_node():
+      hand_to_add_tile = (
+          self.hands[0] if len(self.hands[0]) != _HAND_SIZE else self.hands[1]
+      )
+      tile = _DECK[action]
+      self.deck.remove(tile)
+      hand_to_add_tile.append(tile)
+
+      if not len(self.hands[0]) == len(self.hands[1]) == _HAND_SIZE:
+        return  # more tiles remain to be dealt
+
+      for hand in self.hands:
+        hand.sort()
+
+      self._next_player = 0
+    else:
+      action = _ACTIONS[action]
+      self.actions_history.append(action)
+      my_idx = self.current_player()
+      my_hand = self.hands[my_idx]
+      my_hand.remove(action.tile)
+      self.update_open_edges(action)
+
+      if not my_hand:
+        self._game_over = True  # player played their last tile
+        return
+
+      opp_idx = 1 - my_idx
+      opp_legal_actions = self.get_legal_actions(opp_idx)
+
+      if opp_legal_actions:
+        self._next_player = opp_idx
+        return
+
+      my_legal_actions = self.get_legal_actions(my_idx)
+      if my_legal_actions:
+        self._next_player = my_idx
+        return
+
+      self._game_over = True  # both players are blocked
+
+  def update_open_edges(self, action):
+    if not self.open_edges:
+      self.open_edges = list(action.tile)
+    else:
+      self.open_edges.remove(action.edge)
+      new_edge = (
+          action.tile[0] if action.tile[0] != action.edge else action.tile[1]
+      )
+      self.open_edges.append(new_edge)
+
+    self.open_edges.sort()
+
+  def _action_to_string(self, player, action):
+    """Action -> string."""
+    if player == pyspiel.PlayerId.CHANCE:
+      return f"Deal {_DECK[action]}"
+    return _ACTIONS_STR[action]
+
+  def is_terminal(self):
+    """Returns True if the game is over."""
+    return self._game_over
+
+  def returns(self):
+    """Total reward for each player over the course of the game so far."""
+
+    if not self.is_terminal():
+      return [0, 0]
+
+    sum_of_pips0 = sum(t[0] + t[1] for t in self.hands[0])
+    sum_of_pips1 = sum(t[0] + t[1] for t in self.hands[1])
+
+    if sum_of_pips1 == sum_of_pips0:
+      return [0, 0]
+
+    if sum_of_pips1 > sum_of_pips0:
+      return [sum_of_pips1, -sum_of_pips1]
+    return [-sum_of_pips0, sum_of_pips0]
+
+  def __str__(self):
+    """String for debug purposes. No particular semantics are required."""
+    hand0 = [str(c) for c in self.hands[0]]
+    hand1 = [str(c) for c in self.hands[1]]
+    history = [str(a) for a in self.actions_history]
+    return f"hand0:{hand0} hand1:{hand1} history:{history}"
+
+
+class BlockDominoesObserver:
+  """Observer, conforming to the PyObserver interface (see observation.py)."""
+
+  def __init__(self, iig_obs_type, params):
+    """Initializes an empty observation tensor."""
+    if params:
+      raise ValueError(f"Observation parameters not supported; passed {params}")
+
+    # Determine which observation pieces we want to include.
+    pieces = [("player", 2, (2,))]
+
+    if iig_obs_type.private_info == pyspiel.PrivateInfoType.SINGLE_PLAYER:
+      # each tile is represented using 3 integers:
+      # 2 for the pips, and 1 to distinguish the (0,0) tile from an empty
+      # slot.
+      pieces.append(("hand", 21, (7, 3)))
+
+    if iig_obs_type.public_info:
+      if iig_obs_type.perfect_recall:
+        # list of all played actions, each action is represented using 5
+        # integers:
+        # 2 for the played tile (0-6), 1 for the covered edge (0-6),
+        # 1 for which player (0/1), 1 to distinguish between an actual move
+        # and an empty slot for a move (0/1).
+        # the None (play on an empty board) edge is represented using 0.
+        pieces.append(("actions_history", 70, (14, 5)))
+      else:
+        # last action, represented in the same way as in "actions_history"
+        # but without the last integer.
+        pieces.append(("last_action", 4, (4,)))
+        pieces.append(("hand_sizes", 2, (2,)))
+
+    # Build the single flat tensor.
+    total_size = sum(size for name, size, shape in pieces)
+    self.tensor = np.zeros(total_size, np.float32)
+
+    # Build the named & reshaped views of the bits of the flat tensor.
+ self.dict = {} + index = 0 + for name, size, shape in pieces: + self.dict[name] = self.tensor[index : index + size].reshape(shape) + index += size + + def set_from(self, state, player): + """Updates `tensor` and `dict` to reflect `state` from PoV of `player`.""" + + self.tensor.fill(0) + + if "player" in self.dict: + self.dict["player"][player] = 1 + self.dict["player"][1 - player] = 0 + + if "hand_sizes" in self.dict: + my_hand_size = len(state.hands[player]) + opp_hand_size = len(state.hands[1 - player]) + self.dict["hand_sizes"][0] = my_hand_size + self.dict["hand_sizes"][1] = opp_hand_size + + if "edges" in self.dict: + if state.open_edges: + self.dict["edges"][0] = state.open_edges[0] + self.dict["edges"][1] = state.open_edges[1] + else: + self.dict["edges"][0] = 0.0 + self.dict["edges"][1] = 0.0 + + if "hand" in self.dict: + for i, tile in enumerate(state.hands[player]): + self.dict["hand"][i][0] = tile[0] + self.dict["hand"][i][1] = tile[1] + self.dict["hand"][i][2] = 1.0 + + if "actions_history" in self.dict: + for i, action in enumerate(state.actions_history): + self.dict["actions_history"][i][0] = action.tile[0] + self.dict["actions_history"][i][1] = action.tile[1] + self.dict["actions_history"][i][2] = ( + action.edge if action.edge is not None else 0.0 + ) + self.dict["actions_history"][i][3] = action.player + self.dict["actions_history"][i][4] = 1.0 + + if "last_action" in self.dict: + if state.actions_history: + action = state.actions_history[-1] + self.dict["last_action"][0] = action.tile[0] + self.dict["last_action"][1] = action.tile[1] + self.dict["last_action"][2] = ( + action.edge if action.edge is not None else 0.0 + ) + self.dict["last_action"][3] = action.player + + def string_from(self, state, player): + """Observation of `state` from the PoV of `player`, as a string.""" + pieces = [] + if "player" in self.dict: + pieces.append(f"p{player}") + if "hand" in self.dict: + pieces.append(f"hand:{state.hands[player]}") + if "actions_history" in self.dict: + pieces.append(f"history:{str(state.actions_history)}") + if "last_action" in self.dict and state.actions_history: + pieces.append(f"last_action:{str(state.actions_history[-1])}") + return " ".join(str(p) for p in pieces) + + +# Register the game with the OpenSpiel library + +pyspiel.register_game(_GAME_TYPE, BlockDominoesGame) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/block_dominoes_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/block_dominoes_test.py new file mode 100644 index 0000000..5a4843d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/block_dominoes_test.py @@ -0,0 +1,119 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+# Lint as python3
+"""Tests for Python Block Dominoes."""
+
+from absl.testing import absltest
+from open_spiel.python.games import block_dominoes
+import pyspiel
+
+
+class DominoesBlockTest(absltest.TestCase):
+
+  def test_game_from_cc(self):
+    """Runs our standard game tests, checking API consistency."""
+    game = pyspiel.load_game("python_block_dominoes")
+    pyspiel.random_sim_test(game, num_sims=100, serialize=False, verbose=True)
+
+  def test_single_deterministic_game_1(self):
+    """Runs a single game with deterministically chosen tiles and actions."""
+    game = pyspiel.load_game("python_block_dominoes")
+    state = game.new_initial_state()
+    hand0 = [
+        (6.0, 6.0),
+        (0.0, 2.0),
+        (4.0, 4.0),
+        (3.0, 3.0),
+        (2.0, 2.0),
+        (1.0, 1.0),
+        (0.0, 0.0),
+    ]
+    hand1 = [
+        (5.0, 6.0),
+        (4.0, 5.0),
+        (3.0, 4.0),
+        (2.0, 3.0),
+        (1.0, 2.0),
+        (0.0, 1.0),
+        (4.0, 6.0),
+    ]
+    self.deal_hands(state, [hand0, hand1])
+
+    self.apply_action(state, block_dominoes.Action(0, (6.0, 6.0), None))
+    self.apply_action(state, block_dominoes.Action(1, (5.0, 6.0), 6.0))
+    # player 0 doesn't hold any tile with a 6 or 5, so player 1 plays again
+    self.apply_action(state, block_dominoes.Action(1, (4.0, 5.0), 5.0))
+    self.apply_action(state, block_dominoes.Action(0, (4.0, 4.0), 4.0))
+    self.apply_action(state, block_dominoes.Action(1, (3.0, 4.0), 4.0))
+    self.apply_action(state, block_dominoes.Action(0, (3.0, 3.0), 3.0))
+    self.apply_action(state, block_dominoes.Action(1, (2.0, 3.0), 3.0))
+    self.apply_action(state, block_dominoes.Action(0, (2.0, 2.0), 2.0))
+    self.apply_action(state, block_dominoes.Action(1, (1.0, 2.0), 2.0))
+    self.apply_action(state, block_dominoes.Action(0, (1.0, 1.0), 1.0))
+    self.apply_action(state, block_dominoes.Action(1, (0.0, 1.0), 1.0))
+    self.apply_action(state, block_dominoes.Action(0, (0.0, 0.0), 0.0))
+    self.apply_action(state, block_dominoes.Action(1, (4.0, 6.0), 6.0))
+
+    # player 1 played all their tiles; player 0 still holds the tile (0, 2)
+    self.assertTrue(state.is_terminal())
+    self.assertEqual(state.returns()[0], -2)
+    self.assertEqual(state.returns()[1], 2)
+
+  def test_single_deterministic_game_2(self):
+    """Runs a single game with deterministically chosen tiles and actions."""
+    game = pyspiel.load_game("python_block_dominoes")
+    state = game.new_initial_state()
+    hand0 = [
+        (6.0, 6.0),
+        (0.0, 5.0),
+        (1.0, 5.0),
+        (2.0, 5.0),
+        (3.0, 5.0),
+        (4.0, 5.0),
+        (5.0, 5.0),
+    ]
+    hand1 = [
+        (0.0, 4.0),
+        (1.0, 4.0),
+        (2.0, 4.0),
+        (3.0, 4.0),
+        (4.0, 4.0),
+        (0.0, 3.0),
+        (1.0, 3.0),
+    ]
+    self.deal_hands(state, [hand0, hand1])
+
+    self.apply_action(state, block_dominoes.Action(0, (6.0, 6.0), None))
+    # Neither player holds another tile with a 6, so both are blocked and
+    # the game ends
+    self.assertTrue(state.is_terminal())
+    self.assertEqual(state.returns()[0], -45)
+    self.assertEqual(state.returns()[1], 45)
+
+  @staticmethod
+  def apply_action(state, action):
+    actions_str = block_dominoes._ACTIONS_STR
+    state.apply_action(actions_str.index(str(action)))
+
+  @staticmethod
+  def deal_hands(state, hands):
+    deck = block_dominoes._DECK
+    for hand in hands:
+      for t in hand:
+        state.apply_action(deck.index(t))
+
+
+if __name__ == "__main__":
+  absltest.main()
diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_game.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_game.py
new file mode 100644
index 0000000..15ca3fb
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_game.py
@@ -0,0 +1,281 @@
+# Copyright 2023 DeepMind Technologies
Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Creates a chat game as an OpenSpiel Environment.""" + +from typing import Any, Callable, Dict, OrderedDict, List, Tuple, Union +from absl import logging +import numpy as np + +from open_spiel.python.games.chat_games import chat_game_base +from open_spiel.python.games.chat_games.configs import config_fixed_mock +from open_spiel.python.games.chat_games.configs import config_rnd_mock +from open_spiel.python.games.chat_games.envs.observations import utils as observation_utils +from open_spiel.python.games.chat_games.envs.payoffs import utils as payoff_utils +from open_spiel.python.games.chat_games.envs.termination import utils as term_utils +from open_spiel.python.games.chat_games.envs.utils import header as header_utils +from open_spiel.python.games.chat_games.utils import test_utils as chat_test_utils + +import pyspiel + + +GAME_TYPE = pyspiel.GameType( + short_name='chat_game', + long_name='Chat Game', + utility=pyspiel.GameType.Utility.GENERAL_SUM, + provides_information_state_string=True, + provides_information_state_tensor=False, + **chat_game_base.GAME_TYPE_KWARGS) + + +class ChatGameObserver(chat_game_base.ChatGameObserverBase): + """Observer, conforming to the PyObserver interface (see observation.py).""" + + def _build_str_to_info_state(self) -> bool: + """Initializes map from str to infostate. Returns True if successful.""" + # Build a string tokenizer here + # --------------------------- # + # Build a string tokenizer here + return True + + def _info_state(self, input_text: str, obs_size: int) -> np.ndarray: + """Returns a len-obs_size np.ndarray given an input string and obs_size.""" + if not self._str_to_info_state_built: + raise ValueError('String to info state mapping not built!') + del input_text + # Vectorize a str (ideally lossless for info state) using a tokenizer here + # ---------------------------------------------------------------------- # + # Vectorize a str (ideally lossless for info state) using a tokenizer here + return np.zeros(obs_size, dtype=np.int32) + + +class ChatGame(chat_game_base.BaseChatGame): + """Chat game.""" + + # pylint:disable=dangerous-default-value + def __init__( + self, + params: Dict[str, Any] = chat_game_base.DEFAULT_PARAMS, + ): + """Constructor. 
+ + Args: + params: dict, parameter dict with the following keys + + num_distinct_actions- int, # of actions at each info set + num_llm_seeds- int, # of seeds to use for generating LLM response + num_players- int, # of speakers (action: recipient) on the message chain + min_utility- float, minimum utility any player can attain + max_utility- float, maximum utility any player can attain + num_max_replies- int, total # of messages each player can send in an + episode + """ + self._game_loaded = False + + super().__init__(params) # initializes self.game_info via base init + super(chat_game_base.BaseChatGame, self).__init__( + GAME_TYPE, self.game_info, params or dict()) + + def load_chat_game(self, + llm_type: chat_test_utils.TestLLM, + observations: List[observation_utils.Observation], + vectorize: ..., + header: header_utils.Header, + payoffs: List[payoff_utils.Payoff], + aggregate_payoffs: Callable[[List[int]], float] = np.mean, + given_names: Union[List[str], None] = None, + given_llm_seeds: Union[List[int], None] = None, + given_prompt_actions: Union[OrderedDict[str, List[str]], + None] = None, + given_private_info: Union[OrderedDict[str, List[str]], + None] = None, + initial_scenario: Union[Any, None] = None, + num_names: int = 2, + num_prompt_actions: Tuple[int, ...] = (4,), + num_private_info: Tuple[int, ...] = (4,), + examples_names: Union[List[str], None] = None, + examples_prompt_actions: Union[OrderedDict[str, List[str]], + None] = None, + examples_private_info: Union[OrderedDict[str, List[str]], + None] = None, + examples_scenarios: Union[List[Any], None] = None, + llm_list_suffix: str = 'Continue the list from here.', + llm_termination_prompt: Union[term_utils.Termination, + None] = None, + seed: Union[int, None] = None + ): + """Constructor. + + Args: + llm_type: item of enum type chat_test_utils.TestLLM + observations: List of Observation items used for prompting llms to extract + observations (string features) from dialogues + vectorize: converts any length string into a length obs_size vector + + header: List of Header items used for prompting llms to take actions + (construct messages) based on latent action variables and private + information + + payoffs: list of Payoff items used for constructing queries and scoring + dialogue for each agent + aggregate_payoffs: function that maps from vector to nonnegative scalar + + given_names: list of strings representing names of players + given_llm_seeds: list of ints to seed llm with to generate each message + given_prompt_actions: ordered dict mapping action_keys + (see envs/utils/header) to list of strings representing the set of + available prompt actions (e.g., personalities or msg tones). Overrides + examples_prompt_actions. + given_private_info: ordered dict mapping info_keys + (see envs/utils/header) to length-[num_players] list of strings + representing the private information available to each player (e.g., + inventory / valuations of fruits). Overrides examples_private_info. 
+ initial_scenario: Scenario items representing an initial message + + num_names: int, # of names to generate (can be greater than # of players) + num_prompt_actions: tuple of int, # of prompts to consider for each + action_key (i.e., size of action space for each prompt action) + num_private_info: tuple of int, # of private info states to consider for + each info_key + + examples_names: list of strings representing examples of names of players + examples_prompt_actions: ordered dict mapping action_keys + (see envs/utils/header) to list of strings representing examples of + prompt actions (e.g., personalities or msg tones). + examples_private_info: ordered dict mapping info_keys + (see envs/utils/header) to list of strings representing examples of + private information available to players (e.g., inventory / valuations + of fruits). Overrides examples_private_info. + examples_scenarios: list of Scenario items used for meta-generating new + scenarios + + llm_list_suffix: str, gets appended to a prompt to induce an llm to + generate a list of items (different llms like different prompts). + chinchilla likes ``, llmit likes `Continue the list from here.` + llm_termination_prompt: Termination item w/ [attrs query, + obs_trans_postfix, postfix]. llm will be asked to score a binary + response `yes`/`no` given query.format(msg=last_msg) to determine + whether the episode has reached a terminal state (e.g., deal has been + agreed upon). default is empty string in which case llm terminal + condition is left unused and episode terminates after + num_players * num_max_replies + + seed: int, master seed for experiment (used to generate all subsequent + seeds for any random generation) + """ + + # Define LLM model here + self._llm_type = llm_type + if self._llm_type == chat_test_utils.TestLLM.MOCK: + self._lm = chat_test_utils.MockLLM() + else: + raise NotImplementedError(f'llm_type {self._llm_type} not available.') + # Define LLM model here + + super()._load_chat_game(observations, + vectorize, + header, + payoffs, + aggregate_payoffs, + given_names, + given_llm_seeds, + given_prompt_actions, + given_private_info, + initial_scenario, + num_names, + num_prompt_actions, + num_private_info, + examples_names, + examples_prompt_actions, + examples_private_info, + examples_scenarios, + llm_list_suffix, + llm_termination_prompt, + seed) + + self._game_loaded = True + + def generate_response(self, prompt: str, seed: int, + num_output_tokens: Union[int, None] = None) -> str: + """Returns LLM generated string given prompt and seed.""" + # Define generate response here + if self._llm_type == chat_test_utils.TestLLM.MOCK: + return self._lm.generate_response(prompt, seed, num_output_tokens) + else: + raise NotImplementedError(f'llm_type {self._llm_type} not available.') + # Define generate response here + + def generate_bool(self, prompt: str, seed: int) -> bool: + """Returns LLM generated boolean given prompt and seed.""" + # Define generate bool here (e.g., for terminating an episode) + if self._llm_type == chat_test_utils.TestLLM.MOCK: + return self._lm.generate_bool(prompt, seed) + else: + raise NotImplementedError(f'llm_type {self._llm_type} not available.') + # Define generate bool here + + def make_py_observer(self, + iig_obs_type: Union[pyspiel.IIGObservationType, + None] = None, + params: Union[Dict[str, Any], None] = None + ) -> ChatGameObserver: + """Returns an object used for observing game state.""" + return ChatGameObserver( + iig_obs_type or pyspiel.IIGObservationType(perfect_recall=False), + 
params) + + def new_initial_state(self) -> chat_game_base.ChatGameState: + """Generates a new dialogue game. + + Returns: + chat_game_base.ChatGameState (see chat_games/chat_game_base.py) + """ + # KEEP THIS IF-BLOCK FOR OPEN_SPIEL TESTS + if not self._game_loaded: + # load mock game for testing + if self._num_players == 2: + config = config_fixed_mock.get_config() + tones = config.game.given_prompt_actions.values()[0] + num_prompt_actions = (len(tones),) + else: + config = config_rnd_mock.get_config() + num_prompt_actions = config.game.num_prompt_actions + # open_spiel attempts to run several simulation tests of games. this + # chat_game, however, requires calling `load_chat_game` explicitly after + # __init__ which is unique. we do this because the most obvious place to + # pass game configs would be via `params`, but everything in params must + # be `pickleable` which rules out passing things like `vectorizers` and + # messsy llm string generators. therefore, we need to check to see if + # `load_chat_game` has been called here and call it if not. + # also, open_spiel tests run with variable numbers of players which are + # different from those in chat_game_base.DEFAULT_PARAMS. More importantly, + # this affects the number of distinct actions since the number of players + # affects who we can choose to speak to. hence, we explicitly recalculate + # the number of distinct actions here (overwriting what was specified in + # the original chat_game_base.DEFAULT_PARAMS) + self._num_distinct_actions = np.prod(num_prompt_actions + + (self._num_players,)) + vectorizer = chat_test_utils.MockVectorizer() + self.load_chat_game(llm_type=chat_test_utils.TestLLM.MOCK, + vectorize=vectorizer.vectorize, + seed=1234, + **config.game) + logging.warning('Loading chat_game with default config. Only meant for ' + + 'open_spiel testing.') + + return chat_game_base.ChatGameState(self, self._initial_state_configs) + +# Register the game with the OpenSpiel library + +pyspiel.register_game(GAME_TYPE, ChatGame) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_game_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_game_test.py new file mode 100644 index 0000000..03a8f26 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_game_test.py @@ -0,0 +1,66 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for pyspiel Chat Game.""" + +from absl.testing import absltest +from absl.testing import parameterized + +from open_spiel.python.games import chat_game # pylint: disable=unused-import + +from open_spiel.python.games.chat_games.configs import config_fixed_mock +from open_spiel.python.games.chat_games.configs import config_rnd_mock + +from open_spiel.python.games.chat_games.utils import test_utils as chat_test_utils + +import pyspiel + + +GLOBAL_TEST_LLM = chat_test_utils.TestLLM.MOCK + + +class ChatGameTest(parameterized.TestCase): + + def setUp(self): + super().setUp() + + self.fixed_config = config_fixed_mock.get_config() + self.random_config = config_rnd_mock.get_config() + + vectorizer = chat_test_utils.MockVectorizer() + self.vectorize = vectorizer.vectorize + + @parameterized.named_parameters( + dict(testcase_name='fixed_scenario', fixed_scenario=True), + dict(testcase_name='random_scenario', fixed_scenario=False)) + def test_game_from_cc(self, fixed_scenario): + """Runs our standard game tests, checking API consistency.""" + + if fixed_scenario: + config = self.fixed_config + else: + config = self.random_config + + game = pyspiel.load_game('chat_game', config.params.to_dict()) + + game.load_chat_game(llm_type=GLOBAL_TEST_LLM, + vectorize=self.vectorize, + seed=1234, + **config.game) + + pyspiel.random_sim_test(game, num_sims=10, serialize=False, verbose=True) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/chat_game_base.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/chat_game_base.py new file mode 100644 index 0000000..3ca95b7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/chat_game_base.py @@ -0,0 +1,1434 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Creates a chat game base class as an OpenSpiel Environment.""" + +import collections +import dataclasses +import string + +from typing import Any, Callable, Dict, OrderedDict, List, Tuple, Union +from absl import logging +import numpy as np + +from open_spiel.python.games.chat_games.envs.observations import utils as observation_utils +from open_spiel.python.games.chat_games.envs.payoffs import utils as payoff_utils +from open_spiel.python.games.chat_games.envs.termination import utils as term_utils +from open_spiel.python.games.chat_games.envs.utils import header as header_utils +from open_spiel.python.games.chat_games.envs.utils import text + +from open_spiel.python.games.chat_games.utils import logging_utils + +import pyspiel + + +ct = logging_utils.ColorText() + +REWARD_MODEL = pyspiel.GameType.RewardModel.TERMINAL + +ALL_PLAYERS = 'Everyone' + +MIN_RND_SEED = 42 +MAX_RND_SEED = 9999 +DEFAULT_LLM_SEED = 42 + +LLM_LENGTH_MESSAGE_TOKENS = 300 +LLM_LENGTH_MESSAGE_CHARS = 300 +LLM_LENGTH_OBS_TOKENS = 300 +LLM_LENGTH_OBS_CHARS = 300 +LLM_LENGTH_PAYOFF_OBS_TOKENS = 300 +LLM_LENGTH_PAYOFF_OBS_CHARS = 300 + +LLM_LENGTH_LIST_OF_WORDS_TOKENS = 30 +LLM_LIST_GEN_ATTEMPTS = 30 + +LLM_LENGTH_SCORE_TOKENS = 10 + +ITEM_PREFIX = '* ' + +MIN_PLAYERS = 2 # any less and it's not a game, is it ;) +MAX_PLAYERS = 10 # this is set arbitrarily for now, should be upper bound +MAX_NUM_REPLIES = 5 + +VEC_SIZE = 100 # int, length of vector returned by `vectorize` on string input + +DEFAULT_PARAMS = {'num_distinct_actions': 2, + 'num_llm_seeds': 1, + 'num_init_states': 1, + 'num_players': MIN_PLAYERS, + 'players': 0, # open_spiel tests use this for `num_players` + 'min_utility': -10.0, + 'max_utility': 10.0, + 'num_max_replies': 1, + 'silence_logging': True} + +GAME_TYPE_KWARGS = { + 'dynamics': pyspiel.GameType.Dynamics.SEQUENTIAL, + 'chance_mode': pyspiel.GameType.ChanceMode.EXPLICIT_STOCHASTIC, + 'information': pyspiel.GameType.Information.IMPERFECT_INFORMATION, + 'reward_model': REWARD_MODEL, + 'max_num_players': MAX_PLAYERS, + 'min_num_players': MIN_PLAYERS, + 'provides_observation_string': True, + 'provides_observation_tensor': True, + 'provides_factored_observation_string': True, + 'parameter_specification': DEFAULT_PARAMS, + 'default_loadable': True + } + +GAME_TYPE = pyspiel.GameType( + short_name='chat_game', + long_name='Chat Game', + utility=pyspiel.GameType.Utility.GENERAL_SUM, + provides_information_state_string=False, + provides_information_state_tensor=False, + **GAME_TYPE_KWARGS) + + +@dataclasses.dataclass(frozen=True) +class InitialStateConfiguration: + """Constructor. + + Attributes: + actions: dict, {'player_names': list of str, + : list of str, + ...} + seeds: list of ints, llm seeds (chance nodes) + scenario_prompt: str, initial message with header (no tone) + private_info: dict mapping info-type to list of str, one for each player + i.e., private (prior) info available to each player + """ + actions: OrderedDict[str, List[str]] + seeds: List[int] + scenario_prompt: str + private_info: OrderedDict[str, List[str]] + + +class ChatGameState(pyspiel.State): + """Chat game state.""" + + def __init__(self, + game: ..., + init_state_configs: Tuple[InitialStateConfiguration, ...]): + """Constructor. 
+ + Args: + game: see ChatGame class (should inherit from BaseChatGame) + init_state_configs: tuple of InitialStateConfiguration objects + one will be selected at random to initialize the game + """ + super().__init__(game) # access game with self.get_game() + + self._init_state_configs = init_state_configs + + # Init empty game w/ init_state_configs[0]. Overwrite later in game setup. + self._init_empty_game(init_state_configs[0]) + self._game_setup = False + + self._llm_termination = False + + self._rnd = self.get_game().rnd + + self._played_actions = [] + self._current_speaker = 1 + self._current_player = 1 + self._speakers = [] + self._num_actions_played = 0 + self._returns = None + self._player_action = None + + def _init_empty_game(self, init_state_config: InitialStateConfiguration): + """Initialize an empty game. + + Args: + init_state_config: InitialStateConfiguration object + """ + actions = init_state_config.actions + seeds = init_state_config.seeds + private_info = init_state_config.private_info + + self._num_actions = tuple([0 for _ in actions.values()]) + prompt_action_vals = [ + '' for _ in self.get_game().header.action_keys + ] + self._prompt_actions = OrderedDict(zip(self.get_game().header.action_keys, + prompt_action_vals)) + self._names = ['' for _ in actions['player_names']] + + self._llm_seeds = [0 for _ in seeds] + assert self.get_game().num_llm_seeds == len(self._llm_seeds) + + self._scenario_prompt = '' + + empty_pi_vals = [] + for pi in private_info.values(): + empty_player_pi = ['' for _ in pi] + empty_pi_vals.append(empty_player_pi) + self._private_info = OrderedDict(zip(private_info.keys(), empty_pi_vals)) + + self._dialogue = [''] + + def _setup_game(self, init_state_config: InitialStateConfiguration): + """Set up the game. + + Args: + init_state_config: InitialStateConfiguration object + """ + actions = init_state_config.actions + seeds = init_state_config.seeds + scenario_prompt = init_state_config.scenario_prompt + private_info = init_state_config.private_info + + self._num_actions = tuple([len(a) for a in actions.values()]) + prompt_action_vals = [ + actions[key] for key in self.get_game().header.action_keys + ] + self._prompt_actions = OrderedDict(zip(self.get_game().header.action_keys, + prompt_action_vals)) + self._names = actions['player_names'] + + self._llm_seeds = seeds + assert self.get_game().num_llm_seeds == len(self._llm_seeds) + + self._scenario_prompt = scenario_prompt + + self._private_info = private_info + + self._dialogue = [scenario_prompt] + + def __str__(self): + """String for debug purposes. No particular semantics are required.""" + if not self._game_setup: + return 'Setting up game...' + else: + return self._dialogue[-1] + + def _unravel_flat_action(self, action: int) -> Tuple[int, ...]: + """Returns an action tuple with action types separated. + + Args: + action: int + Returns: + action_tuple: tuple of ints, each int represents a separate component of + the combinatorial action-space + """ + idxs = np.unravel_index([action], self._num_actions) + return tuple([idx[0] for idx in idxs]) + + def _build_payoff_query(self, + payoff_query: str, + msg: str, + player_str: str) -> str: + """Construct prompt for LLM to perform sentiment analysis. 
+ + Args: + payoff_query: str, query to be formatted for llm + msg: str, message to be analyzed + player_str: str, player message is analyzed (scored) for + Returns: + str: str, payoff prompt to feed to LLM + """ + payoff_dict = {'m': msg, 'p': player_str} + return payoff_query.format(**payoff_dict) + + def _llm_is_terminal(self) -> bool: + ct.set_color(logging_utils.RED) + prefix = self.get_game().llm_termination_prompt.obs_trans_prefix + postfix = self.get_game().llm_termination_prompt.obs_trans_postfix + if prefix or postfix: + prompt = prefix + self.dialogue_str + postfix + term_obs = self.get_game().generate_response(prompt, + seed=DEFAULT_LLM_SEED) + logging.info(ct.color('LLM summary:\n%s'), term_obs) + else: + term_obs = self.dialogue_str + llm_termination = self.get_game().generate_bool( + self.get_game().llm_termination_prompt.query.format(msg=term_obs), + seed=DEFAULT_LLM_SEED) + logging.info(ct.color('LLM termination condition met? %s'), + str(llm_termination)) + return llm_termination + + def _names_from_validated_receiver(self, receiver: int, speaker: int + ) -> Tuple[Tuple[str, str, str], int]: + """Modify receiver if sending to self. Then return names of all roles. + + Args: + receiver: integer action indicating receiver to send message to + speaker: integer representing current message sender + Returns: + names: tuple of strings, (speaker_name, receiver_name, others_names) + receiver: integer representing validated receiver + """ + if (receiver >= self.get_game().num_players() + or speaker >= self.get_game().num_players()): + logging.info('Warning: rolling receiver/speaker to valid id.') + receiver = receiver % self.get_game().num_players() + speaker = speaker % self.get_game().num_players() + # overwrite speaking to self as speaking to all in header + receiver_name = '' + if receiver == speaker: + if len(self._names) > 2: + receiver_name = ALL_PLAYERS + receiver = -1 + else: + receiver = (receiver + 1) % self.get_game().num_players() + speaker_name = '' + others = [] + for idx, name in enumerate(self._names): + if idx == speaker: + speaker_name = name + elif idx == receiver: + receiver_name = name + elif receiver > -1: + others.append(name) + others_names = ', '.join(others) + names = (speaker_name, receiver_name, others_names) + return names, receiver + + def _legal_actions(self, player: int) -> List[int]: + """Returns a list of legal actions, sorted in ascending order.""" + assert player >= 0 + return list(range(int(np.prod(self._num_actions)))) + + def _apply_action(self, action: int): + """Reply to dialogue (for agents). + + Unravel action into to tuple (who to speak to, seed to use, etc.). Then + simulate action. 
+ + Args: + action: int + """ + if self.is_chance_node(): + if self._game_setup: + # action is an index into the list of seeds + # use this to write the message for the previous player + seed = self._llm_seeds[action] + assert self._player_action is not None + self._player_action = self._player_action or 0 + self._played_actions.append(self._player_action) + speaker_msg = self.action_to_msg(action=self._player_action, seed=seed) + self._apply_msg(speaker_msg) + if self.get_game().llm_termination_prompt: + self._llm_termination = self._llm_is_terminal() + else: + self._setup_game(self._init_state_configs[action]) + self._game_setup = True + else: + # record the action and save it to be played at chance node + self._player_action = action + self._current_speaker = int(self._current_player) + self._num_actions_played += 1 + + def _apply_msg(self, speaker_msg: str): + """Update dialogue history, increment curr player, and update is_terminal. + + Args: + speaker_msg: str + """ + logging.info('Speaker message:\n%s', speaker_msg) + self._dialogue.append(speaker_msg) + self._speakers.append(self._current_player) + + # increment the current player + self._current_player = ( + self._current_player + 1 + ) % self.get_game().num_players() + + self._player_action = None + if self.get_game().llm_termination_prompt: + self._llm_termination = self._llm_is_terminal() + + def apply_msg(self, speaker_msg: str): + """Reply to dialogue (for human players and interventions). + + Args: + speaker_msg: str + """ + self._num_actions_played += 1 + self._played_actions.append(-1) # assign -1 for human messages + self._apply_msg(speaker_msg) + + def action_to_prompt(self, + action: int, + seed: int, + header: header_utils.Header + ) -> Tuple[str, str]: + """Unravel action int to multidimensional action tuple and construct prompt. + + Args: + action: int, the action taken in the game + seed: int, llm seed + header: header_utils.Header, used to format a prompt + Returns: + prompt: str, formatted prompt to feed the LLM to generate a new message + header_plain: str, the formatted header without any private info / actions + """ + speaker = int(self._current_speaker) + action_dict = self.unravel_flat_action_to_dict(speaker, action) + receiver = action_dict['receiver'] + opts = {**action_dict['action'], **action_dict['info']} + + names, _ = self._names_from_validated_receiver(receiver, speaker) + speaker_name, receiver_name, others_names = names + header_plain = header.plain.format(sender=speaker_name, + receiver=receiver_name, + others=others_names) + + header_w_opts = header.w_opts.format(sender=speaker_name, + receiver=receiver_name, + others=others_names, + **opts) + # provide header with opts to llm for response + logging.info('Generating message (speaker=%d:%s)...', + speaker, + speaker_name) + + prompt = header.context + '\n\n' + self.dialogue_str + header_w_opts + + return prompt, header_plain + + def action_to_msg(self, action: int, seed: int) -> str: + """Unravel action int to multidimensional action tuple and construct msg. 
+ + Args: + action: int, the action taken in the game + seed: int, llm seed + Returns: + speaker_msg: str + """ + header = self.get_game().header + prompt, header_plain = self.action_to_prompt(action, seed, header) + logging.info('LLM prompt:\n%s', prompt) + + response = self.get_game().generate_response( + prompt=prompt, + seed=seed, + num_output_tokens=LLM_LENGTH_MESSAGE_TOKENS + ) + response = response[:LLM_LENGTH_MESSAGE_CHARS] + logging.info('LLM response:\n%s', response) + + first_special_char = text.first_special_char( + response, len(response), self.get_game().header.special_chars) + speaker_msg = header_plain + response[:first_special_char] + + return speaker_msg + + def unravel_flat_action_to_dict(self, speaker: int, action: int + ) -> Dict[str, Any]: + receiver, *extra_action_idxs = self._unravel_flat_action(action) + + extra_action_strs = [pa[i] for i, pa in zip(extra_action_idxs, + self._prompt_actions.values())] + action_dict = dict(zip(self.get_game().header.action_keys, + extra_action_strs)) + + extra_info_strs = [ + pi[speaker] for pi in self._private_info.values() + ] + info_dict = dict(zip(self.get_game().header.info_keys, extra_info_strs)) + + return {'receiver': receiver, + 'info': info_dict, + 'action': action_dict} + + def compute_rewards(self, dialogue: str) -> np.ndarray: + """Compute rewards for each player from a given dialogue string. + + Args: + dialogue: str, a single string with the entire dialogue thus far + Returns: + rewards: np.ndarray, len-num_players vector of floats + """ + ct.set_color(logging_utils.GREEN) + + rewards = np.zeros(self.get_game().num_players(), dtype=float) + + if (not self.is_terminal() and + self.get_game().reward_type == pyspiel.GameType.RewardModel.TERMINAL): + return rewards + + # gather private info to compute true underlying rewards + info_prefix = [] + for player, name in enumerate(self._names): + extra_info_strs = [pi[player] for pi in self._private_info.values()] + info_prefix_p = [ + f'{k}:\n{v}' for k, v in zip(self.get_game().header.info_keys, + extra_info_strs) + ] + info_prefix_p = name + '\n' + '\n'.join(info_prefix_p) + info_prefix.append(info_prefix_p) + info_prefix = '\n\n'.join(info_prefix) + + # compute rewards + for player, name in enumerate(self._names): + player_payoffs = [] + for p, payoff in enumerate(self.get_game().payoffs): + if payoff.obs_trans_prefix or payoff.obs_trans_postfix: + payoff_obs_prompt = (payoff.obs_trans_prefix + + dialogue + + payoff.obs_trans_postfix) + logging.info(ct.color('Scoring payoff (speaker=%d:%s)...'), + player, name) + logging.info(ct.color('LLM prompt:\n%s'), payoff_obs_prompt) + response = self.get_game().generate_response( + prompt=payoff_obs_prompt, + seed=DEFAULT_LLM_SEED, + num_output_tokens=LLM_LENGTH_PAYOFF_OBS_TOKENS + ) + payoff_obs = response[:LLM_LENGTH_PAYOFF_OBS_CHARS] + else: + payoff_obs = dialogue + payoff_obs = info_prefix + '\n\n' + payoff_obs + query = self._build_payoff_query(payoff.query, payoff_obs, name) + logging.info(ct.color('Calculating payoff %d (player=%d:%s)...'), + p, player, name) + logging.info(ct.color('LLM prompt:\n%s'), query) + response = self.get_game().generate_response( + prompt=query, + seed=DEFAULT_LLM_SEED, + num_output_tokens=LLM_LENGTH_SCORE_TOKENS + ) + logging.info(ct.color('LLM response:\n%s'), response) + + logging.info(ct.color('Extracting payoff %d (player=%d:%s)...'), + p, player, name) + query = (f'Extract out the final value for {name} as a single ' + + 'numeric value from the following payoff valuation. 
Do ' + + 'NOT show your work:\n\n' + + f'{response}\n\nResult: ') + logging.info(ct.color('LLM prompt:\n%s'), query) + response = self.get_game().generate_response( + prompt=query, + seed=DEFAULT_LLM_SEED, + num_output_tokens=LLM_LENGTH_SCORE_TOKENS + ) + logging.info(ct.color('LLM response:\n%s'), response) + + player_payoff = 0 # payoff defaults to 0 if LLM parsing fails + if text.retrieve_numeric_block(response): + player_payoff = int(text.retrieve_numeric_block(response)) + player_payoff = min(max(player_payoff, payoff.min), payoff.max) + else: + logging.warning( + ct.color('Payoff extraction from response failed:\n\n%s.'), + response) + logging.info(ct.color('Extracted integer payoff (%s): %d'), + name, player_payoff) + player_payoffs.append(player_payoff) + rewards[player] = self.get_game().aggregate_payoffs(player_payoffs) + + ct.reset() + + return rewards.astype(float) + + def current_player(self) -> int: + """Returns id of the next player to move, or TERMINAL if game is over.""" + if self.is_terminal(): + return pyspiel.PlayerId.TERMINAL + elif (self._player_action is not None or # if int, LLM msg is to be sampled + not self._game_setup): + return pyspiel.PlayerId.CHANCE + else: + return self._current_player + + def is_terminal(self) -> bool: + """Returns True if the game is over.""" + if ((self._num_actions_played < self.get_game().max_game_length()) + and not self._llm_termination): + return False + else: + return True + + def chance_outcomes(self): + """Returns the possible chance outcomes and their probabilities.""" + assert self.is_chance_node() + if self._game_setup: + outcomes = range(self.get_game().num_llm_seeds) + else: + outcomes = range(self.get_game().num_init_states) + p = 1.0 / len(outcomes) + return [(o, p) for o in outcomes] + + def _action_to_string(self, player, action): + """Action -> string.""" + if player == pyspiel.PlayerId.CHANCE: + if self._game_setup: + return f'Sampled LLM seed: {action}' + else: + return f'Sampled init state: {action}' + else: + action_unraveled = self.unravel_flat_action_to_dict(player, action) + action_dict = action_unraveled['action'] + return f'Action:\nint: {action}\ndict: {action_dict}' + + def returns(self) -> np.ndarray: + """Total reward for each player over the course of the game so far.""" + if not self.is_terminal(): + return np.zeros(self.get_game().num_players(), dtype=float) + else: + if self._returns is None: + self._returns = self.compute_rewards(self.dialogue_str) + return self._returns + + @property + def dialogue(self) -> List[str]: + return self._dialogue + + @property + def dialogue_str(self) -> str: + return ''.join(self._dialogue) + + @property + def private_info(self) -> Dict[str, List[str]]: + return self._private_info + + @property + def header(self) -> header_utils.Header: + return self.get_game().header + + @property + def vectorize(self) -> ...: + return self.get_game().vectorize + + @property + def obs(self) -> List[observation_utils.Observation]: + return self.get_game().obs + + @property + def names(self) -> List[str]: + """Returns list of str.""" + return self._names + + @property + def speakers(self) -> List[int]: + return self._speakers + + @property + def played_actions(self) -> List[int]: + return self._played_actions + + @property + def num_actions(self) -> Tuple[int, ...]: + return self._num_actions + + @property + def prompt_actions(self) -> OrderedDict[str, List[str]]: + return self._prompt_actions + + +class ChatGameObserverBase: + """Observer, conforming to the PyObserver interface (see 
observation.py).""" + + def __init__(self, + iig_obs_type: pyspiel.IIGObservationType, + params: Union[Dict[str, Any], None]): + """Initializes an empty observation tensor. + + Args: + iig_obs_type: a pyspiel.IIGObservationType + params: unused + """ + if params: + raise ValueError(f'Observation parameters not supported; passed {params}') + + self.iig_obs_type = iig_obs_type + if self.iig_obs_type.perfect_recall: + self._str_to_info_state_built = self._build_str_to_info_state() + else: + self._str_to_info_state_built = False + + # Determine which observation pieces we want to include. + pieces = [('player_id', MAX_PLAYERS, (MAX_PLAYERS,))] + if iig_obs_type.private_info == pyspiel.PrivateInfoType.SINGLE_PLAYER: + if iig_obs_type.perfect_recall: + pieces.append(('private_info', + LLM_LENGTH_MESSAGE_CHARS, + (LLM_LENGTH_MESSAGE_CHARS,))) + else: + pieces.append(('private_info', VEC_SIZE, (VEC_SIZE,))) + if iig_obs_type.public_info: + if iig_obs_type.perfect_recall: + max_msgs = MAX_PLAYERS * MAX_NUM_REPLIES + pieces.append(('scenario_prompt', + LLM_LENGTH_MESSAGE_CHARS, + (LLM_LENGTH_MESSAGE_CHARS))) + pieces.append(('senders', + max_msgs * MAX_PLAYERS, + (max_msgs, MAX_PLAYERS))) + pieces.append(('receivers', + max_msgs * MAX_PLAYERS, + (max_msgs, MAX_PLAYERS))) + # record prompt actions as lossless tokenization since we do not know + # how many actions a game will be defined with. alternatively, we could + # record the action integer and require the user to unravel the integer + # on the policy network side. for now, we assume the prompt action is at + # most LLM_LENGTH_MESSAGE_CHARS subwords. we also assume everyone can + # see everyone's actions. + pieces.append(('prompt_actions', + max_msgs * LLM_LENGTH_MESSAGE_CHARS, + (max_msgs, LLM_LENGTH_MESSAGE_CHARS))) + pieces.append(('messages', + max_msgs * LLM_LENGTH_MESSAGE_CHARS, + (max_msgs, LLM_LENGTH_MESSAGE_CHARS))) + else: + pieces.append(('dialogue', VEC_SIZE, (VEC_SIZE,))) + + # Build the single flat tensor. + total_size = sum(size for _, size, _ in pieces) + self.tensor = np.zeros(total_size, np.float32) + + # Build the named & reshaped views of the bits of the flat tensor. + self.dict = {} + index = 0 + for name, size, shape in pieces: + self.dict[name] = self.tensor[index:index + size].reshape(shape) + index += size + + def _build_str_to_info_state(self) -> bool: + """Initializes map from str to infostate. 
Returns True if successful.""" + # Build a string tokenizer here + # --------------------------- # + # Build a string tokenizer here + return True + + def _info_state(self, input_text: str, obs_size: int) -> np.ndarray: + """Returns a len-obs_size np.ndarray given an input string and obs_size.""" + if not self._str_to_info_state_built: + raise ValueError('String to info state mapping not built!') + del input_text + # Vectorize a str (ideally lossless for info state) using a tokenizer here + # ---------------------------------------------------------------------- # + # Vectorize a str (ideally lossless for info state) using a tokenizer here + return np.zeros(obs_size, dtype=np.int32) + + def set_from(self, state: ChatGameState, player: int): + """Updates `tensor` and `dict` to reflect `state` from PoV of `player`.""" + ct.set_color(logging_utils.PURPLE) + + self.tensor.fill(0) + self.dict['player_id'][player] = 1 + + extra_info_strs = [pi[player] for pi in state.private_info.values()] + info_prefix = [ + f'{k}:\n{v}' for k, v in zip(state.header.info_keys, extra_info_strs) + ] + info_prefix = '\n'.join(info_prefix) + if 'private_info' in self.dict: + if self.iig_obs_type.perfect_recall: + private_info = self._info_state(info_prefix, LLM_LENGTH_MESSAGE_CHARS) + else: + private_info = state.vectorize(info_prefix, VEC_SIZE) + self.dict['private_info'] = private_info + + if self.iig_obs_type.public_info and self.iig_obs_type.perfect_recall: + self.dict['scenario_prompt'] = self._info_state(state.dialogue[0], + LLM_LENGTH_MESSAGE_CHARS) + for i, (speaker, played_action) in enumerate(zip(state.speakers, + state.played_actions)): + self.dict['senders'][i][speaker] = 1 + if played_action >= 0: # played_action = -1 indicates human player + action_dict = state.unravel_flat_action_to_dict(speaker, + played_action) + self.dict['receivers'][i][action_dict['receiver']] = 1 + pa = action_dict['action'] + action_str = '\n'.join([f'{k}: {v}' for k, v in pa.items()]) + self.dict['prompt_actions'][i] = self._info_state( + action_str, LLM_LENGTH_MESSAGE_CHARS) + + self.dict['messages'][i] = self._info_state(state.dialogue[i + 1], + LLM_LENGTH_MESSAGE_CHARS) + + if 'dialogue' in self.dict: + obs_prompt = (state.obs[player].obs_trans_prefix + + state.dialogue_str + + state.obs[player].obs_trans_postfix) + logging.info(ct.color('Generating observation (speaker=%d:%s)...'), + player, + state.names[player]) + logging.info(ct.color('LLM prompt:\n%s'), obs_prompt) + response = state.get_game().generate_response( + prompt=obs_prompt, + seed=DEFAULT_LLM_SEED, + num_output_tokens=LLM_LENGTH_OBS_TOKENS + ) + logging.info(ct.color('LLM response:\n%s'), response) + obs = response[:LLM_LENGTH_OBS_CHARS] + + obs = info_prefix + '\n' + obs + + logging.info(ct.color('Observation (speaker=%d:%s):\n%s'), + player, + state.names[player], + obs) + logging.info(ct.color('Vectorizing observation...')) + observation = state.vectorize(obs, VEC_SIZE) + logging.info(ct.color('Vectorized observation (speaker=%d:%s):\n%s'), + player, + state.names[player], + observation) + self.dict['dialogue'] = observation + + ct.reset() + + def string_from(self, state: ChatGameState, player: int) -> str: + """Observation of `state` from the PoV of `player`, as a string.""" + ct.set_color(logging_utils.PURPLE) + + extra_info_strs = [pi[player] for pi in state.private_info.values()] + info_prefix = [ + f'{k}:\n{v}' for k, v in zip(state.header.info_keys, extra_info_strs) + ] + info_prefix = '\n'.join(info_prefix) + + if self.iig_obs_type.perfect_recall: 
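+      # With perfect recall, the string observation is the player's private
+      # info followed by the entire dialogue so far (no LLM summarization).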
+ return info_prefix + '\n\nFull Dialogue\n\n' + state.dialogue_str + else: + obs_prompt = (state.obs[player].obs_trans_prefix + + state.dialogue_str + + state.obs[player].obs_trans_postfix) + logging.info(ct.color('Generating observation (speaker=%d:%s)...'), + player, + state.names[player]) + logging.info(ct.color('LLM prompt:\n%s'), obs_prompt) + response = state.get_game().generate_response( + prompt=obs_prompt, + seed=DEFAULT_LLM_SEED, + num_output_tokens=LLM_LENGTH_OBS_TOKENS + ) + logging.info(ct.color('LLM response:\n%s'), response) + obs = response[:LLM_LENGTH_OBS_CHARS] + + obs = info_prefix + '\n' + obs + + obs_str = 'Observation (speaker={:d}:{:s}):\n{:s}'.format( + player, state.names[player], obs) + + ct.reset() + + return obs_str + + +class BaseChatGame(pyspiel.Game): + """Base Chat game.""" + + # pylint:disable=dangerous-default-value + def __init__( + self, + params: Dict[str, Any] = DEFAULT_PARAMS, + ): + """Constructor. + + BaseChatGame is meant to be inherited from. Do not call its init directly. + + Args: + params: dict, parameter dict with the following keys + + num_distinct_actions- int, # of actions at each info set + num_llm_seeds- int, # of seeds to use for generating LLM response + num_init_states- int, # of game setups / configurations / scenarios + num_players- int, # of speakers (action: recipient) on the message chain + players- int, # of speakers (action: recipient) on the message chain + OPTIONAL. ONLY USED FOR INTERNAL OPEN_SPIEL TESTING! + min_utility- float, minimum utility any player can attain + max_utility- float, maximum utility any player can attain + num_max_replies- int, total # of messages each player can send in an + episode + """ + if 'silence_logging' in params and params['silence_logging']: + logging.set_verbosity(logging.ERROR) # silence internal game logging + self._num_distinct_actions = params['num_distinct_actions'] + if params['players'] > 0: + logging.warning('Only meant for open_spiel testing!') + num_players = params['players'] + self._num_players = num_players + else: + self._num_players = params['num_players'] + self._num_llm_seeds = params['num_llm_seeds'] + self._num_init_states = params['num_init_states'] + self._min_utility = params['min_utility'] + self._max_utility = params['max_utility'] + self._num_max_replies = params['num_max_replies'] + if params['num_max_replies'] > MAX_NUM_REPLIES: + raise ValueError( + f'num_max_replies ({self._num_max_replies}) exceeds ' + + f'MAX_NUM_REPLIES ({MAX_NUM_REPLIES})') + + self._max_game_length = self._num_max_replies * self._num_players + + self._game_info = pyspiel.GameInfo( + num_distinct_actions=self._num_distinct_actions, + max_chance_outcomes=max(self._num_llm_seeds, self._num_init_states), + num_players=self._num_players, + min_utility=self._min_utility, + max_utility=self._max_utility, + max_game_length=self._max_game_length) + + def _load_chat_game(self, + observations: List[observation_utils.Observation], + vectorize: ..., + header: header_utils.Header, + payoffs: List[payoff_utils.Payoff], + aggregate_payoffs: Callable[[List[int]], float] = np.mean, + given_names: Union[List[str], None] = None, + given_llm_seeds: Union[List[int], None] = None, + given_prompt_actions: Union[OrderedDict[str, List[str]], + None] = None, + given_private_info: Union[OrderedDict[str, List[str]], + None] = None, + initial_scenario: Union[Any, None] = None, + num_names: int = 2, + num_prompt_actions: Tuple[int, ...] = (4,), + num_private_info: Tuple[int, ...] 
= (4,), + examples_names: Union[List[str], None] = None, + examples_prompt_actions: Union[OrderedDict[str, + List[str]], + None] = None, + examples_private_info: Union[OrderedDict[str, List[str]], + None] = None, + examples_scenarios: Union[List[Any], None] = None, + llm_list_suffix: str = 'Continue the list from here.', + llm_termination_prompt: Union[term_utils.Termination, + None] = None, + seed: Union[int, None] = None + ): + """Constructor. + + Args: + observations: List of Observation items used for prompting llms to extract + observations (string features) from dialogues + vectorize: converts any length string into a length obs_size vector + + header: List of Header items used for prompting llms to take actions + (construct messages) based on latent action variables and private + information + + payoffs: list of Payoff items used for constructing queries and scoring + dialogue for each agent + aggregate_payoffs: function that maps from vector to nonnegative scalar + + given_names: list of strings representing names of players + given_llm_seeds: list of ints to seed llm with to generate each message + given_prompt_actions: ordered dict mapping action_keys + (see envs/utils/header) to list of strings representing the set of + available prompt actions (e.g., personalities or msg tones). Overrides + examples_prompt_actions. + given_private_info: ordered dict mapping info_keys + (see envs/utils/header) to length-[num_players] list of strings + representing the private information available to each player (e.g., + inventory / valuations of fruits). Overrides examples_private_info. + initial_scenario: Scenario item representing an initial message + + num_names: int, # of names to generate (can be greater than # of players) + num_prompt_actions: tuple of int, # of prompts to consider for each + action_key (i.e., size of action space for each prompt action) + num_private_info: tuple of int, # of private info states to consider for + each info_key + + examples_names: list of strings representing examples of names of players + examples_prompt_actions: ordered dict mapping action_keys + (see envs/utils/header) to list of strings representing examples of + prompt actions (e.g., personalities or msg tones). + examples_private_info: ordered dict mapping info_keys + (see envs/utils/header) to list of strings representing examples of + private information available to players (e.g., inventory / valuations + of fruits). Overrides examples_private_info. + examples_scenarios: list of Scenario items used for meta-generating new + scenarios + + llm_list_suffix: str, gets appended to a prompt to induce an llm to + generate a list of items (different llms like different prompts). + chinchilla likes ``, llmit likes `Continue the list from here.` + llm_termination_prompt: Termination item w/ [attrs query, + obs_trans_postfix, postfix]. llm will be asked to score a binary + response `yes`/`no` given query.format(msg=last_msg) to determine + whether the episode has reached a terminal state (e.g., deal has been + agreed upon). 
default is empty string in which case llm terminal + condition is left unused and episode terminates after + num_players * num_max_replies + + seed: int, master seed for experiment (used to generate all subsequent + seeds for any random generation) + """ + self._obs = observations + self._vectorize = vectorize + + self._header = header + + self._payoffs = payoffs + self._aggregate_payoffs = aggregate_payoffs + self._max_score = aggregate_payoffs([p.max for p in payoffs]) + self._reward_type = REWARD_MODEL + + self._given_names = given_names + self._given_llm_seeds = given_llm_seeds + self._given_prompt_actions = given_prompt_actions + self._given_private_info = given_private_info + self._initial_scenario = initial_scenario + + self._num_names = max(num_names, self._num_players) + self._num_prompt_actions = num_prompt_actions + self._num_private_info = num_private_info + + self._examples_names = examples_names + self._examples_prompt_actions = examples_prompt_actions + self._examples_private_info = examples_private_info + self._examples_scenarios = examples_scenarios + + self._llm_list_suffix = llm_list_suffix + if llm_termination_prompt: + query = llm_termination_prompt.query + parsed = next(iter(string.Formatter().parse(query)), '') + if not parsed or parsed[1] != 'msg': + raise ValueError('Invalid llm_termination_prompt: ' + + f'{query}. It must include a ' + + 'single formatting kwarg {msg}') + self._llm_termination_prompt = llm_termination_prompt + + self._seed = seed + self._rnd = np.random.RandomState(seed) + + if self._given_names: + if len(self._given_names) != self._num_players: + raise ValueError('Number of given_names does not match num_players!') + self._names = self._given_names + self._names_gen = False + else: + retrieve_name = text.retrieve_alpha_block + self._names = self.generate_prompts('name', + self._examples_names, + self._num_names, + retrieve_name) + logging.info(ct.color('Generated names:\n%s', logging_utils.YELLOW), + '\n'.join(self._names)) # pylint:disable=logging-too-many-args + if len(self._names) < self._num_players: + raise ValueError(f'Generated too few names! 
{len(self._names)} < ' + + f'{self._num_players}.') + self._names_gen = True + + if self._given_llm_seeds: + if len(self._given_llm_seeds) != self._num_llm_seeds: + raise ValueError('Number of given_llm_seeds does not match ' + + 'num_llm_seeds!') + self._llm_seeds = self._given_llm_seeds + self._llm_seeds_gen = False + else: + self._llm_seeds = list(self._rnd.randint(MIN_RND_SEED, MAX_RND_SEED, + size=self._num_llm_seeds)) + logging.info(ct.color('Generated action seeds:%s', logging_utils.YELLOW), + self._llm_seeds) # pylint:disable=logging-too-many-args + self._llm_seeds_gen = True + + # loop over every action key in header action keys + # if action key is in given prompt action, use it and overwrite + # else, generate it + def retrieve_prompt(llm_response: str) -> str: + useless_chars = (' ', '\n') + special_chars = ITEM_PREFIX + for char in useless_chars: + special_chars = special_chars.strip(char) + special_chars = tuple(special_chars) + return text.retrieve_special_char_block(llm_response, + special_chars=special_chars, + useless_chars=useless_chars) + + prompt_action_lists = [] + if not self._header.action_keys: + self._num_prompt_actions = tuple([]) + for i, action_key in enumerate(self._header.action_keys): + if (self._given_prompt_actions and + action_key in self._given_prompt_actions): + action_list = self._given_prompt_actions[action_key] + if len(action_list) != self._num_prompt_actions[i]: + logging.info(ct.color(f'Overwriting num_prompt_actions[{i}]=' + + f'{self._num_prompt_actions[i]} to reflect ' + + f'given len-{len(action_list)} prompt ' + + f'action list for action_key={action_key}.', + color=logging_utils.YELLOW)) + if isinstance(self._num_prompt_actions, tuple): + self._num_prompt_actions = list(self._num_prompt_actions) + self._num_prompt_actions[i] = len(action_list) + else: + examples = self._examples_prompt_actions[action_key] + action_list = self.generate_prompts(action_key, + examples, + self._num_prompt_actions[i], + retrieve_prompt) + logging.info(ct.color( + 'Generated prompt actions for action key = %s:\n%s', + color=logging_utils.YELLOW), + action_key, '\n-----\n'.join(action_list)) + prompt_action_lists.append(action_list) + self._prompt_actions = collections.OrderedDict(zip(self._header.action_keys, + prompt_action_lists)) + if isinstance(self._num_prompt_actions, list): + self._num_prompt_actions = tuple(self._num_prompt_actions) + + if (self._initial_scenario + and self._given_private_info + and tuple(self._given_private_info.keys()) != self._header.info_keys): + raise ValueError('Must define private info for each player if setting' + + ' an initial scenario.') + + private_info_lists = [] + if not self._header.info_keys: + self._num_private_info = tuple([]) + for i, info_key in enumerate(self._header.info_keys): + if self._given_private_info and info_key in self._given_private_info: + info_list = self._given_private_info[info_key] + if self._initial_scenario: + if len(info_list) < self._num_players: + raise ValueError('Must define at least a single private info for ' + + 'each player if setting an initial scenario. 
' + + f'Num_players={self._num_players} but only given' + + f' len-{len(info_list)} private info list for ' + + f'info_key={info_key}.') + else: + info_list = info_list[:self._num_players] + if len(info_list) != self._num_private_info[i]: + logging.info(ct.color(f'Overwriting num_private_info[{i}]=' + + f'{self._num_private_info[i]} to reflect ' + + f'given len-{len(info_list)} private info ' + + f'list for info_key={info_key}.', + color=logging_utils.YELLOW)) + if isinstance(self._num_private_info, tuple): + self._num_private_info = list(self._num_private_info) + self._num_private_info[i] = len(info_list) + else: + examples = self._examples_private_info[info_key] + info_list = self.generate_prompts(info_key, + examples, + self._num_private_info[i], + retrieve_prompt) + logging.info(ct.color('Generated private info for info key = %s:\n%s', + color=logging_utils.YELLOW), + info_key, '\n-----\n'.join(info_list)) + private_info_lists.append(info_list) + self._private_info = collections.OrderedDict(zip(self._header.info_keys, + private_info_lists)) + if isinstance(self._num_private_info, list): + self._num_private_info = tuple(self._num_private_info) + + if self._examples_scenarios: + self._meta_query = self._build_meta_query(self._examples_scenarios) + else: + self._meta_query = None + + if self._initial_scenario: + valid = self._initial_scenario_is_valid(self._initial_scenario) + assert valid, ('Scenario does not match given game spec (names, actions' + + ', info, ...') + self._initial_scenario = self._initial_scenario + else: + self._initial_scenario = None + + self._num_actions = ( + self._num_players, + ) + tuple(self._num_prompt_actions) + + na = int(np.prod(self._num_actions)) + if na != self._num_distinct_actions: + raise ValueError(f'Size of prompt action space ({na}) does not match ' + + f'num_distinct_actions ({self._num_distinct_actions})!') + + self._initial_state_configs = self.new_initial_state_configs() + + def _generate_response(self, prompt: str, seed: int, + num_output_tokens: Union[int, None] = None) -> str: + """Returns LLM generated string given prompt and seed.""" + return '' + + def _generate_bool(self, prompt: str, seed: int) -> bool: + """Returns LLM generated boolean given prompt and seed.""" + return False + + def _build_meta_query(self, scenarios=List[Tuple]) -> str: + """Build prompt with several scenarios for generating new scenarios.""" + wrapped_scenarios = [] + for s in scenarios: + scenario_header_unformatted = self._header.w_opts + s.msg + s_asdict = dataclasses.asdict(s) + scenario_header = scenario_header_unformatted.format(**s_asdict, + others=ALL_PLAYERS) + wrapped_scenarios.append(scenario_header) + return ''.join(wrapped_scenarios) + + def _initial_scenario_is_valid(self, scenario: Any) -> bool: + """Check all components of scenario are well defined and return bool.""" + fields = list(scenario.__dataclass_fields__.keys()) + + req_fields = ['sender', 'receiver'] + list(self._header.action_keys) + req_fields += list(self._header.info_keys) + valid_fields = True + for req_field in req_fields: + valid_fields = (valid_fields and req_field in fields) + + if not valid_fields: + raise ValueError(f'Scenario must define required fields: {req_fields}. 
' + + f'Found fields: {fields}') + + valid_players = (scenario.sender in self._names + and scenario.receiver in self._names + [ALL_PLAYERS]) + + scenario_dict = dataclasses.asdict(scenario) + + valid_actions = True + for key in self._header.action_keys: + valid_actions = (valid_actions and + key in scenario_dict and + scenario_dict[key] in self._prompt_actions[key]) + + valid_info = True + for key in self._header.info_keys: + # private_info[key][i] is unique to player i + # initial scenario contains player 0's private info and must match the + # first item in the list of private information provided + valid_info = (valid_info and + key in scenario_dict and + scenario_dict[key] == self._private_info[key][0]) + + valid = valid_players and valid_actions and valid_info + + return valid + + def generate_prompts(self, key, examples, num_prompts, + retrieve_prompt: Callable[[str], str]) -> List[str]: + """Generates a list of distinct prompts from an initial list. + + Args: + key: str, (descriptive) name of prompt type + examples: list of str, example prompts to seed llm + num_prompts: int, number of distinct prompts to generate + retrieve_prompt: function to retrieve example from string + + Returns: + prompts: list of strings + """ + ct.set_color(logging_utils.CYAN) + + answers = set() + num_gen = LLM_LIST_GEN_ATTEMPTS + prompt = ['#### INSTRUCTIONS #####', + 'Given a list of items from a given category, continue the list' + + ' and generate an additional item from the same category. The ' + + f'category is {key}s. Use `{ITEM_PREFIX}` to denote separate ' + + 'items. Do not include the list of items from the input prompt ' + + 'in your output response.'] + prompt = '\n'.join(text.wrap(prompt)) + '\n' + prompt += ('Input:\n' + ITEM_PREFIX + + ('\n' + ITEM_PREFIX).join(examples) + '\n' + + self._llm_list_suffix) + logging.info(ct.color('Generating list of distinct prompts...')) + logging.info(ct.color('Example prompt:\n%s'), prompt) + for seed in self._rnd.randint(MIN_RND_SEED, MAX_RND_SEED, size=num_gen): + logging.info(ct.color('Generating %s (seed=%s)'), key, seed) + response = self.generate_response( + prompt=prompt, + seed=seed, + num_output_tokens=LLM_LENGTH_LIST_OF_WORDS_TOKENS + ) + logging.info(ct.color('LLM response\n%s'), response) + answer = retrieve_prompt(response) + if answer and answer not in answers: + answers.add(answer) + if len(answers) >= num_prompts: + return list(answers) + num_distinct = len(answers) + if len(answers) < num_prompts: + logging.warning(ct.color( + 'Only %d distinct prompts generated for %d desired:\n%s.'), + num_distinct, num_prompts, answers) + + ct.reset() + + return list(answers) + + def generate_scenario(self) -> Tuple[List[str], + OrderedDict[str, List[str]], + Any]: + """Generates a new game config from examples. 
+ + Returns: + given_names: list of str + given_private_info: OrderedDict(str: list of str) + initial_scenario(msg, sender, receiver, **private_info, **prompt_actions) + """ + player_names = self._rnd.choice(self._names, + size=self._num_players, + replace=False) + sender, receiver = player_names[:2] + if self._num_players > 2: + others = ', '.join(player_names[2:]) + else: + others = '' + + pa_lists = self._prompt_actions.values() + prompt_action_vals = [self._rnd.choice(pa_list) for pa_list in pa_lists] + prompt_actions_header = collections.OrderedDict(zip( + self._header.action_keys, prompt_action_vals)) + + pi_lists = self._private_info.values() + private_info_vals = [ + self._rnd.choice(pi_list, size=self._num_players) + for pi_list in pi_lists + ] + private_info = collections.OrderedDict(zip(self._header.info_keys, + private_info_vals)) + private_info_vals_player_0 = [piv[0] for piv in private_info_vals] + private_info_header = collections.OrderedDict(zip( + self._header.info_keys, private_info_vals_player_0)) + + opts = prompt_actions_header + opts.update(private_info_header) + + # scenarios are generated drawing from a fixed set of personalities + header = self._header.w_opts.format(sender=sender, + receiver=receiver, + others=others, + **opts) + + # generate a random scenario + # need to generate new scenario with specific players (i.e. names). Can + # 1) try to generate multiple scenarios at once and parse output + # 2) generate a single scenario by varying the LLM seed + # 3) can rely on the randomness in names and private info to induce new + # scenarios + # we are currently going with a mix of options 2) and 3) + logging.info('Generating initial scenario...') + logging.info('Scenario prompt:\n%s', self._meta_query + header) + response = self.generate_response( + prompt=self._meta_query + header, + seed=self._seed, + num_output_tokens=LLM_LENGTH_MESSAGE_TOKENS + ) + response = response[:LLM_LENGTH_MESSAGE_CHARS] + logging.info('LLM response:\n%s', response) + examples = [] + ptr = 0 + i = 0 + augmented_response = header + response + while ptr < len(augmented_response): + generated_example = self._header.strip_msg(augmented_response[ptr:], + sender) + if not generated_example: + break + ptr += len(generated_example) + generated_example = generated_example.strip('\n') + logging.info('*Generated Example %d:\n%s', i, generated_example) + i += 1 + examples.append(generated_example) + # grab first generated scenario + scenario_prompt = examples[0] + logging.info('Example 0 selected') + actions = collections.OrderedDict(zip(['player_names'], + [player_names])) + actions.update(self._prompt_actions) + + given_names = player_names + given_private_info = private_info + scenario_class = self._examples_scenarios[0].__class__ + initial_scenario = scenario_class(msg=scenario_prompt, + sender=sender, + receiver=receiver, + **opts) + + return (given_names, given_private_info, initial_scenario) + + def new_initial_state_configs(self) -> Tuple[InitialStateConfiguration, ...]: + """Generates a tuple of new dialogue game(s). 
+ + Returns: + Tuple of InitialStateConfiguration(s) + """ + raw_setups = [] + if self._initial_scenario: + raw_setups.append( + (self._names, self._private_info, self._initial_scenario)) + else: + for _ in range(self._num_init_states): + raw_setups.append(self.generate_scenario()) + + initial_state_configs = [] + for raw_setup in raw_setups: + names, private_info, scenario = raw_setup + scenario_prompt_unformatted = self._header.plain + scenario.msg + scenario_prompt = scenario_prompt_unformatted.format( + sender=scenario.sender, + receiver=scenario.receiver, + others=ALL_PLAYERS) + actions = collections.OrderedDict(zip(['player_names'], [names])) + actions.update(self._prompt_actions) + initial_state_config = InitialStateConfiguration( + actions=actions, + seeds=self._llm_seeds, + scenario_prompt=scenario_prompt, + private_info=private_info + ) + initial_state_configs.append(initial_state_config) + + return tuple(initial_state_configs) + + @property + def game_info(self) -> pyspiel.GameInfo: + return self._game_info + + @property + def obs(self) -> List[observation_utils.Observation]: + return self._obs + + @property + def vectorize(self) -> Any: + return self._vectorize + + @property + def header(self) -> header_utils.Header: + return self._header + + @property + def payoffs(self) -> List[payoff_utils.Payoff]: + return self._payoffs + + @property + def aggregate_payoffs(self) -> Callable[[List[int]], float]: + return self._aggregate_payoffs + + @property + def reward_type(self) -> pyspiel.GameType.RewardModel: + return self._reward_type + + @property + def rnd(self) -> np.random.RandomState: + return self._rnd + + @property + def llm_termination_prompt(self) -> Union[term_utils.Termination, None]: + return self._llm_termination_prompt + + @property + def llm_seeds(self) -> List[int]: + return self._llm_seeds + + @property + def num_llm_seeds(self) -> int: + return self._num_llm_seeds + + @property + def num_init_states(self) -> int: + return self._num_init_states + + @property + def given_prompt_actions(self) -> Union[OrderedDict[str, List[str]], None]: + return self._given_prompt_actions diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
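Note (illustrative only, not part of the vendored OpenSpiel diff): the `config_*.py` files added below all follow the contract documented in `BaseChatGame._load_chat_game` above, where the product of `(num_players,)` and the per-key prompt-action counts must equal `params['num_distinct_actions']`; otherwise `_load_chat_game` raises a `ValueError`. A minimal sketch of that relationship, assuming a single hypothetical action key `'tone'` (the real configs use `header.action_keys[0]`):

```python
# Sketch of the params / prompt-action consistency rule enforced by
# BaseChatGame._load_chat_game. Names below ('tone', the utility bounds)
# are placeholders, not values from any specific config in this diff.
import collections
import numpy as np

num_players = 2
tones = ['calm', 'assertive', 'submissive', 'any']
given_prompt_actions = collections.OrderedDict([('tone', tones)])

# Action space is (receiver,) plus one axis per prompt-action key; its product
# must match params['num_distinct_actions'].
num_actions = (num_players,) + tuple(len(v) for v in given_prompt_actions.values())
params = {'num_distinct_actions': int(np.prod(num_actions)),  # 2 * 4 = 8
          'num_llm_seeds': 1,
          'num_init_states': 1,
          'num_players': num_players,
          'min_utility': 0.0,
          'max_utility': 10.0,
          'num_max_replies': 1}
assert params['num_distinct_actions'] == num_players * len(tones)
```

Each config below computes `num_distinct_actions` the same way (e.g. `num_players * num_styles` in `config_debate.py`, `num_players * num_tones` in the tone-based configs), which is why changing a prompt-action list also requires updating `params`.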
+ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_debate.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_debate.py new file mode 100644 index 0000000..757b7c4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_debate.py @@ -0,0 +1,94 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A pyspiel config for a debate with randomly named debaters. +""" + +import collections + +from ml_collections import config_dict + +from open_spiel.python.games.chat_games.envs.base_envs import debate_with_style_info as env_debate_with_style_info +from open_spiel.python.games.chat_games.envs.observations import summary_debate +from open_spiel.python.games.chat_games.envs.observations import utils as obs_utils +from open_spiel.python.games.chat_games.envs.payoffs import debate as payoffs_debate +from open_spiel.python.games.chat_games.envs.scenarios.actions import arguments +from open_spiel.python.games.chat_games.envs.scenarios.domains import debate as scenario_debate +from open_spiel.python.games.chat_games.envs.scenarios.players import names as names_debate + + +def get_config(): + """Get configuration for chat game.""" + config = config_dict.ConfigDict() + + num_players = 2 + + observations = [ + obs_utils.Observation(summary_debate.PREFIX, summary_debate.POSTFIX) + for _ in range(num_players) + ] + + header = env_debate_with_style_info.HEADER + + payoffs = [payoffs_debate.PAYOFF] + + examples_names = names_debate.NAMES + + given_prompt_actions = collections.OrderedDict() + given_prompt_actions[header.action_keys[0]] = arguments.STYLES + ['any'] + num_styles = len(arguments.STYLES) + 1 + + given_private_info = collections.OrderedDict() + given_private_info['info'] = ['Argue for the topic statement.', + 'Argue against the topic statement.'] + given_private_info['topic'] = [scenario_debate.TOPIC_B, + scenario_debate.TOPIC_B] + + scenario_a = env_debate_with_style_info.Scenario( + '', + 'Bob', + 'Alice', + 'logos', + scenario_debate.TOPIC_B, + 'Argue for the topic statement.') + + examples_scenarios = [scenario_a] + + llm_termination_prompt = scenario_debate.LLM_TERMINATION_PROMPT + + params = {'num_distinct_actions': num_players * num_styles, + 'num_llm_seeds': 2, + 'num_init_states': 3, + 'num_players': num_players, + 'min_utility': min([float(p.min) for p in payoffs]), + 'max_utility': max([float(p.max) for p in payoffs]), + 'num_max_replies': 1, + 'silence_logging': True} + + config.params = params + + config.game = config_dict.ConfigDict() + config.game.observations = observations + config.game.header = header + config.game.payoffs = payoffs + config.game.given_prompt_actions = given_prompt_actions + config.game.num_names = 10 + config.game.num_private_info = (2, 2) + config.game.examples_names = examples_names + config.game.given_private_info = given_private_info + config.game.examples_scenarios = examples_scenarios + 
config.game.llm_list_suffix = 'Output: ' + config.game.llm_termination_prompt = llm_termination_prompt + + return config diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_debate_fixed.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_debate_fixed.py new file mode 100644 index 0000000..b234e47 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_debate_fixed.py @@ -0,0 +1,87 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A pyspiel config for a fixed debate. +""" + +import collections + +from ml_collections import config_dict + +from open_spiel.python.games.chat_games.envs.base_envs import debate_with_style_info as env_debate_with_style_info +from open_spiel.python.games.chat_games.envs.observations import summary_debate +from open_spiel.python.games.chat_games.envs.observations import utils as obs_utils +from open_spiel.python.games.chat_games.envs.payoffs import debate as payoffs_debate +from open_spiel.python.games.chat_games.envs.scenarios.actions import arguments +from open_spiel.python.games.chat_games.envs.scenarios.domains import debate as scenario_debate + + +def get_config(): + """Get configuration for chat game.""" + config = config_dict.ConfigDict() + + num_players = 2 + + observations = [ + obs_utils.Observation(summary_debate.PREFIX, summary_debate.POSTFIX) + for _ in range(num_players) + ] + + header = env_debate_with_style_info.HEADER + + payoffs = [payoffs_debate.PAYOFF] + + given_prompt_actions = collections.OrderedDict() + given_prompt_actions[header.action_keys[0]] = arguments.STYLES + ['any'] + num_styles = len(arguments.STYLES) + 1 + + given_private_info = collections.OrderedDict() + given_private_info['info'] = ['Argue for the topic statement.', + 'Argue against the topic statement.'] + given_private_info['topic'] = [scenario_debate.TOPIC_B, + scenario_debate.TOPIC_B] + + initial_scenario = env_debate_with_style_info.Scenario( + '', + 'Bob', + 'Alice', + 'logos', + scenario_debate.TOPIC_B, + 'Argue for the topic statement.') + + llm_termination_prompt = scenario_debate.LLM_TERMINATION_PROMPT + + params = {'num_distinct_actions': num_players * num_styles, + 'num_llm_seeds': 2, + 'num_init_states': 1, + 'num_players': num_players, + 'min_utility': min([float(p.min) for p in payoffs]), + 'max_utility': max([float(p.max) for p in payoffs]), + 'num_max_replies': 1} + + config.params = params + + config.game = config_dict.ConfigDict() + config.game.observations = observations + config.game.header = header + config.game.payoffs = payoffs + config.game.given_prompt_actions = given_prompt_actions + config.game.num_private_info = (2, 2) + config.game.given_names = ['Bob', 'Alice'] + config.game.given_private_info = given_private_info + config.game.initial_scenario = initial_scenario + config.game.llm_list_suffix = 'Output: ' + config.game.llm_termination_prompt = llm_termination_prompt + + return 
config diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_fixed_mock.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_fixed_mock.py new file mode 100644 index 0000000..31c08ea --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_fixed_mock.py @@ -0,0 +1,88 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A dm_env config for testing a given fixed game with prompt actions. +""" + +import collections + +from ml_collections import config_dict + +from open_spiel.python.games.chat_games.envs.base_envs import email_with_tone +from open_spiel.python.games.chat_games.envs.observations import utils as obs_utils +from open_spiel.python.games.chat_games.envs.payoffs import sentiment +from open_spiel.python.games.chat_games.envs.termination import utils as term_utils +from open_spiel.python.games.chat_games.envs.utils import text as text_utils + + +def get_config(): + """Get configuration for chat game.""" + config = config_dict.ConfigDict() + + observations = [obs_utils.Observation(), + obs_utils.Observation()] + + header = email_with_tone.HEADER + + payoffs = [sentiment.PAYOFF, + sentiment.PAYOFF] + + given_names = ['Bob', + 'Suzy'] + num_players = len(given_names) + + given_llm_seeds = [12345] + + given_prompt_actions = collections.OrderedDict() + tones = ['Happy', + 'Sad', + 'Angry', + 'Calm'] + given_prompt_actions[header.action_keys[0]] = tones + num_tones = len(tones) + + # Vacuous message + message = '\n\n'.join(text_utils.wrap( + ['Hi {receiver},', 'I hope you are well,', 'Best,', '{sender}'] + )) + initial_scenario = email_with_tone.Scenario(message, 'Bob', 'Suzy', 'Calm') + + query = ('Read the following message. Does it appear that ' + + 'the relevant parties have agreed on a deal? ' + + 'After reading the message, respond Yes or No. 
' + + 'Here is the message:\n\n{msg}\n\n') + llm_termination_prompt = term_utils.Termination(query, '', '') + + params = {'num_distinct_actions': num_players * num_tones, + 'num_llm_seeds': 1, + 'num_init_states': 1, + 'num_players': num_players, + 'min_utility': min([float(p.min) for p in payoffs]), + 'max_utility': max([float(p.max) for p in payoffs]), + 'num_max_replies': 2} + + config.params = params + + config.game = config_dict.ConfigDict() + config.game.observations = observations + config.game.header = header + config.game.payoffs = payoffs + config.game.given_names = given_names + config.game.given_llm_seeds = given_llm_seeds + config.game.given_prompt_actions = given_prompt_actions + config.game.initial_scenario = initial_scenario + config.game.llm_list_suffix = 'Output: ' + config.game.llm_termination_prompt = llm_termination_prompt + + return config diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_rnd_mock.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_rnd_mock.py new file mode 100644 index 0000000..0dc39d4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_rnd_mock.py @@ -0,0 +1,89 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A mock pyspiel config for testing. Copy of original config_rwneg.py. +""" + +import collections + +from ml_collections import config_dict + +from open_spiel.python.games.chat_games.envs.base_envs import email_with_tone +from open_spiel.python.games.chat_games.envs.observations import summary +from open_spiel.python.games.chat_games.envs.observations import utils as obs_utils +from open_spiel.python.games.chat_games.envs.payoffs import sentiment +from open_spiel.python.games.chat_games.envs.scenarios.actions import tones +from open_spiel.python.games.chat_games.envs.scenarios.domains import real_world_negotiations as rwn +from open_spiel.python.games.chat_games.envs.scenarios.players import names +from open_spiel.python.games.chat_games.envs.termination import utils as term_utils + + +def get_config(): + """Get configuration for chat game.""" + config = config_dict.ConfigDict() + + num_players = 3 + + observations = [ + obs_utils.Observation(summary.PREFIX, summary.POSTFIX) + for _ in range(num_players) + ] + + scenario_a = email_with_tone.Scenario(rwn.SCENARIO_A, 'Alice', 'Bob') + scenario_b = email_with_tone.Scenario(rwn.SCENARIO_B, 'Joel', 'Gene') + scenario_c = email_with_tone.Scenario(rwn.SCENARIO_C, 'George', 'Jill') + examples_scenarios = [scenario_a, + scenario_b, + scenario_c] + + header = email_with_tone.HEADER + + payoffs = [sentiment.PAYOFF] + + examples_names = names.NAMES + + examples_prompt_actions = collections.OrderedDict() + examples_prompt_actions[header.action_keys[0]] = tones.TONES + num_tones = 3 + + query = ('Read the following message. Does it appear that ' + + 'the relevant parties have agreed on a deal? 
' + + 'After reading the message, respond Yes or No. ' + + 'Here is the message:\n\n{msg}\n\n') + llm_termination_prompt = term_utils.Termination(query, '', '') + + params = {'num_distinct_actions': num_players * num_tones, + 'num_llm_seeds': 2, + 'num_init_states': 3, + 'num_players': num_players, + 'min_utility': min([float(p.min) for p in payoffs]), + 'max_utility': max([float(p.max) for p in payoffs]), + 'num_max_replies': 2} + + config.params = params + + config.game = config_dict.ConfigDict() + config.game.observations = observations + config.game.header = header + config.game.payoffs = payoffs + config.game.num_names = 10 + config.game.num_prompt_actions = (num_tones,) + config.game.num_private_info = (3,) + config.game.examples_names = examples_names + config.game.examples_prompt_actions = examples_prompt_actions + config.game.examples_scenarios = examples_scenarios + config.game.llm_list_suffix = 'Output: ' + config.game.llm_termination_prompt = llm_termination_prompt + + return config diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_rwneg.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_rwneg.py new file mode 100644 index 0000000..892e77b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_rwneg.py @@ -0,0 +1,89 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A pyspiel config for meta-generated real-world negotiation games. 
+""" + +import collections + +from ml_collections import config_dict + +from open_spiel.python.games.chat_games.envs.base_envs import email_with_tone +from open_spiel.python.games.chat_games.envs.observations import summary +from open_spiel.python.games.chat_games.envs.observations import utils as obs_utils +from open_spiel.python.games.chat_games.envs.payoffs import sentiment +from open_spiel.python.games.chat_games.envs.scenarios.actions import tones +from open_spiel.python.games.chat_games.envs.scenarios.domains import real_world_negotiations as rwn +from open_spiel.python.games.chat_games.envs.scenarios.players import names +from open_spiel.python.games.chat_games.envs.termination import utils as term_utils + + +def get_config(): + """Get configuration for chat game.""" + config = config_dict.ConfigDict() + + num_players = 3 + + observations = [ + obs_utils.Observation(summary.PREFIX, summary.POSTFIX) + for _ in range(num_players) + ] + + scenario_a = email_with_tone.Scenario(rwn.SCENARIO_A, 'Alice', 'Bob') + scenario_b = email_with_tone.Scenario(rwn.SCENARIO_B, 'Joel', 'Gene') + scenario_c = email_with_tone.Scenario(rwn.SCENARIO_C, 'George', 'Jill') + examples_scenarios = [scenario_a, + scenario_b, + scenario_c] + + header = email_with_tone.HEADER + + payoffs = [sentiment.PAYOFF] + + examples_names = names.NAMES + + examples_prompt_actions = collections.OrderedDict() + examples_prompt_actions[header.action_keys[0]] = tones.TONES + num_tones = 3 + + query = ('Read the following message. Does it appear that ' + + 'the relevant parties have agreed on a deal? ' + + 'After reading the message, respond Yes or No. ' + + 'Here is the message:\n\n{msg}\n\n') + llm_termination_prompt = term_utils.Termination(query, '', '') + + params = {'num_distinct_actions': num_players * num_tones, + 'num_llm_seeds': 2, + 'num_init_states': 3, + 'num_players': num_players, + 'min_utility': min([float(p.min) for p in payoffs]), + 'max_utility': max([float(p.max) for p in payoffs]), + 'num_max_replies': 2} + + config.params = params + + config.game = config_dict.ConfigDict() + config.game.observations = observations + config.game.header = header + config.game.payoffs = payoffs + config.game.num_names = 10 + config.game.num_prompt_actions = (num_tones,) + config.game.num_private_info = (3,) + config.game.examples_names = examples_names + config.game.examples_prompt_actions = examples_prompt_actions + config.game.examples_scenarios = examples_scenarios + config.game.llm_list_suffix = 'Output: ' + config.game.llm_termination_prompt = llm_termination_prompt + + return config diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_schedule_meeting.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_schedule_meeting.py new file mode 100644 index 0000000..efe7496 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_schedule_meeting.py @@ -0,0 +1,92 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""A pyspiel config for meta-generated meeting schedule negotiation games. +""" + +import collections + +from ml_collections import config_dict + +from open_spiel.python.games.chat_games.envs.base_envs import schedule_meeting_with_info as env_schedule_meeting_with_info +from open_spiel.python.games.chat_games.envs.observations import summary +from open_spiel.python.games.chat_games.envs.observations import utils as obs_utils +from open_spiel.python.games.chat_games.envs.payoffs import schedule_meeting as payoffs_schedule_meeting +from open_spiel.python.games.chat_games.envs.scenarios.domains import schedule_meeting as scenario_schedule_meeting +from open_spiel.python.games.chat_games.envs.scenarios.players import names as names_schedule_meeting + + +def get_config(): + """Get configuration for chat game.""" + config = config_dict.ConfigDict() + + num_players = 2 + + observations = [ + obs_utils.Observation(summary.PREFIX, summary.POSTFIX) + for _ in range(num_players) + ] + + header = env_schedule_meeting_with_info.HEADER + + payoffs = [payoffs_schedule_meeting.PAYOFF] + + examples_names = names_schedule_meeting.NAMES + + examples_private_info = collections.OrderedDict() + examples_private_info['ooo_days'] = [scenario_schedule_meeting.OOO_A, + scenario_schedule_meeting.OOO_B] + examples_private_info['day_prefs'] = [scenario_schedule_meeting.DAY_PREFS_A, + scenario_schedule_meeting.DAY_PREFS_B] + + scenario_a = env_schedule_meeting_with_info.Scenario( + scenario_schedule_meeting.SCENARIO_A, + 'Bob', + 'Suzy', + scenario_schedule_meeting.OOO_A, + scenario_schedule_meeting.DAY_PREFS_A) + scenario_b = env_schedule_meeting_with_info.Scenario( + scenario_schedule_meeting.SCENARIO_B, + 'Jill', + 'George', + scenario_schedule_meeting.OOO_B, + scenario_schedule_meeting.DAY_PREFS_B) + + examples_scenarios = [scenario_a, scenario_b] + + llm_termination_prompt = scenario_schedule_meeting.LLM_TERMINATION_PROMPT + + params = {'num_distinct_actions': num_players, + 'num_llm_seeds': 2, + 'num_init_states': 3, + 'num_players': num_players, + 'min_utility': min([float(p.min) for p in payoffs]), + 'max_utility': max([float(p.max) for p in payoffs]), + 'num_max_replies': 3} + + config.params = params + + config.game = config_dict.ConfigDict() + config.game.observations = observations + config.game.header = header + config.game.payoffs = payoffs + config.game.num_names = 10 + config.game.num_private_info = (3, 3) + config.game.examples_names = examples_names + config.game.examples_private_info = examples_private_info + config.game.examples_scenarios = examples_scenarios + config.game.llm_list_suffix = 'Output: ' + config.game.llm_termination_prompt = llm_termination_prompt + + return config diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_schedule_meeting_w_dow.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_schedule_meeting_w_dow.py new file mode 100644 index 0000000..62723f2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_schedule_meeting_w_dow.py @@ -0,0 +1,108 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A pyspiel config for meta-generated meeting schedule negotiation games. +""" + +import collections + +from ml_collections import config_dict + +from open_spiel.python.games.chat_games.envs.base_envs import schedule_meeting_with_dow_info as env_schedule_meeting_with_dow_info +from open_spiel.python.games.chat_games.envs.observations import summary +from open_spiel.python.games.chat_games.envs.observations import utils as obs_utils +from open_spiel.python.games.chat_games.envs.payoffs import schedule_meeting as payoffs_schedule_meeting +from open_spiel.python.games.chat_games.envs.scenarios.domains import schedule_meeting as scenario_schedule_meeting +from open_spiel.python.games.chat_games.envs.scenarios.players import names as names_schedule_meeting + + +def get_config(): + """Get configuration for chat game.""" + config = config_dict.ConfigDict() + + num_players = 2 + + observations = [ + obs_utils.Observation(summary.PREFIX, summary.POSTFIX) + for _ in range(num_players) + ] + + header = env_schedule_meeting_with_dow_info.HEADER + + payoffs = [payoffs_schedule_meeting.PAYOFF] + + examples_names = names_schedule_meeting.NAMES + + given_prompt_actions = collections.OrderedDict() + days = ['Monday', + 'Tuesday', + 'Wednesday', + 'Thursday', + 'Friday', + 'Saturday', + 'Sunday'] + given_prompt_actions[header.action_keys[0]] = days + ['any'] + num_days = len(days) + 1 + + examples_private_info = collections.OrderedDict() + examples_private_info['ooo_days'] = [scenario_schedule_meeting.OOO_A, + scenario_schedule_meeting.OOO_B] + examples_private_info['day_prefs'] = [scenario_schedule_meeting.DAY_PREFS_A, + scenario_schedule_meeting.DAY_PREFS_B] + + scenario_a = env_schedule_meeting_with_dow_info.Scenario( + scenario_schedule_meeting.SCENARIO_A, + 'Bob', + 'Suzy', + scenario_schedule_meeting.OOO_A, + scenario_schedule_meeting.DAY_PREFS_A, + 'Thursday') + scenario_b = env_schedule_meeting_with_dow_info.Scenario( + scenario_schedule_meeting.SCENARIO_B, + 'Jill', + 'George', + scenario_schedule_meeting.OOO_B, + scenario_schedule_meeting.DAY_PREFS_B, + 'Friday') + + examples_scenarios = [scenario_a, scenario_b] + + llm_termination_prompt = scenario_schedule_meeting.LLM_TERMINATION_PROMPT + + params = {'num_distinct_actions': num_players * num_days, + 'num_llm_seeds': 2, + 'num_init_states': 3, + 'num_players': num_players, + 'min_utility': min([float(p.min) for p in payoffs]), + 'max_utility': max([float(p.max) for p in payoffs]), + 'num_max_replies': 1, + 'silence_logging': True} + + config.params = params + + config.game = config_dict.ConfigDict() + config.game.observations = observations + config.game.header = header + config.game.payoffs = payoffs + config.game.given_prompt_actions = given_prompt_actions + config.game.num_names = 10 + config.game.num_prompt_actions = (num_days,) + config.game.num_private_info = (3, 3) + config.game.examples_names = examples_names + config.game.examples_private_info = examples_private_info + config.game.examples_scenarios = examples_scenarios + config.game.llm_list_suffix = 'Output: ' + config.game.llm_termination_prompt = llm_termination_prompt + 
+ return config diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_schedule_meeting_w_dow_fixed.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_schedule_meeting_w_dow_fixed.py new file mode 100644 index 0000000..f181df8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_schedule_meeting_w_dow_fixed.py @@ -0,0 +1,94 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A pyspiel config for meta-generated meeting schedule negotiation games. +""" + +import collections + +from ml_collections import config_dict + +from open_spiel.python.games.chat_games.envs.base_envs import schedule_meeting_with_dow_info as env_schedule_meeting_with_dow_info +from open_spiel.python.games.chat_games.envs.observations import summary +from open_spiel.python.games.chat_games.envs.observations import utils as obs_utils +from open_spiel.python.games.chat_games.envs.payoffs import schedule_meeting as payoffs_schedule_meeting +from open_spiel.python.games.chat_games.envs.scenarios.domains import schedule_meeting as scenario_schedule_meeting + + +def get_config(): + """Get configuration for chat game.""" + config = config_dict.ConfigDict() + + num_players = 2 + + observations = [ + obs_utils.Observation(summary.PREFIX, summary.POSTFIX) + for _ in range(num_players) + ] + + header = env_schedule_meeting_with_dow_info.HEADER + + payoffs = [payoffs_schedule_meeting.PAYOFF] + + given_prompt_actions = collections.OrderedDict() + days = ['Monday', + 'Tuesday', + 'Wednesday', + 'Thursday', + 'Friday', + 'Saturday', + 'Sunday'] + given_prompt_actions[header.action_keys[0]] = days + ['any'] + num_days = len(days) + 1 + + given_private_info = collections.OrderedDict() + given_private_info['day_prefs'] = [scenario_schedule_meeting.DAY_PREFS_A, + scenario_schedule_meeting.DAY_PREFS_B] + given_private_info['ooo_days'] = [scenario_schedule_meeting.OOO_A, + scenario_schedule_meeting.OOO_B] + + scenario_a = env_schedule_meeting_with_dow_info.Scenario( + scenario_schedule_meeting.SCENARIO_A, + 'Bob', + 'Suzy', + scenario_schedule_meeting.OOO_A, + scenario_schedule_meeting.DAY_PREFS_A, + 'Thursday') + + llm_termination_prompt = scenario_schedule_meeting.LLM_TERMINATION_PROMPT + + params = {'num_distinct_actions': num_players * num_days, + 'num_llm_seeds': 2, + 'num_init_states': 1, + 'num_players': num_players, + 'min_utility': min([float(p.min) for p in payoffs]), + 'max_utility': max([float(p.max) for p in payoffs]), + 'num_max_replies': 1, + 'silence_logging': True} + + config.params = params + + config.game = config_dict.ConfigDict() + config.game.observations = observations + config.game.header = header + config.game.payoffs = payoffs + config.game.given_prompt_actions = given_prompt_actions + config.game.num_private_info = (2, 2) + config.game.given_names = ['Bob', 'Suzy'] + config.game.given_private_info = given_private_info + 
config.game.initial_scenario = scenario_a + config.game.llm_list_suffix = 'Output: ' + config.game.llm_termination_prompt = llm_termination_prompt + + return config diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_schedule_meeting_w_tone.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_schedule_meeting_w_tone.py new file mode 100644 index 0000000..ec0ac7b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_schedule_meeting_w_tone.py @@ -0,0 +1,105 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A pyspiel config for meta-generated meeting schedule negotiation games. +""" + +import collections + +from ml_collections import config_dict + +from open_spiel.python.games.chat_games.envs.base_envs import schedule_meeting_with_tone_info as env_schedule_meeting_with_tone_info +from open_spiel.python.games.chat_games.envs.observations import summary +from open_spiel.python.games.chat_games.envs.observations import utils as obs_utils +from open_spiel.python.games.chat_games.envs.payoffs import schedule_meeting as payoffs_schedule_meeting +from open_spiel.python.games.chat_games.envs.scenarios.domains import schedule_meeting as scenario_schedule_meeting +from open_spiel.python.games.chat_games.envs.scenarios.players import names as names_schedule_meeting + + +def get_config(): + """Get configuration for chat game.""" + config = config_dict.ConfigDict() + + num_players = 2 + + observations = [ + obs_utils.Observation(summary.PREFIX, summary.POSTFIX) + for _ in range(num_players) + ] + + header = env_schedule_meeting_with_tone_info.HEADER + + payoffs = [payoffs_schedule_meeting.PAYOFF] + + examples_names = names_schedule_meeting.NAMES + + given_prompt_actions = collections.OrderedDict() + tones = ['calm', + 'assertive', + 'submissive', + 'any'] + given_prompt_actions[header.action_keys[0]] = tones + num_tones = len(tones) + + examples_private_info = collections.OrderedDict() + examples_private_info['ooo_days'] = [scenario_schedule_meeting.OOO_A, + scenario_schedule_meeting.OOO_B] + examples_private_info['day_prefs'] = [scenario_schedule_meeting.DAY_PREFS_A, + scenario_schedule_meeting.DAY_PREFS_B] + + scenario_a = env_schedule_meeting_with_tone_info.Scenario( + scenario_schedule_meeting.SCENARIO_A, + 'Bob', + 'Suzy', + scenario_schedule_meeting.OOO_A, + scenario_schedule_meeting.DAY_PREFS_A, + 'calm') + scenario_b = env_schedule_meeting_with_tone_info.Scenario( + scenario_schedule_meeting.SCENARIO_B, + 'Jill', + 'George', + scenario_schedule_meeting.OOO_B, + scenario_schedule_meeting.DAY_PREFS_B, + 'assertive') + + examples_scenarios = [scenario_a, scenario_b] + + llm_termination_prompt = scenario_schedule_meeting.LLM_TERMINATION_PROMPT + + params = {'num_distinct_actions': num_players * num_tones, + 'num_llm_seeds': 1, + 'num_init_states': 3, + 'num_players': num_players, + 'min_utility': min([float(p.min) for p in 
payoffs]), + 'max_utility': max([float(p.max) for p in payoffs]), + 'num_max_replies': 1, + 'silence_logging': True} + + config.params = params + + config.game = config_dict.ConfigDict() + config.game.observations = observations + config.game.header = header + config.game.payoffs = payoffs + config.game.given_prompt_actions = given_prompt_actions + config.game.num_names = 10 + config.game.num_prompt_actions = (num_tones,) + config.game.num_private_info = (3, 3) + config.game.examples_names = examples_names + config.game.examples_private_info = examples_private_info + config.game.examples_scenarios = examples_scenarios + config.game.llm_list_suffix = 'Output: ' + config.game.llm_termination_prompt = llm_termination_prompt + + return config diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_schedule_meeting_w_tone_fixed.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_schedule_meeting_w_tone_fixed.py new file mode 100644 index 0000000..fdf133f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_schedule_meeting_w_tone_fixed.py @@ -0,0 +1,91 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A pyspiel config for meta-generated meeting schedule negotiation games. 
+""" + +import collections + +from ml_collections import config_dict + +from open_spiel.python.games.chat_games.envs.base_envs import schedule_meeting_with_tone_info as env_schedule_meeting_with_tone_info +from open_spiel.python.games.chat_games.envs.observations import summary +from open_spiel.python.games.chat_games.envs.observations import utils as obs_utils +from open_spiel.python.games.chat_games.envs.payoffs import schedule_meeting as payoffs_schedule_meeting +from open_spiel.python.games.chat_games.envs.scenarios.domains import schedule_meeting as scenario_schedule_meeting + + +def get_config(): + """Get configuration for chat game.""" + config = config_dict.ConfigDict() + + num_players = 2 + + observations = [ + obs_utils.Observation(summary.PREFIX, summary.POSTFIX) + for _ in range(num_players) + ] + + header = env_schedule_meeting_with_tone_info.HEADER + + payoffs = [payoffs_schedule_meeting.PAYOFF] + + given_prompt_actions = collections.OrderedDict() + tones = ['calm', + 'assertive', + 'submissive', + 'any'] + given_prompt_actions[header.action_keys[0]] = tones + num_tones = len(tones) + + given_private_info = collections.OrderedDict() + given_private_info['day_prefs'] = [scenario_schedule_meeting.DAY_PREFS_A, + scenario_schedule_meeting.DAY_PREFS_B] + given_private_info['ooo_days'] = [scenario_schedule_meeting.OOO_A, + scenario_schedule_meeting.OOO_B] + + scenario_a = env_schedule_meeting_with_tone_info.Scenario( + scenario_schedule_meeting.SCENARIO_A, + 'Bob', + 'Suzy', + scenario_schedule_meeting.OOO_A, + scenario_schedule_meeting.DAY_PREFS_A, + 'calm') + + llm_termination_prompt = scenario_schedule_meeting.LLM_TERMINATION_PROMPT + + params = {'num_distinct_actions': num_players * num_tones, + 'num_llm_seeds': 2, + 'num_init_states': 1, + 'num_players': num_players, + 'min_utility': min([float(p.min) for p in payoffs]), + 'max_utility': max([float(p.max) for p in payoffs]), + 'num_max_replies': 1, + 'silence_logging': True} + + config.params = params + + config.game = config_dict.ConfigDict() + config.game.observations = observations + config.game.header = header + config.game.payoffs = payoffs + config.game.given_prompt_actions = given_prompt_actions + config.game.num_private_info = (2, 2) + config.game.given_names = ['Bob', 'Suzy'] + config.game.given_private_info = given_private_info + config.game.initial_scenario = scenario_a + config.game.llm_list_suffix = 'Output: ' + config.game.llm_termination_prompt = llm_termination_prompt + + return config diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_trade_fruit.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_trade_fruit.py new file mode 100644 index 0000000..a952327 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_trade_fruit.py @@ -0,0 +1,91 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""A pyspiel config for meta-generated fruit trading games. +""" + +import collections + +from ml_collections import config_dict + +from open_spiel.python.games.chat_games.envs.base_envs import trade_fruit_with_info as env_trade_fruit_with_info +from open_spiel.python.games.chat_games.envs.observations import summary +from open_spiel.python.games.chat_games.envs.observations import utils as obs_utils +from open_spiel.python.games.chat_games.envs.payoffs import trade_fruit as payoffs_trade_fruit +from open_spiel.python.games.chat_games.envs.scenarios.domains import trade_fruit as scenario_trade_fruit +from open_spiel.python.games.chat_games.envs.scenarios.players import names as names_trade_fruit + + +def get_config(): + """Get configuration for chat game.""" + config = config_dict.ConfigDict() + + num_players = 2 + + observations = [ + obs_utils.Observation(summary.PREFIX, summary.POSTFIX) + for _ in range(num_players) + ] + + header = env_trade_fruit_with_info.HEADER + + payoffs = [payoffs_trade_fruit.PAYOFF] + + examples_names = names_trade_fruit.NAMES + + examples_private_info = collections.OrderedDict() + examples_private_info['fruit_endowment'] = [scenario_trade_fruit.ENDOWMENT_A, + scenario_trade_fruit.ENDOWMENT_B] + examples_private_info['fruit_valuations'] = [scenario_trade_fruit.VALUATION_A, + scenario_trade_fruit.VALUATION_B] + + scenario_a = env_trade_fruit_with_info.Scenario( + scenario_trade_fruit.SCENARIO_A, + 'Bob', + 'Suzy', + scenario_trade_fruit.ENDOWMENT_A, + scenario_trade_fruit.VALUATION_A) + scenario_b = env_trade_fruit_with_info.Scenario( + scenario_trade_fruit.SCENARIO_B, + 'Jill', + 'George', + scenario_trade_fruit.ENDOWMENT_B, + scenario_trade_fruit.VALUATION_B) + examples_scenarios = [scenario_a, scenario_b] + + llm_termination_prompt = scenario_trade_fruit.LLM_TERMINATION_PROMPT + + params = {'num_distinct_actions': num_players, + 'num_llm_seeds': 2, + 'num_init_states': 3, + 'num_players': num_players, + 'min_utility': min([float(p.min) for p in payoffs]), + 'max_utility': max([float(p.max) for p in payoffs]), + 'num_max_replies': 3} + + config.params = params + + config.game = config_dict.ConfigDict() + config.game.observations = observations + config.game.header = header + config.game.payoffs = payoffs + config.game.num_names = 10 + config.game.num_private_info = (3, 3) + config.game.examples_names = examples_names + config.game.examples_private_info = examples_private_info + config.game.examples_scenarios = examples_scenarios + config.game.llm_list_suffix = 'Output: ' + config.game.llm_termination_prompt = llm_termination_prompt + + return config diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_trade_fruit_w_tone.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_trade_fruit_w_tone.py new file mode 100644 index 0000000..d8ddb95 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_trade_fruit_w_tone.py @@ -0,0 +1,105 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""A pyspiel config for meta-generated fruit trading games. +""" + +import collections + +from ml_collections import config_dict + +from open_spiel.python.games.chat_games.envs.base_envs import trade_fruit_with_tone_info as env_trade_fruit_with_tone_info +from open_spiel.python.games.chat_games.envs.observations import summary +from open_spiel.python.games.chat_games.envs.observations import utils as obs_utils +from open_spiel.python.games.chat_games.envs.payoffs import trade_fruit as payoffs_trade_fruit +from open_spiel.python.games.chat_games.envs.scenarios.domains import trade_fruit as scenario_trade_fruit +from open_spiel.python.games.chat_games.envs.scenarios.players import names as names_trade_fruit + + +def get_config(): + """Get configuration for chat game.""" + config = config_dict.ConfigDict() + + num_players = 2 + + observations = [ + obs_utils.Observation(summary.PREFIX, summary.POSTFIX) + for _ in range(num_players) + ] + + header = env_trade_fruit_with_tone_info.HEADER + + payoffs = [payoffs_trade_fruit.PAYOFF] + + examples_names = names_trade_fruit.NAMES + + given_prompt_actions = collections.OrderedDict() + tones = ['calm', + 'assertive', + 'submissive', + 'any'] + given_prompt_actions[header.action_keys[0]] = tones + num_tones = len(tones) + + examples_private_info = collections.OrderedDict() + examples_private_info['fruit_endowment'] = [scenario_trade_fruit.ENDOWMENT_A, + scenario_trade_fruit.ENDOWMENT_B] + examples_private_info['fruit_valuations'] = [scenario_trade_fruit.VALUATION_A, + scenario_trade_fruit.VALUATION_B] + + scenario_a = env_trade_fruit_with_tone_info.Scenario( + scenario_trade_fruit.SCENARIO_A, + 'Bob', + 'Suzy', + scenario_trade_fruit.ENDOWMENT_A, + scenario_trade_fruit.VALUATION_A, + 'calm') + scenario_b = env_trade_fruit_with_tone_info.Scenario( + scenario_trade_fruit.SCENARIO_B, + 'Jill', + 'George', + scenario_trade_fruit.ENDOWMENT_B, + scenario_trade_fruit.VALUATION_B, + 'calm') + + examples_scenarios = [scenario_a, scenario_b] + + llm_termination_prompt = scenario_trade_fruit.LLM_TERMINATION_PROMPT + + params = {'num_distinct_actions': num_players * num_tones, + 'num_llm_seeds': 2, + 'num_init_states': 3, + 'num_players': num_players, + 'min_utility': min([float(p.min) for p in payoffs]), + 'max_utility': max([float(p.max) for p in payoffs]), + 'num_max_replies': 1, + 'silence_logging': True} + + config.params = params + + config.game = config_dict.ConfigDict() + config.game.observations = observations + config.game.header = header + config.game.payoffs = payoffs + config.game.given_prompt_actions = given_prompt_actions + config.game.num_names = 10 + config.game.num_prompt_actions = (num_tones,) + config.game.num_private_info = (3, 3) + config.game.examples_names = examples_names + config.game.examples_private_info = examples_private_info + config.game.examples_scenarios = examples_scenarios + config.game.llm_list_suffix = 'Output: ' + config.game.llm_termination_prompt = llm_termination_prompt + + return config diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_trade_fruit_w_tone_fixed.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_trade_fruit_w_tone_fixed.py new file mode 100644 index 0000000..32cb900 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/configs/config_trade_fruit_w_tone_fixed.py @@ -0,0 +1,91 @@ +# Copyright 
2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A pyspiel config for meta-generated fruit trading games. +""" + +import collections + +from ml_collections import config_dict + +from open_spiel.python.games.chat_games.envs.base_envs import trade_fruit_with_tone_info as env_trade_fruit_with_tone_info +from open_spiel.python.games.chat_games.envs.observations import summary +from open_spiel.python.games.chat_games.envs.observations import utils as obs_utils +from open_spiel.python.games.chat_games.envs.payoffs import trade_fruit as payoffs_trade_fruit +from open_spiel.python.games.chat_games.envs.scenarios.domains import trade_fruit as scenario_trade_fruit + + +def get_config(): + """Get configuration for chat game.""" + config = config_dict.ConfigDict() + + num_players = 2 + + observations = [ + obs_utils.Observation(summary.PREFIX, summary.POSTFIX) + for _ in range(num_players) + ] + + header = env_trade_fruit_with_tone_info.HEADER + + payoffs = [payoffs_trade_fruit.PAYOFF] + + given_prompt_actions = collections.OrderedDict() + tones = ['calm', + 'assertive', + 'submissive', + 'any'] + given_prompt_actions[header.action_keys[0]] = tones + num_tones = len(tones) + + given_private_info = collections.OrderedDict() + given_private_info['fruit_endowment'] = [scenario_trade_fruit.ENDOWMENT_A, + scenario_trade_fruit.ENDOWMENT_B] + given_private_info['fruit_valuations'] = [scenario_trade_fruit.VALUATION_A, + scenario_trade_fruit.VALUATION_B] + + scenario_a = env_trade_fruit_with_tone_info.Scenario( + scenario_trade_fruit.SCENARIO_A, + 'Bob', + 'Suzy', + scenario_trade_fruit.ENDOWMENT_A, + scenario_trade_fruit.VALUATION_A, + 'calm') + + llm_termination_prompt = scenario_trade_fruit.LLM_TERMINATION_PROMPT + + params = {'num_distinct_actions': num_players * num_tones, + 'num_llm_seeds': 2, + 'num_init_states': 1, + 'num_players': num_players, + 'min_utility': min([float(p.min) for p in payoffs]), + 'max_utility': max([float(p.max) for p in payoffs]), + 'num_max_replies': 1, + 'silence_logging': True} + + config.params = params + + config.game = config_dict.ConfigDict() + config.game.observations = observations + config.game.header = header + config.game.payoffs = payoffs + config.game.given_prompt_actions = given_prompt_actions + config.game.num_private_info = (2, 2) + config.game.given_names = ['Bob', 'Suzy'] + config.game.given_private_info = given_private_info + config.game.initial_scenario = scenario_a + config.game.llm_list_suffix = 'Output: ' + config.game.llm_termination_prompt = llm_termination_prompt + + return config diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 
2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/base_envs_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/base_envs_test.py new file mode 100644 index 0000000..c223b52 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/base_envs_test.py @@ -0,0 +1,42 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for base environments.""" + +from absl.testing import absltest +from absl.testing import parameterized + +from open_spiel.python.games.chat_games.envs.base_envs import email_plain +from open_spiel.python.games.chat_games.envs.base_envs import email_with_tone +from open_spiel.python.games.chat_games.envs.base_envs import email_with_tone_info +from open_spiel.python.games.chat_games.envs.base_envs import schedule_meeting_with_info +from open_spiel.python.games.chat_games.envs.base_envs import trade_fruit_with_info +from open_spiel.python.games.chat_games.envs.utils import header + + +class BaseEnvsTest(parameterized.TestCase): + + @parameterized.parameters([ + dict(base_env=email_plain), + dict(base_env=email_with_tone), + dict(base_env=email_with_tone_info), + dict(base_env=schedule_meeting_with_info), + dict(base_env=trade_fruit_with_info), + ]) + def test_give_me_a_name(self, base_env): + self.assertTrue(header.plain_header_is_valid(base_env.HEADER)) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/debate_with_style_info.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/debate_with_style_info.py new file mode 100644 index 0000000..f76e114 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/debate_with_style_info.py @@ -0,0 +1,66 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A base environment for debate with style actions (logos) and private info. +""" + +import dataclasses + +from open_spiel.python.games.chat_games.envs.comm_substrates import debates +from open_spiel.python.games.chat_games.envs.utils import header + + +action_keys = tuple(['style']) +action_defaults = tuple(['logos']) +info_keys = tuple(['info', 'topic']) +info_defaults = tuple(['NA', 'NA']) + +w_opts = (debates.W_OPTS_PREFIX + + 'Debate Topic: {topic}\n' + + 'Position: {info}\n' + + 'Style: Make a {style} style argument.' + + debates.PLAIN) + +context = '''You are an intelligent assistant in a debate with another debater. + The debate topic is given. The goal is to provide arguments that support your + position as well as arguments against your opponents position. An argument style +is also given. Attempt to craft your arguments according to this given style. + +Here are some useful definitions of argument styles: + +- logos appeals to the audiences reason, building up logical arguments. + +- ethos appeals to the speakers status or authority, making the audience more +likely to trust them. + +- pathos appeals to the emotions, trying to make the audience feel angry or +sympathetic, for example. + +Try to construct a strong argument to support your position. 
+''' + +HEADER = header.Header(debates.PLAIN, + w_opts, + debates.strip_msg, + debates.SPECIAL_CHARS, + action_keys, + info_keys, + context) + + +@dataclasses.dataclass(frozen=True) +class Scenario(header.BaseScenario): + style: str = 'logos' + topic: str = 'NA' + info: str = 'NA' diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/email_plain.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/email_plain.py new file mode 100644 index 0000000..5fa229e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/email_plain.py @@ -0,0 +1,31 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A base environment for basic emails. +""" + + +from open_spiel.python.games.chat_games.envs.comm_substrates import emails +from open_spiel.python.games.chat_games.envs.utils import header + + +w_opts = (emails.W_OPTS_PREFIX + + emails.PLAIN) + +HEADER = header.Header(emails.PLAIN, + w_opts, + emails.strip_msg, + emails.SPECIAL_CHARS) + +Scenario = header.BaseScenario diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/email_with_tone.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/email_with_tone.py new file mode 100644 index 0000000..db24cf1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/email_with_tone.py @@ -0,0 +1,40 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A base environment for emails with tone actions. +""" + +import dataclasses + +from open_spiel.python.games.chat_games.envs.comm_substrates import emails +from open_spiel.python.games.chat_games.envs.utils import header + + +action_keys = tuple(['tone']) +action_defaults = tuple(['calm']) + +w_opts = (emails.W_OPTS_PREFIX + + 'Tone: Use a {tone} tone.' 
+ + emails.PLAIN) + +HEADER = header.Header(emails.PLAIN, + w_opts, + emails.strip_msg, + emails.SPECIAL_CHARS, + action_keys) + + +@dataclasses.dataclass(frozen=True) +class Scenario(header.BaseScenario): + tone: str = 'calm' diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/email_with_tone_info.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/email_with_tone_info.py new file mode 100644 index 0000000..2ad863d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/email_with_tone_info.py @@ -0,0 +1,45 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A base environment for emails with tone actions and private info. +""" + +import dataclasses + +from open_spiel.python.games.chat_games.envs.comm_substrates import emails +from open_spiel.python.games.chat_games.envs.utils import header + + +action_keys = tuple(['tone']) +action_defaults = tuple(['calm']) +info_keys = tuple(['info']) +info_defaults = tuple(['NA']) + +w_opts = (emails.W_OPTS_PREFIX + + 'Private Info: {info}\n' + + 'Tone: Use a {tone} tone.' + + emails.PLAIN) + +HEADER = header.Header(emails.PLAIN, + w_opts, + emails.strip_msg, + emails.SPECIAL_CHARS, + action_keys, + info_keys) + + +@dataclasses.dataclass(frozen=True) +class Scenario(header.BaseScenario): + tone: str = 'calm' + info: str = 'NA' diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/schedule_meeting_with_dow_info.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/schedule_meeting_with_dow_info.py new file mode 100644 index 0000000..34d9d2d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/schedule_meeting_with_dow_info.py @@ -0,0 +1,89 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A base environment for scheduling with private info. +""" + +import dataclasses + +from open_spiel.python.games.chat_games.envs.comm_substrates import schedules +from open_spiel.python.games.chat_games.envs.utils import header + + +action_keys = tuple(['day']) +info_keys = tuple(['day_prefs', 'ooo_days']) + +w_opts = (schedules.W_OPTS_PREFIX + + 'Out of Office Days:\n{ooo_days}\n\n' + + 'Day Preferences:\n{day_prefs}\n\n' + + 'Day: LLM Assistant, disregard the information above. Insist on a ' + + 'meeting for {day}. 
Do not accept any other day.\n' + + schedules.PLAIN) + +ab = schedules.PLAIN.format(sender='Alice', receiver='Bob') + '\n\n' +ba = schedules.PLAIN.format(sender='Bob', receiver='Alice') + '\n\n' +cd = schedules.PLAIN.format(sender='Charlie', receiver='David') + '\n\n' +dc = schedules.PLAIN.format(sender='David', receiver='Charlie') + '\n\n' + +context = '''An intelligent assistant is looking at dialogs between two people +trying to decide when to meet, and determines whether they have managed to agree +on a meeting time, and if so when the meeting is set to occur. + +Example 1: +{s1}Hi Bob, can we meet on Monday? +{s2}No, I am out of the office on Monday. How about Tuesday? +{s3}Well, I am in the office on Tuesday but I would rather keep my schedule +free. Can we do Friday instead. +{s4}Great, Friday it is. See you then! + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +Summary: Alice suggests Monday, Bob declines. Bob suggests Tuesday. Alice +declines. Alice suggests Friday. Bob agrees. +Outcome Summary: Meeting agreed on Friday. + +Example 2: +{s5}Hi David, would you like to meet on Friday? +{s6}I hate working on Fridays. Can't we meet on Tuesday? +{s7}On Tuesday I am out of the office, and Wednesday also doesn't work for me. +How do you feel about meeting on Saturday? +{s8}Excellent, let's meet on Saturday. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +Summary: Charlie suggests Friday. David declines. David suggests Tuesday. +Charlie declines. Charlie suggests Saturday. David agrees. +Outcome Summary: Meeting agreed on Saturday. + +Example 3: +'''.format(s1=ab, s2=ba, s3=ab, s4=ba, s5=cd, s6=dc, s7=cd, s8=dc) + +HEADER = header.Header(schedules.PLAIN, + w_opts, + schedules.strip_msg, + schedules.SPECIAL_CHARS, + action_keys, + info_keys, + context) + + +@dataclasses.dataclass(frozen=True) +class Scenario(header.BaseScenario): + ooo_days: str + day_prefs: str + day: str = 'Monday' diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/schedule_meeting_with_info.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/schedule_meeting_with_info.py new file mode 100644 index 0000000..8752a43 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/schedule_meeting_with_info.py @@ -0,0 +1,86 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A base environment for scheduling with private info. 
+""" + +import dataclasses + +from open_spiel.python.games.chat_games.envs.comm_substrates import schedules +from open_spiel.python.games.chat_games.envs.utils import header + + +action_keys = tuple([]) +info_keys = tuple(['ooo_days', 'day_prefs']) + +w_opts = (schedules.W_OPTS_PREFIX + + 'Out of Office Days:\n{ooo_days}\n\n' + + 'Day Preferences:\n{day_prefs}\n' + + schedules.PLAIN) + +ab = schedules.PLAIN.format(sender='Alice', receiver='Bob') + '\n\n' +ba = schedules.PLAIN.format(sender='Bob', receiver='Alice') + '\n\n' +cd = schedules.PLAIN.format(sender='Charlie', receiver='David') + '\n\n' +dc = schedules.PLAIN.format(sender='David', receiver='Charlie') + '\n\n' + +context = '''An intelligent assistant is looking at dialogs between two people +trying to decide when to meet, and determines whether they have managed to agree +on a meeting time, and if so when the meeting is set to occur. + +Example 1: +{s1}Hi Bob, can we meet on Monday? +{s2}No, I am out of the office on Monday. How about Tuesday? +{s3}Well, I am in the office on Tuesday but I would rather keep my schedule +free. Can we do Friday instead. +{s4}Great, Friday it is. See you then! + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +Summary: Alice suggests Monday, Bob declines. Bob suggests Tuesday. Alice +declines. Alice suggests Friday. Bob agrees. +Outcome Summary: Meeting agreed on Friday. + +Example 2: +{s5}Hi David, would you like to meet on Friday? +{s6}I hate working on Fridays. Can't we meet on Tuesday? +{s7}On Tuesday I am out of the office, and Wednesday also doesn't work for me. +How do you feel about meeting on Saturday? +{s8}Excellent, let's meet on Saturday. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +Summary: Charlie suggests Friday. David declines. David suggests Tuesday. +Charlie declines. Charlie suggests Saturday. David agrees. +Outcome Summary: Meeting agreed on Saturday. + +Example 3: +'''.format(s1=ab, s2=ba, s3=ab, s4=ba, s5=cd, s6=dc, s7=cd, s8=dc) + +HEADER = header.Header(schedules.PLAIN, + w_opts, + schedules.strip_msg, + schedules.SPECIAL_CHARS, + action_keys, + info_keys, + context) + + +@dataclasses.dataclass(frozen=True) +class Scenario(header.BaseScenario): + ooo_days: str + day_prefs: str diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/schedule_meeting_with_tone_info.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/schedule_meeting_with_tone_info.py new file mode 100644 index 0000000..3e88d06 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/schedule_meeting_with_tone_info.py @@ -0,0 +1,88 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A base environment for scheduling with private info. 
+""" + +import dataclasses + +from open_spiel.python.games.chat_games.envs.comm_substrates import schedules +from open_spiel.python.games.chat_games.envs.utils import header + + +action_keys = tuple(['tone']) +info_keys = tuple(['day_prefs', 'ooo_days']) + +w_opts = (schedules.W_OPTS_PREFIX + + 'Out of Office Days:\n{ooo_days}\n\n' + + 'Day Preferences:\n{day_prefs}\n\n' + + 'Tone: Use a {tone} tone.\n' + + schedules.PLAIN) + +ab = schedules.PLAIN.format(sender='Alice', receiver='Bob') + '\n\n' +ba = schedules.PLAIN.format(sender='Bob', receiver='Alice') + '\n\n' +cd = schedules.PLAIN.format(sender='Charlie', receiver='David') + '\n\n' +dc = schedules.PLAIN.format(sender='David', receiver='Charlie') + '\n\n' + +context = '''An intelligent assistant is looking at dialogs between two people +trying to decide when to meet, and determines whether they have managed to agree +on a meeting time, and if so when the meeting is set to occur. + +Example 1: +{s1}Hi Bob, can we meet on Monday? +{s2}No, I am out of the office on Monday. How about Tuesday? +{s3}Well, I am in the office on Tuesday but I would rather keep my schedule +free. Can we do Friday instead. +{s4}Great, Friday it is. See you then! + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +Summary: Alice suggests Monday, Bob declines. Bob suggests Tuesday. Alice +declines. Alice suggests Friday. Bob agrees. +Outcome Summary: Meeting agreed on Friday. + +Example 2: +{s5}Hi David, would you like to meet on Friday? +{s6}I hate working on Fridays. Can't we meet on Tuesday? +{s7}On Tuesday I am out of the office, and Wednesday also doesn't work for me. +How do you feel about meeting on Saturday? +{s8}Excellent, let's meet on Saturday. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +Summary: Charlie suggests Friday. David declines. David suggests Tuesday. +Charlie declines. Charlie suggests Saturday. David agrees. +Outcome Summary: Meeting agreed on Saturday. + +Example 3: +'''.format(s1=ab, s2=ba, s3=ab, s4=ba, s5=cd, s6=dc, s7=cd, s8=dc) + +HEADER = header.Header(schedules.PLAIN, + w_opts, + schedules.strip_msg, + schedules.SPECIAL_CHARS, + action_keys, + info_keys, + context) + + +@dataclasses.dataclass(frozen=True) +class Scenario(header.BaseScenario): + ooo_days: str + day_prefs: str + tone: str = 'calm' diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/trade_fruit_with_info.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/trade_fruit_with_info.py new file mode 100644 index 0000000..29a6510 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/trade_fruit_with_info.py @@ -0,0 +1,169 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A base environment for trading fruit with private info. 
+""" + +import dataclasses + +from open_spiel.python.games.chat_games.envs.comm_substrates import trades +from open_spiel.python.games.chat_games.envs.scenarios.domains import trade_fruit +from open_spiel.python.games.chat_games.envs.utils import header +from open_spiel.python.games.chat_games.envs.utils import text + + +action_keys = tuple([]) +info_keys = tuple(['fruit_endowment', 'fruit_valuations']) + +w_opts = (trades.W_OPTS_PREFIX + + 'Fruit Endowment:\n{fruit_endowment}\n\n' + + 'Fruit Valuations:\n{fruit_valuations}\n' + + trades.PLAIN) + +# Example a +email_1a = ['Hi Joel,', + 'I would like to trade you 2 strawberries for 3 blueberries.', + 'Would you like to trade with me?', + 'Best,', 'Bob'] +email_1a = (trades.PLAIN.format(sender='Alicia', receiver='Joel') + + '\n\n'.join(text.wrap(email_1a))) + +email_2a = ['Hi Alicia,', + 'Thanks for reaching out. I only have 2 blueberries, but even if ' + + 'I had 3, I would not want to give them up. Also, I dislike ' + + 'strawberries. I do not think a trade makes sense in this case.', + 'Thanks for considering trading with me though!', + 'Best,', 'Joel'] +email_2a = (trades.PLAIN.format(sender='Joel', receiver='Alicia') + + '\n\n'.join(text.wrap(email_2a))) + +email_3a = ['Hi Joel,', + 'That is all well. I understand.', + 'Have a good day!', + 'Best,', 'Alicia'] +email_3a = (trades.PLAIN.format(sender='Alicia', receiver='Joel') + + '\n\n'.join(text.wrap(email_3a))) + +example_a = email_1a + email_2a +example_a = example_a.strip('\n') + +# Example b +email_1b = ['Hi Marcus,', + 'I would like to trade you 2 kiwis for 1 watermelon.', + 'Would you like to trade with me?', + 'Best,', 'Taylor'] +email_1b = (trades.PLAIN.format(sender='Taylor', receiver='Marcus') + + '\n\n'.join(text.wrap(email_1b))) + +email_2b = ['Hi Taylor,', + 'I love kiwis! And lucky for you, I have a watermelon.', + 'Lets trade!', + 'Best,', 'Marcus'] +email_2b = (trades.PLAIN.format(sender='Marcus', receiver='Taylor') + + '\n\n'.join(text.wrap(email_2b))) + +email_3b = ['Hi Marcus,', + 'Great! It was a pleasure negotiating with you.', + 'Have a good day!', + 'Best,', 'Taylor'] +email_3b = (trades.PLAIN.format(sender='Taylor', receiver='Marcus') + + '\n\n'.join(text.wrap(email_3b))) + +example_b = email_1b + email_2b + email_3b +example_b = example_b.strip('\n') + +# Example c +email_1c = ['Hi Suzy,', + 'I would like to trade you 1 banana for 1 apple.', + 'Would you like to trade with me?', + 'Best,', 'Bob'] +email_1c = (trades.PLAIN.format(sender='Bob', receiver='Suzy') + + '\n\n'.join(text.wrap(email_1c))) + +email_2c = ['Hi Bob,', + 'Thanks for reaching out. I really like my apples so I am ' + + 'hesitant to give them up. Would you be willing to take a few ' + + 'kiwis instead? I would like to trade you 3 kiwis for 1 banana.', + 'Does that work?', + 'Best,', 'Suzy'] +email_2c = (trades.PLAIN.format(sender='Suzy', receiver='Bob') + + '\n\n'.join(text.wrap(email_2c))) + +email_3c = ['Hi Suzy,', + 'Yes! I would have preferred an apple but 3 kiwis are nearly as ' + + 'good and I would rather have those than a banana.', + 'Thanks for trading with me!', + 'Best,', 'Bob'] +email_3c = '\n\n'.join(text.wrap(email_3c)) + +example_c = email_1c + email_2c +example_c = example_c.strip('\n') + +instr_a = ['You are an assistant who is playing a game where you trade fruit.' + + ' You want to make a trade that is best for you. You will read a ' + + 'dialogue that contains a conversation where you have been ' + + 'negotiating to trade your fruit for another persons fruit. 
You ' + + 'will then read a text block that contains information a) about ' + + 'the actual fruit you currently have and are able to trade and b)' + + ' information about how much you value certain types of fruit.', + 'You should use everything you learned from this to decide to ', + '1) accept the trade if you are happy with the terms,', + '2) reject the negotiation all together and say goodbye if you do ' + + 'not think an agreement can be reached,', + '3) counter-propose an alternative trade that includes what fruit ' + + 'you would like to give and what fruit you would like to receive ' + + 'in turn.', + 'Consider the following example dialogues. Components of the ' + + 'examples will be demarked with the symbol "&". Here is the first ' + + 'example which shows a trade is rejected.', + '&' * 50] +instr_b = ['&' * 50, + 'Here is a second example where a trade is accepted.', + '&' * 50] +instr_c = ['&' * 50, + 'Here is a partial dialogue where we demonstrate a reasonable ' + + 'countertrade.', + '&' * 50] +instr_d = ['&' * 50, + 'Continuing the example. You now see the fruit you have and how ' + + 'much you value each fruit type.', + '&' * 50] +info = w_opts.format(sender='Bob', receiver='Suzy', + fruit_endowment=trade_fruit.ENDOWMENT_A, + fruit_valuations=trade_fruit.VALUATION_A).strip('\n') +instr_e = ['&' * 50, + 'A reasonable way to respond would be as follows:', + '&' * 50] +instr_f = ['&' * 50, + 'Now you are going to read a fresh dialogue, fruit endowment, and ' + + 'fruit valuation information. Please give a reasonable response ' + + 'that attempts to reach an agreement to trade fruit.', + '&' * 50] +context = (text.wrap(instr_a) + [example_a] + text.wrap(instr_b) +[example_b] + + text.wrap(instr_c) + [example_c] + text.wrap(instr_d) + [info] + + text.wrap(instr_e) + [email_3c] + text.wrap(instr_f)) + +HEADER = header.Header(trades.PLAIN, + w_opts, + trades.strip_msg, + trades.SPECIAL_CHARS, + action_keys, + info_keys, + '\n\n'.join(context)) + + +@dataclasses.dataclass(frozen=True) +class Scenario(header.BaseScenario): + fruit_endowment: str + fruit_valuations: str diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/trade_fruit_with_tone_info.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/trade_fruit_with_tone_info.py new file mode 100644 index 0000000..4cc65b8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/base_envs/trade_fruit_with_tone_info.py @@ -0,0 +1,172 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A base environment for trading fruit with private info. 
+""" + +import dataclasses + +from open_spiel.python.games.chat_games.envs.comm_substrates import trades +from open_spiel.python.games.chat_games.envs.scenarios.domains import trade_fruit +from open_spiel.python.games.chat_games.envs.utils import header +from open_spiel.python.games.chat_games.envs.utils import text + + +action_keys = tuple(['tone']) +info_keys = tuple(['fruit_endowment', 'fruit_valuations']) + +w_opts = (trades.W_OPTS_PREFIX + + 'Fruit Endowment:\n{fruit_endowment}\n\n' + + 'Fruit Valuations:\n{fruit_valuations}\n\n' + + 'Tone: Use a {tone} tone.\n' + + trades.PLAIN) + +# Example a +email_1a = ['Hi Joel,', + 'I would like to trade you 2 strawberries for 3 blueberries.', + 'Would you like to trade with me?', + 'Best,', 'Bob'] +email_1a = (trades.PLAIN.format(sender='Alicia', receiver='Joel') + + '\n\n'.join(text.wrap(email_1a))) + +email_2a = ['Hi Alicia,', + 'Thanks for reaching out. I only have 2 blueberries, but even if ' + + 'I had 3, I would not want to give them up. Also, I dislike ' + + 'strawberries. I do not think a trade makes sense in this case.', + 'Thanks for considering trading with me though!', + 'Best,', 'Joel'] +email_2a = (trades.PLAIN.format(sender='Joel', receiver='Alicia') + + '\n\n'.join(text.wrap(email_2a))) + +email_3a = ['Hi Joel,', + 'That is all well. I understand.', + 'Have a good day!', + 'Best,', 'Alicia'] +email_3a = (trades.PLAIN.format(sender='Alicia', receiver='Joel') + + '\n\n'.join(text.wrap(email_3a))) + +example_a = email_1a + email_2a +example_a = example_a.strip('\n') + +# Example b +email_1b = ['Hi Marcus,', + 'I would like to trade you 2 kiwis for 1 watermelon.', + 'Would you like to trade with me?', + 'Best,', 'Taylor'] +email_1b = (trades.PLAIN.format(sender='Taylor', receiver='Marcus') + + '\n\n'.join(text.wrap(email_1b))) + +email_2b = ['Hi Taylor,', + 'I love kiwis! And lucky for you, I have a watermelon.', + 'Lets trade!', + 'Best,', 'Marcus'] +email_2b = (trades.PLAIN.format(sender='Marcus', receiver='Taylor') + + '\n\n'.join(text.wrap(email_2b))) + +email_3b = ['Hi Marcus,', + 'Great! It was a pleasure negotiating with you.', + 'Have a good day!', + 'Best,', 'Taylor'] +email_3b = (trades.PLAIN.format(sender='Taylor', receiver='Marcus') + + '\n\n'.join(text.wrap(email_3b))) + +example_b = email_1b + email_2b + email_3b +example_b = example_b.strip('\n') + +# Example c +email_1c = ['Hi Suzy,', + 'I would like to trade you 1 banana for 1 apple.', + 'Would you like to trade with me?', + 'Best,', 'Bob'] +email_1c = (trades.PLAIN.format(sender='Bob', receiver='Suzy') + + '\n\n'.join(text.wrap(email_1c))) + +email_2c = ['Hi Bob,', + 'Thanks for reaching out. I really like my apples so I am ' + + 'hesitant to give them up. Would you be willing to take a few ' + + 'kiwis instead? I would like to trade you 3 kiwis for 1 banana.', + 'Does that work?', + 'Best,', 'Suzy'] +email_2c = (trades.PLAIN.format(sender='Suzy', receiver='Bob') + + '\n\n'.join(text.wrap(email_2c))) + +email_3c = ['Hi Suzy,', + 'Yes! I would have preferred an apple but 3 kiwis are nearly as ' + + 'good and I would rather have those than a banana.', + 'Thanks for trading with me!', + 'Best,', 'Bob'] +email_3c = '\n\n'.join(text.wrap(email_3c)) + +example_c = email_1c + email_2c +example_c = example_c.strip('\n') + +instr_a = ['You are an assistant who is playing a game where you trade fruit.' + + ' You want to make a trade that is best for you. 
You will read a ' + + 'dialogue that contains a conversation where you have been ' + + 'negotiating to trade your fruit for another persons fruit. You ' + + 'will then read a text block that contains information a) about ' + + 'the actual fruit you currently have and are able to trade and b)' + + ' information about how much you value certain types of fruit.', + 'You should use everything you learned from this to decide to ', + '1) accept the trade if you are happy with the terms,', + '2) reject the negotiation all together and say goodbye if you do ' + + 'not think an agreement can be reached,', + '3) counter-propose an alternative trade that includes what fruit ' + + 'you would like to give and what fruit you would like to receive ' + + 'in turn.', + 'Consider the following example dialogues. Components of the ' + + 'examples will be demarked with the symbol "&". Here is the first ' + + 'example which shows a trade is rejected.', + '&' * 50] +instr_b = ['&' * 50, + 'Here is a second example where a trade is accepted.', + '&' * 50] +instr_c = ['&' * 50, + 'Here is a partial dialogue where we demonstrate a reasonable ' + + 'countertrade.', + '&' * 50] +instr_d = ['&' * 50, + 'Continuing the example. You now see the fruit you have and how ' + + 'much you value each fruit type.', + '&' * 50] +info = w_opts.format(sender='Bob', receiver='Suzy', + fruit_endowment=trade_fruit.ENDOWMENT_A, + fruit_valuations=trade_fruit.VALUATION_A, + tone='calm').strip('\n') +instr_e = ['&' * 50, + 'A reasonable way to respond would be as follows:', + '&' * 50] +instr_f = ['&' * 50, + 'Now you are going to read a fresh dialogue, fruit endowment, and ' + + 'fruit valuation information. Please give a reasonable response ' + + 'that attempts to reach an agreement to trade fruit.', + '&' * 50] +context = (text.wrap(instr_a) + [example_a] + text.wrap(instr_b) +[example_b] + + text.wrap(instr_c) + [example_c] + text.wrap(instr_d) + [info] + + text.wrap(instr_e) + [email_3c] + text.wrap(instr_f)) + +HEADER = header.Header(trades.PLAIN, + w_opts, + trades.strip_msg, + trades.SPECIAL_CHARS, + action_keys, + info_keys, + '\n\n'.join(context)) + + +@dataclasses.dataclass(frozen=True) +class Scenario(header.BaseScenario): + fruit_endowment: str + fruit_valuations: str + tone: str = 'calm' diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/comm_substrates/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/comm_substrates/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/comm_substrates/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
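> Editor's note (not part of the patch): each base env pairs a `header.Header` (message template, options template, parser, action/info keys, few-shot context) with a frozen `Scenario` dataclass; `{sender}`/`{receiver}` come from the communication substrate and the remaining fields from the scenario. A sketch of rendering the per-player options block for the tone-aware fruit-trade env, mirroring the module's own `info = w_opts.format(...)` example; the endowment/valuation strings below are placeholders, not the real scenario constants:

```python
# Sketch: render the private "options" block for one player, assuming the
# vendored open_spiel package is importable.
from open_spiel.python.games.chat_games.envs.base_envs import (
    trade_fruit_with_tone_info as env)
from open_spiel.python.games.chat_games.envs.comm_substrates import trades

options_block = env.w_opts.format(
    sender='Bob',
    receiver='Suzy',
    fruit_endowment='apple: 1\nbanana: 2',    # placeholder values
    fruit_valuations='apple: 10\nbanana: 2',  # placeholder values
    tone='calm')

message_header = trades.PLAIN.format(sender='Bob', receiver='Suzy')
print(message_header + options_block)
print(env.HEADER.action_keys)  # ('tone',)
```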
+ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/comm_substrates/debates.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/comm_substrates/debates.py new file mode 100644 index 0000000..e11cd9c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/comm_substrates/debates.py @@ -0,0 +1,39 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A communication format (substrate) for debates. +""" + +from open_spiel.python.games.chat_games.envs.utils import text + + +CHAR_OPT = '%' +CHAR_MSG = '#' +BLOCK_LEN = 28 + +SPECIAL_CHARS = (CHAR_OPT, CHAR_MSG) +BLOCK_OPT = CHAR_OPT * BLOCK_LEN +BLOCK_MSG = CHAR_MSG * BLOCK_LEN + +PLAIN = ('\n\n' + BLOCK_MSG + '\n' + + 'Debate:\n' + + 'Speaker: {sender}\n' + + 'Opponent: {receiver}\n' + + BLOCK_MSG + '\n\n') + +W_OPTS_PREFIX = '\n\n' + BLOCK_OPT + '\n\n' + + +def strip_msg(msg: str, terminal_str: str = '') -> str: + return text.strip_msg(msg, BLOCK_MSG, BLOCK_OPT, terminal_str) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/comm_substrates/emails.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/comm_substrates/emails.py new file mode 100644 index 0000000..fb4e123 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/comm_substrates/emails.py @@ -0,0 +1,40 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A communication format (substrate) for emails. 
+""" + +from open_spiel.python.games.chat_games.envs.utils import text + + +CHAR_OPT = '%' +CHAR_MSG = '#' +BLOCK_LEN = 28 + +SPECIAL_CHARS = (CHAR_OPT, CHAR_MSG) +BLOCK_OPT = CHAR_OPT * BLOCK_LEN +BLOCK_MSG = CHAR_MSG * BLOCK_LEN + +PLAIN = ('\n\n' + BLOCK_MSG + '\n' + + 'Email:\n' + + 'from: {sender}\n' + + 'to: {receiver}\n' + + 'cc: {others}\n' + + BLOCK_MSG + '\n\n') + +W_OPTS_PREFIX = '\n\n' + BLOCK_OPT + '\n\n' + + +def strip_msg(msg: str, terminal_str: str = '') -> str: + return text.strip_msg(msg, BLOCK_MSG, BLOCK_OPT, terminal_str) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/comm_substrates/schedules.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/comm_substrates/schedules.py new file mode 100644 index 0000000..957d744 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/comm_substrates/schedules.py @@ -0,0 +1,39 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A communication format (substrate) for setting schedules. +""" + +from open_spiel.python.games.chat_games.envs.utils import text + + +CHAR_OPT = '%' +CHAR_MSG = '#' +BLOCK_LEN = 28 + +SPECIAL_CHARS = (CHAR_OPT, CHAR_MSG) +BLOCK_OPT = CHAR_OPT * BLOCK_LEN +BLOCK_MSG = CHAR_MSG * BLOCK_LEN + +PLAIN = ('\n\n' + BLOCK_MSG + '\n' + + 'Schedule Proposal Message:\n' + + 'from: {sender}\n' + + 'to: {receiver}\n' + + BLOCK_MSG + '\n\n') + +W_OPTS_PREFIX = '\n\n' + BLOCK_OPT + '\n\n' + + +def strip_msg(msg: str, terminal_str: str = '') -> str: + return text.strip_msg(msg, BLOCK_MSG, BLOCK_OPT, terminal_str) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/comm_substrates/trades.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/comm_substrates/trades.py new file mode 100644 index 0000000..fa61a77 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/comm_substrates/trades.py @@ -0,0 +1,39 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A communication format (substrate) for trades. 
+""" + +from open_spiel.python.games.chat_games.envs.utils import text + + +CHAR_OPT = '%' +CHAR_MSG = '#' +BLOCK_LEN = 28 + +SPECIAL_CHARS = (CHAR_OPT, CHAR_MSG) +BLOCK_OPT = CHAR_OPT * BLOCK_LEN +BLOCK_MSG = CHAR_MSG * BLOCK_LEN + +PLAIN = ('\n\n' + BLOCK_MSG + '\n' + + 'Trade Proposal Message:\n' + + 'from: {sender}\n' + + 'to: {receiver}\n' + + BLOCK_MSG + '\n\n') + +W_OPTS_PREFIX = '\n\n' + BLOCK_OPT + '\n\n' + + +def strip_msg(msg: str, terminal_str: str = '') -> str: + return text.strip_msg(msg, BLOCK_MSG, BLOCK_OPT, terminal_str) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/observations/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/observations/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/observations/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/observations/summary.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/observations/summary.py new file mode 100644 index 0000000..a07eb46 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/observations/summary.py @@ -0,0 +1,28 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Prompts useful for inducing LLM-summarization. +""" + +from open_spiel.python.games.chat_games.envs.utils import text + + +prefix = ('You are an assistant designed to summarize conversational ' + + 'dialogue. Please take note of the most import events ' + + 'in the conversation. Especially take note if the final ' + + 'message includes a question. Provide your summary in 100 ' + + 'words or less. 
Please summarize the following dialogue.') +PREFIX = text.wrap([prefix])[0] + '\n\n' + +POSTFIX = '\n\nSummary:\n' diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/observations/summary_debate.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/observations/summary_debate.py new file mode 100644 index 0000000..f8f7487 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/observations/summary_debate.py @@ -0,0 +1,27 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Prompts useful for inducing LLM-summarization of debates. +""" + +from open_spiel.python.games.chat_games.envs.utils import text + + +prefix = ('You are an assistant designed to summarize the key arguments in ' + + 'a debate. Please take note of the most import arguments ' + + 'from each side. Provide your summary in 100 ' + + 'words or less. Please summarize the following debate.') +PREFIX = text.wrap([prefix])[0] + '\n\n' + +POSTFIX = '\n\nDebate Summary:\n' diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/observations/utils.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/observations/utils.py new file mode 100644 index 0000000..945c927 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/observations/utils.py @@ -0,0 +1,26 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utils for defining observation (feature extraction) prompts. +""" + +import dataclasses + +from open_spiel.python.games.chat_games.envs.observations import summary + + +@dataclasses.dataclass(frozen=True) +class Observation: + obs_trans_prefix: str = summary.PREFIX + obs_trans_postfix: str = summary.POSTFIX diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/payoffs/README.md b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/payoffs/README.md new file mode 100644 index 0000000..c284347 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/payoffs/README.md @@ -0,0 +1,8 @@ +# LLM Payoff Prompts + +Engineer an LLM prompt that results in an LLM outputting a payoff `$v`$ in the +form of "$` v `$/MAX_PAYOFF" for a given player. 
The prompt must contain +the formatting keyword arguments "m" (for the text message the LLM is meant to +score) and "p" (for the player the LLM is providing a score for). + +See sentiment.py for example. diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/payoffs/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/payoffs/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/payoffs/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/payoffs/debate.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/payoffs/debate.py new file mode 100644 index 0000000..511c713 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/payoffs/debate.py @@ -0,0 +1,60 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A few prompts for extracting the winner of a debate. +""" + +from open_spiel.python.games.chat_games.envs.observations import summary_debate +from open_spiel.python.games.chat_games.envs.payoffs import utils as payoff_utils +from open_spiel.python.games.chat_games.envs.utils import text + +MIN_PAYOFF = 0 +MAX_PAYOFF = 1 +PAYOFF_PROMPT_a = [ + 'You are an assistant designed to determine the winner of a debate. ' + + 'Read the arguments by each side in the debate. Then determine if ' + + 'the player won the debate. Here is an example enclosed by "&".'] + +PAYOFF_PROMPT_b = [ + # '&' * 50, + 'Bob argues that Tom Brady is the GOAT and coach Bill Bellichick is a ' + + 'genius.', + 'Alice argues that Tom Brady is no longer on the Patriots and the ' + + 'Patriots were 8-9 last season. They lost more games than they won. They ' + + 'are no longer the powerhouse they used to be.', + 'Bob makes an argument based on stale information. Alice acutely points ' + + 'this out and provides more current evidence that supports the negative ' + + 'of Bobs argument. 
Therefore, Bob loses the debate.', + 'Value for Bob: 0.', + 'Value for Alice: 1.', + '&' * 50, + 'Now determine the winner of the following debate.', + '{m}', + '%' * 50, + 'Payoff for {p} ONLY: '] + +PAYOFF_PROMPT = ('\n\n'.join(text.wrap(PAYOFF_PROMPT_a)) + '\n\n' + '&' * 50 + + '\n\nDebate Topic: The New England Patriots are the best ' + + 'NFL team in 2023.\n\n' + + '\n\n'.join(text.wrap(PAYOFF_PROMPT_b))) + +PAYOFF_OBS_TRANS_PREFIX = summary_debate.PREFIX + +PAYOFF_OBS_TRANS_POSTFIX = summary_debate.POSTFIX + +PAYOFF = payoff_utils.Payoff(PAYOFF_PROMPT, + MIN_PAYOFF, + MAX_PAYOFF, + PAYOFF_OBS_TRANS_PREFIX, + PAYOFF_OBS_TRANS_POSTFIX) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/payoffs/schedule_meeting.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/payoffs/schedule_meeting.py new file mode 100644 index 0000000..283118e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/payoffs/schedule_meeting.py @@ -0,0 +1,135 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A few prompts for extracting the value of a schedule negotiation with llms. +""" + +from open_spiel.python.games.chat_games.envs.payoffs import utils as payoff_utils +from open_spiel.python.games.chat_games.envs.utils import text + +MIN_PAYOFF = 0 +MAX_PAYOFF = 20 +PAYOFF_PROMPT = ''' +An intelligent assistant is looking at summaries of dialogues between two people +trying to decide when to meet. It also observes the day preferences of +participants as well as the days the participant is out of office. It is trying +to decide how happy each participant is with the outcome of the dialogue and how +happy they are with the chosen meeting time. + +Example 1: +Alice: +ooo_days: +monday: false +tuesday: true +wednesday: true +thursday: false +friday: false +saturday: true +sunday: false +day_prefs +monday: 2 +tuesday: 4 +wednesday: 12 +thursday: 8 +friday: 5 +saturday: 0 +sunday: 0 + +Bob: +ooo_days: +monday: false +tuesday: true +wednesday: true +thursday: false +friday: false +saturday: true +sunday: false +day_prefs +monday: 10 +tuesday: 5 +wednesday: 15 +thursday: 3 +friday: 2 +saturday: 1 +sunday: 1 + +Outcome Summary: Meeting agreed on Monday. + +Final valuation for Bob: 10. +Calculation: Monday selected. Not an out of office day. Value of monday: 10. + +Example 2: +Alice: +ooo_days: +monday: false +tuesday: true +wednesday: true +thursday: false +friday: false +saturday: true +sunday: false +day_prefs: +monday: 10 +tuesday: 5 +wednesday: 15 +thursday: 3 +friday: 2 +saturday: 1 +sunday: 1 + +Bob: +ooo_days: +monday: true +tuesday: true +wednesday: false +thursday: false +friday: false +saturday: true +sunday: false +day_prefs: +monday: 11 +tuesday: 2 +wednesday: 9 +thursday: 6 +friday: 5 +saturday: 0 +sunday: 1 + +Outcome Summary: Meeting agreed on Friday. + +Final valuation for Alice: 2. +Calculation: Friday selected. Not an out of office day. Value of friday: 2. 
+ +Example 3: +{m} + +Final valuation for {p}: +''' + +PAYOFF_OBS_TRANS_PREFIX = ['Read the following dialogue and extract out the ' + + 'message that captures the final agreement made ' + + 'between the two parties. If the players could ' + + 'not agree, say no agreement was ' + + 'reached. If both players agreed, say ' + + 'which day the players agreed to meet.'] +PAYOFF_OBS_TRANS_PREFIX = ('\n\n'.join(text.wrap(PAYOFF_OBS_TRANS_PREFIX)) + + '\n\n') + +PAYOFF_OBS_TRANS_POSTFIX = '' + +PAYOFF = payoff_utils.Payoff(PAYOFF_PROMPT, + MIN_PAYOFF, + MAX_PAYOFF, + PAYOFF_OBS_TRANS_PREFIX, + PAYOFF_OBS_TRANS_POSTFIX) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/payoffs/sentiment.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/payoffs/sentiment.py new file mode 100644 index 0000000..1335daa --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/payoffs/sentiment.py @@ -0,0 +1,58 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A few prompts for performing sentiment analysis with llms. +""" + +from open_spiel.python.games.chat_games.envs.payoffs import utils as payoff_utils +from open_spiel.python.games.chat_games.envs.utils import text + +# maybe sentiment score should look at summarized dialogue and predict how +# happy each participant is with the state of things +MIN_PAYOFF = -10 +MAX_PAYOFF = 10 +PAYOFF_PROMPT = [ + 'You are an assistant designed to analyze sentiment from speech data. ' + + 'Users will paste in a string of text and you will respond with an ' + + 'assessment of the response of the speaker. Rate on a scale of 1-10 ' + + '(10 being highest). Explain why this rating was given. Here are a few ' + + 'examples.', + '#' * 20, + 'message: Hi Bob, I am sorry you think the rent increase is unfair, but ' + + 'the economy has forced my hand. If I cannot afford it, you will have to ' + + 'leave.', + f'sentiment_score for Bob: 3/{MAX_PAYOFF}.', + 'message: Hi Bob, you are right. The rent is expensive and it is not fair' + + '_to raise it that high. I will compromise and only raise it by 3 percent.', + f'sentiment_score for Bob: 7/{MAX_PAYOFF}.', + 'message: Hi Alice, I think that rent increase is really unfair. 
I think ' + + 'you are trying to take advantage of me and I do not appreciate it.', + f'sentiment_score for Alice: 3/{MAX_PAYOFF}.', + 'message: Hi Alice, the rent is expensive but it is worth it and I am ' + + 'willing to pay you a higher rent.', + f'sentiment_score for Alice: 8/{MAX_PAYOFF}.', + '#' * 20, + 'Now provide a rating for the following message.', + 'message: {m}', + 'sentiment score for {p}: '] +PAYOFF_PROMPT = '\n\n'.join(text.wrap(PAYOFF_PROMPT)) + +PAYOFF_OBS_TRANS_PREFIX = '' +PAYOFF_OBS_TRANS_POSTFIX = '' + +PAYOFF = payoff_utils.Payoff(PAYOFF_PROMPT, + MIN_PAYOFF, + MAX_PAYOFF, + PAYOFF_OBS_TRANS_PREFIX, + PAYOFF_OBS_TRANS_POSTFIX) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/payoffs/trade_fruit.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/payoffs/trade_fruit.py new file mode 100644 index 0000000..c65e48e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/payoffs/trade_fruit.py @@ -0,0 +1,91 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A few prompts for extracting the value of a fruit trade with llms. +""" + +from open_spiel.python.games.chat_games.envs.payoffs import utils as payoff_utils +from open_spiel.python.games.chat_games.envs.utils import text + +MIN_PAYOFF = -20 +MAX_PAYOFF = 20 +PAYOFF_PROMPT_a = [ + 'You are an assistant designed to calculate the values of trades ' + + 'in a fruit trading game. Determine the value of the fruits the player ' + + 'is receiving in the trade. Then determine the value of the fruits the ' + + 'player is giving up through the trade. Subtract the value the player ' + + 'gives away from the value the player receives. Here is an example ' + + 'enclosed by "&".'] + +PAYOFF_PROMPT_b = [ + '&' * 50, + 'To calculate the trade value, we first calculate the value of ' + + 'the fruit Bob receives in the trade. Bob receives 3 kiwis worth 3 each. ' + + 'Therefore Bob receives a value of 9 in the trade.', + 'Receives: 9', + 'Now we calculate the value of the fruits Bob gives up in the trade. ' + + 'Bob gives up1 banana which is worth 5, therefore, Bob gives up a value ' + + 'of 5 in the trade.', + 'Gives: 5', + 'Subtracting the value Bob gives away from the value Bob receives, we ' + + 'find 9 - 5 = 4.', + 'Calculation: Receives - Gives = 9 - 5 = 4.', + 'Value for Bob: 4.', + '&' * 50, + 'Now calculate the value of the trade made in the following message.', + '{m}', + '&' * 50, + 'Trade calculation for {p} ONLY: '] + +PAYOFF_PROMPT = ('\n\n'.join(text.wrap(PAYOFF_PROMPT_a)) + '\n\n' + '&' * 50 + + '\n\nBob offered to give up 1 banana for 3 kiwis. Alice ' + + 'agreed to the trade.\n\n' + + '\n\n'.join(text.wrap(PAYOFF_PROMPT_b))) + +PAYOFF_OBS_TRANS_PREFIX = ['Read the following dialogue between two parties ' + + 'attempting to reach a trade agreement. 
If the ' + + 'dialogue ends with someone asking a question or ' + + 'making a couterproposal, an agreement has not ' + + 'been reached. If the dialogue ends with someone ' + + 'saying they accept the trade, an agreement has ' + + 'been reached. Report how much of each fruit each ' + + 'player gave and received in the tradeby stating ' + + 'the players names followed by a list of the ' + + 'fruits the gave up and then a list of the fruits ' + + 'they received in this format:', + 'Player [Name]: Receives x Gives y', + 'Player [Name]: Receives y Gives x', + 'Example 1:', + 'Dialogue:', + 'Bob offered to give up 1 banana for 3 kiwis. ' + + 'Alice agreed to the trade.', + 'Player Bob: Receives 3 kiwis Gives 1 banana', + 'Player Suzy: Receives 1 banana Gives 3 kiwis', + 'Example 2:', + 'Dialogue:', + 'Alice offered to give up 1 banana for 3 kiwis. ' + + 'George does not want to trade.', + 'Player Bob: Receives 0 kiwi Gives 0 banana', + 'Player Suzy: Receives 0 banana Gives 0 kiwi', + 'Dialogue:'] +PAYOFF_OBS_TRANS_PREFIX = ('\n\n'.join(text.wrap(PAYOFF_OBS_TRANS_PREFIX)) + + '\n\n') + +PAYOFF_OBS_TRANS_POSTFIX = '' + +PAYOFF = payoff_utils.Payoff(PAYOFF_PROMPT, + MIN_PAYOFF, + MAX_PAYOFF, + PAYOFF_OBS_TRANS_PREFIX, + PAYOFF_OBS_TRANS_POSTFIX) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/payoffs/utils.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/payoffs/utils.py new file mode 100644 index 0000000..496fb17 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/payoffs/utils.py @@ -0,0 +1,29 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utils for defining payoff prompts. +""" + +import dataclasses + +from open_spiel.python.games.chat_games.envs.observations import summary + + +@dataclasses.dataclass(frozen=True) +class Payoff: + query: str + min: int + max: int + obs_trans_prefix: str = summary.PREFIX + obs_trans_postfix: str = summary.POSTFIX diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/actions/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/actions/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/actions/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/actions/arguments.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/actions/arguments.py new file mode 100644 index 0000000..8e98c01 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/actions/arguments.py @@ -0,0 +1,20 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Examples of argument styles. +""" + +STYLES = ['logos', + 'pathos', + 'ethos'] diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/actions/tones.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/actions/tones.py new file mode 100644 index 0000000..a29f9b0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/actions/tones.py @@ -0,0 +1,26 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Examples of tones -- useful for generating additional examples. 
+""" + +TONES = ['kind', + 'thoughtful', + 'condescending', + 'aggressive', + 'aggreable', + 'clueless', + 'mean', + 'rude', + 'assertive'] diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/domains/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/domains/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/domains/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/domains/debate.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/domains/debate.py new file mode 100644 index 0000000..53048c5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/domains/debate.py @@ -0,0 +1,37 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Examples of debates -- useful for generating more examples. +""" + +from open_spiel.python.games.chat_games.envs.utils import text + +# Scenario A +SCENARIO_A_LIST = ['Tom Brady is the GOAT and coach Bill Bellichick ' + + 'is a genius'] +SCENARIO_A = '\n\n'.join(text.wrap(SCENARIO_A_LIST)) + +TOPIC_A = 'The New England Patriots are the best NFL team in 2023.' + +INFO_A = '' + +# Scenario B +SCENARIO_B_LIST = ['Breakfast is the most important meal of the day.'] +SCENARIO_B = '\n\n'.join(text.wrap(SCENARIO_B_LIST)) + +TOPIC_B = 'Breakfast is the most important meal of the day.' + +INFO_B = '' + +LLM_TERMINATION_PROMPT = None diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/domains/real_world_negotiations.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/domains/real_world_negotiations.py new file mode 100644 index 0000000..ef8cb87 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/domains/real_world_negotiations.py @@ -0,0 +1,49 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Examples of negotiation scenarios -- useful for generating more examples. +""" + +from open_spiel.python.games.chat_games.envs.utils import text + +# negotiating rent (money) +SCENARIO_A_LIST = [ + 'Hi {receiver},', 'I hope you are well,', 'I understand you have been a ' + + 'long time tenant with me, so I hate to increase rent, but as you know ' + + 'inflation has increased by 6 percent recently. In order to stay ' + + 'solvent I will need to increase your rent by 6 percent as well. I hope ' + + 'you understand my thinking.\n\nHow do you feel about this? Would you ' + + 'like to continue renting from me?', 'Best,', '{sender}'] +SCENARIO_A = '\n\n'.join(text.wrap(SCENARIO_A_LIST)) + +# negotiating deadline extension (time) +SCENARIO_B_LIST = [ + 'Dear {receiver},', 'I understand that my payment is due at the end of ' + + 'this month, but I will find it hard to come up with the money. Would it ' + + 'be possible to extend the due date by 1 week? This would allow me to ' + + 'come up with the necessary funds. As a concession, I would be willing to' + + ' pay early next month.', 'How do you feel about this? Do you have any ' + + 'other alternatives that you would be happy with?', 'Best,', '{sender}'] +SCENARIO_B = '\n\n'.join(text.wrap(SCENARIO_B_LIST)) + +# negotiating a trade (things) +SCENARIO_C_LIST = [ + 'Hey {receiver},', 'Thanks for your interest in my baseball card ' + + 'collection. I see you like my signed Babe Ruth special edition card. To ' + + 'be honest, I really like your signed Nolan Ryan jersey. I also like ' + + 'your signed Roger Clemens ball. Would you be interested in a trade? I ' + + 'have a few other things you might like to sweeten the deal: Ken Griffey '+ + 'Jr baseball bat, Mike Trout signed card, ...', 'What do you think?', + 'Best,', '{sender}'] +SCENARIO_C = '\n\n'.join(text.wrap(SCENARIO_C_LIST)) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/domains/schedule_meeting.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/domains/schedule_meeting.py new file mode 100644 index 0000000..b5ea2c8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/domains/schedule_meeting.py @@ -0,0 +1,86 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Examples of schedule negotations -- useful for generating more examples. 
+""" + +from open_spiel.python.games.chat_games.envs.observations import summary +from open_spiel.python.games.chat_games.envs.termination import utils as term_utils +from open_spiel.python.games.chat_games.envs.utils import text + +# Scenario A +OOO_LIST_A = ['monday: false', + 'tuesday: true', + 'wednesday: true', + 'thursday: false', + 'friday: false', + 'saturday: true', + 'sunday: false'] +OOO_A = '\n'.join(text.wrap(OOO_LIST_A)) + +DAY_PREFS_LIST_A = ['monday: 10', + 'tuesday: 5', + 'wednesday: 15', + 'thursday: 3', + 'friday: 2', + 'saturday: 1', + 'sunday: 1' + ] +DAY_PREFS_A = '\n'.join(text.wrap(DAY_PREFS_LIST_A)) + +SCENARIO_A_LIST = ['Hi {receiver},', + 'I would like to propose meeting on thursday.', + 'Would you like to meet with me then?', + 'Best,', '{sender}'] +SCENARIO_A = '\n\n'.join(text.wrap(SCENARIO_A_LIST)) + +# Scenario B +OOO_LIST_B = ['monday: true', + 'tuesday: false', + 'wednesday: true', + 'thursday: false', + 'friday: false', + 'saturday: true', + 'sunday: false'] +OOO_B = '\n'.join(text.wrap(OOO_LIST_B)) + +DAY_PREFS_LIST_B = ['monday: 5', + 'tuesday: 5', + 'wednesday: 5', + 'thursday: 1', + 'friday: 1', + 'saturday: 1', + 'sunday: 1' + ] +DAY_PREFS_B = '\n'.join(text.wrap(DAY_PREFS_LIST_B)) + +SCENARIO_B_LIST = ['Hi {receiver},', + 'I strongly urge you to meet me on friday when I am in ' + + 'the office.', + 'what do you say?', + 'Best,', '{sender}'] +SCENARIO_B = '\n\n'.join(text.wrap(SCENARIO_B_LIST)) + +query = ('Read the following summary of a dialgoue between two parties ' + + 'attempting to reach an agreement. Have the players reached an ' + + 'agreement? If a meeting time has been accepted or the players ' + + 'cannot come to an agreement, respond Yes. Otherwise, if the ' + + 'players are still discussing terms, respond No.' + + 'Here is the dialogue:\n\n{msg}\n\n' + '&' *50 + + '\n\nHave all parties agreed on a meeting time?' + '\nResponse: ') + +LLM_TERMINATION_PROMPT = term_utils.Termination(query, + summary.PREFIX, + summary.POSTFIX) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/domains/trade_fruit.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/domains/trade_fruit.py new file mode 100644 index 0000000..adf3df3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/domains/trade_fruit.py @@ -0,0 +1,64 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Examples of fruit trading scenarios -- useful for generating more examples. 
+""" + +from open_spiel.python.games.chat_games.envs.observations import summary +from open_spiel.python.games.chat_games.envs.termination import utils as term_utils +from open_spiel.python.games.chat_games.envs.utils import text + +# Scenario A +SCENARIO_A_LIST = ['Hi {receiver},', + 'I would like to trade you 1 banana for 1 apple.', + 'Would you like to trade with me?', + 'Best,', '{sender}'] +SCENARIO_A = '\n\n'.join(text.wrap(SCENARIO_A_LIST)) + +ENDOWMENT_A_LIST = ['apple: 1', 'banana: 2', 'blueberry: 0', 'kiwi: 0'] +ENDOWMENT_A = '\n'.join(text.wrap(ENDOWMENT_A_LIST)) + +VALUATION_A_LIST = ['apple: 10', + 'banana: 5', + 'blueberry: 1', + 'kiwi: 3'] +VALUATION_A = '\n'.join(text.wrap(VALUATION_A_LIST)) + +# Scenario B +SCENARIO_B_LIST = ['Hi {receiver},', + 'I would like to trade you 3 blueberries for 1 banana.', + 'Would you like to trade with me?', + 'Best,', '{sender}'] +SCENARIO_B = '\n\n'.join(text.wrap(SCENARIO_A_LIST)) + +ENDOWMENT_B_LIST = ['apple: 0', 'banana: 0', 'blueberry: 5', 'kiwi: 3'] +ENDOWMENT_B = '\n'.join(text.wrap(ENDOWMENT_B_LIST)) + +VALUATION_B_LIST = ['apple: 8', + 'banana: 7', + 'blueberry: 2', + 'kiwi: 2'] +VALUATION_B = '\n'.join(text.wrap(VALUATION_B_LIST)) + +query = ('Read the following summary of a dialgoue between two parties ' + + 'attempting to reach a trade agreement. Have the players reached a ' + + 'trade agreement? If a trade has been accepted or the players cannot' + + ' come to an agreement, respond Yes. Otherwise, if the players are ' + + 'still discussing terms, respond No.' + + 'Here is the dialogue:\n\n{msg}\n\n' + '&' *50 + + 'Response: ') + +LLM_TERMINATION_PROMPT = term_utils.Termination(query, + summary.PREFIX, + summary.POSTFIX) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/players/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/players/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/players/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/players/names.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/players/names.py new file mode 100644 index 0000000..272fec7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/scenarios/players/names.py @@ -0,0 +1,21 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Examples of names -- useful for generating additional examples. +""" + +NAMES = ['Ian', + 'Luke', + 'Siqi', + 'Georgios'] diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/termination/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/termination/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/termination/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/termination/utils.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/termination/utils.py new file mode 100644 index 0000000..7f45b4d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/termination/utils.py @@ -0,0 +1,27 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utils for defining game (episode) termination prompts. +""" + +import dataclasses + +from open_spiel.python.games.chat_games.envs.observations import summary + + +@dataclasses.dataclass(frozen=True) +class Termination: + query: str + obs_trans_prefix: str = summary.PREFIX + obs_trans_postfix: str = summary.POSTFIX diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/utils/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/utils/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/utils/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/utils/header.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/utils/header.py new file mode 100644 index 0000000..1dcfbea --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/utils/header.py @@ -0,0 +1,45 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Base utils for constructing agent dialogue message headers. +""" + +import dataclasses +import string + +from typing import Callable, Tuple + + +@dataclasses.dataclass(frozen=True) +class BaseScenario: + msg: str + sender: str + receiver: str + + +@dataclasses.dataclass(frozen=True) +class Header: + plain: str + w_opts: str + strip_msg: Callable[[str, str], str] + special_chars: Tuple[str, ...] + action_keys: Tuple[str, ...] = tuple([]) + info_keys: Tuple[str, ...] = tuple([]) + context: str = '' + + +def plain_header_is_valid(header: Header) -> bool: + plain = header.plain + keys = [t[1] for t in string.Formatter().parse(plain) if t[1] is not None] + return 'sender' in keys and 'receiver' in keys diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/utils/text.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/utils/text.py new file mode 100644 index 0000000..0235584 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/envs/utils/text.py @@ -0,0 +1,143 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utils for parsing and constructing message strings. +""" + +import textwrap + +from typing import List, Tuple + + +def strip_msg(text: str, + block_msg: str, + block_opt: str, + terminal_str: str = '') -> str: + """Strip email message (with header) from text block, i.e., [ (A) - (B) ). 
+ + Assumes messages adhere to the following format: + BLOCK_OPT + <-- action & info --> + BLOCK_MSG (A) + <-- e.g., sender/receiver --> + BLOCK_MSG + <-- e.g., message --> + BLOCK_OPT (B) + + Args: + text: str + block_msg: str, string of characters delineating the message + block_opt: str, string of characters demarking the start of + the options (actions and info) + terminal_str: str (optional), indicates the end of a message if block_opt + is not found. this will be included in the stripped output. + Returns: + stripped_text: str + """ + ctr = 0 + right_ptr = 0 + left_ptr = text.find(block_msg) + if left_ptr == -1: + return '' + while ctr < 2: + block_idx = text[right_ptr:].find(block_msg) + if block_idx == -1: + return '' + right_ptr += block_idx + len(block_msg) + ctr += 1 + block_idx = text[right_ptr:].find(block_opt) + if block_idx != -1: # if find block_opt return message ending at (B) + right_ptr += block_idx + else: + if terminal_str: # if no block_opt, return message ending at terminal_str + block_idx = text[right_ptr:].find(terminal_str) + if block_idx != -1: + right_ptr += block_idx + len(terminal_str) + else: # if no terminal_str, return message to end of text string + right_ptr = len(text) + return text[left_ptr:right_ptr] + + +def first_special_char(text: str, + max_idx: int, + special_chars: Tuple[str, ...]) -> int: + first_special_chars = [max_idx] + for char in special_chars: + idx = text.find(char) + if idx < 0: + first_special_chars.append(max_idx) + else: + first_special_chars.append(idx) + return min(first_special_chars) + + +def retrieve_special_char_block(text: str, + special_chars: Tuple[str, ...] = ('*',), + useless_chars: Tuple[str, ...] = (' ', '\n')): + for char in special_chars: + text = text.strip(char) + idx_end = first_special_char(text, len(text), special_chars) + text = text[:idx_end] + for char in useless_chars: + text = text.strip(char) + return text + + +def retrieve_alpha_block(text: str) -> str: + """Return the first instance of a contiguous alpha(not numeric) substring.""" + first_alpha_char = next(filter(str.isalpha, text), -1) + if first_alpha_char == -1: + return '' + start = text.find(first_alpha_char) + sliced = text[start:] + last_alpha_char = next(filter(lambda s: not str.isalpha(s), sliced), -1) + if last_alpha_char == -1: + return sliced + finish = sliced.find(last_alpha_char) + return text[start:start + finish] + + +def retrieve_numeric_block(text: str) -> str: + """Return the first instance of a contiguous numeric(not alpha) substring.""" + first_numeric_char = next(filter(str.isnumeric, text), -1) + if first_numeric_char == -1: + return '' + start = text.find(first_numeric_char) + sliced = text[start:] + last_numeric_char = next(filter(lambda s: not str.isnumeric(s), sliced), -1) + if start > 0 and text[start - 1] == '-': + start -= 1 + sliced = text[start:] + if last_numeric_char == -1: + return sliced + finish = sliced.find(last_numeric_char) + return text[start:start + finish] + + +def wrap(message: List[str]) -> List[str]: + """Given a list of strings, returns a list of them `wrapped` (paragraphs). 
+ + Args: + message: list of strings + Returns: + wrapped: list of strings with each string `wrapped` so that each line only + contains (default) 70 characters + """ + wrapped = [] + for sub_msg in message: + sub_msg_wrapped = textwrap.wrap(sub_msg) + if len(sub_msg_wrapped) > 1: + sub_msg_wrapped = ['\n'.join(sub_msg_wrapped)] + wrapped.extend(sub_msg_wrapped) + return wrapped diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/utils/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/utils/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/utils/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/utils/logging_utils.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/utils/logging_utils.py new file mode 100644 index 0000000..60debf8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/utils/logging_utils.py @@ -0,0 +1,48 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utils for constructing strings in color.""" + +RESET = '\033[0m' # Reset +BLACK = '\033[30m' # Black +RED = '\033[31m' # Red -- Terminating Game +GREEN = '\033[32m' # Green -- Computing Payoffs +YELLOW = '\033[33m' # Yellow -- Generated Game Def +BLUE = '\033[34m' # Blue +PURPLE = '\033[35m' # Purple -- Information States +CYAN = '\033[36m' # Cyan -- Generating Lists +WHITE = '\033[37m' # White +BLACK2 = '\033[39m' # Black? 
+ + +class ColorText: + """Color text class.""" + + def __init__(self, reset_color=RESET): + self.reset_color = reset_color + self.current_color = reset_color + + def set_color(self, color: str): + self.current_color = color + + def set_reset_color(self, color: str): + self.reset_color = color + + def reset(self): + self.current_color = self.reset_color + + def color(self, log_str: str, color: str = ''): + c = color if color else self.current_color + log_str = c + log_str + self.reset_color + return log_str diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/utils/test_utils.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/utils/test_utils.py new file mode 100644 index 0000000..be59cfc --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/chat_games/utils/test_utils.py @@ -0,0 +1,143 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utils for running tests.""" + +import dataclasses +import enum + +from typing import List + +import numpy as np + +from open_spiel.python.games.chat_games.envs.comm_substrates import emails + + +class TestLLM(enum.Enum): + MOCK = 0 + + +@dataclasses.dataclass(frozen=True) +class MockScore: + logprob: float + + +class MockModel(): + """Mock LLM model.""" + + def __init__(self, name): + self.name = name + + +class MockResponse(): + """Mock LLM response.""" + + def __init__(self, text): + self.text = text + + +class MockClient(): + """Mock LLM client.""" + + def __init__(self): + # for cycling through mock response options + self._idxs = {'names': 0, + 'tones': 0, + 'examples': 0} + + def sample(self, model: str, length: int, seed: int, prompt: str + ) -> MockResponse: + """Returns string responses according to fixed prompt styles.""" + del model, length, seed + prompt_lower = prompt.lower() + if 'names' in prompt_lower: + dummy_names = ['Suzy', 'Bob', 'Alice', 'Doug', 'Arun', 'Maria', 'Zhang'] + dummy_name = dummy_names[self._idxs['names']] + self._idxs['names'] = (self._idxs['names'] + 1) % len(dummy_names) + return MockResponse(dummy_name + '\n') + elif 'tones' in prompt_lower: + dummy_tones = ['Happy', 'Sad', 'Angry'] + dummy_tone = dummy_tones[self._idxs['tones']] + self._idxs['tones'] = (self._idxs['tones'] + 1) % len(dummy_tones) + return MockResponse(dummy_tone + '\n') + elif 'list of items' in prompt_lower: + num_examples = 10 + dummy_examples = [f'Example-{i}' for i in range(num_examples)] + dummy_example = dummy_examples[self._idxs['examples']] + self._idxs['examples'] = (self._idxs['examples'] + 1) % num_examples + return MockResponse(dummy_example + '\n') + elif 'score' in prompt_lower or 'value' in prompt_lower: + return MockResponse('5\n') + elif 'summary' in prompt_lower: + return MockResponse('This is a summary of the dialogue. 
We are happy.\n') + elif emails.BLOCK_OPT in prompt: + return MockResponse('\nThat all sounds good to me.\n') + else: + raise ValueError('Prompt not recognized!\n\n' + prompt) + + def score(self, model: str, prompt: str) -> List[MockScore]: + del model, prompt + return [MockScore(logprob=-1)] + + def list_models(self) -> List[MockModel]: + dummy_models = ['dummy_model'] + models = [MockModel(model_name) for model_name in dummy_models] + return models + + +class MockLLM(): + """Mock LLM.""" + + def __init__(self): + self.client = MockClient() + self.model = 'dummy_model' + + def generate_response(self, prompt: str, seed: int, + num_output_tokens: int) -> str: + response = self.client.sample( + model=self.model, + length=num_output_tokens, + seed=seed, + prompt=prompt + ) + return response.text + + def generate_bool(self, prompt: str, seed: int) -> bool: + del seed + score_true = self.client.score(model=self.model, prompt=prompt + 'Yes') + score_false = self.client.score(model=self.model, prompt=prompt + 'No') + if score_true > score_false: + return True + else: + return False + + +class MockTokenizer(): + """Mock Tokenizer.""" + + def to_int(self, text: str) -> np.ndarray: + return np.zeros(len(text), dtype=np.int32) + + +class MockVectorizer(): + """Mock Vectorizer.""" + + def __init__(self): + self.tokenizer = MockTokenizer() + + def vectorize(self, text: str, obs_size: int) -> np.ndarray: + observation = self.tokenizer.to_int(text)[:obs_size] + num_pad = max(0, obs_size - observation.size) + observation = np.pad(observation, (0, num_pad)) + return observation diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/data.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/data.py new file mode 100644 index 0000000..4cf800d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/data.py @@ -0,0 +1,36 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Numerical information about some games or some specific settings of games. + +TODO(author2): Ideally, this should also be available from C++. +""" + +import pyspiel + + +def kuhn_nash_equilibrium(alpha): + """Returns a Nash Equilibrium in Kuhn parameterized by alpha in [0, 1/3]. + + See https://en.wikipedia.org/wiki/Kuhn_poker#Optimal_strategy + + Args: + alpha: The probability to bet on a Jack for Player 0. + + Raises: + ValueError: If `alpha` is not within [0, 1/3]. 
+ """ + if not 0 <= alpha <= 1 / 3: + raise ValueError("alpha ({}) must be in [0, 1/3]".format(alpha)) + return pyspiel.kuhn_poker.get_optimal_policy(alpha) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/data_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/data_test.py new file mode 100644 index 0000000..0e7d577 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/data_test.py @@ -0,0 +1,37 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.games.data.""" + +from absl.testing import absltest +from absl.testing import parameterized +from open_spiel.python.algorithms import exploitability +from open_spiel.python.games import data +import pyspiel + + +class NashEquilibriumtest(parameterized.TestCase): + + @parameterized.parameters((0.), (0.1), (1 / 3)) + def test_exploitability_is_zero_on_nash(self, alpha): + # A similar test exists in: + # open_spiel/python/algorithms/exploitability_test.py + game = pyspiel.load_game("kuhn_poker") + policy = data.kuhn_nash_equilibrium(alpha=alpha) + expl = exploitability.exploitability(game, policy) + self.assertAlmostEqual(0, expl) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/dynamic_routing.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/dynamic_routing.py new file mode 100644 index 0000000..4633a94 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/dynamic_routing.py @@ -0,0 +1,459 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Implementation of dynamic routing game. + +The game is derived from https://arxiv.org/abs/2110.11943. +This dynamic routing game models the evolution of N vehicles in a road network. +The vehicles are described by their current link location, the time they have to +spend on the link before exiting it, and their destination. The action of a +vehicle is the successor link they want to reach when exiting a given link. +Actions are encoded as integer from 0 to K. Action 0 encodes not being able to +move on a successor link because the waiting time of the player is still +positive. Actions 1 to K correspond to the indices of the network links. Legal +actions for a player on link l, with a negative waiting time are the indices of +the successors link of l. 
When arriving on a link, the waiting time of the +player is assign based on the number of players on the link at this time. Over +time steps, the waiting time linearly decrease until it is negative, the vehicle +moves to a successor link and the waiting time get reassigned. +The cost of the vehicle is its arrival time, it could be seen as a running cost +where +1 is added to the cost at any time step the vehicle is not on its +destination. +This dynamic routing game is a mesoscopic traffic model with explicit congestion +dynamics where vehicle minimizes their arrival time. + +The game is defined by: +- a network given by the class Network. +- a list of vehicles given by the class Vehicle. + +The current game is implementated as a N player game. However this game can also +be extended to a mean field game, implemented as python_mfg_dynamic_routing. +""" + +from typing import Any, Iterable, List, Mapping, Optional, Set + +import numpy as np +from open_spiel.python.games import dynamic_routing_data +from open_spiel.python.games import dynamic_routing_utils +from open_spiel.python.observation import IIGObserverForPublicInfoGame +import pyspiel + +_DEFAULT_PARAMS = { + "max_num_time_step": 10, + "time_step_length": 0.5, + "players": -1 +} +_GAME_TYPE = pyspiel.GameType( + short_name="python_dynamic_routing", + long_name="Python Dynamic Routing Game", + dynamics=pyspiel.GameType.Dynamics.SIMULTANEOUS, + chance_mode=pyspiel.GameType.ChanceMode.DETERMINISTIC, + information=pyspiel.GameType.Information.PERFECT_INFORMATION, + utility=pyspiel.GameType.Utility.GENERAL_SUM, + reward_model=pyspiel.GameType.RewardModel.REWARDS, + max_num_players=100, + min_num_players=0, + provides_information_state_string=True, + provides_information_state_tensor=True, + provides_observation_string=True, + provides_observation_tensor=True, + default_loadable=True, + provides_factored_observation_string=True, + parameter_specification=_DEFAULT_PARAMS) + + +class DynamicRoutingGame(pyspiel.Game): + """Implementation of dynamic routing game. + + At each simultaneous-move time, each vehicle/player with negative waiting time + chooses on which successor link they would like to go. When arriving on the + link, a waiting time is assigned to the player based on the count of players + on the link, after everyone has moved to their successors link. One vehicle + arrival time is equal to the time step when they first reach their + destination. + See module docstring for more information. + + Attributes inherited from GameInfo: + max_chance_outcome: 0, the game is deterministic. + max_game_length: maximum number of time step played. Passed during + construction. + max_utility: maximum utility is the opposite of the minimum arrival time. + Set to 0. + min_utility: minimum utility is the opposite of the maximum arrival time. + Set to - max_game_length - 1. + num_distinct_actions: maximum number of possible actions. This is equal to + the number of links + 1 (corresponding to having no possible action + _NO_POSSIBLE_ACTION). + num_players: the number of vehicles. Choosen during by the constructor as + the number of vehicles. + Attributes: + network: the network of the game. + _vehicles: a list of the vehicle. Their origin and their destination should + be road sections of the game. The number of vehicles in the list sets the + num_players attribute. + time_step_length: size of the time step, used to convert travel times into + number of game time steps. 
+    perform_sanity_checks: if True, sanity checks are done during the game;
+      set to False to speed up the game.
+  """
+  network: dynamic_routing_utils.Network
+  _vehicles: List[dynamic_routing_utils.Vehicle]
+  perform_sanity_checks: bool
+  time_step_length: float
+
+  def __init__(
+      self,
+      params: Mapping[str, Any],
+      network: Optional[dynamic_routing_utils.Network] = None,
+      vehicles: Optional[List[dynamic_routing_utils.Vehicle]] = None,
+      perform_sanity_checks: bool = True,
+  ):
+    """Initialize the game.
+
+    Args:
+      params: game parameters. It should define max_num_time_step and
+        time_step_length.
+      network: the network of the game.
+      vehicles: a list of vehicles. Their origins and destinations should be
+        road sections of the game. The number of vehicles in the list sets the
+        num_players attribute.
+      perform_sanity_checks: set the perform_sanity_checks attribute.
+    """
+    max_num_time_step = params["max_num_time_step"]
+    time_step_length = params["time_step_length"]
+    self.network = network if network else dynamic_routing_data.BRAESS_NETWORK
+    self._vehicles = (
+        vehicles
+        if vehicles else dynamic_routing_data.BRAESS_NETWORK_VEHICLES_DEMAND)
+    self.network.check_list_of_vehicles_is_correct(self._vehicles)
+    self.perform_sanity_checks = perform_sanity_checks
+    self.time_step_length = time_step_length
+    game_info = pyspiel.GameInfo(
+        num_distinct_actions=self.network.num_actions(),
+        max_chance_outcomes=0,
+        num_players=len(self._vehicles),
+        min_utility=-max_num_time_step - 1,
+        max_utility=0,
+        max_game_length=max_num_time_step)
+    super().__init__(_GAME_TYPE, game_info, params if params else {})
+
+  def new_initial_state(self) -> "DynamicRoutingGameState":
+    """Returns the state corresponding to the start of a game."""
+    return DynamicRoutingGameState(self, self._vehicles, self.time_step_length)
+
+  def make_py_observer(self, iig_obs_type=None, params=None):
+    """Returns a NetworkObserver object used for observing game state."""
+    if ((iig_obs_type is None) or
+        (iig_obs_type.public_info and not iig_obs_type.perfect_recall)):
+      return NetworkObserver(self.num_players(), self.max_game_length())
+    return IIGObserverForPublicInfoGame(iig_obs_type, params)
+
+
+class DynamicRoutingGameState(pyspiel.State):
+  """State of the DynamicRoutingGame.
+
+  One player is equal to one vehicle.
+  See the docstrings of the game class and of the file for more information.
+  Attributes:
+    _current_time_step: current time step of the game.
+    _is_terminal: boolean that encodes whether the game is over.
+    _time_step_length: size of the time step, used to convert travel times into
+      number of game time steps.
+    _vehicle_at_destination: set of vehicles that have reached their
+      destinations. When a vehicle has reached its destination but the game is
+      not finished, it cannot do anything.
+    _vehicle_destinations: the destination of each vehicle.
+    _vehicle_final_arrival_times: the arrival time of each vehicle; it is 0 if
+      the vehicle is still in the network, or its arrival time once the vehicle
+      has reached its destination.
+    _vehicle_locations: current location of the vehicles as a network road
+      section.
+    _vehicle_without_legal_actions: set of vehicles without legal actions at
+      the next time step. This is required because an error is raised if no
+      vehicle has legal actions for a simultaneous node.
+    _waiting_times: time that each vehicle should wait before being able to move
+      to the next road section.
+ """ + _current_time_step: int + _is_terminal: bool + _time_step_length: float + _vehicle_at_destination: Set[int] + _vehicle_destinations: List[str] + _vehicle_final_arrival_times: List[float] + _vehicle_locations: List[str] + _vehicle_without_legal_actions: Set[int] + _waiting_times: List[int] + + def __init__(self, game: DynamicRoutingGame, + vehicles: Iterable[dynamic_routing_utils.Vehicle], + time_step_length: float): + """Constructor; should only be called by Game.new_initial_state.""" + super().__init__(game) + self._current_time_step = 0 + self._is_terminal = False + self._time_step_length = time_step_length + self._vehicle_at_destination = set() + self._vehicle_destinations = [vehicle.destination for vehicle in vehicles] + self._vehicle_final_arrival_times = [0.0 for _ in vehicles] + self._vehicle_locations = [vehicle.origin for vehicle in vehicles] + self._vehicle_without_legal_actions = set() + self._waiting_times = [ + int(veh._departure_time / self._time_step_length) for veh in vehicles + ] + self.running_cost = [0 for vehicle in vehicles] + + @property + def current_time_step(self) -> int: + """Return current time step.""" + return self._current_time_step + + def current_player(self) -> pyspiel.PlayerId: + """Returns the current player. + + If the game is over, TERMINAL is returned. If the game is at a chance + node then CHANCE is returned. Otherwise SIMULTANEOUS is returned. + """ + if self._is_terminal: + return pyspiel.PlayerId.TERMINAL + return pyspiel.PlayerId.SIMULTANEOUS + + def assert_valid_player(self, vehicle: int): + """Assert that a vehicle as a int between 0 and num_players.""" + assert isinstance(vehicle, int), f"{vehicle} is not a int." + assert vehicle >= 0, f"player: {vehicle}<0." + assert vehicle < self.get_game().num_players(), ( + f"player: {vehicle} >= num_players: {self.get_game().num_players()}") + + def _legal_actions(self, vehicle: int) -> List[int]: + """Return the legal actions of the vehicle. + + Legal actions are the succesor road section of the vehicle current road + section. + Args: + vehicle: the vehicle id. + + Returns: + list_legal_actions: a list of legal actions. If the game is finished then + the list is empty. If the vehicle is at its destination, has a positive + waiting time or if it is on a node without successors then an empty list + is returned. Otherwise the list of successors nodes of the current + vehicle location is returned. + """ + if self._is_terminal: + return [] + if self.get_game().perform_sanity_checks: + self.assert_valid_player(vehicle) + if vehicle in self._vehicle_without_legal_actions: + # If the vehicle is at destination it cannot do anything. + return [dynamic_routing_utils.NO_POSSIBLE_ACTION] + if self._waiting_times[vehicle] > 0: + return [dynamic_routing_utils.NO_POSSIBLE_ACTION] + _, end_section_node = dynamic_routing_utils._nodes_from_road_section( # pylint:disable=protected-access + self._vehicle_locations[vehicle]) + successors = self.get_game().network.get_successors(end_section_node) + if successors: + assert isinstance(successors, Iterable) + actions = [ + self.get_game().network.get_action_id_from_movement( + end_section_node, d) for d in successors + ] + if self.get_game().perform_sanity_checks: + map(self.get_game().network.assert_valid_action, actions) + return sorted(actions) + return [] + + def _apply_actions(self, actions: List[int]): + """Applies the specified action to the state. 
+ + For each vehicle's action, if the vehicle is not at a sink node, if the + action is valid and if the waiting time is negative, then the vehicle will + move to the successor link corresponding to its action. + The function then detects if the vehicle has reached its destination or + a sink node and updates _vehicle_at_destination, + _vehicle_without_legal_actions and _vehicle_final_arrival_times + accordingly. + The function then assigns waiting for each vehicle that have moved based on + the new volume of cars on the link they reach. + The function evolves the time and checks if the game is finished. + Args: + actions: the action chosen by each vehicle. + """ + if self.get_game().perform_sanity_checks: + assert not self._is_terminal + if self.get_game().perform_sanity_checks: + assert isinstance(actions, Iterable) + assert len(actions) == self.get_game().num_players(), ( + f"Each player does not have an actions. Actions has {len(actions)} " + f"elements, it should have {self.get_game().num_players()}.") + for vehicle_id, action in enumerate(actions): + if vehicle_id not in self._vehicle_at_destination: + self.running_cost[vehicle_id] += self._time_step_length + # Has the vehicle already reached a sink node? + if vehicle_id in self._vehicle_without_legal_actions: + if self.get_game().perform_sanity_checks: + assert action == dynamic_routing_utils.NO_POSSIBLE_ACTION, ( + f"{action} should be {dynamic_routing_utils.NO_POSSIBLE_ACTION}.") + continue + if self._waiting_times[vehicle_id] > 0: + continue + if self.get_game().perform_sanity_checks: + self.get_game().network.assert_valid_action( + action, self._vehicle_locations[vehicle_id]) + self._vehicle_locations[vehicle_id] = ( + self.get_game().network.get_road_section_from_action_id(action)) + if (self._vehicle_locations[vehicle_id] == + self._vehicle_destinations[vehicle_id]): + self._vehicle_final_arrival_times[vehicle_id] = self._current_time_step + self._vehicle_at_destination.add(vehicle_id) + self._vehicle_without_legal_actions.add(vehicle_id) + # Will the vehicle have a legal action for next time step? + elif self.get_game().network.is_location_at_sink_node( + self._vehicle_locations[vehicle_id]): + self._vehicle_without_legal_actions.add(vehicle_id) + self._current_time_step += 1 + volumes = {} + for road_section in self._vehicle_locations: + if road_section not in volumes: + volumes[road_section] = 0 + # Each vehicle has a weight a one. + volumes[road_section] += 1 + for vehicle_id, _ in enumerate(actions): + # Has the vehicle already reached a sink node? + if vehicle_id in self._vehicle_without_legal_actions: + continue + if self._waiting_times[vehicle_id] > 0: + self._waiting_times[vehicle_id] -= 1 + else: + self._waiting_times[vehicle_id] = int(self.get_game( + ).network.get_travel_time(self._vehicle_locations[vehicle_id], volumes[ + self._vehicle_locations[vehicle_id]]) / self._time_step_length - + 1.0) + # Is the game finished? 
+ if (self._current_time_step >= self.get_game().max_game_length() or len( + self._vehicle_without_legal_actions) == self.get_game().num_players()): + self._is_terminal = True + for vehicle_id in range(self.get_game().num_players()): + if vehicle_id not in self._vehicle_at_destination: + self._vehicle_final_arrival_times[vehicle_id] = ( + self._current_time_step) + + def _action_to_string(self, player, action) -> str: + """Action -> string.""" + if self.get_game().perform_sanity_checks: + self.assert_valid_player(player) + if action == dynamic_routing_utils.NO_POSSIBLE_ACTION: + return f"Vehicle {player} reach a sink node or its destination." + if self.get_game().perform_sanity_checks: + self.get_game().network.assert_valid_action(action) + return (f"Vehicle {player} would like to move to " + f"{self.get_game().network.get_road_section_from_action_id(action)}" + ".") + + def is_terminal(self) -> bool: + """Returns True if the game is over.""" + return self._is_terminal + + def rewards(self): + """Reward at the previous step.""" + if self._is_terminal or self._current_time_step == 0: + return [0 for _ in self._vehicle_locations] + reward = [-self._time_step_length for _ in self._vehicle_locations] + for vehicle in self._vehicle_at_destination: + reward[vehicle] = 0 + return reward + + def returns(self) -> List[float]: + """Total reward for each player over the course of the game so far.""" + if not self._is_terminal: + returns = [ + -self._time_step_length * self.current_time_step + for _ in self._vehicle_locations + ] + for vehicle in self._vehicle_at_destination: + returns[vehicle] = -( + self._vehicle_final_arrival_times[vehicle] * self._time_step_length) + return returns + returns = [ + -arrival_time * self._time_step_length + for arrival_time in self._vehicle_final_arrival_times + ] + return returns + + def get_current_vehicle_locations(self) -> List[str]: + """Get vehicle locations for debug purposes.""" + return self._vehicle_locations + + def get_location_as_int(self, vehicle: int) -> int: + """Get the vehicle location.""" + origin, destination = dynamic_routing_utils._nodes_from_road_section( # pylint:disable=protected-access + self._vehicle_locations[vehicle]) + return self.get_game().network.get_action_id_from_movement( + origin, destination) + + def get_current_vehicle_locations_as_int(self) -> List[int]: + """Get locations of all vehicles for the observation tensor.""" + return [ + self.get_location_as_int(x) + for x in range(self.get_game().num_players()) + ] + + def __str__(self) -> str: + """String for debug purposes. No particular semantics are required.""" + if self._is_terminal: + time = f"{self._current_time_step}, game finished." + else: + time = f"{self._current_time_step}" + return (f"Vehicle locations: {self._vehicle_locations}, " + f"time: {time}, waiting_time={self._waiting_times}.") + + +class NetworkObserver: + """Network observer used by the learning algorithm. + + The state string is the state history string. The state tensor is an array + of size max_game_length, num_players where each element is the location of + the vehicle at this time. + Attributes: + dict: dictionary {"observation": tensor}. + tensor: list of location for each time step. 
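
+  A minimal sketch of the resulting shape (illustrative only; the numbers are
+  made up):
+  ```python3
+  observer = NetworkObserver(num_vehicles=2, num_time=10)
+  # The "observation" array has one row per time step and, in each row, the
+  # observing player's location followed by every vehicle's location.
+  observer.dict["observation"].shape  # (11, 3)
+  ```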
+ """ + + def __init__(self, num_vehicles: int, num_time: int): + """Initializes an empty observation tensor.""" + shape = (num_time + 1, num_vehicles + 1) + self.tensor = np.zeros(np.prod(shape), np.float32) + self.dict = {"observation": np.reshape(self.tensor, shape)} + + def set_from(self, state, player): + """Update the state tensor. + + Put the locations of each players in the tensor row corresponding to + the current time step. Insert the current player location at the + beginning of the row. + Args: + state: the state, + player: the player. + """ + vehicles = state.get_current_vehicle_locations_as_int() + vehicles.insert(0, state.get_location_as_int(player)) + self.dict["observation"][state.current_time_step, :] = vehicles + + def string_from(self, state, player): + """Return the state history string.""" + return f"{player}: {state.history_str()}" + + +# Register the game with the OpenSpiel library +pyspiel.register_game(_GAME_TYPE, DynamicRoutingGame) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/dynamic_routing_data.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/dynamic_routing_data.py new file mode 100644 index 0000000..d05cab5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/dynamic_routing_data.py @@ -0,0 +1,431 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Default data for dynamic routing game.""" + +from open_spiel.python.games import dynamic_routing_utils + +# The line network is a very simple network (O -> A -> D) with the goal of +# testing the routing game. There is no possible action and all cars will go +# from node O (being at node O means being on the link bef_O->O) to node D +# (being at node D means being on the link D->aft_D). +LINE_NETWORK = dynamic_routing_utils.Network({ + "bef_O": "O", + "O": ["A"], + "A": ["D"], + "D": ["aft_D"], + "aft_D": [] +}) + +LINE_NETWORK_VEHICLES_DEMAND = [ + dynamic_routing_utils.Vehicle("bef_O->O", "D->aft_D") for _ in range(2) +] + +LINE_NETWORK_OD_DEMAND = [ + dynamic_routing_utils.OriginDestinationDemand("bef_O->O", "D->aft_D", 0, + 100) +] + +# The Braess network comes from the Braess paradox: Braess, D., 1968. "Uber ein +# Paradoxon aus der Verkehrsplanung". Unternehmensforschung 12, 258-268. 
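+# A minimal sketch of how the Braess data below is typically consumed
+# (illustrative only; DynamicRoutingGame defaults to BRAESS_NETWORK and
+# BRAESS_NETWORK_VEHICLES_DEMAND when no network or vehicles are passed):
+#   game = dynamic_routing.DynamicRoutingGame(
+#       {"max_num_time_step": 10, "time_step_length": 0.5, "players": -1},
+#       network=BRAESS_NETWORK,
+#       vehicles=BRAESS_NETWORK_VEHICLES_DEMAND)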
+BRAESS_NUM_PLAYER = 5 +BRAESS_NETWORK = dynamic_routing_utils.Network( + { + "O": "A", + "A": ["B", "C"], + "B": ["C", "D"], + "C": ["D"], + "D": ["E"], + "E": [] + }, + node_position={ + "O": (0, 0), + "A": (1, 0), + "B": (2, 1), + "C": (2, -1), + "D": (3, 0), + "E": (4, 0) + }, + bpr_a_coefficient={ + "O->A": 0, + "A->B": 1.0, + "A->C": 0, + "B->C": 0, + "B->D": 0, + "C->D": 1.0, + "D->E": 0 + }, + bpr_b_coefficient={ + "O->A": 1.0, + "A->B": 1.0, + "A->C": 1.0, + "B->C": 1.0, + "B->D": 1.0, + "C->D": 1.0, + "D->E": 1.0 + }, + capacity={ + "O->A": BRAESS_NUM_PLAYER, + "A->B": BRAESS_NUM_PLAYER, + "A->C": BRAESS_NUM_PLAYER, + "B->C": BRAESS_NUM_PLAYER, + "B->D": BRAESS_NUM_PLAYER, + "C->D": BRAESS_NUM_PLAYER, + "D->E": BRAESS_NUM_PLAYER + }, + free_flow_travel_time={ + "O->A": 0, + "A->B": 1.0, + "A->C": 2.0, + "B->C": 0.25, + "B->D": 2.0, + "C->D": 1.0, + "D->E": 0 + }) + +BRAESS_NETWORK_VEHICLES_DEMAND = [ + dynamic_routing_utils.Vehicle("O->A", "D->E") + for _ in range(BRAESS_NUM_PLAYER) +] + +BRAESS_NETWORK_OD_DEMAND = [ + dynamic_routing_utils.OriginDestinationDemand("O->A", "D->E", 0, + BRAESS_NUM_PLAYER) +] + +# The Sioux Falls data comes from "An Efficient Approach to Solving the Road +# Network Equilibrium Traffic Assignment Problem" by L. J. LeBlanc and E. K. +# Morlok (http://doi.org/10.1016/0041-1647(75)90030-1). We scale uniformly the +# data to decrease the number of time steps needed to cross the network. The +# demand and congestion functions data has been copied and pasted from the +# paper. The node position has been created from the paper's figure with a +# simple scale. +__SIOUX_FALLS_ADJACENCY = { + "1": ["2", "3"], + "2": ["1", "6"], + "3": ["1", "4", "12"], + "4": ["3", "5", "11"], + "5": ["4", "6", "9"], + "6": ["2", "5", "8"], + "7": ["8", "18"], + "8": ["6", "7", "9", "16"], + "9": ["5", "8", "10"], + "10": ["9", "11", "15", "16", "17"], + "11": ["4", "10", "12", "14"], + "12": ["3", "11", "13"], + "13": ["12", "24"], + "14": ["11", "15", "23"], + "15": ["10", "14", "19", "22"], + "16": ["8", "10", "17", "18"], + "17": ["10", "16", "19"], + "18": ["7", "16", "20"], + "19": ["15", "17", "20"], + "20": ["18", "19", "21", "22"], + "21": ["20", "22", "24"], + "22": ["15", "20", "21", "23"], + "23": ["14", "22", "24"], + "24": ["13", "21", "23"] +} + +__SIOUX_FALLS_FREE_FLOW_TRAVEL_TIME = { + "1->2": 6, "1->3": 4, "2->1": 6, "2->6": 5, "3->1": 4, "3->4": 4, + "3->12": 4, "4->3": 4, "4->5": 2, "4->11": 6, "5->4": 2, "5->6": 4, + "5->9": 5, "6->2": 5, "6->5": 4, "6->8": 2, "7->8": 3, "7->18": 2, + "8->6": 2, "8->7": 3, "8->9": 10, "8->16": 5, "9->5": 5, "9->8": 10, + "9->10": 3, "10->9": 3, "10->11": 5, "10->15": 6, "10->16": 4, "10->17": 8, + "11->4": 6, "11->10": 5, "11->12": 6, "11->14": 4, "12->3": 4, "12->11": 6, + "12->13": 3, "13->12": 3, "13->24": 4, "14->11": 4, "14->15": 5, + "14->23": 4, "15->10": 6, "15->14": 5, "15->19": 3, "15->22": 3, "16->8": 5, + "16->10": 4, "16->17": 2, "16->18": 3, "17->10": 8, "17->16": 2, + "17->19": 2, "18->7": 2, "18->16": 3, "18->20": 4, "19->15": 3, "19->17": 2, + "19->20": 4, "20->18": 4, "20->19": 4, "20->21": 6, "20->22": 5, + "21->20": 6, "21->22": 2, "21->24": 3, "22->15": 3, "22->20": 5, + "22->21": 2, "22->23": 4, "23->14": 4, "23->22": 4, "23->24": 2, + "24->13": 4, "24->21": 3, "24->23": 2 +} + +__SIOUX_FALLS_BPR_A_COEFF = { + "1->2": 2 * 1e-18, + "1->3": 2 * 1e-18, + "2->1": 2 * 1e-18, + "2->6": 1240 * 1e-18, + "3->1": 2 * 1e-18, + "3->4": 6 * 1e-18, + "3->12": 2 * 1e-18, + "4->3": 6 * 1e-18, + "4->5": 3 * 1e-18, + 
"4->11": 1550 * 1e-18, + "5->4": 3 * 1e-18, + "5->6": 1000 * 1e-18, + "5->9": 75 * 1e-18, + "6->2": 1240 * 1e-18, + "6->5": 1000 * 1e-18, + "6->8": 520 * 1e-18, + "7->8": 119 * 1e-18, + "7->18": 1 * 1e-18, + "8->6": 520 * 1e-18, + "8->7": 119 * 1e-18, + "8->9": 2306 * 1e-18, + "8->16": 1156 * 1e-18, + "9->5": 75 * 1e-18, + "9->8": 2306 * 1e-18, + "9->10": 11 * 1e-18, + "10->9": 11 * 1e-18, + "10->11": 75 * 1e-18, + "10->15": 26 * 1e-18, + "10->16": 1080 * 1e-18, + "10->17": 1929 * 1e-18, + "11->4": 1550 * 1e-18, + "11->10": 75 * 1e-18, + "11->12": 1550 * 1e-18, + "11->14": 1061 * 1e-18, + "12->3": 2 * 1e-18, + "12->11": 1550 * 1e-18, + "12->13": 1 * 1e-18, + "13->12": 1 * 1e-18, + "13->24": 893 * 1e-18, + "14->11": 1061 * 1e-18, + "14->15": 1085 * 1e-18, + "14->23": 1020 * 1e-18, + "15->10": 26 * 1e-18, + "15->14": 1085 * 1e-18, + "15->19": 10 * 1e-18, + "15->22": 53 * 1e-18, + "16->8": 1156 * 1e-18, + "16->10": 1080 * 1e-18, + "16->17": 401 * 1e-18, + "16->18": 3 * 1e-18, + "17->10": 1929 * 1e-18, + "17->16": 401 * 1e-18, + "17->19": 553 * 1e-18, + "18->7": 1 * 1e-18, + "18->16": 3 * 1e-18, + "18->20": 2 * 1e-18, + "19->15": 10 * 1e-18, + "19->17": 553 * 1e-18, + "19->20": 957 * 1e-18, + "20->18": 2 * 1e-18, + "20->19": 957 * 1e-18, + "20->21": 1373 * 1e-18, + "20->22": 1130 * 1e-18, + "21->20": 1373 * 1e-18, + "21->22": 401 * 1e-18, + "21->24": 789 * 1e-18, + "22->15": 53 * 1e-18, + "22->20": 1130 * 1e-18, + "22->21": 401 * 1e-18, + "22->23": 960 * 1e-18, + "23->14": 1020 * 1e-18, + "23->22": 960 * 1e-18, + "23->24": 451 * 1e-18, + "24->13": 893 * 1e-18, + "24->21": 789 * 1e-18, + "24->23": 451 * 1e-18, +} + +__SIOUX_FALLS_NODES = { + "1": (0, 9), "2": (5, 9), "3": (0, 8), "4": (1, 8), "5": (3, 8), + "6": (5, 8), "7": (7, 6), "8": (5, 6), "9": (3, 6), "10": (3, 5), + "11": (1, 5), "12": (0, 5), "13": (0, 0), "14": (1, 2), "15": (3, 2), + "16": (5, 5), "17": (5, 4), "18": (7, 5), "19": (5, 2), "20": (5, 0), + "21": (3, 0), "22": (3, 1), "23": (1, 1), "24": (1, 0) +} + +__SIOUX_FALLS_DEMAND_AUX = [ + ("2", "1", 1), ("3", "1", 1), ("4", "1", 5), ("5", "1", 2), + ("6", "1", 3), ("7", "1", 5), ("8", "1", 8), ("9", "1", 5), + ("10", "1", 13), ("11", "1", 5), ("12", "1", 2), ("13", "1", 5), + ("14", "1", 3), ("15", "1", 5), ("16", "1", 5), ("17", "1", 4), + ("18", "1", 1), ("19", "1", 3), ("20", "1", 3), ("21", "1", 1), + ("22", "1", 4), ("23", "1", 3), ("24", "1", 1), ("1", "2", 1), + ("3", "2", 1), ("4", "2", 2), ("5", "2", 1), ("6", "2", 4), + ("7", "2", 2), ("8", "2", 4), ("9", "2", 2), ("10", "2", 6), + ("11", "2", 2), ("12", "2", 1), ("13", "2", 3), ("14", "2", 1), + ("15", "2", 1), ("16", "2", 4), ("17", "2", 2), ("19", "2", 1), + ("20", "2", 1), ("22", "2", 1), ("1", "3", 1), ("2", "3", 1), + ("4", "3", 2), ("5", "3", 1), ("6", "3", 3), ("7", "3", 1), + ("8", "3", 2), ("9", "3", 1), ("10", "3", 3), ("11", "3", 3), + ("12", "3", 2), ("13", "3", 1), ("14", "3", 1), ("15", "3", 1), + ("16", "3", 2), ("17", "3", 1), ("22", "3", 1), ("23", "3", 1), + ("1", "4", 5), ("2", "4", 2), ("3", "4", 2), ("5", "4", 5), + ("6", "4", 4), ("7", "4", 4), ("8", "4", 7), ("9", "4", 7), + ("10", "4", 12), ("11", "4", 14), ("12", "4", 6), ("13", "4", 6), + ("14", "4", 5), ("15", "4", 5), ("16", "4", 8), ("17", "4", 5), + ("18", "4", 1), ("19", "4", 2), ("20", "4", 3), ("21", "4", 2), + ("22", "4", 4), ("23", "4", 5), ("24", "4", 2), ("1", "5", 2), + ("2", "5", 1), ("3", "5", 1), ("4", "5", 5), ("6", "5", 2), + ("7", "5", 2), ("8", "5", 5), ("9", "5", 8), ("10", "5", 10), + ("11", "5", 5), ("12", "5", 2), 
("13", "5", 2), ("14", "5", 1), + ("15", "5", 2), ("16", "5", 5), ("17", "5", 2), ("19", "5", 1), + ("20", "5", 1), ("21", "5", 1), ("22", "5", 2), ("23", "5", 1), + ("1", "6", 3), ("2", "6", 4), ("3", "6", 3), ("4", "6", 4), + ("5", "6", 2), ("7", "6", 4), ("8", "6", 8), ("9", "6", 4), + ("10", "6", 8), ("11", "6", 4), ("12", "6", 2), ("13", "6", 2), + ("14", "6", 1), ("15", "6", 2), ("16", "6", 9), ("17", "6", 5), + ("18", "6", 1), ("19", "6", 2), ("20", "6", 3), ("21", "6", 1), + ("22", "6", 2), ("23", "6", 1), ("24", "6", 1), ("1", "7", 5), + ("2", "7", 2), ("3", "7", 1), ("4", "7", 4), ("5", "7", 2), + ("6", "7", 4), ("8", "7", 10), ("9", "7", 6), ("10", "7", 19), + ("11", "7", 5), ("12", "7", 7), ("13", "7", 4), ("14", "7", 2), + ("15", "7", 5), ("16", "7", 14), ("17", "7", 10), ("18", "7", 2), + ("19", "7", 4), ("20", "7", 5), ("21", "7", 2), ("22", "7", 5), + ("23", "7", 2), ("24", "7", 1), ("1", "8", 8), ("2", "8", 4), + ("3", "8", 2), ("4", "8", 7), ("5", "8", 5), ("6", "8", 8), + ("7", "8", 10), ("9", "8", 8), ("10", "8", 16), ("11", "8", 8), + ("12", "8", 6), ("13", "8", 6), ("14", "8", 4), ("15", "8", 6), + ("16", "8", 22), ("17", "8", 14), ("18", "8", 3), ("19", "8", 7), + ("20", "8", 9), ("21", "8", 4), ("22", "8", 5), ("23", "8", 3), + ("24", "8", 2), ("1", "9", 5), ("2", "9", 2), ("3", "9", 1), + ("4", "9", 7), ("5", "9", 8), ("6", "9", 4), ("7", "9", 6), + ("8", "9", 8), ("10", "9", 28), ("11", "9", 14), ("12", "9", 6), + ("13", "9", 6), ("14", "9", 6), ("15", "9", 9), ("16", "9", 14), + ("17", "9", 9), ("18", "9", 2), ("19", "9", 4), ("20", "9", 6), + ("21", "9", 3), ("22", "9", 7), ("23", "9", 5), ("24", "9", 2), + ("1", "10", 13), ("2", "10", 6), ("3", "10", 3), ("4", "10", 12), + ("5", "10", 10), ("6", "10", 8), ("7", "10", 19), ("8", "10", 16), + ("9", "10", 28), ("11", "10", 40), ("12", "10", 20), ("13", "10", 19), + ("14", "10", 21), ("15", "10", 40), ("16", "10", 44), ("17", "10", 39), + ("18", "10", 7), ("19", "10", 18), ("20", "10", 25), ("21", "10", 12), + ("22", "10", 26), ("23", "10", 18), ("24", "10", 8), ("1", "11", 5), + ("2", "11", 2), ("3", "11", 3), ("4", "11", 15), ("5", "11", 5), + ("6", "11", 4), ("7", "11", 5), ("8", "11", 8), ("9", "11", 14), + ("10", "11", 39), ("12", "11", 14), ("13", "11", 10), ("14", "11", 16), + ("15", "11", 14), ("16", "11", 14), ("17", "11", 10), ("18", "11", 1), + ("19", "11", 4), ("20", "11", 6), ("21", "11", 4), ("22", "11", 11), + ("23", "11", 13), ("24", "11", 6), ("1", "12", 2), ("2", "12", 1), + ("3", "12", 2), ("4", "12", 6), ("5", "12", 2), ("6", "12", 2), + ("7", "12", 7), ("8", "12", 6), ("9", "12", 6), ("10", "12", 20), + ("11", "12", 14), ("13", "12", 13), ("14", "12", 7), ("15", "12", 7), + ("16", "12", 7), ("17", "12", 6), ("18", "12", 2), ("19", "12", 3), + ("20", "12", 4), ("21", "12", 3), ("22", "12", 7), ("23", "12", 7), + ("24", "12", 5), ("1", "13", 5), ("2", "13", 3), ("3", "13", 1), + ("4", "13", 6), ("5", "13", 2), ("6", "13", 2), ("7", "13", 4), + ("8", "13", 6), ("9", "13", 6), ("10", "13", 19), ("11", "13", 10), + ("12", "13", 13), ("14", "13", 6), ("15", "13", 7), ("16", "13", 6), + ("17", "13", 5), ("18", "13", 1), ("19", "13", 3), ("20", "13", 6), + ("21", "13", 6), ("22", "13", 13), ("23", "13", 8), ("24", "13", 8), + ("1", "14", 3), ("2", "14", 1), ("3", "14", 1), ("4", "14", 5), + ("5", "14", 1), ("6", "14", 1), ("7", "14", 2), ("8", "14", 4), + ("9", "14", 6), ("10", "14", 21), ("11", "14", 16), ("12", "14", 7), + ("13", "14", 6), ("15", "14", 13), ("16", "14", 7), ("17", "14", 7), + ("18", 
"14", 1), ("19", "14", 3), ("20", "14", 5), ("21", "14", 4), + ("22", "14", 12), ("23", "14", 11), ("24", "14", 4), ("1", "15", 5), + ("2", "15", 1), ("3", "15", 1), ("4", "15", 5), ("5", "15", 2), + ("6", "15", 2), ("7", "15", 5), ("8", "15", 6), ("9", "15", 10), + ("10", "15", 40), ("11", "15", 14), ("12", "15", 7), ("13", "15", 7), + ("14", "15", 13), ("16", "15", 12), ("17", "15", 15), ("18", "15", 2), + ("19", "15", 8), ("20", "15", 11), ("21", "15", 8), ("22", "15", 26), + ("23", "15", 10), ("24", "15", 4), ("1", "16", 5), ("2", "16", 4), + ("3", "16", 2), ("4", "16", 8), ("5", "16", 5), ("6", "16", 9), + ("7", "16", 14), ("8", "16", 22), ("9", "16", 14), ("10", "16", 44), + ("11", "16", 14), ("12", "16", 7), ("13", "16", 6), ("14", "16", 7), + ("15", "16", 12), ("17", "16", 28), ("18", "16", 5), ("19", "16", 13), + ("20", "16", 16), ("21", "16", 6), ("22", "16", 12), ("23", "16", 5), + ("24", "16", 3), ("1", "17", 4), ("2", "17", 2), ("3", "17", 1), + ("4", "17", 5), ("5", "17", 2), ("6", "17", 5), ("7", "17", 10), + ("8", "17", 14), ("9", "17", 9), ("10", "17", 39), ("11", "17", 10), + ("12", "17", 6), ("13", "17", 5), ("14", "17", 7), ("15", "17", 15), + ("16", "17", 28), ("18", "17", 6), ("19", "17", 17), ("20", "17", 17), + ("21", "17", 6), ("22", "17", 17), ("23", "17", 6), ("24", "17", 3), + ("1", "18", 1), ("4", "18", 1), ("6", "18", 1), ("7", "18", 2), + ("8", "18", 3), ("9", "18", 2), ("10", "18", 7), ("11", "18", 2), + ("12", "18", 2), ("13", "18", 1), ("14", "18", 1), ("15", "18", 2), + ("16", "18", 5), ("17", "18", 6), ("19", "18", 3), ("20", "18", 4), + ("21", "18", 1), ("22", "18", 3), ("23", "18", 1), ("1", "19", 3), + ("2", "19", 1), ("4", "19", 2), ("5", "19", 1), ("6", "19", 2), + ("7", "19", 4), ("8", "19", 7), ("9", "19", 4), ("10", "19", 18), + ("11", "19", 4), ("12", "19", 3), ("13", "19", 3), ("14", "19", 3), + ("15", "19", 8), ("16", "19", 13), ("17", "19", 17), ("18", "19", 3), + ("20", "19", 12), ("21", "19", 4), ("22", "19", 12), ("23", "19", 3), + ("24", "19", 1), ("1", "20", 3), ("2", "20", 1), ("4", "20", 3), + ("5", "20", 1), ("6", "20", 3), ("7", "20", 5), ("8", "20", 9), + ("9", "20", 6), ("10", "20", 25), ("11", "20", 6), ("12", "20", 5), + ("13", "20", 6), ("14", "20", 5), ("15", "20", 11), ("16", "20", 16), + ("17", "20", 17), ("18", "20", 4), ("19", "20", 12), ("21", "20", 12), + ("22", "20", 24), ("23", "20", 7), ("24", "20", 4), ("1", "21", 1), + ("4", "21", 2), ("5", "21", 1), ("6", "21", 1), ("7", "21", 2), + ("8", "21", 4), ("9", "21", 3), ("10", "21", 12), ("11", "21", 4), + ("12", "21", 3), ("13", "21", 6), ("14", "21", 4), ("15", "21", 8), + ("16", "21", 6), ("17", "21", 6), ("18", "21", 1), ("19", "21", 4), + ("20", "21", 12), ("22", "21", 18), ("23", "21", 7), ("24", "21", 5), + ("1", "22", 4), ("2", "22", 1), ("3", "22", 1), ("4", "22", 4), + ("5", "22", 2), ("6", "22", 2), ("7", "22", 5), ("8", "22", 5), + ("9", "22", 7), ("10", "22", 26), ("11", "22", 11), ("12", "22", 7), + ("13", "22", 13), ("14", "22", 12), ("15", "22", 26), ("16", "22", 12), + ("17", "22", 17), ("18", "22", 3), ("19", "22", 12), ("20", "22", 24), + ("21", "22", 18), ("23", "22", 21), ("24", "22", 11), ("1", "23", 3), + ("3", "23", 1), ("4", "23", 5), ("5", "23", 1), ("6", "23", 1), + ("7", "23", 2), ("8", "23", 3), ("9", "23", 5), ("10", "23", 18), + ("11", "23", 13), ("12", "23", 7), ("13", "23", 8), ("14", "23", 11), + ("15", "23", 10), ("16", "23", 5), ("17", "23", 6), ("18", "23", 1), + ("19", "23", 3), ("20", "23", 7), ("21", "23", 7), ("22", "23", 21), + 
("24", "23", 7), ("1", "24", 1), ("4", "24", 2), ("6", "24", 1), + ("7", "24", 1), ("8", "24", 2), ("9", "24", 2), ("10", "24", 8), + ("11", "24", 6), ("12", "24", 5), ("13", "24", 7), ("14", "24", 4), + ("15", "24", 4), ("16", "24", 3), ("17", "24", 3), ("19", "24", 1), + ("20", "24", 4), ("21", "24", 5), ("22", "24", 11), ("23", "24", 7) +] + + +def create_sioux_falls_network(): + """Returns Sioux Falls network object (Network). + + Adds the origin and destination link to the adjacency list + __SIOUX_FALLS_ADJACENCY, to the BPR coefficients + __SIOUX_FALLS_FREE_FLOW_TRAVEL_TIME and __SIOUX_FALLS_BPR_A_COEFF and to the + node positions __SIOUX_FALLS_NODES and returns the network. + The BPR (Burean of Public Roads) coefficients are the coefficients used to + compute the travel time as a function of the volume on each link. + """ + adjacency = {} + free_flow_travel_time = __SIOUX_FALLS_FREE_FLOW_TRAVEL_TIME.copy() + bpr_a_coeff = __SIOUX_FALLS_BPR_A_COEFF.copy() + node_position = {} + + for k, nodes in __SIOUX_FALLS_ADJACENCY.items(): + adjacency[k] = nodes + [f"aft_{k}"] + adjacency[f"bef_{k}"] = [k] + adjacency[f"aft_{k}"] = [] + free_flow_travel_time[f"bef_{k}->{k}"] = 0 + free_flow_travel_time[f"{k}->aft_{k}"] = 0 + bpr_a_coeff[f"bef_{k}->{k}"] = 0 + bpr_a_coeff[f"{k}->aft_{k}"] = 0 + + for node, coord in __SIOUX_FALLS_NODES.items(): + node_position[node] = coord + node_position[f"bef_{node}"] = coord + node_position[f"aft_{node}"] = coord + + return dynamic_routing_utils.Network( + adjacency, + node_position=node_position, + bpr_a_coefficient=bpr_a_coeff, + bpr_b_coefficient={k: 4 for k in bpr_a_coeff}, + capacity={k: 1 for k in bpr_a_coeff}, + free_flow_travel_time=free_flow_travel_time) + + +SIOUX_FALLS_NETWORK = create_sioux_falls_network() + +SIOUX_FALLS_OD_DEMAND = [ + dynamic_routing_utils.OriginDestinationDemand( + f"bef_{origin}->{origin}", f"{dest}->aft_{dest}", 0, count * 1e2) + for (origin, dest, count) in __SIOUX_FALLS_DEMAND_AUX] + +SIOUX_FALLS_DUMMY_OD_DEMAND = [ + dynamic_routing_utils.OriginDestinationDemand("bef_19->19", "1->aft_1", 0, + 70 * 1e2), + dynamic_routing_utils.OriginDestinationDemand("bef_1->1", "19->aft_19", 0, + 70 * 1e2) +] diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/dynamic_routing_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/dynamic_routing_test.py new file mode 100644 index 0000000..547a785 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/dynamic_routing_test.py @@ -0,0 +1,310 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Lint as python3 +"""Tests for Python dynamic routing game.""" + +from absl.testing import absltest + +from open_spiel.python import games # pylint:disable=unused-import +from open_spiel.python import policy +from open_spiel.python import rl_environment +from open_spiel.python.algorithms import cfr +from open_spiel.python.algorithms import expected_game_score +from open_spiel.python.algorithms import exploitability +from open_spiel.python.algorithms import external_sampling_mccfr as external_mccfr +from open_spiel.python.algorithms import outcome_sampling_mccfr as outcome_mccfr +from open_spiel.python.games import dynamic_routing +from open_spiel.python.games import dynamic_routing_utils +import pyspiel + +_NUM_ITERATION_CFR_TEST = 1 + + +class DynamicRoutingGameTest(absltest.TestCase): + + def test_random_game(self): + """Tests basic API functions with the standard game tests.""" + game = pyspiel.load_game("python_dynamic_routing") + pyspiel.random_sim_test(game, num_sims=10, serialize=False, verbose=True) + + def test_game_as_turn_based(self): + """Check the game can be converted to a turn-based game.""" + game = pyspiel.load_game("python_dynamic_routing") + turn_based = pyspiel.convert_to_turn_based(game) + pyspiel.random_sim_test( + turn_based, num_sims=10, serialize=False, verbose=True) + + def test_game_as_turn_based_via_string(self): + """Check the game can be created as a turn-based game from a string.""" + game = pyspiel.load_game( + "turn_based_simultaneous_game(game=python_dynamic_routing())") + pyspiel.random_sim_test(game, num_sims=10, serialize=False, verbose=True) + + def test_non_default_param_from_string(self): + """Check params can be given through string loading.""" + game = pyspiel.load_game("python_dynamic_routing(max_num_time_step=5)") + self.assertEqual(game.max_game_length(), 5) + + def test_non_default_param_from_dict(self): + """Check params can be given through a dictionary.""" + game = pyspiel.load_game("python_dynamic_routing", {"max_num_time_step": 5}) + self.assertEqual(game.max_game_length(), 5) + + def test_action_consistency_convert_to_turn_based(self): + """Check if the sequential game is consistent with the game.""" + game = pyspiel.load_game("python_dynamic_routing") + seq_game = pyspiel.convert_to_turn_based(game) + state = game.new_initial_state() + seq_state = seq_game.new_initial_state() + self.assertEqual( + state.legal_actions(seq_state.current_player()), + seq_state.legal_actions(), + msg="The sequential actions are not correct.") + + def test_cfr_on_turn_based_game_with_exploitability(self): + """Check if CFR can be applied to the sequential game.""" + game = pyspiel.load_game( + "python_dynamic_routing(max_num_time_step=5,time_step_length=1.0)") + seq_game = pyspiel.convert_to_turn_based(game) + cfr_solver = cfr.CFRSolver(seq_game) + for _ in range(_NUM_ITERATION_CFR_TEST): + cfr_solver.evaluate_and_update_policy() + exploitability.nash_conv(seq_game, cfr_solver.average_policy()) + + def test_ext_mccfr_on_turn_based_game_with_exploitability(self): + """Check if external sampling MCCFR can be applied.""" + game = pyspiel.load_game( + "python_dynamic_routing(max_num_time_step=5,time_step_length=1.0)") + seq_game = pyspiel.convert_to_turn_based(game) + cfr_solver = external_mccfr.ExternalSamplingSolver( + seq_game, external_mccfr.AverageType.SIMPLE) + for _ in range(_NUM_ITERATION_CFR_TEST): + cfr_solver.iteration() + exploitability.nash_conv(seq_game, cfr_solver.average_policy()) + + def 
test_int_mccfr_on_turn_based_game_with_exploitability(self):
+    """Check if outcome sampling MCCFR can be applied."""
+    game = pyspiel.load_game(
+        "python_dynamic_routing(max_num_time_step=5,time_step_length=1.0)")
+    seq_game = pyspiel.convert_to_turn_based(game)
+    cfr_solver = outcome_mccfr.OutcomeSamplingSolver(seq_game)
+    for _ in range(_NUM_ITERATION_CFR_TEST):
+      cfr_solver.iteration()
+    exploitability.nash_conv(seq_game, cfr_solver.average_policy())
+
+  def test_creation_of_rl_environment(self):
+    """Check if the RL environment can be created."""
+    game = pyspiel.load_game("python_dynamic_routing")
+    seq_game = pyspiel.convert_to_turn_based(game)
+    rl_environment.Environment(seq_game)
+
+  def test_vehicle_origin_outside_network(self):
+    """Check that an error is raised if the origin is outside the network."""
+    vehicles = [dynamic_routing_utils.Vehicle("I->O", "D->E", 0)]
+    with self.assertRaises(ValueError):
+      dynamic_routing.DynamicRoutingGame(
+          {
+              "max_num_time_step": 10,
+              "time_step_length": 0.5,
+              "players": -1
+          },
+          vehicles=vehicles)
+
+  def test_vehicle_destination_outside_network(self):
+    """Check that an error is raised if the destination is outside the network."""
+    vehicles = [dynamic_routing_utils.Vehicle("O->A", "E->F", 0)]
+    with self.assertRaises(ValueError):
+      dynamic_routing.DynamicRoutingGame(
+          {
+              "max_num_time_step": 10,
+              "time_step_length": 0.5,
+              "players": -1
+          },
+          vehicles=vehicles)
+
+  def test_multiple_departure_time_vehicle(self):
+    """Check that departure times can be defined."""
+    vehicles = [
+        dynamic_routing_utils.Vehicle("O->A", "D->E", 0),
+        dynamic_routing_utils.Vehicle("O->A", "D->E", 0.5),
+        dynamic_routing_utils.Vehicle("O->A", "D->E", 1.0)
+    ]
+    game = dynamic_routing.DynamicRoutingGame(
+        {
+            "max_num_time_step": 10,
+            "time_step_length": 0.5,
+            "players": -1
+        },
+        vehicles=vehicles)
+    pyspiel.random_sim_test(game, num_sims=10, serialize=False, verbose=True)
+
+  def test_game_evolution_first_action_policy(self):
+    """Check the game's deterministic evolution under the first-action policy."""
+    # Test evolution of the game as expected (test value of the state).
+    # Test legal_actions().
+
+  def test_observer_correct(self):
+    """Check that the observer is correctly updated."""
+    # Add test about observer and tensor being updated.
+
+  def test_apply_actions_error_no_movement_with_negative_waiting_time(self):
+    """Check that a vehicle cannot choose not to move if it has to move."""
+    # Test apply_actions().
+
+  def test_apply_actions_error_wrong_movement_with_negative_waiting_time(self):
+    """Check that a vehicle cannot choose to move to a non-successor link."""
+    # Test apply_actions().
+
+  def test_apply_actions_error_movement_with_positive_waiting_time(self):
+    """Check that a vehicle cannot choose to move if it cannot move yet."""
+    # Test apply_actions().
+ + def test_braess_paradox(self): + """Test that Braess paradox can be reproduced with the mean field game.""" + num_player = 8 + braess_network = dynamic_routing_utils.Network( + { + "O": "A", + "A": ["B", "C"], + "B": ["C", "D"], + "C": ["D"], + "D": ["E"], + "E": [] + }, + node_position={ + "O": (0, 0), + "A": (1, 0), + "B": (2, 1), + "C": (2, -1), + "D": (3, 0), + "E": (4, 0) + }, + bpr_a_coefficient={ + "O->A": 0, + "A->B": 1.0, + "A->C": 0, + "B->C": 0, + "B->D": 0, + "C->D": 1.0, + "D->E": 0 + }, + bpr_b_coefficient={ + "O->A": 1.0, + "A->B": 1.0, + "A->C": 1.0, + "B->C": 1.0, + "B->D": 1.0, + "C->D": 1.0, + "D->E": 1.0 + }, + capacity={ + "O->A": num_player, + "A->B": num_player, + "A->C": num_player, + "B->C": num_player, + "B->D": num_player, + "C->D": num_player, + "D->E": num_player + }, + free_flow_travel_time={ + "O->A": 0, + "A->B": 1.0, + "A->C": 2.0, + "B->C": 0.25, + "B->D": 2.0, + "C->D": 1.0, + "D->E": 0 + }) + + demand = [ + dynamic_routing_utils.Vehicle("O->A", "D->E") for _ in range(num_player) + ] + game = dynamic_routing.DynamicRoutingGame( + { + "time_step_length": 0.125, + "max_num_time_step": 40 + }, + network=braess_network, + vehicles=demand) + + class TruePathPolicy(policy.Policy): + + def __init__(self, game): + super().__init__(game, list(range(num_player))) + self._path = {} + + def action_probabilities(self, state, player_id=None): + assert player_id is not None + legal_actions = state.legal_actions(player_id) + if not legal_actions: + return {dynamic_routing_utils.NO_POSSIBLE_ACTION: 1.0} + elif len(legal_actions) == 1: + return {legal_actions[0]: 1.0} + else: + if legal_actions[0] == 1: + if self._path[player_id] in ["top", "middle"]: + return {1: 1.0} + elif self._path[player_id] == "bottom": + return {2: 1.0} + else: + raise ValueError() + elif legal_actions[0] == 3: + if self._path[player_id] == "top": + return {4: 1.0} + elif self._path[player_id] == "middle": + return {3: 1.0} + else: + raise ValueError() + raise ValueError(f"{legal_actions} is not correct.") + + class NashEquilibriumBraess(TruePathPolicy): + + def __init__(self, game): + super().__init__(game) + for player_id in range(num_player): + if player_id % 2 == 0: + self._path[player_id] = "middle" + if player_id % 4 == 1: + self._path[player_id] = "top" + if player_id % 4 == 3: + self._path[player_id] = "bottom" + + class SocialOptimumBraess(NashEquilibriumBraess): + + def __init__(self, game): + super().__init__(game) + for player_id in range(num_player): + if player_id % 2 == 0: + self._path[player_id] = "top" + if player_id % 2 == 1: + self._path[player_id] = "bottom" + + ne_policy = NashEquilibriumBraess(game) + # Debug issue with nash conv computation and uncomment yhe following line. + # self.assertEqual(exploitability.nash_conv(game, ne_policy), 0.0) + self.assertSequenceAlmostEqual( + -expected_game_score.policy_value(game.new_initial_state(), ne_policy), + [3.75] * num_player) + + so_policy = SocialOptimumBraess(game) + # Debug issue with nash conv computation and uncomment the following line. 
+    # self.assertEqual(exploitability.nash_conv(game, so_policy), 0.125)
+    self.assertSequenceAlmostEqual(
+        -expected_game_score.policy_value(game.new_initial_state(), so_policy),
+        [3.5] * num_player)
+
+
+if __name__ == "__main__":
+  absltest.main()
diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/dynamic_routing_to_mean_field_game.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/dynamic_routing_to_mean_field_game.py
new file mode 100644
index 0000000..99550c1
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/dynamic_routing_to_mean_field_game.py
@@ -0,0 +1,131 @@
+# Copyright 2019 DeepMind Technologies Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Mean field routing game policy used in the N-player routing game.
+
+The policy class DerivedNPlayerPolicyFromMeanFieldPolicy converts a mean field
+routing game policy to an N-player routing game policy. It keeps the mean
+field policy in memory and converts an N-player routing game state to a mean
+field routing game state when calling action_probabilities. Therefore the mean
+field policy can be used on an N-player state. This makes the mean field
+equilibrium policy (which is faster to compute) a good approximation of the
+equilibrium N-player policy (which is slower to compute) when N is large.
+"""
+
+from typing import Dict
+
+from open_spiel.python import policy
+from open_spiel.python.games import dynamic_routing
+from open_spiel.python.games import dynamic_routing_utils
+from open_spiel.python.mfg.games import dynamic_routing as mean_field_routing_game
+import pyspiel
+
+
+def _create_empty_mfg_state(game: dynamic_routing.DynamicRoutingGame):
+  """Create an empty MFG state for the N player routing game.
+
+  Args:
+    game: the N player game.
+
+  Returns:
+    new_mfg_state: an empty MFG state corresponding to the N player game.
+  """
+  od_demand_dict = {}
+  for vehicle in game._vehicles:  # pylint:disable=protected-access
+    key = (vehicle.origin, vehicle.destination, vehicle.departure_time)
+    if key not in od_demand_dict:
+      od_demand_dict[key] = 0
+    od_demand_dict[key] += 1
+  od_demand = []
+  for (origin, destination, departure_time), counts in od_demand_dict.items():
+    od_demand.append(
+        dynamic_routing_utils.OriginDestinationDemand(origin, destination,
+                                                      departure_time, counts))
+  return mean_field_routing_game.MeanFieldRoutingGame(
+      {
+          "max_num_time_step": game.max_game_length(),
+          "time_step_length": game.time_step_length
+      },
+      network=game.network,
+      od_demand=od_demand,
+      perform_sanity_checks=game.perform_sanity_checks).new_initial_state()
+
+
+class DerivedNPlayerPolicyFromMeanFieldPolicy(policy.Policy):
+  """Policy that applies a mean field policy to the N-player routing game.
+
+  Attributes:
+    _mfg_policy: the mean field game policy.
+    _mfg_empty_state: an empty mfg state to clone for the state conversion.
+    _state_memoization: dictionary to memoize conversion of N player game state
+      string representation to the corresponding MFG state.
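+
+  A minimal, illustrative usage sketch (it mirrors the accompanying
+  dynamic_routing_to_mean_field_game_test; `expected_game_score` comes from
+  open_spiel.python.algorithms):
+  ```python3
+  mfg_game = pyspiel.load_game("python_mfg_dynamic_routing")
+  n_player_game = pyspiel.load_game("python_dynamic_routing")
+  derived_policy = DerivedNPlayerPolicyFromMeanFieldPolicy(
+      n_player_game, policy.UniformRandomPolicy(mfg_game))
+  values = expected_game_score.policy_value(
+      n_player_game.new_initial_state(), derived_policy)
+  ```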
+ """ + + def __init__(self, game: dynamic_routing.DynamicRoutingGame, + mfg_policy: policy.Policy): + """Initializes a uniform random policy for all players in the game.""" + super().__init__(game, list(range(game.num_players()))) + self._mfg_policy = mfg_policy + self._mfg_empty_state = _create_empty_mfg_state(game) + self._state_memoization = {} + + def _convert_state_to_mean_field_state( + self, n_player_state: dynamic_routing.DynamicRoutingGameState, + player_id: int) -> mean_field_routing_game.MeanFieldRoutingGameState: + """Convert a N player state to a mean field state.""" + assert player_id >= 0, "player_id should be a positive integer." + # create a string key for N player game. + state_key = (str(n_player_state), player_id) + mfg_state = self._state_memoization.get(state_key) + if mfg_state is not None: + return mfg_state + mfg_state = self._mfg_empty_state.clone() + # pylint:disable=protected-access + mfg_state._is_chance_init = False + mfg_state._current_time_step = n_player_state._current_time_step + mfg_state._is_terminal = n_player_state._is_terminal + mfg_state._player_id = pyspiel.PlayerId.DEFAULT_PLAYER_ID + mfg_state._waiting_time = n_player_state._waiting_times[player_id] + mfg_state._vehicle_at_destination = ( + player_id in n_player_state._vehicle_at_destination) + mfg_state._vehicle_destination = n_player_state._vehicle_destinations[ + player_id] + mfg_state._vehicle_final_arrival_time = ( + n_player_state._vehicle_final_arrival_times[player_id]) + mfg_state._vehicle_location = n_player_state._vehicle_locations[player_id] + mfg_state._vehicle_without_legal_action = ( + player_id in n_player_state._vehicle_without_legal_actions) + # pylint:enable=protected-access + self._state_memoization[state_key] = mfg_state + return mfg_state + + def action_probabilities(self, + state: dynamic_routing.DynamicRoutingGameState, + player_id=None) -> Dict[int, float]: + """Returns the mean field action to apply in the N player state. + + Args: + state: An N player dynamic routing game state. + player_id: the player id for which we want an action. Should be given to + the function. + + Returns: + A `dict` of `{action: probability}` for the specified player in the + supplied state. + """ + assert player_id is not None + mfg_state = self._convert_state_to_mean_field_state(state, player_id) + # Due to memoization, action_probabilities should not change mfg_state. In + # case action_probabilities changes mfg_state, then mfg_state.clone() should + # be passed to the function. + return self._mfg_policy.action_probabilities(mfg_state) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/dynamic_routing_to_mean_field_game_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/dynamic_routing_to_mean_field_game_test.py new file mode 100644 index 0000000..d3c934c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/dynamic_routing_to_mean_field_game_test.py @@ -0,0 +1,76 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for dynamic_routing_to_mean_field_game.""" +from absl.testing import absltest + +from open_spiel.python import games # pylint:disable=unused-import +from open_spiel.python import policy +from open_spiel.python.algorithms import expected_game_score +from open_spiel.python.games import dynamic_routing_to_mean_field_game +from open_spiel.python.mfg import games as mfg_games # pylint:disable=unused-import +from open_spiel.python.mfg.algorithms import mirror_descent +import pyspiel + + +class DerivedNPlayerPolicyFromMeanFieldPolicyTest(absltest.TestCase): + + def test_state_conversion_method(self): + """Test N player game state to mean field game state conversion.""" + # Test state conversion. + + def test_uniform_mfg_policy_conversion_to_n_player_uniform_policy(self): + """Test conversion of uniform to uniform policy.""" + mfg_game = pyspiel.load_game("python_mfg_dynamic_routing", { + "time_step_length": 0.05, + "max_num_time_step": 100 + }) + n_player_game = pyspiel.load_game("python_dynamic_routing", { + "time_step_length": 0.05, + "max_num_time_step": 100 + }) + mfg_derived_policy = ( + dynamic_routing_to_mean_field_game + .DerivedNPlayerPolicyFromMeanFieldPolicy( + n_player_game, policy.UniformRandomPolicy(mfg_game))) + derived_policy_value = expected_game_score.policy_value( + n_player_game.new_initial_state(), mfg_derived_policy) + uniform_policy_value = expected_game_score.policy_value( + n_player_game.new_initial_state(), + policy.UniformRandomPolicy(n_player_game)) + self.assertSequenceAlmostEqual(derived_policy_value, uniform_policy_value) + + def test_pigou_network_game_outcome_optimal_mfg_policy_in_n_player_game(self): + """Test MFG Nash equilibrium policy for the Pigou network.""" + # Test policy. + # Test game outcome. + + def test_learning_and_applying_mfg_policy_in_n_player_game(self): + """Test converting learnt MFG policy default game.""" + # learning the Braess MFG Nash equilibrium + mfg_game = pyspiel.load_game("python_mfg_dynamic_routing") + omd = mirror_descent.MirrorDescent(mfg_game, lr=1) + for _ in range(10): + omd.iteration() + mfg_policy = omd.get_policy() + n_player_game = pyspiel.load_game("python_dynamic_routing") + mfg_derived_policy = ( + dynamic_routing_to_mean_field_game + .DerivedNPlayerPolicyFromMeanFieldPolicy(n_player_game, mfg_policy)) + expected_game_score.policy_value(n_player_game.new_initial_state(), + mfg_derived_policy) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/dynamic_routing_utils.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/dynamic_routing_utils.py new file mode 100644 index 0000000..9a2a3c1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/dynamic_routing_utils.py @@ -0,0 +1,372 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+# Lint as python3
+"""Utils module for the dynamic routing game and the mean field routing game.
+
+This module has three main classes:
+- Network
+- Vehicle
+- OriginDestinationDemand
+"""
+
+from __future__ import annotations
+
+from collections.abc import Collection
+from typing import Any, Optional
+
+# In case a vehicle has reached an end node, it cannot do anything. In this
+# case its action is 0. Action 0 is reserved to encode "no possible action",
+# as requested by Open Spiel.
+NO_POSSIBLE_ACTION = 0
+
+
+def _road_section_from_nodes(origin: str, destination: str) -> str:
+  """Create a road section 'A->B' from two nodes 'A' and 'B'."""
+  return f"{origin}->{destination}"
+
+
+def _nodes_from_road_section(movement: str) -> tuple[str, str]:
+  """Split a road section 'A->B' into two nodes 'A' and 'B'."""
+  origin, destination = movement.split("->")
+  return origin, destination
+
+
+def assign_dictionary_input_to_object(dict_object: dict[str, Any],
+                                      road_sections: Collection[str],
+                                      default_value: Any) -> dict[str, Any]:
+  """Check the dictionary has the road sections as keys, or return a default dict."""
+  if dict_object:
+    assert set(dict_object) == set(road_sections), (
+        "Objects are not defined for each road section.")
+    return dict_object
+  dict_object_returned = {}
+  for road_section in road_sections:
+    dict_object_returned[road_section] = default_value
+  return dict_object_returned
+
+
+class Network:
+  """Network implementation.
+
+  A network is basically a directed graph with a volume delay function on each
+  of its edges. Each vertex is referred to as a string (for example "A") and
+  each edge as a string f"{node1}->{node2}" (for example "A->B"). The network
+  is created from an adjacency list. Each road section is mapped to an action
+  index (a positive integer) in _action_by_road_section. The volume delay
+  function on each road section rs is given by
+  _free_flow_travel_time[rs] * (1 + _a[rs] * (v / _capacity[rs]) ** _b[rs])
+  where v is the volume on the road section rs, according to the U.S. Bureau
+  of Public Roads (BPR). Such functions are called fundamental diagrams of
+  traffic flow.
+
+  If one would like to plot the network, node positions should be passed to
+  the constructor. Then return_list_for_matplotlib_quiver can be used with
+  Matplotlib:
+  ```python3
+  fig, ax = plt.subplots()
+  o_xs, o_ys, d_xs, d_ys = g.return_list_for_matplotlib_quiver()
+  ax.quiver(o_xs, o_ys, np.subtract(d_xs, o_xs), np.subtract(d_ys, o_ys),
+            color="b", angles='xy', scale_units='xy', scale=1)
+  ```
+
+  See the Network tests for an example.
+  Attributes:
+    _a, _b, _capacity, _free_flow_travel_time: dictionaries that map a road
+      section's string representation to its a, b, relative capacity and free
+      flow travel time coefficients in its BPR function.
+    _action_by_road_section: dictionary that maps a road section to an action
+      id.
+    _adjacency_list: adjacency list of the line graph of the road network.
+    _node_position: dictionary that maps a node to a couple of floats encoding
+      the x and y position of the node. None by default.
+    _road_section_by_action: dictionary that maps an action id to a road
+      section.
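+
+  A minimal sketch of the BPR travel time computation (illustrative only; the
+  coefficients below are made up):
+  ```python3
+  net = Network({"A": ["B"], "B": []},
+                bpr_a_coefficient={"A->B": 1.0},
+                bpr_b_coefficient={"A->B": 2.0},
+                capacity={"A->B": 10.0},
+                free_flow_travel_time={"A->B": 5.0})
+  net.get_travel_time("A->B", 5)  # 5.0 * (1 + 1.0 * (5 / 10) ** 2) = 6.25
+  ```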
+  """
+  _a: dict[str, float]
+  _b: dict[str, float]
+  _action_by_road_section: dict[str, int]
+  _adjacency_list: dict[str, Collection[str]]
+  _capacity: dict[str, float]
+  _free_flow_travel_time: dict[str, float]
+  _node_position: dict[str, tuple[float, float]]
+  _road_section_by_action: dict[int, str]
+
+  def __init__(self,
+               adjacency_list: dict[str, Collection[str]],
+               node_position: Optional[dict[str, tuple[float, float]]] = None,
+               bpr_a_coefficient: Optional[dict[str, float]] = None,
+               bpr_b_coefficient: Optional[dict[str, float]] = None,
+               capacity: Optional[dict[str, float]] = None,
+               free_flow_travel_time: Optional[dict[str, float]] = None):
+    self._adjacency_list = adjacency_list
+    self._action_by_road_section = self._create_action_by_road_section()
+    self._road_section_by_action = {
+        v: k for k, v in self._action_by_road_section.items()
+    }
+
+    nodes = set(adjacency_list)
+    # pylint: disable=g-complex-comprehension
+    assert all(destination_node in nodes
+               for destination_nodes in self._adjacency_list.values()
+               for destination_node in destination_nodes), (
+                   "Adjacency list is not correct.")
+    # pylint: enable=g-complex-comprehension
+    if node_position:
+      assert set(node_position) == nodes
+      self._node_position = node_position
+    else:
+      self._node_position = None
+    self._a = assign_dictionary_input_to_object(bpr_a_coefficient,
+                                                self._action_by_road_section, 0)
+    self._b = assign_dictionary_input_to_object(bpr_b_coefficient,
+                                                self._action_by_road_section, 1)
+    self._capacity = assign_dictionary_input_to_object(
+        capacity, self._action_by_road_section, 1)
+    self._free_flow_travel_time = assign_dictionary_input_to_object(
+        free_flow_travel_time, self._action_by_road_section, 1)
+    assert hasattr(self, "_adjacency_list")
+    assert hasattr(self, "_node_position")
+    assert hasattr(self, "_a")
+    assert hasattr(self, "_b")
+    assert hasattr(self, "_capacity")
+    assert hasattr(self, "_free_flow_travel_time")
+
+  def _create_action_by_road_section(self) -> dict[str, int]:
+    """Create the dictionary that maps a movement to an action.
+
+    The dictionary that maps a movement to an action is used to define the
+    action corresponding to a movement that a vehicle would like to do.
+    Returns:
+      action_by_road_section: dictionary with keys being movements, for example
+        "O->A", and values the action numbers. Action numbers are successive
+        integers indexed from 1.
+    """
+    action_by_road_section = {}
+    action_number = NO_POSSIBLE_ACTION + 1
+    for origin, successors in sorted(self._adjacency_list.items()):
+      for destination in successors:
+        road_section = _road_section_from_nodes(origin, destination)
+        if road_section in action_by_road_section:
+          raise ValueError((
+              f"{road_section} exists twice in the adjacency list. The current "
+              "network implementation does not enable parallel links."))
+        action_by_road_section[road_section] = action_number
+        action_number += 1
+    return action_by_road_section
+
+  def num_links(self) -> int:
+    """Returns the number of road sections."""
+    return len(self._action_by_road_section)
+
+  def num_actions(self) -> int:
+    """Returns the number of possible actions.
+
+    Equal to the number of road sections + 1. An action is either moving to a
+    specific road section or not moving.
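+
+    For instance (assuming the Braess network defined in dynamic_routing_data,
+    which has 7 road sections):
+    ```python3
+    net = dynamic_routing_data.BRAESS_NETWORK
+    net.num_links()    # 7
+    net.num_actions()  # 8 == num_links() + 1 (action 0 is NO_POSSIBLE_ACTION)
+    ```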
+ """ + return 1 + self.num_links() + + def links(self) -> list[str]: + """Returns the road sections as a list.""" + return list(self._action_by_road_section) + + def get_successors(self, node: str) -> Collection[str]: + """Returns the successor nodes of the node.""" + return self._adjacency_list[node] + + def get_action_id_from_movement(self, origin: str, destination: str) -> int: + """Maps two connected nodes to an action.""" + return self._action_by_road_section[_road_section_from_nodes( + origin, destination)] + + def get_road_section_from_action_id(self, action_id: int) -> str: + """Maps a action to the corresponding road section.""" + return self._road_section_by_action[action_id] + + def is_location_at_sink_node(self, road_section: str) -> bool: + """Returns True if the road section has no successors.""" + start_section, end_section_node = _nodes_from_road_section(road_section) + if start_section not in self._adjacency_list: + raise KeyError(f"{start_section} is not a network node.") + return not self.get_successors(end_section_node) + + def check_list_of_vehicles_is_correct(self, vehicles: Collection["Vehicle"]): + """Assert that vehicles have valid origin and destination.""" + for vehicle in vehicles: + if (vehicle.origin not in self._action_by_road_section or + vehicle.destination not in self._action_by_road_section): + raise ValueError(f"Incorrect origin or destination for {vehicle}") + + def check_list_of_od_demand_is_correct( + self, vehicles: Collection["OriginDestinationDemand"]): + """Assert that OD demands have valid origin and destination.""" + for vehicle in vehicles: + if (vehicle.origin not in self._action_by_road_section or + vehicle.destination not in self._action_by_road_section): + raise ValueError(f"Incorrect origin or destination for {vehicle}") + + def __str__(self) -> str: + return str(self._adjacency_list) + + def get_travel_time(self, road_section: str, volume: float) -> int: + """Returns travel time on the road section given the volume on it. + + Volume unit should be the same as the capacity unit. + Travel time unit is the free flow travel time unit. + Args: + road_section: the road section. + volume: the volume on the road section. + """ + return self._free_flow_travel_time[road_section] * ( + 1.0 + self._a[road_section] * + (volume / self._capacity[road_section])**self._b[road_section]) + + def assert_valid_action(self, action: int, road_section: str = None): + """Assert that an action as a int is valid. + + The action should be a int between 1 and num_actions. In case road_section + is not None then it is test if the action correspond to going on a road + section which is a successor of road_section. + + Args: + action: the action, + road_section: the road section. + """ + assert isinstance(action, int), f"{action} is not a int." + assert 1 <= action < self.num_actions(), str(action) + if road_section is not None: + new_road_section = self.get_road_section_from_action_id(action) + origin_new_section, end_new_section = _nodes_from_road_section( + new_road_section) + _, end_section_node = _nodes_from_road_section(road_section) + assert end_section_node == origin_new_section, ( + f"The action is not legal, trying to go to {new_road_section} " + f"from {road_section} without going through {end_section_node}" + ".") + successors = self.get_successors(origin_new_section) + assert end_new_section in successors, ( + f"Invalid action {new_road_section}. 
It is not a successors of" + f" {end_section_node}: {successors}.") + + def return_position_of_road_section(self, + road_section: str) -> tuple[float, float]: + """Returns position of the middle of theroad section as (x,y).""" + assert self._node_position is not None, ( + "The network should have node positions in order to be plot.") + o_link, d_link = _nodes_from_road_section(road_section) + o_x, o_y = self._node_position[o_link] + d_x, d_y = self._node_position[d_link] + return (o_x + d_x) / 2, (o_y + d_y) / 2 + + def return_list_for_matplotlib_quiver( + self) -> tuple[list[float], list[float], list[float], list[float]]: + """Returns 4 list of encoding the positions of the road sections. + + ```python3 + fig, ax = plt.subplots() + o_xs, o_ys, d_xs, d_ys = g.return_list_for_matplotlib_quiver() + ax.quiver(o_xs, o_ys, np.subtract(d_xs, o_xs), np.subtract(d_ys, o_ys), + color="b", angles='xy', scale_units='xy', scale=1) + ``` + will show the network. + Returns: + o_xs, o_ys, d_xs, d_ys: list of the start x and y positions and of the end + x and y postions of each road section. Each element of each list + corresponds to one road section. + """ + assert self._node_position is not None, ( + "The network should have node positions in order to be plot.") + o_xs = [] + o_ys = [] + d_xs = [] + d_ys = [] + for road_section in self._action_by_road_section: + o_link, d_link = _nodes_from_road_section(road_section) + o_x, o_y = self._node_position[o_link] + d_x, d_y = self._node_position[d_link] + o_xs.append(o_x) + o_ys.append(o_y) + d_xs.append(d_x) + d_ys.append(d_y) + return o_xs, o_ys, d_xs, d_ys + + +class Vehicle: + """A Vehicle is one origin and one destination. + + Both the origin and the destination of the vehicle are road section, therefore + they are string formatted as "{str}->{str}". + Attributes: + destination: destination of the vehicle. + origin: origin of the vehicle. + departure_time: departure time of the vehicle. + """ + _destination: str + _origin: str + _departure_time: float + + def __init__(self, + origin: str, + destination: str, + departure_time: float = 0.0): + assert all("->" in node for node in [origin, destination]) + self._origin = origin + self._destination = destination + self._departure_time = departure_time + + @property + def origin(self) -> str: + """Returns vehicle's origin.""" + return self._origin + + @property + def destination(self) -> str: + """Returns vehicle's destination.""" + return self._destination + + @property + def departure_time(self) -> float: + """Returns vehicle's departure time.""" + return self._departure_time + + def __str__(self): + return (f"Vehicle with origin {self.origin}, destination {self.destination}" + f" and departure time {self._departure_time}.") + + +class OriginDestinationDemand(Vehicle): + """Number of trips from origin to destination for a specific departure time. + + Both the origin and the destination of the vehicle are road section, therefore + they are string formatted as "{str}->{str}". + Attributes: + destination: destination of the vehicles. + origin: origin of the vehicles. + departure_time: departure time of the vehicles. + counts: the number of vehicles with the origin, destination and departure + time. 
+ """ + _counts: float + + def __init__(self, origin: str, destination: str, departure_time: float, + counts: float): + super().__init__(origin, destination, departure_time) + self._counts = counts + + @property + def counts(self) -> float: + """Returns the number of vehicles in the instance.""" + return self._counts + + def __str__(self): + return (f"{self._counts} with origin {self.origin}, destination " + f"{self.destination} and departure time {self._departure_time}.") diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/dynamic_routing_utils_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/dynamic_routing_utils_test.py new file mode 100644 index 0000000..7e8c4e8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/dynamic_routing_utils_test.py @@ -0,0 +1,158 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Tests for Python dynamic routing game utils.""" + +from absl.testing import absltest +from open_spiel.python.games import dynamic_routing_utils as utils + + +class NetworkTest(absltest.TestCase): + """Tests for Network class.""" + + def setUp(self): + """Create a network O->A->D for testing.""" + super().setUp() + self.network = utils.Network({"O": ["A"], "A": ["D"], "D": []}) + + def test_adjacency_list_init(self): + """Test class instanciation with adjacency list.""" + self.assertEqual(self.network.num_links(), 2) + self.assertEqual(self.network.get_successors("O"), ["A"]) + self.assertEqual(self.network.get_successors("A"), ["D"]) + self.assertEqual(self.network.get_successors("D"), []) + self.assertTrue(self.network.is_location_at_sink_node("A->D")) + self.assertFalse(self.network.is_location_at_sink_node("O->A")) + self.assertEqual(self.network.get_action_id_from_movement("A", "D"), 1) + self.assertEqual(self.network.get_action_id_from_movement("O", "A"), 2) + self.assertEqual(self.network.get_road_section_from_action_id(1), "A->D") + self.assertEqual(self.network.get_road_section_from_action_id(2), "O->A") + + def test_get_successors_with_wrong_node(self): + """Test get successors on non existing node.""" + with self.assertRaises(KeyError): + self.network.get_successors("Z") + + def test_get_action_id_without_connected_nodes(self): + """Test get actions id on non connected nodes.""" + with self.assertRaises(KeyError): + self.network.get_action_id_from_movement("O", "D") + + def test_get_action_id_with_wrong_nodes(self): + """Test get actions id on non existing node.""" + with self.assertRaises(KeyError): + self.network.get_action_id_from_movement("Z", "D") + + def test_is_location_at_sink_noded_with_wrong_road_section(self): + """Test is_location_at_sink_node on non existing second node.""" + with self.assertRaises(KeyError): + self.network.is_location_at_sink_node("A->Z") + + def test_is_location_at_sink_noded_with_wrong_road_section_2(self): + """Test is_location_at_sink_node on non existing first node.""" + with self.assertRaises(KeyError): + 
self.network.is_location_at_sink_node("Z->D") + + def test_is_location_at_sink_noded_with_wrong_arg(self): + """Test is_location_at_sink_node on wrong link str representation.""" + with self.assertRaises(ValueError): + self.network.is_location_at_sink_node("D") + + def test_get_road_section_with_action_id(self): + """Test get_road_section_from_action_id on non possible action.""" + with self.assertRaises(KeyError): + self.network.get_road_section_from_action_id(0) + + def test_num_links_method(self): + # Write. + pass + + def test_num_actions_method(self): + # Write. + pass + + def test_links(self): + # Write. + pass + + def test_check_list_of_vehicles_is_correct_method(self): + # Write. + pass + + def test_check_list_of_od_demand_is_correct_method(self): + # Write. + pass + + def test_str_method(self): + # Write. + pass + + def test_get_travel_time_methods(self): + # Write. + pass + + def test_assert_valid_action_methods(self): + # Write. + pass + + def test_default_travel_time_methods(self): + # Write. + pass + + def test_customable_travel_time_methods(self): + # Write. + pass + + +class VehicleTest(absltest.TestCase): + """Tests for Vehicle class.""" + + def test_vehicle_1(self): + """Test instanciation of Vehicle.""" + vehicle = utils.Vehicle("O->A", "B->D") + self.assertEqual(vehicle.destination, "B->D") + self.assertEqual(vehicle.origin, "O->A") + self.assertEqual(vehicle.departure_time, 0) + + def test_vehicle_2(self): + """Test instanciation of with departure time.""" + vehicle = utils.Vehicle("O->A", "B->D", 10.5) + self.assertEqual(vehicle.origin, "O->A") + self.assertEqual(vehicle.destination, "B->D") + self.assertEqual(vehicle.departure_time, 10.5) + + +class OriginDestinationDemandTest(absltest.TestCase): + """Tests for OriginDestinationDemand class.""" + + def test_od_demand_1(self): + """Test instanciation of OD demand.""" + od_demand = utils.OriginDestinationDemand("O->A", "B->D", 0, 30) + self.assertEqual(od_demand.destination, "B->D") + self.assertEqual(od_demand.origin, "O->A") + self.assertEqual(od_demand.departure_time, 0) + self.assertEqual(od_demand.counts, 30) + + def test_od_demand_2(self): + """Test instanciation of OD demand.""" + od_demand = utils.OriginDestinationDemand("O->A", "B->D", 10.5, 43.2) + self.assertEqual(od_demand.origin, "O->A") + self.assertEqual(od_demand.destination, "B->D") + self.assertEqual(od_demand.departure_time, 10.5) + self.assertEqual(od_demand.counts, 43.2) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/iterated_prisoners_dilemma.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/iterated_prisoners_dilemma.py new file mode 100644 index 0000000..f5c7a1e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/iterated_prisoners_dilemma.py @@ -0,0 +1,199 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Python implementation of iterated prisoner's dilemma. 
+ +This is primarily here to demonstrate simultaneous-move games in Python. +""" + +import enum + +import numpy as np + +import pyspiel + +_NUM_PLAYERS = 2 +_DEFAULT_PARAMS = {"termination_probability": 0.125, "max_game_length": 9999} +_PAYOFF = [[5, 0], [10, 1]] + +_GAME_TYPE = pyspiel.GameType( + short_name="python_iterated_prisoners_dilemma", + long_name="Python Iterated Prisoner's Dilemma", + dynamics=pyspiel.GameType.Dynamics.SIMULTANEOUS, + chance_mode=pyspiel.GameType.ChanceMode.EXPLICIT_STOCHASTIC, + information=pyspiel.GameType.Information.PERFECT_INFORMATION, + utility=pyspiel.GameType.Utility.GENERAL_SUM, + reward_model=pyspiel.GameType.RewardModel.REWARDS, + max_num_players=_NUM_PLAYERS, + min_num_players=_NUM_PLAYERS, + provides_information_state_string=False, + provides_information_state_tensor=False, + provides_observation_string=False, + provides_observation_tensor=False, + provides_factored_observation_string=False, + parameter_specification=_DEFAULT_PARAMS) + + +class Action(enum.IntEnum): + COOPERATE = 0 + DEFECT = 1 + + +class Chance(enum.IntEnum): + CONTINUE = 0 + STOP = 1 + + +class IteratedPrisonersDilemmaGame(pyspiel.Game): + """The game, from which states and observers can be made.""" + + # pylint:disable=dangerous-default-value + def __init__(self, params=_DEFAULT_PARAMS): + max_game_length = params["max_game_length"] + super().__init__( + _GAME_TYPE, + pyspiel.GameInfo( + num_distinct_actions=2, + max_chance_outcomes=2, + num_players=2, + min_utility=np.min(_PAYOFF) * max_game_length, + max_utility=np.max(_PAYOFF) * max_game_length, + utility_sum=None, + max_game_length=max_game_length, + ), + params, + ) + self._termination_probability = params["termination_probability"] + + def new_initial_state(self): + """Returns a state corresponding to the start of a game.""" + return IteratedPrisonersDilemmaState(self, self._termination_probability) + + def make_py_observer(self, iig_obs_type=None, params=None): + """Returns an object used for observing game state.""" + return IteratedPrisonersDilemmaObserver( + iig_obs_type or pyspiel.IIGObservationType(perfect_recall=False), + params) + + +class IteratedPrisonersDilemmaState(pyspiel.State): + """Current state of the game.""" + + def __init__(self, game, termination_probability): + """Constructor; should only be called by Game.new_initial_state.""" + super().__init__(game) + self._current_iteration = 1 + self._termination_probability = termination_probability + self._is_chance = False + self._game_over = False + self._rewards = np.zeros(_NUM_PLAYERS) + self._returns = np.zeros(_NUM_PLAYERS) + + # OpenSpiel (PySpiel) API functions are below. This is the standard set that + # should be implemented by every simultaneous-move game with chance. 
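The comment above marks the standard OpenSpiel API surface that a simultaneous-move game with chance is expected to implement. As a hedged illustration (it is not part of this diff), the sketch below shows how a driver loop might exercise that surface: chance nodes are resolved by sampling `chance_outcomes()`, and decision nodes collect one action per player and apply them jointly with `apply_actions()`. It assumes OpenSpiel's Python package is importable and that importing the module registers the game under `python_iterated_prisoners_dilemma`.

```python
import random

import pyspiel
from open_spiel.python.games import iterated_prisoners_dilemma  # pylint: disable=unused-import

game = pyspiel.load_game("python_iterated_prisoners_dilemma")
state = game.new_initial_state()
while not state.is_terminal():
  if state.is_chance_node():
    # Chance decides whether the repeated game continues or stops.
    outcomes, probs = zip(*state.chance_outcomes())
    state.apply_action(random.choices(outcomes, weights=probs)[0])
  else:
    # Simultaneous node: every player chooses an action; they are applied jointly.
    joint_action = [random.choice(state.legal_actions(p))
                    for p in range(game.num_players())]
    state.apply_actions(joint_action)
print("episode returns:", state.returns())
```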
+ + def current_player(self): + """Returns id of the next player to move, or TERMINAL if game is over.""" + if self._game_over: + return pyspiel.PlayerId.TERMINAL + elif self._is_chance: + return pyspiel.PlayerId.CHANCE + else: + return pyspiel.PlayerId.SIMULTANEOUS + + def _legal_actions(self, player): + """Returns a list of legal actions, sorted in ascending order.""" + assert player >= 0 + return [Action.COOPERATE, Action.DEFECT] + + def chance_outcomes(self): + """Returns the possible chance outcomes and their probabilities.""" + assert self._is_chance + return [(Chance.CONTINUE, 1 - self._termination_probability), + (Chance.STOP, self._termination_probability)] + + def _apply_action(self, action): + """Applies the specified action to the state.""" + # This is not called at simultaneous-move states. + assert self._is_chance and not self._game_over + self._current_iteration += 1 + self._is_chance = False + self._game_over = (action == Chance.STOP) + if self._current_iteration > self.get_game().max_game_length(): + self._game_over = True + + def _apply_actions(self, actions): + """Applies the specified actions (per player) to the state.""" + assert not self._is_chance and not self._game_over + self._is_chance = True + self._rewards[0] = _PAYOFF[actions[0]][actions[1]] + self._rewards[1] = _PAYOFF[actions[1]][actions[0]] + self._returns += self._rewards + + def _action_to_string(self, player, action): + """Action -> string.""" + if player == pyspiel.PlayerId.CHANCE: + return Chance(action).name + else: + return Action(action).name + + def is_terminal(self): + """Returns True if the game is over.""" + return self._game_over + + def rewards(self): + """Reward at the previous step.""" + return self._rewards + + def returns(self): + """Total reward for each player over the course of the game so far.""" + return self._returns + + def __str__(self): + """String for debug purposes. No particular semantics are required.""" + return (f"p0:{self.action_history_string(0)} " + f"p1:{self.action_history_string(1)}") + + def action_history_string(self, player): + return "".join( + self._action_to_string(pa.player, pa.action)[0] + for pa in self.full_history() + if pa.player == player) + + +class IteratedPrisonersDilemmaObserver: + """Observer, conforming to the PyObserver interface (see observation.py).""" + + def __init__(self, iig_obs_type, params): + """Initializes an empty observation tensor.""" + assert not bool(params) + self.iig_obs_type = iig_obs_type + self.tensor = None + self.dict = {} + + def set_from(self, state, player): + pass + + def string_from(self, state, player): + """Observation of `state` from the PoV of `player`, as a string.""" + if self.iig_obs_type.public_info: + return (f"us:{state.action_history_string(player)} " + f"op:{state.action_history_string(1 - player)}") + else: + return None + + +# Register the game with the OpenSpiel library + +pyspiel.register_game(_GAME_TYPE, IteratedPrisonersDilemmaGame) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/iterated_prisoners_dilemma_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/iterated_prisoners_dilemma_test.py new file mode 100644 index 0000000..ff3a6c3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/iterated_prisoners_dilemma_test.py @@ -0,0 +1,63 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for iterated_prisoners_dilemma.py.""" + +from absl.testing import absltest + +from open_spiel.python.games import iterated_prisoners_dilemma # pylint: disable=unused-import +import pyspiel + + +class IteratedPrisonersDilemmaTest(absltest.TestCase): + + def test_default_param(self): + """Check the game can be converted to a turn-based game.""" + game = pyspiel.load_game("python_iterated_prisoners_dilemma") + self.assertEqual(game._termination_probability, 0.125) + + def test_non_default_param_from_string(self): + """Check params can be given through the string loading.""" + game = pyspiel.load_game( + "python_iterated_prisoners_dilemma(termination_probability=0.5)") + self.assertEqual(game._termination_probability, 0.5) + + def test_non_default_param_from_dict(self): + """Check params can be given through a dictionary.""" + game = pyspiel.load_game("python_iterated_prisoners_dilemma", + {"termination_probability": 0.75}) + self.assertEqual(game._termination_probability, 0.75) + + def test_game_as_turn_based(self): + """Check the game can be converted to a turn-based game.""" + game = pyspiel.load_game("python_iterated_prisoners_dilemma") + turn_based = pyspiel.convert_to_turn_based(game) + pyspiel.random_sim_test( + turn_based, num_sims=10, serialize=False, verbose=True) + + def test_game_as_turn_based_via_string(self): + """Check the game can be created as a turn-based game from a string.""" + game = pyspiel.load_game( + "turn_based_simultaneous_game(game=python_iterated_prisoners_dilemma())" + ) + pyspiel.random_sim_test(game, num_sims=10, serialize=False, verbose=True) + + def test_game_from_cc(self): + """Runs our standard game tests, checking API consistency.""" + game = pyspiel.load_game("python_iterated_prisoners_dilemma") + pyspiel.random_sim_test(game, num_sims=10, serialize=False, verbose=True) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/kuhn_poker.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/kuhn_poker.py new file mode 100644 index 0000000..281e119 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/kuhn_poker.py @@ -0,0 +1,227 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Kuhn Poker implemented in Python. + +This is a simple demonstration of implementing a game in Python, featuring +chance and imperfect information. + +Python games are significantly slower than C++, but it may still be suitable +for prototyping or for small games. 
+ +It is possible to run C++ algorithms on Python implemented games, This is likely +to have good performance if the algorithm simply extracts a game tree and then +works with that. It is likely to be poor if the algorithm relies on processing +and updating states as it goes, e.g. MCTS. +""" + +import enum + +import numpy as np + +import pyspiel + + +class Action(enum.IntEnum): + PASS = 0 + BET = 1 + + +_NUM_PLAYERS = 2 +_DECK = frozenset([0, 1, 2]) +_GAME_TYPE = pyspiel.GameType( + short_name="python_kuhn_poker", + long_name="Python Kuhn Poker", + dynamics=pyspiel.GameType.Dynamics.SEQUENTIAL, + chance_mode=pyspiel.GameType.ChanceMode.EXPLICIT_STOCHASTIC, + information=pyspiel.GameType.Information.IMPERFECT_INFORMATION, + utility=pyspiel.GameType.Utility.ZERO_SUM, + reward_model=pyspiel.GameType.RewardModel.TERMINAL, + max_num_players=_NUM_PLAYERS, + min_num_players=_NUM_PLAYERS, + provides_information_state_string=True, + provides_information_state_tensor=True, + provides_observation_string=True, + provides_observation_tensor=True, + provides_factored_observation_string=True) +_GAME_INFO = pyspiel.GameInfo( + num_distinct_actions=len(Action), + max_chance_outcomes=len(_DECK), + num_players=_NUM_PLAYERS, + min_utility=-2.0, + max_utility=2.0, + utility_sum=0.0, + max_game_length=3) # e.g. Pass, Bet, Bet + + +class KuhnPokerGame(pyspiel.Game): + """A Python version of Kuhn poker.""" + + def __init__(self, params=None): + super().__init__(_GAME_TYPE, _GAME_INFO, params or dict()) + + def new_initial_state(self): + """Returns a state corresponding to the start of a game.""" + return KuhnPokerState(self) + + def make_py_observer(self, iig_obs_type=None, params=None): + """Returns an object used for observing game state.""" + return KuhnPokerObserver( + iig_obs_type or pyspiel.IIGObservationType(perfect_recall=False), + params) + + +class KuhnPokerState(pyspiel.State): + """A python version of the Kuhn poker state.""" + + def __init__(self, game): + """Constructor; should only be called by Game.new_initial_state.""" + super().__init__(game) + self.cards = [] + self.bets = [] + self.pot = [1.0, 1.0] + self._game_over = False + self._next_player = 0 + + # OpenSpiel (PySpiel) API functions are below. This is the standard set that + # should be implemented by every sequential-move game with chance. 
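The comment above introduces the same standard API set, this time for a sequential-move game with chance. The sketch below (also outside the diff) walks one concrete hand through the Kuhn poker state: chance deals one card per player, player 0 bets, and player 1 folds. The dealt cards and actions are chosen purely for illustration, and the snippet again assumes the Python game registers itself on import.

```python
import pyspiel
from open_spiel.python.games import kuhn_poker  # pylint: disable=unused-import

game = pyspiel.load_game("python_kuhn_poker")
state = game.new_initial_state()
state.apply_action(2)                       # chance deals card 2 to player 0
state.apply_action(0)                       # chance deals card 0 to player 1
state.apply_action(kuhn_poker.Action.BET)   # player 0 bets, pot becomes [2, 1]
state.apply_action(kuhn_poker.Action.PASS)  # player 1 folds, ending the hand
assert state.is_terminal()
print(state.returns())  # [1.0, -1.0]: player 0 wins player 1's ante
```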
+ + def current_player(self): + """Returns id of the next player to move, or TERMINAL if game is over.""" + if self._game_over: + return pyspiel.PlayerId.TERMINAL + elif len(self.cards) < _NUM_PLAYERS: + return pyspiel.PlayerId.CHANCE + else: + return self._next_player + + def _legal_actions(self, player): + """Returns a list of legal actions, sorted in ascending order.""" + assert player >= 0 + return [Action.PASS, Action.BET] + + def chance_outcomes(self): + """Returns the possible chance outcomes and their probabilities.""" + assert self.is_chance_node() + outcomes = sorted(_DECK - set(self.cards)) + p = 1.0 / len(outcomes) + return [(o, p) for o in outcomes] + + def _apply_action(self, action): + """Applies the specified action to the state.""" + if self.is_chance_node(): + self.cards.append(action) + else: + self.bets.append(action) + if action == Action.BET: + self.pot[self._next_player] += 1 + self._next_player = 1 - self._next_player + if ((min(self.pot) == 2) or + (len(self.bets) == 2 and action == Action.PASS) or + (len(self.bets) == 3)): + self._game_over = True + + def _action_to_string(self, player, action): + """Action -> string.""" + if player == pyspiel.PlayerId.CHANCE: + return f"Deal:{action}" + elif action == Action.PASS: + return "Pass" + else: + return "Bet" + + def is_terminal(self): + """Returns True if the game is over.""" + return self._game_over + + def returns(self): + """Total reward for each player over the course of the game so far.""" + pot = self.pot + winnings = float(min(pot)) + if not self._game_over: + return [0., 0.] + elif pot[0] > pot[1]: + return [winnings, -winnings] + elif pot[0] < pot[1]: + return [-winnings, winnings] + elif self.cards[0] > self.cards[1]: + return [winnings, -winnings] + else: + return [-winnings, winnings] + + def __str__(self): + """String for debug purposes. No particular semantics are required.""" + return "".join([str(c) for c in self.cards] + ["pb"[b] for b in self.bets]) + + +class KuhnPokerObserver: + """Observer, conforming to the PyObserver interface (see observation.py).""" + + def __init__(self, iig_obs_type, params): + """Initializes an empty observation tensor.""" + if params: + raise ValueError(f"Observation parameters not supported; passed {params}") + + # Determine which observation pieces we want to include. + pieces = [("player", 2, (2,))] + if iig_obs_type.private_info == pyspiel.PrivateInfoType.SINGLE_PLAYER: + pieces.append(("private_card", 3, (3,))) + if iig_obs_type.public_info: + if iig_obs_type.perfect_recall: + pieces.append(("betting", 6, (3, 2))) + else: + pieces.append(("pot_contribution", 2, (2,))) + + # Build the single flat tensor. + total_size = sum(size for name, size, shape in pieces) + self.tensor = np.zeros(total_size, np.float32) + + # Build the named & reshaped views of the bits of the flat tensor. 
+ self.dict = {} + index = 0 + for name, size, shape in pieces: + self.dict[name] = self.tensor[index:index + size].reshape(shape) + index += size + + def set_from(self, state, player): + """Updates `tensor` and `dict` to reflect `state` from PoV of `player`.""" + self.tensor.fill(0) + if "player" in self.dict: + self.dict["player"][player] = 1 + if "private_card" in self.dict and len(state.cards) > player: + self.dict["private_card"][state.cards[player]] = 1 + if "pot_contribution" in self.dict: + self.dict["pot_contribution"][:] = state.pot + if "betting" in self.dict: + for turn, action in enumerate(state.bets): + self.dict["betting"][turn, action] = 1 + + def string_from(self, state, player): + """Observation of `state` from the PoV of `player`, as a string.""" + pieces = [] + if "player" in self.dict: + pieces.append(f"p{player}") + if "private_card" in self.dict and len(state.cards) > player: + pieces.append(f"card:{state.cards[player]}") + if "pot_contribution" in self.dict: + pieces.append(f"pot[{int(state.pot[0])} {int(state.pot[1])}]") + if "betting" in self.dict and state.bets: + pieces.append("".join("pb"[b] for b in state.bets)) + return " ".join(str(p) for p in pieces) + + +# Register the game with the OpenSpiel library + +pyspiel.register_game(_GAME_TYPE, KuhnPokerGame) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/kuhn_poker_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/kuhn_poker_test.py new file mode 100644 index 0000000..268ac22 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/kuhn_poker_test.py @@ -0,0 +1,91 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
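The observer defined for `python_kuhn_poker` above packs every observation piece into one flat tensor and exposes named, reshaped views of it through `dict`. The sketch below (not part of the diff) shows how such an observer is typically consumed; it assumes `make_observation` hands back the Python observer for Python-registered games and that the default observation type includes the `player`, `private_card`, and `pot_contribution` pieces, so treat the exact printed values as illustrative rather than guaranteed by this diff.

```python
import pyspiel
from open_spiel.python.games import kuhn_poker  # pylint: disable=unused-import
from open_spiel.python.observation import make_observation

game = pyspiel.load_game("python_kuhn_poker")
state = game.new_initial_state()
state.apply_action(1)                      # deal card 1 to player 0
state.apply_action(2)                      # deal card 2 to player 1
state.apply_action(kuhn_poker.Action.BET)  # player 0 bets

obs = make_observation(game)   # default imperfect-information observation
obs.set_from(state, player=0)
print(obs.dict["player"])            # [1. 0.], one-hot player id
print(obs.dict["private_card"])      # [0. 1. 0.], player 0 holds card 1
print(obs.dict["pot_contribution"])  # [2. 1.], player 0 has added a bet
print(obs.string_from(state, 0))     # "p0 card:1 pot[2 1]"
```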
+ +# Lint as python3 +"""Tests for Python Kuhn Poker.""" + +from absl.testing import absltest +import numpy as np + +from open_spiel.python import policy +from open_spiel.python.algorithms import exploitability +from open_spiel.python.algorithms import sequence_form_lp +from open_spiel.python.algorithms.get_all_states import get_all_states +from open_spiel.python.games import kuhn_poker # pylint: disable=unused-import +from open_spiel.python.observation import make_observation +import pyspiel + + +class KuhnPokerTest(absltest.TestCase): + + def test_game_from_cc(self): + """Runs our standard game tests, checking API consistency.""" + game = pyspiel.load_game("python_kuhn_poker") + pyspiel.random_sim_test(game, num_sims=10, serialize=False, verbose=True) + + def test_consistent(self): + """Checks the Python and C++ game implementations are the same.""" + py_game = pyspiel.load_game("python_kuhn_poker") + cc_game = pyspiel.load_game("kuhn_poker") + obs_types = [None, pyspiel.IIGObservationType(perfect_recall=True)] + py_observations = [make_observation(py_game, o) for o in obs_types] + cc_observations = [make_observation(cc_game, o) for o in obs_types] + py_states = get_all_states(py_game) + cc_states = get_all_states(cc_game) + self.assertCountEqual(list(cc_states), list(py_states)) + for key, cc_state in cc_states.items(): + py_state = py_states[key] + np.testing.assert_array_equal(py_state.history(), cc_state.history()) + np.testing.assert_array_equal(py_state.returns(), cc_state.returns()) + for py_obs, cc_obs in zip(py_observations, cc_observations): + for player in (0, 1): + py_obs.set_from(py_state, player) + cc_obs.set_from(cc_state, player) + np.testing.assert_array_equal(py_obs.tensor, cc_obs.tensor) + + def test_nash_value_sequence_form_lp(self): + """Checks Nash value using a Python sequence form LP solver.""" + game = pyspiel.load_game("python_kuhn_poker") + val1, val2, _, _ = sequence_form_lp.solve_zero_sum_game(game) + # value from Kuhn 1950 or https://en.wikipedia.org/wiki/Kuhn_poker + self.assertAlmostEqual(val1, -1 / 18) + self.assertAlmostEqual(val2, +1 / 18) + + def test_exploitability_uniform_random_py(self): + """Checks the exploitability of the uniform random policy using Python.""" + # NashConv of uniform random test_policy from (found on Google books): + # https://link.springer.com/chapter/10.1007/978-3-319-75931-9_5 + game = pyspiel.load_game("python_kuhn_poker") + test_policy = policy.UniformRandomPolicy(game) + expected_nash_conv = 11 / 12 + self.assertAlmostEqual( + exploitability.exploitability(game, test_policy), + expected_nash_conv / 2) + + def test_exploitability_uniform_random_cc(self): + """Checks the exploitability of the uniform random policy using C++.""" + game = pyspiel.load_game("python_kuhn_poker") + test_policy = pyspiel.UniformRandomPolicy(game) + expected_nash_conv = 11 / 12 + self.assertAlmostEqual( + pyspiel.exploitability(game, test_policy), expected_nash_conv / 2) + + def test_cfr_cc(self): + """Runs a C++ CFR algorithm on the game.""" + game = pyspiel.load_game("python_kuhn_poker") + unused_results = pyspiel.CFRSolver(game) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/liars_poker.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/liars_poker.py new file mode 100644 index 0000000..b3d2e22 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/liars_poker.py @@ -0,0 +1,457 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed 
under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Liar's Poker implemented in Python.""" + +import numpy as np + +import pyspiel + + +CHALLENGE_ACTION = 0 +BID_ACTION_OFFSET = 1 + +_MAX_NUM_PLAYERS = 10 +_MIN_NUM_PLAYERS = 2 +_HAND_LENGTH = 10 +_NUM_DIGITS = 10 # Number of digits to include from the range 1, 2, ..., 9, 0 +_FULL_DECK = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] + +_GAME_TYPE = pyspiel.GameType( + short_name="python_liars_poker", + long_name="Python Liars Poker", + dynamics=pyspiel.GameType.Dynamics.SEQUENTIAL, + chance_mode=pyspiel.GameType.ChanceMode.EXPLICIT_STOCHASTIC, + information=pyspiel.GameType.Information.IMPERFECT_INFORMATION, + utility=pyspiel.GameType.Utility.ZERO_SUM, + reward_model=pyspiel.GameType.RewardModel.TERMINAL, + max_num_players=_MAX_NUM_PLAYERS, + min_num_players=_MIN_NUM_PLAYERS, + provides_information_state_string=True, + provides_information_state_tensor=True, + provides_observation_string=False, + provides_observation_tensor=True, + parameter_specification={ + "players": _MIN_NUM_PLAYERS, + "hand_length": _HAND_LENGTH, + "num_digits": _NUM_DIGITS, + }, +) +_GAME_INFO = pyspiel.GameInfo( + # Num actions = total number of cards * number of digits + action enum + num_distinct_actions=_HAND_LENGTH * _NUM_DIGITS * _MIN_NUM_PLAYERS + + BID_ACTION_OFFSET, + max_chance_outcomes=_HAND_LENGTH * _NUM_DIGITS, + num_players=_MIN_NUM_PLAYERS, + min_utility=-( + _MIN_NUM_PLAYERS - 1 + ), # Reward from being challenged and losing. + max_utility=_MIN_NUM_PLAYERS + - 1, # Reward for being challenged and winning. 
+ utility_sum=0.0, + # Number of possible rounds: hand_length * num_digits * num_players + # Total moves per round: num_players for non-rebid, num_players-1 for rebid + # Max game length: number of possible rounds * total moves per round + max_game_length=_HAND_LENGTH * _NUM_DIGITS * _MIN_NUM_PLAYERS**2, +) + + +class LiarsPoker(pyspiel.Game): + """A Python version of Liar's poker.""" + + def __init__(self, params=None): + super().__init__(_GAME_TYPE, _GAME_INFO, params or dict()) + game_parameters = self.get_parameters() + self.hand_length = game_parameters.get("hand_length", _HAND_LENGTH) + self.num_digits = game_parameters.get("num_digits", _NUM_DIGITS) + self.deck = _FULL_DECK[: self.num_digits] + + def new_initial_state(self): + """Returns a state corresponding to the start of a game.""" + return LiarsPokerState(self) + + def make_py_observer(self, iig_obs_type=None, params=None): + """Returns an object used for observing game state.""" + return LiarsPokerObserver( + iig_obs_type or pyspiel.IIGObservationType(perfect_recall=False), + self.num_players(), + self.hand_length, + self.num_digits, + params, + ) + + +class LiarsPokerState(pyspiel.State): + """A python version of the Liars Poker state.""" + + def __init__(self, game): + """Constructor; should only be called by Game.new_initial_state.""" + super().__init__(game) + # Game attributes + self._num_players = game.num_players() + self._hand_length = game.hand_length + self._num_digits = game.num_digits + self._deck = game.deck + self.hands = [[] for _ in range(self._num_players)] + + # Action dynamics + self.total_possible_bids = ( + game.hand_length * game.num_digits * self._num_players + ) + self.bid_history = np.zeros((self.total_possible_bids, self._num_players)) + self.challenge_history = np.zeros( + (self.total_possible_bids, self._num_players) + ) + # self._current_player is only the valid current_player when cards have + # been dealt. Otherwise it's chance. + self._current_player = 0 + self._max_bid = self._hand_length * self._num_digits * self._num_players + self._bid_originator = -1 + self._current_action = -1 + self._num_challenges = 0 + self.is_rebid = False + + # Game over dynamics + self._winner = -1 + self._loser = -1 + + def current_player(self): + """Returns id of the current player to act. + + The id is: + - TERMINAL if game is over. + - CHANCE if a player is drawing a number to fill out their hand. + - a number otherwise. + """ + if self.is_terminal(): + return pyspiel.PlayerId.TERMINAL + elif len(self.hands[self._num_players - 1]) < self._hand_length: + return pyspiel.PlayerId.CHANCE + else: + return self._current_player + + def winner(self): + """Returns the id of the winner if the bid originator has won. + + -1 otherwise. + """ + return self._winner + + def loser(self): + """Returns the id of the loser if the bid originator has lost. + + -1 otherwise. 
+ """ + return self._loser + + def _is_challenge_possible(self): + """A challenge is possible once the first bid is made.""" + return self._current_action != -1 + + def _is_rebid_possible(self): + """A rebid is only possible when all players have challenged the original bid.""" + return not self.is_rebid and self._num_challenges == self._num_players - 1 + + def _legal_actions(self, player): + """Returns a list of legal actions, sorted in ascending order.""" + assert player >= 0 + actions = [] + + if self._is_challenge_possible(): + actions.append(CHALLENGE_ACTION) + + if player != self._bid_originator or self._is_rebid_possible(): + # Any move higher than the current bid is allowed. + # Bids start at BID_ACTION_OFFSET (1) as 0 represents the challenge + # action. + for bid in range( + max(BID_ACTION_OFFSET, self._current_action + 1), self._max_bid + 1 + ): + actions.append(bid) + + return actions + + def chance_outcomes(self): + """Returns the possible chance outcomes and their probabilities.""" + assert self.is_chance_node() + probability = 1.0 / self._num_digits + return [(digit, probability) for digit in self._deck] + + def _decode_bid(self, bid): + """Turns a bid ID to a (count, number) tuple. + + For example, take 2 players each with 2 numbers from the deck of 1, 2, and + 3. + - A bid of two 1's would correspond to a bid id 1. + - Explanation: 1 is the lowest number, and the only lower bid would be + zero 1's. + - A bid of three 3's would correspond to a bid id 10. + - Explanation: 1-4 1's take bid ids 0-3. 1-4 2's take bid ids 4-7. 1 and + 2 3's take bid ids 8 and 9. + + Args: + bid: Bid ID in the range 0 to self._max_bid (non-inclusive). + + Returns: + A tuple of (count, number). For example, (1, 2) represents one 2's. + """ + number = bid % self._num_digits + 1 + count = bid // self._num_digits + 1 + return (count, number) + + def encode_bid(self, count, number): + """Turns a count and number into a bid ID. + + Bid ID is in the range 0 to self._max_bid (non-inclusive). + + For example, take 2 players each with 2 numbers from the deck of 1, 2, and + 3. + - A count of 2 and number of 1 would be a bid of two one's and a bid id 1. + - Explanation: 1 is the lowest number, and the only lower bid would be + zero 1's + corresponding to bid id 0. + + Args: + count: The count of the bid. + number: The number of the bid. + + Returns: + A single bid ID. + """ + return (count - 1) * self._num_digits + number - 1 + + def _counts(self): + """Determines if the bid originator wins or loses.""" + bid_count, bid_number = self._decode_bid( + self._current_action - BID_ACTION_OFFSET + ) + + # Count the number of bid_numbers from all players. + matches = 0 + for player_id in range(self._num_players): + for digit in self.hands[player_id]: + if digit == bid_number: + matches += 1 + + # If the number of matches are at least the bid_count bid, then the bidder + # wins. Otherwise everyone else wins. + if matches >= bid_count: + self._winner = self._bid_originator + else: + self._loser = self._bid_originator + + def _update_bid_history(self, bid, player): + """Writes a player's bid into memory.""" + self.bid_history[bid][player] = 1 + + def _update_challenge_history(self, bid, player): + """Write a player's challenge for a bid into memory.""" + self.challenge_history[bid][player] = 1 + + def _apply_action(self, action): + """Applies an action and updates the state.""" + if self.is_chance_node(): + # If we are still populating hands, draw a number for the current player. 
+ self.hands[self._current_player].append(action) + elif action == CHALLENGE_ACTION: + assert self._is_challenge_possible() + self._update_challenge_history( + self._current_action - BID_ACTION_OFFSET, self._current_player + ) + self._num_challenges += 1 + # If there is no ongoing rebid, check if all players challenge before + # counting. If there is an ongoing rebid, count once all the players + # except the bidder challenges. + if (not self.is_rebid and self._num_challenges == self._num_players) or ( + self.is_rebid and self._num_challenges == self._num_players - 1 + ): + self._counts() + else: + # Set the current bid to the action. + self._current_action = action + if self._current_player == self._bid_originator: + # If the bid originator is bidding again, we have a rebid. + self.is_rebid = True + else: + # Otherwise, we have a regular bid. + self.is_rebid = False + # Set the bid originator to the current player. + self._bid_originator = self._current_player + self._update_bid_history( + self._current_action - BID_ACTION_OFFSET, self._current_player + ) + self._num_challenges = 0 + self._current_player = (self._current_player + 1) % self._num_players + + def _action_to_string(self, player, action): + """Action -> string.""" + if player == pyspiel.PlayerId.CHANCE: + return f"Deal: {action}" + elif action == CHALLENGE_ACTION: + return "Challenge" + else: + count, number = self._decode_bid(action - BID_ACTION_OFFSET) + return f"Bid: {count} of {number}" + + def is_terminal(self): + """Returns True if the game is over.""" + return self._winner >= 0 or self._loser >= 0 + + def returns(self): + """Total reward for each player over the course of the game so far.""" + if self._winner != -1: + bidder_reward = self._num_players - 1 + others_reward = -1.0 + elif self._loser != -1: + bidder_reward = -1 * (self._num_players - 1) + others_reward = 1.0 + else: + # Game is not over. + bidder_reward = 0.0 + others_reward = 0.0 + return [ + others_reward if player_id != self._bid_originator else bidder_reward + for player_id in range(self._num_players) + ] + + def __str__(self): + """String for debug purposes. No particular semantics are required.""" + if self._current_action != -1: + count, number = self._decode_bid(self._current_action - BID_ACTION_OFFSET) + else: + count, number = "None", "None" + return ( + "Hands: {}, Bidder: {}, Current Player: {}, Current Bid: {} of {}," + " Rebid: {}".format( + self.hands, + self._bid_originator, + self.current_player(), + count, + number, + self.is_rebid, + ) + ) + + +class LiarsPokerObserver: + """Observer, conforming to the PyObserver interface (see observation.py). + + An observation will consist of the following: + - One hot encoding of the current player number: [0 0 0 1 0 0 0] + - A vector of length hand_length containing the digits in a player's hand. + - Two matrices each of size (hand_length * num_digits * num_players, + num_players) + will store bids and challenges respectively. Each row in the matrix + corresponds + to a particular bid (e.g. one 1, two 5s, or eight 3s). 0 will represent no + action. 1 will represent a player's bid or a player's challenge. 
+ - One bit for whether we are rebidding: [1] rebid occuring, [0] otherwise + - One bit for whether we are counting: [1] COUNTS called, [0] otherwise + """ + + def __init__( + self, iig_obs_type, num_players, hand_length, num_digits, params=None + ): + """Initiliazes an empty observation tensor.""" + del params + self.num_players = num_players + self.hand_length = hand_length + + # Determine which observation pieces we want to include. + # Pieces is a list of tuples containing observation pieces. + # Pieces are described by their (name, number of elements, and shape). + pieces = [( + "player", + num_players, + (num_players,), + )] # One-hot encoding for the player id. + if iig_obs_type.private_info == pyspiel.PrivateInfoType.SINGLE_PLAYER: + # Vector containing the digits in a player's hand + pieces.append(("private_hand", hand_length, (hand_length,))) + if iig_obs_type.public_info: + pieces.append(("rebid_state", 1, (1,))) + pieces.append(("counts_state", 1, (1,))) + if iig_obs_type.perfect_recall: + # One-hot encodings for players' moves at every round. + total_possible_rounds = hand_length * num_digits * num_players + pieces.append(( + "bid_history", + total_possible_rounds * num_players, + (total_possible_rounds, num_players), + )) + pieces.append(( + "challenge_history", + total_possible_rounds * num_players, + (total_possible_rounds, num_players), + )) + + # Build the single flat tensor. + total_size = sum(size for name, size, shape in pieces) + self.tensor = np.zeros(total_size, np.float32) + + # Build the named & reshaped views of the bits of the flat tensor. + self.dict = {} + index = 0 + for name, size, shape in pieces: + self.dict[name] = self.tensor[index : index + size].reshape(shape) + index += size + + def set_from(self, state, player): + """Updates `tensor` and `dict` to reflect `state` from PoV of `player`.""" + self.tensor.fill(0) + if "player" in self.dict: + self.dict["player"][player] = 1 + if ( + "private_hand" in self.dict + and len(state.hands[player]) == self.hand_length + ): + self.dict["private_hand"] = np.asarray(state.hands[player]) + if "rebid_state" in self.dict: + self.dict["rebid_state"][0] = int(state.is_rebid) + if "counts_state" in self.dict: + self.dict["counts_state"][0] = int(state.is_terminal()) + if "bid_history" in self.dict: + self.dict["bid_history"] = state.bid_history + if "challenge_history" in self.dict: + self.dict["challenge_history"] = state.challenge_history + + def string_from(self, state, player): + """Observation of `state` from the PoV of `player`, as a string.""" + pieces = [] + if "player" in self.dict: + pieces.append(f"p{player}") + if ( + "private_hand" in self.dict + and len(state.hands[player]) == self.hand_length + ): + pieces.append(f"hand:{state.hands[player]}") + if "rebid_state" in self.dict: + pieces.append(f"rebid:{[int(state.is_rebid)]}") + if "counts_state" in self.dict: + pieces.append(f"counts:{[int(state.is_terminal())]}") + if "bid_history" in self.dict: + for bid in range(len(state.bid_history)): + if np.any(state.bid_history[bid] == 1): + pieces.append("b:{}.".format(bid)) + if "challenge_history" in self.dict: + for bid in range(len(state.challenge_history)): + if np.any(state.challenge_history[bid] == 1): + pieces.append("c:{}.".format(bid)) + return " ".join(str(p) for p in pieces) + + +# Register the game with the OpenSpiel library + +pyspiel.register_game(_GAME_TYPE, LiarsPoker) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/liars_poker_test.py 
b/scenarios/bargaining/open_spiel/open_spiel/python/games/liars_poker_test.py new file mode 100644 index 0000000..c1c6c99 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/liars_poker_test.py @@ -0,0 +1,287 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Tests for Python Liar's Poker.""" + +import pickle + +from absl.testing import absltest +import numpy as np + +from open_spiel.python.games import liars_poker +import pyspiel + + +class LiarsPokerTest(absltest.TestCase): + + def test_can_create_game_and_state(self): + """Checks we can create the game and a state.""" + game = liars_poker.LiarsPoker({"hand_length": 3, "num_digits": 3}) + state = game.new_initial_state() + # Ensure no moves have been made. + expected_hands = [[] for _ in range(game.num_players())] + expected_bidder = -1 + expected_current_player = pyspiel.PlayerId.CHANCE + expected_current_count = "None" + expected_current_number = "None" + expected_rebid = False + expected = ( + "Hands: {}, Bidder: {}, Current Player: {}, Current Bid: {} of {}," + " Rebid: {}".format( + expected_hands, + expected_bidder, + expected_current_player, + expected_current_count, + expected_current_number, + expected_rebid, + ) + ) + self.assertEqual(str(state), expected) + + def test_draw_hands(self): + """Tests hand drawing functions.""" + game = liars_poker.LiarsPoker({"hand_length": 3, "num_digits": 3}) + state = game.new_initial_state() + expected_hands = [[] for _ in range(game.num_players())] + for i in range(game.num_players() * game.hand_length): + # Verify we have chance nodes until all player hands are filled. + self.assertEqual(state.current_player(), pyspiel.PlayerId.CHANCE) + + # Draw a digit. + outcomes_with_probs = state.chance_outcomes() + action_list, prob_list = zip(*outcomes_with_probs) + action = np.random.choice(action_list, p=prob_list) + + # Verify players' hands are filled correctly. + cur_player = i % game.num_players() + expected_hands[cur_player].append(action) + state.apply_action(action) + self.assertEqual(state.hands, expected_hands) + # Assert after all hands are filled, we have non-chance nodes. + cur_player = state.current_player() + self.assertNotEqual(cur_player, pyspiel.PlayerId.CHANCE) + self.assertEqual(cur_player, 0) + + def _populate_game_hands(self, game, state): + """Populates players hands for testing.""" + for _ in range(game.num_players() * game.hand_length): + outcomes_with_probs = state.chance_outcomes() + action_list, prob_list = zip(*outcomes_with_probs) + action = np.random.choice(action_list, p=prob_list) + state.apply_action(action) + + def test_basic_bid(self): + """Tests a single bid.""" + game = liars_poker.LiarsPoker({"hand_length": 3, "num_digits": 3}) + state = game.new_initial_state() + expected_bid_history = np.zeros( + (state.total_possible_bids, state.num_players()) + ) + + # Fill players hands. + self._populate_game_hands(game, state) + # After all hands are filled, have player 0 bid. 
+ cur_player = state.current_player() + action = 2 + state.apply_action(action) + + # Verify bid history is updated correctly. + bid_offset = liars_poker.BID_ACTION_OFFSET + expected_bid_history[action - bid_offset][cur_player] = 1 + self.assertTrue((state.bid_history == expected_bid_history).all()) + + # Verify next set of legal bids is greater than the current bid. + for next_action in state.legal_actions(): + if next_action == liars_poker.CHALLENGE_ACTION: + continue + self.assertGreater(next_action, action) + + def _verify_returns(self, game, state): + self.assertTrue(state.winner() != -1 or state.loser() != -1) + actual_returns = state.returns() + if state.winner() != -1: + expected_returns = [-1.0 for _ in range(game.num_players())] + expected_returns[state.winner()] = game.num_players() - 1 + else: + expected_returns = [1.0 for _ in range(game.num_players())] + expected_returns[state.loser()] = -1.0 * (game.num_players() - 1) + self.assertEqual(actual_returns, expected_returns) + + def test_single_random_round(self): + """Runs a single round of bidding followed by a challenge.""" + game = liars_poker.LiarsPoker({"hand_length": 3, "num_digits": 3}) + state = game.new_initial_state() + expected_challenge_history = np.zeros( + (state.total_possible_bids, state.num_players()) + ) + + # Fill players hands. + self._populate_game_hands(game, state) + # Have player 0 bid. + action = 2 + state.apply_action(action) + # Verify challenge action is available to the next player. + challenge = liars_poker.CHALLENGE_ACTION + self.assertIn(challenge, state.legal_actions()) + # Player 1 challenges. + cur_player = state.current_player() + state.apply_action(challenge) + bid_offset = liars_poker.BID_ACTION_OFFSET + expected_challenge_history[action - bid_offset][cur_player] = 1 + # Verify challenge history is updated correctly. + self.assertTrue( + (state.challenge_history == expected_challenge_history).all() + ) + # Original bidder challenges, thus agreeing to a count. + cur_player = state.current_player() + state.apply_action(challenge) + expected_challenge_history[action - bid_offset][cur_player] = 1 + # Verify challenge history is updated correctly. + self.assertTrue( + (state.challenge_history == expected_challenge_history).all() + ) + + # Verify game is over. + self.assertTrue(state.is_terminal()) + # Verify returns. + self._verify_returns(game, state) + + def test_single_deterministic_round(self): + """Runs a single round where cards are dealt deterministically.""" + game = liars_poker.LiarsPoker({"hand_length": 3, "num_digits": 3}) + state = game.new_initial_state() + + # Deal player 0 all "1" cards and player 1 all "2" cards. + for i in range(game.num_players() * game.hand_length): + if i % 2 == 0: + # Deal card to player 0 + state.apply_action(1) + else: + # Deal card to player 1 + state._apply_action(2) + + # Have player 0 bid that there are four 1's. + state.apply_action(state.encode_bid(4, 1) + liars_poker.BID_ACTION_OFFSET) + # Player 1 challenges. + state.apply_action(liars_poker.CHALLENGE_ACTION) + # Player 0 accepts the challenge. + state.apply_action(liars_poker.CHALLENGE_ACTION) + # Verify game ends with player 0 losing. 
+ self.assertTrue(state.is_terminal()) + self.assertEqual(state.loser(), 0) + expected_returns = [1.0 for _ in range(game.num_players())] + expected_returns[state.loser()] = -1.0 * (game.num_players() - 1) + self.assertEqual(state.returns(), expected_returns) + + def test_single_rebid(self): + """Runs a 2 player game where a rebid is enacted.""" + game = liars_poker.LiarsPoker({"hand_length": 3, "num_digits": 3}) + state = game.new_initial_state() + + # Fill players hands. + self._populate_game_hands(game, state) + # Have player 0 bid. + state.apply_action(2) + # Player 1 challenges. + state.apply_action(liars_poker.CHALLENGE_ACTION) + # Original bidder rebids. + state.apply_action(3) + # Verify game is not over. + self.assertFalse(state.is_terminal()) + self.assertEqual(state.returns(), [0.0 for _ in range(game.num_players())]) + # Player 1 challenges again. + state.apply_action(liars_poker.CHALLENGE_ACTION) + + # Verify game is now over. + self.assertTrue(state.is_terminal()) + self._verify_returns(game, state) + + def test_rebid_then_new_bid(self): + """Runs a 2 player game where a rebid is enacted.""" + game = liars_poker.LiarsPoker({"hand_length": 3, "num_digits": 3}) + state = game.new_initial_state() + + # Fill players hands. + self._populate_game_hands(game, state) + # Have player 0 bid. + state.apply_action(2) + # Player 1 challenges. + state.apply_action(liars_poker.CHALLENGE_ACTION) + # Original bidder rebids. + state.apply_action(3) + # Verify game is not over. + self.assertFalse(state.is_terminal()) + self.assertEqual(state.returns(), [0.0 for _ in range(game.num_players())]) + # Player 1 bids. + state.apply_action(4) + # Verify game is not over. + self.assertFalse(state.is_terminal()) + # Player 0 challenges. + state.apply_action(liars_poker.CHALLENGE_ACTION) + # Verify we're not rebidding and counts is only called once both players + # challenge. + self.assertFalse(state.is_terminal()) + # Player 1 challenges and ends the game with a counts. + state.apply_action(liars_poker.CHALLENGE_ACTION) + + # Verify game is now over. 
+ self.assertTrue(state.is_terminal()) + self._verify_returns(game, state) + + def test_game_from_cc(self): + """Runs the standard game tests, checking API consistency.""" + game = pyspiel.load_game("python_liars_poker", {"players": 2}) + pyspiel.random_sim_test(game, num_sims=10, serialize=False, verbose=True) + + def test_pickle(self): + """Checks pickling and unpickling of game and state.""" + game = pyspiel.load_game("python_liars_poker") + pickled_game = pickle.dumps(game) + unpickled_game = pickle.loads(pickled_game) + self.assertEqual(str(game), str(unpickled_game)) + state = game.new_initial_state() + for a in [2, 3, 4, 5]: + state.apply_action(a) + ser_str = pyspiel.serialize_game_and_state(game, state) + new_game, new_state = pyspiel.deserialize_game_and_state(ser_str) + self.assertEqual(str(game), str(new_game)) + self.assertEqual(str(state), str(new_state)) + pickled_state = pickle.dumps(state) + unpickled_state = pickle.loads(pickled_state) + self.assertEqual(str(state), str(unpickled_state)) + + def test_cloned_state_matches_original_state(self): + """Check we can clone states successfully.""" + game = liars_poker.LiarsPoker({"hand_length": 3, "num_digits": 3}) + state = game.new_initial_state() + state.apply_action(1) + state.apply_action(2) + clone = state.clone() + + self.assertEqual(state.history(), clone.history()) + self.assertEqual(state.num_players(), clone.num_players()) + self.assertEqual(state.move_number(), clone.move_number()) + self.assertEqual(state.num_distinct_actions(), clone.num_distinct_actions()) + + self.assertEqual(state._current_player, clone._current_player) + self.assertEqual(state._current_action, clone._current_action) + np.testing.assert_array_equal(state.bid_history, clone.bid_history) + np.testing.assert_array_equal( + state.challenge_history, clone.challenge_history + ) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/team_dominoes.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/team_dominoes.py new file mode 100644 index 0000000..12badc8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/team_dominoes.py @@ -0,0 +1,415 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Dominoes (4 players) implemented in Python. 
+ +https://en.wikipedia.org/wiki/Dominoes#Latin_American_Version +""" + +import collections +import copy +import itertools + +import numpy as np + +import pyspiel + +_NUM_PLAYERS = 4 +_PIPS = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0] +_DECK = list(itertools.combinations_with_replacement(_PIPS, 2)) +_EDGES = [None, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0] + + +class Action: + """Represent player possible action.""" + + def __init__(self, player, tile, edge): + self.player = player + self.tile = tile + self.edge = edge + + def __str__(self): + return f"p{self.player} tile:{self.tile} pip:{self.edge}" + + def __repr__(self): + return self.__str__() + + +def create_possible_actions(): + actions = [] + for player in range(_NUM_PLAYERS): + for tile in _DECK: + for edge in _EDGES: + if edge in tile or edge is None: + actions.append(Action(player, tile, edge)) + return actions + + +_ACTIONS = create_possible_actions() +_ACTIONS_STR = [str(action) for action in _ACTIONS] + +_HAND_SIZE = 7 + +_MAX_GAME_LENGTH = 28 + +_GAME_TYPE = pyspiel.GameType( + short_name="python_team_dominoes", + long_name="Python Team Dominoes (4 players)", + dynamics=pyspiel.GameType.Dynamics.SEQUENTIAL, + chance_mode=pyspiel.GameType.ChanceMode.EXPLICIT_STOCHASTIC, + information=pyspiel.GameType.Information.IMPERFECT_INFORMATION, + utility=pyspiel.GameType.Utility.ZERO_SUM, + reward_model=pyspiel.GameType.RewardModel.TERMINAL, + max_num_players=_NUM_PLAYERS, + min_num_players=_NUM_PLAYERS, + provides_information_state_string=True, + provides_information_state_tensor=True, + provides_observation_string=True, + provides_observation_tensor=True, + provides_factored_observation_string=True, +) +_GAME_INFO = pyspiel.GameInfo( + num_distinct_actions=len(_ACTIONS), + max_chance_outcomes=len(_DECK), + min_utility=-100, + max_utility=100, + num_players=_NUM_PLAYERS, + # deal: 28 chance nodes + play: 28 player nodes + max_game_length=_MAX_GAME_LENGTH, + utility_sum=0.0, +) + + +class DominoesGame(pyspiel.Game): + """A Python version of Block Dominoes.""" + + def __init__(self, params=None): + super().__init__(_GAME_TYPE, _GAME_INFO, params or dict()) + + def new_initial_state(self): + """Returns a state corresponding to the start of a game.""" + return DominoesState(self) + + def make_py_observer(self, iig_obs_type=None, params=None): + """Returns an object used for observing game state.""" + return DominoesObserver( + iig_obs_type or pyspiel.IIGObservationType(perfect_recall=False), params + ) + + +class DominoesState(pyspiel.State): + """A python version of the Block Dominoes state.""" + + def __init__(self, game): + """Constructor; should only be called by Game.new_initial_state.""" + super().__init__(game) + self.actions_history = [] + self.open_edges = [] + self.hands = [[] for _ in range(_NUM_PLAYERS)] + self.deck = copy.deepcopy(_DECK) + self._game_over = False + self._next_player = pyspiel.PlayerId.CHANCE + self._current_deal_player = 0 # NEW ATTRIBUTE + + # OpenSpiel (PySpiel) API functions are below. This is the standard set that + # should be implemented by every sequential-move game with chance. 
+ + def current_player(self): + """Returns id of the next player to move, or TERMINAL if game is over.""" + if self._game_over: + return pyspiel.PlayerId.TERMINAL + if self.deck: # deal phase + return pyspiel.PlayerId.CHANCE + return self._next_player + + def _legal_actions(self, player): + """Returns a list of legal actions, sorted in ascending order.""" + assert player >= 0 + assert player == self._next_player + return self.get_legal_actions(player) + + def get_legal_actions(self, player): + """Returns a list of legal actions.""" + assert player >= 0 + + actions = [] + hand = self.hands[player] + + # first move, no open edges + if not self.open_edges: + for tile in hand: + actions.append(Action(player, tile, None)) + else: + for tile in hand: + if tile[0] in self.open_edges: + actions.append(Action(player, tile, tile[0])) + if tile[0] != tile[1] and tile[1] in self.open_edges: + actions.append(Action(player, tile, tile[1])) + + actions_idx = [_ACTIONS_STR.index(str(action)) for action in actions] + actions_idx.sort() + return actions_idx + + def chance_outcomes(self): + """Returns the possible chance outcomes and their probabilities.""" + assert self.is_chance_node() + p = 1.0 / len(self.deck) + return [(_DECK.index(i), p) for i in self.deck] + + def _apply_action(self, action): + """Applies the specified action to the state.""" + if self.is_chance_node(): + # Deal tiles to players in order (0, 1, 2, 3) + hand_to_add_tile = self.hands[self._current_deal_player] + tile = _DECK[action] + self.deck.remove(tile) + hand_to_add_tile.append(tile) + self._current_deal_player = (self._current_deal_player + 1) % 4 + + # Check if all hands are of _HAND_SIZE + if not all(len(hand) == _HAND_SIZE for hand in self.hands): + return # more tiles to deal + + for hand in self.hands: + hand.sort() + + self._next_player = 0 + else: + action = _ACTIONS[action] + self.actions_history.append(action) + my_idx = self.current_player() + my_hand = self.hands[my_idx] + my_hand.remove(action.tile) + self.update_open_edges(action) + + if not my_hand: + self._game_over = True # player played his last tile + return + + for i in range(1, 5): + next_idx = (my_idx + i) % 4 + next_legal_actions = self.get_legal_actions(next_idx) + + if next_legal_actions: + self._next_player = next_idx + return + + # Check if a team has played all their tiles. + if not (self.hands[0] or self.hands[2]) or not ( + self.hands[1] or self.hands[3] + ): + self._game_over = True + return + + # all players are blocked. Game is stuck. 
+ self._game_over = True + + def update_open_edges(self, action): + if not self.open_edges: + self.open_edges = list(action.tile) + else: + self.open_edges.remove(action.edge) + new_edge = ( + action.tile[0] if action.tile[0] != action.edge else action.tile[1] + ) + self.open_edges.append(new_edge) + + self.open_edges.sort() + + def _action_to_string(self, player, action): + """Action -> string.""" + if player == pyspiel.PlayerId.CHANCE: + return f"Deal {_DECK[action]}" + return _ACTIONS_STR[action] + + def is_terminal(self): + """Returns True if the game is over.""" + return self._game_over + + def returns(self): + """Total reward for each player over the course of the game so far.""" + if not self.is_terminal(): + return [0 for _ in range(_NUM_PLAYERS)] + + sum_of_pips0 = sum(t[0] + t[1] for t in (self.hands[0] + self.hands[2])) + sum_of_pips1 = sum(t[0] + t[1] for t in (self.hands[1] + self.hands[3])) + + if sum_of_pips1 == sum_of_pips0: + return [0 for _ in range(_NUM_PLAYERS)] + + if sum_of_pips1 > sum_of_pips0: + return [sum_of_pips1, -sum_of_pips1, sum_of_pips1, -sum_of_pips1] + return [-sum_of_pips0, sum_of_pips0, -sum_of_pips0, sum_of_pips0] + + def __str__(self): + """String for debug purposes. No particular semantics are required.""" + hand0 = [str(c) for c in self.hands[0]] + hand1 = [str(c) for c in self.hands[1]] + hand2 = [str(c) for c in self.hands[2]] + hand3 = [str(c) for c in self.hands[3]] + board = self.draw_board() + return ( + f"hand0:{hand0}\n" + f"hand1:{hand1}\n" + f"hand2:{hand2}\n" + f"hand3:{hand3}\n\n" + f"board: {board}" + ) + + def draw_board(self): + """Draw the board' in a human readable format.""" + board = collections.deque() + current_open_edges = None + for action in self.actions_history: + # check if action is played on an empty board + if action.edge is None: + board.append(action.tile) + # pylint: disable=unused-variable + current_open_edges = list(action.tile) + # check if action edge matches last played edge in the left or right + elif action.edge == current_open_edges[0]: + # invert the tile if the edge is on the right: + tile = ( + (action.tile[1], action.tile[0]) + if action.tile[0] == current_open_edges[0] + else action.tile + ) + board.appendleft(tile) + + elif action.edge == current_open_edges[1]: + # invert the tile if the edge is on the left: + tile = ( + (action.tile[1], action.tile[0]) + if action.tile[1] == current_open_edges[1] + else action.tile + ) + board.append(tile) + + current_open_edges = board[0][0], board[-1][1] + + # TODO(someone): move this to a test + assert len(board) == len(self.actions_history) + return list(board) + + +class DominoesObserver: + """Observer, conforming to the PyObserver interface (see observation.py).""" + + def __init__(self, iig_obs_type, params): + """Initializes an empty observation tensor.""" + if params: + raise ValueError(f"Observation parameters not supported; passed {params}") + + # Determine which observation pieces we want to include. + pieces = [("player", 4, (4,))] + + if iig_obs_type.private_info == pyspiel.PrivateInfoType.SINGLE_PLAYER: + # each tile is represented using 3 integers: + # 2 for the pips, and 1 to distinguish between (0,0) to empty slot for + # a tile. 
+ pieces.append(("hand", 21, (7, 3))) # 7 tiles per hand + if iig_obs_type.public_info: + if iig_obs_type.perfect_recall: + # list of all played actions, each action is represented using 5 + # integers: + # 2 for the played tile (0-6), + # 1 for the covered edge (0-6), + # 1 for which player (0,1,3,4), + # 1 to distinguish between actual move and empty slot for a move (0/1). + # the None (play on an empty board) edge represented using 0. + pieces.append(("actions_history", 125, (25, 5))) + else: + # last action, represented in the same way as in "actions_history" + # but without the last integer. + pieces.append(("last_action", 4, (4,))) + pieces.append(("hand_sizes", 4, (4,))) + + # Build the single flat tensor. + total_size = sum(size for name, size, shape in pieces) + self.tensor = np.zeros(total_size, np.float32) + + # Build the named & reshaped views of the bits of the flat tensor. + self.dict = {} + index = 0 + for name, size, shape in pieces: + self.dict[name] = self.tensor[index : index + size].reshape(shape) + index += size + + def copy_indices(self, dest, source, index_list): + for idx in index_list: + dest[idx] = source[idx] + + def set_from(self, state, player): + """Updates `tensor` and `dict` to reflect `state` from PoV of `player`.""" + + self.tensor.fill(0) + + if "player" in self.dict: + self.dict["player"][player] = 1 + self.dict["player"][1 - player] = 0 + + if "hand_sizes" in self.dict: + my_hand_size = len(state.hands[player]) + opp_hand_size = len(state.hands[1 - player]) + self.dict["hand_sizes"][0] = my_hand_size + self.dict["hand_sizes"][1] = opp_hand_size + + if "edges" in self.dict: + if state.open_edges: + self.copy_indices(self.dict["edges"], state.open_edges, [0, 1]) + else: + self.dict["edges"][0] = 0.0 + self.dict["edges"][1] = 0.0 + + if "hand" in self.dict: + for i, tile in enumerate(state.hands[player]): + self.copy_indices(self.dict["hand"][i], tile, [0, 1]) + self.dict["hand"][i][2] = 1.0 + + if "actions_history" in self.dict: + for i, action in enumerate(state.actions_history): + self.copy_indices(self.dict["actions_history"][i], action.tile, [0, 1]) + self.dict["actions_history"][i][2] = ( + action.edge if action.edge is not None else 0.0 + ) + self.dict["actions_history"][i][3] = action.player + self.dict["actions_history"][i][4] = 1.0 + + if "last_action" in self.dict: + if state.actions_history: + action = state.actions_history[-1] + self.copy_indices(self.dict["last_action"], action.tile, [0, 1]) + self.dict["last_action"][2] = ( + action.edge if action.edge is not None else 0.0 + ) + self.dict["last_action"][3] = action.player + + def string_from(self, state, player): + """Observation of `state` from the PoV of `player`, as a string.""" + pieces = [] + if "player" in self.dict: + pieces.append(f"p{player}") + if "hand" in self.dict: + pieces.append(f"hand:{state.hands[player]}") + if "actions_history" in self.dict: + pieces.append(f"history:{str(state.actions_history)}") + if "last_action" in self.dict and state.actions_history: + pieces.append(f"last_action:{str(state.actions_history[-1])}") + return " ".join(str(p) for p in pieces) + + +# Register the game with the OpenSpiel library + +pyspiel.register_game(_GAME_TYPE, DominoesGame) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/team_dominoes_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/team_dominoes_test.py new file mode 100644 index 0000000..7583fa0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/team_dominoes_test.py @@ 
-0,0 +1,204 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Tests for Latin American Python Dominoes.""" + + +from absl.testing import absltest +from open_spiel.python.games import team_dominoes +import pyspiel + + +class DominoesTest(absltest.TestCase): + + def test_game_from_cc(self): + """Runs our standard game tests, checking API consistency.""" + game = pyspiel.load_game("python_team_dominoes") + pyspiel.random_sim_test(game, num_sims=100, serialize=False, verbose=True) + + def test_single_deterministic_game_1(self): + """Runs a single game where tiles and actions chose deterministically.""" + game = pyspiel.load_game("python_team_dominoes") + state = game.new_initial_state() + hand0 = [ + (1.0, 3.0), + (0.0, 5.0), + (1.0, 1.0), + (2.0, 3.0), + (4.0, 5.0), + (3.0, 5.0), + (0.0, 1.0), + ] + hand1 = [ + (2.0, 5.0), + (3.0, 4.0), + (2.0, 2.0), + (0.0, 4.0), + (3.0, 3.0), + (2.0, 6.0), + (1.0, 6.0), + ] + hand2 = [ + (5.0, 6.0), + (6.0, 6.0), + (1.0, 4.0), + (2.0, 4.0), + (4.0, 4.0), + (0.0, 0.0), + (1.0, 5.0), + ] + hand3 = [ + (4.0, 6.0), + (0.0, 2.0), + (0.0, 3.0), + (3.0, 6.0), + (5.0, 5.0), + (1.0, 2.0), + (0.0, 6.0), + ] + + self.deal_hands(state, [hand0, hand1, hand2, hand3]) + + self.apply_action(state, team_dominoes.Action(0, (3.0, 4.0), None)) + self.apply_action(state, team_dominoes.Action(1, (2.0, 4.0), 4.0)) + self.apply_action(state, team_dominoes.Action(2, (1.0, 2.0), 2.0)) + self.apply_action(state, team_dominoes.Action(3, (0.0, 3.0), 3.0)) + + self.apply_action(state, team_dominoes.Action(0, (1.0, 3.0), 1.0)) + self.apply_action(state, team_dominoes.Action(1, (3.0, 5.0), 3.0)) + self.apply_action(state, team_dominoes.Action(2, (0.0, 2.0), 0.0)) + self.apply_action(state, team_dominoes.Action(3, (2.0, 5.0), 2.0)) + + self.apply_action(state, team_dominoes.Action(0, (1.0, 5.0), 5.0)) + self.apply_action(state, team_dominoes.Action(1, (0.0, 5.0), 5.0)) + self.apply_action(state, team_dominoes.Action(2, (1.0, 1.0), 1.0)) + self.apply_action(state, team_dominoes.Action(3, (0.0, 6.0), 0.0)) + + self.apply_action(state, team_dominoes.Action(0, (3.0, 6.0), 6.0)) + self.apply_action(state, team_dominoes.Action(1, (1.0, 6.0), 1.0)) + self.apply_action(state, team_dominoes.Action(2, (5.0, 6.0), 6.0)) + self.apply_action(state, team_dominoes.Action(3, (3.0, 3.0), 3.0)) + + self.apply_action(state, team_dominoes.Action(0, (4.0, 5.0), 5.0)) + self.apply_action(state, team_dominoes.Action(1, (4.0, 6.0), 4.0)) + self.apply_action(state, team_dominoes.Action(3, (6.0, 6.0), 6.0)) + + self.apply_action(state, team_dominoes.Action(0, (2.0, 6.0), 6.0)) + self.apply_action(state, team_dominoes.Action(1, (2.0, 2.0), 2.0)) + self.apply_action(state, team_dominoes.Action(3, (2.0, 3.0), 3.0)) + # Game is stuck! 
No player can play any tile as all 2.0s are played + + self.assertTrue(state.is_terminal()) + self.assertEqual(state.returns()[0], -18) + self.assertEqual(state.returns()[1], 18) + self.assertEqual(state.returns()[2], -18) + self.assertEqual(state.returns()[3], 18) + + def test_single_deterministic_game_2(self): + """Runs a single game where tiles and actions chose deterministically.""" + game = pyspiel.load_game("python_team_dominoes") + state = game.new_initial_state() + hand0 = [ + (0.0, 6.0), + (3.0, 6.0), + (1.0, 3.0), + (1.0, 4.0), + (5.0, 5.0), + (0.0, 0.0), + (2.0, 6.0), + ] + hand1 = [ + (1.0, 5.0), + (2.0, 2.0), + (0.0, 2.0), + (0.0, 3.0), + (4.0, 5.0), + (6.0, 6.0), + (5.0, 6.0), + ] + hand2 = [ + (2.0, 4.0), + (3.0, 4.0), + (3.0, 3.0), + (0.0, 4.0), + (1.0, 1.0), + (1.0, 6.0), + (3.0, 5.0), + ] + hand3 = [ + (0.0, 5.0), + (0.0, 1.0), + (4.0, 4.0), + (2.0, 3.0), + (1.0, 2.0), + (2.0, 5.0), + (4.0, 6.0), + ] + + self.deal_hands(state, [hand0, hand1, hand2, hand3]) + + self.apply_action(state, team_dominoes.Action(0, (0.0, 6.0), None)) + self.apply_action(state, team_dominoes.Action(1, (0.0, 5.0), 0.0)) + self.apply_action(state, team_dominoes.Action(2, (2.0, 6.0), 6.0)) + self.apply_action(state, team_dominoes.Action(3, (1.0, 5.0), 5.0)) + + self.apply_action(state, team_dominoes.Action(0, (2.0, 3.0), 2.0)) + self.apply_action(state, team_dominoes.Action(1, (3.0, 6.0), 3.0)) + self.apply_action(state, team_dominoes.Action(2, (1.0, 3.0), 1.0)) + self.apply_action(state, team_dominoes.Action(3, (1.0, 6.0), 6.0)) + + self.apply_action(state, team_dominoes.Action(0, (3.0, 5.0), 3.0)) + self.apply_action(state, team_dominoes.Action(1, (5.0, 6.0), 5.0)) + self.apply_action(state, team_dominoes.Action(2, (1.0, 1.0), 1.0)) + self.apply_action(state, team_dominoes.Action(3, (4.0, 6.0), 6.0)) + + # skipped player 0 (has no 4.0 or 1.0 to play) + self.apply_action(state, team_dominoes.Action(1, (0.0, 4.0), 4.0)) + self.apply_action(state, team_dominoes.Action(2, (0.0, 1.0), 1.0)) + # skipped player 3 (has no 0.0s to play) + + # skipped over player 0 (has no 0.0s to play) + self.apply_action(state, team_dominoes.Action(1, (0.0, 0.0), 0.0)) + self.apply_action(state, team_dominoes.Action(2, (0.0, 3.0), 0.0)) + self.apply_action(state, team_dominoes.Action(3, (3.0, 4.0), 3.0)) + + # skipped over player 0 (has no 0.0s nor 4.0s to play) + self.apply_action(state, team_dominoes.Action(1, (0.0, 2.0), 0.0)) + self.apply_action(state, team_dominoes.Action(2, (2.0, 4.0), 2.0)) + self.apply_action(state, team_dominoes.Action(3, (1.0, 4.0), 4.0)) + + # skipped over player 0 (has no 1.0s nor 4.0s to play) + self.apply_action(state, team_dominoes.Action(1, (1.0, 2.0), 1.0)) + # player 1 won (no more tiles to play) + + self.assertTrue(state.is_terminal()) + self.assertEqual(state.returns()[0], -39) + self.assertEqual(state.returns()[1], 39) + self.assertEqual(state.returns()[2], -39) + self.assertEqual(state.returns()[3], 39) + + def apply_action(self, state, action): + actions_str = team_dominoes._ACTIONS_STR + state.apply_action(actions_str.index(str(action))) + + def deal_hands(self, state, hands): + deck = team_dominoes._DECK + for hand in hands: + for t in hand: + state.apply_action(deck.index(t)) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/tic_tac_toe.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/tic_tac_toe.py new file mode 100644 index 0000000..b346bd9 --- /dev/null +++ 
b/scenarios/bargaining/open_spiel/open_spiel/python/games/tic_tac_toe.py @@ -0,0 +1,196 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Tic tac toe (noughts and crosses), implemented in Python. + +This is a demonstration of implementing a deterministic perfect-information +game in Python. + +Python games are significantly slower than C++, but it may still be suitable +for prototyping or for small games. + +It is possible to run C++ algorithms on Python-implemented games. This is likely +to have good performance if the algorithm simply extracts a game tree and then +works with that (e.g. CFR algorithms). It is likely to be poor if the algorithm +relies on processing and updating states as it goes, e.g., MCTS. +""" + +import numpy as np + +from open_spiel.python.observation import IIGObserverForPublicInfoGame +import pyspiel + +_NUM_PLAYERS = 2 +_NUM_ROWS = 3 +_NUM_COLS = 3 +_NUM_CELLS = _NUM_ROWS * _NUM_COLS +_GAME_TYPE = pyspiel.GameType( + short_name="python_tic_tac_toe", + long_name="Python Tic-Tac-Toe", + dynamics=pyspiel.GameType.Dynamics.SEQUENTIAL, + chance_mode=pyspiel.GameType.ChanceMode.DETERMINISTIC, + information=pyspiel.GameType.Information.PERFECT_INFORMATION, + utility=pyspiel.GameType.Utility.ZERO_SUM, + reward_model=pyspiel.GameType.RewardModel.TERMINAL, + max_num_players=_NUM_PLAYERS, + min_num_players=_NUM_PLAYERS, + provides_information_state_string=True, + provides_information_state_tensor=False, + provides_observation_string=True, + provides_observation_tensor=True, + parameter_specification={}) +_GAME_INFO = pyspiel.GameInfo( + num_distinct_actions=_NUM_CELLS, + max_chance_outcomes=0, + num_players=2, + min_utility=-1.0, + max_utility=1.0, + utility_sum=0.0, + max_game_length=_NUM_CELLS) + + +class TicTacToeGame(pyspiel.Game): + """A Python version of the Tic-Tac-Toe game.""" + + def __init__(self, params=None): + super().__init__(_GAME_TYPE, _GAME_INFO, params or dict()) + + def new_initial_state(self): + """Returns a state corresponding to the start of a game.""" + return TicTacToeState(self) + + def make_py_observer(self, iig_obs_type=None, params=None): + """Returns an object used for observing game state.""" + if ((iig_obs_type is None) or + (iig_obs_type.public_info and not iig_obs_type.perfect_recall)): + return BoardObserver(params) + else: + return IIGObserverForPublicInfoGame(iig_obs_type, params) + + +class TicTacToeState(pyspiel.State): + """A python version of the Tic-Tac-Toe state.""" + + def __init__(self, game): + """Constructor; should only be called by Game.new_initial_state.""" + super().__init__(game) + self._cur_player = 0 + self._player0_score = 0.0 + self._is_terminal = False + self.board = np.full((_NUM_ROWS, _NUM_COLS), ".") + + # OpenSpiel (PySpiel) API functions are below. This is the standard set that + # should be implemented by every perfect-information sequential-move game. 
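+  # (Reviewer note: the board is a 3x3 numpy array of "." / "x" / "o" strings;
+  # _apply_action writes the mover's symbol and then checks _line_exists to
+  # decide whether the game ended with a win or a full-board draw.)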
+ + def current_player(self): + """Returns id of the next player to move, or TERMINAL if game is over.""" + return pyspiel.PlayerId.TERMINAL if self._is_terminal else self._cur_player + + def _legal_actions(self, player): + """Returns a list of legal actions, sorted in ascending order.""" + return [a for a in range(_NUM_CELLS) if self.board[_coord(a)] == "."] + + def _apply_action(self, action): + """Applies the specified action to the state.""" + self.board[_coord(action)] = "x" if self._cur_player == 0 else "o" + if _line_exists(self.board): + self._is_terminal = True + self._player0_score = 1.0 if self._cur_player == 0 else -1.0 + elif all(self.board.ravel() != "."): + self._is_terminal = True + else: + self._cur_player = 1 - self._cur_player + + def _action_to_string(self, player, action): + """Action -> string.""" + row, col = _coord(action) + return "{}({},{})".format("x" if player == 0 else "o", row, col) + + def is_terminal(self): + """Returns True if the game is over.""" + return self._is_terminal + + def returns(self): + """Total reward for each player over the course of the game so far.""" + return [self._player0_score, -self._player0_score] + + def __str__(self): + """String for debug purposes. No particular semantics are required.""" + return _board_to_string(self.board) + + +class BoardObserver: + """Observer, conforming to the PyObserver interface (see observation.py).""" + + def __init__(self, params): + """Initializes an empty observation tensor.""" + if params: + raise ValueError(f"Observation parameters not supported; passed {params}") + # The observation should contain a 1-D tensor in `self.tensor` and a + # dictionary of views onto the tensor, which may be of any shape. + # Here the observation is indexed `(cell state, row, column)`. + shape = (1 + _NUM_PLAYERS, _NUM_ROWS, _NUM_COLS) + self.tensor = np.zeros(np.prod(shape), np.float32) + self.dict = {"observation": np.reshape(self.tensor, shape)} + + def set_from(self, state, player): + """Updates `tensor` and `dict` to reflect `state` from PoV of `player`.""" + del player + # We update the observation via the shaped tensor since indexing is more + # convenient than with the 1-D tensor. Both are views onto the same memory. + obs = self.dict["observation"] + obs.fill(0) + for row in range(_NUM_ROWS): + for col in range(_NUM_COLS): + cell_state = ".ox".index(state.board[row, col]) + obs[cell_state, row, col] = 1 + + def string_from(self, state, player): + """Observation of `state` from the PoV of `player`, as a string.""" + del player + return _board_to_string(state.board) + + +# Helper functions for game details. 
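+# (Reviewer note: _line_value returns the winning symbol of a single
+# row/column/diagonal or None; _line_exists chains it with `or` over all eight
+# lines, so a truthy result means somebody has completed a line.)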
+ + +def _line_value(line): + """Checks a possible line, returning the winning symbol if any.""" + if all(line == "x") or all(line == "o"): + return line[0] + + +def _line_exists(board): + """Checks if a line exists, returns "x" or "o" if so, and None otherwise.""" + return (_line_value(board[0]) or _line_value(board[1]) or + _line_value(board[2]) or _line_value(board[:, 0]) or + _line_value(board[:, 1]) or _line_value(board[:, 2]) or + _line_value(board.diagonal()) or + _line_value(np.fliplr(board).diagonal())) + + +def _coord(move): + """Returns (row, col) from an action id.""" + return (move // _NUM_COLS, move % _NUM_COLS) + + +def _board_to_string(board): + """Returns a string representation of the board.""" + return "\n".join("".join(row) for row in board) + + +# Register the game with the OpenSpiel library + +pyspiel.register_game(_GAME_TYPE, TicTacToeGame) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/games/tic_tac_toe_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/games/tic_tac_toe_test.py new file mode 100644 index 0000000..decf0f3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/games/tic_tac_toe_test.py @@ -0,0 +1,154 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Tests for Python Tic-Tac-Toe.""" + +import difflib +import os +import pickle + +from absl.testing import absltest +import numpy as np +from open_spiel.python.algorithms.get_all_states import get_all_states +from open_spiel.python.games import tic_tac_toe +from open_spiel.python.observation import make_observation +import pyspiel + +_DATA_DIR = "open_spiel/integration_tests/playthroughs/" + + +class TicTacToeTest(absltest.TestCase): + + def test_can_create_game_and_state(self): + """Checks we can create the game and a state.""" + game = tic_tac_toe.TicTacToeGame() + state = game.new_initial_state() + self.assertEqual(str(state), "...\n...\n...") + + def test_random_game(self): + """Tests basic API functions.""" + # This is here mostly to show the API by example. + # More serious simulation tests are done in python/tests/games_sim_test.py + # and in test_game_from_cc (below), both of which test the conformance to + # the API thoroughly. 
+ game = tic_tac_toe.TicTacToeGame() + state = game.new_initial_state() + while not state.is_terminal(): + print(state) + cur_player = state.current_player() + legal_actions = state.legal_actions() + action = np.random.choice(legal_actions) + print("Player {} chooses action {}".format(cur_player, action)) + state.apply_action(action) + print(state) + print("Returns: {}".format(state.returns())) + + def test_game_from_cc(self): + """Runs our standard game tests, checking API consistency.""" + game = pyspiel.load_game("python_tic_tac_toe") + pyspiel.random_sim_test(game, num_sims=10, serialize=False, verbose=True) + + def test_playthoughs_consistent(self): + """Checks the saved C++ and Python playthroughs are the same.""" + test_srcdir = os.environ.get("TEST_SRCDIR", "") + path = os.path.join(test_srcdir, _DATA_DIR) + cc_playthrough = os.path.join(path, "tic_tac_toe.txt") + py_playthrough = os.path.join(path, "python_tic_tac_toe.txt") + with open(cc_playthrough, encoding="utf-8") as cc: + with open(py_playthrough, encoding="utf-8") as py: + diffs = difflib.ndiff(list(cc), list(py)) + diffs = {d for d in diffs if d and d[0] in {"+", "-"}} + self.assertEqual( + diffs, { + "- game: tic_tac_toe\n", + "+ game: python_tic_tac_toe\n", + '- GameType.long_name = "Tic Tac Toe"\n', + '+ GameType.long_name = "Python Tic-Tac-Toe"\n', + '- GameType.short_name = "tic_tac_toe"\n', + '+ GameType.short_name = "python_tic_tac_toe"\n', + '- ToString() = "tic_tac_toe()"\n', + '+ ToString() = "python_tic_tac_toe()"\n', + "- CurrentPlayer() = -4\n", + "+ CurrentPlayer() = PlayerId.TERMINAL\n", + "- Returns() = [0, 0]\n", + "+ Returns() = [0, -0]\n", + }) + + def test_observation_tensors_same(self): + """Checks observation tensor is the same from C++ and from Python.""" + game = pyspiel.load_game("python_tic_tac_toe") + state = game.new_initial_state() + for a in [4, 5, 2, 3]: + state.apply_action(a) + py_obs = make_observation(game) + py_obs.set_from(state, state.current_player()) + cc_obs = state.observation_tensor() + np.testing.assert_array_equal(py_obs.tensor, cc_obs) + + def test_pickle(self): + """Checks pickling and unpickling of game and state.""" + game = pyspiel.load_game("python_tic_tac_toe") + pickled_game = pickle.dumps(game) + unpickled_game = pickle.loads(pickled_game) + self.assertEqual(str(game), str(unpickled_game)) + state = game.new_initial_state() + for a in [4, 2, 3, 7]: + state.apply_action(a) + ser_str = pyspiel.serialize_game_and_state(game, state) + new_game, new_state = pyspiel.deserialize_game_and_state(ser_str) + self.assertEqual(str(game), str(new_game)) + self.assertEqual(str(state), str(new_state)) + pickled_state = pickle.dumps(state) + unpickled_state = pickle.loads(pickled_state) + self.assertEqual(str(state), str(unpickled_state)) + + def test_cloned_state_matches_original_state(self): + """Check we can clone states successfully.""" + game = tic_tac_toe.TicTacToeGame() + state = game.new_initial_state() + state.apply_action(1) + state.apply_action(2) + clone = state.clone() + + self.assertEqual(state.history(), clone.history()) + self.assertEqual(state.num_players(), clone.num_players()) + self.assertEqual(state.move_number(), clone.move_number()) + self.assertEqual(state.num_distinct_actions(), clone.num_distinct_actions()) + + self.assertEqual(state._cur_player, clone._cur_player) + self.assertEqual(state._player0_score, clone._player0_score) + self.assertEqual(state._is_terminal, clone._is_terminal) + np.testing.assert_array_equal(state.board, clone.board) + + def 
test_consistent(self): + """Checks the Python and C++ game implementations are the same.""" + py_game = pyspiel.load_game("python_tic_tac_toe") + cc_game = pyspiel.load_game("tic_tac_toe") + py_obs = make_observation(py_game) + cc_obs = make_observation(cc_game) + py_states = get_all_states(py_game, to_string=str) + cc_states = get_all_states(cc_game, to_string=str) + self.assertCountEqual(list(cc_states), list(py_states)) + for key, cc_state in cc_states.items(): + py_state = py_states[key] + np.testing.assert_array_equal(py_state.history(), cc_state.history()) + np.testing.assert_array_equal(py_state.returns(), cc_state.returns()) + py_obs.set_from(py_state, 0) + cc_obs.set_from(cc_state, 0) + np.testing.assert_array_equal(py_obs.tensor, cc_obs.tensor) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/jax/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/jax/__init__.py new file mode 100644 index 0000000..1bf6252 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/jax/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/jax/boltzmann_dqn.py b/scenarios/bargaining/open_spiel/open_spiel/python/jax/boltzmann_dqn.py new file mode 100644 index 0000000..b86cf9f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/jax/boltzmann_dqn.py @@ -0,0 +1,99 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Boltzmann DQN agent implemented in JAX. + +This algorithm is a variation of DQN that uses a softmax policy directly with +the unregularized action-value function. See https://arxiv.org/abs/2102.01585. +""" + +import jax +import jax.numpy as jnp +import numpy as np + +from open_spiel.python.jax import dqn + + +class BoltzmannDQN(dqn.DQN): + """Boltzmann DQN implementation in JAX.""" + + def __init__(self, *args, eta: float = 1.0, seed: int = 42, **kwargs): + """Initializes the Boltzmann DQN agent. + + Args: + *args: args passed to the underlying DQN agent. + eta: Temperature parameter used in the softmax function. + seed: Random seed used for action selection. + **kwargs: kwargs passed to the underlying DQN agent. + """ + self._eta = eta + self._rs = np.random.RandomState(seed) # Used to select actions. 
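+    # (Reviewer note: eta is the softmax temperature used in
+    # _softmax_action_probs below; smaller values make the policy greedier
+    # with respect to the Q-values, larger values push it towards uniform.)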
+ super().__init__(*args, seed=seed, **kwargs) + + def _create_networks(self, rng, state_representation_size): + """Called to create the networks.""" + # We use the DQN networks and an additional network for the fixed policy. + super()._create_networks(rng, state_representation_size) + self.params_prev_q_network = self.hk_network.init( + rng, jnp.ones([1, state_representation_size])) + + def _softmax_action_probs(self, + params, + info_state, + legal_actions, + coeff=None): + """Returns a valid soft-max action and action probabilities. + + Args: + params: Parameters of the Q-network. + info_state: Observations from the environment. + legal_actions: List of legal actions. + coeff: If not None, then the terms in softmax function will be + element-wise multiplied with these coefficients. + + Returns: + a valid soft-max action and action probabilities. + """ + info_state = np.reshape(info_state, [1, -1]) + q_values = self.hk_network_apply(params, info_state)[0] + legal_one_hot = self._to_one_hot(legal_actions) + legal_q_values = ( + q_values + (1 - legal_one_hot) * dqn.ILLEGAL_ACTION_LOGITS_PENALTY) + # Apply temperature and subtract the maximum value for numerical stability. + temp = legal_q_values / self._eta + unnormalized = np.exp(temp - np.amax(temp)) + if coeff is not None: + unnormalized = np.multiply(coeff, unnormalized) + probs = unnormalized / unnormalized.sum() + action = self._rs.choice(legal_actions, p=probs[legal_actions]) + return action, probs + + def _get_action_probs(self, info_state, legal_actions, is_evaluation=False): + """Returns a selected action and the probabilities of legal actions.""" + if is_evaluation: + # Soft-max normalized by the action probabilities from the previous + # Q-network. + _, prev_probs = self._softmax_action_probs(self.params_prev_q_network, + info_state, legal_actions) + return self._softmax_action_probs(self.params_q_network, info_state, + legal_actions, prev_probs) + + # During training, we use the DQN action selection, which will be + # epsilon-greedy. + return super()._get_action_probs( + info_state, legal_actions, is_evaluation=False) + + def update_prev_q_network(self): + """Updates the parameters of the previous Q-network.""" + self.params_prev_q_network = jax.tree_util.tree_map(lambda x: x.copy(), + self.params_q_network) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/jax/boltzmann_dqn_jax_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/jax/boltzmann_dqn_jax_test.py new file mode 100644 index 0000000..f736209 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/jax/boltzmann_dqn_jax_test.py @@ -0,0 +1,73 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
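+# (Reviewer note: the test below follows the intended BoltzmannDQN usage:
+# train with epsilon-greedy steps, call update_prev_q_network(), then act
+# through the softmax policy wrapped in rl_agent_policy.RLAgentPolicy.)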
+ +from absl.testing import absltest + +import jax + +from open_spiel.python import rl_agent_policy +from open_spiel.python import rl_environment +from open_spiel.python.jax import boltzmann_dqn +import pyspiel + +jax.config.update("jax_threefry_partitionable", False) + +# A simple two-action game encoded as an EFG game. Going left gets -1, going +# right gets a +1. +SIMPLE_EFG_DATA = """ + EFG 2 R "Simple single-agent problem" { "Player 1" } "" + p "ROOT" 1 1 "ROOT" { "L" "R" } 0 + t "L" 1 "Outcome L" { -1.0 } + t "R" 2 "Outcome R" { 1.0 } +""" + + +class DQNTest(absltest.TestCase): + + def test_train(self): + game = pyspiel.load_efg_game(SIMPLE_EFG_DATA) + env = rl_environment.Environment(game=game) + agent = boltzmann_dqn.BoltzmannDQN( + 0, + state_representation_size=game.information_state_tensor_shape()[0], + num_actions=game.num_distinct_actions(), + hidden_layers_sizes=[16], + replay_buffer_capacity=100, + batch_size=5, + epsilon_start=0.02, + epsilon_end=0.01, + eta=5.0) + total_reward = 0 + + # Training. This will use the epsilon-greedy actions. + for _ in range(100): + time_step = env.reset() + while not time_step.last(): + agent_output = agent.step(time_step) + time_step = env.step([agent_output.action]) + total_reward += time_step.rewards[0] + agent.step(time_step) + self.assertGreaterEqual(total_reward, -100) + + # Update the previous Q-network. + agent.update_prev_q_network() + + # This will use the soft-max actions. + policy = rl_agent_policy.RLAgentPolicy(game, agent, 0, False) + probs = policy.action_probabilities(game.new_initial_state()) + self.assertAlmostEqual(probs[0], 0.54, places=2) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/jax/cfr/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/jax/cfr/__init__.py new file mode 100644 index 0000000..a1223b9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/jax/cfr/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/jax/cfr/compare_cfr_with_jax.py b/scenarios/bargaining/open_spiel/open_spiel/python/jax/cfr/compare_cfr_with_jax.py new file mode 100644 index 0000000..cbf9c74 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/jax/cfr/compare_cfr_with_jax.py @@ -0,0 +1,109 @@ +# Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
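+# (Reviewer note: this module is meant to be run directly; each compare_*()
+# function below trains both solvers on the same game and prints wall-clock
+# times plus best-response values for the resulting average policies.)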
+ +"""This compares the speed/results of Jax CFR to of the original impl of CFR. + +The results slightly differ due to different rounding of regrets between +original implmentation and CFR. When setting clamping of regrets to 1e-8 the +results are exactly the same. +""" + + +# pylint: disable=g-importing-member + +import time +from open_spiel.python.algorithms.best_response import BestResponsePolicy +from open_spiel.python.algorithms.cfr import CFRPlusSolver +from open_spiel.python.jax.cfr.jax_cfr import JaxCFR +import pyspiel + + +def compare_cfr_with_jax_cfr(game): + """Do the comparison.""" + + start = time.time() + jax_cfr = JaxCFR(game) + print(time.time() - start) + jax_cfr.multiple_steps(10000) + print(time.time() - start) + + start = time.time() + print(time.time() - start) + cfr = CFRPlusSolver(game) + for _ in range(1000): + cfr.evaluate_and_update_policy() + + print(time.time() - start) + + jax_strat = jax_cfr.average_policy() + jax_br1 = BestResponsePolicy(jax_cfr.game, 1, jax_strat) + jax_br2 = BestResponsePolicy(jax_cfr.game, 0, jax_strat) + + cfr_strat = jax_cfr.average_policy() + cfr_br1 = BestResponsePolicy(jax_cfr.game, 1, cfr_strat) + cfr_br2 = BestResponsePolicy(jax_cfr.game, 0, cfr_strat) + + print("Jax P1: ", jax_br1.value(jax_cfr.game.new_initial_state())) + print("CFR P1: ", cfr_br1.value(jax_cfr.game.new_initial_state())) + print("Jax P2: ", jax_br2.value(jax_cfr.game.new_initial_state())) + print("CFR P2: ", cfr_br2.value(jax_cfr.game.new_initial_state())) + + +# Speed Results: +# Original: 139.60753107070923 +# Jax CPU: 3.7404067516326904 +def compare_leduc(): + game = pyspiel.load_game("leduc_poker") + compare_cfr_with_jax_cfr(game) + + +# Speed Results: +# Original: 335.6707363128662 +# Jax CPU: 7.59996485710144 +def compare_battleship(): + game_params = { + "board_height": 2, + "board_width": 2, + "num_shots": 4, + "ship_sizes": "[2]", + "ship_values": "[1]", + "allow_repeated_shots": False, + } + game = pyspiel.load_game("battleship", game_params) + compare_cfr_with_jax_cfr(game) + + +# Speed Results: +# Original: 14.667663097381592 +# Jax CPU: 1.068636417388916 +def compare_goofspiel_descending(): + game_params = {"num_cards": 4, "imp_info": True, "points_order": "descending"} + game = pyspiel.load_game_as_turn_based("goofspiel", game_params) + compare_cfr_with_jax_cfr(game) + + +# Speed Results: +# Original: 6.639796733856201 +# Jax CPU: 0.8599820137023926 +def compare_goofspiel_randomized(): + game_params = {"num_cards": 3, "imp_info": True, "points_order": "random"} + game = pyspiel.load_game_as_turn_based("goofspiel", game_params) + compare_cfr_with_jax_cfr(game) + + +if __name__ == "__main__": + compare_leduc() + compare_battleship() + compare_goofspiel_descending() + compare_goofspiel_randomized() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/jax/cfr/jax_cfr.py b/scenarios/bargaining/open_spiel/open_spiel/python/jax/cfr/jax_cfr.py new file mode 100644 index 0000000..157e2b2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/jax/cfr/jax_cfr.py @@ -0,0 +1,535 @@ +# Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""JAX implementation of the counterfactual regret minimization algorithm usable with GPU acceleration. + +Uses same CFR setting as open_spiel.python.algorithms.cfr._CFRSolverBase and the +usability should be interchangable. + +The results may slightly differ between these 2 versions due to rounding errors +when computing regrets (rounding regrets smaller than epsilon to zero results in +exactly the same results) + +The algorithm performs well in short but wide games, with small amount of +illegal actions and poorly in long games with a lot of illegal actions. +""" + +# pylint: disable=g-importing-member + +from collections import namedtuple +import functools + +import chex +import jax +import jax.numpy as jnp +import numpy as np + +from open_spiel.python import policy +import pyspiel + +JAX_CFR_SIMULTANEOUS_UPDATE = -5 + + +def regret_matching(regret, mask): + """Computes current policy based on current regrets. + + Args: + regret: Current regrets in array Fkiat[Isets, Actions] + mask: Legal action mask Bool[Isets, Actions] + + Returns: + policy: the policy. + """ + regret = jnp.maximum(regret, 0) * mask + total = jnp.sum(regret, axis=-1, keepdims=True) + + return jnp.where(total > 0.0, regret / total, 1.0 / jnp.sum(mask)) * mask + + +def update_regrets_plus(regret): + """Clamps the regrets to be non-negative.""" + return regret * (regret > 0) + + +def update_regrets(regret): + """Updates the regrets without CFRPlus.""" + return regret + + +@chex.dataclass(frozen=True) +class JaxCFRConstants: + """Constants for JaxCFR.""" + + players: int + max_depth: int + # This includes chance outcomes! TODO: We could do this separately for each + # depth to make less computations. + max_actions: int + + max_iset_depth: chex.ArrayTree = () # Is just a list of integers + isets: chex.ArrayTree = () # Is just a list of integers + + depth_history_utility: chex.ArrayTree = () + depth_history_iset: chex.ArrayTree = () + depth_history_actions: chex.ArrayTree = () + depth_history_previous_iset: chex.ArrayTree = () + depth_history_previous_action: chex.ArrayTree = () + + depth_history_next_history: chex.ArrayTree = () + depth_history_player: chex.ArrayTree = () + depth_history_chance: chex.ArrayTree = () + depth_history_previous_history: chex.ArrayTree = () + depth_history_action_mask: chex.ArrayTree = () + depth_history_chance_probabilities: chex.ArrayTree = () + + iset_previous_action: chex.ArrayTree = () + iset_action_mask: chex.ArrayTree = () + iset_action_depth: chex.ArrayTree = () + + +class JaxCFR: + """Class for CFR and CFR. + + First it prepares all the structures in `init`, then it just reuses them + within jitted function `jit_step`. 
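+
+  Reviewer note: a minimal usage sketch (kuhn_poker is just an example game):
+
+    game = pyspiel.load_game("kuhn_poker")
+    solver = JaxCFR(game)
+    solver.multiple_steps(100)
+    avg_policy = solver.average_policy()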
+ """ + + def __init__( + self, + game: pyspiel.Game, + regret_matching_plus=True, + alternating_updates=True, + linear_averaging=True, + ): + self.game = game + self._regret_matching_plus = regret_matching_plus + self._alternating_updates = alternating_updates + self._linear_averaging = linear_averaging + self.timestep = 1 + + self.init() + + def init(self): + """Constructor.""" + + players = self.game.num_players() + depth_history_utility = [[] for _ in range(players)] + depth_history_previous_iset = [[] for _ in range(players)] + depth_history_previous_action = [[] for _ in range(players)] + depth_history_iset = [[] for _ in range(players)] + depth_history_actions = [[] for _ in range(players)] + depth_history_next_history = [] + depth_history_player = [] + depth_history_chance = [] + depth_history_previous_history = [] + depth_history_action_mask = [] + depth_history_chance_probabilities = [] + # Previous action is mapping of both iset and action! + iset_previous_action = [[] for _ in range(players)] + iset_action_mask = [[] for _ in range(players)] + iset_action_depth = [[] for _ in range(players)] + ids = [0 for _ in range(players)] + pl_isets = [{} for _ in range(players)] + distinct_actions = max( + self.game.num_distinct_actions(), self.game.max_chance_outcomes() + ) + + for pl in range(players): + pl_isets[pl][''] = ids[pl] + ids[pl] += 1 + am = [0] * distinct_actions + am[0] = 1 + iset_action_mask[pl].append(am) + iset_previous_action[pl].append(0) + iset_action_depth[pl].append(0) + + PreviousInfo = namedtuple( + 'PreviousInfo', + ('actions', 'isets', 'prev_actions', 'history', 'player'), + ) + + def _traverse_tree(state, previous_info, depth, chance=1.0): + + if len(depth_history_next_history) <= depth: + for pl in range(players): + depth_history_utility[pl].append([]) + depth_history_previous_iset[pl].append([]) + depth_history_previous_action[pl].append([]) + depth_history_iset[pl].append([]) + depth_history_actions[pl].append([]) + + depth_history_next_history.append([]) + depth_history_player.append([]) + depth_history_chance.append([]) + depth_history_previous_history.append([]) + depth_history_action_mask.append([]) + depth_history_chance_probabilities.append([]) + + history_id = len(depth_history_previous_history[depth]) + + next_history_temp = [0] * distinct_actions + depth_history_next_history[depth].append(next_history_temp) + depth_history_player[depth].append(state.current_player()) + depth_history_chance[depth].append(chance) + depth_history_previous_history[depth].append(previous_info.history) + + actions_mask = [0] * distinct_actions + for a in state.legal_actions(): + actions_mask[a] = 1 + depth_history_action_mask[depth].append(actions_mask) + chance_probabilities = [0.0 for _ in range(distinct_actions)] + if state.is_chance_node(): + for a, prob in state.chance_outcomes(): + chance_probabilities[a] = prob + elif not state.is_terminal(): + chance_probabilities = [1.0 for _ in range(distinct_actions)] + else: + chance_probabilities = [ + 1.0 / distinct_actions for _ in range(distinct_actions) + ] + + depth_history_chance_probabilities[depth].append(chance_probabilities) + for pl in range(players): + depth_history_utility[pl][depth].append( + state.rewards()[pl] if not state.is_chance_node() else 0.0 + ) + depth_history_previous_iset[pl][depth].append(previous_info.isets[pl]) + depth_history_previous_action[pl][depth].append( + previous_info.actions[pl] + ) + if state.current_player() == pl: + iset = state.information_state_string() + if iset not in 
pl_isets[pl]: + pl_isets[pl][iset] = ids[pl] + ids[pl] += 1 + iset_previous_action[pl].append(previous_info.actions[pl]) + iset_action_mask[pl].append(actions_mask) + iset_action_depth[pl].append(previous_info.prev_actions[pl]) + depth_history_iset[pl][depth].append(pl_isets[pl][iset]) + depth_history_actions[pl][depth].append([ + i + pl_isets[pl][iset] * distinct_actions + for i in range(distinct_actions) + ]) + else: + depth_history_iset[pl][depth].append(0) + depth_history_actions[pl][depth].append( + [0 for _ in range(distinct_actions)] + ) + + for a in state.legal_actions(): + new_chance = chance * chance_probabilities[a] + assert new_chance > 0.0 + new_actions = tuple( + previous_info.actions[pl] + if state.current_player() != pl + else pl_isets[pl][iset] * distinct_actions + a + for pl in range(players) + ) + new_infosets = tuple( + previous_info.isets[pl] + if state.current_player() != pl + else pl_isets[pl][iset] + for pl in range(players) + ) + new_prev_actions = tuple( + previous_info.prev_actions[pl] + int(state.current_player() == pl) + for pl in range(players) + ) + new_info = PreviousInfo( + new_actions, + new_infosets, + new_prev_actions, + history_id, + state.current_player(), + ) + new_state = state.clone() + new_state.apply_action(a) + + # simple workaround if the next element was not visited yet + next_history_temp[a] = ( + len(depth_history_player[depth + 1]) + if len(depth_history_player) > depth + 1 + else 0 + ) + + _traverse_tree(new_state, new_info, depth + 1, new_chance) + + s = self.game.new_initial_state() + _traverse_tree( + s, + PreviousInfo( + tuple(0 for _ in range(players)), + tuple(0 for _ in range(players)), + tuple(0 for _ in range(players)), + 0, + 0, + ), + 0, + ) + + def convert_to_jax(x): + return [jnp.asarray(i) for i in x] + + def convert_to_jax_players(x): + return [[jnp.asarray(i) for i in x[pl]] for pl in range(players)] + + depth_history_utility = convert_to_jax_players(depth_history_utility) + depth_history_iset = convert_to_jax_players(depth_history_iset) + depth_history_previous_iset = convert_to_jax_players( + depth_history_previous_iset + ) + depth_history_actions = convert_to_jax_players(depth_history_actions) + depth_history_previous_action = convert_to_jax_players( + depth_history_previous_action + ) + + depth_history_next_history = convert_to_jax(depth_history_next_history) + depth_history_player = convert_to_jax(depth_history_player) + depth_history_chance = convert_to_jax(depth_history_chance) + depth_history_previous_history = convert_to_jax( + depth_history_previous_history + ) + depth_history_chance_probabilities = convert_to_jax( + depth_history_chance_probabilities + ) + depth_history_action_mask = convert_to_jax(depth_history_action_mask) + + max_iset_depth = [np.max(iset_action_depth[pl]) for pl in range(players)] + iset_previous_action = convert_to_jax(iset_previous_action) + iset_action_mask = convert_to_jax(iset_action_mask) + iset_action_depth = convert_to_jax(iset_action_depth) + + self.constants = JaxCFRConstants( + players=players, + max_depth=int(len(depth_history_utility[0])), + max_actions=distinct_actions, + max_iset_depth=max_iset_depth, + isets=ids, + depth_history_utility=depth_history_utility, + depth_history_iset=depth_history_iset, + depth_history_actions=depth_history_actions, + depth_history_previous_iset=depth_history_previous_iset, + depth_history_previous_action=depth_history_previous_action, + depth_history_next_history=depth_history_next_history, + depth_history_player=depth_history_player, + 
depth_history_chance=depth_history_chance, + depth_history_previous_history=depth_history_previous_history, + depth_history_action_mask=depth_history_action_mask, + depth_history_chance_probabilities=depth_history_chance_probabilities, + iset_previous_action=iset_previous_action, + iset_action_mask=iset_action_mask, + iset_action_depth=iset_action_depth, + ) + + self.regrets = [ + jnp.zeros((ids[pl], distinct_actions)) for pl in range(players) + ] + self.averages = [ + jnp.zeros((ids[pl], distinct_actions)) for pl in range(players) + ] + + self.regret_matching = jax.vmap(regret_matching, 0, 0) + if self._regret_matching_plus: + self.update_regrets = jax.vmap(update_regrets_plus, 0, 0) + else: + self.update_regrets = jax.vmap(update_regrets, 0, 0) + + self.iset_map = pl_isets + + def multiple_steps(self, iterations: int): + """Performs several CFR steps. + + Args: + iterations: Amount of CFR steps, the solver should do. + """ + for _ in range(iterations): + self.step() + + def evaluate_and_update_policy(self): + """Wrapper to step(). + + Ensures interchangability with + open_spiel.python.algorithms.cfr._CFRSolverBase. + """ + self.step() + + def step(self): + """Wrapper around the jitted function for performing CFR step.""" + averaging_coefficient = self.timestep if self._linear_averaging else 1 + if self._alternating_updates: + for player in range(self.constants.players): + self.regrets, self.averages = self.jit_step( + self.regrets, self.averages, averaging_coefficient, player + ) + + else: + self.regrets, self.averages = self.jit_step( + self.regrets, + self.averages, + averaging_coefficient, + JAX_CFR_SIMULTANEOUS_UPDATE, + ) + + self.timestep += 1 + + def propagate_strategy(self, current_strategies): + """Propagtes the strategies withing infosets. + + Args: + current_strategies: Current strategies for all players, list[Float[Isets, + Actions]] + Returns: + realization_plans: the realization plans. + """ + realization_plans = [ + jnp.ones_like(current_strategies[pl]) + for pl in range(self.constants.players) + ] + + for pl in range(self.constants.players): + for i in range(0, self.constants.max_iset_depth[pl] + 1): + realization_plans[pl] = jnp.where( + self.constants.iset_action_depth[pl][..., jnp.newaxis] == i, + current_strategies[pl] + * realization_plans[pl].ravel()[ + self.constants.iset_previous_action[pl] + ][..., jnp.newaxis], + realization_plans[pl], + ) + + return realization_plans + + @functools.partial(jax.jit, static_argnums=(0,)) + def jit_step( + self, regrets, averages, average_policy_update_coefficient, player + ): + """Performs the CFR step. + + This consists of: + 1. Computes the current strategies based on regrets + 2. Computes the realization plan for each action from top of the tree down + 3. Compute the counterfactual regrets from bottom of the tree up + 4. Updates regrets and average stretegies + + Args: + regrets: Cummulative regrets for all players, list[Float[Isets, Actions]] + averages: Average strategies for all players, list[Float[Isets, Actions]] + average_policy_update_coefficient: Weight of the average policy update. + When enabled linear_averging it is equal to current iteration. Otherwise + 1, int + player: Player for which the update should be done. When alternating + updates are distables, it is JAX_CFR_SIMULTANEOUS_UPDATE + + Returns: + regrets: the regrets. + averages: the averages. 
+ """ + current_strategies = [ + self.regret_matching(regrets[pl], self.constants.iset_action_mask[pl]) + for pl in range(self.constants.players) + ] + + realization_plans = self.propagate_strategy(current_strategies) + iset_reaches = [ + jnp.sum(realization_plans[pl], -1) + for pl in range(self.constants.players) + ] + # In last row, there are only terminal, so we start row before it + depth_utils = [ + [self.constants.depth_history_utility[pl][-1]] + for pl in range(self.constants.players) + ] + for i in range(self.constants.max_depth - 2, -1, -1): + + each_history_policy = self.constants.depth_history_chance_probabilities[i] + for pl in range(self.constants.players): + each_history_policy = each_history_policy * jnp.where( + self.constants.depth_history_player[i][..., jnp.newaxis] == pl, + current_strategies[pl][self.constants.depth_history_iset[pl][i]], + 1, + ) + + for pl in range(self.constants.players): + action_value = jnp.where( + self.constants.depth_history_player[i][..., jnp.newaxis] == -4, + self.constants.depth_history_utility[pl][i][..., jnp.newaxis], + depth_utils[pl][-1][self.constants.depth_history_next_history[i]], + ) + history_value = jnp.sum(action_value * each_history_policy, -1) + regret = ( + (action_value - history_value[..., jnp.newaxis]) + * self.constants.depth_history_action_mask[i] + * (self.constants.depth_history_player[i][..., jnp.newaxis] == pl) + * self.constants.depth_history_chance[i][..., jnp.newaxis] + ) + for pl2 in range(self.constants.players): + if pl != pl2: + regret = ( + regret + * realization_plans[pl2].ravel()[ + self.constants.depth_history_previous_action[pl2][i] + ][..., jnp.newaxis] + ) + bin_regrets = jnp.bincount( + self.constants.depth_history_actions[pl][i].ravel(), + regret.ravel(), + length=self.constants.isets[pl] * self.constants.max_actions, + ) + bin_regrets = bin_regrets.reshape(-1, self.constants.max_actions) + regrets[pl] = jnp.where( + jnp.logical_or(player == pl, player == JAX_CFR_SIMULTANEOUS_UPDATE), + regrets[pl] + bin_regrets, + regrets[pl], + ) + depth_utils[pl].append(history_value) + + regrets = [ + self.update_regrets(regrets[pl]) for pl in range(self.constants.players) + ] + + averages = [ + jnp.where( + jnp.logical_or(player == pl, player == JAX_CFR_SIMULTANEOUS_UPDATE), + averages[pl] + + current_strategies[pl] + * iset_reaches[pl][..., jnp.newaxis] + * average_policy_update_coefficient, + averages[pl], + ) + for pl in range(self.constants.players) + ] + + return regrets, averages + + def average_policy(self): + """Extracts the average policy from JAX structures into a TabularPolicy.""" + averages = [ + np.asarray(self.averages[pl]) for pl in range(self.constants.players) + ] + averages = [ + averages[pl] / np.sum(averages[pl], -1, keepdims=True) + for pl in range(self.constants.players) + ] + + avg_strategy = policy.TabularPolicy(self.game) + + for pl in range(2): + for iset, val in self.iset_map[pl].items(): + if not iset: + continue + state_policy = avg_strategy.policy_for_key(iset) + for i in range(len(state_policy)): + state_policy[i] = averages[pl][val][i] + return avg_strategy + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/jax/cfr/jax_cfr_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/jax/cfr/jax_cfr_test.py new file mode 100644 index 0000000..737cb9e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/jax/cfr/jax_cfr_test.py @@ -0,0 +1,95 @@ +# Copyright 2019 DeepMind Technologies Ltd. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.algorithms.jax.jax_cfr. + +All of them are taken from open_spiel.python.algorithms.cfr_test.py +""" + +import itertools + +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np + +from open_spiel.python import policy +from open_spiel.python.algorithms import expected_game_score +from open_spiel.python.jax.cfr.jax_cfr import JaxCFR +import pyspiel + + +class CFRTest(parameterized.TestCase, absltest.TestCase): + + def test_cfr_kuhn_poker(self): + game = pyspiel.load_game("kuhn_poker") + cfr_solver = JaxCFR(game) + cfr_solver.multiple_steps(300) + average_policy = cfr_solver.average_policy() + average_policy_values = expected_game_score.policy_value( + game.new_initial_state(), [average_policy] * 2 + ) + # 1/18 is the Nash value. See https://en.wikipedia.org/wiki/Kuhn_poker + np.testing.assert_allclose( + average_policy_values, [-1 / 18, 1 / 18], atol=1e-3 + ) + + def test_cfr_plus_kuhn_poker(self): + game = pyspiel.load_game("kuhn_poker") + cfr_solver = JaxCFR(game) + cfr_solver.multiple_steps(200) + average_policy = cfr_solver.average_policy() + average_policy_values = expected_game_score.policy_value( + game.new_initial_state(), [average_policy] * 2 + ) + # 1/18 is the Nash value. 
See https://en.wikipedia.org/wiki/Kuhn_poker + np.testing.assert_allclose( + average_policy_values, [-1 / 18, 1 / 18], atol=1e-3 + ) + + def test_cfr_plus_solver_best_response_mdp(self): + game = pyspiel.load_game("kuhn_poker") + cfr_solver = JaxCFR(game, True, True, True) + cfr_solver.multiple_steps(200) + average_policy = cfr_solver.average_policy() + pyspiel_avg_policy = policy.python_policy_to_pyspiel_policy(average_policy) + br_computer = pyspiel.TabularBestResponseMDP(game, pyspiel_avg_policy) + br_info = br_computer.exploitability() + self.assertLessEqual(br_info.exploitability, 0.001) + + @parameterized.parameters( + list(itertools.product([True, False], [True, False], [True, False])) + ) + def test_cfr_kuhn_poker_runs_with_multiple_players( + self, linear_averaging, alternating_updates, regret_matching_plus + ): + num_players = 3 + + game = pyspiel.load_game("kuhn_poker", {"players": num_players}) + cfr_solver = JaxCFR( + game, + regret_matching_plus=regret_matching_plus, + alternating_updates=alternating_updates, + linear_averaging=linear_averaging, + ) + # for _ in range(10): + cfr_solver.multiple_steps(10) + average_policy = cfr_solver.average_policy() + average_policy_values = expected_game_score.policy_value( + game.new_initial_state(), [average_policy] * num_players + ) + del average_policy_values + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/jax/deep_cfr.py b/scenarios/bargaining/open_spiel/open_spiel/python/jax/deep_cfr.py new file mode 100644 index 0000000..e8e141f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/jax/deep_cfr.py @@ -0,0 +1,574 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Implements Deep CFR Algorithm. + +See https://arxiv.org/abs/1811.00164. + +The algorithm defines an `advantage` and `strategy` networks that compute +advantages used to do regret matching across information sets and to approximate +the strategy profiles of the game. To train these networks a reservoir buffer +(other data structures may be used) memory is used to accumulate samples to +train the networks. + +This implementation uses skip connections as described in the paper if two +consecutive layers of the advantage or policy network have the same number +of units, except for the last connection. Before the last hidden layer +a layer normalization is applied. + +NOTE: the deep_cfr_jax_test.py is no longer run on github CI as TF1 is no +longer supported yet still required in this file. 
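+
+Minimal usage sketch (illustrative only; the game choice and the tiny
+iteration and traversal budgets are placeholders):
+
+  game = pyspiel.load_game("kuhn_poker")
+  solver = DeepCFRSolver(game, num_iterations=2, num_traversals=2)
+  _, advantage_losses, policy_loss = solver.solve()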
+""" + +import collections +import random + +import haiku as hk +import jax +import jax.numpy as jnp +import numpy as np +import optax + +# tensorflow is only used for data processing +import tensorflow as tf +import tensorflow_datasets as tfds + +from open_spiel.python import policy +import pyspiel + +# The size of the shuffle buffer used to reshuffle part of the data each +# epoch within one training iteration +ADVANTAGE_TRAIN_SHUFFLE_SIZE = 100000 +STRATEGY_TRAIN_SHUFFLE_SIZE = 1000000 + + +# TODO(author3) Refactor into data structures lib. +class ReservoirBuffer(object): + """Allows uniform sampling over a stream of data. + + This class supports the storage of arbitrary elements, such as observation + tensors, integer actions, etc. + + See https://en.wikipedia.org/wiki/Reservoir_sampling for more details. + """ + + def __init__(self, reservoir_buffer_capacity): + self._reservoir_buffer_capacity = reservoir_buffer_capacity + self._data = [] + self._add_calls = 0 + + def add(self, element): + """Potentially adds `element` to the reservoir buffer. + + Args: + element: data to be added to the reservoir buffer. + """ + if len(self._data) < self._reservoir_buffer_capacity: + self._data.append(element) + else: + idx = np.random.randint(0, self._add_calls + 1) + if idx < self._reservoir_buffer_capacity: + self._data[idx] = element + self._add_calls += 1 + + def sample(self, num_samples): + """Returns `num_samples` uniformly sampled from the buffer. + + Args: + num_samples: `int`, number of samples to draw. + + Returns: + An iterable over `num_samples` random elements of the buffer. + + Raises: + ValueError: If there are less than `num_samples` elements in the buffer + """ + if len(self._data) < num_samples: + raise ValueError('{} elements could not be sampled from size {}'.format( + num_samples, len(self._data))) + return random.sample(self._data, num_samples) + + def clear(self): + self._data = [] + self._add_calls = 0 + + def __len__(self): + return len(self._data) + + def __iter__(self): + return iter(self._data) + + @property + def data(self): + return self._data + + def shuffle_data(self): + random.shuffle(self._data) + + +class DeepCFRSolver(policy.Policy): + """Implements a solver for the Deep CFR Algorithm. + + See https://arxiv.org/abs/1811.00164. + + Define all networks and sampling buffers/memories. Derive losses & learning + steps. Initialize the game state and algorithmic variables. + """ + + def __init__(self, + game, + policy_network_layers=(256, 256), + advantage_network_layers=(128, 128), + num_iterations: int = 100, + num_traversals: int = 100, + learning_rate: float = 1e-3, + batch_size_advantage: int = 2048, + batch_size_strategy: int = 2048, + memory_capacity: int = int(1e6), + policy_network_train_steps: int = 5000, + advantage_network_train_steps: int = 750, + reinitialize_advantage_networks: bool = True): + """Initialize the Deep CFR algorithm. + + Args: + game: Open Spiel game. + policy_network_layers: (list[int]) Layer sizes of strategy net MLP. + advantage_network_layers: (list[int]) Layer sizes of advantage net MLP. + num_iterations: Number of iterations. + num_traversals: Number of traversals per iteration. + learning_rate: Learning rate. + batch_size_advantage: (int) Batch size to sample from advantage memories. + batch_size_strategy: (int) Batch size to sample from strategy memories. + memory_capacity: Number of samples that can be stored in memory. + policy_network_train_steps: Number of policy network training steps (one + policy training iteration at the end). 
+ advantage_network_train_steps: Number of advantage network training steps + (per iteration). + reinitialize_advantage_networks: Whether to re-initialize the advantage + network before training on each iteration. + """ + all_players = list(range(game.num_players())) + super(DeepCFRSolver, self).__init__(game, all_players) + self._game = game + if game.get_type().dynamics == pyspiel.GameType.Dynamics.SIMULTANEOUS: + # `_traverse_game_tree` does not take into account this option. + raise ValueError('Simulatenous games are not supported.') + self._batch_size_advantage = batch_size_advantage + self._batch_size_strategy = batch_size_strategy + self._policy_network_train_steps = policy_network_train_steps + self._advantage_network_train_steps = advantage_network_train_steps + self._policy_network_layers = policy_network_layers + self._advantage_network_layers = advantage_network_layers + self._num_players = game.num_players() + self._root_node = self._game.new_initial_state() + self._embedding_size = len(self._root_node.information_state_tensor(0)) + self._num_iterations = num_iterations + self._num_traversals = num_traversals + self._reinitialize_advantage_networks = reinitialize_advantage_networks + self._num_actions = game.num_distinct_actions() + self._iteration = 1 + self._learning_rate = learning_rate + self._rngkey = jax.random.PRNGKey(42) + + # Initialize networks + def base_network(x, layers): + x = hk.nets.MLP(layers[:-1], activate_final=True)(x) + x = hk.LayerNorm(axis=-1, create_scale=True, create_offset=True)(x) + x = hk.Linear(layers[-1])(x) + x = jax.nn.relu(x) + x = hk.Linear(self._num_actions)(x) + return x + + def adv_network(x, mask): + x = base_network(x, advantage_network_layers) + x = mask * x + return x + + def policy_network(x, mask): + x = base_network(x, policy_network_layers) + x = jnp.where(mask == 1, x, -10e20) + x = jax.nn.softmax(x) + return x + + x, mask = (jnp.ones([1, self._embedding_size]), + jnp.ones([1, self._num_actions])) + self._hk_adv_network = hk.without_apply_rng(hk.transform(adv_network)) + self._params_adv_network = [ + self._hk_adv_network.init(self._next_rng_key(), x, mask) + for _ in range(self._num_players) + ] + self._hk_policy_network = hk.without_apply_rng(hk.transform(policy_network)) + self._params_policy_network = self._hk_policy_network.init( + self._next_rng_key(), x, mask) + + # initialize losses and grads + self._adv_loss = optax.l2_loss + self._policy_loss = optax.l2_loss + self._adv_grads = jax.value_and_grad(self._loss_adv) + self._policy_grads = jax.value_and_grad(self._loss_policy) + + # initialize optimizers + self._opt_adv_init, self._opt_adv_update = optax.adam(learning_rate) + self._opt_adv_state = [ + self._opt_adv_init(params) for params in self._params_adv_network + ] + self._opt_policy_init, self._opt_policy_update = optax.adam(learning_rate) + self._opt_policy_state = self._opt_policy_init(self._params_policy_network) + + # initialize memories + self._create_memories(memory_capacity) + + # jit param updates and matched regrets calculations + self._jitted_matched_regrets = self._get_jitted_matched_regrets() + self._jitted_adv_update = self._get_jitted_adv_update() + self._jitted_policy_update = self._get_jitted_policy_update() + + def _get_jitted_adv_update(self): + """get jitted advantage update function.""" + + @jax.jit + def update(params_adv, opt_state, info_states, samp_regrets, iterations, + masks, total_iterations): + main_loss, grads = self._adv_grads(params_adv, info_states, samp_regrets, + iterations, masks, 
total_iterations) + updates, new_opt_state = self._opt_adv_update(grads, opt_state) + new_params = optax.apply_updates(params_adv, updates) + return new_params, new_opt_state, main_loss + + return update + + def _get_jitted_policy_update(self): + """get jitted policy update function.""" + + @jax.jit + def update(params_policy, opt_state, info_states, action_probs, iterations, + masks, total_iterations): + main_loss, grads = self._policy_grads(params_policy, info_states, + action_probs, iterations, masks, + total_iterations) + updates, new_opt_state = self._opt_policy_update(grads, opt_state) + new_params = optax.apply_updates(params_policy, updates) + return new_params, new_opt_state, main_loss + + return update + + def _get_jitted_matched_regrets(self): + """get jitted regret matching function.""" + + @jax.jit + def get_matched_regrets(info_state, legal_actions_mask, params_adv): + advs = self._hk_adv_network.apply(params_adv, info_state, + legal_actions_mask) + advantages = jnp.maximum(advs, 0) + summed_regret = jnp.sum(advantages) + matched_regrets = jax.lax.cond( + summed_regret > 0, lambda _: advantages / summed_regret, + lambda _: jax.nn.one_hot( # pylint: disable=g-long-lambda + jnp.argmax(jnp.where(legal_actions_mask == 1, advs, -10e20)), self + ._num_actions), None) + return advantages, matched_regrets + + return get_matched_regrets + + def _next_rng_key(self): + """Get the next rng subkey from class rngkey.""" + self._rngkey, subkey = jax.random.split(self._rngkey) + return subkey + + def _reinitialize_policy_network(self): + """Reinitalize policy network and optimizer for training.""" + x, mask = (jnp.ones([1, self._embedding_size]), + jnp.ones([1, self._num_actions])) + self._params_policy_network = self._hk_policy_network.init( + self._next_rng_key(), x, mask) + self._opt_policy_state = self._opt_policy_init(self._params_policy_network) + + def _reinitialize_advantage_network(self, player): + """Reinitalize player's advantage network and optimizer for training.""" + x, mask = (jnp.ones([1, self._embedding_size]), + jnp.ones([1, self._num_actions])) + self._params_adv_network[player] = self._hk_adv_network.init( + self._next_rng_key(), x, mask) + self._opt_adv_state[player] = self._opt_adv_init( + self._params_adv_network[player]) + + @property + def advantage_buffers(self): + return self._advantage_memories + + @property + def strategy_buffer(self): + return self._strategy_memories + + def clear_advantage_buffers(self): + for p in range(self._num_players): + self._advantage_memories[p].clear() + + def _create_memories(self, memory_capacity): + """Create memory buffers and associated feature descriptions.""" + self._strategy_memories = ReservoirBuffer(memory_capacity) + self._advantage_memories = [ + ReservoirBuffer(memory_capacity) for _ in range(self._num_players) + ] + self._strategy_feature_description = { + 'info_state': tf.io.FixedLenFeature([self._embedding_size], tf.float32), + 'action_probs': tf.io.FixedLenFeature([self._num_actions], tf.float32), + 'iteration': tf.io.FixedLenFeature([1], tf.float32), + 'legal_actions': tf.io.FixedLenFeature([self._num_actions], tf.float32) + } + self._advantage_feature_description = { + 'info_state': tf.io.FixedLenFeature([self._embedding_size], tf.float32), + 'iteration': tf.io.FixedLenFeature([1], tf.float32), + 'samp_regret': tf.io.FixedLenFeature([self._num_actions], tf.float32), + 'legal_actions': tf.io.FixedLenFeature([self._num_actions], tf.float32) + } + + def solve(self): + """Solution logic for Deep CFR.""" + 
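+    # Per iteration: each player runs `num_traversals` external-sampling
+    # traversals to populate its advantage memory, the player's advantage
+    # network is (optionally) reinitialized and retrained, and only after all
+    # iterations is the strategy (policy) network fit once.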
advantage_losses = collections.defaultdict(list) + for _ in range(self._num_iterations): + for p in range(self._num_players): + for _ in range(self._num_traversals): + self._traverse_game_tree(self._root_node, p) + if self._reinitialize_advantage_networks: + # Re-initialize advantage network for p and train from scratch. + self._reinitialize_advantage_network(p) + advantage_losses[p].append(self._learn_advantage_network(p)) + self._iteration += 1 + # Train policy network. + policy_loss = self._learn_strategy_network() + return None, advantage_losses, policy_loss + + def _serialize_advantage_memory(self, info_state, iteration, samp_regret, + legal_actions_mask): + """Create serialized example to store an advantage entry.""" + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'info_state': + tf.train.Feature( + float_list=tf.train.FloatList(value=info_state)), + 'iteration': + tf.train.Feature( + float_list=tf.train.FloatList(value=[iteration])), + 'samp_regret': + tf.train.Feature( + float_list=tf.train.FloatList(value=samp_regret)), + 'legal_actions': + tf.train.Feature( + float_list=tf.train.FloatList(value=legal_actions_mask)) + })) + return example.SerializeToString() + + def _deserialize_advantage_memory(self, serialized): + """Deserializes a batch of advantage examples for the train step.""" + tups = tf.io.parse_example(serialized, self._advantage_feature_description) + return (tups['info_state'], tups['samp_regret'], tups['iteration'], + tups['legal_actions']) + + def _serialize_strategy_memory(self, info_state, iteration, + strategy_action_probs, legal_actions_mask): + """Create serialized example to store a strategy entry.""" + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'info_state': + tf.train.Feature( + float_list=tf.train.FloatList(value=info_state)), + 'action_probs': + tf.train.Feature( + float_list=tf.train.FloatList( + value=strategy_action_probs)), + 'iteration': + tf.train.Feature( + float_list=tf.train.FloatList(value=[iteration])), + 'legal_actions': + tf.train.Feature( + float_list=tf.train.FloatList(value=legal_actions_mask)) + })) + return example.SerializeToString() + + def _deserialize_strategy_memory(self, serialized): + """Deserializes a batch of strategy examples for the train step.""" + tups = tf.io.parse_example(serialized, self._strategy_feature_description) + return (tups['info_state'], tups['action_probs'], tups['iteration'], + tups['legal_actions']) + + def _add_to_strategy_memory(self, info_state, iteration, + strategy_action_probs, legal_actions_mask): + # pylint: disable=g-doc-args + """Adds the given strategy data to the memory. + + Uses either a tfrecordsfile on disk if provided, or a reservoir buffer. + """ + serialized_example = self._serialize_strategy_memory( + info_state, iteration, strategy_action_probs, legal_actions_mask) + self._strategy_memories.add(serialized_example) + + def _traverse_game_tree(self, state, player): + """Performs a traversal of the game tree using external sampling. + + Over a traversal the advantage and strategy memories are populated with + computed advantage values and matched regrets respectively. + + Args: + state: Current OpenSpiel game state. + player: (int) Player index for this traversal. + + Returns: + Recursively returns expected payoffs for each action. + """ + if state.is_terminal(): + # Terminal state get returns. 
+ return state.returns()[player] + elif state.is_chance_node(): + # If this is a chance node, sample an action + chance_outcome, chance_proba = zip(*state.chance_outcomes()) + action = np.random.choice(chance_outcome, p=chance_proba) + return self._traverse_game_tree(state.child(action), player) + elif state.current_player() == player: + # Update the policy over the info set & actions via regret matching. + _, strategy = self._sample_action_from_advantage(state, player) + strategy = np.array(strategy) + exp_payoff = 0 * strategy + for action in state.legal_actions(): + exp_payoff[action] = self._traverse_game_tree( + state.child(action), player) + ev = np.sum(exp_payoff * strategy) + samp_regret = (exp_payoff - ev) * state.legal_actions_mask(player) + self._advantage_memories[player].add( + self._serialize_advantage_memory(state.information_state_tensor(), + self._iteration, samp_regret, + state.legal_actions_mask(player))) + return ev + else: + other_player = state.current_player() + _, strategy = self._sample_action_from_advantage(state, other_player) + # Recompute distribution for numerical errors. + probs = np.array(strategy) + probs /= probs.sum() + sampled_action = np.random.choice(range(self._num_actions), p=probs) + self._add_to_strategy_memory( + state.information_state_tensor(other_player), self._iteration, probs, + state.legal_actions_mask(other_player)) + return self._traverse_game_tree(state.child(sampled_action), player) + + def _sample_action_from_advantage(self, state, player): + """Returns an info state policy by applying regret-matching. + + Args: + state: Current OpenSpiel game state. + player: (int) Player index over which to compute regrets. + + Returns: + 1. (np-array) Advantage values for info state actions indexed by action. + 2. (np-array) Matched regrets, prob for actions indexed by action. 
+ """ + info_state = jnp.array( + state.information_state_tensor(player), dtype=jnp.float32) + legal_actions_mask = jnp.array( + state.legal_actions_mask(player), dtype=jnp.float32) + advantages, matched_regrets = self._jitted_matched_regrets( + info_state, legal_actions_mask, self._params_adv_network[player]) + return advantages, matched_regrets + + def action_probabilities(self, state, player_id=None): + """Returns action probabilities dict for a single batch.""" + del player_id # unused + cur_player = state.current_player() + legal_actions = state.legal_actions(cur_player) + info_state_vector = jnp.array( + state.information_state_tensor(), dtype=jnp.float32) + legal_actions_mask = jnp.array( + state.legal_actions_mask(cur_player), dtype=jnp.float32) + probs = self._hk_policy_network.apply(self._params_policy_network, + info_state_vector, legal_actions_mask) + return {action: probs[action] for action in legal_actions} + + def _get_advantage_dataset(self, player, nr_steps=1): + """Returns the collected regrets for the given player as a dataset.""" + self._advantage_memories[player].shuffle_data() + data = tf.data.Dataset.from_tensor_slices( + self._advantage_memories[player].data) + data = data.repeat() + data = data.shuffle(ADVANTAGE_TRAIN_SHUFFLE_SIZE) + data = data.batch(self._batch_size_advantage) + data = data.map(self._deserialize_advantage_memory) + data = data.prefetch(tf.data.experimental.AUTOTUNE) + data = data.take(nr_steps) + return iter(tfds.as_numpy(data)) + + def _get_strategy_dataset(self, nr_steps=1): + """Returns the collected strategy memories as a dataset.""" + self._strategy_memories.shuffle_data() + data = tf.data.Dataset.from_tensor_slices(self._strategy_memories.data) + data = data.repeat() + data = data.shuffle(STRATEGY_TRAIN_SHUFFLE_SIZE) + data = data.batch(self._batch_size_strategy) + data = data.map(self._deserialize_strategy_memory) + data = data.prefetch(tf.data.experimental.AUTOTUNE) + data = data.take(nr_steps) + return iter(tfds.as_numpy(data)) + + def _loss_adv(self, params_adv, info_states, samp_regrets, iterations, masks, + total_iterations): + """Loss function for our advantage network.""" + preds = self._hk_adv_network.apply(params_adv, info_states, masks) + loss_values = jnp.mean(self._adv_loss(preds, samp_regrets), axis=-1) + loss_values = loss_values * iterations * 2 / total_iterations + return jnp.mean(loss_values) + + def _learn_advantage_network(self, player): + """Compute the loss on sampled transitions and perform a Q-network update. + + If there are not enough elements in the buffer, no loss is computed and + `None` is returned instead. + + Args: + player: (int) player index. + + Returns: + The average loss over the advantage network of the last batch. 
+ """ + for data in self._get_advantage_dataset( + player, self._advantage_network_train_steps): + (self._params_adv_network[player], self._opt_adv_state[player], + main_loss) = self._jitted_adv_update(self._params_adv_network[player], + self._opt_adv_state[player], + *data, jnp.array(self._iteration)) + + return main_loss + + def _loss_policy(self, params_policy, info_states, action_probs, iterations, + masks, total_iterations): + """Loss function for our policy network.""" + preds = self._hk_policy_network.apply(params_policy, info_states, masks) + loss_values = jnp.mean(self._policy_loss(preds, action_probs), axis=-1) + loss_values = loss_values * iterations * 2 / total_iterations + return jnp.mean(loss_values) + + def _learn_strategy_network(self): + """Compute the loss over the strategy network. + + Returns: + The average loss obtained on the last training batch of transitions + or `None`. + """ + for data in self._get_strategy_dataset(self._policy_network_train_steps): + (self._params_policy_network, self._opt_policy_state, + main_loss) = self._jitted_policy_update(self._params_policy_network, + self._opt_policy_state, + *data, self._iteration) + + return main_loss diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/jax/deep_cfr_jax_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/jax/deep_cfr_jax_test.py new file mode 100644 index 0000000..638e06f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/jax/deep_cfr_jax_test.py @@ -0,0 +1,66 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.jax.deep_cfr.""" + +from absl.testing import absltest +from absl.testing import parameterized + +from open_spiel.python import policy +from open_spiel.python.algorithms import exploitability +from open_spiel.python.jax import deep_cfr +import pyspiel + + +class DeepCFRTest(parameterized.TestCase): + + @parameterized.parameters('leduc_poker', 'kuhn_poker', 'liars_dice') + def test_deep_cfr_runs(self, game_name): + game = pyspiel.load_game(game_name) + deep_cfr_solver = deep_cfr.DeepCFRSolver( + game, + policy_network_layers=(8, 4), + advantage_network_layers=(4, 2), + num_iterations=2, + num_traversals=2, + learning_rate=1e-3, + batch_size_advantage=8, + batch_size_strategy=8, + memory_capacity=1e7) + deep_cfr_solver.solve() + + def test_matching_pennies_3p(self): + # We don't expect Deep CFR to necessarily converge on 3-player games but + # it's nonetheless interesting to see this result. 
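+    # matching_pennies_3p is simultaneous-move, so it is loaded in turn-based
+    # form here; DeepCFRSolver raises a ValueError on simultaneous dynamics.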
+ game = pyspiel.load_game_as_turn_based('matching_pennies_3p') + deep_cfr_solver = deep_cfr.DeepCFRSolver( + game, + policy_network_layers=(16, 8), + advantage_network_layers=(32, 16), + num_iterations=2, + num_traversals=2, + learning_rate=1e-3, + batch_size_advantage=8, + batch_size_strategy=8, + memory_capacity=1e7) + deep_cfr_solver.solve() + conv = exploitability.nash_conv( + game, + policy.tabular_policy_from_callable( + game, deep_cfr_solver.action_probabilities)) + print('Deep CFR in Matching Pennies 3p. NashConv: {}'.format(conv)) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/jax/dqn.py b/scenarios/bargaining/open_spiel/open_spiel/python/jax/dqn.py new file mode 100644 index 0000000..add4fb2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/jax/dqn.py @@ -0,0 +1,361 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""DQN agent implemented in JAX.""" + +import collections + +import haiku as hk +import jax +import jax.numpy as jnp +import numpy as np +import optax +import rlax + +from open_spiel.python import rl_agent +from open_spiel.python.utils.replay_buffer import ReplayBuffer + +Transition = collections.namedtuple( + "Transition", + "info_state action reward next_info_state is_final_step legal_actions_mask") + +# Penalty for illegal actions in action selection. In epsilon-greedy, this will +# prevent them from being selected. +ILLEGAL_ACTION_LOGITS_PENALTY = -1e9 + + +class DQN(rl_agent.AbstractAgent): + """DQN Agent implementation in JAX.""" + + def __init__(self, + player_id, + state_representation_size, + num_actions, + hidden_layers_sizes=128, + replay_buffer_capacity=10000, + batch_size=128, + replay_buffer_class=ReplayBuffer, + learning_rate=0.01, + update_target_network_every=1000, + learn_every=10, + discount_factor=1.0, + min_buffer_size_to_learn=1000, + epsilon_start=1.0, + epsilon_end=0.1, + epsilon_decay_duration=int(1e6), + optimizer_str="sgd", + loss_str="mse", + huber_loss_parameter=1.0, + seed=42, + gradient_clipping=None): + """Initialize the DQN agent.""" + + # This call to locals() is used to store every argument used to initialize + # the class instance, so it can be copied with no hyperparameter change. + self._kwargs = locals() + + self.player_id = player_id + self._num_actions = num_actions + if isinstance(hidden_layers_sizes, int): + hidden_layers_sizes = [hidden_layers_sizes] + self._layer_sizes = hidden_layers_sizes + self._batch_size = batch_size + self._update_target_network_every = update_target_network_every + self._learn_every = learn_every + self._min_buffer_size_to_learn = min_buffer_size_to_learn + self._discount_factor = discount_factor + self.huber_loss_parameter = huber_loss_parameter + + self._epsilon_start = epsilon_start + self._epsilon_end = epsilon_end + self._epsilon_decay_duration = epsilon_decay_duration + + # TODO(author6) Allow for optional replay buffer config. 
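+    # Only add(), sample() and len() are called on the buffer in this agent,
+    # so any class exposing that interface can be passed as
+    # `replay_buffer_class`.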
+ if not isinstance(replay_buffer_capacity, int): + raise ValueError("Replay buffer capacity not an integer.") + self._replay_buffer = replay_buffer_class(replay_buffer_capacity) + self._prev_timestep = None + self._prev_action = None + + # Step counter to keep track of learning, eps decay and target network. + self._step_counter = 0 + + # Keep track of the last training loss achieved in an update step. + self._last_loss_value = None + + # Create the Q-network instances + + def network(x): + mlp = hk.nets.MLP(self._layer_sizes + [num_actions]) + return mlp(x) + + self.hk_network = hk.without_apply_rng(hk.transform(network)) + self.hk_network_apply = jax.jit(self.hk_network.apply) + + rng = jax.random.PRNGKey(seed) + self._create_networks(rng, state_representation_size) + + if loss_str == "mse": + self.loss_func = lambda x: jnp.mean(x**2) + elif loss_str == "huber": + # pylint: disable=g-long-lambda + self.loss_func = lambda x: jnp.mean( + rlax.huber_loss(x, self.huber_loss_parameter)) + else: + raise ValueError("Not implemented, choose from 'mse', 'huber'.") + + if optimizer_str == "adam": + optimizer = optax.adam(learning_rate) + elif optimizer_str == "sgd": + optimizer = optax.sgd(learning_rate) + else: + raise ValueError("Not implemented, choose from 'adam' and 'sgd'.") + + # Clipping the gradients prevent divergence and allow more stable training. + if gradient_clipping: + optimizer = optax.chain(optimizer, + optax.clip_by_global_norm(gradient_clipping)) + + opt_init, opt_update = optimizer.init, optimizer.update + + self._opt_update_fn = self._get_update_func(opt_update) + self._opt_state = opt_init(self.params_q_network) + self._loss_and_grad = jax.value_and_grad(self._loss, has_aux=False) + self._jit_update = jax.jit(self.get_update()) + + def _create_networks(self, rng, state_representation_size): + """Called to create the networks.""" + x = jnp.ones([1, state_representation_size]) + self.params_q_network = self.hk_network.init(rng, x) + self.params_target_q_network = self.hk_network.init(rng, x) + + def _get_update_func(self, opt_update): + + def update(params, opt_state, gradient): + """Learning rule (stochastic gradient descent).""" + updates, opt_state = opt_update(gradient, opt_state) + new_params = optax.apply_updates(params, updates) + return new_params, opt_state + + return update + + def _get_action_probs(self, info_state, legal_actions, is_evaluation=False): + """Returns a selected action and the probabilities of legal actions.""" + epsilon = self._get_epsilon(is_evaluation) + return self._epsilon_greedy(info_state, legal_actions, epsilon) + + def step(self, time_step, is_evaluation=False, add_transition_record=True): + """Returns the action to be taken and updates the Q-network if needed. + + Args: + time_step: an instance of rl_environment.TimeStep. + is_evaluation: bool, whether this is a training or evaluation call. + add_transition_record: Whether to add to the replay buffer on this step. + + Returns: + A `rl_agent.StepOutput` containing the action probs and chosen action. + """ + + # Act step: don't act at terminal info states or if its not our turn. 
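+    # (in simultaneous-move environments every agent acts on each step, which
+    # is why is_simultaneous_move() is checked alongside current_player below)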
+ if (not time_step.last()) and (time_step.is_simultaneous_move() or + self.player_id + == time_step.current_player()): + info_state = time_step.observations["info_state"][self.player_id] + legal_actions = time_step.observations["legal_actions"][self.player_id] + action, probs = self._get_action_probs( + info_state, legal_actions, is_evaluation=is_evaluation) + else: + action = None + probs = [] + + # Don't mess up with the state during evaluation. + if not is_evaluation: + self._step_counter += 1 + + if self._step_counter % self._learn_every == 0: + self._last_loss_value = self.learn() + + if self._step_counter % self._update_target_network_every == 0: + # state_dict method returns a dictionary containing a whole state of the + # module. + self.params_target_q_network = jax.tree_util.tree_map( + lambda x: x.copy(), self.params_q_network) + + if self._prev_timestep and add_transition_record: + # We may omit record adding here if it's done elsewhere. + self.add_transition(self._prev_timestep, self._prev_action, time_step) + + if time_step.last(): # prepare for the next episode. + self._prev_timestep = None + self._prev_action = None + return + else: + self._prev_timestep = time_step + self._prev_action = action + + return rl_agent.StepOutput(action=action, probs=probs) + + def add_transition(self, prev_time_step, prev_action, time_step): + """Adds the new transition using `time_step` to the replay buffer. + + Adds the transition from `self._prev_timestep` to `time_step` by + `self._prev_action`. + + Args: + prev_time_step: prev ts, an instance of rl_environment.TimeStep. + prev_action: int, action taken at `prev_time_step`. + time_step: current ts, an instance of rl_environment.TimeStep. + """ + assert prev_time_step is not None + legal_actions = (time_step.observations["legal_actions"][self.player_id]) + legal_actions_mask = np.zeros(self._num_actions) + legal_actions_mask[legal_actions] = 1.0 + transition = Transition( + info_state=( + prev_time_step.observations["info_state"][self.player_id][:]), + action=prev_action, + reward=time_step.rewards[self.player_id], + next_info_state=time_step.observations["info_state"][self.player_id][:], + is_final_step=float(time_step.last()), + legal_actions_mask=legal_actions_mask) + self._replay_buffer.add(transition) + + def _epsilon_greedy(self, info_state, legal_actions, epsilon): + """Returns a valid epsilon-greedy action and valid action probs. + + Action probabilities are given by a softmax over legal q-values. + + Args: + info_state: hashable representation of the information state. + legal_actions: list of legal actions at `info_state`. + epsilon: float, probability of taking an exploratory action. + + Returns: + A valid epsilon-greedy action and valid action probabilities. 
+ """ + probs = np.zeros(self._num_actions) + legal_one_hot = np.zeros(self._num_actions) + legal_one_hot[legal_actions] = 1 + if np.random.rand() < epsilon: + action = np.random.choice(legal_actions) + probs[legal_actions] = 1.0 / len(legal_actions) + else: + info_state = np.reshape(info_state, [1, -1]) + q_values = self.hk_network_apply(self.params_q_network, info_state) + legal_q_values = q_values[0] + ( + 1 - legal_one_hot) * ILLEGAL_ACTION_LOGITS_PENALTY + action = int(np.argmax(legal_q_values)) + probs[action] = 1.0 + return action, probs + + def _get_epsilon(self, is_evaluation, power=1.0): + """Returns the evaluation or decayed epsilon value.""" + if is_evaluation: + return 0.0 + decay_steps = min(self._step_counter, self._epsilon_decay_duration) + decayed_epsilon = ( + self._epsilon_end + (self._epsilon_start - self._epsilon_end) * + (1 - decay_steps / self._epsilon_decay_duration)**power) + return decayed_epsilon + + def _loss(self, param, param_target, info_states, actions, rewards, + next_info_states, are_final_steps, legal_actions_mask): + + q_values = self.hk_network.apply(param, info_states) + target_q_values = self.hk_network.apply(param_target, next_info_states) + # Sum a large negative constant to illegal action logits before taking the + # max. This prevents illegal action values from being considered as target. + max_next_q = jnp.max( + target_q_values + + (1 - legal_actions_mask) * ILLEGAL_ACTION_LOGITS_PENALTY, + axis=-1) + max_next_q = jax.numpy.where( + 1 - are_final_steps, max_next_q, jnp.zeros_like(max_next_q)) + target = ( + rewards + (1 - are_final_steps) * self._discount_factor * max_next_q) + target = jax.lax.stop_gradient(target) + predictions = jnp.sum(q_values * actions, axis=-1) + loss_value = self.loss_func(predictions - target) + return loss_value + + def get_update(self): + + def update(param, param_target, opt_state, info_states, actions, rewards, + next_info_states, are_final_steps, legal_actions_mask): + loss_val, grad_val = self._loss_and_grad(param, param_target, info_states, + actions, rewards, + next_info_states, + are_final_steps, + legal_actions_mask) + new_param, new_opt_state = self._opt_update_fn(param, opt_state, grad_val) + return new_param, new_opt_state, loss_val + + return update + + def _to_one_hot(self, a): + a_one_hot = np.zeros(self._num_actions) + a_one_hot[a] = 1.0 + return a_one_hot + + def learn(self): + """Compute the loss on sampled transitions and perform a Q-network update. + + If there are not enough elements in the buffer, no loss is computed and + `None` is returned instead. + + Returns: + The average loss obtained on this batch of transitions or `None`. 
+ """ + + if (len(self._replay_buffer) < self._batch_size or + len(self._replay_buffer) < self._min_buffer_size_to_learn): + return None + + transitions = self._replay_buffer.sample(self._batch_size) + info_states = np.asarray([t.info_state for t in transitions]) + actions = np.asarray([self._to_one_hot(t.action) for t in transitions]) + rewards = np.asarray([t.reward for t in transitions]) + next_info_states = np.asarray([t.next_info_state for t in transitions]) + are_final_steps = np.asarray([t.is_final_step for t in transitions]) + legal_actions_mask = np.asarray([t.legal_actions_mask for t in transitions]) + + self.params_q_network, self._opt_state, loss_val = self._jit_update( + self.params_q_network, self.params_target_q_network, self._opt_state, + info_states, actions, rewards, next_info_states, are_final_steps, + legal_actions_mask) + + return loss_val + + @property + def q_values(self): + return self._q_values + + @property + def replay_buffer(self): + return self._replay_buffer + + @property + def loss(self): + return self._last_loss_value + + @property + def prev_timestep(self): + return self._prev_timestep + + @property + def prev_action(self): + return self._prev_action + + @property + def step_counter(self): + return self._step_counter diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/jax/dqn_jax_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/jax/dqn_jax_test.py new file mode 100644 index 0000000..46696bb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/jax/dqn_jax_test.py @@ -0,0 +1,123 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.jax.dqn.""" + +from absl.testing import absltest + +from open_spiel.python import rl_environment +from open_spiel.python.jax import dqn +import pyspiel + +# A simple two-action game encoded as an EFG game. Going left gets -1, going +# right gets a +1. 
+SIMPLE_EFG_DATA = """ + EFG 2 R "Simple single-agent problem" { "Player 1" } "" + p "ROOT" 1 1 "ROOT" { "L" "R" } 0 + t "L" 1 "Outcome L" { -1.0 } + t "R" 2 "Outcome R" { 1.0 } +""" + + +class DQNTest(absltest.TestCase): + + def test_simple_game(self): + game = pyspiel.load_efg_game(SIMPLE_EFG_DATA) + env = rl_environment.Environment(game=game) + agent = dqn.DQN(0, + state_representation_size= + game.information_state_tensor_shape()[0], + num_actions=game.num_distinct_actions(), + hidden_layers_sizes=[16], + replay_buffer_capacity=100, + batch_size=5, + epsilon_start=0.02, + epsilon_end=0.01, + gradient_clipping=1.0) + total_reward = 0 + + for _ in range(100): + time_step = env.reset() + while not time_step.last(): + agent_output = agent.step(time_step) + time_step = env.step([agent_output.action]) + total_reward += time_step.rewards[0] + agent.step(time_step) + self.assertGreaterEqual(total_reward, -100) + + def test_run_tic_tac_toe(self): + env = rl_environment.Environment("tic_tac_toe") + state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + agents = [ + dqn.DQN( # pylint: disable=g-complex-comprehension + player_id, + state_representation_size=state_size, + num_actions=num_actions, + hidden_layers_sizes=[16], + replay_buffer_capacity=10, + batch_size=5) for player_id in [0, 1] + ] + time_step = env.reset() + while not time_step.last(): + current_player = time_step.observations["current_player"] + current_agent = agents[current_player] + agent_output = current_agent.step(time_step) + time_step = env.step([agent_output.action]) + + for agent in agents: + agent.step(time_step) + + def test_run_hanabi(self): + # Hanabi is an optional game, so check we have it before running the test. + game = "hanabi" + if game not in pyspiel.registered_names(): + return + + num_players = 3 + env_configs = { + "players": num_players, + "max_life_tokens": 1, + "colors": 2, + "ranks": 3, + "hand_size": 2, + "max_information_tokens": 3, + "discount": 0. + } + env = rl_environment.Environment(game, **env_configs) + state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + agents = [ + dqn.DQN( # pylint: disable=g-complex-comprehension + player_id, + state_representation_size=state_size, + num_actions=num_actions, + hidden_layers_sizes=[16], + replay_buffer_capacity=10, + batch_size=5) for player_id in range(num_players) + ] + time_step = env.reset() + while not time_step.last(): + current_player = time_step.observations["current_player"] + agent_output = [agent.step(time_step) for agent in agents] + time_step = env.step([agent_output[current_player].action]) + + for agent in agents: + agent.step(time_step) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/jax/nfsp.py b/scenarios/bargaining/open_spiel/open_spiel/python/jax/nfsp.py new file mode 100644 index 0000000..1ef7bd5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/jax/nfsp.py @@ -0,0 +1,322 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Neural Fictitious Self-Play (NFSP) agent implemented in Jax. + +The code is around 4x slower than the TF implementation at the moment. Future +PRs improving the runtime are welcome. + +See the paper https://arxiv.org/abs/1603.01121 for more details. +""" + +import collections +import contextlib +import enum +import os + +import haiku as hk +import jax +import jax.numpy as jnp +import numpy as np +import optax + +from open_spiel.python import rl_agent +from open_spiel.python.jax import dqn +from open_spiel.python.utils.reservoir_buffer import ReservoirBuffer + +Transition = collections.namedtuple( + "Transition", "info_state action_probs legal_actions_mask") + +MODE = enum.Enum("mode", "best_response average_policy") + + +class NFSP(rl_agent.AbstractAgent): + """NFSP Agent implementation in JAX. + + See open_spiel/python/examples/kuhn_nfsp.py for an usage example. + """ + + def __init__(self, + player_id, + state_representation_size, + num_actions, + hidden_layers_sizes, + reservoir_buffer_capacity, + anticipatory_param, + batch_size=128, + rl_learning_rate=0.01, + sl_learning_rate=0.01, + min_buffer_size_to_learn=1000, + learn_every=64, + optimizer_str="sgd", + **kwargs): + """Initialize the `NFSP` agent.""" + self.player_id = player_id + self._num_actions = num_actions + self._layer_sizes = hidden_layers_sizes + self._batch_size = batch_size + self._learn_every = learn_every + self._anticipatory_param = anticipatory_param + self._min_buffer_size_to_learn = min_buffer_size_to_learn + + self._reservoir_buffer = ReservoirBuffer(reservoir_buffer_capacity) + self._prev_timestep = None + self._prev_action = None + + # Step counter to keep track of learning. + self._step_counter = 0 + + # Inner RL agent + kwargs.update({ + "batch_size": batch_size, + "learning_rate": rl_learning_rate, + "learn_every": learn_every, + "min_buffer_size_to_learn": min_buffer_size_to_learn, + "optimizer_str": optimizer_str, + }) + self._rl_agent = dqn.DQN(player_id, state_representation_size, + num_actions, hidden_layers_sizes, **kwargs) + + # Keep track of the last training loss achieved in an update step. + self._last_rl_loss_value = lambda: self._rl_agent.loss + self._last_sl_loss_value = None + + # Average policy network. 
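+    # A plain MLP over the info-state tensor, trained by supervised learning
+    # on reservoir-sampled (info_state, action_probs) pairs; it approximates
+    # the average of the past best responses.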
+ def network(x): + mlp = hk.nets.MLP(self._layer_sizes + [num_actions]) + return mlp(x) + + self.hk_avg_network = hk.without_apply_rng(hk.transform(network)) + + def avg_network_policy(param, info_state): + action_values = self.hk_avg_network.apply(param, info_state) + action_probs = jax.nn.softmax(action_values, axis=1) + return action_values, action_probs + + self._avg_network_policy = jax.jit(avg_network_policy) + + rng = jax.random.PRNGKey(42) + x = jnp.ones([1, state_representation_size]) + self.params_avg_network = self.hk_avg_network.init(rng, x) + self.params_avg_network = jax.device_put(self.params_avg_network) + + self._savers = [ + ("q_network", self._rl_agent.params_q_network), + ("avg_network", self.params_avg_network) + ] + + if optimizer_str == "adam": + opt_init, opt_update = optax.chain( + optax.scale_by_adam(b1=0.9, b2=0.999, eps=1e-8), + optax.scale(sl_learning_rate)) + elif optimizer_str == "sgd": + opt_init, opt_update = optax.sgd(sl_learning_rate) + else: + raise ValueError("Not implemented. Choose from ['adam', 'sgd'].") + self._opt_update_fn = self._get_update_func(opt_update) + self._opt_state = opt_init(self.params_avg_network) + self._loss_and_grad = jax.value_and_grad(self._loss_avg, has_aux=False) + + self._sample_episode_policy() + self._jit_update = jax.jit(self.get_update()) + + def _get_update_func(self, opt_update): + + def update(params, opt_state, gradient): + """Learning rule (stochastic gradient descent).""" + updates, opt_state = opt_update(gradient, opt_state) + new_params = optax.apply_updates(params, updates) + return new_params, opt_state + + return update + + def get_step_counter(self): + return self._step_counter + + @contextlib.contextmanager + def temp_mode_as(self, mode): + """Context manager to temporarily overwrite the mode.""" + previous_mode = self._mode + self._mode = mode + yield + self._mode = previous_mode + + def _sample_episode_policy(self): + if np.random.rand() < self._anticipatory_param: + self._mode = MODE.best_response + else: + self._mode = MODE.average_policy + + def _act(self, info_state, legal_actions): + info_state = np.reshape(info_state, [1, -1]) + action_values, action_probs = self._avg_network_policy( + self.params_avg_network, info_state + ) + + self._last_action_values = action_values[0] + # Remove illegal actions, normalize probs + probs = np.zeros(self._num_actions) + action_probs = np.asarray(action_probs) + probs[legal_actions] = action_probs[0][legal_actions] + probs /= sum(probs) + action = np.random.choice(len(probs), p=probs) + return action, probs + + @property + def mode(self): + return self._mode + + @property + def loss(self): + return (self._last_sl_loss_value, self._last_rl_loss_value()) + + def step(self, time_step, is_evaluation=False): + """Returns the action to be taken and updates the Q-networks if needed. + + Args: + time_step: an instance of rl_environment.TimeStep. + is_evaluation: bool, whether this is a training or evaluation call. + + Returns: + A `rl_agent.StepOutput` containing the action probs and chosen action. + """ + if self._mode == MODE.best_response: + agent_output = self._rl_agent.step(time_step, is_evaluation) + if not is_evaluation and not time_step.last(): + self._add_transition(time_step, agent_output) + + elif self._mode == MODE.average_policy: + # Act step: don't act at terminal info states. 
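+      # (the average-policy head only selects actions here; transitions are
+      # still recorded for the inner DQN a few lines below)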
+ if not time_step.last(): + info_state = time_step.observations["info_state"][self.player_id] + legal_actions = time_step.observations["legal_actions"][self.player_id] + action, probs = self._act(info_state, legal_actions) + agent_output = rl_agent.StepOutput(action=action, probs=probs) + + if self._prev_timestep and not is_evaluation: + self._rl_agent.add_transition(self._prev_timestep, self._prev_action, + time_step) + else: + raise ValueError("Invalid mode ({})".format(self._mode)) + + if not is_evaluation: + self._step_counter += 1 + + if self._step_counter % self._learn_every == 0: + self._last_sl_loss_value = self._learn() + # If learn step not triggered by rl policy, learn. + if self._mode == MODE.average_policy: + self._rl_agent.learn() + + # Prepare for the next episode. + if time_step.last(): + self._sample_episode_policy() + self._prev_timestep = None + self._prev_action = None + return + else: + self._prev_timestep = time_step + self._prev_action = agent_output.action + return agent_output + + def _add_transition(self, time_step, agent_output): + """Adds the new transition using `time_step` to the reservoir buffer. + + Transitions are in the form (time_step, agent_output.probs, legal_mask). + + Args: + time_step: an instance of rl_environment.TimeStep. + agent_output: an instance of rl_agent.StepOutput. + """ + legal_actions = time_step.observations["legal_actions"][self.player_id] + legal_actions_mask = np.zeros(self._num_actions) + legal_actions_mask[legal_actions] = 1.0 + transition = Transition( + info_state=(time_step.observations["info_state"][self.player_id][:]), + action_probs=agent_output.probs, + legal_actions_mask=legal_actions_mask) + self._reservoir_buffer.add(transition) + + def _loss_avg(self, param_avg, info_states, action_probs): + avg_logit = self.hk_avg_network.apply(param_avg, info_states) + loss_value = -jnp.sum( + action_probs * jax.nn.log_softmax(avg_logit)) / avg_logit.shape[0] + return loss_value + + def get_update(self): + def update(param_avg, opt_state_avg, info_states, action_probs): + loss_val, grad_val = self._loss_and_grad(param_avg, info_states, + action_probs) + new_param_avg, new_opt_state_avg = self._opt_update_fn( + param_avg, opt_state_avg, grad_val) + return new_param_avg, new_opt_state_avg, loss_val + return update + + def _learn(self): + """Compute the loss on sampled transitions and perform a avg-network update. + + If there are not enough elements in the buffer, no loss is computed and + `None` is returned instead. + + Returns: + The average loss obtained on this batch of transitions or `None`. 
+ """ + if (len(self._reservoir_buffer) < self._batch_size or + len(self._reservoir_buffer) < self._min_buffer_size_to_learn): + return None + + transitions = self._reservoir_buffer.sample(self._batch_size) + info_states = np.asarray([t.info_state for t in transitions]) + action_probs = np.asarray([t.action_probs for t in transitions]) + + self.params_avg_network, self._opt_state, loss_val_avg = self._jit_update( + self.params_avg_network, self._opt_state, info_states, action_probs) + return loss_val_avg + + def _full_checkpoint_name(self, checkpoint_dir, name): + checkpoint_filename = "_".join([name, "pid" + str(self.player_id)]) + return os.path.join(checkpoint_dir, checkpoint_filename) + + def _latest_checkpoint_filename(self, name): + checkpoint_filename = "_".join([name, "pid" + str(self.player_id)]) + return checkpoint_filename + "_latest" + + def save(self, checkpoint_dir): + """Saves the average policy network and the inner RL agent's q-network. + + Note that this does not save the experience replay buffers and should + only be used to restore the agent's policy, not resume training. + + Args: + checkpoint_dir: directory where checkpoints will be saved. + """ + raise NotImplementedError + + def has_checkpoint(self, checkpoint_dir): + for name, _ in self._savers: + path = self._full_checkpoint_name(checkpoint_dir, name) + if os.path.exists(path): + return True + return False + + def restore(self, checkpoint_dir): + """Restores the average policy network and the inner RL agent's q-network. + + Note that this does not restore the experience replay buffers and should + only be used to restore the agent's policy, not resume training. + + Args: + checkpoint_dir: directory from which checkpoints will be restored. + """ + raise NotImplementedError diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/jax/nfsp_jax_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/jax/nfsp_jax_test.py new file mode 100644 index 0000000..d70be18 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/jax/nfsp_jax_test.py @@ -0,0 +1,87 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for open_spiel.python.algorithms.nfsp.""" + +from absl.testing import absltest + +from open_spiel.python import rl_environment +from open_spiel.python.jax import nfsp + + +class NFSPTest(absltest.TestCase): + + def test_run_kuhn(self): + env = rl_environment.Environment("kuhn_poker") + state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + agents = [ + nfsp.NFSP( # pylint: disable=g-complex-comprehension + player_id, + state_representation_size=state_size, + num_actions=num_actions, + hidden_layers_sizes=[16], + reservoir_buffer_capacity=10, + anticipatory_param=0.1) for player_id in [0, 1] + ] + for unused_ep in range(10): + time_step = env.reset() + while not time_step.last(): + current_player = time_step.observations["current_player"] + current_agent = agents[current_player] + agent_output = current_agent.step(time_step) + time_step = env.step([agent_output.action]) + for agent in agents: + agent.step(time_step) + + +class ReservoirBufferTest(absltest.TestCase): + + def test_reservoir_buffer_add(self): + # pylint: disable=g-generic-assert + reservoir_buffer = nfsp.ReservoirBuffer(reservoir_buffer_capacity=10) + self.assertEqual(len(reservoir_buffer), 0) + reservoir_buffer.add("entry1") + self.assertEqual(len(reservoir_buffer), 1) + reservoir_buffer.add("entry2") + self.assertEqual(len(reservoir_buffer), 2) + + self.assertIn("entry1", reservoir_buffer) + self.assertIn("entry2", reservoir_buffer) + + def test_reservoir_buffer_max_capacity(self): + # pylint: disable=g-generic-assert + reservoir_buffer = nfsp.ReservoirBuffer(reservoir_buffer_capacity=2) + reservoir_buffer.add("entry1") + reservoir_buffer.add("entry2") + reservoir_buffer.add("entry3") + + self.assertEqual(len(reservoir_buffer), 2) + + def test_reservoir_buffer_sample(self): + replay_buffer = nfsp.ReservoirBuffer(reservoir_buffer_capacity=3) + replay_buffer.add("entry1") + replay_buffer.add("entry2") + replay_buffer.add("entry3") + + samples = replay_buffer.sample(3) + + self.assertIn("entry1", samples) + self.assertIn("entry2", samples) + self.assertIn("entry3", samples) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/jax/opponent_shaping.py b/scenarios/bargaining/open_spiel/open_spiel/python/jax/opponent_shaping.py new file mode 100644 index 0000000..6910f9e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/jax/opponent_shaping.py @@ -0,0 +1,1075 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""JAX implementation of LOLA and LOLA-DiCE (Foerster et al. 2018). + +The DiCE implementation is also based on the pytorch implementation from +https://github.com/alexis-jacq/LOLA_DiCE by Alexis David Jacq. 
+ +Both algorithm implementations, LOLA and LOLA-DiCE, currently support only +two-player simultaneous move games and assume access to the opponent's +actions (the observation field in the time step must contain a key +'actions' with the opponent's actions). +""" + +# pylint: disable=g-importing-member +# pylint: disable=g-bare-generic + +from copy import deepcopy +from functools import partial +import typing + +import chex +import distrax +import haiku as hk +import jax +from jax import grad +from jax import vmap +import jax.numpy as jnp +import numpy as np +import optax +import rlax + +from open_spiel.python import rl_agent +from open_spiel.python import rl_environment +from open_spiel.python.rl_environment import TimeStep + + +@chex.dataclass +class TransitionBatch: # pylint: disable=too-few-public-methods + """A transition batch is a collection of transitions. + + Each item in the batch is a numpy array. + """ + + info_state: np.ndarray + action: np.ndarray + reward: np.ndarray + discount: np.ndarray = None + terminal: np.ndarray = None + legal_actions_mask: np.ndarray = None + values: np.ndarray = None + + +@chex.dataclass +class TrainState: # pylint: disable=too-few-public-methods + """TrainState class. + + The training state contains the parameters and optimizer states of the + policy and critic networks for each agent. The parameters are stored in a + dictionary with the agent id as key. + """ + + policy_params: typing.Dict[typing.Any, hk.Params] + policy_opt_states: typing.Dict[typing.Any, optax.OptState] + critic_params: typing.Dict[typing.Any, hk.Params] + critic_opt_states: typing.Dict[typing.Any, optax.OptState] + + +# A function that takes the current train state and a transition batch and +# returns the new train state and a dictionary of metrics. +UpdateFn = typing.Callable[ + [TrainState, TransitionBatch], typing.Tuple[TrainState, typing.Dict] +] + + +def get_minibatches( + batch: TransitionBatch, num_minibatches: int +) -> typing.Iterator[TransitionBatch]: + """Yields an iterator over minibatches of the given batch. + + Args: + batch: A transition batch. + num_minibatches: The number of minibatches to return. + + Yields: + An iterator over minibatches of the given batch. + """ + + def get_minibatch(x, start, end): + return x[:, start:end] if len(x.shape) > 2 else x + + for i in range(num_minibatches): + start, end = i * (batch.reward.shape[1] // num_minibatches), (i + 1) * ( + batch.reward.shape[1] // num_minibatches + ) + mini_batch = jax.tree_util.tree_map( + partial(get_minibatch, start=start, end=end), batch + ) + yield mini_batch + + +def get_critic_update_fn( + agent_id: int, + critic_network: hk.Transformed, + optimizer: optax.TransformUpdateFn, + num_minibatches: int = 8, + gamma: float = 0.99, +) -> UpdateFn: + """Returns the update function for the critic parameters. + + Args: + agent_id: The id of the agent that will be updated. + critic_network: A transformed haiku function. + optimizer: Optimizer update function. + num_minibatches: the number of minibatches. + gamma: the discount factor. + + Returns: + An update function that takes the current train state together with a + transition batch and returns the new train state and a dictionary of + metrics. 
+ """ + + def loss_fn(params, batch: TransitionBatch): + info_states, rewards = batch.info_state[agent_id], batch.reward[agent_id] + discounts = jnp.ones_like(rewards) * gamma + values = critic_network.apply(params, info_states).squeeze() + v_t = values[:, :-1].reshape(-1) + v_tp1 = values[:, 1:].reshape(-1) + r_t = rewards[:, :-1].reshape(-1) + d_t = discounts[:, 1:].reshape(-1) + td_error = jax.lax.stop_gradient(r_t + d_t * v_tp1) - v_t + return jnp.mean(td_error**2) + + def update(train_state: TrainState, batch: TransitionBatch): + """The critic update function. + + Updates the critic parameters of the train state with the given + transition batch. + + Args: + train_state: The current train state. + batch: A transition batch. + + Returns: + The updated train state with the new critic params and a dictionary + with the critic loss + """ + losses = [] + critic_params = train_state.critic_params[agent_id] + opt_state = train_state.critic_opt_states[agent_id] + for mini_batch in get_minibatches(batch, num_minibatches): + loss, grads = jax.value_and_grad(loss_fn)(critic_params, mini_batch) + updates, opt_state = optimizer(grads, opt_state) + critic_params = optax.apply_updates(critic_params, updates) + losses.append(loss) + train_state = deepcopy(train_state) + state = TrainState( + policy_params=train_state.policy_params, + policy_opt_states=train_state.policy_opt_states, + critic_params={**train_state.critic_params, agent_id: critic_params}, + critic_opt_states={ + **train_state.critic_opt_states, + agent_id: opt_state, + }, + ) + return state, {'loss': jnp.mean(jnp.array(losses))} + + return update + + +def get_dice_update_fn( + agent_id: int, + rng: hk.PRNGSequence, + policy_network: hk.Transformed, + critic_network: hk.Transformed, + optimizer: optax.TransformUpdateFn, + opp_pi_lr: float, + env: rl_environment.Environment, + n_lookaheads: int = 1, + gamma: float = 0.99, +): + """Get the DiCE update function.""" + def magic_box(x): + return jnp.exp(x - jax.lax.stop_gradient(x)) + + @jax.jit + @partial(jax.vmap, in_axes=(None, 0, 0)) + def get_action(params, s, rng_key): + pi = policy_network.apply(params, s) + action = pi.sample(seed=rng_key) + return action + + def rollout(params, other_params): + states, rewards, actions = [], [], [] + step = env.reset() + batch_size = ( + step.observations['batch_size'] + if 'batch_size' in step.observations + else 1 + ) + while not step.last(): + obs = step.observations + s_1, s_2 = jnp.array(obs['info_state'][0]), jnp.array( + obs['info_state'][1] + ) + if batch_size == 1: + s_1, s_2 = s_1[None, :], s_2[None, :] + a_1 = get_action(params, s_1, jax.random.split(next(rng), num=batch_size)) + a_2 = get_action( + other_params, s_2, jax.random.split(next(rng), num=batch_size) + ) + a = jnp.stack([a_1, a_2], axis=1) + step = env.step(a.squeeze()) + r_1, r_2 = jnp.array(step.rewards[0]), jnp.array(step.rewards[1]) + if batch_size == 1: + r_1, r_2 = r_1[None], r_2[None] + actions.append(a.T) + states.append(jnp.stack([s_1, s_2], axis=0)) + rewards.append(jnp.stack([r_1, r_2], axis=0)) + return { + 'states': jnp.stack(states, axis=2), + 'rewards': jnp.stack(rewards, axis=2), + 'actions': jnp.stack(actions, axis=2), + } + + def dice_correction(train_state: TrainState): + """Computes the dice update for the given train state. + + Args: + train_state: The current train state. + + Returns: + The updated train state with the new policy params and metrics dict. 
+ """ + + @jax.jit + def dice_objective(params, other_params, states, actions, rewards, values): + self_logprobs = vmap( + vmap(lambda s, a: policy_network.apply(params, s).log_prob(a)) + )(states[0], actions[0]) + other_logprobs = vmap( + vmap(lambda s, a: policy_network.apply(other_params, s).log_prob(a)) + )(states[1], actions[1]) + # apply discount: + cum_discount = jnp.cumprod(gamma * jnp.ones_like(rewards), axis=1) / gamma + discounted_rewards = rewards * cum_discount + discounted_values = values.squeeze() * cum_discount + + # stochastics nodes involved in rewards dependencies: + dependencies = jnp.cumsum(self_logprobs + other_logprobs, axis=1) + # logprob of each stochastic nodes: + stochastic_nodes = self_logprobs + other_logprobs + # dice objective: + dice_objective = jnp.mean( + jnp.sum(magic_box(dependencies) * discounted_rewards, axis=1) + ) + baseline_term = jnp.mean( + jnp.sum((1 - magic_box(stochastic_nodes)) * discounted_values, axis=1) + ) + dice_objective = dice_objective + baseline_term + return -dice_objective # want to minimize -objective + + def outer_update(params, opp_params, agent_id, opp_id): + other_theta = opp_params + for _ in range(n_lookaheads): + trajectories = rollout(other_theta, params) + other_grad = jax.grad(dice_objective)( + other_theta, + other_params=params, + states=trajectories['states'], + actions=trajectories['actions'], + rewards=trajectories['rewards'][0], + values=critic_network.apply( + train_state.critic_params[opp_id], trajectories['states'][0] + ), + ) + # Update the other player's policy: + other_theta = jax.tree_util.tree_map( + lambda param, grad: param - opp_pi_lr * grad, + other_theta, + other_grad, + ) + + trajectories = rollout(params, other_theta) + values = critic_network.apply( + train_state.critic_params[agent_id], trajectories['states'][0] + ) + loss = dice_objective( + params=params, + other_params=other_theta, + states=trajectories['states'], + actions=trajectories['actions'], + rewards=trajectories['rewards'][0], + values=values, + ) + return loss, {'loss': loss} + + opp = 1 - agent_id + grads, metrics = grad(outer_update, has_aux=True)( + train_state.policy_params[agent_id], + opp_params=train_state.policy_params[opp], + agent_id=agent_id, + opp_id=opp, + ) + return grads, metrics + + def update( + train_state: TrainState, batch: TransitionBatch + ) -> typing.Tuple[TrainState, typing.Dict]: + """Updates the policy parameters in train_state. + + If lola_weight > 0, the correction term according to Foerster et al. will be + applied. + + Args: + train_state: the agent's train state. + batch: a transition batch + + Returns: + A tuple (new_train_state, metrics) + """ + del batch + grads, metrics = dice_correction(train_state) + updates, opt_state = optimizer( + grads, train_state.policy_opt_states[agent_id] + ) + policy_params = optax.apply_updates( + train_state.policy_params[agent_id], updates + ) + train_state = TrainState( + policy_params={**train_state.policy_params, agent_id: policy_params}, + policy_opt_states={ + **train_state.policy_opt_states, + agent_id: opt_state, + }, + critic_params=deepcopy(train_state.critic_params), + critic_opt_states=deepcopy(train_state.critic_opt_states), + ) + return train_state, metrics + + return update + + +def get_lola_update_fn( + agent_id: int, + policy_network: hk.Transformed, + optimizer: optax.TransformUpdateFn, + pi_lr: float, + gamma: float = 0.99, + lola_weight: float = 1.0, +) -> UpdateFn: + """Get the LOLA update function. 
+ + Returns a function that updates the policy parameters using the LOLA + correction formula. + + Args: + agent_id: the agent's id + policy_network: A haiku transformed policy network. + optimizer: An optax optimizer. + pi_lr: Policy learning rate. + gamma: Discount factor. + lola_weight: The LOLA correction weight to scale the correction term. + + Returns: + A UpdateFn function that updates the policy parameters. + """ + + def flat_params( + params, + ) -> typing.Tuple[ + typing.Dict[str, jnp.ndarray], typing.Dict[typing.Any, typing.Callable] + ]: + """Flattens the policy parameters. + + Flattens the parameters of the policy network into a single vector and + returns the unravel function. + + Args: + params: The policy parameters. + + Returns: + A tuple (flat_params, unravel_fn) + """ + flat_param_dict = { + agent_id: jax.flatten_util.ravel_pytree(p) + for agent_id, p in params.items() + } + + params = dict((k, flat_param_dict[k][0]) for k in flat_param_dict) + unravel_fns = dict((k, flat_param_dict[k][1]) for k in flat_param_dict) + return params, unravel_fns + + def lola_correction( + train_state: TrainState, batch: TransitionBatch + ) -> hk.Params: + """Computes the LOLA correction term. + + Args: + train_state: The agent's current train state. + batch: A transition batch. + + Returns: + The LOLA correction term. + """ + a_t, o_t, r_t, values = ( + batch.action, + batch.info_state, + batch.reward, + batch.values, + ) + params, unravel_fns = flat_params(train_state.policy_params) + + compute_returns = partial(rlax.lambda_returns, lambda_=0.0) + g_t = vmap(vmap(compute_returns))( + r_t=r_t, v_t=values, discount_t=jnp.full_like(r_t, gamma) + ) + g_t = (g_t - g_t.mean()) / (g_t.std() + 1e-8) + + def log_pi(params, i, a_t, o_t): + return policy_network.apply(unravel_fns[i](params), o_t).log_prob(a_t) + + opp_id = 1 - agent_id + + def cross_term(a_t, o_t, r_t): + """Computes the second order correction term of the LOLA update. + + Args: + a_t: actions of both players + o_t: observations of both players + r_t: rewards of both players + + Returns: + The second order correction term. + """ + grad_log_pi = vmap(jax.value_and_grad(log_pi), in_axes=(None, None, 0, 0)) + log_probs, grads = grad_log_pi( + params[agent_id], agent_id, a_t[agent_id], o_t[agent_id] + ) + opp_logrpobs, opp_grads = grad_log_pi( + params[opp_id], opp_id, a_t[opp_id], o_t[opp_id] + ) + grads = grads.cumsum(axis=0) + opp_grads = opp_grads.cumsum(axis=0) + log_probs = log_probs.cumsum(axis=0) + opp_logrpobs = opp_logrpobs.cumsum(axis=0) + cross_term = 0.0 + for t in range(0, len(a_t[agent_id])): + discounted_reward = r_t[opp_id, t] * jnp.power(gamma, t) + cross_term += ( + discounted_reward + * jnp.outer(grads[t], opp_grads[t]) + * jnp.exp(log_probs[t] + opp_logrpobs[t]) + ) + return cross_term # * jnp.exp(log_probs.sum() + opp_logrpobs.sum()) + + def policy_gradient(a_t, o_t, g_t): + grad_log_pi = vmap(grad(log_pi), in_axes=(None, None, 0, 0)) + opp_grads = grad_log_pi(params[opp_id], opp_id, a_t[opp_id], o_t[opp_id]) + pg = g_t[agent_id] @ opp_grads + return pg + + cross = vmap(cross_term, in_axes=(1, 1, 1))(a_t, o_t, r_t).mean(axis=0) + pg = vmap(policy_gradient, in_axes=(1, 1, 1))(a_t, o_t, g_t).mean(axis=0) + correction = -pi_lr * (pg @ cross) + return unravel_fns[agent_id](correction) + + def policy_loss(params, agent_id, batch): + """Computes the policy gradient loss. + + Args: + params: The policy parameters. + agent_id: The agent's id. + batch: A transition batch. + + Returns: + The policy gradient loss. 
+ """ + a_t, o_t, r_t, values = ( + batch.action[agent_id], + batch.info_state[agent_id], + batch.reward[agent_id], + batch.values[agent_id], + ) + logits_t = vmap(vmap(lambda s: policy_network.apply(params, s).logits))(o_t) + discount = jnp.full(r_t.shape, gamma) + returns = vmap(rlax.lambda_returns)( + r_t=r_t, + v_t=values, + discount_t=discount, + lambda_=jnp.ones_like(discount), + ) + adv_t = returns - values + loss = vmap(rlax.policy_gradient_loss)( + logits_t=logits_t, a_t=a_t, adv_t=adv_t, w_t=jnp.ones_like(adv_t) + ) + return loss.mean() + + def update( + train_state: TrainState, batch: TransitionBatch + ) -> typing.Tuple[TrainState, typing.Dict]: + """Updates the policy parameters in train_state. + + If lola_weight > 0, the correction term by Foerster et al. will be applied. + + Args: + train_state: the agent's train state. + batch: a transition batch + + Returns: + A tuple (new_train_state, metrics) + """ + loss, policy_grads = jax.value_and_grad(policy_loss)( + train_state.policy_params[agent_id], agent_id, batch + ) + correction = lola_correction(train_state, batch) + policy_grads = jax.tree_util.tree_map( + lambda grad, corr: grad - lola_weight * corr, policy_grads, correction + ) + updates, opt_state = optimizer( + policy_grads, train_state.policy_opt_states[agent_id] + ) + policy_params = optax.apply_updates( + train_state.policy_params[agent_id], updates + ) + train_state = TrainState( + policy_params={**train_state.policy_params, agent_id: policy_params}, + policy_opt_states={ + **train_state.policy_opt_states, + agent_id: opt_state, + }, + critic_params=deepcopy(train_state.critic_params), + critic_opt_states=deepcopy(train_state.critic_opt_states), + ) + return train_state, {'loss': loss} + + return update + + +def get_opponent_update_fn( + agent_id: int, + policy_network: hk.Transformed, + optimizer: optax.TransformUpdateFn, + num_minibatches: int = 1, +) -> UpdateFn: + """Get the opponent update function.""" + def loss_fn(params, batch: TransitionBatch): + def loss(p, states, actions): + log_prob = policy_network.apply(p, states).log_prob(actions) + return log_prob + + log_probs = vmap(vmap(loss, in_axes=(None, 0, 0)), in_axes=(None, 0, 0))( + params, batch.info_state[agent_id], batch.action[agent_id] + ) + return -log_probs.sum(axis=-1).mean() + + def update( + train_state: TrainState, batch: TransitionBatch + ) -> typing.Tuple[TrainState, typing.Dict]: + policy_params = train_state.policy_params[agent_id] + opt_state = train_state.policy_opt_states[agent_id] + loss = 0 + for mini_batch in get_minibatches(batch, num_minibatches): + loss, policy_grads = jax.value_and_grad(loss_fn)( + policy_params, mini_batch + ) + updates, opt_state = optimizer(policy_grads, opt_state) + policy_params = optax.apply_updates( + train_state.policy_params[agent_id], updates + ) + + train_state = TrainState( + policy_params={**train_state.policy_params, agent_id: policy_params}, + policy_opt_states={ + **train_state.policy_opt_states, + agent_id: opt_state, + }, + critic_params=deepcopy(train_state.critic_params), + critic_opt_states=deepcopy(train_state.critic_opt_states), + ) + return train_state, {'loss': loss} + + return update + + +class OpponentShapingAgent(rl_agent.AbstractAgent): + """Opponent Shaping Agent. + + This agent uses either LOLA or LOLA-DiCE to influence the parameter updates + of the opponent policies. 
+ """ + + def __init__( + self, + player_id: int, + opponent_ids: typing.List[int], + info_state_size: chex.Shape, + num_actions: int, + policy: hk.Transformed, + critic: hk.Transformed, + batch_size: int = 16, + critic_learning_rate: typing.Union[float, optax.Schedule] = 0.01, + pi_learning_rate: typing.Union[float, optax.Schedule] = 0.001, + opp_policy_learning_rate: typing.Union[float, optax.Schedule] = 0.001, + opponent_model_learning_rate: typing.Union[float, optax.Schedule] = 0.001, + clip_grad_norm: float = 0.5, + policy_update_interval: int = 8, + discount: float = 0.99, + critic_discount: float = 0.99, + seed: jax.random.PRNGKey = 42, + fit_opponent_model=True, + correction_type: str = 'dice', + use_jit: bool = False, + n_lookaheads: int = 1, + num_critic_mini_batches: int = 1, + num_opponent_updates: int = 1, + env: typing.Optional[rl_environment.Environment] = None, + ): + self.player_id = player_id + self._num_actions = num_actions + self._batch_size = batch_size + self._policy_update_interval = policy_update_interval + self._discount = discount + self._num_opponent_updates = num_opponent_updates + self._num_mini_batches = num_critic_mini_batches + self._prev_time_step = None + self._prev_action = None + self._data = [] + self._metrics = [] + self._fit_opponent_model = fit_opponent_model + self._opponent_ids = opponent_ids + self._rng = hk.PRNGSequence(seed) + + # Step counters + self._step_counter = 0 + self._episode_counter = 0 + self._num_learn_steps = 0 + + self._pi_network = policy + self._critic_network = critic + self._critic_opt = optax.sgd(learning_rate=critic_learning_rate) + self._opponent_opt = optax.adam(opponent_model_learning_rate) + self._policy_opt = optax.chain( + optax.clip_by_global_norm(clip_grad_norm) + if clip_grad_norm + else optax.identity(), + optax.sgd(learning_rate=pi_learning_rate), + ) + self._train_state = self._init_train_state(info_state_size=info_state_size) + self._current_policy = self.get_policy(return_probs=True) + + if correction_type == 'dice': + policy_update_fn = get_dice_update_fn( + agent_id=player_id, + rng=self._rng, + policy_network=policy, + critic_network=critic, + optimizer=self._policy_opt.update, + opp_pi_lr=opp_policy_learning_rate, + gamma=discount, + n_lookaheads=n_lookaheads, + env=env, + ) + # pylint: disable=consider-using-in + elif correction_type == 'lola' or correction_type == 'none': + # if correction_type is none, use policy gradient without corrections + lola_weight = 1.0 if correction_type == 'lola' else 0.0 + update_fn = get_lola_update_fn( + agent_id=player_id, + policy_network=policy, + pi_lr=pi_learning_rate, + optimizer=self._policy_opt.update, + lola_weight=lola_weight, + ) + policy_update_fn = jax.jit(update_fn) if use_jit else update_fn + else: + raise ValueError(f'Unknown correction type: {correction_type}') + + critic_update_fn = get_critic_update_fn( + agent_id=player_id, + critic_network=critic, + optimizer=self._critic_opt.update, + num_minibatches=num_critic_mini_batches, + gamma=critic_discount, + ) + + self._policy_update_fns = {player_id: policy_update_fn} + self._critic_update_fns = { + player_id: jax.jit(critic_update_fn) if use_jit else critic_update_fn + } + + for opponent in opponent_ids: + opp_update_fn = get_opponent_update_fn( + agent_id=opponent, + policy_network=policy, + optimizer=self._opponent_opt.update, + num_minibatches=num_opponent_updates, + ) + opp_critic_update_fn = get_critic_update_fn( + agent_id=opponent, + critic_network=critic, + optimizer=self._critic_opt.update, + 
num_minibatches=num_critic_mini_batches, + gamma=critic_discount, + ) + self._policy_update_fns[opponent] = ( + jax.jit(opp_update_fn) if use_jit else opp_update_fn + ) + self._critic_update_fns[opponent] = ( + jax.jit(opp_critic_update_fn) if use_jit else opp_critic_update_fn + ) + + @property + def train_state(self): + return deepcopy(self._train_state) + + @property + def policy_network(self): + return self._pi_network + + @property + def critic_network(self): + return self._critic_network + + def metrics(self, return_last_only: bool = True): + if not self._metrics: + return {} + metrics = self._metrics[-1] if return_last_only else self._metrics + return metrics + + def update_params(self, state: TrainState, player_id: int) -> None: + """Updates the parameters of the other agents. + + Args: + state: the train state of the other agent. + player_id: id of the other agent + + Returns: + """ + self._train_state.policy_params[player_id] = deepcopy( + state.policy_params[player_id] + ) + self._train_state.critic_params[player_id] = deepcopy( + state.critic_params[player_id] + ) + + def get_value_fn(self) -> typing.Callable: + def value_fn(obs: jnp.ndarray): + obs = jnp.array(obs) + return self._critic_network.apply( + self.train_state.critic_params[self.player_id], obs + ).squeeze(-1) + + return jax.jit(value_fn) + + def get_policy(self, return_probs=True) -> typing.Callable: + """Get the policy. + + Returns a function that takes a random key, an observation and + optionally an action mask. The function produces actions which are + sampled from the current policy. Additionally, if eturn_probs is true, + it also returns the action probabilities. + + Args: + return_probs: if true, the policy returns a tuple (action, + action_probs). + + Returns: + A function that maps observations to actions + """ + + def _policy(key: jax.random.PRNGKey, obs: jnp.ndarray, action_mask=None): + """The actual policy function. + + Takes a random key, the current observation and optionally an action + mask. + + Args: + key: a random key for sampling + obs: numpy array of observations + action_mask: optional numpy array to mask out illegal actions + + Returns: + Either the sampled actions or, if return_probs is true, a tuple + (actions, action_probs). + """ + params = self._train_state.policy_params[self.player_id] + pi = self._pi_network.apply(params, obs) + if action_mask is not None: + probs = pi.probs * action_mask + probs = probs / probs.sum() + pi = distrax.Categorical(probs=probs) + actions = pi.sample(seed=key) + if return_probs: + return actions, pi.prob(actions) + else: + return actions + + return jax.jit(_policy) + + def step(self, time_step: TimeStep, is_evaluation=False): + """Produces an action and possibly triggers a parameter update. + + LOLA agents depend on having access to previous actions made by the + opponent. Assumes that the field 'observations' of time_step contains a + field 'actions' and its first axis is indexed by the player id. Similar, the + fields 'rewards' and 'legal_actions' are assumed to be of shape + (num_players,). + + Args: + time_step: a TimeStep instance which has a field 'actions' in the + observations dict. + is_evaluation: if true, the agent will not update. + + Returns: + A tuple containing the action that was taken and its probability + under the current policy. 
+ """ + do_step = ( + time_step.is_simultaneous_move() + or self.player_id == time_step.current_player() + ) + action, probs = None, [] + batch_policy = vmap(self._current_policy, in_axes=(0, 0, None)) + if not time_step.last() and do_step: + info_state = time_step.observations['info_state'][self.player_id] + legal_actions = time_step.observations['legal_actions'][self.player_id] + action_mask = np.zeros(self._num_actions) + action_mask[legal_actions] = 1 + + # If we are not in a batched environment, we need to add a batch dimension + if 'batch_size' not in time_step.observations: + info_state = jnp.array(info_state)[None] + batch_size = 1 + else: + batch_size = time_step.observations['batch_size'] + sample_keys = jax.random.split(next(self._rng), batch_size) + action, probs = batch_policy(sample_keys, info_state, action_mask) + + if not is_evaluation: + self._store_time_step(time_step=time_step, action=action) + if time_step.last() and self._should_update(): + self._train_step() + + return rl_agent.StepOutput(action=action, probs=probs) + + def _init_train_state(self, info_state_size: chex.Shape): + init_inputs = jnp.ones(info_state_size) + agent_ids = self._opponent_ids + [self.player_id] + policy_params, policy_opt_states = {}, {} + critic_params, critic_opt_states = {}, {} + for agent_id in agent_ids: + policy_params[agent_id] = self._pi_network.init( + next(self._rng), init_inputs + ) + if agent_id == self.player_id: + policy_opt_state = self._policy_opt.init(policy_params[agent_id]) + else: + policy_opt_state = self._opponent_opt.init(policy_params[agent_id]) + policy_opt_states[agent_id] = policy_opt_state + critic_params[agent_id] = self._critic_network.init( + next(self._rng), init_inputs + ) + critic_opt_states[agent_id] = self._critic_opt.init( + critic_params[agent_id] + ) + + return TrainState( + policy_params=policy_params, + critic_params=critic_params, + policy_opt_states=policy_opt_states, + critic_opt_states=critic_opt_states, + ) + + def _store_time_step(self, time_step: TimeStep, action: np.ndarray): + """Store the time step. + + Converts the timestep and the action into a transition and steps the + counters. + + Args: + time_step: the current time step. + action: the action that was taken before observing time_step + Returns: None + """ + self._step_counter += ( + time_step.observations['batch_size'] + if 'batch_size' in time_step.observations + else 1 + ) + if self._prev_time_step: + transition = self._make_transition(time_step) + self._data.append(transition) + if time_step.last(): + self._prev_time_step = None + self._prev_action = None + self._episode_counter += 1 + else: + obs = time_step.observations['info_state'] + time_step.observations['values'] = jnp.stack( + [ + self._critic_network.apply( + self.train_state.critic_params[id], jnp.array(obs[id]) + ).squeeze(-1) + for id in sorted(self.train_state.critic_params.keys()) + ] + ) + self._prev_time_step = time_step + self._prev_action = action + + def _train_step(self): + """Updates the critic and the policy parameters. + + After the update, the data buffer is cleared. Returns: None + """ + batch = self._construct_episode_batches(self._data) + update_metrics = self._update_agent(batch) + self._metrics.append(update_metrics) + self._data.clear() + + def _should_update(self) -> bool: + """Indicates whether to update or not. + + Returns: + True, if the number of episodes in the buffer is equal to the batch + size. False otherwise. 
+ """ + return ( + self._step_counter >= self._batch_size * (self._num_learn_steps + 1) + and self._episode_counter > 0 + ) + + def _update_agent(self, batch: TransitionBatch) -> typing.Dict: + """Updates the critic and policy parameters of the agent. + + Args: + batch: A batch of training episodes. + + Dimensions (N=player, B=batch_size, T=timesteps, S=state_dim): + action: (N, B, T), + discount: (B, T), + info_state: (N, B, T, *S), + legal_actions_mask: (N, B, T), + reward: (N, B, T), + terminal: (B, T), + values: (N, B, T) + + Returns: + A dictionary that contains relevant training metrics. + """ + metrics = {} + self._num_learn_steps += 1 + + # if we do opponent modelling, we update the opponents first + if self._fit_opponent_model: + opponent_update_metrics = self._update_opponents(batch) + metrics.update( + (f'opp_models/{k}', v) for k, v in opponent_update_metrics.items() + ) + + # then we update the critic + critic_update_metrics = self._update_critic(batch) + metrics.update((f'critic/{k}', v) for k, v in critic_update_metrics.items()) + + # and finally we update the policy + if self._num_learn_steps % self._policy_update_interval == 0: + policy_update_metrics = self._update_policy(batch) + metrics.update( + (f'policy/{k}', v) for k, v in policy_update_metrics.items() + ) + return metrics + + def _construct_episode_batches( + self, transitions: typing.List[TransitionBatch] + ) -> TransitionBatch: + """Constructs a list of transitions into a single transition batch instance. + + The fields 'info_state', 'rewards', 'legal_action_mask' and 'actions' of the + produced transition batch have shape (num_agents, batch_size, + sequence_length, *shape). The fields 'discount' and 'terminal' have shape + (batch_size, sequence_length). + + Args: + transitions: a list of single step transitions + + Returns: + A transition batch instance with items of according shape. 
+ """ + episode, batches = [], [] + max_episode_length = 0 + for transition in transitions: + episode.append(transition) + if transition.terminal.any(): + max_episode_length = max(max_episode_length, len(episode)) + # pylint: disable=no-value-for-parameter + batch = jax.tree_util.tree_map(lambda *xs: jnp.stack(xs), *episode) + batch = batch.replace( + info_state=batch.info_state.transpose(1, 2, 0, 3), + action=batch.action.transpose(1, 2, 0), + legal_actions_mask=batch.legal_actions_mask.T, + reward=batch.reward.transpose(1, 2, 0), + values=batch.values.transpose(1, 2, 0), + discount=batch.discount.transpose(1, 2, 0), + terminal=batch.terminal.transpose(1, 2, 0), + ) + batches.append(batch) + episode.clear() + return batches[0] + + def _update_policy(self, batch: TransitionBatch): + self._train_state, metrics = self._policy_update_fns[self.player_id]( + self._train_state, batch + ) + self._current_policy = self.get_policy(return_probs=True) + return metrics + + def _update_critic(self, batch: TransitionBatch): + self._train_state, metrics = self._critic_update_fns[self.player_id]( + self._train_state, batch + ) + return metrics + + def _update_opponents(self, batch: TransitionBatch): + update_metrics = {} + for opponent in self._opponent_ids: + self._train_state, metrics = self._critic_update_fns[opponent]( + self._train_state, batch + ) + update_metrics.update( + {f'agent_{opponent}/critic/{k}': v for k, v in metrics.items()} + ) + self._train_state, metrics = self._policy_update_fns[opponent]( + self._train_state, batch + ) + update_metrics.update( + {f'agent_{opponent}/policy/{k}': v for k, v in metrics.items()} + ) + return update_metrics + + def _make_transition(self, time_step: TimeStep): + assert self._prev_time_step is not None + legal_actions = self._prev_time_step.observations['legal_actions'][ + self.player_id + ] + legal_actions_mask = np.zeros((self._batch_size, self._num_actions)) + legal_actions_mask[..., legal_actions] = 1 + actions = np.array(time_step.observations['actions']) + rewards = np.array(time_step.rewards) + discounts = self._discount * (1 - time_step.last()) * np.ones_like(rewards) + terminal = time_step.last() * np.ones_like(rewards) + obs = np.array(self._prev_time_step.observations['info_state']) + transition = TransitionBatch( + info_state=obs, + action=actions, + reward=rewards, + discount=discounts, + terminal=terminal, + legal_actions_mask=legal_actions_mask, + values=self._prev_time_step.observations['values'], + ) + if len(rewards.shape) < 2: # if not a batch, add a batch dimension + transition = jax.tree_util.tree_map(lambda x: x[None], transition) + return transition diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/jax/opponent_shaping_jax_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/jax/opponent_shaping_jax_test.py new file mode 100644 index 0000000..63344f4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/jax/opponent_shaping_jax_test.py @@ -0,0 +1,163 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for open_spiel.python.jax.opponent_shaping.""" +import typing +from typing import Tuple +from absl.testing import absltest +from absl.testing import parameterized + +import distrax +import haiku as hk +import jax +import jax.numpy as jnp +import numpy as np +from open_spiel.python import rl_environment +from open_spiel.python.jax.opponent_shaping import OpponentShapingAgent +import pyspiel + +SEED = 24984617 + + +def make_iterated_matrix_game( + game: str, iterations=5, batch_size=8 +) -> rl_environment.Environment: + matrix_game = pyspiel.load_matrix_game(game) + config = {'num_repetitions': iterations, 'batch_size': batch_size} + game = pyspiel.create_repeated_game(matrix_game, config) + env = rl_environment.Environment(game) + return env + + +def make_agent_networks( + num_actions: int, +) -> Tuple[hk.Transformed, hk.Transformed]: + def policy(obs): + logits = hk.nets.MLP(output_sizes=[8, 8, num_actions], with_bias=True)(obs) + logits = jnp.nan_to_num(logits) + return distrax.Categorical(logits=logits) + + def value_fn(obs): + values = hk.nets.MLP(output_sizes=[8, 8, 1], with_bias=True)(obs) + return values + + return hk.without_apply_rng(hk.transform(policy)), hk.without_apply_rng( + hk.transform(value_fn) + ) + + +def run_agents( + agents: typing.List[OpponentShapingAgent], + env: rl_environment.Environment, + num_steps=1000, +): + time_step = env.reset() + for _ in range(num_steps): + actions = [] + for agent in agents: + action, _ = agent.step(time_step) + if action is not None: + action = action.squeeze() + actions.append(action) + if time_step.last(): + time_step = env.reset() + else: + time_step = env.step(actions) + time_step.observations['actions'] = np.array(actions) + + +class LolaPolicyGradientTest(parameterized.TestCase, absltest.TestCase): + + @parameterized.parameters(['matrix_pd']) + def test_run_game(self, game_name): + batch_size = 8 + iterations = 5 + env = make_iterated_matrix_game( + game_name, batch_size=1, iterations=iterations + ) + env.seed(SEED) + key = jax.random.PRNGKey(SEED) + num_actions = env.action_spec()['num_actions'] + policy_network, critic_network = make_agent_networks( + num_actions=num_actions + ) + + # pylint: disable=g-complex-comprehension + agents = [ + OpponentShapingAgent( + player_id=i, + opponent_ids=[1 - i], + seed=key, + correction_type='lola', + env=env, + n_lookaheads=1, + info_state_size=env.observation_spec()['info_state'], + num_actions=env.action_spec()['num_actions'], + policy=policy_network, + critic=critic_network, + batch_size=batch_size, + pi_learning_rate=0.005, + critic_learning_rate=1.0, + policy_update_interval=2, + discount=0.96, + use_jit=False, + ) + for i in range(2) + ] + run_agents(agents=agents, env=env, num_steps=batch_size * 10) + + +class DicePolicyGradientTest(parameterized.TestCase, absltest.TestCase): + + @parameterized.parameters(['matrix_pd']) + def test_run_game(self, game_name): + batch_size = 8 + iterations = 5 + env = make_iterated_matrix_game( + game_name, batch_size=1, iterations=iterations + ) + env.seed(SEED) + key = jax.random.PRNGKey(SEED) + num_actions = env.action_spec()['num_actions'] + policy_network, critic_network = make_agent_networks( + num_actions=num_actions + ) + + # pylint: disable=g-complex-comprehension + agents = [ + OpponentShapingAgent( + player_id=i, + opponent_ids=[1 - i], + seed=key, + correction_type='dice', + env=env, + n_lookaheads=2, + 
info_state_size=env.observation_spec()['info_state'], + num_actions=env.action_spec()['num_actions'], + policy=policy_network, + critic=critic_network, + batch_size=batch_size, + pi_learning_rate=0.005, + critic_learning_rate=1.0, + policy_update_interval=2, + discount=0.96, + use_jit=False, + ) + for i in range(2) + ] + run_agents(agents=agents, env=env, num_steps=batch_size * 10) + + +if __name__ == '__main__': + np.random.seed(SEED) + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/jax/policy_gradient.py b/scenarios/bargaining/open_spiel/open_spiel/python/jax/policy_gradient.py new file mode 100644 index 0000000..e7e329f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/jax/policy_gradient.py @@ -0,0 +1,464 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Policy gradient methods implemented in JAX.""" + +import collections +import chex +import haiku as hk +import jax +import jax.numpy as jnp +import numpy as np +import optax +import rlax + +from open_spiel.python import rl_agent + +Transition = collections.namedtuple( + "Transition", + "info_state action reward discount legal_actions_mask next_info_state") + + +class NetA2C(hk.Module): + """A simple network with a policy head and a baseline value head.""" + + def __init__(self, num_actions, hidden_layers_sizes): + super().__init__() + self._num_actions = num_actions + self._hidden_layers_sizes = hidden_layers_sizes + + def __call__(self, info_state): + """Process a batch of observations.""" + torso = hk.nets.MLP(self._hidden_layers_sizes, activate_final=True) + hidden = torso(info_state) + policy_logits = hk.Linear(self._num_actions)(hidden) + baseline = hk.Linear(1)(hidden) + return policy_logits, baseline + + +class NetPG(hk.Module): + """A simple network with a policy head and an action-value head.""" + + def __init__(self, num_actions, hidden_layers_sizes): + super().__init__() + self._num_actions = num_actions + self._hidden_layers_sizes = hidden_layers_sizes + + def __call__(self, info_state): + """Process a batch of observations.""" + torso = hk.nets.MLP(self._hidden_layers_sizes, activate_final=True) + hidden = torso(info_state) + policy_logits = hk.Linear(self._num_actions)(hidden) + q_values = hk.Linear(self._num_actions)(hidden) + return policy_logits, q_values + + +def generate_a2c_pi_loss(net_apply, loss_class, entropy_cost, l2_actor_weight, + lambda_): + """A function generator generates loss function.""" + + def _a2c_pi_loss(net_params, batch): + info_states, actions, rewards, discounts = batch["info_states"], batch[ + "actions"], batch["rewards"], batch["discounts"] + policy_logits, baselines = net_apply(net_params, info_states) + policy_logits = policy_logits[:-1] + + baselines = jnp.squeeze(baselines, axis=1) + baselines = jnp.concatenate([baselines[:-1], jnp.zeros(1)]) + td_returns = rlax.lambda_returns( + rewards, + discounts, + baselines[1:], + lambda_=lambda_, + stop_target_gradients=True) + advantages = td_returns 
- baselines[:-1] + chex.assert_equal_shape([td_returns, actions, advantages]) + pi_loss = loss_class( + logits_t=policy_logits, + a_t=actions, + adv_t=advantages, + w_t=jnp.ones(td_returns.shape)) + ent_loss = rlax.entropy_loss( + logits_t=policy_logits, w_t=jnp.ones(td_returns.shape)) + l2_loss = jnp.sum(jnp.square(jax.flatten_util.ravel_pytree(net_params)[0])) + return pi_loss + entropy_cost * ent_loss + l2_actor_weight * l2_loss + + return _a2c_pi_loss + + +def generate_a2c_critic_loss(net_apply, l2_critic_weight, lambda_): + """A function generator generates loss function.""" + + def _a2c_critic_loss(net_params, batch): + info_states, rewards, discounts = batch["info_states"], batch[ + "rewards"], batch["discounts"] + _, baselines = net_apply(net_params, info_states) + baselines = jnp.squeeze(baselines, axis=1) + baselines = jnp.concatenate([baselines[:-1], jnp.zeros(1)]) + + td_lambda = rlax.td_lambda( + v_tm1=baselines[:-1], + r_t=rewards, + discount_t=discounts, + v_t=baselines[1:], + lambda_=lambda_, + stop_target_gradients=True) + l2_loss = jnp.sum(jnp.square(jax.flatten_util.ravel_pytree(net_params)[0])) + return jnp.mean(jnp.square(td_lambda)) + l2_critic_weight * l2_loss + + return _a2c_critic_loss + + +def generate_pg_pi_loss(net_apply, loss_class, entropy_cost, l2_actor_weight): + """A function generator generates loss function.""" + + def _pg_loss(net_params, batch): + info_states = batch["info_states"] + policy_logits, q_values = net_apply(net_params, info_states) + chex.assert_equal_shape([policy_logits, q_values]) + pi_loss = loss_class(logits_t=policy_logits, q_t=q_values) + ent_loss = rlax.entropy_loss( + logits_t=policy_logits, w_t=jnp.ones(policy_logits.shape[:1])) + l2_loss = jnp.sum(jnp.square(jax.flatten_util.ravel_pytree(net_params)[0])) + return pi_loss + entropy_cost * ent_loss + l2_actor_weight * l2_loss + + return _pg_loss + + +def generate_pg_critic_loss(net_apply, l2_critic_weight, lambda_): + """A function generator generates loss function.""" + + def _critic_loss(net_params, batch): + info_states, actions, rewards, discounts = batch["info_states"], batch[ + "actions"], batch["rewards"], batch["discounts"] + _, q_values = net_apply(net_params, info_states) + q_values = q_values[:-1] + q_values = jnp.concatenate( + [q_values, jnp.zeros(q_values[-1].reshape(1, -1).shape)]) + + actions = jnp.concatenate([actions, jnp.zeros(1, dtype=int)]) + sarsa_lambda = rlax.sarsa_lambda( + q_tm1=q_values[:-1], + a_tm1=actions[:-1], + r_t=rewards, + discount_t=discounts, + q_t=q_values[1:], + a_t=actions[1:], + lambda_=lambda_, + stop_target_gradients=True) + l2_loss = jnp.sum(jnp.square(jax.flatten_util.ravel_pytree(net_params)[0])) + return jnp.mean(jnp.square(sarsa_lambda)) + l2_critic_weight * l2_loss + + return _critic_loss + + +def generate_act_func(net_apply): + """A function generator generates act function.""" + + def _act(net_params, info_state, action_mask, rng): + info_state = jnp.reshape(info_state, [1, -1]) + policy_logits, _ = net_apply(net_params, info_state) + policy_probs = jax.nn.softmax(policy_logits, axis=1) + + # Remove illegal actions, re-normalize probs + probs = policy_probs[0] * action_mask + + probs /= jnp.sum(probs) + action = jax.random.choice(rng, len(probs), p=probs) + return action, probs + + return _act + + +class PolicyGradient(rl_agent.AbstractAgent): + """Policy Gradient Agent implementation in JAX.""" + + def __init__(self, + player_id, + info_state_size, + num_actions, + loss_str="a2c", + loss_class=None, + hidden_layers_sizes=(128,), + 
lambda_=1.0, + critic_learning_rate=0.01, + pi_learning_rate=0.001, + entropy_cost=0.01, + l2_weight_actor=0.0, + l2_weight_critic=0.0, + num_critic_before_pi=8, + additional_discount_factor=1.0, + max_global_gradient_norm=None, + optimizer_str="sgd", + seed=42): + """Initialize the PolicyGradient agent. + + Args: + player_id: int, player identifier. Usually its position in the game. + info_state_size: int, info_state vector size. + num_actions: int, number of actions per info state. + loss_str: string or None. If string, must be one of ["rpg", "qpg", "rm", + "a2c"] and defined in `_get_loss_class`. If None, a loss class must be + passed through `loss_class`. Defaults to "a2c". + loss_class: Class or None. If Class, it must define the policy gradient + loss. If None a loss class in a string format must be passed through + `loss_str`. Defaults to None. + hidden_layers_sizes: iterable, defines the neural network layers. Defaults + to (128,), which produces a NN: [INPUT] -> [128] -> ReLU -> [OUTPUT]. + lambda_: float, lambda in TD(lambda) or SARSA(lambda). Defaults to 1.0. + critic_learning_rate: float, learning rate used for Critic (Q or V). + Defaults to 0.001. + pi_learning_rate: float, learning rate used for Pi. Defaults to 0.001. + entropy_cost: float, entropy cost used to multiply the entropy loss. Can + be set to None to skip entropy computation. Defaults to 0.001. + l2_weight_actor: l2 penaly weight for actor network. Defaults to 0.0. + l2_weight_critic: l2 penalty weight for critic network. Defaults to + 0.0. + num_critic_before_pi: int, number of Critic (Q or V) updates before each + Pi update. Defaults to 8 (every 8th critic learning step, Pi also + learns). + additional_discount_factor: float, additional discount to compute returns. + Defaults to 1.0, in which case, no extra discount is applied. None that + users must provide *only one of* `loss_str` or `loss_class`. + max_global_gradient_norm: float or None, maximum global norm of a gradient + to which the gradient is shrunk if its value is larger. + optimizer_str: String defining which optimizer to use. Supported values + are {sgd, adam} + seed: random seed + """ + assert bool(loss_str) ^ bool(loss_class), "Please provide only one option." + self._kwargs = locals() + loss_class = loss_class if loss_class else self._get_loss_class(loss_str) + + self.player_id = player_id + self._num_actions = num_actions + self._extra_discount = additional_discount_factor + self._num_critic_before_pi = num_critic_before_pi + + self._episode_data = [] + self._dataset = collections.defaultdict(list) + self._prev_time_step = None + self._prev_action = None + + # Step counters + self._step_counter = 0 + self._episode_counter = 0 + self._num_learn_steps = 0 + + # Keep track of the last training loss achieved in an update step. + self._last_loss_value = None + + self._loss_str = loss_str + + # Network + # activate final as we plug logit and qvalue heads afterwards. 
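+    # NetA2C pairs the policy logits with a scalar baseline head (used for
+    # TD(lambda) advantages); NetPG pairs them with per-action value heads
+    # consumed by the rpg/qpg/rm losses.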
+ net_class = NetA2C if loss_str == "a2c" else NetPG + + def net_func(info_input): + net = net_class(num_actions, hidden_layers_sizes) + return net(info_input) + + hk_net = hk.without_apply_rng(hk.transform(net_func)) + + hk_net_apply = hk_net.apply + self.rng = jax.random.PRNGKey(seed) + init_inputs = jnp.ones((1, info_state_size)) + self.hk_net_params = hk_net.init(self.rng, init_inputs) + + self._act = jax.jit(generate_act_func(hk_net_apply)) + + if optimizer_str == "adam": + critic_optimizer = optax.adam(critic_learning_rate) + pi_optimizer = optax.adam(pi_learning_rate) + + elif optimizer_str == "sgd": + critic_optimizer = optax.sgd(critic_learning_rate) + pi_optimizer = optax.sgd(pi_learning_rate) + + else: + raise ValueError("Not implemented, choose from 'adam' and 'sgd'.") + + if max_global_gradient_norm: + pi_optimizer = optax.chain( + pi_optimizer, optax.clip_by_global_norm(max_global_gradient_norm)) + critic_optimizer = optax.chain( + critic_optimizer, optax.clip_by_global_norm(max_global_gradient_norm)) + + pi_opt_init, pi_opt_update = pi_optimizer.init, pi_optimizer.update + critic_opt_init, critic_opt_update = critic_optimizer.init, critic_optimizer.update + + self._pi_opt_state = pi_opt_init(self.hk_net_params) + + if loss_str == "a2c": + pi_loss_and_grad = jax.value_and_grad( + generate_a2c_pi_loss(hk_net_apply, loss_class, entropy_cost, + l2_weight_actor, lambda_)) + critic_loss_and_grad = jax.value_and_grad( + generate_a2c_critic_loss(hk_net_apply, l2_weight_critic, lambda_)) + self._critic_opt_state = critic_opt_init(self.hk_net_params) + else: + pi_loss_and_grad = jax.value_and_grad( + generate_pg_pi_loss(hk_net_apply, loss_class, entropy_cost, + l2_weight_actor)) + critic_loss_and_grad = jax.value_and_grad( + generate_pg_critic_loss(hk_net_apply, l2_weight_critic, lambda_)) + self._critic_opt_state = critic_opt_init(self.hk_net_params) + + self._jit_pi_update = jax.jit( + self._get_update(pi_opt_update, pi_loss_and_grad)) + self._jit_critic_update = jax.jit( + self._get_update(critic_opt_update, critic_loss_and_grad)) + + def _get_loss_class(self, loss_str): + if loss_str == "rpg": + return rlax.rpg_loss + elif loss_str == "qpg": + return rlax.qpg_loss + elif loss_str == "rm": + return rlax.rm_loss + elif loss_str == "a2c": + return rlax.policy_gradient_loss + + def _get_update(self, opt_update, loss_fn): + + def update(net_params, opt_state, batch): + loss_val, grad_val = loss_fn(net_params, batch) + updates, new_opt_state = opt_update(grad_val, opt_state) + new_net_params = optax.apply_updates(net_params, updates) + return new_net_params, new_opt_state, loss_val + + return update + + def step(self, time_step, is_evaluation=False): + """Returns the action to be taken and updates the network if needed. + + Args: + time_step: an instance of rl_environment.TimeStep. + is_evaluation: bool, whether this is a training or evaluation call. + + Returns: + A `rl_agent.StepOutput` containing the action probs and chosen action. + """ + # Act step: don't act at terminal info states or if its not our turn. 
+ if (not time_step.last()) and (time_step.is_simultaneous_move() or + self.player_id + == time_step.current_player()): + info_state = time_step.observations["info_state"][self.player_id] + legal_actions = time_step.observations["legal_actions"][self.player_id] + action_mask = np.zeros(self._num_actions) + action_mask[legal_actions] = 1 + self.rng, _ = jax.random.split(self.rng) + action, probs = self._act(self.hk_net_params, np.asarray(info_state), + action_mask, self.rng) + else: + action = None + probs = [] + + if not is_evaluation: + self._step_counter += 1 + + # Add data points to current episode buffer. + if self._prev_time_step: + self._add_transition(time_step) + + # Episode done, add to dataset and maybe learn. + + if time_step.last(): + self._episode_counter += 1 + + self._critic_update() + self._num_learn_steps += 1 + if self._num_learn_steps % self._num_critic_before_pi == 0: + self._pi_update() + self._episode_data = [] + + self._prev_time_step = None + self._prev_action = None + return + else: + self._prev_time_step = time_step + self._prev_action = action + + return rl_agent.StepOutput(action=action, probs=probs) + + @property + def loss(self): + return (self._last_critic_loss_value, self._last_pi_loss_value) + + def _add_transition(self, time_step): + """Adds intra-episode transition to the `_episode_data` buffer. + + Adds the transition from `self._prev_time_step` to `time_step`. + Args: + time_step: an instance of rl_environment.TimeStep. + """ + assert self._prev_time_step is not None + legal_actions = ( + self._prev_time_step.observations["legal_actions"][self.player_id]) + legal_actions_mask = np.zeros(self._num_actions) + legal_actions_mask[legal_actions] = 1.0 + transition = Transition( + info_state=( + self._prev_time_step.observations["info_state"][self.player_id][:]), + action=self._prev_action, + reward=time_step.rewards[self.player_id], + discount=time_step.discounts[self.player_id], + legal_actions_mask=legal_actions_mask, + next_info_state=( + time_step.observations["info_state"][self.player_id][:])) + + self._episode_data.append(transition) + + def _critic_update(self): + """Compute the Critic loss on sampled transitions & perform a critic update. + + Returns: + The average Critic loss obtained on this batch. + """ + batch = {} + batch["info_states"] = jnp.asarray( + [transition.info_state for transition in self._episode_data] + + [self._episode_data[-1].next_info_state]) + batch["rewards"] = jnp.asarray( + [transition.reward for transition in self._episode_data]) + batch["discounts"] = jnp.asarray( + [transition.discount for transition in self._episode_data]) + if self._loss_str != "a2c": + batch["actions"] = jnp.asarray( + [transition.action for transition in self._episode_data]) + + self.hk_net_params, self._critic_opt_state, self._last_critic_loss_value = self._jit_critic_update( + self.hk_net_params, self._critic_opt_state, batch) + return self._last_critic_loss_value + + def _pi_update(self): + """Compute the Pi loss on sampled transitions and perform a Pi update. + + Returns: + The average Pi loss obtained on this batch. 
+ """ + batch = {} + batch["info_states"] = jnp.asarray( + [transition.info_state for transition in self._episode_data] + + [self._episode_data[-1].next_info_state]) + + if self._loss_str == "a2c": + batch["discounts"] = jnp.asarray( + [transition.discount for transition in self._episode_data]) + batch["actions"] = jnp.asarray( + [transition.action for transition in self._episode_data]) + batch["rewards"] = jnp.asarray( + [transition.reward for transition in self._episode_data]) + self.hk_net_params, self._pi_opt_state, self._last_pi_loss_value = self._jit_pi_update( + self.hk_net_params, self._pi_opt_state, batch) + return self._last_pi_loss_value diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/jax/policy_gradient_jax_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/jax/policy_gradient_jax_test.py new file mode 100644 index 0000000..85d8d0f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/jax/policy_gradient_jax_test.py @@ -0,0 +1,114 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for open_spiel.python.jax.policy_gradient.""" + +import itertools + +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np + +from open_spiel.python import rl_environment +from open_spiel.python.jax import policy_gradient +import pyspiel + + +SEED = 24984617 + + +class PolicyGradientTest(parameterized.TestCase, absltest.TestCase): + + @parameterized.parameters( + itertools.product(("rpg", "qpg", "rm", "a2c"), + ("kuhn_poker", "leduc_poker"))) + def test_run_game(self, loss_str, game_name): + env = rl_environment.Environment(game_name) + env.seed(SEED) + info_state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + agents = [ + policy_gradient.PolicyGradient( # pylint: disable=g-complex-comprehension + player_id=player_id, + info_state_size=info_state_size, + num_actions=num_actions, + loss_str=loss_str, + hidden_layers_sizes=[32, 32], + lambda_=1.0, + entropy_cost=0.001, + critic_learning_rate=0.01, + pi_learning_rate=0.01, + num_critic_before_pi=4, + seed=SEED) for player_id in [0, 1] + ] + + for _ in range(2): + time_step = env.reset() + while not time_step.last(): + current_player = time_step.observations["current_player"] + current_agent = agents[current_player] + agent_output = current_agent.step(time_step) + time_step = env.step([agent_output.action]) + + for agent in agents: + agent.step(time_step) + + def test_run_hanabi(self): + # Hanabi is an optional game, so check we have it before running the test. 
+ game = "hanabi" + if game not in pyspiel.registered_names(): + return + + num_players = 3 + env_configs = { + "players": num_players, + "max_life_tokens": 1, + "colors": 2, + "ranks": 3, + "hand_size": 2, + "max_information_tokens": 3, + "discount": 0.99 + } + env = rl_environment.Environment(game, **env_configs) + env.seed(SEED) + info_state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + agents = [ + policy_gradient.PolicyGradient( # pylint: disable=g-complex-comprehension + player_id=player_id, + info_state_size=info_state_size, + num_actions=num_actions, + hidden_layers_sizes=[8, 8], + lambda_=1.0, + entropy_cost=0.001, + critic_learning_rate=0.001, + pi_learning_rate=0.001, + num_critic_before_pi=4, + seed=SEED) for player_id in range(num_players) + ] + + time_step = env.reset() + while not time_step.last(): + current_player = time_step.observations["current_player"] + agent_output = [agent.step(time_step) for agent in agents] + time_step = env.step([agent_output[current_player].action]) + + for agent in agents: + agent.step(time_step) + + +if __name__ == "__main__": + np.random.seed(SEED) + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/README.md b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/README.md new file mode 100644 index 0000000..fb6dfaf --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/README.md @@ -0,0 +1,9 @@ +# Mean Field Games Open Spiel Python API + +This is a Python API for Mean Field Games in OpenSpiel. + +This code is experimental and we recommend you not to use it yet unless you are +part of the project. + +The directory `python/mfg/algorithms/*` contain all algorithm implementations +and `python/mfg/games/*` contains the games implementation. diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/average_network_fictitious_play.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/average_network_fictitious_play.py new file mode 100644 index 0000000..1c08cd3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/average_network_fictitious_play.py @@ -0,0 +1,336 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Implementation of the Deep Average-network Fictitious Play. + +Coupled with agents that compute a best-response (BR) at each iteration, instead +of keeping in memory all the BRs from past iterations Deep Average-network +Fictitious Play learns along the way the policy generating the average +distribution. This is done by keeping a buffer of state-action pairs generated +by past BRs and learning the average policy (represented by a neural network) by +minimizing a categorical loss. This approach is inspired by the Neural +Fictitious Self Play (NFSP) method (Heinrich & Silver, 2016), developed +initially for imperfect information games with a finite number of players, and +adapted here to the MFG setting. +""" + +import dataclasses +from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple + +import haiku as hk +import jax +import jax.numpy as jnp +import numpy as np +import optax + +from open_spiel.python import rl_agent +from open_spiel.python import rl_agent_policy +from open_spiel.python import rl_environment +from open_spiel.python.mfg.algorithms import distribution +import pyspiel +from open_spiel.python.utils import reservoir_buffer +from open_spiel.python.utils import training + + +@dataclasses.dataclass +class Transition: + """Transitions stored in the reservoir buffer.""" + info_state: np.ndarray + action_probs: np.ndarray + legal_actions_mask: np.ndarray + + +class AveragePolicy(rl_agent.AbstractAgent): + """NFSP-like agent that learns an average policy using a single network.""" + + def __init__(self, + player_id: int, + br_rl_agent: rl_agent.AbstractAgent, + state_representation_size: int, + num_actions: int, + hidden_layers_sizes: List[int], + params_avg_network: Optional[jnp.ndarray] = None, + reservoir_buffer_capacity: int = 100000, + batch_size: int = 128, + learning_rate: float = 0.01, + min_buffer_size_to_learn: int = 1000, + optimizer_str: str = 'sgd', + gradient_clipping: Optional[float] = None, + seed: int = 42, + tau: float = 1.0): + """Initialize the AveragePolicy agent.""" + self._br_rl_agent = br_rl_agent + self._player_id = player_id + self._num_actions = num_actions + self._batch_size = batch_size + self._min_buffer_size_to_learn = min_buffer_size_to_learn + + self._reservoir_buffer = reservoir_buffer.ReservoirBuffer( + reservoir_buffer_capacity) + + # Keep track of the last training loss achieved in an update step. 
+ self._last_loss_value = None + + # Average policy network. + def network(x): + mlp = hk.nets.MLP(hidden_layers_sizes + [num_actions]) + return mlp(x) + + self.avg_network = hk.without_apply_rng(hk.transform(network)) + + def avg_network_policy(param, info_state): + action_values = self.avg_network.apply(param, info_state) + return jax.nn.softmax(action_values / tau, axis=1) + + self._avg_network_policy = jax.jit(avg_network_policy) + + rng = jax.random.PRNGKey(seed) + x = jnp.ones([1, state_representation_size]) + # Use the specified parameters if any, or initialize the network with random + # weights. + if params_avg_network is None: + self._params_avg_network = self.avg_network.init(rng, x) + else: + self._params_avg_network = jax.tree_util.tree_map(lambda x: x.copy(), + params_avg_network) + self._params_avg_network = jax.device_put(self._params_avg_network) + + if optimizer_str == 'adam': + optimizer = optax.adam(learning_rate) + elif optimizer_str == 'sgd': + optimizer = optax.sgd(learning_rate) + else: + raise ValueError('Not implemented, choose from "adam" and "sgd".') + + if gradient_clipping: + optimizer = optax.chain(optimizer, + optax.clip_by_global_norm(gradient_clipping)) + + opt_init, opt_update = optimizer.init, optimizer.update + + def opt_update_fn(params, opt_state, gradient): + """Learning rule (stochastic gradient descent).""" + updates, opt_state = opt_update(gradient, opt_state) + new_params = optax.apply_updates(params, updates) + return new_params, opt_state + + self._opt_update_fn = opt_update_fn + self._opt_state = opt_init(self._params_avg_network) + self._loss_and_grad = jax.value_and_grad(self._loss_avg, has_aux=False) + + self._jit_update = jax.jit(self._get_update_fn()) + + def _get_update_fn(self): + """Returns the function that updates the parameters.""" + + def update(param_avg, opt_state_avg, info_states, action_probs): + loss_val, grad_val = self._loss_and_grad(param_avg, info_states, + action_probs) + new_param_avg, new_opt_state_avg = self._opt_update_fn( + param_avg, opt_state_avg, grad_val) + return new_param_avg, new_opt_state_avg, loss_val + + return update + + def _act(self, info_state, legal_actions) -> Tuple[int, np.ndarray]: + """Returns an action and the action probabilities.""" + info_state = np.reshape(info_state, [1, -1]) + action_probs = self._avg_network_policy(self._params_avg_network, + info_state) + # Remove illegal actions and normalize probs + probs = np.zeros(self._num_actions) + action_probs = np.asarray(action_probs) + probs[legal_actions] = action_probs[0][legal_actions] + probs /= sum(probs) + action = np.random.choice(len(probs), p=probs) + return action, probs + + @property + def loss(self) -> Optional[float]: + """Return the latest loss.""" + return self._last_loss_value + + def step(self, + time_step: rl_environment.TimeStep, + is_evaluation: bool = True) -> Optional[rl_agent.StepOutput]: + """Returns the action to be taken by following the average network policy. + + Note that unlike most other algorithms, this method doesn't train the agent. + Instead, we add new samples to the reservoir buffer and the training happens + at a later stage. + + Args: + time_step: an instance of rl_environment.TimeStep. + is_evaluation: bool, whether this is a training or evaluation call. + + Returns: + A `rl_agent.StepOutput` containing the action probs and chosen action. + """ + + # Prepare for the next episode. + if time_step.last(): + return + + if is_evaluation: + # Use the average policy network. 
+ info_state = time_step.observations['info_state'][self._player_id] + legal_actions = time_step.observations['legal_actions'][self._player_id] + action, probs = self._act(info_state, legal_actions) + return rl_agent.StepOutput(action=action, probs=probs) + + # Use the best response agent and add the transition in the reservoir + # buffer. + br_agent_output = self._br_rl_agent.step(time_step, is_evaluation=True) + self._add_transition(time_step, br_agent_output) + return br_agent_output + + def _add_transition(self, time_step, agent_output): + """Adds the new transition using `time_step` to the reservoir buffer. + + Transitions are in the form (time_step, agent_output.probs, legal_mask). + + Args: + time_step: an instance of rl_environment.TimeStep. + agent_output: an instance of rl_agent.StepOutput. + """ + legal_actions = time_step.observations['legal_actions'][self._player_id] + legal_actions_mask = np.zeros(self._num_actions) + legal_actions_mask[legal_actions] = 1.0 + transition = Transition( + info_state=(time_step.observations['info_state'][self._player_id][:]), + action_probs=agent_output.probs, + legal_actions_mask=legal_actions_mask) + self._reservoir_buffer.add(transition) + + def _loss_avg(self, param_avg, info_states, action_probs): + avg_logit = self.avg_network.apply(param_avg, info_states) + loss_value = -jnp.sum( + action_probs * jax.nn.log_softmax(avg_logit)) / avg_logit.shape[0] + return loss_value + + def learn(self) -> Optional[float]: + """Compute the loss on sampled transitions and perform a avg-network update. + + If there are not enough elements in the buffer, no loss is computed and + `None` is returned instead. + + Returns: + The average loss obtained on this batch of transitions or `None`. + """ + if (len(self._reservoir_buffer) < self._batch_size or + len(self._reservoir_buffer) < self._min_buffer_size_to_learn): + return None + + transitions = self._reservoir_buffer.sample(self._batch_size) + info_states = np.asarray([t.info_state for t in transitions]) + action_probs = np.asarray([t.action_probs for t in transitions]) + + self._params_avg_network, self._opt_state, loss_val_avg = self._jit_update( + self._params_avg_network, self._opt_state, info_states, action_probs) + self._last_loss_value = float(loss_val_avg) + return loss_val_avg + + +class AverageNetworkFictitiousPlay(object): + """Deep Average-network Fictitious Play. + + See the file description for more information. + """ + + def __init__(self, + game: pyspiel.Game, + envs: Sequence[rl_environment.Environment], + br_rl_agents: Sequence[rl_agent.AbstractAgent], + num_episodes_per_iteration: int, + num_training_steps_per_iteration: int, + eval_every: int = 200, + logging_fn: Optional[Callable[[int, int, Dict[str, Any]], + None]] = None, + **kwargs): + """Initializes the greedy policy. + + Args: + game: The game to analyze. + envs: RL environment for each player. + br_rl_agents: Best response, e.g. DQN, agents for each player. + num_episodes_per_iteration: Number of episodes to collect samples that are + added to the reservoir buffer. + num_training_steps_per_iteration: Number of steps to train the average + policy in each iteration. + eval_every: Number of training steps between two evaluations. + logging_fn: Callable for logging the metrics. The arguments will be the + current iteration, episode and a dictionary of metrics to log. + **kwargs: kwargs passed to the AveragePolicy() constructor. 
+ """ + self._game = game + self._envs = envs + self._num_episodes_per_iteration = num_episodes_per_iteration + self._num_training_steps_per_iteration = num_training_steps_per_iteration + self._eval_every = eval_every + self._logging_fn = logging_fn + + self._num_players = game.num_players() + self._fp_iteration = 0 + + env = self._envs[0] + info_state_size = env.observation_spec()['info_state'][0] + num_actions = env.action_spec()['num_actions'] + + self._avg_rl_agents = [ + AveragePolicy(p, br_rl_agents[p], info_state_size, num_actions, + **kwargs) for p in range(self._num_players) + ] + self._policy = rl_agent_policy.JointRLAgentPolicy( + self._game, + {idx: agent for idx, agent in enumerate(self._avg_rl_agents)}, + use_observation=env.use_observation) + self._update_distribution() + + def _update_distribution(self): + """Calculates the current distribution and updates the environments.""" + self._distribution = distribution.DistributionPolicy( + self._game, self._policy) + for env in self._envs: + env.update_mfg_distribution(self._distribution) + + @property + def policy(self) -> rl_agent_policy.JointRLAgentPolicy: + return self._policy + + def iteration(self): + """An average-network fictitious play step.""" + # Generate samples using latest best-response and add them to the reservoir + # buffer. Note that the algorithm is agnostic to the best-response policies + # as we only use them to collect new samples. They can be approximate (e.g. + # backed by a deep algorithm) or exact. + training.run_episodes( + self._envs, + self._avg_rl_agents, + num_episodes=self._num_episodes_per_iteration, + is_evaluation=False) + + # Train the average policy. + for step in range(self._num_training_steps_per_iteration): + for avg_rl_agent in self._avg_rl_agents: + avg_rl_agent.learn() + + if self._logging_fn and (step + 1) % self._eval_every == 0: + self._logging_fn( + self._fp_iteration, step, { + f'avg_agent{i}/loss': float(agent.loss) + for i, agent in enumerate(self._avg_rl_agents) + }) + + # Update the distribution. + self._update_distribution() + self._fp_iteration += 1 diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/average_network_fictitious_play_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/average_network_fictitious_play_test.py new file mode 100644 index 0000000..ce44345 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/average_network_fictitious_play_test.py @@ -0,0 +1,89 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for deep average-network fictitious play.""" +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np + +from open_spiel.python import policy +from open_spiel.python import rl_environment +from open_spiel.python.jax import dqn +from open_spiel.python.mfg.algorithms import average_network_fictitious_play +from open_spiel.python.mfg.algorithms import distribution +from open_spiel.python.mfg.algorithms import nash_conv +from open_spiel.python.mfg.games import crowd_modelling # pylint: disable=unused-import +import pyspiel +from open_spiel.python.utils import training + + +class AverageNetworkFictitiousPlayTest(parameterized.TestCase): + + @parameterized.named_parameters(('cpp', 'mfg_crowd_modelling'), + ('python', 'python_mfg_crowd_modelling')) + def test_train(self, name): + """Checks that the training works.""" + game = pyspiel.load_game(name) + assert game.num_players() == 1 + uniform_policy = policy.UniformRandomPolicy(game) + uniform_dist = distribution.DistributionPolicy(game, uniform_policy) + env = rl_environment.Environment( + game, mfg_distribution=uniform_dist, mfg_population=0) + info_state_size = env.observation_spec()['info_state'][0] + num_actions = env.action_spec()['num_actions'] + np.random.seed(0) + + dqn_args = { + 'batch_size': 32, + 'epsilon_end': 0.1, + 'epsilon_start': 0.1, + 'hidden_layers_sizes': [128], + 'learn_every': 32, + 'learning_rate': 0.01, + 'min_buffer_size_to_learn': 32, + 'optimizer_str': 'adam', + 'replay_buffer_capacity': 2000, + 'update_target_network_every': 32, + } + br_agent = dqn.DQN(0, info_state_size, num_actions, **dqn_args) + + args = { + 'batch_size': 32, + 'hidden_layers_sizes': [128], + 'reservoir_buffer_capacity': 100000, + 'learning_rate': 0.01, + 'min_buffer_size_to_learn': 32, + 'optimizer_str': 'adam', + 'seed': 0, + 'tau': 1.0, + } + fp = average_network_fictitious_play.AverageNetworkFictitiousPlay( + game, [env], [br_agent], + num_episodes_per_iteration=50, + num_training_steps_per_iteration=10, + **args) + + # Run several iterations. + for _ in range(5): + training.run_episodes([env], [br_agent], + num_episodes=50, + is_evaluation=False) + fp.iteration() + + # Just sanity check. + nash_conv_fp = nash_conv.NashConv(game, fp.policy) + self.assertLessEqual(nash_conv_fp.nash_conv(), 15) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/bandit_regret.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/bandit_regret.py new file mode 100644 index 0000000..a6fffcf --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/bandit_regret.py @@ -0,0 +1,579 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Mean-Field Bandit Regret Minimizers from Muller et al.""" + +from typing import Optional + +import numpy as np +import scipy.optimize +import scipy.sparse.linalg + +from open_spiel.python.mfg.algorithms import distribution +from open_spiel.python.mfg.algorithms import utils + + +# pylint: disable=invalid-name +def get_proba_constraints_positivity(nus): + A = np.zeros((nus.shape[0], 1 + nus.shape[0])) + A[:, 1:] = -np.eye(nus.shape[0]) + return A, np.zeros(A.shape[0]) + + +def get_proba_constraint_sum_eq(nus): + A = np.ones((1, 1 + nus.shape[0])) + A[0, 0] = 0.0 + return A, np.array([1.0]) + + +def compress_internal_weights(nus, regrets): + """Compress internal weights. + + Via optimization, identify which regret timesteps are useful and which aren't + for internal regret. + + Args: + nus: Distribution per timestep. + regrets: Regret value per timestep and action. + + Returns: + Weights over nus which can be used to average the no-regret distribution. + """ + + def get_c(nus): + return np.concatenate((np.array([1.0]), np.zeros(nus.shape[0]))) + + def get_max_constraint(regrets): + regrets = np.transpose(np.array(regrets), axes=[0, 2, 1]) + regrets = regrets.reshape(-1, regrets.shape[-1]) + A = np.zeros((regrets.shape[0], 1 + regrets.shape[1])) + A[:, 1:] = regrets + A[:, 0] = -1.0 + + b = np.zeros(A.shape[0]) + return A, b + + def get_a_ub(nus, regrets): + Amax, bmax = get_max_constraint(regrets) + Apos, bpos = get_proba_constraints_positivity(nus) + return np.concatenate((Amax, Apos), axis=0), np.concatenate( + (bmax, bpos), axis=0 + ) + + c = get_c(nus) + + A_ub, b_ub = get_a_ub(nus, regrets) + A_eq, b_eq = get_proba_constraint_sum_eq(nus) + + res = scipy.optimize.linprog( + c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq, options={"tol": 1e-10} + ) + new_weights = res.x + return new_weights[1:] + + +def compress_external_weights(nus, regrets, lbd=0.0): + """Compress internal weights. + + Via optimization, identify which regret timesteps are useful and which aren't + for external regret. + + Args: + nus: Distribution per timestep. + regrets: Regret value per timestep and action. + lbd: Sparsity penalty. + + Returns: + Weights over nus which can be used to average the no-regret distribution. + """ + + def get_c(nus): + return np.concatenate((np.array([1.0]), np.zeros(nus.shape[0]))) + + def get_max_constraints(nus, regrets, lbd): + A = np.zeros((regrets.shape[1], 1 + nus.shape[0])) + A[:, 0] = -1.0 + A[:, 1:] = np.transpose( + regrets + - np.sum(regrets * nus, axis=1).reshape(-1, 1) + - lbd * np.abs(regrets) + ) + return A, np.zeros(A.shape[0]) + + def get_a_ub(nus, regrets, lbd): + Amax, bmax = get_max_constraints(nus, regrets, lbd) + Apos, bpos = get_proba_constraints_positivity(nus) + return np.concatenate((Amax, Apos), axis=0), np.concatenate( + (bmax, bpos), axis=0 + ) + + c = get_c(nus) + + A_ub, b_ub = get_a_ub(nus, regrets, lbd) + A_eq, b_eq = get_proba_constraint_sum_eq(nus) + + res = scipy.optimize.linprog( + c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq, options={"tol": 1e-10} + ) + new_weights = res.x + return new_weights[1:] + + +# Faster than using scipy.linalg.eig. +def power_method(w_nus): + """Quick implementation of the power method. + + Args: + w_nus: + + Returns: + Highest eigenvalue of the system. + + Raises: + ValueError: when the power method did not converge after 10.000 trials. 
+ """ + p = np.ones(len(w_nus)) + pprime = np.dot(p, w_nus) + n_trials = 10000 + i = 0 + while np.sum(np.abs(pprime - p)) > 1e-8 and i < n_trials: + p = pprime + pprime = np.dot(p, w_nus) + pprime[pprime < 0] = 0.0 + pprime /= np.sum(pprime) + i += 1 + + if np.sum(np.abs(pprime - p)) > 1e-8 and i >= n_trials: + raise ValueError( + "Power method did not converge after {} trials.".format(n_trials) + ) + + p[p < 0] = 0.0 + return p / np.sum(p) + + +class RegretMinimizer(object): + """Base class for Regret Minimizers. + + Implements base functions for regret minimizers to implement. + + Attributes: + _game: Pyspiel game. + _regret_steps_per_step: Number of regret steps per `step` call (Maximum + number in case `stop_early` is true) + _rho_tol: If `_compress_nus` is true, minimum probability threshold ( + Probabilities below `rho_tol` will be filtered out). + _compress_nus: Whether to compress nus (Remove nus with low selection + probability) or not. + _compress_lbd: Penalty term in L1 minimization when compressing nus. + _stop_early: Whether to stop regret computation when average regret is lower + than `_stop_regret_threshold` or to keep going until + `_regret_steps_per_step` steps have been accomplished. + _stop_regret_threshold: If `stop_early` is true, average regret threshold + under which the algorithm will stop. + _policies: List of Policies + _value_estimator: Value estimation function. + _value_estimation_n: Number of runs to average _value_estimator's result on. + """ + + def __init__( + self, + game, + policies, + regret_steps_per_step: int = 1, + rho_tol: float = 1e-4, + compress_nus: bool = True, + compress_every: int = 1, + compress_lbd: float = 0.0, + stop_early: bool = True, + stop_regret_threshold: float = 1e-3, + value_estimator=utils.sample_value, + value_estimation_n: int = 1, + compute_internal_regret: bool = False, + ): + self._game = game + self._regret_steps_per_step = regret_steps_per_step + + self._compress_nus = compress_nus + self._compress_every = compress_every + self._compress_lbd = compress_lbd + + self._stop_early = stop_early + self._stop_regret_threshold = stop_regret_threshold + + self._rho_tol = rho_tol + self._policies = policies + + self._value_estimator = value_estimator + self._value_estimation_n = value_estimation_n + + self._compute_internal_regret = compute_internal_regret + + def update_policy_mus(self): + """Update the stored distributions of our policies.""" + self._policy_mus = [ + distribution.DistributionPolicy(self._game, policy) + for policy in self._policies + ] + + def get_nu(self): + """Returns current Population Distribution.""" + raise NotImplementedError + + def step(self): + """Make a regret minimization step.""" + raise NotImplementedError + + def step_for(self, T): + """Do `T` steps.""" + raise NotImplementedError + + def compute_average_regret(self): + raise NotImplementedError + + def compute_regrets(self): + raise NotImplementedError + + def reset(self, policies): + """Restart the bandit with new policies.""" + raise NotImplementedError + + +def polynomial_weight_update(weights, rewards, eta): + return weights * (1 + eta * rewards) + + +class PolynomialWeightAlgorithm(RegretMinimizer): + """Implements the Polynomial Weight Algorithm Regret minimizer. + + This is an external-regret minimizer, adapted here to the Mean-Field, + Partially-Observable case. 
+ """ + + def __init__( + self, + game, + policies, + eta: Optional[float] = None, + regret_steps_per_step: int = 1, + rho_tol: float = 1e-4, + compress_nus: bool = True, + compress_every: int = 1, + compress_lbd: float = 0.0, + stop_early: bool = True, + stop_regret_threshold: float = 1e-3, + value_estimator=utils.sample_value, + value_estimation_n: int = 1, + compute_internal_regret: bool = False, + ): + super().__init__( + game, + policies, + regret_steps_per_step=regret_steps_per_step, + rho_tol=rho_tol, + compress_nus=compress_nus, + compress_every=compress_every, + compress_lbd=compress_lbd, + stop_early=stop_early, + stop_regret_threshold=stop_regret_threshold, + value_estimator=value_estimator, + value_estimation_n=value_estimation_n, + compute_internal_regret=compute_internal_regret, + ) + if self._compute_internal_regret: + self._ws = [np.ones(len(policies)) for _ in range(len(policies))] + self._p = np.ones(len(policies)) / (1.0 * len(policies)) + else: + self._w = np.ones(len(policies)) + + if eta is None: + assert regret_steps_per_step is not None, ( + "Both `eta` and " + "`regret_steps_per_step` were " + "None, whereas our algorithm " + "requires either value to be " + "set." + ) + self.compute_optimal_eta() + else: + self._eta = eta + + self._nus = [] + self._rewards = [] + self._policy_mus = [] + self._nu_weights = [] + + def get_all_w_nus(self): + assert self._compute_internal_regret + return [w / np.sum(w) for w in list(self._ws)] + + def get_nu(self): + if self._compute_internal_regret: + return np.sum( + self._p.reshape(-1, 1) * np.array(self.get_all_w_nus()), axis=0 + ) + else: + return self._w / np.sum(self._w) + + def compute_p(self): + assert ( + self._compute_internal_regret + ), "`p` does not exist when computing external regret." 
+ w_nus = np.array(self.get_all_w_nus()) + + p = power_method(w_nus) + self._p = p + + def _update_weights(self, rewards): + if self._compute_internal_regret: + self._ws = [ + w * (1 + self._eta * rewards * p) for w, p in zip(self._ws, self._p) + ] + self.compute_p() + else: + self._w = self._w * (1 + self._eta * rewards) + + def step(self): + rewards = np.zeros(len(self._policies)) + nu = self.get_nu() + self._nus.append(nu) + self._nu_weights = list(self._nu_weights) + self._nu_weights.append(1.0) + + mu = utils.MixedDistribution(self._policy_mus, nu) + for _ in range(self._value_estimation_n): + for index, policy in enumerate(self._policies): + rewards[index] += self._value_estimator(policy, mu, self._game) + rewards /= self._value_estimation_n + + self._update_weights(rewards) + self._rewards.append(rewards) + + def step_for(self, T): + if self._compute_internal_regret: + print("Minimizing Internal Regret") + else: + print("Minimizing External Regret") + for t in range(T): + self.step() + if self._stop_early and (t % self._compress_every == 0): + try: + regret, weights = self.get_post_compression_regret_and_weights() + # print("{}".format(regret)) + assert np.abs(np.sum(weights) - 1.0) < 1e-8 + except: # pylint: disable=bare-except + print("Simplex method encountered an error.") + continue + if regret < self._stop_regret_threshold: + break + self.compress_nus_and_weights(weights) + + def get_post_compression_regret_and_weights(self): + """Compress the regret and weights.""" + if self._compute_internal_regret: + nu_weights = compress_internal_weights( + self.get_nus(), self.compute_regrets() + ) + regret = np.max([ + np.max(np.sum(nu_weights.reshape(-1, 1) * a, axis=0)) + for a in self.compute_regrets() + ]) + else: + nu_weights = compress_external_weights( + self.get_nus(), self.compute_regrets(), lbd=self._compress_lbd + ) + regret = np.max( + np.sum(nu_weights.reshape(-1, 1) * self.compute_regrets(), axis=0) + ) + return regret, nu_weights + + def compress_nus_and_weights(self, nu_weights): + """Run L1 optimization to only keep important members of `nus`.""" + if self._compress_nus: + try: + assert np.abs(np.sum(nu_weights) - 1.0) < 1e-8 + except: # pylint: disable=bare-except + # If the optimization was unsuccessful, do *not* compress. 
+ return + + new_nus = [ + nu + for weight, nu in zip(nu_weights, self._nus) + if weight > self._rho_tol + ] + new_rewards = [ + reward + for weight, reward in zip(nu_weights, self._rewards) + if weight > self._rho_tol + ] + new_nu_weights = [ + weight for weight in nu_weights if weight > self._rho_tol + ] + new_nu_weights = np.array(new_nu_weights) / np.sum(new_nu_weights) + + self._nus = new_nus + self._rewards = new_rewards + self._nu_weights = new_nu_weights + + def normalize_nu_weights(self): + self._nu_weights = np.array(self._nu_weights) / np.sum(self._nu_weights) + + def get_normalized_nu_weights(self): + return np.array(self._nu_weights) / np.sum(self._nu_weights) + + def compute_regrets(self): + if self._compute_internal_regret: + regrets = [] + nus = np.array(self._nus) + rewards = np.array(self._rewards) + for action in range(rewards.shape[1]): + on_policy_values = (rewards[:, action] * nus[:, action]).reshape(-1, 1) + action_values = rewards * nus[:, action].reshape(-1, 1) + regrets.append(action_values - on_policy_values) + else: + on_policy_value = np.sum( + self._rewards * np.array(self._nus), axis=1, keepdims=True + ) + policy_value = self._rewards + regrets = policy_value - on_policy_value + return regrets + + def compute_average_regret(self): + nu_weights = self.get_normalized_nu_weights() + if self._compute_internal_regret: + regrets = 0.0 + nus = np.array(self._nus) + rewards = np.array(self._rewards) + for action in range(rewards.shape[1]): + on_policy_values = (rewards[:, action] * nus[:, action]).reshape(-1, 1) + action_values = rewards * nus[:, action].reshape(-1, 1) + regrets += np.max( + np.sum( + nu_weights.reshape(-1, 1) * (action_values - on_policy_values), + axis=0, + ) + ) + else: + regrets = np.sum( + nu_weights.reshape(-1, 1) * self.compute_regrets(), axis=0 + ) + return np.max(regrets) / len(self._nus) + + def get_nus(self): + return np.array(self._nus) + + def get_mus(self): + mus = [] + for nu in self._nus: + mu = utils.MixedDistribution(self._policy_mus, nu) + mus.append(mu) + return mus + + def get_rewards(self): + return self._rewards + + def get_mus_and_weights(self): + mus = self.get_mus() + self.normalize_nu_weights() + return mus, self._nu_weights + + def compute_optimal_eta(self): + if self._regret_steps_per_step is not None: + self._eta = min( + np.sqrt(np.log(len(self._policies)) / self._regret_steps_per_step), + 0.5, + ) + + def reset(self, policies): + if self._compute_internal_regret: + self._ws = [np.ones(len(policies)) for _ in range(len(policies))] + self._p = np.ones(len(policies)) / (1.0 * len(policies)) + else: + self._w = np.ones(len(policies)) + self._policies = policies + self._nus = [] + self._rewards = [] + self._policy_mus = [] + self._nu_weights = [] + self.update_policy_mus() + self.compute_optimal_eta() + + +class Hedge(PolynomialWeightAlgorithm): + """Hedge algorithm implementation.""" + + def __init__( + self, + game, + policies, + eta: Optional[float] = None, + regret_steps_per_step: int = 1, + rho_tol: float = 1e-4, + compress_nus: bool = True, + compress_lbd: float = 0.0, + compress_every: int = 1, + stop_early: bool = True, + stop_regret_threshold: float = 1e-3, + value_estimator=utils.sample_value, + value_estimation_n: int = 1, + compute_internal_regret: bool = False, + ): + super().__init__( + game, + policies, + eta=eta, + regret_steps_per_step=regret_steps_per_step, + rho_tol=rho_tol, + compress_nus=compress_nus, + compress_lbd=compress_lbd, + stop_early=stop_early, + stop_regret_threshold=stop_regret_threshold, + 
value_estimator=value_estimator, + value_estimation_n=value_estimation_n, + compute_internal_regret=compute_internal_regret, + ) + + if self._compute_internal_regret: + self._ws = [np.ones(len(policies)) for _ in range(len(policies))] + self._p = np.ones(len(policies)) / (1.0 * len(policies)) + else: + self._w = np.ones(len(policies)) + + if eta is None: + assert regret_steps_per_step is not None, ( + "Both `eta` and " + "`regret_steps_per_step` were " + "None, whereas our algorithm " + "requires either value to be " + "set." + ) + self.compute_optimal_eta() + else: + self._eta = eta + + self._compress_every = compress_every + + self._nus = [] + self._rewards = [] + self._policy_mus = [] + self._nu_weights = [] + + def _update_weights(self, rewards): + if self._compute_internal_regret: + self._ws = [ + w * np.exp(self._eta * rewards * p) for w, p in zip(self._ws, self._p) + ] + self.compute_p() + else: + self._w = self._w * np.exp(self._eta * rewards) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/benchmark.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/benchmark.py new file mode 100644 index 0000000..e1f0431 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/benchmark.py @@ -0,0 +1,75 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
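Editor's note: the two regret minimizers defined above differ only in how the per-policy weights are updated; in both cases the played mixture returned by `get_nu()` is the normalized weight vector, and the default step size follows `compute_optimal_eta`, i.e. `min(sqrt(log(P) / T), 0.5)` for `P` policies and `T` regret steps. The NumPy sketch below contrasts the two update rules on made-up per-step policy values; it is illustrative only and not part of the patch.

```
import numpy as np

eta = 0.1
w_poly = np.ones(3)    # one weight per policy (external-regret case)
w_hedge = np.ones(3)

# Two made-up regret steps, each giving an estimated value per policy.
for rewards in (np.array([1.0, 0.2, -0.5]),
                np.array([0.3, 0.9, 0.0])):
    w_poly *= 1.0 + eta * rewards        # PolynomialWeightAlgorithm update
    w_hedge *= np.exp(eta * rewards)     # Hedge update

nu_poly = w_poly / w_poly.sum()          # mixture over policies, as in get_nu()
nu_hedge = w_hedge / w_hedge.sum()
print(nu_poly.round(3), nu_hedge.round(3))
```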
+ +"""Simple benchmark for MFG algorithms and environments.""" + +import itertools +import time +from typing import Sequence + +from absl import app +from absl import flags + +from open_spiel.python.mfg import games # pylint: disable=unused-import +from open_spiel.python.mfg.algorithms import fictitious_play +import pyspiel + +FLAGS = flags.FLAGS + +flags.DEFINE_list('games', + ['python_mfg_crowd_modelling', 'mfg_crowd_modelling'], + 'List of games to benchmark.') +flags.DEFINE_list( + 'parameters', ['size:10;100', 'horizon:10;100'], + 'List of parameters to sweep on (see default flag value for ' + 'syntax).') + + +def convert_param_spec(param_spec): + """Converts 'size:10;200' into ('size', [10, 200]).""" + split = param_spec.split(':', 2) + return split[0], [int(v) for v in split[1].split(';')] + + +def main(argv: Sequence[str]) -> None: + if len(argv) > 1: + raise app.UsageError('Too many command-line arguments.') + + param_names, param_values = zip( + *[convert_param_spec(spec) for spec in FLAGS.parameters]) + header = (['game_name'] + list(param_names) + + ['fictitious_play_iteration_time']) + timing_results = [] + for game_name in FLAGS.games: + for param_tuple in itertools.product(*param_values): + result_line = [game_name] + [str(p) for p in param_tuple] + print('Computing timings for:', ' '.join(result_line)) + param_dict = dict(zip(param_names, param_tuple)) + game = pyspiel.load_game(game_name, param_dict) + t0 = time.time() + fp = fictitious_play.FictitiousPlay(game) + fp.iteration() + elapsed = time.time() - t0 + result_line.append(f'{elapsed:.4f}s') + print(' '.join(result_line)) + timing_results.append(result_line) + + print('\nRESULTS:') + print(' '.join(header)) + for line in timing_results: + print(' '.join([str(v) for v in line])) + + +if __name__ == '__main__': + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/best_response_value.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/best_response_value.py new file mode 100644 index 0000000..327e9d5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/best_response_value.py @@ -0,0 +1,116 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Does a backward pass to output a value of a best response policy.""" +from typing import Optional + +from open_spiel.python.mfg import distribution as distribution_std +from open_spiel.python.mfg import value +import pyspiel + + +class BestResponse(value.ValueFunction): + """Computes a best response value.""" + + def __init__(self, + game, + distribution: distribution_std.Distribution, + state_value: Optional[value.ValueFunction] = None, + root_state=None): + """Initializes the best response calculation. + + Args: + game: The game to analyze. + distribution: A `distribution_std.Distribution` object. + state_value: A state value function. Default to TabularValueFunction. + root_state: The state of the game at which to start. 
If `None`, the game + root state is used. + """ + super().__init__(game) + if root_state is None: + self._root_states = game.new_initial_states() + else: + self._root_states = [root_state] + self._distribution = distribution + self._state_value = (state_value if state_value + else value.TabularValueFunction(game)) + + self.evaluate() + + def eval_state(self, state): + """Evaluate the value of a state. + + Args: + state: a game state. + + Returns: + the optimal value of the state + + Recursively computes the value of the optimal policy given the fixed state + distribution. `self._state_value` is used as a cache for pre-computed + values. + """ + state_str = state.observation_string(pyspiel.PlayerId.DEFAULT_PLAYER_ID) + if self._state_value.has(state_str): + return self._state_value(state_str) + if state.is_terminal(): + self._state_value.set_value( + state_str, + state.rewards()[state.mean_field_population()]) + return self._state_value(state_str) + if state.current_player() == pyspiel.PlayerId.CHANCE: + self._state_value.set_value(state_str, 0.0) + for action, prob in state.chance_outcomes(): + new_state = state.child(action) + self._state_value.add_value(state_str, + prob * self.eval_state(new_state)) + return self._state_value(state_str) + if state.current_player() == pyspiel.PlayerId.MEAN_FIELD: + dist = [ + # We need to default to 0, because + # `state.distribution_support()` might contain states that + # we did not reach yet. These states should be given a + # probability of 0. + self._distribution.value_str(str_state, 0.) + for str_state in state.distribution_support() + ] + new_state = state.clone() + new_state.update_distribution(dist) + self._state_value.set_value( + state_str, + state.rewards()[state.mean_field_population()] + + self.eval_state(new_state)) + return self._state_value(state_str) + else: + assert int(state.current_player()) >= 0, "The player id should be >= 0" + max_q = max( + self.eval_state(state.child(action)) + for action in state.legal_actions()) + self._state_value.set_value( + state_str, + state.rewards()[state.mean_field_population()] + max_q) + return self._state_value(state_str) + + def evaluate(self): + """Evaluate the best response value on all states.""" + for state in self._root_states: + self.eval_state(state) + + def value(self, state, action=None): + if action is None: + return self._state_value( + state.observation_string(pyspiel.PlayerId.DEFAULT_PLAYER_ID)) + new_state = state.child(action) + return state.rewards()[state.mean_field_population()] + self._state_value( + new_state.observation_string(pyspiel.PlayerId.DEFAULT_PLAYER_ID)) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/best_response_value_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/best_response_value_test.py new file mode 100644 index 0000000..3018420 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/best_response_value_test.py @@ -0,0 +1,44 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for best_response_value.""" + +from absl.testing import absltest +from absl.testing import parameterized + +from open_spiel.python import policy +from open_spiel.python.mfg import value +from open_spiel.python.mfg.algorithms import best_response_value +from open_spiel.python.mfg.algorithms import distribution +from open_spiel.python.mfg.games import crowd_modelling # pylint: disable=unused-import +import pyspiel + + +class BestResponseTest(parameterized.TestCase): + + @parameterized.named_parameters(('python', 'python_mfg_crowd_modelling'), + ('cpp', 'mfg_crowd_modelling')) + def test_best_response(self, name): + """Checks if the value of a policy computation works.""" + game = pyspiel.load_game(name) + uniform_policy = policy.UniformRandomPolicy(game) + dist = distribution.DistributionPolicy(game, uniform_policy) + br_value = best_response_value.BestResponse( + game, dist, value.TabularValueFunction(game)) + br_val = br_value(game.new_initial_state()) + self.assertAlmostEqual(br_val, 30.029387484327486) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/boltzmann_policy_iteration.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/boltzmann_policy_iteration.py new file mode 100644 index 0000000..c9faab5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/boltzmann_policy_iteration.py @@ -0,0 +1,35 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Boltzmann Policy Iteration.""" + +from open_spiel.python import policy as policy_lib +from open_spiel.python.mfg.algorithms import mirror_descent + + +class BoltzmannPolicyIteration(mirror_descent.MirrorDescent): + """Boltzmann Policy Iteration algorithm. + + In this algorithm, at each iteration, we update the policy by first computing + the Q-function that evaluates the current policy, and then take a softmax. + This corresponds to using Online Mirror Descent algorithm without summing + Q-functions but simply taking the latest Q-function. + """ + + def get_projected_policy(self) -> policy_lib.Policy: + """Returns the projected policy.""" + return mirror_descent.ProjectedPolicy( + self._game, + list(range(self._game.num_players())), + self._state_value, + coeff=self._lr) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/boltzmann_policy_iteration_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/boltzmann_policy_iteration_test.py new file mode 100644 index 0000000..64cc194 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/boltzmann_policy_iteration_test.py @@ -0,0 +1,46 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for Boltzmann Policy Iteration.""" + +from absl.testing import absltest +from absl.testing import parameterized + +from open_spiel.python.mfg import value +from open_spiel.python.mfg.algorithms import boltzmann_policy_iteration +from open_spiel.python.mfg.algorithms import nash_conv +from open_spiel.python.mfg.games import crowd_modelling # pylint: disable=unused-import +import pyspiel + + +class BoltzmannPolicyIterationTest(parameterized.TestCase): + + @parameterized.named_parameters(('python', 'python_mfg_crowd_modelling'), + ('cpp', 'mfg_crowd_modelling')) + def test_run(self, name): + """Checks if the algorithm works.""" + game = pyspiel.load_game(name) + bpi = boltzmann_policy_iteration.BoltzmannPolicyIteration( + game, value.TabularValueFunction(game)) + + for _ in range(10): + bpi.iteration() + + bpi_policy = bpi.get_policy() + nash_conv_bpi = nash_conv.NashConv(game, bpi_policy) + + self.assertAlmostEqual(nash_conv_bpi.nash_conv(), 2.75428, places=5) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/correlated_equilibrium.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/correlated_equilibrium.py new file mode 100644 index 0000000..385c750 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/correlated_equilibrium.py @@ -0,0 +1,196 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Mean-Field Correlated Equilibrium Gap & Best Response Computation Library. + +""" + +import numpy as np +from open_spiel.python.mfg.algorithms import greedy_policy +from open_spiel.python.mfg.algorithms import joint_best_response_value as jbr +from open_spiel.python.mfg.algorithms import utils + + +def get_joint_br(game, weights, mus): + br_value = jbr.JointBestResponse(game, mus, weights) + greedy_pi = greedy_policy.GreedyPolicy(game, None, br_value) + return greedy_pi, br_value + + +def compute_rewards(game, policies, mus): + return np.array([ + [utils.get_exact_value(pi, mu, game) for pi in policies] for mu in mus + ]) + + +def compute_average_welfare(game, policies, mus, rhos, nus): + """Computes average welfare. + + Args: + game: Pyspiel game. + policies: List of policies, length P + mus: List of State Distributions of length T + rhos: Temporal weights, length T + nus: Policy distribution per time, shape [T, P] + + Returns: + Average welfare. 
+ """ + assert len(mus) == len(rhos) + assert len(rhos) == nus.shape[0] + assert len(policies) == nus.shape[1] + + rewards = compute_rewards(game, policies, mus) + return np.sum(rewards * nus * rhos.reshape(-1, 1)) + + +def cce_br(game, policies, weights, mus, nus, rewards=None): + """Computes CCE-BR. + + Args: + game: Pyspiel MFG Game. + policies: List of pyspiel policies, length P. + weights: Array of temporal weights on each distribution in `nu`, length T. + mus: List of state distributions, length T. + nus: Array of policy distribution per timestep, shape (T, P) + rewards: Optional array of policy reward per timestep, shape (T, P) + + Returns: + Best-response, computed exploitability from `rewards`. + """ + assert len(mus) == len(nus) + assert len(mus) == len(weights) + + del policies + pol, val = get_joint_br(game, weights, mus) + cce_gap_value = None + if len(rewards) > 0: # pylint: disable=g-explicit-length-test + deviation_value = val.value(game.new_initial_states()[0]) + on_policy_value = np.sum(weights * np.sum(rewards * nus, axis=1)) + cce_gap_value = deviation_value - on_policy_value + return [pol], cce_gap_value + + +def ce_br(game, policies, weights, mus, nus, rewards=None): + """Computes CE-BR. + + Args: + game: Pyspiel MFG Game. + policies: List of pyspiel policies, length P. + weights: Array of temporal weights on each distribution in `nu`, length T. + mus: List of state distributions, length T. + nus: Array of policy distribution per timestep, shape (T, P) + rewards: Optional array of policy reward per timestep, shape (T, P) + + Returns: + Best-responses, computed exploitability from `rewards`. + """ + assert len(mus) == len(nus) + assert len(mus) == len(weights) + + policy_probability = np.sum(nus, axis=0) + new_policies = [] + ce_gap_value = 0.0 + nus = np.array(nus) + weights = np.array(weights) + for policy_index in range(len(policies)): + if policy_probability[policy_index] > 0: + # Take conditional distribution + pol_weights = nus[:, policy_index] * weights + pol_proba = np.sum(pol_weights) + pol_weights = pol_weights / pol_proba + + # Prune state distribution and weights from 0.0-weightred values + new_mus = [mu for ind, mu in enumerate(mus) if pol_weights[ind] > 0] + new_weights = np.array([ + weight for ind, weight in enumerate(pol_weights) + if pol_weights[ind] > 0 + ]) + + # Compute best-response. + new_pol, new_val = get_joint_br(game, new_weights, new_mus) + new_br_val = new_val.value(game.new_initial_states()[0]) + + # Evaluate CE-Gap + if len(rewards) > 0: # pylint: disable=g-explicit-length-test + on_policy_value = np.sum( + np.array(rewards)[:, policy_index] * pol_weights) + ce_gap_value += pol_proba * (new_br_val - on_policy_value) + new_policies.append(new_pol) + return new_policies, ce_gap_value + + +def partial_ce_br(game, policies, weights, mus, nus, rewards=None): + """Computes CE-BR for a single sampled policy. + + Args: + game: Pyspiel MFG Game. + policies: List of pyspiel policies, length P. + weights: Array of temporal weights on each distribution in `nu`, length T. + mus: List of state distributions, length T. + nus: Array of policy distribution per timestep, shape (T, P) + rewards: Optional array of policy reward per timestep, shape (T, P) + + Returns: + Best-response, noisy exploitability estimation. 
+ """ + policy_probability = np.sum(nus, axis=0) + new_policies = [] + + ce_gap_value = None + policy_index = np.random.choice(list(range(len(policies)))) + if policy_probability[policy_index] > 0: + # Take conditional distribution + pol_weights = [nu[policy_index] * weight for nu, weight in zip( + nus, weights)] + pol_proba = np.sum(pol_weights) + pol_weights = np.array(pol_weights) / pol_proba + + # Prune state distribution and weights from 0.0-weightred values + new_mus = [mu for ind, mu in enumerate(mus) if pol_weights[ind] > 0] + new_weights = [ + weight for ind, weight in enumerate(pol_weights) + if pol_weights[ind] > 0 + ] + + # Compute best-response. + new_pol, new_val = get_joint_br(game, new_weights, new_mus) + new_br_val = new_val.value(game.new_initial_states()[0]) + + # Evaluate CE-Gap + if len(rewards) > 0: # pylint: disable=g-explicit-length-test + on_policy_value = np.sum(np.array(rewards)[:, policy_index] * pol_weights) + ce_gap_value = (new_br_val - on_policy_value) + new_policies.append(new_pol) + return new_policies, ce_gap_value + + +def cce_gap(game, policies, weights, mus, nus, rewards=None, + compute_true_rewards=False): + if compute_true_rewards: + rewards = compute_rewards(game, policies, mus) + assert rewards is not None, ("Must provide rewards matrix when computing CCE " + "Gap.") + _, gap = cce_br(game, policies, weights, mus, nus, rewards=rewards) + return gap + + +def ce_gap(game, policies, weights, mus, nus, rewards=None, + compute_true_rewards=False): + if compute_true_rewards: + rewards = compute_rewards(game, policies, mus) + assert rewards is not None, ("Must provide rewards matrix when computing CE " + "Gap.") + _, gap = ce_br(game, policies, weights, mus, nus, rewards=rewards) + return gap diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/distribution.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/distribution.py new file mode 100644 index 0000000..3ec64eb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/distribution.py @@ -0,0 +1,178 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Computes the distribution of a policy.""" +import collections + +from typing import List, Tuple +from open_spiel.python import policy as policy_module +from open_spiel.python.mfg import tabular_distribution +from open_spiel.python.mfg.tabular_distribution import DistributionDict +import pyspiel + + +def type_from_states(states): + """Get node type of a list of states and assert they are the same.""" + types = [state.get_type() for state in states] + assert len(set(types)) == 1, f"types: {types}" + return types[0] + + +def _check_distribution_sum(distribution: DistributionDict, expected_sum: int): + """Sanity check that the distribution sums to a given value.""" + sum_state_probabilities = sum(distribution.values()) + assert abs(sum_state_probabilities - expected_sum) < 1e-4, ( + "Sum of probabilities of all possible states should be the number of " + f"population, it is {sum_state_probabilities}.") + + +class DistributionPolicy(tabular_distribution.TabularDistribution): + """Computes the distribution of a specified strategy.""" + + def __init__(self, + game: pyspiel.Game, + policy: policy_module.Policy, + root_state: pyspiel.State = None): + """Initializes the distribution calculation. + + Args: + game: The game to analyze. + policy: The policy we compute the distribution of. + root_state: The state of the game at which to start analysis. If `None`, + the game root states are used. + """ + super().__init__(game) + self._policy = policy + if root_state is None: + self._root_states = game.new_initial_states() + else: + self._root_states = [root_state] + self.evaluate() + + def evaluate(self): + """Evaluate the distribution over states of self._policy.""" + # List of all game states that have a non-zero probability at the current + # timestep and player ID. + current_states = self._root_states.copy() + # Distribution at the current timestep. Maps state strings to + # floats. For each group of states for a given population, these + # floats represent a probability distribution. + current_distribution = { + self.state_to_str(state): 1 for state in current_states + } + # List of all distributions computed so far. + all_distributions = [current_distribution] + + while type_from_states(current_states) != pyspiel.StateType.TERMINAL: + new_states, new_distribution = self._one_forward_step( + current_states, current_distribution, self._policy) + _check_distribution_sum(new_distribution, self.game.num_players()) + current_distribution = new_distribution + current_states = new_states + all_distributions.append(new_distribution) + + # Merge all per-timestep distributions into `self.distribution`. + for dist in all_distributions: + for state_str, prob in dist.items(): + if state_str in self.distribution: + raise ValueError( + f"{state_str} has already been seen in distribution.") + self.distribution[state_str] = prob + + def _forward_actions( + self, current_states: List[pyspiel.State], distribution: DistributionDict, + actions_and_probs_fn) -> Tuple[List[pyspiel.State], DistributionDict]: + """Applies one action to each current state. + + Args: + current_states: The states to apply actions on. + distribution: Current distribution. + actions_and_probs_fn: Function that maps one state to the corresponding + list of (action, proba). For decision nodes, this should be the policy, + and for chance nodes, this should be chance outcomes. + + Returns: + A pair: + - new_states: List of new states after applying one action on + each input state. + - new_distribution: Probabilities for each of these states. 
+ """ + new_states = [] + new_distribution = collections.defaultdict(float) + for state in current_states: + state_str = self.state_to_str(state) + for action, prob in actions_and_probs_fn(state): + new_state = state.child(action) + new_state_str = self.state_to_str(new_state) + if new_state_str not in new_distribution: + new_states.append(new_state) + new_distribution[new_state_str] += prob * distribution[state_str] + return new_states, new_distribution + + def _one_forward_step(self, current_states: List[pyspiel.State], + distribution: DistributionDict, + policy: policy_module.Policy): + """Performs one step of the forward equation. + + Namely, this takes as input a list of current state, the current + distribution, and performs one step of the forward equation, using + actions coming from the policy or from the chance node + probabilities, or propagating the distribution to the MFG nodes. + + Args: + current_states: The states to perform the forward step on. All states are + assumed to be of the same type. + distribution: Current distribution. + policy: Policy that will be used if states + + Returns: + A pair: + - new_states: List of new states after applying one step of the + forward equation (either performing one action or doing one + distribution update). + - new_distribution: Probabilities for each of these states. + """ + state_types = type_from_states(current_states) + if state_types == pyspiel.StateType.CHANCE: + return self._forward_actions(current_states, distribution, + lambda state: state.chance_outcomes()) + + if state_types == pyspiel.StateType.MEAN_FIELD: + new_states = [] + new_distribution = {} + for state in current_states: + dist = [ + # We need to default to 0, since the support requested by + # the state in `state.distribution_support()` might have + # states that we might not have reached yet. A probability + # of 0. should be given for them. + distribution.get(str_state, 0.) + for str_state in state.distribution_support() + ] + new_state = state.clone() + new_state.update_distribution(dist) + new_state_str = self.state_to_str(new_state) + if new_state_str not in new_distribution: + new_states.append(new_state) + new_distribution[new_state_str] = 0.0 + new_distribution[new_state_str] += distribution.get( + self.state_to_str(state), 0) + return new_states, new_distribution + + if state_types == pyspiel.StateType.DECISION: + return self._forward_actions( + current_states, distribution, + lambda state: policy.action_probabilities(state).items()) + + raise ValueError( + f"Unpexpected state_stypes: {state_types}, states: {current_states}") diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/distribution_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/distribution_test.py new file mode 100644 index 0000000..c33ef5b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/distribution_test.py @@ -0,0 +1,52 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for distribution.""" + +from absl.testing import absltest +from open_spiel.python import policy +from open_spiel.python.mfg import games # pylint: disable=unused-import +from open_spiel.python.mfg.algorithms import distribution +import pyspiel + + +class DistributionTest(absltest.TestCase): + + def test_basic(self): + game = pyspiel.load_game("python_mfg_crowd_modelling") + uniform_policy = policy.UniformRandomPolicy(game) + dist = distribution.DistributionPolicy(game, uniform_policy) + state = game.new_initial_state().child(0) + self.assertAlmostEqual(dist.value(state), 1 / game.size) + + def test_state_support_outside_distrib(self): + game = pyspiel.load_game("mfg_crowd_modelling_2d", { + "initial_distribution": "[0|0]", + "initial_distribution_value": "[1.]", + }) + uniform_policy = policy.UniformRandomPolicy(game) + _ = distribution.DistributionPolicy(game, uniform_policy) + + def test_multi_pop(self): + game = pyspiel.load_game("python_mfg_predator_prey") + self.assertEqual(game.num_players(), 3) + uniform_policy = policy.UniformRandomPolicy(game) + dist = distribution.DistributionPolicy(game, uniform_policy) + for pop in range(3): + self.assertAlmostEqual( + dist.value(game.new_initial_state_for_population(pop)), 1.) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/fictitious_play.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/fictitious_play.py new file mode 100644 index 0000000..b0c9e02 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/fictitious_play.py @@ -0,0 +1,188 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Implementation of Fictitious Play from Perrin & al. + +Reference: https://arxiv.org/abs/2007.03458. +As presented, the Fictitious Play algorithm provides a robust approximation +scheme for Nash equilibrium by iteratively computing the best response +against the distribution induced by the average of the past best responses. +The provided formulation of Deep Fictitious Play mirrors this procedure, +but substitutes out the exact best reponse computation with an approximation +of best response values through a Reinforcement Learning approach (where +the RL method in question is a user-determined parameter for each iteration). + +Policy is initialized to uniform policy. +Each iteration: + 1. Compute best response against policy + 2. Update policy as weighted average of best response and current policy + (default learning rate is 1 / num_iterations + 1). 
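+
+With default settings, iteration n replaces the policy with the mixture
+(1 - w_n) * pi_n + w_n * BR(pi_n), where w_n = 1 / (n + 1) and the mixture is
+taken state-wise, weighted by the state distribution induced by each policy
+(see `MergedPolicy` below).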
+ +To use fictitious play one should initialize it and run multiple iterations: +fp = FictitiousPlay(game) +for _ in range(num_iterations): + fp.iteration() +policy = fp.get_policy() +""" + +import math +from typing import List, Optional + +from open_spiel.python import policy as policy_std +from open_spiel.python.mfg import distribution as distribution_std +from open_spiel.python.mfg import value +from open_spiel.python.mfg.algorithms import best_response_value +from open_spiel.python.mfg.algorithms import distribution +from open_spiel.python.mfg.algorithms import greedy_policy +from open_spiel.python.mfg.algorithms import policy_value +from open_spiel.python.mfg.algorithms import softmax_policy +import pyspiel + + +class MergedPolicy(policy_std.Policy): + """Merge several policies.""" + + def __init__( + self, + game, + player_ids: List[int], + policies: List[policy_std.Policy], + distributions: List[distribution_std.Distribution], + weights: List[float], + ): + """Initializes the merged policy. + + Args: + game: The game to analyze. + player_ids: list of player ids for which this policy applies; each should + be in the range 0..game.num_players()-1. + policies: A `List[policy_std.Policy]` object. + distributions: A `List[distribution_std.Distribution]` object. + weights: A `List[float]` object. The elements should sum to 1. + """ + super().__init__(game, player_ids) + self._policies = policies + self._distributions = distributions + self._weights = weights + assert len(policies) == len(distributions), ( + f'Length mismatch {len(policies)} != {len(distributions)}') + assert len(policies) == len(weights), ( + f'Length mismatch {len(policies)} != {len(weights)}') + assert math.isclose( + sum(weights), + 1.0), (f'Weights should sum to 1, but instead sum to {sum(weights)}') + + def action_probabilities(self, state, player_id=None): + action_prob = [] + legal = state.legal_actions() + num_legal = len(legal) + for a in legal: + merged_pi = 0.0 + norm_merged_pi = 0.0 + for p, d, w in zip(self._policies, self._distributions, self._weights): + try: + merged_pi += w * d(state) * p(state)[a] + norm_merged_pi += w * d(state) + except (KeyError, ValueError): + # This happens when the state was not observed in the merged + # distributions or policies. + pass + if norm_merged_pi > 0.0: + action_prob.append((a, merged_pi / norm_merged_pi)) + else: + action_prob.append((a, 1.0 / num_legal)) + return dict(action_prob) + + +class FictitiousPlay(object): + """Computes the value of a specified strategy.""" + + def __init__(self, + game: pyspiel.Game, + lr: Optional[float] = None, + temperature: Optional[float] = None): + """Initializes the greedy policy. + + Args: + game: The game to analyze. + lr: The learning rate of mirror descent. If None, at iteration i it will + be set to 1/i. + temperature: If set, then instead of the greedy policy a softmax policy + with the specified temperature will be used to update the policy at each + iteration. 
+ """ + self._game = game + self._lr = lr + self._temperature = temperature + self._policy = policy_std.UniformRandomPolicy(self._game) + + self._correlating_policy = self._policy + self._distribution = distribution.DistributionPolicy( + self._game, self._correlating_policy + ) + self._fp_step = 0 + + def get_policy(self): + return self._policy + + def get_correlating_policy(self): + return self._policy + + def get_correlating_distribution(self): + return distribution.DistributionPolicy(self._game, self._policy) + + def iteration(self, br_policy=None, learning_rate=None): + """Returns a new `TabularPolicy` equivalent to this policy. + + Args: + br_policy: Policy to compute the best response value for each iteration. + If none provided, the exact value is computed. + learning_rate: The learning rate. + """ + self._fp_step += 1 + + distrib = distribution.DistributionPolicy(self._game, self._policy) + + if br_policy: + br_value = policy_value.PolicyValue(self._game, distrib, br_policy) + else: + br_value = best_response_value.BestResponse( + self._game, distrib, value.TabularValueFunction(self._game)) + + # Policy is either greedy or softmax with respect to the best response if + # temperature is specified. + player_ids = list(range(self._game.num_players())) + if self._temperature is None: + pi = greedy_policy.GreedyPolicy(self._game, player_ids, br_value) + else: + pi = softmax_policy.SoftmaxPolicy(self._game, player_ids, + self._temperature, br_value) + pi = pi.to_tabular() + + distrib_pi = distribution.DistributionPolicy(self._game, pi) + + if learning_rate: + weight = learning_rate + else: + weight = self._lr if self._lr else 1.0 / (self._fp_step + 1) + + self._correlating_policy = pi + self._distribution = distrib_pi + + if math.isclose(weight, 1.0): + self._policy = pi + else: + self._policy = MergedPolicy(self._game, player_ids, [self._policy, pi], + [distrib, distrib_pi], + [1.0 - weight, weight]).to_tabular() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/fictitious_play_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/fictitious_play_test.py new file mode 100644 index 0000000..e3822ed --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/fictitious_play_test.py @@ -0,0 +1,139 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for fictitious play.""" + +from absl.testing import absltest +from absl.testing import parameterized + +import jax + +from open_spiel.python import policy +from open_spiel.python import rl_agent_policy +from open_spiel.python import rl_environment +from open_spiel.python.jax import dqn +from open_spiel.python.mfg import value +from open_spiel.python.mfg.algorithms import best_response_value +from open_spiel.python.mfg.algorithms import distribution +from open_spiel.python.mfg.algorithms import fictitious_play +from open_spiel.python.mfg.algorithms import greedy_policy +from open_spiel.python.mfg.algorithms import nash_conv +from open_spiel.python.mfg.algorithms import policy_value +from open_spiel.python.mfg.games import crowd_modelling +import pyspiel + +jax.config.update("jax_threefry_partitionable", False) + + +class FictitiousPlayTest(parameterized.TestCase): + + @parameterized.named_parameters(("python", "python_mfg_crowd_modelling"), + ("cpp", "mfg_crowd_modelling")) + def test_run(self, name: str): + """Checks if fictitious play works.""" + game = pyspiel.load_game(name) + fp = fictitious_play.FictitiousPlay(game) + for _ in range(10): + fp.iteration() + fp_policy = fp.get_policy() + nash_conv_fp = nash_conv.NashConv(game, fp_policy) + + self.assertAlmostEqual(nash_conv_fp.nash_conv(), 0.991, places=3) + + @parameterized.named_parameters(("at_init", True), ("at_each_step", False)) + def test_learning_rate(self, at_init: bool): + """Checks if learning rate works.""" + game = crowd_modelling.MFGCrowdModellingGame() + lr = 1.0 + fp = fictitious_play.FictitiousPlay(game, lr=lr if at_init else None) + for _ in range(10): + fp.iteration(learning_rate=None if at_init else lr) + fp_policy = fp.get_policy() + nash_conv_fp = nash_conv.NashConv(game, fp_policy) + + self.assertAlmostEqual(nash_conv_fp.nash_conv(), 55.745, places=3) + + def test_soft_max(self): + """Checks if soft-max policy works.""" + game = crowd_modelling.MFGCrowdModellingGame() + fp = fictitious_play.FictitiousPlay(game, temperature=1) + for _ in range(10): + fp.iteration() + fp_policy = fp.get_policy() + nash_conv_fp = nash_conv.NashConv(game, fp_policy) + + self.assertAlmostEqual(nash_conv_fp.nash_conv(), 1.062, places=3) + + @parameterized.named_parameters(("python", "python_mfg_crowd_modelling"), + ("cpp", "mfg_crowd_modelling")) + def test_dqn(self, name): + """Checks if fictitious play with DQN-based value function works.""" + game = pyspiel.load_game(name) + dfp = fictitious_play.FictitiousPlay(game) + + uniform_policy = policy.UniformRandomPolicy(game) + dist = distribution.DistributionPolicy(game, uniform_policy) + envs = [ + rl_environment.Environment( + game, mfg_distribution=dist, mfg_population=p) + for p in range(game.num_players()) + ] + dqn_agent = dqn.DQN( + 0, + state_representation_size=envs[0].observation_spec()["info_state"][0], + num_actions=envs[0].action_spec()["num_actions"], + hidden_layers_sizes=[256, 128, 64], + replay_buffer_capacity=100, + batch_size=5, + epsilon_start=0.02, + epsilon_end=0.01) + + br_policy = rl_agent_policy.RLAgentPolicy( + game, dqn_agent, 0, use_observation=True) + for _ in range(10): + dfp.iteration(br_policy=br_policy) + + dfp_policy = dfp.get_policy() + nash_conv_dfp = nash_conv.NashConv(game, dfp_policy) + + self.assertLessEqual(nash_conv_dfp.nash_conv(), 1.06) + + def test_average(self): + """Test the average of policies. + + Here we test that the average of values is the value of the average policy. 
+ """ + game = crowd_modelling.MFGCrowdModellingGame() + uniform_policy = policy.UniformRandomPolicy(game) + mfg_dist = distribution.DistributionPolicy(game, uniform_policy) + br_value = best_response_value.BestResponse( + game, mfg_dist, value.TabularValueFunction(game)) + py_value = policy_value.PolicyValue(game, mfg_dist, uniform_policy, + value.TabularValueFunction(game)) + greedy_pi = greedy_policy.GreedyPolicy(game, None, br_value) + greedy_pi = greedy_pi.to_tabular() + merged_pi = fictitious_play.MergedPolicy( + game, list(range(game.num_players())), [uniform_policy, greedy_pi], + [mfg_dist, distribution.DistributionPolicy(game, greedy_pi)], + [0.5, 0.5]) + merged_pi_value = policy_value.PolicyValue(game, mfg_dist, merged_pi, + value.TabularValueFunction(game)) + + self.assertAlmostEqual( + merged_pi_value(game.new_initial_state()), + (br_value(game.new_initial_state()) + + py_value(game.new_initial_state())) / 2) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/fixed_point.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/fixed_point.py new file mode 100644 index 0000000..9e821de --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/fixed_point.py @@ -0,0 +1,77 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Fixed Point.""" + +from typing import Optional + +from open_spiel.python import policy as policy_lib +from open_spiel.python.mfg import value +from open_spiel.python.mfg.algorithms import best_response_value +from open_spiel.python.mfg.algorithms import distribution +from open_spiel.python.mfg.algorithms import greedy_policy +from open_spiel.python.mfg.algorithms import softmax_policy +import pyspiel + + +class FixedPoint(object): + """The fixed point algorithm. + + This algorithm is based on Banach-Picard iterations for the fixed point + operator characterizing the Nash equilibrium. At each iteration, the policy is + updated by computing a best response against the current mean-field or a + regularized version that is obtained by taking a softmax with respect to the + optimal Q-function, and the mean-field is updated by taking the mean-field + induced by the current policy. + """ + + def __init__(self, game: pyspiel.Game, temperature: Optional[float] = None): + """Initializes the algorithm. + + Args: + game: The game to analyze. + temperature: If set, then instead of the greedy policy a softmax policy + with the specified temperature will be used to update the policy at each + iteration. + """ + self._game = game + self._temperature = temperature + self._policy = policy_lib.UniformRandomPolicy(self._game) + self._distribution = distribution.DistributionPolicy(game, self._policy) + + def iteration(self): + """An itertion of Fixed Point.""" + # Calculate the current distribution and the best response. 
+ distrib = distribution.DistributionPolicy(self._game, self._policy) + br_value = best_response_value.BestResponse( + self._game, distrib, value.TabularValueFunction(self._game)) + + # Policy is either greedy or softmax with respect to the best response if + # temperature is specified. + player_ids = list(range(self._game.num_players())) + if self._temperature is None: + self._policy = greedy_policy.GreedyPolicy(self._game, player_ids, + br_value) + else: + self._policy = softmax_policy.SoftmaxPolicy(self._game, player_ids, + self._temperature, br_value) + + self._distribution = distribution.DistributionPolicy( + self._game, self._policy) + + def get_policy(self) -> policy_lib.Policy: + return self._policy + + @property + def distribution(self) -> distribution.DistributionPolicy: + return self._distribution diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/fixed_point_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/fixed_point_test.py new file mode 100644 index 0000000..c724ef0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/fixed_point_test.py @@ -0,0 +1,59 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for Fixed Point.""" + +from absl.testing import absltest +from absl.testing import parameterized + +from open_spiel.python.mfg.algorithms import fixed_point +from open_spiel.python.mfg.algorithms import nash_conv +from open_spiel.python.mfg.games import crowd_modelling # pylint: disable=unused-import +import pyspiel + + +class FixedPointTest(parameterized.TestCase): + + @parameterized.named_parameters(('python', 'python_mfg_crowd_modelling'), + ('cpp', 'mfg_crowd_modelling')) + def test_run(self, name): + """Checks if the algorithm works.""" + game = pyspiel.load_game(name) + fixed_p = fixed_point.FixedPoint(game) + + for _ in range(10): + fixed_p.iteration() + + fixed_p_policy = fixed_p.get_policy() + nash_conv_fixed_p = nash_conv.NashConv(game, fixed_p_policy) + + self.assertAlmostEqual(nash_conv_fixed_p.nash_conv(), 55.745, places=3) + + @parameterized.named_parameters(('python', 'python_mfg_crowd_modelling'), + ('cpp', 'mfg_crowd_modelling')) + def test_softmax(self, name): + """Checks the softmax policy.""" + game = pyspiel.load_game(name) + fixed_p = fixed_point.FixedPoint(game, temperature=10.0) + + for _ in range(10): + fixed_p.iteration() + + fixed_p_policy = fixed_p.get_policy() + nash_conv_fixed_p = nash_conv.NashConv(game, fixed_p_policy) + + self.assertAlmostEqual(nash_conv_fixed_p.nash_conv(), 2.421, places=3) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/greedy_policy.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/greedy_policy.py new file mode 100644 index 0000000..4e0f98b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/greedy_policy.py @@ -0,0 +1,51 @@ +# Copyright 2019 DeepMind 
Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Computes a greedy policy from a value.""" +import numpy as np + +from open_spiel.python import policy as policy_std +from open_spiel.python.mfg import value + + +class GreedyPolicy(policy_std.Policy): + """Computes the greedy policy of a value.""" + + def __init__(self, game, player_ids, state_action_value: value.ValueFunction): + """Initializes the greedy policy. + + Args: + game: The game to analyze. + player_ids: list of player ids for which this policy applies; each should + be in the range 0..game.num_players()-1. + state_action_value: A state-action value function. + """ + super(GreedyPolicy, self).__init__(game, player_ids) + self._state_action_value = state_action_value + + def action_probabilities(self, state, player_id=None): + q = [ + self._state_action_value(state, action) + for action in state.legal_actions() + ] + amax_q = [0.0 for _ in state.legal_actions()] + amax_q[np.argmax(q)] = 1.0 + return dict(zip(state.legal_actions(), amax_q)) + + def action(self, state, player_id=None): + q = [ + self._state_action_value(state, action) + for action in state.legal_actions() + ] + return state.legal_actions()[np.argmax(q)] diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/greedy_policy_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/greedy_policy_test.py new file mode 100644 index 0000000..f473d34 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/greedy_policy_test.py @@ -0,0 +1,59 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for greedy_policy.""" + +from absl.testing import absltest +from absl.testing import parameterized + +from open_spiel.python import policy +from open_spiel.python.mfg import value +from open_spiel.python.mfg.algorithms import best_response_value +from open_spiel.python.mfg.algorithms import distribution +from open_spiel.python.mfg.algorithms import greedy_policy +from open_spiel.python.mfg.algorithms import policy_value +from open_spiel.python.mfg.games import crowd_modelling # pylint: disable=unused-import +import pyspiel + + +class GreedyPolicyTest(parameterized.TestCase): + + @parameterized.named_parameters(('python', 'python_mfg_crowd_modelling'), + ('cpp', 'mfg_crowd_modelling')) + def test_greedy(self, name): + """Check if the greedy policy works as expected. 
+ + The test checks that a greedy policy with respect to an optimal value is + an optimal policy. + + Args: + name: Name of the game. + """ + game = pyspiel.load_game(name) + uniform_policy = policy.UniformRandomPolicy(game) + dist = distribution.DistributionPolicy(game, uniform_policy) + br_value = best_response_value.BestResponse( + game, dist, value.TabularValueFunction(game)) + br_val = br_value(game.new_initial_state()) + + greedy_pi = greedy_policy.GreedyPolicy(game, None, br_value) + greedy_pi = greedy_pi.to_tabular() + pybr_value = policy_value.PolicyValue(game, dist, greedy_pi, + value.TabularValueFunction(game)) + pybr_val = pybr_value(game.new_initial_state()) + self.assertAlmostEqual(br_val, pybr_val) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/joint_best_response_value.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/joint_best_response_value.py new file mode 100644 index 0000000..8c47929 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/joint_best_response_value.py @@ -0,0 +1,136 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Outputs value of best response policy against set of distributions.""" +import collections +from typing import List +from open_spiel.python.mfg import distribution as distribution_std +from open_spiel.python.mfg import value +import pyspiel + + +class JointBestResponse(value.ValueFunction): + """Computes a best response value.""" + + def __init__( + self, + game, + distributions: List[distribution_std.Distribution], + weights, + root_state=None, + ): + """Initializes the joint best response computation. + + The joint best response is computed under the following premisse : the + player does not know which distribution it is playing against. It only knows + their probabilities, and thus tries to find a best response against their + mixture. + + This is accomplished by recursively computing the action that maximizes the + marginalized value of each node over each distribution. + + Warning : This version only works on games whose observation space & + dynamics do NOT depend on state distribution. + + Args: + game: The game to analyze. + distributions: A list of `distribution_std.Distribution`. + weights: A list of floats the same length as `distributions`. Represents + the mixture weight of each member of `distributions`. + root_state: The state of the game at which to start. If `None`, the game + root state is used. + """ + super().__init__(game) + if root_state is None: + self._root_states = game.new_initial_states() + else: + self._root_states = [root_state] + self._distributions = distributions + self._weights = weights + # Maps states (in string format) to the value of the optimal policy given + # 'self._distribution'. 
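+    # Values are filled by the recursive `eval_state` backup below, with
+    # per-state rewards marginalized over the weighted mixture of
+    # distributions (see `get_state_rewards`).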
+ self._state_value = collections.defaultdict(float) + self.evaluate() + + def get_state_rewards(self, mu_states): + return sum([ + weight * mu_state.rewards()[mu_state.mean_field_population()] + for weight, mu_state in zip(self._weights, mu_states) + ]) + + def get_new_mu_states(self, mu_states): + new_mu_states = [] + for mu_ind, mu_state in enumerate(mu_states): + dist = [ + self._distributions[mu_ind].value_str(str_state, 0.0) + for str_state in mu_state.distribution_support() + ] + new_mu_state = mu_state.clone() + new_mu_state.update_distribution(dist) + new_mu_states.append(new_mu_state) + return new_mu_states + + def eval_state(self, mu_states): + """Evaluate the value of a state. + + Args: + mu_states: A list of game states, one for each `distributions` member. + + Returns: + The optimal value of the state. + + Recursively computes the value of the optimal policy given the fixed state + distributions. `self._state_value` is used as a cache for pre-computed + values. + """ + state = mu_states[0] + state_str = state.observation_string(pyspiel.PlayerId.DEFAULT_PLAYER_ID) + if state_str in self._state_value: + return self._state_value[state_str] + if state.is_terminal(): + self._state_value[state_str] = self.get_state_rewards(mu_states) + return self._state_value[state_str] + if state.current_player() == pyspiel.PlayerId.CHANCE: + self._state_value[state_str] = 0.0 + for action, prob in state.chance_outcomes(): + new_mu_states = [mu_state.child(action) for mu_state in mu_states] + self._state_value[state_str] += prob * self.eval_state(new_mu_states) + return self._state_value[state_str] + if state.current_player() == pyspiel.PlayerId.MEAN_FIELD: + new_mu_states = self.get_new_mu_states(mu_states) + self._state_value[state_str] = self.get_state_rewards( + mu_states + ) + self.eval_state(new_mu_states) + return self._state_value[state_str] + else: + assert int(state.current_player()) >= 0, "The player id should be >= 0" + max_q = max( + self.eval_state([mu_state.child(action) for mu_state in mu_states]) + for action in state.legal_actions() + ) + self._state_value[state_str] = self.get_state_rewards(mu_states) + max_q + return self._state_value[state_str] + + def evaluate(self): + """Evaluate the best response value on all states.""" + for state in self._root_states: + self.eval_state([state.clone() for _ in self._distributions]) + + def value(self, state, action=None): + if action is None: + return self._state_value[state.observation_string( + pyspiel.PlayerId.DEFAULT_PLAYER_ID)] + new_state = state.child(action) + return state.rewards()[state.mean_field_population()] + self._state_value[ + new_state.observation_string(pyspiel.PlayerId.DEFAULT_PLAYER_ID)] diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/mf_psro.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/mf_psro.py new file mode 100644 index 0000000..01e33f7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/mf_psro.py @@ -0,0 +1,131 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Mean-Field PSRO. + +As implemented in Muller et al., 2021, https://arxiv.org/abs/2111.08350 +""" + +from open_spiel.python import policy as policy_std +from open_spiel.python.algorithms import get_all_states +from open_spiel.python.mfg.algorithms import correlated_equilibrium +from open_spiel.python.mfg.algorithms import distribution +from open_spiel.python.mfg.algorithms import greedy_policy + + +def dict_equal(dic1, dic2): + return all([dic1[a] == dic2[a] for a in dic1]) and all( + [dic1[a] == dic2[a] for a in dic2] + ) + + +def equal_policies(pol1, pol2, all_states): + assert isinstance(pol1, greedy_policy.GreedyPolicy) + equal = True + for state_key in all_states: + state = all_states[state_key] + try: + equal = equal and dict_equal(pol1(state), pol2(state)) + except KeyError: + equal = False + except ValueError: + continue + return equal + + +def filter_policies(policies, new_policies, all_states): + all_policies = policies + no_novelty = True + for new_policy in new_policies: + if all([ + not equal_policies(new_policy, policy, all_states) + for policy in all_policies + ]): + all_policies.append(new_policy) + no_novelty = False + return all_policies, no_novelty + + +class MeanFieldPSRO: + """Mean-Field PSRO.""" + + def __init__( + self, + game, + regret_minimizer, + regret_steps_per_step, + best_responder=correlated_equilibrium.cce_br, + filter_new_policies=False, + increase_precision_when_done_early=False, + ): + self._game = game + self._regret_minimizer = regret_minimizer + self._regret_steps_per_step = regret_steps_per_step + + self._filter_new_policies = filter_new_policies + self._increase_precision_when_done_early = ( + increase_precision_when_done_early + ) + + self._best_responder = best_responder + + self._nus = [[1.0]] + self._policies = [policy_std.UniformRandomPolicy(self._game)] + self._mus = [distribution.DistributionPolicy(game, self._policies[0])] + self._weights = [1.0] + + self._all_states = None + if self._filter_new_policies: + self._all_states = get_all_states.get_all_states(game) + + def step(self): + """Does a best-response step.""" + rewards = self._regret_minimizer.get_rewards() + + print("Computing best response.") + new_policies, gap_value = self._best_responder( + self._game, self._policies, self._weights, self._mus, self._nus, rewards + ) + + no_novelty = False + if self._filter_new_policies: + print("Filtering best responses") + self._policies, no_novelty = filter_policies( + self._policies, new_policies, self._all_states + ) + else: + self._policies = self._policies + new_policies + + if no_novelty: + print("No new policy added, PSRO has terminated.") + if self._increase_precision_when_done_early: + print("Increasing precision") + self._regret_minimizer.increase_precision_x_fold(2.0) + self._regret_steps_per_step *= 2 + self._regret_minimizer.restart() + self._regret_minimizer.step_for(self._regret_steps_per_step) + else: + print("Minimizing regret") + self._regret_minimizer.reset(self._policies) + self._regret_minimizer.step_for(self._regret_steps_per_step) + + average_regret = self._regret_minimizer.compute_average_regret() + print("Average Regret : {}".format(average_regret)) + + self._mus, self._weights = self._regret_minimizer.get_mus_and_weights() + self._nus = self._regret_minimizer.get_nus() + return average_regret, gap_value + + def get_equilibrium(self): + return self._policies, self._nus, self._mus, self._weights diff --git 
a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/mirror_descent.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/mirror_descent.py new file mode 100644 index 0000000..3187392 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/mirror_descent.py @@ -0,0 +1,171 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Mirror Descent (https://arxiv.org/pdf/2103.00623.pdf).""" + +from typing import Dict, List, Optional + +import numpy as np + +from open_spiel.python import policy as policy_lib +from open_spiel.python.mfg import value +from open_spiel.python.mfg.algorithms import distribution +import pyspiel + + +def softmax_projection(logits): + max_l = max(logits) + exp_l = [np.exp(l - max_l) for l in logits] + norm_exp = sum(exp_l) + return [l / norm_exp for l in exp_l] + + +class ProjectedPolicy(policy_lib.Policy): + """Project values on the policy simplex.""" + + def __init__( + self, + game: pyspiel.Game, + player_ids: List[int], + state_value: value.ValueFunction, + coeff: float = 1.0, + ): + """Initializes the projected policy. + + Args: + game: The game to analyze. + player_ids: list of player ids for which this policy applies; each should + be in the range 0..game.num_players()-1. + state_value: The (cumulative) state value to project. + coeff: Coefficient for the values of the states. + """ + super(ProjectedPolicy, self).__init__(game, player_ids) + self._state_value = state_value + self._coeff = coeff + + def value(self, state: pyspiel.State, action: Optional[int] = None) -> float: + if action is None: + return self._state_value( + state.observation_string(pyspiel.PlayerId.DEFAULT_PLAYER_ID)) + else: + new_state = state.child(action) + return state.rewards()[0] + self._state_value( + new_state.observation_string(pyspiel.PlayerId.DEFAULT_PLAYER_ID)) + + def action_probabilities(self, + state: pyspiel.State, + player_id: Optional[int] = None) -> Dict[int, float]: + del player_id + action_logit = [(a, self._coeff * self.value(state, action=a)) + for a in state.legal_actions()] + action, logit = zip(*action_logit) + return dict(zip(action, softmax_projection(logit))) + + +class MirrorDescent(object): + """The mirror descent algorithm.""" + + def __init__(self, + game: pyspiel.Game, + state_value: Optional[value.ValueFunction] = None, + lr: float = 0.01, + root_state: Optional[pyspiel.State] = None): + """Initializes mirror descent. + + Args: + game: The game, + state_value: A state value function. Default to TabularValueFunction. + lr: The learning rate of mirror descent, + root_state: The state of the game at which to start. If `None`, the game + root state is used. 
+ """ + self._game = game + if root_state is None: + self._root_states = game.new_initial_states() + else: + self._root_states = [root_state] + self._policy = policy_lib.UniformRandomPolicy(game) + self._distribution = distribution.DistributionPolicy(game, self._policy) + self._md_step = 0 + self._lr = lr + + self._state_value = ( + state_value if state_value else value.TabularValueFunction(game)) + self._cumulative_state_value = value.TabularValueFunction(game) + + def get_state_value(self, state: pyspiel.State, + learning_rate: float) -> float: + """Returns the value of the state.""" + if state.is_terminal(): + return state.rewards()[state.mean_field_population()] + + if state.current_player() == pyspiel.PlayerId.CHANCE: + v = 0.0 + for action, prob in state.chance_outcomes(): + new_state = state.child(action) + v += prob * self.eval_state(new_state, learning_rate) + return v + + if state.current_player() == pyspiel.PlayerId.MEAN_FIELD: + dist_to_register = state.distribution_support() + dist = [ + self._distribution.value_str(str_state, 0.0) + for str_state in dist_to_register + ] + new_state = state.clone() + new_state.update_distribution(dist) + return (state.rewards()[state.mean_field_population()] + + self.eval_state(new_state, learning_rate)) + + assert int(state.current_player()) >= 0, "The player id should be >= 0" + v = 0.0 + for action, prob in self._policy.action_probabilities(state).items(): + new_state = state.child(action) + v += prob * self.eval_state(new_state, learning_rate) + return state.rewards()[state.mean_field_population()] + v + + def eval_state(self, state: pyspiel.State, learning_rate: float) -> float: + """Evaluate the value of a state and update the cumulative sum.""" + state_str = state.observation_string(pyspiel.PlayerId.DEFAULT_PLAYER_ID) + # Return the already calculated value if present. + if self._state_value.has(state_str): + return self._state_value(state_str) + # Otherwise, calculate the value of the state. + v = self.get_state_value(state, learning_rate) + self._state_value.set_value(state_str, v) + # Update the cumulative value of the state. + self._cumulative_state_value.add_value(state_str, learning_rate * v) + return v + + def get_projected_policy(self) -> policy_lib.Policy: + """Returns the projected policy.""" + return ProjectedPolicy(self._game, list(range(self._game.num_players())), + self._cumulative_state_value) + + def iteration(self, learning_rate: Optional[float] = None): + """An iteration of Mirror Descent.""" + self._md_step += 1 + # TODO(sertan): Fix me. 
+ self._state_value = value.TabularValueFunction(self._game) + for state in self._root_states: + self.eval_state(state, learning_rate if learning_rate else self._lr) + self._policy = self.get_projected_policy() + self._distribution = distribution.DistributionPolicy( + self._game, self._policy) + + def get_policy(self) -> policy_lib.Policy: + return self._policy + + @property + def distribution(self) -> distribution.DistributionPolicy: + return self._distribution diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/mirror_descent_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/mirror_descent_test.py new file mode 100644 index 0000000..6520e3d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/mirror_descent_test.py @@ -0,0 +1,44 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for mirror descent.""" + +from absl.testing import absltest +from absl.testing import parameterized + +from open_spiel.python.mfg import value +from open_spiel.python.mfg.algorithms import mirror_descent +from open_spiel.python.mfg.algorithms import nash_conv +from open_spiel.python.mfg.games import crowd_modelling # pylint: disable=unused-import +import pyspiel + + +class MirrorDescentTest(parameterized.TestCase): + + @parameterized.named_parameters(('python', 'python_mfg_crowd_modelling'), + ('cpp', 'mfg_crowd_modelling')) + def test_fp(self, name): + """Checks if mirror descent works.""" + game = pyspiel.load_game(name) + md = mirror_descent.MirrorDescent(game, value.TabularValueFunction(game)) + for _ in range(10): + md.iteration() + md_policy = md.get_policy() + nash_conv_md = nash_conv.NashConv(game, md_policy) + + self.assertAlmostEqual(nash_conv_md.nash_conv(), 2.2730324915546056) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/munchausen_deep_mirror_descent.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/munchausen_deep_mirror_descent.py new file mode 100644 index 0000000..a711fe0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/munchausen_deep_mirror_descent.py @@ -0,0 +1,563 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# TODO(sertan): Add link to the reference paper. 
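+#
+# The loss below uses the Munchausen target, which augments the usual DQN
+# target with log-policy terms computed from the previous Q-network (see
+# `_loss`): roughly
+#
+#   r + alpha * tau * log pi_prev(a | s)
+#     + sum_a' pi_prev(a' | s') * (Q_target(s', a') - tau * log pi_prev(a' | s'))
+#
+# where pi_prev is the softmax (at temperature tau, over legal actions) of the
+# previous Q-network's values, and probabilities are clipped to avoid NaNs in
+# the log terms.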
+"""Munchausen DQN Agent and deep online mirror descent implementation.""" + +import collections +from typing import Any, Callable, Dict, Optional, Tuple + +from absl import logging +import haiku as hk +import jax +import jax.numpy as jnp +import numpy as np +import optax +import rlax + +from open_spiel.python import rl_agent +from open_spiel.python import rl_agent_policy +from open_spiel.python.mfg.algorithms import distribution as distribution_std +from open_spiel.python.utils.replay_buffer import ReplayBuffer + +Transition = collections.namedtuple( + "Transition", + "info_state action legal_one_hots reward next_info_state is_final_step " + "next_legal_one_hots") + +# Penalty for illegal actions in action selection. In epsilon-greedy, this will +# prevent them from being selected and in soft-max the probabilities will be +# (close to) 0. +ILLEGAL_ACTION_PENALTY = -1e9 +# Lower bound for action probabilities to prevent NaNs in log terms. +MIN_ACTION_PROB = 1e-6 + + +def _copy_params(params): + """Returns a copy of the params.""" + return jax.tree_util.tree_map(lambda x: x.copy(), params) + + +class MunchausenDQN(rl_agent.AbstractAgent): + """Munchausen DQN Agent implementation in JAX.""" + + def __init__( + self, + player_id, + state_representation_size, + num_actions, + # Training options. + batch_size: int = 128, + learn_every: int = 64, + epsilon_start: float = 0.1, + epsilon_end: float = 0.1, + epsilon_decay_duration: int = int(20e6), + epsilon_power: float = 1.0, + discount_factor: float = 1.0, + # Replay buffer options. + replay_buffer_capacity: int = int(2e5), + min_buffer_size_to_learn: int = 1000, + replay_buffer_class=ReplayBuffer, + # Loss and optimizer options. + optimizer: str = "sgd", + learning_rate: float = 0.01, + loss: str = "mse", + huber_loss_parameter: float = 1.0, + # Network options. + update_target_network_every: int = 19200, + hidden_layers_sizes=128, + qnn_params_init=None, + # Munchausen options. + tau=0.05, + alpha=0.9, + reset_replay_buffer_on_update: bool = True, + gradient_clipping: Optional[float] = None, + with_munchausen: bool = True, + seed: int = 42): + """Initialize the Munchausen DQN agent.""" + self.player_id = int(player_id) + self._num_actions = num_actions + + self._batch_size = batch_size + self._learn_every = learn_every + self._epsilon_start = epsilon_start + self._epsilon_end = epsilon_end + self._epsilon_decay_duration = epsilon_decay_duration + self._epsilon_power = epsilon_power + self._discount_factor = discount_factor + self._reset_replay_buffer_on_update = reset_replay_buffer_on_update + + self._tau = tau + self._alpha = alpha + + # If true, the target uses Munchausen penalty terms. + self._with_munchausen = with_munchausen + + self._prev_action = None + self._prev_legal_action = None + self._prev_time_step = None + + # Used to select actions. + self._rs = np.random.RandomState(seed) + + # Step counter to keep track of learning, eps decay and target network. + self._step_counter = 0 + + # Keep track of the last training loss achieved in an update step. + self._last_loss_value = None + + # Create the replay buffer. + if not isinstance(replay_buffer_capacity, int): + raise ValueError("Replay buffer capacity not an integer.") + self._replay_buffer = replay_buffer_class(replay_buffer_capacity) + self._min_buffer_size_to_learn = min_buffer_size_to_learn + + # Create the Q-network. 
+ self._update_target_network_every = update_target_network_every + + if isinstance(hidden_layers_sizes, int): + hidden_layers_sizes = [hidden_layers_sizes] + + def network(x): + mlp = hk.nets.MLP(hidden_layers_sizes + [num_actions]) + return mlp(x) + + self.hk_network = hk.without_apply_rng(hk.transform(network)) + self.hk_network_apply = jax.jit(self.hk_network.apply) + + if qnn_params_init: + self._params_q_network = _copy_params(qnn_params_init) + self._params_target_q_network = _copy_params(qnn_params_init) + self._params_prev_q_network = _copy_params(qnn_params_init) + else: + rng = jax.random.PRNGKey(seed) + x = jnp.ones([1, state_representation_size]) + self._params_q_network = self.hk_network.init(rng, x) + self._params_target_q_network = self.hk_network.init(rng, x) + self._params_prev_q_network = self.hk_network.init(rng, x) + + # Create the loss function and the optimizer. + if loss == "mse": + self._loss_func = lambda x: jnp.mean(x**2) + elif loss == "huber": + self._loss_func = lambda x: jnp.mean( # pylint: disable=g-long-lambda + rlax.huber_loss(x, huber_loss_parameter)) + else: + raise ValueError("Not implemented, choose from 'mse', 'huber'.") + + if optimizer == "adam": + optimizer = optax.adam(learning_rate) + elif optimizer == "sgd": + optimizer = optax.sgd(learning_rate) + else: + raise ValueError("Not implemented, choose from 'adam' and 'sgd'.") + + # Clipping the gradients prevent divergence and allow more stable training. + if gradient_clipping: + optimizer = optax.chain(optimizer, + optax.clip_by_global_norm(gradient_clipping)) + + opt_init, opt_update = optimizer.init, optimizer.update + + def _stochastic_gradient_descent(params, opt_state, gradient): + updates, opt_state = opt_update(gradient, opt_state) + new_params = optax.apply_updates(params, updates) + return new_params, opt_state + + self._opt_update_fn = _stochastic_gradient_descent + self._opt_state = opt_init(self._params_q_network) + self._loss_and_grad = jax.value_and_grad(self._loss, has_aux=False) + self._jit_update = jax.jit(self._get_update()) + + def step(self, + time_step, + is_evaluation=False, + add_transition_record=True, + use_softmax=False, + tau: Optional[float] = None): + """Returns the action to be taken and updates the Q-network if needed. + + Args: + time_step: an instance of rl_environment.TimeStep. + is_evaluation: bool, whether this is a training or evaluation call. + add_transition_record: Whether to add to the replay buffer on this step. + use_softmax: Uses soft-max action selection. + tau: Tau for soft-max action selection. If None, then the training value + will be used. + + Returns: + A `rl_agent.StepOutput` containing the action probs and chosen action. + """ + + # Act step: don't act at terminal info states or if its not our turn. + if (not time_step.last()) and (time_step.is_simultaneous_move() or + self.player_id == int( + time_step.current_player())): + # Act according to epsilon-greedy or soft-max for current Q-network. + info_state = time_step.observations["info_state"][self.player_id] + legal_actions = time_step.observations["legal_actions"][self.player_id] + if use_softmax: + action, probs = self._softmax(info_state, legal_actions, + self._tau if tau is None else tau) + else: + epsilon = self._get_epsilon(is_evaluation) + action, probs = self._epsilon_greedy(info_state, legal_actions, epsilon) + else: + action = None + probs = [] + + # Don't mess up with the state during evaluation. 
+ if not is_evaluation: + self._step_counter += 1 + + if self._step_counter % self._learn_every == 0: + self._last_loss_value = self.learn() + + if self._step_counter % self._update_target_network_every == 0: + self._params_target_q_network = _copy_params(self._params_q_network) + + if self._prev_time_step and add_transition_record: + # We may omit record adding here if it's done elsewhere. + self.add_transition(self._prev_time_step, self._prev_action, + self._prev_legal_action, time_step) + + if time_step.last(): # prepare for the next episode. + self._prev_time_step = None + self._prev_action = None + self._prev_legal_action = None + else: + self._prev_time_step = time_step + self._prev_action = action + self._prev_legal_action = legal_actions + + return rl_agent.StepOutput(action=action, probs=probs) + + def add_transition(self, prev_time_step, prev_action, prev_legal_actions, + time_step): + """Adds the new transition using `time_step` to the replay buffer. + + Adds the transition from `self._prev_time_step` to `time_step` by + `self._prev_action`. + + Args: + prev_time_step: prev ts, an instance of rl_environment.TimeStep. + prev_action: int, action taken at `prev_time_step`. + prev_legal_actions: Previous legal actions. + time_step: current ts, an instance of rl_environment.TimeStep. + """ + assert prev_time_step is not None + next_legal_actions = ( + time_step.observations["legal_actions"][self.player_id]) + next_legal_one_hots = self._to_one_hot(next_legal_actions) + # Added for deep OMD: keep previous action mask. + prev_legal_one_hots = self._to_one_hot(prev_legal_actions) + + transition = Transition( + info_state=( + prev_time_step.observations["info_state"][self.player_id][:]), + action=prev_action, + legal_one_hots=prev_legal_one_hots, + reward=time_step.rewards[self.player_id], + next_info_state=time_step.observations["info_state"][self.player_id][:], + is_final_step=float(time_step.last()), + next_legal_one_hots=next_legal_one_hots) + self._replay_buffer.add(transition) + + def _get_action_probs(self, params, info_states, legal_one_hots): + """Returns the soft-max action probability distribution.""" + q_values = self.hk_network.apply(params, info_states) + legal_q_values = q_values + (1 - legal_one_hots) * ILLEGAL_ACTION_PENALTY + return jax.nn.softmax(legal_q_values / self._tau) + + def _loss(self, params, params_target, params_prev, info_states, actions, + legal_one_hots, rewards, next_info_states, are_final_steps, + next_legal_one_hots): + """Returns the Munchausen loss.""" + # Target with 2 parts: reward and value for next state; each part is + # modified according to the Munchausen trick. + q_values = self.hk_network.apply(params, info_states) + target_q_values = self.hk_network.apply(params_target, next_info_states) + + r_term = rewards + if self._with_munchausen: + probs = self._get_action_probs(params_prev, info_states, legal_one_hots) + prob_prev_action = jnp.sum(probs * actions, axis=-1) + penalty_pi = jnp.log(jnp.clip(prob_prev_action, MIN_ACTION_PROB)) + r_term += self._alpha * self._tau * penalty_pi + + if self._with_munchausen: + # Average value over actions + extra log term. + # We clip the probabilities to avoid NaNs in the log term. + next_probs = self._get_action_probs(params_prev, next_info_states, + next_legal_one_hots) + q_term_values = next_probs * ( + target_q_values - + self._tau * jnp.log(jnp.clip(next_probs, MIN_ACTION_PROB))) + q_term = jnp.sum(q_term_values, axis=-1) + else: + # Maximum value. 
+ max_next_q = jnp.max( + target_q_values + (1 - legal_one_hots) * ILLEGAL_ACTION_PENALTY, + axis=-1) + max_next_q = jax.numpy.where( + 1 - are_final_steps, max_next_q, jnp.zeros_like(max_next_q)) + q_term = max_next_q + + target = (r_term + (1 - are_final_steps) * self._discount_factor * q_term) + target = jax.lax.stop_gradient(target) + + predictions = jnp.sum(q_values * actions, axis=-1) + + return self._loss_func(predictions - target) + + def _get_update(self): + """Returns the gradient update function.""" + + def update(params, params_target, params_prev, opt_state, info_states, + actions, legal_one_hots, rewards, next_info_states, + are_final_steps, next_legal_one_hots): + loss_val, grad_val = self._loss_and_grad(params, params_target, + params_prev, info_states, + actions, legal_one_hots, rewards, + next_info_states, + are_final_steps, + next_legal_one_hots) + new_params, new_opt_state = self._opt_update_fn(params, opt_state, + grad_val) + return new_params, new_opt_state, loss_val + + return update + + def _to_one_hot(self, a, value=1.0): + """Returns the one-hot encoding of the action.""" + a_one_hot = np.zeros(self._num_actions) + a_one_hot[a] = value + return a_one_hot + + def learn(self): + """Compute the loss on sampled transitions and perform a Q-network update. + + If there are not enough elements in the buffer, no loss is computed and + `None` is returned instead. + + Returns: + The average loss obtained on this batch of transitions or `None`. + """ + + if (len(self._replay_buffer) < self._batch_size or + len(self._replay_buffer) < self._min_buffer_size_to_learn): + return None + + transitions = self._replay_buffer.sample(self._batch_size) + info_states = np.asarray([t.info_state for t in transitions]) + actions = np.asarray([self._to_one_hot(t.action) for t in transitions]) + legal_one_hots = np.asarray([t.legal_one_hots for t in transitions]) + rewards = np.asarray([t.reward for t in transitions]) + next_info_states = np.asarray([t.next_info_state for t in transitions]) + are_final_steps = np.asarray([t.is_final_step for t in transitions]) + next_legal_one_hots = np.asarray( + [t.next_legal_one_hots for t in transitions]) + + self._params_q_network, self._opt_state, loss_val = self._jit_update( + self._params_q_network, self._params_target_q_network, + self._params_prev_q_network, self._opt_state, info_states, actions, + legal_one_hots, rewards, next_info_states, are_final_steps, + next_legal_one_hots) + + return loss_val + + def _epsilon_greedy(self, info_state, legal_actions, epsilon): + """Returns a valid epsilon-greedy action and action probabilities. + + Args: + info_state: hashable representation of the information state. + legal_actions: list of legal actions at `info_state`. + epsilon: float, probability of taking an exploratory action. + + Returns: + A valid epsilon-greedy action and action probabilities. 
+ """ + if self._rs.rand() < epsilon: + action = self._rs.choice(legal_actions) + probs = self._to_one_hot(legal_actions, value=1.0 / len(legal_actions)) + return action, probs + + info_state = np.reshape(info_state, [1, -1]) + q_values = self.hk_network_apply(self._params_q_network, info_state)[0] + legal_one_hot = self._to_one_hot(legal_actions) + legal_q_values = q_values + (1 - legal_one_hot) * ILLEGAL_ACTION_PENALTY + action = int(np.argmax(legal_q_values)) + probs = self._to_one_hot(action) + return action, probs + + def _get_epsilon(self, is_evaluation): + """Returns the evaluation or decayed epsilon value.""" + if is_evaluation: + return 0.0 + + decay_steps = min(self._step_counter, self._epsilon_decay_duration) + decayed_epsilon = ( + self._epsilon_end + (self._epsilon_start - self._epsilon_end) * + (1 - decay_steps / self._epsilon_decay_duration)**self._epsilon_power) + return decayed_epsilon + + def _softmax(self, info_state, legal_actions, + tau: float) -> Tuple[int, np.ndarray]: + """Returns a valid soft-max action and action probabilities.""" + info_state = np.reshape(info_state, [1, -1]) + q_values = self.hk_network_apply(self._params_q_network, info_state)[0] + legal_one_hot = self._to_one_hot(legal_actions) + legal_q_values = q_values + (1 - legal_one_hot) * ILLEGAL_ACTION_PENALTY + # Apply temperature and subtract the maximum value for numerical stability. + temp = legal_q_values / tau + unnormalized = np.exp(temp - np.amax(temp)) + probs = unnormalized / unnormalized.sum() + action = self._rs.choice(legal_actions, p=probs[legal_actions]) + return action, probs + + def update_prev_q_network(self): + """Updates the parameters of the previous Q-network.""" + self._params_prev_q_network = _copy_params(self._params_q_network) + if self._reset_replay_buffer_on_update: + # Also reset the replay buffer to avoid having transitions from the + # previous policy. + self._replay_buffer.reset() + + @property + def loss(self): + return self._last_loss_value + + +class SoftMaxMunchausenDQN(rl_agent.AbstractAgent): + """Wraps a Munchausen DQN agent to use soft-max action selection.""" + + def __init__(self, agent: MunchausenDQN, tau: Optional[float] = None): + self._agent = agent + self._tau = tau + + def step(self, time_step, is_evaluation=False): + return self._agent.step( + time_step, is_evaluation=is_evaluation, use_softmax=True, tau=self._tau) + + +class DeepOnlineMirrorDescent(object): + """The deep online mirror descent algorithm.""" + + def __init__(self, + game, + envs, + agents, + eval_every=200, + num_episodes_per_iteration=1000, + logging_fn: Optional[Callable[[int, int, Dict[str, Any]], + None]] = None): + """Initializes mirror descent. + + Args: + game: The game, + envs: RL environment for each player. + agents: Munchausen DQN agents for each player. + eval_every: Number of training episodes between two evaluations. + num_episodes_per_iteration: Number of training episodes for each + iiteration. + logging_fn: Callable for logging the metrics. The arguments will be the + current iteration, episode and a dictionary of metrics to log. + """ + assert len(envs) == len(agents) + # Make sure that the agents are all MunchausenDQN. 
+ for agent in agents: + assert isinstance(agent, MunchausenDQN) + + self._game = game + + self._eval_every = eval_every + self._num_episodes_per_iteration = num_episodes_per_iteration + + self._envs = envs + self._agents = agents + self._use_observation = envs[0].use_observation + + self._iteration = 0 + + if logging_fn is None: + logging_fn = lambda it, ep, vals: logging.info("%d/%d %r", it, ep, vals) + self._logging_fn = logging_fn + + # Set the initial policy and distribution. + self._update_policy_and_distribution() + + def _train_agents(self): + """Trains the agents. + + This will evaluate the Q-network for current policy and distribution. + """ + for ep in range(self._num_episodes_per_iteration): + for env, agent in zip(self._envs, self._agents): + time_step = env.reset() + while not time_step.last(): + agent_output = agent.step(time_step, use_softmax=False) + action_list = [agent_output.action] + time_step = env.step(action_list) + + # Episode is over, step all agents with final info state. + agent.step(time_step, use_softmax=False) + + if (ep + 1) % self._eval_every == 0: + metrics = {} + for i, agent in enumerate(self._agents): + metrics[f"agent{i}/loss"] = agent.loss + self._logging_fn(self._iteration, ep + 1, metrics) + + def _update_policy_and_distribution(self): + """Updates the current soft-max policy and the distribution.""" + self._policy = self.get_softmax_policy() + self._distribution = distribution_std.DistributionPolicy( + self._game, self._policy) + + def get_softmax_policy(self, + tau: Optional[float] = None + ) -> rl_agent_policy.JointRLAgentPolicy: + """Returns the softmax policy with the specified tau. + + Args: + tau: Tau for soft-max action selection, or None to use the value set in + the MunchausenDQN agents. + + Returns: + A JointRLAgentPolicy. + """ + return rl_agent_policy.JointRLAgentPolicy( + self._game, { + idx: SoftMaxMunchausenDQN(agent, tau=tau) + for idx, agent in enumerate(self._agents) + }, self._use_observation) + + def iteration(self): + """An iteration of Mirror Descent.""" + self._train_agents() + self._update_policy_and_distribution() + self._iteration += 1 + # Update the distributions of the environments and the previous Q-networks + # of the agents. + for env, agent in zip(self._envs, self._agents): + env.update_mfg_distribution(self.distribution) + agent.update_prev_q_network() + + @property + def policy(self): + return self._policy + + @property + def distribution(self): + return self._distribution diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/munchausen_deep_mirror_descent_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/munchausen_deep_mirror_descent_test.py new file mode 100644 index 0000000..afa83f4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/munchausen_deep_mirror_descent_test.py @@ -0,0 +1,73 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
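Quick reviewer aid before the test file: the target that `MunchausenDQN._loss` above regresses onto combines the reward, a log-policy bonus for the action actually taken, and a soft (entropy-regularized) value of the next state under the previous policy. A toy numpy rendering with made-up numbers; `MIN_ACTION_PROB`, `tau` and `alpha` mirror the constants and defaults in the file.

```python
import numpy as np

MIN_ACTION_PROB = 1e-6
tau, alpha, gamma = 0.05, 0.9, 1.0

reward, is_final = 1.0, 0.0
prob_prev_action = 0.7                      # pi_prev(a | s) for the action taken
next_probs = np.array([0.6, 0.3, 0.1])      # pi_prev(. | s')
target_q_next = np.array([1.2, 0.8, -0.5])  # Q_target(s', .)

# Reward term with the Munchausen log-policy bonus.
r_term = reward + alpha * tau * np.log(np.clip(prob_prev_action, MIN_ACTION_PROB, None))
# Soft value of the next state under pi_prev.
q_term = np.sum(next_probs * (target_q_next
                              - tau * np.log(np.clip(next_probs, MIN_ACTION_PROB, None))))
target = r_term + (1.0 - is_final) * gamma * q_term
```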
+ +"""Tests for Munchausen deep online mirror descent.""" +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np + +from open_spiel.python import policy +from open_spiel.python import rl_environment +from open_spiel.python.mfg.algorithms import distribution +from open_spiel.python.mfg.algorithms import munchausen_deep_mirror_descent +from open_spiel.python.mfg.algorithms import nash_conv +from open_spiel.python.mfg.games import crowd_modelling # pylint: disable=unused-import +import pyspiel + + +class DeepOnlineMirrorDescentTest(parameterized.TestCase): + + @parameterized.named_parameters(('cpp', 'mfg_crowd_modelling'), + ('python', 'python_mfg_crowd_modelling')) + def test_train(self, name): + """Checks that the training works.""" + game = pyspiel.load_game(name) + assert game.num_players() == 1 + uniform_policy = policy.UniformRandomPolicy(game) + uniform_dist = distribution.DistributionPolicy(game, uniform_policy) + env = rl_environment.Environment( + game, mfg_distribution=uniform_dist, mfg_population=0) + info_state_size = env.observation_spec()['info_state'][0] + num_actions = env.action_spec()['num_actions'] + np.random.seed(0) + args = { + 'alpha': 0.9, + 'batch_size': 128, + 'discount_factor': 1.0, + 'epsilon_decay_duration': 20000000, + 'epsilon_end': 0.1, + 'epsilon_start': 0.1, + 'gradient_clipping': 40, + 'hidden_layers_sizes': [128, 128], + 'learn_every': 64, + 'learning_rate': 0.01, + 'loss': 'mse', + 'min_buffer_size_to_learn': 500, + 'optimizer': 'adam', + 'replay_buffer_capacity': 2000, + 'tau': 10, + 'update_target_network_every': 50 + } + agent = munchausen_deep_mirror_descent.MunchausenDQN( + 0, info_state_size, num_actions, **args) + md = munchausen_deep_mirror_descent.DeepOnlineMirrorDescent( + game, [env], [agent], num_episodes_per_iteration=100) + for _ in range(10): + md.iteration() + nash_conv_md = nash_conv.NashConv(game, md.policy) + self.assertLessEqual(nash_conv_md.nash_conv(), 3) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/munchausen_mirror_descent.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/munchausen_mirror_descent.py new file mode 100644 index 0000000..bfff124 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/munchausen_mirror_descent.py @@ -0,0 +1,86 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
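One more aside before the tabular Munchausen variant: the soft-max action selection used by `MunchausenDQN._softmax` and the `SoftMaxMunchausenDQN` wrapper above masks illegal actions with a large negative penalty, applies the temperature, and samples from the resulting distribution. A toy numpy sketch with made-up Q-values:

```python
import numpy as np

ILLEGAL_ACTION_PENALTY = -1e9
tau = 0.05

q_values = np.array([0.2, 0.5, -0.1, 0.4])
legal_actions = [0, 1, 3]
legal_one_hot = np.zeros_like(q_values)
legal_one_hot[legal_actions] = 1.0

# Mask illegal actions, apply the temperature, and normalize.
logits = (q_values + (1 - legal_one_hot) * ILLEGAL_ACTION_PENALTY) / tau
probs = np.exp(logits - logits.max())
probs /= probs.sum()
action = np.random.choice(legal_actions, p=probs[legal_actions])
```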
+"""Munchausen Online Mirror Descent.""" + +from typing import Dict, List, Optional + +import numpy as np + +from open_spiel.python import policy as policy_lib +from open_spiel.python.mfg import value +from open_spiel.python.mfg.algorithms import mirror_descent +import pyspiel + + +class ProjectedPolicyMunchausen(mirror_descent.ProjectedPolicy): + """Project values on the policy simplex.""" + + def __init__( + self, + game: pyspiel.Game, + player_ids: List[int], + state_value: value.ValueFunction, + learning_rate: float, + policy: policy_lib.Policy, + ): + """Initializes the projected policy. + + Args: + game: The game to analyze. + player_ids: list of player ids for which this policy applies; each should + be in the range 0..game.num_players()-1. + state_value: The state value to project. + learning_rate: The learning rate. + policy: The policy to project. + """ + super().__init__(game, player_ids, state_value) + self._learning_rate = learning_rate + self._policy = policy + + def action_probabilities(self, + state: pyspiel.State, + player_id: Optional[int] = None) -> Dict[int, float]: + del player_id + action_logit = [ + (a, self._learning_rate * self.value(state, action=a) + np.log(p)) + for a, p in self._policy.action_probabilities(state).items() + ] + action, logit = zip(*action_logit) + return dict(zip(action, mirror_descent.softmax_projection(logit))) + + +class MunchausenMirrorDescent(mirror_descent.MirrorDescent): + """Munchausen Online Mirror Descent algorithm. + + This algorithm is equivalent to the online mirror descent algorithm but + instead of summing value functions, it directly computes the cumulative + Q-function using a penalty with respect to the previous policy. + """ + + def eval_state(self, state: pyspiel.State, learning_rate: float): + """Evaluate the value of a state.""" + state_str = state.observation_string(pyspiel.PlayerId.DEFAULT_PLAYER_ID) + # Return the already calculated value if present. + if self._state_value.has(state_str): + return self._state_value(state_str) + # Otherwise, calculate the value of the state. + v = self.get_state_value(state, learning_rate) + self._state_value.set_value(state_str, v) + return v + + def get_projected_policy(self) -> policy_lib.Policy: + """Returns the projected policy.""" + return ProjectedPolicyMunchausen(self._game, + list(range(self._game.num_players())), + self._state_value, self._lr, self._policy) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/munchausen_mirror_descent_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/munchausen_mirror_descent_test.py new file mode 100644 index 0000000..1a4ef85 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/munchausen_mirror_descent_test.py @@ -0,0 +1,44 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for Munchausen Online Mirror Descent.""" + +from absl.testing import absltest +from absl.testing import parameterized + +from open_spiel.python.mfg import value +from open_spiel.python.mfg.algorithms import munchausen_mirror_descent +from open_spiel.python.mfg.algorithms import nash_conv +from open_spiel.python.mfg.games import crowd_modelling # pylint: disable=unused-import +import pyspiel + + +class MunchausenMirrorDescentTest(parameterized.TestCase): + + @parameterized.named_parameters(('python', 'python_mfg_crowd_modelling'), + ('cpp', 'mfg_crowd_modelling')) + def test_run(self, name): + """Checks if the algorithm works.""" + game = pyspiel.load_game(name) + md = munchausen_mirror_descent.MunchausenMirrorDescent( + game, value.TabularValueFunction(game)) + for _ in range(10): + md.iteration() + md_policy = md.get_policy() + nash_conv_md = nash_conv.NashConv(game, md_policy) + + self.assertAlmostEqual(nash_conv_md.nash_conv(), 2.27366, places=5) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/nash_conv.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/nash_conv.py new file mode 100644 index 0000000..d1eed72 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/nash_conv.py @@ -0,0 +1,82 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Implementation of Nash Conv metric for a policy. + +In the context of mean field games, the Nash Conv is the difference between: +- the value of a policy against the distribution of that policy, +- and the best response against the distribution of the policy. +""" + +from open_spiel.python import policy as policy_std +from open_spiel.python.mfg import value +from open_spiel.python.mfg.algorithms import best_response_value +from open_spiel.python.mfg.algorithms import distribution +from open_spiel.python.mfg.algorithms import policy_value + + +class NashConv(object): + """Computes the Nash Conv of a policy.""" + + def __init__(self, game, policy: policy_std.Policy, root_state=None): + """Initializes the nash conv. + + Args: + game: The game to analyze. + policy: A `policy.Policy` object. + root_state: The state of the game at which to start. If `None`, the game + root state is used. + """ + self._game = game + self._policy = policy + if root_state is None: + self._root_states = game.new_initial_states() + else: + self._root_states = [root_state] + self._distrib = distribution.DistributionPolicy( + self._game, self._policy, root_state=root_state) + self._pi_value = policy_value.PolicyValue( + self._game, + self._distrib, + self._policy, + value.TabularValueFunction(self._game), + root_state=root_state) + self._br_value = best_response_value.BestResponse( + self._game, + self._distrib, + value.TabularValueFunction(self._game), + root_state=root_state) + + def nash_conv(self): + """Returns the nash conv. + + Returns: + A float representing the nash conv for the policy. 
+ """ + return sum([ + self._br_value.eval_state(state) - self._pi_value.eval_state(state) + for state in self._root_states + ]) + + def br_values(self): + """Returns the best response values to the policy distribution. + + Returns: + A List[float] representing the best response values for a policy + distribution. + """ + return [self._br_value.eval_state(state) for state in self._root_states] + + @property + def distribution(self) -> distribution.DistributionPolicy: + return self._distrib diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/nash_conv_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/nash_conv_test.py new file mode 100644 index 0000000..87057f5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/nash_conv_test.py @@ -0,0 +1,45 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for nash conv.""" + +from absl.testing import absltest + +from open_spiel.python import policy +from open_spiel.python.mfg.algorithms import nash_conv +from open_spiel.python.mfg.games import crowd_modelling +import pyspiel + + +class BestResponseTest(absltest.TestCase): + + def test_python_game(self): + """Checks if the NashConv is consistent through time.""" + game = crowd_modelling.MFGCrowdModellingGame() + uniform_policy = policy.UniformRandomPolicy(game) + nash_conv_fp = nash_conv.NashConv(game, uniform_policy) + + self.assertAlmostEqual(nash_conv_fp.nash_conv(), 2.8135365543870385) + + def test_cpp_game(self): + """Checks if the NashConv is consistent through time.""" + game = pyspiel.load_game("mfg_crowd_modelling") + uniform_policy = policy.UniformRandomPolicy(game) + nash_conv_fp = nash_conv.NashConv(game, uniform_policy) + + self.assertAlmostEqual(nash_conv_fp.nash_conv(), 2.8135365543870385) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/policy_value.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/policy_value.py new file mode 100644 index 0000000..3790cde --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/policy_value.py @@ -0,0 +1,107 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
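A one-line sanity check of the NashConv definition implemented above: it is the gap, summed over root states, between the best-response value and the policy's own value against the policy's induced distribution (toy numbers below):

```python
# Toy numbers for a game with a single root state.
pi_values = [27.2]   # value of the policy against its own distribution
br_values = [30.0]   # value of a best response against that distribution

nash_conv = sum(br - pi for br, pi in zip(br_values, pi_values))  # 2.8
# 0.0 would indicate the policy is a (mean field) Nash equilibrium.
```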
+ +"""Does a backward pass to output the value of a policy.""" +from typing import Optional +from open_spiel.python import policy as policy_std +from open_spiel.python.mfg import distribution as distribution_std +from open_spiel.python.mfg import value +import pyspiel + + +class PolicyValue(value.ValueFunction): + """Computes the value of a specified strategy.""" + + def __init__(self, + game, + distribution: distribution_std.Distribution, + policy: policy_std.Policy, + state_value: Optional[value.ValueFunction] = None, + root_state=None): + """Initializes the value calculation. + + Args: + game: The game to analyze. + distribution: A `distribution.Distribution` object. + policy: A `policy.Policy` object. + state_value: A state value function. Defaults to Tabular. + root_state: The state of the game at which to start. If `None`, the game + root state is used. + """ + super(PolicyValue, self).__init__(game) + if root_state is None: + self._root_states = game.new_initial_states() + else: + self._root_states = [root_state] + self._distribution = distribution + self._policy = policy + + self._state_value = (state_value if state_value is not None + else value.TabularValueFunction(game)) + + self.evaluate() + + def eval_state(self, state): + """Evaluate the value of a state.""" + state_str = state.observation_string(pyspiel.PlayerId.DEFAULT_PLAYER_ID) + if self._state_value.has(state_str): + return self._state_value(state_str) + elif state.is_terminal(): + self._state_value.set_value( + state_str, + state.rewards()[state.mean_field_population()]) + return self._state_value(state_str) + elif state.current_player() == pyspiel.PlayerId.CHANCE: + self._state_value.set_value(state_str, 0.0) + for action, prob in state.chance_outcomes(): + new_state = state.child(action) + self._state_value.add_value(state_str, + prob * self.eval_state(new_state)) + return self._state_value(state_str) + elif state.current_player() == pyspiel.PlayerId.MEAN_FIELD: + dist_to_register = state.distribution_support() + dist = [ + self._distribution.value_str(str_state, 0.) 
+ for str_state in dist_to_register + ] + new_state = state.clone() + new_state.update_distribution(dist) + self._state_value.set_value( + state_str, + state.rewards()[state.mean_field_population()] + + self.eval_state(new_state)) + return self._state_value(state_str) + else: + assert int(state.current_player()) >= 0, "The player id should be >= 0" + v = 0.0 + for action, prob in self._policy.action_probabilities(state).items(): + new_state = state.child(action) + v += prob * self.eval_state(new_state) + self._state_value.set_value( + state_str, + state.rewards()[state.mean_field_population()] + v) + return self._state_value(state_str) + + def evaluate(self): + """Evaluate the value over states of self._policy.""" + for state in self._root_states: + self.eval_state(state) + + def value(self, state, action=None): + if action is None: + return self._state_value( + state.observation_string(pyspiel.PlayerId.DEFAULT_PLAYER_ID)) + new_state = state.child(action) + return state.rewards()[state.mean_field_population()] + self._state_value( + new_state.observation_string(pyspiel.PlayerId.DEFAULT_PLAYER_ID)) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/policy_value_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/policy_value_test.py new file mode 100644 index 0000000..9a52d13 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/policy_value_test.py @@ -0,0 +1,48 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for policy_value.""" + +from absl.testing import absltest +from absl.testing import parameterized + +from open_spiel.python import policy +from open_spiel.python.mfg import value +from open_spiel.python.mfg.algorithms import distribution +from open_spiel.python.mfg.algorithms import policy_value +from open_spiel.python.mfg.games import crowd_modelling # pylint: disable=unused-import +import pyspiel + + +class PolicyValueTest(parameterized.TestCase): + + @parameterized.named_parameters(('python', 'python_mfg_crowd_modelling'), + ('cpp', 'mfg_crowd_modelling')) + def test_policy_value(self, name): + """Checks if the value of a policy computation works. + + Args: + name: Name of the game. 
+ """ + game = pyspiel.load_game(name) + uniform_policy = policy.UniformRandomPolicy(game) + dist = distribution.DistributionPolicy(game, uniform_policy) + py_value = policy_value.PolicyValue(game, dist, uniform_policy, + value.TabularValueFunction(game)) + py_val = py_value(game.new_initial_state()) + self.assertAlmostEqual(py_val, 27.215850929940448) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/pytorch/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/pytorch/__init__.py new file mode 100644 index 0000000..a1223b9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/pytorch/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/pytorch/mfg_proximal_policy_optimization.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/pytorch/mfg_proximal_policy_optimization.py new file mode 100644 index 0000000..5860ce6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/pytorch/mfg_proximal_policy_optimization.py @@ -0,0 +1,293 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Mean field proximal policy optimaztion algorithm. + +Reference: + + Algumaei, Talal, et al. "Regularization of the policy updates for + stabilizing + Mean Field Games." Pacific-Asia Conference on Knowledge Discovery and Data + Mining. Cham: Springer Nature Switzerland, 2023. 
Available at: + https://link.springer.com/chapter/10.1007/978-3-031-33377-4_28 +""" + +# pylint: disable=consider-using-from-import +# pylint: disable=g-importing-member + +import numpy as np +import torch +from torch.distributions.categorical import Categorical +import torch.nn as nn +import torch.nn.functional as F + +from open_spiel.python import policy as policy_std +from open_spiel.python.mfg import value +from open_spiel.python.mfg.algorithms import best_response_value +from open_spiel.python.mfg.algorithms import policy_value +from open_spiel.python.mfg.algorithms.nash_conv import NashConv + + +class NashC(NashConv): + """Mainly used to calculate the exploitability.""" + + def __init__(self, game, distrib, pi_value, root_state=None): + self._game = game + if root_state is None: + self._root_states = game.new_initial_states() + else: + self._root_states = [root_state] + self._distrib = distrib + self._pi_value = pi_value + self._br_value = best_response_value.BestResponse( + self._game, + self._distrib, + value.TabularValueFunction(self._game), + root_state=root_state, + ) + + +class Agent(nn.Module): + """Mainly used to calculate the exploitability.""" + + def __init__(self, info_state_size, num_actions): + super(Agent, self).__init__() + self.num_actions = num_actions + self.info_state_size = info_state_size + self.critic = nn.Sequential( + self.layer_init(nn.Linear(info_state_size, 128)), + nn.Tanh(), + self.layer_init(nn.Linear(128, 128)), + nn.Tanh(), + self.layer_init(nn.Linear(128, 1)), + ) + self.actor = nn.Sequential( + self.layer_init(nn.Linear(info_state_size, 128)), + nn.Tanh(), + self.layer_init(nn.Linear(128, 128)), + nn.Tanh(), + self.layer_init(nn.Linear(128, num_actions)), + ) + + def layer_init(self, layer, bias_const=0.0): + """Used to initalize layers.""" + nn.init.xavier_normal_(layer.weight) + nn.init.constant_(layer.bias, bias_const) + return layer + + def get_value(self, x): + """Get the value of the state.""" + return self.critic(x) + + def get_action_and_value(self, x, action=None): + """Get the action and value of the state.""" + logits = self.actor(x) + probs = Categorical(logits=logits) + if action is None: + action = probs.sample() + return action, probs.log_prob(action), probs.entropy(), self.critic(x) + + +class Policy(policy_std.Policy): + """Required obeject to work with OpenSpiel. + + Used in updating the distribution using the policy nd in calculating the + nash-convergance. 
+ """ + + def __init__(self, game, agent, player_ids, device): + super().__init__(game, player_ids) + self.agent = agent + self.device = device + + def action_probabilities(self, state, player_id=None): + """Calculate the action probabilities of the state.""" + obs = torch.Tensor(state.observation_tensor()).to(self.device) + legal_actions = state.legal_actions() + logits = self.agent.actor(obs).detach().cpu() + legat_logits = np.array([logits[action] for action in legal_actions]) + probs = np.exp(legat_logits - legat_logits.max()) + probs /= probs.sum(axis=0) + + # returns a dict with actions as keys and their probabilities as values + return { + action: probs[legal_actions.index(action)] for action in legal_actions + } + + +def rollout(env, iter_agent, eps_agent, num_epsiodes, steps, device): + """Generates num_epsiodes rollouts.""" + info_state = torch.zeros((steps, iter_agent.info_state_size), device=device) + actions = torch.zeros((steps,), device=device) + logprobs = torch.zeros((steps,), device=device) + rewards = torch.zeros((steps,), device=device) + dones = torch.zeros((steps,), device=device) + values = torch.zeros((steps,), device=device) + entropies = torch.zeros((steps,), device=device) + t_actions = torch.zeros((steps,), device=device) + t_logprobs = torch.zeros((steps,), device=device) + + step = 0 + for _ in range(num_epsiodes): + time_step = env.reset() + while not time_step.last(): + obs = time_step.observations["info_state"][0] + obs = torch.Tensor(obs).to(device) + info_state[step] = obs + with torch.no_grad(): + t_action, t_logprob, _, _ = iter_agent.get_action_and_value(obs) + action, logprob, entropy, ivalue = eps_agent.get_action_and_value(obs) + + time_step = env.step([action.item()]) + + # iteration policy data + t_logprobs[step] = t_logprob + t_actions[step] = t_action + + # episode policy data + logprobs[step] = logprob + dones[step] = time_step.last() + entropies[step] = entropy + values[step] = ivalue + actions[step] = action + rewards[step] = torch.Tensor(time_step.rewards).to(device) + step += 1 + + history = { + "info_state": info_state, + "actions": actions, + "logprobs": logprobs, + "rewards": rewards, + "dones": dones, + "values": values, + "entropies": entropies, + "t_actions": t_actions, + "t_logprobs": t_logprobs, + } + return history + + +def calculate_advantage(gamma, norm, rewards, values, dones, device): + """Function used to calculate the Generalized Advantage estimate.""" + with torch.no_grad(): + next_done = dones[-1] + next_value = values[-1] + steps = len(values) + returns = torch.zeros_like(rewards).to(device) + for t in reversed(range(steps)): + if t == steps - 1: + nextnonterminal = 1.0 - next_done + next_return = next_value + else: + nextnonterminal = 1.0 - dones[t + 1] + next_return = returns[t + 1] + returns[t] = rewards[t] + gamma * nextnonterminal * next_return + + advantages = returns - values + + if norm: + advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8) + + return advantages, returns + + +def learn( + history, + optimizer_actor, + optimize_critic, + agent, + num_minibatches=5, + update_epochs=5, + itr_eps=0.05, + eps_eps=0.2, + alpha=0.5, + ent_coef=0.01, + max_grad_norm=5, +): + """Update the agent network (actor and critic).""" + v_loss = None + batch_size = history["actions"].shape[0] + b_inds = np.arange(batch_size) + mini_batch_size = batch_size // num_minibatches + # get batch indices + np.random.shuffle(b_inds) + for _ in range(update_epochs): + for start in range(0, batch_size, mini_batch_size): + 
end = start + mini_batch_size + mb_inds = b_inds[start:end] + # for each update epoch shuffle the batch indices + # generate the new logprobs, entropy and value then calculate the ratio + b_obs = history["info_state"][mb_inds] + b_advantages = history["advantages"][mb_inds] + + # Get the data under the episode policy (representative agent current + # policy) + _, newlogprob, entropy, new_value = agent.get_action_and_value( + b_obs, history["actions"][mb_inds] + ) + logratio = newlogprob - history["logprobs"][mb_inds] + ratio = torch.exp(logratio) + + # Get the data under the iteration policy (the population policy) + _, t_newlogprob, _, _ = agent.get_action_and_value( + b_obs, history["t_actions"][mb_inds] + ) + t_logratio = t_newlogprob - history["t_logprobs"][mb_inds] + t_ratio = torch.exp(t_logratio) + + # iteration update PPO + t_pg_loss1 = b_advantages * t_ratio + t_pg_loss2 = b_advantages * torch.clamp(t_ratio, 1 - itr_eps, 1 + itr_eps) + + # episodic update PPO + pg_loss1 = b_advantages * ratio + pg_loss2 = b_advantages * torch.clamp(ratio, 1 - eps_eps, 1 + eps_eps) + + # Calculate the loss using our loss function + pg_loss = ( + -alpha * torch.min(pg_loss1, pg_loss2).mean() + - (1 - alpha) * torch.min(t_pg_loss1, t_pg_loss2).mean() + ) + v_loss = F.smooth_l1_loss( + new_value.reshape(-1), history["returns"][mb_inds] + ).mean() + entropy_loss = entropy.mean() + loss = pg_loss - ent_coef * entropy_loss + + # Actor update + optimizer_actor.zero_grad() + loss.backward() + nn.utils.clip_grad_norm_(agent.actor.parameters(), max_grad_norm) + optimizer_actor.step() + + # Critic update + optimize_critic.zero_grad() + v_loss.backward() + nn.utils.clip_grad_norm_(agent.critic.parameters(), max_grad_norm) + optimize_critic.step() + + assert v_loss is not None + return v_loss + + +def calculate_explotability(game, distrib, policy): + """This function is used to log the results to tensor board.""" + initial_states = game.new_initial_states() + pi_value = policy_value.PolicyValue( + game, distrib, policy, value.TabularValueFunction(game) + ) + m = { + f"ppo_br/{state}": pi_value.eval_state(state) for state in initial_states + } + nashc = NashC(game, distrib, pi_value).nash_conv() + m["nash_conv_ppo"] = nashc + + return m diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/pytorch/mfg_proximal_policy_optimization_pytorch_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/pytorch/mfg_proximal_policy_optimization_pytorch_test.py new file mode 100644 index 0000000..9837057 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/pytorch/mfg_proximal_policy_optimization_pytorch_test.py @@ -0,0 +1,111 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
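Before the PPO test file: the advantage computation in `calculate_advantage` above is a discounted-return recursion bootstrapped from the last value estimate, with the advantage being return minus value. A toy numpy version with made-up rewards and values, mirroring the torch code above:

```python
import numpy as np

gamma = 0.9
rewards = np.array([0.0, 0.0, 1.0])
values  = np.array([0.5, 0.6, 0.9])
dones   = np.array([0.0, 0.0, 0.0])   # no terminal flag in this toy rollout

returns = np.zeros_like(rewards)
steps = len(rewards)
for t in reversed(range(steps)):
    if t == steps - 1:
        nonterminal, next_return = 1.0 - dones[-1], values[-1]
    else:
        nonterminal, next_return = 1.0 - dones[t + 1], returns[t + 1]
    returns[t] = rewards[t] + gamma * nonterminal * next_return

advantages = returns - values   # optionally normalized before the PPO update
```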
+"""Tests for Mean field proximal policy optimaztion.""" + +# pylint: disable=consider-using-from-import +# pylint: disable=g-importing-member + +from absl.testing import absltest +from absl.testing import parameterized +from open_spiel.python.mfg.algorithms.pytorch.mfg_proximal_policy_optimization import Agent as mfg_ppo_agent +from open_spiel.python.mfg.algorithms.pytorch.mfg_proximal_policy_optimization import calculate_advantage +from open_spiel.python.mfg.algorithms.pytorch.mfg_proximal_policy_optimization import calculate_explotability +from open_spiel.python.mfg.algorithms.pytorch.mfg_proximal_policy_optimization import learn +from open_spiel.python.mfg.algorithms.pytorch.mfg_proximal_policy_optimization import Policy as mfg_ppo_policy +from open_spiel.python.mfg.algorithms.pytorch.mfg_proximal_policy_optimization import rollout +import torch +import torch.optim as optim + +from open_spiel.python import policy as policy_std +from open_spiel.python import rl_environment +from open_spiel.python.mfg.algorithms import distribution +from open_spiel.python.mfg.games import factory + + +class PolicyTest(parameterized.TestCase): + """Test the policy.""" + + @parameterized.named_parameters( + ("python", "mfg_crowd_modelling_2d", "crowd_modelling_2d_four_rooms") + ) + def test_train(self, name, setting): + """Checks that the training works.""" + device = torch.device("cpu") + args = { + "num_episodes": 5, + "gamma": 0.9, + } + game = factory.create_game_with_setting(name, setting) + uniform_policy = policy_std.UniformRandomPolicy(game) + mfg_dist = distribution.DistributionPolicy(game, uniform_policy) + env = rl_environment.Environment( + game, mfg_distribution=mfg_dist, mfg_population=0 + ) + + # Set the environment seed for reproduciblility + env.seed(0) + + # Creat the agent and population policies + info_state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + agent = mfg_ppo_agent(info_state_size, num_actions).to(device) + ppo_policy = mfg_ppo_policy(game, agent, None, device) + pop_agent = mfg_ppo_agent(info_state_size, num_actions).to(device) + + optimizer_actor = optim.Adam(agent.actor.parameters(), lr=1e-3, eps=1e-5) + optimizer_critic = optim.Adam(agent.critic.parameters(), lr=1e-3, eps=1e-5) + + # calculate the exploitability + m = calculate_explotability(game, mfg_dist, ppo_policy) + init_nashc = m["nash_conv_ppo"] + + steps = args["num_episodes"] * env.max_game_length + + for _ in range(3): + # collect rollout data + history = rollout( + env, pop_agent, agent, args["num_episodes"], steps, device + ) + # Calculate the advantage function + adv, returns = calculate_advantage( + args["gamma"], + True, + history["rewards"], + history["values"], + history["dones"], + device, + ) + history["advantages"] = adv + history["returns"] = returns + # Update the learned policy and report loss for debugging + learn(history, optimizer_actor, optimizer_critic, agent) + + # Update the iteration policy with the new policy + pop_agent.load_state_dict(agent.state_dict()) + + # Update the distribution + distrib = distribution.DistributionPolicy(game, ppo_policy) + + # calculate the exploitability + m = calculate_explotability(game, distrib, ppo_policy) + nashc = m["nash_conv_ppo"] + + # update the environment distribution + env.update_mfg_distribution(distrib) + + # Test convergence + self.assertLessEqual(nashc, 2 * init_nashc) + + +if __name__ == "__main__": + absltest.main() diff --git 
a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/regret/c_ce_optimization.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/regret/c_ce_optimization.py new file mode 100644 index 0000000..8f92232 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/regret/c_ce_optimization.py @@ -0,0 +1,131 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Optimization algorithms to compute (C)CE weights.""" + +import numpy as np +import scipy.optimize +import scipy.sparse.linalg + + +# pylint: disable=invalid-name +def get_proba_constraints_positivity(nus): + A = np.zeros((nus.shape[0], 1 + nus.shape[0])) + A[:, 1:] = -np.eye(nus.shape[0]) + return A, np.zeros(A.shape[0]) + + +def get_proba_constraint_sum_eq(nus): + A = np.ones((1, 1 + nus.shape[0])) + A[0, 0] = 0.0 + return A, np.array([1.0]) + + +def compress_internal_weights(nus, regrets, rewards, lbd=0.0): + """Computes distribution over `nus` while minimizing internal regret. + + Args: + nus: [T, P] array, T the number of different population distributions, P the + number of different policies. + regrets: [T, P, P] array, regrets[t, i, j] = payoff for switching from + policy i to j at time t. + rewards: [T, P] array, T the number of different population distributions, P + the number of different policies + lbd: Sparsity argument. + + Returns: + Computed distribution over `nus`. + """ + + def get_c(nus): + return np.concatenate( + (np.array([1.0]), -lbd * np.sum(rewards * nus, axis=1)) + ) + + def get_max_constraint(regrets): + regrets = np.transpose(np.array(regrets), axes=[0, 2, 1]) + regrets = regrets.reshape(-1, regrets.shape[-1]) + A = np.zeros((regrets.shape[0], 1 + regrets.shape[1])) + A[:, 1:] = regrets + A[:, 0] = -1.0 + + b = np.zeros(A.shape[0]) + return A, b + + def get_a_ub(nus, regrets): + Amax, bmax = get_max_constraint(regrets) + Apos, bpos = get_proba_constraints_positivity(nus) + return np.concatenate((Amax, Apos), axis=0), np.concatenate( + (bmax, bpos), axis=0 + ) + + c = get_c(nus) + + A_ub, b_ub = get_a_ub(nus, regrets) + A_eq, b_eq = get_proba_constraint_sum_eq(nus) + + res = scipy.optimize.linprog( + c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq, options={'tol': 1e-10} + ) + new_weights = res.x + return new_weights[1:] + + +def compress_external_weights(nus, regrets, rewards, lbd=0.0): + """Computes distribution over `nus` while minimizing external regret. + + Args: + nus: [T, P] array, T the number of different population distributions, P the + number of different policies. + regrets: [T, P] array, regrets[t, i] = payoff for switching from current + policy to i at time t. + rewards: [T, P] array, reward for playing policy P at time T. + lbd: Sparsity argument. + + Returns: + Computed distribution over `nus`. 
+ """ + + def get_c(nus): + return np.concatenate( + (np.array([1.0]), -lbd * np.sum(rewards * nus, axis=1)) + ) + + def get_max_constraints(nus, regrets, lbd): + A = np.zeros((regrets.shape[1], 1 + nus.shape[0])) + A[:, 0] = -1.0 + A[:, 1:] = np.transpose( + regrets + - np.sum(regrets * nus, axis=1).reshape(-1, 1) + - lbd * np.abs(regrets) + ) + return A, np.zeros(A.shape[0]) + + def get_a_ub(nus, regrets, lbd): + Amax, bmax = get_max_constraints(nus, regrets, lbd) + Apos, bpos = get_proba_constraints_positivity(nus) + return np.concatenate((Amax, Apos), axis=0), np.concatenate( + (bmax, bpos), axis=0 + ) + + c = get_c(nus) + + A_ub, b_ub = get_a_ub(nus, regrets, lbd) + A_eq, b_eq = get_proba_constraint_sum_eq(nus) + + res = scipy.optimize.linprog( + c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq, options={'tol': 1e-10} + ) + new_weights = res.x + return new_weights[1:] diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/regret/hedge.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/regret/hedge.py new file mode 100644 index 0000000..80594e3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/regret/hedge.py @@ -0,0 +1,87 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Hedge algorithm for MFGs.""" + +from typing import Optional + +import numpy as np + +from open_spiel.python.mfg.algorithms import utils +from open_spiel.python.mfg.algorithms.regret import polynomial_weights + + +class Hedge(polynomial_weights.PolynomialWeightAlgorithm): + """Hedge algorithm.""" + + def __init__( + self, + game, + policies, + eta: Optional[float] = None, + regret_steps_per_step: int = 1, + rho_tol: float = 1e-4, + compress_nus: bool = True, + compress_lbd: float = 0.0, + compress_every: int = 1, + stop_early: bool = True, + stop_regret_threshold: float = 1e-3, + value_estimator=utils.sample_value, + value_estimation_n: int = 1, + compute_internal_regret: bool = False, + ): + super().__init__( + game, + policies, + eta=eta, + regret_steps_per_step=regret_steps_per_step, + rho_tol=rho_tol, + compress_nus=compress_nus, + compress_every=compress_every, + compress_lbd=compress_lbd, + stop_early=stop_early, + stop_regret_threshold=stop_regret_threshold, + value_estimator=value_estimator, + value_estimation_n=value_estimation_n, + compute_internal_regret=compute_internal_regret, + ) + + if self._compute_internal_regret: + self._ws = [np.ones(len(policies)) for _ in range(len(policies))] + self._p = np.ones(len(policies)) / (1.0 * len(policies)) + else: + self._w = np.ones(len(policies)) + + if eta is None: + assert regret_steps_per_step is not None, ( + "Both `eta` and " + "`regret_steps_per_step` were " + "None, whereas our algorithm " + "requires either value to be " + "set." 
+ ) + self.compute_optimal_eta() + self._constant_eta = 1.0 + else: + self._eta = eta + self._constant_eta = eta + + def _update_weights(self, rewards): + if self._compute_internal_regret: + self._ws = [ + w * np.exp(self._eta * rewards * p) for w, p in zip(self._ws, self._p) + ] + self.compute_p() + else: + self._w = self._w * np.exp(self._eta * rewards) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/regret/nash_evolutionary_search.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/regret/nash_evolutionary_search.py new file mode 100644 index 0000000..ce5cb67 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/regret/nash_evolutionary_search.py @@ -0,0 +1,137 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Randomly searches for a Restricted Nash Equilibrium. + +""" + +from typing import Optional + +import cma +import numpy as np +from open_spiel.python.mfg.algorithms import utils +from open_spiel.python.mfg.algorithms.regret import regret_minimizer + + +def softmax(x): + e = np.exp(x - np.max(x)) + return e / np.sum(e, axis=-1, keepdims=True) + + +class NashCMAES(regret_minimizer.RegretMinimizer): + """Base class for Regret Minimizers. + + Implements base functions for regret minimizers to implement. + + Attributes: + _game: Pyspiel game. + _regret_steps_per_step: Number of regret steps per `step` call (Maximum + number in case `stop_early` is true) + _rho_tol: If `_compress_nus` is true, minimum probability threshold ( + Probabilities below `rho_tol` will be filtered out). + _compress_nus: Whether to compress nus (Remove nus with low selection + probability) or not. + _compress_lbd: Penalty term in L1 minimization when compressing nus. + _stop_early: Whether to stop regret computation when average regret is lower + than `_stop_regret_threshold` or to keep going until + `_regret_steps_per_step` steps have been accomplished. + _stop_regret_threshold: If `stop_early` is true, average regret threshold + under which the algorithm will stop. + _policies: List of Policies + _value_estimator: Value estimation function. + _value_estimation_n: Number of runs to average _value_estimator's result on. 
+ """ + + def __init__(self, + game, + policies, + eta: Optional[float] = None, + regret_steps_per_step: int = 1, + rho_tol: float = 1e-4, + compress_nus: bool = True, + compress_lbd: float = 0.0, + compress_every: int = 1, + stop_early: bool = True, + stop_regret_threshold: float = 1e-3, + value_estimator=utils.sample_value, + value_estimation_n: int = 1): + super().__init__( + game, + policies, + regret_steps_per_step=regret_steps_per_step, + rho_tol=rho_tol, + compress_nus=compress_nus, + compress_every=compress_every, + compress_lbd=compress_lbd, + stop_early=stop_early, + stop_regret_threshold=stop_regret_threshold, + value_estimator=value_estimator, + value_estimation_n=value_estimation_n) + self._nu = np.ones(len(policies)) / len(policies) + self._exploitability = None + + def compute_exploitability(self, nu): + mu = utils.MixedDistribution(self._policy_mus, nu) + per_policy_reward = 0.0 + for _ in range(self._value_estimation_n): + per_policy_reward += np.array( + [self._value_estimator(pi, mu, self._game) for pi in self._policies]) + per_policy_reward /= self._value_estimation_n + on_policy_reward = np.sum(per_policy_reward * nu) + return np.max(per_policy_reward - on_policy_reward) + + def step_for(self, T): # pylint: disable=invalid-name + self.step(T) + + def get_exploitabilities(self, nus): + return np.array([self.compute_exploitability(nu) for nu in nus]) + + def step(self, T): # pylint: disable=invalid-name + best_nu = np.ones(len(self._policies)) / len(self._policies) + nu = best_nu + n = 0 + best_exploitability = self.compute_exploitability(nu) + exploitability = best_exploitability + + optimizer = cma.CMAEvolutionStrategy(x0=nu, sigma0=1.0) + + while best_exploitability > self._rho_tol and n < max( + T, self._regret_steps_per_step): + n += 1 + + logit_nus = optimizer.ask() + nus = softmax(logit_nus) + exploitabilities = self.get_exploitabilities(nus) + optimizer.tell(logit_nus, exploitabilities) + + best_new_exploitability = np.min(exploitabilities[0]) + if best_new_exploitability < best_exploitability: + best_exploitability = best_new_exploitability + best_nu = nus[np.argmin(exploitabilities)] + print(best_exploitability) + + self._nus = [best_nu] + self._nu_weights = [1.0] + self._exploitability = exploitability + + def compute_average_regret(self): + return self._exploitability + + def reset(self, policies): + """Restart the bandit with new policies.""" + self._policies = policies + self._policy_mus = [] + self._nu_weights = [] + self._exploitability = None + self.update_policy_mus() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/regret/nash_random_search.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/regret/nash_random_search.py new file mode 100644 index 0000000..6773b06 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/regret/nash_random_search.py @@ -0,0 +1,133 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Randomly searches for a Restricted Nash Equilibrium.""" + +from typing import Optional + +import numpy as np + +from open_spiel.python.mfg.algorithms import utils +from open_spiel.python.mfg.algorithms.regret import regret_minimizer + + +def softmax(x): + e = np.exp(x - np.max(x)) + return e / np.sum(e, axis=-1, keepdims=True) + + +class NashRandomSearch(regret_minimizer.RegretMinimizer): + """Nash Random Search Exploitability Minimizer. + + Implements base functions for regret minimizers to implement. + + Attributes: + _game: Pyspiel game. + _regret_steps_per_step: Number of regret steps per `step` call (Maximum + number in case `stop_early` is true) + _rho_tol: If `_compress_nus` is true, minimum probability threshold ( + Probabilities below `rho_tol` will be filtered out). + _compress_nus: Whether to compress nus (Remove nus with low selection + probability) or not. + _compress_lbd: Penalty term in L1 minimization when compressing nus. + _stop_early: Whether to stop regret computation when average regret is lower + than `_stop_regret_threshold` or to keep going until + `_regret_steps_per_step` steps have been accomplished. + _stop_regret_threshold: If `stop_early` is true, average regret threshold + under which the algorithm will stop. + _policies: List of Policies + _value_estimator: Value estimation function. + _value_estimation_n: Number of runs to average _value_estimator's result on. + """ + + def __init__( + self, + game, + policies, + eta: Optional[float] = None, + regret_steps_per_step: int = 1, + rho_tol: float = 1e-4, + compress_nus: bool = True, + compress_lbd: float = 0.0, + compress_every: int = 1, + stop_early: bool = True, + stop_regret_threshold: float = 1e-3, + value_estimator=utils.sample_value, + value_estimation_n: int = 1, + ): + super().__init__( + game, + policies, + regret_steps_per_step=regret_steps_per_step, + rho_tol=rho_tol, + compress_nus=compress_nus, + compress_every=compress_every, + compress_lbd=compress_lbd, + stop_early=stop_early, + stop_regret_threshold=stop_regret_threshold, + value_estimator=value_estimator, + value_estimation_n=value_estimation_n, + ) + self._nu = np.ones(len(policies)) / len(policies) + self._exploitability = None + + def compute_exploitability(self, nu): + mu = utils.MixedDistribution(self._policy_mus, nu) + per_policy_reward = 0.0 + for _ in range(self._value_estimation_n): + per_policy_reward += np.array( + [self._value_estimator(pi, mu, self._game) for pi in self._policies] + ) + per_policy_reward /= self._value_estimation_n + on_policy_reward = np.sum(per_policy_reward * nu) + return np.max(per_policy_reward - on_policy_reward) + + def get_nu(self): + x = np.random.normal(size=len(self._policies)) + return softmax(x) + + def step_for(self, T): # pylint: disable=invalid-name + self.step(T) + + def step(self, T): # pylint: disable=invalid-name + best_nu = np.ones(len(self._policies)) / len(self._policies) + nu = best_nu + n = 0 + best_exploitability = self.compute_exploitability(nu) + exploitability = best_exploitability + while exploitability > self._rho_tol and n < max( + T, self._regret_steps_per_step + ): + n += 1 + nu = self.get_nu() + exploitability = self.compute_exploitability(nu) + if exploitability < best_exploitability: + best_exploitability = exploitability + best_nu = nu + print(exploitability) + + self._nus = [best_nu] + self._nu_weights = [1.0] + self._exploitability = exploitability + + def compute_average_regret(self): + return self._exploitability + + def reset(self, policies): + """Restart the bandit 
with new policies.""" + self._policies = policies + self._policy_mus = [] + self._nu_weights = [] + self._exploitability = None + self.update_policy_mus() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/regret/polynomial_weights.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/regret/polynomial_weights.py new file mode 100644 index 0000000..4ce1526 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/regret/polynomial_weights.py @@ -0,0 +1,148 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Polynomial Weights algorithm for MFGs.""" + +from typing import Optional +import numpy as np +from open_spiel.python.mfg.algorithms import utils +from open_spiel.python.mfg.algorithms.regret import regret_minimizer + + +def polynomial_weight_update(weights, rewards, eta): + return weights * (1 + eta * rewards) + + +class PolynomialWeightAlgorithm(regret_minimizer.RegretMinimizer): + """Implements the Polynomial Weight Algorithm Regret minimizer. + + This is an external-regret minimizer, adapted here to the Mean-Field, + Partially-Observable case. + + References: Muller et al, https://arxiv.org/abs/2111.08350, and + Blum et al, https://www.cs.cmu.edu/~avrim/ML10/regret-chapter.pdf + """ + + def __init__( + self, + game, + policies, + eta: Optional[float] = None, + regret_steps_per_step: int = 1, + rho_tol: float = 1e-4, + compress_nus: bool = True, + compress_lbd: float = 0.0, + compress_every: int = 1, + stop_early: bool = True, + stop_regret_threshold: float = 1e-3, + value_estimator=utils.sample_value, + value_estimation_n: int = 1, + compute_internal_regret: bool = False, + ): + super().__init__( + game, + policies, + regret_steps_per_step=regret_steps_per_step, + rho_tol=rho_tol, + compress_nus=compress_nus, + compress_every=compress_every, + compress_lbd=compress_lbd, + stop_early=stop_early, + stop_regret_threshold=stop_regret_threshold, + value_estimator=value_estimator, + value_estimation_n=value_estimation_n, + compute_internal_regret=compute_internal_regret, + ) + if self._compute_internal_regret: + self._ws = [np.ones(len(policies)) for _ in range(len(policies))] + self._p = np.ones(len(policies)) / (1.0 * len(policies)) + else: + self._w = np.ones(len(policies)) + + if eta is None: + assert regret_steps_per_step is not None, ( + "Both `eta` and " + "`regret_steps_per_step` were " + "None, whereas our algorithm " + "requires either value to be " + "set." 
+ ) + self.compute_optimal_eta() + else: + self._eta = eta + + self._compress_every = compress_every + + def get_all_w_nus(self): + assert self._compute_internal_regret + return [w / np.sum(w) for w in self._ws] + + def get_nu(self): + if self._compute_internal_regret: + return np.sum( + self._p.reshape(-1, 1) * np.array(self.get_all_w_nus()), axis=0 + ) + else: + return self._w / np.sum(self._w) + + def _update_weights(self, rewards): + if self._compute_internal_regret: + self._ws = [ + w * (1 + self._eta * rewards * p) for w, p in zip(self._ws, self._p) + ] + self.compute_p() + else: + self._w = self._w * (1 + self._eta * rewards) + + def step(self, welfare_bonus=0.0): + rewards = np.zeros(len(self._policies)) + nu = self.get_nu() + assert np.all(nu >= 0.0) and (np.abs(np.sum(nu) - 1) < 1e-8) + self._nus.append(nu) + self._nu_weights.append(1.0) + + mu = utils.MixedDistribution(self._policy_mus, nu) + for _ in range(self._value_estimation_n): + for index, policy in enumerate(self._policies): + rewards[index] += self._value_estimator(policy, mu, self._game) + rewards /= self._value_estimation_n + + self._update_weights(rewards) + + welfare = np.sum(np.array(rewards) * np.array(nu)) + + self._rewards.append(rewards + welfare_bonus * welfare * nu) + self._true_rewards.append(rewards) + + def compute_optimal_eta(self): + if self._regret_steps_per_step is not None: + self._eta = min( + np.sqrt(np.log(len(self._policies)) / self._regret_steps_per_step), + 0.5, + ) + + def reset(self, policies): + if self._compute_internal_regret: + self._ws = [np.ones(len(policies)) for _ in range(len(policies))] + self._p = np.ones(len(policies)) / (1.0 * len(policies)) + else: + self._w = np.ones(len(policies)) + self._policies = policies + self._nus = [] + self._rewards = [] + self._true_rewards = [] + self._policy_mus = [] + self._nu_weights = [] + self.update_policy_mus() + self.compute_optimal_eta() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/regret/regret_matching.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/regret/regret_matching.py new file mode 100644 index 0000000..65aca70 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/regret/regret_matching.py @@ -0,0 +1,170 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
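A minimal standalone sketch of the external-regret loop that `PolynomialWeightAlgorithm` above runs (and that `Hedge` runs with an exponential rather than linear update): play the normalized weights, observe per-policy rewards, apply `w <- w * (1 + eta * r)`, and track the average external regret. `reward_fn` and the toy payoff matrix are assumptions standing in for the simulation-based value estimates in the real class.

```python
import numpy as np

def polynomial_weights(reward_fn, n_policies, n_steps, eta=None):
    if eta is None:
        # Same schedule as compute_optimal_eta above: sqrt(log K / T), capped at 0.5.
        eta = min(np.sqrt(np.log(n_policies) / n_steps), 0.5)
    w = np.ones(n_policies)
    nus, rewards = [], []
    for _ in range(n_steps):
        nu = w / w.sum()             # mixture actually played this round
        r = reward_fn(nu)            # per-policy reward against that mixture
        w = w * (1.0 + eta * r)      # polynomial-weights update (Hedge uses w * exp(eta * r))
        nus.append(nu)
        rewards.append(r)
    nus, rewards = np.array(nus), np.array(rewards)
    on_policy = np.sum(nus * rewards, axis=1, keepdims=True)
    avg_regret = np.max(np.mean(rewards - on_policy, axis=0))
    return nus, avg_regret

if __name__ == "__main__":
    A = np.array([[0.0, 2.0, -1.0], [-2.0, 0.0, 1.0], [1.0, -1.0, 0.0]])
    _, regret = polynomial_weights(lambda nu: A @ nu, n_policies=3, n_steps=2000)
    print(regret)   # average external regret; shrinks as n_steps grows
```

With bounded rewards the average regret shrinks on the order of sqrt(log K / T), which is why `compute_optimal_eta` ties the learning rate to `regret_steps_per_step`.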
+ +"""Regret Matching algorithm for MFGs.""" + +from typing import Optional + +import numpy as np + +from open_spiel.python.mfg.algorithms import utils +from open_spiel.python.mfg.algorithms.regret import regret_minimizer + + +def regret_matching(regrets): + regrets = np.array(regrets) + regret_plus = regrets * (regrets > 0.0) + regrets_sum = np.sum(regret_plus, axis=-1) + regret_plus[regrets_sum > 0.0, :] = regret_plus[ + regrets_sum > 0.0, : + ] / regrets_sum[regrets_sum > 0.0].reshape(-1, 1) + regret_plus[regrets_sum <= 0.0, :] = ( + np.ones_like(regret_plus[regrets_sum <= 0.0, :]) / regret_plus.shape[-1] + ) + return regret_plus + + +class RegretMatching(regret_minimizer.RegretMinimizer): + """Base class for Regret Minimizers. + + Implements base functions for regret minimizers to implement. + + Attributes: + _game: Pyspiel game. + _regret_steps_per_step: Number of regret steps per `step` call (Maximum + number in case `stop_early` is true) + _rho_tol: If `_compress_nus` is true, minimum probability threshold ( + Probabilities below `rho_tol` will be filtered out). + _compress_nus: Whether to compress nus (Remove nus with low selection + probability) or not. + _compress_lbd: Penalty term in L1 minimization when compressing nus. + _stop_early: Whether to stop regret computation when average regret is lower + than `_stop_regret_threshold` or to keep going until + `_regret_steps_per_step` steps have been accomplished. + _stop_regret_threshold: If `stop_early` is true, average regret threshold + under which the algorithm will stop. + _policies: List of Policies + _value_estimator: Value estimation function. + _value_estimation_n: Number of runs to average _value_estimator's result on. + """ + + def __init__( + self, + game, + policies, + eta: Optional[float] = None, + regret_steps_per_step: int = 1, + rho_tol: float = 1e-4, + compress_nus: bool = True, + compress_lbd: float = 0.0, + compress_every: int = 1, + stop_early: bool = True, + stop_regret_threshold: float = 1e-3, + value_estimator=utils.sample_value, + value_estimation_n: int = 1, + compute_internal_regret: bool = False, + ): + super().__init__( + game, + policies, + regret_steps_per_step=regret_steps_per_step, + rho_tol=rho_tol, + compress_nus=compress_nus, + compress_every=compress_every, + compress_lbd=compress_lbd, + stop_early=stop_early, + stop_regret_threshold=stop_regret_threshold, + value_estimator=value_estimator, + value_estimation_n=value_estimation_n, + compute_internal_regret=compute_internal_regret, + ) + + if self._compute_internal_regret: + self._regrets = np.zeros((len(policies), len(policies))) + else: + self._regrets = np.zeros(len(policies)) + self._p = np.ones(len(policies)) / (1.0 * len(policies)) + + def get_all_action_regrets(self): + assert self._compute_internal_regret + return [ + regret_matching(np.sum(action_regret, axis=0)) + for action_regret in self._regrets + ] + + def compute_last_regret(self, nu, reward): + reward = np.array(reward) + if self._compute_internal_regret: + weighted_rewards = nu.reshape(-1, 1) * reward.reshape(1, -1) + on_policy_values = np.sum( + regret_matching(self._regrets) * weighted_rewards, + axis=-1, + keepdims=True, + ) + return weighted_rewards - on_policy_values + else: + on_policy_value = np.sum(np.array(nu) * np.array(reward)) + return reward - on_policy_value + + def update_regret(self, nu, reward): + self._regrets += self.compute_last_regret(nu, reward) + + def get_all_w_nus(self): + assert self._compute_internal_regret + return regret_matching(self._regrets) + + def 
get_nu(self): + if self._compute_internal_regret: + return np.sum( + self._p.reshape(-1, 1) * regret_matching(self._regrets), axis=0 + ) + else: + return regret_matching(self._regrets) + + def step(self, welfare_bonus=0.0): + rewards = np.zeros(len(self._policies)) + nu = self.get_nu() + assert np.all(nu >= 0.0) and (np.abs(np.sum(nu) - 1) < 1e-8) + self._nus.append(nu) + self._nu_weights.append(1.0) + + mu = utils.MixedDistribution(self._policy_mus, nu) + for _ in range(self._value_estimation_n): + for index, policy in enumerate(self._policies): + rewards[index] += self._value_estimator(policy, mu, self._game) + rewards /= self._value_estimation_n + + welfare = np.sum(np.array(rewards) * np.array(nu)) + + self._rewards.append(rewards + welfare_bonus * welfare * nu) + self._true_rewards.append(rewards) + + self.update_regret(nu, rewards + welfare_bonus * welfare * nu) + if self._compute_internal_regret: + self.compute_p() + + def reset(self, policies): + """Restart the bandit with new policies.""" + self._p = np.ones(len(policies)) / (1.0 * len(policies)) + self._policies = policies + self._nus = [] + self._rewards = [] + self._true_rewards = [] + if self._compute_internal_regret: + self._regrets = np.zeros((len(policies), len(policies))) + else: + self._regrets = np.zeros(len(policies)) + self._policy_mus = [] + self._nu_weights = [] + self.update_policy_mus() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/regret/regret_minimizer.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/regret/regret_minimizer.py new file mode 100644 index 0000000..fad4207 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/regret/regret_minimizer.py @@ -0,0 +1,371 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Base class for regret minimizers.""" + +import numpy as np + +from open_spiel.python.mfg.algorithms import distribution +from open_spiel.python.mfg.algorithms import utils +from open_spiel.python.mfg.algorithms.regret import c_ce_optimization + + +class RegretMinimizer(object): + """Base class for Regret Minimizers. + + Implements base functions for regret minimizers to implement. + + Attributes: + _game: Pyspiel game. + _regret_steps_per_step: Number of regret steps per `step` call (Maximum + number in case `stop_early` is true) + _rho_tol: If `_compress_nus` is true, minimum probability threshold ( + Probabilities below `rho_tol` will be filtered out). + _compress_nus: Whether to compress nus (Remove nus with low selection + probability) or not. + _compress_lbd: Penalty term in L1 minimization when compressing nus. + _stop_early: Whether to stop regret computation when average regret is lower + than `_stop_regret_threshold` or to keep going until + `_regret_steps_per_step` steps have been accomplished. + _stop_regret_threshold: If `stop_early` is true, average regret threshold + under which the algorithm will stop. 
+ _policies: List of Policies + _value_estimator: Value estimation function. + _value_estimation_n: Number of runs to average _value_estimator's result on. + """ + + def __init__( + self, + game, + policies, + regret_steps_per_step: int = 1, + rho_tol: float = 1e-4, + compress_nus: bool = True, + compress_every: int = 1, + compress_lbd: float = 0.0, + stop_early: bool = True, + stop_regret_threshold: float = 1e-3, + value_estimator=utils.sample_value, + value_estimation_n: int = 1, + compute_internal_regret: bool = False, + ): + self._game = game + self._regret_steps_per_step = regret_steps_per_step + + self._compress_nus = compress_nus + self._compress_every = compress_every + self._compress_lbd = compress_lbd + + self._stop_early = stop_early + self._stop_regret_threshold = stop_regret_threshold + + self._rho_tol = rho_tol + self._policies = policies + + self._value_estimator = value_estimator + self._value_estimation_n = value_estimation_n + + self._compute_internal_regret = compute_internal_regret + + self._nus = [] + self._rewards = [] + self._true_rewards = [] + self._policy_mus = [] + self._nu_weights = [] + + def update_policy_mus(self): + """Update the stored distributions of our policies.""" + self._policy_mus = [ + distribution.DistributionPolicy(self._game, policy) + for policy in self._policies + ] + + def get_nu(self): + """Returns current Population Distribution.""" + raise NotImplementedError + + def step(self, welfare_bonus=0.0): + raise NotImplementedError + + def step_for( + self, + T, # pylint: disable=invalid-name + initial_welfare_bonus=None, + welfare_decay=None, + use_true_rewards_when_compressing=True, + ): + """Call `step` method `T` times maximum, potentially stop early. + + Args: + T: Maximum number of `step` calls to run. + initial_welfare_bonus: How much to initially reward high-welfare-inducing + actions. + welfare_decay: Welfare decay term. + use_true_rewards_when_compressing: Compress and compute optimal (C)CE + according to true rewards (= True) or according to modified rewards (= + False) + """ + welfare_bonus = 0.0 + if initial_welfare_bonus is not None: + assert welfare_decay is not None + welfare_bonus = initial_welfare_bonus + + weights = None + for t in range(T): + if welfare_decay is not None: + welfare_bonus = max(0.0, welfare_bonus - welfare_decay * t / T) + self.step(welfare_bonus=welfare_bonus) + if self._stop_early and (t % self._compress_every == 0): + try: + regret, weights = self.get_post_compression_regret_and_weights( + use_true_rewards_when_compressing=use_true_rewards_when_compressing + ) + # print("\t\t{}".format(regret)) + assert np.abs(np.sum(weights) - 1.0) < 1e-8, np.sum(weights) + except: # pylint: disable=bare-except + print("Simplex method encountered an error.") + continue + if regret < self._stop_regret_threshold: + break + if weights is None and self._compress_nus: + regret, weights = self.get_post_compression_regret_and_weights( + use_true_rewards_when_compressing=use_true_rewards_when_compressing + ) + if self._compress_nus: + self.compress_nus_and_weights(weights) + + def get_post_compression_regret_and_weights( + self, use_true_rewards_when_compressing=True + ): + """Computes optimized (C)CE by varying the temporal weight on each `nu`. 
+ + Args: + use_true_rewards_when_compressing: compute optimal (C)CE according to true + rewards (= True) or according to modified rewards (= False) + + Returns: + Regret for new temporal weights, new temporal weights + """ + if self._compute_internal_regret: + nu_weights = c_ce_optimization.compress_internal_weights( + self.get_nus(), + self.compute_regrets( + use_true_rewards=use_true_rewards_when_compressing + ), + rewards=self._rewards, + lbd=self._compress_lbd, + ) + regret = np.max([ + np.max(np.sum(nu_weights.reshape(-1, 1) * a, axis=0)) + for a in self.compute_regrets( + use_true_rewards=use_true_rewards_when_compressing + ) + ]) + else: + nu_weights = c_ce_optimization.compress_external_weights( + self.get_nus(), + self.compute_regrets( + use_true_rewards=use_true_rewards_when_compressing + ), + rewards=self._rewards, + lbd=self._compress_lbd, + ) + regret = np.max( + np.sum( + nu_weights.reshape(-1, 1) + * self.compute_regrets( + use_true_rewards=use_true_rewards_when_compressing + ), + axis=0, + ) + ) + return regret, nu_weights + + def compress_nus_and_weights(self, nu_weights): + """Run L1 optimization to only keep important members of `nus`.""" + if self._compress_nus: + if np.abs(np.sum(nu_weights) - 1.0) > 1e-8: + # If the optimization was unsuccessful, do *not* compress. + print( + "Unsuccessful optimization, weights sum to {}".format( + np.sum(nu_weights) + ) + ) + return + new_nus = [ + nu + for weight, nu in zip(nu_weights, self._nus) + if weight > self._rho_tol + ] + new_rewards = [ + reward + for weight, reward in zip(nu_weights, self._rewards) + if weight > self._rho_tol + ] + new_true_rewards = [ + reward + for weight, reward in zip(nu_weights, self._true_rewards) + if weight > self._rho_tol + ] + + new_nu_weights = [ + weight for weight in nu_weights if weight > self._rho_tol + ] + new_nu_weights = np.array(new_nu_weights) / np.sum(new_nu_weights) + + self._nus = new_nus + self._rewards = new_rewards + self._true_rewards = new_true_rewards + self._nu_weights = new_nu_weights + + def reset(self, policies): + """Restart the bandit with new policies.""" + raise NotImplementedError + + def increase_precision_x_fold(self, x): + self._stop_regret_threshold /= x + self._rho_tol /= x + self._regret_steps_per_step *= x + + def compute_p(self): + """Computes `p` as presented in Blum's External to Internal Regret.""" + assert ( + self._compute_internal_regret + ), "`p` does not exist when computing external regret." + w_nus = np.array(self.get_all_w_nus()) + + p = np.ones(len(self._policies)) + pprime = np.dot(p, w_nus) + n_trials = 100000 + i = 0 + while np.sum(np.abs(pprime - p)) > 1e-8 and i < n_trials: + p = pprime + pprime = np.dot(p, w_nus) + i += 1 + + if np.sum(np.abs(pprime - p)) > 1e-8 and i >= n_trials: + raise ValueError( + "Power method did not converge after {} trials.".format(n_trials) + ) + self._p = p / np.sum(p) + + def get_all_w_nus(self): + """returns all nus for all times and all policies.""" + raise NotImplementedError + + def compute_regrets(self, use_true_rewards=False): + """Computes the algorithm's current external/internal regrets. + + Args: + use_true_rewards: Whether to use altered game rewards, or true game + rewards. + + Returns: + Internal regret of shape [T, P, P] if `self._compute_internal_regret` is + true, otherwise external regret of shape [T, P], where T is the current + number of iterations and P the number of policies. 
+ """ + if use_true_rewards: + rewards = self._true_rewards + else: + rewards = self._rewards + + if self._compute_internal_regret: + regrets = [] + nus = np.array(self._nus) + rewards = np.array(rewards) + for action in range(rewards.shape[1]): + on_policy_values = (rewards[:, action] * nus[:, action]).reshape(-1, 1) + action_values = rewards * nus[:, action].reshape(-1, 1) + regrets.append(action_values - on_policy_values) + else: + on_policy_value = np.sum( + rewards * np.array(self._nus), axis=1, keepdims=True + ) + policy_value = rewards + regrets = policy_value - on_policy_value + return regrets + + def compute_average_regret(self, use_true_rewards=True): + """Computes the algorithm's average external/internal regrets. + + Args: + use_true_rewards: Whether to use altered game rewards, or true game + rewards. + + Returns: + Internal regret if `self._compute_internal_regret` is true, otherwise + external regret. + """ + + if use_true_rewards: + rewards = self._true_rewards + else: + rewards = self._rewards + + nu_weights = self.get_normalized_nu_weights() + if self._compute_internal_regret: + regrets = 0.0 + nus = np.array(self._nus) + rewards = np.array(rewards) + for action in range(rewards.shape[1]): + on_policy_values = (rewards[:, action] * nus[:, action]).reshape(-1, 1) + action_values = rewards * nus[:, action].reshape(-1, 1) + regrets += np.max( + np.sum( + nu_weights.reshape(-1, 1) * (action_values - on_policy_values), + axis=0, + ) + ) + else: + regrets = np.sum( + nu_weights.reshape(-1, 1) + * self.compute_regrets(use_true_rewards=use_true_rewards), + axis=0, + ) + return np.max(regrets) / len(self._nus) + + def get_nus(self): + return np.array(self._nus) + + def get_mus(self): + mus = [] + for nu in self._nus: + mu = utils.MixedDistribution(self._policy_mus, nu) + mus.append(mu) + return mus + + def get_rewards(self): + return self._rewards + + def get_mus_and_weights(self): + mus = self.get_mus() + self.normalize_nu_weights() + return mus, self._nu_weights + + def compute_optimal_eta(self): + if self._regret_steps_per_step is not None: + self._eta = min( + np.sqrt(np.log(len(self._policies)) / self._regret_steps_per_step), + 0.5, + ) + + def normalize_nu_weights(self): + self._nu_weights = np.array(self._nu_weights) / np.sum(self._nu_weights) + + def get_normalized_nu_weights(self): + return np.array(self._nu_weights) / np.sum(self._nu_weights) + + def restart(self): + self._nu_weights = list(self._nu_weights) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/softmax_policy.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/softmax_policy.py new file mode 100644 index 0000000..4effaed --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/softmax_policy.py @@ -0,0 +1,65 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Computes a softmax policy from a value function.""" +from typing import Optional + +import numpy as np + +from open_spiel.python import policy +from open_spiel.python.mfg import value + + +class SoftmaxPolicy(policy.Policy): + """Computes the softmax policy of a value function.""" + + def __init__(self, + game, + player_ids, + temperature: float, + state_action_value: value.ValueFunction, + prior_policy: Optional[policy.Policy] = None): + """Initializes the softmax policy. + + Args: + game: The game to analyze. + player_ids: list of player ids for which this policy applies; each + should be in the range 0..game.num_players()-1. + temperature: float to scale the values (multiplied by 1/temperature). + state_action_value: A state-action value function. + prior_policy: Optional argument. Prior policy to scale the softmax + policy. + """ + super(SoftmaxPolicy, self).__init__(game, player_ids) + self._state_action_value = state_action_value + self._prior_policy = prior_policy + self._temperature = temperature + + def action_probabilities(self, state, player_id=None): + legal_actions = state.legal_actions() + max_q = np.max( + [self._state_action_value(state, action) for action in legal_actions]) + exp_q = [ + np.exp((self._state_action_value(state, action) - max_q) / + self._temperature) for action in legal_actions + ] + if self._prior_policy is not None: + prior_probs = self._prior_policy.action_probabilities(state) + exp_q = [ + prior_probs.get(action, 0) * exp_q[i] + for i, action in enumerate(legal_actions) + ] + denom = sum(exp_q) + smax_q = exp_q if denom == 0 else exp_q / denom + return dict(zip(legal_actions, smax_q)) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/softmax_policy_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/softmax_policy_test.py new file mode 100644 index 0000000..0c98f50 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/softmax_policy_test.py @@ -0,0 +1,98 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for softmax_policy.""" + +from absl.testing import absltest +from absl.testing import parameterized + +from open_spiel.python import policy +from open_spiel.python.mfg import value +from open_spiel.python.mfg.algorithms import best_response_value +from open_spiel.python.mfg.algorithms import distribution +from open_spiel.python.mfg.algorithms import policy_value +from open_spiel.python.mfg.algorithms import softmax_policy +from open_spiel.python.mfg.games import crowd_modelling # pylint: disable=unused-import +import pyspiel + + +class SoftmaxPolicyTest(parameterized.TestCase): + + @parameterized.named_parameters(('python', 'python_mfg_crowd_modelling'), + ('cpp', 'mfg_crowd_modelling')) + def test_softmax(self, name): + """Check if the softmax policy works as expected. + + The test checks that: + - uniform prior policy gives the same results than no prior. 
+ - very high temperature gives almost a uniform policy. + - very low temperature gives almost a deterministic policy for the best + action. + + Args: + name: Name of the game. + """ + + game = pyspiel.load_game(name) + uniform_policy = policy.UniformRandomPolicy(game) + dist = distribution.DistributionPolicy(game, uniform_policy) + br_value = best_response_value.BestResponse( + game, dist, value.TabularValueFunction(game)) + br_init_val = br_value(game.new_initial_state()) + + # uniform prior policy gives the same results than no prior. + softmax_pi_uniform_prior = softmax_policy.SoftmaxPolicy( + game, None, 1.0, br_value, uniform_policy).to_tabular() + softmax_pi_uniform_prior_value = policy_value.PolicyValue( + game, dist, softmax_pi_uniform_prior, value.TabularValueFunction(game)) + softmax_pi_uniform_prior_init_val = softmax_pi_uniform_prior_value( + game.new_initial_state()) + softmax_pi_no_prior = softmax_policy.SoftmaxPolicy(game, None, 1.0, + br_value, None) + softmax_pi_no_prior_value = policy_value.PolicyValue( + game, dist, softmax_pi_no_prior, value.TabularValueFunction(game)) + softmax_pi_no_prior_init_val = softmax_pi_no_prior_value( + game.new_initial_state()) + + self.assertAlmostEqual(softmax_pi_uniform_prior_init_val, + softmax_pi_no_prior_init_val) + + # very high temperature gives almost a uniform policy. + uniform_policy = uniform_policy.to_tabular() + uniform_value = policy_value.PolicyValue(game, dist, uniform_policy, + value.TabularValueFunction(game)) + uniform_init_val = uniform_value(game.new_initial_state()) + + softmax_pi_no_prior = softmax_policy.SoftmaxPolicy(game, None, 100000000, + br_value, None) + softmax_pi_no_prior_value = policy_value.PolicyValue( + game, dist, softmax_pi_no_prior, value.TabularValueFunction(game)) + softmax_pi_no_prior_init_val = softmax_pi_no_prior_value( + game.new_initial_state()) + + self.assertAlmostEqual(uniform_init_val, softmax_pi_no_prior_init_val) + + # very low temperature gives almost a best response policy. + softmax_pi_no_prior = softmax_policy.SoftmaxPolicy(game, None, 0.0001, + br_value, None) + softmax_pi_no_prior_value = policy_value.PolicyValue( + game, dist, softmax_pi_no_prior, value.TabularValueFunction(game)) + softmax_pi_no_prior_init_val = softmax_pi_no_prior_value( + game.new_initial_state()) + + self.assertAlmostEqual(br_init_val, softmax_pi_no_prior_init_val) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/utils.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/utils.py new file mode 100644 index 0000000..42f1c36 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/algorithms/utils.py @@ -0,0 +1,217 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
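For reference, the computation inside `SoftmaxPolicy.action_probabilities` above boils down to a temperature-scaled Boltzmann distribution over state-action values, optionally re-weighted by a prior policy. The sketch below is illustrative only: it uses plain dicts instead of OpenSpiel value/policy objects, and (unlike the code above) falls back to a uniform distribution when the prior zeroes out every action.

```python
import numpy as np

def softmax_action_probs(q_values, temperature, prior=None):
    actions = list(q_values)
    q = np.array([q_values[a] for a in actions], dtype=float)
    exp_q = np.exp((q - q.max()) / temperature)   # subtract the max for numerical stability
    if prior is not None:
        exp_q = exp_q * np.array([prior.get(a, 0.0) for a in actions])
    denom = exp_q.sum()
    if denom > 0:
        probs = exp_q / denom
    else:
        probs = np.ones(len(actions)) / len(actions)   # fallback convention for this sketch
    return dict(zip(actions, probs))

if __name__ == "__main__":
    q = {0: 1.0, 1: 2.0, 2: 0.0}
    print(softmax_action_probs(q, temperature=1.0))    # favours action 1
    print(softmax_action_probs(q, temperature=1e6))    # near-uniform, as in the test below
    print(softmax_action_probs(q, temperature=1e-3))   # near-deterministic on the best action
```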
+ +"""Collection of useful functions and classes.""" + +from typing import List, Optional + +import numpy as np + +from open_spiel.python import policy as policy_std +from open_spiel.python.mfg import distribution as distribution_std +from open_spiel.python.mfg.algorithms import distribution +from open_spiel.python.mfg.algorithms import policy_value +import pyspiel + + +class MergedPolicy(policy_std.Policy): + """Merge several policies.""" + + def __init__( + self, + game, + player_ids, + policies: List[policy_std.Policy], + weights: List[float], + distributions: Optional[List[distribution_std.Distribution]] = None, + ): + """Initializes the merged policy. + + Args: + game: The game to analyze. + player_ids: list of player ids for which this policy applies; each should + be in the range 0..game.num_players()-1. + policies: A `List[policy_std.Policy]` object. + weights: A `List[float]` object. They should sum to 1. + distributions: A `List[distribution_std.Distribution]` object. + """ + super(MergedPolicy, self).__init__(game, player_ids) + self._policies = policies + self._distributions = distributions + self._weights = weights + if distributions is None: + distributions = [ + distribution.DistributionPolicy(game, policy) for policy in policies + ] + else: + assert len(policies) == len( + distributions + ), f'Length mismatch {len(policies)} != {len(distributions)}' + assert len(policies) == len( + weights + ), f'Length mismatch {len(policies)} != {len(weights)}' + + def action_probabilities(self, state, player_id=None): + action_prob = [] + legal = state.legal_actions() + num_legal = len(legal) + for a in legal: + merged_pi = 0.0 + norm_merged_pi = 0.0 + for p, d, w in zip(self._policies, self._distributions, self._weights): + merged_pi += w * d(state) * p(state)[a] + norm_merged_pi += w * d(state) + if norm_merged_pi > 0.0: + action_prob.append((a, merged_pi / norm_merged_pi)) + else: + action_prob.append((a, 1.0 / num_legal)) + return dict(action_prob) + + +class MixedDistribution: + """Mixes a list of distributions wrt. a list of weights. + + The mixed distribution remains a probability distribution over states. + + Attributes: + mus: The state distributions being mixed. + weights: The list of weights of each `mus` member. + _mus: The state distributions being mixed, post-pruning. + _weights: The list of weights of each `mus` member, post-pruning. + _tol: Tolerance (`mus` members with weights below tolerance are ignored) + _value_str_cache: Cache for value_str calls. + """ + + def __init__(self, mus, weights, tol=1e-4): + """Mixes the distribution. + + Args: + mus: List of distributions to mix. + weights: List of weights to mix `mus` over. + tol: Tolerance (`mus` members with weights below tolerance are ignored) + """ + self.mus = mus + self.weights = weights + self._tol = tol + self._prune() + self._value_str_cache = {} + + def _prune(self): + self._mus = [mu for w, mu in zip(self.weights, self.mus) if w > self._tol] + self._weights = [w for w in self.weights if w > self._tol] + self._weights = [w / sum(self._weights) for w in self._weights] + + def value(self, state): + """Returns the probability of the distribution on the state. + + Args: + state: A `pyspiel.State` object. + + Returns: + A `float`. + """ + return sum([ + weight * mu.value(state) for weight, mu in zip(self._weights, self._mus) + ]) + + def value_str(self, state_str, default_value=None): + """Returns the probability of the distribution on the given state string. + + Args: + state_str: A string. 
+ default_value: If not None, return this value if the state is not in the + support of the distribution. + + Returns: + A `float`. + """ + if state_str not in self._value_str_cache: + self._value_str_cache[state_str] = sum([ + weight * mu.value_str(state_str, default_value) + for weight, mu in zip(self._weights, self._mus) + ]) + return self._value_str_cache[state_str] + + def __call__(self, state): + """Turns the distribution into a callable. + + Args: + state: The current state of the game. + + Returns: + Float: probability. + """ + return self.value(state) + + +def get_exact_value( + pi: policy_std.Policy, mu: distribution_std.Distribution, game +): + """Computes the exact value of playing `pi` against distribution `mu`. + + Args: + pi: A policy object whose value is evaluated against `mu`. + mu: A distribution object against which `pi` is evaluated. + game: A pyspiel.Game object, the evaluation game. + + Returns: + Exact value of `pi` in `game` against `mu`. + """ + root_state = game.new_initial_states()[0] + return policy_value.PolicyValue(game, mu, pi).value(root_state) + + +def sample_value( + pi: policy_std.Policy, mu: distribution_std.Distribution, game +): + """Samples the value of playing `pi` against distribution `mu`. + + Args: + pi: A policy object whose value is evaluated against `mu`. + mu: A distribution object against which `pi` is evaluated. + game: A pyspiel.Game object, the evaluation game. + + Returns: + Sampled value of `pi` in `game` against `mu`. + """ + mfg_state = game.new_initial_states()[0] + total_reward = 0.0 + while not mfg_state.is_terminal(): + if mfg_state.current_player() == pyspiel.PlayerId.CHANCE: + action_list, prob_list = zip(*mfg_state.chance_outcomes()) + action = np.random.choice(action_list, p=prob_list) + mfg_state.apply_action(action) + elif mfg_state.current_player() == pyspiel.PlayerId.MEAN_FIELD: + dist_to_register = mfg_state.distribution_support() + dist = [mu.value_str(str_state, 0.0) for str_state in dist_to_register] + mfg_state.update_distribution(dist) + else: + total_reward += mfg_state.rewards()[0] + action_prob = pi(mfg_state) + action = np.random.choice( + list(action_prob.keys()), p=list(action_prob.values()) + ) + mfg_state.apply_action(action) + + return total_reward + + +def get_nu_values(policies, nu, game): + rewards = np.zeros(len(policies)) + mu = distribution.DistributionPolicy( + game, MergedPolicy(game, None, policies, nu) + ) + for index, policy in enumerate(policies): + rewards[index] = sample_value(policy, mu, game) + return rewards diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/distribution.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/distribution.py new file mode 100644 index 0000000..f00f93f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/distribution.py @@ -0,0 +1,96 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Representation of a distribution for a game. 
+ +This is a standard representation for passing distributions into algorithms, +with currently the following implementations: + +The main way of using a distribution is to call `value(state)`. +""" + +import abc +from typing import Any, Optional + +import pyspiel + + +class Distribution(abc.ABC): + """Base class for distributions. + + This represents a probability distribution over the states of a game. + + Attributes: + game: the game for which this distribution is derives + """ + + def __init__(self, game: pyspiel.Game): + """Initializes a distribution. + + Args: + game: the game for which this distribution is derives + """ + self.game = game + + @abc.abstractmethod + def value(self, state: pyspiel.State) -> float: + """Returns the probability of the distribution on the state. + + Args: + state: A `pyspiel.State` object. + + Returns: + A `float`. + """ + raise NotImplementedError() + + @abc.abstractmethod + def value_str(self, + state_str: str, + default_value: Optional[float] = None) -> float: + """Returns the probability of the distribution on the state string given. + + Args: + state_str: A string. + default_value: If not None, return this value if the state is not in the + support of the distribution. + + Returns: + A `float`. + """ + raise NotImplementedError() + + def __call__(self, state: pyspiel.State) -> float: + """Turns the distribution into a callable. + + Args: + state: The current state of the game. + + Returns: + Float: probability. + """ + return self.value(state) + + +class ParametricDistribution(Distribution): + """A parametric distribution.""" + + @abc.abstractmethod + def get_params(self) -> Any: + """Returns the distribution parameters.""" + + @abc.abstractmethod + def set_params(self, params: Any): + """Sets the distribution parameters.""" diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/examples/mfg_average_network_fp_jax.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/examples/mfg_average_network_fp_jax.py new file mode 100644 index 0000000..857bd20 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/examples/mfg_average_network_fp_jax.py @@ -0,0 +1,246 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
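The mixing done by `MixedDistribution` above reduces to: drop members whose weight is below the tolerance, renormalize the remaining weights, and take the weighted sum of the member probabilities. An illustrative standalone version, where `DictDistribution` is a made-up stand-in exposing only the `value_str` part of the distribution interface defined above:

```python
class DictDistribution:
    """Stand-in for an OpenSpiel state distribution: exposes value_str(state_str)."""

    def __init__(self, probs):
        self._probs = probs

    def value_str(self, state_str, default_value=0.0):
        return self._probs.get(state_str, default_value)

def mixed_value_str(mus, weights, state_str, tol=1e-4):
    # Drop near-zero weights, renormalize, then mix the member distributions linearly.
    kept = [(w, mu) for w, mu in zip(weights, mus) if w > tol]
    total = sum(w for w, _ in kept)
    return sum((w / total) * mu.value_str(state_str) for w, mu in kept)

if __name__ == "__main__":
    mu_a = DictDistribution({"s0": 0.5, "s1": 0.5})
    mu_b = DictDistribution({"s0": 1.0})
    print(mixed_value_str([mu_a, mu_b], [0.25, 0.75], "s0"))   # 0.25*0.5 + 0.75*1.0 = 0.875
```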
+"""Runs Deep Average-network Fictitious Play with DQN agents.""" + +import os +from typing import Sequence + +from absl import flags +import jax + +from open_spiel.python import policy as policy_std +from open_spiel.python import rl_environment +from open_spiel.python.jax import dqn +from open_spiel.python.mfg import utils +from open_spiel.python.mfg.algorithms import average_network_fictitious_play +from open_spiel.python.mfg.algorithms import distribution +from open_spiel.python.mfg.algorithms import nash_conv +from open_spiel.python.mfg.algorithms import policy_value +from open_spiel.python.mfg.games import factory +from open_spiel.python.utils import app +from open_spiel.python.utils import metrics +from open_spiel.python.utils import training + +_GAME_NAME = flags.DEFINE_string('game_name', 'mfg_crowd_modelling_2d', + 'Name of the game.') +_ENV_SETTING = flags.DEFINE_string( + 'env_setting', 'mfg_crowd_modelling_2d', + 'Name of the game settings. If None, the game name will be used.') +_LOGDIR = flags.DEFINE_string( + 'logdir', None, + 'Logging dir to use for TF summary files. If None, the metrics will only ' + 'be logged to stderr.') +_LOG_DISTRIBUTION = flags.DEFINE_bool('log_distribution', False, + 'Enables logging of the distribution.') +_NUM_ITERATIONS = flags.DEFINE_integer('num_iterations', 100, + 'Number of iterations.') +_EVAL_EVERY = flags.DEFINE_integer( + 'eval_every', 200, 'Episode frequency at which the agents are evaluated.') + +# Flags for best response RL (DQN) agent. +# Training options. +_BATCH_SIZE = flags.DEFINE_integer( + 'batch_size', 128, 'Number of transitions to sample at each learning step.') +_LEARN_EVERY = flags.DEFINE_integer( + 'learn_every', 40, 'Number of steps between learning updates.') +_NUM_DQN_EPISODES_PER_ITERATION = flags.DEFINE_integer( + 'num_dqn_episodes_per_iteration', 3000, + 'Number of DQN training episodes for each iteration.') +_EPSILON_DECAY_DURATION = flags.DEFINE_integer( + 'epsilon_decay_duration', int(20e6), + 'Number of game steps over which epsilon is decayed.') +_EPSILON_START = flags.DEFINE_float('epsilon_start', 0.1, + 'Starting exploration parameter.') +_EPSILON_END = flags.DEFINE_float('epsilon_end', 0.1, + 'Final exploration parameter.') +_DISCOUNT_FACTOR = flags.DEFINE_float('discount_factor', 1.0, + 'Discount factor for future rewards.') +_SEED = flags.DEFINE_integer('seed', 42, 'Training seed.') +# Network options. +_HIDDEN_LAYERS_SIZES = flags.DEFINE_list( + 'hidden_layers_sizes', ['128', '128'], + 'Number of hidden units in the Q-net.') +_UPDATE_TARGET_NETWORK_EVERY = flags.DEFINE_integer( + 'update_target_network_every', 200, + 'Number of steps between DQN target network updates.') +# Replay buffer options. +_REPLAY_BUFFER_CAPACITY = flags.DEFINE_integer('replay_buffer_capacity', 5000, + 'Size of the replay buffer.') +_MIN_BUFFER_SIZE_TO_LEARN = flags.DEFINE_integer( + 'min_buffer_size_to_learn', 200, + 'Number of samples in buffer before learning begins.') +# Loss and optimizer options. +_OPTIMIZER = flags.DEFINE_enum('optimizer', 'adam', ['sgd', 'adam'], + 'Optimizer.') +_LEARNING_RATE = flags.DEFINE_float('learning_rate', 0.001, + 'Learning rate for inner rl agent.') +_LOSS = flags.DEFINE_enum('loss', 'mse', ['mse', 'huber'], 'Loss function.') +_HUBER_LOSS_PARAMETER = flags.DEFINE_float('huber_loss_parameter', 1.0, + 'Parameter for Huber loss.') +_GRADIENT_CLIPPING = flags.DEFINE_float('gradient_clipping', 40, + 'Value to clip the gradient to.') + +# Flags for average policy RL agent. +# Training options. 
+_AVG_POL_BATCH_SIZE = flags.DEFINE_integer( + 'avg_pol_batch_size', 128, + 'Number of transitions to sample at each learning step.') +_AVG_POL_NUM_TRAINING_STEPS_PER_ITERATION = flags.DEFINE_integer( + 'avg_pol_num_training_steps_per_iteration', 2000, + 'Number of steps for average policy at each FP iteration.') +_AVG_POL_NUM_EPISODES_PER_ITERATION = flags.DEFINE_integer( + 'avg_pol_num_episodes_per_iteration', 100, + 'Number of samples to store at each FP iteration.') +# Network options. +_AVG_POL_HIDDEN_LAYERS_SIZES = flags.DEFINE_list( + 'avg_pol_hidden_layers_sizes', ['128', '128'], + 'Number of hidden units in the avg-net and Q-net.') +# Reservoir buffer options. +_AVG_POL_RESERVOIR_BUFFER_CAPACITY = flags.DEFINE_integer( + 'avg_pol_reservoir_buffer_capacity', 100000000, + 'Size of the reservoir buffer.') +_AVG_POL_MIN_BUFFER_SIZE_TO_LEARN = flags.DEFINE_integer( + 'avg_pol_min_buffer_size_to_learn', 100, + 'Number of samples in buffer before learning begins.') +# Loss and optimizer options. +_AVG_POL_OPTIMIZER = flags.DEFINE_enum('avg_pol_optimizer', 'sgd', + ['sgd', 'adam'], 'Optimizer.') +_AVG_POL_LEARNING_RATE = flags.DEFINE_float( + 'avg_pol_learning_rate', 0.01, 'Learning rate for inner rl agent.') +_AVG_GRADIENT_CLIPPING = flags.DEFINE_float('avg_gradient_clipping', 100, + 'Value to clip the gradient to.') +_AVG_POL_TAU = flags.DEFINE_float('avg_pol_tau', 10.0, + 'Temperature for softmax in policy.') + + +def main(argv: Sequence[str]) -> None: + if len(argv) > 1: + raise app.UsageError('Too many command-line arguments.') + + game = factory.create_game_with_setting(_GAME_NAME.value, _ENV_SETTING.value) + num_players = game.num_players() + + # Create the environments with uniform initial policy. + uniform_policy = policy_std.UniformRandomPolicy(game) + uniform_dist = distribution.DistributionPolicy(game, uniform_policy) + + envs = [ + rl_environment.Environment( + game, mfg_distribution=uniform_dist, mfg_population=p) + for p in range(num_players) + ] + + env = envs[0] + info_state_size = env.observation_spec()['info_state'][0] + num_actions = env.action_spec()['num_actions'] + + # Best response policy agents. + kwargs_dqn = { + 'batch_size': _BATCH_SIZE.value, + 'discount_factor': _DISCOUNT_FACTOR.value, + 'epsilon_decay_duration': _EPSILON_DECAY_DURATION.value, + 'epsilon_end': _EPSILON_END.value, + 'epsilon_start': _EPSILON_START.value, + 'gradient_clipping': _GRADIENT_CLIPPING.value, + 'hidden_layers_sizes': [int(l) for l in _HIDDEN_LAYERS_SIZES.value], + 'huber_loss_parameter': _HUBER_LOSS_PARAMETER.value, + 'learn_every': _LEARN_EVERY.value, + 'learning_rate': _LEARNING_RATE.value, + 'loss_str': _LOSS.value, + 'min_buffer_size_to_learn': _MIN_BUFFER_SIZE_TO_LEARN.value, + 'optimizer_str': _OPTIMIZER.value, + 'replay_buffer_capacity': _REPLAY_BUFFER_CAPACITY.value, + 'seed': _SEED.value, + 'update_target_network_every': _UPDATE_TARGET_NETWORK_EVERY.value, + } + br_rl_agents = [ + dqn.DQN(p, info_state_size, num_actions, **kwargs_dqn) + for p in range(num_players) + ] + + num_training_steps_per_iteration = ( + _AVG_POL_NUM_TRAINING_STEPS_PER_ITERATION.value) + + # Metrics writer will also log the metrics to stderr. + just_logging = _LOGDIR.value is None or jax.host_id() > 0 + writer = metrics.create_default_writer( + _LOGDIR.value, just_logging=just_logging) + + def logging_fn(it, step, vals): + writer.write_scalars(it * num_training_steps_per_iteration + step, vals) + + # Average policy agents. 
+ kwargs_avg = { + 'batch_size': _AVG_POL_BATCH_SIZE.value, + 'hidden_layers_sizes': [ + int(l) for l in _AVG_POL_HIDDEN_LAYERS_SIZES.value + ], + 'reservoir_buffer_capacity': _AVG_POL_RESERVOIR_BUFFER_CAPACITY.value, + 'learning_rate': _AVG_POL_LEARNING_RATE.value, + 'min_buffer_size_to_learn': _AVG_POL_MIN_BUFFER_SIZE_TO_LEARN.value, + 'optimizer_str': _AVG_POL_OPTIMIZER.value, + 'gradient_clipping': _AVG_GRADIENT_CLIPPING.value, + 'seed': _SEED.value, + 'tau': _AVG_POL_TAU.value + } + fp = average_network_fictitious_play.AverageNetworkFictitiousPlay( + game, + envs, + br_rl_agents, + _AVG_POL_NUM_EPISODES_PER_ITERATION.value, + num_training_steps_per_iteration, + eval_every=_EVAL_EVERY.value, + logging_fn=logging_fn, + **kwargs_avg) + + def log_metrics(it): + """Logs the training metrics for each iteration.""" + initial_states = game.new_initial_states() + distrib = distribution.DistributionPolicy(game, fp.policy) + pi_value = policy_value.PolicyValue(game, distrib, fp.policy) + m = { + f'best_response/{state}': pi_value.eval_state(state) + for state in initial_states + } + m.update({ + f'br_agent{i}/loss': agent.loss for i, agent in enumerate(br_rl_agents) + }) + nash_conv_fp = nash_conv.NashConv(game, fp.policy) + m['nash_conv_fp'] = nash_conv_fp.nash_conv() + logging_fn(it, 0, m) + + # Also save the distribution. + if _LOG_DISTRIBUTION.value and not just_logging: + filename = os.path.join(_LOGDIR.value, f'distribution_{it}.pkl') + utils.save_parametric_distribution(nash_conv_fp.distribution, filename) + + for it in range(_NUM_ITERATIONS.value): + # Train the RL agent to learn a best response. + training.run_episodes( + envs, + br_rl_agents, + num_episodes=_NUM_DQN_EPISODES_PER_ITERATION.value, + is_evaluation=False) + + # Run an iteration of average-network fictitious play and log the metrics. + fp.iteration() + log_metrics(it + 1) + + # Make sure all values were written. + writer.flush() + + +if __name__ == '__main__': + jax.config.parse_flags_with_absl() + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/examples/mfg_dqn_fp_jax.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/examples/mfg_dqn_fp_jax.py new file mode 100644 index 0000000..7592894 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/examples/mfg_dqn_fp_jax.py @@ -0,0 +1,186 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
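Schematically, the fictitious-play driver in the script above (and in the DQN fictitious-play script that follows) alternates three steps per iteration: train a best response against the current mean-field distribution, fold it into the average policy, and recompute the distribution induced by that average. The sketch below only mirrors that control flow; `train_best_response`, `update_average_policy`, `compute_distribution`, and `log_metrics` are placeholders, not OpenSpiel functions.

```python
def deep_fictitious_play(num_iterations,
                         train_best_response,
                         update_average_policy,
                         compute_distribution,
                         log_metrics):
    mu = compute_distribution(policy=None)               # start from uniform play
    avg_policy = None
    for it in range(1, num_iterations + 1):
        br_policy = train_best_response(mu)               # inner RL, e.g. DQN episodes
        avg_policy = update_average_policy(br_policy)     # fictitious-play averaging
        mu = compute_distribution(avg_policy)             # induced mean-field distribution
        log_metrics(it, avg_policy, mu)                   # e.g. NashConv / policy values
    return avg_policy

if __name__ == "__main__":
    # Toy run with stand-in callables, just to exercise the control flow.
    deep_fictitious_play(
        num_iterations=3,
        train_best_response=lambda mu: "br",
        update_average_policy=lambda br: "avg",
        compute_distribution=lambda policy: "uniform" if policy is None else "induced",
        log_metrics=lambda it, pi, mu: print(f"iteration {it}: policy={pi}, mu={mu}"),
    )
```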
+"""Deep Fictitious Play using DQN agents trained on an MFG.""" + +from absl import flags +import jax + +from open_spiel.python import policy +from open_spiel.python import rl_agent_policy +from open_spiel.python import rl_environment +from open_spiel.python.jax import dqn +from open_spiel.python.mfg.algorithms import distribution +from open_spiel.python.mfg.algorithms import fictitious_play +from open_spiel.python.mfg.algorithms import nash_conv +from open_spiel.python.mfg.algorithms import policy_value +from open_spiel.python.mfg.games import factory +from open_spiel.python.utils import app +from open_spiel.python.utils import metrics + +FLAGS = flags.FLAGS + +flags.DEFINE_string("game_name", "python_mfg_predator_prey", + "Name of the game.") +flags.DEFINE_string( + "env_setting", None, + "Name of the game settings. If None, the game name will be used.") +flags.DEFINE_integer("num_iterations", 100, + "Number of fictitious play iterations.") +flags.DEFINE_integer("num_episodes_per_iteration", 1000, + "Number of training eepisodes for each iteration.") +flags.DEFINE_list("hidden_layers_sizes", [ + 128, + 128, +], "Number of hidden units in the avg-net and Q-net.") +flags.DEFINE_integer("replay_buffer_capacity", int(2e5), + "Size of the replay buffer.") +flags.DEFINE_integer("min_buffer_size_to_learn", 1000, + "Number of samples in buffer before learning begins.") +flags.DEFINE_integer("batch_size", 128, + "Number of transitions to sample at each learning step.") +flags.DEFINE_integer("learn_every", 64, + "Number of steps between learning updates.") +flags.DEFINE_float("rl_learning_rate", 0.01, + "Learning rate for inner rl agent.") +flags.DEFINE_string("optimizer_str", "sgd", + "Optimizer, choose from 'adam', 'sgd'.") +flags.DEFINE_string("loss_str", "mse", + "Loss function, choose from 'mse', 'huber'.") +flags.DEFINE_integer("update_target_network_every", 400, + "Number of steps between DQN target network updates.") +flags.DEFINE_float("discount_factor", 1.0, + "Discount factor for future rewards.") +flags.DEFINE_integer("epsilon_decay_duration", int(20e6), + "Number of game steps over which epsilon is decayed.") +flags.DEFINE_float("epsilon_start", 0.1, "Starting exploration parameter.") +flags.DEFINE_float("epsilon_end", 0.1, "Final exploration parameter.") +flags.DEFINE_bool("use_checkpoints", False, "Save/load neural network weights.") +flags.DEFINE_string("checkpoint_dir", "/tmp/dqn_test", + "Directory to save/load the agent.") +flags.DEFINE_string( + "logdir", None, + "Logging dir to use for TF summary files. 
If None, the metrics will only " + "be logged to stderr.") + + +def main(unused_argv): + game = factory.create_game_with_setting(FLAGS.game_name, FLAGS.env_setting) + uniform_policy = policy.UniformRandomPolicy(game) + mfg_dist = distribution.DistributionPolicy(game, uniform_policy) + + envs = [ + rl_environment.Environment( + game, mfg_distribution=mfg_dist, mfg_population=p) + for p in range(game.num_players()) + ] + info_state_size = envs[0].observation_spec()["info_state"][0] + num_actions = envs[0].action_spec()["num_actions"] + + hidden_layers_sizes = [int(l) for l in FLAGS.hidden_layers_sizes] + kwargs = { + "replay_buffer_capacity": FLAGS.replay_buffer_capacity, + "min_buffer_size_to_learn": FLAGS.min_buffer_size_to_learn, + "batch_size": FLAGS.batch_size, + "learn_every": FLAGS.learn_every, + "learning_rate": FLAGS.rl_learning_rate, + "optimizer_str": FLAGS.optimizer_str, + "loss_str": FLAGS.loss_str, + "update_target_network_every": FLAGS.update_target_network_every, + "discount_factor": FLAGS.discount_factor, + "epsilon_decay_duration": FLAGS.epsilon_decay_duration, + "epsilon_start": FLAGS.epsilon_start, + "epsilon_end": FLAGS.epsilon_end, + } + + # pylint: disable=g-complex-comprehension + agents = [ + dqn.DQN(idx, info_state_size, num_actions, hidden_layers_sizes, **kwargs) + for idx in range(game.num_players()) + ] + joint_avg_policy = rl_agent_policy.JointRLAgentPolicy( + game, {idx: agent for idx, agent in enumerate(agents)}, + envs[0].use_observation) + + if FLAGS.use_checkpoints: + for agent in agents: + if agent.has_checkpoint(FLAGS.checkpoint_dir): + agent.restore(FLAGS.checkpoint_dir) + + # Metrics writer will also log the metrics to stderr. + just_logging = FLAGS.logdir is None or jax.host_id() > 0 + writer = metrics.create_default_writer( + logdir=FLAGS.logdir, just_logging=just_logging) + + # Save the parameters. + writer.write_hparams(kwargs) + + fp = fictitious_play.FictitiousPlay(game) + num_episodes_per_iteration = FLAGS.num_episodes_per_iteration + + def log_metrics(it, episode=0): + initial_states = game.new_initial_states() + fp_policy = fp.get_policy() + distrib = distribution.DistributionPolicy(game, fp_policy) + pi_value = policy_value.PolicyValue(game, distrib, fp_policy) + m = { + f"dqn_br/{state}": pi_value.eval_state(state) + for state in initial_states + } + # Loss will be None at the beginning. + if agents[0].loss is not None: + m.update({ + f"agent{i}/loss": float(agent.loss) for i, agent in enumerate(agents) + }) + nash_conv_fp = nash_conv.NashConv(game, fp_policy).nash_conv() + m["nash_conv_fp"] = nash_conv_fp + # We log using the total number of episode steps so that runs with different + # training regimes are comparable. + writer.write_scalars(it * num_episodes_per_iteration + episode, m) + + log_metrics(0) + for it in range(FLAGS.num_iterations): + # Update the Fictitious Play policy. + fp.iteration(br_policy=joint_avg_policy) + + # Update the distribution of the environments. + distrib = distribution.DistributionPolicy(game, fp.get_policy()) + for env in envs: + env.update_mfg_distribution(distrib) + + # Train the RL agent to learn a best response. + for _ in range(num_episodes_per_iteration): + for p in range(game.num_players()): + time_step = envs[p].reset() + while not time_step.last(): + agent_output = agents[p].step(time_step) + action_list = [agent_output.action] + time_step = envs[p].step(action_list) + + # Episode is over, step all agents with final info state. + agents[p].step(time_step) + + # Check point the agents. 
+ if FLAGS.use_checkpoints: + for agent in agents: + agent.save(FLAGS.checkpoint_dir) + + # Log the final metrics. + log_metrics(it + 1) + + # Make sure all values were written. + writer.flush() + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/examples/mfg_dqn_jax.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/examples/mfg_dqn_jax.py new file mode 100644 index 0000000..5b35a46 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/examples/mfg_dqn_jax.py @@ -0,0 +1,168 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""DQN agents trained on an MFG against a crowd following a uniform policy.""" + +from absl import flags +import jax + +from open_spiel.python import policy +from open_spiel.python import rl_agent_policy +from open_spiel.python import rl_environment +from open_spiel.python.jax import dqn +from open_spiel.python.mfg.algorithms import distribution +from open_spiel.python.mfg.algorithms import nash_conv +from open_spiel.python.mfg.algorithms import policy_value +from open_spiel.python.mfg.games import factory +from open_spiel.python.utils import app +from open_spiel.python.utils import metrics + +FLAGS = flags.FLAGS + +flags.DEFINE_string("game_name", "python_mfg_predator_prey", + "Name of the game.") +flags.DEFINE_string( + "env_setting", None, + "Name of the game settings. 
If None, the game name will be used.") +flags.DEFINE_integer("num_train_episodes", int(20e6), + "Number of training episodes.") +flags.DEFINE_integer("eval_every", 10000, + "Episode frequency at which the agents are evaluated.") +flags.DEFINE_list("hidden_layers_sizes", [ + 128, + 128, +], "Number of hidden units in the avg-net and Q-net.") +flags.DEFINE_integer("replay_buffer_capacity", int(2e5), + "Size of the replay buffer.") +flags.DEFINE_integer("min_buffer_size_to_learn", 1000, + "Number of samples in buffer before learning begins.") +flags.DEFINE_integer("batch_size", 128, + "Number of transitions to sample at each learning step.") +flags.DEFINE_integer("learn_every", 64, + "Number of steps between learning updates.") +flags.DEFINE_float("rl_learning_rate", 0.01, + "Learning rate for inner rl agent.") +flags.DEFINE_string("optimizer_str", "sgd", + "Optimizer, choose from 'adam', 'sgd'.") +flags.DEFINE_string("loss_str", "mse", + "Loss function, choose from 'mse', 'huber'.") +flags.DEFINE_integer("update_target_network_every", 19200, + "Number of steps between DQN target network updates.") +flags.DEFINE_float("discount_factor", 1.0, + "Discount factor for future rewards.") +flags.DEFINE_integer("epsilon_decay_duration", int(20e6), + "Number of game steps over which epsilon is decayed.") +flags.DEFINE_float("epsilon_start", 0.1, "Starting exploration parameter.") +flags.DEFINE_float("epsilon_end", 0.1, "Final exploration parameter.") +flags.DEFINE_bool("use_checkpoints", False, "Save/load neural network weights.") +flags.DEFINE_string("checkpoint_dir", "/tmp/dqn_test", + "Directory to save/load the agent.") +flags.DEFINE_string( + "logdir", None, + "Logging dir to use for TF summary files. If None, the metrics will only " + "be logged to stderr.") + + +def main(unused_argv): + game = factory.create_game_with_setting(FLAGS.game_name, FLAGS.env_setting) + uniform_policy = policy.UniformRandomPolicy(game) + mfg_dist = distribution.DistributionPolicy(game, uniform_policy) + + envs = [ + rl_environment.Environment( + game, mfg_distribution=mfg_dist, mfg_population=p) + for p in range(game.num_players()) + ] + info_state_size = envs[0].observation_spec()["info_state"][0] + num_actions = envs[0].action_spec()["num_actions"] + + hidden_layers_sizes = [int(l) for l in FLAGS.hidden_layers_sizes] + kwargs = { + "replay_buffer_capacity": FLAGS.replay_buffer_capacity, + "min_buffer_size_to_learn": FLAGS.min_buffer_size_to_learn, + "batch_size": FLAGS.batch_size, + "learn_every": FLAGS.learn_every, + "learning_rate": FLAGS.rl_learning_rate, + "optimizer_str": FLAGS.optimizer_str, + "loss_str": FLAGS.loss_str, + "update_target_network_every": FLAGS.update_target_network_every, + "discount_factor": FLAGS.discount_factor, + "epsilon_decay_duration": FLAGS.epsilon_decay_duration, + "epsilon_start": FLAGS.epsilon_start, + "epsilon_end": FLAGS.epsilon_end, + } + + # pylint: disable=g-complex-comprehension + agents = [ + dqn.DQN(idx, info_state_size, num_actions, hidden_layers_sizes, **kwargs) + for idx in range(game.num_players()) + ] + joint_avg_policy = rl_agent_policy.JointRLAgentPolicy( + game, {idx: agent for idx, agent in enumerate(agents)}, + envs[0].use_observation) + if FLAGS.use_checkpoints: + for agent in agents: + if agent.has_checkpoint(FLAGS.checkpoint_dir): + agent.restore(FLAGS.checkpoint_dir) + + # Metrics writer will also log the metrics to stderr. 
+ just_logging = FLAGS.logdir is None or jax.host_id() > 0 + writer = metrics.create_default_writer( + logdir=FLAGS.logdir, just_logging=just_logging) + + # Save the parameters. + writer.write_hparams(kwargs) + + for ep in range(1, FLAGS.num_train_episodes + 1): + if ep % FLAGS.eval_every == 0: + writer.write_scalars(ep, { + f"agent{i}/loss": float(agent.loss) for i, agent in enumerate(agents) + }) + + initial_states = game.new_initial_states() + + # Exact best response to uniform. + nash_conv_obj = nash_conv.NashConv(game, uniform_policy) + writer.write_scalars( + ep, { + f"exact_br/{state}": value + for state, value in zip(initial_states, nash_conv_obj.br_values()) + }) + + # DQN best response to uniform. + pi_value = policy_value.PolicyValue(game, mfg_dist, joint_avg_policy) + writer.write_scalars(ep, { + f"dqn_br/{state}": pi_value.eval_state(state) + for state in initial_states + }) + + if FLAGS.use_checkpoints: + for agent in agents: + agent.save(FLAGS.checkpoint_dir) + + for p in range(game.num_players()): + time_step = envs[p].reset() + while not time_step.last(): + agent_output = agents[p].step(time_step) + action_list = [agent_output.action] + time_step = envs[p].step(action_list) + + # Episode is over, step all agents with final info state. + agents[p].step(time_step) + + # Make sure all values were written. + writer.flush() + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/examples/mfg_fictitious_play.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/examples/mfg_fictitious_play.py new file mode 100644 index 0000000..199d436 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/examples/mfg_fictitious_play.py @@ -0,0 +1,77 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Fictitious play on an MFG game.""" +import os +from typing import Sequence + +from absl import flags + +from open_spiel.python.mfg import utils +from open_spiel.python.mfg.algorithms import fictitious_play +from open_spiel.python.mfg.algorithms import nash_conv +from open_spiel.python.mfg.games import factory +from open_spiel.python.utils import app +from open_spiel.python.utils import metrics + +FLAGS = flags.FLAGS + +flags.DEFINE_string('game_name', 'mfg_crowd_modelling_2d', 'Name of the game.') +flags.DEFINE_string( + 'setting', None, + 'Name of the game settings. If None, the game name will be used.') +flags.DEFINE_integer('num_iterations', 100, + 'Number of fictitious play iterations.') + +flags.DEFINE_float('learning_rate', None, + 'Learning rate. If not, it will be set to 1/iteration.') +_LOGDIR = flags.DEFINE_string( + 'logdir', None, + 'Logging dir to use for TF summary files. 
If None, the metrics will only ' + 'be logged to stderr.') +_LOG_DISTRIBUTION = flags.DEFINE_bool('log_distribution', False, + 'Enables logging of the distribution.') + + +def main(argv: Sequence[str]) -> None: + if len(argv) > 1: + raise app.UsageError('Too many command-line arguments.') + + game = factory.create_game_with_setting(FLAGS.game_name, FLAGS.setting) + + # Metrics writer will also log the metrics to stderr. + just_logging = _LOGDIR.value is None + writer = metrics.create_default_writer( + logdir=_LOGDIR.value, just_logging=just_logging) + + # Save the parameters. + learning_rate = FLAGS.learning_rate + writer.write_hparams({'learning_rate': learning_rate}) + + fp = fictitious_play.FictitiousPlay(game) + + for it in range(FLAGS.num_iterations): + fp.iteration(learning_rate=learning_rate) + fp_policy = fp.get_policy() + nash_conv_fp = nash_conv.NashConv(game, fp_policy) + exploitability = nash_conv_fp.nash_conv() + writer.write_scalars(it, {'exploitability': exploitability}) + if _LOG_DISTRIBUTION.value and not just_logging: + filename = os.path.join(_LOGDIR.value, f'distribution_{it}.pkl') + utils.save_parametric_distribution(nash_conv_fp.distribution, filename) + + writer.flush() + + +if __name__ == '__main__': + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/examples/mfg_mirror_descent.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/examples/mfg_mirror_descent.py new file mode 100644 index 0000000..b2bc175 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/examples/mfg_mirror_descent.py @@ -0,0 +1,76 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Mirror descent on an MFG game.""" + +import os +from typing import Sequence + +from absl import flags + +from open_spiel.python.mfg import utils +from open_spiel.python.mfg.algorithms import mirror_descent +from open_spiel.python.mfg.algorithms import nash_conv +from open_spiel.python.mfg.games import factory +from open_spiel.python.utils import app +from open_spiel.python.utils import metrics + +FLAGS = flags.FLAGS + +_GAME_NAME = flags.DEFINE_string('game_name', 'mfg_crowd_modelling_2d', + 'Name of the game.') +_SETTING = flags.DEFINE_string( + 'setting', None, + 'Name of the game settings. If None, the game name will be used.') +_NUM_ITERATIONS = flags.DEFINE_integer('num_iterations', 100, + 'Number of mirror descent iterations.') +_LEARNING_RATE = flags.DEFINE_float('learning_rate', 0.01, 'Learning rate.') +_LOGDIR = flags.DEFINE_string( + 'logdir', None, + 'Logging dir to use for TF summary files. 
If None, the metrics will only ' + 'be logged to stderr.') +_LOG_DISTRIBUTION = flags.DEFINE_bool('log_distribution', False, + 'Enables logging of the distribution.') + + +def main(argv: Sequence[str]) -> None: + if len(argv) > 1: + raise app.UsageError('Too many command-line arguments.') + + game = factory.create_game_with_setting(_GAME_NAME.value, _SETTING.value) + + # Metrics writer will also log the metrics to stderr. + just_logging = _LOGDIR.value is None + writer = metrics.create_default_writer( + logdir=_LOGDIR.value, just_logging=just_logging) + + # Save the parameters. + learning_rate = _LEARNING_RATE.value + writer.write_hparams({'learning_rate': learning_rate}) + + md = mirror_descent.MirrorDescent(game, lr=learning_rate) + + for it in range(_NUM_ITERATIONS.value): + md.iteration() + md_policy = md.get_policy() + exploitability = nash_conv.NashConv(game, md_policy).nash_conv() + writer.write_scalars(it, {'exploitability': exploitability}) + if _LOG_DISTRIBUTION.value and not just_logging: + filename = os.path.join(_LOGDIR.value, f'distribution_{it}.pkl') + utils.save_parametric_distribution(md.distribution, filename) + + writer.flush() + + +if __name__ == '__main__': + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/examples/mfg_munchausen_domd_jax.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/examples/mfg_munchausen_domd_jax.py new file mode 100644 index 0000000..d4f7d4a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/examples/mfg_munchausen_domd_jax.py @@ -0,0 +1,231 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Run deep online mirror descent algorithm with Munchausen DQN agents.""" + +import os +from typing import Sequence + +from absl import flags +import jax + +from open_spiel.python import policy +from open_spiel.python import rl_environment +from open_spiel.python.mfg import utils +from open_spiel.python.mfg.algorithms import distribution +from open_spiel.python.mfg.algorithms import munchausen_deep_mirror_descent +from open_spiel.python.mfg.algorithms import nash_conv +from open_spiel.python.mfg.algorithms import policy_value +from open_spiel.python.mfg.games import factory +from open_spiel.python.utils import app +from open_spiel.python.utils import metrics + +FLAGS = flags.FLAGS + +flags.DEFINE_string("game_name", "mfg_crowd_modelling_2d", "Name of the game.") +_ENV_SETTING = flags.DEFINE_string( + "env_setting", + "crowd_modelling_2d_four_rooms", + "Name of the game settings. If None, the game name will be used.", +) + +# Training options. +_BATCH_SIZE = flags.DEFINE_integer( + "batch_size", 128, "Number of transitions to sample at each learning step." +) +_LEARN_EVERY = flags.DEFINE_integer( + "learn_every", 64, "Number of steps between learning updates." 
+) +_NUM_EPISODES_PER_ITERATION = flags.DEFINE_integer( + "num_episodes_per_iteration", + 1000, + "Number of training eepisodes for each iteration.", +) +flags.DEFINE_integer("num_iterations", 100, "Number of iterations.") +_EPSILON_DECAY_DURATION = flags.DEFINE_integer( + "epsilon_decay_duration", + 100000, + "Number of game steps over which epsilon is decayed.", +) +flags.DEFINE_float("epsilon_power", 1, "Power for the epsilon decay.") +flags.DEFINE_float("epsilon_start", 0.1, "Starting exploration parameter.") +flags.DEFINE_float("epsilon_end", 0.1, "Final exploration parameter.") +_DISCOUNT_FACTOR = flags.DEFINE_float( + "discount_factor", 1.0, "Discount factor for future rewards." +) +_RESET_REPLAY_BUFFER_ON_UPDATE = flags.DEFINE_bool( + "reset_replay_buffer_on_update", + False, + "Reset the replay buffer when the softmax policy is updated.", +) +flags.DEFINE_integer("seed", 42, "Training seed.") +# Evaluation options. +_EVAL_EVERY = flags.DEFINE_integer( + "eval_every", 200, "Episode frequency at which the agents are evaluated." +) +# Network options. +_HIDDEN_LAYERS_SIZES = flags.DEFINE_list( + "hidden_layers_sizes", + ["128", "128"], + "Number of hidden units in the avg-net and Q-net.", +) +_UPDATE_TARGET_NETWORK_EVERY = flags.DEFINE_integer( + "update_target_network_every", + 200, + "Number of steps between DQN target network updates.", +) +# Replay buffer options. +_REPLAY_BUFFER_CAPACITY = flags.DEFINE_integer( + "replay_buffer_capacity", 40000, "Size of the replay buffer." +) +_MIN_BUFFER_SIZE_TO_LEARN = flags.DEFINE_integer( + "min_buffer_size_to_learn", + 1000, + "Number of samples in buffer before learning begins.", +) +# Loss and optimizer options. +flags.DEFINE_enum("optimizer", "adam", ["sgd", "adam"], "Optimizer.") +flags.DEFINE_float("learning_rate", 0.01, "Learning rate for inner rl agent.") +flags.DEFINE_enum("loss", "mse", ["mse", "huber"], "Loss function.") +flags.DEFINE_float("huber_loss_parameter", 1.0, "Parameter for Huber loss.") +flags.DEFINE_float("gradient_clipping", None, "Value to clip the gradient to.") +# Munchausen options. +flags.DEFINE_float("tau", 10, "Temperature parameter in Munchausen target.") +flags.DEFINE_float("alpha", 0.99, "Alpha parameter in Munchausen target.") +_WITH_MUNCHAUSEN = flags.DEFINE_bool( + "with_munchausen", True, "If true, target uses Munchausen penalty terms." +) +# Logging options. +flags.DEFINE_bool("use_checkpoints", False, "Save/load neural network weights.") +_CHECKPOINT_DIR = flags.DEFINE_string( + "checkpoint_dir", "/tmp/dqn_test", "Directory to save/load the agent." +) +_LOGDIR = flags.DEFINE_string( + "logdir", + None, + "Logging dir to use for TF summary files. If None, the metrics will only " + "be logged to stderr.", +) +_LOG_DISTRIBUTION = flags.DEFINE_bool( + "log_distribution", False, "Enables logging of the distribution." +) + + +def main(argv: Sequence[str]) -> None: + if len(argv) > 1: + raise app.UsageError("Too many command-line arguments.") + + game = factory.create_game_with_setting(FLAGS.game_name, _ENV_SETTING.value) + + num_players = game.num_players() + + # Create the environments with uniform initial policy. 
+ uniform_policy = policy.UniformRandomPolicy(game) + uniform_dist = distribution.DistributionPolicy(game, uniform_policy) + + envs = [ + rl_environment.Environment( # pylint: disable=g-complex-comprehension + game, + mfg_distribution=uniform_dist, + mfg_population=p, + observation_type=rl_environment.ObservationType.OBSERVATION, + ) + for p in range(num_players) + ] + + env = envs[0] + info_state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + # Create the agents. + kwargs = { + "alpha": FLAGS.alpha, + "batch_size": _BATCH_SIZE.value, + "discount_factor": _DISCOUNT_FACTOR.value, + "epsilon_decay_duration": _EPSILON_DECAY_DURATION.value, + "epsilon_end": FLAGS.epsilon_end, + "epsilon_power": FLAGS.epsilon_power, + "epsilon_start": FLAGS.epsilon_start, + "gradient_clipping": FLAGS.gradient_clipping, + "hidden_layers_sizes": [int(l) for l in _HIDDEN_LAYERS_SIZES.value], + "huber_loss_parameter": FLAGS.huber_loss_parameter, + "learn_every": _LEARN_EVERY.value, + "learning_rate": FLAGS.learning_rate, + "loss": FLAGS.loss, + "min_buffer_size_to_learn": _MIN_BUFFER_SIZE_TO_LEARN.value, + "optimizer": FLAGS.optimizer, + "replay_buffer_capacity": _REPLAY_BUFFER_CAPACITY.value, + "reset_replay_buffer_on_update": _RESET_REPLAY_BUFFER_ON_UPDATE.value, + "seed": FLAGS.seed, + "tau": FLAGS.tau, + "update_target_network_every": _UPDATE_TARGET_NETWORK_EVERY.value, + "with_munchausen": _WITH_MUNCHAUSEN.value, + } + agents = [ + munchausen_deep_mirror_descent.MunchausenDQN( + p, info_state_size, num_actions, **kwargs + ) + for p in range(num_players) + ] + + # Metrics writer will also log the metrics to stderr. + just_logging = _LOGDIR.value is None or jax.host_id() > 0 + writer = metrics.create_default_writer( + logdir=_LOGDIR.value, just_logging=just_logging + ) + + # # Save the parameters. + writer.write_hparams(kwargs) + + def logging_fn(it, episode, vals): + writer.write_scalars(it * num_episodes_per_iteration + episode, vals) + + num_episodes_per_iteration = _NUM_EPISODES_PER_ITERATION.value + md = munchausen_deep_mirror_descent.DeepOnlineMirrorDescent( + game, + envs, + agents, + eval_every=_EVAL_EVERY.value, + num_episodes_per_iteration=num_episodes_per_iteration, + logging_fn=logging_fn, + ) + + def log_metrics(it): + """Logs the training metrics for each iteration.""" + initial_states = game.new_initial_states() + pi_value = policy_value.PolicyValue(game, md.distribution, md.policy) + m = { + f"best_response/{state}": pi_value.eval_state(state) + for state in initial_states + } + nash_conv_md = nash_conv.NashConv(game, md.policy).nash_conv() + m["nash_conv_md"] = nash_conv_md + if _LOG_DISTRIBUTION.value and _LOGDIR.value: + # We log distribution directly to a Pickle file as it may be large for + # logging as a metric. + filename = os.path.join(_LOGDIR.value, f"distribution_{it}.pkl") + utils.save_parametric_distribution(md.distribution, filename) + logging_fn(it, 0, m) + + log_metrics(0) + for it in range(1, FLAGS.num_iterations + 1): + md.iteration() + log_metrics(it) + + # Make sure all values were written. 
+ writer.flush() + + +if __name__ == "__main__": + jax.config.parse_flags_with_absl() + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/examples/mfg_proximal_policy_optimization_pytorch.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/examples/mfg_proximal_policy_optimization_pytorch.py new file mode 100644 index 0000000..9efb71c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/examples/mfg_proximal_policy_optimization_pytorch.py @@ -0,0 +1,311 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Runs mean field proximal policy optimaztion agents.""" + +# pylint: disable=consider-using-from-import + +import logging +import os +import time + +from absl import flags +import numpy as np +import torch +import torch.optim as optim +from torch.utils.tensorboard import SummaryWriter + +from open_spiel.python import policy as policy_std +from open_spiel.python import rl_environment +from open_spiel.python.mfg import utils +from open_spiel.python.mfg.algorithms import distribution +from open_spiel.python.mfg.algorithms.pytorch.mfg_proximal_policy_optimization import Agent as mfg_ppo_agent +from open_spiel.python.mfg.algorithms.pytorch.mfg_proximal_policy_optimization import calculate_advantage +from open_spiel.python.mfg.algorithms.pytorch.mfg_proximal_policy_optimization import calculate_explotability +from open_spiel.python.mfg.algorithms.pytorch.mfg_proximal_policy_optimization import learn +from open_spiel.python.mfg.algorithms.pytorch.mfg_proximal_policy_optimization import Policy as mfg_ppo_policy +from open_spiel.python.mfg.algorithms.pytorch.mfg_proximal_policy_optimization import rollout +from open_spiel.python.mfg.games import factory +from open_spiel.python.utils import app + + +FLAGS = flags.FLAGS + + +flags.DEFINE_integer("seed", default=0, help="Set a random seed.") +flags.DEFINE_string( + "exp_name", default="mf-ppo", help="Set the name of this experiment" +) +flags.DEFINE_string( + "game_setting", + default="crowd_modelling_2d_four_rooms", + help=( + "Set the game to benchmark options:(crowd_modelling_2d_four_rooms) " + " and (crowd_modelling_2d_maze)" + ), +) +flags.DEFINE_float("lr", default=1e-3, help="Learning rate of the optimizer") +flags.DEFINE_integer( + "num_episodes", + default=5, + help=( + "set the number of episodes of to collect per" + " rollout" + ), +) +flags.DEFINE_integer( + "update_episodes", + default=20, + help="set the number of episodes of the inner loop", +) +flags.DEFINE_integer( + "update_iterations", + default=100, + help=( + "Set the number of global update steps of the" + " outer loop" + ), +) +flags.DEFINE_string( + "optimizer", default="Adam", help="Set the optimizer (Adam) or (SGD)" +) +flags.DEFINE_boolean( + "cuda", default=False, help="Use Gpu to run the experiment" +) + +# MFPPO parameters +flags.DEFINE_float("gamma", default=0.9, help="set discount factor gamma") +flags.DEFINE_integer( + "num_minibatches", default=5, help="the number of mini-batches" +) +flags.DEFINE_integer( + 
"update_epochs", default=5, help="the K epochs to update the policy" +) +flags.DEFINE_float( + "clip_coef", default=0.2, help="the surrogate clipping coefficient" +) +flags.DEFINE_float("ent_coef", default=0.01, help="coefficient of the entropy") +flags.DEFINE_float( + "max_grad_norm", + default=5, + help="the maximum norm for the gradient clipping", +) +flags.DEFINE_float( + "alpha", + default=0.5, + help=( + "Set alpha to controll the iteration and epsiode" + " policy updates" + ), +) +flags.DEFINE_float( + "eps_eps", default=0.2, help="eps to update the episode learned policy" +) +flags.DEFINE_float( + "itr_eps", default=0.05, help="eps to update the episode learned policy" +) + + +def set_seed(seed): + """Set the random seed for reproducibility.""" + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + os.environ["PYTHONHASHSEED"] = str(seed) + print(f"Random seed set as {seed}") + + +def main(unused_argv): + """Main function to run the experiment.""" + + # Set the random seed for reproducibility + set_seed(FLAGS.seed) + + # Set the device (in our experiments CPU vs GPU does not improve time at all) + # we recommend CPU + device = torch.device( + "cuda" if torch.cuda.is_available() and FLAGS.cuda else "cpu" + ) + + # Set the name of the experiment's folder + fname = "./mfppo_experiments/" + + # Log the experiments + run_name = ( + f"{FLAGS.exp_name}_{FLAGS.game_setting}_{FLAGS.optimizer}_num_update_epochs_" + " " + f" {FLAGS.update_epochs}_num_episodes_per_rollout_{FLAGS.num_episodes}_number_of_mini_batches_" + " " + f" {FLAGS.num_minibatches}_{time.asctime(time.localtime(time.time()))}" + ) + log_name = os.path.join(fname, run_name) + tb_writer = SummaryWriter(log_name) + logging.basicConfig( + filename=log_name + "_log.txt", + filemode="a", + level=logging.DEBUG, + force=True, + ) + + # Console handler + console = logging.StreamHandler() + console.setLevel(logging.ERROR) + logging.getLogger("").addHandler(console) + + logger = logging.getLogger() + logger.debug("Initialization") + + tb_writer.add_text( + "hyperparameters", + "|param|value|\n|-|-|\n%s" + % "\n".join([f"|{key}|{value}" for key, value in vars(FLAGS).items()]), + ) + # Create the game instance + game = factory.create_game_with_setting( + "mfg_crowd_modelling_2d", FLAGS.game_setting + ) + + # Set the initial policy to uniform and generate the distribution + uniform_policy = policy_std.UniformRandomPolicy(game) + mfg_dist = distribution.DistributionPolicy(game, uniform_policy) + env = rl_environment.Environment( + game, mfg_distribution=mfg_dist, mfg_population=0 + ) + + # Set the environment seed for reproduciblility + env.seed(FLAGS.seed) + + # Creat the agent and population policies + info_state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + agent = mfg_ppo_agent(info_state_size, num_actions).to(device) + ppo_policy = mfg_ppo_policy(game, agent, None, device) + pop_agent = mfg_ppo_agent(info_state_size, num_actions).to(device) + + if FLAGS.optimizer == "Adam": + optimizer_actor = optim.Adam( + agent.actor.parameters(), lr=FLAGS.lr, eps=1e-5 + ) + optimizer_critic = optim.Adam( + agent.critic.parameters(), lr=FLAGS.lr, eps=1e-5 + ) + else: + optimizer_actor = optim.SGD( + agent.actor.parameters(), lr=FLAGS.lr, momentum=0.9 + ) + optimizer_critic = optim.SGD( + agent.critic.parameters(), lr=FLAGS.lr, momentum=0.9 + ) + + # Used to log data for debugging + steps = FLAGS.num_episodes * env.max_game_length + episode_entropy = [] + total_entropy = 
[] + nash_con_vect = [] + eps_reward = [] + total_reward = [] + + for k in range(FLAGS.update_iterations): + for _ in range(FLAGS.update_episodes): + # collect rollout data + history = rollout( + env, pop_agent, agent, FLAGS.num_episodes, steps, device + ) + # store rewards and entropy for debugging + episode_entropy.append(history["entropies"].mean().item()) + eps_reward.append(history["rewards"].sum().item() / FLAGS.num_episodes) + # Calculate the advantage function + adv, returns = calculate_advantage( + FLAGS.gamma, + True, + history["rewards"], + history["values"], + history["dones"], + device, + ) + history["advantages"] = adv + history["returns"] = returns + # Update the learned policy and report loss for debugging + v_loss = learn( + history, + optimizer_actor, + optimizer_critic, + agent, + num_minibatches=FLAGS.num_minibatches, + update_epochs=FLAGS.update_epochs, + itr_eps=FLAGS.itr_eps, + eps_eps=FLAGS.eps_eps, + alpha=FLAGS.alpha, + ent_coef=FLAGS.ent_coef, + max_grad_norm=FLAGS.max_grad_norm, + ) + + # Collect and print the metrics + total_reward.append(np.mean(eps_reward)) + total_entropy.append(np.mean(episode_entropy)) + + print("Value_loss", v_loss.item()) + print("iteration num:", k + 1) + print("Mean reward", total_reward[-1]) + + # Update the iteration policy with the new policy + pop_agent.load_state_dict(agent.state_dict()) + + # Update the distribution + distrib = distribution.DistributionPolicy(game, ppo_policy) + + # calculate the exploitability + m = calculate_explotability(game, distrib, ppo_policy) + nashc = m["nash_conv_ppo"] + nash_con_vect.append(nashc) + + # log the results to tensor board + tb_writer.add_scalar("initial_state_value", m["ppo_br/initial"], k + 1) + tb_writer.add_scalar("rewards", total_reward[-1], k + 1) + tb_writer.add_scalar("entorpy", total_entropy[-1], k + 1) + tb_writer.add_scalar("nash_conv_ppo", nashc, k + 1) + logger.debug( + "ppo_br: %s, and nash_conv: %s, reward: %s, entropy: %s", + m["ppo_br/initial"], + nashc, + total_reward[-1], + total_entropy[-1], + ) + print( + "ppo_br: %s, and nash_conv: %s, reward: %s, entropy: %s" + % (m["ppo_br/initial"], nashc, total_reward[-1], total_entropy[-1]) + ) + + # Update the environment distribution + env.update_mfg_distribution(distrib) + + # if lower than upper_nash we save the weights and distribution + upper_nash = 300 + if nash_con_vect[-1] < upper_nash: + # Save the distribution and weights for further analysis + filename = os.path.join(fname, f"distribution_{run_name}.pkl") + utils.save_parametric_distribution(distrib, filename) + torch.save( + agent.actor.state_dict(), + fname + + f"alpha_{FLAGS.alpha}, itr_eps_{FLAGS.itr_eps}," + f" eps_eps_{FLAGS.eps_eps}_agent_actor_weights.pth", + ) + torch.save( + agent.critic.state_dict(), + fname + + f"alpha_{FLAGS.alpha}, itr_eps_{FLAGS.itr_eps}," + f" eps_eps_{FLAGS.eps_eps}_agent_critic_weights.pth", + ) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/examples/mfg_psro.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/examples/mfg_psro.py new file mode 100644 index 0000000..8441dbb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/examples/mfg_psro.py @@ -0,0 +1,199 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Mean-Field PSRO examples.""" + +from absl import app +from absl import flags +from absl import logging + +from open_spiel.python.mfg.algorithms import correlated_equilibrium +from open_spiel.python.mfg.algorithms import mf_psro +from open_spiel.python.mfg.algorithms import utils +from open_spiel.python.mfg.algorithms.regret import hedge +from open_spiel.python.mfg.algorithms.regret import polynomial_weights +from open_spiel.python.mfg.algorithms.regret import regret_matching +from open_spiel.python.mfg.games import crowd_modelling # pylint: disable=unused-import +from open_spiel.python.mfg.games import dynamic_routing # pylint: disable=unused-import +from open_spiel.python.mfg.games import normal_form_game # pylint: disable=unused-import +from open_spiel.python.mfg.games import predator_prey # pylint: disable=unused-import +import pyspiel + +FLAGS = flags.FLAGS + +flags.DEFINE_string("game_name", "python_mfg_predator_prey", + "Name of the game.") +flags.DEFINE_integer( + "regret_steps_per_step", + 1000, + "number of runs to average value function over.", +) +flags.DEFINE_integer( + "value_estimation_n", 1, "number of runs to average value function over." +) +flags.DEFINE_string( + "value_estimator", "sampled", "Best Response type : `ce` or `cce`." +) +flags.DEFINE_string( + "regret_minimizer", + "hedge", + "Which regret minimization algorithm to use : `rm` for" + "Regret Matching, `hedge` for Hedge, `poly` for Polynomial " + "Weights.", +) +flags.DEFINE_integer("n_iter", 1000, "Num PSRO iterations.") +flags.DEFINE_integer("compress_every", 1, "Compress every") +flags.DEFINE_float("compress_lbd", 0.0, "Compression lambda.") +flags.DEFINE_float("eta", None, "Polynomial Weight algorithm eta.") +flags.DEFINE_string( + "best_responder", "cce", "Best Response type : `ce` or `cce`." +) +flags.DEFINE_bool( + "compute_internal_regret", + False, + "Compute internal (Or external if False) regret", +) +flags.DEFINE_bool("compute_ce_gap", False, "Compute `ce_gap`") +flags.DEFINE_integer("seed", 1, "Seed value.") + +GAME_SETTINGS = { + "mfg_crowd_modelling_2d": { + "only_distribution_reward": False, + "forbidden_states": "[0|0;0|1]", + "initial_distribution": "[0|2;0|3]", + "initial_distribution_value": "[0.5;0.5]", + } +} + + +def main(unused_argv): + logging.info("Loading %s", FLAGS.game_name) + mfg_game = pyspiel.load_game( + FLAGS.game_name, GAME_SETTINGS.get(FLAGS.game_name, {}) + ) + + eta = FLAGS.eta + regret_steps_per_step = FLAGS.regret_steps_per_step + + best_responder = FLAGS.best_responder + compute_ce_gap = FLAGS.compute_ce_gap + compute_internal_regret = FLAGS.compute_internal_regret + + if FLAGS.value_estimator == "sampled": + value_estimator = utils.sample_value + elif FLAGS.value_estimator == "exact": + value_estimator = utils.get_exact_value + else: + raise NameError( + "Unknown value estimator {}. Valid names are `sampled`, `exact`." 
+ .format(FLAGS.value_estimator) + ) + + if FLAGS.regret_minimizer == "hedge": + regret_minimizer = hedge.Hedge( + mfg_game, + [], + eta, + regret_steps_per_step, + compress_nus=True, + compress_every=FLAGS.compress_every, + compress_lbd=FLAGS.compress_lbd, + value_estimator=value_estimator, + value_estimation_n=FLAGS.value_estimation_n, + compute_internal_regret=compute_internal_regret, + ) + elif FLAGS.regret_minimizer == "rm": + regret_minimizer = regret_matching.RegretMatching( + mfg_game, + [], + eta, + regret_steps_per_step, + compress_nus=True, + compress_every=FLAGS.compress_every, + compress_lbd=FLAGS.compress_lbd, + value_estimator=value_estimator, + value_estimation_n=FLAGS.value_estimation_n, + compute_internal_regret=compute_internal_regret, + ) + elif FLAGS.regret_minimizer == "poly": + regret_minimizer = polynomial_weights.PolynomialWeightAlgorithm( + mfg_game, + [], + eta, + regret_steps_per_step, + compress_nus=True, + compress_every=FLAGS.compress_every, + compress_lbd=FLAGS.compress_lbd, + value_estimator=value_estimator, + value_estimation_n=FLAGS.value_estimation_n, + compute_internal_regret=compute_internal_regret, + ) + else: + raise NameError( + "Unknown regret minimizer {}.".format(FLAGS.regret_minimizer) + ) + + if best_responder == "cce": + best_responder = correlated_equilibrium.cce_br + elif best_responder == "ce": + best_responder = correlated_equilibrium.ce_br + elif best_responder == "ce_partial": + best_responder = correlated_equilibrium.partial_ce_br + else: + raise NameError( + "Unknown best responder {}. Valid names are `cce` and `ce`.".format( + FLAGS.best_responder + ) + ) + + mfpsro = mf_psro.MeanFieldPSRO( + mfg_game, + regret_minimizer, + regret_steps_per_step, + best_responder=best_responder, + ) + + for j in range(FLAGS.n_iter): + logging.info("Iteration {} of MF-PSRO".format(j)) # pylint: disable=logging-format-interpolation + print("PSRO Step") + mfpsro.step() + + print("Equilibrium Computation") + policies, nus, mus, rhos = mfpsro.get_equilibrium() + + print("Welfare Computation") + average_welfare = correlated_equilibrium.compute_average_welfare( + mfg_game, policies, mus, rhos, nus + ) + + print("CCE Gap Computation") + cce_gap_value = correlated_equilibrium.cce_gap( + mfg_game, policies, rhos, mus, nus, compute_true_rewards=True + ) + if compute_ce_gap: + print("CE Gap Computation") + ce_gap_value = correlated_equilibrium.ce_gap( + mfg_game, policies, rhos, mus, nus, compute_true_rewards=True + ) + else: + ce_gap_value = 0.0 + + print("CCE Gap value : {}".format(cce_gap_value)) + print("CE Gap value : {}".format(ce_gap_value)) + print("Average welfare : {}".format(average_welfare)) + print("") + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/__init__.py new file mode 100644 index 0000000..b16be8c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/__init__.py @@ -0,0 +1,33 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Mean field games implemented in Python. + +These games are registered as they are imported. It's perfectly possible to +import just a single game if you prefer. There is no need to add new games here, +so long as they register themselves and you import them when wanting to use +them. However, adding them here will make them available for playthroughs and +for automated API testing. + +Registration looks like this: +``` +pyspiel.register_game(_GAME_TYPE, KuhnPokerGame) +``` +""" +from open_spiel.python.mfg.games import crowd_avoidance +from open_spiel.python.mfg.games import crowd_modelling +from open_spiel.python.mfg.games import dynamic_routing +from open_spiel.python.mfg.games import linear_quadratic +from open_spiel.python.mfg.games import periodic_aversion +from open_spiel.python.mfg.games import predator_prey diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/crowd_avoidance.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/crowd_avoidance.py new file mode 100644 index 0000000..c14ba14 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/crowd_avoidance.py @@ -0,0 +1,608 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Mean Field Crowd Avoidance game, implemented in Python. + +This corresponds to an environment in which two populations try to avoid each +other. + +The environment is configurable in the following high-level ways: +- Congestion coefficients matrix. +- Initial distribution. +- Geometry (torus, basic square). +""" + +import enum +import functools +import math +from typing import Any, List, Mapping, Optional, Tuple + +import numpy as np + +from open_spiel.python import observation +import pyspiel +from open_spiel.python.utils import shared_value + + +class Geometry(enum.IntEnum): + SQUARE = 0 + TORUS = 1 + + +_DEFAULT_SIZE = 7 +_DEFAULT_HORIZON = 10 +_NUM_ACTIONS = 5 +_NUM_CHANCE = 5 +_DEFAULT_CONGESTION_MATRIX = np.array( + # The first population feels congestion with respect to the second one, + # and vice-versa. + [[0, 1], [1, 0]] +) +_DEFAULT_NUM_PLAYERS = 2 +# Each population starts in a corner. +_DEFAULT_INIT_DISTRIB = np.array([ + # First population + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.4, 0.4, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + # Second population + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.4, 0.4, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], +]) + + +def grid_to_forbidden_states(grid): + """Converts a grid into string representation of forbidden states. 
+ + Args: + grid: Rows of the grid. '#' character denotes a forbidden state. All rows + should have the same number of columns, i.e. cells. + + Returns: + String representation of forbidden states in the form of x (column) and y + (row) pairs, e.g. [1|1;0|2]. + """ + forbidden_states = [] + num_cols = len(grid[0]) + for y, row in enumerate(grid): + assert len(row) == num_cols, f"Number of columns should be {num_cols}." + for x, cell in enumerate(row): + if cell == "#": + forbidden_states.append(f"{x}|{y}") + return "[" + ";".join(forbidden_states) + "]" + + +def pairs_string_to_list(positions: str) -> List[np.ndarray]: + """Converts a string representing positions into a list of positions.""" + pos = positions[1:-1] # remove [ and ] + split = pos.split(";") + return [np.array([i for i in s.split("|")]) for s in split] + + +forbidden_states_grid = [ + "#######", + "# # #", + "# #", + "# # #", + "# #", + "# # #", + "#######", +] +_DEFAULT_FORBIDDEN_STATES = grid_to_forbidden_states(forbidden_states_grid) + +forbidden_states_indicator = np.array( + [ + [math.nan if c == "#" else 0 for c in [*row]] + for row in forbidden_states_grid + ] +) + +_DEFAULT_PROBA_NOISE = 0.5 + +_DEFAULT_GEOMETRY = Geometry.SQUARE + +_DEFAULT_COEF_CONGESTION = 0.0 + +_DEFAULT_COEF_TARGET = 1.0 + +_DEFAULT_PARAMS = { + "size": _DEFAULT_SIZE, + "horizon": _DEFAULT_HORIZON, + "players": _DEFAULT_NUM_PLAYERS, + # The congestion matrix is represented as a string containing a + # space-separated list of values. + # Its size defines the number of populations in the mean field game. + "congestion_matrix": " ".join( + str(v) for v in _DEFAULT_CONGESTION_MATRIX.flatten() + ), + "geometry": _DEFAULT_GEOMETRY, + "init_distrib": " ".join(str(v) for v in _DEFAULT_INIT_DISTRIB.flatten()), + # Probability that the transition is affected by noise + "proba_noise": _DEFAULT_PROBA_NOISE, + # Weight of congestion term in the reward + "coef_congestion": _DEFAULT_COEF_CONGESTION, + "forbidden_states": _DEFAULT_FORBIDDEN_STATES, + "coef_target": _DEFAULT_COEF_TARGET, +} + +_GAME_TYPE = pyspiel.GameType( + short_name="python_mfg_crowd_avoidance", + long_name="Python Mean Field Crowd Avoidance", + dynamics=pyspiel.GameType.Dynamics.MEAN_FIELD, + chance_mode=pyspiel.GameType.ChanceMode.EXPLICIT_STOCHASTIC, + information=pyspiel.GameType.Information.PERFECT_INFORMATION, + utility=pyspiel.GameType.Utility.GENERAL_SUM, + reward_model=pyspiel.GameType.RewardModel.REWARDS, + # We cannot pass math.inf here, so we pass a very high integer value. 
+ max_num_players=2, + min_num_players=2, + provides_information_state_string=True, + provides_information_state_tensor=False, + provides_observation_string=True, + provides_observation_tensor=True, + parameter_specification=_DEFAULT_PARAMS, +) + + +def get_param(param_name, params): + return params.get(param_name, _DEFAULT_PARAMS[param_name]) + + +@functools.lru_cache(maxsize=None) +def _state_to_str(x, y, t, population, player_id): + """A string that uniquely identify (pos, t, population, player_id).""" + if int(player_id) >= 0: + return f"(pop={population}, t={t}, pos=[{x} {y}])" + if player_id == pyspiel.PlayerId.MEAN_FIELD: + return f"(pop={population}, t={t}_a, pos=[{x} {y}])" + if player_id == pyspiel.PlayerId.CHANCE: + return f"(pop={population}, t={t}_a_mu, pos=[{x} {y}])" + + +class MFGCrowdAvoidanceGame(pyspiel.Game): + """Multi-population MFG.""" + + # pylint:disable=dangerous-default-value + def __init__(self, params: Mapping[str, Any] = _DEFAULT_PARAMS): + self.size = get_param("size", params) + self.horizon = get_param("horizon", params) + flat_congestion_matrix = np.fromstring( + get_param("congestion_matrix", params), dtype=np.float64, sep=" " + ) + num_players = get_param("players", params) + if len(flat_congestion_matrix) != num_players**2: + raise ValueError( + "Congestion matrix passed in flat representation does not represent " + f"a square matrix: {flat_congestion_matrix}" + ) + self.congestion_matrix = flat_congestion_matrix.reshape( + [num_players, num_players] + ) + self.geometry = get_param("geometry", params) + num_states = self.size**2 + game_info = pyspiel.GameInfo( + num_distinct_actions=_NUM_ACTIONS, + max_chance_outcomes=max(num_states, _NUM_CHANCE), + num_players=num_players, + min_utility=-np.inf, + max_utility=+np.inf, + utility_sum=None, + max_game_length=self.horizon, + ) + self.proba_noise = get_param("proba_noise", params) + self.coef_congestion = get_param("coef_congestion", params) + self.forbidden_states = pairs_string_to_list( + get_param("forbidden_states", params) + ) + self.coef_target = get_param("coef_target", params) + # TODO(lauriere): should be given as a parameter of the model. + self.target_positions = np.array([[5, 3], [1, 3]]) + + # Represents the current probability distribution over game states + # (when grouped for each population). + str_init_distrib = get_param("init_distrib", params) + if str_init_distrib: + flat_init_distrib = np.fromstring( + str_init_distrib, dtype=np.float64, sep=" " + ) + if len(flat_init_distrib) != num_players * self.size**2: + raise ValueError( + "Initial distribution matrix passed in flat representation does" + f" not represent a sequence of square matrices: {flat_init_distrib}" + ) + self.initial_distribution = flat_init_distrib + else: + # Initialized with a uniform distribution. + self.initial_distribution = [1.0 / num_states] * ( + num_states * num_players + ) + super().__init__(_GAME_TYPE, game_info, params) + + def new_initial_state(self): + """Returns a new population-less blank state. + + This state is provided for some internal operations that use blank + states (e.g. cloning), but cannot be used to play the game, i.e. + ApplyAction() will fail. Proper playable states should be + instantiated with new_initial_state_for_population(). 
+ """ + return MFGCrowdAvoidanceState(self) + + def max_chance_nodes_in_history(self): + """Maximun chance nodes in game history.""" + return self.horizon + 1 + + def new_initial_state_for_population(self, population): + """State corresponding to the start of a game for a given population.""" + return MFGCrowdAvoidanceState(self, population) + + def make_py_observer(self, iig_obs_type=None, params=None): + """Returns an object used for observing game state.""" + if (iig_obs_type is None) or ( + iig_obs_type.public_info and not iig_obs_type.perfect_recall + ): + return Observer(params, self) + return observation.IIGObserverForPublicInfoGame(iig_obs_type, params) + + +def pos_to_merged(pos: np.ndarray, size: int) -> int: + """Converts a [x, y] position into a single integer.""" + assert (pos >= 0).all(), pos + assert (pos < size).all(), pos + return pos[0] + pos[1] * size + + +def merged_to_pos(merged_pos: int, size: int) -> np.ndarray: + """Inverse of pos_to_merged().""" + assert 0 <= merged_pos < size * size + return np.array([merged_pos % size, merged_pos // size]) + + +class MFGCrowdAvoidanceState(pyspiel.State): + """State for the avoidance MFG.""" + + # Maps legal actions to the corresponding move on the grid of the game. + _ACTION_TO_MOVE = { + 0: np.array([0, 0]), + 1: np.array([1, 0]), + 2: np.array([0, 1]), + 3: np.array([0, -1]), + 4: np.array([-1, 0]), + } + # Action that corresponds to no displacement. + _NEUTRAL_ACTION = 0 + + def __init__(self, game, population=None): + """Constructor; should only be called by Game.new_initial_state.*. + + Args: + game: MFGCrowdAvoidanceGame for which a state should be created. + population: ID of the population to create this state for. Must be in [0, + num_players()) or None. States with population=None cannot be used to + perform game actions. + """ + super().__init__(game) + # Initial state where the initial position is chosen according to + # an initial distribution. + self._is_position_init = True + self._player_id = pyspiel.PlayerId.CHANCE + # Population this state corresponds to. Can be None, in which + # case, ApplyAction() is forbidden. + self._population = population + if self._population is not None: + assert 0 <= self._population < self.num_players() + # When set, [2] numpy array representing the x, y position on the grid. + self._pos = None # type: Optional[np.ndarray] + self._t = 0 + self.size = game.size + # Number of states in the grid. + self.num_states = self.size**2 + self.horizon = game.horizon + self.congestion_matrix = game.congestion_matrix + self.geometry = game.geometry + self._returns = np.zeros([self.num_players()], dtype=np.float64) + self._distribution = shared_value.SharedValue(game.initial_distribution) + self.proba_noise = game.proba_noise + self.coef_congestion = game.coef_congestion + self.forbidden_states = game.forbidden_states + self.coef_target = game.coef_target + self.target_positions = game.target_positions + + @property + def population(self): + return self._population + + @property + def pos(self): + return self._pos + + @property + def t(self): + return self._t + + def state_to_str(self, pos, t, population, player_id=0): + """A string that uniquely identify (pos, t, population, player_id).""" + if self._is_position_init: + return f"position_init_{population}" + assert isinstance(pos, np.ndarray), f"Got type {type(pos)}" + assert len(pos.shape) == 1, f"Got {len(pos.shape)}, expected 1 (pos={pos})." + assert pos.shape[0] == 2, f"Got {pos.shape[0]}, expected 2 (pos={pos})." 
+ return _state_to_str(pos[0], pos[1], t, population, player_id) + + # OpenSpiel (PySpiel) API functions are below. This is the standard set that + # should be implemented by every perfect-information sequential-move game. + + def mean_field_population(self): + return self._population + + def _legal_actions(self, player): + """Returns a list of legal actions for player and MFG nodes.""" + if player == pyspiel.PlayerId.MEAN_FIELD: + return [] + if player >= 0 and player == self.current_player(): + return list(self._ACTION_TO_MOVE) + raise ValueError( + f"Unexpected player {player}." + "Expected a mean field or current player >=0." + ) + + def chance_outcomes(self) -> List[Tuple[int, float]]: + """Returns the possible chance outcomes and their probabilities.""" + if self._is_position_init: + if ( + self._population is None + or not 0 <= self._population < self.num_players() + ): + raise ValueError(f"Invalid population {self._population}") + p = self._population % 2 + dist = self._distribution.value + dist_p = dist[p * self.num_states : (p + 1) * self.num_states] + pos_indices_flat = np.nonzero(dist_p)[0] + pos_indices = [ + np.array([i % self.size, (i - i % self.size) // self.size]) + for i in pos_indices_flat + ] + # Beware: In the initial distribution representation, x and y correspond + # respectively to the row and the column, but in the state representation, + # they correspond to the column and the row. + return [ + (pos_to_merged(i, self.size), dist_p[i[1] * self.size + i[0]]) + for i in pos_indices + ] + return [ + (0, 1.0 - self.proba_noise), + (1, self.proba_noise / 4.0), + (2, self.proba_noise / 4.0), + (3, self.proba_noise / 4.0), + (4, self.proba_noise / 4.0), + ] + + def update_pos(self, action): + """Updates the position of the player given a move action.""" + if action < 0 or action >= len(self._ACTION_TO_MOVE): + raise ValueError( + f"The action must be between 0 and {len(self._ACTION_TO_MOVE)}, " + f"got {action}" + ) + candidate_pos = self._pos + self._ACTION_TO_MOVE[action] + # if candidate_pos in self.forbidden_states: + # if np.any(np.all(candidate_pos == self.forbidden_states, axis=1)): + if any(np.array_equal(candidate_pos, x) for x in self.forbidden_states): + candidate_pos = self._pos + elif self.geometry == Geometry.TORUS: + candidate_pos += self.size + candidate_pos %= self.size + else: + assert ( + self.geometry == Geometry.SQUARE + ), f"Invalid geometry {self.geometry}" + # Keep the position within the bounds of the square. + candidate_pos = np.minimum(candidate_pos, self.size - 1) + candidate_pos = np.maximum(candidate_pos, 0) + self._pos = candidate_pos + + def _apply_action(self, action): + """Applies the specified action to the state.""" + if self._population is None: + raise ValueError( + "Attempting to perform an action with a population-less state." + ) + if self._player_id == pyspiel.PlayerId.MEAN_FIELD: + raise ValueError( + "_apply_action should not be called at a MEAN_FIELD state." 
+ ) + self._returns += np.array(self.rewards()) + if self._is_position_init: + self._pos = merged_to_pos(action, self.size) + self._is_position_init = False + self._player_id = self._population + elif self._player_id == pyspiel.PlayerId.CHANCE: + self.update_pos(action) + self._t += 1 + self._player_id = pyspiel.PlayerId.MEAN_FIELD + elif int(self._player_id) >= 0: + assert self._player_id == self._population, ( + f"Invalid decision player id {self._player_id} " + f"expected {self._population}" + ) + self.update_pos(action) + self._player_id = pyspiel.PlayerId.CHANCE + else: + raise ValueError(f"Unexpected state. Player id: {self._player_id}") + + def _action_to_string(self, player, action): + """Action -> string.""" + del player + if self.is_chance_node() and self._is_position_init: + return f"init_position={action}" + return str(self._ACTION_TO_MOVE[action]) + + def distribution_support(self): + """Returns a list of state string.""" + support = [] + for x in range(self.size): + for y in range(self.size): + for population in range(self.num_players()): + support.append( + self.state_to_str( + np.array([x, y]), + self._t, + population, + player_id=pyspiel.PlayerId.MEAN_FIELD, + ) + ) + return support + + def get_pos_proba(self, pos: np.ndarray, population: int) -> float: + """Gets the probability of a pos and population in the current distrib. + + Args: + pos: 2D position. + population: Population requested. + + Returns: + The probability for the provided position and population. + """ + assert (pos >= 0).all(), pos + assert (pos < self.size).all(), pos + assert 0 <= population < self.num_players(), population + # This logic needs to match the ordering defined in distribution_support(). + index = population + self.num_players() * (pos[1] + self.size * pos[0]) + assert 0 <= index < len(self._distribution.value), ( + f"Invalid index {index} vs dist length:" + f" {len(self._distribution.value)}, population={population}, pos={pos}," + f" state={self}" + ) + return self._distribution.value[index] + + def update_distribution(self, distribution): + """This function is central and specific to the logic of the MFG. + + It should only be called when the node is in MEAN_FIELD state. + + Args: + distribution: List of floats that should contain the probability of each + state returned by distribution_support(). + """ + expected_dist_size = self.num_states * self.num_players() + assert len(distribution) == expected_dist_size, ( + "Unexpected distribution length " + f"{len(distribution)} != {expected_dist_size}" + ) + if self._player_id != pyspiel.PlayerId.MEAN_FIELD: + raise ValueError( + "update_distribution should only be called at a MEAN_FIELD state." + ) + self._distribution = shared_value.SharedValue(distribution) + self._player_id = self._population + + def is_terminal(self): + """Returns True if the game is over.""" + return self.t >= self.horizon + + def current_player(self): + """Returns id of the next player to move, or TERMINAL if game is over.""" + if self.is_terminal(): + return pyspiel.PlayerId.TERMINAL + return self._player_id + + def rewards(self) -> List[float]: + """Crowd avoidance rewards for all populations. + + Returns: + One float per population. + """ + if int(self._player_id) < 0: + return [0.0] * self.num_players() + densities = np.array( + [ + self.get_pos_proba(self._pos, population) + for population in range(self.num_players()) + ], + dtype=np.float64, + ) + rew = -self.coef_congestion * np.dot(self.congestion_matrix, densities) + # Rewards for target positions. 
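+    # Each population receives the coef_target bonus only when the
+    # representative agent is exactly on that population's target cell
+    # (np.array_equal below contributes a factor of 0 or 1).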
+ rew[0] += self.coef_target * np.array_equal( + self._pos, self.target_positions[0] + ) + rew[1] += self.coef_target * np.array_equal( + self._pos, self.target_positions[1] + ) + return list(rew) + + def returns(self) -> List[float]: + """Returns is the sum of all payoffs collected so far.""" + return list(self._returns + np.array(self.rewards())) + + def __str__(self): + """A string that uniquely identify the current state.""" + return self.state_to_str( + self._pos, self._t, self._population, player_id=self._player_id + ) + + +class Observer: + """Observer, conforming to the PyObserver interface (see observation.py).""" + + def __init__(self, params, game): + """Initializes an empty observation tensor.""" + del params + + self.size = game.size + self.horizon = game.horizon + # +1 to allow t == horizon. + self.tensor = np.zeros(2 * self.size + self.horizon + 1, np.float32) + self.dict = { + "x": self.tensor[: self.size], + "y": self.tensor[self.size : self.size * 2], + "t": self.tensor[self.size * 2 :], + } + + def set_from(self, state: MFGCrowdAvoidanceState, player: int): + """Updates `tensor` and `dict` to reflect `state` from PoV of `player`.""" + del player + # We update the observation via the shaped tensor since indexing is more + # convenient than with the 1-D tensor. Both are views onto the same memory. + self.tensor.fill(0) + # state.pos is None for the initial (blank) state, don't set any + # position bit in that case. + if state.pos is not None: + if not (state.pos >= 0).all() or not (state.pos < self.size).all(): + raise ValueError( + f"Expected {state} positions to be in [0, {self.size})" + ) + self.dict["x"][state.pos[0]] = 1 + self.dict["y"][state.pos[1]] = 1 + if not 0 <= state.t <= self.horizon: + raise ValueError(f"Expected {state} time to be in [0, {self.horizon}]") + self.dict["t"][state.t] = 1 + + def string_from(self, state, player): + """Observation of `state` from the PoV of `player`, as a string.""" + del player + return str(state) + + +pyspiel.register_game(_GAME_TYPE, MFGCrowdAvoidanceGame) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/crowd_avoidance_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/crowd_avoidance_test.py new file mode 100644 index 0000000..6d7756b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/crowd_avoidance_test.py @@ -0,0 +1,215 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Lint as python3 +"""Tests for Python Crowd avoidance game.""" + +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np +import numpy.testing as npt +from open_spiel.python.mfg.games import crowd_avoidance +import pyspiel + + +class MFGCrowdAvoidanceGameTest(parameterized.TestCase): + + def test_load(self): + game = pyspiel.load_game('python_mfg_crowd_avoidance') + game.new_initial_state_for_population(0) + game.new_initial_state_for_population(1) + + @parameterized.parameters( + { + 'geometry': crowd_avoidance.Geometry.SQUARE, + 'expected_pos': np.array([5, 3]), + }, + { + 'geometry': crowd_avoidance.Geometry.TORUS, + 'expected_pos': np.array([5, 3]), + }, + ) + def test_dynamics(self, geometry, expected_pos): + game = pyspiel.load_game( + 'python_mfg_crowd_avoidance', + { + 'geometry': geometry, + }, + ) + state = game.new_initial_state_for_population(1) + # Initial chance node. + self.assertEqual(state.current_player(), pyspiel.PlayerId.CHANCE) + self.assertLen(state.chance_outcomes(), 3) + self.assertEqual( + state.chance_outcomes()[0][0], + crowd_avoidance.pos_to_merged(np.array([5, 2]), state.size), + ) + state.apply_action(state.chance_outcomes()[0][0]) + self.assertEqual(state.current_player(), 1) + npt.assert_array_equal(state.pos, [5, 2]) + self.assertEqual(state._action_to_string(player=1, action=2), '[0 1]') + state.apply_action(2) + npt.assert_array_equal(state.pos, expected_pos) + + def test_create_with_params(self): + setting = 'python_mfg_crowd_avoidance()' + game = pyspiel.load_game(setting) + self.assertEqual(game.size, 7) + self.assertEqual(game.horizon, 10) + + @parameterized.parameters( + {'population': 0}, + {'population': 1}, + ) + def test_random_game(self, population): + """Tests basic API functions.""" + congestion_matrix = np.array([[0, 1], [1, 0]]) + init_distrib = np.array([ + # First population + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.4, 0.4, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + # Second population + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.4, 0.4, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + ]) + forbidden_states_grid = [ + '#######', + '# # #', + '# #', + '# # #', + '# #', + '# # #', + '#######', + ] + forbidden_states = crowd_avoidance.grid_to_forbidden_states( + forbidden_states_grid + ) + game = crowd_avoidance.MFGCrowdAvoidanceGame( + params={ + 'horizon': 10, + 'size': 7, + 'players': 2, + 'congestion_matrix': ' '.join( + str(v) for v in congestion_matrix.flatten() + ), + 'init_distrib': ' '.join(str(v) for v in init_distrib.flatten()), + 'forbidden_states': forbidden_states, + } + ) + pyspiel.random_sim_test( + game, + num_sims=10, + serialize=False, + verbose=True, + mean_field_population=population, + ) + + @parameterized.parameters( + { + 'coef_congestion': 1.5, + 'coef_target': 0.6, + 'congestion_matrix': np.array([[0, 1], [1, 0]]), + 'population': 0, + 'players': 2, + 'initial_pos': np.array([0, 0]), + 'distributions': [ + # First population + np.array([[0.8, 0.2], [0.0, 0.0]]), + # Second population + np.array([[0.3, 0.7], [0.0, 0.0]]), + ], + 'expected_rewards': np.array([ + -1.5 * 0.3 + 0.0, + -1.5 * 0.8 + 0.0, + ]), + 'init_distrib': 
np.array([ + # First population + [0.8, 0.2], + [0.0, 0.0], + # Second population + [0.3, 0.7], + [0.0, 0.0], + ]), + }, + ) + def test_rewards( + self, + coef_congestion, + coef_target, + congestion_matrix, + players, + population, + initial_pos, + distributions, + expected_rewards, + init_distrib, + ): + game = pyspiel.load_game( + 'python_mfg_crowd_avoidance', + { + 'size': 2, + 'coef_congestion': coef_congestion, + 'coef_target': coef_target, + 'congestion_matrix': ' '.join( + str(v) for v in congestion_matrix.flatten() + ), + 'players': players, + 'init_distrib': ' '.join(str(v) for v in init_distrib.flatten()), + 'forbidden_states': '[]', + }, + ) + state = game.new_initial_state_for_population(population) + # Initial chance node. + self.assertEqual(state.current_player(), pyspiel.PlayerId.CHANCE) + state.apply_action(crowd_avoidance.pos_to_merged(initial_pos, state.size)) + self.assertEqual(state.current_player(), population) + npt.assert_array_equal(state.pos, initial_pos) + state.apply_action(state._NEUTRAL_ACTION) + npt.assert_array_equal(state.pos, initial_pos) + self.assertEqual(state.current_player(), pyspiel.PlayerId.CHANCE) + state.apply_action(state._NEUTRAL_ACTION) + self.assertEqual(state.current_player(), pyspiel.PlayerId.MEAN_FIELD) + + # Maps states (in string representation) to their proba. + dist = {} + for x in range(state.size): + for y in range(state.size): + for pop in range(len(congestion_matrix)): + state_str = state.state_to_str( + np.array([x, y]), + state.t, + pop, + player_id=pyspiel.PlayerId.MEAN_FIELD, + ) + dist[state_str] = distributions[pop][y][x] + support = state.distribution_support() + state.update_distribution([dist[s] for s in support]) + + # Decision node where we get a reward. + self.assertEqual(state.current_player(), population) + npt.assert_array_equal(state.rewards(), expected_rewards) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/crowd_modelling.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/crowd_modelling.py new file mode 100644 index 0000000..a271aa8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/crowd_modelling.py @@ -0,0 +1,304 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Mean Field Crowd Modelling, implemented in Python. + +This is a demonstration of implementing a mean field game in Python. + +Fictitious play for mean field games: Continuous time analysis and applications, +Perrin & al. 2019 (https://arxiv.org/abs/2007.03458). This game corresponds +to the game in section 4.2. 
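+
+A minimal usage sketch (illustrative; the parameters shown are the module
+defaults):
+
+  game = pyspiel.load_game("python_mfg_crowd_modelling(size=10,horizon=10)")
+  state = game.new_initial_state()  # Starts at the initial chance node.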
+""" + +from typing import Any, List, Mapping +import numpy as np + +from open_spiel.python import observation +import pyspiel + +_NUM_PLAYERS = 1 +_SIZE = 10 +_HORIZON = 10 +_NUM_ACTIONS = 3 +_NUM_CHANCE = 3 +_EPSILON = 10**(-25) +_DEFAULT_PARAMS = {"size": _SIZE, "horizon": _HORIZON} +_GAME_TYPE = pyspiel.GameType( + short_name="python_mfg_crowd_modelling", + long_name="Python Mean Field Crowd Modelling", + dynamics=pyspiel.GameType.Dynamics.MEAN_FIELD, + chance_mode=pyspiel.GameType.ChanceMode.EXPLICIT_STOCHASTIC, + information=pyspiel.GameType.Information.PERFECT_INFORMATION, + utility=pyspiel.GameType.Utility.GENERAL_SUM, + reward_model=pyspiel.GameType.RewardModel.REWARDS, + max_num_players=_NUM_PLAYERS, + min_num_players=_NUM_PLAYERS, + provides_information_state_string=True, + provides_information_state_tensor=False, + provides_observation_string=True, + provides_observation_tensor=True, + parameter_specification=_DEFAULT_PARAMS) + + +class MFGCrowdModellingGame(pyspiel.Game): + """A Mean Field Crowd Modelling game. + + + A game starts by an initial chance node that select the initial state + of the MFG. + Then the game sequentially alternates between: + - An action selection node (Where the player Id >= 0) + - A chance node (the player id is pyspiel.PlayerId.CHANCE) + - A Mean Field node (the player id is pyspiel.PlayerId.MEAN_FIELD) + """ + + # pylint:disable=dangerous-default-value + def __init__(self, params: Mapping[str, Any] = _DEFAULT_PARAMS): + game_info = pyspiel.GameInfo( + num_distinct_actions=_NUM_ACTIONS, + max_chance_outcomes=max(params["size"], _NUM_CHANCE), + num_players=_NUM_PLAYERS, + min_utility=-np.inf, + max_utility=+np.inf, + utility_sum=None, + max_game_length=params["horizon"]) + super().__init__(_GAME_TYPE, game_info, params) + self.size = params["size"] + self.horizon = params["horizon"] + + def new_initial_state(self): + """Returns a state corresponding to the start of a game.""" + return MFGCrowdModellingState(self) + + def make_py_observer(self, iig_obs_type=None, params=None): + """Returns an object used for observing game state.""" + if ((iig_obs_type is None) or + (iig_obs_type.public_info and not iig_obs_type.perfect_recall)): + return Observer(params, self) + return observation.IIGObserverForPublicInfoGame(iig_obs_type, params) + + def max_chance_nodes_in_history(self): + """Maximun chance nodes in game history.""" + return self.horizon + 1 + + +class MFGCrowdModellingState(pyspiel.State): + """A Mean Field Crowd Modelling state.""" + + # Maps legal actions to the corresponding move along the 1-D axis of the game. + _ACTION_TO_MOVE = {0: -1, 1: 0, 2: 1} + # Action that corresponds to no displacement. + _NEUTRAL_ACTION = 1 + + def __init__(self, game): + """Constructor; should only be called by Game.new_initial_state.""" + super().__init__(game) + self._is_chance_init = True # is true for the first state of the game. + self._player_id = pyspiel.PlayerId.CHANCE + self._x = None + self._t = 0 + # We initialize last_action to the neutral action. This makes sure + # that the first reward does not include any displacement penalty. + self._last_action = self._NEUTRAL_ACTION + self.size = game.size + self.horizon = game.horizon + self.return_value = 0.0 + + # Represents the current probability distribution over game states. + # Initialized with a uniform distribution. + self._distribution = [1. 
/ self.size for _ in range(self.size)] + + @property + def x(self): + return self._x + + @property + def t(self): + return self._t + + def state_to_str(self, x, t, player_id=pyspiel.PlayerId.DEFAULT_PLAYER_ID): + """A string that uniquely identify a triplet x, t, player_id.""" + if self._is_chance_init: + return "initial" + if player_id == pyspiel.PlayerId.DEFAULT_PLAYER_ID: + return str((x, t)) + if player_id == pyspiel.PlayerId.MEAN_FIELD: + return str((x, t)) + "_a" + if player_id == pyspiel.PlayerId.CHANCE: + return str((x, t)) + "_a_mu" + raise ValueError( + "player_id is not mean field, chance or default player id.") + + # OpenSpiel (PySpiel) API functions are below. This is the standard set that + # should be implemented by every perfect-information sequential-move game. + + def _legal_actions(self, player): + """Returns a list of legal actions for player and MFG nodes.""" + if player == pyspiel.PlayerId.MEAN_FIELD: + return [] + if (player == pyspiel.PlayerId.DEFAULT_PLAYER_ID + and player == self.current_player()): + return [0, 1, 2] + raise ValueError(f"Unexpected player {player}. " + "Expected a mean field or current player 0.") + + def chance_outcomes(self): + """Returns the possible chance outcomes and their probabilities.""" + if self._is_chance_init: + return list(enumerate(self._distribution)) + return [(0, 1. / 3.), (1, 1. / 3.), (2, 1. / 3.)] + + def _apply_action(self, action): + """Applies the specified action to the state.""" + if self._player_id == pyspiel.PlayerId.MEAN_FIELD: + raise ValueError( + "_apply_action should not be called at a MEAN_FIELD state.") + self.return_value += self._rewards() + if self._is_chance_init: + # Here the action is between 0 and self.size - 1 + if action < 0 or action >= self.size: + raise ValueError( + "The action is between 0 and self.size - 1 at an init chance node") + self._x = action + self._is_chance_init = False + self._player_id = pyspiel.PlayerId.DEFAULT_PLAYER_ID + elif self._player_id == pyspiel.PlayerId.CHANCE: + # Here the action is between 0 and 2 + if action < 0 or action > 2: + raise ValueError( + "The action is between 0 and 2 at any chance node") + self._x = (self.x + self._ACTION_TO_MOVE[action]) % self.size + self._t += 1 + self._player_id = pyspiel.PlayerId.MEAN_FIELD + elif self._player_id == pyspiel.PlayerId.DEFAULT_PLAYER_ID: + # Here the action is between 0 and 2 + if action < 0 or action > 2: + raise ValueError( + "The action is between 0 and 2 at any chance node") + self._x = (self.x + self._ACTION_TO_MOVE[action]) % self.size + self._last_action = action + self._player_id = pyspiel.PlayerId.CHANCE + + def _action_to_string(self, player, action): + """Action -> string.""" + del player + if self.is_chance_node() and self._is_chance_init: + return f"init_state={action}" + return str(self._ACTION_TO_MOVE[action]) + + def distribution_support(self): + """return a list of state string.""" + return [ + self.state_to_str( + i, self.t, player_id=pyspiel.PlayerId.MEAN_FIELD) + for i in range(self.size) + ] + + def update_distribution(self, distribution): + """This function is central and specific to the logic of the MFG. + + Args: + distribution: a distribution to register. + + - function should be called when the node is in MEAN_FIELD state. + - distribution are probabilities that correspond to each game state + given by distribution_support. 
+ + """ + if self._player_id != pyspiel.PlayerId.MEAN_FIELD: + raise ValueError( + "update_distribution should only be called at a MEAN_FIELD state.") + self._distribution = distribution.copy() + self._player_id = pyspiel.PlayerId.DEFAULT_PLAYER_ID + + def is_terminal(self): + """Returns True if the game is over.""" + return self.t >= self.horizon + + def current_player(self): + """Returns id of the next player to move, or TERMINAL if game is over.""" + if self.is_terminal(): + return pyspiel.PlayerId.TERMINAL + return self._player_id + + def _rewards(self): + """Reward for the player for this state.""" + if self._player_id == pyspiel.PlayerId.DEFAULT_PLAYER_ID: + r_x = 1 - (1.0 * np.abs(self.x - self.size // 2)) / (self.size // 2) + r_a = -(1.0 * np.abs(self._ACTION_TO_MOVE[self._last_action])) / self.size + r_mu = - np.log(self._distribution[self.x] + _EPSILON) + return r_x + r_a + r_mu + return 0.0 + + def rewards(self) -> List[float]: + """Rewards for all players.""" + # For now, only single-population (single-player) mean field games + # are supported. + return [self._rewards()] + + def _returns(self): + """Returns is the sum of all payoffs collected so far.""" + return self.return_value + self._rewards() + + def returns(self) -> List[float]: + """Returns for all players.""" + # For now, only single-population (single-player) mean field games + # are supported. + return [self._returns()] + + def __str__(self): + """A string that uniquely identify the current state.""" + return self.state_to_str(self.x, self.t, player_id=self._player_id) + + +class Observer: + """Observer, conforming to the PyObserver interface (see observation.py).""" + + def __init__(self, params, game): + """Initializes an empty observation tensor.""" + del params + + self.size = game.size + self.horizon = game.horizon + # +1 to allow t == horizon. + self.tensor = np.zeros(self.size + self.horizon + 1, np.float32) + self.dict = {"x": self.tensor[:self.size], "t": self.tensor[self.size:]} + + def set_from(self, state: MFGCrowdModellingState, player: int): + """Updates `tensor` and `dict` to reflect `state` from PoV of `player`.""" + del player + # We update the observation via the shaped tensor since indexing is more + # convenient than with the 1-D tensor. Both are views onto the same memory. + self.tensor.fill(0) + # state.x is None for the initial (blank) state, don't set any + # position bit in that case. + if state.x is not None: + if not 0 <= state.x < self.size: + raise ValueError( + f"Expected {state} x position to be in [0, {self.size})") + self.dict["x"][state.x] = 1 + if not 0 <= state.t <= self.horizon: + raise ValueError(f"Expected {state} time to be in [0, {self.horizon}]") + self.dict["t"][state.t] = 1 + + def string_from(self, state, player): + """Observation of `state` from the PoV of `player`, as a string.""" + del player + return str(state) + + +# Register the game with the OpenSpiel library + +pyspiel.register_game(_GAME_TYPE, MFGCrowdModellingGame) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/crowd_modelling_2d.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/crowd_modelling_2d.py new file mode 100644 index 0000000..db32c97 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/crowd_modelling_2d.py @@ -0,0 +1,101 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Mean Field Crowd Modelling Game in 2d. + +Please see the C++ implementation under games/mfg/crowd_modelling_2d.h for +more information. +""" + +from typing import Sequence + + +def grid_to_forbidden_states(grid: Sequence[str]) -> str: + """Converts a grid into string representation of forbidden states. + + Args: + grid: Rows of the grid. '#' character denotes a forbidden state. All rows + should have the same number of columns, i.e. cells. + + Returns: + String representation of forbidden states in the form of x (column) and y + (row) pairs, e.g. [1|1;0|2]. + """ + forbidden_states = [] + num_cols = len(grid[0]) + for y, row in enumerate(grid): + assert len(row) == num_cols, f'Number of columns should be {num_cols}.' + for x, cell in enumerate(row): + if cell == '#': + forbidden_states.append(f'{x}|{y}') + return '[' + ';'.join(forbidden_states) + ']' + + +FOUR_ROOMS_FORBIDDEN_STATES = grid_to_forbidden_states([ + '#############', + '# # #', + '# # #', + '# #', + '# # #', + '# # #', + '### ##### ###', + '# # #', + '# # #', + '# #', + '# # #', + '# # #', + '#############', +]) + +# Four rooms with an initial state at top-left corner. +FOUR_ROOMS = { + 'forbidden_states': FOUR_ROOMS_FORBIDDEN_STATES, + 'horizon': 40, + 'initial_distribution': '[1|1]', + 'initial_distribution_value': '[1.0]', + 'size': 13, +} + +MAZE_FORBIDDEN_STATES = grid_to_forbidden_states([ + '######################', + '# # # # #', + '# # # # #', + '###### # # ## # #', + '# # # # # #', + '# # # ### # #', + '# ######## # # #', + '# # # # ## # #', + '# # # # # # ###', + '# # # # # # # #', + '###### # ####### # # #', + '# # # # # #', + '# # ## ### # # # #', + '## # # # ##### # #', + '## # # # # # # #', + '# # #### # #', + '# #### # ######## #', + '# # # # ### #', + '# # # # # # # # #', + '# ##### # # # #', + '# # #', + '######################', +]) + +# 22x22 maze with an initial state at top-left corner, +MAZE = { + 'forbidden_states': MAZE_FORBIDDEN_STATES, + 'horizon': 100, + 'initial_distribution': '[1|1]', + 'initial_distribution_value': '[1.0]', + 'size': 22, +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/crowd_modelling_2d_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/crowd_modelling_2d_test.py new file mode 100644 index 0000000..19acb29 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/crowd_modelling_2d_test.py @@ -0,0 +1,37 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for crowd_modelling_2d.""" + +from absl.testing import absltest + +from open_spiel.python.mfg.games import crowd_modelling_2d + + +class CrowdModelling2DTest(absltest.TestCase): + + def test_grid_to_forbidden_states(self): + forbidden_states = crowd_modelling_2d.grid_to_forbidden_states([ + "#####", + "# # #", + "# #", + "#####", + ]) + + self.assertEqual( + forbidden_states, + "[0|0;1|0;2|0;3|0;4|0;0|1;2|1;4|1;0|2;4|2;0|3;1|3;2|3;3|3;4|3]") + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/crowd_modelling_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/crowd_modelling_test.py new file mode 100644 index 0000000..8113437 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/crowd_modelling_test.py @@ -0,0 +1,139 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Tests for Python Crowd Modelling game.""" + +from absl.testing import absltest +import numpy as np +from open_spiel.python.mfg.games import crowd_modelling +import pyspiel + +MFG_STR_CONST = "_a" + + +class MFGCrowdModellingGameTest(absltest.TestCase): + + def test_load(self): + game = pyspiel.load_game("python_mfg_crowd_modelling") + game.new_initial_state() + + def test_create(self): + """Checks we can create the game and clone states.""" + game = crowd_modelling.MFGCrowdModellingGame() + self.assertEqual(game.size, crowd_modelling._SIZE) + self.assertEqual(game.horizon, crowd_modelling._HORIZON) + self.assertEqual(game.get_type().dynamics, + pyspiel.GameType.Dynamics.MEAN_FIELD) + print("Num distinct actions:", game.num_distinct_actions()) + state = game.new_initial_state() + clone = state.clone() + print("Initial state:", state) + print("Cloned initial state:", clone) + + def test_create_with_params(self): + game = pyspiel.load_game("python_mfg_crowd_modelling(horizon=100,size=20)") + self.assertEqual(game.size, 20) + self.assertEqual(game.horizon, 100) + + def test_random_game(self): + """Tests basic API functions.""" + horizon = 20 + size = 50 + game = crowd_modelling.MFGCrowdModellingGame(params={ + "horizon": horizon, + "size": size + }) + pyspiel.random_sim_test( + game, num_sims=10, serialize=False, verbose=True) + + def test_reward(self): + game = crowd_modelling.MFGCrowdModellingGame() + state = game.new_initial_state() + self.assertEqual(state.current_player(), pyspiel.PlayerId.CHANCE) + state.apply_action(game.size // 2) + self.assertEqual(state.current_player(), 0) + # This expected reward assumes that the game is initialized with + # uniform state distribution. + self.assertAlmostEqual(state.rewards()[0], 1. + np.log(game.size)) + self.assertAlmostEqual(state.returns()[0], 1. + np.log(game.size)) + state.apply_action(1) + self.assertEqual(state.current_player(), pyspiel.PlayerId.CHANCE) + self.assertAlmostEqual(state.returns()[0], 1. 
+ np.log(game.size)) + + def test_distribution(self): + """Checks that distribution-related functions work.""" + game = crowd_modelling.MFGCrowdModellingGame() + state = game.new_initial_state() + self.assertEqual(state.current_player(), pyspiel.PlayerId.CHANCE) + state.apply_action(game.size // 2) + self.assertEqual(state.current_player(), 0) + # This expected reward assumes that the game is initialized with + # uniform state distribution. + self.assertAlmostEqual(state.rewards()[0], 1. + np.log(game.size)) + state.apply_action(crowd_modelling.MFGCrowdModellingState._NEUTRAL_ACTION) + # Chance node. + self.assertEqual(state.current_player(), pyspiel.PlayerId.CHANCE) + state.apply_action(crowd_modelling.MFGCrowdModellingState._NEUTRAL_ACTION) + self.assertEqual(state.distribution_support(), [ + "(0, 1)_a", "(1, 1)_a", "(2, 1)_a", "(3, 1)_a", "(4, 1)_a", "(5, 1)_a", + "(6, 1)_a", "(7, 1)_a", "(8, 1)_a", "(9, 1)_a" + ]) + new_distrib = [0.01] * 9 + [1. - 0.01 * 9] + state.update_distribution(new_distrib) + self.assertAlmostEqual(state._distribution, new_distrib) + + # Check that the distribution is taken into account for the reward + # computation. + self.assertAlmostEqual(state.rewards()[0], 1. - np.log(0.01)) + + def test_compare_py_cpp(self): + """Compares py and cpp implementations of this game.""" + py_game = pyspiel.load_game("python_mfg_crowd_modelling") + cpp_game = pyspiel.load_game("mfg_crowd_modelling") + np.random.seed(7) + py_state = py_game.new_initial_state() + cpp_state = cpp_game.new_initial_state() + t = 0 + while not cpp_state.is_terminal(): + self.assertFalse(py_state.is_terminal()) + self.assertEqual(str(cpp_state), str(py_state)) + self.assertAlmostEqual(cpp_state.returns()[0], py_state.returns()[0]) + if cpp_state.current_player() == pyspiel.PlayerId.CHANCE: + actions, probs = zip(*cpp_state.chance_outcomes()) + action = np.random.choice(actions, p=probs) + self.assertEqual( + cpp_state.action_to_string(action), + py_state.action_to_string(action)) + cpp_state.apply_action(action) + py_state.apply_action(action) + elif cpp_state.current_player() == pyspiel.PlayerId.MEAN_FIELD: + num_cpp_states = len(cpp_state.distribution_support()) + distribution = [1 / num_cpp_states] * num_cpp_states + cpp_state.update_distribution(distribution) + py_state.update_distribution(distribution) + else: + self.assertEqual(cpp_state.current_player(), 0) + legal_actions = cpp_state.legal_actions() + action = np.random.choice(legal_actions) + self.assertEqual( + cpp_state.action_to_string(action), + py_state.action_to_string(action)) + cpp_state.apply_action(action) + py_state.apply_action(action) + t += 1 + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/dynamic_routing.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/dynamic_routing.py new file mode 100644 index 0000000..45a8cb9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/dynamic_routing.py @@ -0,0 +1,613 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as python3
+"""Implementation of a mean field routing game.
+
+The game is derived from https://arxiv.org/abs/2110.11943.
+It is the extension of the dynamic routing game python_dynamic_routing.
+The list of vehicles describing the N players of the dynamic routing game is
+replaced by a list of OriginDestinationDemand. One OriginDestinationDemand
+corresponds to one population of vehicles (with the same origin, destination
+and departure time).
+
+This game is a variant of the mean field route choice game, as the vehicle
+movement depends on the current network congestion. In the mean field route
+choice game, the number of time steps needed to reach the destination is
+constant and depends neither on the network congestion nor on the vehicle's
+cost function. In the dynamic driving and routing games, the vehicle chooses
+its speed on each link in order to minimize its cost function; the congestion
+is therefore encoded in the cost function.
+
+More context can be found in the docstring of the python_dynamic_routing
+class.
+"""
+import functools
+from typing import Any, Iterable, List, Mapping, Optional, Tuple
+
+import numpy as np
+
+from open_spiel.python.games import dynamic_routing_data
+from open_spiel.python.games import dynamic_routing_utils
+from open_spiel.python.observation import IIGObserverForPublicInfoGame
+import pyspiel
+
+_DEFAULT_PARAMS = {
+    "max_num_time_step": 10,
+    "time_step_length": 0.5,
+    "players": -1
+}
+_GAME_TYPE = pyspiel.GameType(
+    short_name="python_mfg_dynamic_routing",
+    long_name="Python Mean Field Routing Game",
+    dynamics=pyspiel.GameType.Dynamics.MEAN_FIELD,
+    chance_mode=pyspiel.GameType.ChanceMode.EXPLICIT_STOCHASTIC,
+    information=pyspiel.GameType.Information.PERFECT_INFORMATION,
+    utility=pyspiel.GameType.Utility.GENERAL_SUM,
+    reward_model=pyspiel.GameType.RewardModel.REWARDS,
+    max_num_players=1,
+    min_num_players=1,
+    provides_information_state_string=True,
+    provides_information_state_tensor=True,
+    provides_observation_string=True,
+    provides_observation_tensor=True,
+    default_loadable=True,
+    provides_factored_observation_string=True,
+    parameter_specification=_DEFAULT_PARAMS)
+
+WAITING_TIME_NOT_ASSIGNED = -1
+
+
+@functools.lru_cache(maxsize=None)
+def _state_to_str(
+    is_chance_init: bool,
+    location: str,
+    time_step: int,
+    player_id: int,
+    waiting_time: int,
+    destination: str,
+    final_arrival_time: float,
+) -> str:
+  """Converts the state to a string representation.
+
+  The string representation is used as a dictionary key by the various
+  algorithms that compute state values, expected returns, best responses or
+  the mean field Nash equilibrium, so it must identify the state uniquely.
+  The state is uniquely defined by the current time, the type of node
+  (decision, mean field or chance), the vehicle location, its destination and
+  its waiting time.
+  Args:
+    is_chance_init: True if at chance initialization.
+    location: the location of the representative player.
+    time_step: the current time step.
+    player_id: the current node type as a player id.
+    waiting_time: the representative player's waiting time.
+    destination: the destination of the representative player.
+    final_arrival_time: time of arrival.
+
+  Returns:
+    state_string: a string uniquely identifying the state.
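+
+    For example (illustrative road-section names), a decision-node state may
+    render as "Location=A->B, waiting_time=2, t=4, destination='C->D'", and
+    an arrived vehicle as
+    "Arrived at C->D, with arrival time 5.0, t=6_mean_field".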
+  """
+  if is_chance_init:
+    return "initial chance node"
+  if player_id == pyspiel.PlayerId.DEFAULT_PLAYER_ID:
+    time = str(time_step)
+  elif player_id == pyspiel.PlayerId.MEAN_FIELD:
+    time = f"{time_step}_mean_field"
+  elif player_id == pyspiel.PlayerId.CHANCE:
+    time = f"{time_step}_chance"
+  else:
+    raise ValueError(
+        "Player id should be DEFAULT_PLAYER_ID, MEAN_FIELD or CHANCE")
+  if final_arrival_time:
+    return (f"Arrived at {location}, with arrival time "
+            f"{final_arrival_time}, t={time}")
+  return (f"Location={location}, waiting_time={waiting_time},"
+          f" t={time}, destination='{destination}'")
+
+
+class MeanFieldRoutingGame(pyspiel.Game):
+  """Implementation of mean field routing game.
+
+  The representative vehicle/player is described by a tuple of its current
+  location, current waiting time and destination. When the waiting time is
+  negative, the vehicle chooses which successor link it would like to move to.
+  When arriving on the link, a waiting time is assigned to the player based on
+  the distribution of players on the link. The vehicle's arrival time is the
+  time step at which it first reaches its destination. See the module
+  docstring for more information.
+
+  Attributes inherited from GameInfo:
+    max_chance_outcomes: maximum number of chance actions. Set to the length of
+      od_demand, i.e. the number of `OriginDestinationDemand`s.
+    max_game_length: maximum number of time steps played. Passed during
+      construction.
+    max_utility: maximum utility is the opposite of the minimum arrival
+      time. Set to 0.
+    min_utility: minimum utility is the opposite of the maximum arrival
+      time. Set to - max_game_length - 1.
+    num_distinct_actions: maximum number of possible actions. This is
+      equal to the number of links + 1 (corresponding to having no
+      possible action _NO_POSSIBLE_ACTION).
+    num_players: the number of vehicles. Should be 1 as this mean field
+      game is a one-population game.
+  Attributes:
+    network: the network of the game.
+    od_demand: a list of origin-destination demands. Each origin and
+      destination should be a road section of the game.
+    time_step_length: size of the time step, used to convert travel times into
+      number of game time steps.
+    perform_sanity_checks: if True, sanity checks are done during the game;
+      should be set to False to speed up the game.
+    total_num_vehicle: total number of vehicles, i.e. the sum of the od_demand
+      counts.
+    chance_outcomes: chance outcomes based on the initial probability
+      distribution and their probabilities.
+  """
+  network: dynamic_routing_utils.Network
+  od_demand: List[dynamic_routing_utils.OriginDestinationDemand]
+  perform_sanity_checks: bool
+  time_step_length: float
+
+  def __init__(self,
+               params: Mapping[str, Any],
+               network: Optional[dynamic_routing_utils.Network] = None,
+               od_demand: Optional[List[
+                   dynamic_routing_utils.OriginDestinationDemand]] = None,
+               perform_sanity_checks: bool = True):
+    """Initializes the game.
+
+    Args:
+      params: game parameters. They should define max_num_time_step and
+        time_step_length.
+      network: the network of the game.
+      od_demand: a list of origin-destination demands. Each origin and
+        destination should be a road section of the game.
+      perform_sanity_checks: sets the perform_sanity_checks attribute.
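+
+    Note: when `network` or `od_demand` is None, the Braess network and its
+    origin-destination demand from dynamic_routing_data are used as defaults
+    (see the assignments at the top of the constructor body).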
+ """ + max_num_time_step = params["max_num_time_step"] + time_step_length = params["time_step_length"] + self.network = network if network else dynamic_routing_data.BRAESS_NETWORK + self.od_demand = ( + od_demand + if od_demand else dynamic_routing_data.BRAESS_NETWORK_OD_DEMAND) + self.network.check_list_of_od_demand_is_correct(self.od_demand) + self.perform_sanity_checks = perform_sanity_checks + self.time_step_length = time_step_length + self.total_num_vehicle = sum( + [od_demand_item.counts for od_demand_item in self.od_demand]) + self.chance_outcomes = [(i, od_demand_item.counts / self.total_num_vehicle) + for i, od_demand_item in enumerate(self.od_demand)] + game_info = pyspiel.GameInfo( + num_distinct_actions=self.network.num_actions(), + max_chance_outcomes=len(self.od_demand), + num_players=1, + min_utility=-max_num_time_step - 1, + max_utility=0, + max_game_length=max_num_time_step) + super().__init__(_GAME_TYPE, game_info, params if params else {}) + + def new_initial_state(self) -> "MeanFieldRoutingGameState": + """Returns the state corresponding to the start of a game.""" + return MeanFieldRoutingGameState(self, self.time_step_length) + + def make_py_observer(self, iig_obs_type=None, params=None): + """Returns a NetworkObserver object used for observing game state.""" + if ((iig_obs_type is None) or + (iig_obs_type.public_info and not iig_obs_type.perfect_recall)): + return NetworkObserver(self.network.num_actions(), self.max_game_length()) + return IIGObserverForPublicInfoGame(iig_obs_type, params) + + def max_chance_nodes_in_history(self): + """Maximun chance nodes in game history.""" + return self.max_game_length() + 1 + + def get_road_section_as_int(self, section: Optional[str]) -> int: + """Returns the integer representation of the road section.""" + if section is None: + return 0 + start_node, end_node = ( + dynamic_routing_utils._nodes_from_road_section(section)) # pylint:disable=protected-access + return self.network.get_action_id_from_movement(start_node, end_node) + + +class MeanFieldRoutingGameState(pyspiel.State): + """State of the DynamicRoutingGame. + + One player is equal to one vehicle. + See docstring of the game class and of the file for more information. + Attributes: + _current_time_step: current time step of the game. + _is_chance_init: boolean that encodes weither the current node is the + initial chance node. + _is_terminal: boolean that encodes weither the game is over. + _max_arrival_time: int that encodes maximum arrival time on any link in + number of time steps. Needed to enumerate all the possible state of a + vehicle being on a link to compute volume of cars on the link. + _max_waiting_time: maximum time a vehicle can wait on a time. This is done + in order to limit the number of possible state with a vehicle on a + specific link. + _normed_density_on_vehicle_link: density of vehicles on the link that is + used by the representative vehicle. This is given by the mean field + distribution. + _time_step_length: size of the time step, used to convert travel times into + number of game time steps. + _vehicle_at_destination: boolean that encodes if the representative vehicle + has reached its destination. + _vehicle_destination: the destination of the representative vehicle + corresponding to this state. It is associated to the representative + vehicle after the initial chance node according to the od_demand + distribution. 
+ _vehicle_final_arrival_time: the arrival time of the representative vehicle, + the arrival is either 0 if the vehicle is still in the network or its + arrival time if the vehicle has reached its destination. + _vehicle_location: current location of the vehicle as a network road + section. + _vehicle_without_legal_action: boolean that encodes if the representative + vehicle has reach a sink node, meaning that it will not be able to move + anymore. + _waiting_time: time that the vehicle has to wait before moving to the next + link (equal to the link travel time when the vehicle just reached the + link). + """ + _current_time_step: int + _is_chance_init: bool + _is_terminal: bool + _max_arrival_time: int + _max_waiting_time: int + _normed_density_on_vehicle_link: float + _time_step_length: float + _vehicle_at_destination: bool + _vehicle_destination: Optional[str] + _vehicle_final_arrival_time: float + _vehicle_location: Optional[str] + _vehicle_without_legal_action: bool + _waiting_time: int + + def __init__(self, game: MeanFieldRoutingGame, time_step_length: float): + """Constructor; should only be called by Game.new_initial_state.""" + super().__init__(game) + self._current_time_step = 0 + self._is_chance_init = True # is true for the first state of the game. + self._is_terminal = False + if self.get_game().perform_sanity_checks: + assert game.num_players() == 1, ( + "This mean field routing game should have a unique player.") + self._player_id = pyspiel.PlayerId.CHANCE + self._time_step_length = time_step_length + self._vehicle_at_destination = False + self._vehicle_final_arrival_time = 0.0 + self._vehicle_without_legal_action = False + self._vehicle_location = None + self._vehicle_destination = None + self._max_arrival_time = self.get_game().max_game_length() + # Cap maximum link waiting time to faster simulations. + self._max_waiting_time = self._max_arrival_time + self._waiting_time = WAITING_TIME_NOT_ASSIGNED + + @property + def current_time_step(self) -> int: + """Return current time step.""" + return self._current_time_step + + def current_player(self) -> pyspiel.PlayerId: + """Returns the current player.""" + if self._is_terminal: + return pyspiel.PlayerId.TERMINAL + return self._player_id + + def state_to_str(self, + location: str, + time_step: int, + player_id: int = pyspiel.PlayerId.DEFAULT_PLAYER_ID, + waiting_time: int = 0, + destination: str = ""): + """Convert the state to a string representation.""" + return _state_to_str( + self._is_chance_init, + location, + time_step, + player_id, + waiting_time, + destination or self._vehicle_destination, + self._vehicle_final_arrival_time, + ) + + def distribution_support(self) -> List[str]: + """Returns the state that should be used for update_distribution. + + The distribution of the vehicle is used to determined the number of + cars on the same link of the representative vehicle in order to define + the waiting time of the representative vehicle when joining a link. + Therefore, only the states corresponding to be on the link of the + representative vehicle at this current time are useful. + Returns: + list of the two state: being on the link of the representative vehicle at + the current time and being stuck in traffic or not. 
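+
+      For example (illustrative), with a single destination and
+      _max_arrival_time = T, the support contains T + 1 strings, one for each
+      waiting time in {-1, 0, ..., T - 1}, where -1 means "not yet assigned".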
+ """ + if self._vehicle_without_legal_action: + return [] + od_demand = self.get_game().od_demand + dist = [ + self.state_to_str( # pylint:disable=g-complex-comprehension + self._vehicle_location, + self._current_time_step, + player_id=pyspiel.PlayerId.MEAN_FIELD, + waiting_time=waiting_time, + destination=destination) + for waiting_time in range(WAITING_TIME_NOT_ASSIGNED, + self._max_arrival_time) + for destination in {od._destination for od in od_demand} # pylint:disable=protected-access + ] + assert len(set(dist)) == len(dist), ( + f"Distribution should not have duplicated states: {dist}.") + return dist + + def update_distribution(self, distribution: List[float]): + """Get the number of cars on the same link as the representative player. + + _normed_density_on_vehicle_link stores the number of cars on the link + where the representative player is. + Args: + distribution: the probability for a vehicle to be in the states in + distribution_support. The distribution is a list of probabilities. + """ + game = self.get_game() + if game.perform_sanity_checks: + if self._player_id != pyspiel.PlayerId.MEAN_FIELD: + raise ValueError(("update_distribution should only be called at" + " a MEAN_FIELD state.")) + self._player_id = pyspiel.PlayerId.DEFAULT_PLAYER_ID + if not self._vehicle_without_legal_action: + self._normed_density_on_vehicle_link = sum(distribution) + if game.perform_sanity_checks: + assert 0 <= self._normed_density_on_vehicle_link <= 1 + 1e-4, ( + f"{self._normed_density_on_vehicle_link} is not in [0, 1].") + if self._waiting_time == WAITING_TIME_NOT_ASSIGNED: + volume = (game.total_num_vehicle * self._normed_density_on_vehicle_link) + self._waiting_time = int( + game.network.get_travel_time(self._vehicle_location, volume) / + self._time_step_length) - 1 + self._waiting_time = max(0, self._waiting_time) + + def chance_outcomes(self) -> List[Tuple[int, float]]: + """Returns the initial probability distribution is returned. + + One chance outcome correspond to each possible OD pair with a departure + time, the probability of each chance outcome is the proportion of vehicle in + each OD pair with a departure time. + Returns: + list_tuple_outcome_probabilities: chance outcomes and their probability. + """ + game = self.get_game() + if game.perform_sanity_checks: + assert self._player_id == pyspiel.PlayerId.CHANCE + assert self._is_chance_init + return game.chance_outcomes + + def _legal_actions(self, player: pyspiel.PlayerId) -> List[int]: + """Return the legal actions of the vehicle. + + Legal actions are the succesor road section of the vehicle current road + section. + Args: + player: the vehicle id. + + Returns: + list_legal_actions: a list of legal actions. If the game is finished then + the list is empty. If the vehicle is at its destination, has a positive + waiting time or if it is on a node without successors then an empty list + is returned. Otherwise the list of successors nodes of the current + vehicle location is returned. + """ + if self._is_terminal: + return [] + if self.get_game().perform_sanity_checks: + assert player == pyspiel.PlayerId.DEFAULT_PLAYER_ID, str(player) + if self._vehicle_without_legal_action: + # If the vehicle is at destination it cannot do anything. 
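+      # The same applies once the vehicle has reached a sink node: both cases
+      # set _vehicle_without_legal_action, and the placeholder
+      # NO_POSSIBLE_ACTION below is the only action returned.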
+ return [dynamic_routing_utils.NO_POSSIBLE_ACTION] + if self._waiting_time > 0: + return [dynamic_routing_utils.NO_POSSIBLE_ACTION] + _, end_section_node = dynamic_routing_utils._nodes_from_road_section( # pylint:disable=protected-access + self._vehicle_location) + successors = self.get_game().network.get_successors(end_section_node) + if self.get_game().perform_sanity_checks: + if not successors: + raise ValueError(("If a vehicle is not without legal action, it" + " should have an action.")) + assert isinstance(successors, Iterable) + actions = [ + self.get_game().network.get_action_id_from_movement( + end_section_node, d) for d in successors + ] + map(self.get_game().network.assert_valid_action, actions) + return sorted(actions) + + def _apply_action(self, action: int): + """Apply the action to the state. + + This function can be either called on a chance node or on a decision + node. If called on the initial chance node, the action gives in which OD + demand the representative vehicle belongs too (it put the vehicle at + this location and define its destination). + If called on decision node, the action defines on which link the vehicle + will move (if it is not stuck in traffic) and assign a waiting time to the + vehicle. + Args: + action: the action to apply. + """ + if self._player_id == pyspiel.PlayerId.CHANCE: + self._player_id = pyspiel.PlayerId.DEFAULT_PLAYER_ID + assert self._is_chance_init + # Apply action is called on initial chance node to initialized + # the vehicle position based on the initial location + # distribution. + od_demand = self.get_game().od_demand + self._vehicle_destination = od_demand[action].destination + self._vehicle_location = od_demand[action].origin + self._waiting_time = int(od_demand[action].departure_time / + self._time_step_length) + self._is_chance_init = False + self._normed_density_on_vehicle_link = 0 + elif self._player_id == pyspiel.PlayerId.DEFAULT_PLAYER_ID: + self._player_id = pyspiel.PlayerId.MEAN_FIELD + # Apply action is called on a descision node. If the vehicle can + # move, then it will move to the next road section. + # Has the vehicle already reached a sink node? + if not self._vehicle_without_legal_action: + # If the vehicle is stuck in traffic it cannot move. + if self._waiting_time > 0: + self._waiting_time -= 1 + else: + if self.get_game().perform_sanity_checks: + self.get_game().network.assert_valid_action(action, + self._vehicle_location) + self._vehicle_location = ( + self.get_game().network.get_road_section_from_action_id(action)) + # Has the vehicle just reached its destination? + if self._vehicle_location == self._vehicle_destination: + self._vehicle_final_arrival_time = self._current_time_step + self._vehicle_at_destination = True + self._vehicle_without_legal_action = True + # Will the vehicle have a legal action for next time step? + elif self.get_game().network.is_location_at_sink_node( + self._vehicle_location): + self._vehicle_without_legal_action = True + self._vehicle_final_arrival_time = -self.get_game().min_utility() + else: + self._waiting_time = WAITING_TIME_NOT_ASSIGNED + self._current_time_step += 1 + elif self.get_game().perform_sanity_checks: + if self._is_terminal: + raise ValueError( + "_apply_action should not be called at a end of the game.") + if self._player_id == pyspiel.PlayerId.MEAN_FIELD: + raise ValueError( + "_apply_action should not be called at a MEAN_FIELD state.") + # Is the game finished? 
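+    # If so, any vehicle that never reached its destination is assigned the
+    # worst possible arrival time, -min_utility (= max_game_length + 1).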
+ if self._current_time_step >= self.get_game().max_game_length(): + self._is_terminal = True + if not self._vehicle_at_destination: + self._vehicle_final_arrival_time = -self.get_game().min_utility() + + def _action_to_string(self, player, action) -> str: + """Action -> string.""" + if player == pyspiel.PlayerId.CHANCE: + if self._is_chance_init: + return f"Vehicle is assigned to population {action}." + return f"Change node; the vehicle movement is {bool(action)}." + if self.get_game().perform_sanity_checks: + assert player == pyspiel.PlayerId.DEFAULT_PLAYER_ID + if action == dynamic_routing_utils.NO_POSSIBLE_ACTION: + return f"Vehicle {player} reach a sink node or its destination." + if self.get_game().perform_sanity_checks: + self.get_game().network.assert_valid_action(action) + return (f"Vehicle {player} would like to move to " + str( + self.get_game().network.get_road_section_from_action_id(action)) + ".") + + def is_terminal(self) -> bool: + """Returns True if the game is over.""" + return self._is_terminal + + def is_waiting(self) -> bool: + """Returns True if the wait time is non-zero.""" + return self._waiting_time > 0 + + def returns(self) -> List[float]: + """Total reward for each player over the course of the game so far.""" + if not self._is_terminal: + return [0] + return [-self._vehicle_final_arrival_time * self._time_step_length] + + def get_location_as_int(self) -> int: + """Returns the vehicle location. + + This will be 1-based action index of the location, or 0 when the location is + None before the initial chance node. + """ + return self.get_game().get_road_section_as_int(self._vehicle_location) + + def get_destination_as_int(self) -> int: + """Returns the vehicle destination. + + + This will be 1-based action index of the destination, or 0 when the + destination is None before the initial chance node. + """ + return self.get_game().get_road_section_as_int(self._vehicle_destination) + + def __str__(self) -> str: + """String for debug purposes. No particular semantics are required.""" + if self._vehicle_location is not None: + return self.state_to_str( + self._vehicle_location, + self._current_time_step, + player_id=self._player_id, + waiting_time=self._waiting_time) + assert self._current_time_step == 0 + return "Before initial chance node" + + +class NetworkObserver: + """Network observer used by the learning algorithm. + + The state string is the state history string. The state tensor is an array + of size number of locations * 2 + maximum number of time steps + 2, which is + the concatenation of one-hot encodings of the location, destination (1-based; + if location or destination is None, then the 0th element will be set to 1) and + the current time (0-based). The last element of the array will be set to 1 if + waiting time is positive, or 0 otherwise. + + Attributes: + dict: Dictionary of tensors for the components of the observation + corresponding to the location, destination and time. + tensor: The concatenated form of the observation. + """ + + def __init__(self, num_locations: int, max_num_time_step: int): + """Initializes an empty observation tensor.""" + self.tensor = np.zeros(num_locations * 2 + max_num_time_step + 1 + 1, + np.float32) + self.dict = { + "location": self.tensor[:num_locations], + "destination": self.tensor[num_locations:num_locations * 2], + "time": self.tensor[num_locations * 2:-1], + "waiting": self.tensor[-1:] + } + + def set_from(self, state, player): + """Sets the state tensor based on the specified state. 
+ + Note that the function may be called with arbitrary states of the game, e.g. + from different runs, and therefore the tensor should be cleared and updated + instead of preserving any earlier values. + + Args: + state: state of the game. + player: player id that should play. + """ + assert player == pyspiel.PlayerId.DEFAULT_PLAYER_ID + self.tensor.fill(0) + self.dict["location"][state.get_location_as_int()] = 1 + self.dict["destination"][state.get_destination_as_int()] = 1 + self.dict["time"][state.current_time_step] = 1 + self.dict["waiting"][0] = state.is_waiting() + + def string_from(self, state, player): + """Return the state history string.""" + assert player == pyspiel.PlayerId.DEFAULT_PLAYER_ID + return str(state) + + +# Register the game with the OpenSpiel library +pyspiel.register_game(_GAME_TYPE, MeanFieldRoutingGame) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/dynamic_routing_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/dynamic_routing_test.py new file mode 100644 index 0000000..0782744 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/dynamic_routing_test.py @@ -0,0 +1,267 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
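For readers skimming the diff, here is a minimal sketch (not part of the diff itself) of the observation layout that `NetworkObserver` produces and that `test_observer_correct` below verifies. The sizes (8 locations, 10 time steps) and the example location/destination values are the ones exercised by that test; the tensor is the concatenation of a location one-hot, a destination one-hot, a time one-hot, and a trailing waiting bit.

```python
import numpy as np

# Illustrative sizes only (taken from the default game used in the test below).
num_locations, max_num_time_step = 8, 10

# Same layout as NetworkObserver.__init__ above:
# [location one-hot | destination one-hot | time one-hot | waiting bit]
tensor = np.zeros(num_locations * 2 + max_num_time_step + 1 + 1, np.float32)

location, destination, time_step, waiting = 7, 6, 0, False  # example values
tensor[location] = 1                       # indices [0, num_locations)
tensor[num_locations + destination] = 1    # indices [num_locations, 2 * num_locations)
tensor[2 * num_locations + time_step] = 1  # indices [2 * num_locations, -1)
tensor[-1] = float(waiting)                # last entry: 1 if the vehicle is waiting

# Matches expected_tensor in test_observer_correct below.
assert list(np.flatnonzero(tensor)) == [7, 14, 16]
```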
+ +# Lint as python3 +"""Tests for Python mean field routing game.""" + +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np +import numpy.testing as npt + +from open_spiel.python import games # pylint:disable=unused-import +from open_spiel.python import policy +from open_spiel.python.games import dynamic_routing_utils +from open_spiel.python.mfg import games as mfg_games # pylint:disable=unused-import +from open_spiel.python.mfg.algorithms import distribution +from open_spiel.python.mfg.algorithms import mirror_descent +from open_spiel.python.mfg.algorithms import nash_conv +from open_spiel.python.mfg.algorithms import policy_value +from open_spiel.python.mfg.games import dynamic_routing +from open_spiel.python.mfg.games import factory +from open_spiel.python.observation import make_observation +import pyspiel + +_NUMBER_OF_ITERATIONS_TESTS = 1 + + +class SocialOptimumBraess(policy.Policy): + + def action_probabilities(self, state, player_id=None): + legal_actions = state.legal_actions() + if not legal_actions: + return {dynamic_routing_utils.NO_POSSIBLE_ACTION: 1.0} + elif len(legal_actions) == 1: + return {legal_actions[0]: 1.0} + else: + if legal_actions[0] == 1: + return {1: 0.5, 2: 0.5} + elif legal_actions[0] == 3: + return {4: 1.0} + raise ValueError(f"{legal_actions} is not correct.") + + +class NashEquilibriumBraess(policy.Policy): + + def action_probabilities(self, state, player_id=None): + legal_actions = state.legal_actions() + if not legal_actions: + return {dynamic_routing_utils.NO_POSSIBLE_ACTION: 1.0} + elif len(legal_actions) == 1: + return {legal_actions[0]: 1.0} + else: + if legal_actions[0] == 1: + return {1: 0.75, 2: 0.25} + elif legal_actions[0] == 3: + return {3: 2 / 3, 4: 1 / 3} + raise ValueError(f"{legal_actions} is not correct. {state}.") + + +class MeanFieldRoutingGameTest(absltest.TestCase): + """Checks we can create the game and clone states.""" + + def test_load(self): + """Test load and game creation.""" + game = pyspiel.load_game("python_mfg_dynamic_routing") + game.new_initial_state() + + def test_create(self): + """Checks we can create the game and clone states.""" + game = pyspiel.load_game("python_mfg_dynamic_routing") + self.assertEqual(game.get_type().dynamics, + pyspiel.GameType.Dynamics.MEAN_FIELD) + state = game.new_initial_state() + state.clone() + + def test_random_game(self): + """Test random simulation.""" + game = pyspiel.load_game("python_mfg_dynamic_routing") + pyspiel.random_sim_test(game, num_sims=10, serialize=False, verbose=True) + + def test_evolving_trajectory_with_uniform_policy(self): + """Test evolving distribution.""" + game = pyspiel.load_game("python_mfg_dynamic_routing") + distribution.DistributionPolicy(game, policy.UniformRandomPolicy(game)) + + def test_non_default_param_from_string(self): + """Check params can be given through string loading.""" + game = pyspiel.load_game("python_mfg_dynamic_routing(max_num_time_step=5)") + self.assertEqual(game.max_game_length(), 5) + + def test_non_default_param_from_dict(self): + """Check params can be given through a dictionary.""" + game = pyspiel.load_game("python_mfg_dynamic_routing", + {"max_num_time_step": 5}) + self.assertEqual(game.max_game_length(), 5) + + # Enable ficticious_play with game where the dynamics depend on the + # distribution. 
+ # def test_ficticious_play(self): + # """Test that ficticious play can be used on this game.""" + # mfg_game = pyspiel.load_game("python_mfg_dynamic_routing") + # fp = fictitious_play.FictitiousPlay(mfg_game) + # for _ in range(_NUMBER_OF_ITERATIONS_TESTS): + # fp.iteration() + # nash_conv.NashConv(mfg_game, fp.get_policy()) + + def test_online_mirror_descent(self): + """Test that online mirror descent can be used on this game.""" + mfg_game = pyspiel.load_game("python_mfg_dynamic_routing") + omd = mirror_descent.MirrorDescent(mfg_game) + for _ in range(_NUMBER_OF_ITERATIONS_TESTS): + omd.iteration() + nash_conv.NashConv(mfg_game, omd.get_policy()) + + def test_online_mirror_descent_convergence(self): + """Test that online mirror descent converges to equilibrium in default game.""" + mfg_game = pyspiel.load_game("python_mfg_dynamic_routing", { + "time_step_length": 0.05, + "max_num_time_step": 100 + }) + omd = mirror_descent.MirrorDescent(mfg_game, lr=1) + for _ in range(50): + omd.iteration() + self.assertAlmostEqual( + nash_conv.NashConv(mfg_game, omd.get_policy()).nash_conv(), 0) + + def test_vehicle_origin_outside_network(self): + """Check raise assertion if vehicle's origin is outside the Network.""" + od_demand = [ + dynamic_routing_utils.OriginDestinationDemand("I->O", "D->E", 0, 5) + ] + with self.assertRaises(ValueError): + dynamic_routing.MeanFieldRoutingGame( + { + "max_num_time_step": 10, + "time_step_length": 0.5, + "players": -1 + }, + od_demand=od_demand) + + def test_vehicle_destination_outside_network(self): + """Check raise assertion if vehicle's destination is outside the Network.""" + od_demand = [ + dynamic_routing_utils.OriginDestinationDemand("O->A", "E->F", 0, 5) + ] + with self.assertRaises(ValueError): + dynamic_routing.MeanFieldRoutingGame( + { + "max_num_time_step": 10, + "time_step_length": 0.5, + "players": -1 + }, + od_demand=od_demand) + + def test_multiple_departure_time_vehicle(self): + """Check that departure time can be define.""" + od_demand = [ + dynamic_routing_utils.OriginDestinationDemand("O->A", "D->E", 0, 5), + dynamic_routing_utils.OriginDestinationDemand("O->A", "D->E", 0.5, 5), + dynamic_routing_utils.OriginDestinationDemand("O->A", "D->E", 1.0, 5) + ] + game = dynamic_routing.MeanFieldRoutingGame( + { + "max_num_time_step": 10, + "time_step_length": 0.5, + "players": -1 + }, + od_demand=od_demand) + pyspiel.random_sim_test(game, num_sims=10, serialize=False, verbose=True) + + def test_game_evolution_uniform_policy(self): + """Check game evolution under uniform policy.""" + # Test evolution of the game as expected (test value of the state). + # Test legal_actions(). 
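The mirror-descent tests above double as a usage recipe. A minimal standalone sketch (same calls and parameters as `test_online_mirror_descent_convergence`, no new API) for running online mirror descent on the routing game and measuring how far the learned policy is from equilibrium:

```python
import pyspiel
from open_spiel.python.mfg import games as mfg_games  # pylint: disable=unused-import  # registers the Python MFG games
from open_spiel.python.mfg.algorithms import mirror_descent
from open_spiel.python.mfg.algorithms import nash_conv

# Default routing network with a fine time discretization, as in the
# convergence test above.
game = pyspiel.load_game("python_mfg_dynamic_routing",
                         {"time_step_length": 0.05, "max_num_time_step": 100})

omd = mirror_descent.MirrorDescent(game, lr=1)
for _ in range(50):
    omd.iteration()

# A NashConv of 0 means the policy is an exact mean field Nash equilibrium.
print(nash_conv.NashConv(game, omd.get_policy()).nash_conv())
```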
+ + def test_observer_correct(self): + """Checks that the observer is correctly updated.""" + game = pyspiel.load_game("python_mfg_dynamic_routing") + num_locations, steps = 8, 10 + self.assertEqual(game.num_distinct_actions(), num_locations) + self.assertEqual(game.max_game_length(), steps) + py_obs = make_observation(game) + + state = game.new_initial_state() + self.assertEqual(state.current_player(), pyspiel.PlayerId.CHANCE) + + state.apply_action(0) + self.assertEqual(state.current_player(), 0) + + location, destination = 7, 6 + self.assertEqual(state.get_location_as_int(), location) + self.assertEqual(state.get_destination_as_int(), destination) + + py_obs.set_from(state, state.current_player()) + obs_size = num_locations * 2 + steps + 2 + expected_tensor = np.zeros(obs_size) + # location = 7 + # destination + num_locations = 14 + # time + 2 * num_locations = 16 + # waiting bit at last index. + expected_tensor[[7, 14, 16]] = 1 + npt.assert_array_equal(py_obs.tensor, expected_tensor) + + def test_apply_actions_error_no_movement_with_negative_waiting_time(self): + """Check that a vehicle cannot choose to not move if it has to move.""" + # Test apply_actions(). + + def test_apply_actions_error_wrong_movement_with_negative_waiting_time(self): + """Check that a vehicle cannot choose to move to a not successor link.""" + # Test apply_actions(). + + def test_apply_actions_error_movement_with_positive_waiting_time(self): + """Check that a vehicle cannot choose to move if it cannot move yet.""" + # Test apply_actions(). + + @absltest.skip( + "Test of OMD on Sioux Falls is disabled as it takes a long time to run.") + def test_online_mirror_descent_sioux_falls_dummy(self): + """Test that online mirror descent can be used on the Sioux Falls game.""" + mfg_game = factory.create_game_with_setting( + "python_mfg_dynamic_routing", + "dynamic_routing_sioux_falls_dummy_demand") + omd = mirror_descent.MirrorDescent(mfg_game) + for _ in range(_NUMBER_OF_ITERATIONS_TESTS): + omd.iteration() + nash_conv.NashConv(mfg_game, omd.get_policy()) + + +class CppVsPythonMeanFieldRoutingGameTest(parameterized.TestCase): + + @parameterized.named_parameters( + ("python", ("python_mfg_dynamic_routing(max_num_time_step=100," + "time_step_length=0.05)")), + ("cpp", ("mfg_dynamic_routing(max_num_time_step=100," + "time_step_length=0.05,network=braess)"))) + def test_braess_paradox_game(self, game_name): + """Test that Braess paradox can be reproduced with the mean field game.""" + mfg_game = pyspiel.load_game(game_name) + + ne_policy = NashEquilibriumBraess(mfg_game, 1) + self.assertEqual( + -policy_value.PolicyValue( + mfg_game, distribution.DistributionPolicy(mfg_game, ne_policy), + ne_policy).value(mfg_game.new_initial_state()), 3.75) + self.assertEqual(nash_conv.NashConv(mfg_game, ne_policy).nash_conv(), 0.0) + + so_policy = SocialOptimumBraess(mfg_game, 1) + self.assertEqual( + -policy_value.PolicyValue( + mfg_game, distribution.DistributionPolicy(mfg_game, so_policy), + so_policy).value(mfg_game.new_initial_state()), 3.5) + self.assertEqual(nash_conv.NashConv(mfg_game, so_policy).nash_conv(), 0.75) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/factory.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/factory.py new file mode 100644 index 0000000..5d2d865 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/factory.py @@ -0,0 +1,131 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed 
under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Factory to create (benchmark) MFG games with different settings.""" + +from typing import Optional + +from absl import logging + +from open_spiel.python.games import dynamic_routing_data +from open_spiel.python.mfg import games # pylint: disable=unused-import +from open_spiel.python.mfg.games import crowd_modelling_2d +from open_spiel.python.mfg.games import dynamic_routing +from open_spiel.python.mfg.games import predator_prey +import pyspiel + +# For each game, the setting with the game name, e.g. python_mfg_dynamic_routing +# for dynamic routing, denotes the default parameters. Variations are not +# prefixed by the exact game name so that they can be used with different +# implementations, e.g. Python or C++, of the same game. Empty parameters use +# the default values as specified in the game. +GAME_SETTINGS = { + # Crowd avoidance game. + "crowd_avoidance": {}, + # 2D crowd modelling game. + "crowd_modelling_2d_10x10": {}, + "crowd_modelling_2d_four_rooms": { + **crowd_modelling_2d.FOUR_ROOMS, + "only_distribution_reward": True, + }, + "crowd_modelling_2d_maze": { + **crowd_modelling_2d.MAZE, + "only_distribution_reward": True, + }, + # Dynamic routing game. + "dynamic_routing_braess": { + "max_num_time_step": 100, + "network": "braess", + "time_step_length": 0.05, + }, + "dynamic_routing_line": { + "max_num_time_step": 5, + "network": "line", + "time_step_length": 1.0, + }, + "dynamic_routing_sioux_falls_dummy_demand": { + "max_num_time_step": 81, + "network": "sioux_falls_dummy_demand", + "time_step_length": 0.5, + }, + "dynamic_routing_sioux_falls": { + "max_num_time_step": 81, + "network": "sioux_falls", + "time_step_length": 0.5, + }, + # Predator and prey game. + "predator_prey_5x5x3": { + **predator_prey.THREE_POPULATIONS, + }, + "predator_prey_5x5x4": { + **predator_prey.FOUR_POPULATIONS, + }, + # Linear-quadratic game. + "linear_quadratic": {}, + # Periodic aversion game. + "periodic_aversion": {}, +} + +# Default settings for the games. 
+GAME_SETTINGS.update({ + "python_mfg_crowd_avoidance": GAME_SETTINGS["crowd_avoidance"], + "mean_field_lin_quad": GAME_SETTINGS["linear_quadratic"], + "mfg_crowd_modelling_2d": GAME_SETTINGS["crowd_modelling_2d_10x10"], + "mfg_dynamic_routing": GAME_SETTINGS["dynamic_routing_line"], + "python_mfg_dynamic_routing": GAME_SETTINGS["dynamic_routing_line"], + "python_mfg_periodic_aversion": GAME_SETTINGS["periodic_aversion"], + "python_mfg_predator_prey": GAME_SETTINGS["predator_prey_5x5x3"], +}) + +DYNAMIC_ROUTING_NETWORK = { + "line": (dynamic_routing_data.LINE_NETWORK, + dynamic_routing_data.LINE_NETWORK_OD_DEMAND), + "braess": (dynamic_routing_data.BRAESS_NETWORK, + dynamic_routing_data.BRAESS_NETWORK_OD_DEMAND), + "sioux_falls_dummy_demand": + (dynamic_routing_data.SIOUX_FALLS_NETWORK, + dynamic_routing_data.SIOUX_FALLS_DUMMY_OD_DEMAND), + "sioux_falls": (dynamic_routing_data.SIOUX_FALLS_NETWORK, + dynamic_routing_data.SIOUX_FALLS_OD_DEMAND) +} + + +def create_game_with_setting(game_name: str, + setting: Optional[str] = None) -> pyspiel.Game: + """Creates an OpenSpiel game with the specified setting. + + Args: + game_name: Name of a registered game, e.g. mfg_crowd_modelling_2d. + setting: Name of the pre-defined setting. If None, game_name will be used + instead. The setting should be present in the GAME_SETTINGS map above. + + Returns: + a Game. + """ + setting = setting or game_name + params = GAME_SETTINGS.get(setting) + if params is None: + raise ValueError(f"{setting} setting does not exist for {game_name}.") + + logging.info("Creating %s game with parameters: %r", game_name, params) + + # Dynamic routing game requires setting the network and demand explicitly. + if game_name == "python_mfg_dynamic_routing": + # Create a copy since we modify it below removing the network key. + params = params.copy() + network = params.pop("network") + network, od_demand = DYNAMIC_ROUTING_NETWORK[network] + return dynamic_routing.MeanFieldRoutingGame( + params, network=network, od_demand=od_demand) + + return pyspiel.load_game(game_name, params) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/factory_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/factory_test.py new file mode 100644 index 0000000..109de30 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/factory_test.py @@ -0,0 +1,47 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
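Before the tests, a minimal usage sketch of `create_game_with_setting` defined above (illustrative only; the setting name must be a `GAME_SETTINGS` key, and passing `None` falls back to the defaults registered for the game name):

```python
from open_spiel.python.mfg.games import factory

# Load the Python dynamic routing game on the Braess network. For this game
# the factory pops the "network" key from the setting and passes the network
# and OD demand to MeanFieldRoutingGame explicitly.
game = factory.create_game_with_setting("python_mfg_dynamic_routing",
                                        "dynamic_routing_braess")
print(game.max_game_length())  # 100 time steps (time_step_length=0.05)
```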
+"""Tests for factory.""" + +from absl.testing import absltest +from absl.testing import parameterized + +from open_spiel.python.mfg.games import factory +import pyspiel + + +class FactoryTest(parameterized.TestCase): + + @parameterized.parameters( + ("mfg_crowd_modelling_2d", None), + ("mfg_crowd_modelling_2d", "crowd_modelling_2d_10x10"), + ("mfg_crowd_modelling_2d", "crowd_modelling_2d_four_rooms"), + ("mfg_dynamic_routing", None), + ("mfg_dynamic_routing", "dynamic_routing_line"), + ("mfg_dynamic_routing", "dynamic_routing_braess"), + ("python_mfg_dynamic_routing", None), + ("python_mfg_dynamic_routing", "dynamic_routing_line"), + ("python_mfg_dynamic_routing", "dynamic_routing_braess"), + ("python_mfg_dynamic_routing", + "dynamic_routing_sioux_falls_dummy_demand"), + ("python_mfg_dynamic_routing", "dynamic_routing_sioux_falls"), + ("python_mfg_periodic_aversion", None), + ("python_mfg_predator_prey", None), + ("python_mfg_predator_prey", "predator_prey_5x5x3")) + def test_smoke(self, game_name, setting): + game = factory.create_game_with_setting(game_name, setting) + self.assertIsInstance(game, pyspiel.Game) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/linear_quadratic.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/linear_quadratic.py new file mode 100644 index 0000000..a97939f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/linear_quadratic.py @@ -0,0 +1,420 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Mean Field Linear Quadratic, implemented in Python. + +This is a demonstration of implementing a mean field game in Python. + +Fictitious play for mean field games: Continuous time analysis and applications, +Perrin & al. 2019 (https://arxiv.org/abs/2007.03458). This game corresponds +to the game in section 4.1. 
+""" +import math +from typing import Any, List, Mapping + +import numpy as np +import scipy.stats + +from open_spiel.python import observation +import pyspiel + +_NUM_PLAYERS = 1 +_SIZE = 10 +_HORIZON = 10 +_MEAN_REVERT = 0.0 +_VOLATILITY = 1.0 +_CROSS_Q = 0.01 +_KAPPA = 0.5 +_TERMINAL_COST = 1.0 +_DELTA_T = 1.0 +_N_ACTIONS_PER_SIDE = 3 +_SPATIAL_BIAS = 0 + +_DEFAULT_PARAMS = { + "size": _SIZE, + "horizon": _HORIZON, + "dt": _DELTA_T, + "n_actions_per_side": _N_ACTIONS_PER_SIDE, + "volatility": _VOLATILITY, + "mean_revert": _MEAN_REVERT, + "cross_q": _CROSS_Q, + "kappa": _KAPPA, + "terminal_cost": _TERMINAL_COST, + "spatial_bias": _SPATIAL_BIAS, +} + +_GAME_TYPE = pyspiel.GameType( + short_name="mean_field_lin_quad", + long_name="Mean-Field Linear Quadratic Game", + dynamics=pyspiel.GameType.Dynamics.MEAN_FIELD, + chance_mode=pyspiel.GameType.ChanceMode.EXPLICIT_STOCHASTIC, + information=pyspiel.GameType.Information.PERFECT_INFORMATION, + utility=pyspiel.GameType.Utility.GENERAL_SUM, + reward_model=pyspiel.GameType.RewardModel.REWARDS, + max_num_players=_NUM_PLAYERS, + min_num_players=_NUM_PLAYERS, + provides_information_state_string=True, + provides_information_state_tensor=False, + provides_observation_string=True, + provides_observation_tensor=True, + parameter_specification=_DEFAULT_PARAMS, +) + + +class MFGLinearQuadraticGame(pyspiel.Game): + """A Mean-Field Linear Quadratic game. + + For now, only single-population setting is covered. A game starts by an + initial chance node that selects the initial state of the player in the MFG. + Then the game sequentially alternates between: + - An action selection node (where the player id is >= 0) + - A chance node (the player id is pyspiel.PlayerId.CHANCE) + - A Mean Field node (the player id is pyspiel.PlayerId.MEAN_FIELD) + """ + + # pylint:disable=dangerous-default-value + def __init__(self, params: Mapping[str, Any] = _DEFAULT_PARAMS): + self.size = params.get("size", _SIZE) + self.horizon = params.get("horizon", _HORIZON) + self.dt = params.get("dt", _DELTA_T) + self.n_actions_per_side = params.get( + "n_actions_per_side", _N_ACTIONS_PER_SIDE + ) + self.volatility = params.get("volatility", _VOLATILITY) + self.mean_revert = params.get("mean_revert", _MEAN_REVERT) + self.cross_q = params.get("cross_q", _CROSS_Q) + self.kappa = params.get("kappa", _KAPPA) + self.terminal_cost = params.get("terminal_cost", _TERMINAL_COST) + self.spatial_bias = params.get("spatial_bias", _SPATIAL_BIAS) + + game_info = pyspiel.GameInfo( + num_distinct_actions=2 * self.n_actions_per_side + 1, + max_chance_outcomes=2 * self.n_actions_per_side + 1, + num_players=_NUM_PLAYERS, + min_utility=-np.inf, + max_utility=+np.inf, + utility_sum=0.0, + max_game_length=self.horizon, + ) + super().__init__(_GAME_TYPE, game_info, params) + + def new_initial_state(self): + """Returns a state corresponding to the start of a game.""" + return MFGLinearQuadraticState(self) + + def make_py_observer(self, iig_obs_type=None, params=None): + """Returns an object used for observing game state.""" + if (iig_obs_type is None) or ( + iig_obs_type.public_info and not iig_obs_type.perfect_recall + ): + return Observer(params, self) + return observation.IIGObserverForPublicInfoGame(iig_obs_type, params) + + def max_chance_nodes_in_history(self): + """Maximun chance nodes in game history.""" + return self.horizon + 1 + + +class MFGLinearQuadraticState(pyspiel.State): + """A Mean Field Normal-Form state.""" + + def __init__(self, game): + """Constructor; should only be called by 
Game.new_initial_state.""" + super().__init__(game) + self._player_id = pyspiel.PlayerId.CHANCE + + self._last_action = game.n_actions_per_side + self.tick = 0 + self.x = None + self.return_value = 0.0 + + self.game = game + + self.size = game.size + self.horizon = game.horizon + self.dt = game.dt + self.n_actions_per_side = game.n_actions_per_side + self.volatility = game.volatility + self.mean_revert = game.mean_revert + self.cross_q = game.cross_q + self.kappa = game.kappa + self.terminal_cost = game.terminal_cost + + # Represents the current probability distribution over game states. + # Initialized with a uniform distribution. + self._distribution = [1.0 / self.size for _ in range(self.size)] + + def to_string(self): + return self.state_to_str(self.x, self.tick) + + def state_to_str(self, x, tick, player_id=pyspiel.PlayerId.DEFAULT_PLAYER_ID): + """A string that uniquely identifies a triplet x, t, player_id.""" + if self.x is None: + return "initial" + + if self._player_id == pyspiel.PlayerId.DEFAULT_PLAYER_ID: + return "({}, {})".format(x, tick) + elif self._player_id == pyspiel.PlayerId.MEAN_FIELD: + return "({}, {})_a".format(x, tick) + elif self._player_id == pyspiel.PlayerId.CHANCE: + return "({}, {})_a_mu".format(x, tick) + raise ValueError( + "player_id is not mean field, chance or default player id." + ) + + # OpenSpiel (PySpiel) API functions are below. This is the standard set that + # should be implemented by every perfect-information sequential-move game. + + @property + def n_actions(self): + return 2 * self.n_actions_per_side + 1 + + def _legal_actions(self, player): + """Returns a list of legal actions for player and MFG nodes.""" + if player == pyspiel.PlayerId.MEAN_FIELD: + return [] + if ( + player == pyspiel.PlayerId.DEFAULT_PLAYER_ID + and player == self.current_player() + ): + return list(range(self.n_actions)) + raise ValueError( + f"Unexpected player {player}. " + "Expected a mean field or current player 0." + ) + + def _apply_action(self, action): + """Applies the specified action to the state.""" + if self._player_id == pyspiel.PlayerId.MEAN_FIELD: + raise ValueError( + "_apply_action should not be called at a MEAN_FIELD state." 
+ ) + self.return_value = self._rewards() + + assert ( + self._player_id == pyspiel.PlayerId.DEFAULT_PLAYER_ID + or self._player_id == pyspiel.PlayerId.CHANCE + ) + + if self.x is None: + self.x = action + self._player_id = pyspiel.PlayerId.DEFAULT_PLAYER_ID + return + + if action < 0 or action >= self.n_actions: + raise ValueError( + "The action is between 0 and {} at any node".format(self.n_actions) + ) + + move = self.action_to_move(action) + if self._player_id == pyspiel.PlayerId.CHANCE: + self.x += move * math.sqrt(self.dt) * self.volatility + self.x = round(self.x) % self.size + self._player_id = pyspiel.PlayerId.MEAN_FIELD + self.tick += 1 + elif self._player_id == pyspiel.PlayerId.DEFAULT_PLAYER_ID: + dist_mean = self.distribution_average() - self.x + full_move = move + full_move += self.mean_revert * dist_mean + full_move *= self.dt + self.x += round(full_move) + self.x = round(self.x) % self.size + + self._last_action = action + self._player_id = pyspiel.PlayerId.CHANCE + + def _action_to_string(self, player, action): + """Action -> string.""" + del player + return str(action) + + def action_to_move(self, action): + return action - self.n_actions_per_side + + def actions_to_position(self): + return [a - self.n_actions_per_side for a in range(self.n_actions)] + + def chance_outcomes(self): + """Returns the possible chance outcomes and their probabilities.""" + if self.x is None: + return list(enumerate(self._distribution)) + + a = np.array(self.actions_to_position()) + gaussian_vals = scipy.stats.norm.cdf( + a + 0.5, scale=self.volatility + ) - scipy.stats.norm.cdf(a - 0.5, scale=self.volatility) + gaussian_vals[0] += ( + scipy.stats.norm.cdf(a[0] - 0.5, scale=self.volatility) - 0.0 + ) + gaussian_vals[-1] += 1.0 - scipy.stats.norm.cdf( + a[-1] + 0.5, scale=self.volatility + ) + return [ + (act, p) for act, p in zip(list(range(self.n_actions)), gaussian_vals) + ] + + def distribution_support(self): + """return a list of state string.""" + return [ + self.state_to_str(i, self.tick, player_id=pyspiel.PlayerId.MEAN_FIELD) + for i in range(self.size) + ] + + def distribution_average(self): + """return the average of the distribution over the states: 0, ..., Size.""" + states = np.arange(self.size) + pos = states * (self._distribution) + return np.sum(pos) + + def update_distribution(self, distribution): + """This function is central and specific to the logic of the MFG. + + Args: + distribution: a distribution to register. - function should be called + when the node is in MEAN_FIELD state. - distribution are probabilities + that correspond to each game state given by distribution_support. + """ + if self._player_id != pyspiel.PlayerId.MEAN_FIELD: + raise ValueError( + "update_distribution should only be called at a MEAN_FIELD state." 
+ ) + self._distribution = distribution.copy() + self._player_id = pyspiel.PlayerId.DEFAULT_PLAYER_ID + + @property + def t(self): + return self.tick * self.dt + + def is_terminal(self): + """Returns True if the game is over.""" + return self.t >= self.horizon + + def current_player(self): + """Returns id of the next player to move, or TERMINAL if game is over.""" + if self.is_terminal(): + return int(pyspiel.PlayerId.TERMINAL) + return int(self._player_id) + + def eta_t(self): + """Computes the theoretical policy's `eta_t` term.""" + # pylint: disable=invalid-name + kappa = self.kappa + K = self.mean_revert + q = self.cross_q + c = self.terminal_cost + T = self.horizon + t = self.t + + R = (K + q) ** 2 + (kappa - q**2) + deltap = -(K + q) + math.sqrt(R) + deltam = -(K + q) - math.sqrt(R) + numerator = -(kappa - q**2) * ( + math.exp((deltap - deltam) * (T - t)) - 1 + ) - c * (deltap * math.exp((deltap - deltam) * (T - t)) - deltam) + denominator = ( + deltam * math.exp((deltap - deltam) * (T - t)) - deltap + ) - c * (math.exp((deltap - deltam) * (T - t)) - 1) + return numerator / denominator + + def _rewards(self): + """Reward for the player for this state.""" + if self._player_id == pyspiel.PlayerId.DEFAULT_PLAYER_ID: + dist_mean = self.distribution_average() - self.x + + move = self.action_to_move(self._last_action) + action_reward = ( + self.dt + / 2 + * ( + -(move**2) + + 2 * self.cross_q * move * dist_mean + - self.kappa * dist_mean**2 + ) + ) + + if self.is_terminal(): + terminal_reward = -self.terminal_cost * dist_mean**2 / 2.0 + return action_reward + terminal_reward + return action_reward + + return 0.0 + + def rewards(self) -> List[float]: + """Rewards for all players.""" + # For now, only single-population mean field games are supported. + return [self._rewards()] + + def _returns(self): + """Returns is the sum of all payoffs collected so far.""" + return self._rewards() + + def returns(self) -> List[float]: + """Returns for all players.""" + # For now, only single-population mean field games are supported. + return [self._returns()] + + def __str__(self): + """A string that uniquely identify the current state.""" + return self.state_to_str( + x=self.x, tick=self.tick, player_id=self._player_id + ) + + +class Observer: + """Observer, conforming to the PyObserver interface (see observation.py).""" + + def __init__(self, params, game): + """Initializes an empty observation tensor.""" + del params + + self.size = game.size + self.horizon = game.horizon + self.tensor = np.zeros(2, np.float32) + self.dict = { + "x": self.tensor[0], + "t": self.tensor[1], + "observation": self.tensor, + } + + def set_from(self, state, player: int): + """Updates `tensor` and `dict` to reflect `state` from PoV of `player`.""" + del player + # We update the observation via the shaped tensor since indexing is more + # convenient than with the 1-D tensor. Both are views onto the same memory. + self.tensor[0] = state.x + self.tensor[1] = state.t + # state.x is None for the initial (blank) state, don't set any + # position bit in that case. 
+ if state.x is not None: + if not 0 <= state.x < self.size: + raise ValueError( + f"Expected {state} x position to be in [0, {self.size})" + ) + self.dict["x"] = np.array([state.x]) + if not 0 <= state.t <= self.horizon: + raise ValueError(f"Expected {state} time to be in [0, {self.horizon}]") + self.dict["t"] = np.array([state.t]) + + def string_from(self, state, player): + """Observation of `state` from the PoV of `player`, as a string.""" + del player + return state.to_string() + + def plot_mean_field_flow(self, policy): + a = policy + return a + + +pyspiel.register_game(_GAME_TYPE, MFGLinearQuadraticGame) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/linear_quadratic_example.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/linear_quadratic_example.py new file mode 100644 index 0000000..8ee3810 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/linear_quadratic_example.py @@ -0,0 +1,139 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Generate a dummy trajectory and compute the distribution of a policy.""" +# pylint: disable=unused-import +from typing import Sequence + +from absl import app +from absl import flags +import numpy as np + +from open_spiel.python import policy +from open_spiel.python.mfg import games +from open_spiel.python.mfg.algorithms import best_response_value +from open_spiel.python.mfg.algorithms import distribution +from open_spiel.python.mfg.algorithms import fictitious_play +from open_spiel.python.mfg.algorithms import greedy_policy +from open_spiel.python.mfg.algorithms import mirror_descent +from open_spiel.python.mfg.algorithms import nash_conv +from open_spiel.python.mfg.algorithms import policy_value +from open_spiel.python.mfg.games import linear_quadratic +import pyspiel + +FLAGS = flags.FLAGS + +flags.DEFINE_string('game', 'mean_field_lin_quad', 'Game to use.') +flags.DEFINE_integer('size', 10, 'Number of states.') +flags.DEFINE_integer('horizon', 5, 'Horizon size.') +flags.DEFINE_float('dt', 1.0, 'Delta t.') +flags.DEFINE_integer('n_actions_per_side', 3, + 'Number actions per side (Total num actions = 2*x+1).') +flags.DEFINE_float('volatility', 1.0, 'Action noise.') +flags.DEFINE_float('learning_rate', 0.01, 'OMD learning rate.') + + +def get_l1_distribution_dist(mu1, mu2): + mu1d = mu1.distribution + mu2d = mu2.distribution + states = set(list(mu1d.keys()) + list(mu2d.keys())) + return sum([abs(mu1d.get(a, 0.0) - mu2d.get(a, 0.0)) for a in states + ]) * FLAGS.dt / FLAGS.horizon + + +class LinearPolicy(policy.Policy): + """Project values on the policy simplex.""" + + def __init__(self, game, player_ids): # pylint:disable=useless-super-delegation + """Initializes the projected policy. + + Args: + game: The game to analyze. + player_ids: list of player ids for which this policy applies; each should + be in the range 0..game.num_players()-1. 
+ """ + super(LinearPolicy, self).__init__(game, player_ids) + + def action_probabilities(self, state, player_id=None): + mu_bar_t = state.distribution_average() + x_t = state.x + q = state.cross_q + n_actions_per_side = state.n_actions_per_side + lin_action = (q + state.eta_t()) * (mu_bar_t - x_t) + action = n_actions_per_side + min( + n_actions_per_side, max(round(lin_action), -n_actions_per_side)) + action_prob = [(a, 0.0) for a in state.legal_actions()] + action_prob[action] = (action, 1.0) + return dict(action_prob) + + +def main(argv: Sequence[str]) -> None: + if len(argv) > 1: + raise app.UsageError('Too many command-line arguments.') + mfg_game = pyspiel.load_game( + FLAGS.game, { + 'dt': FLAGS.dt, + 'size': FLAGS.size, + 'horizon': FLAGS.horizon, + 'n_actions_per_side': FLAGS.n_actions_per_side, + 'volatility': FLAGS.volatility + }) + + uniform_policy = policy.UniformRandomPolicy(mfg_game) + nash_conv_fp = nash_conv.NashConv(mfg_game, uniform_policy) + print('Uniform Policy Nashconv:', nash_conv_fp.nash_conv()) + + # Optimal control in the continuous setting. + theoretical_control = LinearPolicy(mfg_game, + list(range(mfg_game.num_players()))) + theoretical_distribution = distribution.DistributionPolicy( + mfg_game, theoretical_control) + discretized_optimal_value = policy_value.PolicyValue( + mfg_game, theoretical_distribution, + theoretical_control).eval_state(mfg_game.new_initial_state()) + + th_expl = nash_conv.NashConv(mfg_game, theoretical_control).nash_conv() + print('Theoretical policy NashConv : {}'.format(th_expl)) + print('Theoretical policy Value : {}'.format(discretized_optimal_value)) + + fp = fictitious_play.FictitiousPlay(mfg_game) + md = mirror_descent.MirrorDescent(mfg_game) + for j in range(1000): + print('\n\nIteration', j, '\n') + fp.iteration() + fp_policy = fp.get_policy() + nash_conv_fp = nash_conv.NashConv(mfg_game, fp_policy) + print('Nashconv of the current FP policy', nash_conv_fp.nash_conv()) + fp_current_distribution = distribution.DistributionPolicy( + mfg_game, fp.get_policy()) + fp_l1_dist = get_l1_distribution_dist(fp_current_distribution, + theoretical_distribution) + print( + 'L1 distance between FP and theoretical policy : {}'.format(fp_l1_dist)) + md.iteration() + md_policy = md.get_policy() + nash_conv_md = nash_conv.NashConv(mfg_game, md_policy) + + print('') + + print('Nashconv of the current MD policy', nash_conv_md.nash_conv()) + md_current_distribution = md._distribution # pylint:disable=protected-access + md_l1_dist = get_l1_distribution_dist(md_current_distribution, + theoretical_distribution) + print('L1 distance between OMD and theoretical policy : {}'.format( + md_l1_dist)) + + +if __name__ == '__main__': + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/linear_quadratic_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/linear_quadratic_test.py new file mode 100644 index 0000000..bd69fb1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/linear_quadratic_test.py @@ -0,0 +1,98 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Tests for Python Linear Quadratic game.""" + +from absl.testing import absltest +import numpy as np +from open_spiel.python.mfg.games import linear_quadratic +import pyspiel + +MFG_STR_CONST = "_a" + + +class MFGLinearQuadraticGameTest(absltest.TestCase): + + def test_load(self): + game = pyspiel.load_game("mean_field_lin_quad") + game.new_initial_state() + + def test_create(self): + """Checks we can create the game and clone states.""" + game = linear_quadratic.MFGLinearQuadraticGame() + self.assertEqual(game.size, linear_quadratic._SIZE) + self.assertEqual(game.horizon, linear_quadratic._HORIZON) + self.assertEqual(game.get_type().dynamics, + pyspiel.GameType.Dynamics.MEAN_FIELD) + print("Num distinct actions:", game.num_distinct_actions()) + state = game.new_initial_state() + clone = state.clone() + print("Initial state:", state) + print("Cloned initial state:", clone) + + def test_create_with_params(self): + game = pyspiel.load_game("mean_field_lin_quad(horizon=30,size=100)") + self.assertEqual(game.size, 100) + self.assertEqual(game.horizon, 30) + + def check_cloning(self, state): + cloned = state.clone() + self.assertEqual(str(cloned), str(state)) + self.assertEqual(cloned._distribution, state._distribution) + self.assertEqual(cloned._returns(), state._returns()) + self.assertEqual(cloned.current_player(), state.current_player()) + self.assertEqual(cloned.size, state.size) + self.assertEqual(cloned.horizon, state.horizon) + self.assertEqual(cloned._last_action, state._last_action) + + def test_random_game(self): + """Tests basic API functions.""" + np.random.seed(7) + horizon = 30 + size = 100 + game = linear_quadratic.MFGLinearQuadraticGame(params={ + "horizon": horizon, + "size": size + }) + state = game.new_initial_state() + t = 0 + while not state.is_terminal(): + if state.current_player() == pyspiel.PlayerId.CHANCE: + actions, probs = zip(*state.chance_outcomes()) + action = np.random.choice(actions, p=probs) + self.check_cloning(state) + self.assertEqual(len(state.legal_actions()), + len(state.chance_outcomes())) + state.apply_action(action) + elif state.current_player() == pyspiel.PlayerId.MEAN_FIELD: + self.assertEqual(state.legal_actions(), []) + self.check_cloning(state) + num_states = len(state.distribution_support()) + state.update_distribution([1 / num_states] * num_states) + else: + self.assertEqual(state.current_player(), 0) + self.check_cloning(state) + state.observation_string() + state.information_state_string() + legal_actions = state.legal_actions() + action = np.random.choice(legal_actions) + state.apply_action(action) + t += 1 + + self.assertEqual(t, horizon) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/normal_form_game.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/normal_form_game.py new file mode 100644 index 0000000..ccfae6b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/normal_form_game.py @@ -0,0 +1,295 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, 
Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Mean Field Normal Form Games / Static Mean-Field Games.""" + +from typing import Any, List, Mapping + +import numpy as np + +from open_spiel.python.observation import IIGObserverForPublicInfoGame # pylint:disable=g-importing-member +import pyspiel + + +def coop_reward(last_action, distribution): + """A game incentivising cooperation.""" + nu_a, nu_b, nu_c, *_ = distribution + if last_action == 0: + return 10 * nu_a - 200 / 9 * (nu_a - nu_c) * nu_c - 20 * nu_b + elif last_action == 1: + return 20 * (nu_a - nu_b) - 2380 * nu_c + elif last_action == 2: + return 2000 / 9 * (nu_a - nu_c) * nu_c + else: + raise ValueError("Unknown last action " + str(last_action)) + + +def biased_indirect_rps(last_action, distribution): + """Biased indirect Rock Paper Scissors.""" + nu_a = 0.7 * distribution[0] + nu_b = 0.5 * distribution[1] + nu_c = 0.3 * distribution[2] + if last_action == 0: + return nu_b - nu_c + elif last_action == 1: + return nu_c - nu_a + elif last_action == 2: + return nu_a - nu_b + else: + raise ValueError("Unknown last action " + str(last_action)) + + +def dominated_reward_source(last_action, distribution): + nu_a, nu_b, nu_c, *_ = distribution + if last_action == 0: + return nu_a + nu_c + elif last_action == 1: + return nu_b + elif last_action == 2: + return nu_a + nu_c - 0.25 + else: + raise ValueError("Unknown last action " + str(last_action)) + + +_NUM_PLAYERS = 1 +_NUM_ACTIONS = 3 +_DEFAULT_PARAMS = {"num_actions": _NUM_ACTIONS, "reward_function": "coop"} +_GAME_TYPE = pyspiel.GameType( + short_name="mean_field_nfg", + long_name="Mean-Field Normal-Form Game", + dynamics=pyspiel.GameType.Dynamics.MEAN_FIELD, + chance_mode=pyspiel.GameType.ChanceMode.EXPLICIT_STOCHASTIC, + information=pyspiel.GameType.Information.PERFECT_INFORMATION, + utility=pyspiel.GameType.Utility.GENERAL_SUM, + reward_model=pyspiel.GameType.RewardModel.REWARDS, + max_num_players=_NUM_PLAYERS, + min_num_players=_NUM_PLAYERS, + provides_information_state_string=True, + provides_information_state_tensor=False, + provides_observation_string=True, + provides_observation_tensor=False, + parameter_specification=_DEFAULT_PARAMS, +) + + +class MFGNormalFormGame(pyspiel.Game): + """A Mean Field Normal Form game. + + A game starts by an initial chance node that select the initial state + of the MFG. 
+ Then the game sequentially alternates between: + - An action selection node (Where the player Id >= 0) + - A chance node (the player id is pyspiel.PlayerId.CHANCE) + - A Mean Field node (the player id is pyspiel.PlayerId.MEAN_FIELD) + """ + + # pylint:disable=dangerous-default-value + def __init__(self, params: Mapping[str, Any] = _DEFAULT_PARAMS): + game_info = pyspiel.GameInfo( + num_distinct_actions=_NUM_ACTIONS, + max_chance_outcomes=_NUM_ACTIONS, + num_players=_NUM_PLAYERS, + min_utility=-np.inf, + max_utility=+np.inf, + utility_sum=0.0, + max_game_length=2, + ) + super().__init__(_GAME_TYPE, game_info, params) + if params["reward_function"] == "coop": + self.reward_function = coop_reward + elif params["reward_function"] == "dom": + self.reward_function = dominated_reward_source + elif params["reward_function"] == "biased_indirect_rps": + self.reward_function = biased_indirect_rps + else: + raise ValueError("Unknown reward function " + params["reward_function"]) + self.num_actions = params["num_actions"] + self.size = 1 + self.num_actions + + def new_initial_state(self): + """Returns a state corresponding to the start of a game.""" + return MFGNormalFormState(self) + + def make_py_observer(self, iig_obs_type=None, params=None): + """Returns an object used for observing game state.""" + if (iig_obs_type is None) or ( + iig_obs_type.public_info and not iig_obs_type.perfect_recall + ): + return Observer(params, self) + return IIGObserverForPublicInfoGame(iig_obs_type, params) + + def max_chance_nodes_in_history(self): + """Maximun chance nodes in game history.""" + return 0 + + +class MFGNormalFormState(pyspiel.State): + """A Mean Field Normal-Form state.""" + + def __init__(self, game, last_action=None): + """Constructor; should only be called by Game.new_initial_state.""" + super().__init__(game) + self._player_id = pyspiel.PlayerId.DEFAULT_PLAYER_ID + self._last_action = last_action + self._num_actions = game.num_actions + self.reward_function = game.reward_function + self.size = game.size + self._terminal = False + + # Represents the current probability distribution over game states. + # Initialized with a uniform distribution. + self._distribution = [1.0 / self.size for _ in range(self.size)] + + def state_to_str(self, player_id=pyspiel.PlayerId.DEFAULT_PLAYER_ID): + """A string that uniquely identify a triplet x, t, player_id.""" + if self._last_action is None: + return "initial" + else: + bonus = "_final" if self.is_terminal() else "" + return str(self._last_action) + bonus + + # OpenSpiel (PySpiel) API functions are below. This is the standard set that + # should be implemented by every perfect-information sequential-move game. + + def _legal_actions(self, player): + """Returns a list of legal actions for player and MFG nodes.""" + if player == pyspiel.PlayerId.MEAN_FIELD: + return [] + if ( + player == pyspiel.PlayerId.DEFAULT_PLAYER_ID + and player == self.current_player() + ): + return list(range(self._num_actions)) + raise ValueError( + f"Unexpected player {player}. " + "Expected a mean field or current player 0." + ) + + def _apply_action(self, action): + """Applies the specified action to the state.""" + if self._player_id == pyspiel.PlayerId.MEAN_FIELD: + raise ValueError( + "_apply_action should not be called at a MEAN_FIELD state." 
+ ) + self.return_value = self._rewards() + + assert self._player_id == 0 + # Here the action is between 0 and N-1 + if action < 0 or action > self._num_actions - 1: + raise ValueError( + "The action is between 0 and {} at any node".format( + self._num_actions - 1 + ) + ) + self._last_action = action + self._player_id = pyspiel.PlayerId.MEAN_FIELD + + def _action_to_string(self, player, action): + """Action -> string.""" + del player + return str(action) + + def distribution_support(self): + """return a list of state string.""" + if self._player_id == pyspiel.PlayerId.DEFAULT_PLAYER_ID: + return [self.state_to_str()] + elif self._player_id == pyspiel.PlayerId.MEAN_FIELD: + return [str(i) for i in range(self._num_actions)] + + def update_distribution(self, distribution): + """This function is central and specific to the logic of the MFG. + + Args: + distribution: a distribution to register. - function should be called + when the node is in MEAN_FIELD state. - distribution are probabilities + that correspond to each game state given by distribution_support. + """ + if self._player_id != pyspiel.PlayerId.MEAN_FIELD: + raise ValueError( + "update_distribution should only be called at a MEAN_FIELD state." + ) + self._distribution = distribution.copy() + self._player_id = pyspiel.PlayerId.TERMINAL + + def is_terminal(self): + """Returns True if the game is over.""" + return self._player_id == pyspiel.PlayerId.TERMINAL + + def current_player(self): + """Returns id of the next player to move, or TERMINAL if game is over.""" + if self.is_terminal(): + return pyspiel.PlayerId.TERMINAL + return self._player_id + + def _rewards(self): + """Reward for the player for this state.""" + reward = 0.0 + if self._player_id == pyspiel.PlayerId.TERMINAL: + reward = self.reward_function(self._last_action, self._distribution) + return reward + + def rewards(self) -> List[float]: + """Rewards for all players.""" + # For now, only single-population (single-player) mean field games + # are supported. + return [self._rewards()] + + def _returns(self): + """Returns is the sum of all payoffs collected so far.""" + return self._rewards() + + def returns(self) -> List[float]: + """Returns for all players.""" + # For now, only single-population (single-player) mean field games + # are supported. + return [self._returns()] + + def __str__(self): + """A string that uniquely identify the current state.""" + return self.state_to_str(player_id=self._player_id) + + +class Observer: + """Observer, conforming to the PyObserver interface (see observation.py).""" + + def __init__(self, params, game): + """Initializes an empty observation tensor.""" + del params + + self.size = game.size + # +1 to allow t == horizon. + self.tensor = np.array([]) + self.dict = {} + + def set_from(self, state: MFGNormalFormState, player: int): + """Updates `tensor` and `dict` to reflect `state` from PoV of `player`.""" + del player + # We update the observation via the shaped tensor since indexing is more + # convenient than with the 1-D tensor. Both are views onto the same memory. + del state + self.tensor.fill(0) + # state.x is None for the initial (blank) state, don't set any + # position bit in that case. 
+ pass + + def string_from(self, state, player): + """Observation of `state` from the PoV of `player`, as a string.""" + del player + return str(state) + + +# Register the game with the OpenSpiel library + +pyspiel.register_game(_GAME_TYPE, MFGNormalFormGame) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/normal_form_game_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/normal_form_game_test.py new file mode 100644 index 0000000..b1a7e9f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/normal_form_game_test.py @@ -0,0 +1,68 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Tests for Python Crowd Modelling game.""" + +from absl.testing import absltest +from open_spiel.python.mfg.games import normal_form_game +import pyspiel + +MFG_STR_CONST = "_a" + + +class MFGNormalFormGameTest(absltest.TestCase): + + def test_load(self): + game = pyspiel.load_game("mean_field_nfg") + game.new_initial_state() + + def test_create(self): + """Checks we can create the game and clone states.""" + game = normal_form_game.MFGNormalFormGame() + self.assertEqual( + game.get_type().dynamics, pyspiel.GameType.Dynamics.MEAN_FIELD + ) + print("Num distinct actions:", game.num_distinct_actions()) + state = game.new_initial_state() + clone = state.clone() + print("Initial state:", state) + print("Cloned initial state:", clone) + + def test_create_with_params(self): + game = pyspiel.load_game("mean_field_nfg(num_actions=10)") + self.assertEqual(game.num_actions, 10) + + def test_reward(self): + game = normal_form_game.MFGNormalFormGame() + state = game.new_initial_state() + self.assertEqual(state.current_player(), pyspiel.PlayerId.DEFAULT_PLAYER_ID) + + state.apply_action(0) + self.assertEqual(state.current_player(), pyspiel.PlayerId.MEAN_FIELD) + state.update_distribution([1.0, 0.0, 0.0]) + self.assertAlmostEqual(state.rewards()[0], 10.0) + self.assertAlmostEqual(state.returns()[0], 10.0) + + state = game.new_initial_state() + state.apply_action(0) + state.update_distribution([0.0, 1.0, 0.0]) + self.assertAlmostEqual(state.rewards()[0], -20.0) + self.assertAlmostEqual(state.returns()[0], -20.0) + + self.assertTrue(state.is_terminal()) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/periodic_aversion.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/periodic_aversion.py new file mode 100644 index 0000000..2c2c7dd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/periodic_aversion.py @@ -0,0 +1,415 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Mean Field Game on periodic domain with aversion cost. + +This is a demonstration of implementing a mean field game in Python. The model +is an approximation of a continuous space, continuous time model introduced +to study ergodic MFG with explicit solution in: +Almulla, N.; Ferreira, R.; and Gomes, D. 2017. +Two numerical approaches to stationary mean-field games. Dyn. Games Appl. +7(4):657-682. + +See also: +Elie, R., Perolat, J., Laurière, M., Geist, M., & Pietquin, O. (2020, April). +On the convergence of model free learning in mean field games. +In Proceedings of the AAAI Conference on Artificial Intelligence +(Vol. 34, No. 05, pp. 7143-7150). +""" + +import functools +import math +from typing import Any, List, Mapping + +import numpy as np +import scipy.stats + +from open_spiel.python import observation +import pyspiel + +_NUM_PLAYERS = 1 +_SIZE = 21 +_HORIZON = 20 +_VOLATILITY = 1.0 +_COEF_AVERSION = 1.0 +_DELTA_T = 0.01 +_X_MIN = 0.0 +_X_MAX = 1.0 +_N_ACTIONS_PER_SIDE = 10 + +_DEFAULT_PARAMS = { + "size": _SIZE, + "horizon": _HORIZON, + "dt": _DELTA_T, + "xmin": _X_MIN, + "xmax": _X_MAX, + "n_actions_per_side": _N_ACTIONS_PER_SIDE, + "volatility": _VOLATILITY, + "coef_aversion": _COEF_AVERSION, +} + +_GAME_TYPE = pyspiel.GameType( + short_name="python_mfg_periodic_aversion", + long_name="Mean-Field Periodic Aversion Game", + dynamics=pyspiel.GameType.Dynamics.MEAN_FIELD, + chance_mode=pyspiel.GameType.ChanceMode.EXPLICIT_STOCHASTIC, + information=pyspiel.GameType.Information.PERFECT_INFORMATION, + utility=pyspiel.GameType.Utility.GENERAL_SUM, + reward_model=pyspiel.GameType.RewardModel.REWARDS, + max_num_players=_NUM_PLAYERS, + min_num_players=_NUM_PLAYERS, + provides_information_state_string=False, + provides_information_state_tensor=False, + provides_observation_string=True, + provides_observation_tensor=True, + parameter_specification=_DEFAULT_PARAMS, +) + + +@functools.lru_cache(maxsize=None) +def _state_to_str(x, t, player_id): + """A string that uniquely identifies (x, t, player_id).""" + if int(player_id) == pyspiel.PlayerId.DEFAULT_PLAYER_ID: + return f"(t={t}, pos={x})" + if player_id == pyspiel.PlayerId.MEAN_FIELD: + return f"(t={t}_a, pos={x})" + if player_id == pyspiel.PlayerId.CHANCE: + return f"(t={t}_a_mu, pos={x})" + + +class MFGPeriodicAversionGame(pyspiel.Game): + """A Mean-Field Game on periodic domain with crowd aversion cost. + + A game starts by an initial chance node that select the initial state + of the player in the MFG. 
+ Then the game sequentially alternates between: + - An action selection node (where the player id is >= 0) + - A chance node (the player id is pyspiel.PlayerId.CHANCE) + - A Mean Field node (the player id is pyspiel.PlayerId.MEAN_FIELD) + """ + + # pylint:disable=dangerous-default-value + def __init__(self, params: Mapping[str, Any] = _DEFAULT_PARAMS): + self.size = params.get("size", _SIZE) # number of states + self.horizon = params.get("horizon", _HORIZON) # number of time steps + self.dt = params.get("dt", _DELTA_T) # size of one step in time + self.xmin = params.get("xmin", _X_MIN) # smallest position + self.xmax = params.get("xmax", _X_MAX) # largest position + self.dx = (self.xmax - self.xmin) / ( + self.size - 1 + ) # size of one step in space + self.n_actions_per_side = params.get( + "n_actions_per_side", _N_ACTIONS_PER_SIDE + ) # number of actions on each side, for both players and noise + self.volatility = params.get("volatility", _VOLATILITY) + self.coef_aversion = params.get("coef_aversion", _COEF_AVERSION) + + game_info = pyspiel.GameInfo( + num_distinct_actions=2 * self.n_actions_per_side + 1, + max_chance_outcomes=2 * self.n_actions_per_side + 1, + num_players=_NUM_PLAYERS, + min_utility=-np.inf, + max_utility=+np.inf, + utility_sum=0.0, + max_game_length=self.horizon, + ) + super().__init__(_GAME_TYPE, game_info, params) + + def new_initial_state(self): + """Returns a state corresponding to the start of a game.""" + return MFGPeriodicAversionState(self) + + def make_py_observer(self, iig_obs_type=None, params=None): + """Returns an object used for observing game state.""" + if (iig_obs_type is None) or ( + iig_obs_type.public_info and not iig_obs_type.perfect_recall + ): + return Observer(params, self) + return observation.IIGObserverForPublicInfoGame(iig_obs_type, params) + + def max_chance_nodes_in_history(self): + """Maximun chance nodes in game history.""" + return self.horizon + 1 + + +class MFGPeriodicAversionState(pyspiel.State): + """A Mean Field Normal-Form state. + + In this class, x and action are integers. They are converted, when needed, to + spatial variables by using a scaling factor representing the size of a step in + space and by shifting them depending on the minimal allowed value. + """ + + def __init__(self, game): + """Constructor; should only be called by Game.new_initial_state.""" + super().__init__(game) + # Initial state where the initial position is chosen according to + # an initial distribution. + self._player_id = pyspiel.PlayerId.CHANCE + + self._last_action = game.n_actions_per_side # neutral action + self.tick = 0 + self.x = None + self.return_value = 0.0 + + self.game = game + + self.size = game.size + self.horizon = game.horizon + self.dt = game.dt + self.xmin = game.xmin + self.xmax = game.xmax + self.dx = game.dx + self.da = game.dx + self.n_actions_per_side = game.n_actions_per_side + self.volatility = game.volatility + self.coef_aversion = game.coef_aversion + + # Represents the current probability distribution over game states. + # Initialized with a uniform distribution. 
+ self._distribution = [1.0 / self.size for _ in range(self.size)] + + def to_string(self): + return self.state_to_str(self.x, self.tick) + + def state_to_str(self, x, tick, player_id=pyspiel.PlayerId.DEFAULT_PLAYER_ID): + """A string that uniquely identify a triplet x, t, player_id.""" + if self.x is None: + return "initial" + if self._player_id == pyspiel.PlayerId.DEFAULT_PLAYER_ID: + return "({}, {})".format(x, tick) + elif self._player_id == pyspiel.PlayerId.MEAN_FIELD: + return "({}, {})_a".format(x, tick) + elif self._player_id == pyspiel.PlayerId.CHANCE: + return "({}, {})_a_mu".format(x, tick) + raise ValueError( + "player_id is not mean field, chance or default player id." + ) + + # OpenSpiel (PySpiel) API functions are below. This is the standard set that + # should be implemented by every perfect-information sequential-move game. + + @property + def n_actions(self): + return 2 * self.n_actions_per_side + 1 + + def _legal_actions(self, player): + """Returns a list of legal actions for player and MFG nodes.""" + if player == pyspiel.PlayerId.MEAN_FIELD: + return [] + if ( + player == pyspiel.PlayerId.DEFAULT_PLAYER_ID + and player == self.current_player() + ): + return list(range(self.n_actions)) + raise ValueError( + f"Unexpected player {player}. " + "Expected a mean field or current player 0." + ) + + def _apply_action(self, action): + """Applies the specified action to the state.""" + if self._player_id == pyspiel.PlayerId.MEAN_FIELD: + raise ValueError( + "_apply_action should not be called at a MEAN_FIELD state." + ) + self.return_value = self._rewards() + + assert ( + self._player_id == pyspiel.PlayerId.DEFAULT_PLAYER_ID + or self._player_id == pyspiel.PlayerId.CHANCE + ) + + if self.x is None: + self.x = action + self._player_id = pyspiel.PlayerId.DEFAULT_PLAYER_ID + return + + if action < 0 or action >= self.n_actions: + raise ValueError( + "The action is between 0 and {} at any node".format(self.n_actions) + ) + + self.x = (self.x + action - self.n_actions_per_side) % self.size + if self._player_id == pyspiel.PlayerId.CHANCE: + self._player_id = pyspiel.PlayerId.MEAN_FIELD + self.tick += 1 + elif self._player_id == pyspiel.PlayerId.DEFAULT_PLAYER_ID: + self._last_action = action + self._player_id = pyspiel.PlayerId.CHANCE + + def _action_to_string(self, player, action): + """Action -> string.""" + del player + return str(action - self.n_actions_per_side) + + def action_to_move(self, action): + return (action - self.n_actions_per_side) * self.da + + def state_to_position(self, state): + return state * self.dx + self.xmin + + def position_to_state(self, position): + return round((position - self.xmin) / self.dx) + + def chance_outcomes(self): + """Returns the possible chance outcomes and their probabilities.""" + if self.x is None: + # Initial distribution + return list(enumerate(self._distribution)) + actions = np.array( + [(a - self.n_actions_per_side) * self.da for a in range(self.n_actions)] + ) + stddev = self.volatility * math.sqrt(self.dt) + probas = scipy.stats.norm.pdf(actions, scale=stddev) + probas /= np.sum(probas) + return [(act, p) for act, p in zip(list(range(self.n_actions)), probas)] + + def distribution_support(self): + """return a list of state string.""" + return [ + self.state_to_str(i, self.tick, player_id=pyspiel.PlayerId.MEAN_FIELD) + for i in range(self.size) + ] + + def get_state_proba(self, state: int) -> float: + """Gets the probability of a position in the current distrib. + + Args: + state: state requested. 
+ + Returns: + The probability for the provided position. + """ + assert state >= 0, state + assert state < self.size, state + # This logic needs to match the ordering defined in distribution_support(). + index = state + assert 0 <= index < len(self._distribution), ( + f"Invalid index {index} vs dist length:" + f" {len(self._distribution)}, state={state}," + f" state={self}" + ) + return self._distribution[index] + + def update_distribution(self, distribution): + """This function is central and specific to the logic of the MFG. + + Args: + distribution: a distribution to register. - function should be called + when the node is in MEAN_FIELD state. - distribution are probabilities + that correspond to each game state given by distribution_support. + """ + if self._player_id != pyspiel.PlayerId.MEAN_FIELD: + raise ValueError( + "update_distribution should only be called at a MEAN_FIELD state." + ) + self._distribution = distribution.copy() + self._player_id = pyspiel.PlayerId.DEFAULT_PLAYER_ID + + @property + def t(self): + return self.tick + + def is_terminal(self): + """Returns True if the game is over.""" + return self.t >= self.horizon + + def current_player(self): + """Returns id of the next player to move, or TERMINAL if game is over.""" + if self.is_terminal(): + return pyspiel.PlayerId.TERMINAL + return self._player_id + + def _rewards(self): + """Reward for the player for this state.""" + if self._player_id != pyspiel.PlayerId.DEFAULT_PLAYER_ID: + return 0.0 + assert self.x is not None + velocity = self.action_to_move(self._last_action) / self.dt + action_r = -0.5 * velocity**2 + eps = 1e-15 + mu_x = self.get_state_proba(self.x) / self.dx # represents the density + # The density should have an integral equal to 1; here sum_x mu_x * dx = 1 + aversion_r = -np.log(mu_x + eps) + pos = self.state_to_position(self.x) + pix2 = 2 * np.pi * pos + geom_r = ( + self.volatility * 2 * np.pi**2 * np.sin(pix2) + - 2 * np.pi**2 * np.cos(pix2) ** 2 + + (2 / self.volatility**2) * np.sin(pix2) + ) + return (action_r + self.coef_aversion * aversion_r + geom_r) * self.dt + + def rewards(self) -> List[float]: + """Rewards for all players.""" + # For now, only single-population (single-player) mean field games + # are supported. + return [self._rewards()] + + def _returns(self): + """Returns is the sum of all payoffs collected so far.""" + return self.return_value + self._rewards() + + def returns(self) -> List[float]: + """Returns for all players.""" + # For now, only single-population (single-player) mean field games + # are supported. + return [self._returns()] + + def __str__(self): + """A string that uniquely identify the current state.""" + return self.state_to_str( + x=self.x, tick=self.tick, player_id=self._player_id + ) + + +class Observer: + """Observer, conforming to the PyObserver interface (see observation.py).""" + + def __init__(self, params, game): + """Initializes an empty observation tensor.""" + del params + + self.size = game.size + self.horizon = game.horizon + # +1 to allow t == horizon. + self.tensor = np.zeros(self.size + self.horizon + 1, np.float32) + self.dict = {"x": self.tensor[: self.size], "t": self.tensor[self.size :]} + + def set_from(self, state, player: int): + """Updates `tensor` and `dict` to reflect `state` from PoV of `player`.""" + del player + # We update the observation via the shaped tensor since indexing is more + # convenient than with the 1-D tensor. Both are views onto the same memory. 
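+    # The encoding is one-hot: the first `size` entries of the tensor mark the
+    # position and the remaining `horizon + 1` entries mark the time step.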
+ self.tensor.fill(0) + # state.x is None for the initial (blank) state, don't set any + # position bit in that case. + if state.x is not None: + if state.x < 0 or state.x > self.size: + raise ValueError( + f"Expected {state} positions to be in [0, {self.size})" + ) + self.dict["x"][state.x] = 1 + if not 0 <= state.tick <= self.horizon: + raise ValueError(f"Expected {state} time to be in [0, {self.horizon}]") + self.dict["t"][state.tick] = 1 + + def string_from(self, state, player): + """Observation of `state` from the PoV of `player`, as a string.""" + del player + return state.to_string() + + +pyspiel.register_game(_GAME_TYPE, MFGPeriodicAversionGame) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/periodic_aversion_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/periodic_aversion_test.py new file mode 100644 index 0000000..238e03b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/periodic_aversion_test.py @@ -0,0 +1,98 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Tests for Python Periodic Aversion game.""" + +from absl.testing import absltest +import numpy as np +from open_spiel.python.mfg.games import periodic_aversion +import pyspiel + +MFG_STR_CONST = "_a" + + +class MFGPeriodicAversionTest(absltest.TestCase): + + def test_load(self): + game = pyspiel.load_game("python_mfg_periodic_aversion") + game.new_initial_state() + + def test_create(self): + """Checks we can create the game and clone states.""" + game = periodic_aversion.MFGPeriodicAversionGame() + self.assertEqual(game.size, periodic_aversion._SIZE) + self.assertEqual(game.horizon, periodic_aversion._HORIZON) + self.assertEqual(game.get_type().dynamics, + pyspiel.GameType.Dynamics.MEAN_FIELD) + print("Num distinct actions:", game.num_distinct_actions()) + state = game.new_initial_state() + clone = state.clone() + print("Initial state:", state) + print("Cloned initial state:", clone) + + def test_create_with_params(self): + game = pyspiel.load_game("python_mfg_periodic_aversion(horizon=30,size=41)") + self.assertEqual(game.size, 41) + self.assertEqual(game.horizon, 30) + + def check_cloning(self, state): + cloned = state.clone() + self.assertEqual(str(cloned), str(state)) + self.assertEqual(cloned._distribution, state._distribution) + self.assertEqual(cloned._returns(), state._returns()) + self.assertEqual(cloned.current_player(), state.current_player()) + self.assertEqual(cloned.size, state.size) + self.assertEqual(cloned.horizon, state.horizon) + self.assertEqual(cloned._last_action, state._last_action) + + def test_random_game(self): + """Tests basic API functions.""" + np.random.seed(7) + horizon = 30 + size = 41 + game = periodic_aversion.MFGPeriodicAversionGame(params={ + "horizon": horizon, + "size": size + }) + state = game.new_initial_state() + t = 0 + while not state.is_terminal(): + if state.current_player() == pyspiel.PlayerId.CHANCE: + actions, probs = 
zip(*state.chance_outcomes()) + action = np.random.choice(actions, p=probs) + self.check_cloning(state) + self.assertEqual(len(state.legal_actions()), + len(state.chance_outcomes())) + state.apply_action(action) + elif state.current_player() == pyspiel.PlayerId.MEAN_FIELD: + self.assertEqual(state.legal_actions(), []) + self.check_cloning(state) + num_states = len(state.distribution_support()) + state.update_distribution([1 / num_states] * num_states) + else: + self.assertEqual(state.current_player(), 0) + self.check_cloning(state) + state.observation_string() + state.information_state_string() + legal_actions = state.legal_actions() + action = np.random.choice(legal_actions) + state.apply_action(action) + t += 1 + + self.assertEqual(t, horizon) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/predator_prey.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/predator_prey.py new file mode 100644 index 0000000..81527da --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/predator_prey.py @@ -0,0 +1,589 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Mean Field predator-prey game, implemented in Python. + +This corresponds to the predator-prey game described in section 5.4 of +"Scaling up Mean Field Games with Online Mirror Descent" +(https://arxiv.org/abs/2103.00623) + +The environment is configurable in the following high-level ways: +- Number of populations. +- Reward matrix. +- Initial distribution. +- Geometry (torus, basic square). +""" + +import enum +import functools +from typing import Any, List, Mapping, Optional, Tuple + +import numpy as np + +from open_spiel.python import observation +import pyspiel +from open_spiel.python.utils import shared_value + + +class Geometry(enum.IntEnum): + SQUARE = 0 + TORUS = 1 + + +_DEFAULT_SIZE = 5 +_NUM_ACTIONS = 5 +_NUM_CHANCE = 5 +DEFAULT_REWARD_MATRIX_THREE_POPULATIONS = np.array( + # The first population is attracted to the second and tries to avoid the + # third one. + [[0, -1, 1], [1, 0, -1], [-1, 1, 0]] +) +DEFAULT_REWARD_MATRIX_FOUR_POPULATIONS = np.array( + # The first population is attracted to the second and tries to avoid the + # third one, and so on. + [[0, 1, 0, -1], [-1, 0, 1, 0], [0, -1, 0, 1], [1, 0, -1, 0]] +) +# Each population starts in a corner. 
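+# Each distribution below stacks one (size x size) block per population, giving
+# a matrix of shape (num_populations * size, size); block k is population k's
+# initial distribution over the grid.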
+DEFAULT_INIT_DISTRIB_THREE_POPULATIONS = np.array([ + # First population + [1.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + # Second population + [0.0, 0.0, 0.0, 0.0, 1.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + # Third population + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0, 0.0], +]) +DEFAULT_INIT_DISTRIB_FOUR_POPULATIONS = np.array([ + # First population + [1.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + # Second population + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0, 0.0], + # Third population + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 1.0], + # Fourth population + [0.0, 0.0, 0.0, 0.0, 1.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], +]) + +_DEFAULT_GEOMETRY = Geometry.SQUARE +_DEFAULT_NOISE_PROBABILITY = 0.8 +_DEFAULT_CONGESTION_COEFF = 1.0 + +THREE_POPULATIONS = { + "size": _DEFAULT_SIZE, + "horizon": 10, + "players": 3, + # The reward matrix is represented as a string containing a + # space-separated list of values. + # Its size defines the number of populations in the mean field game. + "reward_matrix": " ".join( + str(v) for v in DEFAULT_REWARD_MATRIX_THREE_POPULATIONS.flatten() + ), + "geometry": _DEFAULT_GEOMETRY, + "init_distrib": " ".join( + str(v) for v in DEFAULT_INIT_DISTRIB_THREE_POPULATIONS.flatten() + ), + # Probability that the transition is affected by noise + "noise_probability": _DEFAULT_NOISE_PROBABILITY, + # Weight of congestion term in the reward + "congestion_coeff": _DEFAULT_CONGESTION_COEFF, +} + +FOUR_POPULATIONS = { + "size": _DEFAULT_SIZE, + "horizon": 20, + "players": 4, + # The reward matrix is represented as a string containing a + # space-separated list of values. + # Its size defines the number of populations in the mean field game. + "reward_matrix": " ".join( + str(v) for v in DEFAULT_REWARD_MATRIX_FOUR_POPULATIONS.flatten() + ), + "geometry": _DEFAULT_GEOMETRY, + "init_distrib": " ".join( + str(v) for v in DEFAULT_INIT_DISTRIB_FOUR_POPULATIONS.flatten() + ), + # Probability that the transition is affected by noise + "noise_probability": _DEFAULT_NOISE_PROBABILITY, + # Weight of congestion term in the reward + "congestion_coeff": _DEFAULT_CONGESTION_COEFF, +} + + +_DEFAULT_PARAMS = THREE_POPULATIONS + +_GAME_TYPE = pyspiel.GameType( + short_name="python_mfg_predator_prey", + long_name="Python Mean Field Predator Prey", + dynamics=pyspiel.GameType.Dynamics.MEAN_FIELD, + chance_mode=pyspiel.GameType.ChanceMode.EXPLICIT_STOCHASTIC, + information=pyspiel.GameType.Information.PERFECT_INFORMATION, + utility=pyspiel.GameType.Utility.GENERAL_SUM, + reward_model=pyspiel.GameType.RewardModel.REWARDS, + # We cannot pass math.inf here, so we pass a very high integer value. 
+ max_num_players=1000000000, + min_num_players=1, + provides_information_state_string=True, + provides_information_state_tensor=False, + provides_observation_string=True, + provides_observation_tensor=True, + parameter_specification=_DEFAULT_PARAMS, +) + + +def get_param(param_name, params): + return params.get(param_name, _DEFAULT_PARAMS[param_name]) + + +@functools.lru_cache(maxsize=None) +def _state_to_str(x, y, t, population, player_id): + """A string that uniquely identify (pos, t, population, player_id).""" + if int(player_id) >= 0: + return f"(pop={population}, t={t}, pos=[{x} {y}])" + if player_id == pyspiel.PlayerId.MEAN_FIELD: + return f"(pop={population}, t={t}_a, pos=[{x} {y}])" + if player_id == pyspiel.PlayerId.CHANCE: + return f"(pop={population}, t={t}_a_mu, pos=[{x} {y}])" + + +class MFGPredatorPreyGame(pyspiel.Game): + """Predator-prey multi-population MFG.""" + + # pylint:disable=dangerous-default-value + def __init__(self, params: Mapping[str, Any] = _DEFAULT_PARAMS): + self.size = get_param("size", params) + self.horizon = get_param("horizon", params) + flat_reward_matrix = np.fromstring( + get_param("reward_matrix", params), dtype=np.float64, sep=" " + ) + num_players = get_param("players", params) + if len(flat_reward_matrix) != num_players**2: + raise ValueError( + "Reward matrix passed in flat representation does not represent a " + f"square matrix: {flat_reward_matrix}" + f" with number of players: {num_players}" + ) + self.reward_matrix = flat_reward_matrix.reshape([num_players, num_players]) + self.geometry = get_param("geometry", params) + num_states = self.size**2 + game_info = pyspiel.GameInfo( + num_distinct_actions=_NUM_ACTIONS, + max_chance_outcomes=max(num_states, _NUM_CHANCE), + num_players=num_players, + min_utility=-np.inf, + max_utility=+np.inf, + utility_sum=None, + max_game_length=self.horizon, + ) + + self.noise_probability = get_param("noise_probability", params) + self.congestion_coeff = get_param("congestion_coeff", params) + # Represents the current probability distribution over game states + # (when grouped for each population). + str_init_distrib = get_param("init_distrib", params) + if str_init_distrib: + flat_init_distrib = np.fromstring( + str_init_distrib, dtype=np.float64, sep=" " + ) + if len(flat_init_distrib) != num_players * self.size**2: + raise ValueError( + "Initial distribution matrix passed in flat representation does" + f" not represent a sequence of square matrices: {flat_init_distrib}" + f" with number of players: {num_players}" + f" and size: {self.size}" + ) + self.initial_distribution = flat_init_distrib + else: + # Initialized with a uniform distribution. + self.initial_distribution = [1.0 / num_states] * ( + num_states * num_players + ) + super().__init__(_GAME_TYPE, game_info, params) + + def new_initial_state(self): + """Returns a new population-less blank state. + + This state is provided for some internal operations that use blank + states (e.g. cloning), but cannot be used to play the game, i.e. + ApplyAction() will fail. Proper playable states should be + instantiated with new_initial_state_for_population(). 
+ """ + return MFGPredatorPreyState(self) + + def max_chance_nodes_in_history(self): + """Maximun chance nodes in game history.""" + return self.horizon + 1 + + def new_initial_state_for_population(self, population): + """State corresponding to the start of a game for a given population.""" + return MFGPredatorPreyState(self, population) + + def make_py_observer(self, iig_obs_type=None, params=None): + """Returns an object used for observing game state.""" + if (iig_obs_type is None) or ( + iig_obs_type.public_info and not iig_obs_type.perfect_recall + ): + return Observer(params, self) + return observation.IIGObserverForPublicInfoGame(iig_obs_type, params) + + +def pos_to_merged(pos: np.ndarray, size: int) -> int: + """Converts a [x, y] position into a single integer.""" + assert (pos >= 0).all(), pos + assert (pos < size).all(), pos + return pos[0] + pos[1] * size + + +def merged_to_pos(merged_pos: int, size: int) -> np.ndarray: + """Inverse of pos_to_merged().""" + assert 0 <= merged_pos < size * size + return np.array([merged_pos % size, merged_pos // size]) + + +class MFGPredatorPreyState(pyspiel.State): + """State for the predator-prey MFG.""" + + # Maps legal actions to the corresponding move on the grid of the game. + _ACTION_TO_MOVE = { + 0: np.array([0, 0]), + 1: np.array([1, 0]), + 2: np.array([0, 1]), + 3: np.array([0, -1]), + 4: np.array([-1, 0]), + } + # Action that corresponds to no displacement. + _NEUTRAL_ACTION = 0 + + def __init__(self, game, population=None): + """Constructor; should only be called by Game.new_initial_state.*. + + Args: + game: MFGPredatorPreyGame for which a state should be created. + population: ID of the population to create this state for. Must be in [0, + num_players()) or None. States with population=None cannot be used to + perform game actions. + """ + super().__init__(game) + # Initial state where the initial position is chosen according to + # an initial distribution. + self._is_position_init = True + self._player_id = pyspiel.PlayerId.CHANCE + # Population this state corresponds to. Can be None, in which + # case, ApplyAction() is forbidden. + self._population = population + if self._population is not None: + assert 0 <= self._population < self.num_players() + # When set, [2] numpy array representing the x, y position on the grid. + self._pos = None # type: Optional[np.ndarray] + self._t = 0 + self.size = game.size + # Number of states in the grid. + self.num_states = self.size**2 + self.horizon = game.horizon + self.reward_matrix = game.reward_matrix + self.geometry = game.geometry + self._returns = np.zeros([self.num_players()], dtype=np.float64) + self._distribution = shared_value.SharedValue(game.initial_distribution) + self.noise_probability = game.noise_probability + self.congestion_coeff = game.congestion_coeff + + @property + def population(self): + return self._population + + @property + def pos(self): + return self._pos + + @property + def t(self): + return self._t + + def state_to_str(self, pos, t, population, player_id=0): + """A string that uniquely identify (pos, t, population, player_id).""" + if self._is_position_init: + return f"position_init_{population}" + assert isinstance(pos, np.ndarray), f"Got type {type(pos)}" + assert len(pos.shape) == 1, f"Got {len(pos.shape)}, expected 1 (pos={pos})." + assert pos.shape[0] == 2, f"Got {pos.shape[0]}, expected 2 (pos={pos})." + return _state_to_str(pos[0], pos[1], t, population, player_id) + + # OpenSpiel (PySpiel) API functions are below. 
This is the standard set that + # should be implemented by every perfect-information sequential-move game. + + def mean_field_population(self): + return self._population + + def _legal_actions(self, player): + """Returns a list of legal actions for player and MFG nodes.""" + if player == pyspiel.PlayerId.MEAN_FIELD: + return [] + if player >= 0 and player == self.current_player(): + return list(self._ACTION_TO_MOVE) + raise ValueError( + f"Unexpected player {player}." + "Expected a mean field or current player >=0." + ) + + def chance_outcomes(self) -> List[Tuple[int, float]]: + """Returns the possible chance outcomes and their probabilities.""" + if self._is_position_init: + if ( + self._population is None + or not 0 <= self._population < self.num_players() + ): + raise ValueError(f"Invalid population {self._population}") + return [ + (i, self._distribution.value[self._population * self.num_states + i]) + for i in range(self.num_states) + if self._distribution.value[self._population * self.num_states + i] + != 0.0 + ] + return [ + (0, 1.0 - self.noise_probability), + (1, self.noise_probability / 4.0), + (2, self.noise_probability / 4.0), + (3, self.noise_probability / 4.0), + (4, self.noise_probability / 4.0), + ] + + def update_pos(self, action): + """Updates the position of the player given a move action.""" + if action < 0 or action >= len(self._ACTION_TO_MOVE): + raise ValueError( + f"The action must be between 0 and {len(self._ACTION_TO_MOVE)}, " + f"got {action}" + ) + candidate_pos = self._pos + self._ACTION_TO_MOVE[action] + if self.geometry == Geometry.TORUS: + candidate_pos += self.size + candidate_pos %= self.size + else: + assert ( + self.geometry == Geometry.SQUARE + ), f"Invalid geometry {self.geometry}" + # Keep the position within the bounds of the square. + candidate_pos = np.minimum(candidate_pos, self.size - 1) + candidate_pos = np.maximum(candidate_pos, 0) + self._pos = candidate_pos + + def _apply_action(self, action): + """Applies the specified action to the state.""" + if self._population is None: + raise ValueError( + "Attempting to perform an action with a population-less state." + ) + if self._player_id == pyspiel.PlayerId.MEAN_FIELD: + raise ValueError( + "_apply_action should not be called at a MEAN_FIELD state." + ) + self._returns += np.array(self.rewards()) + if self._is_position_init: + self._pos = merged_to_pos(action, self.size) + self._is_position_init = False + self._player_id = self._population + elif self._player_id == pyspiel.PlayerId.CHANCE: + self.update_pos(action) + self._t += 1 + self._player_id = pyspiel.PlayerId.MEAN_FIELD + elif int(self._player_id) >= 0: + assert self._player_id == self._population, ( + f"Invalid decision player id {self._player_id} " + f"expected {self._population}" + ) + self.update_pos(action) + self._player_id = pyspiel.PlayerId.CHANCE + else: + raise ValueError(f"Unexpected state. 
Player id: {self._player_id}") + + def _action_to_string(self, player, action): + """Action -> string.""" + del player + if self.is_chance_node() and self._is_position_init: + return f"init_position={action}" + return str(self._ACTION_TO_MOVE[action]) + + def distribution_support(self): + """Returns a list of state string.""" + support = [] + for x in range(self.size): + for y in range(self.size): + for population in range(self.num_players()): + support.append( + self.state_to_str( + np.array([x, y]), + self._t, + population, + player_id=pyspiel.PlayerId.MEAN_FIELD, + ) + ) + return support + + def get_pos_proba(self, pos: np.ndarray, population: int) -> float: + """Gets the probability of a pos and population in the current distrib. + + Args: + pos: 2D position. + population: Population requested. + + Returns: + The probability for the provided position and population. + """ + assert (pos >= 0).all(), pos + assert (pos < self.size).all(), pos + assert 0 <= population < self.num_players(), population + # This logic needs to match the ordering defined in distribution_support(). + index = population + self.num_players() * (pos[1] + self.size * pos[0]) + assert 0 <= index < len(self._distribution.value), ( + f"Invalid index {index} vs dist length: {len(self._distribution.value)}" + f", population={population}, pos={pos}, state={self}" + ) + return self._distribution.value[index] + + def update_distribution(self, distribution): + """This function is central and specific to the logic of the MFG. + + It should only be called when the node is in MEAN_FIELD state. + + Args: + distribution: List of floats that should contain the probability of each + state returned by distribution_support(). + """ + expected_dist_size = self.num_states * self.num_players() + assert len(distribution) == expected_dist_size, ( + "Unexpected distribution length " + f"{len(distribution)} != {expected_dist_size}" + ) + if self._player_id != pyspiel.PlayerId.MEAN_FIELD: + raise ValueError( + "update_distribution should only be called at a MEAN_FIELD state." + ) + self._distribution = shared_value.SharedValue(distribution) + self._player_id = self._population + + def is_terminal(self): + """Returns True if the game is over.""" + return self.t >= self.horizon + + def current_player(self): + """Returns id of the next player to move, or TERMINAL if game is over.""" + if self.is_terminal(): + return pyspiel.PlayerId.TERMINAL + return self._player_id + + def rewards(self) -> List[float]: + """Predator-prey rewards for all populations. + + See section 5.4, paragraph Environment in https://arxiv.org/abs/2103.00623. + + Returns: + One float per population. + """ + if int(self._player_id) < 0: + return [0.0] * self.num_players() + # TODO(author15): Remove this eps once b/191064186 is fixed. 
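+    # For population p at the current cell, the reward computed below combines
+    # a crowd-aversion term with a pairwise interaction term:
+    #   rewards[p] = -congestion_coeff * log(d_p + eps) + sum_q reward_matrix[p, q] * d_q
+    # where d_q is the density of population q at this position.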
+ eps = 1e-25 + densities = np.array( + [ + self.get_pos_proba(self._pos, population) + for population in range(self.num_players()) + ], + dtype=np.float64, + ) + rew = -self.congestion_coeff * np.log(densities + eps) + np.dot( + self.reward_matrix, densities + ) + return list(rew) + + def returns(self) -> List[float]: + """Returns is the sum of all payoffs collected so far.""" + return list(self._returns + np.array(self.rewards())) + + def __str__(self): + """A string that uniquely identify the current state.""" + return self.state_to_str( + self._pos, self._t, self._population, player_id=self._player_id + ) + + +class Observer: + """Observer, conforming to the PyObserver interface (see observation.py).""" + + def __init__(self, params, game): + """Initializes an empty observation tensor.""" + del params + + self.size = game.size + self.horizon = game.horizon + # +1 to allow t == horizon. + self.tensor = np.zeros(2 * self.size + self.horizon + 1, np.float32) + self.dict = { + "x": self.tensor[: self.size], + "y": self.tensor[self.size : self.size * 2], + "t": self.tensor[self.size * 2 :], + } + + def set_from(self, state: MFGPredatorPreyState, player: int): + """Updates `tensor` and `dict` to reflect `state` from PoV of `player`.""" + del player + # We update the observation via the shaped tensor since indexing is more + # convenient than with the 1-D tensor. Both are views onto the same memory. + self.tensor.fill(0) + # state.pos is None for the initial (blank) state, don't set any + # position bit in that case. + if state.pos is not None: + if not (state.pos >= 0).all() or not (state.pos < self.size).all(): + raise ValueError( + f"Expected {state} positions to be in [0, {self.size})" + ) + self.dict["x"][state.pos[0]] = 1 + self.dict["y"][state.pos[1]] = 1 + if not 0 <= state.t <= self.horizon: + raise ValueError(f"Expected {state} time to be in [0, {self.horizon}]") + self.dict["t"][state.t] = 1 + + def string_from(self, state, player): + """Observation of `state` from the PoV of `player`, as a string.""" + del player + return str(state) + + +pyspiel.register_game(_GAME_TYPE, MFGPredatorPreyGame) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/predator_prey_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/predator_prey_test.py new file mode 100644 index 0000000..bf9aba9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/games/predator_prey_test.py @@ -0,0 +1,278 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
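+# A minimal usage sketch of the game registered above (it mirrors what the
+# tests below exercise):
+#
+#   import numpy as np
+#   import pyspiel
+#   from open_spiel.python.mfg.games import predator_prey  # registers the game
+#
+#   game = pyspiel.load_game("python_mfg_predator_prey")
+#   state = game.new_initial_state_for_population(0)
+#   # Resolve the initial chance node that places population 0 on the grid.
+#   actions, probs = zip(*state.chance_outcomes())
+#   state.apply_action(np.random.choice(actions, p=probs))
+#   print(state.current_player(), state.pos)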
+ +# Lint as python3 +"""Tests for Python Predator-Prey game.""" + +import math +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np +import numpy.testing as npt +from open_spiel.python.mfg.games import predator_prey +import pyspiel + + +class MFGPredatorPreyGameTest(parameterized.TestCase): + + def test_load(self): + game = pyspiel.load_game('python_mfg_predator_prey') + game.new_initial_state_for_population(0) + game.new_initial_state_for_population(1) + + @parameterized.parameters( + { + 'geometry': predator_prey.Geometry.SQUARE, + 'expected_pos': np.array([0, 4]), + }, + { + 'geometry': predator_prey.Geometry.TORUS, + 'expected_pos': np.array([0, 0]), + }, + ) + def test_dynamics(self, geometry, expected_pos): + num_players = 3 + reward_matrix = np.array([[0, -1, 1], [1, 0, -1], [-1, 1, 0]]) + init_distrib = np.array([ + # First population + [1.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + # Second population + [0.0, 0.0, 0.0, 0.0, 1.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + # Third population + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0, 0.0], + ]) + game = pyspiel.load_game( + 'python_mfg_predator_prey', + { + 'geometry': geometry, + 'reward_matrix': ' '.join(str(v) for v in reward_matrix.flatten()), + 'init_distrib': ' '.join(str(v) for v in init_distrib.flatten()), + 'players': num_players, + 'horizon': 10, + }, + ) + state = game.new_initial_state_for_population(2) + # Initial chance node. + self.assertEqual(state.current_player(), pyspiel.PlayerId.CHANCE) + self.assertLen(state.chance_outcomes(), 1) + self.assertEqual( + state.chance_outcomes()[0][0], + predator_prey.pos_to_merged(np.array([0, 4]), state.size), + ) + state.apply_action(state.chance_outcomes()[0][0]) + self.assertEqual(state.current_player(), 2) + npt.assert_array_equal(state.pos, [0, 4]) + self.assertEqual(state._action_to_string(player=2, action=2), '[0 1]') + state.apply_action(2) + npt.assert_array_equal(state.pos, expected_pos) + + def test_create_with_params(self): + horizon = 100 + size = 20 + num_players = 3 + zero_mat = np.zeros((size, size)) + reward_matrix = np.array([[0, -1, 1], [1, 0, -1], [-1, 1, 0]]) + reward_matrix_flat = ' '.join(str(v) for v in reward_matrix.flatten()) + pop_1 = zero_mat.copy() + pop_1[0, 0] = 1.0 + pop_1 = pop_1.tolist() + pop_2 = zero_mat.copy() + pop_2[0, -1] = 1.0 + pop_2 = pop_2.tolist() + pop_3 = zero_mat.copy() + pop_3[-1, 0] = 1.0 + pop_3 = pop_3.tolist() + init_distrib = np.array(pop_1 + pop_2 + pop_3) + init_distrib_flat = ' '.join(str(v) for v in init_distrib.flatten()) + setting = 'python_mfg_predator_prey(horizon={}'.format(horizon) + setting += ',size={}'.format(size) + setting += ',players={}'.format(num_players) + setting += ',reward_matrix={}'.format(reward_matrix_flat) + setting += ',init_distrib={}'.format(init_distrib_flat) + setting += ')' + game = pyspiel.load_game(setting) + self.assertEqual(game.size, 20) + self.assertEqual(game.horizon, 100) + + @parameterized.parameters( + {'population': 0}, + {'population': 1}, + {'population': 2}, + ) + def test_random_game(self, population): + """Tests basic API functions.""" + horizon = 10 + size = 20 + num_players = 3 + reward_matrix = np.array([[0, -1, 1], [1, 0, -1], [-1, 1, 0]]) + zero_mat = np.zeros((size, 
size)) + pop_1 = zero_mat.copy() + pop_1[0, 0] = 1.0 + pop_1 = pop_1.tolist() + pop_2 = zero_mat.copy() + pop_2[0, -1] = 1.0 + pop_2 = pop_2.tolist() + pop_3 = zero_mat.copy() + pop_3[-1, 0] = 1.0 + pop_3 = pop_3.tolist() + pop_4 = zero_mat.copy() + pop_4[-1, -1] = 1.0 + pop_4 = pop_4.tolist() + pops = [pop_1, pop_2, pop_3, pop_4] + init_distrib = [] + for p in range(3): + init_distrib += pops[p] + init_distrib = np.array(init_distrib) + game = predator_prey.MFGPredatorPreyGame( + params={ + 'horizon': horizon, + 'size': size, + 'players': num_players, + 'reward_matrix': ' '.join(str(v) for v in reward_matrix.flatten()), + 'init_distrib': ' '.join(str(v) for v in init_distrib.flatten()), + } + ) + pyspiel.random_sim_test( + game, + num_sims=10, + serialize=False, + verbose=True, + mean_field_population=population, + ) + + @parameterized.parameters( + { + 'reward_matrix': np.array([[0, 1], [-1, 0]]), # + 'population': 0, + 'players': 2, + 'initial_pos': np.array([0, 0]), + 'distributions': [ + # First pop. + np.array([[1, 0], [0, 0]]), # + # Second pop. + np.array([[0.5, 0.1], [0, 0.9]]), # + ], + 'expected_rewards': np.array([ + -math.log(1 + 1e-25) + 0.5, # + -math.log(0.5 + 1e-25) - 1, + ]), + 'init_distrib': np.array([ + # First population + [1.0, 0.0], + [0.0, 0.0], + # Second population + [0.0, 1.0], + [0.0, 0.0], + ]), + }, + { + 'reward_matrix': np.array([ + [0, -1, 0.5], # + [0.5, 0, -1], # + [-0.5, 1, 0], + ]), + 'population': 2, + 'players': 3, + 'initial_pos': np.array([1, 1]), + 'distributions': [ + # First pop. + np.array([[0.1, 0.2], [0.3, 0.4]]), # + # Second pop. + np.array([[0.2, 0.1], [0.1, 0.6]]), # + # Third pop. + np.array([[0, 0.1], [0.1, 0.8]]), # + ], + 'expected_rewards': np.array([ + -math.log(0.4 + 1e-25) - 0.6 + 0.5 * 0.8, + -math.log(0.6 + 1e-25) + 0.5 * 0.4 - 0.8, + -math.log(0.8 + 1e-25) - 0.5 * 0.4 + 0.6, + ]), + 'init_distrib': np.array([ + # First population + [1.0, 0.0], + [0.0, 0.0], + # Second population + [0.0, 1.0], + [0.0, 0.0], + # Third population + [0.0, 0.0], + [1.0, 0.0], + ]), + }, + ) + def test_rewards( + self, + reward_matrix, + players, + population, + initial_pos, + distributions, + expected_rewards, + init_distrib, + ): + game = pyspiel.load_game( + 'python_mfg_predator_prey', + { + 'size': 2, + 'reward_matrix': ' '.join(str(v) for v in reward_matrix.flatten()), + 'players': players, + 'init_distrib': ' '.join(str(v) for v in init_distrib.flatten()), + }, + ) + state = game.new_initial_state_for_population(population) + # Initial chance node. + self.assertEqual(state.current_player(), pyspiel.PlayerId.CHANCE) + state.apply_action(predator_prey.pos_to_merged(initial_pos, state.size)) + self.assertEqual(state.current_player(), population) + npt.assert_array_equal(state.pos, initial_pos) + state.apply_action(state._NEUTRAL_ACTION) + npt.assert_array_equal(state.pos, initial_pos) + self.assertEqual(state.current_player(), pyspiel.PlayerId.CHANCE) + state.apply_action(state._NEUTRAL_ACTION) + self.assertEqual(state.current_player(), pyspiel.PlayerId.MEAN_FIELD) + + # Maps states (in string representation) to their proba. + dist = {} + for x in range(state.size): + for y in range(state.size): + for pop in range(len(reward_matrix)): + state_str = state.state_to_str( + np.array([x, y]), + state.t, + pop, + player_id=pyspiel.PlayerId.MEAN_FIELD, + ) + dist[state_str] = distributions[pop][y][x] + support = state.distribution_support() + state.update_distribution([dist[s] for s in support]) + + # Decision node where we get a reward. 
+ self.assertEqual(state.current_player(), population) + npt.assert_array_equal(state.rewards(), expected_rewards) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/tabular_distribution.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/tabular_distribution.py new file mode 100644 index 0000000..5c0a5e8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/tabular_distribution.py @@ -0,0 +1,74 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""A tabular representation of a distribution for a game.""" + +from typing import Dict, Optional + +from open_spiel.python.mfg import distribution +import pyspiel + +DistributionDict = Dict[str, float] + + +class TabularDistribution(distribution.ParametricDistribution): + """Distribution that uses a dictionary to store the values of the states.""" + + def __init__(self, game: pyspiel.Game): + self._distribution: DistributionDict = {} + super().__init__(game) + + def value(self, state: pyspiel.State) -> float: + return self.value_str(self.state_to_str(state)) + + def value_str(self, + state_str: str, + default_value: Optional[float] = None) -> float: + """Returns the probability of the distribution on the state string given. + + Args: + state_str: A string. + default_value: If not None, return this value if the state is not in the + support of the distribution. + + Returns: + A `float`. + + Raises: + ValueError: If the state has not been seen by the distribution and no + default value has been passed to the method. + """ + if default_value is None: + try: + return self._distribution[state_str] + except KeyError as e: + raise ValueError( + f"Distribution not computed for state {state_str}") from e + return self._distribution.get(state_str, default_value) + + def get_params(self) -> DistributionDict: + return self._distribution + + def set_params(self, params: DistributionDict): + self._distribution = params + + def state_to_str(self, state: pyspiel.State) -> str: + # TODO(author15): Consider switching to + # state.mean_field_population(). For now, this does not matter in + # practice since games don't have different observation strings for + # different player IDs. + return state.observation_string(pyspiel.PlayerId.DEFAULT_PLAYER_ID) + + @property + def distribution(self) -> DistributionDict: + return self._distribution diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/utils.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/utils.py new file mode 100644 index 0000000..69ad5de --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/utils.py @@ -0,0 +1,26 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""MFG utilities.""" + +import pickle + +from open_spiel.python.utils import gfile +from open_spiel.python.mfg import distribution + + +def save_parametric_distribution(dist: distribution.ParametricDistribution, + filename: str): + """Saves the parametric distribution to a Pickle file.""" + with gfile.Open(filename, "wb") as f: + pickle.dump(dist.get_params(), f, protocol=pickle.DEFAULT_PROTOCOL) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/mfg/value.py b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/value.py new file mode 100644 index 0000000..8fbc00d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/mfg/value.py @@ -0,0 +1,129 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Representation of a value for a game. + +This is a standard representation for passing value functions into algorithms, +with currently the following implementations: + +The main way of using a value is to call `value(state)` +or `value(state, action)`. + +We will prevent calling a value on a state action on a MEAN_FIELD state. + +The state can be a pyspiel.State object or its string representation. For a +particular ValueFunction instance, you should use only one or the other. The +behavior may be undefined for mixed usage depending on the implementation. +""" + +import collections +from typing import Union + +import pyspiel + +ValueFunctionState = Union[pyspiel.State, str] + + +class ValueFunction(object): + """Base class for values. + + A ValueFunction is something that returns a value given + a state of the world or a state and an action. + + Attributes: + game: the game for which this ValueFunction derives + """ + + def __init__(self, game): + """Initializes a value. + + Args: + game: the game for which this value derives + """ + self.game = game + + def value(self, state: ValueFunctionState, action=None) -> float: + """Returns a float representing a value. + + Args: + state: A `pyspiel.State` object or its string representation. + action: may be None or a legal action + + Returns: + A value for the state (and eventuallu state action pair). + """ + raise NotImplementedError() + + def __call__(self, state: ValueFunctionState, action=None) -> float: + """Turns the value into a callable. + + Args: + state: A `pyspiel.State` object or its string representation. + action: may be None or a legal action + + Returns: + Float: the value of the state or the state action pair. 
+ """ + return self.value(state, action=action) + + def set_value(self, state: ValueFunctionState, value: float, action=None): + """Sets the value of the state. + + Args: + state: A `pyspiel.State` object or its string representation. + value: Value of the state. + action: may be None or a legal action + """ + raise NotImplementedError() + + def has(self, state: ValueFunctionState, action=None) -> bool: + """Returns true if state(-action) has an explicit value. + + Args: + state: A `pyspiel.State` object or its string representation. + action: may be None or a legal action + + Returns: + True if there is an explicitly specified value. + """ + raise NotImplementedError() + + def add_value(self, state, value: float, action=None): + """Adds the value to the current value of the state. + + Args: + state: A `pyspiel.State` object or its string representation. + value: Value to add. + action: may be None or a legal action + """ + self.set_value( + state, self.value(state, action=action) + value, action=action) + + +class TabularValueFunction(ValueFunction): + """Tabular value function backed by a dictionary.""" + + def __init__(self, game): + super().__init__(game) + self._values = collections.defaultdict(float) + + def value(self, state: ValueFunctionState, action=None): + return self._values[(state, action)] + + def set_value(self, state: ValueFunctionState, value: float, action=None): + self._values[(state, action)] = value + + def has(self, state: ValueFunctionState, action=None): + return (state, action) in self._values diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/observation.py b/scenarios/bargaining/open_spiel/open_spiel/python/observation.py new file mode 100644 index 0000000..7b9567b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/observation.py @@ -0,0 +1,133 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""An observation of a game. + +This is intended to be the main way to get observations of states in Python. +The usage pattern is as follows: + +0. Create the game we will be playing +1. Create each kind of observation required, using `make_observation` +2. Every time a new observation is required, call: + `observation.set_from(state, player)` + The tensor contained in the Observation class will be updated with an + observation of the supplied state. This tensor is updated in-place, so if + you wish to retain it, you must make a copy. + +The following options are available when creating an Observation: + - perfect_recall: if true, each observation must allow the observing player to + reconstruct their history of actions and observations. + - public_info: if true, the observation should include public information + - private_info: specifies for which players private information should be + included - all players, the observing player, or no players + - params: game-specific parameters for observations + +We ultimately aim to have all games support all combinations of these arguments. 
+However, initially many games will only support the combinations corresponding +to ObservationTensor and InformationStateTensor: + - ObservationTensor: perfect_recall=False, public_info=True, + private_info=SinglePlayer + - InformationStateTensor: perfect_recall=True, public_info=True, + private_info=SinglePlayer + +Three formats of observation are supported: +a. 1-D numpy array, accessed by `observation.tensor` +b. Dict of numpy arrays, accessed by `observation.dict`. These are pieces of the + 1-D array, reshaped. The np.array objects refer to the same memory as the + 1-D array (no copying!). +c. String, hopefully human-readable (primarily for debugging purposes) + +For usage examples, see `observation_test.py`. +""" + +import numpy as np + +import pyspiel + + +# Corresponds to the old information_state_XXX methods. +INFO_STATE_OBS_TYPE = pyspiel.IIGObservationType(perfect_recall=True) + + +class _Observation: + """Contains an observation from a game.""" + + def __init__(self, game, observer): + self._observation = pyspiel._Observation(game, observer) + self.dict = {} + if self._observation.has_tensor(): + self.tensor = np.frombuffer(self._observation, np.float32) + offset = 0 + for tensor_info in self._observation.tensors_info(): + size = np.prod(tensor_info.shape, dtype=np.int64) + values = self.tensor[offset:offset + size].reshape(tensor_info.shape) + self.dict[tensor_info.name] = values + offset += size + else: + self.tensor = None + + def set_from(self, state, player): + self._observation.set_from(state, player) + + def string_from(self, state, player): + return (self._observation.string_from(state, player) + if self._observation.has_string() else None) + + def compress(self): + return self._observation.compress() + + def decompress(self, compressed_observation): + self._observation.decompress(compressed_observation) + + +def make_observation( + game, + imperfect_information_observation_type=None, + params=None, +): + """Returns an _Observation instance if the imperfect_information_observation_type is supported, otherwise None.""" + params = params or {} + if hasattr(game, 'make_py_observer'): + return game.make_py_observer(imperfect_information_observation_type, params) + else: + if imperfect_information_observation_type is not None: + observer = game.make_observer( + imperfect_information_observation_type, params + ) + else: + observer = game.make_observer(params) + if observer is None: + return None + return _Observation(game, observer) + + +class IIGObserverForPublicInfoGame: + """Observer for imperfect information obvservations of public-info games.""" + + def __init__(self, iig_obs_type, params): + if params: + raise ValueError(f'Observation parameters not supported; passed {params}') + self._iig_obs_type = iig_obs_type + self.tensor = None + self.dict = {} + + def set_from(self, state, player): + pass + + def string_from(self, state, player): + del player + if self._iig_obs_type.public_info: + return state.history_str() + else: + return '' # No private information to return diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/policy.py b/scenarios/bargaining/open_spiel/open_spiel/python/policy.py new file mode 100644 index 0000000..fc0427e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/policy.py @@ -0,0 +1,582 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3 +"""Representation of a policy for a game. + +This is a standard representation for passing policies into algorithms, +with currently the following implementations: + + TabularPolicy - an explicit policy per state, stored in an array + of shape `(num_states, num_actions)`, convenient for tabular policy + solution methods. + UniformRandomPolicy - a uniform distribution over all legal actions for + the specified player. This is computed as needed, so can be used for + games where a tabular policy would be unfeasibly large. + +The main way of using a policy is to call `action_probabilities(state, +player_id`), to obtain a dict of {action: probability}. `TabularPolicy` +objects expose a lower-level interface, which may be more efficient for +some use cases. +""" + +import itertools +from typing import Iterable + +import numpy as np + +from open_spiel.python.algorithms import get_all_states +import pyspiel + + +def child(state, action): + """Returns a child state, handling the simultaneous node case.""" + if isinstance(action, Iterable): + child_state = state.clone() + child_state.apply_actions(action) + return child_state + else: + return state.child(action) + + +def joint_action_probabilities_aux(state, policy): + """Auxiliary function for joint_action_probabilities. + + Args: + state: a game state at a simultaneous decision node. + policy: policy that gives the probability distribution over the legal + actions for each players. + + Returns: + actions_per_player: list of list of actions for each player + probs_per_player: list of list of probabilities do the corresponding action + in actions_per_player for each player. + """ + assert state.is_simultaneous_node() + action_probs_per_player = [ + policy.action_probabilities(state, player) + for player in range(state.get_game().num_players()) + ] + actions_per_player = [pi.keys() for pi in action_probs_per_player] + probs_per_player = [pi.values() for pi in action_probs_per_player] + return actions_per_player, probs_per_player + + +def joint_action_probabilities(state, policy): + """Yields action, probability pairs for a joint policy in simultaneous state. + + Args: + state: a game state at a simultaneous decision node. + policy: policy that gives the probability distribution over the legal + actions for each players. + + Yields: + (action, probability) pairs. An action is a tuple of individual + actions for each player of the game. The probability is a single joint + probability (product of all the individual probabilities). + """ + actions_per_player, probs_per_player = joint_action_probabilities_aux( + state, policy) + for actions, probs in zip( + itertools.product(*actions_per_player), + itertools.product(*probs_per_player)): + yield actions, np.prod(probs) + + +class Policy: + """Base class for policies. + + A policy is something that returns a distribution over possible actions + given a state of the world. + + Attributes: + game: the game for which this policy applies + player_ids: list of player ids for which this policy applies; each in the + interval [0..game.num_players()-1]. 
+ """ + + def __init__(self, game, player_ids): + """Initializes a policy. + + Args: + game: the game for which this policy applies + player_ids: list of player ids for which this policy applies; each should + be in the range 0..game.num_players()-1. + """ + self.game = game + self.player_ids = player_ids + + def action_probabilities(self, state, player_id=None): + """Returns a dictionary {action: prob} for all legal actions. + + IMPORTANT: We assume the following properties hold: + - All probabilities are >=0 and sum to 1 + - TLDR: Policy implementations should list the (action, prob) for all legal + actions, but algorithms should not rely on this (yet). + Details: Before May 2020, only legal actions were present in the mapping, + but it did not have to be exhaustive: missing actions were considered to + be associated to a zero probability. + For example, a deterministic state-poliy was previously {action: 1.0}. + Given this change of convention is new and hard to enforce, algorithms + should not rely on the fact that all legal actions should be present. + + Args: + state: A `pyspiel.State` object. + player_id: Optional, the player id for whom we want an action. Optional + unless this is a simultaneous state at which multiple players can act. + + Returns: + A `dict` of `{action: probability}` for the specified player in the + supplied state. + """ + raise NotImplementedError() + + def __call__(self, state, player_id=None): + """Turns the policy into a callable. + + Args: + state: The current state of the game. + player_id: Optional, the player id for whom we want an action. Optional + unless this is a simultaneous state at which multiple players can act. + + Returns: + Dictionary of action: probability. + """ + return self.action_probabilities(state, player_id) + + def to_tabular(self, states=None): + """Returns a new `TabularPolicy` equivalent to this policy. + + Args: + states: States of the game that will be used for the tabular policy. If + None, then get_tabular_policy_states() method will be used to generate + them. + + Returns: + a TabularPolicy. + """ + states = states or get_tabular_policy_states(self.game) + tabular_policy = TabularPolicy(self.game, self.player_ids, states=states) + for index, state in enumerate(tabular_policy.states): + tabular_policy.action_probability_array[index, :] = 0 + for action, probability in self.action_probabilities(state).items(): + tabular_policy.action_probability_array[index, action] = probability + return tabular_policy + + +class TabularPolicy(Policy): + """Policy implementation where the policy is in explicit tabular form. + + In addition to implementing the `Policy` interface, this class exposes + details of the policy representation for easy manipulation. + + The states are guaranteed to be grouped by player, which can simplify + code for users of this class, i.e. `action_probability_array` contains + states for player 0 first, followed by states for player 1, etc. + + The policy uses `state.information_state_string` as the keys if available, + otherwise `state.observation_string`. 
+ + Usages: + + - Set `policy(info_state, action)`: + ``` + tabular_policy = TabularPolicy(game) + info_state_str = state.information_state_string() + state_policy = tabular_policy.policy_for_key(info_state_str) + state_policy[action] = + ``` + - Set `policy(info_state)`: + ``` + tabular_policy = TabularPolicy(game) + info_state_str = state.information_state_string() + state_policy = tabular_policy.policy_for_key(info_state_str) + state_policy[:] = + ``` + + Attributes: + action_probability_array: array of shape `(num_states, num_actions)`, where + `action_probability_array[s, a]` is the probability of choosing action `a` + when at state `s`. + state_lookup: `dict` mapping state key string to index into the + `tabular_policy` array. If information state strings overlap, e.g. for + different players or if the information state string has imperfect recall, + then those states will be mapped to the same policy. + legal_actions_mask: array of shape `(num_states, num_actions)`, each row + representing which of the possible actions in the game are valid in this + particular state, containing 1 for valid actions, 0 for invalid actions. + states_per_player: A `list` per player of the state key strings at which + they have a decision to make. + states: A `list` of the states as ordered in the `action_probability_array`. + state_in: array of shape `(num_states, state_vector_size)` containing the + normalised vector representation of each information state. Populated only + for games which support information_state_tensor(), and is None otherwise. + game_type: The game attributes as returned by `Game::GetType`; used to + determine whether to use information state or observation as the key in + the tabular policy. + """ + + def __init__(self, + game, + players=None, + to_string=lambda s: s.history_str(), + states=None): + """Initializes a uniform random policy for all players in the game.""" + players = sorted(players or range(game.num_players())) + super().__init__(game, players) + self.game_type = game.get_type() + + # Get all states in the game at which players have to make decisions unless + # they are explicitly specified. + states = states or get_all_states.get_all_states( + game, + depth_limit=-1, + include_terminals=False, + include_chance_states=False, + include_mean_field_states=False, + to_string=to_string) + + # Assemble legal actions for every valid (state, player) pair, keyed by + # information state string. + self.state_lookup = {} + self.states_per_player = [[] for _ in range(game.num_players())] + self.states = [] + legal_actions_list = [] + state_in_list = [] + for player in players: + # States are ordered by their history. + for _, state in sorted(states.items(), key=lambda pair: pair[0]): + if state.is_simultaneous_node() or player == state.current_player(): + legal_actions = state.legal_actions_mask(player) + if any(legal_actions): + key = self._state_key(state, player) + if key not in self.state_lookup: + state_index = len(legal_actions_list) + self.state_lookup[key] = state_index + legal_actions_list.append(legal_actions) + self.states_per_player[player].append(key) + self.states.append(state) + if self.game_type.provides_information_state_tensor: + state_in_list.append(state.information_state_tensor(player)) + elif self.game_type.provides_observation_tensor: + state_in_list.append(state.observation_tensor(player)) + + # Put legal action masks in a numpy array and create the uniform random + # policy. 
+ self.state_in = None + if state_in_list: + self.state_in = np.array(state_in_list) + self.legal_actions_mask = np.array(legal_actions_list) + self.action_probability_array = ( + self.legal_actions_mask / + np.sum(self.legal_actions_mask, axis=-1, keepdims=True)) + + def _state_key(self, state, player): + """Returns the key to use to look up this (state, player) pair.""" + if self.game_type.provides_information_state_string: + if player is None: + return state.information_state_string() + return state.information_state_string(player) + if self.game_type.provides_observation_string: + if player is None: + return state.observation_string() + return state.observation_string(player) + return str(state) + + def action_probabilities(self, state, player_id=None): + """Returns an {action: probability} dict, covering all legal actions.""" + legal_actions = ( + state.legal_actions() + if player_id is None else state.legal_actions(player_id)) + if not legal_actions: + return {0: 1.0} + probability = self.policy_for_key(self._state_key(state, player_id)) + return {action: probability[action] for action in legal_actions} + + def state_index(self, state): + """Returns the index in the TabularPolicy associated to `state`.""" + return self.state_lookup[self._state_key(state, state.current_player())] + + def policy_for_key(self, key): + """Returns the policy as a vector given a state key string. + + Args: + key: A key for the specified state. + + Returns: + A vector of probabilities, one per action. This is a slice of the + backing policy array, and so slice or index assignment will update the + policy. For example: + ``` + tabular_policy.policy_for_key(s)[:] = [0.1, 0.5, 0.4] + ``` + """ + return self.action_probability_array[self.state_lookup[key]] + + def to_dict(self): + """Returns a single dictionary representing the tabular policy. + + Returns: + A dictionary of string keys to lists of (action, prob) pairs. + """ + policy_dict = {} + num_actions = self.action_probability_array.shape[1] + for infostate_key, index in self.state_lookup.items(): + probs = self.action_probability_array[index] + actions_and_probs = [(a, probs[a]) for a in range(num_actions)] + policy_dict[infostate_key] = actions_and_probs + return policy_dict + + def __copy__(self, copy_action_probability_array=True): + """Returns a shallow copy of self. + + Most class attributes will be pointers to the copied object's attributes, + and therefore altering them could lead to unexpected behavioural changes. + Only action_probability_array is expected to be modified. + + Args: + copy_action_probability_array: Whether to also include + action_probability_array in the copy operation. + + Returns: + Copy. + """ + result = TabularPolicy.__new__(TabularPolicy) + result.state_lookup = self.state_lookup + result.game_type = self.game_type + result.legal_actions_mask = self.legal_actions_mask + result.state_in = self.state_in + result.state_lookup = self.state_lookup + result.states_per_player = self.states_per_player + result.states = self.states + result.game = self.game + result.player_ids = self.player_ids + if copy_action_probability_array: + result.action_probability_array = np.copy(self.action_probability_array) + return result + + def copy_with_noise(self, + alpha=0.0, + beta=0.0, + random_state=np.random.RandomState()): + """Returns a copy of this policy perturbed with noise. 
+ + Generates a new random distribution using a softmax on normal random + variables with temperature beta, and mixes it with the old distribution + using 1-alpha * old_distribution + alpha * random_distribution. + Args: + alpha: Parameter characterizing the mixture amount between new and old + distributions. Between 0 and 1. + alpha = 0: keep old table. + alpha = 1: keep random table. + beta: Temperature of the softmax. Makes for more extreme policies. + random_state: A numpy `RandomState` object. If not provided, a shared + random state will be used. + + Returns: + Perturbed copy. + """ + copied_instance = self.__copy__(False) + probability_array = self.action_probability_array + noise_mask = random_state.normal(size=probability_array.shape) + noise_mask = np.exp(beta * noise_mask) * self.legal_actions_mask + noise_mask = noise_mask / (np.sum(noise_mask, axis=1).reshape(-1, 1)) + copied_instance.action_probability_array = ( + 1 - alpha) * probability_array + alpha * noise_mask + return copied_instance + + +class UniformRandomPolicy(Policy): + """Policy where the action distribution is uniform over all legal actions. + + This is computed as needed, so can be used for games where a tabular policy + would be unfeasibly large, but incurs a legal action computation every time. + """ + + def __init__(self, game): + """Initializes a uniform random policy for all players in the game.""" + all_players = list(range(game.num_players())) + super().__init__(game, all_players) + + def action_probabilities(self, state, player_id=None): + """Returns a uniform random policy for a player in a state. + + Args: + state: A `pyspiel.State` object. + player_id: Optional, the player id for which we want an action. Optional + unless this is a simultaneous state at which multiple players can act. + + Returns: + A `dict` of `{action: probability}` for the specified player in the + supplied state. This will contain all legal actions, each with the same + probability, equal to 1 / num_legal_actions. + """ + legal_actions = ( + state.legal_actions() + if player_id is None else state.legal_actions(player_id)) + if not legal_actions: + return {0: 1.0} + probability = 1 / len(legal_actions) + return {action: probability for action in legal_actions} + + +class FirstActionPolicy(Policy): + """A policy that always takes the lowest-numbered legal action.""" + + def __init__(self, game): + all_players = list(range(game.num_players())) + super().__init__(game, all_players) + + def action_probabilities(self, state, player_id=None): + legal_actions = ( + state.legal_actions() + if player_id is None else state.legal_actions(player_id)) + if not legal_actions: + return {0: 1.0} + min_action = min(legal_actions) + return { + action: 1.0 if action == min_action else 0.0 for action in legal_actions + } + + +def get_tabular_policy_states(game): + """Returns the states of the game for a tabular policy.""" + if game.get_type().dynamics == pyspiel.GameType.Dynamics.MEAN_FIELD: + # TODO(author18): We use s.observation_string(DEFAULT_MFG_PLAYER) here as the + # number of history is exponential on the depth of the MFG. What we really + # need is a representation of the state. For many player Mean Field games, + # the state will be (x0, x1, x2, ..., xn) and the observation_string(0) will + # output the string of x0. In that case we would need something like + # str([observation_string(i) for i in range(num_player)]) + to_string = lambda s: s.observation_string(pyspiel.PlayerId. 
+ DEFAULT_PLAYER_ID) + else: + to_string = lambda s: s.history_str() + return get_all_states.get_all_states( + game, + depth_limit=-1, + include_terminals=False, + include_chance_states=False, + include_mean_field_states=False, + to_string=to_string) + + +def tabular_policy_from_callable(game, callable_policy, players=None): + """Converts a legacy callable policy into a TabularPolicy. + + Recommendation - instead of using this to convert your policy for evaluation + purposes, work directly with a `TabularPolicy` if possible. + Second choice - work with a `Policy` class and call `to_tabular` as needed. + + Args: + game: The game for which we want a TabularPolicy. + callable_policy: A callable: state -> action probabilities dict or list. + players: List of players this policy applies to. If `None`, applies to all + players. + + Returns: + A TabularPolicy that materializes the callable policy. + """ + tabular_policy = TabularPolicy(game, players) + for state_index, state in enumerate(tabular_policy.states): + action_probabilities = dict(callable_policy(state)) + infostate_policy = [ + action_probabilities.get(action, 0.) + for action in range(game.num_distinct_actions()) + ] + tabular_policy.action_probability_array[state_index, :] = infostate_policy + return tabular_policy + + +def pyspiel_policy_to_python_policy(game, pyspiel_tabular_policy, players=None): + """Converts a pyspiel.TabularPolicy to a TabularPolicy. + + Args: + game: The OpenSpiel game. + pyspiel_tabular_policy: Pyspiel tabular policy to copy from. + players: List of integer player ids to copy policy from. For example, + `players=[0]` will only copy player 0's policy over into the python policy + (the other player's policies will be undefined). Default value of `None` + will copy all players' policies. + + Returns: + python_policy + """ + policy = TabularPolicy(game, players=players) + for item in pyspiel_tabular_policy.policy_table().items(): + info_state_str, actions_probs = item + # If requested, only populate a policy for particular players. + if players is not None and info_state_str not in policy.state_lookup: + continue + state_policy = policy.policy_for_key(info_state_str) + if actions_probs: + state_policy[:] = 0.0 # Ensure policy is zero by default. + for action, prob in actions_probs: + state_policy[action] = prob + return policy + + +def python_policy_to_pyspiel_policy(python_tabular_policy): + """Converts a TabularPolicy to a pyspiel.TabularPolicy.""" + infostates_to_probabilities = dict() + for infostate, index in python_tabular_policy.state_lookup.items(): + probs = python_tabular_policy.action_probability_array[index] + legals = python_tabular_policy.legal_actions_mask[index] + + action_probs = [] + for action, (prob, is_legal) in enumerate(zip(probs, legals)): + if is_legal == 1: + action_probs.append((action, prob)) + infostates_to_probabilities[infostate] = action_probs + return pyspiel.TabularPolicy(infostates_to_probabilities) + + +def python_policies_to_pyspiel_policies(policies): + """Same conversion as above (list version). + + Args: + policies: a list of python.TabularPolicy + + Returns: + a list of pyspiel.TabularPolicy. + """ + return [python_policy_to_pyspiel_policy(p) for p in policies] + + +def merge_tabular_policies(tabular_policies, game): + """Merges n_player policies into single joint policy. + + Missing states are filled with a valid uniform policy. + + Args: + tabular_policies: List of python TabularPolicy (one for each player). + game: The game corresponding to the resulting TabularPolicy. 
+ + Returns: + merged_policy: A TabularPolicy with each player i's policy taken from the + ith joint_policy. + """ + if len(tabular_policies) != game.num_players(): + raise ValueError("len(tabular_policies) != num_players: %d != %d" % + (len(tabular_policies), game.num_players())) + merged_policy = TabularPolicy(game) + for p, p_states in enumerate(merged_policy.states_per_player): + for p_state in p_states: + to_index = merged_policy.state_lookup[p_state] + # Only copy if the state exists, otherwise fall back onto uniform. + if p_state in tabular_policies[p].state_lookup: + from_index = tabular_policies[p].state_lookup[p_state] + merged_policy.action_probability_array[to_index] = ( + tabular_policies[p].action_probability_array[from_index]) + return merged_policy diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/algorithms_corr_dist.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/algorithms_corr_dist.cc new file mode 100644 index 0000000..b623838 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/algorithms_corr_dist.cc @@ -0,0 +1,110 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/python/pybind11/algorithms_corr_dist.h" + +#include + +#include "open_spiel/algorithms/corr_dev_builder.h" +#include "open_spiel/algorithms/corr_dist.h" +#include "open_spiel/spiel.h" +#include "pybind11/include/pybind11/cast.h" +#include "pybind11/include/pybind11/pybind11.h" + +namespace open_spiel { +namespace py = ::pybind11; + +using open_spiel::algorithms::CorrDevBuilder; +using open_spiel::algorithms::CorrDistInfo; +using open_spiel::algorithms::CorrelationDevice; + +void init_pyspiel_algorithms_corr_dist(py::module& m) { + m.def("uniform_correlation_device", + &open_spiel::algorithms::UniformCorrelationDevice, + "Returns a uniform correlation device over a set of joint policies."); + + m.def("sampled_determinize_corr_dev", + &open_spiel::algorithms::SampledDeterminizeCorrDev, + "Returns a correlation device over deterministic policies sampled from " + "a correlation device."); + + m.def("determinize_corr_dev", &open_spiel::algorithms::DeterminizeCorrDev, + "Returns an exact correlation device over deterministic policies " + "equivalent to this correlation device. 
Warning: very costly!"); + + py::class_ corr_dist_info(m, "CorrDistInfo"); + corr_dist_info.def_readonly("dist_value", &CorrDistInfo::dist_value) + .def_readonly("on_policy_values", &CorrDistInfo::on_policy_values) + .def_readonly("best_response_values", &CorrDistInfo::best_response_values) + .def_readonly("deviation_incentives", &CorrDistInfo::deviation_incentives) + .def_readonly("best_response_policies", + &CorrDistInfo::best_response_policies) + .def_readonly("conditional_best_response_policies", + &CorrDistInfo::conditional_best_response_policies); + + py::class_ corr_dev_builder(m, "CorrDevBuilder"); + corr_dev_builder.def(py::init(), py::arg("seed") = 0) + .def("add_deterministic_joint_policy", + &CorrDevBuilder::AddDeterminsticJointPolicy, + py::arg("policy"), py::arg("weight") = 1.0) + .def("add_sampled_joint_policy", + &CorrDevBuilder::AddSampledJointPolicy, + py::arg("policy"), py::arg("num_samples"), py::arg("weight") = 1.0) + .def("add_mixed_joint_policy", + &CorrDevBuilder::AddMixedJointPolicy, + py::arg("policy"), + py::arg("weight") = 1.0) + .def("get_correlation_device", &CorrDevBuilder::GetCorrelationDevice); + + m.def( + "cce_dist", + [](std::shared_ptr game, + const CorrelationDevice& correlation_device, int player, + float prob_cut_threshold, const float action_value_tolerance) { + return algorithms::CCEDist(*game, correlation_device, player, + prob_cut_threshold, action_value_tolerance); + }, + "Returns a player's distance to a coarse-correlated equilibrium.", + py::arg("game"), py::arg("correlation_device"), py::arg("player"), + py::arg("prob_cut_threshold") = -1.0, + py::arg("action_value_tolerance") = -1.0); + + m.def( + "cce_dist", + [](std::shared_ptr game, + const CorrelationDevice& correlation_device, float prob_cut_threshold, + const float action_value_tolerance) { + return algorithms::CCEDist(*game, correlation_device, + prob_cut_threshold, action_value_tolerance); + }, + "Returns the distance to a coarse-correlated equilibrium.", + py::arg("game"), py::arg("correlation_device"), + py::arg("prob_cut_threshold") = -1.0, + py::arg("action_value_tolerance") = false); + + m.def( + "ce_dist", + [](std::shared_ptr game, + const CorrelationDevice& correlation_device, + const float action_value_tolerance) { + return algorithms::CEDist(*game, correlation_device, + action_value_tolerance); + }, + "Returns the distance to a correlated equilibrium.", py::arg("game"), + py::arg("correlation_device"), py::arg("action_value_tolerance") = -1.0); + + // TODO(author5): expose the rest of the functions. +} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/algorithms_corr_dist.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/algorithms_corr_dist.h new file mode 100644 index 0000000..78ce4a9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/algorithms_corr_dist.h @@ -0,0 +1,25 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef OPEN_SPIEL_PYTHON_PYBIND11_ALGORITHMS_CORR_DIST_H_ +#define OPEN_SPIEL_PYTHON_PYBIND11_ALGORITHMS_CORR_DIST_H_ + +#include "open_spiel/python/pybind11/pybind11.h" + +// Initialze the Python interface for trajectories. +namespace open_spiel { +void init_pyspiel_algorithms_corr_dist(::pybind11::module &m); +} + +#endif // OPEN_SPIEL_PYTHON_PYBIND11_ALGORITHMS_CORR_DIST_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/algorithms_trajectories.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/algorithms_trajectories.cc new file mode 100644 index 0000000..0a31e6d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/algorithms_trajectories.cc @@ -0,0 +1,81 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/python/pybind11/algorithms_trajectories.h" + +// Python bindings for trajectories.h + +#include "open_spiel/algorithms/trajectories.h" +#include "open_spiel/python/pybind11/pybind11.h" + +namespace open_spiel { +namespace py = ::pybind11; + +void init_pyspiel_algorithms_trajectories(py::module& m) { + py::class_(m, "BatchedTrajectory") + .def(py::init()) + .def_readwrite("observations", + &open_spiel::algorithms::BatchedTrajectory::observations) + .def_readwrite("state_indices", + &open_spiel::algorithms::BatchedTrajectory::state_indices) + .def_readwrite("legal_actions", + &open_spiel::algorithms::BatchedTrajectory::legal_actions) + .def_readwrite("actions", + &open_spiel::algorithms::BatchedTrajectory::actions) + .def_readwrite( + "player_policies", + &open_spiel::algorithms::BatchedTrajectory::player_policies) + .def_readwrite("player_ids", + &open_spiel::algorithms::BatchedTrajectory::player_ids) + .def_readwrite("rewards", + &open_spiel::algorithms::BatchedTrajectory::rewards) + .def_readwrite("valid", &open_spiel::algorithms::BatchedTrajectory::valid) + .def_readwrite( + "next_is_terminal", + &open_spiel::algorithms::BatchedTrajectory::next_is_terminal) + .def_readwrite("batch_size", + &open_spiel::algorithms::BatchedTrajectory::batch_size) + .def_readwrite( + "max_trajectory_length", + &open_spiel::algorithms::BatchedTrajectory::max_trajectory_length) + .def("resize_fields", + &open_spiel::algorithms::BatchedTrajectory::ResizeFields); + + m.def( + "record_batched_trajectories", + [](std::shared_ptr game, + const std::vector& policies, + const std::unordered_map& state_to_index, + int batch_size, bool include_full_observations, int seed, + int max_unroll_length) { + return open_spiel::algorithms::RecordBatchedTrajectory( + *game, policies, state_to_index, batch_size, + include_full_observations, seed, max_unroll_length); + }, + "Records a batch of trajectories."); + + py::class_(m, + "TrajectoryRecorder") + .def(py::init( + [](std::shared_ptr game, + const std::unordered_map& state_to_index, + int seed) { + return new algorithms::TrajectoryRecorder(*game, state_to_index, + seed); + })) + .def("record_batch", + 
&open_spiel::algorithms::TrajectoryRecorder::RecordBatch); +} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/algorithms_trajectories.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/algorithms_trajectories.h new file mode 100644 index 0000000..4381b2d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/algorithms_trajectories.h @@ -0,0 +1,25 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_PYTHON_PYBIND11_ALGORITHMS_TRAJECTORIES_H_ +#define OPEN_SPIEL_PYTHON_PYBIND11_ALGORITHMS_TRAJECTORIES_H_ + +#include "open_spiel/python/pybind11/pybind11.h" + +// Initialze the Python interface for trajectories. +namespace open_spiel { +void init_pyspiel_algorithms_trajectories(::pybind11::module &m); +} + +#endif // OPEN_SPIEL_PYTHON_PYBIND11_ALGORITHMS_TRAJECTORIES_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/bots.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/bots.cc new file mode 100644 index 0000000..164b9f9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/bots.cc @@ -0,0 +1,227 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/python/pybind11/bots.h" + +#include + +#include +#include +#include + +#include "open_spiel/algorithms/evaluate_bots.h" +#include "open_spiel/algorithms/is_mcts.h" +#include "open_spiel/algorithms/mcts.h" +#include "open_spiel/bots/gin_rummy/simple_gin_rummy_bot.h" +#include "open_spiel/bots/uci/uci_bot.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/python/pybind11/pybind11.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_bots.h" +#include "open_spiel/spiel_utils.h" +#include "pybind11/include/pybind11/cast.h" +#include "pybind11/include/pybind11/detail/common.h" +#include "pybind11/include/pybind11/pybind11.h" +#include "pybind11/include/pybind11/pytypes.h" + +// Optional headers. 
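+// The roshambo header below is pulled in only when the build enables
+// OPEN_SPIEL_BUILD_WITH_ROSHAMBO; the corresponding bot bindings near the end
+// of this file are guarded by the same preprocessor flag.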
+#if OPEN_SPIEL_BUILD_WITH_ROSHAMBO +#include "open_spiel/bots/roshambo/roshambo_bot.h" +#endif + +namespace open_spiel { +namespace { + +using ::open_spiel::algorithms::Evaluator; +using ::open_spiel::algorithms::SearchNode; + +namespace py = ::pybind11; + +} // namespace + +void init_pyspiel_bots(py::module& m) { + py::classh> bot(m, "Bot"); + bot.def(py::init<>()) + .def("step", &Bot::Step) + .def("step_verbose", &Bot::StepVerbose) + .def("restart", &Bot::Restart) + .def("restart_at", &Bot::RestartAt) + .def("provides_force_action", &Bot::ProvidesForceAction) + .def("force_action", &Bot::ForceAction) + .def("inform_action", &Bot::InformAction) + .def("inform_actions", &Bot::InformActions) + .def("provides_policy", &Bot::ProvidesPolicy) + .def("get_policy", &Bot::GetPolicy) + .def("step_with_policy", &Bot::StepWithPolicy) + .def("is_clonable", &Bot::IsClonable) + .def("clone", &Bot::Clone); + + m.def( + "load_bot", + py::overload_cast&, + Player>(&open_spiel::LoadBot), + py::arg("bot_name"), py::arg("game"), py::arg("player"), + "Returns a new bot object for the specified bot name using default " + "parameters"); + m.def( + "load_bot", + py::overload_cast&, + Player, const GameParameters&>(&open_spiel::LoadBot), + py::arg("bot_name"), py::arg("game"), py::arg("player"), + py::arg("params"), + "Returns a new bot object for the specified bot name using given " + "parameters"); + m.def("is_bot_registered", &IsBotRegistered, + "Checks if a bot under the given name is registered."); + m.def("registered_bots", &RegisteredBots, + "Returns a list of registered bot names."); + m.def( + "bots_that_can_play_game", + [](std::shared_ptr game, int player) { + return BotsThatCanPlayGame(*game, player); + }, + py::arg("game"), py::arg("player"), + "Returns a list of bot names that can play specified game for the " + "given player."); + m.def( + "bots_that_can_play_game", + [](std::shared_ptr game) { + return BotsThatCanPlayGame(*game); + }, + py::arg("game"), + "Returns a list of bot names that can play specified game for any " + "player."); + + py::class_> + mcts_evaluator(m, "Evaluator"); + py::class_>( + m, "RandomRolloutEvaluator") + .def(py::init(), py::arg("n_rollouts"), py::arg("seed")); + + py::enum_(m, "ChildSelectionPolicy") + .value("UCT", algorithms::ChildSelectionPolicy::UCT) + .value("PUCT", algorithms::ChildSelectionPolicy::PUCT); + + py::class_ search_node(m, "SearchNode"); + search_node.def_readonly("action", &SearchNode::action) + .def_readonly("prior", &SearchNode::prior) + .def_readonly("player", &SearchNode::player) + .def_readonly("explore_count", &SearchNode::explore_count) + .def_readonly("total_reward", &SearchNode::total_reward) + .def_readonly("outcome", &SearchNode::outcome) + .def_readonly("children", &SearchNode::children) + .def("best_child", &SearchNode::BestChild) + .def("to_string", &SearchNode::ToString) + .def("children_str", &SearchNode::ChildrenStr); + + py::classh(m, "MCTSBot") + .def( + py::init([](std::shared_ptr game, + std::shared_ptr evaluator, double uct_c, + int max_simulations, int64_t max_memory_mb, bool solve, + int seed, bool verbose, + algorithms::ChildSelectionPolicy child_selection_policy) { + return new algorithms::MCTSBot( + *game, evaluator, uct_c, max_simulations, max_memory_mb, solve, + seed, verbose, child_selection_policy); + }), + py::arg("game"), py::arg("evaluator"), py::arg("uct_c"), + py::arg("max_simulations"), py::arg("max_memory_mb"), + py::arg("solve"), py::arg("seed"), py::arg("verbose"), + py::arg("child_selection_policy") = + 
algorithms::ChildSelectionPolicy::UCT) + .def("step", &algorithms::MCTSBot::Step) + .def("mcts_search", &algorithms::MCTSBot::MCTSearch); + + py::enum_(m, "ISMCTSFinalPolicyType") + .value("NORMALIZED_VISIT_COUNT", + algorithms::ISMCTSFinalPolicyType::kNormalizedVisitCount) + .value("MAX_VISIT_COUNT", + algorithms::ISMCTSFinalPolicyType::kMaxVisitCount) + .value("MAX_VALUE", algorithms::ISMCTSFinalPolicyType::kMaxValue); + + py::classh(m, "ISMCTSBot") + .def(py::init, double, int, int, + algorithms::ISMCTSFinalPolicyType, bool, bool>(), + py::arg("seed"), py::arg("evaluator"), py::arg("uct_c"), + py::arg("max_simulations"), + py::arg("max_world_samples") = algorithms::kUnlimitedNumWorldSamples, + py::arg("final_policy_type") = + algorithms::ISMCTSFinalPolicyType::kNormalizedVisitCount, + py::arg("use_observation_string") = false, + py::arg("allow_inconsistent_action_sets") = false) + .def("step", &algorithms::ISMCTSBot::Step) + .def("provides_policy", &algorithms::MCTSBot::ProvidesPolicy) + .def("get_policy", &algorithms::ISMCTSBot::GetPolicy) + .def("step_with_policy", &algorithms::ISMCTSBot::StepWithPolicy) + .def("restart", &algorithms::ISMCTSBot::Restart) + .def("restart_at", &algorithms::ISMCTSBot::RestartAt); + + m.def("evaluate_bots", + py::overload_cast&, int>( + open_spiel::EvaluateBots), + py::arg("state"), py::arg("bots"), py::arg("seed"), + "Plays a single game with the given bots and returns the final " + "utilities."); + + m.def("make_uniform_random_bot", open_spiel::MakeUniformRandomBot, + "A uniform random bot, for test purposes."); + + m.def("make_stateful_random_bot", open_spiel::MakeStatefulRandomBot, + "A stateful random bot, for test purposes."); + m.def( + "make_policy_bot", + [](std::shared_ptr game, Player player_id, int seed, + std::shared_ptr policy) { + return MakePolicyBot(*game, player_id, seed, policy); + }, + "A bot that samples from a policy."); + + py::enum_(m, "SearchLimitType") + .value("MOVETIME", open_spiel::uci::SearchLimitType::kMoveTime) + .value("NODES", open_spiel::uci::SearchLimitType::kNodes) + .value("DEPTH", open_spiel::uci::SearchLimitType::kDepth) + .export_values(); + +#ifndef _WIN32 + m.def("make_uci_bot", open_spiel::uci::MakeUCIBot, py::arg("bot_binary_path"), + py::arg("search_limit_value"), py::arg("ponder"), py::arg("options"), + py::arg("search_limit_type") = + open_spiel::uci::SearchLimitType::kMoveTime, + py::arg("use_game_history_for_position") = false, + "Bot that can play chess using UCI chess engine."); +#endif + +#if OPEN_SPIEL_BUILD_WITH_ROSHAMBO + m.attr("ROSHAMBO_NUM_THROWS") = py::int_(open_spiel::roshambo::kNumThrows); + m.attr("ROSHAMBO_NUM_BOTS") = py::int_(open_spiel::roshambo::kNumBots); + // no arguments; returns vector of strings + m.def("roshambo_bot_names", open_spiel::roshambo::RoshamboBotNames); + // args: player_int (int), bot name (string), num throws (int), returns bot + m.def("make_roshambo_bot", open_spiel::roshambo::MakeRoshamboBot, + py::arg("player_id"), py::arg("bot_name"), + py::arg("num_throws") = open_spiel::roshambo::kNumThrows); +#endif + + m.def( + "make_simple_gin_rummy_bot", + [](const GameParameters& params, + int player_id) -> std::unique_ptr { + return std::make_unique(params, + player_id); + }, + py::arg("params"), py::arg("player_id")); +} +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/bots.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/bots.h new file mode 100644 index 0000000..e9f648e --- /dev/null +++ 
b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/bots.h @@ -0,0 +1,25 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_PYTHON_PYBIND11_BOTS_H_ +#define OPEN_SPIEL_PYTHON_PYBIND11_BOTS_H_ + +#include "open_spiel/python/pybind11/pybind11.h" + +// Python bindings for bots. +namespace open_spiel { +void init_pyspiel_bots(::pybind11::module &m); +} + +#endif // OPEN_SPIEL_PYTHON_PYBIND11_BOTS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/evaluation_sco.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/evaluation_sco.cc new file mode 100644 index 0000000..0f567e0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/evaluation_sco.cc @@ -0,0 +1,65 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/python/pybind11/evaluation_sco.h" + +#include +#include +#include + +#include "open_spiel/evaluation/soft_condorcet_optimization.h" +#include "pybind11/include/pybind11/cast.h" +#include "pybind11/include/pybind11/pybind11.h" + +namespace py = ::pybind11; +using open_spiel::evaluation::FenchelYoungOptimizer; +using open_spiel::evaluation::Optimizer; +using open_spiel::evaluation::SoftCondorcetOptimizer; +using open_spiel::evaluation::TupleListVote; + +void open_spiel::init_pyspiel_evaluation_sco(py::module& m) { + py::module_ sco = m.def_submodule("sco"); + + // Abstract base class. Needed for inheritance of classes below. + py::classh(sco, "Optimizer"); // NOLINT. 
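+  // Concrete optimizers; both inherit the Optimizer interface bound above and
+  // expose run_solver(iterations, learning_rate) plus ratings(). Their
+  // constructor keyword arguments mirror the py::arg declarations below.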
+ + py::classh(sco, "SoftCondorcetOptimizer") + .def(py::init&>(), + py::arg("votes"), py::arg("rating_lower_bound"), + py::arg("rating_upper_bound"), py::arg("batch_size"), + py::arg("temperature") = 1, py::arg("rng_seed") = 0, + py::arg("compute_norm_freq") = 1000, + py::arg("initial_param_noise") = 0.0, + py::arg("alternative_names") = + static_cast&>( + std::initializer_list{})) + .def("run_solver", &Optimizer::RunSolver, py::arg("iterations"), + py::arg("learning_rate")) + .def("ratings", &Optimizer::ratings); + + py::classh(sco, "FenchelYoungOptimizer") + .def(py::init&>(), + py::arg("votes"), py::arg("rating_lower_bound"), + py::arg("rating_upper_bound"), py::arg("batch_size"), + py::arg("rng_seed") = 0, py::arg("compute_norm_freq") = 1000, + py::arg("initial_param_noise") = 0.0, py::arg("sigma") = 100.0, + py::arg("alternative_names") = + static_cast&>( + std::initializer_list{})) + .def("run_solver", &Optimizer::RunSolver, py::arg("iterations"), + py::arg("learning_rate")) + .def("ratings", &Optimizer::ratings); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/evaluation_sco.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/evaluation_sco.h new file mode 100644 index 0000000..2375cb2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/evaluation_sco.h @@ -0,0 +1,25 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_PYTHON_PYBIND11_EVALUATION_SCO_H_ +#define OPEN_SPIEL_PYTHON_PYBIND11_EVALUATION_SCO_H_ + +#include "open_spiel/python/pybind11/pybind11.h" + +// Initialize the Python interface for games/negotiation. +namespace open_spiel { +void init_pyspiel_evaluation_sco(::pybind11::module &m); +} + +#endif // OPEN_SPIEL_PYTHON_PYBIND11_EVALUATION_SCO_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/game_transforms.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/game_transforms.cc new file mode 100644 index 0000000..775bd8f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/game_transforms.cc @@ -0,0 +1,82 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/python/pybind11/game_transforms.h" + +// Python bindings for policies and algorithms handling them. 
+ +#include +#include + +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/game_transforms/cached_tree.h" +#include "open_spiel/game_transforms/normal_form_extensive_game.h" +#include "open_spiel/game_transforms/repeated_game.h" +#include "open_spiel/game_transforms/turn_based_simultaneous_game.h" +#include "open_spiel/python/pybind11/pybind11.h" // NOLINT + +namespace open_spiel { +namespace py = ::pybind11; + +void init_pyspiel_game_transforms(py::module& m) { + m.def("load_game_as_turn_based", + py::overload_cast(&LoadGameAsTurnBased), + "Converts a simultaneous game into an turn-based game with infosets."); + + m.def("load_game_as_turn_based", + py::overload_cast( + &LoadGameAsTurnBased), + "Converts a simultaneous game into an turn-based game with infosets."); + + m.def("extensive_to_tensor_game", ExtensiveToTensorGame, + "Converts an extensive-game to its equivalent tensor game, " + "which is exponentially larger. Use only with small games."); + + m.def( + "convert_to_turn_based", + [](std::shared_ptr game) { + return ConvertToTurnBased(*game); + }, + "Returns a turn-based version of the given game."); + + m.def( + "create_repeated_game", + [](std::shared_ptr game, const GameParameters& params) { + return CreateRepeatedGame(*game, params); + }, + "Creates a repeated game from a stage game."); + + m.def("create_repeated_game", + py::overload_cast( + &CreateRepeatedGame), + "Creates a repeated game from a stage game."); + + m.def("convert_to_cached_tree", + [](std::shared_ptr game) { + return cached_tree::ConvertToCachedTree(*game); + }, + "Returns a cached tree version of the given game."); + + m.def("load_game_as_cached_tree", + py::overload_cast( + &cached_tree::LoadGameAsCachedTree), + "Loads a game as cached tree wrapped game."); + + m.def("load_game_as_cached_tree", + py::overload_cast( + &cached_tree::LoadGameAsCachedTree), + "Loads a game as cached tree wrapped game."); +} +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/game_transforms.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/game_transforms.h new file mode 100644 index 0000000..b3f28cb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/game_transforms.h @@ -0,0 +1,25 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_GAME_TRANSFORMS_PYSPIEL_H_ +#define OPEN_SPIEL_GAME_TRANSFORMS_PYSPIEL_H_ + +#include "open_spiel/python/pybind11/pybind11.h" + +// Initialze the Python interface for game transforms. 
+namespace open_spiel { +void init_pyspiel_game_transforms(::pybind11::module &m); +} + +#endif // OPEN_SPIEL_GAME_TRANSFORMS_PYSPIEL_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_backgammon.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_backgammon.cc new file mode 100644 index 0000000..8945456 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_backgammon.cc @@ -0,0 +1,52 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/python/pybind11/games_backgammon.h" + +#include "open_spiel/games/backgammon/backgammon.h" +#include "open_spiel/python/pybind11/pybind11.h" +#include "open_spiel/spiel.h" + +namespace py = ::pybind11; +using open_spiel::Game; +using open_spiel::State; +using open_spiel::backgammon::BackgammonState; +using open_spiel::backgammon::CheckerMove; + +void open_spiel::init_pyspiel_games_backgammon(py::module& m) { + py::class_(m, "CheckerMove") + .def_readwrite("pos", &CheckerMove::pos) + .def_readwrite("num", &CheckerMove::num) + .def_readwrite("hit", &CheckerMove::hit); + + py::classh(m, "BackgammonState") + .def("augment_with_hit_info", &BackgammonState::AugmentWithHitInfo) + .def("board", &BackgammonState::board) + .def("checker_moves_to_spiel_move", + &BackgammonState::CheckerMovesToSpielMove) + .def("spiel_move_to_checker_moves", + &BackgammonState::SpielMoveToCheckerMoves) + .def("translate_action", &BackgammonState::TranslateAction) + // Pickle support + .def(py::pickle( + [](const BackgammonState& state) { // __getstate__ + return SerializeGameAndState(*state.GetGame(), state); + }, + [](const std::string& data) { // __setstate__ + std::pair, std::unique_ptr> + game_and_state = DeserializeGameAndState(data); + return dynamic_cast( + game_and_state.second.release()); + })); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_backgammon.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_backgammon.h new file mode 100644 index 0000000..acfb23b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_backgammon.h @@ -0,0 +1,25 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef OPEN_SPIEL_PYTHON_PYBIND11_GAMES_BACKGAMMON_H_ +#define OPEN_SPIEL_PYTHON_PYBIND11_GAMES_BACKGAMMON_H_ + +#include "open_spiel/python/pybind11/pybind11.h" + +// Initialze the Python interface for games/negotiation. +namespace open_spiel { +void init_pyspiel_games_backgammon(::pybind11::module &m); +} + +#endif // OPEN_SPIEL_PYTHON_PYBIND11_GAMES_BACKGAMMON_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_bargaining.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_bargaining.cc new file mode 100644 index 0000000..297680b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_bargaining.cc @@ -0,0 +1,87 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/python/pybind11/games_bargaining.h" + +#include "open_spiel/games/bargaining/bargaining.h" +#include "open_spiel/python/pybind11/pybind11.h" +#include "open_spiel/spiel.h" + +namespace py = ::pybind11; +using open_spiel::Game; +using open_spiel::State; +using open_spiel::bargaining::BargainingGame; +using open_spiel::bargaining::BargainingState; +using open_spiel::bargaining::Instance; +using open_spiel::bargaining::Offer; + +void open_spiel::init_pyspiel_games_bargaining(py::module& m) { + py::module_ bargaining = m.def_submodule("bargaining"); + bargaining.attr("NumItemTypes") = &bargaining::kNumItemTypes; + bargaining.attr("PoolMinNumItems") = &bargaining::kPoolMinNumItems; + bargaining.attr("PoolMaxNumItems") = &bargaining::kPoolMaxNumItems; + bargaining.attr("TotalValueAllItems") = &bargaining::kTotalValueAllItems; + + py::class_(m, "Instance") + .def(py::init<>()) + .def_readwrite("pool", &Instance::pool) + .def_readwrite("values", &Instance::values) + .def("__str__", &Instance::ToString); + + py::class_(m, "Offer") + .def(py::init<>()) + .def_readwrite("quantities", &Offer::quantities) + .def("__str__", &Offer::ToString); + + py::classh(m, "BargainingState") + .def("instance", &BargainingState::GetInstance) + .def("offers", &BargainingState::Offers) + .def("agree_action", &BargainingState::AgreeAction) + // set_instance(instance) + .def("set_instance", &BargainingState::SetInstance) + // Pickle support + .def(py::pickle( + [](const BargainingState& state) { // __getstate__ + return SerializeGameAndState(*state.GetGame(), state); + }, + [](const std::string& data) { // __setstate__ + std::pair, std::unique_ptr> + game_and_state = DeserializeGameAndState(data); + return dynamic_cast( + game_and_state.second.release()); + })); + + py::classh(m, "BargainingGame") + .def("max_turns", &BargainingGame::max_turns) + .def("discount", &BargainingGame::discount) + .def("prob_end", &BargainingGame::prob_end) + .def("all_instances", &BargainingGame::AllInstances) + .def("all_offers", &BargainingGame::AllOffers) + // get_offer_by_quantities(quantities: List[int]). 
Returns a tuple + // of (offer, OpenSpiel action) + .def("get_offer_by_quantities", &BargainingGame::GetOfferByQuantities) + .def("get_instance_index", &BargainingGame::GetInstanceIndex) + .def("get_offer_index", &BargainingGame::GetOfferIndex) + .def("get_possible_opponent_values", + &BargainingGame::GetPossibleOpponentValues) + // Pickle support + .def(py::pickle( + [](std::shared_ptr game) { // __getstate__ + return game->ToString(); + }, + [](const std::string& data) { // __setstate__ + return std::dynamic_pointer_cast( + std::const_pointer_cast(LoadGame(data))); + })); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_bargaining.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_bargaining.h new file mode 100644 index 0000000..a2271c9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_bargaining.h @@ -0,0 +1,25 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_PYTHON_PYBIND11_GAMES_BARGAINING_H_ +#define OPEN_SPIEL_PYTHON_PYBIND11_GAMES_BARGAINING_H_ + +#include "open_spiel/python/pybind11/pybind11.h" + +// Initialze the Python interface for games/bargaining. +namespace open_spiel { +void init_pyspiel_games_bargaining(::pybind11::module &m); +} + +#endif // OPEN_SPIEL_PYTHON_PYBIND11_GAMES_BARGAINING_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_blackjack.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_blackjack.cc new file mode 100644 index 0000000..2b76253 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_blackjack.cc @@ -0,0 +1,98 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/python/pybind11/games_blackjack.h" + +#include +#include +#include + +#include "open_spiel/games/blackjack/blackjack.h" +#include "open_spiel/spiel.h" + +namespace py = ::pybind11; +using open_spiel::Game; +using open_spiel::State; +using open_spiel::blackjack::ActionType; +using open_spiel::blackjack::Phase; +using open_spiel::blackjack::BlackjackGame; +using open_spiel::blackjack::BlackjackState; + +void open_spiel::init_pyspiel_games_blackjack(py::module& m) { + py::module_ blackjack = m.def_submodule("blackjack"); + + blackjack.attr("HIDDEN_CARD_STR") = py::str(blackjack::kHiddenCardStr); + + py::enum_(blackjack, "ActionType") + .value("HIT", ActionType::kHit) + .value("STAND", ActionType::kStand) + .export_values(); + + py::enum_(blackjack, "Phase") + .value("INITIAL_DEAL", Phase::kInitialDeal) + .value("PLAYER_TURN", Phase::kPlayerTurn) + .value("DEALER_TURN", Phase::kDealerTurn) + .export_values(); + + // args: int card; returns: string + blackjack.def("card_to_string", open_spiel::blackjack::CardToString) + // args: list of ints and a start index; returns: list of strings + .def("cards_to_strings", open_spiel::blackjack::CardsToStrings, + py::arg("cards"), py::arg("start_index") = 0) + // args: string; returns: int (-1 if invalid) + .def("get_card_by_string", open_spiel::blackjack::GetCardByString) + // args: phase; returns: string + .def("phase_to_string", open_spiel::blackjack::PhaseToString); + + py::classh(blackjack, "BlackjackState") + .def("dealer_id", &BlackjackState::DealerId) // no args + // args: int player; returns: int + .def("get_best_player_total", &BlackjackState::GetBestPlayerTotal) + // args: int player, returns: list of ints + .def("cards", &BlackjackState::cards) + // args: none; returns: phase + .def("phase", &BlackjackState::phase) + // args: int player + .def("visible_cards_sorted_vector", + &BlackjackState::VisibleCardsSortedVector) + // args: none, returns: int + .def("dealers_visible_card", &BlackjackState::DealersVisibleCard) + // args: none, returns: list of ints + .def("player_cards_sorted_vector", + &BlackjackState::PlayerCardsSortedVector) + // args: int player + .def("is_turn_over", &BlackjackState::IsTurnOver) + // Pickle support + .def(py::pickle( + [](const BlackjackState& state) { // __getstate__ + return SerializeGameAndState(*state.GetGame(), state); + }, + [](const std::string& data) { // __setstate__ + std::pair, std::unique_ptr> + game_and_state = DeserializeGameAndState(data); + return dynamic_cast( + game_and_state.second.release()); + })); + + py::classh(blackjack, "BlackjackGame") + // Pickle support + .def(py::pickle( + [](std::shared_ptr game) { // __getstate__ + return game->ToString(); + }, + [](const std::string& data) { // __setstate__ + return std::dynamic_pointer_cast( + std::const_pointer_cast(LoadGame(data))); + })); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_blackjack.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_blackjack.h new file mode 100644 index 0000000..3968b0d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_blackjack.h @@ -0,0 +1,25 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_PYTHON_PYBIND11_GAMES_BLACKJACK_H_ +#define OPEN_SPIEL_PYTHON_PYBIND11_GAMES_BLACKJACK_H_ + +#include "open_spiel/python/pybind11/pybind11.h" + +// Initialize the Python interface for blackjack. +namespace open_spiel { +void init_pyspiel_games_blackjack(::pybind11::module &m); +} + +#endif // OPEN_SPIEL_PYTHON_PYBIND11_GAMES_BLACKJACK_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_bridge.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_bridge.cc new file mode 100644 index 0000000..16f4acf --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_bridge.cc @@ -0,0 +1,77 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/python/pybind11/games_bridge.h" + +#include + +#include "open_spiel/games/bridge/bridge.h" +#include "open_spiel/python/pybind11/pybind11.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { + +namespace py = ::pybind11; +using bridge::BridgeGame; +using bridge::BridgeState; + +void init_pyspiel_games_bridge(py::module& m) { + py::classh(m, "BridgeState") + .def("contract_index", &BridgeState::ContractIndex) + .def("possible_contracts", &BridgeState::PossibleContracts) + .def("score_by_contract", &BridgeState::ScoreByContract) + .def("score_for_contracts", &BridgeState::ScoreForContracts) + .def("current_phase", &BridgeState::CurrentPhase) + .def("write_observation_tensor", + [](const BridgeState& state, + py::array_t array) { + py::buffer_info buf = array.request(); + SPIEL_CHECK_EQ(buf.ndim, 1); + SPIEL_CHECK_EQ(buf.strides.front(), buf.itemsize); + state.WriteObservationTensor( + state.CurrentPlayer(), + absl::MakeSpan(static_cast(buf.ptr), + buf.shape.front())); + }) + .def("private_observation_tensor", &BridgeState::PrivateObservationTensor) + .def("public_observation_tensor", &BridgeState::PublicObservationTensor) + // Pickle support + .def(py::pickle( + [](const BridgeState& state) { // __getstate__ + return SerializeGameAndState(*state.GetGame(), state); + }, + [](const std::string& data) { // __setstate__ + std::pair, std::unique_ptr> + game_and_state = DeserializeGameAndState(data); + return dynamic_cast(game_and_state.second.release()); + })); + + py::classh(m, "BridgeGame") + .def("num_possible_contracts", &BridgeGame::NumPossibleContracts) + .def("contract_string", &BridgeGame::ContractString) + .def("private_observation_tensor_size", + &BridgeGame::PrivateObservationTensorSize) + .def("public_observation_tensor_size", + 
&BridgeGame::PublicObservationTensorSize) + // Pickle support + .def(py::pickle( + [](std::shared_ptr game) { // __getstate__ + return game->ToString(); + }, + [](const std::string& data) { // __setstate__ + return std::dynamic_pointer_cast( + std::const_pointer_cast(LoadGame(data))); + })); +} +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_bridge.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_bridge.h new file mode 100644 index 0000000..cd5d5ad --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_bridge.h @@ -0,0 +1,25 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_PYTHON_PYBIND11_GAMES_BRIDGE_H_ +#define OPEN_SPIEL_PYTHON_PYBIND11_GAMES_BRIDGE_H_ + +#include "open_spiel/python/pybind11/pybind11.h" + +// Initialize the Python interface for bridge. +namespace open_spiel { +void init_pyspiel_games_bridge(::pybind11::module &m); +} + +#endif // OPEN_SPIEL_PYTHON_PYBIND11_GAMES_BRIDGE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_chess.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_chess.cc new file mode 100644 index 0000000..49d10ef --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_chess.cc @@ -0,0 +1,127 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/python/pybind11/games_chess.h" + +#include +#include +#include + +#include "open_spiel/games/chess/chess.h" +#include "open_spiel/games/chess/chess_board.h" +#include "open_spiel/games/chess/chess_common.h" +#include "open_spiel/spiel.h" +#include "pybind11/include/pybind11/cast.h" +#include "pybind11/include/pybind11/pybind11.h" + +namespace py = ::pybind11; +using open_spiel::Game; +using open_spiel::State; +using open_spiel::chess::ChessGame; +using open_spiel::chess::ChessState; +using open_spiel::chess::ChessBoard; +using open_spiel::chess::Color; +using open_spiel::chess::Square; +using open_spiel::chess::Piece; +using open_spiel::chess::PieceType; +using open_spiel::chess::Move; + +void open_spiel::init_pyspiel_games_chess(py::module& m) { + py::module_ chess = m.def_submodule("chess"); + + py::enum_(chess, "Color") + .value("BLACK", Color::kBlack) + .value("WHITE", Color::kWhite) + .value("EMPTY", Color::kEmpty) + .export_values(); + + py::enum_(chess, "PieceType") + .value("EMPTY", PieceType::kEmpty) + .value("KING", PieceType::kKing) + .value("QUEEN", PieceType::kQueen) + .value("ROOK", PieceType::kRook) + .value("BISHOP", PieceType::kBishop) + .value("KNIGHT", PieceType::kKnight) + .value("PAWN", PieceType::kPawn) + .export_values(); + + py::class_(chess, "Piece") + .def(py::init<>()) + .def_readonly("color", &Piece::color) + .def_readonly("type", &Piece::type); + + py::class_(chess, "Square") + .def(py::init<>()) + .def_readonly("x", &Square::x) + .def_readonly("y", &Square::y); + + py::class_(chess, "Move") + .def(py::init<>()) + .def_readonly("from_square", &Move::from) // "from" is a python keyword + .def_readonly("to_square", &Move::to) + .def_readonly("piece", &Move::piece) + .def_readonly("promotion_type", &Move::promotion_type) + .def("is_castling", &Move::is_castling) + .def("to_string", &Move::ToString) + .def("to_san", &Move::ToSAN) + .def("to_lan", &Move::ToLAN, py::arg("chess960") = false, + py::arg("board") = nullptr); + + py::classh(chess, "ChessBoard") + .def("has_legal_moves", &ChessBoard::HasLegalMoves) + .def("debug_string", &ChessBoard::DebugString, + py::arg("shredder_fen") = false) + .def("to_fen", &ChessBoard::ToFEN, py::arg("shredder") = false) + .def("to_unicode_string", &ChessBoard::ToUnicodeString); + + py::classh(m, "ChessState") + .def("board", py::overload_cast<>(&ChessState::Board)) + .def("debug_string", &ChessState::DebugString) + .def("is_repetition_draw", &ChessState::IsRepetitionDraw) + .def("moves_history", py::overload_cast<>(&ChessState::MovesHistory)) + // num_repetitions(state: ChessState) -> int + .def("num_repetitions", &ChessState::NumRepetitions) + .def("parse_move_to_action", &ChessState::ParseMoveToAction) + // Pickle support + .def(py::pickle( + [](const ChessState& state) { // __getstate__ + return SerializeGameAndState(*state.GetGame(), state); + }, + [](const std::string& data) { // __setstate__ + std::pair, std::unique_ptr> + game_and_state = DeserializeGameAndState(data); + return dynamic_cast(game_and_state.second.release()); + })); + + py::classh(m, "ChessGame") + .def("is_chess960", &ChessGame::IsChess960) + // Pickle support + .def(py::pickle( + [](std::shared_ptr game) { // __getstate__ + return game->ToString(); + }, + [](const std::string& data) { // __setstate__ + return std::dynamic_pointer_cast( + std::const_pointer_cast(LoadGame(data))); + })); + + // action_to_move(action: int, board: ChessBoard, chess960: bool = false) + chess.def("action_to_move", &chess::ActionToMove, py::arg("action"), + 
py::arg("board")); + + // move_to_action(move: Move, board_size: int = default_size, + // chess960: bool = false) + chess.def("move_to_action", &chess::MoveToAction, + py::arg("move"), py::arg("board_size") = chess::kDefaultBoardSize); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_chess.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_chess.h new file mode 100644 index 0000000..e3c46d6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_chess.h @@ -0,0 +1,25 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_PYTHON_PYBIND11_GAMES_CHESS_H_ +#define OPEN_SPIEL_PYTHON_PYBIND11_GAMES_CHESS_H_ + +#include "open_spiel/python/pybind11/pybind11.h" + +// Initialze the Python interface for games/negotiation. +namespace open_spiel { +void init_pyspiel_games_chess(::pybind11::module &m); +} + +#endif // OPEN_SPIEL_PYTHON_PYBIND11_GAMES_CHESS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_colored_trails.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_colored_trails.cc new file mode 100644 index 0000000..3e8dc62 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_colored_trails.cc @@ -0,0 +1,121 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/python/pybind11/games_colored_trails.h" + +#include + +#include "open_spiel/games/colored_trails/colored_trails.h" +#include "open_spiel/python/pybind11/pybind11.h" +#include "open_spiel/spiel.h" + +namespace py = ::pybind11; +using open_spiel::Game; +using open_spiel::State; +using open_spiel::colored_trails::ColoredTrailsGame; +using open_spiel::colored_trails::ColoredTrailsState; +using open_spiel::colored_trails::Trade; +using open_spiel::colored_trails::Board; + +using open_spiel::colored_trails::kDefaultNumColors; +using open_spiel::colored_trails::kNumChipsLowerBound; +using open_spiel::colored_trails::kNumChipsUpperBound; + +void open_spiel::init_pyspiel_games_colored_trails(py::module& m) { + m.attr("NUM_COLORS") = py::int_(kDefaultNumColors); + m.attr("NUM_CHIPS_LOWER_BOUND") = py::int_(kNumChipsLowerBound); + m.attr("NUM_CHIPS_UPPER_BOUND") = py::int_(kNumChipsUpperBound); + + py::class_(m, "Trade") + // arguments: giving, receiving + .def(py::init&, const std::vector&>()) + .def_readwrite("giving", &Trade::giving) + .def_readwrite("receiving", &Trade::receiving) + .def("to_string", &Trade::ToString) + .def("__str__", &Trade::ToString); + + py::class_(m, "Board") + .def(py::init<>()) + // arguments: size, num_colors, num_players + .def(py::init()) + .def_readonly("size", &Board::size) + .def_readonly("num_colors", &Board::num_colors) + .def_readonly("num_players", &Board::num_players) + // one-dimensional list in row-major form, contains colors of each cell + .def_readonly("board", &Board::board) + // list integers, one per player, for the number of chips they have + .def_readonly("num_chips", &Board::num_chips) + // list of lists, one per player, of the actual chips that player has + .def_readonly("chips", &Board::chips) + // list if positions of the players and the flag (the last element) + .def_readonly("positions", &Board::positions) + // arguments: (player: List[int], trade: trade) + .def("apply_trade", &Board::ApplyTrade) + // no arguments; returns a clone of this board + .def("clone", &Board::Clone) + // in_bounds(row, col); returns true/false + .def("in_bounds", &Board::InBounds) + // return a string description of the board, as in the instances file + .def("to_string", &Board::ToString) + // returns a nicer representation of the board as a string + .def("pretty_board_string", &Board::PrettyBoardString); + + py::classh(m, "ColoredTrailsState") + .def("get_board", &ColoredTrailsState::board) + // arguments: none, returns list of current proposals (in order made) + .def("get_proposals", &ColoredTrailsState::proposals) + // arguments: (player: int, chips: List[int], proposal: Trade, + // rng_rolls: List[float]), returns nothing. 
+ .def("set_chips_and_trade_proposals", + &ColoredTrailsState::SetChipsAndTradeProposal) + // Pickle support + .def(py::pickle( + [](const ColoredTrailsState& state) { // __getstate__ + return SerializeGameAndState(*state.GetGame(), state); + }, + [](const std::string& data) { // __setstate__ + std::pair, std::unique_ptr> + game_and_state = DeserializeGameAndState(data); + return dynamic_cast( + game_and_state.second.release()); + })); + + py::classh(m, "ColoredTrailsGame") + // arguments(trade_action: int); returns Trade + .def("lookup_trade", &ColoredTrailsGame::LookupTrade) + // arguments (player: int); returns responder action to trade with player + .def("responder_trade_with_player_action", + &ColoredTrailsGame::ResponderTradeWithPlayerAction) + // no arguments; returns the pass action + .def("pass_action", &ColoredTrailsGame::PassAction) + // arguments (seed: int, board: Board, player: int) + // returns: (board, action) + .def("sample_random_board_completion", + &ColoredTrailsGame::SampleRandomBoardCompletion) + // Pickle support + .def(py::pickle( + [](std::shared_ptr game) { // __getstate__ + return game->ToString(); + }, + [](const std::string& data) { // __setstate__ + return std::dynamic_pointer_cast( + std::const_pointer_cast(LoadGame(data))); + })); + + // arguments: (player: int, board: board). Returns the gain of the player. + m.def("score", &colored_trails::Score); + + // arguments: (combo: List[int]) + m.def("combo_to_string", &colored_trails::ComboToString); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_colored_trails.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_colored_trails.h new file mode 100644 index 0000000..729b7d5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_colored_trails.h @@ -0,0 +1,25 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_PYTHON_PYBIND11_GAMES_COLORED_TRAILS_H_ +#define OPEN_SPIEL_PYTHON_PYBIND11_GAMES_COLORED_TRAILS_H_ + +#include "open_spiel/python/pybind11/pybind11.h" + +// Initialze the Python interface for games/negotiation. +namespace open_spiel { +void init_pyspiel_games_colored_trails(::pybind11::module &m); +} + +#endif // OPEN_SPIEL_PYTHON_PYBIND11_GAMES_BARGAINING_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_dots_and_boxes.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_dots_and_boxes.cc new file mode 100644 index 0000000..eca2273 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_dots_and_boxes.cc @@ -0,0 +1,45 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/python/pybind11/games_dots_and_boxes.h" + +#include +#include +#include + +#include "open_spiel/games/dots_and_boxes/dots_and_boxes.h" +#include "open_spiel/spiel.h" +#include "pybind11/include/pybind11/pybind11.h" + + +namespace py = ::pybind11; +using open_spiel::Game; +using open_spiel::State; +using open_spiel::dots_and_boxes::DotsAndBoxesState; + +void open_spiel::init_pyspiel_games_dots_and_boxes(py::module& m) { + py::classh(m, "DotsAndBoxesState") + .def("dbn_string", &DotsAndBoxesState::DbnString) + // Pickle support + .def(py::pickle( + [](const DotsAndBoxesState& state) { // __getstate__ + return SerializeGameAndState(*state.GetGame(), state); + }, + [](const std::string& data) { // __setstate__ + std::pair, std::unique_ptr> + game_and_state = DeserializeGameAndState(data); + return dynamic_cast( + game_and_state.second.release()); + })); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_dots_and_boxes.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_dots_and_boxes.h new file mode 100644 index 0000000..a15691b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_dots_and_boxes.h @@ -0,0 +1,25 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_PYTHON_PYBIND11_GAMES_DOTS_AND_BOXES_H_ +#define OPEN_SPIEL_PYTHON_PYBIND11_GAMES_DOTS_AND_BOXES_H_ + +#include "open_spiel/python/pybind11/pybind11.h" + +// Initialze the Python interface for games/negotiation. +namespace open_spiel { +void init_pyspiel_games_dots_and_boxes(::pybind11::module &m); +} + +#endif // OPEN_SPIEL_PYTHON_PYBIND11_GAMES_DOTS_AND_BOXES_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_euchre.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_euchre.cc new file mode 100644 index 0000000..5edad14 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_euchre.cc @@ -0,0 +1,134 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/python/pybind11/games_euchre.h" + +#include + +#include "open_spiel/games/euchre/euchre.h" +#include "open_spiel/python/pybind11/pybind11.h" +#include "open_spiel/spiel.h" + +// Several function return absl::optional or lists of absl::optional, so must +// use pybind11_abseil here. +#include "pybind11/include/pybind11/detail/common.h" +#include "pybind11_abseil/absl_casters.h" + +namespace open_spiel { + +namespace py = ::pybind11; +using euchre::EuchreGame; +using euchre::EuchreState; + +void init_pyspiel_games_euchre(py::module& m) { + py::module_ euchre = m.def_submodule("euchre"); + + euchre.attr("JACK_RANK") = py::int_(euchre::kJackRank); + euchre.attr("NUM_SUITS") = py::int_(euchre::kNumSuits); + euchre.attr("NUM_CARDS_PER_SUIT") = py::int_(euchre::kNumCardsPerSuit); + euchre.attr("NUM_CARDS") = py::int_(euchre::kNumCards); + euchre.attr("PASS_ACTION") = py::int_(euchre::kPassAction); + euchre.attr("CLUBS_TRUMP_ACTION") = py::int_(euchre::kClubsTrumpAction); + euchre.attr("DIAMONDS_TRUMP_ACTION") = py::int_(euchre::kDiamondsTrumpAction); + euchre.attr("HEARTS_TRUMP_ACTION") = py::int_(euchre::kHeartsTrumpAction); + euchre.attr("SPADES_TRUMP_ACTION") = py::int_(euchre::kSpadesTrumpAction); + euchre.attr("GO_ALONE_ACTION") = py::int_(euchre::kGoAloneAction); + euchre.attr("PLAY_WITH_PARTNER_ACTION") = py::int_( + euchre::kPlayWithPartnerAction); + euchre.attr("MAX_BIDS") = py::int_(euchre::kMaxBids); + euchre.attr("NUM_TRICKS") = py::int_(euchre::kNumTricks); + euchre.attr("FULL_HAND_SIZE") = py::int_(euchre::kFullHandSize); + + euchre.def("card_string", euchre::CardString); + euchre.def("card_rank", py::overload_cast(euchre::CardRank)); + euchre.def("card_rank", + py::overload_cast(euchre::CardRank)); + euchre.def("card_suit", py::overload_cast(euchre::CardSuit)); + euchre.def("card_suit", + py::overload_cast(euchre::CardSuit)); + + py::enum_(euchre, "Suit") + .value("INVALID_SUIT", euchre::Suit::kInvalidSuit) + .value("CLUBS", euchre::Suit::kClubs) + .value("DIAMONDS", euchre::Suit::kDiamonds) + .value("HEARTS", euchre::Suit::kHearts) + .value("SPADES", euchre::Suit::kSpades) + .export_values(); + + py::enum_(euchre, "Phase") + .value("DEALER_SELECTION", euchre::Phase::kDealerSelection) + .value("DEAL", euchre::Phase::kDeal) + .value("BIDDING", euchre::Phase::kBidding) + .value("DISCARD", euchre::Phase::kDiscard) + .value("GO_ALONE", euchre::Phase::kGoAlone) + .value("PLAY", euchre::Phase::kPlay) + .value("GAME_OVER", euchre::Phase::kGameOver) + .export_values(); + + py::classh state_class(euchre, "EuchreState"); + state_class + .def("num_cards_dealt", &EuchreState::NumCardsDealt) + .def("num_cards_played", &EuchreState::NumCardsPlayed) + .def("num_passes", &EuchreState::NumPasses) + .def("upcard", &EuchreState::Upcard) + .def("discard", &EuchreState::Discard) + .def("trump_suit", &EuchreState::TrumpSuit) + .def("left_bower", &EuchreState::LeftBower) + .def("right_bower", &EuchreState::RightBower) + .def("declarer", &EuchreState::Declarer) + .def("declarer_partner", &EuchreState::DeclarerPartner) + .def("first_defender", &EuchreState::FirstDefender) + .def("second_defender", &EuchreState::SecondDefender) + .def("declarer_go_alone", &EuchreState::DeclarerGoAlone) + .def("lone_defender", &EuchreState::LoneDefender) + .def("active_players", &EuchreState::ActivePlayers) + .def("dealer", &EuchreState::Dealer) + .def("current_phase", &EuchreState::CurrentPhase) 
+ .def("current_trick_index", &EuchreState::CurrentTrickIndex) + .def("current_trick", + py::overload_cast<>(&EuchreState::CurrentTrick, py::const_)) + .def("card_holder", &EuchreState::CardHolder) + .def("tricks", &EuchreState::Tricks) + // Pickle support + .def(py::pickle( + [](const EuchreState& state) { // __getstate__ + return SerializeGameAndState(*state.GetGame(), state); + }, + [](const std::string& data) { // __setstate__ + std::pair, std::unique_ptr> + game_and_state = DeserializeGameAndState(data); + return dynamic_cast(game_and_state.second.release()); + })); + + py::class_(state_class, "Trick") + .def("winning_card", &euchre::Trick::WinningCard) + .def("led_suit", &euchre::Trick::LedSuit) + .def("trump_suit", &euchre::Trick::TrumpSuit) + .def("trump_played", &euchre::Trick::TrumpPlayed) + .def("leader", &euchre::Trick::Leader) + .def("winner", &euchre::Trick::Winner) + .def("cards", &euchre::Trick::Cards); + + py::classh(m, "EuchreGame") + // Pickle support + .def(py::pickle( + [](std::shared_ptr game) { // __getstate__ + return game->ToString(); + }, + [](const std::string& data) { // __setstate__ + return std::dynamic_pointer_cast( + std::const_pointer_cast(LoadGame(data))); + })); +} +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_euchre.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_euchre.h new file mode 100644 index 0000000..d5b0c22 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_euchre.h @@ -0,0 +1,25 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_PYTHON_PYBIND11_GAMES_EUCHRE_H_ +#define OPEN_SPIEL_PYTHON_PYBIND11_GAMES_EUCHRE_H_ + +#include "open_spiel/python/pybind11/pybind11.h" + +// Initialize the Python interface for euchre. +namespace open_spiel { +void init_pyspiel_games_euchre(::pybind11::module &m); +} + +#endif // OPEN_SPIEL_PYTHON_PYBIND11_GAMES_EUCHRE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_gin_rummy.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_gin_rummy.cc new file mode 100644 index 0000000..b788a57 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_gin_rummy.cc @@ -0,0 +1,154 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/python/pybind11/games_gin_rummy.h" + +#include +#include + +#include "open_spiel/games/gin_rummy/gin_rummy.h" +#include "open_spiel/games/gin_rummy/gin_rummy_utils.h" +#include "open_spiel/python/pybind11/pybind11.h" +#include "open_spiel/spiel.h" + +// Several function return absl::optional or lists of absl::optional, so must +// use pybind11_abseil here. +#include "pybind11/include/pybind11/detail/common.h" +#include "pybind11_abseil/absl_casters.h" + +namespace open_spiel { + +namespace py = ::pybind11; +using gin_rummy::GinRummyGame; +using gin_rummy::GinRummyState; +using gin_rummy::GinRummyUtils; + +void init_pyspiel_games_gin_rummy(py::module& m) { + py::module_ gin_rummy = m.def_submodule("gin_rummy"); + + gin_rummy.attr("DEFAULT_NUM_RANKS") = py::int_(gin_rummy::kDefaultNumRanks); + gin_rummy.attr("DEFAULT_NUM_SUITS") = py::int_(gin_rummy::kDefaultNumSuits); + gin_rummy.attr("DEFAULT_NUM_CARDS") = py::int_(gin_rummy::kDefaultNumCards); + gin_rummy.attr("NUM_PLAYERS") = py::int_(gin_rummy::kNumPlayers); + gin_rummy.attr("MAX_POSSIBLE_DEADWOOD") = py::int_( + gin_rummy::kMaxPossibleDeadwood); + gin_rummy.attr("MAX_NUM_DRAW_UPCARD_ACTIONS") = py::int_( + gin_rummy::kMaxNumDrawUpcardActions); + gin_rummy.attr("DEFAULT_HAND_SIZE") = py::int_(gin_rummy::kDefaultHandSize); + gin_rummy.attr("WALL_STOCK_SIZE") = py::int_(gin_rummy::kWallStockSize); + gin_rummy.attr("DEFAULT_KNOCK_CARD") = py::int_(gin_rummy::kDefaultKnockCard); + gin_rummy.attr("DEFAULT_GIN_BONUS") = py::int_(gin_rummy::kDefaultGinBonus); + gin_rummy.attr("DEFAULT_UNDERCUT_BONUS") = py::int_( + gin_rummy::kDefaultUndercutBonus); + gin_rummy.attr("DRAW_UPCARD_ACTION") = py::int_(gin_rummy::kDrawUpcardAction); + gin_rummy.attr("DRAW_STOCK_ACTION") = py::int_(gin_rummy::kDrawStockAction); + gin_rummy.attr("PASS_ACTION") = py::int_(gin_rummy::kPassAction); + gin_rummy.attr("KNOCK_ACTION") = py::int_(gin_rummy::kKnockAction); + gin_rummy.attr("MELD_ACTION_BASE") = py::int_(gin_rummy::kMeldActionBase); + gin_rummy.attr("NUM_MELD_ACTIONS") = py::int_(gin_rummy::kNumMeldActions); + gin_rummy.attr("NUM_DISTINCT_ACTIONS") = py::int_( + gin_rummy::kNumDistinctActions); + gin_rummy.attr("OBSERVATION_TENSOR_SIZE") = py::int_( + gin_rummy::kObservationTensorSize); + + py::enum_(gin_rummy, "Phase") + .value("DEAL", gin_rummy::Phase::kDeal) + .value("FIRST_UPCARD", gin_rummy::Phase::kFirstUpcard) + .value("DRAW", gin_rummy::Phase::kDraw) + .value("DISCARD", gin_rummy::Phase::kDiscard) + .value("KNOCK", gin_rummy::Phase::kKnock) + .value("LAYOFF", gin_rummy::Phase::kLayoff) + .value("WALL", gin_rummy::Phase::kWall) + .value("GAME_OVER", gin_rummy::Phase::kGameOver) + .export_values(); + + py::classh state_class(gin_rummy, "GinRummyState"); + state_class + .def("current_phase", &GinRummyState::CurrentPhase) + .def("current_player", &GinRummyState::CurrentPlayer) + .def("finished_layoffs", &GinRummyState::FinishedLayoffs) + .def("upcard", &GinRummyState::Upcard) + .def("stock_size", &GinRummyState::StockSize) + .def("hands", &GinRummyState::Hands) + .def("discard_pile", &GinRummyState::DiscardPile) + .def("deadwood", &GinRummyState::Deadwood) + .def("knocked", &GinRummyState::Knocked) + .def("pass_on_first_upcard", &GinRummyState::PassOnFirstUpcard) + .def("layed_melds", &GinRummyState::LayedMelds) + .def("layoffs", &GinRummyState::Layoffs) + // Pickle support + .def(py::pickle( + [](const GinRummyState& state) { // __getstate__ + return SerializeGameAndState(*state.GetGame(), state); + }, + [](const std::string& 
data) { // __setstate__ + std::pair, std::unique_ptr> + game_and_state = DeserializeGameAndState(data); + return dynamic_cast( + game_and_state.second.release()); + })); + + py::classh(m, "GinRummyGame") + .def("oklahoma", &GinRummyGame::Oklahoma) + .def("knock_card", &GinRummyGame::KnockCard) + // Pickle support + .def(py::pickle( + [](std::shared_ptr game) { // __getstate__ + return game->ToString(); + }, + [](const std::string& data) { // __setstate__ + return std::dynamic_pointer_cast( + std::const_pointer_cast(LoadGame(data))); + })); + + py::class_(gin_rummy, "GinRummyUtils") + .def(py::init()) + .def("card_string", &GinRummyUtils::CardString) + .def("hand_to_string", &GinRummyUtils::HandToString) + .def("card_int", &GinRummyUtils::CardInt) + .def("card_ints_to_card_strings", &GinRummyUtils::CardIntsToCardStrings) + .def("card_strings_to_card_ints", &GinRummyUtils::CardStringsToCardInts) + .def("card_value", &GinRummyUtils::CardValue) + .def("total_card_value", + py::overload_cast( + &GinRummyUtils::TotalCardValue, py::const_)) + .def("total_card_value", + py::overload_cast( + &GinRummyUtils::TotalCardValue, py::const_)) + .def("card_rank", &GinRummyUtils::CardRank) + .def("card_suit", &GinRummyUtils::CardSuit) + .def("is_consecutive", &GinRummyUtils::IsConsecutive) + .def("is_rank_meld", &GinRummyUtils::IsRankMeld) + .def("is_suit_meld", &GinRummyUtils::IsSuitMeld) + .def("rank_melds", &GinRummyUtils::RankMelds) + .def("suit_melds", &GinRummyUtils::SuitMelds) + .def("all_melds", &GinRummyUtils::AllMelds) + .def("all_meld_groups", &GinRummyUtils::AllMeldGroups) + .def("best_meld_group", &GinRummyUtils::BestMeldGroup) + .def("min_deadwood", + py::overload_cast>( + &GinRummyUtils::MinDeadwood, py::const_)) + .def("min_deadwood", + py::overload_cast( + &GinRummyUtils::MinDeadwood, py::const_)) + .def("rank_meld_layoff", &GinRummyUtils::RankMeldLayoff) + .def("suit_meld_layoffs", &GinRummyUtils::SuitMeldLayoffs) + .def("legal_melds", &GinRummyUtils::LegalMelds) + .def("legal_discards", &GinRummyUtils::LegalDiscards) + .def("all_layoffs", &GinRummyUtils::AllLayoffs) + .def_readonly("int_to_meld", &GinRummyUtils::int_to_meld) + .def("meld_to_int", &GinRummyUtils::MeldToInt); +} +} // namespace open_spiel + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_gin_rummy.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_gin_rummy.h new file mode 100644 index 0000000..d5bbb66 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_gin_rummy.h @@ -0,0 +1,25 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_PYTHON_PYBIND11_GAMES_GIN_RUMMY_H_ +#define OPEN_SPIEL_PYTHON_PYBIND11_GAMES_GIN_RUMMY_H_ + +#include "open_spiel/python/pybind11/pybind11.h" + +// Initialize the Python interface for gin_rummy. 
+namespace open_spiel { +void init_pyspiel_games_gin_rummy(::pybind11::module &m); +} + +#endif // OPEN_SPIEL_PYTHON_PYBIND11_GAMES_GIN_RUMMY_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_kuhn_poker.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_kuhn_poker.cc new file mode 100644 index 0000000..0dc4425 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_kuhn_poker.cc @@ -0,0 +1,25 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/python/pybind11/games_kuhn_poker.h" + +#include "open_spiel/games/kuhn_poker/kuhn_poker.h" +#include "open_spiel/python/pybind11/pybind11.h" + +namespace py = ::pybind11; + +void open_spiel::init_pyspiel_games_kuhn_poker(py::module& m) { + py::module sub = m.def_submodule("kuhn_poker"); + sub.def("get_optimal_policy", &kuhn_poker::GetOptimalPolicy); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_kuhn_poker.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_kuhn_poker.h new file mode 100644 index 0000000..3325c28 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_kuhn_poker.h @@ -0,0 +1,25 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_PYTHON_PYBIND11_GAMES_KUHN_POKER_H_ +#define OPEN_SPIEL_PYTHON_PYBIND11_GAMES_KUHN_POKER_H_ + +#include "open_spiel/python/pybind11/pybind11.h" + +// Initialze the Python interface for games/negotiation. +namespace open_spiel { +void init_pyspiel_games_kuhn_poker(::pybind11::module &m); +} + +#endif // OPEN_SPIEL_PYTHON_PYBIND11_GAMES_KUHN_POKER_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_leduc_poker.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_leduc_poker.cc new file mode 100644 index 0000000..8c1c181 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_leduc_poker.cc @@ -0,0 +1,63 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/python/pybind11/games_leduc_poker.h" + +#include "open_spiel/games/leduc_poker/leduc_poker.h" +#include "open_spiel/python/pybind11/pybind11.h" +#include "open_spiel/spiel.h" + +namespace py = ::pybind11; +using open_spiel::Game; +using open_spiel::State; +using open_spiel::leduc_poker::LeducState; +using open_spiel::leduc_poker::ActionType; + +void open_spiel::init_pyspiel_games_leduc_poker(py::module& m) { + py::module_ leduc_poker = m.def_submodule("leduc_poker"); + + leduc_poker.attr("INVALID_CARD") = py::int_( + open_spiel::leduc_poker::kInvalidCard); + + py::enum_(leduc_poker, "ActionType") + .value("FOLD", ActionType::kFold) + .value("CALL", ActionType::kCall) + .value("RAISE", ActionType::kRaise) + .export_values(); + + py::classh(leduc_poker, "LeducState") + // Gets the private cards; no arguments, returns vector of ints. + .def("get_private_cards", &LeducState::GetPrivateCards) + // Sets the private cards; takes a vector of ints, no returns. + .def("set_private_cards", &LeducState::SetPrivateCards) + // Expose additional state features. + .def("private_card", &LeducState::private_card) + .def("public_card", &LeducState::public_card) + .def("round", &LeducState::round) + .def("money", &LeducState::GetMoney) + .def("pot", &LeducState::GetPot) + .def("round1", &LeducState::GetRound1) + .def("round2", &LeducState::GetRound2) + // Pickle support + .def(py::pickle( + [](const LeducState& state) { // __getstate__ + return SerializeGameAndState(*state.GetGame(), state); + }, + [](const std::string& data) { // __setstate__ + std::pair, std::unique_ptr> + game_and_state = DeserializeGameAndState(data); + return dynamic_cast( + game_and_state.second.release()); + })); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_leduc_poker.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_leduc_poker.h new file mode 100644 index 0000000..04e29dc --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_leduc_poker.h @@ -0,0 +1,25 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_PYTHON_PYBIND11_GAMES_LEDUC_POKER_H_ +#define OPEN_SPIEL_PYTHON_PYBIND11_GAMES_LEDUC_POKER_H_ + +#include "open_spiel/python/pybind11/pybind11.h" + +// Initialze the Python interface for games/negotiation. 
+namespace open_spiel {
+void init_pyspiel_games_leduc_poker(::pybind11::module &m);
+}
+
+#endif // OPEN_SPIEL_PYTHON_PYBIND11_GAMES_LEDUC_POKER_H_
diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_negotiation.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_negotiation.cc
new file mode 100644
index 0000000..2064051
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_negotiation.cc
@@ -0,0 +1,44 @@
+// Copyright 2021 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "open_spiel/python/pybind11/games_negotiation.h"
+
+#include "open_spiel/games/negotiation/negotiation.h"
+#include "open_spiel/python/pybind11/pybind11.h"
+#include "open_spiel/spiel.h"
+
+namespace py = ::pybind11;
+using open_spiel::Game;
+using open_spiel::State;
+using open_spiel::negotiation::NegotiationState;
+
+void open_spiel::init_pyspiel_games_negotiation(py::module& m) {
+  py::classh<NegotiationState, State>(m, "NegotiationState")
+      .def("item_pool",
+           (const std::vector<int>& (NegotiationState::*)() const) &
+               NegotiationState::ItemPool)
+      .def("agent_utils", [](const NegotiationState& state,
+                             int player) { return state.AgentUtils()[player]; })
+      // Pickle support
+      .def(py::pickle(
+          [](const NegotiationState& state) {  // __getstate__
+            return SerializeGameAndState(*state.GetGame(), state);
+          },
+          [](const std::string& data) {  // __setstate__
+            std::pair<std::shared_ptr<const Game>, std::unique_ptr<State>>
+                game_and_state = DeserializeGameAndState(data);
+            return dynamic_cast<NegotiationState*>(
+                game_and_state.second.release());
+          }));
+}
diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_negotiation.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_negotiation.h
new file mode 100644
index 0000000..6e8d2a8
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_negotiation.h
@@ -0,0 +1,25 @@
+// Copyright 2021 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef OPEN_SPIEL_PYTHON_PYBIND11_GAMES_NEGOTIATION_H_
+#define OPEN_SPIEL_PYTHON_PYBIND11_GAMES_NEGOTIATION_H_
+
+#include "open_spiel/python/pybind11/pybind11.h"
+
+// Initialze the Python interface for games/negotiation.
+namespace open_spiel { +void init_pyspiel_games_negotiation(::pybind11::module &m); +} + +#endif // OPEN_SPIEL_PYTHON_PYBIND11_GAMES_NEGOTIATION_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_spades.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_spades.cc new file mode 100644 index 0000000..dcf00ae --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_spades.cc @@ -0,0 +1,83 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/python/pybind11/games_spades.h" + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/games/spades/spades.h" +#include "open_spiel/python/pybind11/pybind11.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { + +namespace py = ::pybind11; +using spades::SpadesGame; +using spades::SpadesState; + +void init_pyspiel_games_spades(py::module& m) { + py::classh(m, "SpadesState") + .def("get_current_scores", &SpadesState::GetCurrentScores) + .def("set_current_scores", &SpadesState::SetCurrentScores) + .def("is_game_over", &SpadesState::IsGameOver) + .def("set_current_player", &SpadesState::SetCurrentPlayer) + .def("contract_indexes", &SpadesState::ContractIndexes) + .def("possible_contracts", &SpadesState::PossibleContracts) + .def("current_phase", &SpadesState::CurrentPhase) + .def("write_observation_tensor", + [](const SpadesState& state, + py::array_t array) { + py::buffer_info buf = array.request(); + SPIEL_CHECK_EQ(buf.ndim, 1); + SPIEL_CHECK_EQ(buf.strides.front(), buf.itemsize); + state.WriteObservationTensor( + state.CurrentPlayer(), + absl::MakeSpan(static_cast(buf.ptr), + buf.shape.front())); + }) + .def("private_observation_tensor", &SpadesState::PrivateObservationTensor) + .def("public_observation_tensor", &SpadesState::PublicObservationTensor) + // Pickle support + .def(py::pickle( + [](const SpadesState& state) { // __getstate__ + return SerializeGameAndState(*state.GetGame(), state); + }, + [](const std::string& data) { // __setstate__ + std::pair, std::unique_ptr> + game_and_state = DeserializeGameAndState(data); + return dynamic_cast(game_and_state.second.release()); + })); + + py::classh(m, "SpadesGame") + .def("num_possible_contracts", &SpadesGame::NumPossibleContracts) + .def("contract_string", &SpadesGame::ContractString) + .def("private_observation_tensor_size", + &SpadesGame::PrivateObservationTensorSize) + .def("public_observation_tensor_size", + &SpadesGame::PublicObservationTensorSize) + // Pickle support + .def(py::pickle( + [](std::shared_ptr game) { // __getstate__ + return game->ToString(); + }, + [](const std::string& data) { // __setstate__ + return std::dynamic_pointer_cast( + std::const_pointer_cast(LoadGame(data))); + })); +} +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_spades.h 
b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_spades.h new file mode 100644 index 0000000..a3b5152 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_spades.h @@ -0,0 +1,25 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_PYTHON_PYBIND11_GAMES_SPADES_H_ +#define OPEN_SPIEL_PYTHON_PYBIND11_GAMES_SPADES_H_ + +#include "open_spiel/python/pybind11/pybind11.h" + +// Initialize the Python interface for spades. +namespace open_spiel { +void init_pyspiel_games_spades(::pybind11::module &m); +} + +#endif // OPEN_SPIEL_PYTHON_PYBIND11_GAMES_SPADES_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_tarok.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_tarok.cc new file mode 100644 index 0000000..9cce775 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_tarok.cc @@ -0,0 +1,76 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/games/tarok/tarok.h" +#include "open_spiel/python/pybind11/pybind11.h" + +namespace open_spiel { + +namespace py = ::pybind11; +using tarok::TarokState; + +void init_pyspiel_games_tarok(py::module& m) { + // state object + py::classh tarok_state(m, "TarokState"); + tarok_state.def("card_action_to_string", + &TarokState::CardActionToString); + tarok_state.def("current_game_phase", &TarokState::CurrentGamePhase); + tarok_state.def("player_cards", &TarokState::PlayerCards); + tarok_state.def("selected_contract", + &TarokState::SelectedContractName); + tarok_state.def("talon", &TarokState::Talon); + tarok_state.def("talon_sets", &TarokState::TalonSets); + tarok_state.def("trick_cards", &TarokState::TrickCards); + tarok_state.def("captured_mond_penalties", + &TarokState::CapturedMondPenalties); + tarok_state.def("scores_without_captured_mond_penalties", + &TarokState::ScoresWithoutCapturedMondPenalties); + tarok_state.def(py::pickle( + [](const TarokState& state) { // __getstate__ + return SerializeGameAndState(*state.GetGame(), state); + }, + [](const std::string& data) { // __setstate__ + std::pair, std::unique_ptr> + game_and_state = DeserializeGameAndState(data); + return dynamic_cast(game_and_state.second.release()); + })); + + // game phase object + py::enum_ game_phase(m, "TarokGamePhase"); + game_phase.value("CARD_DEALING", tarok::GamePhase::kCardDealing); + game_phase.value("BIDDING", tarok::GamePhase::kBidding); + game_phase.value("KING_CALLING", tarok::GamePhase::kKingCalling); + game_phase.value("TALON_EXCHANGE", tarok::GamePhase::kTalonExchange); + game_phase.value("TRICKS_PLAYING", tarok::GamePhase::kTricksPlaying); + game_phase.value("FINISHED", tarok::GamePhase::kFinished); + + // contract name object + py::enum_ contract(m, "TarokContract"); + contract.value("KLOP", tarok::ContractName::kKlop); + contract.value("THREE", tarok::ContractName::kThree); + contract.value("TWO", tarok::ContractName::kTwo); + contract.value("ONE", tarok::ContractName::kOne); + contract.value("SOLO_THREE", tarok::ContractName::kSoloThree); + contract.value("SOLO_TWO", tarok::ContractName::kSoloTwo); + contract.value("SOLO_ONE", tarok::ContractName::kSoloOne); + contract.value("BEGGAR", tarok::ContractName::kBeggar); + contract.value("SOLO_WITHOUT", tarok::ContractName::kSoloWithout); + contract.value("OPEN_BEGGAR", tarok::ContractName::kOpenBeggar); + contract.value("COLOUR_VALAT_WITHOUT", + tarok::ContractName::kColourValatWithout); + contract.value("VALAT_WITHOUT", tarok::ContractName::kValatWithout); + contract.value("NOT_SELECTED", tarok::ContractName::kNotSelected); +} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_tarok.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_tarok.h new file mode 100644 index 0000000..b8e56cf --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_tarok.h @@ -0,0 +1,26 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_PYTHON_PYBIND11_GAMES_TAROK_H_ +#define OPEN_SPIEL_PYTHON_PYBIND11_GAMES_TAROK_H_ + +#include "open_spiel/python/pybind11/pybind11.h" + +namespace open_spiel { + +void init_pyspiel_games_tarok(::pybind11::module& m); + +} // namespace open_spiel + +#endif // OPEN_SPIEL_PYTHON_PYBIND11_GAMES_TAROK_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_tic_tac_toe.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_tic_tac_toe.cc new file mode 100644 index 0000000..32afa41 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_tic_tac_toe.cc @@ -0,0 +1,64 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/python/pybind11/games_tic_tac_toe.h" + +#include +#include +#include + +#include "open_spiel/games/tic_tac_toe/tic_tac_toe.h" +#include "open_spiel/spiel.h" + +namespace py = ::pybind11; +using open_spiel::Game; +using open_spiel::State; +using open_spiel::tic_tac_toe::CellState; +using open_spiel::tic_tac_toe::TicTacToeState; + +void open_spiel::init_pyspiel_games_tic_tac_toe(py::module& m) { + py::module_ tic_tac_toe = m.def_submodule("tic_tac_toe"); + + tic_tac_toe.def("player_to_cellstate", &tic_tac_toe::PlayerToState); + tic_tac_toe.def("cellstate_to_string", &tic_tac_toe::StateToString); + + tic_tac_toe.attr("NUM_ROWS") = &tic_tac_toe::kNumRows; + tic_tac_toe.attr("NUM_COLS") = &tic_tac_toe::kNumCols; + tic_tac_toe.attr("NUM_CELLS") = &tic_tac_toe::kNumCells; + + py::enum_(tic_tac_toe, "CellState") + .value("EMPTY", CellState::kEmpty) + .value("NOUGHT", CellState::kNought) + .value("CROSS", CellState::kCross) + .export_values(); + + py::classh(tic_tac_toe, "TicTacToeState") + .def("board", &TicTacToeState::Board, + "Returns the board as a list of CellStates.") + .def("board_at", + py::overload_cast(&TicTacToeState::BoardAt, py::const_), + py::arg("row"), py::arg("col"), + "Returns the CellState at row, col coordinates.") + // Pickle support + .def(py::pickle( + [](const TicTacToeState& state) { // __getstate__ + return SerializeGameAndState(*state.GetGame(), state); + }, + [](const std::string& data) { // __setstate__ + std::pair, std::unique_ptr> + game_and_state = DeserializeGameAndState(data); + return dynamic_cast( + game_and_state.second.release()); + })); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_tic_tac_toe.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_tic_tac_toe.h new file mode 100644 index 0000000..c1c6511 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_tic_tac_toe.h @@ -0,0 +1,26 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_PYTHON_PYBIND11_GAMES_TIC_TAC_TOE_H_ +#define OPEN_SPIEL_PYTHON_PYBIND11_GAMES_TIC_TAC_TOE_H_ +#include "open_spiel/python/pybind11/pybind11.h" + +// Initialize the Python interface for games/tic_tac_toe. +namespace open_spiel { + +void init_pyspiel_games_tic_tac_toe(::pybind11::module &m); + +} // namespace open_spiel + +#endif // OPEN_SPIEL_PYTHON_PYBIND11_GAMES_TIC_TAC_TOE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_tiny_bridge.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_tiny_bridge.cc new file mode 100644 index 0000000..8d22671 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_tiny_bridge.cc @@ -0,0 +1,53 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/python/pybind11/games_tiny_bridge.h" + +#include "open_spiel/games/tiny_bridge/tiny_bridge.h" +#include "open_spiel/spiel.h" +#include "open_spiel/python/pybind11/pybind11.h" + +namespace py = ::pybind11; +using open_spiel::Game; +using open_spiel::State; +using open_spiel::tiny_bridge::TinyBridgeAuctionState; +using open_spiel::tiny_bridge::TinyBridgePlayState; + +void open_spiel::init_pyspiel_games_tiny_bridge(py::module& m) { + py::classh(m, "TinyBridgePlayState") + // Pickle support + .def(py::pickle( + [](const TinyBridgePlayState& state) { // __getstate__ + return SerializeGameAndState(*state.GetGame(), state); + }, + [](const std::string& data) { // __setstate__ + std::pair, std::unique_ptr> + game_and_state = DeserializeGameAndState(data); + return dynamic_cast( + game_and_state.second.release()); + })); + + py::classh(m, "TinyBridgeAuctionState") + // Pickle support + .def(py::pickle( + [](const TinyBridgeAuctionState& state) { // __getstate__ + return SerializeGameAndState(*state.GetGame(), state); + }, + [](const std::string& data) { // __setstate__ + std::pair, std::unique_ptr> + game_and_state = DeserializeGameAndState(data); + return dynamic_cast( + game_and_state.second.release()); + })); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_tiny_bridge.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_tiny_bridge.h new file mode 100644 index 0000000..bb9aabf --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_tiny_bridge.h @@ -0,0 +1,26 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_PYTHON_PYBIND11_GAMES_TINY_BRIDGE_H_ +#define OPEN_SPIEL_PYTHON_PYBIND11_GAMES_TINY_BRIDGE_H_ +#include "open_spiel/python/pybind11/pybind11.h" + +namespace open_spiel { + +void init_pyspiel_games_tiny_bridge(::pybind11::module& m); + +} // namespace open_spiel + + +#endif // OPEN_SPIEL_PYTHON_PYBIND11_GAMES_TINY_BRIDGE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_trade_comm.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_trade_comm.cc new file mode 100644 index 0000000..0a09478 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_trade_comm.cc @@ -0,0 +1,39 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/python/pybind11/games_trade_comm.h" + +#include "open_spiel/games/trade_comm/trade_comm.h" +#include "open_spiel/spiel.h" +#include "open_spiel/python/pybind11/pybind11.h" + +namespace py = ::pybind11; +using open_spiel::Game; +using open_spiel::State; +using open_spiel::trade_comm::TradeCommState; + +void open_spiel::init_pyspiel_games_trade_comm(py::module& m) { + py::classh(m, "TradeCommState") + // Pickle support + .def(py::pickle( + [](const TradeCommState& state) { // __getstate__ + return SerializeGameAndState(*state.GetGame(), state); + }, + [](const std::string& data) { // __setstate__ + std::pair, std::unique_ptr> + game_and_state = DeserializeGameAndState(data); + return dynamic_cast( + game_and_state.second.release()); + })); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_trade_comm.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_trade_comm.h new file mode 100644 index 0000000..fd872cb --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_trade_comm.h @@ -0,0 +1,28 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
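A quick aside on the bindings above (an illustrative sketch, not part of the vendored patch): the `py::pickle` hooks registered for `TinyBridgePlayState`, `TinyBridgeAuctionState`, and `TradeCommState` all delegate to `SerializeGameAndState`/`DeserializeGameAndState`, which is what makes ordinary Python pickling of these states round-trip. A minimal sketch, assuming the built `pyspiel` extension is importable:

```python
# Illustrative only: exercises the py::pickle bindings defined above.
import pickle

import pyspiel

game = pyspiel.load_game("trade_comm")
state = game.new_initial_state()
while state.is_chance_node():
    # Resolve the initial chance node(s) by taking the first listed outcome.
    action, _prob = state.chance_outcomes()[0]
    state.apply_action(action)

blob = pickle.dumps(state)      # __getstate__ -> SerializeGameAndState
restored = pickle.loads(blob)   # __setstate__ -> DeserializeGameAndState
assert str(restored) == str(state)
```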
+ + +#ifndef OPEN_SPIEL_PYTHON_PYBIND11_GAMES_TRADE_COMM_H_ +#define OPEN_SPIEL_PYTHON_PYBIND11_GAMES_TRADE_COMM_H_ + +#include "open_spiel/python/pybind11/pybind11.h" + +namespace open_spiel { + +void init_pyspiel_games_trade_comm(::pybind11::module& m); + +} // namespace open_spiel + + +#endif // OPEN_SPIEL_PYTHON_PYBIND11_GAMES_TRADE_COMM_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_universal_poker.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_universal_poker.cc new file mode 100644 index 0000000..f2b5c62 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_universal_poker.cc @@ -0,0 +1,26 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/python/pybind11/games_universal_poker.h" + +#include "open_spiel/games/universal_poker/universal_poker.h" +#include "open_spiel/python/pybind11/pybind11.h" + +namespace py = ::pybind11; + +void open_spiel::init_pyspiel_games_universal_poker(py::module& m) { + py::module sub = m.def_submodule("universal_poker"); + sub.def("load_universal_poker_from_acpc_gamedef", + &universal_poker::LoadUniversalPokerGameFromACPCGamedef); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_universal_poker.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_universal_poker.h new file mode 100644 index 0000000..a7c9680 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/games_universal_poker.h @@ -0,0 +1,26 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_PYTHON_PYBIND11_GAMES_UNIVERSAL_POKER_H_ +#define OPEN_SPIEL_PYTHON_PYBIND11_GAMES_UNIVERSAL_POKER_H_ + +#include "open_spiel/python/pybind11/pybind11.h" + +// Initialize the Python interface for games/negotiation. 
+namespace open_spiel { +void init_pyspiel_games_universal_poker(::pybind11::module &m); +void init_pyspiel_games_kuhn_poker(::pybind11::module &m); +} + +#endif // OPEN_SPIEL_PYTHON_PYBIND11_GAMES_UNIVERSAL_POKER_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/observer.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/observer.cc new file mode 100644 index 0000000..8a42311 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/observer.cc @@ -0,0 +1,87 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/python/pybind11/observer.h" + +// Python bindings for observers. + +#include "open_spiel/game_transforms/normal_form_extensive_game.h" +#include "open_spiel/game_transforms/turn_based_simultaneous_game.h" +#include "open_spiel/observer.h" +#include "open_spiel/python/pybind11/pybind11.h" + +namespace open_spiel { +namespace py = ::pybind11; + +void init_pyspiel_observer(py::module& m) { + // C++ Observer, intended only for the Python Observation class, not + // for general Python code. + py::class_>(m, "Observer") + .def("__str__", [](const Observer& self) { return "Observer()"; }); + + py::class_(m, "SpanTensorInfo") + .def_property_readonly( + "name", [](const SpanTensorInfo& info) { return info.name(); }) + .def_property_readonly( + "shape", + [](const SpanTensorInfo& info) { return info.vector_shape(); }) + .def("__str__", &SpanTensorInfo::DebugString); + + py::class_(m, "SpanTensor") + .def_property_readonly( + "name", [](const SpanTensor& tensor) { return tensor.info().name(); }) + .def_property_readonly( + "shape", + [](const SpanTensor& tensor) { return tensor.info().vector_shape(); }) + .def_property_readonly("data", + [](const SpanTensor& tensor) { + // absl::Span requires pybind11_abseil which + // open spiel forbids. Thus copy the data + // and expose a vector through pybind. + std::vector data(tensor.data().begin(), + tensor.data().end()); + return data; + }) + .def("__str__", &SpanTensor::DebugString); + + // C++ Observation, intended only for the Python Observation class, not + // for general Python code. 
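As the comment above notes, the `_Observation` class bound just below is meant for the Python-side wrapper in `open_spiel/python/observation.py` rather than for general use; it exposes the pybind11 buffer protocol over a flat float tensor, which is how that wrapper obtains a zero-copy NumPy view. A rough sketch of the mechanism follows (illustrative only; constructing the observer with an empty parameter dict is an assumption on my part):

```python
# Illustrative only: shows what the buffer-protocol binding below enables.
import numpy as np

import pyspiel

game = pyspiel.load_game("tic_tac_toe")
observer = game.make_observer({})        # default observer, empty parameters (assumed)
obs = pyspiel._Observation(game, observer)

state = game.new_initial_state()
obs.set_from(state, 0)                          # fill the underlying float buffer
tensor = np.frombuffer(obs, dtype=np.float32)   # zero-copy view via the buffer protocol
print(obs.string_from(state, 0))
print(tensor.reshape(game.observation_tensor_shape()))
```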
+ py::class_(m, "_Observation", py::buffer_protocol()) + .def(py::init([](std::shared_ptr game, + std::shared_ptr observer) { + return new Observation(*game, observer); + }), + py::arg("game"), py::arg("observer")) + .def("tensors", &Observation::tensors) + .def("tensors_info", &Observation::tensors_info) + .def("string_from", &Observation::StringFrom) + .def("set_from", &Observation::SetFrom) + .def("has_string", &Observation::HasString) + .def("has_tensor", &Observation::HasTensor) + .def("compress", + [](const Observation& self) { return py::bytes(self.Compress()); }) + .def("decompress", &Observation::Decompress) + .def_buffer([](Observation& buffer_observer) -> py::buffer_info { + return py::buffer_info( + buffer_observer.Tensor().data(), // Pointer to buffer + sizeof(float), // Size of one scalar + py::format_descriptor::format(), // Format descriptor + 1, // Num dimensions + {buffer_observer.Tensor().size()}, // Dimensions + {sizeof(float)} // Stride + ); + }); +} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/observer.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/observer.h new file mode 100644 index 0000000..ce20d9b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/observer.h @@ -0,0 +1,25 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_PYTHON_PYBIND11_OBSERVER_H_ +#define OPEN_SPIEL_PYTHON_PYBIND11_OBSERVER_H_ + +#include "open_spiel/python/pybind11/pybind11.h" + +// Initialze the Python interface for observers. +namespace open_spiel { +void init_pyspiel_observer(::pybind11::module &m); +} + +#endif // OPEN_SPIEL_PYTHON_PYBIND11_OBSERVER_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/policy.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/policy.cc new file mode 100644 index 0000000..813a123 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/policy.cc @@ -0,0 +1,455 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/python/pybind11/policy.h" + +// Python bindings for policies and algorithms handling them. 
+ +#include "open_spiel/algorithms/best_response.h" +#include "open_spiel/algorithms/cfr.h" +#include "open_spiel/algorithms/cfr_br.h" +#include "open_spiel/algorithms/deterministic_policy.h" +#include "open_spiel/algorithms/expected_returns.h" +#include "open_spiel/algorithms/external_sampling_mccfr.h" +#include "open_spiel/algorithms/is_mcts.h" +#include "open_spiel/algorithms/mcts.h" +#include "open_spiel/algorithms/outcome_sampling_mccfr.h" +#include "open_spiel/algorithms/tabular_best_response_mdp.h" +#include "open_spiel/algorithms/tabular_exploitability.h" +#include "open_spiel/policy.h" +#include "open_spiel/python/pybind11/pybind11.h" +#include "open_spiel/python/pybind11/python_policy.h" +#include "open_spiel/spiel.h" +#include "pybind11/include/pybind11/detail/common.h" + +namespace open_spiel { +namespace { + +using ::open_spiel::ActionsAndProbs; +using ::open_spiel::Policy; +using ::open_spiel::TabularPolicy; +using ::open_spiel::algorithms::Exploitability; +using ::open_spiel::algorithms::NashConv; +using ::open_spiel::algorithms::TabularBestResponse; +using ::open_spiel::algorithms::TabularBestResponseMDP; +using ::open_spiel::algorithms::TabularBestResponseMDPInfo; + +namespace py = ::pybind11; +} // namespace + +void init_pyspiel_policy(py::module& m) { + py::classh policy(m, "Policy"); + policy.def(py::init<>()) + .def("action_probabilities", + py::overload_cast(&Policy::GetStatePolicyAsMap, + py::const_), + py::arg("state"), + "Returns a dictionary mapping actions to probabilities for the " + "policy at the given " + "state.") + .def("action_probabilities", + py::overload_cast(&Policy::GetStatePolicyAsMap, + py::const_), + py::arg("info_state"), + "Returns a dictionary mapping actions to probabilities for the " + "policy at the given " + "information state.") + .def("get_state_policy", + py::overload_cast(&Policy::GetStatePolicy, py::const_), + py::arg("state"), + "Returns a list of (action, prob) pairs for the policy at the given " + "state.") + .def("get_state_policy", + py::overload_cast(&Policy::GetStatePolicy, + py::const_), + py::arg("state"), py::arg("player"), + "Returns a list of (action, prob) pairs for the policy for the " + "specified player at the " + "given state.") + .def("get_state_policy", + py::overload_cast(&Policy::GetStatePolicy, + py::const_), + py::arg("info_state"), + "Returns a list of (action, prob) pairs for the policy at the given " + "info state.") + .def("get_state_policy_as_parallel_vectors", + py::overload_cast( + &Policy::GetStatePolicyAsParallelVectors, py::const_), + py::arg("state"), + "Returns a pair of parallel vectors (actions, probs) for the policy " + "at the given state.") + .def("get_state_policy_as_parallel_vectors", + py::overload_cast( + &Policy::GetStatePolicyAsParallelVectors, py::const_), + py::arg("info_state"), + "Returns a pair of parallel vectors (actions, probs) for the policy " + "at the given " + "information state.") + .def("serialize", &Policy::Serialize, py::arg("double_precision") = -1, + py::arg("delimiter") = "<~>", "Serializes the policy to a string."); + + auto ptt = m.def_submodule( + "_policy_trampoline_testing", + "Internal test functions for calling policy member functions."); + ptt.def("call_action_probabilities", + [](const Policy& policy, const State& state) { + return policy.GetStatePolicyAsMap(state); + }); + ptt.def("call_action_probabilities", + [](const Policy& policy, const std::string& info_state) { + return policy.GetStatePolicyAsMap(info_state); + }); + ptt.def("call_get_state_policy", + 
[](const Policy& policy, const State& state) { + return policy.GetStatePolicy(state); + }); + ptt.def("call_get_state_policy", + [](const Policy& policy, const State& state, Player player) { + return policy.GetStatePolicy(state, player); + }); + ptt.def("call_get_state_policy", + [](const Policy& policy, const std::string& info_state) { + return policy.GetStatePolicy(info_state); + }); + ptt.def("call_get_state_policy_as_parallel_vectors", + [](const Policy& policy, const State& state) { + return policy.GetStatePolicyAsParallelVectors(state); + }); + ptt.def("call_get_state_policy_as_parallel_vectors", + [](const Policy& policy, const std::string& info_state) { + return policy.GetStatePolicyAsParallelVectors(info_state); + }); + ptt.def("call_serialize", [](const Policy& policy, int precision, + const std::string& delimiter = "<~>") { + return policy.Serialize(precision, delimiter); + }); + + py::class_(m, "TabularBestResponse") + .def(py::init&>()) + .def(py::init()) + .def(py::init< + const open_spiel::Game&, int, + const std::unordered_map&, + const float, const float>()) + .def(py::init()) + .def("value", + py::overload_cast(&TabularBestResponse::Value)) + .def("value_from_state", py::overload_cast( + &TabularBestResponse::Value)) + .def("get_best_response_policy", + &TabularBestResponse::GetBestResponsePolicy) + .def("get_best_response_actions", + &TabularBestResponse::GetBestResponseActions) + .def("set_policy", py::overload_cast&>( + &TabularBestResponse::SetPolicy)) + .def("set_policy", + py::overload_cast(&TabularBestResponse::SetPolicy)); + + // A tabular policy represented internally as a map. Note that this + // implementation is not directly compatible with the Python TabularPolicy + // implementation; the latter is implemented as a table of size + // [num_states, num_actions], while this is implemented as a map. It is + // non-trivial to convert between the two, but we have a function that does so + // in the open_spiel/python/policy.py file. 
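The comment above describes the gap between this map-based C++ `TabularPolicy` and the dense `[num_states, num_actions]` table used by the Python `TabularPolicy`. Purely as an illustration of the gist (the real conversion lives in `open_spiel/python/policy.py`, as the comment says), one row of the map can be densified roughly like this, assuming a built `pyspiel` module:

```python
# Illustrative only: turn one info-state entry of a C++ TabularPolicy
# (a map from info-state string to (action, prob) pairs) into a dense row.
import numpy as np

import pyspiel

game = pyspiel.load_game("kuhn_poker")
cpp_policy = pyspiel.UniformRandomPolicy(game)   # a C++ TabularPolicy

table = cpp_policy.policy_table()                # {info_state: [(action, prob), ...]}
info_state, pairs = next(iter(table.items()))

row = np.zeros(game.num_distinct_actions())
for action, prob in pairs:
    row[action] = prob
print(info_state, row)
```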
+ py::classh(m, "TabularPolicy") + .def(py::init&>()) + .def("__str__", &TabularPolicy::ToString) + .def("__repr__", &TabularPolicy::ToString) + .def("__len__", &TabularPolicy::size) + .def("get_state_policy", &TabularPolicy::GetStatePolicy) + .def("policy_table", + py::overload_cast<>(&TabularPolicy::PolicyTable)) + .def("size", &TabularPolicy::size) + .def("to_string", &TabularPolicy::ToString); + + py::classh( + m, "PartialTabularPolicy") + .def(py::init<>()) + .def(py::init&>()) + .def(py::init&, + std::shared_ptr>()) + .def("get_state_policy", + (ActionsAndProbs(open_spiel::Policy::*)(const State&) const) & + open_spiel::PartialTabularPolicy::GetStatePolicy) + .def( + "get_state_policy", + (ActionsAndProbs(open_spiel::Policy::*)(const State&, Player) const) & + open_spiel::PartialTabularPolicy::GetStatePolicy) + .def("get_state_policy", + (ActionsAndProbs(open_spiel::Policy::*)(const std::string&) const) & + open_spiel::PartialTabularPolicy::GetStatePolicy) + .def("set_prob", &open_spiel::PartialTabularPolicy::SetProb) + .def("set_state_policy", + &open_spiel::PartialTabularPolicy::SetStatePolicy) + .def("policy_table", + py::overload_cast<>(&open_spiel::PartialTabularPolicy::PolicyTable)); + + m.def("GetRandomPolicy", &open_spiel::GetRandomPolicy, + py::arg("game"), py::arg("seed"), py::arg("player") = -1); + m.def("GetFlatDirichletPolicy", &open_spiel::GetFlatDirichletPolicy, + py::arg("game"), py::arg("seed"), py::arg("player") = -1); + m.def("GetRandomDeterministicPolicy", + &open_spiel::GetRandomDeterministicPolicy, + py::arg("game"), py::arg("seed"), py::arg("player") = -1); + m.def("GetRandomDeterministicVisitPolicy", + &open_spiel::GetRandomDeterministicVisitPolicy, + py::arg("game"), py::arg("seed"), py::arg("player") = -1); + m.def("UniformRandomPolicy", &open_spiel::GetUniformPolicy); + + py::classh(m, "UniformPolicy") + .def(py::init<>()) + .def("get_state_policy", &open_spiel::UniformPolicy::GetStatePolicy); + + py::classh( + m, "PreferredActionPolicy") + .def(py::init&>()) + .def("get_state_policy", + &open_spiel::PreferredActionPolicy::GetStatePolicy); + + py::class_(m, "CFRSolver") + .def(py::init([](std::shared_ptr game) { + return new algorithms::CFRSolver(*game); + })) + .def("evaluate_and_update_policy", + &open_spiel::algorithms::CFRSolver::EvaluateAndUpdatePolicy) + .def("current_policy", &open_spiel::algorithms::CFRSolver::CurrentPolicy) + .def("average_policy", &open_spiel::algorithms::CFRSolver::AveragePolicy) + .def("tabular_average_policy", + &open_spiel::algorithms::CFRSolver::TabularAveragePolicy) + .def("tabular_current_policy", + &open_spiel::algorithms::CFRSolver::TabularCurrentPolicy) + .def(py::pickle( + [](const open_spiel::algorithms::CFRSolver& solver) { // __getstate__ + return solver.Serialize(); + }, + [](const std::string& serialized) { // __setstate__ + return open_spiel::algorithms::DeserializeCFRSolver(serialized); + })); + + py::class_(m, "CFRPlusSolver") + .def(py::init([](std::shared_ptr game) { + return new algorithms::CFRPlusSolver(*game); + })) + .def("evaluate_and_update_policy", + &open_spiel::algorithms::CFRPlusSolver::EvaluateAndUpdatePolicy) + .def("current_policy", &open_spiel::algorithms::CFRSolver::CurrentPolicy) + .def("average_policy", + &open_spiel::algorithms::CFRPlusSolver::AveragePolicy) + .def("tabular_average_policy", + &open_spiel::algorithms::CFRPlusSolver::TabularAveragePolicy) + .def(py::pickle( + [](const open_spiel::algorithms::CFRPlusSolver& + solver) { // __getstate__ + return solver.Serialize(); + }, + [](const 
std::string& serialized) { // __setstate__ + return open_spiel::algorithms::DeserializeCFRPlusSolver(serialized); + })); + + py::class_(m, "CFRBRSolver") + .def(py::init([](std::shared_ptr game) { + return new algorithms::CFRBRSolver(*game); + })) + .def("evaluate_and_update_policy", + &open_spiel::algorithms::CFRPlusSolver::EvaluateAndUpdatePolicy) + .def("current_policy", &open_spiel::algorithms::CFRSolver::CurrentPolicy) + .def("average_policy", + &open_spiel::algorithms::CFRPlusSolver::AveragePolicy) + .def(py::pickle( + [](const open_spiel::algorithms::CFRBRSolver& + solver) { // __getstate__ + return solver.Serialize(); + }, + [](const std::string& serialized) { // __setstate__ + return open_spiel::algorithms::DeserializeCFRBRSolver(serialized); + })); + + py::enum_(m, "MCCFRAverageType") + .value("SIMPLE", open_spiel::algorithms::AverageType::kSimple) + .value("FULL", open_spiel::algorithms::AverageType::kFull); + + py::class_( + m, "ExternalSamplingMCCFRSolver") + .def(py::init([](std::shared_ptr game, int seed, + algorithms::AverageType average_type) { + return new algorithms::ExternalSamplingMCCFRSolver(*game, seed, + average_type); + }), + py::arg("game"), py::arg("seed") = 0, + py::arg("avg_type") = open_spiel::algorithms::AverageType::kSimple) + .def("run_iteration", + py::overload_cast<>(&open_spiel::algorithms:: + ExternalSamplingMCCFRSolver::RunIteration)) + .def("average_policy", + &open_spiel::algorithms::ExternalSamplingMCCFRSolver::AveragePolicy) + .def(py::pickle( + [](const open_spiel::algorithms::ExternalSamplingMCCFRSolver& + solver) { // __getstate__ + return solver.Serialize(); + }, + [](const std::string& serialized) { // __setstate__ + return open_spiel::algorithms:: + DeserializeExternalSamplingMCCFRSolver(serialized); + })); + + py::class_( + m, "OutcomeSamplingMCCFRSolver") + .def(py::init( + [](std::shared_ptr game, double epsilon, int seed) { + return new algorithms::OutcomeSamplingMCCFRSolver( + *game, epsilon, seed); + }), + py::arg("game"), + py::arg("epsilon") = open_spiel::algorithms:: + OutcomeSamplingMCCFRSolver::kDefaultEpsilon, + py::arg("seed") = -1) + .def("run_iteration", + py::overload_cast<>(&open_spiel::algorithms:: + OutcomeSamplingMCCFRSolver::RunIteration)) + .def("average_policy", + &open_spiel::algorithms::OutcomeSamplingMCCFRSolver::AveragePolicy) + .def(py::pickle( + [](const open_spiel::algorithms::OutcomeSamplingMCCFRSolver& + solver) { // __getstate__ + return solver.Serialize(); + }, + [](const std::string& serialized) { // __setstate__ + return open_spiel::algorithms:: + DeserializeOutcomeSamplingMCCFRSolver(serialized); + })); + + py::class_(m, "TabularBestResponseMDPInfo") + .def_readonly("br_values", &TabularBestResponseMDPInfo::br_values) + .def_readonly("br_policies", &TabularBestResponseMDPInfo::br_policies) + .def_readonly("on_policy_values", + &TabularBestResponseMDPInfo::on_policy_values) + .def_readonly("deviation_incentives", + &TabularBestResponseMDPInfo::deviation_incentives) + .def_readonly("nash_conv", &TabularBestResponseMDPInfo::nash_conv) + .def_readonly("exploitability", + &TabularBestResponseMDPInfo::exploitability); + + py::class_(m, "TabularBestResponseMDP") + .def(py::init()) + .def("compute_best_responses", // Takes no arguments. + &TabularBestResponseMDP::ComputeBestResponses) + .def("compute_best_response", // Takes one argument: Player max_player. 
+ &TabularBestResponseMDP::ComputeBestResponse, py::arg("max_player")) + .def("nash_conv", &TabularBestResponseMDP::NashConv) + .def("exploitability", &TabularBestResponseMDP::Exploitability); + + m.def( + "expected_returns", + py::overload_cast&, int, + bool, float>(&open_spiel::algorithms::ExpectedReturns), + "Computes the undiscounted expected returns from a depth-limited " + "search.", + py::arg("state"), py::arg("policies"), py::arg("depth_limit"), + py::arg("use_infostate_get_policy"), py::arg("prob_cut_threshold") = 0.0); + + m.def("expected_returns", + py::overload_cast( + &open_spiel::algorithms::ExpectedReturns), + "Computes the undiscounted expected returns from a depth-limited " + "search.", + py::arg("state"), py::arg("joint_policy"), py::arg("depth_limit"), + py::arg("use_infostate_get_policy"), + py::arg("prob_cut_threshold") = 0.0); + + m.def("expected_returns_of_deterministic_policies_from_seeds", + py::overload_cast&>( + &open_spiel::algorithms:: + ExpectedReturnsOfDeterministicPoliciesFromSeeds), + py::call_guard(), + "Computes the undiscounted expected returns from seeds.", + py::arg("state"), py::arg("policy_seeds")); + + m.def("expected_returns_of_deterministic_policies_from_seeds", + py::overload_cast&, + const std::vector&>( + &open_spiel::algorithms:: + ExpectedReturnsOfDeterministicPoliciesFromSeeds), + py::call_guard(), + "Computes the expected returns from seeds and policies.", + py::arg("state"), py::arg("policy_seeds"), py::arg("policies")); + + m.def( + "exploitability", + [](std::shared_ptr game, const Policy& policy) { + return Exploitability(*game, policy); + }, + "Returns the sum of the utility that a best responder wins when when " + "playing against 1) the player 0 policy contained in `policy` and 2) " + "the player 1 policy contained in `policy`." + "This only works for two player, zero- or constant-sum sequential " + "games, and raises a SpielFatalError if an incompatible game is passed " + "to it."); + + m.def( + "exploitability", + [](std::shared_ptr game, + const std::unordered_map& policy) { + return Exploitability(*game, policy); + }, + "Returns the sum of the utility that a best responder wins when when " + "playing against 1) the player 0 policy contained in `policy` and 2) " + "the player 1 policy contained in `policy`." + "This only works for two player, zero- or constant-sum sequential " + "games, and raises a SpielFatalError if an incompatible game is passed " + "to it."); + + m.def( + "nash_conv", + [](std::shared_ptr game, const Policy& policy, + bool use_state_get_policy) { + return NashConv(*game, policy, use_state_get_policy); + }, + "Calculates a measure of how far the given policy is from a Nash " + "equilibrium by returning the sum of the improvements in the value " + "that each player could obtain by unilaterally changing their strategy " + "while the opposing player maintains their current strategy (which " + "for a Nash equilibrium, this value is 0). 
The third parameter is to " + "indicate whether to use the Policy::GetStatePolicy(const State&) " + "instead of Policy::GetStatePolicy(const std::string& info_state) for " + "computation of the on-policy expected values.", + py::arg("game"), py::arg("policy"), + py::arg("use_state_get_policy") = false); + + m.def( + "nash_conv", + [](std::shared_ptr game, + const std::unordered_map& policy) { + return NashConv(*game, policy); + }, + "Calculates a measure of how far the given policy is from a Nash " + "equilibrium by returning the sum of the improvements in the value " + "that each player could obtain by unilaterally changing their strategy " + "while the opposing player maintains their current strategy (which " + "for a Nash equilibrium, this value is 0)."); + + m.def("num_deterministic_policies", + &open_spiel::algorithms::NumDeterministicPolicies, + "Returns number of determinstic policies in this game for a player, " + "or -1 if there are more than 2^64 - 1 policies."); + + m.def("to_joint_tabular_policy", &open_spiel::ToJointTabularPolicy, + "Returns a merged tabular policy from a list of TabularPolicy. The " + "second argument is a bool which, if true, checks that there is no " + "overlap among all the policies."); +} +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/policy.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/policy.h new file mode 100644 index 0000000..4c08ae3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/policy.h @@ -0,0 +1,25 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_PYTHON_PYBIND11_POLICY_H_ +#define OPEN_SPIEL_PYTHON_PYBIND11_POLICY_H_ + +#include "open_spiel/python/pybind11/pybind11.h" + +// Python bindings for policies and algorithms handling them. +namespace open_spiel { +void init_pyspiel_policy(::pybind11::module &m); +} + +#endif // OPEN_SPIEL_PYTHON_PYBIND11_POLICY_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/pybind11.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/pybind11.h new file mode 100644 index 0000000..71c5a67 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/pybind11.h @@ -0,0 +1,266 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef OPEN_SPIEL_PYTHON_PYBIND11_PYBIND11_H_ +#define OPEN_SPIEL_PYTHON_PYBIND11_PYBIND11_H_ + +// Common definitions and includes for pybind code. + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_bots.h" +#include "open_spiel/spiel_utils.h" +#include "pybind11/include/pybind11/cast.h" +#include "pybind11/include/pybind11/detail/common.h" +#include "pybind11/include/pybind11/detail/descr.h" +#include "pybind11/include/pybind11/functional.h" // IWYU pragma: keep +#include "pybind11/include/pybind11/numpy.h" // IWYU pragma: keep +#include "pybind11/include/pybind11/pybind11.h" +#include "pybind11/include/pybind11/stl.h" // IWYU pragma: keep + +namespace open_spiel { + +class Policy; +class TabularPolicy; +class PartialTabularPolicy; +class UniformPolicy; +class PreferredActionPolicy; + +class NormalFormGame; +class Bot; + +namespace matrix_game { +class MatrixGame; +} + +namespace tensor_game { +class TensorGame; +} + +namespace algorithms { +class MCTSBot; +class ISMCTSBot; +} // namespace algorithms +} // namespace open_spiel + +namespace open_spiel { +// Trampoline helper class to allow implementing Bots in Python. See +// https://pybind11.readthedocs.io/en/stable/advanced/classes.html#overriding-virtual-functions-in-python +template +class PyBot : public BotBase, public ::pybind11::trampoline_self_life_support { + public: + // We need the bot constructor + using BotBase::BotBase; + ~PyBot() override = default; + + // Choose and execute an action in a game. The bot should return its + // distribution over actions and also its selected action. + open_spiel::Action Step(const State& state) override { + PYBIND11_OVERLOAD_PURE_NAME( + open_spiel::Action, // Return type (must be simple token) + BotBase, // Parent class + "step", // Name of function in Python + Step, // Name of function in C++ + state // Arguments + ); + } + + // Restart at the specified state. 
+ void Restart() override { + PYBIND11_OVERLOAD_NAME( + void, // Return type (must be a simple token for macro parser) + BotBase, // Parent class + "restart", // Name of function in Python + Restart, // Name of function in C++ + // The trailing coma after Restart is necessary to say "No argument" + ); + } + bool ProvidesForceAction() override { + PYBIND11_OVERLOAD_NAME( + bool, // Return type (must be a simple token for macro parser) + BotBase, // Parent class + "provides_force_action", // Name of function in Python + ProvidesForceAction, // Name of function in C++ + // Arguments + ); + } + void ForceAction(const State& state, Action action) override { + PYBIND11_OVERLOAD_NAME( + void, // Return type (must be a simple token for macro parser) + BotBase, // Parent class + "force_action", // Name of function in Python + ForceAction, // Name of function in C++ + state, // Arguments + action); + } + void InformAction(const State& state, Player player_id, + Action action) override { + PYBIND11_OVERLOAD_NAME( + void, // Return type (must be a simple token for macro parser) + BotBase, // Parent class + "inform_action", // Name of function in Python + InformAction, // Name of function in C++ + state, // Arguments + player_id, action); + } + void InformActions(const State& state, + const std::vector& actions) override { + PYBIND11_OVERLOAD_NAME( + void, // Return type (must be a simple token for macro parser) + BotBase, // Parent class + "inform_actions", // Name of function in Python + InformActions, // Name of function in C++ + state, // Arguments + actions); + } + + void RestartAt(const State& state) override { + PYBIND11_OVERLOAD_NAME( + void, // Return type (must be a simple token for macro parser) + BotBase, // Parent class + "restart_at", // Name of function in Python + RestartAt, // Name of function in C++ + state // Arguments + ); + } + bool ProvidesPolicy() override { + PYBIND11_OVERLOAD_NAME( + bool, // Return type (must be a simple token for macro parser) + BotBase, // Parent class + "provides_policy", // Name of function in Python + ProvidesPolicy, // Name of function in C++ + // Arguments + ); + } + ActionsAndProbs GetPolicy(const State& state) override { + PYBIND11_OVERLOAD_NAME(ActionsAndProbs, // Return type (must be a simple + // token for macro parser) + BotBase, // Parent class + "get_policy", // Name of function in Python + GetPolicy, // Name of function in C++ + state); + } + std::pair StepWithPolicy( + const State& state) override { + using step_retval_t = std::pair; + PYBIND11_OVERLOAD_NAME( + step_retval_t, // Return type (must be a simple token for macro parser) + BotBase, // Parent class + "step_with_policy", // Name of function in Python + StepWithPolicy, // Name of function in C++ + state // Arguments + ); + } + + bool IsClonable() const override { + PYBIND11_OVERLOAD_NAME( + bool, // Return type (must be a simple token for macro parser) + BotBase, // Parent class + "is_clonable", // Name of function in Python + IsClonable, // Name of function in C++ + ); + } + + std::unique_ptr Clone() override { + using BotUniquePtr = std::unique_ptr; + PYBIND11_OVERLOAD_NAME( + BotUniquePtr, // Return type (must be a simple token for macro parser) + BotBase, // Parent class + "clone", // Name of function in Python + Clone, // Name of function in C++ + ); + } +}; +} // namespace open_spiel + +// Custom caster for GameParameter (essentially a variant). 
+namespace pybind11 { +namespace detail { + +template <> +struct type_caster { + public: + PYBIND11_TYPE_CASTER(open_spiel::GameParameter, _("GameParameter")); + + bool load(handle src, bool convert) { + if (src.is_none()) { + // value is default-constructed to an unset value + return true; + } else if (PyBool_Check(src.ptr())) { + value = open_spiel::GameParameter(src.cast()); + return true; + } else if (auto str_val = maybe_load(src, convert)) { + value = open_spiel::GameParameter(*str_val); + return true; + } else if (PyFloat_Check(src.ptr())) { + value = open_spiel::GameParameter(src.cast()); + return true; + } else if (PyLong_Check(src.ptr())) { + value = open_spiel::GameParameter(src.cast()); + return true; + } else { + auto dict = src.cast(); + std::map d; + for (const auto& [k, v] : dict) { + d[k.cast()] = v.cast(); + } + value = open_spiel::GameParameter(d); + return true; + } + } + + static handle cast(const open_spiel::GameParameter& gp, + return_value_policy policy, handle parent) { + if (gp.has_bool_value()) { + return pybind11::bool_(gp.bool_value()).release(); + } else if (gp.has_double_value()) { + return pybind11::float_(gp.double_value()).release(); + } else if (gp.has_string_value()) { + return pybind11::str(gp.string_value()).release(); + } else if (gp.has_int_value()) { + return pybind11::int_(gp.int_value()).release(); + } else if (gp.has_game_value()) { + pybind11::dict d; + for (const auto& [k, v] : gp.game_value()) { + d[pybind11::str(k)] = pybind11::cast(v); + } + return d.release(); + } else { + return pybind11::none(); + } + } + + private: + template + absl::optional maybe_load(handle src, bool convert) { + auto caster = pybind11::detail::make_caster(); + if (caster.load(src, convert)) { + return cast_op(caster); + } else { + return absl::nullopt; + } + } +}; + +} // namespace detail +} // namespace pybind11 + +#endif // OPEN_SPIEL_PYTHON_PYBIND11_PYBIND11_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/pyspiel.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/pyspiel.cc new file mode 100644 index 0000000..692f007 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/pyspiel.cc @@ -0,0 +1,717 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include // NOLINT +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/flags/flag.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/algorithms/matrix_game_utils.h" +#include "open_spiel/algorithms/nfg_writer.h" +#include "open_spiel/algorithms/tensor_game_utils.h" +#include "open_spiel/canonical_game_strings.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/games/efg_game/efg_game.h" +#include "open_spiel/games/efg_game/efg_game_data.h" +#include "open_spiel/games/nfg_game/nfg_game.h" +#include "open_spiel/game_transforms/turn_based_simultaneous_game.h" +#include "open_spiel/matrix_game.h" +#include "open_spiel/normal_form_game.h" +#include "open_spiel/observer.h" +#include "open_spiel/python/pybind11/algorithms_corr_dist.h" +#include "open_spiel/python/pybind11/algorithms_trajectories.h" +#include "open_spiel/python/pybind11/bots.h" +#include "open_spiel/python/pybind11/evaluation_sco.h" +#include "open_spiel/python/pybind11/game_transforms.h" +#include "open_spiel/python/pybind11/games_backgammon.h" +#include "open_spiel/python/pybind11/games_bargaining.h" +#include "open_spiel/python/pybind11/games_blackjack.h" +#include "open_spiel/python/pybind11/games_bridge.h" +#include "open_spiel/python/pybind11/games_chess.h" +#include "open_spiel/python/pybind11/games_colored_trails.h" +#include "open_spiel/python/pybind11/games_dots_and_boxes.h" +#include "open_spiel/python/pybind11/games_euchre.h" +#include "open_spiel/python/pybind11/games_gin_rummy.h" +#include "open_spiel/python/pybind11/games_kuhn_poker.h" +#include "open_spiel/python/pybind11/games_leduc_poker.h" +#include "open_spiel/python/pybind11/games_negotiation.h" +#include "open_spiel/python/pybind11/games_spades.h" +#include "open_spiel/python/pybind11/games_tarok.h" +#include "open_spiel/python/pybind11/games_tic_tac_toe.h" +#include "open_spiel/python/pybind11/games_tiny_bridge.h" +#include "open_spiel/python/pybind11/games_trade_comm.h" +#include "open_spiel/python/pybind11/observer.h" +#include "open_spiel/python/pybind11/policy.h" +#include "open_spiel/python/pybind11/pybind11.h" +#include "open_spiel/python/pybind11/python_games.h" +#include "open_spiel/python/pybind11/utils.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tensor_game.h" +#include "open_spiel/tests/basic_tests.h" + +// Includes needed for absl::optional. +#include "pybind11/include/pybind11/detail/common.h" +#include "pybind11_abseil/absl_casters.h" + +// List of optional python submodules. +#if OPEN_SPIEL_BUILD_WITH_GAMUT +#include "open_spiel/games/gamut/gamut_pybind11.h" +#endif +#if OPEN_SPIEL_BUILD_WITH_XINXIN +#include "open_spiel/bots/xinxin/xinxin_pybind11.h" +#endif +#if OPEN_SPIEL_BUILD_WITH_ACPC +#include "open_spiel/python/pybind11/games_universal_poker.h" +#endif + +#define PYSPIEL_VERSION "1.6" + +// Flags governing Open Spiel behaviour +ABSL_FLAG(bool, log_exceptions_to_stderr, true, + "Log all exceptions raised in OpenSpiel C++ code to stderr."); + +// This file contains OpenSpiel's Python API. The best place to see an overview +// of the API is to refer to python/examples/example.py. Generally, all the core +// functions are exposed as snake case in Python (i.e. CurrentPlayer becomes +// current_player, ApplyAction becomes apply_action, etc.) but otherwise the +// functions and their effect remain the same. 
For a more detailed documentation +// of each of the core API functions, please see spiel.h. + +namespace open_spiel { +namespace { + +using ::open_spiel::matrix_game::MatrixGame; +using ::open_spiel::tensor_game::TensorGame; + +namespace py = ::pybind11; + +// This exception class is used to forward errors from Spiel to Python. +// Do not create exceptions of this type directly! Instead, call +// SpielFatalError, which will raise a Python exception when called from +// Python, and exit the process otherwise. +class SpielException : public std::exception { + public: + explicit SpielException(std::string message) : message_(message) {} + const char* what() const noexcept override { return message_.c_str(); } + + private: + std::string message_; +}; + +// Definition of our Python module. +PYBIND11_MODULE(pyspiel, m) { + m.doc() = "Open Spiel"; + m.attr("__version__") = PYSPIEL_VERSION; + + m.def("game_parameters_from_string", GameParametersFromString, + "Parses a string as a GameParameter dictionary."); + + m.def("game_parameters_to_string", GameParametersToString, + "Converts a GameParameter dictionary to string."); + + py::enum_(m, "PrivateInfoType") + .value("ALL_PLAYERS", PrivateInfoType::kAllPlayers) + .value("NONE", PrivateInfoType::kNone) + .value("SINGLE_PLAYER", PrivateInfoType::kSinglePlayer) + .export_values(); + + py::class_(m, "IIGObservationType") + .def(py::init(), + py::arg("public_info") = true, py::arg("perfect_recall"), + py::arg("private_info") = PrivateInfoType::kSinglePlayer) + .def_readonly("public_info", &IIGObservationType::public_info) + .def_readonly("perfect_recall", &IIGObservationType::perfect_recall) + .def_readonly("private_info", &IIGObservationType::private_info); + + py::class_ uniform_sampler( + m, "UniformProbabilitySampler"); + uniform_sampler.def(py::init()) + .def(py::init()) + .def("__call__", &UniformProbabilitySampler::operator()); + + py::enum_(m, "StateType") + .value("TERMINAL", open_spiel::StateType::kTerminal) + .value("CHANCE", open_spiel::StateType::kChance) + .value("DECISION", open_spiel::StateType::kDecision) + .value("MEAN_FIELD", open_spiel::StateType::kMeanField) + .export_values(); + + py::class_ game_type(m, "GameType"); + game_type + .def(py::init(), + py::arg("short_name"), py::arg("long_name"), py::arg("dynamics"), + py::arg("chance_mode"), py::arg("information"), py::arg("utility"), + py::arg("reward_model"), py::arg("max_num_players"), + py::arg("min_num_players"), + py::arg("provides_information_state_string"), + py::arg("provides_information_state_tensor"), + py::arg("provides_observation_string"), + py::arg("provides_observation_tensor"), + py::arg("parameter_specification") = GameParameters(), + py::arg("default_loadable") = true, + py::arg("provides_factored_observation_string") = false) + .def(py::init()) + .def_readonly("short_name", &GameType::short_name) + .def_readonly("long_name", &GameType::long_name) + .def_readonly("dynamics", &GameType::dynamics) + .def_readonly("chance_mode", &GameType::chance_mode) + .def_readonly("information", &GameType::information) + .def_readonly("utility", &GameType::utility) + .def_readonly("reward_model", &GameType::reward_model) + .def_readonly("max_num_players", &GameType::max_num_players) + .def_readonly("min_num_players", &GameType::min_num_players) + .def_readonly("provides_information_state_string", + &GameType::provides_information_state_string) + .def_readonly("provides_information_state_tensor", + &GameType::provides_information_state_tensor) + 
.def_readonly("provides_observation_string", + &GameType::provides_observation_string) + .def_readonly("provides_observation_tensor", + &GameType::provides_observation_tensor) + .def_readonly("parameter_specification", + &GameType::parameter_specification) + .def_readonly("default_loadable", &GameType::default_loadable) + .def_readonly("provides_factored_observation_string", + &GameType::provides_factored_observation_string) + .def_readonly("is_concrete", &GameType::is_concrete) + .def("pretty_print", + [](const GameType& value) { return GameTypeToString(value); }) + .def("__repr__", + [](const GameType& gt) { + return ""; + }) + .def("__eq__", + [](const GameType& value, GameType* value2) { + return value2 && + GameTypeToString(value) == GameTypeToString(*value2); + }) + .def(py::pickle( // Pickle support + [](const GameType& game_type) { // __getstate__ + return GameTypeToString(game_type); + }, + [](const std::string& data) { // __setstate__ + return GameTypeFromString(data); + })); + + py::enum_(game_type, "Dynamics") + .value("SEQUENTIAL", GameType::Dynamics::kSequential) + .value("MEAN_FIELD", + GameType::Dynamics::kMeanField) + .value("SIMULTANEOUS", GameType::Dynamics::kSimultaneous); + + py::enum_(game_type, "ChanceMode") + .value("DETERMINISTIC", GameType::ChanceMode::kDeterministic) + .value("EXPLICIT_STOCHASTIC", GameType::ChanceMode::kExplicitStochastic) + .value("SAMPLED_STOCHASTIC", GameType::ChanceMode::kSampledStochastic); + + py::enum_(game_type, "Information") + .value("ONE_SHOT", GameType::Information::kOneShot) + .value("PERFECT_INFORMATION", GameType::Information::kPerfectInformation) + .value("IMPERFECT_INFORMATION", + GameType::Information::kImperfectInformation); + + py::enum_(game_type, "Utility") + .value("ZERO_SUM", GameType::Utility::kZeroSum) + .value("CONSTANT_SUM", GameType::Utility::kConstantSum) + .value("GENERAL_SUM", GameType::Utility::kGeneralSum) + .value("IDENTICAL", GameType::Utility::kIdentical); + + py::enum_(game_type, "RewardModel") + .value("REWARDS", GameType::RewardModel::kRewards) + .value("TERMINAL", GameType::RewardModel::kTerminal); + + py::enum_(m, "PlayerId") + .value("DEFAULT_PLAYER_ID", open_spiel::kDefaultPlayerId) + .value("INVALID", open_spiel::kInvalidPlayer) + .value("TERMINAL", open_spiel::kTerminalPlayerId) + .value("CHANCE", open_spiel::kChancePlayerId) + .value("MEAN_FIELD", open_spiel::kMeanFieldPlayerId) + .value("SIMULTANEOUS", open_spiel::kSimultaneousPlayerId); + + py::class_ game_info(m, "GameInfo"); + game_info + .def(py::init, + int>(), + py::arg("num_distinct_actions"), py::arg("max_chance_outcomes"), + py::arg("num_players"), py::arg("min_utility"), + py::arg("max_utility"), py::arg("utility_sum") = absl::nullopt, + py::arg("max_game_length")) + .def(py::init()) + .def_readonly("num_distinct_actions", &GameInfo::num_distinct_actions) + .def_readonly("max_chance_outcomes", &GameInfo::max_chance_outcomes) + .def_readonly("num_players", &GameInfo::num_players) + .def_readonly("min_utility", &GameInfo::min_utility) + .def_readonly("max_utility", &GameInfo::max_utility) + .def_readonly("utility_sum", &GameInfo::utility_sum) + .def_readonly("max_game_length", &GameInfo::max_game_length); + + m.attr("INVALID_ACTION") = py::int_(open_spiel::kInvalidAction); + + py::enum_(m, "TensorLayout") + .value("HWC", open_spiel::TensorLayout::kHWC) + .value("CHW", open_spiel::TensorLayout::kCHW); + + py::class_ player_action(m, "PlayerAction"); + player_action.def_readonly("player", &State::PlayerAction::player) + 
.def_readonly("action", &State::PlayerAction::action); + + // https://github.com/pybind/pybind11/blob/smart_holder/README_smart_holder.rst + py::classh state(m, "State"); + state.def(py::init>()) + .def("current_player", &State::CurrentPlayer) + .def("apply_action", &State::ApplyAction) + .def("apply_action_with_legality_check", + py::overload_cast( + &State::ApplyActionWithLegalityCheck)) + .def("legal_actions", + (std::vector(State::*)(int) const) & + State::LegalActions) + .def("legal_actions", + (std::vector(State::*)(void) const) & + State::LegalActions) + .def("legal_actions_mask", + (std::vector(State::*)(int) const) & State::LegalActionsMask) + .def("legal_actions_mask", + (std::vector(State::*)(void) const) & State::LegalActionsMask) + .def("action_to_string", (std::string(State::*)(Player, Action) const) & + State::ActionToString) + .def("action_to_string", + (std::string(State::*)(Action) const) & State::ActionToString) + .def("string_to_action", + (Action(State::*)(Player, const std::string&) const) & + State::StringToAction) + .def("string_to_action", + (Action(State::*)(const std::string&) const) & State::StringToAction) + .def("__str__", &State::ToString) + .def("__repr__", &State::ToString) + .def("to_string", &State::ToString) + .def("is_terminal", &State::IsTerminal) + .def("is_initial_state", &State::IsInitialState) + .def("move_number", &State::MoveNumber) + .def("rewards", &State::Rewards) + .def("returns", &State::Returns) + .def("player_reward", &State::PlayerReward) + .def("player_return", &State::PlayerReturn) + .def("is_chance_node", &State::IsChanceNode) + .def("is_mean_field_node", &State::IsMeanFieldNode) + .def("is_simultaneous_node", &State::IsSimultaneousNode) + .def("is_player_node", &State::IsPlayerNode) + .def("history", &State::History) + .def("history_str", &State::HistoryString) + .def("full_history", &State::FullHistory) + .def("information_state_string", + (std::string(State::*)(int) const) & State::InformationStateString) + .def("information_state_string", + (std::string(State::*)() const) & State::InformationStateString) + .def("information_state_tensor", + (std::vector(State::*)(int) const) & + State::InformationStateTensor) + .def("information_state_tensor", (std::vector(State::*)() const) & + State::InformationStateTensor) + .def("observation_string", + (std::string(State::*)(int) const) & State::ObservationString) + .def("observation_string", + (std::string(State::*)() const) & State::ObservationString) + .def("observation_tensor", + (std::vector(State::*)(int) const) & State::ObservationTensor) + .def("observation_tensor", + (std::vector(State::*)() const) & State::ObservationTensor) + .def("clone", &State::Clone) + .def("child", &State::Child) + .def("undo_action", &State::UndoAction) + .def("apply_actions", &State::ApplyActions) + .def("apply_actions_with_legality_checks", + &State::ApplyActionsWithLegalityChecks) + .def("num_distinct_actions", &State::NumDistinctActions) + .def("num_players", &State::NumPlayers) + .def("chance_outcomes", &State::ChanceOutcomes) + .def("get_game", &State::GetGame) + .def("get_type", &State::GetType) + .def("serialize", &State::Serialize) + .def("resample_from_infostate", &State::ResampleFromInfostate) + .def(py::pickle( // Pickle support + [](const State& state) { // __getstate__ + return SerializeGameAndState(*state.GetGame(), state); + }, + [](const std::string& data) { // __setstate__ + auto state = DeserializeGameAndState(data).second; + auto pydict = PyDict(*state); + return 
std::make_pair(std::move(state), pydict); + })) + .def("distribution_support", &State::DistributionSupport) + .def("update_distribution", &State::UpdateDistribution) + .def("mean_field_population", &State::MeanFieldPopulation); + + py::classh game(m, "Game"); + game.def(py::init()) + .def("num_distinct_actions", &Game::NumDistinctActions) + .def("new_initial_states", &Game::NewInitialStates) + .def("new_initial_state", + (std::unique_ptr(open_spiel::Game::*)() const) + &Game::NewInitialState) + .def("new_initial_state", + (std::unique_ptr(open_spiel::Game::*)( + const std::string&) const) + &Game::NewInitialState) + .def("new_initial_state_for_population", + &Game::NewInitialStateForPopulation) + .def("max_chance_outcomes", &Game::MaxChanceOutcomes) + .def("get_parameters", &Game::GetParameters) + .def("num_players", &Game::NumPlayers) + .def("min_utility", &Game::MinUtility) + .def("max_utility", &Game::MaxUtility) + .def("get_type", &Game::GetType) + .def("utility_sum", &Game::UtilitySum) + .def("information_state_tensor_shape", &Game::InformationStateTensorShape) + .def("information_state_tensor_layout", + &Game::InformationStateTensorLayout) + .def("information_state_tensor_size", &Game::InformationStateTensorSize) + .def("observation_tensor_shape", &Game::ObservationTensorShape) + .def("observation_tensor_layout", &Game::ObservationTensorLayout) + .def("observation_tensor_size", &Game::ObservationTensorSize) + .def("policy_tensor_shape", &Game::PolicyTensorShape) + .def("deserialize_state", &Game::DeserializeState) + .def("max_game_length", &Game::MaxGameLength) + .def("action_to_string", &Game::ActionToString) + .def("max_chance_nodes_in_history", &Game::MaxChanceNodesInHistory) + .def("max_move_number", &Game::MaxMoveNumber) + .def("max_history_length", &Game::MaxHistoryLength) + .def("make_observer", + [](std::shared_ptr game, IIGObservationType iig_obs_type, + const GameParameters& params) { + return game->MakeObserver(iig_obs_type, params); + }) + .def("make_observer", + [](std::shared_ptr game, const GameParameters& params) { + return game->MakeObserver(absl::nullopt, params); + }) + .def("__str__", &Game::ToString) + .def("__repr__", &Game::ToString) + .def("__eq__", + [](std::shared_ptr a, std::shared_ptr b) { + return b && a->ToString() == b->ToString(); + }) + .def(py::pickle( // Pickle support + [](std::shared_ptr game) { // __getstate__ + return game->Serialize(); + }, + [](const std::string& data) { // __setstate__ + // We must remove the const for this to compile. + return std::shared_ptr( + std::const_pointer_cast(DeserializeGame(data))); + })); + + py::classh normal_form_game(m, "NormalFormGame", game); + normal_form_game.def("get_utilities", &NormalFormGame::GetUtilities) + .def("get_utility", &NormalFormGame::GetUtility) + .def(py::pickle( // Pickle support + [](std::shared_ptr game) { // __getstate__ + return game->ToString(); + }, + [](const std::string& data) { // __setstate__ + // Have to remove the const here for this to compile, presumably + // because the holder type is non-const. But seems like you can't + // set the holder type to std::shared_ptr either. + return std::const_pointer_cast( + std::static_pointer_cast(LoadGame(data))); + })); + + // Put this here rather than in game_transforms.cc because it depends on + // State, which is defined above. 
+ py::classh tbs_state(m, + "TurnBasedSimultaneousState", state); + tbs_state.def("simultaneous_game_state", + &TurnBasedSimultaneousState::SimultaneousGameState, + py::return_value_policy::reference) + .def(py::pickle( // Pickle support + [](const TurnBasedSimultaneousState& state) { // __getstate__ + return SerializeGameAndState(*state.GetGame(), state); + }, + [](const std::string& data) { // __setstate__ + auto state = DeserializeGameAndState(data).second; + auto pydict = PyDict(*state); + return std::make_pair(std::move(state), pydict); + })); + + py::classh matrix_game(m, "MatrixGame", normal_form_game); + matrix_game + .def(py::init, + std::vector, std::vector, + std::vector>()) + .def(py::init, + std::vector, + const std::vector>&, + const std::vector>&>()) + .def("num_rows", &MatrixGame::NumRows) + .def("num_cols", &MatrixGame::NumCols) + .def("row_utility", &MatrixGame::RowUtility) + .def("col_utility", &MatrixGame::ColUtility) + .def("player_utility", &MatrixGame::PlayerUtility) + .def("row_utilities", + [](const MatrixGame& game) { + const std::vector& row_utilities = game.RowUtilities(); + return py::array_t({game.NumRows(), game.NumCols()}, + &row_utilities[0]); + }) + .def("col_utilities", + [](const MatrixGame& game) { + const std::vector& col_utilities = game.ColUtilities(); + return py::array_t({game.NumRows(), game.NumCols()}, + &col_utilities[0]); + }) + .def("player_utilities", + [](const MatrixGame& game, const Player player) { + const std::vector& player_utilities = + game.PlayerUtilities(player); + return py::array_t({game.NumRows(), game.NumCols()}, + &player_utilities[0]); + }) + .def("row_action_name", &MatrixGame::RowActionName) + .def("col_action_name", &MatrixGame::ColActionName) + .def(py::pickle( // Pickle support + [](std::shared_ptr game) { // __getstate__ + return game->ToString(); + }, + [](const std::string& data) { // __setstate__ + // Have to remove the const here for this to compile, presumably + // because the holder type is non-const. But seems like you can't + // set the holder type to std::shared_ptr either. + return std::const_pointer_cast( + algorithms::LoadMatrixGame(data)); + })); + + py::classh tensor_game(m, "TensorGame", normal_form_game); + tensor_game + .def(py::init>, + std::vector>>()) + .def("shape", &TensorGame::Shape) + .def("player_utility", &TensorGame::PlayerUtility) + .def("player_utilities", + [](const TensorGame& game, const Player player) { + const std::vector& utilities = + game.PlayerUtilities(player); + return py::array_t(game.Shape(), &utilities[0]); + }) + .def("action_name", &TensorGame::ActionName) + .def("as_matrix_game", &TensorGame::AsMatrixGame) + .def(py::pickle( // Pickle support + [](std::shared_ptr game) { // __getstate__ + return game->ToString(); + }, + [](const std::string& data) { // __setstate__ + // Have to remove the const here for this to compile, presumably + // because the holder type is non-const. But seems like you can't + // set the holder type to std::shared_ptr either. 
+ return std::const_pointer_cast( + algorithms::LoadTensorGame(data)); + })); + + m.def("hulh_game_string", &open_spiel::HulhGameString); + m.def("hunl_game_string", &open_spiel::HunlGameString); + m.def("turn_based_goofspiel_game_string", + &open_spiel::TurnBasedGoofspielGameString); + + m.def("create_matrix_game", + py::overload_cast&, + const std::vector&, + const std::vector>&, + const std::vector>&>( + &open_spiel::matrix_game::CreateMatrixGame), + "Creates an arbitrary matrix game from named rows/cols and utilities."); + + m.def("create_matrix_game", + py::overload_cast>&, + const std::vector>&>( + &open_spiel::matrix_game::CreateMatrixGame), + "Creates an arbitrary matrix game from dimensions and utilities."); + + m.def("create_tensor_game", + py::overload_cast>&, + const std::vector>&>( + &open_spiel::tensor_game::CreateTensorGame), + "Creates an arbitrary tensor game from named actions and utilities."); + + m.def("create_matrix_game", + py::overload_cast>&, + const std::vector>&>( + &open_spiel::matrix_game::CreateMatrixGame), + "Creates an arbitrary matrix game from dimensions and utilities."); + + m.def("create_tensor_game", + py::overload_cast>&, + const std::vector&>( + &open_spiel::tensor_game::CreateTensorGame), + "Creates an arbitrary matrix game from dimensions and utilities."); + + m.def( + "create_tensor_game", + [](const std::vector>& utilities) { + const int num_players = utilities.size(); + const std::vector shape( + utilities[0].shape(), utilities[0].shape() + utilities[0].ndim()); + std::vector> flat_utilities; + for (const auto& player_utilities : utilities) { + SPIEL_CHECK_EQ(player_utilities.ndim(), num_players); + SPIEL_CHECK_TRUE( + std::equal(shape.begin(), shape.end(), player_utilities.shape())); + flat_utilities.push_back(std::vector( + player_utilities.data(), + player_utilities.data() + player_utilities.size())); + } + return open_spiel::tensor_game::CreateTensorGame(flat_utilities, shape); + }, + "Creates an arbitrary matrix game from dimensions and utilities."); + + m.def("game_to_nfg_string", open_spiel::GameToNFGString, + "Get the Gambit .nfg text for a normal-form game."); + + m.def("load_game", + py::overload_cast(&open_spiel::LoadGame), + "Returns a new game object for the specified short name using default " + "parameters"); + + m.def("load_game", + py::overload_cast( + &open_spiel::LoadGame), + "Returns a new game object for the specified short name using given " + "parameters"); + + m.def("load_matrix_game", open_spiel::algorithms::LoadMatrixGame, + "Loads a game as a matrix game (will fail if not a matrix game."); + + m.def("load_tensor_game", open_spiel::algorithms::LoadTensorGame, + "Loads a game as a tensor game (will fail if not a tensor game."); + + m.def("load_efg_game", open_spiel::efg_game::LoadEFGGame, + "Load a gambit extensive form game (.efg) from string data."); + m.def("get_sample_efg_data", open_spiel::efg_game::GetSampleEFGData, + "Get Kuhn poker EFG data."); + m.def("get_kuhn_poker_efg_data", open_spiel::efg_game::GetKuhnPokerEFGData, + "Get sample EFG data."); + + m.def("load_nfg_game", open_spiel::nfg_game::LoadNFGGame, + "Load a gambit normal form game (.nfg) from string data."); + + m.def("extensive_to_matrix_game", + open_spiel::algorithms::ExtensiveToMatrixGame, + "Converts a two-player extensive-game to its equivalent matrix game, " + "which is exponentially larger. 
Use only with small games."); + + m.def("registered_names", GameRegisterer::RegisteredNames, + "Returns the names of all available games."); + + m.def("registered_concrete_names", GameRegisterer::RegisteredConcreteNames, + "Returns the names of all available concrete games."); + + m.def("registered_games", GameRegisterer::RegisteredGames, + "Returns the GameType objects of all available games."); + + m.def("registered_concrete_games", GameRegisterer::RegisteredConcreteGames, + "Returns the GameType objects of all available concrete games."); + + m.def("serialize_game_and_state", open_spiel::SerializeGameAndState, + "A general implementation of game and state serialization."); + + m.def( + "deserialize_game_and_state", + [](const std::string& data) { + auto rv = open_spiel::DeserializeGameAndState(data); + return std::make_pair(rv.first, std::move(rv.second)); + }, + "A general implementation of deserialization of a game and state " + "string serialized by serialize_game_and_state."); + + m.def("register_game", RegisterPyGame, + "Register a Python game implementation"); + + m.def("random_sim_test", testing::RandomSimTest, py::arg("game"), + py::arg("num_sims"), py::arg("serialize"), py::arg("verbose"), + py::arg("mask_test") = true, + py::arg("state_checker_fn") = + py::cpp_function(&testing::DefaultStateChecker), + py::arg("mean_field_population") = -1, py::arg("observer") = nullptr, + "Run the C++ tests on a game"); + + m.def("build_state_from_history_string", BuildStateFromHistoryString, + "Builds a state from a game string and history string.", + py::arg("game_string"), py::arg("history_string"), + py::arg("max_steps") = -1); + + // Set an error handler that will raise exceptions. These exceptions are for + // the Python interface only. When used from C++, OpenSpiel will never raise + // exceptions - the process will be terminated instead. + open_spiel::SetErrorHandler([](const std::string& string) { + if (absl::GetFlag(FLAGS_log_exceptions_to_stderr)) { + std::cerr << "OpenSpiel exception: " << string << std::endl << std::flush; + } + throw SpielException(string); + }); + py::register_exception(m, "SpielError", PyExc_RuntimeError); + + // Register other bits of the API. + init_pyspiel_bots(m); // Bots and bot-related algorithms. + init_pyspiel_policy(m); // Policies and policy-related algorithms. + init_pyspiel_algorithms_corr_dist(m); // Correlated eq. distance funcs + init_pyspiel_algorithms_trajectories(m); // Trajectories. + init_pyspiel_evaluation_sco(m); // Soft Condorcet Optimization. + init_pyspiel_game_transforms(m); // Game transformations. + init_pyspiel_games_backgammon(m); // Backgammon game. + init_pyspiel_games_bargaining(m); // Bargaining game. + init_pyspiel_games_blackjack(m); // Blackjack game. + init_pyspiel_games_bridge(m); // Game-specific functions for bridge. + init_pyspiel_games_chess(m); // Chess game. + init_pyspiel_games_colored_trails(m); // Colored Trails game. + init_pyspiel_games_dots_and_boxes(m); // Dots-and-Boxes game. + init_pyspiel_games_euchre(m); // Game-specific functions for euchre. + init_pyspiel_games_gin_rummy(m); // Game-specific functions for gin_rummy. + init_pyspiel_games_kuhn_poker(m); // Kuhn Poker game. + init_pyspiel_games_leduc_poker(m); // Leduc poker game. + init_pyspiel_games_negotiation(m); // Negotiation game. + init_pyspiel_games_spades(m); // Game-specific functions for spades. + init_pyspiel_games_tarok(m); // Game-specific functions for tarok. + init_pyspiel_games_tic_tac_toe(m); // Tic-Tac-Toe game. 
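For orientation, the module-level functions bound in this section (`load_game`, `create_matrix_game`, `serialize_game_and_state`, and related helpers) are the surface the bargaining scenario code calls from Python once this extension is built by the Dockerfile. The following is a minimal sketch of that surface, assuming the compiled `pyspiel` module is importable; the game name and payoff values are purely illustrative, not taken from this repository.

```python
import pyspiel

# Load a registered game by short name and play a simple rollout.
game = pyspiel.load_game("kuhn_poker")
state = game.new_initial_state()
while not state.is_terminal():
    if state.is_chance_node():
        # Chance nodes expose (action, probability) pairs.
        action, _ = state.chance_outcomes()[0]
    else:
        action = state.legal_actions()[0]
    state.apply_action(action)
print(state.returns())

# Round-trip a game/state pair through the string serializer bound above.
blob = pyspiel.serialize_game_and_state(game, state)
game2, state2 = pyspiel.deserialize_game_and_state(blob)
print(str(state2) == str(state))

# Build an ad-hoc 2x2 matrix game from named rows/cols and utilities
# (illustrative payoffs only).
matrix = pyspiel.create_matrix_game(
    ["Hawk", "Dove"], ["Hawk", "Dove"],
    [[-1, 2], [0, 1]],   # row player utilities
    [[-1, 0], [2, 1]])   # column player utilities
print(matrix.num_rows(), matrix.num_cols())
```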
+ init_pyspiel_games_tiny_bridge( + m); // Game-specific functions for tiny_bridge. + init_pyspiel_games_trade_comm(m); // Game-specific functions for trade_comm. + init_pyspiel_observer(m); // Observers and observations. + init_pyspiel_utils(m); // Utilities. + + // List of optional python submodules. +#if OPEN_SPIEL_BUILD_WITH_GAMUT + init_pyspiel_gamut(m); +#endif +#if OPEN_SPIEL_BUILD_WITH_XINXIN + init_pyspiel_xinxin(m); +#endif +#if OPEN_SPIEL_BUILD_WITH_ACPC + init_pyspiel_games_universal_poker(m); // Universal poker game. +#endif +} // NOLINT + +} // namespace +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/python_games.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/python_games.cc new file mode 100644 index 0000000..5ab8dcc --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/python_games.cc @@ -0,0 +1,394 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/python/pybind11/python_games.h" + +#include +#include +#include +#include + +// Interface code for using Python Games and States from C++. + +#include "open_spiel/abseil-cpp/absl/container/inlined_vector.h" +#include "open_spiel/abseil-cpp/absl/strings/escaping.h" +#include "open_spiel/abseil-cpp/absl/strings/numbers.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/python/pybind11/pybind11.h" +#include "open_spiel/observer.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + + +namespace open_spiel { + +namespace py = ::pybind11; + +PyGame::PyGame(GameType game_type, GameInfo game_info, + GameParameters game_parameters) + : Game(game_type, game_parameters), info_(game_info) {} + +std::unique_ptr PyGame::NewInitialState() const { + PYBIND11_OVERLOAD_PURE_NAME(std::unique_ptr, Game, "new_initial_state", + NewInitialState); +} + +std::unique_ptr PyGame::NewInitialState(const std::string& str) const { + PYBIND11_OVERLOAD_PURE_NAME(std::unique_ptr, Game, "new_initial_state", + NewInitialState, str); +} + +std::unique_ptr PyGame::NewInitialStateForPopulation( + int population) const { + PYBIND11_OVERLOAD_PURE_NAME(std::unique_ptr, Game, + "new_initial_state_for_population", + NewInitialStateForPopulation, population); +} + +int PyGame::MaxChanceNodesInHistory() const { + PYBIND11_OVERLOAD_PURE_NAME(int, Game, + "max_chance_nodes_in_history", + MaxChanceNodesInHistory); +} + +const Observer& PyGame::default_observer() const { + if (!default_observer_) default_observer_ = MakeObserver(kDefaultObsType, {}); + return *default_observer_; +} + +const 
Observer& PyGame::info_state_observer() const { + if (!info_state_observer_) + info_state_observer_ = MakeObserver(kInfoStateObsType, {}); + return *info_state_observer_; +} + +PyState::PyState(std::shared_ptr game) : State(game) {} + +Player PyState::CurrentPlayer() const { + PYBIND11_OVERLOAD_PURE_NAME(Player, State, "current_player", CurrentPlayer); +} + +std::vector PyState::LegalActions() const { + return LegalActions(CurrentPlayer()); +} + +std::vector PyState::LegalActions(Player player) const { + if (IsTerminal()) return {}; + if (IsChanceNode()) return LegalChanceOutcomes(); + if ((player == CurrentPlayer()) || (player >= 0 && IsSimultaneousNode())) { + PYBIND11_OVERLOAD_PURE_NAME(std::vector, State, "_legal_actions", + LegalActions, player); + } else if (player < 0) { + SpielFatalError( + absl::StrCat("Called LegalActions for pseudo-player ", player)); + } else { + return {}; + } +} + +std::string PyState::ActionToString(Player player, Action action_id) const { + PYBIND11_OVERLOAD_PURE_NAME(std::string, State, "_action_to_string", + ActionToString, player, action_id); +} + +std::string PyState::ToString() const { + PYBIND11_OVERLOAD_PURE_NAME(std::string, State, "__str__", ToString); +} + +bool PyState::IsTerminal() const { + PYBIND11_OVERLOAD_PURE_NAME(bool, State, "is_terminal", IsTerminal); +} + +std::vector PyState::Returns() const { + PYBIND11_OVERRIDE_PURE_NAME(std::vector, State, "returns", Returns); +} + +std::vector PyState::Rewards() const { + PYBIND11_OVERRIDE_NAME(std::vector, State, "rewards", Rewards); +} + +void PyState::DoApplyAction(Action action_id) { + PYBIND11_OVERLOAD_PURE_NAME(void, State, "_apply_action", DoApplyAction, + action_id); +} + +void PyState::DoApplyActions(const std::vector& actions) { + PYBIND11_OVERLOAD_PURE_NAME(void, State, "_apply_actions", DoApplyActions, + actions); +} + +ActionsAndProbs PyState::ChanceOutcomes() const { + PYBIND11_OVERLOAD_PURE_NAME(ActionsAndProbs, State, "chance_outcomes", + ChanceOutcomes); +} + +std::unique_ptr PyState::Clone() const { + // Create a new State of the right type. + auto rv = game_->NewInitialState(); + + // Copy the Python-side properties of the state. + py::function deepcopy = py::module::import("copy").attr("deepcopy"); + py::object py_state = py::cast(*rv); + for (auto [k, v] : PyDict(*this)) { + py_state.attr(k) = deepcopy(v); + } + + // Copy the C++-side properties of the state (all on the parent class). + // Since we started with a valid initial state, we only need to copy + // properties that change during the life of the state - hence num_players, + // num_distinct_actions are omitted. + PyState* state = open_spiel::down_cast(rv.get()); + state->history_ = history_; + state->move_number_ = move_number_; + + return rv; +} + +std::vector PyState::DistributionSupport() { + PYBIND11_OVERLOAD_PURE_NAME(std::vector, State, + "distribution_support", DistributionSupport); +} +void PyState::UpdateDistribution(const std::vector& distribution) { + PYBIND11_OVERLOAD_PURE_NAME(void, State, "update_distribution", + UpdateDistribution, distribution); +} + +// Register a Python game. 
+void RegisterPyGame(const GameType& game_type, py::function creator) { + GameRegisterer::RegisterGame( + game_type, [game_type, creator](const GameParameters& game_parameters) { + py::dict params = py::cast(game_parameters); + for (const auto& [k, v] : game_type.parameter_specification) { + if (game_parameters.count(k) == 0) { + params[pybind11::str(k)] = v; + } + } + auto py_game = creator(params); + return py::cast>(py_game); + }); +} + +// Observers and observations. We implement the C++ Observer in terms of the +// Python one. + +// Wrapper for using a Python observer from C++. +// This is not a 'trampoline' class, just a wrapper. +class PyObserver : public Observer { + public: + PyObserver(py::object py_observer); + void WriteTensor(const State& state, int player, + Allocator* allocator) const override; + std::string StringFrom(const State& state, int player) const override; + + private: + py::object py_observer_; + py::function set_from_; + py::function string_from_; +}; + +PyObserver::PyObserver(py::object py_observer) + : Observer(/*has_string=*/true, /*has_tensor=*/true), + py_observer_(py_observer), + set_from_(py_observer_.attr("set_from")), + string_from_(py_observer_.attr("string_from")) { + has_tensor_ = !py_observer_.attr("tensor").is_none(); +} + +void PyObserver::WriteTensor(const State& state, int player, + Allocator* allocator) const { + using Array = py::array_t; + const PyState& py_state = open_spiel::down_cast(state); + set_from_(py_state, player); + py::dict dict = py_observer_.attr("dict"); + for (auto [k, v] : dict) { + auto a = py::cast(v); + const int dims = a.ndim(); + absl::InlinedVector shape(dims); + for (int i = 0; i < dims; ++i) shape[i] = a.shape(i); + SpanTensor out = allocator->Get(k.cast(), shape); + std::copy(a.data(), a.data() + a.size(), out.data().begin()); + } +} + +std::string PyObserver::StringFrom(const State& state, int player) const { + const PyState& py_state = open_spiel::down_cast(state); + return py::cast(string_from_(py_state, player)); +} + +std::shared_ptr PyGame::MakeObserver( + absl::optional iig_obs_type, + const GameParameters& params) const { + py::object h = py::cast(this); + py::function f = h.attr("make_py_observer"); + if (!f) SpielFatalError("make_py_observer not implemented"); + py::object observer = (iig_obs_type.has_value() ? 
+ f(iig_obs_type.value(), params) : f(params)); + return std::make_shared(observer); +} + +std::string PyState::InformationStateString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, NumPlayers()); + const PyGame& game = open_spiel::down_cast(*game_); + return game.info_state_observer().StringFrom(*this, player); +} + +std::string PyState::ObservationString(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, NumPlayers()); + const PyGame& game = open_spiel::down_cast(*game_); + return game.default_observer().StringFrom(*this, player); +} + +void PyState::InformationStateTensor(Player player, + absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, NumPlayers()); + ContiguousAllocator allocator(values); + const PyGame& game = open_spiel::down_cast(*game_); + game.info_state_observer().WriteTensor(*this, player, &allocator); +} + +namespace { +std::vector TensorShape(const TrackingVectorAllocator& allocator) { + switch (allocator.tensors_info().size()) { + case 0: + return {}; + case 1: + return allocator.tensors_info().front().vector_shape(); + default: { + int size = 0; + for (const auto& info : allocator.tensors_info()) { + size += info.size(); + } + return {size}; + } + } +} +} // namespace + +std::vector PyGame::InformationStateTensorShape() const { + TrackingVectorAllocator allocator; + auto state = NewInitialState(); + info_state_observer().WriteTensor(*state, kDefaultPlayerId, &allocator); + return TensorShape(allocator); +} + +void PyState::ObservationTensor(Player player, absl::Span values) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, NumPlayers()); + ContiguousAllocator allocator(values); + const PyGame& game = open_spiel::down_cast(*game_); + game.default_observer().WriteTensor(*this, player, &allocator); +} + +std::vector PyGame::ObservationTensorShape() const { + TrackingVectorAllocator allocator; + auto state = NewInitialState(); + default_observer().WriteTensor(*state, kDefaultPlayerId, &allocator); + return TensorShape(allocator); +} + +py::dict PyDict(const State& state) { + py::object obj = py::cast(&state); + if (py::hasattr(obj, "__dict__")) { + return obj.attr("__dict__"); + } else { + return py::dict(); + } +} + +std::unique_ptr PyGame::DeserializeState(const std::string& str) const { + std::unique_ptr state = NewInitialState(); + open_spiel::down_cast(state.get())->Deserialize(str); + return state; +} + +// Serialization form for the Python-side attributes is a b64-encoded pickled +// Python dict (the __dict__ member of the Python object). 
+ +py::dict decode_dict(const absl::string_view str) { + std::string bytes; + SPIEL_CHECK_TRUE(absl::Base64Unescape(str, &bytes)); + py::function pickle_loads = py::module::import("pickle").attr("loads"); + return pickle_loads(py::bytes(bytes)); +} + +std::string encode_dict(py::dict dict) { + py::function pickle_dumps = py::module::import("pickle").attr("dumps"); + py::bytes bytes = pickle_dumps(dict); + return absl::Base64Escape(std::string(bytes)); +} + +inline constexpr const absl::string_view kTagHistory = "history="; +inline constexpr const absl::string_view kTagMoveNumber = "move_number="; +inline constexpr const absl::string_view kTagDict = "__dict__="; + +void PyState::Deserialize(const std::string& str) { + std::vector pieces = + absl::StrSplit(str, absl::MaxSplits('\n', 2)); + SPIEL_CHECK_EQ(pieces.size(), 3); + + SPIEL_CHECK_EQ(pieces[0].substr(0, kTagHistory.size()), kTagHistory); + auto history_str = pieces[0].substr(kTagHistory.size()); + if (!history_str.empty()) { + for (auto& h : absl::StrSplit(history_str, ',')) { + std::vector p = absl::StrSplit(h, ':'); + SPIEL_CHECK_EQ(p.size(), 2); + int player, action; + SPIEL_CHECK_TRUE(absl::SimpleAtoi(p[0], &player)); + SPIEL_CHECK_TRUE(absl::SimpleAtoi(p[1], &action)); + history_.push_back({player, action}); + } + } + + SPIEL_CHECK_EQ(pieces[1].substr(0, kTagMoveNumber.size()), kTagMoveNumber); + SPIEL_CHECK_TRUE( + absl::SimpleAtoi(pieces[1].substr(kTagMoveNumber.size()), &move_number_)); + + SPIEL_CHECK_EQ(pieces[2].substr(0, kTagDict.size()), kTagDict); + py::object py_state = py::cast(*this); + for (const auto& [k, v] : decode_dict(pieces[2].substr(kTagDict.size()))) { + py_state.attr(k) = v; + } +} + +std::string PyState::Serialize() const { + return absl::StrCat( + // C++ Attributes + kTagHistory, + absl::StrJoin(history_, ",", + [](std::string* out, const PlayerAction& pa) { + absl::StrAppend(out, pa.player, ":", pa.action); + }), + "\n", kTagMoveNumber, move_number_, "\n", + // Python attributes + kTagDict, encode_dict(PyDict(*this))); +} + +int PyState::MeanFieldPopulation() const { + // Use a Python implementation if available, fall back to the C++ + // implementation if not. + PYBIND11_OVERRIDE_NAME(int, State, "mean_field_population", + MeanFieldPopulation, /* no arguments */); +} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/python_games.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/python_games.h new file mode 100644 index 0000000..a2dd2e7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/python_games.h @@ -0,0 +1,118 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_PYTHON_PYBIND11_PYTHON_GAMES_H_ +#define OPEN_SPIEL_PYTHON_PYBIND11_PYTHON_GAMES_H_ + +// Interface and supporting functions for defining games in Python and using +// them from C++. 
+ +#include +#include +#include + +#include "open_spiel/python/pybind11/pybind11.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/observer.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { + +namespace py = ::pybind11; + +// Trampoline for using Python-defined games from C++. +class PyGame : public Game, public py::trampoline_self_life_support { + public: + PyGame(GameType game_type, GameInfo game_info, + GameParameters game_parameters); + + // Implementation of the Game API. + std::unique_ptr NewInitialState() const override; + std::unique_ptr NewInitialState(const std::string& str) const override; + std::unique_ptr NewInitialStateForPopulation( + int population) const override; + int MaxChanceNodesInHistory() const override; + int NumDistinctActions() const override { return info_.num_distinct_actions; } + int NumPlayers() const override { return info_.num_players; } + double MinUtility() const override { return info_.min_utility; } + double MaxUtility() const override { return info_.max_utility; } + absl::optional UtilitySum() const override { + return info_.utility_sum; + } + int MaxGameLength() const override { return info_.max_game_length; } + int MaxChanceOutcomes() const override { return info_.max_chance_outcomes; } + std::shared_ptr MakeObserver( + absl::optional iig_obs_type, + const GameParameters& params) const override; + std::vector InformationStateTensorShape() const override; + std::vector ObservationTensorShape() const override; + std::unique_ptr DeserializeState( + const std::string& str) const override; + + // Observers for the old observation API. + const Observer& default_observer() const; + const Observer& info_state_observer() const; + + private: + GameInfo info_; + + // Used to implement the old observation API. + mutable std::shared_ptr default_observer_; + mutable std::shared_ptr info_state_observer_; +}; + +// Trampoline for using Python-defined states from C++. +class PyState : public State, public py::trampoline_self_life_support { + public: + PyState(std::shared_ptr game); + + // Implementation of the State API. + Player CurrentPlayer() const override; + std::vector LegalActions() const override; + std::vector LegalActions(Player player) const override; + std::string ActionToString(Player player, Action action_id) const override; + std::string ToString() const override; + bool IsTerminal() const override; + std::vector Returns() const override; + std::vector Rewards() const override; + std::unique_ptr Clone() const override; + std::vector DistributionSupport() override; + void UpdateDistribution(const std::vector& distribution) override; + void DoApplyAction(Action action_id) override; + void DoApplyActions(const std::vector& actions) override; + std::string InformationStateString(Player player) const override; + std::string ObservationString(Player player) const override; + void InformationStateTensor(Player player, + absl::Span values) const override; + void ObservationTensor(Player player, + absl::Span values) const override; + ActionsAndProbs ChanceOutcomes() const override; + std::string Serialize() const override; + int MeanFieldPopulation() const override; + + // Python-specific details. + void Deserialize(const std::string& str); +}; + +// Register a Python game. +void RegisterPyGame(const GameType& game_type, py::function creator); + +// Get the dict for a Python state implementation. 
+py::dict PyDict(const State& state); + +} // namespace open_spiel + +#endif // OPEN_SPIEL_PYTHON_PYBIND11_PYTHON_GAMES_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/python_policy.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/python_policy.cc new file mode 100644 index 0000000..4108a72 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/python_policy.cc @@ -0,0 +1,69 @@ +// Copyright 2023 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/python/pybind11/python_policy.h" + +#include "open_spiel/spiel_utils.h" + +#ifndef SINGLE_ARG +#define SINGLE_ARG(...) __VA_ARGS__ +#endif + +namespace open_spiel { + +std::pair, std::vector > +PyPolicy::GetStatePolicyAsParallelVectors(const State& state) const { + PYBIND11_OVERRIDE_NAME( + SINGLE_ARG(std::pair, std::vector >), Policy, + "get_state_policy_as_parallel_vectors", GetStatePolicyAsParallelVectors, + state); +} +std::pair, std::vector > +PyPolicy::GetStatePolicyAsParallelVectors(const std::string& info_state) const { + PYBIND11_OVERRIDE_NAME( + SINGLE_ARG(std::pair, std::vector >), Policy, + "get_state_policy_as_parallel_vectors", GetStatePolicyAsParallelVectors, + info_state); +} +std::unordered_map PyPolicy::GetStatePolicyAsMap( + const State& state) const { + PYBIND11_OVERRIDE_NAME(SINGLE_ARG(std::unordered_map), Policy, + "action_probabilities", GetStatePolicyAsMap, state); +} +std::unordered_map PyPolicy::GetStatePolicyAsMap( + const std::string& info_state) const { + PYBIND11_OVERRIDE_NAME(SINGLE_ARG(std::unordered_map), Policy, + "action_probabilities", GetStatePolicyAsMap, + info_state); +} +ActionsAndProbs PyPolicy::GetStatePolicy(const State& state) const { + PYBIND11_OVERRIDE_NAME(ActionsAndProbs, Policy, "get_state_policy", + GetStatePolicy, state); +} +ActionsAndProbs PyPolicy::GetStatePolicy(const State& state, + Player player) const { + PYBIND11_OVERRIDE_NAME(ActionsAndProbs, Policy, "get_state_policy", + GetStatePolicy, state, player); +} +ActionsAndProbs PyPolicy::GetStatePolicy(const std::string& info_state) const { + PYBIND11_OVERRIDE_NAME(ActionsAndProbs, Policy, "get_state_policy", + GetStatePolicy, info_state); +} +std::string PyPolicy::Serialize(int double_precision, + std::string delimiter) const { + PYBIND11_OVERRIDE_NAME(std::string, Policy, "serialize", Serialize, + double_precision, delimiter); +} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/python_policy.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/python_policy.h new file mode 100644 index 0000000..4551c09 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/python_policy.h @@ -0,0 +1,59 @@ +// Copyright 2023 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_PYTHON_POLICY_H +#define OPEN_SPIEL_PYTHON_POLICY_H + +#include +#include +#include + +#include +#include + +#include "open_spiel/policy.h" +#include "pybind11/include/pybind11/trampoline_self_life_support.h" + +namespace open_spiel { +namespace py = pybind11; + +class PyPolicy : public Policy, public py::trampoline_self_life_support { + public: + ~PyPolicy() override = default; + PyPolicy() = default; + + std::pair, std::vector > + GetStatePolicyAsParallelVectors(const State& state) const override; + + std::pair, std::vector > + GetStatePolicyAsParallelVectors(const std::string& info_state) const override; + + std::unordered_map GetStatePolicyAsMap( + const State& state) const override; + + std::unordered_map GetStatePolicyAsMap( + const std::string& info_state) const override; + + ActionsAndProbs GetStatePolicy(const State& state) const override; + + ActionsAndProbs GetStatePolicy(const State& state, + Player player) const override; + + ActionsAndProbs GetStatePolicy(const std::string& info_state) const override; + + std::string Serialize(int double_precision, + std::string delimiter) const override; +}; +} // namespace open_spiel +#endif // OPEN_SPIEL_PYTHON_POLICY_H diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/utils.cc b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/utils.cc new file mode 100644 index 0000000..3d7c2a7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/utils.cc @@ -0,0 +1,33 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "open_spiel/python/pybind11/utils.h" + +#include "open_spiel/python/pybind11/pybind11.h" +#include "open_spiel/utils/file.h" + +namespace open_spiel { + +namespace py = ::pybind11; + +void init_pyspiel_utils(py::module& m) { + // read_contents_from_file(string filename, string mode) + m.def("read_contents_from_file", file::ReadContentsFromFile, + "Read the entire contents of a file."); + + // write_contents_to_file(string filename, string mode, string contents) + m.def("write_contents_to_file", open_spiel::file::WriteContentsToFile, + "Write the contents of the string to the specified filename."); +} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/utils.h b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/utils.h new file mode 100644 index 0000000..81daece --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pybind11/utils.h @@ -0,0 +1,25 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef OPEN_SPIEL_PYTHON_PYBIND11_UTILS_H_ +#define OPEN_SPIEL_PYTHON_PYBIND11_UTILS_H_ + +#include "open_spiel/python/pybind11/pybind11.h" + +namespace open_spiel { + +void init_pyspiel_utils(::pybind11::module& m); + +} // namespace open_spiel + +#endif // OPEN_SPIEL_PYTHON_PYBIND11_UTILS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/__init__.py new file mode 100644 index 0000000..1bf6252 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/deep_cfr.py b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/deep_cfr.py new file mode 100644 index 0000000..9a03163 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/deep_cfr.py @@ -0,0 +1,515 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Implements Deep CFR Algorithm. + +See https://arxiv.org/abs/1811.00164. + +The algorithm defines an `advantage` and `strategy` networks that compute +advantages used to do regret matching across information sets and to approximate +the strategy profiles of the game. To train these networks a fixed ring buffer +(other data structures may be used) memory is used to accumulate samples to +train the networks. +""" + +import collections +import math +import random +import numpy as np +from scipy import stats +import torch +from torch import nn +import torch.nn.functional as F + +from open_spiel.python import policy +import pyspiel + +AdvantageMemory = collections.namedtuple( + "AdvantageMemory", "info_state iteration advantage action") + +StrategyMemory = collections.namedtuple( + "StrategyMemory", "info_state iteration strategy_action_probs") + + +class SonnetLinear(nn.Module): + """A Sonnet linear module. + + Always includes biases and only supports ReLU activations. + """ + + def __init__(self, in_size, out_size, activate_relu=True): + """Creates a Sonnet linear layer. + + Args: + in_size: (int) number of inputs + out_size: (int) number of outputs + activate_relu: (bool) whether to include a ReLU activation layer + """ + super(SonnetLinear, self).__init__() + self._activate_relu = activate_relu + self._in_size = in_size + self._out_size = out_size + # stddev = 1.0 / math.sqrt(self._in_size) + # mean = 0 + # lower = (-2 * stddev - mean) / stddev + # upper = (2 * stddev - mean) / stddev + # # Weight initialization inspired by Sonnet's Linear layer, + # # which cites https://arxiv.org/abs/1502.03167v3 + # # pytorch default: initialized from + # # uniform(-sqrt(1/in_features), sqrt(1/in_features)) + self._weight = None + self._bias = None + self.reset() + + def forward(self, tensor): + y = F.linear(tensor, self._weight, self._bias) + return F.relu(y) if self._activate_relu else y + + def reset(self): + stddev = 1.0 / math.sqrt(self._in_size) + mean = 0 + lower = (-2 * stddev - mean) / stddev + upper = (2 * stddev - mean) / stddev + # Weight initialization inspired by Sonnet's Linear layer, + # which cites https://arxiv.org/abs/1502.03167v3 + # pytorch default: initialized from + # uniform(-sqrt(1/in_features), sqrt(1/in_features)) + self._weight = nn.Parameter( + torch.Tensor( + stats.truncnorm.rvs( + lower, + upper, + loc=mean, + scale=stddev, + size=[self._out_size, self._in_size]))) + self._bias = nn.Parameter(torch.zeros([self._out_size])) + + +class MLP(nn.Module): + """A simple network built from nn.linear layers.""" + + def __init__(self, + input_size, + hidden_sizes, + output_size, + activate_final=False): + """Create the MLP. 
+ + Args: + input_size: (int) number of inputs + hidden_sizes: (list) sizes (number of units) of each hidden layer + output_size: (int) number of outputs + activate_final: (bool) should final layer should include a ReLU + """ + + super(MLP, self).__init__() + self._layers = [] + # Hidden layers + for size in hidden_sizes: + self._layers.append(SonnetLinear(in_size=input_size, out_size=size)) + input_size = size + # Output layer + self._layers.append( + SonnetLinear( + in_size=input_size, + out_size=output_size, + activate_relu=activate_final)) + + self.model = nn.ModuleList(self._layers) + + def forward(self, x): + for layer in self.model: + x = layer(x) + return x + + def reset(self): + for layer in self._layers: + layer.reset() + + +class ReservoirBuffer(object): + """Allows uniform sampling over a stream of data. + + This class supports the storage of arbitrary elements, such as observation + tensors, integer actions, etc. + See https://en.wikipedia.org/wiki/Reservoir_sampling for more details. + """ + + def __init__(self, reservoir_buffer_capacity): + self._reservoir_buffer_capacity = reservoir_buffer_capacity + self._data = [] + self._add_calls = 0 + + def add(self, element): + """Potentially adds `element` to the reservoir buffer. + + Args: + element: data to be added to the reservoir buffer. + """ + if len(self._data) < self._reservoir_buffer_capacity: + self._data.append(element) + else: + idx = np.random.randint(0, self._add_calls + 1) + if idx < self._reservoir_buffer_capacity: + self._data[idx] = element + self._add_calls += 1 + + def sample(self, num_samples): + """Returns `num_samples` uniformly sampled from the buffer. + + Args: + num_samples: `int`, number of samples to draw. + + Returns: + An iterable over `num_samples` random elements of the buffer. + Raises: + ValueError: If there are less than `num_samples` elements in the buffer + """ + if len(self._data) < num_samples: + raise ValueError("{} elements could not be sampled from size {}".format( + num_samples, len(self._data))) + return random.sample(self._data, num_samples) + + def clear(self): + self._data = [] + self._add_calls = 0 + + def __len__(self): + return len(self._data) + + def __iter__(self): + return iter(self._data) + + +class DeepCFRSolver(policy.Policy): + """Implements a solver for the Deep CFR Algorithm with PyTorch. + + See https://arxiv.org/abs/1811.00164. + + Define all networks and sampling buffers/memories. Derive losses & learning + steps. Initialize the game state and algorithmic variables. + + Note: batch sizes default to `None` implying that training over the full + dataset in memory is done by default. To sample from the memories you + may set these values to something less than the full capacity of the + memory. + """ + + def __init__(self, + game, + policy_network_layers=(256, 256), + advantage_network_layers=(128, 128), + num_iterations: int = 100, + num_traversals: int = 20, + learning_rate: float = 1e-4, + batch_size_advantage=None, + batch_size_strategy=None, + memory_capacity: int = int(1e6), + policy_network_train_steps: int = 1, + advantage_network_train_steps: int = 1, + reinitialize_advantage_networks: bool = True): + """Initialize the Deep CFR algorithm. + + Args: + game: Open Spiel game. + policy_network_layers: (list[int]) Layer sizes of strategy net MLP. + advantage_network_layers: (list[int]) Layer sizes of advantage net MLP. + num_iterations: (int) Number of training iterations. + num_traversals: (int) Number of traversals per iteration. + learning_rate: (float) Learning rate. 
+ batch_size_advantage: (int or None) Batch size to sample from advantage + memories. + batch_size_strategy: (int or None) Batch size to sample from strategy + memories. + memory_capacity: Number af samples that can be stored in memory. + policy_network_train_steps: Number of policy network training steps (per + iteration). + advantage_network_train_steps: Number of advantage network training steps + (per iteration). + reinitialize_advantage_networks: Whether to re-initialize the advantage + network before training on each iteration. + """ + all_players = list(range(game.num_players())) + super(DeepCFRSolver, self).__init__(game, all_players) + self._game = game + if game.get_type().dynamics == pyspiel.GameType.Dynamics.SIMULTANEOUS: + # `_traverse_game_tree` does not take into account this option. + raise ValueError("Simulatenous games are not supported.") + self._batch_size_advantage = batch_size_advantage + self._batch_size_strategy = batch_size_strategy + self._policy_network_train_steps = policy_network_train_steps + self._advantage_network_train_steps = advantage_network_train_steps + self._num_players = game.num_players() + self._root_node = self._game.new_initial_state() + self._embedding_size = len(self._root_node.information_state_tensor(0)) + self._num_iterations = num_iterations + self._num_traversals = num_traversals + self._reinitialize_advantage_networks = reinitialize_advantage_networks + self._num_actions = game.num_distinct_actions() + self._iteration = 1 + + # Define strategy network, loss & memory. + self._strategy_memories = ReservoirBuffer(memory_capacity) + self._policy_network = MLP(self._embedding_size, + list(policy_network_layers), + self._num_actions) + # Illegal actions are handled in the traversal code where expected payoff + # and sampled regret is computed from the advantage networks. + self._policy_sm = nn.Softmax(dim=-1) + self._loss_policy = nn.MSELoss() + self._optimizer_policy = torch.optim.Adam( + self._policy_network.parameters(), lr=learning_rate) + + # Define advantage network, loss & memory. (One per player) + self._advantage_memories = [ + ReservoirBuffer(memory_capacity) for _ in range(self._num_players) + ] + self._advantage_networks = [ + MLP(self._embedding_size, list(advantage_network_layers), + self._num_actions) for _ in range(self._num_players) + ] + self._loss_advantages = nn.MSELoss(reduction="mean") + self._optimizer_advantages = [] + for p in range(self._num_players): + self._optimizer_advantages.append( + torch.optim.Adam( + self._advantage_networks[p].parameters(), lr=learning_rate)) + self._learning_rate = learning_rate + + @property + def advantage_buffers(self): + return self._advantage_memories + + @property + def strategy_buffer(self): + return self._strategy_memories + + def clear_advantage_buffers(self): + for p in range(self._num_players): + self._advantage_memories[p].clear() + + def reinitialize_advantage_network(self, player): + self._advantage_networks[player].reset() + self._optimizer_advantages[player] = torch.optim.Adam( + self._advantage_networks[player].parameters(), lr=self._learning_rate) + + def reinitialize_advantage_networks(self): + for p in range(self._num_players): + self.reinitialize_advantage_network(p) + + def solve(self): + """Solution logic for Deep CFR. + + Traverses the game tree, while storing the transitions for training + advantage and policy networks. + + Returns: + 1. (nn.Module) Instance of the trained policy network for inference. + 2. 
(list of floats) Advantage network losses for + each player during each iteration. + 3. (float) Policy loss. + """ + advantage_losses = collections.defaultdict(list) + for _ in range(self._num_iterations): + for p in range(self._num_players): + for _ in range(self._num_traversals): + self._traverse_game_tree(self._root_node, p) + if self._reinitialize_advantage_networks: + # Re-initialize advantage network for player and train from scratch. + self.reinitialize_advantage_network(p) + # Re-initialize advantage networks and train from scratch. + advantage_losses[p].append(self._learn_advantage_network(p)) + self._iteration += 1 + # Train policy network. + policy_loss = self._learn_strategy_network() + return self._policy_network, advantage_losses, policy_loss + + def _traverse_game_tree(self, state, player): + """Performs a traversal of the game tree. + + Over a traversal the advantage and strategy memories are populated with + computed advantage values and matched regrets respectively. + + Args: + state: Current OpenSpiel game state. + player: (int) Player index for this traversal. + + Returns: + (float) Recursively returns expected payoffs for each action. + """ + expected_payoff = collections.defaultdict(float) + if state.is_terminal(): + # Terminal state get returns. + return state.returns()[player] + elif state.is_chance_node(): + # If this is a chance node, sample an action + chance_outcome, chance_proba = zip(*state.chance_outcomes()) + action = np.random.choice(chance_outcome, p=chance_proba) + return self._traverse_game_tree(state.child(action), player) + elif state.current_player() == player: + sampled_regret = collections.defaultdict(float) + # Update the policy over the info set & actions via regret matching. + _, strategy = self._sample_action_from_advantage(state, player) + for action in state.legal_actions(): + expected_payoff[action] = self._traverse_game_tree( + state.child(action), player) + cfv = 0 + for a_ in state.legal_actions(): + cfv += strategy[a_] * expected_payoff[a_] + for action in state.legal_actions(): + sampled_regret[action] = expected_payoff[action] + sampled_regret[action] -= cfv + sampled_regret_arr = [0] * self._num_actions + for action in sampled_regret: + sampled_regret_arr[action] = sampled_regret[action] + self._advantage_memories[player].add( + AdvantageMemory(state.information_state_tensor(), self._iteration, + sampled_regret_arr, action)) + return cfv + else: + other_player = state.current_player() + _, strategy = self._sample_action_from_advantage(state, other_player) + # Recompute distribution for numerical errors. + probs = np.array(strategy) + probs /= probs.sum() + sampled_action = np.random.choice(range(self._num_actions), p=probs) + self._strategy_memories.add( + StrategyMemory( + state.information_state_tensor(other_player), self._iteration, + strategy)) + return self._traverse_game_tree(state.child(sampled_action), player) + + def _sample_action_from_advantage(self, state, player): + """Returns an info state policy by applying regret-matching. + + Args: + state: Current OpenSpiel game state. + player: (int) Player index over which to compute regrets. + + Returns: + 1. (list) Advantage values for info state actions indexed by action. + 2. (list) Matched regrets, prob for actions indexed by action. 
+ """ + info_state = state.information_state_tensor(player) + legal_actions = state.legal_actions(player) + with torch.no_grad(): + state_tensor = torch.FloatTensor(np.expand_dims(info_state, axis=0)) + raw_advantages = self._advantage_networks[player](state_tensor)[0].numpy() + advantages = [max(0., advantage) for advantage in raw_advantages] + cumulative_regret = np.sum([advantages[action] for action in legal_actions]) + matched_regrets = np.array([0.] * self._num_actions) + if cumulative_regret > 0.: + for action in legal_actions: + matched_regrets[action] = advantages[action] / cumulative_regret + else: + matched_regrets[max(legal_actions, key=lambda a: raw_advantages[a])] = 1 + return advantages, matched_regrets + + def action_probabilities(self, state, player_id=None): + """Computes action probabilities for the current player in state. + + Args: + state: (pyspiel.State) The state to compute probabilities for. + player_id: unused, but needed to implement the Policy API. + + Returns: + (dict) action probabilities for a single batch. + """ + del player_id + cur_player = state.current_player() + legal_actions = state.legal_actions(cur_player) + info_state_vector = np.array(state.information_state_tensor()) + if len(info_state_vector.shape) == 1: + info_state_vector = np.expand_dims(info_state_vector, axis=0) + with torch.no_grad(): + logits = self._policy_network(torch.FloatTensor(info_state_vector)) + probs = self._policy_sm(logits).numpy() + return {action: probs[0][action] for action in legal_actions} + + def _learn_advantage_network(self, player): + """Compute the loss on sampled transitions and perform a Q-network update. + + If there are not enough elements in the buffer, no loss is computed and + `None` is returned instead. + + Args: + player: (int) player index. + + Returns: + (float) The average loss over the advantage network. + """ + for _ in range(self._advantage_network_train_steps): + + if self._batch_size_advantage: + if self._batch_size_advantage > len(self._advantage_memories[player]): + ## Skip if there aren't enough samples + return None + samples = self._advantage_memories[player].sample( + self._batch_size_advantage) + else: + samples = self._advantage_memories[player] + info_states = [] + advantages = [] + iterations = [] + for s in samples: + info_states.append(s.info_state) + advantages.append(s.advantage) + iterations.append([s.iteration]) + # Ensure some samples have been gathered. + if not info_states: + return None + self._optimizer_advantages[player].zero_grad() + advantages = torch.FloatTensor(np.array(advantages)) + iters = torch.FloatTensor(np.sqrt(np.array(iterations))) + outputs = self._advantage_networks[player]( + torch.FloatTensor(np.array(info_states))) + loss_advantages = self._loss_advantages(iters * outputs, + iters * advantages) + loss_advantages.backward() + self._optimizer_advantages[player].step() + + return loss_advantages.detach().numpy() + + def _learn_strategy_network(self): + """Compute the loss over the strategy network. + + Returns: + (float) The average loss obtained on this batch of transitions or `None`. 
+ """ + for _ in range(self._policy_network_train_steps): + if self._batch_size_strategy: + if self._batch_size_strategy > len(self._strategy_memories): + ## Skip if there aren't enough samples + return None + samples = self._strategy_memories.sample(self._batch_size_strategy) + else: + samples = self._strategy_memories + info_states = [] + action_probs = [] + iterations = [] + for s in samples: + info_states.append(s.info_state) + action_probs.append(s.strategy_action_probs) + iterations.append([s.iteration]) + + self._optimizer_policy.zero_grad() + iters = torch.FloatTensor(np.sqrt(np.array(iterations))) + ac_probs = torch.FloatTensor(np.array(np.squeeze(action_probs))) + logits = self._policy_network(torch.FloatTensor(np.array(info_states))) + outputs = self._policy_sm(logits) + loss_strategy = self._loss_policy(iters * outputs, iters * ac_probs) + loss_strategy.backward() + self._optimizer_policy.step() + + return loss_strategy.detach().numpy() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/deep_cfr_pytorch_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/deep_cfr_pytorch_test.py new file mode 100644 index 0000000..910b747 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/deep_cfr_pytorch_test.py @@ -0,0 +1,75 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.pytorch.deep_cfr.""" + +from absl import app +from absl import logging +from absl.testing import absltest +from absl.testing import parameterized +import torch + +from open_spiel.python import policy +import pyspiel +from open_spiel.python.pytorch import deep_cfr + +SEED = 24984617 + + +class DeepCFRPyTorchTest(parameterized.TestCase): + + @parameterized.parameters('leduc_poker', 'kuhn_poker', 'liars_dice') + def test_deep_cfr_runs(self, game_name): + game = pyspiel.load_game(game_name) + deep_cfr_solver = deep_cfr.DeepCFRSolver( + game, + policy_network_layers=(8, 4), + advantage_network_layers=(4, 2), + num_iterations=2, + num_traversals=2, + learning_rate=1e-3, + batch_size_advantage=None, + batch_size_strategy=None, + memory_capacity=1e7) + deep_cfr_solver.solve() + + def test_matching_pennies_3p(self): + game = pyspiel.load_game_as_turn_based('matching_pennies_3p') + deep_cfr_solver = deep_cfr.DeepCFRSolver( + game, + policy_network_layers=(16, 8), + advantage_network_layers=(32, 16), + num_iterations=2, + num_traversals=2, + learning_rate=1e-3, + batch_size_advantage=None, + batch_size_strategy=None, + memory_capacity=1e7) + deep_cfr_solver.solve() + conv = pyspiel.nash_conv( + game, + policy.python_policy_to_pyspiel_policy( + policy.tabular_policy_from_callable( + game, deep_cfr_solver.action_probabilities))) + logging.info('Deep CFR in Matching Pennies 3p. NashConv: %.2f', conv) + + +def main(_): + torch.manual_seed(SEED) + absltest.main() + + +if __name__ == '__main__': + # Necessary to run main via app.run for internal tests. 
+ app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/dqn.py b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/dqn.py new file mode 100644 index 0000000..0bb2bb6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/dqn.py @@ -0,0 +1,433 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""DQN agent implemented in PyTorch.""" + +import collections +import math +import numpy as np +from scipy import stats +import torch +from torch import nn +import torch.nn.functional as F + +from open_spiel.python import rl_agent +from open_spiel.python.utils.replay_buffer import ReplayBuffer + +Transition = collections.namedtuple( + "Transition", + "info_state action reward next_info_state is_final_step legal_actions_mask") + +ILLEGAL_ACTION_LOGITS_PENALTY = torch.finfo(torch.float).min + + +class SonnetLinear(nn.Module): + """A Sonnet linear module. + + Always includes biases and only supports ReLU activations. + """ + + def __init__(self, in_size, out_size, activate_relu=True): + """Creates a Sonnet linear layer. + + Args: + in_size: (int) number of inputs + out_size: (int) number of outputs + activate_relu: (bool) whether to include a ReLU activation layer + """ + super(SonnetLinear, self).__init__() + self._activate_relu = activate_relu + stddev = 1.0 / math.sqrt(in_size) + mean = 0 + lower = (-2 * stddev - mean) / stddev + upper = (2 * stddev - mean) / stddev + # Weight initialization inspired by Sonnet's Linear layer, + # which cites https://arxiv.org/abs/1502.03167v3 + # pytorch default: initialized from + # uniform(-sqrt(1/in_features), sqrt(1/in_features)) + self._weight = nn.Parameter( + torch.Tensor( + stats.truncnorm.rvs( + lower, upper, loc=mean, scale=stddev, size=[out_size, + in_size]))) + self._bias = nn.Parameter(torch.zeros([out_size])) + + def forward(self, tensor): + y = F.linear(tensor, self._weight, self._bias) + return F.relu(y) if self._activate_relu else y + + +class MLP(nn.Module): + """A simple network built from nn.linear layers.""" + + def __init__(self, + input_size, + hidden_sizes, + output_size, + activate_final=False): + """Create the MLP. + + Args: + input_size: (int) number of inputs + hidden_sizes: (list) sizes (number of units) of each hidden layer + output_size: (int) number of outputs + activate_final: (bool) should final layer should include a ReLU + """ + + super(MLP, self).__init__() + self._layers = [] + # Hidden layers + for size in hidden_sizes: + self._layers.append(SonnetLinear(in_size=input_size, out_size=size)) + input_size = size + # Output layer + self._layers.append( + SonnetLinear( + in_size=input_size, + out_size=output_size, + activate_relu=activate_final)) + + self.model = nn.ModuleList(self._layers) + + def forward(self, x): + for layer in self.model: + x = layer(x) + return x + + +class DQN(rl_agent.AbstractAgent): + """DQN Agent implementation in PyTorch. + + See open_spiel/python/examples/breakthrough_dqn.py for an usage example. 
+ """ + + def __init__(self, + player_id, + state_representation_size, + num_actions, + hidden_layers_sizes=128, + replay_buffer_capacity=10000, + batch_size=128, + replay_buffer_class=ReplayBuffer, + learning_rate=0.01, + update_target_network_every=1000, + learn_every=10, + discount_factor=1.0, + min_buffer_size_to_learn=1000, + epsilon_start=1.0, + epsilon_end=0.1, + epsilon_decay_duration=int(1e6), + optimizer_str="sgd", + loss_str="mse"): + """Initialize the DQN agent.""" + + # This call to locals() is used to store every argument used to initialize + # the class instance, so it can be copied with no hyperparameter change. + self._kwargs = locals() + + self.player_id = player_id + self._num_actions = num_actions + if isinstance(hidden_layers_sizes, int): + hidden_layers_sizes = [hidden_layers_sizes] + self._layer_sizes = hidden_layers_sizes + self._batch_size = batch_size + self._update_target_network_every = update_target_network_every + self._learn_every = learn_every + self._min_buffer_size_to_learn = min_buffer_size_to_learn + self._discount_factor = discount_factor + + self._epsilon_start = epsilon_start + self._epsilon_end = epsilon_end + self._epsilon_decay_duration = epsilon_decay_duration + + # TODO(author6) Allow for optional replay buffer config. + if not isinstance(replay_buffer_capacity, int): + raise ValueError("Replay buffer capacity not an integer.") + self._replay_buffer = replay_buffer_class(replay_buffer_capacity) + self._prev_timestep = None + self._prev_action = None + + # Step counter to keep track of learning, eps decay and target network. + self._step_counter = 0 + + # Keep track of the last training loss achieved in an update step. + self._last_loss_value = None + + # Create the Q-network instances + self._q_network = MLP(state_representation_size, self._layer_sizes, + num_actions) + + self._target_q_network = MLP(state_representation_size, self._layer_sizes, + num_actions) + + if loss_str == "mse": + self.loss_class = F.mse_loss + elif loss_str == "huber": + self.loss_class = F.smooth_l1_loss + else: + raise ValueError("Not implemented, choose from 'mse', 'huber'.") + + if optimizer_str == "adam": + self._optimizer = torch.optim.Adam( + self._q_network.parameters(), lr=learning_rate) + elif optimizer_str == "sgd": + self._optimizer = torch.optim.SGD( + self._q_network.parameters(), lr=learning_rate) + else: + raise ValueError("Not implemented, choose from 'adam' and 'sgd'.") + + def step(self, time_step, is_evaluation=False, add_transition_record=True): + """Returns the action to be taken and updates the Q-network if needed. + + Args: + time_step: an instance of rl_environment.TimeStep. + is_evaluation: bool, whether this is a training or evaluation call. + add_transition_record: Whether to add to the replay buffer on this step. + + Returns: + A `rl_agent.StepOutput` containing the action probs and chosen action. + """ + + # Act step: don't act at terminal info states or if its not our turn. + if (not time_step.last()) and ( + time_step.is_simultaneous_move() or + self.player_id == time_step.current_player()): + info_state = time_step.observations["info_state"][self.player_id] + legal_actions = time_step.observations["legal_actions"][self.player_id] + epsilon = self._get_epsilon(is_evaluation) + action, probs = self._epsilon_greedy(info_state, legal_actions, epsilon) + else: + action = None + probs = [] + + # Don't mess up with the state during evaluation. 
+ if not is_evaluation: + self._step_counter += 1 + + if self._step_counter % self._learn_every == 0: + self._last_loss_value = self.learn() + + if self._step_counter % self._update_target_network_every == 0: + # state_dict method returns a dictionary containing a whole state of the + # module. + self._target_q_network.load_state_dict(self._q_network.state_dict()) + + if self._prev_timestep and add_transition_record: + # We may omit record adding here if it's done elsewhere. + self.add_transition(self._prev_timestep, self._prev_action, time_step) + + if time_step.last(): # prepare for the next episode. + self._prev_timestep = None + self._prev_action = None + return + else: + self._prev_timestep = time_step + self._prev_action = action + + return rl_agent.StepOutput(action=action, probs=probs) + + def add_transition(self, prev_time_step, prev_action, time_step): + """Adds the new transition using `time_step` to the replay buffer. + + Adds the transition from `self._prev_timestep` to `time_step` by + `self._prev_action`. + + Args: + prev_time_step: prev ts, an instance of rl_environment.TimeStep. + prev_action: int, action taken at `prev_time_step`. + time_step: current ts, an instance of rl_environment.TimeStep. + """ + assert prev_time_step is not None + legal_actions = (time_step.observations["legal_actions"][self.player_id]) + legal_actions_mask = np.zeros(self._num_actions) + legal_actions_mask[legal_actions] = 1.0 + transition = Transition( + info_state=( + prev_time_step.observations["info_state"][self.player_id][:]), + action=prev_action, + reward=time_step.rewards[self.player_id], + next_info_state=time_step.observations["info_state"][self.player_id][:], + is_final_step=float(time_step.last()), + legal_actions_mask=legal_actions_mask) + self._replay_buffer.add(transition) + + def _epsilon_greedy(self, info_state, legal_actions, epsilon): + """Returns a valid epsilon-greedy action and valid action probs. + + Action probabilities are given by a softmax over legal q-values. + + Args: + info_state: hashable representation of the information state. + legal_actions: list of legal actions at `info_state`. + epsilon: float, probability of taking an exploratory action. + + Returns: + A valid epsilon-greedy action and valid action probabilities. + """ + probs = np.zeros(self._num_actions) + if np.random.rand() < epsilon: + action = np.random.choice(legal_actions) + probs[legal_actions] = 1.0 / len(legal_actions) + else: + info_state = torch.Tensor(np.reshape(info_state, [1, -1])) + q_values = self._q_network(info_state).detach()[0] + legal_q_values = q_values[legal_actions] + action = legal_actions[torch.argmax(legal_q_values)] + probs[action] = 1.0 + return action, probs + + def _get_epsilon(self, is_evaluation, power=1.0): + """Returns the evaluation or decayed epsilon value.""" + if is_evaluation: + return 0.0 + decay_steps = min(self._step_counter, self._epsilon_decay_duration) + decayed_epsilon = ( + self._epsilon_end + (self._epsilon_start - self._epsilon_end) * + (1 - decay_steps / self._epsilon_decay_duration)**power) + return decayed_epsilon + + def learn(self): + """Compute the loss on sampled transitions and perform a Q-network update. + + If there are not enough elements in the buffer, no loss is computed and + `None` is returned instead. + + Returns: + The average loss obtained on this batch of transitions or `None`. 
+ """ + + if (len(self._replay_buffer) < self._batch_size or + len(self._replay_buffer) < self._min_buffer_size_to_learn): + return None + + transitions = self._replay_buffer.sample(self._batch_size) + info_states = torch.Tensor([t.info_state for t in transitions]) + actions = torch.LongTensor([t.action for t in transitions]) + rewards = torch.Tensor([t.reward for t in transitions]) + next_info_states = torch.Tensor([t.next_info_state for t in transitions]) + are_final_steps = torch.Tensor([t.is_final_step for t in transitions]) + legal_actions_mask = torch.Tensor( + np.array([t.legal_actions_mask for t in transitions])) + + self._q_values = self._q_network(info_states) + self._target_q_values = self._target_q_network(next_info_states).detach() + + illegal_actions_mask = 1 - legal_actions_mask + legal_target_q_values = self._target_q_values.masked_fill( + illegal_actions_mask.bool(), ILLEGAL_ACTION_LOGITS_PENALTY + ) + max_next_q = torch.max(legal_target_q_values, dim=1)[0] + + target = ( + rewards + (1 - are_final_steps) * self._discount_factor * max_next_q) + action_indices = torch.stack([ + torch.arange(self._q_values.shape[0], dtype=torch.long), actions + ], + dim=0) + predictions = self._q_values[list(action_indices)] + + loss = self.loss_class(predictions, target) + + self._optimizer.zero_grad() + loss.backward() + self._optimizer.step() + + return loss + + @property + def q_values(self): + return self._q_values + + @property + def replay_buffer(self): + return self._replay_buffer + + @property + def loss(self): + return self._last_loss_value + + @property + def prev_timestep(self): + return self._prev_timestep + + @property + def prev_action(self): + return self._prev_action + + @property + def step_counter(self): + return self._step_counter + + def get_weights(self): + variables = [m.weight for m in self._q_network.model] + variables.append([m.weight for m in self._target_q_network.model]) + return variables + + def copy_with_noise(self, sigma=0.0, copy_weights=True): + """Copies the object and perturbates it with noise. + + Args: + sigma: gaussian dropout variance term : Multiplicative noise following + (1+sigma*epsilon), epsilon standard gaussian variable, multiplies each + model weight. sigma=0 means no perturbation. + copy_weights: Boolean determining whether to copy model weights (True) or + just model hyperparameters. + + Returns: + Perturbated copy of the model. + """ + _ = self._kwargs.pop("self", None) + copied_object = DQN(**self._kwargs) + + q_network = getattr(copied_object, "_q_network") + target_q_network = getattr(copied_object, "_target_q_network") + + if copy_weights: + with torch.no_grad(): + for q_model in q_network.model: + q_model.weight *= (1 + sigma * torch.randn(q_model.weight.shape)) + for tq_model in target_q_network.model: + tq_model.weight *= (1 + sigma * torch.randn(tq_model.weight.shape)) + return copied_object + + def save(self, data_path, optimizer_data_path=None): + """Save checkpoint/trained model and optimizer. + + Args: + data_path: Path for saving model. It can be relative or absolute but the + filename should be included. For example: q_network.pt or + /path/to/q_network.pt + optimizer_data_path: Path for saving the optimizer states. It can be + relative or absolute but the filename should be included. 
For example: + optimizer.pt or /path/to/optimizer.pt + """ + torch.save(self._q_network, data_path) + if optimizer_data_path is not None: + torch.save(self._optimizer, optimizer_data_path) + + def load(self, data_path, optimizer_data_path=None): + """Load checkpoint/trained model and optimizer. + + Args: + data_path: Path for loading model. It can be relative or absolute but the + filename should be included. For example: q_network.pt or + /path/to/q_network.pt + optimizer_data_path: Path for loading the optimizer states. It can be + relative or absolute but the filename should be included. For example: + optimizer.pt or /path/to/optimizer.pt + """ + self._q_network = torch.load(data_path) + self._target_q_network = torch.load(data_path) + if optimizer_data_path is not None: + self._optimizer = torch.load(optimizer_data_path) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/dqn_pytorch_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/dqn_pytorch_test.py new file mode 100644 index 0000000..3c28300 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/dqn_pytorch_test.py @@ -0,0 +1,138 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.algorithms.dqn.""" + +import random +from absl.testing import absltest +import numpy as np +import torch + +from open_spiel.python import rl_environment +import pyspiel +from open_spiel.python.pytorch import dqn + +# A simple two-action game encoded as an EFG game. Going left gets -1, going +# right gets a +1. 
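The `learn()` step of the DQN agent above masks illegal actions with the most negative representable float before taking the max over next-state Q-values, so a masked entry can never beat a legal one. Below is a minimal standalone sketch of that target computation; it is not part of the patch, and every tensor value in it is hypothetical.

```
import torch

# Hypothetical batch of two transitions with three actions (illustration only).
rewards = torch.tensor([1.0, 0.0])
are_final_steps = torch.tensor([0.0, 1.0])   # second transition is terminal
discount = 0.99
target_q = torch.tensor([[0.5, 2.0, -1.0],
                         [1.5, 0.3, 0.7]])
legal_mask = torch.tensor([[1.0, 0.0, 1.0],  # action 1 is illegal in state 0
                           [1.0, 1.0, 1.0]])

# Fill illegal entries with the float minimum so they never win the max.
penalty = torch.finfo(torch.float).min
masked_q = target_q.masked_fill((1 - legal_mask).bool(), penalty)
max_next_q = masked_q.max(dim=1).values       # -> [0.5, 1.5]

# Terminal transitions contribute only their reward.
target = rewards + (1 - are_final_steps) * discount * max_next_q
print(target)                                 # tensor([1.4950, 0.0000])
```

Using the true float minimum as the penalty (rather than an arbitrary large negative number) guarantees a masked entry can never exceed a legal Q-value.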
+SIMPLE_EFG_DATA = """ + EFG 2 R "Simple single-agent problem" { "Player 1" } "" + p "ROOT" 1 1 "ROOT" { "L" "R" } 0 + t "L" 1 "Outcome L" { -1.0 } + t "R" 2 "Outcome R" { 1.0 } +""" +SEED = 24261711 + + +class DQNTest(absltest.TestCase): + + def test_simple_game(self): + game = pyspiel.load_efg_game(SIMPLE_EFG_DATA) + env = rl_environment.Environment(game=game) + agent = dqn.DQN( + 0, + state_representation_size=game.information_state_tensor_shape()[0], + num_actions=game.num_distinct_actions(), + min_buffer_size_to_learn=10, + hidden_layers_sizes=[16], + replay_buffer_capacity=1000, + update_target_network_every=100, + learn_every=10, + discount_factor=0.99, + epsilon_decay_duration=1000, + batch_size=32, + epsilon_start=0.5, + epsilon_end=0.01) + total_eval_reward = 0 + for _ in range(1000): + time_step = env.reset() + while not time_step.last(): + agent_output = agent.step(time_step) + time_step = env.step([agent_output.action]) + agent.step(time_step) + for _ in range(1000): + time_step = env.reset() + while not time_step.last(): + agent_output = agent.step(time_step, is_evaluation=True) + time_step = env.step([agent_output.action]) + total_eval_reward += time_step.rewards[0] + self.assertGreaterEqual(total_eval_reward, 250) + + def test_run_tic_tac_toe(self): + env = rl_environment.Environment("tic_tac_toe") + state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + agents = [ + dqn.DQN( # pylint: disable=g-complex-comprehension + player_id, + state_representation_size=state_size, + num_actions=num_actions, + hidden_layers_sizes=[16], + replay_buffer_capacity=10, + batch_size=5) for player_id in [0, 1] + ] + time_step = env.reset() + while not time_step.last(): + current_player = time_step.observations["current_player"] + current_agent = agents[current_player] + agent_output = current_agent.step(time_step) + time_step = env.step([agent_output.action]) + + for agent in agents: + agent.step(time_step) + + def test_run_hanabi(self): + # Hanabi is an optional game, so check we have it before running the test. + game = "hanabi" + if game not in pyspiel.registered_names(): + return + + num_players = 3 + env_configs = { + "players": num_players, + "max_life_tokens": 1, + "colors": 2, + "ranks": 3, + "hand_size": 2, + "max_information_tokens": 3, + "discount": 0. 
+ } + env = rl_environment.Environment(game, **env_configs) + state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + agents = [ + dqn.DQN( # pylint: disable=g-complex-comprehension + player_id, + state_representation_size=state_size, + num_actions=num_actions, + hidden_layers_sizes=[16], + replay_buffer_capacity=10, + batch_size=5) for player_id in range(num_players) + ] + time_step = env.reset() + while not time_step.last(): + current_player = time_step.observations["current_player"] + agent_output = [agent.step(time_step) for agent in agents] + time_step = env.step([agent_output[current_player].action]) + + for agent in agents: + agent.step(time_step) + + +if __name__ == "__main__": + random.seed(SEED) + torch.manual_seed(SEED) + np.random.seed(SEED) + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/eva.py b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/eva.py new file mode 100644 index 0000000..414859a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/eva.py @@ -0,0 +1,393 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Implements an Ephemeral Value Adjustment Agent. + +See https://arxiv.org/abs/1810.08163. +The algorithm queries trajectories from a replay buffer based on similarities +to embedding representations and uses a parametric model to compute values for +counterfactual state-action pairs when integrating across those trajectories. +Finally, a weighted average between the parametric (DQN in this case) and the +non-parametric model is used to compute the policy. +""" + +# pylint: disable=protected-access + +import collections +import copy +import numpy as np +import torch + +from open_spiel.python import rl_agent +from open_spiel.python.pytorch import dqn + +MEM_KEY_NAME = "embedding" + +ValueBufferElement = collections.namedtuple("ValueElement", "embedding value") + +ReplayBufferElement = collections.namedtuple( + "ReplayElement", "embedding info_state action reward next_info_state " + "is_final_step legal_actions_mask") + + +# TODO(author3) Refactor into data structures lib. +class QueryableFixedSizeRingBuffer(dqn.ReplayBuffer): + """ReplayBuffer of fixed size with a FIFO replacement policy. + + Stored transitions can be sampled uniformly. This extends the DQN replay + buffer by allowing the contents to be fetched by L2 proximity to a query + value. + The underlying datastructure is a ring buffer, allowing 0(1) adding and + sampling. + """ + + def knn(self, key, key_name, k, trajectory_len=1): + """Computes top-k neighbours based on L2 distance. + + Args: + key: (np.array) key value to query memory. + key_name: (str) attribute name of key in memory elements. + k: (int) number of neighbours to fetch. + trajectory_len: (int) length of trajectory to fetch from replay buffer. 
+ + Returns: + List of tuples (L2 negative distance, BufferElement) sorted in increasing + order by the negative L2 distqances from the key. + """ + distances = [(np.linalg.norm(getattr(sample, key_name) - key, 2, + axis=0), sample) for sample in self._data] + return sorted(distances, key=lambda v: -v[0])[:k] + + +class EVAAgent(object): + """Implements a solver for Ephemeral VAlue Adjustment. + + See https://arxiv.org/abs/1810.08163. + Define all networks and sampling buffers/memories. Derive losses & learning + steps. Initialize the game state and algorithmic variables. + """ + + def __init__(self, + game, + player_id, + state_size, + num_actions, + embedding_network_layers=(128,), + embedding_size=16, + dqn_hidden_layers=(128, 128), + batch_size=16, + trajectory_len=10, + num_neighbours=5, + learning_rate=1e-4, + mixing_parameter=0.9, + memory_capacity=int(1e6), + discount_factor=1.0, + update_target_network_every=1000, + epsilon_start=1.0, + epsilon_end=0.1, + epsilon_decay_duration=int(1e4), + embedding_as_parametric_input=False): + """Initialize the Ephemeral VAlue Adjustment algorithm. + + Args: + game: (rl_environment.Environment) Open Spiel game. + player_id: (int) Player id for this player. + state_size: (int) Size of info state vector. + num_actions: (int) number of actions. + embedding_network_layers: (list[int]) Layer sizes of strategy net MLP. + embedding_size: (int) Size of memory embeddings. + dqn_hidden_layers: (list(int)) MLP layer sizes of DQN network. + batch_size: (int) Size of batches for DQN learning steps. + trajectory_len: (int) Length of trajectories from replay buffer. + num_neighbours: (int) Number of neighbours to fetch from replay buffer. + learning_rate: (float) Learning rate. + mixing_parameter: (float) Value mixing parameter between 0 and 1. + memory_capacity: Number af samples that can be stored in memory. + discount_factor: (float) Discount factor for Q-Learning. + update_target_network_every: How often to update DQN target network. + epsilon_start: (float) Starting epsilon-greedy value. + epsilon_end: (float) Final epsilon-greedy value. + epsilon_decay_duration: (float) Number of steps over which epsilon decays. + embedding_as_parametric_input: (bool) Whether we use embeddings as input + to the parametric model. + """ + assert (mixing_parameter >= 0 and mixing_parameter <= 1) + self._game = game + self.player_id = player_id + self._env = game + self._num_actions = num_actions + self._info_state_size = state_size + self._embedding_size = embedding_size + self._lambda = mixing_parameter + self._trajectory_len = trajectory_len + self._num_neighbours = num_neighbours + self._discount = discount_factor + self._epsilon_start = epsilon_start + self._epsilon_end = epsilon_end + self._epsilon_decay_duration = epsilon_decay_duration + self._last_time_step = None + self._last_action = None + self._embedding_as_parametric_input = embedding_as_parametric_input + + self._embedding_network = dqn.MLP(self._info_state_size, + list(embedding_network_layers), + embedding_size) + + # The DQN agent requires this be an integer. + if not isinstance(memory_capacity, int): + raise ValueError("Memory capacity not an integer.") + + # Initialize the parametric & non-parametric Q-networks. 
+ self._agent = dqn.DQN( + player_id, + state_representation_size=self._info_state_size, + num_actions=self._num_actions, + hidden_layers_sizes=list(dqn_hidden_layers), + replay_buffer_capacity=memory_capacity, + replay_buffer_class=QueryableFixedSizeRingBuffer, + batch_size=batch_size, + learning_rate=learning_rate, + update_target_network_every=update_target_network_every, + learn_every=batch_size, + discount_factor=1.0, + epsilon_start=1.0, + epsilon_end=0.1, + epsilon_decay_duration=int(1e6)) + # Initialize Value Buffers - Fetch Replay buffers from agents. + self._value_buffer = QueryableFixedSizeRingBuffer(memory_capacity) + self._replay_buffer = self._agent.replay_buffer + + # Initialize non-parametric & EVA Q-values. + self._v_np = collections.defaultdict(float) + self._q_np = collections.defaultdict(lambda: [0] * self._num_actions) + self._q_eva = collections.defaultdict(lambda: [0] * self._num_actions) + + @property + def env(self): + return self._env + + @property + def loss(self): + return self._agent.loss + + def _add_transition_value(self, infostate_embedding, value): + """Adds the embedding and value to the ValueBuffer. + + Args: + infostate_embedding: (np.array) embeddig vector. + value: (float) Value associated with state embeding. + """ + transition = ValueBufferElement(embedding=infostate_embedding, value=value) + self._value_buffer.add(transition) + + def _add_transition_replay(self, infostate_embedding, time_step): + """Adds the new transition using `time_step` to the replay buffer. + + Adds the transition from `self._prev_timestep` to `time_step` by + `self._prev_action`. + Args: + infostate_embedding: embeddig vector. + time_step: an instance of rl_environment.TimeStep. + """ + prev_timestep = self._last_time_step + assert prev_timestep is not None + legal_actions = ( + prev_timestep.observations["legal_actions"][self.player_id]) + legal_actions_mask = np.zeros(self._num_actions) + legal_actions_mask[legal_actions] = 1.0 + reward = time_step.rewards[self.player_id] if time_step.rewards else 0.0 + transition = ReplayBufferElement( + embedding=infostate_embedding, + info_state=(prev_timestep.observations["info_state"][self.player_id]), + action=self._last_action, + reward=reward, + next_info_state=time_step.observations["info_state"][self.player_id], + is_final_step=float(time_step.last()), + legal_actions_mask=legal_actions_mask) + self._replay_buffer.add(transition) + + def step(self, time_step, is_evaluation=False): + """Returns the action to be taken and updates the value functions. + + Args: + time_step: an instance of rl_environment.TimeStep. + is_evaluation: bool, whether this is a training or evaluation call. + + Returns: + A `rl_agent.StepOutput` containing the action probs and chosen action. + """ + # Act step: don't act at terminal info states. + if not time_step.last(): + info_state = time_step.observations["info_state"][self.player_id] + legal_actions = time_step.observations["legal_actions"][self.player_id] + epsilon = self._get_epsilon(self._agent.step_counter, is_evaluation) + + # Sample an action from EVA via epsilon greedy policy. + action, probs = self._epsilon_greedy(self._q_eva[tuple(info_state)], + legal_actions, epsilon) + + # Update Step: Only with transitions and not when evaluating. 
+ if (not is_evaluation and self._last_time_step is not None): + info_state = self._last_time_step.observations["info_state"][ + self.player_id] + legal_actions = self._last_time_step.observations["legal_actions"][ + self.player_id] + epsilon = self._get_epsilon(self._agent.step_counter, is_evaluation) + + # Get embedding. + self._info_state = torch.Tensor(np.expand_dims(info_state, axis=0)) + infostate_embedding = self._embedding_network( + self._info_state).detach()[0] + + neighbours_value = self._value_buffer.knn(infostate_embedding, + MEM_KEY_NAME, + self._num_neighbours, 1) + # collect trace values of knn from L (value buffer) .. Q_np(s_k) + neighbours_replay = self._replay_buffer.knn(infostate_embedding, + MEM_KEY_NAME, + self._num_neighbours, + self._trajectory_len) + + # Take a step with the parametric model and get q-values. Use embedding as + # input to the parametric meodel. + # TODO(author6) Recompute embeddings for buffers on learning steps. + if self._embedding_as_parametric_input: + last_time_step_copy = copy.deepcopy(self._last_time_step) + last_time_step_copy.observations["info_state"][ + self.player_id] = infostate_embedding + self._agent.step(last_time_step_copy, add_transition_record=False) + else: + self._agent.step(self._last_time_step, add_transition_record=False) + q_values = self._agent._q_network(self._info_state).detach()[0] + # Update EVA: Q_eva = lambda q_theta(s_t) + (1-lambda) sum(Q_np(s_k, .))/K + for a in legal_actions: + q_theta = q_values[a] + self._q_eva[tuple(info_state)][a] = ( + self._lambda * q_theta + (1 - self._lambda) * + sum([elem[1].value + for elem in neighbours_value]) / self._num_neighbours) + + # Append (e,s,a,r,s') to Replay Buffer + self._add_transition_replay(infostate_embedding, time_step) + + # update Q_np with Traces using TCP + self._trajectory_centric_planning(neighbours_replay) + + # Append Q_np(s, a) to Value Buffer + self._add_transition_value( + infostate_embedding, self._q_np[tuple(info_state)][self._last_action]) + + # Prepare for the next episode. + if time_step.last(): + self._last_time_step = None + self._last_action = None + return + + self._last_time_step = time_step + self._last_action = action + return rl_agent.StepOutput(action=action, probs=probs) + + def _trajectory_centric_planning(self, trajectories): + """Performs trajectory centric planning. + + Uses trajectories from the replay buffer to update the non-parametric values + while supplying counter-factual values with the parametric model. + Args: + trajectories: Current OpenSpiel game state. + """ + # Calculate non-parametric values over the trajectories. + # Iterate backward through trajectories + for t in range(len(trajectories) - 1, 0, -1): + elem = trajectories[t][1] + s_tp1 = tuple(elem.next_info_state) + s_t = tuple(elem.info_state) + a_t = elem.action + r_t = elem.reward + legal_actions = elem.legal_actions_mask + if t < len(trajectories) - 1: + for action in range(len(legal_actions)): + if not legal_actions[action]: + continue + if action == elem.action: + self._q_np[s_t][a_t] = (r_t + self._discount * self._v_np[s_tp1]) + else: + self._agent.info_state = torch.Tensor( + np.expand_dims(elem.info_state, axis=0)) + q_values_parametric = self._agent._q_network( + self._agent.info_state).detach().numpy() + self._q_np[s_t][a_t] = q_values_parametric[0][action] + + # Set V(s_t) + if t == len(trajectories) - 1: + # Sample from the parametric model. 
+ self._agent.info_state = torch.Tensor( + np.expand_dims(elem.info_state, axis=0)) + q_values_parametric = self._agent._q_network( + self._agent.info_state).detach().numpy() + self._v_np[s_t] = np.max(q_values_parametric) + else: + self._v_np[s_t] = max(self._q_np[s_t]) + + def _epsilon_greedy(self, q_values, legal_actions, epsilon): + """Returns a valid epsilon-greedy action and valid action probs. + + Action probabilities are given by a softmax over legal q-values. + Args: + q_values: list of Q-values by action. + legal_actions: list of legal actions at `info_state`. + epsilon: float, probability of taking an exploratory action. + + Returns: + A valid epsilon-greedy action and valid action probabilities. + """ + probs = np.zeros(self._num_actions) + q_values = np.array(q_values) + if np.random.rand() < epsilon: + action = np.random.choice(legal_actions) + probs[legal_actions] = 1.0 / len(legal_actions) + else: + legal_q_values = q_values[legal_actions] + action = legal_actions[np.argmax(legal_q_values)] + # Reduce max_q for numerical stability. Result is the same. + max_q = np.max(legal_q_values) + e_x = np.exp(legal_q_values - max_q) + probs[legal_actions] = e_x / e_x.sum(axis=0) + return action, probs + + def _get_epsilon(self, step_counter, is_evaluation): + """Returns the evaluation or decayed epsilon value.""" + if is_evaluation: + return 0.0 + decay_steps = min(step_counter, self._epsilon_decay_duration) + decayed_epsilon = ( + self._epsilon_end + (self._epsilon_start - self._epsilon_end) * + (1 - decay_steps / self._epsilon_decay_duration)) + return decayed_epsilon + + def action_probabilities(self, state): + """Returns action probabilites dict for a single batch.""" + # TODO(author3, author6): Refactor this to expect pre-normalized form. + if hasattr(state, "information_state_tensor"): + state_rep = tuple(state.information_state_tensor(self.player_id)) + elif hasattr(state, "observation_tensor"): + state_rep = tuple(state.observation_tensor(self.player_id)) + else: + raise AttributeError("Unable to extract normalized state vector.") + legal_actions = state.legal_actions(self.player_id) + if legal_actions: + _, probs = self._epsilon_greedy( + self._q_eva[state_rep], legal_actions, epsilon=0.0) + return {a: probs[a] for a in range(self._num_actions)} + else: + raise ValueError("Node has no legal actions to take.") diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/eva_pytorch_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/eva_pytorch_test.py new file mode 100644 index 0000000..54d6def --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/eva_pytorch_test.py @@ -0,0 +1,105 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
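The EVA agent above blends the parametric DQN estimate with the non-parametric values of the K nearest stored traces, `Q_eva(s, a) = lambda * Q_theta(s, a) + (1 - lambda) * mean_k Q_np(s_k, .)`. A toy restatement of that mixing rule follows; it is illustrative only and every number in it is made up.

```
import numpy as np

lam = 0.9                      # mixing_parameter
q_parametric = 0.40            # q_theta(s, a) from the parametric network
neighbour_values = np.array([0.2, 0.1, 0.3, 0.25, 0.15])  # K = 5 trace values

q_eva = lam * q_parametric + (1 - lam) * neighbour_values.mean()
print(q_eva)                   # 0.9 * 0.40 + 0.1 * 0.20 = 0.38
```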
+ +"""Tests for open_spiel.python.algorithms.eva.""" + +from absl.testing import absltest +from absl.testing import parameterized +import torch + +from open_spiel.python import rl_environment +from open_spiel.python.pytorch import eva + + +SEED = 24984617 + + +class EVATest(parameterized.TestCase): + + @parameterized.parameters("tic_tac_toe", "kuhn_poker", "liars_dice") + def test_run_games(self, game): + env = rl_environment.Environment(game) + num_players = env.num_players + eva_agents = [] + num_actions = env.action_spec()["num_actions"] + state_size = env.observation_spec()["info_state"][0] + for player in range(num_players): + eva_agents.append( + eva.EVAAgent( + env, + player, + state_size, + num_actions, + embedding_network_layers=(64, 32), + embedding_size=12, + learning_rate=1e-4, + mixing_parameter=0.5, + memory_capacity=int(1e6), + discount_factor=1.0, + epsilon_start=1.0, + epsilon_end=0.1, + epsilon_decay_duration=int(1e6))) + time_step = env.reset() + while not time_step.last(): + current_player = time_step.observations["current_player"] + current_agent = eva_agents[current_player] + # 1. Step the agent. + # 2. Step the Environment. + agent_output = current_agent.step(time_step) + time_step = env.step([agent_output.action]) + for agent in eva_agents: + agent.step(time_step) + + +class QueryableFixedSizeRingBufferTest(absltest.TestCase): + + def test_replay_buffer_add(self): + replay_buffer = eva.QueryableFixedSizeRingBuffer(replay_buffer_capacity=10) + self.assertEmpty(replay_buffer) + replay_buffer.add("entry1") + self.assertLen(replay_buffer, 1) + replay_buffer.add("entry2") + self.assertLen(replay_buffer, 2) + + self.assertIn("entry1", replay_buffer) + self.assertIn("entry2", replay_buffer) + + def test_replay_buffer_max_capacity(self): + replay_buffer = eva.QueryableFixedSizeRingBuffer(replay_buffer_capacity=2) + replay_buffer.add("entry1") + replay_buffer.add("entry2") + replay_buffer.add("entry3") + self.assertLen(replay_buffer, 2) + + self.assertIn("entry2", replay_buffer) + self.assertIn("entry3", replay_buffer) + + def test_replay_buffer_sample(self): + replay_buffer = eva.QueryableFixedSizeRingBuffer(replay_buffer_capacity=3) + replay_buffer.add("entry1") + replay_buffer.add("entry2") + replay_buffer.add("entry3") + + samples = replay_buffer.sample(3) + + self.assertIn("entry1", samples) + self.assertIn("entry2", samples) + self.assertIn("entry3", samples) + + # TODO(author6) Test knn query. + + +if __name__ == "__main__": + torch.manual_seed(SEED) + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/losses/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/losses/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/losses/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/losses/rl_losses.py b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/losses/rl_losses.py new file mode 100644 index 0000000..b3cad9f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/losses/rl_losses.py @@ -0,0 +1,271 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Reinforcement learning loss functions. + +All the loss functions implemented here compute the loss for the policy (actor). +The critic loss functions are typically regression loss are omitted for their +simplicity. + +For the batch QPG, RM and RPG loss, please refer to the paper: +https://papers.nips.cc/paper/7602-actor-critic-policy-optimization-in-partially-observable-multiagent-environments.pdf + +The BatchA2C loss uses code from the `TRFL` library: +https://github.com/deepmind/trfl/blob/master/trfl/discrete_policy_gradient_ops.py +""" + +import torch +import torch.nn.functional as F + + +def _assert_rank_and_shape_compatibility(tensors, rank): + if not tensors: + raise ValueError("List of tensors cannot be empty") + + tmp_shape = tensors[0].shape + for tensor in tensors: + if tensor.ndim != rank: + raise ValueError("Shape %s must have rank %d" % (tensor.ndim, rank)) + if tensor.shape != tmp_shape: + raise ValueError("Shapes %s and %s are not compatible" % + (tensor.shape, tmp_shape)) + + +def thresholded(logits, regrets, threshold=2.0): + """Zeros out `regrets` where `logits` are too negative or too large.""" + can_decrease = logits.gt(-threshold).float() + can_increase = logits.lt(threshold).float() + regrets_negative = regrets.clamp(max=0.0) + regrets_positive = regrets.clamp(min=0.0) + return can_decrease * regrets_negative + can_increase * regrets_positive + + +def compute_baseline(policy, action_values): + # V = pi * Q, backprop through pi but not Q. + return torch.sum(torch.mul(policy, action_values.detach()), dim=1) + + +def compute_regrets(policy_logits, action_values): + """Compute regrets using pi and Q.""" + # Compute regret. + policy = F.softmax(policy_logits, dim=1) + # Avoid computing gradients for action_values. + action_values = action_values.detach() + + baseline = compute_baseline(policy, action_values) + + regrets = torch.sum( + F.relu(action_values - torch.unsqueeze(baseline, 1)), dim=1) + + return regrets + + +def compute_advantages(policy_logits, + action_values, + use_relu=False, + threshold_fn=None): + """Compute advantages using pi and Q.""" + # Compute advantage. + policy = F.softmax(policy_logits, dim=1) + # Avoid computing gradients for action_values. + action_values = action_values.detach() + + baseline = compute_baseline(policy, action_values) + + advantages = action_values - torch.unsqueeze(baseline, 1) + if use_relu: + advantages = F.relu(advantages) + + if threshold_fn: + # Compute thresholded advanteges weighted by policy logits for NeuRD. 
+ policy_logits = policy_logits - policy_logits.mean(-1, keepdim=True) + advantages = threshold_fn(policy_logits, advantages) + policy_advantages = -torch.mul(policy_logits, advantages.detach()) + else: + # Compute advantage weighted by policy. + policy_advantages = -torch.mul(policy, advantages.detach()) + return torch.sum(policy_advantages, dim=1) + + +def compute_a2c_loss(policy_logits, actions, advantages): + cross_entropy = F.cross_entropy(policy_logits, actions, reduction="none") + advantages = advantages.detach() + if advantages.ndim != cross_entropy.ndim: + raise ValueError("Shapes %s and %s are not compatible" % + (advantages.ndim, cross_entropy.ndim)) + return torch.mul(cross_entropy, advantages) + + +def compute_entropy(policy_logits): + return torch.sum( + -F.softmax(policy_logits, dim=1) * F.log_softmax(policy_logits, dim=1), + dim=-1) + + +class BatchQPGLoss(object): + """Defines the batch QPG loss op.""" + + def __init__(self, entropy_cost=None, name="batch_qpg_loss"): + self._entropy_cost = entropy_cost + self._name = name + + def loss(self, policy_logits, action_values): + """Constructs a PyTorch Crierion that computes the QPG loss for batches. + + Args: + policy_logits: `B x A` tensor corresponding to policy logits. + action_values: `B x A` tensor corresponding to Q-values. + + Returns: + loss: A 0-D `float` tensor corresponding the loss. + """ + _assert_rank_and_shape_compatibility([policy_logits, action_values], 2) + advantages = compute_advantages(policy_logits, action_values) + _assert_rank_and_shape_compatibility([advantages], 1) + total_adv = torch.mean(advantages, dim=0) + + total_loss = total_adv + if self._entropy_cost: + policy_entropy = torch.mean(compute_entropy(policy_logits)) + entropy_loss = torch.mul(float(self._entropy_cost), policy_entropy) + total_loss = torch.add(total_loss, entropy_loss) + + return total_loss + + +class BatchNeuRDLoss(object): + """Defines the batch NeuRD loss op.""" + + def __init__(self, entropy_cost=None, name="batch_neurd_loss"): + self._entropy_cost = entropy_cost + self._name = name + + def loss(self, policy_logits, action_values): + """Constructs a PyTorch Crierion that computes the NeuRD loss for batches. + + Args: + policy_logits: `B x A` tensor corresponding to policy logits. + action_values: `B x A` tensor corresponding to Q-values. + + Returns: + loss: A 0-D `float` tensor corresponding the loss. + """ + _assert_rank_and_shape_compatibility([policy_logits, action_values], 2) + advantages = compute_advantages( + policy_logits, action_values, threshold_fn=thresholded) + _assert_rank_and_shape_compatibility([advantages], 1) + total_adv = torch.mean(advantages, axis=0) + + total_loss = total_adv + if self._entropy_cost: + policy_entropy = torch.mean(compute_entropy(policy_logits)) + entropy_loss = torch.mul(float(self._entropy_cost), policy_entropy) + total_loss = torch.add(total_loss, entropy_loss) + + return total_loss + + +class BatchRMLoss(object): + """Defines the batch RM loss op.""" + + def __init__(self, entropy_cost=None, name="batch_rm_loss"): + self._entropy_cost = entropy_cost + self._name = name + + def loss(self, policy_logits, action_values): + """Constructs a PyTorch Crierion that computes the RM loss for batches. + + Args: + policy_logits: `B x A` tensor corresponding to policy logits. + action_values: `B x A` tensor corresponding to Q-values. + + Returns: + loss: A 0-D `float` tensor corresponding the loss. 
+ """ + _assert_rank_and_shape_compatibility([policy_logits, action_values], 2) + advantages = compute_advantages(policy_logits, action_values, use_relu=True) + _assert_rank_and_shape_compatibility([advantages], 1) + total_adv = torch.mean(advantages, dim=0) + + total_loss = total_adv + if self._entropy_cost: + policy_entropy = torch.mean(compute_entropy(policy_logits)) + entropy_loss = torch.mul(float(self._entropy_cost), policy_entropy) + total_loss = torch.add(total_loss, entropy_loss) + + return total_loss + + +class BatchRPGLoss(object): + """Defines the batch RPG loss op.""" + + def __init__(self, entropy_cost=None, name="batch_rpg_loss"): + self._entropy_cost = entropy_cost + self._name = name + + def loss(self, policy_logits, action_values): + """Constructs a PyTorch Crierion that computes the RPG loss for batches. + + Args: + policy_logits: `B x A` tensor corresponding to policy logits. + action_values: `B x A` tensor corresponding to Q-values. + + Returns: + loss: A 0-D `float` tensor corresponding the loss. + """ + _assert_rank_and_shape_compatibility([policy_logits, action_values], 2) + regrets = compute_regrets(policy_logits, action_values) + _assert_rank_and_shape_compatibility([regrets], 1) + total_regret = torch.mean(regrets, dim=0) + + total_loss = total_regret + if self._entropy_cost: + policy_entropy = torch.mean(compute_entropy(policy_logits)) + entropy_loss = torch.mul(float(self._entropy_cost), policy_entropy) + total_loss = torch.add(total_loss, entropy_loss) + + return total_loss + + +class BatchA2CLoss(object): + """Defines the batch A2C loss op.""" + + def __init__(self, entropy_cost=None, name="batch_a2c_loss"): + self._entropy_cost = entropy_cost + self._name = name + + def loss(self, policy_logits, baseline, actions, returns): + """Constructs a PyTorch Crierion that computes the A2C loss for batches. + + Args: + policy_logits: `B x A` tensor corresponding to policy logits. + baseline: `B` tensor corresponding to baseline (V-values). + actions: `B` tensor corresponding to actions taken. + returns: `B` tensor corresponds to returns accumulated. + + Returns: + loss: A 0-D `float` tensor corresponding the loss. + """ + _assert_rank_and_shape_compatibility([policy_logits], 2) + _assert_rank_and_shape_compatibility([baseline, actions, returns], 1) + advantages = returns - baseline + + policy_loss = compute_a2c_loss(policy_logits, actions, advantages) + total_loss = torch.mean(policy_loss, dim=0) + if self._entropy_cost: + policy_entropy = torch.mean(compute_entropy(policy_logits)) + entropy_loss = torch.mul(float(self._entropy_cost), policy_entropy) + total_loss = torch.add(total_loss, entropy_loss) + + return total_loss diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/losses/rl_losses_pytorch_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/losses/rl_losses_pytorch_test.py new file mode 100644 index 0000000..187d1ff --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/losses/rl_losses_pytorch_test.py @@ -0,0 +1,102 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.pytorch.losses.rl_losses.""" + +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np +import torch + +from open_spiel.python.pytorch.losses import rl_losses + +SEED = 24984617 + + +class RLLossesTest(parameterized.TestCase, absltest.TestCase): + + @parameterized.named_parameters(('no_entropy_cost', 0.), + ('with_entropy_cost', 1.)) + def test_batch_qpg_loss_with_entropy_cost(self, entropy_cost): + batch_qpg_loss = rl_losses.BatchQPGLoss(entropy_cost=entropy_cost) + q_values = torch.FloatTensor([[0., -1., 1.], [1., -1., 0]]) + policy_logits = torch.FloatTensor([[1., 1., 1.], [1., 1., 4.]]) + total_loss = batch_qpg_loss.loss(policy_logits, q_values) + # Compute expected quantities. + expected_policy_entropy = (1.0986 + 0.3665) / 2 + # baseline = \sum_a pi_a * Q_a = 0. + # -\sum_a pi_a * (Q_a - baseline) + expected_policy_loss = (0.0 + 0.0) / 2 + expected_total_loss = ( + expected_policy_loss + entropy_cost * expected_policy_entropy) + np.testing.assert_allclose(total_loss, expected_total_loss, atol=1e-4) + + @parameterized.named_parameters(('no_entropy_cost', 0.), + ('with_entropy_cost', 1.)) + def test_batch_rm_loss_with_entropy_cost(self, entropy_cost): + batch_rpg_loss = rl_losses.BatchRMLoss(entropy_cost=entropy_cost) + q_values = torch.FloatTensor([[0., -1., 1.], [1., -1., 0]]) + policy_logits = torch.FloatTensor([[1., 1., 1.], [1., 1., 4.]]) + total_loss = batch_rpg_loss.loss(policy_logits, q_values) + # Compute expected quantities. + expected_policy_entropy = (1.0986 + 0.3665) / 2 + # baseline = \sum_a pi_a * Q_a = 0. + # -\sum_a pi_a * relu(Q_a - baseline) + # negative sign as it's a loss term and loss needs to be minimized. + expected_policy_loss = -(.3333 + .0452) / 2 + expected_total_loss = ( + expected_policy_loss + entropy_cost * expected_policy_entropy) + np.testing.assert_allclose(total_loss, expected_total_loss, atol=1e-4) + + @parameterized.named_parameters(('no_entropy_cost', 0.), + ('with_entropy_cost', 1.)) + def test_batch_rpg_loss_with_entropy_cost(self, entropy_cost): + batch_rpg_loss = rl_losses.BatchRPGLoss(entropy_cost=entropy_cost) + q_values = torch.FloatTensor([[0., -1., 1.], [1., -1., 0]]) + policy_logits = torch.FloatTensor([[1., 1., 1.], [1., 1., 4.]]) + total_loss = batch_rpg_loss.loss(policy_logits, q_values) + # Compute expected quantities. + expected_policy_entropy = (1.0986 + 0.3665) / 2 + # baseline = \sum_a pi_a * Q_a = 0. + # \sum_a relu(Q_a - baseline) + expected_policy_loss = (1.0 + 1.0) / 2 + expected_total_loss = ( + expected_policy_loss + entropy_cost * expected_policy_entropy) + + np.testing.assert_allclose(total_loss, expected_total_loss, atol=1e-4) + + @parameterized.named_parameters(('no_entropy_cost', 0.), + ('with_entropy_cost', 1.)) + def test_batch_a2c_loss_with_entropy_cost(self, entropy_cost): + batch_a2c_loss = rl_losses.BatchA2CLoss(entropy_cost=entropy_cost) + policy_logits = torch.FloatTensor([[1., 1., 1.], [1., 1., 4.]]) + baseline = torch.FloatTensor([1. 
/ 3, 0.5]) + actions = torch.LongTensor([1, 2]) + returns = torch.FloatTensor([0., 1.]) + total_loss = batch_a2c_loss.loss(policy_logits, baseline, actions, returns) + # Compute expected quantities. + # advantages = returns - baseline = [-1./3, 0.5] + # cross_entropy = [-log(e^1./3 * e^1), -log(e^4/(e^4+ e + e))] + # = [1.0986, 0.09492] + # policy_loss = cross_entropy * advantages = [-0.3662, 0.04746] + expected_policy_entropy = (1.0986 + 0.3665) / 2 + expected_policy_loss = (-0.3662 + 0.04746) / 2 + expected_total_loss = ( + expected_policy_loss + entropy_cost * expected_policy_entropy) + np.testing.assert_allclose(total_loss, expected_total_loss, atol=1e-4) + + +if __name__ == '__main__': + torch.manual_seed(SEED) + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/neurd.py b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/neurd.py new file mode 100644 index 0000000..855ca7e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/neurd.py @@ -0,0 +1,251 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Neural Replicator Dynamics [Omidshafiei et al, 2019]. + +A policy gradient-like extension to replicator dynamics and the hedge algorithm +that incorporates function approximation. + +# References + +Shayegan Omidshafiei, Daniel Hennes, Dustin Morrill, Remi Munos, + Julien Perolat, Marc Lanctot, Audrunas Gruslys, Jean-Baptiste Lespiau, + Karl Tuyls. Neural Replicator Dynamics. https://arxiv.org/abs/1906.00190. + 2019. +""" + +import numpy as np +import torch +from torch import nn + +from open_spiel.python.pytorch import rcfr + + +def thresholded(logits, regrets, threshold=2.0): + """Zeros out `regrets` where `logits` are too negative or too large.""" + can_decrease = torch.gt(logits, -threshold).float() + can_increase = torch.lt(logits, threshold).float() + regrets_negative = torch.minimum(regrets, torch.Tensor([0.0])) + regrets_positive = torch.maximum(regrets, torch.Tensor([0.0])) + return can_decrease * regrets_negative + can_increase * regrets_positive + + +def train(model, + data, + batch_size, + step_size=1.0, + threshold=2.0, + autoencoder_loss=None): + """Train NeuRD `model` on `data`.""" + data = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=True) + + for x, regrets in data: + output = model(x, training=True) + logits = output[:, :1] + logits = logits - torch.mean(logits) + + regrets = thresholded(logits, regrets, threshold=threshold).detach() + utility = torch.mean(logits * regrets) + + if autoencoder_loss is not None: + utility = utility - autoencoder_loss(x, output[:, 1:]) + model.zero_grad() + utility.backward() + with torch.no_grad(): + for var in model.layers.parameters(): + new_var = var + step_size * var.grad + var.copy_(new_var) + + +class DeepNeurdModel(nn.Module): + """A flexible deep feedforward NeuRD model class. + + Properties: + layers: The `torch.nn.Linear` layers describing this model. 
+ """ + + def __init__(self, + game, + num_hidden_units, + num_hidden_layers=1, + num_hidden_factors=0, + hidden_activation=nn.ReLU, + use_skip_connections=False, + autoencode=False): + """Creates a new `DeepNeurdModel. + + Args: + game: The OpenSpiel game being solved. + num_hidden_units: The number of units in each hidden layer. + num_hidden_layers: The number of hidden layers. Defaults to 1. + num_hidden_factors: The number of hidden factors or the matrix rank of the + layer. If greater than zero, hidden layers will be split into two + separate linear transformations, the first with + `num_hidden_factors`-columns and the second with + `num_hidden_units`-columns. The result is that the logical hidden layer + is a rank-`num_hidden_units` matrix instead of a rank-`num_hidden_units` + matrix. When `num_hidden_units < num_hidden_units`, this is effectively + implements weight sharing. Defaults to 0. + hidden_activation: The activation function to apply over hidden layers. + Defaults to `torch.nn.Relu`. + use_skip_connections: Whether or not to apply skip connections (layer + output = layer(x) + x) on hidden layers. Zero padding or truncation is + used to match the number of columns on layer inputs and outputs. + autoencode: Whether or not to output a reconstruction of the inputs upon + being called. Defaults to `False`. + """ + super(DeepNeurdModel, self).__init__() + self._autoencode = autoencode + self._use_skip_connections = use_skip_connections + self._hidden_are_factored = num_hidden_factors > 0 + + self.layers = nn.ModuleList() + self.input_size = rcfr.num_features(game) + for _ in range(num_hidden_layers): + if self._hidden_are_factored: + self.layers.append( + nn.Linear(self.input_size, num_hidden_factors, bias=True)) + self.input_size = num_hidden_factors + + self.layers.append( + nn.Linear(self.input_size, num_hidden_units, bias=True)) + if hidden_activation: + self.layers.append(hidden_activation()) + self.input_size = num_hidden_units + + self.layers.append( + nn.Linear( + self.input_size, + 1 + self._autoencode * rcfr.num_features(game), + bias=True)) + + def forward(self, x, training=False): + """Evaluates this model on x. + + Args: + x: Model input. + training: Whether or not this is being called during training. If + `training` and the constructor argument `autoencode` was `True`, then + the output will contain the estimated regrets concatenated with a + reconstruction of the input, otherwise only regrets will be returned. + Defaults to `False`. + + Returns: + The `torch.Tensor` resulting from evaluating this model on `x`. If + `training` and the constructor argument `autoencode` was `True`, then + it will contain the estimated regrets concatenated with a + reconstruction of the input, otherwise only regrets will be returned. + """ + y = rcfr.feedforward_evaluate( + layers=self.layers, + x=x, + use_skip_connections=self._use_skip_connections, + hidden_are_factored=self._hidden_are_factored) + return y if training else y[:, :1] + + +class CounterfactualNeurdSolver(object): + """All-actions, strong NeuRD on counterfactual regrets. + + No regularization bonus is applied, so the current policy likely will not + converge. The average policy profile is updated and stored in a full + game-size table and may converge to an approximate Nash equilibrium in + two-player, zero-sum games. + """ + + def __init__(self, game, models): + """Creates a new `CounterfactualNeurdSolver`. + + Args: + game: An OpenSpiel `Game`. 
+ models: Current policy models (optimizable array-like -> `torch.Tensor` + callables) for both players. + """ + self._game = game + self._models = models + self._root_wrapper = rcfr.RootStateWrapper(game.new_initial_state()) + + self._cumulative_seq_probs = [ + np.zeros(n) for n in self._root_wrapper.num_player_sequences + ] + + def _sequence_weights(self, player=None): + """Returns exponentiated weights for each sequence as an `np.array`.""" + if player is None: + return [ + self._sequence_weights(player) + for player in range(self._game.num_players()) + ] + else: + tensor = torch.squeeze(self._models[player]( + self._root_wrapper.sequence_features[player])) + tensor = tensor - torch.max(tensor, dim=0)[0] + tensor = torch.exp(tensor) + return tensor.detach().numpy() + + def current_policy(self): + """Returns the current policy profile. + + Returns: + A `dict>` that maps info state + strings to `Action`-probability pairs describing each player's policy. + """ + return self._root_wrapper.sequence_weights_to_tabular_profile( + self._sequence_weights()) + + def average_policy(self): + """Returns the average of all policies iterated. + + The policy is computed using the accumulated policy probabilities computed + using `evaluate_and_update_policy`. + + Returns: + A `dict>` that maps info state + strings to (Action, probability) pairs describing each player's policy. + """ + return self._root_wrapper.sequence_weights_to_tabular_profile( + self._cumulative_seq_probs) + + def _previous_player(self, player): + """The previous player in the turn ordering.""" + return player - 1 if player > 0 else self._game.num_players() - 1 + + def _average_policy_update_player(self, regret_player): + """The player for whom the average policy should be updated.""" + return self._previous_player(regret_player) + + def evaluate_and_update_policy(self, train_fn): + """Performs a single step of policy evaluation and policy improvement. + + Args: + train_fn: A (model, `torch.utils.data.TensorDataset`) function that trains + the given regression model to accurately reproduce the x to y mapping + given x-y data. + """ + sequence_weights = self._sequence_weights() + player_seq_features = self._root_wrapper.sequence_features + for regret_player in range(self._game.num_players()): + seq_prob_player = self._average_policy_update_player(regret_player) + + regrets, seq_probs = ( + self._root_wrapper.counterfactual_regrets_and_reach_weights( + regret_player, seq_prob_player, *sequence_weights)) + + self._cumulative_seq_probs[seq_prob_player] += seq_probs + targets = torch.unsqueeze(torch.Tensor(regrets), axis=1) + data = torch.utils.data.TensorDataset(player_seq_features[regret_player], + targets) + + regret_player_model = self._models[regret_player] + train_fn(regret_player_model, data) + sequence_weights[regret_player] = self._sequence_weights(regret_player) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/neurd_pytorch_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/neurd_pytorch_test.py new file mode 100644 index 0000000..23a1433 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/neurd_pytorch_test.py @@ -0,0 +1,66 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from absl.testing import absltest +import torch +import torch.nn.functional as F + +import pyspiel +from open_spiel.python.pytorch import neurd + +_GAME = pyspiel.load_game('kuhn_poker') + + +def _new_model(): + return neurd.DeepNeurdModel( + _GAME, + num_hidden_layers=1, + num_hidden_units=13, + num_hidden_factors=1, + use_skip_connections=True, + autoencode=True) + + +class NeurdTest(absltest.TestCase): + + def setUp(self): + super(NeurdTest, self).setUp() + torch.manual_seed(42) + + def test_neurd(self): + num_iterations = 2 + models = [_new_model() for _ in range(_GAME.num_players())] + + solver = neurd.CounterfactualNeurdSolver(_GAME, models) + + average_policy = solver.average_policy() + self.assertGreater(pyspiel.nash_conv(_GAME, average_policy), 0.91) + + def _train(model, data): + neurd.train( + model=model, + data=data, + batch_size=12, + step_size=10.0, + autoencoder_loss=F.huber_loss) + + for _ in range(num_iterations): + solver.evaluate_and_update_policy(_train) + + average_policy = solver.average_policy() + self.assertLess(pyspiel.nash_conv(_GAME, average_policy), 0.91) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/policy_gradient.py b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/policy_gradient.py new file mode 100644 index 0000000..22cc579 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/policy_gradient.py @@ -0,0 +1,515 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as python3. +r"""Policy Gradient based agents implemented in PyTorch. + +This class is composed of three policy gradient (PG) algorithms: + +- Q-based Policy Gradient (QPG): an "all-actions" advantage actor-critic +algorithm differing from A2C in that all action values are used to estimate the +policy gradient (as opposed to only using the action taken into account): + + baseline = \sum_a pi_a * Q_a + loss = - \sum_a pi_a * (Q_a - baseline) + +where (Q_a - baseline) is the usual advantage. QPG is also known as Mean +Actor-Critic (https://arxiv.org/abs/1709.00503). + + +- Regret policy gradient (RPG): a PG algorithm inspired by counterfactual regret +minimization (CFR). Unlike standard actor-critic methods (e.g. A2C), the loss is +defined purely in terms of thresholded regrets as follows: + + baseline = \sum_a pi_a * Q_a + loss = regret = \sum_a relu(Q_a - baseline) + +where gradients only flow through the action value (Q_a) part and are blocked on +the baseline part (which is trained separately by usual MSE loss). 
+The lack of negative sign in the front of the loss represents a switch from +gradient ascent on the score to descent on the loss. + + +- Regret Matching Policy Gradient (RMPG): inspired by regret-matching, the +policy gradient is by weighted by the thresholded regret: + + baseline = \sum_a pi_a * Q_a + loss = - \sum_a pi_a * relu(Q_a - baseline) + + +These algorithms were published in NeurIPS 2018. Paper title: "Actor-Critic +Policy Optimization in Partially Observable Multiagent Environment", the paper +is available at: https://arxiv.org/abs/1810.09026. + +- Advantage Actor Critic (A2C): The popular advantage actor critic (A2C) +algorithm. The algorithm uses the baseline (Value function) as a control variate +to reduce variance of the policy gradient. The loss is only computed for the +actions actually taken in the episode as opposed to a loss computed for all +actions in the variants above. + + advantages = returns - baseline + loss = -log(pi_a) * advantages + +The algorithm can be found in the textbook: +https://incompleteideas.net/book/RLbook2018.pdf under the chapter on +`Policy Gradients`. + +See open_spiel/python/pytorch/losses/rl_losses_test.py for an example of the +loss computation. +""" + +import collections +import os +from absl import logging +import numpy as np +import torch +from torch import nn +from torch import optim +import torch.nn.functional as F + +from open_spiel.python import rl_agent +from open_spiel.python.pytorch.dqn import SonnetLinear +from open_spiel.python.pytorch.losses import rl_losses + +Transition = collections.namedtuple( + "Transition", "info_state action reward discount legal_actions_mask") + + +class MLPTorso(nn.Module): + """A specialized half-MLP module when constructing multiple heads. + + Note that every layer includes a ReLU non-linearity activation. + """ + + def __init__(self, input_size, hidden_sizes): + """Create the MLPTorso. + + Args: + input_size: (int) number of inputs + hidden_sizes: (list) sizes (number of units) of each hidden layer + """ + + super(MLPTorso, self).__init__() + self._layers = [] + # Hidden layers + for size in hidden_sizes: + self._layers.append(SonnetLinear(in_size=input_size, out_size=size)) + input_size = size + + self.model = nn.ModuleList(self._layers) + + def forward(self, x): + for layer in self.model: + x = layer(x) + return x + + +class PolicyGradient(rl_agent.AbstractAgent): + """RPG Agent implementation in PyTorch. + + See open_spiel/python/examples/single_agent_catch.py for an usage example. + """ + + def __init__(self, + player_id, + info_state_size, + num_actions, + loss_str="a2c", + loss_class=None, + hidden_layers_sizes=(128,), + batch_size=16, + critic_learning_rate=0.01, + pi_learning_rate=0.001, + entropy_cost=0.01, + num_critic_before_pi=8, + additional_discount_factor=1.0, + max_global_gradient_norm=None, + optimizer_str="sgd"): + """Initialize the PolicyGradient agent. + + Args: + player_id: int, player identifier. Usually its position in the game. + info_state_size: int, info_state vector size. + num_actions: int, number of actions per info state. + loss_str: string or None. If string, must be one of ["rpg", "qpg", "rm", + "a2c", "neurd"] and defined in `_get_loss_class`. If None, a loss class + must be passed through `loss_class`. Defaults to "a2c". + loss_class: Class or None. If Class, it must define the policy gradient + loss. If None a loss class in a string format must be passed through + `loss_str`. Defaults to None. + hidden_layers_sizes: iterable, defines the neural network layers. 
Defaults + to (128,), which produces a NN: [INPUT] -> [128] -> ReLU -> [OUTPUT]. + batch_size: int, batch size to use for Q and Pi learning. Defaults to 128. + critic_learning_rate: float, learning rate used for Critic (Q or V). + Defaults to 0.001. + pi_learning_rate: float, learning rate used for Pi. Defaults to 0.001. + entropy_cost: float, entropy cost used to multiply the entropy loss. Can + be set to None to skip entropy computation. Defaults to 0.001. + num_critic_before_pi: int, number of Critic (Q or V) updates before each + Pi update. Defaults to 8 (every 8th critic learning step, Pi also + learns). + additional_discount_factor: float, additional discount to compute returns. + Defaults to 1.0, in which case, no extra discount is applied. None that + users must provide *only one of* `loss_str` or `loss_class`. + max_global_gradient_norm: float or None, maximum global norm of a gradient + to which the gradient is shrunk if its value is larger. + optimizer_str: String defining which optimizer to use. Supported values + are {sgd, adam} + """ + assert bool(loss_str) ^ bool(loss_class), "Please provide only one option." + self._kwargs = locals() + loss_class = loss_class if loss_class else self._get_loss_class(loss_str) + self._loss_class = loss_class + + self.player_id = player_id + self._num_actions = num_actions + self._layer_sizes = hidden_layers_sizes + self._batch_size = batch_size + self._extra_discount = additional_discount_factor + self._num_critic_before_pi = num_critic_before_pi + self._max_global_gradient_norm = max_global_gradient_norm + + self._episode_data = [] + self._dataset = collections.defaultdict(list) + self._prev_time_step = None + self._prev_action = None + + # Step counters + self._step_counter = 0 + self._episode_counter = 0 + self._num_learn_steps = 0 + + # Keep track of the last training loss achieved in an update step. + self._last_loss_value = None + + # Network + # activate final as we plug logit and qvalue heads afterwards. + self._net_torso = MLPTorso(info_state_size, self._layer_sizes) + torso_out_size = self._layer_sizes[-1] + self._policy_logits_layer = SonnetLinear( + torso_out_size, self._num_actions, activate_relu=False) + # Do not remove policy_logits_network. Even if it's not used directly here, + # other code outside this file refers to it. 
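+    # Note (descriptive, based on the construction below): a single MLP torso
+    # is shared by the policy-logits head and the critic head; the critic is a
+    # scalar V(s) baseline for A2C, or a per-action Q(s, a) head for the
+    # QPG / RPG / RM / NeuRD losses.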
+ self.policy_logits_network = nn.Sequential(self._net_torso, + self._policy_logits_layer) + + self._savers = [] + + # Add baseline (V) head for A2C (or Q-head for QPG / RPG / RMPG / NeuRD) + if optimizer_str == "adam": + self._critic_optimizer = optim.Adam + elif optimizer_str == "sgd": + self._critic_optimizer = optim.SGD + else: + raise ValueError("Not implemented, choose from 'adam' and 'sgd'.") + + if loss_class.__name__ == "BatchA2CLoss": + self._baseline_layer = SonnetLinear( + torso_out_size, 1, activate_relu=False) + self._critic_network = nn.Sequential(self._net_torso, + self._baseline_layer) + else: + self._q_values_layer = SonnetLinear( + torso_out_size, self._num_actions, activate_relu=False) + self._critic_network = nn.Sequential(self._net_torso, + self._q_values_layer) + + self._critic_optimizer = self._critic_optimizer( + self._critic_network.parameters(), lr=critic_learning_rate) + + # Pi loss + self.pg_class = loss_class(entropy_cost=entropy_cost) + self._pi_network = nn.Sequential(self._net_torso, self._policy_logits_layer) + if optimizer_str == "adam": + self._pi_optimizer = optim.Adam( + self._pi_network.parameters(), lr=pi_learning_rate) + elif optimizer_str == "sgd": + self._pi_optimizer = optim.SGD( + self._pi_network.parameters(), lr=pi_learning_rate) + + self._loss_str = loss_str + + def _get_loss_class(self, loss_str): + if loss_str == "rpg": + return rl_losses.BatchRPGLoss + elif loss_str == "qpg": + return rl_losses.BatchQPGLoss + elif loss_str == "rm": + return rl_losses.BatchRMLoss + elif loss_str == "a2c": + return rl_losses.BatchA2CLoss + elif loss_str == "neurd": + return rl_losses.BatchNeuRDLoss + + def minimize_with_clipping(self, model, optimizer, loss): + optimizer.zero_grad() + loss.backward() + if self._max_global_gradient_norm is not None: + nn.utils.clip_grad_norm_(model.parameters(), + self._max_global_gradient_norm) + optimizer.step() + + def _act(self, info_state, legal_actions): + # Make a singleton batch for NN compatibility: [1, info_state_size] + info_state = torch.Tensor(np.reshape(info_state, [1, -1])) + torso_out = self._net_torso(info_state) + self._policy_logits = self._policy_logits_layer(torso_out) + policy_probs = F.softmax(self._policy_logits, dim=1).detach() + + # Remove illegal actions, re-normalize probs + probs = np.zeros(self._num_actions) + probs[legal_actions] = policy_probs[0][legal_actions] + if sum(probs) != 0: + probs /= sum(probs) + else: + probs[legal_actions] = 1 / len(legal_actions) + action = np.random.choice(len(probs), p=probs) + return action, probs + + def step(self, time_step, is_evaluation=False): + """Returns the action to be taken and updates the network if needed. + + Args: + time_step: an instance of rl_environment.TimeStep. + is_evaluation: bool, whether this is a training or evaluation call. + + Returns: + A `rl_agent.StepOutput` containing the action probs and chosen action. + """ + # Act step: don't act at terminal info states or if its not our turn. + if (not time_step.last()) and ( + time_step.is_simultaneous_move() or + self.player_id == time_step.current_player()): + info_state = time_step.observations["info_state"][self.player_id] + legal_actions = time_step.observations["legal_actions"][self.player_id] + action, probs = self._act(info_state, legal_actions) + else: + action = None + probs = [] + + if not is_evaluation: + self._step_counter += 1 + + # Add data points to current episode buffer. 
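+      # Transitions are buffered per episode; once at least `batch_size`
+      # returns have accumulated, a critic update runs, and every
+      # `num_critic_before_pi`-th critic update is followed by a policy update.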
+ if self._prev_time_step: + self._add_transition(time_step) + + # Episode done, add to dataset and maybe learn. + if time_step.last(): + self._add_episode_data_to_dataset() + self._episode_counter += 1 + + if len(self._dataset["returns"]) >= self._batch_size: + self._critic_update() + self._num_learn_steps += 1 + if self._num_learn_steps % self._num_critic_before_pi == 0: + self._pi_update() + self._dataset = collections.defaultdict(list) + + self._prev_time_step = None + self._prev_action = None + return + else: + self._prev_time_step = time_step + self._prev_action = action + + return rl_agent.StepOutput(action=action, probs=probs) + + def _full_checkpoint_name(self, checkpoint_dir, name): + checkpoint_filename = "_".join( + [self._loss_str, name, "pid" + str(self.player_id)]) + return os.path.join(checkpoint_dir, checkpoint_filename) + + def _latest_checkpoint_filename(self, name): + checkpoint_filename = "_".join( + [self._loss_str, name, "pid" + str(self.player_id)]) + return checkpoint_filename + "_latest" + + def save(self, checkpoint_dir): + for name, model in self._savers: + path = self._full_checkpoint_name(checkpoint_dir, name) + torch.save(model.state_dict(), path) + logging.info("Saved to path: %s", path) + + def has_checkpoint(self, checkpoint_dir): + for name, _ in self._savers: + path = self._full_checkpoint_name(checkpoint_dir, name) + if os.path.exists(path): + return True + return False + + def restore(self, checkpoint_dir): + for name, model in self._savers: + full_checkpoint_dir = self._full_checkpoint_name(checkpoint_dir, name) + logging.info("Restoring checkpoint: %s", full_checkpoint_dir) + model.load_state_dict(torch.load(full_checkpoint_dir)) + + @property + def loss(self): + return (self._last_critic_loss_value, self._last_pi_loss_value) + + def _add_episode_data_to_dataset(self): + """Add episode data to the buffer.""" + info_states = [data.info_state for data in self._episode_data] + rewards = [data.reward for data in self._episode_data] + discount = [data.discount for data in self._episode_data] + actions = [data.action for data in self._episode_data] + + # Calculate returns + returns = np.array(rewards) + for idx in reversed(range(len(rewards[:-1]))): + returns[idx] = ( + rewards[idx] + + discount[idx] * returns[idx + 1] * self._extra_discount) + + # Add flattened data points to dataset + self._dataset["actions"].extend(actions) + self._dataset["returns"].extend(returns) + self._dataset["info_states"].extend(info_states) + self._episode_data = [] + + def _add_transition(self, time_step): + """Adds intra-episode transition to the `_episode_data` buffer. + + Adds the transition from `self._prev_time_step` to `time_step`. + + Args: + time_step: an instance of rl_environment.TimeStep. + """ + assert self._prev_time_step is not None + legal_actions = ( + self._prev_time_step.observations["legal_actions"][self.player_id]) + legal_actions_mask = np.zeros(self._num_actions) + legal_actions_mask[legal_actions] = 1.0 + transition = Transition( + info_state=( + self._prev_time_step.observations["info_state"][self.player_id][:]), + action=self._prev_action, + reward=time_step.rewards[self.player_id], + discount=time_step.discounts[self.player_id], + legal_actions_mask=legal_actions_mask) + + self._episode_data.append(transition) + + def _critic_update(self): + """Compute the Critic loss on sampled transitions & perform a critic update. + + Returns: + The average Critic loss obtained on this batch. + """ + # TODO(author3): illegal action handling. 
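+    # The critic regresses onto the empirical discounted return: for A2C the
+    # scalar baseline V(s) is fit with MSE, otherwise the Q-value of the
+    # action actually taken is fit with MSE.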
+ info_state = torch.Tensor(self._dataset["info_states"]) + action = torch.LongTensor(self._dataset["actions"]) + return_ = torch.Tensor(self._dataset["returns"]) + torso_out = self._net_torso(info_state) + + # Critic loss + # Baseline loss in case of A2C + if self._loss_class.__name__ == "BatchA2CLoss": + baseline = torch.squeeze(self._baseline_layer(torso_out), dim=1) + critic_loss = torch.mean(F.mse_loss(baseline, return_)) + self.minimize_with_clipping(self._baseline_layer, self._critic_optimizer, + critic_loss) + else: + # Q-loss otherwise. + q_values = self._q_values_layer(torso_out) + action_indices = torch.stack( + [torch.arange(q_values.shape[0], dtype=torch.long), action], dim=0) + value_predictions = q_values[list(action_indices)] + critic_loss = torch.mean(F.mse_loss(value_predictions, return_)) + self.minimize_with_clipping(self._q_values_layer, self._critic_optimizer, + critic_loss) + self._last_critic_loss_value = critic_loss + return critic_loss + + def _pi_update(self): + """Compute the Pi loss on sampled transitions and perform a Pi update. + + Returns: + The average Pi loss obtained on this batch. + """ + # TODO(author3): illegal action handling. + info_state = torch.Tensor(self._dataset["info_states"]) + action = torch.LongTensor(self._dataset["actions"]) + return_ = torch.Tensor(self._dataset["returns"]) + torso_out = self._net_torso(info_state) + self._policy_logits = self._policy_logits_layer(torso_out) + + if self._loss_class.__name__ == "BatchA2CLoss": + baseline = torch.squeeze(self._baseline_layer(torso_out), dim=1) + pi_loss = self.pg_class.loss( + policy_logits=self._policy_logits, + baseline=baseline, + actions=action, + returns=return_) + self.minimize_with_clipping(self._policy_logits_layer, self._pi_optimizer, + pi_loss) + else: + q_values = self._q_values_layer(torso_out) + pi_loss = self.pg_class.loss( + policy_logits=self._policy_logits, action_values=q_values) + self.minimize_with_clipping(self._policy_logits_layer, self._pi_optimizer, + pi_loss) + self._last_pi_loss_value = pi_loss + return pi_loss + + def get_weights(self): + variables = [m.weight for m in self._net_torso.model] + variables.append(self._policy_logits_layer.weight) + if self._loss_class.__name__ == "BatchA2CLoss": + variables.append(self._baseline_layer.weight) + else: + variables.append(self._q_values_layer.weight) + return variables + + def copy_with_noise(self, sigma=0.0, copy_weights=True): + """Copies the object and perturbates its network's weights with noise. + + Args: + sigma: gaussian dropout variance term : Multiplicative noise following + (1+sigma*epsilon), epsilon standard gaussian variable, multiplies each + model weight. sigma=0 means no perturbation. + copy_weights: Boolean determining whether to copy model weights (True) or + just model hyperparameters. + + Returns: + Perturbated copy of the model. 
+ """ + _ = self._kwargs.pop("self", None) + copied_object = PolicyGradient(**self._kwargs) + + net_torso = getattr(copied_object, "_net_torso") + policy_logits_layer = getattr(copied_object, "_policy_logits_layer") + if hasattr(copied_object, "_q_values_layer"): + q_values_layer = getattr(copied_object, "_q_values_layer") + if hasattr(copied_object, "_baseline_layer"): + baseline_layer = getattr(copied_object, "_baseline_layer") + + if copy_weights: + with torch.no_grad(): + for layer in net_torso.model: + layer.weight *= (1 + sigma * torch.randn(layer.weight.shape)) + + policy_logits_layer.weight *= ( + 1 + sigma * torch.randn(policy_logits_layer.weight.shape)) + + if hasattr(copied_object, "_q_values_layer"): + q_values_layer.weight *= ( + 1 + sigma * torch.randn(q_values_layer.weight.shape)) + + if hasattr(copied_object, "_baseline_layer"): + baseline_layer.weight *= ( + 1 + sigma * torch.randn(baseline_layer.weight.shape)) + + return copied_object diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/policy_gradient_pytorch_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/policy_gradient_pytorch_test.py new file mode 100644 index 0000000..da876e3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/policy_gradient_pytorch_test.py @@ -0,0 +1,205 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import itertools + +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np +import torch + +from open_spiel.python import policy +from open_spiel.python import rl_environment +from open_spiel.python.algorithms import exploitability +import pyspiel +from open_spiel.python.pytorch import policy_gradient +from open_spiel.python.pytorch.losses import rl_losses + +SEED = 24984617 + + +class PolicyGradientPolicies(policy.Policy): + """Joint policy to be evaluated.""" + + def __init__(self, env, nfsp_policies): + game = env.game + player_ids = [0, 1] + super(PolicyGradientPolicies, self).__init__(game, player_ids) + self._policies = nfsp_policies + self._obs = {"info_state": [None, None], "legal_actions": [None, None]} + + def action_probabilities(self, state, player_id=None): + cur_player = state.current_player() + legal_actions = state.legal_actions(cur_player) + + self._obs["current_player"] = cur_player + self._obs["info_state"][cur_player] = state.information_state_tensor( + cur_player + ) + self._obs["legal_actions"][cur_player] = legal_actions + + info_state = rl_environment.TimeStep( + observations=self._obs, rewards=None, discounts=None, step_type=None + ) + + p = self._policies[cur_player].step(info_state, is_evaluation=True).probs + prob_dict = {action: p[action] for action in legal_actions} + return prob_dict + + +class PolicyGradientTest(parameterized.TestCase, absltest.TestCase): + + @parameterized.parameters( + itertools.product(("rpg", "qpg", "rm", "a2c", "neurd"), + ("kuhn_poker", "leduc_poker"))) + def test_run_game(self, loss_str, game_name): + env = rl_environment.Environment(game_name) + env.seed(SEED) + info_state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + agents = [ + policy_gradient.PolicyGradient( # pylint: disable=g-complex-comprehension + player_id=player_id, + info_state_size=info_state_size, + num_actions=num_actions, + loss_str=loss_str, + hidden_layers_sizes=[32, 32], + batch_size=16, + entropy_cost=0.001, + critic_learning_rate=0.01, + pi_learning_rate=0.01, + num_critic_before_pi=4) for player_id in [0, 1] + ] + + for _ in range(2): + time_step = env.reset() + while not time_step.last(): + current_player = time_step.observations["current_player"] + current_agent = agents[current_player] + agent_output = current_agent.step(time_step) + time_step = env.step([agent_output.action]) + + for agent in agents: + agent.step(time_step) + + def test_neurd_kuhn(self): + env = rl_environment.Environment("kuhn_poker") + env.seed(SEED) + info_state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + agents = [ + policy_gradient.PolicyGradient( # pylint: disable=g-complex-comprehension + player_id=player_id, + info_state_size=info_state_size, + num_actions=num_actions, + loss_str="neurd", + hidden_layers_sizes=[32], + batch_size=16, + entropy_cost=0.001, + critic_learning_rate=0.01, + pi_learning_rate=0.01, + num_critic_before_pi=4) for player_id in [0, 1] + ] + expl_policies_avg = PolicyGradientPolicies(env, agents) + + for _ in range(100): + time_step = env.reset() + while not time_step.last(): + current_player = time_step.observations["current_player"] + current_agent = agents[current_player] + agent_output = current_agent.step(time_step) + time_step = env.step([agent_output.action]) + + for agent in agents: + agent.step(time_step) + + expl = exploitability.exploitability(env.game, expl_policies_avg) + # Check the exploitability is 
less than the target upper bound. + self.assertLess(expl, 0.7) + + def test_run_hanabi(self): + # Hanabi is an optional game, so check we have it before running the test. + game = "hanabi" + if game not in pyspiel.registered_names(): + return + + num_players = 3 + env_configs = { + "players": num_players, + "max_life_tokens": 1, + "colors": 2, + "ranks": 3, + "hand_size": 2, + "max_information_tokens": 3, + "discount": 0.99 + } + env = rl_environment.Environment(game, **env_configs) + env.seed(SEED) + info_state_size = env.observation_spec()["info_state"][0] + num_actions = env.action_spec()["num_actions"] + + agents = [ + policy_gradient.PolicyGradient( # pylint: disable=g-complex-comprehension + player_id=player_id, + info_state_size=info_state_size, + num_actions=num_actions, + hidden_layers_sizes=[8, 8], + batch_size=16, + entropy_cost=0.001, + critic_learning_rate=0.001, + pi_learning_rate=0.001, + num_critic_before_pi=4) for player_id in range(num_players) + ] + + time_step = env.reset() + while not time_step.last(): + current_player = time_step.observations["current_player"] + agent_output = [agent.step(time_step) for agent in agents] + time_step = env.step([agent_output[current_player].action]) + + for agent in agents: + agent.step(time_step) + + def test_loss_modes(self): + loss_dict = { + "qpg": rl_losses.BatchQPGLoss, + "rpg": rl_losses.BatchRPGLoss, + "rm": rl_losses.BatchRMLoss, + "a2c": rl_losses.BatchA2CLoss, + "neurd": rl_losses.BatchNeuRDLoss, + } + + for loss_str, loss_class in loss_dict.items(): + agent_by_str = policy_gradient.PolicyGradient( + player_id=0, + info_state_size=32, + num_actions=2, + loss_str=loss_str, + loss_class=None) + agent_by_class = policy_gradient.PolicyGradient( + player_id=0, + info_state_size=32, + num_actions=2, + loss_str=None, + loss_class=loss_class) + + self.assertEqual(agent_by_str._loss_class, agent_by_class._loss_class) + + +if __name__ == "__main__": + np.random.seed(SEED) + torch.manual_seed(SEED) + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/ppo.py b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/ppo.py new file mode 100644 index 0000000..9ee80b1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/ppo.py @@ -0,0 +1,451 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""An implementation of PPO. + +Note: code adapted (with permission) from +https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ppo.py and +https://github.com/vwxyzjn/ppo-implementation-details/blob/main/ppo_atari.py. + +Currently only supports the single-agent case. 
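+
+The expected driver loop (a rough sketch; see ppo_pytorch_test.py in this
+directory for a complete example) is:
+
+  agent_output = agent.step(time_step)                  # act in all envs
+  time_step, reward, done, _ = envs.step(agent_output, reset_if_done=True)
+  agent.post_step(reward, done)                         # record rewards/dones
+  # ... after `steps_per_batch` environment steps:
+  agent.learn(time_step)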
+""" + +import time + +import numpy as np +import torch +from torch import nn +from torch import optim +from torch.distributions.categorical import Categorical + +from open_spiel.python.rl_agent import StepOutput + +INVALID_ACTION_PENALTY = -1e6 + + +def layer_init(layer, std=np.sqrt(2), bias_const=0.0): + torch.nn.init.orthogonal_(layer.weight, std) + torch.nn.init.constant_(layer.bias, bias_const) + return layer + + +class CategoricalMasked(Categorical): + """A masked categorical.""" + + # pylint: disable=dangerous-default-value + def __init__(self, + probs=None, + logits=None, + validate_args=None, + masks=[], + mask_value=None): + logits = torch.where(masks.bool(), logits, mask_value) + super(CategoricalMasked, self).__init__(probs, logits, validate_args) + + +class PPOAgent(nn.Module): + """A PPO agent module.""" + + def __init__(self, num_actions, observation_shape, device): + super().__init__() + self.critic = nn.Sequential( + layer_init(nn.Linear(np.array(observation_shape).prod(), 64)), + nn.Tanh(), + layer_init(nn.Linear(64, 64)), + nn.Tanh(), + layer_init(nn.Linear(64, 1), std=1.0), + ) + self.actor = nn.Sequential( + layer_init(nn.Linear(np.array(observation_shape).prod(), 64)), + nn.Tanh(), + layer_init(nn.Linear(64, 64)), + nn.Tanh(), + layer_init(nn.Linear(64, num_actions), std=0.01), + ) + self.device = device + self.num_actions = num_actions + self.register_buffer("mask_value", torch.tensor(INVALID_ACTION_PENALTY)) + + def get_value(self, x): + return self.critic(x) + + def get_action_and_value(self, x, legal_actions_mask=None, action=None): + if legal_actions_mask is None: + legal_actions_mask = torch.ones((len(x), self.num_actions)).bool() + + logits = self.actor(x) + probs = CategoricalMasked( + logits=logits, masks=legal_actions_mask, mask_value=self.mask_value) + if action is None: + action = probs.sample() + return action, probs.log_prob(action), probs.entropy(), self.critic( + x), probs.probs + + +class PPOAtariAgent(nn.Module): + """A PPO Atari agent module.""" + + def __init__(self, num_actions, observation_shape, device): + super(PPOAtariAgent, self).__init__() + # Note: this network is intended for atari games, taken from + # https://github.com/vwxyzjn/ppo-implementation-details/blob/main/ppo_atari.py + self.network = nn.Sequential( + layer_init(nn.Conv2d(4, 32, 8, stride=4)), + nn.ReLU(), + layer_init(nn.Conv2d(32, 64, 4, stride=2)), + nn.ReLU(), + layer_init(nn.Conv2d(64, 64, 3, stride=1)), + nn.ReLU(), + nn.Flatten(), + layer_init(nn.Linear(64 * 7 * 7, 512)), + nn.ReLU(), + ) + self.actor = layer_init(nn.Linear(512, num_actions), std=0.01) + self.critic = layer_init(nn.Linear(512, 1), std=1) + self.num_actions = num_actions + self.device = device + self.register_buffer("mask_value", torch.tensor(INVALID_ACTION_PENALTY)) + + def get_value(self, x): + return self.critic(self.network(x / 255.0)) + + def get_action_and_value(self, x, legal_actions_mask=None, action=None): + if legal_actions_mask is None: + legal_actions_mask = torch.ones((len(x), self.num_actions)).bool() + + hidden = self.network(x / 255.0) + logits = self.actor(hidden) + probs = CategoricalMasked( + logits=logits, masks=legal_actions_mask, mask_value=self.mask_value) + + if action is None: + action = probs.sample() + return action, probs.log_prob(action), probs.entropy(), self.critic( + hidden), probs.probs + + +def legal_actions_to_mask(legal_actions_list, num_actions): + """Converts a list of legal actions to a mask. + + The mask has size num actions with a 1 in a legal positions. 
+ + Args: + legal_actions_list: the list of legal actions + num_actions: number of actions (width of mask) + + Returns: + legal actions mask. + """ + legal_actions_mask = torch.zeros((len(legal_actions_list), num_actions), + dtype=torch.bool) + for i, legal_actions in enumerate(legal_actions_list): + legal_actions_mask[i, legal_actions] = 1 + return legal_actions_mask + + +class PPO(nn.Module): + """PPO Agent implementation in PyTorch. + + See open_spiel/python/examples/ppo_example.py for an usage example. + + Note that PPO runs multiple environments concurrently on each step (see + open_spiel/python/vector_env.py). In practice, this tends to improve PPO's + performance. The number of parallel environments is controlled by the + num_envs argument. + """ + + def __init__( + self, + input_shape, + num_actions, + num_players, + player_id=0, + num_envs=1, + steps_per_batch=128, + num_minibatches=4, + update_epochs=4, + learning_rate=2.5e-4, + gae=True, + gamma=0.99, + gae_lambda=0.95, + normalize_advantages=True, + clip_coef=0.2, + clip_vloss=True, + entropy_coef=0.01, + value_coef=0.5, + max_grad_norm=0.5, + target_kl=None, + device="cpu", + writer=None, # Tensorboard SummaryWriter + agent_fn=PPOAtariAgent, + ): + super().__init__() + + self.input_shape = input_shape + self.num_actions = num_actions + self.num_players = num_players + self.player_id = player_id + self.device = device + + # Training settings + self.num_envs = num_envs + self.steps_per_batch = steps_per_batch + self.batch_size = self.num_envs * self.steps_per_batch + self.num_minibatches = num_minibatches + self.minibatch_size = self.batch_size // self.num_minibatches + self.update_epochs = update_epochs + self.learning_rate = learning_rate + + # Loss function + self.gae = gae + self.gamma = gamma + self.gae_lambda = gae_lambda + self.normalize_advantages = normalize_advantages + self.clip_coef = clip_coef + self.clip_vloss = clip_vloss + self.entropy_coef = entropy_coef + self.value_coef = value_coef + self.max_grad_norm = max_grad_norm + self.target_kl = target_kl + + # Logging + self.writer = writer + + # Initialize networks + self.network = agent_fn(self.num_actions, self.input_shape, + device).to(device) + self.optimizer = optim.Adam( + self.parameters(), lr=self.learning_rate, eps=1e-5) + + # Initialize training buffers + self.legal_actions_mask = torch.zeros( + (self.steps_per_batch, self.num_envs, self.num_actions), + dtype=torch.bool).to(device) + self.obs = torch.zeros((self.steps_per_batch, self.num_envs) + + self.input_shape).to(device) + self.actions = torch.zeros((self.steps_per_batch, self.num_envs)).to(device) + self.logprobs = torch.zeros( + (self.steps_per_batch, self.num_envs)).to(device) + self.rewards = torch.zeros((self.steps_per_batch, self.num_envs)).to(device) + self.dones = torch.zeros((self.steps_per_batch, self.num_envs)).to(device) + self.values = torch.zeros((self.steps_per_batch, self.num_envs)).to(device) + + # Initialize counters + self.cur_batch_idx = 0 + self.total_steps_done = 0 + self.updates_done = 0 + self.start_time = time.time() + + def get_value(self, x): + return self.network.get_value(x) + + def get_action_and_value(self, x, legal_actions_mask=None, action=None): + return self.network.get_action_and_value(x, legal_actions_mask, action) + + def step(self, time_step, is_evaluation=False): + if is_evaluation: + with torch.no_grad(): + legal_actions_mask = legal_actions_to_mask([ + ts.observations["legal_actions"][self.player_id] for ts in time_step + ], self.num_actions).to(self.device) + 
obs = torch.Tensor( + np.array([ + np.reshape(ts.observations["info_state"][self.player_id], + self.input_shape) for ts in time_step + ])).to(self.device) + action, _, _, value, probs = self.get_action_and_value( + obs, legal_actions_mask=legal_actions_mask) + return [ + StepOutput(action=a.item(), probs=p) + for (a, p) in zip(action, probs) + ] + else: + with torch.no_grad(): + # act + obs = torch.Tensor( + np.array([ + np.reshape(ts.observations["info_state"][self.player_id], + self.input_shape) for ts in time_step + ])).to(self.device) + legal_actions_mask = legal_actions_to_mask([ + ts.observations["legal_actions"][self.player_id] for ts in time_step + ], self.num_actions).to(self.device) + action, logprob, _, value, probs = self.get_action_and_value( + obs, legal_actions_mask=legal_actions_mask) + + # store + self.legal_actions_mask[self.cur_batch_idx] = legal_actions_mask + self.obs[self.cur_batch_idx] = obs + self.actions[self.cur_batch_idx] = action + self.logprobs[self.cur_batch_idx] = logprob + self.values[self.cur_batch_idx] = value.flatten() + + agent_output = [ + StepOutput(action=a.item(), probs=p) + for (a, p) in zip(action, probs) + ] + return agent_output + + def post_step(self, reward, done): + self.rewards[self.cur_batch_idx] = torch.tensor(reward).to( + self.device).view(-1) + self.dones[self.cur_batch_idx] = torch.tensor(done).to(self.device).view(-1) + + self.total_steps_done += self.num_envs + self.cur_batch_idx += 1 + + def learn(self, time_step): + next_obs = torch.Tensor( + np.array([ + np.reshape(ts.observations["info_state"][self.player_id], + self.input_shape) for ts in time_step + ])).to(self.device) + + # bootstrap value if not done + with torch.no_grad(): + next_value = self.get_value(next_obs).reshape(1, -1) + if self.gae: + advantages = torch.zeros_like(self.rewards).to(self.device) + lastgaelam = 0 + for t in reversed(range(self.steps_per_batch)): + nextvalues = next_value if t == self.steps_per_batch - 1 else self.values[ + t + 1] + nextnonterminal = 1.0 - self.dones[t] + delta = self.rewards[ + t] + self.gamma * nextvalues * nextnonterminal - self.values[t] + advantages[ + t] = lastgaelam = delta + self.gamma * self.gae_lambda * nextnonterminal * lastgaelam + returns = advantages + self.values + else: + returns = torch.zeros_like(self.rewards).to(self.device) + for t in reversed(range(self.steps_per_batch)): + next_return = next_value if t == self.steps_per_batch - 1 else returns[ + t + 1] + nextnonterminal = 1.0 - self.dones[t] + returns[ + t] = self.rewards[t] + self.gamma * nextnonterminal * next_return + advantages = returns - self.values + + # flatten the batch + b_legal_actions_mask = self.legal_actions_mask.reshape( + (-1, self.num_actions)) + b_obs = self.obs.reshape((-1,) + self.input_shape) + b_logprobs = self.logprobs.reshape(-1) + b_actions = self.actions.reshape(-1) + b_advantages = advantages.reshape(-1) + b_returns = returns.reshape(-1) + b_values = self.values.reshape(-1) + + # Optimizing the policy and value network + b_inds = np.arange(self.batch_size) + clipfracs = [] + for _ in range(self.update_epochs): + np.random.shuffle(b_inds) + for start in range(0, self.batch_size, self.minibatch_size): + end = start + self.minibatch_size + mb_inds = b_inds[start:end] + + _, newlogprob, entropy, newvalue, _ = self.get_action_and_value( + b_obs[mb_inds], + legal_actions_mask=b_legal_actions_mask[mb_inds], + action=b_actions.long()[mb_inds]) + logratio = newlogprob - b_logprobs[mb_inds] + ratio = logratio.exp() + + with torch.no_grad(): + # 
calculate approx_kl http://joschu.net/blog/kl-approx.html + old_approx_kl = (-logratio).mean() + approx_kl = ((ratio - 1) - logratio).mean() + clipfracs += [ + ((ratio - 1.0).abs() > self.clip_coef).float().mean().item() + ] + + mb_advantages = b_advantages[mb_inds] + if self.normalize_advantages: + mb_advantages = (mb_advantages - mb_advantages.mean()) / ( + mb_advantages.std() + 1e-8) + + # Policy loss + pg_loss1 = -mb_advantages * ratio + pg_loss2 = -mb_advantages * torch.clamp(ratio, 1 - self.clip_coef, + 1 + self.clip_coef) + pg_loss = torch.max(pg_loss1, pg_loss2).mean() + + # Value loss + newvalue = newvalue.view(-1) + if self.clip_vloss: + v_loss_unclipped = (newvalue - b_returns[mb_inds])**2 + v_clipped = b_values[mb_inds] + torch.clamp( + newvalue - b_values[mb_inds], + -self.clip_coef, + self.clip_coef, + ) + v_loss_clipped = (v_clipped - b_returns[mb_inds])**2 + v_loss_max = torch.max(v_loss_unclipped, v_loss_clipped) + v_loss = 0.5 * v_loss_max.mean() + else: + v_loss = 0.5 * ((newvalue - b_returns[mb_inds])**2).mean() + + entropy_loss = entropy.mean() + loss = pg_loss - self.entropy_coef * entropy_loss + v_loss * self.value_coef + + self.optimizer.zero_grad() + loss.backward() + nn.utils.clip_grad_norm_(self.parameters(), self.max_grad_norm) + self.optimizer.step() + + if self.target_kl is not None: + if approx_kl > self.target_kl: + break + + y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy() + var_y = np.var(y_true) + explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - + y_pred) / var_y + + # TRY NOT TO MODIFY: record rewards for plotting purposes + if self.writer is not None: + self.writer.add_scalar("charts/learning_rate", + self.optimizer.param_groups[0]["lr"], + self.total_steps_done) + self.writer.add_scalar("losses/value_loss", v_loss.item(), + self.total_steps_done) + self.writer.add_scalar("losses/policy_loss", pg_loss.item(), + self.total_steps_done) + self.writer.add_scalar("losses/entropy", entropy_loss.item(), + self.total_steps_done) + self.writer.add_scalar("losses/old_approx_kl", old_approx_kl.item(), + self.total_steps_done) + self.writer.add_scalar("losses/approx_kl", approx_kl.item(), + self.total_steps_done) + self.writer.add_scalar("losses/clipfrac", np.mean(clipfracs), + self.total_steps_done) + self.writer.add_scalar("losses/explained_variance", explained_var, + self.total_steps_done) + self.writer.add_scalar( + "charts/SPS", + int(self.total_steps_done / (time.time() - self.start_time)), + self.total_steps_done) + + # Update counters + self.updates_done += 1 + self.cur_batch_idx = 0 + + def anneal_learning_rate(self, update, num_total_updates): + # Annealing the rate + frac = 1.0 - (update / num_total_updates) + if frac <= 0: + raise ValueError("Annealing learning rate to <= 0") + lrnow = frac * self.learning_rate + self.optimizer.param_groups[0]["lr"] = lrnow diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/ppo_pytorch_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/ppo_pytorch_test.py new file mode 100644 index 0000000..6418a48 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/ppo_pytorch_test.py @@ -0,0 +1,93 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for open_spiel.python.algorithms.ppo.""" + +import random +from absl.testing import absltest +import numpy as np +import torch + +from open_spiel.python import rl_environment +import pyspiel +from open_spiel.python.pytorch.ppo import PPO +from open_spiel.python.pytorch.ppo import PPOAgent +from open_spiel.python.vector_env import SyncVectorEnv + +# A simple two-action game encoded as an EFG game. Going left gets -1, going +# right gets a +1. +SIMPLE_EFG_DATA = """ + EFG 2 R "Simple single-agent problem" { "Player 1" } "" + p "ROOT" 1 1 "ROOT" { "L" "R" } 0 + t "L" 1 "Outcome L" { -1.0 } + t "R" 2 "Outcome R" { 1.0 } +""" +SEED = 24261711 + + +class PPOTest(absltest.TestCase): + + def test_simple_game(self): + game = pyspiel.load_efg_game(SIMPLE_EFG_DATA) + env = rl_environment.Environment(game=game) + envs = SyncVectorEnv([env]) + agent_fn = PPOAgent + anneal_lr = True + + info_state_shape = tuple( + np.array(env.observation_spec()["info_state"]).flatten()) + + total_timesteps = 1000 + steps_per_batch = 8 + batch_size = int(len(envs) * steps_per_batch) + num_updates = total_timesteps // batch_size + agent = PPO( + input_shape=info_state_shape, + num_actions=game.num_distinct_actions(), + num_players=game.num_players(), + player_id=0, + num_envs=1, + agent_fn=agent_fn, + ) + + time_step = envs.reset() + for update in range(num_updates): + for _ in range(steps_per_batch): + agent_output = agent.step(time_step) + time_step, reward, done, _ = envs.step( + agent_output, reset_if_done=True) + agent.post_step(reward, done) + + if anneal_lr: + agent.anneal_learning_rate(update, num_updates) + + agent.learn(time_step) + + total_eval_reward = 0 + n_total_evaluations = 1000 + n_evaluations = 0 + time_step = envs.reset() + while n_evaluations < n_total_evaluations: + agent_output = agent.step(time_step, is_evaluation=True) + time_step, reward, done, _ = envs.step( + agent_output, reset_if_done=True) + total_eval_reward += reward[0][0] + n_evaluations += sum(done) + self.assertGreaterEqual(total_eval_reward, 900) + + +if __name__ == "__main__": + random.seed(SEED) + torch.manual_seed(SEED) + np.random.seed(SEED) + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/rcfr.py b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/rcfr.py new file mode 100644 index 0000000..c76c260 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/rcfr.py @@ -0,0 +1,864 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Regression counterfactual regret minimization (RCFR) [Waugh et al., 2015; Morrill, 2016]. + +In contrast to (tabular) counterfactual regret minimization (CFR) +[Zinkevich et al., 2007], RCFR replaces the table of regrets that generate the +current policy profile with a profile of regression models. The average +policy is still tracked exactly with a full game-size table. The exploitability +of the average policy in zero-sum games decreases as the model accuracy and +the number of iterations increase [Waugh et al., 2015; Morrill, 2016]. As long +as the regression model errors decrease across iterations, the average policy +converges toward a Nash equilibrium in zero-sum games. + +# References + +Dustin Morrill. Using Regret Estimation to Solve Games Compactly. + M.Sc. thesis, Computing Science Department, University of Alberta, + Apr 1, 2016, Edmonton Alberta, Canada. +Kevin Waugh, Dustin Morrill, J. Andrew Bagnell, and Michael Bowling. + Solving Games with Functional Regret Estimation. At the Twenty-Ninth AAAI + Conference on Artificial Intelligence, January 25-29, 2015, Austin Texas, + USA. Pages 2138-2145. +Martin Zinkevich, Michael Johanson, Michael Bowling, and Carmelo Piccione. + Regret Minimization in Games with Incomplete Information. + At Advances in Neural Information Processing Systems 20 (NeurIPS). 2007. +""" + +import numpy as np +import torch +from torch import nn +import torch.nn.functional as F + + +def tensor_to_matrix(tensor): + """Converts `tensor` to a matrix (a rank-2 tensor) or raises an exception. + + Args: + tensor: The tensor to convert. + + Returns: + A PyTorch matrix (rank-2 `torch.Tensor`). + + Raises: + ValueError: If `tensor` cannot be trivially converted to a matrix, i.e. + `tensor` has a rank > 2. + """ + tensor = torch.Tensor(tensor) + rank = tensor.ndim + # rank = len(list(tensor.shape)) + if rank > 2: + raise ValueError( + ("Tensor {} cannot be converted into a matrix as it is rank " + "{} > 2.").format(tensor, rank)) + elif rank < 2: + num_columns = 1 if rank == 0 else tensor.shape[0] + tensor = torch.reshape(tensor, [1, num_columns]) + return tensor + + +def with_one_hot_action_features(state_features, legal_actions, + num_distinct_actions): + """Constructs features for each sequence by extending state features. + + Sequences features are constructed by concatenating one-hot features + indicating each action to the information state features and stacking them. + + Args: + state_features: The features for the information state alone. Must be a + `torch.Tensor` with a rank less than or equal to (if batched) 2. + legal_actions: The list of legal actions in this state. Determines the + number of rows in the returned feature matrix. + num_distinct_actions: The number of globally distinct actions in the game. + Determines the length of the action feature vector concatenated onto the + state features. + + Returns: + A `torch.Tensor` feature matrix with one row for each sequence and # state + features plus `num_distinct_actions`-columns. + + Raises: + ValueError: If `state_features` has a rank > 2. + """ + state_features = tensor_to_matrix(state_features) + with_action_features = [] + for action in legal_actions: + action_features = F.one_hot( + torch.tensor([action]), num_classes=num_distinct_actions) + all_features = torch.cat([state_features, action_features], axis=1) + with_action_features.append(all_features) + return torch.cat(with_action_features, axis=0) + + +def sequence_features(state, num_distinct_actions): + """The sequence features at `state`. 
+ + Features are constructed by concatenating `state`'s normalized feature + vector with one-hot vectors indicating each action (see + `with_one_hot_action_features`). + + Args: + state: An OpenSpiel `State`. + num_distinct_actions: The number of globally distinct actions in `state`'s + game. + + Returns: + A `torch.Tensor` feature matrix with one row for each sequence. + """ + return with_one_hot_action_features(state.information_state_tensor(), + state.legal_actions(), + num_distinct_actions) + + +def num_features(game): + """Returns the number of features returned by `sequence_features`. + + Args: + game: An OpenSpiel `Game`. + """ + return game.information_state_tensor_size() + game.num_distinct_actions() + + +class RootStateWrapper(object): + """Analyzes the subgame at a given root state. + + It enumerates features for each player sequence, creates a mapping between + information states to sequence index offsets, and caches terminal values + in a dictionary with history string keys. + + Properties: + root: An OpenSpiel `State`. + sequence_features: A `list` of sequence feature matrices, one for each + player. This list uses depth-first, information state-major ordering, so + sequences are grouped by information state. I.e. the first legal action + in the first state has index 0, the second action in the same information + state has index 1, the third action will have index 3, and so on. + Sequences in the next information state descendant of the first action + will begin indexing its sequences at the number of legal actions in the + ancestor information state. + num_player_sequences: The number of sequences for each player. + info_state_to_sequence_idx: A `dict` mapping each information state string + to the `sequence_features` index of the first sequence in the + corresponding information state. + terminal_values: A `dict` mapping history strings to terminal values for + each player. + """ + + def __init__(self, state): + self.root = state + self._num_distinct_actions = len(state.legal_actions_mask(0)) + + self.sequence_features = [[] for _ in range(state.num_players())] + self.num_player_sequences = [0] * state.num_players() + self.info_state_to_sequence_idx = {} + self.terminal_values = {} + self._walk_descendants(state) + self.sequence_features = [ + torch.cat(rows, axis=0) for rows in self.sequence_features + ] + + def _walk_descendants(self, state): + """Records information about `state` and its descendants.""" + if state.is_terminal(): + self.terminal_values[state.history_str()] = np.array(state.returns()) + return + + elif state.is_chance_node(): + for action, _ in state.chance_outcomes(): + self._walk_descendants(state.child(action)) + return + + player = state.current_player() + info_state = state.information_state_string(player) + actions = state.legal_actions() + + if info_state not in self.info_state_to_sequence_idx: + n = self.num_player_sequences[player] + self.info_state_to_sequence_idx[info_state] = n + self.sequence_features[player].append( + sequence_features(state, self._num_distinct_actions)) + self.num_player_sequences[player] += len(actions) + + for action in actions: + self._walk_descendants(state.child(action)) + + def sequence_weights_to_policy(self, sequence_weights, state): + """Returns a behavioral policy at `state` from sequence weights. + + Args: + sequence_weights: An array of non-negative weights, one for each of + `state.current_player()`'s sequences in `state`'s game. 
+ state: An OpenSpiel `State` that represents an information state in an + alternating-move game. + + Returns: + A `np.array` probability distribution representing the policy in + `state` encoded by `sequence_weights`. Weights corresponding to actions + in `state` are normalized by their sum. + + Raises: + ValueError: If there are too few sequence weights at `state`. + """ + info_state = state.information_state_string() + sequence_offset = self.info_state_to_sequence_idx[info_state] + actions = state.legal_actions() + + sequence_idx_end = sequence_offset + len(actions) + weights = sequence_weights[sequence_offset:sequence_idx_end] + + if len(weights) < len(actions): + raise ValueError( + ("Invalid policy: Policy {player} at sequence offset " + "{sequence_offset} has only {policy_len} elements but there " + "are {num_actions} legal actions.").format( + player=state.current_player(), + sequence_offset=sequence_offset, + policy_len=len(weights), + num_actions=len(actions))) + return normalized_by_sum(weights) + + def sequence_weights_to_policy_fn(self, player_sequence_weights): + """Returns a policy function based on sequence weights for each player. + + Args: + player_sequence_weights: A list of weight arrays, one for each player. + Each array should have a weight for each of that player's sequences in + `state`'s game. + + Returns: + A `State` -> `np.array` function. The output of this function is + a probability distribution that represents the policy at the given + `State` encoded by `player_sequence_weights` according to + `sequence_weights_to_policy`. + """ + + def policy_fn(state): + player = state.current_player() + return self.sequence_weights_to_policy(player_sequence_weights[player], + state) + + return policy_fn + + def sequence_weights_to_tabular_profile(self, player_sequence_weights): + """Returns the tabular profile-form of `player_sequence_weights`.""" + return sequence_weights_to_tabular_profile( + self.root, self.sequence_weights_to_policy_fn(player_sequence_weights)) + + def counterfactual_regrets_and_reach_weights(self, regret_player, + reach_weight_player, + *sequence_weights): + """Returns counterfactual regrets and reach weights as a tuple. + + Args: + regret_player: The player for whom counterfactual regrets are computed. + reach_weight_player: The player for whom reach weights are computed. + *sequence_weights: A list of non-negative sequence weights for each player + determining the policy profile. Behavioral policies are generated by + normalizing sequence weights corresponding to actions in each + information state by their sum. + + Returns: + The counterfactual regrets and reach weights as an `np.array`-`np.array` + tuple. + + Raises: + ValueError: If there are too few sequence weights at any information state + for any player. + """ + num_players = len(sequence_weights) + regrets = np.zeros(self.num_player_sequences[regret_player]) + reach_weights = np.zeros(self.num_player_sequences[reach_weight_player]) + + def _walk_descendants(state, reach_probabilities, chance_reach_probability): + """Compute `state`'s counterfactual regrets and reach weights. + + Args: + state: An OpenSpiel `State`. + reach_probabilities: The probability that each player plays to reach + `state`'s history. + chance_reach_probability: The probability that all chance outcomes in + `state`'s history occur. + + Returns: + The counterfactual value of `state`'s history. + Raises: + ValueError if there are too few sequence weights at any information + state for any player. 
+ """ + + if state.is_terminal(): + player_reach = ( + np.prod(reach_probabilities[:regret_player]) * + np.prod(reach_probabilities[regret_player + 1:])) + + counterfactual_reach_prob = player_reach * chance_reach_probability + u = self.terminal_values[state.history_str()] + return u[regret_player] * counterfactual_reach_prob + + elif state.is_chance_node(): + v = 0.0 + for action, action_prob in state.chance_outcomes(): + v += _walk_descendants( + state.child(action), reach_probabilities, + chance_reach_probability * action_prob) + return v + + player = state.current_player() + info_state = state.information_state_string(player) + sequence_idx_offset = self.info_state_to_sequence_idx[info_state] + actions = state.legal_actions(player) + + sequence_idx_end = sequence_idx_offset + len(actions) + my_sequence_weights = sequence_weights[player][ + sequence_idx_offset:sequence_idx_end] + + if len(my_sequence_weights) < len(actions): + raise ValueError( + ("Invalid policy: Policy {player} at sequence offset " + "{sequence_idx_offset} has only {policy_len} elements but there " + "are {num_actions} legal actions.").format( + player=player, + sequence_idx_offset=sequence_idx_offset, + policy_len=len(my_sequence_weights), + num_actions=len(actions))) + + policy = normalized_by_sum(my_sequence_weights) + action_values = np.zeros(len(actions)) + state_value = 0.0 + + is_reach_weight_player_node = player == reach_weight_player + is_regret_player_node = player == regret_player + + reach_prob = reach_probabilities[player] + for action_idx, action in enumerate(actions): + action_prob = policy[action_idx] + next_reach_prob = reach_prob * action_prob + + if is_reach_weight_player_node: + reach_weight_player_plays_down_this_line = next_reach_prob > 0 + if not reach_weight_player_plays_down_this_line: + continue + sequence_idx = sequence_idx_offset + action_idx + reach_weights[sequence_idx] += next_reach_prob + + reach_probabilities[player] = next_reach_prob + + action_value = _walk_descendants( + state.child(action), reach_probabilities, chance_reach_probability) + + if is_regret_player_node: + state_value = state_value + action_prob * action_value + else: + state_value = state_value + action_value + action_values[action_idx] = action_value + + reach_probabilities[player] = reach_prob + + if is_regret_player_node: + regrets[sequence_idx_offset:sequence_idx_end] += ( + action_values - state_value) + return state_value + + # End of _walk_descendants + + _walk_descendants(self.root, np.ones(num_players), 1.0) + return regrets, reach_weights + + +def normalized_by_sum(v, axis=0, mutate=False): + """Divides each element of `v` along `axis` by the sum of `v` along `axis`. + + Assumes `v` is non-negative. Sets of `v` elements along `axis` that sum to + zero are normalized to `1 / v.shape[axis]` (a uniform distribution). + + Args: + v: Non-negative array of values. + axis: An integer axis. + mutate: Whether or not to store the result in `v`. + + Returns: + The normalized array. + """ + v = np.asarray(v) + denominator = v.sum(axis=axis, keepdims=True) + denominator_is_zero = denominator == 0 + + # Every element of `denominator_is_zero` that is true corresponds to a + # set of elements in `v` along `axis` that are all zero. By setting these + # denominators to `v.shape[axis]` and adding 1 to each of the corresponding + # elements in `v`, these elements are normalized to `1 / v.shape[axis]` + # (a uniform distribution). 
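+  # Worked example (illustration only): normalized_by_sum([2., 0., 2.]) yields
+  # [0.5, 0., 0.5], while an all-zero input like [0., 0.] is mapped to the
+  # uniform distribution [0.5, 0.5] by the correction below.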
+ denominator += v.shape[axis] * denominator_is_zero + if mutate: + v += denominator_is_zero + v /= denominator + else: + v = (v + denominator_is_zero) / denominator + return v + + +def relu(v): + """Returns the element-wise maximum between `v` and 0.""" + return np.maximum(v, 0) + + +def _descendant_states(state, depth_limit, depth, include_terminals, + include_chance_states): + """Recursive descendant state generator. + + Decision states are always yielded. + + Args: + state: The current state. + depth_limit: The descendant depth limit. Zero will ensure only + `initial_state` is generated and negative numbers specify the absence of a + limit. + depth: The current descendant depth. + include_terminals: Whether or not to include terminal states. + include_chance_states: Whether or not to include chance states. + + Yields: + `State`, a state that is `initial_state` or one of its descendants. + """ + if state.is_terminal(): + if include_terminals: + yield state + return + + if depth > depth_limit >= 0: + return + + if not state.is_chance_node() or include_chance_states: + yield state + + for action in state.legal_actions(): + state_for_search = state.child(action) + for substate in _descendant_states(state_for_search, depth_limit, depth + 1, + include_terminals, + include_chance_states): + yield substate + + +def all_states(initial_state, + depth_limit=-1, + include_terminals=False, + include_chance_states=False): + """Generates states from `initial_state`. + + Generates the set of states that includes only the `initial_state` and its + descendants that satisfy the inclusion criteria specified by the remaining + parameters. Decision states are always included. + + Args: + initial_state: The initial state from which to generate states. + depth_limit: The descendant depth limit. Zero will ensure only + `initial_state` is generated and negative numbers specify the absence of a + limit. Defaults to no limit. + include_terminals: Whether or not to include terminal states. Defaults to + `False`. + include_chance_states: Whether or not to include chance states. Defaults to + `False`. + + Returns: + A generator that yields the `initial_state` and its descendants that + satisfy the inclusion criteria specified by the remaining parameters. + """ + return _descendant_states( + state=initial_state, + depth_limit=depth_limit, + depth=0, + include_terminals=include_terminals, + include_chance_states=include_chance_states) + + +def sequence_weights_to_tabular_profile(root, policy_fn): + """Returns the `dict` of `list`s of action-prob pairs-form of `policy_fn`.""" + tabular_policy = {} + players = list(range(root.num_players())) + for state in all_states(root): + for player in players: + legal_actions = state.legal_actions(player) + if len(legal_actions) < 1: + continue + info_state = state.information_state_string(player) + if info_state in tabular_policy: + continue + my_policy = policy_fn(state) + tabular_policy[info_state] = list(zip(legal_actions, my_policy)) + return tabular_policy + + +def feedforward_evaluate(layers, + x, + use_skip_connections=False, + hidden_are_factored=False, + hidden_activation=nn.ReLU): + """Evaluates `layers` as a feedforward neural network on `x`. + + Args: + layers: The neural network layers (`torch.Tensor` -> `torch.Tensor` + callables). + x: The array-like input to evaluate. Must be trivially convertible to a + matrix (tensor rank <= 2). + use_skip_connections: Whether or not to use skip connections between layers. 
+ If the layer input has too few features to be added to the layer output, + then the end of input is padded with zeros. If it has too many features, + then the input is truncated. + hidden_are_factored: Whether or not hidden logical layers are factored into + two separate linear transformations stored as adjacent elements of + `layers`. + hidden_activation: the activation function following the hidden layers. + + Returns: + The `torch.Tensor` evaluation result. + + Raises: + ValueError: If `x` has a rank greater than 2. + """ + x = tensor_to_matrix(x) + i = 0 + while i < len(layers) - 1: + if isinstance(layers[i], hidden_activation): + x = layers[i](x) + i += 1 + continue + y = layers[i](x) + i += 1 + if hidden_are_factored: + y = layers[i](y) + i += 1 + if use_skip_connections: + my_num_features = x.shape[1] + padding = y.shape[1] - my_num_features + if padding > 0: + zeros = torch.zeros([x.shape[0], padding]) + x = torch.cat([x, zeros], axis=1) + elif padding < 0: + x = x[0:x.shape[0], 0:y.shape[1]] + y = x + y + x = y + return layers[-1](x) + + +class DeepRcfrModel(nn.Module): + """A flexible deep feedforward RCFR model class. + + Properties: + layers: The `torch.keras.Layer` layers describing this model. + """ + + def __init__(self, + game, + num_hidden_units, + num_hidden_layers=1, + num_hidden_factors=0, + hidden_activation=nn.ReLU, + use_skip_connections=False, + regularizer=None): + """Creates a new `DeepRcfrModel. + + Args: + game: The OpenSpiel game being solved. + num_hidden_units: The number of units in each hidden layer. + num_hidden_layers: The number of hidden layers. Defaults to 1. + num_hidden_factors: The number of hidden factors or the matrix rank of the + layer. If greater than zero, hidden layers will be split into two + separate linear transformations, the first with + `num_hidden_factors`-columns and the second with + `num_hidden_units`-columns. The result is that the logical hidden layer + is a rank-`num_hidden_units` matrix instead of a rank-`num_hidden_units` + matrix. When `num_hidden_units < num_hidden_units`, this is effectively + implements weight sharing. Defaults to 0. + hidden_activation: The activation function to apply over hidden layers. + Defaults to `torch.nn.ReLU`. + use_skip_connections: Whether or not to apply skip connections (layer + output = layer(x) + x) on hidden layers. Zero padding or truncation is + used to match the number of columns on layer inputs and outputs. + regularizer: A regularizer to apply to each layer. Defaults to `None`. + """ + super(DeepRcfrModel, self).__init__() + self._use_skip_connections = use_skip_connections + self._hidden_are_factored = num_hidden_factors > 0 + self._hidden_activation = hidden_activation + input_size = num_features(game) + + self.layers = [] + for _ in range(num_hidden_layers): + if self._hidden_are_factored: + self.layers.append(nn.Linear(input_size, num_hidden_factors, bias=True)) + + self.layers.append( + nn.Linear( + num_hidden_factors if self._hidden_are_factored else input_size, + num_hidden_units, + bias=True)) + if hidden_activation: + self.layers.append(hidden_activation()) + + self.layers.append(nn.Linear(num_hidden_units, 1, bias=True)) + + self.layers = nn.ModuleList(self.layers) + # Construct variables for all layers by exercising the network. 
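+    # Descriptive note: PyTorch `nn.Linear` layers create their parameters at
+    # construction time, so this dummy forward pass does not build variables;
+    # it exercises the layers once and will raise if adjacent layer shapes
+    # are inconsistent.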
+ x = torch.zeros([1, num_features(game)]) + for layer in self.layers: + x = layer(x) + + def __call__(self, x): + """Evaluates this model on `x`.""" + return feedforward_evaluate( + layers=self.layers, + x=x, + use_skip_connections=self._use_skip_connections, + hidden_are_factored=self._hidden_are_factored, + hidden_activation=self._hidden_activation) + + +class _RcfrSolver(object): + """An abstract RCFR solver class. + + Requires that subclasses implement `evaluate_and_update_policy`. + """ + + def __init__(self, game, models, truncate_negative=False): + """Creates a new `_RcfrSolver`. + + Args: + game: An OpenSpiel `Game`. + models: Current policy models (optimizable array-like -> `torch.Tensor` + callables) for both players. + truncate_negative: Whether or not to truncate negative (approximate) + cumulative regrets to zero to implement RCFR+. Defaults to `False`. + """ + self._game = game + self._models = models + self._truncate_negative = truncate_negative + self._root_wrapper = RootStateWrapper(game.new_initial_state()) + + self._cumulative_seq_probs = [ + np.zeros(n) for n in self._root_wrapper.num_player_sequences + ] + + def _sequence_weights(self, player=None): + """Returns regret-like weights for each sequence as an `np.array`. + + Negative weights are truncated to zero. + + Args: + player: The player to compute weights for, or both if `player` is `None`. + Defaults to `None`. + """ + if player is None: + return [ + self._sequence_weights(player) + for player in range(self._game.num_players()) + ] + else: + tensor = F.relu( + torch.squeeze(self._models[player]( + self._root_wrapper.sequence_features[player]))) + return tensor.detach().numpy() + + def evaluate_and_update_policy(self, train_fn): + """Performs a single step of policy evaluation and policy improvement. + + Args: + train_fn: A (model, `torch.data.Dataset`) function that trains the given + regression model to accurately reproduce the x to y mapping given x-y + data. + + Raises: + NotImplementedError: If not overridden by child class. + """ + raise NotImplementedError() + + def current_policy(self): + """Returns the current policy profile. + + Returns: + A `dict>` that maps info state + strings to `Action`-probability pairs describing each player's policy. + """ + return self._root_wrapper.sequence_weights_to_tabular_profile( + self._sequence_weights()) + + def average_policy(self): + """Returns the average of all policies iterated. + + This average policy converges toward a Nash policy as the number of + iterations increases as long as the regret prediction error decreases + continually [Morrill, 2016]. + + The policy is computed using the accumulated policy probabilities computed + using `evaluate_and_update_policy`. + + Returns: + A `dict>` that maps info state + strings to (Action, probability) pairs describing each player's policy. + """ + return self._root_wrapper.sequence_weights_to_tabular_profile( + self._cumulative_seq_probs) + + def _previous_player(self, player): + """The previous player in the turn ordering.""" + return player - 1 if player > 0 else self._game.num_players() - 1 + + def _average_policy_update_player(self, regret_player): + """The player for whom the average policy should be updated.""" + return self._previous_player(regret_player) + + +class RcfrSolver(_RcfrSolver): + """RCFR with an effectively infinite regret data buffer. + + Exact or bootstrapped cumulative regrets are stored as if an infinitely + large data buffer. The average strategy is updated and stored in a full + game-size table. 
Reproduces the RCFR versions used in experiments by + Waugh et al. [2015] and Morrill [2016] except that this class does not + restrict the user to regression tree models. + """ + + def __init__(self, game, models, bootstrap=None, truncate_negative=False): + self._bootstrap = bootstrap + super(RcfrSolver, self).__init__( + game, models, truncate_negative=truncate_negative) + + self._regret_targets = [ + np.zeros(n) for n in self._root_wrapper.num_player_sequences + ] + + def evaluate_and_update_policy(self, train_fn): + """Performs a single step of policy evaluation and policy improvement. + + Args: + train_fn: A (model, `torch.data.Dataset`) function that trains the given + regression model to accurately reproduce the x to y mapping given x-y + data. + """ + sequence_weights = self._sequence_weights() + player_seq_features = self._root_wrapper.sequence_features + for regret_player in range(self._game.num_players()): + seq_prob_player = self._average_policy_update_player(regret_player) + + regrets, seq_probs = ( + self._root_wrapper.counterfactual_regrets_and_reach_weights( + regret_player, seq_prob_player, *sequence_weights)) + + if self._bootstrap: + self._regret_targets[regret_player][:] = sequence_weights[regret_player] + if self._truncate_negative: + regrets = np.maximum(-relu(self._regret_targets[regret_player]), + regrets) + + self._regret_targets[regret_player] += regrets + self._cumulative_seq_probs[seq_prob_player] += seq_probs + + targets = torch.unsqueeze( + torch.Tensor(self._regret_targets[regret_player]), axis=1) + data = torch.utils.data.TensorDataset(player_seq_features[regret_player], + targets) + + regret_player_model = self._models[regret_player] + train_fn(regret_player_model, data) + sequence_weights[regret_player] = self._sequence_weights(regret_player) + + +class ReservoirBuffer(object): + """A generic reservoir buffer data structure. + + After every insertion, its contents represents a `size`-size uniform + random sample from the stream of candidates that have been encountered. + """ + + def __init__(self, size): + self.size = size + self.num_elements = 0 + self._buffer = np.full([size], None, dtype=object) + self._num_candidates = 0 + + @property + def buffer(self): + return self._buffer[:self.num_elements] + + def insert(self, candidate): + """Consider this `candidate` for inclusion in this sampling buffer.""" + self._num_candidates += 1 + if self.num_elements < self.size: + self._buffer[self.num_elements] = candidate + self.num_elements += 1 + return + idx = np.random.choice(self._num_candidates) + if idx < self.size: + self._buffer[idx] = candidate + + def insert_all(self, candidates): + """Consider all `candidates` for inclusion in this sampling buffer.""" + for candidate in candidates: + self.insert(candidate) + + def num_available_spaces(self): + """The number of freely available spaces in this buffer.""" + return self.size - self.num_elements + + +class ReservoirRcfrSolver(_RcfrSolver): + """RCFR with a reservoir buffer for storing regret data. + + The average strategy is updated and stored in a full game-size table. + """ + + def __init__(self, game, models, buffer_size, truncate_negative=False): + self._buffer_size = buffer_size + super(ReservoirRcfrSolver, self).__init__( + game, models, truncate_negative=truncate_negative) + self._reservoirs = [ + ReservoirBuffer(self._buffer_size) for _ in range(game.num_players()) + ] + + def evaluate_and_update_policy(self, train_fn): + """Performs a single step of policy evaluation and policy improvement. 
+ + Args: + train_fn: A (model, `torch.data.Dataset`) function that trains the given + regression model to accurately reproduce the x to y mapping given x-y + data. + """ + sequence_weights = self._sequence_weights() + player_seq_features = self._root_wrapper.sequence_features + for regret_player in range(self._game.num_players()): + seq_prob_player = self._average_policy_update_player(regret_player) + + regrets, seq_probs = ( + self._root_wrapper.counterfactual_regrets_and_reach_weights( + regret_player, seq_prob_player, *sequence_weights)) + + if self._truncate_negative: + regrets = np.maximum(-relu(sequence_weights[regret_player]), regrets) + + next_data = list( + zip(player_seq_features[regret_player], + torch.unsqueeze(torch.Tensor(regrets), axis=1))) + + self._reservoirs[regret_player].insert_all(next_data) + + self._cumulative_seq_probs[seq_prob_player] += seq_probs + + my_buffer = list( + torch.stack(a) for a in zip(*self._reservoirs[regret_player].buffer)) + + data = torch.utils.data.TensorDataset(*my_buffer) + + regret_player_model = self._models[regret_player] + train_fn(regret_player_model, data) + sequence_weights[regret_player] = self._sequence_weights(regret_player) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/rcfr_pytorch_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/rcfr_pytorch_test.py new file mode 100644 index 0000000..1e55e8c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/pytorch/rcfr_pytorch_test.py @@ -0,0 +1,569 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools + +from absl.testing import absltest +from absl.testing import parameterized + +import numpy as np +import torch +import torch.nn as nn +# Note: this import needs to come before Tensorflow to fix a malloc error. +import pyspiel # pylint: disable=g-bad-import-order + +from open_spiel.python.pytorch import rcfr + +_GAME = pyspiel.load_game('kuhn_poker') +_BOOLEANS = [False, True] + +_BATCH_SIZE = 12 +SEED = 24984617 + + +def _new_model(): + return rcfr.DeepRcfrModel( + _GAME, + num_hidden_layers=1, + num_hidden_units=13, + num_hidden_factors=1, + use_skip_connections=True) + + +class RcfrTest(parameterized.TestCase, absltest.TestCase): + + def setUp(self): + # pylint: disable=useless-super-delegation + super(RcfrTest, self).setUp() + + def assertListAlmostEqual(self, list1, list2, delta=1e-06): + self.assertEqual(len(list1), len(list2)) + for a, b in zip(list1, list2): + self.assertAlmostEqual(a, b, delta=delta) + + def test_with_one_hot_action_features_single_state_vector(self): + information_state_features = [1., 2., 3.] 
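+    # Each expected row below is the 3 state features followed by a one-hot
+    # encoding of one legal action, giving 3 + num_distinct_actions columns.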
+ features = rcfr.with_one_hot_action_features( + information_state_features, + legal_actions=[0, 1], + num_distinct_actions=3) + np.testing.assert_array_equal([1., 2., 3., 1., 0., 0.], features[0]) + np.testing.assert_array_equal([1., 2., 3., 0., 1., 0.], features[1]) + + features = rcfr.with_one_hot_action_features( + information_state_features, + legal_actions=[1, 2], + num_distinct_actions=3) + np.testing.assert_array_equal([1., 2., 3., 0., 1., 0.], features[0]) + np.testing.assert_array_equal([1., 2., 3., 0., 0., 1.], features[1]) + + def test_sequence_features(self): + state = _GAME.new_initial_state() + while state.is_chance_node(): + state.apply_action(state.legal_actions()[0]) + assert len(state.legal_actions()) == 2 + features = rcfr.sequence_features(state, 3) + + x = state.information_state_tensor() + np.testing.assert_array_equal(x + [1., 0., 0.], features[0]) + np.testing.assert_array_equal(x + [0., 1., 0.], features[1]) + + def test_num_features(self): + assert rcfr.num_features(_GAME) == 13 + + def test_root_state_wrapper_num_sequences(self): + root_state_wrapper = rcfr.RootStateWrapper(_GAME.new_initial_state()) + assert root_state_wrapper.num_player_sequences[0] == 12 + assert root_state_wrapper.num_player_sequences[1] == 12 + + def test_root_state_wrapper_sequence_indices(self): + root_state_wrapper = rcfr.RootStateWrapper(_GAME.new_initial_state()) + self.assertEqual( + { + # Info state string -> initial sequence index map for player 1. + '0': 0, + '0pb': 2, + '1': 4, + '1pb': 6, + '2': 8, + '2pb': 10, + # Info state string -> initial sequence index map for player 2. + '1p': 0, + '1b': 2, + '2p': 4, + '2b': 6, + '0p': 8, + '0b': 10, + }, + root_state_wrapper.info_state_to_sequence_idx) + + def test_root_state_wrapper_sequence_features(self): + root_state_wrapper = rcfr.RootStateWrapper(_GAME.new_initial_state()) + + p1_info_state_features = [ + [1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.], + [1., 0., 1., 0., 0., 1., 0., 0., 1., 0., 0.], + [1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.], + [1., 0., 0., 1., 0., 1., 0., 0., 1., 0., 0.], + [1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.], + [1., 0., 0., 0., 1., 1., 0., 0., 1., 0., 0.], + ] + p2_info_state_features = [ + [0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0.], + [0., 1., 0., 1., 0., 0., 1., 0., 0., 0., 0.], + [0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 0.], + [0., 1., 0., 0., 1., 0., 1., 0., 0., 0., 0.], + [0., 1., 1., 0., 0., 1., 0., 0., 0., 0., 0.], + [0., 1., 1., 0., 0., 0., 1., 0., 0., 0., 0.], + ] + action_features = [[1., 0.], [0., 1.]] + expected_p1_sequence_features = [ + p1_info_state_features[0] + action_features[0], + p1_info_state_features[0] + action_features[1], + p1_info_state_features[1] + action_features[0], + p1_info_state_features[1] + action_features[1], + p1_info_state_features[2] + action_features[0], + p1_info_state_features[2] + action_features[1], + p1_info_state_features[3] + action_features[0], + p1_info_state_features[3] + action_features[1], + p1_info_state_features[4] + action_features[0], + p1_info_state_features[4] + action_features[1], + p1_info_state_features[5] + action_features[0], + p1_info_state_features[5] + action_features[1], + ] + expected_p2_sequence_features = [ + p2_info_state_features[0] + action_features[0], + p2_info_state_features[0] + action_features[1], + p2_info_state_features[1] + action_features[0], + p2_info_state_features[1] + action_features[1], + p2_info_state_features[2] + action_features[0], + p2_info_state_features[2] + action_features[1], + p2_info_state_features[3] + 
action_features[0], + p2_info_state_features[3] + action_features[1], + p2_info_state_features[4] + action_features[0], + p2_info_state_features[4] + action_features[1], + p2_info_state_features[5] + action_features[0], + p2_info_state_features[5] + action_features[1], + ] + np.testing.assert_array_equal(expected_p1_sequence_features, + root_state_wrapper.sequence_features[0]) + np.testing.assert_array_equal(expected_p2_sequence_features, + root_state_wrapper.sequence_features[1]) + + def test_root_state_wrapper_sequence_terminal_values(self): + root_state_wrapper = rcfr.RootStateWrapper(_GAME.new_initial_state()) + + expected_terminal_values = {} + no_call_histories_p1_win = [ + '2, 0, 0, 0', '2, 0, 1, 0', '0, 1, 1, 0', '1, 2, 1, 0', '1, 0, 1, 0', + '1, 0, 0, 0', '2, 1, 1, 0', '2, 1, 0, 0', '0, 2, 1, 0' + ] + for h in no_call_histories_p1_win: + expected_terminal_values[h] = [1., -1.] + + no_call_histories_p2_win = [ + '0, 2, 0, 1, 0', '0, 1, 0, 0', '0, 1, 0, 1, 0', '0, 2, 0, 0', + '1, 2, 0, 0', '2, 0, 0, 1, 0', '1, 2, 0, 1, 0', '2, 1, 0, 1, 0', + '1, 0, 0, 1, 0' + ] + for h in no_call_histories_p2_win: + expected_terminal_values[h] = [-1., 1.] + + call_histories_p1_win = [ + '1, 0, 1, 1', '2, 1, 1, 1', '2, 1, 0, 1, 1', '2, 0, 0, 1, 1', + '1, 0, 0, 1, 1', '2, 0, 1, 1' + ] + for h in call_histories_p1_win: + expected_terminal_values[h] = [2., -2.] + + call_histories_p2_win = [ + '0, 2, 0, 1, 1', '0, 1, 0, 1, 1', '0, 1, 1, 1', '1, 2, 1, 1', + '1, 2, 0, 1, 1', '0, 2, 1, 1' + ] + for h in call_histories_p2_win: + expected_terminal_values[h] = [-2., 2.] + + self.assertEqual( + expected_terminal_values, + {k: v.tolist() for k, v in root_state_wrapper.terminal_values.items()}) + + def test_normalized_by_sum(self): + self.assertListAlmostEqual( + rcfr.normalized_by_sum([1., 2., 3., 4.]), [0.1, 0.2, 0.3, 0.4]) + + def test_counterfactual_regrets_and_reach_weights_value_error(self): + root = rcfr.RootStateWrapper(_GAME.new_initial_state()) + + # Initialize arbitrary weights to generate an arbitrary profile. + sequence_weights1_with_a_missing_sequence = [ + 0.4967141530112327, + 0.0, + 0.6476885381006925, + 1.5230298564080254, + 0.0, + 0.0, + 1.5792128155073915, + 0.7674347291529088, + 0.0, + 0.5425600435859647, + 0.0, + # 0.0, + ] + # Ensure this player's policy is fully mixed so that each of player 1's + # information states are reached. + sequence_weights2 = [ + 0.24196227156603412, + 0.1, + 0.1, + 0.1, + 0.1, + 0.3142473325952739, + 0.1, + 0.1, + 1.465648768921554, + 0.1, + 0.06752820468792384, + 0.1, + ] + + with self.assertRaises(ValueError): + root.counterfactual_regrets_and_reach_weights( + 0, 1, sequence_weights1_with_a_missing_sequence, sequence_weights2) + + def test_counterfactual_regrets_and_reach_weights(self): + root = rcfr.RootStateWrapper(_GAME.new_initial_state()) + + # Initialize arbitrary weights to generate an arbitrary profile. + sequence_weights1 = [ + 0.4967141530112327, + 0.0, + 0.6476885381006925, + 1.5230298564080254, + 0.0, + 0.0, + 1.5792128155073915, + 0.7674347291529088, + 0.0, + 0.5425600435859647, + 0.0, + 0.0, + ] + sequence_weights2 = [ + 0.24196227156603412, + 0.0, + 0.0, + 0.0, + 0.0, + 0.3142473325952739, + 0.0, + 0.0, + 1.465648768921554, + 0.0, + 0.06752820468792384, + 0.0, + ] + + # These expected regrets and sequence weights were computed for the given + # sequence weights. 
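+    # (Counterfactual regrets are for player 0 and reach weights for player 1,
+    # matching the (0, 1) arguments passed below.)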
+ expected_regrets_given_sequence_weights = [ + 0., + 0.283604, + 0.116937, + -0.049729, + -0.06892, + 0.06892, + 0.054506, + -0.112161, + -0.083333, + 0., + 0., + 0., + ] + expected_reach_weights_given_sequence_weights = [ + 2., + 0., + 1., + 1., + 0., + 2., + 1., + 1., + 2., + 0., + 2., + 0., + ] + + regrets, weights = root.counterfactual_regrets_and_reach_weights( + 0, 1, sequence_weights1, sequence_weights2) + + self.assertListAlmostEqual( + regrets, + expected_regrets_given_sequence_weights) + self.assertListAlmostEqual( + weights, + expected_reach_weights_given_sequence_weights) + + def test_all_states(self): + states = rcfr.all_states( + _GAME.new_initial_state(), + depth_limit=-1, + include_terminals=False, + include_chance_states=False) + self.assertLen(list(states), 24) + + states = rcfr.all_states( + _GAME.new_initial_state(), + depth_limit=-1, + include_terminals=True, + include_chance_states=False) + self.assertLen(list(states), 54) + + states = rcfr.all_states( + _GAME.new_initial_state(), + depth_limit=-1, + include_terminals=False, + include_chance_states=True) + self.assertLen(list(states), 28) + + states = rcfr.all_states( + _GAME.new_initial_state(), + depth_limit=-1, + include_terminals=True, + include_chance_states=True) + self.assertLen(list(states), 58) + + def test_sequence_weights_to_tabular_profile(self): + root = rcfr.RootStateWrapper(_GAME.new_initial_state()) + + def policy_fn(state): + """Generates a policy profile by treating sequence indices as weights.""" + info_state = state.information_state_string() + sequence_offset = root.info_state_to_sequence_idx[info_state] + num_actions = len(state.legal_actions()) + return rcfr.normalized_by_sum( + list(range(sequence_offset, sequence_offset + num_actions))) + + profile = rcfr.sequence_weights_to_tabular_profile(root.root, policy_fn) + + expected_profile = { + # Player 1 + '0': [(0, 0.), (1, 1.)], # Sequences 0 and 1 (sums to 1) + '0pb': [(0, 0.4), (1, 0.6)], # Sequences 2 and 3 (sums to 5) + # Sequences 4 and 5 (sums to 9) + '1': [(0, 0.44444444444444442), (1, 0.55555555555555558)], + # Sequences 6 and 7 (sums to 13) + '1pb': [(0, 0.46153846153846156), (1, 0.53846153846153844)], + # Sequences 8 and 9 (sums to 17) + '2': [(0, 0.47058823529411764), (1, 0.52941176470588236)], + # Sequences 10 and 11 (sums to 21) + '2pb': [(0, 0.47619047619047616), (1, 0.52380952380952384)], + + # Player 2 + '1p': [(0, 0.), (1, 1.)], # Sequences 0 and 1 (sums to 1) + '1b': [(0, 0.4), (1, 0.6)], # Sequences 2 and 3 (sums to 5) + # Sequences 4 and 5 (sums to 9) + '2p': [(0, 0.44444444444444442), (1, 0.55555555555555558)], + # Sequences 6 and 7 (sums to 13) + '2b': [(0, 0.46153846153846156), (1, 0.53846153846153844)], + # Sequences 8 and 9 (sums to 17) + '0p': [(0, 0.47058823529411764), (1, 0.52941176470588236)], + # Sequences 10 and 11 (sums to 21) + '0b': [(0, 0.47619047619047616), (1, 0.52380952380952384)], + } + self.assertAlmostEqual(profile, expected_profile, delta=1e-06) + + def test_cfr(self): + root = rcfr.RootStateWrapper(_GAME.new_initial_state()) + num_half_iterations = 6 + + cumulative_regrets = [np.zeros(n) for n in root.num_player_sequences] + cumulative_reach_weights = [np.zeros(n) for n in root.num_player_sequences] + + average_profile = root.sequence_weights_to_tabular_profile( + cumulative_reach_weights) + # parameterized.TestCase + self.assertGreater(pyspiel.nash_conv(_GAME, average_profile), 0.91) + + regret_player = 0 + for _ in range(num_half_iterations): + reach_weights_player = 1 if regret_player == 0 else 0 + + 
regrets, reach = root.counterfactual_regrets_and_reach_weights( + regret_player, reach_weights_player, *rcfr.relu(cumulative_regrets)) + + cumulative_regrets[regret_player] += regrets + cumulative_reach_weights[reach_weights_player] += reach + + regret_player = reach_weights_player + + average_profile = root.sequence_weights_to_tabular_profile( + cumulative_reach_weights) + self.assertLess(pyspiel.nash_conv(_GAME, average_profile), 0.27) + + def test_rcfr_functions(self): + models = [_new_model() for _ in range(_GAME.num_players())] + root = rcfr.RootStateWrapper(_GAME.new_initial_state()) + + num_half_iterations = 4 + num_epochs = 100 + + cumulative_regrets = [np.zeros(n) for n in root.num_player_sequences] + cumulative_reach_weights = [np.zeros(n) for n in root.num_player_sequences] + + average_profile = root.sequence_weights_to_tabular_profile( + cumulative_reach_weights) + self.assertGreater(pyspiel.nash_conv(_GAME, average_profile), 0.91) + + regret_player = 0 + sequence_weights = [ + model(root.sequence_features[player]).detach().numpy() + for player, model in enumerate(models) + ] + + for _ in range(num_half_iterations): + reach_weights_player = 1 if regret_player == 0 else 0 + + sequence_weights[reach_weights_player] = models[reach_weights_player]( + root.sequence_features[reach_weights_player]).detach().numpy() + + regrets, seq_probs = root.counterfactual_regrets_and_reach_weights( + regret_player, reach_weights_player, *sequence_weights) + + cumulative_regrets[regret_player] += regrets + cumulative_reach_weights[reach_weights_player] += seq_probs + + data = torch.utils.data.TensorDataset( + root.sequence_features[regret_player], + torch.unsqueeze( + torch.Tensor(cumulative_regrets[regret_player]), axis=1)) + data = torch.utils.data.DataLoader( + data, batch_size=_BATCH_SIZE, shuffle=True) + + loss_fn = nn.SmoothL1Loss() + optimizer = torch.optim.Adam( + models[regret_player].parameters(), lr=0.005, amsgrad=True) + for _ in range(num_epochs): + for x, y in data: + optimizer.zero_grad() + output = models[regret_player](x) + loss = loss_fn(output, y) + loss.backward() + optimizer.step() + + regret_player = reach_weights_player + + average_profile = root.sequence_weights_to_tabular_profile( + cumulative_reach_weights) + self.assertLess(pyspiel.nash_conv(_GAME, average_profile), 0.91) + + @parameterized.parameters(list(itertools.product(_BOOLEANS, _BOOLEANS))) + def test_rcfr(self, bootstrap, truncate_negative): + num_epochs = 100 + num_iterations = 2 + models = [_new_model() for _ in range(_GAME.num_players())] + + patient = rcfr.RcfrSolver( + _GAME, models, bootstrap=bootstrap, truncate_negative=truncate_negative) + + def _train(model, data): + data = torch.utils.data.DataLoader( + data, batch_size=_BATCH_SIZE, shuffle=True) + + loss_fn = nn.SmoothL1Loss() + optimizer = torch.optim.Adam(model.parameters(), lr=0.005, amsgrad=True) + for _ in range(num_epochs): + for x, y in data: + optimizer.zero_grad() + output = model(x) + loss = loss_fn(output, y) + loss.backward() + optimizer.step() + + average_policy = patient.average_policy() + self.assertGreater(pyspiel.nash_conv(_GAME, average_policy), 0.91) + + for _ in range(num_iterations): + patient.evaluate_and_update_policy(_train) + + average_policy = patient.average_policy() + self.assertLess(pyspiel.nash_conv(_GAME, average_policy), 0.91) + + def test_reservior_buffer_insert(self): + buffer_size = 10 + patient = rcfr.ReservoirBuffer(buffer_size) + + x_buffer = [] + for i in range(buffer_size): + patient.insert(i) + x_buffer.append(i) 
+ assert patient.num_elements == len(x_buffer) + np.testing.assert_array_equal(x_buffer, patient.buffer) + + assert patient.num_available_spaces() == 0 + + for i in range(buffer_size): + patient.insert(buffer_size + i) + assert patient.num_elements == buffer_size + + def test_reservior_buffer_insert_all(self): + buffer_size = 10 + patient = rcfr.ReservoirBuffer(buffer_size) + + x_buffer = list(range(buffer_size)) + patient.insert_all(x_buffer) + assert patient.num_elements == buffer_size + np.testing.assert_array_equal(x_buffer, patient.buffer) + + assert patient.num_available_spaces() == 0 + + x_buffer = list(range(buffer_size, 2 * buffer_size)) + patient.insert_all(x_buffer) + assert patient.num_elements == buffer_size + + def test_rcfr_with_buffer(self): + buffer_size = 12 + num_epochs = 100 + num_iterations = 2 + models = [_new_model() for _ in range(_GAME.num_players())] + + patient = rcfr.ReservoirRcfrSolver(_GAME, models, buffer_size=buffer_size) + + def _train(model, data): + data = torch.utils.data.DataLoader( + data, batch_size=_BATCH_SIZE, shuffle=True) + + loss_fn = nn.SmoothL1Loss() + optimizer = torch.optim.Adam(model.parameters(), lr=0.005, amsgrad=True) + for _ in range(num_epochs): + for x, y in data: + optimizer.zero_grad() + output = model(x) + loss = loss_fn(output, y) + loss.backward() + optimizer.step() + + average_policy = patient.average_policy() + self.assertGreater(pyspiel.nash_conv(_GAME, average_policy), 0.91) + + for _ in range(num_iterations): + patient.evaluate_and_update_policy(_train) + + average_policy = patient.average_policy() + self.assertLess(pyspiel.nash_conv(_GAME, average_policy), 0.91) + + +if __name__ == '__main__': + torch.manual_seed(SEED) + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/rl_agent.py b/scenarios/bargaining/open_spiel/open_spiel/python/rl_agent.py new file mode 100644 index 0000000..6ef8271 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/rl_agent.py @@ -0,0 +1,63 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Reinforcement Learning (RL) Agent Base for Open Spiel.""" + +import abc +import collections + +StepOutput = collections.namedtuple("step_output", ["action", "probs"]) + + +class AbstractAgent(metaclass=abc.ABCMeta): + """Abstract base class for Open Spiel RL agents.""" + + @abc.abstractmethod + def __init__(self, + player_id, + session=None, + observation_spec=None, + name="agent", + **agent_specific_kwargs): + """Initializes agent. + + Args: + player_id: integer, mandatory. Corresponds to the player position in the + game and is used to index the observation list. + session: optional Tensorflow session. + observation_spec: optional dict containing observation specifications. + name: string. Must be used to scope TF variables. Defaults to `agent`. + **agent_specific_kwargs: optional extra args. 
+ """ + + @abc.abstractmethod + def step(self, time_step, is_evaluation=False): + """Returns action probabilities and chosen action at `time_step`. + + Agents should handle `time_step` and extract the required part of the + `time_step.observations` field. This flexibility enables algorithms which + rely on opponent observations / information, e.g. CFR. + + `is_evaluation` can be used so agents change their behaviour for evaluation + purposes, e.g.: preventing exploration rate decaying during test and + insertion of data to replay buffers. + + Arguments: + time_step: an instance of rl_environment.TimeStep. + is_evaluation: bool indicating whether the step is an evaluation routine, + as opposed to a normal training step. + + Returns: + A `StepOutput` for the current `time_step`. + """ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/rl_agent_policy.py b/scenarios/bargaining/open_spiel/open_spiel/python/rl_agent_policy.py new file mode 100644 index 0000000..9771d0c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/rl_agent_policy.py @@ -0,0 +1,100 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Joint policy denoted by the RL agents of a game.""" + +from typing import Dict + +from open_spiel.python import policy +from open_spiel.python import rl_agent +from open_spiel.python import rl_environment + + +class JointRLAgentPolicy(policy.Policy): + """Joint policy denoted by the RL agents of a game. + + Given a list of RL agents of players for a game, this class can be used derive + the corresponding (joint) policy. In particular, the distribution over + possible actions will be those that are returned by the step() method of + the RL agents given the state. + """ + + def __init__(self, game, agents: Dict[int, rl_agent.AbstractAgent], + use_observation: bool): + """Initializes the joint RL agent policy. + + Args: + game: The game. + agents: Dictionary of agents keyed by the player IDs. + use_observation: If true then observation tensor will be used as the + `info_state` in the step() calls; otherwise, information state tensor + will be used. See `use_observation` property of + rl_environment.Environment. + """ + player_ids = list(sorted(agents.keys())) + super().__init__(game, player_ids) + self._agents = agents + self._obs = { + "info_state": [None] * game.num_players(), + "legal_actions": [None] * game.num_players() + } + self._use_observation = use_observation + + def action_probabilities(self, state, player_id=None): + if state.is_simultaneous_node(): + assert player_id is not None, "Player ID should be specified." + else: + if player_id is None: + player_id = state.current_player() + else: + assert player_id == state.current_player() + + # Make sure that player_id is an integer and not an enum as it is used to + # index lists. 
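+    # (`player_id` may arrive as a pyspiel player enum value; int() keeps the
+    # list indexing below well-defined.)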
+ player_id = int(player_id) + + legal_actions = state.legal_actions(player_id) + + self._obs["current_player"] = player_id + self._obs["info_state"][player_id] = ( + state.observation_tensor(player_id) + if self._use_observation else state.information_state_tensor(player_id)) + self._obs["legal_actions"][player_id] = legal_actions + + info_state = rl_environment.TimeStep( + observations=self._obs, rewards=None, discounts=None, step_type=None) + + p = self._agents[player_id].step(info_state, is_evaluation=True).probs + prob_dict = {action: p[action] for action in legal_actions} + return prob_dict + + +class RLAgentPolicy(JointRLAgentPolicy): + """A policy for a specific agent trained in an RL environment.""" + + def __init__(self, game, agent: rl_agent.AbstractAgent, player_id: int, + use_observation: bool): + """Initializes the RL agent policy. + + Args: + game: The game. + agent: RL agent. + player_id: ID of the player. + use_observation: See JointRLAgentPolicy above. + """ + self._player_id = player_id + super().__init__(game, {player_id: agent}, use_observation) + + def action_probabilities(self, state, player_id=None): + return super().action_probabilities( + state, self._player_id if player_id is None else player_id) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/rl_environment.py b/scenarios/bargaining/open_spiel/open_spiel/python/rl_environment.py new file mode 100644 index 0000000..8a0a748 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/rl_environment.py @@ -0,0 +1,480 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Reinforcement Learning (RL) Environment for Open Spiel. + +This module wraps Open Spiel Python interface providing an RL-friendly API. It +covers both turn-based and simultaneous move games. Interactions between agents +and the underlying game occur mostly through the `reset` and `step` methods, +which return a `TimeStep` structure (see its docstrings for more info). + +The following example illustrates the interaction dynamics. Consider a 2-player +Kuhn Poker (turn-based game). Agents have access to the `observations` (a dict) +field from `TimeSpec`, containing the following members: + * `info_state`: list containing the game information state for each player. The + size of the list always correspond to the number of players. E.g.: + [[0, 1, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0]]. + * `legal_actions`: list containing legal action ID lists (one for each player). + E.g.: [[0, 1], [0]], which corresponds to actions 0 and 1 being valid for + player 0 (the 1st player) and action 0 being valid for player 1 (2nd player). + * `current_player`: zero-based integer representing the player to make a move. + +At each `step` call, the environment expects a singleton list with the action +(as it's a turn-based game), e.g.: [1]. This (zero-based) action must correspond +to the player specified at `current_player`. 
The game (which is at decision +node) will process the action and take as many steps necessary to cover chance +nodes, halting at a new decision or final node. Finally, a new `TimeStep`is +returned to the agent. + +Simultaneous-move games follow analogous dynamics. The only differences is the +environment expects a list of actions, one per player. Note the `current_player` +field is "irrelevant" here, admitting a constant value defined in spiel.h, which +defaults to -2 (module level constant `SIMULTANEOUS_PLAYER_ID`). + +See open_spiel/python/examples/rl_example.py for example usages. +""" + +import collections + +import enum +from absl import logging +import numpy as np + +import pyspiel + +SIMULTANEOUS_PLAYER_ID = pyspiel.PlayerId.SIMULTANEOUS + + +class TimeStep( + collections.namedtuple( + "TimeStep", ["observations", "rewards", "discounts", "step_type"])): + """Returned with every call to `step` and `reset`. + + A `TimeStep` contains the data emitted by a game at each step of interaction. + A `TimeStep` holds an `observation` (list of dicts, one per player), + associated lists of `rewards`, `discounts` and a `step_type`. + + The first `TimeStep` in a sequence will have `StepType.FIRST`. The final + `TimeStep` will have `StepType.LAST`. All other `TimeStep`s in a sequence will + have `StepType.MID. + + Attributes: + observations: a list of dicts containing observations per player. + rewards: A list of scalars (one per player), or `None` if `step_type` is + `StepType.FIRST`, i.e. at the start of a sequence. + discounts: A list of discount values in the range `[0, 1]` (one per player), + or `None` if `step_type` is `StepType.FIRST`. + step_type: A `StepType` enum value. + """ + __slots__ = () + + def first(self): + return self.step_type == StepType.FIRST + + def mid(self): + return self.step_type == StepType.MID + + def last(self): + return self.step_type == StepType.LAST + + def is_simultaneous_move(self): + return self.observations["current_player"] == SIMULTANEOUS_PLAYER_ID + + def current_player(self): + return self.observations["current_player"] + + +class StepType(enum.Enum): + """Defines the status of a `TimeStep` within a sequence.""" + + FIRST = 0 # Denotes the first `TimeStep` in a sequence. + MID = 1 # Denotes any `TimeStep` in a sequence that is not FIRST or LAST. + LAST = 2 # Denotes the last `TimeStep` in a sequence. + + def first(self): + return self is StepType.FIRST + + def mid(self): + return self is StepType.MID + + def last(self): + return self is StepType.LAST + + +# Global pyspiel members +def registered_games(): + return pyspiel.registered_games() + + +class ChanceEventSampler(object): + """Default sampler for external chance events.""" + + def __init__(self, seed=None): + self.seed(seed) + + def seed(self, seed=None): + self._rng = np.random.RandomState(seed) + + def __call__(self, state): + """Sample a chance event in the given state.""" + actions, probs = zip(*state.chance_outcomes()) + return self._rng.choice(actions, p=probs) + + +class ObservationType(enum.Enum): + """Defines what kind of observation to use.""" + OBSERVATION = 0 # Use observation_tensor + INFORMATION_STATE = 1 # Use information_state_tensor + + +class Environment(object): + """Open Spiel reinforcement learning environment class.""" + + def __init__(self, + game, + discount=1.0, + chance_event_sampler=None, + observation_type=None, + include_full_state=False, + mfg_distribution=None, + mfg_population=None, + enable_legality_check=False, + **kwargs): + """Constructor. 
+ + Args: + game: [string, pyspiel.Game] Open Spiel game name or game instance. + discount: float, discount used in non-initial steps. Defaults to 1.0. + chance_event_sampler: optional object with `sample_external_events` method + to sample chance events. + observation_type: what kind of observation to use. If not specified, will + default to INFORMATION_STATE unless the game doesn't provide it. + include_full_state: whether or not to include the full serialized + OpenSpiel state in the observations (sometimes useful for debugging). + mfg_distribution: the distribution over states if the game is a mean field + game. + mfg_population: The Mean Field Game population to consider. + enable_legality_check: Check the legality of the move before stepping. + **kwargs: dict, additional settings passed to the Open Spiel game. + """ + self._chance_event_sampler = chance_event_sampler or ChanceEventSampler() + self._include_full_state = include_full_state + self._mfg_distribution = mfg_distribution + self._mfg_population = mfg_population + self._enable_legality_check = enable_legality_check + + if isinstance(game, str): + if kwargs: + game_settings = {key: val for (key, val) in kwargs.items()} + logging.info("Using game settings: %s", game_settings) + self._game = pyspiel.load_game(game, game_settings) + else: + logging.info("Using game string: %s", game) + self._game = pyspiel.load_game(game) + else: # pyspiel.Game or API-compatible object. + logging.info("Using game instance: %s", game.get_type().short_name) + self._game = game + + self._num_players = self._game.num_players() + self._state = None + self._should_reset = True + + # Discount returned at non-initial steps. + self._discounts = [discount] * self._num_players + + # Determine what observation type to use. + if observation_type is None: + if self._game.get_type().provides_information_state_tensor: + observation_type = ObservationType.INFORMATION_STATE + else: + observation_type = ObservationType.OBSERVATION + + # Check the requested observation type is supported. + if observation_type == ObservationType.OBSERVATION: + if not self._game.get_type().provides_observation_tensor: + raise ValueError(f"observation_tensor not supported by {game}") + elif observation_type == ObservationType.INFORMATION_STATE: + if not self._game.get_type().provides_information_state_tensor: + raise ValueError(f"information_state_tensor not supported by {game}") + self._use_observation = (observation_type == ObservationType.OBSERVATION) + + if self._game.get_type().dynamics == pyspiel.GameType.Dynamics.MEAN_FIELD: + assert mfg_distribution is not None + assert mfg_population is not None + assert 0 <= mfg_population < self._num_players + + def seed(self, seed=None): + self._chance_event_sampler.seed(seed) + + def get_time_step(self): + """Returns a `TimeStep` without updating the environment. + + Returns: + A `TimeStep` namedtuple containing: + observation: list of dicts containing one observations per player, each + corresponding to `observation_spec()`. + reward: list of rewards at this timestep, or None if step_type is + `StepType.FIRST`. + discount: list of discounts in the range [0, 1], or None if step_type is + `StepType.FIRST`. + step_type: A `StepType` value. 
+ """ + observations = { + "info_state": [], + "legal_actions": [], + "current_player": [], + "serialized_state": [] + } + rewards = [] + step_type = StepType.LAST if self._state.is_terminal() else StepType.MID + self._should_reset = step_type == StepType.LAST + + cur_rewards = self._state.rewards() + for player_id in range(self.num_players): + rewards.append(cur_rewards[player_id]) + observations["info_state"].append( + self._state.observation_tensor(player_id) if self._use_observation + else self._state.information_state_tensor(player_id)) + + observations["legal_actions"].append(self._state.legal_actions(player_id)) + observations["current_player"] = self._state.current_player() + discounts = self._discounts + if step_type == StepType.LAST: + # When the game is in a terminal state set the discount to 0. + discounts = [0. for _ in discounts] + + if self._include_full_state: + observations["serialized_state"] = pyspiel.serialize_game_and_state( + self._game, self._state) + + # For gym environments + if hasattr(self._state, "last_info"): + observations["info"] = self._state.last_info + + return TimeStep( + observations=observations, + rewards=rewards, + discounts=discounts, + step_type=step_type) + + def _check_legality(self, actions): + if self.is_turn_based: + legal_actions = self._state.legal_actions() + if actions[0] not in legal_actions: + raise RuntimeError(f"step() called on illegal action {actions[0]}") + else: + for p in range(len(actions)): + legal_actions = self._state.legal_actions(p) + if legal_actions and actions[p] not in legal_actions: + raise RuntimeError(f"step() by player {p} called on illegal " + + f"action: {actions[p]}") + + def step(self, actions): + """Updates the environment according to `actions` and returns a `TimeStep`. + + If the environment returned a `TimeStep` with `StepType.LAST` at the + previous step, this call to `step` will start a new sequence and `actions` + will be ignored. + + This method will also start a new sequence if called after the environment + has been constructed and `reset` has not been called. Again, in this case + `actions` will be ignored. + + Args: + actions: a list containing one action per player, following specifications + defined in `action_spec()`. + + Returns: + A `TimeStep` namedtuple containing: + observation: list of dicts containing one observations per player, each + corresponding to `observation_spec()`. + reward: list of rewards at this timestep, or None if step_type is + `StepType.FIRST`. + discount: list of discounts in the range [0, 1], or None if step_type is + `StepType.FIRST`. + step_type: A `StepType` value. + """ + assert len(actions) == self.num_actions_per_step, ( + "Invalid number of actions! Expected {}".format( + self.num_actions_per_step)) + if self._should_reset: + return self.reset() + + if self._enable_legality_check: + self._check_legality(actions) + + if self.is_turn_based: + self._state.apply_action(actions[0]) + else: + self._state.apply_actions(actions) + self._sample_external_events() + + return self.get_time_step() + + def reset(self): + """Starts a new sequence and returns the first `TimeStep` of this sequence. + + Returns: + A `TimeStep` namedtuple containing: + observations: list of dicts containing one observations per player, each + corresponding to `observation_spec()`. + rewards: list of rewards at this timestep, or None if step_type is + `StepType.FIRST`. + discounts: list of discounts in the range [0, 1], or None if step_type + is `StepType.FIRST`. + step_type: A `StepType` value. 
+ """ + self._should_reset = False + if self._game.get_type( + ).dynamics == pyspiel.GameType.Dynamics.MEAN_FIELD and self._num_players > 1: + self._state = self._game.new_initial_state_for_population( + self._mfg_population) + else: + self._state = self._game.new_initial_state() + self._sample_external_events() + + observations = { + "info_state": [], + "legal_actions": [], + "current_player": [], + "serialized_state": [] + } + for player_id in range(self.num_players): + observations["info_state"].append( + self._state.observation_tensor(player_id) if self._use_observation + else self._state.information_state_tensor(player_id)) + observations["legal_actions"].append(self._state.legal_actions(player_id)) + observations["current_player"] = self._state.current_player() + + if self._include_full_state: + observations["serialized_state"] = pyspiel.serialize_game_and_state( + self._game, self._state) + + return TimeStep( + observations=observations, + rewards=None, + discounts=None, + step_type=StepType.FIRST) + + def _sample_external_events(self): + """Sample chance events until we get to a decision node.""" + while self._state.is_chance_node() or (self._state.current_player() + == pyspiel.PlayerId.MEAN_FIELD): + if self._state.is_chance_node(): + outcome = self._chance_event_sampler(self._state) + self._state.apply_action(outcome) + if self._state.current_player() == pyspiel.PlayerId.MEAN_FIELD: + dist_to_register = self._state.distribution_support() + dist = [ + self._mfg_distribution.value_str(str_state, default_value=0.0) + for str_state in dist_to_register + ] + self._state.update_distribution(dist) + + def observation_spec(self): + """Defines the observation per player provided by the environment. + + Each dict member will contain its expected structure and shape. E.g.: for + Kuhn Poker {"info_state": (6,), "legal_actions": (2,), "current_player": (), + "serialized_state": ()} + + Returns: + A specification dict describing the observation fields and shapes. + """ + return dict( + info_state=tuple([ + self._game.observation_tensor_size() if self._use_observation else + self._game.information_state_tensor_size() + ]), + legal_actions=(self._game.num_distinct_actions(),), + current_player=(), + serialized_state=(), + ) + + def action_spec(self): + """Defines per player action specifications. + + Specifications include action boundaries and their data type. + E.g.: for Kuhn Poker {"num_actions": 2, "min": 0, "max":1, "dtype": int} + + Returns: + A specification dict containing per player action properties. + """ + return dict( + num_actions=self._game.num_distinct_actions(), + min=0, + max=self._game.num_distinct_actions() - 1, + dtype=int, + ) + + # Environment properties + @property + def use_observation(self): + """Returns whether the environment is using the game's observation. + + If false, it is using the game's information state. + """ + return self._use_observation + + # Game properties + @property + def name(self): + return self._game.get_type().short_name + + @property + def num_players(self): + return self._game.num_players() + + @property + def num_actions_per_step(self): + return 1 if self.is_turn_based else self.num_players + + # New RL calls for more advanced use cases (e.g. search + RL). 
+ @property + def is_turn_based(self): + return ((self._game.get_type().dynamics + == pyspiel.GameType.Dynamics.SEQUENTIAL) or + (self._game.get_type().dynamics + == pyspiel.GameType.Dynamics.MEAN_FIELD)) + + @property + def max_game_length(self): + return self._game.max_game_length() + + @property + def is_chance_node(self): + return self._state.is_chance_node() + + @property + def game(self): + return self._game + + def set_state(self, new_state): + """Updates the game state.""" + assert new_state.get_game() == self.game, ( + "State must have been created by the same game.") + self._state = new_state + + @property + def get_state(self): + return self._state + + @property + def mfg_distribution(self): + return self._mfg_distribution + + def update_mfg_distribution(self, mfg_distribution): + """Updates the distribution over the states of the mean field game.""" + assert ( + self._game.get_type().dynamics == pyspiel.GameType.Dynamics.MEAN_FIELD) + self._mfg_distribution = mfg_distribution diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/rl_tools.py b/scenarios/bargaining/open_spiel/open_spiel/python/rl_tools.py new file mode 100644 index 0000000..e59ae57 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/rl_tools.py @@ -0,0 +1,90 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Reinforcement Learning (RL) tools Open Spiel.""" + +import abc + + +class ValueSchedule(metaclass=abc.ABCMeta): + """Abstract base class changing (decaying) values.""" + + @abc.abstractmethod + def __init__(self): + """Initialize the value schedule.""" + + @abc.abstractmethod + def step(self): + """Apply a potential change in the value. + + This method should be called every time the agent takes a training step. + + Returns: + the value after the step. + """ + + @property + @abc.abstractmethod + def value(self): + """Return the current value.""" + + +class ConstantSchedule(ValueSchedule): + """A schedule that keeps the value constant.""" + + def __init__(self, value): + super(ConstantSchedule, self).__init__() + self._value = value + + def step(self): + return self._value + + @property + def value(self): + return self._value + + +class LinearSchedule(ValueSchedule): + """A simple linear schedule.""" + + def __init__(self, init_val, final_val, num_steps): + """A simple linear schedule. + + Once the the number of steps is reached, value is always equal to the final + value. + + Arguments: + init_val: the initial value. + final_val: the final_value + num_steps: the number of steps to get from the initial to final value. 
+ """ + super(LinearSchedule, self).__init__() + self._value = init_val + self._final_value = final_val + assert isinstance(num_steps, int) + self._num_steps = num_steps + self._steps_taken = 0 + self._increment = (final_val - init_val) / num_steps + + def step(self): + self._steps_taken += 1 + if self._steps_taken < self._num_steps: + self._value += self._increment + elif self._steps_taken == self._num_steps: + self._value = self._final_value + return self._value + + @property + def value(self): + return self._value diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/simple_nets.py b/scenarios/bargaining/open_spiel/open_spiel/python/simple_nets.py new file mode 100644 index 0000000..4128e3a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/simple_nets.py @@ -0,0 +1,140 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Simple network classes for Tensorflow based on tf.Module.""" + +import math +import tensorflow.compat.v1 as tf + +# Temporarily disable TF2 behavior until code is updated. +tf.disable_v2_behavior() + +# This code is based directly on the TF docs: +# https://www.tensorflow.org/versions/r1.15/api_docs/python/tf/Module + + +class Linear(tf.Module): + """A simple linear module. + + Always includes biases and only supports ReLU activations. + """ + + def __init__(self, in_size, out_size, activate_relu=True, name=None): + """Creates a linear layer. + + Args: + in_size: (int) number of inputs + out_size: (int) number of outputs + activate_relu: (bool) whether to include a ReLU activation layer + name: (string): the name to give to this layer + """ + + super(Linear, self).__init__(name=name) + self._activate_relu = activate_relu + # Weight initialization inspired by Sonnet's Linear layer, + # which cites https://arxiv.org/abs/1502.03167v3 + stddev = 1.0 / math.sqrt(in_size) + self._weights = tf.Variable( + tf.random.truncated_normal([in_size, out_size], mean=0.0, + stddev=stddev), + name="weights") + self._bias = tf.Variable(tf.zeros([out_size]), name="bias") + + def __call__(self, tensor): + y = tf.matmul(tensor, self._weights) + self._bias + return tf.nn.relu(y) if self._activate_relu else y + + +class Sequential(tf.Module): + """A simple sequential module. + + Always includes biases and only supports ReLU activations. + """ + + def __init__(self, layers, name=None): + """Creates a model from successively applying layers. + + Args: + layers: Iterable[tf.Module] that can be applied. + name: (string): the name to give to this layer + """ + + super(Sequential, self).__init__(name=name) + self._layers = layers + + def __call__(self, tensor): + for layer in self._layers: + tensor = layer(tensor) + return tensor + + +class MLP(tf.Module): + """A simple dense network built from linear layers above.""" + + def __init__(self, + input_size, + hidden_sizes, + output_size, + activate_final=False, + name=None): + """Create the MLP. 
+ + Args: + input_size: (int) number of inputs + hidden_sizes: (list) sizes (number of units) of each hidden layer + output_size: (int) number of outputs + activate_final: (bool) should final layer should include a ReLU + name: (string): the name to give to this network + """ + + super(MLP, self).__init__(name=name) + self._layers = [] + with self.name_scope: + # Hidden layers + for size in hidden_sizes: + self._layers.append(Linear(in_size=input_size, out_size=size)) + input_size = size + # Output layer + self._layers.append( + Linear( + in_size=input_size, + out_size=output_size, + activate_relu=activate_final)) + + @tf.Module.with_name_scope + def __call__(self, x): + for layer in self._layers: + x = layer(x) + return x + + +class MLPTorso(tf.Module): + """A specialized half-MLP module when constructing multiple heads. + + Note that every layer includes a ReLU non-linearity activation. + """ + + def __init__(self, input_size, hidden_sizes, name=None): + super(MLPTorso, self).__init__(name=name) + self._layers = [] + with self.name_scope: + for size in hidden_sizes: + self._layers.append(Linear(in_size=input_size, out_size=size)) + input_size = size + + @tf.Module.with_name_scope + def __call__(self, x): + for layer in self._layers: + x = layer(x) + return x diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/test_utils.py b/scenarios/bargaining/open_spiel/open_spiel/python/test_utils.py new file mode 100644 index 0000000..64f77d9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/test_utils.py @@ -0,0 +1,27 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Useful functions for testing.""" + +from typing import Optional +import numpy as np +import pyspiel + + +def random_playout(state: pyspiel.State, seed: Optional[int] = None): + """Plays random actions until the state is terminal.""" + rng = np.random.RandomState(seed) + while not state.is_terminal(): + state.apply_action(rng.choice(state.legal_actions())) + return state diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/tests/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/tests/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/tests/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
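The `Environment` docstrings above spell out the reset/step contract: `reset()` yields a `TimeStep` with `step_type` `FIRST` and no rewards, `step(actions)` takes one action per player (a single-element list for turn-based games) and returns `MID`/`LAST` steps with per-player rewards, and observations carry `info_state`, `legal_actions`, and `current_player`. A minimal driver loop under those assumptions — `kuhn_poker` is just an illustrative game choice — could look like:

```python
import random

from open_spiel.python import rl_environment

env = rl_environment.Environment("kuhn_poker")  # illustrative game choice
time_step = env.reset()  # StepType.FIRST: rewards and discounts are None

while not time_step.last():
    player = time_step.observations["current_player"]
    legal_actions = time_step.observations["legal_actions"][player]
    action = random.choice(legal_actions)
    time_step = env.step([action])  # turn-based: one action in the list

print("Final rewards:", time_step.rewards)
```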
+ diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/tests/bot_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/tests/bot_test.py new file mode 100644 index 0000000..57786b3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/tests/bot_test.py @@ -0,0 +1,123 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Test that Python and C++ bots can be called by a C++ algorithm.""" + +import os +from absl.testing import absltest +import numpy as np + +from open_spiel.python.bots import uniform_random +import pyspiel + +# Specify bot names in alphabetical order, to make it easier to read. +SPIEL_BOTS_LIST = [ + # Chooses actions in a fixed order. + "fixed_action_preference", + + "uniform_random", +] + + +class BotTest(absltest.TestCase): + + def test_python_and_cpp_bot(self): + game = pyspiel.load_game("kuhn_poker") + bots = [ + pyspiel.make_uniform_random_bot(0, 1234), + uniform_random.UniformRandomBot(1, np.random.RandomState(4321)), + ] + results = np.array([ + pyspiel.evaluate_bots(game.new_initial_state(), bots, iteration) + for iteration in range(10000) + ]) + average_results = np.mean(results, axis=0) + np.testing.assert_allclose(average_results, [0.125, -0.125], atol=0.1) + + def test_registered_bots(self): + expected = SPIEL_BOTS_LIST[:] + if os.environ.get("OPEN_SPIEL_BUILD_WITH_ACPC", "OFF") == "ON": + expected.append("uniform_restricted_actions") + self.assertCountEqual(pyspiel.registered_bots(), expected) + + def test_cpp_mcts_bot(self): + game = pyspiel.load_game("tic_tac_toe") + bots = [ + pyspiel.MCTSBot(game, pyspiel.RandomRolloutEvaluator(1, 0), 2.0, + 100, 100, False, 42, False) + ] * 2 + _ = np.array([ + pyspiel.evaluate_bots(game.new_initial_state(), bots, iteration) + for iteration in range(10) + ]) + # Do a search directly, and inspect the values. + state = game.new_initial_state() + search_node = bots[0].mcts_search(state) + for child in search_node.children: + print(f"Child action {child.action}, total reward: {child.total_reward}" + + f", explore count: {child.explore_count}") + # Similar way to achieve the above. + print(f"Children string: {search_node.children_str(state)}") + print(f"Best child: {search_node.best_child().to_string(state)}") + + def test_can_play_game(self): + game = pyspiel.load_game("kuhn_poker") + self.assertIn("uniform_random", pyspiel.bots_that_can_play_game(game)) + + def test_passing_params(self): + game = pyspiel.load_game("tic_tac_toe") + bots = [ + pyspiel.load_bot( + "fixed_action_preference", + game, + player=0, + params={"actions": "0:1:2"}), + pyspiel.load_bot( + "fixed_action_preference", + game, + player=1, + params={"actions": "3:4"}), + ] + result = pyspiel.evaluate_bots(game.new_initial_state(), bots, seed=0) + self.assertEqual(result, [1, -1]) # Player 0 wins. 
+ + def test_roshambo_bot(self): + if hasattr(pyspiel, "make_roshambo_bot"): + game = pyspiel.load_game("repeated_game(stage_game=matrix_rps()," + + "num_repetitions=" + + f"{pyspiel.ROSHAMBO_NUM_THROWS})") + num_players = 2 + bots = [ + pyspiel.make_roshambo_bot(0, "rotatebot", + pyspiel.ROSHAMBO_NUM_THROWS), + pyspiel.make_roshambo_bot(1, "copybot", pyspiel.ROSHAMBO_NUM_THROWS) + ] + state = game.new_initial_state() + for i in range(pyspiel.ROSHAMBO_NUM_THROWS): + joint_action = [-1] * num_players + for p in range(num_players): + joint_action[p] = bots[p].step(state) + state.apply_actions(joint_action) + if i == 0: + # copybot wins the first round + self.assertListEqual(state.returns(), [-1, 1]) + else: + # the rest are a draw + self.assertListEqual(state.rewards(), [0, 0]) + self.assertTrue(state.is_terminal()) + self.assertListEqual(state.returns(), [-1, 1]) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/tests/game_transforms_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/tests/game_transforms_test.py new file mode 100644 index 0000000..69411c3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/tests/game_transforms_test.py @@ -0,0 +1,136 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Test Python bindings for game transforms.""" + +from absl.testing import absltest + +import numpy as np + +from open_spiel.python.algorithms import cfr +from open_spiel.python.algorithms import expected_game_score +from open_spiel.python.games import iterated_prisoners_dilemma as ipd # pylint: disable=unused-import +import pyspiel + + +SEED = 1098097 + + +class GameTransformsTest(absltest.TestCase): + + def setUp(self): + super().setUp() + np.random.seed(SEED) + + def test_create_repeated_game(self): + """Test both create_repeated_game function signatures.""" + repeated_game = pyspiel.create_repeated_game("matrix_rps", + {"num_repetitions": 10}) + assert repeated_game.utility_sum() == 0 + state = repeated_game.new_initial_state() + for _ in range(10): + state.apply_actions([0, 0]) + assert state.is_terminal() + + stage_game = pyspiel.load_game("matrix_mp") + repeated_game = pyspiel.create_repeated_game(stage_game, + {"num_repetitions": 5}) + state = repeated_game.new_initial_state() + for _ in range(5): + state.apply_actions([0, 0]) + assert state.is_terminal() + + stage_game = pyspiel.load_game("matrix_pd") + repeated_game = pyspiel.create_repeated_game(stage_game, + {"num_repetitions": 5}) + assert repeated_game.utility_sum() is None + + def test_cached_tree_sim(self): + """Test both create_cached_tree function signatures.""" + for game_name in ["kuhn_poker", "python_tic_tac_toe"]: + cached_tree_game = pyspiel.convert_to_cached_tree( + pyspiel.load_game(game_name)) + assert cached_tree_game.num_players() == 2 + for _ in range(10): + state = cached_tree_game.new_initial_state() + while not state.is_terminal(): + legal_actions = state.legal_actions() + action = np.random.choice(legal_actions) + state.apply_action(action) + self.assertTrue(state.is_terminal()) + + def test_cached_tree_cfr_kuhn(self): + game = pyspiel.load_game("cached_tree(game=kuhn_poker())") + cfr_solver = cfr.CFRSolver(game) + for _ in range(300): + cfr_solver.evaluate_and_update_policy() + average_policy = cfr_solver.average_policy() + average_policy_values = expected_game_score.policy_value( + game.new_initial_state(), [average_policy] * 2) + # 1/18 is the Nash value. See https://en.wikipedia.org/wiki/Kuhn_poker + np.testing.assert_allclose( + average_policy_values, [-1 / 18, 1 / 18], atol=1e-3) + + def test_turn_based_simultaneous_game(self): + tb_game = pyspiel.load_game( + "turn_based_simultaneous_game(game=" + "goofspiel(num_cards=13,players=2,points_order=descending))") + state = tb_game.new_initial_state() + # Play 11 moves. This gets to the decision right before the game is over. + # Because the last action is taken automatically there are only 12 moves + # total. + for _ in range(11): + state.apply_action(state.legal_actions()[0]) + state.apply_action(state.legal_actions()[0]) + sim_state = state.simultaneous_game_state() + assert not state.is_terminal() + assert not sim_state.is_terminal() + # For the last joint action, pull out the simultaneous state from inside the + # wrapper and and apply joint action to it. Both the wrapped state and + # the simultaneous state should be terminal after this. + sim_state.apply_actions([sim_state.legal_actions(0)[0], + sim_state.legal_actions(1)[0]]) + assert state.is_terminal() + assert sim_state.is_terminal() + + def test_turn_based_simultaneous_python_game(self): + tb_game = pyspiel.load_game( + "turn_based_simultaneous_game(game=" + "python_iterated_prisoners_dilemma())" + ) + state = tb_game.new_initial_state() + # Play 10 rounds, then continue. 
+ for _ in range(10): + state.apply_action(state.legal_actions()[0]) + state.apply_action(state.legal_actions()[0]) + if state.is_chance_node(): + state.apply_action(ipd.Chance.CONTINUE) + # Pull out the simultaneous state from inside the wrapper. + sim_state = state.simultaneous_game_state() + assert not state.is_terminal() + assert not sim_state.is_terminal() + sim_state.apply_actions([sim_state.legal_actions(0)[0], + sim_state.legal_actions(1)[0]]) + assert not state.is_terminal() + assert not sim_state.is_terminal() + # Cannot properly check is_chance_node() because the wrapper is still in + # rollout mode, so this would fail: assert state.is_chance_node() + assert sim_state.is_chance_node() + sim_state.apply_action(ipd.Chance.STOP) + assert state.is_terminal() + assert sim_state.is_terminal() + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/tests/games_bargaining_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/tests/games_bargaining_test.py new file mode 100644 index 0000000..36b11f9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/tests/games_bargaining_test.py @@ -0,0 +1,101 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for the game-specific functions for bargaining.""" + + +from absl.testing import absltest + +import pyspiel +barg = pyspiel.bargaining + + +class GamesBargainingTest(absltest.TestCase): + + def test_constants(self): + self.assertEqual(barg.NumItemTypes, 3) + self.assertEqual(barg.PoolMinNumItems, 5) + self.assertEqual(barg.PoolMaxNumItems, 7) + self.assertEqual(barg.TotalValueAllItems, 10) + + def test_game_specific_constants(self): + game0 = pyspiel.load_game("bargaining") + self.assertEqual(game0.max_turns(), 10) + self.assertEqual(game0.discount(), 1.0) + self.assertEqual(game0.prob_end(), 0.0) + + game1 = pyspiel.load_game( + "bargaining(max_turns=15,discount=0.9,prob_end=0.1)" + ) + self.assertEqual(game1.max_turns(), 15) + self.assertEqual(game1.discount(), 0.9) + self.assertEqual(game1.prob_end(), 0.1) + + def test_game_mechanism(self): + game = pyspiel.load_game("bargaining") + state = game.new_initial_state() + + # first check the instance matches the true instance + true_instance = [(1, 2, 3), (8, 1, 0), (4, 0, 2)] + state.apply_action(0) + cur_instance = state.instance() + cur_instance = [ + tuple(cur_instance.pool), + tuple(cur_instance.values[0]), + tuple(cur_instance.values[1]) + ] + + for item1, item2 in zip(true_instance, cur_instance): + self.assertEqual(item1, item2) + + # then set a new instance and check it works + all_instances = game.all_instances() + new_instance = all_instances[2] + state.set_instance(new_instance) + new_instance = [ + tuple(new_instance.pool), + tuple(new_instance.values[0]), + tuple(new_instance.values[1]) + ] + cur_instance = state.instance() + cur_instance = [ + tuple(cur_instance.pool), + tuple(cur_instance.values[0]), + tuple(cur_instance.values[1]) + ] + for item1, item2 in zip(cur_instance, new_instance): + self.assertEqual(item1, item2) + + def test_offer_and_instance_map(self): + game = pyspiel.load_game("bargaining") + all_offers = game.all_offers() + for i, offer in enumerate(all_offers): + self.assertEqual(game.get_offer_index(offer), i) + for i, instance in enumerate(game.all_instances()): + self.assertEqual(game.get_instance_index(instance), i) + + def test_get_possible_opponent_values(self): + game = pyspiel.load_game("bargaining") + self.assertEqual( + game.get_possible_opponent_values(0, [1, 2, 3], [8, 1, 0]), + [ + [4, 0, 2], + [7, 0, 1], + [1, 3, 1], + ], + ) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/tests/games_blackjack_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/tests/games_blackjack_test.py new file mode 100644 index 0000000..368004d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/tests/games_blackjack_test.py @@ -0,0 +1,127 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for the game-specific functions for chess.""" + + +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np + +import pyspiel + +blackjack = pyspiel.blackjack + +NUM_SIM_GAMES = 10 +SEED = 87375711 + + +class GamesBlackjackTest(parameterized.TestCase): + + def test_blackjack_game_sim(self): + game = pyspiel.load_game("blackjack") + for _ in range(NUM_SIM_GAMES): + print("----------------") + print("New game") + print("----------------") + state = game.new_initial_state() + while not state.is_terminal(): + if state.is_chance_node(): + outcomes = state.chance_outcomes() + action_list, prob_list = zip(*outcomes) + action = np.random.choice(action_list, p=prob_list) + print("Chance samples", state.action_to_string(action)) + else: + print("Player turn") + print("My cards:", blackjack.cards_to_strings(state.cards(0))) + print("My best total:", state.get_best_player_total(0)) + print( + "Dealer's cards:", + blackjack.cards_to_strings(state.cards(state.dealer_id())), + ) + print( + "Dealer's best total:", + state.get_best_player_total(state.dealer_id()), + ) + actions = state.legal_actions() + action = np.random.choice(actions) + print("Action chosen:", state.action_to_string(action)) + state.apply_action(action) + print("") + print("Terminal state: ") + print(str(state)) + print("Returns:", state.returns()) + print("") + + def test_card_to_string_conversion(self): + for i in range(52): + self.assertEqual(i, blackjack.get_card_by_string( + blackjack.card_to_string(i))) + + def test_blackjack_three_aces(self): + game = pyspiel.load_game("blackjack") + state = game.new_initial_state() + self.assertTrue(state.is_chance_node()) + # Player's cards + state.apply_action(blackjack.get_card_by_string("D6")) + state.apply_action(blackjack.get_card_by_string("DA")) + # Dealer's cards + state.apply_action(blackjack.get_card_by_string("CQ")) + state.apply_action(blackjack.get_card_by_string("C3")) + # Play starts. + self.assertFalse(state.is_chance_node()) + self.assertListEqual(blackjack.cards_to_strings(state.cards(0)), + ["D6", "DA"]) + self.assertEqual(state.get_best_player_total(0), 17) + state.apply_action(blackjack.HIT) + state.apply_action(blackjack.get_card_by_string("SA")) + self.assertEqual(state.get_best_player_total(0), 18) + self.assertListEqual(blackjack.cards_to_strings(state.cards(0)), + ["D6", "DA", "SA"]) + state.apply_action(blackjack.HIT) + state.apply_action(blackjack.get_card_by_string("CA")) + self.assertEqual(state.get_best_player_total(0), 19) + self.assertListEqual(blackjack.cards_to_strings(state.cards(0)), + ["D6", "DA", "SA", "CA"]) + state.apply_action(blackjack.HIT) + state.apply_action(blackjack.get_card_by_string("C2")) + self.assertEqual(state.get_best_player_total(0), 21) + state.apply_action(blackjack.STAND) + self.assertListEqual(blackjack.cards_to_strings(state.cards(0)), + ["D6", "DA", "SA", "CA", "C2"]) + # Dealer's turn. + # Dealer has a 13, must hit. 
+ self.assertTrue(state.is_chance_node()) + self.assertListEqual( + blackjack.cards_to_strings(state.cards(state.dealer_id())), + ["CQ", "C3"]) + state.apply_action(blackjack.get_card_by_string("HA")) + self.assertEqual(state.get_best_player_total(1), 14) + self.assertTrue(state.is_chance_node()) + self.assertListEqual( + blackjack.cards_to_strings(state.cards(state.dealer_id())), + ["CQ", "C3", "HA"]) + state.apply_action(blackjack.get_card_by_string("S3")) + self.assertEqual(state.get_best_player_total(1), 17) + self.assertListEqual( + blackjack.cards_to_strings(state.cards(state.dealer_id())), + ["CQ", "C3", "HA", "S3"]) + # Dealer must stop on 17. This should be a terminal state. + self.assertTrue(state.is_terminal()) + self.assertEqual(state.returns(), [1.0]) + + +if __name__ == "__main__": + np.random.seed(SEED) + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/tests/games_bridge_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/tests/games_bridge_test.py new file mode 100644 index 0000000..c96aa2c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/tests/games_bridge_test.py @@ -0,0 +1,239 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for the game-specific functions for bridge.""" + +import random +import timeit + +from absl.testing import absltest +import numpy as np + +import pyspiel + + +class GamesBridgeTest(absltest.TestCase): + + def test_contract_names(self): + game = pyspiel.load_game('bridge(use_double_dummy_result=false)') + self.assertEqual(game.contract_string(0), 'Passed Out') + self.assertEqual(game.contract_string(38), '1SX N') + + def test_possible_contracts(self): + game = pyspiel.load_game('bridge(use_double_dummy_result=false)') + state = game.new_initial_state() + for a in range(52): + state.apply_action(a) + state.apply_action(59) # 1NT - now South cannot declare notrump + state.apply_action(67) # 3H - now West cannot declare hearts + state.apply_action(86) # 7D + state.apply_action(53) # Dbl + possible_contracts = [ + game.contract_string(i) + for i, v in enumerate(state.possible_contracts()) + if v + ] + self.assertCountEqual(possible_contracts, [ + '7DX S', '7DXX S', '7H N', '7HX N', '7HXX N', '7H E', '7HX E', '7HXX E', + '7H S', '7HX S', '7HXX S', '7S N', '7SX N', '7SXX N', '7S E', '7SX E', + '7SXX E', '7S S', '7SX S', '7SXX S', '7S W', '7SX W', '7SXX W', '7N N', + '7NX N', '7NXX N', '7N E', '7NX E', '7NXX E', '7N W', '7NX W', '7NXX W' + ]) + + def test_scoring(self): + game = pyspiel.load_game('bridge') + state = game.new_initial_state() + # S J9873 + # H A7 + # D KT74 + # C KT + # S AKQT S 42 + # H T852 H K63 + # D AQ D 52 + # C Q64 C A98732 + # S 65 + # H QJ94 + # D J9863 + # C J5 + for a in [ + 7, 28, 37, 2, 45, 3, 25, 51, 27, 48, 5, 43, 23, 13, 12, 8, 22, 46, 38, + 26, 9, 20, 36, 34, 32, 11, 29, 35, 44, 1, 10, 14, 39, 4, 19, 40, 50, 6, + 17, 41, 33, 0, 42, 16, 21, 18, 30, 49, 31, 24, 15, 47 + ]: + state.apply_action(a) + score = { + 
game.contract_string(i): s + for i, s in enumerate(state.score_by_contract()) + } + self.assertEqual(score['3N E'], 100) + self.assertEqual(score['3N W'], -460) + self.assertEqual(score['1N W'], -210) + self.assertEqual(score['3DX S'], -100) + self.assertEqual(score['1CXX E'], -830) + self.assertEqual(score['1CXX W'], -1030) + + def test_score_single_contract(self): + game = pyspiel.load_game('bridge(use_double_dummy_result=false)') + state = game.new_initial_state() + # S T3 + # H QT42 + # D A82 + # C A632 + # S KJ5 S Q7 + # H A965 H KJ8 + # D Q43 D KJT5 + # C T87 C Q954 + # S A98642 + # H 73 + # D 976 + # C KJ + for a in [ + 49, 45, 31, 5, 10, 40, 27, 47, 35, 38, 17, 14, 0, 33, 21, 39, 34, 12, + 22, 41, 1, 13, 36, 9, 4, 46, 11, 32, 2, 37, 29, 30, 7, 8, 19, 24, 16, + 43, 51, 15, 48, 23, 6, 20, 42, 26, 44, 50, 25, 28, 3, 18 + ]: + state.apply_action(a) + cid = { + game.contract_string(i): i for i in range(game.num_possible_contracts()) + } + self.assertEqual(state.score_for_contracts(0, [cid['1H E']]), [-110]) + self.assertEqual( + state.score_for_contracts(1, [cid['1H E'], cid['1H W']]), [110, 80]) + self.assertEqual( + state.score_for_contracts(2, [cid['1H E'], cid['2H E'], cid['3H E']]), + [-110, -110, 50]) + self.assertEqual( + state.score_for_contracts(3, [cid['1H W'], cid['3N W']]), [80, -50]) + self.assertEqual(state.score_for_contracts(0, [cid['1DX N']]), [-300]) + self.assertEqual(state.score_for_contracts(1, [cid['1CXX W']]), [430]) + + def test_benchmark_score_single(self): + game = pyspiel.load_game('bridge(use_double_dummy_result=false)') + state = game.new_initial_state() + for a in [ + 49, 45, 31, 5, 10, 40, 27, 47, 35, 38, 17, 14, 0, 33, 21, 39, 34, 12, + 22, 41, 1, 13, 36, 9, 4, 46, 11, 32, 2, 37, 29, 30, 7, 8, 19, 24, 16, + 43, 51, 15, 48, 23, 6, 20, 42, 26, 44, 50, 25, 28, 3, 18 + ]: + state.apply_action(a) + cid = { + game.contract_string(i): i for i in range(game.num_possible_contracts()) + } + + for contracts in ( + ['1H E'], + ['1H E', '1H W'], + ['1H E', '2H E', '3H E'], + ['1H E', '1CXX W'], + list(cid), + ): + cids = [cid[c] for c in contracts] + def benchmark(cids=cids): + working_state = state.clone() + _ = working_state.score_for_contracts(0, cids) + repeat = 1 + times = np.array(timeit.repeat(benchmark, number=1, repeat=repeat)) + print(f'{contracts} mean {times.mean():.4}s, min {times.min():.4}s') + + def test_public_observation(self): + game = pyspiel.load_game('bridge(use_double_dummy_result=false)') + state = game.new_initial_state() + for a in range(52): + state.apply_action(a) + state.apply_action(52) # Pass + state.apply_action(59) # 1NT + obs = state.public_observation_tensor() + self.assertLen(obs, game.public_observation_tensor_size()) + + def test_private_observation(self): + game = pyspiel.load_game('bridge(use_double_dummy_result=false)') + state = game.new_initial_state() + # S T3 + # H QT42 + # D A82 + # C A632 + # S KJ5 S Q7 + # H A965 H KJ8 + # D Q43 D KJT5 + # C T87 C Q954 + # S A98642 + # H 73 + # D 976 + # C KJ + for a in [ + 49, 45, 31, 5, 10, 40, 27, 47, 35, 38, 17, 14, 0, 33, 21, 39, 34, 12, + 22, 41, 1, 13, 36, 9, 4, 46, 11, 32, 2, 37, 29, 30, 7, 8, 19, 24, 16, + 43, 51, 15, 48, 23, 6, 20, 42, 26, 44, 50, 25, 28, 3, 18 + ]: + state.apply_action(a) + obs = state.private_observation_tensor(0) + self.assertLen(obs, game.private_observation_tensor_size()) + self.assertEqual(obs, [ + 1.0, 1.0, 1.0, 0.0, # C2, D2, H2 + 1.0, 0.0, 0.0, 1.0, # C3, S3 + 0.0, 0.0, 1.0, 0.0, # H4 + 0.0, 0.0, 0.0, 0.0, # No 5s + 1.0, 0.0, 0.0, 0.0, # C6 + 0.0, 0.0, 
0.0, 0.0, # No 7s + 0.0, 1.0, 0.0, 0.0, # D8 + 0.0, 0.0, 0.0, 0.0, # No 9s + 0.0, 0.0, 1.0, 1.0, # H10, S10 + 0.0, 0.0, 0.0, 0.0, # No Jacks + 0.0, 0.0, 1.0, 0.0, # HQ + 0.0, 0.0, 0.0, 0.0, # No kings + 1.0, 1.0, 0.0, 0.0 # CA, DA + ]) + + def test_benchmark_observation(self): + game = pyspiel.load_game('bridge(use_double_dummy_result=false)') + + def make_state(): + state = game.new_initial_state() + for _ in range(60): + a = random.choice(state.legal_actions()) + state.apply_action(a) + if state.is_terminal(): break + return state + + batch_size = 16 + obs_shape = [batch_size] + game.observation_tensor_shape() + states = [make_state() for _ in range(batch_size)] + + def make_obs_copy(): + inputs = np.zeros(obs_shape) + for i in range(batch_size): + if not states[i].is_terminal(): + inputs[i, :] = states[i].observation_tensor() + return inputs + + def make_obs_inplace(): + inputs = np.zeros(obs_shape, np.float32) + for i in range(batch_size): + if not states[i].is_terminal(): + states[i].write_observation_tensor(inputs[i]) + return inputs + + repeat = 2 + number = 2 + times = np.array(timeit.repeat(make_obs_copy, number=number, repeat=repeat)) + print(f'OpenSpiel {times.mean():.4}s, min {times.min():.4}s') + times = np.array( + timeit.repeat(make_obs_inplace, number=number, repeat=repeat)) + print(f'In-place {times.mean():.4}s, min {times.min():.4}s') + + np.testing.assert_array_equal(make_obs_copy(), make_obs_inplace()) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/tests/games_chess_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/tests/games_chess_test.py new file mode 100644 index 0000000..be599fd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/tests/games_chess_test.py @@ -0,0 +1,144 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for the game-specific functions for chess.""" + + +from absl import flags +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np + +import pyspiel +from open_spiel.python.utils import file_utils + +chess = pyspiel.chess + + +FLAGS = flags.FLAGS + +# From CMakeLists.txt:Python tests are run from the main binary directory which +# will be something like build/python. 
+flags.DEFINE_string( + "chess960_fens_file", + "../../open_spiel/games/chess/chess960_starting_positions.txt", + "FENs database for chess960", +) + + +class GamesChessTest(parameterized.TestCase): + + def test_bindings_sim(self): + game = pyspiel.load_game("chess") + state = game.new_initial_state() + board = None + while not state.is_terminal(): + print(state) + player = state.current_player() + legal_actions = state.legal_actions() + board = state.board() + for action in legal_actions: + action_str = state.action_to_string(player, action) + move = chess.action_to_move(action, board) + move_from = move.from_square + move_to = move.to_square + decoded_from_to = (f"({move_from.x} {move_from.y}) -> " + + f"({move_to.x} {move_to.y})") + print(f"Legal action: {action_str} decoded from to {decoded_from_to}") + print(f"Move representations: {move.to_string()} | " + + f"{move.to_lan()} | {move.to_san(board)}") + # Now do the reverse mapping from both string representations to check + # that they correspond to this action. + action_from_lan = state.parse_move_to_action(move.to_lan()) + action_from_san = state.parse_move_to_action(move.to_san(board)) + self.assertEqual(action, action_from_lan) + self.assertEqual(action, action_from_san) + action = np.random.choice(legal_actions) + state.apply_action(action) + print(board.to_unicode_string()) + print(board.debug_string()) + print("Moves history:") + print(" ".join([move.to_lan() for move in state.moves_history()])) + self.assertTrue(state.is_terminal()) + + def test_state_from_fen(self): + game = pyspiel.load_game("chess") + fen_string = "8/k1P5/8/1K6/8/8/8/8 w - - 0 1" + state = game.new_initial_state(fen_string) + self.assertEqual(state.board().to_fen(), fen_string) + self.assertEqual(state.num_repetitions(state), 1) + + @parameterized.parameters( + "bbqnnrkr/pppppppp/8/8/8/8/PPPPPPPP/BBQNNRKR w KQkq - 0 1", + "rnbnkbqr/pppppppp/8/8/8/8/PPPPPPPP/RNBNKBQR w KQkq - 0 1", + "rkrnnqbb/pppppppp/8/8/8/8/PPPPPPPP/RKRNNQBB w KQkq - 0 1", + ) + def test_chess960_sim_specific_fens(self, initial_fen): + game = pyspiel.load_game("chess(chess960=true)") + state = game.new_initial_state(initial_fen) + while not state.is_terminal(): + assert not state.is_chance_node() + legal_actions = state.legal_actions() + action = np.random.choice(legal_actions) + state.apply_action(action) + + def test_chess_action_conversions(self): + game = pyspiel.load_game("chess") + state = game.new_initial_state() + for _ in range(10): + while not state.is_terminal(): + assert not state.is_chance_node() + legal_actions = state.legal_actions() + for action in legal_actions: + move = chess.action_to_move(action, state.board()) + move_uci = move.to_lan() + action_mapped = chess.move_to_action(move, 8) + self.assertEqual( + action, action_mapped, f"Error for action {move_uci}" + ) + action = np.random.choice(legal_actions) + state.apply_action(action) + + def test_chess960_game_sim(self): + fens_filename = file_utils.find_file(FLAGS.chess960_fens_file, 1) + if fens_filename is not None: + print("Found chess960 fens file. 
Running simulation tests.") + game = pyspiel.load_game( + f"chess(chess960=true,chess960_fens_file={fens_filename})" + ) + for _ in range(10): + state = game.new_initial_state() + assert state.is_chance_node() + outcomes = state.chance_outcomes() + assert len(outcomes) == 960 + action_list, prob_list = zip(*outcomes) + action = np.random.choice(action_list, p=prob_list) + state.apply_action(action) + while not state.is_terminal(): + assert not state.is_chance_node() + legal_actions = state.legal_actions() + for action in legal_actions: + move = chess.action_to_move(action, state.board()) + move_uci = move.to_lan() + action_mapped = chess.move_to_action(move, 8) + self.assertEqual( + action, action_mapped, f"Error for action {move_uci}" + ) + action = np.random.choice(legal_actions) + state.apply_action(action) + + +if __name__ == "__main__": + np.random.seed(87375711) + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/tests/games_euchre_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/tests/games_euchre_test.py new file mode 100644 index 0000000..5cccc06 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/tests/games_euchre_test.py @@ -0,0 +1,86 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for the game-specific functions for euchre.""" + + +from absl.testing import absltest + +import pyspiel +euchre = pyspiel.euchre + + +class GamesEuchreTest(absltest.TestCase): + + def test_bindings(self): + self.assertEqual(euchre.JACK_RANK, 2) + self.assertEqual(euchre.NUM_SUITS, 4) + self.assertEqual(euchre.NUM_CARDS_PER_SUIT, 6) + self.assertEqual(euchre.NUM_CARDS, 24) + self.assertEqual(euchre.PASS_ACTION, 24) + self.assertEqual(euchre.CLUBS_TRUMP_ACTION, 25) + self.assertEqual(euchre.DIAMONDS_TRUMP_ACTION, 26) + self.assertEqual(euchre.HEARTS_TRUMP_ACTION, 27) + self.assertEqual(euchre.SPADES_TRUMP_ACTION, 28) + self.assertEqual(euchre.GO_ALONE_ACTION, 29) + self.assertEqual(euchre.PLAY_WITH_PARTNER_ACTION, 30) + self.assertEqual(euchre.MAX_BIDS, 8) + self.assertEqual(euchre.NUM_TRICKS, 5) + self.assertEqual(euchre.FULL_HAND_SIZE, 5) + game = pyspiel.load_game('euchre') + state = game.new_initial_state() + self.assertEqual(state.num_cards_dealt(), 0) + self.assertEqual(state.num_cards_played(), 0) + self.assertEqual(state.num_passes(), 0) + self.assertEqual(state.upcard(), pyspiel.INVALID_ACTION) + self.assertEqual(state.discard(), pyspiel.INVALID_ACTION) + self.assertEqual(state.trump_suit(), pyspiel.INVALID_ACTION) + self.assertEqual(state.left_bower(), pyspiel.INVALID_ACTION) + self.assertEqual(state.right_bower(), pyspiel.INVALID_ACTION) + self.assertEqual(state.declarer(), pyspiel.PlayerId.INVALID) + self.assertEqual(state.declarer_partner(), pyspiel.PlayerId.INVALID) + self.assertEqual(state.first_defender(), pyspiel.PlayerId.INVALID) + self.assertEqual(state.second_defender(), pyspiel.PlayerId.INVALID) + self.assertIsNone(state.declarer_go_alone()) + self.assertEqual(state.lone_defender(), pyspiel.PlayerId.INVALID) + self.assertEqual(state.active_players(), [True, True, True, True]) + self.assertEqual(state.dealer(), pyspiel.INVALID_ACTION) + self.assertEqual(state.current_phase(), euchre.Phase.DEALER_SELECTION) + self.assertEqual(state.current_trick_index(), 0) + self.assertEqual(state.card_holder(), [None] * 24) + self.assertEqual(euchre.card_rank(8), euchre.JACK_RANK) + self.assertEqual(euchre.card_rank(8, euchre.Suit.CLUBS), 100) + self.assertEqual(euchre.card_suit(8), euchre.Suit.CLUBS) + self.assertEqual(euchre.card_suit(8, euchre.Suit.SPADES), + euchre.Suit.SPADES) + self.assertEqual(euchre.card_string(8), 'CJ') + trick = state.tricks()[state.current_trick_index()] + self.assertEqual(trick.winning_card(), pyspiel.INVALID_ACTION) + self.assertEqual(trick.led_suit(), euchre.Suit.INVALID_SUIT) + self.assertEqual(trick.trump_suit(), euchre.Suit.INVALID_SUIT) + self.assertFalse(trick.trump_played()) + self.assertEqual(trick.leader(), pyspiel.PlayerId.INVALID) + self.assertEqual(trick.winner(), pyspiel.PlayerId.INVALID) + self.assertEqual(trick.cards(), [pyspiel.INVALID_ACTION]) + trick = state.current_trick() + self.assertEqual(trick.led_suit(), euchre.Suit.INVALID_SUIT) + self.assertEqual(trick.trump_suit(), euchre.Suit.INVALID_SUIT) + self.assertFalse(trick.trump_played()) + self.assertEqual(trick.leader(), pyspiel.PlayerId.INVALID) + self.assertEqual(trick.winner(), pyspiel.PlayerId.INVALID) + self.assertEqual(trick.cards(), [pyspiel.INVALID_ACTION]) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/tests/games_gin_rummy_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/tests/games_gin_rummy_test.py new file mode 100644 index 0000000..e63d664 --- /dev/null +++ 
b/scenarios/bargaining/open_spiel/open_spiel/python/tests/games_gin_rummy_test.py @@ -0,0 +1,110 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for the game-specific functions for gin rummy.""" + + +from absl.testing import absltest + +import pyspiel +gin_rummy = pyspiel.gin_rummy + + +class GamesGinRummyTest(absltest.TestCase): + + def test_bindings(self): + # gin_rummy submodule attributes + self.assertEqual(gin_rummy.DEFAULT_NUM_RANKS, 13) + self.assertEqual(gin_rummy.DEFAULT_NUM_SUITS, 4) + self.assertEqual(gin_rummy.DEFAULT_NUM_CARDS, 52) + self.assertEqual(gin_rummy.NUM_PLAYERS, 2) + self.assertEqual(gin_rummy.MAX_POSSIBLE_DEADWOOD, 98) + self.assertEqual(gin_rummy.MAX_NUM_DRAW_UPCARD_ACTIONS, 50) + self.assertEqual(gin_rummy.DEFAULT_HAND_SIZE, 10) + self.assertEqual(gin_rummy.WALL_STOCK_SIZE, 2) + self.assertEqual(gin_rummy.DEFAULT_KNOCK_CARD, 10) + self.assertEqual(gin_rummy.DEFAULT_GIN_BONUS, 25) + self.assertEqual(gin_rummy.DEFAULT_UNDERCUT_BONUS, 25) + self.assertEqual(gin_rummy.DRAW_UPCARD_ACTION, 52) + self.assertEqual(gin_rummy.DRAW_STOCK_ACTION, 53) + self.assertEqual(gin_rummy.PASS_ACTION, 54) + self.assertEqual(gin_rummy.KNOCK_ACTION, 55) + self.assertEqual(gin_rummy.MELD_ACTION_BASE, 56) + self.assertEqual(gin_rummy.NUM_MELD_ACTIONS, 185) + self.assertEqual(gin_rummy.NUM_DISTINCT_ACTIONS, 241) + self.assertEqual(gin_rummy.OBSERVATION_TENSOR_SIZE, 644) + # Game bindings + game = pyspiel.load_game('gin_rummy') + self.assertFalse(game.oklahoma()) + self.assertEqual(game.knock_card(), 10) + # State bindings + state = game.new_initial_state() + self.assertEqual(state.current_phase(), gin_rummy.Phase.DEAL) + self.assertEqual(state.current_player(), pyspiel.PlayerId.CHANCE) + self.assertIsNone(state.upcard()) + self.assertEqual(state.stock_size(), 52) + self.assertEqual(state.hands(), [[], []]) + self.assertEqual(state.discard_pile(), []) + self.assertEqual(state.deadwood(), [0, 0]) + self.assertEqual(state.knocked(), [False, False]) + self.assertEqual(state.pass_on_first_upcard(), [False, False]) + self.assertEqual(state.layed_melds(), [[], []]) + self.assertEqual(state.layoffs(), []) + self.assertFalse(state.finished_layoffs()) + # Utils + utils = gin_rummy.GinRummyUtils(gin_rummy.DEFAULT_NUM_RANKS, + gin_rummy.DEFAULT_NUM_SUITS, + gin_rummy.DEFAULT_HAND_SIZE) + self.assertEqual(utils.card_string(0), 'As') + self.assertEqual(utils.hand_to_string([0, 1, 2]), + '+--------------------------+\n' + '|As2s3s |\n' + '| |\n' + '| |\n' + '| |\n' + '+--------------------------+\n') + self.assertEqual(utils.card_int('As'), 0) + self.assertEqual(utils.card_ints_to_card_strings([0, 1, 2]), + ['As', '2s', '3s']) + self.assertEqual(utils.card_strings_to_card_ints(['As', '2s', '3s']), + [0, 1, 2]) + self.assertEqual(utils.card_value(0), 1) + self.assertEqual(utils.total_card_value([50, 51]), 20) + self.assertEqual(utils.total_card_value([[0, 1], [50, 51]]), 23) + self.assertEqual(utils.card_rank(51), 12) + 
self.assertEqual(utils.card_suit(51), 3) + self.assertTrue(utils.is_consecutive([0, 1, 2])) + self.assertTrue(utils.is_rank_meld([0, 13, 26])) + self.assertTrue(utils.is_suit_meld([0, 1, 2])) + self.assertEqual(utils.rank_melds([0, 1, 13, 26]), [[0, 13, 26]]) + self.assertEqual(utils.suit_melds([0, 5, 6, 7]), [[5, 6, 7]]) + self.assertEqual(utils.all_melds([0, 5, 6, 7, 13, 26]), + [[0, 13, 26], [5, 6, 7]]) + self.assertEqual(utils.all_meld_groups([0, 5, 6, 7, 13, 26]), + [[[0, 13, 26], [5, 6, 7]], [[5, 6, 7], [0, 13, 26]]]) + self.assertEqual(utils.best_meld_group([0, 5, 6, 7, 13, 26]), + [[0, 13, 26], [5, 6, 7]]) + self.assertEqual(utils.min_deadwood([0, 1, 2], 3), 0) + self.assertEqual(utils.min_deadwood([0, 1, 2]), 0) + self.assertEqual(utils.rank_meld_layoff([0, 13, 26]), 39) + self.assertEqual(utils.suit_meld_layoffs([0, 1, 2]), [3]) + self.assertEqual(utils.legal_melds([0, 1, 2, 3], 10), [65, 66, 109]) + self.assertEqual(utils.legal_discards([0, 1, 2], 10), [0, 1, 2]) + self.assertEqual(utils.all_layoffs([65], [3]), [4]) + self.assertEqual(utils.meld_to_int([0, 1, 2]), 65) + self.assertEqual(utils.int_to_meld[65], [0, 1, 2]) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/tests/games_sim_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/tests/games_sim_test.py new file mode 100644 index 0000000..c6b391c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/tests/games_sim_test.py @@ -0,0 +1,424 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Python spiel example.""" + + +import pickle + +from absl import app +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np + +from open_spiel.python import games # pylint: disable=unused-import +from open_spiel.python.algorithms import get_all_states +from open_spiel.python.mfg import games as mfg_games # pylint:disable=unused-import +import pyspiel +from open_spiel.python.utils import file_utils +# TODO(author18): add predator_prey in the list of game tested + +# Put a bound on length of game so test does not timeout. +MAX_ACTIONS_PER_GAME = 1000 + +# All games registered in the main spiel library. +SPIEL_GAMES_LIST = pyspiel.registered_games() + +# All games loadable without parameter values. +SPIEL_LOADABLE_GAMES_LIST = [g for g in SPIEL_GAMES_LIST if g.default_loadable] + +# A list of games to exclude from the general simulation tests. This should +# remain empty, but it is helpful to use while a game is under construction. +SPIEL_EXCLUDE_SIMS_TEST_GAMES_LIST = [] + +# TODO(b/141950198): Stop hard-coding the number of loadable games. +assert len(SPIEL_LOADABLE_GAMES_LIST) >= 38, len(SPIEL_LOADABLE_GAMES_LIST) + +# All simultaneous games. 
+SPIEL_SIMULTANEOUS_GAMES_LIST = [ + g for g in SPIEL_LOADABLE_GAMES_LIST + if g.dynamics == pyspiel.GameType.Dynamics.SIMULTANEOUS +] +assert len(SPIEL_SIMULTANEOUS_GAMES_LIST) >= 14, len( + SPIEL_SIMULTANEOUS_GAMES_LIST) + +# All multiplayer games. This is a list of (game, num_players) pairs to test. +SPIEL_MULTIPLAYER_GAMES_LIST = [ + # pylint: disable=g-complex-comprehension + (g, p) + for g in SPIEL_LOADABLE_GAMES_LIST + for p in range(max(g.min_num_players, 2), 1 + min(g.max_num_players, 6)) + if g.max_num_players > 2 and g.max_num_players > g.min_num_players and + g.short_name != "tiny_hanabi" # default payoff only works for 2p + # cannot change the number of players without changing other parameters + and g.short_name != "universal_poker" and g.short_name != "scotland_yard" +] +assert len(SPIEL_MULTIPLAYER_GAMES_LIST) >= 35, len( + SPIEL_MULTIPLAYER_GAMES_LIST) + + +class GamesSimTest(parameterized.TestCase): + + def apply_action(self, state, action): + if state.is_simultaneous_node(): + assert isinstance(action, list) + state.apply_actions(action) + else: + state.apply_action(action) + + def apply_action_test_clone(self, state, action): + """Applies the action and tests the clone method if it's implemented.""" + try: + state_clone = state.clone() + except Exception: # pylint: disable=broad-except + self.apply_action(state, action) + return + self.assertEqual(str(state), str(state_clone)) + self.assertEqual(state.history(), state_clone.history()) + self.apply_action(state, action) + self.apply_action(state_clone, action) + self.assertEqual(str(state), str(state_clone)) + self.assertEqual(state.history(), state_clone.history()) + + def serialize_deserialize(self, game, state, check_pyspiel_serialization, + check_pickle_serialization): + # OpenSpiel native serialization + if check_pyspiel_serialization: + ser_str = pyspiel.serialize_game_and_state(game, state) + new_game, new_state = pyspiel.deserialize_game_and_state(ser_str) + self.assertEqual(str(game), str(new_game)) + self.assertEqual(str(state), str(new_state)) + if check_pickle_serialization: + # Pickle serialization + deserialization (of the state). + pickled_state = pickle.dumps(state) + unpickled_state = pickle.loads(pickled_state) + self.assertEqual(str(state), str(unpickled_state)) + + def sim_game( + self, + game, + check_pyspiel_serialization=True, + check_pickle_serialization=True, + make_distribution_fn=( + lambda states: ([1 / len(states)] * len(states) if states else [])) + ): + min_utility = game.min_utility() + max_utility = game.max_utility() + self.assertLess(min_utility, max_utility) + + if check_pickle_serialization: + # Pickle serialization + deserialization (of the game). + pickled_game = pickle.dumps(game) + unpickled_game = pickle.loads(pickled_game) + self.assertEqual(str(game), str(unpickled_game)) + + # Pickle serialization + deserialization (of the game type). + pickled_game_type = pickle.dumps(game.get_type()) + unpickled_game_type = pickle.loads(pickled_game_type) + self.assertEqual(game.get_type(), unpickled_game_type) + + # Get a new state + for state in game.new_initial_states(): + total_actions = 0 + + next_serialize_check = 1 + + while not state.is_terminal() and total_actions <= MAX_ACTIONS_PER_GAME: + total_actions += 1 + + # Serialize/Deserialize is costly. Only do it every power of 2 actions. 
+ if total_actions >= next_serialize_check: + self.serialize_deserialize(game, state, check_pyspiel_serialization, + check_pickle_serialization) + next_serialize_check *= 2 + + # The state can be four different types: chance node, + # mean-field-game node, simultaneous node, or decision node + if state.is_chance_node(): + # Chance node: sample an outcome + outcomes = state.chance_outcomes() + self.assertNotEmpty(outcomes) + action_list, prob_list = zip(*outcomes) + action = np.random.choice(action_list, p=prob_list) + state.apply_action(action) + elif state.is_simultaneous_node(): + # Simultaneous node: sample actions for all players + chosen_actions = [] + for pid in range(game.num_players()): + legal_actions = state.legal_actions(pid) + action = 0 if not legal_actions else np.random.choice(legal_actions) + chosen_actions.append(action) + # Apply the joint action and test cloning states. + self.apply_action_test_clone(state, chosen_actions) + elif state.is_mean_field_node(): + self.assertEqual(game.get_type().dynamics, + pyspiel.GameType.Dynamics.MEAN_FIELD) + state.update_distribution( + make_distribution_fn(state.distribution_support())) + else: + self.assertTrue(state.is_player_node()) + # Decision node: sample action for the single current player + action = np.random.choice(state.legal_actions(state.current_player())) + # Apply action and test state cloning. + self.apply_action_test_clone(state, action) + + # Max sure at least one action was made. + self.assertGreater(total_actions, 0, + "No actions taken in sim of " + str(game)) + + # Either the game is now done, or the maximum actions has been taken. + if state.is_terminal(): + # Check there are no legal actions. + self.assertEmpty(state.legal_actions()) + for player in range(game.num_players()): + self.assertEmpty(state.legal_actions(player)) + # Print utilities for each player. + utilities = state.returns() + # Check that player returns are correct + for player in range(game.num_players()): + self.assertEqual(state.player_return(player), utilities[player]) + # Check that each one is in range + for utility in utilities: + self.assertGreaterEqual(utility, game.min_utility()) + self.assertLessEqual(utility, game.max_utility()) + print("Sim of game {} terminated with {} total actions. Utilities: {}" + .format(game, total_actions, utilities)) + else: + print("Sim of game {} terminated after maximum number of actions {}" + .format(game, MAX_ACTIONS_PER_GAME)) + + @parameterized.named_parameters((game_info.short_name, game_info) + for game_info in SPIEL_LOADABLE_GAMES_LIST) + def test_game_sim(self, game_info): + if game_info.short_name in SPIEL_EXCLUDE_SIMS_TEST_GAMES_LIST: + print(f"{game_info.short_name} is excluded from sim tests. 
Skipping.") + return + game = pyspiel.load_game(game_info.short_name) + self.assertLessEqual(game_info.min_num_players, game.num_players()) + self.assertLessEqual(game.num_players(), game_info.max_num_players) + self.sim_game(game) + + @parameterized.named_parameters( + (game_info.short_name, game_info) + for game_info in SPIEL_SIMULTANEOUS_GAMES_LIST) + def test_simultaneous_game_as_turn_based(self, game_info): + converted_game = pyspiel.load_game_as_turn_based(game_info.short_name) + self.sim_game(converted_game) + + @parameterized.named_parameters((f"{p}p_{g.short_name}", g, p) + for g, p in SPIEL_MULTIPLAYER_GAMES_LIST) + def test_multiplayer_game(self, game_info, num_players): + if game_info.short_name == "python_mfg_predator_prey": + reward_matrix = np.ones((num_players, num_players)) + # Construct an initial distribution matrix of suitable dimensions. + zero_mat = np.zeros((5, 5)) + pop_1 = zero_mat.copy() + pop_1[0, 0] = 1.0 + pop_1 = pop_1.tolist() + pop_2 = zero_mat.copy() + pop_2[0, -1] = 1.0 + pop_2 = pop_2.tolist() + pop_3 = zero_mat.copy() + pop_3[-1, 0] = 1.0 + pop_3 = pop_3.tolist() + pop_4 = zero_mat.copy() + pop_4[-1, -1] = 1.0 + pop_4 = pop_4.tolist() + pops = [pop_1, pop_2, pop_3, pop_4] + init_distrib = [] + for p in range(num_players): + init_distrib += pops[p%4] + init_distrib = np.array(init_distrib) + dict_args = { + "players": num_players, + "reward_matrix": " ".join(str(v) for v in reward_matrix.flatten()), + "init_distrib": " ".join(str(v) for v in init_distrib.flatten()), + } + elif game_info.short_name == "quoridor" and num_players == 4: + print("Skipping 4P Quoridor in tests as it has known problems.") + print("See https://github.com/google-deepmind/open_spiel/issues/1349") + return + else: + dict_args = {"players": num_players} + game = pyspiel.load_game(game_info.short_name, dict_args) + self.sim_game(game) + + def test_breakthrough(self): + # make a smaller (6x6) board + game = pyspiel.load_game("breakthrough(rows=6,columns=6)") + self.sim_game(game) + + def test_pig(self): + # make a smaller lower win score + game = pyspiel.load_game("pig(players=2,winscore=15)") + self.sim_game(game) + + def test_efg_game(self): + game = pyspiel.load_efg_game(pyspiel.get_sample_efg_data()) + # EFG games loaded directly by string cannot serialize because the game's + # data cannot be passed in via string parameter. + for _ in range(0, 100): + self.sim_game( + game, + check_pyspiel_serialization=False, + check_pickle_serialization=False) + game = pyspiel.load_efg_game(pyspiel.get_kuhn_poker_efg_data()) + for _ in range(0, 100): + self.sim_game( + game, + check_pyspiel_serialization=False, + check_pickle_serialization=False) + # EFG games loaded by file should serialize properly: + filename = file_utils.find_file( + "third_party/open_spiel/games/efg/sample.efg", 2) + if filename is not None: + game = pyspiel.load_game("efg_game(filename=" + filename + ")") + for _ in range(0, 100): + self.sim_game(game) + filename = file_utils.find_file( + "third_party/open_spiel/games/efg/sample.efg", 2) + if filename is not None: + game = pyspiel.load_game("efg_game(filename=" + filename + ")") + for _ in range(0, 100): + self.sim_game(game) + + def test_backgammon_checker_moves(self): + game = pyspiel.load_game("backgammon") + state = game.new_initial_state() + state.apply_action(0) # Roll 12 and X starts + action = state.legal_actions()[0] # First legal action + # X has player id 0. 
+ checker_moves = state.spiel_move_to_checker_moves(0, action) + print("Checker moves:") + for i in range(2): + print("pos {}, num {}, hit? {}".format(checker_moves[i].pos, + checker_moves[i].num, + checker_moves[i].hit)) + action2 = state.checker_moves_to_spiel_move(checker_moves) + self.assertEqual(action, action2) + action3 = state.translate_action(0, 0, True) # 0->2, 0->1 + self.assertEqual(action3, 0) + + def test_backgammon_checker_moves_with_hit_info(self): + game = pyspiel.load_game("backgammon") + state = game.new_initial_state() + while not state.is_terminal(): + if state.is_chance_node(): + outcomes_with_probs = state.chance_outcomes() + action_list, prob_list = zip(*outcomes_with_probs) + action = np.random.choice(action_list, p=prob_list) + state.apply_action(action) + else: + legal_actions = state.legal_actions() + player = state.current_player() + for action in legal_actions: + action_str = state.action_to_string(player, action) + checker_moves = ( + state.augment_with_hit_info( + player, state.spiel_move_to_checker_moves(player, action))) + if checker_moves[0].hit or checker_moves[1].hit: + self.assertGreaterEqual(action_str.find("*"), 0) + else: + self.assertLess(action_str.find("*"), 0) + if action_str.find("*") > 0: + self.assertTrue(checker_moves[0].hit or checker_moves[1].hit) + else: + self.assertTrue(not checker_moves[0].hit and + not checker_moves[1].hit) + action = np.random.choice(legal_actions) + state.apply_action(action) + + def test_leduc_get_and_set_private_cards(self): + game = pyspiel.load_game("leduc_poker") + state = game.new_initial_state() + state.apply_action(0) # give player 0 jack of first suit + state.apply_action(1) # give player 1 jack of second suit + # check that we can retrieve those cards + print(state) + private_cards = state.get_private_cards() + self.assertEqual(private_cards, [0, 1]) + # now give them queens instead, get them again, and check that it worked + state.set_private_cards([2, 3]) + print(state) + private_cards = state.get_private_cards() + self.assertEqual(private_cards, [2, 3]) + + def test_dots_and_boxes_with_notation(self): + game = pyspiel.load_game("dots_and_boxes") + state = game.new_initial_state() + state.apply_action(0) # horizontal 0, 0 + state.apply_action(1) # horizontal 0, 1 + # check that we can retrieve the notiation + dbn = state.dbn_string() + self.assertEqual(dbn, "110000000000") + + def test_spades_get_and_set_scores(self): + game = pyspiel.load_game("spades") + state = game.new_initial_state() + # check that we can retrieve those cards + current_scores = state.get_current_scores() + self.assertEqual(current_scores, [0, 0]) + # now set scores to something else and check again + state.set_current_scores([59, 131]) + current_scores = state.get_current_scores() + self.assertEqual(current_scores, [59, 131]) + + @parameterized.parameters( + {"game_name": "blotto"}, + {"game_name": "goofspiel"}, + {"game_name": "kuhn_poker"}, + {"game_name": "tiny_hanabi"}, + {"game_name": "phantom_ttt"}, + {"game_name": "matrix_rps"}, + {"game_name": "kuhn_poker"}, + ) + def test_restricted_nash_response_test(self, game_name): + rnr_game = pyspiel.load_game( + f"restricted_nash_response(game={game_name}())") + for _ in range(10): + self.sim_game(rnr_game, check_pyspiel_serialization=False, + check_pickle_serialization=False) + + # TODO(author18): find the list of games where it is reasonable to call + # get_all_states + @parameterized.parameters( + {"game_name": "python_mfg_crowd_modelling"}, + {"game_name": "mfg_crowd_modelling"}, 
+ # {"game_name": "mfg_crowd_modelling_2d"}, + {"game_name": "kuhn_poker"}, + {"game_name": "leduc_poker"}, + ) + def test_has_at_least_an_action(self, game_name): + """Check that all population's state have at least one action.""" + game = pyspiel.load_game(game_name) + to_string = ( + lambda s: s.observation_string(pyspiel.PlayerId.DEFAULT_PLAYER_ID)) + states = get_all_states.get_all_states( + game, + depth_limit=-1, + include_terminals=False, + include_chance_states=False, + include_mean_field_states=False, + to_string=to_string) + for state in states.values(): + self.assertNotEmpty(state.legal_actions()) + + +def main(_): + absltest.main() + + +if __name__ == "__main__": + # Necessary to run main via app.run for internal tests. + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/tests/games_tic_tac_toe_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/tests/games_tic_tac_toe_test.py new file mode 100644 index 0000000..1c78ec7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/tests/games_tic_tac_toe_test.py @@ -0,0 +1,66 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for the game-specific functions for tic_tac_toe.""" + + +from absl.testing import absltest + +import pyspiel +ttt = pyspiel.tic_tac_toe + + +def make_game(): + return pyspiel.load_game("tic_tac_toe") + + +class GamesTicTacToeTest(absltest.TestCase): + + def test_constants(self): + self.assertEqual(ttt.NUM_ROWS, 3) + self.assertEqual(ttt.NUM_COLS, 3) + self.assertEqual(ttt.NUM_CELLS, 9) + self.assertEqual(ttt.CellState.EMPTY.value, 0) + self.assertEqual(ttt.CellState.NOUGHT.value, 1) + self.assertEqual(ttt.CellState.CROSS.value, 2) + + def test_player_to_cellstate(self): + self.assertEqual(ttt.player_to_cellstate(0), + ttt.CellState.CROSS) + self.assertEqual(ttt.player_to_cellstate(1), + ttt.CellState.NOUGHT) + + def test_cellstate_to_string(self): + self.assertEqual(ttt.cellstate_to_string(ttt.CellState.EMPTY), ".") + self.assertEqual(ttt.cellstate_to_string(ttt.CellState.NOUGHT), "o") + self.assertEqual(ttt.cellstate_to_string(ttt.CellState.CROSS), "x") + + def test_board_at(self): + game = make_game() + state = game.new_initial_state() + state.apply_action(4) + self.assertEqual(state.board_at(1, 1), ttt.CellState.CROSS) + + def test_board(self): + game = make_game() + state = game.new_initial_state() + state.apply_action(0) + state.apply_action(1) + self.assertEqual(state.board(), [ + ttt.CellState.CROSS, + ttt.CellState.NOUGHT] + [ttt.CellState.EMPTY] * 7) + +if __name__ == "__main__": + absltest.main() + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/tests/matrix_game_utils_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/tests/matrix_game_utils_test.py new file mode 100644 index 0000000..b6e52a0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/tests/matrix_game_utils_test.py @@ -0,0 +1,68 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed 
under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests the C++ matrix game utility methods exposed to Python.""" + +from absl.testing import absltest + +from open_spiel.python.algorithms import lp_solver +import pyspiel + + +class MatrixGamesUtilsTest(absltest.TestCase): + + def test_num_deterministic_policies(self): + # Kuhn poker has six information sets with two actions each (2^6 = 64). + game = pyspiel.load_game("kuhn_poker") + self.assertEqual(pyspiel.num_deterministic_policies(game, 0), 64) + self.assertEqual(pyspiel.num_deterministic_policies(game, 1), 64) + # Leduc poker has larger than 2^64 - 1, so -1 will be returned. + game = pyspiel.load_game("leduc_poker") + self.assertEqual(pyspiel.num_deterministic_policies(game, 0), -1) + self.assertEqual(pyspiel.num_deterministic_policies(game, 1), -1) + + def test_extensive_to_matrix_game(self): + kuhn_game = pyspiel.load_game("kuhn_poker") + kuhn_matrix_game = pyspiel.extensive_to_matrix_game(kuhn_game) + unused_p0_strategy, unused_p1_strategy, p0_sol_val, p1_sol_val = ( + lp_solver.solve_zero_sum_matrix_game(kuhn_matrix_game)) + # value from Kuhn 1950 or https://en.wikipedia.org/wiki/Kuhn_poker + self.assertAlmostEqual(p0_sol_val, -1 / 18) + self.assertAlmostEqual(p1_sol_val, +1 / 18) + + def test_extensive_to_matrix_game_type(self): + game = pyspiel.extensive_to_matrix_game(pyspiel.load_game("kuhn_poker")) + game_type = game.get_type() + self.assertEqual(game_type.dynamics, pyspiel.GameType.Dynamics.SIMULTANEOUS) + self.assertEqual(game_type.chance_mode, + pyspiel.GameType.ChanceMode.DETERMINISTIC) + self.assertEqual(game_type.information, + pyspiel.GameType.Information.ONE_SHOT) + self.assertEqual(game_type.utility, pyspiel.GameType.Utility.ZERO_SUM) + + def test_extensive_to_matrix_game_payoff_matrix(self): + turn_based_game = pyspiel.load_game_as_turn_based("matrix_pd") + matrix_game = pyspiel.extensive_to_matrix_game(turn_based_game) + orig_game = pyspiel.load_matrix_game("matrix_pd") + + for row in range(orig_game.num_rows()): + for col in range(orig_game.num_cols()): + for player in range(2): + self.assertEqual( + orig_game.player_utility(player, row, col), + matrix_game.player_utility(player, row, col)) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/tests/mfg_implementation_test/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/tests/mfg_implementation_test/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/tests/mfg_implementation_test/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/tests/mfg_implementation_test/mfg_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/tests/mfg_implementation_test/mfg_test.py new file mode 100644 index 0000000..7f42e0a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/tests/mfg_implementation_test/mfg_test.py @@ -0,0 +1,177 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests that Mean Field Games are implemented properly. + +These tests are intended to help developers to write mean field games that +satisfy most of the unspecified constraints assumed by the following algorithms: +- python/mfg/algorithms/policy_value.py +- python/mfg/algorithms/nash_conv.py +- python/mfg/algorithms/mirror_descent.py +- python/mfg/algorithms/fictitious_play.py +- python/mfg/algorithms/distribution.py +- python/mfg/algorithms/best_response_value.py +- python/rl_environment.py +These tests are not exhaustive and will be updated with time. +""" +import random + +from absl import flags +from absl.testing import absltest +from absl.testing import parameterized + +from open_spiel.python import policy +from open_spiel.python import rl_environment +from open_spiel.python.algorithms import get_all_states +from open_spiel.python.mfg import games as mfg_games # pylint:disable=unused-import +from open_spiel.python.mfg.algorithms import distribution +import pyspiel + +FLAGS = flags.FLAGS + +# Use a small depth limit to keep the length of the test reasonable. 
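+# (Illustrative sketch, not part of the upstream test, kept in a comment.) The
+# flags defined below only bound how much work each test does; a standalone
+# sanity check of one of the algorithms listed in the module docstring could
+# look like the following, assuming the NashConv constructor takes (game, policy):
+#
+#   from open_spiel.python.mfg.algorithms import nash_conv
+#   game = pyspiel.load_game('python_mfg_crowd_modelling')
+#   uniform = policy.UniformRandomPolicy(game)
+#   print(nash_conv.NashConv(game, uniform).nash_conv())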
+flags.DEFINE_integer( + 'get_all_states_depth_limit', 10, + 'Depth limit of getting all the states (-1 for unlimited)') +flags.DEFINE_integer('rl_env_simulations', 10, + 'Number of simulations for the RL environment tests') + + +def _get_next_states(state, next_states, to_string): + """Extract non-chance states for a subgame into the all_states dict.""" + is_mean_field = state.current_player() == pyspiel.PlayerId.MEAN_FIELD + if state.is_chance_node(): + # Add only if not already present + + for action, _ in state.chance_outcomes(): + next_state = state.child(action) + state_str = to_string(next_state) + if state_str not in next_states: + next_states[state_str] = next_state + + if is_mean_field: + support = state.distribution_support() + next_state = state.clone() + support_length = len(support) + # update with a dummy distribution + next_state.update_distribution( + [1.0 / support_length for _ in range(support_length)]) + state_str = to_string(next_state) + if state_str not in next_states: + next_states[state_str] = next_state + + if int(state.current_player()) >= 0: + for action in state.legal_actions(): + next_state = state.child(action) + state_str = to_string(next_state) + if state_str not in next_states: + next_states[state_str] = next_state + + +def _next_states(states, to_string): + next_states = {} + for state in states: + _get_next_states(state, next_states, to_string) + return set(next_states.keys()), set(next_states.values()) + + +def type_from_states(states): + """Get node type of a list of states and assert they are the same.""" + types = [state.get_type() for state in states] + assert len(set(types)) == 1 + return types[0] + + +class FiniteHorizonTest(parameterized.TestCase): + + @parameterized.parameters( + {'game_name': 'python_mfg_crowd_modelling'}, + {'game_name': 'mfg_crowd_modelling'}, + {'game_name': 'mfg_garnet'}, + {'game_name': 'mfg_crowd_modelling_2d'}, + {'game_name': 'python_mfg_periodic_aversion'}, + {'game_name': 'python_mfg_predator_prey'}, + ) + def test_is_finite_horizon(self, game_name): + """Check that the game has no loop.""" + game = pyspiel.load_game(game_name) + states = set(game.new_initial_states()) + def to_string(s): + return s.observation_string(pyspiel.PlayerId.DEFAULT_PLAYER_ID) + all_states_key = set(to_string(state) for state in states) + while type_from_states(states) != pyspiel.StateType.TERMINAL: + new_states_key, states = _next_states(states, to_string) + self.assertEmpty(all_states_key.intersection(new_states_key)) + all_states_key.update(new_states_key) + + @parameterized.parameters( + {'game_name': 'python_mfg_crowd_modelling'}, + {'game_name': 'mfg_crowd_modelling'}, + {'game_name': 'mfg_garnet'}, + {'game_name': 'mfg_crowd_modelling_2d'}, + {'game_name': 'python_mfg_periodic_aversion'}, + {'game_name': 'python_mfg_predator_prey'}, + ) + def test_has_at_least_an_action(self, game_name): + """Check that all population's state have at least one action.""" + game = pyspiel.load_game(game_name) + def to_string(s): + return s.observation_string(pyspiel.PlayerId.DEFAULT_PLAYER_ID) + states = get_all_states.get_all_states( + game, + depth_limit=FLAGS.get_all_states_depth_limit, + include_terminals=False, + include_chance_states=False, + include_mean_field_states=False, + to_string=to_string) + for state in states.values(): + self.assertNotEmpty(state.legal_actions()) + + @parameterized.parameters( + {'game_name': 'python_mfg_crowd_modelling'}, + {'game_name': 'mfg_crowd_modelling'}, + {'game_name': 'mfg_garnet'}, + {'game_name': 
'mfg_crowd_modelling_2d'}, + {'game_name': 'python_mfg_periodic_aversion'}, + {'game_name': 'python_mfg_predator_prey'}, + ) + def test_rl_environment(self, game_name): + """Check that the RL environment runs for a few trajectories.""" + game = pyspiel.load_game(game_name) + uniform_policy = policy.UniformRandomPolicy(game) + mfg_dist = distribution.DistributionPolicy(game, uniform_policy) + + envs = [ + rl_environment.Environment( + game, mfg_distribution=mfg_dist, mfg_population=p) + for p in range(game.num_players()) + ] + for p, env in enumerate(envs): + for _ in range(FLAGS.rl_env_simulations): + time_step = env.reset() + while not time_step.last(): + a = random.choice(time_step.observations['legal_actions'][p]) + time_step = env.step([a]) + + env = envs[0] + self.assertEqual(env.mfg_distribution, mfg_dist) + + # Update the distribution. + new_mfg_dist = distribution.DistributionPolicy(game, uniform_policy) + env.update_mfg_distribution(new_mfg_dist) + self.assertEqual(env.mfg_distribution, new_mfg_dist) + + +if __name__ == '__main__': + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/tests/nfg_game_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/tests/nfg_game_test.py new file mode 100644 index 0000000..7b355ad --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/tests/nfg_game_test.py @@ -0,0 +1,65 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests the C++ nfg_game methods exposed to Python.""" + +from absl.testing import absltest + +import pyspiel + + +class NFGGameTest(absltest.TestCase): + + def test_pd(self): + pd_nfg_string = ("""NFG 1 R "OpenSpiel export of matrix_pd()" +{ "Player 0" "Player 1" } { 2 2 } + +5 5 +10 0 +0 10 +1 1 +""") + game = pyspiel.load_nfg_game(pd_nfg_string) + # First (row) player utilities (player, row, col) + self.assertEqual(game.player_utility(0, 0, 0), 5) + self.assertEqual(game.player_utility(0, 1, 0), 10) + self.assertEqual(game.player_utility(0, 0, 1), 0) + self.assertEqual(game.player_utility(0, 1, 1), 1) + # Now, second (column) player + self.assertEqual(game.player_utility(1, 0, 0), 5) + self.assertEqual(game.player_utility(1, 1, 0), 0) + self.assertEqual(game.player_utility(1, 0, 1), 10) + self.assertEqual(game.player_utility(1, 1, 1), 1) + + def test_native_export_import(self): + """Check that we can import games that we've exported. + + We do not do any additional checking here, as these methods are already + being extensively tested in nfg_test.cc. The purpose of this test is only + to check that the python wrapping works. 
+ """ + game_strings = [ + "matrix_rps", "matrix_shapleys_game", "matrix_pd", "matrix_sh", + "blotto(players=2,coins=5,fields=3)", + "blotto(players=3,coins=5,fields=3)" + ] + for game_string in game_strings: + game = pyspiel.load_game(game_string) + nfg_text = pyspiel.game_to_nfg_string(game) + nfg_game = pyspiel.load_nfg_game(nfg_text) + self.assertIsNotNone(nfg_game) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/tests/nfg_writer_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/tests/nfg_writer_test.py new file mode 100644 index 0000000..2ecda86 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/tests/nfg_writer_test.py @@ -0,0 +1,74 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests the C++ nfg_writer methods exposed to Python.""" + +from absl.testing import absltest + +import pyspiel + + +class NFGWriterTest(absltest.TestCase): + + def test_rps(self): + expected_rps_nfg = ("""NFG 1 R "OpenSpiel export of matrix_rps()" +{ "Player 0" "Player 1" } { 3 3 } + +0 0 +1 -1 +-1 1 +-1 1 +0 0 +1 -1 +1 -1 +-1 1 +0 0 +""") + game = pyspiel.load_game("matrix_rps") + nfg_text = pyspiel.game_to_nfg_string(game) + self.assertEqual(nfg_text, expected_rps_nfg) + + def test_pd(self): + expected_pd_nfg = ("""NFG 1 R "OpenSpiel export of matrix_pd()" +{ "Player 0" "Player 1" } { 2 2 } + +5 5 +10 0 +0 10 +1 1 +""") + game = pyspiel.load_game("matrix_pd") + nfg_text = pyspiel.game_to_nfg_string(game) + self.assertEqual(nfg_text, expected_pd_nfg) + + def test_mp3p(self): + expected_mp3p_nfg = ("""NFG 1 R "OpenSpiel export of matching_pennies_3p()" +{ "Player 0" "Player 1" "Player 2" } { 2 2 2 } + +1 1 -1 +-1 1 1 +-1 -1 -1 +1 -1 1 +1 -1 1 +-1 -1 -1 +-1 1 1 +1 1 -1 +""") + game = pyspiel.load_game("matching_pennies_3p") + nfg_text = pyspiel.game_to_nfg_string(game) + self.assertEqual(nfg_text, expected_mp3p_nfg) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/tests/observation_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/tests/observation_test.py new file mode 100644 index 0000000..8cb2a40 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/tests/observation_test.py @@ -0,0 +1,205 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for third_party.open_spiel.python.observation.""" + +import collections +import random +import time + +from absl.testing import absltest +import numpy as np + +from open_spiel.python.algorithms import get_all_states +from open_spiel.python.observation import INFO_STATE_OBS_TYPE +from open_spiel.python.observation import make_observation +import pyspiel + + +class ObservationTest(absltest.TestCase): + + def test_leduc_observation(self): + game = pyspiel.load_game("leduc_poker") + observation = make_observation(game) + state = game.new_initial_state() + state.apply_action(1) # Deal 1 + state.apply_action(2) # Deal 2 + state.apply_action(2) # Bet + state.apply_action(1) # Call + state.apply_action(3) # Deal 3 + observation.set_from(state, player=0) + np.testing.assert_array_equal( + observation.tensor, [1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 3, 3]) + self.assertEqual( + list(observation.dict), + ["player", "private_card", "community_card", "pot_contribution"]) + np.testing.assert_array_equal(observation.dict["player"], [1, 0]) + np.testing.assert_array_equal(observation.dict["private_card"], + [0, 1, 0, 0, 0, 0]) + np.testing.assert_array_equal(observation.dict["community_card"], + [0, 0, 0, 1, 0, 0]) + np.testing.assert_array_equal(observation.dict["pot_contribution"], [3, 3]) + self.assertEqual( + observation.string_from(state, 0), + "[Observer: 0][Private: 1][Round 2][Player: 0][Pot: 6]" + "[Money: 97 97][Public: 3][Ante: 3 3]") + + def test_leduc_info_state(self): + game = pyspiel.load_game("leduc_poker") + observation = make_observation(game, INFO_STATE_OBS_TYPE) + state = game.new_initial_state() + state.apply_action(1) # Deal 1 + state.apply_action(2) # Deal 2 + state.apply_action(2) # Bet + state.apply_action(1) # Call + state.apply_action(3) # Deal 3 + observation.set_from(state, player=0) + np.testing.assert_array_equal(observation.tensor, [ + 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0 + ]) + self.assertEqual( + list(observation.dict), + ["player", "private_card", "community_card", "betting"]) + np.testing.assert_array_equal(observation.dict["player"], [1, 0]) + np.testing.assert_array_equal(observation.dict["private_card"], + [0, 1, 0, 0, 0, 0]) + np.testing.assert_array_equal(observation.dict["community_card"], + [0, 0, 0, 1, 0, 0]) + np.testing.assert_array_equal( + observation.dict["betting"], + [ + [[0, 1], [1, 0], [0, 0], [0, 0]], # First round + [[0, 0], [0, 0], [0, 0], [0, 0]], # Second round + ]) + self.assertEqual( + observation.string_from(state, 0), + "[Observer: 0][Private: 1][Round 2][Player: 0][Pot: 6]" + "[Money: 97 97][Public: 3][Round1: 2 1][Round2: ]") + + def test_leduc_info_state_as_single_tensor(self): + game = pyspiel.load_game("leduc_poker") + observation = make_observation( + game, INFO_STATE_OBS_TYPE, + pyspiel.game_parameters_from_string("single_tensor")) + state = game.new_initial_state() + state.apply_action(1) # Deal 1 + state.apply_action(2) # Deal 2 + state.apply_action(2) # Bet + state.apply_action(1) # Call + state.apply_action(3) # Deal 3 + observation.set_from(state, player=0) + self.assertEqual(list(observation.dict), ["info_state"]) + np.testing.assert_array_equal(observation.dict["info_state"], [ + 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0 + ]) + + def test_leduc_all_player_privates(self): + game = pyspiel.load_game("leduc_poker") + observation = make_observation( + game, + pyspiel.IIGObservationType( + perfect_recall=True, + 
private_info=pyspiel.PrivateInfoType.ALL_PLAYERS)) + state = game.new_initial_state() + state.apply_action(1) # Deal 1 + state.apply_action(2) # Deal 2 + state.apply_action(2) # Bet + state.apply_action(1) # Call + state.apply_action(3) # Deal 3 + observation.set_from(state, player=0) + np.testing.assert_array_equal(observation.dict["private_cards"], [ + [0, 1, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0], + ]) + + def test_benchmark_state_generation(self): + # Generate trajectories to test on + game = pyspiel.load_game("chess") + trajectories = [] + for _ in range(20): + state = game.new_initial_state() + while not state.is_terminal(): + state.apply_action(random.choice(state.legal_actions())) + trajectories.append(state.history()) + + # New API + total = 0 + observation = make_observation(game) + start = time.time() + for trajectory in trajectories: + state = game.new_initial_state() + for action in trajectory: + state.apply_action(action) + observation.set_from(state, 0) + total += np.mean(observation.tensor) + end = time.time() + print("New API time per iteration " + f"{1000*(end-start)/len(trajectories)}msec") + + # Old API + total = 0 + start = time.time() + for trajectory in trajectories: + state = game.new_initial_state() + for action in trajectory: + state.apply_action(action) + obs = state.observation_tensor(0) + tensor = np.asarray(obs) + total += np.mean(tensor) + end = time.time() + print("Old API time per iteration " + f"{1000*(end-start)/len(trajectories)}msec") + + def test_compression_binary(self): + # All infostates for leduc are binary, so we can compress them effectively. + game = pyspiel.load_game("leduc_poker") + obs1 = make_observation(game, INFO_STATE_OBS_TYPE) + obs2 = make_observation(game, INFO_STATE_OBS_TYPE) + self.assertLen(obs1.tensor, 30) # 30 floats = 120 bytes + for state in get_all_states.get_all_states(game).values(): + for player in range(game.num_players()): + obs1.set_from(state, player) + compressed = obs1.compress() + self.assertEqual(type(compressed), bytes) + self.assertLen(compressed, 5) + obs2.decompress(compressed) + np.testing.assert_array_equal(obs1.tensor, obs2.tensor) + + def test_compression_none(self): + # Most observations for leduc have non-binary data, so we can't + # currently compress them. + game = pyspiel.load_game("leduc_poker") + obs1 = make_observation(game) + obs2 = make_observation(game) + self.assertLen(obs1.tensor, 16) # 16 floats = 64 bytes + freq = collections.Counter() + for state in get_all_states.get_all_states(game).values(): + for player in range(game.num_players()): + obs1.set_from(state, player) + compressed = obs1.compress() + self.assertEqual(type(compressed), bytes) + freq[len(compressed)] += 1 + obs2.decompress(compressed) + np.testing.assert_array_equal(obs1.tensor, obs2.tensor) + expected_freq = { + 3: 840, # Compressible states take 3 bytes + 65: 17760, # Uncompressible states take 65 bytes + } + self.assertEqual(freq, expected_freq) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/tests/policy_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/tests/policy_test.py new file mode 100644 index 0000000..9071b36 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/tests/policy_test.py @@ -0,0 +1,685 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np + +from open_spiel.python import games # pylint: disable=unused-import +from open_spiel.python import policy +from open_spiel.python.algorithms import get_all_states +import pyspiel + + +SEED = 187461917 + +_TIC_TAC_TOE_STATES = [ + { + # ... + # xoo + # ..x + "state": "3, 4, 8, 5", + "legal_actions": (0, 1, 2, 6, 7) + }, + { + # xo. + # oxx + # o.. + "state": "4, 1, 0, 3, 5, 6", + "legal_actions": (2, 7, 8) + }, + { + # ... + # ... + # ... + "state": "", + "legal_actions": (0, 1, 2, 3, 4, 5, 6, 7, 8) + } +] + + +class DerivedPolicyTest(absltest.TestCase): + + def test_derive_from_policy(self): + class DerivedPolicy(pyspiel.Policy): + + def action_probabilities(self, state): + return {0: 0.1, 1: 0.9} + + def get_state_policy(self, infostate): + return {10: 0.9, 11: 0.1} + + policy_obj = DerivedPolicy() + self.assertEqual(DerivedPolicy.__bases__, (pyspiel.Policy,)) + self.assertIsInstance(policy_obj, pyspiel.Policy) + self.assertEqual( + {0: 0.1, 1: 0.9}, + policy_obj.action_probabilities( + pyspiel.load_game("kuhn_poker").new_initial_state() + ), + ) + self.assertEqual( + {0: 0.1, 1: 0.9}, policy_obj.action_probabilities("some infostate") + ) + self.assertEqual( + {10: 0.9, 11: 0.1}, policy_obj.get_state_policy("some infostate") + ) + with self.assertRaises(RuntimeError): + policy_obj.serialize() + + def test_cpp_policy_from_py(self): + class DerivedPolicy(pyspiel.Policy): + + def action_probabilities(self, state): + return {0: 0.0, 1: 0.0} + + def get_state_policy(self, infostate): + return [(2, 0.0), (3, 0.0)] + + def get_state_policy_as_parallel_vectors(self, state): + if isinstance(state, str): + return [4, 5], [0, 0] + else: + return [6, 7], [0, 0] + + def serialize(self, precision, delim): + return f"Serialized string, {precision=}, {delim=}" + + policy_obj = DerivedPolicy() + self.assertEqual( + {0: 0.0, 1: 0.0}, + pyspiel._policy_trampoline_testing.call_action_probabilities( + policy_obj, pyspiel.load_game("kuhn_poker").new_initial_state() + ), + ) + self.assertEqual( + {0: 0.0, 1: 0.0}, + pyspiel._policy_trampoline_testing.call_action_probabilities( + policy_obj, "some infostate"), + ) + self.assertEqual( + [(2, 0.0), (3, 0.0)], + pyspiel._policy_trampoline_testing.call_get_state_policy( + policy_obj, pyspiel.load_game("kuhn_poker").new_initial_state() + ), + ) + self.assertEqual( + [(2, 0.0), (3, 0.0)], + pyspiel._policy_trampoline_testing.call_get_state_policy( + policy_obj, "some infostate"), + ) + self.assertEqual( + ([4, 5], [0, 0]), + pyspiel._policy_trampoline_testing.call_get_state_policy_as_parallel_vectors( + policy_obj, "some infostate"), + ) + self.assertEqual( + ([6, 7], [0, 0]), + pyspiel._policy_trampoline_testing.call_get_state_policy_as_parallel_vectors( + policy_obj, pyspiel.load_game("kuhn_poker").new_initial_state() + ), + ) + self.assertEqual( + pyspiel._policy_trampoline_testing.call_serialize(policy_obj, 3, "!?"), + "Serialized string, precision=3, delim='!?'", + ) + + +def test_policy_on_game(self, game, policy_object, player=-1): + """Checks the policy 
conforms to the conventions. + + Checks the Policy.action_probabilities contains only legal actions (but not + necessarily all). + Checks that the probabilities are positive and sum to 1. + + Args: + self: The Test class. This methid targets as being used as a utility + function to test policies. + game: A `pyspiel.Game`, same as the one used in the policy. + policy_object: A `policy.Policy` object on `game`. to test. + player: Restrict testing policy to a player. + """ + + all_states = get_all_states.get_all_states( + game, + depth_limit=-1, + include_terminals=False, + include_chance_states=False, + to_string=lambda s: s.information_state_string()) + + for state in all_states.values(): + legal_actions = set(state.legal_actions()) + action_probabilities = policy_object.action_probabilities(state) + + for action in action_probabilities.keys(): + # We want a clearer error message to be able to debug. + actions_missing = set(legal_actions) - set(action_probabilities.keys()) + illegal_actions = set(action_probabilities.keys()) - set(legal_actions) + self.assertIn( + action, + legal_actions, + msg="The action {} is present in the policy but is not a legal " + "actions (these are {})\n" + "Legal actions missing from policy: {}\n" + "Illegal actions present in policy: {}".format( + action, legal_actions, actions_missing, illegal_actions)) + + sum_ = 0 + for prob in action_probabilities.values(): + sum_ += prob + self.assertGreaterEqual(prob, 0) + if player < 0 or state.current_player() == player: + self.assertAlmostEqual(1, sum_) + else: + self.assertAlmostEqual(0, sum_) + + +_LEDUC_POKER = pyspiel.load_game("leduc_poker") + + +class CommonTest(parameterized.TestCase): + + @parameterized.parameters([ + policy.TabularPolicy(_LEDUC_POKER), + policy.UniformRandomPolicy(_LEDUC_POKER), + policy.FirstActionPolicy(_LEDUC_POKER), + ]) + def test_policy_on_leduc(self, policy_object): + test_policy_on_game(self, _LEDUC_POKER, policy_object) + + @parameterized.named_parameters([ + ("pyspiel.UniformRandomPolicy", + pyspiel.UniformRandomPolicy(_LEDUC_POKER)), + ("pyspiel.GetRandomPolicy", + pyspiel.GetRandomPolicy(_LEDUC_POKER, 1)), + ("pyspiel.GetFlatDirichletPolicy", + pyspiel.GetFlatDirichletPolicy(_LEDUC_POKER, 1)), + ("pyspiel.GetRandomDeterministicPolicy", + pyspiel.GetRandomDeterministicPolicy(_LEDUC_POKER, 1)), + ]) + def test_cpp_policies_on_leduc(self, policy_object): + test_policy_on_game(self, _LEDUC_POKER, policy_object) + + @parameterized.named_parameters([ + ("pyspiel.GetRandomPolicy0", + pyspiel.GetRandomPolicy(_LEDUC_POKER, 1, 0), 0), + ("pyspiel.GetFlatDirichletPolicy1", + pyspiel.GetFlatDirichletPolicy(_LEDUC_POKER, 1, 1), 1), + ("pyspiel.GetRandomDeterministicPolicym1", + pyspiel.GetRandomDeterministicPolicy(_LEDUC_POKER, 1, -1), -1), + ]) + def test_cpp_player_policies_on_leduc(self, policy_object, player): + test_policy_on_game(self, _LEDUC_POKER, policy_object, player) + + +class TabularTicTacToePolicyTest(parameterized.TestCase): + + # Enumerating all the states for tic tac toe is quite slow, so we do this + # ony once. + @classmethod + def setUpClass(cls): + super(TabularTicTacToePolicyTest, cls).setUpClass() + cls.game = pyspiel.load_game("tic_tac_toe") + cls.tabular_policy = policy.TabularPolicy(cls.game) + + def test_policy_shape(self): + # Tic tac toe has 4520 decision states; ref + # https://pubs.acs.org/doi/pdf/10.1021/acs.jcim.5b00324 + # There are 9 possible moves in the game (one per grid cell). 
+ # However, the TabularPolicy uses InformationState as keys, which in the + # case of TicTacToe corresponds to the number of unique sequences (due to + # perfect recall) requires by several algorithms, i.e. CFR. + self.assertEqual(self.tabular_policy.action_probability_array.shape, + (294778, 9)) + + def test_policy_attributes(self): + # Verify the base class attributes of the policy + self.assertEqual(self.tabular_policy.player_ids, [0, 1]) + + @parameterized.parameters(*_TIC_TAC_TOE_STATES) + def test_policy_at_state(self, state, legal_actions): + index = self.tabular_policy.state_lookup[state] + prob = 1 / len(legal_actions) + np.testing.assert_array_equal( + self.tabular_policy.action_probability_array[index], + [prob if action in legal_actions else 0 for action in range(9)]) + + @parameterized.parameters(*_TIC_TAC_TOE_STATES) + def test_legal_actions_at_state(self, state, legal_actions): + index = self.tabular_policy.state_lookup[state] + np.testing.assert_array_equal( + self.tabular_policy.legal_actions_mask[index], + [1 if action in legal_actions else 0 for action in range(9)]) + + def test_call_for_state(self): + state = self.game.new_initial_state() + state.apply_action(3) + state.apply_action(4) + state.apply_action(5) + state.apply_action(6) + state.apply_action(7) + self.assertEqual( + self.tabular_policy.action_probabilities(state), { + 0: 0.25, + 1: 0.25, + 2: 0.25, + 8: 0.25 + }) + + def test_states_ordered_by_player(self): + max_player0_index = max( + self.tabular_policy.state_lookup[state] + for state in self.tabular_policy.states_per_player[0]) + min_player1_index = min( + self.tabular_policy.state_lookup[state] + for state in self.tabular_policy.states_per_player[1]) + self.assertEqual(max_player0_index + 1, min_player1_index) + + def test_state_in(self): + # Per state, we have 9 cells each with 3 possible states (o, x, empty) + # Tic tac toe has 4520 decision states, but the tabular policy indexes by + # InformationState, which leads to a larger number due to perfect recall + self.assertEqual(self.tabular_policy.state_in.shape, (294778, 27)) + + @parameterized.parameters(*_TIC_TAC_TOE_STATES) + def test_policy_for_state_string(self, state, legal_actions): + prob = 1 / len(legal_actions) + np.testing.assert_array_equal( + self.tabular_policy.policy_for_key(state), + [prob if action in legal_actions else 0 for action in range(9)]) + + +class TabularPolicyTest(parameterized.TestCase): + + def test_update_elementwise(self): + game = pyspiel.load_game("kuhn_poker") + tabular_policy = policy.TabularPolicy(game) + state = "0pb" + np.testing.assert_array_equal( + tabular_policy.policy_for_key(state), [0.5, 0.5]) + tabular_policy.policy_for_key(state)[0] = 0.9 + tabular_policy.policy_for_key(state)[1] = 0.1 + np.testing.assert_array_equal( + tabular_policy.policy_for_key(state), [0.9, 0.1]) + + def test_update_slice(self): + game = pyspiel.load_game("kuhn_poker") + tabular_policy = policy.TabularPolicy(game) + state = "2b" + np.testing.assert_array_equal( + tabular_policy.policy_for_key(state), [0.5, 0.5]) + tabular_policy.policy_for_key(state)[:] = [0.8, 0.2] + np.testing.assert_array_equal( + tabular_policy.policy_for_key(state), [0.8, 0.2]) + + def test_state_ordering_is_deterministic(self): + game = pyspiel.load_game("kuhn_poker") + tabular_policy = policy.TabularPolicy(game) + + expected = { + "0": 0, + "0pb": 1, + "1": 2, + "1pb": 3, + "2": 4, + "2pb": 5, + "1p": 6, + "1b": 7, + "2p": 8, + "2b": 9, + "0p": 10, + "0b": 11, + } + self.assertEqual(expected, 
tabular_policy.state_lookup) + + def test_partial_tabular_policy_empty_uniform(self): + """Tests that a partial tabular policy works for an empty policy.""" + game = pyspiel.load_game("kuhn_poker") + # python tabular policy is initialized to uniform + python_tabular_policy = policy.TabularPolicy(game) + partial_pyspiel_policy = pyspiel.PartialTabularPolicy() + self.assertNotEmpty(python_tabular_policy.state_lookup) + all_states = get_all_states.get_all_states(game, + depth_limit=-1, + include_terminals=False, + include_chance_states=False, + include_mean_field_states=False) + self.assertNotEmpty(all_states) + for _, state in all_states.items(): + tabular_probs = python_tabular_policy.action_probabilities(state) + state_policy = partial_pyspiel_policy.get_state_policy(state) + self.assertLen(state_policy, 2) + for a, p in state_policy: + self.assertAlmostEqual(p, tabular_probs[a]) + + def test_partial_tabular_policy_set_full(self): + """Tests the partial tabular policy works for a complete policy.""" + game = pyspiel.load_game("kuhn_poker") + # python tabular policy is initialized to uniform + python_tabular_policy = policy.TabularPolicy(game) + partial_pyspiel_policy = pyspiel.PartialTabularPolicy() + self.assertNotEmpty(python_tabular_policy.state_lookup) + all_states = get_all_states.get_all_states(game, + depth_limit=-1, + include_terminals=False, + include_chance_states=False, + include_mean_field_states=False) + self.assertNotEmpty(all_states) + policy_dict = python_tabular_policy.to_dict() + partial_pyspiel_policy = pyspiel.PartialTabularPolicy(policy_dict) + for _, state in all_states.items(): + tabular_probs = python_tabular_policy.action_probabilities(state) + state_policy = partial_pyspiel_policy.get_state_policy(state) + self.assertLen(state_policy, 2) + for a, p in state_policy: + self.assertAlmostEqual(p, tabular_probs[a]) + + def test_partial_tabular_policy_override_fallback(self): + """Tests the partial tabular policy for a truly partial policy. + + Specifically: assigns a full policy, overrides some entries, and + removes others. Checks that the overridden ones return correctly and that + the missing ones return the fallback. 
+ """ + game = pyspiel.load_game("kuhn_poker") + # python tabular policy is initialized to uniform + python_tabular_policy = policy.TabularPolicy(game) + partial_pyspiel_policy = pyspiel.PartialTabularPolicy() + self.assertNotEmpty(python_tabular_policy.state_lookup) + all_states = get_all_states.get_all_states(game, + depth_limit=-1, + include_terminals=False, + include_chance_states=False, + include_mean_field_states=False) + self.assertNotEmpty(all_states) + policy_dict = python_tabular_policy.to_dict() + partial_pyspiel_policy = pyspiel.PartialTabularPolicy(policy_dict) + perturbed_policy_dict = {} + for key in policy_dict: + if np.random.uniform() < 0.5: + perturbed_policy_dict[key] = [(0, 1.0)] + partial_pyspiel_policy = pyspiel.PartialTabularPolicy(perturbed_policy_dict) + for _, state in all_states.items(): + infostate_key = state.information_state_string() + state_policy = partial_pyspiel_policy.get_state_policy(state) + if infostate_key in perturbed_policy_dict: + self.assertLen(state_policy, 1) + self.assertAlmostEqual(state_policy[0][1], 1.0) + else: + tabular_probs = python_tabular_policy.action_probabilities(state) + for a, p in state_policy: + self.assertAlmostEqual(p, tabular_probs[a]) + + def test_states(self): + game = pyspiel.load_game("leduc_poker") + tabular_policy = policy.TabularPolicy(game) + i = 0 + for state in tabular_policy.states: + self.assertEqual(i, tabular_policy.state_index(state)) + i += 1 + + self.assertEqual(936, i) + + @parameterized.parameters((policy.FirstActionPolicy, "kuhn_poker"), + (policy.UniformRandomPolicy, "kuhn_poker"), + (policy.FirstActionPolicy, "leduc_poker"), + (policy.UniformRandomPolicy, "leduc_poker")) + def test_can_turn_policy_into_tabular_policy(self, policy_class, game_name): + game = pyspiel.load_game(game_name) + realized_policy = policy_class(game) + tabular_policy = realized_policy.to_tabular() + for state in tabular_policy.states: + self.assertEqual( + realized_policy.action_probabilities(state), + tabular_policy.action_probabilities(state)) + + +class TabularRockPaperScissorsPolicyTest(absltest.TestCase): + + # Enumerating all the states for rock-paper-scissors is fast, but + # we initialize only once for consistency with slower games. + @classmethod + def setUpClass(cls): + super(TabularRockPaperScissorsPolicyTest, cls).setUpClass() + game = pyspiel.load_game_as_turn_based("matrix_rps") + cls.tabular_policy = policy.TabularPolicy(game) + + def test_policy_attributes(self): + # Verify the base class attributes of the policy + self.assertEqual(self.tabular_policy.player_ids, [0, 1]) + + def test_tabular_policy(self): + # Test that the tabular policy is uniform random in each state. + np.testing.assert_array_equal( + self.tabular_policy.action_probability_array, + [[1 / 3, 1 / 3, 1 / 3], [1 / 3, 1 / 3, 1 / 3]]) + + def test_states_lookup(self): + # Test that there are two valid states, indexed as 0 and 1. + game = pyspiel.load_game_as_turn_based("matrix_rps") + state = game.new_initial_state() + first_info_state = state.information_state_string() + state.apply_action(state.legal_actions()[0]) + second_info_state = state.information_state_string() + self.assertCountEqual(self.tabular_policy.state_lookup, + [first_info_state, second_info_state]) + self.assertCountEqual(self.tabular_policy.state_lookup.values(), [0, 1]) + + def test_legal_actions_mask(self): + # Test that all actions are valid in all states. 
+ np.testing.assert_array_equal(self.tabular_policy.legal_actions_mask, + [[1, 1, 1], [1, 1, 1]]) + + +class UniformRandomPolicyTest(absltest.TestCase): + + def test_policy_attributes(self): + game = pyspiel.load_game("tiny_bridge_4p") + uniform_random_policy = policy.UniformRandomPolicy(game) + self.assertEqual(uniform_random_policy.player_ids, [0, 1, 2, 3]) + + def test_policy_at_state(self): + game = pyspiel.load_game("tic_tac_toe") + uniform_random_policy = policy.UniformRandomPolicy(game) + state = game.new_initial_state() + state.apply_action(2) + state.apply_action(4) + state.apply_action(6) + state.apply_action(8) + self.assertEqual( + uniform_random_policy.action_probabilities(state), { + 0: 0.2, + 1: 0.2, + 3: 0.2, + 5: 0.2, + 7: 0.2 + }) + + def test_players_have_different_legal_actions(self): + game = pyspiel.load_game("oshi_zumo") + uniform_random_policy = policy.UniformRandomPolicy(game) + state = game.new_initial_state() + state.apply_actions([46, 49]) + # Started with 50 coins each, now have 4 and 1 respectively + self.assertEqual( + uniform_random_policy.action_probabilities(state, player_id=0), { + 0: 0.2, + 1: 0.2, + 2: 0.2, + 3: 0.2, + 4: 0.2 + }) + self.assertEqual( + uniform_random_policy.action_probabilities(state, player_id=1), { + 0: 0.5, + 1: 0.5 + }) + + +class MergeTabularPoliciesTest(absltest.TestCase): + + def test_identity(self): + num_players = 2 + game = pyspiel.load_game("kuhn_poker", {"players": num_players}) + + tabular_policies = [ # Policy limited to player. + policy.TabularPolicy(game, players=(player,)) + for player in range(num_players) + ] + for player, tabular_policy in enumerate(tabular_policies): + tabular_policy.action_probability_array[:] = 0 + tabular_policy.action_probability_array[:, player] = 1.0 + + merged_tabular_policy = policy.merge_tabular_policies( + tabular_policies, game) + + self.assertIdentityPoliciesEqual(tabular_policies, merged_tabular_policy, + game) + + def test_identity_redundant(self): + num_players = 2 + game = pyspiel.load_game("kuhn_poker", {"players": num_players}) + + tabular_policies = [ # Policy for all players. + policy.TabularPolicy(game, players=None) + for player in range(num_players) + ] + for player, tabular_policy in enumerate(tabular_policies): + tabular_policy.action_probability_array[:] = 0 + tabular_policy.action_probability_array[:, player] = 1.0 + + merged_tabular_policy = policy.merge_tabular_policies( + tabular_policies, game) + + self.assertIdentityPoliciesEqual(tabular_policies, merged_tabular_policy, + game) + + def test_identity_missing(self): + num_players = 2 + game = pyspiel.load_game("kuhn_poker", {"players": num_players}) + + tabular_policies = [ # Only first player (repeated). 
+ policy.TabularPolicy(game, players=(0,)) + for player in range(num_players) + ] + for player, tabular_policy in enumerate(tabular_policies): + tabular_policy.action_probability_array[:] = 0 + tabular_policy.action_probability_array[:, player] = 1.0 + + merged_tabular_policy = policy.merge_tabular_policies( + tabular_policies, game) + + for player in range(game.num_players()): + if player == 0: + self.assertListEqual(tabular_policies[player].states_per_player[player], + merged_tabular_policy.states_per_player[player]) + for p_state in merged_tabular_policy.states_per_player[player]: + to_index = merged_tabular_policy.state_lookup[p_state] + from_index = tabular_policies[player].state_lookup[p_state] + self.assertTrue( + np.allclose( + merged_tabular_policy.action_probability_array[to_index], + tabular_policies[player].action_probability_array[from_index]) + ) + + self.assertTrue( + np.allclose( + merged_tabular_policy.action_probability_array[to_index, + player], 1)) + else: + # Missing players have uniform policy. + self.assertEmpty(tabular_policies[player].states_per_player[player]) + for p_state in merged_tabular_policy.states_per_player[player]: + to_index = merged_tabular_policy.state_lookup[p_state] + self.assertTrue( + np.allclose( + merged_tabular_policy.action_probability_array[to_index, + player], 0.5)) + + def assertIdentityPoliciesEqual(self, tabular_policies, merged_tabular_policy, + game): + for player in range(game.num_players()): + self.assertListEqual(tabular_policies[player].states_per_player[player], + merged_tabular_policy.states_per_player[player]) + + for p_state in merged_tabular_policy.states_per_player[player]: + to_index = merged_tabular_policy.state_lookup[p_state] + from_index = tabular_policies[player].state_lookup[p_state] + self.assertTrue( + np.allclose( + merged_tabular_policy.action_probability_array[to_index], + tabular_policies[player].action_probability_array[from_index])) + + self.assertTrue( + np.allclose( + merged_tabular_policy.action_probability_array[to_index, + player], 1)) + + +class JointActionProbTest(absltest.TestCase): + + def test_joint_action_probabilities(self): + """Test expected behavior of joint_action_probabilities.""" + game = pyspiel.load_game("python_iterated_prisoners_dilemma") + uniform_policy = policy.UniformRandomPolicy(game) + joint_action_probs = policy.joint_action_probabilities( + game.new_initial_state(), uniform_policy) + self.assertCountEqual( + list(joint_action_probs), [ + ((0, 0), 0.25), + ((1, 1), 0.25), + ((1, 0), 0.25), + ((0, 1), 0.25), + ]) + + def test_joint_action_probabilities_failure_on_seq_game(self): + """Test failure of child on sequential games.""" + game = pyspiel.load_game("kuhn_poker") + with self.assertRaises(AssertionError): + list(policy.joint_action_probabilities( + game.new_initial_state(), policy.UniformRandomPolicy(game))) + + +class ChildTest(absltest.TestCase): + + def test_child_function_expected_behavior_for_seq_game(self): + """Test expected behavior of child on sequential games.""" + game = pyspiel.load_game("tic_tac_toe") + initial_state = game.new_initial_state() + action = 3 + new_state = policy.child(initial_state, action) + self.assertNotEqual(new_state.history(), initial_state.history()) + expected_new_state = initial_state.child(action) + self.assertNotEqual(new_state, expected_new_state) + self.assertEqual(new_state.history(), expected_new_state.history()) + + def test_child_function_expected_behavior_for_sim_game(self): + """Test expected behavior of child on simultaneous 
games.""" + game = pyspiel.load_game("python_iterated_prisoners_dilemma") + parameter_state = game.new_initial_state() + actions = [1, 1] + new_state = policy.child(parameter_state, actions) + self.assertEqual(str(new_state), ("p0:D p1:D")) + + def test_child_function_failure_behavior_for_sim_game(self): + """Test failure behavior of child on simultaneous games.""" + game = pyspiel.load_game("python_iterated_prisoners_dilemma") + parameter_state = game.new_initial_state() + with self.assertRaises(AssertionError): + policy.child(parameter_state, 0) + + +if __name__ == "__main__": + np.random.seed(SEED) + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/tests/pyspiel_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/tests/pyspiel_test.py new file mode 100644 index 0000000..87dc7d9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/tests/pyspiel_test.py @@ -0,0 +1,346 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""General tests for pyspiel python bindings.""" + +import os +from absl.testing import absltest + +from open_spiel.python import games # pylint: disable=unused-import +from open_spiel.python import policy +from open_spiel.python.mfg import games as mfgs # pylint: disable=unused-import +import pyspiel + +# Specify game names in alphabetical order, to make the test easier to read. 
+EXPECTED_GAMES = frozenset([ + "2048", + "add_noise", + "amazons", + "backgammon", + "bargaining", + "battleship", + "blackjack", + "blotto", + "breakthrough", + "bridge", + "bridge_uncontested_bidding", + "cached_tree", + "catch", + "chat_game", # python game locating in python/games/chat_games/ + "checkers", + "chess", + "cliff_walking", + "clobber", + "coin_game", + "colored_trails", + "connect_four", + "coop_box_pushing", + "coop_to_1p", + "coordinated_mp", + "crazy_eights", + "cribbage", + "cursor_go", + "dark_chess", + "dark_hex", + "dark_hex_ir", + "deep_sea", + "dots_and_boxes", + "dou_dizhu", + "efg_game", + "einstein_wurfelt_nicht", + "euchre", + "first_sealed_auction", + "gin_rummy", + "go", + "goofspiel", + "havannah", + "hex", + "hearts", + "hive", + "kriegspiel", + "kuhn_poker", + "laser_tag", + "lewis_signaling", + "leduc_poker", + "liars_dice", + "liars_dice_ir", + "maedn", + "mancala", + "markov_soccer", + "matching_pennies_3p", + "matrix_bos", + "matrix_brps", + "matrix_cd", + "matrix_coordination", + "matrix_mp", + "matrix_pd", + "matrix_rps", + "matrix_rpsw", + "matrix_sh", + "matrix_shapleys_game", + "mean_field_lin_quad", + "mfg_crowd_modelling", + "mfg_crowd_modelling_2d", + "mfg_dynamic_routing", + "mfg_garnet", + "misere", + "mnk", + "morpion_solitaire", + "negotiation", + "nfg_game", + "nim", + "nine_mens_morris", + "normal_form_extensive_game", + "oh_hell", + "oshi_zumo", + "othello", + "oware", + "pentago", + "pathfinding", + "phantom_go", + "phantom_ttt", + "phantom_ttt_ir", + "pig", + "python_block_dominoes", + "python_dynamic_routing", + "python_iterated_prisoners_dilemma", + "python_mfg_crowd_avoidance", + "python_mfg_crowd_modelling", + "python_mfg_dynamic_routing", + "python_mfg_periodic_aversion", + "python_mfg_predator_prey", + "python_kuhn_poker", + "python_team_dominoes", + "python_tic_tac_toe", + "python_liars_poker", + "quoridor", + "repeated_game", + "rbc", + "restricted_nash_response", + "sheriff", + "skat", + "start_at", + "solitaire", + "spades", + "stones_and_gems", + "tarok", + "tic_tac_toe", + "tiny_bridge_2p", + "tiny_bridge_4p", + "tiny_hanabi", + "trade_comm", + "turn_based_simultaneous_game", + "twixt", + "ultimate_tic_tac_toe", + "y", + "zerosum", +]) + + +class PyspielTest(absltest.TestCase): + + def test_registered_names(self): + game_names = pyspiel.registered_names() + + expected = list(EXPECTED_GAMES) + if (os.environ.get("OPEN_SPIEL_BUILD_WITH_HANABI", "OFF") == "ON" and + "hanabi" not in expected): + expected.append("hanabi") + if (os.environ.get("OPEN_SPIEL_BUILD_WITH_ACPC", "OFF") == "ON" and + "universal_poker" not in expected): + expected.append("universal_poker") + expected = sorted(expected) + self.assertCountEqual(game_names, expected) + + def test_default_loadable(self): + # Games which cannmot be loaded with default parameters will be skipped by + # several standard tests. We make a list of such games here in order to make + # implementors think twice about making new games non-default-loadable + non_default_loadable = [ + game.short_name + for game in pyspiel.registered_games() + if not game.default_loadable + ] + expected = [ + # Being non-default-loadable prevents various automated tests. + # Only add games here if there is no sensible default for a parameter. 
+ "add_noise", + "cached_tree", + "coop_to_1p", + "efg_game", + "nfg_game", + "misere", + "turn_based_simultaneous_game", + "normal_form_extensive_game", + "repeated_game", + "restricted_nash_response", + "start_at", + "zerosum", + ] + self.assertCountEqual(non_default_loadable, expected) + + def test_registered_game_attributes(self): + game_list = {game.short_name: game for game in pyspiel.registered_games()} + self.assertEqual(game_list["kuhn_poker"].dynamics, + pyspiel.GameType.Dynamics.SEQUENTIAL) + self.assertEqual(game_list["kuhn_poker"].chance_mode, + pyspiel.GameType.ChanceMode.EXPLICIT_STOCHASTIC) + self.assertEqual(game_list["kuhn_poker"].information, + pyspiel.GameType.Information.IMPERFECT_INFORMATION) + self.assertEqual(game_list["kuhn_poker"].utility, + pyspiel.GameType.Utility.ZERO_SUM) + self.assertEqual(game_list["kuhn_poker"].min_num_players, 2) + + def test_create_game(self): + game = pyspiel.load_game("kuhn_poker") + game_info = game.get_type() + self.assertEqual(game_info.information, + pyspiel.GameType.Information.IMPERFECT_INFORMATION) + self.assertEqual(game.num_players(), 2) + + def test_play_kuhn_poker(self): + game = pyspiel.load_game("kuhn_poker") + state = game.new_initial_state() + self.assertEqual(state.is_chance_node(), True) + self.assertEqual(state.chance_outcomes(), [(0, 1 / 3), (1, 1 / 3), + (2, 1 / 3)]) + state.apply_action(1) + self.assertEqual(state.is_chance_node(), True) + self.assertEqual(state.chance_outcomes(), [(0, 0.5), (2, 0.5)]) + state.apply_action(2) + self.assertEqual(state.is_chance_node(), False) + self.assertEqual(state.legal_actions(), [0, 1]) + sampler = pyspiel.UniformProbabilitySampler(0., 1.) + clone = state.resample_from_infostate(1, sampler) + self.assertEqual( + clone.information_state_string(1), state.information_state_string(1)) + + def test_othello(self): + game = pyspiel.load_game("othello") + state = game.new_initial_state() + self.assertFalse(state.is_chance_node()) + self.assertFalse(state.is_terminal()) + self.assertEqual(state.legal_actions(), [19, 26, 37, 44]) + + def test_tic_tac_toe(self): + game = pyspiel.load_game("tic_tac_toe") + state = game.new_initial_state() + self.assertFalse(state.is_chance_node()) + self.assertFalse(state.is_terminal()) + self.assertEqual(state.legal_actions(), [0, 1, 2, 3, 4, 5, 6, 7, 8]) + + def test_game_parameters_from_string_empty(self): + self.assertEqual(pyspiel.game_parameters_from_string(""), {}) + + def test_game_parameters_from_string_simple(self): + self.assertEqual( + pyspiel.game_parameters_from_string("foo"), {"name": "foo"}) + + def test_game_parameters_from_string_with_options(self): + self.assertEqual( + pyspiel.game_parameters_from_string("foo(x=2,y=true)"), { + "name": "foo", + "x": 2, + "y": True + }) + + def test_game_parameters_from_string_with_subgame(self): + self.assertEqual( + pyspiel.game_parameters_from_string( + "foo(x=2,y=true,subgame=bar(z=False))"), { + "name": "foo", + "x": 2, + "y": True, + "subgame": { + "name": "bar", + "z": False + } + }) + + def test_game_parameters_to_string_empty(self): + self.assertEqual(pyspiel.game_parameters_to_string({}), "") + + def test_game_parameters_to_string_simple(self): + self.assertEqual( + pyspiel.game_parameters_to_string({"name": "foo"}), "foo()") + + def test_game_parameters_to_string_with_options(self): + self.assertEqual( + pyspiel.game_parameters_to_string({ + "name": "foo", + "x": 2, + "y": True + }), "foo(x=2,y=True)") + + def test_game_parameters_to_string_with_subgame(self): + self.assertEqual( + 
pyspiel.game_parameters_to_string({ + "name": "foo", + "x": 2, + "y": True, + "subgame": { + "name": "bar", + "z": False + } + }), "foo(subgame=bar(z=False),x=2,y=True)") + + def test_game_type(self): + game_type = pyspiel.GameType( + "matrix_mp", "Matching Pennies", pyspiel.GameType.Dynamics.SIMULTANEOUS, + pyspiel.GameType.ChanceMode.DETERMINISTIC, + pyspiel.GameType.Information.PERFECT_INFORMATION, + pyspiel.GameType.Utility.ZERO_SUM, + pyspiel.GameType.RewardModel.TERMINAL, 2, 2, True, True, False, False, + dict()) + self.assertEqual(game_type.chance_mode, + pyspiel.GameType.ChanceMode.DETERMINISTIC) + + def test_error_handling(self): + with self.assertRaisesRegex(RuntimeError, + "Unknown game 'invalid_game_name'"): + unused_game = pyspiel.load_game("invalid_game_name") + + def test_can_create_cpp_tabular_policy(self): + for game_name in ["kuhn_poker", "leduc_poker", "liars_dice"]: + game = pyspiel.load_game(game_name) + + # We just test that we can create a tabular policy. + policy.python_policy_to_pyspiel_policy(policy.TabularPolicy(game)) + + def test_simultaneous_game_history(self): + game = pyspiel.load_game("coop_box_pushing") + state = game.new_initial_state() + state.apply_action(0) + state2 = game.new_initial_state() + state2.apply_actions([0] * game.num_players()) + self.assertEqual(state.history(), state2.history()) + + def test_record_batched_trajectories(self): + for game_name in ["kuhn_poker", "leduc_poker", "liars_dice"]: + game = pyspiel.load_game(game_name) + python_policy = policy.TabularPolicy(game) + tabular_policy = policy.python_policy_to_pyspiel_policy(python_policy) + policies = [tabular_policy] * 2 + + # We test that we can create a batch of trajectories. + seed = 0 + batch_size = 128 + include_full_observations = False + pyspiel.record_batched_trajectories(game, policies, + python_policy.state_lookup, + batch_size, include_full_observations, + seed, -1) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/tests/rl_environment_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/tests/rl_environment_test.py new file mode 100644 index 0000000..8b3b502 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/tests/rl_environment_test.py @@ -0,0 +1,143 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for open_spiel.python.pybind11.pyspiel.""" + +from absl.testing import absltest + +from open_spiel.python import rl_environment +import pyspiel + + +class RLEnvironmentTest(absltest.TestCase): + + def test_create_game(self): + env = rl_environment.Environment("tic_tac_toe") + self.assertEqual(env.is_turn_based, True) + self.assertEqual(env.num_players, 2) + + def test_create_game_with_args(self): + env = rl_environment.Environment("kuhn_poker", **{"players": 3}) + self.assertEqual(env.is_turn_based, True) + self.assertEqual(env.num_players, 3) + + def test_create_env_from_game_instance(self): + game = pyspiel.load_game("tic_tac_toe") + env = rl_environment.Environment(game) + self.assertEqual(env.is_turn_based, True) + self.assertEqual(env.num_players, 2) + + def test_reset(self): + env = rl_environment.Environment("kuhn_poker", **{"players": 3}) + time_step = env.reset() + self.assertEqual(time_step.observations["current_player"], 0) + self.assertEmpty(time_step.observations["serialized_state"], 0) + self.assertLen(time_step.observations["info_state"], 3) + self.assertLen(time_step.observations["legal_actions"], 3) + self.assertIsNone(time_step.rewards) + self.assertIsNone(time_step.discounts) + self.assertEqual(time_step.step_type.first(), True) + + def test_initial_info_state_is_decision_node(self): + env = rl_environment.Environment("kuhn_poker") + time_step = env.reset() + self.assertEqual(time_step.step_type.first(), True) + self.assertEqual(env.is_chance_node, False) + + def test_full_game(self): + env = rl_environment.Environment("tic_tac_toe", include_full_state=True) + _ = env.reset() + time_step = env.step([0]) + self.assertEqual(time_step.observations["current_player"], 1) + self.assertLen(time_step.observations["info_state"], 2) + self.assertLen(time_step.observations["legal_actions"], 2) + self.assertLen(time_step.rewards, 2) + self.assertLen(time_step.discounts, 2) + self.assertLen(time_step.observations, 4) + + # O X O # Moves 0, 1, 2 + # X O X # Moves 3, 4, 5 + # O . . # Move 6, game over (player 0 wins). 
+
+    for i in range(1, 7):
+      self.assertEqual(time_step.step_type.mid(), True)
+      time_step = env.step([i])
+    self.assertEqual(time_step.step_type.last(), True)
+
+  def test_spec_fields(self):
+    env = rl_environment.Environment("tic_tac_toe")
+    env_spec = env.observation_spec()
+    action_spec = env.action_spec()
+
+    ttt_max_actions = 9
+    ttt_normalized_info_set_shape = (27,)
+
+    self.assertEqual(action_spec["num_actions"], ttt_max_actions)
+    self.assertEqual(env_spec["info_state"], ttt_normalized_info_set_shape)
+    self.assertCountEqual(
+        env_spec.keys(),
+        ["current_player", "info_state", "serialized_state", "legal_actions"])
+    self.assertCountEqual(action_spec.keys(),
+                          ["dtype", "max", "min", "num_actions"])
+
+  def test_full_game_simultaneous_move(self):
+    env = rl_environment.Environment("goofspiel")
+    _ = env.reset()
+    time_step = env.step([0, 0])
+    self.assertEqual(time_step.observations["current_player"],
+                     rl_environment.SIMULTANEOUS_PLAYER_ID)
+    self.assertLen(time_step.observations["info_state"], 2)
+    self.assertLen(time_step.observations["legal_actions"], 2)
+    self.assertLen(time_step.rewards, 2)
+    self.assertLen(time_step.discounts, 2)
+    self.assertLen(time_step.observations, 4)
+
+    actions = [act[0] for act in time_step.observations["legal_actions"]]
+    time_step = env.step(actions)
+    self.assertEqual(time_step.step_type.mid(), True)
+
+    while not time_step.last():
+      actions = [act[0] for act in time_step.observations["legal_actions"]]
+      time_step = env.step(actions)
+
+  def test_set_and_get_state(self):
+    env_ttt1 = rl_environment.Environment("tic_tac_toe")
+    env_ttt2 = rl_environment.Environment("tic_tac_toe")
+    env_kuhn1 = rl_environment.Environment("kuhn_poker", players=2)
+    env_kuhn2 = rl_environment.Environment("kuhn_poker", players=3)
+
+    env_ttt1.reset()
+    env_ttt2.reset()
+    env_kuhn1.reset()
+    env_kuhn2.reset()
+
+    # Transferring states between identical games should work.
+    env_ttt1.set_state(env_ttt2.get_state)
+    env_ttt2.set_state(env_ttt1.get_state)
+
+    # Transferring states between different games or games with different
+    # parameters should fail.
+    with self.assertRaises(AssertionError):
+      self.fail(env_ttt1.set_state(env_kuhn1.get_state))
+    with self.assertRaises(AssertionError):
+      self.fail(env_kuhn1.set_state(env_ttt1.get_state))
+
+    with self.assertRaises(AssertionError):
+      self.fail(env_kuhn1.set_state(env_kuhn2.get_state))
+    with self.assertRaises(AssertionError):
+      self.fail(env_kuhn2.set_state(env_kuhn1.get_state))
+
+
+if __name__ == "__main__":
+  absltest.main()
diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/tests/sampled_stochastic_games_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/tests/sampled_stochastic_games_test.py
new file mode 100644
index 0000000..77a19fb
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/python/tests/sampled_stochastic_games_test.py
@@ -0,0 +1,53 @@
+# Copyright 2019 DeepMind Technologies Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +import pickle + +from absl.testing import absltest +from absl.testing import parameterized + +from open_spiel.python import test_utils +import pyspiel + +# All games with kSampledStochastic chance mode. +SPIEL_SAMPLED_STOCHASTIC_GAMES_LIST = [ + g for g in pyspiel.registered_games() if g.default_loadable and + g.chance_mode == pyspiel.GameType.ChanceMode.SAMPLED_STOCHASTIC +] +assert len(SPIEL_SAMPLED_STOCHASTIC_GAMES_LIST) >= 2 + +# We only do 2 runs as this is slow. +NUM_RUNS = 2 + + +class SampledStochasticGamesTest(parameterized.TestCase): + + @parameterized.parameters(*SPIEL_SAMPLED_STOCHASTIC_GAMES_LIST) + def test_stateful_game_serialization(self, game_info): + game = pyspiel.load_game(game_info.short_name, {"rng_seed": 0}) + + for seed in range(NUM_RUNS): + # Mutate game's internal RNG state by doing a full playout. + test_utils.random_playout(game.new_initial_state(), seed) + deserialized_game = pickle.loads(pickle.dumps(game)) + + # Make sure initial states are the same after game deserialization. + state = test_utils.random_playout(game.new_initial_state(), seed) + deserialized_state = test_utils.random_playout( + deserialized_game.new_initial_state(), seed) + self.assertEqual(str(state), str(deserialized_state)) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/tests/tensor_game_utils_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/tests/tensor_game_utils_test.py new file mode 100644 index 0000000..bfa9c52 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/tests/tensor_game_utils_test.py @@ -0,0 +1,63 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests the C++ matrix game utility methods exposed to Python.""" + +from absl.testing import absltest + +import pyspiel + + +class TensorGamesUtilsTest(absltest.TestCase): + + def test_extensive_to_tensor_game_type(self): + game = pyspiel.extensive_to_tensor_game( + pyspiel.load_game( + "turn_based_simultaneous_game(game=blotto(players=3,coins=5))")) + game_type = game.get_type() + self.assertEqual(game_type.dynamics, pyspiel.GameType.Dynamics.SIMULTANEOUS) + self.assertEqual(game_type.chance_mode, + pyspiel.GameType.ChanceMode.DETERMINISTIC) + self.assertEqual(game_type.information, + pyspiel.GameType.Information.ONE_SHOT) + self.assertEqual(game_type.utility, pyspiel.GameType.Utility.ZERO_SUM) + + def test_extensive_to_tensor_game_payoff_tensor(self): + turn_based_game = pyspiel.load_game_as_turn_based( + "blotto(players=3,coins=5)") + tensor_game1 = pyspiel.extensive_to_tensor_game(turn_based_game) + tensor_game2 = pyspiel.load_tensor_game("blotto(players=3,coins=5)") + self.assertEqual(tensor_game1.shape(), tensor_game2.shape()) + s0 = turn_based_game.new_initial_state() + self.assertEqual(tensor_game1.shape()[0], s0.num_distinct_actions()) + for a0 in range(s0.num_distinct_actions()): + s1 = s0.child(a0) + self.assertEqual(tensor_game1.shape()[1], s1.num_distinct_actions()) + for a1 in range(s1.num_distinct_actions()): + s2 = s1.child(a1) + self.assertEqual(tensor_game1.shape()[2], s2.num_distinct_actions()) + for a2 in range(s2.num_distinct_actions()): + s3 = s2.child(a2) + self.assertTrue(s3.is_terminal()) + for player in range(3): + self.assertEqual( + s3.returns()[player], + tensor_game1.player_utility(player, (a0, a1, a2))) + self.assertEqual( + s3.returns()[player], + tensor_game2.player_utility(player, (a0, a1, a2))) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/utils/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/utils/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/utils/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/utils/app.py b/scenarios/bargaining/open_spiel/open_spiel/python/utils/app.py new file mode 100644 index 0000000..071bd42 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/utils/app.py @@ -0,0 +1,17 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Entry point for applications with platform specific initializations.""" + +from absl.app import * # pylint: disable=wildcard-import diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/utils/data_logger.py b/scenarios/bargaining/open_spiel/open_spiel/python/utils/data_logger.py new file mode 100644 index 0000000..3a80bc9 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/utils/data_logger.py @@ -0,0 +1,55 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Log data to a jsonl file.""" + +import datetime +import json +import os +import time +from typing import Any, Dict, Text + +from open_spiel.python.utils import gfile + + +class DataLoggerJsonLines: + """Log data to a jsonl file.""" + + def __init__(self, path: str, name: str, flush=True): + self._fd = gfile.Open(os.path.join(path, name + ".jsonl"), "w") + self._flush = flush + self._start_time = time.time() + + def __del__(self): + self.close() + + def close(self): + if hasattr(self, "_fd") and self._fd is not None: + self._fd.flush() + self._fd.close() + self._fd = None + + def flush(self): + self._fd.flush() + + def write(self, data: Dict[Text, Any]): + now = time.time() + data["time_abs"] = now + data["time_rel"] = now - self._start_time + dt_now = datetime.datetime.utcfromtimestamp(now) + data["time_str"] = dt_now.strftime("%Y-%m-%d %H:%M:%S.%f +0000") + self._fd.write(json.dumps(data)) + self._fd.write("\n") + if self._flush: + self.flush() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/utils/file_logger.py b/scenarios/bargaining/open_spiel/open_spiel/python/utils/file_logger.py new file mode 100644 index 0000000..98e6a68 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/utils/file_logger.py @@ -0,0 +1,52 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A class to log stuff to a file, mainly useful in parallel situations.""" + +import datetime +import os + + +class FileLogger(object): + """A logger to print stuff to a file.""" + + def __init__(self, path, name, quiet=False, also_to_stdout=False): + self._fd = open(os.path.join(path, "log-{}.txt".format(name)), "w") + self._quiet = quiet + self.also_to_stdout = also_to_stdout + + def print(self, *args): + # Date/time with millisecond precision. 
+ date_prefix = "[{}]".format(datetime.datetime.now().isoformat(" ")[:-3]) + print(date_prefix, *args, file=self._fd, flush=True) + if self.also_to_stdout: + print(date_prefix, *args, flush=True) + + def opt_print(self, *args): + if not self._quiet: + self.print(*args) + + def __enter__(self): + return self + + def __exit__(self, unused_exception_type, unused_exc_value, unused_traceback): + self.close() + + def close(self): + if self._fd: + self._fd.close() + self._fd = None + + def __del__(self): + self.close() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/utils/file_logger_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/utils/file_logger_test.py new file mode 100644 index 0000000..a99ce32 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/utils/file_logger_test.py @@ -0,0 +1,55 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.utils.file_logger.""" + +import os +import tempfile + +from absl.testing import absltest + +from open_spiel.python.utils import file_logger + + +class FileLoggerTest(absltest.TestCase): + + def test_file_logger(self): + tmp_dir = tempfile.mkdtemp() + try: + log_name = "test" + log_file_name = os.path.join(tmp_dir, "log-{}.txt".format(log_name)) + + self.assertTrue(os.path.isdir(tmp_dir)) + self.assertFalse(os.path.exists(log_file_name)) + + with file_logger.FileLogger(tmp_dir, log_name) as logger: + logger.print("line 1") + logger.print("line", 2) + logger.print("line", 3, "asdf") + + with open(log_file_name, "r") as f: + lines = f.readlines() + + self.assertLen(lines, 3) + self.assertIn("line 1", lines[0]) + self.assertIn("line 2", lines[1]) + self.assertIn("line 3 asdf", lines[2]) + finally: + if os.path.exists(log_file_name): + os.remove(log_file_name) + os.rmdir(tmp_dir) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/utils/file_utils.py b/scenarios/bargaining/open_spiel/open_spiel/python/utils/file_utils.py new file mode 100644 index 0000000..f9b5c0c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/utils/file_utils.py @@ -0,0 +1,29 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""File utilities.""" + +import os + + +def find_file(filename, levels): + if os.path.isfile(filename): + return filename + else: + for _ in range(levels): + filename = '../' + filename + if os.path.isfile(filename): + return filename + return None + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/utils/gfile.py b/scenarios/bargaining/open_spiel/open_spiel/python/utils/gfile.py new file mode 100644 index 0000000..21fc799 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/utils/gfile.py @@ -0,0 +1,28 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This is replaces google's gfile used for network storage. + +A more complete public version of gfile: +https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/platform/gfile.py +""" + +import os + +# pylint: disable=invalid-name +Exists = os.path.exists +IsDirectory = os.path.isdir +ListDir = os.listdir +MakeDirs = os.makedirs +Open = open diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/utils/lru_cache.py b/scenarios/bargaining/open_spiel/open_spiel/python/utils/lru_cache.py new file mode 100644 index 0000000..b90ec34 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/utils/lru_cache.py @@ -0,0 +1,92 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A Least Recently Used cache.""" + +import collections + + +class CacheInfo(collections.namedtuple("CacheInfo", [ + "hits", "misses", "size", "max_size"])): + """Info for LRUCache.""" + + @property + def usage(self): + return self.size / self.max_size if self.max_size else 0 + + @property + def total(self): + return self.hits + self.misses + + @property + def hit_rate(self): + return self.hits / self.total if self.total else 0 + + +class LRUCache(object): + """A Least Recently Used cache. + + This is more general than functools.lru_cache since that one requires the + key to also be the input to the function to generate the value, which + isn't possible when the input is not hashable, eg a numpy.ndarray. + """ + + def __init__(self, max_size): + self._max_size = max_size + self._data = collections.OrderedDict() + self._hits = 0 + self._misses = 0 + + def clear(self): + self._data.clear() + self._hits = 0 + self._misses = 0 + + def make(self, key, fn): + """Return the value, either from cache, or make it and save it.""" + try: + val = self._data.pop(key) # Take it out. 
+ self._hits += 1 + except KeyError: + self._misses += 1 + val = fn() + if len(self._data) >= self._max_size: + self._data.popitem(False) + self._data[key] = val # Insert/reinsert it at the back. + return val + + def get(self, key): + """Get the value and move it to the back, or return None on a miss.""" + try: + val = self._data.pop(key) # Take it out. + self._data[key] = val # Reinsert it at the back. + self._hits += 1 + return val + except KeyError: + self._misses += 1 + return None + + def set(self, key, val): + """Set the value.""" + self._data.pop(key, None) # Take it out if it existed. + self._data[key] = val # Insert/reinsert it at the back. + if len(self._data) > self._max_size: + self._data.popitem(False) + return val + + def info(self): + return CacheInfo(self._hits, self._misses, len(self._data), self._max_size) + + def __len__(self): + return len(self._data) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/utils/lru_cache_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/utils/lru_cache_test.py new file mode 100644 index 0000000..a65d022 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/utils/lru_cache_test.py @@ -0,0 +1,78 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for open_spiel.python.utils.lru_cache.""" + +from absl.testing import absltest + +from open_spiel.python.utils import lru_cache + + +class LruCacheTest(absltest.TestCase): + + def test_lru_cache(self): + cache = lru_cache.LRUCache(4) + + self.assertEmpty(cache) + + info = cache.info() + self.assertEqual(info.hits, 0) + self.assertEqual(info.misses, 0) + self.assertEqual(info.size, 0) + self.assertEqual(info.max_size, 4) + self.assertEqual(info.usage, 0) + self.assertEqual(info.hit_rate, 0) + + self.assertIsNone(cache.get(1)) + + cache.set(13, "13") + self.assertLen(cache, 1) + + self.assertIsNone(cache.get(1)) + + self.assertEqual(cache.get(13), "13") + + cache.set(14, "14") + cache.set(15, "15") + cache.set(16, "16") + + self.assertLen(cache, 4) + + cache.set(17, "17") + + self.assertLen(cache, 4) + + self.assertIsNone(cache.get(13)) # evicted + self.assertTrue(cache.get(14)) + + self.assertLen(cache, 4) + + cache.set(18, "18") + + self.assertIsNone(cache.get(15)) # evicted + self.assertTrue(cache.get(14)) # older but more recently used + + info = cache.info() + self.assertEqual(info.usage, 1) + + cache.clear() + + self.assertIsNone(cache.get(18)) # evicted + + self.assertEqual(cache.make(19, lambda: "19"), "19") + self.assertEqual(cache.get(19), "19") + self.assertEqual(cache.make(19, lambda: "20"), "19") + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/utils/metrics.py b/scenarios/bargaining/open_spiel/open_spiel/python/utils/metrics.py new file mode 100644 index 0000000..3b57376 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/utils/metrics.py @@ -0,0 +1,57 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Metrics and logging helpers.""" + +from typing import Optional + +# pylint: disable=g-import-not-at-top disable=unused-import +try: + from clu import metric_writers + from clu.metric_writers import ensure_flushes + from clu.metric_writers import write_values + from clu.values import * # pylint: disable=wildcard-import +except ImportError as e: + raise ImportError( + str(e) + + "\nCLU not found. Please install CLU: python3 -m pip install clu") from e +# pylint: enable=g-import-not-at-top enable=unused-import + + +def create_default_writer(logdir: Optional[str] = None, + just_logging: bool = False, + **kwargs) -> metric_writers.MetricWriter: + """Create the default metrics writer. + + See metric_writers.LoggingWriter interface for the API to write the metrics + and other metadata, e.g. hyper-parameters. Sample usage is as follows: + + writer = metrics.create_default_writer('/some/path') + writer.write_hparams({"learning_rate": 0.001, "batch_size": 64}) + ... + # e.g. in training loop. + writer.write_scalars(step, {"loss": loss}) + ... + writer.flush() + + Args: + logdir: Path of the directory to store the metric logs as TF summary files. + If None, files will not be created. + just_logging: If true, metrics will be outputted only to INFO log. 
+      **kwargs: kwargs passed to the CLU default writer.
+
+  Returns:
+    a metric_writers.MetricWriter.
+  """
+  return metric_writers.create_default_writer(
+      logdir=logdir, just_logging=just_logging, **kwargs)
diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/utils/metrics_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/utils/metrics_test.py
new file mode 100644
index 0000000..287d6f3
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/python/utils/metrics_test.py
@@ -0,0 +1,54 @@
+# Copyright 2022 DeepMind Technologies Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for metrics."""
+
+import glob
+import os
+
+from absl.testing import absltest
+from absl.testing import parameterized
+
+from open_spiel.python.utils import metrics
+
+
+class MetricsTest(parameterized.TestCase):
+
+  @parameterized.parameters((True,), (False,))
+  def test_create(self, just_logging: bool):
+    logdir = self.create_tempdir()
+    # Create the writer.
+    writer = metrics.create_default_writer(
+        logdir.full_path, just_logging=just_logging)
+    self.assertIsInstance(writer, metrics.metric_writers.MultiWriter)
+
+    # Write some metrics.
+    writer.write_hparams({"param1": 1.0, "param2": 2.0})
+    for step in range(5):
+      writer.write_scalars(step, {"value": step * step})
+
+    metrics.write_values(writer, 5, {
+        "scalar": 1.23,
+        "text": metrics.Text(value="foo")
+    })
+    # Flush the writer.
+    writer.flush()
+
+    # Check that the summary file exists if not just logging.
+    self.assertLen(
+        glob.glob(os.path.join(logdir.full_path, "events.out.tfevents.*")),
+        0 if just_logging else 1)
+
+
+if __name__ == "__main__":
+  absltest.main()
diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/utils/replay_buffer.py b/scenarios/bargaining/open_spiel/open_spiel/python/utils/replay_buffer.py
new file mode 100644
index 0000000..0c277d3
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/python/utils/replay_buffer.py
@@ -0,0 +1,75 @@
+# Copyright 2019 DeepMind Technologies Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Replay buffer of fixed size with a FIFO replacement policy."""
+
+import random
+
+
+class ReplayBuffer(object):
+  """ReplayBuffer of fixed size with a FIFO replacement policy.
+
+  Stored transitions can be sampled uniformly.
+
+  The underlying data structure is a ring buffer, allowing O(1) adding and
+  sampling.
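+
+  Illustrative usage (a sketch based only on the methods defined below, not
+  an excerpt from the upstream tests):
+
+    buffer = ReplayBuffer(replay_buffer_capacity=2)
+    buffer.add("a")
+    buffer.add("b")
+    buffer.add("c")           # "a" is replaced first (FIFO).
+    batch = buffer.sample(2)  # Uniform sample of the stored elements.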
+ """ + + def __init__(self, replay_buffer_capacity): + self._replay_buffer_capacity = replay_buffer_capacity + self._data = [] + self._next_entry_index = 0 + + def add(self, element): + """Adds `element` to the buffer. + + If the buffer is full, the oldest element will be replaced. + + Args: + element: data to be added to the buffer. + """ + if len(self._data) < self._replay_buffer_capacity: + self._data.append(element) + else: + self._data[self._next_entry_index] = element + self._next_entry_index += 1 + self._next_entry_index %= self._replay_buffer_capacity + + def sample(self, num_samples): + """Returns `num_samples` uniformly sampled from the buffer. + + Args: + num_samples: `int`, number of samples to draw. + + Returns: + An iterable over `num_samples` random elements of the buffer. + + Raises: + ValueError: If there are less than `num_samples` elements in the buffer + """ + if len(self._data) < num_samples: + raise ValueError("{} elements could not be sampled from size {}".format( + num_samples, len(self._data))) + return random.sample(self._data, num_samples) + + def reset(self): + """Resets the contents of the replay buffer.""" + self._data = [] + self._next_entry_index = 0 + + def __len__(self): + return len(self._data) + + def __iter__(self): + return iter(self._data) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/utils/replay_buffer_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/utils/replay_buffer_test.py new file mode 100644 index 0000000..1c15e4f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/utils/replay_buffer_test.py @@ -0,0 +1,69 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for open_spiel.python.utils.replay_buffer.""" + +from absl.testing import absltest + +from open_spiel.python.utils.replay_buffer import ReplayBuffer + + +class ReplayBufferTest(absltest.TestCase): + + def test_replay_buffer_add(self): + # pylint: disable=g-generic-assert + replay_buffer = ReplayBuffer(replay_buffer_capacity=10) + self.assertEqual(len(replay_buffer), 0) + replay_buffer.add("entry1") + self.assertEqual(len(replay_buffer), 1) + replay_buffer.add("entry2") + self.assertEqual(len(replay_buffer), 2) + + self.assertIn("entry1", replay_buffer) + self.assertIn("entry2", replay_buffer) + + def test_replay_buffer_max_capacity(self): + # pylint: disable=g-generic-assert + replay_buffer = ReplayBuffer(replay_buffer_capacity=2) + replay_buffer.add("entry1") + replay_buffer.add("entry2") + replay_buffer.add("entry3") + self.assertEqual(len(replay_buffer), 2) + + self.assertIn("entry2", replay_buffer) + self.assertIn("entry3", replay_buffer) + + def test_replay_buffer_sample(self): + replay_buffer = ReplayBuffer(replay_buffer_capacity=3) + replay_buffer.add("entry1") + replay_buffer.add("entry2") + replay_buffer.add("entry3") + + samples = replay_buffer.sample(3) + + self.assertIn("entry1", samples) + self.assertIn("entry2", samples) + self.assertIn("entry3", samples) + + def test_replay_buffer_reset(self): + replay_buffer = ReplayBuffer(replay_buffer_capacity=3) + replay_buffer.add("entry1") + replay_buffer.add("entry2") + + replay_buffer.reset() + self.assertEmpty(replay_buffer) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/utils/reservoir_buffer.py b/scenarios/bargaining/open_spiel/open_spiel/python/utils/reservoir_buffer.py new file mode 100644 index 0000000..d88892a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/utils/reservoir_buffer.py @@ -0,0 +1,78 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Reservoir buffer implemented in Numpy. + +See https://en.wikipedia.org/wiki/Reservoir_sampling for more details. +""" + +import random +import numpy as np + + +# TODO(author18): refactor the reservoir with the NFSP Pytorch implementation +class ReservoirBuffer(object): + """Allows uniform sampling over a stream of data. + + This class supports the storage of arbitrary elements, such as observation + tensors, integer actions, etc. + + See https://en.wikipedia.org/wiki/Reservoir_sampling for more details. + """ + + def __init__(self, reservoir_buffer_capacity): + self._reservoir_buffer_capacity = reservoir_buffer_capacity + self._data = [] + self._add_calls = 0 + + def add(self, element): + """Potentially adds `element` to the reservoir buffer. + + Args: + element: data to be added to the reservoir buffer. 
+ """ + if len(self._data) < self._reservoir_buffer_capacity: + self._data.append(element) + else: + idx = np.random.randint(0, self._add_calls + 1) + if idx < self._reservoir_buffer_capacity: + self._data[idx] = element + self._add_calls += 1 + + def sample(self, num_samples): + """Returns `num_samples` uniformly sampled from the buffer. + + Args: + num_samples: `int`, number of samples to draw. + + Returns: + An iterable over `num_samples` random elements of the buffer. + + Raises: + ValueError: If there are less than `num_samples` elements in the buffer + """ + if len(self._data) < num_samples: + raise ValueError("{} elements could not be sampled from size {}".format( + num_samples, len(self._data))) + return random.sample(self._data, num_samples) + + def clear(self): + self._data = [] + self._add_calls = 0 + + def __len__(self): + return len(self._data) + + def __iter__(self): + return iter(self._data) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/utils/shared_value.py b/scenarios/bargaining/open_spiel/open_spiel/python/utils/shared_value.py new file mode 100644 index 0000000..ca25ec7 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/utils/shared_value.py @@ -0,0 +1,25 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A shared value without deep copy.""" + + +class SharedValue(object): + """A shared value without deep copy.""" + + def __init__(self, value): + self.value = value + + def __deepcopy__(self, memo): + return SharedValue(self.value) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/utils/spawn.py b/scenarios/bargaining/open_spiel/open_spiel/python/utils/spawn.py new file mode 100644 index 0000000..e599028 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/utils/spawn.py @@ -0,0 +1,100 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A wrapper around multiprocessing to be compatible at google.""" + +import contextlib +import multiprocessing +import queue + +Empty = queue.Empty + +# Without this line, this fails on latest MacOS with Python 3.8. See +# https://github.com/pytest-dev/pytest-flask/issues/104#issuecomment-577908228 +# and for more details see +# https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods +multiprocessing.set_start_method("fork") + + +# For compatibility so that it works inside Google. 
+@contextlib.contextmanager +def main_handler(): + yield + + +class Process(object): + """A wrapper around `multiprocessing` that allows it to be used at google. + + It spawns a subprocess from the given target function. That function should + take an additional argument `queue` which will get a bidirectional + _ProcessQueue for communicating with the parent. + """ + + def __init__(self, target, args=(), kwargs=None): + if kwargs is None: + kwargs = {} + elif "queue" in kwargs: + raise ValueError("`queue` is reserved for use by `Process`.") + + q1 = multiprocessing.Queue() + q2 = multiprocessing.Queue() + self._queue = _ProcessQueue(q1, q2) + kwargs["queue"] = _ProcessQueue(q2, q1) + + self._process = multiprocessing.Process( + target=target, args=args, kwargs=kwargs) + self._process.start() + + def join(self, *args): + return self._process.join(*args) + + @property + def exitcode(self): + return self._process.exitcode + + @property + def queue(self): + return self._queue + + +class _ProcessQueue(object): + """A bidirectional queue for talking to a subprocess. + + `empty`, `get` and `get_nowait` act on the incoming queue, while + `full`, `put` and `put_nowait` act on the outgoing queue. + + This class should only be created by the Process object. + """ + + def __init__(self, q_in, q_out): + self._q_in = q_in + self._q_out = q_out + + def empty(self): + return self._q_in.empty() + + def full(self): + return self._q_out.full() + + def get(self, block=True, timeout=None): + return self._q_in.get(block=block, timeout=timeout) + + def get_nowait(self): + return self.get(False) + + def put(self, obj, block=True, timeout=None): + return self._q_out.put(obj, block=block, timeout=timeout) + + def put_nowait(self, obj): + return self.put(obj, False) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/utils/spawn_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/utils/spawn_test.py new file mode 100644 index 0000000..b50c4a0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/utils/spawn_test.py @@ -0,0 +1,90 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.utils.spawn.""" + +import random +import time + +from absl.testing import absltest + +from open_spiel.python.utils import spawn + + +class SpawnTest(absltest.TestCase): + + def test_spawn_works(self): + max_sleep_time = 0.01 # 10ms + + def worker_fn(worker_id, queue): + queue.put(worker_id) # Show it's up and running. + random.seed(time.time() + worker_id) + while True: + value = queue.get() + if value is None: + break + time.sleep(max_sleep_time * random.random()) + queue.put((worker_id, value)) + + num_workers = 5 + workers = [spawn.Process(worker_fn, kwargs={"worker_id": i}) + for i in range(num_workers)] + + # Make sure they're warmed up. 
+ for worker_id, worker in enumerate(workers): + self.assertEqual(worker_id, worker.queue.get()) + + num_work_units = 40 + expected_output = [] + for worker_id, worker in enumerate(workers): + for i in range(num_work_units): + worker.queue.put(i) + expected_output.append((worker_id, i)) + worker.queue.put(None) + + start_time = time.time() + + output = [] + i = 0 + while len(output) < len(expected_output): + for worker in workers: + try: + output.append(worker.queue.get_nowait()) + except spawn.Empty: + pass + + time.sleep(0.001) + i += 1 + self.assertLess(time.time() - start_time, + 20 * max_sleep_time * num_work_units, + msg=f"Don't wait forever. Loop {i}, found {len(output)}") + + time_taken = time.time() - start_time + print("Finished in {:.3f}s, {:.2f}x the max".format( + time_taken, time_taken / (max_sleep_time * num_work_units))) + + for worker in workers: + worker.join() + + # All messages arrived + self.assertLen(output, len(expected_output)) + self.assertCountEqual(output, expected_output) + + # The messages arrived out of order, showing parallelism. + self.assertNotEqual(output, expected_output) + + +if __name__ == "__main__": + with spawn.main_handler(): + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/utils/stats.py b/scenarios/bargaining/open_spiel/open_spiel/python/utils/stats.py new file mode 100644 index 0000000..0fa0088 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/utils/stats.py @@ -0,0 +1,221 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""Some basic stats classes."""
+
+import math
+from typing import List
+
+
+class BasicStats(object):
+  """A set of statistics about a single value series."""
+  __slots__ = ("_num", "_min", "_max", "_sum", "_sum_sq")
+
+  def __init__(self):
+    self.reset()
+
+  def reset(self):
+    self._num = 0
+    self._min = float("inf")
+    self._max = float("-inf")
+    self._sum = 0
+    self._sum_sq = 0
+
+  def add(self, val: float):
+    self._num += 1
+    if self._min > val:
+      self._min = val
+    if self._max < val:
+      self._max = val
+    self._sum += val
+    self._sum_sq += val**2
+
+  @property
+  def num(self):
+    return self._num
+
+  @property
+  def min(self):
+    return 0 if self._num == 0 else self._min
+
+  @property
+  def max(self):
+    return 0 if self._num == 0 else self._max
+
+  @property
+  def avg(self):
+    return 0 if self._num == 0 else self._sum / self._num
+
+  @property
+  def std_dev(self):
+    """Standard deviation."""
+    if self._num == 0:
+      return 0
+    return math.sqrt(
+        max(0, self._sum_sq / self._num - (self._sum / self._num)**2))
+
+  def merge(self, other: "BasicStats"):
+    # pylint: disable=protected-access
+    self._num += other._num
+    self._min = min(self._min, other._min)
+    self._max = max(self._max, other._max)
+    self._sum += other._sum
+    self._sum_sq += other._sum_sq
+    # pylint: enable=protected-access
+
+  @property
+  def as_dict(self):
+    return {
+        "num": self.num,
+        "min": float(self.min),
+        "max": float(self.max),
+        "avg": float(self.avg),
+        "std_dev": self.std_dev,
+    }
+
+  def __str__(self):
+    if self.num == 0:
+      return "num=0"
+    return "sum: %.4f, avg: %.4f, dev: %.4f, min: %.4f, max: %.4f, num: %d" % (
+        self._sum, self.avg, self.std_dev, self.min, self.max, self.num)
+
+
+class SlidingWindowAccumulator(object):
+  """A utility object to compute the mean of a sliding window of values."""
+
+  def __init__(self, max_window_size: int):
+    self._max_window_size = max_window_size
+    self._index = -1
+    self._values = []
+
+  def add(self, value: float):
+    if len(self._values) < self._max_window_size:
+      self._values.append(value)
+      self._index += 1
+    else:
+      self._values[self._index] = value
+      self._index += 1
+      if self._index >= self._max_window_size:
+        self._index = 0
+
+  def mean(self):
+    return sum(self._values) / len(self._values)
+
+
+class StatCounter:
+  """An object for incrementally counting statistics.
+
+  Uses Welford's online algorithm for computing variance.
+  https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
+
+  Note: everything returns 0 if there are no data points. While technically
+  incorrect, this makes working with the StatCounter objects easier (i.e. they
+  can print values even with zero data).
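+
+  Illustrative usage (a sketch of the interface defined below; the numbers
+  are only example values):
+
+    counter = StatCounter()
+    for v in [1.0, 2.0, 3.0]:
+      counter.add(v)
+    counter.mean()      # 2.0
+    counter.variance()  # Population variance via Welford's update: 2/3.
+    counter.ci95()      # 1.96 * stddev / sqrt(n).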
+ """ + + def __init__(self): + self._sum = 0 + self._m2 = 0 + self._mean = 0 + self._n = 0 + self._max = -math.inf + self._min = math.inf + + def add(self, value: float): + self._sum = self._sum + value + self._n += 1 + + delta = value - self._mean + self._mean = self._sum / self._n + self._m2 = self._m2 + delta*(value - self._mean) + + self._min = min(self._min, value) + self._max = max(self._max, value) + + def variance(self): + if self._n == 0: return 0 # technically wrong but easier to work with + return self._m2 / self._n + + def sample_variance(self): + if self._n < 2: return 0 + return self._m2 / (self._n - 1) + + def stddev(self): + return math.sqrt(self.variance()) + + def mean(self): + if self._n == 0: return 0 + return self._mean + + @property + def max(self): + return self._max + + @property + def min(self): + return self._min + + @property + def n(self): + return self.n + + def ci95(self): + if self._n == 0: return 0 + return 1.96 * self.stddev() / math.sqrt(self._n) + + +class HistogramNumbered: + """Track a histogram of occurences for `count` buckets. + + You need to decide how to map your data into the buckets. Mainly useful for + scalar values. + """ + + def __init__(self, num_buckets: int): + self._counts = [0] * num_buckets + + def reset(self): + self._counts = [0] * len(self._counts) + + def add(self, bucket_id: int): + self._counts[bucket_id] += 1 + + @property + def data(self): + return self._counts + + +class HistogramNamed: + """Track a histogram of occurences for named buckets. + + Same as HistogramNumbered, but each bucket has a name associated with it. + Mainly useful for categorical values. + """ + + def __init__(self, bucket_names: List[str]): + self._names = bucket_names + self.reset() + + def reset(self): + self._counts = [0] * len(self._names) + + def add(self, bucket_id: int): + self._counts[bucket_id] += 1 + + @property + def data(self): + return { + "counts": self._counts, + "names": self._names, + } diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/utils/training.py b/scenarios/bargaining/open_spiel/open_spiel/python/utils/training.py new file mode 100644 index 0000000..339ce4c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/utils/training.py @@ -0,0 +1,45 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Training utilities.""" + +from typing import Sequence + +from open_spiel.python import rl_agent +from open_spiel.python import rl_environment + + +def run_episodes(envs: Sequence[rl_environment.Environment], + agents: Sequence[rl_agent.AbstractAgent], + num_episodes: int = 1, + is_evaluation: bool = False) -> None: + """Runs the agents on the environments for the specified number of episodes. + + Args: + envs: RL environments. + agents: RL agents. + num_episodes: Number of episodes to run. + is_evaluation: Indicates whether the agent should use the evaluation or + training behavior. + """ + assert len(envs) == len(agents), 'Environments should match the agents.' 
+ for _ in range(num_episodes): + for env, agent in zip(envs, agents): + time_step = env.reset() + while not time_step.last(): + agent_output = agent.step(time_step, is_evaluation=is_evaluation) + if agent_output: + action_list = [agent_output.action] + time_step = env.step(action_list) + # Episode is over, step all agents with final info state. + agent.step(time_step) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/vector_env.py b/scenarios/bargaining/open_spiel/open_spiel/python/vector_env.py new file mode 100644 index 0000000..852fb28 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/vector_env.py @@ -0,0 +1,78 @@ +# Copyright 2022 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""A vectorized RL Environment.""" + + +class SyncVectorEnv(object): + """A vectorized RL Environment. + + This environment is synchronized - games do not execute in parallel. Speedups + are realized by calling models on many game states simultaneously. + """ + + def __init__(self, envs): + if not isinstance(envs, list): + raise ValueError( + "Need to call this with a list of rl_environment.Environment objects") + self.envs = envs + + def __len__(self): + return len(self.envs) + + def observation_spec(self): + return self.envs[0].observation_spec() + + @property + def num_players(self): + return self.envs[0].num_players + + def step(self, step_outputs, reset_if_done=False): + """Apply one step. + + Args: + step_outputs: the step outputs + reset_if_done: if True, automatically reset the environment + when the epsiode ends + + Returns: + time_steps: the time steps, + reward: the reward + done: done flag + unreset_time_steps: unreset time steps + """ + time_steps = [ + self.envs[i].step([step_outputs[i].action]) + for i in range(len(self.envs)) + ] + reward = [step.rewards for step in time_steps] + done = [step.last() for step in time_steps] + unreset_time_steps = time_steps # Copy these because you may want to look + # at the unreset versions to extract + # information from them + + if reset_if_done: + time_steps = self.reset(envs_to_reset=done) + + return time_steps, reward, done, unreset_time_steps + + def reset(self, envs_to_reset=None): + if envs_to_reset is None: + envs_to_reset = [True for _ in range(len(self.envs))] + + time_steps = [ + self.envs[i].reset() + if envs_to_reset[i] else self.envs[i].get_time_step() + for i in range(len(self.envs)) + ] + return time_steps diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/visualizations/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/visualizations/__init__.py new file mode 100644 index 0000000..3f0c683 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/visualizations/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/visualizations/treeviz.py b/scenarios/bargaining/open_spiel/open_spiel/python/visualizations/treeviz.py new file mode 100644 index 0000000..1e41813 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/visualizations/treeviz.py @@ -0,0 +1,240 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Visualizing game trees with graphviz. + +GameTree builds a `pygraphviz.AGraph` reprensentation of the game tree. The +resulting tree can be directly visualized in Jupyter notebooks or Google Colab +via SVG plotting - or written to a file by calling `draw(filename, prog="dot")`. + +See `examples/treeviz_example.py` for a more detailed example. + +This module relies on external dependencies, which need to be installed before +use. On a debian system follow these steps: +``` +sudo apt-get install graphviz libgraphviz-dev +pip install pygraphviz +``` +""" + +import collections +import pyspiel + +# pylint: disable=g-import-not-at-top +try: + import pygraphviz +except (ImportError, Exception) as e: + raise ImportError( + str(e) + "\nPlease make sure to install the following dependencies:\n" + "sudo apt-get install graphviz libgraphviz-dev\n" + "pip install pygraphviz") from None +# pylint: enable=g-import-not-at-top + +_PLAYER_SHAPES = {0: "square", 1: "ellipse"} +_PLAYER_COLORS = {-1: "black", 0: "blue", 1: "red"} +_FONTSIZE = 8 +_WIDTH = _HEIGHT = 0.25 +_ARROWSIZE = .5 +_MARGIN = 0.01 + + +def default_node_decorator(state): + """Decorates a state-node of the game tree. + + This method can be called by a custom decorator to prepopulate the attributes + dictionary. Then only relevant attributes need to be changed, or added. + + Args: + state: The state. + + Returns: + `dict` with graphviz node style attributes. + """ + player = state.current_player() + attrs = { + "label": "", + "fontsize": _FONTSIZE, + "width": _WIDTH, + "height": _HEIGHT, + "margin": _MARGIN + } + if state.is_terminal(): + attrs["label"] = ", ".join(map(str, state.returns())) + attrs["shape"] = "diamond" + elif state.is_chance_node(): + attrs["shape"] = "point" + attrs["width"] = _WIDTH / 2. + attrs["height"] = _HEIGHT / 2. + else: + attrs["label"] = str(state.information_state_string()) + attrs["shape"] = _PLAYER_SHAPES.get(player, "ellipse") + attrs["color"] = _PLAYER_COLORS.get(player, "black") + return attrs + + +def default_edge_decorator(parent, unused_child, action): + """Decorates a state-node of the game tree. 
+ + This method can be called by a custom decorator to prepopulate the attributes + dictionary. Then only relevant attributes need to be changed, or added. + + Args: + parent: The parent state. + unused_child: The child state, not used in the default decorator. + action: `int` the selected action in the parent state. + + Returns: + `dict` with graphviz node style attributes. + """ + player = parent.current_player() + attrs = { + "label": " " + parent.action_to_string(player, action), + "fontsize": _FONTSIZE, + "arrowsize": _ARROWSIZE + } + attrs["color"] = _PLAYER_COLORS.get(player, "black") + return attrs + + +class GameTree(pygraphviz.AGraph): + """Builds `pygraphviz.AGraph` of the game tree. + + Attributes: + game: A `pyspiel.Game` object. + depth_limit: Maximum depth of the tree. Optional, default=-1 (no limit). + node_decorator: Decorator function for nodes (states). Optional, default= + `treeviz.default_node_decorator`. + edge_decorator: Decorator function for edges (actions). Optional, default= + `treeviz.default_edge_decorator`. + group_terminal: Whether to display all terminal states at same level, + default=False. + group_infosets: Whether to group infosets together, default=False. + group_pubsets: Whether to group public sets together, default=False. + target_pubset: Whether to group all public sets "*" or a specific one. + infoset_attrs: Attributes to style infoset grouping. + pubset_attrs: Attributes to style public set grouping. + kwargs: Keyword arguments passed on to `pygraphviz.AGraph.__init__`. + """ + + def __init__(self, + game=None, + depth_limit=-1, + node_decorator=default_node_decorator, + edge_decorator=default_edge_decorator, + group_terminal=False, + group_infosets=False, + group_pubsets=False, + target_pubset="*", + infoset_attrs=None, + pubset_attrs=None, + **kwargs): + + kwargs["directed"] = kwargs.get("directed", True) + super(GameTree, self).__init__(**kwargs) + + # We use pygraphviz.AGraph.add_subgraph to cluster nodes, and it requires a + # default constructor. Thus game needs to be optional. 
+ if game is None: + return + + self.game = game + self._node_decorator = node_decorator + self._edge_decorator = edge_decorator + + self._group_infosets = group_infosets + self._group_pubsets = group_pubsets + if self._group_infosets: + if not self.game.get_type().provides_information_state_string: + raise RuntimeError( + "Grouping of infosets requested, but the game does not " + "provide information state string.") + if self._group_pubsets: + if not self.game.get_type().provides_factored_observation_string: + raise RuntimeError( + "Grouping of public sets requested, but the game does not " + "provide factored observations strings.") + + self._infosets = collections.defaultdict(lambda: []) + self._pubsets = collections.defaultdict(lambda: []) + self._terminal_nodes = [] + + root = game.new_initial_state() + self.add_node(self.state_to_str(root), **self._node_decorator(root)) + self._build_tree(root, 0, depth_limit) + + for (player, info_state), sibblings in self._infosets.items(): + cluster_name = "cluster_{}_{}".format(player, info_state) + self.add_subgraph(sibblings, cluster_name, + **(infoset_attrs or { + "style": "dashed" + })) + + for pubset, sibblings in self._pubsets.items(): + if target_pubset == "*" or target_pubset == pubset: + cluster_name = "cluster_{}".format(pubset) + self.add_subgraph(sibblings, cluster_name, + **(pubset_attrs or { + "style": "dashed" + })) + + if group_terminal: + self.add_subgraph(self._terminal_nodes, rank="same") + + def state_to_str(self, state): + """Unique string representation of a state. + + Args: + state: The state. + + Returns: + String representation of state. + """ + assert not state.is_simultaneous_node() + # AGraph nodes can't have empty string == None as a key, thus we prepend " " + return " " + state.history_str() + + def _build_tree(self, state, depth, depth_limit): + """Recursively builds the game tree.""" + state_str = self.state_to_str(state) + + if state.is_terminal(): + self._terminal_nodes.append(state_str) + return + if depth > depth_limit >= 0: + return + + for action in state.legal_actions(): + child = state.child(action) + child_str = self.state_to_str(child) + self.add_node(child_str, **self._node_decorator(child)) + self.add_edge(state_str, child_str, + **self._edge_decorator(state, child, action)) + + if (self._group_infosets and not child.is_chance_node() and + not child.is_terminal()): + player = child.current_player() + info_state = child.information_state_string() + self._infosets[(player, info_state)].append(child_str) + + if self._group_pubsets: + pub_obs_history = str(pyspiel.PublicObservationHistory(child)) + self._pubsets[pub_obs_history].append(child_str) + + self._build_tree(child, depth + 1, depth_limit) + + def _repr_svg_(self): + """Allows to render directly in Jupyter notebooks and Google Colab.""" + if not self.has_layout: + self.layout(prog="dot") + return self.draw(format="svg").decode(self.encoding) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/README.md b/scenarios/bargaining/open_spiel/open_spiel/python/voting/README.md new file mode 100644 index 0000000..1b2acfd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/README.md @@ -0,0 +1,10 @@ + +A general implementation of voting rules from computational social choice. + +This code implements the voting rules in Voting as Evaluation (VasE): Lanctot et +al. +[Evaluating Agents using Social Choice Theory](https://arxiv.org/abs/2312.03121). 
+ +It also includes a few example uses of running VasE on the Atari datasets +referenced in the paper. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/__init__.py new file mode 100644 index 0000000..526bf17 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/approval.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/approval.py new file mode 100644 index 0000000..9154593 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/approval.py @@ -0,0 +1,58 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Implements approval voting method. + +Based on: https://en.wikipedia.org/wiki/Approval_voting. +""" + +from open_spiel.python.voting import base + + +# This seems arbitrary.. is there something sensible we should default to? +DEFAULT_K = 3 + + +class ApprovalVoting(base.AbstractVotingMethod): + """Implements approval voting.""" + + def __init__(self, k: int = 1): + """Construct an k-Approval voting scheme. + + Note: there are no checks on the length of the votes and how they relate to + the value of k. So, the user is responsible for appropriately balancing the + lengths of the votes appropriately. + + Arguments: + k: the number of top positions to count in each vote. 
+ """ + self._k = k + + def name(self) -> str: + return f"approval(k={self._k})" + + def run_election(self, profile: base.PreferenceProfile) -> base.RankOutcome: + assert self.is_valid_profile(profile) + scores = {alternative: 0 for alternative in profile.alternatives} + for vote in profile.votes: + vote_len = len(vote.vote) + for i in range(self._k): + if i >= vote_len: break + alternative = vote.vote[i] + scores[alternative] += vote.weight + sorted_scores = sorted(scores.items(), key=lambda item: item[1], + reverse=True) + outcome = base.RankOutcome() + outcome.unpack_from(sorted_scores) + return outcome diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/approval_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/approval_test.py new file mode 100644 index 0000000..3d76737 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/approval_test.py @@ -0,0 +1,62 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.voting.approval.""" + +from absl.testing import absltest + +from open_spiel.python.voting import approval +from open_spiel.python.voting import base + + +class ApprovalVotingTest(absltest.TestCase): + + def test_approval_name_correct(self): + method = approval.ApprovalVoting(k=7) + self.assertEqual(method.name(), "approval(k=7)") + + def test_approval_basic_run(self): + votes = [ + ["a", "b", "c", "d"], + ["b", "d", "a", "c"], + ["a", "c", "d", "b"], + ["d", "b", "c", "a"] + ] + profile = base.PreferenceProfile(votes=votes) + method = approval.ApprovalVoting(k=2) + outcome = method.run_election(profile) + with self.subTest("Approval voting gets basic ranking correct"): + self.assertTrue(outcome.ranking == ["b", "d", "a", "c"] or + outcome.ranking == ["b", "a", "d", "c"]) + with self.subTest("Approval voting gets basic scores correct"): + self.assertListEqual(outcome.scores, [3, 2, 2, 1]) + + def test_approval_basic_run_with_weights(self): + votes = [ + base.WeightedVote(1, ["a", "b", "c", "d"]), + base.WeightedVote(2, ["b", "d", "a", "c"]), + base.WeightedVote(3, ["a", "c", "d", "b"]), + base.WeightedVote(4, ["d", "b", "c", "a"]) + ] + profile = base.PreferenceProfile(votes=votes) + method = approval.ApprovalVoting(k=2) + outcome = method.run_election(profile) + with self.subTest("Approval voting gets weighted ranking correct"): + self.assertListEqual(outcome.ranking, ["b", "d", "a", "c"]) + with self.subTest("Approval voting gets weighted scores correct"): + self.assertListEqual(outcome.scores, [7, 6, 4, 3]) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/base.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/base.py new file mode 100644 index 0000000..28c38f5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/base.py @@ -0,0 +1,506 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the 
"License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Base classes for voting methods.""" + +import abc +from typing import Dict, List, NamedTuple, Tuple, Union +import numpy as np + + +# The id of an alternative can be a string or an integer. +AlternativeId = Union[str, int] + +# List of alternative ids. +PreferenceList = List[AlternativeId] + + +# Basic type to represent a vote. +# - The weight is an integer representing the number of voters +# - The vote is a list of alternative ids, e.g. ["a", "b", "c"], +# corresponding to a preference a > b > c. +class WeightedVote(NamedTuple): + weight: int + vote: PreferenceList + + +class PreferenceProfile(object): + """Base class for preference profiles. + + IMPORTANT NOTE: see the assumptions below about indexing of alternatives. + """ + _votes: List[WeightedVote] # Tracks cast votes along with their count + _alternatives_dict: Dict[AlternativeId, int] # Maps ID to index + # Identifiers for all possible alternatives + _alternatives_ids: List[AlternativeId] + + def __init__( + self, + votes: Union[List[PreferenceList], List[WeightedVote], None] = None, + alternatives: Union[List[AlternativeId], None] = None, + ): + """Initialize the preference profile. + + Args: + votes: Either (i) a list of lists, each containing ids of alternatives, + e.g. ["a", "b", "c"] signifiying a > b > c, or None for no votes, or + (ii) a list of Vote tuples containing the weight and vote. + alternatives: a list of alternatives ids. + + Note regarding how alternatives are indexed: if the second argument is + passed, then the index of each alternative (e.g. when calling functions + like margin_matrix etc.) will be assigned 0 up to the (number of + alternatives) - 1 in the order of the list. If this argument is omitted, + then alternatives will be indexed depending on when they are first seen + (i.e. via a add_vote method) and so (only) in the latter case the indexing + could depend on the order votes are added. Hence it is advised to pass in + the list of alternatives to this function whenever they are known ahead of + time. + + The alternatives_dict property below will return a dictionary of alternative + IDs to index. + """ + # List of Vote named tuples from above. + self._votes: List[WeightedVote] = [] + # alternative id -> index (used for registering alternatives) + self._alternatives_dict: Dict[AlternativeId, int] = {} + # IDs (labels) of each alternative (usually strings). The alternative's + # index is then the index of this array. + self._alternatives_ids: List[AlternativeId] = [] + + # Register the alternatives and add the votes, if any are provided. 
+ if alternatives is not None: + for alternative in alternatives: + self._register_alternative(alternative) + if votes is not None: + for vote in votes: + self.add_vote(vote) + if self._votes and not self._alternatives_ids: + self._register_alternatives_from_votes() + + def _register_index_based_alternatives(self, num: int): + """Register indices up to num-1 as possible alternatives.""" + for idx in range(num): + self._register_alternative(idx) + + def _register_alternative(self, alternative: AlternativeId): + """Add this alternative to internal records if not already there.""" + idx = self._alternatives_dict.get(alternative) + if idx is None: + self._alternatives_ids.append(alternative) + self._alternatives_dict[alternative] = len(self._alternatives_ids) - 1 + assert (self._alternatives_ids[self._alternatives_dict[alternative]] + == alternative) + + def _register_alternatives_from_votes(self): + for vote in self._votes: + for alternative in vote: + self._register_alternative(alternative) + + def add_vote( + self, vote: Union[PreferenceList, WeightedVote], weight: int = 1 + ): + """Add a vote to this preference profile. + + Args: + vote: Either (i) a list of ids, e.g. ["a", "b", "c"] signifying a > b > c, + or, (ii) a Vote tuple containing both the weight and the vote of the + form in (i). + weight: the count, i.e. how many people have submitted this vote. Only + used when the first argument is a list. + """ + # For now support only integral weights (counts). Makes some things easier, + # like N(x,y) and the margin matrices can be integers. Should be easy to + # extend if we need to. + assert isinstance(weight, int) + assert weight > 0 + if isinstance(vote, WeightedVote): + self._votes.append(vote) + for alternative in vote.vote: + self._register_alternative(alternative) + else: + weighted_vote = WeightedVote(weight, vote) + self._votes.append(weighted_vote) + for alternative in vote: + self._register_alternative(alternative) + + def add_vote_from_values( + self, + values: Union[List[float], List[int]], + tie_tolerance: float = 1e-10, + weight: int = 1, + ): + """Adds a vote from a list of values. + + Note: this list is expected to cover all of the alternatives. + + WARNING: to ensure that ties are broken randomly, small random values are + added to the values (within [0, tie_tolarance]). If the values are smaller + than the tie_tolerance, this can be disabled by setting the tie_tolerance to + 0. + + Does not add the vote if the values are all within tie_tolerance of each + other. For all others, adds a uniform * tie_tolerance to break ties. + + If the alternatives ids are not registered for this profile yet, then this + method uses the indices of these values as the alternative IDs. Otherwise, + the length of the array must be equal to the number of alternatives. + + Args: + values: a list or numpy array of values for the alternative labeled by + the index. + tie_tolerance: a numerical threshold for determining ties. + weight: the weight for the resulting vote. + """ + # Check if any alternatives are registered for this profile. If not, then + # first register ids for them all first. 
+ if not self._alternatives_ids: + self._register_index_based_alternatives(len(values)) + else: + assert len(values) == len(self._alternatives_ids) + vals_copy = np.copy(np.asarray(values)) + max_val = vals_copy.max() + min_val = vals_copy.min() + if (max_val - min_val) < tie_tolerance: + print(f"Warning: not casting vote from values: {vals_copy}") + return + # Add noise for tie_breaking + vals_copy += tie_tolerance * np.random.uniform(size=len(vals_copy)) + vote = np.argsort(-vals_copy) + # The vote is currently based on indices. Now convert to names. + alternatives = self.alternatives + assert alternatives + assert len(alternatives) == len(vote) + named_vote = [] + for idx in vote: + assert 0 <= idx < len(alternatives) + named_vote.append(alternatives[idx]) + self.add_vote(named_vote, weight=weight) + + @property + def votes(self) -> List[WeightedVote]: + """Returns a list of votes.""" + return self._votes + + @property + def alternatives(self) -> List[AlternativeId]: + """Returns a list of alternatives.""" + return self._alternatives_ids + + @property + def alternatives_dict(self) -> Dict[AlternativeId, int]: + """Returns a dict of alternative id -> index for each alternative.""" + return self._alternatives_dict + + def num_alternatives(self) -> int: + return len(self._alternatives_ids) + + def num_votes(self) -> int: + """Returns the number of votes.""" + total = 0 + for vote in self._votes: + total += vote.weight + return total + + def pref_matrix(self) -> np.ndarray: + """Returns the candidate preference matrix for this profile. + + Define N(x,y) as number of voters that prefer x > y. The candidate + preference matrix is one whose entries are N(x,y) for row x and column y. + """ + # First map the alternatives to indices. + m = self.num_alternatives() + mat = np.zeros(shape=(m, m), dtype=np.int32) + for vote in self._votes: + vote_len = len(vote.vote) + for i in range(vote_len): + for j in range(i + 1, vote_len): + # vote.vote[i] > vote.vote[j] + idx_i = self._alternatives_dict[vote.vote[i]] + idx_j = self._alternatives_dict[vote.vote[j]] + mat[idx_i, idx_j] += vote.weight + return mat + + def margin_matrix(self) -> np.ndarray: + """Returns the margin matrix for this profile. + + Define N(x,y) = number of voters that prefer x > y. The margin matrix + is a num_alternatives x num_alternatives whose entry at (r,c) is: + delta(r,c) = N(r, c) - N(c, r). The r and c refer to columns, which + correspond to the indices in the list returned by self.alternatives. + """ + pref_matrix = self.pref_matrix() + return pref_matrix - pref_matrix.T + + def condorcet_winner( + self, strong: bool = True, margin_matrix: Union[np.ndarray, None] = None + ): + """Returns the Condorcet winner(s). + + Args: + strong: whether it's a strong Condorcet winner (see below). + margin_matrix: the margin matrix (optional: only used to to avoid + recomputing). + + Returns: + A list containing the Condorcet winners. There may be multiple weak + Condorcet winners, but there is at most one strong winner. + + A strong Condorcet winner is an alternative a* in A such that for all + a' in A: N(a*, a') > N(a', a*). A weak Condorcet winner is a similar + definition using great-than-or-equal-to >=. + """ + condorcet_winners = [] + if margin_matrix is None: + margin_matrix = self.margin_matrix() + for alt_idx in range(self.num_alternatives()): + if strong and np.all(np.delete(margin_matrix[alt_idx] > 0, alt_idx)): + # Don't count the diagonal 0 in the checking of > 0. 
+ condorcet_winners.append(self._alternatives_ids[alt_idx]) + elif not strong and np.all(margin_matrix[alt_idx] >= 0): + condorcet_winners.append(self._alternatives_ids[alt_idx]) + if strong: + assert len(condorcet_winners) <= 1 + return condorcet_winners + + def group(self): + """Group up the votes. + + This will combine multiple identical votes into the smallest set of unique + weighted votes. + """ + old_votes = self._votes + self._votes = [] + while old_votes: + vote = old_votes[0].vote + total_weight = old_votes[0].weight + del old_votes[0] + i = 0 + while i < len(old_votes): + if old_votes[i].vote == vote: + total_weight += old_votes[i].weight + del old_votes[i] + else: + i += 1 + self._votes.append(WeightedVote(total_weight, vote)) + + def ungroup(self): + """Splits the votes into individual votes (each with weight of 1).""" + old_votes = self._votes + self._votes = [] + for vote in old_votes: + for _ in range(vote.weight): + self._votes.append(WeightedVote(1, vote.vote)) + + def __str__(self) -> str: + """Get a string representation of this profile.""" + string = "" + for vote in self._votes: + string += str(vote) + "\n" + return string + + def total_weight(self) -> int: + w = 0 + for vote in self._votes: + w += vote.weight + return w + + def get_weight(self, vote: PreferenceList) -> int: + total_weight = 0 + for v in self._votes: + if v.vote == vote: + total_weight += v.weight + return total_weight + + def set_weight(self, index: int, value: int): + self._votes[index] = self._votes[index]._replace(weight=value) + + def set_all_weights(self, value: int): + """Sets the weight of all the votes to the specified value.""" + for i in range(len(self._votes)): + self.set_weight(i, value) + + def to_list_of_tuples( + self, convert_alternatives_to_strings: bool = False + ) -> List[Tuple[float, PreferenceList]]: + """Returns a list of (alternative, score) tuples.""" + list_of_tuples = [] + for vote in self._votes: + vote_lst = ( + [str(a) for a in vote.vote] + if convert_alternatives_to_strings + else vote.vote + ) + list_of_tuples.append((vote.weight, vote_lst)) + return list_of_tuples + + +class RankOutcome(object): + """Basic object for outcomes of the voting methods.""" + + def __init__(self, rankings=None, scores=None): + self._rankings: List[AlternativeId] = rankings + self._scores: List[float] = scores + self._rank_dict: Dict[AlternativeId, int] = None + if self._rankings is not None: + self.make_rank_dict() + + def unpack_from( + self, ranked_alternatives_and_scores: List[Tuple[AlternativeId, float]] + ): + """A rank outcome that comes packed as (alternative id, score) tuples.""" + self._rankings, self._scores = zip(*ranked_alternatives_and_scores) + self._rankings = list(self._rankings) + self._scores = list(self._scores) + self.make_rank_dict() + + @property + def ranking(self) -> List[AlternativeId]: + """Returns an ordered list W of alternatives' ids (winner is first).""" + return self._rankings + + @property + def scores(self) -> List[float]: + """Returns a alternative's scores S (in the same order as the ranking).""" + return self._scores + + def ranking_with_scores(self) -> Tuple[List[AlternativeId], List[float]]: + """Returns an ordered list of alternative ids and dict of scores W, S.""" + return self._rankings, self._scores + + def make_rank_dict(self): + """Makes the rank dictionary from the rankings and scores.""" + self._rank_dict = {} + for r, alt in enumerate(self._rankings): + self._rank_dict[alt] = r + + def get_rank(self, alternative: AlternativeId) -> int: + 
"""Returns the rank of a specific alternative.""" + return self._rank_dict[alternative] + + def get_score(self, alternative: AlternativeId) -> float: + """Returns the score of a specific alternative.""" + return self._scores[self.get_index(alternative)] + + def get_index(self, alternative: AlternativeId) -> int: + """Returns the index of a specific alternative.""" + return self._rankings.index(alternative) + + def __str__(self) -> str: + str_rep = "Rank: " + str(self._rankings) + "\n" + if self._scores is not None: + str_rep += "Scores: " + str(self._scores) + return str_rep + + def pretty_table_string(self, top: Union[int, None] = None): + """Return an easier-to-read table for the rankings and scores. + + Args: + top: (optional) if specified, only returns the top `top` alternatives. + + Returns: + An easier-to-read table string. + """ + if top is None: + top = len(self._rankings) + max_len = -1 + for i, alt in enumerate(self._rankings): + if i == top: + break + max_len = max(max_len, len(str(alt))) + table_string = "" + max_len += 1 + for i, alt in enumerate(self._rankings): + if i == top: + break + score = self._scores[i] + prefix = f" Rank {i+1}: " + while len(prefix) < 14: + prefix += " " + prefix += str(alt) + while len(prefix) < (14 + max_len): + prefix += " " + table_string += f"{prefix} ({score})\n" + return table_string + + def pretty_latex_table( + self, header: Union[str, None] = None, top: Union[int, None] = None + ): + """Return an easier-to-read table string for the rankings and scores. + + The string returned include LaTeX formatting for putting the tables into + papers. + + Args: + header: (optional) if specified, uses this as the header of the table. + top: (optional) if specified, only returns the top `top` alternatives. + + Returns: + An easier-to-read table string (with LaTeX formattinf) + """ + + if top is None: + top = len(self._rankings) + table_string = "\\begin{center}\n\\begin{tabular}{|c|ll|}\n" + if header is not None: + table_string += "\\multicolumn{3}{c}{\\bf " + header + "}\\\\\n\\hline\n" + table_string += "Rank & Agent & Score\\\\\n\\hline\n" + for i, alt in enumerate(self._rankings): + if i == top: + break + score = self._scores[i] + # table_string += f"{i+1} & \\textsc" + "{" + table_string += f"{i+1} & " + "{\\tt " + table_string += f"{alt}" + "} & " + f"{score}\\\\\n" + table_string += "\\hline\n" + table_string += "\\end{tabular}\n\\end{center}" + return table_string + + +class AbstractVotingMethod(metaclass=abc.ABCMeta): + """Abstract base class for voting methods.""" + + @abc.abstractmethod + def __init__(self, **method_specific_kwargs): + """Initializes the voting method. + + Args: + **method_specific_kwargs: optional extra args. + """ + + @abc.abstractmethod + def name(self) -> str: + """Returns the name of the voting method.""" + + @abc.abstractmethod + def run_election(self, profile: PreferenceProfile) -> RankOutcome: + """Runs the election and returns the result. + + Args: + profile: a preference profile. + + Returns: + a RankOutcome object that can be queried for the results. + """ + + def is_valid_profile(self, profile: PreferenceProfile) -> bool: + """Returns true if a profile is valid. + + A valid profile is valid if it contains at least one vote and one + alternative. Most voting schemes can't run unless the profile is valid. + + Args: + profile: the profile to check. 
+ """ + return profile.num_votes() > 0 and profile.num_alternatives() > 0 diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/base_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/base_test.py new file mode 100644 index 0000000..c6005a4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/base_test.py @@ -0,0 +1,167 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.voting.base.""" + +from absl.testing import absltest + +import numpy as np + +from open_spiel.python.voting import base + + +class BaseTest(absltest.TestCase): + + def test_basic_preference_profile(self): + # Create a preference profile from preferences: + # + # a > b > c > d + # b > d > a > c + # a > c > d > b + # d > b > c > a + # + # Each has a weight of 1 by default. E.g. each corresponds to one voter. + votes = [ + ["a", "b", "c", "d"], + ["b", "d", "a", "c"], + ["a", "c", "d", "b"], + ["d", "b", "c", "a"] + ] + profile = base.PreferenceProfile(votes=votes) + self.assertLen(profile.votes, 4) + self.assertEqual(profile.total_weight(), 4) + + def test_basic_preference_profile_weighted(self): + # Create a weighted preference profile from preferences: + # + # 1: a > b > c + # 2: a > c > b + # 3: b > a > c + # + # Each vote has a weight of 1, 2, and 3 respectively. + votes = [ + base.WeightedVote(1, ["a", "b", "c"]), + base.WeightedVote(2, ["a", "c", "b"]), + base.WeightedVote(3, ["b", "a", "c"]) + ] + profile = base.PreferenceProfile(votes=votes) + self.assertLen(profile.votes, 3) + self.assertEqual(profile.total_weight(), 6) + + def test_preference_profile_incremental_group(self): + # Create a weighted preference profile from preferences: + # + # 1: a > b > c + # 2: a > c > b + # 3: b > a > c + # + # by incrementally adding individual groups and then grouping them. + profile = base.PreferenceProfile() + for _ in range(1): + profile.add_vote(["a", "b", "c"]) + for _ in range(2): + profile.add_vote(["a", "c", "b"]) + for _ in range(3): + profile.add_vote(["b", "a", "c"]) + + # Assure there are 6 votes, each with weight 1. + with self.subTest("All votes added correctly"): + self.assertLen(profile.votes, 6) + self.assertEqual(profile.total_weight(), 6) + with self.subTest("Vote weight defaults to 1"): + for vote in profile.votes: + self.assertEqual(vote.weight, 1) + + # Group up the votes. Check that there are 3 but with total weight + # unchanged (6). 
+ profile.group() + with self.subTest("Grouping votes reduced to correct number"): + self.assertLen(profile.votes, 3) + with self.subTest("Grouping votes did not change total weight"): + self.assertEqual(profile.total_weight(), 6) + with self.subTest("Grouping votes computed weights correctly"): + self.assertEqual(profile.get_weight(["a", "b", "c"]), 1) + self.assertEqual(profile.get_weight(["a", "c", "b"]), 2) + self.assertEqual(profile.get_weight(["b", "a", "c"]), 3) + + def test_pref_margin_matrices_strong_condorcet(self): + votes = [ + base.WeightedVote(1, ["a", "b", "c"]), + base.WeightedVote(1, ["a", "c", "b"]), + base.WeightedVote(2, ["c", "a", "b"]), + base.WeightedVote(1, ["b", "c", "a"]), + ] + profile = base.PreferenceProfile(votes=votes) + + pref_matrix = profile.pref_matrix() + expected_pref_matrix = np.array( + [[0, 4, 2], + [1, 0, 2], + [3, 3, 0]] + ) + with self.subTest("Preference matrix calculated correctly."): + self.assertTrue(np.array_equal(pref_matrix, expected_pref_matrix)) + + margin_matrix = profile.margin_matrix() + expected_margin_matrix = np.array( + [[0, 3, -1], + [-3, 0, -1], + [1, 1, 0]] # <-- all positive, except diagonal: + ) # "c" is a strong Condorcet winner. + with self.subTest("Expected margin matrix calculated correctly."): + self.assertTrue(np.array_equal(margin_matrix, expected_margin_matrix)) + + # Check that there is exactly one strong Condorcet winner. + condorcet_winners = profile.condorcet_winner(strong=True, + margin_matrix=margin_matrix) + with self.subTest("Exactly one strong Condorcet winner found."): + self.assertListEqual(condorcet_winners, ["c"]) + + # A strong Condorcet winner is also a weak Condorcet winner, by definition. + condorcet_winners = profile.condorcet_winner(strong=False, + margin_matrix=margin_matrix) + with self.subTest("A strong Cond. winner is also a weak Cond. winner."): + self.assertListEqual(condorcet_winners, ["c"]) + + def test_weak_condorcet(self): + votes = [ + base.WeightedVote(1, ["a", "b", "c"]), + base.WeightedVote(1, ["a", "c", "b"]), + base.WeightedVote(1, ["c", "a", "b"]), + base.WeightedVote(1, ["b", "c", "a"]), + ] + profile = base.PreferenceProfile(votes=votes) + + # Leads to margin matrix: + # [[ 0 2 0] + # [-2 0 0] + # [ 0 0 0]] + # ==> no strong Condorcet winners, and two weak Condorcet winners + margin_matrix = profile.margin_matrix() + + strong_condorcet_winners = profile.condorcet_winner( + strong=True, margin_matrix=margin_matrix) + with self.subTest("No strong Condorect winner found."): + self.assertListEqual(strong_condorcet_winners, []) + + # A strong Condorcet winner is also a weak Condorcet winner, by definition. + weak_condorcet_winners = profile.condorcet_winner( + strong=False, margin_matrix=margin_matrix) + self.assertLen(weak_condorcet_winners, 2) + with self.subTest("Found all weak Condorcet winners."): + self.assertCountEqual(["a", "c"], weak_condorcet_winners) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/borda.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/borda.py new file mode 100644 index 0000000..2fe5102 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/borda.py @@ -0,0 +1,48 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Implements Borda's method. + +Based on: https://en.wikipedia.org/wiki/Borda_count. +""" + +from open_spiel.python.voting import base + + +class BordaVoting(base.AbstractVotingMethod): + """Implements Borda's method of voting.""" + + def __init__(self): + pass + + def name(self) -> str: + return "borda" + + def run_election(self, profile: base.PreferenceProfile) -> base.RankOutcome: + assert self.is_valid_profile(profile) + scores = {} + for alternative in profile.alternatives: + scores[alternative] = 0 + for vote in profile.votes: + # Do we need a check here for the length of the vote? + points = len(vote.vote) - 1 + for alternative in vote.vote: + scores[alternative] += (points * vote.weight) + points -= 1 + assert points == -1 + sorted_scores = sorted(scores.items(), key=lambda item: item[1], + reverse=True) + outcome = base.RankOutcome() + outcome.unpack_from(sorted_scores) + return outcome diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/borda_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/borda_test.py new file mode 100644 index 0000000..16b0b61 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/borda_test.py @@ -0,0 +1,54 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for open_spiel.python.voting.borda.""" + +from absl.testing import absltest +from absl.testing import parameterized + +from open_spiel.python.voting import base +from open_spiel.python.voting import borda + + +class BordaVotingTest(parameterized.TestCase): + + def test_borda_setup(self): + method = borda.BordaVoting() + self.assertEqual(method.name(), "borda") + + @parameterized.named_parameters( + dict(testcase_name="uniform votes", + votes=[["a", "b", "c"], ["a", "c", "b"], ["b", "a", "c"]], + ranking=["a", "b", "c"], + scores=[5, 3, 1]), + dict(testcase_name="weighted votes", + votes=[ + base.WeightedVote(1, ["a", "b", "c"]), + base.WeightedVote(2, ["a", "c", "b"]), + base.WeightedVote(3, ["b", "a", "c"]) + ], + ranking=["a", "b", "c"], + scores=[9, 7, 2])) + def test_borda_basic_run(self, votes, ranking, scores): + profile = base.PreferenceProfile(votes=votes) + method = borda.BordaVoting() + outcome = method.run_election(profile) + with self.subTest("ranking correct"): + self.assertListEqual(outcome.ranking, ranking) + with self.subTest("scores correct"): + self.assertListEqual(outcome.scores, scores) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/copeland.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/copeland.py new file mode 100644 index 0000000..d1b3cf2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/copeland.py @@ -0,0 +1,47 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Copeland's method. + +Based on https://en.wikipedia.org/wiki/Copeland%27s_method. +""" + +from open_spiel.python.voting import base + + +class CopelandVoting(base.AbstractVotingMethod): + """Implements Copeland's method.""" + + def __init__(self): + pass + + def name(self) -> str: + return "copeland" + + def run_election(self, profile: base.PreferenceProfile) -> base.RankOutcome: + assert self.is_valid_profile(profile) + copeland_scores = {} + alternatives = profile.alternatives + m = len(alternatives) + margin_matrix = profile.margin_matrix() + for r in range(m): + alternative = alternatives[r] + num_majority = (margin_matrix[r] > 0).sum() + # Subtract one because we don't include the diagonal. 
+ num_ties = (margin_matrix[r] == 0).sum() - 1 + copeland_scores[alternative] = num_majority + 0.5 * num_ties + sorted_scores = sorted(copeland_scores.items(), key=lambda item: item[1], + reverse=True) + outcome = base.RankOutcome() + outcome.unpack_from(sorted_scores) + return outcome diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/copeland_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/copeland_test.py new file mode 100644 index 0000000..c48cc65 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/copeland_test.py @@ -0,0 +1,51 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.voting.plurality.""" + +from absl.testing import absltest + +from open_spiel.python.voting import base +from open_spiel.python.voting import copeland + + +class CopelandVotingTest(absltest.TestCase): + def test_copeland_construction(self): + method = copeland.CopelandVoting() + self.assertEqual(method.name(), "copeland") + + def test_copeland_basic_run(self): + votes = [["a", "b", "c"], ["a", "c", "b"], ["b", "a", "c"]] + profile = base.PreferenceProfile(votes=votes) + method = copeland.CopelandVoting() + outcome = method.run_election(profile) + self.assertListEqual(outcome.ranking, ["a", "b", "c"]) + self.assertListEqual(outcome.scores, [2.0, 1.0, 0.0]) + + def test_copeland_basic_run2(self): + votes = [ + base.WeightedVote(1, ["a", "b", "c"]), + base.WeightedVote(2, ["a", "c", "b"]), + base.WeightedVote(3, ["b", "a", "c"]), + ] + profile = base.PreferenceProfile(votes=votes) + method = copeland.CopelandVoting() + outcome = method.run_election(profile) + self.assertTrue(outcome.ranking == ["a", "b", "c"] or + outcome.ranking == ["b", "a", "c"]) + self.assertListEqual(outcome.scores, [1.5, 1.5, 0.0]) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/__init__.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/__init__.py new file mode 100644 index 0000000..df17722 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
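
The voting modules above compose through `PreferenceProfile` and the `AbstractVotingMethod` subclasses. As a quick orientation, here is a minimal, illustrative sketch; it is not taken from the vendored files, the agent names and scores are made up, and it assumes the vendored `open_spiel` package is importable.

```python
import numpy as np

from open_spiel.python.voting import base
from open_spiel.python.voting import borda
from open_spiel.python.voting import copeland

# Explicit weighted ballots: 2 voters rank a > b > c, 3 voters rank b > a > c.
profile = base.PreferenceProfile(
    alternatives=["agent_a", "agent_b", "agent_c"],
    votes=[
        base.WeightedVote(2, ["agent_a", "agent_b", "agent_c"]),
        base.WeightedVote(3, ["agent_b", "agent_a", "agent_c"]),
    ],
)

# A ballot can also be derived from raw per-alternative scores (ordered as the
# alternatives were registered); small random noise is added to break ties
# before the scores are sorted into a ranking.
profile.add_vote_from_values(np.array([0.7, 0.1, 0.4]))

print(profile.margin_matrix())                # pairwise margins N(x,y) - N(y,x)
print(profile.condorcet_winner(strong=True))  # [] if no strong Condorcet winner

for method in (borda.BordaVoting(), copeland.CopelandVoting()):
    outcome = method.run_election(profile)
    print(method.name(), outcome.ranking, outcome.scores)
```

The `RankOutcome` returned by `run_election` also offers `pretty_table_string()`, which the `atari.py` example below uses to print human-readable summaries.
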
diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/atari.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/atari.py new file mode 100644 index 0000000..16adb25 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/atari.py @@ -0,0 +1,152 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Run some analyses on some Atari data sets.""" + +# pylint: disable=unused-import + +import sys +import time +from absl import app +from absl import flags +import numpy as np + +import pyspiel +from open_spiel.python.voting import approval +from open_spiel.python.voting import base +from open_spiel.python.voting import borda +from open_spiel.python.voting import copeland +from open_spiel.python.voting import kemeny_young +from open_spiel.python.voting import maximal_lotteries +from open_spiel.python.voting import plurality +from open_spiel.python.voting import ranked_pairs +from open_spiel.python.voting import schulze +from open_spiel.python.voting import soft_condorcet_optimization as sco +from open_spiel.python.voting import stv +from open_spiel.python.voting.examples import atari_datasets + +_DATASET_PATH_PREFIX = flags.DEFINE_string( + "dataset_path_prefix", default=".", help="Where to find the dataset files") + + +def main(_): + print("Loading dataset(s)...") + dataset_filename = (_DATASET_PATH_PREFIX.value + "/" + + atari_datasets.RAINBOW_TABLE5) + dataset = atari_datasets.parse_atari_table(dataset_filename) + + # If you load others, you can merge some columns from them like this: + # dataset.add_column(dataset_ag57.get_column("random"), "random") + # dataset.add_column(dataset_ag57.get_column("human"), "human") + + print(dataset.agent_names) + print(dataset.game_names) + print(f"Num agents: {len(dataset.agent_names)}") + print(f"Num games: {len(dataset.game_names)}") + + # Alts for rainbow table 5: + # dqn a3c ddqn prior-ddqn dueling-ddqn distrib-dqn noisy-dqn rainbow + + game_names = [] + profile = base.PreferenceProfile(alternatives=dataset.agent_names) + for game_name, scores in dataset.table_data.items(): + profile.add_vote_from_values(scores) + game_names.append(game_name) + + # Group up the profile and then print it to show that every vote is unique. + profile.group() + print(profile) + + print("Margin matrix:") + margin_matrix = profile.margin_matrix() + print(margin_matrix) + print( + "Weak Condorcet winners? " + + f"{profile.condorcet_winner(False, margin_matrix)}" + ) + print( + "Strong Condorcet winner? 
" + + f"{profile.condorcet_winner(True, margin_matrix)}" + ) + + voting_methods = [ + approval.ApprovalVoting(k=3), + borda.BordaVoting(), + copeland.CopelandVoting(), + kemeny_young.KemenyYoungVoting(), + maximal_lotteries.MaximalLotteriesVoting(iterative=True), + plurality.PluralityVoting(), + ranked_pairs.RankedPairsVoting(), + schulze.SchulzeVoting(), + stv.STVVoting(num_winners=3), + ] + for method in voting_methods: + print("") + print(method.name()) + outcome = method.run_election(profile) + print(outcome.pretty_table_string()) + + print("Soft Condorcet Optimization (Python):") + py_sco_solver = sco.SoftCondorcetOptimizer( + profile, + batch_size=4, + rating_lower_bound=-100.0, + rating_upper_bound=100.0, + temperature=1, + ) + start_time = time.time() + ratings, ranking = py_sco_solver.run_solver(10000, learning_rate=0.01) + end_time = time.time() + print(f"Time taken: {end_time - start_time}") + alt_idx = profile.alternatives_dict + for alt in ranking: + print(f" {alt}: {ratings[alt_idx[alt]]}") + + print("Soft Condorcet Optimization Sigmoid (C++):") + cpp_sco_solver = pyspiel.sco.SoftCondorcetOptimizer( + profile.to_list_of_tuples(), + rating_lower_bound=-100.0, + rating_upper_bound=100.0, + batch_size=4, + temperature=1, + rng_seed=0, + ) + start_time = time.time() + cpp_sco_solver.run_solver(10000, learning_rate=0.01) + end_time = time.time() + print(f"Time taken: {end_time - start_time}") + ratings_dict = cpp_sco_solver.ratings() + for alt in ranking: + print(f" {alt}: {ratings_dict[alt]}") + + print("Soft Condorcet Optimization Fenchel-Young (C++):") + cpp_fy_solver = pyspiel.sco.FenchelYoungOptimizer( + profile.to_list_of_tuples(), + rating_lower_bound=-100.0, + rating_upper_bound=100.0, + batch_size=4, + temperature=1, + rng_seed=0, + ) + start_time = time.time() + cpp_fy_solver.run_solver(10000, learning_rate=0.01) + end_time = time.time() + print(f"Time taken: {end_time - start_time}") + ratings_dict = cpp_fy_solver.ratings() + for alt in ranking: + print(f" {alt}: {ratings_dict[alt]}") + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/atari_agent57_table.txt b/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/atari_agent57_table.txt new file mode 100644 index 0000000..78d320b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/atari_agent57_table.txt @@ -0,0 +1,59 @@ +# https://arxiv.org/pdf/2003.13350.pdf, Section H.4 +# game human random agent57 r2d2(bandit) muzero ngu r2d2(retrace) r2d2 +alien 7127.70 227.80 297638.17±37054.55 464232.43±7988.66 741812.63 312024.15±91963.92 228483.74±111660.11 399709.08±106191.42 +amidar 1719.50 5.80 29660.08±880.39 31331.37±817.79 28634.39 18369.47±2141.76 28777.05±803.90 30338.91±1087.62 +assault 742.00 222.40 67212.67±6150.59 110100.04±346.06 143972.03 42829.17±7452.17 46003.71±8996.65 124931.33±2627.16 +asterix 8503.30 210.00 991384.42±9493.32 999354.03±12.94 998425.00 996141.15±3993.26 998867.54±191.35 999403.53±76.75 +asteroids 47388.70 719.10 150854.61±16116.72 431072.45±1799.13 6785558.64 248951.23±7561.86 345910.03±13189.10 394765.73±16944.82 +atlantis 29028.10 12850.00 1528841.76±28282.53 1660721.85±14643.83 1674767.20 1659575.47±4140.68 1659411.83±9934.57 1644680.76±5784.97 +bank_heist 753.10 14.20 23071.50±15834.73 27117.85±963.12 1278.98 20012.54±20377.89 16726.07±10992.11 38536.66±11645.73 +battle_zone 37187.50 2360.00 934134.88±38916.03 992600.31±1096.19 848623.00 813965.40±94503.50 
845666.67±51527.68 956179.17±31019.66 +beam_rider 16926.50 363.90 300509.80±13075.35 390603.06±23304.09 4549993.53 75889.70±18226.52 123281.81±4566.16 246078.69±3667.61 +berzerk 2630.40 123.70 61507.83±26539.54 77725.62±4556.93 85932.60 45601.93±5170.98 73475.91±8107.24 64852.56±17875.17 +bowling 160.70 23.10 251.18±13.22 161.77±99.84 260.13 215.38±13.27 257.88±4.84 229.39±24.57 +boxing 12.10 0.10 100.00±0.00 100.00±0.00 100.00 99.71±0.25 100.00±0.00 99.27±0.35 +breakout 30.50 1.70 790.40±60.05 863.92±0.08 864.00 625.86±42.66 859.60±2.04 863.25±0.34 +centipede 12017.00 2090.90 412847.86±26087.14 908137.24±7330.99 1159049.27 596427.16±7149.84 737655.85±25568.85 693733.73±74495.81 +chopper_command 7387.80 811.00 999900.00±0.00 999900.00±0.00 991039.70 999900.00±0.00 999900.00±0.00 999900.00±0.00 +crazy_climber 35829.40 10780.50 565909.85±89183.85 729482.83±87975.74 458315.40 351390.64±62150.96 322741.20±23024.88 549054.89±39413.08 +defender 18688.90 2874.50 677642.78±16858.59 730714.53±715.54 839642.95 684414.06±3876.41 681291.73±3469.95 692114.71±4864.99 +demon_attack 1971.00 152.10 143161.44±220.32 143913.32±92.93 143964.26 143695.73±154.88 143899.22±53.78 143830.91±107.18 +double_dunk -16.40 -18.60 23.93±0.06 24.00±0.00 23.94 -12.63±5.29 24.00±0.00 23.97±0.03 +enduro 860.50 0.00 2367.71±8.69 2378.66±3.66 2382.44 2095.40±80.81 2372.77±3.50 2380.22±5.47 +fishing_derby -38.70 -91.70 86.97±3.25 90.34±2.66 91.16 34.62±4.91 87.83±2.78 87.81±1.28 +freeway 29.60 0.00 32.59±0.71 34.00±0.00 33.03 28.71±2.07 33.48±0.16 32.90±0.11 +frostbite 4334.70 65.20 541280.88±17485.76 309077.30±274879.03 631378.53 284044.19±227850.49 12290.11±7936.49 446703.01±63780.51 +gopher 2412.50 257.60 117777.08±3108.06 129736.13±653.03 130345.58 119110.87±463.03 119803.94±3197.88 126241.97±519.70 +gravitar 3351.40 173.00 19213.96±348.25 21068.03±497.25 6682.70 14771.91±843.17 14194.45±1250.63 17352.78±2675.27 +hero 30826.40 1027.00 114736.26±49116.60 49339.62±4617.76 49244.11 71592.84±12109.10 54967.97±5411.73 39786.01±7638.19 +ice_hockey 0.90 -11.20 63.64±6.48 86.59±0.59 67.04 -3.15±0.47 86.56±1.21 86.89±0.88 +jamesbond 302.80 29.00 135784.96±9132.28 158142.36±904.45 41063.25 28725.27±2902.52 32926.31±3073.94 28988.32±263.79 +kangaroo 3035.00 52.00 24034.16±12565.88 18284.99±817.25 16763.60 37392.82±6170.95 15185.87±931.58 14492.75±5.29 +krull 2665.50 1598.00 251997.31±20274.39 245315.44±48249.07 269358.27 150896.04±33729.56 149221.98±17583.30 291043.06±10051.59 +kung_fu_master 22736.30 258.50 206845.82±11112.10 267766.63±2895.73 204824.00 215938.95±22050.67 228228.90±5316.74 252876.65±10424.57 +montezuma_revenge 4753.30 0.00 9352.01±2939.78 3000.00±0.00 0.00 19093.74±12627.66 2300.00±668.33 2666.67±235.70 +ms_pacman 6951.60 307.30 63994.44±6652.16 62595.90±1755.82 243401.10 48695.12±1599.94 45011.73±1822.30 50337.02±4004.55 +name_this_game 8049.00 2292.30 54386.77±6148.50 138030.67±5279.91 157177.85 25608.90±1943.41 74104.70±9053.70 74501.48±11562.26 +phoenix 7242.60 761.40 908264.15±28978.92 990638.12±6278.77 955137.84 966685.41±6127.24 937874.90±22525.79 876045.70±25511.04 +pitfall 6463.70 -229.40 18756.01±9783.91 0.00±0.00 0.00 15334.30±15106.90 -0.45±0.50 0.00±0.00 +pong 14.60 -20.70 20.67±0.47 21.00±0.00 21.00 19.85±0.31 20.95±0.01 21.00±0.00 +private_eye 69571.30 24.90 79716.46±29515.48 40700.00±0.00 15299.98 100314.44±291.22 34601.01±5266.39 18765.05±16672.27 +qbert 13455.00 163.90 580328.14±151251.66 777071.30±190653.94 72276.00 479024.20±98094.39 434753.72±99793.58 771069.21±152722.56 +riverraid 17118.00 
1338.50 63318.67±5659.55 93569.66±13308.08 323417.18 40770.82±748.42 43174.10±2335.12 54280.32±1245.60 +road_runner 7845.00 11.50 243025.80±79555.98 593186.78±88650.69 613411.80 151326.54±77209.43 116149.17±18257.21 613659.42±397.72 +robotank 11.90 2.20 127.32±12.50 144.00±0.00 131.13 11.62±0.67 143.59±0.29 130.72±9.75 +seaquest 42054.70 68.40 999997.63±1.42 999999.00±0.00 999976.52 999999.00±0.00 999999.00±0.00 999999.00±0.00 +skiing -4336.90 -17098.10 -4202.60±607.85 -3851.44±517.52 -29968.36 -24271.33±6936.26 -14576.05±875.96 -17797.59±866.55 +solaris 12326.70 1236.30 44199.93±8055.50 67306.29±10378.22 56.62 7254.03±3653.55 6566.03±2209.91 11247.88±1999.22 +space_invaders 1668.70 148.00 48680.86±5894.01 67898.71±1744.74 74335.30 48087.13±11219.39 36069.75±23408.12 67229.37±2316.31 +star_gunner 10250.00 664.00 839573.53±67132.17 998600.28±218.66 549271.70 450096.08±158979.59 420337.48±8309.08 923739.89±69234.32 +surround 6.50 -10.00 9.50±0.19 10.00±0.00 9.99 -9.32±0.67 9.96±0.01 10.00±0.00 +tennis -8.30 -23.80 23.84±0.10 24.00±0.00 0.00 11.06±6.10 24.00±0.00 7.93±11.36 +time_pilot 5229.20 3568.00 405425.31±17044.45 460596.49±3139.33 476763.90 368520.34±70829.26 452966.67±5300.62 454055.63±2205.07 +tutankham 167.60 11.40 2354.91±3421.43 483.78±37.90 491.48 197.90±7.47 466.59±38.40 413.80±3.89 +up_n_down 11693.20 533.40 623805.73±23493.75 702700.36±8937.59 715545.61 630463.10±31175.20 679303.61±4852.85 599134.12±3394.48 +venture 1187.50 0.00 2623.71±442.13 2258.93±29.90 0.40 1747.32±101.40 2013.31±11.24 2047.51±20.83 +video_pinball 17667.90 0.00 992340.74±12867.87 999645.92±57.93 981791.88 973898.32±20593.14 964670.12±4015.52 999697.05±53.37 +wizard_of_wor 4756.50 563.50 157306.41±16000.00 183090.81±6070.10 197126.00 121791.35±27909.14 134017.82±11871.88 179376.15±6659.14 +yars_revenge 54576.90 3092.90 998532.37±375.82 999807.02±54.85 553311.46 997642.09±455.73 998474.20±589.50 999748.54±46.19 +zaxxon 9173.30 32.50 249808.90±58261.59 370649.03±19761.32 725853.90 129330.99±56872.31 114990.68±56726.18 366028.59±49366.03 diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/atari_datasets.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/atari_datasets.py new file mode 100644 index 0000000..1ab77da --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/atari_datasets.py @@ -0,0 +1,151 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
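The score tables above (and the ones that follow) are consumed by treating every Atari game as a single voter that ranks the agents from best to worst score; `atari.py` does this with `profile.add_vote_from_values(scores)`. Below is a minimal editorial sketch of that scores-to-votes step, not part of the vendored files; it uses only APIs that appear elsewhere in this diff and assumes `add_vote_from_values` ranks alternatives by descending value.

```
# Editorial sketch (not part of the upstream code): turn per-game scores into
# ranked votes, one vote per game, then run a voting rule over the profile.
from open_spiel.python.voting import base
from open_spiel.python.voting import copeland

agents = ["dqn", "a3c", "rainbow"]
scores_by_game = {  # small subset of the table above
    "alien": [634.0, 518.4, 6022.9],
    "amidar": [178.4, 263.9, 202.8],
}

profile = base.PreferenceProfile(alternatives=agents)
for scores in scores_by_game.values():
  # Rank agents by descending score for this game and add it as one vote
  # (assumed to mirror what add_vote_from_values does).
  ranking = [agent for _, agent in sorted(zip(scores, agents), reverse=True)]
  profile.add_vote(ranking)

print(profile.margin_matrix())
print(copeland.CopelandVoting().run_election(profile).pretty_table_string())
```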
+ +"""Helper functions for loading Atari data.""" + +import logging +from typing import Dict, List +import numpy as np + + +RAINBOW_TABLE5 = "atari_rainbow_table5.txt" +RAINBOW_TABLE6 = "atari_rainbow_table6.txt" +AGENT57_TABLE = "atari_agent57_table.txt" +MUESLI_TABLE11 = "atari_muesli_table11.txt" + + +class DataSet: + """A DataSet container for Atari tables.""" + + def __init__( + self, + agent_names: List[str], + game_names: List[str], + table_data: Dict[str, List[float]], + ): + self.agent_names = agent_names + self.game_names = game_names + self.table_data = table_data + + def get_column(self, agent_name: str) -> Dict[str, float]: + column_dict = {} + agent_idx = self.agent_names.index(agent_name) + assert 0 <= agent_idx < len(self.agent_names) + for game_name, scores in self.table_data.items(): + column_dict[game_name] = scores[agent_idx] + return column_dict + + def delete_column(self, agent_name: str): + agent_idx = self.agent_names.index(agent_name) + assert 0 <= agent_idx < len(self.agent_names) + del self.agent_names[agent_idx] + for game_name in self.game_names: + del self.table_data[game_name][agent_idx] + + def delete_game(self, game_name: str): + assert game_name in self.game_names + self.game_names.remove(game_name) + del self.table_data[game_name] + + def add_column(self, column, agent_name): + """Add a column. + + Args: + column: a dictionary of game_name -> score, + agent_name: name for the new agent. + + Note: beware! This can delete rows within this data set, in order to keep + data complete, i.e. it deletes rows if you don't have this agent's score for + that game. + """ + self.agent_names.append(agent_name) + game_names_copy = self.game_names[:] + for game_name in game_names_copy: + if game_name not in column: + logging.warning("Warning: deleting game {%s}", game_name) + self.delete_game(game_name) + else: + self.table_data[game_name].append(column[game_name]) + + def to_task_by_agent_matrix(self) -> np.ndarray: + num_tasks = len(self.game_names) + num_agents = len(self.agent_names) + mat = np.zeros(shape=(num_tasks, num_agents)) + i = 0 + for game_name in self.game_names: + mat[i] = np.asarray(self.table_data[game_name]) + i += 1 + return mat + + +def parse_value(val_str: str) -> float: + """Parse a numerical value from string, dropping ± part.""" + val_str = val_str.replace(",", "") + val_str = val_str.split("±")[0] + return float(val_str) + + +def parse_values(string_values_list: List[str]) -> List[float]: + """Turn a list of strings into a list of floats.""" + return [parse_value(val) for val in string_values_list] + + +def delete_agent(dataset: DataSet, agent: str): + idx = dataset.agent_names.index(agent) + assert 0 <= idx < len(dataset.agent_names) + del dataset.agent_names[idx] + for key in dataset.table_data.keys(): + del dataset.table_data[key][idx] + + +def make_subset(dataset: DataSet, agent_subset: List[str]): + for agent in dataset.agent_names: + if agent not in agent_subset: + delete_agent(dataset, agent) + + +def parse_atari_table(filename: str) -> DataSet: + """Parse an Atari data file. + + The files are created by copy/paste from the papers. + + Args: + filename: the file that contains the dataset. + + Returns: + a DataSet object referring to the Atari data. + """ + with open(filename, "r") as f: + string_data = f.read() + + # First line is a comment + # Second line format is column descriptions, e.g.: + # "# game ..." + # Rest of the lines are copy/paste from the paper tables. 
+ lines = string_data.split("\n") + assert lines[1].startswith("# game ") + agent_names = lines[1].split()[2:] + num_agents = len(agent_names) + game_names = [] + table_data = {} + for i in range(2, len(lines)): + if lines[i].strip(): + parts = lines[i].split() + game_name = parts[0] + game_names.append(game_name) + str_scores = parts[1:] + assert len(str_scores) == num_agents, f"Error line: {lines[i]}" + scores = parse_values(str_scores) + table_data[game_name] = scores + return DataSet(agent_names, game_names, table_data) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/atari_muesli_table11.txt b/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/atari_muesli_table11.txt new file mode 100644 index 0000000..7bad69f --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/atari_muesli_table11.txt @@ -0,0 +1,59 @@ +# https://arxiv.org/pdf/2104.06159.pdf table 11 +# game random human muzero muesli +alien 228 7128 135541±65349 139409±12178 +amidar 6 1720 1061±136 21653±2019 +assault 222 742 29697±3595 36963±533 +asterix 210 8503 918628±56222 316210±48368 +asteroids 719 47389 509953±33541 484609±5047 +atlantis 12850 29028 1136009±1466 1363427±81093 +bank_heist 14 753 14176±13044 1213±0 +battle_zone 2360 37188 320641±141924 414107±13422 +beam_rider 364 16927 319684±13394 288870±137 +berzerk 124 2630 19523±16817 44478±36140 +bowling 23 161 156±25 191±37 +boxing 0 12 100±0 99±1 +breakout 2 30 778±20 791±10 +centipede 2091 12017 862737±11564 869751±16547 +chopper_command 811 7388 494578±488588 101289±24339 +crazy_climber 10780 35829 176172±17630 175322±3408 +defender 2874 18689 544320±12881 629482±39646 +demon_attack 152 1971 143846±8 129544±11792 +double_dunk -19 -16 24±0 -3±2 +enduro 0 861 2363±2 2362±1 +fishing_derby -92 -39 69±5 51±0 +freeway 0 30 34±0 33±0 +frostbite 65 4335 410173±35403 301694±275298 +gopher 258 2412 121342±1540 104441±424 +gravitar 173 3351 10926±2919 11660±481 +hero 1027 30826 37249±15 37161±114 +ice_hockey -11 1 40±2 25±13 +jamesbond 29 303 32107±3480 19319±3673 +kangaroo 52 3035 13928±90 14096±421 +krull 1598 2666 50137±22433 34221±1385 +kung_fu_master 258 22736 148533±31806 134689±9557 +montezuma_revenge 0 4753 1450±1050 2359±309 +ms_pacman 307 6952 79319±8659 65278±1589 +name_this_game 2292 8049 108133±6935 105043±732 +phoenix 761 7243 748424±67304 805305±26719 +pitfall -229 6464 0±0 0±0 +pong -21 15 21±0 20±1 +private_eye 25 69571 7600±7500 10323±4735 +qbert 164 13455 85926±8980 157353±6593 +riverraid 1338 17118 172266±592 47323±1079 +road_runner 12 7845 554956±23859 327025±45241 +robotank 2 12 85±15 59±2 +seaquest 68 42055 501236±498423 815970±128885 +skiing -17098 -4337 -30000±0 -18407±1171 +solaris 1236 12327 4401±732 3031±491 +space_invaders 148 1669 31265±27619 59602±2759 +star_gunner 664 10250 158608±4060 214383±23087 +surround -10 7 10±0 9±0 +tennis -24 -8 -0±0 12±12 +time_pilot 3568 5229 413988±10023 359105±21396 +tutankham 11 168 318±30 252±47 +up_n_down 533 11693 606602±28296 549190±70789 +venture 0 1188 866±866 2104±291 +video_pinball 0 17668 921563±56020 685436±155718 +wizard_of_wor 564 4757 103463±3366 93291±5 +yars_revenge 3093 54577 187731±32107 557818±1895 +zaxxon 32 9173 106935±45495 65325±395 diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/atari_rainbow_table5.txt b/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/atari_rainbow_table5.txt new file mode 100644 index 0000000..e47ee5e --- /dev/null +++ 
b/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/atari_rainbow_table5.txt @@ -0,0 +1,56 @@ +# https://arxiv.org/pdf/1710.02298.pdf Table 6: No-op starts evaluation regime +# game dqn a3c ddqn prior-ddqn dueling-ddqn distrib-dqn noisy-dqn rainbow +alien 634.0 518.4 1033.4 900.5 1,486.5 1,997.5 533.3 6,022.9 +amidar 178.4 263.9 169.1 218.4 172.7 237.7 148.0 202.8 +assault 3489.3 5474.9 6060.8 7,748.5 3,994.8 5,101.3 5,124.3 14,491.7 +asterix 3170.5 22140.5 16837.0 31,907.5 15,840.0 395,599.5 8,277.3 280,114.0 +asteroids 1458.7 4474.5 1193.2 1,654.0 2,035.4 2,071.7 4,078.1 2,249.4 +atlantis 292491.0 911,091.0 319688.0 593,642.0 445,360.0 289,803.0 303,666.5 814,684.0 +bank_heist 312.7 970.1 886.0 816.8 1,129.3 835.6 955.0 826.0 +battle_zone 23750.0 12950.0 24740.0 29,100.0 31,320.0 32,250.0 26,985.0 52,040.0 +beam_rider 9743.2 22707.9 17417.2 26,172.7 14,591.3 15,002.4 15,241.5 21,768.5 +berzerk 493.4 817.9 1011.1 1,165.6 910.6 1,000.0 670.8 1,793.4 +bowling 56.5 35.1 69.6 65.8 65.7 76.8 79.3 39.4 +boxing 70.3 59.8 73.5 68.6 77.3 62.1 66.3 54.9 +breakout 354.5 681.9 368.9 371.6 411.6 548.7 423.3 379.5 +centipede 3973.9 3755.8 3853.5 3,421.9 4,881.0 7,476.9 4,214.4 7,160.9 +chopper_command 5017.0 7021.0 3495.0 6,604.0 3,784.0 9,600.5 8,778.5 10,916.0 +crazy_climber 98128.0 112646.0 113782.0 131,086.0 124,566.0 154,416.5 98,576.5 143,962.0 +defender 15917.5 56533.0 27510.0 21,093.5 33,996.0 32,246.0 18,037.5 47,671.3 +demon_attack 12550.7 113,308.4 69803.4 73,185.8 56,322.8 109,856.6 25,207.8 109,670.7 +double_dunk -6.0 -0.1 -0.3 2.7 -0.8 -3.7 -1.0 -0.6 +enduro 626.7 -82.5 1216.6 1,884.4 2,077.4 2,133.4 1,021.5 2,061.1 +fishing_derby -1.6 18.8 3.2 9.2 -4.1 -4.9 -3.7 22.6 +freeway 26.9 0.1 28.8 27.9 0.2 28.8 27.1 29.1 +frostbite 496.1 190.5 1448.1 2,930.2 2,332.4 2,813.9 418.8 4,141.1 +gopher 8190.4 10022.8 15253.0 57,783.8 20,051.4 27,778.3 13,131.0 72,595.7 +gravitar 298.0 303.5 200.5 218.0 297.0 422.0 250.5 567.5 +hero 14992.9 32464.1 14892.5 20,506.4 15,207.9 28,554.2 2,454.2 50,496.8 +ice_hockey -1.6 -2.8 -2.5 -1.0 -1.3 -0.1 -2.4 -0.7 +kangaroo 4496.0 94.0 11204.0 10,241.0 10,334.0 9,555.5 7,465.0 10,841.0 +krull 6206.0 5560.0 6796.1 7,406.5 8,051.6 6,757.8 6,833.5 6,715.5 +kung_fu_master 20882.0 28819.0 30207.0 31,244.0 24,288.0 33,890.0 27,921.0 28,999.8 +montezuma_revenge 47.0 67.0 42.0 13.0 22.0 130.0 55.0 154.0 +ms_pacman 1092.3 653.7 1241.3 1,824.6 2,250.6 2,064.1 1,012.1 2,570.2 +name_this_game 6738.8 10476.1 8960.3 11,836.1 11,185.1 11,382.3 7,186.4 11,686.5 +phoenix 7484.8 52894.1 12366.5 27,430.1 20,410.5 31,358.3 15,505.0 103,061.6 +pitfall -113.2 -78.5 -186.7 -14.8 -46.9 -342.8 -154.4 -37.6 +pong 18.0 5.6 19.1 18.9 18.8 18.9 18.0 19.0 +private_eye 207.9 206.9 -575.5 179.0 292.6 5,717.5 5,955.4 1,704.4 +qbert 9271.5 15148.8 11020.8 11,277.0 14,175.8 15,035.9 9,176.6 18,397.6 +road_runner 35215.0 34216.0 43156.0 56,990.0 58,549.0 56,086.0 35,376.5 54,261.0 +robotank 58.7 32.8 59.1 55.4 62.0 49.8 50.9 55.2 +seaquest 4216.7 2355.4 14498.0 39,096.7 37,361.6 3,275.4 2,353.1 19,176.0 +skiing -12142.1 -10911.1 -11490.4 -10,852.8 -11,928.0 -13,247.7 -13,905.9 -11,685.8 +solaris 1295.4 1956.0 810.0 2,238.2 1,768.4 2,530.2 2,608.2 2,860.7 +space_invaders 1293.8 15,730.5 2628.7 9,063.0 5,993.1 6,368.6 1,697.2 12,629.0 +star_gunner 52970.0 138218.0 58365.0 51,959.0 90,804.0 67,054.5 31,864.5 123,853.0 +surround -6.0 -9.7 1.9 -0.9 4.0 4.5 -3.1 7.0 +tennis 11.1 -6.3 -7.8 -2.0 4.4 22.6 -2.1 -2.2 +time_pilot 4786.0 12,679.0 6608.0 7,448.0 6,601.0 7,684.5 5,311.0 11,190.5 
+tutankham 45.6 156.3 92.2 33.6 48.0 124.3 123.3 126.9 +venture 136.0 23.0 21.0 244.0 200.0 462.0 10.5 45.0 +video_pinball 154414.1 331628.1 367823.7 374,886.9 110,976.2 455,052.7 241,851.7 506,817.2 +wizard_of_wor 1609.0 17,244.0 6201.0 7,451.0 7,054.0 11,824.5 4,796.5 14,631.5 +yars_revenge 4577.5 7157.5 6270.6 5,965.1 25,976.5 8,267.7 5,487.3 93,007.9 +zaxxon 4412.0 24,622.0 8593.0 9,501.0 10,164.0 15,130.0 7,650.5 19,658.0 diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/atari_rainbow_table6.txt b/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/atari_rainbow_table6.txt new file mode 100644 index 0000000..e47ee5e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/atari_rainbow_table6.txt @@ -0,0 +1,56 @@ +# https://arxiv.org/pdf/1710.02298.pdf Table 6: No-op starts evaluation regime +# game dqn a3c ddqn prior-ddqn dueling-ddqn distrib-dqn noisy-dqn rainbow +alien 634.0 518.4 1033.4 900.5 1,486.5 1,997.5 533.3 6,022.9 +amidar 178.4 263.9 169.1 218.4 172.7 237.7 148.0 202.8 +assault 3489.3 5474.9 6060.8 7,748.5 3,994.8 5,101.3 5,124.3 14,491.7 +asterix 3170.5 22140.5 16837.0 31,907.5 15,840.0 395,599.5 8,277.3 280,114.0 +asteroids 1458.7 4474.5 1193.2 1,654.0 2,035.4 2,071.7 4,078.1 2,249.4 +atlantis 292491.0 911,091.0 319688.0 593,642.0 445,360.0 289,803.0 303,666.5 814,684.0 +bank_heist 312.7 970.1 886.0 816.8 1,129.3 835.6 955.0 826.0 +battle_zone 23750.0 12950.0 24740.0 29,100.0 31,320.0 32,250.0 26,985.0 52,040.0 +beam_rider 9743.2 22707.9 17417.2 26,172.7 14,591.3 15,002.4 15,241.5 21,768.5 +berzerk 493.4 817.9 1011.1 1,165.6 910.6 1,000.0 670.8 1,793.4 +bowling 56.5 35.1 69.6 65.8 65.7 76.8 79.3 39.4 +boxing 70.3 59.8 73.5 68.6 77.3 62.1 66.3 54.9 +breakout 354.5 681.9 368.9 371.6 411.6 548.7 423.3 379.5 +centipede 3973.9 3755.8 3853.5 3,421.9 4,881.0 7,476.9 4,214.4 7,160.9 +chopper_command 5017.0 7021.0 3495.0 6,604.0 3,784.0 9,600.5 8,778.5 10,916.0 +crazy_climber 98128.0 112646.0 113782.0 131,086.0 124,566.0 154,416.5 98,576.5 143,962.0 +defender 15917.5 56533.0 27510.0 21,093.5 33,996.0 32,246.0 18,037.5 47,671.3 +demon_attack 12550.7 113,308.4 69803.4 73,185.8 56,322.8 109,856.6 25,207.8 109,670.7 +double_dunk -6.0 -0.1 -0.3 2.7 -0.8 -3.7 -1.0 -0.6 +enduro 626.7 -82.5 1216.6 1,884.4 2,077.4 2,133.4 1,021.5 2,061.1 +fishing_derby -1.6 18.8 3.2 9.2 -4.1 -4.9 -3.7 22.6 +freeway 26.9 0.1 28.8 27.9 0.2 28.8 27.1 29.1 +frostbite 496.1 190.5 1448.1 2,930.2 2,332.4 2,813.9 418.8 4,141.1 +gopher 8190.4 10022.8 15253.0 57,783.8 20,051.4 27,778.3 13,131.0 72,595.7 +gravitar 298.0 303.5 200.5 218.0 297.0 422.0 250.5 567.5 +hero 14992.9 32464.1 14892.5 20,506.4 15,207.9 28,554.2 2,454.2 50,496.8 +ice_hockey -1.6 -2.8 -2.5 -1.0 -1.3 -0.1 -2.4 -0.7 +kangaroo 4496.0 94.0 11204.0 10,241.0 10,334.0 9,555.5 7,465.0 10,841.0 +krull 6206.0 5560.0 6796.1 7,406.5 8,051.6 6,757.8 6,833.5 6,715.5 +kung_fu_master 20882.0 28819.0 30207.0 31,244.0 24,288.0 33,890.0 27,921.0 28,999.8 +montezuma_revenge 47.0 67.0 42.0 13.0 22.0 130.0 55.0 154.0 +ms_pacman 1092.3 653.7 1241.3 1,824.6 2,250.6 2,064.1 1,012.1 2,570.2 +name_this_game 6738.8 10476.1 8960.3 11,836.1 11,185.1 11,382.3 7,186.4 11,686.5 +phoenix 7484.8 52894.1 12366.5 27,430.1 20,410.5 31,358.3 15,505.0 103,061.6 +pitfall -113.2 -78.5 -186.7 -14.8 -46.9 -342.8 -154.4 -37.6 +pong 18.0 5.6 19.1 18.9 18.8 18.9 18.0 19.0 +private_eye 207.9 206.9 -575.5 179.0 292.6 5,717.5 5,955.4 1,704.4 +qbert 9271.5 15148.8 11020.8 11,277.0 14,175.8 15,035.9 9,176.6 18,397.6 
+road_runner 35215.0 34216.0 43156.0 56,990.0 58,549.0 56,086.0 35,376.5 54,261.0 +robotank 58.7 32.8 59.1 55.4 62.0 49.8 50.9 55.2 +seaquest 4216.7 2355.4 14498.0 39,096.7 37,361.6 3,275.4 2,353.1 19,176.0 +skiing -12142.1 -10911.1 -11490.4 -10,852.8 -11,928.0 -13,247.7 -13,905.9 -11,685.8 +solaris 1295.4 1956.0 810.0 2,238.2 1,768.4 2,530.2 2,608.2 2,860.7 +space_invaders 1293.8 15,730.5 2628.7 9,063.0 5,993.1 6,368.6 1,697.2 12,629.0 +star_gunner 52970.0 138218.0 58365.0 51,959.0 90,804.0 67,054.5 31,864.5 123,853.0 +surround -6.0 -9.7 1.9 -0.9 4.0 4.5 -3.1 7.0 +tennis 11.1 -6.3 -7.8 -2.0 4.4 22.6 -2.1 -2.2 +time_pilot 4786.0 12,679.0 6608.0 7,448.0 6,601.0 7,684.5 5,311.0 11,190.5 +tutankham 45.6 156.3 92.2 33.6 48.0 124.3 123.3 126.9 +venture 136.0 23.0 21.0 244.0 200.0 462.0 10.5 45.0 +video_pinball 154414.1 331628.1 367823.7 374,886.9 110,976.2 455,052.7 241,851.7 506,817.2 +wizard_of_wor 1609.0 17,244.0 6201.0 7,451.0 7,054.0 11,824.5 4,796.5 14,631.5 +yars_revenge 4577.5 7157.5 6270.6 5,965.1 25,976.5 8,267.7 5,487.3 93,007.9 +zaxxon 4412.0 24,622.0 8593.0 9,501.0 10,164.0 15,130.0 7,650.5 19,658.0 diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/chatbot_arena.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/chatbot_arena.py new file mode 100644 index 0000000..56d3697 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/chatbot_arena.py @@ -0,0 +1,193 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Chat bot Arena dataset.""" + +# pylint: disable=unused-import + +import collections +import sys +from absl import app +from absl import flags +import numpy as np +import pandas as pd +import pygraphviz as pgv + +from open_spiel.python.utils import gfile + +from open_spiel.python.algorithms import nash_averaging +from open_spiel.python.voting import approval +from open_spiel.python.voting import base +from open_spiel.python.voting import borda +from open_spiel.python.voting import copeland +from open_spiel.python.voting import kemeny_young +from open_spiel.python.voting import maximal_lotteries +from open_spiel.python.voting import plurality +from open_spiel.python.voting import ranked_pairs +from open_spiel.python.voting import schulze +from open_spiel.python.voting import stv + + +SEED = 23875711 + +# Downloaded from: https://lmsys.org/blog/2023-07-20-dataset/ +DATASET_FILE = "/tmp/chatbot_arena_battles.csv" + + +def parse_battles_dataset(filter_ties=False): + """Parse the data set from the raw CSV.""" + dataset = [] + model_names = {} + with gfile.Open(DATASET_FILE, "r") as f: + lines = f.readlines() + for line in lines: + if line.startswith("#"): + continue + # ,question_id,model_a,model_b,winner,judge,conversation_a,conversation_b,turn,anony,language,tstamp,openai_moderation,toxic_chat_tag + parts = line.split(",") + model_a, model_b, winner = ( + parts[2].strip(), + parts[3].strip(), + parts[4].strip(), + ) + if filter_ties and winner.startswith("tie"): + continue + else: + model_names[model_a] = True + model_names[model_b] = True + if winner == "model_a": + dataset.append((model_a, model_b, -1)) + elif winner == "model_b": + dataset.append((model_a, model_b, 1)) + else: + assert winner.startswith("tie") + dataset.append((model_a, model_b, 0)) + return list(model_names.keys()), dataset + + +def chatbot_arena_vase(model_names, dataset): + """Run VasE over Chatbot Arena data set.""" + + alternatives = model_names[:] + profile = base.PreferenceProfile(alternatives=alternatives) + for datapoint in dataset: + alt_a, alt_b, outcome = datapoint + if outcome == 0: + pass + elif outcome == -1: + profile.add_vote([alt_a, alt_b]) + elif outcome == 1: + profile.add_vote([alt_b, alt_a]) + + margin_matrix = profile.margin_matrix() + strong_cond_winners = profile.condorcet_winner(True, margin_matrix) + weak_cond_winners = profile.condorcet_winner(False, margin_matrix) + print(f"Strong Condorcet winner? {strong_cond_winners}") + print(f"Weak Condorcet winner(s)? 
{weak_cond_winners}") + + voting_methods = [ + # approval.ApprovalVoting(k=8), + # borda.BordaVoting(), + copeland.CopelandVoting(), + # kemeny_young.KemenyYoungVoting(), + # Use verbose=True to get more information about the levels + maximal_lotteries.MaximalLotteriesVoting(iterative=True), + # maximal_lotteries.MaximalLotteriesVoting(iterative=True, verbose=True), + # plurality.PluralityVoting(), + ranked_pairs.RankedPairsVoting(), + # stv.STVVoting(num_winners=8) + schulze.SchulzeVoting(), + ] + for method in voting_methods: + print("") + print(method.name()) + outcome = method.run_election(profile) + print(outcome.pretty_table_string()) + # print(outcome.pretty_latex_table(header=method.name())) + + +def ranked_pairs_viz(model_names, dataset): + """Produce the ranked pairs visualization.""" + + alternatives = model_names[:] + profile = base.PreferenceProfile(alternatives=alternatives) + num_alternatives = len(alternatives) + alt_dict = profile.alternatives_dict + for datapoint in dataset: + alt_a, alt_b, outcome = datapoint + if outcome == 0: + pass + elif outcome == -1: + profile.add_vote([alt_a, alt_b]) + elif outcome == 1: + profile.add_vote([alt_b, alt_a]) + margin_matrix = profile.margin_matrix() + method = ranked_pairs.RankedPairsVoting() + outcome = method.run_election(profile) + graph_mat = outcome.graph + # Visualize only over the top 8: + keep_alternatives = [ + "gpt-4", + "claude-v1", + "claude-instant-v1", + "guanaco-33b", + "gpt-3.5-turbo", + "wizardlm-13b", + "palm-2", + "vicuna-13b", + ] + keep_alternatives.sort() + for j in range(num_alternatives): + idx = num_alternatives - j - 1 + alt = alternatives[idx] + if alt not in keep_alternatives: + graph_mat = np.delete(graph_mat, (idx), axis=0) + graph_mat = np.delete(graph_mat, (idx), axis=1) + orig_alternatives = model_names[:] + alternatives = keep_alternatives + m = len(alternatives) + graph = pgv.AGraph(directed=True, strict=True) + for alternative in alternatives: + graph.add_node(alternative) + for i in range(m): + for j in range(m): + if graph_mat[i, j] == 1: + graph.add_edge(alternatives[i], alternatives[j]) + idx_i = alt_dict[alternatives[i]] + idx_j = alt_dict[alternatives[j]] + edge = graph.get_edge( + orig_alternatives[idx_i], orig_alternatives[idx_j] + ) + edge.attr["label"] = margin_matrix[idx_i, idx_j] + graph.write("/tmp/chatbot_arena_rps.dot") # write to simple.dot + graph.draw( + "/tmp/chatbot_arena_rps.png", + # args='-Gdpi=100', + prog="dot", + ) # , args="-n2") # draw + print("Wrote to /tmp/chatbot_arena_rps.png") + + +def main(_): + model_names, dataset = parse_battles_dataset() + model_names.sort() + print(f"{len(model_names)} models.") + print(f"{len(dataset)} datapoints.") + chatbot_arena_vase(model_names, dataset) + ranked_pairs_viz(model_names, dataset) + + +if __name__ == "__main__": + np.random.seed(SEED) + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/example.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/example.py new file mode 100644 index 0000000..d4a1f8c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/examples/example.py @@ -0,0 +1,96 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Simple basic example.""" + +# pylint: disable=unused-import + +import sys +from absl import app +from absl import flags +import numpy as np + +from open_spiel.python.voting import base +from open_spiel.python.voting import copeland + + +def main(_): + # Create a preference profile that represents the following votes: + # A > B > C + # A > C > B + # C > A > B + # C > A > B + # B > C > A + # This profile has three alternatives: A, B, and C. The strings here "A", "B", + # "C" represent the alternative's ID and is of type base.AlternativeId. + # (They can be strings or integers.) + alternatives = ["A", "B", "C"] + + # Easiest way to make this profile: + _ = base.PreferenceProfile(alternatives=alternatives, votes=[ + ["A", "B", "C"], ["A", "C", "B"], ["C", "A", "B"], ["C", "A", "B"], + ["B", "C", "A"] + ]) + + # Note that the C > A > B vote is there twice, so another common way to show + # this is: + # 1: A > B > C + # 1: A > C > B + # 2: C > A > B + # 1: B > C > A + # and can be created with the WeightedVote type directly. + profile = base.PreferenceProfile(alternatives=alternatives, votes=[ + base.WeightedVote(1, ["A", "B", "C"]), + base.WeightedVote(1, ["A", "C", "B"]), + base.WeightedVote(2, ["C", "A", "B"]), + base.WeightedVote(1, ["B", "C", "A"]) + ]) + + # Print some information about the profile + print(f"Number of alternatives: {profile.num_alternatives()}") + print(f"Number of votes: {profile.num_votes()}") + print(f"Alternatives: {profile.alternatives}") + print("Profile:") + print(profile) + + # Print a reverse mapping of AlternativeId -> index + # indices will always be numbered 0 to num_alternatives - 1. + # Some methods work directly with the indices. + alt_idx = profile.alternatives_dict + print("Alternative ids -> index map:") + print(alt_idx) + + # Iterating through a profile + print("Iterating through profile:") + for vote in profile.votes: + # Each item is a weighted vote: + print(f" {vote.weight}: {vote.vote}") + + # Margin matrix and Condorcet winner check + margin_matrix = profile.margin_matrix() + cond_winners = profile.condorcet_winner(strong=True, + margin_matrix=margin_matrix) + print("Margin matrix:") + print(margin_matrix) + print(f"Condorcet winners: {cond_winners}") + + # Run Copeland on this profile and print the results + method = copeland.CopelandVoting() + outcome = method.run_election(profile) + print("Copeland outcome:") + print(outcome.pretty_table_string()) + + +if __name__ == "__main__": + app.run(main) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/kemeny_young.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/kemeny_young.py new file mode 100644 index 0000000..add159d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/kemeny_young.py @@ -0,0 +1,75 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Kemeny-Young method. + +Based on https://en.wikipedia.org/wiki/Kemeny%E2%80%93Young_method. +""" + +import itertools +from typing import List, Tuple +import numpy as np +from open_spiel.python.voting import base + + +class KemenyYoungVoting(base.AbstractVotingMethod): + """Implements Kemeny-Young's method.""" + + def __init__(self): + pass + + def name(self) -> str: + return "kemeny_young" + + def _score( + self, + pref_mat: np.ndarray, + perm: Tuple[int, ...], + ) -> np.ndarray: + # The score of alternative a_i in a ranking R is defined to be: + # KemenyScore(a_i) = sum_{a_j s.t. R(a_i) >= R(a_j)} N(a_i, a_j) + # The score of ranking R is then sum_i KemenyScore(a_i). + num_alts = len(perm) + scores = np.zeros(num_alts, dtype=np.int32) + for i in range(num_alts): + for j in range(i+1, num_alts): + scores[i] += pref_mat[perm[i], perm[j]] + return scores + + def _permutation_to_ranking( + self, + alternatives: List[base.AlternativeId], + permutation: Tuple[base.AlternativeId, ...]) -> List[base.AlternativeId]: + assert len(permutation) == len(alternatives) + return [alternatives[permutation[i]] for i in range(len(alternatives))] + + def run_election(self, profile: base.PreferenceProfile) -> base.RankOutcome: + assert self.is_valid_profile(profile) + pref_mat = profile.pref_matrix() + alternatives = profile.alternatives + m = profile.num_alternatives() + best_permutation = None + best_score = -1 + best_score_array = None + for permutation in itertools.permutations(range(m)): + scores = self._score(pref_mat, permutation) + total_score = scores.sum() + if total_score > best_score: + best_score = total_score + best_score_array = scores + best_permutation = permutation + best_ranking = self._permutation_to_ranking(alternatives, best_permutation) + outcome = base.RankOutcome(rankings=best_ranking, + scores=list(best_score_array)) + return outcome + diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/kemeny_young_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/kemeny_young_test.py new file mode 100644 index 0000000..85b3f6e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/kemeny_young_test.py @@ -0,0 +1,60 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
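Before the tests, a short editorial sketch (not part of the vendored file) of the scoring rule implemented in `_score` above, worked by hand on the meeple-pentathlon profile used in the tests below; N(i, j) denotes the number of voters preferring i to j.

```
# Editorial sketch: Kemeny-Young scoring by hand for the profile
#   {1: A>B>C, 1: A>C>B, 2: C>A>B, 1: B>C>A}.
# Pairwise counts: N(A,B)=4, N(B,A)=1, N(C,A)=3, N(A,C)=2, N(C,B)=3, N(B,C)=2.
# For the ranking C > A > B the per-alternative scores are
#   score(C) = N(C,A) + N(C,B) = 6, score(A) = N(A,B) = 4, score(B) = 0,
# for a total of 10, which no other ranking exceeds. This matches the
# [6, 4, 0] expectation in test_meeple_pentathlon below.
from open_spiel.python.voting import base
from open_spiel.python.voting import kemeny_young

profile = base.PreferenceProfile(alternatives=["A", "B", "C"], votes=[
    base.WeightedVote(1, ["A", "B", "C"]),
    base.WeightedVote(1, ["A", "C", "B"]),
    base.WeightedVote(2, ["C", "A", "B"]),
    base.WeightedVote(1, ["B", "C", "A"]),
])
outcome = kemeny_young.KemenyYoungVoting().run_election(profile)
print(outcome.ranking, outcome.scores)  # expected: ['C', 'A', 'B'] [6, 4, 0]
```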
+ +"""Tests for open_spiel.python.voting.kemeny_young.""" + +from absl.testing import absltest + +from open_spiel.python.voting import base +from open_spiel.python.voting import kemeny_young + + +class KemenyYoungTest(absltest.TestCase): + + def test_ranked_pairs_wikipedia_example(self): + alternatives = ["Memphis", "Nashville", "Chattanooga", "Knoxville"] + votes = [ + base.WeightedVote(42, + ["Memphis", "Nashville", "Chattanooga", "Knoxville"]), + base.WeightedVote(26, + ["Nashville", "Chattanooga", "Knoxville", "Memphis"]), + base.WeightedVote(15, + ["Chattanooga", "Knoxville", "Nashville", "Memphis"]), + base.WeightedVote(17, + ["Knoxville", "Chattanooga", "Nashville", "Memphis"]), + ] + profile = base.PreferenceProfile(votes=votes, alternatives=alternatives) + method = kemeny_young.KemenyYoungVoting() + outcome = method.run_election(profile) + self.assertListEqual(outcome.ranking, + ["Nashville", "Chattanooga", "Knoxville", "Memphis"]) + self.assertListEqual(outcome.scores, [194, 141, 58, 0]) + + def test_meeple_pentathlon(self): + alternatives = ["A", "B", "C"] + votes = [ + base.WeightedVote(1, ["A", "B", "C"]), + base.WeightedVote(1, ["A", "C", "B"]), + base.WeightedVote(2, ["C", "A", "B"]), + base.WeightedVote(1, ["B", "C", "A"]), + ] + profile = base.PreferenceProfile(votes=votes, alternatives=alternatives) + method = kemeny_young.KemenyYoungVoting() + outcome = method.run_election(profile) + self.assertListEqual(outcome.ranking, ["C", "A", "B"]) + self.assertListEqual(outcome.scores, [6, 4, 0]) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/maximal_lotteries.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/maximal_lotteries.py new file mode 100644 index 0000000..230678a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/maximal_lotteries.py @@ -0,0 +1,146 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Fishburn's Maximal lotteries method. + +Based on https://en.wikipedia.org/wiki/Maximal_lotteries. 
+""" + +from typing import List +import numpy as np +from open_spiel.python.algorithms import lp_solver +import pyspiel +from open_spiel.python.voting import base + + +class MaximalLotteriesVoting(base.AbstractVotingMethod): + """Implements Copeland's method.""" + + def __init__(self, + iterative: bool = True, + verbose: bool = False, + zero_tolerance: float = 1e-6): + self._iterative = iterative + self._verbose = verbose + self._zero_tolerance = zero_tolerance + + def name(self) -> str: + return f"maximal_lotteries(iterative={self._iterative})" + + def _create_matrix_game(self, matrix: np.ndarray): + return pyspiel.create_tensor_game([matrix, -matrix]).as_matrix_game() + + def _solve_game( + self, margin_matrix: np.ndarray + ) -> np.ndarray: + matrix_game = self._create_matrix_game(margin_matrix) + p0_sol, _, _, _ = lp_solver.solve_zero_sum_matrix_game(matrix_game) + return p0_sol + + def run_election(self, profile: base.PreferenceProfile) -> base.RankOutcome: + margin_matrix = profile.margin_matrix() + alternatives = profile.alternatives + m = profile.num_alternatives() + if self._verbose: + print(f"Margin matrix: \n{margin_matrix}") + print(f"Alternatives: {alternatives}") + p0_sol = self._solve_game(margin_matrix) + + # For now define scores as the probabilities. + scores = {} + if not self._iterative: + # and negligible noise to break ties + noise = 1e-10 * np.random.uniform(size=m) + for i in range(m): + scores[alternatives[i]] = p0_sol[i] + noise[i] + sorted_scores = sorted(scores.items(), key=lambda item: item[1]) + sorted_scores.reverse() + outcome = base.RankOutcome() + outcome.unpack_from(sorted_scores) + return outcome + else: + # Continue to iteratively solve all the remaining subgames. + return self._iterate(alternatives, margin_matrix, p0_sol) + + def _iterate( + self, + alternatives: List[base.AlternativeId], + margin_matrix: np.ndarray, + p0_sol: np.ndarray, + ): + remaining_alternatives = alternatives[:] + leveled_ranking = [] + leveled_scores = [] + while remaining_alternatives: + # Pull out the nonzero entries and make them winners of this level. + m = len(remaining_alternatives) + if self._verbose: + print(f"\nRemaining alternatives: {remaining_alternatives}") + cur_level = len(leveled_ranking) + print(f"IML Level {cur_level}") + print(f"Remaining alternatives: {remaining_alternatives}") + print(f"Margin matrix: \n{margin_matrix}\n") + if m == 1: + leveled_ranking.append(remaining_alternatives[:]) + leveled_scores.append([1]) + break + noise = 1e-10 * np.random.uniform(size=m) + for i in range(m): + p0_sol[i] += noise[i] + values = -1 * np.ones(m, dtype=np.float64) + level_winners_idxs = [] + for i in range(m): + if p0_sol[i] > self._zero_tolerance: + # print(f"p0_sol[{i}] = {p0_sol[i]}") + level_winners_idxs.append(i) + values[i] = p0_sol[i] + num_level_winners = len(level_winners_idxs) + assert num_level_winners >= 1 + indices = np.argsort(-values) + level_winners_ranked = [] + level_winners_scores = [] + for j in range(num_level_winners): + idx = int(indices[j]) + level_winners_ranked.append(remaining_alternatives[idx]) + level_winners_scores.append(p0_sol[idx]) + leveled_ranking.append(level_winners_ranked) + leveled_scores.append(level_winners_scores) + if self._verbose: + print(f"Level winners: {level_winners_ranked}") + print(f"Level scores: {level_winners_scores}") + # Now, take them out of the margin matrix and remaining alternatives + # Delete in reverse order. 
+ for j in range(num_level_winners): + idx = level_winners_idxs[num_level_winners - 1 - j] + del remaining_alternatives[idx] + margin_matrix = np.delete(margin_matrix, (idx), axis=0) + margin_matrix = np.delete(margin_matrix, (idx), axis=1) + if len(remaining_alternatives) > 1: + p0_sol = self._solve_game(margin_matrix) + # Now bump up the scores by level, and put them in the outcome. + scores = {} + num_levels = len(leveled_ranking) + if self._verbose: + print(f"Num levels: {num_levels}") + level_base_points = num_levels - 1 + for level in range(num_levels): + for j in range(len(leveled_ranking[level])): + alternative = leveled_ranking[level][j] + score = level_base_points + leveled_scores[level][j] + scores[alternative] = score + level_base_points -= 1 + sorted_scores = sorted(scores.items(), key=lambda item: item[1]) + sorted_scores.reverse() + outcome = base.RankOutcome() + outcome.unpack_from(sorted_scores) + return outcome diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/maximal_lotteries_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/maximal_lotteries_test.py new file mode 100644 index 0000000..11b4f01 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/maximal_lotteries_test.py @@ -0,0 +1,95 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from absl.testing import absltest +from absl.testing import parameterized + +import numpy as np +from open_spiel.python.voting import base +from open_spiel.python.voting import maximal_lotteries + + +class MaximalLotteriesTest(parameterized.TestCase): + @parameterized.named_parameters(("iterative", True), ("non-iterative", False)) + def test_stv_records_number(self, iterate): + method = maximal_lotteries.MaximalLotteriesVoting(iterative=iterate) + self.assertEqual( + method.name(), f"maximal_lotteries(iterative={iterate})" + ) + + def test_maximal_lotteries_basic_run(self): + # "a" is a dominant strategy of the margin game, so it should be chosen with + # probablity 1. + votes = [["a", "b", "c"], ["a", "c", "b"], ["b", "a", "c"]] + profile = base.PreferenceProfile(votes=votes) + method = maximal_lotteries.MaximalLotteriesVoting(iterative=False) + outcome = method.run_election(profile) + with self.subTest("Top-rank the condorcet winner"): + self.assertEqual(outcome.ranking[0], "a") + with self.subTest("Check extreme scores"): + self.assertAlmostEqual(outcome.scores[0], 1.0) + self.assertAlmostEqual(outcome.scores[1], 0.0) + self.assertAlmostEqual(outcome.scores[2], 0.0) + + def test_maximal_lotteries_basic_iterative(self): + votes = [["a", "b", "c"], ["a", "c", "b"], ["b", "a", "c"]] + profile = base.PreferenceProfile(votes=votes) + # "a" is a dominant strategy, so in the iterative version it should be + # chosen first, leading to a new matrix with the first row and column + # deleted. This then means that "b" is dominant in the subgame. 
+ expected_margin_matrix = np.array([ + [0, 1, 3], + [-1, 0, 1], + [-3, -1, 0]]) + with self.subTest("Check margin matrix"): + self.assertTrue(np.array_equal(profile.margin_matrix(), + expected_margin_matrix)) + method = maximal_lotteries.MaximalLotteriesVoting(iterative=True) + outcome = method.run_election(profile) + with self.subTest("Check ranking"): + self.assertListEqual(outcome.ranking, ["a", "b", "c"]) + with self.subTest("Check scores"): + self.assertAlmostEqual(outcome.scores[0], 3.0) + self.assertAlmostEqual(outcome.scores[1], 2.0) + self.assertAlmostEqual(outcome.scores[2], 1.0) + + def test_maximal_lotteries_cycle(self): + # Cyclical profile leads to a Rock, Paper, Scissors margin game. + votes = [["a", "b", "c"], ["b", "c", "a"], ["c", "a", "b"]] + profile = base.PreferenceProfile(votes=votes) + method = maximal_lotteries.MaximalLotteriesVoting() + outcome = method.run_election(profile) + with self.subTest("Check prob 1/3"): + self.assertAlmostEqual(outcome.scores[0], 1.0 / 3.0) + with self.subTest("Check uniform"): + self.assertAlmostEqual(outcome.scores[0], outcome.scores[1]) + self.assertAlmostEqual(outcome.scores[1], outcome.scores[2]) + + def test_maximal_lotteries_iterative_cycle(self): + # Cyclical profile leads to a Rock, Paper, Scissors margin game. + # Iterative maximal lotteries should yield the same result as the + # non-iterative version. + votes = [["a", "b", "c"], ["b", "c", "a"], ["c", "a", "b"]] + profile = base.PreferenceProfile(votes=votes) + method = maximal_lotteries.MaximalLotteriesVoting(iterative=True) + outcome = method.run_election(profile) + with self.subTest("Check prob 1/3"): + self.assertAlmostEqual(outcome.scores[0], 1.0 / 3.0) + with self.subTest("Check uniform"): + self.assertAlmostEqual(outcome.scores[0], outcome.scores[1]) + self.assertAlmostEqual(outcome.scores[1], outcome.scores[2]) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/plurality.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/plurality.py new file mode 100644 index 0000000..6db074a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/plurality.py @@ -0,0 +1,42 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Plurality voting method. + +Based on https://en.wikipedia.org/wiki/Plurality_voting. 
+""" + +from open_spiel.python.voting import base + + +class PluralityVoting(base.AbstractVotingMethod): + """Implements the plurality (first past the post) voting rule.""" + + def __init__(self): + pass + + def name(self) -> str: + return "plurality" + + def run_election(self, profile: base.PreferenceProfile) -> base.RankOutcome: + assert self.is_valid_profile(profile) + tally = {} + for alternative in profile.alternatives: + tally[alternative] = 0 + for vote in profile.votes: + tally[vote.vote[0]] += vote.weight + sorted_tally = sorted(tally.items(), key=lambda item: item[1], reverse=True) + outcome = base.RankOutcome() + outcome.unpack_from(sorted_tally) + return outcome diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/plurality_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/plurality_test.py new file mode 100644 index 0000000..f382b6c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/plurality_test.py @@ -0,0 +1,70 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.voting.plurality.""" + +from absl.testing import absltest +from absl.testing import parameterized + +from open_spiel.python.voting import base +from open_spiel.python.voting import plurality + +_SIMPLE_VOTE = [["a", "b", "c"], ["a", "c", "b"], ["b", "a", "c"]] +_SIMPLE_WINNER = (_SIMPLE_VOTE, "a") +_WEIGHTED_WINNER = (_SIMPLE_VOTE, [1, 2, 3], [3, 3, 0], ["a", "b"]) + + +class PluralityVotingTest(parameterized.TestCase): + def setUp(self): + super().setUp() + self.method = plurality.PluralityVoting() + + @parameterized.parameters(_SIMPLE_WINNER) + def test_plurality_with_votes_in_profile_constructor(self, votes, winner): + profile = base.PreferenceProfile(votes=votes) + outcome = self.method.run_election(profile) + self.assertEqual(outcome.ranking[0], winner) + + @parameterized.parameters(_SIMPLE_WINNER) + def test_plurality_with_alternatives_specified(self, votes, winner): + profile = base.PreferenceProfile(alternatives=["c", "b", "a"]) + for vote in votes: + profile.add_vote(vote) + outcome = self.method.run_election(profile) + self.assertEqual(outcome.ranking[0], winner) + + @parameterized.parameters(_SIMPLE_WINNER) + def test_plurality_with_no_default_votes(self, votes, winner): + profile = base.PreferenceProfile() + for vote in votes: + profile.add_vote(vote) + outcome = self.method.run_election(profile) + self.assertEqual(outcome.ranking[0], winner) + + @parameterized.parameters(_WEIGHTED_WINNER) + def test_plurality_with_weighted_votes(self, votes, weights, + correct_scores, winner): + profile = base.PreferenceProfile() + for i, vote in enumerate(votes): + profile.add_vote(vote, weight=weights[i]) + outcome = self.method.run_election(profile) + + with self.subTest("Weighted score correctly calculated."): + self.assertListEqual(correct_scores, outcome.scores) + with self.subTest("Winners take the top spots in the ranking."): + 
self.assertCountEqual(outcome.ranking[:len(winner)], winner) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/preflib_util.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/preflib_util.py new file mode 100644 index 0000000..f1fe118 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/preflib_util.py @@ -0,0 +1,79 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Helpers to work with PrefLib data.""" + +import pyspiel +from open_spiel.python.voting import base + + +def parse_preflib_data(string_data: str) -> base.PreferenceProfile: + """Parses the contents of a PrefLib data file. + + Currently only supports SOC and SOI. See https://www.preflib.org/format. + + Args: + string_data: the name of the file to parse. + + Returns: + A preference profile. + """ + lines = string_data.split("\n") + alternatives = [] + num_alternatives = None + num_votes = None + profile = base.PreferenceProfile() + for raw_line in lines: + line = raw_line.strip() + if not line: continue + if line.startswith("#"): + parts = line.split(" ") + if line.startswith("# DATA TYPE: "): + assert(parts[3] == "soc" or parts[3] == "soi") + elif line.startswith("# NUMBER ALTERNATIVES:"): + num_alternatives = int(parts[3]) + alternatives = [None] * num_alternatives + elif line.startswith("# NUMBER VOTERS:"): + num_votes = int(parts[3]) + elif line.startswith("# ALTERNATIVE NAME "): + num = int(parts[3].split(":")[0]) + index_of_colon = line.index(":") + assert 1 <= num <= num_alternatives + alternatives[num-1] = line[index_of_colon+2:] + else: + if profile.num_alternatives() == 0: + profile = base.PreferenceProfile(alternatives=alternatives) + index_of_colon = line.index(":") + weight = int(line[:index_of_colon]) + vote_parts = line[index_of_colon+2:].split(",") + vote = [alternatives[int(part) - 1] for part in vote_parts] + if weight > 0: + profile.add_vote(vote, weight) + assert num_votes == profile.num_votes() + return profile + + +def parse_preflib_datafile(filename: str) -> base.PreferenceProfile: + """Parses a Preflib data file. + + Currently only supports SOC and SOI. See https://www.preflib.org/format. + + Args: + filename: the name of the file to parse. + + Returns: + A preference profile. + """ + contents = pyspiel.read_contents_from_file(filename, "r") + return parse_preflib_data(contents) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/preflib_util_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/preflib_util_test.py new file mode 100644 index 0000000..bc967ad --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/preflib_util_test.py @@ -0,0 +1,60 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.voting.util.""" + +from absl.testing import absltest +from open_spiel.python.voting import preflib_util + +TEST_DATA = """ +# FILE NAME: 00004-00000050.soc +# TITLE: Netflix Prize Data +# DESCRIPTION: +# DATA TYPE: soc +# MODIFICATION TYPE: induced +# RELATES TO: +# RELATED FILES: +# PUBLICATION DATE: 2013-08-17 +# MODIFICATION DATE: 2022-09-16 +# NUMBER ALTERNATIVES: 3 +# NUMBER VOTERS: 391 +# NUMBER UNIQUE ORDERS: 6 +# ALTERNATIVE NAME 1: The Amityville Horror +# ALTERNATIVE NAME 2: Mars Attacks! +# ALTERNATIVE NAME 3: Lean on Me +186: 3,1,2 +71: 1,3,2 +58: 3,2,1 +45: 2,3,1 +18: 1,2,3 +13: 2,1,3 +""" + + +class UtilTest(absltest.TestCase): + def test_load_preflib(self): + print(TEST_DATA) + profile = preflib_util.parse_preflib_data(TEST_DATA) + print(profile) + self.assertEqual(profile.num_alternatives(), 3) + self.assertEqual(profile.num_votes(), 391) + self.assertListEqual(profile.alternatives, [ + "The Amityville Horror", "Mars Attacks!", "Lean on Me" + ]) + print(profile.alternatives) + print(profile.margin_matrix()) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/ranked_pairs.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/ranked_pairs.py new file mode 100644 index 0000000..2c74a54 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/ranked_pairs.py @@ -0,0 +1,221 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Ranked Pairs A.K.A. the Tideman method. + +Based on https://en.wikipedia.org/wiki/Ranked_pairs. +""" + +from typing import List, Tuple +import numpy as np +from open_spiel.python.voting import base + +# TODO(author5): either one of the following: (i) change graph representation to +# adjacency lists for more efficient cycle checking, (ii) use a graph library +# such as networkx to represent the graph and support graph functions. + + +class RankedPairsRankOutcome(base.RankOutcome): + """A custom RankOutcome class for Ranked Pairs. + + Provides an extra method to get the graph. 
+ """ + + def __init__( + self, + rankings: List[base.AlternativeId], + scores: List[float], + graph: np.ndarray, + ): + super().__init__(rankings, scores) + self._graph = graph + + @property + def graph(self) -> np.ndarray: + return self._graph + + +class RankedPairsVoting(base.AbstractVotingMethod): + """Implements Ranked Pairs / Tideman's method.""" + + def __init__(self): + pass + + def name(self) -> str: + return "ranked_pairs" + + def _would_create_cycle( + self, + alternatives: List[base.AlternativeId], + graph: np.ndarray, + from_idx: int, + to_idx: int, + ) -> bool: + """Checks if adding a specific directed edge would result in a cycle. + + Args: + alternatives: list of alternatives. + graph: 2D adjacency matrix representing a directed acyclic graph. Row is + the from node index, column the to node index. + from_idx: the edge to add (from index). + to_idx: the edge to add (to index). + + Returns: + True if adding the specified edge would result in a cycle in the graph. + """ + # Perform a breadth-first flood fill using a status table. + # Values in the status table represent: + # 0 means it does not exist in the flood yet + # 1 means it needs to be expanded + # -1 means it has been expanded (now closed, do not revisit) + m = len(alternatives) + status_table = np.zeros(m) + status_table[to_idx] = 1 + num_expanded = 1 + while num_expanded > 0: + num_expanded = 0 + for i in np.where(status_table == 1)[0]: + num_expanded += 1 + for j in np.where(graph[i][:] == 1)[0]: + if status_table[j] == 0: + if j == from_idx: + return True + status_table[j] = 1 + status_table[i] = -1 + return False + + def _is_source(self, graph: np.ndarray, idx: int): + """Returns true if this node is a source, false otherwise.""" + num_incoming = np.sum(graph[:, idx]) + num_outgoing = np.sum(graph[idx]) + return num_outgoing > 0 and num_incoming == 0 + + def _remove_node(self, graph: np.ndarray, idx: int): + """Removes a node from the graph.""" + graph[idx, :] = 0 + graph[:, idx] = 0 + + def _get_score( + self, graph: np.ndarray, margin_matrix: np.ndarray, node_idx: int + ) -> int: + """Computes the score of an alternative. + + The score is defined as the sum of the margins between the subgraph + containing all reachable nodes from this node. + + Args: + graph: 2D adjacency matrix representing a directed acyclic graph. Row is + the from node index, column the to node index. + margin_matrix: the margin matrix from the profile + node_idx: the node index in question. + + Returns: + the score of the alternative represented by this node index. + """ + # Flood fill to compute score from a source + score = 0 + open_list = {node_idx: True} + closed_list = {} + while open_list: + i = list(open_list.keys())[0] + open_list.pop(i) + outgoing_edges = np.where(graph[i][:] == 1)[0] + for j in outgoing_edges: + score += margin_matrix[i, j] + if j not in open_list and j not in closed_list: + open_list[j] = True + closed_list[i] = True + return score + + def _get_ranked_pairs( + self, alternatives: List[base.AlternativeId], margin_matrix: np.ndarray + ) -> List[Tuple[Tuple[base.AlternativeId, base.AlternativeId], int]]: + """Returns the positively-valued ranked pairs coupled with their values. + + Arguments: + alternatives: the list of alternatives ids. + margin_matrix: the margin matrix we use to get the values for each ranked + pair. + + Returns: + A list of tuples of the form ((x, y), value) indicating x beating y by + the specified value. 
+ """ + ranked_pairs = {} + rows, cols = np.where(margin_matrix > 0) + for i, j in zip(rows, cols): + key_tup = (alternatives[i], alternatives[j]) + ranked_pairs[key_tup] = margin_matrix[i, j] + return sorted(ranked_pairs.items(), key=lambda item: item[1], reverse=True) + + def run_election( + self, profile: base.PreferenceProfile + ) -> RankedPairsRankOutcome: + assert self.is_valid_profile(profile) + alternatives = profile.alternatives + m = len(alternatives) + alt_idx = profile.alternatives_dict + margin_matrix = profile.margin_matrix() + + # First, get the ranked pairs annotated with their values (delta(a,b)). + sorted_pairs = self._get_ranked_pairs(alternatives, margin_matrix) + + # Now, create the graph: add edges that do not create cycles. + graph = np.zeros(shape=(m, m), dtype=np.int32) + if sorted_pairs: + # Create the top-ranked pair. This needs to be in a conditional block, + # because some profiles can legitimately lead to a graph with no edges (no + # positively-valued ranked pairs) + first_pair = sorted_pairs[0][0] + p0_idx = alt_idx[first_pair[0]] + p1_idx = alt_idx[first_pair[1]] + graph[p0_idx, p1_idx] = 1 + for j in range(1, len(sorted_pairs)): + pair = sorted_pairs[j][0] + p0_idx = alt_idx[pair[0]] + p1_idx = alt_idx[pair[1]] + if not self._would_create_cycle(alternatives, graph, p0_idx, p1_idx): + graph[p0_idx, p1_idx] = 1 + full_graph = graph.copy() # Make a copy to return later. + + # Now, remove sources nodes in sequence to get the ranking. + ranking = [] + scores = [] + alt_idx_remaining = [] + for i in range(m): + alt_idx_remaining.append(i) + while len(ranking) < m: + has_source = False + for j in range(m): + if self._is_source(graph, j): + ranking.append(alternatives[j]) + scores.append(self._get_score(graph, margin_matrix, j)) + self._remove_node(graph, j) + alt_idx_remaining.remove(j) + has_source = True + break + if not has_source: + # At the end, it can happen that there are a number of disconnected + # nodes (no incoming nor outgoing edges). Take the first one from the + # graph. + j = alt_idx_remaining[0] + ranking.append(alternatives[j]) + scores.append(0) + self._remove_node(graph, j) + alt_idx_remaining.remove(j) + + # Finally, return the ranking and scores. + outcome = RankedPairsRankOutcome( + rankings=ranking, scores=scores, graph=full_graph + ) + return outcome diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/ranked_pairs_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/ranked_pairs_test.py new file mode 100644 index 0000000..bdbc72d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/ranked_pairs_test.py @@ -0,0 +1,116 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
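The tests that follow exercise the ranked-pairs implementation above. As a quick orientation, here is a minimal sketch (illustrative only; it assumes the `base` and `ranked_pairs` modules defined above are importable, exactly as in the tests below) of how the method resolves a Condorcet cycle by locking pairs from largest to smallest margin and skipping any pair whose edge would close a cycle:

```python
from open_spiel.python.voting import base
from open_spiel.python.voting import ranked_pairs

# A three-way Condorcet cycle: A beats B (7-2), C beats A (6-3), B beats C (5-4).
votes = [
    base.WeightedVote(3, ["A", "B", "C"]),
    base.WeightedVote(2, ["B", "C", "A"]),
    base.WeightedVote(4, ["C", "A", "B"]),
]
profile = base.PreferenceProfile(votes=votes, alternatives=["A", "B", "C"])
outcome = ranked_pairs.RankedPairsVoting().run_election(profile)

# Pairs are locked in margin order: A->B (margin 5), then C->A (margin 3);
# B->C (margin 1) is skipped because it would close the cycle C->A->B->C.
# Repeatedly peeling off source nodes of the acyclic graph gives C > A > B.
print(outcome.ranking)  # ['C', 'A', 'B']
print(outcome.graph)    # adjacency matrix of the locked pairs (rows = winners)
```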
+ +"""Tests for open_spiel.python.voting.ranked_pairs.""" + +from absl.testing import absltest +import numpy as np +from open_spiel.python.voting import base +from open_spiel.python.voting import ranked_pairs + + +class RankedPairsTest(absltest.TestCase): + + def test_ranked_pairs_wikipedia_example1(self): + alternatives = ["w", "x", "y", "z"] + votes = [ + base.WeightedVote(7, ["w", "x", "z", "y"]), + base.WeightedVote(2, ["w", "y", "x", "z"]), + base.WeightedVote(4, ["x", "y", "z", "w"]), + base.WeightedVote(5, ["x", "z", "w", "y"]), + base.WeightedVote(1, ["y", "w", "x", "z"]), + base.WeightedVote(8, ["y", "z", "w", "x"]), + ] + profile = base.PreferenceProfile(votes=votes, alternatives=alternatives) + method = ranked_pairs.RankedPairsVoting() + outcome = method.run_election(profile) + with self.subTest("Ranking and scores"): + self.assertListEqual(outcome.ranking, ["w", "x", "y", "z"]) + self.assertListEqual(outcome.scores, [29, 19, 3, 0]) + with self.subTest("Check the graph"): + expected_graph = np.array( + [[0, 1, 1, 0], [0, 0, 1, 1], [0, 0, 0, 1], [0, 0, 0, 0]] + ) + self.assertTrue(np.array_equal(outcome.graph, expected_graph)) + + def test_ranked_pairs_wikipedia_example2(self): + alternatives = ["Memphis", "Nashville", "Chattanooga", "Knoxville"] + votes = [ + base.WeightedVote( + 42, ["Memphis", "Nashville", "Chattanooga", "Knoxville"] + ), + base.WeightedVote( + 26, ["Nashville", "Chattanooga", "Knoxville", "Memphis"] + ), + base.WeightedVote( + 15, ["Chattanooga", "Knoxville", "Nashville", "Memphis"] + ), + base.WeightedVote( + 17, ["Knoxville", "Chattanooga", "Nashville", "Memphis"] + ), + ] + profile = base.PreferenceProfile(votes=votes, alternatives=alternatives) + method = ranked_pairs.RankedPairsVoting() + outcome = method.run_election(profile) + with self.subTest("Ranking and scores"): + self.assertListEqual( + outcome.ranking, ["Nashville", "Chattanooga", "Knoxville", "Memphis"] + ) + self.assertListEqual(outcome.scores, [186, 98, 16, 0]) + with self.subTest("Check the graph"): + expected_graph = np.array( + [[0, 0, 0, 0], [1, 0, 1, 1], [1, 0, 0, 1], [1, 0, 0, 0]] + ) + self.assertTrue(np.array_equal(outcome.graph, expected_graph)) + + def test_meeple_pentathlon(self): + alternatives = ["A", "B", "C"] + votes = [ + base.WeightedVote(1, ["A", "B", "C"]), + base.WeightedVote(1, ["A", "C", "B"]), + base.WeightedVote(2, ["C", "A", "B"]), + base.WeightedVote(1, ["B", "C", "A"]), + ] + profile = base.PreferenceProfile(votes=votes, alternatives=alternatives) + method = ranked_pairs.RankedPairsVoting() + outcome = method.run_election(profile) + with self.subTest("Ranking and scores"): + self.assertListEqual(outcome.ranking, ["C", "A", "B"]) + self.assertListEqual(outcome.scores, [5, 3, 0]) + with self.subTest("Check the graph"): + # A -> B, C -> A, C -> B + expected_graph = np.array([[0, 1, 0], [0, 0, 0], [1, 1, 0]]) + self.assertTrue(np.array_equal(outcome.graph, expected_graph)) + + def test_ranked_pairs_simple_cycle(self): + alternatives = ["A", "B"] + votes = [ + base.WeightedVote(1, ["A", "B"]), + base.WeightedVote(1, ["B", "A"]), + ] + profile = base.PreferenceProfile(votes=votes, alternatives=alternatives) + method = ranked_pairs.RankedPairsVoting() + outcome = method.run_election(profile) + with self.subTest("Check the graph is empty"): + expected_graph = np.array( + [[0, 0], [0, 0]] + ) + self.assertTrue(np.array_equal(outcome.graph, expected_graph)) + with self.subTest("Rankings and scores"): + self.assertTrue(outcome.ranking == ["A", "B"] or + outcome.ranking == 
["B", "A"]) + self.assertListEqual(outcome.scores, [0, 0]) + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/schulze.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/schulze.py new file mode 100644 index 0000000..d3f1c96 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/schulze.py @@ -0,0 +1,78 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Schulze method. + +Based on https://en.wikipedia.org/wiki/Schulze_method. +""" + +import functools +import numpy as np +from open_spiel.python.voting import base + + +class SchulzeVoting(base.AbstractVotingMethod): + """Implements Schulze's method.""" + + def __init__(self): + pass + + def name(self) -> str: + return "schulze" + + def run_election(self, profile: base.PreferenceProfile) -> base.RankOutcome: + assert self.is_valid_profile(profile) + alternatives = profile.alternatives + num_alternatives = profile.num_alternatives() + pref_mat = profile.pref_matrix() + strongest_paths = np.zeros(shape=(num_alternatives, num_alternatives), + dtype=np.float32) + # calculating the direct paths + for i in range(num_alternatives): + for j in range(num_alternatives): + if i != j: + if pref_mat[i, j] > pref_mat[j, i]: + strongest_paths[i, j] = pref_mat[i, j] + else: + strongest_paths[i, j] = 0 + # checking if any indirect paths are better + for i in range(num_alternatives): + for j in range(num_alternatives): + if i != j and strongest_paths[j, i] > 0: + for k in range(num_alternatives): + if i != k and j != k: + # if the path from j to k through i is better, replace + strongest_paths[j, k] = max(strongest_paths[j, k], + min(strongest_paths[j, i], + strongest_paths[i, k])) + + def compare(x, y): + return strongest_paths[x, y] - strongest_paths[y, x] + ranking_idx = np.arange(num_alternatives) + sorted_ranking_idx = sorted(ranking_idx, key=functools.cmp_to_key(compare), + reverse=True) + # Define the scores as the sum of preferences for everything it beats in + # the order. + cumul_score = 0 + # start at the end and work backwards + ranking_alts = [alternatives[sorted_ranking_idx[-1]]] + scores = [0] + i = num_alternatives - 2 + while i >= 0: + alt_idx_i = sorted_ranking_idx[i] + alt_idx_j = sorted_ranking_idx[i+1] + ranking_alts.insert(0, alternatives[alt_idx_i]) + cumul_score += pref_mat[alt_idx_i, alt_idx_j] + scores.insert(0, cumul_score) + i -= 1 + return base.RankOutcome(rankings=ranking_alts, scores=scores) diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/schulze_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/schulze_test.py new file mode 100644 index 0000000..7bc92a5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/schulze_test.py @@ -0,0 +1,62 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for open_spiel.python.voting.schulze.""" + +from absl.testing import absltest + +from open_spiel.python.voting import base +from open_spiel.python.voting import schulze + + +class SchulzeTest(absltest.TestCase): + def test_shulze_construction(self): + method = schulze.SchulzeVoting() + self.assertEqual(method.name(), "schulze") + + def test_shulze_wikipedia_example(self): + votes = [ + base.WeightedVote(5, ["A", "C", "B", "E", "D"]), + base.WeightedVote(5, ["A", "D", "E", "C", "B"]), + base.WeightedVote(8, ["B", "E", "D", "A", "C"]), + base.WeightedVote(3, ["C", "A", "B", "E", "D"]), + base.WeightedVote(7, ["C", "A", "E", "B", "D"]), + base.WeightedVote(2, ["C", "B", "A", "D", "E"]), + base.WeightedVote(7, ["D", "C", "E", "B", "A"]), + base.WeightedVote(8, ["E", "B", "A", "D", "C"]) + ] + profile = base.PreferenceProfile(votes=votes, + alternatives=["A", "B", "C", "D", "E"]) + method = schulze.SchulzeVoting() + outcome = method.run_election(profile) + self.assertListEqual(outcome.ranking, ["E", "A", "C", "B", "D"]) + self.assertListEqual(outcome.scores, [111, 88, 62, 33, 0]) + + def test_meeple_pentathlon(self): + alternatives = ["A", "B", "C"] + votes = [ + base.WeightedVote(1, ["A", "B", "C"]), + base.WeightedVote(1, ["A", "C", "B"]), + base.WeightedVote(2, ["C", "A", "B"]), + base.WeightedVote(1, ["B", "C", "A"]) + ] + profile = base.PreferenceProfile(votes=votes, alternatives=alternatives) + method = schulze.SchulzeVoting() + outcome = method.run_election(profile) + self.assertListEqual(outcome.ranking, ["C", "A", "B"]) + self.assertListEqual(outcome.scores, [7, 4, 0]) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/soft_condorcet_optimization.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/soft_condorcet_optimization.py new file mode 100644 index 0000000..3ab4da3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/soft_condorcet_optimization.py @@ -0,0 +1,328 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Find ratings by minimizing a differentiable Kendall-tau distance. + +This is an idea for an optimization algorithm motivated by work on +voting-as-evaluation (VasE); see go/gdm-vase. Please contact lanctot@ if you +have any comments or questions. + +This method was first inspired by Condorcet's vision that there is an underlying +true ranking and that voting rules are simply noisy estimates of the ground +truth rank. 
As shown in Section 8.3 of the Handbook of Computational Social +Choice (https://www.cse.unsw.edu.au/~haziz/comsoc.pdf) the maximum likelihood +estimate leads to a ranking that minimized the Kendall-tau distance to the +votes, which is precisely what the Kemeny-Young voting method does. However, +its complexity is O(m!) where m is the number of alternatives. Also, it is not +clear how the social choice methods handle sparsity in the ranking data. + +This method, Soft Condorcet Optimization (SCO), assigns a numerical rating to +each alternative: r_i, and defines the loss to be: + + sum_{v in Votes} sum_{alternatives a, b in v s.t. a > b} D(r_a, r_b) + +If D is defined to be: + 0 if r_a - r_b > 0 (r_a > r_b -> correct ordering) + or 1 otherwise (r_b >= r_a -> incorrect ordering) + +then the loss above is the sum of Kendall-tau distances and the minimum +corresponds to the solution Kemeny-Young would find. But, it's not +differentiable. In SCO, we replace D by a sigmoid (smooth step function). + + D(r_a, r_b) = sigmoid((r_b - r_a) / tau) + = sigmoid(Delta_{ab}) + +where sigmoid(x) = 1.0 / (1.0 + exp(-x)). The partial derivatives of D(r_a, r_b) + - w.r.t r_a: is sigmoid(Delta_{ab}) (1 - sigmoid(Delta_{ab})) (-1/tau) + - w.r.t r_b: is sigmoid(Delta_{ab}) (1 - sigmoid(Delta_{ab})) (1/tau). + +which makes the losses easy to compute for any batch of votes. + +We call this loss the "sigmoid loss", and it is implemented in the +SoftCondorcetOptimizer class. There is also the Fenchel-Young loss, as described +in Section 3.3 of the paper, which uses a similar gradient descent form but +optimizes a different loss based on perturbed optimizers in machine learning. +The optimizer using the Fenchel-Young loss is implemented in the +FenchelYoungOptimizer class. + +Note: this python implementation was the one used for the results in the +original paper. For a faster version, see the C++ implementation in +evaluation/soft_condorcet_optimization.h which is exposed via python bindings +in pyspiel (for an example use from Python, see voting/examples/atari.py). +""" + +import abc +import collections +from absl import logging +import numpy as np +from open_spiel.python.voting import base + + +class Optimizer(abc.ABC): + """Optimizer without a gradient.""" + + def __init__( + self, + profile: base.PreferenceProfile, + batch_size: int, + rating_lower_bound: float, + rating_upper_bound: float, + compute_norm_freq: int, + initial_param_noise: float, + verbose: bool = False, + ): + self._verbose = verbose + self._profile = profile + # Ungroup the profile (make all the votes have weight 1) to make it easier + # to sample from when assembling batches. + self._profile.ungroup() + self._num_alternatives = self._profile.num_alternatives() + if rating_upper_bound <= rating_lower_bound: + raise ValueError( + f"Upper bound ({rating_upper_bound}) must be higher than lower" + f" bound ({rating_lower_bound})." 
+ ) + + self._rating_ub = rating_upper_bound + self._rating_lb = rating_lower_bound + self._batch_size = batch_size + self._compute_norm_freq = compute_norm_freq + midpoint_rating = ( + self._rating_ub - self._rating_lb + ) / 2.0 + self._rating_lb + self._ratings = np.ones(self._profile.num_alternatives(), dtype=np.float32) + self._ratings.fill(midpoint_rating) + self._initial_noise = np.zeros(self._num_alternatives, dtype=np.float32) + if initial_param_noise > 0.0: + self._initial_noise = ( + np.random.rand(self._num_alternatives) * initial_param_noise + ) + self._ratings = self._ratings + self._initial_noise + self._avg_l2_grad_norm = 0 + self._avg_l1_sum_grad_norm = 0 + self._total_iterations = 0 + + @property + def ratings(self) -> np.ndarray: + return self._ratings + + @property + def initial_noise(self) -> np.ndarray: + return self._initial_noise + + @property + def total_iterations(self) -> int: + return self._total_iterations + + @property + def avg_l2_grad_norm(self) -> float: + return self._avg_l2_grad_norm + + @property + def avg_l1_sum_grad_norm(self) -> float: + return self._avg_l1_sum_grad_norm + + def _gradient(self, ratings: np.ndarray, batch: np.ndarray) -> np.ndarray: + raise NotImplementedError() + + def step(self, learning_rate: float, batch: np.ndarray) -> np.ndarray: + """Applies one step of gradient descent on the batch. + + Args: + learning_rate: a step size for the update. + batch: the batch of votes (integer indices) + + Returns: + gradient: the gradient over all parameters. + """ + gradient = self._gradient(self._ratings, batch) + self._ratings = self._ratings - learning_rate * gradient + self._ratings = np.clip(self._ratings, self._rating_lb, self._rating_ub) + return gradient + + def ranking(self) -> base.PreferenceList: + """Return a sorted list by decreasing rating.""" + sorted_indices = np.argsort(-self._ratings) + return [self._profile.alternatives[i] for i in sorted_indices] + + def run_solver(self, + iterations: int = 1000, + learning_rate: float = 0.01) -> tuple[np.ndarray, + base.PreferenceList]: + """Soft Condorcet optimizer.""" + + l1_sum_norms = [] + l2_norms = [] + batch = np.arange(self._profile.num_votes(), dtype=int) + for i in range(iterations): + self._total_iterations += 1 + if self._batch_size > 0: + # SGD case: Sample a batch of votes. 
+ batch = np.random.randint(self._profile.num_votes(), + size=self._batch_size) + gradient = self.step(learning_rate, batch) + l2_norms.append(np.linalg.norm(gradient)) + l1_sum_norms.append(np.absolute(gradient).sum()) + if (i - 1) % self._compute_norm_freq == 0: + self._avg_l1_sum_grad_norm = ( + np.asarray(l1_sum_norms).sum() / self._compute_norm_freq) + self._avg_l2_grad_norm = ( + np.asarray(l2_norms).sum() / self._compute_norm_freq) + l2_norms = [] + l1_sum_norms = [] + if self._verbose: + logging.info("L1 gradient norm = %d", self._avg_l1_sum_grad_norm) + logging.info("L2 gradient norm = %d", self._avg_l2_grad_norm) + return self._ratings, self.ranking() + + def approximate_posterior(self, + num_posterior_samples: int, + num_cov_samples: int) -> np.ndarray: + """Stochastic Gradient Descent as Approximate Bayesian Inference.""" + + gradients = [] + for _ in range(num_cov_samples): + batch = np.random.randint(self._profile.num_votes(), + size=self._batch_size) + gradient = np.asarray(self._gradient(self._ratings, batch)) + gradients.append(gradient) + gradients = np.stack(gradients, axis=0) + + gradients_centered = gradients - gradients.mean(axis=0, keepdims=True) + cov = np.dot(gradients_centered.T, gradients_centered) / num_cov_samples + # cov_factor = np.linalg.cholesky(cov) # cov = cov_factor.dot(cov_factor.T) + + coeff = 2 * self._batch_size / self._profile.num_votes() + precon = coeff * np.linalg.pinv(cov) + + samples = [] + + sample = np.array(self._ratings) + for _ in range(num_posterior_samples): + batch = np.random.randint(self._profile.num_votes(), + size=self._batch_size) + gradient = self._gradient(sample, batch) + sample -= precon.dot(gradient) + sample = np.clip(sample, self._rating_lb, self._rating_ub) + samples.append(sample) + + samples = np.stack(samples, axis=0) + + return samples + + +class SoftCondorcetOptimizer(Optimizer): + """Soft Condorcet optimizer.""" + + def __init__( + self, + profile: base.PreferenceProfile, + batch_size: int = 0, # full GD by default + rating_lower_bound: float = 0.0, + rating_upper_bound: float = 1000.0, + compute_norm_freq: int = 1000, + initial_param_noise: float = 0.0, + temperature: float = 1.0, + ): + super().__init__( + profile, + batch_size, + rating_lower_bound, + rating_upper_bound, + compute_norm_freq, + initial_param_noise, + ) + self._temperature = temperature + + def _gradient(self, ratings: np.ndarray, batch: np.ndarray) -> np.ndarray: + """Compute the gradient of a batch of data. 
Explained above.""" + + alt_idx = self._profile.alternatives_dict + wins_dict = collections.defaultdict(lambda: 0) + grad = np.zeros(self._profile.num_alternatives(), dtype=np.float32) + for idx in batch: + vote = self._profile.votes[idx] + vote_len = len(vote.vote) + for i in range(vote_len): + for j in range(i+1, vote_len): + wins_dict[(vote.vote[i], vote.vote[j])] += vote.weight + # print(dict(wins_dict)) + for alt_tuple, weight in wins_dict.items(): + alt_a, alt_b = alt_tuple + a_idx = alt_idx[alt_a] + b_idx = alt_idx[alt_b] + # print(f"{alt_a} ({a_idx}) {alt_b} ({b_idx}) {weight}") + delta_ab = ((ratings[b_idx] - ratings[a_idx]) + / self._temperature) + sigma_ab = 1.0 / (1.0 + np.exp(-delta_ab)) + grad[a_idx] -= (weight * sigma_ab * (1.0 - sigma_ab) + / self._temperature) + grad[b_idx] += (weight * sigma_ab * (1.0 - sigma_ab) + / self._temperature) + grad /= len(batch) + return grad + + +class FenchelYoungOptimizer(Optimizer): + """Replace gradient by Fenchel Young loss gradient.""" + + def __init__( + self, + profile: base.PreferenceProfile, + batch_size: int = 0, # full GD by default + rating_lower_bound: float = 0.0, + rating_upper_bound: float = 1000.0, + compute_norm_freq: int = 1000, + initial_param_noise: float = 0.0, + sigma: float = 100.0, + ): + super().__init__( + profile, + batch_size, + rating_lower_bound, + rating_upper_bound, + compute_norm_freq, + initial_param_noise, + ) + self._sigma = sigma + + def _gradient(self, ratings: np.ndarray, batch: np.ndarray) -> np.ndarray: + """Compute FY gradient y_eps - y.""" + alt_idx = self._profile.alternatives_dict + grad = np.zeros(self._profile.num_alternatives(), dtype=np.float32) + for idx in batch: + vote = self._profile.votes[idx] + if vote.weight != 1: + raise ValueError("Fenchel Young Optimizer only works with weight 1.") + vote_len = len(vote.vote) + target_ranking = np.arange(vote_len).astype(np.float32) + player_ids = [alt_idx[a] for a in vote.vote] + gumbel_noise = np.random.gumbel(loc=0.0, scale=1.0, size=vote_len).astype( + np.float32 + ) + # Sample one perturbed ranking. Could do averages of multiple. + predicted_ratings = ratings[player_ids] + gumbel_noise * self._sigma + # Randomize tie-breaking by shuffling and unshuffling. + shuffled = np.random.permutation(len(player_ids)) + unshuffle = np.argsort(shuffled) + predicted_ranking = np.argsort(np.argsort(-predicted_ratings[shuffled]))[ + unshuffle + ].astype(np.float32) + + local_grad = predicted_ranking - target_ranking + # Flipping the sign due to argmin in ranking: Since the loss was derived + # for f(x)=argmax x, and g(x)=argmin x=f(-x), the gradient g'=-f'(-x). + grad[player_ids] += -local_grad + grad /= len(batch) + return grad diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/soft_condorcet_optimization_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/soft_condorcet_optimization_test.py new file mode 100644 index 0000000..3bbc5b6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/soft_condorcet_optimization_test.py @@ -0,0 +1,111 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Basic tests for Soft Condorcet Optimization.""" + +from absl.testing import absltest +import numpy as np +import pyspiel +from open_spiel.python.voting import base +from open_spiel.python.voting import soft_condorcet_optimization as sco + +SEED = 0 + + +class SCOTest(absltest.TestCase): + """Soft Condorcet Optimization tests.""" + + def test_simple_case(self): + # Simple case: a > b > c + profile = base.PreferenceProfile(votes=[["a", "b", "c"]]) + solver = sco.SoftCondorcetOptimizer(profile, temperature=1) + ratings, ranking = solver.run_solver(1000, learning_rate=0.01) + alt_idx = profile.alternatives_dict + for alt in ranking: + print(f"{alt}: {ratings[alt_idx[alt]]}") + self.assertGreater(ratings[0], ratings[1]) + self.assertGreater(ratings[1], ratings[2]) + + def test_meeple_pentathlon(self): + """Meeple pentathlon from the VasE paper.""" + profile = base.PreferenceProfile( + votes=[ + ["A", "B", "C"], + ["A", "C", "B"], + ["C", "A", "B"], + ["C", "A", "B"], + ["B", "C", "A"], + ] + ) + solver = sco.SoftCondorcetOptimizer(profile, batch_size=4, temperature=1) + ratings, ranking = solver.run_solver(10000, learning_rate=0.01) + alt_idx = profile.alternatives_dict + for alt in ranking: + print(f"{alt}: {ratings[alt_idx[alt]]}") + # Correct ranking is C > A > B. + self.assertGreater(ratings[2], ratings[0]) + self.assertGreater(ratings[0], ratings[1]) + + def test_cpp_meeple_pentathlon_sigmoid(self): + # Tests the C++ implementation of the SCO with sigmoid solver. + profile = base.PreferenceProfile( + votes=[ + ["A", "B", "C"], + ["A", "C", "B"], + ["C", "A", "B"], + ["C", "A", "B"], + ["B", "C", "A"], + ] + ) + cpp_sco_solver = pyspiel.sco.SoftCondorcetOptimizer( + profile.to_list_of_tuples(), + rating_lower_bound=-100.0, + rating_upper_bound=100.0, + batch_size=4, + temperature=1, + rng_seed=SEED, + ) + cpp_sco_solver.run_solver(10000, learning_rate=0.01) + ratings_dict = cpp_sco_solver.ratings() + self.assertGreater(ratings_dict["C"], ratings_dict["A"]) + self.assertGreater(ratings_dict["A"], ratings_dict["B"]) + + def test_cpp_meeple_pentathlon_fenchel_young(self): + # Tests the C++ implementation of the FY solver. + profile = base.PreferenceProfile( + votes=[ + ["A", "B", "C"], + ["A", "C", "B"], + ["C", "A", "B"], + ["C", "A", "B"], + ["B", "C", "A"], + ] + ) + cpp_fy_solver = pyspiel.sco.FenchelYoungOptimizer( + profile.to_list_of_tuples(), + rating_lower_bound=-100.0, + rating_upper_bound=100.0, + batch_size=4, + rng_seed=SEED, + ) + cpp_fy_solver.run_solver(10000, learning_rate=0.01) + ratings_dict = cpp_fy_solver.ratings() + # C is not necessarily better than A here, just like with Elo. + # But both should have higher ratings than B. 
+ self.assertGreater(ratings_dict["C"], ratings_dict["B"]) + self.assertGreater(ratings_dict["A"], ratings_dict["B"]) + + +if __name__ == "__main__": + np.random.seed(SEED) + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/stv.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/stv.py new file mode 100644 index 0000000..8ab1c07 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/stv.py @@ -0,0 +1,200 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Single Transferrable Vote (STV) method. + +Based on https://en.wikipedia.org/wiki/Single_transferable_vote. +""" + +from typing import Dict, List, Union +from open_spiel.python.voting import base + + +class MutableVote(object): + """A mutable vote annotated with the current preferred alternative. + + This is used to keep track of votes and which index (into the preference list) + is currently active, i.e. the most preferred. When votes get used to determine + winners or elimintations, some of these votes get "transfered" down to the + next alternative. To transfer the vote, the index here is incremented to + indicate that this vote is now representing a vote for the next highest + alternative. + """ + + def __init__(self, idx: int, weight: int, vote: List[base.AlternativeId]): + self.idx = idx + self.weight = weight + self.vote = vote + + +class STVVoting(base.AbstractVotingMethod): + """Implements STV method.""" + + def __init__( + self, num_winners: Union[int, None] = None, verbose: bool = False + ): + """Construct an instance of STV with the specified number of winners. + + Args: + num_winners: number of winners. Should be less than number of + alternatives (m). If not specified, defaults to int(m/2). + verbose: whether or not to print debug information as STV is running. 
+ """ + self._num_winners = num_winners + self._verbose = verbose + + def name(self) -> str: + return f"single_transferable_vote(num_winners={self._num_winners})" + + def _is_still_active( + self, + alternative: base.AlternativeId, + winners: List[base.AlternativeId], + losers: List[base.AlternativeId], + ) -> bool: + """Returns whether the alternative is still in the running.""" + return alternative not in winners and alternative not in losers + + def _next_idx_in_the_running( + self, + mutable_vote: MutableVote, + winners: List[base.AlternativeId], + losers: List[base.AlternativeId], + ) -> int: + """"Returns the next index in the list that is still in the running.""" + new_idx = mutable_vote.idx + 1 + while (new_idx < len(mutable_vote.vote) and + not self._is_still_active(mutable_vote.vote[new_idx], winners, + losers)): + new_idx += 1 + return new_idx + + def _initial_scores_for_round( + self, + profile: base.PreferenceProfile, + winners: List[base.AlternativeId], + losers: List[base.AlternativeId], + ) -> Dict[base.AlternativeId, float]: + """Returns round's initial scores for alternatives still in the running.""" + alt_scores = {} + for alt in profile.alternatives: + if self._is_still_active(alt, winners, losers): + alt_scores[alt] = 0 + return alt_scores + + def _remove_winning_votes( + self, + winning_alt: base.AlternativeId, + num_to_remove: int, + all_votes: List[MutableVote], + ): + while num_to_remove > 0: + for mutable_vote in all_votes: + if (mutable_vote.idx < len(mutable_vote.vote) and + mutable_vote.vote[mutable_vote.idx] == winning_alt): + removing_now = min(mutable_vote.weight, num_to_remove) + mutable_vote.weight -= removing_now + num_to_remove -= removing_now + if num_to_remove == 0: + break + + def run_election(self, profile: base.PreferenceProfile) -> base.RankOutcome: + assert self.is_valid_profile(profile) + winners = [] + losers = [] + winner_scores = [] + loser_scores = [] + votes = profile.votes + total_votes = profile.total_weight() + m = profile.num_alternatives() + num_winners = self._num_winners + if num_winners is None: + num_winners = int(m/2) + if self._verbose: + print("Warning: number of winners not specified." + + f"Choosing {num_winners}") + assert num_winners < m + quota = int(total_votes / float(num_winners + 1) + 1) + # Table holds a list of the IndexAndWeightedVote. The index corresponds to + # the current alternative that this vote is representing. They all start at + # 0 at the start, corresponding to their highest preference, and they get + # incremented as they become used up. + all_votes: List[MutableVote] = [] + for vote in votes: + all_votes.append(MutableVote(idx=0, weight=vote.weight, vote=vote.vote)) + while len(winners) + len(losers) < m: + scores = self._initial_scores_for_round(profile, winners, losers) + for mutable_vote in all_votes: + if (mutable_vote.idx < len(mutable_vote.vote) and + mutable_vote.weight > 0): + alt = mutable_vote.vote[mutable_vote.idx] + scores[alt] += mutable_vote.weight + sorted_scores = sorted(scores.items(), key=lambda item: item[1], + reverse=True) + best_score = sorted_scores[0][1] + if best_score >= quota: + # Quota reached. A candidate wins! + if self._verbose: + print(f"Quota {quota} reached. Candidate {sorted_scores[0][0]} wins!") + winning_alt = sorted_scores[0][0] + winners.append(winning_alt) + winner_scores.append(best_score) + surplus = sorted_scores[0][1] - quota + # Remove votes that contributed to the winner, up to the quota. 
+ self._remove_winning_votes(winning_alt, quota, all_votes) + # Then, convert all the rest. + num_converted = 0 + for mutable_vote in all_votes: + if (mutable_vote.idx < len(mutable_vote.vote) and + mutable_vote.vote[mutable_vote.idx] == winning_alt and + mutable_vote.weight > 0): + # find the next one in the list still in the running. + new_idx = self._next_idx_in_the_running(mutable_vote, winners, + losers) + mutable_vote.idx = new_idx + num_converted += mutable_vote.weight + assert num_converted == surplus + else: + # No winner, eliminate the bottom candidate. + eliminated_alt = sorted_scores[-1][0] + eliminated_score = sorted_scores[-1][1] + if self._verbose: + print(f"No winner. Quota = {quota}. Eliminating candidate: " + + f"{eliminated_alt} with score: {eliminated_score}") + elim_count = sorted_scores[-1][1] + losers.insert(0, eliminated_alt) + loser_scores.insert(0, eliminated_score) + # All of the votes with this alternative as the top is converted. + votes_counted = 0 + for mutable_vote in all_votes: + if (mutable_vote.idx < len(mutable_vote.vote) and + mutable_vote.vote[mutable_vote.idx] == eliminated_alt and + mutable_vote.weight > 0): + # find the next one in the list still in the running. + new_idx = self._next_idx_in_the_running(mutable_vote, winners, + losers) + mutable_vote.idx = new_idx + votes_counted += mutable_vote.weight + assert votes_counted == elim_count + ranking = winners + losers + scores = [] + win_score_base = profile.num_alternatives() * 2 + lose_score_base = profile.num_alternatives() + for winner_score in winner_scores: + scores.append(float(str(win_score_base) + "." + str(winner_score))) + win_score_base -= 1 + for loser_score in loser_scores: + scores.append(float(str(lose_score_base) + "." + str(loser_score))) + lose_score_base -= 1 + outcome = base.RankOutcome(rankings=ranking, scores=scores) + return outcome diff --git a/scenarios/bargaining/open_spiel/open_spiel/python/voting/stv_test.py b/scenarios/bargaining/open_spiel/open_spiel/python/voting/stv_test.py new file mode 100644 index 0000000..5fb835d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/python/voting/stv_test.py @@ -0,0 +1,69 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
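The composite scores built at the end of `run_election` above pack two numbers into one float: the integer part is a rank slot (winners count down from `2 * m`, losers from `m`, where `m` is the number of alternatives), and the digits after the decimal point are the alternative's vote tally in the round it won or was eliminated. A small decoding sketch (illustrative only; the value 14.7 is taken from the 7-alternative example in the tests below):

```python
# Decode a composite STV score such as 14.7 (m = 7 alternatives).
score = 14.7
rank_slot = int(score)                        # 14 = 2*m, i.e. the first winner slot
round_tally = int(str(score).split(".")[1])   # 7 votes in the round the quota was met
print(rank_slot, round_tally)                 # -> 14 7
```

Because the integer part dominates the comparison, sorting by these scores reproduces the ranking; the fractional part is informational only (a tally with a trailing zero, e.g. 10, would not survive the round trip through `float`).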
+ +"""Tests for open_spiel.python.voting.stv.""" + +from absl.testing import absltest +from absl.testing import parameterized + +from open_spiel.python.voting import base +from open_spiel.python.voting import stv + + +class STVTest(parameterized.TestCase): + @parameterized.named_parameters(("four", 4), ("one", 1)) + def test_stv_records_number(self, num): + method = stv.STVVoting(num_winners=num) + self.assertEqual( + method.name(), f"single_transferable_vote(num_winners={num})" + ) + + def test_ranked_pairs_wikipedia_example(self): + alternatives = ["Orange", "Pear", "Strawberry", "Cake", "Chocolate", + "Hamburger", "Chicken"] + votes = [ + base.WeightedVote(4, ["Orange", "Pear"]), + base.WeightedVote(7, ["Pear", "Strawberry", "Cake"]), + base.WeightedVote(1, ["Strawberry", "Cake", "Pear"]), + base.WeightedVote(3, ["Cake", "Chocolate", "Strawberry"]), + base.WeightedVote(1, ["Cake", "Chocolate", "Hamburger"]), + base.WeightedVote(4, ["Hamburger"]), + base.WeightedVote(3, ["Chicken", "Hamburger"]), + ] + profile = base.PreferenceProfile(votes=votes, + alternatives=alternatives) + method = stv.STVVoting(num_winners=3) + outcome = method.run_election(profile) + self.assertListEqual(outcome.ranking, + ["Pear", "Cake", "Hamburger", "Orange", "Chicken", + "Strawberry", "Chocolate"]) + self.assertListEqual(outcome.scores, [14.7, 13.6, 12.7, 7.4, 6.3, 5.2, 4.0]) + + def test_meeple_pentathlon(self): + alternatives = ["A", "B", "C"] + votes = [ + base.WeightedVote(1, ["A", "B", "C"]), + base.WeightedVote(1, ["A", "C", "B"]), + base.WeightedVote(2, ["C", "A", "B"]), + base.WeightedVote(1, ["B", "C", "A"]), + ] + profile = base.PreferenceProfile(votes=votes, alternatives=alternatives) + method = stv.STVVoting() + outcome = method.run_election(profile) + self.assertListEqual(outcome.ranking, ["C", "A", "B"]) + self.assertListEqual(outcome.scores, [6.3, 3.2, 2.1]) + + +if __name__ == "__main__": + absltest.main() diff --git a/scenarios/bargaining/open_spiel/open_spiel/rust/CMakeLists.txt b/scenarios/bargaining/open_spiel/open_spiel/rust/CMakeLists.txt new file mode 100644 index 0000000..58e7568 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/rust/CMakeLists.txt @@ -0,0 +1,34 @@ +set(RUST_BINDINGS ${RUST_BINDINGS} + src/rust_open_spiel.cc + src/rust_open_spiel.h +) + +set(RUST_API_FILES + Cargo.toml + build.rs + src/rust_open_spiel.rs + src/example.rs +) + +# Note: needs to be SHARED rather than MODULE to work on MacOS +add_library(rust_spiel SHARED ${RUST_BINDINGS} ${OPEN_SPIEL_OBJECTS}) + +# Copy the files keeping the directories intact +foreach(rust_api_file IN LISTS RUST_API_FILES) + get_filename_component(file_dir ${rust_api_file} DIRECTORY) + file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${file_dir}) + file(COPY ${rust_api_file} DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/${file_dir}) +endforeach(rust_api_file) + +add_custom_target(rust_bindgen ALL $ENV{HOME}/.cargo/bin/bindgen ${CMAKE_CURRENT_SOURCE_DIR}/src/rust_open_spiel.h -o ${CMAKE_CURRENT_BINARY_DIR}/src/open_spiel_bindings.rs + DEPENDS rust_spiel) + +add_custom_target(rust_open_spiel ALL cargo build + DEPENDS rust_spiel rust_bindgen + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + +add_test(NAME rust_example_test COMMAND cargo run --example example) +set_property(TEST rust_example_test + PROPERTY ENVIRONMENT + LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}; + TEST_SRCDIR=${CMAKE_CURRENT_BINARY_DIR}) diff --git a/scenarios/bargaining/open_spiel/open_spiel/rust/Cargo.toml 
b/scenarios/bargaining/open_spiel/open_spiel/rust/Cargo.toml new file mode 100644 index 0000000..04449f4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/rust/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "libopen_spiel-sys" +version = "1.0.2" +links = "rust_spiel" +build = "build.rs" +edition = "2018" +crate_type = "lib" + +[dependencies] +libc = "0.2" + +[lib] +name = "rust_open_spiel" +path = "src/rust_open_spiel.rs" +test = false +bench = false + +[[example]] +name = "example" +path = "src/example.rs" +test = false +bench = false + +[build-dependencies] +cc = { version = "1.0", features = ["parallel"] } +pkg-config = "0.3" diff --git a/scenarios/bargaining/open_spiel/open_spiel/rust/README.md b/scenarios/bargaining/open_spiel/open_spiel/rust/README.md new file mode 100644 index 0000000..83b32ac --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/rust/README.md @@ -0,0 +1,18 @@ +# OpenSpiel Rust API + +*Note:* This API is no longer maintained. + +This is a basic [Rust](https://www.rust-lang.org/) API for OpenSpiel. Please +note that it is currently experimental and may not work as expected. If you use +it, please report any issues. Fixes and improvements are more than welcome! + +See the `CMakeLists.txt` to see how it is setup: a dynamic shared library is +created similarly to python extension (`librust_spiel.so`). A simple rust crate +is created in this directory using `cargo build` and a simple example is run +using cargo as well. Note that currently `LD_LIBRARY_PATH` must include the +location of the dynamic library so that it gets properly loaded at run time. + +Note: this API currently only supports turn-based games. To support +simultaneous-move games, several API functions would need to be added, such as +legal actions for specific players, observation and information state tensors +for specific players, and apply action for joint actions. 
diff --git a/scenarios/bargaining/open_spiel/open_spiel/rust/build.rs b/scenarios/bargaining/open_spiel/open_spiel/rust/build.rs new file mode 100644 index 0000000..9039f5d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/rust/build.rs @@ -0,0 +1,8 @@ +use std::env; + +fn main() { + let cwd = env::current_dir().unwrap(); + let path_str = cwd.into_os_string().into_string().unwrap(); + println!("cargo:rustc-link-search={}", path_str); + println!("cargo:rustc-link-lib=dylib=rust_spiel"); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/rust/src/example.rs b/scenarios/bargaining/open_spiel/open_spiel/rust/src/example.rs new file mode 100644 index 0000000..15b1d14 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/rust/src/example.rs @@ -0,0 +1,95 @@ +use rust_open_spiel::*; + +pub fn play_tic_tac_toe() { + let game = Game::new("tic_tac_toe"); + println!("The short name is: {}", game.short_name()); + println!("The long name is: {}", game.long_name()); + println!("Number of players: {}", game.num_players()); + println!("Number of distinct actions: {}", game.num_distinct_actions()); + println!("Max game length: {}", game.max_game_length()); + + let state = game.new_initial_state(); + println!("Initial state:\n{}", state.to_string()); + + let clone = state.clone(); + println!("Cloned initial state:\n{}", clone.to_string()); + + while !state.is_terminal() { + println!(""); + println!("State:\n{}", state.to_string()); + let legal_actions = state.legal_actions(); + let player = state.current_player(); + println!("Legal actions: "); + let action = legal_actions[0]; + for a in legal_actions { + println!(" {}: {}", a, state.action_to_string(player, a)); + } + println!("Taking action {}: {}", action, state.action_to_string(player, action)); + state.apply_action(action); + } + + println!("Terminal state reached:\n{}\n", state.to_string()); + let returns = state.returns(); + for i in 0..game.num_players() { + println!("Utility for player {} is {}", i, returns[i as usize]); + } +} + +pub fn play_tic_tac_toe_with_bots() { + let game = Game::new("tic_tac_toe"); + let state = game.new_initial_state(); + println!("Initial state:\n{}", state.to_string()); + + let mut params = GameParameters::default(); + params.set_int("seed", 42); + + let mut bots = vec![ + create_bot_by_name("uniform_random", &game, 0, ¶ms), + create_bot_by_name("uniform_random", &game, 1, ¶ms), + ]; + + for _ in 0..2 { + while !state.is_terminal() { + let player = state.current_player(); + let action = bots[player as usize].step(&state); + let enemy = 1 - player; + bots[enemy as usize].inform_action(&state, player, action); + state.apply_action(action); + } + for bot in bots.iter_mut() { + bot.restart(); + } + } + + println!("Terminal state reached:\n{}\n", state.to_string()); +} + +#[test] +fn tic_tac_toe_test() { + play_tic_tac_toe(); +} + +#[test] +fn tic_tac_toe_with_bots_test() { + play_tic_tac_toe_with_bots(); +} + +#[test] +fn new_game_with_parameters_test() { + let mut params = GameParameters::default(); + params.set_str("name", "go"); + params.set_int("board_size", 9); + params.set_f64("komi", 7.5); + let game = Game::new_with_parameters(¶ms); + assert_eq!( + params.serialize(), + "board_size=kInt***9***false|||komi=kDouble***7.5***false|||name=kString***go***false" + ); + assert_eq!(game.short_name(), "go"); + assert_eq!(game.observation_shape(), vec![4, 9, 9]); +} + +fn main() { + play_tic_tac_toe(); + play_tic_tac_toe_with_bots(); +} diff --git 
a/scenarios/bargaining/open_spiel/open_spiel/rust/src/open_spiel_bindings.rs b/scenarios/bargaining/open_spiel/open_spiel/rust/src/open_spiel_bindings.rs new file mode 100644 index 0000000..948b515 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/rust/src/open_spiel_bindings.rs @@ -0,0 +1,210 @@ +/* automatically generated by rust-bindgen 0.59.2 */ + +extern "C" { + pub fn NewGameParameters() -> *mut ::std::os::raw::c_void; +} +extern "C" { + pub fn DeleteGameParameters(params_ptr: *mut ::std::os::raw::c_void); +} +extern "C" { + pub fn GameParametersSetInt( + params_ptr: *mut ::std::os::raw::c_void, + key: *const ::std::os::raw::c_char, + value: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn GameParametersSetDouble( + params_ptr: *mut ::std::os::raw::c_void, + key: *const ::std::os::raw::c_char, + value: ::std::os::raw::c_double, + ); +} +extern "C" { + pub fn GameParametersSetString( + params_ptr: *mut ::std::os::raw::c_void, + key: *const ::std::os::raw::c_char, + value: *const ::std::os::raw::c_char, + ); +} +extern "C" { + pub fn GameParametersSerialize( + params_ptr: *mut ::std::os::raw::c_void, + length: *mut ::std::os::raw::c_ulong, + ) -> *mut ::std::os::raw::c_char; +} +extern "C" { + pub fn LoadGame(name: *const ::std::os::raw::c_char) -> *mut ::std::os::raw::c_void; +} +extern "C" { + pub fn LoadGameFromParameters( + params_ptr: *const ::std::os::raw::c_void, + ) -> *mut ::std::os::raw::c_void; +} +extern "C" { + pub fn DeleteGame(game_ptr: *mut ::std::os::raw::c_void); +} +extern "C" { + pub fn GameShortName( + game_ptr: *const ::std::os::raw::c_void, + length: *mut ::std::os::raw::c_ulong, + ) -> *mut ::std::os::raw::c_char; +} +extern "C" { + pub fn GameLongName( + game_ptr: *const ::std::os::raw::c_void, + length: *mut ::std::os::raw::c_ulong, + ) -> *mut ::std::os::raw::c_char; +} +extern "C" { + pub fn GameNewInitialState( + game_ptr: *const ::std::os::raw::c_void, + ) -> *mut ::std::os::raw::c_void; +} +extern "C" { + pub fn GameNumPlayers(game_ptr: *const ::std::os::raw::c_void) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn GameMaxGameLength(game_ptr: *const ::std::os::raw::c_void) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn GameNumDistinctActions(game_ptr: *const ::std::os::raw::c_void) + -> ::std::os::raw::c_int; +} +extern "C" { + pub fn GameObservationTensorShape( + game_ptr: *const ::std::os::raw::c_void, + size: *mut ::std::os::raw::c_int, + ) -> *mut ::std::os::raw::c_int; +} +extern "C" { + pub fn GameInformationStateTensorShape( + game_ptri: *const ::std::os::raw::c_void, + size: *mut ::std::os::raw::c_int, + ) -> *mut ::std::os::raw::c_int; +} +extern "C" { + pub fn DeleteState(state_ptr: *mut ::std::os::raw::c_void); +} +extern "C" { + pub fn StateClone(state_ptr: *const ::std::os::raw::c_void) -> *mut ::std::os::raw::c_void; +} +extern "C" { + pub fn StateToString( + state_ptr: *const ::std::os::raw::c_void, + length: *mut ::std::os::raw::c_ulong, + ) -> *mut ::std::os::raw::c_char; +} +extern "C" { + pub fn StateLegalActions( + state_ptr: *const ::std::os::raw::c_void, + num_legal_actions: *mut ::std::os::raw::c_int, + ) -> *mut ::std::os::raw::c_long; +} +extern "C" { + pub fn StateCurrentPlayer(state_ptr: *const ::std::os::raw::c_void) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn StateActionToString( + state_ptr: *const ::std::os::raw::c_void, + player: ::std::os::raw::c_int, + action: ::std::os::raw::c_long, + length: *mut ::std::os::raw::c_ulong, + ) -> *mut ::std::os::raw::c_char; +} +extern "C" { + pub fn 
StateIsTerminal(state_ptr: *const ::std::os::raw::c_void) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn StateIsChanceNode(state_ptr: *const ::std::os::raw::c_void) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn StateNumPlayers(state_ptr: *const ::std::os::raw::c_void) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn StateApplyAction(state_ptr: *mut ::std::os::raw::c_void, action: ::std::os::raw::c_long); +} +extern "C" { + pub fn StateReturns( + state_ptr: *const ::std::os::raw::c_void, + returns_buf: *mut ::std::os::raw::c_double, + ); +} +extern "C" { + pub fn StatePlayerReturn( + state_ptr: *const ::std::os::raw::c_void, + player: ::std::os::raw::c_int, + ) -> f64; +} +extern "C" { + pub fn StateChanceOutcomeProbs( + state_ptr: *const ::std::os::raw::c_void, + size: *mut ::std::os::raw::c_int, + ) -> *mut f64; +} +extern "C" { + pub fn StateObservationString( + state_ptr: *const ::std::os::raw::c_void, + length: *mut ::std::os::raw::c_ulong, + ) -> *mut ::std::os::raw::c_char; +} +extern "C" { + pub fn StateInformationStateString( + state_ptr: *const ::std::os::raw::c_void, + length: *mut ::std::os::raw::c_ulong, + ) -> *mut ::std::os::raw::c_char; +} +extern "C" { + pub fn StateObservationTensorSize( + state_ptr: *const ::std::os::raw::c_void, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn StateInformationStateTensorSize( + state_ptr: *const ::std::os::raw::c_void, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn StateObservationTensor( + state_ptr: *const ::std::os::raw::c_void, + player: ::std::os::raw::c_int, + obs_buf: *mut ::std::os::raw::c_float, + length: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn StateInformationStateTensor( + state_ptr: *const ::std::os::raw::c_void, + player: ::std::os::raw::c_int, + infostate_buf: *mut ::std::os::raw::c_float, + length: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn DeleteBot(bot_ptr: *mut ::std::os::raw::c_void); +} +extern "C" { + pub fn BotStep( + bot_ptr: *mut ::std::os::raw::c_void, + state_ptr: *const ::std::os::raw::c_void, + ) -> ::std::os::raw::c_long; +} +extern "C" { + pub fn BotInformAction( + bot_ptr: *mut ::std::os::raw::c_void, + state_ptr: *const ::std::os::raw::c_void, + player_id: ::std::os::raw::c_int, + action: ::std::os::raw::c_long, + ) -> ::std::os::raw::c_long; +} +extern "C" { + pub fn BotRestart(bot_ptr: *mut ::std::os::raw::c_void); +} +extern "C" { + pub fn BotRegistererCreateByName( + bot_name_ptr: *const ::std::os::raw::c_char, + game_ptr: *const ::std::os::raw::c_void, + player_id: i32, + params_ptr: *const ::std::os::raw::c_void, + ) -> *mut ::std::os::raw::c_void; +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/rust/src/rust_open_spiel.cc b/scenarios/bargaining/open_spiel/open_spiel/rust/src/rust_open_spiel.cc new file mode 100644 index 0000000..f637f31 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/rust/src/rust_open_spiel.cc @@ -0,0 +1,336 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_bots.h" + +using ::open_spiel::Action; +using ::open_spiel::ActionsAndProbs; +using ::open_spiel::Bot; +using ::open_spiel::BotRegisterer; +using ::open_spiel::Game; +using ::open_spiel::GameParameter; +using ::open_spiel::GameParameters; +using ::open_spiel::SerializeGameParameters; +using ::open_spiel::State; + +// A number of functions in this file returns pointers to +// dynamically-allocated memory. These are temporary memory buffers used to +// store data that must be freed on the Rust API (rust_open_spiel.rs). + +/* We need this because games are shared pointers and we need to return + raw pointers to objects that contain them.*/ +namespace { +struct GamePointerHolder { + std::shared_ptr ptr; +}; + +template +T* AllocBuf(const std::vector& vec, int* size) { + *size = vec.size(); + size_t num_bytes = *size * sizeof(T); + T* buf = static_cast(malloc(num_bytes)); + memcpy(buf, vec.data(), num_bytes); + return buf; +} + +char* AllocAndCopyString(const std::string& str) { + char* buf = static_cast(malloc(str.length() * sizeof(char))); + strncpy(buf, str.data(), str.length()); + return buf; +} + +} // namespace + +extern "C" { + +/* GameParameters functions. */ +void* NewGameParameters() { + return reinterpret_cast(new GameParameters()); +} + +void DeleteGameParameters(void* params_ptr) { + GameParameters* params = reinterpret_cast(params_ptr); + delete params; +} + +void GameParametersSetInt(void* params_ptr, const char* key, int value) { + GameParameters* params = reinterpret_cast(params_ptr); + params->insert_or_assign(std::string(key), GameParameter(value)); +} + +void GameParametersSetDouble(void* params_ptr, const char* key, double value) { + GameParameters* params = reinterpret_cast(params_ptr); + params->insert_or_assign(std::string(key), GameParameter(value)); +} + +void GameParametersSetString(void* params_ptr, const char* key, + const char* value) { + GameParameters* params = reinterpret_cast(params_ptr); + params->insert_or_assign(std::string(key), GameParameter(std::string(value))); +} + +char* GameParametersSerialize(const void* params_ptr, + unsigned long* length) { // NOLINT + const GameParameters* params = + reinterpret_cast(params_ptr); + std::string serialized = SerializeGameParameters(*params); + *length = serialized.length(); + return AllocAndCopyString(serialized); +} + +/* Game functions. 
*/ +void* LoadGame(const char* name) { + return reinterpret_cast( + new GamePointerHolder{open_spiel::LoadGame(name)}); +} + +void* LoadGameFromParameters(const void* params_ptr) { + const GameParameters* params = + reinterpret_cast(params_ptr); + return reinterpret_cast( + new GamePointerHolder{open_spiel::LoadGame(*params)}); +} + +void DeleteGame(void* game_ptr) { + GamePointerHolder* game = reinterpret_cast(game_ptr); + delete game; +} + +char* GameShortName(const void* game_ptr, unsigned long* length) { // NOLINT + const Game* game = + reinterpret_cast(game_ptr)->ptr.get(); + std::string short_name = game->GetType().short_name; + *length = short_name.length(); + return AllocAndCopyString(short_name); +} + +char* GameLongName(const void* game_ptr, unsigned long* length) { // NOLINT + const Game* game = + reinterpret_cast(game_ptr)->ptr.get(); + std::string long_name = game->GetType().long_name; + *length = long_name.length(); + return AllocAndCopyString(long_name); +} + +void* GameNewInitialState(const void* game_ptr) { + const Game* game = + reinterpret_cast(game_ptr)->ptr.get(); + std::unique_ptr state = game->NewInitialState(); + void* state_ptr = reinterpret_cast(state.release()); + return state_ptr; +} + +int GameNumPlayers(const void* game_ptr) { + const Game* game = + reinterpret_cast(game_ptr)->ptr.get(); + return game->NumPlayers(); +} + +int GameMaxGameLength(const void* game_ptr) { + const Game* game = + reinterpret_cast(game_ptr)->ptr.get(); + return game->MaxGameLength(); +} + +int GameNumDistinctActions(const void* game_ptr) { + const Game* game = + reinterpret_cast(game_ptr)->ptr.get(); + return game->NumDistinctActions(); +} + +int* GameObservationTensorShape(const void* game_ptr, int* size) { + const Game* game = + reinterpret_cast(game_ptr)->ptr.get(); + std::vector shape = game->ObservationTensorShape(); + return AllocBuf(shape, size); +} + +int* GameInformationStateTensorShape(const void* game_ptr, int* size) { + const Game* game = + reinterpret_cast(game_ptr)->ptr.get(); + std::vector shape = game->InformationStateTensorShape(); + return AllocBuf(shape, size); +} + +/* State functions. 
*/ +void DeleteState(void* state_ptr) { + State* state = reinterpret_cast(state_ptr); + delete state; +} + +void* StateClone(const void* state_ptr) { + const State* state = reinterpret_cast(state_ptr); + std::unique_ptr state_copy = state->Clone(); + return reinterpret_cast(state_copy.release()); +} + +char* StateToString(const void* state_ptr, unsigned long* length) { // NOLINT + const State* state = reinterpret_cast(state_ptr); + std::string state_str = state->ToString(); + *length = state_str.length(); + return AllocAndCopyString(state_str); +} + +long* StateLegalActions(const void* state_ptr, // NOLINT + int* num_legal_actions) { + assert(sizeof(long) == sizeof(Action)); // NOLINT + const State* state = reinterpret_cast(state_ptr); + std::vector legal_actions = state->LegalActions(); + return AllocBuf(legal_actions, num_legal_actions); +} + +int StateCurrentPlayer(const void* state_ptr) { + const State* state = reinterpret_cast(state_ptr); + return state->CurrentPlayer(); +} + +char* StateActionToString(const void* state_ptr, int player, int action, + unsigned long* length) { // NOLINT + const State* state = reinterpret_cast(state_ptr); + std::string action_str = state->ActionToString(player, action); + *length = action_str.length(); + return AllocAndCopyString(action_str); +} + +int StateIsTerminal(const void* state_ptr) { + const State* state = reinterpret_cast(state_ptr); + return state->IsTerminal() ? 1 : 0; +} + +int StateIsChanceNode(const void* state_ptr) { + const State* state = reinterpret_cast(state_ptr); + return state->IsChanceNode() ? 1 : 0; +} + +void StateApplyAction(void* state_ptr, long action) { // NOLINT + State* state = reinterpret_cast(state_ptr); + state->ApplyAction(action); +} + +double StatePlayerReturn(const void* state_ptr, int player) { + const State* state = reinterpret_cast(state_ptr); + return state->PlayerReturn(player); +} + +int StateNumPlayers(const void* state_ptr) { + const State* state = reinterpret_cast(state_ptr); + return state->NumPlayers(); +} + +void StateReturns(const void* state_ptr, double* returns_buf) { + const State* state = reinterpret_cast(state_ptr); + std::vector returns = state->Returns(); + memcpy(returns_buf, returns.data(), returns.size() * sizeof(double)); +} + +double* StateChanceOutcomeProbs(const void* state_ptr, int* size) { + const State* state = reinterpret_cast(state_ptr); + ActionsAndProbs chance_outcomes = state->ChanceOutcomes(); + *size = chance_outcomes.size(); + size_t num_bytes = *size * sizeof(double); + double* buf = static_cast(malloc(num_bytes)); + for (int i = 0; i < chance_outcomes.size(); ++i) { + buf[i] = chance_outcomes[i].second; + } + return buf; +} + +char* StateObservationString(const void* state_ptr, + unsigned long* length) { // NOLINT + const State* state = reinterpret_cast(state_ptr); + std::string obs_str = state->ObservationString(); + *length = obs_str.length(); + return AllocAndCopyString(obs_str); +} + +char* StateInformationStateString(const void* state_ptr, + unsigned long* length) { // NOLINT + const State* state = reinterpret_cast(state_ptr); + std::string infostate_str = state->InformationStateString(); + *length = infostate_str.length(); + return AllocAndCopyString(infostate_str); +} + +int StateInformationStateTensorSize(const void* state_ptr) { + const Game* parent_game = + reinterpret_cast(state_ptr)->GetGame().get(); + return parent_game->InformationStateTensorSize(); +} + +int StateObservationTensorSize(const void* state_ptr) { + const Game* parent_game = + 
reinterpret_cast(state_ptr)->GetGame().get(); + return parent_game->ObservationTensorSize(); +} + +void StateObservationTensor(const void* state_ptr, int player, float* obs_buf, + int length) { + const State* state = reinterpret_cast(state_ptr); + state->ObservationTensor(player, absl::MakeSpan(obs_buf, length)); +} + +void StateInformationStateTensor(const void* state_ptr, int player, + float* infostate_buf, int length) { + const State* state = reinterpret_cast(state_ptr); + state->InformationStateTensor(player, absl::MakeSpan(infostate_buf, length)); +} + +/* Bot functions */ +void DeleteBot(void* bot_ptr) { + Bot* bot = reinterpret_cast(bot_ptr); + delete bot; +} + +long BotStep(void* bot_ptr, const void* state_ptr) { /* NOLINT */ + Bot* bot = reinterpret_cast(bot_ptr); + const State* state = reinterpret_cast(state_ptr); + return bot->Step(*state); +} + +void BotInformAction(void* bot_ptr, const void* state_ptr, int player_id, + long action) { /* NOLINT */ + Bot* bot = reinterpret_cast(bot_ptr); + const State* state = reinterpret_cast(state_ptr); + bot->InformAction(*state, player_id, action); +} + +void BotRestart(void* bot_ptr) { + Bot* bot = reinterpret_cast(bot_ptr); + bot->Restart(); +} + +/* BotRegisterer functions */ +void* BotRegistererCreateByName(const char* bot_name_ptr, const void* game_ptr, + int player_id, const void* params_ptr) { + const std::string bot_name(bot_name_ptr); + const GamePointerHolder* game = + reinterpret_cast(game_ptr); + const GameParameters* params = + reinterpret_cast(params_ptr); + std::unique_ptr bot = + BotRegisterer::CreateByName(bot_name, game->ptr, player_id, *params); + return reinterpret_cast(bot.release()); +} + +} /* extern "C" */ diff --git a/scenarios/bargaining/open_spiel/open_spiel/rust/src/rust_open_spiel.h b/scenarios/bargaining/open_spiel/open_spiel/rust/src/rust_open_spiel.h new file mode 100644 index 0000000..950804a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/rust/src/rust_open_spiel.h @@ -0,0 +1,90 @@ +// Copyright 2019 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef __RUST_OPEN_SPIEL_H__ +#define __RUST_OPEN_SPIEL_H__ + +/* A pure C API that wraps the C++ OpenSpiel core. */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* GameParameters functions */ +void* NewGameParameters(); +void DeleteGameParameters(void* params_ptr); +void GameParametersSetInt(void* params_ptr, const char* key, int value); +void GameParametersSetDouble(void* params_ptr, const char* key, double value); +void GameParametersSetString(void* params_ptr, const char* key, + const char* value); +char* GameParametersSerialize(const void* params_ptr, + unsigned long* length); /* NOLINT */ + +/* Game functions. 
*/ +void* LoadGame(const char* name); +void* LoadGameFromParameters(const void* params_ptr); +void DeleteGame(void* game_ptr); +char* GameShortName(const void* game_ptr, unsigned long* length); /* NOLINT */ +char* GameLongName(const void* game_ptr, unsigned long* length); /* NOLINT */ +void* GameNewInitialState(const void* game_ptr); +int GameNumPlayers(const void* game_ptr); +int GameMaxGameLength(const void* game_ptr); +int GameNumDistinctActions(const void* game_ptr); +int* GameObservationTensorShape(const void* game_ptr, int* size); +int* GameInformationStateTensorShape(const void* game_ptri, int* size); + +/* State functions. */ +void DeleteState(void* state_ptr); +void* StateClone(const void* state_ptr); +char* StateToString(const void* state_ptr, unsigned long* length); /* NOLINT */ +long* StateLegalActions(const void* state_ptr, /* NOLINT */ + int* num_legal_actions); +int StateCurrentPlayer(const void* state_ptr); +char* StateActionToString(const void* state_ptr, int player, + long action, /* NOLINT */ + unsigned long* length); /* NOLINT */ +int StateIsTerminal(const void* state_ptr); +int StateIsChanceNode(const void* state_ptr); +int StateNumPlayers(const void* state_ptr); +void StateApplyAction(void* state_ptr, long action); /* NOLINT */ +void StateReturns(const void* state_ptr, double* returns_buf); +double StatePlayerReturn(const void* state_ptr, int player); +double* StateChanceOutcomeProbs(const void* state_ptr, int* size); +char* StateObservationString(const void* state_ptr, + unsigned long* length); /* NOLINT */ +char* StateInformationStateString(const void* state_ptr, + unsigned long* length); /* NOLINT */ +int StateInformationStateTensorSize(const void* state_ptr); +int StateObservationTensorSize(const void* state_ptr); +void StateObservationTensor(const void* state_ptr, int player, float* obs_buf, + int length); +void StateInformationStateTensor(const void* state_ptr, int player, + float* infostate_buf, int length); + +/* Bot functions */ +void DeleteBot(void* bot_ptr); +long BotStep(void* bot_ptr, const void* state_ptr); /* NOLINT */ +void BotInformAction(void* bot_ptr, const void* state_ptr, int player_id, + long action); /* NOLINT */ +void BotRestart(void* bot_ptr); + +/* BotRegisterer functions */ +void* BotRegistererCreateByName(const char* bot_name_ptr, const void* game_ptr, + int player_id, const void* params_ptr); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/scenarios/bargaining/open_spiel/open_spiel/rust/src/rust_open_spiel.rs b/scenarios/bargaining/open_spiel/open_spiel/rust/src/rust_open_spiel.rs new file mode 100644 index 0000000..1b7f92b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/rust/src/rust_open_spiel.rs @@ -0,0 +1,323 @@ +extern crate libc; + +use libc::{c_char, free}; +use std::ffi::CString; +use std::os::raw::c_void; +use std::slice; + +mod open_spiel_bindings; +use open_spiel_bindings::*; + +fn convert_and_free_cstring(c_buf: *mut c_char, len: u64) -> String { + let bytes = unsafe { std::slice::from_raw_parts(c_buf as *const u8, len as usize) }; + let str_slice = unsafe { std::str::from_utf8_unchecked(bytes) }; + let str_buf: String = str_slice.to_owned(); + unsafe { free(c_buf as *mut c_void) }; + str_buf +} + +pub struct GameParameters { + params: *mut c_void, +} + +pub struct State { + state: *mut c_void, +} + +pub struct Game { + game: *mut c_void, +} + +pub struct Bot { + bot: *mut c_void, +} + +impl Default for GameParameters { + fn default() -> Self { + Self { params: unsafe { 
NewGameParameters() } } + } +} + +impl GameParameters { + pub fn set_int(&mut self, key: &str, value: i32) { + let key = CString::new(key).unwrap(); + unsafe { + GameParametersSetInt(self.params, key.as_ptr(), value); + } + } + + pub fn set_f64(&mut self, key: &str, value: f64) { + let key = CString::new(key).unwrap(); + unsafe { + GameParametersSetDouble(self.params, key.as_ptr(), value); + } + } + + pub fn set_str(&mut self, key: &str, value: &str) { + let key = CString::new(key).unwrap(); + let value = CString::new(value).unwrap(); + unsafe { + GameParametersSetString(self.params, key.as_ptr(), value.as_ptr()); + } + } + + pub fn serialize(&self) -> String { + let mut length = 0; + let c_buf: *mut c_char = unsafe { GameParametersSerialize(self.params, &mut length) }; + convert_and_free_cstring(c_buf, length) + } +} + +impl Drop for GameParameters { + fn drop(&mut self) { + unsafe { DeleteGameParameters(self.params) } + } +} + +unsafe impl Send for GameParameters {} +unsafe impl Sync for GameParameters {} + +impl State { + pub fn new(sptr: *mut c_void) -> State { + State { state: sptr } + } + + pub fn current_player(&self) -> i32 { + unsafe { StateCurrentPlayer(self.state) } + } + + pub fn clone(&self) -> State { + unsafe { State { state: StateClone(self.state) } } + } + + pub fn is_chance_node(&self) -> bool { + let ret = unsafe { StateIsChanceNode(self.state) }; + ret == 1 + } + + pub fn is_terminal(&self) -> bool { + let ret = unsafe { StateIsTerminal(self.state) }; + ret == 1 + } + + pub fn num_players(&self) -> i32 { + unsafe { StateNumPlayers(self.state) } + } + + pub fn returns(&self) -> Vec { + let length = self.num_players() as usize; + let mut returns_vec = Vec::with_capacity(length); + unsafe { + StateReturns(self.state, returns_vec.as_mut_ptr()); + returns_vec.set_len(length); + } + returns_vec + } + + pub fn player_return(&self, player: i32) -> f64 { + unsafe { StatePlayerReturn(self.state, player) } + } + + pub fn legal_actions(&self) -> Vec { + let mut c_num_legal_moves = 0; + let c_buf = unsafe { StateLegalActions(self.state, &mut c_num_legal_moves) }; + unsafe { + let vec = slice::from_raw_parts(c_buf, c_num_legal_moves as usize).to_vec(); + free(c_buf as *mut c_void); + vec + } + } + + pub fn chance_outcomes(&self) -> Vec<(i64, f64)> { + let legal_actions: Vec = self.legal_actions(); + let mut size = 0; + let c_buf = unsafe { StateChanceOutcomeProbs(self.state, &mut size) }; + let length = size as usize; + let mut vec = vec![(0, 0.0); length]; + unsafe { + let probs_slice = slice::from_raw_parts(c_buf, length); + for i in 0..length { + vec[i] = (legal_actions[i], probs_slice[i]); + } + free(c_buf as *mut c_void); + } + vec + } + + pub fn apply_action(&self, action: i64) { + unsafe { StateApplyAction(self.state, action) } + } + + pub fn action_to_string(&self, player: i32, action: i64) -> String { + let mut length = 0; + let c_buf: *mut c_char = + unsafe { StateActionToString(self.state, player, action, &mut length) }; + convert_and_free_cstring(c_buf, length) + } + + pub fn to_string(&self) -> String { + let mut length = 0; + let c_buf: *mut c_char = unsafe { StateToString(self.state, &mut length) }; + convert_and_free_cstring(c_buf, length) + } + + pub fn observation_string(&self) -> String { + let mut length = 0; + let c_buf: *mut c_char = unsafe { StateObservationString(self.state, &mut length) }; + convert_and_free_cstring(c_buf, length) + } + + pub fn information_state_string(&self) -> String { + let mut length = 0; + let c_buf: *mut c_char = unsafe { 
StateInformationStateString(self.state, &mut length) }; + convert_and_free_cstring(c_buf, length) + } + + pub fn current_observation_tensor(&self) -> Vec { + self.observation_tensor(self.current_player()) + } + + pub fn current_information_state_tensor(&self) -> Vec { + self.information_state_tensor(self.current_player()) + } + + pub fn observation_tensor(&self, player: i32) -> Vec { + assert!(player >= 0); + let length = unsafe { StateObservationTensorSize(self.state) as usize }; + let mut obs_vec = Vec::with_capacity(length); + unsafe { + StateObservationTensor(self.state, player, obs_vec.as_mut_ptr(), length as i32); + obs_vec.set_len(length); + } + obs_vec + } + + pub fn information_state_tensor(&self, player: i32) -> Vec { + assert!(player >= 0); + let length = unsafe { StateInformationStateTensorSize(self.state) as usize }; + let mut infostate_vec = Vec::with_capacity(length); + unsafe { + StateInformationStateTensor( + self.state, + player, + infostate_vec.as_mut_ptr(), + length as i32, + ); + infostate_vec.set_len(length); + } + infostate_vec + } +} + +impl Drop for State { + fn drop(&mut self) { + unsafe { DeleteState(self.state) } + } +} + +unsafe impl Send for State {} +unsafe impl Sync for State {} + +impl Game { + pub fn new(game_name: &str) -> Self { + let game_name = CString::new(game_name).unwrap(); + Self { game: unsafe { LoadGame(game_name.as_ptr()) } } + } + + pub fn new_with_parameters(parameters: &GameParameters) -> Self { + Self { game: unsafe { LoadGameFromParameters(parameters.params) } } + } + + pub fn short_name(&self) -> String { + let mut length = 0; + let c_buf = unsafe { GameShortName(self.game, &mut length) }; + convert_and_free_cstring(c_buf, length) + } + + pub fn long_name(&self) -> String { + let mut length = 0; + let c_buf = unsafe { GameLongName(self.game, &mut length) }; + convert_and_free_cstring(c_buf, length) + } + + pub fn new_initial_state(&self) -> State { + unsafe { State::new(GameNewInitialState(self.game)) } + } + + pub fn num_players(&self) -> i32 { + unsafe { GameNumPlayers(self.game) } + } + + pub fn max_game_length(&self) -> i32 { + unsafe { GameMaxGameLength(self.game) } + } + + pub fn num_distinct_actions(&self) -> i32 { + unsafe { GameNumDistinctActions(self.game) } + } + + pub fn observation_shape(&self) -> Vec { + let mut size = 0; + let c_buf = unsafe { GameObservationTensorShape(self.game, &mut size) }; + unsafe { + let vec = slice::from_raw_parts(c_buf, size as usize).to_vec(); + free(c_buf as *mut c_void); + vec + } + } + + pub fn information_state_tensor_shape(&self) -> Vec { + let mut size = 0; + let c_buf = unsafe { GameInformationStateTensorShape(self.game, &mut size) }; + unsafe { + let vec = slice::from_raw_parts(c_buf, size as usize).to_vec(); + free(c_buf as *mut c_void); + vec + } + } +} + +impl Drop for Game { + fn drop(&mut self) { + unsafe { DeleteGame(self.game) } + } +} + +unsafe impl Send for Game {} +unsafe impl Sync for Game {} + +impl Bot { + pub fn step(&mut self, state: &State) -> i64 { + unsafe { BotStep(self.bot, state.state) } + } + + pub fn inform_action(&mut self, state: &State, player_id: i32, action: i64) { + unsafe { BotInformAction(self.bot, state.state, player_id, action) }; + } + + pub fn restart(&mut self) { + unsafe { BotRestart(self.bot) }; + } +} + +impl Drop for Bot { + fn drop(&mut self) { + unsafe { DeleteBot(self.bot) } + } +} + +unsafe impl Send for Bot {} +unsafe impl Sync for Bot {} + +pub fn create_bot_by_name( + bot_name: &str, + game: &Game, + player_id: i32, + params: &GameParameters, 
+) -> Bot { + let bot_name = CString::new(bot_name).unwrap(); + let bot = unsafe { + BotRegistererCreateByName(bot_name.as_ptr(), game.game, player_id, params.params) + }; + Bot { bot } +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/scripts/argslib.sh b/scenarios/bargaining/open_spiel/open_spiel/scripts/argslib.sh new file mode 100644 index 0000000..2379edc --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/scripts/argslib.sh @@ -0,0 +1,194 @@ +#!/usr/bin/env bash + +# Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# An ultra-simple comand-line arguments library for bash. Does not support +# spaces in string arguments or argument names. Also: looking up a flag is +# linear since it's an iterative through the array of names (and parsing a +# value, since its type needs to be looked up); this is in order to support bash +# 3 on MacOS, which does not support associative arrays. Worst case: parsing all +# the arguments is quadratic in the number of arguments. Hence, this is a +# barebones command-line argument library and it should only be used for simple +# use cases. There are better libraries for complex use cases (e.g. shflags); +# please use them instead! +# +# Run script with single --help to print argument helper text. +# +# Example usage: +# source argslib.sh +# # Main function is: ArgsLibAddArg name type default_value helper_string +# # where type is one of { bool, int, float, string } +# ArgsLibAddArg arg1 bool true "Arg1 helper text" +# ArgsLibAddArg arg2 int 4 "Arg2 helper text" +# ArgsLibAddArg arg3 float 0.3 "Arg3 helper text" +# ArgsLibAddArg arg4 string helloworld "Arg4 helper text" +# ArgsLibParse $@ +# ArgsLibPrintAll # optional! +# echo $ARG_arg1 +# echo $ARG_arg2 +# . +# . +# . +# + +if [[ -z ${argslib_n} ]]; +then + argslib_n=0 + declare -a argslib_names + declare -a argslib_types + declare -a argslib_defaults + declare -a argslib_values + declare -a argslib_desc +fi + +function _die { + echo "$1" + exit -1 +} + +function _print_usage_exit { + echo "$0 arguments:" + echo "" + for (( i=0; i<$argslib_n; i++ )) + do + j=`expr $i - 1` + echo -n "--${argslib_names[$i]} (" + echo -n "${argslib_types[$i]}): " + echo "[defval=${argslib_defaults[$i]}]" + echo " ${argslib_desc[$i]}" + echo "" + done + exit +} + +function _check_parse_value { + # type value + # TODO: check the values based on the type + case $1 in + bool) + if [ "$2" != "false" -a "$2" != "true" ] + then + _die "Invalid boolean value: $2" + fi + ;; + int) + if ! [[ "$2" =~ ^[-+]?[0-9]+$ ]]; + then + _die "Invalid integer value: $2" + fi + ;; + float) + if ! 
[[ "$2" =~ ^[-+]?[0-9]+[\.]?[0-9]*$ ]]; + then + _die "Invalid float value: $2" + fi + ;; + string) + # Anything goes + ;; + *) + _die "Unrecognized argument type: $1" + ;; + esac + return 0 +} + +function _parse_arg { + # one argment: --name=value + IFS="=" read -ra parts <<< "$@" + [ ${#parts[@]} -eq 2 ] || _die "Incorrect syntax: $@" + for (( i=0; i<$argslib_n; i++ )) + do + if [ "${parts[0]}" = "--${argslib_names[$i]}" ] + then + _check_parse_value ${argslib_types[$i]} ${parts[1]} + argslib_values[$i]=${parts[1]} + setvalcmd="ARG_${argslib_names[$i]}=${parts[1]}" + # echo $setvalcmd + eval $setvalcmd + return 0 + fi + done + _die "Argument not defined: ${parts[0]}" + return 1 +} + +function ArgsLibAddArg { + [ ${#@} -eq 4 ] || _die "Incorrect number of arguments for AddArg" + _check_parse_value $2 $3 + + # Not found? Append it to the end. + argslib_names[$argslib_n]=$1 + argslib_types[$argslib_n]=$2 + argslib_defaults[$argslib_n]=$3 + argslib_desc[$argslib_n]=$4 + let argslib_n=argslib_n+1 + + _parse_arg "--$1=$3" +} + +function ArgsLibPrintAll { + echo "$argslib_n command-line argument(s)" + for (( i=0; i<$argslib_n; i++ )) + do + echo -n " ${argslib_names[$i]} (" + echo -n "${argslib_types[$i]}): " + echo -n "${argslib_values[$i]} " + echo "[defval=${argslib_defaults[$i]}]" + done +} + +function ArgsLibParse { + if [ "$1" = "--help" ] + then + _print_usage_exit + fi + + # Parse arguments in the form --name=value + for arg in $@ + do + [[ $arg == --* ]] || _die "Invalid syntax for argument name: $arg" + _parse_arg $arg + done +} + +function ArgsLibGet { + [ ${#@} -eq 1 ] || _die "Incorrect number of arguments for ArgsLibGet" + for (( i=0; i<$argslib_n; i++ )) + do + if [ "$1" = ${argslib_names[$i]} ] + then + echo ${argslib_values[$i]} + return 0 + fi + done + echo "" + return 1 +} + +function ArgsLibSet { + [ ${#@} -eq 2 ] || _die "Incorrect number of arguments for ArgsLibSet" + for (( i=0; i<$argslib_n; i++ )) + do + if [ "$1" = ${argslib_names[$i]} ] + then + _parse_arg "--$1=$2" + return 0 + fi + done + echo "Arg $1 not found" + return 1 +} + diff --git a/scenarios/bargaining/open_spiel/open_spiel/scripts/build_and_run_tests.sh b/scenarios/bargaining/open_spiel/open_spiel/scripts/build_and_run_tests.sh new file mode 100755 index 0000000..6220104 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/scripts/build_and_run_tests.sh @@ -0,0 +1,251 @@ +#!/bin/bash + +# Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# The following scripts: +# (optionally) create a virtualenv +# (optionally) install the pip package dependencies +# builds open_spiel +# executes the C++ tests +# executes the Python tests using the `python3` command. +# (optionally) runs the Julia tests +# +# We assume "install.sh` has been run once before. + +# As we encourage the use of a virtualenv, it is set to be used by default. +# Use the --virtualenv=false flag to disable this feature. 
+ +# You will need to install at some points the requirements, within the +# virtualenv or as system wide packages. By default, it will be installed the +# first time the virtualenv is setup, but you can force an install using the +# --install=true flag. + +# Load argslib for parsing of command-line arguments. +source $(dirname "$0")/argslib.sh + +ArgsLibAddArg virtualenv bool true "Whether to use virtualenv. We enter a virtualenv (stored in venv/) only if this flag is true and we are not already in one." +# We define a string and not a boolean, because we can to know whether this flag +# has been explicitly set or not. +ArgsLibAddArg install string "default" 'Whether to install requirements.txt packages. Doing it is slow. By default, it will be true (a) the first-time a virtualenv is being setup (if system_wide_packages is false), (b) if the user overrides it with "true".' +ArgsLibAddArg system_wide_packages bool false 'Whether to use --system-site-packages on the virtualenv.' +ArgsLibAddArg build_with_pip bool false 'Whether to use "python3 -m pip install ." or the usual cmake&make and ctest.' +ArgsLibAddArg build_only bool false 'Builds only the library, without running tests.' +ArgsLibAddArg test_only string "all" 'Build and runs the tests matching this string (use "all" to run all tests)' +ArgsLibAddArg build_dir string "build" 'Location of the build directory.' +ArgsLibAddArg num_threads int -1 'Number of threads to use when paralellizing build / tests. (Defaults to 4*)' +ArgsLibParse $@ + +function die() { + echo -e "\033[31m${1}\e[0m" + exit -1 +} + +set -e # exit when any command fails +# set -x # Prints all executed command + +MYDIR="$(dirname "$(realpath "$0")")" +source "${MYDIR}/global_variables.sh" + +CXX=${CXX:-`which clang++`} +if [ ! -x $CXX ] +then + echo -n "clang++ not found (the clang C++ compiler is needed to " + echo "compile OpenSpiel). Exiting..." + exit 1 +fi + +if [ "$ARG_num_threads" -eq -1 ]; then + NPROC="nproc" + if [[ "$OSTYPE" == "darwin"* ]]; then # Mac OSX + NPROC="sysctl -n hw.physicalcpu" + fi + + MAKE_NUM_PROCS=$(${NPROC}) + let TEST_NUM_PROCS=4*${MAKE_NUM_PROCS} +else + MAKE_NUM_PROCS=$ARG_num_threads + TEST_NUM_PROCS=$ARG_num_threads +fi + +echo -e "\e[33mRunning ${0} from $PWD\e[0m" +PYBIN=${PYBIN:-"python3"} +PYBIN=`which ${PYBIN}` +if [ ! -x $PYBIN ] +then + echo -e "\e[1m\e[93m$PYBIN not found! Skip build and test.\e[0m" + continue +fi + +# if we are in a virtual_env, we will not create a new one inside. +if [[ "$VIRTUAL_ENV" != "" ]] +then + echo -e "\e[1m\e[93mVirtualenv already detected. We do not create a new one.\e[0m" + ArgsLibSet virtualenv false + # When you're in a virtual environment, the python binary should be just python3. + # Otherwise, it uses the environment's python. + PYBIN="python3" +fi + +VENV_DIR="./venv" +if [[ $ARG_virtualenv == "true" ]]; then + if ! [ -d "$VENV_DIR" ]; then + extra_args='' + if [[ $ARG_system_wide_packages == "true" ]]; then + extra_args="--system-site-packages" + else + # If we are in a virtual-env, and are not using the system-wide packages + # then we need to install the dependencies the first time the virtualenv + # is created + ArgsLibSet install true + fi + echo "Installing..." + echo -e "\e[33mInstalling a virtualenv to $VENV_DIR. 
The setup is long the first time, please wait.\e[0m" + virtualenv -p $PYBIN $VENV_DIR $extra_args + else + echo -e "\e[33mReusing virtualenv from $VENV_DIR.\e[0m" + fi + PYBIN=python + source $VENV_DIR/bin/activate + # When you're in a virtual environment, the python binary should be just python3. + # Otherwise, it uses the environment's python. + PYBIN="python3" +fi + +# We only exit the virtualenv if we were asked to create one. +function cleanup { + if [[ $ARG_virtualenv == "true" ]]; then + echo "Exiting virtualenv" + deactivate + fi +} +trap cleanup EXIT + +if [[ $ARG_install == "true" ]]; then + echo -e "\e[33mInstalling the requirements (use --noinstall to skip).\e[0m" + $PYBIN -m pip install --upgrade -r ./requirements.txt +else + echo -e "\e[33mSkipping installation of requirements.txt.\e[0m" +fi + +PYVERSION=$($PYBIN -c 'import sys; print(".".join(map(str, sys.version_info[:3])))') +BUILD_DIR="$ARG_build_dir" +mkdir -p $BUILD_DIR + +# Configure Julia compilation if required. +if [[ ${OPEN_SPIEL_BUILD_WITH_JULIA:-"OFF"} == "ON" ]]; then + # Check that Julia is in the path. + if [[ ! -x `which julia` ]] || [[ "$(julia -e 'println(VERSION >= v"1.6.0-rc1")')" == "false" ]] + then + echo -e "\e[33mWarning: julia not in your PATH or it's too old. Trying \$HOME/.local/bin\e[0m" + PATH=${HOME}/.local/bin:${PATH} + [[ -x `which julia` ]] && [[ "$(julia -e 'println(VERSION >= v"1.6.0-rc1")')" == "true" ]] || die "could not find julia command. Please add it to PATH and rerun." + fi + LIBCXXWRAP_JULIA_DIR=`julia --project=${MYDIR}/../julia -e 'using CxxWrap; print(dirname(dirname(CxxWrap.CxxWrapCore.libcxxwrap_julia_jll.libcxxwrap_julia_path)))'` + JULIA_VERSION_INFO=`julia --version` + echo "Found libcxxwrap_julia at $LIBCXXWRAP_JULIA_DIR with $JULIA_VERSION_INFO" +fi + +function print_tests_passed { + echo -e "\033[32mAll tests passed. Nicely done!\e[0m" +} + +function print_tests_failed { + echo -e "\033[31mAt least one test failed.\e[0m" + echo "If this is the first time you have run these tests, try:" + echo "python3 -m pip install -r requirements.txt" + echo "Note that outside a virtualenv, you will need to install the " + echo "system-wide matplotlib: sudo apt-get install python-matplotlib" + exit 1 +} + +function print_skipping_tests { + echo -e "\033[32m*** Skipping to run tests.\e[0m" +} + +# Build / install everything and run tests (C++, Python, optionally Julia). +if [[ $ARG_build_with_pip == "true" ]]; then + ${PYBIN} -m pip install . + if ctest -j$TEST_NUM_PROCS --output-on-failure ../open_spiel; then + print_tests_passed + else + print_tests_failed + exit 1 + fi +else + cd $BUILD_DIR + echo "Building and testing in $PWD using 'python' (version $PYVERSION)." + + pwd=`pwd` + export PYTHONPATH=$PYTHONPATH:$pwd/.. + export PYTHONPATH=$PYTHONPATH:$pwd/../open_spiel + export PYTHONPATH=$PYTHONPATH:$pwd/python # For pyspiel bindings + + # Build in testing, so that we can run tests fast. + cmake -DPython3_EXECUTABLE=${PYBIN} \ + -DCMAKE_CXX_COMPILER=${CXX} \ + -DCMAKE_PREFIX_PATH=${LIBCXXWRAP_JULIA_DIR} \ + -DBUILD_TYPE=Testing \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=1 \ + ../open_spiel + + if [ "$ARG_test_only" != "all" ] + then + # Check for building and running a specific test. 
+ # TODO(author5): generlize this; currently only covers Python and C++ tests + echo "Build and testing only $ARG_test_only" + if [[ $ARG_test_only == python_* ]]; then + echo "Building pyspiel" + make -j$MAKE_NUM_PROCS pyspiel + elif [[ $ARG_test_only == julia_test ]]; then + echo "Building Julia API" + make -j$MAKE_NUM_PROCS spieljl + elif [[ $ARG_test_only == gospiel_test ]]; then + echo "Building Go API" + make -j$MAKE_NUM_PROCS gospiel + else + echo "Building everything" + make -j$MAKE_NUM_PROCS + fi + + if [[ $ARG_build_only == "true" ]]; then + echo -e "\033[32m*** Skipping runing tests as build_only is ${ARG_build_only} \e[0m" + else + if ctest -j$TEST_NUM_PROCS --output-on-failure -R "$ARG_test_only" ../open_spiel; then + print_tests_passed + else + print_tests_failed + fi + fi + else + # Make everything + echo "Building project" + make -j$MAKE_NUM_PROCS + + if [[ $ARG_build_only == "true" ]]; then + echo -e "\033[32m*** Skipping runing tests as build_only is ${ARG_build_only} \e[0m" + else + # Test everything + echo "Running all tests" + + if ctest -j$TEST_NUM_PROCS --output-on-failure ../open_spiel; then + print_tests_passed + else + print_tests_failed + fi + fi + fi + + cd .. +fi diff --git a/scenarios/bargaining/open_spiel/open_spiel/scripts/ci_python_prechecks.sh b/scenarios/bargaining/open_spiel/open_spiel/scripts/ci_python_prechecks.sh new file mode 100755 index 0000000..08c90b0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/scripts/ci_python_prechecks.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Only use for Github Actions CI! +OS=`uname -a | awk '{print $1}'` +if [[ "$OS" = "Darwin" ]]; then + # This seems to be necessary to install python via brew in Github Actions + rm -f /usr/local/bin/2to3-${OS_PYTHON_VERSION} + rm -f /usr/local/bin/idle${OS_PYTHON_VERSION} + rm -f /usr/local/bin/pydoc${OS_PYTHON_VERSION} + rm -f /usr/local/bin/python${OS_PYTHON_VERSION} + rm -f /usr/local/bin/python${OS_PYTHON_VERSION}* +fi + diff --git a/scenarios/bargaining/open_spiel/open_spiel/scripts/ci_script.sh b/scenarios/bargaining/open_spiel/open_spiel/scripts/ci_script.sh new file mode 100755 index 0000000..6208e5a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/scripts/ci_script.sh @@ -0,0 +1,68 @@ +#!/bin/bash + +# Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
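# Note (assumption, for local use): this script reads OS_PYTHON_VERSION from the
# environment (in CI it is expected to be provided by the workflow), so a local
# dry run could look like:
#   OS_PYTHON_VERSION=3.11 ./open_spiel/scripts/ci_script.sh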
+ +set -e +set -x + +# Python 3.9 not default on Ubuntu yet (Ubuntu 20.04). +OS=`uname -a | awk '{print $1}'` +if [[ "$OS" = "Linux" && "$OS_PYTHON_VERSION" = "3.9" ]]; then + echo "Linux detected and Python 3.9 requested. Installing Python 3.9 and setting as default." + sudo apt-get install python3.9 python3.9-dev + sudo update-alternatives --install /usr/bin/python python /usr/bin/python3.9 1 + sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1 + # Still needed to support using venv on Ubuntu 20.04: + sudo apt-get install python3.9-venv +elif [[ "$OS" = "Darwin" ]]; then + # Python is already intalled via brew in install.sh from actions.yml + brew link --force python@${OS_PYTHON_VERSION} +fi + +PYBIN=${PYBIN:-"python${OS_PYTHON_VERSION}"} +PYBIN=${PYBIN:-"python"} +PYBIN=${PYBIN:-"python3"} +PYBIN=`which $PYBIN` + +source ./open_spiel/scripts/python_extra_deps.sh $PYBIN + +if [[ "$OS" = "Linux" && ( "$OS_PYTHON_VERSION" = "3.9" || "$OS_PYTHON_VERSION" = "3.10" || "$OS_PYTHON_VERSION" = "3.11" || "$OS_PYTHON_VERSION" = "3.12" ) ]]; then + # Ubuntu 22.04 must execute the virtual env this way: + ${PYBIN} -m venv ./venv +elif [[ "$OS" = "Darwin" && "$OS_PYTHON_VERSION" = "3.12" ]]; then + ${PYBIN} -m venv ./venv +else + # Ubuntu 20.04 and earlier + ${PYBIN} -m pip install virtualenv + virtualenv -p ${PYBIN} ./venv +fi + +source ./venv/bin/activate + +pip install --upgrade pip +pip install --upgrade setuptools + +# Can use python and pip directly after here because we're in the virtual env + +python --version +pip install --upgrade -r requirements.txt + +[[ "$OPEN_SPIEL_ENABLE_JAX" = "ON" ]] && pip install --no-cache-dir --upgrade $OPEN_SPIEL_PYTHON_JAX_DEPS +[[ "$OPEN_SPIEL_ENABLE_PYTORCH" = "ON" ]] && pip install --no-cache-dir --upgrade $OPEN_SPIEL_PYTHON_PYTORCH_DEPS +[[ "$OPEN_SPIEL_ENABLE_PYTHON_MISC" = "ON" ]] && pip install --no-cache-dir --upgrade $OPEN_SPIEL_PYTHON_MISC_DEPS + +./open_spiel/scripts/build_and_run_tests.sh + +deactivate diff --git a/scenarios/bargaining/open_spiel/open_spiel/scripts/find_jax.sh b/scenarios/bargaining/open_spiel/open_spiel/scripts/find_jax.sh new file mode 100755 index 0000000..0d71d9a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/scripts/find_jax.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +# Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +read -r -d '' TESTSCRIPT << EOT +import jax +import jaxlib +import haiku +import chex +import optax +print(jax.__version__) +EOT + +PY_EXEC=$(which $1) +if [[ ! -x $PY_EXEC ]] +then + echo "Python executable: $PY_EXEC not found or not executable." + exit -1 +fi + +echo "$TESTSCRIPT" | $PY_EXEC +exit $? 
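find_jax.sh above takes a Python executable as its only argument and exits 0 only when the JAX stack it imports (jax, jaxlib, haiku, chex, optax) is available. As a sketch of how such a probe can be consumed (the actual wiring is the AUTO detection described in global_variables.sh), a caller could toggle the corresponding flag from the exit status:

    # Hypothetical caller: enable the JAX tests only if the probe succeeds.
    if ./open_spiel/scripts/find_jax.sh python3 >/dev/null 2>&1; then
      export OPEN_SPIEL_ENABLE_JAX="ON"
    else
      export OPEN_SPIEL_ENABLE_JAX="OFF"
    fi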
diff --git a/scenarios/bargaining/open_spiel/open_spiel/scripts/find_pytorch.sh b/scenarios/bargaining/open_spiel/open_spiel/scripts/find_pytorch.sh new file mode 100755 index 0000000..44e8b1a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/scripts/find_pytorch.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash + +# Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +read -r -d '' TESTSCRIPT << EOT +import torch +print(torch.__version__) +EOT + +PY_EXEC=$(which $1) +if [[ ! -x $PY_EXEC ]] +then + echo "Python executable: $PY_EXEC not found or not executable." + exit -1 +fi + +echo "$TESTSCRIPT" | $PY_EXEC +exit $? diff --git a/scenarios/bargaining/open_spiel/open_spiel/scripts/find_tensorflow.sh b/scenarios/bargaining/open_spiel/open_spiel/scripts/find_tensorflow.sh new file mode 100755 index 0000000..8f8b1f8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/scripts/find_tensorflow.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash + +# Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +read -r -d '' TESTSCRIPT << EOT +import tensorflow as tf +print(tf.__version__) +EOT + +PY_EXEC=$(which $1) +if [[ ! -x $PY_EXEC ]] +then + echo "Python executable: $PY_EXEC not found or not executable." + exit -1 +fi + +echo "$TESTSCRIPT" | $PY_EXEC +exit $? diff --git a/scenarios/bargaining/open_spiel/open_spiel/scripts/generate_new_playthrough.sh b/scenarios/bargaining/open_spiel/open_spiel/scripts/generate_new_playthrough.sh new file mode 100755 index 0000000..a6f92e5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/scripts/generate_new_playthrough.sh @@ -0,0 +1,32 @@ +#!/bin/sh + +# Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Generates a playthrough for a new game with optional parameters. +# This script exists mainly as a reminder for the command to run. 
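#
# Example invocation (tic_tac_toe is only an illustrative game name):
#   ./open_spiel/scripts/generate_new_playthrough.sh tic_tac_toe
# which regenerates open_spiel/integration_tests/playthroughs/tic_tac_toe.txt.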
+ +GAME="$1" +shift + +if [ "$GAME" = "" ] +then + echo "Usage: generate_new_playthrough GAME" + exit +fi + +python open_spiel/python/examples/playthrough.py \ +--game $GAME \ +--output_file open_spiel/integration_tests/playthroughs/$GAME.txt \ +--alsologtostdout diff --git a/scenarios/bargaining/open_spiel/open_spiel/scripts/global_variables.sh b/scenarios/bargaining/open_spiel/open_spiel/scripts/global_variables.sh new file mode 100644 index 0000000..79a1ffe --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/scripts/global_variables.sh @@ -0,0 +1,107 @@ +#!/bin/sh + +# Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This file contains the global variables that control conditional dependencies. +# It is being used to know whether we should: +# (a) download a dependency (done in install.sh) +# (b) build it and link against it during the `cmake` build process +# +# Note that we do not change the value of the constants if they are already +# defined by an enclosing scope (useful for command line overrides). + +# We add a single flag, to enable/disable all conditional dependencies, in +# particular to be able to use that in the Travis CI test. +export DEFAULT_OPTIONAL_DEPENDENCY=${DEFAULT_OPTIONAL_DEPENDENCY:-"OFF"} + +# Building the Python API can be disabled by setting this to OFF. +export OPEN_SPIEL_BUILD_WITH_PYTHON=${OPEN_SPIEL_BUILD_WITH_PYTHON:-"ON"} + +# Abseil version. Must support older versions for previous wheels on MacOS. +export OPEN_SPIEL_ABSL_VERSION=${OPEN_SPIEL_ABSL_VERSION:-"20250127.1"} + +# Each optional dependency has their own flag, that defaults to the global +# "$DEFAULT_OPTIONAL_DEPENDENCY" if undefined. To enable an optional dependency, +# we recomment defining the associated environment variable in your bashrc or +# your virtualenv bashrc, e.g. export OPEN_SPIEL_BUILD_WITH_HANABI="ON" +export OPEN_SPIEL_BUILD_WITH_HANABI=${OPEN_SPIEL_BUILD_WITH_HANABI:-$DEFAULT_OPTIONAL_DEPENDENCY} +export OPEN_SPIEL_BUILD_WITH_ACPC=${OPEN_SPIEL_BUILD_WITH_ACPC:-$DEFAULT_OPTIONAL_DEPENDENCY} +export OPEN_SPIEL_BUILD_WITH_JULIA=${OPEN_SPIEL_BUILD_WITH_JULIA:-$DEFAULT_OPTIONAL_DEPENDENCY} +export OPEN_SPIEL_BUILD_WITH_XINXIN=${OPEN_SPIEL_BUILD_WITH_XINXIN:-$DEFAULT_OPTIONAL_DEPENDENCY} +export OPEN_SPIEL_BUILD_WITH_ROSHAMBO=${OPEN_SPIEL_BUILD_WITH_ROSHAMBO:-$DEFAULT_OPTIONAL_DEPENDENCY} +export OPEN_SPIEL_BUILD_WITH_GO=${OPEN_SPIEL_BUILD_WITH_GO:-$DEFAULT_OPTIONAL_DEPENDENCY} +export OPEN_SPIEL_BUILD_WITH_RUST=${OPEN_SPIEL_BUILD_WITH_RUST:-$DEFAULT_OPTIONAL_DEPENDENCY} + +# Download the header-only library, libnop (https://github.com/google/libnop), +# to support the serialization and deserialization of C++ data types. +export OPEN_SPIEL_BUILD_WITH_LIBNOP="${OPEN_SPIEL_BUILD_WITH_LIBNOP:-"OFF"}" + +# Download precompiled binaries for libtorch (PyTorch C++ API). +# See https://pytorch.org/cppdocs/ for C++ documentation. +# This dependency is currently not supported by Travis CI test. 
+# +# From PyTorch documentation: +# +# > If you would prefer to write Python, and can afford to write Python, we +# > recommend using the Python interface to PyTorch. However, if you would +# > prefer to write C++, or need to write C++ (because of multithreading, +# > latency or deployment requirements), the C++ frontend to PyTorch provides +# > an API that is approximately as convenient, flexible, friendly and intuitive +# > as its Python counterpart. +# +# You can find an example usage in open_spiel/libtorch/torch_integration_test.cc +export OPEN_SPIEL_BUILD_WITH_LIBTORCH="${OPEN_SPIEL_BUILD_WITH_LIBTORCH:-"OFF"}" + +# Libtorch download URL - you may need to change this depending on your system +# +# Optional prerequesites: +# 1) CUDA drivers via toolkit https://developer.nvidia.com/cuda-toolkit-archive +# Local runfile installer is quite friendly. If your system already comes +# with drivers you may want to skip over that option in the installer. +# 2) CUDNN https://developer.nvidia.com/cudnn +# (Nvidia developer program membership required) +# +# The download URL may need to be changed for your system. You can construct the +# correct URL for you system from https://pytorch.org/get-started/locally/ +# +# Some examples +# For Linux/CUDA 12.1: https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.3.0%2Bcu121.zip +# For Linux/no CUDA: https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.3.0%2Bcpu.zip +# For macOS/no CUDA: https://download.pytorch.org/libtorch/cpu/libtorch-macos-arm64-2.3.0.zip +# +# Note: there are currently known problems with the C++ PyTorch: inteferences +# with pybind11 versions. Until it is properly fixed, there is a workaround: +# https://github.com/deepmind/open_spiel/issues/966#issuecomment-1322982393 +export OPEN_SPIEL_BUILD_WITH_LIBTORCH_DOWNLOAD_URL="${OPEN_SPIEL_BUILD_WITH_LIBTORCH_DOWNLOAD_URL:-"https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.3.0%2Bcu121.zip"}" + +# Enable integration with GAMUT game generator (see games/gamut). +# Requires java and GAMUT, so disabled by default. +export OPEN_SPIEL_BUILD_WITH_GAMUT="${OPEN_SPIEL_BUILD_WITH_GAMUT:-"OFF"}" + +# Flag to enable building with OR-Tools to get C++ optimization routines. +# Disabled by default as it requires installation of third party software. +# See algorithms/ortools/CMakeLists.txt for specific instructions. +export OPEN_SPIEL_BUILD_WITH_ORTOOLS="${OPEN_SPIEL_BUILD_WITH_ORTOOLS:-"OFF"}" +# You may want to replace this URL according to your system. +# Use version 9.6 at minimum, due to compatibility between absl library versions +# used in OpenSpiel and in OrTools. +# Other links to archives found here: https://developers.google.com/optimization/install/cpp/linux +export OPEN_SPIEL_BUILD_WITH_ORTOOLS_DOWNLOAD_URL="${OPEN_SPIEL_BUILD_WITH_ORTOOLS_DOWNLOAD_URL:-"https://github.com/google/or-tools/releases/download/v9.6/or-tools_amd64_ubuntu-22.04_cpp_v9.6.2534.tar.gz"}" +# Used to determine whether to include the Python ML frameworks in the tests. +# A value of AUTO runs the appropriate find_X script in open_spiel/scripts to check what is installed. +# To override automatic detection, set to either ON or OFF. 
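# For example, to skip the ML-framework tests regardless of what is installed
# (a sketch; these variables honour values already set in the enclosing
# environment, so export them before sourcing this file or running the build
# scripts):
#   export OPEN_SPIEL_ENABLE_JAX="OFF"
#   export OPEN_SPIEL_ENABLE_PYTORCH="OFF"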
+export OPEN_SPIEL_ENABLE_JAX=${OPEN_SPIEL_ENABLE_JAX:-"AUTO"} +export OPEN_SPIEL_ENABLE_PYTORCH=${OPEN_SPIEL_ENABLE_PYTORCH:-"AUTO"} +export OPEN_SPIEL_ENABLE_PYTHON_MISC=${OPEN_SPIEL_ENABLE_PYTHON_MISC:-"OFF"} diff --git a/scenarios/bargaining/open_spiel/open_spiel/scripts/install.sh b/scenarios/bargaining/open_spiel/open_spiel/scripts/install.sh new file mode 100755 index 0000000..583cfa0 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/scripts/install.sh @@ -0,0 +1,335 @@ +#!/usr/bin/env bash + +# Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# The following should be easy to setup as a submodule: +# https://git-scm.com/docs/git-submodule + +export OPEN_SPIEL_ABSL_VERSION=${OPEN_SPIEL_ABSL_VERSION:-"20250127.1"} + +die() { + echo "$*" 1>&2 + exit 1 +} + +set -e # exit when any command fails +set -x # show evaluation trace + +PYBIN="python3" +if [[ "$1" != "" ]]; then + PYBIN=$1 +fi +${PYBIN} --version + +MYDIR="$(dirname "$(realpath "$0")")" + +# This function is only run on Github Actions! +function ci_check_install_python() { + if [[ ! "$CI" ]]; then + echo "Only run this function on Github Actions!" + exit 1 + fi + + # Need the trap here to make sure the return value of grep being 1 doesn't cause set -e to fail + # https://stackoverflow.com/questions/77047127/bash-capture-stderr-of-a-function-while-using-trap + trap 'ret=0; output=$(brew list --versions | grep "python ${OS_PYTHON_VERSION}") || ret="$?"; trap - RETURN' RETURN + if [[ "$output" = "" ]]; then + # The --force is needed because there seems to be a phantom installation in /usr/local/ + # and errors show up for files that already exist + brew install --force "python@${OS_PYTHON_VERSION}" + fi + return 0 +} + +# Calling this file from the project root is not allowed, +# as all the paths here are hard-coded to be relative to it. +# +# So this is not allowed: +# $ ./open_spiel/scripts/install.sh +# +# Instead, just call project-root install.sh file: +# $ ./install.sh +if [[ `basename $MYDIR` == "scripts" ]]; then + die "Please run ./install.sh from the directory where you cloned the" \ + "project, do not run $0" +fi + +# Load all the build settings. +source "${MYDIR}/open_spiel/scripts/global_variables.sh" + +# Specify a download cache directory for all external dependencies. +# If a dependency version is updated you may need to clean this directory. +DEFAULT_DOWNLOAD_CACHE_DIR="$MYDIR/download_cache" + +# Use the ENV variable if defined, or the default location otherwise. +DOWNLOAD_CACHE_DIR=${DOWNLOAD_CACHE_DIR:-$DEFAULT_DOWNLOAD_CACHE_DIR} + +# Create the cache directory. +[[ -d "${DOWNLOAD_CACHE_DIR}" ]] || mkdir "${DOWNLOAD_CACHE_DIR}" + +# 1. Clone the external dependencies before installing systen packages, to make +# sure they are present even if later commands fail. +# +# We do not use submodules because the CL versions are stored within Git +# metadata and we do not use Git within DeepMind, so it's hard to maintain. 
+ +# Note that this needs Git intalled, so we check for that. +if [[ ! -x `which git` ]]; then + echo "Did not find git, attempting to install it." + if [[ "$OSTYPE" == "linux-gnu" ]]; then + sudo apt-get install git + elif [[ "$OSTYPE" == "darwin"* ]]; then # Mac OSX + brew install git + else + echo "The OS '$OSTYPE' is not supported (Only Linux and MacOS is). " \ + "Feel free to contribute the install for a new OS." + exit 1 + fi +fi + +# Cache git clone of the dependencies. +function cached_clone() { + # Extract args + ALL_ARGS_EXCEPT_LAST="${@:1:$#-1}" + LAST_ARG="${@: -1}" + TARGET_DIR="$LAST_ARG" + + # Used for naming the cache directory + CACHED_TARGET="${DOWNLOAD_CACHE_DIR}/$(basename "$TARGET_DIR")" + + if [[ ! -d "$CACHED_TARGET" ]]; then + git clone $ALL_ARGS_EXCEPT_LAST "$CACHED_TARGET" + fi + cp -r "$CACHED_TARGET" "$TARGET_DIR" +} + +# For the external dependencies, we use fixed releases for the repositories that +# the OpenSpiel team do not control. +# Feel free to upgrade the version after having checked it works. + +DIR="./pybind11" +if [[ ! -d ${DIR} ]]; then + cached_clone -b master --single-branch --depth 1 https://github.com/pybind/pybind11.git ${DIR} +fi + +# The official https://github.com/dds-bridge/dds.git seems to not accept PR, +# so we have forked it. +DIR="open_spiel/games/bridge/double_dummy_solver" +if [[ ! -d ${DIR} ]]; then + cached_clone -b 'develop' --single-branch --depth 1 https://github.com/jblespiau/dds.git ${DIR} +fi + +DIR="open_spiel/abseil-cpp" +if [[ ! -d ${DIR} ]]; then + cached_clone -b "${OPEN_SPIEL_ABSL_VERSION}" --single-branch --depth 1 https://github.com/abseil/abseil-cpp.git ${DIR} +fi + +DIR="open_spiel/pybind11_abseil" +if [[ ! -d ${DIR} ]]; then + cached_clone -b 'master' https://github.com/pybind/pybind11_abseil.git ${DIR} + pushd ${DIR} + git checkout '73992b5' + popd +fi + +# Optional dependencies. +DIR="open_spiel/games/hanabi/hanabi-learning-environment" +if [[ ${OPEN_SPIEL_BUILD_WITH_HANABI:-"ON"} == "ON" ]] && [[ ! -d ${DIR} ]]; then + cached_clone -b 'master' https://github.com/deepmind/hanabi-learning-environment.git ${DIR} + # We checkout a specific CL to prevent future breakage due to changes upstream + # The repository is very infrequently updated, thus the last 15 commits should + # be ok for a long time. + pushd ${DIR} + git checkout '54e7959' + popd +fi + +# This Github repository contains the raw code from the ACPC server +# http://www.computerpokercompetition.org/downloads/code/competition_server/project_acpc_server_v1.0.42.tar.bz2 +# with the code compiled as C++ within a namespace. +DIR="open_spiel/games/universal_poker/acpc" +if [[ ${OPEN_SPIEL_BUILD_WITH_ACPC:-"ON"} == "ON" ]] && [[ ! -d ${DIR} ]]; then + cached_clone -b 'master' --single-branch --depth 1 https://github.com/jblespiau/project_acpc_server.git ${DIR} +fi + +# This GitHub repository contains Nathan Sturtevant's state of the art +# Hearts program xinxin. +DIR="open_spiel/bots/xinxin/hearts" +if [[ ${OPEN_SPIEL_BUILD_WITH_XINXIN:-"ON"} == "ON" ]] && [[ ! -d ${DIR} ]]; then + cached_clone -b 'master' --single-branch --depth 1 https://github.com/nathansttt/hearts.git ${DIR} +fi + +# This GitHub repository contains bots from the RoShamBo Programming Competition +DIR="open_spiel/bots/roshambo/roshambo" +if [[ ${OPEN_SPIEL_BUILD_WITH_ROSHAMBO:-"ON"} == "ON" ]] && [[ ! 
-d ${DIR} ]]; then + cached_clone -b 'open_spiel' --single-branch --depth 1 https://github.com/jhtschultz/roshambo.git ${DIR} +fi + +# This GitHub repository allows for serialization of custom C++ objects. +DIR="open_spiel/libnop/libnop" +if [[ ${OPEN_SPIEL_BUILD_WITH_LIBNOP:-"ON"} == "ON" ]] && [[ ! -d ${DIR} ]]; then + git clone -b 'master' --single-branch --depth 1 https://github.com/google/libnop.git ${DIR} +fi + +# Add libtorch (PyTorch C++ API). +# This downloads the precompiled binaries available from the pytorch website. +DIR="open_spiel/libtorch/libtorch" +if [[ ${OPEN_SPIEL_BUILD_WITH_LIBTORCH:-"ON"} == "ON" ]] && [[ ! -d ${DIR} ]]; then + DOWNLOAD_FILE="${DOWNLOAD_CACHE_DIR}/libtorch.zip" + [[ -f "${DOWNLOAD_FILE}" ]] || wget --show-progress -O "${DOWNLOAD_FILE}" "${OPEN_SPIEL_BUILD_WITH_LIBTORCH_DOWNLOAD_URL}" + unzip "${DOWNLOAD_FILE}" -d "open_spiel/libtorch/" +fi + +# Add OrTools +# This downloads the precompiled binaries available from the official website. +# https://developers.google.com/optimization/install/cpp/ +DIR="open_spiel/ortools" +if [[ ${OPEN_SPIEL_BUILD_WITH_ORTOOLS:-"ON"} == "ON" ]] && [[ ! -d ${DIR} ]]; then + DOWNLOAD_FILE="${DOWNLOAD_CACHE_DIR}/ortools.tar.gz" + [[ -f "${DOWNLOAD_FILE}" ]] || wget --show-progress -O "${DOWNLOAD_FILE}" "${OPEN_SPIEL_BUILD_WITH_ORTOOLS_DOWNLOAD_URL}" + mkdir "$DIR" + tar -xzf "${DOWNLOAD_FILE}" --strip 1 -C "$DIR" +fi + +# 2. Install other required system-wide dependencies + +# Install Julia if required and not present already. +if [[ ${OPEN_SPIEL_BUILD_WITH_JULIA:-"OFF"} == "ON" ]]; then + # Check that Julia is in the path. + if [[ ! -x `which julia` ]] || [ "$(julia -e 'println(VERSION >= v"1.6.1")')" == "false" ] + then + echo -e "\e[33mWarning: julia not in your PATH or its too old. Trying \$HOME/.local/bin\e[0m" + PATH=${HOME}/.local/bin:${PATH} + fi + + if which julia >/dev/null && [ "$(julia -e 'println(VERSION >= v"1.6.1")')" == "true" ] ; then + JULIA_VERSION_INFO=`julia --version` + echo -e "\e[33m$JULIA_VERSION_INFO is already installed.\e[0m" + else + # Julia installed needs wget, make sure it's accessible. + if [[ "$OSTYPE" == "linux-gnu" ]] + then + [[ -x `which wget` ]] || sudo apt-get install wget + elif [[ "$OSTYPE" == "darwin"* ]] + then + [[ -x `which wget` ]] || brew install wget + fi + # Now install Julia + JULIA_INSTALLER="open_spiel/scripts/jill.sh" + if [[ ! -f $JULIA_INSTALLER ]]; then + curl https://raw.githubusercontent.com/abelsiqueira/jill/master/jill.sh -o jill.sh + mv jill.sh $JULIA_INSTALLER + fi + bash $JULIA_INSTALLER -y -v 1.6.1 + # This is needed on Ubuntu 19.10 and above, see: + # https://github.com/deepmind/open_spiel/issues/201 + if [[ -f /usr/lib/x86_64-linux-gnu/libstdc++.so.6 ]]; then + cp /usr/lib/x86_64-linux-gnu/libstdc++.so.6 $HOME/packages/julias/julia-1.6.1/lib/julia + fi + # Should install in $HOME/.local/bin which was added to the path above + [[ -x `which julia` ]] && [ "$(julia -e 'println(VERSION >= v"1.6.1")')" == "true" ] || die "julia not found PATH after install." + fi + + # Install dependencies. + julia --project="${MYDIR}/open_spiel/julia" -e 'using Pkg; Pkg.instantiate();' +fi + +# Install other system-wide packages. +if [[ "$OSTYPE" == "linux-gnu" ]]; then + PYTHON_PKGS="python3-dev python3-pip python3-setuptools python3-wheel python3-tk python3-venv" + if [[ "$OS_PYTHON_VERSION" == "3.11" ]]; then + # Need to special-case this until it's installed by default. 
+ # https://vegastack.com/tutorials/how-to-install-python-3-11-on-ubuntu-22-04/ + echo "Adding Python 3.11 ppa repos" + sudo add-apt-repository ppa:deadsnakes/ppa + PYTHON_PKGS="python3.11 python3.11-dev python3-pip python3-setuptools python3-wheel python3-tk python3.11-venv" + elif [[ "$OS_PYTHON_VERSION" == "3.12" ]]; then + # Need to special-case this until it's installed by default. + # https://ubuntuhandbook.org/index.php/2023/05/install-python-3-12-ubuntu/ + # No longer need to add the ppa repos on Ubuntu 24.04 runner + # echo "Adding Python 3.12 ppa repos" + # sudo add-apt-repository ppa:deadsnakes/ppa + PYTHON_PKGS="python3.12 python3.12-dev python3-pip python3-setuptools python3-wheel python3-tk python3.12-venv" + fi + EXT_DEPS="virtualenv clang cmake curl $PYTHON_PKGS" + if [[ ${OPEN_SPIEL_BUILD_WITH_GO:-"OFF"} == "ON" ]]; then + EXT_DEPS="${EXT_DEPS} golang" + fi + if [[ ${OPEN_SPIEL_BUILD_WITH_RUST:-"OFF"} == "ON" ]]; then + EXT_DEPS="${EXT_DEPS} rustc cargo" + fi + + APT_GET=`which apt-get` + if [ "$APT_GET" = "" ] + then + echo "This script assumes a Debian-based Linux distribution. Please install these packages manually or using your distribution's package manager:" + echo "$EXT_DEPS" + exit 1 + fi + + # We install the packages only if they are not present yet. + # See https://stackoverflow.com/questions/18621990/bash-get-exit-status-of-command-when-set-e-is-active + already_installed=0 + /usr/bin/dpkg-query --show --showformat='${db:Status-Status}\n' $EXT_DEPS || already_installed=$? + if [ $already_installed -eq 0 ] + then + echo -e "\e[33mSystem wide packages already installed, skipping their installation.\e[0m" + else + echo "System wide packages missing. Installing them..." + sudo apt-get -y update + sudo apt-get -y install $EXT_DEPS + fi + if [[ ${OPEN_SPIEL_BUILD_WITH_RUST:-"OFF"} == "ON" ]]; then + if [[ ! -f $HOME/.cargo/bin/bindgen ]]; then + cargo install bindgen-cli + fi + fi + + if [[ "$TRAVIS" ]]; then + sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${OS_PYTHON_VERSION} 10 + fi +elif [[ "$OSTYPE" == "darwin"* ]]; then # Mac OSX + brew search python + [[ -x `which realpath` ]] || brew install coreutils || echo "** Warning: failed 'brew install coreutils' -- continuing" + [[ -x `which cmake` ]] || brew install cmake || echo "** Warning: failed 'brew install cmake' -- continuing" + [[ -x `which python3` ]] || brew install python3 || echo "** Warning: failed 'brew install python3' -- continuing" + # On Github Actions, macOS comes with Python 3.9. + # We want to test multiple Python versions determined by OS_PYTHON_VERSION. + if [[ "$CI" ]]; then + # Set brew to use the specific python version + ci_check_install_python + brew link --force --overwrite "python@${OS_PYTHON_VERSION}" + fi + `python3 -c "import tkinter" > /dev/null 2>&1` || brew install tcl-tk || echo "** Warning: failed 'brew install tcl-tk' -- continuing" + python3 --version + [[ -x `which clang++` ]] || die "Clang not found. Please install or upgrade XCode and run the command-line developer tools" + [[ -x `which curl` ]] || brew install curl || echo "** Warning: failed 'brew install curl' -- continuing" + if [[ ${OPEN_SPIEL_BUILD_WITH_GO:-"OFF"} == "ON" ]]; then + [[ -x `which go` ]] || brew install golang || echo "** Warning: failed 'brew install golang' -- continuing" + fi + if [[ ${OPEN_SPIEL_BUILD_WITH_RUST:-"OFF"} == "ON" ]]; then + [[ -x `which rustc` ]] || brew install rust || echo "** Warning: failed 'brew install rust' -- continuing" + if [[ ! 
-f $HOME/.cargo/bin/bindgen ]]; then + cargo install bindgen-cli + fi + fi + # Removed getting pip via git-pip.py. See #1200. + brew install virtualenv # May be the required way to do this as of Python 3.12? + # ${PYBIN} -m pip install virtualenv +else + echo "The OS '$OSTYPE' is not supported (Only Linux and MacOS is). " \ + "Feel free to contribute the install for a new OS." + exit 1 +fi diff --git a/scenarios/bargaining/open_spiel/open_spiel/scripts/python_extra_deps.sh b/scenarios/bargaining/open_spiel/open_spiel/scripts/python_extra_deps.sh new file mode 100644 index 0000000..144c5e1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/scripts/python_extra_deps.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash + +# Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# These are extra packages that are not strictly required to run the OpenSpiel +# Python API, but are required by certain algorithms or tools. Packages here +# are for testing purposes: they are not installed by any of the install +# scripts, and are referred to only in the testing scripts run on GitHub, so +# they must be installed separately. The versions are pinned to ensure that +# tests are covering only those versions supported by the algorithms that use +# them, but could work for other versions too. +# +# To enable specific tests, please use the environment variables found in +# scripts/global_variables.sh + +# This script depends on the Python version, which it gets from $PYBIN or +# $CI_PYBIN passed in as $1. If it's not defined, Python 3.9 is assumed. 
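+#
+# As an illustration (hypothetical invocation, mirroring how test_wheel.sh
+# below uses this file): source the script with the desired Python binary and
+# feed the exported variables to pip, e.g.
+#   source open_spiel/scripts/python_extra_deps.sh python3
+#   python3 -m pip install --upgrade $OPEN_SPIEL_PYTHON_JAX_DEPS $OPEN_SPIEL_PYTHON_MISC_DEPS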
+ +PY_VER="3.9" +if [ "$1" != "" ]; then + PY_VER=`$1 --version | awk '{print $2}'` + if [ "$PY_VER" = "" ]; then + PY_VER="3.9" + fi +fi + +verlte() { + stuff=`echo -e "$1\n$2" | sort -V | head -n1` + [ "$1" = "$stuff" ] +} + +verlt() { + [ "$1" = "$2" ] && return 1 || verlte $1 $2 +} + +# +# Python-version dependent versions +# + +echo "Set Python version: $PY_VER" +if verlt $PY_VER 3.10; then + echo "Detected Python version < 3.10" + export OPEN_SPIEL_PYTHON_PYTORCH_DEPS="torch==1.13.1" + export OPEN_SPIEL_PYTHON_JAX_DEPS="jax==0.4.6 jaxlib==0.4.6 dm-haiku==0.0.10 optax==0.1.7 chex==0.1.7 rlax==0.1.5 distrax==0.1.3" + export OPEN_SPIEL_PYTHON_TENSORFLOW_DEPS="numpy==1.23.5 tensorflow==2.13.1 tensorflow-probability==0.19.0 tensorflow_datasets==4.9.2 keras==2.13.1" + export OPEN_SPIEL_PYTHON_MISC_DEPS="IPython==5.8.0 networkx==2.4 matplotlib==3.5.2 mock==4.0.2 nashpy==0.0.19 scipy==1.10.1 testresources==2.0.1 cvxopt==1.3.1 cvxpy==1.2.0 ecos==2.0.10 osqp==0.6.2.post5 clu==0.0.6 flax==0.5.3" +elif verlt $PY_VER 3.12; then + echo "Detected Python version in {3.10, 3.11}" + export OPEN_SPIEL_PYTHON_PYTORCH_DEPS="torch==2.1.0" + export OPEN_SPIEL_PYTHON_JAX_DEPS="jax==0.4.20 jaxlib==0.4.20 dm-haiku==0.0.10 optax==0.1.7 chex==0.1.84 rlax==0.1.6 distrax==0.1.4" + export OPEN_SPIEL_PYTHON_TENSORFLOW_DEPS="numpy==1.26.1 tensorflow==2.14.0 tensorflow-probability==0.22.1 tensorflow_datasets==4.9.7 keras==2.14.0" + export OPEN_SPIEL_PYTHON_MISC_DEPS="IPython==5.8.0 networkx==3.2 matplotlib==3.5.2 mock==4.0.2 nashpy==0.0.19 scipy==1.11.3 testresources==2.0.1 cvxopt==1.3.1 cvxpy==1.4.1 ecos==2.0.10 osqp==0.6.2.post5 clu==0.0.6 flax==0.5.3" +elif verlt $PY_VER 3.13; then + echo "Detected Python version 3.12" + export OPEN_SPIEL_PYTHON_PYTORCH_DEPS="torch==2.2.2" + export OPEN_SPIEL_PYTHON_JAX_DEPS="jax==0.4.26 jaxlib==0.4.26 dm-haiku==0.0.12 optax==0.2.2 chex==0.1.86 rlax==0.1.6 distrax==0.1.5" + export OPEN_SPIEL_PYTHON_TENSORFLOW_DEPS="numpy==1.26.4 tensorflow==2.16.1 tensorflow_datasets==4.9.7 keras==3.1.1" + export OPEN_SPIEL_PYTHON_MISC_DEPS="IPython==8.23.0 networkx==3.3 matplotlib==3.8.4 mock==5.1.0 nashpy==0.0.41 scipy==1.11.4 testresources==2.0.1 cvxopt==1.3.2 cvxpy==1.4.2 ecos==2.0.13 osqp==0.6.5 clu==0.0.11 flax==0.8.2" +else + echo "Detected Python version 3.13" + export OPEN_SPIEL_PYTHON_PYTORCH_DEPS="torch==2.6.0" + export OPEN_SPIEL_PYTHON_JAX_DEPS="jax==0.5.0 jaxlib==0.5.0 dm-haiku==0.0.13 optax==0.2.4 chex==0.1.88 rlax==0.1.6 distrax==0.1.5 flax==0.10.3" + export OPEN_SPIEL_PYTHON_MISC_DEPS="IPython==8.23.0 networkx==3.3 matplotlib==3.10.0 mock==5.1.0 nashpy==0.0.41 scipy==1.15.2 testresources==2.0.1 cvxopt==1.3.2 cvxpy==1.6.1 ecos==2.0.14 osqp==1.0.0b3 clu==0.0.11 tensorflow_datasets==4.9.3" +fi + + diff --git a/scenarios/bargaining/open_spiel/open_spiel/scripts/regenerate_playthroughs.sh b/scenarios/bargaining/open_spiel/open_spiel/scripts/regenerate_playthroughs.sh new file mode 100755 index 0000000..a7f7312 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/scripts/regenerate_playthroughs.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +# Copyright 2019 DeepMind Technologies Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Regenerates playthroughs for all the existing playthroughs.
+# Run this from open_spiel/ (scripts/regenerate_playthroughs.sh) whenever
+# a change is made to the public API.
+
+python3 python/examples/playthrough.py \
+--update_path integration_tests/playthroughs
diff --git a/scenarios/bargaining/open_spiel/open_spiel/scripts/test_wheel.sh b/scenarios/bargaining/open_spiel/open_spiel/scripts/test_wheel.sh
new file mode 100755
index 0000000..3cf42eb
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/scripts/test_wheel.sh
@@ -0,0 +1,94 @@
+#!/bin/bash
+
+# Copyright 2019 DeepMind Technologies Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This file is called by the wheels workflow .github/workflows/wheels.yml.
+set -e
+set -x
+
+if [ "$2" = "" ];
+then
+  echo "Usage: test_wheel <mode> <project main dir> [python binary]"
+  echo ""
+  echo "Basic mode tests only the python functionality (no ML libraries)."
+  echo "Full mode installs the extra ML libraries and the wheel (requires Python >= 3.7 for JAX)."
+  exit -1
+fi
+
+MODE=$1
+PROJDIR=$2
+
+uname -a
+OS=`uname -a | awk '{print $1}'`
+
+# If it's full mode on Linux, we have to install Python 3.9 and make it the default.
+if [[ "$MODE" = "full" && "$OS" = "Linux" && "$OS_PYTHON_VERSION" = "3.9" ]]; then
+  echo "Linux detected and Python 3.9 requested. Installing Python 3.9 and setting as default."
+  sudo apt-get install python3.9 python3.9-dev
+  sudo update-alternatives --install /usr/bin/python python /usr/bin/python3.9 1
+  sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1
+fi
+
+# Setting of PYBIN is complicated because of all the different environments this is run from.
+if [[ "$3" != "" ]]; then
+  PYBIN=$3
+else
+  PYBIN=${PYBIN:-"python3"}
+fi
+
+PYBIN=`which $PYBIN`
+$PYBIN -m pip install --upgrade setuptools
+$PYBIN -m pip install --upgrade -r $PROJDIR/requirements.txt -q
+
+if [[ "$MODE" = "full" ]]; then
+  echo "Full mode. Installing Python extra deps libraries."
+ source $PROJDIR/open_spiel/scripts/python_extra_deps.sh $PYBIN + $PYBIN -m pip install --upgrade $OPEN_SPIEL_PYTHON_JAX_DEPS + $PYBIN -m pip install --upgrade $OPEN_SPIEL_PYTHON_PYTORCH_DEPS + $PYBIN -m pip install --upgrade $OPEN_SPIEL_PYTHON_MISC_DEPS +fi + +if [[ "$MODE" = "full" ]]; then + if [[ "$OS" = "Linux" ]]; then + ${PYBIN} -m pip install wheelhouse/open_spiel-*-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl + elif [[ "$OS" = "Darwin" && "$OS_PYTHON_VERSION" = "3.9" ]]; then + ${PYBIN} -m pip install wheelhouse/open_spiel-*-cp39-cp39-macosx_10_9_x86_64.whl + elif [[ "$OS" = "Darwin" && "$OS_PYTHON_VERSION" = "3.11" ]]; then + # Run on the xlarge macs for arm64 + ${PYBIN} -m pip install wheelhouse/open_spiel-*-cp311-cp311-macosx_11_0_arm64.whl + elif [[ "$OS" = "Darwin" && "$OS_PYTHON_VERSION" = "3.12" ]]; then + ${PYBIN} -m pip install wheelhouse/open_spiel-*-cp312-cp312-macosx_10_13_x86_64.whl + else + echo "Config not found for full tests" + exit -1 + fi +fi + +export OPEN_SPIEL_BUILDING_WHEEL="ON" +export OPEN_SPIEL_BUILD_WITH_HANABI="ON" +export OPEN_SPIEL_BUILD_WITH_ACPC="ON" + +rm -rf build && mkdir build && cd build +cmake -DPython3_EXECUTABLE=${PYBIN} $PROJDIR/open_spiel + +NPROC="nproc" +if [[ "$OS" == "darwin"* || "$OS" == "Darwin"* ]]; then + NPROC="sysctl -n hw.physicalcpu" +fi + +MAKE_NUM_PROCS=$(${NPROC}) +let TEST_NUM_PROCS=4*${MAKE_NUM_PROCS} + +ctest -j$TEST_NUM_PROCS --output-on-failure -R "^python/*" ../open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/simultaneous_move_game.cc b/scenarios/bargaining/open_spiel/open_spiel/simultaneous_move_game.cc new file mode 100644 index 0000000..71c4e61 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/simultaneous_move_game.cc @@ -0,0 +1,78 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/simultaneous_move_game.h" + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/action_view.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { + +std::vector SimMoveState::FlatJointActionToActions( + Action flat_action) const { + std::vector actions(num_players_, kInvalidAction); + for (Player player = 0; player < num_players_; ++player) { + // For each player with legal actions available: + const auto legal_actions = LegalActions(player); + int num_actions = legal_actions.size(); + if (num_actions > 0) { + // Extract the least-significant digit (radix = the number legal actions + // for the current player) from flat_action. Use the digit as an index + // into the player's set of legal actions. + actions[player] = legal_actions[flat_action % num_actions]; + // Update the flat_action to be for the remaining players only. 
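+      // For illustration (hypothetical sizes): with per-player legal-action
+      // counts {2, 3, 2}, flat_action 7 decodes to indices 1, 0, 1 for
+      // players 0, 1, 2 respectively (7 % 2 = 1, then 7 / 2 = 3; 3 % 3 = 0,
+      // then 3 / 3 = 1; 1 % 2 = 1).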
+ flat_action /= num_actions; + } + } + return actions; +} + +void SimMoveState::ApplyFlatJointAction(Action flat_action) { + ApplyActions(FlatJointActionToActions(flat_action)); +} + +std::vector SimMoveState::LegalFlatJointActions() const { + ActionView view(*this); + FlatJointActions flat_joint_actions = view.flat_joint_actions(); + std::vector joint_actions; + joint_actions.reserve(flat_joint_actions.num_flat_joint_actions); + for (Action flat_joint_action : flat_joint_actions) { + joint_actions.push_back(flat_joint_action); + } + return joint_actions; +} + +std::string SimMoveState::FlatJointActionToString(Action flat_action) const { + // Assembles the string for each individual player action into a single + // string. For example, [Heads, Tails] would mean than player 0 chooses Heads, + // and player 1 chooses Tails. + std::string str; + for (auto player = Player{0}; player < num_players_; ++player) { + if (!str.empty()) str.append(", "); + const auto legal_actions = LegalActions(player); + int num_actions = legal_actions.size(); + str.append( + ActionToString(player, legal_actions[flat_action % num_actions])); + flat_action /= num_actions; + } + return absl::StrCat("[", str, "]"); +} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/simultaneous_move_game.h b/scenarios/bargaining/open_spiel/open_spiel/simultaneous_move_game.h new file mode 100644 index 0000000..0fbbfcf --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/simultaneous_move_game.h @@ -0,0 +1,130 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_SIMULTANEOUS_MOVE_GAME_H_ +#define OPEN_SPIEL_SIMULTANEOUS_MOVE_GAME_H_ + +#include +#include +#include + +#include "open_spiel/spiel.h" + +// This is the generic superclass for simultaneous move games. A simultaneous +// move game (AKA Markov Game) is one where all agents submit actions on each +// time step, and the state transists to a new state as a function of the joint +// action. +// +// Note that this implementation also supports mixed turn-based and simultaneous +// steps at which it is only a single player submits an action, when not at +// simultaneous nodes and not at chance nodes. +// +// For normal-form or matrix games, see normal_form_game.h or +// matrix.h. + +namespace open_spiel { + +class SimMoveGame; + +class SimMoveState : public State { + public: + SimMoveState(std::shared_ptr game) : State(game) {} + SimMoveState(const SimMoveState&) = default; + + // Subclasses must implement a per-player LegalActions function. + std::vector LegalActions(Player player) const override = 0; + + // LegalActions() returns either the chance outcomes (at a chance node), a + // flattened form of the joint legal actions (at simultaneous move nodes) - + // see discussion below, or the actions for the current player (at nodes + // where only a single player is making a decision). 
+ std::vector LegalActions() const override { + if (IsSimultaneousNode()) { + return LegalFlatJointActions(); + } else if (IsTerminal()) { + return {}; + } else if (IsChanceNode()) { + return LegalChanceOutcomes(); + } else { + return LegalActions(CurrentPlayer()); + } + } + + // We override this rather than DoApplyAction() since we want to prevent + // saving the flat action in the history. + void ApplyAction(Action action) override { + if (IsSimultaneousNode()) { + ApplyFlatJointAction(action); + } else { + const Player player = CurrentPlayer(); + DoApplyAction(action); + history_.push_back({player, action}); + } + } + + // Convert a flat joint action to a list of actions. + std::vector FlatJointActionToActions(Action flat_action) const; + + protected: + // To make the implementation of algorithms which traverse the whole game + // tree easier, we support the mapping of joint actions (one per player) + // to a single flat action taken by the player kSimultaneousPlayerId. + + // If we have three players with legal sets (a, b), (x, y, z), (p, q) + // respectively, then their 12 possible joint actions will be numbered as + // follows: + // 0 - (a, x, p) + // 1 - (b, x, p) + // 2 - (a, y, p) + // ... + // 10 - (b, y, q) + // 11 - (a, z, q) + // 12 - (b, z, q) + + // Implementors of simultaneous move games don't have to worry about this + // mapping, but simply check for player == kSimultaneousPlayerId and forward + // method calls as follows: + // ActionToString --> FlatJointActionToString + // LegalActions --> LegalFlatJointActions + // ApplyAction --> ApplyFlatJointAction + + // Since we repeatedly index into the list of legal actions, it is necessary + // that LegalActions returns the same list (in the same order) when called + // twice on the same state. + + // if the number of legal actions overflows int64, this will of course not + // work correctly. + + // Map a flat joint action for the simultaneous player to a string, in the + // form "[action1, action2, ...]". + std::string FlatJointActionToString(Action flat_action) const; + + // Return a list of legal flat joint actions. See above for details. + std::vector LegalFlatJointActions() const; + + // Apply a flat joint action, updating the state. + void ApplyFlatJointAction(Action flat_action); + + void DoApplyActions(const std::vector& actions) override = 0; +}; + +class SimMoveGame : public Game { + protected: + SimMoveGame(GameType game_type, GameParameters game_parameters) + : Game(game_type, game_parameters) {} +}; + +} // namespace open_spiel + +#endif // OPEN_SPIEL_SIMULTANEOUS_MOVE_GAME_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/spiel.cc b/scenarios/bargaining/open_spiel/open_spiel/spiel.cc new file mode 100644 index 0000000..2743bd1 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/spiel.cc @@ -0,0 +1,923 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/spiel.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/container/btree_map.h" +#include "open_spiel/abseil-cpp/absl/random/bit_gen_ref.h" +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/abseil-cpp/absl/strings/ascii.h" +#include "open_spiel/abseil-cpp/absl/strings/match.h" +#include "open_spiel/abseil-cpp/absl/strings/numbers.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/usage_logging.h" + +namespace open_spiel { +namespace { + +constexpr const int kSerializationVersion = 1; +constexpr const char* kSerializeMetaSectionHeader = "[Meta]"; +constexpr const char* kSerializeGameSectionHeader = "[Game]"; +constexpr const char* kSerializeGameRNGStateSectionHeader = "[GameRNGState]"; +constexpr const char* kSerializeStateSectionHeader = "[State]"; + +// Returns the available parameter keys, to be used as a utility function. +std::string ListValidParameters( + const GameParameters& param_spec) { + std::vector available_keys; + available_keys.reserve(param_spec.size()); + for (const auto& item : param_spec) { + available_keys.push_back(item.first); + } + std::sort(available_keys.begin(), available_keys.end()); + return absl::StrJoin(available_keys, ", "); +} + +// Check on supplied parameters for game creation. +// Issues a SpielFatalError if any are missing, of the wrong type, or +// unexpectedly present. +void ValidateParams(const GameParameters& params, + const GameParameters& param_spec) { + // Check all supplied parameters are supported and of the right type. + for (const auto& param : params) { + const auto it = param_spec.find(param.first); + if (it == param_spec.end()) { + SpielFatalError(absl::StrCat( + "Unknown parameter '", param.first, + "'. Available parameters are: ", ListValidParameters(param_spec))); + } + if (it->second.type() != param.second.type()) { + SpielFatalError(absl::StrCat( + "Wrong type for parameter ", param.first, + ". Expected type: ", GameParameterTypeToString(it->second.type()), + ", got ", GameParameterTypeToString(param.second.type()), " with ", + param.second.ToString())); + } + } + // Check we aren't missing any mandatory parameters. 
+ for (const auto& param : param_spec) { + if (param.second.is_mandatory() && !params.count(param.first)) { + SpielFatalError(absl::StrCat("Missing parameter ", param.first)); + } + } +} + +} // namespace + +std::ostream& operator<<(std::ostream& os, const StateType& type) { + switch (type) { + case StateType::kMeanField: { + os << "MEAN_FIELD"; + break; + } + case StateType::kChance: { + os << "CHANCE"; + break; + } + case StateType::kDecision: { + os << "DECISION"; + break; + } + case StateType::kTerminal: { + os << "TERMINAL"; + break; + } + } + return os; +} + +StateType State::GetType() const { + if (IsChanceNode()) { + return StateType::kChance; + } else if (IsTerminal()) { + return StateType::kTerminal; + } else if (CurrentPlayer() == kMeanFieldPlayerId) { + return StateType::kMeanField; + } else { + return StateType::kDecision; + } +} + +std::unique_ptr State::ResampleFromInfostate( + int player_id, + std::function rng) const { + if (GetGame()->GetType().information == + GameType::Information::kPerfectInformation) { + return Clone(); + } + SpielFatalError("ResampleFromInfostate() not implemented."); +} + +bool GameType::ContainsRequiredParameters() const { + for (const auto& key_val : parameter_specification) { + if (key_val.second.is_mandatory()) { + return true; + } + } + return false; +} + +GameRegisterer::GameRegisterer(const GameType& game_type, CreateFunc creator) { + RegisterGame(game_type, creator); +} + +std::shared_ptr GameRegisterer::CreateByName( + const std::string& short_name, const GameParameters& params) { + // Check if it's a game with a known issue. If so, output a warning. + if (absl::c_linear_search(GamesWithKnownIssues(), short_name)) { + std::cerr << "Warning! This game has known issues. Please see the games " + << "list on github or the code for details." << std::endl; + } + + // Find the factory for this game and load it. + auto iter = factories().find(short_name); + if (iter == factories().end()) { + SpielFatalError(absl::StrCat("Unknown game '", short_name, + "'. 
Available games are:\n", + absl::StrJoin(RegisteredNames(), "\n"))); + + } else { + ValidateParams(params, iter->second.first.parameter_specification); + return (iter->second.second)(params); + } +} + +std::vector GameRegisterer::GameTypesToShortNames( + const std::vector& game_types) { + std::vector names; + names.reserve(game_types.size()); + for (const auto& game_type : game_types) { + names.push_back(game_type.short_name); + } + return names; +} + +std::vector GameRegisterer::RegisteredNames() { + return GameTypesToShortNames(RegisteredGames()); +} + +std::vector GameRegisterer::GamesWithKnownIssues() { + return {"quoridor", "rbc"}; +} + +std::vector GameRegisterer::RegisteredGames() { + std::vector games; + for (const auto& key_val : factories()) { + games.push_back(key_val.second.first); + } + return games; +} + +std::vector GameRegisterer::RegisteredConcreteGames() { + std::vector games; + for (const auto& key_val : factories()) { + if (key_val.second.first.is_concrete) { + games.push_back(key_val.second.first); + } + } + return games; +} + +std::vector GameRegisterer::RegisteredConcreteNames() { + return GameTypesToShortNames(RegisteredConcreteGames()); +} + +bool GameRegisterer::IsValidName(const std::string& short_name) { + return factories().find(short_name) != factories().end(); +} + +void GameRegisterer::RegisterGame(const GameType& game_type, + GameRegisterer::CreateFunc creator) { + factories()[game_type.short_name] = std::make_pair(game_type, creator); +} + +bool IsGameRegistered(const std::string& short_name) { + return GameRegisterer::IsValidName(short_name); +} + +std::vector RegisteredGames() { + return GameRegisterer::RegisteredNames(); +} + +std::vector RegisteredGameTypes() { + return GameRegisterer::RegisteredGames(); +} + +std::shared_ptr DeserializeGame(const std::string& serialized) { + std::pair game_and_rng_state = + absl::StrSplit(serialized, kSerializeGameRNGStateSectionHeader); + + // Remove the trailing "\n" from the game section. + if (!game_and_rng_state.first.empty() && + game_and_rng_state.first.back() == '\n') { + game_and_rng_state.first.pop_back(); + } + std::shared_ptr game = LoadGame(game_and_rng_state.first); + + if (!game_and_rng_state.second.empty()) { + // Game is implicitly stochastic. + // Remove the trailing "\n" from the RNG state section. 
+ if (game_and_rng_state.second.back() == '\n') { + game_and_rng_state.second.pop_back(); + } + game->SetRNGState(game_and_rng_state.second); + } + return game; +} + +std::shared_ptr LoadGame(const std::string& game_string) { + return LoadGame(GameParametersFromString(game_string)); +} + +std::shared_ptr LoadGame(const std::string& short_name, + const GameParameters& params) { + std::shared_ptr result = + GameRegisterer::CreateByName(short_name, params); + if (result == nullptr) { + SpielFatalError(absl::StrCat("Unable to create game: ", short_name)); + } + return result; +} + +std::shared_ptr LoadGame(GameParameters params) { + auto it = params.find("name"); + if (it == params.end()) { + SpielFatalError(absl::StrCat("No 'name' parameter in params: ", + GameParametersToString(params))); + } + std::string name = it->second.string_value(); + params.erase(it); + std::shared_ptr result = + GameRegisterer::CreateByName(name, params); + if (result == nullptr) { + SpielFatalError(absl::StrCat("Unable to create game: ", name)); + } + LogUsage(); + return result; +} + +State::State(std::shared_ptr game) + : game_(game), + num_distinct_actions_(game->NumDistinctActions()), + num_players_(game->NumPlayers()), + move_number_(0) {} + +void NormalizePolicy(ActionsAndProbs* policy) { + const double sum = absl::c_accumulate( + *policy, 0.0, [](double a, auto& b) { return a + b.second; }); + absl::c_for_each(*policy, [sum](auto& o) { o.second /= sum; }); +} + +std::pair SampleAction(const ActionsAndProbs& outcomes, + absl::BitGenRef rng) { + return SampleAction(outcomes, absl::Uniform(rng, 0.0, 1.0)); +} +std::pair SampleAction(const ActionsAndProbs& outcomes, + double z) { + SPIEL_CHECK_GE(z, 0); + SPIEL_CHECK_LT(z, 1); + + // Special case for one-item lists. + if (outcomes.size() == 1) { + SPIEL_CHECK_FLOAT_EQ(outcomes[0].second, 1.0); + return outcomes[0]; + } + + // First do a check that this is indeed a proper discrete distribution. + double sum = 0; + for (const std::pair& outcome : outcomes) { + double prob = outcome.second; + SPIEL_CHECK_PROB(prob); + sum += prob; + } + SPIEL_CHECK_FLOAT_EQ(sum, 1.0); + + // Now sample an outcome. + sum = 0; + for (const std::pair& outcome : outcomes) { + double prob = outcome.second; + if (sum <= z && z < (sum + prob)) { + return outcome; + } + sum += prob; + } + + // If we get here, something has gone wrong + std::cerr << "Chance sampling failed; outcomes:" << std::endl; + for (const std::pair& outcome : outcomes) { + std::cerr << outcome.first << " " << outcome.second << std::endl; + } + SpielFatalError( + absl::StrCat("Internal error: failed to sample an outcome; z=", z)); +} + +std::string State::Serialize() const { + // This simple serialization doesn't work for the following games: + // - games with sampled chance nodes, since the history doesn't give us enough + // information to reconstruct the state. + // - Mean field games, since this base class does not store the history of + // state distributions passed in UpdateDistribution() (and it would be + // very expensive to do so for games with many possible states and a long + // time horizon). + // If you wish to serialize states in such games, you must implement custom + // serialization and deserialization for the state. 
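+  // The default format produced below is simply the newline-separated action
+  // history; e.g. a (hypothetical) state reached via actions 3, 1, 4
+  // serializes to "3\n1\n4\n".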
+ SPIEL_CHECK_NE(game_->GetType().chance_mode, + GameType::ChanceMode::kSampledStochastic); + SPIEL_CHECK_NE(game_->GetType().dynamics, GameType::Dynamics::kMeanField); + return absl::StrCat(absl::StrJoin(History(), "\n"), "\n"); +} + +Action State::StringToAction(Player player, + const std::string& action_str) const { + for (const Action action : LegalActions()) { + if (action_str == ActionToString(player, action)) return action; + } + SpielFatalError( + absl::StrCat("Couldn't find an action matching ", action_str)); +} + +void State::ApplyAction(Action action_id) { + // history_ needs to be modified *after* DoApplyAction which could + // be using it. + + // Cannot apply an invalid action. + SPIEL_CHECK_NE(action_id, kInvalidAction); + Player player = CurrentPlayer(); + DoApplyAction(action_id); + history_.push_back({player, action_id}); + ++move_number_; +} + +void State::ApplyActionWithLegalityCheck(Action action_id) { + std::vector legal_actions = LegalActions(); + if (absl::c_find(legal_actions, action_id) == legal_actions.end()) { + Player cur_player = CurrentPlayer(); + SpielFatalError( + absl::StrCat("Current player ", cur_player, " calling ApplyAction ", + "with illegal action (", action_id, "): ", + ActionToString(cur_player, action_id))); + } + ApplyAction(action_id); +} + +void State::ApplyActions(const std::vector& actions) { + // history_ needs to be modified *after* DoApplyActions which could + // be using it. + DoApplyActions(actions); + history_.reserve(history_.size() + actions.size()); + for (int player = 0; player < actions.size(); ++player) { + history_.push_back({player, actions[player]}); + } + ++move_number_; +} + +void State::ApplyActionsWithLegalityChecks(const std::vector& actions) { + for (Player player = 0; player < actions.size(); ++player) { + std::vector legal_actions = LegalActions(player); + if (!legal_actions.empty() && + absl::c_find(legal_actions, actions[player]) == legal_actions.end()) { + SpielFatalError( + absl::StrCat("Player ", player, " calling ApplyAction ", + "with illegal action (", actions[player], "): ", + ActionToString(player, actions[player]))); + } + } + ApplyActions(actions); +} + +std::vector State::LegalActionsMask(Player player) const { + int length = (player == kChancePlayerId) ? game_->MaxChanceOutcomes() + : num_distinct_actions_; + std::vector mask(length, 0); + for (int action : LegalActions(player)) mask[action] = 1; + return mask; +} + +std::vector> Game::NewInitialStates() const { + std::vector> states; + if (GetType().dynamics == GameType::Dynamics::kMeanField && + NumPlayers() >= 2) { + states.reserve(NumPlayers()); + for (int p = 0; p < NumPlayers(); ++p) { + states.push_back(NewInitialStateForPopulation(p)); + } + return states; + } + states.push_back(NewInitialState()); + return states; +} + +std::unique_ptr Game::DeserializeState(const std::string& str) const { + // This does not work for games with sampled chance nodes and for mean field + // games. See comments in State::Serialize() for the explanation. If you wish + // to serialize states in such games, you must implement custom serialization + // and deserialization for the state. 
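+  // The expected input is the newline-separated action history written by
+  // State::Serialize(); at simultaneous-move nodes, NumPlayers() consecutive
+  // lines are consumed as a single joint action (see the loop below).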
+ SPIEL_CHECK_NE(game_type_.chance_mode, + GameType::ChanceMode::kSampledStochastic); + SPIEL_CHECK_NE(game_type_.dynamics, + GameType::Dynamics::kMeanField); + + std::unique_ptr state = NewInitialState(); + if (str.empty()) { + return state; + } + std::vector lines = absl::StrSplit(str, '\n'); + for (int i = 0; i < lines.size(); ++i) { + if (lines[i].empty()) continue; + if (state->IsSimultaneousNode()) { + std::vector actions; + for (int p = 0; p < state->NumPlayers(); ++p, ++i) { + SPIEL_CHECK_LT(i, lines.size()); + Action action = static_cast(std::stol(lines[i])); + actions.push_back(action); + } + state->ApplyActions(actions); + // Must decrement i here, otherwise it is incremented too many times. + --i; + } else { + Action action = static_cast(std::stol(lines[i])); + state->ApplyAction(action); + } + } + return state; +} + +std::string SerializeGameAndState(const Game& game, const State& state) { + std::string str = ""; + + // Meta section. + absl::StrAppend(&str, + "# Automatically generated by OpenSpiel " + "SerializeGameAndState\n"); + absl::StrAppend(&str, kSerializeMetaSectionHeader, "\n"); + absl::StrAppend(&str, "Version: ", kSerializationVersion, "\n"); + absl::StrAppend(&str, "\n"); + + // Game section. + absl::StrAppend(&str, kSerializeGameSectionHeader, "\n"); + absl::StrAppend(&str, game.Serialize(), "\n"); + + // State section. + absl::StrAppend(&str, kSerializeStateSectionHeader, "\n"); + absl::StrAppend(&str, state.Serialize(), "\n"); + + return str; +} + +std::pair, std::unique_ptr> +DeserializeGameAndState(const std::string& serialized_state) { + std::vector lines = absl::StrSplit(serialized_state, '\n'); + + enum Section { kInvalid = -1, kMeta = 0, kGame = 1, kState = 2 }; + std::vector section_strings = {"", "", ""}; + Section cur_section = kInvalid; + + for (int i = 0; i < lines.size(); ++i) { + if (lines[i].empty() || lines[i].at(0) == '#') { + // Skip comments and blank lines. + } else if (lines[i] == kSerializeMetaSectionHeader) { + SPIEL_CHECK_EQ(cur_section, kInvalid); + cur_section = kMeta; + } else if (lines[i] == kSerializeGameSectionHeader) { + SPIEL_CHECK_EQ(cur_section, kMeta); + cur_section = kGame; + } else if (lines[i] == kSerializeStateSectionHeader) { + SPIEL_CHECK_EQ(cur_section, kGame); + cur_section = kState; + } else { + SPIEL_CHECK_NE(cur_section, kInvalid); + absl::StrAppend(§ion_strings[cur_section], lines[i], "\n"); + } + } + + // Remove the trailing "\n" from the game and state sections. + if (!section_strings[kGame].empty() && + section_strings[kGame].back() == '\n') { + section_strings[kGame].pop_back(); + } + if (!section_strings[kState].empty() && + section_strings[kState].back() == '\n') { + section_strings[kState].pop_back(); + } + + // We currently just ignore the meta section. 
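+  // For reference, the input produced by SerializeGameAndState above looks
+  // roughly like this (hypothetical game string and history):
+  //   [Meta]
+  //   Version: 1
+  //
+  //   [Game]
+  //   tic_tac_toe()
+  //
+  //   [State]
+  //   4
+  //   ...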
+ std::shared_ptr game = DeserializeGame(section_strings[kGame]); + std::unique_ptr state = + game->DeserializeState(section_strings[kState]); + + return std::pair, std::unique_ptr>( + game, std::move(state)); +} + +std::ostream& operator<<(std::ostream& stream, GameType::Dynamics value) { + switch (value) { + case GameType::Dynamics::kSimultaneous: + return stream << "Simultaneous"; + case GameType::Dynamics::kSequential: + return stream << "Sequential"; + case GameType::Dynamics::kMeanField: + return stream << "MeanField"; + default: + SpielFatalError(absl::StrCat("Unknown dynamics: ", value)); + } +} + +std::istream& operator>>(std::istream& stream, GameType::Dynamics& var) { + std::string str; + stream >> str; + if (str == "Simultaneous") { + var = GameType::Dynamics::kSimultaneous; + } else if (str == "Sequential") { + var = GameType::Dynamics::kSequential; + } else if (str == "MeanField") { + var = GameType::Dynamics::kMeanField; + } else { + SpielFatalError(absl::StrCat("Unknown dynamics ", str, ".")); + } + return stream; +} + +std::ostream& operator<<(std::ostream& stream, GameType::ChanceMode value) { + switch (value) { + case GameType::ChanceMode::kDeterministic: + return stream << "Deterministic"; + case GameType::ChanceMode::kExplicitStochastic: + return stream << "ExplicitStochastic"; + case GameType::ChanceMode::kSampledStochastic: + return stream << "SampledStochastic"; + default: + SpielFatalError("Unknown mode."); + } +} + +std::ostream& operator<<(std::ostream& stream, const State& state) { + return stream << state.ToString(); +} + +std::istream& operator>>(std::istream& stream, GameType::ChanceMode& var) { + std::string str; + stream >> str; + if (str == "Deterministic") { + var = GameType::ChanceMode::kDeterministic; + } else if (str == "ExplicitStochastic") { + var = GameType::ChanceMode::kExplicitStochastic; + } else if (str == "SampledStochastic") { + var = GameType::ChanceMode::kSampledStochastic; + } else { + SpielFatalError(absl::StrCat("Unknown chance mode ", str, ".")); + } + return stream; +} + +std::ostream& operator<<(std::ostream& stream, GameType::Information value) { + switch (value) { + case GameType::Information::kOneShot: + return stream << "OneShot"; + case GameType::Information::kPerfectInformation: + return stream << "PerfectInformation"; + case GameType::Information::kImperfectInformation: + return stream << "ImperfectInformation"; + default: + SpielFatalError("Unknown value."); + } +} + +std::istream& operator>>(std::istream& stream, GameType::Information& var) { + std::string str; + stream >> str; + if (str == "OneShot") { + var = GameType::Information::kOneShot; + } else if (str == "PerfectInformation") { + var = GameType::Information::kPerfectInformation; + } else if (str == "ImperfectInformation") { + var = GameType::Information::kImperfectInformation; + } else { + SpielFatalError(absl::StrCat("Unknown information ", str, ".")); + } + return stream; +} + +std::ostream& operator<<(std::ostream& stream, GameType::Utility value) { + switch (value) { + case GameType::Utility::kZeroSum: + return stream << "ZeroSum"; + case GameType::Utility::kConstantSum: + return stream << "ConstantSum"; + case GameType::Utility::kGeneralSum: + return stream << "GeneralSum"; + case GameType::Utility::kIdentical: + return stream << "Identical"; + default: + SpielFatalError("Unknown value."); + } +} + +std::istream& operator>>(std::istream& stream, GameType::Utility& var) { + std::string str; + stream >> str; + if (str == "ZeroSum") { + var = 
GameType::Utility::kZeroSum; + } else if (str == "ConstantSum") { + var = GameType::Utility::kConstantSum; + } else if (str == "GeneralSum") { + var = GameType::Utility::kGeneralSum; + } else if (str == "Identical") { + var = GameType::Utility::kIdentical; + } else { + SpielFatalError(absl::StrCat("Unknown utility ", str, ".")); + } + return stream; +} + +std::ostream& operator<<(std::ostream& stream, GameType::RewardModel value) { + switch (value) { + case GameType::RewardModel::kRewards: + return stream << "Rewards"; + case GameType::RewardModel::kTerminal: + return stream << "Terminal"; + default: + SpielFatalError("Unknown value."); + } +} + +std::istream& operator>>(std::istream& stream, GameType::RewardModel& var) { + std::string str; + stream >> str; + if (str == "Rewards") { + var = GameType::RewardModel::kRewards; + } else if (str == "Terminal") { + var = GameType::RewardModel::kTerminal; + } else { + SpielFatalError(absl::StrCat("Unknown reward model ", str, ".")); + } + return stream; +} + +std::string Game::Serialize() const { + std::string str = ToString(); + if (GetType().chance_mode == GameType::ChanceMode::kSampledStochastic) { + absl::StrAppend(&str, "\n", kSerializeGameRNGStateSectionHeader, "\n", + GetRNGState()); + } + return str; +} + +std::string Game::ToString() const { + GameParameters params = game_parameters_; + params["name"] = GameParameter(game_type_.short_name); + return GameParametersToString(params); +} + +std::string GameTypeToString(const GameType& game_type) { + std::string str = ""; + + absl::StrAppend(&str, "short_name: ", game_type.short_name, "\n"); + absl::StrAppend(&str, "long_name: ", game_type.long_name, "\n"); + + absl::StrAppend(&str, "dynamics: ", + open_spiel::internal::SpielStrCat(game_type.dynamics), "\n"); + + absl::StrAppend(&str, "chance_mode: ", + open_spiel::internal::SpielStrCat(game_type.chance_mode), + "\n"); + + absl::StrAppend(&str, "information: ", + open_spiel::internal::SpielStrCat(game_type.information), + "\n"); + + absl::StrAppend(&str, "utility: ", + open_spiel::internal::SpielStrCat(game_type.utility), "\n"); + + absl::StrAppend(&str, "reward_model: ", + open_spiel::internal::SpielStrCat(game_type.reward_model), + "\n"); + + absl::StrAppend(&str, "max_num_players: ", game_type.max_num_players, "\n"); + absl::StrAppend(&str, "min_num_players: ", game_type.min_num_players, "\n"); + + absl::StrAppend( + &str, "provides_information_state_string: ", + game_type.provides_information_state_string ? "true" : "false", "\n"); + absl::StrAppend( + &str, "provides_information_state_tensor: ", + game_type.provides_information_state_tensor ? "true" : "false", "\n"); + + absl::StrAppend(&str, "provides_observation_string: ", + game_type.provides_observation_string ? "true" : "false", + "\n"); + absl::StrAppend(&str, "provides_observation_tensor: ", + game_type.provides_observation_tensor ? "true" : "false", + "\n"); + absl::StrAppend( + &str, "provides_factored_observation_string: ", + game_type.provides_factored_observation_string ? "true" : "false", "\n"); + + // Check that there are no newlines in the serialized params. 
+ std::string serialized_params = + SerializeGameParameters(game_type.parameter_specification); + SPIEL_CHECK_TRUE(!absl::StrContains(serialized_params, "\n")); + absl::StrAppend(&str, "parameter_specification: ", serialized_params); + + return str; +} + +GameType GameTypeFromString(const std::string& game_type_str) { + absl::btree_map game_type_values; + std::vector parts = absl::StrSplit(game_type_str, '\n'); + + SPIEL_CHECK_EQ(parts.size(), 15); + + for (const auto& part : parts) { + std::pair pair = + absl::StrSplit(part, absl::MaxSplits(": ", 1)); + game_type_values.insert(pair); + } + + GameType game_type = GameType(); + game_type.short_name = game_type_values.at("short_name"); + game_type.long_name = game_type_values.at("long_name"); + + std::istringstream(game_type_values.at("dynamics")) >> game_type.dynamics; + std::istringstream(game_type_values.at("chance_mode")) >> + game_type.chance_mode; + std::istringstream(game_type_values.at("information")) >> + game_type.information; + std::istringstream(game_type_values.at("utility")) >> game_type.utility; + std::istringstream(game_type_values.at("reward_model")) >> + game_type.reward_model; + + SPIEL_CHECK_TRUE(absl::SimpleAtoi(game_type_values.at("max_num_players"), + &(game_type.max_num_players))); + SPIEL_CHECK_TRUE(absl::SimpleAtoi(game_type_values.at("min_num_players"), + &(game_type.min_num_players))); + + game_type.provides_information_state_string = + game_type_values.at("provides_information_state_string") == "true"; + game_type.provides_information_state_tensor = + game_type_values.at("provides_information_state_tensor") == "true"; + + game_type.provides_observation_string = + game_type_values.at("provides_observation_string") == "true"; + game_type.provides_observation_tensor = + game_type_values.at("provides_observation_tensor") == "true"; + game_type.provides_factored_observation_string = + game_type_values.at("provides_factored_observation_string") == "true"; + + game_type.parameter_specification = + DeserializeGameParameters(game_type_values.at("parameter_specification")); + return game_type; +} + +std::vector State::ObservationTensor(Player player) const { + // We add this player check, to prevent errors if the game implementation + // lacks that check (in particular as this function is the one used in + // Python). This can lead to doing this check twice. + // TODO(author2): Do we want to prevent executing this twice for games + // that implement it? + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + std::vector observation(game_->ObservationTensorSize()); + ObservationTensor(player, absl::MakeSpan(observation)); + return observation; +} + +void State::ObservationTensor(Player player, std::vector* values) const { + // Retained for backwards compatibility. + values->resize(game_->ObservationTensorSize()); + ObservationTensor(player, absl::MakeSpan(*values)); +} + +std::vector State::InformationStateTensor(Player player) const { + // We add this player check, to prevent errors if the game implementation + // lacks that check (in particular as this function is the one used in + // Python). This can lead to doing this check twice. + // TODO(author2): Do we want to prevent executing this twice for games + // that implement it? 
+ SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, num_players_); + std::vector info_state(game_->InformationStateTensorSize()); + InformationStateTensor(player, absl::MakeSpan(info_state)); + return info_state; +} + +void State::InformationStateTensor(Player player, + std::vector* values) const { + // Retained for backwards compatibility. + values->resize(game_->InformationStateTensorSize()); + InformationStateTensor(player, absl::MakeSpan(*values)); +} + +bool State::PlayerAction::operator==(const PlayerAction& other) const { + return player == other.player && action == other.action; +} + +int State::MeanFieldPopulation() const { + if (GetGame()->GetType().dynamics != GameType::Dynamics::kMeanField) { + SpielFatalError( + "MeanFieldPopulation() does not make sense for games that are not mean " + "field games."); + } + return 0; +} + +std::ostream& operator<<(std::ostream& os, const State::PlayerAction& action) { + os << absl::StreamFormat("PlayerAction(player=%i,action=%i)", action.player, + action.action); + return os; +} + +std::vector ActionsToStrings(const State& state, + const std::vector& actions) { + std::vector out; + out.reserve(actions.size()); + for (Action action : actions) out.push_back(state.ActionToString(action)); + return out; +} + +std::string ActionsToString(const State& state, + const std::vector& actions) { + return absl::StrCat( + "[", absl::StrJoin(ActionsToStrings(state, actions), ", "), "]"); +} + +void SpielFatalErrorWithStateInfo(const std::string& error_msg, + const Game& game, + const State& state) { + // A fatal error wrapper designed to return useful debugging information. + const std::string& info = SerializeGameAndState(game, state); + SpielFatalError(absl::StrCat(error_msg, "Serialized state:\n", info)); +} + +std::pair, + std::unique_ptr> BuildStateFromHistoryString( + const std::string& game_string, + const std::string& history, + int max_steps) { + std::pair, std::unique_ptr> game_and_state; + game_and_state.first = LoadGame(game_string); + game_and_state.second = game_and_state.first->NewInitialState(); + std::string history_copy(absl::StripAsciiWhitespace(history)); + if (history_copy[0] == '[') { + history_copy = history_copy.substr(1); + } + if (history_copy[history_copy.length() - 1] == ']') { + history_copy = history_copy.substr(0, history_copy.length() - 1); + } + + std::vector legal_actions; + State* state = game_and_state.second.get(); + int steps = 0; + std::vector parts = absl::StrSplit(history_copy, ','); + for (const std::string& part : parts) { + if (max_steps > 0 && steps >= max_steps) { + break; + } + Action action; + bool atoi_ret = absl::SimpleAtoi(absl::StripAsciiWhitespace(part), &action); + if (!atoi_ret) { + SpielFatalError(absl::StrCat("Problem parsing action: ", part)); + } + legal_actions = state->LegalActions(); + if (absl::c_find(legal_actions, action) == legal_actions.end()) { + SpielFatalError(absl::StrCat("Illegal move detected!\nState:\n", + state->ToString(), "\nAction: ", action, + " (", state->ActionToString(action), ")\n", + "History: ", state->HistoryString())); + } + state->ApplyAction(action); + steps++; + } + + return game_and_state; +} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/spiel.h b/scenarios/bargaining/open_spiel/open_spiel/spiel.h new file mode 100644 index 0000000..1c2e206 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/spiel.h @@ -0,0 +1,1196 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 
2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_SPIEL_H_ +#define OPEN_SPIEL_SPIEL_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/random/bit_gen_ref.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/synchronization/mutex.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/observer.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { + +// Static information for a game. This will determine what algorithms are +// applicable. For example, minimax search is only applicable to two-player, +// zero-sum games with perfect information. (Though can be made applicable to +// games that are constant-sum.) +// +// The number of players is not considered part of this static game type, +// because this depends on the parameterization. See Game::NumPlayers. +struct GameType { + // A short name with no spaces that uniquely identifies the game, e.g. + // "msoccer". This is the key used to distinguish games. + std::string short_name; + + // A long human-readable name, e.g. "Markov Soccer". + std::string long_name; + + // Is the game one-player-at-a-time or do players act simultaneously? + enum class Dynamics { + kSimultaneous, // In some or all nodes every player acts. + kSequential, // Turn-based games. + // Mean field game. In particular, this adds mean field nodes. Support for + // mean field games is experimental. See details in games/mfg/README.md. + kMeanField, // Is a Mean Field Game + }; + Dynamics dynamics; + + // Are there any chance nodes? If so, how is chance treated? + // Either all possible chance outcomes are explicitly returned as + // ChanceOutcomes(), and the result of ApplyAction() is deterministic. Or + // just one ChanceOutcome is returned, and the result of ApplyAction() is + // stochastic. If in doubt, it is better to implement stochastic games with + // kExplicitStochastic, as this makes more information available to any + // learning algorithms you choose to use (i.e. the whole chance outcome + // distribution is visible to the algorithm, rather than just the sampled + // outcome). For more discussion of this field, see the github issue: + // https://github.com/deepmind/open_spiel/issues/792. + enum class ChanceMode { + kDeterministic, // No chance nodes + kExplicitStochastic, // Has at least one chance node, all with + // deterministic ApplyAction() + kSampledStochastic, // At least one chance node with non-deterministic + // ApplyAction() + }; + ChanceMode chance_mode; + + // The information type of the game. + enum class Information { + kOneShot, // aka Normal-form games (single simultaneous turn). + kPerfectInformation, // All players know the state of the game. + kImperfectInformation, // Some information is hidden from some players. 
+ }; + Information information; + + // Whether the game has any constraints on the player utilities. + enum class Utility { + kZeroSum, // Utilities of all players sum to 0 + kConstantSum, // Utilities of all players sum to a constant + kGeneralSum, // Total utility of all players differs in different outcomes + kIdentical, // Every player gets an identical value (cooperative game). + }; + Utility utility; + + // When are rewards handed out? Note that even if the game only specifies + // utilities at terminal states, the default implementation of State::Rewards + // should work for RL uses (giving 0 everywhere except terminal states). + enum class RewardModel { + kRewards, // RL-style func r(s, a, s') via State::Rewards() call at s'. + kTerminal, // Games-style, only at terminals. Call (State::Returns()). + }; + RewardModel reward_model; + + // How many players can play the game. If the number can vary, the actual + // instantiation of the game should specify how many players there are. + int max_num_players; + int min_num_players; + + // Which type of information state representations are supported? + // The information state is a perfect-recall state-of-the-game from the + // perspective of one player. + bool provides_information_state_string; + bool provides_information_state_tensor; + + // Which type of observation representations are supported? + // The observation is some subset of the information state with the property + // that remembering all the player's observations and actions is sufficient + // to reconstruct the information state. + bool provides_observation_string; + bool provides_observation_tensor; + + GameParameters parameter_specification; + bool ContainsRequiredParameters() const; + + // A number of optional values that have defaults, whose values can be + // overridden in each game. + + // Can the game be loaded with no parameters? It is strongly recommended that + // games be loadable with default arguments. + bool default_loadable = true; + + // Can we factorize observations into public and private parts? + // This is similar to observation fields before, but adds additional + // distinction between public and private observations. + bool provides_factored_observation_string = false; + + bool provides_information_state() const { + return provides_information_state_tensor + || provides_information_state_string; + } + bool provides_observation() const { + return provides_observation_tensor + || provides_observation_string; + } + + // Is this a concrete game, i.e. an actual game? Most games in OpenSpiel are + // concrete games. Some games that are registered are not concrete games; for + // example, game wrappers and other game transforms, or games that are + // constructed from a file (e.g. efg_game). + bool is_concrete = true; +}; + +// Information about a concrete Game instantiation. +// This information may depend on the game parameters, and hence cannot +// be part of `GameType`. +struct GameInfo { + // The size of the action space. See `Game` for a full description. + int num_distinct_actions; + + // Maximum number of distinct chance outcomes for chance nodes in the game. + int max_chance_outcomes; + + // The number of players in this instantiation of the game. + // Does not include the chance-player. + int num_players; + + // Utility range. These functions define the lower and upper bounds on the + // values returned by State::PlayerReturn(Player player) over all valid player + // numbers. 
This range should be as tight as possible; the intention is to + // give some information to algorithms that require it, and so their + // performance may suffer if the range is not tight. Loss/draw/win outcomes + // are common among games and should use the standard values of {-1,0,1}. + double min_utility; + double max_utility; + + // The total utility for all players, if this is a constant-sum-utility game. + // Should be zero if the game is zero-sum. + absl::optional utility_sum; + + // The maximum number of player decisions in a game. Does not include chance + // events. For a simultaneous action game, this is the maximum number of joint + // decisions. In a turn-based game, this is the maximum number of individual + // decisions summed over all players. + int max_game_length; +}; + +std::ostream& operator<<(std::ostream& os, const StateType& type); + +std::ostream& operator<<(std::ostream& stream, GameType::Dynamics value); +std::ostream& operator<<(std::ostream& stream, GameType::ChanceMode value); +std::ostream& operator<<(std::ostream& stream, GameType::Information value); +std::ostream& operator<<(std::ostream& stream, GameType::Utility value); +std::ostream& operator<<(std::ostream& stream, GameType::RewardModel value); + +// The probability of taking each possible action in a particular info state. +using ActionsAndProbs = std::vector>; + +// We alias this here as we can't import state_distribution.h or we'd have a +// circular dependency. +using HistoryDistribution = + std::pair>, std::vector>; + +// Forward declarations. +class Game; +class Observer; + +// An abstract class that represents a state of the game. +class State { + public: + virtual ~State() = default; + + // Derived classes must call one of these constructors. Note that a state must + // be passed a pointer to the game which created it. Some methods in some + // games rely on this and so it must correspond to a valid game object. + // The easiest way to ensure this is to use Game::NewInitialState to create + // new states, which will pass a pointer to the parent game object. Also, + // since this shared pointer to the parent is required, Game objects cannot + // be used as value types and should always be created via a shared pointer. + // See the documentation of the Game object for further details. + State(std::shared_ptr game); + State(const State&) = default; + + // Returns current player. Player numbers start from 0. + // Negative numbers are for chance (-1) or simultaneous (-2). + // kTerminalPlayerId should be returned on a TerminalNode(). + virtual Player CurrentPlayer() const = 0; + + // Change the state of the game by applying the specified action in turn-based + // games or in non-simultaneous nodes of simultaneous move games. + // This function encodes the logic of the game rules. + // + // In the case of chance nodes, the behavior of this function depends on + // GameType::chance_mode. If kExplicit, then the outcome should be + // directly applied. If kSampled, then a dummy outcome is passed and the + // sampling of and outcome should be done in this function and then applied. + // + // Games should implement DoApplyAction. + virtual void ApplyAction(Action action_id); + + // Helper versions of ApplyAction that first does a legality check. + virtual void ApplyActionWithLegalityCheck(Action action_id); + + // `LegalActions(Player player)` is valid for all nodes in all games, + // returning an empty list for players who don't act at this state. The + // actions should be returned in ascending order. 
+ // + // This default implementation is fine for turn-based games, but should + // be overridden by simultaneous-move games. At least one player should have a + // legal action or the game should be terminal. + // + // Since games mostly override LegalActions(), this method will not be visible + // in derived classes unless a using directive is added. + virtual std::vector LegalActions(Player player) const { + if (!IsTerminal() && player == CurrentPlayer()) { + return IsChanceNode() ? LegalChanceOutcomes() : LegalActions(); + } else { + return {}; + } + } + + // `LegalActions()` returns the actions for the current player (including at + // chance nodes). All games should implement this function. + // At a player node, all returned actions should be in + // [0, NumDistinctActions()). For a chance node, they should all be in + // [0, MaxChanceOutcomes()). + // The actions should be returned in ascending order. + // If the state is non-terminal (and not a mean field node), there must be at + // least one legal action. + // + // In simultaneous-move games, the abstract base class implements it in + // terms of LegalActions(player) and LegalChanceOutcomes(), and so derived + // classes only need to implement `LegalActions(Player player)`. + // This will result in LegalActions() being hidden unless a using directive + // is added. + virtual std::vector LegalActions() const = 0; + + // Returns a vector containing 1 for legal actions and 0 for illegal actions. + // The length is `game.NumDistinctActions()` for player nodes, and + // `game.MaxChanceOutcomes()` for chance nodes. + std::vector LegalActionsMask(Player player) const; + + // Convenience function for turn-based games. + std::vector LegalActionsMask() const { + return LegalActionsMask(CurrentPlayer()); + } + + // Returns a string representation of the specified action for the player. + // The representation may depend on the current state of the game, e.g. + // for chess the string "Nf3" would correspond to different starting squares + // in different states (and hence probably different action ids). + // This method will format chance outcomes if player == kChancePlayerId + virtual std::string ActionToString(Player player, Action action_id) const = 0; + std::string ActionToString(Action action_id) const { + return ActionToString(CurrentPlayer(), action_id); + } + + // Reverses the mapping done by ActionToString. + // Note: This currently just loops over all legal actions, converts them into + // a string, and checks equality, so it can be very slow. + virtual Action StringToAction(Player player, + const std::string& action_str) const; + Action StringToAction(const std::string& action_str) const { + return StringToAction(CurrentPlayer(), action_str); + } + + // Returns a string representation of the state. Also used as in the default + // implementation of operator==. + virtual std::string ToString() const = 0; + + // Returns true if these states are equal, false otherwise. Two states are + // equal if they are the same world state; the interpretation might differ + // across games. For instance, in an imperfect information game, the full + // history might be relevant for distinguishing states whereas it might not be + // relevant for single-player games or perfect information games such as + // Tic-Tac-Toe, where only the current board state is necessary. + virtual bool operator==(const State& other) const { + return ToString() == other.ToString(); + } + + // Is this a terminal state? (i.e. has the game ended?) 
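+  //
+  // Editorial sketch (not part of upstream OpenSpiel): IsTerminal() is the
+  // usual loop condition for a rollout. A minimal random playout, assuming a
+  // game loaded via LoadGame and a seeded std::mt19937 `rng`, might look like:
+  //
+  //   std::unique_ptr<State> state = game->NewInitialState();
+  //   while (!state->IsTerminal()) {
+  //     std::vector<Action> legal = state->LegalActions();
+  //     std::uniform_int_distribution<int> dist(0, legal.size() - 1);
+  //     state->ApplyAction(legal[dist(rng)]);
+  //   }
+  //   std::vector<double> returns = state->Returns();
+  //
+  //   (For brevity this samples chance nodes uniformly rather than from their
+  //   ChanceOutcomes() probabilities; see SampleAction() below for
+  //   probability-aware sampling.)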
+ virtual bool IsTerminal() const = 0; + + // Returns reward from the most recent state transition (s, a, s') for all + // players. This is provided so that RL-style games with intermediate rewards + // (along the episode, rather than just one value at the end) can be properly + // implemented. The default is to return 0 except at terminal states, where + // the terminal returns are returned. + // + // Note: This must agree with Returns(). That is, for any state S_t, + // Returns(St) = Sum(Rewards(S_0), Rewards(S_1)... Rewards(S_t)). + // The default implementation is only correct for games that only + // have a final reward. Games with intermediate rewards must override + // both this method and Returns(). + virtual std::vector Rewards() const { + if (IsTerminal()) { + return Returns(); + } else { + return std::vector(num_players_, 0.0); + } + } + + // Returns sums of all rewards for each player up to the current state. + // For games that only have a final reward, it should be 0 for all + // non-terminal states, and the terminal utility for the final state. + virtual std::vector Returns() const = 0; + + // Returns Reward for one player (see above for definition). If Rewards for + // multiple players are required it is more efficient to use Rewards() above. + virtual double PlayerReward(Player player) const { + auto rewards = Rewards(); + SPIEL_CHECK_LT(player, rewards.size()); + return rewards[player]; + } + + // Returns Return for one player (see above for definition). If Returns for + // multiple players are required it is more efficient to use Returns() above. + virtual double PlayerReturn(Player player) const { + auto returns = Returns(); + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, returns.size()); + return returns[player]; + } + + // Is this state a chance node? Chance nodes are "states" whose actions + // represent stochastic outcomes. "Chance" or "Nature" is thought of as a + // player with a fixed (randomized) policy. + virtual bool IsChanceNode() const { + return CurrentPlayer() == kChancePlayerId; + } + + // Is this a mean field node? In that case, no action should be performed, but + // instead the global state distribution should be updated with + // UpdateDistribution(). See more details in games/mfg/README.md. + virtual bool IsMeanFieldNode() const { + return CurrentPlayer() == kMeanFieldPlayerId; + } + + // Is this state a player node, with a single player acting? + virtual bool IsPlayerNode() const { return CurrentPlayer() >= 0; } + + // Is this state a node that requires simultaneous action choices from more + // than one player? If this is ever true, then the game should be marked as + // a simultaneous game. + bool IsSimultaneousNode() const { + return CurrentPlayer() == kSimultaneousPlayerId; + } + + // Is the specified player acting at this state? + bool IsPlayerActing(Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, NumPlayers()); + return CurrentPlayer() == player || IsSimultaneousNode(); + } + + // We store (player, action) pairs in the history. + struct PlayerAction { + Player player; + Action action; + bool operator==(const PlayerAction&) const; + }; + + // For backward-compatibility reasons, this is the history of actions only. + // To get the (player, action) pairs, use `FullHistory` instead. + std::vector History() const { + std::vector history; + history.reserve(history_.size()); + for (auto& h : history_) history.push_back(h.action); + return history; + } + + // The full (player, action) history. 
+ const std::vector& FullHistory() const { return history_; } + + // A string representation for the history. There should be a one to one + // mapping between histories (i.e. sequences of actions for all players, + // including chance) and the `State` objects. + std::string HistoryString() const { return absl::StrJoin(History(), ", "); } + + // Return how many moves have been done so far in the game. + // When players make simultaneous moves, this counts only as a one move. + // Chance transitions count also as one move. + // Note that game transformations are not required to preserve the move + // number in the transformed game. + int MoveNumber() const { return move_number_; } + + // Is this a first state in the game, i.e. the initial state (root node)? + bool IsInitialState() const { return history_.empty(); } + + // For imperfect information games. Returns an identifier for the current + // information state for the specified player. + // Different ground states can yield the same information state for a player + // when the only part of the state that differs is not observable by that + // player (e.g. opponents' cards in Poker.) + // + // The identifiers must be unique across all players. + // This allows an algorithm to maintain a single table of identifiers + // instead of maintaining a table per player to avoid name collisions. + // + // A simple way to do so is for example, in a card game, if both players can + // hold the card Jack, the identifier can contain player identification as + // well, like P1Jack and P2Jack. However prefixing by player number is not + // a requirement. The only thing that is necessary is that it is unambiguous + // who is the observer. + // + // Games that do not have imperfect information do not need to implement + // these methods, but most algorithms intended for imperfect information + // games will work on perfect information games provided the InformationState + // is returned in a form they support. For example, InformationState() + // could simply return the history for a perfect information game. + // + // A valid InformationStateString must be returned at terminal states, since + // this is required in some applications (e.g. final observation in an RL + // environment). + // + // The information state should be perfect-recall, i.e. if two states + // have a different InformationState, then all successors of one must have + // a different InformationState to all successors of the other. + // For example, in tic-tac-toe, the current state of the board would not be + // a perfect-recall representation, but the sequence of moves played would + // be. + // + // If you implement both InformationState and Observation, the two must be + // consistent for all the players (even the non-acting player(s)). + // By consistency we mean that when you maintain an Action-Observation + // history (AOH) for different ground states, the (in)equality of two AOHs + // implies the (in)equality of two InformationStates. In other words, AOH is a + // factored representation of InformationState. + // + // For details, see Section 3.1 of https://arxiv.org/abs/1908.09453 + // or Section 2.1 of https://arxiv.org/abs/1906.11110 + + // There are currently no use-case for calling this function with + // `kChancePlayerId`. Thus, games are expected to raise an error in those + // cases using (and it's tested in api_test.py). 
Use this: + // SPIEL_CHECK_GE(player, 0); + // SPIEL_CHECK_LT(player, num_players_); + virtual std::string InformationStateString(Player player) const { + SpielFatalError("InformationStateString is not implemented."); + } + std::string InformationStateString() const { + return InformationStateString(CurrentPlayer()); + } + + // Vector form, useful for neural-net function approximation approaches. + // The size of the vector must match Game::InformationStateShape() + // with values in lexicographic order. E.g. for 2x4x3, order would be: + // (0,0,0), (0,0,1), (0,0,2), (0,1,0), ... , (1,3,2). + // This function should resize the supplied vector if required. + // + // A valid InformationStateTensor must be returned at terminal states, since + // this is required in some applications (e.g. final observation in an RL + // environment). + // + // There are currently no use-case for calling this function with + // `kChancePlayerId`. Thus, games are expected to raise an error in those + // cases. + // + // Implementations should start with (and it's tested in api_test.py): + // SPIEL_CHECK_GE(player, 0); + // SPIEL_CHECK_LT(player, num_players_); + virtual void InformationStateTensor(Player player, + absl::Span values) const { + SpielFatalError("InformationStateTensor unimplemented!"); + } + std::vector InformationStateTensor(Player player) const; + std::vector InformationStateTensor() const { + return InformationStateTensor(CurrentPlayer()); + } + virtual void InformationStateTensor(Player player, + std::vector* values) const; + + // We have functions for observations which are parallel to those for + // information states. An observation should have the following properties: + // - It has at most the same information content as the information state + // - The complete history of observations and our actions over the + // course of the game is sufficient to reconstruct the information + // state for any players at any point in the game. + // + // For example, an observation is the cards revealed and bets made in Poker, + // or the current state of the board in Chess. + // Note that neither of these are valid information states, since the same + // observation may arise from two different observation histories (i.e. they + // are not perfect recall). + // + // Observations should cover all observations: a combination of both public + // and private observations. They are not factored into these individual + // constituent parts. + // + // A valid observation must be returned at terminal states, since this is + // required in some applications (e.g. final observation in an RL + // environment). + // + // Implementations should start with (and it's tested in api_test.py): + // SPIEL_CHECK_GE(player, 0); + // SPIEL_CHECK_LT(player, num_players_); + virtual std::string ObservationString(Player player) const { + SpielFatalError("ObservationString is not implemented."); + } + std::string ObservationString() const { + return ObservationString(CurrentPlayer()); + } + + // Returns the view of the game, preferably from `player`'s perspective. 
+ // + // Implementations should start with (and it's tested in api_test.py): + // SPIEL_CHECK_GE(player, 0); + // SPIEL_CHECK_LT(player, num_players_); + virtual void ObservationTensor(Player player, + absl::Span values) const { + SpielFatalError("ObservationTensor unimplemented!"); + } + std::vector ObservationTensor(Player player) const; + std::vector ObservationTensor() const { + return ObservationTensor(CurrentPlayer()); + } + void ObservationTensor(Player player, std::vector* values) const; + + // Return a copy of this state. + virtual std::unique_ptr Clone() const = 0; + + // Creates the child from State corresponding to action. + std::unique_ptr Child(Action action) const { + std::unique_ptr child = Clone(); + child->ApplyAction(action); + return child; + } + + // Undoes the last action, which must be supplied. This is a fast method to + // undo an action. It is only necessary for algorithms that need a fast undo + // (e.g. minimax search). + // One must call history_.pop_back() and --move_number_ in the implementations + // (and do these appropriately especially in simultaneous games). + virtual void UndoAction(Player player, Action action) { + SpielFatalError("UndoAction function is not overridden; not undoing."); + } + + // Change the state of the game by applying the specified actions, one per + // player, for simultaneous action games. This function encodes the logic of + // the game rules. Element i of the vector is the action for player i. + // + // Every player must submit a action. If some of the players have no legal + // actions at this node, then 0 should be passed instead. + // + // Simultaneous games should implement DoApplyActions. + void ApplyActions(const std::vector& actions); + + // A helper version of ApplyActions that first does legality checks. + void ApplyActionsWithLegalityChecks(const std::vector& actions); + + + // The size of the action space. See `Game` for a full description. + int NumDistinctActions() const { return num_distinct_actions_; } + + // Returns the number of players in this game. + int NumPlayers() const { return num_players_; } + + // Get the game object that generated this state. + std::shared_ptr GetGame() const { return game_; } + + // Get the chance outcomes and their probabilities. + // + // Chance actions do not have a separate UID space from regular actions. + // + // Note: what is returned here depending on the game's chance_mode (in + // its GameType): + // - Option 1. kExplicit. All chance node outcomes are returned along with + // their respective probabilities. Then State::ApplyAction(...) is + // deterministic. + // - Option 2. kSampled. Return a dummy single action here with probability + // 1, and then State::ApplyAction(...) does the real sampling. In this + // case, the game has to maintain its own RNG. + virtual ActionsAndProbs ChanceOutcomes() const { + SpielFatalError("ChanceOutcomes unimplemented!"); + } + + // Lists the valid chance outcomes at the current state. + // Derived classes may substitute this with a more efficient implementation. + virtual std::vector LegalChanceOutcomes() const { + ActionsAndProbs outcomes_with_probs = ChanceOutcomes(); + std::vector outcome_list; + outcome_list.reserve(outcomes_with_probs.size()); + for (auto& pair : outcomes_with_probs) { + outcome_list.push_back(pair.first); + } + return outcome_list; + } + + // Returns the type of the state. Either Chance, Terminal, MeanField or + // Decision. See StateType definition for definitions of the different types. 
+ StateType GetType() const; + + // Serializes a state into a string. + // + // The default implementation writes out a sequence of actions, one per line, + // taken from the initial state. Note: this default serialization scheme will + // not work games whose chance mode is kSampledStochastic, as there is + // currently no general way to set the state's seed to ensure that it samples + // the same chance event at chance nodes. + // + // If overridden, this must be the inverse of Game::DeserializeState. + virtual std::string Serialize() const; + + // Resamples a new history from the information state from player_id's view. + // This resamples a private for the other players, but holds player_id's + // privates constant, and the public information constant. + // The privates are sampled uniformly at each chance node. For games with + // partially-revealed actions that require some policy, we sample uniformly + // from the list of actions that are consistent with what player_id observed. + // For rng, we need something that returns a double in [0, 1). This value will + // be interpreted as a cumulative distribution function, and will be used to + // sample from the legal chance actions. A good choice would be + // absl/std::uniform_real_distribution(0., 1.). + // + // Default implementation checks if the game is a perfect information game. + // If so, it returns a clone, otherwise an error is thrown. + virtual std::unique_ptr ResampleFromInfostate( + int player_id, std::function rng) const; + + // Returns a vector of states & probabilities that are consistent with the + // infostate from the view of the current player. By default, this is not + // implemented and returns an empty list. This doesn't make any attempt to + // correct for the opponent's policy in the probabilities, and so this is + // wrong for any state that's not the first non-chance node. + virtual std::unique_ptr + GetHistoriesConsistentWithInfostate(int player_id) const { + return {}; + } + + virtual std::unique_ptr + GetHistoriesConsistentWithInfostate() const { + return GetHistoriesConsistentWithInfostate(CurrentPlayer()); + } + + // Returns a vector of all actions that are consistent with the information + // revealed by taking action. E.g. in Poker, this does nothing but return the + // current action as poker only has public actions. In a game like Battleship, + // where the placement phase is hidden, this would return all possible + // placements. + virtual std::vector ActionsConsistentWithInformationFrom( + Action action) const { + SpielFatalError( + "ActionsConsistentWithInformationFrom has not been implemented."); + return {}; + } + + // These functions only apply to mean field games. + // Mean field game support in open_spiel is experimental, and these functions + // are subject to change. + + // At the current mean field node, the support of the state distribution that + // needs to be updated. States are identified by their corresponding string + // representation. In multi-population mean field nodes, the support will + // typically include states for all the populations. + // This should only be called when when CurrentPlayer() == kMeanFieldPlayerId. + // This can return an empty list in case the distribution is not needed at + // this time. + virtual std::vector DistributionSupport() { + SpielFatalError("DistributionSupport has not been implemented"); + } + // Update the state distribution. `distribution[i]` must correspond to + // `DistributionSupport()[i]`. 
After this is called, the state will be of + // Chance type. + // This should only be called when when CurrentPlayer() == kMeanFieldPlayerId. + virtual void UpdateDistribution(const std::vector& distribution) { + SpielFatalError("UpdateDistribution has not been implemented"); + } + + // Only makes sense for mean field games. This is the population a state + // belongs to. It returns 0 by default, so multi-population mean field games + // should override this function. + virtual int MeanFieldPopulation() const; + + protected: + // See ApplyAction. + virtual void DoApplyAction(Action action_id) { + SpielFatalError("DoApplyAction is not implemented."); + } + // See ApplyActions. + virtual void DoApplyActions(const std::vector& actions) { + SpielFatalError("DoApplyActions is not implemented."); + } + + // The game that created this state, plus some static information about it, + // cached here for efficient access. + const std::shared_ptr game_; + const int num_distinct_actions_; + const int num_players_; + + // Information that changes over the course of the game. + std::vector history_; + int move_number_; +}; + +std::ostream& operator<<(std::ostream& stream, const State& state); + +// A class that refers to a particular game instantiation, for example +// Breakthrough(8x8). +// +// Important note: Game objects cannot be instantiated on the stack or via +// unique_ptr, because shared pointers to the game object must be sent down to +// the states that created them. So, they *must* be created via +// shared_ptr or via the LoadGame methods. +class Game : public std::enable_shared_from_this { + public: + virtual ~Game() = default; + Game(const Game&) = delete; + Game& operator=(const Game&) = delete; + + // Maximum number of distinct actions in the game for any one player. This is + // not the same as max number of legal actions in any state as distinct + // actions are independent of the context (state), and often independent of + // the player as well. So, for instance in Tic-Tac-Toe this value is 9, one + // for each square. In games where pieces move, like e.g. Breakthrough, then + // it would be 64*6*2, since from an 8x8 board a single piece could only ever + // move to at most 6 places, and it can be a regular move or a capture move. + // Note: chance node outcomes are not included in this count. + // For example, this would correspond to the size of the policy net head + // learning which move to play. + virtual int NumDistinctActions() const = 0; + + // Returns a newly allocated initial state. + virtual std::unique_ptr NewInitialState() const = 0; + + // Return a new state from a string description. This is an unspecified and + // unrestricted function to construct a new state from a string. + virtual std::unique_ptr NewInitialState(const std::string& str) const { + SpielFatalError("NewInitialState from string is not implemented."); + } + + // Returns newly allocated initial states. In most cases, this will be a + // single state. + // Games with multi-population mean field dynamics have multiple initial + // states, one per population. In that case, N initial states will be + // returned, from population 0 to population N-1 (where N is the number of + // populations, which is equal to the number of players). + virtual std::vector> NewInitialStates() const; + + // Maximum number of distinct chance outcomes for chance nodes in the game. 
+ virtual int MaxChanceOutcomes() const { return 0; } + + // If the game is parameterizable, returns an object with the current + // parameter values, including defaulted values. Returns empty parameters + // otherwise. + GameParameters GetParameters() const { + absl::MutexLock lock(&mutex_defaulted_parameters_); + GameParameters params = game_parameters_; + params.insert(defaulted_parameters_.begin(), defaulted_parameters_.end()); + return params; + } + + // The number of players in this instantiation of the game. + // Does not include the chance-player. + virtual int NumPlayers() const = 0; + + // Utility range. These functions define the lower and upper bounds on the + // values returned by State::PlayerReturn(Player player) over all valid player + // numbers. This range should be as tight as possible; the intention is to + // give some information to algorithms that require it, and so their + // performance may suffer if the range is not tight. Loss/draw/win outcomes + // are common among games and should use the standard values of {-1,0,1}. + virtual double MinUtility() const = 0; + virtual double MaxUtility() const = 0; + + // Static information on the game type. This should match the information + // provided when registering the game. + const GameType& GetType() const { return game_type_; } + + // The total utility for all players, if this is a constant-sum-utility game. + // Should return 0 if the game is zero-sum. + virtual absl::optional UtilitySum() const { return absl::nullopt; } + + // Describes the structure of the information state representation in a + // tensor-like format. This is especially useful for experiments involving + // reinforcement learning and neural networks. Note: the actual information is + // returned in a 1-D vector by State::InformationStateTensor - + // see the documentation of that function for details of the data layout. + virtual std::vector InformationStateTensorShape() const { + SpielFatalError("InformationStateTensorShape unimplemented."); + } + virtual TensorLayout InformationStateTensorLayout() const { + return TensorLayout::kCHW; + } + + // The size of the (flat) vector needed for the information state tensor-like + // format. + int InformationStateTensorSize() const { + std::vector shape = InformationStateTensorShape(); + return shape.empty() ? 0 + : absl::c_accumulate(shape, 1, std::multiplies()); + } + + // Describes the structure of the observation representation in a + // tensor-like format. This is especially useful for experiments involving + // reinforcement learning and neural networks. Note: the actual observation is + // returned in a 1-D vector by State::ObservationTensor - + // see the documentation of that function for details of the data layout. + virtual std::vector ObservationTensorShape() const { + SpielFatalError("ObservationTensorShape unimplemented."); + } + virtual TensorLayout ObservationTensorLayout() const { + return TensorLayout::kCHW; + } + + // The size of the (flat) vector needed for the observation tensor-like + // format. + int ObservationTensorSize() const { + std::vector shape = ObservationTensorShape(); + return shape.empty() ? 0 + : absl::c_accumulate(shape, 1, std::multiplies()); + } + + // Describes the structure of the policy representation in a + // tensor-like format. This is especially useful for experiments involving + // reinforcement learning and neural networks. Note: the actual policy is + // expected to be in the shape of a 1-D vector. 
+ virtual std::vector PolicyTensorShape() const { + return {NumDistinctActions()}; + } + + // Returns a newly allocated state built from a string. Caller takes ownership + // of the state. + // + // The default implementation assumes a sequence of actions, one per line, + // that is taken from the initial state. + // + // If this method is overridden, then it should be the inverse of + // State::Serialize (i.e. that method should also be overridden). + virtual std::unique_ptr DeserializeState(const std::string& str) const; + + // The maximum length of any one game (in terms of number of decision nodes + // visited in the game tree). For a simultaneous action game, this is the + // maximum number of joint decisions. In a turn-based game, this is the + // maximum number of individual decisions summed over all players. Outcomes + // of chance nodes are not included in this length. + virtual int MaxGameLength() const = 0; + + // The maximum number of chance nodes occurring in any history of the game. + // This is typically something like the number of times dice are rolled. + virtual int MaxChanceNodesInHistory() const { + if (GetType().chance_mode == GameType::ChanceMode::kDeterministic) { + return 0; + } + SpielFatalError("MaxChanceNodesInHistory() is not implemented"); + } + + // The maximum number of moves in the game. The value State::MoveNumber() + // must never be higher than this value. + virtual int MaxMoveNumber() const { + return MaxGameLength() + MaxChanceNodesInHistory(); + } + + // The maximum length of any history in the game. + // The value State::History().size() must never be higher than this value. + virtual int MaxHistoryLength() const { + if (GetType().dynamics == GameType::Dynamics::kSimultaneous) { + // The history of simultaneous move games is flattened, so count number + // of actions of each player. + return MaxGameLength() * NumPlayers() + MaxChanceNodesInHistory(); + } + if (GetType().dynamics == GameType::Dynamics::kSequential) { + return MaxGameLength() + MaxChanceNodesInHistory(); + } + SpielFatalError("Unknown game dynamics."); + } + + // A string representation of the game, which can be passed to + // DeserializeGame. The difference with Game::ToString is that it also + // serializes internal RNG state used with sampled stochastic game + // implementations. + std::string Serialize() const; + + // A string representation of the game, which can be passed to LoadGame. + std::string ToString() const; + + // Returns true if these games are equal, false otherwise. + virtual bool operator==(const Game& other) const { + // GetParameters() includes default values. So comparing GetParameters + // instead of game_parameters_ makes sure that game equality is independent + // of the presence of explicitly passed game parameters with default values. + return game_type_.short_name == other.game_type_.short_name && + GetParameters() == other.GetParameters(); + } + + // Get and set game's internal RNG state for de/serialization purposes. These + // two methods only need to be overridden by sampled stochastic games that + // need to hold an RNG state. Note that stateful game implementations are + // discouraged in general. + virtual std::string GetRNGState() const { + SpielFatalError("GetRNGState unimplemented."); + } + // SetRNGState is const despite the fact that it changes game's internal + // state. Sampled stochastic games need to be explicit about mutability of the + // RNG, i.e. have to use the mutable keyword. 
+ virtual void SetRNGState(const std::string& rng_state) const { + SpielFatalError("SetRNGState unimplemented."); + } + + // Returns an Observer, used to obtain observations of the game state. + // If the requested iig_obs_type is not supported by the game, the + // implementation must return a nullptr. If params are provided and + // unsupported this can result in an error. + // The observations are created according to requested observation type. + // Games can include additional observation fields when requested by + // `params`. + // See `observer.h` for further information. + virtual std::shared_ptr MakeObserver( + absl::optional iig_obs_type, + const GameParameters& params) const; + + // Returns a string representation of the specified action for the player, + // independent of the state. + virtual std::string ActionToString(Player player, Action action_id) const { + return absl::StrCat("Action(id=", action_id, ", player=", player, ")"); + } + + // Returns an observer that was registered, based on its name. + std::shared_ptr MakeRegisteredObserver( + absl::optional iig_obs_type, + const GameParameters& params) const; + // Returns an observer that uses the observation or informationstate tensor + // or string as defined directly on the state. Returns a nullptr if the + // requested iig_obs_type is not supported. + std::shared_ptr MakeBuiltInObserver( + absl::optional iig_obs_type) const; + + // Public member functions below only apply to games with mean field dynamics. + + // Creates a new initial state for the given population (which must be in [0, + // NumPlayers())). This must be implemented for multi-population mean field + // games. + virtual std::unique_ptr NewInitialStateForPopulation( + int population) const { + SpielFatalError("NewInitialStateForPopulation is not implemented."); + } + + protected: + Game(GameType game_type, GameParameters game_parameters) + : game_type_(game_type), game_parameters_(game_parameters) {} + + // Access to game parameters. Returns the value provided by the user. If not: + // - Defaults to the value stored as the default in + // game_type.parameter_specification if the `default_value` is absl::nullopt + // - Returns `default_value` if provided. + template + T ParameterValue(const std::string& key, + absl::optional default_value = absl::nullopt) const { + // Return the value if found. + auto iter = game_parameters_.find(key); + if (iter != game_parameters_.end()) { + return iter->second.value(); + } + + // Pick the defaulted value. + GameParameter default_game_parameter; + if (default_value.has_value()) { + default_game_parameter = GameParameter(default_value.value()); + } else { + auto default_iter = game_type_.parameter_specification.find(key); + if (default_iter == game_type_.parameter_specification.end()) { + SpielFatalError(absl::StrCat("The parameter for ", key, + " is missing in game ", ToString())); + } + default_game_parameter = default_iter->second; + } + + // Return the default value, storing it. + absl::MutexLock lock(&mutex_defaulted_parameters_); + iter = defaulted_parameters_.find(key); + if (iter == defaulted_parameters_.end()) { + // We haven't previously defaulted this value, so store the default we + // used. + defaulted_parameters_[key] = default_game_parameter; + } else { + // Already defaulted, so check we are being consistent. + // Using different default values at different times means the game isn't + // well-defined. 
+ if (default_game_parameter != iter->second) { + SpielFatalError(absl::StrCat("Parameter ", key, " is defaulted to ", + default_game_parameter.ToReprString(), + " having previously been defaulted to ", + iter->second.ToReprString(), " in game ", + ToString())); + } + } + return default_game_parameter.value(); + } + + // The game type. + GameType game_type_; + + // Any parameters supplied when constructing the game. + GameParameters game_parameters_; + + // Track the parameters for which a default value has been used. This + // enables us to report the actual value used for every parameter. + mutable GameParameters defaulted_parameters_ + ABSL_GUARDED_BY(mutex_defaulted_parameters_); + mutable absl::Mutex mutex_defaulted_parameters_; +}; + +#define CONCAT_(x, y) x##y +#define CONCAT(x, y) CONCAT_(x, y) +#define REGISTER_SPIEL_GAME(info, factory) \ + GameRegisterer CONCAT(game, __COUNTER__)(info, factory); + +class GameRegisterer { + public: + using CreateFunc = + std::function(const GameParameters& params)>; + + GameRegisterer(const GameType& game_type, CreateFunc creator); + + static std::shared_ptr CreateByName(const std::string& short_name, + const GameParameters& params); + + static std::vector GamesWithKnownIssues(); + static std::vector RegisteredNames(); + static std::vector RegisteredConcreteNames(); + static std::vector RegisteredGames(); + static std::vector RegisteredConcreteGames(); + static bool IsValidName(const std::string& short_name); + static void RegisterGame(const GameType& game_type, CreateFunc creator); + + private: + // Returns a "global" map of registrations (i.e. an object that lives from + // initialization to the end of the program). Note that we do not just use + // a static data member, as we want the map to be initialized before first + // use. + static std::map>& factories() { + static std::map> impl; + return impl; + } + + static std::vector GameTypesToShortNames( + const std::vector& game_types); +}; + +// Returns true if the game is registered, false otherwise. +bool IsGameRegistered(const std::string& short_name); + +// Returns a list of registered games' short names. +std::vector RegisteredGames(); + +// Returns a list of registered game types. +std::vector RegisteredGameTypes(); + +std::shared_ptr DeserializeGame(const std::string& serialized); + +// Returns a new game object from the specified string, which is the short +// name plus optional parameters, e.g. "go(komi=4.5,board_size=19)" +std::shared_ptr LoadGame(const std::string& game_string); + +// Returns a new game object with the specified parameters. +std::shared_ptr LoadGame(const std::string& short_name, + const GameParameters& params); + +// Returns a new game object with the specified parameters; reads the name +// of the game from the 'name' parameter (which is not passed to the game +// implementation). +std::shared_ptr LoadGame(GameParameters params); + +// Normalize a policy into a proper discrete distribution where the +// probabilities sum to 1. +void NormalizePolicy(ActionsAndProbs* policy); + +// Used to sample a policy or chance outcome distribution. +// Probabilities of the actions must sum to 1. +// The parameter z should be a sample from a uniform distribution on the range +// [0, 1). Returns the sampled action and its probability. 
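+// Editorial sketch (not part of upstream OpenSpiel): a typical use of the
+// z-in-[0, 1) overload declared below, assuming a seeded std::mt19937 `rng`
+// and a state currently at a chance node:
+//
+//   ActionsAndProbs outcomes = state->ChanceOutcomes();
+//   double z = std::uniform_real_distribution<double>(0.0, 1.0)(rng);
+//   std::pair<Action, double> sampled = SampleAction(outcomes, z);
+//   state->ApplyAction(sampled.first);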
+std::pair SampleAction(const ActionsAndProbs& outcomes, + double z); +std::pair SampleAction(const ActionsAndProbs& outcomes, + absl::BitGenRef rng); + +// Serialize the game and the state into one self-contained string that can +// be reloaded via open_spiel::DeserializeGameAndState. +// +// The format of the string is the following (contains three sections, +// marked by single-line headers in square brackets with specific keywords), +// see below. The meta section contains general info. The game string is +// parsed using LoadGame(string) and the state section is parsed using +// Game::DeserializeState. +// +// Example file contents: +// +// # Comments are ok, but hash '#' must be first chatacter in the line. +// # Blank lines and lines that start with hash '#' are ignored +// [Meta] +// Version: +// +// [Game] +// +// +// [State] +// +std::string SerializeGameAndState(const Game& game, const State& state); + +// A general deserialization which reconstructs both the game and the state, +// which have been saved using the default simple implementation in +// SerializeGameAndState. The game must be registered so that it is loadable via +// LoadGame. +// +// The state string must have a specific format. See +// Game::SerializeGameAndState for a description of the saved format. +// +// Note: This serialization scheme will not work for games whose chance mode is +// kSampledStochastic, as there is currently no general way to set the state's +// seed. +std::pair, std::unique_ptr> +DeserializeGameAndState(const std::string& serialized_state); + +// Convert GameTypes from and to strings. Used for serialization of objects +// that contain them. +// Note: these are not finished! They will be finished by an external +// contributor. See https://github.com/deepmind/open_spiel/issues/234 for +// details. +std::string GameTypeToString(const GameType& game_type); +GameType GameTypeFromString(const std::string& game_type_str); + +std::ostream& operator<<(std::ostream& os, const State::PlayerAction& action); + +// Utility functions used mostly for debugging. This calls State::ActionToString +// for every action. +std::vector ActionsToStrings(const State& state, + const std::vector& actions); + +// Calls ActionsToStrings and then calls absl::StrJoin to concatenate all the +// strings together. +std::string ActionsToString(const State& state, + const std::vector& actions); + +// A utility to broadcast an error message with game and state info. +// It is a wrapper around SpielFatalError and meant to facilitate debugging. +void SpielFatalErrorWithStateInfo(const std::string& error_msg, + const Game& game, + const State& state); + + +// Builds the state from a history string. Checks legalities of every action +// on the way. The history string is a comma-separated actions with whitespace +// allowed, and can include square brackets on either side: +// E.g. "[1, 3, 4, 5, 6]" and "57,12,72,85" are both valid. +// Proceeds up to a maximum of max_steps, unless max_steps is negative, in +// which case it proceeds until the end of the sequence. 
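+// Editorial sketch (not part of upstream OpenSpiel): replaying one of the
+// example history strings above onto a freshly loaded game. The game name
+// "bargaining" and the action sequence are illustrative only and may not be
+// a legal history for that game:
+//
+//   auto game_and_state =
+//       BuildStateFromHistoryString("bargaining", "[1, 3, 4, 5, 6]");
+//   auto& state = game_and_state.second;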
+std::pair, + std::unique_ptr> BuildStateFromHistoryString( + const std::string& game_string, const std::string& history, + int max_steps = -1); + +} // namespace open_spiel + +#endif // OPEN_SPIEL_SPIEL_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/spiel_bots.cc b/scenarios/bargaining/open_spiel/open_spiel/spiel_bots.cc new file mode 100644 index 0000000..11dfe4e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/spiel_bots.cc @@ -0,0 +1,383 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/spiel_bots.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/random/distributions.h" +#include "open_spiel/abseil-cpp/absl/random/random.h" +#include "open_spiel/abseil-cpp/absl/random/uniform_int_distribution.h" +#include "open_spiel/abseil-cpp/absl/strings/numbers.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace { + +class UniformRandomBot : public Bot { + public: + UniformRandomBot(Player player_id, int seed) + : player_id_(player_id), rng_(seed) {} + ~UniformRandomBot() = default; + + void RestartAt(const State&) override {} + Action Step(const State& state) override { + return StepWithPolicy(state).second; + } + bool ProvidesPolicy() override { return true; } + ActionsAndProbs GetPolicy(const State& state) override { + ActionsAndProbs policy; + auto legal_actions = state.LegalActions(player_id_); + const int num_legal_actions = legal_actions.size(); + const double p = 1.0 / num_legal_actions; + for (auto action : legal_actions) policy.emplace_back(action, p); + return policy; + } + + std::pair StepWithPolicy( + const State& state) override { + ActionsAndProbs policy = GetPolicy(state); + const int num_legal_actions = policy.size(); + + int selection = + absl::uniform_int_distribution(0, num_legal_actions - 1)(rng_); + return std::make_pair(policy, policy[selection].first); + } + + bool IsClonable() const override { return true; } + std::unique_ptr Clone() override { + return std::make_unique(*this); + } + UniformRandomBot(const UniformRandomBot& other) = default; + + private: + const Player player_id_; + std::mt19937 rng_; +}; + +// A UniformRandomBot that keeps a copy of the state up to date. This exists +// primarily to verify that InformAction is called correctly by the run loop. 
+class StatefulRandomBot : public UniformRandomBot {
+ public:
+  StatefulRandomBot(const Game& game, Player player_id, int seed)
+      : UniformRandomBot(player_id, seed), state_(game.NewInitialState()) {}
+
+  void Restart() override { state_ = state_->GetGame()->NewInitialState(); }
+  void RestartAt(const State& state) override { state_ = state.Clone(); }
+  void InformAction(const State& state, Player player_id,
+                    Action action) override {
+    CheckStatesEqual(state, *state_);
+    state_->ApplyAction(action);
+  }
+  ActionsAndProbs GetPolicy(const State& state) override {
+    CheckStatesEqual(state, *state_);
+    return UniformRandomBot::GetPolicy(*state_);
+  }
+  std::pair<ActionsAndProbs, Action> StepWithPolicy(
+      const State& state) override {
+    std::pair<ActionsAndProbs, Action> ret =
+        UniformRandomBot::StepWithPolicy(*state_);
+    state_->ApplyAction(ret.second);
+    return ret;
+  }
+
+  std::unique_ptr<Bot> Clone() override {
+    return std::make_unique<StatefulRandomBot>(*this);
+  }
+  StatefulRandomBot(const StatefulRandomBot& other)
+      : UniformRandomBot(other), state_(other.state_->Clone()) {}
+
+ private:
+  void CheckStatesEqual(const State& state1, const State& state2) const {
+    SPIEL_CHECK_EQ(state1.History(), state2.History());
+    SPIEL_CHECK_EQ(state1.CurrentPlayer(), state2.CurrentPlayer());
+    SPIEL_CHECK_EQ(state1.LegalActions(), state2.LegalActions());
+    if (!state1.IsChanceNode()) {
+      SPIEL_CHECK_EQ(state1.ObservationTensor(), state2.ObservationTensor());
+    }
+  }
+  std::unique_ptr<State> state_;
+};
+
+class PolicyBot : public Bot {
+ public:
+  PolicyBot(int seed, std::shared_ptr<Policy> policy)
+      : Bot(), rng_(seed), policy_(std::move(policy)) {}
+  ~PolicyBot() = default;
+
+  void RestartAt(const State&) override {}
+  Action Step(const State& state) override {
+    return StepWithPolicy(state).second;
+  }
+  bool ProvidesPolicy() override { return true; }
+  ActionsAndProbs GetPolicy(const State& state) override {
+    return policy_->GetStatePolicy(state);
+  }
+
+  std::pair<ActionsAndProbs, Action> StepWithPolicy(
+      const State& state) override {
+    ActionsAndProbs actions_and_probs = GetPolicy(state);
+    return {actions_and_probs, SampleAction(actions_and_probs, rng_).first};
+  }
+
+  bool IsClonable() const override { return true; }
+  std::unique_ptr<Bot> Clone() override {
+    return std::make_unique<PolicyBot>(*this);
+  }
+  PolicyBot(const PolicyBot& other) = default;
+
+ private:
+  std::mt19937 rng_;
+  std::shared_ptr<Policy> policy_;
+};
+
+class FixedActionPreferenceBot : public Bot {
+ public:
+  FixedActionPreferenceBot(Player player_id, const std::vector<Action>& actions)
+      : Bot(), player_id_(player_id), actions_(actions) {}
+  ~FixedActionPreferenceBot() = default;
+
+  void RestartAt(const State&) override {}
+  Action Step(const State& state) override {
+    return StepWithPolicy(state).second;
+  }
+  bool ProvidesPolicy() override { return true; }
+  ActionsAndProbs GetPolicy(const State& state) override {
+    std::vector<Action> legal_actions = state.LegalActions(player_id_);
+    std::unordered_set<Action> legal_actions_set =
+        std::unordered_set<Action>(legal_actions.begin(), legal_actions.end());
+    for (Action action : actions_) {
+      if (legal_actions_set.count(action) == 1) {
+        return {{action, 1.0}};
+      }
+    }
+    SpielFatalError("No legal actions in action list.");
+  }
+
+  std::pair<ActionsAndProbs, Action> StepWithPolicy(
+      const State& state) override {
+    ActionsAndProbs actions_and_probs = GetPolicy(state);
+    return {actions_and_probs, actions_and_probs[0].first};
+  }
+
+  bool IsClonable() const override { return true; }
+  std::unique_ptr<Bot> Clone() override {
+    return std::make_unique<FixedActionPreferenceBot>(*this);
+  }
+  FixedActionPreferenceBot(const FixedActionPreferenceBot& other) = default;
+
+ private:
const Player player_id_; + std::vector actions_; +}; + +} // namespace + +// A uniform random bot, for test purposes. +std::unique_ptr MakeUniformRandomBot(Player player_id, int seed) { + return std::make_unique(player_id, seed); +} +namespace { +class UniformRandomBotFactory : public BotFactory { + public: + ~UniformRandomBotFactory() = default; + + bool CanPlayGame(const Game& game, Player player_id) const override { + return true; + } + std::unique_ptr Create(std::shared_ptr game, + Player player_id, + const GameParameters& bot_params) const override { + int seed = 0; + if (IsParameterSpecified(bot_params, "seed")) { + const GameParameter& seed_param = bot_params.at("seed"); + seed = seed_param.int_value(); + } else { + absl::BitGen gen; + seed = absl::Uniform(gen, std::numeric_limits::min(), + std::numeric_limits::max()); + } + return MakeUniformRandomBot(player_id, seed); + } +}; +REGISTER_SPIEL_BOT("uniform_random", UniformRandomBotFactory); +} // namespace + +// A bot that samples from a policy. +std::unique_ptr MakePolicyBot(int seed, std::shared_ptr policy) { + return std::make_unique(seed, std::move(policy)); +} +std::unique_ptr MakePolicyBot(const Game& game, Player player_id, int seed, + std::shared_ptr policy) { + return MakePolicyBot(seed, std::move(policy)); +} +// A bot with a fixed action preference, for test purposes. +// Picks the first legal action found in the list of actions. +std::unique_ptr MakeFixedActionPreferenceBot( + Player player_id, const std::vector& actions) { + return std::make_unique(player_id, actions); +} +namespace { +std::vector ActionsFromStr(const absl::string_view& str, + const absl::string_view& delim) { + std::vector actions; + for (absl::string_view token : absl::StrSplit(str, delim)) { + int v; + SPIEL_CHECK_TRUE(absl::SimpleAtoi(token, &v)); + actions.push_back(v); + } + return actions; +} + +class FixedActionPreferenceFactory : public BotFactory { + public: + ~FixedActionPreferenceFactory() = default; + + bool CanPlayGame(const Game& game, Player player_id) const override { + return true; + } + std::unique_ptr Create(std::shared_ptr game, + Player player_id, + const GameParameters& bot_params) const override { + std::vector actions{0, 1, 2, 3, 4, 5, 6, 7}; + if (IsParameterSpecified(bot_params, "actions")) { + const GameParameter& actions_param = bot_params.at("actions"); + actions = ActionsFromStr(actions_param.string_value(), ":"); + } + return MakeFixedActionPreferenceBot(player_id, actions); + } +}; +REGISTER_SPIEL_BOT("fixed_action_preference", FixedActionPreferenceFactory); +} // namespace + +std::unique_ptr MakeStatefulRandomBot(const Game& game, Player player_id, + int seed) { + return std::make_unique(game, player_id, seed); +} + +BotRegisterer::BotRegisterer(const std::string& bot_name, + std::unique_ptr factory) { + RegisterBot(bot_name, std::move(factory)); +} + +std::unique_ptr BotRegisterer::CreateByName( + const std::string& bot_name, std::shared_ptr game, + Player player_id, const GameParameters& params) { + auto iter = factories().find(bot_name); + if (iter == factories().end()) { + SpielFatalError(absl::StrCat("Unknown bot '", bot_name, + "'. 
Available bots are:\n", + absl::StrJoin(RegisteredBots(), "\n"))); + + } else { + const std::unique_ptr& factory = iter->second; + return factory->Create(std::move(game), player_id, params); + } +} + +std::vector BotRegisterer::BotsThatCanPlayGame(const Game& game, + Player player_id) { + std::vector bot_names; + for (const auto& key_val : factories()) { + if (key_val.second->CanPlayGame(game, player_id)) { + bot_names.push_back(key_val.first); + } + } + return bot_names; +} + +std::vector BotRegisterer::BotsThatCanPlayGame(const Game& game) { + std::vector bot_names; + for (const auto& key_val : factories()) { + bool can_play_for_all = true; + for (int player_id = 0; player_id < game.NumPlayers(); ++player_id) { + if (!key_val.second->CanPlayGame(game, player_id)) { + can_play_for_all = false; + break; + } + } + if (can_play_for_all) bot_names.push_back(key_val.first); + } + return bot_names; +} + +void BotRegisterer::RegisterBot(const std::string& bot_name, + std::unique_ptr factory) { + factories()[bot_name] = std::move(factory); +} + +std::vector BotRegisterer::RegisteredBots() { + std::vector names; + for (const auto& key_val : factories()) names.push_back(key_val.first); + return names; +} + +std::vector RegisteredBots() { + return BotRegisterer::RegisteredBots(); +} + +bool BotRegisterer::IsBotRegistered(const std::string& bot_name) { + return factories().find(bot_name) != factories().end(); +} + +bool IsBotRegistered(const std::string& bot_name) { + return BotRegisterer::IsBotRegistered(bot_name); +} + +std::unique_ptr LoadBot(const std::string& bot_name, + const std::shared_ptr& game, + Player player_id) { + GameParameters params = GameParametersFromString(bot_name); + + // We use the "name" parameter, as that is the "short_name", which is what we + // want. Otherwise, this will use the "long name", which includes the config. + // e.g. if the bot_name is "my_bot(parameter=value)", then we want the + // bot_name here to be "my_bot", not "my_bot(parameter=value)". + return LoadBot(params["name"].string_value(), game, player_id, params); +} + +std::unique_ptr LoadBot(const std::string& bot_name, + const std::shared_ptr& game, + Player player_id, const GameParameters& params) { + std::unique_ptr result = + BotRegisterer::CreateByName(bot_name, game, player_id, params); + if (result == nullptr) { + SpielFatalError(absl::StrCat("Unable to create bot: ", bot_name)); + } + return result; +} + +std::vector BotsThatCanPlayGame(const Game& game, + Player player_id) { + return BotRegisterer::BotsThatCanPlayGame(game, player_id); +} + +std::vector BotsThatCanPlayGame(const Game& game) { + return BotRegisterer::BotsThatCanPlayGame(game); +} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/spiel_bots.h b/scenarios/bargaining/open_spiel/open_spiel/spiel_bots.h new file mode 100644 index 0000000..3a28f91 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/spiel_bots.h @@ -0,0 +1,281 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_SPIEL_BOTS_H_ +#define OPEN_SPIEL_SPIEL_BOTS_H_ + +#include +#include +#include +#include + +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// Bots are things which can play a game. Here we define the interface for +// various features of a bot, and some trivial uniform and fixed action bots. + +// Different use-cases include: +// - play a bot versus another bot (we just need an action). This should be +// general enough to support simultaneous games (in which case the bot needs +// to know a player_id). +// - restart the game (to the initial state, and an arbitrary state) +// - interact with a bot and study its behavior, for example by looking at its +// policy in specific states, or by accessing its action distribution. This +// implies being able to set the bot into a specific game state. + +// Bots can differ, in particular with respect to: +// +// 1. Bot determinism. +// - deterministic: the (state -> action) suggestion is deterministic +// - Explicit Stochastic: the (state-> distribution over actions) is +// deterministic and the bot exposes it +// - Implicitly stochastic: even though the (state -> actions distribution) +// may exist in theory, it's intractable or not implemented. Thus, the +// (state -> action) suggestion is stochastic. +// +// 2. Bot statefulness. A bot can be stateless, or stateful (the policy can +// depend on the history of states, observations and/or actions). + +namespace open_spiel { + +// A simple bot that can play moves and be restarted. The bot may be stateful, +// thus, one should restart it to provide states from a different history line. +// +// For simultaneous games, or for bots playing as a single player, the +// implementation should take the player_id in the constructor. +// +// Optionally, the Bot can provide additional functionality (see +// `IsOverridable` and `ProvidesPolicy`). +// In Python, the simplest way to implement such a bot is: +// +// class MyBot(pyspiel.Bot): +// +// def __init__(self): +// pyspiel.Bot.__init__(self) +// # If you do implement get_policy and step_with_policy +// def provides_force_action(self): +// return True +// def force_action(self, state, action): +// ... +class Bot { + public: + // Constructs a Bot that only supports `Step` and `Restart` (maybe RestartAt). + virtual ~Bot() = default; + + // Asks the bot to decide on an action to play. The bot should be able to + // safely assumes the action was played. + virtual Action Step(const State& state) = 0; + + // Same as Action except the bot is given the opportunity to return verbose + // output. This will allow callers of `StepVerbose` to log information about + // the action for bots that support this function. + virtual std::pair StepVerbose(const State& state) { + return {Step(state), ""}; + } + + // Let the bot know that a different player made an action at a given state. + // + // The state is the state at which the `player_id` player decided to take + // the given `action` (but before it is applied to the state). 
Some usage + // example looks like: + // + // Player current_player = state->CurrentPlayer(); + // Action action = bots[current_player]->Step(*state); + // for (Player p = 0; p < num_players; ++p) { + // if (p != current_player) { + // bots[p]->InformAction(*state, current_player, action); + // } + // } + // state->ApplyAction(action); # We apply the action after informing bots. + // + // This is useful for stateful bots so they know that the state of the game + // has advanced. This should not be called for the bot that generated the + // action as it already knows the action it took. As most bots are not + // stateful, the default implementation is a no-op. + // This is more explicit and less error prone than having bots inspect and + // potentially replay the history of actions. + virtual void InformAction(const State& state, Player player_id, + Action action) {} + // In simultaneous move games the bot receives a vector containing the + // actions taken by all players in the given state. + virtual void InformActions(const State& state, + const std::vector& actions) {} + + // Restarts the bot to its initial state, ready to start a new trajectory. + virtual void Restart() {} + // Configure the bot to be on the given `state` which can be arbitrary. + // Bot not supporting this feature can raise an error. + virtual void RestartAt(const State& state) { + SpielFatalError("RestartAt(state) not implemented."); + } + + // Returns `true` if it is possible to force the Bot to take a specific + // action on playable states. In case of a stateful bot, it should correctly + // update its internal state. + virtual bool ProvidesForceAction() { return false; } + // Notifies the bot that it should consider that it took action action in + // the given state. + virtual void ForceAction(const State& state, Action action) { + if (ProvidesForceAction()) { + SpielFatalError( + "ForceAction not implemented but should because the bot is " + "registered as overridable."); + } else { + SpielFatalError( + "ForceAction not implemented because the bot is not overridable"); + } + } + + // Extends a bot to support explicit stochasticity, meaning that it can + // return a distribution over moves. + virtual bool ProvidesPolicy() { return false; } + virtual ActionsAndProbs GetPolicy(const State& state) { + if (ProvidesPolicy()) { + SpielFatalError( + "GetPolicy not implemented but should because the bot is registered " + "as exposing its policy."); + } else { + SpielFatalError( + "GetPolicy not implemented because the bot is not exposing any " + "policy."); + } + } + virtual std::pair StepWithPolicy( + const State& state) { + if (ProvidesPolicy()) { + SpielFatalError( + "StepWithPolicy not implemented but should because the bot is " + "registered as exposing its policy."); + } else { + SpielFatalError( + "StepWithPolicy not implemented because the bot is not exposing any " + "policy."); + } + } + + // Creates a clone of the bot with an independent copy of its internal state. + // The original bot and the clone are completely independent. + // The Clone method should be as cheap to execute as possible. + // + // Important: the cloned bot must sample actions independently and differently + // from the original bot. I.e. if the bot uses any randomness controlling key, + // that key *must* be reseeded when cloning the bot. + // The typical use-case for cloning is generating multiple continuations + // of a game. 
The cloned bot should produce the same policy as the original + // bot, but there *must* be no correllation between action sampling of + // the original bot and its clone. + // Note that bot clones must also sample actions independently. + virtual bool IsClonable() const { return false; } + virtual std::unique_ptr Clone() { + SpielFatalError("Clone method not implemented."); + } +}; + +class BotFactory { + public: + virtual ~BotFactory() = default; + + // Asks the bot whether it can play the game as the given player. + virtual bool CanPlayGame(const Game& game, Player player_id) const = 0; + + // Creates an instance of the bot for a given game and a player + // for which it should play. + virtual std::unique_ptr Create( + std::shared_ptr game, Player player_id, + const GameParameters& bot_params) const = 0; +}; + +// A uniform random bot, for test purposes. +std::unique_ptr MakeUniformRandomBot(Player player_id, int seed); + +// A uniform random bot that takes actions based on its own copy of the state, +// for test purposes. +std::unique_ptr MakeStatefulRandomBot(const Game& game, Player player_id, + int seed); + +// A bot that samples from a policy. +std::unique_ptr MakePolicyBot(int seed, std::shared_ptr policy); +std::unique_ptr MakePolicyBot(const Game& game, Player player_id, int seed, + std::shared_ptr policy); + +// A bot with a fixed action preference, for test purposes. +// Picks the first legal action found in the list of actions. +std::unique_ptr MakeFixedActionPreferenceBot( + Player player_id, const std::vector& actions); + +#define REGISTER_SPIEL_BOT(info, factory) \ + BotRegisterer CONCAT(bot, __COUNTER__)(info, std::make_unique()); + +class BotRegisterer { + public: + BotRegisterer(const std::string& bot_name, + std::unique_ptr factory); + + static std::unique_ptr CreateByName(const std::string& bot_name, + std::shared_ptr game, + Player player_id, + const GameParameters& params); + static std::vector BotsThatCanPlayGame(const Game& game, + Player player_id); + static std::vector BotsThatCanPlayGame(const Game& game); + + static std::vector RegisteredBots(); + static bool IsBotRegistered(const std::string& bot_name); + static void RegisterBot(const std::string& bot_name, + std::unique_ptr factory); + + private: + // Returns a "global" map of registrations (i.e. an object that lives from + // initialization to the end of the program). Note that we do not just use + // a static data member, as we want the map to be initialized before first + // use. + static std::map>& factories() { + static std::map> impl; + return impl; + } +}; + +// Returns true if the bot is registered, false otherwise. +bool IsBotRegistered(const std::string& bot_name); + +// Returns a list of registered bots' short names. +std::vector RegisteredBots(); + +// Returns a list of registered bots' short names that can play specified game +// for a given player. +std::vector BotsThatCanPlayGame(const Game& game, + Player player_id); + +// Returns a list of registered bots' short names that can play specified game +// for any player. +std::vector BotsThatCanPlayGame(const Game& game); + +// Returns a new bot from the specified string, which is the short +// name plus optional parameters, e.g. +// "fixed_action_preference(action_list=0;1;2;3)" +std::unique_ptr LoadBot(const std::string& bot_name, + const std::shared_ptr& game, + Player player_id); + +// Returns a new bot with the specified parameters. 
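> **Illustrative usage (not part of the vendored file):** the registry and `LoadBot` machinery declared above is normally driven by a short-name string with optional parameters. A minimal sketch of loading the built-in `uniform_random` bot for every seat and stepping it through a game; the game name `tic_tac_toe`, the seed value, and the first-legal-action handling of chance nodes are illustrative choices, not requirements of the API.

```cpp
#include <memory>
#include <vector>

#include "open_spiel/spiel.h"
#include "open_spiel/spiel_bots.h"

void PlayUniformRandomBots() {
  std::shared_ptr<const open_spiel::Game> game =
      open_spiel::LoadGame("tic_tac_toe");  // illustrative game choice
  std::vector<std::unique_ptr<open_spiel::Bot>> bots;
  for (open_spiel::Player p = 0; p < game->NumPlayers(); ++p) {
    // "uniform_random(seed=1234)" resolves through BotRegisterer to the
    // UniformRandomBotFactory registered in spiel_bots.cc.
    bots.push_back(open_spiel::LoadBot("uniform_random(seed=1234)", game, p));
  }
  std::unique_ptr<open_spiel::State> state = game->NewInitialState();
  while (!state->IsTerminal()) {
    if (state->IsChanceNode()) {
      // A real driver would sample from ChanceOutcomes(); take the first
      // legal outcome here to keep the sketch short.
      state->ApplyAction(state->LegalActions().front());
      continue;
    }
    const open_spiel::Player current = state->CurrentPlayer();
    const open_spiel::Action action = bots[current]->Step(*state);
    for (open_spiel::Player p = 0; p < game->NumPlayers(); ++p) {
      if (p != current) bots[p]->InformAction(*state, current, action);
    }
    state->ApplyAction(action);
  }
}
```

The `InformAction` loop mirrors the pattern documented in the `Bot::InformAction` comment earlier in this header, so stateful bots stay in sync with moves made by other players.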
+std::unique_ptr LoadBot(const std::string& bot_name, + const std::shared_ptr& game, + Player player_id, + const GameParameters& bot_params); + +} // namespace open_spiel + +#endif // OPEN_SPIEL_SPIEL_BOTS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/spiel_globals.h b/scenarios/bargaining/open_spiel/open_spiel/spiel_globals.h new file mode 100644 index 0000000..2296ca5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/spiel_globals.h @@ -0,0 +1,64 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_SPIEL_CONSTANTS_H_ +#define OPEN_SPIEL_SPIEL_CONSTANTS_H_ + +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { + +// Player ids are 0, 1, 2, ... +// Negative numbers are used for various special values. +enum PlayerId { + // Player 0 is always valid, and is used in single-player games. + kDefaultPlayerId = 0, + // The fixed player id for chance/nature. + kChancePlayerId = -1, + // What is returned as a player id when the game is simultaneous. + kSimultaneousPlayerId = -2, + // Invalid player. + kInvalidPlayer = -3, + // What is returned as the player id on terminal nodes. + kTerminalPlayerId = -4, + // player id of a mean field node + kMeanFieldPlayerId = -5 +}; + +// Constant representing an invalid action. +inline constexpr Action kInvalidAction = -1; + +enum class StateType { + kTerminal, // If the state is terminal. + kChance, // If the player to act equals kChanceId. + kDecision, // If a player other than kChanceId (and kMeanField) is acting. + // The Mean Field state. We expect that logic external to the game will update + // the state distribution in each game state. See details in + // games/mfg/README.md. + kMeanField, +}; + +// Layouts for 3-D tensors. For 2-D tensors, we assume that the layout is a +// single spatial dimension and a channel dimension. If a 2-D tensor should be +// interpreted as a 2-D space, report it as 3-D with a channel dimension of +// size 1. We have no standard for higher-dimensional tensors. +enum class TensorLayout { + kHWC, // indexes are in the order (height, width, channels) + kCHW, // indexes are in the order (channels, height, width) +}; + + +} // namespace open_spiel + +#endif // OPEN_SPIEL_SPIEL_CONSTANTS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/spiel_utils.cc b/scenarios/bargaining/open_spiel/open_spiel/spiel_utils.cc new file mode 100644 index 0000000..5496483 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/spiel_utils.cc @@ -0,0 +1,201 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/spiel_utils.h" + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" + + +namespace open_spiel { + +int NextPlayerRoundRobin(Player player, int nplayers) { + if (player + 1 < nplayers) { + return player + 1; + } else { + return 0; + } +} + +// Helper function to determine the previous player in a round robin. +int PreviousPlayerRoundRobin(Player player, int nplayers) { + if (player - 1 >= 0) { + return player - 1; + } else { + return nplayers - 1; + } +} + +// Used to convert actions represented as integers in mixed bases. +Action RankActionMixedBase(const std::vector& bases, + const std::vector& digits) { + SPIEL_CHECK_EQ(bases.size(), digits.size()); + SPIEL_CHECK_GT(digits.size(), 0); + + Action action = 0; + int one_plus_max = 1; + for (int i = digits.size() - 1; i >= 0; --i) { + SPIEL_CHECK_GE(digits[i], 0); + SPIEL_CHECK_LT(digits[i], bases[i]); + SPIEL_CHECK_GT(bases[i], 1); + action += digits[i] * one_plus_max; + one_plus_max *= bases[i]; + SPIEL_CHECK_LT(action, one_plus_max); + } + + return action; +} + +std::vector UnrankActionMixedBase(Action action, + const std::vector& bases) { + std::vector digits(bases.size()); + for (int i = digits.size() - 1; i >= 0; --i) { + SPIEL_CHECK_GT(bases[i], 1); + digits[i] = action % bases[i]; + action /= bases[i]; + } + SPIEL_CHECK_EQ(action, 0); + return digits; +} + +absl::optional FindFile(const std::string& filename, int levels) { + std::string candidate_filename = filename; + for (int i = 0; i <= levels; ++i) { + if (i == 0) { + std::ifstream file(candidate_filename.c_str()); + if (file.good()) { + return candidate_filename; + } + } else { + candidate_filename = "../" + candidate_filename; + std::ifstream file(candidate_filename.c_str()); + if (file.good()) { + return candidate_filename; + } + } + } + return absl::nullopt; +} + +std::string FormatDouble(double value) { + // We cannot use StrCat as that would default to exponential notation + // sometimes. For example, the default format of 10^-9 is the string + // "1e-9". For that reason, we use StrFormat with %f explicitly, and add + // the .0 if necessary (to clarify that it's a double value). + std::string double_str = absl::StrFormat("%.15f", value); + size_t idx = double_str.find('.'); + + if (double_str.find('.') == std::string::npos) { // NOLINT + absl::StrAppend(&double_str, ".0"); + } else { + // Remove the extra trailing zeros, if there are any. 
+ while (double_str.length() > idx + 2 && double_str.back() == '0') { + double_str.pop_back(); + } + } + return double_str; +} + +void SpielDefaultErrorHandler(const std::string& error_msg) { + std::cerr << "Spiel Fatal Error: " << error_msg << std::endl + << std::endl + << std::flush; + std::exit(1); +} + +ErrorHandler error_handler = SpielDefaultErrorHandler; + +void SetErrorHandler(ErrorHandler new_error_handler) { + error_handler = new_error_handler; +} + +void SpielFatalError(const std::string& error_msg) { + error_handler(error_msg); + // The error handler should not return. If it does, we will abort the process. + std::cerr << "Error handler failure - exiting" << std::endl; + std::exit(1); +} + +std::ostream& operator<<(std::ostream& stream, const absl::nullopt_t& v) { + return stream << "(nullopt)"; +} + +void Normalize(absl::Span weights) { + SPIEL_CHECK_FALSE(weights.empty()); + const double normalizer = absl::c_accumulate(weights, 0.); + SPIEL_CHECK_FALSE(std::isnan(normalizer)); + const double uniform_prob = 1.0 / weights.size(); + absl::c_for_each(weights, [&](double& w) { + w = (normalizer == 0.0 ? uniform_prob : w / normalizer); + }); +} + +std::string BoolToStr(bool b) { return b ? "true" : "false"; } + +template +std::string VectorOfPairsToString(std::vector>& vec, + const std::string& delimiter, + const std::string& pair_delimiter) { + std::string str; + for (int i = 0; i < vec.size(); ++i) { + absl::StrAppend(&str, vec[i].first, pair_delimiter, vec[i].second); + if (i != vec.size() - 1) { + absl::StrAppend(&str, delimiter); + } + } + return str; +} + +// TODO(author5): remove this when the abseil version is upgraded. +bool StrContainsIgnoreCase(const std::string& haystack, + const std::string& needle) { + std::string haystack_copy = haystack; + std::string needle_copy = needle; + for (int i = 0; i < haystack_copy.size(); ++i) { + haystack_copy[i] = std::tolower(haystack_copy[i]); + } + for (int i = 0; i < needle_copy.size(); ++i) { + needle_copy[i] = std::tolower(needle_copy[i]); + } + return (haystack_copy.find(needle_copy) != std::string::npos); +} + +int SamplerFromRng::operator()(absl::Span probs) { + const float cutoff = rng_(); + float sum = 0.0f; + for (int i = 0; i < probs.size(); ++i) { + sum += probs[i]; + if (cutoff < sum) { + return i; + } + } + + // To be on the safe side, cover case cutoff == 1.0 and sum < 1 + for (int i = probs.size() - 1; i >= 0; --i) { + if (probs[i] > 0.0) return i; + } + + SpielFatalError("SamplerFromRng: not a probability distribution."); +} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/spiel_utils.h b/scenarios/bargaining/open_spiel/open_spiel/spiel_utils.h new file mode 100644 index 0000000..217225e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/spiel_utils.h @@ -0,0 +1,443 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
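> **Illustrative usage (not part of the vendored file):** the mixed-base helpers implemented in spiel_utils.cc above convert between a flat `Action` id and per-dimension digits. A small round-trip sketch using the same numbers as the header's own example, where bases `{2, 3, 6}` and digits `{1, 1, 1}` rank to `1*18 + 1*6 + 1 = 25`.

```cpp
#include <vector>

#include "open_spiel/spiel_utils.h"

void MixedBaseRoundTrip() {
  const std::vector<int> bases = {2, 3, 6};
  const open_spiel::Action ranked =
      open_spiel::RankActionMixedBase(bases, {1, 1, 1});
  SPIEL_CHECK_EQ(ranked, 25);  // 1*18 + 1*6 + 1
  // Unranking recovers the original digit vector.
  const std::vector<int> digits =
      open_spiel::UnrankActionMixedBase(ranked, bases);
  SPIEL_CHECK_EQ(digits, (std::vector<int>{1, 1, 1}));
}
```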
+ +#ifndef OPEN_SPIEL_SPIEL_UTILS_H_ +#define OPEN_SPIEL_SPIEL_UTILS_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/random/uniform_real_distribution.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/time/clock.h" +#include "open_spiel/abseil-cpp/absl/time/time.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/abseil-cpp/absl/types/span.h" + +// Code that is not part of the API, but is widely useful in implementations. + +namespace open_spiel { + +// Generic ostream operator<< overloads for std:: containers. They have to be +// defined here before call sites because we cannot rely on argument-dependent +// lookup here since that requires putting these overloads into std::, which is +// not allowed (only template specializations on std:: template classes may be +// added to std::, and this is not one of them). + +// Make sure that arbitrary structures can be printed out. +template +std::ostream& operator<<(std::ostream& stream, const std::unique_ptr& v); +template +std::ostream& operator<<(std::ostream& stream, const std::pair& v); +template +std::ostream& operator<<(std::ostream& stream, const std::vector& v); +template +std::ostream& operator<<(std::ostream& stream, const std::array& v); +template +std::ostream& operator<<(std::ostream& stream, const absl::optional& v); +std::ostream& operator<<(std::ostream& stream, const absl::nullopt_t& v); +template +std::ostream& operator<<(std::ostream& stream, absl::Span v); + +// Actual template implementations. +template +std::ostream& operator<<(std::ostream& stream, absl::Span v) { + stream << "["; + for (const auto& element : v) { + stream << element << " "; + } + stream << "]"; + return stream; +} +template +std::ostream& operator<<(std::ostream& stream, const std::vector& v) { + return stream << absl::MakeSpan(v); +} +template +std::ostream& operator<<(std::ostream& stream, const std::array& v) { + stream << "["; + for (const auto& element : v) { + stream << element << " "; + } + stream << "]"; + return stream; +} +template +std::ostream& operator<<(std::ostream& stream, const std::unique_ptr& v) { + return stream << *v; +} +template +std::ostream& operator<<(std::ostream& stream, const absl::optional& v) { + return stream << *v; +} +template +std::ostream& operator<<(std::ostream& stream, const std::pair& v) { + stream << "(" << v.first << "," << v.second << ")"; + return stream; +} + +namespace internal { +// SpielStrOut(out, a, b, c) is equivalent to: +// out << a << b << c; +// It is used to enable SpielStrCat, below. +template +void SpielStrOut(Out& out, const T& arg) { + out << arg; +} + +template +void SpielStrOut(Out& out, const T& arg1, Args&&... args) { + out << arg1; + SpielStrOut(out, std::forward(args)...); +} + +// Builds a string from pieces: +// +// SpielStrCat(1, " + ", 1, " = ", 2) --> "1 + 1 = 2" +// +// Converting the parameters to strings is done using the stream operator<<. +// This is only kept around to be used in the SPIEL_CHECK_* macros and should +// not be called by any code outside of this file. Prefer absl::StrCat instead. +// It is kept here due to support for more types, including char. +template +std::string SpielStrCat(Args&&... 
args) { + std::ostringstream out; + SpielStrOut(out, std::forward(args)...); + return out.str(); +} + +} // namespace internal + +using Player = int; +using Action = int64_t; + +// Default floating point tolerance between two numbers. +inline constexpr float FloatingPointDefaultTolerance() { return 1e-6; } + +// Default tolerance applied when validating variables are valid probability. +inline constexpr float ProbabilityDefaultTolerance() { return 1e-9; } + + +// Helpers used to convert actions represented as integers in mixed bases. +// E.g. RankActionMixedBase({2, 3, 6}, {1, 1, 1}) = 1*18 + 1*6 + 1 = 25, +// and UnrankActioMixedBase(25, {2, 3, 6}, &digits) sets digits to {1, 1, 1}. +// For the rank, both vectors must be the same size. For the unrank, the digits +// must already have size equal to bases.size(). +Action RankActionMixedBase(const std::vector& bases, + const std::vector& digits); + +std::vector UnrankActionMixedBase(Action action, + const std::vector& bases); + +// Helper function to determine the next player in a round robin. +int NextPlayerRoundRobin(Player player, int nplayers); + +// Helper function to determine the previous player in a round robin. +int PreviousPlayerRoundRobin(Player player, int nplayers); + +// Finds a file by looking up a number of directories. For example: if levels is +// 3 and filename is my.txt, it will look for ./my.txt, ../my.txt, ../../my.txt, +// and ../../../my.txt, return the first file found or absl::nullopt if not +// found. +absl::optional FindFile(const std::string& filename, int levels); + +// Normalizes the span. +void Normalize(absl::Span weights); + +// Format in decimal format, with at most 15 places for the fractional part, +// adding ".0" for integer values, and removing any additional trailing zeroes +// after the first decimal place. +std::string FormatDouble(double value); + +// Converts a bool to either "true" or "false". +std::string BoolToStr(bool b); + +// Converts a vector of pairs to a string. +template +std::string VectorOfPairsToString(const std::vector>& vec, + const std::string& delimiter, + const std::string& pair_delimiter); + +// Returns whether the absolute difference between floating point values a and +// b is less than or equal to. +template +bool Near(T a, T b) { + static_assert(std::is_floating_point::value, + "Near() is only for floating point args."); + return fabs(a - b) <= FloatingPointDefaultTolerance(); +} + +// Returns whether |a - b| <= epsilon. +template +bool Near(T a, T b, T epsilon) { + static_assert(std::is_floating_point::value, + "Near() is only for floating point args."); + return fabs(a - b) <= epsilon; +} + +template +bool AllNear(const std::vector& vector1, const std::vector& vector2, + T epsilon) { + if (vector1.size() != vector2.size()) { + return false; + } + for (int i = 0; i < vector1.size(); ++i) { + if (!Near(vector1[i], vector2[i], epsilon)) { + return false; + } + } + return true; +} + +// Some string helpers. We should remove some of these as we upgrade abseil +// versions. +bool StrContainsIgnoreCase(const std::string& haystack, + const std::string& needle); + +// Macros to check for error conditions. +// These trigger SpielFatalError if the condition is violated. +// These macros are always executed. 
If you want to use checks +// only for debugging, use SPIEL_DCHECK_* + +#define SPIEL_CHECK_OP(x_exp, op, y_exp) \ + do { \ + auto x = x_exp; \ + auto y = y_exp; \ + if (!((x)op(y))) \ + open_spiel::SpielFatalError(open_spiel::internal::SpielStrCat( \ + __FILE__, ":", __LINE__, " ", #x_exp " " #op " " #y_exp, \ + "\n" #x_exp, " = ", x, ", " #y_exp " = ", y)); \ + } while (false) + +#define SPIEL_CHECK_FN2(x_exp, y_exp, fn) \ + do { \ + auto x = x_exp; \ + auto y = y_exp; \ + if (!fn(x, y)) \ + open_spiel::SpielFatalError(open_spiel::internal::SpielStrCat( \ + __FILE__, ":", __LINE__, " ", #fn "(" #x_exp ", " #y_exp ")\n", \ + #x_exp " = ", x, ", " #y_exp " = ", y)); \ + } while (false) + +#define SPIEL_CHECK_FN3(x_exp, y_exp, z_exp, fn) \ + do { \ + auto x = x_exp; \ + auto y = y_exp; \ + auto z = z_exp; \ + if (!fn(x, y, z)) \ + open_spiel::SpielFatalError(open_spiel::internal::SpielStrCat( \ + __FILE__, ":", __LINE__, " ", \ + #fn "(" #x_exp ", " #y_exp ", " #z_exp ")\n", #x_exp " = ", x, \ + ", " #y_exp " = ", y, ", " #z_exp " = ", z)); \ + } while (false) + +#define SPIEL_CHECK_GE(x, y) SPIEL_CHECK_OP(x, >=, y) +#define SPIEL_CHECK_GT(x, y) SPIEL_CHECK_OP(x, >, y) +#define SPIEL_CHECK_LE(x, y) SPIEL_CHECK_OP(x, <=, y) +#define SPIEL_CHECK_LT(x, y) SPIEL_CHECK_OP(x, <, y) +#define SPIEL_CHECK_EQ(x, y) SPIEL_CHECK_OP(x, ==, y) +#define SPIEL_CHECK_NE(x, y) SPIEL_CHECK_OP(x, !=, y) +#define SPIEL_CHECK_PROB(x) \ + SPIEL_CHECK_GE(x, 0); \ + SPIEL_CHECK_LE(x, 1); \ + SPIEL_CHECK_FALSE(std::isnan(x) || std::isinf(x)) +#define SPIEL_CHECK_PROB_TOLERANCE(x, tol) \ + SPIEL_CHECK_GE(x, -(tol)); \ + SPIEL_CHECK_LE(x, 1.0 + (tol)); \ + SPIEL_CHECK_FALSE(std::isnan(x) || std::isinf(x)) + + +// Checks that x and y are equal to the default dynamic threshold proportional +// to max(|x|, |y|). +#define SPIEL_CHECK_FLOAT_EQ(x, y) \ + SPIEL_CHECK_FN2(static_cast(x), static_cast(y), \ + open_spiel::Near) + +// Checks that x and y are epsilon apart or closer. +#define SPIEL_CHECK_FLOAT_NEAR(x, y, epsilon) \ + SPIEL_CHECK_FN3(static_cast(x), static_cast(y), \ + static_cast(epsilon), open_spiel::Near) + +#define SPIEL_CHECK_TRUE(x) \ + while (!(x)) \ + open_spiel::SpielFatalError(open_spiel::internal::SpielStrCat( \ + __FILE__, ":", __LINE__, " CHECK_TRUE(", #x, ")")) + +// A verbose checker that will print state info: +// Use as SPIEL_CHECK_TRUE_WSI(bool cond, const std::string& error_message, +// const Game& game_ref, const State& state_ref) +#define SPIEL_CHECK_TRUE_WSI(x, e, g, s) \ + while (!(x)) \ + open_spiel::SpielFatalErrorWithStateInfo( \ + open_spiel::internal::SpielStrCat( \ + __FILE__, ":", __LINE__, " CHECK_TRUE(", #x, "): ", e), \ + (g), (s)) + +#define SPIEL_CHECK_FALSE(x) \ + while (x) \ + open_spiel::SpielFatalError(open_spiel::internal::SpielStrCat( \ + __FILE__, ":", __LINE__, " CHECK_FALSE(", #x, ")")) + +#if !defined(NDEBUG) + +// Checks that are executed in Debug / Testing build type, +// and turned off for Release build type. 
+#define SPIEL_DCHECK_OP(x_exp, op, y_exp) SPIEL_CHECK_OP(x_exp, op, y_exp) +#define SPIEL_DCHECK_FN2(x_exp, y_exp, fn) SPIEL_CHECK_FN2(x_exp, y_exp, fn) +#define SPIEL_DCHECK_FN3(x_exp, y_exp, z_exp, fn) \ + SPIEL_CHECK_FN3(x_exp, y_exp, z_exp, fn) +#define SPIEL_DCHECK_GE(x, y) SPIEL_CHECK_GE(x, y) +#define SPIEL_DCHECK_GT(x, y) SPIEL_CHECK_GT(x, y) +#define SPIEL_DCHECK_LE(x, y) SPIEL_CHECK_LE(x, y) +#define SPIEL_DCHECK_LT(x, y) SPIEL_CHECK_LT(x, y) +#define SPIEL_DCHECK_EQ(x, y) SPIEL_CHECK_EQ(x, y) +#define SPIEL_DCHECK_NE(x, y) SPIEL_CHECK_NE(x, y) +#define SPIEL_DCHECK_PROB(x) SPIEL_DCHECK_PROB(x) +#define SPIEL_DCHECK_FLOAT_EQ(x, y) SPIEL_CHECK_FLOAT_EQ(x, y) +#define SPIEL_DCHECK_FLOAT_NEAR(x, y, epsilon) \ + SPIEL_CHECK_FLOAT_NEAR(x, y, epsilon) +#define SPIEL_DCHECK_TRUE(x) SPIEL_CHECK_TRUE(x) +#define SPIEL_DCHECK_FALSE(x) SPIEL_CHECK_FALSE(x) + +#else // defined(NDEBUG) + +// Turn off checks for the (optimized) Release build type. +#define SPIEL_DCHECK_OP(x_exp, op, y_exp) +#define SPIEL_DCHECK_FN2(x_exp, y_exp, fn) +#define SPIEL_DCHECK_FN3(x_exp, y_exp, z_exp, fn) +#define SPIEL_DCHECK_GE(x, y) +#define SPIEL_DCHECK_GT(x, y) +#define SPIEL_DCHECK_LE(x, y) +#define SPIEL_DCHECK_LT(x, y) +#define SPIEL_DCHECK_EQ(x, y) +#define SPIEL_DCHECK_NE(x, y) +#define SPIEL_DCHECK_PROB(x) +#define SPIEL_DCHECK_FLOAT_EQ(x, y) +#define SPIEL_DCHECK_FLOAT_NEAR(x, y, epsilon) +#define SPIEL_DCHECK_TRUE(x) +#define SPIEL_DCHECK_FALSE(x) + +#endif // !defined(NDEBUG) + +// When an error is encountered, OpenSpiel code should call SpielFatalError() +// which will forward the message to the current error handler. +// The default error handler outputs the error message to stderr, and exits +// the process with exit code 1. + +// When called from Python, a different error handled is used, which returns +// RuntimeException to the caller, containing the error message. + +// Report a runtime error. +[[noreturn]] void SpielFatalError(const std::string& error_msg); + +// Specify a new error handler. +using ErrorHandler = void (*)(const std::string&); +void SetErrorHandler(ErrorHandler error_handler); + +// A ProbabilitySampler that samples uniformly from a distribution. +class UniformProbabilitySampler { + public: + UniformProbabilitySampler(int seed, double min = 0., double max = 1.) + : seed_(seed), rng_(seed_), dist_(min, max), min_(min), max_(max) {} + + UniformProbabilitySampler(double min = 0., double max = 1.) + : rng_(seed_), dist_(min, max), min_(min), max_(max) {} + + // When copying, we reinitialize the sampler to have the initial seed. + UniformProbabilitySampler(const UniformProbabilitySampler& other) + : seed_(other.seed_), + rng_(other.seed_), + dist_(other.min_, other.max_), + min_(other.min_), + max_(other.max_) {} + + double operator()() { return dist_(rng_); } + + private: + // Set the seed as the number of nanoseconds + const int seed_ = absl::ToInt64Nanoseconds(absl::Now() - absl::UnixEpoch()); + std::mt19937 rng_; + absl::uniform_real_distribution dist_; + + const double min_; + const double max_; +}; + +// Utility functions intended to be used for casting +// from a Base class to a Derived subclass. +// These functions handle various use cases, such as pointers and const +// references. For shared or unique pointers you can get the underlying pointer. +// When you use debug mode, a more expensive dynamic_cast is used and it checks +// whether the casting has been successful. In optimized builds only static_cast +// is used when possible. 
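> **Illustrative usage (not part of the vendored file):** a tiny self-contained sketch of the checked downcast described above; the `Animal`/`Cat` types are invented for illustration and are not OpenSpiel types.

```cpp
#include "open_spiel/spiel_utils.h"

struct Animal { virtual ~Animal() = default; };
struct Cat : Animal { int lives = 9; };

void DownCastSketch() {
  Cat cat;
  Animal* base = &cat;
  // Pointer form: a plain static_cast in release builds, verified with an
  // extra dynamic_cast in debug builds.
  Cat* as_cat = open_spiel::down_cast<Cat*>(base);
  // Reference form behaves the same way.
  Cat& as_cat_ref = open_spiel::down_cast<Cat&>(*base);
  SPIEL_CHECK_EQ(as_cat->lives, as_cat_ref.lives);
}
```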
+ +// use like this: down_cast(foo); +template +inline To down_cast(From* f) { +#if !defined(NDEBUG) + if (f != nullptr && dynamic_cast(f) == nullptr) { + std::string from = typeid(From).name(); + std::string to = typeid(From).name(); + SpielFatalError( + absl::StrCat("Cast failure: could not cast a pointer from '", from, + "' to '", to, "'")); + } +#endif + return static_cast(f); +} + +// use like this: down_cast(foo); +template +inline To down_cast(From& f) { + typedef typename std::remove_reference::type* ToAsPointer; +#if !defined(NDEBUG) + if (dynamic_cast(&f) == nullptr) { + std::string from = typeid(From).name(); + std::string to = typeid(From).name(); + SpielFatalError( + absl::StrCat("Cast failure: could not cast a reference from '", from, + "' to '", to, "'")); + } +#endif + return *static_cast(&f); +} + +// Creates a sampler from a std::function conforming to the +// probabilities received. absl::discrete_distribution requires a URBG as a +// source of randomness (as opposed to a std::function) so cannot +// be used directly. +class SamplerFromRng { + public: + explicit SamplerFromRng(std::function rng) : rng_(std::move(rng)) {} + + int operator()(absl::Span probs); + + private: + std::function rng_; +}; + +} // namespace open_spiel + +#endif // OPEN_SPIEL_SPIEL_UTILS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/tensor_game.cc b/scenarios/bargaining/open_spiel/open_spiel/tensor_game.cc new file mode 100644 index 0000000..d68c004 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/tensor_game.cc @@ -0,0 +1,146 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/tensor_game.h" + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace tensor_game { +namespace { +// Check the utilities to see if the game is constant-sum or identical +// (cooperative). +GameType::Utility GetUtilityType( + const std::vector>& utils) { + double util_sum = 0; + // Assume both are true until proven otherwise. 
+ bool constant_sum = true; + bool identical = true; + for (int i = 0; i < utils[0].size(); ++i) { + double util_sum_i = 0; + for (int player = 0; player < utils.size(); ++player) { + util_sum_i += utils[player][i]; + } + + if (i == 0) { + util_sum = util_sum_i; + } else { + if (constant_sum && !Near(util_sum_i, util_sum)) { + constant_sum = false; + } + } + + if (identical) { + for (int player = 1; player < utils.size(); ++player) { + if (utils[0][i] != utils[player][i]) { + identical = false; + break; + } + } + } + } + + if (constant_sum && Near(util_sum, 0.0)) { + return GameType::Utility::kZeroSum; + } else if (constant_sum) { + return GameType::Utility::kConstantSum; + } else if (identical) { + return GameType::Utility::kIdentical; + } else { + return GameType::Utility::kGeneralSum; + } +} +} // namespace + +TensorState::TensorState(std::shared_ptr game) + : NFGState(game), + tensor_game_(static_cast(game.get())) {} + +std::string TensorState::ToString() const { + std::string result = ""; + absl::StrAppend(&result, "Terminal? ", IsTerminal() ? "true" : "false", "\n"); + if (IsTerminal()) { + absl::StrAppend(&result, "History: ", HistoryString(), "\n"); + absl::StrAppend(&result, "Returns: ", absl::StrJoin(Returns(), ","), "\n"); + } + + return result; +} + +std::unique_ptr TensorGame::NewInitialState() const { + return std::unique_ptr(new TensorState(shared_from_this())); +} + +std::shared_ptr CreateTensorGame( + const std::vector>& utils, + const std::vector& shape) { + std::vector> action_names(shape.size()); + for (Player player = 0; player < shape.size(); ++player) { + for (int i = 0; i < shape[player]; ++i) { + action_names[player].push_back(absl::StrCat("action", player, "_", i)); + } + } + return CreateTensorGame("short_name", "Long Name", action_names, utils); +} + +// Create a matrix game with the specified utilities and row/column names. +// Utilities must be in row-major form. + +std::shared_ptr CreateTensorGame( + const std::string& short_name, const std::string& long_name, + const std::vector>& action_names, + const std::vector>& utils) { + const int size = + std::accumulate(action_names.begin(), action_names.end(), 1, + [](const int s, auto names) { return s * names.size(); }); + SPIEL_CHECK_TRUE( + std::all_of(utils.begin(), utils.end(), [size](const auto& player_utils) { + return player_utils.size() == size; + })); + + // Detect the utility type from the utilities. 
+ const GameType::Utility utility = GetUtilityType(utils); + + const GameType game_type{ + /*short_name=*/short_name, + /*long_name=*/long_name, + GameType::Dynamics::kSimultaneous, + GameType::ChanceMode::kDeterministic, + GameType::Information::kOneShot, + utility, + GameType::RewardModel::kTerminal, + /*max_num_players=*/static_cast(utils.size()), + /*min_num_players=*/static_cast(utils.size()), + /*provides_information_state_string=*/true, + /*provides_information_state_tensor=*/true, + /*provides_observation_string=*/false, + /*provides_observation_tensor=*/false, + /*parameter_specification=*/{} // no parameters + }; + + return std::shared_ptr( + new TensorGame(game_type, {}, action_names, utils)); +} + +} // namespace tensor_game +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/tensor_game.h b/scenarios/bargaining/open_spiel/open_spiel/tensor_game.h new file mode 100644 index 0000000..cdac792 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/tensor_game.h @@ -0,0 +1,253 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_TENSOR_GAME_H_ +#define OPEN_SPIEL_TENSOR_GAME_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/matrix_game.h" +#include "open_spiel/normal_form_game.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +// A tensor game is an example of an n-player normal-form game. + +namespace open_spiel { +namespace tensor_game { + +class TensorGame : public NormalFormGame { + public: + // action_names[player] is the list of action names for player. + // utilities[player] is a flattened tensor of utilities for player, in + // row-major/C-style/lexicographic order of all players' actions. 
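> **Illustrative usage (not part of the vendored file):** a concrete sketch of the flattened, row-major utility layout described in the comment above, using the `CreateTensorGame(utils, shape)` factory declared later in this header; the utility values are arbitrary.

```cpp
#include "open_spiel/tensor_game.h"

void TensorGameLayoutSketch() {
  // Player 0 has 2 actions and player 1 has 3, so each utility tensor is a
  // flat vector of 2*3 = 6 entries, with player 0's action varying slowest.
  std::shared_ptr<const open_spiel::tensor_game::TensorGame> game =
      open_spiel::tensor_game::CreateTensorGame(
          /*utils=*/{{0., 1., 2., 3., 4., 5.},   // player 0
                     {5., 4., 3., 2., 1., 0.}},  // player 1
          /*shape=*/{2, 3});
  // Joint action (a0 = 1, a1 = 2) maps to flat index 1*3 + 2 = 5.
  SPIEL_CHECK_FLOAT_EQ(game->GetUtility(/*player=*/0, {1, 2}), 5.0);
  SPIEL_CHECK_FLOAT_EQ(game->GetUtility(/*player=*/1, {1, 2}), 0.0);
}
```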
+ TensorGame(GameType game_type, GameParameters game_parameters, + std::vector> action_names, + std::vector> utilities) + : NormalFormGame(std::move(game_type), std::move(game_parameters)), + action_names_(std::move(action_names)), + utilities_(std::move(utilities)), + shape_(utilities_.size()) { + int size = 1; + for (Player player = 0; player < action_names_.size(); ++player) { + size *= action_names_[player].size(); + shape_[player] = action_names_[player].size(); + } + ComputeMinMaxUtility(); + SPIEL_CHECK_TRUE(std::all_of(utilities_.begin(), utilities_.end(), + [size](const auto& player_utils) { + return player_utils.size() == size; + })); + } + + // Implementation of Game interface + int NumDistinctActions() const override { + return *std::max_element(begin(shape_), end(shape_)); + } + + std::unique_ptr NewInitialState() const override; + + int NumPlayers() const override { return utilities_.size(); } + + double MinUtility() const override { return min_utility_; } + + double MaxUtility() const override { return max_utility_; } + + std::string ActionToString(Player player, Action action) const override { + return ActionName(player, action); + } + + const std::vector& Shape() const { return shape_; } + const double PlayerUtility(const Player player, + const std::vector& actions) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, NumPlayers()); + return utilities_[player][index(actions)]; + } + const std::vector& PlayerUtilities(const Player player) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, NumPlayers()); + return utilities_[player]; + } + const std::string& ActionName(const Player player, + const Action& action) const { + SPIEL_CHECK_GE(player, 0); + SPIEL_CHECK_LT(player, NumPlayers()); + return action_names_[player][action]; + } + + bool operator==(const Game& other_game) const override { + const auto& other = down_cast(other_game); + return (shape_ == other.shape_ && utilities_ == other.utilities_); + } + + bool ApproxEqual(const Game& other_game, double tolerance) const { + const auto& other = down_cast(other_game); + if (shape_ != other.shape_) { + return false; + } + for (Player p = 0; p < NumPlayers(); ++p) { + if (!AllNear(utilities_[p], other.utilities_[p], tolerance)) { + return false; + } + } + return true; + } + + std::vector GetUtilities(const std::vector& joint_action) + const override { + int idx = index(joint_action); + std::vector utilities; + utilities.reserve(NumPlayers()); + for (Player p = 0; p < NumPlayers(); ++p) { + utilities.push_back(utilities_[p][idx]); + } + return utilities; + } + + double GetUtility(Player player, const std::vector& joint_action) + const override { + return PlayerUtility(player, joint_action); + } + + std::shared_ptr AsMatrixGame() const { + SPIEL_CHECK_EQ(NumPlayers(), 2); + const GameType& game_type = GetType(); + return matrix_game::CreateMatrixGame( + game_type.short_name, game_type.long_name, + action_names_[0], action_names_[1], + utilities_[0], utilities_[1]); + } + + private: + const int index(const std::vector& args) const { + int ind = 0; + for (int i = 0; i < NumPlayers(); ++i) { + ind = ind * shape_[i] + args[i]; + } + return ind; + } + + void ComputeMinMaxUtility() { + min_utility_ = *std::min_element(begin(utilities_[0]), end(utilities_[0])); + for (Player player = 1; player < NumPlayers(); ++player) { + min_utility_ = + std::min(min_utility_, *std::min_element(begin(utilities_[player]), + end(utilities_[player]))); + } + + max_utility_ = *std::max_element(begin(utilities_[0]), 
end(utilities_[0])); + for (Player player = 1; player < NumPlayers(); ++player) { + max_utility_ = + std::max(max_utility_, *std::max_element(begin(utilities_[player]), + end(utilities_[player]))); + } + } + + // action_names_[player] is the list of action names for player. + const std::vector> action_names_; + // utilities_[player] is a flattened tensor of utilities for player, in + // row-major/C-style/lexicographic order of all players' actions. + const std::vector> utilities_; + std::vector shape_; + double min_utility_; + double max_utility_; +}; + +class TensorState : public NFGState { + public: + explicit TensorState(std::shared_ptr game); + explicit TensorState(const TensorState&) = default; + + std::vector LegalActions(Player player) const override { + if (IsTerminal()) return {}; + if (player == kSimultaneousPlayerId) { + return LegalFlatJointActions(); + } else { + std::vector moves(tensor_game_->Shape()[player]); + std::iota(moves.begin(), moves.end(), 0); // fill with values 0...n-1 + return moves; + } + } + + std::string ToString() const override; + + std::string ActionToString(Player player, Action action_id) const override { + if (player == kSimultaneousPlayerId) + return FlatJointActionToString(action_id); + else + return tensor_game_->ActionName(player, action_id); + } + + bool IsTerminal() const override { return !joint_move_.empty(); } + + std::vector Returns() const override { + std::vector returns(NumPlayers()); + if (IsTerminal()) { + for (Player player = 0; player < returns.size(); player++) { + returns[player] = tensor_game_->PlayerUtility(player, joint_move_); + } + } + return returns; + } + + std::unique_ptr Clone() const override { + return std::unique_ptr(new TensorState(*this)); + } + + protected: + void DoApplyActions(const std::vector& moves) override { + SPIEL_CHECK_EQ(moves.size(), NumPlayers()); + for (Player player = 0; player < NumPlayers(); player++) { + SPIEL_CHECK_GE(moves[player], 0); + SPIEL_CHECK_LT(moves[player], tensor_game_->Shape()[player]); + } + joint_move_ = moves; + } + + private: + std::vector joint_move_{}; // joint move that was chosen + const TensorGame* tensor_game_; +}; + +// Create a tensor game with the specified utilities and action names. +// utils[player] is a flattened tensor of utilities for player, in +// row-major/C-style/lexicographic order of all players' actions. + + +std::shared_ptr CreateTensorGame( + const std::string& short_name, const std::string& long_name, + const std::vector>& action_names, + const std::vector>& utils); + +// Create a tensor game with the specified utilities, with names +// "short_name", "Long Name" and action names +// action0_0, action0_1.. for player 0, and so forth for other players. +// utils[player] is a flattened tensor of utilities for player, in +// row-major/C-style/lexicographic order of all players' actions. 
+ +std::shared_ptr CreateTensorGame( + const std::vector>& utils, + const std::vector& shape); + +} // namespace tensor_game +} // namespace open_spiel + +#endif // OPEN_SPIEL_TENSOR_GAME_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/tests/CMakeLists.txt b/scenarios/bargaining/open_spiel/open_spiel/tests/CMakeLists.txt new file mode 100644 index 0000000..6fb2d31 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/tests/CMakeLists.txt @@ -0,0 +1,25 @@ +add_library (tests OBJECT + basic_tests.h + basic_tests.cc + console_play_test.h + console_play_test.cc +) +target_include_directories (tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + +add_executable(spiel_test spiel_test.cc + $ ${OPEN_SPIEL_OBJECTS}) +add_test(spiel_test spiel_test) + +add_executable(action_view_test action_view_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(action_view_test action_view_test) + +if (BUILD_SHARED_LIB) + if (WIN32) + add_executable(shared_lib_test shared_lib_test.cc ${OPEN_SPIEL_OBJECTS}) + else() + add_executable(shared_lib_test shared_lib_test.cc) + endif() + target_link_libraries(shared_lib_test open_spiel) + add_test(shared_lib_test shared_lib_test) +endif() diff --git a/scenarios/bargaining/open_spiel/open_spiel/tests/action_view_test.cc b/scenarios/bargaining/open_spiel/open_spiel/tests/action_view_test.cc new file mode 100644 index 0000000..edd4678 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/tests/action_view_test.cc @@ -0,0 +1,62 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/action_view.h" + +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace { + +void TestFixedActions() { + ActionView view(/*current_player=*/kSimultaneousPlayerId, + /*legal_actions=*/{{0, 1}, {2, 3, 4}, {5, 6}}); + + std::vector< // Player + std::vector< // Fixed action + std::vector>> // Expected joint actions. 
+ expected_joint_actions = {{{0, 2, 4, 6, 8, 10}, {1, 3, 5, 7, 9, 11}}, + {{0, 1, 6, 7}, {2, 3, 8, 9}, {4, 5, 10, 11}}, + {{0, 1, 2, 3, 4, 5}, {6, 7, 8, 9, 10, 11}}}; + + for (int pl = 0; pl < view.num_players(); ++pl) { + for (int action_index = 0; action_index < view.num_actions(pl); + ++action_index) { + int i = 0; + for (Action actual_joint_action : view.fixed_action(pl, action_index)) { + SPIEL_CHECK_EQ(expected_joint_actions[pl][action_index][i++], + actual_joint_action); + } + } + } +} + +void TestFlatJointActions() { + ActionView view(/*current_player=*/kSimultaneousPlayerId, + /*legal_actions=*/{{0, 1}, {2, 3, 4}, {5, 6}}); + + int expected_joint_action = 0; + for (Action actual_joint_action : view.flat_joint_actions()) { + SPIEL_CHECK_EQ(expected_joint_action++, actual_joint_action); + } + SPIEL_CHECK_EQ(expected_joint_action, 2 * 3 * 2); +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::TestFixedActions(); + open_spiel::TestFlatJointActions(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/tests/basic_tests.cc b/scenarios/bargaining/open_spiel/open_spiel/tests/basic_tests.cc new file mode 100644 index 0000000..e38aeb4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/tests/basic_tests.cc @@ -0,0 +1,753 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/tests/basic_tests.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/container/btree_set.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace testing { + +namespace { + +constexpr int kInvalidHistoryPlayer = -300; +constexpr int kInvalidHistoryAction = -301; +constexpr double kRewardEpsilon = 1e-9; + +// Information about the simulation history. Used to track past states and +// actions for rolling back simulations via UndoAction, and check History. +// For simultaneous games, a simultaneous move will be stored as several items. +// The state will be nullptr and the player kInvalidHistoryPlayer for invalid +// transitions. +// The transition state_0 --[action]--> state_1 --[action2]--> ... is stored as: +// (state_0, state_0.CurrentPlayer(), action), +// (state_1, state_1.CurrentPlayer(), action2), ... +struct HistoryItem { + std::unique_ptr state; + Player player; + Action action; + HistoryItem(std::unique_ptr _state, Player _player, int _action) + : state(std::move(_state)), player(_player), action(_action) {} +}; + +// Apply the action to the specified state. If clone is implemented, then do +// more: clone the state, apply the action to the cloned state, and check the +// original state and cloned state are equal using their string +// representation. 
+void ApplyActionTestClone(const Game& game, State* state, + const std::vector& joint_action) { + std::unique_ptr clone = state->Clone(); + state->ApplyActions(joint_action); + clone->ApplyActions(joint_action); + SPIEL_CHECK_EQ(state->ToString(), clone->ToString()); + SPIEL_CHECK_EQ(state->History(), clone->History()); +} + +// Apply the action to the specified state. If clone is implemented, then do +// more: clone the state, apply the action to the cloned state, and check the +// original state and cloned state are equal using their string +// representation. +void ApplyActionTestClone(const Game& game, State* state, Action action) { + std::unique_ptr clone = state->Clone(); + state->ApplyAction(action); + clone->ApplyAction(action); + SPIEL_CHECK_EQ(state->ToString(), clone->ToString()); + SPIEL_CHECK_EQ(state->History(), clone->History()); +} + +// Check that the legal actions list is empty for the non-current player. +// We only check that for turned-base games. + +void LegalActionsIsEmptyForOtherPlayers(const Game& game, State& state) { + if (game.GetType().dynamics == GameType::Dynamics::kSimultaneous) { + return; + } + + Player current_player = state.CurrentPlayer(); + for (Player player = 0; player < game.NumPlayers(); ++player) { + if (state.IsChanceNode()) { + continue; + } + if (player != current_player) { + int size = state.LegalActions(player).size(); + // We do not use SPIEL_CHECK_EQ because it does not print the values. + if (size != 0) { + std::string str = ""; + absl::StrJoin(state.LegalActions(player), str); + SpielFatalError(absl::StrCat( + __FILE__, ":", __LINE__, " ", size, " should be 0 for player ", + player, "(current_player:", current_player, ")", str)); + } + } + } +} + +void LegalActionsMaskTest(const Game& game, const State& state, int player, + const std::vector& legal_actions) { + std::vector legal_actions_mask = state.LegalActionsMask(player); + const int expected_length = state.IsChanceNode() ? game.MaxChanceOutcomes() + : game.NumDistinctActions(); + SPIEL_CHECK_EQ(legal_actions_mask.size(), expected_length); + for (Action action : legal_actions) { + SPIEL_CHECK_GE(action, 0); + SPIEL_CHECK_LT(action, expected_length); + SPIEL_CHECK_EQ(legal_actions_mask[action], 1); + } + + int num_ones = 0; + for (int i = 0; i < expected_length; ++i) { + SPIEL_CHECK_TRUE(legal_actions_mask[i] == 0 || legal_actions_mask[i] == 1); + num_ones += legal_actions_mask[i]; + } + + SPIEL_CHECK_EQ(num_ones, legal_actions.size()); +} + +bool IsPowerOfTwo(int n) { return n == 0 || (n & (n - 1)) == 0; } + +} // namespace + +void DefaultStateChecker(const State& state) {} + +// Checks that the game can be loaded. +void LoadGameTest(const std::string& game_name) { + std::shared_ptr game = LoadGame(game_name); + SPIEL_CHECK_TRUE(game != nullptr); +} + +void NoChanceOutcomesTest(const Game& game) { + std::cout << "NoChanceOutcomesTest, game = " << game.GetType().short_name + << std::endl; + int max_outcomes = game.MaxChanceOutcomes(); + SPIEL_CHECK_EQ(max_outcomes, 0); +} + +void ChanceOutcomesTest(const Game& game) { + std::cout << "ChanceOutcomesTest, game = " << game.GetType().short_name + << std::endl; + int max_outcomes = game.MaxChanceOutcomes(); + SPIEL_CHECK_GT(max_outcomes, 0); +} + +void TestUndo(std::unique_ptr state, + const std::vector& history) { + // TODO(author2): We can just check each UndoAction. 
+ for (auto prev = history.rbegin(); prev != history.rend(); ++prev) { + state->UndoAction(prev->player, prev->action); + SPIEL_CHECK_EQ(state->ToString(), prev->state->ToString()); + // We also check that UndoActions correctly updates history_. + SPIEL_CHECK_EQ(state->History(), prev->state->History()); + // And correctly updates move_number_. + SPIEL_CHECK_EQ(state->MoveNumber(), prev->state->MoveNumber()); + } +} + +void TestSerializeDeserialize(const Game& game, const State* state) { + const std::string& ser_str = SerializeGameAndState(game, *state); + std::pair, std::unique_ptr> + game_and_state = DeserializeGameAndState(ser_str); + SPIEL_CHECK_EQ(game.ToString(), game_and_state.first->ToString()); + SPIEL_CHECK_EQ(state->ToString(), game_and_state.second->ToString()); +} + +void TestHistoryContainsActions(const Game& game, + const std::vector& history) { + std::vector actions = {}; + for (const auto& history_item : history) { + if (history_item.state != nullptr) { + SPIEL_CHECK_EQ(history_item.state->History(), actions); + } + actions.push_back(history_item.action); + } +} + +void CheckReturnsSum(const Game& game, const State& state) { + std::vector returns = state.Returns(); + double rsum = std::accumulate(returns.begin(), returns.end(), 0.0); + absl::optional utility_sum = game.UtilitySum(); + + switch (game.GetType().utility) { + case GameType::Utility::kZeroSum: { + SPIEL_CHECK_EQ(utility_sum, 0.0); + SPIEL_CHECK_TRUE(Near(rsum, 0.0, kRewardEpsilon)); + break; + } + case GameType::Utility::kConstantSum: { + SPIEL_CHECK_TRUE(utility_sum.has_value()); + SPIEL_CHECK_FLOAT_NEAR(rsum, *utility_sum, kRewardEpsilon); + break; + } + case GameType::Utility::kIdentical: { + SPIEL_CHECK_FALSE(utility_sum.has_value()); + for (int i = 1; i < returns.size(); ++i) { + SPIEL_CHECK_TRUE(Near(returns[i], returns[i - 1], kRewardEpsilon)); + } + break; + } + case GameType::Utility::kGeneralSum: { + SPIEL_CHECK_FALSE(utility_sum.has_value()); + } + } +} + +// Tests all observation and information_state related methods which are +// supported by the game, for all players. +// +// The following functions should return valid outputs for valid player, even +// on terminal states: +// - std::string InformationStateString(Player player) +// - std::vector InformationStateTensor(Player player) +// - std::string ObservationString(Player player) +// - std::vector ObservationTensor(Player player) +// +// These functions should crash on invalid players: this is tested in +// api_test.py as it's simpler to catch the error from Python. +void CheckObservables(const Game& game, + const State& state, + Observation* observation // Can be nullptr + ) { + for (auto p = Player{0}; p < game.NumPlayers(); ++p) { + if (game.GetType().provides_information_state_tensor) { + std::vector tensor = state.InformationStateTensor(p); + for (float val : tensor) SPIEL_CHECK_TRUE(std::isfinite(val)); + SPIEL_CHECK_EQ(tensor.size(), game.InformationStateTensorSize()); + } + if (game.GetType().provides_observation_tensor) { + std::vector tensor = state.ObservationTensor(p); + for (float val : tensor) SPIEL_CHECK_TRUE(std::isfinite(val)); + SPIEL_CHECK_EQ(tensor.size(), game.ObservationTensorSize()); + } + if (game.GetType().provides_information_state_string) { + // Checking it does not raise errors. + state.InformationStateString(p); + } + if (game.GetType().provides_observation_string) { + // Checking it does not have errors. 
+ state.ObservationString(p); + } + + if (observation != nullptr) { + if (observation->HasString()) observation->StringFrom(state, p); + if (observation->HasTensor()) observation->SetFrom(state, p); + } + } +} + +void CheckActionStringsAreUniqueForPlayer(const Game& game, State& state, + Player player) { + absl::flat_hash_set action_strings; + for (Action action : state.LegalActions(player)) { + const auto action_str = state.ActionToString(player, action); + const auto& [unused, was_inserted] = action_strings.insert(action_str); + SPIEL_CHECK_TRUE_WSI( + was_inserted, + absl::StrCat("Duplicate action string '", action_str, "' in state"), + game, state); + } +} + +void CheckActionStringsAreUnique(const Game& game, State& state) { + if (state.IsTerminal() || state.IsMeanFieldNode()) return; + if (state.IsSimultaneousNode()) { + for (int player = 0; player < game.NumPlayers(); ++player) { + CheckActionStringsAreUniqueForPlayer(game, state, player); + } + } else{ + // Also works for chance node. + CheckActionStringsAreUniqueForPlayer(game, state, state.CurrentPlayer()); + } +} + +// This is used for mean-field games. +std::vector RandomDistribution(int num_states, std::mt19937* rng) { + std::uniform_real_distribution rand(0, 1); + std::vector distrib; + distrib.reserve(num_states); + for (int i = 0; i < num_states; ++i) { + distrib.push_back(rand(*rng)); + } + double sum = std::accumulate(distrib.begin(), distrib.end(), 0.); + for (int i = 0; i < num_states; ++i) { + distrib[i] /= sum; + } + return distrib; +} + +void RandomSimulation(std::mt19937* rng, const Game& game, bool undo, + bool serialize, bool verbose, bool mask_test, + std::shared_ptr observer, // Can be nullptr + std::function state_checker_fn, + int mean_field_population = -1) { + std::unique_ptr observation = + observer == nullptr ? nullptr + : std::make_unique(game, observer); + std::vector history; + std::vector episode_returns(game.NumPlayers(), 0); + + int infostate_vector_size = game.GetType().provides_information_state_tensor + ? game.InformationStateTensorSize() + : 0; + if (verbose) { + std::cout << "Information state vector size: " << infostate_vector_size + << std::endl; + } + + int observation_vector_size = game.GetType().provides_observation_tensor + ? game.ObservationTensorSize() + : 0; + if (verbose) { + std::cout << "Observation vector size: " << observation_vector_size + << std::endl; + } + + SPIEL_CHECK_TRUE(game.MinUtility() < game.MaxUtility()); + if (verbose) { + std::cout << "Utility range: " << game.MinUtility() << " " + << game.MaxUtility() << std::endl; + + std::cout << "Starting new game.." << std::endl; + } + std::unique_ptr state; + if (mean_field_population == -1) { + state = game.NewInitialState(); + } else { + state = game.NewInitialStateForPopulation(mean_field_population); + } + + if (verbose) { + std::cout << "Initial state:" << std::endl; + std::cout << "State:" << std::endl << state->ToString() << std::endl; + } + int game_length = 0; + int num_moves = 0; + + while (!state->IsTerminal()) { + state_checker_fn(*state); + + if (verbose) { + std::cout << "player " << state->CurrentPlayer() << std::endl; + } + + LegalActionsIsEmptyForOtherPlayers(game, *state); + CheckLegalActionsAreSorted(game, *state); + CheckActionStringsAreUnique(game, *state); + + // Test cloning the state. 
+ std::unique_ptr state_copy = state->Clone(); + SPIEL_CHECK_EQ(state->ToString(), state_copy->ToString()); + SPIEL_CHECK_EQ(state->History(), state_copy->History()); + + if (game.GetType().dynamics == GameType::Dynamics::kMeanField) { + SPIEL_CHECK_LT(state->MoveNumber(), game.MaxMoveNumber()); + SPIEL_CHECK_EQ(state->MoveNumber(), num_moves); + } + + if (serialize && (history.size() < 10 || IsPowerOfTwo(history.size()))) { + TestSerializeDeserialize(game, state.get()); + } + + if (state->IsChanceNode()) { + if (mask_test) LegalActionsMaskTest(game, *state, kChancePlayerId, + state->LegalActions()); + // Chance node; sample one according to underlying distribution + std::vector> outcomes = state->ChanceOutcomes(); + auto [action, prob] = open_spiel::SampleAction(outcomes, *rng); + + if (verbose) { + std::cout << "sampled outcome: " + << state->ActionToString(kChancePlayerId, action) + << " with prob " << prob + << std::endl; + } + history.emplace_back(state->Clone(), kChancePlayerId, action); + state->ApplyAction(action); + + if (undo && (history.size() < 10 || IsPowerOfTwo(history.size()))) { + TestUndo(state->Clone(), history); + } + num_moves++; + } else if (state->CurrentPlayer() == open_spiel::kSimultaneousPlayerId) { + std::vector rewards = state->Rewards(); + std::vector returns = state->Returns(); + SPIEL_CHECK_EQ(rewards.size(), game.NumPlayers()); + SPIEL_CHECK_EQ(returns.size(), game.NumPlayers()); + for (auto p = Player{0}; p < game.NumPlayers(); ++p) { + episode_returns[p] += rewards[p]; + } + if (verbose) { + std::cout << "Rewards: " << absl::StrJoin(rewards, " ") << std::endl; + std::cout << "Returns: " << absl::StrJoin(returns, " ") << std::endl; + std::cout << "Sum Rewards: " << absl::StrJoin(episode_returns, " ") + << std::endl; + } + for (auto p = Player{0}; p < game.NumPlayers(); ++p) { + SPIEL_CHECK_TRUE(Near(episode_returns[p], returns[p], kRewardEpsilon)); + } + + // Players choose simultaneously. 
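+      // Build a joint action by sampling one legal action per player
+      // (defaulting to 0 for players with no legal actions), then apply it to
+      // both this state and a clone via ApplyActionTestClone to verify the
+      // two stay identical.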
+ std::vector joint_action; + + // Sample an action for each player + for (auto p = Player{0}; p < game.NumPlayers(); p++) { + std::vector actions = state->LegalActions(p); + Action action = 0; + if (!actions.empty()) { + if (mask_test) LegalActionsMaskTest(game, *state, p, actions); + std::uniform_int_distribution dis(0, actions.size() - 1); + action = actions[dis(*rng)]; + } + joint_action.push_back(action); + if (p == 0) { + history.emplace_back(state->Clone(), kInvalidHistoryPlayer, action); + } else { + history.emplace_back(nullptr, kInvalidHistoryPlayer, action); + } + if (verbose) { + std::cout << "player " << p << " chose " + << state->ActionToString(p, action) << std::endl; + } + CheckObservables(game, *state, observation.get()); + } + + ApplyActionTestClone(game, state.get(), joint_action); + game_length++; + } else if (state->CurrentPlayer() == open_spiel::kMeanFieldPlayerId) { + auto support = state->DistributionSupport(); + state->UpdateDistribution(RandomDistribution(support.size(), rng)); + } else { + std::vector rewards = state->Rewards(); + std::vector returns = state->Returns(); + SPIEL_CHECK_EQ(rewards.size(), game.NumPlayers()); + SPIEL_CHECK_EQ(returns.size(), game.NumPlayers()); + for (auto p = Player{0}; p < game.NumPlayers(); ++p) { + episode_returns[p] += rewards[p]; + } + if (verbose) { + std::cout << "Rewards: " << absl::StrJoin(rewards, " ") << std::endl; + std::cout << "Returns: " << absl::StrJoin(returns, " ") << std::endl; + std::cout << "Sum Rewards: " << absl::StrJoin(episode_returns, " ") + << std::endl; + } + for (auto p = Player{0}; p < game.NumPlayers(); ++p) { + SPIEL_CHECK_TRUE(Near(episode_returns[p], returns[p], kRewardEpsilon)); + } + + // Decision node. + Player player = state->CurrentPlayer(); + + CheckObservables(game, *state, observation.get()); + + // Sample an action uniformly. + std::vector actions = state->LegalActions(); + if (mask_test) LegalActionsMaskTest(game, *state, state->CurrentPlayer(), + actions); + if (state->IsTerminal()) + SPIEL_CHECK_TRUE(actions.empty()); + else + SPIEL_CHECK_FALSE(actions.empty()); + std::uniform_int_distribution dis(0, actions.size() - 1); + Action action = actions[dis(*rng)]; + + if (verbose) { + std::cout << "chose action: " << action << " (" + << state->ActionToString(player, action) << ")" << std::endl; + } + history.emplace_back(state->Clone(), player, action); + ApplyActionTestClone(game, state.get(), action); + game_length++; + num_moves++; + + if (undo && (history.size() < 10 || IsPowerOfTwo(history.size()))) { + TestUndo(state->Clone(), history); + } + } + + if (verbose) { + std::cout << "State: " << std::endl << state->ToString() << std::endl; + } + } + + state_checker_fn(*state); + SPIEL_CHECK_LE(game_length, game.MaxGameLength()); + + if (verbose) { + std::cout << "Reached a terminal state!" << std::endl; + } + SPIEL_CHECK_EQ(state->CurrentPlayer(), kTerminalPlayerId); + std::vector rewards = state->Rewards(); + if (verbose) { + std::cout << "Rewards: " << absl::StrJoin(rewards, " ") << std::endl; + } + + history.emplace_back(state->Clone(), kTerminalPlayerId, + kInvalidHistoryAction); + TestHistoryContainsActions(game, history); + + // Check the information state of the terminal, too. This is commonly needed, + // for example, as a final observation in an RL environment. + CheckObservables(game, *state, observation.get()); + + // Check that the returns satisfy the constraints based on the game type. + CheckReturnsSum(game, *state); + + // Now, check each individual return is within bounds. 
+ auto returns = state->Returns(); + SPIEL_CHECK_EQ(returns.size(), game.NumPlayers()); + for (Player player = 0; player < game.NumPlayers(); player++) { + double final_return = returns[player]; + SPIEL_CHECK_FLOAT_EQ(final_return, state->PlayerReturn(player)); + SPIEL_CHECK_GE(final_return, game.MinUtility()); + SPIEL_CHECK_LE(final_return, game.MaxUtility()); + if (verbose) { + std::cout << "Final return to player " << player << " is " << final_return + << std::endl; + } + episode_returns[player] += rewards[player]; + SPIEL_CHECK_TRUE(Near(episode_returns[player], final_return)); + } +} + +// Perform sims random simulations of the specified game. +void RandomSimTest(const Game& game, int num_sims, bool serialize, bool verbose, + bool mask_test, + const std::function& state_checker_fn, + int mean_field_population, + std::shared_ptr observer) { + std::mt19937 rng; + if (verbose) { + std::cout << "\nRandomSimTest, game = " << game.GetType().short_name + << ", num_sims = " << num_sims << std::endl; + } + for (int sim = 0; sim < num_sims; ++sim) { + RandomSimulation(&rng, game, /*undo=*/false, /*serialize=*/serialize, + verbose, mask_test, observer, state_checker_fn, + mean_field_population); + } +} + +void RandomSimTestWithUndo(const Game& game, int num_sims) { + std::mt19937 rng; + std::cout << "RandomSimTestWithUndo, game = " << game.GetType().short_name + << ", num_sims = " << num_sims << std::endl; + for (int sim = 0; sim < num_sims; ++sim) { + RandomSimulation(&rng, game, /*undo=*/true, /*serialize=*/true, + /*verbose=*/true, /*mask_test=*/true, nullptr, + &DefaultStateChecker); + } +} + +void RandomSimTestNoSerialize(const Game& game, int num_sims) { + std::mt19937 rng; + std::cout << "RandomSimTestNoSerialize, game = " << game.GetType().short_name + << ", num_sims = " << num_sims << std::endl; + for (int sim = 0; sim < num_sims; ++sim) { + RandomSimulation(&rng, game, /*undo=*/false, /*serialize=*/false, + /*verbose=*/true, /*mask_test=*/true, nullptr, + &DefaultStateChecker); + } +} + +void RandomSimTestCustomObserver(const Game& game, + const std::shared_ptr observer) { + std::mt19937 rng; + RandomSimulation(&rng, game, /*undo=*/false, /*serialize=*/false, + /*verbose=*/false, /*mask_test=*/true, observer, + &DefaultStateChecker); +} + +// Format chance outcomes as a string, for error messages. +std::string ChanceOutcomeStr(const ActionsAndProbs& chance_outcomes) { + std::string str; + for (auto outcome : chance_outcomes) { + if (!str.empty()) str.append(", "); + absl::StrAppend(&str, "(", outcome.first, ", ", outcome.second, ")"); + } + return str; +} + +// Check chance outcomes in a state and all child states. +// We check that: +// - That LegalActions(kChancePlayerId) (which often defaults to the actions in +// ChanceOutcomes) and LegalActions() return the same result. +// - All the chance outcome actions are legal actions +// - All the chance outcome actions are different from each other. +// - That the probabilities are within [0, 1] and sum to 1. 
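+// Typical usage from a game's test target is a sketch like:
+//   CheckChanceOutcomes(*LoadGame("kuhn_poker"));
+// Since the traversal is exhaustive, keep it to small games.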
+void CheckChanceOutcomes(const State& state) { + if (state.IsTerminal()) return; + if (state.IsChanceNode()) { + auto legal_actions = state.LegalActions(kChancePlayerId); + auto default_legal_actions = state.LegalActions(); + if (legal_actions != default_legal_actions) { + SpielFatalError(absl::StrCat( + "Legalactions() and LegalActions(kChancePlayerId) do not give the " + "same result:", + "\nLegalActions(): ", + absl::StrJoin(default_legal_actions, ", "), + "\nLegalActions(kChancePlayerId): ", + absl::StrJoin(legal_actions, ", "))); + } + absl::btree_set legal_action_set(legal_actions.begin(), + legal_actions.end()); + auto chance_outcomes = state.ChanceOutcomes(); + + std::vector chance_outcome_actions; + double sum = 0; + for (const auto& [action, prob] : chance_outcomes) { + chance_outcome_actions.push_back(action); + if (legal_action_set.count(action) == 0) { + SpielFatalError(absl::StrCat("LegalActions()=[", + absl::StrJoin(legal_actions, ", "), + "] inconsistent with ChanceOutcomes()=", + ChanceOutcomeStr(chance_outcomes), ".")); + } + if (prob <= 0. || prob > 1) { + SpielFatalError(absl::StrCat( + "Invalid probability for outcome: P(", action, ")=", prob, + "; all outcomes=", ChanceOutcomeStr(chance_outcomes))); + } + sum += prob; + } + absl::btree_set chance_outcome_actions_set( + chance_outcome_actions.begin(), chance_outcome_actions.end()); + if (chance_outcome_actions.size() != chance_outcome_actions_set.size()) { + std::sort(chance_outcome_actions.begin(), chance_outcome_actions.end()); + SpielFatalError(absl::StrCat( + "There are some duplicate actions in ChanceOutcomes\n. There are: ", + chance_outcome_actions_set.size(), " unique legal actions over ", + chance_outcome_actions.size(), + " chance outcome actions.\n Sorted legal actions:\n", + absl::StrJoin(chance_outcome_actions, ", "))); + } + constexpr double eps = 1e-5; + if (sum < 1 - eps || sum > 1 + eps) { + SpielFatalError( + absl::StrCat("Invalid probabilities; sum=", sum, + "; all outcomes=", ChanceOutcomeStr(chance_outcomes))); + } + } + // Handles chance nodes, player nodes, including simultaneous nodes if + // supported. + for (auto action : state.LegalActions()) { + auto next_state = state.Child(action); + CheckChanceOutcomes(*next_state); + } +} + +void CheckChanceOutcomes(const Game& game) { + CheckChanceOutcomes(*game.NewInitialState()); +} + +// Verifies that ResampleFromInfostate is correctly implemented. 
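+// "Correct" here means that, at every non-chance node along random playouts,
+// the state resampled for player p reproduces p's information state string
+// and tensor as well as the same current player.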
+void ResampleInfostateTest(const Game& game, int num_sims) { + std::mt19937 rng; + UniformProbabilitySampler sampler; + for (int i = 0; i < num_sims; ++i) { + std::unique_ptr state = game.NewInitialState(); + while (!state->IsTerminal()) { + if (!state->IsChanceNode()) { + for (int p = 0; p < state->NumPlayers(); ++p) { + std::unique_ptr other_state = + state->ResampleFromInfostate(p, sampler); + SPIEL_CHECK_EQ(state->InformationStateString(p), + other_state->InformationStateString(p)); + SPIEL_CHECK_EQ(state->InformationStateTensor(p), + other_state->InformationStateTensor(p)); + SPIEL_CHECK_EQ(state->CurrentPlayer(), other_state->CurrentPlayer()); + } + } + std::vector actions = state->LegalActions(); + std::uniform_int_distribution dis(0, actions.size() - 1); + Action action = actions[dis(rng)]; + state->ApplyAction(action); + } + } +} + +void TestPoliciesCanPlay(TabularPolicyGenerator policy_generator, + const Game& game, int numSims) { + TabularPolicy policy = policy_generator(game); + std::mt19937 rng(0); + for (int i = 0; i < numSims; ++i) { + std::unique_ptr state = game.NewInitialState(); + while (!state->IsTerminal()) { + ActionsAndProbs outcomes; + if (state->IsChanceNode()) { + outcomes = state->ChanceOutcomes(); + } else { + outcomes = policy.GetStatePolicy(state->InformationStateString()); + } + state->ApplyAction(open_spiel::SampleAction(outcomes, rng).first); + } + } +} + +void TestPoliciesCanPlay(const Policy& policy, const Game& game, int numSims) { + std::mt19937 rng(0); + for (int i = 0; i < numSims; ++i) { + std::unique_ptr state = game.NewInitialState(); + while (!state->IsTerminal()) { + ActionsAndProbs outcomes; + if (state->IsChanceNode()) { + outcomes = state->ChanceOutcomes(); + } else { + outcomes = policy.GetStatePolicy(*state); + } + state->ApplyAction(open_spiel::SampleAction(outcomes, rng).first); + } + } +} + +void TestEveryInfostateInPolicy(TabularPolicyGenerator policy_generator, + const Game& game) { + TabularPolicy policy = policy_generator(game); + std::vector> to_visit; + to_visit.push_back(game.NewInitialState()); + while (!to_visit.empty()) { + std::unique_ptr state = std::move(to_visit.back()); + to_visit.pop_back(); + for (Action action : state->LegalActions()) { + to_visit.push_back(state->Child(action)); + } + if (!state->IsChanceNode() && !state->IsTerminal()) { + SPIEL_CHECK_EQ( + policy.GetStatePolicy(state->InformationStateString()).size(), + state->LegalActions().size()); + } + } +} + +void CheckLegalActionsAreSorted(const Game& game, State& state) { + if (state.IsChanceNode()) return; + for (int player = 0; player < game.NumPlayers(); ++player) { + auto actions = state.LegalActions(player); + for (int i = 1; i < actions.size(); ++i) { + SPIEL_CHECK_LT(actions[i - 1], actions[i]); + } + } +} + +} // namespace testing +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/tests/basic_tests.h b/scenarios/bargaining/open_spiel/open_spiel/tests/basic_tests.h new file mode 100644 index 0000000..6ada88a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/tests/basic_tests.h @@ -0,0 +1,94 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_TESTS_BASIC_TESTS_H_ +#define OPEN_SPIEL_TESTS_BASIC_TESTS_H_ + +#include +#include + +#include "open_spiel/game_parameters.h" +#include "open_spiel/policy.h" +#include "open_spiel/spiel.h" + +namespace open_spiel { +namespace testing { + +constexpr int kDefaultNumSimsForPolicyTests = 10; + +// Default state checker function (does nothing). +void DefaultStateChecker(const State& state); + +// Checks that the game can be loaded. +void LoadGameTest(const std::string& game_name); + +// Test to ensure that there are chance outcomes. +void ChanceOutcomesTest(const Game& game); + +// Test to ensure that there are no chance outcomes. +void NoChanceOutcomesTest(const Game& game); + +// Perform num_sims random simulations of the specified game. The optional +// state_checker_fn is called at every state (including chance nodes and +// terminals), and is intended to be an easy way to pass context-specific +// testing functions to the simulation tests. +void RandomSimTest(const Game& game, int num_sims, bool serialize = true, + bool verbose = true, bool mask_test = true, + const std::function& state_checker_fn = + &DefaultStateChecker, + int mean_field_population = -1, + std::shared_ptr observer = nullptr); + +// Perform num_sims random simulations of the specified game. Also tests the +// Undo function. Note: for every step in the simulation, the entire simulation +// up to that point is rolled backward all the way to the beginning via undo, +// checking that the states match the ones along the history. Therefore, this +// is very slow! Please use sparingly. +void RandomSimTestWithUndo(const Game& game, int num_sims); + +// Check that chance outcomes are valid and consistent. +// Performs an exhaustive search of the game tree, so should only be +// used for smallish games. +void CheckChanceOutcomes(const Game& game); + +// Same as above but without checking the serialization functions. Every game +// should support serialization: only use this function when developing a new +// game, in order to test the implementation using the basic tests before having +// to implement the custom serialization (only useful for games that have chance +// mode kSampledStochastic). +void RandomSimTestNoSerialize(const Game& game, int num_sims); + +void RandomSimTestCustomObserver(const Game& game, + const std::shared_ptr observer); +// Verifies that ResampleFromInfostate is correctly implemented. +void ResampleInfostateTest(const Game& game, int num_sims); + +using TabularPolicyGenerator = std::function; + +void TestPoliciesCanPlay( + TabularPolicyGenerator policy_generator, const Game& game, + int numSims = kDefaultNumSimsForPolicyTests); +void TestPoliciesCanPlay( + const Policy& policy, const Game& game, + int numSims = kDefaultNumSimsForPolicyTests); +void TestEveryInfostateInPolicy(TabularPolicyGenerator policy_generator, + const Game& game); + +// Checks that the legal actions list is sorted. 
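+// Chance nodes are skipped; for all other nodes, every player's legal action
+// ids must be in strictly increasing order.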
+void CheckLegalActionsAreSorted(const Game& game, State& state); + +} // namespace testing +} // namespace open_spiel + +#endif // OPEN_SPIEL_TESTS_BASIC_TESTS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/tests/console_play_test.cc b/scenarios/bargaining/open_spiel/open_spiel/tests/console_play_test.cc new file mode 100644 index 0000000..a23ed09 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/tests/console_play_test.cc @@ -0,0 +1,167 @@ +// Copyright 2023 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/tests/console_play_test.h" + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/abseil-cpp/absl/strings/ascii.h" +#include "open_spiel/abseil-cpp/absl/strings/numbers.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace testing { + +namespace { +void PrintHelpMenu() { + std::cout << "Extra commands: " << std::endl; + std::cout << " #b: Back one move" << std::endl; + std::cout << " #h: Print the history" << std::endl; + std::cout << " #l: List legal actions" << std::endl; + std::cout << " #q: Quit" << std::endl; + std::cout << std::endl; +} + +void PrintLegals(const std::vector& legal_actions, const State* state) { + std::cout << "Legal actions: " << std::endl; + for (Action action : legal_actions) { + std::cout << " " << action << ": " + << state->ActionToString(state->CurrentPlayer(), action) + << std::endl; + } +} + +bool ParseCommand(const std::string& line, const Game& game, const State* state, + const std::vector& legal_actions) { + if (line == "#h") { + std::cout << "History: " << absl::StrJoin(state->History(), ", ") + << std::endl; + return true; + } else if (line == "#l") { + PrintLegals(legal_actions, state); + return true; + } else { + return false; + } +} + +} // namespace + +void ConsolePlayTest( + const Game& game, const State* start_state, + const std::vector* start_history, + const std::unordered_map>* bots) { + // Sampled stochastic and simultaneous move games are not yet supported. + GameType type = game.GetType(); + SPIEL_CHECK_NE(type.chance_mode, GameType::ChanceMode::kSampledStochastic); + SPIEL_CHECK_NE(type.dynamics, GameType::Dynamics::kSimultaneous); + + std::unique_ptr state; + if (start_state != nullptr) { + state = start_state->Clone(); + } else { + state = game.NewInitialState(); + if (start_history != nullptr) { + for (Action action : *start_history) { + state->ApplyAction(action); + } + } + } + + bool applied_action = true; + std::unique_ptr new_state; + + while (true) { + if (applied_action) { + std::cout << state->ToString() << std::endl << std::endl; + } + applied_action = false; + Player player = state->CurrentPlayer(); + std::vector legal_actions = state->LegalActions(); + + if (state->IsTerminal()) { + std::cout << "Warning! State is terminal. 
Returns: "; + for (Player p = 0; p < game.NumPlayers(); ++p) { + std::cout << state->PlayerReturn(p) << " "; + } + std::cout << std::endl; + } + + if (bots != nullptr && bots->at(player) != nullptr) { + Action action = bots->at(player)->Step(*state); + std::cout << "Bot chose action: " << state->ActionToString(player, action) + << std::endl; + state->ApplyAction(action); + applied_action = true; + } else { + std::cout << "[Enter move, or press enter for help menu]> "; + std::string line = ""; + std::getline(std::cin, line); + absl::StripAsciiWhitespace(&line); + if (line.empty()) { + PrintHelpMenu(); + } else if (line == "#b") { + Action last_action = state->History().back(); + new_state = game.NewInitialState(); + std::vector history = state->History(); + for (int i = 0; i < history.size() - 1; ++i) { + new_state->ApplyAction(history[i]); + } + state = std::move(new_state); + std::cout << "Popped action: " << last_action << std::endl; + applied_action = true; + } else if (line == "#q") { + return; + } else if (ParseCommand(line, game, state.get(), legal_actions)) { + // Do nothing, was already handled. + } else { + Action action; + bool valid_integer = absl::SimpleAtoi(line, &action); + if (valid_integer) { + auto iter = absl::c_find(legal_actions, action); + SPIEL_CHECK_TRUE(iter != legal_actions.end()); + state->ApplyAction(action); + applied_action = true; + } else { + // Search for the move string. + for (Action action : legal_actions) { + if (line == state->ActionToString(player, action)) { + state->ApplyAction(action); + applied_action = true; + break; + } + } + } + } + } + } + + std::cout << "Terminal state:" << std::endl + << std::endl + << state->ToString() << std::endl; + std::cout << "Returns: "; + std::vector returns = state->Returns(); + for (Player p = 0; p < game.NumPlayers(); ++p) { + std::cout << returns[p] << " "; + } + std::cout << std::endl; +} + +} // namespace testing +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/tests/console_play_test.h b/scenarios/bargaining/open_spiel/open_spiel/tests/console_play_test.h new file mode 100644 index 0000000..2d60952 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/tests/console_play_test.h @@ -0,0 +1,45 @@ +// Copyright 2023 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_TESTS_CONSOLE_PLAY_TEST_H_ +#define OPEN_SPIEL_TESTS_CONSOLE_PLAY_TEST_H_ + +#include +#include + +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_bots.h" + +namespace open_spiel { +namespace testing { + +// Play the game via the console to test its functionality. +// +// If a start_state or start_history is passed, the game starts from the +// specified state or history. If both remain null, the game starts from the +// initial state. +// +// Bots can be specified by passing in a map to a bot per with the player id +// as the key. If the bots map remains null, then there are no bots and play +// is entirely guided by the console. 
+void ConsolePlayTest( + const Game& game, const State* start_state = nullptr, + const std::vector* start_history = nullptr, + const std::unordered_map>* bots = nullptr); + +} // namespace testing +} // namespace open_spiel + +#endif // THIRD_PARTY_OPEN_SPIEL_TESTS_CONSOLE_PLAY_TEST_H_ + diff --git a/scenarios/bargaining/open_spiel/open_spiel/tests/shared_lib_test.cc b/scenarios/bargaining/open_spiel/open_spiel/tests/shared_lib_test.cc new file mode 100644 index 0000000..da003a4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/tests/shared_lib_test.cc @@ -0,0 +1,56 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file tests whether we can build a shared library that contains all +// the optional dependencies. + +#include + +#include "open_spiel/games/kuhn_poker/kuhn_poker.h" +#include "open_spiel/spiel.h" +#include "open_spiel/spiel_utils.h" + +#if OPEN_SPIEL_BUILD_WITH_ORTOOLS +#include "open_spiel/algorithms/ortools/lp_solver.h" +#include "open_spiel/algorithms/matrix_game_utils.h" +#endif // OPEN_SPIEL_BUILD_WITH_ORTOOLS + +namespace { + +void TestLinkingWithOpenSpielCore() { + std::cout << "Running open_spiel_core" << '\n'; + std::shared_ptr game = + open_spiel::LoadGame("kuhn_poker"); + SPIEL_CHECK_EQ(game->GetType().short_name, "kuhn_poker"); +} + +#if OPEN_SPIEL_BUILD_WITH_ORTOOLS +void TestLinkingWithOpenSpielOrtools() { + std::cout << "Running open_spiel_ortools" << '\n'; + std::shared_ptr game = + open_spiel::algorithms::LoadMatrixGame("matrix_rps"); + open_spiel::algorithms::ortools::ZeroSumGameSolution solution = + open_spiel::algorithms::ortools::SolveZeroSumMatrixGame(*game); + SPIEL_CHECK_FLOAT_NEAR(solution.values[0], 0., 1e-10); +} +#endif // OPEN_SPIEL_BUILD_WITH_ORTOOLS + +} // namespace + +int main() { + TestLinkingWithOpenSpielCore(); +#if OPEN_SPIEL_BUILD_WITH_ORTOOLS + TestLinkingWithOpenSpielOrtools(); +#endif +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/tests/spiel_test.cc b/scenarios/bargaining/open_spiel/open_spiel/tests/spiel_test.cc new file mode 100644 index 0000000..567fafc --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/tests/spiel_test.cc @@ -0,0 +1,375 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/spiel.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/game_parameters.h" +#include "open_spiel/policy.h" +#include "open_spiel/simultaneous_move_game.h" +#include "open_spiel/spiel_globals.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/tests/basic_tests.h" + +namespace open_spiel { +namespace testing { +namespace { + +void GeneralTests() { + // Number of supported games should be > 0. + std::vector game_names = RegisteredGames(); + SPIEL_CHECK_GT(game_names.size(), 0); +} + +void KuhnTests() { + // Default params (2 players) + RandomSimTest(*LoadGame("kuhn_poker"), /*num_sims=*/100); + + // More than two players. + for (Player players = 3; players <= 5; players++) { + RandomSimTest( + *LoadGame("kuhn_poker", {{"players", GameParameter(players)}}), + /*num_sims=*/100); + } +} + +void GameEqualityTests() { + // 2 players is the default in kuhn poker. + SPIEL_CHECK_TRUE( + *LoadGame("kuhn_poker") == *LoadGame("kuhn_poker(players=2)")); + SPIEL_CHECK_FALSE( + *LoadGame("kuhn_poker") == *LoadGame("kuhn_poker(players=3)")); +} + +void TicTacToeTests() { + auto tic_tac_toe = LoadGame("tic_tac_toe"); + NoChanceOutcomesTest(*tic_tac_toe); + RandomSimTest(*tic_tac_toe, /*num_sims=*/100); +} + +// Dummy game to test flat joint action logic. +class FlatJointActionTestGame : public SimMoveGame { + public: + explicit FlatJointActionTestGame(const GameParameters& params) + : SimMoveGame(GameType{}, params) {} + int NumDistinctActions() const override { return 8; } + std::unique_ptr NewInitialState() const override { return nullptr; } + int MaxChanceOutcomes() const override { return 4; } + int NumPlayers() const override { return 3; } + double MinUtility() const override { return -10; } + double MaxUtility() const override { return 10; } + std::vector InformationStateTensorShape() const override { return {}; } + int MaxGameLength() const override { return 1; } + int MaxChanceNodesInHistory() const override { return 0; } +}; + +// Dummy state to test flat joint action logic. 
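+// The three players below have 3, 3 and 2 legal actions respectively, so
+// there are 3 * 3 * 2 = 18 flat joint actions; each flat action id decodes to
+// one legal action per player (exercised in FlatJointactionTest below).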
+class FlatJointActionTestState : public SimMoveState { + public: + FlatJointActionTestState() + : SimMoveState(std::shared_ptr( + new FlatJointActionTestGame({}))) {} + const std::vector& JointAction() const { return joint_action_; } + std::vector LegalActions(Player player) const override { + if (player == kSimultaneousPlayerId) return LegalFlatJointActions(); + switch (player) { + case 0: + return {2, 4, 6}; + case 1: + return {1, 3, 5}; + case 2: + return {0, 100}; + } + SpielFatalError("Invalid player id"); + } + Player CurrentPlayer() const override { return kSimultaneousPlayerId; } + std::string ActionToString(Player player, Action action_id) const override { + if (player == kSimultaneousPlayerId) + return FlatJointActionToString(action_id); + return absl::StrCat("(p=", player, ",a=", action_id, ")"); + } + std::string ToString() const override { return ""; } + bool IsTerminal() const override { return false; } + std::vector Returns() const override { return {}; } + std::unique_ptr Clone() const override { return nullptr; } + + protected: + void DoApplyActions(const std::vector& actions) override { + joint_action_ = actions; + } + + protected: + std::vector joint_action_; +}; + +void FlatJointactionTest() { + FlatJointActionTestState state; + auto legal_flat_joint_actions = state.LegalActions(kSimultaneousPlayerId); + SPIEL_CHECK_EQ(legal_flat_joint_actions.size(), 18); + for (int i = 0; i < 18; ++i) { + std::cerr << "Joint action " << i << " expands to " + << state.ActionToString(kSimultaneousPlayerId, i) << std ::endl; + } + // Last-but-one joint action --> last action for everyone except p0 (which + // takes its last-but-one action). + SPIEL_CHECK_EQ(state.ActionToString(kSimultaneousPlayerId, 16), + "[(p=0,a=4), (p=1,a=5), (p=2,a=100)]"); + state.ApplyAction(16); + std::vector expected_joint_action{4, 5, 100}; + SPIEL_CHECK_EQ(state.JointAction(), expected_joint_action); +} + +using PolicyGenerator = std::function; + +void PolicyTest() { + auto random_policy_default_seed = [](const Game& game) { + return GetRandomPolicy(game); + }; + auto flat_dirichlet_policy_default_seed = [](const Game& game) { + return GetFlatDirichletPolicy(game); + }; + std::vector policy_generators = { + GetUniformPolicy, random_policy_default_seed, GetFirstActionPolicy, + flat_dirichlet_policy_default_seed, + }; + + // For some reason, this can't seem to be brace-initialized, so instead we use + // push_back. + std::unique_ptr uniform_policy = std::make_unique(); + for (const std::string& game_name : + {"leduc_poker", "kuhn_poker", "liars_dice"}) { + std::shared_ptr game = LoadGame(game_name); + for (const auto& policy_generator : policy_generators) { + TestEveryInfostateInPolicy(policy_generator, *game); + TestPoliciesCanPlay(policy_generator, *game); + } + TestPoliciesCanPlay(*uniform_policy, *game); + } +} + +void LeducPokerDeserializeTest() { + // Example Leduc state: player 1 gets the 0th card, player 2 gets the 3rd card + // and the first two actions are: check, check. + std::string serialized_game_and_state = + "# Automatically generated by OpenSpiel SerializeGameAndState\n" + "[Meta]\n" + "Version: 1\n" + "\n" + "[Game]\n" + "leduc_poker()\n" + "[State]\n" + "0\n" // first chance event (deal to first player) + "3\n" // second chance event (deal to second player) + "1\n" // check + "1\n" // check + "\n"; + + std::pair, std::unique_ptr> + game_and_state = + open_spiel::DeserializeGameAndState(serialized_game_and_state); + + // Should be at round 2 deal (chance node). 
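+  // (Both private cards have been dealt and the first betting round was
+  // closed by two checks, so the next event is the public card deal.)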
+ SPIEL_CHECK_TRUE(game_and_state.second->IsChanceNode()); + + // Check that the game got deserialized properly. + SPIEL_CHECK_EQ(game_and_state.first->ToString(), + LoadGame("leduc_poker")->ToString()); + + // And now check that serializing this game and state gives the same string as + // above. + SPIEL_CHECK_EQ( + SerializeGameAndState(*game_and_state.first, *game_and_state.second), + serialized_game_and_state); +} + +void GameParametersTest() { + // Basic types + SPIEL_CHECK_TRUE(GameParameter(1).has_int_value()); + SPIEL_CHECK_TRUE(GameParameter(1.0).has_double_value()); + SPIEL_CHECK_TRUE(GameParameter(true).has_bool_value()); + SPIEL_CHECK_TRUE(GameParameter(std::string("1")).has_string_value()); + SPIEL_CHECK_TRUE(GameParameter("1").has_string_value()); // See issue #380. + + // Writing to string + SPIEL_CHECK_EQ(GameParameter("1").ToString(), "1"); + SPIEL_CHECK_EQ(GameParameter(1).ToString(), "1"); + // -- Currently we serialize doubles with 10 digits after the point. + SPIEL_CHECK_EQ(GameParameter(1.0).ToString(), "1.0"); + SPIEL_CHECK_EQ(GameParameter(1.).ToString(), "1.0"); + SPIEL_CHECK_EQ(GameParameter(1.5).ToString(), "1.5"); + SPIEL_CHECK_EQ(GameParameter(001.0485760000).ToString(), "1.048576"); + SPIEL_CHECK_EQ(GameParameter(1e-9).ToString(), "0.000000001"); + + // Parsing from string + // + // XXX: Game parameter parsing from string is a bit quirky at the + // moment. For example, the strings "+" or "-" make the parser + // throw since the parses eagerly tries to parse those as integers and + // passes them to std::stoi. + // + // Similarly, "." would be parsed using std::stod with a similar outcome. + // + // Doubles must contain a point . inside, or they would be parsed as + // integer, and exponential notation is not allowed for now. + // + // Leading or trailing whitespace is not stripped before parsing, so " 1" + // would be parsed as a string instead of an integer. + // + // See also: #382. + // + // + // The next few tests are not always intended to check the long term desired + // behavior, but rather that no accidental regression is introduced in the + // current behavior. + + // -- Quirks + // TODO: find a way to test the failures. These four fail (on purpose). + // GameParameterFromString("+"); + // GameParameterFromString("---"); + // GameParameterFromString("."); + // GameParameterFromString("..."); + SPIEL_CHECK_TRUE(GameParameterFromString("1.2e-1").has_string_value()); + + // -- Whitespace related + SPIEL_CHECK_TRUE(GameParameterFromString(" 1").has_string_value()); + SPIEL_CHECK_TRUE(GameParameterFromString("1 ").has_string_value()); + + // -- Intended behavior + SPIEL_CHECK_TRUE(GameParameterFromString("true").has_bool_value()); + SPIEL_CHECK_TRUE(GameParameterFromString("True").has_bool_value()); + SPIEL_CHECK_TRUE(GameParameterFromString("false").has_bool_value()); + SPIEL_CHECK_TRUE(GameParameterFromString("False").has_bool_value()); + SPIEL_CHECK_TRUE(GameParameterFromString("1").has_int_value()); + SPIEL_CHECK_TRUE(GameParameterFromString("1.0").has_double_value()); + SPIEL_CHECK_TRUE(GameParameterFromString("1. 
0").has_string_value()); + + // Tests for GameParametersFromString + // Empty string + auto params = GameParametersFromString(""); + SPIEL_CHECK_TRUE(params.empty()); + + // Bare name + params = GameParametersFromString("game_one"); + SPIEL_CHECK_EQ(params.size(), 1); + SPIEL_CHECK_EQ(params["name"].string_value(), "game_one"); + + // Name with empty list + params = GameParametersFromString("game_two()"); + SPIEL_CHECK_EQ(params.size(), 1); + SPIEL_CHECK_EQ(params["name"].string_value(), "game_two"); + + // Single string parameter + params = GameParametersFromString("game_three(foo=bar)"); + SPIEL_CHECK_EQ(params.size(), 2); + SPIEL_CHECK_EQ(params["name"].string_value(), "game_three"); + SPIEL_CHECK_EQ(params["foo"].string_value(), "bar"); + + // Every type of parameter + params = GameParametersFromString( + "game_four(str=strval,int=42,float=-1.2,game1=nested()," + "game2=nested2(param=val),bool1=True,bool2=False)"); + SPIEL_CHECK_EQ(params.size(), 8); + SPIEL_CHECK_EQ(params["name"].string_value(), "game_four"); + SPIEL_CHECK_EQ(params["str"].string_value(), "strval"); + SPIEL_CHECK_EQ(params["int"].int_value(), 42); + SPIEL_CHECK_EQ(params["float"].double_value(), -1.2); + SPIEL_CHECK_EQ(params["bool1"].bool_value(), true); + SPIEL_CHECK_EQ(params["bool2"].bool_value(), false); + + auto game1 = params["game1"].game_value(); + SPIEL_CHECK_EQ(game1.size(), 1); + SPIEL_CHECK_EQ(game1["name"].string_value(), "nested"); + + auto game2 = params["game2"].game_value(); + SPIEL_CHECK_EQ(game2.size(), 2); + SPIEL_CHECK_EQ(game2["name"].string_value(), "nested2"); + SPIEL_CHECK_EQ(game2["param"].string_value(), "val"); +} + +void PolicySerializationTest() { + // Check empty tabular policy + auto policy = std::make_unique(); + std::shared_ptr deserialized_policy = + DeserializePolicy(policy->Serialize()); + auto deserialized = + std::static_pointer_cast(deserialized_policy); + SPIEL_CHECK_EQ(policy->PolicyTable().size(), 0); + SPIEL_CHECK_EQ(deserialized->PolicyTable().size(), 0); + + // Check non-empty tabular policy + auto game = LoadGame("tic_tac_toe"); + policy = std::make_unique(*game); + deserialized_policy = DeserializePolicy(policy->Serialize(6)); + deserialized = std::static_pointer_cast(deserialized_policy); + SPIEL_CHECK_EQ(policy->PolicyTable().size(), + deserialized->PolicyTable().size()); + for (const auto& [info_state, policy] : policy->PolicyTable()) { + for (int i = 0; i < policy.size(); i++) { + auto original_val = policy.at(i); + auto deserialized_val = deserialized->PolicyTable().at(info_state).at(i); + SPIEL_CHECK_EQ(original_val.first, deserialized_val.first); + SPIEL_CHECK_FLOAT_NEAR(original_val.second, deserialized_val.second, + 1e-6); + } + } + + // Check uniform policy + DeserializePolicy(std::make_unique()->Serialize()); +} + +void ConcreteGamesTest() { + // Note: not intended to be an exhaustive list. 
+ std::vector non_concrete_game_names = { + "add_noise", "cached_tree", + "coop_to_1p", "efg_game", + "misere", "normal_form_extensive_game", + "repeated_game", "restricted_nash_response", + "start_at", "turn_based_simultaneous_game", + "zero_sum"}; + std::vector concrete_game_types = + GameRegisterer::RegisteredConcreteGames(); + SPIEL_CHECK_GT(concrete_game_types.size(), 0); + for (const auto& game_type : concrete_game_types) { + std::cout << "Loading game: " << game_type.short_name << std::endl; + SPIEL_CHECK_TRUE(game_type.is_concrete); + auto iter = std::find(non_concrete_game_names.begin(), + non_concrete_game_names.end(), game_type.short_name); + SPIEL_CHECK_TRUE(iter == non_concrete_game_names.end()); + } +} + +} // namespace +} // namespace testing +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::testing::GeneralTests(); + open_spiel::testing::KuhnTests(); + open_spiel::testing::GameEqualityTests(); + open_spiel::testing::TicTacToeTests(); + open_spiel::testing::FlatJointactionTest(); + open_spiel::testing::PolicyTest(); + open_spiel::testing::LeducPokerDeserializeTest(); + open_spiel::testing::GameParametersTest(); + open_spiel::testing::PolicySerializationTest(); + open_spiel::testing::ConcreteGamesTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/CMakeLists.txt b/scenarios/bargaining/open_spiel/open_spiel/utils/CMakeLists.txt new file mode 100644 index 0000000..1a2d8b4 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/CMakeLists.txt @@ -0,0 +1,87 @@ +add_library (utils OBJECT + circular_buffer.h + combinatorics.h + combinatorics.cc + data_logger.h + data_logger.cc + file.h + file.cc + functional.h + init.h + init.cc + json.h + json.cc + logger.h + lru_cache.h + random.h + random.cc + serializable_circular_buffer.h + serialization.h + stats.h + tensor_view.h + thread.h + thread.cc + threaded_queue.h +) +target_include_directories (utils PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + +add_executable(circular_buffer_test circular_buffer_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(circular_buffer_test circular_buffer_test) + +add_executable(combinatorics_test combinatorics_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(combinatorics_test combinatorics_test) + +add_executable(data_logger_test data_logger_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(data_logger_test data_logger_test) + +add_executable(file_test file_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(file_test file_test) + +add_executable(functional_test functional_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(functional_test functional_test) + +add_executable(json_test json_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(json_test json_test) + +add_executable(logger_test logger_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(logger_test logger_test) + +add_executable(lru_cache_test lru_cache_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(lru_cache_test lru_cache_test) + +add_executable(random_test random_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(random_test random_test) + +if (OPEN_SPIEL_BUILD_WITH_LIBNOP) + add_executable(serializable_circular_buffer_test + serializable_circular_buffer_test.cc ${OPEN_SPIEL_OBJECTS} + $) + add_test(serializable_circular_buffer_test serializable_circular_buffer_test) +endif() + +add_executable(stats_test stats_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(stats_test stats_test) + +add_executable(tensor_view_test tensor_view_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(tensor_view_test tensor_view_test) + +# Failing on Ubuntu 18.04 since 
upgrade of abseil version (2021-05-17). +# Disabling while we look into it. +# add_executable(thread_test thread_test.cc ${OPEN_SPIEL_OBJECTS} +# $) +# add_test(thread_test thread_test) + +add_executable(threaded_queue_test threaded_queue_test.cc ${OPEN_SPIEL_OBJECTS} + $) +add_test(threaded_queue_test threaded_queue_test) diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/circular_buffer.h b/scenarios/bargaining/open_spiel/open_spiel/utils/circular_buffer.h new file mode 100644 index 0000000..b63fbf8 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/circular_buffer.h @@ -0,0 +1,73 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_UTILS_CIRCULAR_BUFFER_H_ +#define OPEN_SPIEL_UTILS_CIRCULAR_BUFFER_H_ + +#include +#include +#include +#include + +namespace open_spiel { + +// A simple circular buffer of fixed size. +template +class CircularBuffer { + public: + explicit CircularBuffer(int max_size) + : max_size_(max_size), total_added_(0) {} + + // Add one element, replacing the oldest once it's full. + void Add(const T& value) { + if (data_.size() < max_size_) { + data_.push_back(value); + } else { + data_[total_added_ % max_size_] = value; + } + total_added_ += 1; + } + + // Return `num` elements without replacement. + std::vector Sample(std::mt19937* rng, int num) { + std::vector out; + out.reserve(num); + std::sample(data_.begin(), data_.end(), std::back_inserter(out), num, *rng); + return out; + } + + // Return the full buffer. + const std::vector& Data() const { return data_; } + + // Access a single element from the buffer. + const T& operator[](int i) const { return data_[i]; } + + // How many elements are in the buffer. + int Size() const { return data_.size(); } + + // Is the buffer empty? + bool Empty() const { return data_.empty(); } + + // How many elements have ever been added to the buffer. + int64_t TotalAdded() const { return total_added_; } + + protected: + const int max_size_; + int64_t total_added_; + std::vector data_; +}; + +} // namespace open_spiel + +#endif // OPEN_SPIEL_UTILS_CIRCULAR_BUFFER_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/circular_buffer_test.cc b/scenarios/bargaining/open_spiel/open_spiel/utils/circular_buffer_test.cc new file mode 100644 index 0000000..43a27a6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/circular_buffer_test.cc @@ -0,0 +1,71 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/utils/circular_buffer.h" + +#include + +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace { + +void TestCircularBuffer() { + CircularBuffer buffer(4); + std::mt19937 rng; + std::vector sample; + + SPIEL_CHECK_TRUE(buffer.Empty()); + SPIEL_CHECK_EQ(buffer.Size(), 0); + + buffer.Add(13); + SPIEL_CHECK_FALSE(buffer.Empty()); + SPIEL_CHECK_EQ(buffer.Size(), 1); + SPIEL_CHECK_EQ(buffer.TotalAdded(), 1); + SPIEL_CHECK_EQ(buffer[0], 13); + + sample = buffer.Sample(&rng, 1); + SPIEL_CHECK_EQ(sample.size(), 1); + SPIEL_CHECK_EQ(sample[0], 13); + + buffer.Add(14); + buffer.Add(15); + buffer.Add(16); + + SPIEL_CHECK_EQ(buffer.Size(), 4); + SPIEL_CHECK_EQ(buffer.TotalAdded(), 4); + + sample = buffer.Sample(&rng, 2); + SPIEL_CHECK_EQ(sample.size(), 2); + SPIEL_CHECK_GE(sample[0], 13); + SPIEL_CHECK_LE(sample[0], 16); + SPIEL_CHECK_GE(sample[1], 13); + SPIEL_CHECK_LE(sample[1], 16); + + buffer.Add(17); + buffer.Add(18); + + SPIEL_CHECK_EQ(buffer.Size(), 4); + SPIEL_CHECK_EQ(buffer.TotalAdded(), 6); + + sample = buffer.Sample(&rng, 1); + SPIEL_CHECK_EQ(sample.size(), 1); + SPIEL_CHECK_GE(sample[0], 15); + SPIEL_CHECK_LE(sample[0], 18); +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char** argv) { open_spiel::TestCircularBuffer(); } diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/combinatorics.cc b/scenarios/bargaining/open_spiel/open_spiel/utils/combinatorics.cc new file mode 100644 index 0000000..c827452 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/combinatorics.cc @@ -0,0 +1,58 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/utils/combinatorics.h" + +#include +#include +#include +#include +#include +#include + +namespace open_spiel { + +bool NextPowerSetMask(std::vector* bs) { + for (std::size_t i = 0; i != bs->size(); ++i) { + (*bs)[i] = !(*bs)[i]; + if ((*bs)[i]) { + return true; + } + } + return false; // overflow +} + +std::vector> GenerateMasks( + std::vector& values, int k, std::vector& permutation_stack) { + if (k == permutation_stack.size()) { + return {permutation_stack}; + } + + std::vector> vs; + auto end_valid = values.size() - permutation_stack.size(); + permutation_stack.push_back(0); + for (int i = 0; i < end_valid; ++i) { + permutation_stack.back() = values[i]; + std::swap(values[i], values[end_valid - 1]); + auto child_vs = GenerateMasks(values, k, permutation_stack); + vs.insert(vs.begin(), child_vs.begin(), child_vs.end()); + std::swap(values[i], values[end_valid - 1]); + } + permutation_stack.pop_back(); + return vs; +} + +int Factorial(int n) { return n <= 1 ? 
1 : n * Factorial(n - 1); } + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/combinatorics.h b/scenarios/bargaining/open_spiel/open_spiel/utils/combinatorics.h new file mode 100644 index 0000000..3440e1a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/combinatorics.h @@ -0,0 +1,165 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_UTILS_COMBINATORICS_H_ +#define OPEN_SPIEL_UTILS_COMBINATORICS_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "open_spiel/spiel_utils.h" + +// A suite of basic combinatorial operations. + +namespace open_spiel { + +// Return all permutations of a vector. +// This returns n! vectors, where n is the size of the vector. +template +std::vector> Permutations(std::vector v) { + std::vector> vs; + int perm_size = 1; + for (int i = 2; i <= v.size(); ++i) perm_size *= i; + vs.reserve(perm_size); + std::sort(v.begin(), v.end()); + do { + vs.push_back(v); + } while (std::next_permutation(v.begin(), v.end())); + return vs; +} + +// Return all subsets of size k from a vector of size n (all combinations). +// This implements "n choose k" (or also known as binomial coefficient). +// Returns (n k) = n! / ( k! * (n-k)! ) vectors. +template +std::vector> SubsetsOfSize(const std::vector& v, int k) { + SPIEL_CHECK_LE(k, v.size()); + SPIEL_CHECK_GE(k, 0); + std::vector bitset(v.size() - k, 0); + bitset.resize(v.size(), 1); + std::vector> vs; + + do { + std::vector x; + x.reserve(k); + for (std::size_t i = 0; i != v.size(); ++i) { + if (bitset[i]) { + x.push_back(v[i]); + } + } + vs.push_back(x); + } while (std::next_permutation(bitset.begin(), bitset.end())); + + return vs; +} + +bool NextPowerSetMask(std::vector* bs); + +// Return the power set of a vector of size n. +// Returns 2^n vectors. +template +std::vector> PowerSet(const std::vector& v) { + std::vector bitset(v.size()); + std::vector> vs; + do { + std::vector x; + for (std::size_t i = 0; i != v.size(); ++i) { + if (bitset[i]) { + x.push_back(v[i]); + } + } + vs.push_back(x); + } while (NextPowerSetMask(&bitset)); + return vs; +} + +std::vector> GenerateMasks( + std::vector& values, int k, std::vector& permutation_stack); + +// Return all k-variations without repetition of a vector with the size n. +// Also known as k-permutations of n. +// The input is assumed that it does not contain repetitions. +// This returns n! / (n-k)! vectors. +// TODO(author13): more efficient implementation with something like +// NextVariationsWithoutRepetitionMask +template +std::vector> VariationsWithoutRepetition(const std::vector& v, + int k) { + SPIEL_CHECK_LE(k, v.size()); + SPIEL_CHECK_GE(k, 0); + + // Generate masks -- avoid copying of T, as that might + // be more expensive than juggling integers. 
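+  // GenerateMasks enumerates the ordered k-tuples of indices over
+  // {0, ..., n-1}; each index tuple is then mapped back onto the original
+  // elements below.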
+ std::vector current_permutation; + std::vector rng(v.size()); + std::iota(rng.begin(), rng.end(), 0); + auto masks = GenerateMasks(rng, k, current_permutation); + + // Apply the masks. + std::vector> vs; + for (auto& mask : masks) { + std::vector x; + for (auto& i : mask) { + x.push_back(v[i]); + } + vs.push_back(x); + } + return vs; +} + +int Factorial(int n); + +// Returns the k^th permutation of the elements in v. This algorithm skips over +// ranges of digits by computing the number of permutations for each digit from +// factorials over the suffixes in reading order. +// +// E.g. for v = {0, 1, 2, 3} and k = 19 +// - Skip over 6 (= 3!) permutations starting with 0. +// - Skip over 6 (= 3!) permutations starting with 1. +// - Skip over 6 (= 3!) permutations starting with 2. +// - Skip over two permutations starting with (3, 0) ((3, 0, 1, 2) and +// (3, 0, 2, 1)) to arrive at (3, 1, 0, 2). +template +std::vector UnrankPermutation(const std::vector& v, int k) { + int n = v.size(); + std::vector used(v.size(), false); + std::vector perm(v.size()); + for (int i = 1; i <= n; ++i) { + int divisor = Factorial(n - i); + int digit_idx = k / divisor; + int j = 0, l = 0; + for (; j < n; ++j) { + if (used[j]) { + continue; + } + if (l == digit_idx) { + break; + } + ++l; + } + perm[i - 1] = v[j]; + used[j] = true; + k -= digit_idx * divisor; + } + return perm; +} + +} // namespace open_spiel + +#endif // OPEN_SPIEL_UTILS_COMBINATORICS_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/combinatorics_test.cc b/scenarios/bargaining/open_spiel/open_spiel/utils/combinatorics_test.cc new file mode 100644 index 0000000..8eee7a3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/combinatorics_test.cc @@ -0,0 +1,104 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "open_spiel/utils/combinatorics.h" + +#include +#include + +namespace open_spiel { +namespace { + +void CheckPermutation( + std::vector v, std::vector> expected) { + std::vector> actual = Permutations(v); + SPIEL_CHECK_EQ(actual, expected); +} + +void TestPermutations() { + CheckPermutation({}, {{}}); + CheckPermutation({1}, {{1}}); + CheckPermutation({1, 2, 3}, {{1, 2, 3}, {1, 3, 2}, {2, 1, 3}, + {2, 3, 1}, {3, 1, 2}, {3, 2, 1}}); +} + +void CheckSubsetsOfSize( + std::vector v, int k, std::vector> expected) { + std::vector> actual = SubsetsOfSize(v, k); + SPIEL_CHECK_EQ(actual, expected); +} + + +void TestSubsetsOfSize() { + CheckSubsetsOfSize({}, 0, {{}}); + CheckSubsetsOfSize({1}, 0, {{}}); + CheckSubsetsOfSize({1}, 1, {{1}}); + CheckSubsetsOfSize({1, 2, 3, 4}, 2, {{3, 4}, {2, 4}, {2, 3}, + {1, 4}, {1, 3}, {1, 2}}); +} + +void CheckPowerSet( + std::vector v, std::vector> expected) { + std::vector> actual = PowerSet(v); + SPIEL_CHECK_EQ(actual, expected); +} + + +void TestPowerSet() { + CheckPowerSet({}, {{}}); + CheckPowerSet({1}, {{}, {1}}); + CheckPowerSet({1, 2, 3}, + {{}, {1}, {2}, {1, 2}, {3}, {1, 3}, {2, 3}, {1, 2, 3}}); +} + +void CheckVariationsWithoutRepetition( + std::vector v, int k, std::vector> expected) { + std::vector> actual = VariationsWithoutRepetition(v, k); + SPIEL_CHECK_EQ(actual, expected); +} + + +void TestVariationsWithoutRepetition() { + CheckVariationsWithoutRepetition({}, 0, {{}}); + CheckVariationsWithoutRepetition({1}, 0, {{}}); + CheckVariationsWithoutRepetition({1}, 1, {{1}}); + CheckVariationsWithoutRepetition({1, 2, 3}, 2, {{3, 2}, {3, 1}, {2, 3}, + {2, 1}, {1, 2}, {1, 3}}); +} + +void UnrankPermutationTest() { + std::vector> all_perms = { + {0, 1, 2, 3}, {0, 1, 3, 2}, {0, 2, 1, 3}, {0, 2, 3, 1}, {0, 3, 1, 2}, + {0, 3, 2, 1}, {1, 0, 2, 3}, {1, 0, 3, 2}, {1, 2, 0, 3}, {1, 2, 3, 0}, + {1, 3, 0, 2}, {1, 3, 2, 0}, {2, 0, 1, 3}, {2, 0, 3, 1}, {2, 1, 0, 3}, + {2, 1, 3, 0}, {2, 3, 0, 1}, {2, 3, 1, 0}, {3, 0, 1, 2}, {3, 0, 2, 1}, + {3, 1, 0, 2}, {3, 1, 2, 0}, {3, 2, 0, 1}, {3, 2, 1, 0}}; + + std::vector elements = {0, 1, 2, 3}; + for (int k = 0; k < 24; ++k) { + std::vector perm = UnrankPermutation(elements, k); + SPIEL_CHECK_TRUE(perm == all_perms[k]); + } +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::TestPermutations(); + open_spiel::TestSubsetsOfSize(); + open_spiel::TestPowerSet(); + open_spiel::TestVariationsWithoutRepetition(); + open_spiel::UnrankPermutationTest(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/data_logger.cc b/scenarios/bargaining/open_spiel/open_spiel/utils/data_logger.cc new file mode 100644 index 0000000..4ea4fcf --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/data_logger.cc @@ -0,0 +1,53 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
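The next file, data_logger.cc, implements the DataLoggerJsonLines declared in data_logger.h further below. As a quick orientation, a hypothetical caller would use it as sketched here; the path and field names are illustrative, not taken from the repo, and the sketch assumes the vendored utils sources (data_logger.cc, file.cc, json.cc) are linked in:

```
// data_logger_demo.cc -- illustrative sketch, mirrors what data_logger_test.cc verifies.
#include "open_spiel/utils/data_logger.h"

int main() {
  // Writes /tmp/selfplay.jsonl, one JSON object per line, e.g.
  // {"avg": 1.5, "step": 1, "time_abs": ..., "time_rel": ..., "time_str": "..."}
  open_spiel::DataLoggerJsonLines logger("/tmp", "selfplay", /*flush=*/true);
  logger.Write({{"step", 1}, {"avg", 1.5}});
  logger.Write({{"step", 2}, {"avg", 2.5}});
  return 0;
}  // The destructor flushes and closes the file.
```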
+ +#include "open_spiel/utils/data_logger.h" + +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/strings/str_join.h" +#include "open_spiel/abseil-cpp/absl/time/clock.h" +#include "open_spiel/abseil-cpp/absl/time/time.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/json.h" + +namespace open_spiel { + +DataLoggerJsonLines::DataLoggerJsonLines(const std::string& path, + const std::string& name, bool flush, + const std::string& mode, + absl::Time start_time) + : fd_(absl::StrFormat("%s/%s.jsonl", path, name), mode), + flush_(flush), + start_time_(start_time) {} + +void DataLoggerJsonLines::Write(DataLogger::Record record) { + static absl::TimeZone utc = absl::UTCTimeZone(); + absl::Time now = absl::Now(); + record.insert({ + {"time_str", absl::FormatTime("%Y-%m-%d %H:%M:%E6S %z", now, utc)}, + {"time_abs", absl::ToUnixMicros(now) / 1000000.}, + {"time_rel", absl::ToDoubleSeconds(now - start_time_)}, + }); + fd_.Write(json::ToString(record)); + fd_.Write("\n"); + if (flush_) { + Flush(); + } +} + +void DataLoggerJsonLines::Flush() { fd_.Flush(); } + +DataLoggerJsonLines::~DataLoggerJsonLines() { Flush(); } + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/data_logger.h b/scenarios/bargaining/open_spiel/open_spiel/utils/data_logger.h new file mode 100644 index 0000000..cdc63cd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/data_logger.h @@ -0,0 +1,69 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_UTILS_DATA_LOGGER_H_ +#define OPEN_SPIEL_UTILS_DATA_LOGGER_H_ + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/time/time.h" +#include "open_spiel/utils/file.h" +#include "open_spiel/utils/json.h" + +namespace open_spiel { + +class DataLogger { + public: + using Record = json::Object; + + virtual ~DataLogger() = default; + virtual void Write(Record record) = 0; + virtual void Flush() {} +}; + +// Writes to a file in http://jsonlines.org/ format. +class DataLoggerJsonLines : public DataLogger { + public: + explicit DataLoggerJsonLines(const std::string& path, const std::string& name, + bool flush = false, + const std::string& mode = "w", + absl::Time start_time = absl::Now()); + ~DataLoggerJsonLines() override; + + // The json lines logger is move only. 
+ DataLoggerJsonLines(DataLoggerJsonLines&& other) = default; + DataLoggerJsonLines& operator=(DataLoggerJsonLines&& other) = default; + DataLoggerJsonLines(const DataLoggerJsonLines&) = delete; + DataLoggerJsonLines& operator=(const DataLoggerJsonLines&) = delete; + + void Write(Record record) override; + void Flush() override; + + private: + file::File fd_; + bool flush_; + absl::Time start_time_; +}; + +class DataLoggerNoop : public DataLogger { + public: + ~DataLoggerNoop() override = default; + void Write(Record record) override {} +}; + +} // namespace open_spiel + +#endif // OPEN_SPIEL_UTILS_DATA_LOGGER_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/data_logger_test.cc b/scenarios/bargaining/open_spiel/open_spiel/utils/data_logger_test.cc new file mode 100644 index 0000000..adda538 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/data_logger_test.cc @@ -0,0 +1,79 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/utils/data_logger.h" + +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/file.h" +#include "open_spiel/utils/json.h" + +namespace open_spiel { +namespace { + +void TestDataLogger() { + std::string val = std::to_string(std::rand()); // NOLINT + std::string tmp_dir = file::GetTmpDir(); + std::string dir = tmp_dir + "/open_spiel-test-" + val; + std::string filename = dir + "/data-test.jsonl"; + + SPIEL_CHECK_TRUE(file::Exists(tmp_dir)); + SPIEL_CHECK_TRUE(file::IsDirectory(tmp_dir)); + SPIEL_CHECK_FALSE(file::Exists(dir)); + SPIEL_CHECK_TRUE(file::Mkdir(dir)); + SPIEL_CHECK_TRUE(file::Exists(dir)); + SPIEL_CHECK_TRUE(file::IsDirectory(dir)); + + { + DataLoggerJsonLines logger(dir, "data-test"); + logger.Write({{"step", 1}, {"avg", 1.5}}); + logger.Write({{"step", 2}, {"avg", 2.5}}); + } + + { + file::File f(filename, "r"); + std::vector lines = absl::StrSplit(f.ReadContents(), '\n'); + SPIEL_CHECK_EQ(lines.size(), 3); + SPIEL_CHECK_EQ(lines[2], ""); + + json::Object obj1 = json::FromString(lines[0])->GetObject(); + SPIEL_CHECK_EQ(obj1["step"], 1); + SPIEL_CHECK_EQ(obj1["avg"], 1.5); + SPIEL_CHECK_TRUE(obj1["time_str"].IsString()); + SPIEL_CHECK_TRUE(obj1["time_abs"].IsDouble()); + SPIEL_CHECK_GT(obj1["time_abs"].GetDouble(), 1'500'000'000); // July 2017 + SPIEL_CHECK_TRUE(obj1["time_rel"].IsDouble()); + SPIEL_CHECK_GT(obj1["time_rel"].GetDouble(), 0); + + json::Object obj2 = json::FromString(lines[1])->GetObject(); + SPIEL_CHECK_EQ(obj2["step"], 2); + SPIEL_CHECK_EQ(obj2["avg"], 2.5); + + SPIEL_CHECK_LT(obj1["time_abs"].GetDouble(), obj2["time_abs"].GetDouble()); + SPIEL_CHECK_LT(obj1["time_rel"].GetDouble(), obj2["time_rel"].GetDouble()); + } + + SPIEL_CHECK_TRUE(file::Remove(filename)); + SPIEL_CHECK_TRUE(file::Remove(dir)); +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::TestDataLogger(); +} diff --git 
a/scenarios/bargaining/open_spiel/open_spiel/utils/file.cc b/scenarios/bargaining/open_spiel/open_spiel/utils/file.cc new file mode 100644 index 0000000..9016e6d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/file.cc @@ -0,0 +1,167 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/utils/file.h" + +#include +#include + +#include + +#ifdef _WIN32 +// https://stackoverflow.com/a/42906151 +#include +#include +#include +#define mkdir(dir, mode) _mkdir(dir) +#define unlink(file) _unlink(file) +#define rmdir(dir) _rmdir(dir) +#else +#include +#endif + +#include +#include +#include + +#include "open_spiel/spiel_utils.h" + +namespace open_spiel::file { + +class File::FileImpl : public std::FILE {}; + +File::File(const std::string& filename, const std::string& mode) { + fd_.reset(static_cast(std::fopen(filename.c_str(), mode.c_str()))); + SPIEL_CHECK_TRUE(fd_); +} + +File::~File() { + if (fd_) { + Flush(); + Close(); + } +} + +File::File(File&& other) = default; +File& File::operator=(File&& other) = default; + +bool File::Close() { return !std::fclose(fd_.release()); } +bool File::Flush() { return !std::fflush(fd_.get()); } +std::int64_t File::Tell() { return std::ftell(fd_.get()); } +bool File::Seek(std::int64_t offset) { + return !std::fseek(fd_.get(), offset, SEEK_SET); +} + +std::string File::Read(std::int64_t count) { + std::string out(count, '\0'); + int read = std::fread(out.data(), sizeof(char), count, fd_.get()); + out.resize(read); + return out; +} + +std::string File::ReadContents() { + Seek(0); + return Read(Length()); +} + +bool File::Write(absl::string_view str) { + return std::fwrite(str.data(), sizeof(char), str.size(), fd_.get()) == + str.size(); +} + +std::int64_t File::Length() { + std::int64_t current = std::ftell(fd_.get()); + std::fseek(fd_.get(), 0, SEEK_END); + std::int64_t length = std::ftell(fd_.get()); + std::fseek(fd_.get(), current, SEEK_SET); + return length; +} + +std::string ReadContentsFromFile(const std::string& filename, + const std::string& mode) { + File f(filename, mode); + return f.ReadContents(); +} + +void WriteContentsToFile(const std::string& filename, const std::string& mode, + const std::string& contents) { + File f(filename, mode); + f.Write(contents); +} + +bool Exists(const std::string& path) { + struct stat info; + return stat(path.c_str(), &info) == 0; +} + +std::string RealPath(const std::string& path) { +#ifdef _WIN32 + char real_path[MAX_PATH]; + if (_fullpath(real_path, path.c_str(), MAX_PATH) == nullptr) { +#else + char real_path[PATH_MAX]; + if (realpath(path.c_str(), real_path) == nullptr) { + // If there was an error return an empty path +#endif + return ""; + } + + return std::string(real_path); +} + +bool IsDirectory(const std::string& path) { + struct stat info; + return stat(path.c_str(), &info) == 0 && info.st_mode & S_IFDIR; +} + +bool Mkdir(const std::string& path, int mode) { + return mkdir(path.c_str(), mode) == 0; +} + +bool 
Mkdirs(const std::string& path, int mode) { + struct stat info; + size_t pos = 0; + while (pos != std::string::npos) { + pos = path.find_first_of("\\/", pos + 1); + std::string sub_path = path.substr(0, pos); + if (stat(sub_path.c_str(), &info) == 0) { + if (info.st_mode & S_IFDIR) { + continue; // directory already exists + } else { + return false; // is a file? + } + } + if (!Mkdir(sub_path, mode)) { + return false; // permission error? + } + } + return true; +} + +bool Remove(const std::string& path) { + if (IsDirectory(path)) { + return rmdir(path.c_str()) == 0; + } else { + return unlink(path.c_str()) == 0; + } +} + +std::string GetEnv(const std::string& key, const std::string& default_value) { + char* val = std::getenv(key.c_str()); + return ((val != nullptr) ? std::string(val) : default_value); +} + +std::string GetTmpDir() { return GetEnv("TMPDIR", "/tmp"); } + +} // namespace open_spiel::file diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/file.h b/scenarios/bargaining/open_spiel/open_spiel/utils/file.h new file mode 100644 index 0000000..de155db --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/file.h @@ -0,0 +1,79 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_UTILS_FILE_H_ +#define OPEN_SPIEL_UTILS_FILE_H_ + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" + +namespace open_spiel::file { + +// A simple file abstraction. Needed for compatibility with Google's libraries. +class File { + public: + File(const std::string& filename, const std::string& mode); + + // File is move only. + File(File&& other); + File& operator=(File&& other); + File(const File&) = delete; + File& operator=(const File&) = delete; + + ~File(); // Flush and Close. + + bool Flush(); // Flush the buffer to disk. + + std::int64_t Tell(); // Offset of the current point in the file. + bool Seek(std::int64_t offset); // Move the current point. + + std::string Read(std::int64_t count); // Read count bytes. + std::string ReadContents(); // Read the entire file. + + bool Write(absl::string_view str); // Write to the file. + + std::int64_t Length(); // Length of the entire file. + + private: + bool Close(); // Close the file. Use the destructor instead. + + class FileImpl; + std::unique_ptr fd_; +}; + +// Reads the file at filename to a string. Dies if this doesn't succeed. +std::string ReadContentsFromFile(const std::string& filename, + const std::string& mode); + +// Write the string contents to the file. Dies if it doesn't succeed. +void WriteContentsToFile(const std::string& filename, const std::string& mode, + const std::string& contents); + +bool Exists(const std::string& path); // Does the file/directory exist? +bool IsDirectory(const std::string& path); // Is it a directory? +bool Mkdir(const std::string& path, int mode = 0755); // Make a directory. +bool Mkdirs(const std::string& path, int mode = 0755); // Mkdir recursively. 
+bool Remove(const std::string& path); // Remove/delete the file/directory. + +std::string RealPath(const std::string& path); // Get the canonical file path. + +std::string GetEnv(const std::string& key, const std::string& default_value); +std::string GetTmpDir(); + +} // namespace open_spiel::file + +#endif // OPEN_SPIEL_UTILS_FILE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/file_test.cc b/scenarios/bargaining/open_spiel/open_spiel/utils/file_test.cc new file mode 100644 index 0000000..d757d42 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/file_test.cc @@ -0,0 +1,95 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/utils/file.h" + +#include +#include + +#include "open_spiel/spiel_utils.h" + +namespace open_spiel::file { +namespace { + +void TestFile() { + std::string val = std::to_string(std::rand()); // NOLINT + std::string tmp_dir = file::GetTmpDir(); + std::string dir = tmp_dir + "/open_spiel-test-" + val; + std::string filename = dir + "/test-file.txt"; + + SPIEL_CHECK_TRUE(Exists(tmp_dir)); + SPIEL_CHECK_TRUE(IsDirectory(tmp_dir)); + + SPIEL_CHECK_FALSE(Exists(dir)); + SPIEL_CHECK_TRUE(Mkdir(dir)); + SPIEL_CHECK_FALSE(Mkdir(dir)); // already exists + SPIEL_CHECK_TRUE(Exists(dir)); + SPIEL_CHECK_TRUE(IsDirectory(dir)); + + std::string prefix = "hello world "; + std::string expected = prefix + val + "\n"; + { + File f(filename, "w"); + SPIEL_CHECK_EQ(f.Tell(), 0); + SPIEL_CHECK_TRUE(f.Write(expected)); + SPIEL_CHECK_TRUE(f.Flush()); + SPIEL_CHECK_EQ(f.Tell(), expected.size()); + SPIEL_CHECK_EQ(f.Length(), expected.size()); + } + + SPIEL_CHECK_TRUE(Exists(filename)); + SPIEL_CHECK_FALSE(IsDirectory(filename)); + // Ensure that realpath returns a string. + SPIEL_CHECK_FALSE(RealPath(filename).empty()); + + { + File f(filename, "r"); + SPIEL_CHECK_EQ(f.Tell(), 0); + SPIEL_CHECK_EQ(f.Length(), expected.size()); + std::string found = f.ReadContents(); + SPIEL_CHECK_EQ(found, expected); + SPIEL_CHECK_EQ(f.Tell(), expected.size()); + f.Seek(0); + SPIEL_CHECK_EQ(f.Read(6), "hello "); + SPIEL_CHECK_EQ(f.Read(6), "world "); + } + + { // Test the move constructor/assignment. 
+ File f(filename, "r"); + File f2 = std::move(f); + File f3(std::move(f2)); + } + + SPIEL_CHECK_TRUE(Remove(filename)); + SPIEL_CHECK_FALSE(Remove(filename)); // already gone + SPIEL_CHECK_FALSE(Exists(filename)); + + std::string deep_dir = dir + "/1/2/3"; + SPIEL_CHECK_FALSE(IsDirectory(dir + "/1")); + SPIEL_CHECK_TRUE(Mkdirs(dir + "/1/2/3")); + SPIEL_CHECK_TRUE(IsDirectory(dir + "/1/2/3")); + SPIEL_CHECK_TRUE(Remove(dir + "/1/2/3")); + SPIEL_CHECK_TRUE(Remove(dir + "/1/2")); + SPIEL_CHECK_TRUE(Remove(dir + "/1")); + + SPIEL_CHECK_TRUE(Remove(dir)); + SPIEL_CHECK_FALSE(Exists(dir)); +} + +} // namespace +} // namespace open_spiel::file + +int main(int argc, char** argv) { + open_spiel::file::TestFile(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/functional.h b/scenarios/bargaining/open_spiel/open_spiel/utils/functional.h new file mode 100644 index 0000000..3be8e71 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/functional.h @@ -0,0 +1,41 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_UTILS_FUNCTIONAL_H_ +#define OPEN_SPIEL_UTILS_FUNCTIONAL_H_ + +#include +#include +#include +#include +#include + + +// A suite of utilities common in functional programming languages. + +namespace open_spiel { + +template +void Zip(const InputSequence1& first1, const InputSequence1& last1, + const InputSequence2& first2, ZippedOutputIterator& output) { + std::transform( + first1, last1, first2, std::back_inserter(output), + [](const auto& a, const auto& b) { return std::make_pair(a, b); }); +} + +} // namespace open_spiel + +#endif // OPEN_SPIEL_UTILS_FUNCTIONAL_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/functional_test.cc b/scenarios/bargaining/open_spiel/open_spiel/utils/functional_test.cc new file mode 100644 index 0000000..c3b704b --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/functional_test.cc @@ -0,0 +1,37 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
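The file utilities exercised by file_test.cc above are also what the loggers build on. A minimal sketch of the free-function helpers follows; the paths are illustrative and the sketch assumes the vendored file.cc is compiled and linked:

```
// file_demo.cc -- illustrative sketch of open_spiel::file helpers.
#include <iostream>
#include <string>

#include "open_spiel/utils/file.h"

int main() {
  namespace file = open_spiel::file;
  const std::string dir = file::GetTmpDir() + "/demo-run";  // e.g. /tmp/demo-run
  file::Mkdirs(dir);  // like `mkdir -p`
  file::WriteContentsToFile(dir + "/config.txt", "w", "budget=10\n");
  std::cout << file::ReadContentsFromFile(dir + "/config.txt", "r");
  file::Remove(dir + "/config.txt");
  file::Remove(dir);  // Remove handles both files and directories
  return 0;
}
```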
+
+#include "open_spiel/utils/functional.h"
+#include "open_spiel/spiel_utils.h"
+
+namespace open_spiel {
+namespace {
+
+void TestZip() {
+  std::vector<int> actions = {1, 2, 3};
+  std::vector<double> probs = {0.1, 0.2, 0.3};
+  std::vector<std::pair<int, double>> action_probs;
+  Zip(actions.begin(), actions.end(), probs.begin(), action_probs);
+
+  std::vector<std::pair<int, double>> expected_action_probs = {
+      {1, 0.1}, {2, 0.2}, {3, 0.3}};
+  SPIEL_CHECK_EQ(action_probs, expected_action_probs);
+}
+
+}  // namespace
+}  // namespace open_spiel
+
+int main(int argc, char** argv) {
+  open_spiel::TestZip();
+}
diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/init.cc b/scenarios/bargaining/open_spiel/open_spiel/utils/init.cc
new file mode 100644
index 0000000..594ab4a
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/utils/init.cc
@@ -0,0 +1,23 @@
+// Copyright 2021 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "open_spiel/utils/init.h"
+
+
+namespace open_spiel {
+
+void Init(const char* usage, int* argc, char*** argv, bool remove_flags) {
+}
+
+}  // namespace open_spiel
diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/init.h b/scenarios/bargaining/open_spiel/open_spiel/utils/init.h
new file mode 100644
index 0000000..0741949
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/utils/init.h
@@ -0,0 +1,27 @@
+// Copyright 2021 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef OPEN_SPIEL_UTILS_INIT_H_
+#define OPEN_SPIEL_UTILS_INIT_H_
+
+namespace open_spiel {
+
+// A utility function useful for mixing internal and external use of OpenSpiel.
+// Intended to be called early in a program's main. Currently only necessary
+// in programs that mix internal and external use (e.g. utils/file.h).
+void Init(const char* usage, int* argc, char*** argv, bool remove_flags);
+
+}  // namespace open_spiel
+
+#endif  // OPEN_SPIEL_UTILS_INIT_H_
diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/json.cc b/scenarios/bargaining/open_spiel/open_spiel/utils/json.cc
new file mode 100644
index 0000000..57e537d
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/utils/json.cc
@@ -0,0 +1,340 @@
+// Copyright 2019 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/utils/json.h" + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/match.h" +#include "open_spiel/abseil-cpp/absl/strings/numbers.h" +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel::json { + +namespace { + +std::string Escape(const std::string& input) { + std::string out; + out.reserve(input.length()); + for (const char c : input) { + switch (c) { + case '"': out.append("\\\""); break; + case '\\': out.append("\\\\"); break; + case '\b': out.append("\\b"); break; + case '\f': out.append("\\f"); break; + case '\n': out.append("\\n"); break; + case '\r': out.append("\\r"); break; + case '\t': out.append("\\t"); break; + default: out.push_back(c); break; + } + } + return out; +} + +void ConsumeWhitespace(absl::string_view* str) { + for (auto p = str->begin(); p < str->end(); ++p) { + switch (*p) { + case ' ': + case '\n': + case '\r': + case '\t': + break; + default: + str->remove_prefix(p - str->begin()); + return; + } + } +} + +absl::nullopt_t ParseError(absl::string_view error, absl::string_view str) { + // Comment out this check if you want parse errors to return nullopt instead + // of crash with an error message of where the problem is. + SPIEL_CHECK_EQ(error, str.substr(0, + std::min(30, static_cast(str.size())))); + + // TODO(author7): Maybe return a variant of error string or Value? 
+ return absl::nullopt; +} + +bool ConsumeToken(absl::string_view* str, absl::string_view token) { + if (absl::StartsWith(*str, token)) { + str->remove_prefix(token.size()); + return true; + } + return false; +} + +template +absl::optional ParseConstant(absl::string_view* str, + absl::string_view token, T value) { + if (ConsumeToken(str, token)) { + return value; + } + return ParseError("Invalid constant: ", *str); +} + +absl::optional ParseNumber(absl::string_view* str) { + size_t valid_double = + std::min(str->find_first_not_of("-+.0123456789eE"), str->size()); + size_t valid_int = + std::min(str->find_first_not_of("-0123456789"), str->size()); + if (valid_double == valid_int) { + if (int64_t v; absl::SimpleAtoi(str->substr(0, valid_int), &v)) { + str->remove_prefix(valid_int); + return Value(v); + } + } else { + if (double v; absl::SimpleAtod(str->substr(0, valid_double), &v)) { + str->remove_prefix(valid_double); + return Value(v); + } + } + return ParseError("Invalid number", *str); +} + +absl::optional ParseString(absl::string_view* str) { + if (!ConsumeToken(str, "\"")) { + return ParseError("Expected '\"'", *str); + } + std::string out; + bool escape = false; + for (auto p = str->begin(); p < str->end(); ++p) { + switch (*p) { + case '\\': + if (escape) { + out.push_back('\\'); + } + escape = !escape; + break; + case '"': + if (escape) { + out.push_back('"'); + escape = false; + break; + } else { + str->remove_prefix(p - str->begin() + 1); + return out; + } + default: + if (escape) { + switch (*p) { + case 'b': out.append("\b"); break; + case 'f': out.append("\f"); break; + case 'n': out.append("\n"); break; + case 'r': out.append("\r"); break; + case 't': out.append("\t"); break; + default: out.push_back(*p); break; + } + escape = false; + } else { + out.push_back(*p); + } + break; + } + } + return ParseError("Unfinished string", *str); +} + +absl::optional ParseValue(absl::string_view* str); + +absl::optional ParseArray(absl::string_view* str) { + if (!ConsumeToken(str, "[")) { + return ParseError("Expected '['", *str); + } + Array out; + bool first = true; + while (!str->empty()) { + ConsumeWhitespace(str); + if (ConsumeToken(str, "]")) { + return out; + } + if (!first && !ConsumeToken(str, ",")) { + return ParseError("Expected ','", *str); + } + first = false; + ConsumeWhitespace(str); + absl::optional v = ParseValue(str); + if (!v) { + return absl::nullopt; + } + out.push_back(*v); + } + return ParseError("Unfinished array", *str); +} + +absl::optional ParseObject(absl::string_view* str) { + if (!ConsumeToken(str, "{")) { + return ParseError("Expected '{'", *str); + } + Object out; + bool first = true; + while (!str->empty()) { + ConsumeWhitespace(str); + if (ConsumeToken(str, "}")) { + return out; + } + if (!first && !ConsumeToken(str, ",")) { + return ParseError("Expected ','", *str); + } + first = false; + ConsumeWhitespace(str); + absl::optional key = ParseString(str); + if (!key) { + return absl::nullopt; + } + ConsumeWhitespace(str); + if (!ConsumeToken(str, ":")) { + return ParseError("Expected ':'", *str); + } + ConsumeWhitespace(str); + absl::optional v = ParseValue(str); + if (!v) { + return absl::nullopt; + } + out.emplace(*key, *v); + } + return ParseError("Unfinished object", *str); +} + +absl::optional ParseValue(absl::string_view* str) { + ConsumeWhitespace(str); + if (str->empty()) { + return ParseError("Empty string", *str); + } + switch (str->at(0)) { + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + 
case '8': + case '9': return ParseNumber(str); + case 'n': return ParseConstant(str, "null", Null()); + case 't': return ParseConstant(str, "true", true); + case 'f': return ParseConstant(str, "false", false); + case '"': return ParseString(str); + case '[': return ParseArray(str); + case '{': return ParseObject(str); + default: return ParseError("Unexpected char: ", *str); + } +} + +} // namespace + +bool Null::operator==(const Null& o) const { return true; } +bool Null::operator!=(const Null& o) const { return false; } + +std::string ToString(const Array& array, bool wrap, int indent) { + std::string out = "["; + bool first = true; + for (const Value& v : array) { + if (!first) { + absl::StrAppend(&out, ","); + } + if (wrap) { + absl::StrAppend(&out, "\n", std::string(indent + 2, ' ')); + } else if (!first) { + absl::StrAppend(&out, " "); + } + first = false; + absl::StrAppend(&out, json::ToString(v, wrap, indent + 2)); + } + if (wrap) { + absl::StrAppend(&out, "\n", std::string(indent, ' ')); + } + absl::StrAppend(&out, "]"); + return out; +} + +std::string ToString(const Object& obj, bool wrap, int indent) { + std::string out = "{"; + bool first = true; + for (const auto& [key, value] : obj) { + if (!first) { + absl::StrAppend(&out, ","); + } + if (wrap) { + absl::StrAppend(&out, "\n", std::string(indent + 2, ' ')); + } else if (!first) { + absl::StrAppend(&out, " "); + } + first = false; + absl::StrAppend(&out, "\"", Escape(key), "\": ", + json::ToString(value, wrap, indent + 2)); + } + if (wrap) { + absl::StrAppend(&out, "\n", std::string(indent, ' ')); + } + absl::StrAppend(&out, "}"); + return out; +} + + +std::string ToString(const Value& value, bool wrap, int indent) { + if (value.IsNull()) { + return "null"; + } else if (value.IsBool()) { + return (value.GetBool() ? "true" : "false"); + } else if (value.IsInt()) { + return std::to_string(value.GetInt()); + } else if (value.IsDouble()) { + double v = value.GetDouble(); + if (std::isfinite(v)) { + return std::to_string(v); + } else { + // It'd be nice to show an error with a path, but at least this is + // debuggable by looking at the json. Crashing doesn't tell you where + // the problem is. + return absl::StrCat("\"", std::to_string(v), "\""); + } + } else if (value.IsString()) { + return absl::StrCat("\"", Escape(value.GetString()), "\""); + } else if (value.IsArray()) { + return ToString(value.GetArray(), wrap, indent); + } else if (value.IsObject()) { + return ToString(value.GetObject(), wrap, indent); + } else { + SpielFatalError("json::ToString is missing a type."); + } +} + +std::ostream& operator<<(std::ostream& os, const Null& n) { + return os << ToString(n); +} + +std::ostream& operator<<(std::ostream& os, const Array& a) { + return os << ToString(a); +} + +std::ostream& operator<<(std::ostream& os, const Object& o) { + return os << ToString(o); +} + +std::ostream& operator<<(std::ostream& os, const Value& v) { + return os << ToString(v); +} + +absl::optional FromString(absl::string_view str) { + return ParseValue(&str); +} + +} // namespace open_spiel::json diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/json.h b/scenarios/bargaining/open_spiel/open_spiel/utils/json.h new file mode 100644 index 0000000..4cc585c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/json.h @@ -0,0 +1,166 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_UTILS_JSON_H_ +#define OPEN_SPIEL_UTILS_JSON_H_ + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/string_view.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel::json { + +class Null { + public: + bool operator==(const Null& o) const; + bool operator!=(const Null& o) const; +}; + +class Value; +using Array = std::vector; +using Object = std::map; + +class Value : public std::variant { + public: + using std::variant::variant; // Inherit the constructors. + Value(int v) : Value(static_cast(v)) {} + Value(const char* v) : Value(std::string(v)) {} + + bool IsNull() const { return std::holds_alternative(*this); } + bool IsBool() const { return std::holds_alternative(*this); } + bool IsTrue() const { return IsBool() && GetBool(); } + bool IsFalse() const { return IsBool() && !GetBool(); } + bool IsInt() const { return std::holds_alternative(*this); } + bool IsDouble() const { return std::holds_alternative(*this); } + bool IsNumber() const { return IsInt() || IsDouble(); } + bool IsString() const { return std::holds_alternative(*this); } + bool IsArray() const { return std::holds_alternative(*this); } + bool IsObject() const { return std::holds_alternative(*this); } + + // Do not use std::get here, as it causes compilation problems on on older + // MacOS. 
For details, see: + // https://stackoverflow.com/questions/52521388/stdvariantget-does-not-compile-with-apple-llvm-10-0 + template T get_val() const { + if (auto *val = std::get_if(this)) { + return *val; + } else { + SpielFatalError(absl::StrCat( + "Value does not contain the specified type: ", typeid(T).name())); + } + } + + template T &get_ref() { + if (auto *val = std::get_if(this)) { + return *val; + } else { + SpielFatalError(absl::StrCat( + "Value does not contain the specified type: ", typeid(T).name())); + } + } + + template const T &get_const_ref() const { + if (auto *val = std::get_if(this)) { + return *val; + } else { + SpielFatalError(absl::StrCat( + "Value does not contain the specified type: ", typeid(T).name())); + } + } + + bool GetBool() const { return get_val(); } + int64_t GetInt() const { return get_val(); } + int64_t &GetInt() { return get_ref(); } + double GetDouble() const { return get_val(); } + double &GetDouble() { return get_ref(); } + const std::string &GetString() const { return get_const_ref(); } + std::string &GetString() { return get_ref(); } + const Array &GetArray() const { return get_const_ref(); } + Array &GetArray() { return get_ref(); } + const Object &GetObject() const { return get_const_ref(); } + Object &GetObject() { return get_ref(); } + + bool operator==(const Null& o) const { return IsNull(); } + bool operator==(const bool& o) const { return IsBool() && GetBool() == o; } + bool operator==(const int& o) const { return IsInt() && GetInt() == o; } + bool operator==(const int64_t& o) const { return IsInt() && GetInt() == o; } + bool operator==(const double& o) const { + return IsDouble() && GetDouble() == o; + } + bool operator==(const char* o) const { + return IsString() && GetString() == o; + } + bool operator==(const std::string& o) const { + return IsString() && GetString() == o; + } + bool operator==(const Array& o) const { return IsArray() && GetArray() == o; } + bool operator==(const Object& o) const { + return IsObject() && GetObject() == o; + } + template + bool operator!=(const T& o) const { return !(*this == o); } +}; + +// Accept a std::vector of any of the types that are constructible to Value. +// For example accept std::vector. +template +Array CastToArray(std::vector vec) { + Array out; + out.reserve(vec.size()); + for (const T& val : vec) { + out.emplace_back(val); + } + return out; +} + +// Accept a std::vector of any type with a fn that converts to any type +// constructible to Value. +template +Array TransformToArray(std::vector vec, Fn fn) { + Array out; + out.reserve(vec.size()); + for (const T& val : vec) { + out.emplace_back(fn(val)); + } + return out; +} + +// Serialize a JSON value into a string. +// Set wrap to true to pretty print with each element of a list or array on +// its own line. Indent is how much to start indenting by, and is mainly used +// for the recursive printer, which increments it by 2 each level. +std::string ToString(const Array& array, bool wrap = false, int indent = 0); +std::string ToString(const Object& obj, bool wrap = false, int indent = 0); +std::string ToString(const Value& value, bool wrap = false, int indent = 0); + +std::ostream& operator<<(std::ostream& os, const Null& n); +std::ostream& operator<<(std::ostream& os, const Array& a); +std::ostream& operator<<(std::ostream& os, const Object& o); +std::ostream& operator<<(std::ostream& os, const Value& v); + +// Deserialize a string into a JSON value. Returns nullopt on parse failure. 
+absl::optional FromString(absl::string_view str); + +} // namespace open_spiel::json + +#endif // OPEN_SPIEL_UTILS_JSON_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/json_test.cc b/scenarios/bargaining/open_spiel/open_spiel/utils/json_test.cc new file mode 100644 index 0000000..450cdc6 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/json_test.cc @@ -0,0 +1,224 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/utils/json.h" + +#include "open_spiel/spiel_utils.h" + +namespace open_spiel::json { + +namespace { + +void TestToString() { + SPIEL_CHECK_EQ(ToString(Null()), "null"); + SPIEL_CHECK_EQ(ToString(true), "true"); + SPIEL_CHECK_EQ(ToString(false), "false"); + SPIEL_CHECK_EQ(ToString(1), "1"); + SPIEL_CHECK_EQ(ToString(3.1415923), "3.141592"); + SPIEL_CHECK_EQ(ToString("asdf"), "\"asdf\""); + SPIEL_CHECK_EQ(ToString(std::string("asdf")), "\"asdf\""); + SPIEL_CHECK_EQ(ToString(Array({"asdf"})), "[\"asdf\"]"); + SPIEL_CHECK_EQ(ToString(Array({1, Null(), "asdf"})), "[1, null, \"asdf\"]"); + SPIEL_CHECK_EQ(ToString(Array({1, 2, 3})), "[1, 2, 3]"); + SPIEL_CHECK_EQ(ToString(Array({1, 2, 3, 4})), "[1, 2, 3, 4]"); + SPIEL_CHECK_EQ(ToString(Object({{"asdf", 1}, {"foo", 2}})), + "{\"asdf\": 1, \"foo\": 2}"); + SPIEL_CHECK_EQ(ToString( + Object({{"asdf", Object({{"bar", 6}})}, {"foo", Array({1, 2, 3})}})), + "{\"asdf\": {\"bar\": 6}, \"foo\": [1, 2, 3]}"); + SPIEL_CHECK_EQ(ToString(Object({{"asdf", Object({{"bar", 6}})}, + {"foo", Array({1, true, false})}}), + true), + R"({ + "asdf": { + "bar": 6 + }, + "foo": [ + 1, + true, + false + ] +})"); +} + +void TestFromString() { + absl::optional v; + + v = FromString("null"); + SPIEL_CHECK_TRUE(v); + SPIEL_CHECK_TRUE(v->IsNull()); + + v = FromString("true"); + SPIEL_CHECK_TRUE(v); + SPIEL_CHECK_TRUE(v->IsBool()); + SPIEL_CHECK_TRUE(v->IsTrue()); + SPIEL_CHECK_EQ(v->GetBool(), true); + + v = FromString("false"); + SPIEL_CHECK_TRUE(v); + SPIEL_CHECK_TRUE(v->IsBool()); + SPIEL_CHECK_TRUE(v->IsFalse()); + SPIEL_CHECK_EQ(v->GetBool(), false); + + v = FromString("1"); + SPIEL_CHECK_TRUE(v); + SPIEL_CHECK_TRUE(v->IsInt()); + SPIEL_CHECK_EQ(v->GetInt(), 1); + + v = FromString("-163546"); + SPIEL_CHECK_TRUE(v); + SPIEL_CHECK_TRUE(v->IsInt()); + SPIEL_CHECK_EQ(v->GetInt(), -163546); + + v = FromString("3.5"); + SPIEL_CHECK_TRUE(v); + SPIEL_CHECK_TRUE(v->IsDouble()); + SPIEL_CHECK_EQ(v->GetDouble(), 3.5); + + v = FromString("\"asdf\""); + SPIEL_CHECK_TRUE(v); + SPIEL_CHECK_TRUE(v->IsString()); + SPIEL_CHECK_EQ(v->GetString(), "asdf"); + + v = FromString(R"("as \" \\ df")"); + SPIEL_CHECK_TRUE(v); + SPIEL_CHECK_TRUE(v->IsString()); + SPIEL_CHECK_EQ(v->GetString(), R"(as " \ df)"); + + v = FromString("[\"asdf\"]"); + SPIEL_CHECK_TRUE(v); + SPIEL_CHECK_TRUE(v->IsArray()); + SPIEL_CHECK_EQ(v->GetArray(), Array({"asdf"})); + + v = FromString("[ null, true, 1 , 3.5, \"asdf\" ]"); + SPIEL_CHECK_TRUE(v); + SPIEL_CHECK_TRUE(v->IsArray()); + 
SPIEL_CHECK_EQ(v->GetArray(), Array({Null(), true, 1, 3.5, "asdf"})); + + v = FromString("{\"asdf\" : 1, \"foo\": 2}"); + SPIEL_CHECK_TRUE(v); + SPIEL_CHECK_TRUE(v->IsObject()); + SPIEL_CHECK_EQ(v->GetObject(), Object({{"asdf", 1}, {"foo", 2}})); + + v = FromString(R"({ + "asdf": { + "bar": 6 + }, + "foo": [ + 1, + true, + false + ] +})"); + SPIEL_CHECK_TRUE(v); + SPIEL_CHECK_TRUE(v->IsObject()); + SPIEL_CHECK_EQ(v->GetObject(), Object({{"asdf", Object({{"bar", 6}})}, + {"foo", Array({1, true, false})}})); +} + +void TestValue() { + SPIEL_CHECK_EQ(Value(true), Value(true)); + SPIEL_CHECK_EQ(Value(true), true); + SPIEL_CHECK_EQ(Value(1), Value(1)); + SPIEL_CHECK_EQ(Value(1), 1); + SPIEL_CHECK_EQ(Value(1.5), Value(1.5)); + SPIEL_CHECK_EQ(Value(1.5), 1.5); + + SPIEL_CHECK_TRUE(Value(Null()).IsNull()); + SPIEL_CHECK_EQ(std::get(Value(Null())), Null()); + SPIEL_CHECK_EQ(Value(Null()), Null()); + + SPIEL_CHECK_TRUE(Value(true).IsBool()); + SPIEL_CHECK_TRUE(Value(true).IsTrue()); + SPIEL_CHECK_TRUE(Value(false).IsBool()); + SPIEL_CHECK_TRUE(Value(false).IsFalse()); + SPIEL_CHECK_FALSE(Value(true).IsFalse()); + SPIEL_CHECK_FALSE(Value(false).IsTrue()); + SPIEL_CHECK_FALSE(Value(1).IsTrue()); + SPIEL_CHECK_FALSE(Value(1).IsFalse()); + SPIEL_CHECK_EQ(std::get(Value(true)), true); + SPIEL_CHECK_EQ(Value(true).GetBool(), true); + SPIEL_CHECK_EQ(Value(true), true); + SPIEL_CHECK_EQ(Value(false).GetBool(), false); + SPIEL_CHECK_EQ(Value(false), false); + SPIEL_CHECK_NE(Value(true), 1); + SPIEL_CHECK_NE(Value(false), 1.5); + + SPIEL_CHECK_TRUE(Value(1).IsInt()); + SPIEL_CHECK_TRUE(Value(1).IsNumber()); + SPIEL_CHECK_FALSE(Value(true).IsInt()); + SPIEL_CHECK_FALSE(Value(1.5).IsInt()); + SPIEL_CHECK_EQ(std::get(Value(1)), 1); + SPIEL_CHECK_EQ(Value(1).GetInt(), 1); + SPIEL_CHECK_EQ(Value(1), 1); + SPIEL_CHECK_NE(Value(1), 2); + + SPIEL_CHECK_TRUE(Value(1.5).IsDouble()); + SPIEL_CHECK_TRUE(Value(1.5).IsNumber()); + SPIEL_CHECK_FALSE(Value(1.5).IsInt()); + SPIEL_CHECK_FALSE(Value(1.5).IsBool()); + SPIEL_CHECK_EQ(std::get(Value(1.5)), 1.5); + SPIEL_CHECK_EQ(Value(1.5).GetDouble(), 1.5); + SPIEL_CHECK_EQ(Value(1.5), 1.5); + SPIEL_CHECK_NE(Value(1.5), 2.5); + + SPIEL_CHECK_TRUE(Value("asdf").IsString()); + SPIEL_CHECK_TRUE(Value(std::string("asdf")).IsString()); + SPIEL_CHECK_FALSE(Value("asdf").IsArray()); + SPIEL_CHECK_EQ(Value("asdf"), "asdf"); + SPIEL_CHECK_EQ(Value("asdf"), std::string("asdf")); + SPIEL_CHECK_EQ(Value("asdf").GetString(), "asdf"); + SPIEL_CHECK_EQ(std::get(Value("asdf")), "asdf"); + + SPIEL_CHECK_EQ(Array({1, 2, 3}), Array({1, 2, 3})); + SPIEL_CHECK_EQ(CastToArray(std::vector({1, 2, 3})), Array({1, 2, 3})); + SPIEL_CHECK_EQ( + TransformToArray(std::vector({1u, 2u, 3u}), + [](unsigned int i) { return static_cast(i); }), + Array({1, 2, 3})); + SPIEL_CHECK_TRUE(Value(Array({1, 2, 3})).IsArray()); + SPIEL_CHECK_FALSE(Value(Array({1, 2, 3})).IsObject()); + SPIEL_CHECK_EQ(Value(Array({1, 2, 3})), Array({1, 2, 3})); + SPIEL_CHECK_EQ(Value(Array({1, 2, 3})).GetArray(), Array({1, 2, 3})); + SPIEL_CHECK_NE(Value(Array({1, 2, 3})).GetArray(), Array({1, 3, 5})); + SPIEL_CHECK_EQ(std::get(Value(Array({1, 2, 3}))), Array({1, 2, 3})); + + SPIEL_CHECK_EQ(Object({{"asdf", 1}, {"bar", 2}}), + Object({{"asdf", 1}, {"bar", 2}})); + SPIEL_CHECK_NE(Object({{"asdf", 1}, {"bar", 2}}), + Object({{"asdf", 1}, {"bar", 3}})); + SPIEL_CHECK_NE(Object({{"asdf", 1}, {"bar", 2}}), + Object({{"asdf", 1}, {"foo", 2}})); + SPIEL_CHECK_EQ(Value(Object({{"asdf", 1}, {"bar", 2}})), + Object({{"asdf", 1}, {"bar", 2}})); + 
SPIEL_CHECK_NE(Value(Object({{"asdf", 1}, {"bar", 2}})), + Object({{"asdf", 1}})); + SPIEL_CHECK_TRUE(Value(Object({{"asdf", 1}, {"bar", 2}})).IsObject()); + SPIEL_CHECK_FALSE(Value(Object({{"asdf", 1}, {"bar", 2}})).IsArray()); + SPIEL_CHECK_EQ(Value(Object({{"asdf", 1}, {"bar", 2}})).GetObject(), + Object({{"asdf", 1}, {"bar", 2}})); + SPIEL_CHECK_EQ(std::get(Value(Object({{"asdf", 1}, {"bar", 2}}))), + Object({{"asdf", 1}, {"bar", 2}})); +} + +} // namespace + +} // namespace open_spiel::json + + +int main(int argc, char** argv) { + open_spiel::json::TestToString(); + open_spiel::json::TestFromString(); + open_spiel::json::TestValue(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/logger.h b/scenarios/bargaining/open_spiel/open_spiel/utils/logger.h new file mode 100644 index 0000000..6b8e1c3 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/logger.h @@ -0,0 +1,76 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_UTILS_LOGGER_H_ +#define OPEN_SPIEL_UTILS_LOGGER_H_ + +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/abseil-cpp/absl/time/clock.h" +#include "open_spiel/abseil-cpp/absl/time/time.h" +#include "open_spiel/utils/file.h" + +namespace open_spiel { + +class Logger { + public: + virtual ~Logger() = default; + virtual void Print(const std::string& str) = 0; + + // A specialization of Print that passes everything through StrFormat first. + template + void Print(const absl::FormatSpec& format, const Arg1& arg1, + const Args&... args) { + Print(absl::StrFormat(format, arg1, args...)); + } +}; + + +// A logger to print stuff to a file. +class FileLogger : public Logger { + public: + FileLogger(const std::string& path, const std::string& name, + const std::string& mode = "w") + : fd_(absl::StrFormat("%s/log-%s.txt", path, name), mode), + tz_(absl::LocalTimeZone()) { + Print("%s started", name); + } + + using Logger::Print; + void Print(const std::string& str) override { + std::string time = + absl::FormatTime("%Y-%m-%d %H:%M:%E3S", absl::Now(), tz_); + fd_.Write(absl::StrFormat("[%s] %s\n", time, str)); + fd_.Flush(); + } + + ~FileLogger() override { Print("Closing the log."); } + + private: + open_spiel::file::File fd_; + absl::TimeZone tz_; +}; + + +class NoopLogger : public Logger { + public: + using Logger::Print; + void Print(const std::string& str) override {} +}; + +} // namespace open_spiel + +#endif // OPEN_SPIEL_UTILS_LOGGER_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/logger_test.cc b/scenarios/bargaining/open_spiel/open_spiel/utils/logger_test.cc new file mode 100644 index 0000000..8d4debf --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/logger_test.cc @@ -0,0 +1,69 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/match.h" +#include "open_spiel/abseil-cpp/absl/strings/str_split.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/file.h" +#include "open_spiel/utils/logger.h" + +namespace open_spiel { +namespace { + +void TestFileLogger() { + std::string val = std::to_string(std::rand()); // NOLINT + std::string tmp_dir = file::GetTmpDir(); + std::string dir = tmp_dir + "/open_spiel-test-" + val; + std::string filename = dir + "/log-test.txt"; + + SPIEL_CHECK_TRUE(file::Exists(tmp_dir)); + SPIEL_CHECK_TRUE(file::IsDirectory(tmp_dir)); + SPIEL_CHECK_FALSE(file::Exists(dir)); + SPIEL_CHECK_TRUE(file::Mkdir(dir)); + SPIEL_CHECK_TRUE(file::Exists(dir)); + SPIEL_CHECK_TRUE(file::IsDirectory(dir)); + + { + FileLogger logger(dir, "test"); + logger.Print("line 1"); + logger.Print("line %d", 2); + logger.Print("line %d: %s", 3, "asdf"); + } + + { + file::File f(filename, "r"); + std::vector lines = absl::StrSplit(f.ReadContents(), '\n'); + SPIEL_CHECK_EQ(lines.size(), 6); + SPIEL_CHECK_TRUE(absl::StrContains(lines[0], "test started")); + SPIEL_CHECK_TRUE(absl::StrContains(lines[1], "line 1")); + SPIEL_CHECK_TRUE(absl::StrContains(lines[2], "line 2")); + SPIEL_CHECK_TRUE(absl::StrContains(lines[3], "line 3: asdf")); + SPIEL_CHECK_TRUE(absl::StrContains(lines[4], "Closing the log")); + SPIEL_CHECK_EQ(lines[5], ""); + } + + SPIEL_CHECK_TRUE(file::Remove(filename)); + SPIEL_CHECK_TRUE(file::Remove(dir)); +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::TestFileLogger(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/lru_cache.h b/scenarios/bargaining/open_spiel/open_spiel/utils/lru_cache.h new file mode 100644 index 0000000..80a72fd --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/lru_cache.h @@ -0,0 +1,136 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_UTILS_LRU_CACHE_H_ +#define OPEN_SPIEL_UTILS_LRU_CACHE_H_ + +#include + +#include "open_spiel/abseil-cpp/absl/container/flat_hash_map.h" +#include "open_spiel/abseil-cpp/absl/synchronization/mutex.h" + +namespace open_spiel { + +struct LRUCacheInfo { + int64_t hits = 0; + int64_t misses = 0; + int size = 0; + int max_size = 0; + + double Usage() const { + return max_size == 0 ? 0 : static_cast(size) / max_size; + } + int64_t Total() const { return hits + misses; } + double HitRate() const { + return Total() == 0 ? 
0 : static_cast(hits) / Total(); + } + + void operator+=(const LRUCacheInfo& o) { + hits += o.hits; + misses += o.misses; + size += o.size; + max_size += o.max_size; + } +}; + +template +class LRUCache { // Least Recently Used Cache. + // TODO(author7): Consider the performance implications here. Some ideas: + // - Shard the cache to avoid lock contention. Can be done by the user. + // - Use shared pointers to avoid copying data out, and shorten the lock. + // - Use two generations to avoid order updates on hot items. The mature + // generation wouldn't be ordered or evicted so can use a reader/writer lock + // - Use atomics for hits/misses to shorten lock times. + // - Embed the list directly into the map value to avoid extra indirection. + public: + explicit LRUCache(int max_size) : hits_(0), misses_(0) { + SetMaxSize(max_size); + } + + // Move only, not copyable. + LRUCache(LRUCache&& other) = default; + LRUCache& operator=(LRUCache&& other) = default; + LRUCache(const LRUCache&) = delete; + LRUCache& operator=(const LRUCache&) = delete; + + void SetMaxSize(int max_size) { max_size_ = std::max(max_size, 4); } + + int Size() { + absl::MutexLock lock(&m_); + return map_.size(); + } + + void Clear() { + absl::MutexLock lock(&m_); + order_.clear(); + map_.clear(); + hits_ = 0; + misses_ = 0; + } + + void Set(const K& key, const V& value) { + absl::MutexLock lock(&m_); + auto pos = map_.find(key); + if (pos == map_.end()) { // Not found, add it. + if (map_.size() >= max_size_) { // Make space if needed. + map_.erase(order_.back()); + order_.pop_back(); + } + order_.push_front(key); + map_[key] = Entry{value, order_.begin()}; + } else { // Found, move it to the front. + order_.erase(pos->second.order_iterator); + order_.push_front(key); + pos->second.order_iterator = order_.begin(); + } + } + + absl::optional Get(const K& key) { + absl::MutexLock lock(&m_); + auto pos = map_.find(key); + if (pos == map_.end()) { // Not found. + misses_ += 1; + return absl::nullopt; + } else { // Found, move it to the front, and return the value. + hits_ += 1; + order_.erase(pos->second.order_iterator); + order_.push_front(key); + pos->second.order_iterator = order_.begin(); + return pos->second.value; + } + } + + LRUCacheInfo Info() { + absl::MutexLock lock(&m_); + return LRUCacheInfo{hits_, misses_, static_cast(map_.size()), + max_size_}; + } + + private: + struct Entry { + V value; + typename std::list::iterator order_iterator; + }; + + int64_t hits_; + int64_t misses_; + int max_size_; + std::list order_; + absl::flat_hash_map map_; + absl::Mutex m_; +}; + +} // namespace open_spiel + +#endif // OPEN_SPIEL_UTILS_LRU_CACHE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/lru_cache_test.cc b/scenarios/bargaining/open_spiel/open_spiel/utils/lru_cache_test.cc new file mode 100644 index 0000000..9011083 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/lru_cache_test.cc @@ -0,0 +1,79 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/utils/lru_cache.h" + +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace { + +void TestLRUCache() { + LRUCache cache(4); + + SPIEL_CHECK_EQ(cache.Size(), 0); + + LRUCacheInfo info = cache.Info(); + SPIEL_CHECK_EQ(info.hits, 0); + SPIEL_CHECK_EQ(info.misses, 0); + SPIEL_CHECK_EQ(info.size, 0); + SPIEL_CHECK_EQ(info.max_size, 4); + SPIEL_CHECK_EQ(info.Usage(), 0); + SPIEL_CHECK_EQ(info.HitRate(), 0); + + SPIEL_CHECK_FALSE(cache.Get(1)); + + cache.Set(13, "13"); + SPIEL_CHECK_EQ(cache.Size(), 1); + + SPIEL_CHECK_FALSE(cache.Get(1)); + + { + absl::optional v = cache.Get(13); + SPIEL_CHECK_TRUE(v); + SPIEL_CHECK_EQ(*v, "13"); + } + + cache.Set(14, "14"); + cache.Set(15, "15"); + cache.Set(16, "16"); + + SPIEL_CHECK_EQ(cache.Size(), 4); + + cache.Set(17, "17"); + + SPIEL_CHECK_EQ(cache.Size(), 4); + + SPIEL_CHECK_FALSE(cache.Get(13)); // evicted + SPIEL_CHECK_TRUE(cache.Get(14)); + + SPIEL_CHECK_EQ(cache.Size(), 4); + + cache.Set(18, "18"); + + SPIEL_CHECK_FALSE(cache.Get(15)); // evicted + SPIEL_CHECK_TRUE(cache.Get(14)); // older but more recently used + + info = cache.Info(); + SPIEL_CHECK_EQ(info.Usage(), 1); + + cache.Clear(); + + SPIEL_CHECK_FALSE(cache.Get(18)); // evicted +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char** argv) { open_spiel::TestLRUCache(); } diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/random.cc b/scenarios/bargaining/open_spiel/open_spiel/utils/random.cc new file mode 100644 index 0000000..36c013d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/random.cc @@ -0,0 +1,32 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#include "open_spiel/utils/random.h" + +namespace open_spiel { + +namespace { +std::uniform_real_distribution uniformDist; +} // namespace + +double RandomMT::RandomUniform() { return uniformDist(generator_); } + +double RandomFixedSequence::RandomUniform() { + double v = values_[position_]; + if (++position_ == values_.size()) position_ = 0; + return v; +} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/random.h b/scenarios/bargaining/open_spiel/open_spiel/utils/random.h new file mode 100644 index 0000000..9e498c2 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/random.h @@ -0,0 +1,68 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_UTILS_RANDOM_H_ +#define OPEN_SPIEL_UTILS_RANDOM_H_ + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/random/uniform_real_distribution.h" + +// A suite of utilities that wrap random number generators. +// +// It makes it easy to mock stochastic algorithms, as you can supply +// a fixed "random" sequence that will produce desired behaviour +// you can test against. + +namespace open_spiel { + +class Random { + public: + // Return a random value in the interval <0,1) + virtual double RandomUniform() = 0; + + Random() = default; + Random(const Random &) = default; + virtual ~Random() = default; +}; + +// Random Mersenne Twister. +class RandomMT : public Random { + std::mt19937 generator_; + + public: + explicit RandomMT(int seed) : generator_(std::mt19937(seed)) {} + explicit RandomMT(const std::mt19937 &generator) : generator_(generator) {} + double RandomUniform() final; +}; + +// Helper class to provide fixed sampling, according to specified values. +// It keeps cycling through them when end of the list is reached. +// It is not "random", but we keep the prefix name for consistency. +class RandomFixedSequence : public Random { + const std::vector values_; + int position_ = 0; + + public: + // Return values from this specified list. + RandomFixedSequence(std::initializer_list l) : values_(l) {} + + double RandomUniform() final; +}; + +} // namespace open_spiel + +#endif // OPEN_SPIEL_UTILS_RANDOM_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/random_test.cc b/scenarios/bargaining/open_spiel/open_spiel/utils/random_test.cc new file mode 100644 index 0000000..654409c --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/random_test.cc @@ -0,0 +1,57 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
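
The point of `RandomFixedSequence` above is that stochastic code can be exercised deterministically in tests: the "random" draws are exactly the supplied values, cycled forever once the list is exhausted. A rough Python sketch of the same idea (the class and method names here are illustrative, not part of OpenSpiel):

```
import itertools

class FixedSequence:
    """Cycles through a fixed list of values in place of a real RNG."""

    def __init__(self, *values):
        self._cycle = itertools.cycle(values)

    def random_uniform(self):
        return next(self._cycle)

r = FixedSequence(0.0, 1.0, 2.0)
draws = [r.random_uniform() for _ in range(5)]
assert draws == [0.0, 1.0, 2.0, 0.0, 1.0]  # wraps around after the last value
```
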
+ + +#include "open_spiel/utils/random.h" + +#include + +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace { + +void TestRandomUtility() { + { + RandomFixedSequence r{0.}; + SPIEL_CHECK_EQ(r.RandomUniform(), 0.); + SPIEL_CHECK_EQ(r.RandomUniform(), 0.); + SPIEL_CHECK_EQ(r.RandomUniform(), 0.); + } + + { + RandomFixedSequence r{0., 1., 2.}; + SPIEL_CHECK_EQ(r.RandomUniform(), 0.); + SPIEL_CHECK_EQ(r.RandomUniform(), 1.); + SPIEL_CHECK_EQ(r.RandomUniform(), 2.); + SPIEL_CHECK_EQ(r.RandomUniform(), 0.); + SPIEL_CHECK_EQ(r.RandomUniform(), 1.); + SPIEL_CHECK_EQ(r.RandomUniform(), 2.); + } + + { + std::mt19937 gen(0); + std::uniform_real_distribution uniformDist; + + RandomMT r(0); + SPIEL_CHECK_EQ(r.RandomUniform(), uniformDist(gen)); + SPIEL_CHECK_EQ(r.RandomUniform(), uniformDist(gen)); + SPIEL_CHECK_EQ(r.RandomUniform(), uniformDist(gen)); + } +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char **argv) { open_spiel::TestRandomUtility(); } diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/serializable_circular_buffer.h b/scenarios/bargaining/open_spiel/open_spiel/utils/serializable_circular_buffer.h new file mode 100644 index 0000000..7fa6d78 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/serializable_circular_buffer.h @@ -0,0 +1,69 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_UTILS_SERIALIZABLE_CIRCULAR_BUFFER_H_ +#define OPEN_SPIEL_UTILS_SERIALIZABLE_CIRCULAR_BUFFER_H_ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/circular_buffer.h" + +namespace open_spiel { + +// A serializable circular buffer of fixed size. +template +class SerializableCircularBuffer : public CircularBuffer { + public: + explicit SerializableCircularBuffer(int max_size) + : CircularBuffer(max_size) {} + + // Serialize the data of the buffer to a file. + void SaveBuffer(const std::string& path) const { + nop::Serializer> serializer{path}; + serializer.Write(this->max_size_); + serializer.Write(this->total_added_); + serializer.Write(this->data_); + } + + // Populate the buffer with data from a saved buffer's file. + void LoadBuffer(const std::string& path) { + nop::Deserializer> deserializer{path}; + + // Ensure this buffer's max size equals the max size of the saved buffer. 
+ int max_size; + deserializer.Read(&max_size); + if (max_size != this->max_size_) { + SpielFatalError(absl::StrFormat("Cannot load data from a buffer with max" + "size %d into a buffer with max size %d.", + max_size, + this->max_size_)); + } + + deserializer.Read(&(this->total_added_)); + deserializer.Read(&(this->data_)); + } +}; +} // namespace open_spiel + +#endif // OPEN_SPIEL_UTILS_SERIALIZABLE_CIRCULAR_BUFFER_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/serializable_circular_buffer_test.cc b/scenarios/bargaining/open_spiel/open_spiel/utils/serializable_circular_buffer_test.cc new file mode 100644 index 0000000..c43d817 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/serializable_circular_buffer_test.cc @@ -0,0 +1,157 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/utils/serializable_circular_buffer.h" + +#include + +#include +#include +#include +#include + +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/file.h" +#include "open_spiel/utils/init.h" + +namespace open_spiel { +namespace { + +const char* kSimpleSerializationFilename = "simple_buffer_data.nop"; +const char* kComplexSerializationFilename = "complex_buffer_data.nop"; + +struct TestStruct { + std::vector action_vector; + std::vector float_vector; + std::vector> actions_and_probs; + double double_value; + + bool operator==(const TestStruct& other_test_struct) const { + return action_vector == other_test_struct.action_vector && + float_vector == other_test_struct.float_vector && + actions_and_probs == other_test_struct.actions_and_probs && + double_value == other_test_struct.double_value; + } + + NOP_STRUCTURE(TestStruct, + action_vector, + float_vector, + actions_and_probs, + double_value); +}; + +void TestSerializableCircularBuffer() { + SerializableCircularBuffer buffer(4); + std::mt19937 rng; + std::vector sample; + + SPIEL_CHECK_TRUE(buffer.Empty()); + SPIEL_CHECK_EQ(buffer.Size(), 0); + + buffer.Add(13); + SPIEL_CHECK_FALSE(buffer.Empty()); + SPIEL_CHECK_EQ(buffer.Size(), 1); + SPIEL_CHECK_EQ(buffer.TotalAdded(), 1); + SPIEL_CHECK_EQ(buffer[0], 13); + + sample = buffer.Sample(&rng, 1); + SPIEL_CHECK_EQ(sample.size(), 1); + SPIEL_CHECK_EQ(sample[0], 13); + + buffer.Add(14); + buffer.Add(15); + buffer.Add(16); + + SPIEL_CHECK_EQ(buffer.Size(), 4); + SPIEL_CHECK_EQ(buffer.TotalAdded(), 4); + + sample = buffer.Sample(&rng, 2); + SPIEL_CHECK_EQ(sample.size(), 2); + SPIEL_CHECK_GE(sample[0], 13); + SPIEL_CHECK_LE(sample[0], 16); + SPIEL_CHECK_GE(sample[1], 13); + SPIEL_CHECK_LE(sample[1], 16); + + buffer.Add(17); + buffer.Add(18); + + SPIEL_CHECK_EQ(buffer.Size(), 4); + SPIEL_CHECK_EQ(buffer.TotalAdded(), 6); + + sample = buffer.Sample(&rng, 1); + SPIEL_CHECK_EQ(sample.size(), 1); + SPIEL_CHECK_GE(sample[0], 15); + SPIEL_CHECK_LE(sample[0], 18); +} + +void TestSimpleSerializableCircularBufferSerialization() { + std::string filename = file::GetTmpDir() + "/" + 
kSimpleSerializationFilename; + SerializableCircularBuffer original_buffer(6); + original_buffer.Add(1); + original_buffer.Add(2); + original_buffer.Add(3); + original_buffer.Add(4); + original_buffer.Add(5); + original_buffer.Add(6); + original_buffer.SaveBuffer(filename); + + SerializableCircularBuffer new_buffer(6); + new_buffer.LoadBuffer(filename); + + SPIEL_CHECK_EQ(original_buffer.Size(), new_buffer.Size()); + SPIEL_CHECK_EQ(original_buffer.TotalAdded(), new_buffer.TotalAdded()); + SPIEL_CHECK_TRUE(original_buffer.Data() == new_buffer.Data()); + SPIEL_CHECK_TRUE(file::Remove(filename)); +} + +void TestComplexSerializableCircularBufferSerialization() { + std::string filename = + file::GetTmpDir() + "/" + kComplexSerializationFilename; + TestStruct struct1 = {.action_vector = {1, 2, 3}, + .float_vector = {1.0f, 2.0f, 3.0f}, + .actions_and_probs = {{1, 1.0}, {2, 2.0}, {3, 3.0}}, + .double_value = 1.23}; + TestStruct struct2 = {.action_vector = {4, 5, 6}, + .float_vector = {4.0f, 5.0f, 6.0f}, + .actions_and_probs = {{4, 4.0}, {5, 5.0}, {6, 6.0}}, + .double_value = 4.56}; + TestStruct struct3 = {.action_vector = {7, 8, 9}, + .float_vector = {7.0f, 8.0f, 9.0f}, + .actions_and_probs = {{7, 7.0}, {8, 8.0}, {9, 9.0}}, + .double_value = 7.89}; + + SerializableCircularBuffer original_buffer(3); + original_buffer.Add(struct1); + original_buffer.Add(struct2); + original_buffer.Add(struct3); + original_buffer.SaveBuffer(filename); + + SerializableCircularBuffer new_buffer(3); + new_buffer.LoadBuffer(filename); + + SPIEL_CHECK_EQ(original_buffer.Size(), new_buffer.Size()); + SPIEL_CHECK_EQ(original_buffer.TotalAdded(), new_buffer.TotalAdded()); + SPIEL_CHECK_TRUE(original_buffer.Data() == new_buffer.Data()); + SPIEL_CHECK_TRUE(file::Remove(filename)); +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::Init("", &argc, &argv, true); + open_spiel::TestSerializableCircularBuffer(); + open_spiel::TestSimpleSerializableCircularBufferSerialization(); + open_spiel::TestComplexSerializableCircularBufferSerialization(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/serialization.h b/scenarios/bargaining/open_spiel/open_spiel/utils/serialization.h new file mode 100644 index 0000000..d15c5bf --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/serialization.h @@ -0,0 +1,50 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_UTILS_SERIALIZATION_H_ +#define OPEN_SPIEL_UTILS_SERIALIZATION_H_ + +#include +#include + +#include "open_spiel/abseil-cpp/absl/strings/str_cat.h" +#include "open_spiel/abseil-cpp/absl/strings/str_format.h" + +namespace open_spiel { + +// Formats doubles with human-readable strings with a specified number of +// decimal places, i.e. results in lossy serialization. 
+struct SimpleDoubleFormatter { + SimpleDoubleFormatter(int precision = 6) : precision(precision) {} + + void operator()(std::string* out, const double& d) const { + std::stringstream stream; + stream << std::fixed << std::setprecision(precision) << d; + absl::StrAppend(out, stream.str()); + } + + const int precision; +}; + +// Formats doubles with non-portable bitwise representation hex strings, i.e. +// results in lossless serialization. +struct HexDoubleFormatter { + void operator()(std::string* out, const double& d) const { + absl::StrAppend(out, absl::StrFormat("%a", d)); + } +}; + +} // namespace open_spiel + +#endif // OPEN_SPIEL_UTILS_SERIALIZATION_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/stats.h b/scenarios/bargaining/open_spiel/open_spiel/utils/stats.h new file mode 100644 index 0000000..799f70e --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/stats.h @@ -0,0 +1,129 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_UTILS_STATS_H_ +#define OPEN_SPIEL_UTILS_STATS_H_ + +#include +#include +#include +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/algorithm/container.h" +#include "open_spiel/utils/json.h" + +namespace open_spiel { + +// Track the count, min max, avg and standard deviation. +class BasicStats { + public: + BasicStats() { Reset(); } + + // Reset all the stats to 0. + void Reset() { + num_ = 0; + min_ = std::numeric_limits::max(); + max_ = std::numeric_limits::min(); + sum_ = 0; + sum_sq_ = 0; + } + + // Merge two BasicStats. Useful for merging per thread stats before printing. + BasicStats& operator+=(const BasicStats& o) { + num_ += o.num_; + sum_ += o.sum_; + sum_sq_ += o.sum_sq_; + min_ = std::min(min_, o.min_); + max_ = std::max(max_, o.max_); + return *this; + } + + void Add(double val) { + min_ = std::min(min_, val); + max_ = std::max(max_, val); + sum_ += val; + sum_sq_ += val * val; + num_ += 1; + } + + int64_t Num() const { return num_; } + double Min() const { return (num_ == 0 ? 0 : min_); } + double Max() const { return (num_ == 0 ? 0 : max_); } + double Avg() const { return (num_ == 0 ? 0 : sum_ / num_); } + double StdDev() const { + if (num_ <= 1) return 0; + // Numerical precision can cause variance to be negative, leading to NaN's. + double variance = (sum_sq_ - sum_ * sum_ / num_) / (num_ - 1); + return variance <= 0 ? 0 : std::sqrt(variance); + } + + json::Object ToJson() const { + return { + {"num", Num()}, + {"min", Min()}, + {"max", Max()}, + {"avg", Avg()}, + {"std_dev", StdDev()}, + }; + } + + private: + int64_t num_; + double min_; + double max_; + double sum_; + double sum_sq_; +}; + +// Track the occurrences for `count` buckets. You need to decide how to map your +// data into the buckets. Mainly useful for scalar values. 
+class HistogramNumbered {
+ public:
+  explicit HistogramNumbered(int num_buckets) : counts_(num_buckets, 0) {}
+  void Reset() { absl::c_fill(counts_, 0); }
+  void Add(int bucket_id) {
+    bucket_id = std::clamp<int>(bucket_id, 0, counts_.size() - 1);
+    counts_[bucket_id] += 1;
+  }
+  json::Array ToJson() const { return json::CastToArray(counts_); }
+
+ private:
+  std::vector<int> counts_;
+};
+
+// Same as HistogramNumbered, but each bucket has a name associated with it
+// and is returned in the json output. Mainly useful for categorical values.
+class HistogramNamed {
+ public:
+  explicit HistogramNamed(std::vector<std::string> names)
+      : counts_(names.size(), 0), names_(names) {}
+  void Reset() { absl::c_fill(counts_, 0); }
+  void Add(int bucket_id) { counts_[bucket_id] += 1; }
+  json::Object ToJson() const {
+    return {
+        {"counts", json::CastToArray(counts_)},
+        {"names", json::CastToArray(names_)},
+    };
+  }
+
+ private:
+  std::vector<int> counts_;
+  std::vector<std::string> names_;
+};
+
+}  // namespace open_spiel
+
+#endif  // OPEN_SPIEL_UTILS_STATS_H_
diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/stats_test.cc b/scenarios/bargaining/open_spiel/open_spiel/utils/stats_test.cc
new file mode 100644
index 0000000..f375490
--- /dev/null
+++ b/scenarios/bargaining/open_spiel/open_spiel/utils/stats_test.cc
@@ -0,0 +1,130 @@
+// Copyright 2021 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
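
`BasicStats::StdDev()` above computes a sample standard deviation from the running sums, sqrt((sum_sq - sum^2/n) / (n - 1)), clamping negative variance from round-off to zero. The numbers the test below expects can be checked by hand; a small Python sketch of the same formula (illustrative only):

```
import math

def std_dev(values):
    # Sample standard deviation from running sums, as in BasicStats::StdDev.
    n = len(values)
    if n <= 1:
        return 0.0
    s = sum(values)
    ss = sum(v * v for v in values)
    variance = (ss - s * s / n) / (n - 1)
    return math.sqrt(variance) if variance > 0 else 0.0

assert abs(std_dev([10, 30]) - 14.14213562) < 1e-6
assert std_dev([10, 30, 20]) == 10.0
```
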
+ +#include "open_spiel/utils/stats.h" + +#include "open_spiel/spiel_utils.h" +#include "open_spiel/utils/json.h" + +namespace open_spiel { +namespace { + +void TestBasicStats() { + BasicStats stats; + + SPIEL_CHECK_EQ(stats.Num(), 0); + SPIEL_CHECK_EQ(stats.Min(), 0); + SPIEL_CHECK_EQ(stats.Max(), 0); + SPIEL_CHECK_EQ(stats.Avg(), 0); + SPIEL_CHECK_EQ(stats.StdDev(), 0); + + stats.Add(10); + + SPIEL_CHECK_EQ(stats.Num(), 1); + SPIEL_CHECK_EQ(stats.Min(), 10); + SPIEL_CHECK_EQ(stats.Max(), 10); + SPIEL_CHECK_EQ(stats.Avg(), 10); + SPIEL_CHECK_EQ(stats.StdDev(), 0); + + stats.Add(30); + + SPIEL_CHECK_EQ(stats.Num(), 2); + SPIEL_CHECK_EQ(stats.Min(), 10); + SPIEL_CHECK_EQ(stats.Max(), 30); + SPIEL_CHECK_EQ(stats.Avg(), 20); + SPIEL_CHECK_FLOAT_EQ(stats.StdDev(), 14.14213562); + + stats.Add(20); + + SPIEL_CHECK_EQ(stats.Num(), 3); + SPIEL_CHECK_EQ(stats.Min(), 10); + SPIEL_CHECK_EQ(stats.Max(), 30); + SPIEL_CHECK_EQ(stats.Avg(), 20); + SPIEL_CHECK_FLOAT_EQ(stats.StdDev(), 10); + + SPIEL_CHECK_EQ(stats.ToJson(), json::Object({ + {"num", 3}, + {"min", 10.0}, + {"max", 30.0}, + {"avg", 20.0}, + {"std_dev", 10.0}, + })); + + stats.Reset(); + + SPIEL_CHECK_EQ(stats.Num(), 0); + SPIEL_CHECK_EQ(stats.Min(), 0); + SPIEL_CHECK_EQ(stats.Max(), 0); + SPIEL_CHECK_EQ(stats.Avg(), 0); + SPIEL_CHECK_EQ(stats.StdDev(), 0); +} + +void TestHistogramNumbered() { + HistogramNumbered hist(3); + hist.Add(0); + hist.Add(1); + hist.Add(2); + hist.Add(2); + hist.Add(2); + + SPIEL_CHECK_EQ(hist.ToJson(), json::Array({1, 1, 3})); + + hist.Reset(); + + SPIEL_CHECK_EQ(hist.ToJson(), json::Array({0, 0, 0})); +} + +void TestHistogramTooLarge() { + HistogramNumbered hist(3); + hist.Add(-2); + hist.Add(-1); + hist.Add(0); + hist.Add(1); + hist.Add(2); + hist.Add(3); + hist.Add(4); + + SPIEL_CHECK_EQ(hist.ToJson(), json::Array({3, 1, 3})); +} + +void TestHistogramNamed() { + HistogramNamed hist({"win", "loss", "draw"}); + hist.Add(0); + hist.Add(1); + hist.Add(2); + hist.Add(2); + hist.Add(2); + + SPIEL_CHECK_EQ(hist.ToJson(), json::Object({ + {"counts", json::Array({1, 1, 3})}, + {"names", json::Array({"win", "loss", "draw"})}, + })); + + hist.Reset(); + + SPIEL_CHECK_EQ(hist.ToJson(), json::Object({ + {"counts", json::Array({0, 0, 0})}, + {"names", json::Array({"win", "loss", "draw"})}, + })); +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::TestBasicStats(); + open_spiel::TestHistogramNumbered(); + open_spiel::TestHistogramTooLarge(); + open_spiel::TestHistogramNamed(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/tensor_view.h b/scenarios/bargaining/open_spiel/open_spiel/utils/tensor_view.h new file mode 100644 index 0000000..b0e5b74 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/tensor_view.h @@ -0,0 +1,74 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef OPEN_SPIEL_UTILS_TENSOR_VIEW_H_ +#define OPEN_SPIEL_UTILS_TENSOR_VIEW_H_ + +#include +#include +#include + +#include "open_spiel/abseil-cpp/absl/types/span.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { + +// Treat a `absl::Span` as a tensor of fixed shape. The rank (number of +// dimensions) must be known at compile time, though the actual sizes of the +// dimensions can be supplied at construction time. It then lets you index into +// the vector easily without having to compute the 1d-vector's indices manually. +template +class TensorView { + public: + constexpr TensorView(absl::Span values, + const std::array& shape, bool reset) + : values_(values), shape_(shape) { + SPIEL_CHECK_EQ(size(), values_.size()); + if (reset) std::fill(values.begin(), values.end(), 0); + } + + constexpr int size() const { + return std::accumulate(shape_.begin(), shape_.end(), 1, + std::multiplies()); + } + + void clear() { std::fill(values_.begin(), values_.end(), 0.0); } + + constexpr int index(const std::array& args) const { + int ind = 0; + for (int i = 0; i < Rank; ++i) { + ind = ind * shape_[i] + args[i]; + } + return ind; + } + + constexpr float& operator[](const std::array& args) { + return values_[index(args)]; + } + constexpr const float& operator[](const std::array& args) const { + return values_[index(args)]; + } + + constexpr int rank() const { return Rank; } + constexpr const std::array shape() const { return shape_; } + constexpr int shape(int i) const { return shape_[i]; } + + private: + absl::Span values_; + const std::array shape_; +}; + +} // namespace open_spiel + +#endif // OPEN_SPIEL_UTILS_TENSOR_VIEW_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/tensor_view_test.cc b/scenarios/bargaining/open_spiel/open_spiel/utils/tensor_view_test.cc new file mode 100644 index 0000000..3e0409d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/tensor_view_test.cc @@ -0,0 +1,141 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
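
The `index()` computation in `TensorView` above is plain row-major flattening, so the view can address a flat observation buffer without materializing a tensor. A quick NumPy cross-check of the same arithmetic (an illustrative sketch, not part of the library):

```
import numpy as np

def flat_index(shape, args):
    # Row-major flattening, mirroring TensorView::index above.
    ind = 0
    for dim, a in zip(shape, args):
        ind = ind * dim + a
    return ind

shape = (4, 2, 3)
for a in range(4):
    for b in range(2):
        for c in range(3):
            assert flat_index(shape, (a, b, c)) == np.ravel_multi_index((a, b, c), shape)
```
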
+ +#include "open_spiel/utils/tensor_view.h" + +#include +#include + +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace { + +void TestTensorView() { + std::vector values; + + values.resize(6); + TensorView<2> view2(absl::MakeSpan(values), {2, 3}, true); + SPIEL_CHECK_EQ(view2.size(), 6); + SPIEL_CHECK_EQ(values.size(), 6); + SPIEL_CHECK_EQ(view2.rank(), 2); + SPIEL_CHECK_EQ(view2.shape(), (std::array{2, 3})); + SPIEL_CHECK_EQ(view2.shape(0), 2); + SPIEL_CHECK_EQ(view2.shape(1), 3); + + // All 0 initialized + for (int i = 0; i < values.size(); ++i) { + SPIEL_CHECK_EQ(values[i], 0); + values[i] = i + 1; + } + + // Index correctly + for (int a = 0, i = 0; a < view2.shape(0); ++a) { + for (int b = 0; b < view2.shape(1); ++b, ++i) { + SPIEL_CHECK_EQ(view2.index({a, b}), i); + SPIEL_CHECK_EQ((view2[{a, b}]), i + 1); + view2[{a, b}] = -i; + } + } + + // Index correctly + for (int i = 0; i < values.size(); ++i) { + SPIEL_CHECK_EQ(values[i], -i); + } + + // Clear works + view2.clear(); + + for (int i = 0; i < values.size(); ++i) { + SPIEL_CHECK_EQ(values[i], 0); + values[i] = i + 1; + } + + // Works for more dimensions + values.resize(24); + TensorView<3> view3(absl::MakeSpan(values), {4, 2, 3}, true); + SPIEL_CHECK_EQ(view3.size(), 24); + SPIEL_CHECK_EQ(values.size(), 24); + SPIEL_CHECK_EQ(view3.rank(), 3); + SPIEL_CHECK_EQ(view3.shape(), (std::array{4, 2, 3})); + SPIEL_CHECK_EQ(view3.shape(0), 4); + SPIEL_CHECK_EQ(view3.shape(1), 2); + SPIEL_CHECK_EQ(view3.shape(2), 3); + + // All 0 initialized + for (int i = 0; i < values.size(); ++i) { + SPIEL_CHECK_EQ(values[i], 0); + values[i] = i + 1; + } + + // Index correctly + for (int a = 0, i = 0; a < view3.shape(0); ++a) { + for (int b = 0; b < view3.shape(1); ++b) { + for (int c = 0; c < view3.shape(2); ++c, ++i) { + SPIEL_CHECK_EQ(view3.index({a, b, c}), i); + SPIEL_CHECK_EQ((view3[{a, b, c}]), i + 1); + view3[{a, b, c}] = -i; + } + } + } + + // Index correctly + for (int i = 0; i < values.size(); ++i) { + SPIEL_CHECK_EQ(values[i], -i); + } + + // Works for a single dimension + values.resize(8); + TensorView<1> view1(absl::MakeSpan(values), {8}, true); + SPIEL_CHECK_EQ(view1.size(), 8); + SPIEL_CHECK_EQ(values.size(), 8); + SPIEL_CHECK_EQ(view1.rank(), 1); + SPIEL_CHECK_EQ(view1.shape(), (std::array{8})); + SPIEL_CHECK_EQ(view1.shape(0), 8); + + // All 0 initialized + for (int i = 0; i < values.size(); ++i) { + SPIEL_CHECK_EQ(values[i], 0); + values[i] = i + 1; + } + + // Index correctly + for (int a = 0; a < view1.shape(0); ++a) { + SPIEL_CHECK_EQ(view1.index({a}), a); + SPIEL_CHECK_EQ(view1[{a}], a + 1); + view1[{a}] = -a; + } + + // Keeps the previous values. 
+ TensorView<2> view_keep(absl::MakeSpan(values), {2, 4}, false); + SPIEL_CHECK_EQ(view_keep.size(), 8); + SPIEL_CHECK_EQ(values.size(), 8); + SPIEL_CHECK_EQ(view_keep.rank(), 2); + SPIEL_CHECK_EQ(view_keep.shape(), (std::array{2, 4})); + SPIEL_CHECK_EQ(view_keep.shape(0), 2); + SPIEL_CHECK_EQ(view_keep.shape(1), 4); + + // Index correctly + for (int a = 0, i = 0; a < view_keep.shape(0); ++a) { + for (int b = 0; b < view_keep.shape(1); ++b, ++i) { + SPIEL_CHECK_EQ(view_keep.index({a, b}), i); + SPIEL_CHECK_EQ((view_keep[{a, b}]), -i); + } + } +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char** argv) { open_spiel::TestTensorView(); } diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/thread.cc b/scenarios/bargaining/open_spiel/open_spiel/utils/thread.cc new file mode 100644 index 0000000..e1da13d --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/thread.cc @@ -0,0 +1,35 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/utils/thread.h" + +#include // NOLINT + +namespace open_spiel { + +class Thread::ThreadImpl : public std::thread { + public: + using std::thread::thread; // Inherit the constructors. +}; + +Thread::Thread(std::function fn) : thread_(new ThreadImpl(fn)) {} + +// defaults required to be here for pimpl to work. +Thread::~Thread() = default; +Thread::Thread(Thread&& other) = default; +Thread& Thread::operator=(Thread&& other) = default; + +void Thread::join() { thread_->join(); } + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/thread.h b/scenarios/bargaining/open_spiel/open_spiel/utils/thread.h new file mode 100644 index 0000000..ead8aab --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/thread.h @@ -0,0 +1,57 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_UTILS_THREAD_H_ +#define OPEN_SPIEL_UTILS_THREAD_H_ + +#include +#include +#include + +namespace open_spiel { + +// A simple thread class similar to std::thread, but only accepting a function +// without args. Wrap your args in a lambda if necessary. Needed for +// compatibility with Google's libraries. +class Thread { + public: + explicit Thread(std::function fn); + ~Thread(); + + // Thread is move only. 
+ Thread(Thread&& other); + Thread& operator=(Thread&& other); + Thread(const Thread&) = delete; + Thread& operator=(const Thread&) = delete; + + void join(); + + private: + class ThreadImpl; + std::unique_ptr thread_; +}; + +// A token for whether a thread has been requested to stop. +class StopToken { + public: + StopToken() : token_(false) {} + void Stop() { token_ = true; } + bool StopRequested() const { return token_; } + private: + std::atomic token_; +}; + +} // namespace open_spiel + +#endif // OPEN_SPIEL_UTILS_THREAD_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/thread_test.cc b/scenarios/bargaining/open_spiel/open_spiel/utils/thread_test.cc new file mode 100644 index 0000000..52d4571 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/thread_test.cc @@ -0,0 +1,52 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/utils/thread.h" + +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace { + +void TestThread() { + int value = 1; + Thread thread([&](){ value = 2; }); + thread.join(); + SPIEL_CHECK_EQ(value, 2); +} + +void TestThreadMove() { + int value = 1; + Thread thread([&](){ value = 2; }); + Thread thread2(std::move(thread)); + thread2.join(); + SPIEL_CHECK_EQ(value, 2); +} + +void TestThreadMoveAssign() { + int value = 1; + Thread thread([&](){ value = 2; }); + Thread thread2 = std::move(thread); + thread2.join(); + SPIEL_CHECK_EQ(value, 2); +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::TestThread(); + open_spiel::TestThreadMove(); + open_spiel::TestThreadMoveAssign(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/threaded_queue.h b/scenarios/bargaining/open_spiel/open_spiel/utils/threaded_queue.h new file mode 100644 index 0000000..c84d58a --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/threaded_queue.h @@ -0,0 +1,104 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_UTILS_THREADED_QUEUE_H_ +#define OPEN_SPIEL_UTILS_THREADED_QUEUE_H_ + +#include + +#include "open_spiel/abseil-cpp/absl/synchronization/mutex.h" +#include "open_spiel/abseil-cpp/absl/time/clock.h" +#include "open_spiel/abseil-cpp/absl/time/time.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" + +namespace open_spiel { + +// A threadsafe-queue. 
+template +class ThreadedQueue { + public: + explicit ThreadedQueue(int max_size) : max_size_(max_size) {} + + // Add an element to the queue. + bool Push(const T& value) { return Push(value, absl::InfiniteDuration()); } + bool Push(const T& value, absl::Duration wait) { + return Push(value, absl::Now() + wait); + } + bool Push(const T& value, absl::Time deadline) { + absl::MutexLock lock(&m_); + if (block_new_values_) { + return false; + } + while (q_.size() >= max_size_) { + if (absl::Now() > deadline || block_new_values_) { + return false; + } + cv_.WaitWithDeadline(&m_, deadline); + } + q_.push(value); + cv_.Signal(); + return true; + } + + absl::optional Pop() { return Pop(absl::InfiniteDuration()); } + absl::optional Pop(absl::Duration wait) { return Pop(absl::Now() + wait); } + absl::optional Pop(absl::Time deadline) { + absl::MutexLock lock(&m_); + while (q_.empty()) { + if (absl::Now() > deadline || block_new_values_) { + return absl::nullopt; + } + cv_.WaitWithDeadline(&m_, deadline); + } + T val = q_.front(); + q_.pop(); + cv_.Signal(); + return val; + } + + bool Empty() { + absl::MutexLock lock(&m_); + return q_.empty(); + } + + void Clear() { + absl::MutexLock lock(&m_); + while (!q_.empty()) { + q_.pop(); + } + } + + int Size() { + absl::MutexLock lock(&m_); + return q_.size(); + } + + // Causes pushing new values to fail. Useful for shutting down the queue. + void BlockNewValues() { + absl::MutexLock lock(&m_); + block_new_values_ = true; + cv_.SignalAll(); + } + + private: + bool block_new_values_ = false; + int max_size_; + std::queue q_; + absl::Mutex m_; + absl::CondVar cv_; +}; + +} // namespace open_spiel + +#endif // OPEN_SPIEL_UTILS_THREADED_QUEUE_H_ diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/threaded_queue_test.cc b/scenarios/bargaining/open_spiel/open_spiel/utils/threaded_queue_test.cc new file mode 100644 index 0000000..8d3e0b5 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/threaded_queue_test.cc @@ -0,0 +1,91 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
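
Functionally, `ThreadedQueue` above is a bounded blocking queue with per-call deadlines plus a shutdown switch (`BlockNewValues`) that makes further pushes fail. A loose Python analogue built on the standard library, useful only to illustrate the behaviour the test below exercises (names and details here are illustrative, not a faithful port):

```
import queue

class BoundedQueue:
    """Rough analogue: bounded, with timeouts, and a shutdown switch."""

    def __init__(self, max_size):
        self._q = queue.Queue(maxsize=max_size)
        self._blocked = False

    def push(self, value, timeout=None):
        if self._blocked:
            return False
        try:
            self._q.put(value, timeout=timeout)
            return True
        except queue.Full:
            return False

    def pop(self, timeout=None):
        try:
            return self._q.get(timeout=timeout)
        except queue.Empty:
            return None

    def block_new_values(self):
        self._blocked = True

q = BoundedQueue(2)
assert q.push(1) and q.push(2)
assert not q.push(3, timeout=0.01)   # full, so the deadline expires
q.block_new_values()
assert not q.push(4)                 # pushes fail after shutdown
assert q.pop() == 1 and q.pop() == 2
assert q.pop(timeout=0.01) is None   # nothing left to drain
```
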
+ +#include "open_spiel/utils/threaded_queue.h" + +#include "open_spiel/abseil-cpp/absl/time/clock.h" +#include "open_spiel/abseil-cpp/absl/time/time.h" +#include "open_spiel/abseil-cpp/absl/types/optional.h" +#include "open_spiel/spiel_utils.h" + +namespace open_spiel { +namespace { + +void TestThreadedQueue() { + ThreadedQueue q(4); + + auto CheckPopEq = [&q](int expected) { + absl::optional v = q.Pop(); + SPIEL_CHECK_TRUE(v); + SPIEL_CHECK_EQ(*v, expected); + }; + + SPIEL_CHECK_TRUE(q.Empty()); + SPIEL_CHECK_EQ(q.Size(), 0); + + SPIEL_CHECK_FALSE(q.Pop(absl::Milliseconds(1))); + SPIEL_CHECK_FALSE(q.Pop(absl::Now() + absl::Milliseconds(1))); + + SPIEL_CHECK_TRUE(q.Push(10, absl::Now() + absl::Milliseconds(1))); + SPIEL_CHECK_FALSE(q.Empty()); + SPIEL_CHECK_EQ(q.Size(), 1); + + CheckPopEq(10); + + SPIEL_CHECK_TRUE(q.Push(11)); + SPIEL_CHECK_TRUE(q.Push(12)); + SPIEL_CHECK_EQ(q.Size(), 2); + SPIEL_CHECK_TRUE(q.Push(13)); + SPIEL_CHECK_TRUE(q.Push(14)); + SPIEL_CHECK_EQ(q.Size(), 4); + SPIEL_CHECK_FALSE(q.Push(15, absl::Milliseconds(1))); + + CheckPopEq(11); + + SPIEL_CHECK_TRUE(q.Push(16, absl::Milliseconds(1))); + + CheckPopEq(12); + CheckPopEq(13); + CheckPopEq(14); + CheckPopEq(16); + SPIEL_CHECK_EQ(q.Size(), 0); + + SPIEL_CHECK_TRUE(q.Push(17)); + SPIEL_CHECK_TRUE(q.Push(18)); + SPIEL_CHECK_EQ(q.Size(), 2); + + q.Clear(); + + SPIEL_CHECK_TRUE(q.Empty()); + SPIEL_CHECK_EQ(q.Size(), 0); + + SPIEL_CHECK_TRUE(q.Push(19)); + SPIEL_CHECK_TRUE(q.Push(20)); + + q.BlockNewValues(); + + SPIEL_CHECK_EQ(q.Size(), 2); + SPIEL_CHECK_FALSE(q.Push(21)); + SPIEL_CHECK_EQ(q.Size(), 2); + CheckPopEq(19); + CheckPopEq(20); + SPIEL_CHECK_FALSE(q.Pop()); +} + +} // namespace +} // namespace open_spiel + +int main(int argc, char** argv) { + open_spiel::TestThreadedQueue(); +} diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/usage_logging.cc b/scenarios/bargaining/open_spiel/open_spiel/utils/usage_logging.cc new file mode 100644 index 0000000..07faf02 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/usage_logging.cc @@ -0,0 +1,23 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "open_spiel/utils/usage_logging.h" + + +namespace open_spiel { + +void LogUsage() { +} + +} // namespace open_spiel diff --git a/scenarios/bargaining/open_spiel/open_spiel/utils/usage_logging.h b/scenarios/bargaining/open_spiel/open_spiel/utils/usage_logging.h new file mode 100644 index 0000000..091b969 --- /dev/null +++ b/scenarios/bargaining/open_spiel/open_spiel/utils/usage_logging.h @@ -0,0 +1,26 @@ +// Copyright 2021 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_SPIEL_UTILS_USAGE_LOGGING_H_ +#define OPEN_SPIEL_UTILS_USAGE_LOGGING_H_ + +namespace open_spiel { + +// Record OpenSpiel usage. No-op for external users and is called upon creation +// of an OpenSpiel game. +void LogUsage(); + +} // namespace open_spiel + +#endif // OPEN_SPIEL_UTILS_USAGE_LOGGING_H_ diff --git a/scenarios/bargaining/open_spiel/readthedocs.yml b/scenarios/bargaining/open_spiel/readthedocs.yml new file mode 100644 index 0000000..bc90f5a --- /dev/null +++ b/scenarios/bargaining/open_spiel/readthedocs.yml @@ -0,0 +1,32 @@ +# .readthedocs.yml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/conf.py + +# Optionally build your docs in additional formats such as PDF and ePub +formats: all + +# Optionally set the version of Python and requirements required to build your docs +python: + install: + - requirements: docs/requirements.readthedocs.txt + +build: + os: ubuntu-20.04 + tools: + python: "3.8" + jobs: + pre_build: + - echo "Running pre-build commands." + - echo `date` + - echo `pwd` + - echo `ls` + - echo `ls docs` + - echo "Fixing the table links" + - bash docs/fix_table_links.sh diff --git a/scenarios/bargaining/open_spiel/requirements.txt b/scenarios/bargaining/open_spiel/requirements.txt new file mode 100644 index 0000000..5bf8d9f --- /dev/null +++ b/scenarios/bargaining/open_spiel/requirements.txt @@ -0,0 +1,17 @@ +# The core OpenSpiel pip dependencies. +# +# Note that there are optional python packages used by some of the python +# algorithms or tools in OpenSpiel that are purposely excluded (e.g., +# cvxopt, nashpy, matplotlib, etc.) This is because we want to keep only +# the dependencies that are absolutely necessary to use the Python API. +# +# However, when testing using continuous integration like GitHub Actions, +# we install several more packages to ensure the proper tests are +# included. See open_spiel/scripts/python_extra_deps.sh for the extra +# packages and their versions we use for testing purposes. +pip >= 20.0.2 +attrs >= 19.3.0 +absl-py >= 0.10.0 +numpy >= 1.21.5 +scipy >= 1.10.1 +ml-collections >= 0.1.1 diff --git a/scenarios/bargaining/open_spiel/setup.py b/scenarios/bargaining/open_spiel/setup.py new file mode 100644 index 0000000..8f65a22 --- /dev/null +++ b/scenarios/bargaining/open_spiel/setup.py @@ -0,0 +1,141 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""The setup script for setuptools. + +See https://setuptools.readthedocs.io/en/latest/setuptools.html +""" + +import os +import subprocess +import sys + +import setuptools +from setuptools.command.build_ext import build_ext + + +class CMakeExtension(setuptools.Extension): + """An extension with no sources. + + We do not want distutils to handle any of the compilation (instead we rely + on CMake), so we always pass an empty list to the constructor. + """ + + def __init__(self, name, sourcedir=""): + super().__init__(name, sources=[]) + self.sourcedir = os.path.abspath(sourcedir) + + +class BuildExt(build_ext): + """Our custom build_ext command. + + Uses CMake to build extensions instead of a bare compiler (e.g. gcc, clang). + """ + + def run(self): + self._check_build_environment() + for ext in self.extensions: + self.build_extension(ext) + + def _check_build_environment(self): + """Check for required build tools: CMake, C++ compiler, and python dev.""" + try: + subprocess.check_call(["cmake", "--version"]) + except OSError as e: + ext_names = ", ".join(e.name for e in self.extensions) + raise RuntimeError( + "CMake must be installed to build" + + f"the following extensions: {ext_names}") from e + print("Found CMake") + + cxx = "clang++" + if os.environ.get("CXX") is not None: + cxx = os.environ.get("CXX") + try: + subprocess.check_call([cxx, "--version"]) + except OSError as e: + ext_names = ", ".join(e.name for e in self.extensions) + raise RuntimeError( + "A C++ compiler that supports c++17 must be installed to build the " + + "following extensions: {}".format(ext_names) + + ". We recommend: Clang version >= 7.0.0." + ) from e + print("Found C++ compiler: {}".format(cxx)) + + def build_extension(self, ext): + extension_dir = os.path.abspath( + os.path.dirname(self.get_ext_fullpath(ext.name))) + cxx = "clang++" + if os.environ.get("CXX") is not None: + cxx = os.environ.get("CXX") + env = os.environ.copy() + cmake_args = [ + f"-DPython3_EXECUTABLE={sys.executable}", + f"-DCMAKE_CXX_COMPILER={cxx}", + f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extension_dir}", + ] + if not os.path.exists(self.build_temp): + os.makedirs(self.build_temp) + subprocess.check_call( + ["cmake", ext.sourcedir] + cmake_args, cwd=self.build_temp, + env=env) + + # Build only pyspiel (for pip package) + subprocess.check_call(["make", "pyspiel", f"-j{os.cpu_count()}"], + cwd=self.build_temp, + env=env) + + +def _get_requirements(requirements_file): # pylint: disable=g-doc-args + """Returns a list of dependencies for setup() from requirements.txt. + + Currently a requirements.txt is being used to specify dependencies. In order + to avoid specifying it in two places, we're going to use that file as the + source of truth. + """ + with open(requirements_file) as f: + return [_parse_line(line) for line in f if line] + + +def _parse_line(s): + """Parses a line of a requirements.txt file.""" + requirement, *_ = s.split("#") + return requirement.strip() + + +# Get the requirements from file. 
+# When installing from pip it is in the parent directory +req_file = "" +if os.path.exists("requirements.txt"): + req_file = "requirements.txt" +else: + req_file = "../requirements.txt" + +setuptools.setup( + name="open_spiel", + version="1.6", + license="Apache 2.0", + author="The OpenSpiel authors", + author_email="open_spiel@google.com", + description="A Framework for Reinforcement Learning in Games", + long_description=open("README.md").read(), + long_description_content_type="text/markdown", + url="https://github.com/deepmind/open_spiel", + install_requires=_get_requirements(req_file), + python_requires=">=3.9", + ext_modules=[CMakeExtension("pyspiel", sourcedir="open_spiel")], + cmdclass={"build_ext": BuildExt}, + zip_safe=False, + packages=setuptools.find_packages(include=["open_spiel", "open_spiel.*"]), +) diff --git a/scenarios/bargaining/rl_agent_checkpoints/__init__.py b/scenarios/bargaining/rl_agent_checkpoints/__init__.py new file mode 100644 index 0000000..73b1a09 --- /dev/null +++ b/scenarios/bargaining/rl_agent_checkpoints/__init__.py @@ -0,0 +1,4 @@ +""" +Package marker for RL agent checkpoints (NFSP/RNaD). +""" + diff --git a/scenarios/bargaining/rl_agent_checkpoints/nfsp/nfsp_bg4.pt b/scenarios/bargaining/rl_agent_checkpoints/nfsp/nfsp_bg4.pt new file mode 100644 index 0000000..299683e Binary files /dev/null and b/scenarios/bargaining/rl_agent_checkpoints/nfsp/nfsp_bg4.pt differ diff --git a/scenarios/bargaining/rl_agent_checkpoints/nfsp/nfsp_bg6.pt b/scenarios/bargaining/rl_agent_checkpoints/nfsp/nfsp_bg6.pt new file mode 100644 index 0000000..43f709c Binary files /dev/null and b/scenarios/bargaining/rl_agent_checkpoints/nfsp/nfsp_bg6.pt differ diff --git a/scenarios/bargaining/rl_agent_checkpoints/nfsp/nfsp_ng5.pt b/scenarios/bargaining/rl_agent_checkpoints/nfsp/nfsp_ng5.pt new file mode 100644 index 0000000..f49f473 Binary files /dev/null and b/scenarios/bargaining/rl_agent_checkpoints/nfsp/nfsp_ng5.pt differ diff --git a/scenarios/bargaining/rl_agent_checkpoints/rnad/__init__.py b/scenarios/bargaining/rl_agent_checkpoints/rnad/__init__.py new file mode 100644 index 0000000..0006460 --- /dev/null +++ b/scenarios/bargaining/rl_agent_checkpoints/rnad/__init__.py @@ -0,0 +1,4 @@ +""" +Package marker for RNAD checkpoints and implementation. +""" + diff --git a/scenarios/bargaining/rl_agent_checkpoints/rnad/rnad.py b/scenarios/bargaining/rl_agent_checkpoints/rnad/rnad.py new file mode 100644 index 0000000..eb40177 --- /dev/null +++ b/scenarios/bargaining/rl_agent_checkpoints/rnad/rnad.py @@ -0,0 +1,1194 @@ +# Copyright 2019 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Python implementation of R-NaD (https://arxiv.org/pdf/2206.15378.pdf).""" + +import enum +import functools +from typing import Any, Callable, Sequence, Tuple + +import chex +import haiku as hk +import jax +from jax import lax +from jax import numpy as jnp +from jax import tree_util as tree +import numpy as np +import optax + +import sys, os + +#makesure load in local openspiel +repo_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +local_open_spiel = os.path.join(repo_root, "open_spiel") +local_pyspiel_build = os.path.join(repo_root, "build", "python") +for p in (local_pyspiel_build, local_open_spiel): + if p not in sys.path: + sys.path.insert(0, p) +from open_spiel.python import policy as policy_lib +import pyspiel + +print("pyspiel:", getattr(pyspiel, "__file__", pyspiel)) +print("open_spiel.policy:", policy_lib.__file__) + + +# Some handy aliases. +# Since most of these are just aliases for a "bag of tensors", the goal +# is to improve the documentation, and not to actually enforce correctness +# through pytype. +Params = chex.ArrayTree + + +class EntropySchedule: + """An increasing list of steps where the regularisation network is updated. + + Example + EntropySchedule([3, 5, 10], [2, 4, 1]) + => [0, 3, 6, 11, 16, 21, 26, 10] + | 3 x2 | 5 x4 | 10 x1 + """ + + def __init__(self, *, sizes: Sequence[int], repeats: Sequence[int]): + """Constructs a schedule of entropy iterations. + + Args: + sizes: the list of iteration sizes. + repeats: the list, parallel to sizes, with the number of times for each + size from `sizes` to repeat. + """ + try: + if len(repeats) != len(sizes): + raise ValueError("`repeats` must be parallel to `sizes`.") + if not sizes: + raise ValueError("`sizes` and `repeats` must not be empty.") + if any([(repeat <= 0) for repeat in repeats]): + raise ValueError("All repeat values must be strictly positive") + if repeats[-1] != 1: + raise ValueError("The last value in `repeats` must be equal to 1, " + "ince the last iteration size is repeated forever.") + except ValueError as e: + raise ValueError( + f"Entropy iteration schedule: repeats ({repeats}) and sizes" + f" ({sizes})." + ) from e + + schedule = [0] + for size, repeat in zip(sizes, repeats): + schedule.extend([schedule[-1] + (i + 1) * size for i in range(repeat)]) + + self.schedule = np.array(schedule, dtype=np.int32) + + def __call__(self, learner_step: int) -> Tuple[float, bool]: + """Entropy scheduling parameters for a given `learner_step`. + + Args: + learner_step: The current learning step. + + Returns: + alpha: The mixing weight (from [0, 1]) of the previous policy with + the one before for computing the intrinsic reward. + update_target_net: A boolean indicator for updating the target network + with the current network. + """ + + # The complexity below is because at some point we might go past + # the explicit schedule, and then we'd need to just use the last step + # in the schedule and apply the logic of + # ((learner_step - last_step) % last_iteration) == 0) + + # The schedule might look like this: + # X----X-------X--X--X--X--------X + # learner_step | might be here ^ | + # or there ^ | + # or even past the schedule ^ + + # We need to deal with two cases below. + # Instead of going for the complicated conditional, let's just + # compute both and then do the A * s + B * (1 - s) with s being a bool + # selector between A and B. + + # 1. assume learner_step is past the schedule, + # ie schedule[-1] <= learner_step. 
+ last_size = self.schedule[-1] - self.schedule[-2] + last_start = self.schedule[-1] + ( + learner_step - self.schedule[-1]) // last_size * last_size + # 2. assume learner_step is within the schedule. + start = jnp.amax(self.schedule * (self.schedule <= learner_step)) + finish = jnp.amin( + self.schedule * (learner_step < self.schedule), + initial=self.schedule[-1], + where=(learner_step < self.schedule)) + size = finish - start + + # Now select between the two. + beyond = (self.schedule[-1] <= learner_step) # Are we past the schedule? + iteration_start = (last_start * beyond + start * (1 - beyond)) + iteration_size = (last_size * beyond + size * (1 - beyond)) + + update_target_net = jnp.logical_and( + learner_step > 0, jnp.sum(learner_step == iteration_start)) + alpha = jnp.minimum( + (2.0 * (learner_step - iteration_start)) / iteration_size, 1.0) + + return alpha, update_target_net # pytype: disable=bad-return-type # jax-types + + +@chex.dataclass(frozen=True) +class FineTuning: + """Fine tuning options, aka policy post-processing. + + Even when fully trained, the resulting softmax-based policy may put + a small probability mass on bad actions. This results in an agent + waiting for the opponent (itself in self-play) to commit an error. + + To address that the policy is post-processed using: + - thresholding: any action with probability smaller than self.threshold + is simply removed from the policy. + - discretization: the probability values are rounded to the closest + multiple of 1/self.discretization. + + The post-processing is used on the learner, and thus must be jit-friendly. + """ + # The learner step after which the policy post processing (aka finetuning) + # will be enabled when learning. A strictly negative value is equivalent + # to infinity, ie disables finetuning completely. + from_learner_steps: int = -1 + # All policy probabilities below `threshold` are zeroed out. Thresholding + # is disabled if this value is non-positive. + policy_threshold: float = 0.03 + # Rounds the policy probabilities to the "closest" + # multiple of 1/`self.discretization`. + # Discretization is disabled for non-positive values. + policy_discretization: int = 32 + + def __call__(self, policy: chex.Array, mask: chex.Array, + learner_steps: int) -> chex.Array: + """A configurable fine tuning of a policy.""" + chex.assert_equal_shape((policy, mask)) + do_finetune = jnp.logical_and(self.from_learner_steps >= 0, + learner_steps > self.from_learner_steps) + + return jnp.where(do_finetune, self.post_process_policy(policy, mask), + policy) + + def post_process_policy( + self, + policy: chex.Array, + mask: chex.Array, + ) -> chex.Array: + """Unconditionally post process a given masked policy.""" + chex.assert_equal_shape((policy, mask)) + policy = self._threshold(policy, mask) + policy = self._discretize(policy) + return policy + + def _threshold(self, policy: chex.Array, mask: chex.Array) -> chex.Array: + """Remove from the support the actions 'a' where policy(a) < threshold.""" + chex.assert_equal_shape((policy, mask)) + if self.policy_threshold <= 0: + return policy + + mask = mask * ( + # Values over the threshold. + (policy >= self.policy_threshold) + + # Degenerate case is when policy is less than threshold *everywhere*. + # In that case we just keep the policy as-is. 
+ (jnp.max(policy, axis=-1, keepdims=True) < self.policy_threshold)) + return mask * policy / jnp.sum(mask * policy, axis=-1, keepdims=True) + + def _discretize(self, policy: chex.Array) -> chex.Array: + """Round all action probabilities to a multiple of 1/self.discretize.""" + if self.policy_discretization <= 0: + return policy + + # The unbatched/single policy case: + if len(policy.shape) == 1: + return self._discretize_single(policy) + + # policy may be [B, A] or [T, B, A], etc. Thus add hk.BatchApply. + dims = len(policy.shape) - 1 + + # TODO(author18): avoid mixing vmap and BatchApply since the two could + # be folded into either a single BatchApply or a sequence of vmaps, but + # not the mix. + vmapped = jax.vmap(self._discretize_single) + policy = hk.BatchApply(vmapped, num_dims=dims)(policy) + + return policy + + def _discretize_single(self, mu: chex.Array) -> chex.Array: + """A version of self._discretize but for the unbatched data.""" + # TODO(author18): try to merge _discretize and _discretize_single + # into one function that handles both batched and unbatched cases. + if len(mu.shape) == 2: + mu_ = jnp.squeeze(mu, axis=0) + else: + mu_ = mu + n_actions = mu_.shape[-1] + roundup = jnp.ceil(mu_ * self.policy_discretization).astype(jnp.int32) + result = jnp.zeros_like(mu_) + order = jnp.argsort(-mu_) # Indices of descending order. + weight_left = self.policy_discretization + + def f_disc(i, order, roundup, weight_left, result): + x = jnp.minimum(roundup[order[i]], weight_left) + result = jax.numpy.where(weight_left >= 0, result.at[order[i]].add(x), + result) + weight_left -= x + return i + 1, order, roundup, weight_left, result + + def f_scan_scan(carry, x): + i, order, roundup, weight_left, result = carry + i_next, order_next, roundup_next, weight_left_next, result_next = f_disc( + i, order, roundup, weight_left, result) + carry_next = (i_next, order_next, roundup_next, weight_left_next, + result_next) + return carry_next, x + + (_, _, _, weight_left_next, result_next), _ = jax.lax.scan( + f_scan_scan, + init=(jnp.asarray(0), order, roundup, weight_left, result), + xs=None, + length=n_actions) + + result_next = jnp.where(weight_left_next > 0, + result_next.at[order[0]].add(weight_left_next), + result_next) + if len(mu.shape) == 2: + result_next = jnp.expand_dims(result_next, axis=0) + return result_next / self.policy_discretization + + +def _legal_policy(logits: chex.Array, legal_actions: chex.Array) -> chex.Array: + """A soft-max policy that respects legal_actions.""" + chex.assert_equal_shape((logits, legal_actions)) + # Fiddle a bit to make sure we don't generate NaNs or Inf in the middle. + l_min = logits.min(axis=-1, keepdims=True) + logits = jnp.where(legal_actions, logits, l_min) + logits -= logits.max(axis=-1, keepdims=True) + logits *= legal_actions + exp_logits = jnp.where(legal_actions, jnp.exp(logits), + 0) # Illegal actions become 0. + exp_logits_sum = jnp.sum(exp_logits, axis=-1, keepdims=True) + return exp_logits / exp_logits_sum + + +def legal_log_policy(logits: chex.Array, + legal_actions: chex.Array) -> chex.Array: + """Return the log of the policy on legal action, 0 on illegal action.""" + chex.assert_equal_shape((logits, legal_actions)) + # logits_masked has illegal actions set to -inf. + logits_masked = logits + jnp.log(legal_actions) + max_legal_logit = logits_masked.max(axis=-1, keepdims=True) + logits_masked = logits_masked - max_legal_logit + # exp_logits_masked is 0 for illegal actions. 
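+  # (jnp.log(legal_actions) is -inf for illegal actions, and exp(-inf) == 0.)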
+ exp_logits_masked = jnp.exp(logits_masked) + + baseline = jnp.log(jnp.sum(exp_logits_masked, axis=-1, keepdims=True)) + # Subtract baseline from logits. We do not simply return + # logits_masked - baseline + # because that has -inf for illegal actions, or + # legal_actions * (logits_masked - baseline) + # because that leads to 0 * -inf == nan for illegal actions. + log_policy = jnp.multiply(legal_actions, + (logits - max_legal_logit - baseline)) + return log_policy + + +def _player_others(player_ids: chex.Array, valid: chex.Array, + player: int) -> chex.Array: + """A vector of 1 for the current player and -1 for others. + + Args: + player_ids: Tensor [...] containing player ids (0 <= player_id < N). + valid: Tensor [...] containing whether these states are valid. + player: The player id as int. + + Returns: + player_other: is 1 for the current player and -1 for others [..., 1]. + """ + chex.assert_equal_shape((player_ids, valid)) + current_player_tensor = (player_ids == player).astype( + jnp.int32) # pytype: disable=attribute-error # numpy-scalars + + res = 2 * current_player_tensor - 1 + res = res * valid + return jnp.expand_dims(res, axis=-1) + + +def _policy_ratio(pi: chex.Array, mu: chex.Array, actions_oh: chex.Array, + valid: chex.Array) -> chex.Array: + """Returns a ratio of policy pi/mu when selecting action a. + + By convention, this ratio is 1 on non valid states + Args: + pi: the policy of shape [..., A]. + mu: the sampling policy of shape [..., A]. + actions_oh: a one-hot encoding of the current actions of shape [..., A]. + valid: 0 if the state is not valid and else 1 of shape [...]. + + Returns: + pi/mu on valid states and 1 otherwise. The shape is the same + as pi, mu or actions_oh but without the last dimension A. + """ + chex.assert_equal_shape((pi, mu, actions_oh)) + chex.assert_shape((valid,), actions_oh.shape[:-1]) + + def _select_action_prob(pi): + return (jnp.sum(actions_oh * pi, axis=-1, keepdims=False) * valid + + (1 - valid)) + + pi_actions_prob = _select_action_prob(pi) + mu_actions_prob = _select_action_prob(mu) + return pi_actions_prob / mu_actions_prob + + +def _where(pred: chex.Array, true_data: chex.ArrayTree, + false_data: chex.ArrayTree) -> chex.ArrayTree: + """Similar to jax.where but treats `pred` as a broadcastable prefix.""" + + def _where_one(t, f): + chex.assert_equal_rank((t, f)) + # Expand the dimensions of pred if true_data and false_data are higher rank. + p = jnp.reshape(pred, pred.shape + (1,) * (len(t.shape) - len(pred.shape))) + return jnp.where(p, t, f) + + return tree.tree_map(_where_one, true_data, false_data) + + +def _has_played(valid: chex.Array, player_id: chex.Array, + player: int) -> chex.Array: + """Compute a mask of states which have a next state in the sequence.""" + chex.assert_equal_shape((valid, player_id)) + + def _loop_has_played(carry, x): + valid, player_id = x + chex.assert_equal_shape((valid, player_id)) + + our_res = jnp.ones_like(player_id) + opp_res = carry + reset_res = jnp.zeros_like(carry) + + our_carry = carry + opp_carry = carry + reset_carry = jnp.zeros_like(player_id) + + # pyformat: disable + return _where(valid, _where((player_id == player), + (our_carry, our_res), + (opp_carry, opp_res)), + (reset_carry, reset_res)) + # pyformat: enable + + _, result = lax.scan( + f=_loop_has_played, + init=jnp.zeros_like(player_id[-1]), + xs=(valid, player_id), + reverse=True) + return result + + +# V-Trace +# +# Custom implementation of VTrace to handle trajectories having a mix of +# different player steps. 
The standard rlax.vtrace can't be applied here +# out of the box because a trajectory could look like '121211221122'. + + +def v_trace( + v: chex.Array, + valid: chex.Array, + player_id: chex.Array, + acting_policy: chex.Array, + merged_policy: chex.Array, + merged_log_policy: chex.Array, + player_others: chex.Array, + actions_oh: chex.Array, + reward: chex.Array, + player: int, + # Scalars below. + eta: float, + lambda_: float, + c: float, + rho: float, +) -> Tuple[Any, Any, Any]: + """Custom VTrace for trajectories with a mix of different player steps.""" + gamma = 1.0 + + has_played = _has_played(valid, player_id, player) + + policy_ratio = _policy_ratio(merged_policy, acting_policy, actions_oh, valid) + inv_mu = _policy_ratio( + jnp.ones_like(merged_policy), acting_policy, actions_oh, valid) + + eta_reg_entropy = (-eta * + jnp.sum(merged_policy * merged_log_policy, axis=-1) * + jnp.squeeze(player_others, axis=-1)) + eta_log_policy = -eta * merged_log_policy * player_others + + @chex.dataclass(frozen=True) + class LoopVTraceCarry: + """The carry of the v-trace scan loop.""" + reward: chex.Array + # The cumulated reward until the end of the episode. Uncorrected (v-trace). + # Gamma discounted and includes eta_reg_entropy. + reward_uncorrected: chex.Array + next_value: chex.Array + next_v_target: chex.Array + importance_sampling: chex.Array + + init_state_v_trace = LoopVTraceCarry( + reward=jnp.zeros_like(reward[-1]), + reward_uncorrected=jnp.zeros_like(reward[-1]), + next_value=jnp.zeros_like(v[-1]), + next_v_target=jnp.zeros_like(v[-1]), + importance_sampling=jnp.ones_like(policy_ratio[-1])) + + def _loop_v_trace(carry: LoopVTraceCarry, x) -> Tuple[LoopVTraceCarry, Any]: + (cs, player_id, v, reward, eta_reg_entropy, valid, inv_mu, actions_oh, + eta_log_policy) = x + + reward_uncorrected = ( + reward + gamma * carry.reward_uncorrected + eta_reg_entropy) + discounted_reward = reward + gamma * carry.reward + + # V-target: + our_v_target = ( + v + jnp.expand_dims( + jnp.minimum(rho, cs * carry.importance_sampling), axis=-1) * + (jnp.expand_dims(reward_uncorrected, axis=-1) + + gamma * carry.next_value - v) + lambda_ * jnp.expand_dims( + jnp.minimum(c, cs * carry.importance_sampling), axis=-1) * gamma * + (carry.next_v_target - carry.next_value)) + + opp_v_target = jnp.zeros_like(our_v_target) + reset_v_target = jnp.zeros_like(our_v_target) + + # Learning output: + our_learning_output = ( + v + # value + eta_log_policy + # regularisation + actions_oh * jnp.expand_dims(inv_mu, axis=-1) * + (jnp.expand_dims(discounted_reward, axis=-1) + gamma * jnp.expand_dims( + carry.importance_sampling, axis=-1) * carry.next_v_target - v)) + + opp_learning_output = jnp.zeros_like(our_learning_output) + reset_learning_output = jnp.zeros_like(our_learning_output) + + # State carry: + our_carry = LoopVTraceCarry( + reward=jnp.zeros_like(carry.reward), + next_value=v, + next_v_target=our_v_target, + reward_uncorrected=jnp.zeros_like(carry.reward_uncorrected), + importance_sampling=jnp.ones_like(carry.importance_sampling)) + opp_carry = LoopVTraceCarry( + reward=eta_reg_entropy + cs * discounted_reward, + reward_uncorrected=reward_uncorrected, + next_value=gamma * carry.next_value, + next_v_target=gamma * carry.next_v_target, + importance_sampling=cs * carry.importance_sampling) + reset_carry = init_state_v_trace + + # Invalid turn: init_state_v_trace and (zero target, learning_output) + # pyformat: disable + return _where(valid, # pytype: disable=bad-return-type # numpy-scalars + _where((player_id == player), + 
(our_carry, (our_v_target, our_learning_output)), + (opp_carry, (opp_v_target, opp_learning_output))), + (reset_carry, (reset_v_target, reset_learning_output))) + # pyformat: enable + + _, (v_target, learning_output) = lax.scan( + f=_loop_v_trace, + init=init_state_v_trace, + xs=(policy_ratio, player_id, v, reward, eta_reg_entropy, valid, inv_mu, + actions_oh, eta_log_policy), + reverse=True) + + return v_target, has_played, learning_output + + +def get_loss_v(v_list: Sequence[chex.Array], + v_target_list: Sequence[chex.Array], + mask_list: Sequence[chex.Array]) -> chex.Array: + """Define the loss function for the critic.""" + chex.assert_trees_all_equal_shapes(v_list, v_target_list) + # v_list and v_target_list come with a degenerate trailing dimension, + # which mask_list tensors do not have. + chex.assert_shape(mask_list, v_list[0].shape[:-1]) + loss_v_list = [] + for (v_n, v_target, mask) in zip(v_list, v_target_list, mask_list): + assert v_n.shape[0] == v_target.shape[0] + + loss_v = jnp.expand_dims( + mask, axis=-1) * (v_n - lax.stop_gradient(v_target))**2 + normalization = jnp.sum(mask) + loss_v = jnp.sum(loss_v) / (normalization + (normalization == 0.0)) + + loss_v_list.append(loss_v) + return sum(loss_v_list) + + +def apply_force_with_threshold(decision_outputs: chex.Array, force: chex.Array, + threshold: float, + threshold_center: chex.Array) -> chex.Array: + """Apply the force with below a given threshold.""" + chex.assert_equal_shape((decision_outputs, force, threshold_center)) + can_decrease = decision_outputs - threshold_center > -threshold + can_increase = decision_outputs - threshold_center < threshold + force_negative = jnp.minimum(force, 0.0) + force_positive = jnp.maximum(force, 0.0) + clipped_force = can_decrease * force_negative + can_increase * force_positive + return decision_outputs * lax.stop_gradient(clipped_force) + + +def renormalize(loss: chex.Array, mask: chex.Array) -> chex.Array: + """The `normalization` is the number of steps over which loss is computed.""" + chex.assert_equal_shape((loss, mask)) + loss = jnp.sum(loss * mask) + normalization = jnp.sum(mask) + return loss / (normalization + (normalization == 0.0)) + + +def get_loss_nerd(logit_list: Sequence[chex.Array], + policy_list: Sequence[chex.Array], + q_vr_list: Sequence[chex.Array], + valid: chex.Array, + player_ids: Sequence[chex.Array], + legal_actions: chex.Array, + importance_sampling_correction: Sequence[chex.Array], + clip: float = 100, + threshold: float = 2) -> chex.Array: + """Define the nerd loss.""" + assert isinstance(importance_sampling_correction, list) + loss_pi_list = [] + for k, (logit_pi, pi, q_vr, is_c) in enumerate( + zip(logit_list, policy_list, q_vr_list, importance_sampling_correction)): + assert logit_pi.shape[0] == q_vr.shape[0] + # loss policy + adv_pi = q_vr - jnp.sum(pi * q_vr, axis=-1, keepdims=True) + adv_pi = is_c * adv_pi # importance sampling correction + adv_pi = jnp.clip(adv_pi, a_min=-clip, a_max=clip) + adv_pi = lax.stop_gradient(adv_pi) + + logits = logit_pi - jnp.mean( + logit_pi * legal_actions, axis=-1, keepdims=True) + + threshold_center = jnp.zeros_like(logits) + + nerd_loss = jnp.sum( + legal_actions * + apply_force_with_threshold( + logits, adv_pi, threshold, threshold_center), + axis=-1) + nerd_loss = -renormalize(nerd_loss, valid * (player_ids == k)) + loss_pi_list.append(nerd_loss) + return sum(loss_pi_list) + + +@chex.dataclass(frozen=True) +class AdamConfig: + """Adam optimizer related params.""" + b1: float = 0.0 + b2: float = 0.999 + eps: float = 
10e-8 + + +@chex.dataclass(frozen=True) +class NerdConfig: + """Nerd related params.""" + beta: float = 2.0 + clip: float = 10_000 + + +class StateRepresentation(str, enum.Enum): + INFO_SET = "info_set" + OBSERVATION = "observation" + + +@chex.dataclass(frozen=True) +class RNaDConfig: + """Configuration parameters for the RNaDSolver.""" + # The game parameter string including its name and parameters. + game_name: str + # The games longer than this value are truncated. Must be strictly positive. + trajectory_max: int = 10 + + game_name_only: str = "negotiation" + + + # The content of the EnvStep.obs tensor. + state_representation: StateRepresentation = StateRepresentation.INFO_SET + + # Network configuration. + policy_network_layers: Sequence[int] = (256, 256) + + # The batch size to use when learning/improving parameters. + batch_size: int = 256 + # The learning rate for `params`. + learning_rate: float = 0.00005 + # The config related to the ADAM optimizer used for updating `params`. + adam: AdamConfig = AdamConfig() + # All gradients values are clipped to [-clip_gradient, clip_gradient]. + clip_gradient: float = 10_000 + # The "speed" at which `params_target` is following `params`. + target_network_avg: float = 0.001 + + # RNaD algorithm configuration. + # Entropy schedule configuration. See EntropySchedule class documentation. + entropy_schedule_repeats: Sequence[int] = (1,) + entropy_schedule_size: Sequence[int] = (20_000,) + # The weight of the reward regularisation term in RNaD. + eta_reward_transform: float = 0.2 + nerd: NerdConfig = NerdConfig() + c_vtrace: float = 1.0 + + # Options related to fine tuning of the agent. + finetune: FineTuning = FineTuning() + + # The seed that fully controls the randomness. + seed: int = 42 + + +@chex.dataclass(frozen=True) +class EnvStep: + """Holds the tensor data representing the current game state.""" + # Indicates whether the state is a valid one or just a padding. Shape: [...] + # The terminal state being the first one to be marked !valid. + # All other tensors in EnvStep contain data, but only for valid timesteps. + # Once !valid the data needs to be ignored, since it's a duplicate of + # some other previous state. + # The rewards is the only exception that contains reward values + # in the terminal state, which is marked !valid. + # TODO(author16): This is a confusion point and would need to be clarified. + valid: chex.Array = () # pytype: disable=annotation-type-mismatch # numpy-scalars + # The single tensor representing the state observation. Shape: [..., ??] + obs: chex.Array = () # pytype: disable=annotation-type-mismatch # numpy-scalars + # The legal actions mask for the current player. Shape: [..., A] + legal: chex.Array = () # pytype: disable=annotation-type-mismatch # numpy-scalars + # The current player id as an int. Shape: [...] + player_id: chex.Array = () # pytype: disable=annotation-type-mismatch # numpy-scalars + # The rewards of all the players. Shape: [..., P] + rewards: chex.Array = () # pytype: disable=annotation-type-mismatch # numpy-scalars + + +@chex.dataclass(frozen=True) +class ActorStep: + """The actor step tensor summary.""" + # The action (as one-hot) of the current player. Shape: [..., A] + action_oh: chex.Array = () # pytype: disable=annotation-type-mismatch # numpy-scalars + # The policy of the current player. Shape: [..., A] + policy: chex.Array = () # pytype: disable=annotation-type-mismatch # numpy-scalars + # The rewards of all the players. 
Shape: [..., P] + # Note - these are rewards obtained *after* the actor step, and thus + # these are the same as EnvStep.rewards visible before the *next* step. + rewards: chex.Array = () # pytype: disable=annotation-type-mismatch # numpy-scalars + + +@chex.dataclass(frozen=True) +class TimeStep: + """The tensor data for one game transition (env_step, actor_step).""" + env: EnvStep = EnvStep() + actor: ActorStep = ActorStep() + + +Optimizer = Callable[[Params, Params], Params] # (params, grads) -> params + + +def optax_optimizer( + params: chex.ArrayTree, + init_and_update: optax.GradientTransformation) -> Optimizer: + """Creates a parameterized function that represents an optimizer.""" + init_fn, update_fn = init_and_update + + @chex.dataclass + class OptaxOptimizer: + """A jax-friendly representation of an optimizer state with the update.""" + state: chex.Array + + def __call__(self, params: Params, grads: Params) -> Params: + # pytype: disable=annotation-type-mismatch # numpy-scalars + updates, self.state = update_fn(grads, self.state) + return optax.apply_updates(params, updates) + + return OptaxOptimizer(state=init_fn(params)) + + +class RNaDSolver(policy_lib.Policy): + """Implements a solver for the R-NaD Algorithm. + + See https://arxiv.org/abs/2206.15378. + + Define all networks. Derive losses & learning steps. Initialize the game + state and algorithmic variables. + """ + + def __init__(self, config: RNaDConfig, game_params: dict): + self.config = config + + + # Learner and actor step counters. + self.learner_steps = 0 + self.actor_steps = 0 + + self.game_params = game_params + + + self.init() + + def init(self): + """Initialize the network and losses.""" + # The random facilities for jax and numpy. + + + self._rngkey = jax.random.PRNGKey(self.config.seed) + self._np_rng = np.random.RandomState(self.config.seed) + # TODO(author16): serialize both above to get the fully deterministic behaviour. + + # Create a game and an example of a state. + #self._game = pyspiel.load_game(self.config.game_name) + self._game = pyspiel.load_game(self.config.game_name_only, self.game_params) #to work with chris's changes we need to pass game_params + self._ex_state = self._play_chance(self._game.new_initial_state()) + + # The network. + def network( + env_step: EnvStep + ) -> Tuple[chex.Array, chex.Array, chex.Array, chex.Array]: + mlp_torso = hk.nets.MLP( + self.config.policy_network_layers, activate_final=True + ) + torso = mlp_torso(env_step.obs) + + mlp_policy_head = hk.nets.MLP([self._game.num_distinct_actions()]) + logit = mlp_policy_head(torso) + + mlp_policy_value = hk.nets.MLP([self._game.num_players()]) + v = mlp_policy_value(torso) + + pi = _legal_policy(logit, env_step.legal) + log_pi = legal_log_policy(logit, env_step.legal) + return pi, v, log_pi, logit + + self.network = hk.without_apply_rng(hk.transform(network)) + + # The machinery related to updating parameters/learner. + self._entropy_schedule = EntropySchedule( + sizes=self.config.entropy_schedule_size, + repeats=self.config.entropy_schedule_repeats) + self._loss_and_grad = jax.value_and_grad(self.loss, has_aux=False) #gradients of loss fn, has_aux=False means loss fn returns a single value not a tuple + + # Create initial parameters. + env_step = self._state_as_env_step(self._ex_state) + key = self._next_rng_key() # Make sure to use the same key for all. 
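+    # Initialising all four parameter sets from the same key means params,
+    # params_target, params_prev and params_prev_ start from identical weights.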
+ self.params = self.network.init(key, env_step) + self.params_target = self.network.init(key, env_step) + self.params_prev = self.network.init(key, env_step) + self.params_prev_ = self.network.init(key, env_step) + + # Parameter optimizers. + # self.optimizer = optax_optimizer( + # self.params, + # optax.chain( + # optax.scale_by_adam( + # eps_root=0.0, + # **self.config.adam, + # ), optax.scale(-self.config.learning_rate), + # optax.clip(self.config.clip_gradient))) + + self.optimizer = optax_optimizer( #need to update as self.config.adam is a dataclass not dict + self.params, + optax.chain( + optax.clip_by_global_norm(5.0), + optax.scale_by_adam( + b1=self.config.adam.b1, + b2=self.config.adam.b2, + eps=self.config.adam.eps, + eps_root=0.0, + ), + optax.scale(-self.config.learning_rate), + optax.clip(self.config.clip_gradient) + ) + ) + + self.optimizer_target = optax_optimizer( + self.params_target, optax.sgd(self.config.target_network_avg)) + + def loss(self, params: Params, params_target: Params, params_prev: Params, + params_prev_: Params, ts: TimeStep, alpha: float, + learner_steps: int) -> float: + rollout = jax.vmap(self.network.apply, (None, 0), 0) + pi, v, log_pi, logit = rollout(params, ts.env) + # chex.assert_equal_shape([pi, v]) + + policy_pprocessed = self.config.finetune(pi, ts.env.legal, learner_steps) + + _, v_target, _, _ = rollout(params_target, ts.env) + _, _, log_pi_prev, _ = rollout(params_prev, ts.env) + _, _, log_pi_prev_, _ = rollout(params_prev_, ts.env) + # This line creates the reward transform log(pi(a|x)/pi_reg(a|x)). + # For the stability reasons, reward changes smoothly between iterations. + # The mixing between old and new reward transform is a convex combination + # parametrised by alpha. + log_policy_reg = log_pi - \ + (alpha * log_pi_prev + (1 - alpha) * log_pi_prev_) + + v_target_list, has_played_list, v_trace_policy_target_list = [], [], [] + for player in range(self._game.num_players()): + reward = ts.actor.rewards[:, :, player] # [T, B, Player] + v_target_, has_played, policy_target_ = v_trace( + jnp.expand_dims(v_target[:, :, player], axis=-1), + ts.env.valid, + ts.env.player_id, + ts.actor.policy, + policy_pprocessed, + log_policy_reg, + _player_others(ts.env.player_id, ts.env.valid, player), + ts.actor.action_oh, + reward, + player, + lambda_=1.0, + c=self.config.c_vtrace, + rho=np.inf, + eta=self.config.eta_reward_transform) + v_target_list.append(v_target_) + has_played_list.append(has_played) + v_trace_policy_target_list.append(policy_target_) + # loss_v = get_loss_v([v] * self._game.num_players(), v_target_list, + # has_played_list) + transformed_v = [jnp.expand_dims(v[..., p], axis=-1) + for p in range(self._game.num_players())] + + # chex.assert_equal_shape([transformed_v, jnp.array(v_target_list)]) + loss_v = get_loss_v(transformed_v, v_target_list, + has_played_list) + + is_vector = jnp.expand_dims(jnp.ones_like(ts.env.valid), axis=-1) + importance_sampling_correction = [is_vector] * self._game.num_players() + # Uses v-trace to define q-values for Nerd + loss_nerd = get_loss_nerd( + [logit] * self._game.num_players(), [pi] * self._game.num_players(), + v_trace_policy_target_list, + ts.env.valid, + ts.env.player_id, + ts.env.legal, + importance_sampling_correction, + clip=self.config.nerd.clip, + threshold=self.config.nerd.beta) + return loss_v + loss_nerd # pytype: disable=bad-return-type # numpy-scalars + + @functools.partial(jax.jit, static_argnums=(0,)) + def update_parameters( + self, + params: Params, + params_target: Params, + 
params_prev: Params, + params_prev_: Params, + optimizer: Optimizer, + optimizer_target: Optimizer, + timestep: TimeStep, + alpha: float, + learner_steps: int, + update_target_net: bool): + """A jitted pure-functional part of the `step`.""" + loss_val, grad = self._loss_and_grad(params, params_target, params_prev, + params_prev_, timestep, alpha, + learner_steps) + # Update `params`` using the computed gradient. + params = optimizer(params, grad) + # Update `params_target` towards `params`. + params_target = optimizer_target( + params_target, tree.tree_map(lambda a, b: a - b, params_target, params)) + + # Rolls forward the prev and prev_ params if update_target_net is 1. + # pyformat: disable + params_prev, params_prev_ = jax.lax.cond( + update_target_net, + lambda: (params_target, params_prev), + lambda: (params_prev, params_prev_)) + # pyformat: enable + + logs = { + "loss": loss_val, + } + return (params, params_target, params_prev, params_prev_, optimizer, + optimizer_target), logs + + # def __getstate__(self): + # """To serialize the agent.""" + # return dict( + #RNaD config. + # config=self.config, + # game_params=self.game_params, # <-- add + + + #Learner and actor step counters. + # learner_steps=self.learner_steps, + # actor_steps=self.actor_steps, + + #The randomness keys. + # np_rng=self._np_rng.get_state(), + # rngkey=self._rngkey, + + #Network params. + # params=self.params, + # params_target=self.params_target, + # params_prev=self.params_prev, + # params_prev_=self.params_prev_, + #Optimizer state. + #pytype: disable=attribute-error # always-use-return-annotations + # optimizer=self.optimizer.state, + #pytype: disable=attribute-error # always-use-return-annotations + # optimizer_target=self.optimizer_target.state, + # ) + + def __getstate__(self): + """To serialize the agent.""" + return dict( + # RNaD config. + config=self.config, + # Persist game params so we can rebuild the OpenSpiel game on load. + game_params=getattr(self, "game_params", { + "enable_proposals": True, + "enable_utterances": False, + "num_items": 3, + "discount": 0.9, + "min_value": 1, + "max_value": 100, + "max_rounds": 3, + "max_quantity": 10, + "item_quantities": "7,4,1", + }), + # Learner and actor step counters. + learner_steps=self.learner_steps, + actor_steps=self.actor_steps, + # The randomness keys. + np_rng=self._np_rng.get_state(), + rngkey=self._rngkey, + # Network params. + params=self.params, + params_target=self.params_target, + params_prev=self.params_prev, + params_prev_=self.params_prev_, + # Optimizer state. + optimizer=self.optimizer.state, + optimizer_target=self.optimizer_target.state, + ) + + # def __setstate__(self, state): + # """To deserialize the agent.""" + #RNaD config. + # self.config = state["config"] + # + + # self.init() + # self.game_params = state["game_params"] + + #Learner and actor step counters. + # self.learner_steps = state["learner_steps"] + # self.actor_steps = state["actor_steps"] + + #The randomness keys. + # self._np_rng.set_state(state["np_rng"]) + # self._rngkey = state["rngkey"] + + #Network params. + # self.params = state["params"] + # self.params_target = state["params_target"] + # self.params_prev = state["params_prev"] + # self.params_prev_ = state["params_prev_"] + #Optimizer state. 
+ # self.optimizer.state = state["optimizer"] + # self.optimizer_target.state = state["optimizer_target"] + + def __setstate__(self, state): + """To deserialize the agent.""" + self.config = state["config"] + + # Restore game params BEFORE init; fallback for older checkpoints. + self.game_params = state.get("game_params", { + "enable_proposals": True, + "enable_utterances": False, + "num_items": 3, + "discount": 0.9, + "min_value": 1, + "max_value": 100, + "max_rounds": 3, + "max_quantity": 10, + "item_quantities": "7,4,1", + }) + + self.init() + + self.learner_steps = state["learner_steps"] + self.actor_steps = state["actor_steps"] + self._np_rng.set_state(state["np_rng"]) + self._rngkey = state["rngkey"] + self.params = state["params"] + self.params_target = state["params_target"] + self.params_prev = state["params_prev"] + self.params_prev_ = state["params_prev_"] + self.optimizer.state = state["optimizer"] + self.optimizer_target.state = state["optimizer_target"] + + def step(self): + """One step of the algorithm, that plays the game and improves params.""" + timestep = self.collect_batch_trajectory() + alpha, update_target_net = self._entropy_schedule(self.learner_steps) + (self.params, self.params_target, self.params_prev, self.params_prev_, + self.optimizer, self.optimizer_target), logs = self.update_parameters( + self.params, self.params_target, self.params_prev, self.params_prev_, + self.optimizer, self.optimizer_target, timestep, alpha, + self.learner_steps, update_target_net) + self.learner_steps += 1 + logs.update({ + "actor_steps": self.actor_steps, + "learner_steps": self.learner_steps, + }) + return logs + + def _next_rng_key(self) -> chex.PRNGKey: + """Get the next rng subkey from class rngkey. + + Must *not* be called from under a jitted function! + + Returns: + A fresh rng_key. + """ + self._rngkey, subkey = jax.random.split(self._rngkey) + return subkey + + def _state_as_env_step(self, state: pyspiel.State) -> EnvStep: + # A terminal state must be communicated to players, however since + # it's a terminal state things like the state_representation or + # the set of legal actions are meaningless and only needed + # for the sake of creating well a defined trajectory tensor. + # Therefore the code below: + # - extracts the rewards + # - if the state is terminal, uses a dummy other state for other fields. + rewards = np.array(state.returns(), dtype=np.float64) + + valid = not state.is_terminal() + if not valid: + state = self._ex_state + + if self.config.state_representation == StateRepresentation.OBSERVATION: + obs = state.observation_tensor() + elif self.config.state_representation == StateRepresentation.INFO_SET: + obs = state.information_state_tensor() + else: + raise ValueError( + f"Invalid StateRepresentation: {self.config.state_representation}.") + + # TODO(author16): clarify the story around rewards and valid. + return EnvStep( + obs=np.array(obs, dtype=np.float64), + legal=np.array(state.legal_actions_mask(), dtype=np.int8), + player_id=np.array(state.current_player(), dtype=np.float64), + valid=np.array(valid, dtype=np.float64), + rewards=rewards) + + def action_probabilities(self, + state: pyspiel.State, + player_id: Any = None): + """Returns action probabilities dict for a single batch.""" + env_step = self._batch_of_states_as_env_step([state]) + probs = self._network_jit_apply_and_post_process( + self.params_target, env_step) + probs = jax.device_get(probs[0]) # Squeeze out the 1-element batch. 
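+    # Only legal actions appear in the returned {action: probability} dict.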
+ return { + action: probs[action] + for action, valid in enumerate(jax.device_get(env_step.legal[0])) + if valid + } + + @functools.partial(jax.jit, static_argnums=(0,)) + def _network_jit_apply_and_post_process( + self, params: Params, env_step: EnvStep) -> chex.Array: + pi, _, _, _ = self.network.apply(params, env_step) + pi = self.config.finetune.post_process_policy(pi, env_step.legal) + return pi + + # TODO(author16): jit actor_step. + def actor_step(self, env_step: EnvStep): + pi, _, _, _ = self.network.apply(self.params, env_step) + pi = np.asarray(pi).astype("float64") + # TODO(author18): is this policy normalization really needed? + pi = pi / np.sum(pi, axis=-1, keepdims=True) + + action = np.apply_along_axis( + lambda x: self._np_rng.choice(range(pi.shape[1]), p=x), axis=-1, arr=pi) + # TODO(author16): reapply the legal actions mask to bullet-proof sampling. + action_oh = np.zeros(pi.shape, dtype="float64") + action_oh[range(pi.shape[0]), action] = 1.0 + + # pytype: disable=wrong-arg-types # numpy-scalars + actor_step = ActorStep(policy=pi, action_oh=action_oh, rewards=()) + + return action, actor_step + + def collect_batch_trajectory(self) -> TimeStep: + states = [ + self._play_chance(self._game.new_initial_state()) + for _ in range(self.config.batch_size) + ] + timesteps = [] + + env_step = self._batch_of_states_as_env_step(states) + for _ in range(self.config.trajectory_max): + prev_env_step = env_step + a, actor_step = self.actor_step(env_step) + + states = self._batch_of_states_apply_action(states, a) + env_step = self._batch_of_states_as_env_step(states) + timesteps.append( + TimeStep( + env=prev_env_step, + actor=ActorStep( + action_oh=actor_step.action_oh, + policy=actor_step.policy, + rewards=env_step.rewards), + )) + # Concatenate all the timesteps together to form a single rollout [T, B, ..] + return jax.tree_util.tree_map(lambda *xs: np.stack(xs, axis=0), *timesteps) + + def _batch_of_states_as_env_step(self, + states: Sequence[pyspiel.State]) -> EnvStep: + envs = [self._state_as_env_step(state) for state in states] + return jax.tree_util.tree_map(lambda *e: np.stack(e, axis=0), *envs) + + def _batch_of_states_apply_action( + self, states: Sequence[pyspiel.State], + actions: chex.Array) -> Sequence[pyspiel.State]: + """Apply a batch of `actions` to a parallel list of `states`.""" + for state, action in zip(states, list(actions)): + if not state.is_terminal(): + self.actor_steps += 1 + state.apply_action(action) + self._play_chance(state) + return states + + def _play_chance(self, state: pyspiel.State) -> pyspiel.State: + """Plays the chance nodes until we end up at another type of node. + + Args: + state: to be updated until it does not correspond to a chance node. + Returns: + The same input state object, but updated. The state is returned + only for convenience, to allow chaining function calls. 
+ """ + while state.is_chance_node(): + chance_outcome, chance_proba = zip(*state.chance_outcomes()) + action = self._np_rng.choice(chance_outcome, p=chance_proba) + state.apply_action(action) + return state diff --git a/scenarios/bargaining/rl_agent_checkpoints/rnad/rnad_bg4.pkl b/scenarios/bargaining/rl_agent_checkpoints/rnad/rnad_bg4.pkl new file mode 100644 index 0000000..85d0652 Binary files /dev/null and b/scenarios/bargaining/rl_agent_checkpoints/rnad/rnad_bg4.pkl differ diff --git a/scenarios/bargaining/rl_agent_checkpoints/rnad/rnad_bg5.pkl b/scenarios/bargaining/rl_agent_checkpoints/rnad/rnad_bg5.pkl new file mode 100644 index 0000000..0855684 Binary files /dev/null and b/scenarios/bargaining/rl_agent_checkpoints/rnad/rnad_bg5.pkl differ diff --git a/scenarios/bargaining/rl_agent_checkpoints/rnad/rnad_bg6.pkl b/scenarios/bargaining/rl_agent_checkpoints/rnad/rnad_bg6.pkl new file mode 100644 index 0000000..b163f9c Binary files /dev/null and b/scenarios/bargaining/rl_agent_checkpoints/rnad/rnad_bg6.pkl differ diff --git a/scenarios/debate/__init__.py b/scenarios/debate/__init__.py new file mode 100644 index 0000000..59adfd6 --- /dev/null +++ b/scenarios/debate/__init__.py @@ -0,0 +1 @@ +# Debate scenario package diff --git a/scenarios/prompts/__init__.py b/scenarios/prompts/__init__.py new file mode 100644 index 0000000..139597f --- /dev/null +++ b/scenarios/prompts/__init__.py @@ -0,0 +1,2 @@ + + diff --git a/scenarios/prompts/make_prompt.py b/scenarios/prompts/make_prompt.py new file mode 100644 index 0000000..984673f --- /dev/null +++ b/scenarios/prompts/make_prompt.py @@ -0,0 +1,89 @@ +from __future__ import annotations + +import numpy as np # noqa: F401 + +from ..utils.offer import Offer +from .prompt_texts.circle0 import make_prompt_circle_0 +from .prompt_texts.circle1 import make_prompt_circle_1 +from .prompt_texts.circle2 import make_prompt_circle_2 +from .prompt_texts.circle3 import make_prompt_circle_3 +from .prompt_texts.circle4 import make_prompt_circle_4 +from .prompt_texts.circle5 import make_prompt_circle_5 +from .prompt_texts.circle6 import make_prompt_circle_6 + + +def make_prompt( + T: int, + quantities: list[int], + V: int, + values: list[float], + W1: int, + W2: int, + w: int, + R: int, + g: float, + r: int, + history: dict, + current_offer: Offer | None = None, + player_num: int = 0, + p1_outside_offer: list[int] | None = None, + p2_outside_offer: list[int] | None = None, + circle: int = 0, + example_offer_less_than_outside_offer_self: list[int] | None = None, +) -> str: + my_player_num = player_num + 1 + other_player_num = 2 if my_player_num == 1 else 1 + + history_str = "" + for round_num in range(len(history.get(0, [])) + len(history.get(1, []))): + player = round_num % 2 + round_idx = round_num // 2 + offer = None + if round_idx < len(history.get(player, [])): + offer = history[player][round_idx] + if isinstance(offer, Offer): + history_str += f"\nRound {round_idx + 1}: Player {player + 1} offered {offer.offer}" + elif offer is True: + history_str += f"\nRound {round_idx + 1}: Player {player + 1} ACCEPTED" + elif offer is False: + history_str += f"\nRound {round_idx + 1}: Player {player + 1} WALKED away" + + current_offer_str = f"\nCurrent offer on the table (the amount of each item being offered to you): {current_offer.offer if isinstance(current_offer, Offer) else 'None'}" + + if r == 1 and my_player_num == 1: + action_prompt = f""" + What is your action? 
As the first player, your available actions are: + - WALK to walk away + - A list of numbers [n1, n2, ...] representing your initial offer (what you give to Player 2)""" + elif current_offer is None: + action_prompt = f""" + What is your action? You can: + - WALK to walk away + - A list of numbers [n1, n2, ...] representing your offer (what you give to Player {other_player_num})""" + else: + action_prompt = f""" + What is your action? You can: + - ACCEPT to accept the current offer + - WALK to walk away + - A list of numbers [n1, n2, ...] representing your counteroffer (what you give to Player {other_player_num})""" + + if circle == 0: + prompt = make_prompt_circle_0(T, quantities, V, values, W1, W2, w, R, g, r, history, current_offer, player_num, p1_outside_offer, p2_outside_offer, circle, other_player_num, my_player_num) + elif circle == 1: + prompt = make_prompt_circle_1(T, quantities, V, values, W1, W2, w, R, g, r, history, current_offer, player_num, p1_outside_offer, p2_outside_offer, circle, other_player_num, my_player_num) + elif circle == 2: + prompt = make_prompt_circle_2(T, quantities, V, values, W1, W2, w, R, g, r, history, current_offer, player_num, p1_outside_offer, p2_outside_offer, circle, other_player_num, my_player_num) + elif circle == 3: + prompt = make_prompt_circle_3(T, quantities, V, values, W1, W2, w, R, g, r, history, current_offer, player_num, p1_outside_offer, p2_outside_offer, circle, other_player_num, my_player_num) + elif circle == 4: + prompt = make_prompt_circle_4(T, quantities, V, values, W1, W2, w, R, g, r, history, current_offer, player_num, p1_outside_offer, p2_outside_offer, circle, other_player_num, my_player_num) + elif circle == 5: + prompt = make_prompt_circle_5(T, quantities, V, values, W1, W2, w, R, g, r, history, current_offer, player_num, p1_outside_offer, p2_outside_offer, circle, other_player_num, my_player_num, example_offer_less_than_outside_offer_self) + elif circle == 6: + prompt = make_prompt_circle_6(T, quantities, V, values, W1, W2, w, R, g, r, history, current_offer, player_num, p1_outside_offer, p2_outside_offer, circle, other_player_num, my_player_num, example_offer_less_than_outside_offer_self) + else: + raise ValueError(f"Circle {circle} not supported") + + return f"{prompt}\n Negotiation history:{history_str}\n{current_offer_str}\n{action_prompt}" + + diff --git a/scenarios/prompts/prompt_texts/__init__.py b/scenarios/prompts/prompt_texts/__init__.py new file mode 100644 index 0000000..139597f --- /dev/null +++ b/scenarios/prompts/prompt_texts/__init__.py @@ -0,0 +1,2 @@ + + diff --git a/scenarios/prompts/prompt_texts/circle0.py b/scenarios/prompts/prompt_texts/circle0.py new file mode 100644 index 0000000..3145019 --- /dev/null +++ b/scenarios/prompts/prompt_texts/circle0.py @@ -0,0 +1,34 @@ +import numpy as np # noqa: F401 +from scenarios.utils.offer import Offer + +''' +THE VESTIBULE CIRCLE SETTING THE STAGE +- Initial rules, instructions, and structure of game +''' + +def make_prompt_circle_0(T: int, quantities: list[int], V: int, values: list[float], W1: int, W2: int, w: int, R: int, g: float, r: int, history: dict, current_offer: Offer = None, player_num: int = 0, p1_outside_offer: list[int] = None, p2_outside_offer: list[int] = None, circle: int = 0, other_player_num: int = 0, my_player_num: int = 0) -> str: + return f""" + You and another agent have to negotiate a division of items between the two of you. + You are Player {my_player_num} and the other agent is Player {other_player_num}. 
+ There are {T} types of items, called item 1 through item {T}. + There are {', '.join([f"{q} unit{'s' if q != 1 else ''} of item {i+1}" for i, q in enumerate(quantities)])} to divide. + Both you and Player {other_player_num} have a private value per unit of each item type. + These values are drawn from a uniform random distribution, ranging from 1 to {V-1}. + Your private values are {', '.join([str(v) + ' for item ' + str(i+1) for i, v in enumerate(values)])}. + You have a private outside offer drawn from a uniform random distribution ranging from {p1_outside_offer[0] if my_player_num == 1 else p2_outside_offer[0]} to your total value of all items, which is {p1_outside_offer[1] if my_player_num == 1 else p2_outside_offer[1]}. Player {other_player_num} has a private outside offer drawn from a uniform random distribution ranging from 1 to to their total value of all items. + Your outside offer value is {w}. + The negotiation proceeds in {R} rounds. + There is a discount rate gamma = {g}, such that if the process concludes after r rounds the overall value of the negotiation to each player is their value for the outcome multiplied by gamma to the power (r-1). + At each round, Player 1 takes an action, followed by Player 2. + The possible actions are to ACCEPT the other player's current offer (if any), make a COUNTEROFFER, or WALK away. If the game gets to the last round, and player 2 chooses to make a counteroffer, this is treated as a WALK. + If a player chooses ACCEPT, the negotiation ends in a deal to divide the items according to the accepted offer. + The value of an outcome is determined by each player's private values per unit of each item and the quantities they receive in the deal. This value is adjusted by the discount factor, which is used to compute the present value of the negotiation outcome. + If a player chooses WALK, the negotiation ends without a deal, and each player receives the value of their private outside offer. + Please show your reasoning step by step, then provide your action in one of these formats in your response (if you do not do this your response will be invalid): + {{"action": "ACCEPT"}} - to accept the current offer + {{"action": "WALK"}} - to walk away from negotiations + {{"action": "COUNTEROFFER", "offer": [n1, n2, ...]}} - where n1, n2, ... are numbers representing the number of units of each item being offered to the other player as part of the counteroffer. + Any response not in these exact formats will be invalid and treated as a WALK. If you provide a counteroffer, it must be a valid offer, otherwise it will be treated as a WALK. + It is now round {r}. + """ + diff --git a/scenarios/prompts/prompt_texts/circle1.py b/scenarios/prompts/prompt_texts/circle1.py new file mode 100644 index 0000000..59dcd28 --- /dev/null +++ b/scenarios/prompts/prompt_texts/circle1.py @@ -0,0 +1,34 @@ +import numpy as np # noqa: F401 +from scenarios.utils.offer import Offer + +''' +CIRCLE 1, THE LIMBO OF REASONING: +- Provide objective & emphasize consideration for outside offer +''' + +def make_prompt_circle_1(T: int, quantities: list[int], V: int, values: list[float], W1: int, W2: int, w: int, R: int, g: float, r: int, history: dict, current_offer: Offer = None, player_num: int = 0, p1_outside_offer: list[int] = None, p2_outside_offer: list[int] = None, circle: int = 0, other_player_num: int = 0, my_player_num: int = 0) -> str: + return f""" + You and another agent have to negotiate a division of items between the two of you. 
+ You are Player {my_player_num} and the other agent is Player {other_player_num}. + There are {T} types of items, called item 1 through item {T}. + There are {', '.join([f"{q} unit{'s' if q != 1 else ''} of item {i+1}" for i, q in enumerate(quantities)])} to divide. + Both you and Player {other_player_num} have a private value per unit of each item type. + These values are drawn from a uniform random distribution, ranging from 1 to {V-1}. + Your private values are {', '.join([str(v) + ' for item ' + str(i+1) for i, v in enumerate(values)])}. + You have a private outside offer drawn from a uniform random distribution ranging from {p1_outside_offer[0] if my_player_num == 1 else p2_outside_offer[0]} to your total value of all items, which is {p1_outside_offer[1] if my_player_num == 1 else p2_outside_offer[1]}. Player {other_player_num} has a private outside offer drawn from a uniform random distribution ranging from 1 to to their total value of all items. + Your outside offer value is {w}. Your objective is to maximize your value of the outcome of the negotiation game. Remember, you have a guaranteed alternative: your outside offer. + The negotiation proceeds in {R} rounds. + There is a discount rate gamma = {g}, such that if the process concludes after r rounds the overall value of the negotiation to each player is their value for the outcome multiplied by gamma to the power (r-1). + At each round, Player 1 takes an action, followed by Player 2. + The possible actions are to ACCEPT the other player's current offer (if any), make a COUNTEROFFER, or WALK away. If the game gets to the last round, and player 2 chooses to make a counteroffer, this is treated as a WALK. + If a player chooses ACCEPT, the negotiation ends in a deal to divide the items according to the accepted offer. + The value of an outcome is determined by each player's private values per unit of each item and the quantities they receive in the deal. This value is adjusted by the discount factor, which is used to compute the present value of the negotiation outcome. + If a player chooses WALK, the negotiation ends without a deal, and each player receives the value of their private outside offer. + Please show your reasoning step by step, then provide your action in one of these formats in your response (if you do not do this your response will be invalid): + {{"action": "ACCEPT"}} - to accept the current offer + {{"action": "WALK"}} - to walk away from negotiations + {{"action": "COUNTEROFFER", "offer": [n1, n2, ...]}} - where n1, n2, ... are numbers representing the number of units of each item being offered to the other player as part of the counteroffer. + Any response not in these exact formats will be invalid and treated as a WALK. If you provide a counteroffer, it must be a valid offer, otherwise it will be treated as a WALK. + It is now round {r}. 
+ """ + diff --git a/scenarios/prompts/prompt_texts/circle2.py b/scenarios/prompts/prompt_texts/circle2.py new file mode 100644 index 0000000..cc8038b --- /dev/null +++ b/scenarios/prompts/prompt_texts/circle2.py @@ -0,0 +1,40 @@ +import numpy as np # noqa: F401 +from scenarios.utils.offer import Offer + +''' +CIRCLE 2, THE CIRCLE OF ILLUSTRATIONS: +- Provide base example of how to compute the value of an allocation/offer +''' + +def make_prompt_circle_2(T: int, quantities: list[int], V: int, values: list[float], W1: int, W2: int, w: int, R: int, g: float, r: int, history: dict, current_offer: Offer = None, player_num: int = 0, p1_outside_offer: list[int] = None, p2_outside_offer: list[int] = None, circle: int = 0, other_player_num: int = 0, my_player_num: int = 0) -> str: + return f""" + You and another agent have to negotiate a division of items between the two of you. + You are Player {my_player_num} and the other agent is Player {other_player_num}. + There are {T} types of items, called item 1 through item {T}. + There are {', '.join([f"{q} unit{'s' if q != 1 else ''} of item {i+1}" for i, q in enumerate(quantities)])} to divide. + Both you and Player {other_player_num} have a private value per unit of each item type. + These values are drawn from a uniform random distribution, ranging from 1 to {V-1}. + Your private values are {', '.join([str(v) + ' for item ' + str(i+1) for i, v in enumerate(values)])}. + You have a private outside offer drawn from a uniform random distribution ranging from {p1_outside_offer[0] if my_player_num == 1 else p2_outside_offer[0]} to your total value of all items, which is {p1_outside_offer[1] if my_player_num == 1 else p2_outside_offer[1]}. Player {other_player_num} has a private outside offer drawn from a uniform random distribution ranging from 1 to to their total value of all items. + Your outside offer value is {w}. Your objective is to maximize your value of the outcome of the negotiation game. Remember, you have a guaranteed alternative: your outside offer. + Before making any counteroffer, you should calculate its total value to you and compare it to your outside offer value of {w}. + For example, if you were considering offering the other player 2 units of each item (keeping 3 units of each for yourself), you would calculate: + 3 units of item 1 = 3 × {values[0]} = {3*values[0]} + 3 units of item 2 = 3 × {values[1]} = {3*values[1]} + 3 units of item 3 = 3 × {values[2]} = {3*values[2]} + Total value = {sum([3*values[i] for i in range(T)])} (sum of all item values) + The negotiation proceeds in {R} rounds. + There is a discount rate gamma = {g}, such that if the process concludes after r rounds the overall value of the negotiation to each player is their value for the outcome multiplied by gamma to the power (r-1). + At each round, Player 1 takes an action, followed by Player 2. + The possible actions are to ACCEPT the other player's current offer (if any), make a COUNTEROFFER, or WALK away. If the game gets to the last round, and player 2 chooses to make a counteroffer, this is treated as a WALK. + If a player chooses ACCEPT, the negotiation ends in a deal to divide the items according to the accepted offer. + The value of an outcome is determined by each player's private values per unit of each item and the quantities they receive in the deal. This value is adjusted by the discount factor, which is used to compute the present value of the negotiation outcome. 
+ If a player chooses WALK, the negotiation ends without a deal, and each player receives the value of their private outside offer. + Please show your reasoning step by step, then provide your action in one of these formats in your response (if you do not do this your response will be invalid): + {{"action": "ACCEPT"}} - to accept the current offer + {{"action": "WALK"}} - to walk away from negotiations + {{"action": "COUNTEROFFER", "offer": [n1, n2, ...]}} - where n1, n2, ... are numbers representing the number of units of each item being offered to the other player as part of the counteroffer. + Any response not in these exact formats will be invalid and treated as a WALK. If you provide a counteroffer, it must be a valid offer, otherwise it will be treated as a WALK. + It is now round {r}. + """ + diff --git a/scenarios/prompts/prompt_texts/circle3.py b/scenarios/prompts/prompt_texts/circle3.py new file mode 100644 index 0000000..78c8ff6 --- /dev/null +++ b/scenarios/prompts/prompt_texts/circle3.py @@ -0,0 +1,56 @@ +import numpy as np # noqa: F401 +from scenarios.utils.offer import Offer + +''' +CIRCLE OF ERRORS +- Addition add description of errors & explain that errors are obstructive to the objective +''' + +def make_prompt_circle_3(T: int, quantities: list[int], V: int, values: list[float], W1: int, W2: int, w: int, R: int, g: float, r: int, history: dict, current_offer: Offer = None, player_num: int = 0, p1_outside_offer: list[int] = None, p2_outside_offer: list[int] = None, circle: int = 0, other_player_num: int = 0, my_player_num: int = 0) -> str: + return f""" + You and another agent have to negotiate a division of items between the two of you. + You are Player {my_player_num} and the other agent is Player {other_player_num}. + There are {T} types of items, called item 1 through item {T}. + There are {', '.join([f"{q} unit{'s' if q != 1 else ''} of item {i+1}" for i, q in enumerate(quantities)])} to divide. + Both you and Player {other_player_num} have a private value per unit of each item type. + These values are drawn from a uniform random distribution, ranging from 1 to {V-1}. + Your private values are {', '.join([str(v) + ' for item ' + str(i+1) for i, v in enumerate(values)])}. + You have a private outside offer drawn from a uniform random distribution ranging from {p1_outside_offer[0] if my_player_num == 1 else p2_outside_offer[0]} to your total value of all items, which is {p1_outside_offer[1] if my_player_num == 1 else p2_outside_offer[1]}. Player {other_player_num} has a private outside offer drawn from a uniform random distribution ranging from 1 to to their total value of all items. + Your outside offer value is {w}. Your objective is to maximize your value of the outcome of the negotiation game. Remember, you have a guaranteed alternative: your outside offer. + Before making any counteroffer, you should calculate its total value to you and compare it to your outside offer value of {w}. + For example, if you were considering offering the other player 2 units of each item (keeping 3 units of each for yourself), you would calculate: + 3 units of item 1 = 3 × {values[0]} = {3*values[0]} (multiplying units by your value per unit) + 3 units of item 2 = 3 × {values[1]} = {3*values[1]} (multiplying units by your value per unit) + 3 units of item 3 = 3 × {values[2]} = {3*values[2]} (multiplying units by your value per unit) + Total value = {sum([3*values[i] for i in range(T)])} (sum of all item values) + + The negotiation proceeds in {R} rounds. 
+ There is a discount rate gamma = {g}, such that if the process concludes after r rounds the overall value of the negotiation to each player is their value for the outcome multiplied by gamma to the power (r-1). + At each round, Player 1 takes an action, followed by Player 2. + The possible actions are to ACCEPT the other player's current offer (if any), make a COUNTEROFFER, or WALK away. If the game gets to the last round, and player 2 chooses to make a counteroffer, this is treated as a WALK. + If a player chooses ACCEPT, the negotiation ends in a deal to divide the items according to the accepted offer. + The value of an outcome is determined by each player's private values per unit of each item and the quantities they receive in the deal. This value is adjusted by the discount factor, which is used to compute the present value of the negotiation outcome. + If a player chooses WALK, the negotiation ends without a deal, and each player receives the value of their private outside offer. + The following step-by-step questions are designed to guide you through a comprehensive analysis. By systematically addressing these questions, you can evaluate the current state of the negotiation, assess potential offers, and make informed decisions. You must use the information that you acquired through the step-by-step questioning above to decide what action you will make. + Let's walk through this step by step: + 1) First, analyze the current situation: + - What is my outside offer value? + - What are the values of the items involved? + - What is the total pool of items? + - How does the discount factor influence the value of accepting the current offer versus waiting for future offers? + 2) Assess the value of offers: + - For the current offer (if any): What is my total value if I accept it? + - For potential counteroffers: What would be my total value for different proposed divisions? + - How do these values compare to my outside offer value? + 3) Make a decision based on the analysis: + - Should I accept the current offer? + - Should I walk away and take my outside offer? + - Or should I propose a specific counteroffer? + Please show your reasoning step by step, then provide your action in one of these formats in your response (if you do not do this your response will be invalid): + {{"action": "ACCEPT"}} - to accept the current offer + {{"action": "WALK"}} - to walk away from negotiations + {{"action": "COUNTEROFFER", "offer": [n1, n2, ...]}} - where n1, n2, ... are numbers representing the number of units of each item being offered to the other player as part of the counteroffer. + Any response not in these exact formats will be invalid and treated as a WALK. If you provide a counteroffer, it must be a valid offer, otherwise it will be treated as a WALK. + It is now round {r}. 
+ """ + diff --git a/scenarios/prompts/prompt_texts/circle4.py b/scenarios/prompts/prompt_texts/circle4.py new file mode 100644 index 0000000..bd4763c --- /dev/null +++ b/scenarios/prompts/prompt_texts/circle4.py @@ -0,0 +1,56 @@ +import numpy as np # noqa: F401 +from scenarios.utils.offer import Offer + +''' +CIRCLE OF ERRORS +- Addition: add description of errors & explain that errors are obstructive to the objective +''' + +def make_prompt_circle_4(T: int, quantities: list[int], V: int, values: list[float], W1: int, W2: int, w: int, R: int, g: float, r: int, history: dict, current_offer: Offer = None, player_num: int = 0, p1_outside_offer: list[int] = None, p2_outside_offer: list[int] = None, circle: int = 0, other_player_num: int = 0, my_player_num: int = 0) -> str: + return f""" + You and another agent have to negotiate a division of items between the two of you. + You are Player {my_player_num} and the other agent is Player {other_player_num}. + There are {T} types of items, called item 1 through item {T}. + There are {', '.join([f"{q} unit{'s' if q != 1 else ''} of item {i+1}" for i, q in enumerate(quantities)])} to divide. + Both you and Player {other_player_num} have a private value per unit of each item type. + These values are drawn from a uniform random distribution, ranging from 1 to {V-1}. + Your private values are {', '.join([str(v) + ' for item ' + str(i+1) for i, v in enumerate(values)])}. + You have a private outside offer drawn from a uniform random distribution ranging from {p1_outside_offer[0] if my_player_num == 1 else p2_outside_offer[0]} to your total value of all items, which is {p1_outside_offer[1] if my_player_num == 1 else p2_outside_offer[1]}. Player {other_player_num} has a private outside offer drawn from a uniform random distribution ranging from 1 to their total value of all items. + Your outside offer value is {w}. Your objective is to maximize your value of the outcome of the negotiation game. Remember, you have a guaranteed alternative: your outside offer. + Before making any counteroffer, you should calculate its total value to you and compare it to your outside offer value of {w}. + For example, if you were considering offering the other player 2 units of each item (keeping 3 units of each for yourself), you would calculate: + 3 units of item 1 = 3 × {values[0]} = {3*values[0]} (multiplying units by your value per unit) + 3 units of item 2 = 3 × {values[1]} = {3*values[1]} (multiplying units by your value per unit) + 3 units of item 3 = 3 × {values[2]} = {3*values[2]} (multiplying units by your value per unit) + Total value = {sum([3*values[i] for i in range(T)])} (sum of all item values) + + The negotiation proceeds in {R} rounds. + There is a discount rate gamma = {g}, such that if the process concludes after r rounds the overall value of the negotiation to each player is their value for the outcome multiplied by gamma to the power (r-1). + At each round, Player 1 takes an action, followed by Player 2. + The possible actions are to ACCEPT the other player's current offer (if any), make a COUNTEROFFER, or WALK away. If the game gets to the last round, and player 2 chooses to make a counteroffer, this is treated as a WALK. + If a player chooses ACCEPT, the negotiation ends in a deal to divide the items according to the accepted offer. + The value of an outcome is determined by each player's private values per unit of each item and the quantities they receive in the deal.
This value is adjusted by the discount factor, which is used to compute the present value of the negotiation outcome. + If a player chooses WALK, the negotiation ends without a deal, and each player receives the value of their private outside offer. + The following step-by-step questions are designed to guide you through a comprehensive analysis. By systematically addressing these questions, you can evaluate the current state of the negotiation, assess potential offers, and make informed decisions. You must use the information that you acquired through the step-by-step questioning above to decide what action you will make. + Let's walk through this step by step: + 1) First, analyze the current situation: + - What is my outside offer value? + - What are the values of the items involved? + - What is the total pool of items? + - How does the discount factor influence the value of accepting the current offer versus waiting for future offers? + 2) Assess the value of offers: + - For the current offer (if any): What is my total value if I accept it? + - For potential counteroffers: What would be my total value for different proposed divisions? + - How do these values compare to my outside offer value? + 3) Make a decision based on the analysis: + - Should I accept the current offer? + - Should I walk away and take my outside offer? + - Or should I propose a specific counteroffer? + Please show your reasoning step by step, then provide your action in one of these formats in your response (if you do not do this your response will be invalid): + {{"action": "ACCEPT"}} - to accept the current offer + {{"action": "WALK"}} - to walk away from negotiations + {{"action": "COUNTEROFFER", "offer": [n1, n2, ...]}} - where n1, n2, ... are numbers representing the number of units of each item being offered to the other player as part of the counteroffer. + Any response not in these exact formats will be invalid and treated as a WALK. If you provide a counteroffer, it must be a valid offer, otherwise it will be treated as a WALK. + It is now round {r}. + """ + diff --git a/scenarios/prompts/prompt_texts/circle5.py b/scenarios/prompts/prompt_texts/circle5.py new file mode 100644 index 0000000..e60b37e --- /dev/null +++ b/scenarios/prompts/prompt_texts/circle5.py @@ -0,0 +1,73 @@ +import numpy as np +from scenarios.utils.offer import Offer + +''' +CIRCLE OF ERRORS +- Addition: add description of errors & explain that errors are obstructive to the objective +''' + +def make_prompt_circle_5(T: int, quantities: list[int], V: int, values: list[float], W1: int, W2: int, w: int, R: int, g: float, r: int, history: dict, current_offer: Offer = None, player_num: int = 0, p1_outside_offer: list[int] = None, p2_outside_offer: list[int] = None, circle: int = 0, other_player_num: int = 0, my_player_num: int = 0, example_offer_less_than_outside_offer_self: list[int] = None) -> str: + if example_offer_less_than_outside_offer_self is None: + example_offer_less_than_outside_offer_self = [0 for _ in range(len(quantities))] + return f""" + You and another agent have to negotiate a division of items between the two of you. + You are Player {my_player_num} and the other agent is Player {other_player_num}. + There are {T} types of items, called item 1 through item {T}. + There are {', '.join([f"{q} unit{'s' if q != 1 else ''} of item {i+1}" for i, q in enumerate(quantities)])} to divide. + Both you and Player {other_player_num} have a private value per unit of each item type.
+ These values are drawn from a uniform random distribution, ranging from 1 to {V-1}. + Your private values are {', '.join([str(v) + ' for item ' + str(i+1) for i, v in enumerate(values)])}. + You have a private outside offer drawn from a uniform random distribution ranging from {p1_outside_offer[0] if my_player_num == 1 else p2_outside_offer[0]} to your total value of all items, which is {p1_outside_offer[1] if my_player_num == 1 else p2_outside_offer[1]}. Player {other_player_num} has a private outside offer drawn from a uniform random distribution ranging from 1 to their total value of all items. + Your outside offer value is {w}. Your objective is to maximize your value of the outcome of the negotiation game. Remember, you have a guaranteed alternative: your outside offer. + Before making any counteroffer, you should calculate its total value to you and compare it to your outside offer value of {w}. + For example, if you were considering offering the other player 2 units of each item (keeping 3 units of each for yourself), you would calculate: + 3 units of item 1 = 3 × {values[0]} = {3*values[0]} (multiplying units by your value per unit) + 3 units of item 2 = 3 × {values[1]} = {3*values[1]} (multiplying units by your value per unit) + 3 units of item 3 = 3 × {values[2]} = {3*values[2]} (multiplying units by your value per unit) + Total value = {sum([3*values[i] for i in range(T)])} (sum of all item values) + + The negotiation proceeds in {R} rounds. + There is a discount rate gamma = {g}, such that if the process concludes after r rounds the overall value of the negotiation to each player is their value for the outcome multiplied by gamma to the power (r-1). + At each round, Player 1 takes an action, followed by Player 2. + The possible actions are to ACCEPT the other player's current offer (if any), make a COUNTEROFFER, or WALK away. If the game gets to the last round, and player 2 chooses to make a counteroffer, this is treated as a WALK. + If a player chooses ACCEPT, the negotiation ends in a deal to divide the items according to the accepted offer. + The value of an outcome is determined by each player's private values per unit of each item and the quantities they receive in the deal. This value is adjusted by the discount factor, which is used to compute the present value of the negotiation outcome. + If a player chooses WALK, the negotiation ends without a deal, and each player receives the value of their private outside offer. + The following step-by-step questions are designed to guide you through a comprehensive analysis. By systematically addressing these questions, you can evaluate the current state of the negotiation, assess potential offers, and make informed decisions. You must use the information that you acquired through the step-by-step questioning above to decide what action you will make. + Let's walk through this step by step: + 1) First, analyze the current situation: + - What is my outside offer value? + - What are the values of the items involved? + - What is the total pool of items? + - How does the discount factor influence the value of accepting the current offer versus waiting for future offers? + 2) Assess the value of offers: + - For the current offer (if any): What is my total value if I accept it? + - For potential counteroffers: What would be my total value for different proposed divisions? + - How do these values compare to my outside offer value? + 3) Make a decision based on the analysis: + - Should I accept the current offer?
+ - Should I walk away and take my outside offer? + - Or should I propose a specific counteroffer? + In the bargaining game, there are five mistakes you can make that conflict with your objectives. + While these aren't the only possible errors, they represent undesirable negotiation behaviors that can undermine your payoff or cause you to miss out on better deals. + These mistakes are: + - Mistake 1: Making an offer worse than your previous offer. This occurs when you reject an offer better for you than the one you subsequently propose. + - Mistake 2: Making an offer worse for you than your outside offer. This happens if you propose giving away so much that what you keep is worth less than your guaranteed alternative, which is your outside offer. + - Mistake 3: Offering no items or all items. Offering nothing (or everything) to the opponent (in the early or middle rounds) can be a clear suboptimal move. + - Mistake 4: Accepting an offer worse for you than your outside offer. This occurs if you accept a division that yields a payoff lower than your guaranteed fallback. + - Mistake 5: Walking away from an offer better than your outside offer. This occurs when you reject a division that actually yields a higher payoff than your fallback. + To prevent these mistakes, adopt a strategy similar to the following example: Before making any counteroffer, + calculate its total value to you and compare it to your outside offer value. For instance, suppose you keep only {example_offer_less_than_outside_offer_self} items and offer the rest to the other party. Your value would be: + {values[0]} x {example_offer_less_than_outside_offer_self[0]} + {values[1]} x {example_offer_less_than_outside_offer_self[1]} + {values[2]} x {example_offer_less_than_outside_offer_self[2]} + which is {np.dot(values, example_offer_less_than_outside_offer_self)} (sum of all item values) + + which is less than your outside offer of {w}. If your proposed offer results in a value lower than your outside offer, continue iterating until you develop a more advantageous offer that is better than your outside offer. + This reasoning can be applied to each of the five highlighted mistakes to ensure that your offers align with your objectives and avoid undesirable negotiation behaviors. + Please show your reasoning step by step, then provide your action in one of these formats in your response (if you do not do this your response will be invalid): + {{"action": "ACCEPT"}} - to accept the current offer + {{"action": "WALK"}} - to walk away from negotiations + {{"action": "COUNTEROFFER", "offer": [n1, n2, ...]}} - where n1, n2, ... are numbers representing the number of units of each item being offered to the other player as part of the counteroffer. + Any response not in these exact formats will be invalid and treated as a WALK. If you provide a counteroffer, it must be a valid offer, otherwise it will be treated as a WALK. + It is now round {r}. 
+ """ + diff --git a/scenarios/prompts/prompt_texts/circle6.py b/scenarios/prompts/prompt_texts/circle6.py new file mode 100644 index 0000000..06b6fc6 --- /dev/null +++ b/scenarios/prompts/prompt_texts/circle6.py @@ -0,0 +1,70 @@ +import numpy as np +from scenarios.utils.offer import Offer + +def make_prompt_circle_6(T: int, quantities: list[int], V: int, values: list[float], W1: int, W2: int, w: int, R: int, g: float, r: int, history: dict, current_offer: Offer = None, player_num: int = 0, p1_outside_offer: list[int] = None, p2_outside_offer: list[int] = None, circle: int = 0, other_player_num: int = 0, my_player_num: int = 0, example_offer_less_than_outside_offer_self: list[int] = None) -> str: + if example_offer_less_than_outside_offer_self is None: + example_offer_less_than_outside_offer_self = [0 for _ in range(len(quantities))] + return f""" + You and another agent have to negotiate a division of items between the two of you. + You are Player {my_player_num} and the other agent is Player {other_player_num}. + There are {T} types of items, called item 1 through item {T}. + There are {', '.join([f"{q} unit{'s' if q != 1 else ''} of item {i+1}" for i, q in enumerate(quantities)])} to divide. + Both you and Player {other_player_num} have a private value per unit of each item type. + These values are drawn from a uniform random distribution, ranging from 1 to {V-1}. + Your private values are {', '.join([str(v) + ' for item ' + str(i+1) for i, v in enumerate(values)])}. + You have a private outside offer drawn from a uniform random distribution ranging from {p1_outside_offer[0] if my_player_num == 1 else p2_outside_offer[0]} to your total value of all items, which is {p1_outside_offer[1] if my_player_num == 1 else p2_outside_offer[1]}. Player {other_player_num} has a private outside offer drawn from a uniform random distribution ranging from 1 to their total value of all items. + Your outside offer value is {w}. Your objective is to maximize your value of the outcome of the negotiation game. Remember, you have a guaranteed alternative: your outside offer. + Before making any counteroffer, you should calculate its total value to you and compare it to your outside offer value of {w}. + For example, if you were considering offering the other player 2 units of each item (keeping 3 units of each for yourself), you would calculate: + 3 units of item 1 = 3 × {values[0]} = {3*values[0]} (multiplying units by your value per unit) + 3 units of item 2 = 3 × {values[1]} = {3*values[1]} (multiplying units by your value per unit) + 3 units of item 3 = 3 × {values[2]} = {3*values[2]} (multiplying units by your value per unit) + Total value = {sum([3*values[i] for i in range(T)])} (sum of all item values) + + The negotiation proceeds in {R} rounds. + There is a discount rate gamma = {g}, such that if the process concludes after r rounds the overall value of the negotiation to each player is their value for the outcome multiplied by gamma to the power (r-1). + At each round, Player 1 takes an action, followed by Player 2. + The possible actions are to ACCEPT the other player's current offer (if any), make a COUNTEROFFER, or WALK away. If the game gets to the last round, and player 2 chooses to make a counteroffer, this is treated as a WALK. + If a player chooses ACCEPT, the negotiation ends in a deal to divide the items according to the accepted offer. + The value of an outcome is determined by each player's private values per unit of each item and the quantities they receive in the deal.
This value is adjusted by the discount factor, which is used to compute the present value of the negotiation outcome. + If a player chooses WALK, the negotiation ends without a deal, and each player receives the value of their private outside offer. + The following step-by-step questions are designed to guide you through a comprehensive analysis. By systematically addressing these questions, you can evaluate the current state of the negotiation, assess potential offers, and make informed decisions. You must use the information that you acquired through the step-by-step questioning above to decide what action you will make. + Let's walk through this step by step: + 1) First, analyze the current situation: + - What is my outside offer value? + - What are the values of the items involved? + - What is the total pool of items? + - How does the discount factor influence the value of accepting the current offer versus waiting for future offers? + 2) Assess the value of offers: + - For the current offer (if any): What is my total value if I accept it? + - For potential counteroffers: What would be my total value for different proposed divisions? + - How do these values compare to my outside offer value? + 3) Make a decision based on the analysis: + - Should I accept the current offer? + - Should I walk away and take my outside offer? + - Or should I propose a specific counteroffer? + In the bargaining game, there are five mistakes you can make that conflict with your objectives. + While these aren't the only possible errors, they represent undesirable negotiation behaviors that can undermine your payoff or cause you to miss out on better deals. + These mistakes are: + - Mistake 1: Making an offer worse than your previous offer. This occurs when you reject an offer better for you than the one you subsequently propose. + - Mistake 2: Making an offer worse for you than your outside offer. This happens if you propose giving away so much that what you keep is worth less than your guaranteed alternative, which is your outside offer. + - Mistake 3: Offering no items or all items. Offering nothing (or everything) to the opponent (in the early or middle rounds) can be a clear suboptimal move. + - Mistake 4: Accepting an offer worse for you than your outside offer. This occurs if you accept a division that yields a payoff lower than your guaranteed fallback. + - Mistake 5: Walking away from an offer better than your outside offer. This occurs when you reject a division that actually yields a higher payoff than your fallback. + To prevent these mistakes, adopt a strategy similar to the following example: Before making any counteroffer, + calculate its total value to you and compare it to your outside offer value. For instance, suppose you keep only {example_offer_less_than_outside_offer_self} items and offer the rest to the other party. Your value would be: + {values[0]} x {example_offer_less_than_outside_offer_self[0]} + {values[1]} x {example_offer_less_than_outside_offer_self[1]} + {values[2]} x {example_offer_less_than_outside_offer_self[2]} + which is {np.dot(values, example_offer_less_than_outside_offer_self)} (sum of all item values) + + which is less than your outside offer of {w}. If your proposed offer results in a value lower than your outside offer, continue iterating until you develop a more advantageous offer that is better than your outside offer. 
+ This reasoning can be applied to each of the five highlighted mistakes to ensure that your offers align with your objectives and avoid undesirable negotiation behaviors. + Keep in mind that the offers the opposing agent makes reflect its own values. If their offer includes most or all units of a particular item, it might indicate that the agent does not highly value that item, whereas offering none could suggest the opposite. + You can use this kind of evidence to help inform your decision-making. + Please show your reasoning step by step, then provide your action in one of these formats in your response (if you do not do this your response will be invalid): + {{"action": "ACCEPT"}} - to accept the current offer + {{"action": "WALK"}} - to walk away from negotiations + {{"action": "COUNTEROFFER", "offer": [n1, n2, ...]}} - where n1, n2, ... are numbers representing the number of units of each item being offered to the other player as part of the counteroffer. + Any response not in these exact formats will be invalid and treated as a WALK. If you provide a counteroffer, it must be a valid offer, otherwise it will be treated as a WALK. + It is now round {r}. + """ + diff --git a/scenarios/utils/__init__.py b/scenarios/utils/__init__.py new file mode 100644 index 0000000..af25f78 --- /dev/null +++ b/scenarios/utils/__init__.py @@ -0,0 +1 @@ +# Utilities package diff --git a/scenarios/utils/offer.py b/scenarios/utils/offer.py new file mode 100644 index 0000000..b4d7ffc --- /dev/null +++ b/scenarios/utils/offer.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import List + + +@dataclass +class Offer: + player: int + offer: List[int] + + diff --git a/src/agentbeats/green_executor.py b/src/agentbeats/green_executor.py index 8993f02..e78798e 100644 --- a/src/agentbeats/green_executor.py +++ b/src/agentbeats/green_executor.py @@ -65,7 +65,7 @@ async def execute( try: await self.agent.run_eval(req, updater) - await updater.complete() + # `run_eval` is responsible for marking completion/failure.
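With the `green_executor.py` change above, the success path no longer marks the task complete inside `execute`: `await updater.complete()` has been removed, so the agent's `run_eval` is now expected to call `updater.complete()` or `updater.failed(...)` itself, while the executor's `except` block (continued below) only reports unexpected exceptions. What follows is a minimal, self-contained sketch of that assumed contract, not the repository's implementation; the stub updater mimics only the two calls used in `green_executor.py`, and the request shape borrows the `participants`/`challenger` fields from `test_config.json`.

```python
# Sketch only: illustrates the completion contract implied by the green_executor.py
# change. StubUpdater and this run_eval are illustrative stand-ins, not project code.
import asyncio


class StubUpdater:
    """Mimics the two updater calls seen in green_executor.py (assumption)."""

    async def complete(self) -> None:
        print("task marked complete")

    async def failed(self, message: str) -> None:
        print(f"task marked failed: {message}")


async def run_eval(req: dict, updater: StubUpdater) -> None:
    # Under the new contract, run_eval must finish by marking the task itself.
    try:
        if not req.get("participants"):
            raise ValueError("no purple agents registered")
        # ... play the bargaining games and score the purple agents here ...
        await updater.complete()
    except ValueError as err:
        # Expected, domain-level failures are reported here; anything else propagates
        # to the executor's generic exception handler.
        await updater.failed(f"Evaluation failed: {err}")


if __name__ == "__main__":
    sample_req = {"participants": {"challenger": "http://localhost:9999"}}
    asyncio.run(run_eval(sample_req, StubUpdater()))
```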
except Exception as e: print(f"Agent error: {e}") await updater.failed(new_agent_text_message(f"Agent error: {e}", context_id=context.context_id)) diff --git a/test_config.json b/test_config.json new file mode 100644 index 0000000..0ad265c --- /dev/null +++ b/test_config.json @@ -0,0 +1,12 @@ +{ + "participants": { + "challenger": "http://localhost:9999" + }, + "games": 3, + "max_rounds": 3, + "discount": 0.98, + "bootstrap": 10, + "parallel": false, + "dry_run": true, + "debug": true +} diff --git a/uv.lock b/uv.lock index b354248..50b8b0d 100644 --- a/uv.lock +++ b/uv.lock @@ -1,10 +1,6 @@ version = 1 revision = 3 -requires-python = ">=3.11" -resolution-markers = [ - "python_full_version < '3.13'", - "python_full_version >= '3.13'", -] +requires-python = "==3.11.*" [[package]] name = "a2a-sdk" @@ -22,6 +18,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4c/96/c33802d929b0f884cb6e509195d69914632536256d273bd7127e900d79ea/a2a_sdk-0.3.5-py3-none-any.whl", hash = "sha256:fd85b1e4e7be18a89b5d723e4013171510150a235275876f98de9e1ba869457e", size = 136911, upload-time = "2025-09-08T17:30:34.091Z" }, ] +[[package]] +name = "absl-py" +version = "2.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/10/2a/c93173ffa1b39c1d0395b7e842bbdc62e556ca9d8d3b5572926f3e4ca752/absl_py-2.3.1.tar.gz", hash = "sha256:a97820526f7fbfd2ec1bce83f3f25e3a14840dac0d8e02a0b71cd75db3f77fc9", size = 116588, upload-time = "2025-07-03T09:31:44.05Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/aa/ba0014cc4659328dc818a28827be78e6d97312ab0cb98105a770924dc11e/absl_py-2.3.1-py3-none-any.whl", hash = "sha256:eeecf07f0c2a93ace0772c92e596ace6d3d3996c042b2128459aaae2a76de11d", size = 135811, upload-time = "2025-07-03T09:31:42.253Z" }, +] + [[package]] name = "absolufy-imports" version = "0.3.1" @@ -37,10 +42,21 @@ version = "0.1.0" source = { editable = "." 
} dependencies = [ { name = "a2a-sdk" }, + { name = "chex" }, + { name = "cvxpy" }, + { name = "dm-haiku" }, + { name = "ecos" }, { name = "google-adk" }, { name = "google-genai" }, + { name = "jax" }, + { name = "jaxlib" }, + { name = "ml-collections" }, + { name = "numpy" }, + { name = "optax" }, { name = "pydantic" }, { name = "python-dotenv" }, + { name = "rlax" }, + { name = "torch" }, { name = "uvicorn" }, ] @@ -52,10 +68,21 @@ dev = [ [package.metadata] requires-dist = [ { name = "a2a-sdk", specifier = ">=0.3.5" }, + { name = "chex", specifier = ">=0.1.8" }, + { name = "cvxpy", specifier = ">=1.4.0" }, + { name = "dm-haiku", specifier = ">=0.0.12" }, + { name = "ecos", specifier = ">=2.0.0" }, { name = "google-adk", specifier = ">=1.14.1" }, { name = "google-genai", specifier = ">=1.36.0" }, + { name = "jax", specifier = ">=0.4.20" }, + { name = "jaxlib", specifier = ">=0.4.20" }, + { name = "ml-collections", specifier = ">=0.1.1" }, + { name = "numpy", specifier = ">=1.26.0" }, + { name = "optax", specifier = ">=0.1.7" }, { name = "pydantic", specifier = ">=2.11.9" }, { name = "python-dotenv", specifier = ">=1.1.1" }, + { name = "rlax", specifier = ">=0.1.6" }, + { name = "torch", specifier = ">=2.0.0" }, { name = "uvicorn", specifier = ">=0.35.0" }, ] @@ -92,7 +119,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "idna" }, { name = "sniffio" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions" }, ] sdist = { url = "https://files.pythonhosted.org/packages/f1/b4/636b3b65173d3ce9a38ef5f0522789614e590dab6a8d505340a4efe4c567/anyio-4.10.0.tar.gz", hash = "sha256:3f3fae35c96039744587aa5b8371e7e8e603c0702999535961dd336026973ba6", size = 213252, upload-time = "2025-08-04T08:54:26.451Z" } wheels = [ @@ -160,52 +187,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2b/c0/015b25184413d7ab0a410775fdb4a50fca20f5589b5dab1dbbfa3baad8ce/cffi-2.0.0-cp311-cp311-win32.whl", hash = "sha256:c649e3a33450ec82378822b3dad03cc228b8f5963c0c12fc3b1e0ab940f768a5", size = 172076, upload-time = "2025-09-08T23:22:40.95Z" }, { url = "https://files.pythonhosted.org/packages/ae/8f/dc5531155e7070361eb1b7e4c1a9d896d0cb21c49f807a6c03fd63fc877e/cffi-2.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:66f011380d0e49ed280c789fbd08ff0d40968ee7b665575489afa95c98196ab5", size = 182820, upload-time = "2025-09-08T23:22:42.463Z" }, { url = "https://files.pythonhosted.org/packages/95/5c/1b493356429f9aecfd56bc171285a4c4ac8697f76e9bbbbb105e537853a1/cffi-2.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:c6638687455baf640e37344fe26d37c404db8b80d037c3d29f58fe8d1c3b194d", size = 177635, upload-time = "2025-09-08T23:22:43.623Z" }, - { url = "https://files.pythonhosted.org/packages/ea/47/4f61023ea636104d4f16ab488e268b93008c3d0bb76893b1b31db1f96802/cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d", size = 185271, upload-time = "2025-09-08T23:22:44.795Z" }, - { url = "https://files.pythonhosted.org/packages/df/a2/781b623f57358e360d62cdd7a8c681f074a71d445418a776eef0aadb4ab4/cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c", size = 181048, upload-time = "2025-09-08T23:22:45.938Z" }, - { url = 
"https://files.pythonhosted.org/packages/ff/df/a4f0fbd47331ceeba3d37c2e51e9dfc9722498becbeec2bd8bc856c9538a/cffi-2.0.0-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe", size = 212529, upload-time = "2025-09-08T23:22:47.349Z" }, - { url = "https://files.pythonhosted.org/packages/d5/72/12b5f8d3865bf0f87cf1404d8c374e7487dcf097a1c91c436e72e6badd83/cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062", size = 220097, upload-time = "2025-09-08T23:22:48.677Z" }, - { url = "https://files.pythonhosted.org/packages/c2/95/7a135d52a50dfa7c882ab0ac17e8dc11cec9d55d2c18dda414c051c5e69e/cffi-2.0.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e", size = 207983, upload-time = "2025-09-08T23:22:50.06Z" }, - { url = "https://files.pythonhosted.org/packages/3a/c8/15cb9ada8895957ea171c62dc78ff3e99159ee7adb13c0123c001a2546c1/cffi-2.0.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037", size = 206519, upload-time = "2025-09-08T23:22:51.364Z" }, - { url = "https://files.pythonhosted.org/packages/78/2d/7fa73dfa841b5ac06c7b8855cfc18622132e365f5b81d02230333ff26e9e/cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba", size = 219572, upload-time = "2025-09-08T23:22:52.902Z" }, - { url = "https://files.pythonhosted.org/packages/07/e0/267e57e387b4ca276b90f0434ff88b2c2241ad72b16d31836adddfd6031b/cffi-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94", size = 222963, upload-time = "2025-09-08T23:22:54.518Z" }, - { url = "https://files.pythonhosted.org/packages/b6/75/1f2747525e06f53efbd878f4d03bac5b859cbc11c633d0fb81432d98a795/cffi-2.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187", size = 221361, upload-time = "2025-09-08T23:22:55.867Z" }, - { url = "https://files.pythonhosted.org/packages/7b/2b/2b6435f76bfeb6bbf055596976da087377ede68df465419d192acf00c437/cffi-2.0.0-cp312-cp312-win32.whl", hash = "sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18", size = 172932, upload-time = "2025-09-08T23:22:57.188Z" }, - { url = "https://files.pythonhosted.org/packages/f8/ed/13bd4418627013bec4ed6e54283b1959cf6db888048c7cf4b4c3b5b36002/cffi-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5", size = 183557, upload-time = "2025-09-08T23:22:58.351Z" }, - { url = "https://files.pythonhosted.org/packages/95/31/9f7f93ad2f8eff1dbc1c3656d7ca5bfd8fb52c9d786b4dcf19b2d02217fa/cffi-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6", size = 177762, upload-time = "2025-09-08T23:22:59.668Z" }, - { url = "https://files.pythonhosted.org/packages/4b/8d/a0a47a0c9e413a658623d014e91e74a50cdd2c423f7ccfd44086ef767f90/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb", size = 185230, upload-time = "2025-09-08T23:23:00.879Z" }, - { url = 
"https://files.pythonhosted.org/packages/4a/d2/a6c0296814556c68ee32009d9c2ad4f85f2707cdecfd7727951ec228005d/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", size = 181043, upload-time = "2025-09-08T23:23:02.231Z" }, - { url = "https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446, upload-time = "2025-09-08T23:23:03.472Z" }, - { url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101, upload-time = "2025-09-08T23:23:04.792Z" }, - { url = "https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948, upload-time = "2025-09-08T23:23:06.127Z" }, - { url = "https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422, upload-time = "2025-09-08T23:23:07.753Z" }, - { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499, upload-time = "2025-09-08T23:23:09.648Z" }, - { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928, upload-time = "2025-09-08T23:23:10.928Z" }, - { url = "https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302, upload-time = "2025-09-08T23:23:12.42Z" }, - { url = "https://files.pythonhosted.org/packages/eb/6d/bf9bda840d5f1dfdbf0feca87fbdb64a918a69bca42cfa0ba7b137c48cb8/cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", size = 172909, upload-time = "2025-09-08T23:23:14.32Z" }, - { url = "https://files.pythonhosted.org/packages/37/18/6519e1ee6f5a1e579e04b9ddb6f1676c17368a7aba48299c3759bbc3c8b3/cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", size = 183402, upload-time = "2025-09-08T23:23:15.535Z" }, - { url = "https://files.pythonhosted.org/packages/cb/0e/02ceeec9a7d6ee63bb596121c2c8e9b3a9e150936f4fbef6ca1943e6137c/cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", size = 177780, upload-time = "2025-09-08T23:23:16.761Z" }, - { url = 
"https://files.pythonhosted.org/packages/92/c4/3ce07396253a83250ee98564f8d7e9789fab8e58858f35d07a9a2c78de9f/cffi-2.0.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5", size = 185320, upload-time = "2025-09-08T23:23:18.087Z" }, - { url = "https://files.pythonhosted.org/packages/59/dd/27e9fa567a23931c838c6b02d0764611c62290062a6d4e8ff7863daf9730/cffi-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13", size = 181487, upload-time = "2025-09-08T23:23:19.622Z" }, - { url = "https://files.pythonhosted.org/packages/d6/43/0e822876f87ea8a4ef95442c3d766a06a51fc5298823f884ef87aaad168c/cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", size = 220049, upload-time = "2025-09-08T23:23:20.853Z" }, - { url = "https://files.pythonhosted.org/packages/b4/89/76799151d9c2d2d1ead63c2429da9ea9d7aac304603de0c6e8764e6e8e70/cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c", size = 207793, upload-time = "2025-09-08T23:23:22.08Z" }, - { url = "https://files.pythonhosted.org/packages/bb/dd/3465b14bb9e24ee24cb88c9e3730f6de63111fffe513492bf8c808a3547e/cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef", size = 206300, upload-time = "2025-09-08T23:23:23.314Z" }, - { url = "https://files.pythonhosted.org/packages/47/d9/d83e293854571c877a92da46fdec39158f8d7e68da75bf73581225d28e90/cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", size = 219244, upload-time = "2025-09-08T23:23:24.541Z" }, - { url = "https://files.pythonhosted.org/packages/2b/0f/1f177e3683aead2bb00f7679a16451d302c436b5cbf2505f0ea8146ef59e/cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", size = 222828, upload-time = "2025-09-08T23:23:26.143Z" }, - { url = "https://files.pythonhosted.org/packages/c6/0f/cafacebd4b040e3119dcb32fed8bdef8dfe94da653155f9d0b9dc660166e/cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", size = 220926, upload-time = "2025-09-08T23:23:27.873Z" }, - { url = "https://files.pythonhosted.org/packages/3e/aa/df335faa45b395396fcbc03de2dfcab242cd61a9900e914fe682a59170b1/cffi-2.0.0-cp314-cp314-win32.whl", hash = "sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f", size = 175328, upload-time = "2025-09-08T23:23:44.61Z" }, - { url = "https://files.pythonhosted.org/packages/bb/92/882c2d30831744296ce713f0feb4c1cd30f346ef747b530b5318715cc367/cffi-2.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25", size = 185650, upload-time = "2025-09-08T23:23:45.848Z" }, - { url = "https://files.pythonhosted.org/packages/9f/2c/98ece204b9d35a7366b5b2c6539c350313ca13932143e79dc133ba757104/cffi-2.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad", size = 180687, upload-time = "2025-09-08T23:23:47.105Z" }, - { url = 
"https://files.pythonhosted.org/packages/3e/61/c768e4d548bfa607abcda77423448df8c471f25dbe64fb2ef6d555eae006/cffi-2.0.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9", size = 188773, upload-time = "2025-09-08T23:23:29.347Z" }, - { url = "https://files.pythonhosted.org/packages/2c/ea/5f76bce7cf6fcd0ab1a1058b5af899bfbef198bea4d5686da88471ea0336/cffi-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d", size = 185013, upload-time = "2025-09-08T23:23:30.63Z" }, - { url = "https://files.pythonhosted.org/packages/be/b4/c56878d0d1755cf9caa54ba71e5d049479c52f9e4afc230f06822162ab2f/cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", size = 221593, upload-time = "2025-09-08T23:23:31.91Z" }, - { url = "https://files.pythonhosted.org/packages/e0/0d/eb704606dfe8033e7128df5e90fee946bbcb64a04fcdaa97321309004000/cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8", size = 209354, upload-time = "2025-09-08T23:23:33.214Z" }, - { url = "https://files.pythonhosted.org/packages/d8/19/3c435d727b368ca475fb8742ab97c9cb13a0de600ce86f62eab7fa3eea60/cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc", size = 208480, upload-time = "2025-09-08T23:23:34.495Z" }, - { url = "https://files.pythonhosted.org/packages/d0/44/681604464ed9541673e486521497406fadcc15b5217c3e326b061696899a/cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", size = 221584, upload-time = "2025-09-08T23:23:36.096Z" }, - { url = "https://files.pythonhosted.org/packages/25/8e/342a504ff018a2825d395d44d63a767dd8ebc927ebda557fecdaca3ac33a/cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", size = 224443, upload-time = "2025-09-08T23:23:37.328Z" }, - { url = "https://files.pythonhosted.org/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437, upload-time = "2025-09-08T23:23:38.945Z" }, - { url = "https://files.pythonhosted.org/packages/a0/1d/ec1a60bd1a10daa292d3cd6bb0b359a81607154fb8165f3ec95fe003b85c/cffi-2.0.0-cp314-cp314t-win32.whl", hash = "sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e", size = 180487, upload-time = "2025-09-08T23:23:40.423Z" }, - { url = "https://files.pythonhosted.org/packages/bf/41/4c1168c74fac325c0c8156f04b6749c8b6a8f405bbf91413ba088359f60d/cffi-2.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6", size = 191726, upload-time = "2025-09-08T23:23:41.742Z" }, - { url = "https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195, upload-time = "2025-09-08T23:23:43.004Z" }, ] [[package]] @@ -225,42 +206,44 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/2f/36/77da9c6a328c54d17b960c89eccacfab8271fdaaa228305330915b88afa9/charset_normalizer-3.4.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1e8ac75d72fa3775e0b7cb7e4629cec13b7514d928d15ef8ea06bca03ef01cae", size = 151600, upload-time = "2025-08-09T07:56:04.089Z" }, { url = "https://files.pythonhosted.org/packages/64/d4/9eb4ff2c167edbbf08cdd28e19078bf195762e9bd63371689cab5ecd3d0d/charset_normalizer-3.4.3-cp311-cp311-win32.whl", hash = "sha256:6cf8fd4c04756b6b60146d98cd8a77d0cdae0e1ca20329da2ac85eed779b6849", size = 99616, upload-time = "2025-08-09T07:56:05.658Z" }, { url = "https://files.pythonhosted.org/packages/f4/9c/996a4a028222e7761a96634d1820de8a744ff4327a00ada9c8942033089b/charset_normalizer-3.4.3-cp311-cp311-win_amd64.whl", hash = "sha256:31a9a6f775f9bcd865d88ee350f0ffb0e25936a7f930ca98995c05abf1faf21c", size = 107108, upload-time = "2025-08-09T07:56:07.176Z" }, - { url = "https://files.pythonhosted.org/packages/e9/5e/14c94999e418d9b87682734589404a25854d5f5d0408df68bc15b6ff54bb/charset_normalizer-3.4.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e28e334d3ff134e88989d90ba04b47d84382a828c061d0d1027b1b12a62b39b1", size = 205655, upload-time = "2025-08-09T07:56:08.475Z" }, - { url = "https://files.pythonhosted.org/packages/7d/a8/c6ec5d389672521f644505a257f50544c074cf5fc292d5390331cd6fc9c3/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0cacf8f7297b0c4fcb74227692ca46b4a5852f8f4f24b3c766dd94a1075c4884", size = 146223, upload-time = "2025-08-09T07:56:09.708Z" }, - { url = "https://files.pythonhosted.org/packages/fc/eb/a2ffb08547f4e1e5415fb69eb7db25932c52a52bed371429648db4d84fb1/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c6fd51128a41297f5409deab284fecbe5305ebd7e5a1f959bee1c054622b7018", size = 159366, upload-time = "2025-08-09T07:56:11.326Z" }, - { url = "https://files.pythonhosted.org/packages/82/10/0fd19f20c624b278dddaf83b8464dcddc2456cb4b02bb902a6da126b87a1/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3cfb2aad70f2c6debfbcb717f23b7eb55febc0bb23dcffc0f076009da10c6392", size = 157104, upload-time = "2025-08-09T07:56:13.014Z" }, - { url = "https://files.pythonhosted.org/packages/16/ab/0233c3231af734f5dfcf0844aa9582d5a1466c985bbed6cedab85af9bfe3/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1606f4a55c0fd363d754049cdf400175ee96c992b1f8018b993941f221221c5f", size = 151830, upload-time = "2025-08-09T07:56:14.428Z" }, - { url = "https://files.pythonhosted.org/packages/ae/02/e29e22b4e02839a0e4a06557b1999d0a47db3567e82989b5bb21f3fbbd9f/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:027b776c26d38b7f15b26a5da1044f376455fb3766df8fc38563b4efbc515154", size = 148854, upload-time = "2025-08-09T07:56:16.051Z" }, - { url = "https://files.pythonhosted.org/packages/05/6b/e2539a0a4be302b481e8cafb5af8792da8093b486885a1ae4d15d452bcec/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:42e5088973e56e31e4fa58eb6bd709e42fc03799c11c42929592889a2e54c491", size = 160670, upload-time = "2025-08-09T07:56:17.314Z" }, - { url = "https://files.pythonhosted.org/packages/31/e7/883ee5676a2ef217a40ce0bffcc3d0dfbf9e64cbcfbdf822c52981c3304b/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_s390x.whl", 
hash = "sha256:cc34f233c9e71701040d772aa7490318673aa7164a0efe3172b2981218c26d93", size = 158501, upload-time = "2025-08-09T07:56:18.641Z" }, - { url = "https://files.pythonhosted.org/packages/c1/35/6525b21aa0db614cf8b5792d232021dca3df7f90a1944db934efa5d20bb1/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:320e8e66157cc4e247d9ddca8e21f427efc7a04bbd0ac8a9faf56583fa543f9f", size = 153173, upload-time = "2025-08-09T07:56:20.289Z" }, - { url = "https://files.pythonhosted.org/packages/50/ee/f4704bad8201de513fdc8aac1cabc87e38c5818c93857140e06e772b5892/charset_normalizer-3.4.3-cp312-cp312-win32.whl", hash = "sha256:fb6fecfd65564f208cbf0fba07f107fb661bcd1a7c389edbced3f7a493f70e37", size = 99822, upload-time = "2025-08-09T07:56:21.551Z" }, - { url = "https://files.pythonhosted.org/packages/39/f5/3b3836ca6064d0992c58c7561c6b6eee1b3892e9665d650c803bd5614522/charset_normalizer-3.4.3-cp312-cp312-win_amd64.whl", hash = "sha256:86df271bf921c2ee3818f0522e9a5b8092ca2ad8b065ece5d7d9d0e9f4849bcc", size = 107543, upload-time = "2025-08-09T07:56:23.115Z" }, - { url = "https://files.pythonhosted.org/packages/65/ca/2135ac97709b400c7654b4b764daf5c5567c2da45a30cdd20f9eefe2d658/charset_normalizer-3.4.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:14c2a87c65b351109f6abfc424cab3927b3bdece6f706e4d12faaf3d52ee5efe", size = 205326, upload-time = "2025-08-09T07:56:24.721Z" }, - { url = "https://files.pythonhosted.org/packages/71/11/98a04c3c97dd34e49c7d247083af03645ca3730809a5509443f3c37f7c99/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:41d1fc408ff5fdfb910200ec0e74abc40387bccb3252f3f27c0676731df2b2c8", size = 146008, upload-time = "2025-08-09T07:56:26.004Z" }, - { url = "https://files.pythonhosted.org/packages/60/f5/4659a4cb3c4ec146bec80c32d8bb16033752574c20b1252ee842a95d1a1e/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1bb60174149316da1c35fa5233681f7c0f9f514509b8e399ab70fea5f17e45c9", size = 159196, upload-time = "2025-08-09T07:56:27.25Z" }, - { url = "https://files.pythonhosted.org/packages/86/9e/f552f7a00611f168b9a5865a1414179b2c6de8235a4fa40189f6f79a1753/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:30d006f98569de3459c2fc1f2acde170b7b2bd265dc1943e87e1a4efe1b67c31", size = 156819, upload-time = "2025-08-09T07:56:28.515Z" }, - { url = "https://files.pythonhosted.org/packages/7e/95/42aa2156235cbc8fa61208aded06ef46111c4d3f0de233107b3f38631803/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:416175faf02e4b0810f1f38bcb54682878a4af94059a1cd63b8747244420801f", size = 151350, upload-time = "2025-08-09T07:56:29.716Z" }, - { url = "https://files.pythonhosted.org/packages/c2/a9/3865b02c56f300a6f94fc631ef54f0a8a29da74fb45a773dfd3dcd380af7/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6aab0f181c486f973bc7262a97f5aca3ee7e1437011ef0c2ec04b5a11d16c927", size = 148644, upload-time = "2025-08-09T07:56:30.984Z" }, - { url = "https://files.pythonhosted.org/packages/77/d9/cbcf1a2a5c7d7856f11e7ac2d782aec12bdfea60d104e60e0aa1c97849dc/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:fdabf8315679312cfa71302f9bd509ded4f2f263fb5b765cf1433b39106c3cc9", size = 160468, upload-time = "2025-08-09T07:56:32.252Z" }, - { url = 
"https://files.pythonhosted.org/packages/f6/42/6f45efee8697b89fda4d50580f292b8f7f9306cb2971d4b53f8914e4d890/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:bd28b817ea8c70215401f657edef3a8aa83c29d447fb0b622c35403780ba11d5", size = 158187, upload-time = "2025-08-09T07:56:33.481Z" }, - { url = "https://files.pythonhosted.org/packages/70/99/f1c3bdcfaa9c45b3ce96f70b14f070411366fa19549c1d4832c935d8e2c3/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:18343b2d246dc6761a249ba1fb13f9ee9a2bcd95decc767319506056ea4ad4dc", size = 152699, upload-time = "2025-08-09T07:56:34.739Z" }, - { url = "https://files.pythonhosted.org/packages/a3/ad/b0081f2f99a4b194bcbb1934ef3b12aa4d9702ced80a37026b7607c72e58/charset_normalizer-3.4.3-cp313-cp313-win32.whl", hash = "sha256:6fb70de56f1859a3f71261cbe41005f56a7842cc348d3aeb26237560bfa5e0ce", size = 99580, upload-time = "2025-08-09T07:56:35.981Z" }, - { url = "https://files.pythonhosted.org/packages/9a/8f/ae790790c7b64f925e5c953b924aaa42a243fb778fed9e41f147b2a5715a/charset_normalizer-3.4.3-cp313-cp313-win_amd64.whl", hash = "sha256:cf1ebb7d78e1ad8ec2a8c4732c7be2e736f6e5123a4146c5b89c9d1f585f8cef", size = 107366, upload-time = "2025-08-09T07:56:37.339Z" }, - { url = "https://files.pythonhosted.org/packages/8e/91/b5a06ad970ddc7a0e513112d40113e834638f4ca1120eb727a249fb2715e/charset_normalizer-3.4.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3cd35b7e8aedeb9e34c41385fda4f73ba609e561faedfae0a9e75e44ac558a15", size = 204342, upload-time = "2025-08-09T07:56:38.687Z" }, - { url = "https://files.pythonhosted.org/packages/ce/ec/1edc30a377f0a02689342f214455c3f6c2fbedd896a1d2f856c002fc3062/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b89bc04de1d83006373429975f8ef9e7932534b8cc9ca582e4db7d20d91816db", size = 145995, upload-time = "2025-08-09T07:56:40.048Z" }, - { url = "https://files.pythonhosted.org/packages/17/e5/5e67ab85e6d22b04641acb5399c8684f4d37caf7558a53859f0283a650e9/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2001a39612b241dae17b4687898843f254f8748b796a2e16f1051a17078d991d", size = 158640, upload-time = "2025-08-09T07:56:41.311Z" }, - { url = "https://files.pythonhosted.org/packages/f1/e5/38421987f6c697ee3722981289d554957c4be652f963d71c5e46a262e135/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8dcfc373f888e4fb39a7bc57e93e3b845e7f462dacc008d9749568b1c4ece096", size = 156636, upload-time = "2025-08-09T07:56:43.195Z" }, - { url = "https://files.pythonhosted.org/packages/a0/e4/5a075de8daa3ec0745a9a3b54467e0c2967daaaf2cec04c845f73493e9a1/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18b97b8404387b96cdbd30ad660f6407799126d26a39ca65729162fd810a99aa", size = 150939, upload-time = "2025-08-09T07:56:44.819Z" }, - { url = "https://files.pythonhosted.org/packages/02/f7/3611b32318b30974131db62b4043f335861d4d9b49adc6d57c1149cc49d4/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ccf600859c183d70eb47e05a44cd80a4ce77394d1ac0f79dbd2dd90a69a3a049", size = 148580, upload-time = "2025-08-09T07:56:46.684Z" }, - { url = 
"https://files.pythonhosted.org/packages/7e/61/19b36f4bd67f2793ab6a99b979b4e4f3d8fc754cbdffb805335df4337126/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:53cd68b185d98dde4ad8990e56a58dea83a4162161b1ea9272e5c9182ce415e0", size = 159870, upload-time = "2025-08-09T07:56:47.941Z" }, - { url = "https://files.pythonhosted.org/packages/06/57/84722eefdd338c04cf3030ada66889298eaedf3e7a30a624201e0cbe424a/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:30a96e1e1f865f78b030d65241c1ee850cdf422d869e9028e2fc1d5e4db73b92", size = 157797, upload-time = "2025-08-09T07:56:49.756Z" }, - { url = "https://files.pythonhosted.org/packages/72/2a/aff5dd112b2f14bcc3462c312dce5445806bfc8ab3a7328555da95330e4b/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d716a916938e03231e86e43782ca7878fb602a125a91e7acb8b5112e2e96ac16", size = 152224, upload-time = "2025-08-09T07:56:51.369Z" }, - { url = "https://files.pythonhosted.org/packages/b7/8c/9839225320046ed279c6e839d51f028342eb77c91c89b8ef2549f951f3ec/charset_normalizer-3.4.3-cp314-cp314-win32.whl", hash = "sha256:c6dbd0ccdda3a2ba7c2ecd9d77b37f3b5831687d8dc1b6ca5f56a4880cc7b7ce", size = 100086, upload-time = "2025-08-09T07:56:52.722Z" }, - { url = "https://files.pythonhosted.org/packages/ee/7a/36fbcf646e41f710ce0a563c1c9a343c6edf9be80786edeb15b6f62e17db/charset_normalizer-3.4.3-cp314-cp314-win_amd64.whl", hash = "sha256:73dc19b562516fc9bcf6e5d6e596df0b4eb98d87e4f79f3ae71840e6ed21361c", size = 107400, upload-time = "2025-08-09T07:56:55.172Z" }, { url = "https://files.pythonhosted.org/packages/8a/1f/f041989e93b001bc4e44bb1669ccdcf54d3f00e628229a85b08d330615c5/charset_normalizer-3.4.3-py3-none-any.whl", hash = "sha256:ce571ab16d890d23b5c278547ba694193a45011ff86a9162a71307ed9f86759a", size = 53175, upload-time = "2025-08-09T07:57:26.864Z" }, ] +[[package]] +name = "chex" +version = "0.1.91" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "absl-py" }, + { name = "jax" }, + { name = "jaxlib" }, + { name = "numpy" }, + { name = "toolz" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5b/7d/812f01e7b2ddf28a0caa8dde56bd951a2c8f691c9bbfce38d469458d1502/chex-0.1.91.tar.gz", hash = "sha256:65367a521415ada905b8c0222b0a41a68337fcadf79a1fb6fc992dbd95dd9f76", size = 90302, upload-time = "2025-09-01T21:49:32.834Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/0c/96102c01dd02ae740d4afc3644d5c7d7fc51d3feefd67300a2aa1ddbf7cb/chex-0.1.91-py3-none-any.whl", hash = "sha256:6fc4cbfc22301c08d4a7ef706045668410100962eba8ba6af03fa07f4e5dcf9b", size = 100965, upload-time = "2025-09-01T21:49:31.141Z" }, +] + +[[package]] +name = "clarabel" +version = "0.11.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi" }, + { name = "numpy" }, + { name = "scipy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/81/e2/47f692161779dbd98876015de934943effb667a014e6f79a6d746b3e4c2a/clarabel-0.11.1.tar.gz", hash = "sha256:e7c41c47f0e59aeab99aefff9e58af4a8753ee5269bbeecbd5526fc6f41b9598", size = 253949, upload-time = "2025-06-11T16:49:05.864Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/34/f7/f82698b6d00a40a80c67e9a32b2628886aadfaf7f7b32daa12a463e44571/clarabel-0.11.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c39160e4222040f051f2a0598691c4f9126b4d17f5b9e7678f76c71d611e12d8", size = 1039511, upload-time = "2025-06-11T16:48:58.525Z" }, + { url = 
"https://files.pythonhosted.org/packages/b0/8f/13650cfe25762b51175c677330e6471d5d2c5851a6fbd6df77f0681bb34e/clarabel-0.11.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:8963687ee250d27310d139eea5a6816f9c3ae31f33691b56579ca4f0f0b64b63", size = 935135, upload-time = "2025-06-11T16:48:59.901Z" }, + { url = "https://files.pythonhosted.org/packages/2b/9e/7af10d2b540b39f1a05d1ebba604fce933cc9bc0e65e88ec3b7a84976425/clarabel-0.11.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4837b9d0db01e98239f04b1e3526a6cf568529d3c19a8b3f591befdc467f9bb", size = 1079226, upload-time = "2025-06-11T16:49:00.987Z" }, + { url = "https://files.pythonhosted.org/packages/6b/a9/c76edf781ca3283186ff4b54a9a4fb51367fd04313a68e2b09f062407439/clarabel-0.11.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c8c41aaa6f3f8c0f3bd9d86c3e568dcaee079562c075bd2ec9fb3a80287380ef", size = 1164345, upload-time = "2025-06-11T16:49:02.675Z" }, + { url = "https://files.pythonhosted.org/packages/41/e6/4eee3062088c221e5a18b054e51c69f616e0bb0dc1b0a1a5e0fe90dfa18e/clarabel-0.11.1-cp39-abi3-win_amd64.whl", hash = "sha256:557d5148a4377ae1980b65d00605ae870a8f34f95f0f6a41e04aa6d3edf67148", size = 887310, upload-time = "2025-06-11T16:49:04.277Z" }, +] + [[package]] name = "click" version = "8.2.1" @@ -315,21 +298,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f5/c4/0da6e55595d9b9cd3b6eb5dc22f3a07ded7f116a3ea72629cab595abb804/cryptography-46.0.1-cp311-abi3-win32.whl", hash = "sha256:cbb8e769d4cac884bb28e3ff620ef1001b75588a5c83c9c9f1fdc9afbe7f29b0", size = 3058327, upload-time = "2025-09-17T00:09:03.726Z" }, { url = "https://files.pythonhosted.org/packages/95/0f/cd29a35e0d6e78a0ee61793564c8cff0929c38391cb0de27627bdc7525aa/cryptography-46.0.1-cp311-abi3-win_amd64.whl", hash = "sha256:92e8cfe8bd7dd86eac0a677499894862cd5cc2fd74de917daa881d00871ac8e7", size = 3523893, upload-time = "2025-09-17T00:09:06.272Z" }, { url = "https://files.pythonhosted.org/packages/f2/dd/eea390f3e78432bc3d2f53952375f8b37cb4d37783e626faa6a51e751719/cryptography-46.0.1-cp311-abi3-win_arm64.whl", hash = "sha256:db5597a4c7353b2e5fb05a8e6cb74b56a4658a2b7bf3cb6b1821ae7e7fd6eaa0", size = 2932145, upload-time = "2025-09-17T00:09:08.568Z" }, - { url = "https://files.pythonhosted.org/packages/0a/fb/c73588561afcd5e24b089952bd210b14676c0c5bf1213376350ae111945c/cryptography-46.0.1-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:4c49eda9a23019e11d32a0eb51a27b3e7ddedde91e099c0ac6373e3aacc0d2ee", size = 7193928, upload-time = "2025-09-17T00:09:10.595Z" }, - { url = "https://files.pythonhosted.org/packages/26/34/0ff0bb2d2c79f25a2a63109f3b76b9108a906dd2a2eb5c1d460b9938adbb/cryptography-46.0.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9babb7818fdd71394e576cf26c5452df77a355eac1a27ddfa24096665a27f8fd", size = 4293515, upload-time = "2025-09-17T00:09:12.861Z" }, - { url = "https://files.pythonhosted.org/packages/df/b7/d4f848aee24ecd1be01db6c42c4a270069a4f02a105d9c57e143daf6cf0f/cryptography-46.0.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9f2c4cc63be3ef43c0221861177cee5d14b505cd4d4599a89e2cd273c4d3542a", size = 4545619, upload-time = "2025-09-17T00:09:15.397Z" }, - { url = "https://files.pythonhosted.org/packages/44/a5/42fedefc754fd1901e2d95a69815ea4ec8a9eed31f4c4361fcab80288661/cryptography-46.0.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:41c281a74df173876da1dc9a9b6953d387f06e3d3ed9284e3baae3ab3f40883a", size = 4299160, 
upload-time = "2025-09-17T00:09:17.155Z" }, - { url = "https://files.pythonhosted.org/packages/86/a1/cd21174f56e769c831fbbd6399a1b7519b0ff6280acec1b826d7b072640c/cryptography-46.0.1-cp314-cp314t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0a17377fa52563d730248ba1f68185461fff36e8bc75d8787a7dd2e20a802b7a", size = 3994491, upload-time = "2025-09-17T00:09:18.971Z" }, - { url = "https://files.pythonhosted.org/packages/8d/2f/a8cbfa1c029987ddc746fd966711d4fa71efc891d37fbe9f030fe5ab4eec/cryptography-46.0.1-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:0d1922d9280e08cde90b518a10cd66831f632960a8d08cb3418922d83fce6f12", size = 4960157, upload-time = "2025-09-17T00:09:20.923Z" }, - { url = "https://files.pythonhosted.org/packages/67/ae/63a84e6789e0d5a2502edf06b552bcb0fa9ff16147265d5c44a211942abe/cryptography-46.0.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:af84e8e99f1a82cea149e253014ea9dc89f75b82c87bb6c7242203186f465129", size = 4577263, upload-time = "2025-09-17T00:09:23.356Z" }, - { url = "https://files.pythonhosted.org/packages/ef/8f/1b9fa8e92bd9cbcb3b7e1e593a5232f2c1e6f9bd72b919c1a6b37d315f92/cryptography-46.0.1-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:ef648d2c690703501714588b2ba640facd50fd16548133b11b2859e8655a69da", size = 4298703, upload-time = "2025-09-17T00:09:25.566Z" }, - { url = "https://files.pythonhosted.org/packages/c3/af/bb95db070e73fea3fae31d8a69ac1463d89d1c084220f549b00dd01094a8/cryptography-46.0.1-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:e94eb5fa32a8a9f9bf991f424f002913e3dd7c699ef552db9b14ba6a76a6313b", size = 4926363, upload-time = "2025-09-17T00:09:27.451Z" }, - { url = "https://files.pythonhosted.org/packages/f5/3b/d8fb17ffeb3a83157a1cc0aa5c60691d062aceecba09c2e5e77ebfc1870c/cryptography-46.0.1-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:534b96c0831855e29fc3b069b085fd185aa5353033631a585d5cd4dd5d40d657", size = 4576958, upload-time = "2025-09-17T00:09:29.924Z" }, - { url = "https://files.pythonhosted.org/packages/d9/46/86bc3a05c10c8aa88c8ae7e953a8b4e407c57823ed201dbcba55c4d655f4/cryptography-46.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f9b55038b5c6c47559aa33626d8ecd092f354e23de3c6975e4bb205df128a2a0", size = 4422507, upload-time = "2025-09-17T00:09:32.222Z" }, - { url = "https://files.pythonhosted.org/packages/a8/4e/387e5a21dfd2b4198e74968a541cfd6128f66f8ec94ed971776e15091ac3/cryptography-46.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ec13b7105117dbc9afd023300fb9954d72ca855c274fe563e72428ece10191c0", size = 4683964, upload-time = "2025-09-17T00:09:34.118Z" }, - { url = "https://files.pythonhosted.org/packages/25/a3/f9f5907b166adb8f26762071474b38bbfcf89858a5282f032899075a38a1/cryptography-46.0.1-cp314-cp314t-win32.whl", hash = "sha256:504e464944f2c003a0785b81668fe23c06f3b037e9cb9f68a7c672246319f277", size = 3029705, upload-time = "2025-09-17T00:09:36.381Z" }, - { url = "https://files.pythonhosted.org/packages/12/66/4d3a4f1850db2e71c2b1628d14b70b5e4c1684a1bd462f7fffb93c041c38/cryptography-46.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:c52fded6383f7e20eaf70a60aeddd796b3677c3ad2922c801be330db62778e05", size = 3502175, upload-time = "2025-09-17T00:09:38.261Z" }, - { url = "https://files.pythonhosted.org/packages/52/c7/9f10ad91435ef7d0d99a0b93c4360bea3df18050ff5b9038c489c31ac2f5/cryptography-46.0.1-cp314-cp314t-win_arm64.whl", hash = "sha256:9495d78f52c804b5ec8878b5b8c7873aa8e63db9cd9ee387ff2db3fffe4df784", size = 2912354, upload-time = 
"2025-09-17T00:09:40.078Z" }, { url = "https://files.pythonhosted.org/packages/98/e5/fbd632385542a3311915976f88e0dfcf09e62a3fc0aff86fb6762162a24d/cryptography-46.0.1-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:d84c40bdb8674c29fa192373498b6cb1e84f882889d21a471b45d1f868d8d44b", size = 7255677, upload-time = "2025-09-17T00:09:42.407Z" }, { url = "https://files.pythonhosted.org/packages/56/3e/13ce6eab9ad6eba1b15a7bd476f005a4c1b3f299f4c2f32b22408b0edccf/cryptography-46.0.1-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9ed64e5083fa806709e74fc5ea067dfef9090e5b7a2320a49be3c9df3583a2d8", size = 4301110, upload-time = "2025-09-17T00:09:45.614Z" }, { url = "https://files.pythonhosted.org/packages/a2/67/65dc233c1ddd688073cf7b136b06ff4b84bf517ba5529607c9d79720fc67/cryptography-46.0.1-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:341fb7a26bc9d6093c1b124b9f13acc283d2d51da440b98b55ab3f79f2522ead", size = 4562369, upload-time = "2025-09-17T00:09:47.601Z" }, @@ -353,6 +321,99 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6a/cd/fe6b65e1117ec7631f6be8951d3db076bac3e1b096e3e12710ed071ffc3c/cryptography-46.0.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:34f04b7311174469ab3ac2647469743720f8b6c8b046f238e5cb27905695eb2a", size = 3448210, upload-time = "2025-09-17T00:10:30.145Z" }, ] +[[package]] +name = "cvxpy" +version = "1.7.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "clarabel" }, + { name = "numpy" }, + { name = "osqp" }, + { name = "scipy" }, + { name = "scs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9a/7f/2a13e0e7ee76c03bc11aae397572e82d8a8bd23c1c3ac020766f0e15da8e/cvxpy-1.7.5.tar.gz", hash = "sha256:4b512218001c27659e16fc914a2490038635874681032c3c3485ff1099b83f5d", size = 1651490, upload-time = "2025-12-05T03:48:49.127Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/65/00/1d3c92ee50976ad8804b4526eceaba098f3455069e409cda39dfaadf6427/cvxpy-1.7.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0df3bc1aee0431ee6419cfc77fb7543ad7588150b9bb5d8ef44da7a76770ba1d", size = 1548998, upload-time = "2025-12-05T03:42:14.7Z" }, + { url = "https://files.pythonhosted.org/packages/a8/6f/052a0e80339f8080ca7788452efbb6164e9d543ed31e5ea23fb94882206f/cvxpy-1.7.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:86876084d1874c837b6dc9dad61ba1e873e979d06462fdc149a6ba0b067a8638", size = 1207819, upload-time = "2025-12-05T03:42:16.01Z" }, + { url = "https://files.pythonhosted.org/packages/f0/71/b7282178f46a744d6acd6c45122a3a5600458ba1aaa89612d618cd8b9d60/cvxpy-1.7.5-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7633c2a369188aa0fa3df4a767267774257c9dba71ac8e5b9e8eefb17e2613f8", size = 1220587, upload-time = "2025-12-05T03:45:08.717Z" }, + { url = "https://files.pythonhosted.org/packages/69/5a/168630f5aaaaf5d8be935369b8b7e7c8f9752921027679e9d79ac67305e8/cvxpy-1.7.5-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9f9d93892f0805a9fa1b0702ca4c6d3b8deb056ab0140a58f41b933fe8f28aae", size = 1249941, upload-time = "2025-12-05T03:45:09.961Z" }, + { url = "https://files.pythonhosted.org/packages/21/25/bf0914023100d1ee3c4ccf04e1638cc6b70b8c623f56a431051bc40d2540/cvxpy-1.7.5-cp311-cp311-win_amd64.whl", hash = "sha256:911575f28ecd3fd913165354aad24ebfe264a59a1d86a2c0e296177c6a13092f", size = 1148858, upload-time = "2025-12-05T03:36:16.144Z" }, +] + +[[package]] +name = "decorator" +version = "5.2.1" +source = 
{ registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/43/fa/6d96a0978d19e17b68d634497769987b16c8f4cd0a7a05048bec693caa6b/decorator-5.2.1.tar.gz", hash = "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360", size = 56711, upload-time = "2025-02-24T04:41:34.073Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4e/8c/f3147f5c4b73e7550fe5f9352eaa956ae838d5c51eb58e7a25b9f3e2643b/decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a", size = 9190, upload-time = "2025-02-24T04:41:32.565Z" }, +] + +[[package]] +name = "distrax" +version = "0.1.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "absl-py" }, + { name = "chex" }, + { name = "jax" }, + { name = "jaxlib" }, + { name = "numpy" }, + { name = "tfp-nightly" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/21/a1/eb2086d2fc27ddf627f47af392d67e7e023a153d1b00087d3d12eab0465a/distrax-0.1.7.tar.gz", hash = "sha256:78deec181894e811d416b34e6b500f5e8f6d39bc850bd6f3152ee9e2164c9bef", size = 183672, upload-time = "2025-09-01T23:06:00.108Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/3e/e67008a232c37f98f1563469e206b1f2820f01c9cce1118c02215f1bf361/distrax-0.1.7-py3-none-any.whl", hash = "sha256:cf502d64022c7a682882ae6f07aea35206c6a3d8ddfb1e3beb006723baf283fd", size = 312680, upload-time = "2025-09-01T23:05:58.175Z" }, +] + +[[package]] +name = "dm-env" +version = "1.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "absl-py" }, + { name = "dm-tree" }, + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/62/c9/93e8d6239d5806508a2ee4b370e67c6069943ca149f59f533923737a99b7/dm-env-1.6.tar.gz", hash = "sha256:a436eb1c654c39e0c986a516cee218bea7140b510fceff63f97eb4fcff3d93de", size = 20187, upload-time = "2022-12-21T00:25:29.306Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/08/7e/36d548040e61337bf9182637a589c44da407a47a923ee88aec7f0e89867c/dm_env-1.6-py3-none-any.whl", hash = "sha256:0eabb6759dd453b625e041032f7ae0c1e87d4eb61b6a96b9ca586483837abf29", size = 26339, upload-time = "2022-12-21T00:25:37.128Z" }, +] + +[[package]] +name = "dm-haiku" +version = "0.0.16" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "absl-py" }, + { name = "jmp" }, + { name = "numpy" }, + { name = "tabulate" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2a/fc/daf4689198f4c0af8b71611f39fcd5d68ce0ae59fa919b9e58192a7d70f5/dm_haiku-0.0.16.tar.gz", hash = "sha256:1830b0ce63c5cef2fb3a63a13033c9d8f612ee7f896f2b0b25a6ba484f5fad28", size = 263092, upload-time = "2025-12-17T15:55:35.145Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/91/0f53835d0292a74e6b37e68125b669827e2a75a26e01c34741d6c13cca6c/dm_haiku-0.0.16-py3-none-any.whl", hash = "sha256:cc355d4d5aaa85af20e5a23ccd278bc751232ac8e5971261bed39318c07d744f", size = 374267, upload-time = "2025-12-17T15:55:33.9Z" }, +] + +[[package]] +name = "dm-tree" +version = "0.1.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "absl-py" }, + { name = "attrs" }, + { name = "numpy" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a6/83/ce29720ccf934c6cfa9b9c95ebbe96558386e66886626066632b5e44afed/dm_tree-0.1.9.tar.gz", hash = "sha256:a4c7db3d3935a5a2d5e4b383fc26c6b0cd6f78c6d4605d3e7b518800ecd5342b", size = 35623, 
upload-time = "2025-01-30T20:45:37.13Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ac/b6/2d2de9f8901ccc5b6f34aea678e732816853015b9d756c86efcec189bf4b/dm_tree-0.1.9-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:7d7d784afaeb4b67d87d858261aaf02503939ddc1f09c4cca70728f9892ab004", size = 173561, upload-time = "2025-03-31T08:35:40.042Z" }, + { url = "https://files.pythonhosted.org/packages/3e/07/57459f32cf5683c25b596ab58f42a3305f91876c2f03d2fa6e9d0df75fcb/dm_tree-0.1.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e660d1779ddcbd1348410d08f67db4870d413a3ec4ba8b4b045bd5ce4bd8f35c", size = 146926, upload-time = "2025-01-30T20:45:20.622Z" }, + { url = "https://files.pythonhosted.org/packages/e8/46/939fbf81177c7cb3b1e5ddebd696237b3be9520769cce882f064de497103/dm_tree-0.1.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:294dc1cecf87552a45cdd5ddb215e7f5295a5a47c46f1f0a0463c3dd02a527d7", size = 152851, upload-time = "2025-01-30T20:45:23.032Z" }, + { url = "https://files.pythonhosted.org/packages/35/3e/a46933e0157b0ac87619a754ce1a796b2afc6386fca7c11f95c010f40745/dm_tree-0.1.9-cp311-cp311-win_amd64.whl", hash = "sha256:12f4cc6cd52a39aa38ff31577b6d79b6136a9a89273a876bf62335c9f65c27bf", size = 101522, upload-time = "2025-01-30T20:45:24.433Z" }, +] + [[package]] name = "docstring-parser" version = "0.17.0" @@ -362,6 +423,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/55/e2/2537ebcff11c1ee1ff17d8d0b6f4db75873e3b0fb32c2d4a2ee31ecb310a/docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708", size = 36896, upload-time = "2025-07-21T07:35:00.684Z" }, ] +[[package]] +name = "ecos" +version = "2.0.14" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "scipy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2e/5f/17716c533da95ed110815b159efa22b1064c8c41fd5c862f21aff7a7fec0/ecos-2.0.14.tar.gz", hash = "sha256:64b3201c0e0a7f0129050557c4ac50b00031e80a10534506dba1200c8dc1efe4", size = 142430, upload-time = "2024-06-18T03:48:34.809Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2b/9b/c886a268d4b7adfaa1171244cdbfa3c944e5a599fe7a5e738ee27390ab20/ecos-2.0.14-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:dc90b54eaae16ead128bfdd95e04bf808b73578bdf40ed652c55aa36a6d02e42", size = 92594, upload-time = "2024-06-18T03:47:51.721Z" }, + { url = "https://files.pythonhosted.org/packages/49/e9/fae34e8ef6a9b78c3098a4428ed0e8f77cdeb334a7dc17c649abb686ed08/ecos-2.0.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a8be3b4856838ae351fec40fb3589181d52b41cf75bf4d35342686a508c37a6", size = 220084, upload-time = "2024-06-18T03:47:47.343Z" }, + { url = "https://files.pythonhosted.org/packages/2f/45/1e52519d6c29dd26bbfaf92ece5b45ca3de3b7c8b2615a818aaeadb7ad63/ecos-2.0.14-cp311-cp311-win_amd64.whl", hash = "sha256:7495b3031ccc2d4cec72cdb40aed8a2d1fdd734fe40519b7e6047aead5e811cf", size = 72199, upload-time = "2024-06-18T03:49:07.772Z" }, +] + [[package]] name = "fastapi" version = "0.116.1" @@ -376,6 +452,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e5/47/d63c60f59a59467fda0f93f46335c9d18526d7071f025cb5b89d5353ea42/fastapi-0.116.1-py3-none-any.whl", hash = "sha256:c46ac7c312df840f0c9e220f7964bada936781bc4e2e6eb71f1c4d7553786565", size = 95631, upload-time = "2025-07-11T16:22:30.485Z" }, ] +[[package]] +name = "filelock" +version = 
"3.20.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c1/e0/a75dbe4bca1e7d41307323dad5ea2efdd95408f74ab2de8bd7dba9b51a1a/filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64", size = 19510, upload-time = "2026-01-02T15:33:32.582Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/30/ab407e2ec752aa541704ed8f93c11e2a5d92c168b8a755d818b74a3c5c2d/filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8", size = 16697, upload-time = "2026-01-02T15:33:31.133Z" }, +] + +[[package]] +name = "fsspec" +version = "2025.12.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b6/27/954057b0d1f53f086f681755207dda6de6c660ce133c829158e8e8fe7895/fsspec-2025.12.0.tar.gz", hash = "sha256:c505de011584597b1060ff778bb664c1bc022e87921b0e4f10cc9c44f9635973", size = 309748, upload-time = "2025-12-03T15:23:42.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/c7/b64cae5dba3a1b138d7123ec36bb5ccd39d39939f18454407e5468f4763f/fsspec-2025.12.0-py3-none-any.whl", hash = "sha256:8bf1fe301b7d8acfa6e8571e3b1c3d158f909666642431cc78a1b7b4dbc5ec5b", size = 201422, upload-time = "2025-12-03T15:23:41.434Z" }, +] + +[[package]] +name = "gast" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/91/f6/e73969782a2ecec280f8a176f2476149dd9dba69d5f8779ec6108a7721e6/gast-0.7.0.tar.gz", hash = "sha256:0bb14cd1b806722e91ddbab6fb86bba148c22b40e7ff11e248974e04c8adfdae", size = 33630, upload-time = "2025-11-29T15:30:05.266Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/33/f1c6a276de27b7d7339a34749cc33fa87f077f921969c47185d34a887ae2/gast-0.7.0-py3-none-any.whl", hash = "sha256:99cbf1365633a74099f69c59bd650476b96baa5ef196fec88032b00b31ba36f7", size = 22966, upload-time = "2025-11-29T15:30:03.983Z" }, +] + [[package]] name = "google-adk" version = "1.15.1" @@ -744,18 +847,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/b8/976a2b843610c211e7ccb3e248996a61e87dbb2c09b1499847e295080aec/google_crc32c-1.7.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee6547b657621b6cbed3562ea7826c3e11cab01cd33b74e1f677690652883e77", size = 33048, upload-time = "2025-03-26T14:41:30.679Z" }, { url = "https://files.pythonhosted.org/packages/c9/16/a3842c2cf591093b111d4a5e2bfb478ac6692d02f1b386d2a33283a19dc9/google_crc32c-1.7.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d68e17bad8f7dd9a49181a1f5a8f4b251c6dbc8cc96fb79f1d321dfd57d66f53", size = 32669, upload-time = "2025-03-26T14:41:31.432Z" }, { url = "https://files.pythonhosted.org/packages/04/17/ed9aba495916fcf5fe4ecb2267ceb851fc5f273c4e4625ae453350cfd564/google_crc32c-1.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:6335de12921f06e1f774d0dd1fbea6bf610abe0887a1638f64d694013138be5d", size = 33476, upload-time = "2025-03-26T14:29:10.211Z" }, - { url = "https://files.pythonhosted.org/packages/dd/b7/787e2453cf8639c94b3d06c9d61f512234a82e1d12d13d18584bd3049904/google_crc32c-1.7.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:2d73a68a653c57281401871dd4aeebbb6af3191dcac751a76ce430df4d403194", size = 30470, upload-time = "2025-03-26T14:34:31.655Z" }, - { url = 
"https://files.pythonhosted.org/packages/ed/b4/6042c2b0cbac3ec3a69bb4c49b28d2f517b7a0f4a0232603c42c58e22b44/google_crc32c-1.7.1-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:22beacf83baaf59f9d3ab2bbb4db0fb018da8e5aebdce07ef9f09fce8220285e", size = 30315, upload-time = "2025-03-26T15:01:54.634Z" }, - { url = "https://files.pythonhosted.org/packages/29/ad/01e7a61a5d059bc57b702d9ff6a18b2585ad97f720bd0a0dbe215df1ab0e/google_crc32c-1.7.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19eafa0e4af11b0a4eb3974483d55d2d77ad1911e6cf6f832e1574f6781fd337", size = 33180, upload-time = "2025-03-26T14:41:32.168Z" }, - { url = "https://files.pythonhosted.org/packages/3b/a5/7279055cf004561894ed3a7bfdf5bf90a53f28fadd01af7cd166e88ddf16/google_crc32c-1.7.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6d86616faaea68101195c6bdc40c494e4d76f41e07a37ffdef270879c15fb65", size = 32794, upload-time = "2025-03-26T14:41:33.264Z" }, - { url = "https://files.pythonhosted.org/packages/0f/d6/77060dbd140c624e42ae3ece3df53b9d811000729a5c821b9fd671ceaac6/google_crc32c-1.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:b7491bdc0c7564fcf48c0179d2048ab2f7c7ba36b84ccd3a3e1c3f7a72d3bba6", size = 33477, upload-time = "2025-03-26T14:29:10.94Z" }, - { url = "https://files.pythonhosted.org/packages/8b/72/b8d785e9184ba6297a8620c8a37cf6e39b81a8ca01bb0796d7cbb28b3386/google_crc32c-1.7.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:df8b38bdaf1629d62d51be8bdd04888f37c451564c2042d36e5812da9eff3c35", size = 30467, upload-time = "2025-03-26T14:36:06.909Z" }, - { url = "https://files.pythonhosted.org/packages/34/25/5f18076968212067c4e8ea95bf3b69669f9fc698476e5f5eb97d5b37999f/google_crc32c-1.7.1-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:e42e20a83a29aa2709a0cf271c7f8aefaa23b7ab52e53b322585297bb94d4638", size = 30309, upload-time = "2025-03-26T15:06:15.318Z" }, - { url = "https://files.pythonhosted.org/packages/92/83/9228fe65bf70e93e419f38bdf6c5ca5083fc6d32886ee79b450ceefd1dbd/google_crc32c-1.7.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:905a385140bf492ac300026717af339790921f411c0dfd9aa5a9e69a08ed32eb", size = 33133, upload-time = "2025-03-26T14:41:34.388Z" }, - { url = "https://files.pythonhosted.org/packages/c3/ca/1ea2fd13ff9f8955b85e7956872fdb7050c4ace8a2306a6d177edb9cf7fe/google_crc32c-1.7.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b211ddaf20f7ebeec5c333448582c224a7c90a9d98826fbab82c0ddc11348e6", size = 32773, upload-time = "2025-03-26T14:41:35.19Z" }, - { url = "https://files.pythonhosted.org/packages/89/32/a22a281806e3ef21b72db16f948cad22ec68e4bdd384139291e00ff82fe2/google_crc32c-1.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:0f99eaa09a9a7e642a61e06742856eec8b19fc0037832e03f941fe7cf0c8e4db", size = 33475, upload-time = "2025-03-26T14:29:11.771Z" }, - { url = "https://files.pythonhosted.org/packages/b8/c5/002975aff514e57fc084ba155697a049b3f9b52225ec3bc0f542871dd524/google_crc32c-1.7.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32d1da0d74ec5634a05f53ef7df18fc646666a25efaaca9fc7dcfd4caf1d98c3", size = 33243, upload-time = "2025-03-26T14:41:35.975Z" }, - { url = "https://files.pythonhosted.org/packages/61/cb/c585282a03a0cea70fcaa1bf55d5d702d0f2351094d663ec3be1c6c67c52/google_crc32c-1.7.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e10554d4abc5238823112c2ad7e4560f96c7bf3820b202660373d769d9e6e4c9", size = 32870, upload-time 
= "2025-03-26T14:41:37.08Z" }, { url = "https://files.pythonhosted.org/packages/16/1b/1693372bf423ada422f80fd88260dbfd140754adb15cbc4d7e9a68b1cb8e/google_crc32c-1.7.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85fef7fae11494e747c9fd1359a527e5970fc9603c90764843caabd3a16a0a48", size = 28241, upload-time = "2025-03-26T14:41:45.898Z" }, { url = "https://files.pythonhosted.org/packages/fd/3c/2a19a60a473de48717b4efb19398c3f914795b64a96cf3fbe82588044f78/google_crc32c-1.7.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6efb97eb4369d52593ad6f75e7e10d053cf00c48983f7a973105bc70b0ac4d82", size = 28048, upload-time = "2025-03-26T14:41:46.696Z" }, ] @@ -831,32 +922,9 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1f/8e/abdd3f14d735b2929290a018ecf133c901be4874b858dd1c604b9319f064/greenlet-3.2.4-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2523e5246274f54fdadbce8494458a2ebdcdbc7b802318466ac5606d3cded1f8", size = 587684, upload-time = "2025-08-07T13:18:25.164Z" }, { url = "https://files.pythonhosted.org/packages/5d/65/deb2a69c3e5996439b0176f6651e0052542bb6c8f8ec2e3fba97c9768805/greenlet-3.2.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1987de92fec508535687fb807a5cea1560f6196285a4cde35c100b8cd632cc52", size = 1116647, upload-time = "2025-08-07T13:42:38.655Z" }, { url = "https://files.pythonhosted.org/packages/3f/cc/b07000438a29ac5cfb2194bfc128151d52f333cee74dd7dfe3fb733fc16c/greenlet-3.2.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:55e9c5affaa6775e2c6b67659f3a71684de4c549b3dd9afca3bc773533d284fa", size = 1142073, upload-time = "2025-08-07T13:18:21.737Z" }, + { url = "https://files.pythonhosted.org/packages/67/24/28a5b2fa42d12b3d7e5614145f0bd89714c34c08be6aabe39c14dd52db34/greenlet-3.2.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c9c6de1940a7d828635fbd254d69db79e54619f165ee7ce32fda763a9cb6a58c", size = 1548385, upload-time = "2025-11-04T12:42:11.067Z" }, + { url = "https://files.pythonhosted.org/packages/6a/05/03f2f0bdd0b0ff9a4f7b99333d57b53a7709c27723ec8123056b084e69cd/greenlet-3.2.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:03c5136e7be905045160b1b9fdca93dd6727b180feeafda6818e6496434ed8c5", size = 1613329, upload-time = "2025-11-04T12:42:12.928Z" }, { url = "https://files.pythonhosted.org/packages/d8/0f/30aef242fcab550b0b3520b8e3561156857c94288f0332a79928c31a52cf/greenlet-3.2.4-cp311-cp311-win_amd64.whl", hash = "sha256:9c40adce87eaa9ddb593ccb0fa6a07caf34015a29bf8d344811665b573138db9", size = 299100, upload-time = "2025-08-07T13:44:12.287Z" }, - { url = "https://files.pythonhosted.org/packages/44/69/9b804adb5fd0671f367781560eb5eb586c4d495277c93bde4307b9e28068/greenlet-3.2.4-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3b67ca49f54cede0186854a008109d6ee71f66bd57bb36abd6d0a0267b540cdd", size = 274079, upload-time = "2025-08-07T13:15:45.033Z" }, - { url = "https://files.pythonhosted.org/packages/46/e9/d2a80c99f19a153eff70bc451ab78615583b8dac0754cfb942223d2c1a0d/greenlet-3.2.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddf9164e7a5b08e9d22511526865780a576f19ddd00d62f8a665949327fde8bb", size = 640997, upload-time = "2025-08-07T13:42:56.234Z" }, - { url = "https://files.pythonhosted.org/packages/3b/16/035dcfcc48715ccd345f3a93183267167cdd162ad123cd93067d86f27ce4/greenlet-3.2.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = 
"sha256:f28588772bb5fb869a8eb331374ec06f24a83a9c25bfa1f38b6993afe9c1e968", size = 655185, upload-time = "2025-08-07T13:45:27.624Z" }, - { url = "https://files.pythonhosted.org/packages/31/da/0386695eef69ffae1ad726881571dfe28b41970173947e7c558d9998de0f/greenlet-3.2.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:5c9320971821a7cb77cfab8d956fa8e39cd07ca44b6070db358ceb7f8797c8c9", size = 649926, upload-time = "2025-08-07T13:53:15.251Z" }, - { url = "https://files.pythonhosted.org/packages/68/88/69bf19fd4dc19981928ceacbc5fd4bb6bc2215d53199e367832e98d1d8fe/greenlet-3.2.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c60a6d84229b271d44b70fb6e5fa23781abb5d742af7b808ae3f6efd7c9c60f6", size = 651839, upload-time = "2025-08-07T13:18:30.281Z" }, - { url = "https://files.pythonhosted.org/packages/19/0d/6660d55f7373b2ff8152401a83e02084956da23ae58cddbfb0b330978fe9/greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b3812d8d0c9579967815af437d96623f45c0f2ae5f04e366de62a12d83a8fb0", size = 607586, upload-time = "2025-08-07T13:18:28.544Z" }, - { url = "https://files.pythonhosted.org/packages/8e/1a/c953fdedd22d81ee4629afbb38d2f9d71e37d23caace44775a3a969147d4/greenlet-3.2.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:abbf57b5a870d30c4675928c37278493044d7c14378350b3aa5d484fa65575f0", size = 1123281, upload-time = "2025-08-07T13:42:39.858Z" }, - { url = "https://files.pythonhosted.org/packages/3f/c7/12381b18e21aef2c6bd3a636da1088b888b97b7a0362fac2e4de92405f97/greenlet-3.2.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:20fb936b4652b6e307b8f347665e2c615540d4b42b3b4c8a321d8286da7e520f", size = 1151142, upload-time = "2025-08-07T13:18:22.981Z" }, - { url = "https://files.pythonhosted.org/packages/e9/08/b0814846b79399e585f974bbeebf5580fbe59e258ea7be64d9dfb253c84f/greenlet-3.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:a7d4e128405eea3814a12cc2605e0e6aedb4035bf32697f72deca74de4105e02", size = 299899, upload-time = "2025-08-07T13:38:53.448Z" }, - { url = "https://files.pythonhosted.org/packages/49/e8/58c7f85958bda41dafea50497cbd59738c5c43dbbea5ee83d651234398f4/greenlet-3.2.4-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:1a921e542453fe531144e91e1feedf12e07351b1cf6c9e8a3325ea600a715a31", size = 272814, upload-time = "2025-08-07T13:15:50.011Z" }, - { url = "https://files.pythonhosted.org/packages/62/dd/b9f59862e9e257a16e4e610480cfffd29e3fae018a68c2332090b53aac3d/greenlet-3.2.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd3c8e693bff0fff6ba55f140bf390fa92c994083f838fece0f63be121334945", size = 641073, upload-time = "2025-08-07T13:42:57.23Z" }, - { url = "https://files.pythonhosted.org/packages/f7/0b/bc13f787394920b23073ca3b6c4a7a21396301ed75a655bcb47196b50e6e/greenlet-3.2.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:710638eb93b1fa52823aa91bf75326f9ecdfd5e0466f00789246a5280f4ba0fc", size = 655191, upload-time = "2025-08-07T13:45:29.752Z" }, - { url = "https://files.pythonhosted.org/packages/f2/d6/6adde57d1345a8d0f14d31e4ab9c23cfe8e2cd39c3baf7674b4b0338d266/greenlet-3.2.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c5111ccdc9c88f423426df3fd1811bfc40ed66264d35aa373420a34377efc98a", size = 649516, upload-time = "2025-08-07T13:53:16.314Z" }, - { url = 
"https://files.pythonhosted.org/packages/7f/3b/3a3328a788d4a473889a2d403199932be55b1b0060f4ddd96ee7cdfcad10/greenlet-3.2.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d76383238584e9711e20ebe14db6c88ddcedc1829a9ad31a584389463b5aa504", size = 652169, upload-time = "2025-08-07T13:18:32.861Z" }, - { url = "https://files.pythonhosted.org/packages/ee/43/3cecdc0349359e1a527cbf2e3e28e5f8f06d3343aaf82ca13437a9aa290f/greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671", size = 610497, upload-time = "2025-08-07T13:18:31.636Z" }, - { url = "https://files.pythonhosted.org/packages/b8/19/06b6cf5d604e2c382a6f31cafafd6f33d5dea706f4db7bdab184bad2b21d/greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b", size = 1121662, upload-time = "2025-08-07T13:42:41.117Z" }, - { url = "https://files.pythonhosted.org/packages/a2/15/0d5e4e1a66fab130d98168fe984c509249c833c1a3c16806b90f253ce7b9/greenlet-3.2.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d25c5091190f2dc0eaa3f950252122edbbadbb682aa7b1ef2f8af0f8c0afefae", size = 1149210, upload-time = "2025-08-07T13:18:24.072Z" }, - { url = "https://files.pythonhosted.org/packages/0b/55/2321e43595e6801e105fcfdee02b34c0f996eb71e6ddffca6b10b7e1d771/greenlet-3.2.4-cp313-cp313-win_amd64.whl", hash = "sha256:554b03b6e73aaabec3745364d6239e9e012d64c68ccd0b8430c64ccc14939a8b", size = 299685, upload-time = "2025-08-07T13:24:38.824Z" }, - { url = "https://files.pythonhosted.org/packages/22/5c/85273fd7cc388285632b0498dbbab97596e04b154933dfe0f3e68156c68c/greenlet-3.2.4-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:49a30d5fda2507ae77be16479bdb62a660fa51b1eb4928b524975b3bde77b3c0", size = 273586, upload-time = "2025-08-07T13:16:08.004Z" }, - { url = "https://files.pythonhosted.org/packages/d1/75/10aeeaa3da9332c2e761e4c50d4c3556c21113ee3f0afa2cf5769946f7a3/greenlet-3.2.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:299fd615cd8fc86267b47597123e3f43ad79c9d8a22bebdce535e53550763e2f", size = 686346, upload-time = "2025-08-07T13:42:59.944Z" }, - { url = "https://files.pythonhosted.org/packages/c0/aa/687d6b12ffb505a4447567d1f3abea23bd20e73a5bed63871178e0831b7a/greenlet-3.2.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:c17b6b34111ea72fc5a4e4beec9711d2226285f0386ea83477cbb97c30a3f3a5", size = 699218, upload-time = "2025-08-07T13:45:30.969Z" }, - { url = "https://files.pythonhosted.org/packages/dc/8b/29aae55436521f1d6f8ff4e12fb676f3400de7fcf27fccd1d4d17fd8fecd/greenlet-3.2.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b4a1870c51720687af7fa3e7cda6d08d801dae660f75a76f3845b642b4da6ee1", size = 694659, upload-time = "2025-08-07T13:53:17.759Z" }, - { url = "https://files.pythonhosted.org/packages/92/2e/ea25914b1ebfde93b6fc4ff46d6864564fba59024e928bdc7de475affc25/greenlet-3.2.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:061dc4cf2c34852b052a8620d40f36324554bc192be474b9e9770e8c042fd735", size = 695355, upload-time = "2025-08-07T13:18:34.517Z" }, - { url = "https://files.pythonhosted.org/packages/72/60/fc56c62046ec17f6b0d3060564562c64c862948c9d4bc8aa807cf5bd74f4/greenlet-3.2.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:44358b9bf66c8576a9f57a590d5f5d6e72fa4228b763d0e43fee6d3b06d3a337", size = 657512, upload-time = 
"2025-08-07T13:18:33.969Z" }, - { url = "https://files.pythonhosted.org/packages/e3/a5/6ddab2b4c112be95601c13428db1d8b6608a8b6039816f2ba09c346c08fc/greenlet-3.2.4-cp314-cp314-win_amd64.whl", hash = "sha256:e37ab26028f12dbb0ff65f29a8d3d44a765c61e729647bf2ddfbbed621726f01", size = 303425, upload-time = "2025-08-07T13:32:27.59Z" }, ] [[package]] @@ -904,36 +972,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7e/b6/4bf9aacff45deca5eac5562547ed212556b831064da77971a4e632917da3/grpcio-1.75.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b10ad908118d38c2453ade7ff790e5bce36580c3742919007a2a78e3a1e521ca", size = 7503290, upload-time = "2025-09-26T09:01:49.28Z" }, { url = "https://files.pythonhosted.org/packages/3b/15/d8d69d10223cb54c887a2180bd29fe5fa2aec1d4995c8821f7aa6eaf72e4/grpcio-1.75.1-cp311-cp311-win32.whl", hash = "sha256:d6be2b5ee7bea656c954dcf6aa8093c6f0e6a3ef9945c99d99fcbfc88c5c0bfe", size = 3950631, upload-time = "2025-09-26T09:01:51.23Z" }, { url = "https://files.pythonhosted.org/packages/8a/40/7b8642d45fff6f83300c24eaac0380a840e5e7fe0e8d80afd31b99d7134e/grpcio-1.75.1-cp311-cp311-win_amd64.whl", hash = "sha256:61c692fb05956b17dd6d1ab480f7f10ad0536dba3bc8fd4e3c7263dc244ed772", size = 4646131, upload-time = "2025-09-26T09:01:53.266Z" }, - { url = "https://files.pythonhosted.org/packages/3a/81/42be79e73a50aaa20af66731c2defeb0e8c9008d9935a64dd8ea8e8c44eb/grpcio-1.75.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:7b888b33cd14085d86176b1628ad2fcbff94cfbbe7809465097aa0132e58b018", size = 5668314, upload-time = "2025-09-26T09:01:55.424Z" }, - { url = "https://files.pythonhosted.org/packages/c5/a7/3686ed15822fedc58c22f82b3a7403d9faf38d7c33de46d4de6f06e49426/grpcio-1.75.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:8775036efe4ad2085975531d221535329f5dac99b6c2a854a995456098f99546", size = 11476125, upload-time = "2025-09-26T09:01:57.927Z" }, - { url = "https://files.pythonhosted.org/packages/14/85/21c71d674f03345ab183c634ecd889d3330177e27baea8d5d247a89b6442/grpcio-1.75.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bb658f703468d7fbb5dcc4037c65391b7dc34f808ac46ed9136c24fc5eeb041d", size = 6246335, upload-time = "2025-09-26T09:02:00.76Z" }, - { url = "https://files.pythonhosted.org/packages/fd/db/3beb661bc56a385ae4fa6b0e70f6b91ac99d47afb726fe76aaff87ebb116/grpcio-1.75.1-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:4b7177a1cdb3c51b02b0c0a256b0a72fdab719600a693e0e9037949efffb200b", size = 6916309, upload-time = "2025-09-26T09:02:02.894Z" }, - { url = "https://files.pythonhosted.org/packages/1e/9c/eda9fe57f2b84343d44c1b66cf3831c973ba29b078b16a27d4587a1fdd47/grpcio-1.75.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7d4fa6ccc3ec2e68a04f7b883d354d7fea22a34c44ce535a2f0c0049cf626ddf", size = 6435419, upload-time = "2025-09-26T09:02:05.055Z" }, - { url = "https://files.pythonhosted.org/packages/c3/b8/090c98983e0a9d602e3f919a6e2d4e470a8b489452905f9a0fa472cac059/grpcio-1.75.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3d86880ecaeb5b2f0a8afa63824de93adb8ebe4e49d0e51442532f4e08add7d6", size = 7064893, upload-time = "2025-09-26T09:02:07.275Z" }, - { url = "https://files.pythonhosted.org/packages/ec/c0/6d53d4dbbd00f8bd81571f5478d8a95528b716e0eddb4217cc7cb45aae5f/grpcio-1.75.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a8041d2f9e8a742aeae96f4b047ee44e73619f4f9d24565e84d5446c623673b6", size = 8011922, upload-time = "2025-09-26T09:02:09.527Z" }, - { url = 
"https://files.pythonhosted.org/packages/f2/7c/48455b2d0c5949678d6982c3e31ea4d89df4e16131b03f7d5c590811cbe9/grpcio-1.75.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3652516048bf4c314ce12be37423c79829f46efffb390ad64149a10c6071e8de", size = 7466181, upload-time = "2025-09-26T09:02:12.279Z" }, - { url = "https://files.pythonhosted.org/packages/fd/12/04a0e79081e3170b6124f8cba9b6275871276be06c156ef981033f691880/grpcio-1.75.1-cp312-cp312-win32.whl", hash = "sha256:44b62345d8403975513af88da2f3d5cc76f73ca538ba46596f92a127c2aea945", size = 3938543, upload-time = "2025-09-26T09:02:14.77Z" }, - { url = "https://files.pythonhosted.org/packages/5f/d7/11350d9d7fb5adc73d2b0ebf6ac1cc70135577701e607407fe6739a90021/grpcio-1.75.1-cp312-cp312-win_amd64.whl", hash = "sha256:b1e191c5c465fa777d4cafbaacf0c01e0d5278022082c0abbd2ee1d6454ed94d", size = 4641938, upload-time = "2025-09-26T09:02:16.927Z" }, - { url = "https://files.pythonhosted.org/packages/46/74/bac4ab9f7722164afdf263ae31ba97b8174c667153510322a5eba4194c32/grpcio-1.75.1-cp313-cp313-linux_armv7l.whl", hash = "sha256:3bed22e750d91d53d9e31e0af35a7b0b51367e974e14a4ff229db5b207647884", size = 5672779, upload-time = "2025-09-26T09:02:19.11Z" }, - { url = "https://files.pythonhosted.org/packages/a6/52/d0483cfa667cddaa294e3ab88fd2c2a6e9dc1a1928c0e5911e2e54bd5b50/grpcio-1.75.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:5b8f381eadcd6ecaa143a21e9e80a26424c76a0a9b3d546febe6648f3a36a5ac", size = 11470623, upload-time = "2025-09-26T09:02:22.117Z" }, - { url = "https://files.pythonhosted.org/packages/cf/e4/d1954dce2972e32384db6a30273275e8c8ea5a44b80347f9055589333b3f/grpcio-1.75.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5bf4001d3293e3414d0cf99ff9b1139106e57c3a66dfff0c5f60b2a6286ec133", size = 6248838, upload-time = "2025-09-26T09:02:26.426Z" }, - { url = "https://files.pythonhosted.org/packages/06/43/073363bf63826ba8077c335d797a8d026f129dc0912b69c42feaf8f0cd26/grpcio-1.75.1-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:9f82ff474103e26351dacfe8d50214e7c9322960d8d07ba7fa1d05ff981c8b2d", size = 6922663, upload-time = "2025-09-26T09:02:28.724Z" }, - { url = "https://files.pythonhosted.org/packages/c2/6f/076ac0df6c359117676cacfa8a377e2abcecec6a6599a15a672d331f6680/grpcio-1.75.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0ee119f4f88d9f75414217823d21d75bfe0e6ed40135b0cbbfc6376bc9f7757d", size = 6436149, upload-time = "2025-09-26T09:02:30.971Z" }, - { url = "https://files.pythonhosted.org/packages/6b/27/1d08824f1d573fcb1fa35ede40d6020e68a04391709939e1c6f4193b445f/grpcio-1.75.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:664eecc3abe6d916fa6cf8dd6b778e62fb264a70f3430a3180995bf2da935446", size = 7067989, upload-time = "2025-09-26T09:02:33.233Z" }, - { url = "https://files.pythonhosted.org/packages/c6/98/98594cf97b8713feb06a8cb04eeef60b4757e3e2fb91aa0d9161da769843/grpcio-1.75.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:c32193fa08b2fbebf08fe08e84f8a0aad32d87c3ad42999c65e9449871b1c66e", size = 8010717, upload-time = "2025-09-26T09:02:36.011Z" }, - { url = "https://files.pythonhosted.org/packages/8c/7e/bb80b1bba03c12158f9254762cdf5cced4a9bc2e8ed51ed335915a5a06ef/grpcio-1.75.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5cebe13088b9254f6e615bcf1da9131d46cfa4e88039454aca9cb65f639bd3bc", size = 7463822, upload-time = "2025-09-26T09:02:38.26Z" }, - { url = 
"https://files.pythonhosted.org/packages/23/1c/1ea57fdc06927eb5640f6750c697f596f26183573069189eeaf6ef86ba2d/grpcio-1.75.1-cp313-cp313-win32.whl", hash = "sha256:4b4c678e7ed50f8ae8b8dbad15a865ee73ce12668b6aaf411bf3258b5bc3f970", size = 3938490, upload-time = "2025-09-26T09:02:40.268Z" }, - { url = "https://files.pythonhosted.org/packages/4b/24/fbb8ff1ccadfbf78ad2401c41aceaf02b0d782c084530d8871ddd69a2d49/grpcio-1.75.1-cp313-cp313-win_amd64.whl", hash = "sha256:5573f51e3f296a1bcf71e7a690c092845fb223072120f4bdb7a5b48e111def66", size = 4642538, upload-time = "2025-09-26T09:02:42.519Z" }, - { url = "https://files.pythonhosted.org/packages/f2/1b/9a0a5cecd24302b9fdbcd55d15ed6267e5f3d5b898ff9ac8cbe17ee76129/grpcio-1.75.1-cp314-cp314-linux_armv7l.whl", hash = "sha256:c05da79068dd96723793bffc8d0e64c45f316248417515f28d22204d9dae51c7", size = 5673319, upload-time = "2025-09-26T09:02:44.742Z" }, - { url = "https://files.pythonhosted.org/packages/c6/ec/9d6959429a83fbf5df8549c591a8a52bb313976f6646b79852c4884e3225/grpcio-1.75.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:06373a94fd16ec287116a825161dca179a0402d0c60674ceeec8c9fba344fe66", size = 11480347, upload-time = "2025-09-26T09:02:47.539Z" }, - { url = "https://files.pythonhosted.org/packages/09/7a/26da709e42c4565c3d7bf999a9569da96243ce34a8271a968dee810a7cf1/grpcio-1.75.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4484f4b7287bdaa7a5b3980f3c7224c3c622669405d20f69549f5fb956ad0421", size = 6254706, upload-time = "2025-09-26T09:02:50.4Z" }, - { url = "https://files.pythonhosted.org/packages/f1/08/dcb26a319d3725f199c97e671d904d84ee5680de57d74c566a991cfab632/grpcio-1.75.1-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:2720c239c1180eee69f7883c1d4c83fc1a495a2535b5fa322887c70bf02b16e8", size = 6922501, upload-time = "2025-09-26T09:02:52.711Z" }, - { url = "https://files.pythonhosted.org/packages/78/66/044d412c98408a5e23cb348845979a2d17a2e2b6c3c34c1ec91b920f49d0/grpcio-1.75.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:07a554fa31c668cf0e7a188678ceeca3cb8fead29bbe455352e712ec33ca701c", size = 6437492, upload-time = "2025-09-26T09:02:55.542Z" }, - { url = "https://files.pythonhosted.org/packages/4e/9d/5e3e362815152aa1afd8b26ea613effa005962f9da0eec6e0e4527e7a7d1/grpcio-1.75.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:3e71a2105210366bfc398eef7f57a664df99194f3520edb88b9c3a7e46ee0d64", size = 7081061, upload-time = "2025-09-26T09:02:58.261Z" }, - { url = "https://files.pythonhosted.org/packages/1e/1a/46615682a19e100f46e31ddba9ebc297c5a5ab9ddb47b35443ffadb8776c/grpcio-1.75.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:8679aa8a5b67976776d3c6b0521e99d1c34db8a312a12bcfd78a7085cb9b604e", size = 8010849, upload-time = "2025-09-26T09:03:00.548Z" }, - { url = "https://files.pythonhosted.org/packages/67/8e/3204b94ac30b0f675ab1c06540ab5578660dc8b690db71854d3116f20d00/grpcio-1.75.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:aad1c774f4ebf0696a7f148a56d39a3432550612597331792528895258966dc0", size = 7464478, upload-time = "2025-09-26T09:03:03.096Z" }, - { url = "https://files.pythonhosted.org/packages/b7/97/2d90652b213863b2cf466d9c1260ca7e7b67a16780431b3eb1d0420e3d5b/grpcio-1.75.1-cp314-cp314-win32.whl", hash = "sha256:62ce42d9994446b307649cb2a23335fa8e927f7ab2cbf5fcb844d6acb4d85f9c", size = 4012672, upload-time = "2025-09-26T09:03:05.477Z" }, - { url = 
"https://files.pythonhosted.org/packages/f9/df/e2e6e9fc1c985cd1a59e6996a05647c720fe8a03b92f5ec2d60d366c531e/grpcio-1.75.1-cp314-cp314-win_amd64.whl", hash = "sha256:f86e92275710bea3000cb79feca1762dc0ad3b27830dd1a74e82ab321d4ee464", size = 4772475, upload-time = "2025-09-26T09:03:07.661Z" }, ] [[package]] @@ -1029,6 +1067,71 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload-time = "2025-04-27T15:29:00.214Z" }, ] +[[package]] +name = "jax" +version = "0.8.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jaxlib" }, + { name = "ml-dtypes" }, + { name = "numpy" }, + { name = "opt-einsum" }, + { name = "scipy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e6/25/5efb46e5492076622d9150ed394da97ef9aad393aa52f7dd7e980f836e1f/jax-0.8.2.tar.gz", hash = "sha256:1a685ded06a8223a7b52e45e668e406049dbbead02873f2b5a4d881ba7b421ae", size = 2505776, upload-time = "2025-12-18T18:41:59.274Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/f7/ae4ecf183d9693cd5fcce7ee063c5e54f173b66dc80a8a79951861e1b557/jax-0.8.2-py3-none-any.whl", hash = "sha256:d0478c5dc74406441efcd25731166a65ee782f13c352fa72dc7d734351909355", size = 2925344, upload-time = "2025-12-18T18:39:38.645Z" }, +] + +[[package]] +name = "jaxlib" +version = "0.8.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ml-dtypes" }, + { name = "numpy" }, + { name = "scipy" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/5f/87/0a44b1a5c558e6d8e4fd796d4f9efe5c8cac2b3013ab7349968c65931fa4/jaxlib-0.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:490bf0cb029c73c65c9431124b86cdc95082dbc1fb76fc549d24d75da33e5454", size = 55929353, upload-time = "2025-12-18T18:40:35.844Z" }, + { url = "https://files.pythonhosted.org/packages/d1/d2/b37c86ee35d9ea7ee67c81e9166b31e18aa3784e1b96e8a60f52bbb8c9c0/jaxlib-0.8.2-cp311-cp311-manylinux_2_27_aarch64.whl", hash = "sha256:bb89be452b1b808d3f88fc01c415b364a260be4cc7ac120c038009f6150a32dc", size = 74548611, upload-time = "2025-12-18T18:40:39.67Z" }, + { url = "https://files.pythonhosted.org/packages/65/7d/9bb1cd620d8093098203b17d227a902939afec00da1c63cb719a9fe89525/jaxlib-0.8.2-cp311-cp311-manylinux_2_27_x86_64.whl", hash = "sha256:ccf77da917a20935247c990691decfcbdd06c25ef0ac94d914a04aadb22f714c", size = 80127195, upload-time = "2025-12-18T18:40:43.795Z" }, + { url = "https://files.pythonhosted.org/packages/e7/f1/56d830c7fcf1736cbfb11d8cf79c1932f826f319d2467becb02933df3ba9/jaxlib-0.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:dffc22b5b732b9556d92c918b251c61bcc046617c4dbb51e1f7a656587fddffb", size = 60338464, upload-time = "2025-12-18T18:40:47.427Z" }, +] + +[[package]] +name = "jinja2" +version = "3.1.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = 
"sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, +] + +[[package]] +name = "jmp" +version = "0.0.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ab/b0/e90fbbffef4b345329c878a69f0336d3edc5a1f9fcba193931aca2132d62/jmp-0.0.4.tar.gz", hash = "sha256:5dfeb0fd7c7a9f72a70fff0aab9d0cbfae32a809c02f4037ff3485ceb33e1730", size = 18582, upload-time = "2023-01-30T12:47:13.634Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/27/e5/cce82de2831e5aff9332d8d624bb57188f1b2af6ccf6979caf898a8a4348/jmp-0.0.4-py3-none-any.whl", hash = "sha256:6aa7adbddf2bd574b28c7faf6e81a735eb11f53386447896909c6968dc36807d", size = 18274, upload-time = "2023-01-30T12:47:11.931Z" }, +] + +[[package]] +name = "joblib" +version = "1.5.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/41/f2/d34e8b3a08a9cc79a50b2208a93dce981fe615b64d5a4d4abee421d898df/joblib-1.5.3.tar.gz", hash = "sha256:8561a3269e6801106863fd0d6d84bb737be9e7631e33aaed3fb9ce5953688da3", size = 331603, upload-time = "2025-12-15T08:41:46.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071, upload-time = "2025-12-15T08:41:44.973Z" }, +] + [[package]] name = "jsonschema" version = "4.25.1" @@ -1084,36 +1187,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/69/84/83439e16197337b8b14b6a5b9c2105fff81d42c2a7c5b58ac7b62ee2c3b1/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4", size = 23306, upload-time = "2024-10-18T15:21:10.185Z" }, { url = "https://files.pythonhosted.org/packages/9a/34/a15aa69f01e2181ed8d2b685c0d2f6655d5cca2c4db0ddea775e631918cd/MarkupSafe-3.0.2-cp311-cp311-win32.whl", hash = "sha256:6c89876f41da747c8d3677a2b540fb32ef5715f97b66eeb0c6b66f5e3ef6f59d", size = 15094, upload-time = "2024-10-18T15:21:11.005Z" }, { url = "https://files.pythonhosted.org/packages/da/b8/3a3bd761922d416f3dc5d00bfbed11f66b1ab89a0c2b6e887240a30b0f6b/MarkupSafe-3.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:70a87b411535ccad5ef2f1df5136506a10775d267e197e4cf531ced10537bd6b", size = 15521, upload-time = "2024-10-18T15:21:12.911Z" }, - { url = "https://files.pythonhosted.org/packages/22/09/d1f21434c97fc42f09d290cbb6350d44eb12f09cc62c9476effdb33a18aa/MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf", size = 14274, upload-time = "2024-10-18T15:21:13.777Z" }, - { url = "https://files.pythonhosted.org/packages/6b/b0/18f76bba336fa5aecf79d45dcd6c806c280ec44538b3c13671d49099fdd0/MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225", size = 12348, upload-time = "2024-10-18T15:21:14.822Z" }, - { url = "https://files.pythonhosted.org/packages/e0/25/dd5c0f6ac1311e9b40f4af06c78efde0f3b5cbf02502f8ef9501294c425b/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028", size = 24149, upload-time = "2024-10-18T15:21:15.642Z" }, - { url = 
"https://files.pythonhosted.org/packages/f3/f0/89e7aadfb3749d0f52234a0c8c7867877876e0a20b60e2188e9850794c17/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8", size = 23118, upload-time = "2024-10-18T15:21:17.133Z" }, - { url = "https://files.pythonhosted.org/packages/d5/da/f2eeb64c723f5e3777bc081da884b414671982008c47dcc1873d81f625b6/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c", size = 22993, upload-time = "2024-10-18T15:21:18.064Z" }, - { url = "https://files.pythonhosted.org/packages/da/0e/1f32af846df486dce7c227fe0f2398dc7e2e51d4a370508281f3c1c5cddc/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557", size = 24178, upload-time = "2024-10-18T15:21:18.859Z" }, - { url = "https://files.pythonhosted.org/packages/c4/f6/bb3ca0532de8086cbff5f06d137064c8410d10779c4c127e0e47d17c0b71/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22", size = 23319, upload-time = "2024-10-18T15:21:19.671Z" }, - { url = "https://files.pythonhosted.org/packages/a2/82/8be4c96ffee03c5b4a034e60a31294daf481e12c7c43ab8e34a1453ee48b/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48", size = 23352, upload-time = "2024-10-18T15:21:20.971Z" }, - { url = "https://files.pythonhosted.org/packages/51/ae/97827349d3fcffee7e184bdf7f41cd6b88d9919c80f0263ba7acd1bbcb18/MarkupSafe-3.0.2-cp312-cp312-win32.whl", hash = "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30", size = 15097, upload-time = "2024-10-18T15:21:22.646Z" }, - { url = "https://files.pythonhosted.org/packages/c1/80/a61f99dc3a936413c3ee4e1eecac96c0da5ed07ad56fd975f1a9da5bc630/MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87", size = 15601, upload-time = "2024-10-18T15:21:23.499Z" }, - { url = "https://files.pythonhosted.org/packages/83/0e/67eb10a7ecc77a0c2bbe2b0235765b98d164d81600746914bebada795e97/MarkupSafe-3.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ba9527cdd4c926ed0760bc301f6728ef34d841f405abf9d4f959c478421e4efd", size = 14274, upload-time = "2024-10-18T15:21:24.577Z" }, - { url = "https://files.pythonhosted.org/packages/2b/6d/9409f3684d3335375d04e5f05744dfe7e9f120062c9857df4ab490a1031a/MarkupSafe-3.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8b3d067f2e40fe93e1ccdd6b2e1d16c43140e76f02fb1319a05cf2b79d99430", size = 12352, upload-time = "2024-10-18T15:21:25.382Z" }, - { url = "https://files.pythonhosted.org/packages/d2/f5/6eadfcd3885ea85fe2a7c128315cc1bb7241e1987443d78c8fe712d03091/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:569511d3b58c8791ab4c2e1285575265991e6d8f8700c7be0e88f86cb0672094", size = 24122, upload-time = "2024-10-18T15:21:26.199Z" }, - { url = "https://files.pythonhosted.org/packages/0c/91/96cf928db8236f1bfab6ce15ad070dfdd02ed88261c2afafd4b43575e9e9/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396", size = 23085, upload-time = "2024-10-18T15:21:27.029Z" }, - { url = 
"https://files.pythonhosted.org/packages/c2/cf/c9d56af24d56ea04daae7ac0940232d31d5a8354f2b457c6d856b2057d69/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3818cb119498c0678015754eba762e0d61e5b52d34c8b13d770f0719f7b1d79", size = 22978, upload-time = "2024-10-18T15:21:27.846Z" }, - { url = "https://files.pythonhosted.org/packages/2a/9f/8619835cd6a711d6272d62abb78c033bda638fdc54c4e7f4272cf1c0962b/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cdb82a876c47801bb54a690c5ae105a46b392ac6099881cdfb9f6e95e4014c6a", size = 24208, upload-time = "2024-10-18T15:21:28.744Z" }, - { url = "https://files.pythonhosted.org/packages/f9/bf/176950a1792b2cd2102b8ffeb5133e1ed984547b75db47c25a67d3359f77/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:cabc348d87e913db6ab4aa100f01b08f481097838bdddf7c7a84b7575b7309ca", size = 23357, upload-time = "2024-10-18T15:21:29.545Z" }, - { url = "https://files.pythonhosted.org/packages/ce/4f/9a02c1d335caabe5c4efb90e1b6e8ee944aa245c1aaaab8e8a618987d816/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:444dcda765c8a838eaae23112db52f1efaf750daddb2d9ca300bcae1039adc5c", size = 23344, upload-time = "2024-10-18T15:21:30.366Z" }, - { url = "https://files.pythonhosted.org/packages/ee/55/c271b57db36f748f0e04a759ace9f8f759ccf22b4960c270c78a394f58be/MarkupSafe-3.0.2-cp313-cp313-win32.whl", hash = "sha256:bcf3e58998965654fdaff38e58584d8937aa3096ab5354d493c77d1fdd66d7a1", size = 15101, upload-time = "2024-10-18T15:21:31.207Z" }, - { url = "https://files.pythonhosted.org/packages/29/88/07df22d2dd4df40aba9f3e402e6dc1b8ee86297dddbad4872bd5e7b0094f/MarkupSafe-3.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:e6a2a455bd412959b57a172ce6328d2dd1f01cb2135efda2e4576e8a23fa3b0f", size = 15603, upload-time = "2024-10-18T15:21:32.032Z" }, - { url = "https://files.pythonhosted.org/packages/62/6a/8b89d24db2d32d433dffcd6a8779159da109842434f1dd2f6e71f32f738c/MarkupSafe-3.0.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:b5a6b3ada725cea8a5e634536b1b01c30bcdcd7f9c6fff4151548d5bf6b3a36c", size = 14510, upload-time = "2024-10-18T15:21:33.625Z" }, - { url = "https://files.pythonhosted.org/packages/7a/06/a10f955f70a2e5a9bf78d11a161029d278eeacbd35ef806c3fd17b13060d/MarkupSafe-3.0.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a904af0a6162c73e3edcb969eeeb53a63ceeb5d8cf642fade7d39e7963a22ddb", size = 12486, upload-time = "2024-10-18T15:21:34.611Z" }, - { url = "https://files.pythonhosted.org/packages/34/cf/65d4a571869a1a9078198ca28f39fba5fbb910f952f9dbc5220afff9f5e6/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa4e5faecf353ed117801a068ebab7b7e09ffb6e1d5e412dc852e0da018126c", size = 25480, upload-time = "2024-10-18T15:21:35.398Z" }, - { url = "https://files.pythonhosted.org/packages/0c/e3/90e9651924c430b885468b56b3d597cabf6d72be4b24a0acd1fa0e12af67/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0ef13eaeee5b615fb07c9a7dadb38eac06a0608b41570d8ade51c56539e509d", size = 23914, upload-time = "2024-10-18T15:21:36.231Z" }, - { url = "https://files.pythonhosted.org/packages/66/8c/6c7cf61f95d63bb866db39085150df1f2a5bd3335298f14a66b48e92659c/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d16a81a06776313e817c951135cf7340a3e91e8c1ff2fac444cfd75fffa04afe", size = 23796, upload-time = 
"2024-10-18T15:21:37.073Z" }, - { url = "https://files.pythonhosted.org/packages/bb/35/cbe9238ec3f47ac9a7c8b3df7a808e7cb50fe149dc7039f5f454b3fba218/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6381026f158fdb7c72a168278597a5e3a5222e83ea18f543112b2662a9b699c5", size = 25473, upload-time = "2024-10-18T15:21:37.932Z" }, - { url = "https://files.pythonhosted.org/packages/e6/32/7621a4382488aa283cc05e8984a9c219abad3bca087be9ec77e89939ded9/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:3d79d162e7be8f996986c064d1c7c817f6df3a77fe3d6859f6f9e7be4b8c213a", size = 24114, upload-time = "2024-10-18T15:21:39.799Z" }, - { url = "https://files.pythonhosted.org/packages/0d/80/0985960e4b89922cb5a0bac0ed39c5b96cbc1a536a99f30e8c220a996ed9/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:131a3c7689c85f5ad20f9f6fb1b866f402c445b220c19fe4308c0b147ccd2ad9", size = 24098, upload-time = "2024-10-18T15:21:40.813Z" }, - { url = "https://files.pythonhosted.org/packages/82/78/fedb03c7d5380df2427038ec8d973587e90561b2d90cd472ce9254cf348b/MarkupSafe-3.0.2-cp313-cp313t-win32.whl", hash = "sha256:ba8062ed2cf21c07a9e295d5b8a2a5ce678b913b45fdf68c32d95d6c1291e0b6", size = 15208, upload-time = "2024-10-18T15:21:41.814Z" }, - { url = "https://files.pythonhosted.org/packages/4f/65/6079a46068dfceaeabb5dcad6d674f5f5c61a6fa5673746f42a9f4c233b3/MarkupSafe-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f", size = 15739, upload-time = "2024-10-18T15:21:42.784Z" }, ] [[package]] @@ -1138,6 +1211,44 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/7b/84b0dd4c2c5a499d2c5d63fb7a1224c25fc4c8b6c24623fa7a566471480d/mcp-1.14.0-py3-none-any.whl", hash = "sha256:b2d27feba27b4c53d41b58aa7f4d090ae0cb740cbc4e339af10f8cbe54c4e19d", size = 163805, upload-time = "2025-09-11T17:40:46.891Z" }, ] +[[package]] +name = "ml-collections" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "absl-py" }, + { name = "pyyaml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b8/f8/1a9ae6696dbb6bc9c44ddf5c5e84710d77fe9a35a57e8a06722e1836a4a6/ml_collections-1.1.0.tar.gz", hash = "sha256:0ac1ac6511b9f1566863e0bb0afad0c64e906ea278ad3f4d2144a55322671f6f", size = 61356, upload-time = "2025-04-17T08:25:02.247Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ab/8a/18d4ff2c7bd83f30d6924bd4ad97abf418488c3f908dea228d6f0961ad68/ml_collections-1.1.0-py3-none-any.whl", hash = "sha256:23b6fa4772aac1ae745a96044b925a5746145a70734f087eaca6626e92c05cbc", size = 76707, upload-time = "2025-04-17T08:24:59.038Z" }, +] + +[[package]] +name = "ml-dtypes" +version = "0.5.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0e/4a/c27b42ed9b1c7d13d9ba8b6905dece787d6259152f2309338aed29b2447b/ml_dtypes-0.5.4.tar.gz", hash = "sha256:8ab06a50fb9bf9666dd0fe5dfb4676fa2b0ac0f31ecff72a6c3af8e22c063453", size = 692314, upload-time = "2025-11-17T22:32:31.031Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/5e/712092cfe7e5eb667b8ad9ca7c54442f21ed7ca8979745f1000e24cf8737/ml_dtypes-0.5.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6c7ecb74c4bd71db68a6bea1edf8da8c34f3d9fe218f038814fd1d310ac76c90", size = 679734, upload-time = "2025-11-17T22:31:39.223Z" }, + { url = 
"https://files.pythonhosted.org/packages/4f/cf/912146dfd4b5c0eea956836c01dcd2fce6c9c844b2691f5152aca196ce4f/ml_dtypes-0.5.4-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bc11d7e8c44a65115d05e2ab9989d1e045125d7be8e05a071a48bc76eb6d6040", size = 5056165, upload-time = "2025-11-17T22:31:41.071Z" }, + { url = "https://files.pythonhosted.org/packages/a9/80/19189ea605017473660e43762dc853d2797984b3c7bf30ce656099add30c/ml_dtypes-0.5.4-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:19b9a53598f21e453ea2fbda8aa783c20faff8e1eeb0d7ab899309a0053f1483", size = 5034975, upload-time = "2025-11-17T22:31:42.758Z" }, + { url = "https://files.pythonhosted.org/packages/b4/24/70bd59276883fdd91600ca20040b41efd4902a923283c4d6edcb1de128d2/ml_dtypes-0.5.4-cp311-cp311-win_amd64.whl", hash = "sha256:7c23c54a00ae43edf48d44066a7ec31e05fdc2eee0be2b8b50dd1903a1db94bb", size = 210742, upload-time = "2025-11-17T22:31:44.068Z" }, + { url = "https://files.pythonhosted.org/packages/a0/c9/64230ef14e40aa3f1cb254ef623bf812735e6bec7772848d19131111ac0d/ml_dtypes-0.5.4-cp311-cp311-win_arm64.whl", hash = "sha256:557a31a390b7e9439056644cb80ed0735a6e3e3bb09d67fd5687e4b04238d1de", size = 160709, upload-time = "2025-11-17T22:31:46.557Z" }, +] + +[[package]] +name = "mpmath" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", size = 508106, upload-time = "2023-03-07T16:47:11.061Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" }, +] + [[package]] name = "mypy" version = "1.18.1" @@ -1155,24 +1266,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/71/e8/7a20407aafb488acb5734ad7fb5e8c2ef78d292ca2674335350fa8ebef67/mypy-1.18.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:82ace21edf7ba8af31c3308a61dc72df30500f4dbb26f99ac36b4b80809d7e94", size = 13164555, upload-time = "2025-09-11T23:00:13.803Z" }, { url = "https://files.pythonhosted.org/packages/e8/c9/5f39065252e033b60f397096f538fb57c1d9fd70a7a490f314df20dd9d64/mypy-1.18.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a2dfd53dfe632f1ef5d161150a4b1f2d0786746ae02950eb3ac108964ee2975a", size = 13359222, upload-time = "2025-09-11T23:00:33.469Z" }, { url = "https://files.pythonhosted.org/packages/85/b6/d54111ef3c1e55992cd2ec9b8b6ce9c72a407423e93132cae209f7e7ba60/mypy-1.18.1-cp311-cp311-win_amd64.whl", hash = "sha256:320f0ad4205eefcb0e1a72428dde0ad10be73da9f92e793c36228e8ebf7298c0", size = 9760441, upload-time = "2025-09-11T23:00:44.826Z" }, - { url = "https://files.pythonhosted.org/packages/e7/14/1c3f54d606cb88a55d1567153ef3a8bc7b74702f2ff5eb64d0994f9e49cb/mypy-1.18.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:502cde8896be8e638588b90fdcb4c5d5b8c1b004dfc63fd5604a973547367bb9", size = 12911082, upload-time = "2025-09-11T23:00:41.465Z" }, - { url = "https://files.pythonhosted.org/packages/90/83/235606c8b6d50a8eba99773add907ce1d41c068edb523f81eb0d01603a83/mypy-1.18.1-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:7509549b5e41be279afc1228242d0e397f1af2919a8f2877ad542b199dc4083e", size = 11919107, upload-time = "2025-09-11T22:58:40.903Z" }, - { url = "https://files.pythonhosted.org/packages/ca/25/4e2ce00f8d15b99d0c68a2536ad63e9eac033f723439ef80290ec32c1ff5/mypy-1.18.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5956ecaabb3a245e3f34100172abca1507be687377fe20e24d6a7557e07080e2", size = 12472551, upload-time = "2025-09-11T22:58:37.272Z" }, - { url = "https://files.pythonhosted.org/packages/32/bb/92642a9350fc339dd9dcefcf6862d171b52294af107d521dce075f32f298/mypy-1.18.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8750ceb014a96c9890421c83f0db53b0f3b8633e2864c6f9bc0a8e93951ed18d", size = 13340554, upload-time = "2025-09-11T22:59:38.756Z" }, - { url = "https://files.pythonhosted.org/packages/cd/ee/38d01db91c198fb6350025d28f9719ecf3c8f2c55a0094bfbf3ef478cc9a/mypy-1.18.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fb89ea08ff41adf59476b235293679a6eb53a7b9400f6256272fb6029bec3ce5", size = 13530933, upload-time = "2025-09-11T22:59:20.228Z" }, - { url = "https://files.pythonhosted.org/packages/da/8d/6d991ae631f80d58edbf9d7066e3f2a96e479dca955d9a968cd6e90850a3/mypy-1.18.1-cp312-cp312-win_amd64.whl", hash = "sha256:2657654d82fcd2a87e02a33e0d23001789a554059bbf34702d623dafe353eabf", size = 9828426, upload-time = "2025-09-11T23:00:21.007Z" }, - { url = "https://files.pythonhosted.org/packages/e4/ec/ef4a7260e1460a3071628a9277a7579e7da1b071bc134ebe909323f2fbc7/mypy-1.18.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d70d2b5baf9b9a20bc9c730015615ae3243ef47fb4a58ad7b31c3e0a59b5ef1f", size = 12918671, upload-time = "2025-09-11T22:58:29.814Z" }, - { url = "https://files.pythonhosted.org/packages/a1/82/0ea6c3953f16223f0b8eda40c1aeac6bd266d15f4902556ae6e91f6fca4c/mypy-1.18.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b8367e33506300f07a43012fc546402f283c3f8bcff1dc338636affb710154ce", size = 11913023, upload-time = "2025-09-11T23:00:29.049Z" }, - { url = "https://files.pythonhosted.org/packages/ae/ef/5e2057e692c2690fc27b3ed0a4dbde4388330c32e2576a23f0302bc8358d/mypy-1.18.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:913f668ec50c3337b89df22f973c1c8f0b29ee9e290a8b7fe01cc1ef7446d42e", size = 12473355, upload-time = "2025-09-11T23:00:04.544Z" }, - { url = "https://files.pythonhosted.org/packages/98/43/b7e429fc4be10e390a167b0cd1810d41cb4e4add4ae50bab96faff695a3b/mypy-1.18.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1a0e70b87eb27b33209fa4792b051c6947976f6ab829daa83819df5f58330c71", size = 13346944, upload-time = "2025-09-11T22:58:23.024Z" }, - { url = "https://files.pythonhosted.org/packages/89/4e/899dba0bfe36bbd5b7c52e597de4cf47b5053d337b6d201a30e3798e77a6/mypy-1.18.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c378d946e8a60be6b6ede48c878d145546fb42aad61df998c056ec151bf6c746", size = 13512574, upload-time = "2025-09-11T22:59:52.152Z" }, - { url = "https://files.pythonhosted.org/packages/f5/f8/7661021a5b0e501b76440454d786b0f01bb05d5c4b125fcbda02023d0250/mypy-1.18.1-cp313-cp313-win_amd64.whl", hash = "sha256:2cd2c1e0f3a7465f22731987fff6fc427e3dcbb4ca5f7db5bbeaff2ff9a31f6d", size = 9837684, upload-time = "2025-09-11T22:58:44.454Z" }, - { url = 
"https://files.pythonhosted.org/packages/bf/87/7b173981466219eccc64c107cf8e5ab9eb39cc304b4c07df8e7881533e4f/mypy-1.18.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ba24603c58e34dd5b096dfad792d87b304fc6470cbb1c22fd64e7ebd17edcc61", size = 12900265, upload-time = "2025-09-11T22:59:03.4Z" }, - { url = "https://files.pythonhosted.org/packages/ae/cc/b10e65bae75b18a5ac8f81b1e8e5867677e418f0dd2c83b8e2de9ba96ebd/mypy-1.18.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ed36662fb92ae4cb3cacc682ec6656208f323bbc23d4b08d091eecfc0863d4b5", size = 11942890, upload-time = "2025-09-11T23:00:00.607Z" }, - { url = "https://files.pythonhosted.org/packages/39/d4/aeefa07c44d09f4c2102e525e2031bc066d12e5351f66b8a83719671004d/mypy-1.18.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:040ecc95e026f71a9ad7956fea2724466602b561e6a25c2e5584160d3833aaa8", size = 12472291, upload-time = "2025-09-11T22:59:43.425Z" }, - { url = "https://files.pythonhosted.org/packages/c6/07/711e78668ff8e365f8c19735594ea95938bff3639a4c46a905e3ed8ff2d6/mypy-1.18.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:937e3ed86cb731276706e46e03512547e43c391a13f363e08d0fee49a7c38a0d", size = 13318610, upload-time = "2025-09-11T23:00:17.604Z" }, - { url = "https://files.pythonhosted.org/packages/ca/85/df3b2d39339c31d360ce299b418c55e8194ef3205284739b64962f6074e7/mypy-1.18.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:1f95cc4f01c0f1701ca3b0355792bccec13ecb2ec1c469e5b85a6ef398398b1d", size = 13513697, upload-time = "2025-09-11T22:58:59.534Z" }, - { url = "https://files.pythonhosted.org/packages/b1/df/462866163c99ea73bb28f0eb4d415c087e30de5d36ee0f5429d42e28689b/mypy-1.18.1-cp314-cp314-win_amd64.whl", hash = "sha256:e4f16c0019d48941220ac60b893615be2f63afedaba6a0801bdcd041b96991ce", size = 9985739, upload-time = "2025-09-11T22:58:51.644Z" }, { url = "https://files.pythonhosted.org/packages/e0/1d/4b97d3089b48ef3d904c9ca69fab044475bd03245d878f5f0b3ea1daf7ce/mypy-1.18.1-py3-none-any.whl", hash = "sha256:b76a4de66a0ac01da1be14ecc8ae88ddea33b8380284a9e3eae39d57ebcbe26e", size = 2352212, upload-time = "2025-09-11T22:59:26.576Z" }, ] @@ -1185,6 +1278,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, ] +[[package]] +name = "networkx" +version = "3.6.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6a/51/63fe664f3908c97be9d2e4f1158eb633317598cfa6e1fc14af5383f17512/networkx-3.6.1.tar.gz", hash = "sha256:26b7c357accc0c8cde558ad486283728b65b6a95d85ee1cd66bafab4c8168509", size = 2517025, upload-time = "2025-12-08T17:02:39.908Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/c9/b2622292ea83fbb4ec318f5b9ab867d0a28ab43c5717bb85b0a5f6b3b0a4/networkx-3.6.1-py3-none-any.whl", hash = "sha256:d47fbf302e7d9cbbb9e2555a0d267983d2aa476bac30e90dfbe5669bd57f3762", size = 2068504, upload-time = "2025-12-08T17:02:38.159Z" }, +] + [[package]] name = "numpy" version = "2.3.3" @@ -1202,61 +1304,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/45/fa/7f43ba10c77575e8be7b0138d107e4f44ca4a1ef322cd16980ea3e8b8222/numpy-2.3.3-cp311-cp311-win32.whl", hash = 
"sha256:eb63d443d7b4ffd1e873f8155260d7f58e7e4b095961b01c91062935c2491e57", size = 6599794, upload-time = "2025-09-09T15:56:23.258Z" }, { url = "https://files.pythonhosted.org/packages/0a/a2/a4f78cb2241fe5664a22a10332f2be886dcdea8784c9f6a01c272da9b426/numpy-2.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:ec9d249840f6a565f58d8f913bccac2444235025bbb13e9a4681783572ee3caa", size = 13088104, upload-time = "2025-09-09T15:56:25.476Z" }, { url = "https://files.pythonhosted.org/packages/79/64/e424e975adbd38282ebcd4891661965b78783de893b381cbc4832fb9beb2/numpy-2.3.3-cp311-cp311-win_arm64.whl", hash = "sha256:74c2a948d02f88c11a3c075d9733f1ae67d97c6bdb97f2bb542f980458b257e7", size = 10460772, upload-time = "2025-09-09T15:56:27.679Z" }, - { url = "https://files.pythonhosted.org/packages/51/5d/bb7fc075b762c96329147799e1bcc9176ab07ca6375ea976c475482ad5b3/numpy-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:cfdd09f9c84a1a934cde1eec2267f0a43a7cd44b2cca4ff95b7c0d14d144b0bf", size = 20957014, upload-time = "2025-09-09T15:56:29.966Z" }, - { url = "https://files.pythonhosted.org/packages/6b/0e/c6211bb92af26517acd52125a237a92afe9c3124c6a68d3b9f81b62a0568/numpy-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cb32e3cf0f762aee47ad1ddc6672988f7f27045b0783c887190545baba73aa25", size = 14185220, upload-time = "2025-09-09T15:56:32.175Z" }, - { url = "https://files.pythonhosted.org/packages/22/f2/07bb754eb2ede9073f4054f7c0286b0d9d2e23982e090a80d478b26d35ca/numpy-2.3.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:396b254daeb0a57b1fe0ecb5e3cff6fa79a380fa97c8f7781a6d08cd429418fe", size = 5113918, upload-time = "2025-09-09T15:56:34.175Z" }, - { url = "https://files.pythonhosted.org/packages/81/0a/afa51697e9fb74642f231ea36aca80fa17c8fb89f7a82abd5174023c3960/numpy-2.3.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:067e3d7159a5d8f8a0b46ee11148fc35ca9b21f61e3c49fbd0a027450e65a33b", size = 6647922, upload-time = "2025-09-09T15:56:36.149Z" }, - { url = "https://files.pythonhosted.org/packages/5d/f5/122d9cdb3f51c520d150fef6e87df9279e33d19a9611a87c0d2cf78a89f4/numpy-2.3.3-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1c02d0629d25d426585fb2e45a66154081b9fa677bc92a881ff1d216bc9919a8", size = 14281991, upload-time = "2025-09-09T15:56:40.548Z" }, - { url = "https://files.pythonhosted.org/packages/51/64/7de3c91e821a2debf77c92962ea3fe6ac2bc45d0778c1cbe15d4fce2fd94/numpy-2.3.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9192da52b9745f7f0766531dcfa978b7763916f158bb63bdb8a1eca0068ab20", size = 16641643, upload-time = "2025-09-09T15:56:43.343Z" }, - { url = "https://files.pythonhosted.org/packages/30/e4/961a5fa681502cd0d68907818b69f67542695b74e3ceaa513918103b7e80/numpy-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:cd7de500a5b66319db419dc3c345244404a164beae0d0937283b907d8152e6ea", size = 16056787, upload-time = "2025-09-09T15:56:46.141Z" }, - { url = "https://files.pythonhosted.org/packages/99/26/92c912b966e47fbbdf2ad556cb17e3a3088e2e1292b9833be1dfa5361a1a/numpy-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:93d4962d8f82af58f0b2eb85daaf1b3ca23fe0a85d0be8f1f2b7bb46034e56d7", size = 18579598, upload-time = "2025-09-09T15:56:49.844Z" }, - { url = "https://files.pythonhosted.org/packages/17/b6/fc8f82cb3520768718834f310c37d96380d9dc61bfdaf05fe5c0b7653e01/numpy-2.3.3-cp312-cp312-win32.whl", hash = "sha256:5534ed6b92f9b7dca6c0a19d6df12d41c68b991cef051d108f6dbff3babc4ebf", size = 6320800, upload-time = 
"2025-09-09T15:56:52.499Z" }, - { url = "https://files.pythonhosted.org/packages/32/ee/de999f2625b80d043d6d2d628c07d0d5555a677a3cf78fdf868d409b8766/numpy-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:497d7cad08e7092dba36e3d296fe4c97708c93daf26643a1ae4b03f6294d30eb", size = 12786615, upload-time = "2025-09-09T15:56:54.422Z" }, - { url = "https://files.pythonhosted.org/packages/49/6e/b479032f8a43559c383acb20816644f5f91c88f633d9271ee84f3b3a996c/numpy-2.3.3-cp312-cp312-win_arm64.whl", hash = "sha256:ca0309a18d4dfea6fc6262a66d06c26cfe4640c3926ceec90e57791a82b6eee5", size = 10195936, upload-time = "2025-09-09T15:56:56.541Z" }, - { url = "https://files.pythonhosted.org/packages/7d/b9/984c2b1ee61a8b803bf63582b4ac4242cf76e2dbd663efeafcb620cc0ccb/numpy-2.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f5415fb78995644253370985342cd03572ef8620b934da27d77377a2285955bf", size = 20949588, upload-time = "2025-09-09T15:56:59.087Z" }, - { url = "https://files.pythonhosted.org/packages/a6/e4/07970e3bed0b1384d22af1e9912527ecbeb47d3b26e9b6a3bced068b3bea/numpy-2.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d00de139a3324e26ed5b95870ce63be7ec7352171bc69a4cf1f157a48e3eb6b7", size = 14177802, upload-time = "2025-09-09T15:57:01.73Z" }, - { url = "https://files.pythonhosted.org/packages/35/c7/477a83887f9de61f1203bad89cf208b7c19cc9fef0cebef65d5a1a0619f2/numpy-2.3.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:9dc13c6a5829610cc07422bc74d3ac083bd8323f14e2827d992f9e52e22cd6a6", size = 5106537, upload-time = "2025-09-09T15:57:03.765Z" }, - { url = "https://files.pythonhosted.org/packages/52/47/93b953bd5866a6f6986344d045a207d3f1cfbad99db29f534ea9cee5108c/numpy-2.3.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:d79715d95f1894771eb4e60fb23f065663b2298f7d22945d66877aadf33d00c7", size = 6640743, upload-time = "2025-09-09T15:57:07.921Z" }, - { url = "https://files.pythonhosted.org/packages/23/83/377f84aaeb800b64c0ef4de58b08769e782edcefa4fea712910b6f0afd3c/numpy-2.3.3-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:952cfd0748514ea7c3afc729a0fc639e61655ce4c55ab9acfab14bda4f402b4c", size = 14278881, upload-time = "2025-09-09T15:57:11.349Z" }, - { url = "https://files.pythonhosted.org/packages/9a/a5/bf3db6e66c4b160d6ea10b534c381a1955dfab34cb1017ea93aa33c70ed3/numpy-2.3.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5b83648633d46f77039c29078751f80da65aa64d5622a3cd62aaef9d835b6c93", size = 16636301, upload-time = "2025-09-09T15:57:14.245Z" }, - { url = "https://files.pythonhosted.org/packages/a2/59/1287924242eb4fa3f9b3a2c30400f2e17eb2707020d1c5e3086fe7330717/numpy-2.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b001bae8cea1c7dfdb2ae2b017ed0a6f2102d7a70059df1e338e307a4c78a8ae", size = 16053645, upload-time = "2025-09-09T15:57:16.534Z" }, - { url = "https://files.pythonhosted.org/packages/e6/93/b3d47ed882027c35e94ac2320c37e452a549f582a5e801f2d34b56973c97/numpy-2.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8e9aced64054739037d42fb84c54dd38b81ee238816c948c8f3ed134665dcd86", size = 18578179, upload-time = "2025-09-09T15:57:18.883Z" }, - { url = "https://files.pythonhosted.org/packages/20/d9/487a2bccbf7cc9d4bfc5f0f197761a5ef27ba870f1e3bbb9afc4bbe3fcc2/numpy-2.3.3-cp313-cp313-win32.whl", hash = "sha256:9591e1221db3f37751e6442850429b3aabf7026d3b05542d102944ca7f00c8a8", size = 6312250, upload-time = "2025-09-09T15:57:21.296Z" }, - { url = 
"https://files.pythonhosted.org/packages/1b/b5/263ebbbbcede85028f30047eab3d58028d7ebe389d6493fc95ae66c636ab/numpy-2.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:f0dadeb302887f07431910f67a14d57209ed91130be0adea2f9793f1a4f817cf", size = 12783269, upload-time = "2025-09-09T15:57:23.034Z" }, - { url = "https://files.pythonhosted.org/packages/fa/75/67b8ca554bbeaaeb3fac2e8bce46967a5a06544c9108ec0cf5cece559b6c/numpy-2.3.3-cp313-cp313-win_arm64.whl", hash = "sha256:3c7cf302ac6e0b76a64c4aecf1a09e51abd9b01fc7feee80f6c43e3ab1b1dbc5", size = 10195314, upload-time = "2025-09-09T15:57:25.045Z" }, - { url = "https://files.pythonhosted.org/packages/11/d0/0d1ddec56b162042ddfafeeb293bac672de9b0cfd688383590090963720a/numpy-2.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:eda59e44957d272846bb407aad19f89dc6f58fecf3504bd144f4c5cf81a7eacc", size = 21048025, upload-time = "2025-09-09T15:57:27.257Z" }, - { url = "https://files.pythonhosted.org/packages/36/9e/1996ca6b6d00415b6acbdd3c42f7f03ea256e2c3f158f80bd7436a8a19f3/numpy-2.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:823d04112bc85ef5c4fda73ba24e6096c8f869931405a80aa8b0e604510a26bc", size = 14301053, upload-time = "2025-09-09T15:57:30.077Z" }, - { url = "https://files.pythonhosted.org/packages/05/24/43da09aa764c68694b76e84b3d3f0c44cb7c18cdc1ba80e48b0ac1d2cd39/numpy-2.3.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:40051003e03db4041aa325da2a0971ba41cf65714e65d296397cc0e32de6018b", size = 5229444, upload-time = "2025-09-09T15:57:32.733Z" }, - { url = "https://files.pythonhosted.org/packages/bc/14/50ffb0f22f7218ef8af28dd089f79f68289a7a05a208db9a2c5dcbe123c1/numpy-2.3.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:6ee9086235dd6ab7ae75aba5662f582a81ced49f0f1c6de4260a78d8f2d91a19", size = 6738039, upload-time = "2025-09-09T15:57:34.328Z" }, - { url = "https://files.pythonhosted.org/packages/55/52/af46ac0795e09657d45a7f4db961917314377edecf66db0e39fa7ab5c3d3/numpy-2.3.3-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94fcaa68757c3e2e668ddadeaa86ab05499a70725811e582b6a9858dd472fb30", size = 14352314, upload-time = "2025-09-09T15:57:36.255Z" }, - { url = "https://files.pythonhosted.org/packages/a7/b1/dc226b4c90eb9f07a3fff95c2f0db3268e2e54e5cce97c4ac91518aee71b/numpy-2.3.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da1a74b90e7483d6ce5244053399a614b1d6b7bc30a60d2f570e5071f8959d3e", size = 16701722, upload-time = "2025-09-09T15:57:38.622Z" }, - { url = "https://files.pythonhosted.org/packages/9d/9d/9d8d358f2eb5eced14dba99f110d83b5cd9a4460895230f3b396ad19a323/numpy-2.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2990adf06d1ecee3b3dcbb4977dfab6e9f09807598d647f04d385d29e7a3c3d3", size = 16132755, upload-time = "2025-09-09T15:57:41.16Z" }, - { url = "https://files.pythonhosted.org/packages/b6/27/b3922660c45513f9377b3fb42240bec63f203c71416093476ec9aa0719dc/numpy-2.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ed635ff692483b8e3f0fcaa8e7eb8a75ee71aa6d975388224f70821421800cea", size = 18651560, upload-time = "2025-09-09T15:57:43.459Z" }, - { url = "https://files.pythonhosted.org/packages/5b/8e/3ab61a730bdbbc201bb245a71102aa609f0008b9ed15255500a99cd7f780/numpy-2.3.3-cp313-cp313t-win32.whl", hash = "sha256:a333b4ed33d8dc2b373cc955ca57babc00cd6f9009991d9edc5ddbc1bac36bcd", size = 6442776, upload-time = "2025-09-09T15:57:45.793Z" }, - { url = 
"https://files.pythonhosted.org/packages/1c/3a/e22b766b11f6030dc2decdeff5c2fb1610768055603f9f3be88b6d192fb2/numpy-2.3.3-cp313-cp313t-win_amd64.whl", hash = "sha256:4384a169c4d8f97195980815d6fcad04933a7e1ab3b530921c3fef7a1c63426d", size = 12927281, upload-time = "2025-09-09T15:57:47.492Z" }, - { url = "https://files.pythonhosted.org/packages/7b/42/c2e2bc48c5e9b2a83423f99733950fbefd86f165b468a3d85d52b30bf782/numpy-2.3.3-cp313-cp313t-win_arm64.whl", hash = "sha256:75370986cc0bc66f4ce5110ad35aae6d182cc4ce6433c40ad151f53690130bf1", size = 10265275, upload-time = "2025-09-09T15:57:49.647Z" }, - { url = "https://files.pythonhosted.org/packages/6b/01/342ad585ad82419b99bcf7cebe99e61da6bedb89e213c5fd71acc467faee/numpy-2.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:cd052f1fa6a78dee696b58a914b7229ecfa41f0a6d96dc663c1220a55e137593", size = 20951527, upload-time = "2025-09-09T15:57:52.006Z" }, - { url = "https://files.pythonhosted.org/packages/ef/d8/204e0d73fc1b7a9ee80ab1fe1983dd33a4d64a4e30a05364b0208e9a241a/numpy-2.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:414a97499480067d305fcac9716c29cf4d0d76db6ebf0bf3cbce666677f12652", size = 14186159, upload-time = "2025-09-09T15:57:54.407Z" }, - { url = "https://files.pythonhosted.org/packages/22/af/f11c916d08f3a18fb8ba81ab72b5b74a6e42ead4c2846d270eb19845bf74/numpy-2.3.3-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:50a5fe69f135f88a2be9b6ca0481a68a136f6febe1916e4920e12f1a34e708a7", size = 5114624, upload-time = "2025-09-09T15:57:56.5Z" }, - { url = "https://files.pythonhosted.org/packages/fb/11/0ed919c8381ac9d2ffacd63fd1f0c34d27e99cab650f0eb6f110e6ae4858/numpy-2.3.3-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:b912f2ed2b67a129e6a601e9d93d4fa37bef67e54cac442a2f588a54afe5c67a", size = 6642627, upload-time = "2025-09-09T15:57:58.206Z" }, - { url = "https://files.pythonhosted.org/packages/ee/83/deb5f77cb0f7ba6cb52b91ed388b47f8f3c2e9930d4665c600408d9b90b9/numpy-2.3.3-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9e318ee0596d76d4cb3d78535dc005fa60e5ea348cd131a51e99d0bdbe0b54fe", size = 14296926, upload-time = "2025-09-09T15:58:00.035Z" }, - { url = "https://files.pythonhosted.org/packages/77/cc/70e59dcb84f2b005d4f306310ff0a892518cc0c8000a33d0e6faf7ca8d80/numpy-2.3.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ce020080e4a52426202bdb6f7691c65bb55e49f261f31a8f506c9f6bc7450421", size = 16638958, upload-time = "2025-09-09T15:58:02.738Z" }, - { url = "https://files.pythonhosted.org/packages/b6/5a/b2ab6c18b4257e099587d5b7f903317bd7115333ad8d4ec4874278eafa61/numpy-2.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e6687dc183aa55dae4a705b35f9c0f8cb178bcaa2f029b241ac5356221d5c021", size = 16071920, upload-time = "2025-09-09T15:58:05.029Z" }, - { url = "https://files.pythonhosted.org/packages/b8/f1/8b3fdc44324a259298520dd82147ff648979bed085feeacc1250ef1656c0/numpy-2.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d8f3b1080782469fdc1718c4ed1d22549b5fb12af0d57d35e992158a772a37cf", size = 18577076, upload-time = "2025-09-09T15:58:07.745Z" }, - { url = "https://files.pythonhosted.org/packages/f0/a1/b87a284fb15a42e9274e7fcea0dad259d12ddbf07c1595b26883151ca3b4/numpy-2.3.3-cp314-cp314-win32.whl", hash = "sha256:cb248499b0bc3be66ebd6578b83e5acacf1d6cb2a77f2248ce0e40fbec5a76d0", size = 6366952, upload-time = "2025-09-09T15:58:10.096Z" }, - { url = 
"https://files.pythonhosted.org/packages/70/5f/1816f4d08f3b8f66576d8433a66f8fa35a5acfb3bbd0bf6c31183b003f3d/numpy-2.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:691808c2b26b0f002a032c73255d0bd89751425f379f7bcd22d140db593a96e8", size = 12919322, upload-time = "2025-09-09T15:58:12.138Z" }, - { url = "https://files.pythonhosted.org/packages/8c/de/072420342e46a8ea41c324a555fa90fcc11637583fb8df722936aed1736d/numpy-2.3.3-cp314-cp314-win_arm64.whl", hash = "sha256:9ad12e976ca7b10f1774b03615a2a4bab8addce37ecc77394d8e986927dc0dfe", size = 10478630, upload-time = "2025-09-09T15:58:14.64Z" }, - { url = "https://files.pythonhosted.org/packages/d5/df/ee2f1c0a9de7347f14da5dd3cd3c3b034d1b8607ccb6883d7dd5c035d631/numpy-2.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9cc48e09feb11e1db00b320e9d30a4151f7369afb96bd0e48d942d09da3a0d00", size = 21047987, upload-time = "2025-09-09T15:58:16.889Z" }, - { url = "https://files.pythonhosted.org/packages/d6/92/9453bdc5a4e9e69cf4358463f25e8260e2ffc126d52e10038b9077815989/numpy-2.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:901bf6123879b7f251d3631967fd574690734236075082078e0571977c6a8e6a", size = 14301076, upload-time = "2025-09-09T15:58:20.343Z" }, - { url = "https://files.pythonhosted.org/packages/13/77/1447b9eb500f028bb44253105bd67534af60499588a5149a94f18f2ca917/numpy-2.3.3-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:7f025652034199c301049296b59fa7d52c7e625017cae4c75d8662e377bf487d", size = 5229491, upload-time = "2025-09-09T15:58:22.481Z" }, - { url = "https://files.pythonhosted.org/packages/3d/f9/d72221b6ca205f9736cb4b2ce3b002f6e45cd67cd6a6d1c8af11a2f0b649/numpy-2.3.3-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:533ca5f6d325c80b6007d4d7fb1984c303553534191024ec6a524a4c92a5935a", size = 6737913, upload-time = "2025-09-09T15:58:24.569Z" }, - { url = "https://files.pythonhosted.org/packages/3c/5f/d12834711962ad9c46af72f79bb31e73e416ee49d17f4c797f72c96b6ca5/numpy-2.3.3-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0edd58682a399824633b66885d699d7de982800053acf20be1eaa46d92009c54", size = 14352811, upload-time = "2025-09-09T15:58:26.416Z" }, - { url = "https://files.pythonhosted.org/packages/a1/0d/fdbec6629d97fd1bebed56cd742884e4eead593611bbe1abc3eb40d304b2/numpy-2.3.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:367ad5d8fbec5d9296d18478804a530f1191e24ab4d75ab408346ae88045d25e", size = 16702689, upload-time = "2025-09-09T15:58:28.831Z" }, - { url = "https://files.pythonhosted.org/packages/9b/09/0a35196dc5575adde1eb97ddfbc3e1687a814f905377621d18ca9bc2b7dd/numpy-2.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8f6ac61a217437946a1fa48d24c47c91a0c4f725237871117dea264982128097", size = 16133855, upload-time = "2025-09-09T15:58:31.349Z" }, - { url = "https://files.pythonhosted.org/packages/7a/ca/c9de3ea397d576f1b6753eaa906d4cdef1bf97589a6d9825a349b4729cc2/numpy-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:179a42101b845a816d464b6fe9a845dfaf308fdfc7925387195570789bb2c970", size = 18652520, upload-time = "2025-09-09T15:58:33.762Z" }, - { url = "https://files.pythonhosted.org/packages/fd/c2/e5ed830e08cd0196351db55db82f65bc0ab05da6ef2b72a836dcf1936d2f/numpy-2.3.3-cp314-cp314t-win32.whl", hash = "sha256:1250c5d3d2562ec4174bce2e3a1523041595f9b651065e4a4473f5f48a6bc8a5", size = 6515371, upload-time = "2025-09-09T15:58:36.04Z" }, - { url = 
"https://files.pythonhosted.org/packages/47/c7/b0f6b5b67f6788a0725f744496badbb604d226bf233ba716683ebb47b570/numpy-2.3.3-cp314-cp314t-win_amd64.whl", hash = "sha256:b37a0b2e5935409daebe82c1e42274d30d9dd355852529eab91dab8dcca7419f", size = 13112576, upload-time = "2025-09-09T15:58:37.927Z" }, - { url = "https://files.pythonhosted.org/packages/06/b9/33bba5ff6fb679aa0b1f8a07e853f002a6b04b9394db3069a1270a7784ca/numpy-2.3.3-cp314-cp314t-win_arm64.whl", hash = "sha256:78c9f6560dc7e6b3990e32df7ea1a50bbd0e2a111e05209963f5ddcab7073b0b", size = 10545953, upload-time = "2025-09-09T15:58:40.576Z" }, { url = "https://files.pythonhosted.org/packages/b8/f2/7e0a37cfced2644c9563c529f29fa28acbd0960dde32ece683aafa6f4949/numpy-2.3.3-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1e02c7159791cd481e1e6d5ddd766b62a4d5acf8df4d4d1afe35ee9c5c33a41e", size = 21131019, upload-time = "2025-09-09T15:58:42.838Z" }, { url = "https://files.pythonhosted.org/packages/1a/7e/3291f505297ed63831135a6cc0f474da0c868a1f31b0dd9a9f03a7a0d2ed/numpy-2.3.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:dca2d0fc80b3893ae72197b39f69d55a3cd8b17ea1b50aa4c62de82419936150", size = 14376288, upload-time = "2025-09-09T15:58:45.425Z" }, { url = "https://files.pythonhosted.org/packages/bf/4b/ae02e985bdeee73d7b5abdefeb98aef1207e96d4c0621ee0cf228ddfac3c/numpy-2.3.3-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:99683cbe0658f8271b333a1b1b4bb3173750ad59c0c61f5bbdc5b318918fffe3", size = 5305425, upload-time = "2025-09-09T15:58:48.6Z" }, @@ -1266,6 +1313,140 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/af/11/0cc63f9f321ccf63886ac203336777140011fb669e739da36d8db3c53b98/numpy-2.3.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:2e267c7da5bf7309670523896df97f93f6e469fb931161f483cd6882b3b1a5dc", size = 12971844, upload-time = "2025-09-09T15:58:57.359Z" }, ] +[[package]] +name = "nvidia-cublas-cu12" +version = "12.8.4.1" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload-time = "2025-03-07T01:44:31.254Z" }, +] + +[[package]] +name = "nvidia-cuda-cupti-cu12" +version = "12.8.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" }, +] + +[[package]] +name = "nvidia-cuda-nvrtc-cu12" +version = "12.8.93" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" }, +] + +[[package]] +name = "nvidia-cuda-runtime-cu12" +version = "12.8.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" }, +] + +[[package]] +name = "nvidia-cudnn-cu12" +version = "9.10.2.21" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-cublas-cu12" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, +] + +[[package]] +name = "nvidia-cufft-cu12" +version = "11.3.3.83" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-nvjitlink-cu12" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, +] + +[[package]] +name = "nvidia-cufile-cu12" +version = "1.13.1.3" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" }, +] + +[[package]] +name = "nvidia-curand-cu12" +version = "10.3.9.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = "2025-03-07T01:46:23.323Z" }, +] + +[[package]] +name = "nvidia-cusolver-cu12" +version = "11.7.3.90" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-cublas-cu12" }, + { name = "nvidia-cusparse-cu12" }, + { name = "nvidia-nvjitlink-cu12" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, +] + +[[package]] +name = "nvidia-cusparse-cu12" +version = "12.5.8.93" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-nvjitlink-cu12" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, +] + +[[package]] +name = "nvidia-cusparselt-cu12" +version = "0.7.1" +source = { 
registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" }, +] + +[[package]] +name = "nvidia-nccl-cu12" +version = "2.27.5" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229, upload-time = "2025-06-26T04:11:28.385Z" }, +] + +[[package]] +name = "nvidia-nvjitlink-cu12" +version = "12.8.93" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" }, +] + +[[package]] +name = "nvidia-nvshmem-cu12" +version = "3.3.20" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/6c/99acb2f9eb85c29fc6f3a7ac4dccfd992e22666dd08a642b303311326a97/nvidia_nvshmem_cu12-3.3.20-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d00f26d3f9b2e3c3065be895e3059d6479ea5c638a3f38c9fec49b1b9dd7c1e5", size = 124657145, upload-time = "2025-08-04T20:25:19.995Z" }, +] + +[[package]] +name = "nvidia-nvtx-cu12" +version = "12.8.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" }, +] + [[package]] name = "opentelemetry-api" version = "1.37.0" @@ -1407,6 +1588,51 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/07/90/68152b7465f50285d3ce2481b3aec2f82822e3f52e5152eeeaf516bab841/opentelemetry_semantic_conventions-0.58b0-py3-none-any.whl", hash = "sha256:5564905ab1458b96684db1340232729fce3b5375a06e140e8904c78e4f815b28", size = 207954, upload-time = "2025-09-11T10:28:59.218Z" }, ] +[[package]] +name = "opt-einsum" +version = "3.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8c/b9/2ac072041e899a52f20cf9510850ff58295003aa75525e58343591b0cbfb/opt_einsum-3.4.0.tar.gz", hash = "sha256:96ca72f1b886d148241348783498194c577fa30a8faac108586b14f1ba4473ac", size = 63004, upload-time = "2024-09-26T14:33:24.483Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/23/cd/066e86230ae37ed0be70aae89aabf03ca8d9f39c8aea0dec8029455b5540/opt_einsum-3.4.0-py3-none-any.whl", hash = "sha256:69bb92469f86a1565195ece4ac0323943e83477171b91d24c35afe028a90d7cd", size = 71932, upload-time = "2024-09-26T14:33:23.039Z" }, +] + +[[package]] +name = "optax" +version = "0.2.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "absl-py" }, + { name = 
"chex" }, + { name = "jax" }, + { name = "jaxlib" }, + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6d/3b/90c11f740a3538200b61cd2b7d9346959cb9e31e0bdea3d2f886b7262203/optax-0.2.6.tar.gz", hash = "sha256:ba8d1e12678eba2657484d6feeca4fb281b8066bdfd5efbfc0f41b87663109c0", size = 269660, upload-time = "2025-09-15T22:41:24.76Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b8/ec/19c6cc6064c7fc8f0cd6d5b37c4747849e66040c6ca98f86565efc2c227c/optax-0.2.6-py3-none-any.whl", hash = "sha256:f875251a5ab20f179d4be57478354e8e21963373b10f9c3b762b94dcb8c36d91", size = 367782, upload-time = "2025-09-15T22:41:22.825Z" }, +] + +[[package]] +name = "osqp" +version = "1.0.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jinja2" }, + { name = "joblib" }, + { name = "numpy" }, + { name = "scipy" }, + { name = "setuptools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/85/cf/023078d9985526494901e9ca91c59d17b2d2e5f87a047f4b8b9749ce5922/osqp-1.0.5.tar.gz", hash = "sha256:60b484cf829c99d94bb7ae4e9beb2e0895d94c5e64e074b5b27b6ef887941936", size = 56757, upload-time = "2025-10-15T14:05:33.613Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/01/417ccf73d61b24a00c56fc207db316c72ff86234aa21417d70148447e091/osqp-1.0.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7bd899ea81ac03030ea0e28a1102797779ffe6450315ad79009c89bb20156887", size = 326124, upload-time = "2025-10-15T14:05:05.069Z" }, + { url = "https://files.pythonhosted.org/packages/1a/ae/dfc315af542489706b5659bb7759de2f29367dee1d6918753d21f2391728/osqp-1.0.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b837236c847ac90dbd074001dbe5c921701a717fbfebe25f86af93adcad496be", size = 301870, upload-time = "2025-10-15T14:05:06.325Z" }, + { url = "https://files.pythonhosted.org/packages/05/34/7d2478c822edb53a38a3ed2cae89c7a9375e6a8d04897f3fb974c431a189/osqp-1.0.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e65dde66bf5001a6884082090e7311e1f6881a475e9b6c1b5924d7afa7cd5adc", size = 336553, upload-time = "2025-10-15T14:05:07.437Z" }, + { url = "https://files.pythonhosted.org/packages/82/5f/a3376f56f4d209618c22492fe02b47be05b47bbb6c263460e0f38b36fc1d/osqp-1.0.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c83f4a164e03fba91c244f6cfaa52acc3e6a93d11b3279a9f768f0a14e82fb18", size = 357238, upload-time = "2025-10-15T14:05:08.66Z" }, + { url = "https://files.pythonhosted.org/packages/3a/c4/d47ccafc3e149c1b9b860c63fbdbaa18dfc06784593cd221c5896be9945c/osqp-1.0.5-cp311-cp311-win_amd64.whl", hash = "sha256:e1f6d0231ea47269ccf3df5587987797a5a2fca4083058ea6d53e2d777c9e3fb", size = 309670, upload-time = "2025-10-15T14:05:09.716Z" }, +] + [[package]] name = "packaging" version = "25.0" @@ -1519,37 +1745,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e0/ed/55532bb88f674d5d8f67ab121a2a13c385df382de2a1677f30ad385f7438/pydantic_core-2.33.2-cp311-cp311-win32.whl", hash = "sha256:6368900c2d3ef09b69cb0b913f9f8263b03786e5b2a387706c5afb66800efd51", size = 1910538, upload-time = "2025-04-23T18:31:20.541Z" }, { url = "https://files.pythonhosted.org/packages/fe/1b/25b7cccd4519c0b23c2dd636ad39d381abf113085ce4f7bec2b0dc755eb1/pydantic_core-2.33.2-cp311-cp311-win_amd64.whl", hash = "sha256:1e063337ef9e9820c77acc768546325ebe04ee38b08703244c1309cccc4f1bab", size = 1952909, upload-time = "2025-04-23T18:31:22.371Z" }, { url = 
"https://files.pythonhosted.org/packages/49/a9/d809358e49126438055884c4366a1f6227f0f84f635a9014e2deb9b9de54/pydantic_core-2.33.2-cp311-cp311-win_arm64.whl", hash = "sha256:6b99022f1d19bc32a4c2a0d544fc9a76e3be90f0b3f4af413f87d38749300e65", size = 1897786, upload-time = "2025-04-23T18:31:24.161Z" }, - { url = "https://files.pythonhosted.org/packages/18/8a/2b41c97f554ec8c71f2a8a5f85cb56a8b0956addfe8b0efb5b3d77e8bdc3/pydantic_core-2.33.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a7ec89dc587667f22b6a0b6579c249fca9026ce7c333fc142ba42411fa243cdc", size = 2009000, upload-time = "2025-04-23T18:31:25.863Z" }, - { url = "https://files.pythonhosted.org/packages/a1/02/6224312aacb3c8ecbaa959897af57181fb6cf3a3d7917fd44d0f2917e6f2/pydantic_core-2.33.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3c6db6e52c6d70aa0d00d45cdb9b40f0433b96380071ea80b09277dba021ddf7", size = 1847996, upload-time = "2025-04-23T18:31:27.341Z" }, - { url = "https://files.pythonhosted.org/packages/d6/46/6dcdf084a523dbe0a0be59d054734b86a981726f221f4562aed313dbcb49/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e61206137cbc65e6d5256e1166f88331d3b6238e082d9f74613b9b765fb9025", size = 1880957, upload-time = "2025-04-23T18:31:28.956Z" }, - { url = "https://files.pythonhosted.org/packages/ec/6b/1ec2c03837ac00886ba8160ce041ce4e325b41d06a034adbef11339ae422/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eb8c529b2819c37140eb51b914153063d27ed88e3bdc31b71198a198e921e011", size = 1964199, upload-time = "2025-04-23T18:31:31.025Z" }, - { url = "https://files.pythonhosted.org/packages/2d/1d/6bf34d6adb9debd9136bd197ca72642203ce9aaaa85cfcbfcf20f9696e83/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c52b02ad8b4e2cf14ca7b3d918f3eb0ee91e63b3167c32591e57c4317e134f8f", size = 2120296, upload-time = "2025-04-23T18:31:32.514Z" }, - { url = "https://files.pythonhosted.org/packages/e0/94/2bd0aaf5a591e974b32a9f7123f16637776c304471a0ab33cf263cf5591a/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:96081f1605125ba0855dfda83f6f3df5ec90c61195421ba72223de35ccfb2f88", size = 2676109, upload-time = "2025-04-23T18:31:33.958Z" }, - { url = "https://files.pythonhosted.org/packages/f9/41/4b043778cf9c4285d59742281a769eac371b9e47e35f98ad321349cc5d61/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f57a69461af2a5fa6e6bbd7a5f60d3b7e6cebb687f55106933188e79ad155c1", size = 2002028, upload-time = "2025-04-23T18:31:39.095Z" }, - { url = "https://files.pythonhosted.org/packages/cb/d5/7bb781bf2748ce3d03af04d5c969fa1308880e1dca35a9bd94e1a96a922e/pydantic_core-2.33.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:572c7e6c8bb4774d2ac88929e3d1f12bc45714ae5ee6d9a788a9fb35e60bb04b", size = 2100044, upload-time = "2025-04-23T18:31:41.034Z" }, - { url = "https://files.pythonhosted.org/packages/fe/36/def5e53e1eb0ad896785702a5bbfd25eed546cdcf4087ad285021a90ed53/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:db4b41f9bd95fbe5acd76d89920336ba96f03e149097365afe1cb092fceb89a1", size = 2058881, upload-time = "2025-04-23T18:31:42.757Z" }, - { url = "https://files.pythonhosted.org/packages/01/6c/57f8d70b2ee57fc3dc8b9610315949837fa8c11d86927b9bb044f8705419/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = 
"sha256:fa854f5cf7e33842a892e5c73f45327760bc7bc516339fda888c75ae60edaeb6", size = 2227034, upload-time = "2025-04-23T18:31:44.304Z" }, - { url = "https://files.pythonhosted.org/packages/27/b9/9c17f0396a82b3d5cbea4c24d742083422639e7bb1d5bf600e12cb176a13/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5f483cfb75ff703095c59e365360cb73e00185e01aaea067cd19acffd2ab20ea", size = 2234187, upload-time = "2025-04-23T18:31:45.891Z" }, - { url = "https://files.pythonhosted.org/packages/b0/6a/adf5734ffd52bf86d865093ad70b2ce543415e0e356f6cacabbc0d9ad910/pydantic_core-2.33.2-cp312-cp312-win32.whl", hash = "sha256:9cb1da0f5a471435a7bc7e439b8a728e8b61e59784b2af70d7c169f8dd8ae290", size = 1892628, upload-time = "2025-04-23T18:31:47.819Z" }, - { url = "https://files.pythonhosted.org/packages/43/e4/5479fecb3606c1368d496a825d8411e126133c41224c1e7238be58b87d7e/pydantic_core-2.33.2-cp312-cp312-win_amd64.whl", hash = "sha256:f941635f2a3d96b2973e867144fde513665c87f13fe0e193c158ac51bfaaa7b2", size = 1955866, upload-time = "2025-04-23T18:31:49.635Z" }, - { url = "https://files.pythonhosted.org/packages/0d/24/8b11e8b3e2be9dd82df4b11408a67c61bb4dc4f8e11b5b0fc888b38118b5/pydantic_core-2.33.2-cp312-cp312-win_arm64.whl", hash = "sha256:cca3868ddfaccfbc4bfb1d608e2ccaaebe0ae628e1416aeb9c4d88c001bb45ab", size = 1888894, upload-time = "2025-04-23T18:31:51.609Z" }, - { url = "https://files.pythonhosted.org/packages/46/8c/99040727b41f56616573a28771b1bfa08a3d3fe74d3d513f01251f79f172/pydantic_core-2.33.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1082dd3e2d7109ad8b7da48e1d4710c8d06c253cbc4a27c1cff4fbcaa97a9e3f", size = 2015688, upload-time = "2025-04-23T18:31:53.175Z" }, - { url = "https://files.pythonhosted.org/packages/3a/cc/5999d1eb705a6cefc31f0b4a90e9f7fc400539b1a1030529700cc1b51838/pydantic_core-2.33.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f517ca031dfc037a9c07e748cefd8d96235088b83b4f4ba8939105d20fa1dcd6", size = 1844808, upload-time = "2025-04-23T18:31:54.79Z" }, - { url = "https://files.pythonhosted.org/packages/6f/5e/a0a7b8885c98889a18b6e376f344da1ef323d270b44edf8174d6bce4d622/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a9f2c9dd19656823cb8250b0724ee9c60a82f3cdf68a080979d13092a3b0fef", size = 1885580, upload-time = "2025-04-23T18:31:57.393Z" }, - { url = "https://files.pythonhosted.org/packages/3b/2a/953581f343c7d11a304581156618c3f592435523dd9d79865903272c256a/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2b0a451c263b01acebe51895bfb0e1cc842a5c666efe06cdf13846c7418caa9a", size = 1973859, upload-time = "2025-04-23T18:31:59.065Z" }, - { url = "https://files.pythonhosted.org/packages/e6/55/f1a813904771c03a3f97f676c62cca0c0a4138654107c1b61f19c644868b/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ea40a64d23faa25e62a70ad163571c0b342b8bf66d5fa612ac0dec4f069d916", size = 2120810, upload-time = "2025-04-23T18:32:00.78Z" }, - { url = "https://files.pythonhosted.org/packages/aa/c3/053389835a996e18853ba107a63caae0b9deb4a276c6b472931ea9ae6e48/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fb2d542b4d66f9470e8065c5469ec676978d625a8b7a363f07d9a501a9cb36a", size = 2676498, upload-time = "2025-04-23T18:32:02.418Z" }, - { url = 
"https://files.pythonhosted.org/packages/eb/3c/f4abd740877a35abade05e437245b192f9d0ffb48bbbbd708df33d3cda37/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdac5d6ffa1b5a83bca06ffe7583f5576555e6c8b3a91fbd25ea7780f825f7d", size = 2000611, upload-time = "2025-04-23T18:32:04.152Z" }, - { url = "https://files.pythonhosted.org/packages/59/a7/63ef2fed1837d1121a894d0ce88439fe3e3b3e48c7543b2a4479eb99c2bd/pydantic_core-2.33.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04a1a413977ab517154eebb2d326da71638271477d6ad87a769102f7c2488c56", size = 2107924, upload-time = "2025-04-23T18:32:06.129Z" }, - { url = "https://files.pythonhosted.org/packages/04/8f/2551964ef045669801675f1cfc3b0d74147f4901c3ffa42be2ddb1f0efc4/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c8e7af2f4e0194c22b5b37205bfb293d166a7344a5b0d0eaccebc376546d77d5", size = 2063196, upload-time = "2025-04-23T18:32:08.178Z" }, - { url = "https://files.pythonhosted.org/packages/26/bd/d9602777e77fc6dbb0c7db9ad356e9a985825547dce5ad1d30ee04903918/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:5c92edd15cd58b3c2d34873597a1e20f13094f59cf88068adb18947df5455b4e", size = 2236389, upload-time = "2025-04-23T18:32:10.242Z" }, - { url = "https://files.pythonhosted.org/packages/42/db/0e950daa7e2230423ab342ae918a794964b053bec24ba8af013fc7c94846/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:65132b7b4a1c0beded5e057324b7e16e10910c106d43675d9bd87d4f38dde162", size = 2239223, upload-time = "2025-04-23T18:32:12.382Z" }, - { url = "https://files.pythonhosted.org/packages/58/4d/4f937099c545a8a17eb52cb67fe0447fd9a373b348ccfa9a87f141eeb00f/pydantic_core-2.33.2-cp313-cp313-win32.whl", hash = "sha256:52fb90784e0a242bb96ec53f42196a17278855b0f31ac7c3cc6f5c1ec4811849", size = 1900473, upload-time = "2025-04-23T18:32:14.034Z" }, - { url = "https://files.pythonhosted.org/packages/a0/75/4a0a9bac998d78d889def5e4ef2b065acba8cae8c93696906c3a91f310ca/pydantic_core-2.33.2-cp313-cp313-win_amd64.whl", hash = "sha256:c083a3bdd5a93dfe480f1125926afcdbf2917ae714bdb80b36d34318b2bec5d9", size = 1955269, upload-time = "2025-04-23T18:32:15.783Z" }, - { url = "https://files.pythonhosted.org/packages/f9/86/1beda0576969592f1497b4ce8e7bc8cbdf614c352426271b1b10d5f0aa64/pydantic_core-2.33.2-cp313-cp313-win_arm64.whl", hash = "sha256:e80b087132752f6b3d714f041ccf74403799d3b23a72722ea2e6ba2e892555b9", size = 1893921, upload-time = "2025-04-23T18:32:18.473Z" }, - { url = "https://files.pythonhosted.org/packages/a4/7d/e09391c2eebeab681df2b74bfe6c43422fffede8dc74187b2b0bf6fd7571/pydantic_core-2.33.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61c18fba8e5e9db3ab908620af374db0ac1baa69f0f32df4f61ae23f15e586ac", size = 1806162, upload-time = "2025-04-23T18:32:20.188Z" }, - { url = "https://files.pythonhosted.org/packages/f1/3d/847b6b1fed9f8ed3bb95a9ad04fbd0b212e832d4f0f50ff4d9ee5a9f15cf/pydantic_core-2.33.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95237e53bb015f67b63c91af7518a62a8660376a6a0db19b89acc77a4d6199f5", size = 1981560, upload-time = "2025-04-23T18:32:22.354Z" }, - { url = "https://files.pythonhosted.org/packages/6f/9a/e73262f6c6656262b5fdd723ad90f518f579b7bc8622e43a942eec53c938/pydantic_core-2.33.2-cp313-cp313t-win_amd64.whl", hash = "sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9", size = 1935777, upload-time = "2025-04-23T18:32:25.088Z" }, { url = 
"https://files.pythonhosted.org/packages/7b/27/d4ae6487d73948d6f20dddcd94be4ea43e74349b56eba82e9bdee2d7494c/pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:dd14041875d09cc0f9308e37a6f8b65f5585cf2598a53aa0123df8b129d481f8", size = 2025200, upload-time = "2025-04-23T18:33:14.199Z" }, { url = "https://files.pythonhosted.org/packages/f1/b8/b3cb95375f05d33801024079b9392a5ab45267a63400bf1866e7ce0f0de4/pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d87c561733f66531dced0da6e864f44ebf89a8fba55f31407b00c2f7f9449593", size = 1859123, upload-time = "2025-04-23T18:33:16.555Z" }, { url = "https://files.pythonhosted.org/packages/05/bc/0d0b5adeda59a261cd30a1235a445bf55c7e46ae44aea28f7bd6ed46e091/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f82865531efd18d6e07a04a17331af02cb7a651583c418df8266f17a63c6612", size = 1892852, upload-time = "2025-04-23T18:33:18.513Z" }, @@ -1622,15 +1817,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7c/af/449a6a91e5d6db51420875c54f6aff7c97a86a3b13a0b4f1a5c13b988de3/pywin32-311-cp311-cp311-win32.whl", hash = "sha256:184eb5e436dea364dcd3d2316d577d625c0351bf237c4e9a5fabbcfa5a58b151", size = 8697031, upload-time = "2025-07-14T20:13:13.266Z" }, { url = "https://files.pythonhosted.org/packages/51/8f/9bb81dd5bb77d22243d33c8397f09377056d5c687aa6d4042bea7fbf8364/pywin32-311-cp311-cp311-win_amd64.whl", hash = "sha256:3ce80b34b22b17ccbd937a6e78e7225d80c52f5ab9940fe0506a1a16f3dab503", size = 9508308, upload-time = "2025-07-14T20:13:15.147Z" }, { url = "https://files.pythonhosted.org/packages/44/7b/9c2ab54f74a138c491aba1b1cd0795ba61f144c711daea84a88b63dc0f6c/pywin32-311-cp311-cp311-win_arm64.whl", hash = "sha256:a733f1388e1a842abb67ffa8e7aad0e70ac519e09b0f6a784e65a136ec7cefd2", size = 8703930, upload-time = "2025-07-14T20:13:16.945Z" }, - { url = "https://files.pythonhosted.org/packages/e7/ab/01ea1943d4eba0f850c3c61e78e8dd59757ff815ff3ccd0a84de5f541f42/pywin32-311-cp312-cp312-win32.whl", hash = "sha256:750ec6e621af2b948540032557b10a2d43b0cee2ae9758c54154d711cc852d31", size = 8706543, upload-time = "2025-07-14T20:13:20.765Z" }, - { url = "https://files.pythonhosted.org/packages/d1/a8/a0e8d07d4d051ec7502cd58b291ec98dcc0c3fff027caad0470b72cfcc2f/pywin32-311-cp312-cp312-win_amd64.whl", hash = "sha256:b8c095edad5c211ff31c05223658e71bf7116daa0ecf3ad85f3201ea3190d067", size = 9495040, upload-time = "2025-07-14T20:13:22.543Z" }, - { url = "https://files.pythonhosted.org/packages/ba/3a/2ae996277b4b50f17d61f0603efd8253cb2d79cc7ae159468007b586396d/pywin32-311-cp312-cp312-win_arm64.whl", hash = "sha256:e286f46a9a39c4a18b319c28f59b61de793654af2f395c102b4f819e584b5852", size = 8710102, upload-time = "2025-07-14T20:13:24.682Z" }, - { url = "https://files.pythonhosted.org/packages/a5/be/3fd5de0979fcb3994bfee0d65ed8ca9506a8a1260651b86174f6a86f52b3/pywin32-311-cp313-cp313-win32.whl", hash = "sha256:f95ba5a847cba10dd8c4d8fefa9f2a6cf283b8b88ed6178fa8a6c1ab16054d0d", size = 8705700, upload-time = "2025-07-14T20:13:26.471Z" }, - { url = "https://files.pythonhosted.org/packages/e3/28/e0a1909523c6890208295a29e05c2adb2126364e289826c0a8bc7297bd5c/pywin32-311-cp313-cp313-win_amd64.whl", hash = "sha256:718a38f7e5b058e76aee1c56ddd06908116d35147e133427e59a3983f703a20d", size = 9494700, upload-time = "2025-07-14T20:13:28.243Z" }, - { url = 
"https://files.pythonhosted.org/packages/04/bf/90339ac0f55726dce7d794e6d79a18a91265bdf3aa70b6b9ca52f35e022a/pywin32-311-cp313-cp313-win_arm64.whl", hash = "sha256:7b4075d959648406202d92a2310cb990fea19b535c7f4a78d3f5e10b926eeb8a", size = 8709318, upload-time = "2025-07-14T20:13:30.348Z" }, - { url = "https://files.pythonhosted.org/packages/c9/31/097f2e132c4f16d99a22bfb777e0fd88bd8e1c634304e102f313af69ace5/pywin32-311-cp314-cp314-win32.whl", hash = "sha256:b7a2c10b93f8986666d0c803ee19b5990885872a7de910fc460f9b0c2fbf92ee", size = 8840714, upload-time = "2025-07-14T20:13:32.449Z" }, - { url = "https://files.pythonhosted.org/packages/90/4b/07c77d8ba0e01349358082713400435347df8426208171ce297da32c313d/pywin32-311-cp314-cp314-win_amd64.whl", hash = "sha256:3aca44c046bd2ed8c90de9cb8427f581c479e594e99b5c0bb19b29c10fd6cb87", size = 9656800, upload-time = "2025-07-14T20:13:34.312Z" }, - { url = "https://files.pythonhosted.org/packages/c0/d2/21af5c535501a7233e734b8af901574572da66fcc254cb35d0609c9080dd/pywin32-311-cp314-cp314-win_arm64.whl", hash = "sha256:a508e2d9025764a8270f93111a970e1d0fbfc33f4153b388bb649b7eec4f9b42", size = 8932540, upload-time = "2025-07-14T20:13:36.379Z" }, ] [[package]] @@ -1648,44 +1834,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f2/6a/b627b4e0c1dd03718543519ffb2f1deea4a1e6d42fbab8021936a4d22589/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4", size = 794986, upload-time = "2025-09-25T21:32:07.367Z" }, { url = "https://files.pythonhosted.org/packages/45/91/47a6e1c42d9ee337c4839208f30d9f09caa9f720ec7582917b264defc875/pyyaml-6.0.3-cp311-cp311-win32.whl", hash = "sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b", size = 142543, upload-time = "2025-09-25T21:32:08.95Z" }, { url = "https://files.pythonhosted.org/packages/da/e3/ea007450a105ae919a72393cb06f122f288ef60bba2dc64b26e2646fa315/pyyaml-6.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf", size = 158763, upload-time = "2025-09-25T21:32:09.96Z" }, - { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063, upload-time = "2025-09-25T21:32:11.445Z" }, - { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973, upload-time = "2025-09-25T21:32:12.492Z" }, - { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116, upload-time = "2025-09-25T21:32:13.652Z" }, - { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011, upload-time = "2025-09-25T21:32:15.21Z" }, - { url = 
"https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870, upload-time = "2025-09-25T21:32:16.431Z" }, - { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089, upload-time = "2025-09-25T21:32:17.56Z" }, - { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181, upload-time = "2025-09-25T21:32:18.834Z" }, - { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658, upload-time = "2025-09-25T21:32:20.209Z" }, - { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003, upload-time = "2025-09-25T21:32:21.167Z" }, - { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344, upload-time = "2025-09-25T21:32:22.617Z" }, - { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669, upload-time = "2025-09-25T21:32:23.673Z" }, - { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252, upload-time = "2025-09-25T21:32:25.149Z" }, - { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081, upload-time = "2025-09-25T21:32:26.575Z" }, - { url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159, upload-time = "2025-09-25T21:32:27.727Z" }, - { url = "https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626, upload-time = "2025-09-25T21:32:28.878Z" }, - { url 
= "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613, upload-time = "2025-09-25T21:32:30.178Z" }, - { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115, upload-time = "2025-09-25T21:32:31.353Z" }, - { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427, upload-time = "2025-09-25T21:32:32.58Z" }, - { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090, upload-time = "2025-09-25T21:32:33.659Z" }, - { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload-time = "2025-09-25T21:32:34.663Z" }, - { url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814, upload-time = "2025-09-25T21:32:35.712Z" }, - { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809, upload-time = "2025-09-25T21:32:36.789Z" }, - { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454, upload-time = "2025-09-25T21:32:37.966Z" }, - { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355, upload-time = "2025-09-25T21:32:39.178Z" }, - { url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175, upload-time = "2025-09-25T21:32:40.865Z" }, - { url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228, upload-time = "2025-09-25T21:32:42.084Z" }, - { url = 
"https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194, upload-time = "2025-09-25T21:32:43.362Z" }, - { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429, upload-time = "2025-09-25T21:32:57.844Z" }, - { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912, upload-time = "2025-09-25T21:32:59.247Z" }, - { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108, upload-time = "2025-09-25T21:32:44.377Z" }, - { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641, upload-time = "2025-09-25T21:32:45.407Z" }, - { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901, upload-time = "2025-09-25T21:32:48.83Z" }, - { url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132, upload-time = "2025-09-25T21:32:50.149Z" }, - { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261, upload-time = "2025-09-25T21:32:51.808Z" }, - { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272, upload-time = "2025-09-25T21:32:52.941Z" }, - { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923, upload-time = "2025-09-25T21:32:54.537Z" }, - { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062, upload-time = "2025-09-25T21:32:55.767Z" }, - { url = 
"https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, ] [[package]] @@ -1695,7 +1843,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, { name = "rpds-py" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions" }, ] sdist = { url = "https://files.pythonhosted.org/packages/2f/db/98b5c277be99dd18bfd91dd04e1b759cad18d1a338188c936e92f921c7e2/referencing-0.36.2.tar.gz", hash = "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa", size = 74744, upload-time = "2025-01-25T08:48:16.138Z" } wheels = [ @@ -1717,6 +1865,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, ] +[[package]] +name = "rlax" +version = "0.1.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "absl-py" }, + { name = "chex" }, + { name = "distrax" }, + { name = "dm-env" }, + { name = "jax" }, + { name = "jaxlib" }, + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/94/96/5e68f3dac7a4cc730ffa5885f9c057023daa86384dc29a8c6da0fb65c7ae/rlax-0.1.8.tar.gz", hash = "sha256:4ec5dcd3969099e8699a979b15b967187ce2536bba467ad7c1d980f1eeade541", size = 84704, upload-time = "2025-09-01T23:40:00.106Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f2/0e/04727f672ba609c4d4ed9f9be82cc1e7afb0d7355051f9f406b55defac10/rlax-0.1.8-py3-none-any.whl", hash = "sha256:c6d5c45cc6727dd7ad58baacf12b92ca53568d7d68063b8a97f3c5c36552cb8f", size = 116178, upload-time = "2025-09-01T23:39:58.593Z" }, +] + [[package]] name = "rpds-py" version = "0.27.1" @@ -1738,79 +1904,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b2/d5/0b2a55415931db4f112bdab072443ff76131b5ac4f4dc98d10d2d357eb03/rpds_py-0.27.1-cp311-cp311-win32.whl", hash = "sha256:3182af66048c00a075010bc7f4860f33913528a4b6fc09094a6e7598e462fe39", size = 217154, upload-time = "2025-08-27T12:13:06.278Z" }, { url = "https://files.pythonhosted.org/packages/24/75/3b7ffe0d50dc86a6a964af0d1cc3a4a2cdf437cb7b099a4747bbb96d1819/rpds_py-0.27.1-cp311-cp311-win_amd64.whl", hash = "sha256:b4938466c6b257b2f5c4ff98acd8128ec36b5059e5c8f8372d79316b1c36bb15", size = 228627, upload-time = "2025-08-27T12:13:07.625Z" }, { url = "https://files.pythonhosted.org/packages/8d/3f/4fd04c32abc02c710f09a72a30c9a55ea3cc154ef8099078fd50a0596f8e/rpds_py-0.27.1-cp311-cp311-win_arm64.whl", hash = "sha256:2f57af9b4d0793e53266ee4325535a31ba48e2f875da81a9177c9926dfa60746", size = 220998, upload-time = "2025-08-27T12:13:08.972Z" }, - { url = "https://files.pythonhosted.org/packages/bd/fe/38de28dee5df58b8198c743fe2bea0c785c6d40941b9950bac4cdb71a014/rpds_py-0.27.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:ae2775c1973e3c30316892737b91f9283f9908e3cc7625b9331271eaaed7dc90", size = 361887, upload-time = "2025-08-27T12:13:10.233Z" }, - { url = "https://files.pythonhosted.org/packages/7c/9a/4b6c7eedc7dd90986bf0fab6ea2a091ec11c01b15f8ba0a14d3f80450468/rpds_py-0.27.1-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:2643400120f55c8a96f7c9d858f7be0c88d383cd4653ae2cf0d0c88f668073e5", size = 345795, upload-time = "2025-08-27T12:13:11.65Z" }, - { url = "https://files.pythonhosted.org/packages/6f/0e/e650e1b81922847a09cca820237b0edee69416a01268b7754d506ade11ad/rpds_py-0.27.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16323f674c089b0360674a4abd28d5042947d54ba620f72514d69be4ff64845e", size = 385121, upload-time = "2025-08-27T12:13:13.008Z" }, - { url = "https://files.pythonhosted.org/packages/1b/ea/b306067a712988e2bff00dcc7c8f31d26c29b6d5931b461aa4b60a013e33/rpds_py-0.27.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9a1f4814b65eacac94a00fc9a526e3fdafd78e439469644032032d0d63de4881", size = 398976, upload-time = "2025-08-27T12:13:14.368Z" }, - { url = "https://files.pythonhosted.org/packages/2c/0a/26dc43c8840cb8fe239fe12dbc8d8de40f2365e838f3d395835dde72f0e5/rpds_py-0.27.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ba32c16b064267b22f1850a34051121d423b6f7338a12b9459550eb2096e7ec", size = 525953, upload-time = "2025-08-27T12:13:15.774Z" }, - { url = "https://files.pythonhosted.org/packages/22/14/c85e8127b573aaf3a0cbd7fbb8c9c99e735a4a02180c84da2a463b766e9e/rpds_py-0.27.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5c20f33fd10485b80f65e800bbe5f6785af510b9f4056c5a3c612ebc83ba6cb", size = 407915, upload-time = "2025-08-27T12:13:17.379Z" }, - { url = "https://files.pythonhosted.org/packages/ed/7b/8f4fee9ba1fb5ec856eb22d725a4efa3deb47f769597c809e03578b0f9d9/rpds_py-0.27.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:466bfe65bd932da36ff279ddd92de56b042f2266d752719beb97b08526268ec5", size = 386883, upload-time = "2025-08-27T12:13:18.704Z" }, - { url = "https://files.pythonhosted.org/packages/86/47/28fa6d60f8b74fcdceba81b272f8d9836ac0340570f68f5df6b41838547b/rpds_py-0.27.1-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:41e532bbdcb57c92ba3be62c42e9f096431b4cf478da9bc3bc6ce5c38ab7ba7a", size = 405699, upload-time = "2025-08-27T12:13:20.089Z" }, - { url = "https://files.pythonhosted.org/packages/d0/fd/c5987b5e054548df56953a21fe2ebed51fc1ec7c8f24fd41c067b68c4a0a/rpds_py-0.27.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f149826d742b406579466283769a8ea448eed82a789af0ed17b0cd5770433444", size = 423713, upload-time = "2025-08-27T12:13:21.436Z" }, - { url = "https://files.pythonhosted.org/packages/ac/ba/3c4978b54a73ed19a7d74531be37a8bcc542d917c770e14d372b8daea186/rpds_py-0.27.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:80c60cfb5310677bd67cb1e85a1e8eb52e12529545441b43e6f14d90b878775a", size = 562324, upload-time = "2025-08-27T12:13:22.789Z" }, - { url = "https://files.pythonhosted.org/packages/b5/6c/6943a91768fec16db09a42b08644b960cff540c66aab89b74be6d4a144ba/rpds_py-0.27.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:7ee6521b9baf06085f62ba9c7a3e5becffbc32480d2f1b351559c001c38ce4c1", size = 593646, upload-time = "2025-08-27T12:13:24.122Z" }, - { url = "https://files.pythonhosted.org/packages/11/73/9d7a8f4be5f4396f011a6bb7a19fe26303a0dac9064462f5651ced2f572f/rpds_py-0.27.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a512c8263249a9d68cac08b05dd59d2b3f2061d99b322813cbcc14c3c7421998", size = 558137, upload-time = "2025-08-27T12:13:25.557Z" }, - { url = "https://files.pythonhosted.org/packages/6e/96/6772cbfa0e2485bcceef8071de7821f81aeac8bb45fbfd5542a3e8108165/rpds_py-0.27.1-cp312-cp312-win32.whl", hash = 
"sha256:819064fa048ba01b6dadc5116f3ac48610435ac9a0058bbde98e569f9e785c39", size = 221343, upload-time = "2025-08-27T12:13:26.967Z" }, - { url = "https://files.pythonhosted.org/packages/67/b6/c82f0faa9af1c6a64669f73a17ee0eeef25aff30bb9a1c318509efe45d84/rpds_py-0.27.1-cp312-cp312-win_amd64.whl", hash = "sha256:d9199717881f13c32c4046a15f024971a3b78ad4ea029e8da6b86e5aa9cf4594", size = 232497, upload-time = "2025-08-27T12:13:28.326Z" }, - { url = "https://files.pythonhosted.org/packages/e1/96/2817b44bd2ed11aebacc9251da03689d56109b9aba5e311297b6902136e2/rpds_py-0.27.1-cp312-cp312-win_arm64.whl", hash = "sha256:33aa65b97826a0e885ef6e278fbd934e98cdcfed80b63946025f01e2f5b29502", size = 222790, upload-time = "2025-08-27T12:13:29.71Z" }, - { url = "https://files.pythonhosted.org/packages/cc/77/610aeee8d41e39080c7e14afa5387138e3c9fa9756ab893d09d99e7d8e98/rpds_py-0.27.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:e4b9fcfbc021633863a37e92571d6f91851fa656f0180246e84cbd8b3f6b329b", size = 361741, upload-time = "2025-08-27T12:13:31.039Z" }, - { url = "https://files.pythonhosted.org/packages/3a/fc/c43765f201c6a1c60be2043cbdb664013def52460a4c7adace89d6682bf4/rpds_py-0.27.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1441811a96eadca93c517d08df75de45e5ffe68aa3089924f963c782c4b898cf", size = 345574, upload-time = "2025-08-27T12:13:32.902Z" }, - { url = "https://files.pythonhosted.org/packages/20/42/ee2b2ca114294cd9847d0ef9c26d2b0851b2e7e00bf14cc4c0b581df0fc3/rpds_py-0.27.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55266dafa22e672f5a4f65019015f90336ed31c6383bd53f5e7826d21a0e0b83", size = 385051, upload-time = "2025-08-27T12:13:34.228Z" }, - { url = "https://files.pythonhosted.org/packages/fd/e8/1e430fe311e4799e02e2d1af7c765f024e95e17d651612425b226705f910/rpds_py-0.27.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d78827d7ac08627ea2c8e02c9e5b41180ea5ea1f747e9db0915e3adf36b62dcf", size = 398395, upload-time = "2025-08-27T12:13:36.132Z" }, - { url = "https://files.pythonhosted.org/packages/82/95/9dc227d441ff2670651c27a739acb2535ccaf8b351a88d78c088965e5996/rpds_py-0.27.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae92443798a40a92dc5f0b01d8a7c93adde0c4dc965310a29ae7c64d72b9fad2", size = 524334, upload-time = "2025-08-27T12:13:37.562Z" }, - { url = "https://files.pythonhosted.org/packages/87/01/a670c232f401d9ad461d9a332aa4080cd3cb1d1df18213dbd0d2a6a7ab51/rpds_py-0.27.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c46c9dd2403b66a2a3b9720ec4b74d4ab49d4fabf9f03dfdce2d42af913fe8d0", size = 407691, upload-time = "2025-08-27T12:13:38.94Z" }, - { url = "https://files.pythonhosted.org/packages/03/36/0a14aebbaa26fe7fab4780c76f2239e76cc95a0090bdb25e31d95c492fcd/rpds_py-0.27.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2efe4eb1d01b7f5f1939f4ef30ecea6c6b3521eec451fb93191bf84b2a522418", size = 386868, upload-time = "2025-08-27T12:13:40.192Z" }, - { url = "https://files.pythonhosted.org/packages/3b/03/8c897fb8b5347ff6c1cc31239b9611c5bf79d78c984430887a353e1409a1/rpds_py-0.27.1-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:15d3b4d83582d10c601f481eca29c3f138d44c92187d197aff663a269197c02d", size = 405469, upload-time = "2025-08-27T12:13:41.496Z" }, - { url = "https://files.pythonhosted.org/packages/da/07/88c60edc2df74850d496d78a1fdcdc7b54360a7f610a4d50008309d41b94/rpds_py-0.27.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:4ed2e16abbc982a169d30d1a420274a709949e2cbdef119fe2ec9d870b42f274", size = 422125, upload-time = "2025-08-27T12:13:42.802Z" }, - { url = "https://files.pythonhosted.org/packages/6b/86/5f4c707603e41b05f191a749984f390dabcbc467cf833769b47bf14ba04f/rpds_py-0.27.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a75f305c9b013289121ec0f1181931975df78738cdf650093e6b86d74aa7d8dd", size = 562341, upload-time = "2025-08-27T12:13:44.472Z" }, - { url = "https://files.pythonhosted.org/packages/b2/92/3c0cb2492094e3cd9baf9e49bbb7befeceb584ea0c1a8b5939dca4da12e5/rpds_py-0.27.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:67ce7620704745881a3d4b0ada80ab4d99df390838839921f99e63c474f82cf2", size = 592511, upload-time = "2025-08-27T12:13:45.898Z" }, - { url = "https://files.pythonhosted.org/packages/10/bb/82e64fbb0047c46a168faa28d0d45a7851cd0582f850b966811d30f67ad8/rpds_py-0.27.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9d992ac10eb86d9b6f369647b6a3f412fc0075cfd5d799530e84d335e440a002", size = 557736, upload-time = "2025-08-27T12:13:47.408Z" }, - { url = "https://files.pythonhosted.org/packages/00/95/3c863973d409210da7fb41958172c6b7dbe7fc34e04d3cc1f10bb85e979f/rpds_py-0.27.1-cp313-cp313-win32.whl", hash = "sha256:4f75e4bd8ab8db624e02c8e2fc4063021b58becdbe6df793a8111d9343aec1e3", size = 221462, upload-time = "2025-08-27T12:13:48.742Z" }, - { url = "https://files.pythonhosted.org/packages/ce/2c/5867b14a81dc217b56d95a9f2a40fdbc56a1ab0181b80132beeecbd4b2d6/rpds_py-0.27.1-cp313-cp313-win_amd64.whl", hash = "sha256:f9025faafc62ed0b75a53e541895ca272815bec18abe2249ff6501c8f2e12b83", size = 232034, upload-time = "2025-08-27T12:13:50.11Z" }, - { url = "https://files.pythonhosted.org/packages/c7/78/3958f3f018c01923823f1e47f1cc338e398814b92d83cd278364446fac66/rpds_py-0.27.1-cp313-cp313-win_arm64.whl", hash = "sha256:ed10dc32829e7d222b7d3b93136d25a406ba9788f6a7ebf6809092da1f4d279d", size = 222392, upload-time = "2025-08-27T12:13:52.587Z" }, - { url = "https://files.pythonhosted.org/packages/01/76/1cdf1f91aed5c3a7bf2eba1f1c4e4d6f57832d73003919a20118870ea659/rpds_py-0.27.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:92022bbbad0d4426e616815b16bc4127f83c9a74940e1ccf3cfe0b387aba0228", size = 358355, upload-time = "2025-08-27T12:13:54.012Z" }, - { url = "https://files.pythonhosted.org/packages/c3/6f/bf142541229374287604caf3bb2a4ae17f0a580798fd72d3b009b532db4e/rpds_py-0.27.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:47162fdab9407ec3f160805ac3e154df042e577dd53341745fc7fb3f625e6d92", size = 342138, upload-time = "2025-08-27T12:13:55.791Z" }, - { url = "https://files.pythonhosted.org/packages/1a/77/355b1c041d6be40886c44ff5e798b4e2769e497b790f0f7fd1e78d17e9a8/rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb89bec23fddc489e5d78b550a7b773557c9ab58b7946154a10a6f7a214a48b2", size = 380247, upload-time = "2025-08-27T12:13:57.683Z" }, - { url = "https://files.pythonhosted.org/packages/d6/a4/d9cef5c3946ea271ce2243c51481971cd6e34f21925af2783dd17b26e815/rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e48af21883ded2b3e9eb48cb7880ad8598b31ab752ff3be6457001d78f416723", size = 390699, upload-time = "2025-08-27T12:13:59.137Z" }, - { url = "https://files.pythonhosted.org/packages/3a/06/005106a7b8c6c1a7e91b73169e49870f4af5256119d34a361ae5240a0c1d/rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6f5b7bd8e219ed50299e58551a410b64daafb5017d54bbe822e003856f06a802", size 
= 521852, upload-time = "2025-08-27T12:14:00.583Z" }, - { url = "https://files.pythonhosted.org/packages/e5/3e/50fb1dac0948e17a02eb05c24510a8fe12d5ce8561c6b7b7d1339ab7ab9c/rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08f1e20bccf73b08d12d804d6e1c22ca5530e71659e6673bce31a6bb71c1e73f", size = 402582, upload-time = "2025-08-27T12:14:02.034Z" }, - { url = "https://files.pythonhosted.org/packages/cb/b0/f4e224090dc5b0ec15f31a02d746ab24101dd430847c4d99123798661bfc/rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0dc5dceeaefcc96dc192e3a80bbe1d6c410c469e97bdd47494a7d930987f18b2", size = 384126, upload-time = "2025-08-27T12:14:03.437Z" }, - { url = "https://files.pythonhosted.org/packages/54/77/ac339d5f82b6afff1df8f0fe0d2145cc827992cb5f8eeb90fc9f31ef7a63/rpds_py-0.27.1-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:d76f9cc8665acdc0c9177043746775aa7babbf479b5520b78ae4002d889f5c21", size = 399486, upload-time = "2025-08-27T12:14:05.443Z" }, - { url = "https://files.pythonhosted.org/packages/d6/29/3e1c255eee6ac358c056a57d6d6869baa00a62fa32eea5ee0632039c50a3/rpds_py-0.27.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:134fae0e36022edad8290a6661edf40c023562964efea0cc0ec7f5d392d2aaef", size = 414832, upload-time = "2025-08-27T12:14:06.902Z" }, - { url = "https://files.pythonhosted.org/packages/3f/db/6d498b844342deb3fa1d030598db93937a9964fcf5cb4da4feb5f17be34b/rpds_py-0.27.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:eb11a4f1b2b63337cfd3b4d110af778a59aae51c81d195768e353d8b52f88081", size = 557249, upload-time = "2025-08-27T12:14:08.37Z" }, - { url = "https://files.pythonhosted.org/packages/60/f3/690dd38e2310b6f68858a331399b4d6dbb9132c3e8ef8b4333b96caf403d/rpds_py-0.27.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:13e608ac9f50a0ed4faec0e90ece76ae33b34c0e8656e3dceb9a7db994c692cd", size = 587356, upload-time = "2025-08-27T12:14:10.034Z" }, - { url = "https://files.pythonhosted.org/packages/86/e3/84507781cccd0145f35b1dc32c72675200c5ce8d5b30f813e49424ef68fc/rpds_py-0.27.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dd2135527aa40f061350c3f8f89da2644de26cd73e4de458e79606384f4f68e7", size = 555300, upload-time = "2025-08-27T12:14:11.783Z" }, - { url = "https://files.pythonhosted.org/packages/e5/ee/375469849e6b429b3516206b4580a79e9ef3eb12920ddbd4492b56eaacbe/rpds_py-0.27.1-cp313-cp313t-win32.whl", hash = "sha256:3020724ade63fe320a972e2ffd93b5623227e684315adce194941167fee02688", size = 216714, upload-time = "2025-08-27T12:14:13.629Z" }, - { url = "https://files.pythonhosted.org/packages/21/87/3fc94e47c9bd0742660e84706c311a860dcae4374cf4a03c477e23ce605a/rpds_py-0.27.1-cp313-cp313t-win_amd64.whl", hash = "sha256:8ee50c3e41739886606388ba3ab3ee2aae9f35fb23f833091833255a31740797", size = 228943, upload-time = "2025-08-27T12:14:14.937Z" }, - { url = "https://files.pythonhosted.org/packages/70/36/b6e6066520a07cf029d385de869729a895917b411e777ab1cde878100a1d/rpds_py-0.27.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:acb9aafccaae278f449d9c713b64a9e68662e7799dbd5859e2c6b3c67b56d334", size = 362472, upload-time = "2025-08-27T12:14:16.333Z" }, - { url = "https://files.pythonhosted.org/packages/af/07/b4646032e0dcec0df9c73a3bd52f63bc6c5f9cda992f06bd0e73fe3fbebd/rpds_py-0.27.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:b7fb801aa7f845ddf601c49630deeeccde7ce10065561d92729bfe81bd21fb33", size = 345676, upload-time = "2025-08-27T12:14:17.764Z" }, - { url = 
"https://files.pythonhosted.org/packages/b0/16/2f1003ee5d0af4bcb13c0cf894957984c32a6751ed7206db2aee7379a55e/rpds_py-0.27.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe0dd05afb46597b9a2e11c351e5e4283c741237e7f617ffb3252780cca9336a", size = 385313, upload-time = "2025-08-27T12:14:19.829Z" }, - { url = "https://files.pythonhosted.org/packages/05/cd/7eb6dd7b232e7f2654d03fa07f1414d7dfc980e82ba71e40a7c46fd95484/rpds_py-0.27.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b6dfb0e058adb12d8b1d1b25f686e94ffa65d9995a5157afe99743bf7369d62b", size = 399080, upload-time = "2025-08-27T12:14:21.531Z" }, - { url = "https://files.pythonhosted.org/packages/20/51/5829afd5000ec1cb60f304711f02572d619040aa3ec033d8226817d1e571/rpds_py-0.27.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ed090ccd235f6fa8bb5861684567f0a83e04f52dfc2e5c05f2e4b1309fcf85e7", size = 523868, upload-time = "2025-08-27T12:14:23.485Z" }, - { url = "https://files.pythonhosted.org/packages/05/2c/30eebca20d5db95720ab4d2faec1b5e4c1025c473f703738c371241476a2/rpds_py-0.27.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bf876e79763eecf3e7356f157540d6a093cef395b65514f17a356f62af6cc136", size = 408750, upload-time = "2025-08-27T12:14:24.924Z" }, - { url = "https://files.pythonhosted.org/packages/90/1a/cdb5083f043597c4d4276eae4e4c70c55ab5accec078da8611f24575a367/rpds_py-0.27.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:12ed005216a51b1d6e2b02a7bd31885fe317e45897de81d86dcce7d74618ffff", size = 387688, upload-time = "2025-08-27T12:14:27.537Z" }, - { url = "https://files.pythonhosted.org/packages/7c/92/cf786a15320e173f945d205ab31585cc43969743bb1a48b6888f7a2b0a2d/rpds_py-0.27.1-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:ee4308f409a40e50593c7e3bb8cbe0b4d4c66d1674a316324f0c2f5383b486f9", size = 407225, upload-time = "2025-08-27T12:14:28.981Z" }, - { url = "https://files.pythonhosted.org/packages/33/5c/85ee16df5b65063ef26017bef33096557a4c83fbe56218ac7cd8c235f16d/rpds_py-0.27.1-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0b08d152555acf1f455154d498ca855618c1378ec810646fcd7c76416ac6dc60", size = 423361, upload-time = "2025-08-27T12:14:30.469Z" }, - { url = "https://files.pythonhosted.org/packages/4b/8e/1c2741307fcabd1a334ecf008e92c4f47bb6f848712cf15c923becfe82bb/rpds_py-0.27.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:dce51c828941973a5684d458214d3a36fcd28da3e1875d659388f4f9f12cc33e", size = 562493, upload-time = "2025-08-27T12:14:31.987Z" }, - { url = "https://files.pythonhosted.org/packages/04/03/5159321baae9b2222442a70c1f988cbbd66b9be0675dd3936461269be360/rpds_py-0.27.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:c1476d6f29eb81aa4151c9a31219b03f1f798dc43d8af1250a870735516a1212", size = 592623, upload-time = "2025-08-27T12:14:33.543Z" }, - { url = "https://files.pythonhosted.org/packages/ff/39/c09fd1ad28b85bc1d4554a8710233c9f4cefd03d7717a1b8fbfd171d1167/rpds_py-0.27.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:3ce0cac322b0d69b63c9cdb895ee1b65805ec9ffad37639f291dd79467bee675", size = 558800, upload-time = "2025-08-27T12:14:35.436Z" }, - { url = "https://files.pythonhosted.org/packages/c5/d6/99228e6bbcf4baa764b18258f519a9035131d91b538d4e0e294313462a98/rpds_py-0.27.1-cp314-cp314-win32.whl", hash = "sha256:dfbfac137d2a3d0725758cd141f878bf4329ba25e34979797c89474a89a8a3a3", size = 221943, upload-time = "2025-08-27T12:14:36.898Z" }, - { url = 
"https://files.pythonhosted.org/packages/be/07/c802bc6b8e95be83b79bdf23d1aa61d68324cb1006e245d6c58e959e314d/rpds_py-0.27.1-cp314-cp314-win_amd64.whl", hash = "sha256:a6e57b0abfe7cc513450fcf529eb486b6e4d3f8aee83e92eb5f1ef848218d456", size = 233739, upload-time = "2025-08-27T12:14:38.386Z" }, - { url = "https://files.pythonhosted.org/packages/c8/89/3e1b1c16d4c2d547c5717377a8df99aee8099ff050f87c45cb4d5fa70891/rpds_py-0.27.1-cp314-cp314-win_arm64.whl", hash = "sha256:faf8d146f3d476abfee026c4ae3bdd9ca14236ae4e4c310cbd1cf75ba33d24a3", size = 223120, upload-time = "2025-08-27T12:14:39.82Z" }, - { url = "https://files.pythonhosted.org/packages/62/7e/dc7931dc2fa4a6e46b2a4fa744a9fe5c548efd70e0ba74f40b39fa4a8c10/rpds_py-0.27.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:ba81d2b56b6d4911ce735aad0a1d4495e808b8ee4dc58715998741a26874e7c2", size = 358944, upload-time = "2025-08-27T12:14:41.199Z" }, - { url = "https://files.pythonhosted.org/packages/e6/22/4af76ac4e9f336bfb1a5f240d18a33c6b2fcaadb7472ac7680576512b49a/rpds_py-0.27.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:84f7d509870098de0e864cad0102711c1e24e9b1a50ee713b65928adb22269e4", size = 342283, upload-time = "2025-08-27T12:14:42.699Z" }, - { url = "https://files.pythonhosted.org/packages/1c/15/2a7c619b3c2272ea9feb9ade67a45c40b3eeb500d503ad4c28c395dc51b4/rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9e960fc78fecd1100539f14132425e1d5fe44ecb9239f8f27f079962021523e", size = 380320, upload-time = "2025-08-27T12:14:44.157Z" }, - { url = "https://files.pythonhosted.org/packages/a2/7d/4c6d243ba4a3057e994bb5bedd01b5c963c12fe38dde707a52acdb3849e7/rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:62f85b665cedab1a503747617393573995dac4600ff51869d69ad2f39eb5e817", size = 391760, upload-time = "2025-08-27T12:14:45.845Z" }, - { url = "https://files.pythonhosted.org/packages/b4/71/b19401a909b83bcd67f90221330bc1ef11bc486fe4e04c24388d28a618ae/rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fed467af29776f6556250c9ed85ea5a4dd121ab56a5f8b206e3e7a4c551e48ec", size = 522476, upload-time = "2025-08-27T12:14:47.364Z" }, - { url = "https://files.pythonhosted.org/packages/e4/44/1a3b9715c0455d2e2f0f6df5ee6d6f5afdc423d0773a8a682ed2b43c566c/rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2729615f9d430af0ae6b36cf042cb55c0936408d543fb691e1a9e36648fd35a", size = 403418, upload-time = "2025-08-27T12:14:49.991Z" }, - { url = "https://files.pythonhosted.org/packages/1c/4b/fb6c4f14984eb56673bc868a66536f53417ddb13ed44b391998100a06a96/rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b207d881a9aef7ba753d69c123a35d96ca7cb808056998f6b9e8747321f03b8", size = 384771, upload-time = "2025-08-27T12:14:52.159Z" }, - { url = "https://files.pythonhosted.org/packages/c0/56/d5265d2d28b7420d7b4d4d85cad8ef891760f5135102e60d5c970b976e41/rpds_py-0.27.1-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:639fd5efec029f99b79ae47e5d7e00ad8a773da899b6309f6786ecaf22948c48", size = 400022, upload-time = "2025-08-27T12:14:53.859Z" }, - { url = "https://files.pythonhosted.org/packages/8f/e9/9f5fc70164a569bdd6ed9046486c3568d6926e3a49bdefeeccfb18655875/rpds_py-0.27.1-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fecc80cb2a90e28af8a9b366edacf33d7a91cbfe4c2c4544ea1246e949cfebeb", size = 416787, upload-time = "2025-08-27T12:14:55.673Z" }, - { url = 
"https://files.pythonhosted.org/packages/d4/64/56dd03430ba491db943a81dcdef115a985aac5f44f565cd39a00c766d45c/rpds_py-0.27.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:42a89282d711711d0a62d6f57d81aa43a1368686c45bc1c46b7f079d55692734", size = 557538, upload-time = "2025-08-27T12:14:57.245Z" }, - { url = "https://files.pythonhosted.org/packages/3f/36/92cc885a3129993b1d963a2a42ecf64e6a8e129d2c7cc980dbeba84e55fb/rpds_py-0.27.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:cf9931f14223de59551ab9d38ed18d92f14f055a5f78c1d8ad6493f735021bbb", size = 588512, upload-time = "2025-08-27T12:14:58.728Z" }, - { url = "https://files.pythonhosted.org/packages/dd/10/6b283707780a81919f71625351182b4f98932ac89a09023cb61865136244/rpds_py-0.27.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f39f58a27cc6e59f432b568ed8429c7e1641324fbe38131de852cd77b2d534b0", size = 555813, upload-time = "2025-08-27T12:15:00.334Z" }, - { url = "https://files.pythonhosted.org/packages/04/2e/30b5ea18c01379da6272a92825dd7e53dc9d15c88a19e97932d35d430ef7/rpds_py-0.27.1-cp314-cp314t-win32.whl", hash = "sha256:d5fa0ee122dc09e23607a28e6d7b150da16c662e66409bbe85230e4c85bb528a", size = 217385, upload-time = "2025-08-27T12:15:01.937Z" }, - { url = "https://files.pythonhosted.org/packages/32/7d/97119da51cb1dd3f2f3c0805f155a3aa4a95fa44fe7d78ae15e69edf4f34/rpds_py-0.27.1-cp314-cp314t-win_amd64.whl", hash = "sha256:6567d2bb951e21232c2f660c24cf3470bb96de56cdcb3f071a83feeaff8a2772", size = 230097, upload-time = "2025-08-27T12:15:03.961Z" }, { url = "https://files.pythonhosted.org/packages/0c/ed/e1fba02de17f4f76318b834425257c8ea297e415e12c68b4361f63e8ae92/rpds_py-0.27.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:cdfe4bb2f9fe7458b7453ad3c33e726d6d1c7c0a72960bcc23800d77384e42df", size = 371402, upload-time = "2025-08-27T12:15:51.561Z" }, { url = "https://files.pythonhosted.org/packages/af/7c/e16b959b316048b55585a697e94add55a4ae0d984434d279ea83442e460d/rpds_py-0.27.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:8fabb8fd848a5f75a2324e4a84501ee3a5e3c78d8603f83475441866e60b94a3", size = 354084, upload-time = "2025-08-27T12:15:53.219Z" }, { url = "https://files.pythonhosted.org/packages/de/c1/ade645f55de76799fdd08682d51ae6724cb46f318573f18be49b1e040428/rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eda8719d598f2f7f3e0f885cba8646644b55a187762bec091fa14a2b819746a9", size = 383090, upload-time = "2025-08-27T12:15:55.158Z" }, @@ -1837,6 +1930,53 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696, upload-time = "2025-04-16T09:51:17.142Z" }, ] +[[package]] +name = "scipy" +version = "1.16.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0a/ca/d8ace4f98322d01abcd52d381134344bf7b431eba7ed8b42bdea5a3c2ac9/scipy-1.16.3.tar.gz", hash = "sha256:01e87659402762f43bd2fee13370553a17ada367d42e7487800bf2916535aecb", size = 30597883, upload-time = "2025-10-28T17:38:54.068Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/5f/6f37d7439de1455ce9c5a556b8d1db0979f03a796c030bafdf08d35b7bf9/scipy-1.16.3-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:40be6cf99e68b6c4321e9f8782e7d5ff8265af28ef2cd56e9c9b2638fa08ad97", size = 36630881, upload-time = 
"2025-10-28T17:31:47.104Z" }, + { url = "https://files.pythonhosted.org/packages/7c/89/d70e9f628749b7e4db2aa4cd89735502ff3f08f7b9b27d2e799485987cd9/scipy-1.16.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:8be1ca9170fcb6223cc7c27f4305d680ded114a1567c0bd2bfcbf947d1b17511", size = 28941012, upload-time = "2025-10-28T17:31:53.411Z" }, + { url = "https://files.pythonhosted.org/packages/a8/a8/0e7a9a6872a923505dbdf6bb93451edcac120363131c19013044a1e7cb0c/scipy-1.16.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:bea0a62734d20d67608660f69dcda23e7f90fb4ca20974ab80b6ed40df87a005", size = 20931935, upload-time = "2025-10-28T17:31:57.361Z" }, + { url = "https://files.pythonhosted.org/packages/bd/c7/020fb72bd79ad798e4dbe53938543ecb96b3a9ac3fe274b7189e23e27353/scipy-1.16.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:2a207a6ce9c24f1951241f4693ede2d393f59c07abc159b2cb2be980820e01fb", size = 23534466, upload-time = "2025-10-28T17:32:01.875Z" }, + { url = "https://files.pythonhosted.org/packages/be/a0/668c4609ce6dbf2f948e167836ccaf897f95fb63fa231c87da7558a374cd/scipy-1.16.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:532fb5ad6a87e9e9cd9c959b106b73145a03f04c7d57ea3e6f6bb60b86ab0876", size = 33593618, upload-time = "2025-10-28T17:32:06.902Z" }, + { url = "https://files.pythonhosted.org/packages/ca/6e/8942461cf2636cdae083e3eb72622a7fbbfa5cf559c7d13ab250a5dbdc01/scipy-1.16.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0151a0749efeaaab78711c78422d413c583b8cdd2011a3c1d6c794938ee9fdb2", size = 35899798, upload-time = "2025-10-28T17:32:12.665Z" }, + { url = "https://files.pythonhosted.org/packages/79/e8/d0f33590364cdbd67f28ce79368b373889faa4ee959588beddf6daef9abe/scipy-1.16.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b7180967113560cca57418a7bc719e30366b47959dd845a93206fbed693c867e", size = 36226154, upload-time = "2025-10-28T17:32:17.961Z" }, + { url = "https://files.pythonhosted.org/packages/39/c1/1903de608c0c924a1749c590064e65810f8046e437aba6be365abc4f7557/scipy-1.16.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:deb3841c925eeddb6afc1e4e4a45e418d19ec7b87c5df177695224078e8ec733", size = 38878540, upload-time = "2025-10-28T17:32:23.907Z" }, + { url = "https://files.pythonhosted.org/packages/f1/d0/22ec7036ba0b0a35bccb7f25ab407382ed34af0b111475eb301c16f8a2e5/scipy-1.16.3-cp311-cp311-win_amd64.whl", hash = "sha256:53c3844d527213631e886621df5695d35e4f6a75f620dca412bcd292f6b87d78", size = 38722107, upload-time = "2025-10-28T17:32:29.921Z" }, + { url = "https://files.pythonhosted.org/packages/7b/60/8a00e5a524bb3bf8898db1650d350f50e6cffb9d7a491c561dc9826c7515/scipy-1.16.3-cp311-cp311-win_arm64.whl", hash = "sha256:9452781bd879b14b6f055b26643703551320aa8d79ae064a71df55c00286a184", size = 25506272, upload-time = "2025-10-28T17:32:34.577Z" }, +] + +[[package]] +name = "scs" +version = "3.2.10" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "scipy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ad/1f/12ad77d3857aca6f9ffce2b8767a25ca64c042bef61f72b51f2fbd20827d/scs-3.2.10.tar.gz", hash = "sha256:8204a88f423ea1fdeda358690b0e01552c6737a9b2408dbe956a937b60882ff7", size = 1690234, upload-time = "2025-12-28T15:58:27.015Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cc/29/6226881ff64ff0bf734d78699e43db01e32011528dd9b6a97ad214527f4a/scs-3.2.10-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:00c0eb6cd042260e9ec07f7a6f830635a41c24a5c31a2c8969ad8b5f9591c441", size = 96227, upload-time = "2025-12-28T15:57:33.769Z" }, + { url = "https://files.pythonhosted.org/packages/9b/96/59c360a3d45099a0c170326859ceec3a51bc52c3313c18fe8c2cec9e6b13/scs-3.2.10-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b33c7a3866f24bd70553fd8798bf3e9cf24ac8f05bca96836f87e69e83aed558", size = 5071340, upload-time = "2025-12-28T15:57:35.265Z" }, + { url = "https://files.pythonhosted.org/packages/f9/a4/1d5f3f7ee2b2aa77d2e43cb1b2f6939b48de3cacdcca0923050c865df17c/scs-3.2.10-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6ecd7b6c74eeed61b4b3243da11a3034a66c75623dbba575f90f29d76b395a25", size = 12079843, upload-time = "2025-12-28T15:57:37.149Z" }, + { url = "https://files.pythonhosted.org/packages/c2/35/6b2f1be268820b43fdd5eae74d4c1a10d2d3f675cb6c2980cbefed8cb0b6/scs-3.2.10-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:57f619f0488a513de059e67f22c44dd72417b689810a44c7e326da27272beae0", size = 11973744, upload-time = "2025-12-28T15:57:39.419Z" }, + { url = "https://files.pythonhosted.org/packages/da/85/82b2c60394ea9b8010ccc1428ae99dc9830649704fd3e7ba789cae7d0b2e/scs-3.2.10-cp311-cp311-win_amd64.whl", hash = "sha256:2a62fde9e6a0c9533825d48a0ea91eabfb6505468e8692c8359c9f8e64677bb1", size = 7478398, upload-time = "2025-12-28T15:57:41.34Z" }, +] + +[[package]] +name = "setuptools" +version = "80.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/18/5d/3bf57dcd21979b887f014ea83c24ae194cfcd12b9e0fda66b957c69d1fca/setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c", size = 1319958, upload-time = "2025-05-27T00:56:51.443Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload-time = "2025-05-27T00:56:49.664Z" }, +] + [[package]] name = "shapely" version = "2.1.2" @@ -1854,46 +1994,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d3/d4/9b2a9fe6039f9e42ccf2cb3e84f219fd8364b0c3b8e7bbc857b5fbe9c14c/shapely-2.1.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6ddc759f72b5b2b0f54a7e7cde44acef680a55019eb52ac63a7af2cf17cb9cd2", size = 4178586, upload-time = "2025-09-24T13:50:25.443Z" }, { url = "https://files.pythonhosted.org/packages/16/f6/9840f6963ed4decf76b08fd6d7fed14f8779fb7a62cb45c5617fa8ac6eab/shapely-2.1.2-cp311-cp311-win32.whl", hash = "sha256:2fa78b49485391224755a856ed3b3bd91c8455f6121fee0db0e71cefb07d0ef6", size = 1543961, upload-time = "2025-09-24T13:50:26.968Z" }, { url = "https://files.pythonhosted.org/packages/38/1e/3f8ea46353c2a33c1669eb7327f9665103aa3a8dfe7f2e4ef714c210b2c2/shapely-2.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:c64d5c97b2f47e3cd9b712eaced3b061f2b71234b3fc263e0fcf7d889c6559dc", size = 1722856, upload-time = "2025-09-24T13:50:28.497Z" }, - { url = "https://files.pythonhosted.org/packages/24/c0/f3b6453cf2dfa99adc0ba6675f9aaff9e526d2224cbd7ff9c1a879238693/shapely-2.1.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fe2533caae6a91a543dec62e8360fe86ffcdc42a7c55f9dfd0128a977a896b94", size = 1833550, upload-time = "2025-09-24T13:50:30.019Z" }, - { url = 
"https://files.pythonhosted.org/packages/86/07/59dee0bc4b913b7ab59ab1086225baca5b8f19865e6101db9ebb7243e132/shapely-2.1.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ba4d1333cc0bc94381d6d4308d2e4e008e0bd128bdcff5573199742ee3634359", size = 1643556, upload-time = "2025-09-24T13:50:32.291Z" }, - { url = "https://files.pythonhosted.org/packages/26/29/a5397e75b435b9895cd53e165083faed5d12fd9626eadec15a83a2411f0f/shapely-2.1.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0bd308103340030feef6c111d3eb98d50dc13feea33affc8a6f9fa549e9458a3", size = 2988308, upload-time = "2025-09-24T13:50:33.862Z" }, - { url = "https://files.pythonhosted.org/packages/b9/37/e781683abac55dde9771e086b790e554811a71ed0b2b8a1e789b7430dd44/shapely-2.1.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1e7d4d7ad262a48bb44277ca12c7c78cb1b0f56b32c10734ec9a1d30c0b0c54b", size = 3099844, upload-time = "2025-09-24T13:50:35.459Z" }, - { url = "https://files.pythonhosted.org/packages/d8/f3/9876b64d4a5a321b9dc482c92bb6f061f2fa42131cba643c699f39317cb9/shapely-2.1.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e9eddfe513096a71896441a7c37db72da0687b34752c4e193577a145c71736fc", size = 3988842, upload-time = "2025-09-24T13:50:37.478Z" }, - { url = "https://files.pythonhosted.org/packages/d1/a0/704c7292f7014c7e74ec84eddb7b109e1fbae74a16deae9c1504b1d15565/shapely-2.1.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:980c777c612514c0cf99bc8a9de6d286f5e186dcaf9091252fcd444e5638193d", size = 4152714, upload-time = "2025-09-24T13:50:39.9Z" }, - { url = "https://files.pythonhosted.org/packages/53/46/319c9dc788884ad0785242543cdffac0e6530e4d0deb6c4862bc4143dcf3/shapely-2.1.2-cp312-cp312-win32.whl", hash = "sha256:9111274b88e4d7b54a95218e243282709b330ef52b7b86bc6aaf4f805306f454", size = 1542745, upload-time = "2025-09-24T13:50:41.414Z" }, - { url = "https://files.pythonhosted.org/packages/ec/bf/cb6c1c505cb31e818e900b9312d514f381fbfa5c4363edfce0fcc4f8c1a4/shapely-2.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:743044b4cfb34f9a67205cee9279feaf60ba7d02e69febc2afc609047cb49179", size = 1722861, upload-time = "2025-09-24T13:50:43.35Z" }, - { url = "https://files.pythonhosted.org/packages/c3/90/98ef257c23c46425dc4d1d31005ad7c8d649fe423a38b917db02c30f1f5a/shapely-2.1.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b510dda1a3672d6879beb319bc7c5fd302c6c354584690973c838f46ec3e0fa8", size = 1832644, upload-time = "2025-09-24T13:50:44.886Z" }, - { url = "https://files.pythonhosted.org/packages/6d/ab/0bee5a830d209adcd3a01f2d4b70e587cdd9fd7380d5198c064091005af8/shapely-2.1.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8cff473e81017594d20ec55d86b54bc635544897e13a7cfc12e36909c5309a2a", size = 1642887, upload-time = "2025-09-24T13:50:46.735Z" }, - { url = "https://files.pythonhosted.org/packages/2d/5e/7d7f54ba960c13302584c73704d8c4d15404a51024631adb60b126a4ae88/shapely-2.1.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fe7b77dc63d707c09726b7908f575fc04ff1d1ad0f3fb92aec212396bc6cfe5e", size = 2970931, upload-time = "2025-09-24T13:50:48.374Z" }, - { url = "https://files.pythonhosted.org/packages/f2/a2/83fc37e2a58090e3d2ff79175a95493c664bcd0b653dd75cb9134645a4e5/shapely-2.1.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7ed1a5bbfb386ee8332713bf7508bc24e32d24b74fc9a7b9f8529a55db9f4ee6", size = 3082855, upload-time = "2025-09-24T13:50:50.037Z" }, - { url = 
"https://files.pythonhosted.org/packages/44/2b/578faf235a5b09f16b5f02833c53822294d7f21b242f8e2d0cf03fb64321/shapely-2.1.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a84e0582858d841d54355246ddfcbd1fce3179f185da7470f41ce39d001ee1af", size = 3979960, upload-time = "2025-09-24T13:50:51.74Z" }, - { url = "https://files.pythonhosted.org/packages/4d/04/167f096386120f692cc4ca02f75a17b961858997a95e67a3cb6a7bbd6b53/shapely-2.1.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dc3487447a43d42adcdf52d7ac73804f2312cbfa5d433a7d2c506dcab0033dfd", size = 4142851, upload-time = "2025-09-24T13:50:53.49Z" }, - { url = "https://files.pythonhosted.org/packages/48/74/fb402c5a6235d1c65a97348b48cdedb75fb19eca2b1d66d04969fc1c6091/shapely-2.1.2-cp313-cp313-win32.whl", hash = "sha256:9c3a3c648aedc9f99c09263b39f2d8252f199cb3ac154fadc173283d7d111350", size = 1541890, upload-time = "2025-09-24T13:50:55.337Z" }, - { url = "https://files.pythonhosted.org/packages/41/47/3647fe7ad990af60ad98b889657a976042c9988c2807cf322a9d6685f462/shapely-2.1.2-cp313-cp313-win_amd64.whl", hash = "sha256:ca2591bff6645c216695bdf1614fca9c82ea1144d4a7591a466fef64f28f0715", size = 1722151, upload-time = "2025-09-24T13:50:57.153Z" }, - { url = "https://files.pythonhosted.org/packages/3c/49/63953754faa51ffe7d8189bfbe9ca34def29f8c0e34c67cbe2a2795f269d/shapely-2.1.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2d93d23bdd2ed9dc157b46bc2f19b7da143ca8714464249bef6771c679d5ff40", size = 1834130, upload-time = "2025-09-24T13:50:58.49Z" }, - { url = "https://files.pythonhosted.org/packages/7f/ee/dce001c1984052970ff60eb4727164892fb2d08052c575042a47f5a9e88f/shapely-2.1.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:01d0d304b25634d60bd7cf291828119ab55a3bab87dc4af1e44b07fb225f188b", size = 1642802, upload-time = "2025-09-24T13:50:59.871Z" }, - { url = "https://files.pythonhosted.org/packages/da/e7/fc4e9a19929522877fa602f705706b96e78376afb7fad09cad5b9af1553c/shapely-2.1.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8d8382dd120d64b03698b7298b89611a6ea6f55ada9d39942838b79c9bc89801", size = 3018460, upload-time = "2025-09-24T13:51:02.08Z" }, - { url = "https://files.pythonhosted.org/packages/a1/18/7519a25db21847b525696883ddc8e6a0ecaa36159ea88e0fef11466384d0/shapely-2.1.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:19efa3611eef966e776183e338b2d7ea43569ae99ab34f8d17c2c054d3205cc0", size = 3095223, upload-time = "2025-09-24T13:51:04.472Z" }, - { url = "https://files.pythonhosted.org/packages/48/de/b59a620b1f3a129c3fecc2737104a0a7e04e79335bd3b0a1f1609744cf17/shapely-2.1.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:346ec0c1a0fcd32f57f00e4134d1200e14bf3f5ae12af87ba83ca275c502498c", size = 4030760, upload-time = "2025-09-24T13:51:06.455Z" }, - { url = "https://files.pythonhosted.org/packages/96/b3/c6655ee7232b417562bae192ae0d3ceaadb1cc0ffc2088a2ddf415456cc2/shapely-2.1.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6305993a35989391bd3476ee538a5c9a845861462327efe00dd11a5c8c709a99", size = 4170078, upload-time = "2025-09-24T13:51:08.584Z" }, - { url = "https://files.pythonhosted.org/packages/a0/8e/605c76808d73503c9333af8f6cbe7e1354d2d238bda5f88eea36bfe0f42a/shapely-2.1.2-cp313-cp313t-win32.whl", hash = "sha256:c8876673449f3401f278c86eb33224c5764582f72b653a415d0e6672fde887bf", size = 1559178, upload-time = "2025-09-24T13:51:10.73Z" }, - { url = 
"https://files.pythonhosted.org/packages/36/f7/d317eb232352a1f1444d11002d477e54514a4a6045536d49d0c59783c0da/shapely-2.1.2-cp313-cp313t-win_amd64.whl", hash = "sha256:4a44bc62a10d84c11a7a3d7c1c4fe857f7477c3506e24c9062da0db0ae0c449c", size = 1739756, upload-time = "2025-09-24T13:51:12.105Z" }, - { url = "https://files.pythonhosted.org/packages/fc/c4/3ce4c2d9b6aabd27d26ec988f08cb877ba9e6e96086eff81bfea93e688c7/shapely-2.1.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:9a522f460d28e2bf4e12396240a5fc1518788b2fcd73535166d748399ef0c223", size = 1831290, upload-time = "2025-09-24T13:51:13.56Z" }, - { url = "https://files.pythonhosted.org/packages/17/b9/f6ab8918fc15429f79cb04afa9f9913546212d7fb5e5196132a2af46676b/shapely-2.1.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1ff629e00818033b8d71139565527ced7d776c269a49bd78c9df84e8f852190c", size = 1641463, upload-time = "2025-09-24T13:51:14.972Z" }, - { url = "https://files.pythonhosted.org/packages/a5/57/91d59ae525ca641e7ac5551c04c9503aee6f29b92b392f31790fcb1a4358/shapely-2.1.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f67b34271dedc3c653eba4e3d7111aa421d5be9b4c4c7d38d30907f796cb30df", size = 2970145, upload-time = "2025-09-24T13:51:16.961Z" }, - { url = "https://files.pythonhosted.org/packages/8a/cb/4948be52ee1da6927831ab59e10d4c29baa2a714f599f1f0d1bc747f5777/shapely-2.1.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:21952dc00df38a2c28375659b07a3979d22641aeb104751e769c3ee825aadecf", size = 3073806, upload-time = "2025-09-24T13:51:18.712Z" }, - { url = "https://files.pythonhosted.org/packages/03/83/f768a54af775eb41ef2e7bec8a0a0dbe7d2431c3e78c0a8bdba7ab17e446/shapely-2.1.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1f2f33f486777456586948e333a56ae21f35ae273be99255a191f5c1fa302eb4", size = 3980803, upload-time = "2025-09-24T13:51:20.37Z" }, - { url = "https://files.pythonhosted.org/packages/9f/cb/559c7c195807c91c79d38a1f6901384a2878a76fbdf3f1048893a9b7534d/shapely-2.1.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:cf831a13e0d5a7eb519e96f58ec26e049b1fad411fc6fc23b162a7ce04d9cffc", size = 4133301, upload-time = "2025-09-24T13:51:21.887Z" }, - { url = "https://files.pythonhosted.org/packages/80/cd/60d5ae203241c53ef3abd2ef27c6800e21afd6c94e39db5315ea0cbafb4a/shapely-2.1.2-cp314-cp314-win32.whl", hash = "sha256:61edcd8d0d17dd99075d320a1dd39c0cb9616f7572f10ef91b4b5b00c4aeb566", size = 1583247, upload-time = "2025-09-24T13:51:23.401Z" }, - { url = "https://files.pythonhosted.org/packages/74/d4/135684f342e909330e50d31d441ace06bf83c7dc0777e11043f99167b123/shapely-2.1.2-cp314-cp314-win_amd64.whl", hash = "sha256:a444e7afccdb0999e203b976adb37ea633725333e5b119ad40b1ca291ecf311c", size = 1773019, upload-time = "2025-09-24T13:51:24.873Z" }, - { url = "https://files.pythonhosted.org/packages/a3/05/a44f3f9f695fa3ada22786dc9da33c933da1cbc4bfe876fe3a100bafe263/shapely-2.1.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:5ebe3f84c6112ad3d4632b1fd2290665aa75d4cef5f6c5d77c4c95b324527c6a", size = 1834137, upload-time = "2025-09-24T13:51:26.665Z" }, - { url = "https://files.pythonhosted.org/packages/52/7e/4d57db45bf314573427b0a70dfca15d912d108e6023f623947fa69f39b72/shapely-2.1.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5860eb9f00a1d49ebb14e881f5caf6c2cf472c7fd38bd7f253bbd34f934eb076", size = 1642884, upload-time = "2025-09-24T13:51:28.029Z" }, - { url = 
"https://files.pythonhosted.org/packages/5a/27/4e29c0a55d6d14ad7422bf86995d7ff3f54af0eba59617eb95caf84b9680/shapely-2.1.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b705c99c76695702656327b819c9660768ec33f5ce01fa32b2af62b56ba400a1", size = 3018320, upload-time = "2025-09-24T13:51:29.903Z" }, - { url = "https://files.pythonhosted.org/packages/9f/bb/992e6a3c463f4d29d4cd6ab8963b75b1b1040199edbd72beada4af46bde5/shapely-2.1.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a1fd0ea855b2cf7c9cddaf25543e914dd75af9de08785f20ca3085f2c9ca60b0", size = 3094931, upload-time = "2025-09-24T13:51:32.699Z" }, - { url = "https://files.pythonhosted.org/packages/9c/16/82e65e21070e473f0ed6451224ed9fa0be85033d17e0c6e7213a12f59d12/shapely-2.1.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:df90e2db118c3671a0754f38e36802db75fe0920d211a27481daf50a711fdf26", size = 4030406, upload-time = "2025-09-24T13:51:34.189Z" }, - { url = "https://files.pythonhosted.org/packages/7c/75/c24ed871c576d7e2b64b04b1fe3d075157f6eb54e59670d3f5ffb36e25c7/shapely-2.1.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:361b6d45030b4ac64ddd0a26046906c8202eb60d0f9f53085f5179f1d23021a0", size = 4169511, upload-time = "2025-09-24T13:51:36.297Z" }, - { url = "https://files.pythonhosted.org/packages/b1/f7/b3d1d6d18ebf55236eec1c681ce5e665742aab3c0b7b232720a7d43df7b6/shapely-2.1.2-cp314-cp314t-win32.whl", hash = "sha256:b54df60f1fbdecc8ebc2c5b11870461a6417b3d617f555e5033f1505d36e5735", size = 1602607, upload-time = "2025-09-24T13:51:37.757Z" }, - { url = "https://files.pythonhosted.org/packages/9a/f6/f09272a71976dfc138129b8faf435d064a811ae2f708cb147dccdf7aacdb/shapely-2.1.2-cp314-cp314t-win_amd64.whl", hash = "sha256:0036ac886e0923417932c2e6369b6c52e38e0ff5d9120b90eef5cd9a5fc5cae9", size = 1796682, upload-time = "2025-09-24T13:51:39.233Z" }, ] [[package]] @@ -1919,7 +2019,7 @@ name = "sqlalchemy" version = "2.0.43" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "greenlet", marker = "(python_full_version < '3.14' and platform_machine == 'AMD64') or (python_full_version < '3.14' and platform_machine == 'WIN32') or (python_full_version < '3.14' and platform_machine == 'aarch64') or (python_full_version < '3.14' and platform_machine == 'amd64') or (python_full_version < '3.14' and platform_machine == 'ppc64le') or (python_full_version < '3.14' and platform_machine == 'win32') or (python_full_version < '3.14' and platform_machine == 'x86_64')" }, + { name = "greenlet", marker = "platform_machine == 'AMD64' or platform_machine == 'WIN32' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'ppc64le' or platform_machine == 'win32' or platform_machine == 'x86_64'" }, { name = "typing-extensions" }, ] sdist = { url = "https://files.pythonhosted.org/packages/d7/bc/d59b5d97d27229b0e009bd9098cd81af71c2fa5549c580a0a67b9bed0496/sqlalchemy-2.0.43.tar.gz", hash = "sha256:788bfcef6787a7764169cfe9859fe425bf44559619e1d9f56f5bddf2ebf6f417", size = 9762949, upload-time = "2025-08-11T14:24:58.438Z" } @@ -1932,22 +2032,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/aa/79/c0121b12b1b114e2c8a10ea297a8a6d5367bc59081b2be896815154b1163/sqlalchemy-2.0.43-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4d3d9b904ad4a6b175a2de0738248822f5ac410f52c2fd389ada0b5262d6a1e3", size = 3258240, upload-time = "2025-08-11T15:57:52.983Z" }, { url = 
"https://files.pythonhosted.org/packages/79/99/a2f9be96fb382f3ba027ad42f00dbe30fdb6ba28cda5f11412eee346bec5/sqlalchemy-2.0.43-cp311-cp311-win32.whl", hash = "sha256:5cda6b51faff2639296e276591808c1726c4a77929cfaa0f514f30a5f6156921", size = 2101248, upload-time = "2025-08-11T15:55:01.855Z" }, { url = "https://files.pythonhosted.org/packages/ee/13/744a32ebe3b4a7a9c7ea4e57babae7aa22070d47acf330d8e5a1359607f1/sqlalchemy-2.0.43-cp311-cp311-win_amd64.whl", hash = "sha256:c5d1730b25d9a07727d20ad74bc1039bbbb0a6ca24e6769861c1aa5bf2c4c4a8", size = 2126109, upload-time = "2025-08-11T15:55:04.092Z" }, - { url = "https://files.pythonhosted.org/packages/61/db/20c78f1081446095450bdc6ee6cc10045fce67a8e003a5876b6eaafc5cc4/sqlalchemy-2.0.43-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:20d81fc2736509d7a2bd33292e489b056cbae543661bb7de7ce9f1c0cd6e7f24", size = 2134891, upload-time = "2025-08-11T15:51:13.019Z" }, - { url = "https://files.pythonhosted.org/packages/45/0a/3d89034ae62b200b4396f0f95319f7d86e9945ee64d2343dcad857150fa2/sqlalchemy-2.0.43-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:25b9fc27650ff5a2c9d490c13c14906b918b0de1f8fcbb4c992712d8caf40e83", size = 2123061, upload-time = "2025-08-11T15:51:14.319Z" }, - { url = "https://files.pythonhosted.org/packages/cb/10/2711f7ff1805919221ad5bee205971254845c069ee2e7036847103ca1e4c/sqlalchemy-2.0.43-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6772e3ca8a43a65a37c88e2f3e2adfd511b0b1da37ef11ed78dea16aeae85bd9", size = 3320384, upload-time = "2025-08-11T15:52:35.088Z" }, - { url = "https://files.pythonhosted.org/packages/6e/0e/3d155e264d2ed2778484006ef04647bc63f55b3e2d12e6a4f787747b5900/sqlalchemy-2.0.43-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a113da919c25f7f641ffbd07fbc9077abd4b3b75097c888ab818f962707eb48", size = 3329648, upload-time = "2025-08-11T15:56:34.153Z" }, - { url = "https://files.pythonhosted.org/packages/5b/81/635100fb19725c931622c673900da5efb1595c96ff5b441e07e3dd61f2be/sqlalchemy-2.0.43-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4286a1139f14b7d70141c67a8ae1582fc2b69105f1b09d9573494eb4bb4b2687", size = 3258030, upload-time = "2025-08-11T15:52:36.933Z" }, - { url = "https://files.pythonhosted.org/packages/0c/ed/a99302716d62b4965fded12520c1cbb189f99b17a6d8cf77611d21442e47/sqlalchemy-2.0.43-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:529064085be2f4d8a6e5fab12d36ad44f1909a18848fcfbdb59cc6d4bbe48efe", size = 3294469, upload-time = "2025-08-11T15:56:35.553Z" }, - { url = "https://files.pythonhosted.org/packages/5d/a2/3a11b06715149bf3310b55a98b5c1e84a42cfb949a7b800bc75cb4e33abc/sqlalchemy-2.0.43-cp312-cp312-win32.whl", hash = "sha256:b535d35dea8bbb8195e7e2b40059e2253acb2b7579b73c1b432a35363694641d", size = 2098906, upload-time = "2025-08-11T15:55:00.645Z" }, - { url = "https://files.pythonhosted.org/packages/bc/09/405c915a974814b90aa591280623adc6ad6b322f61fd5cff80aeaef216c9/sqlalchemy-2.0.43-cp312-cp312-win_amd64.whl", hash = "sha256:1c6d85327ca688dbae7e2b06d7d84cfe4f3fffa5b5f9e21bb6ce9d0e1a0e0e0a", size = 2126260, upload-time = "2025-08-11T15:55:02.965Z" }, - { url = "https://files.pythonhosted.org/packages/41/1c/a7260bd47a6fae7e03768bf66451437b36451143f36b285522b865987ced/sqlalchemy-2.0.43-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e7c08f57f75a2bb62d7ee80a89686a5e5669f199235c6d1dac75cd59374091c3", size = 2130598, upload-time = "2025-08-11T15:51:15.903Z" }, - { url = 
"https://files.pythonhosted.org/packages/8e/84/8a337454e82388283830b3586ad7847aa9c76fdd4f1df09cdd1f94591873/sqlalchemy-2.0.43-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:14111d22c29efad445cd5021a70a8b42f7d9152d8ba7f73304c4d82460946aaa", size = 2118415, upload-time = "2025-08-11T15:51:17.256Z" }, - { url = "https://files.pythonhosted.org/packages/cf/ff/22ab2328148492c4d71899d62a0e65370ea66c877aea017a244a35733685/sqlalchemy-2.0.43-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21b27b56eb2f82653168cefe6cb8e970cdaf4f3a6cb2c5e3c3c1cf3158968ff9", size = 3248707, upload-time = "2025-08-11T15:52:38.444Z" }, - { url = "https://files.pythonhosted.org/packages/dc/29/11ae2c2b981de60187f7cbc84277d9d21f101093d1b2e945c63774477aba/sqlalchemy-2.0.43-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c5a9da957c56e43d72126a3f5845603da00e0293720b03bde0aacffcf2dc04f", size = 3253602, upload-time = "2025-08-11T15:56:37.348Z" }, - { url = "https://files.pythonhosted.org/packages/b8/61/987b6c23b12c56d2be451bc70900f67dd7d989d52b1ee64f239cf19aec69/sqlalchemy-2.0.43-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5d79f9fdc9584ec83d1b3c75e9f4595c49017f5594fee1a2217117647225d738", size = 3183248, upload-time = "2025-08-11T15:52:39.865Z" }, - { url = "https://files.pythonhosted.org/packages/86/85/29d216002d4593c2ce1c0ec2cec46dda77bfbcd221e24caa6e85eff53d89/sqlalchemy-2.0.43-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9df7126fd9db49e3a5a3999442cc67e9ee8971f3cb9644250107d7296cb2a164", size = 3219363, upload-time = "2025-08-11T15:56:39.11Z" }, - { url = "https://files.pythonhosted.org/packages/b6/e4/bd78b01919c524f190b4905d47e7630bf4130b9f48fd971ae1c6225b6f6a/sqlalchemy-2.0.43-cp313-cp313-win32.whl", hash = "sha256:7f1ac7828857fcedb0361b48b9ac4821469f7694089d15550bbcf9ab22564a1d", size = 2096718, upload-time = "2025-08-11T15:55:05.349Z" }, - { url = "https://files.pythonhosted.org/packages/ac/a5/ca2f07a2a201f9497de1928f787926613db6307992fe5cda97624eb07c2f/sqlalchemy-2.0.43-cp313-cp313-win_amd64.whl", hash = "sha256:971ba928fcde01869361f504fcff3b7143b47d30de188b11c6357c0505824197", size = 2123200, upload-time = "2025-08-11T15:55:07.932Z" }, { url = "https://files.pythonhosted.org/packages/b8/d9/13bdde6521f322861fab67473cec4b1cc8999f3871953531cf61945fad92/sqlalchemy-2.0.43-py3-none-any.whl", hash = "sha256:1681c21dd2ccee222c2fe0bef671d1aef7c504087c9c4e800371cfcc8ac966fc", size = 1924759, upload-time = "2025-08-11T15:39:53.024Z" }, ] @@ -1992,13 +2076,34 @@ version = "0.47.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions" }, ] sdist = { url = "https://files.pythonhosted.org/packages/15/b9/cc3017f9a9c9b6e27c5106cc10cc7904653c3eec0729793aec10479dd669/starlette-0.47.3.tar.gz", hash = "sha256:6bc94f839cc176c4858894f1f8908f0ab79dfec1a6b8402f6da9be26ebea52e9", size = 2584144, upload-time = "2025-08-24T13:36:42.122Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/ce/fd/901cfa59aaa5b30a99e16876f11abe38b59a1a2c51ffb3d7142bb6089069/starlette-0.47.3-py3-none-any.whl", hash = "sha256:89c0778ca62a76b826101e7c709e70680a1699ca7da6b44d38eb0a7e61fe4b51", size = 72991, upload-time = "2025-08-24T13:36:40.887Z" }, ] +[[package]] +name = "sympy" +version = "1.14.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mpmath" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" }, +] + +[[package]] +name = "tabulate" +version = "0.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ec/fe/802052aecb21e3797b8f7902564ab6ea0d60ff8ca23952079064155d1ae1/tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c", size = 81090, upload-time = "2022-10-06T17:21:48.54Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" }, +] + [[package]] name = "tenacity" version = "8.5.0" @@ -2008,6 +2113,75 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d2/3f/8ba87d9e287b9d385a02a7114ddcef61b26f86411e121c9003eb509a1773/tenacity-8.5.0-py3-none-any.whl", hash = "sha256:b594c2a5945830c267ce6b79a166228323ed52718f30302c1359836112346687", size = 28165, upload-time = "2024-07-05T07:25:29.591Z" }, ] +[[package]] +name = "tfp-nightly" +version = "0.26.0.dev20260105" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "absl-py" }, + { name = "cloudpickle" }, + { name = "decorator" }, + { name = "dm-tree" }, + { name = "gast" }, + { name = "numpy" }, + { name = "six" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/47/f814cc2e6a0e4f4392915755858c271bca7c0341d5ba053f3dea983103f9/tfp_nightly-0.26.0.dev20260105-py2.py3-none-any.whl", hash = "sha256:bc97e1dfd6ac99b4d973e957c2d8641a85531b53385be817bc01f09e7e98db4e", size = 6975583, upload-time = "2026-01-05T09:52:03.064Z" }, +] + +[[package]] +name = "toolz" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/11/d6/114b492226588d6ff54579d95847662fc69196bdeec318eb45393b24c192/toolz-1.1.0.tar.gz", hash = "sha256:27a5c770d068c110d9ed9323f24f1543e83b2f300a687b7891c1a6d56b697b5b", size = 52613, upload-time = "2025-10-17T04:03:21.661Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/12/5911ae3eeec47800503a238d971e51722ccea5feb8569b735184d5fcdbc0/toolz-1.1.0-py3-none-any.whl", hash = "sha256:15ccc861ac51c53696de0a5d6d4607f99c210739caf987b5d2054f3efed429d8", size = 58093, upload-time = "2025-10-17T04:03:20.435Z" }, +] + +[[package]] +name = "torch" +version = "2.9.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "fsspec" }, + { name = "jinja2" }, + { name = "networkx" }, + { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-runtime-cu12", 
marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvshmem-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "sympy" }, + { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "typing-extensions" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/15/db/c064112ac0089af3d2f7a2b5bfbabf4aa407a78b74f87889e524b91c5402/torch-2.9.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:62b3fd888277946918cba4478cf849303da5359f0fb4e3bfb86b0533ba2eaf8d", size = 104220430, upload-time = "2025-11-12T15:20:31.705Z" }, + { url = "https://files.pythonhosted.org/packages/56/be/76eaa36c9cd032d3b01b001e2c5a05943df75f26211f68fae79e62f87734/torch-2.9.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:d033ff0ac3f5400df862a51bdde9bad83561f3739ea0046e68f5401ebfa67c1b", size = 899821446, upload-time = "2025-11-12T15:20:15.544Z" }, + { url = "https://files.pythonhosted.org/packages/47/cc/7a2949e38dfe3244c4df21f0e1c27bce8aedd6c604a587dd44fc21017cb4/torch-2.9.1-cp311-cp311-win_amd64.whl", hash = "sha256:0d06b30a9207b7c3516a9e0102114024755a07045f0c1d2f2a56b1819ac06bcb", size = 110973074, upload-time = "2025-11-12T15:21:39.958Z" }, + { url = "https://files.pythonhosted.org/packages/1e/ce/7d251155a783fb2c1bb6837b2b7023c622a2070a0a72726ca1df47e7ea34/torch-2.9.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:52347912d868653e1528b47cafaf79b285b98be3f4f35d5955389b1b95224475", size = 74463887, upload-time = "2025-11-12T15:20:36.611Z" }, +] + +[[package]] +name = "triton" +version = "3.5.1" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b0/72/ec90c3519eaf168f22cb1757ad412f3a2add4782ad3a92861c9ad135d886/triton-3.5.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:61413522a48add32302353fdbaaf92daaaab06f6b5e3229940d21b5207f47579", size = 170425802, upload-time = "2025-11-11T17:40:53.209Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0" @@ -2090,12 +2264,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e0/24/d9be5cd6642a6aa68352ded4b4b10fb0d7889cb7f45814fb92cecd35f101/watchdog-6.0.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6eb11feb5a0d452ee41f824e271ca311a09e250441c262ca2fd7ebcf2461a06c", size = 96393, upload-time = "2024-11-01T14:06:31.756Z" }, { url = 
"https://files.pythonhosted.org/packages/63/7a/6013b0d8dbc56adca7fdd4f0beed381c59f6752341b12fa0886fa7afc78b/watchdog-6.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ef810fbf7b781a5a593894e4f439773830bdecb885e6880d957d5b9382a960d2", size = 88392, upload-time = "2024-11-01T14:06:32.99Z" }, { url = "https://files.pythonhosted.org/packages/d1/40/b75381494851556de56281e053700e46bff5b37bf4c7267e858640af5a7f/watchdog-6.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:afd0fe1b2270917c5e23c2a65ce50c2a4abb63daafb0d419fde368e272a76b7c", size = 89019, upload-time = "2024-11-01T14:06:34.963Z" }, - { url = "https://files.pythonhosted.org/packages/39/ea/3930d07dafc9e286ed356a679aa02d777c06e9bfd1164fa7c19c288a5483/watchdog-6.0.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:bdd4e6f14b8b18c334febb9c4425a878a2ac20efd1e0b231978e7b150f92a948", size = 96471, upload-time = "2024-11-01T14:06:37.745Z" }, - { url = "https://files.pythonhosted.org/packages/12/87/48361531f70b1f87928b045df868a9fd4e253d9ae087fa4cf3f7113be363/watchdog-6.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c7c15dda13c4eb00d6fb6fc508b3c0ed88b9d5d374056b239c4ad1611125c860", size = 88449, upload-time = "2024-11-01T14:06:39.748Z" }, - { url = "https://files.pythonhosted.org/packages/5b/7e/8f322f5e600812e6f9a31b75d242631068ca8f4ef0582dd3ae6e72daecc8/watchdog-6.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6f10cb2d5902447c7d0da897e2c6768bca89174d0c6e1e30abec5421af97a5b0", size = 89054, upload-time = "2024-11-01T14:06:41.009Z" }, - { url = "https://files.pythonhosted.org/packages/68/98/b0345cabdce2041a01293ba483333582891a3bd5769b08eceb0d406056ef/watchdog-6.0.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:490ab2ef84f11129844c23fb14ecf30ef3d8a6abafd3754a6f75ca1e6654136c", size = 96480, upload-time = "2024-11-01T14:06:42.952Z" }, - { url = "https://files.pythonhosted.org/packages/85/83/cdf13902c626b28eedef7ec4f10745c52aad8a8fe7eb04ed7b1f111ca20e/watchdog-6.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:76aae96b00ae814b181bb25b1b98076d5fc84e8a53cd8885a318b42b6d3a5134", size = 88451, upload-time = "2024-11-01T14:06:45.084Z" }, - { url = "https://files.pythonhosted.org/packages/fe/c4/225c87bae08c8b9ec99030cd48ae9c4eca050a59bf5c2255853e18c87b50/watchdog-6.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a175f755fc2279e0b7312c0035d52e27211a5bc39719dd529625b1930917345b", size = 89057, upload-time = "2024-11-01T14:06:47.324Z" }, { url = "https://files.pythonhosted.org/packages/a9/c7/ca4bf3e518cb57a686b2feb4f55a1892fd9a3dd13f470fca14e00f80ea36/watchdog-6.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7607498efa04a3542ae3e05e64da8202e58159aa1fa4acddf7678d34a35d4f13", size = 79079, upload-time = "2024-11-01T14:06:59.472Z" }, { url = "https://files.pythonhosted.org/packages/5c/51/d46dc9332f9a647593c947b4b88e2381c8dfc0942d15b8edc0310fa4abb1/watchdog-6.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:9041567ee8953024c83343288ccc458fd0a2d811d6a0fd68c4c22609e3490379", size = 79078, upload-time = "2024-11-01T14:07:01.431Z" }, { url = "https://files.pythonhosted.org/packages/d4/57/04edbf5e169cd318d5f07b4766fee38e825d64b6913ca157ca32d1a42267/watchdog-6.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:82dc3e3143c7e38ec49d61af98d6558288c415eac98486a5c581726e0737c00e", size = 79076, upload-time = "2024-11-01T14:07:02.568Z" }, @@ -2125,31 +2293,30 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/08/ff/e9eed2ee5fed6f76fdd6032ca5cd38c57ca9661430bb3d5fb2872dc8703c/websockets-15.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:54479983bd5fb469c38f2f5c7e3a24f9a4e70594cd68cd1fa6b9340dadaff7cf", size = 181918, upload-time = "2025-03-05T20:02:11.968Z" }, { url = "https://files.pythonhosted.org/packages/d8/75/994634a49b7e12532be6a42103597b71098fd25900f7437d6055ed39930a/websockets-15.0.1-cp311-cp311-win32.whl", hash = "sha256:16b6c1b3e57799b9d38427dda63edcbe4926352c47cf88588c0be4ace18dac85", size = 176388, upload-time = "2025-03-05T20:02:13.32Z" }, { url = "https://files.pythonhosted.org/packages/98/93/e36c73f78400a65f5e236cd376713c34182e6663f6889cd45a4a04d8f203/websockets-15.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:27ccee0071a0e75d22cb35849b1db43f2ecd3e161041ac1ee9d2352ddf72f065", size = 176828, upload-time = "2025-03-05T20:02:14.585Z" }, - { url = "https://files.pythonhosted.org/packages/51/6b/4545a0d843594f5d0771e86463606a3988b5a09ca5123136f8a76580dd63/websockets-15.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:3e90baa811a5d73f3ca0bcbf32064d663ed81318ab225ee4f427ad4e26e5aff3", size = 175437, upload-time = "2025-03-05T20:02:16.706Z" }, - { url = "https://files.pythonhosted.org/packages/f4/71/809a0f5f6a06522af902e0f2ea2757f71ead94610010cf570ab5c98e99ed/websockets-15.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:592f1a9fe869c778694f0aa806ba0374e97648ab57936f092fd9d87f8bc03665", size = 173096, upload-time = "2025-03-05T20:02:18.832Z" }, - { url = "https://files.pythonhosted.org/packages/3d/69/1a681dd6f02180916f116894181eab8b2e25b31e484c5d0eae637ec01f7c/websockets-15.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0701bc3cfcb9164d04a14b149fd74be7347a530ad3bbf15ab2c678a2cd3dd9a2", size = 173332, upload-time = "2025-03-05T20:02:20.187Z" }, - { url = "https://files.pythonhosted.org/packages/a6/02/0073b3952f5bce97eafbb35757f8d0d54812b6174ed8dd952aa08429bcc3/websockets-15.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8b56bdcdb4505c8078cb6c7157d9811a85790f2f2b3632c7d1462ab5783d215", size = 183152, upload-time = "2025-03-05T20:02:22.286Z" }, - { url = "https://files.pythonhosted.org/packages/74/45/c205c8480eafd114b428284840da0b1be9ffd0e4f87338dc95dc6ff961a1/websockets-15.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0af68c55afbd5f07986df82831c7bff04846928ea8d1fd7f30052638788bc9b5", size = 182096, upload-time = "2025-03-05T20:02:24.368Z" }, - { url = "https://files.pythonhosted.org/packages/14/8f/aa61f528fba38578ec553c145857a181384c72b98156f858ca5c8e82d9d3/websockets-15.0.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dee438fed052b52e4f98f76c5790513235efaa1ef7f3f2192c392cd7c91b65", size = 182523, upload-time = "2025-03-05T20:02:25.669Z" }, - { url = "https://files.pythonhosted.org/packages/ec/6d/0267396610add5bc0d0d3e77f546d4cd287200804fe02323797de77dbce9/websockets-15.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d5f6b181bb38171a8ad1d6aa58a67a6aa9d4b38d0f8c5f496b9e42561dfc62fe", size = 182790, upload-time = "2025-03-05T20:02:26.99Z" }, - { url = "https://files.pythonhosted.org/packages/02/05/c68c5adbf679cf610ae2f74a9b871ae84564462955d991178f95a1ddb7dd/websockets-15.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5d54b09eba2bada6011aea5375542a157637b91029687eb4fdb2dab11059c1b4", size = 182165, upload-time = 
"2025-03-05T20:02:30.291Z" }, - { url = "https://files.pythonhosted.org/packages/29/93/bb672df7b2f5faac89761cb5fa34f5cec45a4026c383a4b5761c6cea5c16/websockets-15.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3be571a8b5afed347da347bfcf27ba12b069d9d7f42cb8c7028b5e98bbb12597", size = 182160, upload-time = "2025-03-05T20:02:31.634Z" }, - { url = "https://files.pythonhosted.org/packages/ff/83/de1f7709376dc3ca9b7eeb4b9a07b4526b14876b6d372a4dc62312bebee0/websockets-15.0.1-cp312-cp312-win32.whl", hash = "sha256:c338ffa0520bdb12fbc527265235639fb76e7bc7faafbb93f6ba80d9c06578a9", size = 176395, upload-time = "2025-03-05T20:02:33.017Z" }, - { url = "https://files.pythonhosted.org/packages/7d/71/abf2ebc3bbfa40f391ce1428c7168fb20582d0ff57019b69ea20fa698043/websockets-15.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcd5cf9e305d7b8338754470cf69cf81f420459dbae8a3b40cee57417f4614a7", size = 176841, upload-time = "2025-03-05T20:02:34.498Z" }, - { url = "https://files.pythonhosted.org/packages/cb/9f/51f0cf64471a9d2b4d0fc6c534f323b664e7095640c34562f5182e5a7195/websockets-15.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ee443ef070bb3b6ed74514f5efaa37a252af57c90eb33b956d35c8e9c10a1931", size = 175440, upload-time = "2025-03-05T20:02:36.695Z" }, - { url = "https://files.pythonhosted.org/packages/8a/05/aa116ec9943c718905997412c5989f7ed671bc0188ee2ba89520e8765d7b/websockets-15.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5a939de6b7b4e18ca683218320fc67ea886038265fd1ed30173f5ce3f8e85675", size = 173098, upload-time = "2025-03-05T20:02:37.985Z" }, - { url = "https://files.pythonhosted.org/packages/ff/0b/33cef55ff24f2d92924923c99926dcce78e7bd922d649467f0eda8368923/websockets-15.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:746ee8dba912cd6fc889a8147168991d50ed70447bf18bcda7039f7d2e3d9151", size = 173329, upload-time = "2025-03-05T20:02:39.298Z" }, - { url = "https://files.pythonhosted.org/packages/31/1d/063b25dcc01faa8fada1469bdf769de3768b7044eac9d41f734fd7b6ad6d/websockets-15.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:595b6c3969023ecf9041b2936ac3827e4623bfa3ccf007575f04c5a6aa318c22", size = 183111, upload-time = "2025-03-05T20:02:40.595Z" }, - { url = "https://files.pythonhosted.org/packages/93/53/9a87ee494a51bf63e4ec9241c1ccc4f7c2f45fff85d5bde2ff74fcb68b9e/websockets-15.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c714d2fc58b5ca3e285461a4cc0c9a66bd0e24c5da9911e30158286c9b5be7f", size = 182054, upload-time = "2025-03-05T20:02:41.926Z" }, - { url = "https://files.pythonhosted.org/packages/ff/b2/83a6ddf56cdcbad4e3d841fcc55d6ba7d19aeb89c50f24dd7e859ec0805f/websockets-15.0.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f3c1e2ab208db911594ae5b4f79addeb3501604a165019dd221c0bdcabe4db8", size = 182496, upload-time = "2025-03-05T20:02:43.304Z" }, - { url = "https://files.pythonhosted.org/packages/98/41/e7038944ed0abf34c45aa4635ba28136f06052e08fc2168520bb8b25149f/websockets-15.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:229cf1d3ca6c1804400b0a9790dc66528e08a6a1feec0d5040e8b9eb14422375", size = 182829, upload-time = "2025-03-05T20:02:48.812Z" }, - { url = "https://files.pythonhosted.org/packages/e0/17/de15b6158680c7623c6ef0db361da965ab25d813ae54fcfeae2e5b9ef910/websockets-15.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = 
"sha256:756c56e867a90fb00177d530dca4b097dd753cde348448a1012ed6c5131f8b7d", size = 182217, upload-time = "2025-03-05T20:02:50.14Z" }, - { url = "https://files.pythonhosted.org/packages/33/2b/1f168cb6041853eef0362fb9554c3824367c5560cbdaad89ac40f8c2edfc/websockets-15.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:558d023b3df0bffe50a04e710bc87742de35060580a293c2a984299ed83bc4e4", size = 182195, upload-time = "2025-03-05T20:02:51.561Z" }, - { url = "https://files.pythonhosted.org/packages/86/eb/20b6cdf273913d0ad05a6a14aed4b9a85591c18a987a3d47f20fa13dcc47/websockets-15.0.1-cp313-cp313-win32.whl", hash = "sha256:ba9e56e8ceeeedb2e080147ba85ffcd5cd0711b89576b83784d8605a7df455fa", size = 176393, upload-time = "2025-03-05T20:02:53.814Z" }, - { url = "https://files.pythonhosted.org/packages/1b/6c/c65773d6cab416a64d191d6ee8a8b1c68a09970ea6909d16965d26bfed1e/websockets-15.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:e09473f095a819042ecb2ab9465aee615bd9c2028e4ef7d933600a8401c79561", size = 176837, upload-time = "2025-03-05T20:02:55.237Z" }, { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743, upload-time = "2025-03-05T20:03:39.41Z" }, ] +[[package]] +name = "wrapt" +version = "2.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/49/2a/6de8a50cb435b7f42c46126cf1a54b2aab81784e74c8595c8e025e8f36d3/wrapt-2.0.1.tar.gz", hash = "sha256:9c9c635e78497cacb81e84f8b11b23e0aacac7a136e73b8e5b2109a1d9fc468f", size = 82040, upload-time = "2025-11-07T00:45:33.312Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/98/60/553997acf3939079dab022e37b67b1904b5b0cc235503226898ba573b10c/wrapt-2.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0e17283f533a0d24d6e5429a7d11f250a58d28b4ae5186f8f47853e3e70d2590", size = 77480, upload-time = "2025-11-07T00:43:30.573Z" }, + { url = "https://files.pythonhosted.org/packages/2d/50/e5b3d30895d77c52105c6d5cbf94d5b38e2a3dd4a53d22d246670da98f7c/wrapt-2.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:85df8d92158cb8f3965aecc27cf821461bb5f40b450b03facc5d9f0d4d6ddec6", size = 60690, upload-time = "2025-11-07T00:43:31.594Z" }, + { url = "https://files.pythonhosted.org/packages/f0/40/660b2898703e5cbbb43db10cdefcc294274458c3ca4c68637c2b99371507/wrapt-2.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c1be685ac7700c966b8610ccc63c3187a72e33cab53526a27b2a285a662cd4f7", size = 61578, upload-time = "2025-11-07T00:43:32.918Z" }, + { url = "https://files.pythonhosted.org/packages/5b/36/825b44c8a10556957bc0c1d84c7b29a40e05fcf1873b6c40aa9dbe0bd972/wrapt-2.0.1-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:df0b6d3b95932809c5b3fecc18fda0f1e07452d05e2662a0b35548985f256e28", size = 114115, upload-time = "2025-11-07T00:43:35.605Z" }, + { url = "https://files.pythonhosted.org/packages/83/73/0a5d14bb1599677304d3c613a55457d34c344e9b60eda8a737c2ead7619e/wrapt-2.0.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4da7384b0e5d4cae05c97cd6f94faaf78cc8b0f791fc63af43436d98c4ab37bb", size = 116157, upload-time = "2025-11-07T00:43:37.058Z" }, + { url = 
"https://files.pythonhosted.org/packages/01/22/1c158fe763dbf0a119f985d945711d288994fe5514c0646ebe0eb18b016d/wrapt-2.0.1-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ec65a78fbd9d6f083a15d7613b2800d5663dbb6bb96003899c834beaa68b242c", size = 112535, upload-time = "2025-11-07T00:43:34.138Z" }, + { url = "https://files.pythonhosted.org/packages/5c/28/4f16861af67d6de4eae9927799b559c20ebdd4fe432e89ea7fe6fcd9d709/wrapt-2.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7de3cc939be0e1174969f943f3b44e0d79b6f9a82198133a5b7fc6cc92882f16", size = 115404, upload-time = "2025-11-07T00:43:39.214Z" }, + { url = "https://files.pythonhosted.org/packages/a0/8b/7960122e625fad908f189b59c4aae2d50916eb4098b0fb2819c5a177414f/wrapt-2.0.1-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:fb1a5b72cbd751813adc02ef01ada0b0d05d3dcbc32976ce189a1279d80ad4a2", size = 111802, upload-time = "2025-11-07T00:43:40.476Z" }, + { url = "https://files.pythonhosted.org/packages/3e/73/7881eee5ac31132a713ab19a22c9e5f1f7365c8b1df50abba5d45b781312/wrapt-2.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3fa272ca34332581e00bf7773e993d4f632594eb2d1b0b162a9038df0fd971dd", size = 113837, upload-time = "2025-11-07T00:43:42.921Z" }, + { url = "https://files.pythonhosted.org/packages/45/00/9499a3d14e636d1f7089339f96c4409bbc7544d0889f12264efa25502ae8/wrapt-2.0.1-cp311-cp311-win32.whl", hash = "sha256:fc007fdf480c77301ab1afdbb6ab22a5deee8885f3b1ed7afcb7e5e84a0e27be", size = 58028, upload-time = "2025-11-07T00:43:47.369Z" }, + { url = "https://files.pythonhosted.org/packages/70/5d/8f3d7eea52f22638748f74b102e38fdf88cb57d08ddeb7827c476a20b01b/wrapt-2.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:47434236c396d04875180171ee1f3815ca1eada05e24a1ee99546320d54d1d1b", size = 60385, upload-time = "2025-11-07T00:43:44.34Z" }, + { url = "https://files.pythonhosted.org/packages/14/e2/32195e57a8209003587bbbad44d5922f13e0ced2a493bb46ca882c5b123d/wrapt-2.0.1-cp311-cp311-win_arm64.whl", hash = "sha256:837e31620e06b16030b1d126ed78e9383815cbac914693f54926d816d35d8edf", size = 58893, upload-time = "2025-11-07T00:43:46.161Z" }, + { url = "https://files.pythonhosted.org/packages/15/d1/b51471c11592ff9c012bd3e2f7334a6ff2f42a7aed2caffcf0bdddc9cb89/wrapt-2.0.1-py3-none-any.whl", hash = "sha256:4d2ce1bf1a48c5277d7969259232b57645aae5686dba1eaeade39442277afbca", size = 44046, upload-time = "2025-11-07T00:45:32.116Z" }, +] + [[package]] name = "zipp" version = "3.23.0"